author     Dimitry Andric <dim@FreeBSD.org>  2013-04-08 18:41:23 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2013-04-08 18:41:23 +0000
commit     4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 (patch)
tree       06099edc18d30894081a822b756f117cbe0b8207
parent     482e7bddf617ae804dc47133cb07eb4aa81e45de (diff)
download   src-test2-vendor/llvm/llvm-trunk-r178860.tar.gz
           src-test2-vendor/llvm/llvm-trunk-r178860.zip
Vendor import of llvm trunk r178860 (tag: vendor/llvm/llvm-trunk-r178860)

Diffstat
-rw-r--r--.arcconfig4
-rw-r--r--CMakeLists.txt26
-rw-r--r--CODE_OWNERS.TXT100
-rw-r--r--CREDITS.TXT15
-rw-r--r--LICENSE.TXT4
-rw-r--r--Makefile27
-rw-r--r--Makefile.common2
-rw-r--r--Makefile.config.in30
-rw-r--r--Makefile.rules39
-rw-r--r--README.txt10
-rwxr-xr-xautoconf/AutoRegen.sh2
-rwxr-xr-xautoconf/config.sub4
-rw-r--r--autoconf/configure.ac247
-rw-r--r--autoconf/m4/cxx_flag_check.m4  2
-rw-r--r--autoconf/m4/func_isinf.m4  22
-rw-r--r--autoconf/m4/huge_val.m4  10
-rw-r--r--autoconf/m4/single_cxx_check.m4  20
-rw-r--r--bindings/python/llvm/common.py50
-rw-r--r--bindings/python/llvm/disassembler.py11
-rw-r--r--bindings/python/llvm/tests/test_disassembler.py13
-rwxr-xr-xcmake/config-ix.cmake34
-rwxr-xr-xcmake/modules/AddLLVM.cmake34
-rw-r--r--cmake/modules/GetSVN.cmake25
-rw-r--r--cmake/modules/HandleLLVMOptions.cmake128
-rwxr-xr-xcmake/modules/LLVM-Config.cmake7
-rw-r--r--cmake/modules/VersionFromVCS.cmake82
-rwxr-xr-xconfigure951
-rw-r--r--docs/AliasAnalysis.rst39
-rw-r--r--docs/Atomics.rst2
-rw-r--r--docs/BitCodeFormat.rst6
-rw-r--r--docs/BranchWeightMetadata.rst12
-rw-r--r--docs/Bugpoint.rst6
-rw-r--r--docs/CMake.rst24
-rw-r--r--docs/CodeGenerator.rst153
-rw-r--r--docs/CodingStandards.rst86
-rw-r--r--docs/CommandGuide/FileCheck.rst221
-rw-r--r--docs/CommandGuide/bugpoint.rst84
-rw-r--r--docs/CommandGuide/index.rst3
-rw-r--r--docs/CommandGuide/lit.rst428
-rw-r--r--docs/CommandGuide/llc.rst176
-rw-r--r--docs/CommandGuide/lli.rst2
-rw-r--r--docs/CommandGuide/llvm-bcanalyzer.rst221
-rw-r--r--docs/CommandGuide/llvm-cov.rst40
-rw-r--r--docs/CommandGuide/llvm-link.rst86
-rw-r--r--docs/CommandGuide/llvm-stress.rst30
-rw-r--r--docs/CommandGuide/llvm-symbolizer.rst65
-rw-r--r--docs/CommandGuide/opt.rst182
-rw-r--r--docs/CommandGuide/tblgen.rst139
-rw-r--r--docs/CommandLine.rst6
-rw-r--r--docs/CompilerWriterInfo.rst35
-rw-r--r--docs/DebuggingJITedCode.rst4
-rw-r--r--docs/DeveloperPolicy.rst12
-rw-r--r--docs/Dummy.html0
-rw-r--r--docs/ExceptionHandling.rst49
-rw-r--r--docs/ExtendingLLVM.rst2
-rw-r--r--docs/FAQ.rst39
-rw-r--r--docs/GCCFEBuildInstrs.html279
-rw-r--r--docs/GarbageCollection.html1389
-rw-r--r--docs/GarbageCollection.rst1029
-rw-r--r--docs/GetElementPtr.rst8
-rw-r--r--docs/GettingStarted.rst239
-rw-r--r--docs/GettingStartedVS.rst2
-rw-r--r--docs/GoldPlugin.rst4
-rw-r--r--docs/HowToAddABuilder.rst4
-rw-r--r--docs/HowToBuildOnARM.rst14
-rw-r--r--docs/HowToReleaseLLVM.html581
-rw-r--r--docs/HowToReleaseLLVM.rst422
-rw-r--r--docs/HowToSetUpLLVMStyleRTTI.rst76
-rw-r--r--docs/HowToSubmitABug.rst4
-rw-r--r--docs/HowToUseAttributes.rst81
-rwxr-xr-xdocs/HowToUseInstrMappings.rst8
-rw-r--r--docs/LLVMBuild.html368
-rw-r--r--docs/LLVMBuild.rst325
-rw-r--r--docs/LangRef.html8776
-rw-r--r--docs/LangRef.rst8605
-rw-r--r--docs/Lexicon.rst25
-rw-r--r--docs/LinkTimeOptimization.rst9
-rw-r--r--docs/Makefile.sphinx4
-rw-r--r--docs/MakefileGuide.rst29
-rw-r--r--docs/MarkedUpDisassembly.rst2
-rw-r--r--docs/NVPTXUsage.rst276
-rw-r--r--docs/Packaging.rst2
-rw-r--r--docs/Passes.html2049
-rw-r--r--docs/Passes.rst1261
-rw-r--r--docs/Phabricator.rst6
-rw-r--r--docs/ProgrammersManual.html4156
-rw-r--r--docs/ProgrammersManual.rst3204
-rw-r--r--docs/Projects.rst10
-rw-r--r--docs/README.txt42
-rw-r--r--docs/ReleaseNotes.html975
-rw-r--r--docs/ReleaseNotes.rst144
-rw-r--r--docs/SegmentedStacks.rst2
-rw-r--r--docs/SourceLevelDebugging.html2858
-rw-r--r--docs/SourceLevelDebugging.rst2281
-rw-r--r--docs/SphinxQuickstartTemplate.rst51
-rw-r--r--docs/SystemLibrary.html316
-rw-r--r--docs/SystemLibrary.rst247
-rw-r--r--docs/TableGen/LangRef.rst383
-rw-r--r--docs/TableGenFundamentals.rst37
-rw-r--r--docs/TestSuiteMakefileGuide.html351
-rw-r--r--docs/TestSuiteMakefileGuide.rst276
-rw-r--r--docs/TestingGuide.html916
-rw-r--r--docs/TestingGuide.rst455
-rw-r--r--docs/Vectorizers.rst338
-rw-r--r--docs/WritingAnLLVMBackend.html2557
-rw-r--r--docs/WritingAnLLVMBackend.rst1838
-rw-r--r--docs/WritingAnLLVMPass.html1954
-rw-r--r--docs/WritingAnLLVMPass.rst1436
-rw-r--r--docs/YamlIO.rst860
-rw-r--r--docs/conf.py8
-rw-r--r--docs/design_and_overview.rst36
-rw-r--r--docs/development_process.rst30
-rw-r--r--docs/doxygen.footer2
-rw-r--r--docs/gcc-loops.png  bin 0 -> 21535 bytes
-rw-r--r--docs/index.rst408
-rw-r--r--docs/linpack-pc.png  bin 0 -> 13578 bytes
-rw-r--r--docs/mailing_lists.rst35
-rw-r--r--docs/programming.rst57
-rw-r--r--docs/subsystems.rst106
-rw-r--r--docs/tutorial/LangImpl1.html348
-rw-r--r--docs/tutorial/LangImpl1.rst278
-rw-r--r--docs/tutorial/LangImpl2.html1231
-rw-r--r--docs/tutorial/LangImpl2.rst1096
-rw-r--r--docs/tutorial/LangImpl3.html1268
-rw-r--r--docs/tutorial/LangImpl3.rst1160
-rw-r--r--docs/tutorial/LangImpl4.html1152
-rw-r--r--docs/tutorial/LangImpl4.rst1061
-rw-r--r--docs/tutorial/LangImpl5.html1772
-rw-r--r--docs/tutorial/LangImpl5.rst1607
-rw-r--r--docs/tutorial/LangImpl6.html1829
-rw-r--r--docs/tutorial/LangImpl6.rst1726
-rw-r--r--docs/tutorial/LangImpl7.html2164
-rw-r--r--docs/tutorial/LangImpl7.rst2003
-rw-r--r--docs/tutorial/LangImpl8.html359
-rw-r--r--docs/tutorial/LangImpl8.rst267
-rw-r--r--docs/tutorial/OCamlLangImpl1.html365
-rw-r--r--docs/tutorial/OCamlLangImpl1.rst285
-rw-r--r--docs/tutorial/OCamlLangImpl2.html1043
-rw-r--r--docs/tutorial/OCamlLangImpl2.rst896
-rw-r--r--docs/tutorial/OCamlLangImpl3.html1093
-rw-r--r--docs/tutorial/OCamlLangImpl3.rst961
-rw-r--r--docs/tutorial/OCamlLangImpl4.html1026
-rw-r--r--docs/tutorial/OCamlLangImpl4.rst915
-rw-r--r--docs/tutorial/OCamlLangImpl5.html1560
-rw-r--r--docs/tutorial/OCamlLangImpl5.rst1362
-rw-r--r--docs/tutorial/OCamlLangImpl6.html1574
-rw-r--r--docs/tutorial/OCamlLangImpl6.rst1441
-rw-r--r--docs/tutorial/OCamlLangImpl7.html1904
-rw-r--r--docs/tutorial/OCamlLangImpl7.rst1723
-rw-r--r--docs/tutorial/OCamlLangImpl8.html359
-rw-r--r--docs/tutorial/OCamlLangImpl8.rst267
-rw-r--r--docs/tutorial/index.html48
-rw-r--r--docs/tutorial/index.rst43
-rw-r--r--docs/userguides.rst104
-rw-r--r--docs/yaml2obj.rst2
-rw-r--r--examples/BrainF/BrainF.cpp6
-rw-r--r--examples/BrainF/BrainF.h6
-rw-r--r--examples/BrainF/BrainFDriver.cpp4
-rw-r--r--examples/ExceptionDemo/ExceptionDemo.cpp41
-rw-r--r--examples/Fibonacci/fibonacci.cpp16
-rw-r--r--examples/HowToUseJIT/HowToUseJIT.cpp18
-rw-r--r--examples/Kaleidoscope/Chapter2/toy.cpp2
-rw-r--r--examples/Kaleidoscope/Chapter3/toy.cpp10
-rw-r--r--examples/Kaleidoscope/Chapter4/toy.cpp18
-rw-r--r--examples/Kaleidoscope/Chapter5/toy.cpp18
-rw-r--r--examples/Kaleidoscope/Chapter6/toy.cpp18
-rw-r--r--examples/Kaleidoscope/Chapter7/toy.cpp18
-rw-r--r--examples/ModuleMaker/ModuleMaker.cpp10
-rw-r--r--examples/ParallelJIT/ParallelJIT.cpp16
-rw-r--r--include/llvm-c/Core.h49
-rw-r--r--include/llvm-c/Disassembler.h19
-rw-r--r--include/llvm-c/EnhancedDisassembly.h530
-rw-r--r--include/llvm-c/Initialization.h1
-rw-r--r--include/llvm-c/LinkTimeOptimizer.h4
-rw-r--r--include/llvm-c/TargetMachine.h1
-rw-r--r--include/llvm-c/Transforms/PassManagerBuilder.h8
-rw-r--r--include/llvm-c/lto.h11
-rw-r--r--include/llvm/ADT/APFloat.h13
-rw-r--r--include/llvm/ADT/APInt.h79
-rw-r--r--include/llvm/ADT/APSInt.h10
-rw-r--r--include/llvm/ADT/ArrayRef.h29
-rw-r--r--include/llvm/ADT/BitVector.h4
-rw-r--r--include/llvm/ADT/DAGDeltaAlgorithm.h2
-rw-r--r--include/llvm/ADT/DeltaAlgorithm.h2
-rw-r--r--include/llvm/ADT/DenseMap.h52
-rw-r--r--include/llvm/ADT/DenseSet.h4
-rw-r--r--include/llvm/ADT/DepthFirstIterator.h2
-rw-r--r--include/llvm/ADT/FoldingSet.h2
-rw-r--r--include/llvm/ADT/ImmutableIntervalMap.h4
-rw-r--r--include/llvm/ADT/ImmutableList.h6
-rw-r--r--include/llvm/ADT/ImmutableMap.h34
-rw-r--r--include/llvm/ADT/ImmutableSet.h33
-rw-r--r--include/llvm/ADT/IntervalMap.h22
-rw-r--r--include/llvm/ADT/IntrusiveRefCntPtr.h12
-rw-r--r--include/llvm/ADT/MapVector.h42
-rw-r--r--include/llvm/ADT/None.h27
-rw-r--r--include/llvm/ADT/NullablePtr.h4
-rw-r--r--include/llvm/ADT/Optional.h132
-rw-r--r--include/llvm/ADT/OwningPtr.h8
-rw-r--r--include/llvm/ADT/PointerIntPair.h27
-rw-r--r--include/llvm/ADT/PointerUnion.h21
-rw-r--r--include/llvm/ADT/PostOrderIterator.h2
-rw-r--r--include/llvm/ADT/PriorityQueue.h4
-rw-r--r--include/llvm/ADT/SCCIterator.h2
-rw-r--r--include/llvm/ADT/STLExtras.h6
-rw-r--r--include/llvm/ADT/SmallBitVector.h16
-rw-r--r--include/llvm/ADT/SmallPtrSet.h31
-rw-r--r--include/llvm/ADT/SmallSet.h3
-rw-r--r--include/llvm/ADT/SmallString.h2
-rw-r--r--include/llvm/ADT/SmallVector.h28
-rw-r--r--include/llvm/ADT/SparseMultiSet.h526
-rw-r--r--include/llvm/ADT/SparseSet.h2
-rw-r--r--include/llvm/ADT/Statistic.h44
-rw-r--r--include/llvm/ADT/StringExtras.h13
-rw-r--r--include/llvm/ADT/StringMap.h14
-rw-r--r--include/llvm/ADT/StringRef.h9
-rw-r--r--include/llvm/ADT/StringSet.h26
-rw-r--r--include/llvm/ADT/TinyPtrVector.h2
-rw-r--r--include/llvm/ADT/Triple.h19
-rw-r--r--include/llvm/ADT/ValueMap.h3
-rw-r--r--include/llvm/ADT/VariadicFunction.h6
-rw-r--r--include/llvm/ADT/ilist.h31
-rw-r--r--include/llvm/ADT/ilist_node.h4
-rw-r--r--include/llvm/AddressingMode.h41
-rw-r--r--include/llvm/Analysis/AliasAnalysis.h33
-rw-r--r--include/llvm/Analysis/AliasSetTracker.h3
-rw-r--r--include/llvm/Analysis/BlockFrequencyImpl.h6
-rw-r--r--include/llvm/Analysis/BranchProbabilityInfo.h4
-rw-r--r--include/llvm/Analysis/CFGPrinter.h6
-rw-r--r--include/llvm/Analysis/CallGraph.h6
-rw-r--r--include/llvm/Analysis/CallGraphSCCPass.h107
-rw-r--r--include/llvm/Analysis/CallPrinter.h27
-rw-r--r--include/llvm/Analysis/CaptureTracking.h9
-rw-r--r--include/llvm/Analysis/CodeMetrics.h134
-rw-r--r--include/llvm/Analysis/DOTGraphTraitsPass.h115
-rw-r--r--include/llvm/Analysis/DependenceAnalysis.h42
-rw-r--r--include/llvm/Analysis/DominatorInternals.h2
-rw-r--r--include/llvm/Analysis/Dominators.h19
-rw-r--r--include/llvm/Analysis/IVUsers.h1
-rw-r--r--include/llvm/Analysis/InlineCost.h218
-rw-r--r--include/llvm/Analysis/InstructionSimplify.h68
-rw-r--r--include/llvm/Analysis/Interval.h4
-rw-r--r--include/llvm/Analysis/IntervalIterator.h8
-rw-r--r--include/llvm/Analysis/IntervalPartition.h4
-rw-r--r--include/llvm/Analysis/LibCallAliasAnalysis.h4
-rw-r--r--include/llvm/Analysis/Loads.h2
-rw-r--r--include/llvm/Analysis/LoopInfo.h36
-rw-r--r--include/llvm/Analysis/LoopInfoImpl.h7
-rw-r--r--include/llvm/Analysis/LoopIterator.h5
-rw-r--r--include/llvm/Analysis/LoopPass.h8
-rw-r--r--include/llvm/Analysis/MemoryBuiltins.h23
-rw-r--r--include/llvm/Analysis/MemoryDependenceAnalysis.h95
-rw-r--r--include/llvm/Analysis/PHITransAddr.h2
-rw-r--r--include/llvm/Analysis/Passes.h3
-rw-r--r--include/llvm/Analysis/PathNumbering.h10
-rw-r--r--include/llvm/Analysis/PathProfileInfo.h6
-rw-r--r--include/llvm/Analysis/PostDominators.h4
-rw-r--r--include/llvm/Analysis/ProfileDataLoader.h1
-rw-r--r--include/llvm/Analysis/ProfileInfo.h2
-rw-r--r--include/llvm/Analysis/ProfileInfoLoader.h2
-rw-r--r--include/llvm/Analysis/PtrUseVisitor.h285
-rw-r--r--include/llvm/Analysis/RegionInfo.h4
-rw-r--r--include/llvm/Analysis/RegionIterator.h6
-rw-r--r--include/llvm/Analysis/RegionPass.h11
-rw-r--r--include/llvm/Analysis/ScalarEvolution.h20
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpander.h14
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpressions.h151
-rw-r--r--include/llvm/Analysis/ScalarEvolutionNormalization.h4
-rw-r--r--include/llvm/Analysis/SparsePropagation.h8
-rw-r--r--include/llvm/Analysis/TargetTransformInfo.h349
-rw-r--r--include/llvm/Analysis/Trace.h4
-rw-r--r--include/llvm/Analysis/ValueTracking.h22
-rw-r--r--include/llvm/Argument.h91
-rw-r--r--include/llvm/Assembly/PrintModulePass.h6
-rw-r--r--include/llvm/Attributes.h431
-rw-r--r--include/llvm/BasicBlock.h290
-rw-r--r--include/llvm/Bitcode/Archive.h6
-rw-r--r--include/llvm/Bitcode/BitCodes.h27
-rw-r--r--include/llvm/Bitcode/BitstreamReader.h534
-rw-r--r--include/llvm/Bitcode/BitstreamWriter.h36
-rw-r--r--include/llvm/Bitcode/LLVMBitCodes.h31
-rw-r--r--include/llvm/Bitcode/ReaderWriter.h4
-rw-r--r--include/llvm/CMakeLists.txt8
-rw-r--r--include/llvm/CallGraphSCCPass.h104
-rw-r--r--include/llvm/CallingConv.h125
-rw-r--r--include/llvm/CodeGen/Analysis.h12
-rw-r--r--include/llvm/CodeGen/AsmPrinter.h8
-rw-r--r--include/llvm/CodeGen/CalcSpillWeights.h2
-rw-r--r--include/llvm/CodeGen/CallingConvLower.h8
-rw-r--r--include/llvm/CodeGen/CommandFlags.h9
-rw-r--r--include/llvm/CodeGen/DAGCombine.h25
-rw-r--r--include/llvm/CodeGen/DFAPacketizer.h4
-rw-r--r--include/llvm/CodeGen/FastISel.h20
-rw-r--r--include/llvm/CodeGen/FunctionLoweringInfo.h13
-rw-r--r--include/llvm/CodeGen/GCMetadata.h5
-rw-r--r--include/llvm/CodeGen/GCs.h6
-rw-r--r--include/llvm/CodeGen/ISDOpcodes.h9
-rw-r--r--include/llvm/CodeGen/IntrinsicLowering.h2
-rw-r--r--include/llvm/CodeGen/JITCodeEmitter.h9
-rw-r--r--include/llvm/CodeGen/LatencyPriorityQueue.h4
-rw-r--r--include/llvm/CodeGen/LexicalScopes.h10
-rw-r--r--include/llvm/CodeGen/LinkAllAsmWriterComponents.h1
-rw-r--r--include/llvm/CodeGen/LinkAllCodegenComponents.h3
-rw-r--r--include/llvm/CodeGen/LiveInterval.h72
-rw-r--r--include/llvm/CodeGen/LiveIntervalAnalysis.h62
-rw-r--r--include/llvm/CodeGen/LiveIntervalUnion.h205
-rw-r--r--include/llvm/CodeGen/LiveRangeEdit.h2
-rw-r--r--include/llvm/CodeGen/LiveRegMatrix.h148
-rw-r--r--include/llvm/CodeGen/LiveStackAnalysis.h8
-rw-r--r--include/llvm/CodeGen/LiveVariables.h10
-rw-r--r--include/llvm/CodeGen/MachORelocation.h6
-rw-r--r--include/llvm/CodeGen/MachineBasicBlock.h141
-rw-r--r--include/llvm/CodeGen/MachineBranchProbabilityInfo.h3
-rw-r--r--include/llvm/CodeGen/MachineCodeEmitter.h1
-rw-r--r--include/llvm/CodeGen/MachineCodeInfo.h4
-rw-r--r--include/llvm/CodeGen/MachineDominators.h72
-rw-r--r--include/llvm/CodeGen/MachineFrameInfo.h36
-rw-r--r--include/llvm/CodeGen/MachineFunction.h37
-rw-r--r--include/llvm/CodeGen/MachineFunctionAnalysis.h6
-rw-r--r--include/llvm/CodeGen/MachineFunctionPass.h4
-rw-r--r--include/llvm/CodeGen/MachineInstr.h184
-rw-r--r--include/llvm/CodeGen/MachineInstrBuilder.h185
-rw-r--r--include/llvm/CodeGen/MachineInstrBundle.h27
-rw-r--r--include/llvm/CodeGen/MachineJumpTableInfo.h2
-rw-r--r--include/llvm/CodeGen/MachineLoopInfo.h6
-rw-r--r--include/llvm/CodeGen/MachineLoopRanges.h112
-rw-r--r--include/llvm/CodeGen/MachineModuleInfo.h26
-rw-r--r--include/llvm/CodeGen/MachineOperand.h45
-rw-r--r--include/llvm/CodeGen/MachinePostDominators.h20
-rw-r--r--include/llvm/CodeGen/MachineRegisterInfo.h37
-rw-r--r--include/llvm/CodeGen/MachineScheduler.h70
-rw-r--r--include/llvm/CodeGen/MachineTraceMetrics.h388
-rw-r--r--include/llvm/CodeGen/PBQP/Graph.h4
-rw-r--r--include/llvm/CodeGen/PBQP/HeuristicSolver.h2
-rw-r--r--include/llvm/CodeGen/PBQP/Heuristics/Briggs.h3
-rw-r--r--include/llvm/CodeGen/PBQP/Math.h4
-rw-r--r--include/llvm/CodeGen/PBQP/Solution.h3
-rw-r--r--include/llvm/CodeGen/Passes.h42
-rw-r--r--include/llvm/CodeGen/PseudoSourceValue.h2
-rw-r--r--include/llvm/CodeGen/RegAllocPBQP.h2
-rw-r--r--include/llvm/CodeGen/RegAllocRegistry.h4
-rw-r--r--include/llvm/CodeGen/RegisterClassInfo.h28
-rw-r--r--include/llvm/CodeGen/RegisterPressure.h75
-rw-r--r--include/llvm/CodeGen/RegisterScavenging.h93
-rw-r--r--include/llvm/CodeGen/ResourcePriorityQueue.h6
-rw-r--r--include/llvm/CodeGen/RuntimeLibcalls.h51
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h98
-rw-r--r--include/llvm/CodeGen/ScheduleDAGILP.h86
-rw-r--r--include/llvm/CodeGen/ScheduleDAGInstrs.h66
-rw-r--r--include/llvm/CodeGen/ScheduleDFS.h196
-rw-r--r--include/llvm/CodeGen/SchedulerRegistry.h4
-rw-r--r--include/llvm/CodeGen/ScoreboardHazardRecognizer.h1
-rw-r--r--include/llvm/CodeGen/SelectionDAG.h41
-rw-r--r--include/llvm/CodeGen/SelectionDAGISel.h32
-rw-r--r--include/llvm/CodeGen/SelectionDAGNodes.h53
-rw-r--r--include/llvm/CodeGen/SlotIndexes.h75
-rw-r--r--include/llvm/CodeGen/TargetLoweringObjectFileImpl.h21
-rw-r--r--include/llvm/CodeGen/TargetSchedule.h13
-rw-r--r--include/llvm/CodeGen/ValueTypes.h184
-rw-r--r--include/llvm/CodeGen/ValueTypes.td66
-rw-r--r--include/llvm/CodeGen/VirtRegMap.h190
-rw-r--r--include/llvm/Config/config.h.cmake23
-rw-r--r--include/llvm/Config/config.h.in25
-rw-r--r--include/llvm/Config/llvm-config.h.cmake9
-rw-r--r--include/llvm/Config/llvm-config.h.in6
-rw-r--r--include/llvm/Constant.h159
-rw-r--r--include/llvm/Constants.h1154
-rw-r--r--include/llvm/DIBuilder.h132
-rw-r--r--include/llvm/DataLayout.h429
-rw-r--r--include/llvm/DebugInfo.h354
-rw-r--r--include/llvm/DebugInfo/DIContext.h37
-rw-r--r--include/llvm/DefaultPasses.h168
-rw-r--r--include/llvm/DerivedTypes.h455
-rw-r--r--include/llvm/ExecutionEngine/ExecutionEngine.h12
-rw-r--r--include/llvm/ExecutionEngine/GenericValue.h23
-rw-r--r--include/llvm/ExecutionEngine/Interpreter.h4
-rw-r--r--include/llvm/ExecutionEngine/JIT.h4
-rw-r--r--include/llvm/ExecutionEngine/JITEventListener.h9
-rw-r--r--include/llvm/ExecutionEngine/JITMemoryManager.h5
-rw-r--r--include/llvm/ExecutionEngine/MCJIT.h4
-rw-r--r--include/llvm/ExecutionEngine/OProfileWrapper.h14
-rw-r--r--include/llvm/ExecutionEngine/ObjectBuffer.h160
-rw-r--r--include/llvm/ExecutionEngine/ObjectImage.h124
-rw-r--r--include/llvm/ExecutionEngine/RuntimeDyld.h45
-rw-r--r--include/llvm/ExecutionEngine/SectionMemoryManager.h176
-rw-r--r--include/llvm/Function.h455
-rw-r--r--include/llvm/GVMaterializer.h4
-rw-r--r--include/llvm/GlobalAlias.h93
-rw-r--r--include/llvm/GlobalValue.h299
-rw-r--r--include/llvm/GlobalVariable.h191
-rw-r--r--include/llvm/IR/Argument.h96
-rw-r--r--include/llvm/IR/Attributes.h499
-rw-r--r--include/llvm/IR/BasicBlock.h303
-rw-r--r--include/llvm/IR/CMakeLists.txt7
-rw-r--r--include/llvm/IR/CallingConv.h129
-rw-r--r--include/llvm/IR/Constant.h170
-rw-r--r--include/llvm/IR/Constants.h1163
-rw-r--r--include/llvm/IR/DataLayout.h477
-rw-r--r--include/llvm/IR/DerivedTypes.h455
-rw-r--r--include/llvm/IR/Function.h470
-rw-r--r--include/llvm/IR/GlobalAlias.h93
-rw-r--r--include/llvm/IR/GlobalValue.h299
-rw-r--r--include/llvm/IR/GlobalVariable.h210
-rw-r--r--include/llvm/IR/IRBuilder.h1401
-rw-r--r--include/llvm/IR/InlineAsm.h309
-rw-r--r--include/llvm/IR/InstrTypes.h851
-rw-r--r--include/llvm/IR/Instruction.def (renamed from include/llvm/Instruction.def)0
-rw-r--r--include/llvm/IR/Instruction.h467
-rw-r--r--include/llvm/IR/Instructions.h3716
-rw-r--r--include/llvm/IR/IntrinsicInst.h316
-rw-r--r--include/llvm/IR/Intrinsics.h128
-rw-r--r--include/llvm/IR/Intrinsics.td484
-rw-r--r--include/llvm/IR/IntrinsicsARM.td (renamed from include/llvm/IntrinsicsARM.td)0
-rw-r--r--include/llvm/IR/IntrinsicsHexagon.td (renamed from include/llvm/IntrinsicsHexagon.td)0
-rw-r--r--include/llvm/IR/IntrinsicsMips.td (renamed from include/llvm/IntrinsicsMips.td)0
-rw-r--r--include/llvm/IR/IntrinsicsNVVM.td962
-rw-r--r--include/llvm/IR/IntrinsicsPowerPC.td466
-rw-r--r--include/llvm/IR/IntrinsicsR600.td36
-rw-r--r--include/llvm/IR/IntrinsicsX86.td2580
-rw-r--r--include/llvm/IR/IntrinsicsXCore.td (renamed from include/llvm/IntrinsicsXCore.td)0
-rw-r--r--include/llvm/IR/LLVMContext.h114
-rw-r--r--include/llvm/IR/MDBuilder.h186
-rw-r--r--include/llvm/IR/Metadata.h242
-rw-r--r--include/llvm/IR/Module.h589
-rw-r--r--include/llvm/IR/OperandTraits.h160
-rw-r--r--include/llvm/IR/Operator.h478
-rw-r--r--include/llvm/IR/SymbolTableListTraits.h78
-rw-r--r--include/llvm/IR/Type.h472
-rw-r--r--include/llvm/IR/TypeBuilder.h399
-rw-r--r--include/llvm/IR/TypeFinder.h78
-rw-r--r--include/llvm/IR/Use.h219
-rw-r--r--include/llvm/IR/User.h205
-rw-r--r--include/llvm/IR/Value.h411
-rw-r--r--include/llvm/IR/ValueSymbolTable.h133
-rw-r--r--include/llvm/IRBuilder.h1353
-rw-r--r--include/llvm/IRReader/IRReader.h55
-rw-r--r--include/llvm/InitializePasses.h18
-rw-r--r--include/llvm/InlineAsm.h309
-rw-r--r--include/llvm/InstVisitor.h288
-rw-r--r--include/llvm/InstrTypes.h851
-rw-r--r--include/llvm/Instruction.h407
-rw-r--r--include/llvm/Instructions.h3716
-rw-r--r--include/llvm/IntrinsicInst.h316
-rw-r--r--include/llvm/Intrinsics.h128
-rw-r--r--include/llvm/Intrinsics.td471
-rw-r--r--include/llvm/IntrinsicsCellSPU.td242
-rw-r--r--include/llvm/IntrinsicsNVVM.td952
-rw-r--r--include/llvm/IntrinsicsPowerPC.td465
-rw-r--r--include/llvm/IntrinsicsX86.td2573
-rw-r--r--include/llvm/LLVMContext.h113
-rw-r--r--include/llvm/LinkAllIR.h53
-rw-r--r--include/llvm/LinkAllPasses.h14
-rw-r--r--include/llvm/LinkAllVMCore.h53
-rw-r--r--include/llvm/Linker.h168
-rw-r--r--include/llvm/MC/EDInstInfo.h29
-rw-r--r--include/llvm/MC/MCAsmBackend.h7
-rw-r--r--include/llvm/MC/MCAsmInfo.h27
-rw-r--r--include/llvm/MC/MCAsmInfoCOFF.h6
-rw-r--r--include/llvm/MC/MCAsmInfoDarwin.h6
-rw-r--r--include/llvm/MC/MCAsmLayout.h27
-rw-r--r--include/llvm/MC/MCAssembler.h323
-rw-r--r--include/llvm/MC/MCAtom.h4
-rw-r--r--include/llvm/MC/MCCodeEmitter.h3
-rw-r--r--include/llvm/MC/MCContext.h103
-rw-r--r--include/llvm/MC/MCDisassembler.h16
-rw-r--r--include/llvm/MC/MCDwarf.h151
-rw-r--r--include/llvm/MC/MCELF.h37
-rw-r--r--include/llvm/MC/MCELFObjectWriter.h1
-rw-r--r--include/llvm/MC/MCELFStreamer.h125
-rw-r--r--include/llvm/MC/MCExpr.h19
-rw-r--r--include/llvm/MC/MCFixedLenDisassembler.h4
-rw-r--r--include/llvm/MC/MCInstBuilder.h68
-rw-r--r--include/llvm/MC/MCInstPrinter.h14
-rw-r--r--include/llvm/MC/MCInstrDesc.h100
-rw-r--r--include/llvm/MC/MCMachObjectWriter.h18
-rw-r--r--include/llvm/MC/MCObjectFileInfo.h80
-rw-r--r--include/llvm/MC/MCObjectStreamer.h26
-rw-r--r--include/llvm/MC/MCObjectWriter.h21
-rw-r--r--include/llvm/MC/MCParser/AsmCond.h4
-rw-r--r--include/llvm/MC/MCParser/AsmLexer.h4
-rw-r--r--include/llvm/MC/MCParser/MCAsmLexer.h14
-rw-r--r--include/llvm/MC/MCParser/MCAsmParser.h59
-rw-r--r--include/llvm/MC/MCParser/MCAsmParserExtension.h6
-rw-r--r--include/llvm/MC/MCParser/MCParsedAsmOperand.h17
-rw-r--r--include/llvm/MC/MCRegisterInfo.h97
-rw-r--r--include/llvm/MC/MCSchedule.h23
-rw-r--r--include/llvm/MC/MCSection.h6
-rw-r--r--include/llvm/MC/MCSectionCOFF.h8
-rw-r--r--include/llvm/MC/MCSectionELF.h9
-rw-r--r--include/llvm/MC/MCSectionMachO.h10
-rw-r--r--include/llvm/MC/MCStreamer.h88
-rw-r--r--include/llvm/MC/MCSubtargetInfo.h2
-rw-r--r--include/llvm/MC/MCTargetAsmLexer.h89
-rw-r--r--include/llvm/MC/MCTargetAsmParser.h10
-rw-r--r--include/llvm/MC/MCValue.h2
-rw-r--r--include/llvm/MC/MCWinCOFFObjectWriter.h3
-rw-r--r--include/llvm/MC/SubtargetFeature.h2
-rw-r--r--include/llvm/MDBuilder.h162
-rw-r--r--include/llvm/Metadata.h242
-rw-r--r--include/llvm/Module.h605
-rw-r--r--include/llvm/Object/Archive.h114
-rw-r--r--include/llvm/Object/Binary.h8
-rw-r--r--include/llvm/Object/ELF.h1542
-rw-r--r--include/llvm/Object/MachO.h13
-rw-r--r--include/llvm/Object/MachOFormat.h20
-rw-r--r--include/llvm/Object/MachOObject.h5
-rw-r--r--include/llvm/Object/ObjectFile.h6
-rw-r--r--include/llvm/Object/RelocVisitor.h124
-rw-r--r--include/llvm/OperandTraits.h160
-rw-r--r--include/llvm/Operator.h314
-rw-r--r--include/llvm/Option/Arg.h132
-rw-r--r--include/llvm/Option/ArgList.h414
-rw-r--r--include/llvm/Option/OptParser.td127
-rw-r--r--include/llvm/Option/OptSpecifier.h39
-rw-r--r--include/llvm/Option/OptTable.h161
-rw-r--r--include/llvm/Option/Option.h193
-rw-r--r--include/llvm/Pass.h31
-rw-r--r--include/llvm/PassAnalysisSupport.h6
-rw-r--r--include/llvm/PassManagers.h29
-rw-r--r--include/llvm/PassSupport.h8
-rw-r--r--include/llvm/Support/AlignOf.h182
-rw-r--r--include/llvm/Support/Allocator.h4
-rw-r--r--include/llvm/Support/ArrayRecycler.h143
-rw-r--r--include/llvm/Support/Atomic.h4
-rw-r--r--include/llvm/Support/CFG.h20
-rw-r--r--include/llvm/Support/COFF.h7
-rw-r--r--include/llvm/Support/CallSite.h19
-rw-r--r--include/llvm/Support/Casting.h64
-rw-r--r--include/llvm/Support/CommandLine.h12
-rw-r--r--include/llvm/Support/Compiler.h189
-rw-r--r--include/llvm/Support/ConstantFolder.h4
-rw-r--r--include/llvm/Support/ConstantRange.h4
-rw-r--r--include/llvm/Support/ConvertUTF.h228
-rw-r--r--include/llvm/Support/DOTGraphTraits.h5
-rw-r--r--include/llvm/Support/DataExtractor.h20
-rw-r--r--include/llvm/Support/DataFlow.h2
-rw-r--r--include/llvm/Support/DataStream.h4
-rw-r--r--include/llvm/Support/DebugLoc.h28
-rw-r--r--include/llvm/Support/Dwarf.h46
-rw-r--r--include/llvm/Support/DynamicLibrary.h4
-rw-r--r--include/llvm/Support/ELF.h202
-rw-r--r--include/llvm/Support/Endian.h161
-rw-r--r--include/llvm/Support/Errno.h4
-rw-r--r--include/llvm/Support/ErrorHandling.h17
-rw-r--r--include/llvm/Support/ErrorOr.h514
-rw-r--r--include/llvm/Support/FEnv.h8
-rw-r--r--include/llvm/Support/FileOutputBuffer.h58
-rw-r--r--include/llvm/Support/FileSystem.h12
-rw-r--r--include/llvm/Support/FormattedStream.h234
-rw-r--r--include/llvm/Support/GCOV.h4
-rw-r--r--include/llvm/Support/GetElementPtrTypeIterator.h16
-rw-r--r--include/llvm/Support/GraphWriter.h18
-rw-r--r--include/llvm/Support/Host.h8
-rw-r--r--include/llvm/Support/IRReader.h112
-rw-r--r--include/llvm/Support/IncludeFile.h4
-rw-r--r--include/llvm/Support/InstIterator.h4
-rw-r--r--include/llvm/Support/InstVisitor.h288
-rw-r--r--include/llvm/Support/IntegersSubset.h13
-rw-r--r--include/llvm/Support/IntegersSubsetMapping.h6
-rw-r--r--include/llvm/Support/LEB128.h4
-rw-r--r--include/llvm/Support/Locale.h6
-rw-r--r--include/llvm/Support/LockFileManager.h1
-rw-r--r--include/llvm/Support/MathExtras.h22
-rw-r--r--include/llvm/Support/Memory.h4
-rw-r--r--include/llvm/Support/MemoryObject.h4
-rw-r--r--include/llvm/Support/Mutex.h4
-rw-r--r--include/llvm/Support/NoFolder.h4
-rw-r--r--include/llvm/Support/PassNameParser.h6
-rw-r--r--include/llvm/Support/PathV1.h4
-rw-r--r--include/llvm/Support/PatternMatch.h312
-rw-r--r--include/llvm/Support/PredIteratorCache.h4
-rw-r--r--include/llvm/Support/Process.h370
-rw-r--r--include/llvm/Support/Program.h23
-rw-r--r--include/llvm/Support/Recycler.h11
-rw-r--r--include/llvm/Support/Regex.h15
-rw-r--r--include/llvm/Support/RegistryParser.h6
-rw-r--r--include/llvm/Support/SMLoc.h14
-rw-r--r--include/llvm/Support/SaveAndRestore.h4
-rw-r--r--include/llvm/Support/Signals.h8
-rw-r--r--include/llvm/Support/Solaris.h4
-rw-r--r--include/llvm/Support/SourceMgr.h80
-rw-r--r--include/llvm/Support/StreamableMemoryObject.h6
-rw-r--r--include/llvm/Support/StringPool.h2
-rw-r--r--include/llvm/Support/SwapByteOrder.h4
-rw-r--r--include/llvm/Support/TargetFolder.h4
-rw-r--r--include/llvm/Support/TargetRegistry.h60
-rw-r--r--include/llvm/Support/ThreadLocal.h6
-rw-r--r--include/llvm/Support/Threading.h4
-rw-r--r--include/llvm/Support/TimeValue.h24
-rw-r--r--include/llvm/Support/Timer.h13
-rw-r--r--include/llvm/Support/ToolOutputFile.h4
-rw-r--r--include/llvm/Support/Valgrind.h2
-rw-r--r--include/llvm/Support/ValueHandle.h47
-rw-r--r--include/llvm/Support/Watchdog.h38
-rw-r--r--include/llvm/Support/Win64EH.h89
-rw-r--r--include/llvm/Support/YAMLParser.h11
-rw-r--r--include/llvm/Support/YAMLTraits.h1104
-rw-r--r--include/llvm/Support/circular_raw_ostream.h4
-rw-r--r--include/llvm/Support/raw_ostream.h1
-rw-r--r--include/llvm/Support/system_error.h4
-rw-r--r--include/llvm/Support/type_traits.h24
-rw-r--r--include/llvm/SymbolTableListTraits.h78
-rw-r--r--include/llvm/TableGen/Error.h1
-rw-r--r--include/llvm/TableGen/Record.h117
-rw-r--r--include/llvm/TableGen/StringMatcher.h8
-rw-r--r--include/llvm/Target/CostTable.h64
-rw-r--r--include/llvm/Target/Mangler.h6
-rw-r--r--include/llvm/Target/Target.td10
-rw-r--r--include/llvm/Target/TargetFrameLowering.h34
-rw-r--r--include/llvm/Target/TargetInstrInfo.h161
-rw-r--r--include/llvm/Target/TargetJITInfo.h2
-rw-r--r--include/llvm/Target/TargetLibraryInfo.h279
-rw-r--r--include/llvm/Target/TargetLowering.h1492
-rw-r--r--include/llvm/Target/TargetLoweringObjectFile.h17
-rw-r--r--include/llvm/Target/TargetMachine.h31
-rw-r--r--include/llvm/Target/TargetOptions.h18
-rw-r--r--include/llvm/Target/TargetRegisterInfo.h106
-rw-r--r--include/llvm/Target/TargetSchedule.td6
-rw-r--r--include/llvm/Target/TargetSubtargetInfo.h11
-rw-r--r--include/llvm/Target/TargetTransformImpl.h98
-rw-r--r--include/llvm/TargetTransformInfo.h204
-rw-r--r--include/llvm/Transforms/IPO.h4
-rw-r--r--include/llvm/Transforms/IPO/InlinerPass.h3
-rw-r--r--include/llvm/Transforms/IPO/PassManagerBuilder.h4
-rw-r--r--include/llvm/Transforms/Instrumentation.h51
-rw-r--r--include/llvm/Transforms/ObjCARC.h49
-rw-r--r--include/llvm/Transforms/Scalar.h32
-rw-r--r--include/llvm/Transforms/Utils/AddrModeMatcher.h109
-rw-r--r--include/llvm/Transforms/Utils/BasicBlockUtils.h7
-rw-r--r--include/llvm/Transforms/Utils/BlackList.h58
-rw-r--r--include/llvm/Transforms/Utils/BuildLibCalls.h8
-rw-r--r--include/llvm/Transforms/Utils/BypassSlowDivision.h7
-rw-r--r--include/llvm/Transforms/Utils/Cloning.h2
-rw-r--r--include/llvm/Transforms/Utils/CmpInstAnalysis.h2
-rw-r--r--include/llvm/Transforms/Utils/IntegerDivision.h18
-rw-r--r--include/llvm/Transforms/Utils/Local.h20
-rw-r--r--include/llvm/Transforms/Utils/ModuleUtils.h6
-rw-r--r--include/llvm/Transforms/Utils/PromoteMemToReg.h4
-rw-r--r--include/llvm/Transforms/Utils/SSAUpdater.h1
-rw-r--r--include/llvm/Transforms/Utils/SimplifyLibCalls.h3
-rw-r--r--include/llvm/Transforms/Vectorize.h6
-rw-r--r--include/llvm/Type.h458
-rw-r--r--include/llvm/TypeBuilder.h399
-rw-r--r--include/llvm/TypeFinder.h78
-rw-r--r--include/llvm/Use.h220
-rw-r--r--include/llvm/User.h215
-rw-r--r--include/llvm/Value.h411
-rw-r--r--include/llvm/ValueSymbolTable.h133
-rw-r--r--lib/Analysis/AliasAnalysis.cpp70
-rw-r--r--lib/Analysis/AliasAnalysisCounter.cpp2
-rw-r--r--lib/Analysis/AliasAnalysisEvaluator.cpp86
-rw-r--r--lib/Analysis/AliasDebugger.cpp10
-rw-r--r--lib/Analysis/AliasSetTracker.cpp12
-rw-r--r--lib/Analysis/Analysis.cpp4
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp95
-rw-r--r--lib/Analysis/BlockFrequencyInfo.cpp6
-rw-r--r--lib/Analysis/BranchProbabilityInfo.cpp12
-rw-r--r--lib/Analysis/CFGPrinter.cpp1
-rw-r--r--lib/Analysis/CMakeLists.txt4
-rw-r--r--lib/Analysis/CaptureTracking.cpp5
-rw-r--r--lib/Analysis/CodeMetrics.cpp144
-rw-r--r--lib/Analysis/ConstantFolding.cpp256
-rw-r--r--lib/Analysis/CostModel.cpp122
-rw-r--r--lib/Analysis/DbgInfoPrinter.cpp224
-rw-r--r--lib/Analysis/DependenceAnalysis.cpp302
-rw-r--r--lib/Analysis/DominanceFrontier.cpp2
-rw-r--r--lib/Analysis/IPA/CMakeLists.txt2
-rw-r--r--lib/Analysis/IPA/CallGraph.cpp6
-rw-r--r--lib/Analysis/IPA/CallGraphSCCPass.cpp13
-rw-r--r--lib/Analysis/IPA/CallPrinter.cpp87
-rw-r--r--lib/Analysis/IPA/FindUsedTypes.cpp6
-rw-r--r--lib/Analysis/IPA/GlobalsModRef.cpp16
-rw-r--r--lib/Analysis/IPA/IPA.cpp2
-rw-r--r--lib/Analysis/IPA/InlineCost.cpp1239
-rw-r--r--lib/Analysis/IVUsers.cpp12
-rw-r--r--lib/Analysis/InlineCost.cpp1067
-rw-r--r--lib/Analysis/InstCount.cpp10
-rw-r--r--lib/Analysis/InstructionSimplify.cpp506
-rw-r--r--lib/Analysis/Interval.cpp2
-rw-r--r--lib/Analysis/LazyValueInfo.cpp19
-rw-r--r--lib/Analysis/LibCallAliasAnalysis.cpp4
-rw-r--r--lib/Analysis/LibCallSemantics.cpp2
-rw-r--r--lib/Analysis/Lint.cpp102
-rw-r--r--lib/Analysis/Loads.cpp57
-rw-r--r--lib/Analysis/LoopInfo.cpp74
-rw-r--r--lib/Analysis/MemDepPrinter.cpp8
-rw-r--r--lib/Analysis/MemoryBuiltins.cpp131
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp324
-rw-r--r--lib/Analysis/ModuleDebugInfoPrinter.cpp4
-rw-r--r--lib/Analysis/NoAliasAnalysis.cpp4
-rw-r--r--lib/Analysis/PHITransAddr.cpp6
-rw-r--r--lib/Analysis/PathNumbering.cpp15
-rw-r--r--lib/Analysis/PathProfileInfo.cpp7
-rw-r--r--lib/Analysis/PathProfileVerifier.cpp13
-rw-r--r--lib/Analysis/PostDominators.cpp8
-rw-r--r--lib/Analysis/ProfileDataLoader.cpp6
-rw-r--r--lib/Analysis/ProfileDataLoaderPass.cpp22
-rw-r--r--lib/Analysis/ProfileEstimatorPass.cpp6
-rw-r--r--lib/Analysis/ProfileInfo.cpp16
-rw-r--r--lib/Analysis/ProfileInfoLoader.cpp4
-rw-r--r--lib/Analysis/ProfileInfoLoaderPass.cpp16
-rw-r--r--lib/Analysis/ProfileVerifierPass.cpp15
-rw-r--r--lib/Analysis/PtrUseVisitor.cpp36
-rw-r--r--lib/Analysis/RegionInfo.cpp7
-rw-r--r--lib/Analysis/RegionPrinter.cpp12
-rw-r--r--lib/Analysis/ScalarEvolution.cpp61
-rw-r--r--lib/Analysis/ScalarEvolutionAliasAnalysis.cpp2
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp23
-rw-r--r--lib/Analysis/SparsePropagation.cpp6
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp558
-rw-r--r--lib/Analysis/Trace.cpp2
-rw-r--r--lib/Analysis/TypeBasedAliasAnalysis.cpp10
-rw-r--r--lib/Analysis/ValueTracking.cpp390
-rw-r--r--lib/Archive/Archive.cpp5
-rw-r--r--lib/Archive/ArchiveInternals.h3
-rw-r--r--lib/Archive/ArchiveReader.cpp27
-rw-r--r--lib/Archive/ArchiveWriter.cpp5
-rw-r--r--lib/AsmParser/LLLexer.cpp193
-rw-r--r--lib/AsmParser/LLLexer.h3
-rw-r--r--lib/AsmParser/LLParser.cpp688
-rw-r--r--lib/AsmParser/LLParser.h71
-rw-r--r--lib/AsmParser/LLToken.h54
-rw-r--r--lib/AsmParser/Parser.cpp4
-rw-r--r--lib/Bitcode/Reader/BitReader.cpp14
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp686
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.h70
-rw-r--r--lib/Bitcode/Reader/BitstreamReader.cpp371
-rw-r--r--lib/Bitcode/Reader/CMakeLists.txt1
-rw-r--r--lib/Bitcode/Writer/BitWriter.cpp9
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp210
-rw-r--r--lib/Bitcode/Writer/BitcodeWriterPass.cpp4
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp64
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.h52
-rw-r--r--lib/CMakeLists.txt4
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp27
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.h6
-rw-r--r--lib/CodeGen/AllocationOrder.cpp78
-rw-r--r--lib/CodeGen/AllocationOrder.h65
-rw-r--r--lib/CodeGen/Analysis.cpp53
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp88
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp200
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp56
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp16
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt1
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.cpp32
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.h26
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp18
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfAccelTable.h32
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCFIException.cpp25
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp470
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.h74
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp1259
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h502
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp41
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h3
-rw-r--r--lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp120
-rw-r--r--lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp16
-rw-r--r--lib/CodeGen/AsmPrinter/Win64Exception.cpp19
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp466
-rw-r--r--lib/CodeGen/BranchFolding.cpp23
-rw-r--r--lib/CodeGen/CMakeLists.txt22
-rw-r--r--lib/CodeGen/CallingConvLower.cpp18
-rw-r--r--lib/CodeGen/CodeGen.cpp2
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp422
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp58
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.h5
-rw-r--r--lib/CodeGen/DFAPacketizer.cpp4
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp13
-rw-r--r--lib/CodeGen/DwarfEHPrepare.cpp14
-rw-r--r--lib/CodeGen/EarlyIfConversion.cpp10
-rw-r--r--lib/CodeGen/ErlangGC.cpp81
-rw-r--r--lib/CodeGen/ExecutionDepsFix.cpp8
-rw-r--r--lib/CodeGen/ExpandISelPseudos.cpp4
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp45
-rw-r--r--lib/CodeGen/GCMetadata.cpp45
-rw-r--r--lib/CodeGen/GCStrategy.cpp14
-rw-r--r--lib/CodeGen/IfConversion.cpp28
-rw-r--r--lib/CodeGen/InlineSpiller.cpp8
-rw-r--r--lib/CodeGen/InterferenceCache.cpp4
-rw-r--r--lib/CodeGen/InterferenceCache.h2
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp12
-rw-r--r--lib/CodeGen/LLVMBuild.txt2
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp37
-rw-r--r--lib/CodeGen/LexicalScopes.cpp18
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp77
-rw-r--r--lib/CodeGen/LiveInterval.cpp372
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp513
-rw-r--r--lib/CodeGen/LiveIntervalUnion.cpp31
-rw-r--r--lib/CodeGen/LiveIntervalUnion.h210
-rw-r--r--lib/CodeGen/LiveRangeCalc.cpp94
-rw-r--r--lib/CodeGen/LiveRangeCalc.h29
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp8
-rw-r--r--lib/CodeGen/LiveRegMatrix.cpp10
-rw-r--r--lib/CodeGen/LiveRegMatrix.h148
-rw-r--r--lib/CodeGen/LiveStackAnalysis.cpp4
-rw-r--r--lib/CodeGen/LiveVariables.cpp33
-rw-r--r--lib/CodeGen/LocalStackSlotAllocation.cpp20
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp212
-rw-r--r--lib/CodeGen/MachineBlockFrequencyInfo.cpp6
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp18
-rw-r--r--lib/CodeGen/MachineBranchProbabilityInfo.cpp2
-rw-r--r--lib/CodeGen/MachineCSE.cpp12
-rw-r--r--lib/CodeGen/MachineCopyPropagation.cpp33
-rw-r--r--lib/CodeGen/MachineFunction.cpp173
-rw-r--r--lib/CodeGen/MachineFunctionPass.cpp2
-rw-r--r--lib/CodeGen/MachineFunctionPrinterPass.cpp4
-rw-r--r--lib/CodeGen/MachineInstr.cpp470
-rw-r--r--lib/CodeGen/MachineInstrBundle.cpp28
-rw-r--r--lib/CodeGen/MachineLICM.cpp20
-rw-r--r--lib/CodeGen/MachineLoopInfo.cpp2
-rw-r--r--lib/CodeGen/MachineLoopRanges.cpp116
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp56
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp63
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp22
-rw-r--r--lib/CodeGen/MachineScheduler.cpp688
-rw-r--r--lib/CodeGen/MachineSink.cpp14
-rw-r--r--lib/CodeGen/MachineTraceMetrics.cpp175
-rw-r--r--lib/CodeGen/MachineTraceMetrics.h350
-rw-r--r--lib/CodeGen/MachineVerifier.cpp45
-rw-r--r--lib/CodeGen/OptimizePHIs.cpp6
-rw-r--r--lib/CodeGen/PHIElimination.cpp270
-rw-r--r--lib/CodeGen/PHIEliminationUtils.cpp2
-rw-r--r--lib/CodeGen/Passes.cpp77
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp21
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp68
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp213
-rw-r--r--lib/CodeGen/PrologEpilogInserter.h6
-rw-r--r--lib/CodeGen/PseudoSourceValue.cpp8
-rw-r--r--lib/CodeGen/RegAllocBase.cpp4
-rw-r--r--lib/CodeGen/RegAllocBase.h4
-rw-r--r--lib/CodeGen/RegAllocBasic.cpp16
-rw-r--r--lib/CodeGen/RegAllocFast.cpp141
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp39
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp12
-rw-r--r--lib/CodeGen/RegisterClassInfo.cpp34
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp333
-rw-r--r--lib/CodeGen/RegisterPressure.cpp547
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp138
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp115
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp727
-rw-r--r--lib/CodeGen/ScheduleDAGPrinter.cpp16
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp650
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp80
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp35
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp168
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.h16
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp299
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp568
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp81
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp73
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h29
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp190
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp78
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp26
-rw-r--r--lib/CodeGen/SelectionDAG/SDNodeDbgValue.h2
-rw-r--r--lib/CodeGen/SelectionDAG/SDNodeOrdering.h6
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp18
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp149
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp22
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h6
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp14
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp425
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp697
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h15
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp17
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp229
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp14
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp1294
-rw-r--r--lib/CodeGen/ShadowStackGC.cpp8
-rw-r--r--lib/CodeGen/ShrinkWrapping.cpp19
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp39
-rw-r--r--lib/CodeGen/SlotIndexes.cpp70
-rw-r--r--lib/CodeGen/SpillPlacement.cpp1
-rw-r--r--lib/CodeGen/Spiller.cpp6
-rw-r--r--lib/CodeGen/SplitKit.cpp2
-rw-r--r--lib/CodeGen/StackColoring.cpp175
-rw-r--r--lib/CodeGen/StackProtector.cpp147
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp10
-rw-r--r--lib/CodeGen/StrongPHIElimination.cpp8
-rw-r--r--lib/CodeGen/TailDuplication.cpp29
-rw-r--r--lib/CodeGen/TargetFrameLoweringImpl.cpp3
-rw-r--r--lib/CodeGen/TargetInstrInfo.cpp739
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp681
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp1305
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp152
-rw-r--r--lib/CodeGen/TargetRegisterInfo.cpp285
-rw-r--r--lib/CodeGen/TargetSchedule.cpp9
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp593
-rw-r--r--lib/CodeGen/UnreachableBlockElim.cpp16
-rw-r--r--lib/CodeGen/VirtRegMap.cpp37
-rw-r--r--lib/CodeGen/VirtRegMap.h190
-rw-r--r--lib/DebugInfo/CMakeLists.txt1
-rw-r--r--lib/DebugInfo/DIContext.cpp13
-rw-r--r--lib/DebugInfo/DWARFAbbreviationDeclaration.cpp18
-rw-r--r--lib/DebugInfo/DWARFAbbreviationDeclaration.h10
-rw-r--r--lib/DebugInfo/DWARFCompileUnit.cpp15
-rw-r--r--lib/DebugInfo/DWARFCompileUnit.h27
-rw-r--r--lib/DebugInfo/DWARFContext.cpp383
-rw-r--r--lib/DebugInfo/DWARFContext.h117
-rw-r--r--lib/DebugInfo/DWARFDebugArangeSet.cpp32
-rw-r--r--lib/DebugInfo/DWARFDebugArangeSet.h8
-rw-r--r--lib/DebugInfo/DWARFDebugAranges.cpp51
-rw-r--r--lib/DebugInfo/DWARFDebugAranges.h8
-rw-r--r--lib/DebugInfo/DWARFDebugFrame.cpp391
-rw-r--r--lib/DebugInfo/DWARFDebugFrame.h46
-rw-r--r--lib/DebugInfo/DWARFDebugInfoEntry.cpp30
-rw-r--r--lib/DebugInfo/DWARFDebugLine.cpp92
-rw-r--r--lib/DebugInfo/DWARFDebugLine.h8
-rw-r--r--lib/DebugInfo/DWARFFormValue.cpp95
-rw-r--r--lib/DebugInfo/DWARFFormValue.h4
-rw-r--r--lib/DebugInfo/DWARFRelocMap.h22
-rw-r--r--lib/ExecutionEngine/EventListenerCommon.h6
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp197
-rw-r--r--lib/ExecutionEngine/ExecutionEngineBindings.cpp2
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp115
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h6
-rw-r--r--lib/ExecutionEngine/Interpreter/Execution.cpp53
-rw-r--r--lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp12
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.cpp4
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.h7
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp23
-rw-r--r--lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp10
-rw-r--r--lib/ExecutionEngine/JIT/JITDwarfEmitter.h4
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp60
-rw-r--r--lib/ExecutionEngine/JIT/JITMemoryManager.cpp28
-rw-r--r--lib/ExecutionEngine/MCJIT/CMakeLists.txt1
-rw-r--r--lib/ExecutionEngine/MCJIT/LLVMBuild.txt2
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp17
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.h2
-rw-r--r--lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp226
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp2
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp1
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp8
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h154
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp49
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp126
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h4
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp5
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h2
-rw-r--r--lib/ExecutionEngine/TargetSelect.cpp9
-rw-r--r--lib/IR/AsmWriter.cpp2236
-rw-r--r--lib/IR/AttributeImpl.h278
-rw-r--r--lib/IR/Attributes.cpp1180
-rw-r--r--lib/IR/AutoUpgrade.cpp393
-rw-r--r--lib/IR/BasicBlock.cpp371
-rw-r--r--lib/IR/CMakeLists.txt51
-rw-r--r--lib/IR/ConstantFold.cpp2074
-rw-r--r--lib/IR/ConstantFold.h (renamed from lib/VMCore/ConstantFold.h)0
-rw-r--r--lib/IR/Constants.cpp2779
-rw-r--r--lib/IR/ConstantsContext.h774
-rw-r--r--lib/IR/Core.cpp2458
-rw-r--r--lib/IR/DIBuilder.cpp1101
-rw-r--r--lib/IR/DataLayout.cpp697
-rw-r--r--lib/IR/DebugInfo.cpp1209
-rw-r--r--lib/IR/DebugLoc.cpp315
-rw-r--r--lib/IR/Dominators.cpp302
-rw-r--r--lib/IR/Function.cpp707
-rw-r--r--lib/IR/GCOV.cpp (renamed from lib/VMCore/GCOV.cpp)0
-rw-r--r--lib/IR/GVMaterializer.cpp (renamed from lib/VMCore/GVMaterializer.cpp)0
-rw-r--r--lib/IR/Globals.cpp269
-rw-r--r--lib/IR/IRBuilder.cpp153
-rw-r--r--lib/IR/InlineAsm.cpp295
-rw-r--r--lib/IR/Instruction.cpp555
-rw-r--r--lib/IR/Instructions.cpp3553
-rw-r--r--lib/IR/IntrinsicInst.cpp73
-rw-r--r--lib/IR/LLVMBuild.txt22
-rw-r--r--lib/IR/LLVMContext.cpp168
-rw-r--r--lib/IR/LLVMContextImpl.cpp156
-rw-r--r--lib/IR/LLVMContextImpl.h367
-rw-r--r--lib/IR/LeakDetector.cpp69
-rw-r--r--lib/IR/LeaksContext.h92
-rw-r--r--lib/IR/Makefile33
-rw-r--r--lib/IR/Metadata.cpp745
-rw-r--r--lib/IR/Module.cpp451
-rw-r--r--lib/IR/Pass.cpp276
-rw-r--r--lib/IR/PassManager.cpp1912
-rw-r--r--lib/IR/PassRegistry.cpp209
-rw-r--r--lib/IR/PrintModulePass.cpp136
-rw-r--r--lib/IR/SymbolTableListTraitsImpl.h118
-rw-r--r--lib/IR/Type.cpp767
-rw-r--r--lib/IR/TypeFinder.cpp148
-rw-r--r--lib/IR/Use.cpp145
-rw-r--r--lib/IR/User.cpp90
-rw-r--r--lib/IR/Value.cpp701
-rw-r--r--lib/IR/ValueSymbolTable.cpp117
-rw-r--r--lib/IR/ValueTypes.cpp277
-rw-r--r--lib/IR/Verifier.cpp2144
-rw-r--r--lib/IRReader/CMakeLists.txt3
-rw-r--r--lib/IRReader/IRReader.cpp89
-rw-r--r--lib/IRReader/LLVMBuild.txt22
-rw-r--r--lib/IRReader/Makefile14
-rw-r--r--lib/LLVMBuild.txt2
-rw-r--r--lib/Linker/CMakeLists.txt2
-rw-r--r--lib/Linker/LLVMBuild.txt2
-rw-r--r--lib/Linker/LinkArchives.cpp197
-rw-r--r--lib/Linker/LinkItems.cpp241
-rw-r--r--lib/Linker/LinkModules.cpp303
-rw-r--r--lib/Linker/Linker.cpp113
-rw-r--r--lib/MC/CMakeLists.txt1
-rw-r--r--lib/MC/ELFObjectWriter.cpp81
-rw-r--r--lib/MC/MCAsmInfo.cpp3
-rw-r--r--lib/MC/MCAsmStreamer.cpp106
-rw-r--r--lib/MC/MCAssembler.cpp431
-rw-r--r--lib/MC/MCContext.cpp76
-rw-r--r--lib/MC/MCDisassembler/CMakeLists.txt5
-rw-r--r--lib/MC/MCDisassembler/Disassembler.cpp87
-rw-r--r--lib/MC/MCDisassembler/Disassembler.h6
-rw-r--r--lib/MC/MCDisassembler/EDDisassembler.cpp400
-rw-r--r--lib/MC/MCDisassembler/EDDisassembler.h271
-rw-r--r--lib/MC/MCDisassembler/EDInfo.h84
-rw-r--r--lib/MC/MCDisassembler/EDInst.cpp212
-rw-r--r--lib/MC/MCDisassembler/EDInst.h182
-rw-r--r--lib/MC/MCDisassembler/EDMain.cpp276
-rw-r--r--lib/MC/MCDisassembler/EDOperand.cpp315
-rw-r--r--lib/MC/MCDisassembler/EDOperand.h91
-rw-r--r--lib/MC/MCDisassembler/EDToken.cpp214
-rw-r--r--lib/MC/MCDisassembler/EDToken.h139
-rw-r--r--lib/MC/MCDwarf.cpp294
-rw-r--r--lib/MC/MCELF.cpp17
-rw-r--r--lib/MC/MCELF.h35
-rw-r--r--lib/MC/MCELFObjectTargetWriter.cpp5
-rw-r--r--lib/MC/MCELFStreamer.cpp341
-rw-r--r--lib/MC/MCExpr.cpp23
-rw-r--r--lib/MC/MCInstPrinter.cpp13
-rw-r--r--lib/MC/MCMachOStreamer.cpp52
-rw-r--r--lib/MC/MCNullStreamer.cpp21
-rw-r--r--lib/MC/MCObjectFileInfo.cpp79
-rw-r--r--lib/MC/MCObjectStreamer.cpp101
-rw-r--r--lib/MC/MCParser/AsmLexer.cpp84
-rw-r--r--lib/MC/MCParser/AsmParser.cpp2541
-rw-r--r--lib/MC/MCParser/COFFAsmParser.cpp83
-rw-r--r--lib/MC/MCParser/DarwinAsmParser.cpp251
-rw-r--r--lib/MC/MCParser/ELFAsmParser.cpp103
-rw-r--r--lib/MC/MCParser/MCAsmLexer.cpp2
-rw-r--r--lib/MC/MCParser/MCAsmParser.cpp8
-rw-r--r--lib/MC/MCPureStreamer.cpp36
-rw-r--r--lib/MC/MCSection.cpp2
-rw-r--r--lib/MC/MCSectionMachO.cpp4
-rw-r--r--lib/MC/MCStreamer.cpp159
-rw-r--r--lib/MC/MCSubtargetInfo.cpp4
-rw-r--r--lib/MC/MCTargetAsmLexer.cpp16
-rw-r--r--lib/MC/MCWin64EH.cpp8
-rw-r--r--lib/MC/MachObjectWriter.cpp72
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp75
-rw-r--r--lib/MC/WinCOFFStreamer.cpp126
-rw-r--r--lib/Makefile5
-rw-r--r--lib/Object/Archive.cpp279
-rw-r--r--lib/Object/COFFObjectFile.cpp2
-rw-r--r--lib/Object/ELFObjectFile.cpp46
-rw-r--r--lib/Object/MachOObject.cpp18
-rw-r--r--lib/Object/MachOObjectFile.cpp104
-rw-r--r--lib/Object/ObjectFile.cpp4
-rw-r--r--lib/Option/Arg.cpp122
-rw-r--r--lib/Option/ArgList.cpp385
-rw-r--r--lib/Option/CMakeLists.txt8
-rw-r--r--lib/Option/LLVMBuild.txt22
-rw-r--r--lib/Option/Makefile14
-rw-r--r--lib/Option/OptTable.cpp387
-rw-r--r--lib/Option/Option.cpp202
-rw-r--r--lib/Support/APFloat.cpp96
-rw-r--r--lib/Support/APInt.cpp52
-rw-r--r--lib/Support/Allocator.cpp10
-rw-r--r--lib/Support/CMakeLists.txt6
-rw-r--r--lib/Support/CommandLine.cpp24
-rw-r--r--lib/Support/ConstantRange.cpp2
-rw-r--r--lib/Support/ConvertUTF.c571
-rw-r--r--lib/Support/ConvertUTFWrapper.cpp76
-rw-r--r--lib/Support/CrashRecoveryContext.cpp4
-rw-r--r--lib/Support/DataStream.cpp4
-rw-r--r--lib/Support/Debug.cpp6
-rw-r--r--lib/Support/Disassembler.cpp5
-rw-r--r--lib/Support/Dwarf.cpp19
-rw-r--r--lib/Support/DynamicLibrary.cpp8
-rw-r--r--lib/Support/ErrorHandling.cpp24
-rw-r--r--lib/Support/FileOutputBuffer.cpp83
-rw-r--r--lib/Support/FileUtilities.cpp12
-rw-r--r--lib/Support/FoldingSet.cpp10
-rw-r--r--lib/Support/GraphWriter.cpp15
-rw-r--r--lib/Support/Host.cpp107
-rw-r--r--lib/Support/LocaleWindows.inc2
-rw-r--r--lib/Support/LocaleXlocale.inc2
-rw-r--r--lib/Support/LockFileManager.cpp22
-rw-r--r--lib/Support/Memory.cpp2
-rw-r--r--lib/Support/MemoryBuffer.cpp105
-rw-r--r--lib/Support/Path.cpp3
-rw-r--r--lib/Support/PathV2.cpp29
-rw-r--r--lib/Support/PluginLoader.cpp4
-rw-r--r--lib/Support/PrettyStackTrace.cpp12
-rw-r--r--lib/Support/Process.cpp60
-rw-r--r--lib/Support/Program.cpp11
-rw-r--r--lib/Support/Regex.cpp8
-rw-r--r--lib/Support/SmallPtrSet.cpp24
-rw-r--r--lib/Support/SourceMgr.cpp197
-rw-r--r--lib/Support/Statistic.cpp22
-rw-r--r--lib/Support/StringRef.cpp3
-rw-r--r--lib/Support/Threading.cpp2
-rw-r--r--lib/Support/TimeValue.cpp9
-rw-r--r--lib/Support/Timer.cpp8
-rw-r--r--lib/Support/Triple.cpp24
-rw-r--r--lib/Support/Unix/Memory.inc36
-rw-r--r--lib/Support/Unix/PathV2.inc43
-rw-r--r--lib/Support/Unix/Process.inc101
-rw-r--r--lib/Support/Unix/Program.inc32
-rw-r--r--lib/Support/Unix/Signals.inc44
-rw-r--r--lib/Support/Unix/TimeValue.inc3
-rw-r--r--lib/Support/Unix/Unix.h6
-rw-r--r--lib/Support/Unix/Watchdog.inc32
-rw-r--r--lib/Support/Watchdog.cpp23
-rw-r--r--lib/Support/Windows/Memory.inc2
-rw-r--r--lib/Support/Windows/Path.inc4
-rw-r--r--lib/Support/Windows/PathV2.inc65
-rw-r--r--lib/Support/Windows/Process.inc87
-rw-r--r--lib/Support/Windows/Program.inc26
-rw-r--r--lib/Support/Windows/Signals.inc6
-rw-r--r--lib/Support/Windows/Watchdog.inc24
-rw-r--r--lib/Support/YAMLParser.cpp27
-rw-r--r--lib/Support/YAMLTraits.cpp827
-rw-r--r--lib/Support/raw_ostream.cpp22
-rw-r--r--lib/Support/regcomp.c30
-rw-r--r--lib/Support/system_error.cpp2
-rw-r--r--lib/TableGen/Error.cpp7
-rw-r--r--lib/TableGen/Main.cpp15
-rw-r--r--lib/TableGen/Record.cpp66
-rw-r--r--lib/TableGen/TGLexer.cpp22
-rw-r--r--lib/TableGen/TGLexer.h17
-rw-r--r--lib/TableGen/TGParser.cpp255
-rw-r--r--lib/TableGen/TGParser.h10
-rw-r--r--lib/TableGen/TableGenBackend.cpp31
-rw-r--r--lib/Target/AArch64/AArch64.h42
-rw-r--r--lib/Target/AArch64/AArch64.td70
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp347
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.h80
-rw-r--r--lib/Target/AArch64/AArch64BranchFixupPass.cpp600
-rw-r--r--lib/Target/AArch64/AArch64CallingConv.td196
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp633
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.h108
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp415
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp2975
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h247
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td961
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp822
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h112
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td5099
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.cpp140
-rw-r--r--lib/Target/AArch64/AArch64MachineFunctionInfo.cpp18
-rw-r--r--lib/Target/AArch64/AArch64MachineFunctionInfo.h149
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.cpp171
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.h76
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.td203
-rw-r--r--lib/Target/AArch64/AArch64Schedule.td10
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.cpp25
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.h32
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp43
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h54
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp81
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.h69
-rw-r--r--lib/Target/AArch64/AArch64TargetObjectFile.cpp24
-rw-r--r--lib/Target/AArch64/AArch64TargetObjectFile.h31
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp2197
-rw-r--r--lib/Target/AArch64/AsmParser/CMakeLists.txt7
-rw-r--r--lib/Target/AArch64/AsmParser/LLVMBuild.txt24
-rw-r--r--lib/Target/AArch64/AsmParser/Makefile15
-rw-r--r--lib/Target/AArch64/CMakeLists.txt36
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp803
-rw-r--r--lib/Target/AArch64/Disassembler/CMakeLists.txt7
-rw-r--r--lib/Target/AArch64/Disassembler/LLVMBuild.txt24
-rw-r--r--lib/Target/AArch64/Disassembler/Makefile16
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp408
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h172
-rw-r--r--lib/Target/AArch64/InstPrinter/CMakeLists.txt8
-rw-r--r--lib/Target/AArch64/InstPrinter/LLVMBuild.txt24
-rw-r--r--lib/Target/AArch64/InstPrinter/Makefile15
-rw-r--r--lib/Target/AArch64/LLVMBuild.txt36
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp585
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp292
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp160
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h27
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h113
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp41
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h27
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp502
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp178
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h167
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp194
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h65
-rw-r--r--lib/Target/AArch64/MCTargetDesc/CMakeLists.txt13
-rw-r--r--lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt24
-rw-r--r--lib/Target/AArch64/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/AArch64/Makefile30
-rw-r--r--lib/Target/AArch64/README.txt2
-rw-r--r--lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp24
-rw-r--r--lib/Target/AArch64/TargetInfo/CMakeLists.txt7
-rw-r--r--lib/Target/AArch64/TargetInfo/LLVMBuild.txt24
-rw-r--r--lib/Target/AArch64/TargetInfo/Makefile15
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.cpp1103
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h1068
-rw-r--r--lib/Target/AArch64/Utils/CMakeLists.txt5
-rw-r--r--lib/Target/AArch64/Utils/LLVMBuild.txt23
-rw-r--r--lib/Target/AArch64/Utils/Makefile15
-rw-r--r--lib/Target/ARM/A15SDOptimizer.cpp704
-rw-r--r--lib/Target/ARM/ARM.h4
-rw-r--r--lib/Target/ARM/ARM.td35
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp701
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.h10
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp71
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h8
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp431
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h23
-rw-r--r--lib/Target/ARM/ARMCallingConv.h2
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp26
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp18
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp29
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h6
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp4
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp297
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp126
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h5
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp445
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp699
-rw-r--r--lib/Target/ARM/ARMISelLowering.h38
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp4
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td169
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td24
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td26
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp4
-rw-r--r--lib/Target/ARM/ARMJITInfo.h4
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp200
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp2
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h4
-rw-r--r--lib/Target/ARM/ARMSchedule.td71
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td56
-rw-r--r--lib/Target/ARM/ARMScheduleSwift.td61
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp126
-rw-r--r--lib/Target/ARM/ARMSubtarget.h34
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp32
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h38
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp15
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h5
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp458
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmLexer.cpp138
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp645
-rw-r--r--lib/Target/ARM/AsmParser/CMakeLists.txt1
-rw-r--r--lib/Target/ARM/CMakeLists.txt3
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp42
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp59
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h1
-rwxr-xr-xlib/Target/ARM/LICENSE.TXT47
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp153
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp23
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp418
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h27
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp23
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.h3
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp20
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp5
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h112
-rw-r--r--lib/Target/ARM/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp8
-rw-r--r--lib/Target/ARM/Makefile2
-rw-r--r--lib/Target/ARM/README-Thumb.txt2
-rw-r--r--lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp2
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp48
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.h4
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp2
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp91
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.h8
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp6
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp6
-rw-r--r--lib/Target/ARM/Thumb2RegisterInfo.cpp6
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp383
-rw-r--r--lib/Target/CMakeLists.txt3
-rw-r--r--lib/Target/CellSPU/CMakeLists.txt30
-rw-r--r--lib/Target/CellSPU/CellSDKIntrinsics.td449
-rw-r--r--lib/Target/CellSPU/LLVMBuild.txt32
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp43
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h30
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp94
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h38
-rw-r--r--lib/Target/CellSPU/Makefile20
-rw-r--r--lib/Target/CellSPU/README.txt106
-rw-r--r--lib/Target/CellSPU/SPU.h31
-rw-r--r--lib/Target/CellSPU/SPU.td66
-rw-r--r--lib/Target/CellSPU/SPU128InstrInfo.td41
-rw-r--r--lib/Target/CellSPU/SPU64InstrInfo.td408
-rw-r--r--lib/Target/CellSPU/SPUAsmPrinter.cpp333
-rw-r--r--lib/Target/CellSPU/SPUCallingConv.td53
-rw-r--r--lib/Target/CellSPU/SPUFrameLowering.cpp256
-rw-r--r--lib/Target/CellSPU/SPUFrameLowering.h80
-rw-r--r--lib/Target/CellSPU/SPUHazardRecognizers.cpp135
-rw-r--r--lib/Target/CellSPU/SPUHazardRecognizers.h37
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp1192
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp3266
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.h178
-rw-r--r--lib/Target/CellSPU/SPUInstrBuilder.h43
-rw-r--r--lib/Target/CellSPU/SPUInstrFormats.td320
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.cpp449
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.h84
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td4484
-rw-r--r--lib/Target/CellSPU/SPUMachineFunction.cpp14
-rw-r--r--lib/Target/CellSPU/SPUMachineFunction.h50
-rw-r--r--lib/Target/CellSPU/SPUMathInstr.td97
-rw-r--r--lib/Target/CellSPU/SPUNodes.td159
-rw-r--r--lib/Target/CellSPU/SPUNopFiller.cpp153
-rw-r--r--lib/Target/CellSPU/SPUOperands.td664
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.cpp357
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.h106
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.td183
-rw-r--r--lib/Target/CellSPU/SPURegisterNames.h19
-rw-r--r--lib/Target/CellSPU/SPUSchedule.td59
-rw-r--r--lib/Target/CellSPU/SPUSelectionDAGInfo.cpp23
-rw-r--r--lib/Target/CellSPU/SPUSelectionDAGInfo.h31
-rw-r--r--lib/Target/CellSPU/SPUSubtarget.cpp65
-rw-r--r--lib/Target/CellSPU/SPUSubtarget.h97
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.cpp94
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.h96
-rw-r--r--lib/Target/CellSPU/TargetInfo/CMakeLists.txt7
-rw-r--r--lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp20
-rw-r--r--lib/Target/CellSPU/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/CellSPU/TargetInfo/Makefile15
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp111
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h2
-rw-r--r--lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp2
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt5
-rw-r--r--lib/Target/Hexagon/Hexagon.h8
-rw-r--r--lib/Target/Hexagon/Hexagon.td101
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp36
-rw-r--r--lib/Target/Hexagon/HexagonCFGOptimizer.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.h4
-rw-r--r--lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonFixupHwLoops.cpp183
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp32
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h5
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp1673
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp176
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp105
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h17
-rw-r--r--lib/Target/Hexagon/HexagonImmediates.td508
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td445
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV4.td65
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp1280
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h27
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td2336
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td6048
-rw-r--r--lib/Target/Hexagon/HexagonMCInst.h41
-rw-r--r--lib/Target/Hexagon/HexagonMCInstLower.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.cpp19
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.h8
-rw-r--r--lib/Target/Hexagon/HexagonNewValueJump.cpp36
-rw-r--r--lib/Target/Hexagon/HexagonOperands.td858
-rw-r--r--lib/Target/Hexagon/HexagonPeephole.cpp8
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp159
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h14
-rw-r--r--lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp9
-rw-r--r--lib/Target/Hexagon/HexagonSchedule.td24
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV4.td14
-rw-r--r--lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp13
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp17
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h2
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp60
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.h19
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.cpp10
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp598
-rw-r--r--lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp42
-rw-r--r--lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h17
-rw-r--r--lib/Target/Hexagon/InstPrinter/LLVMBuild.txt2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt3
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h141
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp1
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp175
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h100
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp2
-rw-r--r--lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp2
-rw-r--r--lib/Target/LLVMBuild.txt2
-rw-r--r--lib/Target/MBlaze/AsmParser/CMakeLists.txt1
-rw-r--r--lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp115
-rw-r--r--lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp111
-rw-r--r--lib/Target/MBlaze/CMakeLists.txt1
-rw-r--r--lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp9
-rw-r--r--lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h5
-rw-r--r--lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp4
-rw-r--r--lib/Target/MBlaze/MBlazeAsmPrinter.cpp26
-rw-r--r--lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp4
-rw-r--r--lib/Target/MBlaze/MBlazeFrameLowering.cpp47
-rw-r--r--lib/Target/MBlaze/MBlazeFrameLowering.h4
-rw-r--r--lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp14
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.cpp48
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.h2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.cpp4
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.td4
-rw-r--r--lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp14
-rw-r--r--lib/Target/MBlaze/MBlazeMCInstLower.cpp8
-rw-r--r--lib/Target/MBlaze/MBlazeMachineFunction.h2
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.cpp86
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.h10
-rw-r--r--lib/Target/MBlaze/MBlazeSubtarget.h2
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.cpp5
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.h20
-rw-r--r--lib/Target/MBlaze/MBlazeTargetObjectFile.cpp8
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp8
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp6
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp2
-rw-r--r--lib/Target/MBlaze/Makefile3
-rw-r--r--lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp2
-rw-r--r--lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp4
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp2
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp2
-rw-r--r--lib/Target/MSP430/MSP430AsmPrinter.cpp14
-rw-r--r--lib/Target/MSP430/MSP430BranchSelector.cpp6
-rw-r--r--lib/Target/MSP430/MSP430CallingConv.td3
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.cpp76
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.h7
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp12
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp127
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h4
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp2
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td4
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.cpp6
-rw-r--r--lib/Target/MSP430/MSP430MachineFunctionInfo.h6
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp85
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.h7
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp4
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h17
-rw-r--r--lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp2
-rw-r--r--lib/Target/Mangler.cpp10
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp844
-rw-r--r--lib/Target/Mips/CMakeLists.txt6
-rw-r--r--lib/Target/Mips/Disassembler/LLVMBuild.txt2
-rw-r--r--lib/Target/Mips/Disassembler/Makefile2
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp50
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp10
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.h3
-rw-r--r--lib/Target/Mips/MCTargetDesc/CMakeLists.txt2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp3
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h93
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp22
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp89
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h43
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp29
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp80
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsReginfo.h31
-rw-r--r--lib/Target/Mips/Makefile2
-rw-r--r--lib/Target/Mips/Mips.h1
-rw-r--r--lib/Target/Mips/Mips.td16
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.cpp71
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.h6
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.cpp308
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.h51
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.cpp689
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.h80
-rw-r--r--lib/Target/Mips/Mips16InstrFormats.td111
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp264
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.h66
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.td443
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.cpp86
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.h20
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td370
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp77
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.h3
-rw-r--r--lib/Target/Mips/MipsCallingConv.td18
-rw-r--r--lib/Target/Mips/MipsCodeEmitter.cpp180
-rw-r--r--lib/Target/Mips/MipsCondMov.td181
-rw-r--r--lib/Target/Mips/MipsConstantIslandPass.cpp85
-rw-r--r--lib/Target/Mips/MipsDSPInstrFormats.td5
-rw-r--r--lib/Target/Mips/MipsDSPInstrInfo.td280
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp778
-rw-r--r--lib/Target/Mips/MipsFrameLowering.cpp8
-rw-r--r--lib/Target/Mips/MipsFrameLowering.h7
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp690
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.h93
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp1710
-rw-r--r--lib/Target/Mips/MipsISelLowering.h207
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td514
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td552
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp170
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h45
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td1161
-rw-r--r--lib/Target/Mips/MipsJITInfo.cpp4
-rw-r--r--lib/Target/Mips/MipsLongBranch.cpp54
-rw-r--r--lib/Target/Mips/MipsMCInstLower.cpp2
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp20
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h18
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp61
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h11
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td140
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp258
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h7
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp473
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.h57
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp442
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.h62
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp87
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.h26
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.cpp59
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.h4
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp9
-rw-r--r--lib/Target/Mips/MipsSubtarget.h38
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp17
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h31
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.cpp22
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.h2
-rw-r--r--lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp2
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt2
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h38
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp12
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp11
-rw-r--r--lib/Target/NVPTX/ManagedStringPool.h1
-rw-r--r--lib/Target/NVPTX/NVPTX.h34
-rw-r--r--lib/Target/NVPTX/NVPTX.td12
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.cpp22
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.h2
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp998
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h119
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.cpp54
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.h12
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp1642
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h17
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp1260
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h47
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp143
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.h31
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td96
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td145
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp71
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.h4
-rw-r--r--lib/Target/NVPTX/NVPTXNumRegisters.h6
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp264
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.h31
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td44
-rw-r--r--lib/Target/NVPTX/NVPTXSection.h4
-rw-r--r--lib/Target/NVPTX/NVPTXSplitBBatBar.cpp20
-rw-r--r--lib/Target/NVPTX/NVPTXSplitBBatBar.h2
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.cpp22
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h14
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp85
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.h59
-rw-r--r--lib/Target/NVPTX/NVPTXTargetObjectFile.h77
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.cpp118
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.h18
-rw-r--r--lib/Target/NVPTX/NVPTXutil.cpp32
-rw-r--r--lib/Target/NVPTX/NVVMReflect.cpp177
-rw-r--r--lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp6
-rw-r--r--lib/Target/NVPTX/VectorElementize.cpp1248
-rw-r--r--lib/Target/NVPTX/cl_common_defines.h123
-rw-r--r--lib/Target/NVPTX/gen-register-defs.py202
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt1
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp30
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp56
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h70
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp124
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h21
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp10
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp75
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp9
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h8
-rw-r--r--lib/Target/PowerPC/PPC.h26
-rw-r--r--lib/Target/PowerPC/PPC.td149
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp525
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp17
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp103
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td68
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp20
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp250
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h21
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp2
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp333
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp1260
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h191
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td680
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td635
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td22
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp268
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td932
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp19
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp9
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h31
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp414
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h48
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td176
-rw-r--r--lib/Target/PowerPC/PPCScheduleA2.td15
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td15
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp23
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h30
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp18
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h20
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp240
-rw-r--r--lib/Target/PowerPC/README.txt21
-rw-r--r--lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp2
-rw-r--r--lib/Target/R600/AMDGPU.h51
-rw-r--r--lib/Target/R600/AMDGPU.td41
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.cpp145
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.h44
-rw-r--r--lib/Target/R600/AMDGPUCallingConv.td42
-rw-r--r--lib/Target/R600/AMDGPUConvertToISA.cpp62
-rw-r--r--lib/Target/R600/AMDGPUFrameLowering.cpp122
-rw-r--r--lib/Target/R600/AMDGPUFrameLowering.h44
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp414
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h140
-rw-r--r--lib/Target/R600/AMDGPUIndirectAddressing.cpp343
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.cpp267
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.h206
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.td82
-rw-r--r--lib/Target/R600/AMDGPUInstructions.td266
-rw-r--r--lib/Target/R600/AMDGPUIntrinsics.td60
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.cpp83
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.h34
-rw-r--r--lib/Target/R600/AMDGPUMachineFunction.cpp22
-rw-r--r--lib/Target/R600/AMDGPUMachineFunction.h29
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.cpp75
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.h66
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.td25
-rw-r--r--lib/Target/R600/AMDGPUStructurizeCFG.cpp896
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp87
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h65
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.cpp164
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.h70
-rw-r--r--lib/Target/R600/AMDIL.h121
-rw-r--r--lib/Target/R600/AMDIL7XXDevice.cpp115
-rw-r--r--lib/Target/R600/AMDIL7XXDevice.h72
-rw-r--r--lib/Target/R600/AMDILBase.td85
-rw-r--r--lib/Target/R600/AMDILCFGStructurizer.cpp3051
-rw-r--r--lib/Target/R600/AMDILDevice.cpp132
-rw-r--r--lib/Target/R600/AMDILDevice.h117
-rw-r--r--lib/Target/R600/AMDILDeviceInfo.cpp94
-rw-r--r--lib/Target/R600/AMDILDeviceInfo.h88
-rw-r--r--lib/Target/R600/AMDILDevices.h19
-rw-r--r--lib/Target/R600/AMDILEvergreenDevice.cpp169
-rw-r--r--lib/Target/R600/AMDILEvergreenDevice.h93
-rw-r--r--lib/Target/R600/AMDILISelDAGToDAG.cpp643
-rw-r--r--lib/Target/R600/AMDILISelLowering.cpp647
-rw-r--r--lib/Target/R600/AMDILInstrInfo.td207
-rw-r--r--lib/Target/R600/AMDILIntrinsicInfo.cpp79
-rw-r--r--lib/Target/R600/AMDILIntrinsicInfo.h49
-rw-r--r--lib/Target/R600/AMDILIntrinsics.td232
-rw-r--r--lib/Target/R600/AMDILNIDevice.cpp65
-rw-r--r--lib/Target/R600/AMDILNIDevice.h57
-rw-r--r--lib/Target/R600/AMDILPeepholeOptimizer.cpp1215
-rw-r--r--lib/Target/R600/AMDILRegisterInfo.td107
-rw-r--r--lib/Target/R600/AMDILSIDevice.cpp48
-rw-r--r--lib/Target/R600/AMDILSIDevice.h39
-rw-r--r--lib/Target/R600/CMakeLists.txt59
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp172
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h54
-rw-r--r--lib/Target/R600/InstPrinter/CMakeLists.txt7
-rw-r--r--lib/Target/R600/InstPrinter/LLVMBuild.txt24
-rw-r--r--lib/Target/R600/InstPrinter/Makefile15
-rw-r--r--lib/Target/R600/LLVMBuild.txt32
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp90
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp83
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h30
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h40
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp113
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h55
-rw-r--r--lib/Target/R600/MCTargetDesc/CMakeLists.txt10
-rw-r--r--lib/Target/R600/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/R600/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp585
-rw-r--r--lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp201
-rw-r--r--lib/Target/R600/Makefile23
-rw-r--r--lib/Target/R600/Processors.td30
-rw-r--r--lib/Target/R600/R600ControlFlowFinalizer.cpp268
-rw-r--r--lib/Target/R600/R600Defines.h97
-rw-r--r--lib/Target/R600/R600EmitClauseMarkers.cpp255
-rw-r--r--lib/Target/R600/R600ExpandSpecialInstrs.cpp297
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp1106
-rw-r--r--lib/Target/R600/R600ISelLowering.h74
-rw-r--r--lib/Target/R600/R600InstrInfo.cpp841
-rw-r--r--lib/Target/R600/R600InstrInfo.h204
-rw-r--r--lib/Target/R600/R600Instructions.td2267
-rw-r--r--lib/Target/R600/R600Intrinsics.td31
-rw-r--r--lib/Target/R600/R600MachineFunctionInfo.cpp18
-rw-r--r--lib/Target/R600/R600MachineFunctionInfo.h32
-rw-r--r--lib/Target/R600/R600MachineScheduler.cpp427
-rw-r--r--lib/Target/R600/R600MachineScheduler.h120
-rw-r--r--lib/Target/R600/R600RegisterInfo.cpp99
-rw-r--r--lib/Target/R600/R600RegisterInfo.h55
-rw-r--r--lib/Target/R600/R600RegisterInfo.td209
-rw-r--r--lib/Target/R600/R600Schedule.td36
-rw-r--r--lib/Target/R600/SIAnnotateControlFlow.cpp329
-rw-r--r--lib/Target/R600/SIISelLowering.cpp670
-rw-r--r--lib/Target/R600/SIISelLowering.h58
-rw-r--r--lib/Target/R600/SIInsertWaits.cpp358
-rw-r--r--lib/Target/R600/SIInstrFormats.td426
-rw-r--r--lib/Target/R600/SIInstrInfo.cpp264
-rw-r--r--lib/Target/R600/SIInstrInfo.h97
-rw-r--r--lib/Target/R600/SIInstrInfo.td356
-rw-r--r--lib/Target/R600/SIInstructions.td1607
-rw-r--r--lib/Target/R600/SIIntrinsics.td42
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp501
-rw-r--r--lib/Target/R600/SIMachineFunctionInfo.cpp18
-rw-r--r--lib/Target/R600/SIMachineFunctionInfo.h33
-rw-r--r--lib/Target/R600/SIRegisterInfo.cpp53
-rw-r--r--lib/Target/R600/SIRegisterInfo.h50
-rw-r--r--lib/Target/R600/SIRegisterInfo.td182
-rw-r--r--lib/Target/R600/SISchedule.td15
-rw-r--r--lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp26
-rw-r--r--lib/Target/R600/TargetInfo/CMakeLists.txt7
-rw-r--r--lib/Target/R600/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/R600/TargetInfo/Makefile15
-rw-r--r--lib/Target/README.txt15
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp6
-rw-r--r--lib/Target/Sparc/FPMover.cpp6
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp5
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp4
-rw-r--r--lib/Target/Sparc/SparcCallingConv.td20
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp22
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.h4
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp2
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp164
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h19
-rw-r--r--lib/Target/Sparc/SparcInstr64Bit.td285
-rw-r--r--lib/Target/Sparc/SparcInstrFormats.td37
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp4
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td199
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp46
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h11
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.td14
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp4
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h17
-rw-r--r--lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp2
-rw-r--r--lib/Target/Target.cpp5
-rw-r--r--lib/Target/TargetInstrInfo.cpp88
-rw-r--r--lib/Target/TargetIntrinsicInfo.cpp2
-rw-r--r--lib/Target/TargetLibraryInfo.cpp283
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp40
-rw-r--r--lib/Target/TargetMachine.cpp34
-rw-r--r--lib/Target/TargetMachineC.cpp16
-rw-r--r--lib/Target/TargetRegisterInfo.cpp248
-rw-r--r--lib/Target/TargetSubtargetInfo.cpp4
-rw-r--r--lib/Target/TargetTransformImpl.cpp353
-rw-r--r--lib/Target/X86/AsmParser/CMakeLists.txt1
-rw-r--r--lib/Target/X86/AsmParser/X86AsmLexer.cpp159
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp1129
-rw-r--r--lib/Target/X86/CMakeLists.txt3
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp9
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.h4
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.c347
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp12
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp135
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp34
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h27
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp14
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp45
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp11
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp8
-rw-r--r--lib/Target/X86/Makefile3
-rw-r--r--lib/Target/X86/README-SSE.txt9
-rw-r--r--lib/Target/X86/README.txt37
-rw-r--r--lib/Target/X86/TargetInfo/X86TargetInfo.cpp2
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp18
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h2
-rw-r--r--lib/Target/X86/X86.h11
-rw-r--r--lib/Target/X86/X86.td101
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp73
-rw-r--r--lib/Target/X86/X86AsmPrinter.h8
-rw-r--r--lib/Target/X86/X86COFFMachineModuleInfo.h2
-rw-r--r--lib/Target/X86/X86CallingConv.td102
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp36
-rw-r--r--lib/Target/X86/X86CompilationCallback_Win64.asm4
-rw-r--r--lib/Target/X86/X86FastISel.cpp169
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp12
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp342
-rw-r--r--lib/Target/X86/X86FrameLowering.h6
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp38
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp3418
-rw-r--r--lib/Target/X86/X86ISelLowering.h116
-rw-r--r--lib/Target/X86/X86Instr3DNow.td15
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td577
-rw-r--r--lib/Target/X86/X86InstrCMovSetCC.td21
-rw-r--r--lib/Target/X86/X86InstrCompiler.td331
-rw-r--r--lib/Target/X86/X86InstrControl.td72
-rw-r--r--lib/Target/X86/X86InstrExtension.td73
-rw-r--r--lib/Target/X86/X86InstrFMA.td74
-rw-r--r--lib/Target/X86/X86InstrFPStack.td26
-rw-r--r--lib/Target/X86/X86InstrFormats.td163
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td12
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp746
-rw-r--r--lib/Target/X86/X86InstrInfo.td336
-rw-r--r--lib/Target/X86/X86InstrMMX.td80
-rw-r--r--lib/Target/X86/X86InstrSSE.td2554
-rw-r--r--lib/Target/X86/X86InstrShiftRotate.td58
-rw-r--r--lib/Target/X86/X86InstrSystem.td32
-rw-r--r--lib/Target/X86/X86InstrTSX.td9
-rw-r--r--lib/Target/X86/X86JITInfo.cpp36
-rw-r--r--lib/Target/X86/X86JITInfo.h2
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp120
-rw-r--r--lib/Target/X86/X86PadShortFunction.cpp212
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp296
-rw-r--r--lib/Target/X86/X86RegisterInfo.h7
-rw-r--r--lib/Target/X86/X86SchedHaswell.td126
-rw-r--r--lib/Target/X86/X86SchedSandyBridge.td122
-rw-r--r--lib/Target/X86/X86Schedule.td94
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td2
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp10
-rw-r--r--lib/Target/X86/X86Subtarget.cpp157
-rw-r--r--lib/Target/X86/X86Subtarget.h61
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp58
-rw-r--r--lib/Target/X86/X86TargetMachine.h32
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp16
-rw-r--r--lib/Target/X86/X86TargetObjectFile.h8
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp495
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp10
-rw-r--r--lib/Target/XCore/CMakeLists.txt4
-rw-r--r--lib/Target/XCore/Disassembler/CMakeLists.txt5
-rw-r--r--lib/Target/XCore/Disassembler/LLVMBuild.txt23
-rw-r--r--lib/Target/XCore/Disassembler/Makefile16
-rw-r--r--lib/Target/XCore/Disassembler/XCoreDisassembler.cpp800
-rw-r--r--lib/Target/XCore/InstPrinter/CMakeLists.txt7
-rw-r--r--lib/Target/XCore/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/XCore/InstPrinter/Makefile16
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp97
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.h44
-rw-r--r--lib/Target/XCore/LLVMBuild.txt3
-rw-r--r--lib/Target/XCore/MCTargetDesc/LLVMBuild.txt2
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp14
-rw-r--r--lib/Target/XCore/Makefile6
-rw-r--r--lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp2
-rw-r--r--lib/Target/XCore/XCore.td6
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp80
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp71
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.h4
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp27
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp162
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h6
-rw-r--r--lib/Target/XCore/XCoreInstrFormats.td269
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp8
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td1108
-rw-r--r--lib/Target/XCore/XCoreMCInstLower.cpp117
-rw-r--r--lib/Target/XCore/XCoreMCInstLower.h42
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.h2
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp96
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h7
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.td11
-rw-r--r--lib/Target/XCore/XCoreSubtarget.h2
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp6
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h15
-rw-r--r--lib/Target/XCore/XCoreTargetObjectFile.cpp2
-rw-r--r--lib/Transforms/CMakeLists.txt1
-rw-r--r--lib/Transforms/Hello/Hello.cpp4
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp92
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp10
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp134
-rw-r--r--lib/Transforms/IPO/ExtractGV.cpp16
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp784
-rw-r--r--lib/Transforms/IPO/GlobalDCE.cpp6
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp246
-rw-r--r--lib/Transforms/IPO/IPConstantPropagation.cpp12
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp126
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp73
-rw-r--r--lib/Transforms/IPO/Inliner.cpp82
-rw-r--r--lib/Transforms/IPO/Internalize.cpp24
-rw-r--r--lib/Transforms/IPO/LLVMBuild.txt2
-rw-r--r--lib/Transforms/IPO/LoopExtractor.cpp8
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp30
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp10
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp28
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp28
-rw-r--r--lib/Transforms/IPO/StripDeadPrototypes.cpp4
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp16
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h71
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp948
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp410
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp160
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp388
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp137
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp39
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp421
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp9
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp173
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp354
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp23
-rw-r--r--lib/Transforms/InstCombine/InstCombineWorklist.h38
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp134
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp1036
-rw-r--r--lib/Transforms/Instrumentation/BlackList.cpp58
-rw-r--r--lib/Transforms/Instrumentation/BlackList.h57
-rw-r--r--lib/Transforms/Instrumentation/BoundsChecking.cpp19
-rw-r--r--lib/Transforms/Instrumentation/CMakeLists.txt1
-rw-r--r--lib/Transforms/Instrumentation/EdgeProfiling.cpp6
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp299
-rw-r--r--lib/Transforms/Instrumentation/Instrumentation.cpp2
-rw-r--r--lib/Transforms/Instrumentation/MaximumSpanningTree.h4
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp1985
-rw-r--r--lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp16
-rw-r--r--lib/Transforms/Instrumentation/PathProfiling.cpp19
-rw-r--r--lib/Transforms/Instrumentation/ProfilingUtils.cpp10
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp160
-rw-r--r--lib/Transforms/LLVMBuild.txt2
-rw-r--r--lib/Transforms/Makefile2
-rw-r--r--lib/Transforms/ObjCARC/CMakeLists.txt13
-rw-r--r--lib/Transforms/ObjCARC/DependencyAnalysis.cpp262
-rw-r--r--lib/Transforms/ObjCARC/DependencyAnalysis.h79
-rw-r--r--lib/Transforms/ObjCARC/LLVMBuild.txt23
-rw-r--r--lib/Transforms/ObjCARC/Makefile15
-rw-r--r--lib/Transforms/ObjCARC/ObjCARC.cpp48
-rw-r--r--lib/Transforms/ObjCARC/ObjCARC.h395
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAPElim.cpp175
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp162
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h74
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCContract.cpp541
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCExpand.cpp128
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCOpts.cpp3026
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCUtil.cpp252
-rw-r--r--lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp177
-rw-r--r--lib/Transforms/ObjCARC/ProvenanceAnalysis.h80
-rw-r--r--lib/Transforms/Scalar/ADCE.cpp12
-rw-r--r--lib/Transforms/Scalar/BasicBlockPlacement.cpp6
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp698
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp10
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp39
-rw-r--r--lib/Transforms/Scalar/DCE.cpp6
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp28
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp16
-rw-r--r--lib/Transforms/Scalar/GVN.cpp79
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp137
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp65
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp47
-rw-r--r--lib/Transforms/Scalar/LICM.cpp65
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp60
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp543
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp9
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp29
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp647
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp31
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp43
-rw-r--r--lib/Transforms/Scalar/LowerAtomic.cpp6
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp10
-rw-r--r--lib/Transforms/Scalar/ObjCARC.cpp4232
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp358
-rw-r--r--lib/Transforms/Scalar/Reg2Mem.cpp14
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp40
-rw-r--r--lib/Transforms/Scalar/SROA.cpp1491
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp15
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp28
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp53
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp1481
-rw-r--r--lib/Transforms/Scalar/Sink.cpp6
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp44
-rw-r--r--lib/Transforms/Utils/AddrModeMatcher.cpp577
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp92
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp12
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp174
-rw-r--r--lib/Transforms/Utils/BypassSlowDivision.cpp12
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt1
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp45
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp10
-rw-r--r--lib/Transforms/Utils/CmpInstAnalysis.cpp4
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp21
-rw-r--r--lib/Transforms/Utils/DemoteRegToStack.cpp33
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp85
-rw-r--r--lib/Transforms/Utils/InstructionNamer.cpp4
-rw-r--r--lib/Transforms/Utils/IntegerDivision.cpp112
-rw-r--r--lib/Transforms/Utils/LCSSA.cpp14
-rw-r--r--lib/Transforms/Utils/Local.cpp101
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp24
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp2
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp2
-rw-r--r--lib/Transforms/Utils/LowerExpectIntrinsic.cpp20
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp18
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp12
-rw-r--r--lib/Transforms/Utils/Mem2Reg.cpp8
-rw-r--r--lib/Transforms/Utils/MetaRenamer.cpp35
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp8
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp28
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp8
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp416
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp10
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp10
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp988
-rw-r--r--lib/Transforms/Utils/UnifyFunctionExitNodes.cpp10
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp29
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp1116
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp3547
-rw-r--r--lib/Transforms/Vectorize/Vectorize.cpp10
-rw-r--r--lib/VMCore/AsmWriter.cpp2160
-rw-r--r--lib/VMCore/Attributes.cpp547
-rw-r--r--lib/VMCore/AttributesImpl.h71
-rw-r--r--lib/VMCore/AutoUpgrade.cpp393
-rw-r--r--lib/VMCore/BasicBlock.cpp371
-rw-r--r--lib/VMCore/CMakeLists.txt52
-rw-r--r--lib/VMCore/ConstantFold.cpp2066
-rw-r--r--lib/VMCore/Constants.cpp2671
-rw-r--r--lib/VMCore/ConstantsContext.h774
-rw-r--r--lib/VMCore/Core.cpp2410
-rw-r--r--lib/VMCore/DIBuilder.cpp1045
-rw-r--r--lib/VMCore/DataLayout.cpp749
-rw-r--r--lib/VMCore/DebugInfo.cpp1178
-rw-r--r--lib/VMCore/DebugLoc.cpp315
-rw-r--r--lib/VMCore/Dominators.cpp302
-rw-r--r--lib/VMCore/Function.cpp668
-rw-r--r--lib/VMCore/Globals.cpp263
-rw-r--r--lib/VMCore/IRBuilder.cpp153
-rw-r--r--lib/VMCore/InlineAsm.cpp295
-rw-r--r--lib/VMCore/Instruction.cpp447
-rw-r--r--lib/VMCore/Instructions.cpp3544
-rw-r--r--lib/VMCore/IntrinsicInst.cpp73
-rw-r--r--lib/VMCore/LLVMBuild.txt22
-rw-r--r--lib/VMCore/LLVMContext.cpp162
-rw-r--r--lib/VMCore/LLVMContextImpl.cpp149
-rw-r--r--lib/VMCore/LLVMContextImpl.h369
-rw-r--r--lib/VMCore/LeakDetector.cpp69
-rw-r--r--lib/VMCore/LeaksContext.h92
-rw-r--r--lib/VMCore/Makefile33
-rw-r--r--lib/VMCore/Metadata.cpp744
-rw-r--r--lib/VMCore/Module.cpp469
-rw-r--r--lib/VMCore/Pass.cpp297
-rw-r--r--lib/VMCore/PassManager.cpp1861
-rw-r--r--lib/VMCore/PassRegistry.cpp209
-rw-r--r--lib/VMCore/PrintModulePass.cpp101
-rw-r--r--lib/VMCore/SymbolTableListTraitsImpl.h118
-rw-r--r--lib/VMCore/TargetTransformInfo.cpp31
-rw-r--r--lib/VMCore/Type.cpp762
-rw-r--r--lib/VMCore/TypeFinder.cpp148
-rw-r--r--lib/VMCore/Use.cpp145
-rw-r--r--lib/VMCore/User.cpp90
-rw-r--r--lib/VMCore/Value.cpp694
-rw-r--r--lib/VMCore/ValueSymbolTable.cpp117
-rw-r--r--lib/VMCore/ValueTypes.cpp248
-rw-r--r--lib/VMCore/Verifier.cpp1997
-rw-r--r--projects/CMakeLists.txt9
-rw-r--r--projects/sample/Makefile.llvm.config.in6
-rw-r--r--projects/sample/Makefile.llvm.rules27
-rwxr-xr-xprojects/sample/autoconf/config.sub4
-rw-r--r--projects/sample/autoconf/configure.ac30
-rwxr-xr-xprojects/sample/configure61
-rw-r--r--projects/sample/tools/sample/main.c4
-rw-r--r--runtime/libprofile/CMakeLists.txt1
-rw-r--r--runtime/libprofile/GCDAProfiling.c205
-rw-r--r--runtime/libprofile/Makefile4
-rw-r--r--runtime/libprofile/libprofile.exports12
-rw-r--r--test/Analysis/BasicAA/intrinsics.ll7
-rw-r--r--test/Analysis/BasicAA/invariant_load.ll29
-rw-r--r--test/Analysis/BasicAA/phi-spec-order.ll71
-rw-r--r--test/Analysis/BasicAA/phi-speculation.ll65
-rw-r--r--test/Analysis/BasicAA/pure-const-dce.ll9
-rw-r--r--test/Analysis/CostModel/ARM/cast.ll547
-rw-r--r--test/Analysis/CostModel/ARM/gep.ll43
-rw-r--r--test/Analysis/CostModel/ARM/insertelement.ll46
-rw-r--r--test/Analysis/CostModel/ARM/lit.local.cfg6
-rw-r--r--test/Analysis/CostModel/ARM/select.ll67
-rw-r--r--test/Analysis/CostModel/ARM/shuffle.ll40
-rw-r--r--test/Analysis/CostModel/PowerPC/insert_extract.ll16
-rw-r--r--test/Analysis/CostModel/PowerPC/lit.local.cfg6
-rw-r--r--test/Analysis/CostModel/PowerPC/load_store.ll34
-rw-r--r--test/Analysis/CostModel/X86/arith.ll90
-rw-r--r--test/Analysis/CostModel/X86/cast.ll89
-rw-r--r--test/Analysis/CostModel/X86/cmp.ll44
-rw-r--r--test/Analysis/CostModel/X86/gep.ll40
-rw-r--r--test/Analysis/CostModel/X86/i32.ll4
-rw-r--r--test/Analysis/CostModel/X86/intrinsic-cost.ll32
-rw-r--r--test/Analysis/CostModel/X86/load_store.ll64
-rw-r--r--test/Analysis/CostModel/X86/testshiftashr.ll531
-rw-r--r--test/Analysis/CostModel/X86/testshiftlshr.ll530
-rw-r--r--test/Analysis/CostModel/X86/testshiftshl.ll530
-rw-r--r--test/Analysis/CostModel/X86/tiny.ll2
-rw-r--r--test/Analysis/CostModel/X86/vectorized-loop.ll11
-rw-r--r--test/Analysis/CostModel/no_info.ll5
-rw-r--r--test/Analysis/DependenceAnalysis/Banerjee.ll152
-rw-r--r--test/Analysis/DependenceAnalysis/Coupled.ll329
-rw-r--r--test/Analysis/DependenceAnalysis/ExactRDIV.ll402
-rw-r--r--test/Analysis/DependenceAnalysis/ExactSIV.ll252
-rw-r--r--test/Analysis/DependenceAnalysis/GCD.ll158
-rw-r--r--test/Analysis/DependenceAnalysis/Preliminary.ll644
-rw-r--r--test/Analysis/DependenceAnalysis/Propagating.ll302
-rw-r--r--test/Analysis/DependenceAnalysis/Separability.ll188
-rw-r--r--test/Analysis/DependenceAnalysis/StrongSIV.ll334
-rw-r--r--test/Analysis/DependenceAnalysis/SymbolicRDIV.ll357
-rw-r--r--test/Analysis/DependenceAnalysis/SymbolicSIV.ll271
-rw-r--r--test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll186
-rw-r--r--test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll168
-rw-r--r--test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll168
-rw-r--r--test/Analysis/DependenceAnalysis/ZIV.ll39
-rw-r--r--test/Analysis/Dominators/invoke.ll2
-rw-r--r--test/Analysis/Profiling/lit.local.cfg15
-rw-r--r--test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll2
-rw-r--r--test/Analysis/RegionInfo/block_sort.ll1
-rw-r--r--test/Analysis/RegionInfo/cond_loop.ll1
-rw-r--r--test/Analysis/RegionInfo/condition_complicated.ll1
-rw-r--r--test/Analysis/RegionInfo/condition_complicated_2.ll1
-rw-r--r--test/Analysis/RegionInfo/condition_forward_edge.ll1
-rw-r--r--test/Analysis/RegionInfo/condition_same_exit.ll1
-rw-r--r--test/Analysis/RegionInfo/condition_simple.ll1
-rw-r--r--test/Analysis/RegionInfo/exit_in_condition.ll1
-rw-r--r--test/Analysis/RegionInfo/infinite_loop.ll1
-rw-r--r--test/Analysis/RegionInfo/infinite_loop_2.ll1
-rw-r--r--test/Analysis/RegionInfo/infinite_loop_3.ll1
-rw-r--r--test/Analysis/RegionInfo/infinite_loop_4.ll1
-rw-r--r--test/Analysis/RegionInfo/loop_with_condition.ll1
-rw-r--r--test/Analysis/RegionInfo/loops_1.ll1
-rw-r--r--test/Analysis/RegionInfo/loops_2.ll1
-rw-r--r--test/Analysis/RegionInfo/mix_1.ll1
-rw-r--r--test/Analysis/RegionInfo/nested_loops.ll1
-rw-r--r--test/Analysis/RegionInfo/next.ll1
-rw-r--r--test/Analysis/RegionInfo/paper.ll1
-rw-r--r--test/Analysis/RegionInfo/two_loops_same_header.ll1
-rw-r--r--test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll4
-rw-r--r--test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll2
-rw-r--r--test/Analysis/ScalarEvolution/fold.ll2
-rw-r--r--test/Analysis/ScalarEvolution/scev-invalid.ll34
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll20
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll5
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll104
-rw-r--r--test/Assembler/2008-09-02-FunctionNotes.ll9
-rw-r--r--test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll9
-rw-r--r--test/Assembler/ConstantExprNoFold.ll23
-rw-r--r--test/Assembler/externally-initialized.ll5
-rw-r--r--test/Assembler/fast-math-flags.ll142
-rw-r--r--test/Assembler/getelementptr.ll19
-rw-r--r--test/Assembler/getelementptr_vec_idx1.ll10
-rw-r--r--test/Assembler/getelementptr_vec_idx2.ll10
-rw-r--r--test/Assembler/getelementptr_vec_idx3.ll10
-rw-r--r--test/Assembler/getelementptr_vec_struct.ll10
-rw-r--r--test/Assembler/unnamed-addr.ll4
-rw-r--r--test/Bindings/Ocaml/vmcore.ml3
-rw-r--r--test/Bitcode/attributes.ll76
-rw-r--r--test/Bitcode/ptest-new.ll9
-rw-r--r--test/Bitcode/ptest-old.ll9
-rw-r--r--test/CMakeLists.txt22
-rw-r--r--test/CodeGen/AArch64/adc.ll54
-rw-r--r--test/CodeGen/AArch64/addsub-shifted.ll295
-rw-r--r--test/CodeGen/AArch64/addsub.ll127
-rw-r--r--test/CodeGen/AArch64/addsub_ext.ll189
-rw-r--r--test/CodeGen/AArch64/adrp-relocation.ll35
-rw-r--r--test/CodeGen/AArch64/alloca.ll134
-rw-r--r--test/CodeGen/AArch64/analyze-branch.ll231
-rw-r--r--test/CodeGen/AArch64/atomic-ops-not-barriers.ll24
-rw-r--r--test/CodeGen/AArch64/atomic-ops.ll1055
-rw-r--r--test/CodeGen/AArch64/basic-pic.ll70
-rw-r--r--test/CodeGen/AArch64/bitfield-insert-0.ll19
-rw-r--r--test/CodeGen/AArch64/bitfield-insert.ll193
-rw-r--r--test/CodeGen/AArch64/bitfield.ll218
-rw-r--r--test/CodeGen/AArch64/blockaddress.ll18
-rw-r--r--test/CodeGen/AArch64/bool-loads.ll55
-rw-r--r--test/CodeGen/AArch64/breg.ll17
-rw-r--r--test/CodeGen/AArch64/callee-save.ll86
-rw-r--r--test/CodeGen/AArch64/compare-branch.ll38
-rw-r--r--test/CodeGen/AArch64/cond-sel.ll213
-rw-r--r--test/CodeGen/AArch64/directcond.ll84
-rw-r--r--test/CodeGen/AArch64/dp-3source.ll163
-rw-r--r--test/CodeGen/AArch64/dp1.ll152
-rw-r--r--test/CodeGen/AArch64/dp2.ll169
-rw-r--r--test/CodeGen/AArch64/elf-extern.ll21
-rw-r--r--test/CodeGen/AArch64/extern-weak.ll35
-rw-r--r--test/CodeGen/AArch64/extract.ll57
-rw-r--r--test/CodeGen/AArch64/fastcc-reserved.ll58
-rw-r--r--test/CodeGen/AArch64/fastcc.ll123
-rw-r--r--test/CodeGen/AArch64/fcmp.ll81
-rw-r--r--test/CodeGen/AArch64/fcvt-fixed.ll191
-rw-r--r--test/CodeGen/AArch64/fcvt-int.ll151
-rw-r--r--test/CodeGen/AArch64/flags-multiuse.ll35
-rw-r--r--test/CodeGen/AArch64/floatdp_1source.ll138
-rw-r--r--test/CodeGen/AArch64/floatdp_2source.ll60
-rw-r--r--test/CodeGen/AArch64/fp-cond-sel.ll26
-rw-r--r--test/CodeGen/AArch64/fp-dp3.ll102
-rw-r--r--test/CodeGen/AArch64/fp128-folding.ll17
-rw-r--r--test/CodeGen/AArch64/fp128.ll280
-rw-r--r--test/CodeGen/AArch64/fpimm.ll34
-rw-r--r--test/CodeGen/AArch64/func-argpassing.ll193
-rw-r--r--test/CodeGen/AArch64/func-calls.ll140
-rw-r--r--test/CodeGen/AArch64/global-alignment.ll69
-rw-r--r--test/CodeGen/AArch64/got-abuse.ll23
-rw-r--r--test/CodeGen/AArch64/i128-align.ll29
-rw-r--r--test/CodeGen/AArch64/illegal-float-ops.ll221
-rw-r--r--test/CodeGen/AArch64/init-array.ll9
-rw-r--r--test/CodeGen/AArch64/inline-asm-constraints-badI.ll7
-rw-r--r--test/CodeGen/AArch64/inline-asm-constraints-badK.ll7
-rw-r--r--test/CodeGen/AArch64/inline-asm-constraints-badK2.ll7
-rw-r--r--test/CodeGen/AArch64/inline-asm-constraints-badL.ll7
-rw-r--r--test/CodeGen/AArch64/inline-asm-constraints.ll117
-rw-r--r--test/CodeGen/AArch64/inline-asm-modifiers.ll125
-rw-r--r--test/CodeGen/AArch64/jump-table.ll56
-rw-r--r--test/CodeGen/AArch64/large-frame.ll114
-rw-r--r--test/CodeGen/AArch64/ldst-regoffset.ll333
-rw-r--r--test/CodeGen/AArch64/ldst-unscaledimm.ll218
-rw-r--r--test/CodeGen/AArch64/ldst-unsignedimm.ll251
-rw-r--r--test/CodeGen/AArch64/lit.local.cfg6
-rw-r--r--test/CodeGen/AArch64/literal_pools.ll55
-rw-r--r--test/CodeGen/AArch64/local_vars.ll57
-rw-r--r--test/CodeGen/AArch64/logical-imm.ll84
-rw-r--r--test/CodeGen/AArch64/logical_shifted_reg.ll224
-rw-r--r--test/CodeGen/AArch64/logical_shifted_reg.s208
-rw-r--r--test/CodeGen/AArch64/movw-consts.ll124
-rw-r--r--test/CodeGen/AArch64/pic-eh-stubs.ll60
-rw-r--r--test/CodeGen/AArch64/regress-bitcast-formals.ll11
-rw-r--r--test/CodeGen/AArch64/regress-f128csel-flags.ll27
-rw-r--r--test/CodeGen/AArch64/regress-tail-livereg.ll19
-rw-r--r--test/CodeGen/AArch64/regress-tblgen-chains.ll36
-rw-r--r--test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll37
-rw-r--r--test/CodeGen/AArch64/regress-wzr-allocatable.ll41
-rw-r--r--test/CodeGen/AArch64/setcc-takes-i32.ll22
-rw-r--r--test/CodeGen/AArch64/sibling-call.ll97
-rw-r--r--test/CodeGen/AArch64/sincos-expansion.ll35
-rw-r--r--test/CodeGen/AArch64/tail-call.ll94
-rw-r--r--test/CodeGen/AArch64/tls-dynamic-together.ll18
-rw-r--r--test/CodeGen/AArch64/tls-dynamics.ll121
-rw-r--r--test/CodeGen/AArch64/tls-execs.ll63
-rw-r--r--test/CodeGen/AArch64/tst-br.ll48
-rw-r--r--test/CodeGen/AArch64/variadic.ll144
-rw-r--r--test/CodeGen/AArch64/zero-reg.ll31
-rw-r--r--test/CodeGen/ARM/2007-03-13-InstrSched.ll1
-rw-r--r--test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll28
-rw-r--r--test/CodeGen/ARM/2010-08-04-StackVariable.ll67
-rw-r--r--test/CodeGen/ARM/2010-11-30-reloc-movt.ll4
-rw-r--r--test/CodeGen/ARM/2010-12-15-elf-lcomm.ll3
-rw-r--r--test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll74
-rw-r--r--test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll75
-rw-r--r--test/CodeGen/ARM/2011-10-26-memset-with-neon.ll8
-rw-r--r--test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll4
-rw-r--r--test/CodeGen/ARM/2011-12-14-machine-sink.ll1
-rw-r--r--test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll2
-rw-r--r--test/CodeGen/ARM/2012-08-09-neon-extload.ll12
-rw-r--r--test/CodeGen/ARM/2012-08-23-legalize-vmull.ll150
-rw-r--r--test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll2
-rw-r--r--test/CodeGen/ARM/2012-11-14-subs_carry.ll31
-rw-r--r--test/CodeGen/ARM/2013-01-21-PR14992.ll28
-rw-r--r--test/CodeGen/ARM/2013-02-27-expand-vfma.ll31
-rw-r--r--test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll110
-rw-r--r--test/CodeGen/ARM/DbgValueOtherTargets.test1
-rw-r--r--test/CodeGen/ARM/MergeConsecutiveStores.ll98
-rw-r--r--test/CodeGen/ARM/PR15053.ll13
-rw-r--r--test/CodeGen/ARM/a15-SD-dep.ll58
-rw-r--r--test/CodeGen/ARM/a15-partial-update.ll38
-rw-r--r--test/CodeGen/ARM/addrmode.ll1
-rw-r--r--test/CodeGen/ARM/alloc-no-stack-realign.ll48
-rw-r--r--test/CodeGen/ARM/arm-modifier.ll3
-rw-r--r--test/CodeGen/ARM/arm-ttype-target2.ll44
-rw-r--r--test/CodeGen/ARM/atomic-64bit.ll290
-rw-r--r--test/CodeGen/ARM/avoid-cpsr-rmw.ll65
-rw-r--r--test/CodeGen/ARM/bfx.ll25
-rw-r--r--test/CodeGen/ARM/call-tc.ll1
-rw-r--r--test/CodeGen/ARM/call_nolink.ll14
-rw-r--r--test/CodeGen/ARM/coalesce-subregs.ll2
-rw-r--r--test/CodeGen/ARM/commute-movcc.ll4
-rw-r--r--test/CodeGen/ARM/crash.ll23
-rw-r--r--test/CodeGen/ARM/debug-info-arg.ll43
-rw-r--r--test/CodeGen/ARM/debug-info-blocks.ll253
-rw-r--r--test/CodeGen/ARM/debug-info-branch-folding.ll93
-rw-r--r--test/CodeGen/ARM/debug-info-d16-reg.ll62
-rw-r--r--test/CodeGen/ARM/debug-info-qreg.ll91
-rw-r--r--test/CodeGen/ARM/debug-info-s16-reg.ll60
-rw-r--r--test/CodeGen/ARM/debug-info-sreg2.ll25
-rw-r--r--test/CodeGen/ARM/domain-conv-vmovs.ll22
-rw-r--r--test/CodeGen/ARM/eh-dispcont.ll89
-rw-r--r--test/CodeGen/ARM/ehabi-filters.ll77
-rw-r--r--test/CodeGen/ARM/ehabi-mc-cantunwind.ll14
-rw-r--r--test/CodeGen/ARM/ehabi-mc-section-group.ll79
-rw-r--r--test/CodeGen/ARM/ehabi-mc-section.ll59
-rw-r--r--test/CodeGen/ARM/ehabi-mc-sh_link.ll47
-rw-r--r--test/CodeGen/ARM/ehabi-mc.ll59
-rw-r--r--test/CodeGen/ARM/ehabi-no-landingpad.ll18
-rw-r--r--test/CodeGen/ARM/elf-lcomm-align.ll5
-rw-r--r--test/CodeGen/ARM/extload-knownzero.ll26
-rw-r--r--test/CodeGen/ARM/fabs-neon.ll17
-rw-r--r--test/CodeGen/ARM/fabss.ll6
-rw-r--r--test/CodeGen/ARM/fadds.ll10
-rw-r--r--test/CodeGen/ARM/fast-isel-GEP-coalesce.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-br-const.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-crash.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-crash2.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-deadcode.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-fold.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-frameaddr.ll8
-rw-r--r--test/CodeGen/ARM/fast-isel-icmp.ll30
-rw-r--r--test/CodeGen/ARM/fast-isel-indirectbr.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-intrinsic.ll116
-rw-r--r--test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-pred.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-redefinition.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-static.ll4
-rw-r--r--test/CodeGen/ARM/fdivs.ll8
-rw-r--r--test/CodeGen/ARM/fmuls.ll10
-rw-r--r--test/CodeGen/ARM/fnegs.ll14
-rw-r--r--test/CodeGen/ARM/fnmscs.ll34
-rw-r--r--test/CodeGen/ARM/fp_convert.ll4
-rw-r--r--test/CodeGen/ARM/fsubs.ll7
-rw-r--r--test/CodeGen/ARM/global-merge-addrspace.ll12
-rw-r--r--test/CodeGen/ARM/global-merge.ll53
-rw-r--r--test/CodeGen/ARM/indirect-reg-input.ll14
-rw-r--r--test/CodeGen/ARM/inlineasm-64bit.ll54
-rw-r--r--  test/CodeGen/ARM/inlineasm3.ll  2
-rw-r--r--  test/CodeGen/ARM/invoke-donothing-assert.ll  73
-rw-r--r--  test/CodeGen/ARM/lit.local.cfg  2
-rw-r--r--  test/CodeGen/ARM/lsr-icmp-imm.ll  4
-rw-r--r--  test/CodeGen/ARM/machine-cse-cmp.ll  32
-rw-r--r--  test/CodeGen/ARM/memcpy-inline.ll  109
-rw-r--r--  test/CodeGen/ARM/memset-inline.ll  30
-rw-r--r--  test/CodeGen/ARM/neon-spfp.ll  76
-rw-r--r--  test/CodeGen/ARM/neon_cmp.ll  15
-rw-r--r--  test/CodeGen/ARM/neon_fpconv.ll  42
-rw-r--r--  test/CodeGen/ARM/neon_ld2.ll  10
-rw-r--r--  test/CodeGen/ARM/neon_minmax.ll  2
-rw-r--r--  test/CodeGen/ARM/popcnt.ll  191
-rw-r--r--  test/CodeGen/ARM/reg_asc_order.ll  16
-rw-r--r--  test/CodeGen/ARM/reg_sequence.ll  4
-rw-r--r--  test/CodeGen/ARM/ret_sret_vector.ll  12
-rw-r--r--  test/CodeGen/ARM/sjlj-prepare-critical-edge.ll  67
-rw-r--r--  test/CodeGen/ARM/spill-q.ll  4
-rw-r--r--  test/CodeGen/ARM/subreg-remat.ll  6
-rw-r--r--  test/CodeGen/ARM/trap.ll  28
-rw-r--r--  test/CodeGen/ARM/vcvt.ll  172
-rw-r--r--  test/CodeGen/ARM/vector-DAGCombine.ll  27
-rw-r--r--  test/CodeGen/ARM/vfloatintrinsics.ll  377
-rw-r--r--  test/CodeGen/ARM/vld1.ll  8
-rw-r--r--  test/CodeGen/ARM/vld2.ll  14
-rw-r--r--  test/CodeGen/ARM/vld3.ll  8
-rw-r--r--  test/CodeGen/ARM/vld4.ll  18
-rw-r--r--  test/CodeGen/ARM/vlddup.ll  12
-rw-r--r--  test/CodeGen/ARM/vldlane.ll  28
-rw-r--r--  test/CodeGen/ARM/vselect_imax.ll  111
-rw-r--r--  test/CodeGen/ARM/vst1.ll  8
-rw-r--r--  test/CodeGen/ARM/vst2.ll  14
-rw-r--r--  test/CodeGen/ARM/vst3.ll  8
-rw-r--r--  test/CodeGen/ARM/vst4.ll  14
-rw-r--r--  test/CodeGen/ARM/vstlane.ll  26
-rw-r--r--  test/CodeGen/ARM/widen-vmovs.ll  2
-rw-r--r--  test/CodeGen/ARM/zextload_demandedbits.ll  35
-rw-r--r--  test/CodeGen/CPP/2007-06-16-Funcname.ll  1
-rw-r--r--  test/CodeGen/CellSPU/2009-01-01-BrCond.ll  31
-rw-r--r--  test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll  28
-rw-r--r--  test/CodeGen/CellSPU/and_ops.ll  282
-rw-r--r--  test/CodeGen/CellSPU/arg_ret.ll  34
-rw-r--r--  test/CodeGen/CellSPU/bigstack.ll  17
-rw-r--r--  test/CodeGen/CellSPU/bss.ll  11
-rw-r--r--  test/CodeGen/CellSPU/call.ll  49
-rw-r--r--  test/CodeGen/CellSPU/crash.ll  8
-rw-r--r--  test/CodeGen/CellSPU/ctpop.ll  30
-rw-r--r--  test/CodeGen/CellSPU/div_ops.ll  22
-rw-r--r--  test/CodeGen/CellSPU/dp_farith.ll  102
-rw-r--r--  test/CodeGen/CellSPU/eqv.ll  152
-rw-r--r--  test/CodeGen/CellSPU/extract_elt.ll  277
-rw-r--r--  test/CodeGen/CellSPU/fcmp32.ll  36
-rw-r--r--  test/CodeGen/CellSPU/fcmp64.ll  7
-rw-r--r--  test/CodeGen/CellSPU/fdiv.ll  22
-rw-r--r--  test/CodeGen/CellSPU/fneg-fabs.ll  42
-rw-r--r--  test/CodeGen/CellSPU/i64ops.ll  57
-rw-r--r--  test/CodeGen/CellSPU/i8ops.ll  25
-rw-r--r--  test/CodeGen/CellSPU/icmp16.ll  574
-rw-r--r--  test/CodeGen/CellSPU/icmp32.ll  575
-rw-r--r--  test/CodeGen/CellSPU/icmp64.ll  146
-rw-r--r--  test/CodeGen/CellSPU/icmp8.ll  446
-rw-r--r--  test/CodeGen/CellSPU/immed16.ll  40
-rw-r--r--  test/CodeGen/CellSPU/immed32.ll  83
-rw-r--r--  test/CodeGen/CellSPU/immed64.ll  95
-rw-r--r--  test/CodeGen/CellSPU/int2fp.ll  41
-rw-r--r--  test/CodeGen/CellSPU/intrinsics_branch.ll  150
-rw-r--r--  test/CodeGen/CellSPU/intrinsics_float.ll  94
-rw-r--r--  test/CodeGen/CellSPU/intrinsics_logical.ll  49
-rw-r--r--  test/CodeGen/CellSPU/jumptable.ll  21
-rw-r--r--  test/CodeGen/CellSPU/lit.local.cfg  6
-rw-r--r--  test/CodeGen/CellSPU/loads.ll  59
-rw-r--r--  test/CodeGen/CellSPU/mul-with-overflow.ll  15
-rw-r--r--  test/CodeGen/CellSPU/mul_ops.ll  88
-rw-r--r--  test/CodeGen/CellSPU/nand.ll  125
-rw-r--r--  test/CodeGen/CellSPU/or_ops.ll  278
-rw-r--r--  test/CodeGen/CellSPU/private.ll  19
-rw-r--r--  test/CodeGen/CellSPU/rotate_ops.ll  172
-rw-r--r--  test/CodeGen/CellSPU/select_bits.ll  572
-rw-r--r--  test/CodeGen/CellSPU/sext128.ll  71
-rw-r--r--  test/CodeGen/CellSPU/shift_ops.ll  348
-rw-r--r--  test/CodeGen/CellSPU/shuffles.ll  69
-rw-r--r--  test/CodeGen/CellSPU/sp_farith.ll  90
-rw-r--r--  test/CodeGen/CellSPU/stores.ll  181
-rw-r--r--  test/CodeGen/CellSPU/storestruct.ll  13
-rw-r--r--  test/CodeGen/CellSPU/struct_1.ll  147
-rw-r--r--  test/CodeGen/CellSPU/sub_ops.ll  26
-rw-r--r--  test/CodeGen/CellSPU/trunc.ll  94
-rw-r--r--  test/CodeGen/CellSPU/useful-harnesses/README.txt  5
-rw-r--r--  test/CodeGen/CellSPU/useful-harnesses/i32operations.c  69
-rw-r--r--  test/CodeGen/CellSPU/useful-harnesses/i64operations.c  673
-rw-r--r--  test/CodeGen/CellSPU/useful-harnesses/i64operations.h  43
-rw-r--r--  test/CodeGen/CellSPU/useful-harnesses/vecoperations.c  179
-rw-r--r--  test/CodeGen/CellSPU/v2f32.ll  78
-rw-r--r--  test/CodeGen/CellSPU/v2i32.ll  61
-rw-r--r--  test/CodeGen/CellSPU/vec_const.ll  154
-rw-r--r--  test/CodeGen/CellSPU/vecinsert.ll  131
-rw-r--r--  test/CodeGen/Generic/2008-02-20-MatchingMem.ll  1
-rw-r--r--  test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll  8
-rw-r--r--  test/CodeGen/Generic/2013-03-20-APFloatCrash.ll  7
-rw-r--r--  test/CodeGen/Generic/dag-combine-crash.ll  21
-rw-r--r--  test/CodeGen/Generic/inline-asm-mem-clobber.ll  21
-rw-r--r--  test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll  2
-rw-r--r--  test/CodeGen/Generic/select-cc.ll  2
-rw-r--r--  test/CodeGen/Generic/vector.ll  7
-rw-r--r--  test/CodeGen/Hexagon/absaddr-store.ll  46
-rw-r--r--  test/CodeGen/Hexagon/adde.ll  34
-rw-r--r--  test/CodeGen/Hexagon/args.ll  7
-rw-r--r--  test/CodeGen/Hexagon/ashift-left-right.ll  21
-rw-r--r--  test/CodeGen/Hexagon/block-addr.ll  64
-rw-r--r--  test/CodeGen/Hexagon/cext-check.ll  57
-rw-r--r--  test/CodeGen/Hexagon/cext-valid-packet1.ll  18
-rw-r--r--  test/CodeGen/Hexagon/cext-valid-packet2.ll  43
-rw-r--r--  test/CodeGen/Hexagon/cmp-to-genreg.ll  34
-rw-r--r--  test/CodeGen/Hexagon/cmp-to-predreg.ll  43
-rw-r--r--  test/CodeGen/Hexagon/cmp_pred.ll  115
-rw-r--r--  test/CodeGen/Hexagon/cmp_pred_reg.ll  115
-rw-r--r--  test/CodeGen/Hexagon/cmpb_pred.ll  92
-rw-r--r--  test/CodeGen/Hexagon/combine_ir.ll  55
-rw-r--r--  test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll  34
-rw-r--r--  test/CodeGen/Hexagon/dualstore.ll  6
-rw-r--r--  test/CodeGen/Hexagon/gp-plus-offset-load.ll  51
-rw-r--r--  test/CodeGen/Hexagon/gp-plus-offset-store.ll  35
-rw-r--r--  test/CodeGen/Hexagon/gp-rel.ll  33
-rw-r--r--  test/CodeGen/Hexagon/hwloop-cleanup.ll  86
-rw-r--r--  test/CodeGen/Hexagon/hwloop-const.ll  31
-rw-r--r--  test/CodeGen/Hexagon/hwloop-dbg.ll  64
-rw-r--r--  test/CodeGen/Hexagon/hwloop-le.ll  438
-rw-r--r--  test/CodeGen/Hexagon/hwloop-lt.ll  438
-rw-r--r--  test/CodeGen/Hexagon/hwloop-lt1.ll  32
-rw-r--r--  test/CodeGen/Hexagon/hwloop-ne.ll  438
-rw-r--r--  test/CodeGen/Hexagon/i16_VarArg.ll  40
-rw-r--r--  test/CodeGen/Hexagon/i1_VarArg.ll  44
-rw-r--r--  test/CodeGen/Hexagon/i8_VarArg.ll  40
-rw-r--r--  test/CodeGen/Hexagon/idxload-with-zero-offset.ll  70
-rw-r--r--  test/CodeGen/Hexagon/indirect-br.ll  14
-rw-r--r--  test/CodeGen/Hexagon/memops.ll  1369
-rw-r--r--  test/CodeGen/Hexagon/memops1.ll  33
-rw-r--r--  test/CodeGen/Hexagon/memops2.ll  32
-rw-r--r--  test/CodeGen/Hexagon/memops3.ll  31
-rw-r--r--  test/CodeGen/Hexagon/misaligned-access.ll  16
-rw-r--r--  test/CodeGen/Hexagon/postinc-load.ll  29
-rw-r--r--  test/CodeGen/Hexagon/postinc-store.ll  29
-rw-r--r--  test/CodeGen/Hexagon/pred-absolute-store.ll  19
-rw-r--r--  test/CodeGen/Hexagon/predicate-copy.ll  8
-rw-r--r--  test/CodeGen/Hexagon/struct_args.ll  2
-rw-r--r--  test/CodeGen/Hexagon/sube.ll  29
-rw-r--r--  test/CodeGen/Hexagon/validate-offset.ll  36
-rw-r--r--  test/CodeGen/Hexagon/zextloadi1.ll  25
-rw-r--r--  test/CodeGen/Inputs/DbgValueOtherTargets.ll  28
-rw-r--r--  test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll  28
-rw-r--r--  test/CodeGen/MBlaze/DbgValueOtherTargets.test  1
-rw-r--r--  test/CodeGen/MBlaze/lit.local.cfg  2
-rw-r--r--  test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll  28
-rw-r--r--  test/CodeGen/MSP430/DbgValueOtherTargets.test  1
-rw-r--r--  test/CodeGen/MSP430/byval.ll  26
-rw-r--r--  test/CodeGen/MSP430/lit.local.cfg  2
-rw-r--r--  test/CodeGen/MSP430/vararg.ll  50
-rw-r--r--  test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll  28
-rw-r--r--  test/CodeGen/Mips/2010-07-20-Switch.ll  35
-rw-r--r--  test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll  11
-rw-r--r--  test/CodeGen/Mips/DbgValueOtherTargets.test  1
-rw-r--r--  test/CodeGen/Mips/addi.ll  30
-rw-r--r--  test/CodeGen/Mips/addressing-mode.ll  41
-rw-r--r--  test/CodeGen/Mips/align16.ll  31
-rw-r--r--  test/CodeGen/Mips/alloca.ll  10
-rw-r--r--  test/CodeGen/Mips/alloca16.ll  4
-rw-r--r--  test/CodeGen/Mips/br-jmp.ll  5
-rw-r--r--  test/CodeGen/Mips/brdelayslot.ll  141
-rwxr-xr-x  test/CodeGen/Mips/cmov.ll  137
-rw-r--r--  test/CodeGen/Mips/dsp-patterns.ll  31
-rw-r--r--  test/CodeGen/Mips/eh-return32.ll  85
-rw-r--r--  test/CodeGen/Mips/eh-return64.ll  87
-rw-r--r--  test/CodeGen/Mips/ex2.ll  29
-rw-r--r--  test/CodeGen/Mips/fp16static.ll  13
-rw-r--r--  test/CodeGen/Mips/frame-address.ll  4
-rw-r--r--  test/CodeGen/Mips/gpreg-lazy-binding.ll  27
-rw-r--r--  test/CodeGen/Mips/helloworld.ll  31
-rw-r--r--  test/CodeGen/Mips/hf16_1.ll  256
-rw-r--r--  test/CodeGen/Mips/i64arg.ll  14
-rw-r--r--  test/CodeGen/Mips/inlineasm_constraint.ll  9
-rw-r--r--  test/CodeGen/Mips/jtstat.ll  71
-rw-r--r--  test/CodeGen/Mips/largefr1.ll  61
-rw-r--r--  test/CodeGen/Mips/lit.local.cfg  2
-rw-r--r--  test/CodeGen/Mips/madd-msub.ll  22
-rw-r--r--  test/CodeGen/Mips/mips16ex.ll  87
-rw-r--r--  test/CodeGen/Mips/mips16fpe.ll  381
-rw-r--r--  test/CodeGen/Mips/mips64-f128-call.ll  45
-rw-r--r--  test/CodeGen/Mips/mips64-f128.ll  646
-rw-r--r--  test/CodeGen/Mips/mips64-libcall.ll  29
-rw-r--r--  test/CodeGen/Mips/mips64-sret.ll  2
-rw-r--r--  test/CodeGen/Mips/o32_cc_byval.ll  10
-rw-r--r--  test/CodeGen/Mips/return-vector-float4.ll  12
-rw-r--r--  test/CodeGen/Mips/return_address.ll  6
-rw-r--r--  test/CodeGen/Mips/selTBteqzCmpi.ll  26
-rw-r--r--  test/CodeGen/Mips/selTBtnezCmpi.ll  26
-rw-r--r--  test/CodeGen/Mips/selTBtnezSlti.ll  25
-rw-r--r--  test/CodeGen/Mips/seleq.ll  95
-rw-r--r--  test/CodeGen/Mips/seleqk.ll  91
-rw-r--r--  test/CodeGen/Mips/selgek.ll  94
-rw-r--r--  test/CodeGen/Mips/selgt.ll  98
-rw-r--r--  test/CodeGen/Mips/selle.ll  96
-rw-r--r--  test/CodeGen/Mips/selltk.ll  93
-rw-r--r--  test/CodeGen/Mips/selne.ll  97
-rw-r--r--  test/CodeGen/Mips/selnek.ll  107
-rw-r--r--  test/CodeGen/Mips/selpat.ll  32
-rw-r--r--  test/CodeGen/Mips/seteq.ll  2
-rw-r--r--  test/CodeGen/Mips/seteqz.ll  4
-rw-r--r--  test/CodeGen/Mips/setge.ll  2
-rw-r--r--  test/CodeGen/Mips/setgek.ll  2
-rw-r--r--  test/CodeGen/Mips/setle.ll  2
-rw-r--r--  test/CodeGen/Mips/setlt.ll  2
-rw-r--r--  test/CodeGen/Mips/setltk.ll  2
-rw-r--r--  test/CodeGen/Mips/setne.ll  2
-rw-r--r--  test/CodeGen/Mips/setuge.ll  2
-rw-r--r--  test/CodeGen/Mips/setugt.ll  2
-rw-r--r--  test/CodeGen/Mips/setule.ll  2
-rw-r--r--  test/CodeGen/Mips/setult.ll  2
-rw-r--r--  test/CodeGen/Mips/setultk.ll  4
-rw-r--r--  test/CodeGen/Mips/tls.ll  2
-rw-r--r--  test/CodeGen/Mips/vector-setcc.ll  16
-rw-r--r--  test/CodeGen/NVPTX/annotations.ll  2
-rw-r--r--  test/CodeGen/NVPTX/arithmetic-fp-sm10.ll  72
-rw-r--r--  test/CodeGen/NVPTX/arithmetic-int.ll  2
-rw-r--r--  test/CodeGen/NVPTX/calling-conv.ll  2
-rw-r--r--  test/CodeGen/NVPTX/compare-int.ll  2
-rw-r--r--  test/CodeGen/NVPTX/convert-fp.ll  2
-rw-r--r--  test/CodeGen/NVPTX/convert-int-sm10.ll  55
-rw-r--r--  test/CodeGen/NVPTX/intrin-nocapture.ll  21
-rw-r--r--  test/CodeGen/NVPTX/intrinsic-old.ll  2
-rw-r--r--  test/CodeGen/NVPTX/intrinsics.ll  2
-rw-r--r--  test/CodeGen/NVPTX/ld-addrspace.ll  2
-rw-r--r--  test/CodeGen/NVPTX/nvvm-reflect.ll  34
-rw-r--r--  test/CodeGen/NVPTX/sched1.ll  31
-rw-r--r--  test/CodeGen/NVPTX/sched2.ll  32
-rw-r--r--  test/CodeGen/NVPTX/sm-version-10.ll  6
-rw-r--r--  test/CodeGen/NVPTX/sm-version-11.ll  6
-rw-r--r--  test/CodeGen/NVPTX/sm-version-12.ll  6
-rw-r--r--  test/CodeGen/NVPTX/sm-version-13.ll  6
-rw-r--r--  test/CodeGen/NVPTX/st-addrspace.ll  2
-rw-r--r--  test/CodeGen/NVPTX/tuple-literal.ll  5
-rw-r--r--  test/CodeGen/NVPTX/vector-args.ll  27
-rw-r--r--  test/CodeGen/NVPTX/vector-compare.ll  19
-rw-r--r--  test/CodeGen/NVPTX/vector-loads.ll  66
-rw-r--r--  test/CodeGen/NVPTX/vector-select.ll  16
-rw-r--r--  test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll  2
-rw-r--r--  test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll  1
-rw-r--r--  test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll  4
-rw-r--r--  test/CodeGen/PowerPC/2010-02-12-saveCR.ll  39
-rw-r--r--  test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll  28
-rw-r--r--  test/CodeGen/PowerPC/2010-05-03-retaddr1.ll  4
-rw-r--r--  test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll  2
-rw-r--r--  test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll  2
-rw-r--r--  test/CodeGen/PowerPC/2012-11-16-mischedcall.ll  33
-rw-r--r--  test/CodeGen/PowerPC/DbgValueOtherTargets.test  1
-rw-r--r--  test/CodeGen/PowerPC/LargeAbsoluteAddr.ll  6
-rw-r--r--  test/CodeGen/PowerPC/a2q-stackalign.ll  23
-rw-r--r--  test/CodeGen/PowerPC/a2q.ll  10
-rw-r--r--  test/CodeGen/PowerPC/allocate-r0.ll  18
-rw-r--r--  test/CodeGen/PowerPC/anon_aggr.ll  99
-rw-r--r--  test/CodeGen/PowerPC/asym-regclass-copy.ll  56
-rw-r--r--  test/CodeGen/PowerPC/atomic-1.ll  6
-rw-r--r--  test/CodeGen/PowerPC/atomic-2.ll  20
-rw-r--r--  test/CodeGen/PowerPC/available-externally.ll  56
-rw-r--r--  test/CodeGen/PowerPC/bswap-load-store.ll  35
-rw-r--r--  test/CodeGen/PowerPC/buildvec_canonicalize.ll  16
-rw-r--r--  test/CodeGen/PowerPC/complex-return.ll  55
-rw-r--r--  test/CodeGen/PowerPC/cr-spills.ll  409
-rw-r--r--  test/CodeGen/PowerPC/ctr-cleanup.ll  25
-rw-r--r--  test/CodeGen/PowerPC/cttz.ll  4
-rw-r--r--  test/CodeGen/PowerPC/dbg.ll  10
-rw-r--r--  test/CodeGen/PowerPC/dcbt-sched.ll  22
-rw-r--r--  test/CodeGen/PowerPC/float-asmprint.ll  34
-rw-r--r--  test/CodeGen/PowerPC/float-to-int.ll  93
-rw-r--r--  test/CodeGen/PowerPC/frame-size.ll  32
-rw-r--r--  test/CodeGen/PowerPC/frameaddr.ll  47
-rw-r--r--  test/CodeGen/PowerPC/i32-to-float.ll  82
-rw-r--r--  test/CodeGen/PowerPC/i64-to-float.ll  52
-rw-r--r--  test/CodeGen/PowerPC/i64_fp_round.ll  14
-rw-r--r--  test/CodeGen/PowerPC/iabs.ll  1
-rw-r--r--  test/CodeGen/PowerPC/in-asm-f64-reg.ll  11
-rw-r--r--  test/CodeGen/PowerPC/jaggedstructs.ll  10
-rw-r--r--  test/CodeGen/PowerPC/lbzux.ll  2
-rw-r--r--  test/CodeGen/PowerPC/lit.local.cfg  2
-rw-r--r--  test/CodeGen/PowerPC/load-shift-combine.ll  34
-rw-r--r--  test/CodeGen/PowerPC/mcm-1.ll  27
-rw-r--r--  test/CodeGen/PowerPC/mcm-10.ll  25
-rw-r--r--  test/CodeGen/PowerPC/mcm-11.ll  27
-rw-r--r--  test/CodeGen/PowerPC/mcm-12.ll  18
-rw-r--r--  test/CodeGen/PowerPC/mcm-2.ll  37
-rw-r--r--  test/CodeGen/PowerPC/mcm-3.ll  41
-rw-r--r--  test/CodeGen/PowerPC/mcm-4.ll  27
-rw-r--r--  test/CodeGen/PowerPC/mcm-5.ll  60
-rw-r--r--  test/CodeGen/PowerPC/mcm-6.ll  28
-rw-r--r--  test/CodeGen/PowerPC/mcm-7.ll  26
-rw-r--r--  test/CodeGen/PowerPC/mcm-8.ll  25
-rw-r--r--  test/CodeGen/PowerPC/mcm-9.ll  28
-rw-r--r--  test/CodeGen/PowerPC/mcm-default.ll  26
-rw-r--r--  test/CodeGen/PowerPC/mcm-obj-2.ll  77
-rw-r--r--  test/CodeGen/PowerPC/mcm-obj.ll  268
-rw-r--r--  test/CodeGen/PowerPC/mem_update.ll  2
-rw-r--r--  test/CodeGen/PowerPC/misched-inorder-latency.ll  55
-rw-r--r--  test/CodeGen/PowerPC/negctr.ll  83
-rw-r--r--  test/CodeGen/PowerPC/popcnt.ll  40
-rw-r--r--  test/CodeGen/PowerPC/ppc64-toc.ll  2
-rw-r--r--  test/CodeGen/PowerPC/pr15031.ll  370
-rw-r--r--  test/CodeGen/PowerPC/pr15359.ll  20
-rw-r--r--  test/CodeGen/PowerPC/pr15630.ll  16
-rw-r--r--  test/CodeGen/PowerPC/pr15632.ll  15
-rw-r--r--  test/CodeGen/PowerPC/pwr3-6x.ll  14
-rw-r--r--  test/CodeGen/PowerPC/quadint-return.ll  19
-rw-r--r--  test/CodeGen/PowerPC/r31.ll  15
-rw-r--r--  test/CodeGen/PowerPC/recipest.ll  226
-rw-r--r--  test/CodeGen/PowerPC/rlwimi3.ll  1
-rw-r--r--  test/CodeGen/PowerPC/rounding-ops.ll  145
-rw-r--r--  test/CodeGen/PowerPC/s000-alias-misched.ll  101
-rw-r--r--  test/CodeGen/PowerPC/sdag-ppcf128.ll  15
-rw-r--r--  test/CodeGen/PowerPC/sjlj.ll  112
-rw-r--r--  test/CodeGen/PowerPC/stdux-constuse.ll  47
-rw-r--r--  test/CodeGen/PowerPC/stfiwx-2.ll  9
-rw-r--r--  test/CodeGen/PowerPC/store-update.ll  170
-rw-r--r--  test/CodeGen/PowerPC/structsinmem.ll  22
-rw-r--r--  test/CodeGen/PowerPC/structsinregs.ll  42
-rw-r--r--  test/CodeGen/PowerPC/stubs.ll  4
-rw-r--r--  test/CodeGen/PowerPC/stwu8.ll  2
-rw-r--r--  test/CodeGen/PowerPC/svr4-redzone.ll  39
-rw-r--r--  test/CodeGen/PowerPC/tls-2.ll  15
-rw-r--r--  test/CodeGen/PowerPC/tls-gd-obj.ll  41
-rw-r--r--  test/CodeGen/PowerPC/tls-gd.ll  23
-rw-r--r--  test/CodeGen/PowerPC/tls-ie-obj.ll  36
-rw-r--r--  test/CodeGen/PowerPC/tls-ie.ll  22
-rw-r--r--  test/CodeGen/PowerPC/tls-ld-2.ll  24
-rw-r--r--  test/CodeGen/PowerPC/tls-ld-obj.ll  50
-rw-r--r--  test/CodeGen/PowerPC/tls-ld.ll  24
-rw-r--r--  test/CodeGen/PowerPC/tls.ll  17
-rw-r--r--  test/CodeGen/PowerPC/unal4-std.ll  27
-rw-r--r--  test/CodeGen/PowerPC/unaligned.ll  73
-rw-r--r--  test/CodeGen/PowerPC/vaddsplat.ll  149
-rw-r--r--  test/CodeGen/PowerPC/varargs.ll  21
-rw-r--r--  test/CodeGen/PowerPC/vec_cmp.ll  2
-rw-r--r--  test/CodeGen/PowerPC/vec_constants.ll  44
-rw-r--r--  test/CodeGen/PowerPC/vec_extload.ll  98
-rw-r--r--  test/CodeGen/PowerPC/vec_mul.ll  27
-rw-r--r--  test/CodeGen/PowerPC/vec_rounding.ll  172
-rw-r--r--  test/CodeGen/PowerPC/vec_select.ll  7
-rw-r--r--  test/CodeGen/PowerPC/vrsave-spill.ll  19
-rw-r--r--  test/CodeGen/PowerPC/vrspill.ll  4
-rw-r--r--  test/CodeGen/R600/128bit-kernel-args.ll  18
-rw-r--r--  test/CodeGen/R600/add.v4i32.ll  15
-rw-r--r--  test/CodeGen/R600/alu-split.ll  850
-rw-r--r--  test/CodeGen/R600/and.v4i32.ll  15
-rw-r--r--  test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll  36
-rw-r--r--  test/CodeGen/R600/disconnected-predset-break-bug.ll  29
-rw-r--r--  test/CodeGen/R600/fabs.ll  16
-rw-r--r--  test/CodeGen/R600/fadd.ll  16
-rw-r--r--  test/CodeGen/R600/fadd.v4f32.ll  15
-rw-r--r--  test/CodeGen/R600/fcmp-cnd.ll  14
-rw-r--r--  test/CodeGen/R600/fcmp-cnde-int-args.ll  16
-rw-r--r--  test/CodeGen/R600/fcmp.ll  37
-rw-r--r--  test/CodeGen/R600/fdiv.v4f32.ll  19
-rw-r--r--  test/CodeGen/R600/floor.ll  16
-rw-r--r--  test/CodeGen/R600/fmad.ll  19
-rw-r--r--  test/CodeGen/R600/fmax.ll  16
-rw-r--r--  test/CodeGen/R600/fmin.ll  16
-rw-r--r--  test/CodeGen/R600/fmul.ll  16
-rw-r--r--  test/CodeGen/R600/fmul.v4f32.ll  15
-rw-r--r--  test/CodeGen/R600/fsub.ll  16
-rw-r--r--  test/CodeGen/R600/fsub.v4f32.ll  15
-rw-r--r--  test/CodeGen/R600/i8_to_double_to_float.ll  11
-rw-r--r--  test/CodeGen/R600/icmp-select-sete-reverse-args.ll  18
-rw-r--r--  test/CodeGen/R600/jump_address.ll  50
-rw-r--r--  test/CodeGen/R600/kcache-fold.ll  100
-rw-r--r--  test/CodeGen/R600/legalizedag-bug-expand-setcc.ll  26
-rw-r--r--  test/CodeGen/R600/lit.local.cfg  13
-rw-r--r--  test/CodeGen/R600/literals.ll  32
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.mul.ll  17
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.tex.ll  42
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.trunc.ll  16
-rw-r--r--  test/CodeGen/R600/llvm.SI.fs.interp.constant.ll  21
-rw-r--r--  test/CodeGen/R600/llvm.SI.sample.ll  106
-rw-r--r--  test/CodeGen/R600/llvm.cos.ll  16
-rw-r--r--  test/CodeGen/R600/llvm.pow.ll  19
-rw-r--r--  test/CodeGen/R600/llvm.sin.ll  16
-rw-r--r--  test/CodeGen/R600/load.constant_addrspace.f32.ll  9
-rw-r--r--  test/CodeGen/R600/load.i8.ll  10
-rw-r--r--  test/CodeGen/R600/lshl.ll  14
-rw-r--r--  test/CodeGen/R600/lshr.ll  14
-rw-r--r--  test/CodeGen/R600/mulhu.ll  16
-rw-r--r--  test/CodeGen/R600/predicates.ll  104
-rw-r--r--  test/CodeGen/R600/reciprocal.ll  16
-rw-r--r--  test/CodeGen/R600/schedule-fs-loop-nested-if.ll  83
-rw-r--r--  test/CodeGen/R600/schedule-fs-loop-nested.ll  88
-rw-r--r--  test/CodeGen/R600/schedule-fs-loop.ll  55
-rw-r--r--  test/CodeGen/R600/schedule-if-2.ll  94
-rw-r--r--  test/CodeGen/R600/schedule-if.ll  46
-rw-r--r--  test/CodeGen/R600/schedule-vs-if-nested-loop.ll  134
-rw-r--r--  test/CodeGen/R600/sdiv.ll  21
-rw-r--r--  test/CodeGen/R600/selectcc-icmp-select-float.ll  15
-rw-r--r--  test/CodeGen/R600/selectcc-opt.ll  64
-rw-r--r--  test/CodeGen/R600/selectcc_cnde.ll  11
-rw-r--r--  test/CodeGen/R600/selectcc_cnde_int.ll  11
-rw-r--r--  test/CodeGen/R600/set-dx10.ll  137
-rw-r--r--  test/CodeGen/R600/setcc.v4i32.ll  12
-rw-r--r--  test/CodeGen/R600/seto.ll  13
-rw-r--r--  test/CodeGen/R600/setuo.ll  13
-rw-r--r--  test/CodeGen/R600/short-args.ll  41
-rw-r--r--  test/CodeGen/R600/store.v4f32.ll  9
-rw-r--r--  test/CodeGen/R600/store.v4i32.ll  9
-rw-r--r--  test/CodeGen/R600/udiv.v4i32.ll  15
-rw-r--r--  test/CodeGen/R600/unsupported-cc.ll  83
-rw-r--r--  test/CodeGen/R600/urem.v4i32.ll  15
-rw-r--r--  test/CodeGen/R600/vec4-expand.ll  53
-rw-r--r--  test/CodeGen/SI/sanity.ll  37
-rw-r--r--  test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll  28
-rw-r--r--  test/CodeGen/SPARC/64bit.ll  146
-rw-r--r--  test/CodeGen/SPARC/64cond.ll  56
-rw-r--r--  test/CodeGen/SPARC/DbgValueOtherTargets.test  1
-rw-r--r--  test/CodeGen/SPARC/ctpop.ll  2
-rw-r--r--  test/CodeGen/SPARC/lit.local.cfg  2
-rw-r--r--  test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll  28
-rw-r--r--  test/CodeGen/Thumb/2010-07-15-debugOrdering.ll  10
-rw-r--r--  test/CodeGen/Thumb/DbgValueOtherTargets.test  1
-rw-r--r--  test/CodeGen/Thumb/iabs.ll  26
-rw-r--r--  test/CodeGen/Thumb/lit.local.cfg  2
-rw-r--r--  test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll  29
-rw-r--r--  test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll  53
-rw-r--r--  test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll  14
-rw-r--r--  test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll  19
-rw-r--r--  test/CodeGen/Thumb2/aligned-spill.ll  14
-rw-r--r--  test/CodeGen/Thumb2/cortex-fp.ll  2
-rw-r--r--  test/CodeGen/Thumb2/crash.ll  9
-rw-r--r--  test/CodeGen/Thumb2/thumb2-ldr_post.ll  4
-rw-r--r--  test/CodeGen/Thumb2/thumb2-mul.ll  2
-rw-r--r--  test/CodeGen/Thumb2/thumb2-shifter.ll  82
-rw-r--r--  test/CodeGen/Thumb2/thumb2-spill-q.ll  4
-rw-r--r--  test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll  1
-rw-r--r--  test/CodeGen/X86/2006-03-02-InstrSchedBug.ll  1
-rw-r--r--  test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll  1
-rw-r--r--  test/CodeGen/X86/2006-05-02-InstrSched1.ll  1
-rw-r--r--  test/CodeGen/X86/2006-05-02-InstrSched2.ll  1
-rw-r--r--  test/CodeGen/X86/2006-05-11-InstrSched.ll  1
-rw-r--r--  test/CodeGen/X86/2008-02-18-TailMergingBug.ll  1
-rw-r--r--  test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll  8
-rw-r--r--  test/CodeGen/X86/2008-08-31-EH_RETURN64.ll  42
-rw-r--r--  test/CodeGen/X86/2008-10-27-CoalescerBug.ll  1
-rw-r--r--  test/CodeGen/X86/2008-10-27-StackRealignment.ll  22
-rw-r--r--  test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll  2
-rw-r--r--  test/CodeGen/X86/2009-02-25-CommuteBug.ll  1
-rw-r--r--  test/CodeGen/X86/2009-02-26-MachineLICMBug.ll  1
-rw-r--r--  test/CodeGen/X86/2009-03-23-MultiUseSched.ll  1
-rw-r--r--  test/CodeGen/X86/2009-04-16-SpillerUnfold.ll  1
-rw-r--r--  test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll  7
-rw-r--r--  test/CodeGen/X86/2010-01-18-DbgValue.ll  30
-rw-r--r--  test/CodeGen/X86/2010-01-19-OptExtBug.ll  1
-rw-r--r--  test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll  28
-rw-r--r--  test/CodeGen/X86/2010-05-25-DotDebugLoc.ll  42
-rw-r--r--  test/CodeGen/X86/2010-05-26-DotDebugLoc.ll  61
-rw-r--r--  test/CodeGen/X86/2010-05-28-Crash.ll  26
-rw-r--r--  test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll  55
-rw-r--r--  test/CodeGen/X86/2010-07-06-DbgCrash.ll  2
-rw-r--r--  test/CodeGen/X86/2010-08-04-StackVariable.ll  65
-rw-r--r--  test/CodeGen/X86/2010-08-10-DbgConstant.ll  25
-rw-r--r--  test/CodeGen/X86/2010-09-16-EmptyFilename.ll  25
-rw-r--r--  test/CodeGen/X86/2010-11-02-DbgParameter.ll  28
-rw-r--r--  test/CodeGen/X86/2010-12-02-MC-Set.ll  15
-rw-r--r--  test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll  44
-rw-r--r--  test/CodeGen/X86/2011-06-12-FastAllocSpill.ll  1
-rw-r--r--  test/CodeGen/X86/2011-09-14-valcoalesce.ll  2
-rw-r--r--  test/CodeGen/X86/2011-10-19-LegelizeLoad.ll  4
-rw-r--r--  test/CodeGen/X86/2011-11-30-or.ll  12
-rw-r--r--  test/CodeGen/X86/2012-01-11-split-cv.ll  2
-rw-r--r--  test/CodeGen/X86/2012-03-26-PostRALICMBug.ll  1
-rw-r--r--  test/CodeGen/X86/2012-07-15-BuildVectorPromote.ll  8
-rw-r--r--  test/CodeGen/X86/2012-07-15-broadcastfold.ll  2
-rw-r--r--  test/CodeGen/X86/2012-11-28-merge-store-alias.ll  52
-rw-r--r--  test/CodeGen/X86/2012-11-30-handlemove-dbg.ll  51
-rw-r--r--  test/CodeGen/X86/2012-11-30-misched-dbg.ll  136
-rw-r--r--  test/CodeGen/X86/2012-11-30-regpres-dbg.ll  44
-rw-r--r--  test/CodeGen/X86/2012-12-06-python27-miscompile.ll  23
-rw-r--r--  test/CodeGen/X86/2012-12-1-merge-multiple.ll  31
-rw-r--r--  test/CodeGen/X86/2012-12-12-DAGCombineCrash.ll  46
-rw-r--r--  test/CodeGen/X86/2012-12-14-v8fp80-crash.ll  22
-rw-r--r--  test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll  17
-rw-r--r--  test/CodeGen/X86/2013-01-09-DAGCombineBug.ll  74
-rw-r--r--  test/CodeGen/X86/2013-02-12-ShuffleToZext.ll  14
-rw-r--r--  test/CodeGen/X86/2013-03-13-VEX-DestReg.ll  31
-rw-r--r--  test/CodeGen/X86/Atomics-64.ll  4
-rw-r--r--  test/CodeGen/X86/DbgValueOtherTargets.test  2
-rw-r--r--  test/CodeGen/X86/GC/erlang-gc.ll  25
-rw-r--r--  test/CodeGen/X86/GC/ocaml-gc.ll  31
-rw-r--r--  test/CodeGen/X86/MachineSink-DbgValue.ll  27
-rw-r--r--  test/CodeGen/X86/MachineSink-PHIUse.ll  1
-rw-r--r--  test/CodeGen/X86/MergeConsecutiveStores.ll  132
-rw-r--r--  test/CodeGen/X86/WidenArith.ll  23
-rw-r--r--  test/CodeGen/X86/atom-bypass-slow-division-64.ll  46
-rw-r--r--  test/CodeGen/X86/atom-bypass-slow-division.ll  50
-rw-r--r--  test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll  77
-rw-r--r--  test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll  91
-rw-r--r--  test/CodeGen/X86/atom-call-reg-indirect.ll  45
-rw-r--r--  test/CodeGen/X86/atom-pad-short-functions.ll  103
-rw-r--r--  test/CodeGen/X86/atomic-dagsched.ll  110
-rw-r--r--  test/CodeGen/X86/atomic-load-store-wide.ll  2
-rw-r--r--  test/CodeGen/X86/atomic-load-store.ll  2
-rw-r--r--  test/CodeGen/X86/atomic-minmax-i6432.ll  45
-rw-r--r--  test/CodeGen/X86/atomic-or.ll  2
-rw-r--r--  test/CodeGen/X86/atomic-pointer.ll  2
-rw-r--r--  test/CodeGen/X86/atomic16.ll  4
-rw-r--r--  test/CodeGen/X86/atomic32.ll  30
-rw-r--r--  test/CodeGen/X86/atomic64.ll  2
-rw-r--r--  test/CodeGen/X86/atomic6432.ll  2
-rw-r--r--  test/CodeGen/X86/atomic8.ll  4
-rw-r--r--  test/CodeGen/X86/atomic_add.ll  2
-rw-r--r--  test/CodeGen/X86/atomic_op.ll  2
-rw-r--r--  test/CodeGen/X86/avx-cvt.ll  10
-rw-r--r--  test/CodeGen/X86/avx-intel-ocl.ll  134
-rw-r--r--  test/CodeGen/X86/avx-intrinsics-x86.ll  7
-rw-r--r--  test/CodeGen/X86/avx-load-store.ll  44
-rwxr-xr-x  test/CodeGen/X86/avx-sext.ll  181
-rw-r--r--  test/CodeGen/X86/avx-shift.ll  11
-rw-r--r--  test/CodeGen/X86/avx-shuffle.ll  63
-rw-r--r--  test/CodeGen/X86/avx-splat.ll  18
-rw-r--r--  test/CodeGen/X86/avx-vextractf128.ll  18
-rw-r--r--  test/CodeGen/X86/avx-vpermil.ll  4
-rwxr-xr-x  test/CodeGen/X86/avx-zext.ll  3
-rwxr-xr-x  test/CodeGen/X86/avx2-conversions.ll  41
-rw-r--r--  test/CodeGen/X86/avx2-logic.ll  5
-rw-r--r--  test/CodeGen/X86/avx2-shuffle.ll  57
-rw-r--r--  test/CodeGen/X86/blend-msb.ll  6
-rw-r--r--  test/CodeGen/X86/bmi.ll  76
-rw-r--r--  test/CodeGen/X86/bool-simplify.ll  86
-rw-r--r--  test/CodeGen/X86/bt.ll  100
-rw-r--r--  test/CodeGen/X86/byval2.ll  4
-rw-r--r--  test/CodeGen/X86/byval3.ll  4
-rw-r--r--  test/CodeGen/X86/byval4.ll  4
-rw-r--r--  test/CodeGen/X86/byval5.ll  4
-rw-r--r--  test/CodeGen/X86/cas.ll  73
-rw-r--r--  test/CodeGen/X86/clobber-fi0.ll  37
-rw-r--r--  test/CodeGen/X86/cmp.ll  15
-rw-r--r--  test/CodeGen/X86/coalesce-implicitdef.ll  130
-rw-r--r--  test/CodeGen/X86/coldcc64.ll  24
-rw-r--r--  test/CodeGen/X86/complex-fca.ll  3
-rw-r--r--  test/CodeGen/X86/constant-pool-remat-0.ll  1
-rw-r--r--  test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll  1
-rw-r--r--  test/CodeGen/X86/crash.ll  2
-rw-r--r--  test/CodeGen/X86/cvtv2f32.ll  4
-rw-r--r--  test/CodeGen/X86/dagcombine-cse.ll  1
-rw-r--r--  test/CodeGen/X86/dagcombine_unsafe_math.ll  56
-rw-r--r--  test/CodeGen/X86/dbg-at-specficiation.ll  2
-rw-r--r--  test/CodeGen/X86/dbg-byval-parameter.ll  30
-rw-r--r--  test/CodeGen/X86/dbg-const-int.ll  19
-rw-r--r--  test/CodeGen/X86/dbg-const.ll  20
-rw-r--r--  test/CodeGen/X86/dbg-declare-arg.ll  58
-rw-r--r--  test/CodeGen/X86/dbg-declare.ll  12
-rw-r--r--  test/CodeGen/X86/dbg-file-name.ll  14
-rw-r--r--  test/CodeGen/X86/dbg-i128-const.ll  23
-rw-r--r--  test/CodeGen/X86/dbg-large-unsigned-const.ll  38
-rw-r--r--  test/CodeGen/X86/dbg-merge-loc-entry.ll  41
-rw-r--r--  test/CodeGen/X86/dbg-prolog-end.ll  22
-rw-r--r--  test/CodeGen/X86/dbg-subrange.ll  19
-rw-r--r--  test/CodeGen/X86/dbg-value-dag-combine.ll  27
-rw-r--r--  test/CodeGen/X86/dbg-value-inlined-parameter.ll  87
-rw-r--r--  test/CodeGen/X86/dbg-value-isel.ll  29
-rw-r--r--  test/CodeGen/X86/dbg-value-location.ll  34
-rw-r--r--  test/CodeGen/X86/dbg-value-range.ll  28
-rw-r--r--  test/CodeGen/X86/divide-by-constant.ll  6
-rw-r--r--  test/CodeGen/X86/dwarf-comp-dir.ll  6
-rw-r--r--  test/CodeGen/X86/dynamic-allocas-VLAs.ll  12
-rw-r--r--  test/CodeGen/X86/early-ifcvt-crash.ll  2
-rw-r--r--  test/CodeGen/X86/early-ifcvt.ll  31
-rw-r--r--  test/CodeGen/X86/fast-isel-args-fail.ll  23
-rw-r--r--  test/CodeGen/X86/fast-isel-args.ll  25
-rw-r--r--  test/CodeGen/X86/fast-isel-constant.ll  24
-rw-r--r--  test/CodeGen/X86/fast-isel-expect.ll  21
-rw-r--r--  test/CodeGen/X86/fast-isel-x86-64.ll  4
-rw-r--r--  test/CodeGen/X86/float-asmprint.ll  40
-rw-r--r--  test/CodeGen/X86/fma4-intrinsics-x86_64.ll  20
-rw-r--r--  test/CodeGen/X86/fold-call.ll  25
-rw-r--r--  test/CodeGen/X86/fold-load-vec.ll  39
-rw-r--r--  test/CodeGen/X86/fold-pcmpeqd-2.ll  10
-rw-r--r--  test/CodeGen/X86/fold-vex.ll  16
-rw-r--r--  test/CodeGen/X86/fp-fast.ll  2
-rw-r--r--  test/CodeGen/X86/fp-load-trunc.ll  4
-rw-r--r--  test/CodeGen/X86/handle-move.ll  4
-rw-r--r--  test/CodeGen/X86/hipe-cc.ll  77
-rw-r--r--  test/CodeGen/X86/hipe-cc64.ll  87
-rw-r--r--  test/CodeGen/X86/hipe-prologue.ll  67
-rw-r--r--  test/CodeGen/X86/hoist-invariant-load.ll  1
-rw-r--r--  test/CodeGen/X86/imul-lea-2.ll  18
-rw-r--r--  test/CodeGen/X86/imul-lea.ll  10
-rw-r--r--  test/CodeGen/X86/imul64-lea.ll  25
-rw-r--r--  test/CodeGen/X86/insertelement-copytoregs.ll  3
-rw-r--r--  test/CodeGen/X86/lea-2.ll  18
-rw-r--r--  test/CodeGen/X86/lea-4.ll  24
-rw-r--r--  test/CodeGen/X86/legalize-shift-64.ll  11
-rw-r--r--  test/CodeGen/X86/licm-nested.ll  1
-rw-r--r--  test/CodeGen/X86/lit.local.cfg  2
-rw-r--r--  test/CodeGen/X86/memcpy-2.ll  82
-rw-r--r--  test/CodeGen/X86/memcpy.ll  26
-rw-r--r--  test/CodeGen/X86/memset-sse-stack-realignment.ll  77
-rw-r--r--  test/CodeGen/X86/memset.ll  41
-rw-r--r--  test/CodeGen/X86/memset64-on-x86-32.ll  1
-rw-r--r--  test/CodeGen/X86/misched-crash.ll  40
-rw-r--r--  test/CodeGen/X86/misched-ilp.ll  4
-rw-r--r--  test/CodeGen/X86/misched-matmul.ll  228
-rw-r--r--  test/CodeGen/X86/misched-matrix.ll  195
-rw-r--r--  test/CodeGen/X86/misched-new.ll  56
-rw-r--r--  test/CodeGen/X86/movgs.ll  6
-rw-r--r--  test/CodeGen/X86/ms-inline-asm.ll  49
-rw-r--r--  test/CodeGen/X86/multiple-loop-post-inc.ll  2
-rw-r--r--  test/CodeGen/X86/no-cmov.ll  11
-rw-r--r--  test/CodeGen/X86/phi-immediate-factoring.ll  1
-rw-r--r--  test/CodeGen/X86/pmovsx-inreg.ll  176
-rw-r--r--  test/CodeGen/X86/pointer-vector.ll  2
-rw-r--r--  test/CodeGen/X86/pr10475.ll  30
-rw-r--r--  test/CodeGen/X86/pr10499.ll  14
-rw-r--r--  test/CodeGen/X86/pr10523.ll  18
-rw-r--r--  test/CodeGen/X86/pr10524.ll  14
-rw-r--r--  test/CodeGen/X86/pr10525.ll  13
-rw-r--r--  test/CodeGen/X86/pr10526.ll  13
-rw-r--r--  test/CodeGen/X86/pr11998.ll  18
-rw-r--r--  test/CodeGen/X86/pr14314.ll  8
-rw-r--r--  test/CodeGen/X86/pr14562.ll  15
-rw-r--r--  test/CodeGen/X86/pr15267.ll  66
-rw-r--r--  test/CodeGen/X86/pr15296.ll  46
-rw-r--r--  test/CodeGen/X86/pr15309.ll  15
-rw-r--r--  test/CodeGen/X86/pr3522.ll  1
-rw-r--r--  test/CodeGen/X86/pre-ra-sched.ll  56
-rw-r--r--  test/CodeGen/X86/prefetch.ll  3
-rw-r--r--  test/CodeGen/X86/psubus.ll  340
-rw-r--r--  test/CodeGen/X86/rdrand.ll  2
-rw-r--r--  test/CodeGen/X86/rdseed.ll  48
-rw-r--r--  test/CodeGen/X86/regpressure.ll  1
-rw-r--r--  test/CodeGen/X86/ret-mmx.ll  2
-rw-r--r--  test/CodeGen/X86/rip-rel-lea.ll  16
-rw-r--r--  test/CodeGen/X86/sandybridge-loads.ll  39
-rw-r--r--  test/CodeGen/X86/select.ll  4
-rw-r--r--  test/CodeGen/X86/sext-load.ll  25
-rw-r--r--  test/CodeGen/X86/sibcall.ll  4
-rw-r--r--  test/CodeGen/X86/sincos-opt.ll  66
-rw-r--r--  test/CodeGen/X86/sink-hoist.ll  2
-rw-r--r--  test/CodeGen/X86/sse-align-2.ll  11
-rw-r--r--  test/CodeGen/X86/sse-domains.ll  4
-rw-r--r--  test/CodeGen/X86/sse2-blend.ll  24
-rw-r--r--  test/CodeGen/X86/sse2-mul.ll  14
-rw-r--r--  test/CodeGen/X86/stack-align-memcpy.ll  18
-rw-r--r--  test/CodeGen/X86/stack-protector.ll  3155
-rw-r--r--  test/CodeGen/X86/stack-update-frame-opcode.ll  31
-rw-r--r--  test/CodeGen/X86/store_op_load_fold.ll  19
-rw-r--r--  test/CodeGen/X86/subtarget-feature-change.ll  66
-rw-r--r--  test/CodeGen/X86/tailcall-fastisel.ll  5
-rw-r--r--  test/CodeGen/X86/tailcall-structret.ll  3
-rw-r--r--  test/CodeGen/X86/tailcallbyval.ll  7
-rw-r--r--  test/CodeGen/X86/tailcallfp.ll  3
-rw-r--r--  test/CodeGen/X86/tailcallpic1.ll  3
-rw-r--r--  test/CodeGen/X86/tailcallpic2.ll  5
-rw-r--r--  test/CodeGen/X86/thiscall-struct-return.ll  47
-rw-r--r--  test/CodeGen/X86/tls.ll  40
-rw-r--r--  test/CodeGen/X86/twoaddr-coalesce-2.ll  1
-rw-r--r--  test/CodeGen/X86/twoaddr-pass-sink.ll  1
-rw-r--r--  test/CodeGen/X86/unknown-location.ll  18
-rw-r--r--  test/CodeGen/X86/v8i1-masks.ll  39
-rw-r--r--  test/CodeGen/X86/vec_align_i256.ll  14
-rw-r--r--  test/CodeGen/X86/vec_compare.ll  24
-rw-r--r--  test/CodeGen/X86/vec_floor.ll  144
-rw-r--r--  test/CodeGen/X86/vec_fpext.ll  4
-rw-r--r--  test/CodeGen/X86/vec_insert-6.ll  1
-rw-r--r--  test/CodeGen/X86/vec_sdiv_to_shift.ll  72
-rw-r--r--  test/CodeGen/X86/vec_shuffle-19.ll  1
-rw-r--r--  test/CodeGen/X86/vec_shuffle-20.ll  3
-rw-r--r--  test/CodeGen/X86/vec_splat-2.ll  9
-rw-r--r--  test/CodeGen/X86/vec_splat-3.ll  219
-rw-r--r--  test/CodeGen/X86/vec_splat-4.ll  104
-rw-r--r--  test/CodeGen/X86/vec_splat.ll  16
-rw-r--r--  test/CodeGen/X86/vec_zero.ll  2
-rw-r--r--  test/CodeGen/X86/vector-gep.ll  5
-rw-r--r--  test/CodeGen/X86/vselect-minmax.ll  2788
-rw-r--r--  test/CodeGen/X86/vsplit-and.ll  2
-rw-r--r--  test/CodeGen/X86/wide-fma-contraction.ll  20
-rw-r--r--  test/CodeGen/X86/win32_sret.ll  121
-rw-r--r--  test/CodeGen/X86/win_ftol2.ll  14
-rw-r--r--  test/CodeGen/X86/x86-64-dead-stack-adjust.ll  2
-rw-r--r--  test/CodeGen/X86/x86-64-ptr-arg-simple.ll  29
-rw-r--r--  test/CodeGen/X86/x86-64-sret-return.ll  18
-rw-r--r--  test/CodeGen/X86/xtest.ll  11
-rw-r--r--  test/CodeGen/X86/zero-remat.ll  1
-rw-r--r--  test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll  28
-rw-r--r--  test/CodeGen/XCore/DbgValueOtherTargets.test  1
-rw-r--r--  test/CodeGen/XCore/aliases.ll  32
-rw-r--r--  test/CodeGen/XCore/lit.local.cfg  2
-rw-r--r--  test/DebugInfo/2009-11-03-InsertExtractValue.ll  17
-rw-r--r--  test/DebugInfo/2009-11-05-DeadGlobalVariable.ll  10
-rw-r--r--  test/DebugInfo/2009-11-10-CurrentFn.ll  10
-rw-r--r--  test/DebugInfo/2010-03-24-MemberFn.ll  39
-rw-r--r--  test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll  2
-rw-r--r--  test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll  45
-rw-r--r--  test/DebugInfo/2010-04-19-FramePtr.ll  15
-rw-r--r--  test/DebugInfo/2010-05-03-OriginDIE.ll  2
-rw-r--r--  test/DebugInfo/2010-05-10-MultipleCU.ll  28
-rw-r--r--  test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll  41
-rw-r--r--  test/DebugInfo/2010-10-01-crash.ll  1
-rw-r--r--  test/DebugInfo/AArch64/cfi-frame.ll  58
-rw-r--r--  test/DebugInfo/AArch64/dwarfdump.ll  34
-rw-r--r--  test/DebugInfo/AArch64/eh_frame.ll  51
-rw-r--r--  test/DebugInfo/AArch64/eh_frame_personality.ll  46
-rw-r--r--  test/DebugInfo/AArch64/lit.local.cfg  6
-rw-r--r--  test/DebugInfo/AArch64/variable-loc.ll  98
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-inl-test.cc  15
-rwxr-xr-x  test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64  bin 7468 -> 9024 bytes
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-inl-test.h  9
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-pubnames.cc  32
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-pubnames.elf-x86-64  bin 0 -> 5280 bytes
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.c  14
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.o  bin 0 -> 2432 bytes
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-test.cc  23
-rwxr-xr-x  test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64  bin 10174 -> 9640 bytes
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-test2-helper.cc  3
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-test2-main.cc  11
-rwxr-xr-x  test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64  bin 7702 -> 9160 bytes
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-test3-decl.h  7
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-test3-decl2.h  1
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-test3.cc  12
-rwxr-xr-x  test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64  bin 7339 -> 0 bytes
-rwxr-xr-x  test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 space  bin 0 -> 8944 bytes
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-test4-decl.h  1
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-test4-part1.cc  8
-rw-r--r--  test/DebugInfo/Inputs/dwarfdump-test4-part2.cc  2
-rwxr-xr-x  test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64  bin 7689 -> 9368 bytes
-rw-r--r--  test/DebugInfo/Inputs/lit.local.cfg (renamed from test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg)  0
-rw-r--r--  test/DebugInfo/Inputs/test-inline.o  bin 0 -> 6040 bytes
-rw-r--r--  test/DebugInfo/Inputs/test-parameters.o  bin 0 -> 5792 bytes
-rw-r--r--  test/DebugInfo/X86/2010-04-13-PubType.ll  30
-rw-r--r--  test/DebugInfo/X86/2010-08-10-DbgConstant.ll  28
-rw-r--r--  test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll  42
-rw-r--r--  test/DebugInfo/X86/2011-12-16-BadStructRef.ll  48
-rw-r--r--  test/DebugInfo/X86/DW_AT_byte_size.ll  25
-rw-r--r--  test/DebugInfo/X86/DW_AT_location-reference.ll  20
-rw-r--r--  test/DebugInfo/X86/DW_AT_object_pointer.ll  52
-rw-r--r--  test/DebugInfo/X86/DW_AT_specification.ll  34
-rw-r--r--  test/DebugInfo/X86/DW_TAG_friend.ll  31
-rw-r--r--  test/DebugInfo/X86/aligned_stack_var.ll  14
-rw-r--r--  test/DebugInfo/X86/block-capture.ll  71
-rw-r--r--  test/DebugInfo/X86/concrete_out_of_line.ll  81
-rw-r--r--  test/DebugInfo/X86/dbg-value-inlined-parameter.ll  92
-rw-r--r--  test/DebugInfo/X86/debug-info-block-captured-self.ll  106
-rw-r--r--  test/DebugInfo/X86/debug-info-blocks.ll  372
-rw-r--r--  test/DebugInfo/X86/debug-info-static-member.ll  257
-rw-r--r--  test/DebugInfo/X86/debug_frame.ll  11
-rw-r--r--  test/DebugInfo/X86/elf-names.ll  41
-rw-r--r--  test/DebugInfo/X86/empty-and-one-elem-array.ll  92
-rw-r--r--  test/DebugInfo/X86/empty-array.ll  45
-rw-r--r--  test/DebugInfo/X86/ending-run.ll  17
-rw-r--r--  test/DebugInfo/X86/enum-class.ll  32
-rw-r--r--  test/DebugInfo/X86/enum-fwd-decl.ll  14
-rw-r--r--  test/DebugInfo/X86/fission-cu.ll  100
-rw-r--r--  test/DebugInfo/X86/line-info.ll  58
-rw-r--r--  test/DebugInfo/X86/linkage-name.ll  30
-rw-r--r--  test/DebugInfo/X86/lit.local.cfg  2
-rw-r--r--  test/DebugInfo/X86/low-pc-cu.ll  14
-rw-r--r--  test/DebugInfo/X86/main-file-name.s  17
-rw-r--r--  test/DebugInfo/X86/misched-dbg-value.ll  174
-rw-r--r--  test/DebugInfo/X86/multiple-at-const-val.ll  61
-rw-r--r--  test/DebugInfo/X86/nondefault-subrange-array.ll  48
-rw-r--r--  test/DebugInfo/X86/objc-fwd-decl.ll  19
-rw-r--r--  test/DebugInfo/X86/op_deref.ll  27
-rw-r--r--  test/DebugInfo/X86/pointer-type-size.ll  25
-rw-r--r--  test/DebugInfo/X86/pr11300.ll  33
-rw-r--r--  test/DebugInfo/X86/pr12831.ll  52
-rw-r--r--  test/DebugInfo/X86/pr13303.ll  26
-rw-r--r--  test/DebugInfo/X86/pr9951.ll  13
-rw-r--r--  test/DebugInfo/X86/prologue-stack.ll  12
-rw-r--r--  test/DebugInfo/X86/rvalue-ref.ll  17
-rw-r--r--  test/DebugInfo/X86/stmt-list-multiple-compile-units.ll  67
-rw-r--r--  test/DebugInfo/X86/stmt-list.ll  11
-rw-r--r--  test/DebugInfo/X86/stringpool.ll  19
-rw-r--r--  test/DebugInfo/X86/struct-loc.ll  21
-rw-r--r--  test/DebugInfo/X86/subrange-type.ll  38
-rw-r--r--  test/DebugInfo/X86/subreg.ll  17
-rw-r--r--  test/DebugInfo/X86/union-template.ll  58
-rw-r--r--  test/DebugInfo/X86/vector.ll  28
-rw-r--r--  test/DebugInfo/array.ll  22
-rw-r--r--  test/DebugInfo/debuglineinfo.test  49
-rw-r--r--  test/DebugInfo/dwarf-public-names.ll  125
-rw-r--r--  test/DebugInfo/dwarfdump-debug-frame-simple.test  28
-rw-r--r--  test/DebugInfo/dwarfdump-dump-flags.test  13
-rw-r--r--  test/DebugInfo/dwarfdump-inlining.test  28
-rw-r--r--  test/DebugInfo/dwarfdump-pubnames.test  16
-rw-r--r--  test/DebugInfo/dwarfdump-test.test  56
-rw-r--r--  test/DebugInfo/inlined-vars.ll  39
-rw-r--r--  test/DebugInfo/llvm-symbolizer.test  25
-rw-r--r--  test/DebugInfo/member-pointers.ll  36
-rw-r--r--  test/DebugInfo/namespace.ll  42
-rw-r--r--  test/DebugInfo/printdbginfo2.ll  66
-rw-r--r--  test/DebugInfo/two-cus-from-same-file.ll  71
-rw-r--r--  test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/fpbitcast.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/hello.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/hello2.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/pr13727.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/simplesttest.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/simpletest-remote.ll  12
-rw-r--r--  test/ExecutionEngine/MCJIT/simpletest.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/stubs-remote.ll  36
-rw-r--r--  test/ExecutionEngine/MCJIT/stubs.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-arith.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-branch.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-call.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-cast.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll  89
-rw-r--r--  test/ExecutionEngine/MCJIT/test-common-symbols.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-constantexpr.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-data-align-remote.ll  16
-rw-r--r--  test/ExecutionEngine/MCJIT/test-data-align.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll  22
-rw-r--r--  test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-fp.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-global-ctors.ll  21
-rw-r--r--  test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll  35
-rw-r--r--  test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-global.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-loadstore.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-local.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-logical.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-loop.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-phi.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll  17
-rw-r--r--  test/ExecutionEngine/MCJIT/test-ptr-reloc.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-ret.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-return.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-setcond-fp.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-setcond-int.ll  2
-rw-r--r--  test/ExecutionEngine/MCJIT/test-shift.ll  2
-rw-r--r--  test/ExecutionEngine/lit.local.cfg  4
-rw-r--r--  test/ExecutionEngine/test-interp-vec-loadstore.ll  84
-rw-r--r--  test/Feature/attributes.ll  15
-rw-r--r--  test/Feature/const_pv.ll  2
-rw-r--r--  test/Feature/global_pv.ll  4
-rw-r--r--  test/Feature/intrinsics.ll  6
-rw-r--r--  test/Feature/minsize_attr.ll  3
-rw-r--r--  test/Feature/properties.ll  1
-rw-r--r--  test/FileCheck/dos-style-eol.txt  11
-rw-r--r--  test/FileCheck/lit.local.cfg  1
-rw-r--r--  test/FileCheck/next-no-match.txt  9
-rw-r--r--  test/FileCheck/regex-brackets.txt  7
-rw-r--r--  test/FileCheck/regex-no-match.txt  5
-rw-r--r--  test/FileCheck/simple-var-capture.txt  13
-rw-r--r--  test/FileCheck/two-checks-for-same-match.txt  8
-rw-r--r--  test/FileCheck/var-ref-same-line.txt  16
-rw-r--r--  test/Instrumentation/AddressSanitizer/X86/bug_11395.ll  4
-rw-r--r--  test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll  57
-rw-r--r--  test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll  6
-rw-r--r--  test/Instrumentation/AddressSanitizer/basic.ll  61
-rw-r--r--  test/Instrumentation/AddressSanitizer/debug_info.ll  61
-rw-r--r--  test/Instrumentation/AddressSanitizer/different_scale_and_offset.ll  41
-rw-r--r--  test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll  7
-rw-r--r--  test/Instrumentation/AddressSanitizer/instrument-no-return.ll  38
-rw-r--r--  test/Instrumentation/AddressSanitizer/instrument_global.ll  2
-rw-r--r--  test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll  49
-rw-r--r--  test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll  2
-rw-r--r--  test/Instrumentation/AddressSanitizer/lifetime.ll  84
-rw-r--r--  test/Instrumentation/AddressSanitizer/test64.ll  10
-rw-r--r--  test/Instrumentation/MemorySanitizer/lit.local.cfg  1
-rw-r--r--  test/Instrumentation/MemorySanitizer/msan_basic.ll  625
-rw-r--r--  test/Instrumentation/MemorySanitizer/unreachable.ll  39
-rw-r--r--  test/Instrumentation/ThreadSanitizer/atomic.ll  250
-rw-r--r--  test/Instrumentation/ThreadSanitizer/read_from_global.ll  2
-rw-r--r--  test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll  26
-rw-r--r--  test/Instrumentation/ThreadSanitizer/tsan_basic.ll  33
-rw-r--r--  test/Instrumentation/ThreadSanitizer/vptr_read.ll  13
-rw-r--r--  test/Integer/properties_bt.ll  2
-rw-r--r--  test/JitListener/lit.local.cfg  11
-rw-r--r--  test/JitListener/test-common-symbols.ll  113
-rw-r--r--  test/JitListener/test-inline.ll  212
-rw-r--r--  test/JitListener/test-parameters.ll  211
-rw-r--r--  test/Linker/2006-01-19-ConstantPacked.ll  5
-rw-r--r--  test/Linker/DbgDeclare.ll  58
-rw-r--r--  test/Linker/DbgDeclare2.ll  76
-rw-r--r--  test/Linker/module-flags-1-a.ll  4
-rw-r--r--  test/Linker/module-flags-3-a.ll  8
-rw-r--r--  test/Linker/module-flags-7-a.ll  9
-rw-r--r--  test/Linker/module-flags-7-b.ll  6
-rw-r--r--  test/Linker/module-flags-8-a.ll  14
-rw-r--r--  test/Linker/module-flags-8-b.ll  7
-rw-r--r--  test/Linker/testlink1.ll  6
-rw-r--r--  test/Linker/testlink2.ll  3
-rw-r--r--  test/MC/AArch64/basic-a64-diagnostics.s  3713
-rw-r--r--  test/MC/AArch64/basic-a64-instructions.s  4819
-rw-r--r--  test/MC/AArch64/elf-globaladdress.ll  111
-rw-r--r--  test/MC/AArch64/elf-objdump.s  5
-rw-r--r--  test/MC/AArch64/elf-reloc-addsubimm.s  13
-rw-r--r--  test/MC/AArch64/elf-reloc-condbr.s  13
-rw-r--r--  test/MC/AArch64/elf-reloc-ldrlit.s  28
-rw-r--r--  test/MC/AArch64/elf-reloc-ldstunsimm.s  34
-rw-r--r--  test/MC/AArch64/elf-reloc-movw.s  98
-rw-r--r--  test/MC/AArch64/elf-reloc-pcreladdressing.s  29
-rw-r--r--  test/MC/AArch64/elf-reloc-tstb.s  18
-rw-r--r--  test/MC/AArch64/elf-reloc-uncondbrimm.s  18
-rw-r--r--  test/MC/AArch64/gicv3-regs-diagnostics.s  61
-rw-r--r--  test/MC/AArch64/gicv3-regs.s  223
-rw-r--r--  test/MC/AArch64/lit.local.cfg  5
-rw-r--r--  test/MC/AArch64/mapping-across-sections.s  28
-rw-r--r--  test/MC/AArch64/mapping-within-section.s  23
-rw-r--r--  test/MC/AArch64/tls-relocs.s  662
-rw-r--r--  test/MC/AArch64/trace-regs-diagnostics.s  156
-rw-r--r--  test/MC/AArch64/trace-regs.s  766
-rw-r--r--  test/MC/ARM/2013-03-18-Br-to-label-named-like-reg.s  5
-rw-r--r--  test/MC/ARM/AlignedBundling/group-bundle-arm.s  48
-rw-r--r--  test/MC/ARM/AlignedBundling/lit.local.cfg  6
-rw-r--r--  test/MC/ARM/AlignedBundling/pad-align-to-bundle-end.s  41
-rw-r--r--  test/MC/ARM/arm_instructions.s  15
-rw-r--r--  test/MC/ARM/basic-arm-instructions.s  47
-rw-r--r--  test/MC/ARM/basic-thumb2-instructions.s  30
-rw-r--r--  test/MC/ARM/data-in-code.ll  176
-rw-r--r--  test/MC/ARM/elf-eflags-eabi-cg.ll  13
-rw-r--r--  test/MC/ARM/elf-reloc-01.ll  4
-rw-r--r--  test/MC/ARM/elf-reloc-02.ll  6
-rw-r--r--  test/MC/ARM/elf-reloc-03.ll  6
-rw-r--r--  test/MC/ARM/elf-reloc-condcall.s  12
-rw-r--r--  test/MC/ARM/elf-thumbfunc-reloc.ll  4
-rw-r--r--  test/MC/ARM/elf-thumbfunc.s  2
-rw-r--r--  test/MC/ARM/mapping-within-section.s  33
-rw-r--r--  test/MC/ARM/multi-section-mapping.s  35
-rw-r--r--  test/MC/ARM/neon-bitwise-encoding.s  68
-rw-r--r--  test/MC/ARM/neon-vld-encoding.s  257
-rw-r--r--  test/MC/ARM/neon-vst-encoding.s  183
-rw-r--r--  test/MC/ARM/neont2-vld-encoding.s  96
-rw-r--r--  test/MC/ARM/neont2-vst-encoding.s  84
-rw-r--r--  test/MC/ARM/relocated-mapping.s  11
-rw-r--r--  test/MC/AsmParser/align_invalid.s  10
-rw-r--r--  test/MC/AsmParser/directive_values.s  6
-rw-r--r--  test/MC/AsmParser/section_names.s  62
-rw-r--r--  test/MC/COFF/symbol-alias.s  11
-rw-r--r--  test/MC/COFF/weak-symbol-section-specification.ll  23
-rw-r--r--  test/MC/Disassembler/AArch64/a64-ignored-fields.txt  8
-rw-r--r--  test/MC/Disassembler/AArch64/basic-a64-instructions.txt  4200
-rw-r--r--  test/MC/Disassembler/AArch64/basic-a64-undefined.txt  43
-rw-r--r--  test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt  96
-rw-r--r--  test/MC/Disassembler/AArch64/gicv3-regs.txt  222
-rw-r--r--  test/MC/Disassembler/AArch64/ldp-offset-predictable.txt  7
-rw-r--r--  test/MC/Disassembler/AArch64/ldp-postind.predictable.txt  17
-rw-r--r--  test/MC/Disassembler/AArch64/ldp-preind.predictable.txt  17
-rw-r--r--  test/MC/Disassembler/AArch64/lit.local.cfg  6
-rw-r--r--  test/MC/Disassembler/AArch64/trace-regs.txt  736
-rw-r--r--  test/MC/Disassembler/ARM/hex-immediates.txt  5
-rw-r--r--  test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt  2
-rw-r--r--  test/MC/Disassembler/ARM/neon-tests.txt  6
-rw-r--r--  test/MC/Disassembler/ARM/neon.txt  114
-rw-r--r--  test/MC/Disassembler/ARM/neont-VLD-reencoding.txt  26
-rw-r--r--  test/MC/Disassembler/ARM/neont-VST-reencoding.txt  26
-rw-r--r--  test/MC/Disassembler/ARM/neont2.txt  122
-rw-r--r--  test/MC/Disassembler/ARM/thumb2.txt  5
-rw-r--r--  test/MC/Disassembler/ARM/unpredictable-BFI.txt  11
-rw-r--r--  test/MC/Disassembler/Mips/mips32.txt  6
-rw-r--r--  test/MC/Disassembler/Mips/mips32_le.txt  6
-rw-r--r--  test/MC/Disassembler/Mips/mips64.txt  134
-rw-r--r--  test/MC/Disassembler/Mips/mips64_le.txt  134
-rw-r--r--  test/MC/Disassembler/Mips/mips64r2.txt  182
-rw-r--r--  test/MC/Disassembler/Mips/mips64r2_le.txt  182
-rw-r--r--  test/MC/Disassembler/X86/enhanced.txt  10
-rw-r--r--  test/MC/Disassembler/X86/hex-immediates.txt  10
-rw-r--r--  test/MC/Disassembler/X86/intel-syntax-32.txt  13
-rw-r--r--  test/MC/Disassembler/X86/simple-tests.txt  25
-rw-r--r--  test/MC/Disassembler/X86/x86-32.txt  28
-rw-r--r--  test/MC/Disassembler/X86/x86-64.txt  73
-rw-r--r--  test/MC/Disassembler/XCore/lit.local.cfg  5
-rw-r--r--  test/MC/Disassembler/XCore/xcore.txt  695
-rw-r--r--  test/MC/ELF/cfi-register.s  42
-rw-r--r--  test/MC/ELF/cfi-undefined.s  41
-rw-r--r--  test/MC/ELF/comp-dir.s  7
-rw-r--r--  test/MC/ELF/gen-dwarf.s  44
-rw-r--r--  test/MC/ELF/many-sections-2.s  65281
-rw-r--r--  test/MC/ELF/no-fixup.s  15
-rw-r--r--  test/MC/ELF/relax-all-flag.s  19
-rw-r--r--  test/MC/MachO/ARM/lit.local.cfg  2
-rw-r--r--  test/MC/MachO/ARM/nop-armv4-padding.s  2
-rw-r--r--  test/MC/MachO/bad-dollar.s  5
-rw-r--r--  test/MC/MachO/bad-macro.s  14
-rw-r--r--  test/MC/MachO/gen-dwarf-cpp.s  2
-rw-r--r--  test/MC/MachO/gen-dwarf-macro-cpp.s  2
-rw-r--r--  test/MC/MachO/gen-dwarf-producer.s  8
-rw-r--r--  test/MC/MachO/gen-dwarf.s  4
-rw-r--r--  test/MC/MachO/linker-option-1.s  21
-rw-r--r--  test/MC/MachO/linker-option-2.s  25
-rw-r--r--  test/MC/MachO/linker-options.ll  43
-rw-r--r--  test/MC/Mips/eh-frame.s  167
-rw-r--r--  test/MC/Mips/elf-gprel-32-64.ll  37
-rw-r--r--  test/MC/Mips/elf-reginfo.ll  31
-rw-r--r--  test/MC/Mips/elf_eflags.ll  66
-rw-r--r--  test/MC/Mips/elf_st_other.ll  13
-rw-r--r--  test/MC/Mips/hilo-addressing.s  11
-rw-r--r--  test/MC/Mips/mips-alu-instructions.s  15
-rw-r--r--  test/MC/Mips/mips-coprocessor-encodings.s  3
-rw-r--r--  test/MC/Mips/mips-expansions.s  22
-rw-r--r--  test/MC/Mips/mips-jump-instructions.s  48
-rw-r--r--  test/MC/Mips/mips64-alu-instructions.s  100
-rw-r--r--  test/MC/Mips/mips_directives.s  35
-rw-r--r--  test/MC/Mips/mips_gprel16.ll  33
-rw-r--r--  test/MC/Mips/nabi-regs.s  36
-rw-r--r--  test/MC/Mips/set-at-directive.s  132
-rw-r--r--  test/MC/PowerPC/ppc64-initial-cfa.ll  103
-rw-r--r--  test/MC/PowerPC/ppc64-relocs-01.ll  2
-rw-r--r--  test/MC/X86/AlignedBundling/align-mode-argument-error.s  8
-rw-r--r--  test/MC/X86/AlignedBundling/asm-printing-bundle-directives.s  22
-rw-r--r--  test/MC/X86/AlignedBundling/autogen-inst-offset-align-to-end.s  2899
-rw-r--r--  test/MC/X86/AlignedBundling/autogen-inst-offset-padding.s  2674
-rw-r--r--  test/MC/X86/AlignedBundling/bundle-group-too-large-error.s  17
-rw-r--r--  test/MC/X86/AlignedBundling/bundle-lock-option-error.s  11
-rw-r--r--  test/MC/X86/AlignedBundling/different-sections.s  25
-rw-r--r--  test/MC/X86/AlignedBundling/lit.local.cfg  6
-rw-r--r--  test/MC/X86/AlignedBundling/lock-without-bundle-mode-error.s  10
-rw-r--r--  test/MC/X86/AlignedBundling/long-nop-pad.s  27
-rw-r--r--  test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s  33
-rw-r--r--  test/MC/X86/AlignedBundling/pad-bundle-groups.s  46
-rw-r--r--  test/MC/X86/AlignedBundling/relax-at-bundle-end.s  16
-rw-r--r--  test/MC/X86/AlignedBundling/relax-in-bundle-group.s  42
-rw-r--r--  test/MC/X86/AlignedBundling/single-inst-bundling.s  47
-rw-r--r--  test/MC/X86/AlignedBundling/switch-section-locked-error.s  16
-rw-r--r--  test/MC/X86/AlignedBundling/unlock-without-lock-error.s  11
-rw-r--r--  test/MC/X86/fde-reloc.s  11
-rw-r--r--  test/MC/X86/gnux32-dwarf-gen.s  24
-rw-r--r--  test/MC/X86/intel-syntax-encoding.s  21
-rw-r--r--  test/MC/X86/intel-syntax-hex.s  26
-rw-r--r--  test/MC/X86/intel-syntax.s  200
-rw-r--r--  test/MC/X86/lit.local.cfg  9
-rw-r--r--  test/MC/X86/shuffle-comments.s  271
-rw-r--r--  test/MC/X86/x86-32-avx.s  60
-rw-r--r--  test/MC/X86/x86-32-coverage.s  120
-rw-r--r--  test/MC/X86/x86-32-ms-inline-asm.s  33
-rw-r--r--  test/MC/X86/x86-64.s  10
-rw-r--r--  test/MC/X86/x86_64-avx-encoding.s  60
-rw-r--r--  test/MC/X86/x86_64-fma4-encoding.s  65
-rw-r--r--  test/MC/X86/x86_64-rand-encoding.s  49
-rw-r--r--  test/MC/X86/x86_64-rtm-encoding.s  4
-rw-r--r--  test/MC/X86/x86_errors.s  2
-rw-r--r--  test/MC/X86/x86_long_nop.s  15
-rw-r--r--  test/Makefile  16
-rw-r--r--  test/Object/ARM/symbol-addr.ll  12
-rw-r--r--  test/Object/Inputs/COFF/i386.yaml  1
-rwxr-xr-x  test/Object/Inputs/coff_archive.lib  bin 0 -> 41196 bytes
-rw-r--r--  test/Object/Inputs/liblong_filenames.a  bin 0 -> 10920 bytes
-rw-r--r--  test/Object/Inputs/libsimple_archive.a  bin 0 -> 1596 bytes
-rw-r--r--  test/Object/Inputs/macho-text-sections.macho-x86_64  bin 0 -> 268 bytes
-rw-r--r--  test/Object/Inputs/program-headers.elf-i386  bin 0 -> 987 bytes
-rw-r--r--  test/Object/Inputs/program-headers.elf-x86-64  bin 0 -> 1108 bytes
-rw-r--r--  test/Object/Inputs/trivial-object-test.elf-mips64el  bin 0 -> 1064 bytes
-rw-r--r--  test/Object/Mips/feature.test  4
-rw-r--r--  test/Object/X86/macho-text-sections.test  3
-rw-r--r--  test/Object/archive-long-index.test  40
-rw-r--r--  test/Object/coff-archive.test  225
-rw-r--r--  test/Object/obj2yaml.test  170
-rw-r--r--  test/Object/objdump-private-headers.test  18
-rw-r--r--  test/Object/objdump-relocations.test  7
-rw-r--r--  test/Object/objdump-sectionheaders.test  16
-rw-r--r--  test/Object/readobj-elf-versioning.test  49
-rw-r--r--  test/Object/readobj-shared-object.test  362
-rw-r--r--  test/Object/readobj.test  2
-rw-r--r--  test/Object/simple-archive.test  12
-rw-r--r--  test/Object/yaml2obj-readobj.test  5
-rw-r--r--  test/Other/2008-10-15-MissingSpace.ll  8
-rw-r--r--  test/Other/close-stderr.ll  7
-rw-r--r--  test/Other/constant-fold-gep.ll  136
-rw-r--r--  test/Other/extract-linkonce.ll  23
-rwxr-xr-x  test/Scripts/elf-dump  48
-rw-r--r--  test/TableGen/2006-09-18-LargeInt.td  1
-rw-r--r--  test/TableGen/2010-03-24-PrematureDefaults.td  1
-rw-r--r--  test/TableGen/Dag.td  13
-rw-r--r--  test/TableGen/DefmInherit.td  1
-rw-r--r--  test/TableGen/DefmInsideMultiClass.td  1
-rw-r--r--  test/TableGen/ForeachList.td  1
-rw-r--r--  test/TableGen/ForeachLoop.td  1
-rw-r--r--  test/TableGen/LazyChange.td  1
-rw-r--r--  test/TableGen/LetInsideMultiClasses.td  1
-rw-r--r--  test/TableGen/ListOfList.td  1
-rw-r--r--  test/TableGen/LoLoL.td  1
-rw-r--r--  test/TableGen/MultiClass.td  1
-rw-r--r--  test/TableGen/MultiClassDefName.td  1
-rw-r--r--  test/TableGen/MultiClassInherit.td  1
-rw-r--r--  test/TableGen/MultiPat.td  1
-rw-r--r--  test/TableGen/NestedForeach.td  1
-rw-r--r--  test/TableGen/Paste.td  1
-rw-r--r--  test/TableGen/SetTheory.td  1
-rw-r--r--  test/TableGen/SiblingForeach.td  1
-rw-r--r--  test/TableGen/Slice.td  9
-rw-r--r--  test/TableGen/TargetInstrSpec.td  1
-rw-r--r--  test/TableGen/TwoLevelName.td  1
-rw-r--r--  test/TableGen/cast.td  1
-rw-r--r--  test/TableGen/defmclass.td  1
-rw-r--r--  test/TableGen/eq.td  1
-rw-r--r--  test/TableGen/eqbit.td  1
-rw-r--r--  test/TableGen/foreach.td  1
-rw-r--r--  test/TableGen/if.td  1
-rw-r--r--  test/TableGen/ifbit.td  1
-rw-r--r--  test/TableGen/lisp.td  1
-rw-r--r--  test/TableGen/list-element-bitref.td  7
-rw-r--r--  test/TableGen/math.td  18
-rw-r--r--  test/TableGen/pr8330.td  1
-rw-r--r--  test/TableGen/strconcat.td  1
-rw-r--r--  test/TableGen/subst.td  1
-rw-r--r--  test/TableGen/subst2.td  1
-rw-r--r--  test/TableGen/usevalname.td  1
-rw-r--r--  test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll  18
-rw-r--r--  test/Transforms/ArgumentPromotion/crash.ll  8
-rw-r--r--  test/Transforms/BBVectorize/X86/pr15289.ll  98
-rw-r--r--  test/Transforms/BBVectorize/X86/simple-int.ll  79
-rw-r--r--  test/Transforms/BBVectorize/cycle.ll  2
-rw-r--r--  test/Transforms/BBVectorize/ld1.ll  2
-rw-r--r--  test/Transforms/BBVectorize/loop1.ll  4
-rw-r--r--  test/Transforms/BBVectorize/req-depth.ll  4
-rw-r--r--  test/Transforms/BBVectorize/search-limit.ll  4
-rw-r--r--  test/Transforms/BBVectorize/simple-int.ll  38
-rw-r--r--  test/Transforms/BBVectorize/simple-ldstr.ll  4
-rw-r--r--  test/Transforms/BBVectorize/simple-sel.ll  4
-rw-r--r--  test/Transforms/BBVectorize/simple.ll  2
-rw-r--r--  test/Transforms/BBVectorize/simple3.ll  2
-rw-r--r--  test/Transforms/CodeGenPrepare/basic.ll  2
-rw-r--r--  test/Transforms/ConstProp/2007-11-23-cttz.ll  2
-rw-r--r--  test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll  2
-rw-r--r--  test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll  2
-rw-r--r--  test/Transforms/ConstantMerge/merge-both.ll  2
-rw-r--r--  test/Transforms/ConstantMerge/unnamed-addr.ll  2
-rw-r--r--  test/Transforms/CorrelatedValuePropagation/basic.ll  22
-rw-r--r--  test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll  22
-rw-r--r--  test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll  9
-rw-r--r--  test/Transforms/DeadArgElim/dbginfo.ll  21
-rw-r--r--  test/Transforms/DeadArgElim/deadexternal.ll  2
-rw-r--r--  test/Transforms/DeadArgElim/keepalive.ll  7
-rw-r--r--  test/Transforms/DeadStoreElimination/const-pointers.ll  2
-rw-r--r--  test/Transforms/DeadStoreElimination/dominate.ll  2
-rw-r--r--  test/Transforms/DeadStoreElimination/no-targetdata.ll  2
-rw-r--r--  test/Transforms/DeadStoreElimination/pr11390.ll  2
-rw-r--r--  test/Transforms/EarlyCSE/commute.ll  4
-rw-r--r--  test/Transforms/EarlyCSE/floatingpoint.ll  14
-rw-r--r--  test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll  8
-rw-r--r--  test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll  10
-rw-r--r--  test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll  21
-rw-r--r--  test/Transforms/FunctionAttrs/annotate-1.ll  18
-rw-r--r--  test/Transforms/FunctionAttrs/atomic.ll  6
-rw-r--r--  test/Transforms/FunctionAttrs/noreturn.ll  18
-rw-r--r--  test/Transforms/GCOVProfiling/linkagename.ll  27
-rw-r--r--  test/Transforms/GCOVProfiling/lit.local.cfg  1
-rw-r--r--  test/Transforms/GCOVProfiling/version.ll  29
-rw-r--r--  test/Transforms/GVN/2011-04-27-phioperands.ll  2
-rw-r--r--  test/Transforms/GVN/MemdepMiscompile.ll  54
-rw-r--r--  test/Transforms/GVN/crash-no-aa.ll  5
-rw-r--r--  test/Transforms/GVN/crash.ll  2
-rw-r--r--  test/Transforms/GVN/edge.ll  2
-rw-r--r--  test/Transforms/GVN/fpmath.ll  2
-rw-r--r--  test/Transforms/GVN/lpre-call-wrap-2.ll  2
-rw-r--r--  test/Transforms/GVN/lpre-call-wrap.ll  2
-rw-r--r--  test/Transforms/GVN/nonescaping-malloc.ll  1
-rw-r--r--  test/Transforms/GVN/null-aliases-nothing.ll  2
-rw-r--r--  test/Transforms/GVN/pr12979.ll  2
-rw-r--r--  test/Transforms/GVN/range.ll  2
-rw-r--r--  test/Transforms/GVN/rle.ll  13
-rw-r--r--  test/Transforms/GVN/tbaa.ll  2
-rw-r--r--  test/Transforms/GlobalOpt/2009-03-05-dbg.ll  1
-rw-r--r--  test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll  2
-rw-r--r--  test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll  2
-rw-r--r--  test/Transforms/GlobalOpt/crash-2.ll  19
-rw-r--r--  test/Transforms/GlobalOpt/crash.ll  2
-rw-r--r--  test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll  2
-rw-r--r--  test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll  35
-rw-r--r--  test/Transforms/GlobalOpt/integer-bool.ll  27
-rw-r--r--  test/Transforms/GlobalOpt/memset-null.ll  2
-rw-r--r--  test/Transforms/GlobalOpt/unnamed-addr.ll  2
-rw-r--r--  test/Transforms/IPConstantProp/user-with-multiple-uses.ll  8
-rw-r--r--  test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll  2
-rw-r--r--  test/Transforms/IndVarSimplify/crash.ll  2
-rw-r--r--  test/Transforms/IndVarSimplify/dont-recompute.ll  69
-rw-r--r--  test/Transforms/IndVarSimplify/iv-zext.ll  2
-rw-r--r--  test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll  1
-rw-r--r--  test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll  10
-rw-r--r--  test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll  4
-rw-r--r--  test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll  1
-rw-r--r--  test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll  1
-rw-r--r--  test/Transforms/Inline/2010-05-12-ValueMap.ll  2
-rw-r--r--  test/Transforms/Inline/alloca_test.ll  2
-rw-r--r--  test/Transforms/Inline/basictest.ll  45
-rw-r--r--  test/Transforms/Inline/crash2.ll  2
-rw-r--r--  test/Transforms/Inline/delete-call.ll  8
-rw-r--r--  test/Transforms/Inline/devirtualize-3.ll  2
-rw-r--r--  test/Transforms/Inline/devirtualize.ll  2
-rw-r--r--  test/Transforms/Inline/gvn-inline-iteration.ll  2
-rw-r--r--  test/Transforms/Inline/inline-optsize.ll  4
-rw-r--r--  test/Transforms/Inline/inline_constprop.ll  76
-rw-r--r--  test/Transforms/Inline/inline_invoke.ll  12
-rw-r--r--  test/Transforms/Inline/inline_minisize.ll  232
-rw-r--r--  test/Transforms/Inline/inline_ssp.ll  160
-rw-r--r--  test/Transforms/Inline/lifetime-no-datalayout.ll  23
-rw-r--r--  test/Transforms/Inline/lifetime.ll  46
-rw-r--r--  test/Transforms/Inline/noinline-recursive-fn.ll  2
-rw-r--r--  test/Transforms/Inline/noinline.ll  2
-rw-r--r--  test/Transforms/Inline/recursive.ll  2
-rw-r--r--  test/Transforms/InstCombine/2008-05-08-StrLenSink.ll  2
-rw-r--r--  test/Transforms/InstCombine/2009-02-11-NotInitialized.ll  14
-rw-r--r--  test/Transforms/InstCombine/2010-03-03-ExtElim.ll  2
-rw-r--r--  test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll  20
-rw-r--r--  test/Transforms/InstCombine/2010-11-01-lshr-mask.ll  4
-rw-r--r--  test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll  6
-rw-r--r--  test/Transforms/InstCombine/2012-12-14-simp-vgep.ll  10
-rw-r--r--  test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll  45
-rw-r--r--  test/Transforms/InstCombine/abs-1.ll  41
-rw-r--r--  test/Transforms/InstCombine/align-external.ll  2
-rw-r--r--  test/Transforms/InstCombine/bitcast-bigendian.ll  50
-rw-r--r--  test/Transforms/InstCombine/bitcast-vector-fold.ll  5
-rw-r--r--  test/Transforms/InstCombine/bitcast.ll  21
-rw-r--r--  test/Transforms/InstCombine/cast.ll  14
-rw-r--r--  test/Transforms/InstCombine/compare-signs.ll  2
-rw-r--r--  test/Transforms/InstCombine/constant-expr-datalayout.ll  12
-rw-r--r--  test/Transforms/InstCombine/cos-1.ll  38
-rw-r--r--  test/Transforms/InstCombine/cos-2.ll  17
-rw-r--r--  test/Transforms/InstCombine/debug-line.ll  24
-rw-r--r--  test/Transforms/InstCombine/debuginfo.ll  32
-rw-r--r--  test/Transforms/InstCombine/devirt.ll  2
-rw-r--r--  test/Transforms/InstCombine/disable-simplify-libcalls.ll  99
-rw-r--r--  test/Transforms/InstCombine/double-float-shrink-1.ll  333
-rw-r--r--  test/Transforms/InstCombine/double-float-shrink-2.ll  80
-rw-r--r--  test/Transforms/InstCombine/exact.ll  7
-rw-r--r--  test/Transforms/InstCombine/exp2-1.ll  76
-rw-r--r--  test/Transforms/InstCombine/exp2-2.ll  17
-rw-r--r--  test/Transforms/InstCombine/fast-math.ll  467
-rw-r--r--  test/Transforms/InstCombine/ffs-1.ll  134
-rw-r--r--  test/Transforms/InstCombine/fmul.ll  72
-rw-r--r--  test/Transforms/InstCombine/fold-phi.ll  39
-rw-r--r--  test/Transforms/InstCombine/fpcast.ll  19
-rw-r--r--  test/Transforms/InstCombine/fprintf-1.ll  80
-rw-r--r--  test/Transforms/InstCombine/fputs-1.ll  43
-rw-r--r--  test/Transforms/InstCombine/fwrite-1.ll  57
-rw-r--r--  test/Transforms/InstCombine/getelementptr.ll  20
-rw-r--r--  test/Transforms/InstCombine/icmp.ll  209
-rw-r--r--  test/Transforms/InstCombine/idioms.ll  2
-rw-r--r--  test/Transforms/InstCombine/intrinsics.ll  38
-rw-r--r--  test/Transforms/InstCombine/isascii-1.ll  32
-rw-r--r--  test/Transforms/InstCombine/isdigit-1.ll  48
-rw-r--r--  test/Transforms/InstCombine/load-cmp.ll  12
-rw-r--r--  test/Transforms/InstCombine/load3.ll  25
-rw-r--r--  test/Transforms/InstCombine/logical-select.ll  20
-rw-r--r--  test/Transforms/InstCombine/malloc-free-delete.ll  29
-rw-r--r--  test/Transforms/InstCombine/memcmp-1.ll  4
-rw-r--r--  test/Transforms/InstCombine/memcpy-from-global.ll  10
-rw-r--r--  test/Transforms/InstCombine/mul.ll  8
-rw-r--r--  test/Transforms/InstCombine/obfuscated_splat.ll  2
-rw-r--r--  test/Transforms/InstCombine/objsize.ll  128
-rw-r--r--  test/Transforms/InstCombine/or.ll  5
-rw-r--r--  test/Transforms/InstCombine/osx-names.ll  30
-rw-r--r--  test/Transforms/InstCombine/pow-1.ll  154
-rw-r--r--  test/Transforms/InstCombine/pow-2.ll  14
-rw-r--r--  test/Transforms/InstCombine/pr12338.ll  42
-rw-r--r--  test/Transforms/InstCombine/printf-1.ll  119
-rw-r--r--  test/Transforms/InstCombine/printf-2.ll  41
-rw-r--r--  test/Transforms/InstCombine/ptr-int-cast.ll  31
-rw-r--r--  test/Transforms/InstCombine/puts-1.ll  31
-rw-r--r--  test/Transforms/InstCombine/sdiv-1.ll  4
-rw-r--r--  test/Transforms/InstCombine/sext.ll  9
-rw-r--r--  test/Transforms/InstCombine/shift.ll  90
-rw-r--r--  test/Transforms/InstCombine/signext.ll  4
-rw-r--r--  test/Transforms/InstCombine/sink_instruction.ll  2
-rw-r--r--  test/Transforms/InstCombine/sprintf-1.ll  100
-rw-r--r--  test/Transforms/InstCombine/sqrt.ll  2
-rw-r--r--  test/Transforms/InstCombine/store.ll  34
-rw-r--r--  test/Transforms/InstCombine/strto-1.ll  16
-rw-r--r--  test/Transforms/InstCombine/toascii-1.ll  59
-rw-r--r--  test/Transforms/InstCombine/vec_extract_elt.ll  10
-rw-r--r--  test/Transforms/InstCombine/vector-casts.ll  3
-rw-r--r--  test/Transforms/InstCombine/vector-type.ll  15
-rw-r--r--  test/Transforms/InstCombine/vector_gep1.ll  9
-rw-r--r--test/Transforms/InstCombine/xor2.ll31
-rw-r--r--test/Transforms/InstCombine/zext-bool-add-sub.ll4
-rw-r--r--test/Transforms/InstSimplify/call-callconv.ll48
-rw-r--r--test/Transforms/InstSimplify/call.ll103
-rw-r--r--test/Transforms/InstSimplify/compare.ll86
-rw-r--r--test/Transforms/InstSimplify/fast-math.ll107
-rw-r--r--test/Transforms/InstSimplify/floating-point-arithmetic.ll35
-rw-r--r--test/Transforms/InstSimplify/past-the-end.ll77
-rw-r--r--test/Transforms/InstSimplify/ptr_diff.ll30
-rw-r--r--test/Transforms/InstSimplify/vector_gep.ll2
-rw-r--r--test/Transforms/JumpThreading/basic.ll40
-rw-r--r--test/Transforms/JumpThreading/degenerate-phi.ll2
-rw-r--r--test/Transforms/JumpThreading/or-undef.ll2
-rw-r--r--test/Transforms/LICM/2003-12-11-SinkingToPHI.ll2
-rw-r--r--test/Transforms/LICM/2011-07-06-Alignment.ll2
-rw-r--r--test/Transforms/LICM/crash.ll2
-rw-r--r--test/Transforms/LICM/hoist-invariant-load.ll1
-rw-r--r--test/Transforms/LICM/hoisting.ll26
-rw-r--r--test/Transforms/LICM/scalar_promote.ll125
-rw-r--r--test/Transforms/LoopDeletion/2011-06-21-phioperands.ll2
-rw-r--r--test/Transforms/LoopDeletion/simplify-then-delete.ll4
-rw-r--r--test/Transforms/LoopIdiom/X86/lit.local.cfg6
-rw-r--r--test/Transforms/LoopIdiom/X86/popcnt.ll140
-rw-r--r--test/Transforms/LoopRotate/basic.ll28
-rw-r--r--test/Transforms/LoopRotate/crash.ll2
-rw-r--r--test/Transforms/LoopRotate/dbgvalue.ll2
-rw-r--r--test/Transforms/LoopRotate/phi-duplicate.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll99
-rw-r--r--test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll43
-rw-r--r--test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll14
-rw-r--r--test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll44
-rw-r--r--test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll84
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll101
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll99
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll43
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll13
-rw-r--r--test/Transforms/LoopStrengthReduce/dominate-assert.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll9
-rw-r--r--test/Transforms/LoopUnroll/basic.ll23
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop3.ll1
-rw-r--r--test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll1
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll10
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll10
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll12
-rw-r--r--test/Transforms/LoopUnswitch/basictest.ll39
-rw-r--r--test/Transforms/LoopUnswitch/infinite-loop.ll9
-rw-r--r--test/Transforms/LoopUnswitch/preserve-analyses.ll2
-rw-r--r--test/Transforms/LoopVectorize/12-12-11-if-conv.ll44
-rw-r--r--test/Transforms/LoopVectorize/2012-10-20-infloop.ll46
-rw-r--r--test/Transforms/LoopVectorize/2012-10-22-isconsec.ll2
-rw-r--r--test/Transforms/LoopVectorize/ARM/arm-unroll.ll32
-rw-r--r--test/Transforms/LoopVectorize/ARM/gcc-examples.ll60
-rw-r--r--test/Transforms/LoopVectorize/ARM/lit.local.cfg6
-rw-r--r--test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll114
-rw-r--r--test/Transforms/LoopVectorize/ARM/width-detect.ll52
-rw-r--r--test/Transforms/LoopVectorize/X86/avx1.ll4
-rw-r--r--test/Transforms/LoopVectorize/X86/constant-vector-operand.ll28
-rw-r--r--test/Transforms/LoopVectorize/X86/conversion-cost.ll11
-rw-r--r--test/Transforms/LoopVectorize/X86/cost-model.ll5
-rw-r--r--test/Transforms/LoopVectorize/X86/gcc-examples.ll27
-rw-r--r--test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll28
-rw-r--r--test/Transforms/LoopVectorize/X86/no-vector.ll22
-rw-r--r--test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll52
-rw-r--r--test/Transforms/LoopVectorize/X86/parallel-loops.ll114
-rw-r--r--test/Transforms/LoopVectorize/X86/reduction-crash.ll35
-rw-r--r--test/Transforms/LoopVectorize/X86/small-size.ll170
-rw-r--r--test/Transforms/LoopVectorize/X86/struct-store.ll27
-rw-r--r--test/Transforms/LoopVectorize/X86/unroll-small-loops.ll50
-rw-r--r--test/Transforms/LoopVectorize/X86/unroll_selection.ll71
-rw-r--r--test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll66
-rw-r--r--test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll150
-rw-r--r--test/Transforms/LoopVectorize/bzip_reverse_loops.ll71
-rw-r--r--test/Transforms/LoopVectorize/calloc.ll53
-rw-r--r--test/Transforms/LoopVectorize/cast-induction.ll30
-rw-r--r--test/Transforms/LoopVectorize/cpp-new-array.ll4
-rw-r--r--test/Transforms/LoopVectorize/dbg.value.ll70
-rw-r--r--test/Transforms/LoopVectorize/flags.ll2
-rw-r--r--test/Transforms/LoopVectorize/float-reduction.ll29
-rw-r--r--test/Transforms/LoopVectorize/gcc-examples.ll57
-rw-r--r--test/Transforms/LoopVectorize/global_alias.ll1078
-rw-r--r--test/Transforms/LoopVectorize/i8-induction.ll35
-rw-r--r--test/Transforms/LoopVectorize/if-conv-crash.ll39
-rw-r--r--test/Transforms/LoopVectorize/if-conversion-reduction.ll38
-rw-r--r--test/Transforms/LoopVectorize/if-conversion.ll108
-rw-r--r--test/Transforms/LoopVectorize/increment.ll2
-rw-r--r--test/Transforms/LoopVectorize/induction_plus.ll5
-rw-r--r--test/Transforms/LoopVectorize/intrinsic.ll935
-rw-r--r--test/Transforms/LoopVectorize/lcssa-crash.ll29
-rw-r--r--test/Transforms/LoopVectorize/no_int_induction.ll33
-rw-r--r--test/Transforms/LoopVectorize/nofloat.ll29
-rw-r--r--test/Transforms/LoopVectorize/non-const-n.ll2
-rw-r--r--test/Transforms/LoopVectorize/nsw-crash.ll25
-rw-r--r--test/Transforms/LoopVectorize/phi-hang.ll29
-rw-r--r--test/Transforms/LoopVectorize/ptr_loops.ll74
-rw-r--r--test/Transforms/LoopVectorize/read-only.ll2
-rw-r--r--test/Transforms/LoopVectorize/reduction.ll95
-rw-r--r--test/Transforms/LoopVectorize/runtime-check.ll6
-rw-r--r--test/Transforms/LoopVectorize/same-base-access.ll110
-rw-r--r--test/Transforms/LoopVectorize/scalar-select.ll2
-rw-r--r--test/Transforms/LoopVectorize/simple-unroll.ll39
-rw-r--r--test/Transforms/LoopVectorize/small-loop.ll2
-rw-r--r--test/Transforms/LoopVectorize/start-non-zero.ll2
-rw-r--r--test/Transforms/LoopVectorize/struct_access.ll50
-rw-r--r--test/Transforms/LoopVectorize/vectorize-once.ll75
-rw-r--r--test/Transforms/LoopVectorize/write-only.ll2
-rw-r--r--test/Transforms/Mem2Reg/ConvertDebugInfo.ll28
-rw-r--r--test/Transforms/Mem2Reg/ConvertDebugInfo2.ll27
-rw-r--r--test/Transforms/MemCpyOpt/memcpy.ll22
-rw-r--r--test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll2
-rw-r--r--test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll36
-rw-r--r--test/Transforms/MergeFunc/phi-speculation1.ll1
-rw-r--r--test/Transforms/MergeFunc/phi-speculation2.ll1
-rw-r--r--test/Transforms/MergeFunc/vector.ll1
-rw-r--r--test/Transforms/MergeFunc/vectors-and-arrays.ll1
-rw-r--r--test/Transforms/MetaRenamer/metarenamer.ll2
-rw-r--r--test/Transforms/ObjCARC/apelim.ll6
-rw-r--r--test/Transforms/ObjCARC/arc-annotations.ll307
-rw-r--r--test/Transforms/ObjCARC/basic.ll55
-rw-r--r--test/Transforms/ObjCARC/cfg-hazards.ll42
-rw-r--r--test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll16
-rw-r--r--test/Transforms/ObjCARC/contract-marker.ll4
-rw-r--r--test/Transforms/ObjCARC/contract-storestrong.ll30
-rw-r--r--test/Transforms/ObjCARC/contract-testcases.ll5
-rw-r--r--test/Transforms/ObjCARC/contract.ll37
-rw-r--r--test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll174
-rw-r--r--test/Transforms/ObjCARC/escape.ll7
-rw-r--r--test/Transforms/ObjCARC/gvn.ll2
-rw-r--r--test/Transforms/ObjCARC/intrinsic-use.ll63
-rw-r--r--test/Transforms/ObjCARC/invoke.ll16
-rw-r--r--test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll6
-rw-r--r--test/Transforms/ObjCARC/nested.ll9
-rw-r--r--test/Transforms/ObjCARC/no-objc-arc-exceptions.ll7
-rw-r--r--test/Transforms/ObjCARC/pr12270.ll2
-rw-r--r--test/Transforms/ObjCARC/retain-block-alloca.ll6
-rw-r--r--test/Transforms/ObjCARC/retain-block-escape-analysis.ll127
-rw-r--r--test/Transforms/ObjCARC/retain-block-side-effects.ll5
-rw-r--r--test/Transforms/ObjCARC/retain-block.ll26
-rw-r--r--test/Transforms/ObjCARC/retain-not-declared.ll8
-rw-r--r--test/Transforms/ObjCARC/rle-s2l.ll7
-rw-r--r--test/Transforms/ObjCARC/rv.ll32
-rw-r--r--test/Transforms/ObjCARC/split-backedge.ll14
-rw-r--r--test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll74
-rw-r--r--test/Transforms/ObjCARC/weak-copies.ll6
-rw-r--r--test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll2
-rw-r--r--test/Transforms/PhaseOrdering/PR6627.ll2
-rw-r--r--test/Transforms/PhaseOrdering/basic.ll2
-rw-r--r--test/Transforms/PhaseOrdering/gdce.ll2
-rw-r--r--test/Transforms/PhaseOrdering/scev.ll2
-rw-r--r--test/Transforms/Reassociate/crash.ll2
-rw-r--r--test/Transforms/Reassociate/xor_reassoc.ll166
-rw-r--r--test/Transforms/Reg2Mem/crash.ll88
-rw-r--r--test/Transforms/Reg2Mem/lit.local.cfg1
-rw-r--r--test/Transforms/SCCP/crash.ll2
-rw-r--r--test/Transforms/SCCP/ipsccp-addr-taken.ll2
-rw-r--r--test/Transforms/SCCP/retvalue-undef.ll2
-rw-r--r--test/Transforms/SCCP/undef-resolve.ll2
-rw-r--r--test/Transforms/SROA/basictest.ll97
-rw-r--r--test/Transforms/SROA/big-endian.ll16
-rw-r--r--test/Transforms/SROA/phi-and-select.ll12
-rw-r--r--test/Transforms/SROA/vector-promotion.ll164
-rw-r--r--test/Transforms/SROA/vectors-of-pointers.ll25
-rw-r--r--test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll5
-rw-r--r--test/Transforms/ScalarRepl/crash.ll4
-rw-r--r--test/Transforms/ScalarRepl/debuginfo-preserved.ll21
-rw-r--r--test/Transforms/ScalarRepl/memcpy-align.ll2
-rw-r--r--test/Transforms/ScalarRepl/phi-cycle.ll5
-rw-r--r--test/Transforms/ScalarRepl/phi-select.ll2
-rw-r--r--test/Transforms/ScalarRepl/volatile.ll5
-rw-r--r--test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll22
-rw-r--r--test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll14
-rw-r--r--test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/EmptyBlockMerge.ll21
-rw-r--r--test/Transforms/SimplifyCFG/PHINode.ll15
-rw-r--r--test/Transforms/SimplifyCFG/PR9946.ll2
-rw-r--r--test/Transforms/SimplifyCFG/SpeculativeExec.ll41
-rw-r--r--test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll26
-rw-r--r--test/Transforms/SimplifyCFG/branch-fold-dbg.ll2
-rw-r--r--test/Transforms/SimplifyCFG/select-gep.ll2
-rw-r--r--test/Transforms/SimplifyCFG/switch-on-const-select.ll9
-rw-r--r--test/Transforms/SimplifyCFG/trivial-throw.ll77
-rw-r--r--test/Transforms/SimplifyCFG/volatile-phioper.ll48
-rw-r--r--test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll12
-rw-r--r--test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll13
-rw-r--r--test/Transforms/SimplifyLibCalls/2010-05-30-memcpy-Struct.ll20
-rw-r--r--test/Transforms/SimplifyLibCalls/FFS.ll45
-rw-r--r--test/Transforms/SimplifyLibCalls/FPrintF.ll28
-rw-r--r--test/Transforms/SimplifyLibCalls/FPuts.ll29
-rw-r--r--test/Transforms/SimplifyLibCalls/IsDigit.ll21
-rw-r--r--test/Transforms/SimplifyLibCalls/Printf.ll37
-rw-r--r--test/Transforms/SimplifyLibCalls/Puts.ll15
-rw-r--r--test/Transforms/SimplifyLibCalls/SPrintF.ll40
-rw-r--r--test/Transforms/SimplifyLibCalls/ToAscii.ll21
-rw-r--r--test/Transforms/SimplifyLibCalls/abs.ll11
-rw-r--r--test/Transforms/SimplifyLibCalls/cos.ll14
-rw-r--r--test/Transforms/SimplifyLibCalls/debug-line.ll24
-rw-r--r--test/Transforms/SimplifyLibCalls/double-float-shrink.ll333
-rw-r--r--test/Transforms/SimplifyLibCalls/exp2.ll38
-rw-r--r--test/Transforms/SimplifyLibCalls/float-shrink-compare.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/floor.ll85
-rw-r--r--test/Transforms/SimplifyLibCalls/fwrite.ll13
-rw-r--r--test/Transforms/SimplifyLibCalls/iprintf.ll71
-rw-r--r--test/Transforms/SimplifyLibCalls/osx-names.ll30
-rw-r--r--test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll33
-rw-r--r--test/Transforms/SimplifyLibCalls/pow2.ll37
-rw-r--r--test/Transforms/StripSymbols/2010-08-25-crash.ll2
-rw-r--r--test/Transforms/StripSymbols/block-address.ll2
-rw-r--r--test/Transforms/TailCallElim/ackermann.ll1
-rw-r--r--test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll4
-rw-r--r--test/Transforms/TailCallElim/dup_tail.ll5
-rw-r--r--test/Transforms/TailCallElim/intervening-inst.ll3
-rw-r--r--test/Transforms/TailCallElim/move_alloca_for_tail_call.ll2
-rw-r--r--test/Transforms/TailCallElim/nocapture.ll2
-rw-r--r--test/Transforms/TailCallElim/reorder_load.ll6
-rw-r--r--test/Transforms/TailCallElim/return_constant.ll3
-rw-r--r--test/Transforms/TailCallElim/trivial_codegen_tailcall.ll6
-rw-r--r--test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll1
-rw-r--r--test/Unit/lit.cfg5
-rw-r--r--test/Verifier/module-flags-1.ll60
-rw-r--r--test/lit.cfg30
-rw-r--r--test/lit.site.cfg.in3
-rw-r--r--test/tools/llvm-lit/chain.c9
-rw-r--r--test/tools/llvm-lit/lit.local.cfg1
-rw-r--r--test/tools/llvm-objdump/disassembly-show-raw.s15
-rw-r--r--test/tools/llvm-objdump/lit.local.cfg6
-rw-r--r--test/tools/llvm-objdump/win64-unwind-data.s106
-rw-r--r--test/tools/llvm-readobj/Inputs/trivial.ll19
-rw-r--r--test/tools/llvm-readobj/Inputs/trivial.obj.coff-i386 (bin, 0 -> 314 bytes)
-rw-r--r--test/tools/llvm-readobj/Inputs/trivial.obj.coff-x86-64 (bin, 0 -> 319 bytes)
-rw-r--r--test/tools/llvm-readobj/Inputs/trivial.obj.elf-i386 (bin, 0 -> 896 bytes)
-rw-r--r--test/tools/llvm-readobj/Inputs/trivial.obj.elf-x86-64 (bin, 0 -> 1256 bytes)
-rw-r--r--test/tools/llvm-readobj/Inputs/trivial.obj.macho-i386 (bin, 0 -> 472 bytes)
-rw-r--r--test/tools/llvm-readobj/Inputs/trivial.obj.macho-x86-64 (bin, 0 -> 532 bytes)
-rw-r--r--test/tools/llvm-readobj/file-headers.test100
-rw-r--r--test/tools/llvm-readobj/lit.local.cfg1
-rw-r--r--test/tools/llvm-readobj/relocations.test32
-rw-r--r--test/tools/llvm-readobj/sections-ext.test175
-rw-r--r--test/tools/llvm-readobj/sections.test113
-rw-r--r--test/tools/llvm-readobj/symbols.test44
-rw-r--r--tools/CMakeLists.txt11
-rw-r--r--tools/LLVMBuild.txt2
-rw-r--r--tools/Makefile8
-rw-r--r--tools/bugpoint-passes/CMakeLists.txt4
-rw-r--r--tools/bugpoint-passes/TestPasses.cpp10
-rw-r--r--tools/bugpoint/BugDriver.cpp6
-rw-r--r--tools/bugpoint/BugDriver.h2
-rw-r--r--tools/bugpoint/CMakeLists.txt3
-rw-r--r--tools/bugpoint/CrashDebugger.cpp20
-rw-r--r--tools/bugpoint/ExecutionDriver.cpp2
-rw-r--r--tools/bugpoint/ExtractFunction.cpp24
-rw-r--r--tools/bugpoint/LLVMBuild.txt2
-rw-r--r--tools/bugpoint/ListReducer.h6
-rw-r--r--tools/bugpoint/Makefile2
-rw-r--r--tools/bugpoint/Miscompilation.cpp14
-rw-r--r--tools/bugpoint/OptimizerDriver.cpp12
-rw-r--r--tools/bugpoint/ToolRunner.cpp6
-rw-r--r--tools/bugpoint/ToolRunner.h2
-rw-r--r--tools/bugpoint/bugpoint.cpp7
-rw-r--r--tools/gold/gold-plugin.cpp9
-rw-r--r--tools/llc/CMakeLists.txt2
-rw-r--r--tools/llc/LLVMBuild.txt2
-rw-r--r--tools/llc/Makefile2
-rw-r--r--tools/llc/llc.cpp39
-rw-r--r--tools/lli/CMakeLists.txt4
-rw-r--r--tools/lli/LLVMBuild.txt2
-rw-r--r--tools/lli/Makefile4
-rw-r--r--tools/lli/RecordingMemoryManager.cpp63
-rw-r--r--tools/lli/RecordingMemoryManager.h13
-rw-r--r--tools/lli/RemoteTarget.h2
-rw-r--r--tools/lli/lli.cpp250
-rw-r--r--tools/llvm-ar/llvm-ar.cpp10
-rw-r--r--tools/llvm-as/llvm-as.cpp8
-rw-r--r--tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp176
-rw-r--r--tools/llvm-diff/CMakeLists.txt2
-rw-r--r--tools/llvm-diff/DiffConsumer.cpp5
-rw-r--r--tools/llvm-diff/DiffConsumer.h5
-rw-r--r--tools/llvm-diff/DiffLog.cpp3
-rw-r--r--tools/llvm-diff/DifferenceEngine.cpp12
-rw-r--r--tools/llvm-diff/DifferenceEngine.h5
-rw-r--r--tools/llvm-diff/LLVMBuild.txt2
-rw-r--r--tools/llvm-diff/Makefile2
-rw-r--r--tools/llvm-diff/llvm-diff.cpp12
-rw-r--r--tools/llvm-dis/llvm-dis.cpp14
-rw-r--r--tools/llvm-dwarfdump/llvm-dwarfdump.cpp126
-rw-r--r--tools/llvm-extract/CMakeLists.txt2
-rw-r--r--tools/llvm-extract/LLVMBuild.txt2
-rw-r--r--tools/llvm-extract/Makefile2
-rw-r--r--tools/llvm-extract/llvm-extract.cpp23
-rw-r--r--tools/llvm-jitlistener/CMakeLists.txt22
-rw-r--r--tools/llvm-jitlistener/LLVMBuild.txt22
-rw-r--r--tools/llvm-jitlistener/Makefile27
-rw-r--r--tools/llvm-jitlistener/llvm-jitlistener.cpp207
-rw-r--r--tools/llvm-link/CMakeLists.txt2
-rw-r--r--tools/llvm-link/LLVMBuild.txt2
-rw-r--r--tools/llvm-link/Makefile2
-rw-r--r--tools/llvm-link/llvm-link.cpp16
-rw-r--r--tools/llvm-mc/Disassembler.cpp180
-rw-r--r--tools/llvm-mc/Disassembler.h5
-rw-r--r--tools/llvm-mc/llvm-mc.cpp80
-rw-r--r--tools/llvm-nm/llvm-nm.cpp49
-rw-r--r--tools/llvm-objdump/CMakeLists.txt2
-rw-r--r--tools/llvm-objdump/COFFDump.cpp355
-rw-r--r--tools/llvm-objdump/ELFDump.cpp100
-rw-r--r--tools/llvm-objdump/MachODump.cpp59
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp97
-rw-r--r--tools/llvm-objdump/llvm-objdump.h11
-rw-r--r--tools/llvm-prof/llvm-prof.cpp16
-rw-r--r--tools/llvm-ranlib/llvm-ranlib.cpp8
-rw-r--r--tools/llvm-readobj/CMakeLists.txt12
-rw-r--r--tools/llvm-readobj/COFFDumper.cpp1014
-rw-r--r--tools/llvm-readobj/ELFDumper.cpp800
-rw-r--r--tools/llvm-readobj/Error.cpp62
-rw-r--r--tools/llvm-readobj/Error.h48
-rw-r--r--tools/llvm-readobj/LLVMBuild.txt2
-rw-r--r--tools/llvm-readobj/MachODumper.cpp438
-rw-r--r--tools/llvm-readobj/Makefile2
-rw-r--r--tools/llvm-readobj/ObjDumper.cpp33
-rw-r--r--tools/llvm-readobj/ObjDumper.h60
-rw-r--r--tools/llvm-readobj/StreamWriter.cpp79
-rw-r--r--tools/llvm-readobj/StreamWriter.h282
-rw-r--r--tools/llvm-readobj/llvm-readobj.cpp395
-rw-r--r--tools/llvm-readobj/llvm-readobj.h45
-rw-r--r--tools/llvm-rtdyld/CMakeLists.txt2
-rw-r--r--tools/llvm-rtdyld/Makefile2
-rw-r--r--tools/llvm-rtdyld/llvm-rtdyld.cpp81
-rw-r--r--tools/llvm-size/llvm-size.cpp2
-rw-r--r--tools/llvm-stress/Makefile2
-rw-r--r--tools/llvm-stress/llvm-stress.cpp46
-rw-r--r--tools/llvm-symbolizer/CMakeLists.txt14
-rw-r--r--tools/llvm-symbolizer/LLVMSymbolize.cpp292
-rw-r--r--tools/llvm-symbolizer/LLVMSymbolize.h98
-rw-r--r--tools/llvm-symbolizer/Makefile17
-rw-r--r--tools/llvm-symbolizer/llvm-symbolizer.cpp119
-rw-r--r--tools/lto/CMakeLists.txt3
-rw-r--r--tools/lto/LTOCodeGenerator.cpp66
-rw-r--r--tools/lto/LTOCodeGenerator.h6
-rw-r--r--tools/lto/LTODisassembler.cpp26
-rw-r--r--tools/lto/LTOModule.cpp61
-rw-r--r--tools/lto/LTOModule.h28
-rw-r--r--tools/lto/Makefile2
-rw-r--r--tools/lto/lto.cpp5
-rw-r--r--tools/lto/lto.exports2
-rw-r--r--tools/macho-dump/macho-dump.cpp30
-rw-r--r--tools/obj2yaml/CMakeLists.txt (renamed from utils/obj2yaml/CMakeLists.txt)0
-rw-r--r--tools/obj2yaml/Makefile20
-rw-r--r--tools/obj2yaml/coff2yaml.cpp361
-rw-r--r--tools/obj2yaml/obj2yaml.cpp86
-rw-r--r--tools/obj2yaml/obj2yaml.h34
-rw-r--r--tools/opt/AnalysisWrappers.cpp4
-rw-r--r--tools/opt/CMakeLists.txt3
-rw-r--r--tools/opt/GraphPrinters.cpp75
-rw-r--r--tools/opt/LLVMBuild.txt2
-rw-r--r--tools/opt/Makefile2
-rw-r--r--tools/opt/PrintSCC.cpp6
-rw-r--r--tools/opt/opt.cpp71
-rw-r--r--unittests/ADT/APFloatTest.cpp86
-rw-r--r--unittests/ADT/APIntTest.cpp22
-rw-r--r--unittests/ADT/CMakeLists.txt3
-rw-r--r--unittests/ADT/MapVectorTest.cpp55
-rw-r--r--unittests/ADT/OptionalTest.cpp284
-rw-r--r--unittests/ADT/SCCIteratorTest.cpp4
-rw-r--r--unittests/ADT/SmallPtrSetTest.cpp55
-rw-r--r--unittests/ADT/SmallStringTest.cpp4
-rw-r--r--unittests/ADT/SmallVectorTest.cpp4
-rw-r--r--unittests/ADT/SparseMultiSetTest.cpp235
-rw-r--r--unittests/ADT/StringRefTest.cpp2
-rw-r--r--unittests/ADT/TinyPtrVectorTest.cpp40
-rw-r--r--unittests/ADT/TripleTest.cpp30
-rw-r--r--unittests/ADT/TwineTest.cpp2
-rw-r--r--unittests/ADT/ilistTest.cpp58
-rw-r--r--unittests/Analysis/ScalarEvolutionTest.cpp10
-rw-r--r--unittests/Bitcode/BitReaderTest.cpp14
-rw-r--r--unittests/CMakeLists.txt3
-rw-r--r--unittests/ExecutionEngine/CMakeLists.txt8
-rw-r--r--unittests/ExecutionEngine/ExecutionEngineTest.cpp10
-rw-r--r--unittests/ExecutionEngine/JIT/CMakeLists.txt3
-rw-r--r--unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp9
-rw-r--r--unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h16
-rw-r--r--unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp12
-rw-r--r--unittests/ExecutionEngine/JIT/JITTest.cpp85
-rw-r--r--unittests/ExecutionEngine/JIT/Makefile2
-rw-r--r--unittests/ExecutionEngine/JIT/MultiJITTest.cpp8
-rw-r--r--unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp5
-rw-r--r--unittests/ExecutionEngine/MCJIT/CMakeLists.txt4
-rw-r--r--unittests/ExecutionEngine/MCJIT/MCJITMemoryManagerTest.cpp172
-rw-r--r--unittests/ExecutionEngine/MCJIT/MCJITTest.cpp6
-rw-r--r--unittests/ExecutionEngine/MCJIT/MCJITTestBase.h17
-rw-r--r--unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp143
-rw-r--r--unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h118
-rw-r--r--unittests/ExecutionEngine/Makefile5
-rw-r--r--unittests/IR/AttributesTest.cpp34
-rw-r--r--unittests/IR/CMakeLists.txt37
-rw-r--r--unittests/IR/ConstantsTest.cpp260
-rw-r--r--unittests/IR/DominatorTreeTest.cpp204
-rw-r--r--unittests/IR/IRBuilderTest.cpp185
-rw-r--r--unittests/IR/InstructionsTest.cpp292
-rw-r--r--unittests/IR/MDBuilderTest.cpp106
-rw-r--r--unittests/IR/Makefile15
-rw-r--r--unittests/IR/MetadataTest.cpp152
-rw-r--r--unittests/IR/PassManagerTest.cpp552
-rw-r--r--unittests/IR/TypeBuilderTest.cpp253
-rw-r--r--unittests/IR/TypesTest.cpp30
-rw-r--r--unittests/IR/ValueMapTest.cpp294
-rw-r--r--unittests/IR/VerifierTest.cpp64
-rw-r--r--unittests/IR/WaymarkTest.cpp56
-rw-r--r--unittests/Makefile2
-rw-r--r--unittests/Option/CMakeLists.txt15
-rw-r--r--unittests/Option/OptionParsingTest.cpp106
-rw-r--r--unittests/Option/Opts.td13
-rw-r--r--unittests/Support/AlignOfTest.cpp26
-rw-r--r--unittests/Support/AllocatorTest.cpp1
-rw-r--r--unittests/Support/ArrayRecyclerTest.cpp109
-rw-r--r--unittests/Support/BlockFrequencyTest.cpp3
-rw-r--r--unittests/Support/CMakeLists.txt4
-rw-r--r--unittests/Support/Casting.cpp1
-rw-r--r--unittests/Support/CommandLineTest.cpp4
-rw-r--r--unittests/Support/ConstantRangeTest.cpp3
-rw-r--r--unittests/Support/EndianTest.cpp24
-rw-r--r--unittests/Support/ErrorOrTest.cpp104
-rw-r--r--unittests/Support/FileOutputBufferTest.cpp25
-rw-r--r--unittests/Support/IntegersSubsetTest.cpp4
-rw-r--r--unittests/Support/ManagedStatic.cpp25
-rw-r--r--unittests/Support/MemoryBufferTest.cpp1
-rw-r--r--unittests/Support/MemoryTest.cpp713
-rw-r--r--unittests/Support/Path.cpp18
-rw-r--r--unittests/Support/ProcessTest.cpp42
-rw-r--r--unittests/Support/RegexTest.cpp25
-rw-r--r--unittests/Support/ValueHandleTest.cpp9
-rw-r--r--unittests/Support/YAMLIOTest.cpp1299
-rw-r--r--unittests/Support/YAMLParserTest.cpp34
-rw-r--r--unittests/Support/formatted_raw_ostream_test.cpp4
-rw-r--r--unittests/Transforms/Utils/Cloning.cpp12
-rw-r--r--unittests/Transforms/Utils/IntegerDivision.cpp12
-rw-r--r--unittests/Transforms/Utils/Local.cpp9
-rw-r--r--unittests/VMCore/CMakeLists.txt35
-rw-r--r--unittests/VMCore/ConstantsTest.cpp122
-rw-r--r--unittests/VMCore/DominatorTreeTest.cpp195
-rw-r--r--unittests/VMCore/IRBuilderTest.cpp111
-rw-r--r--unittests/VMCore/InstructionsTest.cpp284
-rw-r--r--unittests/VMCore/MDBuilderTest.cpp107
-rw-r--r--unittests/VMCore/Makefile15
-rw-r--r--unittests/VMCore/MetadataTest.cpp152
-rw-r--r--unittests/VMCore/PassManagerTest.cpp548
-rw-r--r--unittests/VMCore/TypeBuilderTest.cpp254
-rw-r--r--unittests/VMCore/TypesTest.cpp30
-rw-r--r--unittests/VMCore/ValueMapTest.cpp295
-rw-r--r--unittests/VMCore/VerifierTest.cpp64
-rw-r--r--utils/FileCheck/FileCheck.cpp286
-rw-r--r--utils/FileUpdate/FileUpdate.cpp4
-rwxr-xr-xutils/GenLibDeps.pl2
-rw-r--r--utils/KillTheDoctor/KillTheDoctor.cpp7
-rw-r--r--utils/PerfectShuffle/PerfectShuffle.cpp6
-rw-r--r--utils/TableGen/AsmMatcherEmitter.cpp36
-rw-r--r--utils/TableGen/AsmWriterEmitter.cpp19
-rw-r--r--utils/TableGen/CMakeLists.txt3
-rw-r--r--utils/TableGen/CTagsEmitter.cpp99
-rw-r--r--utils/TableGen/CodeEmitterGen.cpp2
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.cpp219
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.h14
-rw-r--r--utils/TableGen/CodeGenInstruction.cpp6
-rw-r--r--utils/TableGen/CodeGenInstruction.h4
-rw-r--r--utils/TableGen/CodeGenIntrinsics.h2
-rw-r--r--utils/TableGen/CodeGenMapTable.cpp9
-rw-r--r--utils/TableGen/CodeGenRegisters.cpp49
-rw-r--r--utils/TableGen/CodeGenRegisters.h27
-rw-r--r--utils/TableGen/CodeGenSchedule.cpp300
-rw-r--r--utils/TableGen/CodeGenSchedule.h86
-rw-r--r--utils/TableGen/CodeGenTarget.cpp14
-rw-r--r--utils/TableGen/CodeGenTarget.h10
-rw-r--r--utils/TableGen/DAGISelMatcher.cpp4
-rw-r--r--utils/TableGen/DAGISelMatcher.h4
-rw-r--r--utils/TableGen/DAGISelMatcherEmitter.cpp4
-rw-r--r--utils/TableGen/DAGISelMatcherGen.cpp57
-rw-r--r--utils/TableGen/DFAPacketizerEmitter.cpp11
-rw-r--r--utils/TableGen/DisassemblerEmitter.cpp5
-rw-r--r--utils/TableGen/EDEmitter.cpp1011
-rw-r--r--utils/TableGen/FixedLenDecoderEmitter.cpp9
-rw-r--r--utils/TableGen/InstrInfoEmitter.cpp6
-rw-r--r--utils/TableGen/IntrinsicEmitter.cpp71
-rw-r--r--utils/TableGen/OptParserEmitter.cpp266
-rw-r--r--utils/TableGen/PseudoLoweringEmitter.cpp1
-rw-r--r--utils/TableGen/RegisterInfoEmitter.cpp110
-rw-r--r--utils/TableGen/SequenceToOffsetTable.h4
-rw-r--r--utils/TableGen/SetTheory.cpp2
-rw-r--r--utils/TableGen/SetTheory.h2
-rw-r--r--utils/TableGen/StringToOffsetTable.h2
-rw-r--r--utils/TableGen/SubtargetEmitter.cpp206
-rw-r--r--utils/TableGen/TableGen.cpp19
-rw-r--r--utils/TableGen/TableGenBackends.h3
-rw-r--r--utils/TableGen/X86DisassemblerShared.h2
-rw-r--r--utils/TableGen/X86DisassemblerTables.cpp5
-rw-r--r--utils/TableGen/X86DisassemblerTables.h2
-rw-r--r--utils/TableGen/X86RecognizableInstr.cpp33
-rw-r--r--utils/TableGen/X86RecognizableInstr.h8
-rw-r--r--utils/TableGen/tdtags453
-rwxr-xr-xutils/UpdateCMakeLists.pl2
-rwxr-xr-xutils/buildit/build_llvm164
-rwxr-xr-xutils/clang-parse-diagnostics-file36
-rw-r--r--utils/emacs/llvm-mode.el9
-rwxr-xr-xutils/git/find-rev8
-rw-r--r--utils/kate/llvm.xml1
-rw-r--r--utils/lit/MANIFEST.in7
-rw-r--r--utils/lit/TODO17
-rw-r--r--utils/lit/lit/ExampleTests/Clang/lit.cfg2
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/data.txt1
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp6
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/pct-S.ll1
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg75
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg3
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp10
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg3
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp10
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp6
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg75
-rw-r--r--utils/lit/lit/ExampleTests/ManyTests/lit.local.cfg23
-rw-r--r--utils/lit/lit/ExampleTests/TclTest/lit.local.cfg5
-rw-r--r--utils/lit/lit/ExampleTests/TclTest/stderr-pipe.ll1
-rw-r--r--utils/lit/lit/ExampleTests/TclTest/tcl-redir-1.ll7
-rw-r--r--utils/lit/lit/ExampleTests/lit.cfg4
-rw-r--r--utils/lit/lit/LitConfig.py14
-rw-r--r--utils/lit/lit/LitFormats.py3
-rw-r--r--utils/lit/lit/ShUtil.py22
-rw-r--r--utils/lit/lit/TclUtil.py322
-rw-r--r--utils/lit/lit/Test.py4
-rw-r--r--utils/lit/lit/TestFormats.py58
-rw-r--r--utils/lit/lit/TestRunner.py180
-rw-r--r--utils/lit/lit/__init__.py2
-rw-r--r--utils/lit/lit/discovery.py234
-rwxr-xr-xutils/lit/lit/main.py267
-rw-r--r--utils/lit/tests/.coveragerc11
-rw-r--r--utils/lit/tests/Inputs/discovery/lit.cfg5
-rw-r--r--utils/lit/tests/Inputs/discovery/subdir/lit.local.cfg1
-rw-r--r--utils/lit/tests/Inputs/discovery/subdir/test-three.py1
-rw-r--r--utils/lit/tests/Inputs/discovery/subsuite/lit.cfg5
-rw-r--r--utils/lit/tests/Inputs/discovery/subsuite/test-one.txt1
-rw-r--r--utils/lit/tests/Inputs/discovery/subsuite/test-two.txt1
-rw-r--r--utils/lit/tests/Inputs/discovery/test-one.txt1
-rw-r--r--utils/lit/tests/Inputs/discovery/test-two.txt1
-rw-r--r--utils/lit/tests/Inputs/shtest-format/external_shell/fail.txt3
-rw-r--r--utils/lit/tests/Inputs/shtest-format/external_shell/lit.local.cfg1
-rw-r--r--utils/lit/tests/Inputs/shtest-format/external_shell/pass.txt1
-rw-r--r--utils/lit/tests/Inputs/shtest-format/fail.txt1
-rw-r--r--utils/lit/tests/Inputs/shtest-format/lit.cfg7
-rw-r--r--utils/lit/tests/Inputs/shtest-format/no-test-line.txt1
-rw-r--r--utils/lit/tests/Inputs/shtest-format/pass.txt1
-rw-r--r--utils/lit/tests/Inputs/shtest-format/requires-missing.txt2
-rw-r--r--utils/lit/tests/Inputs/shtest-format/requires-present.txt2
-rw-r--r--utils/lit/tests/Inputs/shtest-format/unsupported_dir/lit.local.cfg1
-rw-r--r--utils/lit/tests/Inputs/shtest-format/unsupported_dir/some-test.txt1
-rw-r--r--utils/lit/tests/Inputs/shtest-format/xfail-feature.txt2
-rw-r--r--utils/lit/tests/Inputs/shtest-format/xfail-target.txt2
-rw-r--r--utils/lit/tests/Inputs/shtest-format/xfail.txt2
-rw-r--r--utils/lit/tests/Inputs/shtest-format/xpass.txt2
-rw-r--r--utils/lit/tests/Inputs/shtest-shell/error-0.txt3
-rw-r--r--utils/lit/tests/Inputs/shtest-shell/error-1.txt3
-rw-r--r--utils/lit/tests/Inputs/shtest-shell/error-2.txt3
-rw-r--r--utils/lit/tests/Inputs/shtest-shell/lit.cfg5
-rw-r--r--utils/lit/tests/Inputs/shtest-shell/redirects.txt41
-rw-r--r--utils/lit/tests/Inputs/shtest-shell/sequencing-0.txt28
-rw-r--r--utils/lit/tests/Inputs/shtest-shell/sequencing-1.txt2
-rwxr-xr-xutils/lit/tests/Inputs/shtest-shell/write-to-stderr.sh3
-rwxr-xr-xutils/lit/tests/Inputs/shtest-shell/write-to-stdout-and-stderr.sh4
-rw-r--r--utils/lit/tests/Inputs/unittest-adaptor/lit.cfg5
-rw-r--r--utils/lit/tests/Inputs/unittest-adaptor/test-one.txt1
-rw-r--r--utils/lit/tests/Inputs/unittest-adaptor/test-two.txt1
-rw-r--r--utils/lit/tests/discovery.py25
-rw-r--r--utils/lit/tests/lit.cfg36
-rw-r--r--utils/lit/tests/shell-parsing.py3
-rw-r--r--utils/lit/tests/shtest-format.py43
-rw-r--r--utils/lit/tests/shtest-shell.py33
-rw-r--r--utils/lit/tests/unittest-adaptor.py18
-rw-r--r--utils/lit/tests/usage.py6
-rw-r--r--utils/lit/utils/README.txt2
-rwxr-xr-xutils/lit/utils/check-coverage50
-rwxr-xr-xutils/lit/utils/check-sdist44
-rw-r--r--utils/llvm-build/llvmbuild/main.py8
-rwxr-xr-xutils/llvm-compilers-check112
-rw-r--r--utils/llvm-lit/llvm-lit.in3
-rw-r--r--utils/llvm.grm4
-rw-r--r--utils/llvm.natvis181
-rw-r--r--utils/obj2yaml/Makefile20
-rw-r--r--utils/obj2yaml/coff2yaml.cpp362
-rw-r--r--utils/obj2yaml/obj2yaml.cpp89
-rw-r--r--utils/obj2yaml/obj2yaml.h35
-rwxr-xr-xutils/sort_includes.py87
-rw-r--r--utils/testgen/mc-bundling-x86-gen.py103
-rw-r--r--utils/textmate/README8
-rw-r--r--utils/textmate/TableGen.tmbundle/Syntaxes/TableGen.tmLanguage132
-rw-r--r--utils/textmate/TableGen.tmbundle/info.plist12
-rw-r--r--utils/unittest/UnitTestMain/TestMain.cpp2
-rw-r--r--utils/unittest/googletest/Makefile2
-rw-r--r--utils/unittest/googletest/README.LLVM3
-rw-r--r--utils/unittest/googletest/gtest-all.cc48
-rw-r--r--utils/unittest/googletest/gtest-filepath.cc2
-rw-r--r--utils/unittest/googletest/gtest-printers.cc4
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-internal.h9
-rw-r--r--utils/valgrind/x86_64-pc-linux-gnu.supp6
-rw-r--r--utils/vim/llvm.vim72
-rw-r--r--utils/vim/vimrc9
-rwxr-xr-xutils/wciia.py125
-rw-r--r--utils/yaml-bench/YAMLBench.cpp4
-rw-r--r--utils/yaml2obj/yaml2obj.cpp933
4182 files changed, 418797 insertions(+), 203079 deletions(-)
diff --git a/.arcconfig b/.arcconfig
new file mode 100644
index 000000000000..4711195a1d01
--- /dev/null
+++ b/.arcconfig
@@ -0,0 +1,4 @@
+{
+ "project_id" : "llvm",
+ "conduit_uri" : "http://llvm-reviews.chandlerc.com/"
+}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d3edc0219858..6871e654fb1f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,7 +11,7 @@ set(CMAKE_MODULE_PATH
)
set(LLVM_VERSION_MAJOR 3)
-set(LLVM_VERSION_MINOR 2)
+set(LLVM_VERSION_MINOR 3)
set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}svn")
@@ -74,8 +74,8 @@ set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples)
set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
set(LLVM_ALL_TARGETS
+ AArch64
ARM
- CellSPU
CppBackend
Hexagon
Mips
@@ -186,13 +186,16 @@ endif( LLVM_USE_INTEL_JITEVENTS )
option(LLVM_USE_OPROFILE
"Use opagent JIT interface to inform OProfile about JIT code" OFF)
-# If enabled, ierify we are on a platform that supports oprofile.
+# If enabled, verify we are on a platform that supports oprofile.
if( LLVM_USE_OPROFILE )
if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
message(FATAL_ERROR "OProfile support is available on Linux only.")
endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
endif( LLVM_USE_OPROFILE )
+set(LLVM_USE_SANITIZER "" CACHE STRING
+ "Define the sanitizer used to build binaries and tests.")
+
# Define an option controlling whether we should build for 32-bit on 64-bit
# platforms, where supported.
if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
@@ -243,8 +246,7 @@ include(config-ix)
# invocation time.
set(LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_HOST_TRIPLE}" CACHE STRING
"Default target for which LLVM will generate code." )
-set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}" CACHE STRING
- "Default target for which LLVM will generate code." )
+set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}")
include(HandleLLVMOptions)
@@ -377,10 +379,21 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON)
include_directories( ${LLVM_BINARY_DIR}/include ${LLVM_MAIN_INCLUDE_DIR})
+if( ${CMAKE_SYSTEM_NAME} MATCHES FreeBSD )
+ # On FreeBSD, /usr/local/* is not used by default. In order to build LLVM
+ # with libxml2, iconv.h, etc., we must add /usr/local paths.
+ include_directories("/usr/local/include")
+ link_directories("/usr/local/lib")
+endif( ${CMAKE_SYSTEM_NAME} MATCHES FreeBSD )
+
if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include llvm/Support/Solaris.h")
endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
+# Make sure we don't get -rdynamic in every binary. For those that need it,
+# use set_target_properties(target PROPERTIES ENABLE_EXPORTS 1)
+set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
+
include(AddLLVM)
include(TableGen)
@@ -406,7 +419,6 @@ add_subdirectory(utils/count)
add_subdirectory(utils/not)
add_subdirectory(utils/llvm-lit)
add_subdirectory(utils/yaml-bench)
-add_subdirectory(utils/obj2yaml)
add_subdirectory(utils/yaml2obj)
add_subdirectory(projects)
@@ -428,7 +440,7 @@ if( LLVM_INCLUDE_TESTS )
add_subdirectory(utils/unittest)
add_subdirectory(unittests)
if (MSVC)
- # This utility is used to prevent chrashing tests from calling Dr. Watson on
+ # This utility is used to prevent crashing tests from calling Dr. Watson on
# Windows.
add_subdirectory(utils/KillTheDoctor)
endif()
diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT
index fd7bcda3b768..10bf071801fd 100644
--- a/CODE_OWNERS.TXT
+++ b/CODE_OWNERS.TXT
@@ -8,9 +8,30 @@ beautification by scripts. The fields are: name (N), email (E), web-address
(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
(S).
+N: Joe Abbey
+E: jabbey@arxan.com
+D: LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*)
+
+N: Owen Anderson
+E: resistor@mac.com
+D: SelectionDAG (lib/CodeGen/SelectionDAG/*)
+
+N: Rafael Avila de Espindola
+E: rafael.espindola@gmail.com
+D: Gold plugin (tools/gold/*)
+
+N: Chandler Carruth
+E: chandlerc@gmail.com
+E: chandlerc@google.com
+D: Config, ADT, Support, inlining & related passse, SROA/mem2reg & related passes, CMake, library layering
+
N: Evan Cheng
E: evan.cheng@apple.com
-D: Code generator and all targets
+D: ARM target, parts of code generator not covered by someone else
+
+N: Eric Christopher
+E: echristo@gmail.com
+D: Debug Information, autotools/configure/make build, inline assembly
N: Greg Clayton
D: LLDB
@@ -18,34 +39,93 @@ D: LLDB
N: Peter Collingbourne
D: libclc
-N: Doug Gregor
-D: Clang Frontend Libraries
+N: Anshuman Dasgupta
+E: adasgupt@codeaurora.org
+D: Hexagon Backend
+
+N: Hal Finkel
+E: hfinkel@anl.gov
+D: BBVectorize and the PowerPC target
+
+N: Venkatraman Govindaraju
+E: venkatra@cs.wisc.edu
+D: Sparc Backend (lib/Target/Sparc/*)
N: Tobias Grosser
D: Polly
+N: James Grosbach
+E: grosbach@apple.com
+D: MC layer
+
N: Howard Hinnant
D: libc++
+N: Justin Holewinski
+E: jholewinski@nvidia.com
+D: NVPTX Target (lib/Target/NVPTX/*)
+
+N: Andy Kaylor
+E: andrew.kaylor@intel.com
+D: MCJIT, RuntimeDyld and JIT event listeners
+
+N: Galina Kistanova
+E: gkistanova@gmail.com
+D: LLVM Buildbot
+
N: Anton Korobeynikov
-E: asl@math.spbu.ru
-D: Exception handling, debug information, and Windows codegen
+E: anton@korobeynikov.info
+D: Exception handling, Windows codegen, ARM EABI
+
+N: Benjamin Kramer
+E: benny.kra@gmail.com
+D: DWARF Parser
-N: Ted Kremenek
-D: Clang Static Analyzer
+N: Sergei Larin
+E: slarin@codeaurora.org
+D: VLIW Instruction Scheduling, Packetization
N: Chris Lattner
E: sabre@nondot.org
W: http://nondot.org/~sabre/
D: Everything not covered by someone else
-N: John McCall
-E: rjmccall@apple.com
-D: Clang LLVM IR generation
+N: Tim Northover
+E: Tim.Northover@arm.com
+D: AArch64 backend
N: Jakob Olesen
D: Register allocators and TableGen
+N: Richard Osborne
+E: richard@xmos.com
+D: XCore Backend
+
+N: Chad Rosier
+E: mcrosier@apple.com
+D: Fast-Isel
+
+N: Nadav Rotem
+E: nrotem@apple.com
+D: X86 Backend, Loop Vectorizer
+
N: Duncan Sands
E: baldrick@free.fr
D: DragonEgg
+
+N: Michael Spencer
+E: bigcheesegs@gmail.com
+D: Windows parts of Support, Object, ar, nm, objdump, ranlib, size
+
+N: Tom Stellard
+E: thomas.stellard@amd.com
+E: mesa-dev@lists.freedesktop.org
+D: R600 Backend
+
+N: Andrew Trick
+E: atrick@apple.com
+D: IndVar Simplify, Loop Strength Reduction, Instruction Scheduling
+
+N: Bill Wendling
+E: wendling@apple.com
+D: libLTO & IR Linker
diff --git a/CREDITS.TXT b/CREDITS.TXT
index 02579182589a..e89f19e79446 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -60,9 +60,11 @@ D: Loop unrolling with run-time trip counts.
N: Chandler Carruth
E: chandlerc@gmail.com
+E: chandlerc@google.com
D: Hashing algorithms and interfaces
D: Inline cost analysis
D: Machine block placement pass
+D: SROA
N: Casey Carter
E: ccarter@uiuc.edu
@@ -98,7 +100,7 @@ E: adasgupt@codeaurora.org
D: Deterministic finite automaton based infrastructure for VLIW packetization
N: Stefanus Du Toit
-E: stefanus.dutoit@rapidmind.com
+E: stefanus.du.toit@intel.com
D: Bug fixes and minor improvements
N: Rafael Avila de Espindola
@@ -141,7 +143,7 @@ E: foldr@codedgers.com
D: Author of llvmc2
N: Dan Gohman
-E: gohman@apple.com
+E: dan433584@gmail.com
D: Miscellaneous bug fixes
N: David Goodwin
@@ -361,8 +363,8 @@ D: ARM fast-isel improvements
D: Performance monitoring
N: Nadav Rotem
-E: nadav.rotem@intel.com
-D: Vector code generation improvements.
+E: nrotem@apple.com
+D: X86 code generation improvements, Loop Vectorizer.
N: Roman Samoilov
E: roman@codedgers.com
@@ -402,6 +404,10 @@ E: rspencer@reidspencer.com
W: http://reidspencer.com/
D: Lots of stuff, see: http://wiki.llvm.org/index.php/User:Reid
+N: Craig Topper
+E: craig.topper@gmail.com
+D: X86 codegen and disassembler improvements. AVX2 support.
+
N: Edwin Torok
E: edwintorok@gmail.com
D: Miscellaneous bug fixes
@@ -417,7 +423,6 @@ D: Thread Local Storage implementation
N: Bill Wendling
E: wendling@apple.com
-D: Exception handling
D: Bunches of stuff
N: Bob Wilson
diff --git a/LICENSE.TXT b/LICENSE.TXT
index 00cf60116941..aa7b11922ec0 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -4,7 +4,7 @@ LLVM Release License
University of Illinois/NCSA
Open Source License
-Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:
@@ -64,7 +64,7 @@ Program Directory
Autoconf llvm/autoconf
llvm/projects/ModuleMaker/autoconf
llvm/projects/sample/autoconf
-CellSPU backend llvm/lib/Target/CellSPU/README.txt
Google Test llvm/utils/unittest/googletest
OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT}
+ARM contributions llvm/lib/Target/ARM/LICENSE.TXT
diff --git a/Makefile b/Makefile
index 1e5dae470d26..7a1b19045945 100644
--- a/Makefile
+++ b/Makefile
@@ -11,8 +11,8 @@ LEVEL := .
# Top-Level LLVM Build Stages:
# 1. Build lib/Support and lib/TableGen, which are used by utils (tblgen).
-# 2. Build utils, which is used by VMCore.
-# 3. Build VMCore, which builds the Intrinsics.inc file used by libs.
+# 2. Build utils, which is used by IR.
+# 3. Build IR, which builds the Intrinsics.inc file used by libs.
# 4. Build libs, which are needed by llvm-config.
# 5. Build llvm-config, which determines inter-lib dependencies for tools.
# 6. Build tools, runtime, docs.
@@ -30,7 +30,7 @@ ifeq ($(BUILD_DIRS_ONLY),1)
DIRS := lib/Support lib/TableGen utils tools/llvm-config
OPTIONAL_DIRS := tools/clang/utils/TableGen
else
- DIRS := lib/Support lib/TableGen utils lib/VMCore lib tools/llvm-shlib \
+ DIRS := lib/Support lib/TableGen utils lib/IR lib tools/llvm-shlib \
tools/llvm-config tools runtime docs unittests
OPTIONAL_DIRS := projects bindings
endif
@@ -248,13 +248,26 @@ build-for-llvm-top:
SVN = svn
SVN-UPDATE-OPTIONS =
AWK = awk
-SUB-SVN-DIRS = $(AWK) '/I|\? / {print $$2}' \
- | LC_ALL=C xargs $(SVN) info 2>/dev/null \
- | $(AWK) '/^Path:\ / {print $$2}'
+
+# Multiline variable defining a recursive function for finding svn repos rooted at
+# a given path. svnup() requires one argument: the root to search from.
+define SUB_SVN_DIRS
+svnup() {
+ dirs=`svn status --no-ignore $$1 | awk '/I|\? / {print $$2}' | LC_ALL=C xargs svn info 2>/dev/null | awk '/^Path:\ / {print $$2}'`;
+ if [ "$$dirs" = "" ]; then
+ return;
+ fi;
+ for f in $$dirs; do
+ echo $$f;
+ svnup $$f;
+ done
+}
+endef
+export SUB_SVN_DIRS
update:
$(SVN) $(SVN-UPDATE-OPTIONS) update $(LLVM_SRC_ROOT)
- @ $(SVN) status --no-ignore $(LLVM_SRC_ROOT) | $(SUB-SVN-DIRS) | xargs $(SVN) $(SVN-UPDATE-OPTIONS) update
+ @eval $$SUB_SVN_DIRS; $(SVN) status --no-ignore $(LLVM_SRC_ROOT) | svnup $(LLVM_SRC_ROOT) | xargs $(SVN) $(SVN-UPDATE-OPTIONS) update
happiness: update all check-all
diff --git a/Makefile.common b/Makefile.common
index 55e2b63434c8..a157abaef274 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -28,7 +28,7 @@
# built in any order. All DIRS are built in order before PARALLEL_DIRS are
# built, which are then built in any order.
#
-# 4. Source - If specified, this sets the source code filenames. If this
+# 4. SOURCES - If specified, this sets the source code filenames. If this
# is not set, it defaults to be all of the .cpp, .c, .y, and .l files
# in the current directory.
#
diff --git a/Makefile.config.in b/Makefile.config.in
index b4ecea631e3c..26e3709fee00 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -134,6 +134,9 @@ BUILD_CXX=@BUILD_CXX@
# Triple for configuring build tools when cross-compiling
BUILD_TRIPLE=@build@
+# Target triple (cpu-vendor-os) which LLVM is compiled for
+HOST_TRIPLE=@host@
+
# Target triple (cpu-vendor-os) for which we should generate code
TARGET_TRIPLE=@target@
@@ -153,8 +156,17 @@ CXX = @CXX@
# Path to the CC binary, which use used by testcases for native builds.
CC := @CC@
+# C/C++ preprocessor flags.
+CPPFLAGS += @CPPFLAGS@
+
+# C compiler flags.
+CFLAGS += @CFLAGS@
+
+# C++ compiler flags.
+CXXFLAGS += @CXXFLAGS@
+
# Linker flags.
-LDFLAGS+=@LDFLAGS@
+LDFLAGS += @LDFLAGS@
# Path to the library archiver program.
AR_PATH = @AR@
@@ -176,6 +188,7 @@ RANLIB := @RANLIB@
RM := @RM@
SED := @SED@
TAR := @TAR@
+PYTHON := @PYTHON@
# Paths to miscellaneous programs we hope are present but might not be
BZIP2 := @BZIP2@
@@ -222,6 +235,15 @@ ENABLE_LIBCPP = @ENABLE_LIBCPP@
# When ENABLE_CXX11 is enabled, LLVM uses c++11 mode by default to build.
ENABLE_CXX11 = @ENABLE_CXX11@
+# When ENABLE_CLANG_ARCMT is enabled, clang will have ARCMigrationTool.
+ENABLE_CLANG_ARCMT = @ENABLE_CLANG_ARCMT@
+
+# When ENABLE_CLANG_REWRITER is enabled, clang will have Rewriter.
+ENABLE_CLANG_REWRITER = @ENABLE_CLANG_REWRITER@
+
+# When ENABLE_CLANG_STATIC_ANALYZER is enabled, clang will have StaticAnalyzer.
+ENABLE_CLANG_STATIC_ANALYZER = @ENABLE_CLANG_STATIC_ANALYZER@
+
# When ENABLE_WERROR is enabled, we'll pass -Werror on the command line
ENABLE_WERROR = @ENABLE_WERROR@
@@ -278,7 +300,7 @@ ENABLE_DOCS = @ENABLE_DOCS@
ENABLE_DOXYGEN = @ENABLE_DOXYGEN@
# Do we want to enable threads?
-ENABLE_THREADS := @ENABLE_THREADS@
+ENABLE_THREADS := @LLVM_ENABLE_THREADS@
# Do we want to build with position independent code?
ENABLE_PIC := @ENABLE_PIC@
@@ -349,6 +371,10 @@ NO_MISSING_FIELD_INITIALIZERS = @NO_MISSING_FIELD_INITIALIZERS@
NO_VARIADIC_MACROS = @NO_VARIADIC_MACROS@
# -Wcovered-switch-default
COVERED_SWITCH_DEFAULT = @COVERED_SWITCH_DEFAULT@
+# -Wno-uninitialized
+NO_UNINITIALIZED = @NO_UNINITIALIZED@
+# -Wno-maybe-uninitialized
+NO_MAYBE_UNINITIALIZED = @NO_MAYBE_UNINITIALIZED@
# Was polly found in tools/polly?
LLVM_HAS_POLLY = @LLVM_HAS_POLLY@
diff --git a/Makefile.rules b/Makefile.rules
index b2b02c25d44b..2c834aac63fc 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -97,7 +97,7 @@ endif
$(LLVMBuildMakeFrag): $(PROJ_SRC_ROOT)/Makefile.rules \
$(PROJ_OBJ_ROOT)/Makefile.config
$(Echo) Constructing LLVMBuild project information.
- $(Verb) $(LLVMBuildTool) \
+ $(Verb)$(PYTHON) $(LLVMBuildTool) \
--native-target "$(TARGET_NATIVE_ARCH)" \
--enable-targets "$(TARGETS_TO_BUILD)" \
--enable-optional-components "$(OPTIONAL_COMPONENTS)" \
@@ -280,12 +280,6 @@ ifeq ($(ENABLE_OPTIMIZED),1)
endif
endif
- # Darwin requires -fstrict-aliasing to be explicitly enabled.
- # Avoid -fstrict-aliasing on Darwin for now, there are unresolved issues
- # with -fstrict-aliasing and ipa-type-escape radr://6756684
- #ifeq ($(HOST_OS),Darwin)
- # EXTRA_OPTIONS += -fstrict-aliasing -Wstrict-aliasing
- #endif
CXX.Flags += $(OPTIMIZE_OPTION) $(OmitFramePointer)
C.Flags += $(OPTIMIZE_OPTION) $(OmitFramePointer)
LD.Flags += $(OPTIMIZE_OPTION)
@@ -583,16 +577,24 @@ ifeq ($(HOST_OS),Darwin)
LoadableModuleOptions := -Wl,-flat_namespace -Wl,-undefined,suppress
SharedLinkOptions := -dynamiclib
- ifneq ($(ARCH),ARM)
- SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION)
+ ifdef DEPLOYMENT_TARGET
+ SharedLinkOptions += $(DEPLOYMENT_TARGET)
+ else
+ ifneq ($(ARCH),ARM)
+ SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION)
+ endif
endif
else
SharedLinkOptions=-shared
endif
ifeq ($(TARGET_OS),Darwin)
- ifneq ($(ARCH),ARM)
- TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION)
+ ifdef DEPLOYMENT_TARGET
+ TargetCommonOpts += $(DEPLOYMENT_TARGET)
+ else
+ ifneq ($(ARCH),ARM)
+ TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION)
+ endif
endif
endif
@@ -648,7 +650,7 @@ else
ifneq ($(DARWIN_MAJVERS),4)
LD.Flags += $(RPATH) -Wl,@executable_path/../lib
endif
- ifeq ($(RC_BUILDIT),YES)
+ ifeq ($(RC_XBS),YES)
TempFile := $(shell mkdir -p ${OBJROOT}/dSYMs ; mktemp ${OBJROOT}/dSYMs/llvm-lto.XXXXXX)
LD.Flags += -Wl,-object_path_lto -Wl,$(TempFile)
endif
@@ -668,7 +670,9 @@ ifndef NO_PEDANTIC
CompileCommonOpts += -pedantic -Wno-long-long
endif
CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \
- $(EXTRA_OPTIONS) $(COVERED_SWITCH_DEFAULT)
+ $(EXTRA_OPTIONS) $(COVERED_SWITCH_DEFAULT) \
+ $(NO_UNINITIALIZED) $(NO_MAYBE_UNINITIALIZED) \
+ $(NO_MISSING_FIELD_INITIALIZERS)
# Enable cast-qual for C++; the workaround is to use const_cast.
CXX.Flags += -Wcast-qual
@@ -824,7 +828,7 @@ ObjectsBC := $(BaseNameSources:%=$(ObjDir)/%.bc)
#----------------------------------------------------------
ifeq (-mingw32,$(findstring -mingw32,$(BUILD_TRIPLE)))
- ECHOPATH := $(Verb)python -u -c "import sys;print ' '.join(sys.argv[1:])"
+ ECHOPATH := $(Verb)$(PYTHON) -u -c "import sys;print ' '.join(sys.argv[1:])"
else
ECHOPATH := $(Verb)$(ECHO)
endif
@@ -1814,7 +1818,7 @@ TDFiles := $(strip $(wildcard $(PROJ_SRC_DIR)/*.td) \
$(LLVM_SRC_ROOT)/include/llvm/Target/TargetSchedule.td \
$(LLVM_SRC_ROOT)/include/llvm/Target/TargetSelectionDAG.td \
$(LLVM_SRC_ROOT)/include/llvm/CodeGen/ValueTypes.td) \
- $(wildcard $(LLVM_SRC_ROOT)/include/llvm/Intrinsics*.td)
+ $(wildcard $(LLVM_SRC_ROOT)/include/llvm/IR/Intrinsics*.td)
# All .inc.tmp files depend on the .td files.
$(INCTMPFiles) : $(TDFiles)
@@ -1869,11 +1873,6 @@ $(ObjDir)/%GenDisassemblerTables.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
$(Echo) "Building $(<F) disassembly tables with tblgen"
$(Verb) $(LLVMTableGen) -gen-disassembler -o $(call SYSPATH, $@) $<
-$(TARGET:%=$(ObjDir)/%GenEDInfo.inc.tmp): \
-$(ObjDir)/%GenEDInfo.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
- $(Echo) "Building $(<F) enhanced disassembly information with tblgen"
- $(Verb) $(LLVMTableGen) -gen-enhanced-disassembly-info -o $(call SYSPATH, $@) $<
-
$(TARGET:%=$(ObjDir)/%GenFastISel.inc.tmp): \
$(ObjDir)/%GenFastISel.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
$(Echo) "Building $(<F) \"fast\" instruction selector implementation with tblgen"
diff --git a/README.txt b/README.txt
index 0d39ed6934b5..193330f774aa 100644
--- a/README.txt
+++ b/README.txt
@@ -8,10 +8,10 @@ optimizers, and runtime environments.
LLVM is open source software. You may freely distribute it under the terms of
the license agreement found in LICENSE.txt.
-Please see the HTML documentation provided in docs/index.html for further
-assistance with LLVM.
+Please see the documentation provided in docs/ for further
+assistance with LLVM, and in particular docs/GettingStarted.rst for getting
+started with LLVM and docs/README.txt for an overview of LLVM's
+documentation setup.
-If you're writing a package for LLVM, see docs/Packaging.html for our
+If you're writing a package for LLVM, see docs/Packaging.rst for our
suggestions.
-
-
diff --git a/autoconf/AutoRegen.sh b/autoconf/AutoRegen.sh
index 7809667ac5f1..cbca7387a85c 100755
--- a/autoconf/AutoRegen.sh
+++ b/autoconf/AutoRegen.sh
@@ -13,7 +13,7 @@ clean() {
### These variables specify the tool versions we want to use.
### Periods should be escaped with backslash for use by grep.
###
-### If you update these, please also update docs/GettingStarted.html
+### If you update these, please also update docs/GettingStarted.rst
want_autoconf_version='2\.60'
want_autoheader_version=$want_autoconf_version
want_aclocal_version='1\.9\.6'
diff --git a/autoconf/config.sub b/autoconf/config.sub
index 9942491533e8..a8d85281f902 100755
--- a/autoconf/config.sub
+++ b/autoconf/config.sub
@@ -251,7 +251,8 @@ case $basic_machine in
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
| am33_2.0 \
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
- | be32 | be64 \
+ | aarch64 \
+ | be32 | be64 \
| bfin \
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx \
@@ -359,6 +360,7 @@ case $basic_machine in
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
+ | aarch64-* \
| avr-* | avr32-* \
| be32-* | be64-* \
| bfin-* | bs2000-* \
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 7715531a338d..0097db3d681d 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -31,21 +31,21 @@ dnl===
dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
dnl address for reporting bugs.
-AC_INIT([LLVM],[3.2svn],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.3svn],[http://llvm.org/bugs/])
AC_DEFINE([LLVM_VERSION_MAJOR], [3], [Major version of the LLVM API])
-AC_DEFINE([LLVM_VERSION_MINOR], [2], [Minor version of the LLVM API])
+AC_DEFINE([LLVM_VERSION_MINOR], [3], [Minor version of the LLVM API])
dnl Provide a copyright substitution and ensure the copyright notice is included
dnl in the output of --version option of the generated configure script.
-AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign."])
-AC_COPYRIGHT([Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.])
+AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign."])
+AC_COPYRIGHT([Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.])
dnl Indicate that we require autoconf 2.60 or later.
AC_PREREQ(2.60)
dnl Verify that the source directory is valid. This makes sure that we are
dnl configuring LLVM and not some other package (it validates --srcdir argument)
-AC_CONFIG_SRCDIR([lib/VMCore/Module.cpp])
+AC_CONFIG_SRCDIR([lib/IR/Module.cpp])
dnl Place all of the extra autoconf files into the config subdirectory. Tell
dnl various tools where the m4 autoconf macros are.
@@ -59,12 +59,43 @@ if test ${srcdir} != "." ; then
fi
fi
+dnl Default to empty (i.e. assigning the null string to) CFLAGS and CXXFLAGS,
+dnl instead of the autoconf default (for example, '-g -O2' for CC=gcc).
+${CFLAGS=}
+${CXXFLAGS=}
+
dnl We need to check for the compiler up here to avoid anything else
dnl starting with a different one.
AC_PROG_CC(clang llvm-gcc gcc)
AC_PROG_CXX(clang++ llvm-g++ g++)
AC_PROG_CPP
+dnl If CXX is Clang, check that it can find and parse C++ standard library
+dnl headers.
+if test "$CXX" = "clang++" ; then
+ AC_MSG_CHECKING([whether clang works])
+ AC_LANG_PUSH([C++])
+ dnl Note that space between 'include' and '(' is required. There's a broken
+ dnl regex in aclocal that otherwise will think that we call m4's include
+ dnl builtin.
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <limits>
+#if __has_include (<cxxabi.h>)
+#include <cxxabi.h>
+#endif
+#if __has_include (<unwind.h>)
+#include <unwind.h>
+#endif
+]])],
+[
+ AC_MSG_RESULT([yes])
+],
+[
+ AC_MSG_RESULT([no])
+ AC_MSG_ERROR([Selected compiler could not find or parse C++ standard library headers. Rerun with CC=c-compiler CXX=c++-compiler ./configure ...])
+])
+ AC_LANG_POP([C++])
+fi
+
dnl Configure all of the projects present in our source tree. While we could
dnl just AC_CONFIG_SUBDIRS on the set of directories in projects that have a
dnl configure script, that usage of the AC_CONFIG_SUBDIRS macro is deprecated.
@@ -363,6 +394,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
+ aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
@@ -396,6 +428,7 @@ case $host in
sparc*-*) host_arch="Sparc" ;;
powerpc*-*) host_arch="PowerPC" ;;
arm*-*) host_arch="ARM" ;;
+ aarch64*-*) host_arch="AArch64" ;;
mips-* | mips64-*) host_arch="Mips" ;;
mipsel-* | mips64el-*) host_arch="Mips" ;;
xcore-*) host_arch="XCore" ;;
@@ -475,6 +508,54 @@ case "$enableval" in
*) AC_MSG_ERROR([Invalid setting for --enable-cxx11. Use "yes" or "no"]) ;;
esac
+dnl --enable-clang-arcmt: check whether to enable clang arcmt
+clang_arcmt="yes"
+AC_ARG_ENABLE(clang-arcmt,
+ AS_HELP_STRING([--enable-clang-arcmt],
+ [Enable building of clang ARCMT (default is YES)]),
+ clang_arcmt="$enableval",
+ enableval="yes")
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_CLANG_ARCMT,[1]) ;;
+ no) AC_SUBST(ENABLE_CLANG_ARCMT,[0]) ;;
+ default) AC_SUBST(ENABLE_CLANG_ARCMT,[1]);;
+ *) AC_MSG_ERROR([Invalid setting for --enable-clang-arcmt. Use "yes" or "no"]) ;;
+esac
+
+dnl --enable-clang-static-analyzer: check whether to enable static-analyzer
+clang_static_analyzer="yes"
+AC_ARG_ENABLE(clang-static-analyzer,
+ AS_HELP_STRING([--enable-clang-static-analyzer],
+ [Enable building of clang Static Analyzer (default is YES)]),
+ clang_static_analyzer="$enableval",
+ enableval="yes")
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_CLANG_STATIC_ANALYZER,[1]) ;;
+ no) AC_SUBST(ENABLE_CLANG_STATIC_ANALYZER,[0]) ;;
+ default) AC_SUBST(ENABLE_CLANG_STATIC_ANALYZER,[1]);;
+ *) AC_MSG_ERROR([Invalid setting for --enable-clang-static-analyzer. Use "yes" or "no"]) ;;
+esac
+
+dnl --enable-clang-rewriter: check whether to enable clang rewriter
+AC_ARG_ENABLE(clang-rewriter,
+ AS_HELP_STRING([--enable-clang-rewriter],
+ [Enable building of clang rewriter (default is YES)]),,
+ enableval="yes")
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_CLANG_REWRITER,[1]) ;;
+ no)
+ if test ${clang_arcmt} != "no" ; then
+ AC_MSG_ERROR([Cannot enable clang ARC Migration Tool while disabling rewriter.])
+ fi
+ if test ${clang_static_analyzer} != "no" ; then
+ AC_MSG_ERROR([Cannot enable clang static analyzer while disabling rewriter.])
+ fi
+ AC_SUBST(ENABLE_CLANG_REWRITER,[0])
+ ;;
+ default) AC_SUBST(ENABLE_CLANG_REWRITER,[1]);;
+ *) AC_MSG_ERROR([Invalid setting for --enable-clang-rewriter. Use "yes" or "no"]) ;;
+esac
+
dnl --enable-optimized : check whether they want to do an optimized build:
AC_ARG_ENABLE(optimized, AS_HELP_STRING(
--enable-optimized,[Compile with optimizations enabled (default is NO)]),,enableval=$optimize)
@@ -566,6 +647,7 @@ else
PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ AArch64) AC_SUBST(TARGET_HAS_JIT,0) ;;
Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
@@ -697,26 +779,26 @@ dnl Allow specific targets to be specified for building (or not)
TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
- host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
+ host, x86, x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
xcore, msp430, nvptx, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+ aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mips64) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mips64el) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
- spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
@@ -731,7 +813,6 @@ case "$enableval" in
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
- CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
@@ -1165,10 +1246,15 @@ fi
dnl Verify that GCC is version 3.0 or higher
if test "$GCC" = "yes"
then
- AC_COMPILE_IFELSE([[#if !defined(__GNUC__) || __GNUC__ < 3
-#error Unsupported GCC version
-#endif
-]], [], [AC_MSG_ERROR([gcc 3.x required, but you have a lower version])])
+ AC_COMPILE_IFELSE(
+[
+ AC_LANG_SOURCE([[
+ #if !defined(__GNUC__) || __GNUC__ < 3
+ #error Unsupported GCC version
+ #endif
+ ]])
+],
+[], [AC_MSG_ERROR([gcc 3.x required, but you have a lower version])])
fi
dnl Check for GNU Make. We use its extensions, so don't build without it
@@ -1185,7 +1271,53 @@ AC_MSG_CHECKING([optional compiler flags])
CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros])
CXX_FLAG_CHECK(NO_MISSING_FIELD_INITIALIZERS, [-Wno-missing-field-initializers])
CXX_FLAG_CHECK(COVERED_SWITCH_DEFAULT, [-Wcovered-switch-default])
-AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT])
+
+dnl GCC's potentially-uninitialized-use analysis is weak and presents lots of
+dnl false positives, so disable it.
+NO_UNINITIALIZED=
+NO_MAYBE_UNINITIALIZED=
+if test "$GXX" = "yes"
+then
+ CXX_FLAG_CHECK(NO_MAYBE_UNINITIALIZED, [-Wno-maybe-uninitialized])
+ dnl gcc 4.7 introduced -Wmaybe-uninitialized to distinguish cases which are
+ dnl known to be uninitialized from cases which might be uninitialized. We
+ dnl still want to catch the first kind of errors.
+ if test -z "$NO_MAYBE_UNINITIALIZED"
+ then
+ CXX_FLAG_CHECK(NO_UNINITIALIZED, [-Wno-uninitialized])
+ fi
+fi
+AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED])
+
+AC_ARG_WITH([python],
+ [AS_HELP_STRING([--with-python], [path to python])],
+ [PYTHON="$withval"])
+
+if test -n "$PYTHON" && test -x "$PYTHON" ; then
+ AC_MSG_CHECKING([for python])
+ AC_MSG_RESULT([user defined: $with_python])
+else
+ if test -n "$PYTHON" ; then
+ AC_MSG_WARN([specified python ($PYTHON) is not usable, searching path])
+ fi
+
+ AC_PATH_PROG([PYTHON], [python python2 python26],
+ [AC_MSG_RESULT([not found])
+ AC_MSG_ERROR([could not find python 2.5 or higher])])
+fi
+
+AC_MSG_CHECKING([for python >= 2.5])
+ac_python_version=`$PYTHON -c 'import sys; print sys.version.split()[[0]]'`
+ac_python_version_major=`echo $ac_python_version | cut -d'.' -f1`
+ac_python_version_minor=`echo $ac_python_version | cut -d'.' -f2`
+ac_python_version_patch=`echo $ac_python_version | cut -d'.' -f3`
+if test "$ac_python_version_major" -eq "2" \
+ && test "$ac_python_version_minor" -ge "5" ; then
+ AC_MSG_RESULT([$PYTHON ($ac_python_version)])
+else
+ AC_MSG_RESULT([not found])
+ AC_MSG_FAILURE([found python $ac_python_version ($PYTHON); required >= 2.5])
+fi
dnl===-----------------------------------------------------------------------===
dnl===
@@ -1204,6 +1336,11 @@ AC_SEARCH_LIBS(dlopen,dl,AC_DEFINE([HAVE_DLOPEN],[1],
[Define if dlopen() is available on this platform.]),
AC_MSG_WARN([dlopen() not found - disabling plugin support]))
+dnl Search for the clock_gettime() function. Note that we rely on the POSIX
+dnl macros to detect whether clock_gettime is available; this just finds the
+dnl right libraries to link with.
+AC_SEARCH_LIBS(clock_gettime,rt)
+
dnl libffi is optional; used to call external functions from the interpreter
if test "$llvm_cv_enable_libffi" = "yes" ; then
AC_SEARCH_LIBS(ffi_call,ffi,AC_DEFINE([HAVE_FFI_CALL],[1],
@@ -1356,6 +1493,7 @@ AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h])
AC_CHECK_HEADERS([sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h])
AC_CHECK_HEADERS([valgrind/valgrind.h])
AC_CHECK_HEADERS([fenv.h])
+AC_CHECK_DECLS([FE_ALL_EXCEPT, FE_INEXACT], [], [], [[#include <fenv.h>]])
if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
AC_CHECK_HEADERS(pthread.h,
AC_SUBST(HAVE_PTHREAD, 1),
@@ -1375,18 +1513,23 @@ AC_CHECK_HEADERS([CrashReporterClient.h])
dnl Try to find Darwin specific crash reporting global.
AC_MSG_CHECKING([__crashreporter_info__])
AC_LINK_IFELSE(
- AC_LANG_SOURCE(
- [[extern const char *__crashreporter_info__;
- int main() {
- __crashreporter_info__ = "test";
- return 0;
- }
- ]]),
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_CRASHREPORTER_INFO, 1, Can use __crashreporter_info__),
- AC_MSG_RESULT(no)
- AC_DEFINE(HAVE_CRASHREPORTER_INFO, 0,
- Define if __crashreporter_info__ exists.))
+[
+ AC_LANG_SOURCE([[
+ extern const char *__crashreporter_info__;
+ int main() {
+ __crashreporter_info__ = "test";
+ return 0;
+ }
+ ]])
+],
+[
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_CRASHREPORTER_INFO], [1], [can use __crashreporter_info__])
+],
+[
+ AC_MSG_RESULT([no])
+ AC_DEFINE([HAVE_CRASHREPORTER_INFO], [0], [can use __crashreporter_info__])
+])
dnl===-----------------------------------------------------------------------===
dnl===
@@ -1412,6 +1555,7 @@ dnl===-----------------------------------------------------------------------===
AC_CHECK_FUNCS([backtrace ceilf floorf roundf rintf nearbyintf getcwd ])
AC_CHECK_FUNCS([powf fmodf strtof round ])
+AC_CHECK_FUNCS([log log2 log10 exp exp2])
AC_CHECK_FUNCS([getpagesize getrusage getrlimit setrlimit gettimeofday ])
AC_CHECK_FUNCS([isatty mkdtemp mkstemp ])
AC_CHECK_FUNCS([mktemp posix_spawn pread realpath sbrk setrlimit strdup ])
@@ -1449,10 +1593,15 @@ fi
dnl Check Win32 API EnumerateLoadedModules.
if test "$llvm_cv_os_type" = "MingW" ; then
AC_MSG_CHECKING([whether EnumerateLoadedModules() accepts new decl])
- AC_COMPILE_IFELSE([[#include <windows.h>
-#include <imagehlp.h>
-extern void foo(PENUMLOADED_MODULES_CALLBACK);
-extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID));]],
+ AC_COMPILE_IFELSE(
+[
+ AC_LANG_SOURCE([[
+ #include <windows.h>
+ #include <imagehlp.h>
+ extern void foo(PENUMLOADED_MODULES_CALLBACK);
+ extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID));
+ ]])
+],
[
AC_MSG_RESULT([yes])
llvm_cv_win32_elmcb_pcstr="PCSTR"
@@ -1493,22 +1642,28 @@ dnl Since we'll be using these atomic builtins in C++ files we should test
dnl the C++ compiler.
AC_LANG_PUSH([C++])
AC_LINK_IFELSE(
- AC_LANG_SOURCE(
- [[int main() {
- volatile unsigned long val = 1;
- __sync_synchronize();
- __sync_val_compare_and_swap(&val, 1, 0);
- __sync_add_and_fetch(&val, 1);
- __sync_sub_and_fetch(&val, 1);
- return 0;
- }
- ]]),
- AC_LANG_POP([C++])
- AC_MSG_RESULT(yes)
- AC_DEFINE(LLVM_HAS_ATOMICS, 1, Has gcc/MSVC atomic intrinsics),
- AC_MSG_RESULT(no)
- AC_DEFINE(LLVM_HAS_ATOMICS, 0, Has gcc/MSVC atomic intrinsics)
- AC_MSG_WARN([LLVM will be built thread-unsafe because atomic builtins are missing]))
+[
+ AC_LANG_SOURCE([[
+ int main() {
+ volatile unsigned long val = 1;
+ __sync_synchronize();
+ __sync_val_compare_and_swap(&val, 1, 0);
+ __sync_add_and_fetch(&val, 1);
+ __sync_sub_and_fetch(&val, 1);
+ return 0;
+ }
+ ]])
+],
+[
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([LLVM_HAS_ATOMICS], [1], [Has gcc/MSVC atomic intrinsics])
+],
+[
+ AC_MSG_RESULT([no])
+ AC_DEFINE([LLVM_HAS_ATOMICS], [0], [Has gcc/MSVC atomic intrinsics])
+ AC_MSG_WARN([LLVM will be built thread-unsafe because atomic builtins are missing])
+])
+AC_LANG_POP([C++])
dnl===-----------------------------------------------------------------------===
dnl===
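A note on the Python detection added above: the version gate shells out to cut(1) to split the string printed by the interpreter. Expressed in Python itself, the same check is a single tuple comparison; a minimal sketch (illustrative only, not part of the patch):

    import sys

    # Equivalent of the configure gate above: accept Python 2.x with x >= 5.
    if not (sys.version_info[0] == 2 and sys.version_info[1] >= 5):
        sys.exit("found python %d.%d; required >= 2.5" % sys.version_info[:2])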
diff --git a/autoconf/m4/cxx_flag_check.m4 b/autoconf/m4/cxx_flag_check.m4
index 62454b7147f9..4b0974455015 100644
--- a/autoconf/m4/cxx_flag_check.m4
+++ b/autoconf/m4/cxx_flag_check.m4
@@ -1,2 +1,2 @@
AC_DEFUN([CXX_FLAG_CHECK],
- [AC_SUBST($1, `$CXX -Werror $2 -fsyntax-only -xc /dev/null 2>/dev/null && echo $2`)])
+ [AC_SUBST($1, `$CXX -Werror patsubst($2, [^-Wno-], [-W]) -fsyntax-only -xc /dev/null 2>/dev/null && echo $2`)])
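The patsubst above addresses a subtle probing bug: GCC silently accepts unrecognized -Wno-* options, so testing the -Wno- spelling always appears to succeed. The macro now compiles against the positive -W form and echoes the original -Wno- flag only when that probe passes (the regenerated configure further down shows the expanded commands). A rough Python rendering of the probe, assuming a POSIX system with the compiler on PATH:

    import os
    import re
    import subprocess

    def check_flag(cxx, flag):
        # Probe the positive spelling (-Wfoo): gcc accepts any unknown
        # -Wno-foo silently, so the -Wno- form would always "succeed".
        probe = re.sub(r'^-Wno-', '-W', flag)
        devnull = open(os.devnull, 'w')
        try:
            rc = subprocess.call([cxx, '-Werror', probe,
                                  '-fsyntax-only', '-xc', '/dev/null'],
                                 stderr=devnull)
        finally:
            devnull.close()
        return flag if rc == 0 else ''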
diff --git a/autoconf/m4/func_isinf.m4 b/autoconf/m4/func_isinf.m4
index 22ba81d54d59..40dc48b2b8a8 100644
--- a/autoconf/m4/func_isinf.m4
+++ b/autoconf/m4/func_isinf.m4
@@ -1,34 +1,40 @@
-#
-# This function determins if the isinf function isavailable on this
-# platform.
-#
+dnl
+dnl This function determines if the isinf function is available on this
+dnl platform.
+dnl
+
AC_DEFUN([AC_FUNC_ISINF],[
+
AC_SINGLE_CXX_CHECK([ac_cv_func_isinf_in_math_h],
[isinf], [<math.h>],
[float f; isinf(f);])
if test "$ac_cv_func_isinf_in_math_h" = "yes" ; then
- AC_DEFINE([HAVE_ISINF_IN_MATH_H],1,[Set to 1 if the isinf function is found in <math.h>])
+ AC_DEFINE([HAVE_ISINF_IN_MATH_H], [1],
+ [Set to 1 if the isinf function is found in <math.h>])
fi
AC_SINGLE_CXX_CHECK([ac_cv_func_isinf_in_cmath],
[isinf], [<cmath>],
[float f; isinf(f);])
if test "$ac_cv_func_isinf_in_cmath" = "yes" ; then
- AC_DEFINE([HAVE_ISINF_IN_CMATH],1,[Set to 1 if the isinf function is found in <cmath>])
+ AC_DEFINE([HAVE_ISINF_IN_CMATH], [1],
+ [Set to 1 if the isinf function is found in <cmath>])
fi
AC_SINGLE_CXX_CHECK([ac_cv_func_std_isinf_in_cmath],
[std::isinf], [<cmath>],
[float f; std::isinf(f);])
if test "$ac_cv_func_std_isinf_in_cmath" = "yes" ; then
- AC_DEFINE([HAVE_STD_ISINF_IN_CMATH],1,[Set to 1 if the std::isinf function is found in <cmath>])
+ AC_DEFINE([HAVE_STD_ISINF_IN_CMATH], [1],
+ [Set to 1 if the std::isinf function is found in <cmath>])
fi
AC_SINGLE_CXX_CHECK([ac_cv_func_finite_in_ieeefp_h],
[finite], [<ieeefp.h>],
[float f; finite(f);])
if test "$ac_cv_func_finite_in_ieeefp_h" = "yes" ; then
- AC_DEFINE([HAVE_FINITE_IN_IEEEFP_H],1,[Set to 1 if the finite function is found in <ieeefp.h>])
+ AC_DEFINE([HAVE_FINITE_IN_IEEEFP_H], [1],
+ [Set to 1 if the finite function is found in <ieeefp.h>])
fi
])
diff --git a/autoconf/m4/huge_val.m4 b/autoconf/m4/huge_val.m4
index 6c9a22eab002..d224d7cb64eb 100644
--- a/autoconf/m4/huge_val.m4
+++ b/autoconf/m4/huge_val.m4
@@ -7,12 +7,10 @@ AC_DEFUN([AC_HUGE_VAL_CHECK],[
AC_LANG_PUSH([C++])
ac_save_CXXFLAGS=$CXXFLAGS
CXXFLAGS="$CXXFLAGS -pedantic"
- AC_RUN_IFELSE(
- AC_LANG_PROGRAM(
- [#include <math.h>],
- [double x = HUGE_VAL; return x != x; ]),
- [ac_cv_huge_val_sanity=yes],[ac_cv_huge_val_sanity=no],
- [ac_cv_huge_val_sanity=yes])
+ AC_RUN_IFELSE([AC_LANG_PROGRAM([[#include <math.h>]],
+ [[double x = HUGE_VAL; return x != x;]])],
+ [ac_cv_huge_val_sanity=yes],[ac_cv_huge_val_sanity=no],
+ [ac_cv_huge_val_sanity=yes])
CXXFLAGS=$ac_save_CXXFLAGS
AC_LANG_POP([C++])
])
diff --git a/autoconf/m4/single_cxx_check.m4 b/autoconf/m4/single_cxx_check.m4
index 21efa4bed353..cb4732641825 100644
--- a/autoconf/m4/single_cxx_check.m4
+++ b/autoconf/m4/single_cxx_check.m4
@@ -1,10 +1,16 @@
+dnl
dnl AC_SINGLE_CXX_CHECK(CACHEVAR, FUNCTION, HEADER, PROGRAM)
-dnl $1, $2, $3, $4,
-dnl
+dnl $1, $2, $3, $4,
+
AC_DEFUN([AC_SINGLE_CXX_CHECK],
- [AC_CACHE_CHECK([for $2 in $3], [$1],
- [AC_LANG_PUSH([C++])
- AC_COMPILE_IFELSE(AC_LANG_PROGRAM([#include $3],[$4]),[$1=yes],[$1=no])
- AC_LANG_POP([C++])])
- ])
+[
+ AC_CACHE_CHECK([for $2 in $3], [$1],
+ [
+ AC_LANG_PUSH([C++])
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]][$3], [$4])],
+ [$1][[=yes]],
+ [$1][[=no]])
+ AC_LANG_POP([C++])
+ ])
+])
diff --git a/bindings/python/llvm/common.py b/bindings/python/llvm/common.py
index 0c5fcd03d844..17c22b8ef448 100644
--- a/bindings/python/llvm/common.py
+++ b/bindings/python/llvm/common.py
@@ -12,10 +12,14 @@ from ctypes import c_void_p
from ctypes import cdll
import ctypes.util
+import platform
+
+# LLVM_VERSION: sync with PACKAGE_VERSION in autoconf/configure.ac and CMakeLists.txt
+# but leave out the 'svn' suffix.
+LLVM_VERSION = '3.3'
__all__ = [
'c_object_p',
- 'find_library',
'get_library',
]
@@ -87,20 +91,36 @@ class CachedProperty(object):
return value
-def find_library():
- # FIXME should probably have build system define absolute path of shared
- # library at install time.
- for lib in ['LLVM-3.1svn', 'libLLVM-3.1svn', 'LLVM', 'libLLVM']:
- result = ctypes.util.find_library(lib)
- if result:
- return result
-
- return None
-
def get_library():
"""Obtain a reference to the llvm library."""
- lib = find_library()
- if not lib:
- raise Exception('LLVM shared library not found!')
- return cdll.LoadLibrary(lib)
+ # On Linux, ctypes.cdll.LoadLibrary() respects LD_LIBRARY_PATH
+ # while ctypes.util.find_library() doesn't.
+ # See http://docs.python.org/2/library/ctypes.html#finding-shared-libraries
+ #
+    # To make it possible to run the unit tests without installing the LLVM
+    # shared library into a default linker search path, always try
+    # ctypes.cdll.LoadLibrary() with all possible library names first, then
+    # fall back to ctypes.util.find_library().
+
+ names = ['LLVM-' + LLVM_VERSION, 'LLVM-' + LLVM_VERSION + 'svn']
+ t = platform.system()
+ if t == 'Darwin':
+ pfx, ext = 'lib', '.dylib'
+ elif t == 'Windows':
+ pfx, ext = '', '.dll'
+ else:
+ pfx, ext = 'lib', '.so'
+
+ for i in names:
+ try:
+ lib = cdll.LoadLibrary(pfx + i + ext)
+ except OSError:
+ pass
+ else:
+ return lib
+
+ for i in names:
+ t = ctypes.util.find_library(i)
+ if t:
+ return cdll.LoadLibrary(t)
+ raise Exception('LLVM shared library not found!')
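A short usage sketch for the reworked loader (assumes the llvm package is importable and a libLLVM shared library has been built; Python 2 syntax to match the bindings):

    from llvm.common import get_library

    try:
        lib = get_library()       # e.g. resolves libLLVM-3.3.so on Linux
    except Exception, e:          # raised when no candidate name loads
        print 'LLVM shared library not found:', e
    else:
        print 'loaded', lib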
diff --git a/bindings/python/llvm/disassembler.py b/bindings/python/llvm/disassembler.py
index 5030b989a944..dcef9ac26905 100644
--- a/bindings/python/llvm/disassembler.py
+++ b/bindings/python/llvm/disassembler.py
@@ -31,6 +31,9 @@ __all__ = [
lib = get_library()
callbacks = {}
+# Constants for set_options
+Option_UseMarkup = 1
+
class Disassembler(LLVMObject):
"""Represents a disassembler instance.
@@ -113,6 +116,10 @@ class Disassembler(LLVMObject):
address += result
offset += result
+ def set_options(self, options):
+ if not lib.LLVMSetDisasmOptions(self, options):
+            raise Exception('Unable to set all disassembler options (%i)' % options)
+
def register_library(library):
library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
@@ -125,6 +132,10 @@ def register_library(library):
c_uint64, c_uint64, c_char_p, c_size_t]
library.LLVMDisasmInstruction.restype = c_size_t
+ library.LLVMSetDisasmOptions.argtypes = [Disassembler, c_uint64]
+ library.LLVMSetDisasmOptions.restype = c_int
+
+
callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
c_int, c_void_p)
callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64,
diff --git a/bindings/python/llvm/tests/test_disassembler.py b/bindings/python/llvm/tests/test_disassembler.py
index 545e8668b6c9..46d12f705626 100644
--- a/bindings/python/llvm/tests/test_disassembler.py
+++ b/bindings/python/llvm/tests/test_disassembler.py
@@ -1,6 +1,6 @@
from .base import TestBase
-from ..disassembler import Disassembler
+from ..disassembler import Disassembler, Option_UseMarkup
class TestDisassembler(TestBase):
def test_instantiate(self):
@@ -26,3 +26,14 @@ class TestDisassembler(TestBase):
self.assertEqual(instructions[0], (0, 3, '\tjcxz\t-127'))
self.assertEqual(instructions[1], (3, 2, '\taddl\t%eax, %edi'))
+
+ def test_set_options(self):
+ sequence = '\x10\x40\x2d\xe9'
+ triple = 'arm-linux-android'
+
+ disassembler = Disassembler(triple)
+ disassembler.set_options(Option_UseMarkup)
+ count, s = disassembler.get_instruction(sequence)
+ print s
+ self.assertEqual(count, 4)
+ self.assertEqual(s, '\tpush\t{<reg:r4>, <reg:lr>}')
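With Option_UseMarkup set, operands come back wrapped in <kind:...> annotations such as <reg:r4> (see docs/MarkedUpDisassembly.rst). When plain text is wanted afterwards, the tags can be stripped; a sketch with the tag grammar inferred from the example output above (nested tags are not handled):

    import re

    def strip_markup(text):
        # Drop '<kind:' openers and their closing '>', keeping the operand,
        # e.g. '\tpush\t{<reg:r4>, <reg:lr>}' -> '\tpush\t{r4, lr}'.
        return re.sub(r'<[a-z-]+:([^<>]*)>', r'\1', text)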
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index fcd5dd556676..7cad190c11a0 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -54,6 +54,7 @@ check_include_file(ndir.h HAVE_NDIR_H)
if( NOT PURE_WINDOWS )
check_include_file(pthread.h HAVE_PTHREAD_H)
endif()
+check_include_file(sanitizer/msan_interface.h HAVE_SANITIZER_MSAN_INTERFACE_H)
check_include_file(setjmp.h HAVE_SETJMP_H)
check_include_file(signal.h HAVE_SIGNAL_H)
check_include_file(stdint.h HAVE_STDINT_H)
@@ -79,6 +80,9 @@ check_include_file(utime.h HAVE_UTIME_H)
check_include_file(valgrind/valgrind.h HAVE_VALGRIND_VALGRIND_H)
check_include_file(windows.h HAVE_WINDOWS_H)
check_include_file(fenv.h HAVE_FENV_H)
+check_symbol_exists(FE_ALL_EXCEPT "fenv.h" HAVE_DECL_FE_ALL_EXCEPT)
+check_symbol_exists(FE_INEXACT "fenv.h" HAVE_DECL_FE_INEXACT)
+
check_include_file(mach/mach.h HAVE_MACH_MACH_H)
check_include_file(mach-o/dyld.h HAVE_MACH_O_DYLD_H)
@@ -99,6 +103,7 @@ if( NOT PURE_WINDOWS )
endif()
endif()
check_library_exists(dl dlopen "" HAVE_LIBDL)
+ check_library_exists(rt clock_gettime "" HAVE_LIBRT)
endif()
# function checks
@@ -117,6 +122,12 @@ check_symbol_exists(isnan math.h HAVE_ISNAN_IN_MATH_H)
check_symbol_exists(ceilf math.h HAVE_CEILF)
check_symbol_exists(floorf math.h HAVE_FLOORF)
check_symbol_exists(fmodf math.h HAVE_FMODF)
+check_symbol_exists(log math.h HAVE_LOG)
+check_symbol_exists(log2 math.h HAVE_LOG2)
+check_symbol_exists(log10 math.h HAVE_LOG10)
+check_symbol_exists(exp math.h HAVE_EXP)
+check_symbol_exists(exp2 math.h HAVE_EXP2)
+check_symbol_exists(exp10 math.h HAVE_EXP10)
if( HAVE_SETJMP_H )
check_symbol_exists(longjmp setjmp.h HAVE_LONGJMP)
check_symbol_exists(setjmp setjmp.h HAVE_SETJMP)
@@ -294,10 +305,33 @@ else()
set(ENABLE_PIC 0)
endif()
+find_package(LibXml2)
+if (LIBXML2_FOUND)
+ set(CLANG_HAVE_LIBXML 1)
+endif ()
+
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG)
+set(USE_NO_MAYBE_UNINITIALIZED 0)
+set(USE_NO_UNINITIALIZED 0)
+
+# Disable gcc's potentially uninitialized use analysis as it presents lots of
+# false positives.
+if (CMAKE_COMPILER_IS_GNUCXX)
+ check_cxx_compiler_flag("-Wmaybe-uninitialized" HAS_MAYBE_UNINITIALIZED)
+ if (HAS_MAYBE_UNINITIALIZED)
+ set(USE_NO_MAYBE_UNINITIALIZED 1)
+ else()
+ # Only recent versions of gcc make the distinction between -Wuninitialized
+ # and -Wmaybe-uninitialized. If -Wmaybe-uninitialized isn't supported, just
+ # turn off all uninitialized use warnings.
+ check_cxx_compiler_flag("-Wuninitialized" HAS_UNINITIALIZED)
+ set(USE_NO_UNINITIALIZED ${HAS_UNINITIALIZED})
+ endif()
+endif()
+
include(GetHostTriple)
get_host_triple(LLVM_HOST_TRIPLE)
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index 43ee9a08b27f..f0b31ce65385 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -157,12 +157,7 @@ endmacro(add_llvm_external_project)
# Generic support for adding a unittest.
function(add_unittest test_suite test_name)
- if (CMAKE_BUILD_TYPE)
- set(CMAKE_RUNTIME_OUTPUT_DIRECTORY
- ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE})
- else()
- set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
- endif()
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if( NOT LLVM_BUILD_TESTS )
set(EXCLUDE_FROM_ALL ON)
endif()
@@ -239,8 +234,8 @@ function(configure_lit_site_cfg input output)
set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR})
set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR})
- set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s")
- set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib/%(build_config)s")
+ set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_mode)s")
+ set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib/%(build_mode)s")
set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE})
set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED})
set(SHLIBPATH_VAR ${SHLIBPATH_VAR})
@@ -251,8 +246,8 @@ function(configure_lit_site_cfg input output)
set(ENABLE_ASSERTIONS "0")
endif()
- set(HOST_OS ${CMAKE_HOST_SYSTEM_NAME})
- set(HOST_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR})
+ set(HOST_OS ${CMAKE_SYSTEM_NAME})
+ set(HOST_ARCH ${CMAKE_SYSTEM_PROCESSOR})
configure_file(${input} ${output} @ONLY)
endfunction()
@@ -266,18 +261,23 @@ function(add_lit_target target comment)
set(LIT_COMMAND
${PYTHON_EXECUTABLE}
${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py
- --param build_config=${CMAKE_CFG_INTDIR}
- --param build_mode=${RUNTIME_BUILD_MODE}
+ --param build_mode=${CMAKE_CFG_INTDIR}
${LIT_ARGS}
)
foreach(param ${ARG_PARAMS})
list(APPEND LIT_COMMAND --param ${param})
endforeach()
- add_custom_target(${target}
- COMMAND ${LIT_COMMAND} ${ARG_DEFAULT_ARGS}
- COMMENT "${comment}"
- )
- add_dependencies(${target} ${ARG_DEPENDS})
+ if( ARG_DEPENDS )
+ add_custom_target(${target}
+ COMMAND ${LIT_COMMAND} ${ARG_DEFAULT_ARGS}
+ COMMENT "${comment}"
+ )
+ add_dependencies(${target} ${ARG_DEPENDS})
+ else()
+ add_custom_target(${target}
+ COMMAND cmake -E echo "${target} does nothing, no tools built.")
+ message(STATUS "${target} does nothing.")
+ endif()
endfunction()
# A function to add a set of lit test suites to be driven through 'check-*' targets.
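After the hunk above, lit receives a single build_mode parameter (set from CMAKE_CFG_INTDIR) in place of the former build_config/build_mode pair, matching the renamed substitutions in configure_lit_site_cfg. Run by hand, the equivalent invocation is roughly the following (paths and build mode depend on the checkout and generator):

    import subprocess

    # Illustrative manual run mirroring the generated LIT_COMMAND; a single
    # 'build_mode' param replaces the old 'build_config'/'build_mode' pair.
    subprocess.check_call([
        'python', 'utils/lit/lit.py',
        '--param', 'build_mode=Release',
        'test',
    ])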
diff --git a/cmake/modules/GetSVN.cmake b/cmake/modules/GetSVN.cmake
new file mode 100644
index 000000000000..acccc12a94e8
--- /dev/null
+++ b/cmake/modules/GetSVN.cmake
@@ -0,0 +1,25 @@
+# CMake project that writes Subversion revision information to a header.
+#
+# Input variables:
+# FIRST_SOURCE_DIR - First source directory
+# FIRST_REPOSITORY - The macro to define to the first revision number.
+# SECOND_SOURCE_DIR - Second source directory
+# SECOND_REPOSITORY - The macro to define to the second revision number.
+# HEADER_FILE - The header file to write
+include(FindSubversion)
+if (Subversion_FOUND AND EXISTS "${FIRST_SOURCE_DIR}/.svn")
+ # Repository information for the first repository.
+ Subversion_WC_INFO(${FIRST_SOURCE_DIR} MY)
+ file(WRITE ${HEADER_FILE}.txt "#define ${FIRST_REPOSITORY} \"${MY_WC_REVISION}\"\n")
+
+ # Repository information for the second repository.
+ if (EXISTS "${SECOND_SOURCE_DIR}/.svn")
+ Subversion_WC_INFO(${SECOND_SOURCE_DIR} MY)
+ file(APPEND ${HEADER_FILE}.txt
+ "#define ${SECOND_REPOSITORY} \"${MY_WC_REVISION}\"\n")
+ endif ()
+
+ # Copy the file only if it has changed.
+ execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ ${HEADER_FILE}.txt ${HEADER_FILE})
+endif()
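The new GetSVN.cmake stages the header as a .txt sibling and copies it over only when the contents differ, so an unchanged revision does not bump the header's timestamp and force rebuilds. The same pattern in Python, for illustration (write_if_different is a hypothetical helper, not part of the patch):

    import filecmp
    import os
    import shutil

    def write_if_different(header, text):
        # Stage the new contents, then overwrite the real header only on
        # change, leaving its mtime alone when nothing differs.
        staged = header + '.txt'
        f = open(staged, 'w')
        f.write(text)
        f.close()
        if not (os.path.exists(header)
                and filecmp.cmp(staged, header, shallow=False)):
            shutil.copyfile(staged, header)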
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index b5f96e8f7114..4e59a3e3d906 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -3,6 +3,8 @@
# selections.
include(AddLLVMDefinitions)
+include(CheckCCompilerFlag)
+include(CheckCXXCompilerFlag)
if( CMAKE_COMPILER_IS_GNUCXX )
set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON)
@@ -10,20 +12,6 @@ elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON)
endif()
-# Run-time build mode; It is used for unittests.
-if(MSVC_IDE)
- # Expect "$(Configuration)", "$(OutDir)", etc.
- # It is expanded by msbuild or similar.
- set(RUNTIME_BUILD_MODE "${CMAKE_CFG_INTDIR}")
-elseif(NOT CMAKE_BUILD_TYPE STREQUAL "")
- # Expect "Release" "Debug", etc.
- # Or unittests could not run.
- set(RUNTIME_BUILD_MODE ${CMAKE_BUILD_TYPE})
-else()
- # It might be "."
- set(RUNTIME_BUILD_MODE "${CMAKE_CFG_INTDIR}")
-endif()
-
if( LLVM_ENABLE_ASSERTIONS )
# MSVC doesn't like _DEBUG on release builds. See PR 4379.
if( NOT MSVC )
@@ -71,6 +59,39 @@ else(WIN32)
endif(UNIX)
endif(WIN32)
+function(add_flag_or_print_warning flag)
+ check_c_compiler_flag(${flag} C_SUPPORTS_FLAG)
+ check_cxx_compiler_flag(${flag} CXX_SUPPORTS_FLAG)
+ if (C_SUPPORTS_FLAG AND CXX_SUPPORTS_FLAG)
+ message(STATUS "Building with ${flag}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE)
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE)
+ else()
+ message(WARNING "${flag} is not supported.")
+ endif()
+endfunction()
+
+function(append value)
+ foreach(variable ${ARGN})
+ set(${variable} "${${variable}} ${value}" PARENT_SCOPE)
+ endforeach(variable)
+endfunction()
+
+function(append_if condition value)
+ if (${condition})
+ foreach(variable ${ARGN})
+ set(${variable} "${${variable}} ${value}" PARENT_SCOPE)
+ endforeach(variable)
+ endif()
+endfunction()
+
+macro(add_flag_if_supported flag)
+ check_c_compiler_flag(${flag} C_SUPPORTS_FLAG)
+ append_if(C_SUPPORTS_FLAG "${flag}" CMAKE_C_FLAGS)
+ check_cxx_compiler_flag(${flag} CXX_SUPPORTS_FLAG)
+ append_if(CXX_SUPPORTS_FLAG "${flag}" CMAKE_CXX_FLAGS)
+endmacro()
+
if( LLVM_ENABLE_PIC )
if( XCODE )
# Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't
@@ -79,24 +100,14 @@ if( LLVM_ENABLE_PIC )
elseif( WIN32 OR CYGWIN)
# On Windows all code is PIC. MinGW warns if -fPIC is used.
else()
- include(CheckCXXCompilerFlag)
- check_cxx_compiler_flag("-fPIC" SUPPORTS_FPIC_FLAG)
- if( SUPPORTS_FPIC_FLAG )
- message(STATUS "Building with -fPIC")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
- else( SUPPORTS_FPIC_FLAG )
- message(WARNING "-fPIC not supported.")
- endif()
+ add_flag_or_print_warning("-fPIC")
if( WIN32 OR CYGWIN)
# MinGW warns if -fvisibility-inlines-hidden is used.
else()
check_cxx_compiler_flag("-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG)
- if( SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG )
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden")
- endif()
- endif()
+ append_if(SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG "-fvisibility-inlines-hidden" CMAKE_CXX_FLAGS)
+ endif()
endif()
endif()
@@ -168,6 +179,7 @@ if( MSVC )
-wd4551 # Suppress 'function call missing argument list'
-wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible'
-wd4715 # Suppress ''function' : not all control paths return a value'
+ -wd4722 # Suppress ''function' : destructor never returns, potential memory leak'
-wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)'
# Promoted warnings.
@@ -175,7 +187,6 @@ if( MSVC )
# Promoted warnings to errors.
-we4238 # Promote 'nonstandard extension used : class rvalue used as lvalue' to error.
- -we4239 # Promote 'nonstandard extension used : 'token' : conversion from 'type' to 'type'' to error.
)
# Enable warnings
@@ -190,20 +201,67 @@ if( MSVC )
endif (LLVM_ENABLE_WERROR)
elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
if (LLVM_ENABLE_WARNINGS)
- add_llvm_definitions( -Wall -W -Wno-unused-parameter -Wwrite-strings )
- if (LLVM_ENABLE_PEDANTIC)
- add_llvm_definitions( -pedantic -Wno-long-long )
- endif (LLVM_ENABLE_PEDANTIC)
- check_cxx_compiler_flag("-Werror -Wcovered-switch-default" SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG)
- if( SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG )
- add_llvm_definitions( -Wcovered-switch-default )
+ append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+
+ # Turn off missing field initializer warnings for gcc to avoid noise from
+ # false positives with empty {}. Turn them on otherwise (they're off by
+ # default for clang).
+ check_cxx_compiler_flag("-Wmissing-field-initializers" CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
+ if (CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
+ if (CMAKE_COMPILER_IS_GNUCXX)
+ append("-Wno-missing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+ else()
+ append("-Wmissing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+ endif()
endif()
+
+ append_if(LLVM_ENABLE_PEDANTIC "-pedantic -Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+ check_cxx_compiler_flag("-Werror -Wcovered-switch-default" CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG)
+ append_if(CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG "-Wcovered-switch-default" CMAKE_CXX_FLAGS)
+ check_c_compiler_flag("-Werror -Wcovered-switch-default" C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG)
+ append_if(C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG "-Wcovered-switch-default" CMAKE_C_FLAGS)
+ append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS)
+ append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
+ check_cxx_compiler_flag("-Werror -Wnon-virtual-dtor" CXX_SUPPORTS_NON_VIRTUAL_DTOR_FLAG)
+ append_if(CXX_SUPPORTS_NON_VIRTUAL_DTOR_FLAG "-Wnon-virtual-dtor" CMAKE_CXX_FLAGS)
endif (LLVM_ENABLE_WARNINGS)
if (LLVM_ENABLE_WERROR)
add_llvm_definitions( -Werror )
endif (LLVM_ENABLE_WERROR)
endif( MSVC )
+macro(append_common_sanitizer_flags)
+ # Append -fno-omit-frame-pointer and turn on debug info to get better
+ # stack traces.
+ add_flag_if_supported("-fno-omit-frame-pointer")
+ if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" AND
+ NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELWITHDEBINFO")
+ add_flag_if_supported("-gline-tables-only")
+ endif()
+endmacro()
+
+# Turn on sanitizers if necessary.
+if(LLVM_USE_SANITIZER)
+ if (LLVM_ON_UNIX)
+ if (LLVM_USE_SANITIZER STREQUAL "Address")
+ append_common_sanitizer_flags()
+ add_flag_or_print_warning("-fsanitize=address")
+ elseif (LLVM_USE_SANITIZER MATCHES "Memory(WithOrigins)?")
+ append_common_sanitizer_flags()
+ add_flag_or_print_warning("-fsanitize=memory")
+ # -pie is required for MSan.
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie")
+ if(LLVM_USE_SANITIZER STREQUAL "MemoryWithOrigins")
+ add_flag_or_print_warning("-fsanitize-memory-track-origins")
+ endif()
+ else()
+ message(WARNING "Unsupported value of LLVM_USE_SANITIZER: ${LLVM_USE_SANITIZER}")
+ endif()
+ else()
+ message(WARNING "LLVM_USE_SANITIZER is not supported on this platform.")
+ endif()
+endif()
+
add_llvm_definitions( -D__STDC_CONSTANT_MACROS )
add_llvm_definitions( -D__STDC_FORMAT_MACROS )
add_llvm_definitions( -D__STDC_LIMIT_MACROS )
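The new LLVM_USE_SANITIZER handling maps a symbolic value onto compile and link flags, after prepending -fno-omit-frame-pointer (and, outside debug builds, -gline-tables-only) for readable stack traces. The core mapping, restated as a small Python sketch that mirrors the CMake logic above (sanitizer_flags is a hypothetical helper, not part of the build):

    def sanitizer_flags(value):
        # 'value' corresponds to -DLLVM_USE_SANITIZER=<value> on the
        # cmake command line; only Unix hosts are supported above.
        common = ['-fno-omit-frame-pointer']     # better stack traces
        if value == 'Address':
            return common + ['-fsanitize=address']
        if value in ('Memory', 'MemoryWithOrigins'):
            flags = common + ['-fsanitize=memory', '-pie']  # MSan needs -pie
            if value == 'MemoryWithOrigins':
                flags.append('-fsanitize-memory-track-origins')
            return flags
        raise ValueError('Unsupported LLVM_USE_SANITIZER: %r' % (value,))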
diff --git a/cmake/modules/LLVM-Config.cmake b/cmake/modules/LLVM-Config.cmake
index 574335c49d0e..163401c857c5 100755
--- a/cmake/modules/LLVM-Config.cmake
+++ b/cmake/modules/LLVM-Config.cmake
@@ -4,11 +4,14 @@ function(get_system_libs return_var)
if( MINGW )
set(system_libs ${system_libs} imagehlp psapi)
elseif( CMAKE_HOST_UNIX )
+ if( HAVE_LIBRT )
+ set(system_libs ${system_libs} rt)
+ endif()
if( HAVE_LIBDL )
- set(system_libs ${system_libs} ${CMAKE_DL_LIBS})
+ set(system_libs ${system_libs} ${CMAKE_DL_LIBS})
endif()
if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
- set(system_libs ${system_libs} pthread)
+ set(system_libs ${system_libs} pthread)
endif()
endif( MINGW )
endif( NOT MSVC )
diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake
index d6a2ae5f45f5..26314d4126d2 100644
--- a/cmake/modules/VersionFromVCS.cmake
+++ b/cmake/modules/VersionFromVCS.cmake
@@ -20,49 +20,51 @@ function(add_version_info_from_vcs VERS)
elseif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git )
set(result "${result}git")
# Try to get a ref-id
- find_program(git_executable NAMES git git.exe git.cmd)
- if( git_executable )
- set(is_git_svn_rev_exact false)
- execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- TIMEOUT 5
- RESULT_VARIABLE git_result
- OUTPUT_VARIABLE git_output)
- if( git_result EQUAL 0 )
- string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output})
- string(LENGTH "${git_svn_rev}" rev_length)
- math(EXPR rev_length "${rev_length}-1")
- string(SUBSTRING "${git_svn_rev}" 1 ${rev_length} git_svn_rev_number)
- set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE)
- set(git_svn_rev "-svn-${git_svn_rev}")
-
- # Determine if the HEAD points directly at a subversion revision.
- execute_process(COMMAND ${git_executable} svn find-rev HEAD
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- TIMEOUT 5
- RESULT_VARIABLE git_result
- OUTPUT_VARIABLE git_output)
+ if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git/svn )
+ find_program(git_executable NAMES git git.exe git.cmd)
+ if( git_executable )
+ set(is_git_svn_rev_exact false)
+ execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ TIMEOUT 5
+ RESULT_VARIABLE git_result
+ OUTPUT_VARIABLE git_output)
if( git_result EQUAL 0 )
- string(STRIP "${git_output}" git_head_svn_rev_number)
- if( git_head_svn_rev_number EQUAL git_svn_rev_number )
- set(is_git_svn_rev_exact true)
+ string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output})
+ string(LENGTH "${git_svn_rev}" rev_length)
+ math(EXPR rev_length "${rev_length}-1")
+ string(SUBSTRING "${git_svn_rev}" 1 ${rev_length} git_svn_rev_number)
+ set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE)
+ set(git_svn_rev "-svn-${git_svn_rev}")
+
+ # Determine if the HEAD points directly at a subversion revision.
+ execute_process(COMMAND ${git_executable} svn find-rev HEAD
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ TIMEOUT 5
+ RESULT_VARIABLE git_result
+ OUTPUT_VARIABLE git_output)
+ if( git_result EQUAL 0 )
+ string(STRIP "${git_output}" git_head_svn_rev_number)
+ if( git_head_svn_rev_number EQUAL git_svn_rev_number )
+ set(is_git_svn_rev_exact true)
+ endif()
endif()
+ else()
+ set(git_svn_rev "")
+ endif()
+ execute_process(COMMAND
+ ${git_executable} rev-parse --short HEAD
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ TIMEOUT 5
+ RESULT_VARIABLE git_result
+ OUTPUT_VARIABLE git_output)
+ if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact )
+ string(STRIP "${git_output}" git_ref_id)
+ set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE)
+ set(result "${result}${git_svn_rev}-${git_ref_id}")
+ else()
+ set(result "${result}${git_svn_rev}")
endif()
- else()
- set(git_svn_rev "")
- endif()
- execute_process(COMMAND
- ${git_executable} rev-parse --short HEAD
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- TIMEOUT 5
- RESULT_VARIABLE git_result
- OUTPUT_VARIABLE git_output)
- if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact )
- string(STRIP "${git_output}" git_ref_id)
- set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE)
- set(result "${result}${git_svn_rev}-${git_ref_id}")
- else()
- set(result "${result}${git_svn_rev}")
endif()
endif()
endif()
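The re-indent above makes the git-svn queries conditional on .git/svn existing, so plain git checkouts no longer run (or hang on) git svn log. For reference, the revision extraction being guarded, matching rNNNN in the one-line log and keeping the bare number, looks like this in Python (sketch; assumes a git-svn checkout with git on PATH):

    import re
    import subprocess

    def git_svn_revision(srcdir):
        # Same extraction as the CMake: take 'rNNNN' from the newest
        # 'git svn log --limit=1 --oneline' entry.
        out = subprocess.Popen(['git', 'svn', 'log', '--limit=1', '--oneline'],
                               cwd=srcdir, stdout=subprocess.PIPE).communicate()[0]
        m = re.search(r'r([0-9]+)', out)
        return m.group(1) if m else None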
diff --git a/configure b/configure
index 4fa070549196..e70b13100a70 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.2svn.
+# Generated by GNU Autoconf 2.60 for LLVM 3.3svn.
#
# Report bugs to <http://llvm.org/bugs/>.
#
@@ -9,7 +9,7 @@
# This configure script is free software; the Free Software Foundation
# gives unlimited permission to copy, distribute and modify it.
#
-# Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.
+# Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
## --------------------- ##
## M4sh Initialization. ##
## --------------------- ##
@@ -561,11 +561,11 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='LLVM'
PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.2svn'
-PACKAGE_STRING='LLVM 3.2svn'
+PACKAGE_VERSION='3.3svn'
+PACKAGE_STRING='LLVM 3.3svn'
PACKAGE_BUGREPORT='http://llvm.org/bugs/'
-ac_unique_file="lib/VMCore/Module.cpp"
+ac_unique_file="lib/IR/Module.cpp"
# Factoring default headers for most tests.
ac_includes_default="\
#include <stdio.h>
@@ -685,6 +685,9 @@ BUILD_CXX
CVSBUILD
ENABLE_LIBCPP
ENABLE_CXX11
+ENABLE_CLANG_ARCMT
+ENABLE_CLANG_STATIC_ANALYZER
+ENABLE_CLANG_REWRITER
ENABLE_OPTIMIZED
ENABLE_PROFILING
DISABLE_ASSERTIONS
@@ -764,6 +767,9 @@ LIBADD_DL
NO_VARIADIC_MACROS
NO_MISSING_FIELD_INITIALIZERS
COVERED_SWITCH_DEFAULT
+NO_MAYBE_UNINITIALIZED
+NO_UNINITIALIZED
+PYTHON
USE_UDIS86
USE_OPROFILE
USE_INTEL_JITEVENTS
@@ -1321,7 +1327,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures LLVM 3.2svn to adapt to many kinds of systems.
+\`configure' configures LLVM 3.3svn to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1387,7 +1393,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of LLVM 3.2svn:";;
+ short | recursive ) echo "Configuration of LLVM 3.3svn:";;
esac
cat <<\_ACEOF
@@ -1397,6 +1403,11 @@ Optional Features:
--enable-polly Use polly if available (default is YES)
--enable-libcpp Use libc++ if available (default is NO)
--enable-cxx11 Use c++11 if available (default is NO)
+ --enable-clang-arcmt Enable building of clang ARCMT (default is YES)
+ --enable-clang-static-analyzer
+ Enable building of clang Static Analyzer (default is
+ YES)
+ --enable-clang-rewriter Enable building of clang rewriter (default is YES)
--enable-optimized Compile with optimizations enabled (default is NO)
--enable-profiling Compile with profiling enabled (default is NO)
--enable-assertions Compile with assertion checks enabled (default is
@@ -1427,7 +1438,7 @@ Optional Features:
YES)
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
- x86_64, sparc, powerpc, arm, mips, spu, hexagon,
+ x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
xcore, msp430, nvptx, and cpp (default=all)
--enable-experimental-targets
Build experimental host targets: disable or
@@ -1459,6 +1470,7 @@ Optional Packages:
--with-bug-report-url Specify the URL where bug reports should be
submitted (default=http://llvm.org/bugs/)
--with-internal-prefix Installation directory for internal files
+ --with-python path to python
--with-udis86=<path> Use udis86 external x86 disassembler library
--with-oprofile=<prefix>
Tell OProfile >= 0.9.4 how to symbolize JIT output
@@ -1540,7 +1552,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-LLVM configure 3.2svn
+LLVM configure 3.3svn
generated by GNU Autoconf 2.60
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1548,7 +1560,7 @@ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
This configure script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it.
-Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
_ACEOF
exit
fi
@@ -1556,7 +1568,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by LLVM $as_me 3.2svn, which was
+It was created by LLVM $as_me 3.3svn, which was
generated by GNU Autoconf 2.60. Invocation command line was
$ $0 $@
@@ -1916,11 +1928,11 @@ _ACEOF
cat >>confdefs.h <<\_ACEOF
-#define LLVM_VERSION_MINOR 2
+#define LLVM_VERSION_MINOR 3
_ACEOF
-LLVM_COPYRIGHT="Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign."
+LLVM_COPYRIGHT="Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign."
@@ -1968,6 +1980,9 @@ echo "$as_me: error: Already configured in ${srcdir}" >&2;}
fi
fi
+${CFLAGS=}
+${CXXFLAGS=}
+
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -3463,6 +3478,98 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test "$CXX" = "clang++" ; then
+ { echo "$as_me:$LINENO: checking whether clang works" >&5
+echo $ECHO_N "checking whether clang works... $ECHO_C" >&6; }
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <limits>
+#if __has_include (<cxxabi.h>)
+#include <cxxabi.h>
+#endif
+#if __has_include (<unwind.h>)
+#include <unwind.h>
+#endif
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+ { { echo "$as_me:$LINENO: error: Selected compiler could not find or parse C++ standard library headers. Rerun with CC=c-compiler CXX=c++-compiler ./configure ..." >&5
+echo "$as_me: error: Selected compiler could not find or parse C++ standard library headers. Rerun with CC=c-compiler CXX=c++-compiler ./configure ..." >&2;}
+ { (exit 1); exit 1; }; }
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+
if test -d ${srcdir}/projects/llvm-gcc ; then
@@ -3904,6 +4011,7 @@ else
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
+ aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
@@ -3937,6 +4045,7 @@ case $host in
sparc*-*) host_arch="Sparc" ;;
powerpc*-*) host_arch="PowerPC" ;;
arm*-*) host_arch="ARM" ;;
+ aarch64*-*) host_arch="AArch64" ;;
mips-* | mips64-*) host_arch="Mips" ;;
mipsel-* | mips64el-*) host_arch="Mips" ;;
xcore-*) host_arch="XCore" ;;
@@ -5047,6 +5156,77 @@ echo "$as_me: error: Invalid setting for --enable-cxx11. Use \"yes\" or \"no\""
{ (exit 1); exit 1; }; } ;;
esac
+clang_arcmt="yes"
+# Check whether --enable-clang-arcmt was given.
+if test "${enable_clang_arcmt+set}" = set; then
+ enableval=$enable_clang_arcmt; clang_arcmt="$enableval"
+else
+ enableval="yes"
+fi
+
+case "$enableval" in
+ yes) ENABLE_CLANG_ARCMT=1
+ ;;
+ no) ENABLE_CLANG_ARCMT=0
+ ;;
+ default) ENABLE_CLANG_ARCMT=1
+;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-clang-arcmt. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-clang-arcmt. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+clang_static_analyzer="yes"
+# Check whether --enable-clang-static-analyzer was given.
+if test "${enable_clang_static_analyzer+set}" = set; then
+ enableval=$enable_clang_static_analyzer; clang_static_analyzer="$enableval"
+else
+ enableval="yes"
+fi
+
+case "$enableval" in
+ yes) ENABLE_CLANG_STATIC_ANALYZER=1
+ ;;
+ no) ENABLE_CLANG_STATIC_ANALYZER=0
+ ;;
+ default) ENABLE_CLANG_STATIC_ANALYZER=1
+;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-clang-static-analyzer. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-clang-static-analyzer. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+# Check whether --enable-clang-rewriter was given.
+if test "${enable_clang_rewriter+set}" = set; then
+ enableval=$enable_clang_rewriter;
+else
+ enableval="yes"
+fi
+
+case "$enableval" in
+ yes) ENABLE_CLANG_REWRITER=1
+ ;;
+ no)
+ if test ${clang_arcmt} != "no" ; then
+ { { echo "$as_me:$LINENO: error: Cannot enable clang ARC Migration Tool while disabling rewriter." >&5
+echo "$as_me: error: Cannot enable clang ARC Migration Tool while disabling rewriter." >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ if test ${clang_static_analyzer} != "no" ; then
+ { { echo "$as_me:$LINENO: error: Cannot enable clang static analyzer while disabling rewriter." >&5
+echo "$as_me: error: Cannot enable clang static analyzer while disabling rewriter." >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ ENABLE_CLANG_REWRITER=0
+
+ ;;
+ default) ENABLE_CLANG_REWRITER=1
+;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-clang-rewriter. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-clang-rewriter. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
# Check whether --enable-optimized was given.
if test "${enable_optimized+set}" = set; then
enableval=$enable_optimized;
@@ -5198,6 +5378,8 @@ else
;;
ARM) TARGET_HAS_JIT=1
;;
+ AArch64) TARGET_HAS_JIT=0
+ ;;
Mips) TARGET_HAS_JIT=1
;;
XCore) TARGET_HAS_JIT=0
@@ -5419,19 +5601,19 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+ aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mips64) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mips64el) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
- spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
@@ -5446,7 +5628,6 @@ case "$enableval" in
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
- CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
@@ -10316,7 +10497,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10317 "configure"
+#line 10500 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -12007,9 +12188,17 @@ fi
if test "$GCC" = "yes"
then
cat >conftest.$ac_ext <<_ACEOF
-#if !defined(__GNUC__) || __GNUC__ < 3
-#error Unsupported GCC version
-#endif
+
+ /* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+ #if !defined(__GNUC__) || __GNUC__ < 3
+ #error Unsupported GCC version
+ #endif
+
_ACEOF
rm -f conftest.$ac_objext
@@ -12071,14 +12260,112 @@ echo "${ECHO_T}ok" >&6; }
{ echo "$as_me:$LINENO: checking optional compiler flags" >&5
echo $ECHO_N "checking optional compiler flags... $ECHO_C" >&6; }
-NO_VARIADIC_MACROS=`$CXX -Werror -Wno-variadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros`
+NO_VARIADIC_MACROS=`$CXX -Werror -Wvariadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros`
-NO_MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wno-missing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers`
+NO_MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wmissing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers`
COVERED_SWITCH_DEFAULT=`$CXX -Werror -Wcovered-switch-default -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wcovered-switch-default`
-{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT" >&5
-echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT" >&6; }
+
+NO_UNINITIALIZED=
+NO_MAYBE_UNINITIALIZED=
+if test "$GXX" = "yes"
+then
+ NO_MAYBE_UNINITIALIZED=`$CXX -Werror -Wmaybe-uninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-maybe-uninitialized`
+
+ if test -z "$NO_MAYBE_UNINITIALIZED"
+ then
+ NO_UNINITIALIZED=`$CXX -Werror -Wuninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-uninitialized`
+
+ fi
+fi
+{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED" >&5
+echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED" >&6; }
+
+
+# Check whether --with-python was given.
+if test "${with_python+set}" = set; then
+ withval=$with_python; PYTHON="$withval"
+fi
+
+
+if test -n "$PYTHON" && test -x "$PYTHON" ; then
+ { echo "$as_me:$LINENO: checking for python" >&5
+echo $ECHO_N "checking for python... $ECHO_C" >&6; }
+ { echo "$as_me:$LINENO: result: user defined: $with_python" >&5
+echo "${ECHO_T}user defined: $with_python" >&6; }
+else
+ if test -n "$PYTHON" ; then
+ { echo "$as_me:$LINENO: WARNING: specified python ($PYTHON) is not usable, searching path" >&5
+echo "$as_me: WARNING: specified python ($PYTHON) is not usable, searching path" >&2;}
+ fi
+
+ # Extract the first word of "python python2 python26", so it can be a program name with args.
+set dummy python python2 python26; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_PYTHON+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $PYTHON in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_PYTHON="$PYTHON" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_PYTHON="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_PYTHON" && ac_cv_path_PYTHON="{ echo "$as_me:$LINENO: result: not found" >&5
+echo "${ECHO_T}not found" >&6; }
+ { { echo "$as_me:$LINENO: error: could not find python 2.5 or higher" >&5
+echo "$as_me: error: could not find python 2.5 or higher" >&2;}
+ { (exit 1); exit 1; }; }"
+ ;;
+esac
+fi
+PYTHON=$ac_cv_path_PYTHON
+if test -n "$PYTHON"; then
+ { echo "$as_me:$LINENO: result: $PYTHON" >&5
+echo "${ECHO_T}$PYTHON" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+fi
+
+{ echo "$as_me:$LINENO: checking for python >= 2.5" >&5
+echo $ECHO_N "checking for python >= 2.5... $ECHO_C" >&6; }
+ac_python_version=`$PYTHON -c 'import sys; print sys.version.split()[0]'`
+ac_python_version_major=`echo $ac_python_version | cut -d'.' -f1`
+ac_python_version_minor=`echo $ac_python_version | cut -d'.' -f2`
+ac_python_version_patch=`echo $ac_python_version | cut -d'.' -f3`
+if test "$ac_python_version_major" -eq "2" \
+ && test "$ac_python_version_minor" -ge "5" ; then
+ { echo "$as_me:$LINENO: result: $PYTHON ($ac_python_version)" >&5
+echo "${ECHO_T}$PYTHON ($ac_python_version)" >&6; }
+else
+ { echo "$as_me:$LINENO: result: not found" >&5
+echo "${ECHO_T}not found" >&6; }
+ { { echo "$as_me:$LINENO: error: found python $ac_python_version ($PYTHON); required >= 2.5
+See \`config.log' for more details." >&5
+echo "$as_me: error: found python $ac_python_version ($PYTHON); required >= 2.5
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
@@ -12440,6 +12727,106 @@ echo "$as_me: WARNING: dlopen() not found - disabling plugin support" >&2;}
fi
+{ echo "$as_me:$LINENO: checking for library containing clock_gettime" >&5
+echo $ECHO_N "checking for library containing clock_gettime... $ECHO_C" >&6; }
+if test "${ac_cv_search_clock_gettime+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char clock_gettime ();
+int
+main ()
+{
+return clock_gettime ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' rt; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_clock_gettime=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_clock_gettime+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_clock_gettime+set}" = set; then
+ :
+else
+ ac_cv_search_clock_gettime=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_clock_gettime" >&5
+echo "${ECHO_T}$ac_cv_search_clock_gettime" >&6; }
+ac_res=$ac_cv_search_clock_gettime
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+
if test "$llvm_cv_enable_libffi" = "yes" ; then
{ echo "$as_me:$LINENO: checking for library containing ffi_call" >&5
echo $ECHO_N "checking for library containing ffi_call... $ECHO_C" >&6; }
@@ -15557,6 +15944,178 @@ fi
done
+{ echo "$as_me:$LINENO: checking whether FE_ALL_EXCEPT is declared" >&5
+echo $ECHO_N "checking whether FE_ALL_EXCEPT is declared... $ECHO_C" >&6; }
+if test "${ac_cv_have_decl_FE_ALL_EXCEPT+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <fenv.h>
+
+int
+main ()
+{
+#ifndef FE_ALL_EXCEPT
+ char *p = (char *) FE_ALL_EXCEPT;
+ return !p;
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_have_decl_FE_ALL_EXCEPT=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_have_decl_FE_ALL_EXCEPT=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_have_decl_FE_ALL_EXCEPT" >&5
+echo "${ECHO_T}$ac_cv_have_decl_FE_ALL_EXCEPT" >&6; }
+if test $ac_cv_have_decl_FE_ALL_EXCEPT = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_FE_ALL_EXCEPT 1
+_ACEOF
+
+
+else
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_FE_ALL_EXCEPT 0
+_ACEOF
+
+
+fi
+{ echo "$as_me:$LINENO: checking whether FE_INEXACT is declared" >&5
+echo $ECHO_N "checking whether FE_INEXACT is declared... $ECHO_C" >&6; }
+if test "${ac_cv_have_decl_FE_INEXACT+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <fenv.h>
+
+int
+main ()
+{
+#ifndef FE_INEXACT
+ char *p = (char *) FE_INEXACT;
+ return !p;
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_have_decl_FE_INEXACT=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_have_decl_FE_INEXACT=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_have_decl_FE_INEXACT" >&5
+echo "${ECHO_T}$ac_cv_have_decl_FE_INEXACT" >&6; }
+if test $ac_cv_have_decl_FE_INEXACT = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_FE_INEXACT 1
+_ACEOF
+
+
+else
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_FE_INEXACT 0
+_ACEOF
+
+
+fi
+
+
if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
for ac_header in pthread.h
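
The two checks above only test whether ``<fenv.h>`` declares ``FE_ALL_EXCEPT``
and ``FE_INEXACT``; each result is recorded in ``confdefs.h`` as
``HAVE_DECL_FE_ALL_EXCEPT`` / ``HAVE_DECL_FE_INEXACT`` set to 1 or 0. A hedged
sketch of the floating-point-exception query those defines enable (standard
C99 fenv API; the guard names are taken from the defines above):

.. code-block:: c++

    #include <fenv.h>
    #include <stdio.h>

    int main() {
    #if HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
      feclearexcept(FE_ALL_EXCEPT);   // reset all exception flags
      volatile double D = 1.0;
      D /= 3.0;                       // quotient is not exactly representable
      if (fetestexcept(FE_INEXACT))
        puts("FE_INEXACT raised");
    #endif
      return 0;
    }
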
@@ -16082,16 +16641,19 @@ done
{ echo "$as_me:$LINENO: checking __crashreporter_info__" >&5
echo $ECHO_N "checking __crashreporter_info__... $ECHO_C" >&6; }
cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
+
+ /* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
-extern const char *__crashreporter_info__;
- int main() {
- __crashreporter_info__ = "test";
- return 0;
- }
+
+ extern const char *__crashreporter_info__;
+ int main() {
+ __crashreporter_info__ = "test";
+ return 0;
+ }
+
_ACEOF
rm -f conftest.$ac_objext conftest$ac_exeext
@@ -16128,6 +16690,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; }; then
+
{ echo "$as_me:$LINENO: result: yes" >&5
echo "${ECHO_T}yes" >&6; }
@@ -16135,17 +16698,20 @@ cat >>confdefs.h <<\_ACEOF
#define HAVE_CRASHREPORTER_INFO 1
_ACEOF
+
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { echo "$as_me:$LINENO: result: no" >&5
+
+ { echo "$as_me:$LINENO: result: no" >&5
echo "${ECHO_T}no" >&6; }
cat >>confdefs.h <<\_ACEOF
#define HAVE_CRASHREPORTER_INFO 0
_ACEOF
+
fi
rm -f core conftest.err conftest.$ac_objext \
@@ -16153,7 +16719,6 @@ rm -f core conftest.err conftest.$ac_objext \
-
{ echo "$as_me:$LINENO: checking for HUGE_VAL sanity" >&5
echo $ECHO_N "checking for HUGE_VAL sanity... $ECHO_C" >&6; }
if test "${ac_cv_huge_val_sanity+set}" = set; then
@@ -16956,6 +17521,120 @@ done
+for ac_func in log log2 log10 exp exp2
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
+
for ac_func in getpagesize getrusage getrlimit setrlimit gettimeofday
do
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
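
The loop added in the hunk above probes ``log``, ``log2``, ``log10``, ``exp``
and ``exp2`` one by one and, per the ``$as_tr_cpp`` line, defines
``HAVE_<FUNC>`` (e.g. ``HAVE_LOG2``) for each that links. A sketch of how such
a define is typically consumed, with a portable fallback (illustrative, not
code from this tree):

.. code-block:: c++

    #include <math.h>
    #include <stdio.h>

    int main() {
    #ifdef HAVE_LOG2
      double R = log2(8.0);             // C99 libm, found by the probe
    #else
      double R = log(8.0) / log(2.0);   // portable fallback
    #endif
      printf("%f\n", R);                // prints 3.000000
      return 0;
    }
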
@@ -17728,18 +18407,20 @@ _ACEOF
fi
-{ echo "$as_me:$LINENO: checking for srand48/lrand48/drand48 in <stdlib.h>" >&5
+
+ { echo "$as_me:$LINENO: checking for srand48/lrand48/drand48 in <stdlib.h>" >&5
echo $ECHO_N "checking for srand48/lrand48/drand48 in <stdlib.h>... $ECHO_C" >&6; }
if test "${ac_cv_func_rand48+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
- ac_ext=cpp
+
+ ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
- cat >conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
@@ -17797,12 +18478,13 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- ac_ext=c
+ ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
fi
{ echo "$as_me:$LINENO: result: $ac_cv_func_rand48" >&5
echo "${ECHO_T}$ac_cv_func_rand48" >&6; }
@@ -19272,10 +19954,19 @@ if test "$llvm_cv_os_type" = "MingW" ; then
{ echo "$as_me:$LINENO: checking whether EnumerateLoadedModules() accepts new decl" >&5
echo $ECHO_N "checking whether EnumerateLoadedModules() accepts new decl... $ECHO_C" >&6; }
cat >conftest.$ac_ext <<_ACEOF
-#include <windows.h>
-#include <imagehlp.h>
-extern void foo(PENUMLOADED_MODULES_CALLBACK);
-extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID));
+
+ /* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+ #include <windows.h>
+ #include <imagehlp.h>
+ extern void foo(PENUMLOADED_MODULES_CALLBACK);
+ extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID));
+
+
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
@@ -19336,18 +20027,20 @@ _ACEOF
fi
-{ echo "$as_me:$LINENO: checking for isnan in <math.h>" >&5
+
+ { echo "$as_me:$LINENO: checking for isnan in <math.h>" >&5
echo $ECHO_N "checking for isnan in <math.h>... $ECHO_C" >&6; }
if test "${ac_cv_func_isnan_in_math_h+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
- ac_ext=cpp
+
+ ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
- cat >conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
@@ -19405,12 +20098,13 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- ac_ext=c
+ ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
fi
{ echo "$as_me:$LINENO: result: $ac_cv_func_isnan_in_math_h" >&5
echo "${ECHO_T}$ac_cv_func_isnan_in_math_h" >&6; }
@@ -19424,18 +20118,20 @@ _ACEOF
fi
-{ echo "$as_me:$LINENO: checking for isnan in <cmath>" >&5
+
+ { echo "$as_me:$LINENO: checking for isnan in <cmath>" >&5
echo $ECHO_N "checking for isnan in <cmath>... $ECHO_C" >&6; }
if test "${ac_cv_func_isnan_in_cmath+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
- ac_ext=cpp
+
+ ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
- cat >conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
@@ -19493,12 +20189,13 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- ac_ext=c
+ ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
fi
{ echo "$as_me:$LINENO: result: $ac_cv_func_isnan_in_cmath" >&5
echo "${ECHO_T}$ac_cv_func_isnan_in_cmath" >&6; }
@@ -19511,18 +20208,20 @@ _ACEOF
fi
-{ echo "$as_me:$LINENO: checking for std::isnan in <cmath>" >&5
+
+ { echo "$as_me:$LINENO: checking for std::isnan in <cmath>" >&5
echo $ECHO_N "checking for std::isnan in <cmath>... $ECHO_C" >&6; }
if test "${ac_cv_func_std_isnan_in_cmath+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
- ac_ext=cpp
+
+ ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
- cat >conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
@@ -19580,12 +20279,13 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- ac_ext=c
+ ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
fi
{ echo "$as_me:$LINENO: result: $ac_cv_func_std_isnan_in_cmath" >&5
echo "${ECHO_T}$ac_cv_func_std_isnan_in_cmath" >&6; }
@@ -19599,18 +20299,21 @@ _ACEOF
fi
-{ echo "$as_me:$LINENO: checking for isinf in <math.h>" >&5
+
+
+ { echo "$as_me:$LINENO: checking for isinf in <math.h>" >&5
echo $ECHO_N "checking for isinf in <math.h>... $ECHO_C" >&6; }
if test "${ac_cv_func_isinf_in_math_h+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
- ac_ext=cpp
+
+ ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
- cat >conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
@@ -19668,12 +20371,13 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- ac_ext=c
+ ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
fi
{ echo "$as_me:$LINENO: result: $ac_cv_func_isinf_in_math_h" >&5
echo "${ECHO_T}$ac_cv_func_isinf_in_math_h" >&6; }
@@ -19686,18 +20390,20 @@ _ACEOF
fi
-{ echo "$as_me:$LINENO: checking for isinf in <cmath>" >&5
+
+ { echo "$as_me:$LINENO: checking for isinf in <cmath>" >&5
echo $ECHO_N "checking for isinf in <cmath>... $ECHO_C" >&6; }
if test "${ac_cv_func_isinf_in_cmath+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
- ac_ext=cpp
+
+ ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
- cat >conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
@@ -19755,12 +20461,13 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- ac_ext=c
+ ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
fi
{ echo "$as_me:$LINENO: result: $ac_cv_func_isinf_in_cmath" >&5
echo "${ECHO_T}$ac_cv_func_isinf_in_cmath" >&6; }
@@ -19773,18 +20480,20 @@ _ACEOF
fi
-{ echo "$as_me:$LINENO: checking for std::isinf in <cmath>" >&5
+
+ { echo "$as_me:$LINENO: checking for std::isinf in <cmath>" >&5
echo $ECHO_N "checking for std::isinf in <cmath>... $ECHO_C" >&6; }
if test "${ac_cv_func_std_isinf_in_cmath+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
- ac_ext=cpp
+
+ ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
- cat >conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
@@ -19842,12 +20551,13 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- ac_ext=c
+ ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
fi
{ echo "$as_me:$LINENO: result: $ac_cv_func_std_isinf_in_cmath" >&5
echo "${ECHO_T}$ac_cv_func_std_isinf_in_cmath" >&6; }
@@ -19860,18 +20570,20 @@ _ACEOF
fi
-{ echo "$as_me:$LINENO: checking for finite in <ieeefp.h>" >&5
+
+ { echo "$as_me:$LINENO: checking for finite in <ieeefp.h>" >&5
echo $ECHO_N "checking for finite in <ieeefp.h>... $ECHO_C" >&6; }
if test "${ac_cv_func_finite_in_ieeefp_h+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
- ac_ext=cpp
+
+ ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
- cat >conftest.$ac_ext <<_ACEOF
+ cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
@@ -19929,12 +20641,13 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- ac_ext=c
+ ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
fi
{ echo "$as_me:$LINENO: result: $ac_cv_func_finite_in_ieeefp_h" >&5
echo "${ECHO_T}$ac_cv_func_finite_in_ieeefp_h" >&6; }
@@ -20551,19 +21264,22 @@ ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ex
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
+
+ /* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
-int main() {
- volatile unsigned long val = 1;
- __sync_synchronize();
- __sync_val_compare_and_swap(&val, 1, 0);
- __sync_add_and_fetch(&val, 1);
- __sync_sub_and_fetch(&val, 1);
- return 0;
- }
+
+ int main() {
+ volatile unsigned long val = 1;
+ __sync_synchronize();
+ __sync_val_compare_and_swap(&val, 1, 0);
+ __sync_add_and_fetch(&val, 1);
+ __sync_sub_and_fetch(&val, 1);
+ return 0;
+ }
+
_ACEOF
rm -f conftest.$ac_objext conftest$ac_exeext
@@ -20580,7 +21296,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } &&
- { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
{ (case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
@@ -20600,11 +21316,6 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; }; then
- ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ echo "$as_me:$LINENO: result: yes" >&5
echo "${ECHO_T}yes" >&6; }
@@ -20613,11 +21324,13 @@ cat >>confdefs.h <<\_ACEOF
#define LLVM_HAS_ATOMICS 1
_ACEOF
+
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { echo "$as_me:$LINENO: result: no" >&5
+
+ { echo "$as_me:$LINENO: result: no" >&5
echo "${ECHO_T}no" >&6; }
cat >>confdefs.h <<\_ACEOF
@@ -20626,10 +21339,17 @@ _ACEOF
{ echo "$as_me:$LINENO: WARNING: LLVM will be built thread-unsafe because atomic builtins are missing" >&5
echo "$as_me: WARNING: LLVM will be built thread-unsafe because atomic builtins are missing" >&2;}
+
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
if test "$llvm_cv_os_type" = "Linux" -a "$llvm_cv_target_arch" = "x86_64" ; then
@@ -21574,7 +22294,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by LLVM $as_me 3.2svn, which was
+This file was extended by LLVM $as_me 3.3svn, which was
generated by GNU Autoconf 2.60. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -21627,7 +22347,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-LLVM config.status 3.2svn
+LLVM config.status 3.3svn
configured by $0, generated by GNU Autoconf 2.60,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
@@ -21912,6 +22632,9 @@ BUILD_CXX!$BUILD_CXX$ac_delim
CVSBUILD!$CVSBUILD$ac_delim
ENABLE_LIBCPP!$ENABLE_LIBCPP$ac_delim
ENABLE_CXX11!$ENABLE_CXX11$ac_delim
+ENABLE_CLANG_ARCMT!$ENABLE_CLANG_ARCMT$ac_delim
+ENABLE_CLANG_STATIC_ANALYZER!$ENABLE_CLANG_STATIC_ANALYZER$ac_delim
+ENABLE_CLANG_REWRITER!$ENABLE_CLANG_REWRITER$ac_delim
ENABLE_OPTIMIZED!$ENABLE_OPTIMIZED$ac_delim
ENABLE_PROFILING!$ENABLE_PROFILING$ac_delim
DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim
@@ -21923,9 +22646,6 @@ DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim
JIT!$JIT$ac_delim
TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
-ENABLE_DOCS!$ENABLE_DOCS$ac_delim
-ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
-LLVM_ENABLE_THREADS!$LLVM_ENABLE_THREADS$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -21967,6 +22687,9 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+ENABLE_DOCS!$ENABLE_DOCS$ac_delim
+ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
+LLVM_ENABLE_THREADS!$LLVM_ENABLE_THREADS$ac_delim
ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
ENABLE_PIC!$ENABLE_PIC$ac_delim
ENABLE_SHARED!$ENABLE_SHARED$ac_delim
@@ -22032,6 +22755,9 @@ LIBADD_DL!$LIBADD_DL$ac_delim
NO_VARIADIC_MACROS!$NO_VARIADIC_MACROS$ac_delim
NO_MISSING_FIELD_INITIALIZERS!$NO_MISSING_FIELD_INITIALIZERS$ac_delim
COVERED_SWITCH_DEFAULT!$COVERED_SWITCH_DEFAULT$ac_delim
+NO_MAYBE_UNINITIALIZED!$NO_MAYBE_UNINITIALIZED$ac_delim
+NO_UNINITIALIZED!$NO_UNINITIALIZED$ac_delim
+PYTHON!$PYTHON$ac_delim
USE_UDIS86!$USE_UDIS86$ac_delim
USE_OPROFILE!$USE_OPROFILE$ac_delim
USE_INTEL_JITEVENTS!$USE_INTEL_JITEVENTS$ac_delim
@@ -22058,13 +22784,54 @@ ALL_BINDINGS!$ALL_BINDINGS$ac_delim
OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim
ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim
RPATH!$RPATH$ac_delim
+_ACEOF
+
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
+ break
+ elif $ac_last_try; then
+ { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+ { (exit 1); exit 1; }; }
+ else
+ ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+ fi
+done
+
+ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed`
+if test -n "$ac_eof"; then
+ ac_eof=`echo "$ac_eof" | sort -nru | sed 1q`
+ ac_eof=`expr $ac_eof + 1`
+fi
+
+cat >>$CONFIG_STATUS <<_ACEOF
+cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+_ACEOF
+sed '
+s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g
+s/^/s,@/; s/!/@,|#_!!_#|/
+:n
+t n
+s/'"$ac_delim"'$/,g/; t
+s/$/\\/; p
+N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n
+' >>$CONFIG_STATUS <conf$$subs.sed
+rm -f conf$$subs.sed
+cat >>$CONFIG_STATUS <<_ACEOF
+CEOF$ac_eof
+_ACEOF
+
+
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+ cat >conf$$subs.sed <<_ACEOF
RDYNAMIC!$RDYNAMIC$ac_delim
program_prefix!$program_prefix$ac_delim
LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 4; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
@@ -22082,7 +22849,7 @@ if test -n "$ac_eof"; then
fi
cat >>$CONFIG_STATUS <<_ACEOF
-cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof
+cat >"\$tmp/subs-3.sed" <<\CEOF$ac_eof
/@[a-zA-Z_][a-zA-Z_0-9]*@/!b end
_ACEOF
sed '
@@ -22345,7 +23112,7 @@ s&@abs_builddir@&$ac_abs_builddir&;t t
s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
s&@INSTALL@&$ac_INSTALL&;t t
$ac_datarootdir_hack
-" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" >$tmp/out
+" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" | sed -f "$tmp/subs-3.sed" >$tmp/out
test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
{ ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
diff --git a/docs/AliasAnalysis.rst b/docs/AliasAnalysis.rst
index fdaec89cdf6b..712d57d14b12 100644
--- a/docs/AliasAnalysis.rst
+++ b/docs/AliasAnalysis.rst
@@ -1,5 +1,3 @@
-.. _alias_analysis:
-
==================================
LLVM Alias Analysis Infrastructure
==================================
@@ -205,7 +203,7 @@ look at the `various alias analysis implementations`_ included with LLVM.
Different Pass styles
---------------------
-The first step to determining what type of `LLVM pass <WritingAnLLVMPass.html>`_
+The first step is determining what type of :doc:`LLVM pass <WritingAnLLVMPass>`
you need to use for your Alias Analysis. As is the case with most other
analyses and transformations, the answer should be fairly obvious from what type
of problem you are trying to solve:
@@ -253,25 +251,24 @@ Interfaces which may be specified
All of the `AliasAnalysis
<http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html>`__ virtual methods
-default to providing `chaining`_ to another alias analysis implementation, which
-ends up returning conservatively correct information (returning "May" Alias and
-"Mod/Ref" for alias and mod/ref queries respectively). Depending on the
-capabilities of the analysis you are implementing, you just override the
-interfaces you can improve.
+default to providing :ref:`chaining <aliasanalysis-chaining>` to another alias
+analysis implementation, which ends up returning conservatively correct
+information (returning "May" Alias and "Mod/Ref" for alias and mod/ref queries
+respectively). Depending on the capabilities of the analysis you are
+implementing, you just override the interfaces you can improve.
-.. _chaining:
-.. _chain:
+.. _aliasanalysis-chaining:
``AliasAnalysis`` chaining behavior
-----------------------------------
-With only one special exception (the `no-aa`_ pass) every alias analysis pass
-chains to another alias analysis implementation (for example, the user can
-specify "``-basicaa -ds-aa -licm``" to get the maximum benefit from both alias
-analyses). The alias analysis class automatically takes care of most of this
-for methods that you don't override. For methods that you do override, in code
-paths that return a conservative MayAlias or Mod/Ref result, simply return
-whatever the superclass computes. For example:
+With only one special exception (the :ref:`-no-aa <aliasanalysis-no-aa>` pass)
+every alias analysis pass chains to another alias analysis implementation (for
+example, the user can specify "``-basicaa -ds-aa -licm``" to get the maximum
+benefit from both alias analyses). The alias analysis class automatically
+takes care of most of this for methods that you don't override. For methods
+that you do override, in code paths that return a conservative MayAlias or
+Mod/Ref result, simply return whatever the superclass computes. For example:
.. code-block:: c++
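
The example that this hunk's final ``code-block`` directive introduces is cut
off by the diff context. The chaining pattern it describes looks roughly like
the sketch below, with a hypothetical ``alias`` override whose conservative
paths fall through to the superclass (``MyAA`` and the condition are
placeholders):

.. code-block:: c++

    AliasAnalysis::AliasResult
    MyAA::alias(const Location &LocA, const Location &LocB) {
      if (/* this analysis proves the locations are disjoint */ false)
        return NoAlias;
      // Conservative paths chain to the next alias analysis in line.
      return AliasAnalysis::alias(LocA, LocB);
    }
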
@@ -504,11 +501,11 @@ Available ``AliasAnalysis`` implementations
-------------------------------------------
This section lists the various implementations of the ``AliasAnalysis``
-interface. With the exception of the `-no-aa`_ implementation, all of these
-`chain`_ to other alias analysis implementations.
+interface. With the exception of the :ref:`-no-aa <aliasanalysis-no-aa>`
+implementation, all of these :ref:`chain <aliasanalysis-chaining>` to other
+alias analysis implementations.
-.. _no-aa:
-.. _-no-aa:
+.. _aliasanalysis-no-aa:
The ``-no-aa`` pass
^^^^^^^^^^^^^^^^^^^
diff --git a/docs/Atomics.rst b/docs/Atomics.rst
index 1bca53e2b17e..705d73fbaba4 100644
--- a/docs/Atomics.rst
+++ b/docs/Atomics.rst
@@ -1,5 +1,3 @@
-.. _atomics:
-
==============================================
LLVM Atomic Instructions and Concurrency Guide
==============================================
diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst
index bd26f7b1502e..c83b6c1801cd 100644
--- a/docs/BitCodeFormat.rst
+++ b/docs/BitCodeFormat.rst
@@ -1,5 +1,3 @@
-.. _bitcode_format:
-
.. role:: raw-html(raw)
:format: html
@@ -54,8 +52,8 @@ structure. This structure consists of the following concepts:
* Abbreviations, which specify compression optimizations for the file.
-Note that the `llvm-bcanalyzer <CommandGuide/html/llvm-bcanalyzer.html>`_ tool
-can be used to dump and inspect arbitrary bitstreams, which is very useful for
+Note that the :doc:`llvm-bcanalyzer <CommandGuide/llvm-bcanalyzer>` tool can be
+used to dump and inspect arbitrary bitstreams, which is very useful for
understanding the encoding.
.. _magic number:
diff --git a/docs/BranchWeightMetadata.rst b/docs/BranchWeightMetadata.rst
index f0df971f8745..71ecd34c8216 100644
--- a/docs/BranchWeightMetadata.rst
+++ b/docs/BranchWeightMetadata.rst
@@ -1,5 +1,3 @@
-.. _branch_weight:
-
===========================
LLVM Branch Weight Metadata
===========================
@@ -27,8 +25,8 @@ Supported Instructions
``BranchInst``
^^^^^^^^^^^^^^
-Metadata is only assign to the conditional branches. There are two extra
-operarands, for the true and the false branch.
+Metadata is only assigned to the conditional branches. There are two extra
+operands for the true and the false branch.
.. code-block:: llvm
@@ -41,8 +39,8 @@ operarands, for the true and the false branch.
``SwitchInst``
^^^^^^^^^^^^^^
-Branch weights are assign to every case (including ``default`` case which is
-always case #0).
+Branch weights are assigned to every case (including the ``default`` case which
+is always case #0).
.. code-block:: llvm
@@ -55,7 +53,7 @@ always case #0).
``IndirectBrInst``
^^^^^^^^^^^^^^^^^^
-Branch weights are assign to every destination.
+Branch weights are assigned to every destination.
.. code-block:: llvm
diff --git a/docs/Bugpoint.rst b/docs/Bugpoint.rst
index 9ccf0cc2d9d6..1a5fc8c02764 100644
--- a/docs/Bugpoint.rst
+++ b/docs/Bugpoint.rst
@@ -1,5 +1,3 @@
-.. _bugpoint:
-
====================================
LLVM bugpoint tool: design and usage
====================================
@@ -136,9 +134,9 @@ non-obvious ways. Here are some hints and tips:
It is often useful to capture the output of the program to file. For example,
in the C shell, you can run:
- .. code-block:: bash
+ .. code-block:: console
- bugpoint ... |& tee bugpoint.log
+ $ bugpoint ... |& tee bugpoint.log
to get a copy of ``bugpoint``'s output in the file ``bugpoint.log``, as well
as on your terminal.
diff --git a/docs/CMake.rst b/docs/CMake.rst
index 7f0420c4469f..fb081d7b98c6 100644
--- a/docs/CMake.rst
+++ b/docs/CMake.rst
@@ -1,5 +1,3 @@
-.. _building-with-cmake:
-
========================
Building LLVM with CMake
========================
@@ -36,7 +34,7 @@ We use here the command-line, non-interactive CMake interface.
#. Create a directory for containing the build. It is not supported to build
LLVM on the source directory. cd to this directory:
- .. code-block:: bash
+ .. code-block:: console
$ mkdir mybuilddir
$ cd mybuilddir
@@ -44,7 +42,7 @@ We use here the command-line, non-interactive CMake interface.
#. Execute this command on the shell replacing `path/to/llvm/source/root` with
the path to the root of your LLVM source tree:
- .. code-block:: bash
+ .. code-block:: console
$ cmake path/to/llvm/source/root
@@ -80,14 +78,14 @@ the corresponding *Generator* for creating files for your build tool. You can
explicitly specify the generator with the command line option ``-G "Name of the
generator"``. For knowing the available generators on your platform, execute
-.. code-block:: bash
+.. code-block:: console
$ cmake --help
This will list the generator's names at the end of the help text. Generator's
names are case-sensitive. Example:
-.. code-block:: bash
+.. code-block:: console
$ cmake -G "Visual Studio 9 2008" path/to/llvm/source/root
@@ -110,14 +108,14 @@ Variables customize how the build will be generated. Options are boolean
variables, with possible values ON/OFF. Options and variables are defined on the
CMake command line like this:
-.. code-block:: bash
+.. code-block:: console
$ cmake -DVARIABLE=value path/to/llvm/source
You can set a variable after the initial CMake invocation for changing its
value. You can also undefine a variable:
-.. code-block:: bash
+.. code-block:: console
$ cmake -UVARIABLE path/to/llvm/source
@@ -127,7 +125,7 @@ on the root of the build directory. Do not hand-edit it.
Variables are listed here appending its type after a colon. It is correct to
write the variable and the type on the CMake command line:
-.. code-block:: bash
+.. code-block:: console
$ cmake -DVARIABLE:TYPE=value path/to/llvm/source
@@ -206,7 +204,7 @@ LLVM-specific variables
tests.
**LLVM_APPEND_VC_REV**:BOOL
- Append version control revision info (svn revision number or git revision id)
+ Append version control revision info (svn revision number or Git revision id)
to LLVM version string (stored in the PACKAGE_VERSION macro). For this to work
cmake must be invoked before the build. Defaults to OFF.
@@ -280,7 +278,7 @@ Testing is performed when the *check* target is built. For instance, if you are
using makefiles, execute this command while on the top level of your build
directory:
-.. code-block:: bash
+.. code-block:: console
$ make check
@@ -355,13 +353,15 @@ an equivalent variant of snippet shown above:
target_link_libraries(mycompiler ${REQ_LLVM_LIBRARIES})
+.. _cmake-out-of-source-pass:
+
Developing LLVM pass out of source
----------------------------------
It is possible to develop LLVM passes against installed LLVM. An example of
project layout provided below:
-.. code-block:: bash
+.. code-block:: none
<project dir>/
|
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index 5fab76ec1a44..75415ab9ccda 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -1,5 +1,3 @@
-.. _code_generator:
-
==========================================
The LLVM Target-Independent Code Generator
==========================================
@@ -17,6 +15,8 @@ The LLVM Target-Independent Code Generator
.partial { background-color: #F88017 }
.yes { background-color: #0F0; }
.yes:before { content: "Y" }
+ .na { background-color: #6666FF; }
+ .na:before { content: "N/A" }
</style>
.. contents::
@@ -172,7 +172,7 @@ architecture. These target descriptions often have a large amount of common
information (e.g., an ``add`` instruction is almost identical to a ``sub``
instruction). In order to allow the maximum amount of commonality to be
factored out, the LLVM code generator uses the
-`TableGen <TableGenFundamentals.html>`_ tool to describe big chunks of the
+:doc:`TableGen <TableGenFundamentals>` tool to describe big chunks of the
target machine, which allows the use of domain-specific and target-specific
abstractions to reduce the amount of repetition.
@@ -230,7 +230,7 @@ for structures, the alignment requirements for various data types, the size of
pointers in the target, and whether the target is little-endian or
big-endian.
-.. _targetlowering:
+.. _TargetLowering:
The ``TargetLowering`` class
----------------------------
@@ -250,6 +250,8 @@ operations. Among other things, this class indicates:
* various high-level characteristics, like whether it is profitable to turn
division by a constant into a multiplication sequence.
+.. _TargetRegisterInfo:
+
The ``TargetRegisterInfo`` class
--------------------------------
@@ -283,12 +285,10 @@ The ``TargetInstrInfo`` class
-----------------------------
The ``TargetInstrInfo`` class is used to describe the machine instructions
-supported by the target. It is essentially an array of ``TargetInstrDescriptor``
-objects, each of which describes one instruction the target
-supports. Descriptors define things like the mnemonic for the opcode, the number
-of operands, the list of implicit register uses and defs, whether the
-instruction has certain target-independent properties (accesses memory, is
-commutable, etc), and holds any target-specific flags.
+supported by the target. Descriptions define things like the mnemonic for
+the opcode, the number of operands, the list of implicit register uses and defs,
+whether the instruction has certain target-independent properties (accesses
+memory, is commutable, etc), and holds any target-specific flags.
The ``TargetFrameInfo`` class
-----------------------------
@@ -771,6 +771,8 @@ value of type i1, i8, i16, or i64 would be illegal, as would a DAG that uses a
SREM or UREM operation. The `legalize types`_ and `legalize operations`_ phases
are responsible for turning an illegal DAG into a legal DAG.
+.. _SelectionDAG-Process:
+
SelectionDAG Instruction Selection Process
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -874,7 +876,7 @@ found, the elements are converted to scalars ("scalarizing").
A target implementation tells the legalizer which types are supported (and which
register class to use for them) by calling the ``addRegisterClass`` method in
-its TargetLowering constructor.
+its ``TargetLowering`` constructor.
.. _legalize operations:
.. _Legalizer:
@@ -968,7 +970,8 @@ The ``FADDS`` instruction is a simple binary single-precision add instruction.
To perform this pattern match, the PowerPC backend includes the following
instruction definitions:
-::
+.. code-block:: text
+ :emphasize-lines: 4-5,9
def FMADDS : AForm_1<59, 29,
(ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
@@ -980,10 +983,10 @@ instruction definitions:
"fadds $FRT, $FRA, $FRB",
[(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
-The portion of the instruction definition in bold indicates the pattern used to
-match the instruction. The DAG operators (like ``fmul``/``fadd``) are defined
-in the ``include/llvm/Target/TargetSelectionDAG.td`` file. " ``F4RC``" is the
-register class of the input and result values.
+The highlighted portion of the instruction definitions indicates the pattern
+used to match the instructions. The DAG operators (like ``fmul``/``fadd``)
+are defined in the ``include/llvm/Target/TargetSelectionDAG.td`` file.
+"``F4RC``" is the register class of the input and result values.
The TableGen DAG instruction selector generator reads the instruction patterns
in the ``.td`` file and automatically builds parts of the pattern matching code
@@ -1035,6 +1038,24 @@ for your target. It has the following strengths:
are used to manipulate the input immediate (in this case, take the high or low
16-bits of the immediate).
+* When using the 'Pat' class to map a pattern to an instruction that has one
+ or more complex operands (e.g. the `X86 addressing mode`_), the pattern may
+ either specify the operand as a whole using a ``ComplexPattern``, or else it
+ may specify the components of the complex operand separately. The latter is
+ done e.g. for pre-increment instructions by the PowerPC back end:
+
+ ::
+
+ def STWU : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+
+ def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff),
+ (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc:$ptrreg)>;
+
+ Here, the pair of ``ptroff`` and ``ptrreg`` operands is matched onto the
+ complex operand ``dst`` of class ``memri`` in the ``STWU`` instruction.
+
* While the system does automate a lot, it still allows you to write custom C++
code to match special cases if there is something that is hard to
express.
@@ -1727,6 +1748,8 @@ This section of the document explains features or design decisions that are
specific to the code generator for a particular target. First we start with a
table that summarizes what features are supported by each target.
+.. _target-feature-matrix:
+
Target Feature Matrix
---------------------
@@ -1741,12 +1764,14 @@ the key:
:raw-html:`<table border="1" cellspacing="0">`
:raw-html:`<tr>`
:raw-html:`<th>Unknown</th>`
+:raw-html:`<th>Not Applicable</th>`
:raw-html:`<th>No support</th>`
:raw-html:`<th>Partial Support</th>`
:raw-html:`<th>Complete Support</th>`
:raw-html:`</tr>`
:raw-html:`<tr>`
:raw-html:`<td class="unknown"></td>`
+:raw-html:`<td class="na"></td>`
:raw-html:`<td class="no"></td>`
:raw-html:`<td class="partial"></td>`
:raw-html:`<td class="yes"></td>`
@@ -1762,12 +1787,11 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<th>Feature</th>`
:raw-html:`<th>ARM</th>`
-:raw-html:`<th>CellSPU</th>`
:raw-html:`<th>Hexagon</th>`
:raw-html:`<th>MBlaze</th>`
:raw-html:`<th>MSP430</th>`
:raw-html:`<th>Mips</th>`
-:raw-html:`<th>PTX</th>`
+:raw-html:`<th>NVPTX</th>`
:raw-html:`<th>PowerPC</th>`
:raw-html:`<th>Sparc</th>`
:raw-html:`<th>X86</th>`
@@ -1777,12 +1801,11 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_reliable">is generally reliable</a></td>`
:raw-html:`<td class="yes"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="yes"></td> <!-- Hexagon -->`
:raw-html:`<td class="no"></td> <!-- MBlaze -->`
:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
:raw-html:`<td class="yes"></td> <!-- Mips -->`
-:raw-html:`<td class="no"></td> <!-- PTX -->`
+:raw-html:`<td class="yes"></td> <!-- NVPTX -->`
:raw-html:`<td class="yes"></td> <!-- PowerPC -->`
:raw-html:`<td class="yes"></td> <!-- Sparc -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
@@ -1792,12 +1815,11 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_asmparser">assembly parser</a></td>`
:raw-html:`<td class="no"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="no"></td> <!-- Hexagon -->`
:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
:raw-html:`<td class="no"></td> <!-- MSP430 -->`
:raw-html:`<td class="no"></td> <!-- Mips -->`
-:raw-html:`<td class="no"></td> <!-- PTX -->`
+:raw-html:`<td class="no"></td> <!-- NVPTX -->`
:raw-html:`<td class="no"></td> <!-- PowerPC -->`
:raw-html:`<td class="no"></td> <!-- Sparc -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
@@ -1807,12 +1829,11 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_disassembler">disassembler</a></td>`
:raw-html:`<td class="yes"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="no"></td> <!-- Hexagon -->`
:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
:raw-html:`<td class="no"></td> <!-- MSP430 -->`
:raw-html:`<td class="no"></td> <!-- Mips -->`
-:raw-html:`<td class="no"></td> <!-- PTX -->`
+:raw-html:`<td class="na"></td> <!-- NVPTX -->`
:raw-html:`<td class="no"></td> <!-- PowerPC -->`
:raw-html:`<td class="no"></td> <!-- Sparc -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
@@ -1822,12 +1843,11 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_inlineasm">inline asm</a></td>`
:raw-html:`<td class="yes"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="yes"></td> <!-- Hexagon -->`
:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
:raw-html:`<td class="no"></td> <!-- Mips -->`
-:raw-html:`<td class="unknown"></td> <!-- PTX -->`
+:raw-html:`<td class="yes"></td> <!-- NVPTX -->`
:raw-html:`<td class="yes"></td> <!-- PowerPC -->`
:raw-html:`<td class="unknown"></td> <!-- Sparc -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
@@ -1837,12 +1857,11 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_jit">jit</a></td>`
:raw-html:`<td class="partial"><a href="#feat_jit_arm">*</a></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="no"></td> <!-- Hexagon -->`
:raw-html:`<td class="no"></td> <!-- MBlaze -->`
:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
:raw-html:`<td class="yes"></td> <!-- Mips -->`
-:raw-html:`<td class="unknown"></td> <!-- PTX -->`
+:raw-html:`<td class="na"></td> <!-- NVPTX -->`
:raw-html:`<td class="yes"></td> <!-- PowerPC -->`
:raw-html:`<td class="unknown"></td> <!-- Sparc -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
@@ -1852,12 +1871,11 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_objectwrite">.o&nbsp;file writing</a></td>`
:raw-html:`<td class="no"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="no"></td> <!-- Hexagon -->`
:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
:raw-html:`<td class="no"></td> <!-- MSP430 -->`
:raw-html:`<td class="no"></td> <!-- Mips -->`
-:raw-html:`<td class="no"></td> <!-- PTX -->`
+:raw-html:`<td class="na"></td> <!-- NVPTX -->`
:raw-html:`<td class="no"></td> <!-- PowerPC -->`
:raw-html:`<td class="no"></td> <!-- Sparc -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
@@ -1867,12 +1885,11 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_tailcall">tail calls</a></td>`
:raw-html:`<td class="yes"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="yes"></td> <!-- Hexagon -->`
:raw-html:`<td class="no"></td> <!-- MBlaze -->`
:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
:raw-html:`<td class="no"></td> <!-- Mips -->`
-:raw-html:`<td class="unknown"></td> <!-- PTX -->`
+:raw-html:`<td class="no"></td> <!-- NVPTX -->`
:raw-html:`<td class="yes"></td> <!-- PowerPC -->`
:raw-html:`<td class="unknown"></td> <!-- Sparc -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
@@ -1882,12 +1899,11 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_segstacks">segmented stacks</a></td>`
:raw-html:`<td class="no"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="no"></td> <!-- Hexagon -->`
:raw-html:`<td class="no"></td> <!-- MBlaze -->`
:raw-html:`<td class="no"></td> <!-- MSP430 -->`
:raw-html:`<td class="no"></td> <!-- Mips -->`
-:raw-html:`<td class="no"></td> <!-- PTX -->`
+:raw-html:`<td class="no"></td> <!-- NVPTX -->`
:raw-html:`<td class="no"></td> <!-- PowerPC -->`
:raw-html:`<td class="no"></td> <!-- Sparc -->`
:raw-html:`<td class="partial"><a href="#feat_segstacks_x86">*</a></td> <!-- X86 -->`
@@ -1991,8 +2007,8 @@ Tail call optimization
Tail call optimization, callee reusing the stack of the caller, is currently
supported on x86/x86-64 and PowerPC. It is performed if:
-* Caller and callee have the calling convention ``fastcc`` or ``cc 10`` (GHC
- call convention).
+* Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC
+ calling convention) or ``cc 11`` (HiPE calling convention).
* The call is a tail call - in tail position (ret immediately follows call and
ret uses value of call or is void).
@@ -2369,17 +2385,17 @@ Dynamic Allocation
TODO - More to come.
-The PTX backend
----------------
+The NVPTX backend
+-----------------
-The PTX code generator lives in the lib/Target/PTX directory. It is currently a
-work-in-progress, but already supports most of the code generation functionality
-needed to generate correct PTX kernels for CUDA devices.
+The NVPTX code generator under lib/Target/NVPTX is an open-source version of
+the NVIDIA NVPTX code generator for LLVM. It is contributed by NVIDIA and is
+a port of the code generator used in the CUDA compiler (nvcc). It targets the
+PTX 3.0/3.1 ISA and can target any compute capability greater than or equal to
+2.0 (Fermi).
-The code generator can target PTX 2.0+, and shader model 1.0+. The PTX ISA
-Reference Manual is used as the primary source of ISA information, though an
-effort is made to make the output of the code generator match the output of the
-NVidia nvcc compiler, whenever possible.
+This target is of production quality and should be completely compatible with
+the official NVIDIA toolchain.
Code Generator Options:
@@ -2389,39 +2405,28 @@ Code Generator Options:
:raw-html:`<th>Description</th>`
:raw-html:`</tr>`
:raw-html:`<tr>`
-:raw-html:`<td>``double``</td>`
-:raw-html:`<td align="left">If enabled, the map_f64_to_f32 directive is disabled in the PTX output, allowing native double-precision arithmetic</td>`
+:raw-html:`<td>sm_20</td>`
+:raw-html:`<td align="left">Set shader model/compute capability to 2.0</td>`
:raw-html:`</tr>`
:raw-html:`<tr>`
-:raw-html:`<td>``no-fma``</td>`
-:raw-html:`<td align="left">Disable generation of Fused-Multiply Add instructions, which may be beneficial for some devices</td>`
+:raw-html:`<td>sm_21</td>`
+:raw-html:`<td align="left">Set shader model/compute capability to 2.1</td>`
:raw-html:`</tr>`
:raw-html:`<tr>`
-:raw-html:`<td>``smxy / computexy``</td>`
-:raw-html:`<td align="left">Set shader model/compute capability to x.y, e.g. sm20 or compute13</td>`
+:raw-html:`<td>sm_30</td>`
+:raw-html:`<td align="left">Set shader model/compute capability to 3.0</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>sm_35</td>`
+:raw-html:`<td align="left">Set shader model/compute capability to 3.5</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>ptx30</td>`
+:raw-html:`<td align="left">Target PTX 3.0</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>ptx31</td>`
+:raw-html:`<td align="left">Target PTX 3.1</td>`
:raw-html:`</tr>`
:raw-html:`</table>`
-Working:
-
-* Arithmetic instruction selection (including combo FMA)
-
-* Bitwise instruction selection
-
-* Control-flow instruction selection
-
-* Function calls (only on SM 2.0+ and no return arguments)
-
-* Addresses spaces (0 = global, 1 = constant, 2 = local, 4 = shared)
-
-* Thread synchronization (bar.sync)
-
-* Special register reads ([N]TID, [N]CTAID, PMx, CLOCK, etc.)
-
-In Progress:
-
-* Robust call instruction selection
-
-* Stack frame allocation
-
-* Device-specific instruction scheduling optimizations
diff --git a/docs/CodingStandards.rst b/docs/CodingStandards.rst
index 90835307b15c..4d66ad757435 100644
--- a/docs/CodingStandards.rst
+++ b/docs/CodingStandards.rst
@@ -1,5 +1,3 @@
-.. _coding_standards:
-
=====================
LLVM Coding Standards
=====================
@@ -284,17 +282,10 @@ listed. We prefer these ``#include``\s to be listed in this order:
#. Main Module Header
#. Local/Private Headers
-#. ``llvm/*``
-#. ``llvm/Analysis/*``
-#. ``llvm/Assembly/*``
-#. ``llvm/Bitcode/*``
-#. ``llvm/CodeGen/*``
-#. ...
-#. ``llvm/Support/*``
-#. ``llvm/Config/*``
+#. ``llvm/...``
#. System ``#include``\s
-and each category should be sorted by name.
+and each category should be sorted lexicographically by the full path.
The `Main Module Header`_ file applies to ``.cpp`` files which implement an
interface defined by a ``.h`` file. This ``#include`` should always be included
@@ -409,7 +400,8 @@ code.
That said, LLVM does make extensive use of a hand-rolled form of RTTI that use
templates like `isa<>, cast<>, and dyn_cast<> <ProgrammersManual.html#isa>`_.
-This form of RTTI is opt-in and can be added to any class. It is also
+This form of RTTI is opt-in and can be
+:doc:`added to any class <HowToSetUpLLVMStyleRTTI>`. It is also
substantially more efficient than ``dynamic_cast<>``.
.. _static constructor:
@@ -713,8 +705,8 @@ sort of thing is:
.. code-block:: c++
bool FoundFoo = false;
- for (unsigned i = 0, e = BarList.size(); i != e; ++i)
- if (BarList[i]->isFoo()) {
+ for (unsigned I = 0, E = BarList.size(); I != E; ++I)
+ if (BarList[I]->isFoo()) {
FoundFoo = true;
break;
}
@@ -732,8 +724,8 @@ code to be structured like this:
/// \returns true if the specified list has an element that is a foo.
static bool containsFoo(const std::vector<Bar*> &List) {
- for (unsigned i = 0, e = List.size(); i != e; ++i)
- if (List[i]->isFoo())
+ for (unsigned I = 0, E = List.size(); I != E; ++I)
+ if (List[I]->isFoo())
return true;
return false;
}
@@ -820,8 +812,8 @@ Here are some examples of good and bad names:
Vehicle MakeVehicle(VehicleType Type) {
VehicleMaker M; // Might be OK if having a short life-span.
- Tire tmp1 = M.makeTire(); // Bad -- 'tmp1' provides no information.
- Light headlight = M.makeLight("head"); // Good -- descriptive.
+ Tire Tmp1 = M.makeTire(); // Bad -- 'Tmp1' provides no information.
+ Light Headlight = M.makeLight("head"); // Good -- descriptive.
...
}
@@ -841,9 +833,9 @@ enforced, and hopefully what to do about it. Here is one complete example:
.. code-block:: c++
- inline Value *getOperand(unsigned i) {
- assert(i < Operands.size() && "getOperand() out of range!");
- return Operands[i];
+ inline Value *getOperand(unsigned I) {
+ assert(I < Operands.size() && "getOperand() out of range!");
+ return Operands[I];
}
Here are more examples:
@@ -1035,7 +1027,7 @@ form has two problems. First it may be less efficient than evaluating it at the
start of the loop. In this case, the cost is probably minor --- a few extra
loads every time through the loop. However, if the base expression is more
complex, then the cost can rise quickly. I've seen loops where the end
-expression was actually something like: "``SomeMap[x]->end()``" and map lookups
+expression was actually something like: "``SomeMap[X]->end()``" and map lookups
really aren't cheap. By writing it in the second form consistently, you
eliminate the issue entirely and don't even have to think about it.
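+
+As a sketch of the recommended second form (``iterator`` stands in for the
+container's real iterator type), the map lookup happens once, before the loop:
+
+.. code-block:: c++
+
+  for (iterator I = SomeMap[X]->begin(), E = SomeMap[X]->end(); I != E; ++I)
+    ...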
@@ -1096,6 +1088,34 @@ flushes the output stream. In other words, these are equivalent:
Most of the time, you probably have no reason to flush the output stream, so
it's better to use a literal ``'\n'``.
+Don't use ``inline`` when defining a function in a class definition
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A member function defined in a class definition is implicitly inline, so don't
+put the ``inline`` keyword in this case.
+
+Don't:
+
+.. code-block:: c++
+
+ class Foo {
+ public:
+ inline void bar() {
+ // ...
+ }
+ };
+
+Do:
+
+.. code-block:: c++
+
+ class Foo {
+ public:
+ void bar() {
+ // ...
+ }
+ };
+
Microscopic Details
-------------------
@@ -1111,27 +1131,27 @@ macros. For example, this is good:
.. code-block:: c++
- if (x) ...
- for (i = 0; i != 100; ++i) ...
- while (llvm_rocks) ...
+ if (X) ...
+ for (I = 0; I != 100; ++I) ...
+ while (LLVMRocks) ...
somefunc(42);
assert(3 != 4 && "laws of math are failing me");
- a = foo(42, 92) + bar(x);
+ A = foo(42, 92) + bar(X);
and this is bad:
.. code-block:: c++
- if(x) ...
- for(i = 0; i != 100; ++i) ...
- while(llvm_rocks) ...
+ if(X) ...
+ for(I = 0; I != 100; ++I) ...
+ while(LLVMRocks) ...
somefunc (42);
assert (3 != 4 && "laws of math are failing me");
- a = foo (42, 92) + bar (x);
+ A = foo (42, 92) + bar (X);
The reason for doing this is not completely arbitrary. This style makes control
flow operators stand out more, and makes expressions flow better. The function
@@ -1139,11 +1159,11 @@ call operator binds very tightly as a postfix operator. Putting a space after a
function name (as in the last example) makes it appear that the code might bind
the arguments of the left-hand-side of a binary operator with the argument list
of a function and the name of the right side. More specifically, it is easy to
-misread the "``a``" example as:
+misread the "``A``" example as:
.. code-block:: c++
- a = foo ((42, 92) + bar) (x);
+ A = foo ((42, 92) + bar) (X);
when skimming through the code. By avoiding a space in a function, we avoid
this misinterpretation.
@@ -1310,7 +1330,7 @@ namespace just because it was declared there.
See Also
========
-A lot of these comments and recommendations have been culled for other sources.
+A lot of these comments and recommendations have been culled from other sources.
Two particularly important books for our work are:
#. `Effective C++
diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst
index 1d7a462bd71f..fce63ba688cc 100644
--- a/docs/CommandGuide/FileCheck.rst
+++ b/docs/CommandGuide/FileCheck.rst
@@ -1,99 +1,79 @@
FileCheck - Flexible pattern matching file verifier
===================================================
-
SYNOPSIS
--------
-
-**FileCheck** *match-filename* [*--check-prefix=XXX*] [*--strict-whitespace*]
-
+:program:`FileCheck` *match-filename* [*--check-prefix=XXX*] [*--strict-whitespace*]
DESCRIPTION
-----------
+:program:`FileCheck` reads two files (one from standard input, and one
+specified on the command line) and uses one to verify the other. This
+behavior is particularly useful for the testsuite, which wants to verify that
+the output of some tool (e.g. :program:`llc`) contains the expected information
+(for example, a movsd from esp or whatever is interesting). This is similar to
+using :program:`grep`, but it is optimized for matching multiple different
+inputs in one file in a specific order.
-**FileCheck** reads two files (one from standard input, and one specified on the
-command line) and uses one to verify the other. This behavior is particularly
-useful for the testsuite, which wants to verify that the output of some tool
-(e.g. llc) contains the expected information (for example, a movsd from esp or
-whatever is interesting). This is similar to using grep, but it is optimized
-for matching multiple different inputs in one file in a specific order.
-
-The *match-filename* file specifies the file that contains the patterns to
+The ``match-filename`` file specifies the file that contains the patterns to
match. The file to verify is always read from standard input.
-
OPTIONS
-------
-
-
-**-help**
+.. option:: -help
Print a summary of command line options.
+.. option:: --check-prefix prefix
+ FileCheck searches the contents of ``match-filename`` for patterns to match.
+ By default, these patterns are prefixed with "``CHECK:``". If you'd like to
+  use a different prefix (e.g. because the same input file is checking multiple
+  different tools or options), the :option:`--check-prefix` argument allows you
+ to specify a specific prefix to match.
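+
+  For example (a hypothetical RUN line using a custom ``X64`` prefix):
+
+  .. code-block:: llvm
+
+    ; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=X64
+    ; X64: pinsrd $1, %edi, %xmm0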
-**--check-prefix** *prefix*
-
- FileCheck searches the contents of *match-filename* for patterns to match. By
- default, these patterns are prefixed with "CHECK:". If you'd like to use a
- different prefix (e.g. because the same input file is checking multiple
- different tool or options), the **--check-prefix** argument allows you to specify
- a specific prefix to match.
-
-
-
-**--input-file** *filename*
+.. option:: --input-file filename
File to check (defaults to stdin).
-
-**--strict-whitespace**
+.. option:: --strict-whitespace
By default, FileCheck canonicalizes input horizontal whitespace (spaces and
tabs) which causes it to ignore these differences (a space will match a tab).
- The --strict-whitespace argument disables this behavior.
-
-
+ The :option:`--strict-whitespace` argument disables this behavior. End-of-line
+ sequences are canonicalized to UNIX-style '\n' in all modes.
-**-version**
+.. option:: -version
Show the version number of this program.
-
-
-
EXIT STATUS
-----------
-
-If **FileCheck** verifies that the file matches the expected contents, it exits
-with 0. Otherwise, if not, or if an error occurs, it will exit with a non-zero
-value.
-
+If :program:`FileCheck` verifies that the file matches the expected contents,
+it exits with 0. Otherwise, or if an error occurs, it will exit with a
+non-zero value.
TUTORIAL
--------
-
FileCheck is typically used from LLVM regression tests, being invoked on the RUN
line of the test. A simple example of using FileCheck from a RUN line looks
like this:
-
.. code-block:: llvm
; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s
-
-This syntax says to pipe the current file ("%s") into llvm-as, pipe that into
-llc, then pipe the output of llc into FileCheck. This means that FileCheck will
-be verifying its standard input (the llc output) against the filename argument
-specified (the original .ll file specified by "%s"). To see how this works,
-let's look at the rest of the .ll file (after the RUN line):
-
+This syntax says to pipe the current file ("``%s``") into ``llvm-as``, pipe
+that into ``llc``, then pipe the output of ``llc`` into ``FileCheck``. This
+means that FileCheck will be verifying its standard input (the llc output)
+against the filename argument specified (the original ``.ll`` file specified by
+"``%s``"). To see how this works, let's look at the rest of the ``.ll`` file
+(after the RUN line):
.. code-block:: llvm
@@ -113,32 +93,30 @@ let's look at the rest of the .ll file (after the RUN line):
ret void
}
+Here you can see some "``CHECK:``" lines specified in comments. Now you can
+see how the file is piped into ``llvm-as``, then ``llc``, and the machine code
+output is what we are verifying. FileCheck checks the machine code output to
+verify that it matches what the "``CHECK:``" lines specify.
-Here you can see some "CHECK:" lines specified in comments. Now you can see
-how the file is piped into llvm-as, then llc, and the machine code output is
-what we are verifying. FileCheck checks the machine code output to verify that
-it matches what the "CHECK:" lines specify.
-
-The syntax of the CHECK: lines is very simple: they are fixed strings that
+The syntax of the "``CHECK:``" lines is very simple: they are fixed strings that
must occur in order. FileCheck defaults to ignoring horizontal whitespace
differences (e.g. a space is allowed to match a tab) but otherwise, the contents
-of the CHECK: line is required to match some thing in the test file exactly.
+of the "``CHECK:``" line are required to match something in the test file exactly.
One nice thing about FileCheck (compared to grep) is that it allows merging
test cases together into logical groups. For example, because the test above
-is checking for the "sub1:" and "inc4:" labels, it will not match unless there
-is a "subl" in between those labels. If it existed somewhere else in the file,
-that would not count: "grep subl" matches if subl exists anywhere in the
-file.
+is checking for the "``sub1:``" and "``inc4:``" labels, it will not match
+unless there is a "``subl``" in between those labels. If it existed somewhere
+else in the file, that would not count: "``grep subl``" matches if "``subl``"
+exists anywhere in the file.
The FileCheck -check-prefix option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The FileCheck -check-prefix option allows multiple test configurations to be
-driven from one .ll file. This is useful in many circumstances, for example,
-testing different architectural variants with llc. Here's a simple example:
-
+The FileCheck :option:`-check-prefix` option allows multiple test
+configurations to be driven from one `.ll` file. This is useful in many
+circumstances, for example, testing different architectural variants with
+:program:`llc`. Here's a simple example:
.. code-block:: llvm
@@ -157,21 +135,17 @@ testing different architectural variants with llc. Here's a simple example:
; X64: pinsrd $1, %edi, %xmm0
}
-
In this case, we're testing that we get the expected code generation with
both 32-bit and 64-bit code generation.
-
The "CHECK-NEXT:" directive
~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
Sometimes you want to match lines and would like to verify that matches
happen on exactly consecutive lines with no other lines in between them. In
-this case, you can use CHECK: and CHECK-NEXT: directives to specify this. If
-you specified a custom check prefix, just use "<PREFIX>-NEXT:". For
-example, something like this works as you'd expect:
-
+this case, you can use "``CHECK:``" and "``CHECK-NEXT:``" directives to specify
+this. If you specified a custom check prefix, just use "``<PREFIX>-NEXT:``".
+For example, something like this works as you'd expect:
.. code-block:: llvm
@@ -193,22 +167,18 @@ example, something like this works as you'd expect:
; CHECK-NEXT: ret
}
-
-CHECK-NEXT: directives reject the input unless there is exactly one newline
-between it an the previous directive. A CHECK-NEXT cannot be the first
-directive in a file.
-
+"``CHECK-NEXT:``" directives reject the input unless there is exactly one
+newline between it and the previous directive. A "``CHECK-NEXT:``" cannot be
+the first directive in a file.
The "CHECK-NOT:" directive
~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The CHECK-NOT: directive is used to verify that a string doesn't occur
+The "``CHECK-NOT:``" directive is used to verify that a string doesn't occur
between two matches (or before the first match, or after the last match). For
example, to verify that a load is removed by a transformation, a test like this
can be used:
-
.. code-block:: llvm
define i8 @coerce_offset0(i32 %V, i32* %P) {
@@ -224,27 +194,22 @@ can be used:
; CHECK: ret i8
}
-
-
FileCheck Pattern Matching Syntax
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The CHECK: and CHECK-NOT: directives both take a pattern to match. For most
-uses of FileCheck, fixed string matching is perfectly sufficient. For some
-things, a more flexible form of matching is desired. To support this, FileCheck
-allows you to specify regular expressions in matching strings, surrounded by
-double braces: **{{yourregex}}**. Because we want to use fixed string
-matching for a majority of what we do, FileCheck has been designed to support
-mixing and matching fixed string matching with regular expressions. This allows
-you to write things like this:
-
+The "``CHECK:``" and "``CHECK-NOT:``" directives both take a pattern to match.
+For most uses of FileCheck, fixed string matching is perfectly sufficient. For
+some things, a more flexible form of matching is desired. To support this,
+FileCheck allows you to specify regular expressions in matching strings,
+surrounded by double braces: ``{{yourregex}}``. Because we want to use fixed
+string matching for a majority of what we do, FileCheck has been designed to
+support mixing and matching fixed string matching with regular expressions.
+This allows you to write things like this:
.. code-block:: llvm
; CHECK: movhpd {{[0-9]+}}(%esp), {{%xmm[0-7]}}
-
In this case, any offset from the ESP register will be allowed, and any xmm
register will be allowed.
@@ -252,19 +217,16 @@ Because regular expressions are enclosed with double braces, they are
visually distinct, and you don't need to use escape characters within the double
braces like you would in C. In the rare case that you want to match double
braces explicitly from the input, you can use something ugly like
-**{{[{][{]}}** as your pattern.
-
+``{{[{][{]}}`` as your pattern.
FileCheck Variables
~~~~~~~~~~~~~~~~~~~
-
It is often useful to match a pattern and then verify that it occurs again
later in the file. For codegen tests, this can be useful to allow any register,
-but verify that that register is used consistently later. To do this, FileCheck
-allows named variables to be defined and substituted into patterns. Here is a
-simple example:
-
+but verify that that register is used consistently later. To do this,
+:program:`FileCheck` allows named variables to be defined and substituted into
+patterns. Here is a simple example:
.. code-block:: llvm
@@ -272,19 +234,46 @@ simple example:
; CHECK: notw [[REGISTER:%[a-z]+]]
; CHECK: andw {{.*}}[[REGISTER]]
+The first check line matches a regex ``%[a-z]+`` and captures it into the
+variable ``REGISTER``. The second line verifies that whatever is in
+``REGISTER`` occurs later in the file after an "``andw``". :program:`FileCheck`
+variable references are always contained in ``[[ ]]`` pairs, and their names can
+be formed with the regex ``[a-zA-Z][a-zA-Z0-9]*``. If a colon follows the name,
+then it is a definition of the variable; otherwise, it is a use.
+
+:program:`FileCheck` variables can be defined multiple times, and uses always
+get the latest value. Variables can also be used later on the same line they
+were defined on. For example:
+
+.. code-block:: llvm
+
+ ; CHECK: op [[REG:r[0-9]+]], [[REG]]
+
+This can be useful if you want the operands of ``op`` to be the same register,
+and don't care exactly which register it is.
+
+FileCheck Expressions
+~~~~~~~~~~~~~~~~~~~~~
+
+Sometimes there's a need to verify output which refers to line numbers of the
+match file, e.g. when testing compiler diagnostics. This introduces a certain
+fragility into the match file structure, as "``CHECK:``" lines contain absolute
+line numbers in the same file, which have to be updated whenever line numbers
+change due to text addition or deletion.
+
+To support this case, FileCheck allows using ``[[@LINE]]``,
+``[[@LINE+<offset>]]``, and ``[[@LINE-<offset>]]`` expressions in patterns.
+These expressions expand to the number of the line where a pattern is located
+(with an optional integer offset).
+
+This way match patterns can be put near the relevant test lines and include
+relative line number references, for example:
+
+.. code-block:: c++
+
+ // CHECK: test.cpp:[[@LINE+4]]:6: error: expected ';' after top level declarator
+ // CHECK-NEXT: {{^int a}}
+ // CHECK-NEXT: {{^ \^}}
+ // CHECK-NEXT: {{^ ;}}
+ int a
-The first check line matches a regex (**%[a-z]+**) and captures it into
-the variable "REGISTER". The second line verifies that whatever is in REGISTER
-occurs later in the file after an "andw". FileCheck variable references are
-always contained in **[[ ]]** pairs, and their names can be formed with the
-regex **[a-zA-Z][a-zA-Z0-9]***. If a colon follows the name, then it is a
-definition of the variable; otherwise, it is a use.
-
-FileCheck variables can be defined multiple times, and uses always get the
-latest value. Note that variables are all read at the start of a "CHECK" line
-and are all defined at the end. This means that if you have something like
-"**CHECK: [[XYZ:.\\*]]x[[XYZ]]**", the check line will read the previous
-value of the XYZ variable and define a new one after the match is performed. If
-you need to do something like this you can probably take advantage of the fact
-that FileCheck is not actually line-oriented when it matches, this allows you to
-define two separate CHECK lines that match on the same line.
diff --git a/docs/CommandGuide/bugpoint.rst b/docs/CommandGuide/bugpoint.rst
index c1b3b6eca627..e4663e5d4477 100644
--- a/docs/CommandGuide/bugpoint.rst
+++ b/docs/CommandGuide/bugpoint.rst
@@ -1,19 +1,15 @@
bugpoint - automatic test case reduction tool
=============================================
-
SYNOPSIS
--------
-
**bugpoint** [*options*] [*input LLVM ll/bc files*] [*LLVM passes*] **--args**
*program arguments*
-
DESCRIPTION
-----------
-
**bugpoint** narrows down the source of problems in LLVM tools and passes. It
can be used to debug three types of failures: optimizer crashes, miscompilations
by optimizers, or bad native code generation (including problems in the static
@@ -22,82 +18,61 @@ For more information on the design and inner workings of **bugpoint**, as well a
advice for using bugpoint, see *llvm/docs/Bugpoint.html* in the LLVM
distribution.
-
OPTIONS
-------
-
-
**--additional-so** *library*
Load the dynamic shared object *library* into the test program whenever it is
run. This is useful if you are debugging programs which depend on non-LLVM
libraries (such as the X or curses libraries) to run.
-
-
**--append-exit-code**\ =\ *{true,false}*
Append the test program's exit code to the output file so that a change in exit
code is considered a test failure. Defaults to false.
-
-
**--args** *program args*
- Pass all arguments specified after -args to the test program whenever it runs.
- Note that if any of the *program args* start with a '-', you should use:
-
+ Pass all arguments specified after **--args** to the test program whenever it runs.
+ Note that if any of the *program args* start with a "``-``", you should use:
- .. code-block:: perl
+ .. code-block:: bash
bugpoint [bugpoint args] --args -- [program args]
-
- The "--" right after the **--args** option tells **bugpoint** to consider any
- options starting with ``-`` to be part of the **--args** option, not as options to
- **bugpoint** itself.
-
-
+ The "``--``" right after the **--args** option tells **bugpoint** to consider
+ any options starting with "``-``" to be part of the **--args** option, not as
+ options to **bugpoint** itself.
**--tool-args** *tool args*
- Pass all arguments specified after --tool-args to the LLVM tool under test
+ Pass all arguments specified after **--tool-args** to the LLVM tool under test
(**llc**, **lli**, etc.) whenever it runs. You should use this option in the
following way:
-
- .. code-block:: perl
+ .. code-block:: bash
bugpoint [bugpoint args] --tool-args -- [tool args]
-
- The "--" right after the **--tool-args** option tells **bugpoint** to consider any
- options starting with ``-`` to be part of the **--tool-args** option, not as
- options to **bugpoint** itself. (See **--args**, above.)
-
-
+ The "``--``" right after the **--tool-args** option tells **bugpoint** to
+ consider any options starting with "``-``" to be part of the **--tool-args**
+ option, not as options to **bugpoint** itself. (See **--args**, above.)
**--safe-tool-args** *tool args*
Pass all arguments specified after **--safe-tool-args** to the "safe" execution
tool.
-
-
**--gcc-tool-args** *gcc tool args*
Pass all arguments specified after **--gcc-tool-args** to the invocation of
**gcc**.
-
-
**--opt-args** *opt args*
Pass all arguments specified after **--opt-args** to the invocation of **opt**.
-
-
**--disable-{dce,simplifycfg}**
Do not run the specified passes to clean up and reduce the size of the test
@@ -105,36 +80,26 @@ OPTIONS
reduce test programs. If you're trying to find a bug in one of these passes,
**bugpoint** may crash.
-
-
**--enable-valgrind**
Use valgrind to find faults in the optimization phase. This will allow
bugpoint to find otherwise asymptomatic problems caused by memory
mis-management.
-
-
**-find-bugs**
Continually randomize the specified passes and run them on the test program
until a bug is found or the user kills **bugpoint**.
-
-
**-help**
Print a summary of command line options.
-
-
**--input** *filename*
Open *filename* and redirect the standard input of the test program, whenever
it runs, to come from that file.
-
-
**--load** *plugin*
Load the dynamic object *plugin* into **bugpoint** itself. This object should
@@ -143,20 +108,15 @@ OPTIONS
optimizations, use the **-help** and **--load** options together; for example:
- .. code-block:: perl
+ .. code-block:: bash
bugpoint --load myNewPass.so -help
-
-
-
**--mlimit** *megabytes*
Specifies an upper limit on memory usage of the optimization and codegen. Set
to zero to disable the limit.
-
-
**--output** *filename*
Whenever the test program produces output on its standard output stream, it
@@ -164,14 +124,10 @@ OPTIONS
do not use this option, **bugpoint** will attempt to generate a reference output
by compiling the program with the "safe" backend and running it.
-
-
**--profile-info-file** *filename*
Profile file loaded by **--profile-loader**.
-
-
**--run-{int,jit,llc,custom}**
Whenever the test program is compiled, **bugpoint** should generate code for it
@@ -179,8 +135,6 @@ OPTIONS
interpreter, the JIT compiler, the static native code compiler, or a
custom command (see **--exec-command**) respectively.
-
-
**--safe-{llc,custom}**
When debugging a code generator, **bugpoint** should use the specified code
@@ -192,16 +146,12 @@ OPTIONS
respectively. The interpreter and the JIT backends cannot currently
be used as the "safe" backends.
-
-
**--exec-command** *command*
This option defines the command to use with the **--run-custom** and
**--safe-custom** options to execute the bitcode testcase. This can
be useful for cross-compilation.
-
-
**--compile-command** *command*
This option defines the command to use with the **--compile-custom**
@@ -210,38 +160,28 @@ OPTIONS
generate a reduced unit test, you may add CHECK directives to the
testcase and pass the name of an executable compile-command script in this form:
-
.. code-block:: sh
#!/bin/sh
llc "$@"
not FileCheck [bugpoint input file].ll < bugpoint-test-program.s
-
This script will "fail" as long as FileCheck passes. So the result
will be the minimum bitcode that passes FileCheck.
-
-
**--safe-path** *path*
This option defines the path to the command to execute with the
**--safe-{int,jit,llc,custom}**
option.
-
-
-
EXIT STATUS
-----------
-
If **bugpoint** succeeds in finding a problem, it will exit with 0. Otherwise,
if an error occurs, it will exit with a non-zero value.
-
SEE ALSO
--------
-
opt|opt
diff --git a/docs/CommandGuide/index.rst b/docs/CommandGuide/index.rst
index 73a4835dd7a1..ac8a944a2e76 100644
--- a/docs/CommandGuide/index.rst
+++ b/docs/CommandGuide/index.rst
@@ -1,5 +1,3 @@
-.. _commands:
-
LLVM Command Guide
------------------
@@ -30,6 +28,7 @@ Basic Commands
llvm-diff
llvm-cov
llvm-stress
+ llvm-symbolizer
Debugging Tools
~~~~~~~~~~~~~~~
diff --git a/docs/CommandGuide/lit.rst b/docs/CommandGuide/lit.rst
index 9e96cd2a4bfd..40c7646260a4 100644
--- a/docs/CommandGuide/lit.rst
+++ b/docs/CommandGuide/lit.rst
@@ -1,351 +1,278 @@
lit - LLVM Integrated Tester
============================
-
SYNOPSIS
--------
-
-**lit** [*options*] [*tests*]
-
+:program:`lit` [*options*] [*tests*]
DESCRIPTION
-----------
+:program:`lit` is a portable tool for executing LLVM and Clang style test
+suites, summarizing their results, and providing indication of failures.
+:program:`lit` is designed to be a lightweight testing tool with as simple a
+user interface as possible.
-**lit** is a portable tool for executing LLVM and Clang style test suites,
-summarizing their results, and providing indication of failures. **lit** is
-designed to be a lightweight testing tool with as simple a user interface as
-possible.
-
-**lit** should be run with one or more *tests* to run specified on the command
-line. Tests can be either individual test files or directories to search for
-tests (see "TEST DISCOVERY").
+:program:`lit` should be run with one or more *tests* to run specified on the
+command line. Tests can be either individual test files or directories to
+search for tests (see :ref:`test-discovery`).
Each specified test will be executed (potentially in parallel) and once all
-tests have been run **lit** will print summary information on the number of tests
-which passed or failed (see "TEST STATUS RESULTS"). The **lit** program will
-execute with a non-zero exit code if any tests fail.
-
-By default **lit** will use a succinct progress display and will only print
-summary information for test failures. See "OUTPUT OPTIONS" for options
-controlling the **lit** progress display and output.
+tests have been run :program:`lit` will print summary information on the number
+of tests which passed or failed (see :ref:`test-status-results`). The
+:program:`lit` program will execute with a non-zero exit code if any tests
+fail.
-**lit** also includes a number of options for controlling how tests are executed
-(specific features may depend on the particular test format). See "EXECUTION
-OPTIONS" for more information.
+By default :program:`lit` will use a succinct progress display and will only
+print summary information for test failures. See :ref:`output-options` for
+options controlling the :program:`lit` progress display and output.
-Finally, **lit** also supports additional options for only running a subset of
-the options specified on the command line, see "SELECTION OPTIONS" for
-more information.
+:program:`lit` also includes a number of options for controlling how tests are
+executed (specific features may depend on the particular test format). See
+:ref:`execution-options` for more information.
-Users interested in the **lit** architecture or designing a **lit** testing
-implementation should see "LIT INFRASTRUCTURE"
+Finally, :program:`lit` also supports additional options for only running a
+subset of the options specified on the command line, see
+:ref:`selection-options` for more information.
+Users interested in the :program:`lit` architecture or designing a
+:program:`lit` testing implementation should see :ref:`lit-infrastructure`.
GENERAL OPTIONS
---------------
+.. option:: -h, --help
+ Show the :program:`lit` help message.
-**-h**, **--help**
-
- Show the **lit** help message.
-
-
-
-**-j** *N*, **--threads**\ =\ *N*
-
- Run *N* tests in parallel. By default, this is automatically chosen to match
- the number of detected available CPUs.
-
-
+.. option:: -j N, --threads=N
-**--config-prefix**\ =\ *NAME*
+ Run ``N`` tests in parallel. By default, this is automatically chosen to
+ match the number of detected available CPUs.
- Search for *NAME.cfg* and *NAME.site.cfg* when searching for test suites,
- instead of *lit.cfg* and *lit.site.cfg*.
+.. option:: --config-prefix=NAME
+ Search for :file:`{NAME}.cfg` and :file:`{NAME}.site.cfg` when searching for
+ test suites, instead of :file:`lit.cfg` and :file:`lit.site.cfg`.
+.. option:: --param NAME, --param NAME=VALUE
-**--param** *NAME*, **--param** *NAME*\ =\ *VALUE*
-
- Add a user defined parameter *NAME* with the given *VALUE* (or the empty
- string if not given). The meaning and use of these parameters is test suite
+  Add a user-defined parameter ``NAME`` with the given ``VALUE`` (or the empty
+  string if not given). The meaning and use of these parameters are test suite
dependent.
-
-
+.. _output-options:
OUTPUT OPTIONS
--------------
-
-
-**-q**, **--quiet**
+.. option:: -q, --quiet
Suppress any output except for test failures.
-
-
-**-s**, **--succinct**
+.. option:: -s, --succinct
Show less output, for example don't show information on tests that pass.
-
-
-**-v**, **--verbose**
+.. option:: -v, --verbose
Show more information on test failures, for example the entire test output
instead of just the test result.
-
-
-**--no-progress-bar**
+.. option:: --no-progress-bar
Do not use curses based progress bar.
-
-
+.. _execution-options:
EXECUTION OPTIONS
-----------------
+.. option:: --path=PATH
+ Specify an additional ``PATH`` to use when searching for executables in tests.
-**--path**\ =\ *PATH*
-
- Specify an addition *PATH* to use when searching for executables in tests.
-
-
-
-**--vg**
-
- Run individual tests under valgrind (using the memcheck tool). The
- *--error-exitcode* argument for valgrind is used so that valgrind failures will
- cause the program to exit with a non-zero status.
-
- When this option is enabled, **lit** will also automatically provide a
- "valgrind" feature that can be used to conditionally disable (or expect failure
- in) certain tests.
-
-
-
-**--vg-arg**\ =\ *ARG*
-
- When *--vg* is used, specify an additional argument to pass to valgrind itself.
-
+.. option:: --vg
+ Run individual tests under valgrind (using the memcheck tool). The
+ ``--error-exitcode`` argument for valgrind is used so that valgrind failures
+ will cause the program to exit with a non-zero status.
-**--vg-leak**
+ When this option is enabled, :program:`lit` will also automatically provide a
+ "``valgrind``" feature that can be used to conditionally disable (or expect
+ failure in) certain tests.
- When *--vg* is used, enable memory leak checks. When this option is enabled,
- **lit** will also automatically provide a "vg_leak" feature that can be
- used to conditionally disable (or expect failure in) certain tests.
+.. option:: --vg-arg=ARG
+ When :option:`--vg` is used, specify an additional argument to pass to
+ :program:`valgrind` itself.
+.. option:: --vg-leak
+ When :option:`--vg` is used, enable memory leak checks. When this option is
+ enabled, :program:`lit` will also automatically provide a "``vg_leak``"
+ feature that can be used to conditionally disable (or expect failure in)
+ certain tests.
-**--time-tests**
-
- Track the wall time individual tests take to execute and includes the results in
- the summary output. This is useful for determining which tests in a test suite
- take the most time to execute. Note that this option is most useful with *-j
- 1*.
-
+.. option:: --time-tests
+  Track the wall time individual tests take to execute and include the results
+ in the summary output. This is useful for determining which tests in a test
+ suite take the most time to execute. Note that this option is most useful
+ with ``-j 1``.
+.. _selection-options:
SELECTION OPTIONS
-----------------
+.. option:: --max-tests=N
+ Run at most ``N`` tests and then terminate.
-**--max-tests**\ =\ *N*
-
- Run at most *N* tests and then terminate.
-
-
-
-**--max-time**\ =\ *N*
+.. option:: --max-time=N
- Spend at most *N* seconds (approximately) running tests and then terminate.
+ Spend at most ``N`` seconds (approximately) running tests and then terminate.
-
-
-**--shuffle**
+.. option:: --shuffle
Run the tests in a random order.
-
-
-
ADDITIONAL OPTIONS
------------------
+.. option:: --debug
+ Run :program:`lit` in debug mode, for debugging configuration issues and
+ :program:`lit` itself.
-**--debug**
-
- Run **lit** in debug mode, for debugging configuration issues and **lit** itself.
-
-
-
-**--show-suites**
+.. option:: --show-suites
List the discovered test suites as part of the standard output.
+.. option:: --repeat=N
-
-**--no-tcl-as-sh**
-
- Run Tcl scripts internally (instead of converting to shell scripts).
-
-
-
-**--repeat**\ =\ *N*
-
- Run each test *N* times. Currently this is primarily useful for timing tests,
- other results are not collated in any reasonable fashion.
-
-
-
+ Run each test ``N`` times. Currently this is primarily useful for timing
+  tests; other results are not collated in any reasonable fashion.
EXIT STATUS
-----------
-
-**lit** will exit with an exit code of 1 if there are any FAIL or XPASS
-results. Otherwise, it will exit with the status 0. Other exit codes are used
+:program:`lit` will exit with an exit code of 1 if there are any FAIL or XPASS
+results. Otherwise, it will exit with status 0. Other exit codes are used
for non-test related failures (for example a user error or an internal program
error).
+.. _test-discovery:
TEST DISCOVERY
--------------
+The inputs passed to :program:`lit` can be either individual tests, or entire
+directories or hierarchies of tests to run. When :program:`lit` starts up, the
+first thing it does is convert the inputs into a complete list of tests to run
+as part of *test discovery*.
-The inputs passed to **lit** can be either individual tests, or entire
-directories or hierarchies of tests to run. When **lit** starts up, the first
-thing it does is convert the inputs into a complete list of tests to run as part
-of *test discovery*.
-
-In the **lit** model, every test must exist inside some *test suite*. **lit**
-resolves the inputs specified on the command line to test suites by searching
-upwards from the input path until it finds a *lit.cfg* or *lit.site.cfg*
-file. These files serve as both a marker of test suites and as configuration
-files which **lit** loads in order to understand how to find and run the tests
-inside the test suite.
+In the :program:`lit` model, every test must exist inside some *test suite*.
+:program:`lit` resolves the inputs specified on the command line to test suites
+by searching upwards from the input path until it finds a :file:`lit.cfg` or
+:file:`lit.site.cfg` file. These files serve as both a marker of test suites
+and as configuration files which :program:`lit` loads in order to understand
+how to find and run the tests inside the test suite.
-Once **lit** has mapped the inputs into test suites it traverses the list of
-inputs adding tests for individual files and recursively searching for tests in
-directories.
+Once :program:`lit` has mapped the inputs into test suites it traverses the
+list of inputs adding tests for individual files and recursively searching for
+tests in directories.
This behavior makes it easy to specify a subset of tests to run, while still
allowing the test suite configuration to control exactly how tests are
-interpreted. In addition, **lit** always identifies tests by the test suite they
-are in, and their relative path inside the test suite. For appropriately
-configured projects, this allows **lit** to provide convenient and flexible
-support for out-of-tree builds.
+interpreted. In addition, :program:`lit` always identifies tests by the test
+suite they are in, and their relative path inside the test suite. For
+appropriately configured projects, this allows :program:`lit` to provide
+convenient and flexible support for out-of-tree builds.
+.. _test-status-results:
TEST STATUS RESULTS
-------------------
-
Each test ultimately produces one of the following six results:
-
**PASS**
The test succeeded.
-
-
**XFAIL**
- The test failed, but that is expected. This is used for test formats which allow
+ The test failed, but that is expected. This is used for test formats which allow
specifying that a test does not currently work, but wish to leave it in the test
suite.
-
-
**XPASS**
- The test succeeded, but it was expected to fail. This is used for tests which
+ The test succeeded, but it was expected to fail. This is used for tests which
were specified as expected to fail, but are now succeeding (generally because
the feature they test was broken and has been fixed).
-
-
**FAIL**
The test failed.
-
-
**UNRESOLVED**
- The test result could not be determined. For example, this occurs when the test
+ The test result could not be determined. For example, this occurs when the test
could not be run, the test itself is invalid, or the test was interrupted.
-
-
**UNSUPPORTED**
- The test is not supported in this environment. This is used by test formats
+ The test is not supported in this environment. This is used by test formats
which can report unsupported tests.
-
-
Depending on the test format tests may produce additional information about
-their status (generally only for failures). See the Output|"OUTPUT OPTIONS"
+their status (generally only for failures). See the :ref:`output-options`
section for more information.
+.. _lit-infrastructure:
LIT INFRASTRUCTURE
------------------
+This section describes the :program:`lit` testing architecture for users
+interested in creating a new :program:`lit` testing implementation, or
+extending an existing one.
-This section describes the **lit** testing architecture for users interested in
-creating a new **lit** testing implementation, or extending an existing one.
-
-**lit** proper is primarily an infrastructure for discovering and running
+:program:`lit` proper is primarily an infrastructure for discovering and running
arbitrary tests, and to expose a single convenient interface to these
-tests. **lit** itself doesn't know how to run tests, rather this logic is
+tests. :program:`lit` itself doesn't know how to run tests; rather, this logic is
defined by *test suites*.
TEST SUITES
~~~~~~~~~~~
-
-As described in "TEST DISCOVERY", tests are always located inside a *test
-suite*. Test suites serve to define the format of the tests they contain, the
+As described in :ref:`test-discovery`, tests are always located inside a *test
+suite*. Test suites serve to define the format of the tests they contain, the
logic for finding those tests, and any additional information to run the tests.
-**lit** identifies test suites as directories containing *lit.cfg* or
-*lit.site.cfg* files (see also **--config-prefix**). Test suites are initially
-discovered by recursively searching up the directory hierarchy for all the input
-files passed on the command line. You can use **--show-suites** to display the
-discovered test suites at startup.
+:program:`lit` identifies test suites as directories containing ``lit.cfg`` or
+``lit.site.cfg`` files (see also :option:`--config-prefix`). Test suites are
+initially discovered by recursively searching up the directory hierarchy for
+all the input files passed on the command line. You can use
+:option:`--show-suites` to display the discovered test suites at startup.
-Once a test suite is discovered, its config file is loaded. Config files
-themselves are Python modules which will be executed. When the config file is
+Once a test suite is discovered, its config file is loaded. Config files
+themselves are Python modules which will be executed. When the config file is
executed, two important global variables are predefined:
-
**lit**
The global **lit** configuration object (a *LitConfig* instance), which defines
the builtin test formats, global configuration parameters, and other helper
routines for implementing test configurations.
-
-
**config**
This is the config object (a *TestingConfig* instance) for the test suite,
- which the config file is expected to populate. The following variables are also
+ which the config file is expected to populate. The following variables are also
available on the *config* object, some of which must be set by the config and
others are optional or predefined:
@@ -353,135 +280,132 @@ executed, two important global variables are predefined:
diagnostics.
**test_format** *[required]* The test format object which will be used to
- discover and run tests in the test suite. Generally this will be a builtin test
+ discover and run tests in the test suite. Generally this will be a builtin test
format available from the *lit.formats* module.
- **test_src_root** The filesystem path to the test suite root. For out-of-dir
+ **test_src_root** The filesystem path to the test suite root. For out-of-dir
builds this is the directory that will be scanned for tests.
**test_exec_root** For out-of-dir builds, the path to the test suite root inside
- the object directory. This is where tests will be run and temporary output files
+ the object directory. This is where tests will be run and temporary output files
placed.
**environment** A dictionary representing the environment to use when executing
tests in the suite.
**suffixes** For **lit** test formats which scan directories for tests, this
- variable is a list of suffixes to identify test files. Used by: *ShTest*,
- *TclTest*.
+ variable is a list of suffixes to identify test files. Used by: *ShTest*.
**substitutions** For **lit** test formats which substitute variables into a test
- script, the list of substitutions to perform. Used by: *ShTest*, *TclTest*.
+ script, the list of substitutions to perform. Used by: *ShTest*.
**unsupported** Mark an unsupported directory, all tests within it will be
- reported as unsupported. Used by: *ShTest*, *TclTest*.
+ reported as unsupported. Used by: *ShTest*.
**parent** The parent configuration, this is the config object for the directory
containing the test suite, or None.
- **root** The root configuration. This is the top-most **lit** configuration in
+ **root** The root configuration. This is the top-most :program:`lit` configuration in
the project.
**on_clone** The config is actually cloned for every subdirectory inside a test
- suite, to allow local configuration on a per-directory basis. The *on_clone*
+ suite, to allow local configuration on a per-directory basis. The *on_clone*
variable can be set to a Python function which will be called whenever a
- configuration is cloned (for a subdirectory). The function should takes three
+  configuration is cloned (for a subdirectory). The function should take three
arguments: (1) the parent configuration, (2) the new configuration (which the
*on_clone* function will generally modify), and (3) the test path to the new
directory being scanned.
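+
+A minimal sketch of a config file populating a few of these variables (the
+values shown are illustrative):
+
+.. code-block:: python
+
+  # 'config' is predefined when lit executes this file.
+  import lit.formats
+  config.name = 'Example'
+  config.test_format = lit.formats.ShTest()
+  config.suffixes = ['.ll']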
-
-
-
TEST DISCOVERY
~~~~~~~~~~~~~~
-
-Once test suites are located, **lit** recursively traverses the source directory
-(following *test_src_root*) looking for tests. When **lit** enters a
-sub-directory, it first checks to see if a nested test suite is defined in that
-directory. If so, it loads that test suite recursively, otherwise it
-instantiates a local test config for the directory (see "LOCAL CONFIGURATION
-FILES").
+Once test suites are located, :program:`lit` recursively traverses the source
+directory (following *test_src_root*) looking for tests. When :program:`lit`
+enters a sub-directory, it first checks to see if a nested test suite is
+defined in that directory. If so, it loads that test suite recursively,
+otherwise it instantiates a local test config for the directory (see
+:ref:`local-configuration-files`).
Tests are identified by the test suite they are contained within, and the
-relative path inside that suite. Note that the relative path may not refer to an
-actual file on disk; some test formats (such as *GoogleTest*) define "virtual
-tests" which have a path that contains both the path to the actual test file and
-a subpath to identify the virtual test.
+relative path inside that suite. Note that the relative path may not refer to
+an actual file on disk; some test formats (such as *GoogleTest*) define
+"virtual tests" which have a path that contains both the path to the actual
+test file and a subpath to identify the virtual test.
+.. _local-configuration-files:
LOCAL CONFIGURATION FILES
~~~~~~~~~~~~~~~~~~~~~~~~~
-
-When **lit** loads a subdirectory in a test suite, it instantiates a local test
-configuration by cloning the configuration for the parent direction -- the root
-of this configuration chain will always be a test suite. Once the test
-configuration is cloned **lit** checks for a *lit.local.cfg* file in the
-subdirectory. If present, this file will be loaded and can be used to specialize
-the configuration for each individual directory. This facility can be used to
-define subdirectories of optional tests, or to change other configuration
-parameters -- for example, to change the test format, or the suffixes which
-identify test files.
-
+When :program:`lit` loads a subdirectory in a test suite, it instantiates a
+local test configuration by cloning the configuration for the parent directory
+--- the root of this configuration chain will always be a test suite. Once the
+test configuration is cloned :program:`lit` checks for a *lit.local.cfg* file
+in the subdirectory. If present, this file will be loaded and can be used to
+specialize the configuration for each individual directory. This facility can
+be used to define subdirectories of optional tests, or to change other
+configuration parameters --- for example, to change the test format, or the
+suffixes which identify test files.
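+
+A minimal sketch of a ``lit.local.cfg`` (the suffix list is illustrative):
+
+.. code-block:: python
+
+  # Only files with these suffixes are treated as tests in this subtree.
+  config.suffixes = ['.ll', '.c']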
TEST RUN OUTPUT FORMAT
~~~~~~~~~~~~~~~~~~~~~~
-
-The b<lit> output for a test run conforms to the following schema, in both short
-and verbose modes (although in short mode no PASS lines will be shown). This
-schema has been chosen to be relatively easy to reliably parse by a machine (for
-example in buildbot log scraping), and for other tools to generate.
+The :program:`lit` output for a test run conforms to the following schema, in
+both short and verbose modes (although in short mode no PASS lines will be
+shown). This schema has been chosen to be relatively easy to reliably parse by
+a machine (for example in buildbot log scraping), and for other tools to
+generate.
Each test result is expected to appear on a line that matches:
-<result code>: <test name> (<progress info>)
+.. code-block:: none
+
+ <result code>: <test name> (<progress info>)
-where <result-code> is a standard test result such as PASS, FAIL, XFAIL, XPASS,
-UNRESOLVED, or UNSUPPORTED. The performance result codes of IMPROVED and
+where ``<result code>`` is a standard test result such as PASS, FAIL, XFAIL,
+XPASS, UNRESOLVED, or UNSUPPORTED. The performance result codes of IMPROVED and
REGRESSED are also allowed.
-The <test name> field can consist of an arbitrary string containing no newline.
+The ``<test name>`` field can consist of an arbitrary string containing no
+newline.
-The <progress info> field can be used to report progress information such as
-(1/300) or can be empty, but even when empty the parentheses are required.
+The ``<progress info>`` field can be used to report progress information such
+as (1/300) or can be empty, but even when empty the parentheses are required.
Each test result may include additional (multiline) log information in the
-following format.
+following format:
+
+.. code-block:: none
-<log delineator> TEST '(<test name>)' <trailing delineator>
-... log message ...
-<log delineator>
+ <log delineator> TEST '(<test name>)' <trailing delineator>
+ ... log message ...
+ <log delineator>
-where <test name> should be the name of a preceding reported test, <log
-delineator> is a string of '\*' characters *at least* four characters long (the
-recommended length is 20), and <trailing delineator> is an arbitrary (unparsed)
-string.
+where ``<test name>`` should be the name of a preceding reported test, ``<log
+delineator>`` is a string of "*" characters *at least* four characters long
+(the recommended length is 20), and ``<trailing delineator>`` is an arbitrary
+(unparsed) string.
The following is an example of a test run output which consists of four tests A,
-B, C, and D, and a log message for the failing test C::
+B, C, and D, and a log message for the failing test C:
+
+.. code-block:: none
PASS: A (1 of 4)
PASS: B (2 of 4)
FAIL: C (3 of 4)
- \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* TEST 'C' FAILED \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*
+ ******************** TEST 'C' FAILED ********************
Test 'C' failed as a result of exit code 1.
- \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*
+ ********************
PASS: D (4 of 4)
-
LIT EXAMPLE TESTS
~~~~~~~~~~~~~~~~~
-
-The **lit** distribution contains several example implementations of test suites
-in the *ExampleTests* directory.
-
+The :program:`lit` distribution contains several example implementations of
+test suites in the *ExampleTests* directory.
SEE ALSO
--------
-
valgrind(1)
diff --git a/docs/CommandGuide/llc.rst b/docs/CommandGuide/llc.rst
index 6f1c486c3f42..70354b0343e5 100644
--- a/docs/CommandGuide/llc.rst
+++ b/docs/CommandGuide/llc.rst
@@ -1,251 +1,187 @@
llc - LLVM static compiler
==========================
-
SYNOPSIS
--------
-
-**llc** [*options*] [*filename*]
-
+:program:`llc` [*options*] [*filename*]
DESCRIPTION
-----------
-
-The **llc** command compiles LLVM source inputs into assembly language for a
-specified architecture. The assembly language output can then be passed through
-a native assembler and linker to generate a native executable.
+The :program:`llc` command compiles LLVM source inputs into assembly language
+for a specified architecture. The assembly language output can then be passed
+through a native assembler and linker to generate a native executable.
The choice of architecture for the output assembly code is automatically
-determined from the input file, unless the **-march** option is used to override
-the default.
-
+determined from the input file, unless the :option:`-march` option is used to
+override the default.
OPTIONS
-------
+If ``filename`` is "``-``" or omitted, :program:`llc` reads from standard input.
+Otherwise, it will read from ``filename``. Inputs can be in either the LLVM assembly
+language format (``.ll``) or the LLVM bitcode format (``.bc``).
-If *filename* is - or omitted, **llc** reads from standard input. Otherwise, it
-will from *filename*. Inputs can be in either the LLVM assembly language
-format (.ll) or the LLVM bitcode format (.bc).
+If the :option:`-o` option is omitted, then :program:`llc` will send its output
+to standard output if the input is from standard input. If the :option:`-o`
+option specifies "``-``", then the output will also be sent to standard output.
-If the **-o** option is omitted, then **llc** will send its output to standard
-output if the input is from standard input. If the **-o** option specifies -,
-then the output will also be sent to standard output.
+If no :option:`-o` option is specified and an input file other than "``-``" is
+specified, then :program:`llc` creates the output filename by taking the input
+filename, removing any existing ``.bc`` extension, and adding a ``.s`` suffix.
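+
+For example (a hypothetical input file):
+
+.. code-block:: none
+
+  llc foo.bc    # assembly is written to foo.s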
-If no **-o** option is specified and an input file other than - is specified,
-then **llc** creates the output filename by taking the input filename,
-removing any existing *.bc* extension, and adding a *.s* suffix.
-
-Other **llc** options are as follows:
+Other :program:`llc` options are described below.
End-user Options
~~~~~~~~~~~~~~~~
-
-
-**-help**
+.. option:: -help
Print a summary of command line options.
+.. option:: -O=uint
+ Generate code at different optimization levels. These correspond to the
+ ``-O0``, ``-O1``, ``-O2``, and ``-O3`` optimization levels used by
+ :program:`llvm-gcc` and :program:`clang`.
-**-O**\ =\ *uint*
-
- Generate code at different optimization levels. These correspond to the *-O0*,
- *-O1*, *-O2*, and *-O3* optimization levels used by **llvm-gcc** and
- **clang**.
-
-
-
-**-mtriple**\ =\ *target triple*
+.. option:: -mtriple=<target triple>
Override the target triple specified in the input file with the specified
string.
-
-
-**-march**\ =\ *arch*
+.. option:: -march=<arch>
Specify the architecture for which to generate assembly, overriding the target
- encoded in the input file. See the output of **llc -help** for a list of
+ encoded in the input file. See the output of ``llc -help`` for a list of
valid architectures. By default this is inferred from the target triple or
autodetected to the current architecture.
-
-
-**-mcpu**\ =\ *cpuname*
+.. option:: -mcpu=<cpuname>
Specify a specific chip in the current architecture to generate code for.
By default this is inferred from the target triple and autodetected to
the current architecture. For a list of available CPUs, use:
- **llvm-as < /dev/null | llc -march=xyz -mcpu=help**
+ .. code-block:: none
+ llvm-as < /dev/null | llc -march=xyz -mcpu=help
-**-mattr**\ =\ *a1,+a2,-a3,...*
+.. option:: -mattr=a1,+a2,-a3,...
Override or control specific attributes of the target, such as whether SIMD
operations are enabled or not. The default set of attributes is set by the
current CPU. For a list of available attributes, use:
- **llvm-as < /dev/null | llc -march=xyz -mattr=help**
+ .. code-block:: none
+ llvm-as < /dev/null | llc -march=xyz -mattr=help
-**--disable-fp-elim**
+.. option:: --disable-fp-elim
Disable frame pointer elimination optimization.
-
-
-**--disable-excess-fp-precision**
+.. option:: --disable-excess-fp-precision
Disable optimizations that may produce excess precision for floating point.
Note that this option can dramatically slow down code on some systems
(e.g. X86).
-
-
-**--enable-no-infs-fp-math**
+.. option:: --enable-no-infs-fp-math
Enable optimizations that assume no Inf values.
-
-
-**--enable-no-nans-fp-math**
+.. option:: --enable-no-nans-fp-math
Enable optimizations that assume no NAN values.
-
-
-**--enable-unsafe-fp-math**
+.. option:: --enable-unsafe-fp-math
Enable optimizations that make unsafe assumptions about IEEE math (e.g. that
addition is associative) or may not work for all input ranges. These
optimizations allow the code generator to make use of some instructions which
- would otherwise not be usable (such as fsin on X86).
-
+ would otherwise not be usable (such as ``fsin`` on X86).
+.. option:: --enable-correct-eh-support
-**--enable-correct-eh-support**
+ Instruct the **lowerinvoke** pass to insert code for correct exception
+ handling support. This is expensive and is by default omitted for efficiency.
- Instruct the **lowerinvoke** pass to insert code for correct exception handling
- support. This is expensive and is by default omitted for efficiency.
-
-
-
-**--stats**
+.. option:: --stats
Print statistics recorded by code-generation passes.
-
-
-**--time-passes**
+.. option:: --time-passes
Record the amount of time needed for each pass and print a report to standard
error.
+.. option:: --load=<dso_path>
-
-**--load**\ =\ *dso_path*
-
- Dynamically load *dso_path* (a path to a dynamically shared object) that
- implements an LLVM target. This will permit the target name to be used with the
- **-march** option so that code can be generated for that target.
-
-
-
+ Dynamically load ``dso_path`` (a path to a dynamically shared object) that
+ implements an LLVM target. This will permit the target name to be used with
+ the :option:`-march` option so that code can be generated for that target.
Tuning/Configuration Options
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-
-**--print-machineinstrs**
+.. option:: --print-machineinstrs
Print generated machine code between compilation phases (useful for debugging).
+.. option:: --regalloc=<allocator>
-
-**--regalloc**\ =\ *allocator*
-
- Specify the register allocator to use. The default *allocator* is *local*.
+ Specify the register allocator to use. The default ``allocator`` is *local*.
Valid register allocators are:
-
*simple*
Very simple "always spill" register allocator
-
-
*local*
Local register allocator
-
-
*linearscan*
Linear scan global register allocator
-
-
*iterativescan*
Iterative scan global register allocator
-
-
-
-
-**--spiller**\ =\ *spiller*
+.. option:: --spiller=<spiller>
Specify the spiller to use for register allocators that support it. Currently
- this option is used only by the linear scan register allocator. The default
- *spiller* is *local*. Valid spillers are:
-
+ this option is used only by the linear scan register allocator. The default
+ ``spiller`` is *local*. Valid spillers are:
*simple*
Simple spiller
-
-
*local*
Local spiller
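+
+ As an illustrative sketch, a spiller choice can be combined with the linear
+ scan allocator (file names are placeholders):
+
+ .. code-block:: console
+
+    $ llc -regalloc=linearscan -spiller=simple input.bc -o input.s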
-
-
-
-
-
Intel IA-32-specific Options
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. option:: --x86-asm-syntax=[att|intel]
-
-**--x86-asm-syntax=att|intel**
-
- Specify whether to emit assembly code in AT&T syntax (the default) or intel
+ Specify whether to emit assembly code in AT&T syntax (the default) or Intel
syntax.
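+
+ For example, to emit Intel-syntax assembly (``input.bc`` is a placeholder):
+
+ .. code-block:: console
+
+    $ llc -x86-asm-syntax=intel input.bc -o input.s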
-
-
-
-
EXIT STATUS
-----------
-
-If **llc** succeeds, it will exit with 0. Otherwise, if an error occurs,
-it will exit with a non-zero value.
-
+If :program:`llc` succeeds, it will exit with 0. Otherwise, if an error
+occurs, it will exit with a non-zero value.
SEE ALSO
--------
+lli
-lli|lli
diff --git a/docs/CommandGuide/lli.rst b/docs/CommandGuide/lli.rst
index 7cc128444dac..a9aaf310e1f3 100644
--- a/docs/CommandGuide/lli.rst
+++ b/docs/CommandGuide/lli.rst
@@ -50,7 +50,7 @@ GENERAL OPTIONS
-**-load**\ =\ *puginfilename*
+**-load**\ =\ *pluginfilename*
Causes **lli** to load the plugin (shared object) named *pluginfilename* and use
it for optimization.
diff --git a/docs/CommandGuide/llvm-bcanalyzer.rst b/docs/CommandGuide/llvm-bcanalyzer.rst
index f1e4eac1be50..7254088ec946 100644
--- a/docs/CommandGuide/llvm-bcanalyzer.rst
+++ b/docs/CommandGuide/llvm-bcanalyzer.rst
@@ -1,424 +1,305 @@
llvm-bcanalyzer - LLVM bitcode analyzer
=======================================
-
SYNOPSIS
--------
-
-**llvm-bcanalyzer** [*options*] [*filename*]
-
+:program:`llvm-bcanalyzer` [*options*] [*filename*]
DESCRIPTION
-----------
+The :program:`llvm-bcanalyzer` command is a small utility for analyzing bitcode
+files. The tool reads a bitcode file (such as generated with the
+:program:`llvm-as` tool) and produces a statistical report on the contents of
+the bitcode file. The tool can also dump a low level but human readable
+version of the bitcode file. This tool is probably not of much interest or
+utility except for those working directly with the bitcode file format. Most
+LLVM users can just ignore this tool.
-The **llvm-bcanalyzer** command is a small utility for analyzing bitcode files.
-The tool reads a bitcode file (such as generated with the **llvm-as** tool) and
-produces a statistical report on the contents of the bitcode file. The tool
-can also dump a low level but human readable version of the bitcode file.
-This tool is probably not of much interest or utility except for those working
-directly with the bitcode file format. Most LLVM users can just ignore
-this tool.
-
-If *filename* is omitted or is ``-``, then **llvm-bcanalyzer** reads its input
-from standard input. This is useful for combining the tool into a pipeline.
-Output is written to the standard output.
-
+If *filename* is omitted or is ``-``, then :program:`llvm-bcanalyzer` reads its
+input from standard input. This is useful for combining the tool into a
+pipeline. Output is written to the standard output.
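+
+As an illustrative sketch of such a pipeline (``foo.ll`` is a placeholder),
+assembler output can be piped straight into the analyzer:
+
+.. code-block:: console
+
+   $ llvm-as < foo.ll | llvm-bcanalyzer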
OPTIONS
-------
+.. program:: llvm-bcanalyzer
+.. option:: -nodetails
-**-nodetails**
-
- Causes **llvm-bcanalyzer** to abbreviate its output by writing out only a module
- level summary. The details for individual functions are not displayed.
-
-
+ Causes :program:`llvm-bcanalyzer` to abbreviate its output by writing out only
+ a module level summary. The details for individual functions are not
+ displayed.
-**-dump**
+.. option:: -dump
- Causes **llvm-bcanalyzer** to dump the bitcode in a human readable format. This
- format is significantly different from LLVM assembly and provides details about
- the encoding of the bitcode file.
+ Causes :program:`llvm-bcanalyzer` to dump the bitcode in a human readable
+ format. This format is significantly different from LLVM assembly and
+ provides details about the encoding of the bitcode file.
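+
+ For example (``foo.bc`` is a placeholder):
+
+ .. code-block:: console
+
+    $ llvm-bcanalyzer -dump foo.bc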
+.. option:: -verify
-
-**-verify**
-
- Causes **llvm-bcanalyzer** to verify the module produced by reading the
- bitcode. This ensures that the statistics generated are based on a consistent
+ Causes :program:`llvm-bcanalyzer` to verify the module produced by reading the
+ bitcode. This ensures that the statistics generated are based on a consistent
module.
-
-
-**-help**
+.. option:: -help
Print a summary of command line options.
-
-
-
EXIT STATUS
-----------
-
-If **llvm-bcanalyzer** succeeds, it will exit with 0. Otherwise, if an error
-occurs, it will exit with a non-zero value, usually 1.
-
+If :program:`llvm-bcanalyzer` succeeds, it will exit with 0. Otherwise, if an
+error occurs, it will exit with a non-zero value, usually 1.
SUMMARY OUTPUT DEFINITIONS
--------------------------
-
-The following items are always printed by llvm-bcanalyzer. They comprize the
+The following items are always printed by llvm-bcanalyzer. They comprise the
summary output.
-
**Bitcode Analysis Of Module**
This just provides the name of the module for which bitcode analysis is being
generated.
-
-
**Bitcode Version Number**
The bitcode version (not LLVM version) of the file read by the analyzer.
-
-
**File Size**
The size, in bytes, of the entire bitcode file.
-
-
**Module Bytes**
- The size, in bytes, of the module block. Percentage is relative to File Size.
-
-
+ The size, in bytes, of the module block. Percentage is relative to File Size.
**Function Bytes**
- The size, in bytes, of all the function blocks. Percentage is relative to File
+ The size, in bytes, of all the function blocks. Percentage is relative to File
Size.
-
-
**Global Types Bytes**
- The size, in bytes, of the Global Types Pool. Percentage is relative to File
- Size. This is the size of the definitions of all types in the bitcode file.
-
-
+ The size, in bytes, of the Global Types Pool. Percentage is relative to File
+ Size. This is the size of the definitions of all types in the bitcode file.
**Constant Pool Bytes**
The size, in bytes, of the Constant Pool Blocks. Percentage is relative to File
Size.
-
-
**Module Globals Bytes**
The size, in bytes, of the Global Variable Definitions and their initializers.
Percentage is relative to File Size.
-
-
**Instruction List Bytes**
The size, in bytes, of all the instruction lists in all the functions.
- Percentage is relative to File Size. Note that this value is also included in
+ Percentage is relative to File Size. Note that this value is also included in
the Function Bytes.
-
-
**Compaction Table Bytes**
The size, in bytes, of all the compaction tables in all the functions.
- Percentage is relative to File Size. Note that this value is also included in
+ Percentage is relative to File Size. Note that this value is also included in
the Function Bytes.
-
-
**Symbol Table Bytes**
- The size, in bytes, of all the symbol tables in all the functions. Percentage is
- relative to File Size. Note that this value is also included in the Function
+ The size, in bytes, of all the symbol tables in all the functions. Percentage is
+ relative to File Size. Note that this value is also included in the Function
Bytes.
-
-
**Dependent Libraries Bytes**
- The size, in bytes, of the list of dependent libraries in the module. Percentage
- is relative to File Size. Note that this value is also included in the Module
+ The size, in bytes, of the list of dependent libraries in the module. Percentage
+ is relative to File Size. Note that this value is also included in the Module
Global Bytes.
-
-
**Number Of Bitcode Blocks**
The total number of blocks of any kind in the bitcode file.
-
-
**Number Of Functions**
The total number of function definitions in the bitcode file.
-
-
**Number Of Types**
The total number of types defined in the Global Types Pool.
-
-
**Number Of Constants**
The total number of constants (of any type) defined in the Constant Pool.
-
-
**Number Of Basic Blocks**
The total number of basic blocks defined in all functions in the bitcode file.
-
-
**Number Of Instructions**
The total number of instructions defined in all functions in the bitcode file.
-
-
**Number Of Long Instructions**
The total number of long instructions defined in all functions in the bitcode
- file. Long instructions are those taking greater than 4 bytes. Typically long
+ file. Long instructions are those taking greater than 4 bytes. Typically long
instructions are GetElementPtr with several indices, PHI nodes, and calls to
functions with large numbers of arguments.
-
-
**Number Of Operands**
The total number of operands used in all instructions in the bitcode file.
-
-
**Number Of Compaction Tables**
The total number of compaction tables in all functions in the bitcode file.
-
-
**Number Of Symbol Tables**
The total number of symbol tables in all functions in the bitcode file.
-
-
**Number Of Dependent Libs**
The total number of dependent libraries found in the bitcode file.
-
-
**Total Instruction Size**
The total size of the instructions in all functions in the bitcode file.
-
-
**Average Instruction Size**
The average number of bytes per instruction across all functions in the bitcode
- file. This value is computed by dividing Total Instruction Size by Number Of
+ file. This value is computed by dividing Total Instruction Size by Number Of
Instructions.
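+  For example, a hypothetical module with a Total Instruction Size of 10,000
+  bytes and 2,500 instructions has an Average Instruction Size of 4 bytes.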
-
-
**Maximum Type Slot Number**
- The maximum value used for a type's slot number. Larger slot number values take
+ The maximum value used for a type's slot number. Larger slot number values take
more bytes to encode.
-
-
**Maximum Value Slot Number**
- The maximum value used for a value's slot number. Larger slot number values take
+ The maximum value used for a value's slot number. Larger slot number values take
more bytes to encode.
-
-
**Bytes Per Value**
- The average size of a Value definition (of any type). This is computed by
+ The average size of a Value definition (of any type). This is computed by
dividing File Size by the total number of values of any type.
-
-
**Bytes Per Global**
The average size of a global definition (constants and global variables).
-
-
**Bytes Per Function**
- The average number of bytes per function definition. This is computed by
+ The average number of bytes per function definition. This is computed by
dividing Function Bytes by Number Of Functions.
-
-
**# of VBR 32-bit Integers**
The total number of 32-bit integers encoded using the Variable Bit Rate
encoding scheme.
-
-
**# of VBR 64-bit Integers**
The total number of 64-bit integers encoded using the Variable Bit Rate encoding
scheme.
-
-
**# of VBR Compressed Bytes**
The total number of bytes consumed by the 32-bit and 64-bit integers that use
the Variable Bit Rate encoding scheme.
-
-
**# of VBR Expanded Bytes**
The total number of bytes that would have been consumed by the 32-bit and 64-bit
integers had they not been compressed with the Variable Bit Rate encoding
scheme.
-
-
**Bytes Saved With VBR**
The total number of bytes saved by using the Variable Bit Rate encoding scheme.
The percentage is relative to # of VBR Expanded Bytes.
-
-
-
DETAILED OUTPUT DEFINITIONS
---------------------------
-
The following definitions occur only if the -nodetails option was not given.
The detailed output provides additional information on a per-function basis.
-
**Type**
The type signature of the function.
-
-
**Byte Size**
The total number of bytes in the function's block.
-
-
**Basic Blocks**
The number of basic blocks defined by the function.
-
-
**Instructions**
The number of instructions defined by the function.
-
-
**Long Instructions**
The number of instructions using the long instruction format in the function.
-
-
**Operands**
The number of operands used by all instructions in the function.
-
-
**Instruction Size**
The number of bytes consumed by instructions in the function.
-
-
**Average Instruction Size**
- The average number of bytes consumed by the instructions in the function. This
- value is computed by dividing Instruction Size by Instructions.
-
-
+ The average number of bytes consumed by the instructions in the function.
+ This value is computed by dividing Instruction Size by Instructions.
**Bytes Per Instruction**
- The average number of bytes used by the function per instruction. This value is
- computed by dividing Byte Size by Instructions. Note that this is not the same
- as Average Instruction Size. It computes a number relative to the total function
- size not just the size of the instruction list.
-
-
+ The average number of bytes used by the function per instruction. This value
+ is computed by dividing Byte Size by Instructions. Note that this is not the
+ same as Average Instruction Size. It computes a number relative to the total
+ function size not just the size of the instruction list.
**Number of VBR 32-bit Integers**
The total number of 32-bit integers found in this function (for any use).
-
-
**Number of VBR 64-bit Integers**
The total number of 64-bit integers found in this function (for any use).
-
-
**Number of VBR Compressed Bytes**
The total number of bytes in this function consumed by the 32-bit and 64-bit
integers that use the Variable Bit Rate encoding scheme.
-
-
**Number of VBR Expanded Bytes**
The total number of bytes in this function that would have been consumed by
the 32-bit and 64-bit integers had they not been compressed with the Variable
Bit Rate encoding scheme.
-
-
**Bytes Saved With VBR**
The total number of bytes saved in this function by using the Variable Bit
- Rate encoding scheme. The percentage is relative to # of VBR Expanded Bytes.
-
-
-
+ Rate encoding scheme. The percentage is relative to # of VBR Expanded Bytes.
SEE ALSO
--------
+:doc:`/CommandGuide/llvm-dis`, :doc:`/BitCodeFormat`
-llvm-dis|llvm-dis, `http://llvm.org/docs/BitCodeFormat.html <http://llvm.org/docs/BitCodeFormat.html>`_
diff --git a/docs/CommandGuide/llvm-cov.rst b/docs/CommandGuide/llvm-cov.rst
index 09275f6af714..524f24087f21 100644
--- a/docs/CommandGuide/llvm-cov.rst
+++ b/docs/CommandGuide/llvm-cov.rst
@@ -1,51 +1,39 @@
llvm-cov - emit coverage information
====================================
-
SYNOPSIS
--------
-
-**llvm-cov** [-gcno=filename] [-gcda=filename] [dump]
-
+:program:`llvm-cov` [-gcno=filename] [-gcda=filename] [dump]
DESCRIPTION
-----------
-
-The experimental **llvm-cov** tool reads in description file generated by compiler
-and coverage data file generated by instrumented program. This program assumes
-that the description and data file uses same format as gcov files.
-
+The experimental :program:`llvm-cov` tool reads in a description file
+generated by the compiler and a coverage data file generated by the
+instrumented program. This program assumes that the description and data
+files use the same format as gcov files.
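+
+As a hedged example, assuming ``test.gcno`` and ``test.gcda`` came from a
+gcov-style instrumented build (both file names are placeholders):
+
+.. code-block:: console
+
+   $ llvm-cov -gcno=test.gcno -gcda=test.gcda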
OPTIONS
-------
+.. option:: -gcno=filename
+ This option selects the input description file generated by the compiler
+ while instrumenting the program.
-**-gcno=filename]**
-
- This option selects input description file generated by compiler while instrumenting
- program.
-
-
-
-**-gcda=filename]**
+.. option:: -gcda=filename
This option selects the coverage data file generated by the instrumented program.
+.. option:: -dump
-
-**-dump**
-
- This options enables output dump that is suitable for a developer to help debug
- **llvm-cov** itself.
-
-
-
+ This option enables an output dump that is suitable for a developer to help
+ debug :program:`llvm-cov` itself.
EXIT STATUS
-----------
+:program:`llvm-cov` returns 1 if it cannot read input files. Otherwise, it
+exits with zero.
-**llvm-cov** returns 1 if it cannot read input files. Otherwise, it exits with zero.
diff --git a/docs/CommandGuide/llvm-link.rst b/docs/CommandGuide/llvm-link.rst
index 63019d7cca78..3bcfa68c2599 100644
--- a/docs/CommandGuide/llvm-link.rst
+++ b/docs/CommandGuide/llvm-link.rst
@@ -1,96 +1,56 @@
-llvm-link - LLVM linker
-=======================
-
+llvm-link - LLVM bitcode linker
+===============================
SYNOPSIS
--------
-
-**llvm-link** [*options*] *filename ...*
-
+:program:`llvm-link` [*options*] *filename ...*
DESCRIPTION
-----------
-
-**llvm-link** takes several LLVM bitcode files and links them together into a
-single LLVM bitcode file. It writes the output file to standard output, unless
-the **-o** option is used to specify a filename.
-
-**llvm-link** attempts to load the input files from the current directory. If
-that fails, it looks for each file in each of the directories specified by the
-**-L** options on the command line. The library search paths are global; each
-one is searched for every input file if necessary. The directories are searched
-in the order they were specified on the command line.
-
+:program:`llvm-link` takes several LLVM bitcode files and links them together
+into a single LLVM bitcode file. It writes the output file to standard output,
+unless the :option:`-o` option is used to specify a filename.
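+
+For example, to link two bitcode files into one (file names are
+placeholders):
+
+.. code-block:: console
+
+   $ llvm-link a.bc b.bc -o linked.bc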
OPTIONS
-------
+.. option:: -f
+ Enable binary output on terminals. Normally, :program:`llvm-link` will refuse
+ to write raw bitcode output if the output stream is a terminal. With this
+ option, :program:`llvm-link` will write raw bitcode regardless of the output
+ device.
-**-L** *directory*
-
- Add the specified *directory* to the library search path. When looking for
- libraries, **llvm-link** will look in path name for libraries. This option can be
- specified multiple times; **llvm-link** will search inside these directories in
- the order in which they were specified on the command line.
-
-
-
-**-f**
-
- Enable binary output on terminals. Normally, **llvm-link** will refuse to
- write raw bitcode output if the output stream is a terminal. With this option,
- **llvm-link** will write raw bitcode regardless of the output device.
-
-
-
-**-o** *filename*
-
- Specify the output file name. If *filename* is ``-``, then **llvm-link** will
- write its output to standard output.
+.. option:: -o filename
+ Specify the output file name. If ``filename`` is "``-``", then
+ :program:`llvm-link` will write its output to standard output.
-
-**-S**
+.. option:: -S
Write output in LLVM intermediate language (instead of bitcode).
+.. option:: -d
+ If specified, :program:`llvm-link` prints a human-readable version of the
+ output bitcode file to standard error.
-**-d**
-
- If specified, **llvm-link** prints a human-readable version of the output
- bitcode file to standard error.
-
-
-
-**-help**
+.. option:: -help
Print a summary of command line options.
+.. option:: -v
-
-**-v**
-
- Verbose mode. Print information about what **llvm-link** is doing. This
- typically includes a message for each bitcode file linked in and for each
+ Verbose mode. Print information about what :program:`llvm-link` is doing.
+ This typically includes a message for each bitcode file linked in and for each
library found.
-
-
-
EXIT STATUS
-----------
-
-If **llvm-link** succeeds, it will exit with 0. Otherwise, if an error
+If :program:`llvm-link` succeeds, it will exit with 0. Otherwise, if an error
occurs, it will exit with a non-zero value.
-SEE ALSO
---------
-
-
-gccld|gccld
diff --git a/docs/CommandGuide/llvm-stress.rst b/docs/CommandGuide/llvm-stress.rst
index 44aa32c7557f..fb006f562b12 100644
--- a/docs/CommandGuide/llvm-stress.rst
+++ b/docs/CommandGuide/llvm-stress.rst
@@ -1,48 +1,34 @@
llvm-stress - generate random .ll files
=======================================
-
SYNOPSIS
--------
-
-**llvm-stress** [-size=filesize] [-seed=initialseed] [-o=outfile]
-
+:program:`llvm-stress` [-size=filesize] [-seed=initialseed] [-o=outfile]
DESCRIPTION
-----------
-
-The **llvm-stress** tool is used to generate random .ll files that can be used to
-test different components of LLVM.
-
+The :program:`llvm-stress` tool is used to generate random ``.ll`` files that
+can be used to test different components of LLVM.
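+
+For example, a sketch of a typical invocation (the size and seed values are
+arbitrary placeholders):
+
+.. code-block:: console
+
+   $ llvm-stress -size=512 -seed=17 -o random.ll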
OPTIONS
-------
-
-
-**-o** *filename*
+.. option:: -o filename
Specify the output filename.
+.. option:: -size size
+ Specify the size of the generated ``.ll`` file.
-**-size** *size*
-
- Specify the size of the generated .ll file.
-
-
-
-**-seed** *seed*
+.. option:: -seed seed
Specify the seed to be used for the randomly generated instructions.
-
-
-
EXIT STATUS
-----------
+:program:`llvm-stress` returns 0.
-**llvm-stress** returns 0.
diff --git a/docs/CommandGuide/llvm-symbolizer.rst b/docs/CommandGuide/llvm-symbolizer.rst
new file mode 100644
index 000000000000..73babb1e5c55
--- /dev/null
+++ b/docs/CommandGuide/llvm-symbolizer.rst
@@ -0,0 +1,65 @@
+llvm-symbolizer - convert addresses into source code locations
+==============================================================
+
+SYNOPSIS
+--------
+
+:program:`llvm-symbolizer` [options]
+
+DESCRIPTION
+-----------
+
+:program:`llvm-symbolizer` reads object file names and addresses from standard
+input and prints corresponding source code locations to standard output. This
+program uses the debug info sections and symbol table in the object files.
+
+EXAMPLE
+--------
+
+.. code-block:: console
+
+ $ cat addr.txt
+ a.out 0x4004f4
+ /tmp/b.out 0x400528
+ /tmp/c.so 0x710
+ $ llvm-symbolizer < addr.txt
+ main
+ /tmp/a.cc:4
+
+ f(int, int)
+ /tmp/b.cc:11
+
+ h_inlined_into_g
+ /tmp/header.h:2
+ g_inlined_into_f
+ /tmp/header.h:7
+ f_inlined_into_main
+ /tmp/source.cc:3
+ main
+ /tmp/source.cc:8
+
+OPTIONS
+-------
+
+.. option:: -functions
+
+ Print function names as well as source file/line locations. Defaults to true.
+
+.. option:: -use-symbol-table
+
+ Prefer function names stored in the symbol table to function names
+ in debug info sections. Defaults to true.
+
+.. option:: -demangle
+
+ Print demangled function names. Defaults to true.
+
+.. option:: -inlining
+
+ If a source code location is in an inlined function, prints all the
+ inlined frames. Defaults to true.
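+
+Since these are boolean options, each can be disabled explicitly. An
+illustrative sketch, reusing ``addr.txt`` from the example above:
+
+.. code-block:: console
+
+   $ llvm-symbolizer -demangle=false -inlining=false < addr.txt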
+
+EXIT STATUS
+-----------
+
+:program:`llvm-symbolizer` returns 0. Other exit codes imply an internal program error.
diff --git a/docs/CommandGuide/opt.rst b/docs/CommandGuide/opt.rst
index 72f19034c9ed..179c297c2209 100644
--- a/docs/CommandGuide/opt.rst
+++ b/docs/CommandGuide/opt.rst
@@ -1,183 +1,143 @@
opt - LLVM optimizer
====================
-
SYNOPSIS
--------
-
-**opt** [*options*] [*filename*]
-
+:program:`opt` [*options*] [*filename*]
DESCRIPTION
-----------
+The :program:`opt` command is the modular LLVM optimizer and analyzer. It
+takes LLVM source files as input, runs the specified optimizations or analyses
+on it, and then outputs the optimized file or the analysis results. The
+function of :program:`opt` depends on whether the :option:`-analyze` option is
+given.
-The **opt** command is the modular LLVM optimizer and analyzer. It takes LLVM
-source files as input, runs the specified optimizations or analyses on it, and then
-outputs the optimized file or the analysis results. The function of
-**opt** depends on whether the **-analyze** option is given.
-
-When **-analyze** is specified, **opt** performs various analyses of the input
-source. It will usually print the results on standard output, but in a few
-cases, it will print output to standard error or generate a file with the
-analysis output, which is usually done when the output is meant for another
+When :option:`-analyze` is specified, :program:`opt` performs various analyses
+of the input source. It will usually print the results on standard output, but
+in a few cases, it will print output to standard error or generate a file with
+the analysis output, which is usually done when the output is meant for another
program.
-While **-analyze** is *not* given, **opt** attempts to produce an optimized
-output file. The optimizations available via **opt** depend upon what
-libraries were linked into it as well as any additional libraries that have
-been loaded with the **-load** option. Use the **-help** option to determine
-what optimizations you can use.
-
-If *filename* is omitted from the command line or is *-*, **opt** reads its
-input from standard input. Inputs can be in either the LLVM assembly language
-format (.ll) or the LLVM bitcode format (.bc).
+While :option:`-analyze` is *not* given, :program:`opt` attempts to produce an
+optimized output file. The optimizations available via :program:`opt` depend
+upon what libraries were linked into it as well as any additional libraries
+that have been loaded with the :option:`-load` option. Use the :option:`-help`
+option to determine what optimizations you can use.
-If an output filename is not specified with the **-o** option, **opt**
-writes its output to the standard output.
+If ``filename`` is omitted from the command line or is "``-``", :program:`opt`
+reads its input from standard input. Inputs can be in either the LLVM assembly
+language format (``.ll``) or the LLVM bitcode format (``.bc``).
+If an output filename is not specified with the :option:`-o` option,
+:program:`opt` writes its output to the standard output.
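+
+For example, a minimal sketch that runs a single pass on an assembly file
+and writes assembly back out (file names and the ``-mem2reg`` pass choice
+are illustrative):
+
+.. code-block:: console
+
+   $ opt -S -mem2reg input.ll -o output.ll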
OPTIONS
-------
+.. option:: -f
+ Enable binary output on terminals. Normally, :program:`opt` will refuse to
+ write raw bitcode output if the output stream is a terminal. With this option,
+ :program:`opt` will write raw bitcode regardless of the output device.
-**-f**
-
- Enable binary output on terminals. Normally, **opt** will refuse to
- write raw bitcode output if the output stream is a terminal. With this option,
- **opt** will write raw bitcode regardless of the output device.
-
-
-
-**-help**
+.. option:: -help
Print a summary of command line options.
-
-
-**-o** *filename*
+.. option:: -o <filename>
Specify the output filename.
-
-
-**-S**
+.. option:: -S
Write output in LLVM intermediate language (instead of bitcode).
+.. option:: -{passname}
+ :program:`opt` provides the ability to run any of LLVM's optimization or
+ analysis passes in any order. The :option:`-help` option lists all the passes
+ available. The order in which the options occur on the command line is the
+ order in which they are executed (within pass constraints).
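+
+ For example, an illustrative two-pass invocation (file names are
+ placeholders):
+
+ .. code-block:: console
+
+    $ opt -instcombine -gvn input.ll -S -o output.ll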
-**-{passname}**
-
- **opt** provides the ability to run any of LLVM's optimization or analysis passes
- in any order. The **-help** option lists all the passes available. The order in
- which the options occur on the command line are the order in which they are
- executed (within pass constraints).
-
-
-
-**-std-compile-opts**
+.. option:: -std-compile-opts
This is shorthand for a standard list of *compile time optimization* passes.
- This is typically used to optimize the output from the llvm-gcc front end. It
- might be useful for other front end compilers as well. To discover the full set
- of options available, use the following command:
-
+ This is typically used to optimize the output from the llvm-gcc front end. It
+ might be useful for other front end compilers as well. To discover the full
+ set of options available, use the following command:
.. code-block:: sh
llvm-as < /dev/null | opt -std-compile-opts -disable-output -debug-pass=Arguments
+.. option:: -disable-inlining
+ This option is only meaningful when :option:`-std-compile-opts` is given. It
+ simply removes the inlining pass from the standard list.
+.. option:: -disable-opt
-**-disable-inlining**
-
- This option is only meaningful when **-std-compile-opts** is given. It simply
- removes the inlining pass from the standard list.
-
-
-
-**-disable-opt**
-
- This option is only meaningful when **-std-compile-opts** is given. It disables
- most, but not all, of the **-std-compile-opts**. The ones that remain are
- **-verify**, **-lower-setjmp**, and **-funcresolve**.
+ This option is only meaningful when :option:`-std-compile-opts` is given. It
+ disables most, but not all, of the :option:`-std-compile-opts`. The ones that
+ remain are :option:`-verify`, :option:`-lower-setjmp`, and
+ :option:`-funcresolve`.
-
-
-**-strip-debug**
+.. option:: -strip-debug
This option causes opt to strip debug information from the module before
- applying other optimizations. It is essentially the same as **-strip** but it
- ensures that stripping of debug information is done first.
-
-
-
-**-verify-each**
-
- This option causes opt to add a verify pass after every pass otherwise specified
- on the command line (including **-verify**). This is useful for cases where it
- is suspected that a pass is creating an invalid module but it is not clear which
- pass is doing it. The combination of **-std-compile-opts** and **-verify-each**
- can quickly track down this kind of problem.
+ applying other optimizations. It is essentially the same as :option:`-strip`
+ but it ensures that stripping of debug information is done first.
+.. option:: -verify-each
+ This option causes opt to add a verify pass after every pass otherwise
+ specified on the command line (including :option:`-verify`). This is useful
+ for cases where it is suspected that a pass is creating an invalid module but
+ it is not clear which pass is doing it. The combination of
+ :option:`-std-compile-opts` and :option:`-verify-each` can quickly track down
+ this kind of problem.
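+
+ An illustrative invocation of that combination (file names are
+ placeholders):
+
+ .. code-block:: console
+
+    $ opt -std-compile-opts -verify-each input.bc -o output.bc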
-**-profile-info-file** *filename*
+.. option:: -profile-info-file <filename>
- Specify the name of the file loaded by the -profile-loader option.
+ Specify the name of the file loaded by the ``-profile-loader`` option.
-
-
-**-stats**
+.. option:: -stats
Print statistics.
-
-
-**-time-passes**
+.. option:: -time-passes
Record the amount of time needed for each pass and print it to standard
error.
+.. option:: -debug
+ If this is a debug build, this option will enable debug printouts from passes
+ which use the ``DEBUG()`` macro. See the `LLVM Programmer's Manual
+ <../ProgrammersManual.html>`_, section ``#DEBUG`` for more information.
-**-debug**
-
- If this is a debug build, this option will enable debug printouts
- from passes which use the *DEBUG()* macro. See the **LLVM Programmer's
- Manual**, section *#DEBUG* for more information.
-
-
-
-**-load**\ =\ *plugin*
-
- Load the dynamic object *plugin*. This object should register new optimization
- or analysis passes. Once loaded, the object will add new command line options to
- enable various optimizations or analyses. To see the new complete list of
- optimizations, use the **-help** and **-load** options together. For example:
+.. option:: -load=<plugin>
+ Load the dynamic object ``plugin``. This object should register new
+ optimization or analysis passes. Once loaded, the object will add new command
+ line options to enable various optimizations or analyses. To see the new
+ complete list of optimizations, use the :option:`-help` and :option:`-load`
+ options together. For example:
.. code-block:: sh
opt -load=plugin.so -help
-
-
-
-**-p**
+.. option:: -p
Print module after each transformation.
-
-
-
EXIT STATUS
-----------
-
-If **opt** succeeds, it will exit with 0. Otherwise, if an error
+If :program:`opt` succeeds, it will exit with 0. Otherwise, if an error
occurs, it will exit with a non-zero value.
+
diff --git a/docs/CommandGuide/tblgen.rst b/docs/CommandGuide/tblgen.rst
index 2d191676d9f0..1858ee447d07 100644
--- a/docs/CommandGuide/tblgen.rst
+++ b/docs/CommandGuide/tblgen.rst
@@ -1,186 +1,129 @@
tblgen - Target Description To C++ Code Generator
=================================================
-
SYNOPSIS
--------
-
-**tblgen** [*options*] [*filename*]
-
+:program:`tblgen` [*options*] [*filename*]
DESCRIPTION
-----------
+:program:`tblgen` translates from target description (``.td``) files into C++
+code that can be included in the definition of an LLVM target library. Most
+users of LLVM will not need to use this program. It is only for assisting with
+writing an LLVM target backend.
-**tblgen** translates from target description (.td) files into C++ code that can
-be included in the definition of an LLVM target library. Most users of LLVM will
-not need to use this program. It is only for assisting with writing an LLVM
-target backend.
-
-The input and output of **tblgen** is beyond the scope of this short
-introduction. Please see the *CodeGeneration* page in the LLVM documentation.
-
-The *filename* argument specifies the name of a Target Description (.td) file
-to read as input.
+The input and output of :program:`tblgen` is beyond the scope of this short
+introduction. Please see :doc:`../TableGenFundamentals`.
+The *filename* argument specifies the name of a Target Description (``.td``)
+file to read as input.
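+
+As a hedged example, generating register information from a hypothetical
+target description (paths and file names are placeholders):
+
+.. code-block:: console
+
+   $ tblgen -gen-register-info -I include MyTarget.td -o MyTargetGenRegisterInfo.inc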
OPTIONS
-------
-
-
-**-help**
+.. option:: -help
Print a summary of command line options.
+.. option:: -o filename
+ Specify the output file name. If ``filename`` is ``-``, then
+ :program:`tblgen` sends its output to standard output.
-**-o** *filename*
-
- Specify the output file name. If *filename* is ``-``, then **tblgen**
- sends its output to standard output.
-
-
-
-**-I** *directory*
-
- Specify where to find other target description files for inclusion. The
- *directory* value should be a full or partial path to a directory that contains
- target description files.
-
-
-
-**-asmparsernum** *N*
+.. option:: -I directory
- Make -gen-asm-parser emit assembly writer number *N*.
+ Specify where to find other target description files for inclusion. The
+ ``directory`` value should be a full or partial path to a directory that
+ contains target description files.
+.. option:: -asmparsernum N
+ Make -gen-asm-parser emit assembly parser number ``N``.
-**-asmwriternum** *N*
+.. option:: -asmwriternum N
- Make -gen-asm-writer emit assembly writer number *N*.
+ Make -gen-asm-writer emit assembly writer number ``N``.
-
-
-**-class** *class Name*
+.. option:: -class className
Print the enumeration list for this class.
-
-
-**-print-records**
+.. option:: -print-records
Print all records to standard output (default).
-
-
-**-print-enums**
+.. option:: -print-enums
Print enumeration values for a class.
-
-
-**-print-sets**
+.. option:: -print-sets
Print expanded sets for testing DAG exprs.
-
-
-**-gen-emitter**
+.. option:: -gen-emitter
Generate machine code emitter.
-
-
-**-gen-register-info**
+.. option:: -gen-register-info
Generate registers and register classes info.
-
-
-**-gen-instr-info**
+.. option:: -gen-instr-info
Generate instruction descriptions.
-
-
-**-gen-asm-writer**
+.. option:: -gen-asm-writer
Generate the assembly writer.
-
-
-**-gen-disassembler**
+.. option:: -gen-disassembler
Generate disassembler.
-
-
-**-gen-pseudo-lowering**
+.. option:: -gen-pseudo-lowering
Generate pseudo instruction lowering.
-
-
-**-gen-dag-isel**
+.. option:: -gen-dag-isel
Generate a DAG (Directed Acyclic Graph) instruction selector.
-
-
-**-gen-asm-matcher**
+.. option:: -gen-asm-matcher
Generate assembly instruction matcher.
-
-
-**-gen-dfa-packetizer**
+.. option:: -gen-dfa-packetizer
Generate DFA Packetizer for VLIW targets.
-
-
-**-gen-fast-isel**
+.. option:: -gen-fast-isel
Generate a "fast" instruction selector.
-
-
-**-gen-subtarget**
+.. option:: -gen-subtarget
Generate subtarget enumerations.
-
-
-**-gen-intrinsic**
+.. option:: -gen-intrinsic
Generate intrinsic information.
-
-
-**-gen-tgt-intrinsic**
+.. option:: -gen-tgt-intrinsic
Generate target intrinsic information.
-
-
-**-gen-enhanced-disassembly-info**
+.. option:: -gen-enhanced-disassembly-info
Generate enhanced disassembly info.
-
-
-**-version**
+.. option:: -version
Show the version number of this program.
-
-
-
EXIT STATUS
-----------
-
-If **tblgen** succeeds, it will exit with 0. Otherwise, if an error
+If :program:`tblgen` succeeds, it will exit with 0. Otherwise, if an error
occurs, it will exit with a non-zero value.
diff --git a/docs/CommandLine.rst b/docs/CommandLine.rst
index 302f5a4cf591..073958b16bad 100644
--- a/docs/CommandLine.rst
+++ b/docs/CommandLine.rst
@@ -1,5 +1,3 @@
-.. _commandline:
-
==============================
CommandLine 2.0 Library Manual
==============================
@@ -68,9 +66,7 @@ CommandLine library to have the following features:
This document will hopefully let you jump in and start using CommandLine in your
utility quickly and painlessly. Additionally it should be a simple reference
-manual to figure out how stuff works. If it is failing in some area (or you
-want an extension to the library), nag the author, `Chris
-Lattner <mailto:sabre@nondot.org>`_.
+manual to figure out how stuff works.
Quick Start Guide
=================
diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst
index e41f5f9eecea..681777c12d0b 100644
--- a/docs/CompilerWriterInfo.rst
+++ b/docs/CompilerWriterInfo.rst
@@ -1,5 +1,3 @@
-.. _compiler_writer_info:
-
========================================================
Architecture & Platform Information for Compiler Writers
========================================================
@@ -12,8 +10,6 @@ Architecture & Platform Information for Compiler Writers
This document is a work-in-progress. Additions and clarifications are
welcome.
- Compiled by `Misha Brukman <http://misha.brukman.net>`_.
-
Hardware
========
@@ -24,6 +20,11 @@ ARM
* `ABI <http://www.arm.com/products/DevTools/ABI.html>`_
+AArch64
+-------
+
+* `ARMv8 Instruction Set Overview <http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.genc010197a/index.html>`_
+
Itanium (ia64)
--------------
@@ -40,19 +41,15 @@ PowerPC
IBM - Official manuals and docs
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-* `PowerPC Architecture Book <http://www-106.ibm.com/developerworks/eserver/articles/archguide.html>`_
+* `Power Instruction Set Architecture, Versions 2.03 through 2.06 (authentication required, free sign-up) <https://www.power.org/technology-introduction/standards-specifications>`_
- * Book I: `PowerPC User Instruction Set Architecture <http://www-106.ibm.com/developerworks/eserver/pdfs/archpub1.pdf>`_
+* `PowerPC Compiler Writer's Guide <http://www.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF7785256996007558C6>`_
- * Book II: `PowerPC Virtual Environment Architecture <http://www-106.ibm.com/developerworks/eserver/pdfs/archpub2.pdf>`_
+* `Intro to PowerPC Architecture <http://www.ibm.com/developerworks/linux/library/l-powarch/>`_
- * Book III: `PowerPC Operating Environment Architecture <http://www-106.ibm.com/developerworks/eserver/pdfs/archpub3.pdf>`_
+* `PowerPC Processor Manuals (embedded) <http://www.ibm.com/chips/techlib/techlib.nsf/products/PowerPC>`_
-* `PowerPC Compiler Writer's Guide <http://www-3.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF7785256996007558C6>`_
-
-* `PowerPC Processor Manuals <http://www-3.ibm.com/chips/techlib/techlib.nsf/products/PowerPC>`_
-
-* `Intro to PowerPC Architecture <http://www-106.ibm.com/developerworks/linux/library/l-powarch/>`_
+* `Various IBM specifications and white papers <https://www.power.org/documentation/?document_company=105&document_category=all&publish_year=all&grid_order=DESC&grid_sort=title>`_
* `IBM AIX/5L for POWER Assembly Reference <http://publibn.boulder.ibm.com/doc_link/en_US/a_doc_lib/aixassem/alangref/alangreftfrm.htm>`_
@@ -81,13 +78,13 @@ AMD - Official manuals and docs
Intel - Official manuals and docs
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-* `IA-32 manuals <http://developer.intel.com/design/pentium4/manuals/index_new.htm>`_
+* `Intel 64 and IA-32 manuals <http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html>`_
* `Intel Itanium documentation <http://www.intel.com/design/itanium/documentation.htm?iid=ipp_srvr_proc_itanium2+techdocs>`_
Other x86-specific information
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-* `Calling conventions for different C++ compilers and operating systems <http://www.agner.org/assem/calling_conventions.pdf>`_
+* `Calling conventions for different C++ compilers and operating systems <http://www.agner.org/optimize/calling_conventions.pdf>`_
Other relevant lists
--------------------
@@ -101,6 +98,8 @@ Linux
-----
* `PowerPC 64-bit ELF ABI Supplement <http://www.linuxbase.org/spec/ELF/ppc64/>`_
+* `Procedure Call Standard for the AArch64 Architecture <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055a/IHI0055A_aapcs64.pdf>`_
+* `ELF for the ARM 64-bit Architecture (AArch64) <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf>`_
OS X
----
@@ -108,6 +107,12 @@ OS X
* `Mach-O Runtime Architecture <http://developer.apple.com/documentation/Darwin/RuntimeArchitecture-date.html>`_
* `Notes on Mach-O ABI <http://www.unsanity.org/archives/000044.php>`_
+NVPTX
+=====
+
+* `CUDA Documentation <http://docs.nvidia.com/cuda/index.html>`_ includes the PTX
+ ISA and Driver API documentation
+
Miscellaneous Resources
=======================
diff --git a/docs/DebuggingJITedCode.rst b/docs/DebuggingJITedCode.rst
index eeb2f7787dae..d6101d510034 100644
--- a/docs/DebuggingJITedCode.rst
+++ b/docs/DebuggingJITedCode.rst
@@ -1,11 +1,7 @@
-.. _debugging-jited-code:
-
==============================
Debugging JIT-ed Code With GDB
==============================
-.. sectionauthor:: Reid Kleckner and Eli Bendersky
-
Background
==========
diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst
index e35e72955640..43bdc8598531 100644
--- a/docs/DeveloperPolicy.rst
+++ b/docs/DeveloperPolicy.rst
@@ -1,5 +1,3 @@
-.. _developer_policy:
-
=====================
LLVM Developer Policy
=====================
@@ -26,8 +24,8 @@ This policy is also designed to accomplish the following objectives:
#. Keep the top of Subversion trees as stable as possible.
-#. Establish awareness of the project's `copyright, license, and patent
- policies`_ with contributors to the project.
+#. Establish awareness of the project's :ref:`copyright, license, and patent
+ policies <copyright-license-patents>` with contributors to the project.
This policy is aimed at frequent contributors to LLVM. People interested in
contributing one-off patches can do so in an informal way by sending them to the
@@ -180,8 +178,8 @@ Developers are required to create test cases for any bugs fixed and any new
features added. Some tips for getting your testcase approved:
* All feature and regression test cases are added to the ``llvm/test``
- directory. The appropriate sub-directory should be selected (see the `Testing
- Guide <TestingGuide.html>`_ for details).
+ directory. The appropriate sub-directory should be selected (see the
+ :doc:`Testing Guide <TestingGuide>` for details).
* Test cases should be written in `LLVM assembly language <LangRef.html>`_
unless the feature or regression being tested requires another language
@@ -401,7 +399,7 @@ Hacker!" in the commit message.
Overall, please do not add contributor names to the source code.
-.. _copyright, license, and patent policies:
+.. _copyright-license-patents:
Copyright, License, and Patents
===============================
diff --git a/docs/Dummy.html b/docs/Dummy.html
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/docs/Dummy.html
diff --git a/docs/ExceptionHandling.rst b/docs/ExceptionHandling.rst
index 190f18261da0..0a86607556ab 100644
--- a/docs/ExceptionHandling.rst
+++ b/docs/ExceptionHandling.rst
@@ -1,5 +1,3 @@
-.. _exception_handling:
-
==========================
Exception Handling in LLVM
==========================
@@ -34,13 +32,13 @@ execution of an application.
A more complete description of the Itanium ABI exception handling runtime
support of can be found at `Itanium C++ ABI: Exception Handling
-<http://www.codesourcery.com/cxx-abi/abi-eh.html>`_. A description of the
+<http://mentorembedded.github.com/cxx-abi/abi-eh.html>`_. A description of the
exception frame format can be found at `Exception Frames
-<http://refspecs.freestandards.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html>`_,
+<http://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html>`_,
with details of the DWARF 4 specification at `DWARF 4 Standard
<http://dwarfstd.org/Dwarf4Std.php>`_. A description for the C++ exception
table formats can be found at `Exception Handling Tables
-<http://www.codesourcery.com/cxx-abi/exceptions.pdf>`_.
+<http://mentorembedded.github.com/cxx-abi/exceptions.pdf>`_.
Setjmp/Longjmp Exception Handling
---------------------------------
@@ -151,10 +149,10 @@ type info index are passed in as arguments. The landing pad saves the exception
structure reference and then proceeds to select the catch block that corresponds
to the type info of the exception object.
-The LLVM `landingpad instruction <LangRef.html#i_landingpad>`_ is used to convey
-information about the landing pad to the back end. For C++, the ``landingpad``
-instruction returns a pointer and integer pair corresponding to the pointer to
-the *exception structure* and the *selector value* respectively.
+The LLVM :ref:`i_landingpad` is used to convey information about the landing
+pad to the back end. For C++, the ``landingpad`` instruction returns a pointer
+and integer pair corresponding to the pointer to the *exception structure* and
+the *selector value* respectively.
The ``landingpad`` instruction takes a reference to the personality function to
be used for this ``try``/``catch`` sequence. The remainder of the instruction is
@@ -203,10 +201,9 @@ A cleanup is extra code which needs to be run as part of unwinding a scope. C++
destructors are a typical example, but other languages and language extensions
provide a variety of different kinds of cleanups. In general, a landing pad may
need to run arbitrary amounts of cleanup code before actually entering a catch
-block. To indicate the presence of cleanups, a `landingpad
-instruction <LangRef.html#i_landingpad>`_ should have a *cleanup*
-clause. Otherwise, the unwinder will not stop at the landing pad if there are no
-catches or filters that require it to.
+block. To indicate the presence of cleanups, a :ref:`i_landingpad` should have
+a *cleanup* clause. Otherwise, the unwinder will not stop at the landing pad if
+there are no catches or filters that require it to.
.. note::
@@ -226,9 +223,9 @@ Throw Filters
C++ allows the specification of which exception types may be thrown from a
function. To represent this, a top level landing pad may exist to filter out
-invalid types. To express this in LLVM code the `landingpad
-instruction <LangRef.html#i_landingpad>`_ will have a filter clause. The clause
-consists of an array of type infos. ``landingpad`` will return a negative value
+invalid types. To express this in LLVM code the :ref:`i_landingpad` will have a
+filter clause. The clause consists of an array of type infos.
+``landingpad`` will return a negative value
if the exception does not match any of the type infos. If no match is found then
a call to ``__cxa_call_unexpected`` should be made, otherwise
``_Unwind_Resume``. Each of these functions requires a reference to the
@@ -269,8 +266,8 @@ handling information at various points in generated code.
.. _llvm.eh.typeid.for:
-llvm.eh.typeid.for
-------------------
+``llvm.eh.typeid.for``
+----------------------
.. code-block:: llvm
@@ -283,8 +280,8 @@ function. This value can be used to compare against the result of
.. _llvm.eh.sjlj.setjmp:
-llvm.eh.sjlj.setjmp
--------------------
+``llvm.eh.sjlj.setjmp``
+-----------------------
.. code-block:: llvm
@@ -305,8 +302,8 @@ available for use in a target-specific manner.
.. _llvm.eh.sjlj.longjmp:
-llvm.eh.sjlj.longjmp
---------------------
+``llvm.eh.sjlj.longjmp``
+------------------------
.. code-block:: llvm
@@ -318,8 +315,8 @@ a buffer populated by `llvm.eh.sjlj.setjmp`_. The frame pointer and stack
pointer are restored from the buffer, then control is transferred to the
destination address.
-llvm.eh.sjlj.lsda
------------------
+``llvm.eh.sjlj.lsda``
+---------------------
.. code-block:: llvm
@@ -330,8 +327,8 @@ the address of the Language Specific Data Area (LSDA) for the current
function. The SJLJ front-end code stores this address in the exception handling
function context for use by the runtime.
-llvm.eh.sjlj.callsite
----------------------
+``llvm.eh.sjlj.callsite``
+-------------------------
.. code-block:: llvm
diff --git a/docs/ExtendingLLVM.rst b/docs/ExtendingLLVM.rst
index 6df08eee985a..3d8e9ee79a46 100644
--- a/docs/ExtendingLLVM.rst
+++ b/docs/ExtendingLLVM.rst
@@ -1,5 +1,3 @@
-.. _extending_llvm:
-
============================================================
Extending LLVM: Adding instructions, intrinsics, types, etc.
============================================================
diff --git a/docs/FAQ.rst b/docs/FAQ.rst
index b0e3ca045693..e4ab2c18f748 100644
--- a/docs/FAQ.rst
+++ b/docs/FAQ.rst
@@ -1,5 +1,3 @@
-.. _faq:
-
================================
Frequently Asked Questions (FAQ)
================================
@@ -53,6 +51,29 @@ Some porting problems may exist in the following areas:
like the Bourne Shell and sed. Porting to systems without these tools
(MacOS 9, Plan 9) will require more effort.
+What API do I use to store a value to one of the virtual registers in LLVM IR's SSA representation?
+---------------------------------------------------------------------------------------------------
+
+In short: you can't. It's actually kind of a silly question once you grok
+what's going on. Basically, in code like:
+
+.. code-block:: llvm
+
+ %result = add i32 %foo, %bar
+
+, ``%result`` is just a name given to the ``Value`` of the ``add``
+instruction. In other words, ``%result`` *is* the add instruction. The
+"assignment" doesn't explicitly "store" anything to any "virtual register";
+the "``=``" is more like the mathematical sense of equality.
+
+Longer explanation: In order to generate a textual representation of the
+IR, some kind of name has to be given to each instruction so that other
+instructions can textually reference it. However, the isomorphic in-memory
+representation that you manipulate from C++ has no such restriction since
+instructions can simply keep pointers to any other ``Value``'s that they
+reference. In fact, the names of dummy numbered temporaries like ``%1`` are
+not explicitly represented in the in-memory representation at all (see
+``Value::getName()``).
Build Problems
==============
@@ -79,7 +100,7 @@ grabbing the wrong linker/assembler/etc, there are two ways to fix it:
#. Run ``configure`` with an alternative ``PATH`` that is correct. In a
Bourne compatible shell, the syntax would be:
-.. code-block:: bash
+.. code-block:: console
% PATH=[the path without the bad program] ./configure ...
@@ -106,7 +127,7 @@ I've modified a Makefile in my source tree, but my build tree keeps using the ol
If the Makefile already exists in your object tree, you can just run the
following command in the top level directory of your object tree:
-.. code-block:: bash
+.. code-block:: console
% ./config.status <relative path to Makefile>;
@@ -133,13 +154,13 @@ This is most likely occurring because you built a profile or release
For example, if you built LLVM with the command:
-.. code-block:: bash
+.. code-block:: console
% gmake ENABLE_PROFILING=1
...then you must run the tests with the following commands:
-.. code-block:: bash
+.. code-block:: console
% cd llvm/test
% gmake ENABLE_PROFILING=1
@@ -175,17 +196,17 @@ After Subversion update, rebuilding gives the error "No rule to make target".
-----------------------------------------------------------------------------
If the error is of the form:
-.. code-block:: bash
+.. code-block:: console
gmake[2]: *** No rule to make target `/path/to/somefile',
- needed by `/path/to/another/file.d'.
+ needed by `/path/to/another/file.d'.
Stop.
This may occur anytime files are moved within the Subversion repository or
removed entirely. In this case, the best solution is to erase all ``.d``
files, which list dependencies for source files, and rebuild:
-.. code-block:: bash
+.. code-block:: console
% cd $LLVM_OBJ_DIR
% rm -f `find . -name \*\.d`
diff --git a/docs/GCCFEBuildInstrs.html b/docs/GCCFEBuildInstrs.html
deleted file mode 100644
index 37800c8080d4..000000000000
--- a/docs/GCCFEBuildInstrs.html
+++ /dev/null
@@ -1,279 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
- <link rel="stylesheet" href="_static/llvm.css" type="text/css" media="screen">
- <title>Building the LLVM GCC Front-End</title>
-</head>
-<body>
-
-<h1>
- Building the LLVM GCC Front-End
-</h1>
-
-<ol>
- <li><a href="#instructions">Building llvm-gcc from Source</a></li>
- <li><a href="#ada">Building the Ada front-end</a></li>
- <li><a href="#fortran">Building the Fortran front-end</a></li>
- <li><a href="#license">License Information</a></li>
-</ol>
-
-<div class="doc_author">
- <p>Written by the LLVM Team</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="instructions">Building llvm-gcc from Source</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This section describes how to acquire and build llvm-gcc 4.2, which is based
-on the GCC 4.2.1 front-end. Supported languages are Ada, C, C++, Fortran,
-Objective-C and Objective-C++. Note that the instructions for building these
-front-ends are completely different (and much easier!) than those for building
-llvm-gcc3 in the past.</p>
-
-<ol>
- <li><p>Retrieve the appropriate llvm-gcc-4.2-<i>version</i>.source.tar.gz
- archive from the <a href="http://llvm.org/releases/">LLVM web
- site</a>.</p>
-
- <p>It is also possible to download the sources of the llvm-gcc front end
- from a read-only mirror using subversion. To check out the 4.2 code
- for first time use:</p>
-
-<div class="doc_code">
-<pre>
-svn co http://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk <i>dst-directory</i>
-</pre>
-</div>
-
- <p>After that, the code can be be updated in the destination directory
- using:</p>
-
-<div class="doc_code">
-<pre>svn update</pre>
-</div>
-
- <p>The mirror is brought up to date every evening.</p></li>
-
- <li>Follow the directions in the top-level <tt>README.LLVM</tt> file for
- up-to-date instructions on how to build llvm-gcc. See below for building
- with support for Ada or Fortran.
-</ol>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="ada">Building the Ada front-end</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p>Building with support for Ada amounts to following the directions in the
-top-level <tt>README.LLVM</tt> file, adding ",ada" to EXTRALANGS, for example:
-<tt>EXTRALANGS=,ada</tt></p>
-
-<p>There are some complications however:</p>
-
-<ol>
- <li><p>The only platform for which the Ada front-end is known to build is
- 32 bit intel x86 running linux. It is unlikely to build for other
- systems without some work.</p></li>
- <li><p>The build requires having a compiler that supports Ada, C and C++.
- The Ada front-end is written in Ada so an Ada compiler is needed to
- build it. Compilers known to work with the
- <a href="http://llvm.org/releases/download.html">LLVM 2.7 release</a>
- are <a href="http://gcc.gnu.org/releases.html">gcc-4.2</a> and the
- 2005, 2006 and 2007 versions of the
- <a href="http://libre.adacore.com/">GNAT GPL Edition</a>.
- <b>GNAT GPL 2008, gcc-4.3 and later will not work</b>.
- The LLVM parts of llvm-gcc are written in C++ so a C++ compiler is
- needed to build them. The rest of gcc is written in C.
- Some linux distributions provide a version of gcc that supports all
- three languages (the Ada part often comes as an add-on package to
- the rest of gcc). Otherwise it is possible to combine two versions
- of gcc, one that supports Ada and C (such as the
- <a href="http://libre.adacore.com/">2007 GNAT GPL Edition</a>)
- and another which supports C++, see below.</p></li>
- <li><p>Because the Ada front-end is experimental, it is wise to build the
- compiler with checking enabled. This causes it to run much slower, but
- helps catch mistakes in the compiler (please report any problems using
- <a href="http://llvm.org/bugs/">LLVM bugzilla</a>).</p></li>
- <li><p>The Ada front-end <a href="http://llvm.org/PR2007">fails to
- bootstrap</a>, due to lack of LLVM support for
- <tt>setjmp</tt>/<tt>longjmp</tt> style exception handling (used
- internally by the compiler), so you must specify
- <tt>--disable-bootstrap</tt>.</p></li>
-</ol>
-
-<p>Supposing appropriate compilers are available, llvm-gcc with Ada support can
- be built on an x86-32 linux box using the following recipe:</p>
-
-<ol>
- <li><p>Download the <a href="http://llvm.org/releases/download.html">LLVM source</a>
- and unpack it:</p>
-
-<pre class="doc_code">
-wget http://llvm.org/releases/2.7/llvm-2.7.tgz
-tar xzf llvm-2.7.tgz
-mv llvm-2.7 llvm
-</pre>
-
- <p>or <a href="GettingStarted.html#checkout">check out the
- latest version from subversion</a>:</p>
-
-<pre class="doc_code">svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm</pre>
-
- </li>
-
- <li><p>Download the
- <a href="http://llvm.org/releases/download.html">llvm-gcc-4.2 source</a>
- and unpack it:</p>
-
-<pre class="doc_code">
-wget http://llvm.org/releases/2.7/llvm-gcc-4.2-2.7.source.tgz
-tar xzf llvm-gcc-4.2-2.7.source.tgz
-mv llvm-gcc-4.2-2.7.source llvm-gcc-4.2
-</pre>
-
- <p>or <a href="GettingStarted.html#checkout">check out the
- latest version from subversion</a>:</p>
-
-<pre class="doc_code">
-svn co http://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk llvm-gcc-4.2
-</pre>
- </li>
-
- <li><p>Make a build directory <tt>llvm-objects</tt> for llvm and make it the
- current directory:</p>
-
-<pre class="doc_code">
-mkdir llvm-objects
-cd llvm-objects
-</pre>
- </li>
-
- <li><p>Configure LLVM (here it is configured to install into <tt>/usr/local</tt>):</p>
-
-<pre class="doc_code">
-../llvm/configure --prefix=<b>/usr/local</b> --enable-optimized --enable-assertions
-</pre>
-
- <p>If you have a multi-compiler setup and the C++ compiler is not the
- default, then you can configure like this:</p>
-
-<pre class="doc_code">
-CXX=<b>PATH_TO_C++_COMPILER</b> ../llvm/configure --prefix=<b>/usr/local</b> --enable-optimized --enable-assertions
-</pre>
-
- <p>To compile without checking (not recommended), replace
- <tt>--enable-assertions</tt> with <tt>--disable-assertions</tt>.</p>
-
- </li>
-
- <li><p>Build LLVM:</p>
-
-<pre class="doc_code">
-make
-</pre>
- </li>
-
- <li><p>Install LLVM (optional):</p>
-
-<pre class="doc_code">
-make install
-</pre>
- </li>
-
- <li><p>Make a build directory <tt>llvm-gcc-4.2-objects</tt> for llvm-gcc and make it the
- current directory:</p>
-
-<pre class="doc_code">
-cd ..
-mkdir llvm-gcc-4.2-objects
-cd llvm-gcc-4.2-objects
-</pre>
- </li>
-
- <li><p>Configure llvm-gcc (here it is configured to install into <tt>/usr/local</tt>).
- The <tt>--enable-checking</tt> flag turns on sanity checks inside the compiler.
- To turn off these checks (not recommended), replace <tt>--enable-checking</tt>
- with <tt>--disable-checking</tt>.
- Additional languages can be appended to the <tt>--enable-languages</tt> switch,
- for example <tt>--enable-languages=ada,c,c++</tt>.</p>
-
-<pre class="doc_code">
-../llvm-gcc-4.2/configure --prefix=<b>/usr/local</b> --enable-languages=ada,c \
- --enable-checking --enable-llvm=$PWD/../llvm-objects \
- --disable-bootstrap --disable-multilib
-</pre>
-
- <p>If you have a multi-compiler setup, then you can configure like this:</p>
-
-<pre class="doc_code">
-export CC=<b>PATH_TO_C_AND_ADA_COMPILER</b>
-export CXX=<b>PATH_TO_C++_COMPILER</b>
-../llvm-gcc-4.2/configure --prefix=<b>/usr/local</b> --enable-languages=ada,c \
- --enable-checking --enable-llvm=$PWD/../llvm-objects \
- --disable-bootstrap --disable-multilib
-</pre>
- </li>
-
- <li><p>Build and install the compiler:</p>
-
-<pre class="doc_code">
-make
-make install
-</pre>
- </li>
-</ol>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="fortran">Building the Fortran front-end</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p>To build with support for Fortran, follow the directions in the top-level
-<tt>README.LLVM</tt> file, adding ",fortran" to EXTRALANGS, for example:</p>
-
-<pre class="doc_code">
-EXTRALANGS=,fortran
-</pre>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="license">License Information</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p>
-The LLVM GCC frontend is licensed to you under the GNU General Public License
-and the GNU Lesser General Public License. Please see the files COPYING and
-COPYING.LIB for more details.
-</p>
-
-<p>
-More information is <a href="FAQ.html#license">available in the FAQ</a>.
-</p>
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-04-19 22:20:34 +0200 (Thu, 19 Apr 2012) $
-</address>
-
-</body>
-</html>
diff --git a/docs/GarbageCollection.html b/docs/GarbageCollection.html
deleted file mode 100644
index e12485167a72..000000000000
--- a/docs/GarbageCollection.html
+++ /dev/null
@@ -1,1389 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" Content="text/html; charset=UTF-8" >
- <title>Accurate Garbage Collection with LLVM</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
- <style type="text/css">
- .rowhead { text-align: left; background: inherit; }
- .indent { padding-left: 1em; }
- .optl { color: #BFBFBF; }
- </style>
-</head>
-<body>
-
-<h1>
- Accurate Garbage Collection with LLVM
-</h1>
-
-<ol>
- <li><a href="#introduction">Introduction</a>
- <ul>
- <li><a href="#feature">Goals and non-goals</a></li>
- </ul>
- </li>
-
- <li><a href="#quickstart">Getting started</a>
- <ul>
- <li><a href="#quickstart-compiler">In your compiler</a></li>
- <li><a href="#quickstart-runtime">In your runtime library</a></li>
- <li><a href="#shadow-stack">About the shadow stack</a></li>
- </ul>
- </li>
-
- <li><a href="#core">Core support</a>
- <ul>
- <li><a href="#gcattr">Specifying GC code generation:
- <tt>gc "..."</tt></a></li>
- <li><a href="#gcroot">Identifying GC roots on the stack:
- <tt>llvm.gcroot</tt></a></li>
- <li><a href="#barriers">Reading and writing references in the heap</a>
- <ul>
- <li><a href="#gcwrite">Write barrier: <tt>llvm.gcwrite</tt></a></li>
- <li><a href="#gcread">Read barrier: <tt>llvm.gcread</tt></a></li>
- </ul>
- </li>
- </ul>
- </li>
-
- <li><a href="#plugin">Compiler plugin interface</a>
- <ul>
- <li><a href="#collector-algos">Overview of available features</a></li>
- <li><a href="#stack-map">Computing stack maps</a></li>
- <li><a href="#init-roots">Initializing roots to null:
- <tt>InitRoots</tt></a></li>
- <li><a href="#custom">Custom lowering of intrinsics: <tt>CustomRoots</tt>,
- <tt>CustomReadBarriers</tt>, and <tt>CustomWriteBarriers</tt></a></li>
- <li><a href="#safe-points">Generating safe points:
- <tt>NeededSafePoints</tt></a></li>
- <li><a href="#assembly">Emitting assembly code:
- <tt>GCMetadataPrinter</tt></a></li>
- </ul>
- </li>
-
- <li><a href="#runtime-impl">Implementing a collector runtime</a>
- <ul>
- <li><a href="#gcdescriptors">Tracing GC pointers from heap
- objects</a></li>
- </ul>
- </li>
-
- <li><a href="#references">References</a></li>
-
-</ol>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a> and
- Gordon Henriksen</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="introduction">Introduction</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Garbage collection is a widely used technique that frees the programmer from
-having to know the lifetimes of heap objects, making software easier to produce
-and maintain. Many programming languages rely on garbage collection for
-automatic memory management. There are two primary forms of garbage collection:
-conservative and accurate.</p>
-
-<p>Conservative garbage collection often does not require any special support
-from either the language or the compiler: it can handle non-type-safe
-programming languages (such as C/C++) and does not require any special
-information from the compiler. The
-<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/">Boehm collector</a> is
-an example of a state-of-the-art conservative collector.</p>
-
-<p>Accurate garbage collection requires the ability to identify all pointers in
-the program at run-time (which requires that the source-language be type-safe in
-most cases). Identifying pointers at run-time requires compiler support to
-locate all places that hold live pointer variables at run-time, including the
-<a href="#gcroot">processor stack and registers</a>.</p>
-
-<p>Conservative garbage collection is attractive because it does not require any
-special compiler support, but it does have problems. In particular, because the
-conservative garbage collector cannot <i>know</i> that a particular word in the
-machine is a pointer, it cannot move live objects in the heap (preventing the
-use of compacting and generational GC algorithms) and it can occasionally suffer
-from memory leaks due to integer values that happen to point to objects in the
-program. In addition, some aggressive compiler transformations can break
-conservative garbage collectors (though these seem rare in practice).</p>
-
-<p>Accurate garbage collectors do not suffer from any of these problems, but
-they can suffer from degraded scalar optimization of the program. In particular,
-because the runtime must be able to identify and update all pointers active in
-the program, some optimizations are less effective. In practice, however, the
-locality and performance benefits of using aggressive garbage collection
-techniques dominate any low-level losses.</p>
-
-<p>This document describes the mechanisms and interfaces provided by LLVM to
-support accurate garbage collection.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="feature">Goals and non-goals</a>
-</h3>
-
-<div>
-
-<p>LLVM's intermediate representation provides <a href="#intrinsics">garbage
-collection intrinsics</a> that offer support for a broad class of
-collector models. For instance, the intrinsics permit:</p>
-
-<ul>
- <li>semi-space collectors</li>
- <li>mark-sweep collectors</li>
- <li>generational collectors</li>
- <li>reference counting</li>
- <li>incremental collectors</li>
- <li>concurrent collectors</li>
- <li>cooperative collectors</li>
-</ul>
-
-<p>We hope that the primitive support built into the LLVM IR is sufficient to
-support a broad class of garbage collected languages including Scheme, ML, Java,
-C#, Perl, Python, Lua, Ruby, other scripting languages, and more.</p>
-
-<p>However, LLVM does not itself provide a garbage collector&mdash;this should
-be part of your language's runtime library. LLVM provides a framework for
-compile time <a href="#plugin">code generation plugins</a>. The role of these
-plugins is to generate code and data structures which conform to the <em>binary
-interface</em> specified by the <em>runtime library</em>. This is similar to the
-relationship between LLVM and DWARF debugging info, for example. The
-difference primarily lies in the lack of an established standard in the domain
-of garbage collection&mdash;thus the plugins.</p>
-
-<p>The aspects of the binary interface with which LLVM's GC support is
-concerned are:</p>
-
-<ul>
- <li>Creation of GC-safe points within code where collection is allowed to
- execute safely.</li>
- <li>Computation of the stack map. For each safe point in the code, object
- references within the stack frame must be identified so that the
- collector may traverse and perhaps update them.</li>
- <li>Write barriers when storing object references to the heap. These are
- commonly used to optimize incremental scans in generational
- collectors.</li>
- <li>Emission of read barriers when loading object references. These are
- useful for interoperating with concurrent collectors.</li>
-</ul>
-
-<p>There are additional areas that LLVM does not directly address:</p>
-
-<ul>
- <li>Registration of global roots with the runtime.</li>
- <li>Registration of stack map entries with the runtime.</li>
- <li>The functions used by the program to allocate memory, trigger a
- collection, etc.</li>
- <li>Computation or compilation of type maps, or registration of them with
- the runtime. These are used to crawl the heap for object
- references.</li>
-</ul>
-
-<p>In general, LLVM's support for GC does not include features which can be
-adequately addressed with other features of the IR and does not specify a
-particular binary interface. On the plus side, this means that you should be
-able to integrate LLVM with an existing runtime. On the other hand, it leaves
-a lot of work for the developer of a novel language. However, it's easy to get
-started quickly and scale up to a more sophisticated implementation as your
-compiler matures.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="quickstart">Getting started</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Using a GC with LLVM implies many things, for example:</p>
-
-<ul>
- <li>Write a runtime library or find an existing one which implements a GC
- heap.<ol>
- <li>Implement a memory allocator.</li>
- <li>Design a binary interface for the stack map, used to identify
- references within a stack frame on the machine stack.*</li>
- <li>Implement a stack crawler to discover functions on the call stack.*</li>
- <li>Implement a registry for global roots.</li>
- <li>Design a binary interface for type maps, used to identify references
- within heap objects.</li>
- <li>Implement a collection routine bringing together all of the above.</li>
- </ol></li>
- <li>Emit compatible code from your compiler.<ul>
- <li>Initialization in the main function.</li>
- <li>Use the <tt>gc "..."</tt> attribute to enable GC code generation
- (or <tt>F.setGC("...")</tt>).</li>
- <li>Use <tt>@llvm.gcroot</tt> to mark stack roots.</li>
- <li>Use <tt>@llvm.gcread</tt> and/or <tt>@llvm.gcwrite</tt> to
- manipulate GC references, if necessary.</li>
- <li>Allocate memory using the GC allocation routine provided by the
- runtime library.</li>
- <li>Generate type maps according to your runtime's binary interface.</li>
- </ul></li>
- <li>Write a compiler plugin to interface LLVM with the runtime library.*<ul>
- <li>Lower <tt>@llvm.gcread</tt> and <tt>@llvm.gcwrite</tt> to appropriate
- code sequences.*</li>
- <li>Compile LLVM's stack map to the binary form expected by the
- runtime.</li>
- </ul></li>
- <li>Load the plugin into the compiler. Use <tt>llc -load</tt> or link the
- plugin statically with your language's compiler.*</li>
- <li>Link program executables with the runtime.</li>
-</ul>
-
-<p>To help with several of these tasks (those indicated with a *), LLVM
-includes a highly portable, built-in ShadowStack code generator. It is compiled
-into <tt>llc</tt> and works even with the interpreter and C backends.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="quickstart-compiler">In your compiler</a>
-</h3>
-
-<div>
-
-<p>To turn the shadow stack on for your functions, first call:</p>
-
-<div class="doc_code"><pre
->F.setGC("shadow-stack");</pre></div>
-
-<p>for each function your compiler emits. Since the shadow stack is built into
-LLVM, you do not need to load a plugin.</p>
-
-<p>Your compiler must also use <tt>@llvm.gcroot</tt> as documented.
-Don't forget to create a root for each intermediate value that is generated
-when evaluating an expression. In <tt>h(f(), g())</tt>, the result of
-<tt>f()</tt> could easily be collected if evaluating <tt>g()</tt> triggers a
-collection.</p>
-
-<p>There's no need to use <tt>@llvm.gcread</tt> and <tt>@llvm.gcwrite</tt> over
-plain <tt>load</tt> and <tt>store</tt> for now. You will need them when
-switching to a more advanced GC.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="quickstart-runtime">In your runtime</a>
-</h3>
-
-<div>
-
-<p>The shadow stack doesn't imply a memory allocation algorithm. A semispace
-collector, or one built atop <tt>malloc</tt>, is a great place to start, and
-can be implemented with very little code.</p>
-
-<p>When it comes time to collect, however, your runtime needs to traverse the
-stack roots, and for this it needs to integrate with the shadow stack. Luckily,
-doing so is very simple. (This code is heavily commented to help you
-understand the data structure, but there are only 20 lines of meaningful
-code.)</p>
-
-<pre class="doc_code">
-/// @brief The map for a single function's stack frame. One of these is
-/// compiled as constant data into the executable for each function.
-///
-/// Storage of metadata values is elided if the %metadata parameter to
-/// @llvm.gcroot is null.
-struct FrameMap {
- int32_t NumRoots; //&lt; Number of roots in stack frame.
- int32_t NumMeta; //&lt; Number of metadata entries. May be &lt; NumRoots.
- const void *Meta[0]; //&lt; Metadata for each root.
-};
-
-/// @brief A link in the dynamic shadow stack. One of these is embedded in the
-/// stack frame of each function on the call stack.
-struct StackEntry {
- StackEntry *Next; //&lt; Link to next stack entry (the caller's).
- const FrameMap *Map; //&lt; Pointer to constant FrameMap.
- void *Roots[0]; //&lt; Stack roots (in-place array).
-};
-
-/// @brief The head of the singly-linked list of StackEntries. Functions push
-/// and pop onto this in their prologue and epilogue.
-///
-/// Since there is only a global list, this technique is not threadsafe.
-StackEntry *llvm_gc_root_chain;
-
-/// @brief Calls Visitor(root, meta) for each GC root on the stack.
-/// root and meta are exactly the values passed to
-/// <tt>@llvm.gcroot</tt>.
-///
-/// Visitor could be a function to recursively mark live objects. Or it
-/// might copy them to another heap or generation.
-///
-/// @param Visitor A function to invoke for every GC root on the stack.
-void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
- for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
- unsigned i = 0;
-
- // For roots [0, NumMeta), the metadata pointer is in the FrameMap.
- for (unsigned e = R->Map->NumMeta; i != e; ++i)
- Visitor(&amp;R->Roots[i], R->Map->Meta[i]);
-
- // For roots [NumMeta, NumRoots), the metadata pointer is null.
- for (unsigned e = R->Map->NumRoots; i != e; ++i)
- Visitor(&amp;R->Roots[i], NULL);
- }
-}</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="shadow-stack">About the shadow stack</a>
-</h3>
-
-<div>
-
-<p>Unlike many GC algorithms which rely on a cooperative code generator to
-compile stack maps, this algorithm carefully maintains a linked list of stack
-roots [<a href="#henderson02">Henderson2002</a>]. This so-called "shadow stack"
-mirrors the machine stack. Maintaining this data structure is slower than using
-a stack map compiled into the executable as constant data, but has a significant
-portability advantage because it requires no special support from the target
-code generator, and does not require tricky platform-specific code to crawl
-the machine stack.</p>
-
-<p>The tradeoff for this simplicity and portability is:</p>
-
-<ul>
- <li>High overhead per function call.</li>
- <li>Not thread-safe.</li>
-</ul>
-
-<p>Still, it's an easy way to get started. After your compiler and runtime are
-up and running, writing a <a href="#plugin">plugin</a> will allow you to take
-advantage of <a href="#collector-algos">more advanced GC features</a> of LLVM
-in order to improve performance.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="core">IR features</a><a name="intrinsics"></a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This section describes the garbage collection facilities provided by the
-<a href="LangRef.html">LLVM intermediate representation</a>. The exact behavior
-of these IR features is specified by the binary interface implemented by a
-<a href="#plugin">code generation plugin</a>, not by this document.</p>
-
-<p>These facilities are limited to those strictly necessary; they are not
-intended to be a complete interface to any garbage collector. A program will
-need to interface with the GC library using the facilities provided by that
-library.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="gcattr">Specifying GC code generation: <tt>gc "..."</tt></a>
-</h3>
-
-<div>
-
-<div class="doc_code"><tt>
- define <i>ty</i> @<i>name</i>(...) <span style="text-decoration: underline">gc "<i>name</i>"</span> { ...
-</tt></div>
-
-<p>The <tt>gc</tt> function attribute is used to specify the desired GC style
-to the compiler. Its programmatic equivalent is the <tt>setGC</tt> method of
-<tt>Function</tt>.</p>
-
-<p>Setting <tt>gc "<i>name</i>"</tt> on a function triggers a search for a
-matching code generation plugin "<i>name</i>"; it is that plugin which defines
-the exact nature of the code generated to support GC. If none is found, the
-compiler will raise an error.</p>
-
-<p>Specifying the GC style on a per-function basis allows LLVM to link together
-programs that use different garbage collection algorithms (or none at all).</p>
-
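-<p>For example, a single module may mix functions compiled for different
-collectors, or for none. A sketch, with function bodies elided (the strategy
-name "mygc" is registered by the example plugin <a href="#plugin">below</a>):</p>
-
-<pre class="doc_code">
-define void @managed() gc "mygc" { ... }   ; lowered by the "mygc" plugin
-define void @unmanaged() { ... }           ; ordinary code generation, no GC
-</pre>
-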
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="gcroot">Identifying GC roots on the stack: <tt>llvm.gcroot</tt></a>
-</h3>
-
-<div>
-
-<div class="doc_code"><tt>
- void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
-</tt></div>
-
-<p>The <tt>llvm.gcroot</tt> intrinsic is used to inform LLVM that a stack
-variable references an object on the heap and is to be tracked for garbage
-collection. The exact impact on generated code is specified by a <a
-href="#plugin">compiler plugin</a>. All calls to <tt>llvm.gcroot</tt> <b>must</b> reside
- inside the first basic block.</p>
-
-<p>A compiler which uses mem2reg to raise imperative code using <tt>alloca</tt>
-into SSA form need only add a call to <tt>@llvm.gcroot</tt> for those variables
-which are pointers into the GC heap.</p>
-
-<p>It is also important to mark intermediate values with <tt>llvm.gcroot</tt>.
-For example, consider <tt>h(f(), g())</tt>. Beware leaking the result of
-<tt>f()</tt> in the case that <tt>g()</tt> triggers a collection. Note that
-stack variables must be initialized and marked with <tt>llvm.gcroot</tt> in the
-function's prologue.</p>
-
-<p>The first argument <b>must</b> be a value referring to an alloca instruction
-or a bitcast of an alloca. The second contains a pointer to metadata that
-should be associated with the pointer, and <b>must</b> be a constant or global
-value address. If your target collector uses tags, use a null pointer for
-metadata.</p>
-
-<p>The <tt>%metadata</tt> argument can be used to avoid requiring heap objects
-to have 'isa' pointers or tag bits. [<a href="#appel89">Appel89</a>, <a
-href="#goldberg91">Goldberg91</a>, <a href="#tolmach94">Tolmach94</a>] If
-specified, its value will be tracked along with the location of the pointer in
-the stack frame.</p>
-
-<p>Consider the following fragment of Java code:</p>
-
-<pre class="doc_code">
- {
- Object X; // A null-initialized reference to an object
- ...
- }
-</pre>
-
-<p>This block (which may be located in the middle of a function or in a loop
-nest), could be compiled to this LLVM code:</p>
-
-<pre class="doc_code">
-Entry:
- ;; In the entry block for the function, allocate the
- ;; stack space for X, which is an LLVM pointer.
- %X = alloca %Object*
-
- ;; Tell LLVM that the stack space is a stack root.
- ;; Java has type-tags on objects, so we pass null as metadata.
- %tmp = bitcast %Object** %X to i8**
- call void @llvm.gcroot(i8** %tmp, i8* null)
- ...
-
- ;; "CodeBlock" is the block corresponding to the start
- ;; of the scope above.
-CodeBlock:
- ;; Java null-initializes pointers.
- store %Object* null, %Object** %X
-
- ...
-
- ;; As the pointer goes out of scope, store a null value into
- ;; it, to indicate that the value is no longer live.
- store %Object* null, %Object** %X
- ...
-</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="barriers">Reading and writing references in the heap</a>
-</h3>
-
-<div>
-
-<p>Some collectors need to be informed when the mutator (the program that needs
-garbage collection) either reads a pointer from or writes a pointer to a field
-of a heap object. The code fragments inserted at these points are called
-<em>read barriers</em> and <em>write barriers</em>, respectively. The amount of
-code that needs to be executed is usually quite small and not on the critical
-path of any computation, so the overall performance impact of the barrier is
-tolerable.</p>
-
-<p>Barriers often require access to the <em>object pointer</em> rather than the
-<em>derived pointer</em> (which is a pointer to the field within the
-object). Accordingly, these intrinsics take both pointers as separate arguments
-for completeness. In this snippet, <tt>%object</tt> is the object pointer, and
-<tt>%derived</tt> is the derived pointer:</p>
-
-<blockquote><pre>
- ;; An array type.
- %class.Array = type { %class.Object, i32, [0 x %class.Object*] }
- ...
-
- ;; Load the object pointer from a gcroot.
- %object = load %class.Array** %object_addr
-
- ;; Compute the derived pointer.
- %derived = getelementptr %object, i32 0, i32 2, i32 %n</pre></blockquote>
-
-<p>LLVM does not enforce this relationship between the object and derived
-pointer (although a <a href="#plugin">plugin</a> might). However, it would be
-an unusual collector that violated it.</p>
-
-<p>The use of these intrinsics is naturally optional if the target GC does not
-require the corresponding barrier. Such a GC plugin will replace the intrinsic
-calls with the corresponding <tt>load</tt> or <tt>store</tt> instruction if they
-are used.</p>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="gcwrite">Write barrier: <tt>llvm.gcwrite</tt></a>
-</h4>
-
-<div>
-
-<div class="doc_code"><tt>
-void @llvm.gcwrite(i8* %value, i8* %object, i8** %derived)
-</tt></div>
-
-<p>For write barriers, LLVM provides the <tt>llvm.gcwrite</tt> intrinsic
-function. It has exactly the same semantics as a non-volatile <tt>store</tt> to
-the derived pointer (the third argument). The exact code generated is specified
-by a <a href="#plugin">compiler plugin</a>.</p>
-
-<p>Many important algorithms require write barriers, including generational
-and concurrent collectors. Additionally, write barriers could be used to
-implement reference counting.</p>
-
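-<p>As a sketch, a store through the barrier (reusing <tt>%object</tt> and
-<tt>%derived</tt> from the earlier snippet, with the bitcasts to
-<tt>i8*</tt>/<tt>i8**</tt> elided) looks like:</p>
-
-<pre class="doc_code">
-;; Semantically a plain "store" to %derived, but gives the
-;; plugin the chance to emit barrier code around it.
-call void @llvm.gcwrite(i8* %value, i8* %object, i8** %derived)
-</pre>
-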
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="gcread">Read barrier: <tt>llvm.gcread</tt></a>
-</h4>
-
-<div>
-
-<div class="doc_code"><tt>
-i8* @llvm.gcread(i8* %object, i8** %derived)<br>
-</tt></div>
-
-<p>For read barriers, LLVM provides the <tt>llvm.gcread</tt> intrinsic function.
-It has exactly the same semantics as a non-volatile <tt>load</tt> from the
-derived pointer (the second argument). The exact code generated is specified by
-a <a href="#plugin">compiler plugin</a>.</p>
-
-<p>Read barriers are needed by fewer algorithms than write barriers, and may
-have a greater performance impact since pointer reads are more frequent than
-writes.</p>
-
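-<p>As a sketch, the corresponding load through the barrier looks like:</p>
-
-<pre class="doc_code">
-;; Semantically a plain "load" from %derived, but gives the
-;; plugin the chance to emit barrier code around it.
-%val = call i8* @llvm.gcread(i8* %object, i8** %derived)
-</pre>
-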
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="plugin">Implementing a collector plugin</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>User code specifies which GC code generation to use with the <tt>gc</tt>
-function attribute or, equivalently, with the <tt>setGC</tt> method of
-<tt>Function</tt>.</p>
-
-<p>To implement a GC plugin, it is necessary to subclass
-<tt>llvm::GCStrategy</tt>, which can be accomplished in a few lines of
-boilerplate code. LLVM's infrastructure provides access to several important
-algorithms. For an uncontroversial collector, all that remains may be to
-compile LLVM's computed stack map to assembly code (using the binary
-representation expected by the runtime library). This can be accomplished in
-about 100 lines of code.</p>
-
-<p>This is not the appropriate place to implement a garbage collected heap or a
-garbage collector itself. That code should exist in the language's runtime
-library. The compiler plugin is responsible for generating code which
-conforms to the binary interface defined by the library, most essentially the
-<a href="#stack-map">stack map</a>.</p>
-
-<p>To subclass <tt>llvm::GCStrategy</tt> and register it with the compiler:</p>
-
-<blockquote><pre>// lib/MyGC/MyGC.cpp - Example LLVM GC plugin
-
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/Support/Compiler.h"
-
-using namespace llvm;
-
-namespace {
- class LLVM_LIBRARY_VISIBILITY MyGC : public GCStrategy {
- public:
- MyGC() {}
- };
-
- GCRegistry::Add&lt;MyGC&gt;
- X("mygc", "My bespoke garbage collector.");
-}</pre></blockquote>
-
-<p>This boilerplate collector does nothing. More specifically:</p>
-
-<ul>
- <li><tt>llvm.gcread</tt> calls are replaced with the corresponding
- <tt>load</tt> instruction.</li>
- <li><tt>llvm.gcwrite</tt> calls are replaced with the corresponding
- <tt>store</tt> instruction.</li>
- <li>No safe points are added to the code.</li>
- <li>The stack map is not compiled into the executable.</li>
-</ul>
-
-<p>Using the LLVM makefiles (like the <a
-href="http://llvm.org/viewvc/llvm-project/llvm/trunk/projects/sample/">sample
-project</a>), this code can be compiled as a plugin using a simple
-makefile:</p>
-
-<blockquote><pre
-># lib/MyGC/Makefile
-
-LEVEL := ../..
-LIBRARYNAME = <var>MyGC</var>
-LOADABLE_MODULE = 1
-
-include $(LEVEL)/Makefile.common</pre></blockquote>
-
-<p>Once the plugin is compiled, code using it may be compiled using <tt>llc
--load=<var>MyGC.so</var></tt> (though <var>MyGC.so</var> may have some other
-platform-specific extension):</p>
-
-<blockquote><pre
->$ cat sample.ll
-define void @f() gc "mygc" {
-entry:
- ret void
-}
-$ llvm-as &lt; sample.ll | llc -load=MyGC.so</pre></blockquote>
-
-<p>It is also possible to statically link the collector plugin into tools, such
-as a language-specific compiler front-end.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="collector-algos">Overview of available features</a>
-</h3>
-
-<div>
-
-<p><tt>GCStrategy</tt> provides a range of features through which a plugin
-may do useful work. Some of these are callbacks, some are algorithms that can
-be enabled, disabled, or customized. This matrix summarizes the supported (and
-planned) features and correlates them with the collection techniques which
-typically require them.</p>
-
-<table>
- <tr>
- <th>Algorithm</th>
- <th>Done</th>
- <th>shadow stack</th>
- <th>refcount</th>
- <th>mark-sweep</th>
- <th>copying</th>
- <th>incremental</th>
- <th>threaded</th>
- <th>concurrent</th>
- </tr>
- <tr>
- <th class="rowhead"><a href="#stack-map">stack map</a></th>
- <td>&#10004;</td>
- <td></td>
- <td></td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- </tr>
- <tr>
- <th class="rowhead"><a href="#init-roots">initialize roots</a></th>
- <td>&#10004;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- </tr>
- <tr class="doc_warning">
- <th class="rowhead">derived pointers</th>
- <td>NO</td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td>&#10008;*</td>
- <td>&#10008;*</td>
- </tr>
- <tr>
- <th class="rowhead"><em><a href="#custom">custom lowering</a></em></th>
- <td>&#10004;</td>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- </tr>
- <tr>
- <th class="rowhead indent">gcroot</th>
- <td>&#10004;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- </tr>
- <tr>
- <th class="rowhead indent">gcwrite</th>
- <td>&#10004;</td>
- <td></td>
- <td>&#10008;</td>
- <td></td>
- <td></td>
- <td>&#10008;</td>
- <td></td>
- <td>&#10008;</td>
- </tr>
- <tr>
- <th class="rowhead indent">gcread</th>
- <td>&#10004;</td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td>&#10008;</td>
- </tr>
- <tr>
- <th class="rowhead"><em><a href="#safe-points">safe points</a></em></th>
- <td></td>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- </tr>
- <tr>
- <th class="rowhead indent">in calls</th>
- <td>&#10004;</td>
- <td></td>
- <td></td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- </tr>
- <tr>
- <th class="rowhead indent">before calls</th>
- <td>&#10004;</td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- </tr>
- <tr class="doc_warning">
- <th class="rowhead indent">for loops</th>
- <td>NO</td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- </tr>
- <tr>
- <th class="rowhead indent">before escape</th>
- <td>&#10004;</td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- </tr>
- <tr class="doc_warning">
- <th class="rowhead">emit code at safe points</th>
- <td>NO</td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td></td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- </tr>
- <tr>
- <th class="rowhead"><em>output</em></th>
- <td></td>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- <th></th>
- </tr>
- <tr>
- <th class="rowhead indent"><a href="#assembly">assembly</a></th>
- <td>&#10004;</td>
- <td></td>
- <td></td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- <td>&#10008;</td>
- </tr>
- <tr class="doc_warning">
- <th class="rowhead indent">JIT</th>
- <td>NO</td>
- <td></td>
- <td></td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- </tr>
- <tr class="doc_warning">
- <th class="rowhead indent">obj</th>
- <td>NO</td>
- <td></td>
- <td></td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- </tr>
- <tr class="doc_warning">
- <th class="rowhead">live analysis</th>
- <td>NO</td>
- <td></td>
- <td></td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- </tr>
- <tr class="doc_warning">
- <th class="rowhead">register map</th>
- <td>NO</td>
- <td></td>
- <td></td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- <td class="optl">&#10008;</td>
- </tr>
- <tr>
- <td colspan="10">
- <div><span class="doc_warning">*</span> Derived pointers only pose a
- hazard to copying collectors.</div>
- <div><span class="optl">&#10008;</span> in gray denotes a feature which
- could be utilized if available.</div>
- </td>
- </tr>
-</table>
-
-<p>To be clear, the collection techniques above are defined as:</p>
-
-<dl>
- <dt>Shadow Stack</dt>
- <dd>The mutator carefully maintains a linked list of stack roots.</dd>
- <dt>Reference Counting</dt>
- <dd>The mutator maintains a reference count for each object and frees an
- object when its count falls to zero.</dd>
- <dt>Mark-Sweep</dt>
- <dd>When the heap is exhausted, the collector marks reachable objects starting
- from the roots, then deallocates unreachable objects in a sweep
- phase.</dd>
- <dt>Copying</dt>
- <dd>As reachability analysis proceeds, the collector copies objects from one
- heap area to another, compacting them in the process. Copying collectors
- enable highly efficient "bump pointer" allocation and can improve locality
- of reference.</dd>
- <dt>Incremental</dt>
- <dd>(Including generational collectors.) Incremental collectors generally have
- all the properties of a copying collector (regardless of whether the
- mature heap is compacting), but bring the added complexity of requiring
- write barriers.</dd>
- <dt>Threaded</dt>
- <dd>Denotes a multithreaded mutator; the collector must still stop the mutator
- ("stop the world") before beginning reachability analysis. Stopping a
- multithreaded mutator is a complicated problem. It generally requires
- highly platform specific code in the runtime, and the production of
- carefully designed machine code at safe points.</dd>
- <dt>Concurrent</dt>
- <dd>In this technique, the mutator and the collector run concurrently, with
- the goal of eliminating pause times. In a <em>cooperative</em> collector,
- the mutator further aids with collection should a pause occur, allowing
- collection to take advantage of multiprocessor hosts. The "stop the world"
- problem of threaded collectors is generally still present to a limited
- extent. Sophisticated marking algorithms are necessary. Read barriers may
- be necessary.</dd>
-</dl>
-
-<p>As the matrix indicates, LLVM's garbage collection infrastructure is already
-suitable for a wide variety of collectors, but does not currently extend to
-multithreaded programs. This will be added in the future as there is
-interest.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="stack-map">Computing stack maps</a>
-</h3>
-
-<div>
-
-<p>LLVM automatically computes a stack map. One of the most important duties
-of a <tt>GCStrategy</tt> is to compile this information into the executable in
-the binary representation expected by the runtime library.</p>
-
-<p>The stack map consists of the location and identity of each GC root in
-each function in the module. For each root:</p>
-
-<ul>
- <li><tt>RootNum</tt>: The index of the root.</li>
- <li><tt>StackOffset</tt>: The offset of the object relative to the frame
- pointer.</li>
- <li><tt>RootMetadata</tt>: The value passed as the <tt>%metadata</tt>
- parameter to the <a href="#gcroot"><tt>@llvm.gcroot</tt></a> intrinsic.</li>
-</ul>
-
-<p>Also, for the function as a whole:</p>
-
-<ul>
- <li><tt>getFrameSize()</tt>: The overall size of the function's initial
- stack frame, not accounting for any dynamic allocation.</li>
- <li><tt>roots_size()</tt>: The count of roots in the function.</li>
-</ul>
-
-<p>To access the stack map, use <tt>GCFunctionInfo::roots_begin()</tt> and
-<tt>roots_end()</tt> from the <tt><a
-href="#assembly">GCMetadataPrinter</a></tt>:</p>
-
-<blockquote><pre
->for (iterator I = begin(), E = end(); I != E; ++I) {
- GCFunctionInfo *FI = *I;
- unsigned FrameSize = FI-&gt;getFrameSize();
- size_t RootCount = FI-&gt;roots_size();
-
- for (GCFunctionInfo::roots_iterator RI = FI-&gt;roots_begin(),
- RE = FI-&gt;roots_end();
- RI != RE; ++RI) {
- int RootNum = RI->Num;
- int RootStackOffset = RI->StackOffset;
- Constant *RootMetadata = RI->Metadata;
- }
-}</pre></blockquote>
-
-<p>If the <tt>llvm.gcroot</tt> intrinsic is eliminated before code generation by
-a custom lowering pass, LLVM will compute an empty stack map. This may be useful
-for collector plugins which implement reference counting or a shadow stack.</p>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
- <a name="init-roots">Initializing roots to null: <tt>InitRoots</tt></a>
-</h3>
-
-<div>
-
-<blockquote><pre
->MyGC::MyGC() {
- InitRoots = true;
-}</pre></blockquote>
-
-<p>When set, LLVM will automatically initialize each root to <tt>null</tt> upon
-entry to the function. This prevents the GC's sweep phase from visiting
-uninitialized pointers, which will almost certainly cause it to crash. This
-initialization occurs before custom lowering, so the two may be used
-together.</p>
-
-<p>Since LLVM does not yet compute liveness information, there is no means of
-distinguishing an uninitialized stack root from an initialized one. Therefore,
-this feature should be used by all GC plugins. It is enabled by default.</p>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
- <a name="custom">Custom lowering of intrinsics: <tt>CustomRoots</tt>,
- <tt>CustomReadBarriers</tt>, and <tt>CustomWriteBarriers</tt></a>
-</h3>
-
-<div>
-
-<p>For GCs which use barriers or unusual treatment of stack roots, these
-flags allow the collector to perform arbitrary transformations of the LLVM
-IR:</p>
-
-<blockquote><pre
->class MyGC : public GCStrategy {
-public:
- MyGC() {
- CustomRoots = true;
- CustomReadBarriers = true;
- CustomWriteBarriers = true;
- }
-
- virtual bool initializeCustomLowering(Module &amp;M);
- virtual bool performCustomLowering(Function &amp;F);
-};</pre></blockquote>
-
-<p>If any of these flags are set, then LLVM suppresses its default lowering for
-the corresponding intrinsics and instead calls
-<tt>performCustomLowering</tt>.</p>
-
-<p>LLVM's default action for each intrinsic is as follows:</p>
-
-<ul>
- <li><tt>llvm.gcroot</tt>: Leave it alone. The code generator must see it
- or the stack map will not be computed.</li>
- <li><tt>llvm.gcread</tt>: Substitute a <tt>load</tt> instruction.</li>
- <li><tt>llvm.gcwrite</tt>: Substitute a <tt>store</tt> instruction.</li>
-</ul>
-
-<p>If <tt>CustomReadBarriers</tt> or <tt>CustomWriteBarriers</tt> are specified,
-then <tt>performCustomLowering</tt> <strong>must</strong> eliminate the
-corresponding barriers.</p>
-
-<p><tt>performCustomLowering</tt> must comply with the same restrictions as <a
-href="WritingAnLLVMPass.html#runOnFunction"><tt
->FunctionPass::runOnFunction</tt></a>.
-Likewise, <tt>initializeCustomLowering</tt> has the same semantics as <a
-href="WritingAnLLVMPass.html#doInitialization_mod"><tt
->Pass::doInitialization(Module&amp;)</tt></a>.</p>
-
-<p>The following can be used as a template:</p>
-
-<blockquote><pre
->#include "llvm/Module.h"
-#include "llvm/IntrinsicInst.h"
-
-bool MyGC::initializeCustomLowering(Module &amp;M) {
- return false;
-}
-
-bool MyGC::performCustomLowering(Function &amp;F) {
- bool MadeChange = false;
-
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB-&gt;begin(), E = BB-&gt;end(); II != E; )
- if (IntrinsicInst *CI = dyn_cast&lt;IntrinsicInst&gt;(II++))
- if (Function *F = CI-&gt;getCalledFunction())
- switch (F-&gt;getIntrinsicID()) {
- case Intrinsic::gcwrite:
- // Handle llvm.gcwrite.
- CI-&gt;eraseFromParent();
- MadeChange = true;
- break;
- case Intrinsic::gcread:
- // Handle llvm.gcread.
- CI-&gt;eraseFromParent();
- MadeChange = true;
- break;
- case Intrinsic::gcroot:
- // Handle llvm.gcroot.
- CI-&gt;eraseFromParent();
- MadeChange = true;
- break;
- }
-
- return MadeChange;
-}</pre></blockquote>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
- <a name="safe-points">Generating safe points: <tt>NeededSafePoints</tt></a>
-</h3>
-
-<div>
-
-<p>LLVM can compute four kinds of safe points:</p>
-
-<blockquote><pre
->namespace GC {
- /// PointKind - The type of a collector-safe point.
- ///
- enum PointKind {
- Loop, //&lt; Instr is a loop (backwards branch).
- Return, //&lt; Instr is a return instruction.
- PreCall, //&lt; Instr is a call instruction.
- PostCall //&lt; Instr is the return address of a call.
- };
-}</pre></blockquote>
-
-<p>A collector can request any combination of the four by setting the
-<tt>NeededSafePoints</tt> mask:</p>
-
-<blockquote><pre
->MyGC::MyGC() {
- NeededSafePoints = 1 &lt;&lt; GC::Loop
- | 1 &lt;&lt; GC::Return
- | 1 &lt;&lt; GC::PreCall
- | 1 &lt;&lt; GC::PostCall;
-}</pre></blockquote>
-
-<p>It can then use the following routines to access safe points.</p>
-
-<blockquote><pre
->for (iterator I = begin(), E = end(); I != E; ++I) {
- GCFunctionInfo *MD = *I;
- size_t PointCount = MD-&gt;size();
-
- for (GCFunctionInfo::iterator PI = MD-&gt;begin(),
- PE = MD-&gt;end(); PI != PE; ++PI) {
- GC::PointKind PointKind = PI-&gt;Kind;
- unsigned PointNum = PI-&gt;Num;
- }
-}
-</pre></blockquote>
-
-<p>Almost every collector requires <tt>PostCall</tt> safe points, since these
-correspond to the moments when the function is suspended during a call to a
-subroutine.</p>
-
-<p>Threaded programs generally require <tt>Loop</tt> safe points to guarantee
-that the application will reach a safe point within a bounded amount of time,
-even if it is executing a long-running loop which contains no function
-calls.</p>
-
-<p>Threaded collectors may also require <tt>Return</tt> and <tt>PreCall</tt>
-safe points to implement "stop the world" techniques using self-modifying code,
-where it is important that the program not exit the function without reaching a
-safe point (because only the topmost function has been patched).</p>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
- <a name="assembly">Emitting assembly code: <tt>GCMetadataPrinter</tt></a>
-</h3>
-
-<div>
-
-<p>LLVM allows a plugin to print arbitrary assembly code before and after the
-rest of a module's assembly code. At the end of the module, the GC can compile
-the LLVM stack map into assembly code. (At the beginning, this information is not
-yet computed.)</p>
-
-<p>Since AsmWriter and CodeGen are separate components of LLVM, a separate
-abstract base class and registry are provided for printing assembly code:
-<tt>GCMetadataPrinter</tt> and <tt>GCMetadataPrinterRegistry</tt>. The AsmWriter
-will look for such a subclass if the <tt>GCStrategy</tt> sets
-<tt>UsesMetadata</tt>:</p>
-
-<blockquote><pre
->MyGC::MyGC() {
- UsesMetadata = true;
-}</pre></blockquote>
-
-<p>This separation allows JIT-only clients to be smaller.</p>
-
-<p>Note that LLVM does not currently have analogous APIs to support code
-generation in the JIT, nor using the object writers.</p>
-
-<blockquote><pre
->// lib/MyGC/MyGCPrinter.cpp - Example LLVM GC printer
-
-#include "llvm/CodeGen/GCMetadataPrinter.h"
-#include "llvm/Support/Compiler.h"
-
-using namespace llvm;
-
-namespace {
- class LLVM_LIBRARY_VISIBILITY MyGCPrinter : public GCMetadataPrinter {
- public:
- virtual void beginAssembly(std::ostream &amp;OS, AsmPrinter &amp;AP,
- const TargetAsmInfo &amp;TAI);
-
- virtual void finishAssembly(std::ostream &amp;OS, AsmPrinter &amp;AP,
- const TargetAsmInfo &amp;TAI);
- };
-
- GCMetadataPrinterRegistry::Add&lt;MyGCPrinter&gt;
- X("mygc", "My bespoke garbage collector.");
-}</pre></blockquote>
-
-<p>The collector should use <tt>AsmPrinter</tt> and <tt>TargetAsmInfo</tt> to
-print portable assembly code to the <tt>std::ostream</tt>. The collector itself
-contains the stack map for the entire module, and may access the
-<tt>GCFunctionInfo</tt> using its own <tt>begin()</tt> and <tt>end()</tt>
-methods. Here's a realistic example:</p>
-
-<blockquote><pre
->#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Function.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetAsmInfo.h"
-
-void MyGCPrinter::beginAssembly(std::ostream &amp;OS, AsmPrinter &amp;AP,
- const TargetAsmInfo &amp;TAI) {
- // Nothing to do.
-}
-
-void MyGCPrinter::finishAssembly(std::ostream &amp;OS, AsmPrinter &amp;AP,
- const TargetAsmInfo &amp;TAI) {
- // Set up for emitting addresses.
- const char *AddressDirective;
- int AddressAlignLog;
- if (AP.TM.getDataLayout()->getPointerSize() == sizeof(int32_t)) {
- AddressDirective = TAI.getData32bitsDirective();
- AddressAlignLog = 2;
- } else {
- AddressDirective = TAI.getData64bitsDirective();
- AddressAlignLog = 3;
- }
-
- // Put this in the data section.
- AP.SwitchToDataSection(TAI.getDataSection());
-
- // For each function...
- for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
- GCFunctionInfo &amp;MD = **FI;
-
- // Emit this data structure:
- //
- // struct {
- // int32_t PointCount;
- // struct {
- // void *SafePointAddress;
- // int32_t LiveCount;
- // int32_t LiveOffsets[LiveCount];
- // } Points[PointCount];
- // } __gcmap_&lt;FUNCTIONNAME&gt;;
-
- // Align to address width.
- AP.EmitAlignment(AddressAlignLog);
-
- // Emit the symbol by which the stack map entry can be found.
- std::string Symbol;
- Symbol += TAI.getGlobalPrefix();
- Symbol += "__gcmap_";
- Symbol += MD.getFunction().getName();
- if (const char *GlobalDirective = TAI.getGlobalDirective())
- OS &lt;&lt; GlobalDirective &lt;&lt; Symbol &lt;&lt; "\n";
- OS &lt;&lt; TAI.getGlobalPrefix() &lt;&lt; Symbol &lt;&lt; ":\n";
-
- // Emit PointCount.
- AP.EmitInt32(MD.size());
- AP.EOL("safe point count");
-
- // And each safe point...
- for (GCFunctionInfo::iterator PI = MD.begin(),
- PE = MD.end(); PI != PE; ++PI) {
- // Align to address width.
- AP.EmitAlignment(AddressAlignLog);
-
- // Emit the address of the safe point.
- OS &lt;&lt; AddressDirective
- &lt;&lt; TAI.getPrivateGlobalPrefix() &lt;&lt; "label" &lt;&lt; PI-&gt;Num;
- AP.EOL("safe point address");
-
- // Emit the stack frame size.
- AP.EmitInt32(MD.getFrameSize());
- AP.EOL("stack frame size");
-
- // Emit the number of live roots in the function.
- AP.EmitInt32(MD.live_size(PI));
- AP.EOL("live root count");
-
- // And for each live root...
- for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
- LE = MD.live_end(PI);
- LI != LE; ++LI) {
- // Print its offset within the stack frame.
- AP.EmitInt32(LI-&gt;StackOffset);
- AP.EOL("stack offset");
- }
- }
- }
-}
-</pre></blockquote>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="references">References</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p><a name="appel89">[Appel89]</a> Runtime Tags Aren't Necessary. Andrew
-W. Appel. Lisp and Symbolic Computation 19(7):703-705, July 1989.</p>
-
-<p><a name="goldberg91">[Goldberg91]</a> Tag-free garbage collection for
-strongly typed programming languages. Benjamin Goldberg. ACM SIGPLAN
-PLDI'91.</p>
-
-<p><a name="tolmach94">[Tolmach94]</a> Tag-free garbage collection using
-explicit type parameters. Andrew Tolmach. Proceedings of the 1994 ACM
-conference on LISP and functional programming.</p>
-
-<p><a name="henderson02">[Henderson2002]</a> <a
-href="http://citeseer.ist.psu.edu/henderson02accurate.html">
-Accurate Garbage Collection in an Uncooperative Environment</a>.
-Fergus Henderson. International Symposium on Memory Management 2002.</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $
-</address>
-
-</body>
-</html>
diff --git a/docs/GarbageCollection.rst b/docs/GarbageCollection.rst
new file mode 100644
index 000000000000..5c3a1af23cd3
--- /dev/null
+++ b/docs/GarbageCollection.rst
@@ -0,0 +1,1029 @@
+=====================================
+Accurate Garbage Collection with LLVM
+=====================================
+
+.. contents::
+ :local:
+
+Introduction
+============
+
+Garbage collection is a widely used technique that frees the programmer from
+having to know the lifetimes of heap objects, making software easier to produce
+and maintain. Many programming languages rely on garbage collection for
+automatic memory management. There are two primary forms of garbage collection:
+conservative and accurate.
+
+Conservative garbage collection often does not require any special support from
+either the language or the compiler: it can handle non-type-safe programming
+languages (such as C/C++) and does not require any special information from the
+compiler. The `Boehm collector
+<http://www.hpl.hp.com/personal/Hans_Boehm/gc/>`__ is an example of a
+state-of-the-art conservative collector.
+
+Accurate garbage collection requires the ability to identify all pointers in the
+program at run-time (which requires that the source-language be type-safe in
+most cases). Identifying pointers at run-time requires compiler support to
+locate all places that hold live pointer variables at run-time, including the
+:ref:`processor stack and registers <gcroot>`.
+
+Conservative garbage collection is attractive because it does not require any
+special compiler support, but it does have problems. In particular, because the
+conservative garbage collector cannot *know* that a particular word in the
+machine is a pointer, it cannot move live objects in the heap (preventing the
+use of compacting and generational GC algorithms) and it can occasionally suffer
+from memory leaks due to integer values that happen to point to objects in the
+program. In addition, some aggressive compiler transformations can break
+conservative garbage collectors (though these seem rare in practice).
+
+Accurate garbage collectors do not suffer from any of these problems, but they
+can suffer from degraded scalar optimization of the program. In particular,
+because the runtime must be able to identify and update all pointers active in
+the program, some optimizations are less effective. In practice, however, the
+locality and performance benefits of using aggressive garbage collection
+techniques dominate any low-level losses.
+
+This document describes the mechanisms and interfaces provided by LLVM to
+support accurate garbage collection.
+
+Goals and non-goals
+-------------------
+
+LLVM's intermediate representation provides :ref:`garbage collection intrinsics
+<gc_intrinsics>` that offer support for a broad class of collector models. For
+instance, the intrinsics permit:
+
+* semi-space collectors
+
+* mark-sweep collectors
+
+* generational collectors
+
+* reference counting
+
+* incremental collectors
+
+* concurrent collectors
+
+* cooperative collectors
+
+We hope that the primitive support built into the LLVM IR is sufficient to
+support a broad class of garbage collected languages including Scheme, ML, Java,
+C#, Perl, Python, Lua, Ruby, other scripting languages, and more.
+
+However, LLVM does not itself provide a garbage collector --- this should be
+part of your language's runtime library. LLVM provides a framework for compile
+time :ref:`code generation plugins <plugin>`. The role of these plugins is to
+generate code and data structures which conform to the *binary interface*
+specified by the *runtime library*. This is similar to the relationship between
+LLVM and DWARF debugging info, for example. The difference primarily lies in
+the lack of an established standard in the domain of garbage collection --- thus
+the plugins.
+
+The aspects of the binary interface with which LLVM's GC support is
+concerned are:
+
+* Creation of GC-safe points within code where collection is allowed to execute
+ safely.
+
+* Computation of the stack map. For each safe point in the code, object
+ references within the stack frame must be identified so that the collector may
+ traverse and perhaps update them.
+
+* Write barriers when storing object references to the heap. These are commonly
+ used to optimize incremental scans in generational collectors.
+
+* Emission of read barriers when loading object references. These are useful
+ for interoperating with concurrent collectors.
+
+There are additional areas that LLVM does not directly address:
+
+* Registration of global roots with the runtime.
+
+* Registration of stack map entries with the runtime.
+
+* The functions used by the program to allocate memory, trigger a collection,
+ etc.
+
+* Computation or compilation of type maps, or registration of them with the
+ runtime. These are used to crawl the heap for object references.
+
+In general, LLVM's support for GC does not include features which can be
+adequately addressed with other features of the IR and does not specify a
+particular binary interface. On the plus side, this means that you should be
+able to integrate LLVM with an existing runtime. On the other hand, it leaves a
+lot of work for the developer of a novel language. However, it's easy to get
+started quickly and scale up to a more sophisticated implementation as your
+compiler matures.
+
+Getting started
+===============
+
+Using a GC with LLVM implies many things, for example:
+
+* Write a runtime library or find an existing one which implements a GC heap.
+
+ #. Implement a memory allocator.
+
+ #. Design a binary interface for the stack map, used to identify references
+ within a stack frame on the machine stack.\*
+
+ #. Implement a stack crawler to discover functions on the call stack.\*
+
+ #. Implement a registry for global roots.
+
+ #. Design a binary interface for type maps, used to identify references
+ within heap objects.
+
+ #. Implement a collection routine bringing together all of the above.
+
+* Emit compatible code from your compiler.
+
+ * Initialization in the main function.
+
+ * Use the ``gc "..."`` attribute to enable GC code generation (or
+ ``F.setGC("...")``).
+
+ * Use ``@llvm.gcroot`` to mark stack roots.
+
+ * Use ``@llvm.gcread`` and/or ``@llvm.gcwrite`` to manipulate GC references,
+ if necessary.
+
+ * Allocate memory using the GC allocation routine provided by the runtime
+ library.
+
+ * Generate type maps according to your runtime's binary interface.
+
+* Write a compiler plugin to interface LLVM with the runtime library.\*
+
+ * Lower ``@llvm.gcread`` and ``@llvm.gcwrite`` to appropriate code
+ sequences.\*
+
+ * Compile LLVM's stack map to the binary form expected by the runtime.
+
+* Load the plugin into the compiler. Use ``llc -load`` or link the plugin
+ statically with your language's compiler.\*
+
+* Link program executables with the runtime.
+
+To help with several of these tasks (those indicated with a \*), LLVM includes a
+highly portable, built-in ShadowStack code generator. It is compiled into
+``llc`` and works even with the interpreter and C backends.
+
+In your compiler
+----------------
+
+To turn the shadow stack on for your functions, first call:
+
+.. code-block:: c++
+
+ F.setGC("shadow-stack");
+
+for each function your compiler emits. Since the shadow stack is built into
+LLVM, you do not need to load a plugin.
+
+Your compiler must also use ``@llvm.gcroot`` as documented. Don't forget to
+create a root for each intermediate value that is generated when evaluating an
+expression. In ``h(f(), g())``, the result of ``f()`` could easily be collected
+if evaluating ``g()`` triggers a collection.
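+
+For example, a front end emitting IR through the C++ API might root the result
+of ``f()`` as in the following sketch. This is not a prescribed recipe: the
+helper name ``emitRootedCall`` is hypothetical, the callee is assumed to
+return ``i8*``, and both the include paths and these C++ APIs vary between
+LLVM versions.
+
+.. code-block:: c++
+
+  #include "llvm/IRBuilder.h"
+  #include "llvm/Intrinsics.h"
+  #include "llvm/Module.h"
+
+  using namespace llvm;
+
+  // Call Callee (assumed to return i8*) and spill the result to a rooted
+  // stack slot so a collection triggered later cannot invalidate it.
+  static Value *emitRootedCall(Module &M, IRBuilder<> &EntryBuilder,
+                               IRBuilder<> &Builder, Value *Callee) {
+    PointerType *I8Ptr = Builder.getInt8PtrTy();
+
+    // Both the alloca and the gcroot call belong in the entry block.
+    AllocaInst *Slot = EntryBuilder.CreateAlloca(I8Ptr, 0, "root");
+    Function *GCRoot = Intrinsic::getDeclaration(&M, Intrinsic::gcroot);
+    EntryBuilder.CreateCall2(GCRoot, Slot, ConstantPointerNull::get(I8Ptr));
+
+    // Store the call result so the collector can find (and update) it.
+    Builder.CreateStore(Builder.CreateCall(Callee), Slot);
+    return Slot; // reload from the slot after any potential safe point
+  }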
+
+There's no need to use ``@llvm.gcread`` and ``@llvm.gcwrite`` over plain
+``load`` and ``store`` for now. You will need them when switching to a more
+advanced GC.
+
+In your runtime
+---------------
+
+The shadow stack doesn't imply a memory allocation algorithm. A semispace
+collector, or one built atop ``malloc``, is a great place to start and can be
+implemented with very little code.
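+
+For instance, a minimal ``malloc``-backed allocator might look like the sketch
+below. The header layout and the name ``llvm_gc_allocate`` are assumptions of
+this example, not an interface defined by LLVM; error handling is omitted.
+
+.. code-block:: c++
+
+  #include <stdlib.h>
+
+  // Each object carries a header so the collector can later find the
+  // object's type map when crawling the heap.
+  struct AllocationHeader {
+    const void *TypeMap;
+  };
+
+  void *llvm_gc_allocate(const void *TypeMap, size_t Size) {
+    AllocationHeader *H =
+      (AllocationHeader *) malloc(sizeof(AllocationHeader) + Size);
+    H->TypeMap = TypeMap;
+    return H + 1; // the object payload immediately follows the header
+  }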
+
+When it comes time to collect, however, your runtime needs to traverse the stack
+roots, and for this it needs to integrate with the shadow stack. Luckily, doing
+so is very simple. (This code is heavily commented to help you understand the
+data structure, but there are only 20 lines of meaningful code.)
+
+.. code-block:: c++
+
+ /// @brief The map for a single function's stack frame. One of these is
+ /// compiled as constant data into the executable for each function.
+ ///
+ /// Storage of metadata values is elided if the %metadata parameter to
+ /// @llvm.gcroot is null.
+ struct FrameMap {
+ int32_t NumRoots; //< Number of roots in stack frame.
+ int32_t NumMeta; //< Number of metadata entries. May be < NumRoots.
+ const void *Meta[0]; //< Metadata for each root.
+ };
+
+ /// @brief A link in the dynamic shadow stack. One of these is embedded in
+ /// the stack frame of each function on the call stack.
+ struct StackEntry {
+ StackEntry *Next; //< Link to next stack entry (the caller's).
+ const FrameMap *Map; //< Pointer to constant FrameMap.
+ void *Roots[0]; //< Stack roots (in-place array).
+ };
+
+ /// @brief The head of the singly-linked list of StackEntries. Functions push
+ /// and pop onto this in their prologue and epilogue.
+ ///
+ /// Since there is only a global list, this technique is not threadsafe.
+ StackEntry *llvm_gc_root_chain;
+
+ /// @brief Calls Visitor(root, meta) for each GC root on the stack.
+ /// root and meta are exactly the values passed to
+ /// @llvm.gcroot.
+ ///
+ /// Visitor could be a function to recursively mark live objects. Or it
+ /// might copy them to another heap or generation.
+ ///
+ /// @param Visitor A function to invoke for every GC root on the stack.
+ void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
+ for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
+ unsigned i = 0;
+
+ // For roots [0, NumMeta), the metadata pointer is in the FrameMap.
+ for (unsigned e = R->Map->NumMeta; i != e; ++i)
+ Visitor(&R->Roots[i], R->Map->Meta[i]);
+
+ // For roots [NumMeta, NumRoots), the metadata pointer is null.
+ for (unsigned e = R->Map->NumRoots; i != e; ++i)
+ Visitor(&R->Roots[i], NULL);
+ }
+ }
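+
+A first collector built on this traversal can be as simple as the following
+mark phase. Here ``markObject`` and ``sweep`` stand for hypothetical pieces of
+the runtime's heap implementation; they are not part of the shadow stack.
+
+.. code-block:: c++
+
+  // Hypothetical: provided elsewhere by the runtime's heap implementation.
+  void markObject(void *Object);
+  void sweep();
+
+  // Matches the Visitor signature expected by visitGCRoots.
+  static void markRoot(void **Root, const void *Meta) {
+    if (*Root) // roots are null until first assigned
+      markObject(*Root);
+  }
+
+  void llvm_gc_collect() {
+    visitGCRoots(markRoot); // mark everything reachable from the stack
+    sweep();                // then reclaim whatever was not marked
+  }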
+
+About the shadow stack
+----------------------
+
+Unlike many GC algorithms which rely on a cooperative code generator to compile
+stack maps, this algorithm carefully maintains a linked list of stack roots
+[:ref:`Henderson2002 <henderson02>`]. This so-called "shadow stack" mirrors the
+machine stack. Maintaining this data structure is slower than using a stack map
+compiled into the executable as constant data, but has a significant portability
+advantage because it requires no special support from the target code generator,
+and does not require tricky platform-specific code to crawl the machine stack.
+
+The tradeoff for this simplicity and portability is:
+
+* High overhead per function call.
+
+* Not thread-safe.
+
+Still, it's an easy way to get started. After your compiler and runtime are up
+and running, writing a :ref:`plugin <plugin>` will allow you to take advantage
+of :ref:`more advanced GC features <collector-algos>` of LLVM in order to
+improve performance.
+
+.. _gc_intrinsics:
+
+IR features
+===========
+
+This section describes the garbage collection facilities provided by the
+:doc:`LLVM intermediate representation <LangRef>`. The exact behavior of these
+IR features is specified by the binary interface implemented by a :ref:`code
+generation plugin <plugin>`, not by this document.
+
+These facilities are limited to those strictly necessary; they are not intended
+to be a complete interface to any garbage collector. A program will need to
+interface with the GC library using the facilities provided by that library.
+
+Specifying GC code generation: ``gc "..."``
+-------------------------------------------
+
+.. code-block:: llvm
+
+ define ty @name(...) gc "name" { ...
+
+The ``gc`` function attribute is used to specify the desired GC style to the
+compiler. Its programmatic equivalent is the ``setGC`` method of ``Function``.
+
+Setting ``gc "name"`` on a function triggers a search for a matching code
+generation plugin "*name*"; it is that plugin which defines the exact nature of
+the code generated to support GC. If none is found, the compiler will raise an
+error.
+
+Specifying the GC style on a per-function basis allows LLVM to link together
+programs that use different garbage collection algorithms (or none at all).
+
+.. _gcroot:
+
+Identifying GC roots on the stack: ``llvm.gcroot``
+--------------------------------------------------
+
+.. code-block:: llvm
+
+ void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
+
+The ``llvm.gcroot`` intrinsic is used to inform LLVM that a stack variable
+references an object on the heap and is to be tracked for garbage collection.
+The exact impact on generated code is specified by a :ref:`compiler plugin
+<plugin>`. All calls to ``llvm.gcroot`` **must** reside inside the first basic
+block.
+
+A compiler which uses mem2reg to raise imperative code using ``alloca`` into SSA
+form need only add a call to ``@llvm.gcroot`` for those variables which are
+pointers into the GC heap.
+
+It is also important to mark intermediate values with ``llvm.gcroot``. For
+example, consider ``h(f(), g())``. Beware leaking the result of ``f()`` in the
+case that ``g()`` triggers a collection.  Note that stack variables must be
+initialized and marked with ``llvm.gcroot`` in the function's prologue.
+
+The first argument **must** be a value referring to an alloca instruction or a
+bitcast of an alloca. The second contains a pointer to metadata that should be
+associated with the pointer, and **must** be a constant or global value
+address. If your target collector uses tags, use a null pointer for metadata.
+
+The ``%metadata`` argument can be used to avoid requiring heap objects to have
+'isa' pointers or tag bits. [Appel89_, Goldberg91_, Tolmach94_] If specified,
+its value will be tracked along with the location of the pointer in the stack
+frame.
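+
+For example, a front end for such a tag-free collector might attach the
+address of a constant type descriptor to each root. In this sketch,
+``TypeMapInit`` (a constant describing the object's layout) and its format are
+assumptions about the example's runtime, and the include paths vary between
+LLVM versions.
+
+.. code-block:: c++
+
+  #include "llvm/Constants.h"
+  #include "llvm/GlobalVariable.h"
+  #include "llvm/IRBuilder.h"
+  #include "llvm/Intrinsics.h"
+  #include "llvm/Module.h"
+
+  using namespace llvm;
+
+  // Root Slot (an i8** alloca in the entry block), passing the address of
+  // a constant type descriptor as the root's %metadata argument.
+  static void emitRootWithTypeMap(Module &M, IRBuilder<> &B,
+                                  AllocaInst *Slot, Constant *TypeMapInit) {
+    GlobalVariable *TypeMap =
+      new GlobalVariable(M, TypeMapInit->getType(), /*isConstant=*/true,
+                         GlobalValue::PrivateLinkage, TypeMapInit, "typemap");
+    Value *Meta = B.CreateBitCast(TypeMap, B.getInt8PtrTy());
+    B.CreateCall2(Intrinsic::getDeclaration(&M, Intrinsic::gcroot),
+                  Slot, Meta);
+  }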
+
+Consider the following fragment of Java code:
+
+.. code-block:: java
+
+ {
+ Object X; // A null-initialized reference to an object
+ ...
+ }
+
+This block (which may be located in the middle of a function or in a loop
+nest) could be compiled to this LLVM code:
+
+.. code-block:: llvm
+
+ Entry:
+ ;; In the entry block for the function, allocate the
+ ;; stack space for X, which is an LLVM pointer.
+ %X = alloca %Object*
+
+ ;; Tell LLVM that the stack space is a stack root.
+ ;; Java has type-tags on objects, so we pass null as metadata.
+ %tmp = bitcast %Object** %X to i8**
+ call void @llvm.gcroot(i8** %tmp, i8* null)
+ ...
+
+ ;; "CodeBlock" is the block corresponding to the start
+ ;; of the scope above.
+ CodeBlock:
+ ;; Java null-initializes pointers.
+ store %Object* null, %Object** %X
+
+ ...
+
+ ;; As the pointer goes out of scope, store a null value into
+ ;; it, to indicate that the value is no longer live.
+ store %Object* null, %Object** %X
+ ...
+
+Reading and writing references in the heap
+------------------------------------------
+
+Some collectors need to be informed when the mutator (the program that needs
+garbage collection) either reads a pointer from or writes a pointer to a field
+of a heap object. The code fragments inserted at these points are called *read
+barriers* and *write barriers*, respectively. The amount of code that needs to
+be executed is usually quite small and not on the critical path of any
+computation, so the overall performance impact of the barrier is tolerable.
+
+Barriers often require access to the *object pointer* rather than the *derived
+pointer* (which is a pointer to the field within the object). Accordingly,
+these intrinsics take both pointers as separate arguments for completeness. In
+this snippet, ``%object`` is the object pointer, and ``%derived`` is the derived
+pointer:
+
+.. code-block:: llvm
+
+ ;; An array type.
+ %class.Array = type { %class.Object, i32, [0 x %class.Object*] }
+ ...
+
+ ;; Load the object pointer from a gcroot.
+ %object = load %class.Array** %object_addr
+
+ ;; Compute the derived pointer.
+  %derived = getelementptr %class.Array* %object, i32 0, i32 2, i32 %n
+
+LLVM does not enforce this relationship between the object and derived pointer
+(although a :ref:`plugin <plugin>` might). However, it would be an unusual
+collector that violated it.
+
+The use of these intrinsics is naturally optional if the target GC does not
+require the corresponding barrier.  Such a GC plugin will replace the intrinsic
+calls with the corresponding ``load`` or ``store`` instruction if they are
+used.
+
+Write barrier: ``llvm.gcwrite``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ void @llvm.gcwrite(i8* %value, i8* %object, i8** %derived)
+
+For write barriers, LLVM provides the ``llvm.gcwrite`` intrinsic function. It
+has exactly the same semantics as a non-volatile ``store`` to the derived
+pointer (the third argument). The exact code generated is specified by a
+compiler :ref:`plugin <plugin>`.
+
+Many important algorithms require write barriers, including generational and
+concurrent collectors. Additionally, write barriers could be used to implement
+reference counting.
+
+Read barrier: ``llvm.gcread``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ i8* @llvm.gcread(i8* %object, i8** %derived)
+
+For read barriers, LLVM provides the ``llvm.gcread`` intrinsic function. It has
+exactly the same semantics as a non-volatile ``load`` from the derived pointer
+(the second argument). The exact code generated is specified by a
+:ref:`compiler plugin <plugin>`.
+
+Read barriers are needed by fewer algorithms than write barriers, and may have a
+greater performance impact since pointer reads are more frequent than writes.
+
+.. _plugin:
+
+Implementing a collector plugin
+===============================
+
+User code specifies which GC code generation to use with the ``gc`` function
+attribute or, equivalently, with the ``setGC`` method of ``Function``.
+
+To implement a GC plugin, it is necessary to subclass ``llvm::GCStrategy``,
+which can be accomplished in a few lines of boilerplate code. LLVM's
+infrastructure provides access to several important algorithms. For an
+uncontroversial collector, all that remains may be to compile LLVM's computed
+stack map to assembly code (using the binary representation expected by the
+runtime library). This can be accomplished in about 100 lines of code.
+
+This is not the appropriate place to implement a garbage collected heap or a
+garbage collector itself. That code should exist in the language's runtime
+library.  The compiler plugin is responsible for generating code which
+conforms to the binary interface defined by the runtime library, most
+essentially the :ref:`stack map <stack-map>`.
+
+To subclass ``llvm::GCStrategy`` and register it with the compiler:
+
+.. code-block:: c++
+
+ // lib/MyGC/MyGC.cpp - Example LLVM GC plugin
+
+ #include "llvm/CodeGen/GCStrategy.h"
+ #include "llvm/CodeGen/GCMetadata.h"
+ #include "llvm/Support/Compiler.h"
+
+ using namespace llvm;
+
+ namespace {
+ class LLVM_LIBRARY_VISIBILITY MyGC : public GCStrategy {
+ public:
+ MyGC() {}
+ };
+
+ GCRegistry::Add<MyGC>
+ X("mygc", "My bespoke garbage collector.");
+ }
+
+This boilerplate collector does nothing. More specifically:
+
+* ``llvm.gcread`` calls are replaced with the corresponding ``load``
+ instruction.
+
+* ``llvm.gcwrite`` calls are replaced with the corresponding ``store``
+ instruction.
+
+* No safe points are added to the code.
+
+* The stack map is not compiled into the executable.
+
+Using the LLVM makefiles (like the `sample project
+<http://llvm.org/viewvc/llvm-project/llvm/trunk/projects/sample/>`__), this code
+can be compiled as a plugin using a simple makefile:
+
+.. code-block:: make
+
+ # lib/MyGC/Makefile
+
+ LEVEL := ../..
+ LIBRARYNAME = MyGC
+ LOADABLE_MODULE = 1
+
+ include $(LEVEL)/Makefile.common
+
+Once the plugin is compiled, code using it may be compiled using ``llc
+-load=MyGC.so`` (though MyGC.so may have some other platform-specific
+extension):
+
+::
+
+ $ cat sample.ll
+ define void @f() gc "mygc" {
+ entry:
+ ret void
+ }
+ $ llvm-as < sample.ll | llc -load=MyGC.so
+
+It is also possible to statically link the collector plugin into tools, such as
+a language-specific compiler front-end.
+
+.. _collector-algos:
+
+Overview of available features
+------------------------------
+
+``GCStrategy`` provides a range of features through which a plugin may do useful
+work.  Some of these are callbacks; others are algorithms that can be enabled,
+disabled, or customized. This matrix summarizes the supported (and planned)
+features and correlates them with the collection techniques which typically
+require them.
+
+.. |v| unicode:: 0x2714
+ :trim:
+
+.. |x| unicode:: 0x2718
+ :trim:
+
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| Algorithm | Done | Shadow | refcount | mark- | copying | incremental | threaded | concurrent |
+| | | stack | | sweep | | | | |
++============+======+========+==========+=======+=========+=============+==========+============+
+| stack map | |v| | | | |x| | |x| | |x| | |x| | |x| |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| initialize | |v| | |x| | |x| | |x| | |x| | |x| | |x| | |x| |
+| roots | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| derived | NO | | | | | | **N**\* | **N**\* |
+| pointers | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| **custom | |v| | | | | | | | |
+| lowering** | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *gcroot* | |v| | |x| | |x| | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *gcwrite* | |v| | | |x| | | | |x| | | |x| |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *gcread* | |v| | | | | | | | |x| |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| **safe | | | | | | | | |
+| points** | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *in | |v| | | | |x| | |x| | |x| | |x| | |x| |
+| calls* | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *before | |v| | | | | | | |x| | |x| |
+| calls* | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *for | NO | | | | | | **N** | **N** |
+| loops* | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *before | |v| | | | | | | |x| | |x| |
+| escape* | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| emit code | NO | | | | | | **N** | **N** |
+| at safe | | | | | | | | |
+| points | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| **output** | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *assembly* | |v| | | | |x| | |x| | |x| | |x| | |x| |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *JIT* | NO | | | **?** | **?** | **?** | **?** | **?** |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *obj* | NO | | | **?** | **?** | **?** | **?** | **?** |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| live | NO | | | **?** | **?** | **?** | **?** | **?** |
+| analysis | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| register | NO | | | **?** | **?** | **?** | **?** | **?** |
+| map | | | | | | | | |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| \* Derived pointers only pose a hazard to copying collections.                                |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| **?** denotes a feature which could be utilized if available. |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+
+To be clear, the collection techniques above are defined as:
+
+Shadow Stack
+ The mutator carefully maintains a linked list of stack roots.
+
+Reference Counting
+ The mutator maintains a reference count for each object and frees an object
+ when its count falls to zero.
+
+Mark-Sweep
+ When the heap is exhausted, the collector marks reachable objects starting
+ from the roots, then deallocates unreachable objects in a sweep phase.
+
+Copying
+ As reachability analysis proceeds, the collector copies objects from one heap
+ area to another, compacting them in the process. Copying collectors enable
+ highly efficient "bump pointer" allocation and can improve locality of
+ reference.
+
+Incremental
+ (Including generational collectors.) Incremental collectors generally have all
+ the properties of a copying collector (regardless of whether the mature heap
+ is compacting), but bring the added complexity of requiring write barriers.
+
+Threaded
+ Denotes a multithreaded mutator; the collector must still stop the mutator
+ ("stop the world") before beginning reachability analysis. Stopping a
+ multithreaded mutator is a complicated problem. It generally requires highly
+ platform specific code in the runtime, and the production of carefully
+ designed machine code at safe points.
+
+Concurrent
+ In this technique, the mutator and the collector run concurrently, with the
+ goal of eliminating pause times. In a *cooperative* collector, the mutator
+ further aids with collection should a pause occur, allowing collection to take
+ advantage of multiprocessor hosts. The "stop the world" problem of threaded
+ collectors is generally still present to a limited extent. Sophisticated
+ marking algorithms are necessary. Read barriers may be necessary.
+
+As the matrix indicates, LLVM's garbage collection infrastructure is already
+suitable for a wide variety of collectors, but does not currently extend to
+multithreaded programs; this support will be added in the future as interest
+warrants.
+
+.. _stack-map:
+
+Computing stack maps
+--------------------
+
+LLVM automatically computes a stack map. One of the most important features
+of a ``GCStrategy`` is to compile this information into the executable in
+the binary representation expected by the runtime library.
+
+The stack map consists of the location and identity of each GC root in each
+function in the module.  For each root:
+
+* ``RootNum``: The index of the root.
+
+* ``StackOffset``: The offset of the object relative to the frame pointer.
+
+* ``RootMetadata``: The value passed as the ``%metadata`` parameter to the
+ ``@llvm.gcroot`` intrinsic.
+
+Also, for the function as a whole:
+
+* ``getFrameSize()``: The overall size of the function's initial stack frame,
+ not accounting for any dynamic allocation.
+
+* ``roots_size()``: The count of roots in the function.
+
+To access the stack map, use ``GCFunctionMetadata::roots_begin()`` and
+``roots_end()`` from the :ref:`GCMetadataPrinter <assembly>`:
+
+.. code-block:: c++
+
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ GCFunctionInfo *FI = *I;
+ unsigned FrameSize = FI->getFrameSize();
+ size_t RootCount = FI->roots_size();
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+ RE = FI->roots_end();
+ RI != RE; ++RI) {
+ int RootNum = RI->Num;
+ int RootStackOffset = RI->StackOffset;
+ Constant *RootMetadata = RI->Metadata;
+ }
+ }
+
+If the ``llvm.gcroot`` intrinsic is eliminated before code generation by a
+custom lowering pass, LLVM will compute an empty stack map. This may be useful
+for collector plugins which implement reference counting or a shadow stack.
+
+.. _init-roots:
+
+Initializing roots to null: ``InitRoots``
+-----------------------------------------
+
+.. code-block:: c++
+
+ MyGC::MyGC() {
+ InitRoots = true;
+ }
+
+When set, LLVM will automatically initialize each root to ``null`` upon entry to
+the function. This prevents the GC's sweep phase from visiting uninitialized
+pointers, which will almost certainly cause it to crash. This initialization
+occurs before custom lowering, so the two may be used together.
+
+Since LLVM does not yet compute liveness information, there is no means of
+distinguishing an uninitialized stack root from an initialized one. Therefore,
+this feature should be used by all GC plugins. It is enabled by default.
+
+Custom lowering of intrinsics: ``CustomRoots``, ``CustomReadBarriers``, and ``CustomWriteBarriers``
+---------------------------------------------------------------------------------------------------
+
+For GCs which use barriers or unusual treatment of stack roots, these flags
+allow the collector to perform arbitrary transformations of the LLVM IR:
+
+.. code-block:: c++
+
+ class MyGC : public GCStrategy {
+ public:
+ MyGC() {
+ CustomRoots = true;
+ CustomReadBarriers = true;
+ CustomWriteBarriers = true;
+ }
+
+ virtual bool initializeCustomLowering(Module &M);
+ virtual bool performCustomLowering(Function &F);
+ };
+
+If any of these flags are set, then LLVM suppresses its default lowering for the
+corresponding intrinsics and instead calls ``performCustomLowering``.
+
+LLVM's default action for each intrinsic is as follows:
+
+* ``llvm.gcroot``: Leave it alone. The code generator must see it or the stack
+ map will not be computed.
+
+* ``llvm.gcread``: Substitute a ``load`` instruction.
+
+* ``llvm.gcwrite``: Substitute a ``store`` instruction.
+
+If ``CustomReadBarriers`` or ``CustomWriteBarriers`` are specified, then
+``performCustomLowering`` **must** eliminate the corresponding barriers.
+
+``performCustomLowering`` must comply with the same restrictions as
+:ref:`FunctionPass::runOnFunction <writing-an-llvm-pass-runOnFunction>`.
+Likewise, ``initializeCustomLowering`` has the same semantics as
+:ref:`Pass::doInitialization(Module&)
+<writing-an-llvm-pass-doInitialization-mod>`.
+
+The following can be used as a template:
+
+.. code-block:: c++
+
+ #include "llvm/Module.h"
+ #include "llvm/IntrinsicInst.h"
+
+ bool MyGC::initializeCustomLowering(Module &M) {
+ return false;
+ }
+
+ bool MyGC::performCustomLowering(Function &F) {
+ bool MadeChange = false;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; )
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ // Handle llvm.gcwrite.
+ CI->eraseFromParent();
+ MadeChange = true;
+ break;
+ case Intrinsic::gcread:
+ // Handle llvm.gcread.
+ CI->eraseFromParent();
+ MadeChange = true;
+ break;
+ case Intrinsic::gcroot:
+ // Handle llvm.gcroot.
+ CI->eraseFromParent();
+ MadeChange = true;
+ break;
+ }
+
+ return MadeChange;
+ }
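+
+As an illustration, the ``llvm.gcwrite`` case might expand into an explicit
+store followed by a call into the runtime.  This is only a sketch: the entry
+point ``runtime_card_mark`` is a hypothetical name, and a real plugin would
+match whatever barrier interface its runtime library actually defines.
+
+.. code-block:: c++
+
+  #include "llvm/IRBuilder.h"
+
+  // Lower @llvm.gcwrite(i8* %value, i8* %object, i8** %derived) into a
+  // plain store plus a card-marking call on the object pointer.
+  static void lowerGCWrite(IntrinsicInst *CI) {
+    Module *M = CI->getParent()->getParent()->getParent();
+    IRBuilder<> B(CI);
+
+    // The store itself, exactly as the default lowering would emit it.
+    B.CreateStore(CI->getArgOperand(0), CI->getArgOperand(2));
+
+    // Then tell the runtime which object was mutated.
+    Constant *Barrier =
+      M->getOrInsertFunction("runtime_card_mark", B.getVoidTy(),
+                             B.getInt8PtrTy(), NULL);
+    B.CreateCall(Barrier, CI->getArgOperand(1));
+
+    CI->eraseFromParent();
+  }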
+
+.. _safe-points:
+
+Generating safe points: ``NeededSafePoints``
+--------------------------------------------
+
+LLVM can compute four kinds of safe points:
+
+.. code-block:: c++
+
+ namespace GC {
+ /// PointKind - The type of a collector-safe point.
+ ///
+ enum PointKind {
+ Loop, //< Instr is a loop (backwards branch).
+ Return, //< Instr is a return instruction.
+ PreCall, //< Instr is a call instruction.
+ PostCall //< Instr is the return address of a call.
+ };
+ }
+
+A collector can request any combination of the four by setting the
+``NeededSafePoints`` mask:
+
+.. code-block:: c++
+
+ MyGC::MyGC() {
+ NeededSafePoints = 1 << GC::Loop
+ | 1 << GC::Return
+ | 1 << GC::PreCall
+ | 1 << GC::PostCall;
+ }
+
+It can then use the following routines to access safe points:
+
+.. code-block:: c++
+
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ GCFunctionInfo *MD = *I;
+ size_t PointCount = MD->size();
+
+ for (GCFunctionInfo::iterator PI = MD->begin(),
+ PE = MD->end(); PI != PE; ++PI) {
+ GC::PointKind PointKind = PI->Kind;
+ unsigned PointNum = PI->Num;
+ }
+ }
+
+Almost every collector requires ``PostCall`` safe points, since these correspond
+to the moments when the function is suspended during a call to a subroutine.
+
+Threaded programs generally require ``Loop`` safe points to guarantee that the
+application will reach a safe point within a bounded amount of time, even if it
+is executing a long-running loop which contains no function calls.
+
+Threaded collectors may also require ``Return`` and ``PreCall`` safe points to
+implement "stop the world" techniques using self-modifying code, where it is
+important that the program not exit the function without reaching a safe point
+(because only the topmost function has been patched).
+
+.. _assembly:
+
+Emitting assembly code: ``GCMetadataPrinter``
+---------------------------------------------
+
+LLVM allows a plugin to print arbitrary assembly code before and after the rest
+of a module's assembly code. At the end of the module, the GC can compile the
+LLVM stack map into assembly code. (At the beginning, this information is not
+yet computed.)
+
+Since AsmWriter and CodeGen are separate components of LLVM, a separate abstract
+base class and registry are provided for printing assembly code, the
+``GCMetadataPrinter`` and ``GCMetadataPrinterRegistry``.  The AsmWriter will look
+for such a subclass if the ``GCStrategy`` sets ``UsesMetadata``:
+
+.. code-block:: c++
+
+ MyGC::MyGC() {
+ UsesMetadata = true;
+ }
+
+This separation allows JIT-only clients to be smaller.
+
+Note that LLVM does not currently have analogous APIs to support code generation
+in the JIT, nor when using the object writers.
+
+.. code-block:: c++
+
+ // lib/MyGC/MyGCPrinter.cpp - Example LLVM GC printer
+
+ #include "llvm/CodeGen/GCMetadataPrinter.h"
+ #include "llvm/Support/Compiler.h"
+
+ using namespace llvm;
+
+ namespace {
+ class LLVM_LIBRARY_VISIBILITY MyGCPrinter : public GCMetadataPrinter {
+ public:
+ virtual void beginAssembly(std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI);
+
+ virtual void finishAssembly(std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI);
+ };
+
+ GCMetadataPrinterRegistry::Add<MyGCPrinter>
+ X("mygc", "My bespoke garbage collector.");
+ }
+
+The collector should use ``AsmPrinter`` and ``TargetAsmInfo`` to print portable
+assembly code to the ``std::ostream``. The collector itself contains the stack
+map for the entire module, and may access the ``GCFunctionInfo`` using its own
+``begin()`` and ``end()`` methods. Here's a realistic example:
+
+.. code-block:: c++
+
+ #include "llvm/CodeGen/AsmPrinter.h"
+ #include "llvm/Function.h"
+ #include "llvm/Target/TargetMachine.h"
+ #include "llvm/DataLayout.h"
+ #include "llvm/Target/TargetAsmInfo.h"
+
+ void MyGCPrinter::beginAssembly(std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
+ // Nothing to do.
+ }
+
+ void MyGCPrinter::finishAssembly(std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
+ // Set up for emitting addresses.
+ const char *AddressDirective;
+ int AddressAlignLog;
+ if (AP.TM.getDataLayout()->getPointerSize() == sizeof(int32_t)) {
+ AddressDirective = TAI.getData32bitsDirective();
+ AddressAlignLog = 2;
+ } else {
+ AddressDirective = TAI.getData64bitsDirective();
+ AddressAlignLog = 3;
+ }
+
+ // Put this in the data section.
+ AP.SwitchToDataSection(TAI.getDataSection());
+
+ // For each function...
+ for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+ GCFunctionInfo &MD = **FI;
+
+ // Emit this data structure:
+ //
+ // struct {
+ // int32_t PointCount;
+ // struct {
+ // void *SafePointAddress;
+ // int32_t LiveCount;
+ // int32_t LiveOffsets[LiveCount];
+ // } Points[PointCount];
+ // } __gcmap_<FUNCTIONNAME>;
+
+ // Align to address width.
+ AP.EmitAlignment(AddressAlignLog);
+
+ // Emit the symbol by which the stack map entry can be found.
+ std::string Symbol;
+ Symbol += TAI.getGlobalPrefix();
+ Symbol += "__gcmap_";
+ Symbol += MD.getFunction().getName();
+ if (const char *GlobalDirective = TAI.getGlobalDirective())
+ OS << GlobalDirective << Symbol << "\n";
+ OS << TAI.getGlobalPrefix() << Symbol << ":\n";
+
+ // Emit PointCount.
+ AP.EmitInt32(MD.size());
+ AP.EOL("safe point count");
+
+ // And each safe point...
+ for (GCFunctionInfo::iterator PI = MD.begin(),
+ PE = MD.end(); PI != PE; ++PI) {
+ // Align to address width.
+ AP.EmitAlignment(AddressAlignLog);
+
+ // Emit the address of the safe point.
+ OS << AddressDirective
+ << TAI.getPrivateGlobalPrefix() << "label" << PI->Num;
+ AP.EOL("safe point address");
+
+ // Emit the stack frame size.
+ AP.EmitInt32(MD.getFrameSize());
+ AP.EOL("stack frame size");
+
+ // Emit the number of live roots in the function.
+ AP.EmitInt32(MD.live_size(PI));
+ AP.EOL("live root count");
+
+ // And for each live root...
+ for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+ LE = MD.live_end(PI);
+ LI != LE; ++LI) {
+ // Print its offset within the stack frame.
+ AP.EmitInt32(LI->StackOffset);
+ AP.EOL("stack offset");
+ }
+ }
+ }
+ }
+
+References
+==========
+
+.. _appel89:
+
+[Appel89] Runtime Tags Aren't Necessary. Andrew W. Appel. Lisp and Symbolic
+Computation 19(7):703-705, July 1989.
+
+.. _goldberg91:
+
+[Goldberg91] Tag-free garbage collection for strongly typed programming
+languages. Benjamin Goldberg. ACM SIGPLAN PLDI'91.
+
+.. _tolmach94:
+
+[Tolmach94] Tag-free garbage collection using explicit type parameters. Andrew
+Tolmach. Proceedings of the 1994 ACM conference on LISP and functional
+programming.
+
+.. _henderson02:
+
+[Henderson2002] `Accurate Garbage Collection in an Uncooperative Environment
+<http://citeseer.ist.psu.edu/henderson02accurate.html>`__
+
diff --git a/docs/GetElementPtr.rst b/docs/GetElementPtr.rst
index f6f904b2e35e..306a2a87effd 100644
--- a/docs/GetElementPtr.rst
+++ b/docs/GetElementPtr.rst
@@ -1,5 +1,3 @@
-.. _gep:
-
=======================================
The Often Misunderstood GEP Instruction
=======================================
@@ -22,7 +20,7 @@ Address Computation
When people are first confronted with the GEP instruction, they tend to relate
it to known concepts from other programming paradigms, most notably C array
indexing and field selection. GEP closely resembles C array indexing and field
-selection, however it's is a little different and this leads to the following
+selection, however it is a little different and this leads to the following
questions.
What is the first index of the GEP instruction?
@@ -190,7 +188,7 @@ In this example, we have a global variable, ``%MyVar`` that is a pointer to a
structure containing a pointer to an array of 40 ints. The GEP instruction seems
to be accessing the 18th integer of the structure's array of ints. However, this
is actually an illegal GEP instruction. It won't compile. The reason is that the
-pointer in the structure <i>must</i> be dereferenced in order to index into the
+pointer in the structure *must* be dereferenced in order to index into the
array of 40 ints. Since the GEP instruction never accesses memory, it is
illegal.
@@ -416,7 +414,7 @@ arithmetic, and inttoptr sequences.
Can I compute the distance between two objects, and add that value to one address to compute the other address?
---------------------------------------------------------------------------------------------------------------
-As with arithmetic on null, You can use GEP to compute an address that way, but
+As with arithmetic on null, you can use GEP to compute an address that way, but
you can't use that pointer to actually access the object if you do, unless the
object is managed outside of LLVM.
diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst
index 68768921f6ae..0bbbafc6e690 100644
--- a/docs/GettingStarted.rst
+++ b/docs/GettingStarted.rst
@@ -1,9 +1,10 @@
-.. _getting_started:
-
====================================
Getting Started with the LLVM System
====================================
+.. contents::
+ :local:
+
Overview
========
@@ -68,33 +69,30 @@ Here's the short story for getting up and running quickly with LLVM:
* ``../llvm/configure [options]``
Some common options:
- * ``--prefix=directory`` ---
-
- Specify for *directory* the full pathname of where you want the LLVM
- tools and libraries to be installed (default ``/usr/local``).
-
- * ``--enable-optimized`` ---
+ * ``--prefix=directory`` --- Specify for *directory* the full pathname of
+ where you want the LLVM tools and libraries to be installed (default
+ ``/usr/local``).
- Compile with optimizations enabled (default is NO).
+ * ``--enable-optimized`` --- Compile with optimizations enabled (default
+ is NO).
- * ``--enable-assertions`` ---
-
- Compile with assertion checks enabled (default is YES).
+ * ``--enable-assertions`` --- Compile with assertion checks enabled
+ (default is YES).
* ``make [-j]`` --- The ``-j`` specifies the number of jobs (commands) to run
simultaneously. This builds both LLVM and Clang for Debug+Asserts mode.
- The --enabled-optimized configure option is used to specify a Release
+ The ``--enabled-optimized`` configure option is used to specify a Release
build.
* ``make check-all`` --- This run the regression tests to ensure everything
is in working order.
-
+
* ``make update`` --- This command is used to update all the svn repositories
at once, rather then having to ``cd`` into the individual repositories and
running ``svn update``.
* It is also possible to use CMake instead of the makefiles. With CMake it is
- also possible to generate project files for several IDEs: Eclipse CDT4,
+ possible to generate project files for several IDEs: Xcode, Eclipse CDT4,
CodeBlocks, Qt-Creator (use the CodeBlocks generator), KDevelop3.
* If you get an "internal compiler error (ICE)" or test failures, see
@@ -126,6 +124,8 @@ LLVM is known to work on the following platforms:
+-----------------+----------------------+-------------------------+
|Linux | amd64 | GCC |
+-----------------+----------------------+-------------------------+
+|Linux | ARM\ :sup:`13` | GCC |
++-----------------+----------------------+-------------------------+
|Solaris | V9 (Ultrasparc) | GCC |
+-----------------+----------------------+-------------------------+
|FreeBSD | x86\ :sup:`1` | GCC |
@@ -161,8 +161,6 @@ LLVM has partial support for the following platforms:
.. note::
- Code generation supported for Pentium processors and up
-
#. Code generation supported for Pentium processors and up
#. Code generation supported for 32-bit ABI only
#. No native code generation
@@ -182,9 +180,9 @@ LLVM has partial support for the following platforms:
Windows-specifics that will cause the build to fail.
#. To use LLVM modules on Win32-based system, you may configure LLVM
with ``--enable-shared``.
-
#. To compile SPU backend, you need to add ``LDFLAGS=-Wl,--stack,16777216`` to
configure.
+ #. MCJIT not working well pre-v7, old JIT engine not supported any more.
Note that you will need about 1-3 GB of space for a full LLVM build in Debug
mode, depending on the system (it is so large because of all the debugging
@@ -219,11 +217,7 @@ uses the package and provides other details.
+--------------------------------------------------------------+-----------------+---------------------------------------------+
| `SVN <http://subversion.tigris.org/project_packages.html>`_ | >=1.3 | Subversion access to LLVM\ :sup:`2` |
+--------------------------------------------------------------+-----------------+---------------------------------------------+
-| `DejaGnu <http://savannah.gnu.org/projects/dejagnu>`_ | 1.4.2 | Automated test suite\ :sup:`3` |
-+--------------------------------------------------------------+-----------------+---------------------------------------------+
-| `tcl <http://www.tcl.tk/software/tcltk/>`_ | 8.3, 8.4 | Automated test suite\ :sup:`3` |
-+--------------------------------------------------------------+-----------------+---------------------------------------------+
-| `expect <http://expect.nist.gov/>`_ | 5.38.0 | Automated test suite\ :sup:`3` |
+| `python <http://www.python.org/>`_ | >=2.4 | Automated test suite\ :sup:`3` |
+--------------------------------------------------------------+-----------------+---------------------------------------------+
| `perl <http://www.perl.com/download.csp>`_ | >=5.6.0 | Utilities |
+--------------------------------------------------------------+-----------------+---------------------------------------------+
@@ -368,6 +362,9 @@ optimizations are turned on. The symptom is an infinite loop in
``-O0``. A test failure in ``test/Assembler/alignstack.ll`` is one symptom of
the problem.
+**GCC 4.6.3 on ARM**: Miscompiles ``llvm-readobj`` at ``-O3``. A test failure
+in ``test/Object/readobj-shared-object.test`` is one symptom of the problem.
+
**GNU ld 2.16.X**. Some 2.16.X versions of the ld linker will produce very long
warning messages complaining that some "``.gnu.linkonce.t.*``" symbol was
defined in a discarded section. You can safely ignore these messages as they are
@@ -384,6 +381,14 @@ intermittent failures when building LLVM with position independent code. The
symptom is an error about cyclic dependencies. We recommend upgrading to a
newer version of Gold.
+**Clang 3.0 with libstdc++ 4.7.x**: a few Linux distributions (Ubuntu 12.10,
+Fedora 17) have both Clang 3.0 and libstdc++ 4.7 in their repositories. Clang
+3.0 does not implement a few builtins that are used in this library. We
+recommend using the system GCC to compile LLVM and Clang in this case.
+
+**Clang 3.0 on Mageia 2**: a packaging issue prevents Clang from finding at
+least some libstdc++ headers (``cxxabi.h``).
+
.. _Getting Started with LLVM:
Getting Started with LLVM
@@ -459,6 +464,8 @@ The files are as follows, with *x.y* marking the version number:
Binary release of the llvm-gcc-4.2 front end for a specific platform.
+.. _checkout:
+
Checkout LLVM from Subversion
-----------------------------
@@ -505,7 +512,7 @@ directory:
If you would like to get the LLVM test suite (a separate package as of 1.4), you
get it from the Subversion repository:
-.. code-block:: bash
+.. code-block:: console
% cd llvm/projects
% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
@@ -514,33 +521,46 @@ By placing it in the ``llvm/projects``, it will be automatically configured by
the LLVM configure script as well as automatically updated when you run ``svn
update``.
-GIT mirror
+Git Mirror
----------
-GIT mirrors are available for a number of LLVM subprojects. These mirrors sync
+Git mirrors are available for a number of LLVM subprojects. These mirrors sync
automatically with each Subversion commit and contain all necessary git-svn
marks (so, you can recreate git-svn metadata locally). Note that right now
-mirrors reflect only ``trunk`` for each project. You can do the read-only GIT
+mirrors reflect only ``trunk`` for each project. You can do the read-only Git
clone of LLVM via:
-.. code-block:: bash
+.. code-block:: console
% git clone http://llvm.org/git/llvm.git
If you want to check out clang too, run:
-.. code-block:: bash
+.. code-block:: console
- % git clone http://llvm.org/git/llvm.git
% cd llvm/tools
% git clone http://llvm.org/git/clang.git
+If you want to check out compiler-rt too, run:
+
+.. code-block:: console
+
+ % cd llvm/projects
+ % git clone http://llvm.org/git/compiler-rt.git
+
+If you want to check out the Test Suite Source Code (optional), run:
+
+.. code-block:: console
+
+ % cd llvm/projects
+ % git clone http://llvm.org/git/test-suite.git
+
Since the upstream repository is in Subversion, you should use ``git
pull --rebase`` instead of ``git pull`` to avoid generating a non-linear history
in your clone. To configure ``git pull`` to pass ``--rebase`` by default on the
master branch, run the following command:
-.. code-block:: bash
+.. code-block:: console
% git config branch.master.rebase true
@@ -553,13 +573,13 @@ Assume ``master`` points the upstream and ``mybranch`` points your working
branch, and ``mybranch`` is rebased onto ``master``. At first you may check
sanity of whitespaces:
-.. code-block:: bash
+.. code-block:: console
% git diff --check master..mybranch
The easiest way to generate a patch is as below:
-.. code-block:: bash
+.. code-block:: console
% git diff master..mybranch > /path/to/mybranch.diff
@@ -570,20 +590,20 @@ could be accepted with ``patch -p1 -N``.
But you may generate patchset with git-format-patch. It generates by-each-commit
patchset. To generate patch files to attach to your article:
-.. code-block:: bash
+.. code-block:: console
% git format-patch --no-attach master..mybranch -o /path/to/your/patchset
If you would like to send patches directly, you may use git-send-email or
git-imap-send. Here is an example to generate the patchset in Gmail's [Drafts].
-.. code-block:: bash
+.. code-block:: console
% git format-patch --attach master..mybranch --stdout | git imap-send
Then, your .git/config should have [imap] sections.
-.. code-block:: bash
+.. code-block:: ini
[imap]
host = imaps://imap.gmail.com
@@ -603,7 +623,7 @@ For developers to work with git-svn
To set up clone from which you can submit code using ``git-svn``, run:
-.. code-block:: bash
+.. code-block:: console
% git clone http://llvm.org/git/llvm.git
% cd llvm
@@ -619,10 +639,12 @@ To set up clone from which you can submit code using ``git-svn``, run:
% git config svn-remote.svn.fetch :refs/remotes/origin/master
% git svn rebase -l
+Likewise for compiler-rt and test-suite.
+
To update this clone without generating git-svn tags that conflict with the
-upstream git repo, run:
+upstream Git repo, run:
-.. code-block:: bash
+.. code-block:: console
% git fetch && (cd tools/clang && git fetch) # Get matching revisions of both trees.
% git checkout master
@@ -631,20 +653,65 @@ upstream git repo, run:
git checkout master &&
git svn rebase -l)
+Likewise for compiler-rt and test-suite.
+
This leaves your working directories on their master branches, so you'll need to
``checkout`` each working branch individually and ``rebase`` it on top of its
-parent branch. (Note: This script is intended for relative newbies to git. If
-you have more experience, you can likely improve on it.)
+parent branch.
+
+For those who wish to be able to update an llvm repo in a simpler fashion,
+consider placing the following Git script in your path under the name
+``git-svnup``:
+
+.. code-block:: bash
+
+ #!/bin/bash
+
+ STATUS=$(git status -s | grep -v "??")
+
+ if [ ! -z "$STATUS" ]; then
+ STASH="yes"
+ git stash >/dev/null
+ fi
+
+ git fetch
+ OLD_BRANCH=$(git rev-parse --abbrev-ref HEAD)
+ git checkout master 2> /dev/null
+ git svn rebase -l
+ git checkout $OLD_BRANCH 2> /dev/null
+
+ if [ ! -z $STASH ]; then
+ git stash pop >/dev/null
+ fi
+
+Then to perform the aforementioned update steps go into your source directory
+and just type ``git-svnup`` or ``git svnup`` and everything will just work.
+
+To commit back changes via git-svn, use ``dcommit``:
+
+.. code-block:: console
+
+ % git svn dcommit
+
+Note that git-svn will create one SVN commit for each Git commit you have pending,
+so squash and edit each commit before executing ``dcommit`` to make sure they all
+conform to the coding standards and the developers' policy.
+
+On success, ``dcommit`` will rebase against the HEAD of SVN, so to avoid conflict,
+please make sure your current branch is up-to-date (via fetch/rebase) before
+proceeding.
The git-svn metadata can get out of sync after you mess around with branches and
``dcommit``. When that happens, ``git svn dcommit`` stops working, complaining
about files with uncommitted changes. The fix is to rebuild the metadata:
-.. code-block:: bash
+.. code-block:: console
% rm -rf .git/svn
% git svn rebase -l
+Please refer to the Git-SVN manual (``man git-svn``) for more information.
+
Local LLVM Configuration
------------------------
@@ -661,14 +728,15 @@ configure the build system:
| Variable | Purpose |
+============+===========================================================+
| CC | Tells ``configure`` which C compiler to use. By default, |
-| | ``configure`` will look for the first GCC C compiler in |
-| | ``PATH``. Use this variable to override ``configure``\'s |
-| | default behavior. |
+| | ``configure`` will check ``PATH`` for ``clang`` and GCC C |
+| | compilers (in this order). Use this variable to override |
+| | ``configure``\'s default behavior. |
+------------+-----------------------------------------------------------+
| CXX | Tells ``configure`` which C++ compiler to use. By |
-| | default, ``configure`` will look for the first GCC C++ |
-| | compiler in ``PATH``. Use this variable to override |
-| | ``configure``'s default behavior. |
+| | default, ``configure`` will check ``PATH`` for |
+| | ``clang++`` and GCC C++ compilers (in this order). Use |
+| | this variable to override ``configure``'s default |
+| | behavior. |
+------------+-----------------------------------------------------------+
The following options can be used to set or enable LLVM specific options:
@@ -722,13 +790,13 @@ To configure LLVM, follow these steps:
#. Change directory into the object root directory:
- .. code-block:: bash
+ .. code-block:: console
% cd OBJ_ROOT
#. Run the ``configure`` script located in the LLVM source tree:
- .. code-block:: bash
+ .. code-block:: console
% SRC_ROOT/configure --prefix=/install/path [other options]
@@ -764,7 +832,7 @@ Profile Builds
Once you have LLVM configured, you can build it by entering the *OBJ_ROOT*
directory and issuing the following command:
-.. code-block:: bash
+.. code-block:: console
% gmake
@@ -775,7 +843,7 @@ If you have multiple processors in your machine, you may wish to use some of the
parallel build options provided by GNU Make. For example, you could use the
command:
-.. code-block:: bash
+.. code-block:: console
% gmake -j2
@@ -842,12 +910,39 @@ any subdirectories that it contains. Entering any directory inside the LLVM
object tree and typing ``gmake`` should rebuild anything in or below that
directory that is out of date.
+This does not apply to building the documentation.
+LLVM's (non-Doxygen) documentation is produced with the
+`Sphinx <http://sphinx-doc.org/>`_ documentation generation system.
+There are some HTML documents that have not yet been converted to the new
+system (which uses the easy-to-read and easy-to-write
+`reStructuredText <http://sphinx-doc.org/rest.html>`_ plaintext markup
+language).
+The generated documentation is built in the ``SRC_ROOT/docs`` directory using
+a special makefile.
+For instructions on how to install Sphinx, see
+`Sphinx Introduction for LLVM Developers
+<http://lld.llvm.org/sphinx_intro.html>`_.
+After following the instructions there for installing Sphinx, build the LLVM
+HTML documentation by doing the following:
+
+.. code-block:: console
+
+ $ cd SRC_ROOT/docs
+ $ make -f Makefile.sphinx
+
+This creates a ``_build/html`` sub-directory with all of the HTML files, not
+just the generated ones.
+This directory corresponds to ``llvm.org/docs``.
+For example, ``_build/html/SphinxQuickstartTemplate.html`` corresponds to
+``llvm.org/docs/SphinxQuickstartTemplate.html``.
+The :doc:`SphinxQuickstartTemplate` is useful when creating a new document.
+
Cross-Compiling LLVM
--------------------
It is possible to cross-compile LLVM itself. That is, you can create LLVM
executables and libraries to be hosted on a platform different from the platform
-where they are build (a Canadian Cross build). To configure a cross-compile,
+where they are built (a Canadian Cross build). To configure a cross-compile,
supply the configure script with ``--build`` and ``--host`` options that are
different. The values of these options must be legal target triples that your
GCC compiler supports.
@@ -866,13 +961,13 @@ This is accomplished in the typical autoconf manner:
* Change directory to where the LLVM object files should live:
- .. code-block:: bash
+ .. code-block:: console
% cd OBJ_ROOT
* Run the ``configure`` script found in the LLVM source directory:
- .. code-block:: bash
+ .. code-block:: console
% SRC_ROOT/configure
@@ -913,12 +1008,12 @@ Optional Configuration Items
----------------------------
If you're running on a Linux system that supports the `binfmt_misc
-<http://www.tat.physik.uni-tuebingen.de/~rguenth/linux/binfmt_misc.html>`_
+<http://en.wikipedia.org/wiki/binfmt_misc>`_
module, and you have root access on the system, you can set your system up to
execute LLVM bitcode files directly. To do this, use commands like this (the
first command may not be required if you are already using the module):
-.. code-block:: bash
+.. code-block:: console
% mount -t binfmt_misc none /proc/sys/fs/binfmt_misc
% echo ':llvm:M::BC::/path/to/lli:' > /proc/sys/fs/binfmt_misc/register
@@ -928,7 +1023,7 @@ first command may not be required if you are already using the module):
This allows you to execute LLVM bitcode files directly. On Debian, you can also
use this command instead of the 'echo' command above:
-.. code-block:: bash
+.. code-block:: console
% sudo update-binfmts --install llvm /path/to/lli --magic 'BC'
@@ -1073,8 +1168,8 @@ module that must be checked out (usually to ``projects/test-suite``). This
module contains a comprehensive correctness, performance, and benchmarking test
suite for LLVM. It is a separate Subversion module because not every LLVM user
is interested in downloading or building such a comprehensive test suite. For
-further details on this test suite, please see the `Testing
-Guide <TestingGuide.html>`_ document.
+further details on this test suite, please see the :doc:`Testing Guide
+<TestingGuide>` document.
.. _tools:
@@ -1219,7 +1314,7 @@ Example with clang
#. Next, compile the C file into a native executable:
- .. code-block:: bash
+ .. code-block:: console
% clang hello.c -o hello
@@ -1230,7 +1325,7 @@ Example with clang
#. Next, compile the C file into a LLVM bitcode file:
- .. code-block:: bash
+ .. code-block:: console
% clang -O3 -emit-llvm hello.c -c -o hello.bc
@@ -1240,42 +1335,42 @@ Example with clang
#. Run the program in both forms. To run the program, use:
- .. code-block:: bash
+ .. code-block:: console
% ./hello
and
- .. code-block:: bash
+ .. code-block:: console
% lli hello.bc
- The second examples shows how to invoke the LLVM JIT, `lli
- <CommandGuide/html/lli.html>`_.
+ The second examples shows how to invoke the LLVM JIT, :doc:`lli
+ <CommandGuide/lli>`.
#. Use the ``llvm-dis`` utility to take a look at the LLVM assembly code:
- .. code-block:: bash
+ .. code-block:: console
% llvm-dis < hello.bc | less
#. Compile the program to native assembly using the LLC code generator:
- .. code-block:: bash
+ .. code-block:: console
% llc hello.bc -o hello.s
#. Assemble the native assembly language file into a program:
- .. code-block:: bash
+ .. code-block:: console
- **Solaris:** % /opt/SUNWspro/bin/cc -xarch=v9 hello.s -o hello.native
+ % /opt/SUNWspro/bin/cc -xarch=v9 hello.s -o hello.native # On Solaris
- **Others:** % gcc hello.s -o hello.native
+ % gcc hello.s -o hello.native # On others
#. Execute the native code program:
- .. code-block:: bash
+ .. code-block:: console
% ./hello.native
diff --git a/docs/GettingStartedVS.rst b/docs/GettingStartedVS.rst
index 35f97f04b9d5..4c80f2c57bfa 100644
--- a/docs/GettingStartedVS.rst
+++ b/docs/GettingStartedVS.rst
@@ -1,5 +1,3 @@
-.. _winvs:
-
==================================================================
Getting Started with the LLVM System using Microsoft Visual Studio
==================================================================
diff --git a/docs/GoldPlugin.rst b/docs/GoldPlugin.rst
index 300aea9f9a49..17bbeb8ba9f8 100644
--- a/docs/GoldPlugin.rst
+++ b/docs/GoldPlugin.rst
@@ -1,11 +1,7 @@
-.. _gold-plugin:
-
====================
The LLVM gold plugin
====================
-.. sectionauthor:: Nick Lewycky
-
Introduction
============
diff --git a/docs/HowToAddABuilder.rst b/docs/HowToAddABuilder.rst
index b0cd2907f975..893f12d19d55 100644
--- a/docs/HowToAddABuilder.rst
+++ b/docs/HowToAddABuilder.rst
@@ -1,11 +1,7 @@
-.. _how_to_add_a_builder:
-
===================================================================
How To Add Your Build Configuration To LLVM Buildbot Infrastructure
===================================================================
-.. sectionauthor:: Galina Kistanova <gkistanova@gmail.com>
-
Introduction
============
diff --git a/docs/HowToBuildOnARM.rst b/docs/HowToBuildOnARM.rst
index d786a7dedaf4..32ae39ba6883 100644
--- a/docs/HowToBuildOnARM.rst
+++ b/docs/HowToBuildOnARM.rst
@@ -1,11 +1,7 @@
-.. _how_to_build_on_arm:
-
===================================================================
How To Build On ARM
===================================================================
-.. sectionauthor:: Wei-Ren Chen (陳韋任) <chenwj@iis.sinica.edu.tw>
-
Introduction
============
@@ -40,8 +36,8 @@ on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips.
.. code-block:: bash
- ./configure --build=armv7l-unknown-linux-gnueabihf
- --host=armv7l-unknown-linux-gnueabihf
- --target=armv7l-unknown-linux-gnueabihf --with-cpu=cortex-a9
- --with-float=hard --with-abi=aapcs-vfp --with-fpu=neon
- --enable-targets=arm --disable-optimized --enable-assertions
+ ./configure --build=armv7l-unknown-linux-gnueabihf \
+ --host=armv7l-unknown-linux-gnueabihf \
+ --target=armv7l-unknown-linux-gnueabihf --with-cpu=cortex-a9 \
+ --with-float=hard --with-abi=aapcs-vfp --with-fpu=neon \
+ --enable-targets=arm --enable-optimized --enable-assertions
diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html
deleted file mode 100644
index 6fdec2cfee79..000000000000
--- a/docs/HowToReleaseLLVM.html
+++ /dev/null
@@ -1,581 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>How To Release LLVM To The Public</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>How To Release LLVM To The Public</h1>
-<ol>
- <li><a href="#introduction">Introduction</a></li>
- <li><a href="#criteria">Qualification Criteria</a></li>
- <li><a href="#introduction">Release Timeline</a></li>
- <li><a href="#process">Release Process</a></li>
-</ol>
-<div class="doc_author">
- <p>Written by <a href="mailto:tonic@nondot.org">Tanya Lattner</a>,
- <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>,
- <a href="mailto:criswell@cs.uiuc.edu">John Criswell</a>, &amp;
- <a href="mailto:wendling@apple.com">Bill Wendling</a>
- </p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="introduction">Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This document contains information about successfully releasing LLVM &mdash;
- including subprojects: e.g., <tt>clang</tt> and <tt>dragonegg</tt> &mdash; to
- the public. It is the Release Manager's responsibility to ensure that a high
- quality build of LLVM is released.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="process">Release Timeline</a></h2>
-<!-- *********************************************************************** -->
-<div>
-
-<p>LLVM is released on a time based schedule &mdash; roughly every 6 months. We
- do not normally have dot releases because of the nature of LLVM's incremental
- development philosophy. That said, the only thing preventing dot releases for
- critical bug fixes from happening is a lack of resources &mdash; testers,
- machines, time, etc. And, because of the high quality we desire for LLVM
- releases, we cannot allow for a truncated form of release qualification.</p>
-
-<p>The release process is roughly as follows:</p>
-
-<ul>
- <li><p>Set code freeze and branch creation date for 6 months after last code
- freeze date. Announce release schedule to the LLVM community and update
- the website.</p></li>
-
- <li><p>Create release branch and begin release process.</p></li>
-
- <li><p>Send out release candidate sources for first round of testing. Testing
- lasts 7-10 days. During the first round of testing, any regressions found
- should be fixed. Patches are merged from mainline into the release
- branch. Also, all features need to be completed during this time. Any
- features not completed at the end of the first round of testing will be
- removed or disabled for the release.</p></li>
-
- <li><p>Generate and send out the second release candidate sources. Only
- <em>critial</em> bugs found during this testing phase will be fixed. Any
- bugs introduced by merged patches will be fixed. If so a third round of
- testing is needed.</p></li>
-
- <li><p>The release notes are updated.</p></li>
-
- <li><p>Finally, release!</p></li>
-</ul>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="process">Release Process</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<ol>
- <li><a href="#release-admin">Release Administrative Tasks</a>
- <ol>
- <li><a href="#branch">Create Release Branch</a></li>
- <li><a href="#verchanges">Update Version Numbers</a></li>
- </ol>
- </li>
- <li><a href="#release-build">Building the Release</a>
- <ol>
- <li><a href="#dist">Build the LLVM Source Distributions</a></li>
- <li><a href="#build">Build LLVM</a></li>
- <li><a href="#clangbin">Build the Clang Binary Distribution</a></li>
- <li><a href="#target-build">Target Specific Build Details</a></li>
- </ol>
- </li>
- <li><a href="#release-qualify">Release Qualification Criteria</a>
- <ol>
- <li><a href="#llvm-qualify">Qualify LLVM</a></li>
- <li><a href="#clang-qualify">Qualify Clang</a></li>
- <li><a href="#targets">Specific Target Qualification Details</a></li>
- </ol>
- </li>
-
- <li><a href="#commTest">Community Testing</a></li>
- <li><a href="#release-patch">Release Patch Rules</a></li>
- <li><a href="#release-final">Release final tasks</a>
- <ol>
- <li><a href="#updocs">Update Documentation</a></li>
- <li><a href="#tag">Tag the LLVM Final Release</a></li>
- <li><a href="#updemo">Update the LLVM Demo Page</a></li>
- <li><a href="#webupdates">Update the LLVM Website</a></li>
- <li><a href="#announce">Announce the Release</a></li>
- </ol>
- </li>
-</ol>
-
-<!-- ======================================================================= -->
-<h3><a name="release-admin">Release Administrative Tasks</a></h3>
-
-<div>
-
-<p>This section describes a few administrative tasks that need to be done for
- the release process to begin. Specifically, it involves:</p>
-
-<ul>
- <li>Creating the release branch,</li>
- <li>Setting version numbers, and</li>
- <li>Tagging release candidates for the release team to begin testing</li>
-</ul>
-
-<!-- ======================================================================= -->
-<h4><a name="branch">Create Release Branch</a></h4>
-
-<div>
-
-<p>Branch the Subversion trunk using the following procedure:</p>
-
-<ol>
- <li><p>Remind developers that the release branching is imminent and to refrain
- from committing patches that might break the build. E.g., new features,
- large patches for works in progress, an overhaul of the type system, an
- exciting new TableGen feature, etc.</p></li>
-
- <li><p>Verify that the current Subversion trunk is in decent shape by
- examining nightly tester and buildbot results.</p></li>
-
- <li><p>Create the release branch for <tt>llvm</tt>, <tt>clang</tt>,
- the <tt>test-suite</tt>, and <tt>dragonegg</tt> from the last known good
- revision. The branch's name is <tt>release_<i>XY</i></tt>,
- where <tt>X</tt> is the major and <tt>Y</tt> the minor release
- numbers. The branches should be created using the following commands:</p>
-
-<div class="doc_code">
-<pre>
-$ svn copy https://llvm.org/svn/llvm-project/llvm/trunk \
- https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XY</i>
-
-$ svn copy https://llvm.org/svn/llvm-project/cfe/trunk \
- https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XY</i>
-
-$ svn copy https://llvm.org/svn/llvm-project/dragonegg/trunk \
- https://llvm.org/svn/llvm-project/dragonegg/branches/release_<i>XY</i>
-
-$ svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
- https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XY</i>
-</pre>
-</div></li>
-
- <li><p>Advise developers that they may now check their patches into the
- Subversion tree again.</p></li>
-
- <li><p>The Release Manager should switch to the release branch, because all
- changes to the release will now be done in the branch. The easiest way to
- do this is to grab a working copy using the following commands:</p>
-
-<div class="doc_code">
-<pre>
-$ svn co https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XY</i> llvm-<i>X.Y</i>
-
-$ svn co https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XY</i> clang-<i>X.Y</i>
-
-$ svn co https://llvm.org/svn/llvm-project/dragonegg/branches/release_<i>XY</i> dragonegg-<i>X.Y</i>
-
-$ svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XY</i> test-suite-<i>X.Y</i>
-</pre>
-</div></li>
-</ol>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="verchanges">Update LLVM Version</a></h4>
-
-<div>
-
-<p>After creating the LLVM release branch, update the release branches'
- <tt>autoconf</tt> and <tt>configure.ac</tt> versions from '<tt>X.Ysvn</tt>'
- to '<tt>X.Y</tt>'. Update it on mainline as well to be the next version
- ('<tt>X.Y+1svn</tt>'). Regenerate the configure scripts for both
- <tt>llvm</tt> and the <tt>test-suite</tt>.</p>
-
-<p>In addition, the version numbers of all the Bugzilla components must be
- updated for the next release.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="dist">Build the LLVM Release Candidates</a></h4>
-
-<div>
-
-<p>Create release candidates for <tt>llvm</tt>, <tt>clang</tt>,
- <tt>dragonegg</tt>, and the LLVM <tt>test-suite</tt> by tagging the branch
- with the respective release candidate number. For instance, to
- create <b>Release Candidate 1</b> you would issue the following commands:</p>
-
-<div class="doc_code">
-<pre>
-$ svn mkdir https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>
-$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XY</i> \
- https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>/rc1
-
-$ svn mkdir https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>
-$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XY</i> \
- https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>/rc1
-
-$ svn mkdir https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_<i>XY</i>
-$ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_<i>XY</i> \
- https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_<i>XY</i>/rc1
-
-$ svn mkdir https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>
-$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XY</i> \
- https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>/rc1
-</pre>
-</div>
-
-<p>Similarly, <b>Release Candidate 2</b> would be named <tt>RC2</tt> and so
- on. This keeps a permanent copy of the release candidate around for people to
- export and build as they wish. The final released sources will be tagged in
- the <tt>RELEASE_<i>XY</i></tt> directory as <tt>Final</tt>
- (c.f. <a href="#tag">Tag the LLVM Final Release</a>).</p>
-
-<p>The Release Manager may supply pre-packaged source tarballs for users. This
- can be done with the following commands:</p>
-
-<div class="doc_code">
-<pre>
-$ svn export https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>/rc1 llvm-<i>X.Y</i>rc1
-$ svn export https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>/rc1 clang-<i>X.Y</i>rc1
-$ svn export https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_<i>XY</i>/rc1 dragonegg-<i>X.Y</i>rc1
-$ svn export https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>/rc1 llvm-test-<i>X.Y</i>rc1
-
-$ tar -cvf - llvm-<i>X.Y</i>rc1 | gzip &gt; llvm-<i>X.Y</i>rc1.src.tar.gz
-$ tar -cvf - clang-<i>X.Y</i>rc1 | gzip &gt; clang-<i>X.Y</i>rc1.src.tar.gz
-$ tar -cvf - dragonegg-<i>X.Y</i>rc1 | gzip &gt; dragonegg-<i>X.Y</i>rc1.src.tar.gz
-$ tar -cvf - llvm-test-<i>X.Y</i>rc1 | gzip &gt; llvm-test-<i>X.Y</i>rc1.src.tar.gz
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="release-build">Building the Release</a></h3>
-
-<div>
-
-<p>The builds of <tt>llvm</tt>, <tt>clang</tt>, and <tt>dragonegg</tt>
- <em>must</em> be free of errors and warnings in Debug, Release+Asserts, and
- Release builds. If all builds are clean, then the release passes Build
- Qualification.</p>
-
-<p>The <tt>make</tt> options for building the different modes:</p>
-
-<table>
- <tr><th>Mode</th><th>Options</th></tr>
- <tr align="left"><td>Debug</td><td><tt>ENABLE_OPTIMIZED=0</tt></td></tr>
- <tr align="left"><td>Release+Asserts</td><td><tt>ENABLE_OPTIMIZED=1</tt></td></tr>
- <tr align="left"><td>Release</td><td><tt>ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1</tt></td></tr>
-</table>
-
-<!-- ======================================================================= -->
-<h4><a name="build">Build LLVM</a></h4>
-
-<div>
-
-<p>Build <tt>Debug</tt>, <tt>Release+Asserts</tt>, and <tt>Release</tt> versions
- of <tt>llvm</tt> on all supported platforms. Directions to build
- <tt>llvm</tt> are <a href="GettingStarted.html#quickstart">here</a>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="clangbin">Build Clang Binary Distribution</a></h4>
-
-<div>
-
-<p>Creating the <tt>clang</tt> binary distribution
- (Debug/Release+Asserts/Release) requires performing the following steps for
- each supported platform:</p>
-
-<ol>
- <li>Build clang according to the directions
- <a href="http://clang.llvm.org/get_started.html">here</a>.</li>
-
- <li>Build both a Debug and Release version of clang. The binary will be the
- Release build.</lI>
-
- <li>Package <tt>clang</tt> (details to follow).</li>
-</ol>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="target-build">Target Specific Build Details</a></h4>
-
-<div>
-
-<p>The table below specifies which compilers are used for each Arch/OS
- combination when qualifying the build of <tt>llvm</tt>, <tt>clang</tt>,
- and <tt>dragonegg</tt>.</p>
-
-<table>
- <tr><th>Architecture</th> <th>OS</th> <th>compiler</th></tr>
- <tr><td>x86-32</td> <td>Mac OS 10.5</td> <td>gcc 4.0.1</td></tr>
- <tr><td>x86-32</td> <td>Linux</td> <td>gcc 4.2.X, gcc 4.3.X</td></tr>
- <tr><td>x86-32</td> <td>FreeBSD</td> <td>gcc 4.2.X</td></tr>
- <tr><td>x86-32</td> <td>mingw</td> <td>gcc 3.4.5</td></tr>
- <tr><td>x86-64</td> <td>Mac OS 10.5</td> <td>gcc 4.0.1</td></tr>
- <tr><td>x86-64</td> <td>Linux</td> <td>gcc 4.2.X, gcc 4.3.X</td></tr>
- <tr><td>x86-64</td> <td>FreeBSD</td> <td>gcc 4.2.X</td></tr>
-</table>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="release-qualify">Building the Release</a></h3>
-
-<div>
-
-<p>A release is qualified when it has no regressions from the previous release
- (or baseline). Regressions are related to correctness first and performance
- second. (We may tolerate some minor performance regressions if they are
- deemed necessary for the general quality of the compiler.)</p>
-
-<p><b>Regressions are new failures in the set of tests that are used to qualify
- each product and only include things on the list. Every release will have
- some bugs in it. It is the reality of developing a complex piece of
- software. We need a very concrete and definitive release criteria that
- ensures we have monotonically improving quality on some metric. The metric we
- use is described below. This doesn't mean that we don't care about other
- criteria, but these are the criteria which we found to be most important and
- which must be satisfied before a release can go out</b></p>
-
-<!-- ======================================================================= -->
-<h4><a name="llvm-qualify">Qualify LLVM</a></h4>
-
-<div>
-
-<p>LLVM is qualified when it has a clean test run without a front-end. And it
- has no regressions when using either <tt>clang</tt> or <tt>dragonegg</tt>
- with the <tt>test-suite</tt> from the previous release.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="clang-qualify">Qualify Clang</a></h4>
-
-<div>
-
-<p><tt>Clang</tt> is qualified when front-end specific tests in the
- <tt>llvm</tt> dejagnu test suite all pass, clang's own test suite passes
- cleanly, and there are no regressions in the <tt>test-suite</tt>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="targets">Specific Target Qualification Details</a></h4>
-
-<div>
-
-<table>
- <tr><th>Architecture</th> <th>OS</th> <th>clang baseline</th> <th>tests</th></tr>
- <tr><td>x86-32</td> <td>Linux</td> <td>last release</td> <td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
- <tr><td>x86-32</td> <td>FreeBSD</td> <td>last release</td> <td>llvm dejagnu, clang tests, test-suite</td></tr>
- <tr><td>x86-32</td> <td>mingw</td> <td>none</td> <td>QT</td></tr>
- <tr><td>x86-64</td> <td>Mac OS 10.X</td> <td>last release</td> <td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
- <tr><td>x86-64</td> <td>Linux</td> <td>last release</td> <td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
- <tr><td>x86-64</td> <td>FreeBSD</td> <td>last release</td> <td>llvm dejagnu, clang tests, test-suite</td></tr>
-</table>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="commTest">Community Testing</a></h3>
-<div>
-
-<p>Once all testing has been completed and appropriate bugs filed, the release
- candidate tarballs are put on the website and the LLVM community is
- notified. Ask that all LLVM developers test the release in 2 ways:</p>
-
-<ol>
- <li>Download <tt>llvm-<i>X.Y</i></tt>, <tt>llvm-test-<i>X.Y</i></tt>, and the
- appropriate <tt>clang</tt> binary. Build LLVM. Run <tt>make check</tt> and
- the full LLVM test suite (<tt>make TEST=nightly report</tt>).</li>
-
- <li>Download <tt>llvm-<i>X.Y</i></tt>, <tt>llvm-test-<i>X.Y</i></tt>, and the
- <tt>clang</tt> sources. Compile everything. Run <tt>make check</tt> and
- the full LLVM test suite (<tt>make TEST=nightly report</tt>).</li>
-</ol>
-
-<p>Ask LLVM developers to submit the test suite report and <tt>make check</tt>
- results to the list. Verify that there are no regressions from the previous
- release. The results are not used to qualify a release, but to spot other
- potential problems. For unsupported targets, verify that <tt>make check</tt>
- is at least clean.</p>
-
-<p>During the first round of testing, all regressions must be fixed before the
- second release candidate is tagged.</p>
-
-<p>If this is the second round of testing, the testing is only to ensure that
- bug fixes previously merged in have not created new major problems. <i>This
- is not the time to solve additional and unrelated bugs!</i> If no patches are
- merged in, the release is determined to be ready and the release manager may
- move onto the next stage.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="release-patch">Release Patch Rules</a></h3>
-
-<div>
-
-<p>Below are the rules regarding patching the release branch:</p>
-
-<ol>
- <li><p>Patches applied to the release branch may only be applied by the
- release manager.</p></li>
-
- <li><p>During the first round of testing, patches that fix regressions or that
- are small and relatively risk free (verified by the appropriate code
- owner) are applied to the branch. Code owners are asked to be very
- conservative in approving patches for the branch. We reserve the right to
- reject any patch that does not fix a regression as previously
- defined.</p></li>
-
- <li><p>During the remaining rounds of testing, only patches that fix critical
- regressions may be applied.</p></li>
-</ol>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="release-final">Release Final Tasks</a></h3>
-
-<div>
-
-<p>The final stages of the release process involves tagging the "final" release
- branch, updating documentation that refers to the release, and updating the
- demo page.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="updocs">Update Documentation</a></h4>
-
-<div>
-
-<p>Review the documentation and ensure that it is up to date. The "Release
- Notes" must be updated to reflect new features, bug fixes, new known issues,
- and changes in the list of supported platforms. The "Getting Started Guide"
- should be updated to reflect the new release version number tag available from
- Subversion and changes in basic system requirements. Merge both changes from
- mainline into the release branch.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="tag">Tag the LLVM Final Release</a></h4>
-
-<div>
-
-<p>Tag the final release sources using the following procedure:</p>
-
-<div class="doc_code">
-<pre>
-$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
- https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>/Final
-
-$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
- https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>/Final
-
-$ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \
- https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_<i>XY</i>/Final
-
-$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
- https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>/Final
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="updemo">Update the LLVM Demo Page</a></h3>
-
-<div>
-
-<p>The LLVM demo page must be updated to use the new release. This consists of
- using the new <tt>clang</tt> binary and building LLVM.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="webupdates">Update the LLVM Website</a></h4>
-
-<div>
-
-<p>The website must be updated before the release announcement is sent out. Here
- is what to do:</p>
-
-<ol>
- <li>Check out the <tt>www</tt> module from Subversion.</li>
-
- <li>Create a new subdirectory <tt>X.Y</tt> in the releases directory.</li>
-
- <li>Commit the <tt>llvm</tt>, <tt>test-suite</tt>, <tt>clang</tt> source,
- <tt>clang binaries</tt>, <tt>dragonegg</tt> source, and <tt>dragonegg</tt>
- binaries in this new directory.</li>
-
- <li>Copy and commit the <tt>llvm/docs</tt> and <tt>LICENSE.txt</tt> files
- into this new directory. The docs should be built with
- <tt>BUILD_FOR_WEBSITE=1</tt>.</li>
-
- <li>Commit the <tt>index.html</tt> to the <tt>release/X.Y</tt> directory to
- redirect (use from previous release.</li>
-
- <li>Update the <tt>releases/download.html</tt> file with the new release.</li>
-
- <li>Update the <tt>releases/index.html</tt> with the new release and link to
- release documentation.</li>
-
- <li>Finally, update the main page (<tt>index.html</tt> and sidebar) to point
- to the new release and release announcement. Make sure this all gets
- committed back into Subversion.</li>
-</ol>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="announce">Announce the Release</a></h4>
-
-<div>
-
-<p>Have Chris send out the release announcement when everything is finished.</p>
-
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
- <br>
- Last modified: $Date: 2012-07-31 09:05:57 +0200 (Tue, 31 Jul 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/HowToReleaseLLVM.rst b/docs/HowToReleaseLLVM.rst
new file mode 100644
index 000000000000..31877bd35ac8
--- /dev/null
+++ b/docs/HowToReleaseLLVM.rst
@@ -0,0 +1,422 @@
+=================================
+How To Release LLVM To The Public
+=================================
+
+.. contents::
+ :local:
+ :depth: 1
+
+Introduction
+============
+
+This document contains information about successfully releasing LLVM ---
+including subprojects: e.g., ``clang`` and ``dragonegg`` --- to the public. It
+is the Release Manager's responsibility to ensure that a high quality build of
+LLVM is released.
+
+.. _timeline:
+
+Release Timeline
+================
+
+LLVM is released on a time-based schedule --- roughly every 6 months. We do
+not normally have dot releases because of the nature of LLVM's incremental
+development philosophy. That said, the only thing preventing dot releases for
+critical bug fixes from happening is a lack of resources --- testers,
+machines, time, etc. And, because of the high quality we desire for LLVM
+releases, we cannot allow for a truncated form of release qualification.
+
+The release process is roughly as follows:
+
+* Set code freeze and branch creation date for 6 months after last code freeze
+ date. Announce release schedule to the LLVM community and update the website.
+
+* Create release branch and begin release process.
+
+* Send out release candidate sources for first round of testing. Testing lasts
+ 7-10 days. During the first round of testing, any regressions found should be
+ fixed. Patches are merged from mainline into the release branch. Also, all
+ features need to be completed during this time. Any features not completed at
+ the end of the first round of testing will be removed or disabled for the
+ release.
+
+* Generate and send out the second release candidate sources. Only *critical*
+  bugs found during this testing phase will be fixed. Any bugs introduced by
+  merged patches will be fixed; if so, a third round of testing is needed.
+
+* The release notes are updated.
+
+* Finally, release!
+
+Release Process
+===============
+
+.. contents::
+ :local:
+
+Release Administrative Tasks
+----------------------------
+
+This section describes a few administrative tasks that need to be done for the
+release process to begin. Specifically, it involves:
+
+* Creating the release branch,
+
+* Setting version numbers, and
+
+* Tagging release candidates for the release team to begin testing.
+
+Create Release Branch
+^^^^^^^^^^^^^^^^^^^^^
+
+Branch the Subversion trunk using the following procedure:
+
+#. Remind developers that the release branching is imminent and to refrain from
+ committing patches that might break the build. E.g., new features, large
+ patches for works in progress, an overhaul of the type system, an exciting
+ new TableGen feature, etc.
+
+#. Verify that the current Subversion trunk is in decent shape by
+ examining nightly tester and buildbot results.
+
+#. Create the release branch for ``llvm``, ``clang``, the ``test-suite``, and
+ ``dragonegg`` from the last known good revision. The branch's name is
+ ``release_XY``, where ``X`` is the major and ``Y`` the minor release
+   number. The branches should be created using the following commands:
+
+ ::
+
+ $ svn copy https://llvm.org/svn/llvm-project/llvm/trunk \
+ https://llvm.org/svn/llvm-project/llvm/branches/release_XY
+
+ $ svn copy https://llvm.org/svn/llvm-project/cfe/trunk \
+ https://llvm.org/svn/llvm-project/cfe/branches/release_XY
+
+ $ svn copy https://llvm.org/svn/llvm-project/dragonegg/trunk \
+ https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY
+
+ $ svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
+ https://llvm.org/svn/llvm-project/test-suite/branches/release_XY
+
+#. Advise developers that they may now check their patches into the Subversion
+ tree again.
+
+#. The Release Manager should switch to the release branch, because all changes
+ to the release will now be done in the branch. The easiest way to do this is
+ to grab a working copy using the following commands:
+
+ ::
+
+ $ svn co https://llvm.org/svn/llvm-project/llvm/branches/release_XY llvm-X.Y
+
+ $ svn co https://llvm.org/svn/llvm-project/cfe/branches/release_XY clang-X.Y
+
+ $ svn co https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY dragonegg-X.Y
+
+ $ svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_XY test-suite-X.Y
+
+Update LLVM Version
+^^^^^^^^^^^^^^^^^^^
+
+After creating the LLVM release branch, update the release branches'
+``autoconf`` and ``configure.ac`` versions from '``X.Ysvn``' to '``X.Y``'.
+Update it on mainline as well to be the next version ('``X.Y+1svn``').
+Regenerate the configure scripts for both ``llvm`` and the ``test-suite``.
+
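+A sketch of what this amounts to for ``llvm`` itself (the version string is
+normally edited by hand, and regenerating requires the autoconf version the
+project expects to be installed):
+
+::
+
+  $ sed -i.bak -e 's/X.Ysvn/X.Y/' autoconf/configure.ac
+  $ cd autoconf && ./AutoRegen.sh
+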
+In addition, the version numbers of all the Bugzilla components must be updated
+for the next release.
+
+Build the LLVM Release Candidates
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Create release candidates for ``llvm``, ``clang``, ``dragonegg``, and the LLVM
+``test-suite`` by tagging the branch with the respective release candidate
+number. For instance, to create **Release Candidate 1** you would issue the
+following commands:
+
+::
+
+ $ svn mkdir https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY
+ $ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
+ https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1
+
+ $ svn mkdir https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY
+ $ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
+ https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1
+
+ $ svn mkdir https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY
+ $ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \
+ https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/rc1
+
+ $ svn mkdir https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY
+ $ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
+ https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1
+
+Similarly, **Release Candidate 2** would be named ``rc2`` and so on. This keeps
+a permanent copy of the release candidate around for people to export and build
+as they wish. The final released sources will be tagged in the ``RELEASE_XY``
+directory as ``Final`` (c.f. :ref:`tag`).
+
+The Release Manager may supply pre-packaged source tarballs for users. This can
+be done with the following commands:
+
+::
+
+ $ svn export https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1 llvm-X.Yrc1
+ $ svn export https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1 clang-X.Yrc1
+ $ svn export https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/rc1 dragonegg-X.Yrc1
+ $ svn export https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1 llvm-test-X.Yrc1
+
+ $ tar -cvf - llvm-X.Yrc1 | gzip > llvm-X.Yrc1.src.tar.gz
+ $ tar -cvf - clang-X.Yrc1 | gzip > clang-X.Yrc1.src.tar.gz
+ $ tar -cvf - dragonegg-X.Yrc1 | gzip > dragonegg-X.Yrc1.src.tar.gz
+ $ tar -cvf - llvm-test-X.Yrc1 | gzip > llvm-test-X.Yrc1.src.tar.gz
+
+Building the Release
+--------------------
+
+The builds of ``llvm``, ``clang``, and ``dragonegg`` *must* be free of
+errors and warnings in Debug, Release+Asserts, and Release builds. If all
+builds are clean, then the release passes Build Qualification.
+
+The ``make`` options for building the different modes:
+
++-----------------+---------------------------------------------+
+| Mode | Options |
++=================+=============================================+
+| Debug | ``ENABLE_OPTIMIZED=0`` |
++-----------------+---------------------------------------------+
+| Release+Asserts | ``ENABLE_OPTIMIZED=1`` |
++-----------------+---------------------------------------------+
+| Release | ``ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1`` |
++-----------------+---------------------------------------------+
+
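+For example, a Release mode build might be produced with a command like the
+following (a sketch; ``-j4`` is an arbitrary degree of parallelism):
+
+::
+
+  $ make -j4 ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1
+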
+Build LLVM
+^^^^^^^^^^
+
+Build ``Debug``, ``Release+Asserts``, and ``Release`` versions
+of ``llvm`` on all supported platforms. Directions to build ``llvm``
+are :doc:`here <GettingStarted>`.
+
+Build Clang Binary Distribution
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Creating the ``clang`` binary distribution (Debug/Release+Asserts/Release)
+requires performing the following steps for each supported platform:
+
+#. Build clang according to the directions `here
+ <http://clang.llvm.org/get_started.html>`__.
+
+#. Build both a Debug and Release version of clang. The binary will be the
+ Release build.
+
+#. Package ``clang`` (details to follow).
+
+Target Specific Build Details
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The table below specifies which compilers are used for each Arch/OS combination
+when qualifying the build of ``llvm``, ``clang``, and ``dragonegg``.
+
++--------------+---------------+----------------------+
+| Architecture | OS | compiler |
++==============+===============+======================+
+| x86-32 | Mac OS 10.5 | gcc 4.0.1 |
++--------------+---------------+----------------------+
+| x86-32 | Linux | gcc 4.2.X, gcc 4.3.X |
++--------------+---------------+----------------------+
+| x86-32 | FreeBSD | gcc 4.2.X |
++--------------+---------------+----------------------+
+| x86-32 | mingw | gcc 3.4.5 |
++--------------+---------------+----------------------+
+| x86-64 | Mac OS 10.5 | gcc 4.0.1 |
++--------------+---------------+----------------------+
+| x86-64 | Linux | gcc 4.2.X, gcc 4.3.X |
++--------------+---------------+----------------------+
+| x86-64 | FreeBSD | gcc 4.2.X |
++--------------+---------------+----------------------+
+
+Release Qualification Criteria
+------------------------------
+
+A release is qualified when it has no regressions from the previous release (or
+baseline). Regressions are related to correctness first and performance second.
+(We may tolerate some minor performance regressions if they are deemed
+necessary for the general quality of the compiler.)
+
+**Regressions are new failures in the set of tests that are used to qualify
+each product, and only include things on the list. Every release will have
+some bugs in it; that is the reality of developing a complex piece of
+software. We need very concrete and definitive release criteria that
+ensure we have monotonically improving quality on some metric. The metric we
+use is described below. This doesn't mean that we don't care about other
+criteria, but these are the criteria which we found to be most important and
+which must be satisfied before a release can go out.**
+
+Qualify LLVM
+^^^^^^^^^^^^
+
+LLVM is qualified when it has a clean test run without a front-end, and when it
+has no regressions when using either ``clang`` or ``dragonegg`` with the
+``test-suite`` from the previous release.
+
+Qualify Clang
+^^^^^^^^^^^^^
+
+``Clang`` is qualified when front-end specific tests in the ``llvm`` regression
+test suite all pass, clang's own test suite passes cleanly, and there are no
+regressions in the ``test-suite``.
+
+Specific Target Qualification Details
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------+-------------+----------------+-----------------------------+
+| Architecture | OS | clang baseline | tests |
++==============+=============+================+=============================+
+| x86-32 | Linux | last release | llvm regression tests, |
+| | | | clang regression tests, |
+| | | | test-suite (including spec) |
++--------------+-------------+----------------+-----------------------------+
+| x86-32 | FreeBSD | last release | llvm regression tests, |
+| | | | clang regression tests, |
+| | | | test-suite |
++--------------+-------------+----------------+-----------------------------+
+| x86-32 | mingw | none | QT |
++--------------+-------------+----------------+-----------------------------+
+| x86-64 | Mac OS 10.X | last release | llvm regression tests, |
+| | | | clang regression tests, |
+| | | | test-suite (including spec) |
++--------------+-------------+----------------+-----------------------------+
+| x86-64 | Linux | last release | llvm regression tests, |
+| | | | clang regression tests, |
+| | | | test-suite (including spec) |
++--------------+-------------+----------------+-----------------------------+
+| x86-64 | FreeBSD | last release | llvm regression tests, |
+| | | | clang regression tests, |
+| | | | test-suite |
++--------------+-------------+----------------+-----------------------------+
+
+Community Testing
+-----------------
+
+Once all testing has been completed and appropriate bugs filed, the release
+candidate tarballs are put on the website and the LLVM community is notified.
+Ask that all LLVM developers test the release in 2 ways:
+
+#. Download ``llvm-X.Y``, ``llvm-test-X.Y``, and the appropriate ``clang``
+ binary. Build LLVM. Run ``make check`` and the full LLVM test suite (``make
+ TEST=nightly report``).
+
+#. Download ``llvm-X.Y``, ``llvm-test-X.Y``, and the ``clang`` sources. Compile
+ everything. Run ``make check`` and the full LLVM test suite (``make
+ TEST=nightly report``).
+
+Ask LLVM developers to submit the test suite report and ``make check`` results
+to the list. Verify that there are no regressions from the previous release.
+The results are not used to qualify a release, but to spot other potential
+problems. For unsupported targets, verify that ``make check`` is at least
+clean.
+
+During the first round of testing, all regressions must be fixed before the
+second release candidate is tagged.
+
+If this is the second round of testing, the testing is only to ensure that bug
+fixes previously merged in have not created new major problems. *This is not
+the time to solve additional and unrelated bugs!* If no patches are merged in,
+the release is determined to be ready and the release manager may move on to
+next stage.
+
+Release Patch Rules
+-------------------
+
+Below are the rules regarding patching the release branch:
+
+#. Patches applied to the release branch may only be applied by the release
+ manager.
+
+#. During the first round of testing, patches that fix regressions or that are
+ small and relatively risk free (verified by the appropriate code owner) are
+ applied to the branch. Code owners are asked to be very conservative in
+ approving patches for the branch. We reserve the right to reject any patch
+ that does not fix a regression as previously defined.
+
+#. During the remaining rounds of testing, only patches that fix critical
+ regressions may be applied.
+
+Release Final Tasks
+-------------------
+
+The final stages of the release process involve tagging the "final" release
+branch, updating documentation that refers to the release, and updating the
+demo page.
+
+Update Documentation
+^^^^^^^^^^^^^^^^^^^^
+
+Review the documentation and ensure that it is up to date. The "Release Notes"
+must be updated to reflect new features, bug fixes, new known issues, and
+changes in the list of supported platforms. The "Getting Started Guide" should
+be updated to reflect the new release version number tag available from
+Subversion and changes in basic system requirements. Merge both changes from
+mainline into the release branch.
+
+.. _tag:
+
+Tag the LLVM Final Release
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Tag the final release sources using the following procedure:
+
+::
+
+ $ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
+ https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/Final
+
+ $ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
+ https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/Final
+
+ $ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \
+ https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/Final
+
+ $ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
+ https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/Final
+
+Update the LLVM Demo Page
+-------------------------
+
+The LLVM demo page must be updated to use the new release. This consists of
+using the new ``clang`` binary and building LLVM.
+
+Update the LLVM Website
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The website must be updated before the release announcement is sent out. Here
+is what to do:
+
+#. Check out the ``www`` module from Subversion.
+
+#. Create a new subdirectory ``X.Y`` in the releases directory.
+
+#. Commit the ``llvm``, ``test-suite``, and ``clang`` source, the ``clang``
+   binaries, and the ``dragonegg`` source and binaries in this new directory.
+
+#. Copy and commit the ``llvm/docs`` and ``LICENSE.txt`` files into this new
+ directory. The docs should be built with ``BUILD_FOR_WEBSITE=1``.
+
+#. Commit the ``index.html`` to the ``release/X.Y`` directory to redirect (use
+   the one from the previous release).
+
+#. Update the ``releases/download.html`` file with the new release.
+
+#. Update the ``releases/index.html`` with the new release and link to release
+ documentation.
+
+#. Finally, update the main page (``index.html`` and sidebar) to point to the
+ new release and release announcement. Make sure this all gets committed back
+ into Subversion.
+
+Announce the Release
+^^^^^^^^^^^^^^^^^^^^
+
+Have Chris send out the release announcement when everything is finished.
+
diff --git a/docs/HowToSetUpLLVMStyleRTTI.rst b/docs/HowToSetUpLLVMStyleRTTI.rst
index aa1ad84afee3..e0f865a141c7 100644
--- a/docs/HowToSetUpLLVMStyleRTTI.rst
+++ b/docs/HowToSetUpLLVMStyleRTTI.rst
@@ -1,11 +1,7 @@
-.. _how-to-set-up-llvm-style-rtti:
-
======================================================
How to set up LLVM-style RTTI for your class hierarchy
======================================================
-.. sectionauthor:: Sean Silva <silvas@purdue.edu>
-
.. contents::
Background
@@ -299,6 +295,78 @@ ordering right::
| OtherSpecialSquare
| Circle
+A Bug to be Aware Of
+--------------------
+
+The example just given opens the door to bugs where the ``classof``\s are
+not updated to match the ``Kind`` enum when adding (or removing) classes to
+(from) the hierarchy.
+
+Continuing the example above, suppose we add a ``SomewhatSpecialSquare`` as
+a subclass of ``Square``, and update the ``ShapeKind`` enum like so:
+
+.. code-block:: c++
+
+ enum ShapeKind {
+ SK_Square,
+ SK_SpecialSquare,
+ SK_OtherSpecialSquare,
+ + SK_SomewhatSpecialSquare,
+ SK_Circle
+ }
+
+Now, suppose that we forget to update ``Square::classof()``, so it still
+looks like:
+
+.. code-block:: c++
+
+ static bool classof(const Shape *S) {
+ // BUG: Returns false when S->getKind() == SK_SomewhatSpecialSquare,
+ // even though SomewhatSpecialSquare "is a" Square.
+ return S->getKind() >= SK_Square &&
+ S->getKind() <= SK_OtherSpecialSquare;
+ }
+
+As the comment indicates, this code contains a bug. A straightforward and
+non-clever way to avoid this is to introduce an explicit ``SK_LastSquare``
+entry in the enum when adding the first subclass(es). For example, we could
+rewrite the example at the beginning of `Concrete Bases and Deeper
+Hierarchies`_ as:
+
+.. code-block:: c++
+
+ enum ShapeKind {
+ SK_Square,
+ + SK_SpecialSquare,
+ + SK_OtherSpecialSquare,
+ + SK_LastSquare,
+ SK_Circle
+ }
+ ...
+ // Square::classof()
+ - static bool classof(const Shape *S) {
+ - return S->getKind() == SK_Square;
+ - }
+ + static bool classof(const Shape *S) {
+ + return S->getKind() >= SK_Square &&
+ + S->getKind() <= SK_LastSquare;
+ + }
+
+Then, adding new subclasses is easy:
+
+.. code-block:: c++
+
+ enum ShapeKind {
+ SK_Square,
+ SK_SpecialSquare,
+ SK_OtherSpecialSquare,
+ + SK_SomewhatSpecialSquare,
+ SK_LastSquare,
+ SK_Circle
+ }
+
+Notice that ``Square::classof`` does not need to be changed.
+
.. _classof-contract:
The Contract of ``classof``
diff --git a/docs/HowToSubmitABug.rst b/docs/HowToSubmitABug.rst
index ff2d649ce33c..45be2826b301 100644
--- a/docs/HowToSubmitABug.rst
+++ b/docs/HowToSubmitABug.rst
@@ -1,11 +1,7 @@
-.. _how-to-submit-a-bug-report:
-
================================
How to submit an LLVM bug report
================================
-.. sectionauthor:: Chris Lattner <sabre@nondot.org> and Misha Brukman <http://misha.brukman.net>
-
Introduction - Got bugs?
========================
diff --git a/docs/HowToUseAttributes.rst b/docs/HowToUseAttributes.rst
new file mode 100644
index 000000000000..66c44c01f631
--- /dev/null
+++ b/docs/HowToUseAttributes.rst
@@ -0,0 +1,81 @@
+=====================
+How To Use Attributes
+=====================
+
+.. contents::
+ :local:
+
+Introduction
+============
+
+Attributes in LLVM have changed in some fundamental ways.  This was necessary
+to support expanding attributes beyond a small, fixed set --- e.g. to include
+command line options.  The old way of handling attributes
+consisted of representing them as a bit mask of values. This bit mask was
+stored in a "list" structure that was reference counted. The advantage of this
+was that attributes could be manipulated with 'or's and 'and's. The
+disadvantage of this was that there was limited room for expansion, and
+virtually no support for attribute-value pairs other than alignment.
+
+In the new scheme, an ``Attribute`` object represents a single attribute that's
+uniqued. You use the ``Attribute::get`` methods to create a new ``Attribute``
+object. An attribute can be a single "enum" value (the enum being the
+``Attribute::AttrKind`` enum), a string representing a target-dependent
+attribute, or an attribute-value pair. Some examples:
+
+* Target-independent: ``noinline``, ``zext``
+* Target-dependent: ``"no-sse"``, ``"thumb2"``
+* Attribute-value pair: ``"cpu" = "cortex-a8"``, ``align = 4``
+
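+A minimal sketch of creating such attributes (assuming an ``LLVMContext &C``
+is in scope; the variable names, and the alignment helper, are illustrative):
+
+.. code-block:: c++
+
+   // A single "enum" attribute.
+   Attribute NoInline = Attribute::get(C, Attribute::NoInline);
+
+   // A target-dependent string attribute.
+   Attribute NoSSE = Attribute::get(C, "no-sse");
+
+   // An attribute-value pair: an alignment of 4 bytes.
+   Attribute Align4 = Attribute::getWithAlignment(C, 4);
+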
+Note: for an attribute value pair, we expect a target-dependent attribute to
+have a string for the value.
+
+``Attribute``
+=============
+An ``Attribute`` object is designed to be passed around by value.
+
+Because attributes are no longer represented as a bit mask, you will need to
+convert any code that treats them as a bit mask to use the new query
+methods on the ``Attribute`` class.
+
+``AttributeSet``
+================
+
+The ``AttributeSet`` class replaces the old ``AttributeList`` class. The
+``AttributeSet`` stores a collection of Attribute objects for each kind of
+object that may have an attribute associated with it: the function as a
+whole, the return type, or the function's parameters. A function's attributes
+are at index ``AttributeSet::FunctionIndex``; the return type's attributes are
+at index ``AttributeSet::ReturnIndex``; and the function's parameters'
+attributes are at indices 1, ..., n (where 'n' is the number of parameters).
+Most methods on the ``AttributeSet`` class take an index parameter.
+
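+For example, querying attributes might look like the following sketch
+(assuming a ``Function *F``):
+
+.. code-block:: c++
+
+   AttributeSet AS = F->getAttributes();
+
+   // Does the function itself carry the noinline attribute?
+   bool IsNoInline = AS.hasAttribute(AttributeSet::FunctionIndex,
+                                     Attribute::NoInline);
+
+   // Is the first parameter marked zext?  (Parameters start at index 1.)
+   bool IsZExt = AS.hasAttribute(1, Attribute::ZExt);
+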
+An ``AttributeSet`` is also a uniqued and immutable object. You create an
+``AttributeSet`` through the ``AttributeSet::get`` methods. You can add and
+remove attributes, which results in the creation of a new ``AttributeSet``.
+
+An ``AttributeSet`` object is designed to be passed around by value.
+
+Note: It is advised that you do *not* use the ``AttributeSet`` "introspection"
+methods (e.g. ``Raw``, ``getRawPointer``, etc.). These methods break
+encapsulation, and may be removed in a future release (i.e. LLVM 4.0).
+
+``AttrBuilder``
+===============
+
+Lastly, we have a "builder" class to help create the ``AttributeSet`` object
+without having to create several different intermediate uniqued
+``AttributeSet`` objects. The ``AttrBuilder`` class allows you to add and
+remove attributes at will. The attributes won't be uniqued until you call the
+appropriate ``AttributeSet::get`` method.
+
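+A small sketch of the pattern (again assuming an ``LLVMContext &C``):
+
+.. code-block:: c++
+
+   AttrBuilder B;
+   B.addAttribute(Attribute::NoInline)
+    .addAttribute(Attribute::NoUnwind);
+
+   // The attributes are uniqued here, as function-level attributes.
+   AttributeSet AS = AttributeSet::get(C, AttributeSet::FunctionIndex, B);
+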
+An ``AttrBuilder`` object is *not* designed to be passed around by value. It
+should be passed by reference.
+
+Note: It is advised that you do *not* use the ``AttrBuilder::addRawValue()``
+method or the ``AttrBuilder(uint64_t Val)`` constructor. These are for
+backwards compatibility and may be removed in a future release (i.e. LLVM 4.0).
+
+And that's basically it! A lot of functionality is hidden behind these classes,
+but the interfaces are pretty straightforward.
+
diff --git a/docs/HowToUseInstrMappings.rst b/docs/HowToUseInstrMappings.rst
index b51e74e23c29..8a3e7c8d726d 100755
--- a/docs/HowToUseInstrMappings.rst
+++ b/docs/HowToUseInstrMappings.rst
@@ -1,11 +1,7 @@
-.. _how_to_use_instruction_mappings:
-
===============================
How To Use Instruction Mappings
===============================
-.. sectionauthor:: Jyotsna Verma <jverma@codeaurora.org>
-
.. contents::
:local:
@@ -120,7 +116,7 @@ to include relevant information in its definition. For example, consider
following to be the current definitions of ADD, ADD_pt (true) and ADD_pf (false)
instructions:
-.. code-block::llvm
+.. code-block:: llvm
def ADD : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
"$dst = add($a, $b)",
@@ -141,7 +137,7 @@ In this step, we modify these instructions to include the information
required by the relationship model, <tt>getPredOpcode</tt>, so that they can
be related.
-.. code-block::llvm
+.. code-block:: llvm
def ADD : PredRel, ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
"$dst = add($a, $b)",
diff --git a/docs/LLVMBuild.html b/docs/LLVMBuild.html
deleted file mode 100644
index 9e7f8c765775..000000000000
--- a/docs/LLVMBuild.html
+++ /dev/null
@@ -1,368 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>LLVMBuild Documentation</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>LLVMBuild Guide</h1>
-
-<ol>
- <li><a href="#introduction">Introduction</a></li>
- <li><a href="#projectorg">Project Organization</a></li>
- <li><a href="#buildintegration">Build Integration</a></li>
- <li><a href="#componentoverview">Component Overview</a></li>
- <li><a href="#formatreference">Format Reference</a></li>
-</ol>
-
-<!-- *********************************************************************** -->
-<h2><a name="introduction">Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
- <p>This document describes the <tt>LLVMBuild</tt> organization and files which
- we use to describe parts of the LLVM ecosystem. For description of specific
- LLVMBuild related tools, please see the command guide.</p>
-
- <p>LLVM is designed to be a modular set of libraries which can be flexibly
- mixed together in order to build a variety of tools, like compilers, JITs,
- custom code generators, optimization passes, interpreters, and so on. Related
- projects in the LLVM system like Clang and LLDB also tend to follow this
- philosophy.</p>
-
- <p>In order to support this usage style, LLVM has a fairly strict structure as
- to how the source code and various components are organized. The
- <tt>LLVMBuild.txt</tt> files are the explicit specification of that structure,
- and are used by the build systems and other tools in order to develop the LLVM
- project.</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="projectorg">Project Organization</a></h2>
-<!-- *********************************************************************** -->
-
-<!-- FIXME: We should probably have an explicit top level project object. Good
-place to hang project level data, name, etc. Also useful for serving as the
-$ROOT of project trees for things which can be checked out separately. -->
-
-<div>
- <p>The source code for LLVM projects using the LLVMBuild system (LLVM, Clang,
- and LLDB) is organized into <em>components</em>, which define the separate
- pieces of functionality that make up the project. These projects may consist
- of many libraries, associated tools, build tools, or other utility tools (for
- example, testing tools).</p>
-
- <p>For the most part, the project contents are organized around defining one
- main component per each subdirectory. Each such directory contains
- an <tt>LLVMBuild.txt</tt> which contains the component definitions.</p>
-
- <p>The component descriptions for the project as a whole are automatically
- gathered by the LLVMBuild tools. The tools automatically traverse the source
- directory structure to find all of the component description files. NOTE: For
- performance/sanity reasons, we only traverse into subdirectories when the
- parent itself contains an <tt>LLVMBuild.txt</tt> description file.</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="buildintegration">Build Integration</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
- <p>The LLVMBuild files themselves are just a declarative way to describe the
- project structure. The actual building of the LLVM project is handled by
- another build system (currently we support
- both <a href="MakefileGuide.html">Makefiles</a>
- and <a href="CMake.html">CMake</a>.</p>
-
- <p>The build system implementation will load the relevant contents of the
- LLVMBuild files and use that to drive the actual project build. Typically, the
- build system will only need to load this information at "configure" time, and
- use it to generative native information. Build systems will also handle
- automatically reconfiguring their information when the contents of
- the <i>LLVMBuild.txt</i> files change.</p>
-
- <p>Developers generally are not expected to need to be aware of the details of
- how the LLVMBuild system is integrated into their build. Ideally, LLVM
- developers who are not working on the build system would only ever need to
- modify the contents of the <i>LLVMBuild.txt</i> description files (although we
- have not reached this goal yet).</p>
-
- <p>For more information on the utility tool we provide to help interfacing
- with the build system, please see
- the <a href="CommandGuide/html/llvm-build.html">llvm-build</a>
- documentation.</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="componentoverview">Component Overview</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
- <p>As mentioned earlier, LLVM projects are organized into
- logical <em>components</em>. Every component is typically grouped into its
- own subdirectory. Generally, a component is organized around a coherent group
- of sources which have some kind of clear API separation from other parts of
- the code.</p>
-
- <p>LLVM primarily uses the following types of components:</p>
- <ul>
- <li><em>Libraries</em> - Library components define a distinct API which can
- be independently linked into LLVM client applications. Libraries typically
- have private and public header files, and may specify a link of required
- libraries that they build on top of.</li>
-
- <li><em>Build Tools</em> - Build tools are applications which are designed
- to be run as part of the build process (typically to generate other source
- files). Currently, LLVM uses one main build tool
- called <a href="TableGenFundamentals.html">TableGen</a> to generate a
- variety of source files.</li>
-
- <li><em>Tools</em> - Command line applications which are built using the
- LLVM component libraries. Most LLVM tools are small and are primarily
- frontends to the library interfaces.</li>
-
-<!-- FIXME: We also need shared libraries as a first class component, but this
- is not yet implemented. -->
- </ul>
-
- <p>Components are described using <em>LLVMBuild.txt</em> files in the
- directories that define the component. See
- the <a href="#formatreference">Format Reference</a> section for information on
- the exact format of these files.</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="formatreference">LLVMBuild Format Reference</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
- <p>LLVMBuild files are written in a simple variant of the INI or configuration
- file format (<a href="http://en.wikipedia.org/wiki/INI_file">Wikipedia
- entry</a>). The format defines a list of sections each of which may contain
- some number of properties. A simple example of the file format is below:</p>
- <div class="doc_code">
- <pre>
-<i>; Comments start with a semi-colon.</i>
-
-<i>; Sections are declared using square brackets.</i>
-[component_0]
-
-<i>; Properties are declared using '=' and are contained in the previous section.
-;
-; We support simple string and boolean scalar values and list values, where
-; items are separated by spaces. There is no support for quoting, and so
-; property values may not contain spaces.</i>
-property_name = property_value
-list_property_name = value_1 value_2 <em>...</em> value_n
-boolean_property_name = 1 <em>(or 0)</em>
-</pre>
- </div>
-
- <p>LLVMBuild files are expected to define a strict set of sections and
- properties. An typical component description file for a library
- component would look typically look like the following example:</p>
- <div class="doc_code">
- <pre>
-[component_0]
-type = Library
-name = Linker
-parent = Libraries
-required_libraries = Archive BitReader Core Support TransformUtils
-</pre>
- </div>
-
- <p>A full description of the exact sections and properties which are allowed
- follows.</p>
-
- <p>Each file may define exactly one common component, named "common". The
- common component may define the following properties:</p>
- <ul>
- <li><i>subdirectories</i> <b>[optional]</b>
- <p>If given, a list of the names of the subdirectories from the current
- subpath to search for additional LLVMBuild files.</p></li>
- </ul>
-
- <p>Each file may define multiple components. Each component is described by a
- section who name starts with "component". The remainder of the section name is
- ignored, but each section name must be unique. Typically components are just
- number in order for files with multiple components ("component_0",
- "component_1", and so on).<p>
-
- <p><b>Section names not matching this format (or the "common" section) are
- currently unused and are disallowed.</b></p>
-
- <p>Every component is defined by the properties in the section. The exact list
- of properties that are allowed depends on the component
- type. Components <b>may not</b> define any properties other than those
- expected by the component type.</p>
-
- <p>Every component must define the following properties:</p>
- <ul>
- <li><i>type</i> <b>[required]</b>
- <p>The type of the component. Supported component types are
- detailed below. Most components will define additional properties which
- may be required or optional.</p></li>
-
- <li><i>name</i> <b>[required]</b>
- <p>The name of the component. Names are required to be unique
- across the entire project.</p></li>
-
- <li><i>parent</i> <b>[required]</b>
- <p>The name of the logical parent of the component. Components are
- organized into a logical tree to make it easier to navigate and organize
- groups of components. The parents have no semantics as far as the project
- build is concerned, however. Typically, the parent will be the main
- component of the parent directory.</p>
-
- <!-- FIXME: Should we make the parent optional, and default to parent
- directories component? -->
-
- <p>Components may reference the root pseudo component using '$ROOT' to
- indicate they should logically be grouped at the top-level.</p>
- </li>
- </ul>
-
- <p>Components may define the following properties:</p>
- <ul>
- <li><i>dependencies</i> <b>[optional]</b>
- <p>If specified, a list of names of components which <i>must</i> be built
- prior to this one. This should only be exactly those components which
- produce some tool or source code required for building the
- component.</p>
-
- <p><em>NOTE:</em> Group and LibraryGroup components have no semantics for
- the actual build, and are not allowed to specify dependencies.</p></li>
- </ul>
-
- <p>The following section lists the available component types, as well as the
- properties which are associated with that component.</p>
-
- <ul>
- <li><i>type = Group</i>
- <p>Group components exist purely to allow additional arbitrary structuring
- of the logical components tree. For example, one might define a
- "Libraries" group to hold all of the root library components.</p>
-
- <p>Group components have no additionally properties.</p>
- </li>
-
- <li><i>type = Library</i>
- <p>Library components define an individual library which should be built
- from the source code in the component directory.</p>
-
- <p>Components with this type use the following properties:</p>
- <ul>
- <li><i>library_name</i> <b>[optional]</b>
- <p>If given, the name to use for the actual library file on disk. If
- not given, the name is derived from the component name
- itself.</p></li>
-
- <li><i>required_libraries</i> <b>[optional]</b>
- <p>If given, a list of the names of Library or LibraryGroup components
- which must also be linked in whenever this library is used. That is,
- the link time dependencies for this component. When tools are built,
- the build system will include the transitive closure of
- all <i>required_libraries</i> for the components the tool needs.</p></li>
-
- <li><i>add_to_library_groups</i> <b>[optional]</b>
- <p>If given, a list of the names of LibraryGroup components which this
- component is also part of. This allows nesting groups of
- components. For example, the <i>X86</i> target might define a library
- group for all of the <i>X86</i> components. That library group might
- then be included in the <i>all-targets</i> library group.</p></li>
-
- <li><i>installed</i> <b>[optional]</b> <b>[boolean]</b>
- <p>Whether this library is installed. Libraries that are not installed
- are only reported by <tt>llvm-config</tt> when it is run as part of a
- development directory.</p></li>
- </ul>
- </li>
-
- <li><i>type = LibraryGroup</i>
- <p>LibraryGroup components are a mechanism to allow easy definition of
- useful sets of related components. In particular, we use them to easily
- specify things like "all targets", or "all assembly printers".</p>
-
- <p>Components with this type use the following properties:</p>
- <ul>
- <li><i>required_libraries</i> <b>[optional]</b>
- <p>See the Library type for a description of this property.</p></li>
-
- <li><i>add_to_library_groups</i> <b>[optional]</b>
- <p>See the Library type for a description of this property.</p></li>
- </ul>
- </li>
-
- <li><i>type = TargetGroup</i>
- <p>TargetGroup components are an extension of LibraryGroups, specifically
- for defining LLVM targets (which are handled specially in a few
- places).</p>
-
- <p>The name of the component should always be the name of the target.</p>
-
- <p>Components with this type use the LibraryGroup properties in addition
- to:</p>
- <ul>
- <li><i>has_asmparser</i> <b>[optional]</b> <b>[boolean]</b>
- <p>Whether this target defines an assembly parser.</p></li>
- <li><i>has_asmprinter</i> <b>[optional]</b> <b>[boolean]</b>
- <p>Whether this target defines an assembly printer.</p></li>
- <li><i>has_disassembler</i> <b>[optional]</b> <b>[boolean]</b>
- <p>Whether this target defines a disassembler.</p></li>
- <li><i>has_jit</i> <b>[optional]</b> <b>[boolean]</b>
- <p>Whether this target supports JIT compilation.</p></li>
- </ul>
- </li>
-
- <li><i>type = Tool</i>
- <p>Tool components define standalone command line tools which should be
- built from the source code in the component directory and linked.</p>
-
- <p>Components with this type use the following properties:</p>
- <ul>
- <li><i>required_libraries</i> <b>[optional]</b>
-
- <p>If given, a list of the names of Library or LibraryGroup components
- which this tool is required to be linked with. <b>NOTE:</b> The values
- should be the component names, which may not always match up with the
- actual library names on disk.</p>
-
- <p>Build systems are expected to properly include all of the libraries
- required by the linked components (i.e., the transitive closer
- of <em>required_libraries</em>).</p>
-
- <p>Build systems are also expected to understand that those library
- components must be built prior to linking -- they do not also need to
- be listed under <i>dependencies</i>.</p></li>
- </ul>
- </li>
-
- <li><i>type = BuildTool</i>
- <p>BuildTool components are like Tool components, except that the tool is
- supposed to be built for the platform where the build is running (instead
- of that platform being targetted). Build systems are expected to handle
- the fact that required libraries may need to be built for multiple
- platforms in order to be able to link this tool.</p>
-
- <p>BuildTool components currently use the exact same properties as Tool
- components, the type distinction is only used to differentiate what the
- tool is built for.</p>
- </li>
- </ul>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/LLVMBuild.rst b/docs/LLVMBuild.rst
new file mode 100644
index 000000000000..d9215dd8eb52
--- /dev/null
+++ b/docs/LLVMBuild.rst
@@ -0,0 +1,325 @@
+===============
+LLVMBuild Guide
+===============
+
+.. contents::
+ :local:
+
+Introduction
+============
+
+This document describes the ``LLVMBuild`` organization and files which
+we use to describe parts of the LLVM ecosystem. For a description of
+specific LLVMBuild-related tools, please see the command guide.
+
+LLVM is designed to be a modular set of libraries which can be flexibly
+mixed together in order to build a variety of tools, like compilers,
+JITs, custom code generators, optimization passes, interpreters, and so
+on. Related projects in the LLVM system like Clang and LLDB also tend to
+follow this philosophy.
+
+In order to support this usage style, LLVM has a fairly strict structure
+as to how the source code and various components are organized. The
+``LLVMBuild.txt`` files are the explicit specification of that
+structure, and are used by the build systems and other tools in order to
+develop the LLVM project.
+
+Project Organization
+====================
+
+The source code for LLVM projects using the LLVMBuild system (LLVM,
+Clang, and LLDB) is organized into *components*, which define the
+separate pieces of functionality that make up the project. These
+projects may consist of many libraries, associated tools, build tools,
+or other utility tools (for example, testing tools).
+
+For the most part, the project contents are organized around defining
+one main component per subdirectory. Each such directory contains an
+``LLVMBuild.txt`` file with the component definitions.
+
+The component descriptions for the project as a whole are automatically
+gathered by the LLVMBuild tools. The tools traverse the source directory
+structure to find all of the component description files.
+
+.. note::
+
+   For performance/sanity reasons, we only traverse into subdirectories
+   when the parent itself contains an ``LLVMBuild.txt`` description
+   file.
+
+Build Integration
+=================
+
+The LLVMBuild files themselves are just a declarative way to describe
+the project structure. The actual building of the LLVM project is
+handled by another build system (currently we support both
+:doc:`Makefiles <MakefileGuide>` and :doc:`CMake <CMake>`).
+
+The build system implementation will load the relevant contents of the
+LLVMBuild files and use that to drive the actual project build.
+Typically, the build system will only need to load this information at
+"configure" time, and use it to generative native information. Build
+systems will also handle automatically reconfiguring their information
+when the contents of the ``LLVMBuild.txt`` files change.
+
+Developers generally should not need to be aware of the details of how
+the LLVMBuild system is integrated into their build. Ideally,
+LLVM developers who are not working on the build system would only ever
+need to modify the contents of the ``LLVMBuild.txt`` description files
+(although we have not reached this goal yet).
+
+For more information on the utility tool we provide to help interface
+with the build system, please see the :doc:`llvm-build
+<CommandGuide/llvm-build>` documentation.
+
+Component Overview
+==================
+
+As mentioned earlier, LLVM projects are organized into logical
+*components*. Every component is typically grouped into its own
+subdirectory. Generally, a component is organized around a coherent
+group of sources which have some kind of clear API separation from other
+parts of the code.
+
+LLVM primarily uses the following types of components:
+
+- *Libraries* - Library components define a distinct API which can be
+ independently linked into LLVM client applications. Libraries typically
+  have private and public header files, and may specify a list of required
+  libraries that they build on top of.
+- *Build Tools* - Build tools are applications which are designed to be run
+ as part of the build process (typically to generate other source files).
+ Currently, LLVM uses one main build tool called :doc:`TableGen
+ <TableGenFundamentals>` to generate a variety of source files.
+- *Tools* - Command line applications which are built using the LLVM
+ component libraries. Most LLVM tools are small and are primarily
+ frontends to the library interfaces.
+
+Components are described using ``LLVMBuild.txt`` files in the directories
+that define the component. See the `LLVMBuild Format Reference`_ section
+for information on the exact format of these files.
+
+LLVMBuild Format Reference
+==========================
+
+LLVMBuild files are written in a simple variant of the INI or configuration
+file format (`Wikipedia entry`_). The format defines a list of sections,
+each of which may contain some number of properties. A simple example of
+the file format is below:
+
+.. _Wikipedia entry: http://en.wikipedia.org/wiki/INI_file
+
+.. code-block:: ini
+
+ ; Comments start with a semi-colon.
+
+ ; Sections are declared using square brackets.
+ [component_0]
+
+ ; Properties are declared using '=' and are contained in the previous section.
+ ;
+ ; We support simple string and boolean scalar values and list values, where
+ ; items are separated by spaces. There is no support for quoting, and so
+ ; property values may not contain spaces.
+ property_name = property_value
+ list_property_name = value_1 value_2 ... value_n
+ boolean_property_name = 1 (or 0)
+
+LLVMBuild files are expected to define a strict set of sections and
+properties. A typical component description file for a library
+component would look like the following example:
+
+.. code-block:: ini
+
+ [component_0]
+ type = Library
+ name = Linker
+ parent = Libraries
+ required_libraries = Archive BitReader Core Support TransformUtils
+
+A full description of the exact sections and properties which are
+allowed follows.
+
+Each file may define exactly one common component, named ``common``. The
+common component may define the following properties:
+
+- ``subdirectories`` **[optional]**
+
+ If given, a list of the names of the subdirectories from the current
+ subpath to search for additional LLVMBuild files.
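+
+  As an illustrative sketch (the subdirectory names here are
+  hypothetical), a ``common`` section directing the LLVMBuild tools to
+  descend into two subdirectories might look like:
+
+  .. code-block:: ini
+
+     ; Only the subdirectories named here are searched for further
+     ; LLVMBuild.txt description files.
+     [common]
+     subdirectories = Analysis Transforms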
+
+Each file may define multiple components. Each component is described by a
+section whose name starts with ``component``. The remainder of the section
+name is ignored, but each section name must be unique. Typically components
+are just numbered in order in files with multiple components
+(``component_0``, ``component_1``, and so on).
+
+.. warning::
+
+ Section names not matching this format (or the ``common`` section) are
+ currently unused and are disallowed.
+
+Every component is defined by the properties in the section. The exact
+list of properties that are allowed depends on the component type.
+Components **may not** define any properties other than those expected
+by the component type.
+
+Every component must define the following properties:
+
+- ``type`` **[required]**
+
+ The type of the component. Supported component types are detailed
+ below. Most components will define additional properties which may be
+ required or optional.
+
+- ``name`` **[required]**
+
+ The name of the component. Names are required to be unique across the
+ entire project.
+
+- ``parent`` **[required]**
+
+ The name of the logical parent of the component. Components are
+ organized into a logical tree to make it easier to navigate and
+ organize groups of components. The parents have no semantics as far
+ as the project build is concerned, however. Typically, the parent
+ will be the main component of the parent directory.
+
+ Components may reference the root pseudo component using ``$ROOT`` to
+ indicate they should logically be grouped at the top-level.
+
+Components may define the following properties:
+
+- ``dependencies`` **[optional]**
+
+ If specified, a list of names of components which *must* be built
+  prior to this one. This should be exactly those components which
+ produce some tool or source code required for building the component.
+
+ .. note::
+
+ ``Group`` and ``LibraryGroup`` components have no semantics for the
+ actual build, and are not allowed to specify dependencies.
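+
+  As a hedged sketch (both component names below are hypothetical), a
+  library whose sources are produced by a generator tool could declare
+  that ordering requirement as:
+
+  .. code-block:: ini
+
+     ; This library can only be built after the generator tool exists.
+     [component_0]
+     type = Library
+     name = GeneratedCode
+     parent = Libraries
+     dependencies = SourceGenerator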
+
+The following section lists the available component types, as well as
+the properties which are associated with each type.
+
+- ``type = Group``
+
+ Group components exist purely to allow additional arbitrary structuring
+ of the logical components tree. For example, one might define a
+ ``Libraries`` group to hold all of the root library components.
+
+  ``Group`` components have no additional properties.
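+
+  For example, the ``Libraries`` group mentioned above could be declared
+  at the top of the logical tree as:
+
+  .. code-block:: ini
+
+     ; Purely logical grouping; $ROOT parents it at the top level.
+     [component_0]
+     type = Group
+     name = Libraries
+     parent = $ROOT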
+
+- ``type = Library``
+
+ Library components define an individual library which should be built
+ from the source code in the component directory.
+
+ Components with this type use the following properties:
+
+ - ``library_name`` **[optional]**
+
+ If given, the name to use for the actual library file on disk. If
+ not given, the name is derived from the component name itself.
+
+ - ``required_libraries`` **[optional]**
+
+ If given, a list of the names of ``Library`` or ``LibraryGroup``
+ components which must also be linked in whenever this library is
+ used. That is, the link time dependencies for this component. When
+ tools are built, the build system will include the transitive closure
+ of all ``required_libraries`` for the components the tool needs.
+
+ - ``add_to_library_groups`` **[optional]**
+
+ If given, a list of the names of ``LibraryGroup`` components which
+ this component is also part of. This allows nesting groups of
+ components. For example, the ``X86`` target might define a library
+ group for all of the ``X86`` components. That library group might
+ then be included in the ``all-targets`` library group.
+
+ - ``installed`` **[optional]** **[boolean]**
+
+ Whether this library is installed. Libraries that are not installed
+ are only reported by ``llvm-config`` when it is run as part of a
+ development directory.
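+
+  Putting these together, a hypothetical library component using all of
+  the optional properties (every name below is illustrative only) might
+  look like:
+
+  .. code-block:: ini
+
+     [component_0]
+     type = Library
+     name = FancyCodeGen
+     ; On-disk name differs from the component name.
+     library_name = LLVMFancyCodeGen
+     parent = Libraries
+     required_libraries = Core Support
+     add_to_library_groups = Engine
+     ; Development-tree only; not reported by an installed llvm-config.
+     installed = 0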
+
+- ``type = LibraryGroup``
+
+ ``LibraryGroup`` components are a mechanism to allow easy definition of
+ useful sets of related components. In particular, we use them to easily
+ specify things like "all targets", or "all assembly printers".
+
+ Components with this type use the following properties:
+
+ - ``required_libraries`` **[optional]**
+
+ See the ``Library`` type for a description of this property.
+
+ - ``add_to_library_groups`` **[optional]**
+
+ See the ``Library`` type for a description of this property.
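+
+  For instance, a sketch of the ``all-targets`` group mentioned earlier
+  (its parent here is an assumption for illustration) might be:
+
+  .. code-block:: ini
+
+     ; The group builds nothing itself; targets join it through their
+     ; own add_to_library_groups properties.
+     [component_0]
+     type = LibraryGroup
+     name = all-targets
+     parent = Libraries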
+
+- ``type = TargetGroup``
+
+ ``TargetGroup`` components are an extension of ``LibraryGroup``\s,
+ specifically for defining LLVM targets (which are handled specially in a
+ few places).
+
+ The name of the component should always be the name of the target.
+
+ Components with this type use the ``LibraryGroup`` properties in
+ addition to:
+
+ - ``has_asmparser`` **[optional]** **[boolean]**
+
+ Whether this target defines an assembly parser.
+
+ - ``has_asmprinter`` **[optional]** **[boolean]**
+
+ Whether this target defines an assembly printer.
+
+ - ``has_disassembler`` **[optional]** **[boolean]**
+
+ Whether this target defines a disassembler.
+
+ - ``has_jit`` **[optional]** **[boolean]**
+
+ Whether this target supports JIT compilation.
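+
+  As an example, a fully featured target might be described as follows
+  (modeled loosely on the in-tree ``X86`` backend; the parent name and
+  flag values are illustrative, not authoritative):
+
+  .. code-block:: ini
+
+     [component_0]
+     type = TargetGroup
+     ; The component name is also the target name.
+     name = X86
+     parent = Target
+     has_asmparser = 1
+     has_asmprinter = 1
+     has_disassembler = 1
+     has_jit = 1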
+
+- ``type = Tool``
+
+ ``Tool`` components define standalone command line tools which should be
+ built from the source code in the component directory and linked.
+
+ Components with this type use the following properties:
+
+ - ``required_libraries`` **[optional]**
+
+ If given, a list of the names of ``Library`` or ``LibraryGroup``
+ components which this tool is required to be linked with.
+
+ .. note::
+
+ The values should be the component names, which may not always
+ match up with the actual library names on disk.
+
+ Build systems are expected to properly include all of the libraries
+ required by the linked components (i.e., the transitive closure of
+ ``required_libraries``).
+
+ Build systems are also expected to understand that those library
+ components must be built prior to linking -- they do not also need
+ to be listed under ``dependencies``.
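+
+  A minimal sketch of a tool component (the tool and library names below
+  are illustrative) might be:
+
+  .. code-block:: ini
+
+     ; The tool links the transitive closure of these components.
+     [component_0]
+     type = Tool
+     name = my-tool
+     parent = Tools
+     required_libraries = BitReader Core Support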
+
+- ``type = BuildTool``
+
+ ``BuildTool`` components are like ``Tool`` components, except that the
+ tool is supposed to be built for the platform where the build is running
+  (instead of that platform being targeted). Build systems are expected
+ to handle the fact that required libraries may need to be built for
+ multiple platforms in order to be able to link this tool.
+
+ ``BuildTool`` components currently use the exact same properties as
+  ``Tool`` components; the type distinction is only used to differentiate
+ what the tool is built for.
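+
+  As a final sketch (again with hypothetical naming), a generator that
+  must run on the host machine during the build could be described as:
+
+  .. code-block:: ini
+
+     ; Built for the host platform, even when cross-compiling.
+     [component_0]
+     type = BuildTool
+     name = SourceGenerator
+     parent = BuildTools
+     required_libraries = Support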
+
diff --git a/docs/LangRef.html b/docs/LangRef.html
deleted file mode 100644
index 13daa65ca358..000000000000
--- a/docs/LangRef.html
+++ /dev/null
@@ -1,8776 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <title>LLVM Assembly Language Reference Manual</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <meta name="description"
- content="LLVM Assembly Language Reference Manual.">
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>LLVM Language Reference Manual</h1>
-<ol>
- <li><a href="#abstract">Abstract</a></li>
- <li><a href="#introduction">Introduction</a></li>
- <li><a href="#identifiers">Identifiers</a></li>
- <li><a href="#highlevel">High Level Structure</a>
- <ol>
- <li><a href="#modulestructure">Module Structure</a></li>
- <li><a href="#linkage">Linkage Types</a>
- <ol>
- <li><a href="#linkage_private">'<tt>private</tt>' Linkage</a></li>
- <li><a href="#linkage_linker_private">'<tt>linker_private</tt>' Linkage</a></li>
- <li><a href="#linkage_linker_private_weak">'<tt>linker_private_weak</tt>' Linkage</a></li>
- <li><a href="#linkage_internal">'<tt>internal</tt>' Linkage</a></li>
- <li><a href="#linkage_available_externally">'<tt>available_externally</tt>' Linkage</a></li>
- <li><a href="#linkage_linkonce">'<tt>linkonce</tt>' Linkage</a></li>
- <li><a href="#linkage_common">'<tt>common</tt>' Linkage</a></li>
- <li><a href="#linkage_weak">'<tt>weak</tt>' Linkage</a></li>
- <li><a href="#linkage_appending">'<tt>appending</tt>' Linkage</a></li>
- <li><a href="#linkage_externweak">'<tt>extern_weak</tt>' Linkage</a></li>
- <li><a href="#linkage_linkonce_odr">'<tt>linkonce_odr</tt>' Linkage</a></li>
- <li><a href="#linkage_linkonce_odr_auto_hide">'<tt>linkonce_odr_auto_hide</tt>' Linkage</a></li>
- <li><a href="#linkage_weak">'<tt>weak_odr</tt>' Linkage</a></li>
- <li><a href="#linkage_external">'<tt>external</tt>' Linkage</a></li>
- <li><a href="#linkage_dllimport">'<tt>dllimport</tt>' Linkage</a></li>
- <li><a href="#linkage_dllexport">'<tt>dllexport</tt>' Linkage</a></li>
- </ol>
- </li>
- <li><a href="#callingconv">Calling Conventions</a></li>
- <li><a href="#namedtypes">Named Types</a></li>
- <li><a href="#globalvars">Global Variables</a></li>
- <li><a href="#functionstructure">Functions</a></li>
- <li><a href="#aliasstructure">Aliases</a></li>
- <li><a href="#namedmetadatastructure">Named Metadata</a></li>
- <li><a href="#paramattrs">Parameter Attributes</a></li>
- <li><a href="#fnattrs">Function Attributes</a></li>
- <li><a href="#gc">Garbage Collector Names</a></li>
- <li><a href="#moduleasm">Module-Level Inline Assembly</a></li>
- <li><a href="#datalayout">Data Layout</a></li>
- <li><a href="#pointeraliasing">Pointer Aliasing Rules</a></li>
- <li><a href="#volatile">Volatile Memory Accesses</a></li>
- <li><a href="#memmodel">Memory Model for Concurrent Operations</a></li>
- <li><a href="#ordering">Atomic Memory Ordering Constraints</a></li>
- </ol>
- </li>
- <li><a href="#typesystem">Type System</a>
- <ol>
- <li><a href="#t_classifications">Type Classifications</a></li>
- <li><a href="#t_primitive">Primitive Types</a>
- <ol>
- <li><a href="#t_integer">Integer Type</a></li>
- <li><a href="#t_floating">Floating Point Types</a></li>
- <li><a href="#t_x86mmx">X86mmx Type</a></li>
- <li><a href="#t_void">Void Type</a></li>
- <li><a href="#t_label">Label Type</a></li>
- <li><a href="#t_metadata">Metadata Type</a></li>
- </ol>
- </li>
- <li><a href="#t_derived">Derived Types</a>
- <ol>
- <li><a href="#t_aggregate">Aggregate Types</a>
- <ol>
- <li><a href="#t_array">Array Type</a></li>
- <li><a href="#t_struct">Structure Type</a></li>
- <li><a href="#t_opaque">Opaque Structure Types</a></li>
- <li><a href="#t_vector">Vector Type</a></li>
- </ol>
- </li>
- <li><a href="#t_function">Function Type</a></li>
- <li><a href="#t_pointer">Pointer Type</a></li>
- </ol>
- </li>
- </ol>
- </li>
- <li><a href="#constants">Constants</a>
- <ol>
- <li><a href="#simpleconstants">Simple Constants</a></li>
- <li><a href="#complexconstants">Complex Constants</a></li>
- <li><a href="#globalconstants">Global Variable and Function Addresses</a></li>
- <li><a href="#undefvalues">Undefined Values</a></li>
- <li><a href="#poisonvalues">Poison Values</a></li>
- <li><a href="#blockaddress">Addresses of Basic Blocks</a></li>
- <li><a href="#constantexprs">Constant Expressions</a></li>
- </ol>
- </li>
- <li><a href="#othervalues">Other Values</a>
- <ol>
- <li><a href="#inlineasm">Inline Assembler Expressions</a></li>
- <li><a href="#metadata">Metadata Nodes and Metadata Strings</a>
- <ol>
- <li><a href="#tbaa">'<tt>tbaa</tt>' Metadata</a></li>
- <li><a href="#tbaa.struct">'<tt>tbaa.struct</tt>' Metadata</a></li>
- <li><a href="#fpmath">'<tt>fpmath</tt>' Metadata</a></li>
- <li><a href="#range">'<tt>range</tt>' Metadata</a></li>
- </ol>
- </li>
- </ol>
- </li>
- <li><a href="#module_flags">Module Flags Metadata</a>
- <ol>
- <li><a href="#objc_gc_flags">Objective-C Garbage Collection Module Flags Metadata</a></li>
- </ol>
- </li>
- <li><a href="#intrinsic_globals">Intrinsic Global Variables</a>
- <ol>
- <li><a href="#intg_used">The '<tt>llvm.used</tt>' Global Variable</a></li>
- <li><a href="#intg_compiler_used">The '<tt>llvm.compiler.used</tt>'
- Global Variable</a></li>
- <li><a href="#intg_global_ctors">The '<tt>llvm.global_ctors</tt>'
- Global Variable</a></li>
- <li><a href="#intg_global_dtors">The '<tt>llvm.global_dtors</tt>'
- Global Variable</a></li>
- </ol>
- </li>
- <li><a href="#instref">Instruction Reference</a>
- <ol>
- <li><a href="#terminators">Terminator Instructions</a>
- <ol>
- <li><a href="#i_ret">'<tt>ret</tt>' Instruction</a></li>
- <li><a href="#i_br">'<tt>br</tt>' Instruction</a></li>
- <li><a href="#i_switch">'<tt>switch</tt>' Instruction</a></li>
- <li><a href="#i_indirectbr">'<tt>indirectbr</tt>' Instruction</a></li>
- <li><a href="#i_invoke">'<tt>invoke</tt>' Instruction</a></li>
- <li><a href="#i_resume">'<tt>resume</tt>' Instruction</a></li>
- <li><a href="#i_unreachable">'<tt>unreachable</tt>' Instruction</a></li>
- </ol>
- </li>
- <li><a href="#binaryops">Binary Operations</a>
- <ol>
- <li><a href="#i_add">'<tt>add</tt>' Instruction</a></li>
- <li><a href="#i_fadd">'<tt>fadd</tt>' Instruction</a></li>
- <li><a href="#i_sub">'<tt>sub</tt>' Instruction</a></li>
- <li><a href="#i_fsub">'<tt>fsub</tt>' Instruction</a></li>
- <li><a href="#i_mul">'<tt>mul</tt>' Instruction</a></li>
- <li><a href="#i_fmul">'<tt>fmul</tt>' Instruction</a></li>
- <li><a href="#i_udiv">'<tt>udiv</tt>' Instruction</a></li>
- <li><a href="#i_sdiv">'<tt>sdiv</tt>' Instruction</a></li>
- <li><a href="#i_fdiv">'<tt>fdiv</tt>' Instruction</a></li>
- <li><a href="#i_urem">'<tt>urem</tt>' Instruction</a></li>
- <li><a href="#i_srem">'<tt>srem</tt>' Instruction</a></li>
- <li><a href="#i_frem">'<tt>frem</tt>' Instruction</a></li>
- </ol>
- </li>
- <li><a href="#bitwiseops">Bitwise Binary Operations</a>
- <ol>
- <li><a href="#i_shl">'<tt>shl</tt>' Instruction</a></li>
- <li><a href="#i_lshr">'<tt>lshr</tt>' Instruction</a></li>
- <li><a href="#i_ashr">'<tt>ashr</tt>' Instruction</a></li>
- <li><a href="#i_and">'<tt>and</tt>' Instruction</a></li>
- <li><a href="#i_or">'<tt>or</tt>' Instruction</a></li>
- <li><a href="#i_xor">'<tt>xor</tt>' Instruction</a></li>
- </ol>
- </li>
- <li><a href="#vectorops">Vector Operations</a>
- <ol>
- <li><a href="#i_extractelement">'<tt>extractelement</tt>' Instruction</a></li>
- <li><a href="#i_insertelement">'<tt>insertelement</tt>' Instruction</a></li>
- <li><a href="#i_shufflevector">'<tt>shufflevector</tt>' Instruction</a></li>
- </ol>
- </li>
- <li><a href="#aggregateops">Aggregate Operations</a>
- <ol>
- <li><a href="#i_extractvalue">'<tt>extractvalue</tt>' Instruction</a></li>
- <li><a href="#i_insertvalue">'<tt>insertvalue</tt>' Instruction</a></li>
- </ol>
- </li>
- <li><a href="#memoryops">Memory Access and Addressing Operations</a>
- <ol>
- <li><a href="#i_alloca">'<tt>alloca</tt>' Instruction</a></li>
- <li><a href="#i_load">'<tt>load</tt>' Instruction</a></li>
- <li><a href="#i_store">'<tt>store</tt>' Instruction</a></li>
- <li><a href="#i_fence">'<tt>fence</tt>' Instruction</a></li>
- <li><a href="#i_cmpxchg">'<tt>cmpxchg</tt>' Instruction</a></li>
- <li><a href="#i_atomicrmw">'<tt>atomicrmw</tt>' Instruction</a></li>
- <li><a href="#i_getelementptr">'<tt>getelementptr</tt>' Instruction</a></li>
- </ol>
- </li>
- <li><a href="#convertops">Conversion Operations</a>
- <ol>
- <li><a href="#i_trunc">'<tt>trunc .. to</tt>' Instruction</a></li>
- <li><a href="#i_zext">'<tt>zext .. to</tt>' Instruction</a></li>
- <li><a href="#i_sext">'<tt>sext .. to</tt>' Instruction</a></li>
- <li><a href="#i_fptrunc">'<tt>fptrunc .. to</tt>' Instruction</a></li>
- <li><a href="#i_fpext">'<tt>fpext .. to</tt>' Instruction</a></li>
- <li><a href="#i_fptoui">'<tt>fptoui .. to</tt>' Instruction</a></li>
- <li><a href="#i_fptosi">'<tt>fptosi .. to</tt>' Instruction</a></li>
- <li><a href="#i_uitofp">'<tt>uitofp .. to</tt>' Instruction</a></li>
- <li><a href="#i_sitofp">'<tt>sitofp .. to</tt>' Instruction</a></li>
- <li><a href="#i_ptrtoint">'<tt>ptrtoint .. to</tt>' Instruction</a></li>
- <li><a href="#i_inttoptr">'<tt>inttoptr .. to</tt>' Instruction</a></li>
- <li><a href="#i_bitcast">'<tt>bitcast .. to</tt>' Instruction</a></li>
- </ol>
- </li>
- <li><a href="#otherops">Other Operations</a>
- <ol>
- <li><a href="#i_icmp">'<tt>icmp</tt>' Instruction</a></li>
- <li><a href="#i_fcmp">'<tt>fcmp</tt>' Instruction</a></li>
- <li><a href="#i_phi">'<tt>phi</tt>' Instruction</a></li>
- <li><a href="#i_select">'<tt>select</tt>' Instruction</a></li>
- <li><a href="#i_call">'<tt>call</tt>' Instruction</a></li>
- <li><a href="#i_va_arg">'<tt>va_arg</tt>' Instruction</a></li>
- <li><a href="#i_landingpad">'<tt>landingpad</tt>' Instruction</a></li>
- </ol>
- </li>
- </ol>
- </li>
- <li><a href="#intrinsics">Intrinsic Functions</a>
- <ol>
- <li><a href="#int_varargs">Variable Argument Handling Intrinsics</a>
- <ol>
- <li><a href="#int_va_start">'<tt>llvm.va_start</tt>' Intrinsic</a></li>
- <li><a href="#int_va_end">'<tt>llvm.va_end</tt>' Intrinsic</a></li>
- <li><a href="#int_va_copy">'<tt>llvm.va_copy</tt>' Intrinsic</a></li>
- </ol>
- </li>
- <li><a href="#int_gc">Accurate Garbage Collection Intrinsics</a>
- <ol>
- <li><a href="#int_gcroot">'<tt>llvm.gcroot</tt>' Intrinsic</a></li>
- <li><a href="#int_gcread">'<tt>llvm.gcread</tt>' Intrinsic</a></li>
- <li><a href="#int_gcwrite">'<tt>llvm.gcwrite</tt>' Intrinsic</a></li>
- </ol>
- </li>
- <li><a href="#int_codegen">Code Generator Intrinsics</a>
- <ol>
- <li><a href="#int_returnaddress">'<tt>llvm.returnaddress</tt>' Intrinsic</a></li>
- <li><a href="#int_frameaddress">'<tt>llvm.frameaddress</tt>' Intrinsic</a></li>
- <li><a href="#int_stacksave">'<tt>llvm.stacksave</tt>' Intrinsic</a></li>
- <li><a href="#int_stackrestore">'<tt>llvm.stackrestore</tt>' Intrinsic</a></li>
- <li><a href="#int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a></li>
- <li><a href="#int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a></li>
- <li><a href="#int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a></li>
- </ol>
- </li>
- <li><a href="#int_libc">Standard C Library Intrinsics</a>
- <ol>
- <li><a href="#int_memcpy">'<tt>llvm.memcpy.*</tt>' Intrinsic</a></li>
- <li><a href="#int_memmove">'<tt>llvm.memmove.*</tt>' Intrinsic</a></li>
- <li><a href="#int_memset">'<tt>llvm.memset.*</tt>' Intrinsic</a></li>
- <li><a href="#int_sqrt">'<tt>llvm.sqrt.*</tt>' Intrinsic</a></li>
- <li><a href="#int_powi">'<tt>llvm.powi.*</tt>' Intrinsic</a></li>
- <li><a href="#int_sin">'<tt>llvm.sin.*</tt>' Intrinsic</a></li>
- <li><a href="#int_cos">'<tt>llvm.cos.*</tt>' Intrinsic</a></li>
- <li><a href="#int_pow">'<tt>llvm.pow.*</tt>' Intrinsic</a></li>
- <li><a href="#int_exp">'<tt>llvm.exp.*</tt>' Intrinsic</a></li>
- <li><a href="#int_log">'<tt>llvm.log.*</tt>' Intrinsic</a></li>
- <li><a href="#int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a></li>
- <li><a href="#int_fabs">'<tt>llvm.fabs.*</tt>' Intrinsic</a></li>
- <li><a href="#int_floor">'<tt>llvm.floor.*</tt>' Intrinsic</a></li>
- </ol>
- </li>
- <li><a href="#int_manip">Bit Manipulation Intrinsics</a>
- <ol>
- <li><a href="#int_bswap">'<tt>llvm.bswap.*</tt>' Intrinsics</a></li>
- <li><a href="#int_ctpop">'<tt>llvm.ctpop.*</tt>' Intrinsic </a></li>
- <li><a href="#int_ctlz">'<tt>llvm.ctlz.*</tt>' Intrinsic </a></li>
- <li><a href="#int_cttz">'<tt>llvm.cttz.*</tt>' Intrinsic </a></li>
- </ol>
- </li>
- <li><a href="#int_overflow">Arithmetic with Overflow Intrinsics</a>
- <ol>
- <li><a href="#int_sadd_overflow">'<tt>llvm.sadd.with.overflow.*</tt> Intrinsics</a></li>
- <li><a href="#int_uadd_overflow">'<tt>llvm.uadd.with.overflow.*</tt> Intrinsics</a></li>
- <li><a href="#int_ssub_overflow">'<tt>llvm.ssub.with.overflow.*</tt> Intrinsics</a></li>
- <li><a href="#int_usub_overflow">'<tt>llvm.usub.with.overflow.*</tt> Intrinsics</a></li>
- <li><a href="#int_smul_overflow">'<tt>llvm.smul.with.overflow.*</tt> Intrinsics</a></li>
- <li><a href="#int_umul_overflow">'<tt>llvm.umul.with.overflow.*</tt> Intrinsics</a></li>
- </ol>
- </li>
- <li><a href="#spec_arithmetic">Specialised Arithmetic Intrinsics</a>
- <ol>
- <li><a href="#fmuladd">'<tt>llvm.fmuladd</tt> Intrinsic</a></li>
- </ol>
- </li>
- <li><a href="#int_fp16">Half Precision Floating Point Intrinsics</a>
- <ol>
- <li><a href="#int_convert_to_fp16">'<tt>llvm.convert.to.fp16</tt>' Intrinsic</a></li>
- <li><a href="#int_convert_from_fp16">'<tt>llvm.convert.from.fp16</tt>' Intrinsic</a></li>
- </ol>
- </li>
- <li><a href="#int_debugger">Debugger intrinsics</a></li>
- <li><a href="#int_eh">Exception Handling intrinsics</a></li>
- <li><a href="#int_trampoline">Trampoline Intrinsics</a>
- <ol>
- <li><a href="#int_it">'<tt>llvm.init.trampoline</tt>' Intrinsic</a></li>
- <li><a href="#int_at">'<tt>llvm.adjust.trampoline</tt>' Intrinsic</a></li>
- </ol>
- </li>
- <li><a href="#int_memorymarkers">Memory Use Markers</a>
- <ol>
- <li><a href="#int_lifetime_start">'<tt>llvm.lifetime.start</tt>' Intrinsic</a></li>
- <li><a href="#int_lifetime_end">'<tt>llvm.lifetime.end</tt>' Intrinsic</a></li>
- <li><a href="#int_invariant_start">'<tt>llvm.invariant.start</tt>' Intrinsic</a></li>
- <li><a href="#int_invariant_end">'<tt>llvm.invariant.end</tt>' Intrinsic</a></li>
- </ol>
- </li>
- <li><a href="#int_general">General intrinsics</a>
- <ol>
- <li><a href="#int_var_annotation">
- '<tt>llvm.var.annotation</tt>' Intrinsic</a></li>
- <li><a href="#int_annotation">
- '<tt>llvm.annotation.*</tt>' Intrinsic</a></li>
- <li><a href="#int_trap">
- '<tt>llvm.trap</tt>' Intrinsic</a></li>
- <li><a href="#int_debugtrap">
- '<tt>llvm.debugtrap</tt>' Intrinsic</a></li>
- <li><a href="#int_stackprotector">
- '<tt>llvm.stackprotector</tt>' Intrinsic</a></li>
- <li><a href="#int_objectsize">
- '<tt>llvm.objectsize</tt>' Intrinsic</a></li>
- <li><a href="#int_expect">
- '<tt>llvm.expect</tt>' Intrinsic</a></li>
- <li><a href="#int_donothing">
- '<tt>llvm.donothing</tt>' Intrinsic</a></li>
- </ol>
- </li>
- </ol>
- </li>
-</ol>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
- and <a href="mailto:vadve@cs.uiuc.edu">Vikram Adve</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="abstract">Abstract</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This document is a reference manual for the LLVM assembly language. LLVM is
- a Static Single Assignment (SSA) based representation that provides type
- safety, low-level operations, flexibility, and the capability of representing
- 'all' high-level languages cleanly. It is the common code representation
- used throughout all phases of the LLVM compilation strategy.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="introduction">Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The LLVM code representation is designed to be used in three different forms:
- as an in-memory compiler IR, as an on-disk bitcode representation (suitable
- for fast loading by a Just-In-Time compiler), and as a human readable
- assembly language representation. This allows LLVM to provide a powerful
- intermediate representation for efficient compiler transformations and
- analysis, while providing a natural means to debug and visualize the
- transformations. The three different forms of LLVM are all equivalent. This
- document describes the human readable representation and notation.</p>
-
-<p>The LLVM representation aims to be light-weight and low-level while being
- expressive, typed, and extensible at the same time. It aims to be a
- "universal IR" of sorts, by being at a low enough level that high-level ideas
- may be cleanly mapped to it (similar to how microprocessors are "universal
- IR's", allowing many source languages to be mapped to them). By providing
- type information, LLVM can be used as the target of optimizations: for
- example, through pointer analysis, it can be proven that a C automatic
- variable is never accessed outside of the current function, allowing it to
- be promoted to a simple SSA value instead of a memory location.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="wellformed">Well-Formedness</a>
-</h4>
-
-<div>
-
-<p>It is important to note that this document describes 'well formed' LLVM
- assembly language. There is a difference between what the parser accepts and
- what is considered 'well formed'. For example, the following instruction is
- syntactically okay, but not well formed:</p>
-
-<pre class="doc_code">
-%x = <a href="#i_add">add</a> i32 1, %x
-</pre>
-
-<p>because the definition of <tt>%x</tt> does not dominate all of its uses. The
- LLVM infrastructure provides a verification pass that may be used to verify
- that an LLVM module is well formed. This pass is automatically run by the
- parser after parsing input assembly and by the optimizer before it outputs
- bitcode. The violations pointed out by the verifier pass indicate bugs in
- transformation passes or input to the parser.</p>
-
-</div>
-
-</div>
-
-<!-- Describe the typesetting conventions here. -->
-
-<!-- *********************************************************************** -->
-<h2><a name="identifiers">Identifiers</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM identifiers come in two basic types: global and local. Global
- identifiers (functions, global variables) begin with the <tt>'@'</tt>
- character. Local identifiers (register names, types) begin with
- the <tt>'%'</tt> character. Additionally, there are three different formats
- for identifiers, for different purposes:</p>
-
-<ol>
- <li>Named values are represented as a string of characters with their prefix.
- For example, <tt>%foo</tt>, <tt>@DivisionByZero</tt>,
- <tt>%a.really.long.identifier</tt>. The actual regular expression used is
- '<tt>[%@][a-zA-Z$._][a-zA-Z$._0-9]*</tt>'. Identifiers which require
- other characters in their names can be surrounded with quotes. Special
- characters may be escaped using <tt>"\xx"</tt> where <tt>xx</tt> is the
- ASCII code for the character in hexadecimal. In this way, any character
- can be used in a name value, even quotes themselves.</li>
-
- <li>Unnamed values are represented as an unsigned numeric value with their
- prefix. For example, <tt>%12</tt>, <tt>@2</tt>, <tt>%44</tt>.</li>
-
- <li>Constants, which are described in a <a href="#constants">section about
- constants</a>, below.</li>
-</ol>
-
-<p>LLVM requires that values start with a prefix for two reasons: Compilers
- don't need to worry about name clashes with reserved words, and the set of
- reserved words may be expanded in the future without penalty. Additionally,
- unnamed identifiers allow a compiler to quickly come up with a temporary
- variable without having to avoid symbol table conflicts.</p>
-
-<p>Reserved words in LLVM are very similar to reserved words in other
- languages. There are keywords for different opcodes
- ('<tt><a href="#i_add">add</a></tt>',
- '<tt><a href="#i_bitcast">bitcast</a></tt>',
- '<tt><a href="#i_ret">ret</a></tt>', etc...), for primitive type names
- ('<tt><a href="#t_void">void</a></tt>',
- '<tt><a href="#t_primitive">i32</a></tt>', etc...), and others. These
- reserved words cannot conflict with variable names, because none of them
- start with a prefix character (<tt>'%'</tt> or <tt>'@'</tt>).</p>
-
-<p>Here is an example of LLVM code to multiply the integer variable
- '<tt>%X</tt>' by 8:</p>
-
-<p>The easy way:</p>
-
-<pre class="doc_code">
-%result = <a href="#i_mul">mul</a> i32 %X, 8
-</pre>
-
-<p>After strength reduction:</p>
-
-<pre class="doc_code">
-%result = <a href="#i_shl">shl</a> i32 %X, i8 3
-</pre>
-
-<p>And the hard way:</p>
-
-<pre class="doc_code">
-%0 = <a href="#i_add">add</a> i32 %X, %X <i>; yields {i32}:%0</i>
-%1 = <a href="#i_add">add</a> i32 %0, %0 <i>; yields {i32}:%1</i>
-%result = <a href="#i_add">add</a> i32 %1, %1
-</pre>
-
-<p>This last way of multiplying <tt>%X</tt> by 8 illustrates several important
- lexical features of LLVM:</p>
-
-<ol>
- <li>Comments are delimited with a '<tt>;</tt>' and go until the end of
- line.</li>
-
- <li>Unnamed temporaries are created when the result of a computation is not
- assigned to a named value.</li>
-
- <li>Unnamed temporaries are numbered sequentially</li>
-</ol>
-
-<p>It also shows a convention that we follow in this document. When
- demonstrating instructions, we will follow an instruction with a comment that
- defines the type and name of value produced. Comments are shown in italic
- text.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="highlevel">High Level Structure</a></h2>
-<!-- *********************************************************************** -->
-<div>
-<!-- ======================================================================= -->
-<h3>
- <a name="modulestructure">Module Structure</a>
-</h3>
-
-<div>
-
-<p>LLVM programs are composed of <tt>Module</tt>s, each of which is a
- translation unit of the input programs. Each module consists of functions,
- global variables, and symbol table entries. Modules may be combined together
- with the LLVM linker, which merges function (and global variable)
- definitions, resolves forward declarations, and merges symbol table
- entries. Here is an example of the "hello world" module:</p>
-
-<pre class="doc_code">
-<i>; Declare the string constant as a global constant.</i>&nbsp;
-<a href="#identifiers">@.str</a> = <a href="#linkage_private">private</a>&nbsp;<a href="#globalvars">unnamed_addr</a>&nbsp;<a href="#globalvars">constant</a>&nbsp;<a href="#t_array">[13 x i8]</a> c"hello world\0A\00"&nbsp;
-
-<i>; External declaration of the puts function</i>&nbsp;
-<a href="#functionstructure">declare</a> i32 @puts(i8* <a href="#nocapture">nocapture</a>) <a href="#fnattrs">nounwind</a>&nbsp;
-
-<i>; Definition of main function</i>
-define i32 @main() { <i>; i32()* </i>&nbsp;
- <i>; Convert [13 x i8]* to i8 *...</i>&nbsp;
- %cast210 = <a href="#i_getelementptr">getelementptr</a> [13 x i8]* @.str, i64 0, i64 0
-
- <i>; Call puts function to write out the string to stdout.</i>&nbsp;
- <a href="#i_call">call</a> i32 @puts(i8* %cast210)
- <a href="#i_ret">ret</a> i32 0&nbsp;
-}
-
-<i>; Named metadata</i>
-!1 = metadata !{i32 42}
-!foo = !{!1, null}
-</pre>
-
-<p>This example is made up of a <a href="#globalvars">global variable</a> named
- "<tt>.str</tt>", an external declaration of the "<tt>puts</tt>" function,
- a <a href="#functionstructure">function definition</a> for
- "<tt>main</tt>" and <a href="#namedmetadatastructure">named metadata</a>
- "<tt>foo</tt>".</p>
-
-<p>In general, a module is made up of a list of global values (where both
- functions and global variables are global values). Global values are
- represented by a pointer to a memory location (in this case, a pointer to an
- array of char, and a pointer to a function), and have one of the
- following <a href="#linkage">linkage types</a>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="linkage">Linkage Types</a>
-</h3>
-
-<div>
-
-<p>All Global Variables and Functions have one of the following types of
- linkage:</p>
-
-<dl>
- <dt><tt><b><a name="linkage_private">private</a></b></tt></dt>
- <dd>Global values with "<tt>private</tt>" linkage are only directly accessible
- by objects in the current module. In particular, linking code into a
- module with an private global value may cause the private to be renamed as
- necessary to avoid collisions. Because the symbol is private to the
- module, all references can be updated. This doesn't show up in any symbol
- table in the object file.</dd>
-
- <dt><tt><b><a name="linkage_linker_private">linker_private</a></b></tt></dt>
- <dd>Similar to <tt>private</tt>, but the symbol is passed through the
- assembler and evaluated by the linker. Unlike normal strong symbols, they
- are removed by the linker from the final linked image (executable or
- dynamic library).</dd>
-
- <dt><tt><b><a name="linkage_linker_private_weak">linker_private_weak</a></b></tt></dt>
- <dd>Similar to "<tt>linker_private</tt>", but the symbol is weak. Note that
- <tt>linker_private_weak</tt> symbols are subject to coalescing by the
- linker. The symbols are removed by the linker from the final linked image
- (executable or dynamic library).</dd>
-
- <dt><tt><b><a name="linkage_internal">internal</a></b></tt></dt>
- <dd>Similar to private, but the value shows as a local symbol
- (<tt>STB_LOCAL</tt> in the case of ELF) in the object file. This
- corresponds to the notion of the '<tt>static</tt>' keyword in C.</dd>
-
- <dt><tt><b><a name="linkage_available_externally">available_externally</a></b></tt></dt>
- <dd>Globals with "<tt>available_externally</tt>" linkage are never emitted
- into the object file corresponding to the LLVM module. They exist to
- allow inlining and other optimizations to take place given knowledge of
- the definition of the global, which is known to be somewhere outside the
- module. Globals with <tt>available_externally</tt> linkage are allowed to
- be discarded at will, and are otherwise the same as <tt>linkonce_odr</tt>.
- This linkage type is only allowed on definitions, not declarations.</dd>
-
- <dt><tt><b><a name="linkage_linkonce">linkonce</a></b></tt></dt>
- <dd>Globals with "<tt>linkonce</tt>" linkage are merged with other globals of
- the same name when linkage occurs. This can be used to implement
- some forms of inline functions, templates, or other code which must be
- generated in each translation unit that uses it, but where the body may
- be overridden with a more definitive definition later. Unreferenced
- <tt>linkonce</tt> globals are allowed to be discarded. Note that
- <tt>linkonce</tt> linkage does not actually allow the optimizer to
- inline the body of this function into callers because it doesn't know if
- this definition of the function is the definitive definition within the
- program or whether it will be overridden by a stronger definition.
- To enable inlining and other optimizations, use "<tt>linkonce_odr</tt>"
- linkage.</dd>
-
- <dt><tt><b><a name="linkage_weak">weak</a></b></tt></dt>
- <dd>"<tt>weak</tt>" linkage has the same merging semantics as
- <tt>linkonce</tt> linkage, except that unreferenced globals with
- <tt>weak</tt> linkage may not be discarded. This is used for globals that
- are declared "weak" in C source code.</dd>
-
- <dt><tt><b><a name="linkage_common">common</a></b></tt></dt>
- <dd>"<tt>common</tt>" linkage is most similar to "<tt>weak</tt>" linkage, but
- they are used for tentative definitions in C, such as "<tt>int X;</tt>" at
- global scope.
- Symbols with "<tt>common</tt>" linkage are merged in the same way as
- <tt>weak symbols</tt>, and they may not be deleted if unreferenced.
- <tt>common</tt> symbols may not have an explicit section,
- must have a zero initializer, and may not be marked '<a
- href="#globalvars"><tt>constant</tt></a>'. Functions and aliases may not
- have common linkage.</dd>
-
-
- <dt><tt><b><a name="linkage_appending">appending</a></b></tt></dt>
- <dd>"<tt>appending</tt>" linkage may only be applied to global variables of
- pointer to array type. When two global variables with appending linkage
- are linked together, the two global arrays are appended together. This is
- the LLVM, typesafe, equivalent of having the system linker append together
- "sections" with identical names when .o files are linked.</dd>
-
- <dt><tt><b><a name="linkage_externweak">extern_weak</a></b></tt></dt>
- <dd>The semantics of this linkage follow the ELF object file model: the symbol
- is weak until linked, if not linked, the symbol becomes null instead of
- being an undefined reference.</dd>
-
- <dt><tt><b><a name="linkage_linkonce_odr">linkonce_odr</a></b></tt></dt>
- <dt><tt><b><a name="linkage_weak_odr">weak_odr</a></b></tt></dt>
- <dd>Some languages allow differing globals to be merged, such as two functions
- with different semantics. Other languages, such as <tt>C++</tt>, ensure
- that only equivalent globals are ever merged (the "one definition rule"
- &mdash; "ODR"). Such languages can use the <tt>linkonce_odr</tt>
- and <tt>weak_odr</tt> linkage types to indicate that the global will only
- be merged with equivalent globals. These linkage types are otherwise the
- same as their non-<tt>odr</tt> versions.</dd>
-
- <dt><tt><b><a name="linkage_linkonce_odr_auto_hide">linkonce_odr_auto_hide</a></b></tt></dt>
- <dd>Similar to "<tt>linkonce_odr</tt>", but nothing in the translation unit
- takes the address of this definition. For instance, functions that had an
- inline definition, but the compiler decided not to inline it.
- <tt>linkonce_odr_auto_hide</tt> may have only <tt>default</tt> visibility.
- The symbols are removed by the linker from the final linked image
- (executable or dynamic library).</dd>
-
- <dt><tt><b><a name="linkage_external">external</a></b></tt></dt>
- <dd>If none of the above identifiers are used, the global is externally
- visible, meaning that it participates in linkage and can be used to
- resolve external symbol references.</dd>
-</dl>
-
-<p>The next two types of linkage are targeted for Microsoft Windows platform
- only. They are designed to support importing (exporting) symbols from (to)
- DLLs (Dynamic Link Libraries).</p>
-
-<dl>
- <dt><tt><b><a name="linkage_dllimport">dllimport</a></b></tt></dt>
- <dd>"<tt>dllimport</tt>" linkage causes the compiler to reference a function
- or variable via a global pointer to a pointer that is set up by the DLL
- exporting the symbol. On Microsoft Windows targets, the pointer name is
- formed by combining <code>__imp_</code> and the function or variable
- name.</dd>
-
- <dt><tt><b><a name="linkage_dllexport">dllexport</a></b></tt></dt>
- <dd>"<tt>dllexport</tt>" linkage causes the compiler to provide a global
- pointer to a pointer in a DLL, so that it can be referenced with the
- <tt>dllimport</tt> attribute. On Microsoft Windows targets, the pointer
- name is formed by combining <code>__imp_</code> and the function or
- variable name.</dd>
-</dl>
-
-<p>For example, since the "<tt>.LC0</tt>" variable is defined to be internal, if
- another module defined a "<tt>.LC0</tt>" variable and was linked with this
- one, one of the two would be renamed, preventing a collision. Since
- "<tt>main</tt>" and "<tt>puts</tt>" are external (i.e., lacking any linkage
- declarations), they are accessible outside of the current module.</p>
-
-<p>It is illegal for a function <i>declaration</i> to have any linkage type
- other than <tt>external</tt>, <tt>dllimport</tt>
- or <tt>extern_weak</tt>.</p>
-
-<p>Aliases can have only <tt>external</tt>, <tt>internal</tt>, <tt>weak</tt>
- or <tt>weak_odr</tt> linkages.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="callingconv">Calling Conventions</a>
-</h3>
-
-<div>
-
-<p>LLVM <a href="#functionstructure">functions</a>, <a href="#i_call">calls</a>
- and <a href="#i_invoke">invokes</a> can all have an optional calling
- convention specified for the call. The calling convention of any pair of
- dynamic caller/callee must match, or the behavior of the program is
- undefined. The following calling conventions are supported by LLVM, and more
- may be added in the future:</p>
-
-<dl>
- <dt><b>"<tt>ccc</tt>" - The C calling convention</b>:</dt>
- <dd>This calling convention (the default if no other calling convention is
- specified) matches the target C calling conventions. This calling
- convention supports varargs function calls and tolerates some mismatch in
- the declared prototype and implemented declaration of the function (as
- does normal C).</dd>
-
- <dt><b>"<tt>fastcc</tt>" - The fast calling convention</b>:</dt>
- <dd>This calling convention attempts to make calls as fast as possible
- (e.g. by passing things in registers). This calling convention allows the
- target to use whatever tricks it wants to produce fast code for the
- target, without having to conform to an externally specified ABI
- (Application Binary Interface).
- <a href="CodeGenerator.html#tailcallopt">Tail calls can only be optimized
- when this or the GHC convention is used.</a> This calling convention
- does not support varargs and requires the prototype of all callees to
- exactly match the prototype of the function definition.</dd>
-
- <dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
- <dd>This calling convention attempts to make code in the caller as efficient
- as possible under the assumption that the call is not commonly executed.
- As such, these calls often preserve all registers so that the call does
- not break any live ranges in the caller side. This calling convention
- does not support varargs and requires the prototype of all callees to
- exactly match the prototype of the function definition.</dd>
-
- <dt><b>"<tt>cc <em>10</em></tt>" - GHC convention</b>:</dt>
- <dd>This calling convention has been implemented specifically for use by the
- <a href="http://www.haskell.org/ghc">Glasgow Haskell Compiler (GHC)</a>.
- It passes everything in registers, going to extremes to achieve this by
- disabling callee save registers. This calling convention should not be
- used lightly but only for specific situations such as an alternative to
- the <em>register pinning</em> performance technique often used when
- implementing functional programming languages.At the moment only X86
- supports this convention and it has the following limitations:
- <ul>
- <li>On <em>X86-32</em> only supports up to 4 bit type parameters. No
- floating point types are supported.</li>
- <li>On <em>X86-64</em> only supports up to 10 bit type parameters and
- 6 floating point parameters.</li>
- </ul>
- This calling convention supports
- <a href="CodeGenerator.html#tailcallopt">tail call optimization</a> but
- requires both the caller and callee are using it.
- </dd>
-
- <dt><b>"<tt>cc &lt;<em>n</em>&gt;</tt>" - Numbered convention</b>:</dt>
- <dd>Any calling convention may be specified by number, allowing
- target-specific calling conventions to be used. Target specific calling
- conventions start at 64.</dd>
-</dl>
-
-<p>More calling conventions can be added/defined on an as-needed basis, to
- support Pascal conventions or any other well-known target-independent
- convention.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="visibility">Visibility Styles</a>
-</h3>
-
-<div>
-
-<p>All Global Variables and Functions have one of the following visibility
- styles:</p>
-
-<dl>
- <dt><b>"<tt>default</tt>" - Default style</b>:</dt>
- <dd>On targets that use the ELF object file format, default visibility means
- that the declaration is visible to other modules and, in shared libraries,
- means that the declared entity may be overridden. On Darwin, default
- visibility means that the declaration is visible to other modules. Default
- visibility corresponds to "external linkage" in the language.</dd>
-
- <dt><b>"<tt>hidden</tt>" - Hidden style</b>:</dt>
- <dd>Two declarations of an object with hidden visibility refer to the same
- object if they are in the same shared object. Usually, hidden visibility
- indicates that the symbol will not be placed into the dynamic symbol
- table, so no other module (executable or shared library) can reference it
- directly.</dd>
-
- <dt><b>"<tt>protected</tt>" - Protected style</b>:</dt>
- <dd>On ELF, protected visibility indicates that the symbol will be placed in
- the dynamic symbol table, but that references within the defining module
- will bind to the local symbol. That is, the symbol cannot be overridden by
- another module.</dd>
-</dl>
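-
-<p>For example, hypothetical globals using each style could be written
-   as:</p>
-
-<pre class="doc_code">
-@visible  = global i32 0            <i>; default visibility</i>
-@internal = hidden global i32 0     <i>; not in the dynamic symbol table</i>
-@exported = protected global i32 0  <i>; exported, but not overridable</i>
-</pre>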
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="namedtypes">Named Types</a>
-</h3>
-
-<div>
-
-<p>LLVM IR allows you to specify name aliases for certain types. This can make
- it easier to read the IR and make the IR more condensed (particularly when
- recursive types are involved). An example of a name specification is:</p>
-
-<pre class="doc_code">
-%mytype = type { %mytype*, i32 }
-</pre>
-
-<p>You may give a name to any <a href="#typesystem">type</a> except
- "<a href="#t_void">void</a>". Type name aliases may be used anywhere a type
- is expected with the syntax "%mytype".</p>
-
-<p>Note that type names are aliases for the structural type that they indicate,
- and that you can therefore specify multiple names for the same type. This
- often leads to confusing behavior when dumping out a .ll file. Since LLVM IR
- uses structural typing, the name is not part of the type. When printing out
- LLVM IR, the printer will pick <em>one name</em> to render all types of a
- particular shape. This means that if you have code where two different
- source types end up having the same LLVM type, that the dumper will sometimes
- print the "wrong" or unexpected type. This is an important design point and
- isn't going to change.</p>
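-
-<p>For example, given these two hypothetical names for the same structural
-   type, the printer may render values of this shape with either name:</p>
-
-<pre class="doc_code">
-%A = type { i32, i32 }
-%B = type { i32, i32 }  <i>; structurally identical to %A</i>
-</pre>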
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="globalvars">Global Variables</a>
-</h3>
-
-<div>
-
-<p>Global variables define regions of memory allocated at compilation time
- instead of run-time. Global variables may optionally be initialized, may
- have an explicit section to be placed in, and may have an optional explicit
- alignment specified.</p>
-
-<p>A variable may be defined as <tt>thread_local</tt>, which
-   means that it will not be shared by threads (each thread will have a
-   separate copy of the variable). Not all targets support thread-local
- variables. Optionally, a TLS model may be specified:</p>
-
-<dl>
- <dt><b><tt>localdynamic</tt></b>:</dt>
- <dd>For variables that are only used within the current shared library.</dd>
-
- <dt><b><tt>initialexec</tt></b>:</dt>
- <dd>For variables in modules that will not be loaded dynamically.</dd>
-
- <dt><b><tt>localexec</tt></b>:</dt>
- <dd>For variables defined in the executable and only used within it.</dd>
-</dl>
-
-<p>The models correspond to the ELF TLS models; see
- <a href="http://people.redhat.com/drepper/tls.pdf">ELF
- Handling For Thread-Local Storage</a> for more information on under which
- circumstances the different models may be used. The target may choose a
- different TLS model if the specified model is not supported, or if a better
- choice of model can be made.</p>
-
-<p>A variable may be defined as a global
- "constant," which indicates that the contents of the variable
- will <b>never</b> be modified (enabling better optimization, allowing the
- global data to be placed in the read-only section of an executable, etc).
- Note that variables that need runtime initialization cannot be marked
- "constant" as there is a store to the variable.</p>
-
-<p>LLVM explicitly allows <em>declarations</em> of global variables to be marked
- constant, even if the final definition of the global is not. This capability
- can be used to enable slightly better optimization of the program, but
- requires the language definition to guarantee that optimizations based on the
- 'constantness' are valid for the translation units that do not include the
- definition.</p>
-
-<p>As SSA values, global variables define pointer values that are in scope
- (i.e. they dominate) all basic blocks in the program. Global variables
- always define a pointer to their "content" type because they describe a
- region of memory, and all memory objects in LLVM are accessed through
- pointers.</p>
-
-<p>Global variables can be marked with <tt>unnamed_addr</tt> which indicates
- that the address is not significant, only the content. Constants marked
- like this can be merged with other constants if they have the same
- initializer. Note that a constant with significant address <em>can</em>
-   be merged with an <tt>unnamed_addr</tt> constant, the result being a
- constant whose address is significant.</p>
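-
-<p>For example, a hypothetical string constant whose address is
-   insignificant could be written as:</p>
-
-<pre class="doc_code">
-@msg = private unnamed_addr constant [4 x i8] c"foo\00"
-</pre>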
-
-<p>A global variable may be declared to reside in a target-specific numbered
- address space. For targets that support them, address spaces may affect how
- optimizations are performed and/or what target instructions are used to
- access the variable. The default address space is zero. The address space
- qualifier must precede any other attributes.</p>
-
-<p>LLVM allows an explicit section to be specified for globals. If the target
- supports it, it will emit globals to the section specified.</p>
-
-<p>An explicit alignment may be specified for a global, which must be a power
- of 2. If not present, or if the alignment is set to zero, the alignment of
- the global is set by the target to whatever it feels convenient. If an
- explicit alignment is specified, the global is forced to have exactly that
- alignment. Targets and optimizers are not allowed to over-align the global
- if the global has an assigned section. In this case, the extra alignment
- could be observable: for example, code could assume that the globals are
-   densely packed in their section and try to iterate over them as an array;
-   alignment padding would break this iteration.</p>
-
-<p>For example, the following defines a global in a numbered address space with
- an initializer, section, and alignment:</p>
-
-<pre class="doc_code">
-@G = addrspace(5) constant float 1.0, section "foo", align 4
-</pre>
-
-<p>The following example defines a thread-local global with
- the <tt>initialexec</tt> TLS model:</p>
-
-<pre class="doc_code">
-@G = thread_local(initialexec) global i32 0, align 4
-</pre>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
- <a name="functionstructure">Functions</a>
-</h3>
-
-<div>
-
-<p>LLVM function definitions consist of the "<tt>define</tt>" keyword, an
- optional <a href="#linkage">linkage type</a>, an optional
- <a href="#visibility">visibility style</a>, an optional
- <a href="#callingconv">calling convention</a>,
- an optional <tt>unnamed_addr</tt> attribute, a return type, an optional
- <a href="#paramattrs">parameter attribute</a> for the return type, a function
- name, a (possibly empty) argument list (each with optional
- <a href="#paramattrs">parameter attributes</a>), optional
- <a href="#fnattrs">function attributes</a>, an optional section, an optional
- alignment, an optional <a href="#gc">garbage collector name</a>, an opening
- curly brace, a list of basic blocks, and a closing curly brace.</p>
-
-<p>LLVM function declarations consist of the "<tt>declare</tt>" keyword, an
- optional <a href="#linkage">linkage type</a>, an optional
- <a href="#visibility">visibility style</a>, an optional
- <a href="#callingconv">calling convention</a>,
- an optional <tt>unnamed_addr</tt> attribute, a return type, an optional
- <a href="#paramattrs">parameter attribute</a> for the return type, a function
- name, a possibly empty list of arguments, an optional alignment, and an
- optional <a href="#gc">garbage collector name</a>.</p>
-
-<p>A function definition contains a list of basic blocks, forming the CFG
- (Control Flow Graph) for the function. Each basic block may optionally start
- with a label (giving the basic block a symbol table entry), contains a list
- of instructions, and ends with a <a href="#terminators">terminator</a>
- instruction (such as a branch or function return).</p>
-
-<p>The first basic block in a function is special in two ways: it is immediately
- executed on entrance to the function, and it is not allowed to have
- predecessor basic blocks (i.e. there can not be any branches to the entry
- block of a function). Because the block can have no predecessors, it also
- cannot have any <a href="#i_phi">PHI nodes</a>.</p>
-
-<p>LLVM allows an explicit section to be specified for functions. If the target
- supports it, it will emit functions to the section specified.</p>
-
-<p>An explicit alignment may be specified for a function. If not present, or if
- the alignment is set to zero, the alignment of the function is set by the
- target to whatever it feels convenient. If an explicit alignment is
- specified, the function is forced to have at least that much alignment. All
- alignments must be a power of 2.</p>
-
-<p>If the <tt>unnamed_addr</tt> attribute is given, the address is known not
-   to be significant, and two identical functions can be merged.</p>
-
-<h5>Syntax:</h5>
-<pre class="doc_code">
-define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
- [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>]
- &lt;ResultType&gt; @&lt;FunctionName&gt; ([argument list])
- [<a href="#fnattrs">fn Attrs</a>] [section "name"] [align N]
- [<a href="#gc">gc</a>] { ... }
-</pre>
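-
-<p>As a concrete sketch (the function name is hypothetical), a definition
-   combining several of these pieces might look like:</p>
-
-<pre class="doc_code">
-define internal fastcc i32 @inc(i32 %x) nounwind {
-entry:
-  %r = add i32 %x, 1
-  ret i32 %r
-}
-</pre>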
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="aliasstructure">Aliases</a>
-</h3>
-
-<div>
-
-<p>Aliases act as a second name for the aliasee value (which can be a
-   function, a global variable, another alias, or a bitcast of a global
-   value). Aliases may have an optional <a href="#linkage">linkage type</a>,
-   and an optional <a href="#visibility">visibility style</a>.</p>
-
-<h5>Syntax:</h5>
-<pre class="doc_code">
-@&lt;Name&gt; = alias [Linkage] [Visibility] &lt;AliaseeTy&gt; @&lt;Aliasee&gt;
-</pre>
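-
-<p>For example, assuming a function <tt>@inc</tt> of type <tt>i32 (i32)</tt>
-   is in scope, a hypothetical alias to it could be written as:</p>
-
-<pre class="doc_code">
-@inc_alias = alias internal i32 (i32)* @inc
-</pre>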
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="namedmetadatastructure">Named Metadata</a>
-</h3>
-
-<div>
-
-<p>Named metadata is a collection of metadata. <a href="#metadata">Metadata
-   nodes</a> (but not metadata strings) are the only valid operands for
-   named metadata.</p>
-
-<h5>Syntax:</h5>
-<pre class="doc_code">
-; Some unnamed metadata nodes, which are referenced by the named metadata.
-!0 = metadata !{metadata !"zero"}
-!1 = metadata !{metadata !"one"}
-!2 = metadata !{metadata !"two"}
-; A named metadata.
-!name = !{!0, !1, !2}
-</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="paramattrs">Parameter Attributes</a>
-</h3>
-
-<div>
-
-<p>The return type and each parameter of a function type may have a set of
- <i>parameter attributes</i> associated with them. Parameter attributes are
- used to communicate additional information about the result or parameters of
- a function. Parameter attributes are considered to be part of the function,
- not of the function type, so functions with different parameter attributes
- can have the same function type.</p>
-
-<p>Parameter attributes are simple keywords that follow the type specified. If
- multiple parameter attributes are needed, they are space separated. For
- example:</p>
-
-<pre class="doc_code">
-declare i32 @printf(i8* noalias nocapture, ...)
-declare i32 @atoi(i8 zeroext)
-declare signext i8 @returns_signed_char()
-</pre>
-
-<p>Note that any attributes for the function result (<tt>nounwind</tt>,
- <tt>readonly</tt>) come immediately after the argument list.</p>
-
-<p>Currently, only the following parameter attributes are defined:</p>
-
-<dl>
- <dt><tt><b>zeroext</b></tt></dt>
- <dd>This indicates to the code generator that the parameter or return value
- should be zero-extended to the extent required by the target's ABI (which
-       is usually 32-bits, but is 8-bits for an i1 on x86-64) by the caller (for a
- parameter) or the callee (for a return value).</dd>
-
- <dt><tt><b>signext</b></tt></dt>
- <dd>This indicates to the code generator that the parameter or return value
- should be sign-extended to the extent required by the target's ABI (which
- is usually 32-bits) by the caller (for a parameter) or the callee (for a
- return value).</dd>
-
- <dt><tt><b>inreg</b></tt></dt>
- <dd>This indicates that this parameter or return value should be treated in a
-       special target-dependent fashion while emitting code for a function
- call or return (usually, by putting it in a register as opposed to memory,
- though some targets use it to distinguish between two different kinds of
- registers). Use of this attribute is target-specific.</dd>
-
- <dt><tt><b><a name="byval">byval</a></b></tt></dt>
- <dd><p>This indicates that the pointer parameter should really be passed by
- value to the function. The attribute implies that a hidden copy of the
- pointee
- is made between the caller and the callee, so the callee is unable to
- modify the value in the caller. This attribute is only valid on LLVM
- pointer arguments. It is generally used to pass structs and arrays by
- value, but is also valid on pointers to scalars. The copy is considered
- to belong to the caller not the callee (for example,
- <tt><a href="#readonly">readonly</a></tt> functions should not write to
- <tt>byval</tt> parameters). This is not a valid attribute for return
- values.</p>
-
- <p>The byval attribute also supports specifying an alignment with
- the align attribute. It indicates the alignment of the stack slot to
- form and the known alignment of the pointer specified to the call site. If
- the alignment is not specified, then the code generator makes a
- target-specific assumption.</p></dd>
-
- <dt><tt><b><a name="sret">sret</a></b></tt></dt>
- <dd>This indicates that the pointer parameter specifies the address of a
- structure that is the return value of the function in the source program.
- This pointer must be guaranteed by the caller to be valid: loads and
-       stores to the structure may be assumed by the callee not to trap and
- to be properly aligned. This may only be applied to the first parameter.
- This is not a valid attribute for return values. </dd>
-
- <dt><tt><b><a name="noalias">noalias</a></b></tt></dt>
- <dd>This indicates that pointer values
- <a href="#pointeraliasing"><i>based</i></a> on the argument or return
- value do not alias pointer values which are not <i>based</i> on it,
- ignoring certain "irrelevant" dependencies.
- For a call to the parent function, dependencies between memory
- references from before or after the call and from those during the call
- are "irrelevant" to the <tt>noalias</tt> keyword for the arguments and
- return value used in that call.
- The caller shares the responsibility with the callee for ensuring that
- these requirements are met.
- For further details, please see the discussion of the NoAlias response in
- <a href="AliasAnalysis.html#MustMayNo">alias analysis</a>.<br>
-<br>
- Note that this definition of <tt>noalias</tt> is intentionally
- similar to the definition of <tt>restrict</tt> in C99 for function
- arguments, though it is slightly weaker.
-<br>
- For function return values, C99's <tt>restrict</tt> is not meaningful,
- while LLVM's <tt>noalias</tt> is.
- </dd>
-
- <dt><tt><b><a name="nocapture">nocapture</a></b></tt></dt>
- <dd>This indicates that the callee does not make any copies of the pointer
- that outlive the callee itself. This is not a valid attribute for return
- values.</dd>
-
- <dt><tt><b><a name="nest">nest</a></b></tt></dt>
- <dd>This indicates that the pointer parameter can be excised using the
- <a href="#int_trampoline">trampoline intrinsics</a>. This is not a valid
- attribute for return values.</dd>
-</dl>
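-
-<p>As an illustrative sketch (the type and function names are hypothetical),
-   several of these attributes might be combined as follows:</p>
-
-<pre class="doc_code">
-%pair = type { i32, i32 }
-declare void @consume(%pair* byval align 8, i8* noalias nocapture)
-declare i32 @tramp_target(i8* nest, i32 zeroext)
-</pre>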
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="gc">Garbage Collector Names</a>
-</h3>
-
-<div>
-
-<p>Each function may specify a garbage collector name, which is simply a
- string:</p>
-
-<pre class="doc_code">
-define void @f() gc "name" { ... }
-</pre>
-
-<p>The compiler declares the supported values of <i>name</i>. Specifying a
-   collector will cause the compiler to alter its output in order to
-   support the named garbage collection algorithm.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="fnattrs">Function Attributes</a>
-</h3>
-
-<div>
-
-<p>Function attributes are set to communicate additional information about a
- function. Function attributes are considered to be part of the function, not
- of the function type, so functions with different parameter attributes can
- have the same function type.</p>
-
-<p>Function attributes are simple keywords that follow the type specified. If
- multiple attributes are needed, they are space separated. For example:</p>
-
-<pre class="doc_code">
-define void @f() noinline { ... }
-define void @f() alwaysinline { ... }
-define void @f() alwaysinline optsize { ... }
-define void @f() optsize { ... }
-</pre>
-
-<dl>
- <dt><tt><b>address_safety</b></tt></dt>
- <dd>This attribute indicates that the address safety analysis
- is enabled for this function. </dd>
-
- <dt><tt><b>alignstack(&lt;<em>n</em>&gt;)</b></tt></dt>
- <dd>This attribute indicates that, when emitting the prologue and epilogue,
- the backend should forcibly align the stack pointer. Specify the
-       desired alignment, which must be a power of two, in parentheses.</dd>
-
- <dt><tt><b>alwaysinline</b></tt></dt>
- <dd>This attribute indicates that the inliner should attempt to inline this
- function into callers whenever possible, ignoring any active inlining size
- threshold for this caller.</dd>
-
- <dt><tt><b>nonlazybind</b></tt></dt>
- <dd>This attribute suppresses lazy symbol binding for the function. This
- may make calls to the function faster, at the cost of extra program
- startup time if the function is not called during program startup.</dd>
-
- <dt><tt><b>inlinehint</b></tt></dt>
- <dd>This attribute indicates that the source code contained a hint that inlining
- this function is desirable (such as the "inline" keyword in C/C++). It
- is just a hint; it imposes no requirements on the inliner.</dd>
-
- <dt><tt><b>naked</b></tt></dt>
- <dd>This attribute disables prologue / epilogue emission for the function.
- This can have very system-specific consequences.</dd>
-
- <dt><tt><b>noimplicitfloat</b></tt></dt>
-  <dd>This attribute disables implicit floating point instructions.</dd>
-
- <dt><tt><b>noinline</b></tt></dt>
- <dd>This attribute indicates that the inliner should never inline this
- function in any situation. This attribute may not be used together with
- the <tt>alwaysinline</tt> attribute.</dd>
-
- <dt><tt><b>noredzone</b></tt></dt>
- <dd>This attribute indicates that the code generator should not use a red
- zone, even if the target-specific ABI normally permits it.</dd>
-
- <dt><tt><b>noreturn</b></tt></dt>
- <dd>This function attribute indicates that the function never returns
- normally. This produces undefined behavior at runtime if the function
- ever does dynamically return.</dd>
-
- <dt><tt><b>nounwind</b></tt></dt>
- <dd>This function attribute indicates that the function never returns with an
- unwind or exceptional control flow. If the function does unwind, its
- runtime behavior is undefined.</dd>
-
- <dt><tt><b>optsize</b></tt></dt>
- <dd>This attribute suggests that optimization passes and code generator passes
- make choices that keep the code size of this function low, and otherwise
- do optimizations specifically to reduce code size.</dd>
-
- <dt><tt><b>readnone</b></tt></dt>
- <dd>This attribute indicates that the function computes its result (or decides
- to unwind an exception) based strictly on its arguments, without
- dereferencing any pointer arguments or otherwise accessing any mutable
- state (e.g. memory, control registers, etc) visible to caller functions.
- It does not write through any pointer arguments
- (including <tt><a href="#byval">byval</a></tt> arguments) and never
- changes any state visible to callers. This means that it cannot unwind
- exceptions by calling the <tt>C++</tt> exception throwing methods.</dd>
-
- <dt><tt><b><a name="readonly">readonly</a></b></tt></dt>
- <dd>This attribute indicates that the function does not write through any
- pointer arguments (including <tt><a href="#byval">byval</a></tt>
- arguments) or otherwise modify any state (e.g. memory, control registers,
- etc) visible to caller functions. It may dereference pointer arguments
- and read state that may be set in the caller. A readonly function always
- returns the same value (or unwinds an exception identically) when called
- with the same set of arguments and global state. It cannot unwind an
- exception by calling the <tt>C++</tt> exception throwing methods.</dd>
-
- <dt><tt><b><a name="returns_twice">returns_twice</a></b></tt></dt>
- <dd>This attribute indicates that this function can return twice. The
- C <code>setjmp</code> is an example of such a function. The compiler
- disables some optimizations (like tail calls) in the caller of these
- functions.</dd>
-
- <dt><tt><b><a name="ssp">ssp</a></b></tt></dt>
- <dd>This attribute indicates that the function should emit a stack smashing
- protector. It is in the form of a "canary"&mdash;a random value placed on
- the stack before the local variables that's checked upon return from the
- function to see if it has been overwritten. A heuristic is used to
- determine if a function needs stack protectors or not.<br>
-<br>
- If a function that has an <tt>ssp</tt> attribute is inlined into a
- function that doesn't have an <tt>ssp</tt> attribute, then the resulting
- function will have an <tt>ssp</tt> attribute.</dd>
-
- <dt><tt><b>sspreq</b></tt></dt>
- <dd>This attribute indicates that the function should <em>always</em> emit a
- stack smashing protector. This overrides
- the <tt><a href="#ssp">ssp</a></tt> function attribute.<br>
-<br>
- If a function that has an <tt>sspreq</tt> attribute is inlined into a
- function that doesn't have an <tt>sspreq</tt> attribute or which has
- an <tt>ssp</tt> attribute, then the resulting function will have
- an <tt>sspreq</tt> attribute.</dd>
-
- <dt><tt><b><a name="uwtable">uwtable</a></b></tt></dt>
-  <dd>This attribute indicates that the ABI being targeted requires that
-      an unwind table entry be produced for this function even if we can
-      show that no exceptions pass through it. This is normally the case for
-      the ELF x86-64 ABI, but it can be disabled for some compilation
-      units.</dd>
-</dl>
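-
-<p>For example, a hypothetical function combining several of these
-   attributes might be written as:</p>
-
-<pre class="doc_code">
-define i32 @read_flag(i32* %p) nounwind readonly optsize {
-entry:
-  %v = load i32* %p
-  ret i32 %v
-}
-</pre>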
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="moduleasm">Module-Level Inline Assembly</a>
-</h3>
-
-<div>
-
-<p>Modules may contain "module-level inline asm" blocks, which correspond to
- the GCC "file scope inline asm" blocks. These blocks are internally
- concatenated by LLVM and treated as a single unit, but may be separated in
- the <tt>.ll</tt> file if desired. The syntax is very simple:</p>
-
-<pre class="doc_code">
-module asm "inline asm code goes here"
-module asm "more can go here"
-</pre>
-
-<p>The strings can contain any character by escaping non-printable characters.
- The escape sequence used is simply "\xx" where "xx" is the two digit hex code
- for the number.</p>
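-
-<p>For example, a hypothetical directive beginning with a tab character
-   could be written using the "\09" escape:</p>
-
-<pre class="doc_code">
-module asm "\09.globl my_helper"
-</pre>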
-
-<p>The inline asm code is simply printed to the machine code .s file when
- assembly code is generated.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="datalayout">Data Layout</a>
-</h3>
-
-<div>
-
-<p>A module may specify a target specific data layout string that specifies how
- data is to be laid out in memory. The syntax for the data layout is
- simply:</p>
-
-<pre class="doc_code">
-target datalayout = "<i>layout specification</i>"
-</pre>
-
-<p>The <i>layout specification</i> consists of a list of specifications
- separated by the minus sign character ('-'). Each specification starts with
- a letter and may include other information after the letter to define some
- aspect of the data layout. The specifications accepted are as follows:</p>
-
-<dl>
- <dt><tt>E</tt></dt>
- <dd>Specifies that the target lays out data in big-endian form. That is, the
- bits with the most significance have the lowest address location.</dd>
-
- <dt><tt>e</tt></dt>
- <dd>Specifies that the target lays out data in little-endian form. That is,
- the bits with the least significance have the lowest address
- location.</dd>
-
- <dt><tt>S<i>size</i></tt></dt>
- <dd>Specifies the natural alignment of the stack in bits. Alignment promotion
- of stack variables is limited to the natural stack alignment to avoid
- dynamic stack realignment. The stack alignment must be a multiple of
- 8-bits. If omitted, the natural stack alignment defaults to "unspecified",
- which does not prevent any alignment promotions.</dd>
-
- <dt><tt>p[n]:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
- <dd>This specifies the <i>size</i> of a pointer and its <i>abi</i> and
- <i>preferred</i> alignments for address space <i>n</i>. All sizes are in
- bits. Specifying the <i>pref</i> alignment is optional. If omitted, the
-      preceding <tt>:</tt> should be omitted too. The address space,
-      <i>n</i>, is optional, and if not specified, denotes the default address
- space 0. The value of <i>n</i> must be in the range [1,2^23).</dd>
-
- <dt><tt>i<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
- <dd>This specifies the alignment for an integer type of a given bit
- <i>size</i>. The value of <i>size</i> must be in the range [1,2^23).</dd>
-
- <dt><tt>v<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
- <dd>This specifies the alignment for a vector type of a given bit
- <i>size</i>.</dd>
-
- <dt><tt>f<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
- <dd>This specifies the alignment for a floating point type of a given bit
- <i>size</i>. Only values of <i>size</i> that are supported by the target
- will work. 32 (float) and 64 (double) are supported on all targets;
- 80 or 128 (different flavors of long double) are also supported on some
-      targets.</dd>
-
- <dt><tt>a<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
- <dd>This specifies the alignment for an aggregate type of a given bit
- <i>size</i>.</dd>
-
- <dt><tt>s<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
- <dd>This specifies the alignment for a stack object of a given bit
- <i>size</i>.</dd>
-
- <dt><tt>n<i>size1</i>:<i>size2</i>:<i>size3</i>...</tt></dt>
- <dd>This specifies a set of native integer widths for the target CPU
- in bits. For example, it might contain "n32" for 32-bit PowerPC,
- "n32:64" for PowerPC 64, or "n8:16:32:64" for X86-64. Elements of
- this set are considered to support most general arithmetic
- operations efficiently.</dd>
-</dl>
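-
-<p>For example, a hypothetical little-endian 32-bit layout string might be
-   decoded as follows:</p>
-
-<pre class="doc_code">
-target datalayout = "e-p:32:32:32-i64:32:64-n32-S32"
-<i>; e          little-endian</i>
-<i>; p:32:32:32 32-bit pointers with 32-bit ABI and preferred alignment</i>
-<i>; i64:32:64  i64 has 32-bit ABI alignment, 64-bit preferred alignment</i>
-<i>; n32        32-bit integers are native</i>
-<i>; S32        the natural stack alignment is 32 bits</i>
-</pre>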
-
-<p>When constructing the data layout for a given target, LLVM starts with a
- default set of specifications which are then (possibly) overridden by the
- specifications in the <tt>datalayout</tt> keyword. The default specifications
- are given in this list:</p>
-
-<ul>
- <li><tt>E</tt> - big endian</li>
- <li><tt>p:64:64:64</tt> - 64-bit pointers with 64-bit alignment</li>
- <li><tt>p1:32:32:32</tt> - 32-bit pointers with 32-bit alignment for
- address space 1</li>
- <li><tt>p2:16:32:32</tt> - 16-bit pointers with 32-bit alignment for
- address space 2</li>
- <li><tt>i1:8:8</tt> - i1 is 8-bit (byte) aligned</li>
- <li><tt>i8:8:8</tt> - i8 is 8-bit (byte) aligned</li>
- <li><tt>i16:16:16</tt> - i16 is 16-bit aligned</li>
- <li><tt>i32:32:32</tt> - i32 is 32-bit aligned</li>
- <li><tt>i64:32:64</tt> - i64 has ABI alignment of 32-bits but preferred
- alignment of 64-bits</li>
- <li><tt>f32:32:32</tt> - float is 32-bit aligned</li>
- <li><tt>f64:64:64</tt> - double is 64-bit aligned</li>
- <li><tt>v64:64:64</tt> - 64-bit vector is 64-bit aligned</li>
- <li><tt>v128:128:128</tt> - 128-bit vector is 128-bit aligned</li>
- <li><tt>a0:0:1</tt> - aggregates are 8-bit aligned</li>
- <li><tt>s0:64:64</tt> - stack objects are 64-bit aligned</li>
-</ul>
-
-<p>When LLVM is determining the alignment for a given type, it uses the
- following rules:</p>
-
-<ol>
- <li>If the type sought is an exact match for one of the specifications, that
- specification is used.</li>
-
- <li>If no match is found, and the type sought is an integer type, then the
- smallest integer type that is larger than the bitwidth of the sought type
- is used. If none of the specifications are larger than the bitwidth then
- the largest integer type is used. For example, given the default
- specifications above, the i7 type will use the alignment of i8 (next
- largest) while both i65 and i256 will use the alignment of i64 (largest
- specified).</li>
-
- <li>If no match is found, and the type sought is a vector type, then the
- largest vector type that is smaller than the sought vector type will be
- used as a fall back. This happens because &lt;128 x double&gt; can be
- implemented in terms of 64 &lt;2 x double&gt;, for example.</li>
-</ol>
-
-<p>The function of the data layout string may not be what you expect. Notably,
- this is not a specification from the frontend of what alignment the code
- generator should use.</p>
-
-<p>Instead, if specified, the target data layout is required to match what the
- ultimate <em>code generator</em> expects. This string is used by the
- mid-level optimizers to
- improve code, and this only works if it matches what the ultimate code
- generator uses. If you would like to generate IR that does not embed this
- target-specific detail into the IR, then you don't have to specify the
- string. This will disable some optimizations that require precise layout
- information, but this also prevents those optimizations from introducing
- target specificity into the IR.</p>
-
-
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="pointeraliasing">Pointer Aliasing Rules</a>
-</h3>
-
-<div>
-
-<p>Any memory access must be done through a pointer value associated
-with an address range of the memory access, otherwise the behavior
-is undefined. Pointer values are associated with address ranges
-according to the following rules:</p>
-
-<ul>
- <li>A pointer value is associated with the addresses associated with
- any value it is <i>based</i> on.
- <li>An address of a global variable is associated with the address
- range of the variable's storage.</li>
- <li>The result value of an allocation instruction is associated with
- the address range of the allocated storage.</li>
- <li>A null pointer in the default address-space is associated with
- no address.</li>
- <li>An integer constant other than zero or a pointer value returned
- from a function not defined within LLVM may be associated with address
- ranges allocated through mechanisms other than those provided by
- LLVM. Such ranges shall not overlap with any ranges of addresses
- allocated by mechanisms provided by LLVM.</li>
-</ul>
-
-<p>A pointer value is <i>based</i> on another pointer value according
- to the following rules:</p>
-
-<ul>
- <li>A pointer value formed from a
- <tt><a href="#i_getelementptr">getelementptr</a></tt> operation
- is <i>based</i> on the first operand of the <tt>getelementptr</tt>.</li>
- <li>The result value of a
- <tt><a href="#i_bitcast">bitcast</a></tt> is <i>based</i> on the operand
- of the <tt>bitcast</tt>.</li>
- <li>A pointer value formed by an
- <tt><a href="#i_inttoptr">inttoptr</a></tt> is <i>based</i> on all
- pointer values that contribute (directly or indirectly) to the
- computation of the pointer's value.</li>
- <li>The "<i>based</i> on" relationship is transitive.</li>
-</ul>
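-
-<p>For example, in the following hypothetical fragment, <tt>%p</tt> is
-   <i>based</i> on <tt>%base</tt>, and <tt>%q</tt> is <i>based</i> on
-   <tt>%p</tt> (and, transitively, on <tt>%base</tt>):</p>
-
-<pre class="doc_code">
-%p = getelementptr i32* %base, i64 1
-%q = bitcast i32* %p to i8*
-</pre>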
-
-<p>Note that this definition of <i>"based"</i> is intentionally
- similar to the definition of <i>"based"</i> in C99, though it is
- slightly weaker.</p>
-
-<p>LLVM IR does not associate types with memory. The result type of a
-<tt><a href="#i_load">load</a></tt> merely indicates the size and
-alignment of the memory from which to load, as well as the
-interpretation of the value. The first operand type of a
-<tt><a href="#i_store">store</a></tt> similarly only indicates the size
-and alignment of the store.</p>
-
-<p>Consequently, type-based alias analysis, aka TBAA, aka
-<tt>-fstrict-aliasing</tt>, is not applicable to general unadorned
-LLVM IR. <a href="#metadata">Metadata</a> may be used to encode
-additional information which specialized optimization passes may use
-to implement type-based alias analysis.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="volatile">Volatile Memory Accesses</a>
-</h3>
-
-<div>
-
-<p>Certain memory accesses, such as <a href="#i_load"><tt>load</tt></a>s, <a
-href="#i_store"><tt>store</tt></a>s, and <a
-href="#int_memcpy"><tt>llvm.memcpy</tt></a>s may be marked <tt>volatile</tt>.
-The optimizers must not change the number of volatile operations or change their
-order of execution relative to other volatile operations. The optimizers
-<i>may</i> change the order of volatile operations relative to non-volatile
-operations. This is not Java's "volatile" and has no cross-thread
-synchronization behavior.</p>
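-
-<p>For example, accesses to a hypothetical memory-mapped device register
-   <tt>%mmio</tt> could be marked volatile so that they are neither removed
-   nor reordered with respect to other volatile operations:</p>
-
-<pre class="doc_code">
-%v = load volatile i32* %mmio
-store volatile i32 %v, i32* %mmio
-</pre>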
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="memmodel">Memory Model for Concurrent Operations</a>
-</h3>
-
-<div>
-
-<p>The LLVM IR does not define any way to start parallel threads of execution
-or to register signal handlers. Nonetheless, there are platform-specific
-ways to create them, and we define LLVM IR's behavior in their presence. This
-model is inspired by the C++0x memory model.</p>
-
-<p>For a more informal introduction to this model, see the
-<a href="Atomics.html">LLVM Atomic Instructions and Concurrency Guide</a>.
-
-<p>We define a <i>happens-before</i> partial order as the least partial order
-that</p>
-<ul>
- <li>Is a superset of single-thread program order, and</li>
-  <li>When <tt>a</tt> <i>synchronizes-with</i> <tt>b</tt>, includes an edge from
- <tt>a</tt> to <tt>b</tt>. <i>Synchronizes-with</i> pairs are introduced
- by platform-specific techniques, like pthread locks, thread
- creation, thread joining, etc., and by atomic instructions.
- (See also <a href="#ordering">Atomic Memory Ordering Constraints</a>).
- </li>
-</ul>
-
-<p>Note that program order does not introduce <i>happens-before</i> edges
-between a thread and signals executing inside that thread.</p>
-
-<p>Every (defined) read operation (load instructions, memcpy, atomic
-loads/read-modify-writes, etc.) <var>R</var> reads a series of bytes written by
-(defined) write operations (store instructions, atomic
-stores/read-modify-writes, memcpy, etc.). For the purposes of this section,
-initialized globals are considered to have a write of the initializer which is
-atomic and happens before any other read or write of the memory in question.
-For each byte of a read <var>R</var>, <var>R<sub>byte</sub></var> may see
-any write to the same byte, except:</p>
-
-<ul>
- <li>If <var>write<sub>1</sub></var> happens before
- <var>write<sub>2</sub></var>, and <var>write<sub>2</sub></var> happens
- before <var>R<sub>byte</sub></var>, then <var>R<sub>byte</sub></var>
-      does not see <var>write<sub>1</sub></var>.</li>
- <li>If <var>R<sub>byte</sub></var> happens before
- <var>write<sub>3</sub></var>, then <var>R<sub>byte</sub></var> does not
-      see <var>write<sub>3</sub></var>.</li>
-</ul>
-
-<p>Given that definition, <var>R<sub>byte</sub></var> is defined as follows:</p>
-<ul>
- <li>If <var>R</var> is volatile, the result is target-dependent. (Volatile
- is supposed to give guarantees which can support
- <code>sig_atomic_t</code> in C/C++, and may be used for accesses to
- addresses which do not behave like normal memory. It does not generally
-      provide cross-thread synchronization.)</li>
- <li>Otherwise, if there is no write to the same byte that happens before
- <var>R<sub>byte</sub></var>, <var>R<sub>byte</sub></var> returns
-      <tt>undef</tt> for that byte.</li>
- <li>Otherwise, if <var>R<sub>byte</sub></var> may see exactly one write,
- <var>R<sub>byte</sub></var> returns the value written by that
- write.</li>
- <li>Otherwise, if <var>R</var> is atomic, and all the writes
- <var>R<sub>byte</sub></var> may see are atomic, it chooses one of the
- values written. See the <a href="#ordering">Atomic Memory Ordering
- Constraints</a> section for additional constraints on how the choice
-      is made.</li>
- <li>Otherwise <var>R<sub>byte</sub></var> returns <tt>undef</tt>.</li>
-</ul>
-
-<p><var>R</var> returns the value composed of the series of bytes it read.
-This implies that some bytes within the value may be <tt>undef</tt>
-<b>without</b> the entire value being <tt>undef</tt>. Note that this only
-defines the semantics of the operation; it doesn't mean that targets will
-emit more than one instruction to read the series of bytes.</p>
-
-<p>Note that in cases where none of the atomic intrinsics are used, this model
-places only one restriction on IR transformations on top of what is required
-for single-threaded execution: introducing a store to a byte which might not
-otherwise be stored is not allowed in general. (Specifically, in the case
-where another thread might write to and read from an address, introducing a
-store can change a load that may see exactly one write into a load that may
-see multiple writes.)</p>
-
-<!-- FIXME: This model assumes all targets where concurrency is relevant have
-a byte-size store which doesn't affect adjacent bytes. As far as I can tell,
-none of the backends currently in the tree fall into this category; however,
-there might be targets which care. If there are, we want a paragraph
-like the following:
-
-Targets may specify that stores narrower than a certain width are not
-available; on such a target, for the purposes of this model, treat any
-non-atomic write with an alignment or width less than the minimum width
-as if it writes to the relevant surrounding bytes.
--->
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ordering">Atomic Memory Ordering Constraints</a>
-</h3>
-
-<div>
-
-<p>Atomic instructions (<a href="#i_cmpxchg"><code>cmpxchg</code></a>,
-<a href="#i_atomicrmw"><code>atomicrmw</code></a>,
-<a href="#i_fence"><code>fence</code></a>,
-<a href="#i_load"><code>atomic load</code></a>, and
-<a href="#i_store"><code>atomic store</code></a>) take an ordering parameter
-that determines which other atomic instructions on the same address they
-<i>synchronize with</i>. These semantics are borrowed from Java and C++0x,
-but are somewhat more colloquial. If these descriptions aren't precise enough,
-check those specs (see spec references in the
-<a href="Atomics.html#introduction">atomics guide</a>).
-<a href="#i_fence"><code>fence</code></a> instructions
-treat these orderings somewhat differently since they don't take an address.
-See that instruction's documentation for details.</p>
-
-<p>For a simpler introduction to the ordering constraints, see the
-<a href="Atomics.html">LLVM Atomic Instructions and Concurrency Guide</a>.</p>
-
-<dl>
-<dt><code>unordered</code></dt>
-<dd>The set of values that can be read is governed by the happens-before
-partial order. A value cannot be read unless some operation wrote it.
-This is intended to provide a guarantee strong enough to model Java's
-non-volatile shared variables. This ordering cannot be specified for
-read-modify-write operations; it is not strong enough to make them atomic
-in any interesting way.</dd>
-<dt><code>monotonic</code></dt>
-<dd>In addition to the guarantees of <code>unordered</code>, there is a single
-total order for modifications by <code>monotonic</code> operations on each
-address. All modification orders must be compatible with the happens-before
-order. There is no guarantee that the modification orders can be combined to
-a global total order for the whole program (and this often will not be
-possible). The read in an atomic read-modify-write operation
-(<a href="#i_cmpxchg"><code>cmpxchg</code></a> and
-<a href="#i_atomicrmw"><code>atomicrmw</code></a>)
-reads the value in the modification order immediately before the value it
-writes. If one atomic read happens before another atomic read of the same
-address, the later read must see the same value or a later value in the
-address's modification order. This disallows reordering of
-<code>monotonic</code> (or stronger) operations on the same address. If an
-address is written <code>monotonic</code>ally by one thread, and other threads
-<code>monotonic</code>ally read that address repeatedly, the other threads must
-eventually see the write. This corresponds to the C++0x/C1x
-<code>memory_order_relaxed</code>.</dd>
-<dt><code>acquire</code></dt>
-<dd>In addition to the guarantees of <code>monotonic</code>,
-a <i>synchronizes-with</i> edge may be formed with a <code>release</code>
-operation. This is intended to model C++'s <code>memory_order_acquire</code>.</dd>
-<dt><code>release</code></dt>
-<dd>In addition to the guarantees of <code>monotonic</code>, if this operation
-writes a value which is subsequently read by an <code>acquire</code> operation,
-it <i>synchronizes-with</i> that operation. (This isn't a complete
-description; see the C++0x definition of a release sequence.) This corresponds
-to the C++0x/C1x <code>memory_order_release</code>.</dd>
-<dt><code>acq_rel</code> (acquire+release)</dt><dd>Acts as both an
-<code>acquire</code> and <code>release</code> operation on its address.
-This corresponds to the C++0x/C1x <code>memory_order_acq_rel</code>.</dd>
-<dt><code>seq_cst</code> (sequentially consistent)</dt>
-<dd>In addition to the guarantees of <code>acq_rel</code>
-(<code>acquire</code> for an operation which only reads, <code>release</code>
-for an operation which only writes), there is a global total order on all
-sequentially-consistent operations on all addresses, which is consistent with
-the <i>happens-before</i> partial order and with the modification orders of
-all the affected addresses. Each sequentially-consistent read sees the last
-preceding write to the same address in this global order. This corresponds
-to the C++0x/C1x <code>memory_order_seq_cst</code> and Java volatile.</dd>
-</dl>
-
-<p id="singlethread">If an atomic operation is marked <code>singlethread</code>,
-it only <i>synchronizes with</i> or participates in modification and seq_cst
-total orderings with other operations running in the same thread (for example,
-in signal handlers).</p>
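-
-<p>As an illustrative sketch (the pointer names are hypothetical), the
-   orderings appear as trailing keywords on the atomic operations:</p>
-
-<pre class="doc_code">
-%old = atomicrmw add i32* %counter, i32 1 seq_cst
-%f = load atomic i32* %flag acquire, align 4
-store atomic i32 1, i32* %flag release, align 4
-fence singlethread seq_cst
-</pre>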
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="typesystem">Type System</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The LLVM type system is one of the most important features of the
- intermediate representation. Being typed enables a number of optimizations
- to be performed on the intermediate representation directly, without having
- to do extra analyses on the side before the transformation. A strong type
- system makes it easier to read the generated code and enables novel analyses
- and transformations that are not feasible to perform on normal three address
- code representations.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="t_classifications">Type Classifications</a>
-</h3>
-
-<div>
-
-<p>The types fall into a few useful classifications:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
- <tbody>
- <tr><th>Classification</th><th>Types</th></tr>
- <tr>
- <td><a href="#t_integer">integer</a></td>
- <td><tt>i1, i2, i3, ... i8, ... i16, ... i32, ... i64, ... </tt></td>
- </tr>
- <tr>
- <td><a href="#t_floating">floating point</a></td>
- <td><tt>half, float, double, x86_fp80, fp128, ppc_fp128</tt></td>
- </tr>
- <tr>
- <td><a name="t_firstclass">first class</a></td>
- <td><a href="#t_integer">integer</a>,
- <a href="#t_floating">floating point</a>,
- <a href="#t_pointer">pointer</a>,
- <a href="#t_vector">vector</a>,
- <a href="#t_struct">structure</a>,
- <a href="#t_array">array</a>,
- <a href="#t_label">label</a>,
- <a href="#t_metadata">metadata</a>.
- </td>
- </tr>
- <tr>
- <td><a href="#t_primitive">primitive</a></td>
- <td><a href="#t_label">label</a>,
- <a href="#t_void">void</a>,
- <a href="#t_integer">integer</a>,
- <a href="#t_floating">floating point</a>,
- <a href="#t_x86mmx">x86mmx</a>,
- <a href="#t_metadata">metadata</a>.</td>
- </tr>
- <tr>
- <td><a href="#t_derived">derived</a></td>
- <td><a href="#t_array">array</a>,
- <a href="#t_function">function</a>,
- <a href="#t_pointer">pointer</a>,
- <a href="#t_struct">structure</a>,
- <a href="#t_vector">vector</a>,
- <a href="#t_opaque">opaque</a>.
- </td>
- </tr>
- </tbody>
-</table>
-
-<p>The <a href="#t_firstclass">first class</a> types are perhaps the most
- important. Values of these types are the only ones which can be produced by
- instructions.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="t_primitive">Primitive Types</a>
-</h3>
-
-<div>
-
-<p>The primitive types are the fundamental building blocks of the LLVM
- system.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_integer">Integer Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The integer type is a simple type that specifies an arbitrary bit width
-   for the integer type desired. Any bit width from 1 bit to
-   2<sup>23</sup>-1 (about 8 million) can be specified.</p>
-
-<h5>Syntax:</h5>
-<pre>
- iN
-</pre>
-
-<p>The number of bits the integer will occupy is specified by the <tt>N</tt>
- value.</p>
-
-<h5>Examples:</h5>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>i1</tt></td>
- <td class="left">a single-bit integer.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>i32</tt></td>
- <td class="left">a 32-bit integer.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>i1942652</tt></td>
- <td class="left">a really big integer of over 1 million bits.</td>
- </tr>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_floating">Floating Point Types</a>
-</h4>
-
-<div>
-
-<table>
- <tbody>
- <tr><th>Type</th><th>Description</th></tr>
- <tr><td><tt>half</tt></td><td>16-bit floating point value</td></tr>
- <tr><td><tt>float</tt></td><td>32-bit floating point value</td></tr>
- <tr><td><tt>double</tt></td><td>64-bit floating point value</td></tr>
- <tr><td><tt>fp128</tt></td><td>128-bit floating point value (112-bit mantissa)</td></tr>
- <tr><td><tt>x86_fp80</tt></td><td>80-bit floating point value (X87)</td></tr>
- <tr><td><tt>ppc_fp128</tt></td><td>128-bit floating point value (two 64-bits)</td></tr>
- </tbody>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_x86mmx">X86mmx Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The x86mmx type represents a value held in an MMX register on an x86 machine. The operations allowed on it are quite limited: parameters and return values, load and store, and bitcast. User-specified MMX instructions are represented as intrinsic or asm calls with arguments and/or results of this type. There are no arrays, vectors or constants of this type.</p>
-
-<h5>Syntax:</h5>
-<pre>
- x86mmx
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_void">Void Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The void type does not represent any value and has no size.</p>
-
-<h5>Syntax:</h5>
-<pre>
- void
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_label">Label Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The label type represents code labels.</p>
-
-<h5>Syntax:</h5>
-<pre>
- label
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_metadata">Metadata Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The metadata type represents embedded metadata. No derived types may be
- created from metadata except for <a href="#t_function">function</a>
-   arguments.</p>
-
-<h5>Syntax:</h5>
-<pre>
- metadata
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="t_derived">Derived Types</a>
-</h3>
-
-<div>
-
-<p>The real power in LLVM comes from the derived types in the system. This is
- what allows a programmer to represent arrays, functions, pointers, and other
-   useful types. Each of these types contains one or more element types, which
-   may be a primitive type or another derived type. For example, it is
- possible to have a two dimensional array, using an array as the element type
- of another array.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_aggregate">Aggregate Types</a>
-</h4>
-
-<div>
-
-<p>Aggregate Types are a subset of derived types that can contain multiple
- member types. <a href="#t_array">Arrays</a> and
- <a href="#t_struct">structs</a> are aggregate types.
- <a href="#t_vector">Vectors</a> are not considered to be aggregate types.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_array">Array Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The array type is a very simple derived type that arranges elements
- sequentially in memory. The array type requires a size (number of elements)
- and an underlying data type.</p>
-
-<h5>Syntax:</h5>
-<pre>
- [&lt;# elements&gt; x &lt;elementtype&gt;]
-</pre>
-
-<p>The number of elements is a constant integer value; <tt>elementtype</tt> may
- be any type with a size.</p>
-
-<h5>Examples:</h5>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>[40 x i32]</tt></td>
- <td class="left">Array of 40 32-bit integer values.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>[41 x i32]</tt></td>
- <td class="left">Array of 41 32-bit integer values.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>[4 x i8]</tt></td>
- <td class="left">Array of 4 8-bit integer values.</td>
- </tr>
-</table>
-<p>Here are some examples of multidimensional arrays:</p>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>[3 x [4 x i32]]</tt></td>
- <td class="left">3x4 array of 32-bit integer values.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>[12 x [10 x float]]</tt></td>
- <td class="left">12x10 array of single precision floating point values.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>[2 x [3 x [4 x i16]]]</tt></td>
- <td class="left">2x3x4 array of 16-bit integer values.</td>
- </tr>
-</table>
-
-<p>There is no restriction on indexing beyond the end of the array implied by
- a static type (though there are restrictions on indexing beyond the bounds
- of an allocated object in some cases). This means that single-dimension
- 'variable sized array' addressing can be implemented in LLVM with a zero
- length array type. An implementation of 'pascal style arrays' in LLVM could
- use the type "<tt>{ i32, [0 x float]}</tt>", for example.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_function">Function Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The function type can be thought of as a function signature. It consists of
- a return type and a list of formal parameter types. The return type of a
- function type is a first class type or a void type.</p>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;returntype&gt; (&lt;parameter list&gt;)
-</pre>
-
-<p>...where '<tt>&lt;parameter list&gt;</tt>' is a comma-separated list of type
- specifiers. Optionally, the parameter list may include a type <tt>...</tt>,
- which indicates that the function takes a variable number of arguments.
- Variable argument functions can access their arguments with
- the <a href="#int_varargs">variable argument handling intrinsic</a>
- functions. '<tt>&lt;returntype&gt;</tt>' is any type except
- <a href="#t_label">label</a>.</p>
-
-<h5>Examples:</h5>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>i32 (i32)</tt></td>
- <td class="left">function taking an <tt>i32</tt>, returning an <tt>i32</tt>
- </td>
- </tr><tr class="layout">
- <td class="left"><tt>float&nbsp;(i16,&nbsp;i32&nbsp;*)&nbsp;*
- </tt></td>
- <td class="left"><a href="#t_pointer">Pointer</a> to a function that takes
- an <tt>i16</tt> and a <a href="#t_pointer">pointer</a> to <tt>i32</tt>,
- returning <tt>float</tt>.
- </td>
- </tr><tr class="layout">
- <td class="left"><tt>i32 (i8*, ...)</tt></td>
- <td class="left">A vararg function that takes at least one
- <a href="#t_pointer">pointer</a> to <tt>i8 </tt> (char in C),
- which returns an integer. This is the signature for <tt>printf</tt> in
- LLVM.
- </td>
- </tr><tr class="layout">
- <td class="left"><tt>{i32, i32} (i32)</tt></td>
- <td class="left">A function taking an <tt>i32</tt>, returning a
- <a href="#t_struct">structure</a> containing two <tt>i32</tt> values
- </td>
- </tr>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_struct">Structure Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The structure type is used to represent a collection of data members together
- in memory. The elements of a structure may be any type that has a size.</p>
-
-<p>Structures in memory are accessed using '<tt><a href="#i_load">load</a></tt>'
- and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a field
- with the '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.
- Structures in registers are accessed using the
- '<tt><a href="#i_extractvalue">extractvalue</a></tt>' and
- '<tt><a href="#i_insertvalue">insertvalue</a></tt>' instructions.</p>
-
-<p>Structures may optionally be "packed" structures, which indicate that the
- alignment of the struct is one byte, and that there is no padding between
- the elements. In non-packed structs, padding between field types is inserted
- as defined by the DataLayout string in the module, which is required to match
- what the underlying code generator expects.</p>
-
-<p>Structures can either be "literal" or "identified". A literal structure is
- defined inline with other types (e.g. <tt>{i32, i32}*</tt>) whereas identified
- types are always defined at the top level with a name. Literal types are
- uniqued by their contents and can never be recursive or opaque since there is
-   no way to write one. Identified types can be recursive, can be opaque, and are
- never uniqued.
-</p>
-
-<h5>Syntax:</h5>
-<pre>
- %T1 = type { &lt;type list&gt; } <i>; Identified normal struct type</i>
- %T2 = type &lt;{ &lt;type list&gt; }&gt; <i>; Identified packed struct type</i>
-</pre>
-
-<h5>Examples:</h5>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>{ i32, i32, i32 }</tt></td>
- <td class="left">A triple of three <tt>i32</tt> values</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>{&nbsp;float,&nbsp;i32&nbsp;(i32)&nbsp;*&nbsp;}</tt></td>
- <td class="left">A pair, where the first element is a <tt>float</tt> and the
- second element is a <a href="#t_pointer">pointer</a> to a
- <a href="#t_function">function</a> that takes an <tt>i32</tt>, returning
- an <tt>i32</tt>.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>&lt;{ i8, i32 }&gt;</tt></td>
- <td class="left">A packed struct known to be 5 bytes in size.</td>
- </tr>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_opaque">Opaque Structure Types</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>Opaque structure types are used to represent named structure types that do
- not have a body specified. This corresponds (for example) to the C notion of
- a forward declared structure.</p>
-
-<h5>Syntax:</h5>
-<pre>
- %X = type opaque
- %52 = type opaque
-</pre>
-
-<h5>Examples:</h5>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>opaque</tt></td>
- <td class="left">An opaque type.</td>
- </tr>
-</table>
-
-</div>
-
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_pointer">Pointer Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The pointer type is used to specify memory locations.
- Pointers are commonly used to reference objects in memory.</p>
-
-<p>Pointer types may have an optional address space attribute defining the
- numbered address space where the pointed-to object resides. The default
- address space is number zero. The semantics of non-zero address
- spaces are target-specific.</p>
-
-<p>Note that LLVM does not permit pointers to void (<tt>void*</tt>) nor does it
- permit pointers to labels (<tt>label*</tt>). Use <tt>i8*</tt> instead.</p>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;type&gt; *
-</pre>
-
-<h5>Examples:</h5>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>[4 x i32]*</tt></td>
- <td class="left">A <a href="#t_pointer">pointer</a> to <a
- href="#t_array">array</a> of four <tt>i32</tt> values.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>i32 (i32*) *</tt></td>
- <td class="left"> A <a href="#t_pointer">pointer</a> to a <a
- href="#t_function">function</a> that takes an <tt>i32*</tt>, returning an
- <tt>i32</tt>.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>i32 addrspace(5)*</tt></td>
- <td class="left">A <a href="#t_pointer">pointer</a> to an <tt>i32</tt> value
- that resides in address space #5.</td>
- </tr>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_vector">Vector Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>A vector type is a simple derived type that represents a vector of elements.
- Vector types are used when multiple primitive values are operated on in parallel
- using a single instruction (SIMD). A vector type requires a size (number of
- elements) and an underlying primitive data type. Vector types are considered
- <a href="#t_firstclass">first class</a>.</p>
-
-<h5>Syntax:</h5>
-<pre>
- &lt; &lt;# elements&gt; x &lt;elementtype&gt; &gt;
-</pre>
-
-<p>The number of elements is a constant integer value larger than 0 (vectors of
- size zero are not allowed); elementtype may be any integer or floating point
- type, or a pointer to one of these types.</p>
-
-<h5>Examples:</h5>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>&lt;4 x i32&gt;</tt></td>
- <td class="left">Vector of 4 32-bit integer values.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>&lt;8 x float&gt;</tt></td>
- <td class="left">Vector of 8 32-bit floating-point values.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>&lt;2 x i64&gt;</tt></td>
- <td class="left">Vector of 2 64-bit integer values.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>&lt;4 x i64*&gt;</tt></td>
- <td class="left">Vector of 4 pointers to 64-bit integer values.</td>
- </tr>
-</table>
-
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="constants">Constants</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM has several different basic types of constants. This section describes
- them all and their syntax.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="simpleconstants">Simple Constants</a>
-</h3>
-
-<div>
-
-<dl>
- <dt><b>Boolean constants</b></dt>
- <dd>The two strings '<tt>true</tt>' and '<tt>false</tt>' are both valid
- constants of the <tt><a href="#t_integer">i1</a></tt> type.</dd>
-
- <dt><b>Integer constants</b></dt>
- <dd>Standard integers (such as '4') are constants of
- the <a href="#t_integer">integer</a> type. Negative numbers may be used
- with integer types.</dd>
-
- <dt><b>Floating point constants</b></dt>
- <dd>Floating point constants use standard decimal notation (e.g. 123.421),
- exponential notation (e.g. 1.23421e+2), or a more precise hexadecimal
- notation (see below). The assembler requires the exact decimal value of a
- floating-point constant. For example, the assembler accepts 1.25 but
- rejects 1.3 because 1.3 is a repeating decimal in binary. Floating point
- constants must have a <a href="#t_floating">floating point</a> type. </dd>
-
- <dt><b>Null pointer constants</b></dt>
- <dd>The identifier '<tt>null</tt>' is recognized as a null pointer constant
- and must be of <a href="#t_pointer">pointer type</a>.</dd>
-</dl>
-
-<p>The one non-intuitive notation for constants is the hexadecimal form of
- floating point constants. For example, the form '<tt>double
- 0x432ff973cafa8000</tt>' is equivalent to (but harder to read than)
- '<tt>double 4.5e+15</tt>'. The only time hexadecimal floating point
- constants are required (and the only time that they are generated by the
- disassembler) is when a floating point constant must be emitted but it cannot
- be represented as a decimal floating point number in a reasonable number of
- digits. For example, NaN's, infinities, and other special values are
- represented in their IEEE hexadecimal format so that assembly and disassembly
- do not cause any bits to change in the constants.</p>
-
-<p>When using the hexadecimal form, constants of types half, float, and double are
- represented using the 16-digit form shown above (which matches the IEEE754
- representation for double); half and float values must, however, be exactly
- representable as IEEE754 half and single precision, respectively.
- Hexadecimal format is always used
- for long double, and there are three forms of long double. The 80-bit format
- used by x86 is represented as <tt>0xK</tt> followed by 20 hexadecimal digits.
- The 128-bit format used by PowerPC (two adjacent doubles) is represented
- by <tt>0xM</tt> followed by 32 hexadecimal digits. The IEEE 128-bit format
- is represented by <tt>0xL</tt> followed by 32 hexadecimal digits; no
- currently supported target uses this format. Long doubles will only work if
- they match the long double format on your target. The IEEE 16-bit format
- (half precision) is represented by <tt>0xH</tt> followed by 4 hexadecimal
- digits. All hexadecimal formats are big-endian (sign bit at the left).</p>
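-
-<p>For illustration, each hexadecimal form written as a global initializer
- (a sketch; all three globals hold the value 1.0):</p>
-
-<pre class="doc_code">
-@f = global float 0x3FF0000000000000         ; float, in the 16-digit double form
-@h = global half 0xH3C00                     ; IEEE half precision
-@x = global x86_fp80 0xK3FFF8000000000000000 ; x86 80-bit extended precision
-</pre>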
-
-<p>There are no constants of type x86mmx.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-<a name="aggregateconstants"></a> <!-- old anchor -->
-<a name="complexconstants">Complex Constants</a>
-</h3>
-
-<div>
-
-<p>Complex constants are a (potentially recursive) combination of simple
- constants and smaller complex constants.</p>
-
-<dl>
- <dt><b>Structure constants</b></dt>
- <dd>Structure constants are represented with notation similar to structure
- type definitions (a comma separated list of elements, surrounded by braces
- (<tt>{}</tt>)). For example: "<tt>{ i32 4, float 17.0, i32* @G }</tt>",
- where "<tt>@G</tt>" is declared as "<tt>@G = external global i32</tt>".
- Structure constants must have <a href="#t_struct">structure type</a>, and
- the number and types of elements must match those specified by the
- type.</dd>
-
- <dt><b>Array constants</b></dt>
- <dd>Array constants are represented with notation similar to array type
- definitions (a comma separated list of elements, surrounded by square
- brackets (<tt>[]</tt>)). For example: "<tt>[ i32 42, i32 11, i32 74
- ]</tt>". Array constants must have <a href="#t_array">array type</a>, and
- the number and types of elements must match those specified by the
- type.</dd>
-
- <dt><b>Vector constants</b></dt>
- <dd>Vector constants are represented with notation similar to vector type
- definitions (a comma separated list of elements, surrounded by
- less-than/greater-than's (<tt>&lt;&gt;</tt>)). For example: "<tt>&lt; i32
- 42, i32 11, i32 74, i32 100 &gt;</tt>". Vector constants must
- have <a href="#t_vector">vector type</a>, and the number and types of
- elements must match those specified by the type.</dd>
-
- <dt><b>Zero initialization</b></dt>
- <dd>The string '<tt>zeroinitializer</tt>' can be used to zero initialize a
- value to zero of <em>any</em> type, including scalar and
- <a href="#t_aggregate">aggregate</a> types.
- This is often used to avoid having to print large zero initializers
- (e.g. for large arrays) and is always exactly equivalent to using explicit
- zero initializers.</dd>
-
- <dt><b>Metadata node</b></dt>
- <dd>A metadata node is a structure-like constant with
- <a href="#t_metadata">metadata type</a>. For example: "<tt>metadata !{
- i32 0, metadata !"test" }</tt>". Unlike other constants that are meant to
- be interpreted as part of the instruction stream, metadata is a place to
- attach additional information such as debug info.</dd>
-</dl>
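-
-<p>For illustration, a few complex constants used as global initializers
- (a sketch; the global names are hypothetical):</p>
-
-<pre class="doc_code">
-@G = external global i32
-@S = global { i32, float, i32* } { i32 4, float 17.0, i32* @G }
-@A = global [3 x i32] [ i32 42, i32 11, i32 74 ]
-@V = global &lt;4 x i32&gt; zeroinitializer
-</pre>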
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="globalconstants">Global Variable and Function Addresses</a>
-</h3>
-
-<div>
-
-<p>The addresses of <a href="#globalvars">global variables</a>
- and <a href="#functionstructure">functions</a> are always implicitly valid
- (link-time) constants. These constants are explicitly referenced when
- the <a href="#identifiers">identifier for the global</a> is used and always
- have <a href="#t_pointer">pointer</a> type. For example, the following is a
- legal LLVM file:</p>
-
-<pre class="doc_code">
-@X = global i32 17
-@Y = global i32 42
-@Z = global [2 x i32*] [ i32* @X, i32* @Y ]
-</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="undefvalues">Undefined Values</a>
-</h3>
-
-<div>
-
-<p>The string '<tt>undef</tt>' can be used anywhere a constant is expected, and
- indicates that the user of the value may receive an unspecified bit-pattern.
- Undefined values may be of any type (other than '<tt>label</tt>'
- or '<tt>void</tt>') and may be used anywhere a constant is permitted.</p>
-
-<p>Undefined values are useful because they indicate to the compiler that the
- program is well defined no matter what value is used. This gives the
- compiler more freedom to optimize. Here are some examples of (potentially
- surprising) transformations that are valid (in pseudo IR):</p>
-
-
-<pre class="doc_code">
- %A = add %X, undef
- %B = sub %X, undef
- %C = xor %X, undef
-Safe:
- %A = undef
- %B = undef
- %C = undef
-</pre>
-
-<p>This is safe because all of the output bits are affected by the undef bits.
- Any output bit can have a zero or one depending on the input bits.</p>
-
-<pre class="doc_code">
- %A = or %X, undef
- %B = and %X, undef
-Safe:
- %A = -1
- %B = 0
-Unsafe:
- %A = undef
- %B = undef
-</pre>
-
-<p>These logical operations have bits that are not always affected by the input.
- For example, if <tt>%X</tt> has a zero bit, then the output of the
- '<tt>and</tt>' operation will always be a zero for that bit, no matter what
- the corresponding bit from the '<tt>undef</tt>' is. As such, it is unsafe to
- optimize or assume that the result of the '<tt>and</tt>' is '<tt>undef</tt>'.
- However, it is safe to assume that all bits of the '<tt>undef</tt>' could be
- 0, and optimize the '<tt>and</tt>' to 0. Likewise, it is safe to assume that
- all the bits of the '<tt>undef</tt>' operand to the '<tt>or</tt>' could be
- set, allowing the '<tt>or</tt>' to be folded to -1.</p>
-
-<pre class="doc_code">
- %A = select undef, %X, %Y
- %B = select undef, 42, %Y
- %C = select %X, %Y, undef
-Safe:
- %A = %X (or %Y)
- %B = 42 (or %Y)
- %C = %Y
-Unsafe:
- %A = undef
- %B = undef
- %C = undef
-</pre>
-
-<p>This set of examples shows that undefined '<tt>select</tt>' (and conditional
- branch) conditions can go <em>either way</em>, but they have to come from one
- of the two operands. In the <tt>%A</tt> example, if <tt>%X</tt> and
- <tt>%Y</tt> were both known to have a clear low bit, then <tt>%A</tt> would
- have to have a cleared low bit. However, in the <tt>%C</tt> example, the
- optimizer is allowed to assume that the '<tt>undef</tt>' operand could be the
- same as <tt>%Y</tt>, allowing the whole '<tt>select</tt>' to be
- eliminated.</p>
-
-<pre class="doc_code">
- %A = xor undef, undef
-
- %B = undef
- %C = xor %B, %B
-
- %D = undef
- %E = icmp lt %D, 4
 %F = icmp ge %D, 4
-
-Safe:
- %A = undef
- %B = undef
- %C = undef
- %D = undef
- %E = undef
- %F = undef
-</pre>
-
-<p>This example points out that two '<tt>undef</tt>' operands are not
- necessarily the same. This can be surprising to people (and also matches C
- semantics) who assume that "<tt>X^X</tt>" is always zero, even
- if <tt>X</tt> is undefined. This isn't true for a number of reasons, but the
- short answer is that an '<tt>undef</tt>' "variable" can arbitrarily change
- its value over its "live range". This is true because the variable doesn't
- actually <em>have a live range</em>. Instead, the value is logically read
- from arbitrary registers that happen to be around when needed, so the value
- is not necessarily consistent over time. In fact, <tt>%A</tt> and <tt>%C</tt>
- need to have the same semantics or the core LLVM "replace all uses with"
- concept would not hold.</p>
-
-<pre class="doc_code">
- %A = fdiv undef, %X
- %B = fdiv %X, undef
-Safe:
- %A = undef
-b: unreachable
-</pre>
-
-<p>These examples show the crucial difference between an <em>undefined
- value</em> and <em>undefined behavior</em>. An undefined value (like
- '<tt>undef</tt>') is allowed to have an arbitrary bit-pattern. This means that
- the <tt>%A</tt> operation can be constant folded to '<tt>undef</tt>', because
- the '<tt>undef</tt>' could be an SNaN, and <tt>fdiv</tt> is not (currently)
- defined on SNaN's. However, in the second example, we can make a more
- aggressive assumption: because the <tt>undef</tt> is allowed to be an
- arbitrary value, we are allowed to assume that it could be zero. Since a
- divide by zero has <em>undefined behavior</em>, we are allowed to assume that
- the operation does not execute at all. This allows us to delete the divide and
- all code after it. Because the undefined operation "can't happen", the
- optimizer can assume that it occurs in dead code.</p>
-
-<pre class="doc_code">
-a: store undef -> %X
-b: store %X -> undef
-Safe:
-a: &lt;deleted&gt;
-b: unreachable
-</pre>
-
-<p>These examples reiterate the <tt>fdiv</tt> example: a store <em>of</em> an
- undefined value can be assumed to not have any effect; we can assume that the
- value is overwritten with bits that happen to match what was already there.
- However, a store <em>to</em> an undefined location could clobber arbitrary
- memory, therefore, it has undefined behavior.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="poisonvalues">Poison Values</a>
-</h3>
-
-<div>
-
-<p>Poison values are similar to <a href="#undefvalues">undef values</a>, however
- they also represent the fact that an instruction or constant expression which
- cannot evoke side effects has nevertheless detected a condition which results
- in undefined behavior.</p>
-
-<p>There is currently no way of representing poison values in the IR; they
- only exist when produced by operations such as
- <a href="#i_add"><tt>add</tt></a> with the <tt>nsw</tt> flag.</p>
-
-<p>Poison value behavior is defined in terms of value <i>dependence</i>:</p>
-
-<ul>
-<li>Values other than <a href="#i_phi"><tt>phi</tt></a> nodes depend on
- their operands.</li>
-
-<li><a href="#i_phi"><tt>Phi</tt></a> nodes depend on the operand corresponding
- to their dynamic predecessor basic block.</li>
-
-<li>Function arguments depend on the corresponding actual argument values in
- the dynamic callers of their functions.</li>
-
-<li><a href="#i_call"><tt>Call</tt></a> instructions depend on the
- <a href="#i_ret"><tt>ret</tt></a> instructions that dynamically transfer
- control back to them.</li>
-
-<li><a href="#i_invoke"><tt>Invoke</tt></a> instructions depend on the
- <a href="#i_ret"><tt>ret</tt></a>, <a href="#i_resume"><tt>resume</tt></a>,
- or exception-throwing call instructions that dynamically transfer control
- back to them.</li>
-
-<li>Non-volatile loads and stores depend on the most recent stores to all of the
- referenced memory addresses, following the order in the IR
- (including loads and stores implied by intrinsics such as
- <a href="#int_memcpy"><tt>@llvm.memcpy</tt></a>.)</li>
-
-<!-- TODO: In the case of multiple threads, this only applies if the store
- "happens-before" the load or store. -->
-
-<!-- TODO: floating-point exception state -->
-
-<li>An instruction with externally visible side effects depends on the most
- recent preceding instruction with externally visible side effects, following
- the order in the IR. (This includes
- <a href="#volatile">volatile operations</a>.)</li>
-
-<li>An instruction <i>control-depends</i> on a
- <a href="#terminators">terminator instruction</a>
- if the terminator instruction has multiple successors and the instruction
- is always executed when control transfers to one of the successors, and
- may not be executed when control is transferred to another.</li>
-
-<li>Additionally, an instruction also <i>control-depends</i> on a terminator
- instruction if the set of instructions it otherwise depends on would be
- different if the terminator had transferred control to a different
- successor.</li>
-
-<li>Dependence is transitive.</li>
-
-</ul>
-
-<p>Poison values have the same behavior as <a href="#undefvalues">undef values</a>,
- with the additional effect that any instruction which has a <i>dependence</i>
- on a poison value has undefined behavior.</p>
-
-<p>Here are some examples:</p>
-
-<pre class="doc_code">
-entry:
- %poison = sub nuw i32 0, 1 ; Results in a poison value.
- %still_poison = and i32 %poison, 0 ; 0, but also poison.
- %poison_yet_again = getelementptr i32* @h, i32 %still_poison
- store i32 0, i32* %poison_yet_again ; memory at @h[0] is poisoned
-
- store i32 %poison, i32* @g ; Poison value stored to memory.
- %poison2 = load i32* @g ; Poison value loaded back from memory.
-
- store volatile i32 %poison, i32* @g ; External observation; undefined behavior.
-
- %narrowaddr = bitcast i32* @g to i16*
- %wideaddr = bitcast i32* @g to i64*
- %poison3 = load i16* %narrowaddr ; Returns a poison value.
- %poison4 = load i64* %wideaddr ; Returns a poison value.
-
- %cmp = icmp slt i32 %poison, 0 ; Returns a poison value.
- br i1 %cmp, label %true, label %end ; Branch to either destination.
-
-true:
- store volatile i32 0, i32* @g ; This is control-dependent on %cmp, so
- ; it has undefined behavior.
- br label %end
-
-end:
- %p = phi i32 [ 0, %entry ], [ 1, %true ]
- ; Both edges into this PHI are
- ; control-dependent on %cmp, so this
- ; always results in a poison value.
-
- store volatile i32 0, i32* @g ; This would depend on the store in %true
- ; if %cmp is true, or the store in %entry
- ; otherwise, so this is undefined behavior.
-
- br i1 %cmp, label %second_true, label %second_end
- ; The same branch again, but this time the
- ; true block doesn't have side effects.
-
-second_true:
- ; No side effects!
- ret void
-
-second_end:
- store volatile i32 0, i32* @g ; This time, the instruction always depends
- ; on the store in %end. Also, it is
- ; control-equivalent to %end, so this is
- ; well-defined (ignoring earlier undefined
- ; behavior in this example).
-</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="blockaddress">Addresses of Basic Blocks</a>
-</h3>
-
-<div>
-
-<p><b><tt>blockaddress(@function, %block)</tt></b></p>
-
-<p>The '<tt>blockaddress</tt>' constant computes the address of the specified
- basic block in the specified function, and always has an i8* type. Taking
- the address of the entry block is illegal.</p>
-
-<p>This value only has defined behavior when used as an operand to the
- '<a href="#i_indirectbr"><tt>indirectbr</tt></a>' instruction, or for
- comparisons against null. Pointer equality tests between label addresses
- result in undefined behavior &mdash; though, again, comparison against null
- is ok, and no label is equal to the null pointer. This may be passed around
- as an opaque, pointer-sized value as long as the bits are not inspected. This
- allows <tt>ptrtoint</tt> and arithmetic to be performed on these values so
- long as the original value is reconstituted before the <tt>indirectbr</tt>
- instruction.</p>
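-
-<p>For illustration, a minimal sketch of the usual pattern (the names
- <tt>@f</tt>, <tt>@dest</tt>, and <tt>%target</tt> are hypothetical):</p>
-
-<pre class="doc_code">
-@dest = global i8* blockaddress(@f, %target)
-
-define void @f() {
-entry:
-  %addr = load i8** @dest                   ; reload the opaque address
-  indirectbr i8* %addr, [ label %target ]   ; all possible destinations listed
-target:
-  ret void
-}
-</pre>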
-
-<p>Finally, some targets may provide defined semantics when using the value as
- the operand to an inline assembly, but that is target specific.</p>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
- <a name="constantexprs">Constant Expressions</a>
-</h3>
-
-<div>
-
-<p>Constant expressions are used to allow expressions involving other constants
- to be used as constants. Constant expressions may be of
- any <a href="#t_firstclass">first class</a> type and may involve any LLVM
- operation that does not have side effects (e.g. load and call are not
- supported). The following is the syntax for constant expressions:</p>
-
-<dl>
- <dt><b><tt>trunc (CST to TYPE)</tt></b></dt>
- <dd>Truncate a constant to another type. The bit size of CST must be larger
- than the bit size of TYPE. Both types must be integers.</dd>
-
- <dt><b><tt>zext (CST to TYPE)</tt></b></dt>
- <dd>Zero extend a constant to another type. The bit size of CST must be
- smaller than the bit size of TYPE. Both types must be integers.</dd>
-
- <dt><b><tt>sext (CST to TYPE)</tt></b></dt>
- <dd>Sign extend a constant to another type. The bit size of CST must be
- smaller than the bit size of TYPE. Both types must be integers.</dd>
-
- <dt><b><tt>fptrunc (CST to TYPE)</tt></b></dt>
- <dd>Truncate a floating point constant to another floating point type. The
- size of CST must be larger than the size of TYPE. Both types must be
- floating point.</dd>
-
- <dt><b><tt>fpext (CST to TYPE)</tt></b></dt>
- <dd>Floating point extend a constant to another type. The size of CST must be
- smaller than or equal to the size of TYPE. Both types must be floating
- point.</dd>
-
- <dt><b><tt>fptoui (CST to TYPE)</tt></b></dt>
- <dd>Convert a floating point constant to the corresponding unsigned integer
- constant. TYPE must be a scalar or vector integer type. CST must be of
- scalar or vector floating point type. Both CST and TYPE must be scalars,
- or vectors of the same number of elements. If the value won't fit in the
- integer type, the results are undefined.</dd>
-
- <dt><b><tt>fptosi (CST to TYPE)</tt></b></dt>
- <dd>Convert a floating point constant to the corresponding signed integer
- constant. TYPE must be a scalar or vector integer type. CST must be of
- scalar or vector floating point type. Both CST and TYPE must be scalars,
- or vectors of the same number of elements. If the value won't fit in the
- integer type, the results are undefined.</dd>
-
- <dt><b><tt>uitofp (CST to TYPE)</tt></b></dt>
- <dd>Convert an unsigned integer constant to the corresponding floating point
- constant. TYPE must be a scalar or vector floating point type. CST must be
- of scalar or vector integer type. Both CST and TYPE must be scalars, or
- vectors of the same number of elements. If the value won't fit in the
- floating point type, the results are undefined.</dd>
-
- <dt><b><tt>sitofp (CST to TYPE)</tt></b></dt>
- <dd>Convert a signed integer constant to the corresponding floating point
- constant. TYPE must be a scalar or vector floating point type. CST must be
- of scalar or vector integer type. Both CST and TYPE must be scalars, or
- vectors of the same number of elements. If the value won't fit in the
- floating point type, the results are undefined.</dd>
-
- <dt><b><tt>ptrtoint (CST to TYPE)</tt></b></dt>
- <dd>Convert a pointer typed constant to the corresponding integer constant.
- <tt>TYPE</tt> must be an integer type. <tt>CST</tt> must be of pointer
- type. The <tt>CST</tt> value is zero extended, truncated, or unchanged to
- make it fit in <tt>TYPE</tt>.</dd>
-
- <dt><b><tt>inttoptr (CST to TYPE)</tt></b></dt>
- <dd>Convert an integer constant to a pointer constant. TYPE must be a pointer
- type. CST must be of integer type. The CST value is zero extended,
- truncated, or unchanged to make it fit in a pointer size. This one is
- <i>really</i> dangerous!</dd>
-
- <dt><b><tt>bitcast (CST to TYPE)</tt></b></dt>
- <dd>Convert a constant, CST, to another TYPE. The constraints of the operands
- are the same as those for the <a href="#i_bitcast">bitcast
- instruction</a>.</dd>
-
- <dt><b><tt>getelementptr (CSTPTR, IDX0, IDX1, ...)</tt></b></dt>
- <dt><b><tt>getelementptr inbounds (CSTPTR, IDX0, IDX1, ...)</tt></b></dt>
- <dd>Perform the <a href="#i_getelementptr">getelementptr operation</a> on
- constants. As with the <a href="#i_getelementptr">getelementptr</a>
- instruction, the index list may have zero or more indexes, which are
- required to make sense for the type of "CSTPTR".</dd>
-
- <dt><b><tt>select (COND, VAL1, VAL2)</tt></b></dt>
- <dd>Perform the <a href="#i_select">select operation</a> on constants.</dd>
-
- <dt><b><tt>icmp COND (VAL1, VAL2)</tt></b></dt>
- <dd>Performs the <a href="#i_icmp">icmp operation</a> on constants.</dd>
-
- <dt><b><tt>fcmp COND (VAL1, VAL2)</tt></b></dt>
- <dd>Performs the <a href="#i_fcmp">fcmp operation</a> on constants.</dd>
-
- <dt><b><tt>extractelement (VAL, IDX)</tt></b></dt>
- <dd>Perform the <a href="#i_extractelement">extractelement operation</a> on
- constants.</dd>
-
- <dt><b><tt>insertelement (VAL, ELT, IDX)</tt></b></dt>
- <dd>Perform the <a href="#i_insertelement">insertelement operation</a> on
- constants.</dd>
-
- <dt><b><tt>shufflevector (VEC1, VEC2, IDXMASK)</tt></b></dt>
- <dd>Perform the <a href="#i_shufflevector">shufflevector operation</a> on
- constants.</dd>
-
- <dt><b><tt>extractvalue (VAL, IDX0, IDX1, ...)</tt></b></dt>
- <dd>Perform the <a href="#i_extractvalue">extractvalue operation</a> on
- constants. The index list is interpreted in a similar manner as indices in
- a '<a href="#i_getelementptr">getelementptr</a>' operation. At least one
- index value must be specified.</dd>
-
- <dt><b><tt>insertvalue (VAL, ELT, IDX0, IDX1, ...)</tt></b></dt>
- <dd>Perform the <a href="#i_insertvalue">insertvalue operation</a> on
- constants. The index list is interpreted in a similar manner as indices in
- a '<a href="#i_getelementptr">getelementptr</a>' operation. At least one
- index value must be specified.</dd>
-
- <dt><b><tt>OPCODE (LHS, RHS)</tt></b></dt>
- <dd>Perform the specified operation of the LHS and RHS constants. OPCODE may
- be any of the <a href="#binaryops">binary</a>
- or <a href="#bitwiseops">bitwise binary</a> operations. The constraints
- on operands are the same as those for the corresponding instruction
- (e.g. no bitwise operations on floating point values are allowed).</dd>
-</dl>
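-
-<p>For illustration, a few constant expressions used as global initializers
- (a sketch; the global names are hypothetical):</p>
-
-<pre class="doc_code">
-@g = global [4 x i32] zeroinitializer
-@p = global i32* getelementptr inbounds ([4 x i32]* @g, i64 0, i64 2)
-@n = global i64 ptrtoint ([4 x i32]* @g to i64)
-</pre>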
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="othervalues">Other Values</a></h2>
-<!-- *********************************************************************** -->
-<div>
-<!-- ======================================================================= -->
-<h3>
-<a name="inlineasm">Inline Assembler Expressions</a>
-</h3>
-
-<div>
-
-<p>LLVM supports inline assembler expressions (as opposed
- to <a href="#moduleasm">Module-Level Inline Assembly</a>) through the use of
- a special value. This value represents the inline assembler as a string
- (containing the instructions to emit), a list of operand constraints (stored
- as a string), a flag that indicates whether or not the inline asm
- expression has side effects, and a flag indicating whether the function
- containing the asm needs to align its stack conservatively. An example
- inline assembler expression is:</p>
-
-<pre class="doc_code">
-i32 (i32) asm "bswap $0", "=r,r"
-</pre>
-
-<p>Inline assembler expressions may <b>only</b> be used as the callee operand of
- a <a href="#i_call"><tt>call</tt></a> or an
- <a href="#i_invoke"><tt>invoke</tt></a> instruction.
- Thus, typically we have:</p>
-
-<pre class="doc_code">
-%X = call i32 asm "<a href="#int_bswap">bswap</a> $0", "=r,r"(i32 %Y)
-</pre>
-
-<p>Inline asms with side effects not visible in the constraint list must be
- marked as having side effects. This is done through the use of the
- '<tt>sideeffect</tt>' keyword, like so:</p>
-
-<pre class="doc_code">
-call void asm sideeffect "eieio", ""()
-</pre>
-
-<p>In some cases inline asms will contain code that will not work unless the
- stack is aligned in some way (such as calls or SSE instructions on x86),
- yet will not contain the code that performs that alignment within the asm.
- The compiler should make conservative assumptions about what the asm might
- contain and should generate its usual stack alignment code in the prologue
- if the '<tt>alignstack</tt>' keyword is present:</p>
-
-<pre class="doc_code">
-call void asm alignstack "eieio", ""()
-</pre>
-
-<p>Inline asms also support using non-standard assembly dialects. The assumed
- dialect is ATT. When the '<tt>inteldialect</tt>' keyword is present, the
- inline asm is using the Intel dialect. Currently, ATT and Intel are the
- only supported dialects. An example is:</p>
-
-<pre class="doc_code">
-call void asm inteldialect "eieio", ""()
-</pre>
-
-<p>If multiple keywords appear, the '<tt>sideeffect</tt>' keyword must come
- first, the '<tt>alignstack</tt>' keyword second, and the
- '<tt>inteldialect</tt>' keyword last.</p>
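-
-<p>For illustration, all three keywords combined in that order (a sketch; the
- asm string is a placeholder):</p>
-
-<pre class="doc_code">
-call void asm sideeffect alignstack inteldialect "nop", ""()
-</pre>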
-
-<!--
-<p>TODO: The format of the asm and constraints string still need to be
- documented here. Constraints on what can be done (e.g. duplication, moving,
- etc need to be documented). This is probably best done by reference to
- another document that covers inline asm from a holistic perspective.</p>
- -->
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="inlineasm_md">Inline Asm Metadata</a>
-</h4>
-
-<div>
-
-<p>The call instructions that wrap inline asm nodes may have a
- "<tt>!srcloc</tt>" MDNode attached to them that contains a list of constant
- integers. If present, the code generator will use the integer as the
- location cookie value when reporting errors through the <tt>LLVMContext</tt>
- error reporting mechanisms. This allows a front-end to correlate backend
- errors that occur with inline asm back to the source code that produced it.
- For example:</p>
-
-<pre class="doc_code">
-call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
-...
-!42 = !{ i32 1234567 }
-</pre>
-
-<p>It is up to the front-end to make sense of the magic numbers it places in the
- IR. If the MDNode contains multiple constants, the code generator will use
- the one that corresponds to the line of the asm that the error occurs on.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="metadata">Metadata Nodes and Metadata Strings</a>
-</h3>
-
-<div>
-
-<p>LLVM IR allows metadata to be attached to instructions in the program that
- can convey extra information about the code to the optimizers and code
- generator. One example application of metadata is source-level debug
- information. There are two metadata primitives: strings and nodes. All
- metadata has the <tt>metadata</tt> type and is identified in syntax by a
- preceding exclamation point ('<tt>!</tt>').</p>
-
-<p>A metadata string is a string surrounded by double quotes. It can contain
- any character by escaping non-printable characters with "<tt>\xx</tt>" where
- "<tt>xx</tt>" is the two digit hex code. For example:
- "<tt>!"test\00"</tt>".</p>
-
-<p>Metadata nodes are represented with notation similar to structure constants
- (a comma separated list of elements, surrounded by braces and preceded by an
- exclamation point). Metadata nodes can have any values as their operands. For
- example:</p>
-
-<div class="doc_code">
-<pre>
-!{ metadata !"test\00", i32 10}
-</pre>
-</div>
-
-<p>A <a href="#namedmetadatastructure">named metadata</a> is a collection of
- metadata nodes, which can be looked up in the module symbol table. For
- example:</p>
-
-<div class="doc_code">
-<pre>
-!foo = metadata !{!4, !3}
-</pre>
-</div>
-
-<p>Metadata can be used as function arguments. Here the <tt>llvm.dbg.value</tt>
- function is using two metadata arguments:</p>
-
-<div class="doc_code">
-<pre>
-call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
-</pre>
-</div>
-
-<p>Metadata can be attached to an instruction. Here metadata <tt>!21</tt> is
- attached to the <tt>add</tt> instruction using the <tt>!dbg</tt>
- identifier:</p>
-
-<div class="doc_code">
-<pre>
-%indvar.next = add i64 %indvar, 1, !dbg !21
-</pre>
-</div>
-
-<p>More information about specific metadata nodes recognized by the optimizers
- and code generator is found below.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="tbaa">'<tt>tbaa</tt>' Metadata</a>
-</h4>
-
-<div>
-
-<p>In LLVM IR, memory does not have types, so LLVM's own type system is not
- suitable for doing TBAA. Instead, metadata is added to the IR to describe
- a type system of a higher level language. This can be used to implement
- typical C/C++ TBAA, but it can also be used to implement custom alias
- analysis behavior for other languages.</p>
-
-<p>The current metadata format is very simple. TBAA metadata nodes have up to
- three fields, e.g.:</p>
-
-<div class="doc_code">
-<pre>
-!0 = metadata !{ metadata !"an example type tree" }
-!1 = metadata !{ metadata !"int", metadata !0 }
-!2 = metadata !{ metadata !"float", metadata !0 }
-!3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
-</pre>
-</div>
-
-<p>The first field is an identity field. It can be any value, usually
- a metadata string, which uniquely identifies the type. The most important
- name in the tree is the name of the root node. Two trees with
- different root node names are entirely disjoint, even if they
- have leaves with common names.</p>
-
-<p>The second field identifies the type's parent node in the tree, or
- is null or omitted for a root node. A type is considered to alias
- all of its descendants and all of its ancestors in the tree. Also,
- a type is considered to alias all types in other trees, so that
- bitcode produced from multiple front-ends is handled conservatively.</p>
-
-<p>If the third field is present, it's an integer which, if equal to 1,
- indicates that the type is "constant" (meaning
- <tt>pointsToConstantMemory</tt> should return true; see
- <a href="AliasAnalysis.html#OtherItfs">other useful
- <tt>AliasAnalysis</tt> methods</a>).</p>
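-
-<p>For illustration, such tags might be attached to memory operations like so
- (a sketch; <tt>%ptr</tt> and <tt>%fptr</tt> are hypothetical):</p>
-
-<div class="doc_code">
-<pre>
-%val = load i32* %ptr, align 4, !tbaa !1          ; an "int" access
-store float 1.0, float* %fptr, align 4, !tbaa !2  ; a "float" access
-</pre>
-</div>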
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="tbaa.struct">'<tt>tbaa.struct</tt>' Metadata</a>
-</h4>
-
-<div>
-
-<p>The <a href="#int_memcpy"><tt>llvm.memcpy</tt></a> is often used to implement
-aggregate assignment operations in C and similar languages, however it is
-defined to copy a contiguous region of memory, which is more than strictly
-necessary for aggregate types which contain holes due to padding. Also, it
-doesn't contain any TBAA information about the fields of the aggregate.</p>
-
-<p><tt>!tbaa.struct</tt> metadata can describe which memory subregions in a memcpy
-are padding and what the TBAA tags of the struct are.</p>
-
-<p>The current metadata format is very simple. <tt>!tbaa.struct</tt> metadata nodes
- are a list of operands which are in conceptual groups of three. For each
- group of three, the first operand gives the byte offset of a field in bytes,
- the second gives its size in bytes, and the third gives its
- tbaa tag. e.g.:</p>
-
-<div class="doc_code">
-<pre>
-!4 = metadata !{ i64 0, i64 4, metadata !1, i64 8, i64 4, metadata !2 }
-</pre>
-</div>
-
-<p>This describes a struct with two fields. The first is at offset 0 bytes
- with size 4 bytes, and has tbaa tag !1. The second is at offset 8 bytes
- and has size 4 bytes and has tbaa tag !2.</p>
-
-<p>Note that the fields need not be contiguous. In this example, there is a
- 4 byte gap between the two fields. This gap represents padding which
- does not carry useful data and need not be preserved.</p>
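-
-<p>For illustration, such a tag might be attached to a <tt>memcpy</tt> of the
- 12-byte struct described above (a sketch; <tt>%dst</tt> and <tt>%src</tt> are
- hypothetical):</p>
-
-<div class="doc_code">
-<pre>
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
-
-call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src,
-                                     i64 12, i32 4, i1 false), !tbaa.struct !4
-</pre>
-</div>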
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="fpmath">'<tt>fpmath</tt>' Metadata</a>
-</h4>
-
-<div>
-
-<p><tt>fpmath</tt> metadata may be attached to any instruction of floating point
- type. It can be used to express the maximum acceptable error in the result of
- that instruction, in ULPs, thus potentially allowing the compiler to use a
- more efficient but less accurate method of computing it. ULP is defined as
- follows:</p>
-
-<blockquote>
-
-<p>If <tt>x</tt> is a real number that lies between two finite consecutive
- floating-point numbers <tt>a</tt> and <tt>b</tt>, without being equal to one
- of them, then <tt>ulp(x) = |b - a|</tt>, otherwise <tt>ulp(x)</tt> is the
- distance between the two non-equal finite floating-point numbers nearest
- <tt>x</tt>. Moreover, <tt>ulp(NaN)</tt> is <tt>NaN</tt>.</p>
-
-</blockquote>
-
-<p>The metadata node shall consist of a single positive floating point number
- representing the maximum acceptable error in ULPs, for example:</p>
-
-<div class="doc_code">
-<pre>
-!0 = metadata !{ float 2.5 } ; maximum acceptable inaccuracy is 2.5 ULPs
-</pre>
-</div>
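-
-<p>For illustration, the node might be attached to a floating point operation
- like so (a sketch; <tt>%a</tt> and <tt>%b</tt> are hypothetical):</p>
-
-<div class="doc_code">
-<pre>
-%r = fdiv float %a, %b, !fpmath !0  ; result may be off by up to 2.5 ULPs
-</pre>
-</div>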
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="range">'<tt>range</tt>' Metadata</a>
-</h4>
-
-<div>
-<p><tt>range</tt> metadata may be attached only to loads of integer types. It
- expresses the possible ranges the loaded value is in. The ranges are
- represented with a flattened list of integers. The loaded value is known to
- be in the union of the ranges defined by each consecutive pair. Each pair
- has the following properties:</p>
-<ul>
- <li>The type must match the type loaded by the instruction.</li>
- <li>The pair <tt>a,b</tt> represents the range <tt>[a,b)</tt>.</li>
- <li>Both <tt>a</tt> and <tt>b</tt> are constants.</li>
- <li>The range is allowed to wrap.</li>
- <li>The range should not represent the full or empty set. That is,
- <tt>a!=b</tt>. </li>
-</ul>
-<p> In addition, the pairs must be in signed order of the lower bound and
- they must be non-contiguous.</p>
-
-<p>Examples:</p>
-<div class="doc_code">
-<pre>
- %a = load i8* %x, align 1, !range !0 ; Can only be 0 or 1
- %b = load i8* %y, align 1, !range !1 ; Can only be 255 (-1), 0 or 1
- %c = load i8* %z, align 1, !range !2 ; Can only be 0, 1, 3, 4 or 5
- %d = load i8* %z, align 1, !range !3 ; Can only be -2, -1, 3, 4 or 5
-...
-!0 = metadata !{ i8 0, i8 2 }
-!1 = metadata !{ i8 255, i8 2 }
-!2 = metadata !{ i8 0, i8 2, i8 3, i8 6 }
-!3 = metadata !{ i8 -2, i8 0, i8 3, i8 6 }
-</pre>
-</div>
-</div>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="module_flags">Module Flags Metadata</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Information about the module as a whole is difficult to convey to LLVM's
- subsystems; the LLVM IR alone isn't sufficient to transmit this
- information. The <tt>llvm.module.flags</tt> named metadata exists in order to
- facilitate this. These flags are in the form of key / value pairs &mdash;
- much like a dictionary &mdash; making it easy for any subsystem that cares
- about a flag to look it up.</p>
-
-<p>The <tt>llvm.module.flags</tt> metadata contains a list of metadata
- triplets. Each triplet has the following form:</p>
-
-<ul>
- <li>The first element is a <i>behavior</i> flag, which specifies the behavior
- when two (or more) modules are merged together and two (or more) metadata
- flags with the same ID are encountered. The supported behaviors are
- described below.</li>
-
- <li>The second element is a metadata string that is a unique ID for the
- metadata. How each ID is interpreted is documented below.</li>
-
- <li>The third element is the value of the flag.</li>
-</ul>
-
-<p>When two (or more) modules are merged together, the resulting
- <tt>llvm.module.flags</tt> metadata is the union of the
- modules' <tt>llvm.module.flags</tt> metadata, the only exception being a flag
- with the <i>Override</i> behavior, which may override another flag's value
- (see below).</p>
-
-<p>The following behaviors are supported:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
- <tbody>
- <tr>
- <th>Value</th>
- <th>Behavior</th>
- </tr>
- <tr>
- <td>1</td>
- <td align="left">
- <dl>
- <dt><b>Error</b></dt>
- <dd>Emits an error if two values disagree. It is an error to have an ID
- with both an Error and a Warning behavior.</dd>
- </dl>
- </td>
- </tr>
- <tr>
- <td>2</td>
- <td align="left">
- <dl>
- <dt><b>Warning</b></dt>
- <dd>Emits a warning if two values disagree.</dd>
- </dl>
- </td>
- </tr>
- <tr>
- <td>3</td>
- <td align="left">
- <dl>
- <dt><b>Require</b></dt>
- <dd>Emits an error when a flag with the specified ID is not present or
- doesn't have the specified value. It is an error for two (or more)
- <tt>llvm.module.flags</tt> with the same ID to have the Require
- behavior but different values. There may be multiple Require flags
- per ID.</dd>
- </dl>
- </td>
- </tr>
- <tr>
- <td>4</td>
- <td align="left">
- <dl>
- <dt><b>Override</b></dt>
- <dd>Uses the specified value if the two values disagree. It is an
- error for two (or more) <tt>llvm.module.flags</tt> with the same
- ID to have the Override behavior but different values.</dd>
- </dl>
- </td>
- </tr>
- </tbody>
-</table>
-
-<p>An example of module flags:</p>
-
-<pre class="doc_code">
-!0 = metadata !{ i32 1, metadata !"foo", i32 1 }
-!1 = metadata !{ i32 4, metadata !"bar", i32 37 }
-!2 = metadata !{ i32 2, metadata !"qux", i32 42 }
-!3 = metadata !{ i32 3, metadata !"qux",
- metadata !{
- metadata !"foo", i32 1
- }
-}
-!llvm.module.flags = !{ !0, !1, !2, !3 }
-</pre>
-
-<ul>
- <li><p>Metadata <tt>!0</tt> has the ID <tt>!"foo"</tt> and the value '1'. The
- behavior if two or more <tt>!"foo"</tt> flags are seen is to emit an
- error if their values are not equal.</p></li>
-
- <li><p>Metadata <tt>!1</tt> has the ID <tt>!"bar"</tt> and the value '37'. The
- behavior if two or more <tt>!"bar"</tt> flags are seen is to use the
- value '37' if their values are not equal.</p></li>
-
- <li><p>Metadata <tt>!2</tt> has the ID <tt>!"qux"</tt> and the value '42'. The
- behavior if two or more <tt>!"qux"</tt> flags are seen is to emit a
- warning if their values are not equal.</p></li>
-
- <li><p>Metadata <tt>!3</tt> has the ID <tt>!"qux"</tt> and the value:</p>
-
-<pre class="doc_code">
-metadata !{ metadata !"foo", i32 1 }
-</pre>
-
- <p>The behavior is to emit an error if the <tt>llvm.module.flags</tt> does
- not contain a flag with the ID <tt>!"foo"</tt> that has the value
- '1'. If two or more <tt>!"qux"</tt> flags exist, then they must have
- the same value or an error will be issued.</p></li>
-</ul>
-
-
-<!-- ======================================================================= -->
-<h3>
-<a name="objc_gc_flags">Objective-C Garbage Collection Module Flags Metadata</a>
-</h3>
-
-<div>
-
-<p>On the Mach-O platform, Objective-C stores metadata about garbage collection
- in a special section called "image info". The metadata consists of a version
- number and a bitmask specifying what types of garbage collection are
- supported (if any) by the file. If two or more modules are linked together,
- their garbage collection metadata needs to be merged rather than appended
- together.</p>
-
-<p>The Objective-C garbage collection module flags metadata consists of the
- following key-value pairs:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
- <col width="30%">
- <tbody>
- <tr>
- <th>Key</th>
- <th>Value</th>
- </tr>
- <tr>
- <td><tt>Objective-C&nbsp;Version</tt></td>
- <td align="left"><b>[Required]</b> &mdash; The Objective-C ABI
- version. Valid values are 1 and 2.</td>
- </tr>
- <tr>
- <td><tt>Objective-C&nbsp;Image&nbsp;Info&nbsp;Version</tt></td>
- <td align="left"><b>[Required]</b> &mdash; The version of the image info
- section. Currently always 0.</td>
- </tr>
- <tr>
- <td><tt>Objective-C&nbsp;Image&nbsp;Info&nbsp;Section</tt></td>
- <td align="left"><b>[Required]</b> &mdash; The section to place the
- metadata. Valid values are <tt>"__OBJC, __image_info, regular"</tt> for
- Objective-C ABI version 1, and <tt>"__DATA,__objc_imageinfo, regular,
- no_dead_strip"</tt> for Objective-C ABI version 2.</td>
- </tr>
- <tr>
- <td><tt>Objective-C&nbsp;Garbage&nbsp;Collection</tt></td>
- <td align="left"><b>[Required]</b> &mdash; Specifies whether garbage
- collection is supported or not. Valid values are 0, for no garbage
- collection, and 2, for garbage collection supported.</td>
- </tr>
- <tr>
- <td><tt>Objective-C&nbsp;GC&nbsp;Only</tt></td>
- <td align="left"><b>[Optional]</b> &mdash; Specifies that only garbage
- collection is supported. If present, its value must be 6. This flag
- requires that the <tt>Objective-C Garbage Collection</tt> flag have the
- value 2.</td>
- </tr>
- </tbody>
-</table>
-
-<p>Some important flag interactions:</p>
-
-<ul>
- <li>If a module with <tt>Objective-C Garbage Collection</tt> set to 0 is
- merged with a module with <tt>Objective-C Garbage Collection</tt> set to
- 2, then the resulting module has the <tt>Objective-C Garbage
- Collection</tt> flag set to 0.</li>
-
- <li>A module with <tt>Objective-C Garbage Collection</tt> set to 0 cannot be
- merged with a module with <tt>Objective-C GC Only</tt> set to 6.</li>
-</ul>
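-
-<p>For illustration, a module built for Objective-C ABI version 2 with garbage
- collection enabled might carry flags like the following (a sketch; the
- behavior value 1 &mdash; Error &mdash; is an assumption):</p>
-
-<pre class="doc_code">
-!0 = metadata !{ i32 1, metadata !"Objective-C Version", i32 2 }
-!1 = metadata !{ i32 1, metadata !"Objective-C Image Info Version", i32 0 }
-!2 = metadata !{ i32 1, metadata !"Objective-C Image Info Section",
-                 metadata !"__DATA,__objc_imageinfo, regular, no_dead_strip" }
-!3 = metadata !{ i32 1, metadata !"Objective-C Garbage Collection", i32 2 }
-!llvm.module.flags = !{ !0, !1, !2, !3 }
-</pre>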
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="intrinsic_globals">Intrinsic Global Variables</a>
-</h2>
-<!-- *********************************************************************** -->
-<div>
-<p>LLVM has a number of "magic" global variables that contain data that affect
-code generation or other IR semantics. These are documented here. All globals
-of this sort should have a section specified as "<tt>llvm.metadata</tt>". This
-section and all globals that start with "<tt>llvm.</tt>" are reserved for use
-by LLVM.</p>
-
-<!-- ======================================================================= -->
-<h3>
-<a name="intg_used">The '<tt>llvm.used</tt>' Global Variable</a>
-</h3>
-
-<div>
-
-<p>The <tt>@llvm.used</tt> global is an array with i8* element type which has <a
-href="#linkage_appending">appending linkage</a>. This array contains a list of
-pointers to global variables and functions, each of which may optionally be
-wrapped in a pointer cast formed of bitcast or getelementptr. For example, a
-legal use of it is:</p>
-
-<div class="doc_code">
-<pre>
-@X = global i8 4
-@Y = global i32 123
-
-@llvm.used = appending global [2 x i8*] [
- i8* @X,
- i8* bitcast (i32* @Y to i8*)
-], section "llvm.metadata"
-</pre>
-</div>
-
-<p>If a global variable appears in the <tt>@llvm.used</tt> list, then the
- compiler, assembler, and linker are required to treat the symbol as if there
- is a reference to the global that it cannot see. For example, if a variable
- has internal linkage and no references other than that from
- the <tt>@llvm.used</tt> list, it cannot be deleted. This is commonly used to
- represent references from inline asms and other things the compiler cannot
- "see", and corresponds to "<tt>attribute((used))</tt>" in GNU C.</p>
-
-<p>On some targets, the code generator must emit a directive to the assembler or
- object file to prevent the assembler and linker from molesting the
- symbol.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="intg_compiler_used">
- The '<tt>llvm.compiler.used</tt>' Global Variable
- </a>
-</h3>
-
-<div>
-
-<p>The <tt>@llvm.compiler.used</tt> directive is the same as the
- <tt>@llvm.used</tt> directive, except that it only prevents the compiler from
- touching the symbol. On targets that support it, this allows an intelligent
- linker to optimize references to the symbol without being impeded as it would
- be by <tt>@llvm.used</tt>.</p>
-
-<p>This is a rare construct that should only be used in rare circumstances, and
- should not be exposed to source languages.</p>
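-
-<p>For illustration, its form mirrors <tt>@llvm.used</tt> (a sketch;
- <tt>@X</tt> is hypothetical):</p>
-
-<div class="doc_code">
-<pre>
-@X = internal global i8 4
-
-@llvm.compiler.used = appending global [1 x i8*] [ i8* @X ], section "llvm.metadata"
-</pre>
-</div>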
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-<a name="intg_global_ctors">The '<tt>llvm.global_ctors</tt>' Global Variable</a>
-</h3>
-
-<div>
-
-<div class="doc_code">
-<pre>
-%0 = type { i32, void ()* }
-@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
-</pre>
-</div>
-
-<p>The <tt>@llvm.global_ctors</tt> array contains a list of constructor
- functions and associated priorities. The functions referenced by this array
- will be called in ascending order of priority (i.e. lowest first) when the
- module is loaded. The order of functions with the same priority is not
- defined.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-<a name="intg_global_dtors">The '<tt>llvm.global_dtors</tt>' Global Variable</a>
-</h3>
-
-<div>
-
-<div class="doc_code">
-<pre>
-%0 = type { i32, void ()* }
-@llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
-</pre>
-</div>
-
-<p>The <tt>@llvm.global_dtors</tt> array contains a list of destructor functions
- and associated priorities. The functions referenced by this array will be
- called in descending order of priority (i.e. highest first) when the module
- is unloaded. The order of functions with the same priority is not defined.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="instref">Instruction Reference</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The LLVM instruction set consists of several different classifications of
- instructions: <a href="#terminators">terminator
- instructions</a>, <a href="#binaryops">binary instructions</a>,
- <a href="#bitwiseops">bitwise binary instructions</a>,
- <a href="#memoryops">memory instructions</a>, and
- <a href="#otherops">other instructions</a>.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="terminators">Terminator Instructions</a>
-</h3>
-
-<div>
-
-<p>As mentioned <a href="#functionstructure">previously</a>, every basic block
- in a program ends with a "Terminator" instruction, which indicates which
- block should be executed after the current block is finished. These
- terminator instructions typically yield a '<tt>void</tt>' value: they produce
- control flow, not values (the one exception being the
- '<a href="#i_invoke"><tt>invoke</tt></a>' instruction).</p>
-
-<p>The terminator instructions are:
- '<a href="#i_ret"><tt>ret</tt></a>',
- '<a href="#i_br"><tt>br</tt></a>',
- '<a href="#i_switch"><tt>switch</tt></a>',
- '<a href="#i_indirectbr"><tt>indirectbr</tt></a>',
- '<a href="#i_invoke"><tt>invoke</tt></a>',
- '<a href="#i_resume"><tt>resume</tt></a>', and
- '<a href="#i_unreachable"><tt>unreachable</tt></a>'.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_ret">'<tt>ret</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- ret &lt;type&gt; &lt;value&gt; <i>; Return a value from a non-void function</i>
- ret void <i>; Return from void function</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>ret</tt>' instruction is used to return control flow (and optionally
- a value) from a function back to the caller.</p>
-
-<p>There are two forms of the '<tt>ret</tt>' instruction: one that returns a
- value and then transfers control flow, and one that just transfers control
- flow.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>ret</tt>' instruction optionally accepts a single argument, the
- return value. The type of the return value must be a
- '<a href="#t_firstclass">first class</a>' type.</p>
-
-<p>A function is not <a href="#wellformed">well formed</a> if it has a
- non-void return type and contains a '<tt>ret</tt>' instruction with no return
- value or a return value with a type that does not match its type, or if it
- has a void return type and contains a '<tt>ret</tt>' instruction with a
- return value.</p>
-
-<h5>Semantics:</h5>
-<p>When the '<tt>ret</tt>' instruction is executed, control flow returns back to
- the calling function's context. If the caller is a
- "<a href="#i_call"><tt>call</tt></a>" instruction, execution continues at the
- instruction after the call. If the caller was an
- "<a href="#i_invoke"><tt>invoke</tt></a>" instruction, execution continues at
- the beginning of the "normal" destination block. If the instruction returns
- a value, that value shall set the call or invoke instruction's return
- value.</p>
-
-<h5>Example:</h5>
-<pre>
- ret i32 5 <i>; Return an integer value of 5</i>
- ret void <i>; Return from a void function</i>
- ret { i32, i8 } { i32 4, i8 2 } <i>; Return a struct of values 4 and 2</i>
-</pre>
-
-</div>
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_br">'<tt>br</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- br i1 &lt;cond&gt;, label &lt;iftrue&gt;, label &lt;iffalse&gt;
- br label &lt;dest&gt; <i>; Unconditional branch</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>br</tt>' instruction is used to cause control flow to transfer to a
- different basic block in the current function. There are two forms of this
- instruction, corresponding to a conditional branch and an unconditional
- branch.</p>
-
-<h5>Arguments:</h5>
-<p>The conditional branch form of the '<tt>br</tt>' instruction takes a single
- '<tt>i1</tt>' value and two '<tt>label</tt>' values. The unconditional form
- of the '<tt>br</tt>' instruction takes a single '<tt>label</tt>' value as a
- target.</p>
-
-<h5>Semantics:</h5>
-<p>Upon execution of a conditional '<tt>br</tt>' instruction, the '<tt>i1</tt>'
- argument is evaluated. If the value is <tt>true</tt>, control flows to the
- '<tt>iftrue</tt>' <tt>label</tt> argument. If the value is <tt>false</tt>,
- control flows to the '<tt>iffalse</tt>' <tt>label</tt> argument.</p>
-
-<h5>Example:</h5>
-<pre>
-Test:
- %cond = <a href="#i_icmp">icmp</a> eq i32 %a, %b
- br i1 %cond, label %IfEqual, label %IfUnequal
-IfEqual:
- <a href="#i_ret">ret</a> i32 1
-IfUnequal:
- <a href="#i_ret">ret</a> i32 0
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_switch">'<tt>switch</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- switch &lt;intty&gt; &lt;value&gt;, label &lt;defaultdest&gt; [ &lt;intty&gt; &lt;val&gt;, label &lt;dest&gt; ... ]
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>switch</tt>' instruction is used to transfer control flow to one of
- several different places. It is a generalization of the '<tt>br</tt>'
- instruction, allowing a branch to occur to one of many possible
- destinations.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>switch</tt>' instruction uses three parameters: an integer
- comparison value '<tt>value</tt>', a default '<tt>label</tt>' destination,
- and an array of pairs of comparison value constants and '<tt>label</tt>'s.
- The table is not allowed to contain duplicate constant entries.</p>
-
-<h5>Semantics:</h5>
-<p>The <tt>switch</tt> instruction specifies a table of values and
- destinations. When the '<tt>switch</tt>' instruction is executed, this table
- is searched for the given value. If the value is found, control flow is
- transferred to the corresponding destination; otherwise, control flow is
- transferred to the default destination.</p>
-
-<h5>Implementation:</h5>
-<p>Depending on properties of the target machine and the particular
- <tt>switch</tt> instruction, this instruction may be code generated in
- different ways. For example, it could be generated as a series of chained
- conditional branches or with a lookup table.</p>
-
-<h5>Example:</h5>
-<pre>
- <i>; Emulate a conditional br instruction</i>
- %Val = <a href="#i_zext">zext</a> i1 %value to i32
- switch i32 %Val, label %truedest [ i32 0, label %falsedest ]
-
- <i>; Emulate an unconditional br instruction</i>
- switch i32 0, label %dest [ ]
-
- <i>; Implement a jump table:</i>
- switch i32 %val, label %otherwise [ i32 0, label %onzero
- i32 1, label %onone
- i32 2, label %ontwo ]
-</pre>
-
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_indirectbr">'<tt>indirectbr</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- indirectbr &lt;somety&gt;* &lt;address&gt;, [ label &lt;dest1&gt;, label &lt;dest2&gt;, ... ]
-</pre>
-
-<h5>Overview:</h5>
-
-<p>The '<tt>indirectbr</tt>' instruction implements an indirect branch to a
-   label within the current function, whose address is specified by
-   "<tt>address</tt>".  The address must be derived from a
-   <a href="#blockaddress">blockaddress</a> constant.</p>
-
-<h5>Arguments:</h5>
-
-<p>The '<tt>address</tt>' argument is the address of the label to jump to. The
- rest of the arguments indicate the full set of possible destinations that the
- address may point to. Blocks are allowed to occur multiple times in the
- destination list, though this isn't particularly useful.</p>
-
-<p>This destination list is required so that dataflow analysis has an accurate
- understanding of the CFG.</p>
-
-<h5>Semantics:</h5>
-
-<p>Control transfers to the block specified in the address argument. All
- possible destination blocks must be listed in the label list, otherwise this
- instruction has undefined behavior. This implies that jumps to labels
- defined in other functions have undefined behavior as well.</p>
-
-<h5>Implementation:</h5>
-
-<p>This is typically implemented with a jump through a register.</p>
-
-<h5>Example:</h5>
-<pre>
- indirectbr i8* %Addr, [ label %bb1, label %bb2, label %bb3 ]
-</pre>
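-
-<p>As a minimal illustrative sketch (the function and value names are
-   hypothetical), the address operand is typically built from
-   <a href="#blockaddress">blockaddress</a> constants:</p>
-
-<pre>
-  %target = select i1 %cond, i8* blockaddress(@f, %bb1), i8* blockaddress(@f, %bb2)
-  indirectbr i8* %target, [ label %bb1, label %bb2 ]
-</pre>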
-
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_invoke">'<tt>invoke</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = invoke [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>] &lt;ptr to function ty&gt; &lt;function ptr val&gt;(&lt;function args&gt;) [<a href="#fnattrs">fn attrs</a>]
- to label &lt;normal label&gt; unwind label &lt;exception label&gt;
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>invoke</tt>' instruction causes control to transfer to a specified
- function, with the possibility of control flow transfer to either the
- '<tt>normal</tt>' label or the '<tt>exception</tt>' label. If the callee
- function returns with the "<tt><a href="#i_ret">ret</a></tt>" instruction,
- control flow will return to the "normal" label. If the callee (or any
- indirect callees) returns via the "<a href="#i_resume"><tt>resume</tt></a>"
- instruction or other exception handling mechanism, control is interrupted and
- continued at the dynamically nearest "exception" label.</p>
-
-<p>The '<tt>exception</tt>' label is a
- <i><a href="ExceptionHandling.html#overview">landing pad</a></i> for the
-   exception.  As such, the '<tt>exception</tt>' label is required to have the
- "<a href="#i_landingpad"><tt>landingpad</tt></a>" instruction, which contains
- the information about the behavior of the program after unwinding
- happens, as its first non-PHI instruction. The restrictions on the
- "<tt>landingpad</tt>" instruction's tightly couples it to the
- "<tt>invoke</tt>" instruction, so that the important information contained
- within the "<tt>landingpad</tt>" instruction can't be lost through normal
- code motion.</p>
-
-<h5>Arguments:</h5>
-<p>This instruction requires several arguments:</p>
-
-<ol>
- <li>The optional "cconv" marker indicates which <a href="#callingconv">calling
- convention</a> the call should use. If none is specified, the call
- defaults to using C calling conventions.</li>
-
- <li>The optional <a href="#paramattrs">Parameter Attributes</a> list for
- return values. Only '<tt>zeroext</tt>', '<tt>signext</tt>', and
- '<tt>inreg</tt>' attributes are valid here.</li>
-
-  <li>'<tt>ptr to function ty</tt>': the signature of the pointer to
-      function value being invoked.  In most cases, this is a direct function
-      invocation, but indirect <tt>invoke</tt>s are also possible, branching
-      off an arbitrary pointer to function value.</li>
-
- <li>'<tt>function ptr val</tt>': An LLVM value containing a pointer to a
- function to be invoked. </li>
-
- <li>'<tt>function args</tt>': argument list whose types match the function
- signature argument types and parameter attributes. All arguments must be
- of <a href="#t_firstclass">first class</a> type. If the function
- signature indicates the function accepts a variable number of arguments,
- the extra arguments can be specified.</li>
-
- <li>'<tt>normal label</tt>': the label reached when the called function
- executes a '<tt><a href="#i_ret">ret</a></tt>' instruction. </li>
-
- <li>'<tt>exception label</tt>': the label reached when a callee returns via
- the <a href="#i_resume"><tt>resume</tt></a> instruction or other exception
- handling mechanism.</li>
-
- <li>The optional <a href="#fnattrs">function attributes</a> list. Only
- '<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
- '<tt>readnone</tt>' attributes are valid here.</li>
-</ol>
-
-<h5>Semantics:</h5>
-<p>This instruction is designed to operate as a standard
- '<tt><a href="#i_call">call</a></tt>' instruction in most regards. The
- primary difference is that it establishes an association with a label, which
- is used by the runtime library to unwind the stack.</p>
-
-<p>This instruction is used in languages with destructors to ensure that proper
- cleanup is performed in the case of either a <tt>longjmp</tt> or a thrown
- exception. Additionally, this is important for implementation of
- '<tt>catch</tt>' clauses in high-level languages that support them.</p>
-
-<p>For the purposes of the SSA form, the definition of the value returned by the
- '<tt>invoke</tt>' instruction is deemed to occur on the edge from the current
- block to the "normal" label. If the callee unwinds then no return value is
- available.</p>
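-
-<p>As an illustrative sketch (the callee <tt>@may_throw</tt> and the
-   personality function are hypothetical), an '<tt>invoke</tt>' paired with
-   its landing pad:</p>
-
-<pre>
-  %r = invoke i32 @may_throw() to label %cont unwind label %lpad
-
-lpad:
-  %exn = <a href="#i_landingpad">landingpad</a> { i8*, i32 }
-            personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-            cleanup
-  <a href="#i_resume">resume</a> { i8*, i32 } %exn
-
-cont:
-  <i>; %r is defined here (on the edge from the invoke), not in %lpad</i>
-</pre>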
-
-<h5>Example:</h5>
-<pre>
- %retval = invoke i32 @Test(i32 15) to label %Continue
- unwind label %TestCleanup <i>; {i32}:retval set</i>
- %retval = invoke <a href="#callingconv">coldcc</a> i32 %Testfnptr(i32 15) to label %Continue
- unwind label %TestCleanup <i>; {i32}:retval set</i>
-</pre>
-
-</div>
-
- <!-- _______________________________________________________________________ -->
-
-<h4>
- <a name="i_resume">'<tt>resume</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- resume &lt;type&gt; &lt;value&gt;
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>resume</tt>' instruction is a terminator instruction that has no
- successors.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>resume</tt>' instruction requires one argument, which must have the
- same type as the result of any '<tt>landingpad</tt>' instruction in the same
- function.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>resume</tt>' instruction resumes propagation of an existing
- (in-flight) exception whose unwinding was interrupted with
- a <a href="#i_landingpad"><tt>landingpad</tt></a> instruction.</p>
-
-<h5>Example:</h5>
-<pre>
- resume { i8*, i32 } %exn
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-
-<h4>
- <a name="i_unreachable">'<tt>unreachable</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- unreachable
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>unreachable</tt>' instruction has no defined semantics. This
- instruction is used to inform the optimizer that a particular portion of the
- code is not reachable. This can be used to indicate that the code after a
- no-return function cannot be reached, and other facts.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>unreachable</tt>' instruction has no defined semantics.</p>
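-
-<p>For example (an illustrative sketch, assuming a function <tt>@abort</tt>
-   that never returns):</p>
-<pre>
-  call void @abort() noreturn
-  unreachable
-</pre>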
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="binaryops">Binary Operations</a>
-</h3>
-
-<div>
-
-<p>Binary operators are used to do most of the computation in a program. They
- require two operands of the same type, execute an operation on them, and
- produce a single value. The operands might represent multiple data, as is
- the case with the <a href="#t_vector">vector</a> data type. The result value
- has the same type as its operands.</p>
-
-<p>There are several different binary operators:</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_add">'<tt>add</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = add &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = add nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = add nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = add nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>add</tt>' instruction returns the sum of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>add</tt>' instruction must
- be <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
- integer values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the integer sum of the two operands.</p>
-
-<p>If the sum has unsigned overflow, the result returned is the mathematical
- result modulo 2<sup>n</sup>, where n is the bit width of the result.</p>
-
-<p>Because LLVM integers use a two's complement representation, this instruction
- is appropriate for both signed and unsigned integers.</p>
-
-<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
- and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
- <tt>nsw</tt> keywords are present, the result value of the <tt>add</tt>
- is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
- respectively, occurs.</p>
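-
-<p>For example (illustrative), with <tt>i8</tt> operands:</p>
-<pre>
-  %a = add i8 255, 1          <i>; yields {i8}:a = 0 (wraps modulo 2<sup>8</sup>)</i>
-  %b = add nuw i8 255, 1      <i>; yields a poison value (unsigned overflow)</i>
-</pre>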
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = add i32 4, %var <i>; yields {i32}:result = 4 + %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_fadd">'<tt>fadd</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = fadd &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fadd</tt>' instruction returns the sum of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>fadd</tt>' instruction must be
- <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
- floating point values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the floating point sum of the two operands.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = fadd float 4.0, %var <i>; yields {float}:result = 4.0 + %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_sub">'<tt>sub</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = sub &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = sub nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = sub nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = sub nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>sub</tt>' instruction returns the difference of its two
- operands.</p>
-
-<p>Note that the '<tt>sub</tt>' instruction is used to represent the
- '<tt>neg</tt>' instruction present in most other intermediate
- representations.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>sub</tt>' instruction must
- be <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
- integer values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the integer difference of the two operands.</p>
-
-<p>If the difference has unsigned overflow, the result returned is the
- mathematical result modulo 2<sup>n</sup>, where n is the bit width of the
- result.</p>
-
-<p>Because LLVM integers use a two's complement representation, this instruction
- is appropriate for both signed and unsigned integers.</p>
-
-<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
- and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
- <tt>nsw</tt> keywords are present, the result value of the <tt>sub</tt>
- is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
- respectively, occurs.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = sub i32 4, %var <i>; yields {i32}:result = 4 - %var</i>
-  &lt;result&gt; = sub i32 0, %val          <i>; yields {i32}:result = -%val</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_fsub">'<tt>fsub</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = fsub &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fsub</tt>' instruction returns the difference of its two
- operands.</p>
-
-<p>Note that the '<tt>fsub</tt>' instruction is used to represent the
- '<tt>fneg</tt>' instruction present in most other intermediate
- representations.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>fsub</tt>' instruction must be
- <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
- floating point values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the floating point difference of the two operands.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = fsub float 4.0, %var <i>; yields {float}:result = 4.0 - %var</i>
-  &lt;result&gt; = fsub float -0.0, %val          <i>; yields {float}:result = -%val</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_mul">'<tt>mul</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = mul &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = mul nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = mul nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = mul nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>mul</tt>' instruction returns the product of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>mul</tt>' instruction must
- be <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
- integer values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the integer product of the two operands.</p>
-
-<p>If the result of the multiplication has unsigned overflow, the result
- returned is the mathematical result modulo 2<sup>n</sup>, where n is the bit
- width of the result.</p>
-
-<p>Because LLVM integers use a two's complement representation, and the result
- is the same width as the operands, this instruction returns the correct
- result for both signed and unsigned integers. If a full product
-   (e.g. <tt>i32</tt> x <tt>i32</tt> -&gt; <tt>i64</tt>) is needed, the operands should
- be sign-extended or zero-extended as appropriate to the width of the full
- product.</p>
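-
-<p>For example, a full unsigned <tt>i32</tt> x <tt>i32</tt> -&gt; <tt>i64</tt>
-   product can be formed by zero-extending the operands first (an illustrative
-   sketch):</p>
-<pre>
-  %a64  = <a href="#i_zext">zext</a> i32 %a to i64
-  %b64  = <a href="#i_zext">zext</a> i32 %b to i64
-  %full = mul i64 %a64, %b64           <i>; full 64-bit product</i>
-</pre>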
-
-<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
- and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
- <tt>nsw</tt> keywords are present, the result value of the <tt>mul</tt>
- is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
- respectively, occurs.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = mul i32 4, %var <i>; yields {i32}:result = 4 * %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_fmul">'<tt>fmul</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = fmul &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fmul</tt>' instruction returns the product of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>fmul</tt>' instruction must be
- <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
- floating point values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the floating point product of the two operands.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = fmul float 4.0, %var <i>; yields {float}:result = 4.0 * %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_udiv">'<tt>udiv</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = udiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = udiv exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>udiv</tt>' instruction returns the quotient of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>udiv</tt>' instruction must be
- <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
- values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the unsigned integer quotient of the two operands.</p>
-
-<p>Note that unsigned integer division and signed integer division are distinct
- operations; for signed integer division, use '<tt>sdiv</tt>'.</p>
-
-<p>Division by zero leads to undefined behavior.</p>
-
-<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>udiv</tt> is a <a href="#poisonvalues">poison value</a> if %op1 is not a
- multiple of %op2 (as such, "((a udiv exact b) mul b) == a").</p>
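-
-<p>For example (illustrative):</p>
-<pre>
-  %a = udiv exact i32 8, 2    <i>; yields {i32}:a = 4</i>
-  %b = udiv exact i32 9, 2    <i>; yields a poison value (9 is not a multiple of 2)</i>
-</pre>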
-
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = udiv i32 4, %var <i>; yields {i32}:result = 4 / %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_sdiv">'<tt>sdiv</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = sdiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = sdiv exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>sdiv</tt>' instruction returns the quotient of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>sdiv</tt>' instruction must be
- <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
- values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the signed integer quotient of the two operands rounded
- towards zero.</p>
-
-<p>Note that signed integer division and unsigned integer division are distinct
- operations; for unsigned integer division, use '<tt>udiv</tt>'.</p>
-
-<p>Division by zero leads to undefined behavior. Overflow also leads to
- undefined behavior; this is a rare case, but can occur, for example, by doing
- a 32-bit division of -2147483648 by -1.</p>
-
-<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>sdiv</tt> is a <a href="#poisonvalues">poison value</a> if the result would
- be rounded.</p>
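-
-<p>For example (illustrative):</p>
-<pre>
-  %a = sdiv i32 -2147483648, -1    <i>; undefined behavior (signed overflow)</i>
-  %b = sdiv exact i32 -9, 2        <i>; yields a poison value (the result would be rounded)</i>
-</pre>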
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = sdiv i32 4, %var <i>; yields {i32}:result = 4 / %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_fdiv">'<tt>fdiv</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = fdiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fdiv</tt>' instruction returns the quotient of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>fdiv</tt>' instruction must be
- <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
- floating point values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the floating point quotient of the two operands.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = fdiv float 4.0, %var <i>; yields {float}:result = 4.0 / %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_urem">'<tt>urem</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = urem &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>urem</tt>' instruction returns the remainder from the unsigned
- division of its two arguments.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>urem</tt>' instruction must be
- <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
- values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>This instruction returns the unsigned integer <i>remainder</i> of a division.
- This instruction always performs an unsigned division to get the
- remainder.</p>
-
-<p>Note that unsigned integer remainder and signed integer remainder are
- distinct operations; for signed integer remainder, use '<tt>srem</tt>'.</p>
-
-<p>Taking the remainder of a division by zero leads to undefined behavior.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = urem i32 4, %var <i>; yields {i32}:result = 4 % %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_srem">'<tt>srem</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = srem &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>srem</tt>' instruction returns the remainder from the signed
- division of its two operands. This instruction can also take
- <a href="#t_vector">vector</a> versions of the values in which case the
- elements must be integers.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>srem</tt>' instruction must be
- <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
- values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>This instruction returns the <i>remainder</i> of a division (where the result
- is either zero or has the same sign as the dividend, <tt>op1</tt>), not the
- <i>modulo</i> operator (where the result is either zero or has the same sign
- as the divisor, <tt>op2</tt>) of a value.
- For more information about the difference,
- see <a href="http://mathforum.org/dr.math/problems/anne.4.28.99.html">The
- Math Forum</a>. For a table of how this is implemented in various languages,
- please see <a href="http://en.wikipedia.org/wiki/Modulo_operation">
- Wikipedia: modulo operation</a>.</p>
-
-<p>Note that signed integer remainder and unsigned integer remainder are
- distinct operations; for unsigned integer remainder, use '<tt>urem</tt>'.</p>
-
-<p>Taking the remainder of a division by zero leads to undefined behavior.
- Overflow also leads to undefined behavior; this is a rare case, but can
- occur, for example, by taking the remainder of a 32-bit division of
- -2147483648 by -1. (The remainder doesn't actually overflow, but this rule
- lets srem be implemented using instructions that return both the result of
- the division and the remainder.)</p>
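-
-<p>For example (illustrative), the sign of the result follows the
-   dividend:</p>
-<pre>
-  %r = srem i32 -7, 3         <i>; yields {i32}:r = -1 (a modulo operation would yield 2)</i>
-</pre>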
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = srem i32 4, %var <i>; yields {i32}:result = 4 % %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_frem">'<tt>frem</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = frem &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>frem</tt>' instruction returns the remainder from the division of
- its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>frem</tt>' instruction must be
- <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
- floating point values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>This instruction returns the <i>remainder</i> of a division. The remainder
- has the same sign as the dividend.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = frem float 4.0, %var <i>; yields {float}:result = 4.0 % %var</i>
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="bitwiseops">Bitwise Binary Operations</a>
-</h3>
-
-<div>
-
-<p>Bitwise binary operators are used to do various forms of bit-twiddling in a
- program. They are generally very efficient instructions and can commonly be
- strength reduced from other instructions. They require two operands of the
- same type, execute an operation on them, and produce a single value. The
- resulting value is the same type as its operands.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_shl">'<tt>shl</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = shl &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = shl nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = shl nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = shl nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>shl</tt>' instruction returns the first operand shifted to the left
- a specified number of bits.</p>
-
-<h5>Arguments:</h5>
-<p>Both arguments to the '<tt>shl</tt>' instruction must be the
- same <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
- integer type. '<tt>op2</tt>' is treated as an unsigned value.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is <tt>op1</tt> * 2<sup><tt>op2</tt></sup> mod
- 2<sup>n</sup>, where <tt>n</tt> is the width of the result. If <tt>op2</tt>
- is (statically or dynamically) negative or equal to or larger than the number
- of bits in <tt>op1</tt>, the result is undefined. If the arguments are
- vectors, each vector element of <tt>op1</tt> is shifted by the corresponding
- shift amount in <tt>op2</tt>.</p>
-
-<p>If the <tt>nuw</tt> keyword is present, then the shift produces a
- <a href="#poisonvalues">poison value</a> if it shifts out any non-zero bits. If
- the <tt>nsw</tt> keyword is present, then the shift produces a
- <a href="#poisonvalues">poison value</a> if it shifts out any bits that disagree
- with the resultant sign bit. As such, NUW/NSW have the same semantics as
- they would if the shift were expressed as a mul instruction with the same
- nsw/nuw bits in (mul %op1, (shl 1, %op2)).</p>
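-
-<p>For example (illustrative), with <tt>i8</tt> operands:</p>
-<pre>
-  %a = shl nuw i8 128, 1      <i>; yields a poison value (a one bit is shifted out)</i>
-  %b = shl nsw i8 64, 1       <i>; yields a poison value (the shifted-out bit disagrees with the resultant sign bit)</i>
-</pre>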
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = shl i32 4, %var <i>; yields {i32}: 4 &lt;&lt; %var</i>
- &lt;result&gt; = shl i32 4, 2 <i>; yields {i32}: 16</i>
- &lt;result&gt; = shl i32 1, 10 <i>; yields {i32}: 1024</i>
- &lt;result&gt; = shl i32 1, 32 <i>; undefined</i>
- &lt;result&gt; = shl &lt;2 x i32&gt; &lt; i32 1, i32 1&gt;, &lt; i32 1, i32 2&gt; <i>; yields: result=&lt;2 x i32&gt; &lt; i32 2, i32 4&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_lshr">'<tt>lshr</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = lshr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = lshr exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>lshr</tt>' instruction (logical shift right) returns the first
- operand shifted to the right a specified number of bits with zero fill.</p>
-
-<h5>Arguments:</h5>
-<p>Both arguments to the '<tt>lshr</tt>' instruction must be the same
- <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
- type. '<tt>op2</tt>' is treated as an unsigned value.</p>
-
-<h5>Semantics:</h5>
-<p>This instruction always performs a logical shift right operation. The most
- significant bits of the result will be filled with zero bits after the shift.
- If <tt>op2</tt> is (statically or dynamically) equal to or larger than the
- number of bits in <tt>op1</tt>, the result is undefined. If the arguments are
- vectors, each vector element of <tt>op1</tt> is shifted by the corresponding
- shift amount in <tt>op2</tt>.</p>
-
-<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>lshr</tt> is a <a href="#poisonvalues">poison value</a> if any of the bits
- shifted out are non-zero.</p>
-
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = lshr i32 4, 1 <i>; yields {i32}:result = 2</i>
- &lt;result&gt; = lshr i32 4, 2 <i>; yields {i32}:result = 1</i>
- &lt;result&gt; = lshr i8 4, 3 <i>; yields {i8}:result = 0</i>
-  &lt;result&gt; = lshr i8 -2, 1   <i>; yields {i8}:result = 0x7F</i>
- &lt;result&gt; = lshr i32 1, 32 <i>; undefined</i>
- &lt;result&gt; = lshr &lt;2 x i32&gt; &lt; i32 -2, i32 4&gt;, &lt; i32 1, i32 2&gt; <i>; yields: result=&lt;2 x i32&gt; &lt; i32 0x7FFFFFFF, i32 1&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_ashr">'<tt>ashr</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = ashr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
- &lt;result&gt; = ashr exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>ashr</tt>' instruction (arithmetic shift right) returns the first
- operand shifted to the right a specified number of bits with sign
- extension.</p>
-
-<h5>Arguments:</h5>
-<p>Both arguments to the '<tt>ashr</tt>' instruction must be the same
- <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
- type. '<tt>op2</tt>' is treated as an unsigned value.</p>
-
-<h5>Semantics:</h5>
-<p>This instruction always performs an arithmetic shift right operation.  The
- most significant bits of the result will be filled with the sign bit
- of <tt>op1</tt>. If <tt>op2</tt> is (statically or dynamically) equal to or
- larger than the number of bits in <tt>op1</tt>, the result is undefined. If
- the arguments are vectors, each vector element of <tt>op1</tt> is shifted by
- the corresponding shift amount in <tt>op2</tt>.</p>
-
-<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>ashr</tt> is a <a href="#poisonvalues">poison value</a> if any of the bits
- shifted out are non-zero.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = ashr i32 4, 1 <i>; yields {i32}:result = 2</i>
- &lt;result&gt; = ashr i32 4, 2 <i>; yields {i32}:result = 1</i>
- &lt;result&gt; = ashr i8 4, 3 <i>; yields {i8}:result = 0</i>
- &lt;result&gt; = ashr i8 -2, 1 <i>; yields {i8}:result = -1</i>
- &lt;result&gt; = ashr i32 1, 32 <i>; undefined</i>
- &lt;result&gt; = ashr &lt;2 x i32&gt; &lt; i32 -2, i32 4&gt;, &lt; i32 1, i32 3&gt; <i>; yields: result=&lt;2 x i32&gt; &lt; i32 -1, i32 0&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_and">'<tt>and</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = and &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>and</tt>' instruction returns the bitwise logical and of its two
- operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>and</tt>' instruction must be
- <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
- values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The truth table used for the '<tt>and</tt>' instruction is:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
- <tbody>
- <tr>
- <th>In0</th>
- <th>In1</th>
- <th>Out</th>
- </tr>
- <tr>
- <td>0</td>
- <td>0</td>
- <td>0</td>
- </tr>
- <tr>
- <td>0</td>
- <td>1</td>
- <td>0</td>
- </tr>
- <tr>
- <td>1</td>
- <td>0</td>
- <td>0</td>
- </tr>
- <tr>
- <td>1</td>
- <td>1</td>
- <td>1</td>
- </tr>
- </tbody>
-</table>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = and i32 4, %var <i>; yields {i32}:result = 4 &amp; %var</i>
- &lt;result&gt; = and i32 15, 40 <i>; yields {i32}:result = 8</i>
- &lt;result&gt; = and i32 4, 8 <i>; yields {i32}:result = 0</i>
-</pre>
-</div>
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_or">'<tt>or</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = or &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>or</tt>' instruction returns the bitwise logical inclusive or of its
- two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>or</tt>' instruction must be
- <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
- values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The truth table used for the '<tt>or</tt>' instruction is:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
- <tbody>
- <tr>
- <th>In0</th>
- <th>In1</th>
- <th>Out</th>
- </tr>
- <tr>
- <td>0</td>
- <td>0</td>
- <td>0</td>
- </tr>
- <tr>
- <td>0</td>
- <td>1</td>
- <td>1</td>
- </tr>
- <tr>
- <td>1</td>
- <td>0</td>
- <td>1</td>
- </tr>
- <tr>
- <td>1</td>
- <td>1</td>
- <td>1</td>
- </tr>
- </tbody>
-</table>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = or i32 4, %var <i>; yields {i32}:result = 4 | %var</i>
- &lt;result&gt; = or i32 15, 40 <i>; yields {i32}:result = 47</i>
- &lt;result&gt; = or i32 4, 8 <i>; yields {i32}:result = 12</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_xor">'<tt>xor</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = xor &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>xor</tt>' instruction returns the bitwise logical exclusive or of
- its two operands. The <tt>xor</tt> is used to implement the "one's
- complement" operation, which is the "~" operator in C.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>xor</tt>' instruction must be
- <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
- values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The truth table used for the '<tt>xor</tt>' instruction is:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
- <tbody>
- <tr>
- <th>In0</th>
- <th>In1</th>
- <th>Out</th>
- </tr>
- <tr>
- <td>0</td>
- <td>0</td>
- <td>0</td>
- </tr>
- <tr>
- <td>0</td>
- <td>1</td>
- <td>1</td>
- </tr>
- <tr>
- <td>1</td>
- <td>0</td>
- <td>1</td>
- </tr>
- <tr>
- <td>1</td>
- <td>1</td>
- <td>0</td>
- </tr>
- </tbody>
-</table>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = xor i32 4, %var <i>; yields {i32}:result = 4 ^ %var</i>
- &lt;result&gt; = xor i32 15, 40 <i>; yields {i32}:result = 39</i>
- &lt;result&gt; = xor i32 4, 8 <i>; yields {i32}:result = 12</i>
- &lt;result&gt; = xor i32 %V, -1 <i>; yields {i32}:result = ~%V</i>
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="vectorops">Vector Operations</a>
-</h3>
-
-<div>
-
-<p>LLVM supports several instructions to represent vector operations in a
- target-independent manner. These instructions cover the element-access and
- vector-specific operations needed to process vectors effectively. While LLVM
- does directly support these vector operations, many sophisticated algorithms
- will want to use target-specific intrinsics to take full advantage of a
- specific target.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_extractelement">'<tt>extractelement</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = extractelement &lt;n x &lt;ty&gt;&gt; &lt;val&gt;, i32 &lt;idx&gt; <i>; yields &lt;ty&gt;</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>extractelement</tt>' instruction extracts a single scalar element
- from a vector at a specified index.</p>
-
-
-<h5>Arguments:</h5>
-<p>The first operand of an '<tt>extractelement</tt>' instruction is a value
- of <a href="#t_vector">vector</a> type. The second operand is an index
- indicating the position from which to extract the element. The index may be
- a variable.</p>
-
-<h5>Semantics:</h5>
-<p>The result is a scalar of the same type as the element type of
- <tt>val</tt>. Its value is the value at position <tt>idx</tt> of
- <tt>val</tt>. If <tt>idx</tt> exceeds the length of <tt>val</tt>, the
- results are undefined.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = extractelement &lt;4 x i32&gt; %vec, i32 0 <i>; yields i32</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_insertelement">'<tt>insertelement</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = insertelement &lt;n x &lt;ty&gt;&gt; &lt;val&gt;, &lt;ty&gt; &lt;elt&gt;, i32 &lt;idx&gt; <i>; yields &lt;n x &lt;ty&gt;&gt;</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>insertelement</tt>' instruction inserts a scalar element into a
- vector at a specified index.</p>
-
-<h5>Arguments:</h5>
-<p>The first operand of an '<tt>insertelement</tt>' instruction is a value
- of <a href="#t_vector">vector</a> type. The second operand is a scalar value
- whose type must equal the element type of the first operand. The third
- operand is an index indicating the position at which to insert the value.
- The index may be a variable.</p>
-
-<h5>Semantics:</h5>
-<p>The result is a vector of the same type as <tt>val</tt>. Its element values
- are those of <tt>val</tt> except at position <tt>idx</tt>, where it gets the
- value <tt>elt</tt>. If <tt>idx</tt> exceeds the length of <tt>val</tt>, the
- results are undefined.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = insertelement &lt;4 x i32&gt; %vec, i32 1, i32 0 <i>; yields &lt;4 x i32&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_shufflevector">'<tt>shufflevector</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = shufflevector &lt;n x &lt;ty&gt;&gt; &lt;v1&gt;, &lt;n x &lt;ty&gt;&gt; &lt;v2&gt;, &lt;m x i32&gt; &lt;mask&gt; <i>; yields &lt;m x &lt;ty&gt;&gt;</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>shufflevector</tt>' instruction constructs a permutation of elements
- from two input vectors, returning a vector with the same element type as the
- input and length that is the same as the shuffle mask.</p>
-
-<h5>Arguments:</h5>
-<p>The first two operands of a '<tt>shufflevector</tt>' instruction are vectors
- with the same type. The third argument is a shuffle mask whose
- element type is always 'i32'. The result of the instruction is a vector
- whose length is the same as the shuffle mask and whose element type is the
- same as the element type of the first two operands.</p>
-
-<p>The shuffle mask operand is required to be a constant vector with either
- constant integer or undef values.</p>
-
-<h5>Semantics:</h5>
-<p>The elements of the two input vectors are numbered from left to right across
- both of the vectors. The shuffle mask operand specifies, for each element of
- the result vector, which element of the two input vectors the result element
- gets. The element selector may be undef (meaning "don't care") and the
- second operand may be undef if performing a shuffle from only one vector.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
- &lt;4 x i32&gt; &lt;i32 0, i32 4, i32 1, i32 5&gt; <i>; yields &lt;4 x i32&gt;</i>
- &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; undef,
- &lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt; <i>; yields &lt;4 x i32&gt;</i> - Identity shuffle.
- &lt;result&gt; = shufflevector &lt;8 x i32&gt; %v1, &lt;8 x i32&gt; undef,
- &lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt; <i>; yields &lt;4 x i32&gt;</i>
- &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
- &lt;8 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 &gt; <i>; yields &lt;8 x i32&gt;</i>
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="aggregateops">Aggregate Operations</a>
-</h3>
-
-<div>
-
-<p>LLVM supports several instructions for working with
- <a href="#t_aggregate">aggregate</a> values.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_extractvalue">'<tt>extractvalue</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = extractvalue &lt;aggregate type&gt; &lt;val&gt;, &lt;idx&gt;{, &lt;idx&gt;}*
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>extractvalue</tt>' instruction extracts the value of a member field
- from an <a href="#t_aggregate">aggregate</a> value.</p>
-
-<h5>Arguments:</h5>
-<p>The first operand of an '<tt>extractvalue</tt>' instruction is a value
- of <a href="#t_struct">struct</a> or
- <a href="#t_array">array</a> type. The operands are constant indices to
- specify which value to extract in a similar manner as indices in a
- '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.</p>
-  <p>The major differences from <tt>getelementptr</tt> indexing are (see the
-  sketch after this list):</p>
- <ul>
- <li>Since the value being indexed is not a pointer, the first index is
- omitted and assumed to be zero.</li>
- <li>At least one index must be specified.</li>
- <li>Not only struct indices but also array indices must be in
- bounds.</li>
- </ul>
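-
-<p>For example (an illustrative sketch), extracting the second member of a
-   struct from memory versus from an SSA value:</p>
-<pre>
-  %p  = <a href="#i_getelementptr">getelementptr</a> {i32, float}* %ptr, i32 0, i32 1  <i>; address of the field</i>
-  %v1 = <a href="#i_load">load</a> float* %p                       <i>; load it from memory</i>
-  %v2 = extractvalue {i32, float} %agg, 1          <i>; same field, directly from the value</i>
-</pre>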
-
-<h5>Semantics:</h5>
-<p>The result is the value at the position in the aggregate specified by the
- index operands.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = extractvalue {i32, float} %agg, 0 <i>; yields i32</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_insertvalue">'<tt>insertvalue</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = insertvalue &lt;aggregate type&gt; &lt;val&gt;, &lt;ty&gt; &lt;elt&gt;, &lt;idx&gt;{, &lt;idx&gt;}* <i>; yields &lt;aggregate type&gt;</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>insertvalue</tt>' instruction inserts a value into a member field
- in an <a href="#t_aggregate">aggregate</a> value.</p>
-
-<h5>Arguments:</h5>
-<p>The first operand of an '<tt>insertvalue</tt>' instruction is a value
- of <a href="#t_struct">struct</a> or
- <a href="#t_array">array</a> type. The second operand is a first-class
- value to insert. The following operands are constant indices indicating
- the position at which to insert the value in a similar manner as indices in a
- '<tt><a href="#i_extractvalue">extractvalue</a></tt>' instruction. The
- value to insert must have the same type as the value identified by the
- indices.</p>
-
-<h5>Semantics:</h5>
-<p>The result is an aggregate of the same type as <tt>val</tt>. Its value is
- that of <tt>val</tt> except that the value at the position specified by the
- indices is that of <tt>elt</tt>.</p>
-
-<h5>Example:</h5>
-<pre>
- %agg1 = insertvalue {i32, float} undef, i32 1, 0 <i>; yields {i32 1, float undef}</i>
- %agg2 = insertvalue {i32, float} %agg1, float %val, 1 <i>; yields {i32 1, float %val}</i>
- %agg3 = insertvalue {i32, {float}} %agg1, float %val, 1, 0 <i>; yields {i32 1, float %val}</i>
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="memoryops">Memory Access and Addressing Operations</a>
-</h3>
-
-<div>
-
-<p>A key design point of an SSA-based representation is how it represents
- memory. In LLVM, no memory locations are in SSA form, which makes things
- very simple. This section describes how to read, write, and allocate
- memory in LLVM.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_alloca">'<tt>alloca</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = alloca &lt;type&gt;[, &lt;ty&gt; &lt;NumElements&gt;][, align &lt;alignment&gt;] <i>; yields {type*}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>alloca</tt>' instruction allocates memory on the stack frame of the
- currently executing function, to be automatically released when this function
- returns to its caller. The object is always allocated in the generic address
- space (address space zero).</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>alloca</tt>' instruction
- allocates <tt>sizeof(&lt;type&gt;)*NumElements</tt> bytes of memory on the
- runtime stack, returning a pointer of the appropriate type to the program.
- If "NumElements" is specified, it is the number of elements allocated,
- otherwise "NumElements" is defaulted to be one. If a constant alignment is
- specified, the value result of the allocation is guaranteed to be aligned to
- at least that boundary. If not specified, or if zero, the target can choose
- to align the allocation on any convenient boundary compatible with the
- type.</p>
-
-<p>'<tt>type</tt>' may be any sized type.</p>
-
-<h5>Semantics:</h5>
-<p>Memory is allocated; a pointer is returned. The operation is undefined if
- there is insufficient stack space for the allocation. '<tt>alloca</tt>'d
- memory is automatically released when the function returns. The
- '<tt>alloca</tt>' instruction is commonly used to represent automatic
- variables that must have an address available. When the function returns
- (either with the <tt><a href="#i_ret">ret</a></tt>
- or <tt><a href="#i_resume">resume</a></tt> instructions), the memory is
- reclaimed. Allocating zero bytes is legal, but the result is undefined.
-   The order in which memory is allocated (i.e., which way the stack grows) is
- not specified.</p>
-
-
-<h5>Example:</h5>
-<pre>
- %ptr = alloca i32 <i>; yields {i32*}:ptr</i>
- %ptr = alloca i32, i32 4 <i>; yields {i32*}:ptr</i>
- %ptr = alloca i32, i32 4, align 1024 <i>; yields {i32*}:ptr</i>
- %ptr = alloca i32, align 1024 <i>; yields {i32*}:ptr</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_load">'<tt>load</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = load [volatile] &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;][, !invariant.load !&lt;index&gt;]
- &lt;result&gt; = load atomic [volatile] &lt;ty&gt;* &lt;pointer&gt; [singlethread] &lt;ordering&gt;, align &lt;alignment&gt;
- !&lt;index&gt; = !{ i32 1 }
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>load</tt>' instruction is used to read from memory.</p>
-
-<h5>Arguments:</h5>
-<p>The argument to the '<tt>load</tt>' instruction specifies the memory address
- from which to load. The pointer must point to
- a <a href="#t_firstclass">first class</a> type. If the <tt>load</tt> is
- marked as <tt>volatile</tt>, then the optimizer is not allowed to modify the
- number or order of execution of this <tt>load</tt> with other <a
- href="#volatile">volatile operations</a>.</p>
-
-<p>If the <code>load</code> is marked as <code>atomic</code>, it takes an extra
- <a href="#ordering">ordering</a> and optional <code>singlethread</code>
- argument. The <code>release</code> and <code>acq_rel</code> orderings are
- not valid on <code>load</code> instructions. Atomic loads produce <a
- href="#memorymodel">defined</a> results when they may see multiple atomic
- stores. The type of the pointee must be an integer type whose bit width
- is a power of two greater than or equal to eight and less than or equal
- to a target-specific size limit. <code>align</code> must be explicitly
- specified on atomic loads, and the load has undefined behavior if the
- alignment is not set to a value which is at least the size in bytes of
- the pointee. <code>!nontemporal</code> does not have any defined semantics
- for atomic loads.</p>
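-
-<p>For example (illustrative), an <code>acquire</code> load of an
-   <tt>i32</tt>:</p>
-<pre>
-  %val = load atomic i32* %ptr acquire, align 4
-</pre>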
-
-<p>The optional constant <tt>align</tt> argument specifies the alignment of the
- operation (that is, the alignment of the memory address). A value of 0 or an
-   omitted <tt>align</tt> argument means that the operation has the ABI
- alignment for the target. It is the responsibility of the code emitter to
- ensure that the alignment information is correct. Overestimating the
- alignment results in undefined behavior. Underestimating the alignment may
- produce less efficient code. An alignment of 1 is always safe.</p>
-
-<p>The optional <tt>!nontemporal</tt> metadata must reference a single
-   metadata name &lt;index&gt; corresponding to a metadata node with
-   one <tt>i32</tt> entry of value 1.  The existence of
-   the <tt>!nontemporal</tt> metadata on the instruction tells the optimizer
- and code generator that this load is not expected to be reused in the cache.
- The code generator may select special instructions to save cache bandwidth,
- such as the <tt>MOVNT</tt> instruction on x86.</p>
-
-<p>The optional <tt>!invariant.load</tt> metadata must reference a single
-   metadata name &lt;index&gt; corresponding to a metadata node with no
-   entries.  The existence of the <tt>!invariant.load</tt> metadata on the
- instruction tells the optimizer and code generator that this load address
- points to memory which does not change value during program execution.
- The optimizer may then move this load around, for example, by hoisting it
- out of loops using loop invariant code motion.</p>
-
-<h5>Semantics:</h5>
-<p>The location of memory pointed to is loaded. If the value being loaded is of
- scalar type then the number of bytes read does not exceed the minimum number
- of bytes needed to hold all bits of the type. For example, loading an
- <tt>i24</tt> reads at most three bytes. When loading a value of a type like
- <tt>i20</tt> with a size that is not an integral number of bytes, the result
- is undefined if the value was not originally written using a store of the
- same type.</p>
-
-<h5>Examples:</h5>
-<pre>
- %ptr = <a href="#i_alloca">alloca</a> i32 <i>; yields {i32*}:ptr</i>
- <a href="#i_store">store</a> i32 3, i32* %ptr <i>; yields {void}</i>
- %val = load i32* %ptr <i>; yields {i32}:val = i32 3</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_store">'<tt>store</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- store [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;] <i>; yields {void}</i>
- store atomic [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt; [singlethread] &lt;ordering&gt;, align &lt;alignment&gt; <i>; yields {void}</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>store</tt>' instruction is used to write to memory.</p>
-
-<h5>Arguments:</h5>
-<p>There are two arguments to the '<tt>store</tt>' instruction: a value to store
- and an address at which to store it. The type of the
- '<tt>&lt;pointer&gt;</tt>' operand must be a pointer to
- the <a href="#t_firstclass">first class</a> type of the
- '<tt>&lt;value&gt;</tt>' operand. If the <tt>store</tt> is marked as
- <tt>volatile</tt>, then the optimizer is not allowed to modify the number or
- order of execution of this <tt>store</tt> with other <a
- href="#volatile">volatile operations</a>.</p>
-
-<p>If the <code>store</code> is marked as <code>atomic</code>, it takes an extra
- <a href="#ordering">ordering</a> and optional <code>singlethread</code>
- argument. The <code>acquire</code> and <code>acq_rel</code> orderings aren't
- valid on <code>store</code> instructions. Atomic loads produce <a
- href="#memorymodel">defined</a> results when they may see multiple atomic
- stores. The type of the pointee must be an integer type whose bit width
- is a power of two greater than or equal to eight and less than or equal
- to a target-specific size limit. <code>align</code> must be explicitly
- specified on atomic stores, and the store has undefined behavior if the
- alignment is not set to a value which is at least the size in bytes of
- the pointee. <code>!nontemporal</code> does not have any defined semantics
- for atomic stores.</p>
-
-<p>The optional constant "align" argument specifies the alignment of the
- operation (that is, the alignment of the memory address). A value of 0 or an
- omitted "align" argument means that the operation has the abi
- alignment for the target. It is the responsibility of the code emitter to
- ensure that the alignment information is correct. Overestimating the
- alignment results in an undefined behavior. Underestimating the alignment may
- produce less efficient code. An alignment of 1 is always safe.</p>
-
-<p>The optional !nontemporal metadata must reference a single metadata
-   name &lt;index&gt; corresponding to a metadata node with one i32 entry of
-   value 1.  The existence of the !nontemporal metadata on the
-   instruction tells the optimizer and code generator that this store is
- not expected to be reused in the cache. The code generator may
- select special instructions to save cache bandwidth, such as the
- MOVNT instruction on x86.</p>
-
-
-<h5>Semantics:</h5>
-<p>The contents of memory are updated to contain '<tt>&lt;value&gt;</tt>' at the
- location specified by the '<tt>&lt;pointer&gt;</tt>' operand. If
- '<tt>&lt;value&gt;</tt>' is of scalar type then the number of bytes written
- does not exceed the minimum number of bytes needed to hold all bits of the
- type. For example, storing an <tt>i24</tt> writes at most three bytes. When
- writing a value of a type like <tt>i20</tt> with a size that is not an
- integral number of bytes, it is unspecified what happens to the extra bits
- that do not belong to the type, but they will typically be overwritten.</p>
-
-<h5>Example:</h5>
-<pre>
- %ptr = <a href="#i_alloca">alloca</a> i32 <i>; yields {i32*}:ptr</i>
- store i32 3, i32* %ptr <i>; yields {void}</i>
- %val = <a href="#i_load">load</a> i32* %ptr <i>; yields {i32}:val = i32 3</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-<a name="i_fence">'<tt>fence</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- fence [singlethread] &lt;ordering&gt; <i>; yields {void}</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fence</tt>' instruction is used to introduce happens-before edges
-between operations.</p>
-
-<h5>Arguments:</h5>
-<p>'<code>fence</code>' instructions take an <a
-href="#ordering">ordering</a> argument which defines what
-<i>synchronizes-with</i> edges they add. They can only be given
-<code>acquire</code>, <code>release</code>, <code>acq_rel</code>, and
-<code>seq_cst</code> orderings.</p>
-
-<h5>Semantics:</h5>
-<p>A fence <var>A</var> which has (at least) <code>release</code> ordering
-semantics <i>synchronizes with</i> a fence <var>B</var> with (at least)
-<code>acquire</code> ordering semantics if and only if there exist atomic
-operations <var>X</var> and <var>Y</var>, both operating on some atomic object
-<var>M</var>, such that <var>A</var> is sequenced before <var>X</var>,
-<var>X</var> modifies <var>M</var> (either directly or through some side effect
-of a sequence headed by <var>X</var>), <var>Y</var> is sequenced before
-<var>B</var>, and <var>Y</var> observes <var>M</var>. This provides a
-<i>happens-before</i> dependency between <var>A</var> and <var>B</var>. Rather
-than an explicit <code>fence</code>, one (but not both) of the atomic operations
-<var>X</var> or <var>Y</var> might provide a <code>release</code> or
-<code>acquire</code> (resp.) ordering constraint and still
-<i>synchronize-with</i> the explicit <code>fence</code> and establish the
-<i>happens-before</i> edge.</p>
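-
-<p>As an illustrative sketch (<code>@data</code> and <code>@flag</code> are
-hypothetical globals), a <code>release</code> fence before a
-<code>monotonic</code> store can <i>synchronize with</i> an
-<code>acquire</code> fence after a <code>monotonic</code> load of the same
-location:</p>
-
-<pre>
-  <i>; Thread 1:</i>
-  store i32 42, i32* @data
-  fence release
-  store atomic i32 1, i32* @flag monotonic, align 4
-
-  <i>; Thread 2:</i>
-  %f = load atomic i32* @flag monotonic, align 4
-  fence acquire
-  %d = load i32* @data        <i>; observes 42 whenever %f is 1</i>
-</pre>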
-
-<p>A <code>fence</code> which has <code>seq_cst</code> ordering, in addition to
-having both <code>acquire</code> and <code>release</code> semantics specified
-above, participates in the global program order of other <code>seq_cst</code>
-operations and/or fences.</p>
-
-<p>The optional "<a href="#singlethread"><code>singlethread</code></a>" argument
-specifies that the fence only synchronizes with other fences in the same
-thread. (This is useful for interacting with signal handlers.)</p>
-
-<h5>Example:</h5>
-<pre>
- fence acquire <i>; yields {void}</i>
- fence singlethread seq_cst <i>; yields {void}</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-<a name="i_cmpxchg">'<tt>cmpxchg</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- cmpxchg [volatile] &lt;ty&gt;* &lt;pointer&gt;, &lt;ty&gt; &lt;cmp&gt;, &lt;ty&gt; &lt;new&gt; [singlethread] &lt;ordering&gt; <i>; yields {ty}</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>cmpxchg</tt>' instruction is used to atomically modify memory.
-It loads a value in memory and compares it to a given value. If they are
-equal, it stores a new value into the memory.</p>
-
-<h5>Arguments:</h5>
-<p>There are three arguments to the '<code>cmpxchg</code>' instruction: an
-address to operate on, a value to compare to the value currently at that
-address, and a new value to place at that address if the compared values are
-equal. The type of '<var>&lt;cmp&gt;</var>' must be an integer type whose
-bit width is a power of two greater than or equal to eight and less than
-or equal to a target-specific size limit. '<var>&lt;cmp&gt;</var>' and
-'<var>&lt;new&gt;</var>' must have the same type, and the type of
-'<var>&lt;pointer&gt;</var>' must be a pointer to that type. If the
-<code>cmpxchg</code> is marked as <code>volatile</code>, then the
-optimizer is not allowed to modify the number or order of execution
-of this <code>cmpxchg</code> with other <a href="#volatile">volatile
-operations</a>.</p>
-
-<!-- FIXME: Extend allowed types. -->
-
-<p>The <a href="#ordering"><var>ordering</var></a> argument specifies how this
-<code>cmpxchg</code> synchronizes with other atomic operations.</p>
-
-<p>The optional "<code>singlethread</code>" argument declares that the
-<code>cmpxchg</code> is only atomic with respect to code (usually signal
-handlers) running in the same thread as the <code>cmpxchg</code>. Otherwise the
-cmpxchg is atomic with respect to all other code in the system.</p>
-
-<p>The pointer passed into cmpxchg must have alignment greater than or equal to
-the size in memory of the operand.</p>
-
-<h5>Semantics:</h5>
-<p>The contents of memory at the location specified by the
-'<tt>&lt;pointer&gt;</tt>' operand are read and compared to
-'<tt>&lt;cmp&gt;</tt>'; if the read value is equal,
-'<tt>&lt;new&gt;</tt>' is written. The original value at the location
-is returned.</p>
-
-<p>A successful <code>cmpxchg</code> is a read-modify-write instruction for the
-purpose of identifying <a href="#release_sequence">release sequences</a>. A
-failed <code>cmpxchg</code> is equivalent to an atomic load with an ordering
-parameter determined by dropping any <code>release</code> part of the
-<code>cmpxchg</code>'s ordering.</p>
-
-<!--
-FIXME: Is compare_exchange_weak() necessary? (Consider after we've done
-optimization work on ARM.)
-
-FIXME: Is a weaker ordering constraint on failure helpful in practice?
--->
-
-<h5>Example:</h5>
-<pre>
-entry:
- %orig = atomic <a href="#i_load">load</a> i32* %ptr unordered <i>; yields {i32}</i>
- <a href="#i_br">br</a> label %loop
-
-loop:
- %cmp = <a href="#i_phi">phi</a> i32 [ %orig, %entry ], [ %old, %loop ]
- %squared = <a href="#i_mul">mul</a> i32 %cmp, %cmp
- %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared <i>; yields {i32}</i>
- %success = <a href="#i_icmp">icmp</a> eq i32 %cmp, %old
- <a href="#i_br">br</a> i1 %success, label %done, label %loop
-
-done:
- ...
-</pre>
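-
-<p>As a second illustration, a minimal spinlock-acquire sketch, assuming a
-hypothetical lock word <tt>@lock</tt> where 0 means free and 1 means held:</p>
-
-<pre class="doc_code">
-retry:
-  %old = cmpxchg i32* @lock, i32 0, i32 1 acquire  <i>; yields {i32}</i>
-  %ok = <a href="#i_icmp">icmp</a> eq i32 %old, 0    <i>; did we install the 1?</i>
-  <a href="#i_br">br</a> i1 %ok, label %locked, label %retry
-
-locked:
-  ...
-</pre>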
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-<a name="i_atomicrmw">'<tt>atomicrmw</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- atomicrmw [volatile] &lt;operation&gt; &lt;ty&gt;* &lt;pointer&gt;, &lt;ty&gt; &lt;value&gt; [singlethread] &lt;ordering&gt; <i>; yields {ty}</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>atomicrmw</tt>' instruction is used to atomically modify memory.</p>
-
-<h5>Arguments:</h5>
-<p>There are three arguments to the '<code>atomicrmw</code>' instruction: an
-operation to apply, an address whose value to modify, and an argument to the
-operation. The operation must be one of the following keywords:</p>
-<ul>
- <li>xchg</li>
- <li>add</li>
- <li>sub</li>
- <li>and</li>
- <li>nand</li>
- <li>or</li>
- <li>xor</li>
- <li>max</li>
- <li>min</li>
- <li>umax</li>
- <li>umin</li>
-</ul>
-
-<p>The type of '<var>&lt;value&gt;</var>' must be an integer type whose
-bit width is a power of two greater than or equal to eight and less than
-or equal to a target-specific size limit. The type of the
-'<code>&lt;pointer&gt;</code>' operand must be a pointer to that type.
-If the <code>atomicrmw</code> is marked as <code>volatile</code>, then the
-optimizer is not allowed to modify the number or order of execution of this
-<code>atomicrmw</code> with other <a href="#volatile">volatile
- operations</a>.</p>
-
-<!-- FIXME: Extend allowed types. -->
-
-<h5>Semantics:</h5>
-<p>The contents of memory at the location specified by the
-'<tt>&lt;pointer&gt;</tt>' operand are atomically read, modified, and written
-back. The original value at the location is returned. The modification is
-specified by the <var>operation</var> argument:</p>
-
-<ul>
- <li>xchg: <code>*ptr = val</code></li>
- <li>add: <code>*ptr = *ptr + val</code></li>
- <li>sub: <code>*ptr = *ptr - val</code></li>
- <li>and: <code>*ptr = *ptr &amp; val</code></li>
- <li>nand: <code>*ptr = ~(*ptr &amp; val)</code></li>
- <li>or: <code>*ptr = *ptr | val</code></li>
- <li>xor: <code>*ptr = *ptr ^ val</code></li>
- <li>max: <code>*ptr = *ptr &gt; val ? *ptr : val</code> (using a signed comparison)</li>
- <li>min: <code>*ptr = *ptr &lt; val ? *ptr : val</code> (using a signed comparison)</li>
- <li>umax: <code>*ptr = *ptr &gt; val ? *ptr : val</code> (using an unsigned comparison)</li>
- <li>umin: <code>*ptr = *ptr &lt; val ? *ptr : val</code> (using an unsigned comparison)</li>
-</ul>
-
-<h5>Example:</h5>
-<pre>
- %old = atomicrmw add i32* %ptr, i32 1 acquire <i>; yields {i32}</i>
-</pre>
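-
-<p>As a further illustration, a minimal reference-count sketch, assuming a
-hypothetical count word <tt>@refs</tt>; the returned original value tells the
-caller whether it dropped the last reference:</p>
-
-<pre class="doc_code">
-  %old = atomicrmw sub i32* @refs, i32 1 acq_rel   <i>; yields {i32}</i>
-  %last = icmp eq i32 %old, 1                      <i>; old value 1 means now 0</i>
-  br i1 %last, label %destroy, label %done
-</pre>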
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_getelementptr">'<tt>getelementptr</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = getelementptr &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
- &lt;result&gt; = getelementptr inbounds &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
- &lt;result&gt; = getelementptr &lt;ptr vector&gt; ptrval, &lt;vector index type&gt; idx
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>getelementptr</tt>' instruction is used to get the address of a
- subelement of an <a href="#t_aggregate">aggregate</a> data structure.
- It performs address calculation only and does not access memory.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is always a pointer or a vector of pointers,
- and forms the basis of the
- calculation. The remaining arguments are indices that indicate which of the
- elements of the aggregate object are indexed. The interpretation of each
- index is dependent on the type being indexed into. The first index always
- indexes the pointer value given as the first argument, the second index
- indexes a value of the type pointed to (not necessarily the value directly
- pointed to, since the first index can be non-zero), etc. The first type
- indexed into must be a pointer type; subsequent types can be arrays,
- vectors, and structs. Note that subsequent types being indexed into
- can never be pointers, since that would require loading the pointer before
- continuing calculation.</p>
-
-<p>The type of each index argument depends on the type it is indexing into.
- When indexing into an (optionally packed) structure, only <tt>i32</tt>
- integer <b>constants</b> are allowed. When indexing into an array, pointer
- or vector, integers of any width are allowed, and they are not required to be
- constant. These integers are treated as signed values where relevant.</p>
-
-<p>For example, let's consider a C code fragment and how it gets compiled to
- LLVM:</p>
-
-<pre class="doc_code">
-struct RT {
- char A;
- int B[10][20];
- char C;
-};
-struct ST {
- int X;
- double Y;
- struct RT Z;
-};
-
-int *foo(struct ST *s) {
- return &amp;s[1].Z.B[5][13];
-}
-</pre>
-
-<p>The LLVM code generated by Clang is:</p>
-
-<pre class="doc_code">
-%struct.RT = <a href="#namedtypes">type</a> { i8, [10 x [20 x i32]], i8 }
-%struct.ST = <a href="#namedtypes">type</a> { i32, double, %struct.RT }
-
-define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp {
-entry:
- %arrayidx = getelementptr inbounds %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13
- ret i32* %arrayidx
-}
-</pre>
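-
-<p>To make the address arithmetic concrete: assuming a typical 64-bit layout
-in which <tt>i32</tt> is 4 bytes, <tt>double</tt> is 8-byte aligned,
-<tt>%struct.RT</tt> occupies 808 bytes and <tt>%struct.ST</tt> 824 bytes, the
-indices above select a byte offset of:</p>
-
-<pre class="doc_code">
-  824*1           ; i64 1        : skip one %struct.ST
-  + 16            ; i32 2        : field Z (X at 0, Y at 8)
-  + 4             ; i32 1        : field B (A at 0)
-  + 5*80 + 13*4   ; i64 5, i64 13: B[5][13], each row is 20 x i32 = 80 bytes
-  = 1296 bytes from %s
-</pre>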
-
-<h5>Semantics:</h5>
-<p>In the example above, the first index is indexing into the
- '<tt>%struct.ST*</tt>' type, which is a pointer, yielding a
- '<tt>%struct.ST</tt>' = '<tt>{ i32, double, %struct.RT }</tt>' type, a
- structure. The second index indexes into the third element of the structure,
- yielding a '<tt>%struct.RT</tt>' = '<tt>{ i8 , [10 x [20 x i32]], i8 }</tt>'
- type, another structure. The third index indexes into the second element of
- the structure, yielding a '<tt>[10 x [20 x i32]]</tt>' type, an array. The
- two dimensions of the array are subscripted into, yielding an '<tt>i32</tt>'
- type. The '<tt>getelementptr</tt>' instruction returns a pointer to this
- element, thus computing a value of '<tt>i32*</tt>' type.</p>
-
-<p>Note that it is perfectly legal to index partially through a structure,
- returning a pointer to an inner element. Because of this, the LLVM code for
- the given testcase is equivalent to:</p>
-
-<pre class="doc_code">
-define i32* @foo(%struct.ST* %s) {
- %t1 = getelementptr %struct.ST* %s, i32 1 <i>; yields %struct.ST*:%t1</i>
- %t2 = getelementptr %struct.ST* %t1, i32 0, i32 2 <i>; yields %struct.RT*:%t2</i>
- %t3 = getelementptr %struct.RT* %t2, i32 0, i32 1 <i>; yields [10 x [20 x i32]]*:%t3</i>
- %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5 <i>; yields [20 x i32]*:%t4</i>
- %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13 <i>; yields i32*:%t5</i>
- ret i32* %t5
-}
-</pre>
-
-<p>If the <tt>inbounds</tt> keyword is present, the result value of the
- <tt>getelementptr</tt> is a <a href="#poisonvalues">poison value</a> if the
- base pointer is not an <i>in bounds</i> address of an allocated object,
- or if any of the addresses that would be formed by successive addition of
- the offsets implied by the indices to the base address with infinitely
- precise signed arithmetic are not an <i>in bounds</i> address of that
- allocated object. The <i>in bounds</i> addresses for an allocated object
- are all the addresses that point into the object, plus the address one
- byte past the end.
- In cases where the base is a vector of pointers, the <tt>inbounds</tt> keyword
- applies to each of the computations element-wise.</p>
-
-<p>If the <tt>inbounds</tt> keyword is not present, the offsets are added to
- the base address with silently-wrapping two's complement arithmetic. If the
- offsets have a different width from the pointer, they are sign-extended or
- truncated to the width of the pointer. The result value of the
- <tt>getelementptr</tt> may be outside the object pointed to by the base
- pointer. The result value may not necessarily be used to access memory
- though, even if it happens to point into allocated storage. See the
- <a href="#pointeraliasing">Pointer Aliasing Rules</a> section for more
- information.</p>
-
-<p>The getelementptr instruction is often confusing. For some more insight into
- how it works, see <a href="GetElementPtr.html">the getelementptr FAQ</a>.</p>
-
-<h5>Example:</h5>
-<pre>
- <i>; yields [12 x i8]*:aptr</i>
- %aptr = getelementptr {i32, [12 x i8]}* %saptr, i64 0, i32 1
- <i>; yields i8*:vptr</i>
- %vptr = getelementptr {i32, &lt;2 x i8&gt;}* %svptr, i64 0, i32 1, i32 1
- <i>; yields i8*:eptr</i>
- %eptr = getelementptr [12 x i8]* %aptr, i64 0, i32 1
- <i>; yields i32*:iptr</i>
- %iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
-</pre>
-
-<p>In cases where the pointer argument is a vector of pointers, only a
- single index may be used, and the number of elements in the index vector
- must match the number of pointer elements. For example:</p>
-<pre class="doc_code">
- %A = getelementptr &lt;4 x i8*&gt; %ptrs, &lt;4 x i64&gt; %offsets
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="convertops">Conversion Operations</a>
-</h3>
-
-<div>
-
-<p>The instructions in this category are the conversion instructions (casting)
- which all take a single operand and a type. They perform various bit
- conversions on the operand.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_trunc">'<tt>trunc .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = trunc &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>trunc</tt>' instruction truncates its operand to the
- type <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>trunc</tt>' instruction takes a value to trunc, and a type to trunc it to.
- Both types must be of <a href="#t_integer">integer</a> types, or vectors
- of the same number of integers.
- The bit size of the <tt>value</tt> must be larger than
- the bit size of the destination type, <tt>ty2</tt>.
- Equal sized types are not allowed.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>trunc</tt>' instruction truncates the high order bits
- in <tt>value</tt> and converts the remaining bits to <tt>ty2</tt>. Since the
- source size must be larger than the destination size, <tt>trunc</tt> cannot
- be a <i>no-op cast</i>. It will always truncate bits.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = trunc i32 257 to i8 <i>; yields i8:1</i>
- %Y = trunc i32 123 to i1 <i>; yields i1:true</i>
- %Z = trunc i32 122 to i1 <i>; yields i1:false</i>
- %W = trunc &lt;2 x i16&gt; &lt;i16 8, i16 7&gt; to &lt;2 x i8&gt; <i>; yields &lt;i8 8, i8 7&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_zext">'<tt>zext .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = zext &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>zext</tt>' instruction zero extends its operand to type
- <tt>ty2</tt>.</p>
-
-
-<h5>Arguments:</h5>
-<p>The '<tt>zext</tt>' instruction takes a value to cast, and a type to cast it to.
- Both types must be of <a href="#t_integer">integer</a> types, or vectors
- of the same number of integers.
- The bit size of the <tt>value</tt> must be smaller than
- the bit size of the destination type,
- <tt>ty2</tt>.</p>
-
-<h5>Semantics:</h5>
-<p>The <tt>zext</tt> fills the high order bits of the <tt>value</tt> with zero
- bits until it reaches the size of the destination type, <tt>ty2</tt>.</p>
-
-<p>When zero extending from i1, the result will always be either 0 or 1.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = zext i32 257 to i64 <i>; yields i64:257</i>
- %Y = zext i1 true to i32 <i>; yields i32:1</i>
- %Z = zext &lt;2 x i16&gt; &lt;i16 8, i16 7&gt; to &lt;2 x i32&gt; <i>; yields &lt;i32 8, i32 7&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_sext">'<tt>sext .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = sext &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>sext</tt>' sign extends <tt>value</tt> to the type <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>sext</tt>' instruction takes a value to cast, and a type to cast it to.
- Both types must be of <a href="#t_integer">integer</a> types, or vectors
- of the same number of integers.
- The bit size of the <tt>value</tt> must be smaller than
- the bit size of the destination type,
- <tt>ty2</tt>.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>sext</tt>' instruction performs a sign extension by copying the sign
- bit (highest order bit) of the <tt>value</tt> until it reaches the bit size
- of the type <tt>ty2</tt>.</p>
-
-<p>When sign extending from i1, the extension always results in -1 or 0.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = sext i8 -1 to i16              <i>; yields i16:-1 (bit pattern 65535)</i>
- %Y = sext i1 true to i32 <i>; yields i32:-1</i>
- %Z = sext &lt;2 x i16&gt; &lt;i16 8, i16 7&gt; to &lt;2 x i32&gt; <i>; yields &lt;i32 8, i32 7&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_fptrunc">'<tt>fptrunc .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = fptrunc &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fptrunc</tt>' instruction truncates <tt>value</tt> to type
- <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>fptrunc</tt>' instruction takes a <a href="#t_floating">floating
- point</a> value to cast and a <a href="#t_floating">floating point</a> type
- to cast it to. The size of <tt>value</tt> must be larger than the size of
- <tt>ty2</tt>. This implies that <tt>fptrunc</tt> cannot be used to make a
- <i>no-op cast</i>.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>fptrunc</tt>' instruction truncates a <tt>value</tt> from a larger
- <a href="#t_floating">floating point</a> type to a smaller
- <a href="#t_floating">floating point</a> type. If the value cannot fit
- within the destination type, <tt>ty2</tt>, then the results are
- undefined.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = fptrunc double 123.0 to float <i>; yields float:123.0</i>
- %Y = fptrunc double 1.0E+300 to float <i>; yields undefined</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_fpext">'<tt>fpext .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = fpext &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fpext</tt>' extends a floating point <tt>value</tt> to a larger
- floating point value.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>fpext</tt>' instruction takes a
- <a href="#t_floating">floating point</a> <tt>value</tt> to cast, and
- a <a href="#t_floating">floating point</a> type to cast it to. The source
- type must be smaller than the destination type.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>fpext</tt>' instruction extends the <tt>value</tt> from a smaller
- <a href="#t_floating">floating point</a> type to a larger
- <a href="#t_floating">floating point</a> type. The <tt>fpext</tt> cannot be
- used to make a <i>no-op cast</i> because it always changes bits. Use
- <tt>bitcast</tt> to make a <i>no-op cast</i> for a floating point cast.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = fpext float 3.125 to double <i>; yields double:3.125000e+00</i>
- %Y = fpext double %X to fp128 <i>; yields fp128:0xL00000000000000004000900000000000</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_fptoui">'<tt>fptoui .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = fptoui &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fptoui</tt>' converts a floating point <tt>value</tt> to its
- unsigned integer equivalent of type <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>fptoui</tt>' instruction takes a value to cast, which must be a
- scalar or vector <a href="#t_floating">floating point</a> value, and a type
- to cast it to <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a>
- type. If <tt>ty</tt> is a vector floating point type, <tt>ty2</tt> must be a
- vector integer type with the same number of elements as <tt>ty</tt>.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>fptoui</tt>' instruction converts its
- <a href="#t_floating">floating point</a> operand into the nearest (rounding
- towards zero) unsigned integer value. If the value cannot fit
- in <tt>ty2</tt>, the results are undefined.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = fptoui double 123.0 to i32 <i>; yields i32:123</i>
- %Y = fptoui double 1.0E+300 to i1       <i>; yields undefined:1</i>
- %Z = fptoui float 1.04E+17 to i8 <i>; yields undefined:1</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_fptosi">'<tt>fptosi .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = fptosi &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fptosi</tt>' instruction converts
- <a href="#t_floating">floating point</a> <tt>value</tt> to
- type <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>fptosi</tt>' instruction takes a value to cast, which must be a
- scalar or vector <a href="#t_floating">floating point</a> value, and a type
- to cast it to <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a>
- type. If <tt>ty</tt> is a vector floating point type, <tt>ty2</tt> must be a
- vector integer type with the same number of elements as <tt>ty</tt>.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>fptosi</tt>' instruction converts its
- <a href="#t_floating">floating point</a> operand into the nearest (rounding
- towards zero) signed integer value. If the value cannot fit in <tt>ty2</tt>,
- the results are undefined.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = fptosi double -123.0 to i32 <i>; yields i32:-123</i>
- %Y = fptosi double 1.0E-247 to i1       <i>; yields undefined:1</i>
- %Z = fptosi float 1.04E+17 to i8 <i>; yields undefined:1</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_uitofp">'<tt>uitofp .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = uitofp &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>uitofp</tt>' instruction regards <tt>value</tt> as an unsigned
- integer and converts that value to the <tt>ty2</tt> type.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>uitofp</tt>' instruction takes a value to cast, which must be a
- scalar or vector <a href="#t_integer">integer</a> value, and a type to cast
- it to <tt>ty2</tt>, which must be a <a href="#t_floating">floating point</a>
- type. If <tt>ty</tt> is a vector integer type, <tt>ty2</tt> must be a vector
- floating point type with the same number of elements as <tt>ty</tt>.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>uitofp</tt>' instruction interprets its operand as an unsigned
- integer quantity and converts it to the corresponding floating point
- value. If the value cannot fit in the floating point value, the results are
- undefined.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = uitofp i32 257 to float <i>; yields float:257.0</i>
- %Y = uitofp i8 -1 to double <i>; yields double:255.0</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_sitofp">'<tt>sitofp .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = sitofp &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>sitofp</tt>' instruction regards <tt>value</tt> as a signed integer
- and converts that value to the <tt>ty2</tt> type.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>sitofp</tt>' instruction takes a value to cast, which must be a
- scalar or vector <a href="#t_integer">integer</a> value, and a type to cast
- it to <tt>ty2</tt>, which must be a <a href="#t_floating">floating point</a>
- type. If <tt>ty</tt> is a vector integer type, <tt>ty2</tt> must be a vector
- floating point type with the same number of elements as <tt>ty</tt>.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>sitofp</tt>' instruction interprets its operand as a signed integer
- quantity and converts it to the corresponding floating point value. If the
- value cannot fit in the floating point value, the results are undefined.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = sitofp i32 257 to float <i>; yields float:257.0</i>
- %Y = sitofp i8 -1 to double <i>; yields double:-1.0</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_ptrtoint">'<tt>ptrtoint .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = ptrtoint &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>ptrtoint</tt>' instruction converts the pointer or a vector of
- pointers <tt>value</tt> to
- the integer (or vector of integers) type <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>ptrtoint</tt>' instruction takes a <tt>value</tt> to cast, which
- must be a value of type <a href="#t_pointer">pointer</a> or a vector of
- pointers, and a type to cast it to
- <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> or a vector
- of integers type.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>ptrtoint</tt>' instruction converts <tt>value</tt> to integer type
- <tt>ty2</tt> by interpreting the pointer value as an integer and either
- truncating or zero extending that value to the size of the integer type. If
- <tt>value</tt> is smaller than <tt>ty2</tt> then a zero extension is done. If
- <tt>value</tt> is larger than <tt>ty2</tt> then a truncation is done. If they
- are the same size, then nothing is done (<i>no-op cast</i>) other than a type
- change.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = ptrtoint i32* %P to i8 <i>; yields truncation on 32-bit architecture</i>
- %Y = ptrtoint i32* %P to i64 <i>; yields zero extension on 32-bit architecture</i>
- %Z = ptrtoint &lt;4 x i32*&gt; %P to &lt;4 x i64&gt; <i>; yields vector zero extension for a vector of addresses on 32-bit architecture</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_inttoptr">'<tt>inttoptr .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = inttoptr &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>inttoptr</tt>' instruction converts an integer <tt>value</tt> to a
- pointer type, <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>inttoptr</tt>' instruction takes an <a href="#t_integer">integer</a>
- value to cast, and a type to cast it to, which must be a
- <a href="#t_pointer">pointer</a> type.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>inttoptr</tt>' instruction converts <tt>value</tt> to type
- <tt>ty2</tt> by applying either a zero extension or a truncation depending on
- the size of the integer <tt>value</tt>. If <tt>value</tt> is larger than the
- size of a pointer then a truncation is done. If <tt>value</tt> is smaller
- than the size of a pointer then a zero extension is done. If they are the
- same size, nothing is done (<i>no-op cast</i>).</p>
-
-<h5>Example:</h5>
-<pre>
- %X = inttoptr i32 255 to i32* <i>; yields zero extension on 64-bit architecture</i>
- %Y = inttoptr i32 255 to i32* <i>; yields no-op on 32-bit architecture</i>
- %Z = inttoptr i64 0 to i32* <i>; yields truncation on 32-bit architecture</i>
- %W = inttoptr &lt;4 x i32&gt; %G to &lt;4 x i8*&gt; <i>; yields truncation of vector %G to four pointers</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_bitcast">'<tt>bitcast .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = bitcast &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt; <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>bitcast</tt>' instruction converts <tt>value</tt> to type
- <tt>ty2</tt> without changing any bits.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>bitcast</tt>' instruction takes a value to cast, which must be a
- non-aggregate first class value, and a type to cast it to, which must also be
- a non-aggregate <a href="#t_firstclass">first class</a> type. The bit sizes
- of <tt>value</tt> and the destination type, <tt>ty2</tt>, must be
- identical. If the source type is a pointer, the destination type must also be
- a pointer. This instruction supports bitwise conversion of vectors to
- integers and to vectors of other types (as long as they have the same
- size).</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>bitcast</tt>' instruction converts <tt>value</tt> to type
- <tt>ty2</tt>. It is always a <i>no-op cast</i> because no bits change with
- this conversion. The conversion is done as if the <tt>value</tt> had been
- stored to memory and read back as type <tt>ty2</tt>.
- Pointer (or vector of pointers) types may only be converted to other pointer
- (or vector of pointers) types with this instruction. To convert
- pointers to other types, use the <a href="#i_inttoptr">inttoptr</a> or
- <a href="#i_ptrtoint">ptrtoint</a> instructions first.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = bitcast i8 255 to i8                       <i>; yields i8:-1</i>
- %Y = bitcast i32* %x to i16*                    <i>; yields i16*:%x</i>
- %Z = bitcast &lt;2 x i32&gt; %V to i64           <i>; yields i64: %V</i>
- %W = bitcast &lt;2 x i32*&gt; %V to &lt;2 x i64*&gt; <i>; yields &lt;2 x i64*&gt;</i>
-</pre>
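-
-<p>A common use of <tt>bitcast</tt> is reinterpreting the bits of a floating
-point value as an integer; this sketch assumes a value <tt>%f</tt> of type
-<tt>float</tt>:</p>
-
-<pre>
-  %bits = bitcast float %f to i32     <i>; yields i32: the same 32 bits</i>
-  %back = bitcast i32 %bits to float  <i>; yields float: the original %f</i>
-</pre>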
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="otherops">Other Operations</a>
-</h3>
-
-<div>
-
-<p>The instructions in this category are the "miscellaneous" instructions, which
- defy better classification.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_icmp">'<tt>icmp</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = icmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {i1} or {&lt;N x i1&gt;}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>icmp</tt>' instruction returns a boolean value or a vector of
- boolean values based on comparison of its two integer, integer vector,
- pointer, or pointer vector operands.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>icmp</tt>' instruction takes three operands. The first operand is
- the condition code indicating the kind of comparison to perform. It is not a
- value, just a keyword. The possible condition codes are:</p>
-
-<ol>
- <li><tt>eq</tt>: equal</li>
- <li><tt>ne</tt>: not equal </li>
- <li><tt>ugt</tt>: unsigned greater than</li>
- <li><tt>uge</tt>: unsigned greater or equal</li>
- <li><tt>ult</tt>: unsigned less than</li>
- <li><tt>ule</tt>: unsigned less or equal</li>
- <li><tt>sgt</tt>: signed greater than</li>
- <li><tt>sge</tt>: signed greater or equal</li>
- <li><tt>slt</tt>: signed less than</li>
- <li><tt>sle</tt>: signed less or equal</li>
-</ol>
-
-<p>The remaining two arguments must be of <a href="#t_integer">integer</a>,
- <a href="#t_pointer">pointer</a>, or integer <a href="#t_vector">vector</a>
- type. They must also have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>icmp</tt>' compares <tt>op1</tt> and <tt>op2</tt> according to the
- condition code given as <tt>cond</tt>. The comparison performed always yields
- either an <a href="#t_integer"><tt>i1</tt></a> or vector of <tt>i1</tt>
- result, as follows:</p>
-
-<ol>
- <li><tt>eq</tt>: yields <tt>true</tt> if the operands are equal,
- <tt>false</tt> otherwise. No sign interpretation is necessary or
- performed.</li>
-
- <li><tt>ne</tt>: yields <tt>true</tt> if the operands are unequal,
- <tt>false</tt> otherwise. No sign interpretation is necessary or
- performed.</li>
-
- <li><tt>ugt</tt>: interprets the operands as unsigned values and yields
- <tt>true</tt> if <tt>op1</tt> is greater than <tt>op2</tt>.</li>
-
- <li><tt>uge</tt>: interprets the operands as unsigned values and yields
- <tt>true</tt> if <tt>op1</tt> is greater than or equal
- to <tt>op2</tt>.</li>
-
- <li><tt>ult</tt>: interprets the operands as unsigned values and yields
- <tt>true</tt> if <tt>op1</tt> is less than <tt>op2</tt>.</li>
-
- <li><tt>ule</tt>: interprets the operands as unsigned values and yields
- <tt>true</tt> if <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
-
- <li><tt>sgt</tt>: interprets the operands as signed values and yields
- <tt>true</tt> if <tt>op1</tt> is greater than <tt>op2</tt>.</li>
-
- <li><tt>sge</tt>: interprets the operands as signed values and yields
- <tt>true</tt> if <tt>op1</tt> is greater than or equal
- to <tt>op2</tt>.</li>
-
- <li><tt>slt</tt>: interprets the operands as signed values and yields
- <tt>true</tt> if <tt>op1</tt> is less than <tt>op2</tt>.</li>
-
- <li><tt>sle</tt>: interprets the operands as signed values and yields
- <tt>true</tt> if <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
-</ol>
-
-<p>If the operands are <a href="#t_pointer">pointer</a> typed, the pointer
- values are compared as if they were integers.</p>
-
-<p>If the operands are integer vectors, then they are compared element by
- element. The result is an <tt>i1</tt> vector with the same number of elements
- as the values being compared. Otherwise, the result is an <tt>i1</tt>.</p>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = icmp eq i32 4, 5 <i>; yields: result=false</i>
- &lt;result&gt; = icmp ne float* %X, %X <i>; yields: result=false</i>
- &lt;result&gt; = icmp ult i16 4, 5 <i>; yields: result=true</i>
- &lt;result&gt; = icmp sgt i16 4, 5 <i>; yields: result=false</i>
- &lt;result&gt; = icmp ule i16 -4, 5 <i>; yields: result=false</i>
- &lt;result&gt; = icmp sge i16 4, 5 <i>; yields: result=false</i>
-</pre>
-
-<p>Note that the code generator does not yet support vector types with
- the <tt>icmp</tt> instruction.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_fcmp">'<tt>fcmp</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = fcmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt; <i>; yields {i1} or {&lt;N x i1&gt;}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fcmp</tt>' instruction returns a boolean value or vector of boolean
- values based on comparison of its operands.</p>
-
-<p>If the operands are floating point scalars, then the result type is a boolean
-(<a href="#t_integer"><tt>i1</tt></a>).</p>
-
-<p>If the operands are floating point vectors, then the result type is a vector
- of boolean with the same number of elements as the operands being
- compared.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>fcmp</tt>' instruction takes three operands. The first operand is
- the condition code indicating the kind of comparison to perform. It is not a
- value, just a keyword. The possible condition codes are:</p>
-
-<ol>
- <li><tt>false</tt>: no comparison, always returns false</li>
- <li><tt>oeq</tt>: ordered and equal</li>
- <li><tt>ogt</tt>: ordered and greater than </li>
- <li><tt>oge</tt>: ordered and greater than or equal</li>
- <li><tt>olt</tt>: ordered and less than </li>
- <li><tt>ole</tt>: ordered and less than or equal</li>
- <li><tt>one</tt>: ordered and not equal</li>
- <li><tt>ord</tt>: ordered (no nans)</li>
- <li><tt>ueq</tt>: unordered or equal</li>
- <li><tt>ugt</tt>: unordered or greater than </li>
- <li><tt>uge</tt>: unordered or greater than or equal</li>
- <li><tt>ult</tt>: unordered or less than </li>
- <li><tt>ule</tt>: unordered or less than or equal</li>
- <li><tt>une</tt>: unordered or not equal</li>
- <li><tt>uno</tt>: unordered (either nans)</li>
- <li><tt>true</tt>: no comparison, always returns true</li>
-</ol>
-
-<p><i>Ordered</i> means that neither operand is a QNAN while
- <i>unordered</i> means that either operand may be a QNAN.</p>
-
-<p>The <tt>op1</tt> and <tt>op2</tt> arguments must each be either
- a <a href="#t_floating">floating point</a> type or
- a <a href="#t_vector">vector</a> of floating point type. They must have
- identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>fcmp</tt>' instruction compares <tt>op1</tt> and <tt>op2</tt>
- according to the condition code given as <tt>cond</tt>. If the operands are
- vectors, then the vectors are compared element by element. Each comparison
- performed always yields an <a href="#t_integer">i1</a> result, as
- follows:</p>
-
-<ol>
- <li><tt>false</tt>: always yields <tt>false</tt>, regardless of operands.</li>
-
- <li><tt>oeq</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is equal to <tt>op2</tt>.</li>
-
- <li><tt>ogt</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is greater than <tt>op2</tt>.</li>
-
- <li><tt>oge</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
-
- <li><tt>olt</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is less than <tt>op2</tt>.</li>
-
- <li><tt>ole</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
-
- <li><tt>one</tt>: yields <tt>true</tt> if both operands are not a QNAN and
- <tt>op1</tt> is not equal to <tt>op2</tt>.</li>
-
- <li><tt>ord</tt>: yields <tt>true</tt> if both operands are not a QNAN.</li>
-
- <li><tt>ueq</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is equal to <tt>op2</tt>.</li>
-
- <li><tt>ugt</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is greater than <tt>op2</tt>.</li>
-
- <li><tt>uge</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
-
- <li><tt>ult</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is less than <tt>op2</tt>.</li>
-
- <li><tt>ule</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
-
- <li><tt>une</tt>: yields <tt>true</tt> if either operand is a QNAN or
- <tt>op1</tt> is not equal to <tt>op2</tt>.</li>
-
- <li><tt>uno</tt>: yields <tt>true</tt> if either operand is a QNAN.</li>
-
- <li><tt>true</tt>: always yields <tt>true</tt>, regardless of operands.</li>
-</ol>
-
-<h5>Example:</h5>
-<pre>
- &lt;result&gt; = fcmp oeq float 4.0, 5.0 <i>; yields: result=false</i>
- &lt;result&gt; = fcmp one float 4.0, 5.0 <i>; yields: result=true</i>
- &lt;result&gt; = fcmp olt float 4.0, 5.0 <i>; yields: result=true</i>
- &lt;result&gt; = fcmp ueq double 1.0, 2.0 <i>; yields: result=false</i>
-</pre>
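-
-<p>To illustrate the ordered/unordered distinction, a sketch using a quiet NaN
-<tt>double</tt> constant (<tt>0x7FF8000000000000</tt>):</p>
-
-<pre>
-  &lt;result&gt; = fcmp ord double 0x7FF8000000000000, 1.0 <i>; yields: result=false</i>
-  &lt;result&gt; = fcmp uno double 0x7FF8000000000000, 1.0 <i>; yields: result=true</i>
-  &lt;result&gt; = fcmp ueq double 0x7FF8000000000000, 1.0 <i>; yields: result=true</i>
-</pre>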
-
-<p>Note that the code generator does not yet support vector types with
- the <tt>fcmp</tt> instruction.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_phi">'<tt>phi</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = phi &lt;ty&gt; [ &lt;val0&gt;, &lt;label0&gt; ], ...
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>phi</tt>' instruction is used to implement the &#966; node in the
- SSA graph representing the function.</p>
-
-<h5>Arguments:</h5>
-<p>The type of the incoming values is specified with the first type field. After
- this, the '<tt>phi</tt>' instruction takes a list of pairs as arguments, with
- one pair for each predecessor basic block of the current block. Only values
- of <a href="#t_firstclass">first class</a> type may be used as the value
- arguments to the PHI node. Only labels may be used as the label
- arguments.</p>
-
-<p>There must be no non-phi instructions between the start of a basic block and
- the PHI instructions: i.e. PHI instructions must be first in a basic
- block.</p>
-
-<p>For the purposes of the SSA form, the use of each incoming value is deemed to
- occur on the edge from the corresponding predecessor block to the current
- block (but after any definition of an '<tt>invoke</tt>' instruction's return
- value on the same edge).</p>
-
-<h5>Semantics:</h5>
-<p>At runtime, the '<tt>phi</tt>' instruction logically takes on the value
- specified by the pair corresponding to the predecessor basic block that
- executed just prior to the current block.</p>
-
-<h5>Example:</h5>
-<pre>
-Loop: ; Infinite loop that counts from 0 on up...
- %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]
- %nextindvar = add i32 %indvar, 1
- br label %Loop
-</pre>
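-
-<p>A second sketch, merging values from an if/else diamond (the labels and
-constants are illustrative):</p>
-
-<pre>
-entry:
-  br i1 %cond, label %then, label %else
-
-then:
-  br label %merge
-
-else:
-  br label %merge
-
-merge:
-  %r = phi i32 [ 1, %then ], [ 2, %else ]
-</pre>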
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_select">'<tt>select</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = select <i>selty</i> &lt;cond&gt;, &lt;ty&gt; &lt;val1&gt;, &lt;ty&gt; &lt;val2&gt; <i>; yields ty</i>
-
- <i>selty</i> is either i1 or {&lt;N x i1&gt;}
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>select</tt>' instruction is used to choose one value based on a
- condition, without branching.</p>
-
-
-<h5>Arguments:</h5>
-<p>The '<tt>select</tt>' instruction requires an 'i1' value or a vector of 'i1'
- values indicating the condition, and two values of the
- same <a href="#t_firstclass">first class</a> type. If <tt>val1</tt>/<tt>val2</tt> are
- vectors and the condition is a scalar, then entire vectors are selected, not
- individual elements.</p>
-
-<h5>Semantics:</h5>
-<p>If the condition is an i1 and it evaluates to 1, the instruction returns the
- first value argument; otherwise, it returns the second value argument.</p>
-
-<p>If the condition is a vector of i1, then the value arguments must be vectors
- of the same size, and the selection is done element by element.</p>
-
-<h5>Example:</h5>
-<pre>
- %X = select i1 true, i8 17, i8 42 <i>; yields i8:17</i>
-</pre>
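-
-<p>A vector sketch of the element-by-element case described above:</p>
-
-<pre>
-  %V = select &lt;2 x i1&gt; &lt;i1 true, i1 false&gt;, &lt;2 x i8&gt; &lt;i8 17, i8 42&gt;, &lt;2 x i8&gt; &lt;i8 99, i8 100&gt; <i>; yields &lt;2 x i8&gt;: &lt;i8 17, i8 100&gt;</i>
-</pre>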
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_call">'<tt>call</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;result&gt; = [tail] call [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>] &lt;ty&gt; [&lt;fnty&gt;*] &lt;fnptrval&gt;(&lt;function args&gt;) [<a href="#fnattrs">fn attrs</a>]
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>call</tt>' instruction represents a simple function call.</p>
-
-<h5>Arguments:</h5>
-<p>This instruction requires several arguments:</p>
-
-<ol>
- <li>The optional "tail" marker indicates that the callee function does not
- access any allocas or varargs in the caller. Note that calls may be
- marked "tail" even if they do not occur before
- a <a href="#i_ret"><tt>ret</tt></a> instruction. If the "tail" marker is
- present, the function call is eligible for tail call optimization,
- but <a href="CodeGenerator.html#tailcallopt">might not in fact be
- optimized into a jump</a>. The code generator may optimize calls marked
- "tail" with either 1) automatic <a href="CodeGenerator.html#sibcallopt">
- sibling call optimization</a> when the caller and callee have
- matching signatures, or 2) forced tail call optimization when the
- following extra requirements are met:
- <ul>
- <li>Caller and callee both have the calling
- convention <tt>fastcc</tt>.</li>
- <li>The call is in tail position (ret immediately follows call and ret
- uses value of call or is void).</li>
- <li>Option <tt>-tailcallopt</tt> is enabled,
- or <code>llvm::GuaranteedTailCallOpt</code> is <code>true</code>.</li>
- <li><a href="CodeGenerator.html#tailcallopt">Platform specific
- constraints are met.</a></li>
- </ul>
- </li>
-
- <li>The optional "cconv" marker indicates which <a href="#callingconv">calling
- convention</a> the call should use. If none is specified, the call
- defaults to using C calling conventions. The calling convention of the
- call must match the calling convention of the target function, or else the
- behavior is undefined.</li>
-
- <li>The optional <a href="#paramattrs">Parameter Attributes</a> list for
- return values. Only '<tt>zeroext</tt>', '<tt>signext</tt>', and
- '<tt>inreg</tt>' attributes are valid here.</li>
-
- <li>'<tt>ty</tt>': the type of the call instruction itself which is also the
- type of the return value. Functions that return no value are marked
- <tt><a href="#t_void">void</a></tt>.</li>
-
- <li>'<tt>fnty</tt>': shall be the signature of the pointer to function value
- being invoked. The argument types must match the types implied by this
- signature. This type can be omitted if the function is not varargs and if
- the function type does not return a pointer to a function.</li>
-
- <li>'<tt>fnptrval</tt>': An LLVM value containing a pointer to a function to
- be invoked. In most cases, this is a direct function invocation, but
- indirect <tt>call</tt>s are just as possible, calling an arbitrary pointer
- to function value.</li>
-
- <li>'<tt>function args</tt>': argument list whose types match the function
- signature argument types and parameter attributes. All arguments must be
- of <a href="#t_firstclass">first class</a> type. If the function
- signature indicates the function accepts a variable number of arguments,
- the extra arguments can be specified.</li>
-
- <li>The optional <a href="#fnattrs">function attributes</a> list. Only
- '<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
- '<tt>readnone</tt>' attributes are valid here.</li>
-</ol>
-
-<h5>Semantics:</h5>
-<p>The '<tt>call</tt>' instruction is used to cause control flow to transfer to
- a specified function, with its incoming arguments bound to the specified
- values. Upon a '<tt><a href="#i_ret">ret</a></tt>' instruction in the called
- function, control flow continues with the instruction after the function
- call, and the return value of the function is bound to the result
- argument.</p>
-
-<h5>Example:</h5>
-<pre>
- %retval = call i32 @test(i32 %argc)
- call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42) <i>; yields i32</i>
- %X = tail call i32 @foo() <i>; yields i32</i>
- %Y = tail call <a href="#callingconv">fastcc</a> i32 @foo() <i>; yields i32</i>
- call void %foo(i8 97 signext)
-
- %struct.A = type { i32, i8 }
- %r = call %struct.A @foo()                        <i>; yields { i32, i8 }</i>
- %gr = extractvalue %struct.A %r, 0 <i>; yields i32</i>
- %gr1 = extractvalue %struct.A %r, 1 <i>; yields i8</i>
- %Z = call void @foo() noreturn <i>; indicates that %foo never returns normally</i>
- %ZZ = call zeroext i32 @bar()                     <i>; Return value is zero extended</i>
-</pre>
-
-<p>LLVM treats calls to some functions with names and arguments that match the
-standard C99 library as being the C99 library functions, and may perform
-optimizations or generate code for them under that assumption. This is
-something we'd like to change in the future to provide better support for
-freestanding environments and non-C-based languages.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_va_arg">'<tt>va_arg</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;resultval&gt; = va_arg &lt;va_list*&gt; &lt;arglist&gt;, &lt;argty&gt;
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>va_arg</tt>' instruction is used to access arguments passed through
- the "variable argument" area of a function call. It is used to implement the
- <tt>va_arg</tt> macro in C.</p>
-
-<h5>Arguments:</h5>
-<p>This instruction takes a <tt>va_list*</tt> value and the type of the
- argument. It returns a value of the specified argument type and increments
- the <tt>va_list</tt> to point to the next argument. The actual type
- of <tt>va_list</tt> is target specific.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>va_arg</tt>' instruction loads an argument of the specified type
- from the specified <tt>va_list</tt> and causes the <tt>va_list</tt> to point
- to the next argument. For more information, see the variable argument
- handling <a href="#int_varargs">Intrinsic Functions</a>.</p>
-
-<p>It is legal for this instruction to be called in a function which does not
- take a variable number of arguments, for example, the <tt>vfprintf</tt>
- function.</p>
-
-<p><tt>va_arg</tt> is an LLVM instruction instead of
- an <a href="#intrinsics">intrinsic function</a> because it takes a type as an
- argument.</p>
-
-<h5>Example:</h5>
-<p>See the <a href="#int_varargs">variable argument processing</a> section.</p>
-
-<p>Note that the code generator does not yet fully support va_arg on many
- targets. Also, it does not currently support va_arg with aggregate types on
- any target.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="i_landingpad">'<tt>landingpad</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- &lt;resultval&gt; = landingpad &lt;resultty&gt; personality &lt;type&gt; &lt;pers_fn&gt; &lt;clause&gt;+
- &lt;resultval&gt; = landingpad &lt;resultty&gt; personality &lt;type&gt; &lt;pers_fn&gt; cleanup &lt;clause&gt;*
-
- &lt;clause&gt; := catch &lt;type&gt; &lt;value&gt;
- &lt;clause&gt; := filter &lt;array constant type&gt; &lt;array constant&gt;
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>landingpad</tt>' instruction is used by
- <a href="ExceptionHandling.html#overview">LLVM's exception handling
- system</a> to specify that a basic block is a landing pad &mdash; one where
- the exception lands, and corresponds to the code found in the
- <i><tt>catch</tt></i> portion of a <i><tt>try/catch</tt></i> sequence. It
- defines values supplied by the personality function (<tt>pers_fn</tt>) upon
- re-entry to the function. The <tt>resultval</tt> has the
- type <tt>resultty</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>This instruction takes a <tt>pers_fn</tt> value. This is the personality
- function associated with the unwinding mechanism. The optional
- <tt>cleanup</tt> flag indicates that the landing pad block is a cleanup.</p>
-
-<p>A <tt>clause</tt> begins with the clause type &mdash; <tt>catch</tt>
- or <tt>filter</tt> &mdash; and contains the global variable representing the
- "type" that may be caught or filtered respectively. Unlike the
- <tt>catch</tt> clause, the <tt>filter</tt> clause takes an array constant as
- its argument. Use "<tt>[0 x i8**] undef</tt>" for a filter which cannot
- throw. The '<tt>landingpad</tt>' instruction must contain <em>at least</em>
- one <tt>clause</tt> or the <tt>cleanup</tt> flag.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>landingpad</tt>' instruction defines the values which are set by the
- personality function (<tt>pers_fn</tt>) upon re-entry to the function, and
- therefore the "result type" of the <tt>landingpad</tt> instruction. As with
- calling conventions, how the personality function results are represented in
- LLVM IR is target specific.</p>
-
-<p>The clauses are applied in order from top to bottom. If two
- <tt>landingpad</tt> instructions are merged together through inlining, the
- clauses from the calling function are appended to the list of clauses.
- When the call stack is being unwound due to an exception being thrown, the
- exception is compared against each <tt>clause</tt> in turn. If it doesn't
- match any of the clauses, and the <tt>cleanup</tt> flag is not set, then
- unwinding continues further up the call stack.</p>
-
-<p>The <tt>landingpad</tt> instruction has several restrictions:</p>
-
-<ul>
- <li>A landing pad block is a basic block which is the unwind destination of an
- '<tt>invoke</tt>' instruction.</li>
- <li>A landing pad block must have a '<tt>landingpad</tt>' instruction as its
- first non-PHI instruction.</li>
- <li>There can be only one '<tt>landingpad</tt>' instruction within the landing
- pad block.</li>
- <li>A basic block that is not a landing pad block may not include a
- '<tt>landingpad</tt>' instruction.</li>
- <li>All '<tt>landingpad</tt>' instructions in a function must have the same
- personality function.</li>
-</ul>
-
-<h5>Example:</h5>
-<pre>
- ;; A landing pad which can catch an integer.
- %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
- catch i8** @_ZTIi
- ;; A landing pad that is a cleanup.
- %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
- cleanup
- ;; A landing pad which can catch an integer and can only throw a double.
- %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
- catch i8** @_ZTIi
- filter [1 x i8**] [@_ZTId]
-</pre>
-
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intrinsics">Intrinsic Functions</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM supports the notion of an "intrinsic function". These functions have
- well known names and semantics and are required to follow certain
- restrictions. Overall, these intrinsics represent an extension mechanism for
- the LLVM language that does not require changing all of the transformations
- in LLVM when adding to the language (or the bitcode reader/writer, the
- parser, etc...).</p>
-
-<p>Intrinsic function names must all start with an "<tt>llvm.</tt>" prefix. This
- prefix is reserved in LLVM for intrinsic names; thus, function names may not
- begin with this prefix. Intrinsic functions must always be external
- functions: you cannot define the body of intrinsic functions. Intrinsic
- functions may only be used in call or invoke instructions: it is illegal to
- take the address of an intrinsic function. Additionally, because intrinsic
- functions are part of the LLVM language, any that are added are required to
- be documented here.</p>
-
-<p>Some intrinsic functions can be overloaded, i.e., the intrinsic represents a
- family of functions that perform the same operation but on different data
- types. Because LLVM can represent over 8 million different integer types,
- overloading is used commonly to allow an intrinsic function to operate on any
- integer type. One or more of the argument types or the result type can be
- overloaded to accept any integer type. Argument types may also be defined as
- exactly matching a previous argument's type or the result type. This allows
- an intrinsic function which accepts multiple arguments, but needs all of them
- to be of the same type, to only be overloaded with respect to a single
- argument or the result.</p>
-
-<p>Overloaded intrinsics will have the names of their overloaded argument types
- encoded into their function names, each preceded by a period. Only those types
- which are overloaded result in a name suffix. Arguments whose type is matched
- against another type do not. For example, the <tt>llvm.ctpop</tt> function
- can take an integer of any width and returns an integer of exactly the same
- integer width. This leads to a family of functions such as
- <tt>i8 @llvm.ctpop.i8(i8 %val)</tt> and <tt>i29 @llvm.ctpop.i29(i29
- %val)</tt>. Only one type, the return type, is overloaded, and only one type
- suffix is required. Because the argument's type is matched against the return
- type, it does not require its own name suffix.</p>
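-
-<p>A short usage sketch of one member of the <tt>llvm.ctpop</tt> family
-(<tt>%x</tt> is an assumed <tt>i32</tt> value):</p>
-
-<pre class="doc_code">
-declare i32 @llvm.ctpop.i32(i32)
-
-  %count = call i32 @llvm.ctpop.i32(i32 %x)  <i>; population count of %x</i>
-</pre>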
-
-<p>To learn how to add an intrinsic function, please see the
- <a href="ExtendingLLVM.html">Extending LLVM Guide</a>.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_varargs">Variable Argument Handling Intrinsics</a>
-</h3>
-
-<div>
-
-<p>Variable argument support is defined in LLVM with
- the <a href="#i_va_arg"><tt>va_arg</tt></a> instruction and these three
- intrinsic functions. These functions are related to the similarly named
- macros defined in the <tt>&lt;stdarg.h&gt;</tt> header file.</p>
-
-<p>All of these functions operate on arguments that use a target-specific value
- type "<tt>va_list</tt>". The LLVM assembly language reference manual does
- not define what this type is, so all transformations should be prepared to
- handle these functions regardless of the type used.</p>
-
-<p>This example shows how the <a href="#i_va_arg"><tt>va_arg</tt></a>
- instruction and the variable argument handling intrinsic functions are
- used.</p>
-
-<pre class="doc_code">
-define i32 @test(i32 %X, ...) {
- ; Initialize variable argument processing
- %ap = alloca i8*
- %ap2 = bitcast i8** %ap to i8*
- call void @llvm.va_start(i8* %ap2)
-
- ; Read a single integer argument
- %tmp = va_arg i8** %ap, i32
-
- ; Demonstrate usage of llvm.va_copy and llvm.va_end
- %aq = alloca i8*
- %aq2 = bitcast i8** %aq to i8*
- call void @llvm.va_copy(i8* %aq2, i8* %ap2)
- call void @llvm.va_end(i8* %aq2)
-
- ; Stop processing of arguments.
- call void @llvm.va_end(i8* %ap2)
- ret i32 %tmp
-}
-
-declare void @llvm.va_start(i8*)
-declare void @llvm.va_copy(i8*, i8*)
-declare void @llvm.va_end(i8*)
-</pre>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_va_start">'<tt>llvm.va_start</tt>' Intrinsic</a>
-</h4>
-
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.va_start(i8* &lt;arglist&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.va_start</tt>' intrinsic initializes <tt>*&lt;arglist&gt;</tt>
- for subsequent use by <tt><a href="#i_va_arg">va_arg</a></tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The argument is a pointer to a <tt>va_list</tt> element to initialize.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.va_start</tt>' intrinsic works just like the <tt>va_start</tt>
- macro available in C. In a target-dependent way, it initializes
- the <tt>va_list</tt> element to which the argument points, so that the next
- call to <tt>va_arg</tt> will produce the first variable argument passed to
- the function. Unlike the C <tt>va_start</tt> macro, this intrinsic does not
- need to know the last argument of the function as the compiler can figure
- that out.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_va_end">'<tt>llvm.va_end</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.va_end(i8* &lt;arglist&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.va_end</tt>' intrinsic destroys <tt>*&lt;arglist&gt;</tt>,
- which has been previously initialized
- with <tt><a href="#int_va_start">llvm.va_start</a></tt>
- or <tt><a href="#int_va_copy">llvm.va_copy</a></tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The argument is a pointer to a <tt>va_list</tt> to destroy.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.va_end</tt>' intrinsic works just like the <tt>va_end</tt>
- macro available in C. In a target-dependent way, it destroys
- the <tt>va_list</tt> element to which the argument points. Calls
- to <a href="#int_va_start"><tt>llvm.va_start</tt></a>
- and <a href="#int_va_copy"> <tt>llvm.va_copy</tt></a> must be matched exactly
- with calls to <tt>llvm.va_end</tt>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_va_copy">'<tt>llvm.va_copy</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.va_copy(i8* &lt;destarglist&gt;, i8* &lt;srcarglist&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.va_copy</tt>' intrinsic copies the current argument position
- from the source argument list to the destination argument list.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a pointer to a <tt>va_list</tt> element to initialize.
- The second argument is a pointer to a <tt>va_list</tt> element to copy
- from.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.va_copy</tt>' intrinsic works just like the <tt>va_copy</tt>
- macro available in C. In a target-dependent way, it copies the
- source <tt>va_list</tt> element into the destination <tt>va_list</tt>
- element. This intrinsic is necessary because
- the <tt><a href="#int_va_start"> llvm.va_start</a></tt> intrinsic may be
- arbitrarily complex and require, for example, memory allocation.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_gc">Accurate Garbage Collection Intrinsics</a>
-</h3>
-
-<div>
-
-<p>LLVM support for <a href="GarbageCollection.html">Accurate Garbage
-Collection</a> (GC) requires the implementation and generation of these
-intrinsics. These intrinsics allow identification of <a href="#int_gcroot">GC
-roots on the stack</a>, as well as garbage collector implementations that
-require <a href="#int_gcread">read</a> and <a href="#int_gcwrite">write</a>
-barriers. Front-ends for type-safe garbage collected languages should generate
-these intrinsics to make use of the LLVM garbage collectors. For more details,
-see <a href="GarbageCollection.html">Accurate Garbage Collection with
-LLVM</a>.</p>
-
-<p>The garbage collection intrinsics only operate on objects in the generic
- address space (address space zero).</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_gcroot">'<tt>llvm.gcroot</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.gcroot</tt>' intrinsic declares the existence of a GC root to
- the code generator, and allows some metadata to be associated with it.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument specifies the address of a stack object that contains the
- root pointer. The second pointer (which must be either a constant or a
- global value address) contains the metadata to be associated with the
- root.</p>
-
-<h5>Semantics:</h5>
-<p>At runtime, a call to this intrinsic stores a null pointer into the "ptrloc"
- location. At compile-time, the code generator generates information to allow
- the runtime to find the pointer at GC safe points. The '<tt>llvm.gcroot</tt>'
- intrinsic may only be used in a function which <a href="#gc">specifies a GC
- algorithm</a>.</p>
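-
-<p>A minimal sketch of registering a stack slot as a root (the GC strategy
- name "example" is hypothetical):</p>
-
-<pre>
-define void @frob(i8* %obj) gc "example" {
-  %root = alloca i8*                             ; stack slot holding the root
-  call void @llvm.gcroot(i8** %root, i8* null)   ; register the slot with the collector
-  store i8* %obj, i8** %root                     ; %obj is now traced at safe points
-  ret void
-}
-
-declare void @llvm.gcroot(i8**, i8*)
-</pre>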
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_gcread">'<tt>llvm.gcread</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare i8* @llvm.gcread(i8* %ObjPtr, i8** %Ptr)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.gcread</tt>' intrinsic identifies reads of references from heap
- locations, allowing garbage collector implementations that require read
- barriers.</p>
-
-<h5>Arguments:</h5>
-<p>The second argument is the address to read from, which should be an address
- allocated from the garbage collector. The first argument is a pointer to the
- start of the referenced object, if needed by the language runtime (otherwise
- null).</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.gcread</tt>' intrinsic has the same semantics as a load
- instruction, but may be replaced with substantially more complex code by the
- garbage collector runtime, as needed. The '<tt>llvm.gcread</tt>' intrinsic
- may only be used in a function which <a href="#gc">specifies a GC
- algorithm</a>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_gcwrite">'<tt>llvm.gcwrite</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.gcwrite(i8* %P1, i8* %Obj, i8** %P2)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.gcwrite</tt>' intrinsic identifies writes of references to heap
- locations, allowing garbage collector implementations that require write
- barriers (such as generational or reference counting collectors).</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is the reference to store, the second is the start of the
- object to store it to, and the third is the address of the field of Obj to
- store to. If the runtime does not require a pointer to the object, Obj may
- be null.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.gcwrite</tt>' intrinsic has the same semantics as a store
- instruction, but may be replaced with substantially more complex code by the
- garbage collector runtime, as needed. The '<tt>llvm.gcwrite</tt>' intrinsic
- may only be used in a function which <a href="#gc">specifies a GC
- algorithm</a>.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_codegen">Code Generator Intrinsics</a>
-</h3>
-
-<div>
-
-<p>These intrinsics are provided by LLVM to expose special features that may
- only be implemented with code generator support.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_returnaddress">'<tt>llvm.returnaddress</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare i8* @llvm.returnaddress(i32 &lt;level&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.returnaddress</tt>' intrinsic attempts to compute a
- target-specific value indicating the return address of the current function
- or one of its callers.</p>
-
-<h5>Arguments:</h5>
-<p>The argument to this intrinsic indicates which function to return the address
- for. Zero indicates the calling function, one indicates its caller, etc.
- The argument is <b>required</b> to be a constant integer value.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.returnaddress</tt>' intrinsic either returns a pointer
- indicating the return address of the specified call frame, or zero if it
- cannot be identified. The value returned by this intrinsic is likely to be
- incorrect or 0 for arguments other than zero, so it should only be used for
- debugging purposes.</p>
-
-<p>Note that calling this intrinsic does not prevent function inlining or other
- aggressive transformations, so the value returned may not be that of the
- obvious source-language caller.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_frameaddress">'<tt>llvm.frameaddress</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare i8* @llvm.frameaddress(i32 &lt;level&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.frameaddress</tt>' intrinsic attempts to return the
- target-specific frame pointer value for the specified stack frame.</p>
-
-<h5>Arguments:</h5>
-<p>The argument to this intrinsic indicates which function to return the frame
- pointer for. Zero indicates the calling function, one indicates its caller,
- etc. The argument is <b>required</b> to be a constant integer value.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.frameaddress</tt>' intrinsic either returns a pointer
- indicating the frame address of the specified call frame, or zero if it
- cannot be identified. The value returned by this intrinsic is likely to be
- incorrect or 0 for arguments other than zero, so it should only be used for
- debugging purposes.</p>
-
-<p>Note that calling this intrinsic does not prevent function inlining or other
- aggressive transformations, so the value returned may not be that of the
- obvious source-language caller.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_stacksave">'<tt>llvm.stacksave</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare i8* @llvm.stacksave()
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.stacksave</tt>' intrinsic is used to remember the current state
- of the function stack, for use
- with <a href="#int_stackrestore"> <tt>llvm.stackrestore</tt></a>. This is
- useful for implementing language features like scoped automatic variable
- sized arrays in C99.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic returns an opaque pointer value that can be passed
- to <a href="#int_stackrestore"><tt>llvm.stackrestore</tt></a>. When
- an <tt>llvm.stackrestore</tt> intrinsic is executed with a value saved
- from <tt>llvm.stacksave</tt>, it effectively restores the state of the stack
- to the state it was in when the <tt>llvm.stacksave</tt> intrinsic executed.
- In practice, this pops any <a href="#i_alloca">alloca</a> blocks from the
- stack that were allocated after the <tt>llvm.stacksave</tt> was executed.</p>
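-
-<p>A minimal sketch (the function and value names are hypothetical) of a
- dynamically sized <a href="#i_alloca"><tt>alloca</tt></a> being popped before
- the function returns:</p>
-
-<pre>
-define void @scratch(i32 %n) {
-  %sp = call i8* @llvm.stacksave()        ; remember the current stack state
-  %buf = alloca i8, i32 %n                ; dynamically sized scratch buffer
-  ; ... use %buf ...
-  call void @llvm.stackrestore(i8* %sp)   ; pop %buf from the stack
-  ret void
-}
-
-declare i8* @llvm.stacksave()
-declare void @llvm.stackrestore(i8*)
-</pre>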
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_stackrestore">'<tt>llvm.stackrestore</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.stackrestore(i8* %ptr)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.stackrestore</tt>' intrinsic is used to restore the state of
- the function stack to the state it was in when the
- corresponding <a href="#int_stacksave"><tt>llvm.stacksave</tt></a> intrinsic
- executed. This is useful for implementing language features like scoped
- automatic variable sized arrays in C99.</p>
-
-<h5>Semantics:</h5>
-<p>See the description
- for <a href="#int_stacksave"><tt>llvm.stacksave</tt></a>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.prefetch(i8* &lt;address&gt;, i32 &lt;rw&gt;, i32 &lt;locality&gt;, i32 &lt;cache type&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.prefetch</tt>' intrinsic is a hint to the code generator to
- insert a prefetch instruction if supported; otherwise, it is a noop.
- Prefetches have no effect on the behavior of the program but can change its
- performance characteristics.</p>
-
-<h5>Arguments:</h5>
-<p><tt>address</tt> is the address to be prefetched, <tt>rw</tt> is the
- specifier determining if the fetch should be for a read (0) or write (1),
- and <tt>locality</tt> is a temporal locality specifier ranging from (0), no
- locality, to (3), extremely local (keep in cache). The <tt>cache type</tt>
- specifies whether the prefetch is performed on the data (1) or instruction (0)
- cache. The <tt>rw</tt>, <tt>locality</tt> and <tt>cache type</tt> arguments
- must be constant integers.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic does not modify the behavior of the program. In particular,
- prefetches cannot trap and do not produce a value. On targets that support
- this intrinsic, the prefetch can provide hints to the processor cache for
- better performance.</p>
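-
-<p>For example, a sketch of a read prefetch with maximal temporal locality
- into the data cache (<tt>%ptr</tt> is assumed to be in scope):</p>
-
-<pre>
-  call void @llvm.prefetch(i8* %ptr, i32 0, i32 3, i32 1)   ; read, keep in cache, data cache
-</pre>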
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.pcmarker(i32 &lt;id&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.pcmarker</tt>' intrinsic is a method to export a Program
- Counter (PC) in a region of code to simulators and other tools. The method
- is target specific, but it is expected that the marker will use exported
- symbols to transmit the PC of the marker. The marker makes no guarantees
- that it will remain with any specific instruction after optimizations. It is
- possible that the presence of a marker will inhibit optimizations. The
- intended use is to be inserted after optimizations to allow correlations of
- simulation runs.</p>
-
-<h5>Arguments:</h5>
-<p><tt>id</tt> is a numerical id identifying the marker.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic does not modify the behavior of the program. Backends that do
- not support this intrinsic may ignore it.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare i64 @llvm.readcyclecounter()
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.readcyclecounter</tt>' intrinsic provides access to the cycle
- counter register (or similar low-latency, high-accuracy clocks) on those
- targets that support it. On X86, it should map to RDTSC. On Alpha, it
- should map to RPCC. As the backing counters overflow quickly (on the order
- of 9 seconds on Alpha), this should only be used for small timings.</p>
-
-<h5>Semantics:</h5>
-<p>When directly supported, reading the cycle counter should not modify any
- memory. Implementations are allowed to either return an application-specific
- value or a system-wide value. On backends without support, this is lowered
- to a constant 0.</p>
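-
-<p>A sketch of timing a short code sequence by differencing two reads:</p>
-
-<pre>
-  %start = call i64 @llvm.readcyclecounter()
-  ; ... short code sequence under measurement ...
-  %end = call i64 @llvm.readcyclecounter()
-  %elapsed = sub i64 %end, %start     ; cycle delta; beware of counter wraparound
-</pre>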
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_libc">Standard C Library Intrinsics</a>
-</h3>
-
-<div>
-
-<p>LLVM provides intrinsics for a few important standard C library functions.
- These intrinsics allow source-language front-ends to pass information about
- the alignment of the pointer arguments to the code generator, providing
- opportunity for more efficient code generation.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_memcpy">'<tt>llvm.memcpy</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.memcpy</tt> on any
- integer bit width and for different address spaces. Not all targets support
- all bit widths however.</p>
-
-<pre>
- declare void @llvm.memcpy.p0i8.p0i8.i32(i8* &lt;dest&gt;, i8* &lt;src&gt;,
- i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
- declare void @llvm.memcpy.p0i8.p0i8.i64(i8* &lt;dest&gt;, i8* &lt;src&gt;,
- i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the
- source location to the destination location.</p>
-
-<p>Note that, unlike the standard libc function, the <tt>llvm.memcpy.*</tt>
- intrinsics do not return a value, take extra alignment/isvolatile arguments,
- and the pointers can be in specified address spaces.</p>
-
-<h5>Arguments:</h5>
-
-<p>The first argument is a pointer to the destination, the second is a pointer
- to the source. The third argument is an integer argument specifying the
- number of bytes to copy, the fourth argument is the alignment of the
- source and destination locations, and the fifth is a boolean indicating a
- volatile access.</p>
-
-<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
- then the caller guarantees that both the source and destination pointers are
- aligned to that boundary.</p>
-
-<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
- <tt>llvm.memcpy</tt> call is a <a href="#volatile">volatile operation</a>.
- The detailed access behavior is not very cleanly specified and it is unwise
- to depend on it.</p>
-
-<h5>Semantics:</h5>
-
-<p>The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the
- source location to the destination location, which are not allowed to
- overlap. It copies "len" bytes of memory over. If the argument is known to
- be aligned to some boundary, this can be specified as the fourth argument,
- otherwise it should be set to 0 or 1.</p>
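-
-<p>For example, a sketch copying 16 bytes between two buffers known to be
- 4-byte aligned (<tt>%dst</tt> and <tt>%src</tt> are assumed to be in
- scope):</p>
-
-<pre>
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 16, i32 4, i1 false)
-</pre>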
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_memmove">'<tt>llvm.memmove</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.memmove</tt> on any
- integer bit width and for different address spaces. Not all targets support
- all bit widths however.</p>
-
-<pre>
- declare void @llvm.memmove.p0i8.p0i8.i32(i8* &lt;dest&gt;, i8* &lt;src&gt;,
- i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
- declare void @llvm.memmove.p0i8.p0i8.i64(i8* &lt;dest&gt;, i8* &lt;src&gt;,
- i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.memmove.*</tt>' intrinsics move a block of memory from the
- source location to the destination location. They are similar to the
- '<tt>llvm.memcpy</tt>' intrinsic but allow the two memory locations to
- overlap.</p>
-
-<p>Note that, unlike the standard libc function, the <tt>llvm.memmove.*</tt>
- intrinsics do not return a value, take extra alignment/isvolatile arguments,
- and the pointers can be in specified address spaces.</p>
-
-<h5>Arguments:</h5>
-
-<p>The first argument is a pointer to the destination, the second is a pointer
- to the source. The third argument is an integer argument specifying the
- number of bytes to copy, the fourth argument is the alignment of the
- source and destination locations, and the fifth is a boolean indicating a
- volatile access.</p>
-
-<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
- then the caller guarantees that the source and destination pointers are
- aligned to that boundary.</p>
-
-<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
- <tt>llvm.memmove</tt> call is a <a href="#volatile">volatile operation</a>.
- The detailed access behavior is not very cleanly specified and it is unwise
- to depend on it.</p>
-
-<h5>Semantics:</h5>
-
-<p>The '<tt>llvm.memmove.*</tt>' intrinsics copy a block of memory from the
- source location to the destination location, which may overlap. It copies
- "len" bytes of memory over. If the argument is known to be aligned to some
- boundary, this can be specified as the fourth argument, otherwise it should
- be set to 0 or 1.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_memset">'<tt>llvm.memset.*</tt>' Intrinsics</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.memset</tt> on any
- integer bit width and for different address spaces. However, not all targets
- support all bit widths.</p>
-
-<pre>
- declare void @llvm.memset.p0i8.i32(i8* &lt;dest&gt;, i8 &lt;val&gt;,
- i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
- declare void @llvm.memset.p0i8.i64(i8* &lt;dest&gt;, i8 &lt;val&gt;,
- i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.memset.*</tt>' intrinsics fill a block of memory with a
- particular byte value.</p>
-
-<p>Note that, unlike the standard libc function, the <tt>llvm.memset</tt>
- intrinsic does not return a value and takes extra alignment/volatile
- arguments. Also, the destination can be in an arbitrary address space.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a pointer to the destination to fill, the second is the
- byte value with which to fill it, the third argument is an integer argument
- specifying the number of bytes to fill, the fourth argument is the known
- alignment of the destination location, and the fifth is a boolean indicating a
- volatile access.</p>
-
-<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
- then the caller guarantees that the destination pointer is aligned to that
- boundary.</p>
-
-<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
- <tt>llvm.memset</tt> call is a <a href="#volatile">volatile operation</a>.
- The detailed access behavior is not very cleanly specified and it is unwise
- to depend on it.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.memset.*</tt>' intrinsics fill "len" bytes of memory starting
- at the destination location. If the argument is known to be aligned to some
- boundary, this can be specified as the fourth argument, otherwise it should
- be set to 0 or 1.</p>
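-
-<p>For example, a sketch zeroing a 32-byte buffer known to be 8-byte aligned
- (<tt>%dst</tt> is assumed to be in scope):</p>
-
-<pre>
-  call void @llvm.memset.p0i8.i32(i8* %dst, i8 0, i32 32, i32 8, i1 false)
-</pre>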
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_sqrt">'<tt>llvm.sqrt.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.sqrt</tt> on any
- floating point or vector of floating point type. Not all targets support all
- types however.</p>
-
-<pre>
- declare float @llvm.sqrt.f32(float %Val)
- declare double @llvm.sqrt.f64(double %Val)
- declare x86_fp80 @llvm.sqrt.f80(x86_fp80 %Val)
- declare fp128 @llvm.sqrt.f128(fp128 %Val)
- declare ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128 %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.sqrt</tt>' intrinsics return the sqrt of the specified operand,
- returning the same value as the libm '<tt>sqrt</tt>' functions would.
- Unlike <tt>sqrt</tt> in libm, however, <tt>llvm.sqrt</tt> has undefined
- behavior for negative numbers other than -0.0 (which allows for better
- optimization, because there is no need to worry about errno being
- set). <tt>llvm.sqrt(-0.0)</tt> is defined to return -0.0 like IEEE sqrt.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
- type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the sqrt of the specified operand if it is a
- nonnegative floating point number.</p>
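-
-<p>For example (<tt>%x</tt> is assumed to be in scope):</p>
-
-<pre>
-  %r = call double @llvm.sqrt.f64(double %x)   ; undefined behavior if %x is negative (other than -0.0)
-</pre>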
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_powi">'<tt>llvm.powi.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.powi</tt> on any
- floating point or vector of floating point type. Not all targets support all
- types however.</p>
-
-<pre>
- declare float @llvm.powi.f32(float %Val, i32 %power)
- declare double @llvm.powi.f64(double %Val, i32 %power)
- declare x86_fp80 @llvm.powi.f80(x86_fp80 %Val, i32 %power)
- declare fp128 @llvm.powi.f128(fp128 %Val, i32 %power)
- declare ppc_fp128 @llvm.powi.ppcf128(ppc_fp128 %Val, i32 %power)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.powi.*</tt>' intrinsics return the first operand raised to the
- specified (positive or negative) power. The order of evaluation of
- multiplications is not defined. When a vector of floating point type is
- used, the second argument remains a scalar integer value.</p>
-
-<h5>Arguments:</h5>
-<p>The second argument is an integer power, and the first is a value to raise to
- that power.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the first value raised to the second power with an
- unspecified sequence of rounding operations.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_sin">'<tt>llvm.sin.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.sin</tt> on any
- floating point or vector of floating point type. Not all targets support all
- types however.</p>
-
-<pre>
- declare float @llvm.sin.f32(float %Val)
- declare double @llvm.sin.f64(double %Val)
- declare x86_fp80 @llvm.sin.f80(x86_fp80 %Val)
- declare fp128 @llvm.sin.f128(fp128 %Val)
- declare ppc_fp128 @llvm.sin.ppcf128(ppc_fp128 %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.sin.*</tt>' intrinsics return the sine of the operand.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
- type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the sine of the specified operand, returning the same
- values as the libm <tt>sin</tt> functions would, and handles error conditions
- in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_cos">'<tt>llvm.cos.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.cos</tt> on any
- floating point or vector of floating point type. Not all targets support all
- types however.</p>
-
-<pre>
- declare float @llvm.cos.f32(float %Val)
- declare double @llvm.cos.f64(double %Val)
- declare x86_fp80 @llvm.cos.f80(x86_fp80 %Val)
- declare fp128 @llvm.cos.f128(fp128 %Val)
- declare ppc_fp128 @llvm.cos.ppcf128(ppc_fp128 %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.cos.*</tt>' intrinsics return the cosine of the operand.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
- type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the cosine of the specified operand, returning the same
- values as the libm <tt>cos</tt> functions would, and handles error conditions
- in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_pow">'<tt>llvm.pow.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.pow</tt> on any
- floating point or vector of floating point type. Not all targets support all
- types however.</p>
-
-<pre>
- declare float @llvm.pow.f32(float %Val, float %Power)
- declare double @llvm.pow.f64(double %Val, double %Power)
- declare x86_fp80 @llvm.pow.f80(x86_fp80 %Val, x86_fp80 %Power)
- declare fp128 @llvm.pow.f128(fp128 %Val, fp128 %Power)
- declare ppc_fp128 @llvm.pow.ppcf128(ppc_fp128 %Val, ppc_fp128 %Power)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.pow.*</tt>' intrinsics return the first operand raised to the
- specified (positive or negative) power.</p>
-
-<h5>Arguments:</h5>
-<p>The second argument is a floating point power, and the first is a value to
- raise to that power.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the first value raised to the second power, returning
- the same values as the libm <tt>pow</tt> functions would, and handles error
- conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_exp">'<tt>llvm.exp.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.exp</tt> on any
- floating point or vector of floating point type. Not all targets support all
- types however.</p>
-
-<pre>
- declare float @llvm.exp.f32(float %Val)
- declare double @llvm.exp.f64(double %Val)
- declare x86_fp80 @llvm.exp.f80(x86_fp80 %Val)
- declare fp128 @llvm.exp.f128(fp128 %Val)
- declare ppc_fp128 @llvm.exp.ppcf128(ppc_fp128 %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.exp.*</tt>' intrinsics perform the exp function.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
- type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>exp</tt> functions
- would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_log">'<tt>llvm.log.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.log</tt> on any
- floating point or vector of floating point type. Not all targets support all
- types however.</p>
-
-<pre>
- declare float @llvm.log.f32(float %Val)
- declare double @llvm.log.f64(double %Val)
- declare x86_fp80 @llvm.log.f80(x86_fp80 %Val)
- declare fp128 @llvm.log.f128(fp128 %Val)
- declare ppc_fp128 @llvm.log.ppcf128(ppc_fp128 %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.log.*</tt>' intrinsics perform the log function.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
- type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>log</tt> functions
- would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.fma</tt> on any
- floating point or vector of floating point type. Not all targets support all
- types however.</p>
-
-<pre>
- declare float @llvm.fma.f32(float %a, float %b, float %c)
- declare double @llvm.fma.f64(double %a, double %b, double %c)
- declare x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
- declare fp128 @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
- declare ppc_fp128 @llvm.fma.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.fma.*</tt>' intrinsics perform the fused multiply-add
- operation.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
- type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>fma</tt> functions
- would.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_fabs">'<tt>llvm.fabs.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.fabs</tt> on any
- floating point or vector of floating point type. Not all targets support all
- types however.</p>
-
-<pre>
- declare float @llvm.fabs.f32(float %Val)
- declare double @llvm.fabs.f64(double %Val)
- declare x86_fp80 @llvm.fabs.f80(x86_fp80 %Val)
- declare fp128 @llvm.fabs.f128(fp128 %Val)
- declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.fabs.*</tt>' intrinsics return the absolute value of
- the operand.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
- type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>fabs</tt> functions
- would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_floor">'<tt>llvm.floor.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.floor</tt> on any
- floating point or vector of floating point type. Not all targets support all
- types however.</p>
-
-<pre>
- declare float @llvm.floor.f32(float %Val)
- declare double @llvm.floor.f64(double %Val)
- declare x86_fp80 @llvm.floor.f80(x86_fp80 %Val)
- declare fp128 @llvm.floor.f128(fp128 %Val)
- declare ppc_fp128 @llvm.floor.ppcf128(ppc_fp128 %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.floor.*</tt>' intrinsics return the floor of
- the operand.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
- type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>floor</tt> functions
- would, and handles error conditions in the same way.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_manip">Bit Manipulation Intrinsics</a>
-</h3>
-
-<div>
-
-<p>LLVM provides intrinsics for a few important bit manipulation operations.
- These allow efficient code generation for some algorithms.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_bswap">'<tt>llvm.bswap.*</tt>' Intrinsics</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic function. You can use <tt>llvm.bswap</tt> on
- any integer type that is an even number of bytes (i.e. BitWidth % 16 == 0).</p>
-
-<pre>
- declare i16 @llvm.bswap.i16(i16 &lt;id&gt;)
- declare i32 @llvm.bswap.i32(i32 &lt;id&gt;)
- declare i64 @llvm.bswap.i64(i64 &lt;id&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.bswap</tt>' family of intrinsics is used to byte swap integer
- values with an even number of bytes (positive multiple of 16 bits). These
- are useful for performing operations on data that is not in the target's
- native byte order.</p>
-
-<h5>Semantics:</h5>
-<p>The <tt>llvm.bswap.i16</tt> intrinsic returns an i16 value that has the high
- and low byte of the input i16 swapped. Similarly,
- the <tt>llvm.bswap.i32</tt> intrinsic returns an i32 value that has the four
- bytes of the input i32 swapped, so that if the input bytes are numbered 0, 1,
- 2, 3 then the returned i32 will have its bytes in 3, 2, 1, 0 order.
- The <tt>llvm.bswap.i48</tt>, <tt>llvm.bswap.i64</tt> and other intrinsics
- extend this concept to additional even-byte lengths (6 bytes, 8 bytes and
- more, respectively).</p>
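-
-<p>For example, a sketch byte-reversing a 32-bit value (<tt>%x</tt> is assumed
- to be in scope):</p>
-
-<pre>
-  %r = call i32 @llvm.bswap.i32(i32 %x)   ; bytes 0,1,2,3 of %x become bytes 3,2,1,0 of %r
-</pre>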
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_ctpop">'<tt>llvm.ctpop.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.ctpop</tt> on any
- integer bit width, or on any vector with integer elements. Not all targets
- support all bit widths or vector types, however.</p>
-
-<pre>
- declare i8 @llvm.ctpop.i8(i8 &lt;src&gt;)
- declare i16 @llvm.ctpop.i16(i16 &lt;src&gt;)
- declare i32 @llvm.ctpop.i32(i32 &lt;src&gt;)
- declare i64 @llvm.ctpop.i64(i64 &lt;src&gt;)
- declare i256 @llvm.ctpop.i256(i256 &lt;src&gt;)
- declare &lt;2 x i32&gt; @llvm.ctpop.v2i32(&lt;2 x i32&gt; &lt;src&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.ctpop</tt>' family of intrinsics counts the number of bits set
- in a value.</p>
-
-<h5>Arguments:</h5>
-<p>The only argument is the value to be counted. The argument may be of any
- integer type, or a vector with integer elements.
- The return type must match the argument type.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.ctpop</tt>' intrinsic counts the 1's in a variable, or within each
- element of a vector.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_ctlz">'<tt>llvm.ctlz.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.ctlz</tt> on any
- integer bit width, or any vector whose elements are integers. Not all
- targets support all bit widths or vector types, however.</p>
-
-<pre>
- declare i8 @llvm.ctlz.i8 (i8 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
- declare i16 @llvm.ctlz.i16 (i16 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
- declare i32 @llvm.ctlz.i32 (i32 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
- declare i64 @llvm.ctlz.i64 (i64 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
- declare i256 @llvm.ctlz.i256(i256 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
- declare &lt;2 x i32&gt; @llvm.ctlz.v2i32(&lt;2 x i32&gt; &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.ctlz</tt>' family of intrinsic functions counts the number of
- leading zeros in a variable.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is the value to be counted. This argument may be of any
- integer type, or a vector with integer element type. The return type
- must match the first argument type.</p>
-
-<p>The second argument must be a constant and is a flag to indicate whether the
- intrinsic should ensure that a zero as the first argument produces a defined
- result. Historically some architectures did not provide a defined result for
- zero values as efficiently, and many algorithms are now predicated on
- avoiding zero-value inputs.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.ctlz</tt>' intrinsic counts the leading (most significant)
- zeros in a variable, or within each element of the vector.
- If <tt>src == 0</tt> then the result is the size in bits of the type of
- <tt>src</tt> if <tt>is_zero_undef == 0</tt> and <tt>undef</tt> otherwise.
- For example, <tt>llvm.ctlz(i32 2) = 30</tt>.</p>
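-
-<p>The second argument selects the behavior for a zero input (<tt>%x</tt> is
- assumed to be in scope):</p>
-
-<pre>
-  %n = call i32 @llvm.ctlz.i32(i32 %x, i1 true)    ; undef if %x == 0
-  %m = call i32 @llvm.ctlz.i32(i32 %x, i1 false)   ; 32 if %x == 0
-</pre>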
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_cttz">'<tt>llvm.cttz.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.cttz</tt> on any
- integer bit width, or any vector of integer elements. Not all targets
- support all bit widths or vector types, however.</p>
-
-<pre>
- declare i8 @llvm.cttz.i8 (i8 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
- declare i16 @llvm.cttz.i16 (i16 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
- declare i32 @llvm.cttz.i32 (i32 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
- declare i64 @llvm.cttz.i64 (i64 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
- declare i256 @llvm.cttz.i256(i256 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
- declare &lt;2 x i32&gt; @llvm.cttz.v2i32(&lt;2 x i32&gt; &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.cttz</tt>' family of intrinsic functions counts the number of
- trailing zeros.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is the value to be counted. This argument may be of any
- integer type, or a vector with integer element type. The return type
- must match the first argument type.</p>
-
-<p>The second argument must be a constant and is a flag to indicate whether the
- intrinsic should ensure that a zero as the first argument produces a defined
- result. Historically some architectures did not provide a defined result for
- zero values as efficiently, and many algorithms are now predicated on
- avoiding zero-value inputs.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.cttz</tt>' intrinsic counts the trailing (least significant)
- zeros in a variable, or within each element of a vector.
- If <tt>src == 0</tt> then the result is the size in bits of the type of
- <tt>src</tt> if <tt>is_zero_undef == 0</tt> and <tt>undef</tt> otherwise.
- For example, <tt>llvm.cttz(2) = 1</tt>.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_overflow">Arithmetic with Overflow Intrinsics</a>
-</h3>
-
-<div>
-
-<p>LLVM provides intrinsics for some arithmetic with overflow operations.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_sadd_overflow">
- '<tt>llvm.sadd.with.overflow.*</tt>' Intrinsics
- </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.sadd.with.overflow</tt>
- on any integer bit width.</p>
-
-<pre>
- declare {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)
- declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
- declare {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.sadd.with.overflow</tt>' family of intrinsic functions perform
- a signed addition of the two arguments, and indicate whether an overflow
- occurred during the signed summation.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
- be of integer types of any bit width, but they must have the same bit
- width. The second element of the result structure must be of
- type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
- undergo signed addition.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.sadd.with.overflow</tt>' family of intrinsic functions perform
- a signed addition of the two variables. They return a structure &mdash; the
- first element of which is the signed summation, and the second element of
- which is a bit specifying if the signed summation resulted in an
- overflow.</p>
-
-<h5>Examples:</h5>
-<pre>
- %res = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
- %sum = extractvalue {i32, i1} %res, 0
- %obit = extractvalue {i32, i1} %res, 1
- br i1 %obit, label %overflow, label %normal
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_uadd_overflow">
- '<tt>llvm.uadd.with.overflow.*</tt>' Intrinsics
- </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.uadd.with.overflow</tt>
- on any integer bit width.</p>
-
-<pre>
- declare {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
- declare {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
- declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.uadd.with.overflow</tt>' family of intrinsic functions perform
- an unsigned addition of the two arguments, and indicate whether a carry
- occurred during the unsigned summation.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
- be of integer types of any bit width, but they must have the same bit
- width. The second element of the result structure must be of
- type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
- undergo unsigned addition.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.uadd.with.overflow</tt>' family of intrinsic functions perform
- an unsigned addition of the two arguments. They return a structure &mdash;
- the first element of which is the sum, and the second element of which is a
- bit specifying if the unsigned summation resulted in a carry.</p>
-
-<h5>Examples:</h5>
-<pre>
- %res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
- %sum = extractvalue {i32, i1} %res, 0
- %obit = extractvalue {i32, i1} %res, 1
- br i1 %obit, label %carry, label %normal
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_ssub_overflow">
- '<tt>llvm.ssub.with.overflow.*</tt>' Intrinsics
- </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.ssub.with.overflow</tt>
- on any integer bit width.</p>
-
-<pre>
- declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
- declare {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
- declare {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.ssub.with.overflow</tt>' family of intrinsic functions perform
- a signed subtraction of the two arguments, and indicate whether an overflow
- occurred during the signed subtraction.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
- be of integer types of any bit width, but they must have the same bit
- width. The second element of the result structure must be of
- type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
- undergo signed subtraction.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.ssub.with.overflow</tt>' family of intrinsic functions perform
- a signed subtraction of the two arguments. They return a structure &mdash;
- the first element of which is the difference, and the second element of
- which is a bit specifying if the signed subtraction resulted in an
- overflow.</p>
-
-<h5>Examples:</h5>
-<pre>
- %res = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
- %diff = extractvalue {i32, i1} %res, 0
- %obit = extractvalue {i32, i1} %res, 1
- br i1 %obit, label %overflow, label %normal
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_usub_overflow">
- '<tt>llvm.usub.with.overflow.*</tt>' Intrinsics
- </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.usub.with.overflow</tt>
- on any integer bit width.</p>
-
-<pre>
- declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
- declare {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
- declare {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.usub.with.overflow</tt>' family of intrinsic functions perform
- an unsigned subtraction of the two arguments, and indicate whether an
- overflow occurred during the unsigned subtraction.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
- be of integer types of any bit width, but they must have the same bit
- width. The second element of the result structure must be of
- type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
- undergo unsigned subtraction.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.usub.with.overflow</tt>' family of intrinsic functions perform
- an unsigned subtraction of the two arguments. They return a structure &mdash;
- the first element of which is the difference, and the second element of
- which is a bit specifying if the unsigned subtraction resulted in an
- overflow.</p>
-
-<h5>Examples:</h5>
-<pre>
- %res = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
- %diff = extractvalue {i32, i1} %res, 0
- %obit = extractvalue {i32, i1} %res, 1
- br i1 %obit, label %overflow, label %normal
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_smul_overflow">
- '<tt>llvm.smul.with.overflow.*</tt>' Intrinsics
- </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.smul.with.overflow</tt>
- on any integer bit width.</p>
-
-<pre>
- declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
- declare {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
- declare {i64, i1} @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-
-<p>The '<tt>llvm.smul.with.overflow</tt>' family of intrinsic functions perform
- a signed multiplication of the two arguments, and indicate whether an
- overflow occurred during the signed multiplication.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
- be of integer types of any bit width, but they must have the same bit
- width. The second element of the result structure must be of
- type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
- undergo signed multiplication.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.smul.with.overflow</tt>' family of intrinsic functions perform
- a signed multiplication of the two arguments. They return a structure &mdash;
- the first element of which is the product, and the second element of
- which is a bit specifying if the signed multiplication resulted in an
- overflow.</p>
-
-<h5>Examples:</h5>
-<pre>
- %res = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
- %prod = extractvalue {i32, i1} %res, 0
- %obit = extractvalue {i32, i1} %res, 1
- br i1 %obit, label %overflow, label %normal
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_umul_overflow">
- '<tt>llvm.umul.with.overflow.*</tt>' Intrinsics
- </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.umul.with.overflow</tt>
- on any integer bit width.</p>
-
-<pre>
- declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
- declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
- declare {i64, i1} @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.umul.with.overflow</tt>' family of intrinsic functions perform
- an unsigned multiplication of the two arguments, and indicate whether an
- overflow occurred during the unsigned multiplication.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
- be of integer types of any bit width, but they must have the same bit
- width. The second element of the result structure must be of
- type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
- undergo unsigned multiplication.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.umul.with.overflow</tt>' family of intrinsic functions perform
- an unsigned multiplication of the two arguments. They return a structure
- &mdash; the first element of which is the product, and the second
- element of which is a bit specifying if the unsigned multiplication resulted
- in an overflow.</p>
-
-<h5>Examples:</h5>
-<pre>
- %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
- %prod = extractvalue {i32, i1} %res, 0
- %obit = extractvalue {i32, i1} %res, 1
- br i1 %obit, label %overflow, label %normal
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="spec_arithmetic">Specialised Arithmetic Intrinsics</a>
-</h3>
-
-<div>
-
-<!-- _______________________________________________________________________ -->
-
-<h4>
- <a name="fmuladd">'<tt>llvm.fmuladd.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
- declare double @llvm.fmuladd.f64(double %a, double %b, double %c)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.fmuladd.*</tt>' intrinsic functions represent multiply-add
-expressions that can be fused if the code generator determines that the fused
-expression would be legal and efficient.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>llvm.fmuladd.*</tt>' intrinsics each take three arguments: two
-multiplicands, a and b, and an addend c.</p>
-
-<h5>Semantics:</h5>
-<p>The expression:</p>
-<pre>
- %0 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
-</pre>
-<p>is equivalent to the expression <tt>a * b + c</tt>, except that rounding
-will not be performed between the multiplication and addition steps if the
-code generator fuses the operations. Fusion is not guaranteed, even if the
-target platform supports it. If a fused multiply-add is required, the
-corresponding <a href="#int_fma"><tt>llvm.fma.*</tt></a> intrinsic function
-should be used instead.</p>
-
-<h5>Examples:</h5>
-<pre>
- %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields {float}:r2 = (a * b) + c
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_fp16">Half Precision Floating Point Intrinsics</a>
-</h3>
-
-<div>
-
-<p>For most target platforms, half precision floating point is a storage-only
- format. This means that it is
- a dense encoding (in memory) but does not support computation in the
- format.</p>
-
-<p>As a result, code must first load the half-precision floating point
- value as an i16, then convert it to float with <a
- href="#int_convert_from_fp16"><tt>llvm.convert.from.fp16</tt></a>.
- Computation can then be performed on the float value (including extending to
- double etc). To store the value back to memory, it is first converted to
- float if needed, then converted to i16 with
- <a href="#int_convert_to_fp16"><tt>llvm.convert.to.fp16</tt></a>, and then
- stored as an i16 value.</p>
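-
-<p>A sketch of the full round trip (the global <tt>@x</tt> is assumed to hold
- a half-precision value):</p>
-
-<pre>
-  %a   = load i16* @x, align 2                         ; load the raw storage bits
-  %f   = call float @llvm.convert.from.fp16(i16 %a)    ; widen to float
-  %g   = fadd float %f, 1.0                            ; compute in float
-  %res = call i16 @llvm.convert.to.fp16(float %g)      ; narrow back to half
-  store i16 %res, i16* @x, align 2
-</pre>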
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_convert_to_fp16">
- '<tt>llvm.convert.to.fp16</tt>' Intrinsic
- </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare i16 @llvm.convert.to.fp16(float %a)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.convert.to.fp16</tt>' intrinsic function performs
- a conversion from single precision floating point format to half precision
- floating point format.</p>
-
-<h5>Arguments:</h5>
-<p>The intrinsic function takes a single argument: the value to be
- converted.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.convert.to.fp16</tt>' intrinsic function performs
- a conversion from single precision floating point format to half precision
- floating point format. The return value is an <tt>i16</tt> which
- contains the converted number.</p>
-
-<h5>Examples:</h5>
-<pre>
- %res = call i16 @llvm.convert.to.fp16(float %a)
- store i16 %res, i16* @x, align 2
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_convert_from_fp16">
- '<tt>llvm.convert.from.fp16</tt>' Intrinsic
- </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare float @llvm.convert.from.fp16(i16 %a)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.convert.from.fp16</tt>' intrinsic function performs
- a conversion from half precision floating point format to single precision
- floating point format.</p>
-
-<h5>Arguments:</h5>
-<p>The intrinsic function takes a single argument: the value to be
- converted.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.convert.from.fp16</tt>' intrinsic function performs a
- conversion from half precision floating point format to single precision
- floating point format. The input half-float value is represented by
- an <tt>i16</tt> value.</p>
-
-<h5>Examples:</h5>
-<pre>
- %a = load i16* @x, align 2
- %res = call float @llvm.convert.from.fp16(i16 %a)
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_debugger">Debugger Intrinsics</a>
-</h3>
-
-<div>
-
-<p>The LLVM debugger intrinsics (which all start with the <tt>llvm.dbg.</tt>
- prefix) are described in
- the <a href="SourceLevelDebugging.html#format_common_intrinsics">LLVM Source
- Level Debugging</a> document.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_eh">Exception Handling Intrinsics</a>
-</h3>
-
-<div>
-
-<p>The LLVM exception handling intrinsics (which all start with the
- <tt>llvm.eh.</tt> prefix) are described in
- the <a href="ExceptionHandling.html#format_common_intrinsics">LLVM Exception
- Handling</a> document.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_trampoline">Trampoline Intrinsics</a>
-</h3>
-
-<div>
-
-<p>These intrinsics make it possible to excise one parameter, marked with
- the <a href="#nest"><tt>nest</tt></a> attribute, from a function.
- The result is a callable
- function pointer lacking the nest parameter - the caller does not need to
- provide a value for it. Instead, the value to use is stored in advance in a
- "trampoline", a block of memory usually allocated on the stack, which also
- contains code to splice the nest value into the argument list. This is used
- to implement the GCC nested function address extension.</p>
-
-<p>For example, if the function is
- <tt>i32 f(i8* nest %c, i32 %x, i32 %y)</tt> then the resulting function
- pointer has signature <tt>i32 (i32, i32)*</tt>. It can be created as
- follows:</p>
-
-<pre class="doc_code">
- %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
- %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
- call void @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval)
- %p = call i8* @llvm.adjust.trampoline(i8* %tramp1)
- %fp = bitcast i8* %p to i32 (i32, i32)*
-</pre>
-
-<p>The call <tt>%val = call i32 %fp(i32 %x, i32 %y)</tt> is then equivalent
- to <tt>%val = call i32 @f(i8* %nval, i32 %x, i32 %y)</tt>.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_it">
- '<tt>llvm.init.trampoline</tt>' Intrinsic
- </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;nval&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>This fills the memory pointed to by <tt>tramp</tt> with executable code,
- turning it into a trampoline.</p>
-
-<h5>Arguments:</h5>
-<p>The <tt>llvm.init.trampoline</tt> intrinsic takes three arguments, all
- pointers. The <tt>tramp</tt> argument must point to a sufficiently large and
- sufficiently aligned block of memory; this memory is written to by the
- intrinsic. Note that the size and the alignment are target-specific - LLVM
- currently provides no portable way of determining them, so a front-end that
- generates this intrinsic needs to have some target-specific knowledge.
- The <tt>func</tt> argument must hold a function bitcast to
- an <tt>i8*</tt>.</p>
-
-<h5>Semantics:</h5>
-<p>The block of memory pointed to by <tt>tramp</tt> is filled with target
- dependent code, turning it into a function. Then <tt>tramp</tt> needs to be
- passed to <a href="#int_at">llvm.adjust.trampoline</a> to get a pointer
- which can be <a href="#int_trampoline">bitcast (to a new function) and
- called</a>. The new function's signature is the same as that of
- <tt>func</tt> with any arguments marked with the <tt>nest</tt> attribute
- removed. At most one such <tt>nest</tt> argument is allowed, and it must be of
- pointer type. Calling the new function is equivalent to calling <tt>func</tt>
- with the same argument list, but with <tt>nval</tt> used for the missing
- <tt>nest</tt> argument. If, after calling <tt>llvm.init.trampoline</tt>, the
- memory pointed to by <tt>tramp</tt> is modified, then the effect of any later call
- to the returned function pointer is undefined.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_at">
- '<tt>llvm.adjust.trampoline</tt>' Intrinsic
- </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare i8* @llvm.adjust.trampoline(i8* &lt;tramp&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>This performs any required machine-specific adjustment to the address of a
- trampoline (passed as <tt>tramp</tt>).</p>
-
-<h5>Arguments:</h5>
-<p><tt>tramp</tt> must point to a block of memory which already has trampoline code
- filled in by a previous call to <a href="#int_it"><tt>llvm.init.trampoline</tt>
- </a>.</p>
-
-<h5>Semantics:</h5>
-<p>On some architectures the address of the code to be executed needs to be
-   different from the address where the trampoline is actually stored. This
-   intrinsic returns the executable address corresponding to <tt>tramp</tt>
-   after performing the required machine-specific adjustments.
- The pointer returned can then be <a href="#int_trampoline"> bitcast and
- executed</a>.
-</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_memorymarkers">Memory Use Markers</a>
-</h3>
-
-<div>
-
-<p>This class of intrinsics exists to provide information about the lifetime
-   of memory objects and ranges where variables are immutable.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_lifetime_start">'<tt>llvm.lifetime.start</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.lifetime.start(i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.lifetime.start</tt>' intrinsic specifies the start of a memory
- object's lifetime.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a constant integer representing the size of the
- object, or -1 if it is variable sized. The second argument is a pointer to
- the object.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic indicates that before this point in the code, the value of the
- memory pointed to by <tt>ptr</tt> is dead. This means that it is known to
- never be used and has an undefined value. A load from the pointer that
- precedes this intrinsic can be replaced with
- <tt>'<a href="#undefvalues">undef</a>'</tt>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_lifetime_end">'<tt>llvm.lifetime.end</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.lifetime.end(i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.lifetime.end</tt>' intrinsic specifies the end of a memory
- object's lifetime.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a constant integer representing the size of the
- object, or -1 if it is variable sized. The second argument is a pointer to
- the object.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic indicates that after this point in the code, the value of the
- memory pointed to by <tt>ptr</tt> is dead. This means that it is known to
- never be used and has an undefined value. Any stores into the memory object
-   following this intrinsic may be removed as dead.</p>
-
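-<h5>Example:</h5>
-<p>As an illustrative sketch (the names here are invented for the example),
-   the following marks a 16-byte <tt>alloca</tt> as live only between the two
-   markers:</p>
-<pre>
-  %buf = alloca [16 x i8], align 4
-  %ptr = getelementptr [16 x i8]* %buf, i32 0, i32 0
-  call void @llvm.lifetime.start(i64 16, i8* %ptr)
-  ; ... uses of %buf ...
-  call void @llvm.lifetime.end(i64 16, i8* %ptr)
-</pre>
-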
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_invariant_start">'<tt>llvm.invariant.start</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare {}* @llvm.invariant.start(i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.invariant.start</tt>' intrinsic specifies that the contents of
- a memory object will not change.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a constant integer representing the size of the
- object, or -1 if it is variable sized. The second argument is a pointer to
- the object.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic indicates that until an <tt>llvm.invariant.end</tt> that uses
- the return value, the referenced memory location is constant and
- unchanging.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_invariant_end">'<tt>llvm.invariant.end</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.invariant.end({}* &lt;start&gt;, i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.invariant.end</tt>' intrinsic specifies that the contents of
- a memory object are mutable.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is the matching <tt>llvm.invariant.start</tt> intrinsic.
- The second argument is a constant integer representing the size of the
- object, or -1 if it is variable sized and the third argument is a pointer
- to the object.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic indicates that the memory is mutable again.</p>
-
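-<h5>Example:</h5>
-<p>As an illustrative sketch (names invented for the example), the following
-   marks four bytes at <tt>%ptr</tt> as unchanging between the two calls:</p>
-<pre>
-  %inv = call {}* @llvm.invariant.start(i64 4, i8* %ptr)
-  ; loads through %ptr here may be assumed to yield the same value
-  call void @llvm.invariant.end({}* %inv, i64 4, i8* %ptr)
-</pre>
-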
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="int_general">General Intrinsics</a>
-</h3>
-
-<div>
-
-<p>This class of intrinsics is designed to be generic and has no specific
- purpose.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_var_annotation">'<tt>llvm.var.annotation</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.var.annotation(i8* &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32 &lt;int&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.var.annotation</tt>' intrinsic annotates a local variable
-   with a string.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a pointer to a value, the second is a pointer to a
- global string, the third is a pointer to a global string which is the source
- file name, and the last argument is the line number.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic allows annotation of local variables with arbitrary strings.
- This can be useful for special purpose optimizations that want to look for
- these annotations. These have no other defined use; they are ignored by code
- generation and optimization.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_annotation">'<tt>llvm.annotation.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use '<tt>llvm.annotation</tt>' on
- any integer bit width.</p>
-
-<pre>
- declare i8 @llvm.annotation.i8(i8 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32 &lt;int&gt;)
- declare i16 @llvm.annotation.i16(i16 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32 &lt;int&gt;)
- declare i32 @llvm.annotation.i32(i32 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32 &lt;int&gt;)
- declare i64 @llvm.annotation.i64(i64 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32 &lt;int&gt;)
- declare i256 @llvm.annotation.i256(i256 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32 &lt;int&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.annotation</tt>' intrinsic annotates an arbitrary integer
-   expression with a string.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is an integer value (result of some expression), the
- second is a pointer to a global string, the third is a pointer to a global
- string which is the source file name, and the last argument is the line
- number. It returns the value of the first argument.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic allows annotations to be put on arbitrary expressions with
- arbitrary strings. This can be useful for special purpose optimizations that
- want to look for these annotations. These have no other defined use; they
- are ignored by code generation and optimization.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_trap">'<tt>llvm.trap</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.trap() noreturn nounwind
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.trap</tt>' intrinsic executes a target-dependent trap
-   instruction.</p>
-
-<h5>Arguments:</h5>
-<p>None.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic is lowered to the target dependent trap instruction. If the
- target does not have a trap instruction, this intrinsic will be lowered to
- a call of the <tt>abort()</tt> function.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_debugtrap">'<tt>llvm.debugtrap</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.debugtrap() nounwind
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.debugtrap</tt>' intrinsic causes a trap intended to request
-   the attention of a debugger.</p>
-
-<h5>Arguments:</h5>
-<p>None.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic is lowered to code which is intended to cause an execution
-   trap that requests the attention of a debugger.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_stackprotector">'<tt>llvm.stackprotector</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.stackprotector(i8* &lt;guard&gt;, i8** &lt;slot&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The <tt>llvm.stackprotector</tt> intrinsic takes the <tt>guard</tt> and
- stores it onto the stack at <tt>slot</tt>. The stack slot is adjusted to
- ensure that it is placed on the stack before local variables.</p>
-
-<h5>Arguments:</h5>
-<p>The <tt>llvm.stackprotector</tt> intrinsic requires two pointer
- arguments. The first argument is the value loaded from the stack
-   guard <tt>@__stack_chk_guard</tt>. The second argument is an <tt>alloca</tt>
- that has enough space to hold the value of the guard.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic causes the prologue/epilogue inserter to force the position of
- the <tt>AllocaInst</tt> stack slot to be before local variables on the
- stack. This is to ensure that if a local variable on the stack is
- overwritten, it will destroy the value of the guard. When the function exits,
- the guard on the stack is checked against the original guard. If they are
- different, then the program aborts by calling the <tt>__stack_chk_fail()</tt>
- function.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_objectsize">'<tt>llvm.objectsize</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare i32 @llvm.objectsize.i32(i8* &lt;object&gt;, i1 &lt;min&gt;)
- declare i64 @llvm.objectsize.i64(i8* &lt;object&gt;, i1 &lt;min&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The <tt>llvm.objectsize</tt> intrinsic is designed to provide information to
-   the optimizers to determine at compile time whether a) an operation (like
-   memcpy) will overflow a buffer that corresponds to an object, or b) a
-   runtime check for overflow is unnecessary. An object in this context means
- an allocation of a specific class, structure, array, or other object.</p>
-
-<h5>Arguments:</h5>
-<p>The <tt>llvm.objectsize</tt> intrinsic takes two arguments. The first
- argument is a pointer to or into the <tt>object</tt>. The second argument
- is a boolean and determines whether <tt>llvm.objectsize</tt> returns 0 (if
- true) or -1 (if false) when the object size is unknown.
- The second argument only accepts constants.</p>
-
-<h5>Semantics:</h5>
-<p>The <tt>llvm.objectsize</tt> intrinsic is lowered to a constant representing
- the size of the object concerned. If the size cannot be determined at compile
- time, <tt>llvm.objectsize</tt> returns <tt>i32/i64 -1 or 0</tt>
- (depending on the <tt>min</tt> argument).</p>
-
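-<h5>Example:</h5>
-<p>As an illustrative sketch, for a pointer to a fixed-size stack buffer the
-   call folds to the buffer's size:</p>
-<pre>
-  %buf = alloca [32 x i8]
-  %ptr = getelementptr [32 x i8]* %buf, i32 0, i32 0
-  %size = call i32 @llvm.objectsize.i32(i8* %ptr, i1 false)  ; folds to i32 32
-</pre>
-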
-</div>
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_expect">'<tt>llvm.expect</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare i32 @llvm.expect.i32(i32 &lt;val&gt;, i32 &lt;expected_val&gt;)
- declare i64 @llvm.expect.i64(i64 &lt;val&gt;, i64 &lt;expected_val&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The <tt>llvm.expect</tt> intrinsic provides information about the expected
-   (most probable) value of <tt>val</tt>, which can be used by optimizers.</p>
-
-<h5>Arguments:</h5>
-<p>The <tt>llvm.expect</tt> intrinsic takes two arguments. The first
-   argument is a value. The second argument is the expected value; it must
-   be a constant, not a variable.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic is simply lowered to <tt>val</tt>.</p>
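-
-<h5>Example:</h5>
-<p>As an illustrative sketch, the following hints that <tt>%x</tt> is expected
-   to be zero, biasing the branch that tests it:</p>
-<pre>
-  %expval = call i32 @llvm.expect.i32(i32 %x, i32 0)
-  %cmp = icmp eq i32 %expval, 0
-  br i1 %cmp, label %zero, label %nonzero
-</pre>
-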
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_donothing">'<tt>llvm.donothing</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- declare void @llvm.donothing() nounwind readnone
-</pre>
-
-<h5>Overview:</h5>
-<p>The <tt>llvm.donothing</tt> intrinsic doesn't perform any operation. It's the
-only intrinsic that can be called with an invoke instruction.</p>
-
-<h5>Arguments:</h5>
-<p>None.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic does nothing, and it's removed by optimizers and ignored by
-codegen.</p>
-</div>
-
-</div>
-
-</div>
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-29 15:12:44 +0100 (Mon, 29 Oct 2012) $
-</address>
-
-</body>
-</html>
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
new file mode 100644
index 000000000000..659f02afb961
--- /dev/null
+++ b/docs/LangRef.rst
@@ -0,0 +1,8605 @@
+==============================
+LLVM Language Reference Manual
+==============================
+
+.. contents::
+ :local:
+ :depth: 3
+
+Abstract
+========
+
+This document is a reference manual for the LLVM assembly language. LLVM
+is a Static Single Assignment (SSA) based representation that provides
+type safety, low-level operations, flexibility, and the capability of
+representing 'all' high-level languages cleanly. It is the common code
+representation used throughout all phases of the LLVM compilation
+strategy.
+
+Introduction
+============
+
+The LLVM code representation is designed to be used in three different
+forms: as an in-memory compiler IR, as an on-disk bitcode representation
+(suitable for fast loading by a Just-In-Time compiler), and as a human
+readable assembly language representation. This allows LLVM to provide a
+powerful intermediate representation for efficient compiler
+transformations and analysis, while providing a natural means to debug
+and visualize the transformations. The three different forms of LLVM are
+all equivalent. This document describes the human readable
+representation and notation.
+
+The LLVM representation aims to be light-weight and low-level while
+being expressive, typed, and extensible at the same time. It aims to be
+a "universal IR" of sorts, by being at a low enough level that
+high-level ideas may be cleanly mapped to it (similar to how
+microprocessors are "universal IR's", allowing many source languages to
+be mapped to them). By providing type information, LLVM can be used as
+the target of optimizations: for example, through pointer analysis, it
+can be proven that a C automatic variable is never accessed outside of
+the current function, allowing it to be promoted to a simple SSA value
+instead of a memory location.
+
+.. _wellformed:
+
+Well-Formedness
+---------------
+
+It is important to note that this document describes 'well formed' LLVM
+assembly language. There is a difference between what the parser accepts
+and what is considered 'well formed'. For example, the following
+instruction is syntactically okay, but not well formed:
+
+.. code-block:: llvm
+
+ %x = add i32 1, %x
+
+because the definition of ``%x`` does not dominate all of its uses. The
+LLVM infrastructure provides a verification pass that may be used to
+verify that an LLVM module is well formed. This pass is automatically
+run by the parser after parsing input assembly and by the optimizer
+before it outputs bitcode. The violations pointed out by the verifier
+pass indicate bugs in transformation passes or input to the parser.
+
+.. _identifiers:
+
+Identifiers
+===========
+
+LLVM identifiers come in two basic types: global and local. Global
+identifiers (functions, global variables) begin with the ``'@'``
+character. Local identifiers (register names, types) begin with the
+``'%'`` character. Additionally, there are three different formats for
+identifiers, for different purposes:
+
+#. Named values are represented as a string of characters with their
+ prefix. For example, ``%foo``, ``@DivisionByZero``,
+ ``%a.really.long.identifier``. The actual regular expression used is
+ '``[%@][a-zA-Z$._][a-zA-Z$._0-9]*``'. Identifiers which require other
+ characters in their names can be surrounded with quotes. Special
+ characters may be escaped using ``"\xx"`` where ``xx`` is the ASCII
+ code for the character in hexadecimal. In this way, any character can
+ be used in a name value, even quotes themselves.
+#. Unnamed values are represented as an unsigned numeric value with
+ their prefix. For example, ``%12``, ``@2``, ``%44``.
+#. Constants, which are described in the section Constants_ below.
+
+LLVM requires that values start with a prefix for two reasons: Compilers
+don't need to worry about name clashes with reserved words, and the set
+of reserved words may be expanded in the future without penalty.
+Additionally, unnamed identifiers allow a compiler to quickly come up
+with a temporary variable without having to avoid symbol table
+conflicts.
+
+Reserved words in LLVM are very similar to reserved words in other
+languages. There are keywords for different opcodes ('``add``',
+'``bitcast``', '``ret``', etc...), for primitive type names ('``void``',
+'``i32``', etc...), and others. These reserved words cannot conflict
+with variable names, because none of them start with a prefix character
+(``'%'`` or ``'@'``).
+
+Here is an example of LLVM code to multiply the integer variable
+'``%X``' by 8:
+
+The easy way:
+
+.. code-block:: llvm
+
+ %result = mul i32 %X, 8
+
+After strength reduction:
+
+.. code-block:: llvm
+
+ %result = shl i32 %X, 3
+
+And the hard way:
+
+.. code-block:: llvm
+
+ %0 = add i32 %X, %X ; yields {i32}:%0
+ %1 = add i32 %0, %0 ; yields {i32}:%1
+ %result = add i32 %1, %1
+
+This last way of multiplying ``%X`` by 8 illustrates several important
+lexical features of LLVM:
+
+#. Comments are delimited with a '``;``' and go until the end of line.
+#. Unnamed temporaries are created when the result of a computation is
+ not assigned to a named value.
+#. Unnamed temporaries are numbered sequentially.
+
+It also shows a convention that we follow in this document. When
+demonstrating instructions, we will follow an instruction with a comment
+that defines the type and name of the value produced.
+
+High Level Structure
+====================
+
+Module Structure
+----------------
+
+LLVM programs are composed of ``Module``\ s, each of which is a
+translation unit of the input program. Each module consists of
+functions, global variables, and symbol table entries. Modules may be
+combined together with the LLVM linker, which merges function (and
+global variable) definitions, resolves forward declarations, and merges
+symbol table entries. Here is an example of the "hello world" module:
+
+.. code-block:: llvm
+
+ ; Declare the string constant as a global constant.
+ @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00"
+
+ ; External declaration of the puts function
+ declare i32 @puts(i8* nocapture) nounwind
+
+ ; Definition of main function
+ define i32 @main() { ; i32()*
+ ; Convert [13 x i8]* to i8 *...
+ %cast210 = getelementptr [13 x i8]* @.str, i64 0, i64 0
+
+ ; Call puts function to write out the string to stdout.
+ call i32 @puts(i8* %cast210)
+ ret i32 0
+ }
+
+ ; Named metadata
+ !1 = metadata !{i32 42}
+ !foo = !{!1, null}
+
+This example is made up of a :ref:`global variable <globalvars>` named
+"``.str``", an external declaration of the "``puts``" function, a
+:ref:`function definition <functionstructure>` for "``main``" and
+:ref:`named metadata <namedmetadatastructure>` "``foo``".
+
+In general, a module is made up of a list of global values (where both
+functions and global variables are global values). Global values are
+represented by a pointer to a memory location (in this case, a pointer
+to an array of char, and a pointer to a function), and have one of the
+following :ref:`linkage types <linkage>`.
+
+.. _linkage:
+
+Linkage Types
+-------------
+
+All Global Variables and Functions have one of the following types of
+linkage:
+
+``private``
+ Global values with "``private``" linkage are only directly
+ accessible by objects in the current module. In particular, linking
+    code into a module with a private global value may cause the
+    private symbol to be renamed as necessary to avoid collisions. Because the
+ symbol is private to the module, all references can be updated. This
+ doesn't show up in any symbol table in the object file.
+``linker_private``
+ Similar to ``private``, but the symbol is passed through the
+ assembler and evaluated by the linker. Unlike normal strong symbols,
+ they are removed by the linker from the final linked image
+ (executable or dynamic library).
+``linker_private_weak``
+ Similar to "``linker_private``", but the symbol is weak. Note that
+ ``linker_private_weak`` symbols are subject to coalescing by the
+ linker. The symbols are removed by the linker from the final linked
+ image (executable or dynamic library).
+``internal``
+ Similar to private, but the value shows as a local symbol
+ (``STB_LOCAL`` in the case of ELF) in the object file. This
+ corresponds to the notion of the '``static``' keyword in C.
+``available_externally``
+ Globals with "``available_externally``" linkage are never emitted
+ into the object file corresponding to the LLVM module. They exist to
+ allow inlining and other optimizations to take place given knowledge
+ of the definition of the global, which is known to be somewhere
+ outside the module. Globals with ``available_externally`` linkage
+ are allowed to be discarded at will, and are otherwise the same as
+ ``linkonce_odr``. This linkage type is only allowed on definitions,
+ not declarations.
+``linkonce``
+ Globals with "``linkonce``" linkage are merged with other globals of
+ the same name when linkage occurs. This can be used to implement
+ some forms of inline functions, templates, or other code which must
+ be generated in each translation unit that uses it, but where the
+ body may be overridden with a more definitive definition later.
+ Unreferenced ``linkonce`` globals are allowed to be discarded. Note
+ that ``linkonce`` linkage does not actually allow the optimizer to
+ inline the body of this function into callers because it doesn't
+ know if this definition of the function is the definitive definition
+ within the program or whether it will be overridden by a stronger
+ definition. To enable inlining and other optimizations, use
+ "``linkonce_odr``" linkage.
+``weak``
+ "``weak``" linkage has the same merging semantics as ``linkonce``
+ linkage, except that unreferenced globals with ``weak`` linkage may
+ not be discarded. This is used for globals that are declared "weak"
+ in C source code.
+``common``
+ "``common``" linkage is most similar to "``weak``" linkage, but they
+ are used for tentative definitions in C, such as "``int X;``" at
+ global scope. Symbols with "``common``" linkage are merged in the
+    same way as ``weak`` symbols, and they may not be deleted if
+ unreferenced. ``common`` symbols may not have an explicit section,
+ must have a zero initializer, and may not be marked
+ ':ref:`constant <globalvars>`'. Functions and aliases may not have
+ common linkage.
+
+.. _linkage_appending:
+
+``appending``
+ "``appending``" linkage may only be applied to global variables of
+ pointer to array type. When two global variables with appending
+ linkage are linked together, the two global arrays are appended
+ together. This is the LLVM, typesafe, equivalent of having the
+ system linker append together "sections" with identical names when
+ .o files are linked.
+``extern_weak``
+    The semantics of this linkage follow the ELF object file model: the
+    symbol is weak until linked; if not linked, the symbol becomes null
+    instead of being an undefined reference.
+``linkonce_odr``, ``weak_odr``
+ Some languages allow differing globals to be merged, such as two
+ functions with different semantics. Other languages, such as
+ ``C++``, ensure that only equivalent globals are ever merged (the
+ "one definition rule" --- "ODR"). Such languages can use the
+ ``linkonce_odr`` and ``weak_odr`` linkage types to indicate that the
+ global will only be merged with equivalent globals. These linkage
+ types are otherwise the same as their non-``odr`` versions.
+``linkonce_odr_auto_hide``
+ Similar to "``linkonce_odr``", but nothing in the translation unit
+ takes the address of this definition. For instance, functions that
+ had an inline definition, but the compiler decided not to inline it.
+ ``linkonce_odr_auto_hide`` may have only ``default`` visibility. The
+ symbols are removed by the linker from the final linked image
+ (executable or dynamic library).
+``external``
+ If none of the above identifiers are used, the global is externally
+ visible, meaning that it participates in linkage and can be used to
+ resolve external symbol references.
+
+The next two types of linkage are targeted for Microsoft Windows
+platform only. They are designed to support importing (exporting)
+symbols from (to) DLLs (Dynamic Link Libraries).
+
+``dllimport``
+ "``dllimport``" linkage causes the compiler to reference a function
+ or variable via a global pointer to a pointer that is set up by the
+ DLL exporting the symbol. On Microsoft Windows targets, the pointer
+ name is formed by combining ``__imp_`` and the function or variable
+ name.
+``dllexport``
+ "``dllexport``" linkage causes the compiler to provide a global
+ pointer to a pointer in a DLL, so that it can be referenced with the
+ ``dllimport`` attribute. On Microsoft Windows targets, the pointer
+ name is formed by combining ``__imp_`` and the function or variable
+ name.
+
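+The following sketch defines an internal constant (``.LC0`` is simply a
+conventional compiler-generated label; the module is invented for
+illustration):
+
+.. code-block:: llvm
+
+    @.LC0 = internal constant [13 x i8] c"hello world\0A\00"
+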
+For example, since the "``.LC0``" variable is defined to be internal, if
+another module defined a "``.LC0``" variable and was linked with this
+one, one of the two would be renamed, preventing a collision. Since
+"``main``" and "``puts``" are external (i.e., lacking any linkage
+declarations), they are accessible outside of the current module.
+
+It is illegal for a function *declaration* to have any linkage type
+other than ``external``, ``dllimport`` or ``extern_weak``.
+
+Aliases can have only ``external``, ``internal``, ``weak`` or
+``weak_odr`` linkages.
+
+.. _callingconv:
+
+Calling Conventions
+-------------------
+
+LLVM :ref:`functions <functionstructure>`, :ref:`calls <i_call>` and
+:ref:`invokes <i_invoke>` can all have an optional calling convention
+specified for the call. The calling convention of any pair of dynamic
+caller/callee must match, or the behavior of the program is undefined.
+The following calling conventions are supported by LLVM, and more may be
+added in the future:
+
+"``ccc``" - The C calling convention
+ This calling convention (the default if no other calling convention
+ is specified) matches the target C calling conventions. This calling
+ convention supports varargs function calls and tolerates some
+ mismatch in the declared prototype and implemented declaration of
+ the function (as does normal C).
+"``fastcc``" - The fast calling convention
+ This calling convention attempts to make calls as fast as possible
+ (e.g. by passing things in registers). This calling convention
+ allows the target to use whatever tricks it wants to produce fast
+ code for the target, without having to conform to an externally
+ specified ABI (Application Binary Interface). `Tail calls can only
+ be optimized when this, the GHC or the HiPE convention is
+ used. <CodeGenerator.html#id80>`_ This calling convention does not
+ support varargs and requires the prototype of all callees to exactly
+ match the prototype of the function definition.
+"``coldcc``" - The cold calling convention
+ This calling convention attempts to make code in the caller as
+ efficient as possible under the assumption that the call is not
+ commonly executed. As such, these calls often preserve all registers
+ so that the call does not break any live ranges in the caller side.
+ This calling convention does not support varargs and requires the
+ prototype of all callees to exactly match the prototype of the
+ function definition.
+"``cc 10``" - GHC convention
+ This calling convention has been implemented specifically for use by
+ the `Glasgow Haskell Compiler (GHC) <http://www.haskell.org/ghc>`_.
+ It passes everything in registers, going to extremes to achieve this
+ by disabling callee save registers. This calling convention should
+ not be used lightly but only for specific situations such as an
+ alternative to the *register pinning* performance technique often
+ used when implementing functional programming languages. At the
+ moment only X86 supports this convention and it has the following
+ limitations:
+
+    - On *X86-32* it supports only up to 4 bit-type parameters. No
+      floating point types are supported.
+    - On *X86-64* it supports only up to 10 bit-type parameters and 6
+      floating point parameters.
+
+ This calling convention supports `tail call
+    optimization <CodeGenerator.html#id80>`_ but requires that both the
+    caller and the callee use it.
+"``cc 11``" - The HiPE calling convention
+ This calling convention has been implemented specifically for use by
+ the `High-Performance Erlang
+ (HiPE) <http://www.it.uu.se/research/group/hipe/>`_ compiler, *the*
+ native code compiler of the `Ericsson's Open Source Erlang/OTP
+ system <http://www.erlang.org/download.shtml>`_. It uses more
+ registers for argument passing than the ordinary C calling
+ convention and defines no callee-saved registers. The calling
+ convention properly supports `tail call
+ optimization <CodeGenerator.html#id80>`_ but requires that both the
+ caller and the callee use it. It uses a *register pinning*
+ mechanism, similar to GHC's convention, for keeping frequently
+ accessed runtime components pinned to specific hardware registers.
+ At the moment only X86 supports this convention (both 32 and 64
+ bit).
+"``cc <n>``" - Numbered convention
+ Any calling convention may be specified by number, allowing
+ target-specific calling conventions to be used. Target specific
+ calling conventions start at 64.
+
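+As an illustration (a minimal sketch), a function and a call site that
+both use the fast calling convention:
+
+.. code-block:: llvm
+
+    define fastcc i32 @square(i32 %x) {
+      %r = mul i32 %x, %x
+      ret i32 %r
+    }
+
+    define i32 @caller(i32 %x) {
+      ; the conventions of this caller/callee pair match, as required
+      %r = call fastcc i32 @square(i32 %x)
+      ret i32 %r
+    }
+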
+More calling conventions can be added/defined on an as-needed basis, to
+support Pascal conventions or any other well-known target-independent
+convention.
+
+Visibility Styles
+-----------------
+
+All Global Variables and Functions have one of the following visibility
+styles:
+
+"``default``" - Default style
+ On targets that use the ELF object file format, default visibility
+ means that the declaration is visible to other modules and, in
+ shared libraries, means that the declared entity may be overridden.
+ On Darwin, default visibility means that the declaration is visible
+ to other modules. Default visibility corresponds to "external
+ linkage" in the language.
+"``hidden``" - Hidden style
+ Two declarations of an object with hidden visibility refer to the
+ same object if they are in the same shared object. Usually, hidden
+ visibility indicates that the symbol will not be placed into the
+ dynamic symbol table, so no other module (executable or shared
+ library) can reference it directly.
+"``protected``" - Protected style
+ On ELF, protected visibility indicates that the symbol will be
+ placed in the dynamic symbol table, but that references within the
+ defining module will bind to the local symbol. That is, the symbol
+ cannot be overridden by another module.
+
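+For example (a minimal sketch; the names are invented):
+
+.. code-block:: llvm
+
+    @hg = hidden global i32 0     ; not placed in the dynamic symbol table
+    @pg = protected global i32 0  ; exported, but references here bind locally
+
+    define hidden void @helper() {
+      ret void
+    }
+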
+Named Types
+-----------
+
+LLVM IR allows you to specify name aliases for certain types. This can
+make it easier to read the IR and make the IR more condensed
+(particularly when recursive types are involved). An example of a name
+specification is:
+
+.. code-block:: llvm
+
+ %mytype = type { %mytype*, i32 }
+
+You may give a name to any :ref:`type <typesystem>` except
+":ref:`void <t_void>`". Type name aliases may be used anywhere a type is
+expected with the syntax "%mytype".
+
+Note that type names are aliases for the structural type that they
+indicate, and that you can therefore specify multiple names for the same
+type. This often leads to confusing behavior when dumping out a .ll
+file. Since LLVM IR uses structural typing, the name is not part of the
+type. When printing out LLVM IR, the printer will pick *one name* to
+render all types of a particular shape. This means that if you have code
+where two different source types end up having the same LLVM type, that
+the dumper will sometimes print the "wrong" or unexpected type. This is
+an important design point and isn't going to change.
+
+.. _globalvars:
+
+Global Variables
+----------------
+
+Global variables define regions of memory allocated at compilation time
+instead of run-time. Global variables may optionally be initialized, may
+have an explicit section to be placed in, and may have an optional
+explicit alignment specified.
+
+A variable may be defined as ``thread_local``, which means that it will
+not be shared by threads (each thread will have a separate copy of the
+variable). Not all targets support thread-local variables. Optionally, a
+TLS model may be specified:
+
+``localdynamic``
+ For variables that are only used within the current shared library.
+``initialexec``
+ For variables in modules that will not be loaded dynamically.
+``localexec``
+ For variables defined in the executable and only used within it.
+
+The models correspond to the ELF TLS models; see `ELF Handling For
+Thread-Local Storage <http://people.redhat.com/drepper/tls.pdf>`_ for
+more information on under which circumstances the different models may
+be used. The target may choose a different TLS model if the specified
+model is not supported, or if a better choice of model can be made.
+
+A variable may be defined as a global ``constant``, which indicates that
+the contents of the variable will **never** be modified (enabling better
+optimization, allowing the global data to be placed in the read-only
+section of an executable, etc). Note that variables that need runtime
+initialization cannot be marked ``constant`` as there is a store to the
+variable.
+
+LLVM explicitly allows *declarations* of global variables to be marked
+constant, even if the final definition of the global is not. This
+capability can be used to enable slightly better optimization of the
+program, but requires the language definition to guarantee that
+optimizations based on the 'constantness' are valid for the translation
+units that do not include the definition.
+
+As SSA values, global variables define pointer values that are in scope
+(i.e. they dominate) all basic blocks in the program. Global variables
+always define a pointer to their "content" type because they describe a
+region of memory, and all memory objects in LLVM are accessed through
+pointers.
+
+Global variables can be marked with ``unnamed_addr`` which indicates
+that the address is not significant, only the content. Constants marked
+like this can be merged with other constants if they have the same
+initializer. Note that a constant with a significant address *can* be
+merged with an ``unnamed_addr`` constant, the result being a constant
+whose address is significant.
+
+A global variable may be declared to reside in a target-specific
+numbered address space. For targets that support them, address spaces
+may affect how optimizations are performed and/or what target
+instructions are used to access the variable. The default address space
+is zero. The address space qualifier must precede any other attributes.
+
+LLVM allows an explicit section to be specified for globals. If the
+target supports it, it will emit globals to the section specified.
+
+By default, global initializers are optimized by assuming that global
+variables defined within the module are not modified from their
+initial values before the start of the global initializer. This is
+true even for variables potentially accessible from outside the
+module, including those with external linkage or appearing in
+``@llvm.used``. This assumption may be suppressed by marking the
+variable with ``externally_initialized``.
+
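+For example (a sketch), a global whose initial value may be changed
+externally before the module's initializers run:
+
+.. code-block:: llvm
+
+    @g = externally_initialized global i32 0
+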
+An explicit alignment may be specified for a global, which must be a
+power of 2. If not present, or if the alignment is set to zero, the
+alignment of the global is set by the target to whatever it feels
+convenient. If an explicit alignment is specified, the global is forced
+to have exactly that alignment. Targets and optimizers are not allowed
+to over-align the global if the global has an assigned section. In this
+case, the extra alignment could be observable: for example, code could
+assume that the globals are densely packed in their section and try to
+iterate over them as an array; alignment padding would break this
+iteration.
+
+For example, the following defines a global in a numbered address space
+with an initializer, section, and alignment:
+
+.. code-block:: llvm
+
+ @G = addrspace(5) constant float 1.0, section "foo", align 4
+
+The following example defines a thread-local global with the
+``initialexec`` TLS model:
+
+.. code-block:: llvm
+
+ @G = thread_local(initialexec) global i32 0, align 4
+
+.. _functionstructure:
+
+Functions
+---------
+
+LLVM function definitions consist of the "``define``" keyword, an
+optional :ref:`linkage type <linkage>`, an optional :ref:`visibility
+style <visibility>`, an optional :ref:`calling convention <callingconv>`,
+an optional ``unnamed_addr`` attribute, a return type, an optional
+:ref:`parameter attribute <paramattrs>` for the return type, a function
+name, a (possibly empty) argument list (each with optional :ref:`parameter
+attributes <paramattrs>`), optional :ref:`function attributes <fnattrs>`,
+an optional section, an optional alignment, an optional :ref:`garbage
+collector name <gc>`, an opening curly brace, a list of basic blocks,
+and a closing curly brace.
+
+LLVM function declarations consist of the "``declare``" keyword, an
+optional :ref:`linkage type <linkage>`, an optional :ref:`visibility
+style <visibility>`, an optional :ref:`calling convention <callingconv>`,
+an optional ``unnamed_addr`` attribute, a return type, an optional
+:ref:`parameter attribute <paramattrs>` for the return type, a function
+name, a possibly empty list of arguments, an optional alignment, and an
+optional :ref:`garbage collector name <gc>`.
+
+A function definition contains a list of basic blocks, forming the CFG
+(Control Flow Graph) for the function. Each basic block may optionally
+start with a label (giving the basic block a symbol table entry),
+contains a list of instructions, and ends with a
+:ref:`terminator <terminators>` instruction (such as a branch or function
+return).
+
+The first basic block in a function is special in two ways: it is
+immediately executed on entrance to the function, and it is not allowed
+to have predecessor basic blocks (i.e. there can not be any branches to
+the entry block of a function). Because the block can have no
+predecessors, it also cannot have any :ref:`PHI nodes <i_phi>`.
+
+LLVM allows an explicit section to be specified for functions. If the
+target supports it, it will emit functions to the section specified.
+
+An explicit alignment may be specified for a function. If not present,
+or if the alignment is set to zero, the alignment of the function is set
+by the target to whatever it feels convenient. If an explicit alignment
+is specified, the function is forced to have at least that much
+alignment. All alignments must be a power of 2.
+
+If the ``unnamed_addr`` attribute is given, the address is known to not
+be significant and two identical functions can be merged.
+
+Syntax::
+
+ define [linkage] [visibility]
+ [cconv] [ret attrs]
+ <ResultType> @<FunctionName> ([argument list])
+ [fn Attrs] [section "name"] [align N]
+ [gc] { ... }
+
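+For instance (a minimal sketch combining several of the optional
+pieces):
+
+.. code-block:: llvm
+
+    define private fastcc i32 @add1(i32 %x) nounwind align 16 {
+    entry:
+      %r = add i32 %x, 1
+      ret i32 %r
+    }
+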
+Aliases
+-------
+
+Aliases act as a "second name" for the aliasee value (which can be
+either a function, a global variable, another alias or a bitcast of a
+global value).
+Aliases may have an optional :ref:`linkage type <linkage>`, and an optional
+:ref:`visibility style <visibility>`.
+
+Syntax::
+
+ @<Name> = alias [Linkage] [Visibility] <AliaseeTy> @<Aliasee>
+
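+For example (a sketch), giving the global ``@foo`` a second name:
+
+.. code-block:: llvm
+
+    @foo = global i32 42
+    @bar = alias i32* @foo
+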
+.. _namedmetadatastructure:
+
+Named Metadata
+--------------
+
+Named metadata is a collection of metadata. :ref:`Metadata
+nodes <metadata>` (but not metadata strings) are the only valid
+operands for a named metadata.
+
+Syntax::
+
+ ; Some unnamed metadata nodes, which are referenced by the named metadata.
+ !0 = metadata !{metadata !"zero"}
+ !1 = metadata !{metadata !"one"}
+ !2 = metadata !{metadata !"two"}
+ ; A named metadata.
+ !name = !{!0, !1, !2}
+
+.. _paramattrs:
+
+Parameter Attributes
+--------------------
+
+The return type and each parameter of a function type may have a set of
+*parameter attributes* associated with them. Parameter attributes are
+used to communicate additional information about the result or
+parameters of a function. Parameter attributes are considered to be part
+of the function, not of the function type, so functions with different
+parameter attributes can have the same function type.
+
+Parameter attributes are simple keywords that follow the type specified.
+If multiple parameter attributes are needed, they are space separated.
+For example:
+
+.. code-block:: llvm
+
+ declare i32 @printf(i8* noalias nocapture, ...)
+ declare i32 @atoi(i8 zeroext)
+ declare signext i8 @returns_signed_char()
+
+Note that any attributes for the function result (``nounwind``,
+``readonly``) come immediately after the argument list.
+
+Currently, only the following parameter attributes are defined:
+
+``zeroext``
+ This indicates to the code generator that the parameter or return
+ value should be zero-extended to the extent required by the target's
+    ABI (which is usually 32-bits, but is 8-bits for an i1 on x86-64) by
+ the caller (for a parameter) or the callee (for a return value).
+``signext``
+ This indicates to the code generator that the parameter or return
+ value should be sign-extended to the extent required by the target's
+ ABI (which is usually 32-bits) by the caller (for a parameter) or
+ the callee (for a return value).
+``inreg``
+ This indicates that this parameter or return value should be treated
+    in a special target-dependent fashion while emitting code for
+ a function call or return (usually, by putting it in a register as
+ opposed to memory, though some targets use it to distinguish between
+ two different kinds of registers). Use of this attribute is
+ target-specific.
+``byval``
+ This indicates that the pointer parameter should really be passed by
+ value to the function. The attribute implies that a hidden copy of
+ the pointee is made between the caller and the callee, so the callee
+ is unable to modify the value in the caller. This attribute is only
+ valid on LLVM pointer arguments. It is generally used to pass
+ structs and arrays by value, but is also valid on pointers to
+ scalars. The copy is considered to belong to the caller not the
+ callee (for example, ``readonly`` functions should not write to
+ ``byval`` parameters). This is not a valid attribute for return
+ values.
+
+ The byval attribute also supports specifying an alignment with the
+ align attribute. It indicates the alignment of the stack slot to
+ form and the known alignment of the pointer specified to the call
+ site. If the alignment is not specified, then the code generator
+ makes a target-specific assumption.
+
+``sret``
+ This indicates that the pointer parameter specifies the address of a
+ structure that is the return value of the function in the source
+ program. This pointer must be guaranteed by the caller to be valid:
+ loads and stores to the structure may be assumed by the callee
+ not to trap and to be properly aligned. This may only be applied to
+ the first parameter. This is not a valid attribute for return
+ values.
+``noalias``
+    This indicates that pointer values :ref:`based <pointeraliasing>` on
+ the argument or return value do not alias pointer values which are
+ not *based* on it, ignoring certain "irrelevant" dependencies. For a
+ call to the parent function, dependencies between memory references
+ from before or after the call and from those during the call are
+ "irrelevant" to the ``noalias`` keyword for the arguments and return
+ value used in that call. The caller shares the responsibility with
+ the callee for ensuring that these requirements are met. For further
+ details, please see the discussion of the NoAlias response in `alias
+ analysis <AliasAnalysis.html#MustMayNo>`_.
+
+ Note that this definition of ``noalias`` is intentionally similar
+ to the definition of ``restrict`` in C99 for function arguments,
+ though it is slightly weaker.
+
+ For function return values, C99's ``restrict`` is not meaningful,
+ while LLVM's ``noalias`` is.
+``nocapture``
+ This indicates that the callee does not make any copies of the
+ pointer that outlive the callee itself. This is not a valid
+ attribute for return values.
+
+.. _nest:
+
+``nest``
+ This indicates that the pointer parameter can be excised using the
+ :ref:`trampoline intrinsics <int_trampoline>`. This is not a valid
+ attribute for return values.
+``nobuiltin``
+ This indicates that the callee function at a call site is not
+ recognized as a built-in function. LLVM will retain the original call
+ and not replace it with equivalent code based on the semantics of the
+ built-in function.
+
+.. _gc:
+
+Garbage Collector Names
+-----------------------
+
+Each function may specify a garbage collector name, which is simply a
+string:
+
+.. code-block:: llvm
+
+ define void @f() gc "name" { ... }
+
+The compiler declares the supported values of *name*. Specifying a
+collector will cause the compiler to alter its output in order to
+support the named garbage collection algorithm.
+
+.. _attrgrp:
+
+Attribute Groups
+----------------
+
+Attribute groups are groups of attributes that are referenced by objects within
+the IR. They are important for keeping ``.ll`` files readable, because a lot of
+functions will use the same set of attributes. In the degenerate case of a
+``.ll`` file that corresponds to a single ``.c`` file, the single attribute
+group will capture the important command line flags used to build that file.
+
+An attribute group is a module-level object. To use an attribute group, an
+object references the attribute group's ID (e.g. ``#37``). An object may refer
+to more than one attribute group. In that situation, the attributes from the
+different groups are merged.
+
+Here is an example of attribute groups for a function that should always be
+inlined, has a stack alignment of 4, and which shouldn't use SSE instructions:
+
+.. code-block:: llvm
+
+    ; Target-independent attributes:
+    attributes #0 = { alwaysinline alignstack=4 }
+
+    ; Target-dependent attributes:
+    attributes #1 = { "no-sse" }
+
+ ; Function @f has attributes: alwaysinline, alignstack=4, and "no-sse".
+ define void @f() #0 #1 { ... }
+
+.. _fnattrs:
+
+Function Attributes
+-------------------
+
+Function attributes are set to communicate additional information about
+a function. Function attributes are considered to be part of the
+function, not of the function type, so functions with different function
+attributes can have the same function type.
+
+Function attributes are simple keywords that follow the type specified.
+If multiple attributes are needed, they are space separated. For
+example:
+
+.. code-block:: llvm
+
+ define void @f() noinline { ... }
+ define void @f() alwaysinline { ... }
+ define void @f() alwaysinline optsize { ... }
+ define void @f() optsize { ... }
+
+``alignstack(<n>)``
+ This attribute indicates that, when emitting the prologue and
+ epilogue, the backend should forcibly align the stack pointer.
+ Specify the desired alignment, which must be a power of two, in
+ parentheses.
+``alwaysinline``
+ This attribute indicates that the inliner should attempt to inline
+ this function into callers whenever possible, ignoring any active
+ inlining size threshold for this caller.
+``nonlazybind``
+ This attribute suppresses lazy symbol binding for the function. This
+ may make calls to the function faster, at the cost of extra program
+ startup time if the function is not called during program startup.
+``inlinehint``
+ This attribute indicates that the source code contained a hint that
+ inlining this function is desirable (such as the "inline" keyword in
+ C/C++). It is just a hint; it imposes no requirements on the
+ inliner.
+``naked``
+ This attribute disables prologue / epilogue emission for the
+ function. This can have very system-specific consequences.
+``noduplicate``
+ This attribute indicates that calls to the function cannot be
+ duplicated. A call to a ``noduplicate`` function may be moved
+ within its parent function, but may not be duplicated within
+ its parent function.
+
+ A function containing a ``noduplicate`` call may still
+ be an inlining candidate, provided that the call is not
+ duplicated by inlining. That implies that the function has
+ internal linkage and only has one call site, so the original
+ call is dead after inlining.
+``noimplicitfloat``
+    This attribute disables implicit floating point instructions.
+``noinline``
+ This attribute indicates that the inliner should never inline this
+ function in any situation. This attribute may not be used together
+ with the ``alwaysinline`` attribute.
+``noredzone``
+ This attribute indicates that the code generator should not use a
+ red zone, even if the target-specific ABI normally permits it.
+``noreturn``
+ This function attribute indicates that the function never returns
+ normally. This produces undefined behavior at runtime if the
+ function ever does dynamically return.
+``nounwind``
+ This function attribute indicates that the function never returns
+ with an unwind or exceptional control flow. If the function does
+ unwind, its runtime behavior is undefined.
+``optsize``
+ This attribute suggests that optimization passes and code generator
+ passes make choices that keep the code size of this function low,
+ and otherwise do optimizations specifically to reduce code size.
+``readnone``
+ This attribute indicates that the function computes its result (or
+ decides to unwind an exception) based strictly on its arguments,
+ without dereferencing any pointer arguments or otherwise accessing
+ any mutable state (e.g. memory, control registers, etc) visible to
+ caller functions. It does not write through any pointer arguments
+ (including ``byval`` arguments) and never changes any state visible
+ to callers. This means that it cannot unwind exceptions by calling
+ the ``C++`` exception throwing methods.
+``readonly``
+ This attribute indicates that the function does not write through
+ any pointer arguments (including ``byval`` arguments) or otherwise
+ modify any state (e.g. memory, control registers, etc) visible to
+ caller functions. It may dereference pointer arguments and read
+ state that may be set in the caller. A readonly function always
+ returns the same value (or unwinds an exception identically) when
+ called with the same set of arguments and global state. It cannot
+ unwind an exception by calling the ``C++`` exception throwing
+ methods.
+``returns_twice``
+ This attribute indicates that this function can return twice. The C
+ ``setjmp`` is an example of such a function. The compiler disables
+ some optimizations (like tail calls) in the caller of these
+ functions.
+``sanitize_address``
+ This attribute indicates that AddressSanitizer checks
+ (dynamic address safety analysis) are enabled for this function.
+``sanitize_memory``
+ This attribute indicates that MemorySanitizer checks (dynamic detection
+ of accesses to uninitialized memory) are enabled for this function.
+``sanitize_thread``
+ This attribute indicates that ThreadSanitizer checks
+ (dynamic thread safety analysis) are enabled for this function.
+``ssp``
+ This attribute indicates that the function should emit a stack
+ smashing protector. It is in the form of a "canary" --- a random value
+ placed on the stack before the local variables that's checked upon
+ return from the function to see if it has been overwritten. A
+ heuristic is used to determine if a function needs stack protectors
+ or not. The heuristic used will enable protectors for functions with:
+
+ - Character arrays larger than ``ssp-buffer-size`` (default 8).
+ - Aggregates containing character arrays larger than ``ssp-buffer-size``.
+ - Calls to alloca() with variable sizes or constant sizes greater than
+ ``ssp-buffer-size``.
+
+ If a function that has an ``ssp`` attribute is inlined into a
+ function that doesn't have an ``ssp`` attribute, then the resulting
+ function will have an ``ssp`` attribute.
+``sspreq``
+ This attribute indicates that the function should *always* emit a
+ stack smashing protector. This overrides the ``ssp`` function
+ attribute.
+
+ If a function that has an ``sspreq`` attribute is inlined into a
+ function that doesn't have an ``sspreq`` attribute or which has an
+ ``ssp`` or ``sspstrong`` attribute, then the resulting function will have
+ an ``sspreq`` attribute.
+``sspstrong``
+ This attribute indicates that the function should emit a stack smashing
+ protector. This attribute causes a strong heuristic to be used when
+ determining if a function needs stack protectors. The strong heuristic
+ will enable protectors for functions with:
+
+ - Arrays of any size and type
+ - Aggregates containing an array of any size and type.
+ - Calls to alloca().
+ - Local variables that have had their address taken.
+
+ This overrides the ``ssp`` function attribute.
+
+ If a function that has an ``sspstrong`` attribute is inlined into a
+ function that doesn't have an ``sspstrong`` attribute, then the
+ resulting function will have an ``sspstrong`` attribute.
+``uwtable``
+    This attribute indicates that the ABI being targeted requires that
+    an unwind table entry be produced for this function even if we can
+    show that no exceptions pass through it. This is normally the case for
+    the ELF x86-64 ABI, but it can be disabled for some compilation
+    units.
+
+.. _moduleasm:
+
+Module-Level Inline Assembly
+----------------------------
+
+Modules may contain "module-level inline asm" blocks, which correspond
+to the GCC "file scope inline asm" blocks. These blocks are internally
+concatenated by LLVM and treated as a single unit, but may be separated
+in the ``.ll`` file if desired. The syntax is very simple:
+
+.. code-block:: llvm
+
+ module asm "inline asm code goes here"
+ module asm "more can go here"
+
+The strings can contain any character by escaping non-printable
+characters. The escape sequence used is simply "\\xx" where "xx" is the
+two digit hex code for the number.
+
+The inline asm code is simply printed to the machine code ``.s`` file
+when assembly code is generated.
+
+Data Layout
+-----------
+
+A module may specify a target specific data layout string that specifies
+how data is to be laid out in memory. The syntax for the data layout is
+simply:
+
+.. code-block:: llvm
+
+ target datalayout = "layout specification"
+
+The *layout specification* consists of a list of specifications
+separated by the minus sign character ('-'). Each specification starts
+with a letter and may include other information after the letter to
+define some aspect of the data layout. The specifications accepted are
+as follows:
+
+``E``
+ Specifies that the target lays out data in big-endian form. That is,
+ the bits with the most significance have the lowest address
+ location.
+``e``
+ Specifies that the target lays out data in little-endian form. That
+ is, the bits with the least significance have the lowest address
+ location.
+``S<size>``
+ Specifies the natural alignment of the stack in bits. Alignment
+ promotion of stack variables is limited to the natural stack
+ alignment to avoid dynamic stack realignment. The stack alignment
+ must be a multiple of 8 bits. If omitted, the natural stack
+ alignment defaults to "unspecified", which does not prevent any
+ alignment promotions.
+``p[n]:<size>:<abi>:<pref>``
+ This specifies the *size* of a pointer and its ``<abi>`` (ABI) and
+ ``<pref>`` (preferred) alignments for address space ``n``. All sizes
+ are in bits. Specifying the ``<pref>`` alignment is optional. If
+ omitted, the preceding ``:`` should be omitted too. The address
+ space, ``n``, is optional, and if not specified, denotes the default
+ address space 0. The value of ``n`` must be in the range [1,2^23).
+``i<size>:<abi>:<pref>``
+ This specifies the alignment for an integer type of a given bit
+ ``<size>``. The value of ``<size>`` must be in the range [1,2^23).
+``v<size>:<abi>:<pref>``
+ This specifies the alignment for a vector type of a given bit
+ ``<size>``.
+``f<size>:<abi>:<pref>``
+ This specifies the alignment for a floating point type of a given bit
+ ``<size>``. Only values of ``<size>`` that are supported by the target
+ will work. 32 (float) and 64 (double) are supported on all targets; 80
+ or 128 (different flavors of long double) are also supported on some
+ targets.
+``a<size>:<abi>:<pref>``
+ This specifies the alignment for an aggregate type of a given bit
+ ``<size>``.
+``s<size>:<abi>:<pref>``
+ This specifies the alignment for a stack object of a given bit
+ ``<size>``.
+``n<size1>:<size2>:<size3>...``
+ This specifies a set of native integer widths for the target CPU in
+ bits. For example, it might contain ``n32`` for 32-bit PowerPC,
+ ``n32:64`` for PowerPC 64, or ``n8:16:32:64`` for X86-64. Elements of
+ this set are considered to support most general arithmetic operations
+ efficiently.
+
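+Putting several of these together, a hypothetical little-endian layout
+with 64-bit pointers, a 128-bit natural stack alignment, and native
+8-, 16-, 32-, and 64-bit integers might be written as:
+
+.. code-block:: llvm
+
+ target datalayout = "e-p:64:64:64-i64:64:64-n8:16:32:64-S128"
+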
+When constructing the data layout for a given target, LLVM starts with a
+default set of specifications which are then (possibly) overridden by
+the specifications in the ``datalayout`` keyword. The default
+specifications are given in this list:
+
+- ``E`` - big endian
+- ``p:64:64:64`` - 64-bit pointers with 64-bit alignment
+- ``S0`` - natural stack alignment is unspecified
+- ``i1:8:8`` - i1 is 8-bit (byte) aligned
+- ``i8:8:8`` - i8 is 8-bit (byte) aligned
+- ``i16:16:16`` - i16 is 16-bit aligned
+- ``i32:32:32`` - i32 is 32-bit aligned
+- ``i64:32:64`` - i64 has ABI alignment of 32-bits but preferred
+ alignment of 64-bits
+- ``f16:16:16`` - half is 16-bit aligned
+- ``f32:32:32`` - float is 32-bit aligned
+- ``f64:64:64`` - double is 64-bit aligned
+- ``f128:128:128`` - quad is 128-bit aligned
+- ``v64:64:64`` - 64-bit vector is 64-bit aligned
+- ``v128:128:128`` - 128-bit vector is 128-bit aligned
+- ``a0:0:64`` - aggregates are 64-bit aligned
+
+When LLVM is determining the alignment for a given type, it uses the
+following rules:
+
+#. If the type sought is an exact match for one of the specifications,
+ that specification is used.
+#. If no match is found, and the type sought is an integer type, then
+ the smallest integer type that is larger than the bitwidth of the
+ sought type is used. If none of the specifications are larger than
+ the bitwidth then the largest integer type is used. For example,
+ given the default specifications above, the i7 type will use the
+ alignment of i8 (next largest) while both i65 and i256 will use the
+ alignment of i64 (largest specified).
+#. If no match is found, and the type sought is a vector type, then the
+ largest vector type that is smaller than the sought vector type will
+ be used as a fall back. This happens because <128 x double> can be
+ implemented in terms of 64 <2 x double>, for example.
+
+The function of the data layout string may not be what you expect.
+Notably, this is not a specification from the frontend of what alignment
+the code generator should use.
+
+Instead, if specified, the target data layout is required to match what
+the ultimate *code generator* expects. This string is used by the
+mid-level optimizers to improve code, and this only works if it matches
+what the ultimate code generator uses. If you would like to generate IR
+that does not embed this target-specific detail into the IR, then you
+don't have to specify the string. This will disable some optimizations
+that require precise layout information, but this also prevents those
+optimizations from introducing target specificity into the IR.
+
+.. _pointeraliasing:
+
+Pointer Aliasing Rules
+----------------------
+
+Any memory access must be done through a pointer value associated with
+an address range of the memory access, otherwise the behavior is
+undefined. Pointer values are associated with address ranges according
+to the following rules:
+
+- A pointer value is associated with the addresses associated with any
+ value it is *based* on.
+- An address of a global variable is associated with the address range
+ of the variable's storage.
+- The result value of an allocation instruction is associated with the
+ address range of the allocated storage.
+- A null pointer in the default address-space is associated with no
+ address.
+- An integer constant other than zero or a pointer value returned from
+ a function not defined within LLVM may be associated with address
+ ranges allocated through mechanisms other than those provided by
+ LLVM. Such ranges shall not overlap with any ranges of addresses
+ allocated by mechanisms provided by LLVM.
+
+A pointer value is *based* on another pointer value according to the
+following rules:
+
+- A pointer value formed from a ``getelementptr`` operation is *based*
+ on the first operand of the ``getelementptr``.
+- The result value of a ``bitcast`` is *based* on the operand of the
+ ``bitcast``.
+- A pointer value formed by an ``inttoptr`` is *based* on all pointer
+ values that contribute (directly or indirectly) to the computation of
+ the pointer's value.
+- The "*based* on" relationship is transitive.
+
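+For instance, under these rules (with hypothetical values):
+
+.. code-block:: llvm
+
+ %p2 = getelementptr i32* %p1, i64 1 ; %p2 is *based* on %p1
+ %p3 = bitcast i32* %p2 to i8*       ; %p3 is based on %p2 and, by
+                                     ; transitivity, on %p1
+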
+Note that this definition of *"based"* is intentionally similar to the
+definition of *"based"* in C99, though it is slightly weaker.
+
+LLVM IR does not associate types with memory. The result type of a
+``load`` merely indicates the size and alignment of the memory from
+which to load, as well as the interpretation of the value. The first
+operand type of a ``store`` similarly only indicates the size and
+alignment of the store.
+
+Consequently, type-based alias analysis, aka TBAA, aka
+``-fstrict-aliasing``, is not applicable to general unadorned LLVM IR.
+:ref:`Metadata <metadata>` may be used to encode additional information
+which specialized optimization passes may use to implement type-based
+alias analysis.
+
+.. _volatile:
+
+Volatile Memory Accesses
+------------------------
+
+Certain memory accesses, such as :ref:`load <i_load>`'s,
+:ref:`store <i_store>`'s, and :ref:`llvm.memcpy <int_memcpy>`'s may be
+marked ``volatile``. The optimizers must not change the number of
+volatile operations or change their order of execution relative to other
+volatile operations. The optimizers *may* change the order of volatile
+operations relative to non-volatile operations. This is not Java's
+"volatile" and has no cross-thread synchronization behavior.
+
+IR-level volatile loads and stores cannot safely be optimized into
+llvm.memcpy or llvm.memmove intrinsics even when those intrinsics are
+flagged volatile. Likewise, the backend should never split or merge
+target-legal volatile load/store instructions.
+
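+For example, volatile accesses to a hypothetical memory-mapped register
+``%mmio`` might be written as:
+
+.. code-block:: llvm
+
+ %v = load volatile i32* %mmio     ; may not be removed or reordered
+ store volatile i32 %v, i32* %mmio ; relative to other volatile operations
+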
+.. admonition:: Rationale
+
+ Platforms may rely on volatile loads and stores of natively supported
+ data width to be executed as a single instruction. For example, in C
+ this holds for an l-value of volatile primitive type with native
+ hardware support, but not necessarily for aggregate types. The
+ frontend upholds these expectations, which are intentionally
+ unspecified in the IR. The rules above ensure that IR transformations
+ do not violate the frontend's contract with the language.
+
+.. _memmodel:
+
+Memory Model for Concurrent Operations
+--------------------------------------
+
+The LLVM IR does not define any way to start parallel threads of
+execution or to register signal handlers. Nonetheless, there are
+platform-specific ways to create them, and we define LLVM IR's behavior
+in their presence. This model is inspired by the C++0x memory model.
+
+For a more informal introduction to this model, see the :doc:`Atomics`.
+
+We define a *happens-before* partial order as the least partial order
+that
+
+- Is a superset of single-thread program order, and
+- When ``a`` *synchronizes-with* ``b``, includes an edge from ``a`` to
+ ``b``. *Synchronizes-with* pairs are introduced by platform-specific
+ techniques, like pthread locks, thread creation, thread joining,
+ etc., and by atomic instructions. (See also :ref:`Atomic Memory Ordering
+ Constraints <ordering>`).
+
+Note that program order does not introduce *happens-before* edges
+between a thread and signals executing inside that thread.
+
+Every (defined) read operation (load instructions, memcpy, atomic
+loads/read-modify-writes, etc.) R reads a series of bytes written by
+(defined) write operations (store instructions, atomic
+stores/read-modify-writes, memcpy, etc.). For the purposes of this
+section, initialized globals are considered to have a write of the
+initializer which is atomic and happens before any other read or write
+of the memory in question. For each byte of a read R, R\ :sub:`byte`
+may see any write to the same byte, except:
+
+- If write\ :sub:`1` happens before write\ :sub:`2`, and
+ write\ :sub:`2` happens before R\ :sub:`byte`, then
+ R\ :sub:`byte` does not see write\ :sub:`1`.
+- If R\ :sub:`byte` happens before write\ :sub:`3`, then
+ R\ :sub:`byte` does not see write\ :sub:`3`.
+
+Given that definition, R\ :sub:`byte` is defined as follows:
+
+- If R is volatile, the result is target-dependent. (Volatile is
+ supposed to give guarantees which can support ``sig_atomic_t`` in
+ C/C++, and may be used for accesses to addresses which do not behave
+ like normal memory. It does not generally provide cross-thread
+ synchronization.)
+- Otherwise, if there is no write to the same byte that happens before
+ R\ :sub:`byte`, R\ :sub:`byte` returns ``undef`` for that byte.
+- Otherwise, if R\ :sub:`byte` may see exactly one write,
+ R\ :sub:`byte` returns the value written by that write.
+- Otherwise, if R is atomic, and all the writes R\ :sub:`byte` may
+ see are atomic, it chooses one of the values written. See the :ref:`Atomic
+ Memory Ordering Constraints <ordering>` section for additional
+ constraints on how the choice is made.
+- Otherwise R\ :sub:`byte` returns ``undef``.
+
+R returns the value composed of the series of bytes it read. This
+implies that some bytes within the value may be ``undef`` **without**
+the entire value being ``undef``. Note that this only defines the
+semantics of the operation; it doesn't mean that targets will emit more
+than one instruction to read the series of bytes.
+
+Note that in cases where none of the atomic intrinsics are used, this
+model places only one restriction on IR transformations on top of what
+is required for single-threaded execution: introducing a store to a byte
+which might not otherwise be stored is not allowed in general.
+(Specifically, in the case where another thread might write to and read
+from an address, introducing a store can change a load that may see
+exactly one write into a load that may see multiple writes.)
+
+.. _ordering:
+
+Atomic Memory Ordering Constraints
+----------------------------------
+
+Atomic instructions (:ref:`cmpxchg <i_cmpxchg>`,
+:ref:`atomicrmw <i_atomicrmw>`, :ref:`fence <i_fence>`,
+:ref:`atomic load <i_load>`, and :ref:`atomic store <i_store>`) take
+an ordering parameter that determines which other atomic instructions on
+the same address they *synchronize with*. These semantics are borrowed
+from Java and C++0x, but are somewhat more colloquial. If these
+descriptions aren't precise enough, check those specs (see spec
+references in the :doc:`atomics guide <Atomics>`).
+:ref:`fence <i_fence>` instructions treat these orderings somewhat
+differently since they don't take an address. See that instruction's
+documentation for details.
+
+For a simpler introduction to the ordering constraints, see the
+:doc:`Atomics`.
+
+``unordered``
+ The set of values that can be read is governed by the happens-before
+ partial order. A value cannot be read unless some operation wrote
+ it. This is intended to provide a guarantee strong enough to model
+ Java's non-volatile shared variables. This ordering cannot be
+ specified for read-modify-write operations; it is not strong enough
+ to make them atomic in any interesting way.
+``monotonic``
+ In addition to the guarantees of ``unordered``, there is a single
+ total order for modifications by ``monotonic`` operations on each
+ address. All modification orders must be compatible with the
+ happens-before order. There is no guarantee that the modification
+ orders can be combined to a global total order for the whole program
+ (and this often will not be possible). The read in an atomic
+ read-modify-write operation (:ref:`cmpxchg <i_cmpxchg>` and
+ :ref:`atomicrmw <i_atomicrmw>`) reads the value in the modification
+ order immediately before the value it writes. If one atomic read
+ happens before another atomic read of the same address, the later
+ read must see the same value or a later value in the address's
+ modification order. This disallows reordering of ``monotonic`` (or
+ stronger) operations on the same address. If an address is written
+ ``monotonic``-ally by one thread, and other threads ``monotonic``-ally
+ read that address repeatedly, the other threads must eventually see
+ the write. This corresponds to the C++0x/C1x
+ ``memory_order_relaxed``.
+``acquire``
+ In addition to the guarantees of ``monotonic``, a
+ *synchronizes-with* edge may be formed with a ``release`` operation.
+ This is intended to model C++'s ``memory_order_acquire``.
+``release``
+ In addition to the guarantees of ``monotonic``, if this operation
+ writes a value which is subsequently read by an ``acquire``
+ operation, it *synchronizes-with* that operation. (This isn't a
+ complete description; see the C++0x definition of a release
+ sequence.) This corresponds to the C++0x/C1x
+ ``memory_order_release``.
+``acq_rel`` (acquire+release)
+ Acts as both an ``acquire`` and ``release`` operation on its
+ address. This corresponds to the C++0x/C1x ``memory_order_acq_rel``.
+``seq_cst`` (sequentially consistent)
+ In addition to the guarantees of ``acq_rel`` (``acquire`` for an
+ operation which only reads, ``release`` for an operation which only
+ writes), there is a global total order on all
+ sequentially-consistent operations on all addresses, which is
+ consistent with the *happens-before* partial order and with the
+ modification orders of all the affected addresses. Each
+ sequentially-consistent read sees the last preceding write to the
+ same address in this global order. This corresponds to the C++0x/C1x
+ ``memory_order_seq_cst`` and Java volatile.
+
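+As a sketch of the syntax (the names here are hypothetical), the
+ordering is written after the operation:
+
+.. code-block:: llvm
+
+ %val = load atomic i32* @g acquire, align 4 ; pairs with a release store
+ store atomic i32 %new, i32* @g release, align 4
+ %old = atomicrmw add i32* @g, i32 1 seq_cst
+ fence acq_rel
+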
+.. _singlethread:
+
+If an atomic operation is marked ``singlethread``, it only *synchronizes
+with* or participates in modification and seq\_cst total orderings with
+other operations running in the same thread (for example, in signal
+handlers).
+
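+For example, a fence that only orders operations against other
+operations in the same thread (such as a signal handler) might be
+written as:
+
+.. code-block:: llvm
+
+ fence singlethread seq_cst
+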
+.. _fastmath:
+
+Fast-Math Flags
+---------------
+
+LLVM IR floating-point binary ops (:ref:`fadd <i_fadd>`,
+:ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
+:ref:`frem <i_frem>`) have the following flags that can be set to enable
+otherwise unsafe floating point operations:
+
+``nnan``
+ No NaNs - Allow optimizations to assume the arguments and result are not
+ NaN. Such optimizations are required to retain defined behavior over
+ NaNs, but the value of the result is undefined.
+
+``ninf``
+ No Infs - Allow optimizations to assume the arguments and result are not
+ +/-Inf. Such optimizations are required to retain defined behavior over
+ +/-Inf, but the value of the result is undefined.
+
+``nsz``
+ No Signed Zeros - Allow optimizations to treat the sign of a zero
+ argument or result as insignificant.
+
+``arcp``
+ Allow Reciprocal - Allow optimizations to use the reciprocal of an
+ argument rather than perform division.
+
+``fast``
+ Fast - Allow algebraically equivalent transformations that may
+ dramatically change results in floating point (e.g. reassociate). This
+ flag implies all the others.
+
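+For example, the flags are written between the opcode and the operand
+types (the values here are hypothetical):
+
+.. code-block:: llvm
+
+ %s = fadd nnan ninf float %a, %b ; assume no NaNs or infinities
+ %t = fmul fast float %s, %c      ; all fast-math flags implied
+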
+.. _typesystem:
+
+Type System
+===========
+
+The LLVM type system is one of the most important features of the
+intermediate representation. Being typed enables a number of
+optimizations to be performed on the intermediate representation
+directly, without having to do extra analyses on the side before the
+transformation. A strong type system makes it easier to read the
+generated code and enables novel analyses and transformations that are
+not feasible to perform on normal three address code representations.
+
+Type Classifications
+--------------------
+
+The types fall into a few useful classifications:
+
+
+.. list-table::
+ :header-rows: 1
+
+ * - Classification
+ - Types
+
+ * - :ref:`integer <t_integer>`
+ - ``i1``, ``i2``, ``i3``, ... ``i8``, ... ``i16``, ... ``i32``, ...
+ ``i64``, ...
+
+ * - :ref:`floating point <t_floating>`
+ - ``half``, ``float``, ``double``, ``x86_fp80``, ``fp128``,
+ ``ppc_fp128``
+
+
+ * - first class
+
+ .. _t_firstclass:
+
+ - :ref:`integer <t_integer>`, :ref:`floating point <t_floating>`,
+ :ref:`pointer <t_pointer>`, :ref:`vector <t_vector>`,
+ :ref:`structure <t_struct>`, :ref:`array <t_array>`,
+ :ref:`label <t_label>`, :ref:`metadata <t_metadata>`.
+
+ * - :ref:`primitive <t_primitive>`
+ - :ref:`label <t_label>`,
+ :ref:`void <t_void>`,
+ :ref:`integer <t_integer>`,
+ :ref:`floating point <t_floating>`,
+ :ref:`x86mmx <t_x86mmx>`,
+ :ref:`metadata <t_metadata>`.
+
+ * - :ref:`derived <t_derived>`
+ - :ref:`array <t_array>`,
+ :ref:`function <t_function>`,
+ :ref:`pointer <t_pointer>`,
+ :ref:`structure <t_struct>`,
+ :ref:`vector <t_vector>`,
+ :ref:`opaque <t_opaque>`.
+
+The :ref:`first class <t_firstclass>` types are perhaps the most important.
+Values of these types are the only ones which can be produced by
+instructions.
+
+.. _t_primitive:
+
+Primitive Types
+---------------
+
+The primitive types are the fundamental building blocks of the LLVM
+system.
+
+.. _t_integer:
+
+Integer Type
+^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The integer type is a very simple type that specifies an arbitrary
+bit width for the integer type desired. Any bit width from 1 bit to
+2\ :sup:`23`\ -1 (about 8 million) can be specified.
+
+Syntax:
+"""""""
+
+::
+
+ iN
+
+The number of bits the integer will occupy is specified by the ``N``
+value.
+
+Examples:
+"""""""""
+
++----------------+------------------------------------------------+
+| ``i1`` | a single-bit integer. |
++----------------+------------------------------------------------+
+| ``i32`` | a 32-bit integer. |
++----------------+------------------------------------------------+
+| ``i1942652`` | a really big integer of over 1 million bits. |
++----------------+------------------------------------------------+
+
+.. _t_floating:
+
+Floating Point Types
+^^^^^^^^^^^^^^^^^^^^
+
+.. list-table::
+ :header-rows: 1
+
+ * - Type
+ - Description
+
+ * - ``half``
+ - 16-bit floating point value
+
+ * - ``float``
+ - 32-bit floating point value
+
+ * - ``double``
+ - 64-bit floating point value
+
+ * - ``fp128``
+ - 128-bit floating point value (112-bit mantissa)
+
+ * - ``x86_fp80``
+ - 80-bit floating point value (X87)
+
+ * - ``ppc_fp128``
+ - 128-bit floating point value (two 64-bits)
+
+.. _t_x86mmx:
+
+X86mmx Type
+^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The x86mmx type represents a value held in an MMX register on an x86
+machine. The operations allowed on it are quite limited: parameters and
+return values, load and store, and bitcast. User-specified MMX
+instructions are represented as intrinsic or asm calls with arguments
+and/or results of this type. There are no arrays, vectors or constants
+of this type.
+
+Syntax:
+"""""""
+
+::
+
+ x86mmx
+
+.. _t_void:
+
+Void Type
+^^^^^^^^^
+
+Overview:
+"""""""""
+
+The void type does not represent any value and has no size.
+
+Syntax:
+"""""""
+
+::
+
+ void
+
+.. _t_label:
+
+Label Type
+^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The label type represents code labels.
+
+Syntax:
+"""""""
+
+::
+
+ label
+
+.. _t_metadata:
+
+Metadata Type
+^^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The metadata type represents embedded metadata. No derived types may be
+created from metadata except for :ref:`function <t_function>` arguments.
+
+Syntax:
+"""""""
+
+::
+
+ metadata
+
+.. _t_derived:
+
+Derived Types
+-------------
+
+The real power in LLVM comes from the derived types in the system. This
+is what allows a programmer to represent arrays, functions, pointers,
+and other useful types. Each of these types contains one or more element
+types, which may be a primitive type or another derived type. For
+example, it is possible to have a two-dimensional array, using an array
+as the element type of another array.
+
+.. _t_aggregate:
+
+Aggregate Types
+^^^^^^^^^^^^^^^
+
+Aggregate Types are a subset of derived types that can contain multiple
+member types. :ref:`Arrays <t_array>` and :ref:`structs <t_struct>` are
+aggregate types. :ref:`Vectors <t_vector>` are not considered to be
+aggregate types.
+
+.. _t_array:
+
+Array Type
+^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The array type is a very simple derived type that arranges elements
+sequentially in memory. The array type requires a size (number of
+elements) and an underlying data type.
+
+Syntax:
+"""""""
+
+::
+
+ [<# elements> x <elementtype>]
+
+The number of elements is a constant integer value; ``elementtype`` may
+be any type with a size.
+
+Examples:
+"""""""""
+
++------------------+--------------------------------------+
+| ``[40 x i32]`` | Array of 40 32-bit integer values. |
++------------------+--------------------------------------+
+| ``[41 x i32]`` | Array of 41 32-bit integer values. |
++------------------+--------------------------------------+
+| ``[4 x i8]`` | Array of 4 8-bit integer values. |
++------------------+--------------------------------------+
+
+Here are some examples of multidimensional arrays:
+
++-----------------------------+----------------------------------------------------------+
+| ``[3 x [4 x i32]]`` | 3x4 array of 32-bit integer values. |
++-----------------------------+----------------------------------------------------------+
+| ``[12 x [10 x float]]`` | 12x10 array of single precision floating point values. |
++-----------------------------+----------------------------------------------------------+
+| ``[2 x [3 x [4 x i16]]]`` | 2x3x4 array of 16-bit integer values. |
++-----------------------------+----------------------------------------------------------+
+
+There is no restriction on indexing beyond the end of the array implied
+by a static type (though there are restrictions on indexing beyond the
+bounds of an allocated object in some cases). This means that
+single-dimension 'variable sized array' addressing can be implemented in
+LLVM with a zero length array type. An implementation of 'pascal style
+arrays' in LLVM could use the type "``{ i32, [0 x float]}``", for
+example.
+
+.. _t_function:
+
+Function Type
+^^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The function type can be thought of as a function signature. It consists
+of a return type and a list of formal parameter types. The return type
+of a function type is a first class type or a void type.
+
+Syntax:
+"""""""
+
+::
+
+ <returntype> (<parameter list>)
+
+...where '``<parameter list>``' is a comma-separated list of type
+specifiers. Optionally, the parameter list may include a type ``...``,
+which indicates that the function takes a variable number of arguments.
+Variable argument functions can access their arguments with the
+:ref:`variable argument handling intrinsic <int_varargs>` functions.
+'``<returntype>``' is any type except :ref:`label <t_label>`.
+
+Examples:
+"""""""""
+
++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``i32 (i32)`` | function taking an ``i32``, returning an ``i32`` |
++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``float (i16, i32 *) *`` | :ref:`Pointer <t_pointer>` to a function that takes an ``i16`` and a :ref:`pointer <t_pointer>` to ``i32``, returning ``float``. |
++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``i32 (i8*, ...)`` | A vararg function that takes at least one :ref:`pointer <t_pointer>` to ``i8`` (char in C), which returns an integer. This is the signature for ``printf`` in LLVM. |
++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``{i32, i32} (i32)`` | A function taking an ``i32``, returning a :ref:`structure <t_struct>` containing two ``i32`` values |
++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+.. _t_struct:
+
+Structure Type
+^^^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The structure type is used to represent a collection of data members
+together in memory. The elements of a structure may be any type that has
+a size.
+
+Structures in memory are accessed using '``load``' and '``store``' by
+getting a pointer to a field with the '``getelementptr``' instruction.
+Structures in registers are accessed using the '``extractvalue``' and
+'``insertvalue``' instructions.
+
+Structures may optionally be "packed" structures, which indicate that
+the alignment of the struct is one byte, and that there is no padding
+between the elements. In non-packed structs, padding between field types
+is inserted as defined by the DataLayout string in the module, which is
+required to match what the underlying code generator expects.
+
+Structures can either be "literal" or "identified". A literal structure
+is defined inline with other types (e.g. ``{i32, i32}*``) whereas
+identified types are always defined at the top level with a name.
+Literal types are uniqued by their contents and can never be recursive
+or opaque since there is no way to write one. Identified types can be
+recursive, can be opaque, and are never uniqued.
+
+Syntax:
+"""""""
+
+::
+
+ %T1 = type { <type list> } ; Identified normal struct type
+ %T2 = type <{ <type list> }> ; Identified packed struct type
+
+Examples:
+"""""""""
+
++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``{ i32, i32, i32 }`` | A triple of three ``i32`` values |
++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``{ float, i32 (i32) * }`` | A pair, where the first element is a ``float`` and the second element is a :ref:`pointer <t_pointer>` to a :ref:`function <t_function>` that takes an ``i32``, returning an ``i32``. |
++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``<{ i8, i32 }>`` | A packed struct known to be 5 bytes in size. |
++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+.. _t_opaque:
+
+Opaque Structure Types
+^^^^^^^^^^^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+Opaque structure types are used to represent named structure types that
+do not have a body specified. This corresponds (for example) to the C
+notion of a forward declared structure.
+
+Syntax:
+"""""""
+
+::
+
+ %X = type opaque
+ %52 = type opaque
+
+Examples:
+"""""""""
+
++--------------+-------------------+
+| ``opaque`` | An opaque type. |
++--------------+-------------------+
+
+.. _t_pointer:
+
+Pointer Type
+^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The pointer type is used to specify memory locations. Pointers are
+commonly used to reference objects in memory.
+
+Pointer types may have an optional address space attribute defining the
+numbered address space where the pointed-to object resides. The default
+address space is number zero. The semantics of non-zero address spaces
+are target-specific.
+
+Note that LLVM does not permit pointers to void (``void*``) nor does it
+permit pointers to labels (``label*``). Use ``i8*`` instead.
+
+Syntax:
+"""""""
+
+::
+
+ <type> *
+
+Examples:
+"""""""""
+
++-------------------------+--------------------------------------------------------------------------------------------------------------+
+| ``[4 x i32]*`` | A :ref:`pointer <t_pointer>` to :ref:`array <t_array>` of four ``i32`` values. |
++-------------------------+--------------------------------------------------------------------------------------------------------------+
+| ``i32 (i32*) *`` | A :ref:`pointer <t_pointer>` to a :ref:`function <t_function>` that takes an ``i32*``, returning an ``i32``. |
++-------------------------+--------------------------------------------------------------------------------------------------------------+
+| ``i32 addrspace(5)*`` | A :ref:`pointer <t_pointer>` to an ``i32`` value that resides in address space #5. |
++-------------------------+--------------------------------------------------------------------------------------------------------------+
+
+.. _t_vector:
+
+Vector Type
+^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+A vector type is a simple derived type that represents a vector of
+elements. Vector types are used when multiple primitive values are
+operated on in parallel using a single instruction (SIMD). A vector type
+requires a size (number of elements) and an underlying primitive data
+type. Vector types are considered :ref:`first class <t_firstclass>`.
+
+Syntax:
+"""""""
+
+::
+
+ < <# elements> x <elementtype> >
+
+The number of elements is a constant integer value larger than 0;
+``elementtype`` may be any integer or floating point type, or a pointer
+to these types. Vectors of size zero are not allowed.
+
+Examples:
+"""""""""
+
++-------------------+--------------------------------------------------+
+| ``<4 x i32>`` | Vector of 4 32-bit integer values. |
++-------------------+--------------------------------------------------+
+| ``<8 x float>`` | Vector of 8 32-bit floating-point values. |
++-------------------+--------------------------------------------------+
+| ``<2 x i64>`` | Vector of 2 64-bit integer values. |
++-------------------+--------------------------------------------------+
+| ``<4 x i64*>`` | Vector of 4 pointers to 64-bit integer values. |
++-------------------+--------------------------------------------------+
+
+Constants
+=========
+
+LLVM has several different basic types of constants. This section
+describes them all and their syntax.
+
+Simple Constants
+----------------
+
+**Boolean constants**
+ The two strings '``true``' and '``false``' are both valid constants
+ of the ``i1`` type.
+**Integer constants**
+ Standard integers (such as '4') are constants of the
+ :ref:`integer <t_integer>` type. Negative numbers may be used with
+ integer types.
+**Floating point constants**
+ Floating point constants use standard decimal notation (e.g.
+ 123.421), exponential notation (e.g. 1.23421e+2), or a more precise
+ hexadecimal notation (see below). The assembler requires the exact
+ decimal value of a floating-point constant. For example, the
+ assembler accepts 1.25 but rejects 1.3 because 1.3 is a repeating
+ decimal in binary. Floating point constants must have a :ref:`floating
+ point <t_floating>` type.
+**Null pointer constants**
+ The identifier '``null``' is recognized as a null pointer constant
+ and must be of :ref:`pointer type <t_pointer>`.
+
+The one non-intuitive notation for constants is the hexadecimal form of
+floating point constants. For example, the form
+'``double 0x432ff973cafa8000``' is equivalent to (but harder to read
+than) '``double 4.5e+15``'. The only time hexadecimal floating point
+constants are required (and the only time that they are generated by the
+disassembler) is when a floating point constant must be emitted but it
+cannot be represented as a decimal floating point number in a reasonable
+number of digits. For example, NaN's, infinities, and other special
+values are represented in their IEEE hexadecimal format so that assembly
+and disassembly do not cause any bits to change in the constants.
+
+When using the hexadecimal form, constants of types half, float, and
+double are represented using the 16-digit form shown above (which
+matches the IEEE754 representation for double); half and float values
+must, however, be exactly representable as IEEE 754 half and single
+precision, respectively. Hexadecimal format is always used for long
+double, and there are three forms of long double. The 80-bit format used
+by x86 is represented as ``0xK`` followed by 20 hexadecimal digits. The
+128-bit format used by PowerPC (two adjacent doubles) is represented by
+``0xM`` followed by 32 hexadecimal digits. The IEEE 128-bit format is
+represented by ``0xL`` followed by 32 hexadecimal digits; no currently
+supported target uses this format. Long doubles will only work if they
+match the long double format on your target. The IEEE 16-bit format
+(half precision) is represented by ``0xH`` followed by 4 hexadecimal
+digits. All hexadecimal formats are big-endian (sign bit at the left).
+
+There are no constants of type x86mmx.
+
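+To illustrate, a few hypothetical globals initialized with simple
+constants:
+
+.. code-block:: llvm
+
+ @b = global i1 true                   ; boolean constant
+ @i = global i32 -42                   ; negative integer constant
+ @f = global float 1.25                ; exactly representable in binary
+ @d = global double 0x7FF0000000000000 ; +infinity, hexadecimal form
+ @p = global i32* null                 ; null pointer constant
+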
+Complex Constants
+-----------------
+
+Complex constants are a (potentially recursive) combination of simple
+constants and smaller complex constants.
+
+**Structure constants**
+ Structure constants are represented with notation similar to
+ structure type definitions (a comma separated list of elements,
+ surrounded by braces (``{}``)). For example:
+ "``{ i32 4, float 17.0, i32* @G }``", where "``@G``" is declared as
+ "``@G = external global i32``". Structure constants must have
+ :ref:`structure type <t_struct>`, and the number and types of elements
+ must match those specified by the type.
+**Array constants**
+ Array constants are represented with notation similar to array type
+ definitions (a comma separated list of elements, surrounded by
+ square brackets (``[]``)). For example:
+ "``[ i32 42, i32 11, i32 74 ]``". Array constants must have
+ :ref:`array type <t_array>`, and the number and types of elements must
+ match those specified by the type.
+**Vector constants**
+ Vector constants are represented with notation similar to vector
+ type definitions (a comma separated list of elements, surrounded by
+ less-than/greater-than's (``<>``)). For example:
+ "``< i32 42, i32 11, i32 74, i32 100 >``". Vector constants
+ must have :ref:`vector type <t_vector>`, and the number and types of
+ elements must match those specified by the type.
+**Zero initialization**
+ The string '``zeroinitializer``' can be used to zero initialize a
+ value to zero of *any* type, including scalar and
+ :ref:`aggregate <t_aggregate>` types. This is often used to avoid
+ having to print large zero initializers (e.g. for large arrays) and
+ is always exactly equivalent to using explicit zero initializers.
+**Metadata node**
+ A metadata node is a structure-like constant with :ref:`metadata
+ type <t_metadata>`. For example:
+ "``metadata !{ i32 0, metadata !"test" }``". Unlike other
+ constants that are meant to be interpreted as part of the
+ instruction stream, metadata is a place to attach additional
+ information such as debug info.
+
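+Gathering these forms together, a small set of hypothetical globals
+might look like:
+
+.. code-block:: llvm
+
+ @s = global { i32, float } { i32 4, float 17.0 }
+ @a = global [3 x i32] [ i32 42, i32 11, i32 74 ]
+ @v = global <2 x i64> < i64 1, i64 2 >
+ @z = global [100 x i32] zeroinitializer
+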
+Global Variable and Function Addresses
+--------------------------------------
+
+The addresses of :ref:`global variables <globalvars>` and
+:ref:`functions <functionstructure>` are always implicitly valid
+(link-time) constants. These constants are explicitly referenced when
+the :ref:`identifier for the global <identifiers>` is used and always have
+:ref:`pointer <t_pointer>` type. For example, the following is a legal LLVM
+file:
+
+.. code-block:: llvm
+
+ @X = global i32 17
+ @Y = global i32 42
+ @Z = global [2 x i32*] [ i32* @X, i32* @Y ]
+
+.. _undefvalues:
+
+Undefined Values
+----------------
+
+The string '``undef``' can be used anywhere a constant is expected, and
+indicates that the user of the value may receive an unspecified
+bit-pattern. Undefined values may be of any type (other than '``label``'
+or '``void``') and be used anywhere a constant is permitted.
+
+Undefined values are useful because they indicate to the compiler that
+the program is well defined no matter what value is used. This gives the
+compiler more freedom to optimize. Here are some examples of
+(potentially surprising) transformations that are valid (in pseudo IR):
+
+.. code-block:: llvm
+
+ %A = add %X, undef
+ %B = sub %X, undef
+ %C = xor %X, undef
+ Safe:
+ %A = undef
+ %B = undef
+ %C = undef
+
+This is safe because all of the output bits are affected by the undef
+bits. Any output bit can have a zero or one depending on the input bits.
+
+.. code-block:: llvm
+
+ %A = or %X, undef
+ %B = and %X, undef
+ Safe:
+ %A = -1
+ %B = 0
+ Unsafe:
+ %A = undef
+ %B = undef
+
+These logical operations have bits that are not always affected by the
+input. For example, if ``%X`` has a zero bit, then the output of the
+'``and``' operation will always be a zero for that bit, no matter what
+the corresponding bit from the '``undef``' is. As such, it is unsafe to
+optimize or assume that the result of the '``and``' is '``undef``'.
+However, it is safe to assume that all bits of the '``undef``' could be
+0, and optimize the '``and``' to 0. Likewise, it is safe to assume that
+all the bits of the '``undef``' operand to the '``or``' could be set,
+allowing the '``or``' to be folded to -1.
+
+.. code-block:: llvm
+
+ %A = select undef, %X, %Y
+ %B = select undef, 42, %Y
+ %C = select %X, %Y, undef
+ Safe:
+ %A = %X (or %Y)
+ %B = 42 (or %Y)
+ %C = %Y
+ Unsafe:
+ %A = undef
+ %B = undef
+ %C = undef
+
+This set of examples shows that undefined '``select``' (and conditional
+branch) conditions can go *either way*, but they have to come from one
+of the two operands. In the ``%A`` example, if ``%X`` and ``%Y`` were
+both known to have a clear low bit, then ``%A`` would have to have a
+cleared low bit. However, in the ``%C`` example, the optimizer is
+allowed to assume that the '``undef``' operand could be the same as
+``%Y``, allowing the whole '``select``' to be eliminated.
+
+.. code-block:: llvm
+
+ %A = xor undef, undef
+
+ %B = undef
+ %C = xor %B, %B
+
+ %D = undef
+ %E = icmp slt %D, 4
+ %F = icmp sge %D, 4
+
+ Safe:
+ %A = undef
+ %B = undef
+ %C = undef
+ %D = undef
+ %E = undef
+ %F = undef
+
+This example points out that two '``undef``' operands are not
+necessarily the same. This can be surprising to people who assume that
+"``X^X``" is always zero, even if ``X`` is undefined (C semantics make
+the same allowance). This isn't true for a number of reasons, but the
+short answer is that an '``undef``' "variable" can arbitrarily change
+its value over its "live range". This is true because the variable
+doesn't actually *have a live range*. Instead, the value is logically
+read from arbitrary registers that happen to be around when needed, so
+the value is not necessarily consistent over time. In fact, ``%A`` and
+``%C`` need to have the same semantics or the core LLVM "replace all
+uses with" concept would not hold.
+
+.. code-block:: llvm
+
+ %A = fdiv undef, %X
+ %B = fdiv %X, undef
+ Safe:
+ %A = undef
+ b: unreachable
+
+These examples show the crucial difference between an *undefined value*
+and *undefined behavior*. An undefined value (like '``undef``') is
+allowed to have an arbitrary bit-pattern. This means that the ``%A``
+operation can be constant folded to '``undef``', because the '``undef``'
+could be an SNaN, and ``fdiv`` is not (currently) defined on SNaN's.
+However, in the second example, we can make a more aggressive
+assumption: because the ``undef`` is allowed to be an arbitrary value,
+we are allowed to assume that it could be zero. Since a divide by zero
+has *undefined behavior*, we are allowed to assume that the operation
+does not execute at all. This allows us to delete the divide and all
+code after it. Because the undefined operation "can't happen", the
+optimizer can assume that it occurs in dead code.
+
+.. code-block:: llvm
+
+ a: store undef -> %X
+ b: store %X -> undef
+ Safe:
+ a: <deleted>
+ b: unreachable
+
+These examples reiterate the ``fdiv`` example: a store *of* an undefined
+value can be assumed to not have any effect; we can assume that the
+value is overwritten with bits that happen to match what was already
+there. However, a store *to* an undefined location could clobber
+arbitrary memory, therefore, it has undefined behavior.
+
+.. _poisonvalues:
+
+Poison Values
+-------------
+
+Poison values are similar to :ref:`undef values <undefvalues>`; however,
+they also represent the fact that an instruction or constant expression
+which cannot evoke side effects has nevertheless detected a condition
+which results in undefined behavior.
+
+There is currently no way of representing a poison value in the IR; they
+only exist when produced by operations such as :ref:`add <i_add>` with
+the ``nsw`` flag.
+
+Poison value behavior is defined in terms of value *dependence*:
+
+- Values other than :ref:`phi <i_phi>` nodes depend on their operands.
+- :ref:`Phi <i_phi>` nodes depend on the operand corresponding to
+ their dynamic predecessor basic block.
+- Function arguments depend on the corresponding actual argument values
+ in the dynamic callers of their functions.
+- :ref:`Call <i_call>` instructions depend on the :ref:`ret <i_ret>`
+ instructions that dynamically transfer control back to them.
+- :ref:`Invoke <i_invoke>` instructions depend on the
+ :ref:`ret <i_ret>`, :ref:`resume <i_resume>`, or exception-throwing
+ call instructions that dynamically transfer control back to them.
+- Non-volatile loads and stores depend on the most recent stores to all
+ of the referenced memory addresses, following the order in the IR
+ (including loads and stores implied by intrinsics such as
+ :ref:`@llvm.memcpy <int_memcpy>`.)
+- An instruction with externally visible side effects depends on the
+ most recent preceding instruction with externally visible side
+ effects, following the order in the IR. (This includes :ref:`volatile
+ operations <volatile>`.)
+- An instruction *control-depends* on a :ref:`terminator
+ instruction <terminators>` if the terminator instruction has
+ multiple successors and the instruction is always executed when
+ control transfers to one of the successors, and may not be executed
+ when control is transferred to another.
+- Additionally, an instruction also *control-depends* on a terminator
+ instruction if the set of instructions it otherwise depends on would
+ be different if the terminator had transferred control to a different
+ successor.
+- Dependence is transitive.
+
+Poison values have the same behavior as :ref:`undef values <undefvalues>`,
+with the additional effect that any instruction which has a *dependence*
+on a poison value has undefined behavior.
+
+Here are some examples:
+
+.. code-block:: llvm
+
+ entry:
+ %poison = sub nuw i32 0, 1 ; Results in a poison value.
+ %still_poison = and i32 %poison, 0 ; 0, but also poison.
+ %poison_yet_again = getelementptr i32* @h, i32 %still_poison
+ store i32 0, i32* %poison_yet_again ; memory at @h[0] is poisoned
+
+ store i32 %poison, i32* @g ; Poison value stored to memory.
+ %poison2 = load i32* @g ; Poison value loaded back from memory.
+
+ store volatile i32 %poison, i32* @g ; External observation; undefined behavior.
+
+ %narrowaddr = bitcast i32* @g to i16*
+ %wideaddr = bitcast i32* @g to i64*
+ %poison3 = load i16* %narrowaddr ; Returns a poison value.
+ %poison4 = load i64* %wideaddr ; Returns a poison value.
+
+ %cmp = icmp slt i32 %poison, 0 ; Returns a poison value.
+ br i1 %cmp, label %true, label %end ; Branch to either destination.
+
+ true:
+ store volatile i32 0, i32* @g ; This is control-dependent on %cmp, so
+ ; it has undefined behavior.
+ br label %end
+
+ end:
+ %p = phi i32 [ 0, %entry ], [ 1, %true ]
+ ; Both edges into this PHI are
+ ; control-dependent on %cmp, so this
+ ; always results in a poison value.
+
+ store volatile i32 0, i32* @g ; This would depend on the store in %true
+ ; if %cmp is true, or the store in %entry
+ ; otherwise, so this is undefined behavior.
+
+ br i1 %cmp, label %second_true, label %second_end
+ ; The same branch again, but this time the
+ ; true block doesn't have side effects.
+
+ second_true:
+ ; No side effects!
+ ret void
+
+ second_end:
+ store volatile i32 0, i32* @g ; This time, the instruction always depends
+ ; on the store in %end. Also, it is
+ ; control-equivalent to %end, so this is
+ ; well-defined (ignoring earlier undefined
+ ; behavior in this example).
+
+.. _blockaddress:
+
+Addresses of Basic Blocks
+-------------------------
+
+``blockaddress(@function, %block)``
+
+The '``blockaddress``' constant computes the address of the specified
+basic block in the specified function, and always has an ``i8*`` type.
+Taking the address of the entry block is illegal.
+
+This value only has defined behavior when used as an operand to the
+':ref:`indirectbr <i_indirectbr>`' instruction, or for comparisons
+against null. Pointer equality tests between label addresses result in
+undefined behavior --- though, again, comparison against null is ok, and
+no label is equal to the null pointer. This may be passed around as an
+opaque pointer-sized value as long as the bits are not inspected. This
+allows ``ptrtoint`` and arithmetic to be performed on these values so
+long as the original value is reconstituted before the ``indirectbr``
+instruction.
+
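+A minimal sketch of the intended use, with a hypothetical function:
+
+.. code-block:: llvm
+
+ define i32 @f(i1 %c) {
+ entry:
+   %dest = select i1 %c, i8* blockaddress(@f, %one), i8* blockaddress(@f, %two)
+   indirectbr i8* %dest, [ label %one, label %two ]
+ one:
+   ret i32 1
+ two:
+   ret i32 2
+ }
+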
+Finally, some targets may provide defined semantics when using the value
+as the operand to an inline assembly, but that is target specific.
+
+Constant Expressions
+--------------------
+
+Constant expressions are used to allow expressions involving other
+constants to be used as constants. Constant expressions may be of any
+:ref:`first class <t_firstclass>` type and may involve any LLVM operation
+that does not have side effects (e.g. load and call are not supported).
+The following is the syntax for constant expressions:
+
+``trunc (CST to TYPE)``
+ Truncate a constant to another type. The bit size of CST must be
+ larger than the bit size of TYPE. Both types must be integers.
+``zext (CST to TYPE)``
+ Zero extend a constant to another type. The bit size of CST must be
+ smaller than the bit size of TYPE. Both types must be integers.
+``sext (CST to TYPE)``
+ Sign extend a constant to another type. The bit size of CST must be
+ smaller than the bit size of TYPE. Both types must be integers.
+``fptrunc (CST to TYPE)``
+ Truncate a floating point constant to another floating point type.
+ The size of CST must be larger than the size of TYPE. Both types
+ must be floating point.
+``fpext (CST to TYPE)``
+ Floating point extend a constant to another type. The size of CST
+ must be smaller than or equal to the size of TYPE. Both types must
+ be floating point.
+``fptoui (CST to TYPE)``
+ Convert a floating point constant to the corresponding unsigned
+ integer constant. TYPE must be a scalar or vector integer type. CST
+ must be of scalar or vector floating point type. Both CST and TYPE
+ must be scalars, or vectors of the same number of elements. If the
+ value won't fit in the integer type, the results are undefined.
+``fptosi (CST to TYPE)``
+ Convert a floating point constant to the corresponding signed
+ integer constant. TYPE must be a scalar or vector integer type. CST
+ must be of scalar or vector floating point type. Both CST and TYPE
+ must be scalars, or vectors of the same number of elements. If the
+ value won't fit in the integer type, the results are undefined.
+``uitofp (CST to TYPE)``
+ Convert an unsigned integer constant to the corresponding floating
+ point constant. TYPE must be a scalar or vector floating point type.
+ CST must be of scalar or vector integer type. Both CST and TYPE must
+ be scalars, or vectors of the same number of elements. If the value
+ won't fit in the floating point type, the results are undefined.
+``sitofp (CST to TYPE)``
+ Convert a signed integer constant to the corresponding floating
+ point constant. TYPE must be a scalar or vector floating point type.
+ CST must be of scalar or vector integer type. Both CST and TYPE must
+ be scalars, or vectors of the same number of elements. If the value
+ won't fit in the floating point type, the results are undefined.
+``ptrtoint (CST to TYPE)``
+ Convert a pointer typed constant to the corresponding integer
+ constant. ``TYPE`` must be an integer type. ``CST`` must be of
+ pointer type. The ``CST`` value is zero extended, truncated, or
+ unchanged to make it fit in ``TYPE``.
+``inttoptr (CST to TYPE)``
+ Convert an integer constant to a pointer constant. TYPE must be a
+ pointer type. CST must be of integer type. The CST value is zero
+ extended, truncated, or unchanged to make it fit in a pointer size.
+ This one is *really* dangerous!
+``bitcast (CST to TYPE)``
+ Convert a constant, CST, to another TYPE. The constraints of the
+ operands are the same as those for the :ref:`bitcast
+ instruction <i_bitcast>`.
+``getelementptr (CSTPTR, IDX0, IDX1, ...)``, ``getelementptr inbounds (CSTPTR, IDX0, IDX1, ...)``
+ Perform the :ref:`getelementptr operation <i_getelementptr>` on
+ constants. As with the :ref:`getelementptr <i_getelementptr>`
+ instruction, the index list may have zero or more indexes, which are
+ required to make sense for the type of "CSTPTR".
+``select (COND, VAL1, VAL2)``
+ Perform the :ref:`select operation <i_select>` on constants.
+``icmp COND (VAL1, VAL2)``
+ Performs the :ref:`icmp operation <i_icmp>` on constants.
+``fcmp COND (VAL1, VAL2)``
+ Performs the :ref:`fcmp operation <i_fcmp>` on constants.
+``extractelement (VAL, IDX)``
+ Perform the :ref:`extractelement operation <i_extractelement>` on
+ constants.
+``insertelement (VAL, ELT, IDX)``
+ Perform the :ref:`insertelement operation <i_insertelement>` on
+ constants.
+``shufflevector (VEC1, VEC2, IDXMASK)``
+ Perform the :ref:`shufflevector operation <i_shufflevector>` on
+ constants.
+``extractvalue (VAL, IDX0, IDX1, ...)``
+ Perform the :ref:`extractvalue operation <i_extractvalue>` on
+ constants. The index list is interpreted in a similar manner as
+ indices in a ':ref:`getelementptr <i_getelementptr>`' operation. At
+ least one index value must be specified.
+``insertvalue (VAL, ELT, IDX0, IDX1, ...)``
+ Perform the :ref:`insertvalue operation <i_insertvalue>` on constants.
+ The index list is interpreted in a similar manner as indices in a
+ ':ref:`getelementptr <i_getelementptr>`' operation. At least one index
+ value must be specified.
+``OPCODE (LHS, RHS)``
+ Perform the specified operation of the LHS and RHS constants. OPCODE
+ may be any of the :ref:`binary <binaryops>` or :ref:`bitwise
+ binary <bitwiseops>` operations. The constraints on operands are
+ the same as those for the corresponding instruction (e.g. no bitwise
+ operations on floating point values are allowed).
+
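+For example, constant expressions can compute global initializers from
+the address of another global (the names here are hypothetical):
+
+.. code-block:: llvm
+
+ @g = global i32 0
+ @p = global i8* bitcast (i32* @g to i8*)
+ @n = global i64 ptrtoint (i32* @g to i64)
+ @e = global i32* getelementptr (i32* @g, i64 1)
+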
+Other Values
+============
+
+Inline Assembler Expressions
+----------------------------
+
+LLVM supports inline assembler expressions (as opposed to :ref:`Module-Level
+Inline Assembly <moduleasm>`) through the use of a special value. This
+value represents the inline assembler as a string (containing the
+instructions to emit), a list of operand constraints (stored as a
+string), a flag that indicates whether or not the inline asm expression
+has side effects, and a flag indicating whether the function containing
+the asm needs to align its stack conservatively. An example inline
+assembler expression is:
+
+.. code-block:: llvm
+
+ i32 (i32) asm "bswap $0", "=r,r"
+
+Inline assembler expressions may **only** be used as the callee operand
+of a :ref:`call <i_call>` or an :ref:`invoke <i_invoke>` instruction.
+Thus, typically we have:
+
+.. code-block:: llvm
+
+ %X = call i32 asm "bswap $0", "=r,r"(i32 %Y)
+
+Inline asms with side effects not visible in the constraint list must be
+marked as having side effects. This is done through the use of the
+'``sideeffect``' keyword, like so:
+
+.. code-block:: llvm
+
+ call void asm sideeffect "eieio", ""()
+
+In some cases inline asms will contain code that will not work unless
+the stack is aligned in some way, such as calls or SSE instructions on
+x86, yet will not contain code that does that alignment within the asm.
+The compiler should make conservative assumptions about what the asm
+might contain and should generate its usual stack alignment code in the
+prologue if the '``alignstack``' keyword is present:
+
+.. code-block:: llvm
+
+ call void asm alignstack "eieio", ""()
+
+Inline asms also support using non-standard assembly dialects. The
+assumed dialect is ATT. When the '``inteldialect``' keyword is present,
+the inline asm is using the Intel dialect. Currently, ATT and Intel are
+the only supported dialects. An example is:
+
+.. code-block:: llvm
+
+ call void asm inteldialect "eieio", ""()
+
+If multiple keywords appear, the '``sideeffect``' keyword must come
+first, the '``alignstack``' keyword second, and the '``inteldialect``'
+keyword last.
+
+Inline Asm Metadata
+^^^^^^^^^^^^^^^^^^^
+
+The call instructions that wrap inline asm nodes may have a
+"``!srcloc``" MDNode attached to them that contains a list of constant
+integers. If present, the code generator will use the integer as the
+location cookie value when reporting errors through the ``LLVMContext``
+error reporting mechanisms. This allows a front-end to correlate backend
+errors that occur with inline asm back to the source code that produced
+it. For example:
+
+.. code-block:: llvm
+
+ call void asm sideeffect "something bad", ""(), !srcloc !42
+ ...
+ !42 = !{ i32 1234567 }
+
+It is up to the front-end to make sense of the magic numbers it places
+in the IR. If the MDNode contains multiple constants, the code generator
+will use the one that corresponds to the line of the asm that the error
+occurs on.
+
+.. _metadata:
+
+Metadata Nodes and Metadata Strings
+-----------------------------------
+
+LLVM IR allows metadata to be attached to instructions in the program
+that can convey extra information about the code to the optimizers and
+code generator. One example application of metadata is source-level
+debug information. There are two metadata primitives: strings and nodes.
+All metadata has the ``metadata`` type and is identified in syntax by a
+preceding exclamation point ('``!``').
+
+A metadata string is a string surrounded by double quotes. It can
+contain any character by escaping non-printable characters with
+"``\xx``" where "``xx``" is the two digit hex code. For example:
+"``!"test\00"``".
+
+Metadata nodes are represented with notation similar to structure
+constants (a comma separated list of elements, surrounded by braces and
+preceded by an exclamation point). Metadata nodes can have any values as
+their operands. For example:
+
+.. code-block:: llvm
+
+ !{ metadata !"test\00", i32 10}
+
+A :ref:`named metadata <namedmetadatastructure>` is a collection of
+metadata nodes, which can be looked up in the module symbol table. For
+example:
+
+.. code-block:: llvm
+
+ !foo = metadata !{!4, !3}
+
+Metadata can be used as function arguments. Here the ``llvm.dbg.value``
+function is passed two metadata arguments:
+
+.. code-block:: llvm
+
+ call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
+
+Metadata can be attached to an instruction. Here metadata ``!21`` is
+attached to the ``add`` instruction using the ``!dbg`` identifier:
+
+.. code-block:: llvm
+
+ %indvar.next = add i64 %indvar, 1, !dbg !21
+
+More information about specific metadata nodes recognized by the
+optimizers and code generator is found below.
+
+'``tbaa``' Metadata
+^^^^^^^^^^^^^^^^^^^
+
+In LLVM IR, memory does not have types, so LLVM's own type system is not
+suitable for doing TBAA. Instead, metadata is added to the IR to
+describe a type system of a higher level language. This can be used to
+implement typical C/C++ TBAA, but it can also be used to implement
+custom alias analysis behavior for other languages.
+
+The current metadata format is very simple. TBAA metadata nodes have up
+to three fields, e.g.:
+
+.. code-block:: llvm
+
+ !0 = metadata !{ metadata !"an example type tree" }
+ !1 = metadata !{ metadata !"int", metadata !0 }
+ !2 = metadata !{ metadata !"float", metadata !0 }
+ !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+
+The first field is an identity field. It can be any value, usually a
+metadata string, which uniquely identifies the type. The most important
+name in the tree is the name of the root node. Two trees with different
+root node names are entirely disjoint, even if they have leaves with
+common names.
+
+The second field identifies the type's parent node in the tree, or is
+null or omitted for a root node. A type is considered to alias all of
+its descendants and all of its ancestors in the tree. Also, a type is
+considered to alias all types in other trees, so that bitcode produced
+from multiple front-ends is handled conservatively.
+
+If the third field is present, it is an integer which, if equal to 1,
+indicates that the type is "constant" (meaning
+``pointsToConstantMemory`` should return true; see `other useful
+AliasAnalysis methods <AliasAnalysis.html#OtherItfs>`_).
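+
+For example, a load can be given one of the tags defined above by
+attaching ``!tbaa`` metadata to it. A minimal sketch using tag ``!1``
+from the example above (the pointer ``%ptr`` is hypothetical):
+
+.. code-block:: llvm
+
+    %val = load i32* %ptr, align 4, !tbaa !1 ; an "int" access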
+
+'``tbaa.struct``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The :ref:`llvm.memcpy <int_memcpy>` intrinsic is often used to implement
+aggregate assignment operations in C and similar languages. However, it
+is defined to copy a contiguous region of memory, which is more than
+strictly necessary for aggregate types that contain holes due to
+padding. It also carries no TBAA information about the fields of the
+aggregate.
+
+``!tbaa.struct`` metadata can describe which memory subregions in a
+memcpy are padding and what the TBAA tags of the struct are.
+
+The current metadata format is very simple. ``!tbaa.struct`` metadata
+nodes are a list of operands which are in conceptual groups of three.
+For each group of three, the first operand gives the offset of a field
+in bytes, the second gives its size in bytes, and the third gives its
+tbaa tag. For example:
+
+.. code-block:: llvm
+
+ !4 = metadata !{ i64 0, i64 4, metadata !1, i64 8, i64 4, metadata !2 }
+
+This describes a struct with two fields. The first is at offset 0 bytes
+with size 4 bytes, and has tbaa tag !1. The second is at offset 8 bytes
+and has size 4 bytes and has tbaa tag !2.
+
+Note that the fields need not be contiguous. In this example, there is a
+4 byte gap between the two fields. This gap represents padding which
+does not carry useful data and need not be preserved.
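+
+As a sketch of how this is used, the tag can be attached to the
+:ref:`llvm.memcpy <int_memcpy>` call that implements the aggregate copy;
+here the ``%dst`` and ``%src`` operands are hypothetical and the 12-byte
+length matches the two-field struct described above:
+
+.. code-block:: llvm
+
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 12, i32 4, i1 false), !tbaa.struct !4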
+
+'``fpmath``' Metadata
+^^^^^^^^^^^^^^^^^^^^^
+
+``fpmath`` metadata may be attached to any instruction of floating point
+type. It can be used to express the maximum acceptable error in the
+result of that instruction, in ULPs, thus potentially allowing the
+compiler to use a more efficient but less accurate method of computing
+it. ULP is defined as follows:
+
+ If ``x`` is a real number that lies between two finite consecutive
+ floating-point numbers ``a`` and ``b``, without being equal to one
+ of them, then ``ulp(x) = |b - a|``, otherwise ``ulp(x)`` is the
+ distance between the two non-equal finite floating-point numbers
+ nearest ``x``. Moreover, ``ulp(NaN)`` is ``NaN``.
+
+The metadata node shall consist of a single positive floating point
+number representing the maximum relative error, for example:
+
+.. code-block:: llvm
+
+ !0 = metadata !{ float 2.5 } ; maximum acceptable inaccuracy is 2.5 ULPs
+
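+Such a node can then be attached to a floating point instruction. A
+minimal sketch (the operands ``%a`` and ``%b`` are hypothetical):
+
+.. code-block:: llvm
+
+    %r = fdiv float %a, %b, !fpmath !0
+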
+'``range``' Metadata
+^^^^^^^^^^^^^^^^^^^^
+
+``range`` metadata may be attached only to loads of integer types. It
+expresses the possible ranges the loaded value is in. The ranges are
+represented with a flattened list of integers. The loaded value is known
+to be in the union of the ranges defined by each consecutive pair. Each
+pair has the following properties:
+
+- The type must match the type loaded by the instruction.
+- The pair ``a,b`` represents the range ``[a,b)``.
+- Both ``a`` and ``b`` are constants.
+- The range is allowed to wrap.
+- The range should not represent the full or empty set. That is,
+ ``a!=b``.
+
+In addition, the pairs must be in signed order of the lower bound and
+they must be non-contiguous.
+
+Examples:
+
+.. code-block:: llvm
+
+ %a = load i8* %x, align 1, !range !0 ; Can only be 0 or 1
+ %b = load i8* %y, align 1, !range !1 ; Can only be 255 (-1), 0 or 1
+ %c = load i8* %z, align 1, !range !2 ; Can only be 0, 1, 3, 4 or 5
+ %d = load i8* %z, align 1, !range !3 ; Can only be -2, -1, 3, 4 or 5
+ ...
+ !0 = metadata !{ i8 0, i8 2 }
+ !1 = metadata !{ i8 255, i8 2 }
+ !2 = metadata !{ i8 0, i8 2, i8 3, i8 6 }
+ !3 = metadata !{ i8 -2, i8 0, i8 3, i8 6 }
+
+'``llvm.loop``'
+^^^^^^^^^^^^^^^
+
+It is sometimes useful to attach information to loop constructs. Currently,
+loop metadata is implemented as metadata attached to the branch instruction
+in the loop latch block. This type of metadata refers to a metadata node that
+is guaranteed to be separate for each loop. The loop-level metadata is
+prefixed with ``llvm.loop``.
+
+The loop identifier metadata is implemented as a metadata node that refers to
+itself, which avoids merging it with any other identifier metadata, e.g.,
+during module linkage or function inlining. That is, each loop should refer
+to its own identifier metadata even if the loops reside in separate functions.
+The following example contains loop identifier metadata for two separate loop
+constructs:
+
+.. code-block:: llvm
+
+ !0 = metadata !{ metadata !0 }
+ !1 = metadata !{ metadata !1 }
+
+
+'``llvm.loop.parallel``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This loop metadata can be used to communicate that a loop should be considered
+a parallel loop. Parallel loops in this sense have the strongest possible
+cross-iteration instruction ordering freedom: the iterations of the loop can
+be considered completely independent of each other (also known as
+embarrassingly parallel loops).
+
+This metadata can originate from a programming language with parallel loop
+constructs. In such a case it is completely the programmer's responsibility
+to ensure that the instructions from the different iterations of the loop can
+be executed in an arbitrary order, in parallel, or interleaved. No
+loop-carried dependency checking at all should be expected from the compiler.
+
+In order to fulfill the LLVM requirement that metadata can be safely
+ignored, a loop carrying this metadata must degrade to a sequential loop
+whenever an optimization (agnostic of the parallel loop semantics)
+effectively makes it sequential. This happens when new memory accesses that
+do not satisfy the requirement of free ordering across iterations are added
+to the loop. Therefore, this metadata is necessary, but not sufficient, to
+consider the loop at hand a parallel loop. For a loop to be parallel, all of
+its memory accessing instructions need to be marked with the
+``llvm.mem.parallel_loop_access`` metadata that refers to the same loop
+identifier metadata that identifies the loop at hand.
+
+'``llvm.mem``'
+^^^^^^^^^^^^^^^
+
+Metadata types used to annotate memory accesses with information helpful
+for optimizations are prefixed with ``llvm.mem``.
+
+'``llvm.mem.parallel_loop_access``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For a loop to be parallel, in addition to marking the loop latch branch
+instruction with the ``llvm.loop.parallel`` metadata, all of the memory
+accessing instructions in the loop body must be marked with the
+``llvm.mem.parallel_loop_access`` metadata. If at least one memory accessing
+instruction is not so marked, the loop must be considered a sequential loop,
+even though it carries the ``llvm.loop.parallel`` metadata. This is how
+optimization passes that are unaware of the parallel semantics, and that
+insert new memory instructions into the loop body, cause parallel loops to
+be converted to sequential loops.
+
+Here is an example of a loop that is considered parallel due to its correct
+use of both the ``llvm.loop.parallel`` and ``llvm.mem.parallel_loop_access``
+metadata types, which refer to the same loop identifier metadata:
+
+.. code-block:: llvm
+
+ for.body:
+ ...
+ %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !0
+ ...
+ store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !0
+ ...
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !0
+
+ for.end:
+ ...
+ !0 = metadata !{ metadata !0 }
+
+It is also possible to have nested parallel loops. In that case the
+memory accesses refer to a list of loop identifier metadata nodes instead of
+the loop identifier metadata node directly:
+
+.. code-block:: llvm
+
+ outer.for.body:
+ ...
+
+ inner.for.body:
+ ...
+ %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !0
+ ...
+ store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !0
+ ...
+ br i1 %exitcond, label %inner.for.end, label %inner.for.body, !llvm.loop.parallel !1
+
+ inner.for.end:
+ ...
+ %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !0
+ ...
+ store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !0
+ ...
+ br i1 %exitcond, label %outer.for.end, label %outer.for.body, !llvm.loop.parallel !2
+
+ outer.for.end: ; preds = %for.body
+ ...
+ !0 = metadata !{ metadata !1, metadata !2 } ; a list of parallel loop identifiers
+ !1 = metadata !{ metadata !1 } ; an identifier for the inner parallel loop
+ !2 = metadata !{ metadata !2 } ; an identifier for the outer parallel loop
+
+
+Module Flags Metadata
+=====================
+
+Information about the module as a whole is difficult to convey to LLVM's
+subsystems. The LLVM IR isn't sufficient to transmit this information.
+The ``llvm.module.flags`` named metadata exists in order to facilitate
+this. These flags are in the form of key / value pairs, much like a
+dictionary, making it easy for any subsystem that cares about a flag to
+look it up.
+
+The ``llvm.module.flags`` metadata contains a list of metadata triplets.
+Each triplet has the following form:
+
+- The first element is a *behavior* flag, which specifies the behavior
+ when two (or more) modules are merged together, and it encounters two
+ (or more) metadata with the same ID. The supported behaviors are
+ described below.
+- The second element is a metadata string that is a unique ID for the
+ metadata. Each module may only have one flag entry for each unique ID (not
+ including entries with the **Require** behavior).
+- The third element is the value of the flag.
+
+When two (or more) modules are merged together, the resulting
+``llvm.module.flags`` metadata is the union of the modules' flags. That is, for
+each unique metadata ID string, there will be exactly one entry in the merged
+module's ``llvm.module.flags`` metadata table, and the value for that entry will
+be determined by the merge behavior flag, as described below. The only exception
+is that entries with the *Require* behavior are always preserved.
+
+The following behaviors are supported:
+
+.. list-table::
+ :header-rows: 1
+ :widths: 10 90
+
+ * - Value
+ - Behavior
+
+ * - 1
+ - **Error**
+ Emits an error if two values disagree, otherwise the resulting value
+ is that of the operands.
+
+ * - 2
+ - **Warning**
+ Emits a warning if two values disagree. The result value will be the
+ operand for the flag from the first module being linked.
+
+ * - 3
+ - **Require**
+ Adds a requirement that another module flag be present and have a
+ specified value after linking is performed. The value must be a
+ metadata pair, where the first element of the pair is the ID of the
+ module flag to be restricted, and the second element of the pair is
+ the value the module flag should be restricted to. This behavior can
+ be used to restrict the allowable results (via triggering of an
+ error) of linking IDs with the **Override** behavior.
+
+ * - 4
+ - **Override**
+ Uses the specified value, regardless of the behavior or value of the
+ other module. If both modules specify **Override**, but the values
+ differ, an error will be emitted.
+
+ * - 5
+ - **Append**
+ Appends the two values, which are required to be metadata nodes.
+
+ * - 6
+ - **AppendUnique**
+ Appends the two values, which are required to be metadata
+ nodes. However, duplicate entries in the second list are dropped
+ during the append operation.
+
+It is an error for a particular unique flag ID to have multiple behaviors,
+except in the case of **Require** (which adds restrictions on another metadata
+value) or **Override**.
+
+An example of module flags:
+
+.. code-block:: llvm
+
+ !0 = metadata !{ i32 1, metadata !"foo", i32 1 }
+ !1 = metadata !{ i32 4, metadata !"bar", i32 37 }
+ !2 = metadata !{ i32 2, metadata !"qux", i32 42 }
+ !3 = metadata !{ i32 3, metadata !"qux",
+ metadata !{
+ metadata !"foo", i32 1
+ }
+ }
+ !llvm.module.flags = !{ !0, !1, !2, !3 }
+
+- Metadata ``!0`` has the ID ``!"foo"`` and the value '1'. The behavior
+ if two or more ``!"foo"`` flags are seen is to emit an error if their
+ values are not equal.
+
+- Metadata ``!1`` has the ID ``!"bar"`` and the value '37'. The
+ behavior if two or more ``!"bar"`` flags are seen is to use the value
+ '37'.
+
+- Metadata ``!2`` has the ID ``!"qux"`` and the value '42'. The
+ behavior if two or more ``!"qux"`` flags are seen is to emit a
+ warning if their values are not equal.
+
+- Metadata ``!3`` has the ID ``!"qux"`` and the value:
+
+ ::
+
+ metadata !{ metadata !"foo", i32 1 }
+
+ The behavior is to emit an error if the ``llvm.module.flags`` does not
+ contain a flag with the ID ``!"foo"`` that has the value '1' after linking is
+ performed.
+
+Objective-C Garbage Collection Module Flags Metadata
+----------------------------------------------------
+
+On the Mach-O platform, Objective-C stores metadata about garbage
+collection in a special section called "image info". The metadata
+consists of a version number and a bitmask specifying what types of
+garbage collection are supported (if any) by the file. If two or more
+modules are linked together, their garbage collection metadata needs to
+be merged rather than appended together.
+
+The Objective-C garbage collection module flags metadata consists of the
+following key-value pairs:
+
+.. list-table::
+ :header-rows: 1
+ :widths: 30 70
+
+ * - Key
+ - Value
+
+ * - ``Objective-C Version``
+ - **[Required]** --- The Objective-C ABI version. Valid values are 1 and 2.
+
+ * - ``Objective-C Image Info Version``
+ - **[Required]** --- The version of the image info section. Currently
+ always 0.
+
+ * - ``Objective-C Image Info Section``
+ - **[Required]** --- The section to place the metadata. Valid values are
+ ``"__OBJC, __image_info, regular"`` for Objective-C ABI version 1, and
+ ``"__DATA,__objc_imageinfo, regular, no_dead_strip"`` for
+ Objective-C ABI version 2.
+
+ * - ``Objective-C Garbage Collection``
+ - **[Required]** --- Specifies whether garbage collection is supported or
+ not. Valid values are 0, for no garbage collection, and 2, for garbage
+ collection supported.
+
+ * - ``Objective-C GC Only``
+ - **[Optional]** --- Specifies that only garbage collection is supported.
+ If present, its value must be 6. This flag requires that the
+ ``Objective-C Garbage Collection`` flag have the value 2.
+
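+As an illustration, a hypothetical module built for the version 2 ABI
+with garbage collection support might carry flags like the following
+sketch (the **Error** merge behavior, value 1, is an assumption made for
+this example):
+
+.. code-block:: llvm
+
+    !0 = metadata !{ i32 1, metadata !"Objective-C Version", i32 2 }
+    !1 = metadata !{ i32 1, metadata !"Objective-C Image Info Version", i32 0 }
+    !2 = metadata !{ i32 1, metadata !"Objective-C Image Info Section",
+                     metadata !"__DATA,__objc_imageinfo, regular, no_dead_strip" }
+    !3 = metadata !{ i32 1, metadata !"Objective-C Garbage Collection", i32 2 }
+    !llvm.module.flags = !{ !0, !1, !2, !3 }
+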
+Some important flag interactions:
+
+- If a module with ``Objective-C Garbage Collection`` set to 0 is
+ merged with a module with ``Objective-C Garbage Collection`` set to
+ 2, then the resulting module has the
+ ``Objective-C Garbage Collection`` flag set to 0.
+- A module with ``Objective-C Garbage Collection`` set to 0 cannot be
+ merged with a module with ``Objective-C GC Only`` set to 6.
+
+Automatic Linker Flags Module Flags Metadata
+--------------------------------------------
+
+Some targets support embedding flags for the linker inside individual object
+files. Typically this is used in conjunction with language extensions which
+allow source files to explicitly declare the libraries they depend on, and have
+these automatically be transmitted to the linker via object files.
+
+These flags are encoded in the IR using metadata in the module flags section,
+using the ``Linker Options`` key. The merge behavior for this flag is required
+to be ``AppendUnique``, and the value for the key is expected to be a metadata
+node which should be a list of other metadata nodes, each of which should be a
+list of metadata strings defining linker options.
+
+For example, the following metadata section specifies two separate sets of
+linker options, presumably to link against ``libz`` and the ``Cocoa``
+framework::
+
+ !0 = metadata !{ i32 6, metadata !"Linker Options",
+ metadata !{
+ metadata !{ metadata !"-lz" },
+ metadata !{ metadata !"-framework", metadata !"Cocoa" } } }
+ !llvm.module.flags = !{ !0 }
+
+The metadata encoding as lists of lists of options, as opposed to a collapsed
+list of options, is chosen so that the IR encoding can use multiple option
+strings to specify, e.g., a single library, while still having that specifier be
+preserved as an atomic element that can be recognized by a target specific
+assembly writer or object file emitter.
+
+Each individual option is required to be either a valid option for the target's
+linker, or an option that is reserved by the target specific assembly writer or
+object file emitter. No other aspect of these options is defined by the IR.
+
+Intrinsic Global Variables
+==========================
+
+LLVM has a number of "magic" global variables that contain data that
+affect code generation or other IR semantics. These are documented here.
+All globals of this sort should have a section specified as
+"``llvm.metadata``". This section and all globals that start with
+"``llvm.``" are reserved for use by LLVM.
+
+The '``llvm.used``' Global Variable
+-----------------------------------
+
+The ``@llvm.used`` global is an array with i8\* element type which has
+:ref:`appending linkage <linkage_appending>`. This array contains a list of
+pointers to global variables and functions which may optionally have a
+pointer cast formed of bitcast or getelementptr. For example, a legal
+use of it is:
+
+.. code-block:: llvm
+
+ @X = global i8 4
+ @Y = global i32 123
+
+ @llvm.used = appending global [2 x i8*] [
+ i8* @X,
+ i8* bitcast (i32* @Y to i8*)
+ ], section "llvm.metadata"
+
+If a global variable appears in the ``@llvm.used`` list, then the
+compiler, assembler, and linker are required to treat the symbol as if
+there is a reference to the global that it cannot see. For example, if a
+variable has internal linkage and no references other than that from the
+``@llvm.used`` list, it cannot be deleted. This is commonly used to
+represent references from inline asms and other things the compiler
+cannot "see", and corresponds to "``attribute((used))``" in GNU C.
+
+On some targets, the code generator must emit a directive to the
+assembler or object file to prevent the assembler and linker from
+molesting the symbol.
+
+The '``llvm.compiler.used``' Global Variable
+--------------------------------------------
+
+The ``@llvm.compiler.used`` directive is the same as the ``@llvm.used``
+directive, except that it only prevents the compiler from touching the
+symbol. On targets that support it, this allows an intelligent linker to
+optimize references to the symbol without being impeded as it would be
+by ``@llvm.used``.
+
+This is a rare construct that should only be used in rare circumstances,
+and should not be exposed to source languages.
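+
+A minimal sketch of its use, mirroring the ``@llvm.used`` example above
+(the global ``@X`` here is hypothetical):
+
+.. code-block:: llvm
+
+    @X = internal global i8 4
+
+    @llvm.compiler.used = appending global [1 x i8*] [ i8* @X ], section "llvm.metadata"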
+
+The '``llvm.global_ctors``' Global Variable
+-------------------------------------------
+
+.. code-block:: llvm
+
+ %0 = type { i32, void ()* }
+ @llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
+
+The ``@llvm.global_ctors`` array contains a list of constructor
+functions and associated priorities. The functions referenced by this
+array will be called in ascending order of priority (i.e. lowest first)
+when the module is loaded. The order of functions with the same priority
+is not defined.
+
+The '``llvm.global_dtors``' Global Variable
+-------------------------------------------
+
+.. code-block:: llvm
+
+ %0 = type { i32, void ()* }
+ @llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
+
+The ``@llvm.global_dtors`` array contains a list of destructor functions
+and associated priorities. The functions referenced by this array will
+be called in descending order of priority (i.e. highest first) when the
+module is unloaded. The order of functions with the same priority is not
+defined.
+
+Instruction Reference
+=====================
+
+The LLVM instruction set consists of several different classifications
+of instructions: :ref:`terminator instructions <terminators>`, :ref:`binary
+instructions <binaryops>`, :ref:`bitwise binary
+instructions <bitwiseops>`, :ref:`memory instructions <memoryops>`, and
+:ref:`other instructions <otherops>`.
+
+.. _terminators:
+
+Terminator Instructions
+-----------------------
+
+As mentioned :ref:`previously <functionstructure>`, every basic block in a
+program ends with a "Terminator" instruction, which indicates which
+block should be executed after the current block is finished. These
+terminator instructions typically yield a '``void``' value: they produce
+control flow, not values (the one exception being the
+':ref:`invoke <i_invoke>`' instruction).
+
+The terminator instructions are: ':ref:`ret <i_ret>`',
+':ref:`br <i_br>`', ':ref:`switch <i_switch>`',
+':ref:`indirectbr <i_indirectbr>`', ':ref:`invoke <i_invoke>`',
+':ref:`resume <i_resume>`', and ':ref:`unreachable <i_unreachable>`'.
+
+.. _i_ret:
+
+'``ret``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ ret <type> <value> ; Return a value from a non-void function
+ ret void ; Return from void function
+
+Overview:
+"""""""""
+
+The '``ret``' instruction is used to return control flow (and optionally
+a value) from a function back to the caller.
+
+There are two forms of the '``ret``' instruction: one that returns a
+value and then causes control flow, and one that just causes control
+flow to occur.
+
+Arguments:
+""""""""""
+
+The '``ret``' instruction optionally accepts a single argument, the
+return value. The type of the return value must be a ':ref:`first
+class <t_firstclass>`' type.
+
+A function is not :ref:`well formed <wellformed>` if it has a non-void
+return type and contains a '``ret``' instruction with no return value or
+a return value with a type that does not match its type, or if it has a
+void return type and contains a '``ret``' instruction with a return
+value.
+
+Semantics:
+""""""""""
+
+When the '``ret``' instruction is executed, control flow returns back to
+the calling function's context. If the caller is a
+":ref:`call <i_call>`" instruction, execution continues at the
+instruction after the call. If the caller was an
+":ref:`invoke <i_invoke>`" instruction, execution continues at the
+beginning of the "normal" destination block. If the instruction returns
+a value, that value shall set the call or invoke instruction's return
+value.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ ret i32 5 ; Return an integer value of 5
+ ret void ; Return from a void function
+ ret { i32, i8 } { i32 4, i8 2 } ; Return a struct of values 4 and 2
+
+.. _i_br:
+
+'``br``' Instruction
+^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ br i1 <cond>, label <iftrue>, label <iffalse>
+ br label <dest> ; Unconditional branch
+
+Overview:
+"""""""""
+
+The '``br``' instruction is used to cause control flow to transfer to a
+different basic block in the current function. There are two forms of
+this instruction, corresponding to a conditional branch and an
+unconditional branch.
+
+Arguments:
+""""""""""
+
+The conditional branch form of the '``br``' instruction takes a single
+'``i1``' value and two '``label``' values. The unconditional form of the
+'``br``' instruction takes a single '``label``' value as a target.
+
+Semantics:
+""""""""""
+
+Upon execution of a conditional '``br``' instruction, the '``i1``'
+argument is evaluated. If the value is ``true``, control flows to the
+'``iftrue``' ``label`` argument. If the value is ``false``, control flows
+to the '``iffalse``' ``label`` argument.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ Test:
+ %cond = icmp eq i32 %a, %b
+ br i1 %cond, label %IfEqual, label %IfUnequal
+ IfEqual:
+ ret i32 1
+ IfUnequal:
+ ret i32 0
+
+.. _i_switch:
+
+'``switch``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ switch <intty> <value>, label <defaultdest> [ <intty> <val>, label <dest> ... ]
+
+Overview:
+"""""""""
+
+The '``switch``' instruction is used to transfer control flow to one of
+several different places. It is a generalization of the '``br``'
+instruction, allowing a branch to occur to one of many possible
+destinations.
+
+Arguments:
+""""""""""
+
+The '``switch``' instruction uses three parameters: an integer
+comparison value '``value``', a default '``label``' destination, and an
+array of pairs of comparison value constants and '``label``'s. The table
+is not allowed to contain duplicate constant entries.
+
+Semantics:
+""""""""""
+
+The ``switch`` instruction specifies a table of values and destinations.
+When the '``switch``' instruction is executed, this table is searched
+for the given value. If the value is found, control flow is transferred
+to the corresponding destination; otherwise, control flow is transferred
+to the default destination.
+
+Implementation:
+"""""""""""""""
+
+Depending on properties of the target machine and the particular
+``switch`` instruction, this instruction may be code generated in
+different ways. For example, it could be generated as a series of
+chained conditional branches or with a lookup table.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ ; Emulate a conditional br instruction
+ %Val = zext i1 %value to i32
+ switch i32 %Val, label %truedest [ i32 0, label %falsedest ]
+
+ ; Emulate an unconditional br instruction
+ switch i32 0, label %dest [ ]
+
+ ; Implement a jump table:
+ switch i32 %val, label %otherwise [ i32 0, label %onzero
+ i32 1, label %onone
+ i32 2, label %ontwo ]
+
+.. _i_indirectbr:
+
+'``indirectbr``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ indirectbr <somety>* <address>, [ label <dest1>, label <dest2>, ... ]
+
+Overview:
+"""""""""
+
+The '``indirectbr``' instruction implements an indirect branch to a
+label within the current function, whose address is specified by
+"``address``". Address must be derived from a
+:ref:`blockaddress <blockaddress>` constant.
+
+Arguments:
+""""""""""
+
+The '``address``' argument is the address of the label to jump to. The
+rest of the arguments indicate the full set of possible destinations
+that the address may point to. Blocks are allowed to occur multiple
+times in the destination list, though this isn't particularly useful.
+
+This destination list is required so that dataflow analysis has an
+accurate understanding of the CFG.
+
+Semantics:
+""""""""""
+
+Control transfers to the block specified in the address argument. All
+possible destination blocks must be listed in the label list, otherwise
+this instruction has undefined behavior. This implies that jumps to
+labels defined in other functions have undefined behavior as well.
+
+Implementation:
+"""""""""""""""
+
+This is typically implemented with a jump through a register.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ indirectbr i8* %Addr, [ label %bb1, label %bb2, label %bb3 ]
+
+.. _i_invoke:
+
+'``invoke``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = invoke [cconv] [ret attrs] <ptr to function ty> <function ptr val>(<function args>) [fn attrs]
+ to label <normal label> unwind label <exception label>
+
+Overview:
+"""""""""
+
+The '``invoke``' instruction causes control to transfer to a specified
+function, with the possibility of control flow transfer to either the
+'``normal``' label or the '``exception``' label. If the callee function
+returns with the "``ret``" instruction, control flow will return to the
+"normal" label. If the callee (or any indirect callees) returns via the
+":ref:`resume <i_resume>`" instruction or other exception handling
+mechanism, control is interrupted and continued at the dynamically
+nearest "exception" label.
+
+The '``exception``' label is a `landing
+pad <ExceptionHandling.html#overview>`_ for the exception. As such,
+the '``exception``' label is required to have the
+":ref:`landingpad <i_landingpad>`" instruction, which contains the
+information about the behavior of the program after unwinding happens,
+as its first non-PHI instruction. The restrictions on the
+"``landingpad``" instruction's tightly couples it to the "``invoke``"
+instruction, so that the important information contained within the
+"``landingpad``" instruction can't be lost through normal code motion.
+
+Arguments:
+""""""""""
+
+This instruction requires several arguments:
+
+#. The optional "cconv" marker indicates which :ref:`calling
+ convention <callingconv>` the call should use. If none is
+ specified, the call defaults to using C calling conventions.
+#. The optional :ref:`Parameter Attributes <paramattrs>` list for return
+ values. Only '``zeroext``', '``signext``', and '``inreg``' attributes
+ are valid here.
+#. '``ptr to function ty``': shall be the signature of the pointer to
+ function value being invoked. In most cases, this is a direct
+ function invocation, but indirect ``invoke``'s are just as possible,
+ branching off an arbitrary pointer to function value.
+#. '``function ptr val``': An LLVM value containing a pointer to a
+ function to be invoked.
+#. '``function args``': argument list whose types match the function
+ signature argument types and parameter attributes. All arguments must
+ be of :ref:`first class <t_firstclass>` type. If the function signature
+ indicates the function accepts a variable number of arguments, the
+ extra arguments can be specified.
+#. '``normal label``': the label reached when the called function
+ executes a '``ret``' instruction.
+#. '``exception label``': the label reached when a callee returns via
+ the :ref:`resume <i_resume>` instruction or other exception handling
+ mechanism.
+#. The optional :ref:`function attributes <fnattrs>` list. Only
+ '``noreturn``', '``nounwind``', '``readonly``' and '``readnone``'
+ attributes are valid here.
+
+Semantics:
+""""""""""
+
+This instruction is designed to operate as a standard '``call``'
+instruction in most regards. The primary difference is that it
+establishes an association with a label, which is used by the runtime
+library to unwind the stack.
+
+This instruction is used in languages with destructors to ensure that
+proper cleanup is performed in the case of either a ``longjmp`` or a
+thrown exception. Additionally, this is important for implementation of
+'``catch``' clauses in high-level languages that support them.
+
+For the purposes of the SSA form, the definition of the value returned
+by the '``invoke``' instruction is deemed to occur on the edge from the
+current block to the "normal" label. If the callee unwinds then no
+return value is available.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %retval = invoke i32 @Test(i32 15) to label %Continue
+ unwind label %TestCleanup ; {i32}:retval set
+ %retval = invoke coldcc i32 %Testfnptr(i32 15) to label %Continue
+ unwind label %TestCleanup ; {i32}:retval set
+
+.. _i_resume:
+
+'``resume``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ resume <type> <value>
+
+Overview:
+"""""""""
+
+The '``resume``' instruction is a terminator instruction that has no
+successors.
+
+Arguments:
+""""""""""
+
+The '``resume``' instruction requires one argument, which must have the
+same type as the result of any '``landingpad``' instruction in the same
+function.
+
+Semantics:
+""""""""""
+
+The '``resume``' instruction resumes propagation of an existing
+(in-flight) exception whose unwinding was interrupted with a
+:ref:`landingpad <i_landingpad>` instruction.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ resume { i8*, i32 } %exn
+
+.. _i_unreachable:
+
+'``unreachable``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ unreachable
+
+Overview:
+"""""""""
+
+The '``unreachable``' instruction has no defined semantics. This
+instruction is used to inform the optimizer that a particular portion of
+the code is not reachable. This can be used to indicate that the code
+after a no-return function cannot be reached, and other facts.
+
+Semantics:
+""""""""""
+
+The '``unreachable``' instruction has no defined semantics.
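+
+Example:
+""""""""
+
+A typical use is to terminate a basic block after a call that never
+returns (the ``@abort`` function here is a hypothetical ``noreturn``
+function):
+
+.. code-block:: llvm
+
+    call void @abort() noreturn
+    unreachable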
+
+.. _binaryops:
+
+Binary Operations
+-----------------
+
+Binary operators are used to do most of the computation in a program.
+They require two operands of the same type, execute an operation on
+them, and produce a single value. The operands might represent multiple
+data, as is the case with the :ref:`vector <t_vector>` data type. The
+result value has the same type as its operands.
+
+There are several different binary operators:
+
+.. _i_add:
+
+'``add``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = add <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = add nuw <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = add nsw <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = add nuw nsw <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``add``' instruction returns the sum of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``add``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the integer sum of the two operands.
+
+If the sum has unsigned overflow, the result returned is the
+mathematical result modulo 2\ :sup:`n`\ , where n is the bit width of
+the result.
+
+Because LLVM integers use a two's complement representation, this
+instruction is appropriate for both signed and unsigned integers.
+
+``nuw`` and ``nsw`` stand for "No Unsigned Wrap" and "No Signed Wrap",
+respectively. If the ``nuw`` and/or ``nsw`` keywords are present, the
+result value of the ``add`` is a :ref:`poison value <poisonvalues>` if
+unsigned and/or signed overflow, respectively, occurs.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = add i32 4, %var ; yields {i32}:result = 4 + %var
+
+.. _i_fadd:
+
+'``fadd``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = fadd [fast-math flags]* <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``fadd``' instruction returns the sum of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``fadd``' instruction must be :ref:`floating
+point <t_floating>` or :ref:`vector <t_vector>` of floating point values.
+Both arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the floating point sum of the two operands. This
+instruction can also take any number of :ref:`fast-math flags <fastmath>`,
+which are optimization hints to enable otherwise unsafe floating point
+optimizations.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = fadd float 4.0, %var ; yields {float}:result = 4.0 + %var
+
+'``sub``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = sub <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = sub nuw <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = sub nsw <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = sub nuw nsw <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``sub``' instruction returns the difference of its two operands.
+
+Note that the '``sub``' instruction is used to represent the '``neg``'
+instruction present in most other intermediate representations.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``sub``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the integer difference of the two operands.
+
+If the difference has unsigned overflow, the result returned is the
+mathematical result modulo 2\ :sup:`n`\ , where n is the bit width of
+the result.
+
+Because LLVM integers use a two's complement representation, this
+instruction is appropriate for both signed and unsigned integers.
+
+``nuw`` and ``nsw`` stand for "No Unsigned Wrap" and "No Signed Wrap",
+respectively. If the ``nuw`` and/or ``nsw`` keywords are present, the
+result value of the ``sub`` is a :ref:`poison value <poisonvalues>` if
+unsigned and/or signed overflow, respectively, occurs.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+    <result> = sub i32 4, %var          ; yields {i32}:result = 4 - %var
+    <result> = sub i32 0, %val          ; yields {i32}:result = -%val
+
+.. _i_fsub:
+
+'``fsub``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = fsub [fast-math flags]* <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``fsub``' instruction returns the difference of its two operands.
+
+Note that the '``fsub``' instruction is used to represent the '``fneg``'
+instruction present in most other intermediate representations.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``fsub``' instruction must be :ref:`floating
+point <t_floating>` or :ref:`vector <t_vector>` of floating point values.
+Both arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the floating point difference of the two operands.
+This instruction can also take any number of :ref:`fast-math
+flags <fastmath>`, which are optimization hints to enable otherwise
+unsafe floating point optimizations.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+    <result> = fsub float 4.0, %var           ; yields {float}:result = 4.0 - %var
+    <result> = fsub float -0.0, %val          ; yields {float}:result = -%val
+
+'``mul``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = mul <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = mul nuw <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = mul nsw <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = mul nuw nsw <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``mul``' instruction returns the product of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``mul``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the integer product of the two operands.
+
+If the result of the multiplication has unsigned overflow, the result
+returned is the mathematical result modulo 2\ :sup:`n`\ , where n is the
+bit width of the result.
+
+Because LLVM integers use a two's complement representation, and the
+result is the same width as the operands, this instruction returns the
+correct result for both signed and unsigned integers. If a full product
+(e.g. ``i32`` * ``i32`` -> ``i64``) is needed, the operands should be
+sign-extended or zero-extended as appropriate to the width of the full
+product.
+
+``nuw`` and ``nsw`` stand for "No Unsigned Wrap" and "No Signed Wrap",
+respectively. If the ``nuw`` and/or ``nsw`` keywords are present, the
+result value of the ``mul`` is a :ref:`poison value <poisonvalues>` if
+unsigned and/or signed overflow, respectively, occurs.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = mul i32 4, %var ; yields {i32}:result = 4 * %var
+
+.. _i_fmul:
+
+'``fmul``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = fmul [fast-math flags]* <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``fmul``' instruction returns the product of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``fmul``' instruction must be :ref:`floating
+point <t_floating>` or :ref:`vector <t_vector>` of floating point values.
+Both arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the floating point product of the two operands.
+This instruction can also take any number of :ref:`fast-math
+flags <fastmath>`, which are optimization hints to enable otherwise
+unsafe floating point optimizations.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = fmul float 4.0, %var ; yields {float}:result = 4.0 * %var
+
+'``udiv``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = udiv <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = udiv exact <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``udiv``' instruction returns the quotient of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``udiv``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the unsigned integer quotient of the two operands.
+
+Note that unsigned integer division and signed integer division are
+distinct operations; for signed integer division, use '``sdiv``'.
+
+Division by zero leads to undefined behavior.
+
+If the ``exact`` keyword is present, the result value of the ``udiv`` is
+a :ref:`poison value <poisonvalues>` if %op1 is not a multiple of %op2 (as
+such, "((a udiv exact b) mul b) == a").
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = udiv i32 4, %var ; yields {i32}:result = 4 / %var
+
+'``sdiv``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = sdiv <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = sdiv exact <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``sdiv``' instruction returns the quotient of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``sdiv``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the signed integer quotient of the two operands
+rounded towards zero.
+
+Note that signed integer division and unsigned integer division are
+distinct operations; for unsigned integer division, use '``udiv``'.
+
+Division by zero leads to undefined behavior. Overflow also leads to
+undefined behavior; this is a rare case, but can occur, for example, by
+doing a 32-bit division of -2147483648 by -1.
+
+If the ``exact`` keyword is present, the result value of the ``sdiv`` is
+a :ref:`poison value <poisonvalues>` if the result would be rounded.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = sdiv i32 4, %var ; yields {i32}:result = 4 / %var
+
+.. _i_fdiv:
+
+'``fdiv``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = fdiv [fast-math flags]* <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``fdiv``' instruction returns the quotient of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``fdiv``' instruction must be :ref:`floating
+point <t_floating>` or :ref:`vector <t_vector>` of floating point values.
+Both arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the floating point quotient of the two operands.
+This instruction can also take any number of :ref:`fast-math
+flags <fastmath>`, which are optimization hints to enable otherwise
+unsafe floating point optimizations.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = fdiv float 4.0, %var ; yields {float}:result = 4.0 / %var
+
+'``urem``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = urem <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``urem``' instruction returns the remainder from the unsigned
+division of its two arguments.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``urem``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+This instruction returns the unsigned integer *remainder* of a division.
+This instruction always performs an unsigned division to get the
+remainder.
+
+Note that unsigned integer remainder and signed integer remainder are
+distinct operations; for signed integer remainder, use '``srem``'.
+
+Taking the remainder of a division by zero leads to undefined behavior.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = urem i32 4, %var ; yields {i32}:result = 4 % %var
+
+'``srem``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = srem <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``srem``' instruction returns the remainder from the signed
+division of its two operands. This instruction can also take
+:ref:`vector <t_vector>` versions of the values in which case the elements
+must be integers.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``srem``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+This instruction returns the *remainder* of a division (where the result
+is either zero or has the same sign as the dividend, ``op1``), not the
+*modulo* operator (where the result is either zero or has the same sign
+as the divisor, ``op2``) of a value. For more information about the
+difference, see `The Math
+Forum <http://mathforum.org/dr.math/problems/anne.4.28.99.html>`_. For a
+table of how this is implemented in various languages, please see
+`Wikipedia: modulo
+operation <http://en.wikipedia.org/wiki/Modulo_operation>`_.
+
+Note that signed integer remainder and unsigned integer remainder are
+distinct operations; for unsigned integer remainder, use '``urem``'.
+
+Taking the remainder of a division by zero leads to undefined behavior.
+Overflow also leads to undefined behavior; this is a rare case, but can
+occur, for example, by taking the remainder of a 32-bit division of
+-2147483648 by -1. (The remainder doesn't actually overflow, but this
+rule lets srem be implemented using instructions that return both the
+result of the division and the remainder.)
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = srem i32 4, %var ; yields {i32}:result = 4 % %var
+
+.. _i_frem:
+
+'``frem``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = frem [fast-math flags]* <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``frem``' instruction returns the remainder from the division of
+its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``frem``' instruction must be :ref:`floating
+point <t_floating>` or :ref:`vector <t_vector>` of floating point values.
+Both arguments must have identical types.
+
+Semantics:
+""""""""""
+
+This instruction returns the *remainder* of a division. The remainder
+has the same sign as the dividend. This instruction can also take any
+number of :ref:`fast-math flags <fastmath>`, which are optimization hints
+to enable otherwise unsafe floating point optimizations.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = frem float 4.0, %var ; yields {float}:result = 4.0 % %var
+
+.. _bitwiseops:
+
+Bitwise Binary Operations
+-------------------------
+
+Bitwise binary operators are used to do various forms of bit-twiddling
+in a program. They are generally very efficient instructions and can
+commonly be strength reduced from other instructions. They require two
+operands of the same type, execute an operation on them, and produce a
+single value. The resulting value is the same type as its operands.
+
+'``shl``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = shl <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = shl nuw <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = shl nsw <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = shl nuw nsw <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``shl``' instruction returns the first operand shifted to the left
+a specified number of bits.
+
+Arguments:
+""""""""""
+
+Both arguments to the '``shl``' instruction must be the same
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer type.
+'``op2``' is treated as an unsigned value.
+
+Semantics:
+""""""""""
+
+The value produced is ``op1`` \* 2\ :sup:`op2` mod 2\ :sup:`n`,
+where ``n`` is the width of the result. If ``op2`` is (statically or
+dynamically) negative or equal to or larger than the number of bits in
+``op1``, the result is undefined. If the arguments are vectors, each
+vector element of ``op1`` is shifted by the corresponding shift amount
+in ``op2``.
+
+If the ``nuw`` keyword is present, then the shift produces a :ref:`poison
+value <poisonvalues>` if it shifts out any non-zero bits. If the
+``nsw`` keyword is present, then the shift produces a :ref:`poison
+value <poisonvalues>` if it shifts out any bits that disagree with the
+resultant sign bit. As such, NUW/NSW have the same semantics as they
+would if the shift were expressed as a mul instruction with the same
+nsw/nuw bits in (mul %op1, (shl 1, %op2)).
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = shl i32 4, %var ; yields {i32}: 4 << %var
+ <result> = shl i32 4, 2 ; yields {i32}: 16
+ <result> = shl i32 1, 10 ; yields {i32}: 1024
+ <result> = shl i32 1, 32 ; undefined
+ <result> = shl <2 x i32> < i32 1, i32 1>, < i32 1, i32 2> ; yields: result=<2 x i32> < i32 2, i32 4>
+
+'``lshr``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = lshr <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = lshr exact <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``lshr``' instruction (logical shift right) returns the first
+operand shifted to the right a specified number of bits with zero fill.
+
+Arguments:
+""""""""""
+
+Both arguments to the '``lshr``' instruction must be the same
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer type.
+'``op2``' is treated as an unsigned value.
+
+Semantics:
+""""""""""
+
+This instruction always performs a logical shift right operation. The
+most significant bits of the result will be filled with zero bits after
+the shift. If ``op2`` is (statically or dynamically) equal to or larger
+than the number of bits in ``op1``, the result is undefined. If the
+arguments are vectors, each vector element of ``op1`` is shifted by the
+corresponding shift amount in ``op2``.
+
+If the ``exact`` keyword is present, the result value of the ``lshr`` is
+a :ref:`poison value <poisonvalues>` if any of the bits shifted out are
+non-zero.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+    <result> = lshr i32 4, 1   ; yields {i32}:result = 2
+    <result> = lshr i32 4, 2   ; yields {i32}:result = 1
+    <result> = lshr i8  4, 3   ; yields {i8}:result = 0
+    <result> = lshr i8 -2, 1   ; yields {i8}:result = 0x7F
+    <result> = lshr i32 1, 32  ; undefined
+    <result> = lshr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 2> ; yields: result=<2 x i32> < i32 0x7FFFFFFF, i32 1>
+
+'``ashr``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = ashr <ty> <op1>, <op2> ; yields {ty}:result
+ <result> = ashr exact <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``ashr``' instruction (arithmetic shift right) returns the first
+operand shifted to the right a specified number of bits with sign
+extension.
+
+Arguments:
+""""""""""
+
+Both arguments to the '``ashr``' instruction must be the same
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer type.
+'``op2``' is treated as an unsigned value.
+
+Semantics:
+""""""""""
+
+This instruction always performs an arithmetic shift right operation.
+The most significant bits of the result will be filled with the sign bit
+of ``op1``. If ``op2`` is (statically or dynamically) equal to or larger
+than the number of bits in ``op1``, the result is undefined. If the
+arguments are vectors, each vector element of ``op1`` is shifted by the
+corresponding shift amount in ``op2``.
+
+If the ``exact`` keyword is present, the result value of the ``ashr`` is
+a :ref:`poison value <poisonvalues>` if any of the bits shifted out are
+non-zero.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = ashr i32 4, 1 ; yields {i32}:result = 2
+ <result> = ashr i32 4, 2 ; yields {i32}:result = 1
+ <result> = ashr i8 4, 3 ; yields {i8}:result = 0
+ <result> = ashr i8 -2, 1 ; yields {i8}:result = -1
+ <result> = ashr i32 1, 32 ; undefined
+ <result> = ashr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 3> ; yields: result=<2 x i32> < i32 -1, i32 0>
+
+'``and``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = and <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``and``' instruction returns the bitwise logical and of its two
+operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``and``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The truth table used for the '``and``' instruction is:
+
++-----+-----+-----+
+| In0 | In1 | Out |
++-----+-----+-----+
+| 0 | 0 | 0 |
++-----+-----+-----+
+| 0 | 1 | 0 |
++-----+-----+-----+
+| 1 | 0 | 0 |
++-----+-----+-----+
+| 1 | 1 | 1 |
++-----+-----+-----+
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = and i32 4, %var ; yields {i32}:result = 4 & %var
+ <result> = and i32 15, 40 ; yields {i32}:result = 8
+ <result> = and i32 4, 8 ; yields {i32}:result = 0
+
+'``or``' Instruction
+^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = or <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``or``' instruction returns the bitwise logical inclusive or of its
+two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``or``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The truth table used for the '``or``' instruction is:
+
++-----+-----+-----+
+| In0 | In1 | Out |
++-----+-----+-----+
+| 0 | 0 | 0 |
++-----+-----+-----+
+| 0 | 1 | 1 |
++-----+-----+-----+
+| 1 | 0 | 1 |
++-----+-----+-----+
+| 1 | 1 | 1 |
++-----+-----+-----+
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = or i32 4, %var ; yields {i32}:result = 4 | %var
+ <result> = or i32 15, 40 ; yields {i32}:result = 47
+ <result> = or i32 4, 8 ; yields {i32}:result = 12
+
+'``xor``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = xor <ty> <op1>, <op2> ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``xor``' instruction returns the bitwise logical exclusive or of
+its two operands. The ``xor`` is used to implement the "one's
+complement" operation, which is the "~" operator in C.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``xor``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The truth table used for the '``xor``' instruction is:
+
++-----+-----+-----+
+| In0 | In1 | Out |
++-----+-----+-----+
+| 0 | 0 | 0 |
++-----+-----+-----+
+| 0 | 1 | 1 |
++-----+-----+-----+
+| 1 | 0 | 1 |
++-----+-----+-----+
+| 1 | 1 | 0 |
++-----+-----+-----+
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = xor i32 4, %var ; yields {i32}:result = 4 ^ %var
+ <result> = xor i32 15, 40 ; yields {i32}:result = 39
+ <result> = xor i32 4, 8 ; yields {i32}:result = 12
+ <result> = xor i32 %V, -1 ; yields {i32}:result = ~%V
+
+Vector Operations
+-----------------
+
+LLVM supports several instructions to represent vector operations in a
+target-independent manner. These instructions cover the element-access
+and vector-specific operations needed to process vectors effectively.
+While LLVM does directly support these vector operations, many
+sophisticated algorithms will want to use target-specific intrinsics to
+take full advantage of a specific target.
+
+.. _i_extractelement:
+
+'``extractelement``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = extractelement <n x <ty>> <val>, i32 <idx> ; yields <ty>
+
+Overview:
+"""""""""
+
+The '``extractelement``' instruction extracts a single scalar element
+from a vector at a specified index.
+
+Arguments:
+""""""""""
+
+The first operand of an '``extractelement``' instruction is a value of
+:ref:`vector <t_vector>` type. The second operand is an index indicating
+the position from which to extract the element. The index may be a
+variable.
+
+Semantics:
+""""""""""
+
+The result is a scalar of the same type as the element type of ``val``.
+Its value is the value at position ``idx`` of ``val``. If ``idx``
+exceeds the length of ``val``, the results are undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = extractelement <4 x i32> %vec, i32 0 ; yields i32
+
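+The index operand need not be a constant. As a sketch (assuming a
+runtime index ``%idx`` is in scope), a variable index looks like:
+
+.. code-block:: llvm
+
+ %elt = extractelement <4 x i32> %vec, i32 %idx ; yields i32
+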
+.. _i_insertelement:
+
+'``insertelement``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = insertelement <n x <ty>> <val>, <ty> <elt>, i32 <idx> ; yields <n x <ty>>
+
+Overview:
+"""""""""
+
+The '``insertelement``' instruction inserts a scalar element into a
+vector at a specified index.
+
+Arguments:
+""""""""""
+
+The first operand of an '``insertelement``' instruction is a value of
+:ref:`vector <t_vector>` type. The second operand is a scalar value whose
+type must equal the element type of the first operand. The third operand
+is an index indicating the position at which to insert the value. The
+index may be a variable.
+
+Semantics:
+""""""""""
+
+The result is a vector of the same type as ``val``. Its element values
+are those of ``val`` except at position ``idx``, where it gets the value
+``elt``. If ``idx`` exceeds the length of ``val``, the results are
+undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = insertelement <4 x i32> %vec, i32 1, i32 0 ; yields <4 x i32>
+
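+Because each '``insertelement``' returns a new vector value, a vector
+can be assembled from scalars by chaining inserts starting from
+``undef``; a sketch, assuming scalars ``%a`` and ``%b`` are in scope:
+
+.. code-block:: llvm
+
+ %v0 = insertelement <2 x i32> undef, i32 %a, i32 0 ; yields <2 x i32>
+ %v1 = insertelement <2 x i32> %v0, i32 %b, i32 1 ; yields <2 x i32>
+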
+.. _i_shufflevector:
+
+'``shufflevector``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask> ; yields <m x <ty>>
+
+Overview:
+"""""""""
+
+The '``shufflevector``' instruction constructs a permutation of elements
+from two input vectors, returning a vector with the same element type as
+the inputs and a length equal to that of the shuffle mask.
+
+Arguments:
+""""""""""
+
+The first two operands of a '``shufflevector``' instruction are vectors
+with the same type. The third argument is a shuffle mask whose element
+type is always 'i32'. The result of the instruction is a vector whose
+length is the same as the shuffle mask and whose element type is the
+same as the element type of the first two operands.
+
+The shuffle mask operand is required to be a constant vector with either
+constant integer or undef values.
+
+Semantics:
+""""""""""
+
+The elements of the two input vectors are numbered from left to right
+across both of the vectors. The shuffle mask operand specifies, for each
+element of the result vector, which element of the two input vectors the
+result element gets. The element selector may be undef (meaning "don't
+care") and the second operand may be undef if performing a shuffle from
+only one vector.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
+ <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; yields <4 x i32>
+ <result> = shufflevector <4 x i32> %v1, <4 x i32> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; yields <4 x i32> - Identity shuffle.
+ <result> = shufflevector <8 x i32> %v1, <8 x i32> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; yields <4 x i32>
+ <result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 > ; yields <8 x i32>
+
+Aggregate Operations
+--------------------
+
+LLVM supports several instructions for working with
+:ref:`aggregate <t_aggregate>` values.
+
+.. _i_extractvalue:
+
+'``extractvalue``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = extractvalue <aggregate type> <val>, <idx>{, <idx>}*
+
+Overview:
+"""""""""
+
+The '``extractvalue``' instruction extracts the value of a member field
+from an :ref:`aggregate <t_aggregate>` value.
+
+Arguments:
+""""""""""
+
+The first operand of an '``extractvalue``' instruction is a value of
+:ref:`struct <t_struct>` or :ref:`array <t_array>` type. The remaining
+operands are constant indices that specify which value to extract, in a
+manner similar to the indices in a '``getelementptr``' instruction.
+
+The major differences to ``getelementptr`` indexing are:
+
+- Since the value being indexed is not a pointer, the first index is
+ omitted and assumed to be zero.
+- At least one index must be specified.
+- Not only struct indices but also array indices must be in bounds.
+
+Semantics:
+""""""""""
+
+The result is the value at the position in the aggregate specified by
+the index operands.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = extractvalue {i32, float} %agg, 0 ; yields i32
+
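+Multiple indices descend through nested aggregates, one level per index.
+For example, a sketch assuming a nested value ``%nested``:
+
+.. code-block:: llvm
+
+ %inner = extractvalue {i32, {float, i8}} %nested, 1, 0 ; yields float
+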
+.. _i_insertvalue:
+
+'``insertvalue``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = insertvalue <aggregate type> <val>, <ty> <elt>, <idx>{, <idx>}* ; yields <aggregate type>
+
+Overview:
+"""""""""
+
+The '``insertvalue``' instruction inserts a value into a member field in
+an :ref:`aggregate <t_aggregate>` value.
+
+Arguments:
+""""""""""
+
+The first operand of an '``insertvalue``' instruction is a value of
+:ref:`struct <t_struct>` or :ref:`array <t_array>` type. The second operand is
+a first-class value to insert. The following operands are constant
+indices indicating the position at which to insert the value, in a
+manner similar to the indices in an '``extractvalue``' instruction. The value
+to insert must have the same type as the value identified by the
+indices.
+
+Semantics:
+""""""""""
+
+The result is an aggregate of the same type as ``val``. Its value is
+that of ``val`` except that the value at the position specified by the
+indices is that of ``elt``.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %agg1 = insertvalue {i32, float} undef, i32 1, 0 ; yields {i32 1, float undef}
+ %agg2 = insertvalue {i32, float} %agg1, float %val, 1 ; yields {i32 1, float %val}
+ %agg3 = insertvalue {i32, {float}} %agg1, float %val, 1, 0 ; yields {i32 1, float %val}
+
+.. _memoryops:
+
+Memory Access and Addressing Operations
+---------------------------------------
+
+A key design point of an SSA-based representation is how it represents
+memory. In LLVM, no memory locations are in SSA form, which makes things
+very simple. This section describes how to read, write, and allocate
+memory in LLVM.
+
+.. _i_alloca:
+
+'``alloca``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = alloca <type>[, <ty> <NumElements>][, align <alignment>] ; yields {type*}:result
+
+Overview:
+"""""""""
+
+The '``alloca``' instruction allocates memory on the stack frame of the
+currently executing function, to be automatically released when this
+function returns to its caller. The object is always allocated in the
+generic address space (address space zero).
+
+Arguments:
+""""""""""
+
+The '``alloca``' instruction allocates ``sizeof(<type>)*NumElements``
+bytes of memory on the runtime stack, returning a pointer of the
+appropriate type to the program. If "NumElements" is specified, it is
+the number of elements allocated; otherwise, "NumElements" defaults to
+one. If a constant alignment is specified, the value result of the
+allocation is guaranteed to be aligned to at least that boundary. If not
+specified, or if zero, the target can choose to align the allocation on
+any convenient boundary compatible with the type.
+
+'``type``' may be any sized type.
+
+Semantics:
+""""""""""
+
+Memory is allocated; a pointer is returned. The operation is undefined
+if there is insufficient stack space for the allocation. '``alloca``'d
+memory is automatically released when the function returns. The
+'``alloca``' instruction is commonly used to represent automatic
+variables that must have an address available. When the function returns
+(either with the ``ret`` or ``resume`` instructions), the memory is
+reclaimed. Allocating zero bytes is legal, but the result is undefined.
+The order in which memory is allocated (i.e., which way the stack grows)
+is not specified.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %ptr = alloca i32 ; yields {i32*}:ptr
+ %ptr = alloca i32, i32 4 ; yields {i32*}:ptr
+ %ptr = alloca i32, i32 4, align 1024 ; yields {i32*}:ptr
+ %ptr = alloca i32, align 1024 ; yields {i32*}:ptr
+
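+Since '``type``' may be any sized type, aggregates can be allocated
+directly as well; for instance:
+
+.. code-block:: llvm
+
+ %arr = alloca [10 x i32] ; yields {[10 x i32]*}:arr
+ %pair = alloca {i32, float}, align 8 ; yields {{i32, float}*}:pair
+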
+.. _i_load:
+
+'``load``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = load [volatile] <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>][, !invariant.load !<index>]
+ <result> = load atomic [volatile] <ty>* <pointer> [singlethread] <ordering>, align <alignment>
+ !<index> = !{ i32 1 }
+
+Overview:
+"""""""""
+
+The '``load``' instruction is used to read from memory.
+
+Arguments:
+""""""""""
+
+The argument to the '``load``' instruction specifies the memory address
+from which to load. The pointer must point to a :ref:`first
+class <t_firstclass>` type. If the ``load`` is marked as ``volatile``,
+then the optimizer is not allowed to modify the number or order of
+execution of this ``load`` with other :ref:`volatile
+operations <volatile>`.
+
+If the ``load`` is marked as ``atomic``, it takes an extra
+:ref:`ordering <ordering>` and optional ``singlethread`` argument. The
+``release`` and ``acq_rel`` orderings are not valid on ``load``
+instructions. Atomic loads produce :ref:`defined <memmodel>` results
+when they may see multiple atomic stores. The type of the pointee must
+be an integer type whose bit width is a power of two greater than or
+equal to eight and less than or equal to a target-specific size limit.
+``align`` must be explicitly specified on atomic loads, and the load has
+undefined behavior if the alignment is not set to a value which is at
+least the size in bytes of the pointee. ``!nontemporal`` does not have
+any defined semantics for atomic loads.
+
+The optional constant ``align`` argument specifies the alignment of the
+operation (that is, the alignment of the memory address). A value of 0
+or an omitted ``align`` argument means that the operation has the ABI
+alignment for the target. It is the responsibility of the code emitter
+to ensure that the alignment information is correct. Overestimating the
+alignment results in undefined behavior. Underestimating the alignment
+may produce less efficient code. An alignment of 1 is always safe.
+
+The optional ``!nontemporal`` metadata must reference a single
+metadata name <index> corresponding to a metadata node with one
+``i32`` entry of value 1. The existence of the ``!nontemporal``
+metadata on the instruction tells the optimizer and code generator
+that this load is not expected to be reused in the cache. The code
+generator may select special instructions to save cache bandwidth, such
+as the ``MOVNT`` instruction on x86.
+
+The optional ``!invariant.load`` metadata must reference a single
+metadata name <index> corresponding to a metadata node with no
+entries. The existence of the ``!invariant.load`` metadata on the
+instruction tells the optimizer and code generator that this load
+address points to memory which does not change value during program
+execution. The optimizer may then move this load around, for example, by
+hoisting it out of loops using loop invariant code motion.
+
+Semantics:
+""""""""""
+
+The location of memory pointed to is loaded. If the value being loaded
+is of scalar type then the number of bytes read does not exceed the
+minimum number of bytes needed to hold all bits of the type. For
+example, loading an ``i24`` reads at most three bytes. When loading a
+value of a type like ``i20`` with a size that is not an integral number
+of bytes, the result is undefined if the value was not originally
+written using a store of the same type.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %ptr = alloca i32 ; yields {i32*}:ptr
+ store i32 3, i32* %ptr ; yields {void}
+ %val = load i32* %ptr ; yields {i32}:val = i32 3
+
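+An atomic load, following the second syntax form above, must carry an
+ordering and an explicit alignment; a sketch:
+
+.. code-block:: llvm
+
+ %val = load atomic i32* %ptr acquire, align 4 ; yields {i32}:val
+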
+.. _i_store:
+
+'``store``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ store [volatile] <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>] ; yields {void}
+ store atomic [volatile] <ty> <value>, <ty>* <pointer> [singlethread] <ordering>, align <alignment> ; yields {void}
+
+Overview:
+"""""""""
+
+The '``store``' instruction is used to write to memory.
+
+Arguments:
+""""""""""
+
+There are two arguments to the '``store``' instruction: a value to store
+and an address at which to store it. The type of the '``<pointer>``'
+operand must be a pointer to the :ref:`first class <t_firstclass>` type of
+the '``<value>``' operand. If the ``store`` is marked as ``volatile``,
+then the optimizer is not allowed to modify the number or order of
+execution of this ``store`` with other :ref:`volatile
+operations <volatile>`.
+
+If the ``store`` is marked as ``atomic``, it takes an extra
+:ref:`ordering <ordering>` and optional ``singlethread`` argument. The
+``acquire`` and ``acq_rel`` orderings are not valid on ``store``
+instructions. Atomic loads produce :ref:`defined <memmodel>` results
+when they may see multiple atomic stores. The type of the pointee must
+be an integer type whose bit width is a power of two greater than or
+equal to eight and less than or equal to a target-specific size limit.
+``align`` must be explicitly specified on atomic stores, and the store
+has undefined behavior if the alignment is not set to a value which is
+at least the size in bytes of the pointee. ``!nontemporal`` does not
+have any defined semantics for atomic stores.
+
+The optional constant "align" argument specifies the alignment of the
+operation (that is, the alignment of the memory address). A value of 0
+or an omitted "align" argument means that the operation has the abi
+alignment for the target. It is the responsibility of the code emitter
+to ensure that the alignment information is correct. Overestimating the
+alignment results in an undefined behavior. Underestimating the
+alignment may produce less efficient code. An alignment of 1 is always
+safe.
+
+The optional ``!nontemporal`` metadata must reference a single metadata
+name <index> corresponding to a metadata node with one ``i32`` entry of
+value 1. The existence of the ``!nontemporal`` metadata on the
+instruction tells the optimizer and code generator that this store is
+not expected to be reused in the cache. The code generator may select
+special instructions to save cache bandwidth, such as the ``MOVNT``
+instruction on x86.
+
+Semantics:
+""""""""""
+
+The contents of memory are updated to contain '``<value>``' at the
+location specified by the '``<pointer>``' operand. If '``<value>``' is
+of scalar type then the number of bytes written does not exceed the
+minimum number of bytes needed to hold all bits of the type. For
+example, storing an ``i24`` writes at most three bytes. When writing a
+value of a type like ``i20`` with a size that is not an integral number
+of bytes, it is unspecified what happens to the extra bits that do not
+belong to the type, but they will typically be overwritten.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %ptr = alloca i32 ; yields {i32*}:ptr
+ store i32 3, i32* %ptr ; yields {void}
+ %val = load i32* %ptr ; yields {i32}:val = i32 3
+
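+An atomic store likewise requires an ordering (recall that ``acquire``
+and ``acq_rel`` are not valid here) and an explicit alignment; a sketch:
+
+.. code-block:: llvm
+
+ store atomic i32 3, i32* %ptr release, align 4 ; yields {void}
+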
+.. _i_fence:
+
+'``fence``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ fence [singlethread] <ordering> ; yields {void}
+
+Overview:
+"""""""""
+
+The '``fence``' instruction is used to introduce happens-before edges
+between operations.
+
+Arguments:
+""""""""""
+
+'``fence``' instructions take an :ref:`ordering <ordering>` argument which
+defines what *synchronizes-with* edges they add. They can only be given
+``acquire``, ``release``, ``acq_rel``, and ``seq_cst`` orderings.
+
+Semantics:
+""""""""""
+
+A fence A which has (at least) ``release`` ordering semantics
+*synchronizes with* a fence B with (at least) ``acquire`` ordering
+semantics if and only if there exist atomic operations X and Y, both
+operating on some atomic object M, such that A is sequenced before X, X
+modifies M (either directly or through some side effect of a sequence
+headed by X), Y is sequenced before B, and Y observes M. This provides a
+*happens-before* dependency between A and B. Rather than an explicit
+``fence``, one (but not both) of the atomic operations X or Y might
+provide a ``release`` or ``acquire`` (resp.) ordering constraint and
+still *synchronize-with* the explicit ``fence`` and establish the
+*happens-before* edge.
+
+A ``fence`` which has ``seq_cst`` ordering, in addition to having both
+``acquire`` and ``release`` semantics specified above, participates in
+the global program order of other ``seq_cst`` operations and/or fences.
+
+The optional ":ref:`singlethread <singlethread>`" argument specifies
+that the fence only synchronizes with other fences in the same thread.
+(This is useful for interacting with signal handlers.)
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ fence acquire ; yields {void}
+ fence singlethread seq_cst ; yields {void}
+
+.. _i_cmpxchg:
+
+'``cmpxchg``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ cmpxchg [volatile] <ty>* <pointer>, <ty> <cmp>, <ty> <new> [singlethread] <ordering> ; yields {ty}
+
+Overview:
+"""""""""
+
+The '``cmpxchg``' instruction is used to atomically modify memory. It
+loads a value in memory and compares it to a given value. If they are
+equal, it stores a new value into the memory.
+
+Arguments:
+""""""""""
+
+There are three arguments to the '``cmpxchg``' instruction: an address
+to operate on, a value to compare to the value currently stored at that
+address, and a new value to place at that address if the compared values
+are equal. The type of '<cmp>' must be an integer type whose bit width
+is a power of two greater than or equal to eight and less than or equal
+to a target-specific size limit. '<cmp>' and '<new>' must have the same
+type, and the type of '<pointer>' must be a pointer to that type. If the
+``cmpxchg`` is marked as ``volatile``, then the optimizer is not allowed
+to modify the number or order of execution of this ``cmpxchg`` with
+other :ref:`volatile operations <volatile>`.
+
+The :ref:`ordering <ordering>` argument specifies how this ``cmpxchg``
+synchronizes with other atomic operations.
+
+The optional "``singlethread``" argument declares that the ``cmpxchg``
+is only atomic with respect to code (usually signal handlers) running in
+the same thread as the ``cmpxchg``. Otherwise the cmpxchg is atomic with
+respect to all other code in the system.
+
+The pointer passed into cmpxchg must have alignment greater than or
+equal to the size in memory of the operand.
+
+Semantics:
+""""""""""
+
+The contents of memory at the location specified by the '``<pointer>``'
+operand is read and compared to '``<cmp>``'; if the two values are
+equal, '``<new>``' is written. The original value at the location is
+returned.
+
+A successful ``cmpxchg`` is a read-modify-write instruction for the purpose
+of identifying release sequences. A failed ``cmpxchg`` is equivalent to an
+atomic load with an ordering parameter determined by dropping any
+``release`` part of the ``cmpxchg``'s ordering.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ entry:
+ %orig = load atomic i32* %ptr unordered, align 4 ; yields {i32}
+ br label %loop
+
+ loop:
+ %cmp = phi i32 [ %orig, %entry ], [%old, %loop]
+ %squared = mul i32 %cmp, %cmp
+ %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared seq_cst ; yields {i32}
+ %success = icmp eq i32 %cmp, %old
+ br i1 %success, label %done, label %loop
+
+ done:
+ ...
+
+.. _i_atomicrmw:
+
+'``atomicrmw``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ atomicrmw [volatile] <operation> <ty>* <pointer>, <ty> <value> [singlethread] <ordering> ; yields {ty}
+
+Overview:
+"""""""""
+
+The '``atomicrmw``' instruction is used to atomically modify memory.
+
+Arguments:
+""""""""""
+
+There are three arguments to the '``atomicrmw``' instruction: an
+operation to apply, an address whose value to modify, and an argument to
+the operation. The operation must be one of the following keywords:
+
+- xchg
+- add
+- sub
+- and
+- nand
+- or
+- xor
+- max
+- min
+- umax
+- umin
+
+The type of '<value>' must be an integer type whose bit width is a power
+of two greater than or equal to eight and less than or equal to a
+target-specific size limit. The type of the '``<pointer>``' operand must
+be a pointer to that type. If the ``atomicrmw`` is marked as
+``volatile``, then the optimizer is not allowed to modify the number or
+order of execution of this ``atomicrmw`` with other :ref:`volatile
+operations <volatile>`.
+
+Semantics:
+""""""""""
+
+The contents of memory at the location specified by the '``<pointer>``'
+operand are atomically read, modified, and written back. The original
+value at the location is returned. The modification is specified by the
+operation argument:
+
+- xchg: ``*ptr = val``
+- add: ``*ptr = *ptr + val``
+- sub: ``*ptr = *ptr - val``
+- and: ``*ptr = *ptr & val``
+- nand: ``*ptr = ~(*ptr & val)``
+- or: ``*ptr = *ptr | val``
+- xor: ``*ptr = *ptr ^ val``
+- max: ``*ptr = *ptr > val ? *ptr : val`` (using a signed comparison)
+- min: ``*ptr = *ptr < val ? *ptr : val`` (using a signed comparison)
+- umax: ``*ptr = *ptr > val ? *ptr : val`` (using an unsigned
+ comparison)
+- umin: ``*ptr = *ptr < val ? *ptr : val`` (using an unsigned
+ comparison)
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %old = atomicrmw add i32* %ptr, i32 1 acquire ; yields {i32}
+
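+The other operations follow the same form; for instance, an atomic
+exchange and an unsigned maximum (a sketch assuming ``%ptr`` and
+``%val`` are in scope):
+
+.. code-block:: llvm
+
+ %prev = atomicrmw xchg i32* %ptr, i32 0 seq_cst ; yields {i32}
+ %max = atomicrmw umax i32* %ptr, i32 %val monotonic ; yields {i32}
+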
+.. _i_getelementptr:
+
+'``getelementptr``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = getelementptr <pty>* <ptrval>{, <ty> <idx>}*
+ <result> = getelementptr inbounds <pty>* <ptrval>{, <ty> <idx>}*
+ <result> = getelementptr <ptr vector> ptrval, <vector index type> idx
+
+Overview:
+"""""""""
+
+The '``getelementptr``' instruction is used to get the address of a
+subelement of an :ref:`aggregate <t_aggregate>` data structure. It performs
+address calculation only and does not access memory.
+
+Arguments:
+""""""""""
+
+The first argument is always a pointer or a vector of pointers, and
+forms the basis of the calculation. The remaining arguments are indices
+that indicate which of the elements of the aggregate object are indexed.
+The interpretation of each index is dependent on the type being indexed
+into. The first index always indexes the pointer value given as the
+first argument, the second index indexes a value of the type pointed to
+(not necessarily the value directly pointed to, since the first index
+can be non-zero), etc. The first type indexed into must be a pointer
+value, subsequent types can be arrays, vectors, and structs. Note that
+subsequent types being indexed into can never be pointers, since that
+would require loading the pointer before continuing calculation.
+
+The type of each index argument depends on the type it is indexing into.
+When indexing into an (optionally packed) structure, only ``i32`` integer
+**constants** are allowed (when using a vector of indices they must all
+be the **same** ``i32`` integer constant). When indexing into an array,
+pointer or vector, integers of any width are allowed, and they are not
+required to be constant. These integers are treated as signed values
+where relevant.
+
+For example, let's consider a C code fragment and how it gets compiled
+to LLVM:
+
+.. code-block:: c
+
+ struct RT {
+ char A;
+ int B[10][20];
+ char C;
+ };
+ struct ST {
+ int X;
+ double Y;
+ struct RT Z;
+ };
+
+ int *foo(struct ST *s) {
+ return &s[1].Z.B[5][13];
+ }
+
+The LLVM code generated by Clang is:
+
+.. code-block:: llvm
+
+ %struct.RT = type { i8, [10 x [20 x i32]], i8 }
+ %struct.ST = type { i32, double, %struct.RT }
+
+ define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp {
+ entry:
+ %arrayidx = getelementptr inbounds %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13
+ ret i32* %arrayidx
+ }
+
+Semantics:
+""""""""""
+
+In the example above, the first index is indexing into the
+'``%struct.ST*``' type, which is a pointer, yielding a '``%struct.ST``'
+= '``{ i32, double, %struct.RT }``' type, a structure. The second index
+indexes into the third element of the structure, yielding a
+'``%struct.RT``' = '``{ i8 , [10 x [20 x i32]], i8 }``' type, another
+structure. The third index indexes into the second element of the
+structure, yielding a '``[10 x [20 x i32]]``' type, an array. The two
+dimensions of the array are subscripted into, yielding an '``i32``'
+type. The '``getelementptr``' instruction returns a pointer to this
+element, thus computing a value of '``i32*``' type.
+
+Note that it is perfectly legal to index partially through a structure,
+returning a pointer to an inner element. Because of this, the LLVM code
+for the given testcase is equivalent to:
+
+.. code-block:: llvm
+
+ define i32* @foo(%struct.ST* %s) {
+ %t1 = getelementptr %struct.ST* %s, i32 1 ; yields %struct.ST*:%t1
+ %t2 = getelementptr %struct.ST* %t1, i32 0, i32 2 ; yields %struct.RT*:%t2
+ %t3 = getelementptr %struct.RT* %t2, i32 0, i32 1 ; yields [10 x [20 x i32]]*:%t3
+ %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5 ; yields [20 x i32]*:%t4
+ %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13 ; yields i32*:%t5
+ ret i32* %t5
+ }
+
+If the ``inbounds`` keyword is present, the result value of the
+``getelementptr`` is a :ref:`poison value <poisonvalues>` if the base
+pointer is not an *in bounds* address of an allocated object, or if any
+of the addresses that would be formed by successive addition of the
+offsets implied by the indices to the base address with infinitely
+precise signed arithmetic are not an *in bounds* address of that
+allocated object. The *in bounds* addresses for an allocated object are
+all the addresses that point into the object, plus the address one byte
+past the end. In cases where the base is a vector of pointers the
+``inbounds`` keyword applies to each of the computations element-wise.
+
+If the ``inbounds`` keyword is not present, the offsets are added to the
+base address with silently-wrapping two's complement arithmetic. If the
+offsets have a different width from the pointer, they are sign-extended
+or truncated to the width of the pointer. The result value of the
+``getelementptr`` may be outside the object pointed to by the base
+pointer. The result value may not necessarily be used to access memory
+though, even if it happens to point into allocated storage. See the
+:ref:`Pointer Aliasing Rules <pointeraliasing>` section for more
+information.
+
+The getelementptr instruction is often confusing. For some more insight
+into how it works, see :doc:`the getelementptr FAQ <GetElementPtr>`.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ ; yields [12 x i8]*:aptr
+ %aptr = getelementptr {i32, [12 x i8]}* %saptr, i64 0, i32 1
+ ; yields i8*:vptr
+ %vptr = getelementptr {i32, <2 x i8>}* %svptr, i64 0, i32 1, i32 1
+ ; yields i8*:eptr
+ %eptr = getelementptr [12 x i8]* %aptr, i64 0, i32 1
+ ; yields i32*:iptr
+ %iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
+
+In cases where the pointer argument is a vector of pointers, each index
+must be a vector with the same number of elements. For example:
+
+.. code-block:: llvm
+
+ %A = getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets
+
+Conversion Operations
+---------------------
+
+The instructions in this category are the conversion instructions
+(casting) which all take a single operand and a type. They perform
+various bit conversions on the operand.
+
+'``trunc .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = trunc <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``trunc``' instruction truncates its operand to the type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``trunc``' instruction takes a value to trunc, and a type to trunc
+it to. Both types must be of :ref:`integer <t_integer>` types, or vectors
+of the same number of integers. The bit size of the ``value`` must be
+larger than the bit size of the destination type, ``ty2``. Equal sized
+types are not allowed.
+
+Semantics:
+""""""""""
+
+The '``trunc``' instruction truncates the high order bits in ``value``
+and converts the remaining bits to ``ty2``. Since the source size must
+be larger than the destination size, ``trunc`` cannot be a *no-op cast*.
+It will always truncate bits.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = trunc i32 257 to i8 ; yields i8:1
+ %Y = trunc i32 123 to i1 ; yields i1:true
+ %Z = trunc i32 122 to i1 ; yields i1:false
+ %W = trunc <2 x i16> <i16 8, i16 7> to <2 x i8> ; yields <i8 8, i8 7>
+
+'``zext .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = zext <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``zext``' instruction zero extends its operand to type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``zext``' instruction takes a value to cast, and a type to cast it
+to. Both types must be of :ref:`integer <t_integer>` types, or vectors of
+the same number of integers. The bit size of the ``value`` must be
+smaller than the bit size of the destination type, ``ty2``.
+
+Semantics:
+""""""""""
+
+The ``zext`` fills the high order bits of the ``value`` with zero bits
+until it reaches the size of the destination type, ``ty2``.
+
+When zero extending from i1, the result will always be either 0 or 1.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = zext i32 257 to i64 ; yields i64:257
+ %Y = zext i1 true to i32 ; yields i32:1
+ %Z = zext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
+
+'``sext .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = sext <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``sext``' sign extends ``value`` to the type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``sext``' instruction takes a value to cast, and a type to cast it
+to. Both types must be of :ref:`integer <t_integer>` types, or vectors of
+the same number of integers. The bit size of the ``value`` must be
+smaller than the bit size of the destination type, ``ty2``.
+
+Semantics:
+""""""""""
+
+The '``sext``' instruction performs a sign extension by copying the sign
+bit (highest order bit) of the ``value`` until it reaches the bit size
+of the type ``ty2``.
+
+When sign extending from i1, the extension always results in -1 or 0.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = sext i8 -1 to i16 ; yields i16:65535
+ %Y = sext i1 true to i32 ; yields i32:-1
+ %Z = sext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
+
+'``fptrunc .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = fptrunc <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``fptrunc``' instruction truncates ``value`` to type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``fptrunc``' instruction takes a :ref:`floating point <t_floating>`
+value to cast and a :ref:`floating point <t_floating>` type to cast it to.
+The size of ``value`` must be larger than the size of ``ty2``. This
+implies that ``fptrunc`` cannot be used to make a *no-op cast*.
+
+Semantics:
+""""""""""
+
+The '``fptrunc``' instruction truncates a ``value`` from a larger
+:ref:`floating point <t_floating>` type to a smaller :ref:`floating
+point <t_floating>` type. If the value cannot fit within the
+destination type, ``ty2``, then the results are undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = fptrunc double 123.0 to float ; yields float:123.0
+ %Y = fptrunc double 1.0E+300 to float ; yields undefined
+
+'``fpext .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = fpext <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``fpext``' extends a floating point ``value`` to a larger floating
+point value.
+
+Arguments:
+""""""""""
+
+The '``fpext``' instruction takes a :ref:`floating point <t_floating>`
+``value`` to cast, and a :ref:`floating point <t_floating>` type to cast it
+to. The source type must be smaller than the destination type.
+
+Semantics:
+""""""""""
+
+The '``fpext``' instruction extends the ``value`` from a smaller
+:ref:`floating point <t_floating>` type to a larger :ref:`floating
+point <t_floating>` type. The ``fpext`` cannot be used to make a
+*no-op cast* because it always changes bits. Use ``bitcast`` to make a
+*no-op cast* for a floating point cast.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = fpext float 3.125 to double ; yields double:3.125000e+00
+ %Y = fpext double %X to fp128 ; yields fp128:0xL00000000000000004000900000000000
+
+'``fptoui .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = fptoui <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``fptoui``' converts a floating point ``value`` to its unsigned
+integer equivalent of type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``fptoui``' instruction takes a value to cast, which must be a
+scalar or vector :ref:`floating point <t_floating>` value, and a type to
+cast it to ``ty2``, which must be an :ref:`integer <t_integer>` type. If
+``ty`` is a vector floating point type, ``ty2`` must be a vector integer
+type with the same number of elements as ``ty``.
+
+Semantics:
+""""""""""
+
+The '``fptoui``' instruction converts its :ref:`floating
+point <t_floating>` operand into the nearest (rounding towards zero)
+unsigned integer value. If the value cannot fit in ``ty2``, the results
+are undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = fptoui double 123.0 to i32 ; yields i32:123
+ %Y = fptoui float 1.0E+300 to i1 ; yields undefined:1
+ %Z = fptoui float 1.04E+17 to i8 ; yields undefined:1
+
+'``fptosi .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = fptosi <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``fptosi``' instruction converts :ref:`floating point <t_floating>`
+``value`` to type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``fptosi``' instruction takes a value to cast, which must be a
+scalar or vector :ref:`floating point <t_floating>` value, and a type to
+cast it to ``ty2``, which must be an :ref:`integer <t_integer>` type. If
+``ty`` is a vector floating point type, ``ty2`` must be a vector integer
+type with the same number of elements as ``ty``.
+
+Semantics:
+""""""""""
+
+The '``fptosi``' instruction converts its :ref:`floating
+point <t_floating>` operand into the nearest (rounding towards zero)
+signed integer value. If the value cannot fit in ``ty2``, the results
+are undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = fptosi double -123.0 to i32 ; yields i32:-123
+ %Y = fptosi float 1.0E-247 to i1 ; yields undefined:1
+ %Z = fptosi float 1.04E+17 to i8 ; yields undefined:1
+
+'``uitofp .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = uitofp <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``uitofp``' instruction regards ``value`` as an unsigned integer
+and converts that value to the ``ty2`` type.
+
+Arguments:
+""""""""""
+
+The '``uitofp``' instruction takes a value to cast, which must be a
+scalar or vector :ref:`integer <t_integer>` value, and a type to cast it to
+``ty2``, which must be a :ref:`floating point <t_floating>` type. If
+``ty`` is a vector integer type, ``ty2`` must be a vector floating point
+type with the same number of elements as ``ty``.
+
+Semantics:
+""""""""""
+
+The '``uitofp``' instruction interprets its operand as an unsigned
+integer quantity and converts it to the corresponding floating point
+value. If the value cannot fit in the floating point value, the results
+are undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = uitofp i32 257 to float ; yields float:257.0
+ %Y = uitofp i8 -1 to double ; yields double:255.0
+
+'``sitofp .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = sitofp <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``sitofp``' instruction regards ``value`` as a signed integer and
+converts that value to the ``ty2`` type.
+
+Arguments:
+""""""""""
+
+The '``sitofp``' instruction takes a value to cast, which must be a
+scalar or vector :ref:`integer <t_integer>` value, and a type to cast it to
+``ty2``, which must be a :ref:`floating point <t_floating>` type. If
+``ty`` is a vector integer type, ``ty2`` must be a vector floating point
+type with the same number of elements as ``ty``.
+
+Semantics:
+""""""""""
+
+The '``sitofp``' instruction interprets its operand as a signed integer
+quantity and converts it to the corresponding floating point value. If
+the value cannot fit in the floating point value, the results are
+undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = sitofp i32 257 to float ; yields float:257.0
+ %Y = sitofp i8 -1 to double ; yields double:-1.0
+
+.. _i_ptrtoint:
+
+'``ptrtoint .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = ptrtoint <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``ptrtoint``' instruction converts the pointer or a vector of
+pointers ``value`` to the integer (or vector of integers) type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``ptrtoint``' instruction takes a ``value`` to cast, which must be
+a value of type :ref:`pointer <t_pointer>` or a vector of pointers, and a
+type to cast it to ``ty2``, which must be an :ref:`integer <t_integer>` or
+a vector of integers type.
+
+Semantics:
+""""""""""
+
+The '``ptrtoint``' instruction converts ``value`` to integer type
+``ty2`` by interpreting the pointer value as an integer and either
+truncating or zero extending that value to the size of the integer type.
+If ``value`` is smaller than ``ty2`` then a zero extension is done. If
+``value`` is larger than ``ty2`` then a truncation is done. If they are
+the same size, then nothing is done (*no-op cast*) other than a type
+change.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = ptrtoint i32* %P to i8 ; yields truncation on 32-bit architecture
+ %Y = ptrtoint i32* %P to i64 ; yields zero extension on 32-bit architecture
+ %Z = ptrtoint <4 x i32*> %P to <4 x i64>; yields vector zero extension for a vector of addresses on 32-bit architecture
+
+.. _i_inttoptr:
+
+'``inttoptr .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = inttoptr <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``inttoptr``' instruction converts an integer ``value`` to a
+pointer type, ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``inttoptr``' instruction takes an :ref:`integer <t_integer>` value to
+cast, and a type to cast it to, which must be a :ref:`pointer <t_pointer>`
+type.
+
+Semantics:
+""""""""""
+
+The '``inttoptr``' instruction converts ``value`` to type ``ty2`` by
+applying either a zero extension or a truncation depending on the size
+of the integer ``value``. If ``value`` is larger than the size of a
+pointer then a truncation is done. If ``value`` is smaller than the size
+of a pointer then a zero extension is done. If they are the same size,
+nothing is done (*no-op cast*).
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = inttoptr i32 255 to i32* ; yields zero extension on 64-bit architecture
+ %Y = inttoptr i32 255 to i32* ; yields no-op on 32-bit architecture
+ %Z = inttoptr i64 0 to i32* ; yields truncation on 32-bit architecture
+ %Z = inttoptr <4 x i32> %G to <4 x i8*>; yields truncation of vector G to four pointers
+
+.. _i_bitcast:
+
+'``bitcast .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = bitcast <ty> <value> to <ty2> ; yields ty2
+
+Overview:
+"""""""""
+
+The '``bitcast``' instruction converts ``value`` to type ``ty2`` without
+changing any bits.
+
+Arguments:
+""""""""""
+
+The '``bitcast``' instruction takes a value to cast, which must be a
+non-aggregate first class value, and a type to cast it to, which must
+also be a non-aggregate :ref:`first class <t_firstclass>` type. The bit
+sizes of ``value`` and the destination type, ``ty2``, must be identical.
+If the source type is a pointer, the destination type must also be a
+pointer. This instruction supports bitwise conversion of vectors to
+integers and to vectors of other types (as long as they have the same
+size).
+
+Semantics:
+""""""""""
+
+The '``bitcast``' instruction converts ``value`` to type ``ty2``. It is
+always a *no-op cast* because no bits change with this conversion. The
+conversion is done as if the ``value`` had been stored to memory and
+read back as type ``ty2``. Pointer (or vector of pointers) types may
+only be converted to other pointer (or vector of pointers) types with
+this instruction. To convert pointers to other types, use the
+:ref:`inttoptr <i_inttoptr>` or :ref:`ptrtoint <i_ptrtoint>` instructions
+first.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = bitcast i8 255 to i8 ; yields i8:-1
+ %Y = bitcast i32* %x to i16* ; yields i16*:%x
+ %Z = bitcast <2 x i32> %V to i64 ; yields i64: %V
+ %Z = bitcast <2 x i32*> %V to <2 x i64*> ; yields <2 x i64*>
+
+.. _otherops:
+
+Other Operations
+----------------
+
+The instructions in this category are the "miscellaneous" instructions,
+which defy better classification.
+
+.. _i_icmp:
+
+'``icmp``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = icmp <cond> <ty> <op1>, <op2> ; yields {i1} or {<N x i1>}:result
+
+Overview:
+"""""""""
+
+The '``icmp``' instruction returns a boolean value or a vector of
+boolean values based on comparison of its two integer, integer vector,
+pointer, or pointer vector operands.
+
+Arguments:
+""""""""""
+
+The '``icmp``' instruction takes three operands. The first operand is
+the condition code indicating the kind of comparison to perform. It is
+not a value, just a keyword. The possible condition codes are:
+
+#. ``eq``: equal
+#. ``ne``: not equal
+#. ``ugt``: unsigned greater than
+#. ``uge``: unsigned greater or equal
+#. ``ult``: unsigned less than
+#. ``ule``: unsigned less or equal
+#. ``sgt``: signed greater than
+#. ``sge``: signed greater or equal
+#. ``slt``: signed less than
+#. ``sle``: signed less or equal
+
+The remaining two arguments must be :ref:`integer <t_integer>` or
+:ref:`pointer <t_pointer>` or integer :ref:`vector <t_vector>` typed. They
+must also be identical types.
+
+Semantics:
+""""""""""
+
+The '``icmp``' compares ``op1`` and ``op2`` according to the condition
+code given as ``cond``. The comparison performed always yields either an
+:ref:`i1 <t_integer>` or vector of ``i1`` result, as follows:
+
+#. ``eq``: yields ``true`` if the operands are equal, ``false``
+ otherwise. No sign interpretation is necessary or performed.
+#. ``ne``: yields ``true`` if the operands are unequal, ``false``
+ otherwise. No sign interpretation is necessary or performed.
+#. ``ugt``: interprets the operands as unsigned values and yields
+ ``true`` if ``op1`` is greater than ``op2``.
+#. ``uge``: interprets the operands as unsigned values and yields
+ ``true`` if ``op1`` is greater than or equal to ``op2``.
+#. ``ult``: interprets the operands as unsigned values and yields
+ ``true`` if ``op1`` is less than ``op2``.
+#. ``ule``: interprets the operands as unsigned values and yields
+ ``true`` if ``op1`` is less than or equal to ``op2``.
+#. ``sgt``: interprets the operands as signed values and yields ``true``
+ if ``op1`` is greater than ``op2``.
+#. ``sge``: interprets the operands as signed values and yields ``true``
+ if ``op1`` is greater than or equal to ``op2``.
+#. ``slt``: interprets the operands as signed values and yields ``true``
+ if ``op1`` is less than ``op2``.
+#. ``sle``: interprets the operands as signed values and yields ``true``
+ if ``op1`` is less than or equal to ``op2``.
+
+If the operands are :ref:`pointer <t_pointer>` typed, the pointer values
+are compared as if they were integers.
+
+If the operands are integer vectors, then they are compared element by
+element. The result is an ``i1`` vector with the same number of elements
+as the values being compared. Otherwise, the result is an ``i1``.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = icmp eq i32 4, 5 ; yields: result=false
+ <result> = icmp ne float* %X, %X ; yields: result=false
+ <result> = icmp ult i16 4, 5 ; yields: result=true
+ <result> = icmp sgt i16 4, 5 ; yields: result=false
+ <result> = icmp ule i16 -4, 5 ; yields: result=false
+ <result> = icmp sge i16 4, 5 ; yields: result=false
+
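+As described above, vector operands are compared element by element; a
+sketch:
+
+.. code-block:: llvm
+
+ <result> = icmp slt <2 x i32> <i32 0, i32 9>, <i32 1, i32 5> ; yields: result=<2 x i1> <i1 true, i1 false>
+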
+Note that the code generator does not yet support vector types with the
+``icmp`` instruction.
+
+.. _i_fcmp:
+
+'``fcmp``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = fcmp <cond> <ty> <op1>, <op2> ; yields {i1} or {<N x i1>}:result
+
+Overview:
+"""""""""
+
+The '``fcmp``' instruction returns a boolean value or vector of boolean
+values based on comparison of its operands.
+
+If the operands are floating point scalars, then the result type is a
+boolean (:ref:`i1 <t_integer>`).
+
+If the operands are floating point vectors, then the result type is a
+vector of boolean with the same number of elements as the operands being
+compared.
+
+Arguments:
+""""""""""
+
+The '``fcmp``' instruction takes three operands. The first operand is
+the condition code indicating the kind of comparison to perform. It is
+not a value, just a keyword. The possible condition codes are:
+
+#. ``false``: no comparison, always returns false
+#. ``oeq``: ordered and equal
+#. ``ogt``: ordered and greater than
+#. ``oge``: ordered and greater than or equal
+#. ``olt``: ordered and less than
+#. ``ole``: ordered and less than or equal
+#. ``one``: ordered and not equal
+#. ``ord``: ordered (no nans)
+#. ``ueq``: unordered or equal
+#. ``ugt``: unordered or greater than
+#. ``uge``: unordered or greater than or equal
+#. ``ult``: unordered or less than
+#. ``ule``: unordered or less than or equal
+#. ``une``: unordered or not equal
+#. ``uno``: unordered (either nans)
+#. ``true``: no comparison, always returns true
+
+*Ordered* means that neither operand is a QNAN while *unordered* means
+that either operand may be a QNAN.
+
+The ``op1`` and ``op2`` arguments must each be of :ref:`floating
+point <t_floating>` type or a :ref:`vector <t_vector>` of floating point
+type. They must have identical types.
+
+Semantics:
+""""""""""
+
+The '``fcmp``' instruction compares ``op1`` and ``op2`` according to the
+condition code given as ``cond``. If the operands are vectors, then the
+vectors are compared element by element. Each comparison performed
+always yields an :ref:`i1 <t_integer>` result, as follows:
+
+#. ``false``: always yields ``false``, regardless of operands.
+#. ``oeq``: yields ``true`` if both operands are not a QNAN and ``op1``
+ is equal to ``op2``.
+#. ``ogt``: yields ``true`` if both operands are not a QNAN and ``op1``
+ is greater than ``op2``.
+#. ``oge``: yields ``true`` if both operands are not a QNAN and ``op1``
+ is greater than or equal to ``op2``.
+#. ``olt``: yields ``true`` if both operands are not a QNAN and ``op1``
+ is less than ``op2``.
+#. ``ole``: yields ``true`` if both operands are not a QNAN and ``op1``
+ is less than or equal to ``op2``.
+#. ``one``: yields ``true`` if both operands are not a QNAN and ``op1``
+ is not equal to ``op2``.
+#. ``ord``: yields ``true`` if both operands are not a QNAN.
+#. ``ueq``: yields ``true`` if either operand is a QNAN or ``op1`` is
+ equal to ``op2``.
+#. ``ugt``: yields ``true`` if either operand is a QNAN or ``op1`` is
+ greater than ``op2``.
+#. ``uge``: yields ``true`` if either operand is a QNAN or ``op1`` is
+ greater than or equal to ``op2``.
+#. ``ult``: yields ``true`` if either operand is a QNAN or ``op1`` is
+ less than ``op2``.
+#. ``ule``: yields ``true`` if either operand is a QNAN or ``op1`` is
+ less than or equal to ``op2``.
+#. ``une``: yields ``true`` if either operand is a QNAN or ``op1`` is
+ not equal to ``op2``.
+#. ``uno``: yields ``true`` if either operand is a QNAN.
+#. ``true``: always yields ``true``, regardless of operands.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ <result> = fcmp oeq float 4.0, 5.0 ; yields: result=false
+ <result> = fcmp one float 4.0, 5.0 ; yields: result=true
+ <result> = fcmp olt float 4.0, 5.0 ; yields: result=true
+ <result> = fcmp ueq double 1.0, 2.0 ; yields: result=false
+
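+The ordered/unordered distinction only matters when a NaN is involved; a
+sketch using ``0x7FF8000000000000``, one encoding of a double QNAN:
+
+.. code-block:: llvm
+
+ <result> = fcmp ord double 4.0, 0x7FF8000000000000 ; yields: result=false
+ <result> = fcmp uno double 4.0, 0x7FF8000000000000 ; yields: result=true
+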
+Note that the code generator does not yet support vector types with the
+``fcmp`` instruction.
+
+.. _i_phi:
+
+'``phi``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = phi <ty> [ <val0>, <label0>], ...
+
+Overview:
+"""""""""
+
+The '``phi``' instruction is used to implement the φ node in the SSA
+graph representing the function.
+
+Arguments:
+""""""""""
+
+The type of the incoming values is specified with the first type field.
+After this, the '``phi``' instruction takes a list of pairs as
+arguments, with one pair for each predecessor basic block of the current
+block. Only values of :ref:`first class <t_firstclass>` type may be used as
+the value arguments to the PHI node. Only labels may be used as the
+label arguments.
+
+There must be no non-phi instructions between the start of a basic block
+and the PHI instructions: i.e. PHI instructions must be first in a basic
+block.
+
+For the purposes of the SSA form, the use of each incoming value is
+deemed to occur on the edge from the corresponding predecessor block to
+the current block (but after any definition of an '``invoke``'
+instruction's return value on the same edge).
+
+Semantics:
+""""""""""
+
+At runtime, the '``phi``' instruction logically takes on the value
+specified by the pair corresponding to the predecessor basic block that
+executed just prior to the current block.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ Loop: ; Infinite loop that counts from 0 on up...
+ %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]
+ %nextindvar = add i32 %indvar, 1
+ br label %Loop
+
+.. _i_select:
+
+'``select``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = select selty <cond>, <ty> <val1>, <ty> <val2> ; yields ty
+
+ selty is either i1 or {<N x i1>}
+
+Overview:
+"""""""""
+
+The '``select``' instruction is used to choose one value based on a
+condition, without branching.
+
+Arguments:
+""""""""""
+
+The '``select``' instruction requires an 'i1' value or a vector of 'i1'
+values indicating the condition, and two values of the same :ref:`first
+class <t_firstclass>` type. If ``val1``/``val2`` are vectors and the
+condition is a scalar, then entire vectors are selected, not individual
+elements.
+
+Semantics:
+""""""""""
+
+If the condition is an i1 and it evaluates to 1, the instruction returns
+the first value argument; otherwise, it returns the second value
+argument.
+
+If the condition is a vector of i1, then the value arguments must be
+vectors of the same size, and the selection is done element by element.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %X = select i1 true, i8 17, i8 42 ; yields i8:17
+
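+With a vector condition, the selection happens element by element; a
+sketch:
+
+.. code-block:: llvm
+
+ %Y = select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 17, i8 17>, <2 x i8> <i8 42, i8 42> ; yields <2 x i8>:<i8 17, i8 42>
+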
+.. _i_call:
+
+'``call``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <result> = [tail] call [cconv] [ret attrs] <ty> [<fnty>*] <fnptrval>(<function args>) [fn attrs]
+
+Overview:
+"""""""""
+
+The '``call``' instruction represents a simple function call.
+
+Arguments:
+""""""""""
+
+This instruction requires several arguments:
+
+#. The optional "tail" marker indicates that the callee function does
+ not access any allocas or varargs in the caller. Note that calls may
+ be marked "tail" even if they do not occur before a
+ :ref:`ret <i_ret>` instruction. If the "tail" marker is present, the
+ function call is eligible for tail call optimization, but `might not
+ in fact be optimized into a jump <CodeGenerator.html#tailcallopt>`_.
+ The code generator may optimize calls marked "tail" with either 1)
+ automatic `sibling call
+ optimization <CodeGenerator.html#sibcallopt>`_ when the caller and
+ callee have matching signatures, or 2) forced tail call optimization
+ when the following extra requirements are met:
+
+ - Caller and callee both have the calling convention ``fastcc``.
+ - The call is in tail position (ret immediately follows call and ret
+ uses value of call or is void).
+ - Option ``-tailcallopt`` is enabled, or
+ ``llvm::GuaranteedTailCallOpt`` is ``true``.
+ - `Platform specific constraints are
+ met. <CodeGenerator.html#tailcallopt>`_
+
+#. The optional "cconv" marker indicates which :ref:`calling
+ convention <callingconv>` the call should use. If none is
+ specified, the call defaults to using C calling conventions. The
+ calling convention of the call must match the calling convention of
+ the target function, or else the behavior is undefined.
+#. The optional :ref:`Parameter Attributes <paramattrs>` list for return
+ values. Only '``zeroext``', '``signext``', and '``inreg``' attributes
+ are valid here.
+#. '``ty``': the type of the call instruction itself which is also the
+ type of the return value. Functions that return no value are marked
+ ``void``.
+#. '``fnty``': shall be the signature of the pointer to function value
+ being invoked. The argument types must match the types implied by
+ this signature. This type can be omitted if the function is not
+ varargs and if the function type does not return a pointer to a
+ function.
+#. '``fnptrval``': An LLVM value containing a pointer to a function to
+ be invoked. In most cases, this is a direct function invocation, but
+ indirect ``call``'s are just as possible, calling an arbitrary pointer
+ to function value.
+#. '``function args``': argument list whose types match the function
+ signature argument types and parameter attributes. All arguments must
+ be of :ref:`first class <t_firstclass>` type. If the function signature
+ indicates the function accepts a variable number of arguments, the
+ extra arguments can be specified.
+#. The optional :ref:`function attributes <fnattrs>` list. Only
+ '``noreturn``', '``nounwind``', '``readonly``' and '``readnone``'
+ attributes are valid here.
+
+Semantics:
+""""""""""
+
+The '``call``' instruction is used to cause control flow to transfer to
+a specified function, with its incoming arguments bound to the specified
+values. Upon a '``ret``' instruction in the called function, control
+flow continues with the instruction after the function call, and the
+return value of the function is bound to the result argument.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ %retval = call i32 @test(i32 %argc)
+ call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42) ; yields i32
+ %X = tail call i32 @foo() ; yields i32
+ %Y = tail call fastcc i32 @foo() ; yields i32
+ call void %foo(i8 97 signext)
+
+ %struct.A = type { i32, i8 }
+ %r = call %struct.A @foo() ; yields { i32, i8 }
+ %gr = extractvalue %struct.A %r, 0 ; yields i32
+ %gr1 = extractvalue %struct.A %r, 1 ; yields i8
+ %Z = call void @foo() noreturn ; indicates that %foo never returns normally
+ %ZZ = call zeroext i32 @bar() ; Return value is %zero extended
+
+LLVM treats calls to some functions with names and arguments that match
+the standard C99 library as being the C99 library functions, and may
+perform optimizations or generate code for them under that assumption.
+This is something we'd like to change in the future to provide better
+support for freestanding environments and non-C-based languages.
+
+.. _i_va_arg:
+
+'``va_arg``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <resultval> = va_arg <va_list*> <arglist>, <argty>
+
+Overview:
+"""""""""
+
+The '``va_arg``' instruction is used to access arguments passed through
+the "variable argument" area of a function call. It is used to implement
+the ``va_arg`` macro in C.
+
+Arguments:
+""""""""""
+
+This instruction takes a ``va_list*`` value and the type of the
+argument. It returns a value of the specified argument type and
+increments the ``va_list`` to point to the next argument. The actual
+type of ``va_list`` is target specific.
+
+Semantics:
+""""""""""
+
+The '``va_arg``' instruction loads an argument of the specified type
+from the specified ``va_list`` and causes the ``va_list`` to point to
+the next argument. For more information, see the variable argument
+handling :ref:`Intrinsic Functions <int_varargs>`.
+
+It is legal for this instruction to be called in a function which does
+not take a variable number of arguments, for example, the ``vfprintf``
+function.
+
+``va_arg`` is an LLVM instruction instead of an :ref:`intrinsic
+function <intrinsics>` because it takes a type as an argument.
+
+Example:
+""""""""
+
+See the :ref:`variable argument processing <int_varargs>` section.
+
+Note that the code generator does not yet fully support va\_arg on many
+targets. Also, it does not currently support va\_arg with aggregate
+types on any target.
+
+.. _i_landingpad:
+
+'``landingpad``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <resultval> = landingpad <resultty> personality <type> <pers_fn> <clause>+
+ <resultval> = landingpad <resultty> personality <type> <pers_fn> cleanup <clause>*
+
+ <clause> := catch <type> <value>
+ <clause> := filter <array constant type> <array constant>
+
+Overview:
+"""""""""
+
+The '``landingpad``' instruction is used by `LLVM's exception handling
+system <ExceptionHandling.html#overview>`_ to specify that a basic block
+is a landing pad --- one where the exception lands, and corresponds to the
+code found in the ``catch`` portion of a ``try``/``catch`` sequence. It
+defines values supplied by the personality function (``pers_fn``) upon
+re-entry to the function. The ``resultval`` has the type ``resultty``.
+
+Arguments:
+""""""""""
+
+This instruction takes a ``pers_fn`` value. This is the personality
+function associated with the unwinding mechanism. The optional
+``cleanup`` flag indicates that the landing pad block is a cleanup.
+
+A ``clause`` begins with the clause type --- ``catch`` or ``filter`` --- and
+contains the global variable representing the "type" that may be caught
+or filtered respectively. Unlike the ``catch`` clause, the ``filter``
+clause takes an array constant as its argument. Use
+"``[0 x i8**] undef``" for a filter which cannot throw. The
+'``landingpad``' instruction must contain *at least* one ``clause`` or
+the ``cleanup`` flag.
+
+Semantics:
+""""""""""
+
+The '``landingpad``' instruction defines the values which are set by the
+personality function (``pers_fn``) upon re-entry to the function, and
+therefore the "result type" of the ``landingpad`` instruction. As with
+calling conventions, how the personality function results are
+represented in LLVM IR is target specific.
+
+The clauses are applied in order from top to bottom. If two
+``landingpad`` instructions are merged together through inlining, the
+clauses from the calling function are appended to the list of clauses.
+When the call stack is being unwound due to an exception being thrown,
+the exception is compared against each ``clause`` in turn. If it doesn't
+match any of the clauses, and the ``cleanup`` flag is not set, then
+unwinding continues further up the call stack.
+
+The ``landingpad`` instruction has several restrictions:
+
+- A landing pad block is a basic block which is the unwind destination
+ of an '``invoke``' instruction.
+- A landing pad block must have a '``landingpad``' instruction as its
+ first non-PHI instruction.
+- There can be only one '``landingpad``' instruction within the landing
+ pad block.
+- A basic block that is not a landing pad block may not include a
+ '``landingpad``' instruction.
+- All '``landingpad``' instructions in a function must have the same
+ personality function.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ ;; A landing pad which can catch an integer.
+ %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ catch i8** @_ZTIi
+ ;; A landing pad that is a cleanup.
+ %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ cleanup
+ ;; A landing pad which can catch an integer and can only throw a double.
+ %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ catch i8** @_ZTIi
+ filter [1 x i8**] [@_ZTId]
+
+.. _intrinsics:
+
+Intrinsic Functions
+===================
+
+LLVM supports the notion of an "intrinsic function". These functions
+have well known names and semantics and are required to follow certain
+restrictions. Overall, these intrinsics represent an extension mechanism
+for the LLVM language that does not require changing all of the
+transformations in LLVM when adding to the language (or the bitcode
+reader/writer, the parser, etc...).
+
+Intrinsic function names must all start with an "``llvm.``" prefix. This
+prefix is reserved in LLVM for intrinsic names; thus, function names may
+not begin with this prefix. Intrinsic functions must always be external
+functions: you cannot define the body of intrinsic functions. Intrinsic
+functions may only be used in call or invoke instructions: it is illegal
+to take the address of an intrinsic function. Additionally, because
+intrinsic functions are part of the LLVM language, any that are added
+must be documented here.
+
+Some intrinsic functions can be overloaded, i.e., the intrinsic
+represents a family of functions that perform the same operation but on
+different data types. Because LLVM can represent over 8 million
+different integer types, overloading is used commonly to allow an
+intrinsic function to operate on any integer type. One or more of the
+argument types or the result type can be overloaded to accept any
+integer type. Argument types may also be defined as exactly matching a
+previous argument's type or the result type. This allows an intrinsic
+function which accepts multiple arguments, but needs all of them to be
+of the same type, to only be overloaded with respect to a single
+argument or the result.
+
+Overloaded intrinsics have the names of their overloaded argument
+types encoded into their function names, each preceded by a period. Only
+those types which are overloaded result in a name suffix. Arguments
+whose type is matched against another type do not. For example, the
+``llvm.ctpop`` function can take an integer of any width and returns an
+integer of exactly the same integer width. This leads to a family of
+functions such as ``i8 @llvm.ctpop.i8(i8 %val)`` and
+``i29 @llvm.ctpop.i29(i29 %val)``. Only one type, the return type, is
+overloaded, and only one type suffix is required. Because the argument's
+type is matched against the return type, it does not require its own
+name suffix.
+
+To learn how to add an intrinsic function, please see the `Extending
+LLVM Guide <ExtendingLLVM.html>`_.
+
+.. _int_varargs:
+
+Variable Argument Handling Intrinsics
+-------------------------------------
+
+Variable argument support is defined in LLVM with the
+:ref:`va_arg <i_va_arg>` instruction and these three intrinsic
+functions. These functions are related to the similarly named macros
+defined in the ``<stdarg.h>`` header file.
+
+All of these functions operate on arguments that use a target-specific
+value type "``va_list``". The LLVM assembly language reference manual
+does not define what this type is, so all transformations should be
+prepared to handle these functions regardless of the type used.
+
+This example shows how the :ref:`va_arg <i_va_arg>` instruction and the
+variable argument handling intrinsic functions are used.
+
+.. code-block:: llvm
+
+ define i32 @test(i32 %X, ...) {
+ ; Initialize variable argument processing
+ %ap = alloca i8*
+ %ap2 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap2)
+
+ ; Read a single integer argument
+ %tmp = va_arg i8** %ap, i32
+
+ ; Demonstrate usage of llvm.va_copy and llvm.va_end
+ %aq = alloca i8*
+ %aq2 = bitcast i8** %aq to i8*
+ call void @llvm.va_copy(i8* %aq2, i8* %ap2)
+ call void @llvm.va_end(i8* %aq2)
+
+ ; Stop processing of arguments.
+ call void @llvm.va_end(i8* %ap2)
+ ret i32 %tmp
+ }
+
+ declare void @llvm.va_start(i8*)
+ declare void @llvm.va_copy(i8*, i8*)
+ declare void @llvm.va_end(i8*)
+
+.. _int_va_start:
+
+'``llvm.va_start``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.va_start(i8* <arglist>)
+
+Overview:
+"""""""""
+
+The '``llvm.va_start``' intrinsic initializes ``*<arglist>`` for
+subsequent use by ``va_arg``.
+
+Arguments:
+""""""""""
+
+The argument is a pointer to a ``va_list`` element to initialize.
+
+Semantics:
+""""""""""
+
+The '``llvm.va_start``' intrinsic works just like the ``va_start`` macro
+available in C. In a target-dependent way, it initializes the
+``va_list`` element to which the argument points, so that the next call
+to ``va_arg`` will produce the first variable argument passed to the
+function. Unlike the C ``va_start`` macro, this intrinsic does not need
+to know the last argument of the function as the compiler can figure
+that out.
+
+'``llvm.va_end``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.va_end(i8* <arglist>)
+
+Overview:
+"""""""""
+
+The '``llvm.va_end``' intrinsic destroys ``*<arglist>``, which has been
+initialized previously with ``llvm.va_start`` or ``llvm.va_copy``.
+
+Arguments:
+""""""""""
+
+The argument is a pointer to a ``va_list`` to destroy.
+
+Semantics:
+""""""""""
+
+The '``llvm.va_end``' intrinsic works just like the ``va_end`` macro
+available in C. In a target-dependent way, it destroys the ``va_list``
+element to which the argument points. Calls to
+:ref:`llvm.va_start <int_va_start>` and
+:ref:`llvm.va_copy <int_va_copy>` must be matched exactly with calls to
+``llvm.va_end``.
+
+.. _int_va_copy:
+
+'``llvm.va_copy``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.va_copy(i8* <destarglist>, i8* <srcarglist>)
+
+Overview:
+"""""""""
+
+The '``llvm.va_copy``' intrinsic copies the current argument position
+from the source argument list to the destination argument list.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to a ``va_list`` element to initialize.
+The second argument is a pointer to a ``va_list`` element to copy from.
+
+Semantics:
+""""""""""
+
+The '``llvm.va_copy``' intrinsic works just like the ``va_copy`` macro
+available in C. In a target-dependent way, it copies the source
+``va_list`` element into the destination ``va_list`` element. This
+intrinsic is necessary because the ``llvm.va_start`` intrinsic may be
+arbitrarily complex and require, for example, memory allocation.
+
+Accurate Garbage Collection Intrinsics
+--------------------------------------
+
+LLVM support for `Accurate Garbage Collection <GarbageCollection.html>`_
+(GC) requires the implementation and generation of these intrinsics.
+These intrinsics allow identification of :ref:`GC roots on the
+stack <int_gcroot>`, as well as garbage collector implementations that
+require :ref:`read <int_gcread>` and :ref:`write <int_gcwrite>` barriers.
+Front-ends for type-safe garbage collected languages should generate
+these intrinsics to make use of the LLVM garbage collectors. For more
+details, see `Accurate Garbage Collection with
+LLVM <GarbageCollection.html>`_.
+
+The garbage collection intrinsics only operate on objects in the generic
+address space (address space zero).
+
+.. _int_gcroot:
+
+'``llvm.gcroot``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
+
+Overview:
+"""""""""
+
+The '``llvm.gcroot``' intrinsic declares the existence of a GC root to
+the code generator, and allows some metadata to be associated with it.
+
+Arguments:
+""""""""""
+
+The first argument specifies the address of a stack object that contains
+the root pointer. The second pointer (which must be either a constant or
+a global value address) contains the meta-data to be associated with the
+root.
+
+Semantics:
+""""""""""
+
+At runtime, a call to this intrinsic stores a null pointer into the
+"ptrloc" location. At compile-time, the code generator generates
+information to allow the runtime to find the pointer at GC safe points.
+The '``llvm.gcroot``' intrinsic may only be used in a function which
+:ref:`specifies a GC algorithm <gc>`.
+
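+A minimal sketch of typical usage (the function name and the choice of
+the built-in ``shadow-stack`` collector are illustrative assumptions):
+
+.. code-block:: llvm
+
+ define void @gc_example() gc "shadow-stack" {
+ entry:
+ %root = alloca i8* ; stack slot holding the root pointer
+ ; register the slot; at runtime this stores null into %root
+ call void @llvm.gcroot(i8** %root, i8* null)
+ ret void
+ }
+
+ declare void @llvm.gcroot(i8**, i8*)
+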
+.. _int_gcread:
+
+'``llvm.gcread``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare i8* @llvm.gcread(i8* %ObjPtr, i8** %Ptr)
+
+Overview:
+"""""""""
+
+The '``llvm.gcread``' intrinsic identifies reads of references from heap
+locations, allowing garbage collector implementations that require read
+barriers.
+
+Arguments:
+""""""""""
+
+The second argument is the address to read from, which should be an
+address allocated from the garbage collector. The first argument is a
+pointer to the start of the referenced object, if needed by the language
+runtime (otherwise null).
+
+Semantics:
+""""""""""
+
+The '``llvm.gcread``' intrinsic has the same semantics as a load
+instruction, but may be replaced with substantially more complex code by
+the garbage collector runtime, as needed. The '``llvm.gcread``'
+intrinsic may only be used in a function which :ref:`specifies a GC
+algorithm <gc>`.
+
+.. _int_gcwrite:
+
+'``llvm.gcwrite``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.gcwrite(i8* %P1, i8* %Obj, i8** %P2)
+
+Overview:
+"""""""""
+
+The '``llvm.gcwrite``' intrinsic identifies writes of references to heap
+locations, allowing garbage collector implementations that require write
+barriers (such as generational or reference counting collectors).
+
+Arguments:
+""""""""""
+
+The first argument is the reference to store, the second is the start of
+the object to store it to, and the third is the address of the field of
+Obj to store to. If the runtime does not require a pointer to the
+object, Obj may be null.
+
+Semantics:
+""""""""""
+
+The '``llvm.gcwrite``' intrinsic has the same semantics as a store
+instruction, but may be replaced with substantially more complex code by
+the garbage collector runtime, as needed. The '``llvm.gcwrite``'
+intrinsic may only be used in a function which :ref:`specifies a GC
+algorithm <gc>`.
+
+Code Generator Intrinsics
+-------------------------
+
+These intrinsics are provided by LLVM to expose special features that
+may only be implemented with code generator support.
+
+'``llvm.returnaddress``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare i8* @llvm.returnaddress(i32 <level>)
+
+Overview:
+"""""""""
+
+The '``llvm.returnaddress``' intrinsic attempts to compute a
+target-specific value indicating the return address of the current
+function or one of its callers.
+
+Arguments:
+""""""""""
+
+The argument to this intrinsic indicates which function to return the
+address for. Zero indicates the calling function, one indicates its
+caller, etc. The argument is **required** to be a constant integer
+value.
+
+Semantics:
+""""""""""
+
+The '``llvm.returnaddress``' intrinsic either returns a pointer
+indicating the return address of the specified call frame, or zero if it
+cannot be identified. The value returned by this intrinsic is likely to
+be incorrect or 0 for arguments other than zero, so it should only be
+used for debugging purposes.
+
+Note that calling this intrinsic does not prevent function inlining or
+other aggressive transformations, so the value returned may not be that
+of the obvious source-language caller.
+
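+For example, a sketch that captures the current function's own return
+address (argument zero):
+
+.. code-block:: llvm
+
+ %ra = call i8* @llvm.returnaddress(i32 0) ; return address of this frame
+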
+'``llvm.frameaddress``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare i8* @llvm.frameaddress(i32 <level>)
+
+Overview:
+"""""""""
+
+The '``llvm.frameaddress``' intrinsic attempts to return the
+target-specific frame pointer value for the specified stack frame.
+
+Arguments:
+""""""""""
+
+The argument to this intrinsic indicates which function to return the
+frame pointer for. Zero indicates the calling function, one indicates
+its caller, etc. The argument is **required** to be a constant integer
+value.
+
+Semantics:
+""""""""""
+
+The '``llvm.frameaddress``' intrinsic either returns a pointer
+indicating the frame address of the specified call frame, or zero if it
+cannot be identified. The value returned by this intrinsic is likely to
+be incorrect or 0 for arguments other than zero, so it should only be
+used for debugging purposes.
+
+Note that calling this intrinsic does not prevent function inlining or
+other aggressive transformations, so the value returned may not be that
+of the obvious source-language caller.
+
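+For example, a sketch that captures the current function's frame
+pointer:
+
+.. code-block:: llvm
+
+ %fp = call i8* @llvm.frameaddress(i32 0) ; frame address of this frame
+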
+.. _int_stacksave:
+
+'``llvm.stacksave``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare i8* @llvm.stacksave()
+
+Overview:
+"""""""""
+
+The '``llvm.stacksave``' intrinsic is used to remember the current state
+of the function stack, for use with
+:ref:`llvm.stackrestore <int_stackrestore>`. This is useful for
+implementing language features like scoped automatic variable sized
+arrays in C99.
+
+Semantics:
+""""""""""
+
+This intrinsic returns an opaque pointer value that can be passed to
+:ref:`llvm.stackrestore <int_stackrestore>`. When an
+``llvm.stackrestore`` intrinsic is executed with a value saved from
+``llvm.stacksave``, it effectively restores the state of the stack to
+the state it was in when the ``llvm.stacksave`` intrinsic executed. In
+practice, this pops any :ref:`alloca <i_alloca>` blocks from the stack that
+were allocated after the ``llvm.stacksave`` was executed.
+
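+As an illustrative sketch, a variable-sized ``alloca`` can be bracketed
+by a save/restore pair so that its stack space is reclaimed immediately
+(``%n`` is a hypothetical byte count):
+
+.. code-block:: llvm
+
+ %sp = call i8* @llvm.stacksave() ; remember the stack state
+ %buf = alloca i8, i32 %n ; dynamically sized allocation
+ ; ... use %buf ...
+ call void @llvm.stackrestore(i8* %sp) ; pops %buf off the stack
+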
+.. _int_stackrestore:
+
+'``llvm.stackrestore``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.stackrestore(i8* %ptr)
+
+Overview:
+"""""""""
+
+The '``llvm.stackrestore``' intrinsic is used to restore the state of
+the function stack to the state it was in when the corresponding
+:ref:`llvm.stacksave <int_stacksave>` intrinsic executed. This is
+useful for implementing language features like scoped automatic variable
+sized arrays in C99.
+
+Semantics:
+""""""""""
+
+See the description for :ref:`llvm.stacksave <int_stacksave>`.
+
+'``llvm.prefetch``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.prefetch(i8* <address>, i32 <rw>, i32 <locality>, i32 <cache type>)
+
+Overview:
+"""""""""
+
+The '``llvm.prefetch``' intrinsic is a hint to the code generator to
+insert a prefetch instruction if supported; otherwise, it is a noop.
+Prefetches have no effect on the behavior of the program but can change
+its performance characteristics.
+
+Arguments:
+""""""""""
+
+``address`` is the address to be prefetched, ``rw`` is the specifier
+determining if the fetch should be for a read (0) or write (1), and
+``locality`` is a temporal locality specifier ranging from (0) - no
+locality, to (3) - extremely local (keep in cache). The ``cache type``
+specifies whether the prefetch is performed on the data (1) or
+instruction (0) cache. The ``rw``, ``locality`` and ``cache type``
+arguments must be constant integers.
+
+Semantics:
+""""""""""
+
+This intrinsic does not modify the behavior of the program. In
+particular, prefetches cannot trap and do not produce a value. On
+targets that support this intrinsic, the prefetch can provide hints to
+the processor cache for better performance.
+
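+For example, a hint that ``%ptr`` (a hypothetical address) will soon be
+read, with maximal temporal locality, into the data cache:
+
+.. code-block:: llvm
+
+ call void @llvm.prefetch(i8* %ptr, i32 0, i32 3, i32 1)
+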
+'``llvm.pcmarker``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.pcmarker(i32 <id>)
+
+Overview:
+"""""""""
+
+The '``llvm.pcmarker``' intrinsic is a method to export a Program
+Counter (PC) in a region of code to simulators and other tools. The
+method is target specific, but it is expected that the marker will use
+exported symbols to transmit the PC of the marker. The marker makes no
+guarantees that it will remain with any specific instruction after
+optimizations. It is possible that the presence of a marker will inhibit
+optimizations. The intended use is to be inserted after optimizations to
+allow correlations of simulation runs.
+
+Arguments:
+""""""""""
+
+``id`` is a numerical id identifying the marker.
+
+Semantics:
+""""""""""
+
+This intrinsic does not modify the behavior of the program. Backends
+that do not support this intrinsic may ignore it.
+
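+For example, exporting the PC at a point of interest under an arbitrary
+id:
+
+.. code-block:: llvm
+
+ call void @llvm.pcmarker(i32 42) ; id 42 is an arbitrary choice
+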
+'``llvm.readcyclecounter``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare i64 @llvm.readcyclecounter()
+
+Overview:
+"""""""""
+
+The '``llvm.readcyclecounter``' intrinsic provides access to the cycle
+counter register (or similar low latency, high accuracy clocks) on those
+targets that support it. On X86, it should map to RDTSC. On Alpha, it
+should map to RPCC. As the backing counters overflow quickly (on the
+order of 9 seconds on Alpha), this should only be used for small
+timings.
+
+Semantics:
+""""""""""
+
+When directly supported, reading the cycle counter should not modify any
+memory. Implementations are allowed to either return an application
+specific value or a system wide value. On backends without support, this
+is lowered to a constant 0.
+
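+As a sketch, a region can be timed by reading the counter before and
+after it (bearing the overflow caveat above in mind):
+
+.. code-block:: llvm
+
+ %t0 = call i64 @llvm.readcyclecounter()
+ ; ... code being timed ...
+ %t1 = call i64 @llvm.readcyclecounter()
+ %elapsed = sub i64 %t1, %t0 ; elapsed cycles
+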
+Standard C Library Intrinsics
+-----------------------------
+
+LLVM provides intrinsics for a few important standard C library
+functions. These intrinsics allow source-language front-ends to pass
+information about the alignment of the pointer arguments to the code
+generator, providing opportunity for more efficient code generation.
+
+.. _int_memcpy:
+
+'``llvm.memcpy``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memcpy`` on any
+integer bit width and for different address spaces. Not all targets
+support all bit widths however.
+
+::
+
+ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+ i32 <len>, i32 <align>, i1 <isvolatile>)
+ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
+ i64 <len>, i32 <align>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memcpy.*``' intrinsics copy a block of memory from the
+source location to the destination location.
+
+Note that, unlike the standard libc function, the ``llvm.memcpy.*``
+intrinsics do not return a value, take extra alignment/isvolatile
+arguments, and the pointers can be in specified address spaces.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination, the second is a
+pointer to the source. The third argument is an integer argument
+specifying the number of bytes to copy, the fourth argument is the
+alignment of the source and destination locations, and the fifth is a
+boolean indicating a volatile access.
+
+If the call to this intrinsic has an alignment value that is not 0 or 1,
+then the caller guarantees that both the source and destination pointers
+are aligned to that boundary.
+
+If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy`` call is
+a :ref:`volatile operation <volatile>`. The detailed access behavior is not
+very cleanly specified and it is unwise to depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memcpy.*``' intrinsics copy a block of memory from the
+source location to the destination location, which are not allowed to
+overlap. It copies "len" bytes of memory over. If the argument is known
+to be aligned to some boundary, this can be specified as the fourth
+argument, otherwise it should be set to 0 or 1.
+
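+For example, a non-volatile copy of 16 bytes where both pointers are
+known to be 4-byte aligned (``%dst`` and ``%src`` are hypothetical):
+
+.. code-block:: llvm
+
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src,
+ i32 16, i32 4, i1 false)
+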
+'``llvm.memmove``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memmove`` on any
+integer bit width and for different address spaces. Not all targets
+support all bit widths however.
+
+::
+
+ declare void @llvm.memmove.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+ i32 <len>, i32 <align>, i1 <isvolatile>)
+ declare void @llvm.memmove.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
+ i64 <len>, i32 <align>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memmove.*``' intrinsics move a block of memory from the
+source location to the destination location. It is similar to the
+'``llvm.memcpy``' intrinsic but allows the two memory locations to
+overlap.
+
+Note that, unlike the standard libc function, the ``llvm.memmove.*``
+intrinsics do not return a value, take extra alignment/isvolatile
+arguments, and the pointers can be in specified address spaces.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination, the second is a
+pointer to the source. The third argument is an integer argument
+specifying the number of bytes to copy, the fourth argument is the
+alignment of the source and destination locations, and the fifth is a
+boolean indicating a volatile access.
+
+If the call to this intrinsic has an alignment value that is not 0 or 1,
+then the caller guarantees that the source and destination pointers are
+aligned to that boundary.
+
+If the ``isvolatile`` parameter is ``true``, the ``llvm.memmove`` call
+is a :ref:`volatile operation <volatile>`. The detailed access behavior is
+not very cleanly specified and it is unwise to depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memmove.*``' intrinsics copy a block of memory from the
+source location to the destination location, which may overlap. It
+copies "len" bytes of memory over. If the argument is known to be
+aligned to some boundary, this can be specified as the fourth argument,
+otherwise it should be set to 0 or 1.
+
+'``llvm.memset.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use llvm.memset on any integer
+bit width and for different address spaces. However, not all targets
+support all bit widths.
+
+::
+
+ declare void @llvm.memset.p0i8.i32(i8* <dest>, i8 <val>,
+ i32 <len>, i32 <align>, i1 <isvolatile>)
+ declare void @llvm.memset.p0i8.i64(i8* <dest>, i8 <val>,
+ i64 <len>, i32 <align>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memset.*``' intrinsics fill a block of memory with a
+particular byte value.
+
+Note that, unlike the standard libc function, the ``llvm.memset``
+intrinsic does not return a value and takes extra alignment/volatile
+arguments. Also, the destination can be in an arbitrary address space.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination to fill, the second
+is the byte value with which to fill it, the third argument is an
+integer argument specifying the number of bytes to fill, and the fourth
+argument is the known alignment of the destination location.
+
+If the call to this intrinsic has an alignment value that is not 0 or 1,
+then the caller guarantees that the destination pointer is aligned to
+that boundary.
+
+If the ``isvolatile`` parameter is ``true``, the ``llvm.memset`` call is
+a :ref:`volatile operation <volatile>`. The detailed access behavior is not
+very cleanly specified and it is unwise to depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memset.*``' intrinsics fill "len" bytes of memory starting
+at the destination location. If the argument is known to be aligned to
+some boundary, this can be specified as the fourth argument, otherwise
+it should be set to 0 or 1.
+
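+For example, zeroing a 32-byte buffer of unknown alignment (``%buf`` is
+hypothetical):
+
+.. code-block:: llvm
+
+ call void @llvm.memset.p0i8.i32(i8* %buf, i8 0, i32 32, i32 1, i1 false)
+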
+'``llvm.sqrt.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sqrt`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.sqrt.f32(float %Val)
+ declare double @llvm.sqrt.f64(double %Val)
+ declare x86_fp80 @llvm.sqrt.f80(x86_fp80 %Val)
+ declare fp128 @llvm.sqrt.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.sqrt``' intrinsics return the sqrt of the specified operand,
+returning the same value as the libm '``sqrt``' functions would. Unlike
+``sqrt`` in libm, however, ``llvm.sqrt`` has undefined behavior for
+negative numbers other than -0.0 (which allows for better optimization,
+because there is no need to worry about errno being set).
+``llvm.sqrt(-0.0)`` is defined to return -0.0 like IEEE sqrt.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the sqrt of the specified operand if it is a
+nonnegative floating point number.
+
+'``llvm.powi.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.powi`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.powi.f32(float %Val, i32 %power)
+ declare double @llvm.powi.f64(double %Val, i32 %power)
+ declare x86_fp80 @llvm.powi.f80(x86_fp80 %Val, i32 %power)
+ declare fp128 @llvm.powi.f128(fp128 %Val, i32 %power)
+ declare ppc_fp128 @llvm.powi.ppcf128(ppc_fp128 %Val, i32 %power)
+
+Overview:
+"""""""""
+
+The '``llvm.powi.*``' intrinsics return the first operand raised to the
+specified (positive or negative) power. The order of evaluation of
+multiplications is not defined. When a vector of floating point type is
+used, the second argument remains a scalar integer value.
+
+Arguments:
+""""""""""
+
+The second argument is an integer power, and the first is a value to
+raise to that power.
+
+Semantics:
+""""""""""
+
+This function returns the first value raised to the second power with an
+unspecified sequence of rounding operations.
+
+'``llvm.sin.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sin`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.sin.f32(float %Val)
+ declare double @llvm.sin.f64(double %Val)
+ declare x86_fp80 @llvm.sin.f80(x86_fp80 %Val)
+ declare fp128 @llvm.sin.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.sin.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.sin.*``' intrinsics return the sine of the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the sine of the specified operand, returning the
+same values as the libm ``sin`` functions would, and handles error
+conditions in the same way.
+
+'``llvm.cos.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.cos`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.cos.f32(float %Val)
+ declare double @llvm.cos.f64(double %Val)
+ declare x86_fp80 @llvm.cos.f80(x86_fp80 %Val)
+ declare fp128 @llvm.cos.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.cos.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.cos.*``' intrinsics return the cosine of the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the cosine of the specified operand, returning the
+same values as the libm ``cos`` functions would, and handles error
+conditions in the same way.
+
+'``llvm.pow.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.pow`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.pow.f32(float %Val, float %Power)
+ declare double @llvm.pow.f64(double %Val, double %Power)
+ declare x86_fp80 @llvm.pow.f80(x86_fp80 %Val, x86_fp80 %Power)
+ declare fp128 @llvm.pow.f128(fp128 %Val, fp128 %Power)
+ declare ppc_fp128 @llvm.pow.ppcf128(ppc_fp128 %Val, ppc_fp128 %Power)
+
+Overview:
+"""""""""
+
+The '``llvm.pow.*``' intrinsics return the first operand raised to the
+specified (positive or negative) power.
+
+Arguments:
+""""""""""
+
+The second argument is a floating point power, and the first is a value
+to raise to that power.
+
+Semantics:
+""""""""""
+
+This function returns the first value raised to the second power,
+returning the same values as the libm ``pow`` functions would, and
+handles error conditions in the same way.
+
+'``llvm.exp.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.exp`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.exp.f32(float %Val)
+ declare double @llvm.exp.f64(double %Val)
+ declare x86_fp80 @llvm.exp.f80(x86_fp80 %Val)
+ declare fp128 @llvm.exp.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.exp.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.exp.*``' intrinsics perform the exp function.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``exp`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.exp2.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.exp2`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.exp2.f32(float %Val)
+ declare double @llvm.exp2.f64(double %Val)
+ declare x86_fp80 @llvm.exp2.f80(x86_fp80 %Val)
+ declare fp128 @llvm.exp2.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.exp2.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.exp2.*``' intrinsics perform the exp2 function.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``exp2`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.log.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.log`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.log.f32(float %Val)
+ declare double @llvm.log.f64(double %Val)
+ declare x86_fp80 @llvm.log.f80(x86_fp80 %Val)
+ declare fp128 @llvm.log.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.log.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.log.*``' intrinsics perform the log function.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``log`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.log10.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.log10`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.log10.f32(float %Val)
+ declare double @llvm.log10.f64(double %Val)
+ declare x86_fp80 @llvm.log10.f80(x86_fp80 %Val)
+ declare fp128 @llvm.log10.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.log10.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.log10.*``' intrinsics perform the log10 function.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``log10`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.log2.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.log2`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.log2.f32(float %Val)
+ declare double @llvm.log2.f64(double %Val)
+ declare x86_fp80 @llvm.log2.f80(x86_fp80 %Val)
+ declare fp128 @llvm.log2.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.log2.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.log2.*``' intrinsics perform the log2 function.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``log2`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.fma.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.fma`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.fma.f32(float %a, float %b, float %c)
+ declare double @llvm.fma.f64(double %a, double %b, double %c)
+ declare x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
+ declare fp128 @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
+ declare ppc_fp128 @llvm.fma.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c)
+
+Overview:
+"""""""""
+
+The '``llvm.fma.*``' intrinsics perform the fused multiply-add
+operation.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``fma`` functions
+would.
+
+'``llvm.fabs.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.fabs`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.fabs.f32(float %Val)
+ declare double @llvm.fabs.f64(double %Val)
+ declare x86_fp80 @llvm.fabs.f80(x86_fp80 %Val)
+ declare fp128 @llvm.fabs.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.fabs.*``' intrinsics return the absolute value of the
+operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``fabs`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.floor.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.floor`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.floor.f32(float %Val)
+ declare double @llvm.floor.f64(double %Val)
+ declare x86_fp80 @llvm.floor.f80(x86_fp80 %Val)
+ declare fp128 @llvm.floor.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.floor.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.floor.*``' intrinsics return the floor of the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``floor`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.ceil.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.ceil`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.ceil.f32(float %Val)
+ declare double @llvm.ceil.f64(double %Val)
+ declare x86_fp80 @llvm.ceil.f80(x86_fp80 %Val)
+ declare fp128 @llvm.ceil.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.ceil.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.ceil.*``' intrinsics return the ceiling of the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``ceil`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.trunc.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.trunc`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.trunc.f32(float %Val)
+ declare double @llvm.trunc.f64(double %Val)
+ declare x86_fp80 @llvm.trunc.f80(x86_fp80 %Val)
+ declare fp128 @llvm.trunc.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.trunc.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.trunc.*``' intrinsics return the operand rounded to the
+nearest integer not larger in magnitude than the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``trunc`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.rint.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.rint`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.rint.f32(float %Val)
+ declare double @llvm.rint.f64(double %Val)
+ declare x86_fp80 @llvm.rint.f80(x86_fp80 %Val)
+ declare fp128 @llvm.rint.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.rint.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.rint.*``' intrinsics return the operand rounded to the
+nearest integer. They may raise an inexact floating-point exception if
+the operand isn't an integer.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``rint`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.nearbyint.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.nearbyint`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+ declare float @llvm.nearbyint.f32(float %Val)
+ declare double @llvm.nearbyint.f64(double %Val)
+ declare x86_fp80 @llvm.nearbyint.f80(x86_fp80 %Val)
+ declare fp128 @llvm.nearbyint.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.nearbyint.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.nearbyint.*``' intrinsics return the operand rounded to the
+nearest integer.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``nearbyint``
+functions would, and handles error conditions in the same way.
+
+Bit Manipulation Intrinsics
+---------------------------
+
+LLVM provides intrinsics for a few important bit manipulation
+operations. These allow efficient code generation for some algorithms.
+
+'``llvm.bswap.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic function. You can use bswap on any
+integer type that is an even number of bytes (i.e. BitWidth % 16 == 0).
+
+::
+
+ declare i16 @llvm.bswap.i16(i16 <id>)
+ declare i32 @llvm.bswap.i32(i32 <id>)
+ declare i64 @llvm.bswap.i64(i64 <id>)
+
+Overview:
+"""""""""
+
+The '``llvm.bswap``' family of intrinsics is used to byte swap integer
+values with an even number of bytes (positive multiple of 16 bits).
+These are useful for performing operations on data that is not in the
+target's native byte order.
+
+Semantics:
+""""""""""
+
+The ``llvm.bswap.i16`` intrinsic returns an i16 value that has the high
+and low byte of the input i16 swapped. Similarly, the ``llvm.bswap.i32``
+intrinsic returns an i32 value that has the four bytes of the input i32
+swapped, so that if the input bytes are numbered 0, 1, 2, 3 then the
+returned i32 will have its bytes in 3, 2, 1, 0 order. The
+``llvm.bswap.i48``, ``llvm.bswap.i64`` and other intrinsics extend this
+concept to additional even-byte lengths (6 bytes, 8 bytes and more,
+respectively).
+
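+For example, byte swapping a 32-bit constant:
+
+.. code-block:: llvm
+
+ ; 305419896 is 0x12345678; the result is 0x78563412
+ %swapped = call i32 @llvm.bswap.i32(i32 305419896)
+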
+'``llvm.ctpop.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use llvm.ctpop on any integer
+bit width, or on any vector with integer elements. Not all targets
+support all bit widths or vector types, however.
+
+::
+
+ declare i8 @llvm.ctpop.i8(i8 <src>)
+ declare i16 @llvm.ctpop.i16(i16 <src>)
+ declare i32 @llvm.ctpop.i32(i32 <src>)
+ declare i64 @llvm.ctpop.i64(i64 <src>)
+ declare i256 @llvm.ctpop.i256(i256 <src>)
+ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32> <src>)
+
+Overview:
+"""""""""
+
+The '``llvm.ctpop``' family of intrinsics counts the number of bits set
+in a value.
+
+Arguments:
+""""""""""
+
+The only argument is the value to be counted. The argument may be of any
+integer type, or a vector with integer elements. The return type must
+match the argument type.
+
+Semantics:
+""""""""""
+
+The '``llvm.ctpop``' intrinsic counts the 1's in a variable, or within
+each element of a vector.
+
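+For example:
+
+.. code-block:: llvm
+
+ %bits = call i32 @llvm.ctpop.i32(i32 255) ; yields 8
+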
+'``llvm.ctlz.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.ctlz`` on any
+integer bit width, or any vector whose elements are integers. Not all
+targets support all bit widths or vector types, however.
+
+::
+
+ declare i8 @llvm.ctlz.i8 (i8 <src>, i1 <is_zero_undef>)
+ declare i16 @llvm.ctlz.i16 (i16 <src>, i1 <is_zero_undef>)
+ declare i32 @llvm.ctlz.i32 (i32 <src>, i1 <is_zero_undef>)
+ declare i64 @llvm.ctlz.i64 (i64 <src>, i1 <is_zero_undef>)
+ declare i256 @llvm.ctlz.i256(i256 <src>, i1 <is_zero_undef>)
+ declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>)
+
+Overview:
+"""""""""
+
+The '``llvm.ctlz``' family of intrinsic functions counts the number of
+leading zeros in a variable.
+
+Arguments:
+""""""""""
+
+The first argument is the value to be counted. This argument may be of
+any integer type, or a vector with integer element type. The return
+type must match the first argument type.
+
+The second argument must be a constant and is a flag to indicate whether
+the intrinsic should ensure that a zero as the first argument produces a
+defined result. Historically some architectures did not provide a
+defined result for zero values as efficiently, and many algorithms are
+now predicated on avoiding zero-value inputs.
+
+Semantics:
+""""""""""
+
+The '``llvm.ctlz``' intrinsic counts the leading (most significant)
+zeros in a variable, or within each element of the vector. If
+``src == 0`` then the result is the size in bits of the type of ``src``
+if ``is_zero_undef == 0`` and ``undef`` otherwise. For example,
+``llvm.ctlz(i32 2) = 30``.
+
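+For example:
+
+.. code-block:: llvm
+
+ %lz = call i32 @llvm.ctlz.i32(i32 2, i1 true) ; yields 30
+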
+'``llvm.cttz.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.cttz`` on any
+integer bit width, or any vector of integer elements. Not all targets
+support all bit widths or vector types, however.
+
+::
+
+ declare i8 @llvm.cttz.i8 (i8 <src>, i1 <is_zero_undef>)
+ declare i16 @llvm.cttz.i16 (i16 <src>, i1 <is_zero_undef>)
+ declare i32 @llvm.cttz.i32 (i32 <src>, i1 <is_zero_undef>)
+ declare i64 @llvm.cttz.i64 (i64 <src>, i1 <is_zero_undef>)
+ declare i256 @llvm.cttz.i256(i256 <src>, i1 <is_zero_undef>)
+ declare <2 x i32> @llvm.cttz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>)
+
+Overview:
+"""""""""
+
+The '``llvm.cttz``' family of intrinsic functions counts the number of
+trailing zeros.
+
+Arguments:
+""""""""""
+
+The first argument is the value to be counted. This argument may be of
+any integer type, or a vector with integer element type. The return
+type must match the first argument type.
+
+The second argument must be a constant and is a flag to indicate whether
+the intrinsic should ensure that a zero as the first argument produces a
+defined result. Historically some architectures did not provide a
+defined result for zero values as efficiently, and many algorithms are
+now predicated on avoiding zero-value inputs.
+
+Semantics:
+""""""""""
+
+The '``llvm.cttz``' intrinsic counts the trailing (least significant)
+zeros in a variable, or within each element of a vector. If ``src == 0``
+then the result is the size in bits of the type of ``src`` if
+``is_zero_undef == 0`` and ``undef`` otherwise. For example,
+``llvm.cttz(2) = 1``.
+
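+For example:
+
+.. code-block:: llvm
+
+ %tz = call i32 @llvm.cttz.i32(i32 8, i1 true) ; yields 3
+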
+Arithmetic with Overflow Intrinsics
+-----------------------------------
+
+LLVM provides intrinsics for some arithmetic with overflow operations.
+
+'``llvm.sadd.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sadd.with.overflow``
+on any integer bit width.
+
+::
+
+ declare {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)
+ declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+ declare {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.sadd.with.overflow``' family of intrinsic functions perform
+a signed addition of the two arguments, and indicate whether an overflow
+occurred during the signed summation.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo signed
+addition.
+
+Semantics:
+""""""""""
+
+The '``llvm.sadd.with.overflow``' family of intrinsic functions perform
+a signed addition of the two variables. They return a structure --- the
+first element of which is the signed summation, and the second element
+of which is a bit specifying if the signed summation resulted in an
+overflow.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+ %sum = extractvalue {i32, i1} %res, 0
+ %obit = extractvalue {i32, i1} %res, 1
+ br i1 %obit, label %overflow, label %normal
+
+'``llvm.uadd.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.uadd.with.overflow``
+on any integer bit width.
+
+::
+
+ declare {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
+ declare {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.uadd.with.overflow``' family of intrinsic functions perform
+an unsigned addition of the two arguments, and indicate whether a carry
+occurred during the unsigned summation.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo unsigned
+addition.
+
+Semantics:
+""""""""""
+
+The '``llvm.uadd.with.overflow``' family of intrinsic functions perform
+an unsigned addition of the two arguments. They return a structure --- the
+first element of which is the sum, and the second element of which is a
+bit specifying if the unsigned summation resulted in a carry.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+ %sum = extractvalue {i32, i1} %res, 0
+ %obit = extractvalue {i32, i1} %res, 1
+ br i1 %obit, label %carry, label %normal
+
+'``llvm.ssub.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.ssub.with.overflow``
+on any integer bit width.
+
+::
+
+ declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
+ declare {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+ declare {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.ssub.with.overflow``' family of intrinsic functions perform
+a signed subtraction of the two arguments, and indicate whether an
+overflow occurred during the signed subtraction.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo signed
+subtraction.
+
+Semantics:
+""""""""""
+
+The '``llvm.ssub.with.overflow``' family of intrinsic functions perform
+a signed subtraction of the two arguments. They return a structure --- the
+first element of which is the subtraction, and the second element of
+which is a bit specifying if the signed subtraction resulted in an
+overflow.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+ %sum = extractvalue {i32, i1} %res, 0
+ %obit = extractvalue {i32, i1} %res, 1
+ br i1 %obit, label %overflow, label %normal
+
+'``llvm.usub.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.usub.with.overflow``
+on any integer bit width.
+
+::
+
+ declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
+ declare {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+ declare {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.usub.with.overflow``' family of intrinsic functions perform
+an unsigned subtraction of the two arguments, and indicate whether an
+overflow occurred during the unsigned subtraction.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo unsigned
+subtraction.
+
+Semantics:
+""""""""""
+
+The '``llvm.usub.with.overflow``' family of intrinsic functions perform
+an unsigned subtraction of the two arguments. They return a structure ---
+the first element of which is the subtraction, and the second element of
+which is a bit specifying if the unsigned subtraction resulted in an
+overflow.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+ %sum = extractvalue {i32, i1} %res, 0
+ %obit = extractvalue {i32, i1} %res, 1
+ br i1 %obit, label %overflow, label %normal
+
+'``llvm.smul.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.smul.with.overflow``
+on any integer bit width.
+
+::
+
+ declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
+ declare {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+ declare {i64, i1} @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.smul.with.overflow``' family of intrinsic functions perform
+a signed multiplication of the two arguments, and indicate whether an
+overflow occurred during the signed multiplication.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo signed
+multiplication.
+
+Semantics:
+""""""""""
+
+The '``llvm.smul.with.overflow``' family of intrinsic functions perform
+a signed multiplication of the two arguments. They return a structure ---
+the first element of which is the multiplication, and the second element
+of which is a bit specifying if the signed multiplication resulted in an
+overflow.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+ %prod = extractvalue {i32, i1} %res, 0
+ %obit = extractvalue {i32, i1} %res, 1
+ br i1 %obit, label %overflow, label %normal
+
+'``llvm.umul.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.umul.with.overflow``
+on any integer bit width.
+
+::
+
+ declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
+ declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+ declare {i64, i1} @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.umul.with.overflow``' family of intrinsic functions perform
+an unsigned multiplication of the two arguments, and indicate whether an
+overflow occurred during the unsigned multiplication.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo unsigned
+multiplication.
+
+Semantics:
+""""""""""
+
+The '``llvm.umul.with.overflow``' family of intrinsic functions perform
+an unsigned multiplication of the two arguments. They return a structure ---
+the first element of which is the result of the multiplication, and the
+second element of which is a bit specifying if the unsigned multiplication
+resulted in an overflow.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+ %prod = extractvalue {i32, i1} %res, 0
+ %obit = extractvalue {i32, i1} %res, 1
+ br i1 %obit, label %overflow, label %normal
+
+Specialised Arithmetic Intrinsics
+---------------------------------
+
+'``llvm.fmuladd.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
+ declare double @llvm.fmuladd.f64(double %a, double %b, double %c)
+
+Overview:
+"""""""""
+
+The '``llvm.fmuladd.*``' intrinsic functions represent multiply-add
+expressions that can be fused if the code generator determines that (a) the
+target instruction set has support for a fused operation, and (b) that the
+fused operation is more efficient than the equivalent, separate pair of mul
+and add instructions.
+
+Arguments:
+""""""""""
+
+The '``llvm.fmuladd.*``' intrinsics each take three arguments: two
+multiplicands, ``a`` and ``b``, and an addend ``c``.
+
+Semantics:
+""""""""""
+
+The expression:
+
+::
+
+ %0 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
+
+is equivalent to the expression a \* b + c, except that rounding will
+not be performed between the multiplication and addition steps if the
+code generator fuses the operations. Fusion is not guaranteed, even if
+the target platform supports it. If a fused multiply-add is required, the
+corresponding ``llvm.fma.*`` intrinsic function should be used instead.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields {float}:r2 = (a * b) + c
+
+Half Precision Floating Point Intrinsics
+----------------------------------------
+
+For most target platforms, half precision floating point is a
+storage-only format. This means that it is a dense encoding (in memory)
+but does not support computation in the format.
+
+This means that code must first load the half-precision floating point
+value as an i16, then convert it to float with
+:ref:`llvm.convert.from.fp16 <int_convert_from_fp16>`. Computation can
+then be performed on the float value (including extending to double
+etc). To store the value back to memory, it is first converted to float
+if needed, then converted to i16 with
+:ref:`llvm.convert.to.fp16 <int_convert_to_fp16>`, and finally stored as
+an i16 value.
+
+.. _int_convert_to_fp16:
+
+'``llvm.convert.to.fp16``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare i16 @llvm.convert.to.fp16(float %a)
+
+Overview:
+"""""""""
+
+The '``llvm.convert.to.fp16``' intrinsic function performs a conversion
+from single precision floating point format to half precision floating
+point format.
+
+Arguments:
+""""""""""
+
+The intrinsic function takes a single argument: the value to be
+converted.
+
+Semantics:
+""""""""""
+
+The '``llvm.convert.to.fp16``' intrinsic function performs a conversion
+from single precision floating point format to half precision floating
+point format. The return value is an ``i16`` which contains the
+converted number.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call i16 @llvm.convert.to.fp16(float %a)
+ store i16 %res, i16* @x, align 2
+
+.. _int_convert_from_fp16:
+
+'``llvm.convert.from.fp16``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare float @llvm.convert.from.fp16(i16 %a)
+
+Overview:
+"""""""""
+
+The '``llvm.convert.from.fp16``' intrinsic function performs a
+conversion from half precision floating point format to single precision
+floating point format.
+
+Arguments:
+""""""""""
+
+The intrinsic function takes a single argument: the value to be
+converted.
+
+Semantics:
+""""""""""
+
+The '``llvm.convert.from.fp16``' intrinsic function performs a
+conversion from half precision floating point format to single
+precision floating point format. The input half-float value is
+represented by an ``i16`` value.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %a = load i16* @x, align 2
+ %res = call float @llvm.convert.from.fp16(i16 %a)
+
+Debugger Intrinsics
+-------------------
+
+The LLVM debugger intrinsics (which all start with the ``llvm.dbg.``
+prefix) are described in the `LLVM Source Level
+Debugging <SourceLevelDebugging.html#format_common_intrinsics>`_
+document.
+
+Exception Handling Intrinsics
+-----------------------------
+
+The LLVM exception handling intrinsics (which all start with the
+``llvm.eh.`` prefix) are described in the `LLVM Exception
+Handling <ExceptionHandling.html#format_common_intrinsics>`_ document.
+
+.. _int_trampoline:
+
+Trampoline Intrinsics
+---------------------
+
+These intrinsics make it possible to excise one parameter, marked with
+the :ref:`nest <nest>` attribute, from a function. The result is a
+callable function pointer lacking the nest parameter - the caller does
+not need to provide a value for it. Instead, the value to use is stored
+in advance in a "trampoline", a block of memory usually allocated on the
+stack, which also contains code to splice the nest value into the
+argument list. This is used to implement the GCC nested function address
+extension.
+
+For example, if the function is ``i32 f(i8* nest %c, i32 %x, i32 %y)``
+then the resulting function pointer has signature ``i32 (i32, i32)*``.
+It can be created as follows:
+
+.. code-block:: llvm
+
+ %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
+ %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
+ call void @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval)
+ %p = call i8* @llvm.adjust.trampoline(i8* %tramp1)
+ %fp = bitcast i8* %p to i32 (i32, i32)*
+
+The call ``%val = call i32 %fp(i32 %x, i32 %y)`` is then equivalent to
+``%val = call i32 @f(i8* %nval, i32 %x, i32 %y)``.
+
+.. _int_it:
+
+'``llvm.init.trampoline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>)
+
+Overview:
+"""""""""
+
+This fills the memory pointed to by ``tramp`` with executable code,
+turning it into a trampoline.
+
+Arguments:
+""""""""""
+
+The ``llvm.init.trampoline`` intrinsic takes three arguments, all
+pointers. The ``tramp`` argument must point to a sufficiently large and
+sufficiently aligned block of memory; this memory is written to by the
+intrinsic. Note that the size and the alignment are target-specific -
+LLVM currently provides no portable way of determining them, so a
+front-end that generates this intrinsic needs to have some
+target-specific knowledge. The ``func`` argument must hold a function
+bitcast to an ``i8*``.
+
+Semantics:
+""""""""""
+
+The block of memory pointed to by ``tramp`` is filled with target
+dependent code, turning it into a function. Then ``tramp`` needs to be
+passed to :ref:`llvm.adjust.trampoline <int_at>` to get a pointer which can
+be :ref:`bitcast (to a new function) and called <int_trampoline>`. The new
+function's signature is the same as that of ``func`` with any arguments
+marked with the ``nest`` attribute removed. At most one such ``nest``
+argument is allowed, and it must be of pointer type. Calling the new
+function is equivalent to calling ``func`` with the same argument list,
+but with ``nval`` used for the missing ``nest`` argument. If, after
+calling ``llvm.init.trampoline``, the memory pointed to by ``tramp`` is
+modified, then the effect of any later call to the returned function
+pointer is undefined.
+
+.. _int_at:
+
+'``llvm.adjust.trampoline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare i8* @llvm.adjust.trampoline(i8* <tramp>)
+
+Overview:
+"""""""""
+
+This performs any required machine-specific adjustment to the address of
+a trampoline (passed as ``tramp``).
+
+Arguments:
+""""""""""
+
+``tramp`` must point to a block of memory which already has trampoline
+code filled in by a previous call to
+:ref:`llvm.init.trampoline <int_it>`.
+
+Semantics:
+""""""""""
+
+On some architectures the address of the code to be executed needs to be
+different from the address where the trampoline is actually stored. This
+intrinsic returns the executable address corresponding to ``tramp``
+after performing the required machine-specific adjustments. The pointer
+returned can then be :ref:`bitcast and executed <int_trampoline>`.
+
+Memory Use Markers
+------------------
+
+This class of intrinsics provides information about the lifetime of
+memory objects and ranges where variables are immutable.
+
+'``llvm.lifetime.start``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.lifetime.start(i64 <size>, i8* nocapture <ptr>)
+
+Overview:
+"""""""""
+
+The '``llvm.lifetime.start``' intrinsic specifies the start of a memory
+object's lifetime.
+
+Arguments:
+""""""""""
+
+The first argument is a constant integer representing the size of the
+object, or -1 if it is variable sized. The second argument is a pointer
+to the object.
+
+Semantics:
+""""""""""
+
+This intrinsic indicates that before this point in the code, the value
+of the memory pointed to by ``ptr`` is dead. This means that it is known
+to never be used and has an undefined value. A load from the pointer
+that precedes this intrinsic can be replaced with ``'undef'``.
+
+'``llvm.lifetime.end``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.lifetime.end(i64 <size>, i8* nocapture <ptr>)
+
+Overview:
+"""""""""
+
+The '``llvm.lifetime.end``' intrinsic specifies the end of a memory
+object's lifetime.
+
+Arguments:
+""""""""""
+
+The first argument is a constant integer representing the size of the
+object, or -1 if it is variable sized. The second argument is a pointer
+to the object.
+
+Semantics:
+""""""""""
+
+This intrinsic indicates that after this point in the code, the value of
+the memory pointed to by ``ptr`` is dead. This means that it is known to
+never be used and has an undefined value. Any stores into the memory
+object following this intrinsic may be removed as dead.
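+
+A minimal sketch of how a front-end might bracket a stack object's useful
+life (the size, alignment, and value names here are illustrative):
+
+.. code-block:: llvm
+
+ %buf = alloca [16 x i8], align 4
+ %p = bitcast [16 x i8]* %buf to i8*
+ call void @llvm.lifetime.start(i64 16, i8* %p)
+ ; ... uses of %buf ...
+ call void @llvm.lifetime.end(i64 16, i8* %p)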
+
+'``llvm.invariant.start``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare {}* @llvm.invariant.start(i64 <size>, i8* nocapture <ptr>)
+
+Overview:
+"""""""""
+
+The '``llvm.invariant.start``' intrinsic specifies that the contents of
+a memory object will not change.
+
+Arguments:
+""""""""""
+
+The first argument is a constant integer representing the size of the
+object, or -1 if it is variable sized. The second argument is a pointer
+to the object.
+
+Semantics:
+""""""""""
+
+This intrinsic indicates that until an ``llvm.invariant.end`` that uses
+the return value, the referenced memory location is constant and
+unchanging.
+
+'``llvm.invariant.end``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.invariant.end({}* <start>, i64 <size>, i8* nocapture <ptr>)
+
+Overview:
+"""""""""
+
+The '``llvm.invariant.end``' intrinsic specifies that the contents of a
+memory object are mutable.
+
+Arguments:
+""""""""""
+
+The first argument is the matching ``llvm.invariant.start`` intrinsic.
+The second argument is a constant integer representing the size of the
+object, or -1 if it is variable sized, and the third argument is a
+pointer to the object.
+
+Semantics:
+""""""""""
+
+This intrinsic indicates that the memory is mutable again.
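+
+A minimal sketch of a matched pair of markers (the size and pointer are
+illustrative):
+
+.. code-block:: llvm
+
+ %inv = call {}* @llvm.invariant.start(i64 4, i8* %ptr)
+ ; loads through %ptr may be assumed to return the same value here
+ call void @llvm.invariant.end({}* %inv, i64 4, i8* %ptr)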
+
+General Intrinsics
+------------------
+
+This class of intrinsics is designed to be generic and has no specific
+purpose.
+
+'``llvm.var.annotation``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.var.annotation(i8* <val>, i8* <str>, i8* <str>, i32 <int>)
+
+Overview:
+"""""""""
+
+The '``llvm.var.annotation``' intrinsic attaches an annotation string to
+a local variable.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to a value, the second is a pointer to a
+global string, the third is a pointer to a global string which is the
+source file name, and the last argument is the line number.
+
+Semantics:
+""""""""""
+
+This intrinsic allows annotation of local variables with arbitrary
+strings. This can be useful for special purpose optimizations that want
+to look for these annotations. These have no other defined use; they are
+ignored by code generation and optimization.
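+
+As an illustrative sketch (the annotation strings and line number are
+hypothetical), a front-end might emit:
+
+.. code-block:: llvm
+
+ @.ann = private unnamed_addr constant [7 x i8] c"my_ann\00"
+ @.file = private unnamed_addr constant [4 x i8] c"t.c\00"
+
+ define void @example() {
+ %x = alloca i32
+ %p = bitcast i32* %x to i8*
+ call void @llvm.var.annotation(i8* %p,
+ i8* getelementptr inbounds ([7 x i8]* @.ann, i32 0, i32 0),
+ i8* getelementptr inbounds ([4 x i8]* @.file, i32 0, i32 0),
+ i32 42)
+ ret void
+ }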
+
+'``llvm.ptr.annotation.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use '``llvm.ptr.annotation``' on a
+pointer to an integer of any width. *NOTE* you must specify an address space for
+the pointer. The identifier for the default address space is the integer
+'``0``'.
+
+::
+
+ declare i8* @llvm.ptr.annotation.p<address space>i8(i8* <val>, i8* <str>, i8* <str>, i32 <int>)
+ declare i16* @llvm.ptr.annotation.p<address space>i16(i16* <val>, i8* <str>, i8* <str>, i32 <int>)
+ declare i32* @llvm.ptr.annotation.p<address space>i32(i32* <val>, i8* <str>, i8* <str>, i32 <int>)
+ declare i64* @llvm.ptr.annotation.p<address space>i64(i64* <val>, i8* <str>, i8* <str>, i32 <int>)
+ declare i256* @llvm.ptr.annotation.p<address space>i256(i256* <val>, i8* <str>, i8* <str>, i32 <int>)
+
+Overview:
+"""""""""
+
+The '``llvm.ptr.annotation``' intrinsic attaches an annotation string to
+a pointer to an integer and returns the pointer.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to an integer value of arbitrary bitwidth
+(result of some expression), the second is a pointer to a global string, the
+third is a pointer to a global string which is the source file name, and the
+last argument is the line number. It returns the value of the first argument.
+
+Semantics:
+""""""""""
+
+This intrinsic allows annotation of a pointer to an integer with arbitrary
+strings. This can be useful for special purpose optimizations that want to look
+for these annotations. These have no other defined use; they are ignored by code
+generation and optimization.
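+
+A short sketch of a possible use (the names, address space, and line
+number are hypothetical, reusing the annotation string globals from the
+``llvm.var.annotation`` example above):
+
+.. code-block:: llvm
+
+ %annotated = call i32* @llvm.ptr.annotation.p0i32(i32* %p,
+ i8* getelementptr inbounds ([7 x i8]* @.ann, i32 0, i32 0),
+ i8* getelementptr inbounds ([4 x i8]* @.file, i32 0, i32 0),
+ i32 7)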
+
+'``llvm.annotation.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use '``llvm.annotation``' on
+any integer bit width.
+
+::
+
+ declare i8 @llvm.annotation.i8(i8 <val>, i8* <str>, i8* <str>, i32 <int>)
+ declare i16 @llvm.annotation.i16(i16 <val>, i8* <str>, i8* <str>, i32 <int>)
+ declare i32 @llvm.annotation.i32(i32 <val>, i8* <str>, i8* <str>, i32 <int>)
+ declare i64 @llvm.annotation.i64(i64 <val>, i8* <str>, i8* <str>, i32 <int>)
+ declare i256 @llvm.annotation.i256(i256 <val>, i8* <str>, i8* <str>, i32 <int>)
+
+Overview:
+"""""""""
+
+The '``llvm.annotation``' intrinsic attaches an annotation string to an
+integer value and returns the value.
+
+Arguments:
+""""""""""
+
+The first argument is an integer value (result of some expression), the
+second is a pointer to a global string, the third is a pointer to a
+global string which is the source file name, and the last argument is
+the line number. It returns the value of the first argument.
+
+Semantics:
+""""""""""
+
+This intrinsic allows annotations to be put on arbitrary expressions
+with arbitrary strings. This can be useful for special purpose
+optimizations that want to look for these annotations. These have no
+other defined use; they are ignored by code generation and optimization.
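+
+A short sketch (again reusing the hypothetical string globals from
+above):
+
+.. code-block:: llvm
+
+ %ann = call i32 @llvm.annotation.i32(i32 %val,
+ i8* getelementptr inbounds ([7 x i8]* @.ann, i32 0, i32 0),
+ i8* getelementptr inbounds ([4 x i8]* @.file, i32 0, i32 0),
+ i32 17)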
+
+'``llvm.trap``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.trap() noreturn nounwind
+
+Overview:
+"""""""""
+
+The '``llvm.trap``' intrinsic causes the program to execute a trap.
+
+Arguments:
+""""""""""
+
+None.
+
+Semantics:
+""""""""""
+
+This intrinsic is lowered to the target dependent trap instruction. If
+the target does not have a trap instruction, this intrinsic will be
+lowered to a call of the ``abort()`` function.
+
+'``llvm.debugtrap``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.debugtrap() nounwind
+
+Overview:
+"""""""""
+
+The '``llvm.debugtrap``' intrinsic causes a trap intended to get the
+attention of a debugger.
+
+Arguments:
+""""""""""
+
+None.
+
+Semantics:
+""""""""""
+
+This intrinsic is lowered to code intended to cause an execution trap,
+with the aim of requesting the attention of a debugger.
+
+'``llvm.stackprotector``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.stackprotector(i8* <guard>, i8** <slot>)
+
+Overview:
+"""""""""
+
+The ``llvm.stackprotector`` intrinsic takes the ``guard`` and stores it
+onto the stack at ``slot``. The stack slot is adjusted to ensure that it
+is placed on the stack before local variables.
+
+Arguments:
+""""""""""
+
+The ``llvm.stackprotector`` intrinsic requires two pointer arguments.
+The first argument is the value loaded from the stack guard
+``@__stack_chk_guard``. The second argument is an ``alloca`` that has
+enough space to hold the value of the guard.
+
+Semantics:
+""""""""""
+
+This intrinsic causes the prologue/epilogue inserter to force the
+position of the ``AllocaInst`` stack slot to be before local variables
+on the stack. This is to ensure that if a local variable on the stack is
+overwritten, it will destroy the value of the guard. When the function
+exits, the guard on the stack is checked against the original guard. If
+they are different, then the program aborts by calling the
+``__stack_chk_fail()`` function.
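+
+A minimal sketch of the calls involved, assuming the standard
+``@__stack_chk_guard`` symbol (in practice this intrinsic is normally
+emitted by the compiler rather than written by hand):
+
+.. code-block:: llvm
+
+ @__stack_chk_guard = external global i8*
+
+ ; in the function prologue:
+ %slot = alloca i8*
+ %guard = load i8** @__stack_chk_guard
+ call void @llvm.stackprotector(i8* %guard, i8** %slot)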
+
+'``llvm.objectsize``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare i32 @llvm.objectsize.i32(i8* <object>, i1 <min>)
+ declare i64 @llvm.objectsize.i64(i8* <object>, i1 <min>)
+
+Overview:
+"""""""""
+
+The ``llvm.objectsize`` intrinsic is designed to provide information to
+the optimizers to determine at compile time whether a) an operation
+(like memcpy) will overflow a buffer that corresponds to an object, or
+b) a runtime check for overflow is unnecessary. An object in this
+context means an allocation of a specific class, structure, array, or
+other object.
+
+Arguments:
+""""""""""
+
+The ``llvm.objectsize`` intrinsic takes two arguments. The first
+argument is a pointer to or into the ``object``. The second argument is
+a boolean and determines whether ``llvm.objectsize`` returns 0 (if true)
+or -1 (if false) when the object size is unknown. The second argument
+only accepts constants.
+
+Semantics:
+""""""""""
+
+The ``llvm.objectsize`` intrinsic is lowered to a constant representing
+the size of the object concerned. If the size cannot be determined at
+compile time, ``llvm.objectsize`` returns ``i32/i64 -1 or 0`` (depending
+on the ``min`` argument).
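+
+A short sketch in which the size is known at compile time, so the call
+can fold to a constant (the buffer is illustrative):
+
+.. code-block:: llvm
+
+ %obj = alloca [32 x i8]
+ %p = getelementptr inbounds [32 x i8]* %obj, i32 0, i32 0
+ %size = call i32 @llvm.objectsize.i32(i8* %p, i1 false) ; folds to i32 32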
+
+'``llvm.expect``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare i32 @llvm.expect.i32(i32 <val>, i32 <expected_val>)
+ declare i64 @llvm.expect.i64(i64 <val>, i64 <expected_val>)
+
+Overview:
+"""""""""
+
+The ``llvm.expect`` intrinsic provides information about the expected
+(most probable) value of ``val``, which can be used by optimizers.
+
+Arguments:
+""""""""""
+
+The ``llvm.expect`` intrinsic takes two arguments. The first argument is
+a value. The second argument is the expected value; it must be a
+constant, not a variable.
+
+Semantics:
+""""""""""
+
+This intrinsic is lowered to ``val``.
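+
+As an illustrative sketch (the value names are hypothetical), a
+front-end might hint that a condition is expected to hold:
+
+.. code-block:: llvm
+
+ %conv = zext i1 %cmp to i32
+ %exp = call i32 @llvm.expect.i32(i32 %conv, i32 1)
+ %hint = icmp ne i32 %exp, 0
+ br i1 %hint, label %likely, label %unlikely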
+
+'``llvm.donothing``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare void @llvm.donothing() nounwind readnone
+
+Overview:
+"""""""""
+
+The ``llvm.donothing`` intrinsic doesn't perform any operation. It's the
+only intrinsic that can be called with an invoke instruction.
+
+Arguments:
+""""""""""
+
+None.
+
+Semantics:
+""""""""""
+
+This intrinsic does nothing, and it's removed by optimizers and ignored
+by codegen.
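+
+As an illustrative sketch (the labels are hypothetical), ``llvm.donothing``
+may be used with an ``invoke`` instruction:
+
+.. code-block:: llvm
+
+ invoke void @llvm.donothing()
+ to label %cont unwind label %lpad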
diff --git a/docs/Lexicon.rst b/docs/Lexicon.rst
index d568c0b302ec..11f1341f5cbd 100644
--- a/docs/Lexicon.rst
+++ b/docs/Lexicon.rst
@@ -1,5 +1,3 @@
-.. _lexicon:
-
================
The LLVM Lexicon
================
@@ -17,11 +15,28 @@ A
**ADCE**
Aggressive Dead Code Elimination
+**AST**
+ Abstract Syntax Tree.
+
+ Due to Clang's influence (mostly the fact that parsing and semantic
+ analysis are so intertwined for C and especially C++), the typical
+ working definition of AST in the LLVM community is roughly "the
+ compiler's first complete symbolic (as opposed to textual)
+ representation of an input program".
+ As such, an "AST" might be a more general graph instead of a "tree"
+ (consider the symbolic representation for the type of a typical "linked
+ list node"). This working definition is closer to what some authors
+ call an "annotated abstract syntax tree".
+
+ Consult your favorite compiler book or search engine for more details.
+
B
-
+.. _lexicon-bb-vectorization:
+
**BB Vectorization**
- Basic Block Vectorization
+ Basic-Block Vectorization
**BURS**
Bottom Up Rewriting System --- A method of instruction selection for code
@@ -185,6 +200,10 @@ S
**SCCP**
Sparse Conditional Constant Propagation
+**SLP**
+ Superword-Level Parallelism, same as :ref:`Basic-Block Vectorization
+ <lexicon-bb-vectorization>`.
+
**SRoA**
Scalar Replacement of Aggregates
diff --git a/docs/LinkTimeOptimization.rst b/docs/LinkTimeOptimization.rst
index 7eacf0bd0d01..c15abd325ed0 100644
--- a/docs/LinkTimeOptimization.rst
+++ b/docs/LinkTimeOptimization.rst
@@ -1,5 +1,3 @@
-.. _lto:
-
======================================================
LLVM Link Time Optimization: Design and Implementation
======================================================
@@ -85,9 +83,10 @@ invokes system linker.
return foo1();
}
-.. code-block:: bash
+To compile, run:
+
+.. code-block:: console
- --- command lines ---
% clang -emit-llvm -c a.c -o a.o # <-- a.o is LLVM bitcode file
% clang -c main.c -o main.o # <-- main.o is native object file
% clang a.o main.o -o main # <-- standard link command without modifications
@@ -96,7 +95,7 @@ invokes system linker.
visible symbol defined in LLVM bitcode file. The linker completes its usual
symbol resolution pass and finds that ``foo2()`` is not used
anywhere. This information is used by the LLVM optimizer and it
- removes ``foo2()``.</li>
+ removes ``foo2()``.
* As soon as ``foo2()`` is removed, the optimizer recognizes that condition ``i
< 0`` is always false, which means ``foo3()`` is never used. Hence, the
diff --git a/docs/Makefile.sphinx b/docs/Makefile.sphinx
index 81c13de9cd9e..21f66488b2b7 100644
--- a/docs/Makefile.sphinx
+++ b/docs/Makefile.sphinx
@@ -46,10 +46,6 @@ clean:
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
- @# FIXME: Remove this `cp` once HTML->Sphinx transition is completed.
- @# Kind of a hack, but HTML-formatted docs are on the way out anyway.
- @echo "Copying legacy HTML-formatted docs into $(BUILDDIR)/html"
- @cp -a *.html tutorial $(BUILDDIR)/html
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
diff --git a/docs/MakefileGuide.rst b/docs/MakefileGuide.rst
index d2bdd24a9e7f..3e9090788654 100644
--- a/docs/MakefileGuide.rst
+++ b/docs/MakefileGuide.rst
@@ -1,5 +1,3 @@
-.. _makefile_guide:
-
===================
LLVM Makefile Guide
===================
@@ -60,7 +58,7 @@ To use the makefile system, you simply create a file named ``Makefile`` in your
directory and declare values for certain variables. The variables and values
that you select determine what the makefile system will do. These variables
enable rules and processing in the makefile system that automatically Do The
-Right Thing&trade;.
+Right Thing (TM).
Including Makefiles
-------------------
@@ -170,9 +168,9 @@ openable with the ``dlopen`` function and searchable with the ``dlsym`` function
(or your operating system's equivalents). While this isn't strictly necessary on
Linux and a few other platforms, it is required on systems like HP-UX and
Darwin. You should use ``LOADABLE_MODULE`` for any shared library that you
-intend to be loaded into an tool via the ``-load`` option. See the
-`WritingAnLLVMPass.html <WritingAnLLVMPass.html#makefile>`_ document for an
-example of why you might want to do this.
+intend to be loaded into a tool via the ``-load`` option. `Pass documentation
+<writing-an-llvm-pass-makefile>`_ has an example of why you might want to do
+this.
Bitcode Modules
^^^^^^^^^^^^^^^
@@ -241,7 +239,7 @@ and the names of the libraries you wish to link with the tool. For example:
says that we are to build a tool name ``mytool`` and that it requires three
libraries: ``mylib``, ``LLVMSupport.a`` and ``LLVMSystem.a``.
-Note that two different variables are use to indicate which libraries are
+Note that two different variables are used to indicate which libraries are
linked: ``USEDLIBS`` and ``LLVMLIBS``. This distinction is necessary to support
projects. ``LLVMLIBS`` refers to the LLVM libraries found in the LLVM object
directory. ``USEDLIBS`` refers to the libraries built by your project. In the
@@ -339,7 +337,7 @@ the invocation of ``make check-local`` in the ``test`` directory. The intended
usage for this is to assist in running specific suites of tests. If
``TESTSUITE`` is not set, the implementation of ``check-local`` should run all
normal tests. It is up to the project to define what different values for
-``TESTSUTE`` will do. See the `Testing Guide <TestingGuide.html>`_ for further
+``TESTSUITE`` will do. See the :doc:`Testing Guide <TestingGuide>` for further
details.
``check-local``
@@ -348,9 +346,9 @@ details.
This target should be implemented by the ``Makefile`` in the project's ``test``
directory. It is invoked by the ``check`` target elsewhere. Each project is
free to define the actions of ``check-local`` as appropriate for that
-project. The LLVM project itself uses dejagnu to run a suite of feature and
-regresson tests. Other projects may choose to use dejagnu or any other testing
-mechanism.
+project. The LLVM project itself uses the :doc:`Lit <CommandGuide/lit>` testing
+tool to run a suite of feature and regression tests. Other projects may choose
+to use :program:`lit` or any other testing mechanism.
``clean``
---------
@@ -358,7 +356,7 @@ mechanism.
This target cleans the build directory, recursively removing all things that the
Makefile builds. The cleaning rules have been made guarded so they shouldn't go
awry (via ``rm -f $(UNSET_VARIABLE)/*`` which will attempt to erase the entire
-directory structure.
+directory structure).
``clean-local``
---------------
@@ -606,8 +604,8 @@ system that tell it what to do for the current directory.
the build process, such as code generators (e.g. ``tblgen``).
``OPTIONAL_DIRS``
- Specify a set of directories that may be built, if they exist, but its not
- an error for them not to exist.
+ Specify a set of directories that may be built, if they exist, but it is
+ not an error for them not to exist.
``PARALLEL_DIRS``
Specify a set of directories to build recursively and in parallel if the
@@ -701,6 +699,9 @@ The override variables are given below:
``CFLAGS``
Additional flags to be passed to the 'C' compiler.
+``CPPFLAGS``
+ Additional flags passed to the C/C++ preprocessor.
+
``CXX``
Specifies the path to the C++ compiler.
diff --git a/docs/MarkedUpDisassembly.rst b/docs/MarkedUpDisassembly.rst
index e1282e102ebe..cc4dbc817e08 100644
--- a/docs/MarkedUpDisassembly.rst
+++ b/docs/MarkedUpDisassembly.rst
@@ -1,5 +1,3 @@
-.. _marked_up_disassembly:
-
=======================================
LLVM's Optional Rich Disassembly Output
=======================================
diff --git a/docs/NVPTXUsage.rst b/docs/NVPTXUsage.rst
new file mode 100644
index 000000000000..5451619686d9
--- /dev/null
+++ b/docs/NVPTXUsage.rst
@@ -0,0 +1,276 @@
+=============================
+User Guide for NVPTX Back-end
+=============================
+
+.. contents::
+ :local:
+ :depth: 3
+
+
+Introduction
+============
+
+To support GPU programming, the NVPTX back-end supports a subset of LLVM IR
+along with a defined set of conventions used to represent GPU programming
+concepts. This document provides an overview of the general usage of the
+back-end, including a description of the conventions used and the set of
+accepted LLVM IR.
+
+.. note::
+
+ This document assumes a basic familiarity with CUDA and the PTX
+ assembly language. Information about the CUDA Driver API and the PTX assembly
+ language can be found in the `CUDA documentation
+ <http://docs.nvidia.com/cuda/index.html>`_.
+
+
+
+Conventions
+===========
+
+Marking Functions as Kernels
+----------------------------
+
+In PTX, there are two types of functions: *device functions*, which are only
+callable by device code, and *kernel functions*, which are callable by host
+code. By default, the back-end will emit device functions. Metadata is used to
+declare a function as a kernel function. This metadata is attached to the
+``nvvm.annotations`` named metadata object, and has the following format:
+
+.. code-block:: llvm
+
+ !0 = metadata !{<function-ref>, metadata !"kernel", i32 1}
+
+The first parameter is a reference to the kernel function. The following
+example shows a kernel function calling a device function in LLVM IR. The
+function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not.
+
+.. code-block:: llvm
+
+ define float @my_fmad(float %x, float %y, float %z) {
+ %mul = fmul float %x, %y
+ %add = fadd float %mul, %z
+ ret float %add
+ }
+
+ define void @my_kernel(float* %ptr) {
+ %val = load float* %ptr
+ %ret = call float @my_fmad(float %val, float %val, float %val)
+ store float %ret, float* %ptr
+ ret void
+ }
+
+ !nvvm.annotations = !{!1}
+ !1 = metadata !{void (float*)* @my_kernel, metadata !"kernel", i32 1}
+
+When compiled, the PTX kernel functions are callable by host-side code.
+
+
+Address Spaces
+--------------
+
+The NVPTX back-end uses the following address space mapping:
+
+ ============= ======================
+ Address Space Memory Space
+ ============= ======================
+ 0 Generic
+ 1 Global
+ 2 Internal Use
+ 3 Shared
+ 4 Constant
+ 5 Local
+ ============= ======================
+
+Every global variable and pointer type is assigned to one of these address
+spaces, with 0 being the default address space. Intrinsics are provided which
+can be used to convert pointers between the generic and non-generic address
+spaces.
+
+As an example, the following IR will define an array ``@g`` that resides in
+global device memory.
+
+.. code-block:: llvm
+
+ @g = internal addrspace(1) global [4 x i32] [ i32 0, i32 1, i32 2, i32 3 ]
+
+LLVM IR functions can read from and write to this array, and host-side
+code can copy data to it by name with the CUDA Driver API.
+
+Note that since address space 0 is the generic space, it is illegal to have
+global variables in address space 0. Address space 0 is the default address
+space in LLVM, so the ``addrspace(N)`` annotation is *required* for global
+variables.
+
+
+NVPTX Intrinsics
+================
+
+Address Space Conversion
+------------------------
+
+'``llvm.nvvm.ptr.*.to.gen``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+These are overloaded intrinsics. You can use these on any pointer types.
+
+.. code-block:: llvm
+
+ declare i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)*)
+ declare i8* @llvm.nvvm.ptr.shared.to.gen.p0i8.p3i8(i8 addrspace(3)*)
+ declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
+ declare i8* @llvm.nvvm.ptr.local.to.gen.p0i8.p5i8(i8 addrspace(5)*)
+
+Overview:
+"""""""""
+
+The '``llvm.nvvm.ptr.*.to.gen``' intrinsics convert a pointer in a non-generic
+address space to a generic address space pointer.
+
+Semantics:
+""""""""""
+
+These intrinsics modify the pointer value to be a valid generic address space
+pointer.
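+
+For example (the value names are illustrative), a global-space pointer
+can be converted to the generic space:
+
+.. code-block:: llvm
+
+ %gen = call i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)* %gptr)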
+
+
+'``llvm.nvvm.ptr.gen.to.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+These are overloaded intrinsics. You can use these on any pointer types.
+
+.. code-block:: llvm
+
+ declare i8 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i8.p0i8(i8*)
+ declare i8 addrspace(3)* @llvm.nvvm.ptr.gen.to.shared.p3i8.p0i8(i8*)
+ declare i8 addrspace(4)* @llvm.nvvm.ptr.gen.to.constant.p4i8.p0i8(i8*)
+ declare i8 addrspace(5)* @llvm.nvvm.ptr.gen.to.local.p5i8.p0i8(i8*)
+
+Overview:
+"""""""""
+
+The '``llvm.nvvm.ptr.gen.to.*``' intrinsics convert a pointer in the generic
+address space to a pointer in the target address space. Note that these
+intrinsics are only useful if the target address space of the pointer is
+known. It is not legal to use address space conversion
+intrinsics to convert a pointer from one non-generic address space to another
+non-generic address space.
+
+Semantics:
+""""""""""
+
+These intrinsics modify the pointer value to be a valid pointer in the target
+non-generic address space.
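+
+For example (illustrative names), a generic pointer known to address
+global memory can be converted back:
+
+.. code-block:: llvm
+
+ %gptr = call i8 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i8.p0i8(i8* %gen)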
+
+
+Reading PTX Special Registers
+-----------------------------
+
+'``llvm.nvvm.read.ptx.sreg.*``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+ declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+ declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+ declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+ declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+ declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+ declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+ declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+ declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+
+Overview:
+"""""""""
+
+The '``@llvm.nvvm.read.ptx.sreg.*``' intrinsics provide access to the PTX
+special registers, in particular the kernel launch bounds. These registers
+map in the following way to CUDA builtins:
+
+ ============= =====================================
+ CUDA Builtin  PTX Special Register Intrinsic
+ ============= =====================================
+ ``threadIdx`` ``@llvm.nvvm.read.ptx.sreg.tid.*``
+ ``blockIdx``  ``@llvm.nvvm.read.ptx.sreg.ctaid.*``
+ ``blockDim``  ``@llvm.nvvm.read.ptx.sreg.ntid.*``
+ ``gridDim``   ``@llvm.nvvm.read.ptx.sreg.nctaid.*``
+ ============= =====================================
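+
+As an illustrative sketch, a global thread index along the ``x``
+dimension can be computed from these intrinsics:
+
+.. code-block:: llvm
+
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %ctaid = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+ %ntid = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+ %base = mul i32 %ctaid, %ntid
+ %gid = add i32 %base, %tid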
+
+
+Barriers
+--------
+
+'``llvm.nvvm.barrier0``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare void @llvm.nvvm.barrier0()
+
+Overview:
+"""""""""
+
+The '``@llvm.nvvm.barrier0()``' intrinsic emits a PTX ``bar.sync 0``
+instruction, equivalent to the ``__syncthreads()`` call in CUDA.
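+
+A minimal sketch of staging a value through shared memory (the pointers
+are illustrative):
+
+.. code-block:: llvm
+
+ store float %v, float addrspace(3)* %sptr
+ call void @llvm.nvvm.barrier0()
+ %w = load float addrspace(3)* %optr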
+
+
+Other Intrinsics
+----------------
+
+For the full set of NVPTX intrinsics, please see the
+``include/llvm/IR/IntrinsicsNVVM.td`` file in the LLVM source tree.
+
+
+Executing PTX
+=============
+
+The most common way to execute PTX assembly on a GPU device is to use the CUDA
+Driver API. This API is a low-level interface to the GPU driver and allows for
+JIT compilation of PTX code to native GPU machine code.
+
+Initializing the Driver API:
+
+.. code-block:: c++
+
+ CUdevice device;
+ CUcontext context;
+
+ // Initialize the driver API
+ cuInit(0);
+ // Get a handle to the first compute device
+ cuDeviceGet(&device, 0);
+ // Create a compute device context
+ cuCtxCreate(&context, 0, device);
+
+JIT compiling a PTX string to a device binary:
+
+.. code-block:: c++
+
+ CUmodule module;
+ CUfunction function;
+
+ // JIT compile a null-terminated PTX string
+ cuModuleLoadData(&module, (void*)PTXString);
+
+ // Get a handle to the "myfunction" kernel function
+ cuModuleGetFunction(&function, module, "myfunction");
+
+For full examples of executing PTX assembly, please see the `CUDA Samples
+<https://developer.nvidia.com/cuda-downloads>`_ distribution.
diff --git a/docs/Packaging.rst b/docs/Packaging.rst
index 6e74158d7213..7c2dc956128e 100644
--- a/docs/Packaging.rst
+++ b/docs/Packaging.rst
@@ -1,5 +1,3 @@
-.. _packaging:
-
========================
Advice on Packaging LLVM
========================
diff --git a/docs/Passes.html b/docs/Passes.html
deleted file mode 100644
index 16e8bd6f6b13..000000000000
--- a/docs/Passes.html
+++ /dev/null
@@ -1,2049 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <title>LLVM's Analysis and Transform Passes</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-</head>
-<body>
-
-<!--
-
-If Passes.html is up to date, the following "one-liner" should print
-an empty diff.
-
-egrep -e '^<tr><td><a href="#.*">-.*</a></td><td>.*</td></tr>$' \
- -e '^ <a name=".*">.*</a>$' < Passes.html >html; \
-perl >help <<'EOT' && diff -u help html; rm -f help html
-open HTML, "<Passes.html" or die "open: Passes.html: $!\n";
-while (<HTML>) {
- m:^<tr><td><a href="#(.*)">-.*</a></td><td>.*</td></tr>$: or next;
- $order{$1} = sprintf("%03d", 1 + int %order);
-}
-open HELP, "../Release/bin/opt -help|" or die "open: opt -help: $!\n";
-while (<HELP>) {
- m:^ -([^ ]+) +- (.*)$: or next;
- my $o = $order{$1};
- $o = "000" unless defined $o;
- push @x, "$o<tr><td><a href=\"#$1\">-$1</a></td><td>$2</td></tr>\n";
- push @y, "$o <a name=\"$1\">-$1: $2</a>\n";
-}
-@x = map { s/^\d\d\d//; $_ } sort @x;
-@y = map { s/^\d\d\d//; $_ } sort @y;
-print @x, @y;
-EOT
-
-This (real) one-liner can also be helpful when converting comments to HTML:
-
-perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !$on && $_ =~ /\S/; print " </p>\n" if $on && $_ =~ /^\s*$/; print " $_\n"; $on = ($_ =~ /\S/); } print " </p>\n" if $on'
-
- -->
-
-<h1>LLVM's Analysis and Transform Passes</h1>
-
-<ol>
- <li><a href="#intro">Introduction</a></li>
- <li><a href="#analyses">Analysis Passes</a>
- <li><a href="#transforms">Transform Passes</a></li>
- <li><a href="#utilities">Utility Passes</a></li>
-</ol>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>
- and Gordon Henriksen</p>
-</div>
-
-<!-- ======================================================================= -->
-<h2><a name="intro">Introduction</a></h2>
-<div>
- <p>This document serves as a high level summary of the optimization features
- that LLVM provides. Optimizations are implemented as Passes that traverse some
- portion of a program to either collect information or transform the program.
- The table below divides the passes that LLVM provides into three categories.
- Analysis passes compute information that other passes can use or for debugging
- or program visualization purposes. Transform passes can use (or invalidate)
- the analysis passes. Transform passes all mutate the program in some way.
- Utility passes provides some utility but don't otherwise fit categorization.
- For example passes to extract functions to bitcode or write a module to
- bitcode are neither analysis nor transform passes.
- <p>The table below provides a quick summary of each pass and links to the more
- complete pass description later in the document.</p>
-
-<table>
-<tr><th colspan="2"><b>ANALYSIS PASSES</b></th></tr>
-<tr><th>Option</th><th>Name</th></tr>
-<tr><td><a href="#aa-eval">-aa-eval</a></td><td>Exhaustive Alias Analysis Precision Evaluator</td></tr>
-<tr><td><a href="#basicaa">-basicaa</a></td><td>Basic Alias Analysis (stateless AA impl)</td></tr>
-<tr><td><a href="#basiccg">-basiccg</a></td><td>Basic CallGraph Construction</td></tr>
-<tr><td><a href="#count-aa">-count-aa</a></td><td>Count Alias Analysis Query Responses</td></tr>
-<tr><td><a href="#da">-da</a></td><td>Dependence Analysis</td></tr>
-<tr><td><a href="#debug-aa">-debug-aa</a></td><td>AA use debugger</td></tr>
-<tr><td><a href="#domfrontier">-domfrontier</a></td><td>Dominance Frontier Construction</td></tr>
-<tr><td><a href="#domtree">-domtree</a></td><td>Dominator Tree Construction</td></tr>
-<tr><td><a href="#dot-callgraph">-dot-callgraph</a></td><td>Print Call Graph to 'dot' file</td></tr>
-<tr><td><a href="#dot-cfg">-dot-cfg</a></td><td>Print CFG of function to 'dot' file</td></tr>
-<tr><td><a href="#dot-cfg-only">-dot-cfg-only</a></td><td>Print CFG of function to 'dot' file (with no function bodies)</td></tr>
-<tr><td><a href="#dot-dom">-dot-dom</a></td><td>Print dominance tree of function to 'dot' file</td></tr>
-<tr><td><a href="#dot-dom-only">-dot-dom-only</a></td><td>Print dominance tree of function to 'dot' file (with no function bodies)</td></tr>
-<tr><td><a href="#dot-postdom">-dot-postdom</a></td><td>Print postdominance tree of function to 'dot' file</td></tr>
-<tr><td><a href="#dot-postdom-only">-dot-postdom-only</a></td><td>Print postdominance tree of function to 'dot' file (with no function bodies)</td></tr>
-<tr><td><a href="#globalsmodref-aa">-globalsmodref-aa</a></td><td>Simple mod/ref analysis for globals</td></tr>
-<tr><td><a href="#instcount">-instcount</a></td><td>Counts the various types of Instructions</td></tr>
-<tr><td><a href="#intervals">-intervals</a></td><td>Interval Partition Construction</td></tr>
-<tr><td><a href="#iv-users">-iv-users</a></td><td>Induction Variable Users</td></tr>
-<tr><td><a href="#lazy-value-info">-lazy-value-info</a></td><td>Lazy Value Information Analysis</td></tr>
-<tr><td><a href="#libcall-aa">-libcall-aa</a></td><td>LibCall Alias Analysis</td></tr>
-<tr><td><a href="#lint">-lint</a></td><td>Statically lint-checks LLVM IR</td></tr>
-<tr><td><a href="#loops">-loops</a></td><td>Natural Loop Information</td></tr>
-<tr><td><a href="#memdep">-memdep</a></td><td>Memory Dependence Analysis</td></tr>
-<tr><td><a href="#module-debuginfo">-module-debuginfo</a></td><td>Decodes module-level debug info</td></tr>
-<tr><td><a href="#no-aa">-no-aa</a></td><td>No Alias Analysis (always returns 'may' alias)</td></tr>
-<tr><td><a href="#no-profile">-no-profile</a></td><td>No Profile Information</td></tr>
-<tr><td><a href="#postdomtree">-postdomtree</a></td><td>Post-Dominator Tree Construction</td></tr>
-<tr><td><a href="#print-alias-sets">-print-alias-sets</a></td><td>Alias Set Printer</td></tr>
-<tr><td><a href="#print-callgraph">-print-callgraph</a></td><td>Print a call graph</td></tr>
-<tr><td><a href="#print-callgraph-sccs">-print-callgraph-sccs</a></td><td>Print SCCs of the Call Graph</td></tr>
-<tr><td><a href="#print-cfg-sccs">-print-cfg-sccs</a></td><td>Print SCCs of each function CFG</td></tr>
-<tr><td><a href="#print-dbginfo">-print-dbginfo</a></td><td>Print debug info in human readable form</td></tr>
-<tr><td><a href="#print-dom-info">-print-dom-info</a></td><td>Dominator Info Printer</td></tr>
-<tr><td><a href="#print-externalfnconstants">-print-externalfnconstants</a></td><td>Print external fn callsites passed constants</td></tr>
-<tr><td><a href="#print-function">-print-function</a></td><td>Print function to stderr</td></tr>
-<tr><td><a href="#print-module">-print-module</a></td><td>Print module to stderr</td></tr>
-<tr><td><a href="#print-used-types">-print-used-types</a></td><td>Find Used Types</td></tr>
-<tr><td><a href="#profile-estimator">-profile-estimator</a></td><td>Estimate profiling information</td></tr>
-<tr><td><a href="#profile-loader">-profile-loader</a></td><td>Load profile information from llvmprof.out</td></tr>
-<tr><td><a href="#profile-verifier">-profile-verifier</a></td><td>Verify profiling information</td></tr>
-<tr><td><a href="#regions">-regions</a></td><td>Detect single entry single exit regions</td></tr>
-<tr><td><a href="#scalar-evolution">-scalar-evolution</a></td><td>Scalar Evolution Analysis</td></tr>
-<tr><td><a href="#scev-aa">-scev-aa</a></td><td>ScalarEvolution-based Alias Analysis</td></tr>
-<tr><td><a href="#targetdata">-targetdata</a></td><td>Target Data Layout</td></tr>
-
-
-<tr><th colspan="2"><b>TRANSFORM PASSES</b></th></tr>
-<tr><th>Option</th><th>Name</th></tr>
-<tr><td><a href="#adce">-adce</a></td><td>Aggressive Dead Code Elimination</td></tr>
-<tr><td><a href="#always-inline">-always-inline</a></td><td>Inliner for always_inline functions</td></tr>
-<tr><td><a href="#argpromotion">-argpromotion</a></td><td>Promote 'by reference' arguments to scalars</td></tr>
-<tr><td><a href="#bb-vectorize">-bb-vectorize</a></td><td>Combine instructions to form vector instructions within basic blocks</td></tr>
-<tr><td><a href="#block-placement">-block-placement</a></td><td>Profile Guided Basic Block Placement</td></tr>
-<tr><td><a href="#break-crit-edges">-break-crit-edges</a></td><td>Break critical edges in CFG</td></tr>
-<tr><td><a href="#codegenprepare">-codegenprepare</a></td><td>Optimize for code generation</td></tr>
-<tr><td><a href="#constmerge">-constmerge</a></td><td>Merge Duplicate Global Constants</td></tr>
-<tr><td><a href="#constprop">-constprop</a></td><td>Simple constant propagation</td></tr>
-<tr><td><a href="#dce">-dce</a></td><td>Dead Code Elimination</td></tr>
-<tr><td><a href="#deadargelim">-deadargelim</a></td><td>Dead Argument Elimination</td></tr>
-<tr><td><a href="#deadtypeelim">-deadtypeelim</a></td><td>Dead Type Elimination</td></tr>
-<tr><td><a href="#die">-die</a></td><td>Dead Instruction Elimination</td></tr>
-<tr><td><a href="#dse">-dse</a></td><td>Dead Store Elimination</td></tr>
-<tr><td><a href="#functionattrs">-functionattrs</a></td><td>Deduce function attributes</td></tr>
-<tr><td><a href="#globaldce">-globaldce</a></td><td>Dead Global Elimination</td></tr>
-<tr><td><a href="#globalopt">-globalopt</a></td><td>Global Variable Optimizer</td></tr>
-<tr><td><a href="#gvn">-gvn</a></td><td>Global Value Numbering</td></tr>
-<tr><td><a href="#indvars">-indvars</a></td><td>Canonicalize Induction Variables</td></tr>
-<tr><td><a href="#inline">-inline</a></td><td>Function Integration/Inlining</td></tr>
-<tr><td><a href="#insert-edge-profiling">-insert-edge-profiling</a></td><td>Insert instrumentation for edge profiling</td></tr>
-<tr><td><a href="#insert-optimal-edge-profiling">-insert-optimal-edge-profiling</a></td><td>Insert optimal instrumentation for edge profiling</td></tr>
-<tr><td><a href="#instcombine">-instcombine</a></td><td>Combine redundant instructions</td></tr>
-<tr><td><a href="#internalize">-internalize</a></td><td>Internalize Global Symbols</td></tr>
-<tr><td><a href="#ipconstprop">-ipconstprop</a></td><td>Interprocedural constant propagation</td></tr>
-<tr><td><a href="#ipsccp">-ipsccp</a></td><td>Interprocedural Sparse Conditional Constant Propagation</td></tr>
-<tr><td><a href="#jump-threading">-jump-threading</a></td><td>Jump Threading</td></tr>
-<tr><td><a href="#lcssa">-lcssa</a></td><td>Loop-Closed SSA Form Pass</td></tr>
-<tr><td><a href="#licm">-licm</a></td><td>Loop Invariant Code Motion</td></tr>
-<tr><td><a href="#loop-deletion">-loop-deletion</a></td><td>Delete dead loops</td></tr>
-<tr><td><a href="#loop-extract">-loop-extract</a></td><td>Extract loops into new functions</td></tr>
-<tr><td><a href="#loop-extract-single">-loop-extract-single</a></td><td>Extract at most one loop into a new function</td></tr>
-<tr><td><a href="#loop-reduce">-loop-reduce</a></td><td>Loop Strength Reduction</td></tr>
-<tr><td><a href="#loop-rotate">-loop-rotate</a></td><td>Rotate Loops</td></tr>
-<tr><td><a href="#loop-simplify">-loop-simplify</a></td><td>Canonicalize natural loops</td></tr>
-<tr><td><a href="#loop-unroll">-loop-unroll</a></td><td>Unroll loops</td></tr>
-<tr><td><a href="#loop-unswitch">-loop-unswitch</a></td><td>Unswitch loops</td></tr>
-<tr><td><a href="#loweratomic">-loweratomic</a></td><td>Lower atomic intrinsics to non-atomic form</td></tr>
-<tr><td><a href="#lowerinvoke">-lowerinvoke</a></td><td>Lower invoke and unwind, for unwindless code generators</td></tr>
-<tr><td><a href="#lowerswitch">-lowerswitch</a></td><td>Lower SwitchInst's to branches</td></tr>
-<tr><td><a href="#mem2reg">-mem2reg</a></td><td>Promote Memory to Register</td></tr>
-<tr><td><a href="#memcpyopt">-memcpyopt</a></td><td>MemCpy Optimization</td></tr>
-<tr><td><a href="#mergefunc">-mergefunc</a></td><td>Merge Functions</td></tr>
-<tr><td><a href="#mergereturn">-mergereturn</a></td><td>Unify function exit nodes</td></tr>
-<tr><td><a href="#partial-inliner">-partial-inliner</a></td><td>Partial Inliner</td></tr>
-<tr><td><a href="#prune-eh">-prune-eh</a></td><td>Remove unused exception handling info</td></tr>
-<tr><td><a href="#reassociate">-reassociate</a></td><td>Reassociate expressions</td></tr>
-<tr><td><a href="#reg2mem">-reg2mem</a></td><td>Demote all values to stack slots</td></tr>
-<tr><td><a href="#scalarrepl">-scalarrepl</a></td><td>Scalar Replacement of Aggregates (DT)</td></tr>
-<tr><td><a href="#sccp">-sccp</a></td><td>Sparse Conditional Constant Propagation</td></tr>
-<tr><td><a href="#simplify-libcalls">-simplify-libcalls</a></td><td>Simplify well-known library calls</td></tr>
-<tr><td><a href="#simplifycfg">-simplifycfg</a></td><td>Simplify the CFG</td></tr>
-<tr><td><a href="#sink">-sink</a></td><td>Code sinking</td></tr>
-<tr><td><a href="#sretpromotion">-sretpromotion</a></td><td>Promote sret arguments to multiple ret values</td></tr>
-<tr><td><a href="#strip">-strip</a></td><td>Strip all symbols from a module</td></tr>
-<tr><td><a href="#strip-dead-debug-info">-strip-dead-debug-info</a></td><td>Strip debug info for unused symbols</td></tr>
-<tr><td><a href="#strip-dead-prototypes">-strip-dead-prototypes</a></td><td>Strip Unused Function Prototypes</td></tr>
-<tr><td><a href="#strip-debug-declare">-strip-debug-declare</a></td><td>Strip all llvm.dbg.declare intrinsics</td></tr>
-<tr><td><a href="#strip-nondebug">-strip-nondebug</a></td><td>Strip all symbols, except dbg symbols, from a module</td></tr>
-<tr><td><a href="#tailcallelim">-tailcallelim</a></td><td>Tail Call Elimination</td></tr>
-
-
-<tr><th colspan="2"><b>UTILITY PASSES</b></th></tr>
-<tr><th>Option</th><th>Name</th></tr>
-<tr><td><a href="#deadarghaX0r">-deadarghaX0r</a></td><td>Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)</td></tr>
-<tr><td><a href="#extract-blocks">-extract-blocks</a></td><td>Extract Basic Blocks From Module (for bugpoint use)</td></tr>
-<tr><td><a href="#instnamer">-instnamer</a></td><td>Assign names to anonymous instructions</td></tr>
-<tr><td><a href="#preverify">-preverify</a></td><td>Preliminary module verification</td></tr>
-<tr><td><a href="#verify">-verify</a></td><td>Module Verifier</td></tr>
-<tr><td><a href="#view-cfg">-view-cfg</a></td><td>View CFG of function</td></tr>
-<tr><td><a href="#view-cfg-only">-view-cfg-only</a></td><td>View CFG of function (with no function bodies)</td></tr>
-<tr><td><a href="#view-dom">-view-dom</a></td><td>View dominance tree of function</td></tr>
-<tr><td><a href="#view-dom-only">-view-dom-only</a></td><td>View dominance tree of function (with no function bodies)</td></tr>
-<tr><td><a href="#view-postdom">-view-postdom</a></td><td>View postdominance tree of function</td></tr>
-<tr><td><a href="#view-postdom-only">-view-postdom-only</a></td><td>View postdominance tree of function (with no function bodies)</td></tr>
-</table>
-
-</div>
-
-<!-- ======================================================================= -->
-<h2><a name="analyses">Analysis Passes</a></h2>
-<div>
- <p>This section describes the LLVM Analysis Passes.</p>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="aa-eval">-aa-eval: Exhaustive Alias Analysis Precision Evaluator</a>
-</h3>
-<div>
- <p>This is a simple N^2 alias analysis accuracy evaluator.
- Basically, for each function in the program, it simply queries to see how the
- alias analysis implementation answers alias queries between each pair of
- pointers in the function.</p>
-
- <p>This is inspired and adapted from code by: Naveen Neelakantam, Francesco
- Spadini, and Wojciech Stryjewski.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="basicaa">-basicaa: Basic Alias Analysis (stateless AA impl)</a>
-</h3>
-<div>
- <p>A basic alias analysis pass that implements identities (two different
- globals cannot alias, etc), but does no stateful analysis.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="basiccg">-basiccg: Basic CallGraph Construction</a>
-</h3>
-<div>
- <p>Yet to be written.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="count-aa">-count-aa: Count Alias Analysis Query Responses</a>
-</h3>
-<div>
- <p>
- A pass which can be used to count how many alias queries
- are being made and how the alias analysis implementation being used responds.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="da">-da: Dependence Analysis</a>
-</h3>
-<div>
- <p>Dependence analysis framework, which is used to detect dependences in
- memory accesses.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="debug-aa">-debug-aa: AA use debugger</a>
-</h3>
-<div>
- <p>
- This simple pass checks alias analysis users to ensure that if they
- create a new value, they do not query AA without informing it of the value.
- It acts as a shim over any other AA pass you want.
- </p>
-
- <p>
- Yes keeping track of every value in the program is expensive, but this is
- a debugging pass.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="domfrontier">-domfrontier: Dominance Frontier Construction</a>
-</h3>
-<div>
- <p>
- This pass is a simple dominator construction algorithm for finding forward
- dominator frontiers.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="domtree">-domtree: Dominator Tree Construction</a>
-</h3>
-<div>
- <p>
- This pass is a simple dominator construction algorithm for finding forward
- dominators.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="dot-callgraph">-dot-callgraph: Print Call Graph to 'dot' file</a>
-</h3>
-<div>
- <p>
- This pass, only available in <code>opt</code>, prints the call graph into a
- <code>.dot</code> graph. This graph can then be processed with the "dot" tool
- to convert it to postscript or some other suitable format.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="dot-cfg">-dot-cfg: Print CFG of function to 'dot' file</a>
-</h3>
-<div>
- <p>
- This pass, only available in <code>opt</code>, prints the control flow graph
- into a <code>.dot</code> graph. This graph can then be processed with the
- "dot" tool to convert it to postscript or some other suitable format.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="dot-cfg-only">-dot-cfg-only: Print CFG of function to 'dot' file (with no function bodies)</a>
-</h3>
-<div>
- <p>
- This pass, only available in <code>opt</code>, prints the control flow graph
- into a <code>.dot</code> graph, omitting the function bodies. This graph can
- then be processed with the "dot" tool to convert it to postscript or some
- other suitable format.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="dot-dom">-dot-dom: Print dominance tree of function to 'dot' file</a>
-</h3>
-<div>
- <p>
- This pass, only available in <code>opt</code>, prints the dominator tree
- into a <code>.dot</code> graph. This graph can then be processed with the
- "dot" tool to convert it to postscript or some other suitable format.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="dot-dom-only">-dot-dom-only: Print dominance tree of function to 'dot' file (with no function bodies)</a>
-</h3>
-<div>
- <p>
- This pass, only available in <code>opt</code>, prints the dominator tree
- into a <code>.dot</code> graph, omitting the function bodies. This graph can
- then be processed with the "dot" tool to convert it to postscript or some
- other suitable format.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="dot-postdom">-dot-postdom: Print postdominance tree of function to 'dot' file</a>
-</h3>
-<div>
- <p>
- This pass, only available in <code>opt</code>, prints the post dominator tree
- into a <code>.dot</code> graph. This graph can then be processed with the
- "dot" tool to convert it to postscript or some other suitable format.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="dot-postdom-only">-dot-postdom-only: Print postdominance tree of function to 'dot' file (with no function bodies)</a>
-</h3>
-<div>
- <p>
- This pass, only available in <code>opt</code>, prints the post dominator tree
- into a <code>.dot</code> graph, omitting the function bodies. This graph can
- then be processed with the "dot" tool to convert it to postscript or some
- other suitable format.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="globalsmodref-aa">-globalsmodref-aa: Simple mod/ref analysis for globals</a>
-</h3>
-<div>
- <p>
- This simple pass provides alias and mod/ref information for global values
- that do not have their address taken, and keeps track of whether functions
- read or write memory (are "pure"). For this simple (but very common) case,
- we can provide pretty accurate and useful information.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="instcount">-instcount: Counts the various types of Instructions</a>
-</h3>
-<div>
- <p>
-    This pass collects the count of all instructions and reports them.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="intervals">-intervals: Interval Partition Construction</a>
-</h3>
-<div>
- <p>
- This analysis calculates and represents the interval partition of a function,
- or a preexisting interval partition.
- </p>
-
- <p>
- In this way, the interval partition may be used to reduce a flow graph down
- to its degenerate single node interval partition (unless it is irreducible).
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="iv-users">-iv-users: Induction Variable Users</a>
-</h3>
-<div>
- <p>Bookkeeping for "interesting" users of expressions computed from
- induction variables.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="lazy-value-info">-lazy-value-info: Lazy Value Information Analysis</a>
-</h3>
-<div>
- <p>Interface for lazy computation of value constraint information.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="libcall-aa">-libcall-aa: LibCall Alias Analysis</a>
-</h3>
-<div>
- <p>LibCall Alias Analysis.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="lint">-lint: Statically lint-checks LLVM IR</a>
-</h3>
-<div>
- <p>This pass statically checks for common and easily-identified constructs
- which produce undefined or likely unintended behavior in LLVM IR.</p>
-
- <p>It is not a guarantee of correctness, in two ways. First, it isn't
- comprehensive. There are checks which could be done statically which are
- not yet implemented. Some of these are indicated by TODO comments, but
- those aren't comprehensive either. Second, many conditions cannot be
- checked statically. This pass does no dynamic instrumentation, so it
- can't check for all possible problems.</p>
-
- <p>Another limitation is that it assumes all code will be executed. A store
- through a null pointer in a basic block which is never reached is harmless,
- but this pass will warn about it anyway.</p>
-
- <p>Optimization passes may make conditions that this pass checks for more or
- less obvious. If an optimization pass appears to be introducing a warning,
- it may be that the optimization pass is merely exposing an existing
- condition in the code.</p>
-
- <p>This code may be run before instcombine. In many cases, instcombine checks
- for the same kinds of things and turns instructions with undefined behavior
- into unreachable (or equivalent). Because of this, this pass makes some
- effort to look through bitcasts and so on.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="loops">-loops: Natural Loop Information</a>
-</h3>
-<div>
- <p>
- This analysis is used to identify natural loops and determine the loop depth
- of various nodes of the CFG. Note that the loops identified may actually be
- several natural loops that share the same header node... not just a single
- natural loop.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="memdep">-memdep: Memory Dependence Analysis</a>
-</h3>
-<div>
- <p>
- An analysis that determines, for a given memory operation, what preceding
- memory operations it depends on. It builds on alias analysis information, and
- tries to provide a lazy, caching interface to a common kind of alias
- information query.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="module-debuginfo">-module-debuginfo: Decodes module-level debug info</a>
-</h3>
-<div>
-  <p>This pass decodes the debug info metadata in a module and prints it in a
-  (sufficiently prepared) human-readable form.
-
-  For example, run this pass from <code>opt</code> along with the
-  <code>-analyze</code> option, and it will print the result to standard
-  output.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="no-aa">-no-aa: No Alias Analysis (always returns 'may' alias)</a>
-</h3>
-<div>
- <p>
- This is the default implementation of the Alias Analysis interface. It always
- returns "I don't know" for alias queries. NoAA is unlike other alias analysis
- implementations, in that it does not chain to a previous analysis. As such it
- doesn't follow many of the rules that other alias analyses must.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="no-profile">-no-profile: No Profile Information</a>
-</h3>
-<div>
- <p>
- The default "no profile" implementation of the abstract
- <code>ProfileInfo</code> interface.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="postdomfrontier">-postdomfrontier: Post-Dominance Frontier Construction</a>
-</h3>
-<div>
- <p>
- This pass is a simple post-dominator construction algorithm for finding
- post-dominator frontiers.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="postdomtree">-postdomtree: Post-Dominator Tree Construction</a>
-</h3>
-<div>
- <p>
- This pass is a simple post-dominator construction algorithm for finding
- post-dominators.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="print-alias-sets">-print-alias-sets: Alias Set Printer</a>
-</h3>
-<div>
- <p>Yet to be written.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="print-callgraph">-print-callgraph: Print a call graph</a>
-</h3>
-<div>
- <p>
- This pass, only available in <code>opt</code>, prints the call graph to
- standard error in a human-readable form.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="print-callgraph-sccs">-print-callgraph-sccs: Print SCCs of the Call Graph</a>
-</h3>
-<div>
- <p>
- This pass, only available in <code>opt</code>, prints the SCCs of the call
- graph to standard error in a human-readable form.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="print-cfg-sccs">-print-cfg-sccs: Print SCCs of each function CFG</a>
-</h3>
-<div>
- <p>
- This pass, only available in <code>opt</code>, prints the SCCs of each
- function CFG to standard error in a human-readable form.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="print-dbginfo">-print-dbginfo: Print debug info in human readable form</a>
-</h3>
-<div>
- <p>Pass that prints instructions, and associated debug info:</p>
- <ul>
-
- <li>source/line/col information</li>
- <li>original variable name</li>
- <li>original type name</li>
- </ul>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="print-dom-info">-print-dom-info: Dominator Info Printer</a>
-</h3>
-<div>
- <p>Dominator Info Printer.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="print-externalfnconstants">-print-externalfnconstants: Print external fn callsites passed constants</a>
-</h3>
-<div>
- <p>
- This pass, only available in <code>opt</code>, prints out call sites to
- external functions that are called with constant arguments. This can be
- useful when looking for standard library functions we should constant fold
- or handle in alias analyses.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="print-function">-print-function: Print function to stderr</a>
-</h3>
-<div>
- <p>
- The <code>PrintFunctionPass</code> class is designed to be pipelined with
- other <code>FunctionPass</code>es, and prints out the functions of the module
- as they are processed.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="print-module">-print-module: Print module to stderr</a>
-</h3>
-<div>
- <p>
- This pass simply prints out the entire module when it is executed.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="print-used-types">-print-used-types: Find Used Types</a>
-</h3>
-<div>
- <p>
- This pass is used to seek out all of the types in use by the program. Note
- that this analysis explicitly does not include types only used by the symbol
-  table.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="profile-estimator">-profile-estimator: Estimate profiling information</a>
-</h3>
-<div>
-  <p>An implementation of <code>ProfileInfo</code> that estimates profiling
-     information in a very crude and unimaginative way.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="profile-loader">-profile-loader: Load profile information from llvmprof.out</a>
-</h3>
-<div>
- <p>
- A concrete implementation of profiling information that loads the information
- from a profile dump file.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="profile-verifier">-profile-verifier: Verify profiling information</a>
-</h3>
-<div>
- <p>Pass that checks profiling information for plausibility.</p>
-</div>
-<h3>
- <a name="regions">-regions: Detect single entry single exit regions</a>
-</h3>
-<div>
- <p>
- The <code>RegionInfo</code> pass detects single entry single exit regions in a
- function, where a region is defined as any subgraph that is connected to the
-    remaining graph at only two spots. Furthermore, a hierarchical region tree
-    is built.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="scalar-evolution">-scalar-evolution: Scalar Evolution Analysis</a>
-</h3>
-<div>
- <p>
- The <code>ScalarEvolution</code> analysis can be used to analyze and
-    categorize scalar expressions in loops. It specializes in recognizing general
- induction variables, representing them with the abstract and opaque
- <code>SCEV</code> class. Given this analysis, trip counts of loops and other
- important properties can be obtained.
- </p>
-
- <p>
- This analysis is primarily useful for induction variable substitution and
- strength reduction.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="scev-aa">-scev-aa: ScalarEvolution-based Alias Analysis</a>
-</h3>
-<div>
-  <p>Simple alias analysis implemented in terms of ScalarEvolution
-  queries.</p>
-
-  <p>This differs from traditional loop dependence analysis in that it tests
-  for dependencies within a single iteration of a loop, rather than
-  dependencies between different iterations.</p>
-
-  <p>ScalarEvolution has a more complete understanding of pointer arithmetic
-  than BasicAliasAnalysis' collection of ad-hoc analyses.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="targetdata">-targetdata: Target Data Layout</a>
-</h3>
-<div>
-  <p>Provides other passes access to information about the size and alignment
-  required by the target ABI for various data types.</p>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h2><a name="transforms">Transform Passes</a></h2>
-<div>
- <p>This section describes the LLVM Transform Passes.</p>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="adce">-adce: Aggressive Dead Code Elimination</a>
-</h3>
-<div>
- <p>ADCE aggressively tries to eliminate code. This pass is similar to
- <a href="#dce">DCE</a> but it assumes that values are dead until proven
- otherwise. This is similar to <a href="#sccp">SCCP</a>, except applied to
- the liveness of values.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="always-inline">-always-inline: Inliner for always_inline functions</a>
-</h3>
-<div>
- <p>A custom inliner that handles only functions that are marked as
- "always inline".</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="argpromotion">-argpromotion: Promote 'by reference' arguments to scalars</a>
-</h3>
-<div>
- <p>
- This pass promotes "by reference" arguments to be "by value" arguments. In
- practice, this means looking for internal functions that have pointer
- arguments. If it can prove, through the use of alias analysis, that an
- argument is *only* loaded, then it can pass the value into the function
- instead of the address of the value. This can cause recursive simplification
- of code and lead to the elimination of allocas (especially in C++ template
- code like the STL).
- </p>
-
- <p>
- This pass also handles aggregate arguments that are passed into a function,
- scalarizing them if the elements of the aggregate are only loaded. Note that
- it refuses to scalarize aggregates which would require passing in more than
- three operands to the function, because passing thousands of operands for a
- large array or structure is unprofitable!
- </p>
-
- <p>
- Note that this transformation could also be done for arguments that are only
-  stored to (returning the value instead), but this is not currently
-  implemented. This case
- would be best handled when and if LLVM starts supporting multiple return
- values from functions.
- </p>
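-
-  <p>
-    As a hedged illustration (the function and argument names are invented),
-    a pointer argument that is only loaded from is replaced by the loaded
-    value:
-  </p>
-
-<blockquote><pre
->define internal i32 @callee(i32* %p) {
-  %v = load i32* %p    ; %p is only loaded from, never stored through
-  ret i32 %v
-}
-; after promotion the signature becomes roughly:
-;   define internal i32 @callee(i32 %p.val)</pre></blockquote>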
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="bb-vectorize">-bb-vectorize: Basic-Block Vectorization</a>
-</h3>
-<div>
- <p>This pass combines instructions inside basic blocks to form vector
- instructions. It iterates over each basic block, attempting to pair
- compatible instructions, repeating this process until no additional
- pairs are selected for vectorization. When the outputs of some pair
- of compatible instructions are used as inputs by some other pair of
- compatible instructions, those pairs are part of a potential
- vectorization chain. Instruction pairs are only fused into vector
- instructions when they are part of a chain longer than some
- threshold length. Moreover, the pass attempts to find the best
- possible chain for each pair of compatible instructions. These
- heuristics are intended to prevent vectorization in cases where
- it would not yield a performance increase of the resulting code.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="block-placement">-block-placement: Profile Guided Basic Block Placement</a>
-</h3>
-<div>
- <p>This pass is a very simple profile guided basic block placement algorithm.
- The idea is to put frequently executed blocks together at the start of the
- function and hopefully increase the number of fall-through conditional
- branches. If there is no profile information for a particular function, this
- pass basically orders blocks in depth-first order.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="break-crit-edges">-break-crit-edges: Break critical edges in CFG</a>
-</h3>
-<div>
- <p>
- Break all of the critical edges in the CFG by inserting a dummy basic block.
- It may be "required" by passes that cannot deal with critical edges. This
- transformation obviously invalidates the CFG, but can update forward dominator
- (set, immediate dominators, tree, and frontier) information.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="codegenprepare">-codegenprepare: Optimize for code generation</a>
-</h3>
-<div>
-  <p>
-  This pass munges the code in the input function to better prepare it for
-  SelectionDAG-based code generation. This works around limitations in its
-  basic-block-at-a-time approach. It should eventually be removed.
-  </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="constmerge">-constmerge: Merge Duplicate Global Constants</a>
-</h3>
-<div>
- <p>
- Merges duplicate global constants together into a single constant that is
-  shared. This is useful because some passes (e.g., TraceValues) insert a lot of
- string constants into the program, regardless of whether or not an existing
- string is available.
- </p>
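-
-  <p>
-    For example (hypothetical globals, invented for illustration), these two
-    identical constants are merged and all uses of the second are rewritten
-    to use the first:
-  </p>
-
-<blockquote><pre
->@s1 = internal constant [4 x i8] c"abc\00"
-@s2 = internal constant [4 x i8] c"abc\00"   ; folded into @s1</pre></blockquote>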
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="constprop">-constprop: Simple constant propagation</a>
-</h3>
-<div>
- <p>This file implements constant propagation and merging. It looks for
- instructions involving only constant operands and replaces them with a
- constant value instead of an instruction. For example:</p>
- <blockquote><pre>add i32 1, 2</pre></blockquote>
- <p>becomes</p>
- <blockquote><pre>i32 3</pre></blockquote>
- <p>NOTE: this pass has a habit of making definitions be dead. It is a good
-  idea to run a <a href="#die">DIE</a> (Dead Instruction Elimination) pass
- sometime after running this pass.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="dce">-dce: Dead Code Elimination</a>
-</h3>
-<div>
- <p>
- Dead code elimination is similar to <a href="#die">dead instruction
- elimination</a>, but it rechecks instructions that were used by removed
- instructions to see if they are newly dead.
- </p>
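-
-  <p>
-    As a minimal sketch (the names are invented for illustration): if the
-    result of <code>%b</code> below is never used, deleting <code>%b</code>
-    makes <code>%a</code> newly dead as well, so DCE removes both:
-  </p>
-
-<blockquote><pre
->%a = add i32 %x, 1    ; only used by %b
-%b = mul i32 %a, 2    ; result never used
-ret i32 %x</pre></blockquote>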
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="deadargelim">-deadargelim: Dead Argument Elimination</a>
-</h3>
-<div>
- <p>
- This pass deletes dead arguments from internal functions. Dead argument
- elimination removes arguments which are directly dead, as well as arguments
-  only passed into function calls as dead arguments of other functions. This
-  pass also deletes dead return values in a similar way.
- </p>
-
- <p>
- This pass is often useful as a cleanup pass to run after aggressive
- interprocedural passes, which add possibly-dead arguments.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="deadtypeelim">-deadtypeelim: Dead Type Elimination</a>
-</h3>
-<div>
- <p>
-  This pass is used to clean up the output of GCC. It eliminates names for types
- that are unused in the entire translation unit, using the <a
- href="#findusedtypes">find used types</a> pass.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="die">-die: Dead Instruction Elimination</a>
-</h3>
-<div>
- <p>
- Dead instruction elimination performs a single pass over the function,
- removing instructions that are obviously dead.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="dse">-dse: Dead Store Elimination</a>
-</h3>
-<div>
- <p>
- A trivial dead store elimination that only considers basic-block local
- redundant stores.
- </p>
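-
-  <p>
-    A minimal sketch (names invented for illustration): the first store below
-    is dead because it is overwritten before it can ever be read:
-  </p>
-
-<blockquote><pre
->store i32 1, i32* %p   ; dead: overwritten below with no intervening load
-store i32 2, i32* %p</pre></blockquote>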
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="functionattrs">-functionattrs: Deduce function attributes</a>
-</h3>
-<div>
- <p>A simple interprocedural pass which walks the call-graph, looking for
- functions which do not access or only read non-local memory, and marking them
- readnone/readonly. In addition, it marks function arguments (of pointer type)
- 'nocapture' if a call to the function does not create any copies of the pointer
- value that outlive the call. This more or less means that the pointer is only
- dereferenced, and not returned from the function or stored in a global.
- This pass is implemented as a bottom-up traversal of the call-graph.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="globaldce">-globaldce: Dead Global Elimination</a>
-</h3>
-<div>
- <p>
- This transform is designed to eliminate unreachable internal globals from the
- program. It uses an aggressive algorithm, searching out globals that are
- known to be alive. After it finds all of the globals which are needed, it
- deletes whatever is left over. This allows it to delete recursive chunks of
- the program which are unreachable.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="globalopt">-globalopt: Global Variable Optimizer</a>
-</h3>
-<div>
- <p>
- This pass transforms simple global variables that never have their address
- taken. If obviously true, it marks read/write globals as constant, deletes
- variables only stored to, etc.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="gvn">-gvn: Global Value Numbering</a>
-</h3>
-<div>
- <p>
- This pass performs global value numbering to eliminate fully and partially
- redundant instructions. It also performs redundant load elimination.
- </p>
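-
-  <p>
-    For example (a hypothetical fragment), the second load below is fully
-    redundant and is replaced with the value of the first:
-  </p>
-
-<blockquote><pre
->%v1 = load i32* %p
-%v2 = load i32* %p     ; redundant: replaced by %v1
-%sum = add i32 %v1, %v2</pre></blockquote>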
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="indvars">-indvars: Canonicalize Induction Variables</a>
-</h3>
-<div>
- <p>
- This transformation analyzes and transforms the induction variables (and
- computations derived from them) into simpler forms suitable for subsequent
- analysis and transformation.
- </p>
-
- <p>
- This transformation makes the following changes to each loop with an
- identifiable induction variable:
- </p>
-
- <ol>
- <li>All loops are transformed to have a <em>single</em> canonical
- induction variable which starts at zero and steps by one.</li>
- <li>The canonical induction variable is guaranteed to be the first PHI node
- in the loop header block.</li>
- <li>Any pointer arithmetic recurrences are raised to use array
- subscripts.</li>
- </ol>
-
- <p>
- If the trip count of a loop is computable, this pass also makes the following
- changes:
- </p>
-
- <ol>
- <li>The exit condition for the loop is canonicalized to compare the
- induction value against the exit value. This turns loops like:
- <blockquote><pre>for (i = 7; i*i < 1000; ++i)</pre></blockquote>
- into
- <blockquote><pre>for (i = 0; i != 25; ++i)</pre></blockquote></li>
- <li>Any use outside of the loop of an expression derived from the indvar
- is changed to compute the derived value outside of the loop, eliminating
- the dependence on the exit value of the induction variable. If the only
- purpose of the loop is to compute the exit value of some derived
- expression, this transformation will make the loop dead.</li>
- </ol>
-
- <p>
- This transformation should be followed by strength reduction after all of the
- desired loop transformations have been performed. Additionally, on targets
- where it is profitable, the loop could be transformed to count down to zero
- (the "do loop" optimization).
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="inline">-inline: Function Integration/Inlining</a>
-</h3>
-<div>
- <p>
-  Bottom-up inlining of functions into their callers.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="insert-edge-profiling">-insert-edge-profiling: Insert instrumentation for edge profiling</a>
-</h3>
-<div>
- <p>
- This pass instruments the specified program with counters for edge profiling.
- Edge profiling can give a reasonable approximation of the hot paths through a
- program, and is used for a wide variety of program transformations.
- </p>
-
- <p>
- Note that this implementation is very naïve. It inserts a counter for
- <em>every</em> edge in the program, instead of using control flow information
- to prune the number of counters inserted.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="insert-optimal-edge-profiling">-insert-optimal-edge-profiling: Insert optimal instrumentation for edge profiling</a>
-</h3>
-<div>
- <p>This pass instruments the specified program with counters for edge profiling.
- Edge profiling can give a reasonable approximation of the hot paths through a
- program, and is used for a wide variety of program transformations.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="instcombine">-instcombine: Combine redundant instructions</a>
-</h3>
-<div>
- <p>
-  Combine instructions to form fewer, simpler instructions. This pass does not
-  modify the CFG. This pass is where algebraic simplification happens.
- </p>
-
- <p>
- This pass combines things like:
- </p>
-
-<blockquote><pre
->%Y = add i32 %X, 1
-%Z = add i32 %Y, 1</pre></blockquote>
-
- <p>
- into:
- </p>
-
-<blockquote><pre
->%Z = add i32 %X, 2</pre></blockquote>
-
- <p>
- This is a simple worklist driven algorithm.
- </p>
-
- <p>
- This pass guarantees that the following canonicalizations are performed on
- the program:
- </p>
-
- <ul>
- <li>If a binary operator has a constant operand, it is moved to the right-
- hand side.</li>
- <li>Bitwise operators with constant operands are always grouped so that
- shifts are performed first, then <code>or</code>s, then
- <code>and</code>s, then <code>xor</code>s.</li>
- <li>Compare instructions are converted from <code>&lt;</code>,
- <code>&gt;</code>, <code>≤</code>, or <code>≥</code> to
- <code>=</code> or <code>≠</code> if possible.</li>
- <li>All <code>cmp</code> instructions on boolean values are replaced with
- logical operations.</li>
- <li><code>add <var>X</var>, <var>X</var></code> is represented as
- <code>mul <var>X</var>, 2</code> ⇒ <code>shl <var>X</var>, 1</code></li>
- <li>Multiplies with a constant power-of-two argument are transformed into
- shifts.</li>
- <li>… etc.</li>
- </ul>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="internalize">-internalize: Internalize Global Symbols</a>
-</h3>
-<div>
- <p>
- This pass loops over all of the functions in the input module, looking for a
- main function. If a main function is found, all other functions and all
- global variables with initializers are marked as internal.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="ipconstprop">-ipconstprop: Interprocedural constant propagation</a>
-</h3>
-<div>
- <p>
- This pass implements an <em>extremely</em> simple interprocedural constant
- propagation pass. It could certainly be improved in many different ways,
- like using a worklist. This pass makes arguments dead, but does not remove
- them. The existing dead argument elimination pass should be run after this
- to clean up the mess.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="ipsccp">-ipsccp: Interprocedural Sparse Conditional Constant Propagation</a>
-</h3>
-<div>
- <p>
- An interprocedural variant of <a href="#sccp">Sparse Conditional Constant
- Propagation</a>.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="jump-threading">-jump-threading: Jump Threading</a>
-</h3>
-<div>
- <p>
- Jump threading tries to find distinct threads of control flow running through
- a basic block. This pass looks at blocks that have multiple predecessors and
- multiple successors. If one or more of the predecessors of the block can be
- proven to always cause a jump to one of the successors, we forward the edge
- from the predecessor to the successor by duplicating the contents of this
- block.
- </p>
- <p>
- An example of when this can occur is code like this:
- </p>
-
- <pre
->if () { ...
- X = 4;
-}
-if (X &lt; 3) {</pre>
-
- <p>
- In this case, the unconditional branch at the end of the first if can be
- revectored to the false side of the second if.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="lcssa">-lcssa: Loop-Closed SSA Form Pass</a>
-</h3>
-<div>
- <p>
- This pass transforms loops by placing phi nodes at the end of the loops for
- all values that are live across the loop boundary. For example, it turns
- the left into the right code:
- </p>
-
- <pre
->for (...) for (...)
- if (c) if (c)
- X1 = ... X1 = ...
- else else
- X2 = ... X2 = ...
- X3 = phi(X1, X2) X3 = phi(X1, X2)
-... = X3 + 4 X4 = phi(X3)
- ... = X4 + 4</pre>
-
- <p>
- This is still valid LLVM; the extra phi nodes are purely redundant, and will
- be trivially eliminated by <code>InstCombine</code>. The major benefit of
- this transformation is that it makes many other loop optimizations, such as
- LoopUnswitching, simpler.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="licm">-licm: Loop Invariant Code Motion</a>
-</h3>
-<div>
- <p>
- This pass performs loop invariant code motion, attempting to remove as much
- code from the body of a loop as possible. It does this by either hoisting
- code into the preheader block, or by sinking code to the exit blocks if it is
- safe. This pass also promotes must-aliased memory locations in the loop to
- live in registers, thus hoisting and sinking "invariant" loads and stores.
- </p>
-
- <p>
- This pass uses alias analysis for two purposes:
- </p>
-
- <ul>
- <li>Moving loop invariant loads and calls out of loops. If we can determine
- that a load or call inside of a loop never aliases anything stored to,
- we can hoist it or sink it like any other instruction.</li>
- <li>Scalar Promotion of Memory - If there is a store instruction inside of
- the loop, we try to move the store to happen AFTER the loop instead of
- inside of the loop. This can only happen if a few conditions are true:
- <ul>
- <li>The pointer stored through is loop invariant.</li>
- <li>There are no stores or loads in the loop which <em>may</em> alias
- the pointer. There are no calls in the loop which mod/ref the
- pointer.</li>
- </ul>
- If these conditions are true, we can promote the loads and stores in the
- loop of the pointer to use a temporary alloca'd variable. We then use
- the mem2reg functionality to construct the appropriate SSA form for the
- variable.</li>
- </ul>
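-
-  <p>
-    A minimal sketch of hoisting (an invented fragment; only the shape
-    matters): the multiply below does not depend on the loop and is moved to
-    the preheader:
-  </p>
-
-<blockquote><pre
->loop:
-  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-  %t = mul i32 %a, %b    ; loop invariant: hoisted out of the loop
-  %i.next = add i32 %i, 1
-  %done = icmp eq i32 %i.next, %n
-  br i1 %done, label %exit, label %loop</pre></blockquote>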
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="loop-deletion">-loop-deletion: Delete dead loops</a>
-</h3>
-<div>
- <p>
- This file implements the Dead Loop Deletion Pass. This pass is responsible
- for eliminating loops with non-infinite computable trip counts that have no
- side effects or volatile instructions, and do not contribute to the
- computation of the function's return value.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="loop-extract">-loop-extract: Extract loops into new functions</a>
-</h3>
-<div>
- <p>
- A pass wrapper around the <code>ExtractLoop()</code> scalar transformation to
- extract each top-level loop into its own new function. If the loop is the
-    <em>only</em> loop in a given function, it is not touched. This pass is
-    mostly useful for debugging via bugpoint.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="loop-extract-single">-loop-extract-single: Extract at most one loop into a new function</a>
-</h3>
-<div>
- <p>
- Similar to <a href="#loop-extract">Extract loops into new functions</a>,
- this pass extracts one natural loop from the program into a function if it
- can. This is used by bugpoint.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="loop-reduce">-loop-reduce: Loop Strength Reduction</a>
-</h3>
-<div>
- <p>
- This pass performs a strength reduction on array references inside loops that
- have as one or more of their components the loop induction variable. This is
- accomplished by creating a new value to hold the initial value of the array
- access for the first iteration, and then creating a new GEP instruction in
- the loop to increment the value by the appropriate amount.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="loop-rotate">-loop-rotate: Rotate Loops</a>
-</h3>
-<div>
-  <p>A simple loop rotation transformation, which rotates loops into
-  bottom-tested (do-while style) form; this canonical form simplifies
-  subsequent loop optimizations such as LICM.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="loop-simplify">-loop-simplify: Canonicalize natural loops</a>
-</h3>
-<div>
- <p>
- This pass performs several transformations to transform natural loops into a
- simpler form, which makes subsequent analyses and transformations simpler and
- more effective.
- </p>
-
- <p>
- Loop pre-header insertion guarantees that there is a single, non-critical
- entry edge from outside of the loop to the loop header. This simplifies a
- number of analyses and transformations, such as LICM.
- </p>
-
- <p>
- Loop exit-block insertion guarantees that all exit blocks from the loop
- (blocks which are outside of the loop that have predecessors inside of the
- loop) only have predecessors from inside of the loop (and are thus dominated
- by the loop header). This simplifies transformations such as store-sinking
- that are built into LICM.
- </p>
-
- <p>
- This pass also guarantees that loops will have exactly one backedge.
- </p>
-
- <p>
- Note that the simplifycfg pass will clean up blocks which are split out but
- end up being unnecessary, so usage of this pass should not pessimize
- generated code.
- </p>
-
- <p>
- This pass obviously modifies the CFG, but updates loop information and
- dominator information.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="loop-unroll">-loop-unroll: Unroll loops</a>
-</h3>
-<div>
- <p>
- This pass implements a simple loop unroller. It works best when loops have
- been canonicalized by the <a href="#indvars"><tt>-indvars</tt></a> pass,
- allowing it to determine the trip counts of loops easily.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="loop-unswitch">-loop-unswitch: Unswitch loops</a>
-</h3>
-<div>
- <p>
- This pass transforms loops that contain branches on loop-invariant conditions
- to have multiple loops. For example, it turns the left into the right code:
- </p>
-
- <pre
->for (...) if (lic)
- A for (...)
- if (lic) A; B; C
- B else
- C for (...)
- A; C</pre>
-
- <p>
- This can increase the size of the code exponentially (doubling it every time
- a loop is unswitched) so we only unswitch if the resultant code will be
- smaller than a threshold.
- </p>
-
- <p>
- This pass expects LICM to be run before it to hoist invariant conditions out
- of the loop, to make the unswitching opportunity obvious.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="loweratomic">-loweratomic: Lower atomic intrinsics to non-atomic form</a>
-</h3>
-<div>
- <p>
- This pass lowers atomic intrinsics to non-atomic form for use in a known
- non-preemptible environment.
- </p>
-
- <p>
- The pass does not verify that the environment is non-preemptible (in
- general this would require knowledge of the entire call graph of the
- program including any libraries which may not be available in bitcode form);
- it simply lowers every atomic intrinsic.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="lowerinvoke">-lowerinvoke: Lower invoke and unwind, for unwindless code generators</a>
-</h3>
-<div>
- <p>
- This transformation is designed for use by code generators which do not yet
- support stack unwinding. This pass supports two models of exception handling
- lowering, the 'cheap' support and the 'expensive' support.
- </p>
-
- <p>
- 'Cheap' exception handling support gives the program the ability to execute
- any program which does not "throw an exception", by turning 'invoke'
- instructions into calls and by turning 'unwind' instructions into calls to
- abort(). If the program does dynamically use the unwind instruction, the
- program will print a message then abort.
- </p>
-
- <p>
- 'Expensive' exception handling support gives the full exception handling
- support to the program at the cost of making the 'invoke' instruction
- really expensive. It basically inserts setjmp/longjmp calls to emulate the
- exception handling as necessary.
- </p>
-
- <p>
- Because the 'expensive' support slows down programs a lot, and EH is only
- used for a subset of the programs, it must be specifically enabled by the
- <tt>-enable-correct-eh-support</tt> option.
- </p>
-
- <p>
- Note that after this pass runs the CFG is not entirely accurate (exceptional
- control flow edges are not correct anymore) so only very simple things should
- be done after the lowerinvoke pass has run (like generation of native code).
- This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't
- support the invoke instruction yet" lowering pass.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="lowerswitch">-lowerswitch: Lower SwitchInst's to branches</a>
-</h3>
-<div>
- <p>
- Rewrites <tt>switch</tt> instructions with a sequence of branches, which
- allows targets to get away with not implementing the switch instruction until
- it is convenient.
- </p>
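-
-  <p>
-    A minimal sketch (an invented fragment): a two-case <tt>switch</tt>
-  </p>
-
-<blockquote><pre
->switch i32 %x, label %default [ i32 0, label %zero
-                                i32 1, label %one ]</pre></blockquote>
-
-  <p>
-    is rewritten as a chain of compares and conditional branches, roughly a
-    sequence of <tt>icmp eq</tt>/<tt>br</tt> pairs testing each case value in
-    turn.
-  </p>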
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="mem2reg">-mem2reg: Promote Memory to Register</a>
-</h3>
-<div>
- <p>
- This file promotes memory references to be register references. It promotes
- <tt>alloca</tt> instructions which only have <tt>load</tt>s and
- <tt>store</tt>s as uses. An <tt>alloca</tt> is transformed by using dominator
- frontiers to place <tt>phi</tt> nodes, then traversing the function in
- depth-first order to rewrite <tt>load</tt>s and <tt>store</tt>s as
- appropriate. This is just the standard SSA construction algorithm to construct
- "pruned" SSA form.
- </p>
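-
-  <p>
-    As a minimal sketch (names invented), a promotable <tt>alloca</tt> such as:
-  </p>
-
-<blockquote><pre
->%x = alloca i32
-store i32 7, i32* %x
-%v = load i32* %x</pre></blockquote>
-
-  <p>
-    is removed entirely, with <tt>%v</tt> rewritten to use the value
-    <tt>7</tt> directly.
-  </p>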
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="memcpyopt">-memcpyopt: MemCpy Optimization</a>
-</h3>
-<div>
- <p>
- This pass performs various transformations related to eliminating memcpy
- calls, or transforming sets of stores into memset's.
- </p>
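-
-  <p>
-    For instance (a hypothetical fragment), adjacent byte stores of the same
-    value may be merged into a single <tt>llvm.memset</tt> intrinsic call:
-  </p>
-
-<blockquote><pre
->%p1 = getelementptr i8* %p, i32 1
-store i8 0, i8* %p
-store i8 0, i8* %p1    ; together rewritten as one 2-byte llvm.memset</pre></blockquote>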
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="mergefunc">-mergefunc: Merge Functions</a>
-</h3>
-<div>
-  <p>This pass looks for equivalent functions that are mergeable and folds
-  them.</p>
-
-  <p>A hash is computed from the function, based on its type and number of
-  basic blocks.</p>
-
-  <p>Once all hashes are computed, we perform an expensive equality comparison
-  on each function pair. This takes n^2/2 comparisons per bucket, so it's
-  important that the hash function be high quality. The equality comparison
-  iterates through each instruction in each basic block.</p>
-
-  <p>When a match is found, the functions are folded. If both functions are
-  overridable, we move the functionality into a new internal function and
-  leave two overridable thunks to it.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="mergereturn">-mergereturn: Unify function exit nodes</a>
-</h3>
-<div>
- <p>
- Ensure that functions have at most one <tt>ret</tt> instruction in them.
- Additionally, it keeps track of which node is the new exit node of the CFG.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="partial-inliner">-partial-inliner: Partial Inliner</a>
-</h3>
-<div>
- <p>This pass performs partial inlining, typically by inlining an if
- statement that surrounds the body of the function.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="prune-eh">-prune-eh: Remove unused exception handling info</a>
-</h3>
-<div>
- <p>
- This file implements a simple interprocedural pass which walks the call-graph,
- turning <tt>invoke</tt> instructions into <tt>call</tt> instructions if and
- only if the callee cannot throw an exception. It implements this as a
- bottom-up traversal of the call-graph.
- </p>
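-
-  <p>
-    A minimal sketch (names invented): when <tt>@f</tt> is known not to throw,
-  </p>
-
-<blockquote><pre
->invoke void @f()
-    to label %cont unwind label %lpad</pre></blockquote>
-
-  <p>
-    is rewritten as an ordinary <tt>call void @f()</tt> followed by an
-    unconditional branch to <tt>%cont</tt>, and the landing pad becomes dead.
-  </p>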
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="reassociate">-reassociate: Reassociate expressions</a>
-</h3>
-<div>
- <p>
- This pass reassociates commutative expressions in an order that is designed
- to promote better constant propagation, GCSE, LICM, PRE, etc.
- </p>
-
- <p>
- For example: 4 + (<var>x</var> + 5) ⇒ <var>x</var> + (4 + 5)
- </p>
-
- <p>
- In the implementation of this algorithm, constants are assigned rank = 0,
- function arguments are rank = 1, and other values are assigned ranks
- corresponding to the reverse post order traversal of current function
- (starting at 2), which effectively gives values in deep loops higher rank
- than values not in loops.
- </p>
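-
-  <p>
-    In IR terms (a hypothetical fragment), the example above corresponds to:
-  </p>
-
-<blockquote><pre
->%t1 = add i32 %x, 5
-%t2 = add i32 4, %t1   ; reassociated so the constants meet: %x + (4 + 5)</pre></blockquote>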
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="reg2mem">-reg2mem: Demote all values to stack slots</a>
-</h3>
-<div>
- <p>
- This file demotes all registers to memory references. It is intended to be
- the inverse of <a href="#mem2reg"><tt>-mem2reg</tt></a>. By converting to
- <tt>load</tt> instructions, the only values live across basic blocks are
- <tt>alloca</tt> instructions and <tt>load</tt> instructions before
- <tt>phi</tt> nodes. It is intended that this should make CFG hacking much
- easier. To make later hacking easier, the entry block is split into two, such
- that all introduced <tt>alloca</tt> instructions (and nothing else) are in the
- entry block.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="scalarrepl">-scalarrepl: Scalar Replacement of Aggregates (DT)</a>
-</h3>
-<div>
- <p>
- The well-known scalar replacement of aggregates transformation. This
- transform breaks up <tt>alloca</tt> instructions of aggregate type (structure
- or array) into individual <tt>alloca</tt> instructions for each member if
- possible. Then, if possible, it transforms the individual <tt>alloca</tt>
- instructions into nice clean scalar SSA form.
- </p>
-
- <p>
- This combines a simple scalar replacement of aggregates algorithm with the <a
- href="#mem2reg"><tt>mem2reg</tt></a> algorithm because often interact,
- especially for C++ programs. As such, iterating between <tt>scalarrepl</tt>,
- then <a href="#mem2reg"><tt>mem2reg</tt></a> until we run out of things to
- promote works well.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="sccp">-sccp: Sparse Conditional Constant Propagation</a>
-</h3>
-<div>
- <p>
- Sparse conditional constant propagation and merging, which can be summarized
- as:
- </p>
-
- <ol>
- <li>Assumes values are constant unless proven otherwise</li>
- <li>Assumes BasicBlocks are dead unless proven otherwise</li>
- <li>Proves values to be constant, and replaces them with constants</li>
- <li>Proves conditional branches to be unconditional</li>
- </ol>
-
- <p>
- Note that this pass has a habit of making definitions be dead. It is a good
-  idea to run a DCE pass sometime after running this pass.
- </p>
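-
-  <p>
-    A minimal sketch (an invented fragment): the branch condition below is
-    proven constant, so the conditional branch becomes unconditional and the
-    dead block can be removed:
-  </p>
-
-<blockquote><pre
->%c = icmp eq i32 0, 0                  ; proven true
-br i1 %c, label %live, label %dead     ; becomes: br label %live</pre></blockquote>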
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="simplify-libcalls">-simplify-libcalls: Simplify well-known library calls</a>
-</h3>
-<div>
- <p>
- Applies a variety of small optimizations for calls to specific well-known
- function calls (e.g. runtime library functions). For example, a call
- <tt>exit(3)</tt> that occurs within the <tt>main()</tt> function can be
- transformed into simply <tt>return 3</tt>.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="simplifycfg">-simplifycfg: Simplify the CFG</a>
-</h3>
-<div>
- <p>
- Performs dead code elimination and basic block merging. Specifically:
- </p>
-
- <ol>
- <li>Removes basic blocks with no predecessors.</li>
- <li>Merges a basic block into its predecessor if there is only one and the
- predecessor only has one successor.</li>
- <li>Eliminates PHI nodes for basic blocks with a single predecessor.</li>
- <li>Eliminates a basic block that only contains an unconditional
- branch.</li>
- </ol>
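-
-  <p>
-    For example (an invented fragment), the block containing only an
-    unconditional branch is eliminated and the two blocks are merged:
-  </p>
-
-<blockquote><pre
->entry:
-  br label %next    ; %next has a single predecessor...
-next:
-  ret i32 0         ; ...so the two blocks become one</pre></blockquote>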
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="sink">-sink: Code sinking</a>
-</h3>
-<div>
- <p>This pass moves instructions into successor blocks, when possible, so that
- they aren't executed on paths where their results aren't needed.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="sretpromotion">-sretpromotion: Promote sret arguments to multiple ret values</a>
-</h3>
-<div>
- <p>
- This pass finds functions that return a struct (using a pointer to the struct
- as the first argument of the function, marked with the '<tt>sret</tt>' attribute) and
- replaces them with a new function that simply returns each of the elements of
- that struct (using multiple return values).
- </p>
-
- <p>
- This pass works under a number of conditions:
- </p>
-
- <ul>
- <li>The returned struct must not contain other structs</li>
- <li>The returned struct must only be used to load values from</li>
- <li>The placeholder struct passed in is the result of an <tt>alloca</tt></li>
- </ul>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="strip">-strip: Strip all symbols from a module</a>
-</h3>
-<div>
- <p>
-  Performs code stripping. This transformation can delete:
- </p>
-
- <ol>
- <li>names for virtual registers</li>
- <li>symbols for internal globals and functions</li>
- <li>debug information</li>
- </ol>
-
- <p>
-  Note that this transformation makes code much less readable, so it should
- only be used in situations where the <tt>strip</tt> utility would be used,
- such as reducing code size or making it harder to reverse engineer code.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="strip-dead-debug-info">-strip-dead-debug-info: Strip debug info for unused symbols</a>
-</h3>
-<div>
- <p>
-  Performs code stripping. This transformation can delete:
- </p>
-
- <ol>
- <li>names for virtual registers</li>
- <li>symbols for internal globals and functions</li>
- <li>debug information</li>
- </ol>
-
- <p>
-  Note that this transformation makes code much less readable, so it should
- only be used in situations where the <tt>strip</tt> utility would be used,
- such as reducing code size or making it harder to reverse engineer code.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="strip-dead-prototypes">-strip-dead-prototypes: Strip Unused Function Prototypes</a>
-</h3>
-<div>
- <p>
- This pass loops over all of the functions in the input module, looking for
- dead declarations and removes them. Dead declarations are declarations of
- functions for which no implementation is available (i.e., declarations for
- unused library functions).
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="strip-debug-declare">-strip-debug-declare: Strip all llvm.dbg.declare intrinsics</a>
-</h3>
-<div>
- <p>This pass implements code stripping. Specifically, it can delete:</p>
- <ul>
- <li>names for virtual registers</li>
- <li>symbols for internal globals and functions</li>
- <li>debug information</li>
- </ul>
- <p>
- Note that this transformation makes code much less readable, so it should
- only be used in situations where the 'strip' utility would be used, such as
- reducing code size or making it harder to reverse engineer code.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="strip-nondebug">-strip-nondebug: Strip all symbols, except dbg symbols, from a module</a>
-</h3>
-<div>
- <p>This pass implements code stripping. Specifically, it can delete:</p>
- <ul>
- <li>names for virtual registers</li>
- <li>symbols for internal globals and functions</li>
- <li>debug information</li>
- </ul>
- <p>
- Note that this transformation makes code much less readable, so it should
- only be used in situations where the 'strip' utility would be used, such as
- reducing code size or making it harder to reverse engineer code.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="tailcallelim">-tailcallelim: Tail Call Elimination</a>
-</h3>
-<div>
- <p>
- This file transforms calls of the current function (self recursion) followed
- by a return instruction with a branch to the entry of the function, creating
- a loop. This pass also implements the following extensions to the basic
- algorithm:
- </p>
-
- <ul>
- <li>Trivial instructions between the call and return do not prevent the
- transformation from taking place, though currently the analysis cannot
- support moving any really useful instructions (only dead ones).
-  <li>This pass transforms functions that are prevented from being tail
-      recursive by an associative expression to use an accumulator variable,
-      thus compiling the typical naive factorial or <tt>fib</tt> implementation
-      into efficient code (see the sketch below this list).
- <li>TRE is performed if the function returns void, if the return
- returns the result returned by the call, or if the function returns a
- run-time constant on all exits from the function. It is possible, though
- unlikely, that the return returns something else (like constant 0), and
- can still be TRE'd. It can be TRE'd if <em>all other</em> return
- instructions in the function return the exact same value.
-  <li>If it can prove that callees do not access their caller stack frame,
- they are marked as eligible for tail call elimination (by the code
- generator).
- </ul>
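-
-  <p>
-    A minimal sketch of the accumulator case mentioned above (an invented
-    factorial; only the shape matters):
-  </p>
-
-<blockquote><pre
->define i32 @fact(i32 %n) {
-entry:
-  %z = icmp eq i32 %n, 0
-  br i1 %z, label %base, label %rec
-base:
-  ret i32 1
-rec:
-  %n1 = sub i32 %n, 1
-  %r = call i32 @fact(i32 %n1)
-  %res = mul i32 %n, %r    ; associative op between the call and the return
-  ret i32 %res
-}</pre></blockquote>
-
-  <p>
-    The recursion is rewritten into a loop that carries the product in an
-    accumulator <tt>phi</tt>, removing the recursive call.
-  </p>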
-</div>
-
-<!-- ======================================================================= -->
-<h2><a name="utilities">Utility Passes</a></h2>
-<div>
- <p>This section describes the LLVM Utility Passes.</p>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="deadarghaX0r">-deadarghaX0r: Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)</a>
-</h3>
-<div>
- <p>
- Same as dead argument elimination, but deletes arguments to functions which
- are external. This is only for use by <a
- href="Bugpoint.html">bugpoint</a>.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="extract-blocks">-extract-blocks: Extract Basic Blocks From Module (for bugpoint use)</a>
-</h3>
-<div>
- <p>
- This pass is used by bugpoint to extract all blocks from the module into their
- own functions.</p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="instnamer">-instnamer: Assign names to anonymous instructions</a>
-</h3>
-<div>
-  <p>This is a little utility pass that gives instructions names. This is mostly
- useful when diffing the effect of an optimization because deleting an
- unnamed instruction can change all other instruction numbering, making the
- diff very noisy.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="preverify">-preverify: Preliminary module verification</a>
-</h3>
-<div>
- <p>
- Ensures that the module is in the form required by the <a
- href="#verifier">Module Verifier</a> pass.
- </p>
-
- <p>
- Running the verifier runs this pass automatically, so there should be no need
- to use it directly.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="verify">-verify: Module Verifier</a>
-</h3>
-<div>
- <p>
-  Verifies LLVM IR code. This is useful to run after an optimization which is
- undergoing testing. Note that <tt>llvm-as</tt> verifies its input before
- emitting bitcode, and also that malformed bitcode is likely to make LLVM
- crash. All language front-ends are therefore encouraged to verify their output
- before performing optimizing transformations.
- </p>
-
-  <p>
-  Among other things, this pass checks that:
-  </p>
-
-  <ul>
- <li>Both of a binary operator's parameters are of the same type.</li>
- <li>Verify that the indices of mem access instructions match other
- operands.</li>
- <li>Verify that arithmetic and other things are only performed on
-      first-class types. Verify that shifts and logical operations only
-      happen on integral types, for example.</li>
- <li>All of the constants in a switch statement are of the correct type.</li>
- <li>The code is in valid SSA form.</li>
- <li>It is illegal to put a label into any other type (like a structure) or
- to return one.</li>
- <li>Only phi nodes can be self referential: <tt>%x = add i32 %x, %x</tt> is
- invalid.</li>
- <li>PHI nodes must have an entry for each predecessor, with no extras.</li>
- <li>PHI nodes must be the first thing in a basic block, all grouped
- together.</li>
- <li>PHI nodes must have at least one entry.</li>
- <li>All basic blocks should only end with terminator insts, not contain
- them.</li>
- <li>The entry node to a function must not have predecessors.</li>
- <li>All Instructions must be embedded into a basic block.</li>
- <li>Functions cannot take a void-typed parameter.</li>
- <li>Verify that a function's argument list agrees with its declared
- type.</li>
- <li>It is illegal to specify a name for a void value.</li>
- <li>It is illegal to have an internal global value with no initializer.</li>
- <li>It is illegal to have a ret instruction that returns a value that does
- not agree with the function return value type.</li>
- <li>Function call argument types match the function prototype.</li>
- <li>All other things that are tested by asserts spread about the code.</li>
- </ul>
-
- <p>
- Note that this does not provide full security verification (like Java), but
- instead just tries to ensure that code is well-formed.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="view-cfg">-view-cfg: View CFG of function</a>
-</h3>
-<div>
- <p>
- Displays the control flow graph using the GraphViz tool.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="view-cfg-only">-view-cfg-only: View CFG of function (with no function bodies)</a>
-</h3>
-<div>
- <p>
- Displays the control flow graph using the GraphViz tool, but omitting function
- bodies.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="view-dom">-view-dom: View dominance tree of function</a>
-</h3>
-<div>
- <p>
- Displays the dominator tree using the GraphViz tool.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="view-dom-only">-view-dom-only: View dominance tree of function (with no function bodies)</a>
-</h3>
-<div>
- <p>
- Displays the dominator tree using the GraphViz tool, but omitting function
- bodies.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="view-postdom">-view-postdom: View postdominance tree of function</a>
-</h3>
-<div>
- <p>
- Displays the post dominator tree using the GraphViz tool.
- </p>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
- <a name="view-postdom-only">-view-postdom-only: View postdominance tree of function (with no function bodies)</a>
-</h3>
-<div>
- <p>
- Displays the post dominator tree using the GraphViz tool, but omitting
- function bodies.
- </p>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
- <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-31 18:25:31 +0100 (Wed, 31 Oct 2012) $
-</address>
-
-</body>
-</html>
diff --git a/docs/Passes.rst b/docs/Passes.rst
new file mode 100644
index 000000000000..d279eca3afb6
--- /dev/null
+++ b/docs/Passes.rst
@@ -0,0 +1,1261 @@
+..
+ If Passes.html is up to date, the following "one-liner" should print
+ an empty diff.
+
+ egrep -e '^<tr><td><a href="#.*">-.*</a></td><td>.*</td></tr>$' \
+ -e '^ <a name=".*">.*</a>$' < Passes.html >html; \
+ perl >help <<'EOT' && diff -u help html; rm -f help html
+ open HTML, "<Passes.html" or die "open: Passes.html: $!\n";
+ while (<HTML>) {
+ m:^<tr><td><a href="#(.*)">-.*</a></td><td>.*</td></tr>$: or next;
+ $order{$1} = sprintf("%03d", 1 + int %order);
+ }
+ open HELP, "../Release/bin/opt -help|" or die "open: opt -help: $!\n";
+ while (<HELP>) {
+ m:^ -([^ ]+) +- (.*)$: or next;
+ my $o = $order{$1};
+ $o = "000" unless defined $o;
+ push @x, "$o<tr><td><a href=\"#$1\">-$1</a></td><td>$2</td></tr>\n";
+ push @y, "$o <a name=\"$1\">-$1: $2</a>\n";
+ }
+ @x = map { s/^\d\d\d//; $_ } sort @x;
+ @y = map { s/^\d\d\d//; $_ } sort @y;
+ print @x, @y;
+ EOT
+
+ This (real) one-liner can also be helpful when converting comments to HTML:
+
+ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !$on && $_ =~ /\S/; print " </p>\n" if $on && $_ =~ /^\s*$/; print " $_\n"; $on = ($_ =~ /\S/); } print " </p>\n" if $on'
+
+====================================
+LLVM's Analysis and Transform Passes
+====================================
+
+.. contents::
+ :local:
+
+Introduction
+============
+
+This document serves as a high level summary of the optimization features that
+LLVM provides. Optimizations are implemented as Passes that traverse some
+portion of a program to either collect information or transform the program.
+The table below divides the passes that LLVM provides into three categories.
+Analysis passes compute information that other passes can use, or that serves
+debugging or program visualization purposes. Transform passes can use (or
+invalidate) the analysis passes. All transform passes mutate the program in
+some way. Utility passes provide some utility but don't otherwise fit a
+category; for example, passes to extract functions to bitcode or to write a
+module to bitcode are neither analysis nor transform passes. The table of
+contents above provides a quick summary of each pass and links to the more
+complete pass
+description later in the document.
+
+Analysis Passes
+===============
+
+This section describes the LLVM Analysis Passes.
+
+``-aa-eval``: Exhaustive Alias Analysis Precision Evaluator
+-----------------------------------------------------------
+
+This is a simple N^2 alias analysis accuracy evaluator. Basically, for each
+function in the program, it simply queries to see how the alias analysis
+implementation answers alias queries between each pair of pointers in the
+function.
+
+This is inspired and adapted from code by: Naveen Neelakantam, Francesco
+Spadini, and Wojciech Stryjewski.
+
+``-basicaa``: Basic Alias Analysis (stateless AA impl)
+------------------------------------------------------
+
+A basic alias analysis pass that implements identities (two different globals
+cannot alias, etc), but does no stateful analysis.
+
+``-basiccg``: Basic CallGraph Construction
+------------------------------------------
+
+Yet to be written.
+
+``-count-aa``: Count Alias Analysis Query Responses
+---------------------------------------------------
+
+A pass which can be used to count how many alias queries are being made and how
+the alias analysis implementation being used responds.
+
+``-da``: Dependence Analysis
+----------------------------
+
+Dependence analysis framework, which is used to detect dependences in memory
+accesses.
+
+``-debug-aa``: AA use debugger
+------------------------------
+
+This simple pass checks alias analysis users to ensure that if they create a
+new value, they do not query AA without informing it of the value. It acts as
+a shim over any other AA pass you want.
+
+Yes, keeping track of every value in the program is expensive, but this is a
+debugging pass.
+
+``-domfrontier``: Dominance Frontier Construction
+-------------------------------------------------
+
+This pass is a simple dominator construction algorithm for finding forward
+dominator frontiers.
+
+``-domtree``: Dominator Tree Construction
+-----------------------------------------
+
+This pass is a simple dominator construction algorithm for finding forward
+dominators.
+
+
+``-dot-callgraph``: Print Call Graph to "dot" file
+--------------------------------------------------
+
+This pass, only available in ``opt``, prints the call graph into a ``.dot``
+graph. This graph can then be processed with the :program:`dot` tool to
+convert it to postscript or some other suitable format.
+
+``-dot-cfg``: Print CFG of function to "dot" file
+-------------------------------------------------
+
+This pass, only available in ``opt``, prints the control flow graph into a
+``.dot`` graph. This graph can then be processed with the :program:`dot` tool
+to convert it to postscript or some other suitable format.
+
+``-dot-cfg-only``: Print CFG of function to "dot" file (with no function bodies)
+--------------------------------------------------------------------------------
+
+This pass, only available in ``opt``, prints the control flow graph into a
+``.dot`` graph, omitting the function bodies. This graph can then be processed
+with the :program:`dot` tool to convert it to postscript or some other suitable
+format.
+
+``-dot-dom``: Print dominance tree of function to "dot" file
+------------------------------------------------------------
+
+This pass, only available in ``opt``, prints the dominator tree into a ``.dot``
+graph. This graph can then be processed with the :program:`dot` tool to
+convert it to postscript or some other suitable format.
+
+``-dot-dom-only``: Print dominance tree of function to "dot" file (with no function bodies)
+-------------------------------------------------------------------------------------------
+
+This pass, only available in ``opt``, prints the dominator tree into a ``.dot``
+graph, omitting the function bodies. This graph can then be processed with the
+:program:`dot` tool to convert it to postscript or some other suitable format.
+
+``-dot-postdom``: Print postdominance tree of function to "dot" file
+--------------------------------------------------------------------
+
+This pass, only available in ``opt``, prints the post dominator tree into a
+``.dot`` graph. This graph can then be processed with the :program:`dot` tool
+to convert it to postscript or some other suitable format.
+
+``-dot-postdom-only``: Print postdominance tree of function to "dot" file (with no function bodies)
+---------------------------------------------------------------------------------------------------
+
+This pass, only available in ``opt``, prints the post dominator tree into a
+``.dot`` graph, omitting the function bodies. This graph can then be processed
+with the :program:`dot` tool to convert it to postscript or some other suitable
+format.
+
+``-globalsmodref-aa``: Simple mod/ref analysis for globals
+----------------------------------------------------------
+
+This simple pass provides alias and mod/ref information for global values that
+do not have their address taken, and keeps track of whether functions read or
+write memory (are "pure"). For this simple (but very common) case, we can
+provide pretty accurate and useful information.
+
+``-instcount``: Counts the various types of ``Instruction``\ s
+--------------------------------------------------------------
+
+This pass collects the count of all instructions and reports them.
+
+``-intervals``: Interval Partition Construction
+-----------------------------------------------
+
+This analysis calculates and represents the interval partition of a function,
+or a preexisting interval partition.
+
+In this way, the interval partition may be used to reduce a flow graph down to
+its degenerate single node interval partition (unless it is irreducible).
+
+``-iv-users``: Induction Variable Users
+---------------------------------------
+
+Bookkeeping for "interesting" users of expressions computed from induction
+variables.
+
+``-lazy-value-info``: Lazy Value Information Analysis
+-----------------------------------------------------
+
+Interface for lazy computation of value constraint information.
+
+``-libcall-aa``: LibCall Alias Analysis
+---------------------------------------
+
+LibCall Alias Analysis.
+
+``-lint``: Statically lint-checks LLVM IR
+-----------------------------------------
+
+This pass statically checks for common and easily-identified constructs which
+produce undefined or likely unintended behavior in LLVM IR.
+
+It is not a guarantee of correctness, in two ways. First, it isn't
+comprehensive. There are checks which could be done statically which are not
+yet implemented. Some of these are indicated by TODO comments, but those
+aren't comprehensive either. Second, many conditions cannot be checked
+statically. This pass does no dynamic instrumentation, so it can't check for
+all possible problems.
+
+Another limitation is that it assumes all code will be executed. A store
+through a null pointer in a basic block which is never reached is harmless, but
+this pass will warn about it anyway.
+
+Optimization passes may make conditions that this pass checks for more or less
+obvious. If an optimization pass appears to be introducing a warning, it may
+be that the optimization pass is merely exposing an existing condition in the
+code.
+
+This code may be run before :ref:`instcombine <passes-instcombine>`. In many
+cases, instcombine checks for the same kinds of things and turns instructions
+with undefined behavior into unreachable (or equivalent). Because of this,
+this pass makes some effort to look through bitcasts and so on.
+
+``-loops``: Natural Loop Information
+------------------------------------
+
+This analysis is used to identify natural loops and determine the loop depth of
+various nodes of the CFG. Note that the loops identified may actually be
+several natural loops that share the same header node... not just a single
+natural loop.
+
+``-memdep``: Memory Dependence Analysis
+---------------------------------------
+
+An analysis that determines, for a given memory operation, what preceding
+memory operations it depends on. It builds on alias analysis information, and
+tries to provide a lazy, caching interface to a common kind of alias
+information query.
+
+``-module-debuginfo``: Decodes module-level debug info
+------------------------------------------------------
+
+This pass decodes the debug info metadata in a module and prints it in a
+(sufficiently prepared) human-readable form.
+
+For example, run this pass from ``opt`` along with the ``-analyze`` option, and
+it'll print to standard output.
+
+``-no-aa``: No Alias Analysis (always returns 'may' alias)
+----------------------------------------------------------
+
+This is the default implementation of the Alias Analysis interface. It always
+returns "I don't know" for alias queries. NoAA is unlike other alias analysis
+implementations, in that it does not chain to a previous analysis. As such it
+doesn't follow many of the rules that other alias analyses must.
+
+``-no-profile``: No Profile Information
+---------------------------------------
+
+The default "no profile" implementation of the abstract ``ProfileInfo``
+interface.
+
+``-postdomfrontier``: Post-Dominance Frontier Construction
+----------------------------------------------------------
+
+This pass is a simple post-dominator construction algorithm for finding
+post-dominator frontiers.
+
+``-postdomtree``: Post-Dominator Tree Construction
+--------------------------------------------------
+
+This pass is a simple post-dominator construction algorithm for finding
+post-dominators.
+
+``-print-alias-sets``: Alias Set Printer
+----------------------------------------
+
+Yet to be written.
+
+``-print-callgraph``: Print a call graph
+----------------------------------------
+
+This pass, only available in ``opt``, prints the call graph to standard error
+in a human-readable form.
+
+``-print-callgraph-sccs``: Print SCCs of the Call Graph
+-------------------------------------------------------
+
+This pass, only available in ``opt``, prints the SCCs of the call graph to
+standard error in a human-readable form.
+
+``-print-cfg-sccs``: Print SCCs of each function CFG
+----------------------------------------------------
+
+This pass, only available in ``opt``, prints the SCCs of each function CFG to
+standard error in a human-readable form.
+
+``-print-dbginfo``: Print debug info in human readable form
+-----------------------------------------------------------
+
+Pass that prints instructions and associated debug info:
+
+#. source/line/col information
+#. original variable name
+#. original type name
+
+``-print-dom-info``: Dominator Info Printer
+-------------------------------------------
+
+Dominator Info Printer.
+
+``-print-externalfnconstants``: Print external fn callsites passed constants
+----------------------------------------------------------------------------
+
+This pass, only available in ``opt``, prints out call sites to external
+functions that are called with constant arguments. This can be useful when
+looking for standard library functions we should constant fold or handle in
+alias analyses.
+
+``-print-function``: Print function to stderr
+---------------------------------------------
+
+The ``PrintFunctionPass`` class is designed to be pipelined with other
+``FunctionPasses``, and prints out the functions of the module as they are
+processed.
+
+``-print-module``: Print module to stderr
+-----------------------------------------
+
+This pass simply prints out the entire module when it is executed.
+
+.. _passes-print-used-types:
+
+``-print-used-types``: Find Used Types
+--------------------------------------
+
+This pass is used to seek out all of the types in use by the program. Note
+that this analysis explicitly does not include types only used by the symbol
+table.
+
+``-profile-estimator``: Estimate profiling information
+------------------------------------------------------
+
+Pass that estimates profiling information in a very crude and unimaginative
+way.
+
+``-profile-loader``: Load profile information from ``llvmprof.out``
+-------------------------------------------------------------------
+
+A concrete implementation of profiling information that loads the information
+from a profile dump file.
+
+``-profile-verifier``: Verify profiling information
+---------------------------------------------------
+
+Pass that checks profiling information for plausibility.
+
+``-regions``: Detect single entry single exit regions
+-----------------------------------------------------
+
+The ``RegionInfo`` pass detects single entry single exit regions in a function,
+where a region is defined as any subgraph that is connected to the remaining
+graph at only two spots. Furthermore, a hierarchical region tree is built.
+
+``-scalar-evolution``: Scalar Evolution Analysis
+------------------------------------------------
+
+The ``ScalarEvolution`` analysis can be used to analyze and categorize scalar
+expressions in loops. It specializes in recognizing general induction
+variables, representing them with the abstract and opaque ``SCEV`` class.
+Given this analysis, trip counts of loops and other important properties can be
+obtained.
+
+This analysis is primarily useful for induction variable substitution and
+strength reduction.
+
+``-scev-aa``: ScalarEvolution-based Alias Analysis
+--------------------------------------------------
+
+Simple alias analysis implemented in terms of ``ScalarEvolution`` queries.
+
+This differs from traditional loop dependence analysis in that it tests for
+dependencies within a single iteration of a loop, rather than dependencies
+between different iterations.
+
+``ScalarEvolution`` has a more complete understanding of pointer arithmetic
+than ``BasicAliasAnalysis``' collection of ad-hoc analyses.
+
+``-targetdata``: Target Data Layout
+-----------------------------------
+
+Provides other passes access to information about the size and alignment
+required by the target ABI for various data types.
+
+Transform Passes
+================
+
+This section describes the LLVM Transform Passes.
+
+``-adce``: Aggressive Dead Code Elimination
+-------------------------------------------
+
+ADCE aggressively tries to eliminate code. This pass is similar to :ref:`DCE
+<passes-dce>` but it assumes that values are dead until proven otherwise. This
+is similar to :ref:`SCCP <passes-sccp>`, except applied to the liveness of
+values.
+
+``-always-inline``: Inliner for ``always_inline`` functions
+-----------------------------------------------------------
+
+A custom inliner that handles only functions that are marked as "always
+inline".
+
+``-argpromotion``: Promote 'by reference' arguments to scalars
+--------------------------------------------------------------
+
+This pass promotes "by reference" arguments to be "by value" arguments. In
+practice, this means looking for internal functions that have pointer
+arguments. If it can prove, through the use of alias analysis, that an
+argument is *only* loaded, then it can pass the value into the function instead
+of the address of the value. This can cause recursive simplification of code
+and lead to the elimination of allocas (especially in C++ template code like
+the STL).
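+
+As a hypothetical sketch (function and value names are illustrative), a
+pointer argument that the callee only loads:
+
+.. code-block:: llvm
+
+  define internal i32 @callee(i32* %p) {
+    %v = load i32* %p
+    ret i32 %v
+  }
+
+can be promoted so that callers pass the loaded value directly:
+
+.. code-block:: llvm
+
+  define internal i32 @callee(i32 %p.val) {
+    ret i32 %p.val
+  }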
+
+This pass also handles aggregate arguments that are passed into a function,
+scalarizing them if the elements of the aggregate are only loaded. Note that
+it refuses to scalarize aggregates which would require passing in more than
+three operands to the function, because passing thousands of operands for a
+large array or structure is unprofitable!
+
+Note that this transformation could also be done for arguments that are only
+stored to (returning the value instead), but this is not currently
+implemented. This case would be best handled when and if LLVM starts
+supporting multiple return values from functions.
+
+``-bb-vectorize``: Basic-Block Vectorization
+--------------------------------------------
+
+This pass combines instructions inside basic blocks to form vector
+instructions. It iterates over each basic block, attempting to pair compatible
+instructions, repeating this process until no additional pairs are selected for
+vectorization. When the outputs of some pair of compatible instructions are
+used as inputs by some other pair of compatible instructions, those pairs are
+part of a potential vectorization chain. Instruction pairs are only fused into
+vector instructions when they are part of a chain longer than some threshold
+length. Moreover, the pass attempts to find the best possible chain for each
+pair of compatible instructions. These heuristics are intended to prevent
+vectorization in cases where it would not yield a performance increase of the
+resulting code.
+
+``-block-placement``: Profile Guided Basic Block Placement
+----------------------------------------------------------
+
+This pass is a very simple profile guided basic block placement algorithm. The
+idea is to put frequently executed blocks together at the start of the function
+and hopefully increase the number of fall-through conditional branches. If
+there is no profile information for a particular function, this pass basically
+orders blocks in depth-first order.
+
+``-break-crit-edges``: Break critical edges in CFG
+--------------------------------------------------
+
+Break all of the critical edges in the CFG by inserting a dummy basic block.
+It may be "required" by passes that cannot deal with critical edges. This
+transformation obviously invalidates the CFG, but can update forward dominator
+(set, immediate dominators, tree, and frontier) information.
+
+``-codegenprepare``: Optimize for code generation
+-------------------------------------------------
+
+This pass munges the code in the input function to better prepare it for
+SelectionDAG-based code generation. This works around limitations in its
+basic-block-at-a-time approach. It should eventually be removed.
+
+``-constmerge``: Merge Duplicate Global Constants
+-------------------------------------------------
+
+Merges duplicate global constants together into a single constant that is
+shared. This is useful because some passes (e.g., TraceValues) insert a lot of
+string constants into the program, regardless of whether or not an existing
+string is available.
+
+``-constprop``: Simple constant propagation
+-------------------------------------------
+
+This file implements constant propagation and merging. It looks for
+instructions involving only constant operands and replaces them with a constant
+value instead of an instruction. For example:
+
+.. code-block:: llvm
+
+ add i32 1, 2
+
+becomes
+
+.. code-block:: llvm
+
+ i32 3
+
+NOTE: this pass has a habit of making definitions be dead. It is a good idea
+to run a :ref:`Dead Instruction Elimination <passes-die>` pass sometime
+after running this pass.
+
+.. _passes-dce:
+
+``-dce``: Dead Code Elimination
+-------------------------------
+
+Dead code elimination is similar to :ref:`dead instruction elimination
+<passes-die>`, but it rechecks instructions that were used by removed
+instructions to see if they are newly dead.
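+
+For example (an illustrative sketch), if ``%b`` below is unused, it is
+removed; ``%a`` then becomes newly dead and is removed as well:
+
+.. code-block:: llvm
+
+  %a = add i32 %x, 1    ; only used by %b
+  %b = mul i32 %a, 2    ; unused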
+
+``-deadargelim``: Dead Argument Elimination
+-------------------------------------------
+
+This pass deletes dead arguments from internal functions. Dead argument
+elimination removes arguments which are directly dead, as well as arguments
+only passed into function calls as dead arguments of other functions. This
+pass also deletes dead return values in a similar way.
+
+This pass is often useful as a cleanup pass to run after aggressive
+interprocedural passes, which add possibly-dead arguments.
+
+``-deadtypeelim``: Dead Type Elimination
+----------------------------------------
+
+This pass is used to clean up the output of GCC. It eliminates names for types
+that are unused in the entire translation unit, using the :ref:`find used types
+<passes-print-used-types>` pass.
+
+.. _passes-die:
+
+``-die``: Dead Instruction Elimination
+--------------------------------------
+
+Dead instruction elimination performs a single pass over the function, removing
+instructions that are obviously dead.
+
+``-dse``: Dead Store Elimination
+--------------------------------
+
+A trivial dead store elimination that only considers basic-block local
+redundant stores.
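+
+For example (a minimal sketch), the first store below is dead because it is
+overwritten before any load can observe it, so the pass deletes it:
+
+.. code-block:: llvm
+
+  store i32 1, i32* %p    ; dead: overwritten below
+  store i32 2, i32* %p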
+
+``-functionattrs``: Deduce function attributes
+----------------------------------------------
+
+A simple interprocedural pass which walks the call-graph, looking for functions
+which do not access or only read non-local memory, and marking them
+``readnone``/``readonly``. In addition, it marks function arguments (of
+pointer type) "``nocapture``" if a call to the function does not create any
+copies of the pointer value that outlive the call. This more or less means
+that the pointer is only dereferenced, and not returned from the function or
+stored in a global. This pass is implemented as a bottom-up traversal of the
+call-graph.
+
+``-globaldce``: Dead Global Elimination
+---------------------------------------
+
+This transform is designed to eliminate unreachable internal globals from the
+program. It uses an aggressive algorithm, searching out globals that are known
+to be alive. After it finds all of the globals which are needed, it deletes
+whatever is left over. This allows it to delete recursive chunks of the
+program which are unreachable.
+
+``-globalopt``: Global Variable Optimizer
+-----------------------------------------
+
+This pass transforms simple global variables that never have their address
+taken. Where it is obviously safe to do so, it marks read/write globals as
+constant, deletes variables that are only stored to, etc.
+
+``-gvn``: Global Value Numbering
+--------------------------------
+
+This pass performs global value numbering to eliminate fully and partially
+redundant instructions. It also performs redundant load elimination.
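+
+For instance (an illustrative sketch), if nothing between the two loads below
+may write to ``%p``, the second load is fully redundant and every use of
+``%b`` is rewritten to use ``%a``:
+
+.. code-block:: llvm
+
+  %a = load i32* %p
+  %b = load i32* %p    ; redundant: replaced by %a
+  %c = add i32 %a, %b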
+
+.. _passes-indvars:
+
+``-indvars``: Canonicalize Induction Variables
+----------------------------------------------
+
+This transformation analyzes and transforms the induction variables (and
+computations derived from them) into simpler forms suitable for subsequent
+analysis and transformation.
+
+This transformation makes the following changes to each loop with an
+identifiable induction variable:
+
+* All loops are transformed to have a *single* canonical induction variable
+ which starts at zero and steps by one.
+* The canonical induction variable is guaranteed to be the first PHI node in
+ the loop header block.
+* Any pointer arithmetic recurrences are raised to use array subscripts.
+
+If the trip count of a loop is computable, this pass also makes the following
+changes:
+
+* The exit condition for the loop is canonicalized to compare the induction
+ value against the exit value. This turns loops like:
+
+ .. code-block:: c++
+
+ for (i = 7; i*i < 1000; ++i)
+
+ into
+
+ .. code-block:: c++
+
+ for (i = 0; i != 25; ++i)
+
+* Any use outside of the loop of an expression derived from the indvar is
+ changed to compute the derived value outside of the loop, eliminating the
+ dependence on the exit value of the induction variable. If the only purpose
+ of the loop is to compute the exit value of some derived expression, this
+ transformation will make the loop dead.
+
+This transformation should be followed by strength reduction after all of the
+desired loop transformations have been performed. Additionally, on targets
+where it is profitable, the loop could be transformed to count down to zero
+(the "do loop" optimization).
+
+``-inline``: Function Integration/Inlining
+------------------------------------------
+
+Bottom-up inlining of functions into callees.
+
+``-insert-edge-profiling``: Insert instrumentation for edge profiling
+---------------------------------------------------------------------
+
+This pass instruments the specified program with counters for edge profiling.
+Edge profiling can give a reasonable approximation of the hot paths through a
+program, and is used for a wide variety of program transformations.
+
+Note that this implementation is very naïve. It inserts a counter for *every*
+edge in the program, instead of using control flow information to prune the
+number of counters inserted.
+
+``-insert-optimal-edge-profiling``: Insert optimal instrumentation for edge profiling
+-------------------------------------------------------------------------------------
+
+This pass instruments the specified program with counters for edge profiling.
+Edge profiling can give a reasonable approximation of the hot paths through a
+program, and is used for a wide variety of program transformations.
+
+.. _passes-instcombine:
+
+``-instcombine``: Combine redundant instructions
+------------------------------------------------
+
+Combine instructions to form fewer, simple instructions. This pass does not
+modify the CFG. This is where algebraic simplification happens.
+
+This pass combines things like:
+
+.. code-block:: llvm
+
+ %Y = add i32 %X, 1
+ %Z = add i32 %Y, 1
+
+into:
+
+.. code-block:: llvm
+
+ %Z = add i32 %X, 2
+
+This is a simple worklist driven algorithm.
+
+This pass guarantees that the following canonicalizations are performed on the
+program:
+
+#. If a binary operator has a constant operand, it is moved to the right-hand
+ side.
+#. Bitwise operators with constant operands are always grouped so that shifts
+ are performed first, then ``or``\ s, then ``and``\ s, then ``xor``\ s.
+#. Compare instructions are converted from ``<``, ``>``, ``≤``, or ``≥`` to
+ ``=`` or ``≠`` if possible.
+#. All ``cmp`` instructions on boolean values are replaced with logical
+ operations.
+#. ``add X, X`` is represented as ``mul X, 2`` ⇒ ``shl X, 1``
+#. Multiplies with a constant power-of-two argument are transformed into
+ shifts.
+#. … etc.
+
+``-internalize``: Internalize Global Symbols
+--------------------------------------------
+
+This pass loops over all of the functions in the input module, looking for a
+main function. If a main function is found, all other functions and all global
+variables with initializers are marked as internal.
+
+``-ipconstprop``: Interprocedural constant propagation
+------------------------------------------------------
+
+This pass implements an *extremely* simple interprocedural constant propagation
+pass. It could certainly be improved in many different ways, like using a
+worklist. This pass makes arguments dead, but does not remove them. The
+existing dead argument elimination pass should be run after this to clean up
+the mess.
+
+``-ipsccp``: Interprocedural Sparse Conditional Constant Propagation
+--------------------------------------------------------------------
+
+An interprocedural variant of :ref:`Sparse Conditional Constant Propagation
+<passes-sccp>`.
+
+``-jump-threading``: Jump Threading
+-----------------------------------
+
+Jump threading tries to find distinct threads of control flow running through a
+basic block. This pass looks at blocks that have multiple predecessors and
+multiple successors. If one or more of the predecessors of the block can be
+proven to always cause a jump to one of the successors, we forward the edge
+from the predecessor to the successor by duplicating the contents of this
+block.
+
+An example of when this can occur is code like this:
+
+.. code-block:: c++
+
+ if () { ...
+ X = 4;
+ }
+ if (X < 3) {
+
+In this case, the unconditional branch at the end of the first if can be
+revectored to the false side of the second if.
+
+``-lcssa``: Loop-Closed SSA Form Pass
+-------------------------------------
+
+This pass transforms loops by placing phi nodes at the end of the loops for all
+values that are live across the loop boundary. For example, it turns the left
+into the right code:
+
+.. code-block:: c++
+
+ for (...) for (...)
+ if (c) if (c)
+ X1 = ... X1 = ...
+ else else
+ X2 = ... X2 = ...
+ X3 = phi(X1, X2) X3 = phi(X1, X2)
+ ... = X3 + 4 X4 = phi(X3)
+ ... = X4 + 4
+
+This is still valid LLVM; the extra phi nodes are purely redundant, and will be
+trivially eliminated by ``InstCombine``. The major benefit of this
+transformation is that it makes many other loop optimizations, such as
+``LoopUnswitch``\ ing, simpler.
+
+.. _passes-licm:
+
+``-licm``: Loop Invariant Code Motion
+-------------------------------------
+
+This pass performs loop invariant code motion, attempting to remove as much
+code from the body of a loop as possible. It does this by either hoisting code
+into the preheader block, or by sinking code to the exit blocks if it is safe.
+This pass also promotes must-aliased memory locations in the loop to live in
+registers, thus hoisting and sinking "invariant" loads and stores.
+
+This pass uses alias analysis for two purposes:
+
+#. Moving loop invariant loads and calls out of loops. If we can determine
+ that a load or call inside of a loop never aliases anything stored to, we
+ can hoist it or sink it like any other instruction.
+
+#. Scalar Promotion of Memory. If there is a store instruction inside of the
+ loop, we try to move the store to happen AFTER the loop instead of inside of
+ the loop. This can only happen if a few conditions are true:
+
+ #. The pointer stored through is loop invariant.
+ #. There are no stores or loads in the loop which *may* alias the pointer.
+ There are no calls in the loop which mod/ref the pointer.
+
+ If these conditions are true, we can promote the loads and stores in the
+ loop of the pointer to use a temporary alloca'd variable. We then use the
+ :ref:`mem2reg <passes-mem2reg>` functionality to construct the appropriate
+ SSA form for the variable.
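+
+As a small sketch of the first case (names are illustrative), the load below
+reads a location that nothing in the loop may store to, so it can be hoisted
+into the preheader:
+
+.. code-block:: llvm
+
+  loop:
+    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+    %g = load i32* @global       ; loop invariant: hoisted to the preheader
+    %i.next = add i32 %i, %g
+    %done = icmp eq i32 %i.next, 100
+    br i1 %done, label %exit, label %loop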
+
+``-loop-deletion``: Delete dead loops
+-------------------------------------
+
+This file implements the Dead Loop Deletion Pass. This pass is responsible for
+eliminating loops with non-infinite computable trip counts that have no side
+effects or volatile instructions, and do not contribute to the computation of
+the function's return value.
+
+.. _passes-loop-extract:
+
+``-loop-extract``: Extract loops into new functions
+---------------------------------------------------
+
+A pass wrapper around the ``ExtractLoop()`` scalar transformation to extract
+each top-level loop into its own new function. If the loop is the *only* loop
+in a given function, it is not touched. This pass is most useful for
+debugging via bugpoint.
+
+``-loop-extract-single``: Extract at most one loop into a new function
+----------------------------------------------------------------------
+
+Similar to :ref:`Extract loops into new functions <passes-loop-extract>`, this
+pass extracts one natural loop from the program into a function if it can.
+This is used by :program:`bugpoint`.
+
+``-loop-reduce``: Loop Strength Reduction
+-----------------------------------------
+
+This pass performs a strength reduction on array references inside loops that
+have the loop induction variable as one or more of their components. This is
+accomplished by creating a new value to hold the initial value of the array
+access for the first iteration, and then creating a new GEP instruction in the
+loop to increment the value by the appropriate amount.
+
+``-loop-rotate``: Rotate Loops
+------------------------------
+
+A simple loop rotation transformation.
+
+``-loop-simplify``: Canonicalize natural loops
+----------------------------------------------
+
+This pass performs several transformations to transform natural loops into a
+simpler form, which makes subsequent analyses and transformations simpler and
+more effective.
+
+Loop pre-header insertion guarantees that there is a single, non-critical entry
+edge from outside of the loop to the loop header. This simplifies a number of
+analyses and transformations, such as :ref:`LICM <passes-licm>`.
+
+Loop exit-block insertion guarantees that all exit blocks from the loop (blocks
+which are outside of the loop that have predecessors inside of the loop) only
+have predecessors from inside of the loop (and are thus dominated by the loop
+header). This simplifies transformations such as store-sinking that are built
+into LICM.
+
+This pass also guarantees that loops will have exactly one backedge.
+
+Note that the :ref:`simplifycfg <passes-simplifycfg>` pass will clean up blocks
+which are split out but end up being unnecessary, so usage of this pass should
+not pessimize generated code.
+
+This pass obviously modifies the CFG, but updates loop information and
+dominator information.
+
+``-loop-unroll``: Unroll loops
+------------------------------
+
+This pass implements a simple loop unroller. It works best when loops have
+been canonicalized by the :ref:`indvars <passes-indvars>` pass, allowing it to
+determine the trip counts of loops easily.
+
+``-loop-unswitch``: Unswitch loops
+----------------------------------
+
+This pass transforms loops that contain branches on loop-invariant conditions
+to have multiple loops. For example, it turns the left into the right code:
+
+.. code-block:: c++
+
+ for (...) if (lic)
+ A for (...)
+ if (lic) A; B; C
+ B else
+ C for (...)
+ A; C
+
+This can increase the size of the code exponentially (doubling it every time a
+loop is unswitched) so we only unswitch if the resultant code will be smaller
+than a threshold.
+
+This pass expects :ref:`LICM <passes-licm>` to be run before it to hoist
+invariant conditions out of the loop, to make the unswitching opportunity
+obvious.
+
+``-loweratomic``: Lower atomic intrinsics to non-atomic form
+------------------------------------------------------------
+
+This pass lowers atomic intrinsics to non-atomic form for use in a known
+non-preemptible environment.
+
+The pass does not verify that the environment is non-preemptible (in general
+this would require knowledge of the entire call graph of the program including
+any libraries which may not be available in bitcode form); it simply lowers
+every atomic intrinsic.
+
+``-lowerinvoke``: Lower invoke and unwind, for unwindless code generators
+-------------------------------------------------------------------------
+
+This transformation is designed for use by code generators which do not yet
+support stack unwinding. This pass supports two models of exception handling
+lowering, the "cheap" support and the "expensive" support.
+
+"Cheap" exception handling support gives the program the ability to execute any
+program which does not "throw an exception", by turning "``invoke``"
+instructions into calls and by turning "``unwind``" instructions into calls to
+``abort()``. If the program does dynamically use the "``unwind``" instruction,
+the program will print a message then abort.
+
+"Expensive" exception handling support gives the full exception handling
+support to the program at the cost of making the "``invoke``" instruction
+really expensive. It basically inserts ``setjmp``/``longjmp`` calls to emulate
+the exception handling as necessary.
+
+Because the "expensive" support slows down programs a lot, and EH is only used
+for a subset of the programs, it must be specifically enabled by the
+``-enable-correct-eh-support`` option.
+
+Note that after this pass runs the CFG is not entirely accurate (exceptional
+control flow edges are not correct anymore) so only very simple things should
+be done after the ``lowerinvoke`` pass has run (like generation of native
+code). This should not be used as a general purpose "my LLVM-to-LLVM pass
+doesn't support the ``invoke`` instruction yet" lowering pass.
+
+``-lowerswitch``: Lower ``SwitchInst``\ s to branches
+-----------------------------------------------------
+
+Rewrites switch instructions with a sequence of branches, which allows targets
+to get away with not implementing the switch instruction until it is
+convenient.
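+
+A hypothetical sketch of the rewrite: a switch such as
+
+.. code-block:: llvm
+
+  switch i32 %val, label %otherwise [ i32 0, label %case0
+                                      i32 1, label %case1 ]
+
+becomes a chain of compares and conditional branches:
+
+.. code-block:: llvm
+
+  %c0 = icmp eq i32 %val, 0
+  br i1 %c0, label %case0, label %next
+next:
+  %c1 = icmp eq i32 %val, 1
+  br i1 %c1, label %case1, label %otherwise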
+
+.. _passes-mem2reg:
+
+``-mem2reg``: Promote Memory to Register
+----------------------------------------
+
+This file promotes memory references to be register references. It promotes
+alloca instructions which only have loads and stores as uses. An ``alloca`` is
+transformed by using dominator frontiers to place phi nodes, then traversing
+the function in depth-first order to rewrite loads and stores as appropriate.
+This is just the standard SSA construction algorithm to construct "pruned" SSA
+form.
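+
+A minimal sketch (names are illustrative): the ``alloca`` below has only
+loads and stores as uses, so it is promoted and disappears entirely:
+
+.. code-block:: llvm
+
+  define i32 @f(i32 %x) {
+    %p = alloca i32
+    store i32 %x, i32* %p
+    %v = load i32* %p
+    ret i32 %v
+  }
+
+becomes
+
+.. code-block:: llvm
+
+  define i32 @f(i32 %x) {
+    ret i32 %x
+  }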
+
+``-memcpyopt``: MemCpy Optimization
+-----------------------------------
+
+This pass performs various transformations related to eliminating ``memcpy``
+calls, or transforming sets of stores into ``memset``\ s.
+
+``-mergefunc``: Merge Functions
+-------------------------------
+
+This pass looks for equivalent functions that are mergeable and folds them.
+
+A hash is computed from the function, based on its type and number of basic
+blocks.
+
+Once all hashes are computed, we perform an expensive equality comparison on
+each function pair. This takes n^2/2 comparisons per bucket, so it's important
+that the hash function be high quality. The equality comparison iterates
+through each instruction in each basic block.
+
+When a match is found the functions are folded. If both functions are
+overridable, we move the functionality into a new internal function and leave
+two overridable thunks to it.
+
+``-mergereturn``: Unify function exit nodes
+-------------------------------------------
+
+This pass ensures that functions have at most one ``ret`` instruction in
+them. Additionally, it keeps track of which node is the new exit node of the
+CFG.
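+
+A hypothetical sketch: the two ``ret`` instructions below are redirected to a
+single new exit block, with a ``phi`` node merging the returned values:
+
+.. code-block:: llvm
+
+  a:
+    ret i32 0
+  b:
+    ret i32 1
+
+becomes
+
+.. code-block:: llvm
+
+  a:
+    br label %exit
+  b:
+    br label %exit
+  exit:
+    %retval = phi i32 [ 0, %a ], [ 1, %b ]
+    ret i32 %retval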
+
+``-partial-inliner``: Partial Inliner
+-------------------------------------
+
+This pass performs partial inlining, typically by inlining an ``if`` statement
+that surrounds the body of the function.
+
+``-prune-eh``: Remove unused exception handling info
+----------------------------------------------------
+
+This file implements a simple interprocedural pass which walks the call-graph,
+turning invoke instructions into call instructions if and only if the callee
+cannot throw an exception. It implements this as a bottom-up traversal of the
+call-graph.
+
+``-reassociate``: Reassociate expressions
+-----------------------------------------
+
+This pass reassociates commutative expressions in an order that is designed to
+promote better constant propagation, GCSE, :ref:`LICM <passes-licm>`, PRE, etc.
+
+For example: 4 + (x + 5) ⇒ x + (4 + 5)
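+
+In IR terms (a hypothetical sketch), that means rewriting
+
+.. code-block:: llvm
+
+  %t = add i32 %x, 5
+  %r = add i32 4, %t
+
+so that the two constants are adjacent and a later constant-folding pass can
+reduce ``%r`` to ``add i32 %x, 9``:
+
+.. code-block:: llvm
+
+  %t = add i32 4, 5
+  %r = add i32 %x, %t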
+
+In the implementation of this algorithm, constants are assigned rank = 0,
+function arguments are rank = 1, and other values are assigned ranks
+corresponding to the reverse post order traversal of current function (starting
+at 2), which effectively gives values in deep loops higher rank than values not
+in loops.
+
+``-reg2mem``: Demote all values to stack slots
+----------------------------------------------
+
+This file demotes all registers to memory references. It is intended to be the
+inverse of :ref:`mem2reg <passes-mem2reg>`. After this pass runs, the only
+values live across basic blocks are ``alloca`` instructions and ``load``
+instructions before ``phi`` nodes. It is intended
+that this should make CFG hacking much easier. To make later hacking easier,
+the entry block is split into two, such that all introduced ``alloca``
+instructions (and nothing else) are in the entry block.
+
+``-scalarrepl``: Scalar Replacement of Aggregates (DT)
+------------------------------------------------------
+
+The well-known scalar replacement of aggregates transformation. This transform
+breaks up ``alloca`` instructions of aggregate type (structure or array) into
+individual ``alloca`` instructions for each member if possible. Then, if
+possible, it transforms the individual ``alloca`` instructions into nice clean
+scalar SSA form.
+
+This combines a simple scalar replacement of aggregates algorithm with the
+:ref:`mem2reg <passes-mem2reg>` algorithm because they often interact,
+especially for C++ programs. As such, iterating between ``scalarrepl``, then
+:ref:`mem2reg <passes-mem2reg>` until we run out of things to promote works
+well.
+
+.. _passes-sccp:
+
+``-sccp``: Sparse Conditional Constant Propagation
+--------------------------------------------------
+
+Sparse conditional constant propagation and merging, which can be summarized
+as:
+
+* Assumes values are constant unless proven otherwise
+* Assumes BasicBlocks are dead unless proven otherwise
+* Proves values to be constant, and replaces them with constants
+* Proves conditional branches to be unconditional
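+
+For example (an illustrative sketch), SCCP proves that ``%c`` below is always
+``true``, rewrites the branch to be unconditional, and leaves ``%else`` dead:
+
+.. code-block:: llvm
+
+  %x = add i32 2, 3
+  %c = icmp slt i32 %x, 10
+  br i1 %c, label %then, label %else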
+
+Note that this pass has a habit of making definitions be dead. It is a good
+idea to run a :ref:`DCE <passes-dce>` pass sometime after running this pass.
+
+``-simplify-libcalls``: Simplify well-known library calls
+---------------------------------------------------------
+
+Applies a variety of small optimizations to calls to specific well-known
+functions (e.g., runtime library functions). For example, a call
+``exit(3)`` that occurs within the ``main()`` function can be transformed into
+simply ``return 3``.
+
+.. _passes-simplifycfg:
+
+``-simplifycfg``: Simplify the CFG
+----------------------------------
+
+Performs dead code elimination and basic block merging. Specifically:
+
+* Removes basic blocks with no predecessors.
+* Merges a basic block into its predecessor if there is only one and the
+ predecessor only has one successor.
+* Eliminates PHI nodes for basic blocks with a single predecessor.
+* Eliminates a basic block that only contains an unconditional branch.
+
+``-sink``: Code sinking
+-----------------------
+
+This pass moves instructions into successor blocks, when possible, so that they
+aren't executed on paths where their results aren't needed.
+
+``-strip``: Strip all symbols from a module
+-------------------------------------------
+
+Performs code stripping. This transformation can delete:
+
+* names for virtual registers
+* symbols for internal globals and functions
+* debug information
+
+Note that this transformation makes code much less readable, so it should only
+be used in situations where the strip utility would be used, such as reducing
+code size or making it harder to reverse engineer code.
+
+``-strip-dead-debug-info``: Strip debug info for unused symbols
+---------------------------------------------------------------
+
+.. FIXME: this description is the same as for -strip
+
+Performs code stripping. This transformation can delete:
+
+* names for virtual registers
+* symbols for internal globals and functions
+* debug information
+
+Note that this transformation makes code much less readable, so it should only
+be used in situations where the strip utility would be used, such as reducing
+code size or making it harder to reverse engineer code.
+
+``-strip-dead-prototypes``: Strip Unused Function Prototypes
+------------------------------------------------------------
+
+This pass loops over all of the functions in the input module, looking for
+dead declarations, and removes them. Dead declarations are declarations of
+functions
+for which no implementation is available (i.e., declarations for unused library
+functions).
+
+``-strip-debug-declare``: Strip all ``llvm.dbg.declare`` intrinsics
+-------------------------------------------------------------------
+
+.. FIXME: this description is the same as for -strip
+
+This pass implements code stripping. Specifically, it can delete:
+
+#. names for virtual registers
+#. symbols for internal globals and functions
+#. debug information
+
+Note that this transformation makes code much less readable, so it should only
+be used in situations where the 'strip' utility would be used, such as reducing
+code size or making it harder to reverse engineer code.
+
+``-strip-nondebug``: Strip all symbols, except dbg symbols, from a module
+-------------------------------------------------------------------------
+
+.. FIXME: this description is the same as for -strip
+
+This pass implements code stripping. Specifically, it can delete:
+
+#. names for virtual registers
+#. symbols for internal globals and functions
+#. debug information
+
+Note that this transformation makes code much less readable, so it should only
+be used in situations where the 'strip' utility would be used, such as reducing
+code size or making it harder to reverse engineer code.
+
+``-tailcallelim``: Tail Call Elimination
+----------------------------------------
+
+This file transforms calls of the current function (self recursion) followed by
+a return instruction with a branch to the entry of the function, creating a
+loop. This pass also implements the following extensions to the basic
+algorithm:
+
+#. Trivial instructions between the call and return do not prevent the
+ transformation from taking place, though currently the analysis cannot
+ support moving any really useful instructions (only dead ones).
+#. This pass transforms functions that are prevented from being tail recursive
+ by an associative expression to use an accumulator variable, thus compiling
+ the typical naive factorial or fib implementation into efficient code.
+#. TRE is performed if the function returns void, if the return returns the
+ result returned by the call, or if the function returns a run-time constant
+ on all exits from the function. It is possible, though unlikely, that the
+ return returns something else (like constant 0), and can still be TRE'd. It
+ can be TRE'd if *all other* return instructions in the function return the
+ exact same value.
+#. If it can prove that callees do not access their caller's stack frame, they
+ are marked as eligible for tail call elimination (by the code generator).
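+
+As a minimal sketch of the core rewrite (the extensions above aside; names
+are illustrative), a self-recursive tail call:
+
+.. code-block:: llvm
+
+  define i32 @count(i32 %n) {
+  entry:
+    %z = icmp eq i32 %n, 0
+    br i1 %z, label %done, label %recurse
+  recurse:
+    %n1 = sub i32 %n, 1
+    %r = call i32 @count(i32 %n1)
+    ret i32 %r
+  done:
+    ret i32 0
+  }
+
+becomes a loop:
+
+.. code-block:: llvm
+
+  define i32 @count(i32 %n) {
+  entry:
+    br label %tailrecurse
+  tailrecurse:
+    %n.tr = phi i32 [ %n, %entry ], [ %n1, %recurse ]
+    %z = icmp eq i32 %n.tr, 0
+    br i1 %z, label %done, label %recurse
+  recurse:
+    %n1 = sub i32 %n.tr, 1
+    br label %tailrecurse
+  done:
+    ret i32 0
+  }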
+
+Utility Passes
+==============
+
+This section describes the LLVM Utility Passes.
+
+``-deadarghaX0r``: Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)
+------------------------------------------------------------------------
+
+Same as dead argument elimination, but deletes arguments to functions which are
+external. This is only for use by :doc:`bugpoint <Bugpoint>`.
+
+``-extract-blocks``: Extract Basic Blocks From Module (for bugpoint use)
+------------------------------------------------------------------------
+
+This pass is used by bugpoint to extract all blocks from the module into their
+own functions.
+
+``-instnamer``: Assign names to anonymous instructions
+------------------------------------------------------
+
+This is a little utility pass that gives instructions names; this is mostly
+useful when diffing the effect of an optimization because deleting an unnamed
+instruction can change all other instruction numbering, making the diff very
+noisy.
+
+``-preverify``: Preliminary module verification
+-----------------------------------------------
+
+Ensures that the module is in the form required by the :ref:`Module Verifier
+<passes-verify>` pass. Running the verifier runs this pass automatically, so
+there should be no need to use it directly.
+
+.. _passes-verify:
+
+``-verify``: Module Verifier
+----------------------------
+
+Verifies LLVM IR code. This is useful to run after an optimization which is
+undergoing testing. Note that :program:`llvm-as` verifies its input before emitting
+bitcode, and also that malformed bitcode is likely to make LLVM crash. All
+language front-ends are therefore encouraged to verify their output before
+performing optimizing transformations. The checks performed include:
+
+#. Both of a binary operator's parameters are of the same type.
+#. Verify that the indices of mem access instructions match other operands.
+#. Verify that arithmetic and other things are only performed on first-class
+   types. Verify that shifts and logical operations only happen on integral
+   types, for example.
+#. All of the constants in a switch statement are of the correct type.
+#. The code is in valid SSA form.
+#. It is illegal to put a label into any other type (like a structure) or to
+ return one.
+#. Only phi nodes can be self referential: ``%x = add i32 %x, %x`` is
+   invalid.
+#. PHI nodes must have an entry for each predecessor, with no extras.
+#. PHI nodes must be the first thing in a basic block, all grouped together.
+#. PHI nodes must have at least one entry.
+#. All basic blocks should only end with terminator insts, not contain them.
+#. The entry node to a function must not have predecessors.
+#. All Instructions must be embedded into a basic block.
+#. Functions cannot take a void-typed parameter.
+#. Verify that a function's argument list agrees with its declared type.
+#. It is illegal to specify a name for a void value.
+#. It is illegal to have an internal global value with no initializer.
+#. It is illegal to have a ``ret`` instruction that returns a value that does
+ not agree with the function return value type.
+#. Function call argument types match the function prototype.
+#. All other things that are tested by asserts spread about the code.
+
+Note that this does not provide full security verification (like Java), but
+instead just tries to ensure that code is well-formed.
+
+``-view-cfg``: View CFG of function
+-----------------------------------
+
+Displays the control flow graph using the GraphViz tool.
+
+``-view-cfg-only``: View CFG of function (with no function bodies)
+------------------------------------------------------------------
+
+Displays the control flow graph using the GraphViz tool, but omitting function
+bodies.
+
+``-view-dom``: View dominance tree of function
+----------------------------------------------
+
+Displays the dominator tree using the GraphViz tool.
+
+``-view-dom-only``: View dominance tree of function (with no function bodies)
+-----------------------------------------------------------------------------
+
+Displays the dominator tree using the GraphViz tool, but omitting function
+bodies.
+
+``-view-postdom``: View postdominance tree of function
+------------------------------------------------------
+
+Displays the post dominator tree using the GraphViz tool.
+
+``-view-postdom-only``: View postdominance tree of function (with no function bodies)
+-------------------------------------------------------------------------------------
+
+Displays the post dominator tree using the GraphViz tool, but omitting function
+bodies.
+
diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst
index b45449793e0a..efab10cd13a5 100644
--- a/docs/Phabricator.rst
+++ b/docs/Phabricator.rst
@@ -88,6 +88,12 @@ diffs between different versions of the patch as it was reviewed in the
*Revision Update History*. Most features are self descriptive - explore, and
if you have a question, drop by on #llvm in IRC to get help.
+Note that as e-mail is the system of reference for code reviews, and some
+people prefer it over a web interface, we do not generate automated mail
+when a review changes state, for example by clicking "Accept Revision" in
+the web interface. Thus, please type LGTM into the comment box to accept
+a change from Phabricator.
+
Status
------
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
deleted file mode 100644
index 7c2e6c8aad92..000000000000
--- a/docs/ProgrammersManual.html
+++ /dev/null
@@ -1,4156 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
- <title>LLVM Programmer's Manual</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>
- LLVM Programmer's Manual
-</h1>
-
-<ol>
- <li><a href="#introduction">Introduction</a></li>
- <li><a href="#general">General Information</a>
- <ul>
- <li><a href="#stl">The C++ Standard Template Library</a></li>
-<!--
- <li>The <tt>-time-passes</tt> option</li>
- <li>How to use the LLVM Makefile system</li>
- <li>How to write a regression test</li>
-
--->
- </ul>
- </li>
- <li><a href="#apis">Important and useful LLVM APIs</a>
- <ul>
- <li><a href="#isa">The <tt>isa&lt;&gt;</tt>, <tt>cast&lt;&gt;</tt>
-and <tt>dyn_cast&lt;&gt;</tt> templates</a> </li>
- <li><a href="#string_apis">Passing strings (the <tt>StringRef</tt>
-and <tt>Twine</tt> classes)</a>
- <ul>
- <li><a href="#StringRef">The <tt>StringRef</tt> class</a> </li>
- <li><a href="#Twine">The <tt>Twine</tt> class</a> </li>
- </ul>
- </li>
- <li><a href="#DEBUG">The <tt>DEBUG()</tt> macro and <tt>-debug</tt>
-option</a>
- <ul>
- <li><a href="#DEBUG_TYPE">Fine grained debug info with <tt>DEBUG_TYPE</tt>
-and the <tt>-debug-only</tt> option</a> </li>
- </ul>
- </li>
- <li><a href="#Statistic">The <tt>Statistic</tt> class &amp; <tt>-stats</tt>
-option</a></li>
-<!--
- <li>The <tt>InstVisitor</tt> template
- <li>The general graph API
--->
- <li><a href="#ViewGraph">Viewing graphs while debugging code</a></li>
- </ul>
- </li>
- <li><a href="#datastructure">Picking the Right Data Structure for a Task</a>
- <ul>
- <li><a href="#ds_sequential">Sequential Containers (std::vector, std::list, etc)</a>
- <ul>
- <li><a href="#dss_arrayref">llvm/ADT/ArrayRef.h</a></li>
- <li><a href="#dss_fixedarrays">Fixed Size Arrays</a></li>
- <li><a href="#dss_heaparrays">Heap Allocated Arrays</a></li>
- <li><a href="#dss_tinyptrvector">"llvm/ADT/TinyPtrVector.h"</a></li>
- <li><a href="#dss_smallvector">"llvm/ADT/SmallVector.h"</a></li>
- <li><a href="#dss_vector">&lt;vector&gt;</a></li>
- <li><a href="#dss_deque">&lt;deque&gt;</a></li>
- <li><a href="#dss_list">&lt;list&gt;</a></li>
- <li><a href="#dss_ilist">llvm/ADT/ilist.h</a></li>
- <li><a href="#dss_packedvector">llvm/ADT/PackedVector.h</a></li>
- <li><a href="#dss_other">Other Sequential Container Options</a></li>
- </ul></li>
- <li><a href="#ds_string">String-like containers</a>
- <ul>
- <li><a href="#dss_stringref">llvm/ADT/StringRef.h</a></li>
- <li><a href="#dss_twine">llvm/ADT/Twine.h</a></li>
- <li><a href="#dss_smallstring">llvm/ADT/SmallString.h</a></li>
- <li><a href="#dss_stdstring">std::string</a></li>
- </ul></li>
- <li><a href="#ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
- <ul>
- <li><a href="#dss_sortedvectorset">A sorted 'vector'</a></li>
- <li><a href="#dss_smallset">"llvm/ADT/SmallSet.h"</a></li>
- <li><a href="#dss_smallptrset">"llvm/ADT/SmallPtrSet.h"</a></li>
- <li><a href="#dss_denseset">"llvm/ADT/DenseSet.h"</a></li>
- <li><a href="#dss_sparseset">"llvm/ADT/SparseSet.h"</a></li>
- <li><a href="#dss_FoldingSet">"llvm/ADT/FoldingSet.h"</a></li>
- <li><a href="#dss_set">&lt;set&gt;</a></li>
- <li><a href="#dss_setvector">"llvm/ADT/SetVector.h"</a></li>
- <li><a href="#dss_uniquevector">"llvm/ADT/UniqueVector.h"</a></li>
- <li><a href="#dss_immutableset">"llvm/ADT/ImmutableSet.h"</a></li>
- <li><a href="#dss_otherset">Other Set-Like Container Options</a></li>
- </ul></li>
- <li><a href="#ds_map">Map-Like Containers (std::map, DenseMap, etc)</a>
- <ul>
- <li><a href="#dss_sortedvectormap">A sorted 'vector'</a></li>
- <li><a href="#dss_stringmap">"llvm/ADT/StringMap.h"</a></li>
- <li><a href="#dss_indexedmap">"llvm/ADT/IndexedMap.h"</a></li>
- <li><a href="#dss_densemap">"llvm/ADT/DenseMap.h"</a></li>
- <li><a href="#dss_valuemap">"llvm/ADT/ValueMap.h"</a></li>
- <li><a href="#dss_intervalmap">"llvm/ADT/IntervalMap.h"</a></li>
- <li><a href="#dss_map">&lt;map&gt;</a></li>
- <li><a href="#dss_mapvector">"llvm/ADT/MapVector.h"</a></li>
- <li><a href="#dss_inteqclasses">"llvm/ADT/IntEqClasses.h"</a></li>
- <li><a href="#dss_immutablemap">"llvm/ADT/ImmutableMap.h"</a></li>
- <li><a href="#dss_othermap">Other Map-Like Container Options</a></li>
- </ul></li>
- <li><a href="#ds_bit">BitVector-like containers</a>
- <ul>
- <li><a href="#dss_bitvector">A dense bitvector</a></li>
- <li><a href="#dss_smallbitvector">A "small" dense bitvector</a></li>
- <li><a href="#dss_sparsebitvector">A sparse bitvector</a></li>
- </ul></li>
- </ul>
- </li>
- <li><a href="#common">Helpful Hints for Common Operations</a>
- <ul>
- <li><a href="#inspection">Basic Inspection and Traversal Routines</a>
- <ul>
- <li><a href="#iterate_function">Iterating over the <tt>BasicBlock</tt>s
-in a <tt>Function</tt></a> </li>
- <li><a href="#iterate_basicblock">Iterating over the <tt>Instruction</tt>s
-in a <tt>BasicBlock</tt></a> </li>
- <li><a href="#iterate_institer">Iterating over the <tt>Instruction</tt>s
-in a <tt>Function</tt></a> </li>
- <li><a href="#iterate_convert">Turning an iterator into a
-class pointer</a> </li>
- <li><a href="#iterate_complex">Finding call sites: a more
-complex example</a> </li>
- <li><a href="#calls_and_invokes">Treating calls and invokes
-the same way</a> </li>
- <li><a href="#iterate_chains">Iterating over def-use &amp;
-use-def chains</a> </li>
- <li><a href="#iterate_preds">Iterating over predecessors &amp;
-successors of blocks</a></li>
- </ul>
- </li>
- <li><a href="#simplechanges">Making simple changes</a>
- <ul>
- <li><a href="#schanges_creating">Creating and inserting new
- <tt>Instruction</tt>s</a> </li>
- <li><a href="#schanges_deleting">Deleting <tt>Instruction</tt>s</a> </li>
- <li><a href="#schanges_replacing">Replacing an <tt>Instruction</tt>
-with another <tt>Value</tt></a> </li>
- <li><a href="#schanges_deletingGV">Deleting <tt>GlobalVariable</tt>s</a> </li>
- </ul>
- </li>
- <li><a href="#create_types">How to Create Types</a></li>
-<!--
- <li>Working with the Control Flow Graph
- <ul>
- <li>Accessing predecessors and successors of a <tt>BasicBlock</tt>
- <li>
- <li>
- </ul>
--->
- </ul>
- </li>
-
- <li><a href="#threading">Threads and LLVM</a>
- <ul>
- <li><a href="#startmultithreaded">Entering and Exiting Multithreaded Mode
- </a></li>
- <li><a href="#shutdown">Ending execution with <tt>llvm_shutdown()</tt></a></li>
- <li><a href="#managedstatic">Lazy initialization with <tt>ManagedStatic</tt></a></li>
- <li><a href="#llvmcontext">Achieving Isolation with <tt>LLVMContext</tt></a></li>
- <li><a href="#jitthreading">Threads and the JIT</a></li>
- </ul>
- </li>
-
- <li><a href="#advanced">Advanced Topics</a>
- <ul>
-
- <li><a href="#SymbolTable">The <tt>ValueSymbolTable</tt> class</a></li>
- <li><a href="#UserLayout">The <tt>User</tt> and owned <tt>Use</tt> classes' memory layout</a></li>
- </ul></li>
-
- <li><a href="#coreclasses">The Core LLVM Class Hierarchy Reference</a>
- <ul>
- <li><a href="#Type">The <tt>Type</tt> class</a> </li>
- <li><a href="#Module">The <tt>Module</tt> class</a></li>
- <li><a href="#Value">The <tt>Value</tt> class</a>
- <ul>
- <li><a href="#User">The <tt>User</tt> class</a>
- <ul>
- <li><a href="#Instruction">The <tt>Instruction</tt> class</a></li>
- <li><a href="#Constant">The <tt>Constant</tt> class</a>
- <ul>
- <li><a href="#GlobalValue">The <tt>GlobalValue</tt> class</a>
- <ul>
- <li><a href="#Function">The <tt>Function</tt> class</a></li>
- <li><a href="#GlobalVariable">The <tt>GlobalVariable</tt> class</a></li>
- </ul>
- </li>
- </ul>
- </li>
- </ul>
- </li>
- <li><a href="#BasicBlock">The <tt>BasicBlock</tt> class</a></li>
- <li><a href="#Argument">The <tt>Argument</tt> class</a></li>
- </ul>
- </li>
- </ul>
- </li>
-</ol>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>,
- <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a>,
- <a href="mailto:ggreif@gmail.com">Gabor Greif</a>,
- <a href="mailto:jstanley@cs.uiuc.edu">Joel Stanley</a>,
- <a href="mailto:rspencer@x10sys.com">Reid Spencer</a> and
- <a href="mailto:owen@apple.com">Owen Anderson</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="introduction">Introduction </a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This document is meant to highlight some of the important classes and
-interfaces available in the LLVM source-base. This manual is not
-intended to explain what LLVM is, how it works, and what LLVM code looks
-like. It assumes that you know the basics of LLVM and are interested
-in writing transformations or otherwise analyzing or manipulating the
-code.</p>
-
-<p>This document should get you oriented so that you can find your
-way in the continuously growing source code that makes up the LLVM
-infrastructure. Note that this manual is not intended to serve as a
-replacement for reading the source code, so if you think there should be
-a method in one of these classes to do something, but it's not listed,
-check the source. Links to the <a href="/doxygen/">doxygen</a> sources
-are provided to make this as easy as possible.</p>
-
-<p>The first section of this document describes general information that is
-useful to know when working in the LLVM infrastructure, and the second describes
-the Core LLVM classes. In the future this manual will be extended with
-information describing how to use extension libraries, such as dominator
-information, CFG traversal routines, and useful utilities like the <tt><a
-href="/doxygen/InstVisitor_8h-source.html">InstVisitor</a></tt> template.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="general">General Information</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This section contains general information that is useful if you are working
-in the LLVM source-base, but that isn't specific to any particular API.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="stl">The C++ Standard Template Library</a>
-</h3>
-
-<div>
-
-<p>LLVM makes heavy use of the C++ Standard Template Library (STL),
-perhaps much more than you are used to, or have seen before. Because of
-this, you might want to do a little background reading in the
-techniques used and capabilities of the library. There are many good
-pages that discuss the STL, and several books on the subject that you
-can get, so it will not be discussed in this document.</p>
-
-<p>Here are some useful links:</p>
-
-<ol>
-
-<li><a href="http://www.dinkumware.com/manuals/#Standard C++ Library">Dinkumware
-C++ Library reference</a> - an excellent reference for the STL and other parts
-of the standard C++ library.</li>
-
-<li><a href="http://www.tempest-sw.com/cpp/">C++ In a Nutshell</a> - This is an
-O'Reilly book in the making. It has a decent Standard Library
-Reference that rivals Dinkumware's, and is unfortunately no longer free since the
-book has been published.</li>
-
-<li><a href="http://www.parashift.com/c++-faq-lite/">C++ Frequently Asked
-Questions</a></li>
-
-<li><a href="http://www.sgi.com/tech/stl/">SGI's STL Programmer's Guide</a> -
-Contains a useful <a
-href="http://www.sgi.com/tech/stl/stl_introduction.html">Introduction to the
-STL</a>.</li>
-
-<li><a href="http://www.research.att.com/%7Ebs/C++.html">Bjarne Stroustrup's C++
-Page</a></li>
-
-<li><a href="http://64.78.49.204/">
-Bruce Eckel's Thinking in C++, 2nd ed. Volume 2 Revision 4.0 (even better, get
-the book).</a></li>
-
-</ol>
-
-<p>You are also encouraged to take a look at the <a
-href="CodingStandards.html">LLVM Coding Standards</a> guide which focuses on how
-to write maintainable code more than where to put your curly braces.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="otherrefs">Other useful references</a>
-</h3>
-
-<div>
-
-<ol>
-<li><a href="http://www.fortran-2000.com/ArnaudRecipes/sharedlib.html">Using
-static and shared libraries across platforms</a></li>
-</ol>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="apis">Important and useful LLVM APIs</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Here we highlight some LLVM APIs that are generally useful and good to
-know about when writing transformations.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="isa">The <tt>isa&lt;&gt;</tt>, <tt>cast&lt;&gt;</tt> and
- <tt>dyn_cast&lt;&gt;</tt> templates</a>
-</h3>
-
-<div>
-
-<p>The LLVM source-base makes extensive use of a custom form of RTTI.
-These templates have many similarities to the C++ <tt>dynamic_cast&lt;&gt;</tt>
-operator, but they don't have some drawbacks (primarily stemming from
-the fact that <tt>dynamic_cast&lt;&gt;</tt> only works on classes that
-have a v-table). Because they are used so often, you must know what they
-do and how they work. All of these templates are defined in the <a
- href="/doxygen/Casting_8h-source.html"><tt>llvm/Support/Casting.h</tt></a>
-file (note that you very rarely have to include this file directly).</p>
-
-<dl>
- <dt><tt>isa&lt;&gt;</tt>: </dt>
-
- <dd><p>The <tt>isa&lt;&gt;</tt> operator works exactly like the Java
- "<tt>instanceof</tt>" operator. It returns true or false depending on whether
- a reference or pointer points to an instance of the specified class. This can
- be very useful for constraint checking of various sorts (example below).</p>
- </dd>
-
- <dt><tt>cast&lt;&gt;</tt>: </dt>
-
- <dd><p>The <tt>cast&lt;&gt;</tt> operator is a "checked cast" operation. It
- converts a pointer or reference from a base class to a derived class, causing
- an assertion failure if it is not really an instance of the right type. This
- should be used in cases where you have some information that makes you believe
-  that something is of the right type.  An example of the <tt>isa&lt;&gt;</tt>
-  and <tt>cast&lt;&gt;</tt> templates is:</p>
-
-<div class="doc_code">
-<pre>
-static bool isLoopInvariant(const <a href="#Value">Value</a> *V, const Loop *L) {
- if (isa&lt;<a href="#Constant">Constant</a>&gt;(V) || isa&lt;<a href="#Argument">Argument</a>&gt;(V) || isa&lt;<a href="#GlobalValue">GlobalValue</a>&gt;(V))
- return true;
-
- // <i>Otherwise, it must be an instruction...</i>
- return !L-&gt;contains(cast&lt;<a href="#Instruction">Instruction</a>&gt;(V)-&gt;getParent());
-}
-</pre>
-</div>
-
-  <p>Note that you should <b>not</b> use an <tt>isa&lt;&gt;</tt> test followed
-  by a <tt>cast&lt;&gt;</tt>; for that, use the <tt>dyn_cast&lt;&gt;</tt>
-  operator.</p>
-
- </dd>
-
- <dt><tt>dyn_cast&lt;&gt;</tt>:</dt>
-
- <dd><p>The <tt>dyn_cast&lt;&gt;</tt> operator is a "checking cast" operation.
- It checks to see if the operand is of the specified type, and if so, returns a
- pointer to it (this operator does not work with references). If the operand is
- not of the correct type, a null pointer is returned. Thus, this works very
- much like the <tt>dynamic_cast&lt;&gt;</tt> operator in C++, and should be
- used in the same circumstances. Typically, the <tt>dyn_cast&lt;&gt;</tt>
- operator is used in an <tt>if</tt> statement or some other flow control
- statement like this:</p>
-
-<div class="doc_code">
-<pre>
-if (<a href="#AllocationInst">AllocationInst</a> *AI = dyn_cast&lt;<a href="#AllocationInst">AllocationInst</a>&gt;(Val)) {
- // <i>...</i>
-}
-</pre>
-</div>
-
- <p>This form of the <tt>if</tt> statement effectively combines together a call
- to <tt>isa&lt;&gt;</tt> and a call to <tt>cast&lt;&gt;</tt> into one
- statement, which is very convenient.</p>
-
- <p>Note that the <tt>dyn_cast&lt;&gt;</tt> operator, like C++'s
- <tt>dynamic_cast&lt;&gt;</tt> or Java's <tt>instanceof</tt> operator, can be
- abused. In particular, you should not use big chained <tt>if/then/else</tt>
- blocks to check for lots of different variants of classes. If you find
- yourself wanting to do this, it is much cleaner and more efficient to use the
- <tt>InstVisitor</tt> class to dispatch over the instruction type directly.</p>
-
- </dd>
-
- <dt><tt>cast_or_null&lt;&gt;</tt>: </dt>
-
- <dd><p>The <tt>cast_or_null&lt;&gt;</tt> operator works just like the
- <tt>cast&lt;&gt;</tt> operator, except that it allows for a null pointer as an
- argument (which it then propagates). This can sometimes be useful, allowing
- you to combine several null checks into one.</p></dd>
-
- <dt><tt>dyn_cast_or_null&lt;&gt;</tt>: </dt>
-
-  <dd><p>The <tt>dyn_cast_or_null&lt;&gt;</tt> operator works just like the
-  <tt>dyn_cast&lt;&gt;</tt> operator, except that it allows for a null pointer
-  as an argument (which it then propagates).  This can sometimes be useful,
-  allowing you to combine several null checks into one (see the example after
-  this list).</p></dd>
-
-</dl>
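-
-<p>For example, <tt>dyn_cast_or_null&lt;&gt;</tt> lets a null check and a type
-check collapse into a single test.  A minimal sketch, assuming a
-<tt>Value *V</tt> that may legitimately be null:</p>
-
-<div class="doc_code">
-<pre>
-// V may be null; dyn_cast_or_null&lt;&gt; simply returns null in that case.
-if (Instruction *I = dyn_cast_or_null&lt;Instruction&gt;(V)) {
-  // <i>V was non-null and is an Instruction; use I here.</i>
-}
-</pre>
-</div>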
-
-<p>These five templates can be used with any classes, whether they have a
-v-table or not. If you want to add support for these templates, see the
-document <a href="HowToSetUpLLVMStyleRTTI.html">How to set up LLVM-style
-RTTI for your class hierarchy </a>.
-</p>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
- <a name="string_apis">Passing strings (the <tt>StringRef</tt>
-and <tt>Twine</tt> classes)</a>
-</h3>
-
-<div>
-
-<p>Although LLVM generally does not do much string manipulation, we do have
-several important APIs which take strings. Two important examples are the
-Value class -- which has names for instructions, functions, etc. -- and the
-StringMap class which is used extensively in LLVM and Clang.</p>
-
-<p>These are generic classes, and they need to be able to accept strings which
-may have embedded null characters. Therefore, they cannot simply take
-a <tt>const char *</tt>, and taking a <tt>const std::string&amp;</tt> requires
-clients to perform a heap allocation which is usually unnecessary. Instead,
-many LLVM APIs use a <tt>StringRef</tt> or a <tt>const Twine&amp;</tt> for
-passing strings efficiently.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="StringRef">The <tt>StringRef</tt> class</a>
-</h4>
-
-<div>
-
-<p>The <tt>StringRef</tt> data type represents a reference to a constant string
-(a character array and a length) and supports the common operations available
-on <tt>std::string</tt>, but does not require heap allocation.</p>
-
-<p>It can be implicitly constructed using a C style null-terminated string,
-an <tt>std::string</tt>, or explicitly with a character pointer and length.
-For example, the <tt>StringRef</tt> find function is declared as:</p>
-
-<pre class="doc_code">
- iterator find(StringRef Key);
-</pre>
-
-<p>and clients can call it using any one of:</p>
-
-<pre class="doc_code">
- Map.find("foo"); <i>// Lookup "foo"</i>
- Map.find(std::string("bar")); <i>// Lookup "bar"</i>
- Map.find(StringRef("\0baz", 4)); <i>// Lookup "\0baz"</i>
-</pre>
-
-<p>Similarly, APIs which need to return a string may return a <tt>StringRef</tt>
-instance, which can be used directly or converted to an <tt>std::string</tt>
-using the <tt>str</tt> member function. See
-"<tt><a href="/doxygen/classllvm_1_1StringRef_8h-source.html">llvm/ADT/StringRef.h</a></tt>"
-for more information.</p>
-
-<p>You should rarely use the <tt>StringRef</tt> class directly: because it
-contains pointers to external memory, it is not generally safe to store an
-instance of the class (unless you know that the external storage will not be
-freed). StringRef is small and pervasive enough in LLVM that it should always
-be passed by value.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="Twine">The <tt>Twine</tt> class</a>
-</h4>
-
-<div>
-
-<p>The <tt><a href="/doxygen/classllvm_1_1Twine.html">Twine</a></tt> class is an
-efficient way for APIs to accept concatenated strings. For example, a common
-LLVM paradigm is to name one instruction based on
-the name of another instruction with a suffix, for example:</p>
-
-<div class="doc_code">
-<pre>
- New = CmpInst::Create(<i>...</i>, SO->getName() + ".cmp");
-</pre>
-</div>
-
-<p>The <tt>Twine</tt> class is effectively a lightweight
-<a href="http://en.wikipedia.org/wiki/Rope_(computer_science)">rope</a>
-which points to temporary (stack allocated) objects. Twines can be implicitly
-constructed as the result of the plus operator applied to strings (i.e., a C
-string, an <tt>std::string</tt>, or a <tt>StringRef</tt>).  The twine delays
-the actual concatenation of strings until it is actually required, at which
-point it can be efficiently rendered directly into a character array. This
-avoids unnecessary heap allocation involved in constructing the temporary
-results of string concatenation. See
-"<tt><a href="/doxygen/Twine_8h_source.html">llvm/ADT/Twine.h</a></tt>"
-and <a href="#dss_twine">here</a> for more information.</p>
-
-<p>As with a <tt>StringRef</tt>, <tt>Twine</tt> objects point to external memory
-and should almost never be stored or mentioned directly. They are intended
-solely for use when defining a function which should be able to efficiently
-accept concatenated strings.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="DEBUG">The <tt>DEBUG()</tt> macro and <tt>-debug</tt> option</a>
-</h3>
-
-<div>
-
-<p>Often, when working on your pass, you will put a bunch of debugging
-printouts and other code into it.  After you get the pass working, you want to
-remove the debugging code, but you may need it again in the future (to track
-down new bugs that you run across).</p>
-
-<p> Naturally, because of this, you don't want to delete the debug printouts,
-but you don't want them to always be noisy. A standard compromise is to comment
-them out, allowing you to enable them if you need them in the future.</p>
-
-<p>The "<tt><a href="/doxygen/Debug_8h-source.html">llvm/Support/Debug.h</a></tt>"
-file provides a macro named <tt>DEBUG()</tt> that is a much nicer solution to
-this problem. Basically, you can put arbitrary code into the argument of the
-<tt>DEBUG</tt> macro, and it is only executed if '<tt>opt</tt>' (or any other
-tool) is run with the '<tt>-debug</tt>' command line argument:</p>
-
-<div class="doc_code">
-<pre>
-DEBUG(errs() &lt;&lt; "I am here!\n");
-</pre>
-</div>
-
-<p>Then you can run your pass like this:</p>
-
-<div class="doc_code">
-<pre>
-$ opt &lt; a.bc &gt; /dev/null -mypass
-<i>&lt;no output&gt;</i>
-$ opt &lt; a.bc &gt; /dev/null -mypass -debug
-I am here!
-</pre>
-</div>
-
-<p>Using the <tt>DEBUG()</tt> macro instead of a home-brewed solution allows you
-to not have to create "yet another" command line option for the debug output for
-your pass. Note that <tt>DEBUG()</tt> macros are disabled for optimized builds,
-so they do not cause a performance impact at all (for the same reason, they
-should also not contain side-effects!).</p>
-
-<p>One additional nice thing about the <tt>DEBUG()</tt> macro is that you can
-enable or disable it directly in gdb. Just use "<tt>set DebugFlag=0</tt>" or
-"<tt>set DebugFlag=1</tt>" from the gdb if the program is running. If the
-program hasn't been started yet, you can always just run it with
-<tt>-debug</tt>.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="DEBUG_TYPE">Fine grained debug info with <tt>DEBUG_TYPE</tt> and
- the <tt>-debug-only</tt> option</a>
-</h4>
-
-<div>
-
-<p>Sometimes you may find yourself in a situation where enabling <tt>-debug</tt>
-just turns on <b>too much</b> information (such as when working on the code
-generator). If you want to enable debug information with more fine-grained
-control, you can define the <tt>DEBUG_TYPE</tt> macro and use the
-<tt>-debug-only</tt> option as follows:</p>
-
-<div class="doc_code">
-<pre>
-#undef DEBUG_TYPE
-DEBUG(errs() &lt;&lt; "No debug type\n");
-#define DEBUG_TYPE "foo"
-DEBUG(errs() &lt;&lt; "'foo' debug type\n");
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "bar"
-DEBUG(errs() &lt;&lt; "'bar' debug type\n");
-#undef DEBUG_TYPE
-#define DEBUG_TYPE ""
-DEBUG(errs() &lt;&lt; "No debug type (2)\n");
-</pre>
-</div>
-
-<p>Then you can run your pass like this:</p>
-
-<div class="doc_code">
-<pre>
-$ opt &lt; a.bc &gt; /dev/null -mypass
-<i>&lt;no output&gt;</i>
-$ opt &lt; a.bc &gt; /dev/null -mypass -debug
-No debug type
-'foo' debug type
-'bar' debug type
-No debug type (2)
-$ opt &lt; a.bc &gt; /dev/null -mypass -debug-only=foo
-'foo' debug type
-$ opt &lt; a.bc &gt; /dev/null -mypass -debug-only=bar
-'bar' debug type
-</pre>
-</div>
-
-<p>Of course, in practice, you should only set <tt>DEBUG_TYPE</tt> at the top of
-a file, to specify the debug type for the entire module (if you do this before
-you <tt>#include "llvm/Support/Debug.h"</tt>, you don't have to insert the ugly
-<tt>#undef</tt>'s). Also, you should use names more meaningful than "foo" and
-"bar", because there is no system in place to ensure that names do not
-conflict. If two different modules use the same string, they will all be turned
-on when the name is specified. This allows, for example, all debug information
-for instruction scheduling to be enabled with <tt>-debug-only=InstrSched</tt>,
-even if the source lives in multiple files.</p>
-
-<p>The <tt>DEBUG_WITH_TYPE</tt> macro is also available for situations where you
-would like to set <tt>DEBUG_TYPE</tt>, but only for one specific <tt>DEBUG</tt>
-statement. It takes an additional first parameter, which is the type to use. For
-example, the preceding example could be written as:</p>
-
-
-<div class="doc_code">
-<pre>
-DEBUG_WITH_TYPE("", errs() &lt;&lt; "No debug type\n");
-DEBUG_WITH_TYPE("foo", errs() &lt;&lt; "'foo' debug type\n");
-DEBUG_WITH_TYPE("bar", errs() &lt;&lt; "'bar' debug type\n");
-DEBUG_WITH_TYPE("", errs() &lt;&lt; "No debug type (2)\n");
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="Statistic">The <tt>Statistic</tt> class &amp; <tt>-stats</tt>
- option</a>
-</h3>
-
-<div>
-
-<p>The "<tt><a
-href="/doxygen/Statistic_8h-source.html">llvm/ADT/Statistic.h</a></tt>" file
-provides a class named <tt>Statistic</tt> that is used as a unified way to
-keep track of what the LLVM compiler is doing and how effective various
-optimizations are. It is useful to see what optimizations are contributing to
-making a particular program run faster.</p>
-
-<p>Often you may run your pass on some big program, and you're interested to see
-how many times it makes a certain transformation. Although you can do this with
-hand inspection, or some ad-hoc method, this is a real pain and not very useful
-for big programs. Using the <tt>Statistic</tt> class makes it very easy to
-keep track of this information, and the calculated information is presented in a
-uniform manner with the rest of the passes being executed.</p>
-
-<p>There are many examples of <tt>Statistic</tt> uses, but the basics of using
-it are as follows:</p>
-
-<ol>
- <li><p>Define your statistic like this:</p>
-
-<div class="doc_code">
-<pre>
-#define <a href="#DEBUG_TYPE">DEBUG_TYPE</a> "mypassname" <i>// This goes before any #includes.</i>
-STATISTIC(NumXForms, "The # of times I did stuff");
-</pre>
-</div>
-
- <p>The <tt>STATISTIC</tt> macro defines a static variable, whose name is
- specified by the first argument. The pass name is taken from the DEBUG_TYPE
- macro, and the description is taken from the second argument. The variable
- defined ("NumXForms" in this case) acts like an unsigned integer.</p></li>
-
- <li><p>Whenever you make a transformation, bump the counter:</p>
-
-<div class="doc_code">
-<pre>
-++NumXForms; // <i>I did stuff!</i>
-</pre>
-</div>
-
- </li>
- </ol>
-
- <p>That's all you have to do. To get '<tt>opt</tt>' to print out the
- statistics gathered, use the '<tt>-stats</tt>' option:</p>
-
-<div class="doc_code">
-<pre>
-$ opt -stats -mypassname &lt; program.bc &gt; /dev/null
-<i>... statistics output ...</i>
-</pre>
-</div>
-
- <p> When running <tt>opt</tt> on a C file from the SPEC benchmark
-suite, it gives a report that looks like this:</p>
-
-<div class="doc_code">
-<pre>
- 7646 bitcodewriter - Number of normal instructions
- 725 bitcodewriter - Number of oversized instructions
- 129996 bitcodewriter - Number of bitcode bytes written
- 2817 raise - Number of insts DCEd or constprop'd
- 3213 raise - Number of cast-of-self removed
- 5046 raise - Number of expression trees converted
- 75 raise - Number of other getelementptr's formed
- 138 raise - Number of load/store peepholes
- 42 deadtypeelim - Number of unused typenames removed from symtab
- 392 funcresolve - Number of varargs functions resolved
- 27 globaldce - Number of global variables removed
- 2 adce - Number of basic blocks removed
- 134 cee - Number of branches revectored
- 49 cee - Number of setcc instruction eliminated
- 532 gcse - Number of loads removed
- 2919 gcse - Number of instructions removed
- 86 indvars - Number of canonical indvars added
- 87 indvars - Number of aux indvars removed
- 25 instcombine - Number of dead inst eliminate
- 434 instcombine - Number of insts combined
- 248 licm - Number of load insts hoisted
- 1298 licm - Number of insts hoisted to a loop pre-header
- 3 licm - Number of insts hoisted to multiple loop preds (bad, no loop pre-header)
- 75 mem2reg - Number of alloca's promoted
- 1444 cfgsimplify - Number of blocks simplified
-</pre>
-</div>
-
-<p>Obviously, with so many optimizations, having a unified framework for this
-stuff is very nice. Making your pass fit well into the framework makes it more
-maintainable and useful.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ViewGraph">Viewing graphs while debugging code</a>
-</h3>
-
-<div>
-
-<p>Several of the important data structures in LLVM are graphs: for example
-CFGs made out of LLVM <a href="#BasicBlock">BasicBlock</a>s, CFGs made out of
-LLVM <a href="CodeGenerator.html#machinebasicblock">MachineBasicBlock</a>s, and
-<a href="CodeGenerator.html#selectiondag_intro">Instruction Selection
-DAGs</a>. In many cases, while debugging various parts of the compiler, it is
-nice to instantly visualize these graphs.</p>
-
-<p>LLVM provides several callbacks that are available in a debug build to do
-exactly that. If you call the <tt>Function::viewCFG()</tt> method, for example,
-the current LLVM tool will pop up a window containing the CFG for the function
-where each basic block is a node in the graph, and each node contains the
-instructions in the block. Similarly, there also exists
-<tt>Function::viewCFGOnly()</tt> (does not include the instructions), the
-<tt>MachineFunction::viewCFG()</tt> and <tt>MachineFunction::viewCFGOnly()</tt>,
-and the <tt>SelectionDAG::viewGraph()</tt> methods. Within GDB, for example,
-you can usually use something like <tt>call DAG.viewGraph()</tt> to pop
-up a window. Alternatively, you can sprinkle calls to these functions in your
-code in places you want to debug.</p>
-
-<p>Getting this to work requires a small amount of configuration. On Unix
-systems with X11, install the <a href="http://www.graphviz.org">graphviz</a>
-toolkit, and make sure 'dot' and 'gv' are in your path. If you are running on
-Mac OS/X, download and install the Mac OS/X <a
-href="http://www.pixelglow.com/graphviz/">Graphviz program</a>, and add
-<tt>/Applications/Graphviz.app/Contents/MacOS/</tt> (or wherever you install
-it) to your path.  Once your system and path are set up, rerun the LLVM
-configure script and rebuild LLVM to enable this functionality.</p>
-
-<p><tt>SelectionDAG</tt> has been extended to make it easier to locate
-<i>interesting</i> nodes in large complex graphs. From gdb, if you
-<tt>call DAG.setGraphColor(<i>node</i>, "<i>color</i>")</tt>, then the
-next <tt>call DAG.viewGraph()</tt> would highlight the node in the
-specified color (choices of colors can be found at <a
-href="http://www.graphviz.org/doc/info/colors.html">colors</a>.) More
-complex node attributes can be provided with <tt>call
-DAG.setGraphAttrs(<i>node</i>, "<i>attributes</i>")</tt> (choices can be
-found at <a href="http://www.graphviz.org/doc/info/attrs.html">Graph
-Attributes</a>.) If you want to restart and clear all the current graph
-attributes, then you can <tt>call DAG.clearGraphAttrs()</tt>. </p>
-
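-<p>For example, a short gdb session might look like the following.  This is a
-hypothetical transcript; it assumes a <tt>SelectionDAG DAG</tt> and a node
-pointer <tt>N</tt> are in scope:</p>
-
-<div class="doc_code">
-<pre>
-(gdb) call DAG.setGraphColor(N, "red")
-(gdb) call DAG.viewGraph()
-(gdb) call DAG.clearGraphAttrs()
-</pre>
-</div>
-
-<p>The second command pops up the graph with <tt>N</tt> highlighted in red, and
-the third resets all graph attributes for subsequent views.</p>
-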
-<p>Note that graph visualization features are compiled out of Release builds
-to reduce file size. This means that you need a Debug+Asserts or
-Release+Asserts build to use these features.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="datastructure">Picking the Right Data Structure for a Task</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM has a plethora of data structures in the <tt>llvm/ADT/</tt> directory,
- and we commonly use STL data structures. This section describes the trade-offs
- you should consider when you pick one.</p>
-
-<p>
-The first step is a choose your own adventure: do you want a sequential
-container, a set-like container, or a map-like container? The most important
-thing when choosing a container is the algorithmic properties of how you plan to
-access the container. Based on that, you should use:</p>
-
-<ul>
-<li>a <a href="#ds_map">map-like</a> container if you need efficient look-up
-    of a value based on another value.  Map-like containers also support
-    efficient queries for containment (whether a key is in the map).  Map-like
-    containers generally do not support efficient reverse mapping (values to
-    keys).  If you need that, use two maps.  Some map-like containers also
-    support efficient iteration through the keys in sorted order.  Map-like
-    containers are the most expensive sort; only use them if you need one of
-    these capabilities.</li>
-
-<li>a <a href="#ds_set">set-like</a> container if you need to put a bunch of
- stuff into a container that automatically eliminates duplicates. Some
- set-like containers support efficient iteration through the elements in
- sorted order. Set-like containers are more expensive than sequential
- containers.
-</li>
-
-<li>a <a href="#ds_sequential">sequential</a> container if you need the most
-    efficient way to add elements while keeping track of the order in which
-    they are added to the collection.  Sequential containers permit duplicates
-    and support efficient iteration, but do not support efficient look-up
-    based on a key.
-</li>
-
-<li>a <a href="#ds_string">string</a> container is a specialized sequential
- container or reference structure that is used for character or byte
- arrays.</li>
-
-<li>a <a href="#ds_bit">bit</a> container provides an efficient way to store and
- perform set operations on sets of numeric id's, while automatically
- eliminating duplicates. Bit containers require a maximum of 1 bit for each
- identifier you want to store.
-</li>
-</ul>
-
-<p>
-Once the proper category of container is determined, you can fine tune the
-memory use, constant factors, and cache behaviors of access by intelligently
-picking a member of the category. Note that constant factors and cache behavior
-can be a big deal. If you have a vector that usually only contains a few
-elements (but could contain many), for example, it's much better to use
-<a href="#dss_smallvector">SmallVector</a> than <a href="#dss_vector">vector</a>.
-Doing so avoids (relatively) expensive malloc/free calls, which dwarf the
-cost of adding the elements to the container.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ds_sequential">Sequential Containers (std::vector, std::list, etc)</a>
-</h3>
-
-<div>
-There are a variety of sequential containers available for you, based on your
-needs. Pick the first in this section that will do what you want.
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_arrayref">llvm/ADT/ArrayRef.h</a>
-</h4>
-
-<div>
-<p>The llvm::ArrayRef class is the preferred class to use in an interface that
- accepts a sequential list of elements in memory and just reads from them. By
- taking an ArrayRef, the API can be passed a fixed size array, an std::vector,
- an llvm::SmallVector and anything else that is contiguous in memory.
-</p>
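-
-<p>For instance, a single <tt>ArrayRef</tt> parameter accepts several kinds of
-contiguous storage.  This is a minimal sketch; <tt>sumValues</tt> is a
-hypothetical helper, not an LLVM API:</p>
-
-<div class="doc_code">
-<pre>
-int sumValues(ArrayRef&lt;int&gt; Values) {
-  int Sum = 0;
-  for (unsigned i = 0, e = Values.size(); i != e; ++i)
-    Sum += Values[i];  // <i>Read-only access; ArrayRef never owns the data.</i>
-  return Sum;
-}
-
-int Fixed[] = { 1, 2, 3 };
-std::vector&lt;int&gt; Vec(Fixed, Fixed + 3);
-SmallVector&lt;int, 4&gt; Small(Fixed, Fixed + 3);
-
-int A = sumValues(Fixed);  // <i>fixed-size array</i>
-int B = sumValues(Vec);    // <i>std::vector</i>
-int C = sumValues(Small);  // <i>llvm::SmallVector</i>
-</pre>
-</div>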
-</div>
-
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_fixedarrays">Fixed Size Arrays</a>
-</h4>
-
-<div>
-<p>Fixed size arrays are very simple and very fast. They are good if you know
-exactly how many elements you have, or you have a (low) upper bound on how many
-you have.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_heaparrays">Heap Allocated Arrays</a>
-</h4>
-
-<div>
-<p>Heap allocated arrays (new[] + delete[]) are also simple. They are good if
-the number of elements is variable, if you know how many elements you will need
-before the array is allocated, and if the array is usually large (if not,
-consider a <a href="#dss_smallvector">SmallVector</a>). The cost of a heap
-allocated array is the cost of the new/delete (aka malloc/free). Also note that
-if you are allocating an array of a type with a constructor, the constructor
-and destructor will be run for every element in the array (re-sizable vectors
-only construct those elements actually used).</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_tinyptrvector">"llvm/ADT/TinyPtrVector.h"</a>
-</h4>
-
-
-<div>
-<p><tt>TinyPtrVector&lt;Type&gt;</tt> is a highly specialized collection class
-that is optimized to avoid allocation in the case when a vector has zero or one
-elements. It has two major restrictions: 1) it can only hold values of pointer
-type, and 2) it cannot hold a null pointer.</p>
-
-<p>Since this container is highly specialized, it is rarely used.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_smallvector">"llvm/ADT/SmallVector.h"</a>
-</h4>
-
-<div>
-<p><tt>SmallVector&lt;Type, N&gt;</tt> is a simple class that looks and smells
-just like <tt>vector&lt;Type&gt;</tt>:
-it supports efficient iteration, lays out elements in memory order (so you can
-do pointer arithmetic between elements), supports efficient push_back/pop_back
-operations, supports efficient random access to its elements, etc.</p>
-
-<p>The advantage of SmallVector is that it allocates space for
-some number of elements (N) <b>in the object itself</b>. Because of this, if
-the SmallVector is dynamically smaller than N, no malloc is performed. This can
-be a big win in cases where the malloc/free call is far more expensive than the
-code that fiddles around with the elements.</p>
-
-<p>This is good for vectors that are "usually small" (e.g. the number of
-predecessors/successors of a block is usually less than 8). On the other hand,
-this makes the size of the SmallVector itself large, so you don't want to
-allocate lots of them (doing so will waste a lot of space). As such,
-SmallVectors are most useful when on the stack.</p>
-
-<p>SmallVector also provides a nice portable and efficient replacement for
-<tt>alloca</tt>.</p>
-
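-<p>For example, a typical stack use, sketched under the assumption that a
-<tt>BasicBlock *BB</tt> is in scope and that most blocks have fewer than 8
-predecessors:</p>
-
-<div class="doc_code">
-<pre>
-// No heap allocation unless BB has more than 8 predecessors.
-SmallVector&lt;BasicBlock*, 8&gt; Preds(pred_begin(BB), pred_end(BB));
-</pre>
-</div>
-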
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_vector">&lt;vector&gt;</a>
-</h4>
-
-<div>
-<p>
-std::vector is well loved and respected. It is useful when SmallVector isn't:
-when the size of the vector is often large (thus the small optimization will
-rarely be a benefit) or if you will be allocating many instances of the vector
-itself (which would waste space for elements that aren't in the container).
-vector is also useful when interfacing with code that expects vectors :).
-</p>
-
-<p>One worthwhile note about std::vector: avoid code like this:</p>
-
-<div class="doc_code">
-<pre>
-for ( ... ) {
- std::vector&lt;foo&gt; V;
- // make use of V.
-}
-</pre>
-</div>
-
-<p>Instead, write this as:</p>
-
-<div class="doc_code">
-<pre>
-std::vector&lt;foo&gt; V;
-for ( ... ) {
- // make use of V.
- V.clear();
-}
-</pre>
-</div>
-
-<p>Doing so will save (at least) one heap allocation and free per iteration of
-the loop.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_deque">&lt;deque&gt;</a>
-</h4>
-
-<div>
-<p>std::deque is, in some senses, a generalized version of std::vector.  Like
-std::vector, it provides constant time random access and other similar
-properties, but it also provides efficient access to the front of the
-container.  It does not guarantee that its elements are contiguous in memory.</p>
-
-<p>In exchange for this extra flexibility, std::deque has significantly higher
-constant factor costs than std::vector. If possible, use std::vector or
-something cheaper.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_list">&lt;list&gt;</a>
-</h4>
-
-<div>
-<p>std::list is an extremely inefficient class that is rarely useful.
-It performs a heap allocation for every element inserted into it, thus having an
-extremely high constant factor, particularly for small data types. std::list
-also only supports bidirectional iteration, not random access iteration.</p>
-
-<p>In exchange for this high cost, std::list supports efficient access to both
-ends of the list (like std::deque, but unlike std::vector or SmallVector). In
-addition, the iterator invalidation characteristics of std::list are stronger
-than those of a vector class: inserting or removing an element from the list
-does not invalidate iterators or pointers to other elements in the list.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_ilist">llvm/ADT/ilist.h</a>
-</h4>
-
-<div>
-<p><tt>ilist&lt;T&gt;</tt> implements an 'intrusive' doubly-linked list. It is
-intrusive, because it requires the element to store and provide access to the
-prev/next pointers for the list.</p>
-
-<p><tt>ilist</tt> has the same drawbacks as <tt>std::list</tt>, and additionally
-requires an <tt>ilist_traits</tt> implementation for the element type, but it
-provides some novel characteristics. In particular, it can efficiently store
-polymorphic objects, the traits class is informed when an element is inserted or
-removed from the list, and <tt>ilist</tt>s are guaranteed to support a
-constant-time splice operation.</p>
-
-<p>These properties are exactly what we want for things like
-<tt>Instruction</tt>s and basic blocks, which is why these are implemented with
-<tt>ilist</tt>s.</p>
-
-Related classes of interest are explained in the following subsections:
- <ul>
- <li><a href="#dss_ilist_traits">ilist_traits</a></li>
- <li><a href="#dss_iplist">iplist</a></li>
- <li><a href="#dss_ilist_node">llvm/ADT/ilist_node.h</a></li>
- <li><a href="#dss_ilist_sentinel">Sentinels</a></li>
- </ul>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_packedvector">llvm/ADT/PackedVector.h</a>
-</h4>
-
-<div>
-<p>
-Useful for storing a vector of values using only a small number of bits for each
-value. Apart from the standard operations of a vector-like container, it can
-also perform an 'or' set operation.
-</p>
-
-<p>For example:</p>
-
-<div class="doc_code">
-<pre>
-enum State {
- None = 0x0,
- FirstCondition = 0x1,
- SecondCondition = 0x2,
- Both = 0x3
-};
-
-State get() {
- PackedVector&lt;State, 2&gt; Vec1;
- Vec1.push_back(FirstCondition);
-
- PackedVector&lt;State, 2&gt; Vec2;
- Vec2.push_back(SecondCondition);
-
- Vec1 |= Vec2;
- return Vec1[0]; // returns 'Both'.
-}
-</pre>
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_ilist_traits">ilist_traits</a>
-</h4>
-
-<div>
-<p><tt>ilist_traits&lt;T&gt;</tt> is <tt>ilist&lt;T&gt;</tt>'s customization
-mechanism. <tt>iplist&lt;T&gt;</tt> (and consequently <tt>ilist&lt;T&gt;</tt>)
-publicly derive from this traits class.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_iplist">iplist</a>
-</h4>
-
-<div>
-<p><tt>iplist&lt;T&gt;</tt> is <tt>ilist&lt;T&gt;</tt>'s base and as such
-supports a slightly narrower interface. Notably, inserters from
-<tt>T&amp;</tt> are absent.</p>
-
-<p><tt>ilist_traits&lt;T&gt;</tt> is a public base of this class and can be
-used for a wide variety of customizations.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_ilist_node">llvm/ADT/ilist_node.h</a>
-</h4>
-
-<div>
-<p><tt>ilist_node&lt;T&gt;</tt> implements the forward and backward links
-that are expected by the <tt>ilist&lt;T&gt;</tt> (and analogous containers)
-in the default manner.</p>
-
-<p><tt>ilist_node&lt;T&gt;</tt>s are meant to be embedded in the node type
-<tt>T</tt>; usually <tt>T</tt> publicly derives from
-<tt>ilist_node&lt;T&gt;</tt>.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_ilist_sentinel">Sentinels</a>
-</h4>
-
-<div>
-<p><tt>ilist</tt>s have another specialty that must be considered.  To be a good
-citizen in the C++ ecosystem, an <tt>ilist</tt> needs to support the standard
-container operations, such as <tt>begin</tt> and <tt>end</tt> iterators, etc. Also, the
-<tt>operator--</tt> must work correctly on the <tt>end</tt> iterator in the
-case of non-empty <tt>ilist</tt>s.</p>
-
-<p>The only sensible solution to this problem is to allocate a so-called
-<i>sentinel</i> along with the intrusive list, which serves as the <tt>end</tt>
-iterator, providing the back-link to the last element.  However, conforming to
-C++ convention, it is illegal to apply <tt>operator++</tt> beyond the sentinel,
-and the sentinel must not be dereferenced.</p>
-
-<p>These constraints leave the <tt>ilist</tt> some implementation freedom in
-how it allocates and stores the sentinel.  The corresponding policy is dictated
-by <tt>ilist_traits&lt;T&gt;</tt>. By default a <tt>T</tt> gets heap-allocated
-whenever the need for a sentinel arises.</p>
-
-<p>While the default policy is sufficient in most cases, it may break down when
-<tt>T</tt> does not provide a default constructor. Also, in the case of many
-instances of <tt>ilist</tt>s, the memory overhead of the associated sentinels
-is wasted. To alleviate the situation with numerous and voluminous
-<tt>T</tt>-sentinels, sometimes a trick is employed, leading to <i>ghostly
-sentinels</i>.</p>
-
-<p>Ghostly sentinels are obtained by specially-crafted <tt>ilist_traits&lt;T&gt;</tt>
-which superpose the sentinel with the <tt>ilist</tt> instance in memory. Pointer
-arithmetic is used to obtain the sentinel, which is relative to the
-<tt>ilist</tt>'s <tt>this</tt> pointer. The <tt>ilist</tt> is augmented by an
-extra pointer, which serves as the back-link of the sentinel. This is the only
-field in the ghostly sentinel which can be legally accessed.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_other">Other Sequential Container Options</a>
-</h4>
-
-<div>
-<p>Other STL containers are available, such as std::string.</p>
-
-<p>There are also various STL adapter classes such as std::queue,
-std::priority_queue, std::stack, etc. These provide simplified access to an
-underlying container but don't affect the cost of the container itself.</p>
-
-</div>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ds_string">String-like containers</a>
-</h3>
-
-<div>
-
-<p>
-There are a variety of ways to pass around and use strings in C and C++, and
-LLVM adds a few new options to choose from. Pick the first option on this list
-that will do what you need; they are ordered according to their relative cost.
-</p>
-<p>
-Note that it is generally preferred to <em>not</em> pass strings around as
-"<tt>const char*</tt>"'s. These have a number of problems, including the fact
-that they cannot represent embedded nul ("\0") characters, and do not have a
-length available efficiently. The general replacement for '<tt>const
-char*</tt>' is StringRef.
-</p>
-
-<p>For more information on choosing string containers for APIs, please see
-<a href="#string_apis">Passing strings</a>.</p>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_stringref">llvm/ADT/StringRef.h</a>
-</h4>
-
-<div>
-<p>
-The StringRef class is a simple value class that contains a pointer to a
-character and a length, and is quite related to the <a
-href="#dss_arrayref">ArrayRef</a> class (but specialized for arrays of
-characters). Because StringRef carries a length with it, it safely handles
-strings with embedded nul characters, getting the length does not require
-a strlen call, and it even has very convenient APIs for slicing and dicing the
-character range that it represents.
-</p>
-
-<p>
-StringRef is ideal for passing simple strings around that are known to be live,
-either because they are C string literals, std::strings, C arrays, or
-SmallVectors.  Each of these cases has an efficient implicit conversion to
-StringRef, which doesn't result in a dynamic strlen being executed.
-</p>
-
-<p>StringRef has a few major limitations which make more powerful string
-containers useful:</p>
-
-<ol>
-<li>You cannot directly convert a StringRef to a 'const char*' because there is
-no way to add a trailing nul (unlike the .c_str() method on various stronger
-classes).</li>
-
-
-<li>StringRef doesn't own or keep alive the underlying string bytes.
-As such it can easily lead to dangling pointers, and is not suitable for
-embedding in datastructures in most cases (instead, use an std::string or
-something like that).</li>
-
-<li>For the same reason, StringRef cannot be used as the return value of a
-method if the method "computes" the result string. Instead, use
-std::string.</li>
-
-<li>StringRefs do not allow you to mutate the pointed-to string bytes, and they
-do not allow you to insert or remove bytes from the range.  For editing
-operations like these, StringRef interoperates with the <a
-href="#dss_twine">Twine</a> class.</li>
-</ol>
-
-<p>Because of its strengths and limitations, it is very common for a function to
-take a StringRef and for a method on an object to return a StringRef that
-points into some string that it owns.</p>
-
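-<p>A minimal sketch of that pattern (<tt>Symbol</tt> is a hypothetical class,
-not an LLVM API):</p>
-
-<div class="doc_code">
-<pre>
-class Symbol {
-  std::string Name;  // <i>Owned storage.</i>
-public:
-  StringRef getName() const { return Name; }     // <i>Points into Name.</i>
-  void setName(StringRef N) { Name = N.str(); }  // <i>Copies the bytes.</i>
-};
-</pre>
-</div>
-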
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_twine">llvm/ADT/Twine.h</a>
-</h4>
-
-<div>
- <p>
- The Twine class is used as an intermediary datatype for APIs that want to take
- a string that can be constructed inline with a series of concatenations.
- Twine works by forming recursive instances of the Twine datatype (a simple
- value object) on the stack as temporary objects, linking them together into a
- tree which is then linearized when the Twine is consumed. Twine is only safe
- to use as the argument to a function, and should always be a const reference,
- e.g.:
- </p>
-
- <pre>
- void foo(const Twine &amp;T);
- ...
- StringRef X = ...
- unsigned i = ...
- foo(X + "." + Twine(i));
- </pre>
-
- <p>This example forms a string like "blarg.42" by concatenating the values
- together, and does not form intermediate strings containing "blarg" or
- "blarg.".
- </p>
-
- <p>Because Twine is constructed with temporary objects on the stack, and
- because these instances are destroyed at the end of the current statement,
- it is an inherently dangerous API. For example, this simple variant contains
- undefined behavior and will probably crash:</p>
-
- <pre>
- void foo(const Twine &amp;T);
- ...
- StringRef X = ...
- unsigned i = ...
- const Twine &amp;Tmp = X + "." + Twine(i);
- foo(Tmp);
- </pre>
-
- <p>... because the temporaries are destroyed before the call. That said,
-  Twines are much more efficient than intermediate std::string temporaries, and
- they work really well with StringRef. Just be aware of their limitations.</p>
-
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_smallstring">llvm/ADT/SmallString.h</a>
-</h4>
-
-<div>
-
-<p>SmallString is a subclass of <a href="#dss_smallvector">SmallVector</a> that
-adds some convenience APIs, such as a += operator that takes StringRefs.
-SmallString avoids allocating memory in the case when the preallocated space is
-enough to hold its data, and it falls back to general heap allocation when
-required.  Since it owns its data, it is very safe to use and supports full
-mutation of the string.</p>
-
-<p>Like SmallVector, the big downside to SmallString is its sizeof.  While
-SmallStrings are optimized for small strings, they themselves are not particularly
-small. This means that they work great for temporary scratch buffers on the
-stack, but should not generally be put into the heap: it is very rare to
-see a SmallString as the member of a frequently-allocated heap data structure
-or returned by-value.
-</p>
-
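-<p>For example, a typical scratch-buffer use, sketched under the assumption
-that a <tt>StringRef Name</tt> is in scope:</p>
-
-<div class="doc_code">
-<pre>
-SmallString&lt;128&gt; Buf;
-Buf += "prefix.";
-Buf += Name;  // <i>No heap traffic as long as the result fits in 128 bytes.</i>
-StringRef Result = Buf.str();
-</pre>
-</div>
-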
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_stdstring">std::string</a>
-</h4>
-
-<div>
-
- <p>The standard C++ std::string class is a very general class that (like
- SmallString) owns its underlying data. sizeof(std::string) is very reasonable
- so it can be embedded into heap data structures and returned by-value.
- On the other hand, std::string is highly inefficient for inline editing (e.g.
- concatenating a bunch of stuff together) and because it is provided by the
-  standard library, its performance characteristics depend a lot on the host
-  standard library (e.g. libc++ and MSVC provide a highly optimized string
-  class, while GCC contains a really slow implementation).
- </p>
-
- <p>The major disadvantage of std::string is that almost every operation that
- makes them larger can allocate memory, which is slow. As such, it is better
- to use SmallVector or Twine as a scratch buffer, but then use std::string to
- persist the result.</p>
-
-
-</div>
-
-<!-- end of strings -->
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
-</h3>
-
-<div>
-
-<p>Set-like containers are useful when you need to canonicalize multiple values
-into a single representation. There are several different choices for how to do
-this, providing various trade-offs.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_sortedvectorset">A sorted 'vector'</a>
-</h4>
-
-<div>
-
-<p>If you intend to insert a lot of elements, then do a lot of queries, a
-great approach is to use a vector (or other sequential container) with
-std::sort+std::unique to remove duplicates. This approach works really well if
-your usage pattern has these two distinct phases (insert then query), and can be
-coupled with a good choice of <a href="#ds_sequential">sequential container</a>.
-</p>
-
-<p>
-This combination provides several nice properties: the result data is
-contiguous in memory (good for cache locality), has few allocations, is easy to
-address (iterators in the final vector are just indices or pointers), and can be
-efficiently queried with a standard binary or radix search.</p>
-
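-<p>A minimal sketch of the two-phase idiom, assuming a populated
-<tt>std::vector&lt;Value*&gt; Worklist</tt> and a <tt>Value *V</tt> to look
-up (requires &lt;algorithm&gt;):</p>
-
-<div class="doc_code">
-<pre>
-// Phase 1: insert everything, then canonicalize.
-std::sort(Worklist.begin(), Worklist.end());
-Worklist.erase(std::unique(Worklist.begin(), Worklist.end()),
-               Worklist.end());
-
-// Phase 2: query with a standard binary search.
-bool Found = std::binary_search(Worklist.begin(), Worklist.end(), V);
-</pre>
-</div>
-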
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_smallset">"llvm/ADT/SmallSet.h"</a>
-</h4>
-
-<div>
-
-<p>If you have a set-like data structure that is usually small and whose elements
-are reasonably small, a <tt>SmallSet&lt;Type, N&gt;</tt> is a good choice. This set
-has space for N elements in place (thus, if the set is dynamically smaller than
-N, no malloc traffic is required) and accesses them with a simple linear search.
-When the set grows beyond 'N' elements, it allocates a more expensive representation that
-guarantees efficient access (for most types, it falls back to std::set, but for
-pointers it uses something far better, <a
-href="#dss_smallptrset">SmallPtrSet</a>).</p>
-
-<p>The magic of this class is that it handles small sets extremely efficiently,
-but gracefully handles extremely large sets without loss of efficiency. The
-drawback is that the interface is quite small: it supports insertion, queries
-and erasing, but does not support iteration.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_smallptrset">"llvm/ADT/SmallPtrSet.h"</a>
-</h4>
-
-<div>
-
-<p>SmallPtrSet has all the advantages of <tt>SmallSet</tt> (and a <tt>SmallSet</tt> of pointers is
-transparently implemented with a <tt>SmallPtrSet</tt>), but also supports iterators. If
-more than 'N' insertions are performed, a single quadratically
-probed hash table is allocated and grows as needed, providing extremely
-efficient access (constant time insertion/deletion/queries with low constant
-factors) and is very stingy with malloc traffic.</p>
-
-<p>Note that, unlike <tt>std::set</tt>, the iterators of <tt>SmallPtrSet</tt> are invalidated
-whenever an insertion occurs. Also, the values visited by the iterators are not
-visited in sorted order.</p>
-
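-<p>A common visited-set idiom, sketched under the assumption that an
-<tt>Instruction *I</tt> and a <tt>Worklist</tt> vector are in scope;
-<tt>insert</tt> returns true the first time a given pointer is added:</p>
-
-<div class="doc_code">
-<pre>
-SmallPtrSet&lt;Instruction*, 16&gt; Visited;
-...
-if (Visited.insert(I))     // <i>True only the first time I is seen.</i>
-  Worklist.push_back(I);
-</pre>
-</div>
-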
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_denseset">"llvm/ADT/DenseSet.h"</a>
-</h4>
-
-<div>
-
-<p>
-DenseSet is a simple quadratically probed hash table. It excels at supporting
-small values: it uses a single allocation to hold all of the pairs that
-are currently inserted in the set. DenseSet is a great way to unique small
-values that are not simple pointers (use <a
-href="#dss_smallptrset">SmallPtrSet</a> for pointers). Note that DenseSet has
-the same requirements for the value type that <a
-href="#dss_densemap">DenseMap</a> has.
-</p>
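-
-<p>A minimal sketch:</p>
-
-<div class="doc_code">
-<pre>
-DenseSet&lt;unsigned&gt; Seen;   // <i>one allocation holds the whole table</i>
-Seen.insert(42);
-if (Seen.count(42))
-  ...
-</pre>
-</div>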
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_sparseset">"llvm/ADT/SparseSet.h"</a>
-</h4>
-
-<div>
-
-<p>SparseSet holds a small number of objects identified by unsigned keys of
-moderate size. It uses a lot of memory, but provides operations that are
-almost as fast as a vector. Typical keys are physical registers, virtual
-registers, or numbered basic blocks.</p>
-
-<p>SparseSet is useful for algorithms that need very fast clear/find/insert/erase
-and fast iteration over small sets. It is not intended for building composite
-data structures.</p>
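-
-<p>A brief sketch; note that a SparseSet must be told the size of its key
-universe before use (the names are illustrative):</p>
-
-<div class="doc_code">
-<pre>
-SparseSet&lt;unsigned&gt; Live;
-Live.setUniverse(NumRegs);   // <i>keys must be less than NumRegs</i>
-Live.insert(Reg);
-if (Live.count(Reg))
-  ...
-Live.clear();                // <i>constant time, regardless of contents</i>
-</pre>
-</div>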
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_FoldingSet">"llvm/ADT/FoldingSet.h"</a>
-</h4>
-
-<div>
-
-<p>
-FoldingSet is an aggregate class that is really good at uniquing
-expensive-to-create or polymorphic objects. It is a combination of a chained
-hash table with intrusive links (uniqued objects are required to inherit from
-FoldingSetNode) that uses <a href="#dss_smallvector">SmallVector</a> as part of
-its ID process.</p>
-
-<p>Consider a case where you want to implement a "getOrCreateFoo" method for
-a complex object (for example, a node in the code generator). The client has a
-description of *what* it wants to generate (it knows the opcode and all the
-operands), but we don't want to 'new' a node, then try inserting it into a set
-only to find out it already exists, at which point we would have to delete it
-and return the node that already exists.
-</p>
-
-<p>To support this style of client, FoldingSet performs a query with a
-FoldingSetNodeID (which wraps SmallVector) that can be used to describe the
-element that we want to query for. The query either returns the element
-matching the ID or it returns an opaque ID that indicates where insertion should
-take place. Construction of the ID usually does not require heap traffic.</p>
-
-<p>Because FoldingSet uses intrusive links, it can support polymorphic objects
-in the set (for example, you can have SDNode instances mixed with LoadSDNodes).
-Because the elements are individually allocated, pointers to the elements are
-stable: inserting or removing elements does not invalidate any pointers to other
-elements.
-</p>
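-
-<p>The following sketch shows the shape of a typical "getOrCreate" method
-using the standard <tt>FindNodeOrInsertPos</tt>/<tt>InsertNode</tt> API; the
-node class and its fields are hypothetical:</p>
-
-<div class="doc_code">
-<pre>
-class MyNode : public FoldingSetNode {
-  unsigned Opcode;
-  Value *Operand;
-public:
-  MyNode(unsigned Opc, Value *Op) : Opcode(Opc), Operand(Op) {}
-  void Profile(FoldingSetNodeID &amp;ID) {  // <i>describes this node's identity</i>
-    ID.AddInteger(Opcode);
-    ID.AddPointer(Operand);
-  }
-};
-
-FoldingSet&lt;MyNode&gt; Nodes;
-
-MyNode *getOrCreateNode(unsigned Opc, Value *Op) {
-  FoldingSetNodeID ID;
-  ID.AddInteger(Opc);
-  ID.AddPointer(Op);
-  void *InsertPos;
-  if (MyNode *N = Nodes.FindNodeOrInsertPos(ID, InsertPos))
-    return N;                            // <i>already exists; nothing allocated</i>
-  MyNode *N = new MyNode(Opc, Op);
-  Nodes.InsertNode(N, InsertPos);        // <i>insert at the remembered position</i>
-  return N;
-}
-</pre>
-</div>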
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_set">&lt;set&gt;</a>
-</h4>
-
-<div>
-
-<p><tt>std::set</tt> is a reasonable all-around set class, which is decent at
-many things but great at nothing. std::set allocates memory for each element
-inserted (thus it is very malloc intensive) and typically stores three pointers
-per element in the set (thus adding a large amount of per-element space
-overhead). It offers guaranteed log(n) performance, which is not particularly
-fast from a complexity standpoint (particularly if the elements of the set are
-expensive to compare, like strings), and has extremely high constant factors for
-lookup, insertion and removal.</p>
-
-<p>The advantages of std::set are that its iterators are stable (deleting or
-inserting an element from the set does not affect iterators or pointers to other
-elements) and that iteration over the set is guaranteed to be in sorted order.
-If the elements in the set are large, then the relative overhead of the pointers
-and malloc traffic is not a big deal, but if the elements of the set are small,
-std::set is almost never a good choice.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_setvector">"llvm/ADT/SetVector.h"</a>
-</h4>
-
-<div>
-<p>LLVM's SetVector&lt;Type&gt; is an adapter class that combines your choice of
-a set-like container along with a <a href="#ds_sequential">Sequential
-Container</a>. The important property
-that this provides is efficient insertion with uniquing (duplicate elements are
-ignored), along with iteration support. It implements this by inserting elements into
-both a set-like container and the sequential container, using the set-like
-container for uniquing and the sequential container for iteration.
-</p>
-
-<p>The difference between SetVector and other sets is that the order of
-iteration is guaranteed to match the order of insertion into the SetVector.
-This property is really important for things like sets of pointers. Because
-pointer values are non-deterministic (e.g. they vary across runs of the program
-on different machines), so iterating over the pointers in an ordinary set will
-not visit them in a well-defined order.</p>
-
-<p>
-The drawback of SetVector is that it requires twice as much space as a normal
-set and has the sum of constant factors from the set-like container and the
-sequential container that it uses. Use it *only* if you need to iterate over
-the elements in a deterministic order. SetVector is also expensive to delete
-elements out of (linear time), unless you use it's "pop_back" method, which is
-faster.
-</p>
-
-<p><tt>SetVector</tt> is an adapter class that defaults to
- using <tt>std::vector</tt> and a size 16 <tt>SmallSet</tt> for the underlying
- containers, so it is quite expensive. However,
- <tt>"llvm/ADT/SetVector.h"</tt> also provides a <tt>SmallSetVector</tt>
- class, which defaults to using a <tt>SmallVector</tt> and <tt>SmallSet</tt>
- of a specified size. If you use this, and if your sets are dynamically
- smaller than <tt>N</tt>, you will save a lot of heap traffic.</p>
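-
-<p>A short sketch of deterministic worklist usage (names are
-illustrative):</p>
-
-<div class="doc_code">
-<pre>
-SmallSetVector&lt;Instruction*, 8&gt; Worklist;
-Worklist.insert(I1);
-Worklist.insert(I2);
-Worklist.insert(I1);   // <i>duplicate; ignored</i>
-
-// <i>visits I1 then I2, regardless of pointer values</i>
-for (SmallSetVector&lt;Instruction*, 8&gt;::iterator It = Worklist.begin(),
-     E = Worklist.end(); It != E; ++It)
-  ...
-</pre>
-</div>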
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_uniquevector">"llvm/ADT/UniqueVector.h"</a>
-</h4>
-
-<div>
-
-<p>
-UniqueVector is similar to <a href="#dss_setvector">SetVector</a>, but it
-retains a unique ID for each element inserted into the set. It internally
-contains a map and a vector, and it assigns a unique ID for each value inserted
-into the set.</p>
-
-<p>UniqueVector is very expensive: its cost is the sum of the cost of
-maintaining both the map and vector, it has high complexity, high constant
-factors, and produces a lot of malloc traffic. It should be avoided.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_immutableset">"llvm/ADT/ImmutableSet.h"</a>
-</h4>
-
-<div>
-
-<p>
-ImmutableSet is an immutable (functional) set implementation based on an AVL
-tree.
-Adding or removing elements is done through a Factory object and results in the
-creation of a new ImmutableSet object.
-If an ImmutableSet already exists with the given contents, then the existing one
-is returned; equality is compared with a FoldingSetNodeID.
-The time and space complexity of add or remove operations is logarithmic in the
-size of the original set.</p>
-
-<p>
-There is no method for returning an element of the set; you can only check for
-membership.</p>
-
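-<p>A minimal sketch of the Factory-based interface:</p>
-
-<div class="doc_code">
-<pre>
-ImmutableSet&lt;int&gt;::Factory F;
-ImmutableSet&lt;int&gt; S0 = F.getEmptySet();
-ImmutableSet&lt;int&gt; S1 = F.add(S0, 3);   // <i>S0 is unchanged</i>
-ImmutableSet&lt;int&gt; S2 = F.add(S1, 5);
-bool HasThree = S2.contains(3);        // <i>membership check only</i>
-</pre>
-</div>
-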
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_otherset">Other Set-Like Container Options</a>
-</h4>
-
-<div>
-
-<p>
-The STL provides several other options, such as std::multiset and the various
-"hash_set" like containers (whether from C++ TR1 or from the SGI library). We
-never use hash_set and unordered_set because they are generally very expensive
-(each insertion requires a malloc) and very non-portable.
-</p>
-
-<p>std::multiset is useful if you're not interested in elimination of
-duplicates, but has all the drawbacks of std::set. A sorted vector (where you
-don't delete duplicate entries) or some other approach is almost always
-better.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ds_map">Map-Like Containers (std::map, DenseMap, etc)</a>
-</h3>
-
-<div>
-Map-like containers are useful when you want to associate data to a key. As
-usual, there are a lot of different ways to do this. :)
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_sortedvectormap">A sorted 'vector'</a>
-</h4>
-
-<div>
-
-<p>
-If your usage pattern follows a strict insert-then-query approach, you can
-trivially use the same approach as <a href="#dss_sortedvectorset">sorted vectors
-for set-like containers</a>. The only difference is that your query function
-(which uses std::lower_bound to get efficient log(n) lookup) should only compare
-the key, not both the key and value. This yields the same advantages as sorted
-vectors for sets.
-</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_stringmap">"llvm/ADT/StringMap.h"</a>
-</h4>
-
-<div>
-
-<p>
-Strings are commonly used as keys in maps, and they are difficult to support
-efficiently: they are variable length, inefficient to hash and compare when
-long, expensive to copy, etc. StringMap is a specialized container designed to
-cope with these issues. It supports mapping an arbitrary range of bytes to an
-arbitrary other object.</p>
-
-<p>The StringMap implementation uses a quadratically-probed hash table, where
-the buckets store a pointer to the heap allocated entries (and some other
-stuff). The entries in the map must be heap allocated because the strings are
-variable length. The string data (key) and the element object (value) are
-stored in the same allocation with the string data immediately after the element
-object. This container guarantees that "<tt>(char*)(&amp;Value+1)</tt>" points
-to the key string for a value.</p>
-
-<p>The StringMap is very fast for several reasons: quadratic probing is very
-cache efficient for lookups, the hash value of strings in buckets is not
-recomputed when looking up an element, StringMap rarely has to touch the
-memory for unrelated objects when looking up a value (even when hash collisions
-happen), hash table growth does not recompute the hash values for strings
-already in the table, and each pair in the map is stored in a single allocation
-(the string data is stored in the same allocation as the Value of a pair).</p>
-
-<p>StringMap also provides query methods that take byte ranges, so it only ever
-copies a string if a value is inserted into the table.</p>
-
-<p>StringMap iteration order, however, is not guaranteed to be deterministic,
-so any uses which require that should instead use a std::map.</p>
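-
-<p>A short usage sketch (keys and counts are illustrative):</p>
-
-<div class="doc_code">
-<pre>
-StringMap&lt;unsigned&gt; Counts;
-Counts["foo"] = 1;          // <i>inserts the key if absent</i>
-++Counts["foo"];
-if (Counts.count("bar"))
-  ...
-for (StringMap&lt;unsigned&gt;::iterator It = Counts.begin(), E = Counts.end();
-     It != E; ++It)
-  errs() &lt;&lt; It-&gt;getKey() &lt;&lt; ": " &lt;&lt; It-&gt;getValue() &lt;&lt; "\n";
-</pre>
-</div>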
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_indexedmap">"llvm/ADT/IndexedMap.h"</a>
-</h4>
-
-<div>
-<p>
-IndexedMap is a specialized container for mapping small dense integers (or
-values that can be mapped to small dense integers) to some other type. It is
-internally implemented as a vector with a mapping function that maps the keys to
-the dense integer range.
-</p>
-
-<p>
-This is useful for cases like virtual registers in the LLVM code generator: they
-have a dense mapping that is offset by a compile-time constant (the first
-virtual register ID).</p>
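-
-<p>A minimal sketch (the bound and key are illustrative):</p>
-
-<div class="doc_code">
-<pre>
-IndexedMap&lt;unsigned&gt; Weights;
-Weights.grow(MaxIndex);    // <i>make indices 0..MaxIndex valid</i>
-Weights[SomeIndex] = 5;    // <i>plain vector indexing underneath</i>
-</pre>
-</div>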
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_densemap">"llvm/ADT/DenseMap.h"</a>
-</h4>
-
-<div>
-
-<p>
-DenseMap is a simple quadratically probed hash table. It excels at supporting
-small keys and values: it uses a single allocation to hold all of the pairs that
-are currently inserted in the map. DenseMap is a great way to map pointers to
-pointers, or map other small types to each other.
-</p>
-
-<p>
-There are several aspects of DenseMap that you should be aware of, however. The
-iterators in a DenseMap are invalidated whenever an insertion occurs, unlike
-std::map. Also, because DenseMap allocates space for a large number of key/value
-pairs (it starts with 64 by default), it will waste a lot of space if your keys
-or values are large. Finally, you must implement a partial specialization of
-DenseMapInfo for the key that you want, if it isn't already supported. This
-is required to tell DenseMap about two special marker values (which can never be
-inserted into the map) that it needs internally.</p>
-
-<p>
-DenseMap's find_as() method supports lookup operations using an alternate key
-type. This is useful in cases where the normal key type is expensive to
-construct, but cheap to compare against. The DenseMapInfo is responsible for
-defining the appropriate comparison and hashing methods for each alternate
-key type used.
-</p>
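-
-<p>A short sketch of typical usage (names are illustrative):</p>
-
-<div class="doc_code">
-<pre>
-DenseMap&lt;Instruction*, unsigned&gt; Order;
-Order[I] = NextIndex++;    // <i>inserting may invalidate iterators</i>
-
-DenseMap&lt;Instruction*, unsigned&gt;::iterator It = Order.find(I);
-if (It != Order.end()) {
-  unsigned Idx = It-&gt;second;
-  ...
-}
-</pre>
-</div>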
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_valuemap">"llvm/ADT/ValueMap.h"</a>
-</h4>
-
-<div>
-
-<p>
-ValueMap is a wrapper around a <a href="#dss_densemap">DenseMap</a> mapping
-Value*s (or subclasses) to another type. When a Value is deleted or RAUW'ed,
-ValueMap will update itself so the new version of the key is mapped to the same
-value, just as if the key were a WeakVH. You can configure exactly how this
-happens, and what else happens on these two events, by passing
-a <code>Config</code> parameter to the ValueMap template.</p>
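-
-<p>A minimal sketch; ValueMap otherwise follows the DenseMap interface:</p>
-
-<div class="doc_code">
-<pre>
-ValueMap&lt;Value*, unsigned&gt; Slots;
-Slots.insert(std::make_pair(V, 1));
-// <i>if V is later RAUW'ed to NewV, Slots.count(NewV) becomes 1</i>
-if (Slots.count(V))
-  ...
-</pre>
-</div>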
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_intervalmap">"llvm/ADT/IntervalMap.h"</a>
-</h4>
-
-<div>
-
-<p> IntervalMap is a compact map for small keys and values. It maps key
-intervals instead of single keys, and it will automatically coalesce adjacent
-intervals. When the map only contains a few intervals, they are stored in the
-map object itself to avoid allocations.</p>
-
-<p> The IntervalMap iterators are quite big, so they should not be passed around
-as STL iterators. The heavyweight iterators allow a smaller data structure.</p>
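-
-<p>A brief sketch; an IntervalMap is constructed with an external allocator,
-and the interval bounds here are illustrative:</p>
-
-<div class="doc_code">
-<pre>
-IntervalMap&lt;unsigned, unsigned&gt;::Allocator Alloc;
-IntervalMap&lt;unsigned, unsigned&gt; Ranges(Alloc);
-
-Ranges.insert(10, 20, 1);           // <i>keys 10..20 map to 1</i>
-Ranges.insert(21, 30, 1);           // <i>coalesced with the previous interval</i>
-unsigned V = Ranges.lookup(15);     // <i>returns 1</i>
-</pre>
-</div>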
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_map">&lt;map&gt;</a>
-</h4>
-
-<div>
-
-<p>
-std::map has similar characteristics to <a href="#dss_set">std::set</a>: it uses
-a single allocation per pair inserted into the map, it offers log(n) lookup with
-an extremely large constant factor, imposes a space penalty of 3 pointers per
-pair in the map, etc.</p>
-
-<p>std::map is most useful when your keys or values are very large, if you need
-to iterate over the collection in sorted order, or if you need stable iterators
-into the map (i.e. they don't get invalidated if an insertion or deletion of
-another element takes place).</p>
-
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_mapvector">"llvm/ADT/MapVector.h"</a>
-</h4>
-<div>
-
-<p> MapVector&lt;KeyT,ValueT&gt; provides a subset of the DenseMap interface.
-  The main difference is that the iteration order is guaranteed to be
-  the insertion order, making it an easy (but somewhat expensive) solution
-  to the problem of non-deterministic iteration over maps of pointers. </p>
-
-<p> It is implemented by mapping from key to an index in a vector of key,value
- pairs. This provides fast lookup and iteration, but has two main drawbacks:
- The key is stored twice and it doesn't support removing elements. </p>
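-
-<p>A minimal sketch:</p>
-
-<div class="doc_code">
-<pre>
-MapVector&lt;Value*, unsigned&gt; Info;
-Info[V1] = 1;
-Info[V2] = 2;
-
-// <i>visits V1 then V2: iteration follows insertion order</i>
-for (MapVector&lt;Value*, unsigned&gt;::iterator It = Info.begin(), E = Info.end();
-     It != E; ++It)
-  ...
-</pre>
-</div>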
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_inteqclasses">"llvm/ADT/IntEqClasses.h"</a>
-</h4>
-
-<div>
-
-<p>IntEqClasses provides a compact representation of equivalence classes of
-small integers. Initially, each integer in the range 0..n-1 has its own
-equivalence class. Classes can be joined by passing two class representatives to
-the join(a, b) method. Two integers are in the same class when findLeader()
-returns the same representative.</p>
-
-<p>Once all equivalence classes are formed, the map can be compressed so each
-integer 0..n-1 maps to an equivalence class number in the range 0..m-1, where m
-is the total number of equivalence classes. The map must be uncompressed before
-it can be edited again.</p>
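-
-<p>A worked sketch:</p>
-
-<div class="doc_code">
-<pre>
-IntEqClasses EC(8);        // <i>8 singleton classes: 0..7</i>
-EC.join(0, 4);
-EC.join(4, 7);             // <i>now 0, 4 and 7 share a class</i>
-bool Same = EC.findLeader(0) == EC.findLeader(7);   // <i>true</i>
-
-EC.compress();             // <i>renumber classes as 0..m-1</i>
-unsigned Cls = EC[0];      // <i>class number; valid only after compress()</i>
-</pre>
-</div>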
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_immutablemap">"llvm/ADT/ImmutableMap.h"</a>
-</h4>
-
-<div>
-
-<p>
-ImmutableMap is an immutable (functional) map implementation based on an AVL
-tree.
-Adding or removing elements is done through a Factory object and results in the
-creation of a new ImmutableMap object.
-If an ImmutableMap already exists with the given key set, then the existing one
-is returned; equality is compared with a FoldingSetNodeID.
-The time and space complexity of add or remove operations is logarithmic in the
-size of the original map.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_othermap">Other Map-Like Container Options</a>
-</h4>
-
-<div>
-
-<p>
-The STL provides several other options, such as std::multimap and the various
-"hash_map" like containers (whether from C++ TR1 or from the SGI library). We
-never use hash_set and unordered_set because they are generally very expensive
-(each insertion requires a malloc) and very non-portable.</p>
-
-<p>std::multimap is useful if you want to map a key to multiple values, but has
-all the drawbacks of std::map. A sorted vector or some other approach is almost
-always better.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ds_bit">Bit storage containers (BitVector, SparseBitVector)</a>
-</h3>
-
-<div>
-<p>Unlike the other containers, there are only a few bit storage containers, and
-choosing when to use each is relatively straightforward.</p>
-
-<p>One additional option is
-<tt>std::vector&lt;bool&gt;</tt>: we discourage its use for two reasons: 1) the
-implementation in many common compilers (e.g. commonly available versions of
-GCC) is extremely inefficient and 2) the C++ standards committee is likely to
-deprecate this container and/or change it significantly somehow. In any case,
-please don't use it.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_bitvector">BitVector</a>
-</h4>
-
-<div>
-<p> The BitVector container provides a dynamic size set of bits for manipulation.
-It supports individual bit setting/testing, as well as set operations. The set
-operations take time O(size of bitvector), but operations are performed one word
-at a time, instead of one bit at a time. This makes the BitVector very fast for
-set operations compared to other containers. Use the BitVector when you expect
-the number of set bits to be high (i.e., a dense set).
-</p>
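-
-<p>A short sketch (sizes are illustrative):</p>
-
-<div class="doc_code">
-<pre>
-BitVector Live(128);       // <i>128 bits, initially all zero</i>
-Live.set(5);
-Live.set(64);
-
-BitVector Mask(128);
-Mask.set(64);
-Live &amp;= Mask;              // <i>word-at-a-time intersection</i>
-unsigned NumLive = Live.count();   // <i>1</i>
-</pre>
-</div>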
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_smallbitvector">SmallBitVector</a>
-</h4>
-
-<div>
-<p> The SmallBitVector container provides the same interface as BitVector, but
-it is optimized for the case where only a small number of bits, less than
-25 or so, are needed. It also transparently supports larger bit counts, but
-slightly less efficiently than a plain BitVector, so SmallBitVector should
-only be used when larger counts are rare.
-</p>
-
-<p>
-At this time, SmallBitVector does not support set operations (and, or, xor),
-and its operator[] does not provide an assignable lvalue.
-</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="dss_sparsebitvector">SparseBitVector</a>
-</h4>
-
-<div>
-<p> The SparseBitVector container is much like BitVector, with one major
-difference: only the bits that are set are stored. This makes the
-SparseBitVector much more space efficient than BitVector when the set is
-sparse, as well as making set operations O(number of set bits) instead of
-O(size of universe). The downside to the SparseBitVector is that setting and
-testing of random bits is O(N), and on large SparseBitVectors this can be
-slower than BitVector. In our implementation, setting or testing bits in
-sorted order (either forwards or reverse) is O(1) worst case. Testing and
-setting bits within 128 bits (depends on size) of the current bit is also
-O(1). As a general statement, testing/setting bits in a SparseBitVector is
-O(distance away from the last set bit).
-</p>
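-
-<p>A brief sketch:</p>
-
-<div class="doc_code">
-<pre>
-SparseBitVector&lt;&gt; Reachable;
-Reachable.set(1000000);    // <i>cheap: only set bits are stored</i>
-Reachable.set(5);
-if (Reachable.test(5))
-  ...
-
-SparseBitVector&lt;&gt; Other;
-Other.set(5);
-Reachable &amp;= Other;        // <i>O(number of set bits)</i>
-</pre>
-</div>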
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="common">Helpful Hints for Common Operations</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This section describes how to perform some very simple transformations of
-LLVM code. This is meant to give examples of common idioms used, showing the
-practical side of LLVM transformations.</p>
-
-<p>Because this is a "how-to" section,
-you should also read about the main classes that you will be working with. The
-<a href="#coreclasses">Core LLVM Class Hierarchy Reference</a> contains details
-and descriptions of the main classes that you should know about.</p>
-
-<!-- NOTE: this section should be heavy on example code -->
-<!-- ======================================================================= -->
-<h3>
- <a name="inspection">Basic Inspection and Traversal Routines</a>
-</h3>
-
-<div>
-
-<p>The LLVM compiler infrastructure has many different data structures that may
-be traversed. Following the example of the C++ standard template library, the
-techniques used to traverse these various data structures are all basically the
-same. For an enumerable sequence of values, the <tt>XXXbegin()</tt> function (or
-method) returns an iterator to the start of the sequence, the <tt>XXXend()</tt>
-function returns an iterator pointing to one past the last valid element of the
-sequence, and there is some <tt>XXXiterator</tt> data type that is common
-between the two operations.</p>
-
-<p>Because the pattern for iteration is common across many different aspects of
-the program representation, the standard template library algorithms may be used
-on them, and it is easier to remember how to iterate. First we show a few common
-examples of the data structures that need to be traversed. Other data
-structures are traversed in very similar ways.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="iterate_function">Iterating over the </a><a
- href="#BasicBlock"><tt>BasicBlock</tt></a>s in a <a
- href="#Function"><tt>Function</tt></a>
-</h4>
-
-<div>
-
-<p>It's quite common to have a <tt>Function</tt> instance that you'd like to
-transform in some way; in particular, you'd like to manipulate its
-<tt>BasicBlock</tt>s. To facilitate this, you'll need to iterate over all of
-the <tt>BasicBlock</tt>s that constitute the <tt>Function</tt>. The following is
-an example that prints the name of a <tt>BasicBlock</tt> and the number of
-<tt>Instruction</tt>s it contains:</p>
-
-<div class="doc_code">
-<pre>
-// <i>func is a pointer to a Function instance</i>
-for (Function::iterator i = func-&gt;begin(), e = func-&gt;end(); i != e; ++i)
- // <i>Print out the name of the basic block if it has one, and then the</i>
- // <i>number of instructions that it contains</i>
- errs() &lt;&lt; "Basic block (name=" &lt;&lt; i-&gt;getName() &lt;&lt; ") has "
- &lt;&lt; i-&gt;size() &lt;&lt; " instructions.\n";
-</pre>
-</div>
-
-<p>Note that i can be used as if it were a pointer for the purposes of
-invoking member functions of the <tt>BasicBlock</tt> class. This is
-because the indirection operator is overloaded for the iterator
-classes. In the above code, the expression <tt>i-&gt;size()</tt> is
-exactly equivalent to <tt>(*i).size()</tt> just like you'd expect.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="iterate_basicblock">Iterating over the </a><a
- href="#Instruction"><tt>Instruction</tt></a>s in a <a
- href="#BasicBlock"><tt>BasicBlock</tt></a>
-</h4>
-
-<div>
-
-<p>Just like when dealing with <tt>BasicBlock</tt>s in <tt>Function</tt>s, it's
-easy to iterate over the individual instructions that make up
-<tt>BasicBlock</tt>s. Here's a code snippet that prints out each instruction in
-a <tt>BasicBlock</tt>:</p>
-
-<div class="doc_code">
-<pre>
-// <i>blk is a pointer to a BasicBlock instance</i>
-for (BasicBlock::iterator i = blk-&gt;begin(), e = blk-&gt;end(); i != e; ++i)
- // <i>The next statement works since operator&lt;&lt;(ostream&amp;,...)</i>
- // <i>is overloaded for Instruction&amp;</i>
- errs() &lt;&lt; *i &lt;&lt; "\n";
-</pre>
-</div>
-
-<p>However, this isn't really the best way to print out the contents of a
-<tt>BasicBlock</tt>! Since the ostream operators are overloaded for virtually
-anything you'll care about, you could have just invoked the print routine on the
-basic block itself: <tt>errs() &lt;&lt; *blk &lt;&lt; "\n";</tt>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="iterate_institer">Iterating over the </a><a
- href="#Instruction"><tt>Instruction</tt></a>s in a <a
- href="#Function"><tt>Function</tt></a>
-</h4>
-
-<div>
-
-<p>If you're finding that you commonly iterate over a <tt>Function</tt>'s
-<tt>BasicBlock</tt>s and then that <tt>BasicBlock</tt>'s <tt>Instruction</tt>s,
-<tt>InstIterator</tt> should be used instead. You'll need to include <a
-href="/doxygen/InstIterator_8h-source.html"><tt>llvm/Support/InstIterator.h</tt></a>,
-and then instantiate <tt>InstIterator</tt>s explicitly in your code. Here's a
-small example that shows how to dump all instructions in a function to the standard error stream:</p>
-
-<div class="doc_code">
-<pre>
-#include "<a href="/doxygen/InstIterator_8h-source.html">llvm/Support/InstIterator.h</a>"
-
-// <i>F is a pointer to a Function instance</i>
-for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- errs() &lt;&lt; *I &lt;&lt; "\n";
-</pre>
-</div>
-
-<p>Easy, isn't it? You can also use <tt>InstIterator</tt>s to fill a
-work list with its initial contents. For example, if you wanted to
-initialize a work list to contain all instructions in a <tt>Function</tt>
-F, all you would need to do is something like:</p>
-
-<div class="doc_code">
-<pre>
-std::set&lt;Instruction*&gt; worklist;
-// or better yet, SmallPtrSet&lt;Instruction*, 64&gt; worklist;
-
-for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- worklist.insert(&amp;*I);
-</pre>
-</div>
-
-<p>The STL set <tt>worklist</tt> would now contain all instructions in the
-<tt>Function</tt> pointed to by F.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="iterate_convert">Turning an iterator into a class pointer (and
- vice-versa)</a>
-</h4>
-
-<div>
-
-<p>Sometimes, it'll be useful to grab a reference (or pointer) to a class
-instance when all you've got at hand is an iterator. Well, extracting
-a reference or a pointer from an iterator is very straight-forward.
-Assuming that <tt>i</tt> is a <tt>BasicBlock::iterator</tt> and <tt>j</tt>
-is a <tt>BasicBlock::const_iterator</tt>:</p>
-
-<div class="doc_code">
-<pre>
-Instruction&amp; inst = *i;   // <i>Grab reference to the instruction</i>
-Instruction* pinst = &amp;*i; // <i>Grab pointer to the instruction</i>
-const Instruction&amp; cinst = *j; // <i>Const reference via a const_iterator</i>
-</pre>
-</div>
-
-<p>However, the iterators you'll be working with in the LLVM framework are
-special: they will automatically convert to a ptr-to-instance type whenever they
-need to. Instead of dereferencing the iterator and then taking the address of
-the result, you can simply assign the iterator to the proper pointer type and
-you get the dereference and address-of operation as a result of the assignment
-(behind the scenes, this is a result of overloading casting mechanisms). Thus
-the last line of the last example,</p>
-
-<div class="doc_code">
-<pre>
-Instruction *pinst = &amp;*i;
-</pre>
-</div>
-
-<p>is semantically equivalent to</p>
-
-<div class="doc_code">
-<pre>
-Instruction *pinst = i;
-</pre>
-</div>
-
-<p>It's also possible to turn a class pointer into the corresponding iterator,
-and this is a constant time operation (very efficient). The following code
-snippet illustrates use of the conversion constructors provided by LLVM
-iterators. By using these, you can explicitly grab the iterator of something
-without actually obtaining it via iteration over some structure:</p>
-
-<div class="doc_code">
-<pre>
-void printNextInstruction(Instruction* inst) {
- BasicBlock::iterator it(inst);
- ++it; // <i>After this line, it refers to the instruction after *inst</i>
- if (it != inst-&gt;getParent()-&gt;end()) errs() &lt;&lt; *it &lt;&lt; "\n";
-}
-</pre>
-</div>
-
-<p>Unfortunately, these implicit conversions come at a cost; they prevent
-these iterators from conforming to standard iterator conventions, and thus
-from being usable with standard algorithms and containers. For example, they
-prevent the following code, where <tt>B</tt> is a <tt>BasicBlock</tt>,
-from compiling:</p>
-
-<div class="doc_code">
-<pre>
- llvm::SmallVector&lt;llvm::Instruction *, 16&gt;(B-&gt;begin(), B-&gt;end());
-</pre>
-</div>
-
-<p>Because of this, these implicit conversions may be removed some day,
-and <tt>operator*</tt> changed to return a pointer instead of a reference.</p>
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
- <a name="iterate_complex">Finding call sites: a slightly more complex
- example</a>
-</h4>
-
-<div>
-
-<p>Say that you're writing a FunctionPass and would like to count all the
-locations in the entire module (that is, across every <tt>Function</tt>) where a
-certain function (i.e., some <tt>Function</tt>*) is called. As you'll
-learn later, you may want to use an <tt>InstVisitor</tt> to accomplish this in a
-much more straight-forward manner, but this example will allow us to explore how
-you'd do it if you didn't have <tt>InstVisitor</tt> around. In pseudo-code, this
-is what we want to do:</p>
-
-<div class="doc_code">
-<pre>
-initialize callCounter to zero
-for each Function f in the Module
- for each BasicBlock b in f
- for each Instruction i in b
- if (i is a CallInst and calls the given function)
- increment callCounter
-</pre>
-</div>
-
-<p>And the actual code is (remember, because we're writing a
-<tt>FunctionPass</tt>, our <tt>FunctionPass</tt>-derived class simply has to
-override the <tt>runOnFunction</tt> method):</p>
-
-<div class="doc_code">
-<pre>
-Function* targetFunc = ...;
-
-class OurFunctionPass : public FunctionPass {
- public:
- OurFunctionPass(): callCounter(0) { }
-
-    virtual bool runOnFunction(Function&amp; F) {
- for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) {
- for (BasicBlock::iterator i = b-&gt;begin(), ie = b-&gt;end(); i != ie; ++i) {
- if (<a href="#CallInst">CallInst</a>* callInst = <a href="#isa">dyn_cast</a>&lt;<a
- href="#CallInst">CallInst</a>&gt;(&amp;*i)) {
- // <i>We know we've encountered a call instruction, so we</i>
- // <i>need to determine if it's a call to the</i>
-          // <i>function pointed to by targetFunc or not.</i>
- if (callInst-&gt;getCalledFunction() == targetFunc)
- ++callCounter;
- }
- }
-    }
-    return false;  // <i>we didn't modify the function</i>
-  }
-
- private:
- unsigned callCounter;
-};
-</pre>
-</div>
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
- <a name="calls_and_invokes">Treating calls and invokes the same way</a>
-</h4>
-
-<div>
-
-<p>You may have noticed that the previous example was a bit oversimplified in
-that it did not deal with call sites generated by 'invoke' instructions. In
-this, and in other situations, you may find that you want to treat
-<tt>CallInst</tt>s and <tt>InvokeInst</tt>s the same way, even though their
-most-specific common base class is <tt>Instruction</tt>, which includes lots of
-less closely-related things. For these cases, LLVM provides a handy wrapper
-class called <a
-href="http://llvm.org/doxygen/classllvm_1_1CallSite.html"><tt>CallSite</tt></a>.
-It is essentially a wrapper around an <tt>Instruction</tt> pointer, with some
-methods that provide functionality common to <tt>CallInst</tt>s and
-<tt>InvokeInst</tt>s.</p>
-
-<p>This class has "value semantics": it should be passed by value, not by
-reference and it should not be dynamically allocated or deallocated using
-<tt>operator new</tt> or <tt>operator delete</tt>. It is efficiently copyable,
-assignable and constructable, with costs equivalent to those of a bare pointer.
-If you look at its definition, it has only a single pointer member.</p>
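-
-<p>For example, a sketch that handles calls and invokes uniformly, assuming
-<tt>"llvm/Support/CallSite.h"</tt> is included:</p>
-
-<div class="doc_code">
-<pre>
-CallSite CS(I);                    // <i>I is an Instruction*</i>
-if (CS.getInstruction()) {         // <i>null unless I is a call or invoke</i>
-  Function *Callee = CS.getCalledFunction();   // <i>null for indirect calls</i>
-  for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
-       AI != AE; ++AI)
-    ...
-}
-</pre>
-</div>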
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
- <a name="iterate_chains">Iterating over def-use &amp; use-def chains</a>
-</h4>
-
-<div>
-
-<p>Frequently, we might have an instance of the <a
-href="/doxygen/classllvm_1_1Value.html">Value Class</a> and we want to
-determine which <tt>User</tt>s use the <tt>Value</tt>. The list of all
-<tt>User</tt>s of a particular <tt>Value</tt> is called a <i>def-use</i> chain.
-For example, let's say we have a <tt>Function*</tt> named <tt>F</tt> to a
-particular function <tt>foo</tt>. Finding all of the instructions that
-<i>use</i> <tt>foo</tt> is as simple as iterating over the <i>def-use</i> chain
-of <tt>F</tt>:</p>
-
-<div class="doc_code">
-<pre>
-Function *F = ...;
-
-for (Value::use_iterator i = F-&gt;use_begin(), e = F-&gt;use_end(); i != e; ++i)
- if (Instruction *Inst = dyn_cast&lt;Instruction&gt;(*i)) {
- errs() &lt;&lt; "F is used in instruction:\n";
- errs() &lt;&lt; *Inst &lt;&lt; "\n";
- }
-</pre>
-</div>
-
-<p>Note that dereferencing a <tt>Value::use_iterator</tt> is not a very cheap
-operation. Instead of performing <tt>*i</tt> above several times, consider
-doing it only once in the loop body and reusing its result.</p>
-
-<p>Alternatively, it's common to have an instance of the <a
-href="/doxygen/classllvm_1_1User.html">User Class</a> and need to know what
-<tt>Value</tt>s are used by it. The list of all <tt>Value</tt>s used by a
-<tt>User</tt> is known as a <i>use-def</i> chain. Instances of class
-<tt>Instruction</tt> are common <tt>User</tt>s, so we might want to iterate over
-all of the values that a particular instruction uses (that is, the operands of
-the particular <tt>Instruction</tt>):</p>
-
-<div class="doc_code">
-<pre>
-Instruction *pi = ...;
-
-for (User::op_iterator i = pi-&gt;op_begin(), e = pi-&gt;op_end(); i != e; ++i) {
- Value *v = *i;
- // <i>...</i>
-}
-</pre>
-</div>
-
-<p>Declaring objects as <tt>const</tt> is an important tool for enforcing
-mutation-free algorithms (such as analyses). For this purpose, the above
-iterators come in constant flavors as <tt>Value::const_use_iterator</tt>
-and <tt>Value::const_op_iterator</tt>. They automatically arise when
-calling <tt>use/op_begin()</tt> on <tt>const Value*</tt>s or
-<tt>const User*</tt>s respectively. Upon dereferencing, they return
-<tt>const Use*</tt>s. Otherwise the above patterns remain unchanged.</p>
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
- <a name="iterate_preds">Iterating over predecessors &amp;
-successors of blocks</a>
-</h4>
-
-<div>
-
-<p>Iterating over the predecessors and successors of a block is quite easy
-with the routines defined in <tt>"llvm/Support/CFG.h"</tt>. Just use code like
-this to iterate over all predecessors of BB:</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/Support/CFG.h"
-BasicBlock *BB = ...;
-
-for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *Pred = *PI;
- // <i>...</i>
-}
-</pre>
-</div>
-
-<p>Similarly, to iterate over successors use
-succ_iterator/succ_begin/succ_end.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="simplechanges">Making simple changes</a>
-</h3>
-
-<div>
-
-<p>There are some primitive transformation operations present in the LLVM
-infrastructure that are worth knowing about. When performing
-transformations, it's fairly common to manipulate the contents of basic
-blocks. This section describes some of the common methods for doing so
-and gives example code.</p>
-
-<!--_______________________________________________________________________-->
-<h4>
- <a name="schanges_creating">Creating and inserting new
- <tt>Instruction</tt>s</a>
-</h4>
-
-<div>
-
-<p><i>Instantiating Instructions</i></p>
-
-<p>Creation of <tt>Instruction</tt>s is straight-forward: simply call the
-constructor for the kind of instruction to instantiate and provide the necessary
-parameters. For example, an <tt>AllocaInst</tt> only <i>requires</i> a
-pointer to a <tt>Type</tt>. Thus:</p>
-
-<div class="doc_code">
-<pre>
-AllocaInst* ai = new AllocaInst(Type::getInt32Ty(Ctx)); // <i>Ctx is an LLVMContext&amp;</i>
-</pre>
-</div>
-
-<p>will create an <tt>AllocaInst</tt> instance that represents the allocation of
-one integer in the current stack frame, at run time. Each <tt>Instruction</tt>
-subclass is likely to have varying default parameters which change the semantics
-of the instruction, so refer to the <a
-href="/doxygen/classllvm_1_1Instruction.html">doxygen documentation for the subclass of
-Instruction</a> that you're interested in instantiating.</p>
-
-<p><i>Naming values</i></p>
-
-<p>It is very useful to name the values of instructions when you're able to, as
-this facilitates the debugging of your transformations. If you end up looking
-at generated LLVM machine code, you definitely want to have logical names
-associated with the results of instructions! By supplying a value for the
-<tt>Name</tt> (default) parameter of the <tt>Instruction</tt> constructor, you
-associate a logical name with the result of the instruction's execution at
-run time. For example, say that I'm writing a transformation that dynamically
-allocates space for an integer on the stack, and that integer is going to be
-used as some kind of index by some other code. To accomplish this, I place an
-<tt>AllocaInst</tt> at the first point in the first <tt>BasicBlock</tt> of some
-<tt>Function</tt>, and I'm intending to use it within the same
-<tt>Function</tt>. I might do:</p>
-
-<div class="doc_code">
-<pre>
-AllocaInst* pa = new AllocaInst(Type::getInt32Ty(Ctx), 0, "indexLoc");
-</pre>
-</div>
-
-<p>where <tt>indexLoc</tt> is now the logical name of the instruction's
-execution value, which is a pointer to an integer on the run time stack.</p>
-
-<p><i>Inserting instructions</i></p>
-
-<p>There are essentially two ways to insert an <tt>Instruction</tt>
-into an existing sequence of instructions that form a <tt>BasicBlock</tt>:</p>
-
-<ul>
- <li>Insertion into an explicit instruction list
-
- <p>Given a <tt>BasicBlock* pb</tt>, an <tt>Instruction* pi</tt> within that
- <tt>BasicBlock</tt>, and a newly-created instruction we wish to insert
- before <tt>*pi</tt>, we do the following: </p>
-
-<div class="doc_code">
-<pre>
-BasicBlock *pb = ...;
-Instruction *pi = ...;
-Instruction *newInst = new Instruction(...);
-
-pb-&gt;getInstList().insert(pi, newInst); // <i>Inserts newInst before pi in pb</i>
-</pre>
-</div>
-
- <p>Appending to the end of a <tt>BasicBlock</tt> is so common that
- the <tt>Instruction</tt> class and <tt>Instruction</tt>-derived
- classes provide constructors which take a pointer to a
-  <tt>BasicBlock</tt> to be appended to. For example, code that
- looked like: </p>
-
-<div class="doc_code">
-<pre>
-BasicBlock *pb = ...;
-Instruction *newInst = new Instruction(...);
-
-pb-&gt;getInstList().push_back(newInst); // <i>Appends newInst to pb</i>
-</pre>
-</div>
-
- <p>becomes: </p>
-
-<div class="doc_code">
-<pre>
-BasicBlock *pb = ...;
-Instruction *newInst = new Instruction(..., pb);
-</pre>
-</div>
-
- <p>which is much cleaner, especially if you are creating
- long instruction streams.</p></li>
-
- <li>Insertion into an implicit instruction list
-
- <p><tt>Instruction</tt> instances that are already in <tt>BasicBlock</tt>s
- are implicitly associated with an existing instruction list: the instruction
- list of the enclosing basic block. Thus, we could have accomplished the same
- thing as the above code without being given a <tt>BasicBlock</tt> by doing:
- </p>
-
-<div class="doc_code">
-<pre>
-Instruction *pi = ...;
-Instruction *newInst = new Instruction(...);
-
-pi-&gt;getParent()-&gt;getInstList().insert(pi, newInst);
-</pre>
-</div>
-
- <p>In fact, this sequence of steps occurs so frequently that the
- <tt>Instruction</tt> class and <tt>Instruction</tt>-derived classes provide
- constructors which take (as a default parameter) a pointer to an
- <tt>Instruction</tt> which the newly-created <tt>Instruction</tt> should
- precede. That is, <tt>Instruction</tt> constructors are capable of
- inserting the newly-created instance into the <tt>BasicBlock</tt> of a
- provided instruction, immediately before that instruction. Using an
-  <tt>Instruction</tt> constructor with an <tt>insertBefore</tt> (default)
- parameter, the above code becomes:</p>
-
-<div class="doc_code">
-<pre>
-Instruction* pi = ...;
-Instruction* newInst = new Instruction(..., pi);
-</pre>
-</div>
-
- <p>which is much cleaner, especially if you're creating a lot of
- instructions and adding them to <tt>BasicBlock</tt>s.</p></li>
-</ul>
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
- <a name="schanges_deleting">Deleting <tt>Instruction</tt>s</a>
-</h4>
-
-<div>
-
-<p>Deleting an instruction from an existing sequence of instructions that form a
-<a href="#BasicBlock"><tt>BasicBlock</tt></a> is very straight-forward: just
-call the instruction's eraseFromParent() method. For example:</p>
-
-<div class="doc_code">
-<pre>
-<a href="#Instruction">Instruction</a> *I = .. ;
-I-&gt;eraseFromParent();
-</pre>
-</div>
-
-<p>This unlinks the instruction from its containing basic block and deletes
-it. If you'd just like to unlink the instruction from its containing basic
-block but not delete it, you can use the <tt>removeFromParent()</tt> method.</p>
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
- <a name="schanges_replacing">Replacing an <tt>Instruction</tt> with another
- <tt>Value</tt></a>
-</h4>
-
-<div>
-
-<h5><i>Replacing individual instructions</i></h5>
-
-<p>Including "<a href="/doxygen/BasicBlockUtils_8h-source.html">llvm/Transforms/Utils/BasicBlockUtils.h</a>"
-permits use of two very useful replace functions: <tt>ReplaceInstWithValue</tt>
-and <tt>ReplaceInstWithInst</tt>.</p>
-
-<h5><a name="schanges_deleting">Deleting <tt>Instruction</tt>s</a></h5>
-
-<div>
-<ul>
- <li><tt>ReplaceInstWithValue</tt>
-
- <p>This function replaces all uses of a given instruction with a value,
- and then removes the original instruction. The following example
- illustrates the replacement of the result of a particular
- <tt>AllocaInst</tt> that allocates memory for a single integer with a null
- pointer to an integer.</p>
-
-<div class="doc_code">
-<pre>
-AllocaInst* instToReplace = ...;
-BasicBlock::iterator ii(instToReplace);
-
-ReplaceInstWithValue(instToReplace-&gt;getParent()-&gt;getInstList(), ii,
-                     Constant::getNullValue(PointerType::getUnqual(Type::getInt32Ty(Ctx))));
-</pre></div></li>
-
- <li><tt>ReplaceInstWithInst</tt>
-
- <p>This function replaces a particular instruction with another
- instruction, inserting the new instruction into the basic block at the
- location where the old instruction was, and replacing any uses of the old
- instruction with the new instruction. The following example illustrates
- the replacement of one <tt>AllocaInst</tt> with another.</p>
-
-<div class="doc_code">
-<pre>
-AllocaInst* instToReplace = ...;
-BasicBlock::iterator ii(instToReplace);
-
-ReplaceInstWithInst(instToReplace-&gt;getParent()-&gt;getInstList(), ii,
-                    new AllocaInst(Type::getInt32Ty(Ctx), 0, "ptrToReplacedInt"));
-</pre></div></li>
-</ul>
-
-</div>
-
-<h5><i>Replacing multiple uses of <tt>User</tt>s and <tt>Value</tt>s</i></h5>
-
-<p>You can use <tt>Value::replaceAllUsesWith</tt> and
-<tt>User::replaceUsesOfWith</tt> to change more than one use at a time. See the
-doxygen documentation for the <a href="/doxygen/classllvm_1_1Value.html">Value Class</a>
-and <a href="/doxygen/classllvm_1_1User.html">User Class</a>, respectively, for more
-information.</p>
-
-<!-- Value::replaceAllUsesWith User::replaceUsesOfWith Point out:
-include/llvm/Transforms/Utils/ especially BasicBlockUtils.h with:
-ReplaceInstWithValue, ReplaceInstWithInst -->
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
- <a name="schanges_deletingGV">Deleting <tt>GlobalVariable</tt>s</a>
-</h4>
-
-<div>
-
-<p>Deleting a global variable from a module is just as easy as deleting an
-Instruction. First, you must have a pointer to the global variable that you wish
- to delete. You use this pointer to erase it from its parent, the module.
- For example:</p>
-
-<div class="doc_code">
-<pre>
-<a href="#GlobalVariable">GlobalVariable</a> *GV = .. ;
-
-GV-&gt;eraseFromParent();
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="create_types">How to Create Types</a>
-</h3>
-
-<div>
-
-<p>In generating IR, you may need some complex types. If you know these types
-statically, you can use <tt>TypeBuilder&lt;...&gt;::get()</tt>, defined
-in <tt>llvm/Support/TypeBuilder.h</tt>, to retrieve them. <tt>TypeBuilder</tt>
-has two forms depending on whether you're building types for cross-compilation
-or native library use. <tt>TypeBuilder&lt;T, true&gt;</tt> requires
-that <tt>T</tt> be independent of the host environment, meaning that it's built
-out of types from
-the <a href="/doxygen/namespacellvm_1_1types.html"><tt>llvm::types</tt></a>
-namespace and pointers, functions, arrays, etc. built of
-those. <tt>TypeBuilder&lt;T, false&gt;</tt> additionally allows native C types
-whose size may depend on the host compiler. For example,</p>
-
-<div class="doc_code">
-<pre>
-FunctionType *ft = TypeBuilder&lt;types::i&lt;8&gt;(types::i&lt;32&gt;*), true&gt;::get();
-</pre>
-</div>
-
-<p>is easier to read and write than the equivalent</p>
-
-<div class="doc_code">
-<pre>
-std::vector&lt;Type*&gt; params;
-params.push_back(PointerType::getUnqual(Type::getInt32Ty(Ctx)));
-FunctionType *ft = FunctionType::get(Type::getInt8Ty(Ctx), params, false);
-</pre>
-</div>
-
-<p>See the <a href="/doxygen/TypeBuilder_8h-source.html#l00001">class
-comment</a> for more details.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="threading">Threads and LLVM</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p>
-This section describes the interaction of the LLVM APIs with multithreading,
-both on the part of client applications and of hosted applications running
-in the JIT.
-</p>
-
-<p>
-Note that LLVM's support for multithreading is still relatively young. Up
-through version 2.5, the execution of threaded hosted applications was
-supported, but not threaded client access to the APIs. While this use case is
-now supported, clients <em>must</em> adhere to the guidelines specified below to
-ensure proper operation in multithreaded mode.
-</p>
-
-<p>
-Note that, on Unix-like platforms, LLVM requires the presence of GCC's atomic
-intrinsics in order to support threaded operation. If you need a
-multithreading-capable LLVM on a platform without a suitably modern system
-compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and
-using the resultant compiler to build a copy of LLVM with multithreading
-support.
-</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="startmultithreaded">Entering and Exiting Multithreaded Mode</a>
-</h3>
-
-<div>
-
-<p>
-In order to properly protect its internal data structures while avoiding
-excessive locking overhead in the single-threaded case, LLVM must initialize
-certain data structures necessary to provide guards around its internals. To do
-so, the client program must invoke <tt>llvm_start_multithreaded()</tt> before
-making any concurrent LLVM API calls. To subsequently tear down these
-structures, use the <tt>llvm_stop_multithreaded()</tt> call. You can also use
-the <tt>llvm_is_multithreaded()</tt> call to check the status of multithreaded
-mode.
-</p>
-
-<p>
-Note that both of these calls must be made <em>in isolation</em>. That is to
-say that no other LLVM API calls may be executing at any time during the
-execution of <tt>llvm_start_multithreaded()</tt> or
-<tt>llvm_stop_multithreaded()</tt>. It is the client's responsibility to
-enforce this isolation.
-</p>
-
-<p>
-The return value of <tt>llvm_start_multithreaded()</tt> indicates the success or
-failure of the initialization. Failure typically indicates that your copy of
-LLVM was built without multithreading support, usually because GCC atomic
-intrinsics were not found in your system compiler. In this case, the LLVM API
-will not be safe for concurrent calls. However, it <em>will</em> be safe for
-hosting threaded applications in the JIT, though <a href="#jitthreading">care
-must be taken</a> to ensure that side exits and the like do not accidentally
-result in concurrent LLVM API calls.
-</p>
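-
-<p>A minimal sketch of the expected call sequence (assuming
-<tt>"llvm/Support/Threading.h"</tt> and
-<tt>"llvm/Support/ErrorHandling.h"</tt> are included):</p>
-
-<div class="doc_code">
-<pre>
-if (!llvm_start_multithreaded())
-  report_fatal_error("this copy of LLVM cannot run multithreaded");
-
-// <i>... concurrent LLVM API calls from multiple threads ...</i>
-
-llvm_stop_multithreaded();   // <i>must also be called in isolation</i>
-</pre>
-</div>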
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="shutdown">Ending Execution with <tt>llvm_shutdown()</tt></a>
-</h3>
-
-<div>
-<p>
-When you are done using the LLVM APIs, you should call <tt>llvm_shutdown()</tt>
-to deallocate memory used for internal structures. This will also invoke
-<tt>llvm_stop_multithreaded()</tt> if LLVM is operating in multithreaded mode.
-As such, <tt>llvm_shutdown()</tt> requires the same isolation guarantees as
-<tt>llvm_stop_multithreaded()</tt>.
-</p>
-
-<p>
-Note that, if you use scope-based shutdown, you can use the
-<tt>llvm_shutdown_obj</tt> class, which calls <tt>llvm_shutdown()</tt> in its
-destructor.</p>
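-
-<p>For example, assuming <tt>"llvm/Support/ManagedStatic.h"</tt> is
-included:</p>
-
-<div class="doc_code">
-<pre>
-int main(int argc, char **argv) {
-  llvm_shutdown_obj Y;   // <i>calls llvm_shutdown() when main exits</i>
-  // <i>... use the LLVM APIs ...</i>
-  return 0;
-}
-</pre>
-</div>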
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="managedstatic">Lazy Initialization with <tt>ManagedStatic</tt></a>
-</h3>
-
-<div>
-<p>
-<tt>ManagedStatic</tt> is a utility class in LLVM used to implement static
-initialization of static resources, such as the global type tables. Before the
-invocation of <tt>llvm_shutdown()</tt>, it implements a simple lazy
-initialization scheme. Once <tt>llvm_start_multithreaded()</tt> returns,
-however, it uses double-checked locking to implement thread-safe lazy
-initialization.
-</p>
-
-<p>
-Note that, because no other threads are allowed to issue LLVM API calls before
-<tt>llvm_start_multithreaded()</tt> returns, it is possible to have
-<tt>ManagedStatic</tt>s of <tt>llvm::sys::Mutex</tt>s.
-</p>
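-
-<p>For instance, a lazily-constructed global mutex can be declared as follows;
-the <tt>Mutex</tt> is only created on first access (a sketch):</p>
-
-<div class="doc_code">
-<pre>
-static ManagedStatic&lt;sys::Mutex&gt; SomeLock;
-
-void doWork() {
-  SomeLock-&gt;acquire();   // <i>constructs the Mutex on first use</i>
-  // <i>...</i>
-  SomeLock-&gt;release();
-}
-</pre>
-</div>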
-
-<p>
-The <tt>llvm_acquire_global_lock()</tt> and <tt>llvm_release_global_lock()</tt>
-APIs provide access to the global lock used to implement the double-checked
-locking for lazy initialization. These should only be used internally to LLVM,
-and only if you know what you're doing!
-</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="llvmcontext">Achieving Isolation with <tt>LLVMContext</tt></a>
-</h3>
-
-<div>
-<p>
-<tt>LLVMContext</tt> is an opaque class in the LLVM API which clients can use
-to operate multiple, isolated instances of LLVM concurrently within the same
-address space. For instance, in a hypothetical compile-server, the compilation
-of an individual translation unit is conceptually independent from all the
-others, and it would be desirable to be able to compile incoming translation
-units concurrently on independent server threads. Fortunately,
-<tt>LLVMContext</tt> exists to enable just this kind of scenario!
-</p>
-
-<p>
-Conceptually, <tt>LLVMContext</tt> provides isolation. Every LLVM entity
-(<tt>Module</tt>s, <tt>Value</tt>s, <tt>Type</tt>s, <tt>Constant</tt>s, etc.)
-in LLVM's in-memory IR belongs to an <tt>LLVMContext</tt>. Entities in
-different contexts <em>cannot</em> interact with each other: <tt>Module</tt>s in
-different contexts cannot be linked together, <tt>Function</tt>s cannot be added
-to <tt>Module</tt>s in different contexts, etc. What this means is that it is
-safe to compile on multiple threads simultaneously, as long as no two threads
-operate on entities within the same context.
-</p>
-
-<p>
-In practice, very few places in the API require the explicit specification of an
-<tt>LLVMContext</tt>, other than the <tt>Type</tt> creation/lookup APIs.
-Because every <tt>Type</tt> carries a reference to its owning context, most
-other entities can determine what context they belong to by looking at their
-own <tt>Type</tt>. If you are adding new entities to LLVM IR, please try to
-maintain this interface design.
-</p>
-
-<p>
-For clients that do <em>not</em> require the benefits of isolation, LLVM
-provides a convenience API <tt>getGlobalContext()</tt>. This returns a global,
-lazily initialized <tt>LLVMContext</tt> that may be used in situations where
-isolation is not a concern.
-</p>
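-
-<p>A sketch of this kind of isolation, assuming each worker thread runs the
-function below on its own translation unit:</p>
-
-<div class="doc_code">
-<pre>
-void compileOneUnit(StringRef Name) {
-  LLVMContext Ctx;                     // <i>private to this thread</i>
-  Module *M = new Module(Name, Ctx);   // <i>M lives in Ctx</i>
-  // <i>... build and compile M; never mix entities from other contexts ...</i>
-  delete M;
-}
-</pre>
-</div>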
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="jitthreading">Threads and the JIT</a>
-</h3>
-
-<div>
-<p>
-LLVM's "eager" JIT compiler is safe to use in threaded programs. Multiple
-threads can call <tt>ExecutionEngine::getPointerToFunction()</tt> or
-<tt>ExecutionEngine::runFunction()</tt> concurrently, and multiple threads can
-run code output by the JIT concurrently. The user must still ensure that only
-one thread accesses IR in a given <tt>LLVMContext</tt> while another thread
-might be modifying it. One way to do that is to always hold the JIT lock while
-accessing IR outside the JIT (the JIT <em>modifies</em> the IR by adding
-<tt>CallbackVH</tt>s). Another way is to only
-call <tt>getPointerToFunction()</tt> from the <tt>LLVMContext</tt>'s thread.
-</p>
-
-<p>When the JIT is configured to compile lazily (using
-<tt>ExecutionEngine::DisableLazyCompilation(false)</tt>), there is currently a
-<a href="http://llvm.org/bugs/show_bug.cgi?id=5184">race condition</a> in
-updating call sites after a function is lazily-jitted. It's still possible to
-use the lazy JIT in a threaded program if you ensure that only one thread at a
-time can call any particular lazy stub and that the JIT lock guards any IR
-access, but we suggest using only the eager JIT in threaded programs.
-</p>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="advanced">Advanced Topics</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p>
-This section describes some of the advanced or obscure APIs that most clients
-do not need to be aware of. These APIs tend to manage the inner workings of the
-LLVM system, and only need to be accessed in unusual circumstances.
-</p>
-
-
-<!-- ======================================================================= -->
-<h3>
- <a name="SymbolTable">The <tt>ValueSymbolTable</tt> class</a>
-</h3>
-
-<div>
-<p>The <tt><a href="http://llvm.org/doxygen/classllvm_1_1ValueSymbolTable.html">
-ValueSymbolTable</a></tt> class provides a symbol table that the <a
-href="#Function"><tt>Function</tt></a> and <a href="#Module">
-<tt>Module</tt></a> classes use for naming value definitions. The symbol table
-can provide a name for any <a href="#Value"><tt>Value</tt></a>.
-</p>
-
-<p>Note that the <tt>SymbolTable</tt> class should not be directly accessed
-by most clients. It should only be used when iteration over the symbol table
-names themselves is required, which is a very specialized use. Note that not
-all LLVM
-<tt><a href="#Value">Value</a></tt>s have names, and those without names (i.e. they have
-an empty name) do not exist in the symbol table.
-</p>
-
-<p>Symbol tables support iteration over the values in the symbol
-table with <tt>begin/end/iterator</tt> and support querying to see if a
-specific name is in the symbol table (with <tt>lookup</tt>). The
-<tt>ValueSymbolTable</tt> class exposes no public mutator methods; instead,
-simply call <tt>setName</tt> on a value, which will autoinsert it into the
-appropriate symbol table.</p>
-
-</div>
-
-
-
-<!-- ======================================================================= -->
-<h3>
- <a name="UserLayout">The <tt>User</tt> and owned <tt>Use</tt> classes' memory layout</a>
-</h3>
-
-<div>
-<p>The <tt><a href="http://llvm.org/doxygen/classllvm_1_1User.html">
-User</a></tt> class provides a basis for expressing the ownership of <tt>User</tt>
-towards other <tt><a href="http://llvm.org/doxygen/classllvm_1_1Value.html">
-Value</a></tt>s. The <tt><a href="http://llvm.org/doxygen/classllvm_1_1Use.html">
-Use</a></tt> helper class is employed to do the bookkeeping and to facilitate <i>O(1)</i>
-addition and removal.</p>
-
-<!-- ______________________________________________________________________ -->
-<h4>
- <a name="Use2User">
- Interaction and relationship between <tt>User</tt> and <tt>Use</tt> objects
- </a>
-</h4>
-
-<div>
-<p>
-A subclass of <tt>User</tt> can choose between incorporating its <tt>Use</tt> objects
-or referring to them out-of-line by means of a pointer. A mixed variant
-(some <tt>Use</tt>s inline, others hung off) is impractical and breaks the invariant
-that the <tt>Use</tt> objects belonging to the same <tt>User</tt> form a contiguous array.
-</p>
-
-<p>
-We have two different layouts in the <tt>User</tt> (sub)classes:
-<ul>
-<li><p>Layout a)
-The <tt>Use</tt> object(s) are inside (that is, at a fixed offset of) the <tt>User</tt>
-object, and there are a fixed number of them.</p>
-
-<li><p>Layout b)
-The <tt>Use</tt> object(s) are referenced by a pointer to an
-array from the <tt>User</tt> object and there may be a variable
-number of them.</p>
-</ul>
-<p>
-As of v2.4 each layout still possesses a direct pointer to the
-start of the array of <tt>Use</tt>s. Though not mandatory for layout a),
-we stick to this redundancy for the sake of simplicity.
-The <tt>User</tt> object also stores the number of <tt>Use</tt> objects it
-has. (Theoretically this information can also be calculated
-given the scheme presented below.)</p>
-<p>
-Special forms of allocation operators (<tt>operator new</tt>)
-enforce the following memory layouts:</p>
-
-<ul>
-<li><p>Layout a) is modelled by prepending the <tt>User</tt> object by the <tt>Use[]</tt> array.</p>
-
-<pre>
-...---.---.---.---.-------...
- | P | P | P | P | User
-'''---'---'---'---'-------'''
-</pre>
-
-<li><p>Layout b) is modelled by pointing at the <tt>Use[]</tt> array.</p>
-<pre>
-.-------...
-| User
-'-------'''
- |
- v
- .---.---.---.---...
- | P | P | P | P |
- '---'---'---'---'''
-</pre>
-</ul>
-<i>(In the above figures '<tt>P</tt>' stands for the <tt>Use**</tt> that
- is stored in each <tt>Use</tt> object in the member <tt>Use::Prev</tt>)</i>
-
-</div>
-
-<!-- ______________________________________________________________________ -->
-<h4>
- <a name="Waymarking">The waymarking algorithm</a>
-</h4>
-
-<div>
-<p>
-Since the <tt>Use</tt> objects are deprived of the direct (back)pointer to
-their <tt>User</tt> objects, there must be a fast and exact method to
-recover it. This is accomplished by the following scheme:</p>
-
-A bit-encoding in the 2 LSBits (least significant bits) of <tt>Use::Prev</tt> makes
-it possible to find the start of the <tt>User</tt> object:
-<ul>
-<li><tt>00</tt> &mdash;&gt; binary digit 0</li>
-<li><tt>01</tt> &mdash;&gt; binary digit 1</li>
-<li><tt>10</tt> &mdash;&gt; stop and calculate (<tt>s</tt>)</li>
-<li><tt>11</tt> &mdash;&gt; full stop (<tt>S</tt>)</li>
-</ul>
-<p>
-Given a <tt>Use*</tt>, all we have to do is walk until we reach a stop:
-either a <tt>User</tt> is immediately behind it, or we have to walk on
-to the next stop, picking up digits
-and calculating the offset:</p>
-<pre>
-.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.----------------
-| 1 | s | 1 | 0 | 1 | 0 | s | 1 | 1 | 0 | s | 1 | 1 | s | 1 | S | User (or User*)
-'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'----------------
- |+15 |+10 |+6 |+3 |+1
- | | | | |__>
- | | | |__________>
- | | |______________________>
- | |______________________________________>
- |__________________________________________________________>
-</pre>
-<p>
-Only the significant number of bits need to be stored between the
-stops, so that the <i>worst case is 20 memory accesses</i> when there are
-1000 <tt>Use</tt> objects associated with a <tt>User</tt>.</p>
-
-</div>
-
-<!-- ______________________________________________________________________ -->
-<h4>
- <a name="ReferenceImpl">Reference implementation</a>
-</h4>
-
-<div>
-<p>
-The following literate Haskell fragment demonstrates the concept:</p>
-
-<div class="doc_code">
-<pre>
-> import Test.QuickCheck
->
-> digits :: Int -> [Char] -> [Char]
-> digits 0 acc = '0' : acc
-> digits 1 acc = '1' : acc
-> digits n acc = digits (n `div` 2) $ digits (n `mod` 2) acc
->
-> dist :: Int -> [Char] -> [Char]
-> dist 0 [] = ['S']
-> dist 0 acc = acc
-> dist 1 acc = let r = dist 0 acc in 's' : digits (length r) r
-> dist n acc = dist (n - 1) $ dist 1 acc
->
-> takeLast n ss = reverse $ take n $ reverse ss
->
-> test = takeLast 40 $ dist 20 []
->
-</pre>
-</div>
-<p>
-Printing &lt;test&gt; gives: <tt>"1s100000s11010s10100s1111s1010s110s11s1S"</tt></p>
-<p>
-The reverse algorithm computes the length of the string just by examining
-a certain prefix:</p>
-
-<div class="doc_code">
-<pre>
-> pref :: [Char] -> Int
-> pref "S" = 1
-> pref ('s':'1':rest) = decode 2 1 rest
-> pref (_:rest) = 1 + pref rest
->
-> decode walk acc ('0':rest) = decode (walk + 1) (acc * 2) rest
-> decode walk acc ('1':rest) = decode (walk + 1) (acc * 2 + 1) rest
-> decode walk acc _ = walk + acc
->
-</pre>
-</div>
-<p>
-Now, as expected, printing &lt;pref test&gt; gives <tt>40</tt>.</p>
-<p>
-We can <i>quickCheck</i> this with following property:</p>
-
-<div class="doc_code">
-<pre>
-> testcase = dist 2000 []
-> testcaseLength = length testcase
->
-> identityProp n = n > 0 && n <= testcaseLength ==> length arr == pref arr
-> where arr = takeLast n testcase
->
-</pre>
-</div>
-<p>
-As expected &lt;quickCheck identityProp&gt; gives:</p>
-
-<pre>
-*Main> quickCheck identityProp
-OK, passed 100 tests.
-</pre>
-<p>
-Let's be a bit more exhaustive:</p>
-
-<div class="doc_code">
-<pre>
->
-> deepCheck p = check (defaultConfig { configMaxTest = 500 }) p
->
-</pre>
-</div>
-<p>
-And here is the result of &lt;deepCheck identityProp&gt;:</p>
-
-<pre>
-*Main> deepCheck identityProp
-OK, passed 500 tests.
-</pre>
-
-</div>
-
-<!-- ______________________________________________________________________ -->
-<h4>
- <a name="Tagging">Tagging considerations</a>
-</h4>
-
-<div>
-
-<p>
-To maintain the invariant that the 2 LSBits of each <tt>Use**</tt> in <tt>Use</tt>
-never change after being set up, setters of <tt>Use::Prev</tt> must re-tag the
-new <tt>Use**</tt> on every modification. Accordingly, getters must strip the
-tag bits.</p>
-<p>
-For layout b), instead of the <tt>User</tt> we find a pointer (a <tt>User*</tt> with its LSBit set).
-Following this pointer brings us to the <tt>User</tt>. A portable trick ensures
-that the first bytes of <tt>User</tt> (if interpreted as a pointer) never have
-the LSBit set. (Portability relies on the fact that all known compilers place the
-<tt>vptr</tt> in the first word of the instances.)
-
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="coreclasses">The Core LLVM Class Hierarchy Reference </a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p><tt>#include "<a href="/doxygen/Type_8h-source.html">llvm/Type.h</a>"</tt>
-<br>doxygen info: <a href="/doxygen/classllvm_1_1Type.html">Type Class</a></p>
-
-<p>The Core LLVM classes are the primary means of representing the program
-being inspected or transformed. The core LLVM classes are defined in
-header files in the <tt>include/llvm/</tt> directory, and implemented in
-the <tt>lib/VMCore</tt> directory.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="Type">The <tt>Type</tt> class and Derived Types</a>
-</h3>
-
-<div>
-
- <p><tt>Type</tt> is a superclass of all type classes. Every <tt>Value</tt> has
- a <tt>Type</tt>. <tt>Type</tt> cannot be instantiated directly but only
- through its subclasses. Certain primitive types (<tt>VoidType</tt>,
- <tt>LabelType</tt>, <tt>FloatType</tt> and <tt>DoubleType</tt>) have hidden
- subclasses. They are hidden because they offer no useful functionality beyond
- what the <tt>Type</tt> class offers except to distinguish themselves from
- other subclasses of <tt>Type</tt>.</p>
- <p>All other types are subclasses of <tt>DerivedType</tt>. Types can be
- named, but this is not a requirement. There exists exactly
- one instance of a given shape at any one time. This allows type equality to
- be performed with address equality of the <tt>Type</tt> instance. That is, given two
- <tt>Type*</tt> values, the types are identical if the pointers are identical.
- </p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="m_Type">Important Public Methods</a>
-</h4>
-
-<div>
-
-<ul>
- <li><tt>bool isIntegerTy() const</tt>: Returns true for any integer type.</li>
-
- <li><tt>bool isFloatingPointTy()</tt>: Return true if this is one of the five
- floating point types.</li>
-
- <li><tt>bool isSized()</tt>: Return true if the type has known size. Things
- that don't have a size are abstract types, labels and void.</li>
-
-</ul>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="derivedtypes">Important Derived Types</a>
-</h4>
-<div>
-<dl>
- <dt><tt>IntegerType</tt></dt>
- <dd>Subclass of DerivedType that represents integer types of any bit width.
- Any bit width between <tt>IntegerType::MIN_INT_BITS</tt> (1) and
- <tt>IntegerType::MAX_INT_BITS</tt> (~8 million) can be represented.
- <ul>
- <li><tt>static const IntegerType* get(unsigned NumBits)</tt>: get an integer
- type of a specific bit width.</li>
- <li><tt>unsigned getBitWidth() const</tt>: Get the bit width of an integer
- type.</li>
- </ul>
- </dd>
- <dt><tt>SequentialType</tt></dt>
- <dd>This is subclassed by ArrayType, PointerType and VectorType.
- <ul>
- <li><tt>const Type * getElementType() const</tt>: Returns the type of each
- of the elements in the sequential type. </li>
- </ul>
- </dd>
- <dt><tt>ArrayType</tt></dt>
- <dd>This is a subclass of SequentialType and defines the interface for array
- types.
- <ul>
- <li><tt>unsigned getNumElements() const</tt>: Returns the number of
- elements in the array. </li>
- </ul>
- </dd>
- <dt><tt>PointerType</tt></dt>
- <dd>Subclass of SequentialType for pointer types.</dd>
- <dt><tt>VectorType</tt></dt>
- <dd>Subclass of SequentialType for vector types. A
- vector type is similar to an ArrayType but is distinguished because it is
- a first class type whereas ArrayType is not. Vector types are used for
- vector operations and are usually small vectors of an integer or floating
- point type.</dd>
- <dt><tt>StructType</tt></dt>
- <dd>Subclass of DerivedType for struct types.</dd>
- <dt><tt><a name="FunctionType">FunctionType</a></tt></dt>
- <dd>Subclass of DerivedType for function types.
- <ul>
- <li><tt>bool isVarArg() const</tt>: Returns true if it's a vararg
- function</li>
- <li><tt> const Type * getReturnType() const</tt>: Returns the
- return type of the function.</li>
- <li><tt>const Type * getParamType (unsigned i)</tt>: Returns
- the type of the ith parameter.</li>
- <li><tt> const unsigned getNumParams() const</tt>: Returns the
- number of formal parameters.</li>
- </ul>
- </dd>
-</dl>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="Module">The <tt>Module</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a
-href="/doxygen/Module_8h-source.html">llvm/Module.h</a>"</tt><br> doxygen info:
-<a href="/doxygen/classllvm_1_1Module.html">Module Class</a></p>
-
-<p>The <tt>Module</tt> class represents the top level structure present in LLVM
-programs. An LLVM module is effectively either a translation unit of the
-original program or a combination of several translation units merged by the
-linker. The <tt>Module</tt> class keeps track of a list of <a
-href="#Function"><tt>Function</tt></a>s, a list of <a
-href="#GlobalVariable"><tt>GlobalVariable</tt></a>s, and a <a
-href="#SymbolTable"><tt>SymbolTable</tt></a>. Additionally, it contains a few
-helpful member functions that try to make common operations easy.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="m_Module">Important Public Members of the <tt>Module</tt> class</a>
-</h4>
-
-<div>
-
-<ul>
- <li><tt>Module::Module(std::string name = "")</tt>
-
- <p>Constructing a <a href="#Module">Module</a> is easy. You can optionally
-provide a name for it (probably based on the name of the translation unit).</p>
- </li>
-
- <li><tt>Module::iterator</tt> - Typedef for function list iterator<br>
- <tt>Module::const_iterator</tt> - Typedef for const_iterator.<br>
-
- <tt>begin()</tt>, <tt>end()</tt>
- <tt>size()</tt>, <tt>empty()</tt>
-
- <p>These are forwarding methods that make it easy to access the contents of
- a <tt>Module</tt> object's <a href="#Function"><tt>Function</tt></a>
- list.</p></li>
-
- <li><tt>Module::FunctionListType &amp;getFunctionList()</tt>
-
- <p> Returns the list of <a href="#Function"><tt>Function</tt></a>s. This is
- necessary to use when you need to update the list or perform a complex
- action that doesn't have a forwarding method.</p>
-
- <p><!-- Global Variable --></p></li>
-</ul>
-
-<hr>
-
-<ul>
- <li><tt>Module::global_iterator</tt> - Typedef for global variable list iterator<br>
-
- <tt>Module::const_global_iterator</tt> - Typedef for const_iterator.<br>
-
- <tt>global_begin()</tt>, <tt>global_end()</tt>
- <tt>global_size()</tt>, <tt>global_empty()</tt>
-
- <p> These are forwarding methods that make it easy to access the contents of
- a <tt>Module</tt> object's <a
- href="#GlobalVariable"><tt>GlobalVariable</tt></a> list.</p></li>
-
- <li><tt>Module::GlobalListType &amp;getGlobalList()</tt>
-
- <p>Returns the list of <a
- href="#GlobalVariable"><tt>GlobalVariable</tt></a>s. This is necessary to
- use when you need to update the list or perform a complex action that
- doesn't have a forwarding method.</p>
-
- <p><!-- Symbol table stuff --> </p></li>
-</ul>
-
-<hr>
-
-<ul>
- <li><tt><a href="#SymbolTable">SymbolTable</a> *getSymbolTable()</tt>
-
- <p>Return a pointer to the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
- for this <tt>Module</tt>.</p>
-
- <p><!-- Convenience methods --></p></li>
-</ul>
-
-<hr>
-
-<ul>
-
- <li><tt><a href="#Function">Function</a> *getFunction(StringRef Name) const
- </tt>
-
- <p>Look up the specified function in the <tt>Module</tt> <a
- href="#SymbolTable"><tt>SymbolTable</tt></a>. If it does not exist, return
- <tt>null</tt>.</p></li>
-
- <li><tt><a href="#Function">Function</a> *getOrInsertFunction(const
- std::string &amp;Name, const <a href="#FunctionType">FunctionType</a> *T)</tt>
-
- <p>Look up the specified function in the <tt>Module</tt> <a
- href="#SymbolTable"><tt>SymbolTable</tt></a>. If it does not exist, add an
- external declaration for the function and return it.</p></li>
-
- <li><tt>std::string getTypeName(const <a href="#Type">Type</a> *Ty)</tt>
-
- <p>If there is at least one entry in the <a
- href="#SymbolTable"><tt>SymbolTable</tt></a> for the specified <a
- href="#Type"><tt>Type</tt></a>, return it. Otherwise return the empty
- string.</p></li>
-
- <li><tt>bool addTypeName(const std::string &amp;Name, const <a
- href="#Type">Type</a> *Ty)</tt>
-
- <p>Insert an entry in the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
- mapping <tt>Name</tt> to <tt>Ty</tt>. If there is already an entry for this
- name, true is returned and the <a
- href="#SymbolTable"><tt>SymbolTable</tt></a> is not modified.</p></li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="Value">The <tt>Value</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a href="/doxygen/Value_8h-source.html">llvm/Value.h</a>"</tt>
-<br>
-doxygen info: <a href="/doxygen/classllvm_1_1Value.html">Value Class</a></p>
-
-<p>The <tt>Value</tt> class is the most important class in the LLVM Source
-base. It represents a typed value that may be used (among other things) as an
-operand to an instruction. There are many different types of <tt>Value</tt>s,
-such as <a href="#Constant"><tt>Constant</tt></a>s,<a
-href="#Argument"><tt>Argument</tt></a>s. Even <a
-href="#Instruction"><tt>Instruction</tt></a>s and <a
-href="#Function"><tt>Function</tt></a>s are <tt>Value</tt>s.</p>
-
-<p>A particular <tt>Value</tt> may be used many times in the LLVM representation
-for a program. For example, an incoming argument to a function (represented
-with an instance of the <a href="#Argument">Argument</a> class) is "used" by
-every instruction in the function that references the argument. To keep track
-of this relationship, the <tt>Value</tt> class keeps a list of all of the <a
-href="#User"><tt>User</tt></a>s that is using it (the <a
-href="#User"><tt>User</tt></a> class is a base class for all nodes in the LLVM
-graph that can refer to <tt>Value</tt>s). This use list is how LLVM represents
-def-use information in the program, and is accessible through the <tt>use_</tt>*
-methods, shown below.</p>
-
-<p>Because LLVM is a typed representation, every LLVM <tt>Value</tt> is typed,
-and this <a href="#Type">Type</a> is available through the <tt>getType()</tt>
-method. In addition, all LLVM values can be named. The "name" of the
-<tt>Value</tt> is a symbolic string printed in the LLVM code:</p>
-
-<div class="doc_code">
-<pre>
-%<b>foo</b> = add i32 1, 2
-</pre>
-</div>
-
-<p><a name="nameWarning">The name of this instruction is "foo".</a> <b>NOTE</b>
-that the name of any value may be missing (an empty string), so names should
-<b>ONLY</b> be used for debugging (making the source code easier to read,
-debugging printouts); they should not be used to keep track of values or map
-between them. For this purpose, use a <tt>std::map</tt> of pointers to the
-<tt>Value</tt> itself instead.</p>
-
-<p>One important aspect of LLVM is that there is no distinction between an SSA
-variable and the operation that produces it. Because of this, any reference to
-the value produced by an instruction (or the value available as an incoming
-argument, for example) is represented as a direct pointer to the instance of
-the class that
-represents this value. Although this may take some getting used to, it
-simplifies the representation and makes it easier to manipulate.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="m_Value">Important Public Members of the <tt>Value</tt> class</a>
-</h4>
-
-<div>
-
-<ul>
- <li><tt>Value::use_iterator</tt> - Typedef for iterator over the
-use-list<br>
- <tt>Value::const_use_iterator</tt> - Typedef for const_iterator over
-the use-list<br>
- <tt>unsigned use_size()</tt> - Returns the number of users of the
-value.<br>
- <tt>bool use_empty()</tt> - Returns true if there are no users.<br>
- <tt>use_iterator use_begin()</tt> - Get an iterator to the start of
-the use-list.<br>
- <tt>use_iterator use_end()</tt> - Get an iterator to the end of the
-use-list.<br>
- <tt><a href="#User">User</a> *use_back()</tt> - Returns the last
-element in the list.
- <p> These methods are the interface to access the def-use
-information in LLVM. As with all other iterators in LLVM, the naming
-conventions follow the conventions defined by the <a href="#stl">STL</a>.</p>
- </li>
- <li><tt><a href="#Type">Type</a> *getType() const</tt>
- <p>This method returns the Type of the Value.</p>
- </li>
- <li><tt>bool hasName() const</tt><br>
- <tt>std::string getName() const</tt><br>
- <tt>void setName(const std::string &amp;Name)</tt>
- <p> This family of methods is used to access and assign a name to a <tt>Value</tt>;
-be aware of the <a href="#nameWarning">precaution above</a>.</p>
- </li>
- <li><tt>void replaceAllUsesWith(Value *V)</tt>
-
- <p>This method traverses the use list of a <tt>Value</tt> changing all <a
- href="#User"><tt>User</tt>s</a> of the current value to refer to
- "<tt>V</tt>" instead. For example, if you detect that an instruction always
- produces a constant value (for example through constant folding), you can
- replace all uses of the instruction with the constant like this:</p>
-
-<div class="doc_code">
-<pre>
-Inst-&gt;replaceAllUsesWith(ConstVal);
-</pre>
-</div>
-
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="User">The <tt>User</tt> class</a>
-</h3>
-
-<div>
-
-<p>
-<tt>#include "<a href="/doxygen/User_8h-source.html">llvm/User.h</a>"</tt><br>
-doxygen info: <a href="/doxygen/classllvm_1_1User.html">User Class</a><br>
-Superclass: <a href="#Value"><tt>Value</tt></a></p>
-
-<p>The <tt>User</tt> class is the common base class of all LLVM nodes that may
-refer to <a href="#Value"><tt>Value</tt></a>s. It exposes a list of "Operands"
-that are all of the <a href="#Value"><tt>Value</tt></a>s that the User is
-referring to. The <tt>User</tt> class itself is a subclass of
-<tt>Value</tt>.</p>
-
-<p>The operands of a <tt>User</tt> point directly to the LLVM <a
-href="#Value"><tt>Value</tt></a> that it refers to. Because LLVM uses Static
-Single Assignment (SSA) form, there can only be one definition referred to,
-allowing this direct connection. This connection provides the use-def
-information in LLVM.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="m_User">Important Public Members of the <tt>User</tt> class</a>
-</h4>
-
-<div>
-
-<p>The <tt>User</tt> class exposes the operand list in two ways: through
-an index access interface and through an iterator based interface.</p>
-
-<ul>
- <li><tt>Value *getOperand(unsigned i)</tt><br>
- <tt>unsigned getNumOperands()</tt>
- <p> These two methods expose the operands of the <tt>User</tt> in a
-convenient form for direct access.</p></li>
-
- <li><tt>User::op_iterator</tt> - Typedef for iterator over the operand
-list<br>
- <tt>op_iterator op_begin()</tt> - Get an iterator to the start of
-the operand list.<br>
- <tt>op_iterator op_end()</tt> - Get an iterator to the end of the
-operand list.
- <p> Together, these methods make up the iterator based interface to
-the operands of a <tt>User</tt>.</p></li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="Instruction">The <tt>Instruction</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "</tt><tt><a
-href="/doxygen/Instruction_8h-source.html">llvm/Instruction.h</a>"</tt><br>
-doxygen info: <a href="/doxygen/classllvm_1_1Instruction.html">Instruction Class</a><br>
-Superclasses: <a href="#User"><tt>User</tt></a>, <a
-href="#Value"><tt>Value</tt></a></p>
-
-<p>The <tt>Instruction</tt> class is the common base class for all LLVM
-instructions. It provides only a few methods, but is a very commonly used
-class. The primary data tracked by the <tt>Instruction</tt> class itself is the
-opcode (instruction type) and the parent <a
-href="#BasicBlock"><tt>BasicBlock</tt></a> the <tt>Instruction</tt> is embedded
-into. To represent a specific type of instruction, one of many subclasses of
-<tt>Instruction</tt> is used.</p>
-
-<p> Because the <tt>Instruction</tt> class subclasses the <a
-href="#User"><tt>User</tt></a> class, its operands can be accessed in the same
-way as for other <a href="#User"><tt>User</tt></a>s (with the
-<tt>getOperand()</tt>/<tt>getNumOperands()</tt> and
-<tt>op_begin()</tt>/<tt>op_end()</tt> methods).</p> <p> An important file for
-the <tt>Instruction</tt> class is the <tt>llvm/Instruction.def</tt> file. This
-file contains some meta-data about the various different types of instructions
-in LLVM. It describes the enum values that are used as opcodes (for example
-<tt>Instruction::Add</tt> and <tt>Instruction::ICmp</tt>), as well as the
-concrete sub-classes of <tt>Instruction</tt> that implement the instruction (for
-example <tt><a href="#BinaryOperator">BinaryOperator</a></tt> and <tt><a
-href="#CmpInst">CmpInst</a></tt>). Unfortunately, the use of macros in
-this file confuses doxygen, so these enum values don't show up correctly in the
-<a href="/doxygen/classllvm_1_1Instruction.html">doxygen output</a>.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="s_Instruction">
- Important Subclasses of the <tt>Instruction</tt> class
- </a>
-</h4>
-<div>
- <ul>
- <li><tt><a name="BinaryOperator">BinaryOperator</a></tt>
- <p>This subclass represents all two-operand instructions whose operands
- must be the same type, except for the comparison instructions.</p></li>
- <li><tt><a name="CastInst">CastInst</a></tt>
- <p>This subclass is the parent of the 12 casting instructions. It provides
- common operations on cast instructions.</p>
- <li><tt><a name="CmpInst">CmpInst</a></tt>
- <p>This subclass represents the two comparison instructions,
- <a href="LangRef.html#i_icmp">ICmpInst</a> (integer operands), and
- <a href="LangRef.html#i_fcmp">FCmpInst</a> (floating point operands).</p>
- <li><tt><a name="TerminatorInst">TerminatorInst</a></tt>
- <p>This subclass is the parent of all terminator instructions (those which
- can terminate a block).</p>
- </ul>
- </div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="m_Instruction">
- Important Public Members of the <tt>Instruction</tt> class
- </a>
-</h4>
-
-<div>
-
-<ul>
- <li><tt><a href="#BasicBlock">BasicBlock</a> *getParent()</tt>
- <p>Returns the <a href="#BasicBlock"><tt>BasicBlock</tt></a> that
-this <tt>Instruction</tt> is embedded into.</p></li>
- <li><tt>bool mayWriteToMemory()</tt>
- <p>Returns true if the instruction writes to memory, i.e. it is a
- <tt>call</tt>, <tt>free</tt>, <tt>invoke</tt>, or <tt>store</tt>.</p></li>
- <li><tt>unsigned getOpcode()</tt>
- <p>Returns the opcode for the <tt>Instruction</tt>.</p></li>
- <li><tt><a href="#Instruction">Instruction</a> *clone() const</tt>
- <p>Returns another instance of the specified instruction, identical
-in all ways to the original except that the instruction has no parent
-(i.e. it is not embedded into a <a href="#BasicBlock"><tt>BasicBlock</tt></a>)
-and it has no name.</p></li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="Constant">The <tt>Constant</tt> class and subclasses</a>
-</h3>
-
-<div>
-
-<p>Constant represents a base class for different types of constants. It
-is subclassed by ConstantInt, ConstantArray, etc. for representing
-the various types of Constants. <a href="#GlobalValue">GlobalValue</a> is also
-a subclass, which represents the address of a global variable or function.
-</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>Important Subclasses of Constant</h4>
-<div>
-<ul>
- <li>ConstantInt : This subclass of Constant represents an integer constant of
- any width.
- <ul>
- <li><tt>const APInt&amp; getValue() const</tt>: Returns the underlying
- value of this constant, an APInt value.</li>
- <li><tt>int64_t getSExtValue() const</tt>: Converts the underlying APInt
- value to an int64_t via sign extension. If the value (not the bit width)
- of the APInt is too large to fit in an int64_t, an assertion will result.
- For this reason, use of this method is discouraged.</li>
- <li><tt>uint64_t getZExtValue() const</tt>: Converts the underlying APInt
- value to a uint64_t via zero extension. If the value (not the bit width)
- of the APInt is too large to fit in a uint64_t, an assertion will result.
- For this reason, use of this method is discouraged.</li>
- <li><tt>static ConstantInt* get(const APInt&amp; Val)</tt>: Returns the
- ConstantInt object that represents the value provided by <tt>Val</tt>.
- The type is implied as the IntegerType that corresponds to the bit width
- of <tt>Val</tt>.</li>
- <li><tt>static ConstantInt* get(const Type *Ty, uint64_t Val)</tt>:
- Returns the ConstantInt object that represents the value provided by
- <tt>Val</tt> for integer type <tt>Ty</tt>.</li>
- </ul>
- </li>
- <li>ConstantFP : This class represents a floating point constant.
- <ul>
- <li><tt>double getValue() const</tt>: Returns the underlying value of
- this constant. </li>
- </ul>
- </li>
- <li>ConstantArray : This represents a constant array.
- <ul>
- <li><tt>const std::vector&lt;Use&gt; &amp;getValues() const</tt>: Returns
- a vector of component constants that make up this array.</li>
- </ul>
- </li>
- <li>ConstantStruct : This represents a constant struct.
- <ul>
- <li><tt>const std::vector&lt;Use&gt; &amp;getValues() const</tt>: Returns
- a vector of component constants that make up this struct.</li>
- </ul>
- </li>
- <li>GlobalValue : This represents either a global variable or a function. In
- either case, the value is a constant fixed address (after linking).
- </li>
-</ul>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="GlobalValue">The <tt>GlobalValue</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a
-href="/doxygen/GlobalValue_8h-source.html">llvm/GlobalValue.h</a>"</tt><br>
-doxygen info: <a href="/doxygen/classllvm_1_1GlobalValue.html">GlobalValue
-Class</a><br>
-Superclasses: <a href="#Constant"><tt>Constant</tt></a>,
-<a href="#User"><tt>User</tt></a>, <a href="#Value"><tt>Value</tt></a></p>
-
-<p>Global values (<a href="#GlobalVariable"><tt>GlobalVariable</tt></a>s or <a
-href="#Function"><tt>Function</tt></a>s) are the only LLVM values that are
-visible in the bodies of all <a href="#Function"><tt>Function</tt></a>s.
-Because they are visible at global scope, they are also subject to linking with
-other globals defined in different translation units. To control the linking
-process, <tt>GlobalValue</tt>s know their linkage rules. Specifically,
-<tt>GlobalValue</tt>s know whether they have internal or external linkage, as
-defined by the <tt>LinkageTypes</tt> enumeration.</p>
-
-<p>If a <tt>GlobalValue</tt> has internal linkage (equivalent to being
-<tt>static</tt> in C), it is not visible to code outside the current translation
-unit, and does not participate in linking. If it has external linkage, it is
-visible to external code, and does participate in linking. In addition to
-linkage information, <tt>GlobalValue</tt>s keep track of which <a
-href="#Module"><tt>Module</tt></a> they are currently part of.</p>
-
-<p>Because <tt>GlobalValue</tt>s are memory objects, they are always referred to
-by their <b>address</b>. As such, the <a href="#Type"><tt>Type</tt></a> of a
-global is always a pointer to its contents. It is important to remember this
-when using the <tt>GetElementPtrInst</tt> instruction because this pointer must
-be dereferenced first. For example, if you have a <tt>GlobalVariable</tt> (a
-subclass of <tt>GlobalValue)</tt> that is an array of 24 ints, type <tt>[24 x
-i32]</tt>, then the <tt>GlobalVariable</tt> is a pointer to that array. Although
-the address of the first element of this array and the value of the
-<tt>GlobalVariable</tt> are the same, they have different types. The
-<tt>GlobalVariable</tt>'s type is <tt>[24 x i32]</tt>. The first element's type
-is <tt>i32.</tt> Because of this, accessing a global value requires you to
-dereference the pointer with <tt>GetElementPtrInst</tt> first, then its elements
-can be accessed. This is explained in the <a href="LangRef.html#globalvars">LLVM
-Language Reference Manual</a>.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="m_GlobalValue">
- Important Public Members of the <tt>GlobalValue</tt> class
- </a>
-</h4>
-
-<div>
-
-<ul>
- <li><tt>bool hasInternalLinkage() const</tt><br>
- <tt>bool hasExternalLinkage() const</tt><br>
- <tt>void setInternalLinkage(bool HasInternalLinkage)</tt>
- <p> These methods manipulate the linkage characteristics of the <tt>GlobalValue</tt>.</p>
- </li>
- <li><tt><a href="#Module">Module</a> *getParent()</tt>
- <p> This returns the <a href="#Module"><tt>Module</tt></a> that the
-GlobalValue is currently embedded into.</p></li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="Function">The <tt>Function</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a
-href="/doxygen/Function_8h-source.html">llvm/Function.h</a>"</tt><br> doxygen
-info: <a href="/doxygen/classllvm_1_1Function.html">Function Class</a><br>
-Superclasses: <a href="#GlobalValue"><tt>GlobalValue</tt></a>,
-<a href="#Constant"><tt>Constant</tt></a>,
-<a href="#User"><tt>User</tt></a>,
-<a href="#Value"><tt>Value</tt></a></p>
-
-<p>The <tt>Function</tt> class represents a single procedure in LLVM. It is
-actually one of the more complex classes in the LLVM hierarchy because it must
-keep track of a large amount of data. The <tt>Function</tt> class keeps track
-of a list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s, a list of formal
-<a href="#Argument"><tt>Argument</tt></a>s, and a
-<a href="#SymbolTable"><tt>SymbolTable</tt></a>.</p>
-
-<p>The list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s is the most
-commonly used part of <tt>Function</tt> objects. The list imposes an implicit
-ordering of the blocks in the function, which indicates how the code will be
-laid out by the backend. Additionally, the first <a
-href="#BasicBlock"><tt>BasicBlock</tt></a> is the implicit entry node for the
-<tt>Function</tt>. It is not legal in LLVM to explicitly branch to this initial
-block. There are no implicit exit nodes, and in fact there may be multiple exit
-nodes from a single <tt>Function</tt>. If the <a
-href="#BasicBlock"><tt>BasicBlock</tt></a> list is empty, this indicates that
-the <tt>Function</tt> is actually a function declaration: the actual body of the
-function hasn't been linked in yet.</p>
-
-<p>In addition to a list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s, the
-<tt>Function</tt> class also keeps track of the list of formal <a
-href="#Argument"><tt>Argument</tt></a>s that the function receives. This
-container manages the lifetime of the <a href="#Argument"><tt>Argument</tt></a>
-nodes, just like the <a href="#BasicBlock"><tt>BasicBlock</tt></a> list does for
-the <a href="#BasicBlock"><tt>BasicBlock</tt></a>s.</p>
-
-<p>The <a href="#SymbolTable"><tt>SymbolTable</tt></a> is a very rarely used
-LLVM feature that is only used when you have to look up a value by name. Aside
-from that, the <a href="#SymbolTable"><tt>SymbolTable</tt></a> is used
-internally to make sure that there are no conflicts between the names of <a
-href="#Instruction"><tt>Instruction</tt></a>s, <a
-href="#BasicBlock"><tt>BasicBlock</tt></a>s, or <a
-href="#Argument"><tt>Argument</tt></a>s in the function body.</p>
-
-<p>Note that <tt>Function</tt> is a <a href="#GlobalValue">GlobalValue</a>
-and therefore also a <a href="#Constant">Constant</a>. The value of the function
-is its address (after linking) which is guaranteed to be constant.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="m_Function">
- Important Public Members of the <tt>Function</tt> class
- </a>
-</h4>
-
-<div>
-
-<ul>
- <li><tt>Function(const </tt><tt><a href="#FunctionType">FunctionType</a>
- *Ty, LinkageTypes Linkage, const std::string &amp;N = "", Module* Parent = 0)</tt>
-
- <p>Constructor used when you need to create new <tt>Function</tt>s to add
- to a program. The constructor must specify the type of the function to
- create and what type of linkage the function should have. The <a
- href="#FunctionType"><tt>FunctionType</tt></a> argument
- specifies the formal arguments and return value for the function. The same
- <a href="#FunctionType"><tt>FunctionType</tt></a> value can be used to
- create multiple functions. The <tt>Parent</tt> argument specifies the Module
- in which the function is defined. If this argument is provided, the function
- will automatically be inserted into that module's list of
- functions.</p></li>
-
- <li><tt>bool isDeclaration()</tt>
-
- <p>Return whether or not the <tt>Function</tt> has a body defined. If the
- function is "external", it does not have a body, and thus must be resolved
- by linking with a function defined in a different translation unit.</p></li>
-
- <li><tt>Function::iterator</tt> - Typedef for basic block list iterator<br>
- <tt>Function::const_iterator</tt> - Typedef for const_iterator.<br>
-
- <tt>begin()</tt>, <tt>end()</tt>
- <tt>size()</tt>, <tt>empty()</tt>
-
- <p>These are forwarding methods that make it easy to access the contents of
- a <tt>Function</tt> object's <a href="#BasicBlock"><tt>BasicBlock</tt></a>
- list.</p></li>
-
- <li><tt>Function::BasicBlockListType &amp;getBasicBlockList()</tt>
-
- <p>Returns the list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s. This
- is necessary to use when you need to update the list or perform a complex
- action that doesn't have a forwarding method.</p></li>
-
- <li><tt>Function::arg_iterator</tt> - Typedef for the argument list
-iterator<br>
- <tt>Function::const_arg_iterator</tt> - Typedef for const_iterator.<br>
-
- <tt>arg_begin()</tt>, <tt>arg_end()</tt>
- <tt>arg_size()</tt>, <tt>arg_empty()</tt>
-
- <p>These are forwarding methods that make it easy to access the contents of
- a <tt>Function</tt> object's <a href="#Argument"><tt>Argument</tt></a>
- list.</p></li>
-
- <li><tt>Function::ArgumentListType &amp;getArgumentList()</tt>
-
- <p>Returns the list of <a href="#Argument"><tt>Argument</tt></a>s. This is
- necessary to use when you need to update the list or perform a complex
- action that doesn't have a forwarding method.</p></li>
-
- <li><tt><a href="#BasicBlock">BasicBlock</a> &amp;getEntryBlock()</tt>
-
- <p>Returns the entry <a href="#BasicBlock"><tt>BasicBlock</tt></a> for the
- function. Because the entry block for the function is always the first
- block, this returns the first block of the <tt>Function</tt>.</p></li>
-
- <li><tt><a href="#Type">Type</a> *getReturnType()</tt><br>
- <tt><a href="#FunctionType">FunctionType</a> *getFunctionType()</tt>
-
- <p>This traverses the <a href="#Type"><tt>Type</tt></a> of the
- <tt>Function</tt> and returns the return type of the function, or the <a
- href="#FunctionType"><tt>FunctionType</tt></a> of the actual
- function.</p></li>
-
- <li><tt><a href="#SymbolTable">SymbolTable</a> *getSymbolTable()</tt>
-
- <p> Return a pointer to the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
- for this <tt>Function</tt>.</p></li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="GlobalVariable">The <tt>GlobalVariable</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a
-href="/doxygen/GlobalVariable_8h-source.html">llvm/GlobalVariable.h</a>"</tt>
-<br>
-doxygen info: <a href="/doxygen/classllvm_1_1GlobalVariable.html">GlobalVariable
- Class</a><br>
-Superclasses: <a href="#GlobalValue"><tt>GlobalValue</tt></a>,
-<a href="#Constant"><tt>Constant</tt></a>,
-<a href="#User"><tt>User</tt></a>,
-<a href="#Value"><tt>Value</tt></a></p>
-
-<p>Global variables are represented with the (surprise surprise)
-<tt>GlobalVariable</tt> class. Like functions, <tt>GlobalVariable</tt>s are also
-subclasses of <a href="#GlobalValue"><tt>GlobalValue</tt></a>, and as such are
-always referenced by their address (global values must live in memory, so their
-"name" refers to their constant address). See
-<a href="#GlobalValue"><tt>GlobalValue</tt></a> for more on this. Global
-variables may have an initial value (which must be a
-<a href="#Constant"><tt>Constant</tt></a>), and if they have an initializer,
-they may be marked as "constant" themselves (indicating that their contents
-never change at runtime).</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="m_GlobalVariable">
- Important Public Members of the <tt>GlobalVariable</tt> class
- </a>
-</h4>
-
-<div>
-
-<ul>
- <li><tt>GlobalVariable(const </tt><tt><a href="#Type">Type</a> *Ty, bool
- isConstant, LinkageTypes&amp; Linkage, <a href="#Constant">Constant</a>
- *Initializer = 0, const std::string &amp;Name = "", Module* Parent = 0)</tt>
-
- <p>Create a new global variable of the specified type. If
- <tt>isConstant</tt> is true then the global variable will be marked as
- unchanging for the program. The Linkage parameter specifies the type of
- linkage (internal, external, weak, linkonce, appending) for the variable.
- If the linkage is InternalLinkage, WeakAnyLinkage, WeakODRLinkage,
- LinkOnceAnyLinkage or LinkOnceODRLinkage, then the resultant
- global variable will have internal linkage. AppendingLinkage concatenates
- together all instances (in different translation units) of the variable
- into a single variable, but is only applicable to arrays. See
- the <a href="LangRef.html#modulestructure">LLVM Language Reference</a> for
- further details on linkage types. Optionally an initializer, a name, and the
- module to put the variable into may be specified for the global variable as
- well.</p></li>
-
- <li><tt>bool isConstant() const</tt>
-
- <p>Returns true if this is a global variable that is known not to
- be modified at runtime.</p></li>
-
- <li><tt>bool hasInitializer()</tt>
-
- <p>Returns true if this <tt>GlobalVariable</tt> has an initializer.</p></li>
-
- <li><tt><a href="#Constant">Constant</a> *getInitializer()</tt>
-
- <p>Returns the initial value for a <tt>GlobalVariable</tt>. It is not legal
- to call this method if there is no initializer.</p></li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="BasicBlock">The <tt>BasicBlock</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a
-href="/doxygen/BasicBlock_8h-source.html">llvm/BasicBlock.h</a>"</tt><br>
-doxygen info: <a href="/doxygen/classllvm_1_1BasicBlock.html">BasicBlock
-Class</a><br>
-Superclass: <a href="#Value"><tt>Value</tt></a></p>
-
-<p>This class represents a single entry, single exit section of the code,
-commonly known as a basic block by the compiler community. The
-<tt>BasicBlock</tt> class maintains a list of <a
-href="#Instruction"><tt>Instruction</tt></a>s, which form the body of the block.
-Matching the language definition, the last element of this list of instructions
-is always a terminator instruction (a subclass of the <a
-href="#TerminatorInst"><tt>TerminatorInst</tt></a> class).</p>
-
-<p>In addition to tracking the list of instructions that make up the block, the
-<tt>BasicBlock</tt> class also keeps track of the <a
-href="#Function"><tt>Function</tt></a> that it is embedded into.</p>
-
-<p>Note that <tt>BasicBlock</tt>s themselves are <a
-href="#Value"><tt>Value</tt></a>s, because they are referenced by instructions
-like branches and can go in the switch tables. <tt>BasicBlock</tt>s have type
-<tt>label</tt>.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="m_BasicBlock">
- Important Public Members of the <tt>BasicBlock</tt> class
- </a>
-</h4>
-
-<div>
-<ul>
-
-<li><tt>BasicBlock(const std::string &amp;Name = "", </tt><tt><a
- href="#Function">Function</a> *Parent = 0)</tt>
-
-<p>The <tt>BasicBlock</tt> constructor is used to create new basic blocks for
-insertion into a function. The constructor optionally takes a name for the new
-block, and a <a href="#Function"><tt>Function</tt></a> to insert it into. If
-the <tt>Parent</tt> parameter is specified, the new <tt>BasicBlock</tt> is
-automatically inserted at the end of the specified <a
-href="#Function"><tt>Function</tt></a>, if not specified, the BasicBlock must be
-manually inserted into the <a href="#Function"><tt>Function</tt></a>.</p></li>
-
-<li><tt>BasicBlock::iterator</tt> - Typedef for instruction list iterator<br>
-<tt>BasicBlock::const_iterator</tt> - Typedef for const_iterator.<br>
-<tt>begin()</tt>, <tt>end()</tt>, <tt>front()</tt>, <tt>back()</tt>,
-<tt>size()</tt>, <tt>empty()</tt>
-STL-style functions for accessing the instruction list.
-
-<p>These methods and typedefs are forwarding functions that have the same
-semantics as the standard library methods of the same names. These methods
-expose the underlying instruction list of a basic block in a way that is easy to
-manipulate. To get the full complement of container operations (including
-operations to update the list), you must use the <tt>getInstList()</tt>
-method.</p></li>
-
-<li><tt>BasicBlock::InstListType &amp;getInstList()</tt>
-
-<p>This method is used to get access to the underlying container that actually
-holds the Instructions. This method must be used when there isn't a forwarding
-function in the <tt>BasicBlock</tt> class for the operation that you would like
-to perform. Because there are no forwarding functions for "updating"
-operations, you need to use this if you want to update the contents of a
-<tt>BasicBlock</tt>.</p></li>
-
-<li><tt><a href="#Function">Function</a> *getParent()</tt>
-
-<p> Returns a pointer to the <a href="#Function"><tt>Function</tt></a> the block is
-embedded into, or a null pointer if it is homeless.</p></li>
-
-<li><tt><a href="#TerminatorInst">TerminatorInst</a> *getTerminator()</tt>
-
-<p> Returns a pointer to the terminator instruction that appears at the end of
-the <tt>BasicBlock</tt>. If there is no terminator instruction, or if the last
-instruction in the block is not a terminator, then a null pointer is
-returned.</p></li>
-
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="Argument">The <tt>Argument</tt> class</a>
-</h3>
-
-<div>
-
-<p>This subclass of Value defines the interface for incoming formal
-arguments to a function. A Function maintains a list of its formal
-arguments. An argument has a pointer to the parent Function.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01 Strict"></a>
-
- <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-07 02:56:09 +0200 (Sun, 07 Oct 2012) $
-</address>
-
-</body>
-</html>
diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst
new file mode 100644
index 000000000000..7864165617a0
--- /dev/null
+++ b/docs/ProgrammersManual.rst
@@ -0,0 +1,3204 @@
+========================
+LLVM Programmer's Manual
+========================
+
+.. contents::
+   :local:
+
+.. warning::
+   This is always a work in progress.
+
+.. _introduction:
+
+Introduction
+============
+
+This document is meant to highlight some of the important classes and interfaces
+available in the LLVM source-base. This manual is not intended to explain what
+LLVM is, how it works, or what LLVM code looks like. It assumes that you know
+the basics of LLVM and are interested in writing transformations or otherwise
+analyzing or manipulating the code.
+
+This document should get you oriented so that you can find your way in the
+continuously growing source code that makes up the LLVM infrastructure. Note
+that this manual is not intended to serve as a replacement for reading the
+source code, so if you think there should be a method in one of these classes to
+do something, but it's not listed, check the source. Links to the `doxygen
+<http://llvm.org/doxygen/>`__ sources are provided to make this as easy as
+possible.
+
+The first section of this document describes general information that is useful
+to know when working in the LLVM infrastructure, and the second describes the
+Core LLVM classes. In the future this manual will be extended with information
+describing how to use extension libraries, such as dominator information, CFG
+traversal routines, and useful utilities like the ``InstVisitor`` (`doxygen
+<http://llvm.org/doxygen/InstVisitor_8h-source.html>`__) template.
+
+.. _general:
+
+General Information
+===================
+
+This section contains general information that is useful if you are working in
+the LLVM source-base, but that isn't specific to any particular API.
+
+.. _stl:
+
+The C++ Standard Template Library
+---------------------------------
+
+LLVM makes heavy use of the C++ Standard Template Library (STL), perhaps much
+more than you are used to, or have seen before. Because of this, you might want
+to do a little background reading in the techniques used and capabilities of the
+library. There are many good pages that discuss the STL, and several books on
+the subject that you can get, so it will not be discussed in this document.
+
+Here are some useful links:
+
+#. `cppreference.com
+ <http://en.cppreference.com/w/>`_ - an excellent
+ reference for the STL and other parts of the standard C++ library.
+
+#. `C++ In a Nutshell <http://www.tempest-sw.com/cpp/>`_ - This is an O'Reilly
+ book in the making. It has a decent Standard Library Reference that rivals
+ Dinkumware's, and is unfortunately no longer free since the book has been
+ published.
+
+#. `C++ Frequently Asked Questions <http://www.parashift.com/c++-faq-lite/>`_.
+
+#. `SGI's STL Programmer's Guide <http://www.sgi.com/tech/stl/>`_ - Contains a
+ useful `Introduction to the STL
+ <http://www.sgi.com/tech/stl/stl_introduction.html>`_.
+
+#. `Bjarne Stroustrup's C++ Page
+ <http://www.research.att.com/%7Ebs/C++.html>`_.
+
+#. `Bruce Eckel's Thinking in C++, 2nd ed. Volume 2 Revision 4.0
+ (even better, get the book)
+ <http://www.mindview.net/Books/TICPP/ThinkingInCPP2e.html>`_.
+
+You are also encouraged to take a look at the :doc:`LLVM Coding Standards
+<CodingStandards>` guide which focuses on how to write maintainable code more
+than where to put your curly braces.
+
+.. _resources:
+
+Other useful references
+-----------------------
+
+#. `Using static and shared libraries across platforms
+ <http://www.fortran-2000.com/ArnaudRecipes/sharedlib.html>`_
+
+.. _apis:
+
+Important and useful LLVM APIs
+==============================
+
+Here we highlight some LLVM APIs that are generally useful and good to know
+about when writing transformations.
+
+.. _isa:
+
+The ``isa<>``, ``cast<>`` and ``dyn_cast<>`` templates
+------------------------------------------------------
+
+The LLVM source-base makes extensive use of a custom form of RTTI. These
+templates have many similarities to the C++ ``dynamic_cast<>`` operator, but
+they avoid some of its drawbacks (primarily stemming from the fact that
+``dynamic_cast<>`` only works on classes that have a v-table). Because they are
+used so often, you must know what they do and how they work. All of these
+templates are defined in the ``llvm/Support/Casting.h`` (`doxygen
+<http://llvm.org/doxygen/Casting_8h-source.html>`__) file (note that you very
+rarely have to include this file directly).
+
+``isa<>``:
+ The ``isa<>`` operator works exactly like the Java "``instanceof``" operator.
+ It returns true or false depending on whether a reference or pointer points to
+ an instance of the specified class. This can be very useful for constraint
+ checking of various sorts (example below).
+
+``cast<>``:
+ The ``cast<>`` operator is a "checked cast" operation. It converts a pointer
+ or reference from a base class to a derived class, causing an assertion
+ failure if it is not really an instance of the right type. This should be
+ used in cases where you have some information that makes you believe that
+ something is of the right type. An example of the ``isa<>`` and ``cast<>``
+ template is:
+
+ .. code-block:: c++
+
+   static bool isLoopInvariant(const Value *V, const Loop *L) {
+     if (isa<Constant>(V) || isa<Argument>(V) || isa<GlobalValue>(V))
+       return true;
+
+     // Otherwise, it must be an instruction...
+     return !L->contains(cast<Instruction>(V)->getParent());
+   }
+
+ Note that you should **not** use an ``isa<>`` test followed by a ``cast<>``,
+ for that use the ``dyn_cast<>`` operator.
+
+``dyn_cast<>``:
+ The ``dyn_cast<>`` operator is a "checking cast" operation. It checks to see
+ if the operand is of the specified type, and if so, returns a pointer to it
+ (this operator does not work with references). If the operand is not of the
+ correct type, a null pointer is returned. Thus, this works very much like
+ the ``dynamic_cast<>`` operator in C++, and should be used in the same
+ circumstances. Typically, the ``dyn_cast<>`` operator is used in an ``if``
+ statement or some other flow control statement like this:
+
+ .. code-block:: c++
+
+   if (AllocationInst *AI = dyn_cast<AllocationInst>(Val)) {
+     // ...
+   }
+
+ This form of the ``if`` statement effectively combines together a call to
+ ``isa<>`` and a call to ``cast<>`` into one statement, which is very
+ convenient.
+
+ Note that the ``dyn_cast<>`` operator, like C++'s ``dynamic_cast<>`` or Java's
+ ``instanceof`` operator, can be abused. In particular, you should not use big
+ chained ``if/then/else`` blocks to check for lots of different variants of
+ classes. If you find yourself wanting to do this, it is much cleaner and more
+ efficient to use the ``InstVisitor`` class to dispatch over the instruction
+ type directly.
+
+``cast_or_null<>``:
+ The ``cast_or_null<>`` operator works just like the ``cast<>`` operator,
+ except that it allows for a null pointer as an argument (which it then
+ propagates). This can sometimes be useful, allowing you to combine several
+ null checks into one.
+
+``dyn_cast_or_null<>``:
+ The ``dyn_cast_or_null<>`` operator works just like the ``dyn_cast<>``
+ operator, except that it allows for a null pointer as an argument (which it
+ then propagates). This can sometimes be useful, allowing you to combine
+ several null checks into one, as in the sketch below.
+
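+For illustration, here is a minimal sketch of how the last of these,
+``dyn_cast_or_null<>``, folds a null check and a type check into a single test
+(``findValueNamed`` is a hypothetical helper that may return a null pointer):
+
+.. code-block:: c++
+
+   // findValueNamed() may return null; dyn_cast_or_null<> propagates the
+   // null, so one test covers "non-null and actually an Instruction".
+   if (Instruction *I = dyn_cast_or_null<Instruction>(findValueNamed("tmp"))) {
+     // ... I is a valid Instruction here ...
+   }
+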
+These five templates can be used with any classes, whether they have a v-table
+or not. If you want to add support for these templates, see the document
+:doc:`How to set up LLVM-style RTTI for your class hierarchy
+<HowToSetUpLLVMStyleRTTI>`.
+
+.. _string_apis:
+
+Passing strings (the ``StringRef`` and ``Twine`` classes)
+---------------------------------------------------------
+
+Although LLVM generally does not do much string manipulation, we do have several
+important APIs which take strings. Two important examples are the Value class
+-- which has names for instructions, functions, etc. -- and the ``StringMap``
+class which is used extensively in LLVM and Clang.
+
+These are generic classes, and they need to be able to accept strings which may
+have embedded null characters. Therefore, they cannot simply take a ``const
+char *``, and taking a ``const std::string&`` requires clients to perform a heap
+allocation which is usually unnecessary. Instead, many LLVM APIs use a
+``StringRef`` or a ``const Twine&`` for passing strings efficiently.
+
+.. _StringRef:
+
+The ``StringRef`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``StringRef`` data type represents a reference to a constant string (a
+character array and a length) and supports the common operations available on
+``std::string``, but does not require heap allocation.
+
+It can be constructed implicitly from a C-style null-terminated string or an
+``std::string``, or explicitly from a character pointer and length. For
+example, the ``StringRef`` find function is declared as:
+
+.. code-block:: c++
+
+ iterator find(StringRef Key);
+
+and clients can call it using any one of:
+
+.. code-block:: c++
+
+ Map.find("foo"); // Lookup "foo"
+ Map.find(std::string("bar")); // Lookup "bar"
+ Map.find(StringRef("\0baz", 4)); // Lookup "\0baz"
+
+Similarly, APIs which need to return a string may return a ``StringRef``
+instance, which can be used directly or converted to an ``std::string`` using
+the ``str`` member function. See ``llvm/ADT/StringRef.h`` (`doxygen
+<http://llvm.org/doxygen/classllvm_1_1StringRef_8h-source.html>`__) for more
+information.
+
+You should rarely use the ``StringRef`` class directly: because it contains
+pointers to external memory, it is not generally safe to store an instance of
+the class (unless you know that the external storage will not be freed).
+``StringRef`` is small and pervasive enough in LLVM that it should always be
+passed by value.
+
+The ``Twine`` class
+^^^^^^^^^^^^^^^^^^^
+
+The ``Twine`` (`doxygen <http://llvm.org/doxygen/classllvm_1_1Twine.html>`__)
+class is an efficient way for APIs to accept concatenated strings. For example,
+a common LLVM paradigm is to name one instruction based on the name of another
+instruction with a suffix, for example:
+
+.. code-block:: c++
+
+ New = CmpInst::Create(..., SO->getName() + ".cmp");
+
+The ``Twine`` class is effectively a lightweight `rope
+<http://en.wikipedia.org/wiki/Rope_(computer_science)>`_ which points to
+temporary (stack allocated) objects. Twines can be implicitly constructed as
+the result of the plus operator applied to strings (i.e., a C string, an
+``std::string``, or a ``StringRef``). The twine delays the actual concatenation
+of strings until it is actually required, at which point it can be efficiently
+rendered directly into a character array. This avoids unnecessary heap
+allocation involved in constructing the temporary results of string
+concatenation. See ``llvm/ADT/Twine.h`` (`doxygen
+<http://llvm.org/doxygen/Twine_8h_source.html>`__) and :ref:`here <dss_twine>`
+for more information.
+
+As with a ``StringRef``, ``Twine`` objects point to external memory and should
+almost never be stored or mentioned directly. They are intended solely for use
+when defining a function which should be able to efficiently accept concatenated
+strings.
+
+.. _DEBUG:
+
+The ``DEBUG()`` macro and ``-debug`` option
+-------------------------------------------
+
+Often when working on your pass you will put a bunch of debugging printouts and
+other code into your pass. After you get it working, you want to remove it, but
+you may need it again in the future (to work out new bugs that you run across).
+
+Naturally, because of this, you don't want to delete the debug printouts, but
+you don't want them to always be noisy. A standard compromise is to comment
+them out, allowing you to enable them if you need them in the future.
+
+The ``llvm/Support/Debug.h`` (`doxygen
+<http://llvm.org/doxygen/Debug_8h-source.html>`__) file provides a macro named
+``DEBUG()`` that is a much nicer solution to this problem. Basically, you can
+put arbitrary code into the argument of the ``DEBUG`` macro, and it is only
+executed if '``opt``' (or any other tool) is run with the '``-debug``' command
+line argument:
+
+.. code-block:: c++
+
+ DEBUG(errs() << "I am here!\n");
+
+Then you can run your pass like this:
+
+.. code-block:: none
+
+ $ opt < a.bc > /dev/null -mypass
+ <no output>
+ $ opt < a.bc > /dev/null -mypass -debug
+ I am here!
+
+Using the ``DEBUG()`` macro instead of a home-brewed solution means you do not
+have to create "yet another" command line option for the debug output of your
+pass. Note that ``DEBUG()`` macros are disabled for optimized builds, so they
+do not cause a performance impact at all (for the same reason, they should also
+not contain side-effects!).
+
+One additional nice thing about the ``DEBUG()`` macro is that you can enable or
+disable it directly in gdb. Just use "``set DebugFlag=0``" or "``set
+DebugFlag=1``" from the gdb prompt while the program is running. If the program
+hasn't been started yet, you can always just run it with ``-debug``.
+
+.. _DEBUG_TYPE:
+
+Fine grained debug info with ``DEBUG_TYPE`` and the ``-debug-only`` option
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes you may find yourself in a situation where enabling ``-debug`` just
+turns on **too much** information (such as when working on the code generator).
+If you want to enable debug information with more fine-grained control, you
+define the ``DEBUG_TYPE`` macro and use the ``-debug-only`` option as follows:
+
+.. code-block:: c++
+
+ #undef DEBUG_TYPE
+ DEBUG(errs() << "No debug type\n");
+ #define DEBUG_TYPE "foo"
+ DEBUG(errs() << "'foo' debug type\n");
+ #undef DEBUG_TYPE
+ #define DEBUG_TYPE "bar"
+ DEBUG(errs() << "'bar' debug type\n");
+ #undef DEBUG_TYPE
+ #define DEBUG_TYPE ""
+ DEBUG(errs() << "No debug type (2)\n");
+
+Then you can run your pass like this:
+
+.. code-block:: none
+
+ $ opt < a.bc > /dev/null -mypass
+ <no output>
+ $ opt < a.bc > /dev/null -mypass -debug
+ No debug type
+ 'foo' debug type
+ 'bar' debug type
+ No debug type (2)
+ $ opt < a.bc > /dev/null -mypass -debug-only=foo
+ 'foo' debug type
+ $ opt < a.bc > /dev/null -mypass -debug-only=bar
+ 'bar' debug type
+
+Of course, in practice, you should only set ``DEBUG_TYPE`` at the top of a file,
+to specify the debug type for the entire module (if you do this before you
+``#include "llvm/Support/Debug.h"``, you don't have to insert the ugly
+``#undef``'s). Also, you should use names more meaningful than "foo" and "bar",
+because there is no system in place to ensure that names do not conflict. If
+two different modules use the same string, they will all be turned on when the
+name is specified. This allows, for example, all debug information for
+instruction scheduling to be enabled with ``-debug-only=InstrSched``, even if
+the source lives in multiple files.
+
+The ``DEBUG_WITH_TYPE`` macro is also available for situations where you would
+like to set ``DEBUG_TYPE``, but only for one specific ``DEBUG`` statement. It
+takes an additional first parameter, which is the type to use. For example, the
+preceding example could be written as:
+
+.. code-block:: c++
+
+ DEBUG_WITH_TYPE("", errs() << "No debug type\n");
+ DEBUG_WITH_TYPE("foo", errs() << "'foo' debug type\n");
+ DEBUG_WITH_TYPE("bar", errs() << "'bar' debug type\n");
+ DEBUG_WITH_TYPE("", errs() << "No debug type (2)\n");
+
+.. _Statistic:
+
+The ``Statistic`` class & ``-stats`` option
+-------------------------------------------
+
+The ``llvm/ADT/Statistic.h`` (`doxygen
+<http://llvm.org/doxygen/Statistic_8h-source.html>`__) file provides a class
+named ``Statistic`` that is used as a unified way to keep track of what the LLVM
+compiler is doing and how effective various optimizations are. It is useful to
+see what optimizations are contributing to making a particular program run
+faster.
+
+Often you may run your pass on some big program, and you're interested to see
+how many times it makes a certain transformation. Although you can do this with
+hand inspection, or some ad-hoc method, this is a real pain and not very useful
+for big programs. Using the ``Statistic`` class makes it very easy to keep
+track of this information, and the calculated information is presented in a
+uniform manner with the rest of the passes being executed.
+
+There are many examples of ``Statistic`` uses, but the basics of using it are as
+follows:
+
+#. Define your statistic like this:
+
+ .. code-block:: c++
+
+ #define DEBUG_TYPE "mypassname" // This goes before any #includes.
+ STATISTIC(NumXForms, "The # of times I did stuff");
+
+ The ``STATISTIC`` macro defines a static variable, whose name is specified by
+ the first argument. The pass name is taken from the ``DEBUG_TYPE`` macro, and
+ the description is taken from the second argument. The variable defined
+ ("NumXForms" in this case) acts like an unsigned integer.
+
+#. Whenever you make a transformation, bump the counter:
+
+ .. code-block:: c++
+
+ ++NumXForms; // I did stuff!
+
+That's all you have to do. To get '``opt``' to print out the statistics
+gathered, use the '``-stats``' option:
+
+.. code-block:: none
+
+ $ opt -stats -mypassname < program.bc > /dev/null
+ ... statistics output ...
+
+When running ``opt`` on a C file from the SPEC benchmark suite, it gives a
+report that looks like this:
+
+.. code-block:: none
+
+ 7646 bitcodewriter - Number of normal instructions
+ 725 bitcodewriter - Number of oversized instructions
+ 129996 bitcodewriter - Number of bitcode bytes written
+ 2817 raise - Number of insts DCEd or constprop'd
+ 3213 raise - Number of cast-of-self removed
+ 5046 raise - Number of expression trees converted
+ 75 raise - Number of other getelementptr's formed
+ 138 raise - Number of load/store peepholes
+ 42 deadtypeelim - Number of unused typenames removed from symtab
+ 392 funcresolve - Number of varargs functions resolved
+ 27 globaldce - Number of global variables removed
+ 2 adce - Number of basic blocks removed
+ 134 cee - Number of branches revectored
+ 49 cee - Number of setcc instruction eliminated
+ 532 gcse - Number of loads removed
+ 2919 gcse - Number of instructions removed
+ 86 indvars - Number of canonical indvars added
+ 87 indvars - Number of aux indvars removed
+ 25 instcombine - Number of dead inst eliminate
+ 434 instcombine - Number of insts combined
+ 248 licm - Number of load insts hoisted
+ 1298 licm - Number of insts hoisted to a loop pre-header
+ 3 licm - Number of insts hoisted to multiple loop preds (bad, no loop pre-header)
+ 75 mem2reg - Number of alloca's promoted
+ 1444 cfgsimplify - Number of blocks simplified
+
+Obviously, with so many optimizations, having a unified framework for this stuff
+is very nice. Making your pass fit well into the framework makes it more
+maintainable and useful.
+
+.. _ViewGraph:
+
+Viewing graphs while debugging code
+-----------------------------------
+
+Several of the important data structures in LLVM are graphs: for example CFGs
+made out of LLVM :ref:`BasicBlocks <BasicBlock>`, CFGs made out of LLVM
+:ref:`MachineBasicBlocks <MachineBasicBlock>`, and :ref:`Instruction Selection
+DAGs <SelectionDAG>`. In many cases, while debugging various parts of the
+compiler, it is nice to instantly visualize these graphs.
+
+LLVM provides several callbacks that are available in a debug build to do
+exactly that. If you call the ``Function::viewCFG()`` method, for example, the
+current LLVM tool will pop up a window containing the CFG for the function where
+each basic block is a node in the graph, and each node contains the instructions
+in the block. Similarly, there also exist ``Function::viewCFGOnly()`` (which
+does not include the instructions), the ``MachineFunction::viewCFG()`` and
+``MachineFunction::viewCFGOnly()``, and the ``SelectionDAG::viewGraph()``
+methods. Within GDB, for example, you can usually use something like ``call
+DAG.viewGraph()`` to pop up a window. Alternatively, you can sprinkle calls to
+these functions in your code in places you want to debug.
+
+Getting this to work requires a small amount of configuration. On Unix systems
+with X11, install the `graphviz <http://www.graphviz.org>`_ toolkit, and make
+sure 'dot' and 'gv' are in your path. If you are running on Mac OS X, download
+and install the Mac OS X `Graphviz program
+<http://www.pixelglow.com/graphviz/>`_ and add
+``/Applications/Graphviz.app/Contents/MacOS/`` (or wherever you install it) to
+your path. Once your system and path are set up, rerun the LLVM configure
+script and rebuild LLVM to enable this functionality.
+
+``SelectionDAG`` has been extended to make it easier to locate *interesting*
+nodes in large complex graphs. From gdb, if you ``call DAG.setGraphColor(node,
+"color")``, then the next ``call DAG.viewGraph()`` would highlight the node in
+the specified color (choices of colors can be found at `colors
+<http://www.graphviz.org/doc/info/colors.html>`_). More complex node attributes
+can be provided with ``call DAG.setGraphAttrs(node, "attributes")`` (choices can
+be found at `Graph attributes <http://www.graphviz.org/doc/info/attrs.html>`_).
+If you want to restart and clear all the current graph attributes, then you can
+``call DAG.clearGraphAttrs()``.
+
+Note that graph visualization features are compiled out of Release builds to
+reduce file size. This means that you need a Debug+Asserts or Release+Asserts
+build to use these features.
+
+.. _datastructure:
+
+Picking the Right Data Structure for a Task
+===========================================
+
+LLVM has a plethora of data structures in the ``llvm/ADT/`` directory, and we
+commonly use STL data structures. This section describes the trade-offs you
+should consider when you pick one.
+
+The first step is a "choose your own adventure": do you want a sequential
+container, a set-like container, or a map-like container? The most important
+thing when choosing a container is the algorithmic properties of how you plan to
+access the container. Based on that, you should use:
+
+
+* a :ref:`map-like <ds_map>` container if you need efficient look-up of a
+ value based on another value. Map-like containers also support efficient
+ queries for containment (whether a key is in the map). Map-like containers
+ generally do not support efficient reverse mapping (values to keys). If you
+ need that, use two maps. Some map-like containers also support efficient
+ iteration through the keys in sorted order. Map-like containers are the most
+ expensive sort; use them only if you need one of these capabilities.
+
+* a :ref:`set-like <ds_set>` container if you need to put a bunch of stuff into
+ a container that automatically eliminates duplicates. Some set-like
+ containers support efficient iteration through the elements in sorted order.
+ Set-like containers are more expensive than sequential containers.
+
+* a :ref:`sequential <ds_sequential>` container provides the most efficient way
+ to add elements and keeps track of the order in which they are added to the
+ collection. They permit duplicates and support efficient iteration, but do not
+ support efficient look-up based on a key.
+
+* a :ref:`string <ds_string>` container is a specialized sequential container or
+ reference structure that is used for character or byte arrays.
+
+* a :ref:`bit <ds_bit>` container provides an efficient way to store and
+ perform set operations on sets of numeric id's, while automatically
+ eliminating duplicates. Bit containers require a maximum of 1 bit for each
+ identifier you want to store.
+
+Once the proper category of container is determined, you can fine tune the
+memory use, constant factors, and cache behaviors of access by intelligently
+picking a member of the category. Note that constant factors and cache behavior
+can be a big deal. If you have a vector that usually only contains a few
+elements (but could contain many), for example, it's much better to use
+:ref:`SmallVector <dss_smallvector>` than :ref:`vector <dss_vector>`. Doing so
+avoids (relatively) expensive malloc/free calls, which dwarf the cost of adding
+the elements to the container.
+
+.. _ds_sequential:
+
+Sequential Containers (std::vector, std::list, etc)
+---------------------------------------------------
+
+There are a variety of sequential containers available for you, based on your
+needs. Pick the first in this section that will do what you want.
+
+.. _dss_arrayref:
+
+llvm/ADT/ArrayRef.h
+^^^^^^^^^^^^^^^^^^^
+
+The ``llvm::ArrayRef`` class is the preferred class to use in an interface that
+accepts a sequential list of elements in memory and just reads from them. By
+taking an ``ArrayRef``, the API can be passed a fixed size array, an
+``std::vector``, an ``llvm::SmallVector`` and anything else that is contiguous
+in memory.
+
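+As a hedged sketch (the ``sumValues`` function is hypothetical), an API taking
+an ``ArrayRef`` can be called with any contiguous container:
+
+.. code-block:: c++
+
+ // Read-only API: it inspects the elements but never stores the ArrayRef.
+ unsigned sumValues(ArrayRef<unsigned> Values) {
+   unsigned Sum = 0;
+   for (unsigned i = 0, e = Values.size(); i != e; ++i)
+     Sum += Values[i];
+   return Sum;
+ }
+
+ unsigned Fixed[] = {1, 2, 3};
+ std::vector<unsigned> Vec(3, 42);
+ SmallVector<unsigned, 8> Small;
+ sumValues(Fixed);  // fixed size array
+ sumValues(Vec);    // std::vector
+ sumValues(Small);  // SmallVector
+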
+.. _dss_fixedarrays:
+
+Fixed Size Arrays
+^^^^^^^^^^^^^^^^^
+
+Fixed size arrays are very simple and very fast. They are good if you know
+exactly how many elements you have, or you have a (low) upper bound on how many
+you have.
+
+.. _dss_heaparrays:
+
+Heap Allocated Arrays
+^^^^^^^^^^^^^^^^^^^^^
+
+Heap allocated arrays (``new[]`` + ``delete[]``) are also simple. They are good
+if the number of elements is variable, if you know how many elements you will
+need before the array is allocated, and if the array is usually large (if not,
+consider a :ref:`SmallVector <dss_smallvector>`). The cost of a heap allocated
+array is the cost of the new/delete (aka malloc/free). Also note that if you
+are allocating an array of a type with a constructor, the constructor and
+destructors will be run for every element in the array (re-sizable vectors only
+construct those elements actually used).
+
+.. _dss_tinyptrvector:
+
+llvm/ADT/TinyPtrVector.h
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+``TinyPtrVector<Type>`` is a highly specialized collection class that is
+optimized to avoid allocation in the case when a vector has zero or one
+elements. It has two major restrictions: 1) it can only hold values of pointer
+type, and 2) it cannot hold a null pointer.
+
+Since this container is highly specialized, it is rarely used.
+
+.. _dss_smallvector:
+
+llvm/ADT/SmallVector.h
+^^^^^^^^^^^^^^^^^^^^^^
+
+``SmallVector<Type, N>`` is a simple class that looks and smells just like
+``vector<Type>``: it supports efficient iteration, lays out elements in memory
+order (so you can do pointer arithmetic between elements), supports efficient
+push_back/pop_back operations, supports efficient random access to its elements,
+etc.
+
+The advantage of SmallVector is that it allocates space for some number of
+elements (N) **in the object itself**. Because of this, if the SmallVector is
+dynamically smaller than N, no malloc is performed. This can be a big win in
+cases where the malloc/free call is far more expensive than the code that
+fiddles around with the elements.
+
+This is good for vectors that are "usually small" (e.g. the number of
+predecessors/successors of a block is usually less than 8). On the other hand,
+this makes the size of the SmallVector itself large, so you don't want to
+allocate lots of them (doing so will waste a lot of space). As such,
+SmallVectors are most useful when on the stack.
+
+SmallVector also provides a nice portable and efficient replacement for
+``alloca``.
+
+.. note::
+
+ Prefer to use ``SmallVectorImpl<T>`` as a parameter type.
+
+ In APIs that don't care about the "small size" (most?), prefer to use
+ the ``SmallVectorImpl<T>`` class, which is basically just the "vector
+ header" (and methods) without the elements allocated after it. Note that
+ ``SmallVector<T, N>`` inherits from ``SmallVectorImpl<T>`` so the
+ conversion is implicit and costs nothing. E.g.
+
+ .. code-block:: c++
+
+ // BAD: Clients cannot pass e.g. SmallVector<Foo, 4>.
+ void hardcodedSmallSize(SmallVector<Foo, 2> &Out);
+ // GOOD: Clients can pass any SmallVector<Foo, N>.
+ void allowsAnySmallSize(SmallVectorImpl<Foo> &Out);
+
+ void someFunc() {
+ SmallVector<Foo, 8> Vec;
+ hardcodedSmallSize(Vec); // Error.
+ allowsAnySmallSize(Vec); // Works.
+ }
+
+ Even though it has "``Impl``" in the name, this is so widely used that
+ it really isn't "private to the implementation" anymore. A name like
+ ``SmallVectorHeader`` would be more appropriate.
+
+.. _dss_vector:
+
+<vector>
+^^^^^^^^
+
+``std::vector`` is well loved and respected. It is useful when SmallVector
+isn't: when the size of the vector is often large (thus the small optimization
+will rarely be a benefit) or if you will be allocating many instances of the
+vector itself (which would waste space for elements that aren't in the
+container). vector is also useful when interfacing with code that expects
+vectors :).
+
+One worthwhile note about std::vector: avoid code like this:
+
+.. code-block:: c++
+
+ for ( ... ) {
+ std::vector<foo> V;
+ // make use of V.
+ }
+
+Instead, write this as:
+
+.. code-block:: c++
+
+ std::vector<foo> V;
+ for ( ... ) {
+ // make use of V.
+ V.clear();
+ }
+
+Doing so will save (at least) one heap allocation and free per iteration of the
+loop.
+
+.. _dss_deque:
+
+<deque>
+^^^^^^^
+
+``std::deque`` is, in some senses, a generalized version of ``std::vector``.
+Like ``std::vector``, it provides constant time random access and other similar
+properties, but it also provides efficient access to the front of the list. It
+does not, however, guarantee that its elements are contiguous in memory.
+
+In exchange for this extra flexibility, ``std::deque`` has significantly higher
+constant factor costs than ``std::vector``. If possible, use ``std::vector`` or
+something cheaper.
+
+.. _dss_list:
+
+<list>
+^^^^^^
+
+``std::list`` is an extremely inefficient class that is rarely useful. It
+performs a heap allocation for every element inserted into it, thus having an
+extremely high constant factor, particularly for small data types.
+``std::list`` also only supports bidirectional iteration, not random access
+iteration.
+
+In exchange for this high cost, std::list supports efficient access to both ends
+of the list (like ``std::deque``, but unlike ``std::vector`` or
+``SmallVector``). In addition, the iterator invalidation characteristics of
+std::list are stronger than those of a vector class: inserting or removing an
+element does not invalidate iterators or pointers to other elements in the
+list.
+
+.. _dss_ilist:
+
+llvm/ADT/ilist.h
+^^^^^^^^^^^^^^^^
+
+``ilist<T>`` implements an 'intrusive' doubly-linked list. It is intrusive,
+because it requires the element to store and provide access to the prev/next
+pointers for the list.
+
+``ilist`` has the same drawbacks as ``std::list``, and additionally requires an
+``ilist_traits`` implementation for the element type, but it provides some novel
+characteristics. In particular, it can efficiently store polymorphic objects,
+the traits class is informed when an element is inserted or removed from the
+list, and ``ilist``\ s are guaranteed to support a constant-time splice
+operation.
+
+These properties are exactly what we want for things like ``Instruction``\ s and
+basic blocks, which is why these are implemented with ``ilist``\ s.
+
+Related classes of interest are explained in the following subsections:
+
+* :ref:`ilist_traits <dss_ilist_traits>`
+
+* :ref:`iplist <dss_iplist>`
+
+* :ref:`llvm/ADT/ilist_node.h <dss_ilist_node>`
+
+* :ref:`Sentinels <dss_ilist_sentinel>`
+
+.. _dss_packedvector:
+
+llvm/ADT/PackedVector.h
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Useful for storing a vector of values using only a small number of bits for each
+value. Apart from the standard operations of a vector-like container, it can
+also perform an 'or' set operation.
+
+For example:
+
+.. code-block:: c++
+
+ enum State {
+ None = 0x0,
+ FirstCondition = 0x1,
+ SecondCondition = 0x2,
+ Both = 0x3
+ };
+
+ State get() {
+ PackedVector<State, 2> Vec1;
+ Vec1.push_back(FirstCondition);
+
+ PackedVector<State, 2> Vec2;
+ Vec2.push_back(SecondCondition);
+
+ Vec1 |= Vec2;
+ return Vec1[0]; // returns 'Both'.
+ }
+
+.. _dss_ilist_traits:
+
+ilist_traits
+^^^^^^^^^^^^
+
+``ilist_traits<T>`` is ``ilist<T>``'s customization mechanism. ``iplist<T>``
+(and consequently ``ilist<T>``) publicly derive from this traits class.
+
+.. _dss_iplist:
+
+iplist
+^^^^^^
+
+``iplist<T>`` is ``ilist<T>``'s base and as such supports a slightly narrower
+interface. Notably, inserters from ``T&`` are absent.
+
+``ilist_traits<T>`` is a public base of this class and can be used for a wide
+variety of customizations.
+
+.. _dss_ilist_node:
+
+llvm/ADT/ilist_node.h
+^^^^^^^^^^^^^^^^^^^^^
+
+``ilist_node<T>`` implements the forward and backward links that are expected
+by ``ilist<T>`` (and analogous containers) in the default manner.
+
+``ilist_node<T>``\ s are meant to be embedded in the node type ``T``, usually
+``T`` publicly derives from ``ilist_node<T>``.
+
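+A minimal sketch, with ``MyNode`` as a hypothetical node type:
+
+.. code-block:: c++
+
+ // The prev/next links live inside the node itself, so linking a MyNode
+ // into an ilist<MyNode> needs no separate per-element allocation.
+ class MyNode : public ilist_node<MyNode> {
+   int Data;
+ public:
+   MyNode(int D) : Data(D) {}
+ };
+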
+.. _dss_ilist_sentinel:
+
+Sentinels
+^^^^^^^^^
+
+``ilist``\ s have another specialty that must be considered. To be a good
+citizen in the C++ ecosystem, an ``ilist`` needs to support the standard
+container operations, such as ``begin`` and ``end`` iterators, etc. Also,
+``operator--`` must work correctly on the ``end`` iterator in the case of
+non-empty ``ilist``\ s.
+
+The only sensible solution to this problem is to allocate a so-called *sentinel*
+along with the intrusive list, which serves as the ``end`` iterator, providing
+the back-link to the last element. However, conforming to C++ convention, it
+is illegal to apply ``operator++`` to the sentinel, and the sentinel must not
+be dereferenced.
+
+These constraints give the ``ilist`` some freedom in how to allocate and store
+the sentinel. The corresponding policy is dictated by ``ilist_traits<T>``. By
+default, a ``T`` gets heap-allocated whenever the need for a sentinel arises.
+
+While the default policy is sufficient in most cases, it may break down when
+``T`` does not provide a default constructor. Also, in the case of many
+instances of ``ilist``\ s, the memory overhead of the associated sentinels is
+wasted. To alleviate the situation with numerous and voluminous
+``T``-sentinels, sometimes a trick is employed, leading to *ghostly sentinels*.
+
+Ghostly sentinels are obtained by specially-crafted ``ilist_traits<T>`` which
+superpose the sentinel with the ``ilist`` instance in memory. Pointer
+arithmetic is used to obtain the sentinel, which is relative to the ``ilist``'s
+``this`` pointer. The ``ilist`` is augmented by an extra pointer, which serves
+as the back-link of the sentinel. This is the only field in the ghostly
+sentinel which can be legally accessed.
+
+.. _dss_other:
+
+Other Sequential Container options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Other STL containers are available, such as ``std::string``.
+
+There are also various STL adapter classes such as ``std::queue``,
+``std::priority_queue``, ``std::stack``, etc. These provide simplified access
+to an underlying container but don't affect the cost of the container itself.
+
+.. _ds_string:
+
+String-like containers
+----------------------
+
+There are a variety of ways to pass around and use strings in C and C++, and
+LLVM adds a few new options to choose from. Pick the first option on this list
+that will do what you need; they are ordered according to their relative cost.
+
+Note that it is generally preferred to *not* pass strings around as ``const
+char*``'s. These have a number of problems, including the fact that they
+cannot represent embedded nul ("\0") characters and that their length is not
+efficiently available. The general replacement for '``const char*``' is
+StringRef.
+
+For more information on choosing string containers for APIs, please see
+:ref:`Passing Strings <string_apis>`.
+
+.. _dss_stringref:
+
+llvm/ADT/StringRef.h
+^^^^^^^^^^^^^^^^^^^^
+
+The StringRef class is a simple value class that contains a pointer to a
+character array and a length, and is closely related to the :ref:`ArrayRef
+<dss_arrayref>` class (but specialized for arrays of characters). Because
+StringRef carries a length with it, it safely handles strings with embedded nul
+characters, getting the length does not require a strlen call, and it even has
+very convenient APIs for slicing and dicing the character range that it
+represents.
+
+StringRef is ideal for passing simple strings around that are known to be live,
+either because they are C string literals, std::strings, C arrays, or
+SmallVectors. Each of these cases has an efficient implicit conversion to
+StringRef, which doesn't result in a dynamic strlen being executed.
+
+StringRef has a few major limitations which make more powerful string containers
+useful:
+
+#. You cannot directly convert a StringRef to a 'const char*' because there is
+ no way to add a trailing nul (unlike the .c_str() method on various stronger
+ classes).
+
+#. StringRef doesn't own or keep alive the underlying string bytes.
+ As such it can easily lead to dangling pointers, and is not suitable for
+ embedding in datastructures in most cases (instead, use an std::string or
+ something like that).
+
+#. For the same reason, StringRef cannot be used as the return value of a
+ method if the method "computes" the result string. Instead, use std::string.
+
+#. StringRef's do not allow you to mutate the pointed-to string bytes and it
+ doesn't allow you to insert or remove bytes from the range. For editing
+ operations like this, it interoperates with the :ref:`Twine <dss_twine>`
+ class.
+
+Because of its strengths and limitations, it is very common for a function to
+take a StringRef and for a method on an object to return a StringRef that points
+into some string that it owns.
+
+.. _dss_twine:
+
+llvm/ADT/Twine.h
+^^^^^^^^^^^^^^^^
+
+The Twine class is used as an intermediary datatype for APIs that want to take a
+string that can be constructed inline with a series of concatenations. Twine
+works by forming recursive instances of the Twine datatype (a simple value
+object) on the stack as temporary objects, linking them together into a tree
+which is then linearized when the Twine is consumed. Twine is only safe to use
+as the argument to a function, and should always be a const reference, e.g.:
+
+.. code-block:: c++
+
+ void foo(const Twine &T);
+ ...
+ StringRef X = ...
+ unsigned i = ...
+ foo(X + "." + Twine(i));
+
+This example forms a string like "blarg.42" by concatenating the values
+together, and does not form intermediate strings containing "blarg" or "blarg.".
+
+Because Twine is constructed with temporary objects on the stack, and because
+these instances are destroyed at the end of the current statement, it is an
+inherently dangerous API. For example, this simple variant contains undefined
+behavior and will probably crash:
+
+.. code-block:: c++
+
+ void foo(const Twine &T);
+ ...
+ StringRef X = ...
+ unsigned i = ...
+ const Twine &Tmp = X + "." + Twine(i);
+ foo(Tmp);
+
+... because the temporaries are destroyed before the call. That said, Twines
+are much more efficient than intermediate std::string temporaries, and they
+work really well with StringRef. Just be aware of their limitations.
+
+.. _dss_smallstring:
+
+llvm/ADT/SmallString.h
+^^^^^^^^^^^^^^^^^^^^^^
+
+SmallString is a subclass of :ref:`SmallVector <dss_smallvector>` that adds
+some convenience APIs like ``+=`` that takes StringRefs. SmallString avoids
+allocating memory in the case when the preallocated space is enough to hold its
+data, and it falls back to general heap allocation when required. Since it owns
+its data, it is very safe to use and supports full mutation of the string.
+
+Like SmallVector, the big downside of SmallString is its sizeof. While it is
+optimized for small strings, a SmallString itself is not particularly small.
+This means that SmallStrings work great as temporary scratch buffers on the
+stack, but should not generally be put on the heap: it is very rare to see a
+SmallString as the member of a frequently-allocated heap data structure or
+returned by-value.
+
+.. _dss_stdstring:
+
+std::string
+^^^^^^^^^^^
+
+The standard C++ std::string class is a very general class that (like
+SmallString) owns its underlying data. sizeof(std::string) is very reasonable
+so it can be embedded into heap data structures and returned by-value. On the
+other hand, std::string is highly inefficient for inline editing (e.g.
+concatenating a bunch of stuff together) and because it is provided by the
+standard library, its performance characteristics depend a lot on the host
+standard library (e.g. libc++ and MSVC provide a highly optimized string class,
+GCC contains a really slow implementation).
+
+The major disadvantage of std::string is that almost every operation that makes
+them larger can allocate memory, which is slow. As such, it is better to use
+SmallVector or Twine as a scratch buffer, but then use std::string to persist
+the result.
+
+.. _ds_set:
+
+Set-Like Containers (std::set, SmallSet, SetVector, etc)
+--------------------------------------------------------
+
+Set-like containers are useful when you need to canonicalize multiple values
+into a single representation. There are several different choices for how to do
+this, providing various trade-offs.
+
+.. _dss_sortedvectorset:
+
+A sorted 'vector'
+^^^^^^^^^^^^^^^^^
+
+If you intend to insert a lot of elements, then do a lot of queries, a great
+approach is to use a vector (or other sequential container) with
+std::sort+std::unique to remove duplicates. This approach works really well if
+your usage pattern has these two distinct phases (insert then query), and can be
+coupled with a good choice of :ref:`sequential container <ds_sequential>`.
+
+This combination provides several nice properties: the result data is
+contiguous in memory (good for cache locality), has few allocations, is easy to
+address (iterators in the final vector are just indices or pointers), and can be
+efficiently queried with a standard binary search (e.g.
+``std::lower_bound``; if you want the whole range of elements comparing
+equal, use ``std::equal_range``).
+
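+A hedged sketch of the insert-then-query pattern (requires ``<vector>`` and
+``<algorithm>``):
+
+.. code-block:: c++
+
+ std::vector<unsigned> Set;
+ // Phase 1: insert everything, duplicates and all.
+ Set.push_back(3); Set.push_back(1); Set.push_back(3);
+
+ // Canonicalize once: sort, then erase adjacent duplicates.
+ std::sort(Set.begin(), Set.end());
+ Set.erase(std::unique(Set.begin(), Set.end()), Set.end());
+
+ // Phase 2: query with a standard binary search.
+ bool HasOne = std::binary_search(Set.begin(), Set.end(), 1u);
+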
+.. _dss_smallset:
+
+llvm/ADT/SmallSet.h
+^^^^^^^^^^^^^^^^^^^
+
+If you have a set-like data structure that is usually small and whose elements
+are reasonably small, a ``SmallSet<Type, N>`` is a good choice. This set has
+space for N elements in place (thus, if the set is dynamically smaller than N,
+no malloc traffic is required) and accesses them with a simple linear search.
+When the set grows beyond 'N' elements, it allocates a more expensive
+representation that guarantees efficient access (for most types, it falls back
+to std::set, but for pointers it uses something far better, :ref:`SmallPtrSet
+<dss_smallptrset>`).
+
+The magic of this class is that it handles small sets extremely efficiently, but
+gracefully handles extremely large sets without loss of efficiency. The
+drawback is that the interface is quite small: it supports insertion, queries
+and erasing, but does not support iteration.
+
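+For example, a sketch of deduplicating values as they are visited (``Vals`` and
+``N`` are hypothetical):
+
+.. code-block:: c++
+
+ SmallSet<unsigned, 4> Seen;    // no malloc while the set holds <= 4
+ for (unsigned i = 0; i != N; ++i) {
+   if (Seen.count(Vals[i]))
+     continue;                  // duplicate; already processed
+   Seen.insert(Vals[i]);
+   // ... process Vals[i] for the first time ...
+ }
+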
+.. _dss_smallptrset:
+
+llvm/ADT/SmallPtrSet.h
+^^^^^^^^^^^^^^^^^^^^^^
+
+SmallPtrSet has all the advantages of ``SmallSet`` (and a ``SmallSet`` of
+pointers is transparently implemented with a ``SmallPtrSet``), but also supports
+iterators. If more than 'N' insertions are performed, a single quadratically
+probed hash table is allocated and grows as needed, providing extremely
+efficient access (constant time insertion/deletion/queries with low constant
+factors) while remaining very stingy with malloc traffic.
+
+Note that, unlike ``std::set``, the iterators of ``SmallPtrSet`` are invalidated
+whenever an insertion occurs. Also, the values visited by the iterators are not
+visited in sorted order.
+
+.. _dss_denseset:
+
+llvm/ADT/DenseSet.h
+^^^^^^^^^^^^^^^^^^^
+
+DenseSet is a simple quadratically probed hash table. It excels at supporting
+small values: it uses a single allocation to hold all of the pairs that are
+currently inserted in the set. DenseSet is a great way to unique small values
+that are not simple pointers (use :ref:`SmallPtrSet <dss_smallptrset>` for
+pointers). Note that DenseSet has the same requirements for the value type that
+:ref:`DenseMap <dss_densemap>` has.
+
+.. _dss_sparseset:
+
+llvm/ADT/SparseSet.h
+^^^^^^^^^^^^^^^^^^^^
+
+SparseSet holds a small number of objects identified by unsigned keys of
+moderate size. It uses a lot of memory, but provides operations that are almost
+as fast as a vector. Typical keys are physical registers, virtual registers, or
+numbered basic blocks.
+
+SparseSet is useful for algorithms that need very fast clear/find/insert/erase
+and fast iteration over small sets. It is not intended for building composite
+data structures.
+
+.. _dss_sparsemultiset:
+
+llvm/ADT/SparseMultiSet.h
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+SparseMultiSet adds multiset behavior to SparseSet, while retaining SparseSet's
+desirable attributes. Like SparseSet, it typically uses a lot of memory, but
+provides operations that are almost as fast as a vector. Typical keys are
+physical registers, virtual registers, or numbered basic blocks.
+
+SparseMultiSet is useful for algorithms that need very fast
+clear/find/insert/erase of the entire collection, and iteration over sets of
+elements sharing a key. It is often a more efficient choice than using composite
+data structures (e.g. vector-of-vectors, map-of-vectors). It is not intended for
+building composite data structures.
+
+.. _dss_FoldingSet:
+
+llvm/ADT/FoldingSet.h
+^^^^^^^^^^^^^^^^^^^^^
+
+FoldingSet is an aggregate class that is really good at uniquing
+expensive-to-create or polymorphic objects. It is a combination of a chained
+hash table with intrusive links (uniqued objects are required to inherit from
+FoldingSetNode) that uses :ref:`SmallVector <dss_smallvector>` as part of its ID
+process.
+
+Consider a case where you want to implement a "getOrCreateFoo" method for a
+complex object (for example, a node in the code generator). The client has a
+description of **what** it wants to generate (it knows the opcode and all the
+operands), but we don't want to 'new' a node, then try inserting it into a set
+only to find out it already exists, at which point we would have to delete it
+and return the node that already exists.
+
+To support this style of client, FoldingSet performs a query with a
+FoldingSetNodeID (which wraps SmallVector) that can be used to describe the
+element that we want to query for. The query either returns the element
+matching the ID or it returns an opaque ID that indicates where insertion should
+take place. Construction of the ID usually does not require heap traffic.
+
+Because FoldingSet uses intrusive links, it can support polymorphic objects in
+the set (for example, you can have SDNode instances mixed with LoadSDNodes).
+Because the elements are individually allocated, pointers to the elements are
+stable: inserting or removing elements does not invalidate any pointers to other
+elements.
+
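+A hedged sketch of the "getOrCreateFoo" idiom described above; ``MyNode`` is
+hypothetical, must inherit from ``FoldingSetNode``, and must implement a
+``Profile`` method that adds the same data to the ID:
+
+.. code-block:: c++
+
+ FoldingSet<MyNode> Nodes;
+
+ MyNode *getOrCreateNode(unsigned Opcode, Value *Op) {
+   // Describe *what* we want without constructing it.
+   FoldingSetNodeID ID;
+   ID.AddInteger(Opcode);
+   ID.AddPointer(Op);
+
+   void *InsertPos;
+   if (MyNode *N = Nodes.FindNodeOrInsertPos(ID, InsertPos))
+     return N;                 // already exists; no 'new' was needed
+
+   MyNode *N = new MyNode(Opcode, Op);
+   Nodes.InsertNode(N, InsertPos);
+   return N;
+ }
+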
+.. _dss_set:
+
+<set>
+^^^^^
+
+``std::set`` is a reasonable all-around set class, which is decent at many
+things but great at nothing. std::set allocates memory for each element
+inserted (thus it is very malloc intensive) and typically stores three pointers
+per element in the set (thus adding a large amount of per-element space
+overhead). It offers guaranteed log(n) performance, which is not particularly
+fast from a complexity standpoint (particularly if the elements of the set are
+expensive to compare, like strings), and has extremely high constant factors for
+lookup, insertion and removal.
+
+The advantages of std::set are that its iterators are stable (deleting or
+inserting an element from the set does not affect iterators or pointers to other
+elements) and that iteration over the set is guaranteed to be in sorted order.
+If the elements in the set are large, then the relative overhead of the pointers
+and malloc traffic is not a big deal, but if the elements of the set are small,
+std::set is almost never a good choice.
+
+.. _dss_setvector:
+
+llvm/ADT/SetVector.h
+^^^^^^^^^^^^^^^^^^^^
+
+LLVM's ``SetVector<Type>`` is an adapter class that combines your choice of a
+set-like container along with a :ref:`Sequential Container <ds_sequential>`. The
+important property that this provides is efficient insertion with uniquing
+(duplicate elements are ignored) with iteration support. It implements this by
+inserting elements into both a set-like container and the sequential container,
+using the set-like container for uniquing and the sequential container for
+iteration.
+
+The difference between SetVector and other sets is that the order of iteration
+is guaranteed to match the order of insertion into the SetVector. This property
+is really important for things like sets of pointers. Because pointer values
+are non-deterministic (e.g. vary across runs of the program on different
+machines), iterating over the pointers in a plain set will not be in a
+well-defined order.
+
+The drawback of SetVector is that it requires twice as much space as a normal
+set and has the sum of constant factors from the set-like container and the
+sequential container that it uses. Use it **only** if you need to iterate over
+the elements in a deterministic order. SetVector is also expensive to delete
+elements out of (linear time), unless you use its ``pop_back`` method, which is
+faster.
+
+``SetVector`` is an adapter class that defaults to using ``std::vector`` and a
+size 16 ``SmallSet`` for the underlying containers, so it is quite expensive.
+However, ``"llvm/ADT/SetVector.h"`` also provides a ``SmallSetVector`` class,
+which defaults to using a ``SmallVector`` and ``SmallSet`` of a specified size.
+If you use this, and if your sets are dynamically smaller than ``N``, you will
+save a lot of heap traffic.
+
+.. _dss_uniquevector:
+
+llvm/ADT/UniqueVector.h
+^^^^^^^^^^^^^^^^^^^^^^^
+
+UniqueVector is similar to :ref:`SetVector <dss_setvector>` but it retains a
+unique ID for each element inserted into the set. It internally contains a map
+and a vector, and it assigns a unique ID for each value inserted into the set.
+
+UniqueVector is very expensive: its cost is the sum of the cost of maintaining
+both the map and vector, it has high complexity, high constant factors, and
+produces a lot of malloc traffic. It should be avoided.
+
+.. _dss_immutableset:
+
+llvm/ADT/ImmutableSet.h
+^^^^^^^^^^^^^^^^^^^^^^^
+
+ImmutableSet is an immutable (functional) set implementation based on an AVL
+tree. Adding or removing elements is done through a Factory object and results
+in the creation of a new ImmutableSet object. If an ImmutableSet already exists
+with the given contents, then the existing one is returned; equality is compared
+with a FoldingSetNodeID. The time and space complexity of add or remove
+operations is logarithmic in the size of the original set.
+
+There is no method for returning an element of the set; you can only check for
+membership.
+
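+A brief sketch of the Factory-based interface:
+
+.. code-block:: c++
+
+ ImmutableSet<int>::Factory F;
+ ImmutableSet<int> Empty = F.getEmptySet();
+ ImmutableSet<int> S1 = F.add(Empty, 1);
+ ImmutableSet<int> S2 = F.add(S1, 2);   // S1 is unchanged
+ bool HasOne = S2.contains(1);          // membership test only
+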
+.. _dss_otherset:
+
+Other Set-Like Container Options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The STL provides several other options, such as std::multiset and the various
+"hash_set" like containers (whether from C++ TR1 or from the SGI library). We
+never use hash_set and unordered_set because they are generally very expensive
+(each insertion requires a malloc) and very non-portable.
+
+std::multiset is useful if you're not interested in elimination of duplicates,
+but has all the drawbacks of std::set. A sorted vector (where you don't delete
+duplicate entries) or some other approach is almost always better.
+
+.. _ds_map:
+
+Map-Like Containers (std::map, DenseMap, etc)
+---------------------------------------------
+
+Map-like containers are useful when you want to associate data to a key. As
+usual, there are a lot of different ways to do this. :)
+
+.. _dss_sortedvectormap:
+
+A sorted 'vector'
+^^^^^^^^^^^^^^^^^
+
+If your usage pattern follows a strict insert-then-query approach, you can
+trivially use the same approach as :ref:`sorted vectors for set-like containers
+<dss_sortedvectorset>`. The only difference is that your query function (which
+uses std::lower_bound to get efficient log(n) lookup) should only compare the
+key, not both the key and value. This yields the same advantages as sorted
+vectors for sets.
+
+.. _dss_stringmap:
+
+llvm/ADT/StringMap.h
+^^^^^^^^^^^^^^^^^^^^
+
+Strings are commonly used as keys in maps, and they are difficult to support
+efficiently: they are variable length, inefficient to hash and compare when
+long, expensive to copy, etc. StringMap is a specialized container designed to
+cope with these issues. It supports mapping an arbitrary range of bytes to an
+arbitrary other object.
+
+The StringMap implementation uses a quadratically-probed hash table, where the
+buckets store a pointer to the heap allocated entries (and some other stuff).
+The entries in the map must be heap allocated because the strings are variable
+length. The string data (key) and the element object (value) are stored in the
+same allocation with the string data immediately after the element object.
+This container guarantees that "``(char*)(&Value+1)``" points to the key string
+for a value.
+
+The StringMap is very fast for several reasons: quadratic probing is very cache
+efficient for lookups, the hash value of strings in buckets is not recomputed
+when looking up an element, StringMap rarely has to touch the memory for
+unrelated objects when looking up a value (even when hash collisions happen),
+hash table growth does not recompute the hash values for strings already in the
+table, and each pair in the map is stored in a single allocation (the string
+data is stored in the same allocation as the Value of a pair).
+
+StringMap also provides query methods that take byte ranges, so it only ever
+copies a string if a value is inserted into the table.
+
+StringMap iteration order, however, is not guaranteed to be deterministic, so
+any uses which require that should instead use a std::map.
+
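+A hedged sketch of basic StringMap usage (the particular mapping is made up):
+
+.. code-block:: c++
+
+ StringMap<unsigned> Widths;
+ Widths.GetOrCreateValue("i32").setValue(32);  // copies the key once
+
+ // Lookups never copy the string; absent keys yield a default value.
+ unsigned W = Widths.lookup("i32");   // 32
+ unsigned X = Widths.lookup("i64");   // 0 (not present)
+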
+.. _dss_indexmap:
+
+llvm/ADT/IndexedMap.h
+^^^^^^^^^^^^^^^^^^^^^
+
+IndexedMap is a specialized container for mapping small dense integers (or
+values that can be mapped to small dense integers) to some other type. It is
+internally implemented as a vector with a mapping function that maps the keys
+to the dense integer range.
+
+This is useful for cases like virtual registers in the LLVM code generator: they
+have a dense mapping that is offset by a compile-time constant (the first
+virtual register ID).
+
+.. _dss_densemap:
+
+llvm/ADT/DenseMap.h
+^^^^^^^^^^^^^^^^^^^
+
+DenseMap is a simple quadratically probed hash table. It excels at supporting
+small keys and values: it uses a single allocation to hold all of the pairs
+that are currently inserted in the map. DenseMap is a great way to map
+pointers to pointers, or map other small types to each other.
+
+There are several aspects of DenseMap that you should be aware of, however.
+The iterators in a DenseMap are invalidated whenever an insertion occurs,
+unlike std::map. Also, because DenseMap allocates space for a large number of
+key/value pairs (it starts with 64 by default), it will waste a lot of space if
+your keys or values are large. Finally, you must implement a partial
+specialization of DenseMapInfo for the key that you want, if it isn't already
+supported. This is required to tell DenseMap about two special marker values
+(which can never be inserted into the map) that it needs internally.
+
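+For example, a sketch of numbering the instructions in a function (``F`` and
+``Inst`` are hypothetical):
+
+.. code-block:: c++
+
+ DenseMap<Instruction *, unsigned> Order;
+ unsigned N = 0;
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+   Order[&*I] = N++;           // all pairs live in a single allocation
+
+ // Remember: DenseMap iterators are invalidated by any insertion.
+ DenseMap<Instruction *, unsigned>::iterator It = Order.find(Inst);
+ if (It != Order.end())
+   unsigned Pos = It->second;
+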
+DenseMap's find_as() method supports lookup operations using an alternate key
+type. This is useful in cases where the normal key type is expensive to
+construct, but cheap to compare against. The DenseMapInfo is responsible for
+defining the appropriate comparison and hashing methods for each alternate key
+type used.
+
+.. _dss_valuemap:
+
+llvm/ADT/ValueMap.h
+^^^^^^^^^^^^^^^^^^^
+
+ValueMap is a wrapper around a :ref:`DenseMap <dss_densemap>` mapping
+``Value*``\ s (or subclasses) to another type. When a Value is deleted or
+RAUW'ed, ValueMap will update itself so the new version of the key is mapped to
+the same value, just as if the key were a WeakVH. You can configure exactly how
+this happens, and what else happens on these two events, by passing a ``Config``
+parameter to the ValueMap template.
+
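+A hedged sketch (ValueMap mimics the DenseMap interface; ``V`` and ``NewV`` are
+hypothetical ``Value *``\ s):
+
+.. code-block:: c++
+
+ ValueMap<Value *, unsigned> VM;
+ VM[V] = 7;
+
+ // With the default Config, RAUW transparently remaps the entry:
+ V->replaceAllUsesWith(NewV);
+ unsigned Seven = VM.lookup(NewV);   // 7; the old key is gone
+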
+.. _dss_intervalmap:
+
+llvm/ADT/IntervalMap.h
+^^^^^^^^^^^^^^^^^^^^^^
+
+IntervalMap is a compact map for small keys and values. It maps key intervals
+instead of single keys, and it will automatically coalesce adjacent intervals.
+When the map only contains a few intervals, they are stored in the map object
+itself to avoid allocations.
+
+The IntervalMap iterators are quite big, so they should not be passed around as
+STL iterators. The heavyweight iterators allow a smaller data structure.
+
+.. _dss_map:
+
+<map>
+^^^^^
+
+std::map has similar characteristics to :ref:`std::set <dss_set>`: it uses a
+single allocation per pair inserted into the map, it offers log(n) lookup with
+an extremely large constant factor, imposes a space penalty of 3 pointers per
+pair in the map, etc.
+
+std::map is most useful when your keys or values are very large, if you need to
+iterate over the collection in sorted order, or if you need stable iterators
+into the map (i.e. they don't get invalidated if an insertion or deletion of
+another element takes place).
+
+.. _dss_mapvector:
+
+llvm/ADT/MapVector.h
+^^^^^^^^^^^^^^^^^^^^
+
+``MapVector<KeyT,ValueT>`` provides a subset of the DenseMap interface. The
+main difference is that the iteration order is guaranteed to be the insertion
+order, making it an easy (but somewhat expensive) way to avoid
+non-deterministic iteration over maps of pointers.
+
+It is implemented by mapping from key to an index in a vector of key,value
+pairs. This provides fast lookup and iteration, but has two main drawbacks: The
+key is stored twice and it doesn't support removing elements.
+
+.. _dss_inteqclasses:
+
+llvm/ADT/IntEqClasses.h
+^^^^^^^^^^^^^^^^^^^^^^^
+
+IntEqClasses provides a compact representation of equivalence classes of small
+integers. Initially, each integer in the range 0..n-1 has its own equivalence
+class. Classes can be joined by passing two class representatives to the
+join(a, b) method. Two integers are in the same class when findLeader() returns
+the same representative.
+
+Once all equivalence classes are formed, the map can be compressed so each
+integer 0..n-1 maps to an equivalence class number in the range 0..m-1, where m
+is the total number of equivalence classes. The map must be uncompressed before
+it can be edited again.
+
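+A short sketch of the API described above:
+
+.. code-block:: c++
+
+ IntEqClasses EC(4);      // classes: {0} {1} {2} {3}
+ EC.join(0, 2);           // classes: {0, 2} {1} {3}
+ bool Same = EC.findLeader(0) == EC.findLeader(2);   // true
+
+ EC.compress();           // renumber classes to 0..m-1
+ unsigned C1 = EC[1];     // equivalence class number of 1
+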
+.. _dss_immutablemap:
+
+llvm/ADT/ImmutableMap.h
+^^^^^^^^^^^^^^^^^^^^^^^
+
+ImmutableMap is an immutable (functional) map implementation based on an AVL
+tree. Adding or removing elements is done through a Factory object and results
+in the creation of a new ImmutableMap object. If an ImmutableMap already exists
+with the given key set, then the existing one is returned; equality is compared
+with a FoldingSetNodeID. The time and space complexity of add or remove
+operations is logarithmic in the size of the original map.
+
+.. _dss_othermap:
+
+Other Map-Like Container Options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The STL provides several other options, such as std::multimap and the various
+"hash_map" like containers (whether from C++ TR1 or from the SGI library). We
+never use hash_map and unordered_map because they are generally very expensive
+(each insertion requires a malloc) and very non-portable.
+
+std::multimap is useful if you want to map a key to multiple values, but has all
+the drawbacks of std::map. A sorted vector or some other approach is almost
+always better.
+
+.. _ds_bit:
+
+Bit storage containers (BitVector, SparseBitVector)
+---------------------------------------------------
+
+Unlike the other containers, there are only two bit storage containers, and
+choosing when to use each is relatively straightforward.
+
+One additional option is ``std::vector<bool>``: we discourage its use for two
+reasons: 1) the implementation in many common compilers (e.g. commonly
+available versions of GCC) is extremely inefficient and 2) the C++ standards
+committee is likely to deprecate this container and/or change it significantly
+somehow. In any case, please don't use it.
+
+.. _dss_bitvector:
+
+BitVector
+^^^^^^^^^
+
+The BitVector container provides a dynamic size set of bits for manipulation.
+It supports individual bit setting/testing, as well as set operations. The set
+operations take time O(size of bitvector), but operations are performed one word
+at a time, instead of one bit at a time. This makes the BitVector very fast for
+set operations compared to other containers. Use the BitVector when you expect
+the number of set bits to be high (i.e. a dense set).
+
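+A brief sketch of typical BitVector operations:
+
+.. code-block:: c++
+
+ BitVector Live(128);     // 128 bits, all initially false
+ Live.set(5);             // set an individual bit
+ bool B = Live.test(5);   // true
+
+ BitVector Kill(128);
+ Kill.set(5);
+ Live &= Kill;            // word-at-a-time set intersection
+ Live.reset(5);           // clear an individual bit
+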
+.. _dss_smallbitvector:
+
+SmallBitVector
+^^^^^^^^^^^^^^
+
+The SmallBitVector container provides the same interface as BitVector, but it is
+optimized for the case where only a small number of bits, less than 25 or so,
+are needed. It also transparently supports larger bit counts, but slightly less
+efficiently than a plain BitVector, so SmallBitVector should only be used when
+larger counts are rare.
+
+At this time, SmallBitVector does not support set operations (and, or, xor), and
+its operator[] does not provide an assignable lvalue.
+
+.. _dss_sparsebitvector:
+
+SparseBitVector
+^^^^^^^^^^^^^^^
+
+The SparseBitVector container is much like BitVector, with one major difference:
+only the bits that are set are stored. This makes the SparseBitVector much
+more space efficient than BitVector when the set is sparse, as well as making
+set operations O(number of set bits) instead of O(size of universe). The
+downside to the SparseBitVector is that setting and testing of random bits is
+O(N), and on large SparseBitVectors, this can be slower than BitVector. In our
+implementation, setting or testing bits in sorted order (either forwards or
+reverse) is O(1) worst case. Testing and setting bits within 128 bits (depends
+on size) of the current bit is also O(1). As a general statement,
+testing/setting bits in a SparseBitVector is O(distance away from last set bit).
+
+.. _common:
+
+Helpful Hints for Common Operations
+===================================
+
+This section describes how to perform some very simple transformations of LLVM
+code. This is meant to give examples of common idioms used, showing the
+practical side of LLVM transformations.
+
+Because this is a "how-to" section, you should also read about the main classes
+that you will be working with. The :ref:`Core LLVM Class Hierarchy Reference
+<coreclasses>` contains details and descriptions of the main classes that you
+should know about.
+
+.. _inspection:
+
+Basic Inspection and Traversal Routines
+---------------------------------------
+
+The LLVM compiler infrastructure has many different data structures that may be
+traversed. Following the example of the C++ standard template library, the
+techniques used to traverse these various data structures are all basically the
+same. For an enumerable sequence of values, the ``XXXbegin()`` function (or
+method) returns an iterator to the start of the sequence, the ``XXXend()``
+function returns an iterator pointing to one past the last valid element of the
+sequence, and there is some ``XXXiterator`` data type that is common between the
+two operations.
+
+Because the pattern for iteration is common across many different aspects of the
+program representation, the standard template library algorithms may be used on
+them, and it is easier to remember how to iterate. First we show a few common
+examples of the data structures that need to be traversed. Other data
+structures are traversed in very similar ways.
+
+.. _iterate_function:
+
+Iterating over the ``BasicBlock`` in a ``Function``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+It's quite common to have a ``Function`` instance that you'd like to transform
+in some way; in particular, you'd like to manipulate its ``BasicBlock``\ s. To
+facilitate this, you'll need to iterate over all of the ``BasicBlock``\ s that
+constitute the ``Function``. The following is an example that prints the name
+of a ``BasicBlock`` and the number of ``Instruction``\ s it contains:
+
+.. code-block:: c++
+
+ // func is a pointer to a Function instance
+ for (Function::iterator i = func->begin(), e = func->end(); i != e; ++i)
+ // Print out the name of the basic block if it has one, and then the
+ // number of instructions that it contains
+ errs() << "Basic block (name=" << i->getName() << ") has "
+ << i->size() << " instructions.\n";
+
+Note that ``i`` can be used as if it were a pointer for the purposes of
+invoking member functions of the ``BasicBlock`` class. This is because the
+indirection operator is overloaded for the iterator classes. In the above code,
+the expression ``i->size()`` is exactly equivalent to ``(*i).size()`` just like
+you'd expect.
+
+.. _iterate_basicblock:
+
+Iterating over the ``Instruction`` in a ``BasicBlock``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Just like when dealing with ``BasicBlock``\ s in ``Function``\ s, it's easy to
+iterate over the individual instructions that make up ``BasicBlock``\ s. Here's
+a code snippet that prints out each instruction in a ``BasicBlock``:
+
+.. code-block:: c++
+
+ // blk is a pointer to a BasicBlock instance
+ for (BasicBlock::iterator i = blk->begin(), e = blk->end(); i != e; ++i)
+ // The next statement works since operator<<(ostream&,...)
+ // is overloaded for Instruction&
+ errs() << *i << "\n";
+
+
+However, this isn't really the best way to print out the contents of a
+``BasicBlock``! Since the ostream operators are overloaded for virtually
+anything you'll care about, you could have just invoked the print routine on the
+basic block itself: ``errs() << *blk << "\n";``.
+
+.. _iterate_insiter:
+
+Iterating over the ``Instruction`` in a ``Function``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you're finding that you commonly iterate over a ``Function``'s
+``BasicBlock``\ s and then that ``BasicBlock``'s ``Instruction``\ s,
+``InstIterator`` should be used instead. You'll need to include
+``llvm/Support/InstIterator.h`` (`doxygen
+<http://llvm.org/doxygen/InstIterator_8h-source.html>`__) and then instantiate
+``InstIterator``\ s explicitly in your code. Here's a small example that shows
+how to dump all instructions in a function to the standard error stream:
+
+.. code-block:: c++
+
+ #include "llvm/Support/InstIterator.h"
+
+ // F is a pointer to a Function instance
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+    errs() << *I << "\n";
+
+Easy, isn't it? You can also use ``InstIterator``\ s to fill a work list with
+its initial contents. For example, if you wanted to initialize a work list to
+contain all instructions in a ``Function`` ``F``, all you would need to do is
+something like:
+
+.. code-block:: c++
+
+ std::set<Instruction*> worklist;
+ // or better yet, SmallPtrSet<Instruction*, 64> worklist;
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+    worklist.insert(&*I);
+
+The STL set ``worklist`` would now contain all instructions in the ``Function``
+pointed to by ``F``.
+
+.. _iterate_convert:
+
+Turning an iterator into a class pointer (and vice-versa)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes, it'll be useful to grab a reference (or pointer) to a class instance
+when all you've got at hand is an iterator. Well, extracting a reference or a
+pointer from an iterator is very straight-forward. Assuming that ``i`` is a
+``BasicBlock::iterator`` and ``j`` is a ``BasicBlock::const_iterator``:
+
+.. code-block:: c++
+
+  Instruction& inst = *i;    // Grab a reference to the instruction
+  Instruction* pinst = &*i;  // Grab a pointer to the instruction
+  const Instruction& cinst = *j;
+
+However, the iterators you'll be working with in the LLVM framework are special:
+they will automatically convert to a ptr-to-instance type whenever they need to.
+Instead of dereferencing the iterator and then taking the address of the result,
+you can simply assign the iterator to the proper pointer type and you get the
+dereference and address-of operation as a result of the assignment (behind the
+scenes, this is a result of overloading casting mechanisms). Thus the last line
+of the last example,
+
+.. code-block:: c++
+
+ Instruction *pinst = &*i;
+
+is semantically equivalent to
+
+.. code-block:: c++
+
+ Instruction *pinst = i;
+
+It's also possible to turn a class pointer into the corresponding iterator, and
+this is a constant time operation (very efficient). The following code snippet
+illustrates use of the conversion constructors provided by LLVM iterators. By
+using these, you can explicitly grab the iterator of something without actually
+obtaining it via iteration over some structure:
+
+.. code-block:: c++
+
+  void printNextInstruction(Instruction* inst) {
+    BasicBlock::iterator it(inst);
+    ++it; // After this line, it refers to the instruction after *inst
+    if (it != inst->getParent()->end()) errs() << *it << "\n";
+  }
+
+Unfortunately, these implicit conversions come at a cost; they prevent these
+iterators from conforming to standard iterator conventions, and thus from being
+usable with standard algorithms and containers. For example, they prevent the
+following code, where ``B`` is a ``BasicBlock``, from compiling:
+
+.. code-block:: c++
+
+ llvm::SmallVector<llvm::Instruction *, 16>(B->begin(), B->end());
+
+Because of this, these implicit conversions may be removed some day, and
+``operator*`` changed to return a pointer instead of a reference.
+
+.. _iterate_complex:
+
+Finding call sites: a slightly more complex example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Say that you're writing a ``FunctionPass`` and would like to count all the
+locations in the entire module (that is, across every ``Function``) where a
+certain function (i.e., some ``Function *``) is called. As you'll learn
+later, you may want to use an ``InstVisitor`` to accomplish this in a much more
+straight-forward manner, but this example will allow us to explore how you'd do
+it if you didn't have ``InstVisitor`` around. In pseudo-code, this is what we
+want to do:
+
+.. code-block:: none
+
+ initialize callCounter to zero
+ for each Function f in the Module
+ for each BasicBlock b in f
+ for each Instruction i in b
+ if (i is a CallInst and calls the given function)
+ increment callCounter
+
+And the actual code is (remember, because we're writing a ``FunctionPass``, our
+``FunctionPass``-derived class simply has to override the ``runOnFunction``
+method):
+
+.. code-block:: c++
+
+  Function* targetFunc = ...;
+
+  class OurFunctionPass : public FunctionPass {
+    public:
+      OurFunctionPass(): callCounter(0) { }
+
+      virtual bool runOnFunction(Function& F) {
+        for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) {
+          for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) {
+            if (CallInst* callInst = dyn_cast<CallInst>(&*i)) {
+              // We know we've encountered a call instruction, so we
+              // need to determine if it's a call to the
+              // function pointed to by targetFunc or not.
+              if (callInst->getCalledFunction() == targetFunc)
+                ++callCounter;
+            }
+          }
+        }
+        return false;  // this pass does not modify the IR
+      }
+
+    private:
+      unsigned callCounter;
+  };
+
+.. _calls_and_invokes:
+
+Treating calls and invokes the same way
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You may have noticed that the previous example was a bit oversimplified in that
+it did not deal with call sites generated by 'invoke' instructions. In this,
+and in other situations, you may find that you want to treat ``CallInst``\ s and
+``InvokeInst``\ s the same way, even though their most-specific common base
+class is ``Instruction``, which includes lots of less closely-related things.
+For these cases, LLVM provides a handy wrapper class called ``CallSite``
+(`doxygen <http://llvm.org/doxygen/classllvm_1_1CallSite.html>`__). It is
+essentially a wrapper around an ``Instruction`` pointer, with some methods that
+provide functionality common to ``CallInst``\ s and ``InvokeInst``\ s.
+
+This class has "value semantics": it should be passed by value, not by
+reference, and it should not be dynamically allocated or deallocated using
+``operator new`` or ``operator delete``. It is efficiently copyable,
+assignable and constructable, with costs equivalent to those of a bare
+pointer. If you look at its definition, it has only a single pointer member.
+
+.. _iterate_chains:
+
+Iterating over def-use & use-def chains
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Frequently, we might have an instance of the ``Value`` class (`doxygen
+<http://llvm.org/doxygen/classllvm_1_1Value.html>`__) and we want to determine
+which ``User``\ s use the ``Value``. The list of all ``User``\ s of a particular
+``Value`` is called a *def-use* chain. For example, let's say we have a
+``Function*`` named ``F`` that points to a particular function ``foo``. Finding
+all of the instructions that *use* ``foo`` is as simple as iterating over the
+*def-use* chain of ``F``:
+
+.. code-block:: c++
+
+ Function *F = ...;
+
+ for (Value::use_iterator i = F->use_begin(), e = F->use_end(); i != e; ++i)
+    if (Instruction *Inst = dyn_cast<Instruction>(*i)) {
+      errs() << "F is used in instruction:\n";
+      errs() << *Inst << "\n";
+    }
+
+Note that dereferencing a ``Value::use_iterator`` is not a very cheap operation.
+Instead of performing ``*i`` above several times, consider doing it only once in
+the loop body and reusing its result.
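+
+For instance, the loop above can dereference the iterator once per iteration
+and reuse the result:
+
+.. code-block:: c++
+
+  for (Value::use_iterator i = F->use_begin(), e = F->use_end(); i != e; ++i) {
+    User *U = *i;  // Dereference the iterator only once per iteration.
+    if (Instruction *Inst = dyn_cast<Instruction>(U)) {
+      errs() << "F is used in instruction:\n";
+      errs() << *Inst << "\n";
+    }
+  }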
+
+Alternatively, it's common to have an instance of the ``User`` Class (`doxygen
+<http://llvm.org/doxygen/classllvm_1_1User.html>`__) and need to know what
+``Value``\ s are used by it. The list of all ``Value``\ s used by a ``User`` is
+known as a *use-def* chain. Instances of class ``Instruction`` are common
+``User``\ s, so we might want to iterate over all of the values that a particular
+instruction uses (that is, the operands of the particular ``Instruction``):
+
+.. code-block:: c++
+
+ Instruction *pi = ...;
+
+ for (User::op_iterator i = pi->op_begin(), e = pi->op_end(); i != e; ++i) {
+    Value *v = *i;
+    // ...
+ }
+
+Declaring objects as ``const`` is an important tool for enforcing mutation-free
+algorithms (such as analyses). For this purpose, the above iterators come in
+constant flavors as ``Value::const_use_iterator`` and
+``Value::const_op_iterator``. They automatically arise when calling
+``use/op_begin()`` on ``const Value*``\ s or ``const User*``\ s respectively.
+Upon dereferencing, they return ``const Use*``\ s. Otherwise the above patterns
+remain unchanged.
+
+.. _iterate_preds:
+
+Iterating over predecessors & successors of blocks
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Iterating over the predecessors and successors of a block is quite easy with the
+routines defined in ``"llvm/Support/CFG.h"``. Just use code like this to
+iterate over all predecessors of ``BB``:
+
+.. code-block:: c++
+
+ #include "llvm/Support/CFG.h"
+ BasicBlock *BB = ...;
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    BasicBlock *Pred = *PI;
+    // ...
+ }
+
+Similarly, to iterate over successors use ``succ_iterator/succ_begin/succ_end``.
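+
+For example, a sketch of the corresponding successor loop:
+
+.. code-block:: c++
+
+  for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
+    BasicBlock *Succ = *SI;
+    // ...
+  }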
+
+.. _simplechanges:
+
+Making simple changes
+---------------------
+
+There are some primitive transformation operations present in the LLVM
+infrastructure that are worth knowing about. When performing transformations,
+it's fairly common to manipulate the contents of basic blocks. This section
+describes some of the common methods for doing so and gives example code.
+
+.. _schanges_creating:
+
+Creating and inserting new ``Instruction``\ s
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+*Instantiating Instructions*
+
+Creation of ``Instruction``\ s is straight-forward: simply call the constructor
+for the kind of instruction to instantiate and provide the necessary parameters.
+For example, an ``AllocaInst`` only *requires* a (const-ptr-to) ``Type``. Thus:
+
+.. code-block:: c++
+
+ AllocaInst* ai = new AllocaInst(Type::Int32Ty);
+
+will create an ``AllocaInst`` instance that represents the allocation of one
+integer in the current stack frame, at run time. Each ``Instruction`` subclass
+is likely to have varying default parameters which change the semantics of the
+instruction, so refer to the `doxygen documentation for the subclass of
+Instruction <http://llvm.org/doxygen/classllvm_1_1Instruction.html>`_ that
+you're interested in instantiating.
+
+*Naming values*
+
+It is very useful to name the values of instructions when you're able to, as
+this facilitates the debugging of your transformations. If you end up looking
+at generated LLVM machine code, you definitely want to have logical names
+associated with the results of instructions! By supplying a value for the
+``Name`` (default) parameter of the ``Instruction`` constructor, you associate a
+logical name with the result of the instruction's execution at run time. For
+example, say that I'm writing a transformation that dynamically allocates space
+for an integer on the stack, and that integer is going to be used as some kind
+of index by some other code. To accomplish this, I place an ``AllocaInst`` at
+the first point in the first ``BasicBlock`` of some ``Function``, and I'm
+intending to use it within the same ``Function``. I might do:
+
+.. code-block:: c++
+
+ AllocaInst* pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
+
+where ``indexLoc`` is now the logical name of the instruction's execution value,
+which is a pointer to an integer on the run time stack.
+
+*Inserting instructions*
+
+There are essentially two ways to insert an ``Instruction`` into an existing
+sequence of instructions that form a ``BasicBlock``:
+
+* Insertion into an explicit instruction list
+
+ Given a ``BasicBlock* pb``, an ``Instruction* pi`` within that ``BasicBlock``,
+ and a newly-created instruction we wish to insert before ``*pi``, we do the
+ following:
+
+ .. code-block:: c++
+
+ BasicBlock *pb = ...;
+ Instruction *pi = ...;
+ Instruction *newInst = new Instruction(...);
+
+ pb->getInstList().insert(pi, newInst); // Inserts newInst before pi in pb
+
+ Appending to the end of a ``BasicBlock`` is so common that the ``Instruction``
+ class and ``Instruction``-derived classes provide constructors which take a
+  pointer to a ``BasicBlock`` to be appended to. For example, code that looked
+ like:
+
+ .. code-block:: c++
+
+ BasicBlock *pb = ...;
+ Instruction *newInst = new Instruction(...);
+
+ pb->getInstList().push_back(newInst); // Appends newInst to pb
+
+ becomes:
+
+ .. code-block:: c++
+
+ BasicBlock *pb = ...;
+ Instruction *newInst = new Instruction(..., pb);
+
+ which is much cleaner, especially if you are creating long instruction
+ streams.
+
+* Insertion into an implicit instruction list
+
+ ``Instruction`` instances that are already in ``BasicBlock``\ s are implicitly
+ associated with an existing instruction list: the instruction list of the
+ enclosing basic block. Thus, we could have accomplished the same thing as the
+ above code without being given a ``BasicBlock`` by doing:
+
+ .. code-block:: c++
+
+ Instruction *pi = ...;
+ Instruction *newInst = new Instruction(...);
+
+ pi->getParent()->getInstList().insert(pi, newInst);
+
+ In fact, this sequence of steps occurs so frequently that the ``Instruction``
+ class and ``Instruction``-derived classes provide constructors which take (as
+ a default parameter) a pointer to an ``Instruction`` which the newly-created
+ ``Instruction`` should precede. That is, ``Instruction`` constructors are
+ capable of inserting the newly-created instance into the ``BasicBlock`` of a
+ provided instruction, immediately before that instruction. Using an
+  ``Instruction`` constructor with an ``insertBefore`` (default) parameter, the
+ above code becomes:
+
+ .. code-block:: c++
+
+ Instruction* pi = ...;
+ Instruction* newInst = new Instruction(..., pi);
+
+ which is much cleaner, especially if you're creating a lot of instructions and
+ adding them to ``BasicBlock``\ s.
+
+.. _schanges_deleting:
+
+Deleting Instructions
+^^^^^^^^^^^^^^^^^^^^^
+
+Deleting an instruction from an existing sequence of instructions that form a
+BasicBlock_ is very straight-forward: just call the instruction's
+``eraseFromParent()`` method. For example:
+
+.. code-block:: c++
+
+ Instruction *I = .. ;
+ I->eraseFromParent();
+
+This unlinks the instruction from its containing basic block and deletes it. If
+you'd just like to unlink the instruction from its containing basic block but
+not delete it, you can use the ``removeFromParent()`` method.
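+
+For example, a small sketch that unlinks an instruction and re-attaches it to
+the end of a (hypothetical) destination block ``NewBB``:
+
+.. code-block:: c++
+
+  Instruction *I = ...;
+  BasicBlock *NewBB = ...;
+
+  I->removeFromParent();              // Unlink I, but do not delete it.
+  NewBB->getInstList().push_back(I);  // Re-attach I at the end of NewBB.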
+
+.. _schanges_replacing:
+
+Replacing an Instruction with another Value
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Replacing individual instructions
+"""""""""""""""""""""""""""""""""
+
+Including "`llvm/Transforms/Utils/BasicBlockUtils.h
+<http://llvm.org/doxygen/BasicBlockUtils_8h-source.html>`_" permits use of two
+very useful replace functions: ``ReplaceInstWithValue`` and
+``ReplaceInstWithInst``.
+
+.. _schanges_deleting_sub:
+
+``ReplaceInstWithValue`` and ``ReplaceInstWithInst``
+""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+* ``ReplaceInstWithValue``
+
+ This function replaces all uses of a given instruction with a value, and then
+ removes the original instruction. The following example illustrates the
+ replacement of the result of a particular ``AllocaInst`` that allocates memory
+ for a single integer with a null pointer to an integer.
+
+ .. code-block:: c++
+
+ AllocaInst* instToReplace = ...;
+ BasicBlock::iterator ii(instToReplace);
+
+ ReplaceInstWithValue(instToReplace->getParent()->getInstList(), ii,
+ Constant::getNullValue(PointerType::getUnqual(Type::Int32Ty)));
+
+* ``ReplaceInstWithInst``
+
+ This function replaces a particular instruction with another instruction,
+ inserting the new instruction into the basic block at the location where the
+ old instruction was, and replacing any uses of the old instruction with the
+ new instruction. The following example illustrates the replacement of one
+ ``AllocaInst`` with another.
+
+ .. code-block:: c++
+
+ AllocaInst* instToReplace = ...;
+ BasicBlock::iterator ii(instToReplace);
+
+ ReplaceInstWithInst(instToReplace->getParent()->getInstList(), ii,
+ new AllocaInst(Type::Int32Ty, 0, "ptrToReplacedInt"));
+
+
+Replacing multiple uses of Users and Values
+"""""""""""""""""""""""""""""""""""""""""""
+
+You can use ``Value::replaceAllUsesWith`` and ``User::replaceUsesOfWith`` to
+change more than one use at a time. See the doxygen documentation for the
+`Value Class <http://llvm.org/doxygen/classllvm_1_1Value.html>`_ and `User Class
+<http://llvm.org/doxygen/classllvm_1_1User.html>`_, respectively, for more
+information.
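+
+As a brief sketch, where ``OldVal``, ``NewVal``, and ``U`` stand for values you
+already have at hand:
+
+.. code-block:: c++
+
+  // Point every use of OldVal, wherever it may be, at NewVal instead.
+  OldVal->replaceAllUsesWith(NewVal);
+
+  // Update only the operands of the single User U that refer to OldVal.
+  U->replaceUsesOfWith(OldVal, NewVal);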
+
+.. _schanges_deletingGV:
+
+Deleting GlobalVariables
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Deleting a global variable from a module is just as easy as deleting an
+Instruction. First, you must have a pointer to the global variable that you
+wish to delete. You use this pointer to erase it from its parent, the module.
+For example:
+
+.. code-block:: c++
+
+ GlobalVariable *GV = .. ;
+
+ GV->eraseFromParent();
+
+
+.. _create_types:
+
+How to Create Types
+-------------------
+
+In generating IR, you may need some complex types. If you know these types
+statically, you can use ``TypeBuilder<...>::get()``, defined in
+``llvm/Support/TypeBuilder.h``, to retrieve them. ``TypeBuilder`` has two forms
+depending on whether you're building types for cross-compilation or native
+library use. ``TypeBuilder<T, true>`` requires that ``T`` be independent of the
+host environment, meaning that it's built out of types from the ``llvm::types``
+(`doxygen <http://llvm.org/doxygen/namespacellvm_1_1types.html>`__) namespace
+and pointers, functions, arrays, etc. built of those. ``TypeBuilder<T, false>``
+additionally allows native C types whose size may depend on the host compiler.
+For example,
+
+.. code-block:: c++
+
+ FunctionType *ft = TypeBuilder<types::i<8>(types::i<32>*), true>::get();
+
+is easier to read and write than the equivalent
+
+.. code-block:: c++
+
+ std::vector<const Type*> params;
+ params.push_back(PointerType::getUnqual(Type::Int32Ty));
+ FunctionType *ft = FunctionType::get(Type::Int8Ty, params, false);
+
+See the `class comment
+<http://llvm.org/doxygen/TypeBuilder_8h-source.html#l00001>`_ for more details.
+
+.. _threading:
+
+Threads and LLVM
+================
+
+This section describes the interaction of the LLVM APIs with multithreading,
+both on the part of client applications, and in the JIT, in the hosted
+application.
+
+Note that LLVM's support for multithreading is still relatively young. Up
+through version 2.5, the execution of threaded hosted applications was
+supported, but not threaded client access to the APIs. While this use case is
+now supported, clients *must* adhere to the guidelines specified below to ensure
+proper operation in multithreaded mode.
+
+Note that, on Unix-like platforms, LLVM requires the presence of GCC's atomic
+intrinsics in order to support threaded operation. If you need a
+multithreading-capable LLVM on a platform without a suitably modern system
+compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and
+using the resultant compiler to build a copy of LLVM with multithreading
+support.
+
+.. _startmultithreaded:
+
+Entering and Exiting Multithreaded Mode
+---------------------------------------
+
+In order to properly protect its internal data structures while avoiding
+excessive locking overhead in the single-threaded case, LLVM must initialize
+certain data structures necessary to provide guards around its internals. To do
+so, the client program must invoke ``llvm_start_multithreaded()`` before making
+any concurrent LLVM API calls. To subsequently tear down these structures, use
+the ``llvm_stop_multithreaded()`` call. You can also use the
+``llvm_is_multithreaded()`` call to check the status of multithreaded mode.
+
+Note that both of these calls must be made *in isolation*. That is to say that
+no other LLVM API calls may be executing at any time during the execution of
+``llvm_start_multithreaded()`` or ``llvm_stop_multithreaded()``. It is the
+client's responsibility to enforce this isolation.
+
+The return value of ``llvm_start_multithreaded()`` indicates the success or
+failure of the initialization. Failure typically indicates that your copy of
+LLVM was built without multithreading support, usually because GCC atomic
+intrinsics were not found in your system compiler. In this case, the LLVM API
+will not be safe for concurrent calls. However, it *will* be safe for hosting
+threaded applications in the JIT, though :ref:`care must be taken
+<jitthreading>` to ensure that side exits and the like do not accidentally
+result in concurrent LLVM API calls.
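+
+A minimal sketch of the expected call sequence:
+
+.. code-block:: c++
+
+  if (!llvm_start_multithreaded()) {
+    // This copy of LLVM was built without multithreading support;
+    // concurrent LLVM API calls are not safe here.
+  }
+
+  // ... concurrent LLVM API calls are now permitted ...
+
+  llvm_stop_multithreaded();  // Must be made in isolation, as noted above.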
+
+.. _shutdown:
+
+Ending Execution with ``llvm_shutdown()``
+-----------------------------------------
+
+When you are done using the LLVM APIs, you should call ``llvm_shutdown()`` to
+deallocate memory used for internal structures. This will also invoke
+``llvm_stop_multithreaded()`` if LLVM is operating in multithreaded mode. As
+such, ``llvm_shutdown()`` requires the same isolation guarantees as
+``llvm_stop_multithreaded()``.
+
+Note that, if you use scope-based shutdown, you can use the
+``llvm_shutdown_obj`` class, which calls ``llvm_shutdown()`` in its destructor.
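+
+For example, a sketch of a typical ``main``:
+
+.. code-block:: c++
+
+  int main(int argc, char **argv) {
+    llvm_shutdown_obj Y;  // Calls llvm_shutdown() when it goes out of scope.
+    // ... use the LLVM APIs ...
+    return 0;
+  }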
+
+.. _managedstatic:
+
+Lazy Initialization with ``ManagedStatic``
+------------------------------------------
+
+``ManagedStatic`` is a utility class in LLVM used to implement static
+initialization of static resources, such as the global type tables. Before the
+invocation of ``llvm_shutdown()``, it implements a simple lazy initialization
+scheme. Once ``llvm_start_multithreaded()`` returns, however, it uses
+double-checked locking to implement thread-safe lazy initialization.
+
+Note that, because no other threads are allowed to issue LLVM API calls before
+``llvm_start_multithreaded()`` returns, it is possible to have
+``ManagedStatic``\ s of ``llvm::sys::Mutex``\ s.
+
+The ``llvm_acquire_global_lock()`` and ``llvm_release_global_lock()`` APIs provide
+access to the global lock used to implement the double-checked locking for lazy
+initialization. These should only be used internally to LLVM, and only if you
+know what you're doing!
+
+.. _llvmcontext:
+
+Achieving Isolation with ``LLVMContext``
+----------------------------------------
+
+``LLVMContext`` is an opaque class in the LLVM API which clients can use to
+operate multiple, isolated instances of LLVM concurrently within the same
+address space. For instance, in a hypothetical compile-server, the compilation
+of an individual translation unit is conceptually independent from all the
+others, and it would be desirable to be able to compile incoming translation
+units concurrently on independent server threads. Fortunately, ``LLVMContext``
+exists to enable just this kind of scenario!
+
+Conceptually, ``LLVMContext`` provides isolation. Every LLVM entity
+(``Module``\ s, ``Value``\ s, ``Type``\ s, ``Constant``\ s, etc.) in LLVM's
+in-memory IR belongs to an ``LLVMContext``. Entities in different contexts
+*cannot* interact with each other: ``Module``\ s in different contexts cannot be
+linked together, ``Function``\ s cannot be added to ``Module``\ s in different
+contexts, etc. This means that it is safe to compile on multiple
+threads simultaneously, as long as no two threads operate on entities within the
+same context.
+
+In practice, very few places in the API require the explicit specification of an
+``LLVMContext``, other than the ``Type`` creation/lookup APIs. Because every
+``Type`` carries a reference to its owning context, most other entities can
+determine what context they belong to by looking at their own ``Type``. If you
+are adding new entities to LLVM IR, please try to maintain this interface
+design.
+
+For clients that do *not* require the benefits of isolation, LLVM provides a
+convenience API ``getGlobalContext()``. This returns a global, lazily
+initialized ``LLVMContext`` that may be used in situations where isolation is
+not a concern.
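+
+As a sketch (the module names here are placeholders), a client might create a
+module in a private context, or fall back to the global one when isolation is
+not needed:
+
+.. code-block:: c++
+
+  LLVMContext MyContext;  // A private, isolated context.
+  Module *M = new Module("my_module", MyContext);
+
+  Module *N = new Module("other_module", getGlobalContext());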
+
+.. _jitthreading:
+
+Threads and the JIT
+-------------------
+
+LLVM's "eager" JIT compiler is safe to use in threaded programs. Multiple
+threads can call ``ExecutionEngine::getPointerToFunction()`` or
+``ExecutionEngine::runFunction()`` concurrently, and multiple threads can run
+code output by the JIT concurrently. The user must still ensure that only one
+thread accesses IR in a given ``LLVMContext`` while another thread might be
+modifying it. One way to do that is to always hold the JIT lock while accessing
+IR outside the JIT (the JIT *modifies* the IR by adding ``CallbackVH``\ s).
+Another way is to only call ``getPointerToFunction()`` from the
+``LLVMContext``'s thread.
+
+When the JIT is configured to compile lazily (using
+``ExecutionEngine::DisableLazyCompilation(false)``), there is currently a `race
+condition <http://llvm.org/bugs/show_bug.cgi?id=5184>`_ in updating call sites
+after a function is lazily-jitted. It's still possible to use the lazy JIT in a
+threaded program if you ensure that only one thread at a time can call any
+particular lazy stub and that the JIT lock guards any IR access, but we suggest
+using only the eager JIT in threaded programs.
+
+.. _advanced:
+
+Advanced Topics
+===============
+
+This section describes some of the advanced or obscure APIs that most clients
+do not need to be aware of. These APIs tend to manage the inner workings of the
+LLVM system, and only need to be accessed in unusual circumstances.
+
+.. _SymbolTable:
+
+The ``ValueSymbolTable`` class
+------------------------------
+
+The ``ValueSymbolTable`` (`doxygen
+<http://llvm.org/doxygen/classllvm_1_1ValueSymbolTable.html>`__) class provides
+a symbol table that the :ref:`Function <c_Function>` and Module_ classes use for
+naming value definitions. The symbol table can provide a name for any Value_.
+
+Note that the ``SymbolTable`` class should not be directly accessed by most
+clients. It should only be used when iteration over the symbol table names
+themselves is required, which is very rarely needed. Note that not all LLVM
+Value_\ s have names, and those without names (i.e. they have an empty name) do
+not exist in the symbol table.
+
+Symbol tables support iteration over the values in the symbol table with
+``begin/end/iterator`` and support querying to see if a specific name is in the
+symbol table (with ``lookup``). The ``ValueSymbolTable`` class exposes no
+public mutator methods; instead, simply call ``setName`` on a value, which will
+autoinsert it into the appropriate symbol table.
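+
+For instance, a sketch of a by-name lookup in a function's symbol table
+(``"foo"`` is a placeholder name):
+
+.. code-block:: c++
+
+  Function *F = ...;
+
+  ValueSymbolTable &ST = F->getValueSymbolTable();
+  if (Value *V = ST.lookup("foo"))
+    errs() << "found " << V->getName() << "\n";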
+
+.. _UserLayout:
+
+The ``User`` and owned ``Use`` classes' memory layout
+-----------------------------------------------------
+
+The ``User`` (`doxygen <http://llvm.org/doxygen/classllvm_1_1User.html>`__)
+class provides a basis for expressing the ownership of ``User`` towards other
+`Value instance <http://llvm.org/doxygen/classllvm_1_1Value.html>`_\ s. The
+``Use`` (`doxygen <http://llvm.org/doxygen/classllvm_1_1Use.html>`__) helper
+class is employed to do the bookkeeping and to facilitate *O(1)* addition and
+removal.
+
+.. _Use2User:
+
+Interaction and relationship between ``User`` and ``Use`` objects
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A subclass of ``User`` can choose between incorporating its ``Use`` objects or
+referring to them out-of-line by means of a pointer. A mixed variant (some
+``Use``\ s inline, others hung off) is impractical and breaks the invariant
+that the ``Use`` objects belonging to the same ``User`` form a contiguous array.
+
+We have 2 different layouts in the ``User`` (sub)classes:
+
+* Layout a)
+
+  The ``Use`` object(s) are inside the ``User`` object (at a fixed offset),
+  and there are a fixed number of them.
+
+* Layout b)
+
+ The ``Use`` object(s) are referenced by a pointer to an array from the
+ ``User`` object and there may be a variable number of them.
+
+As of v2.4 each layout still possesses a direct pointer to the start of the
+array of ``Use``\ s. Though not mandatory for layout a), we stick to this
+redundancy for the sake of simplicity. The ``User`` object also stores the
+number of ``Use`` objects it has. (Theoretically this information can also be
+calculated given the scheme presented below.)
+
+Special forms of allocation operators (``operator new``) enforce the following
+memory layouts:
+
+* Layout a) is modelled by prepending the ``Use[]`` array to the ``User``
+  object.
+
+ .. code-block:: none
+
+ ...---.---.---.---.-------...
+ | P | P | P | P | User
+ '''---'---'---'---'-------'''
+
+* Layout b) is modelled by pointing at the ``Use[]`` array.
+
+ .. code-block:: none
+
+ .-------...
+ | User
+ '-------'''
+ |
+ v
+ .---.---.---.---...
+ | P | P | P | P |
+ '---'---'---'---'''
+
+*(In the above figures* '``P``' *stands for the* ``Use**`` *that is stored in
+each* ``Use`` *object in the member* ``Use::Prev`` *)*
+
+.. _Waymarking:
+
+The waymarking algorithm
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Since the ``Use`` objects are deprived of the direct (back)pointer to their
+``User`` objects, there must be a fast and exact method to recover it. This is
+accomplished by the following scheme:
+
+A bit-encoding in the 2 LSBits (least significant bits) of the ``Use::Prev``
+allows us to find the start of the ``User`` object:
+
+* ``00`` --- binary digit 0
+
+* ``01`` --- binary digit 1
+
+* ``10`` --- stop and calculate (``s``)
+
+* ``11`` --- full stop (``S``)
+
+Given a ``Use*``, all we have to do is walk until we reach a stop; there we
+either have a ``User`` immediately behind, or we walk on to the next stop,
+picking up digits and calculating the offset:
+
+.. code-block:: none
+
+ .---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.----------------
+ | 1 | s | 1 | 0 | 1 | 0 | s | 1 | 1 | 0 | s | 1 | 1 | s | 1 | S | User (or User*)
+ '---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'----------------
+ |+15 |+10 |+6 |+3 |+1
+ | | | | | __>
+ | | | | __________>
+ | | | ______________________>
+ | | ______________________________________>
+ | __________________________________________________________>
+
+Only the significant bits of each offset need to be stored between the stops, so that
+the *worst case is 20 memory accesses* when there are 1000 ``Use`` objects
+associated with a ``User``.
+
+.. _ReferenceImpl:
+
+Reference implementation
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following literate Haskell fragment demonstrates the concept:
+
+.. code-block:: haskell
+
+ > import Test.QuickCheck
+ >
+ > digits :: Int -> [Char] -> [Char]
+ > digits 0 acc = '0' : acc
+ > digits 1 acc = '1' : acc
+ > digits n acc = digits (n `div` 2) $ digits (n `mod` 2) acc
+ >
+ > dist :: Int -> [Char] -> [Char]
+ > dist 0 [] = ['S']
+ > dist 0 acc = acc
+ > dist 1 acc = let r = dist 0 acc in 's' : digits (length r) r
+ > dist n acc = dist (n - 1) $ dist 1 acc
+ >
+ > takeLast n ss = reverse $ take n $ reverse ss
+ >
+ > test = takeLast 40 $ dist 20 []
+ >
+
+Printing <test> gives: ``"1s100000s11010s10100s1111s1010s110s11s1S"``
+
+The reverse algorithm computes the length of the string just by examining a
+certain prefix:
+
+.. code-block:: haskell
+
+ > pref :: [Char] -> Int
+ > pref "S" = 1
+ > pref ('s':'1':rest) = decode 2 1 rest
+ > pref (_:rest) = 1 + pref rest
+ >
+ > decode walk acc ('0':rest) = decode (walk + 1) (acc * 2) rest
+ > decode walk acc ('1':rest) = decode (walk + 1) (acc * 2 + 1) rest
+ > decode walk acc _ = walk + acc
+ >
+
+Now, as expected, printing <pref test> gives ``40``.
+
+We can *quickCheck* this with the following property:
+
+.. code-block:: haskell
+
+ > testcase = dist 2000 []
+ > testcaseLength = length testcase
+ >
+ > identityProp n = n > 0 && n <= testcaseLength ==> length arr == pref arr
+ > where arr = takeLast n testcase
+ >
+
+As expected <quickCheck identityProp> gives:
+
+::
+
+ *Main> quickCheck identityProp
+ OK, passed 100 tests.
+
+Let's be a bit more exhaustive:
+
+.. code-block:: haskell
+
+ >
+ > deepCheck p = check (defaultConfig { configMaxTest = 500 }) p
+ >
+
+And here is the result of <deepCheck identityProp>:
+
+::
+
+ *Main> deepCheck identityProp
+ OK, passed 500 tests.
+
+.. _Tagging:
+
+Tagging considerations
+^^^^^^^^^^^^^^^^^^^^^^
+
+To maintain the invariant that the 2 LSBits of each ``Use**`` in ``Use`` never
+change after being set up, setters of ``Use::Prev`` must re-tag the new
+``Use**`` on every modification. Accordingly getters must strip the tag bits.
+
+For layout b) instead of the ``User`` we find a pointer (``User*`` with LSBit
+set). Following this pointer brings us to the ``User``. A portable trick
+ensures that the first word of ``User`` (if interpreted as a pointer) never has
+the LSBit set. (Portability relies on the fact that all known compilers place
+the ``vptr`` in the first word of the instances.)
+
+.. _coreclasses:
+
+The Core LLVM Class Hierarchy Reference
+=======================================
+
+``#include "llvm/Type.h"``
+
+header source: `Type.h <http://llvm.org/doxygen/Type_8h-source.html>`_
+
+doxygen info: `Type Classes <http://llvm.org/doxygen/classllvm_1_1Type.html>`_
+
+The Core LLVM classes are the primary means of representing the program being
+inspected or transformed. The core LLVM classes are defined in header files in
+the ``include/llvm/`` directory, and implemented in the ``lib/VMCore``
+directory.
+
+.. _Type:
+
+The Type class and Derived Types
+--------------------------------
+
+``Type`` is a superclass of all type classes. Every ``Value`` has a ``Type``.
+``Type`` cannot be instantiated directly but only through its subclasses.
+Certain primitive types (``VoidType``, ``LabelType``, ``FloatType`` and
+``DoubleType``) have hidden subclasses. They are hidden because they offer no
+useful functionality beyond what the ``Type`` class offers except to distinguish
+themselves from other subclasses of ``Type``.
+
+All other types are subclasses of ``DerivedType``. Types can be named, but this
+is not a requirement. There exists exactly one instance of a given shape at any
+one time. This allows type equality to be performed with address equality of
+the ``Type`` instance. That is, given two ``Type*`` values, the types are identical
+if the pointers are identical.
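+
+For example:
+
+.. code-block:: c++
+
+  Type *T1 = ...;
+  Type *T2 = ...;
+
+  if (T1 == T2) {
+    // Identical pointers, therefore identical types.
+  }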
+
+.. _m_Type:
+
+Important Public Methods
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``bool isIntegerTy() const``: Returns true for any integer type.
+
+* ``bool isFloatingPointTy()``: Return true if this is one of the five
+ floating point types.
+
+* ``bool isSized()``: Return true if the type has known size. Things
+ that don't have a size are abstract types, labels and void.
+
+.. _derivedtypes:
+
+Important Derived Types
+^^^^^^^^^^^^^^^^^^^^^^^
+
+``IntegerType``
+ Subclass of DerivedType that represents integer types of any bit width. Any
+ bit width between ``IntegerType::MIN_INT_BITS`` (1) and
+ ``IntegerType::MAX_INT_BITS`` (~8 million) can be represented.
+
+ * ``static const IntegerType* get(unsigned NumBits)``: get an integer
+ type of a specific bit width.
+
+ * ``unsigned getBitWidth() const``: Get the bit width of an integer type.
+
+``SequentialType``
+ This is subclassed by ArrayType, PointerType and VectorType.
+
+ * ``const Type * getElementType() const``: Returns the type of each
+ of the elements in the sequential type.
+
+``ArrayType``
+ This is a subclass of SequentialType and defines the interface for array
+ types.
+
+ * ``unsigned getNumElements() const``: Returns the number of elements
+ in the array.
+
+``PointerType``
+ Subclass of SequentialType for pointer types.
+
+``VectorType``
+ Subclass of SequentialType for vector types. A vector type is similar to an
+ ArrayType but is distinguished because it is a first class type whereas
+ ArrayType is not. Vector types are used for vector operations and are usually
+  small vectors of an integer or floating point type.
+
+``StructType``
+  Subclass of DerivedType for struct types.
+
+.. _FunctionType:
+
+``FunctionType``
+  Subclass of DerivedType for function types (see the sketch after this list).
+
+ * ``bool isVarArg() const``: Returns true if it's a vararg function.
+
+ * ``const Type * getReturnType() const``: Returns the return type of the
+ function.
+
+ * ``const Type * getParamType (unsigned i)``: Returns the type of the ith
+ parameter.
+
+ * ``const unsigned getNumParams() const``: Returns the number of formal
+ parameters.
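+
+A short sketch that exercises the ``FunctionType`` accessors above:
+
+.. code-block:: c++
+
+  FunctionType *FT = ...;
+
+  const Type *RetTy = FT->getReturnType();
+  for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+    const Type *ParamTy = FT->getParamType(i);
+    // ...
+  }
+  bool TakesVarArgs = FT->isVarArg();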
+
+.. _Module:
+
+The ``Module`` class
+--------------------
+
+``#include "llvm/Module.h"``
+
+header source: `Module.h <http://llvm.org/doxygen/Module_8h-source.html>`_
+
+doxygen info: `Module Class <http://llvm.org/doxygen/classllvm_1_1Module.html>`_
+
+The ``Module`` class represents the top level structure present in LLVM
+programs. An LLVM module is effectively either a translation unit of the
+original program or a combination of several translation units merged by the
+linker. The ``Module`` class keeps track of a list of :ref:`Function
+<c_Function>`\ s, a list of GlobalVariable_\ s, and a SymbolTable_.
+Additionally, it contains a few helpful member functions that try to make common
+operations easy.
+
+.. _m_Module:
+
+Important Public Members of the ``Module`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``Module::Module(std::string name = "")``
+
+ Constructing a Module_ is easy. You can optionally provide a name for it
+ (probably based on the name of the translation unit).
+
+* | ``Module::iterator`` - Typedef for function list iterator
+ | ``Module::const_iterator`` - Typedef for const_iterator.
+ | ``begin()``, ``end()``, ``size()``, ``empty()``
+
+ These are forwarding methods that make it easy to access the contents of a
+ ``Module`` object's :ref:`Function <c_Function>` list.
+
+* ``Module::FunctionListType &getFunctionList()``
+
+ Returns the list of :ref:`Function <c_Function>`\ s. This is necessary to use
+ when you need to update the list or perform a complex action that doesn't have
+ a forwarding method.
+
+----------------
+
+* | ``Module::global_iterator`` - Typedef for global variable list iterator
+ | ``Module::const_global_iterator`` - Typedef for const_iterator.
+ | ``global_begin()``, ``global_end()``, ``global_size()``, ``global_empty()``
+
+ These are forwarding methods that make it easy to access the contents of a
+ ``Module`` object's GlobalVariable_ list.
+
+* ``Module::GlobalListType &getGlobalList()``
+
+ Returns the list of GlobalVariable_\ s. This is necessary to use when you
+ need to update the list or perform a complex action that doesn't have a
+ forwarding method.
+
+----------------
+
+* ``SymbolTable *getSymbolTable()``
+
+  Return a pointer to the SymbolTable_ for this ``Module``.
+
+----------------
+
+* ``Function *getFunction(StringRef Name) const``
+
+ Look up the specified function in the ``Module`` SymbolTable_. If it does not
+ exist, return ``null``.
+
+* ``Function *getOrInsertFunction(const std::string &Name, const FunctionType
+ *T)``
+
+  Look up the specified function in the ``Module`` SymbolTable_. If it does
+  not exist, add an external declaration for the function and return it; a
+  short sketch follows this list.
+
+* ``std::string getTypeName(const Type *Ty)``
+
+ If there is at least one entry in the SymbolTable_ for the specified Type_,
+ return it. Otherwise return the empty string.
+
+* ``bool addTypeName(const std::string &Name, const Type *Ty)``
+
+ Insert an entry in the SymbolTable_ mapping ``Name`` to ``Ty``. If there is
+ already an entry for this name, true is returned and the SymbolTable_ is not
+ modified.
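+
+A short sketch of the function-lookup methods above, following the signatures
+as listed (``"hook"`` is a placeholder name):
+
+.. code-block:: c++
+
+  Module *M = ...;
+
+  std::vector<const Type*> Params;
+  Params.push_back(Type::Int32Ty);
+  FunctionType *FT = FunctionType::get(Type::Int32Ty, Params, false);
+
+  Function *Hook = M->getOrInsertFunction("hook", FT);  // Declared if absent.
+  Function *Main = M->getFunction("main");              // Null if not present.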
+
+.. _Value:
+
+The ``Value`` class
+-------------------
+
+``#include "llvm/Value.h"``
+
+header source: `Value.h <http://llvm.org/doxygen/Value_8h-source.html>`_
+
+doxygen info: `Value Class <http://llvm.org/doxygen/classllvm_1_1Value.html>`_
+
+The ``Value`` class is the most important class in the LLVM Source base. It
+represents a typed value that may be used (among other things) as an operand to
+an instruction. There are many different types of ``Value``\ s, such as
+Constant_\ s, Argument_\ s. Even Instruction_\ s and :ref:`Function
+<c_Function>`\ s are ``Value``\ s.
+
+A particular ``Value`` may be used many times in the LLVM representation for a
+program. For example, an incoming argument to a function (represented with an
+instance of the Argument_ class) is "used" by every instruction in the function
+that references the argument. To keep track of this relationship, the ``Value``
+class keeps a list of all of the ``User``\ s that are using it (the User_ class
+is a base class for all nodes in the LLVM graph that can refer to ``Value``\ s).
+This use list is how LLVM represents def-use information in the program, and is
+accessible through the ``use_*`` methods, shown below.
+
+Because LLVM is a typed representation, every LLVM ``Value`` is typed, and this
+Type_ is available through the ``getType()`` method. In addition, all LLVM
+values can be named. The "name" of the ``Value`` is a symbolic string printed
+in the LLVM code:
+
+.. code-block:: llvm
+
+ %foo = add i32 1, 2
+
+.. _nameWarning:
+
+The name of this instruction is "foo". **NOTE** that the name of any value may
+be missing (an empty string), so names should **ONLY** be used for debugging
+(making the source code easier to read, debugging printouts), they should not be
+used to keep track of values or map between them. For this purpose, use a
+``std::map`` of pointers to the ``Value`` itself instead.
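+
+For example, a sketch of tracking values by identity rather than by name
+(``V`` and ``NextID`` are placeholders):
+
+.. code-block:: c++
+
+  std::map<Value*, unsigned> ValueIDs;  // Keyed by address, not by name.
+  ValueIDs[V] = NextID++;               // Stable even if V has no name.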
+
+One important aspect of LLVM is that there is no distinction between an SSA
+variable and the operation that produces it. Because of this, any reference to
+the value produced by an instruction (or the value available as an incoming
+argument, for example) is represented as a direct pointer to the instance of the
+class that represents this value. Although this may take some getting used to,
+it simplifies the representation and makes it easier to manipulate.
+
+.. _m_Value:
+
+Important Public Members of the ``Value`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* | ``Value::use_iterator`` - Typedef for iterator over the use-list
+ | ``Value::const_use_iterator`` - Typedef for const_iterator over the
+ use-list
+ | ``unsigned use_size()`` - Returns the number of users of the value.
+ | ``bool use_empty()`` - Returns true if there are no users.
+ | ``use_iterator use_begin()`` - Get an iterator to the start of the
+ use-list.
+ | ``use_iterator use_end()`` - Get an iterator to the end of the use-list.
+ | ``User *use_back()`` - Returns the last element in the list.
+
+ These methods are the interface to access the def-use information in LLVM.
+ As with all other iterators in LLVM, the naming conventions follow the
+ conventions defined by the STL_.
+
+* ``Type *getType() const``
+
+  This method returns the Type of the Value.
+
+* | ``bool hasName() const``
+ | ``std::string getName() const``
+ | ``void setName(const std::string &Name)``
+
+  This family of methods is used to access and assign a name to a ``Value``;
+  be aware of the :ref:`precaution above <nameWarning>`.
+
+* ``void replaceAllUsesWith(Value *V)``
+
+ This method traverses the use list of a ``Value`` changing all User_\ s of the
+ current value to refer to "``V``" instead. For example, if you detect that an
+ instruction always produces a constant value (for example through constant
+ folding), you can replace all uses of the instruction with the constant like
+ this:
+
+ .. code-block:: c++
+
+ Inst->replaceAllUsesWith(ConstVal);
+
+.. _User:
+
+The ``User`` class
+------------------
+
+``#include "llvm/User.h"``
+
+header source: `User.h <http://llvm.org/doxygen/User_8h-source.html>`_
+
+doxygen info: `User Class <http://llvm.org/doxygen/classllvm_1_1User.html>`_
+
+Superclass: Value_
+
+The ``User`` class is the common base class of all LLVM nodes that may refer to
+``Value``\ s. It exposes a list of "Operands" that are all of the ``Value``\ s
+that the User is referring to. The ``User`` class itself is a subclass of
+``Value``.
+
+The operands of a ``User`` point directly to the LLVM ``Value`` that it refers
+to. Because LLVM uses Static Single Assignment (SSA) form, there can only be
+one definition referred to, allowing this direct connection. This connection
+provides the use-def information in LLVM.
+
+.. _m_User:
+
+Important Public Members of the ``User`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``User`` class exposes the operand list in two ways: through an index access
+interface and through an iterator based interface.
+
+* | ``Value *getOperand(unsigned i)``
+ | ``unsigned getNumOperands()``
+
+ These two methods expose the operands of the ``User`` in a convenient form for
+ direct access.
+
+* | ``User::op_iterator`` - Typedef for iterator over the operand list
+ | ``op_iterator op_begin()`` - Get an iterator to the start of the operand
+ list.
+ | ``op_iterator op_end()`` - Get an iterator to the end of the operand list.
+
+ Together, these methods make up the iterator based interface to the operands
+ of a ``User``.
+
+
+.. _Instruction:
+
+The ``Instruction`` class
+-------------------------
+
+``#include "llvm/Instruction.h"``
+
+header source: `Instruction.h
+<http://llvm.org/doxygen/Instruction_8h-source.html>`_
+
+doxygen info: `Instruction Class
+<http://llvm.org/doxygen/classllvm_1_1Instruction.html>`_
+
+Superclasses: User_, Value_
+
+The ``Instruction`` class is the common base class for all LLVM instructions.
+It provides only a few methods, but is a very commonly used class. The primary
+data tracked by the ``Instruction`` class itself is the opcode (instruction
+type) and the parent BasicBlock_ the ``Instruction`` is embedded into. To
+represent a specific type of instruction, one of many subclasses of
+``Instruction`` are used.
+
+Because the ``Instruction`` class subclasses the User_ class, its operands can
+be accessed in the same way as for other ``User``\ s (with the
+``getOperand()``/``getNumOperands()`` and ``op_begin()``/``op_end()`` methods).
+An important file for the ``Instruction`` class is the ``llvm/Instruction.def``
+file. This file contains some meta-data about the various different types of
+instructions in LLVM. It describes the enum values that are used as opcodes
+(for example ``Instruction::Add`` and ``Instruction::ICmp``), as well as the
+concrete sub-classes of ``Instruction`` that implement the instruction (for
+example BinaryOperator_ and CmpInst_). Unfortunately, the use of macros in this
+file confuses doxygen, so these enum values don't show up correctly in the
+`doxygen output <http://llvm.org/doxygen/classllvm_1_1Instruction.html>`_.
+
+.. _s_Instruction:
+
+Important Subclasses of the ``Instruction`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. _BinaryOperator:
+
+* ``BinaryOperator``
+
+  This subclass represents all two-operand instructions whose operands must be
+ the same type, except for the comparison instructions.
+
+.. _CastInst:
+
+* ``CastInst``
+
+  This subclass is the parent of the 12 casting instructions. It provides
+ common operations on cast instructions.
+
+.. _CmpInst:
+
+* ``CmpInst``
+
+  This subclass represents the two comparison instructions,
+  `ICmpInst <LangRef.html#i_icmp>`_ (integer operands), and
+ `FCmpInst <LangRef.html#i_fcmp>`_ (floating point operands).
+
+.. _TerminatorInst:
+
+* ``TerminatorInst``
+
+ This subclass is the parent of all terminator instructions (those which can
+ terminate a block).
+
+.. _m_Instruction:
+
+Important Public Members of the ``Instruction`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``BasicBlock *getParent()``
+
+ Returns the BasicBlock_ that this
+ ``Instruction`` is embedded into.
+
+* ``bool mayWriteToMemory()``
+
+ Returns true if the instruction writes to memory, i.e. it is a ``call``,
+ ``free``, ``invoke``, or ``store``.
+
+* ``unsigned getOpcode()``
+
+ Returns the opcode for the ``Instruction``.
+
+* ``Instruction *clone() const``
+
+ Returns another instance of the specified instruction, identical in all ways
+ to the original except that the instruction has no parent (i.e. it's not
+ embedded into a BasicBlock_), and it has no name.
+
+.. _Constant:
+
+The ``Constant`` class and subclasses
+-------------------------------------
+
+Constant represents a base class for different types of constants. It is
+subclassed by ConstantInt, ConstantArray, etc. for representing the various
+types of Constants. GlobalValue_ is also a subclass, which represents the
+address of a global variable or function.
+
+.. _s_Constant:
+
+Important Subclasses of Constant
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ConstantInt : This subclass of Constant represents an integer constant of
+ any width.
+
+ * ``const APInt& getValue() const``: Returns the underlying
+ value of this constant, an APInt value.
+
+ * ``int64_t getSExtValue() const``: Converts the underlying APInt value to an
+ int64_t via sign extension. If the value (not the bit width) of the APInt
+ is too large to fit in an int64_t, an assertion will result. For this
+ reason, use of this method is discouraged.
+
+ * ``uint64_t getZExtValue() const``: Converts the underlying APInt value
+    to a uint64_t via zero extension. If the value (not the bit width) of the
+ APInt is too large to fit in a uint64_t, an assertion will result. For this
+ reason, use of this method is discouraged.
+
+ * ``static ConstantInt* get(const APInt& Val)``: Returns the ConstantInt
+ object that represents the value provided by ``Val``. The type is implied
+ as the IntegerType that corresponds to the bit width of ``Val``.
+
+ * ``static ConstantInt* get(const Type *Ty, uint64_t Val)``: Returns the
+ ConstantInt object that represents the value provided by ``Val`` for integer
+ type ``Ty``.
+
+* ConstantFP : This class represents a floating point constant.
+
+ * ``double getValue() const``: Returns the underlying value of this constant.
+
+* ConstantArray : This represents a constant array.
+
+ * ``const std::vector<Use> &getValues() const``: Returns a vector of
+    component constants that make up this array.
+
+* ConstantStruct : This represents a constant struct.
+
+ * ``const std::vector<Use> &getValues() const``: Returns a vector of
+    component constants that make up this struct.
+
+* GlobalValue : This represents either a global variable or a function. In
+ either case, the value is a constant fixed address (after linking).
+
+.. _GlobalValue:
+
+The ``GlobalValue`` class
+-------------------------
+
+``#include "llvm/GlobalValue.h"``
+
+header source: `GlobalValue.h
+<http://llvm.org/doxygen/GlobalValue_8h-source.html>`_
+
+doxygen info: `GlobalValue Class
+<http://llvm.org/doxygen/classllvm_1_1GlobalValue.html>`_
+
+Superclasses: Constant_, User_, Value_
+
+Global values ( GlobalVariable_\ s or :ref:`Function <c_Function>`\ s) are the
+only LLVM values that are visible in the bodies of all :ref:`Function
+<c_Function>`\ s. Because they are visible at global scope, they are also
+subject to linking with other globals defined in different translation units.
+To control the linking process, ``GlobalValue``\ s know their linkage rules.
+Specifically, ``GlobalValue``\ s know whether they have internal or external
+linkage, as defined by the ``LinkageTypes`` enumeration.
+
+If a ``GlobalValue`` has internal linkage (equivalent to being ``static`` in C),
+it is not visible to code outside the current translation unit, and does not
+participate in linking. If it has external linkage, it is visible to external
+code, and does participate in linking. In addition to linkage information,
+``GlobalValue``\ s keep track of which Module_ they are currently part of.
+
+Because ``GlobalValue``\ s are memory objects, they are always referred to by
+their **address**. As such, the Type_ of a global is always a pointer to its
+contents. It is important to remember this when using the ``GetElementPtrInst``
+instruction because this pointer must be dereferenced first. For example, if
+you have a ``GlobalVariable`` (a subclass of ``GlobalValue``) that is an array
+of 24 ints, type ``[24 x i32]``, then the ``GlobalVariable`` is a pointer to
+that array. Although the address of the first element of this array and the
+value of the ``GlobalVariable`` are the same, they have different types. The
+``GlobalVariable``'s type is ``[24 x i32]``. The first element's type is
+``i32``. Because of this, accessing a global value requires you to dereference
+the pointer with ``GetElementPtrInst`` first, then its elements can be accessed.
+This is explained in the `LLVM Language Reference Manual
+<LangRef.html#globalvars>`_.
+
+.. _m_GlobalValue:
+
+Important Public Members of the ``GlobalValue`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* | ``bool hasInternalLinkage() const``
+ | ``bool hasExternalLinkage() const``
+ | ``void setInternalLinkage(bool HasInternalLinkage)``
+
+ These methods manipulate the linkage characteristics of the ``GlobalValue``.
+
+* ``Module *getParent()``
+
+ This returns the Module_ that the
+ GlobalValue is currently embedded into.
+
+.. _c_Function:
+
+The ``Function`` class
+----------------------
+
+``#include "llvm/Function.h"``
+
+header source: `Function.h <http://llvm.org/doxygen/Function_8h-source.html>`_
+
+doxygen info: `Function Class
+<http://llvm.org/doxygen/classllvm_1_1Function.html>`_
+
+Superclasses: GlobalValue_, Constant_, User_, Value_
+
+The ``Function`` class represents a single procedure in LLVM. It is actually
+one of the more complex classes in the LLVM hierarchy because it must keep track
+of a large amount of data. The ``Function`` class keeps track of a list of
+BasicBlock_\ s, a list of formal Argument_\ s, and a SymbolTable_.
+
+The list of BasicBlock_\ s is the most commonly used part of ``Function``
+objects. The list imposes an implicit ordering of the blocks in the function,
+which indicates how the code will be laid out by the backend. Additionally, the
+first BasicBlock_ is the implicit entry node for the ``Function``. It is not
+legal in LLVM to explicitly branch to this initial block. There are no implicit
+exit nodes, and in fact there may be multiple exit nodes from a single
+``Function``. If the BasicBlock_ list is empty, this indicates that the
+``Function`` is actually a function declaration: the actual body of the function
+hasn't been linked in yet.
+
+In addition to a list of BasicBlock_\ s, the ``Function`` class also keeps track
+of the list of formal Argument_\ s that the function receives. This container
+manages the lifetime of the Argument_ nodes, just like the BasicBlock_ list does
+for the BasicBlock_\ s.
+
+The SymbolTable_ is a very rarely used LLVM feature that is only used when you
+have to look up a value by name. Aside from that, the SymbolTable_ is used
+internally to make sure that there are no conflicts between the names of
+Instruction_\ s, BasicBlock_\ s, or Argument_\ s in the function body.
+
+Note that ``Function`` is a GlobalValue_ and therefore also a Constant_. The
+value of the function is its address (after linking) which is guaranteed to be
+constant.
+
+.. _m_Function:
+
+Important Public Members of the ``Function``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``Function(const FunctionType *Ty, LinkageTypes Linkage,
+ const std::string &N = "", Module* Parent = 0)``
+
+  Constructor used when you need to create new ``Function``\ s to add to the
+ program. The constructor must specify the type of the function to create and
+ what type of linkage the function should have. The FunctionType_ argument
+ specifies the formal arguments and return value for the function. The same
+ FunctionType_ value can be used to create multiple functions. The ``Parent``
+ argument specifies the Module in which the function is defined. If this
+ argument is provided, the function will automatically be inserted into that
+ module's list of functions.
+
+* ``bool isDeclaration()``
+
+ Return whether or not the ``Function`` has a body defined. If the function is
+ "external", it does not have a body, and thus must be resolved by linking with
+ a function defined in a different translation unit.
+
+* | ``Function::iterator`` - Typedef for basic block list iterator
+ | ``Function::const_iterator`` - Typedef for const_iterator.
+ | ``begin()``, ``end()``, ``size()``, ``empty()``
+
+ These are forwarding methods that make it easy to access the contents of a
+ ``Function`` object's BasicBlock_ list.
+
+* ``Function::BasicBlockListType &getBasicBlockList()``
+
+ Returns the list of BasicBlock_\ s. This is necessary to use when you need to
+ update the list or perform a complex action that doesn't have a forwarding
+ method.
+
+* | ``Function::arg_iterator`` - Typedef for the argument list iterator
+ | ``Function::const_arg_iterator`` - Typedef for const_iterator.
+ | ``arg_begin()``, ``arg_end()``, ``arg_size()``, ``arg_empty()``
+
+ These are forwarding methods that make it easy to access the contents of a
+ ``Function`` object's Argument_ list.
+
+* ``Function::ArgumentListType &getArgumentList()``
+
+  Returns the list of Argument_\ s. Use this when you need to update the list
+  or perform a complex action that doesn't have a forwarding method.
+
+* ``BasicBlock &getEntryBlock()``
+
+ Returns the entry ``BasicBlock`` for the function. Because the entry block
+ for the function is always the first block, this returns the first block of
+ the ``Function``.
+
+* | ``Type *getReturnType()``
+ | ``FunctionType *getFunctionType()``
+
+ This traverses the Type_ of the ``Function`` and returns the return type of
+ the function, or the FunctionType_ of the actual function.
+
+* ``SymbolTable *getSymbolTable()``
+
+ Return a pointer to the SymbolTable_ for this ``Function``.
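+
+For illustration, here is a minimal sketch that ties these members together.
+The helper names and the string ``"foo"`` are hypothetical, the header paths
+follow this document's conventions, and exact signatures may differ between
+LLVM releases:
+
+.. code-block:: c++
+
+  #include "llvm/Function.h"
+  #include "llvm/Module.h"
+
+  using namespace llvm;
+
+  Function *MakeFunction(Module *M, FunctionType *FTy) {
+    // Function::Create is a thin factory around the constructor documented
+    // above; passing M as the Parent inserts the function into M's list.
+    return Function::Create(FTy, GlobalValue::ExternalLinkage, "foo", M);
+  }
+
+  void WalkFunction(Function &F) {
+    if (F.isDeclaration())
+      return; // no body has been linked in yet
+
+    // Forwarding iterators over the basic block list.
+    for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
+      (void)BB; // ... inspect *BB here ...
+
+    // Forwarding iterators over the formal argument list.
+    for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
+         AI != AE; ++AI)
+      (void)AI; // ... inspect *AI here ...
+  }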
+
+.. _GlobalVariable:
+
+The ``GlobalVariable`` class
+----------------------------
+
+``#include "llvm/GlobalVariable.h"``
+
+header source: `GlobalVariable.h
+<http://llvm.org/doxygen/GlobalVariable_8h-source.html>`_
+
+doxygen info: `GlobalVariable Class
+<http://llvm.org/doxygen/classllvm_1_1GlobalVariable.html>`_
+
+Superclasses: GlobalValue_, Constant_, User_, Value_
+
+Global variables are represented with the (surprise surprise) ``GlobalVariable``
+class. Like functions, ``GlobalVariable``\ s are also subclasses of
+GlobalValue_, and as such are always referenced by their address (global values
+must live in memory, so their "name" refers to their constant address). See
+GlobalValue_ for more on this. Global variables may have an initial value
+(which must be a Constant_), and if they have an initializer, they may be marked
+as "constant" themselves (indicating that their contents never change at
+runtime).
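+
+As a small sketch (the helper and the name ``"answer"`` are hypothetical; the
+constructor is invoked with the parameters exactly as documented in the member
+list below, and its precise signature varies across LLVM releases), a constant
+global with an initial value might be created like this:
+
+.. code-block:: c++
+
+  #include "llvm/Constants.h"
+  #include "llvm/GlobalVariable.h"
+  #include "llvm/Module.h"
+
+  using namespace llvm;
+
+  GlobalVariable *MakeConstantGlobal(Module *M, LLVMContext &C) {
+    Type *Int32Ty = Type::getInt32Ty(C);
+    Constant *Init = ConstantInt::get(Int32Ty, 42);
+    // isConstant = true: the contents never change at runtime, which is
+    // only meaningful because an initializer is provided.
+    return new GlobalVariable(Int32Ty, /*isConstant=*/true,
+                              GlobalValue::InternalLinkage, Init, "answer", M);
+  }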
+
+.. _m_GlobalVariable:
+
+Important Public Members of the ``GlobalVariable`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes &Linkage,
+ Constant *Initializer = 0, const std::string &Name = "", Module* Parent = 0)``
+
+ Create a new global variable of the specified type. If ``isConstant`` is true
+ then the global variable will be marked as unchanging for the program. The
+ Linkage parameter specifies the type of linkage (internal, external, weak,
+ linkonce, appending) for the variable. If the linkage is InternalLinkage,
+ WeakAnyLinkage, WeakODRLinkage, LinkOnceAnyLinkage or LinkOnceODRLinkage, then
+ the resultant global variable will have internal linkage. AppendingLinkage
+ concatenates together all instances (in different translation units) of the
+ variable into a single variable but is only applicable to arrays. See the
+ `LLVM Language Reference <LangRef.html#modulestructure>`_ for further details
+ on linkage types. Optionally an initializer, a name, and the module to put
+ the variable into may be specified for the global variable as well.
+
+* ``bool isConstant() const``
+
+ Returns true if this is a global variable that is known not to be modified at
+ runtime.
+
+* ``bool hasInitializer()``
+
+  Returns true if this ``GlobalVariable`` has an initializer.
+
+* ``Constant *getInitializer()``
+
+ Returns the initial value for a ``GlobalVariable``. It is not legal to call
+ this method if there is no initializer.
+
+.. _BasicBlock:
+
+The ``BasicBlock`` class
+------------------------
+
+``#include "llvm/BasicBlock.h"``
+
+header source: `BasicBlock.h
+<http://llvm.org/doxygen/BasicBlock_8h-source.html>`_
+
+doxygen info: `BasicBlock Class
+<http://llvm.org/doxygen/classllvm_1_1BasicBlock.html>`_
+
+Superclass: Value_
+
+This class represents a single-entry, single-exit section of the code, commonly
+known as a basic block by the compiler community. The ``BasicBlock`` class
+maintains a list of Instruction_\ s, which form the body of the block. Matching
+the language definition, the last element of this list of instructions is always
+a terminator instruction (a subclass of the TerminatorInst_ class).
+
+In addition to tracking the list of instructions that make up the block, the
+``BasicBlock`` class also keeps track of the :ref:`Function <c_Function>` that
+it is embedded into.
+
+Note that ``BasicBlock``\ s themselves are Value_\ s, because they are
+referenced by instructions like branches and can go in the switch tables.
+``BasicBlock``\ s have type ``label``.
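+
+A short sketch (with a hypothetical helper name, and header paths following
+this document's conventions) that walks a block's instructions and then
+inspects its terminator, using the members described below:
+
+.. code-block:: c++
+
+  #include "llvm/BasicBlock.h"
+
+  using namespace llvm;
+
+  void WalkBlock(BasicBlock *BB) {
+    // Iterate over every instruction in the block.
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      (void)I; // ... inspect *I here ...
+
+    // In a well-formed block the last instruction is the terminator.
+    if (TerminatorInst *T = BB->getTerminator())
+      (void)T; // e.g. examine the block's successors from here
+  }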
+
+.. _m_BasicBlock:
+
+Important Public Members of the ``BasicBlock`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``BasicBlock(const std::string &Name = "", Function *Parent = 0)``
+
+ The ``BasicBlock`` constructor is used to create new basic blocks for
+ insertion into a function. The constructor optionally takes a name for the
+ new block, and a :ref:`Function <c_Function>` to insert it into. If the
+ ``Parent`` parameter is specified, the new ``BasicBlock`` is automatically
+  inserted at the end of the specified :ref:`Function <c_Function>`; if not,
+  the ``BasicBlock`` must be manually inserted into the :ref:`Function
+  <c_Function>`.
+
+* | ``BasicBlock::iterator`` - Typedef for instruction list iterator
+ | ``BasicBlock::const_iterator`` - Typedef for const_iterator.
+  | ``begin()``, ``end()``, ``front()``, ``back()``,
+    ``size()``, ``empty()`` - STL-style functions for accessing the
+    instruction list.
+
+ These methods and typedefs are forwarding functions that have the same
+ semantics as the standard library methods of the same names. These methods
+ expose the underlying instruction list of a basic block in a way that is easy
+ to manipulate. To get the full complement of container operations (including
+ operations to update the list), you must use the ``getInstList()`` method.
+
+* ``BasicBlock::InstListType &getInstList()``
+
+ This method is used to get access to the underlying container that actually
+ holds the Instructions. This method must be used when there isn't a
+ forwarding function in the ``BasicBlock`` class for the operation that you
+ would like to perform. Because there are no forwarding functions for
+ "updating" operations, you need to use this if you want to update the contents
+ of a ``BasicBlock``.
+
+* ``Function *getParent()``
+
+  Returns a pointer to the :ref:`Function <c_Function>` the block is embedded
+  into, or a null pointer if it is homeless.
+
+* ``TerminatorInst *getTerminator()``
+
+ Returns a pointer to the terminator instruction that appears at the end of the
+ ``BasicBlock``. If there is no terminator instruction, or if the last
+ instruction in the block is not a terminator, then a null pointer is returned.
+
+.. _Argument:
+
+The ``Argument`` class
+----------------------
+
+This subclass of Value defines the interface for incoming formal arguments to a
+function. A Function maintains a list of its formal arguments. An argument has
+a pointer to the parent Function.
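+
+A minimal sketch (hypothetical helper name; header paths as elsewhere in this
+document): given an ``Argument``, the parent pointer recovers the owning
+``Function``:
+
+.. code-block:: c++
+
+  #include "llvm/Argument.h"
+  #include "llvm/Function.h"
+
+  using namespace llvm;
+
+  bool IsFirstFormalArg(Argument *A) {
+    Function *F = A->getParent(); // the Function that owns this Argument
+    return F && !F->arg_empty() && A == &*F->arg_begin();
+  }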
+
+
diff --git a/docs/Projects.rst b/docs/Projects.rst
index 63132887a599..3246e3ff169b 100644
--- a/docs/Projects.rst
+++ b/docs/Projects.rst
@@ -1,5 +1,3 @@
-.. _projects:
-
========================
Creating an LLVM Project
========================
@@ -153,12 +151,10 @@ Underneath your top level directory, you should have the following directories:
Currently, the LLVM build system provides basic support for tests. The LLVM
system provides the following:
-* LLVM provides a ``tcl`` procedure that is used by ``Dejagnu`` to run tests.
- It can be found in ``llvm/lib/llvm-dg.exp``. This test procedure uses ``RUN``
+* LLVM contains regression tests in ``llvm/test``. These tests are run by the
+ :doc:`Lit <CommandGuide/lit>` testing tool. This test procedure uses ``RUN``
lines in the actual test case to determine how to run the test. See the
- `TestingGuide <TestingGuide.html>`_ for more details. You can easily write
- Makefile support similar to the Makefiles in ``llvm/test`` to use ``Dejagnu``
- to run your project's tests.
+ :doc:`TestingGuide` for more details.
* LLVM contains an optional package called ``llvm-test``, which provides
benchmarks and programs that are known to compile with the Clang front
diff --git a/docs/README.txt b/docs/README.txt
index 5ddd599d8a78..22cf93077959 100644
--- a/docs/README.txt
+++ b/docs/README.txt
@@ -1,12 +1,42 @@
LLVM Documentation
==================
-The LLVM documentation is currently written in two formats:
+LLVM's documentation is written in reStructuredText, a lightweight
+plaintext markup language (file extension `.rst`). While the
+reStructuredText documentation should be quite readable in source form, it
+is mostly meant to be processed by the Sphinx documentation generation
+system to create HTML pages which are hosted on <http://llvm.org/docs/> and
+updated after every commit. Manpage output is also supported, see below.
- * Plain HTML documentation.
+If you instead would like to generate and view the HTML locally, install
+Sphinx <http://sphinx-doc.org/> and then do:
- * reStructured Text documentation using the Sphinx documentation generator. It
- is currently tested with Sphinx 1.1.3.
+ cd docs/
+ make -f Makefile.sphinx
+ $BROWSER _build/html/index.html
- For more information, see the "Sphinx Introduction for LLVM Developers"
- document.
+The mapping between reStructuredText files and generated documentation is
+`docs/Foo.rst` <-> `_build/html/Foo.html` <-> `http://llvm.org/docs/Foo.html`.
+
+If you are interested in writing new documentation, you will want to read
+`SphinxQuickstartTemplate.rst` which will get you writing documentation
+very fast and includes examples of the most important reStructuredText
+markup syntax.
+
+Manpage Output
+===============
+
+Building the manpages is similar to building the HTML documentation. The
+primary difference is to use the `man` makefile target, instead of the
+default (which is `html`). Sphinx then produces the man pages in the
+directory `_build/man/`.
+
+ cd docs/
+ make -f Makefile.sphinx man
+ man -l _build/man/FileCheck.1
+
+The correspondence between .rst files and man pages is
+`docs/CommandGuide/Foo.rst` <-> `_build/man/Foo.1`.
+These .rst files are also included during HTML generation so they are also
+viewable online (as noted above) at e.g.
+`http://llvm.org/docs/CommandGuide/Foo.html`.
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
deleted file mode 100644
index a4c5960c1555..000000000000
--- a/docs/ReleaseNotes.html
+++ /dev/null
@@ -1,975 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
- <title>LLVM 3.2 Release Notes</title>
-</head>
-<body>
-
-<h1>LLVM 3.2 Release Notes</h1>
-
-<div>
-<img style="float:right" src="http://llvm.org/img/DragonSmall.png"
- width="136" height="136" alt="LLVM Dragon Logo">
-</div>
-
-<ol>
- <li><a href="#intro">Introduction</a></li>
- <li><a href="#subproj">Sub-project Status Update</a></li>
- <li><a href="#externalproj">External Projects Using LLVM 3.2</a></li>
- <li><a href="#whatsnew">What's New in LLVM?</a></li>
- <li><a href="GettingStarted.html">Installation Instructions</a></li>
- <li><a href="#knownproblems">Known Problems</a></li>
- <li><a href="#additionalinfo">Additional Information</a></li>
-</ol>
-
-<div class="doc_author">
- <p>Written by the <a href="http://llvm.org/">LLVM Team</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="intro">Introduction</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This document contains the release notes for the LLVM Compiler
- Infrastructure, release 3.2. Here we describe the status of LLVM, including
- major improvements from the previous release, improvements in various
- sub-projects of LLVM, and some of the current users of the code. All LLVM
- releases may be downloaded from the <a href="http://llvm.org/releases/">LLVM
- releases web site</a>.</p>
-
-<p>For more information about LLVM, including information about the latest
- release, please check out the <a href="http://llvm.org/">main LLVM web
- site</a>. If you have questions or comments,
- the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVM
- Developer's Mailing List</a> is a good place to send them.</p>
-
-<p>Note that if you are reading this file from a Subversion checkout or the main
- LLVM web page, this document applies to the <i>next</i> release, not the
- current one. To see the release notes for a specific release, please see the
- <a href="http://llvm.org/releases/">releases page</a>.</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="subproj">Sub-project Status Update</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The LLVM 3.2 distribution currently consists of production-quality code
- from the core LLVM repository, which roughly includes the LLVM optimizers,
- code generators and supporting tools, as well as Clang, DragonEgg and
- compiler-rt sub-project repositories. In addition to this code, the LLVM
- Project includes other sub-projects that are in development. Here we
- include updates on these sub-projects.</p>
-
-<!--=========================================================================-->
-<h3>
-<a name="clang">Clang: C/C++/Objective-C Frontend Toolkit</a>
-</h3>
-
-<div>
-
-<p><a href="http://clang.llvm.org/">Clang</a> is an LLVM front end for the C,
- C++, and Objective-C languages. Clang aims to provide a better user
- experience through expressive diagnostics, a high level of conformance to
- language standards, fast compilation, and low memory use. Like LLVM, Clang
- provides a modular, library-based architecture that makes it suitable for
- creating or integrating with other development tools.</p>
-
-<p>In the LLVM 3.2 time-frame, the Clang team has made many improvements.
- Highlights include:</p>
-<ul>
- <li>Improvements to Clang's diagnostics</li>
- <li>Support for tls_model attribute</li>
- <li>Type safety attributes</li>
-</ul>
-
-<p>For more details about the changes to Clang since the 3.1 release, see the
- <a href="http://llvm.org/releases/3.2/tools/clang/docs/ReleaseNotes.html">Clang 3.2 release
- notes.</a></p>
-
-<p>If Clang rejects your code but another compiler accepts it, please take a
- look at the <a href="http://clang.llvm.org/compatibility.html">language
- compatibility</a> guide to make sure this is not intentional or a known
- issue.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="dragonegg">DragonEgg: GCC front-ends, LLVM back-end</a>
-</h3>
-
-<div>
-
-<p><a href="http://dragonegg.llvm.org/">DragonEgg</a> is a
- <a href="http://gcc.gnu.org/wiki/plugins">gcc plugin</a> that replaces GCC's
- optimizers and code generators with LLVM's. It works with gcc-4.5 and gcc-4.6
- (and partially with gcc-4.7), can target the x86-32/x86-64 and ARM processor
- families, and has been successfully used on the Darwin, FreeBSD, KFreeBSD,
- Linux and OpenBSD platforms. It fully supports Ada, C, C++ and Fortran. It
- has partial support for Go, Java, Obj-C and Obj-C++.</p>
-
-<p>The 3.2 release has the following notable changes:</p>
-
-<ul>
- <li>Able to load LLVM plugins such as Polly.</li>
- <li>Supports thread-local storage models.</li>
- <li>Passes knowledge of variable lifetimes to the LLVM optimizers.</li>
- <li>No longer requires GCC to be built with LTO support.</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="compiler-rt">compiler-rt: Compiler Runtime Library</a>
-</h3>
-
-<div>
-
-
-<p>The LLVM <a href="http://compiler-rt.llvm.org/">compiler-rt project</a>
- is a simple library that provides an implementation of the low-level
- target-specific hooks required by code generation and other runtime
- components. For example, when compiling for a 32-bit target, converting a
- double to a 64-bit unsigned integer is compiled into a runtime call to the
- <code>__fixunsdfdi</code> function. The compiler-rt library provides highly
- optimized implementations of this and other low-level routines (some are 3x
- faster than the equivalent libgcc routines).</p>
-
-<p>The 3.2 release has the following notable changes:</p>
-
-<ul>
- <li><a href="http://llvm.org/releases/3.2/tools/clang/docs/ThreadSanitizer.html">ThreadSanitizer (TSan)</a> - data race detector run-time library for C/C++ has been added.</li>
- <li>Improvements to <a href="http://llvm.org/releases/3.2/tools/clang/docs/AddressSanitizer.html">AddressSanitizer</a> including: better portability
- (OSX, Android NDK), support for cmake based builds, enhanced error reporting and lots of bug fixes.</li>
- <li>Added support for A6 'Swift' CPU.</li>
- <li><code>divsi3</code> function has been enhanced to take advantage of a hardware unsigned divide when it is available.</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="lldb">LLDB: Low Level Debugger</a>
-</h3>
-
-<div>
-
-<p><a href="http://lldb.llvm.org">LLDB</a> is a ground-up implementation of a
- command line debugger, as well as a debugger API that can be used from other
- applications. LLDB makes use of the Clang parser to provide high-fidelity
- expression parsing (particularly for C++) and uses the LLVM JIT for target
- support.</p>
-
-<p>The 3.2 release has the following notable changes:</p>
-
-<ul>
- <li>Linux build fixes for clang (see <a href="http://lldb.llvm.org/build.html">Building LLDB</a>)</li>
- <li>Some Linux stability and usability improvements</li>
- <li>Switch expression evaluation to use MCJIT (from legacy JIT) on Linux</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="libc++">libc++: C++ Standard Library</a>
-</h3>
-
-<div>
-
-<p>Like compiler_rt, libc++ is now <a href="DeveloperPolicy.html#license">dual
- licensed</a> under the MIT and UIUC license, allowing it to be used more
- permissively.</p>
-
-<p>Within the LLVM 3.2 time-frame there were the following highlights:</p>
-
-<ul>
- <li> C++11 shared_ptr atomic access API (20.7.2.5) has been implemented.</li>
- <li>Applied noexcept and constexpr throughout library.</li>
- <li>Improved C++11 conformance in associative container emplace.</li>
- <li>Performance improvements in: std::rotate algorithm and I/O.</li>
- <li>Operator new/delete and type_infos for exception types moved from libc++ to libc++abi.</li>
- <li>Bug fixes in: <code>&lt;atomic&gt;</code>; vector<code>&lt;bool&gt;</code> algorithms,
- <code>&lt;future&gt;</code>,<code>&lt;tuple&gt;</code>,
- <code>&lt;type_traits&gt;</code>,<code>&lt;fstream&gt;</code>,<code>&lt;istream&gt;</code>,
- <code>&lt;iterator&gt;</code>, <code>&lt;condition_variable&gt;</code>,<code>&lt;complex&gt;</code> as well as visibility fixes.
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="vmkit">VMKit</a>
-</h3>
-
-<div>
-
-<p>The <a href="http://vmkit.llvm.org/">VMKit project</a> is an implementation
- of a Java Virtual Machine (Java VM or JVM) that uses LLVM for static and
- just-in-time compilation.</p>
-
-<p>The 3.2 release has the following notable changes:</p>
-
-<ul>
- <li>Bug fixes only, no functional changes.</li>
-</ul>
-
-</div>
-
-
-<!--=========================================================================-->
-<h3>
-<a name="Polly">Polly: Polyhedral Optimizer</a>
-</h3>
-
-<div>
-
-<p><a href="http://polly.llvm.org/">Polly</a> is an <em>experimental</em>
- optimizer for data locality and parallelism. It currently provides high-level
- loop optimizations and automatic parallelization (using the OpenMP run time).
- Work in the area of automatic SIMD and accelerator code generation was
- started.</p>
-
-<p>Within the LLVM 3.2 time-frame there were the following highlights:</p>
-
-<ul>
- <li>isl, the integer set library used by Polly, was relicensed under the MIT license.</li>
- <li>isl based code generation.</li>
- <li>MIT licensed replacement for CLooG (LGPLv2).</li>
- <li>Fine grained option handling (separation of core and border computations, control overhead vs. code size).</li>
- <li>Support for FORTRAN and Dragonegg.</li>
- <li>OpenMP code generation fixes.</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="StaticAnalyzer">Clang Static Analyzer</a>
-</h3>
-
-<div>
-
-<p>The <a href="http://clang-analyzer.llvm.org/">Clang Static Analyzer</a>
- is an advanced source code analysis tool integrated into Clang that performs
- a deep analysis of code to find potential bugs.</p>
-
-<p>In the LLVM 3.2 release, the static analyzer has made significant improvements
- in many areas, with notable highlights such as:</p>
-
-<ul>
- <li>Improved interprocedural analysis within a translation unit (see details below), which greatly amplified the analyzer's ability to find bugs.</li>
- <li>New infrastructure to model &quot;well-known&quot; APIs, allowing the analyzer to do a much better job when modeling calls to such functions.</li>
- <li>Significant improvements to the APIs to write static analyzer checkers, with a more unified way of representing function/method calls in the checker API. Details can be found in the <a href="http://llvm.org/devmtg/2012-11#talk13">Building a Checker in 24 hours</a> talk.
-</ul>
-
-<p>The release specifically includes notable improvements for Objective-C analysis, including:</p>
-
-<ul>
- <li>Interprocedural analysis for Objective-C methods.</li>
- <li>Interprocedural analysis of calls to &quot;blocks&quot;.</li>
- <li>Precise modeling of GCD APIs such as <tt>dispatch_once</tt> and friends.</li>
- <li>Improved support for recently added Objective-C constructs such as array and dictionary literals.</li>
-</ul>
-
-<p>The release specifically includes notable improvements for C++ analysis, including:</p>
-
-<ul>
- <li>Interprocedural analysis for C++ methods (within a translation unit).</li>
- <li>More precise modeling of C++ initializers and destructors.</li>
-</ul>
-
-<p>Finally, this release includes many small improvements to <tt>scan-build</tt>, which can be used to drive the analyzer from the command line or a continuous integration system. This includes a directory-traversal issue, which could cause potential security problems in some cases. We would like to acknowledge Tim Brown of Portcullis Computer Security Ltd for reporting this issue.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="externalproj">External Open Source Projects Using LLVM 3.2</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>An exciting aspect of LLVM is that it is used as an enabling technology for
- a lot of other language and tools projects. This section lists some of the
- projects that have already been updated to work with LLVM 3.2.</p>
-
-<h3>Crack</h3>
-
-<div>
-
-<p><a href="http://code.google.com/p/crack-language/">Crack</a> aims to provide
- the ease of development of a scripting language with the performance of a
- compiled language. The language derives concepts from C++, Java and Python,
- incorporating object-oriented programming, operator overloading and strong
- typing.</p>
-
-</div>
-
-<h3>EmbToolkit</h3>
-
-<div>
-
-<p><a href="http://www.embtoolkit.org/">EmbToolkit</a> provides Linux cross-compiler
- toolchain/SDK (GCC/binutils/C library (uclibc,eglibc,musl)), a build system for
- package cross-compilation and optionally various root file systems.
- It supports ARM and MIPS. There is an ongoing effort to provide a clang+llvm
- environment for the 3.2 releases,
-</p>
-
-</div>
-
-<h3>FAUST</h3>
-
-<div>
-
-<p><a href="http://faust.grame.fr/">FAUST</a> is a compiled language for
- real-time audio signal processing. The name FAUST stands for Functional
- AUdio STream. Its programming model combines two approaches: functional
- programming and block diagram composition. In addition with the C, C++, Java,
- JavaScript output formats, the Faust compiler can generate LLVM bitcode, and
- works with LLVM 2.7-3.2.</p>
-
-</div>
-
-<h3>Glasgow Haskell Compiler (GHC)</h3>
-
-<div>
-
-<p><a href="http://www.haskell.org/ghc/">GHC</a> is an open source compiler and
- programming suite for Haskell, a lazy functional programming language. It
- includes an optimizing static compiler generating good code for a variety of
- platforms, together with an interactive system for convenient, quick
- development.</p>
-
-<p>GHC 7.0 and onwards include an LLVM code generator, supporting LLVM 2.8 and
- later.</p>
-
-</div>
-
-<h3>Julia</h3>
-
-<div>
-
-<p><a href="https://github.com/JuliaLang/julia">Julia</a> is a high-level,
- high-performance dynamic language for technical computing. It provides a
- sophisticated compiler, distributed parallel execution, numerical accuracy,
- and an extensive mathematical function library. The compiler uses type
- inference to generate fast code without any type declarations, and uses
- LLVM's optimization passes and JIT compiler. The
- <a href="http://julialang.org/"> Julia Language</a> is designed
- around multiple dispatch, giving programs a large degree of flexibility. It
- is ready for use on many kinds of problems.</p>
-
-</div>
-
-<h3>LLVM D Compiler</h3>
-
-<div>
-
-<p><a href="https://github.com/ldc-developers/ldc">LLVM D Compiler</a> (LDC) is
- a compiler for the D programming Language. It is based on the DMD frontend
- and uses LLVM as backend.</p>
-
-</div>
-
-<h3>Open Shading Language</h3>
-
-<div>
-
-<p><a href="https://github.com/imageworks/OpenShadingLanguage/">Open Shading
- Language (OSL)</a> is a small but rich language for programmable shading in
- advanced global illumination renderers and other applications, ideal for
- describing materials, lights, displacement, and pattern generation. It uses
- LLVM to JIT complex shader networks to x86 code at runtime.</p>
-
-<p>OSL was developed by Sony Pictures Imageworks for use in its in-house
- renderer used for feature film animation and visual effects, and is
- distributed as open source software with the "New BSD" license.
- It has been used for all the shading on such films as The Amazing Spider-Man,
- Men in Black III, Hotel Transylvania, and may other films in-progress,
- and also has been incorporated into several commercial and open source
- rendering products such as Blender, VRay, and Autodesk Beast.</p>
-
-</div>
-
-<h3>Portable OpenCL (pocl)</h3>
-
-<div>
-
-<p>In addition to producing an easily portable open source OpenCL
- implementation, another major goal of <a href="http://pocl.sourceforge.net/">
- pocl</a> is improving performance portability of OpenCL programs with
- compiler optimizations, reducing the need for target-dependent manual
- optimizations. An important part of pocl is a set of LLVM passes used to
- statically parallelize multiple work-items with the kernel compiler, even in
- the presence of work-group barriers. This enables static parallelization of
- the fine-grained static concurrency in the work groups in multiple ways
- (SIMD, VLIW, superscalar,...).</p>
-
-</div>
-
-<h3>Pure</h3>
-
-<div>
-
-<p><a href="http://pure-lang.googlecode.com/">Pure</a> is an
- algebraic/functional programming language based on term rewriting. Programs
- are collections of equations which are used to evaluate expressions in a
- symbolic fashion. The interpreter uses LLVM as a backend to JIT-compile Pure
- programs to fast native code. Pure offers dynamic typing, eager and lazy
- evaluation, lexical closures, a hygienic macro system (also based on term
- rewriting), built-in list and matrix support (including list and matrix
- comprehensions) and an easy-to-use interface to C and other programming
- languages (including the ability to load LLVM bitcode modules, and inline C,
- C++, Fortran and Faust code in Pure programs if the corresponding
- LLVM-enabled compilers are installed).</p>
-
-<p>Pure version 0.56 has been tested and is known to work with LLVM 3.2 (and
- continues to work with older LLVM releases >= 2.5).</p>
-
-</div>
-
-<h3>TTA-based Co-design Environment (TCE)</h3>
-
-<div>
-
-<p><a href="http://tce.cs.tut.fi/">TCE</a> is a toolset for designing
- application-specific processors (ASP) based on the Transport triggered
- architecture (TTA). The toolset provides a complete co-design flow from C/C++
- programs down to synthesizable VHDL/Verilog and parallel program binaries.
- Processor customization points include the register files, function units,
- supported operations, and the interconnection network.</p>
-
-<p>TCE uses Clang and LLVM for C/C++ language support, target independent
- optimizations and also for parts of code generation. It generates new
- LLVM-based code generators "on the fly" for the designed TTA processors and
- loads them in to the compiler backend as runtime libraries to avoid
- per-target recompilation of larger parts of the compiler chain.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="whatsnew">What's New in LLVM 3.2?</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This release includes a huge number of bug fixes, performance tweaks and
- minor improvements. Some of the major improvements and new features are
- listed in this section.</p>
-
-<!--=========================================================================-->
-<h3>
-<a name="majorfeatures">Major New Features</a>
-</h3>
-
-<div>
-
- <!-- Features that need text if they're finished for 3.2:
- ARM EHABI
- combiner-aa?
- strong phi elim
- loop dependence analysis
- CorrelatedValuePropagation
- lib/Transforms/IPO/MergeFunctions.cpp => consider for 3.2.
- Integrated assembler on by default for arm/thumb?
-
- -->
-
- <!-- Near dead:
- Analysis/RegionInfo.h + Dom Frontiers
- SparseBitVector: used in LiveVar.
- llvm/lib/Archive - replace with lib object?
- -->
-
-<p>LLVM 3.2 includes several major changes and big features:</p>
-
-<ul>
- <li>Loop Vectorizer.</li>
- <li>New implementation of SROA.</li>
- <li>New NVPTX back-end (replacing existing PTX back-end) based on NVIDIA sources.</li>
-</ul>
-
-</div>
-
-
-<!--=========================================================================-->
-<h3>
-<a name="coreimprovements">LLVM IR and Core Improvements</a>
-</h3>
-
-<div>
-
-<p>LLVM IR has several new features for better support of new targets and that
- expose new optimization opportunities:</p>
-
-<ul>
- <li>Thread local variables may have a specified TLS model. See the
- <a href="LangRef.html#globalvars">Language Reference Manual</a>.</li>
- <li>'TYPE_CODE_FUNCTION_OLD' type code and autoupgrade code for old function attributes format has been removed.</li>
- <li>Internal representation of the Attributes class has been converted into a pointer to an
- opaque object that's uniqued by and stored in the LLVMContext object.
- The Attributes class then becomes a thin wrapper around this opaque object.</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="optimizer">Optimizer Improvements</a>
-</h3>
-
-<div>
-
-<p>In addition to many minor performance tweaks and bug fixes, this release
- includes a few major enhancements and additions to the optimizers:</p>
-
-<p> Loop Vectorizer - We've added a loop vectorizer and we are now able to
- vectorize small loops. The loop vectorizer is disabled by default and
- can be enabled using the <b>-mllvm -vectorize-loops</b> flag.
- The SIMD vector width can be specified using the flag
- <b>-mllvm -force-vector-width=4</b>.
- The default value is <b>0</b> which means auto-select.
- <br/>
- We can now vectorize this function:
-
- <pre class="doc_code">
- unsigned sum_arrays(int *A, int *B, int start, int end) {
- unsigned sum = 0;
- for (int i = start; i &lt; end; ++i)
- sum += A[i] + B[i] + i;
-
- return sum;
- }
- </pre>
-
- We vectorize under the following loops:
- <ul>
- <li>The inner most loops must have a single basic block.</li>
- <li>The number of iterations are known before the loop starts to execute.</li>
- <li>The loop counter needs to be incremented by one.</li>
- <li>The loop trip count <b>can</b> be a variable.</li>
- <li>Loops do <b>not</b> need to start at zero.</li>
- <li>The induction variable can be used inside the loop.</li>
- <li>Loop reductions are supported.</li>
- <li>Arrays with affine access pattern do <b>not</b> need to be marked as 'noalias' and are checked at runtime.</li>
- </ul>
-
-</p>
-
-<p>SROA - We&#8217;ve re-written SROA to be significantly more powerful and generate
-code which is much more friendly to the rest of the optimization pipeline.
-Previously this pass had scaling problems that required it to only operate on
-relatively small aggregates, and at times it would mistakenly replace a large
-aggregate with a single very large integer in order to make it a scalar SSA
-value. The result was a large number of i1024 and i2048 values representing any
-small stack buffer. These in turn slowed down many subsequent optimization
-paths.</p>
-<p>The new SROA pass uses a different algorithm that allows it to only promote to
-scalars the pieces of the aggregate actively in use. Because of this it doesn&#8217;t
-require any thresholds. It also always deduces the scalar values from the uses
-of the aggregate rather than the specific LLVM type of the aggregate. These
-features combine to both optimize more code with the pass but to improve the
-compile time of many functions dramatically.</p>
-
-<ul>
- <li>Branch weight metadata is preserved through more of the optimizer.</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="mc">MC Level Improvements</a>
-</h3>
-
-<div>
-
-<p>The LLVM Machine Code (aka MC) subsystem was created to solve a number of
- problems in the realm of assembly, disassembly, object file format handling,
- and a number of other related areas that CPU instruction-set level tools work
- in. For more information, please see the
- <a href="http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html">Intro
- to the LLVM MC Project Blog Post</a>.</p>
-
-<ul>
- <li> Added support for following assembler directives: <code>.ifb</code>, <code>.ifnb</code>, <code>.ifc</code>,
- <code>.ifnc</code>, <code>.purgem</code>, <code>.rept</code> and <code>.version</code> (ELF) as well as Darwin specific
- <code>.pushsection</code>, <code>.popsection</code> and <code>.previous</code> .</li>
- <li>Enhanced handling of <code>.lcomm directive</code>.</li>
- <li>MS style inline assembler: added implementation of the offset and TYPE operators.</li>
- <li>Targets can specify minimum supported NOP size for NOP padding.</li>
- <li>ELF improvements: added support for generating ELF objects on Windows.</li>
- <li>MachO improvements: symbol-difference variables are marked as N_ABS, added direct-to-object attribute for data-in-code markers.</li>
- <li>Added support for annotated disassembly output for x86 and arm targets.</li>
- <li>Arm support has been improved by adding support for ARM TARGET2 relocation
- and fixing hadling of ARM-style "$d.*" labels.</li>
- <li>Implemented local-exec TLS on PowerPC.</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="codegen">Target Independent Code Generator Improvements</a>
-</h3>
-
-<div>
-
-<p>Stack Coloring - We have implemented a new optimization pass
- to merge stack objects which are used in disjoin areas of the code.
- This optimization reduces the required stack space significantly, in cases
- where it is clear to the optimizer that the stack slot is not shared.
- We use the lifetime markers to tell the codegen that a certain alloca
- is used within a region.</p>
-
-<p> We now merge consecutive loads and stores. </p>
-
-<p>We have put a significant amount of work into the code generator
- infrastructure, which allows us to implement more aggressive algorithms and
- make it run faster:</p>
-
-<p> We added new TableGen infrastructure to support bundling for
- Very Long Instruction Word (VLIW) architectures. TableGen can now
- automatically generate a deterministic finite automaton from a VLIW
- target's schedule description which can be queried to determine
- legal groupings of instructions in a bundle.</p>
-
-<p> We have added a new target independent VLIW packetizer based on the
- DFA infrastructure to group machine instructions into bundles.</p>
-
-<p> We have added new TableGen infrastructure to support relationship maps
- between instructions. This feature enables TableGen to automatically
- construct a set of relation tables and query functions that can be used
- to switch between various forms of instructions. For more information,
- please refer to <a href="http://llvm.org/docs/HowToUseInstrMappings.html">
- How To Use Instruction Mappings</a>.</p>
-
-</div>
-
-<h4>
-<a name="blockplacement">Basic Block Placement</a>
-</h4>
-
-<div>
-
-<p>A probability based block placement and code layout algorithm was added to
- LLVM's code generator. This layout pass supports probabilities derived from
- static heuristics as well as source code annotations such as
- <code>__builtin_expect</code>.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="x86">X86-32 and X86-64 Target Improvements</a>
-</h3>
-
-<div>
-
-<p>New features and major changes in the X86 target include:</p>
-
-<ul>
- <li>Small codegen optimizations, especially for AVX2.</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="ARM">ARM Target Improvements</a>
-</h3>
-
-<div>
-
-<p>New features of the ARM target include:</p>
-
-<ul>
- <li>Support and performance tuning for the A6 'Swift' CPU.</li>
-</ul>
-
-<!--_________________________________________________________________________-->
-
-<h4>
-<a name="armintegratedassembler">ARM Integrated Assembler</a>
-</h4>
-
-<div>
-
-<p>The ARM target now includes a full featured macro assembler, including
- direct-to-object module support for clang. The assembler is currently enabled
- by default for Darwin only pending testing and any additional necessary
- platform specific support for Linux.</p>
-
-<p>Full support is included for Thumb1, Thumb2 and ARM modes, along with
- sub-target and CPU specific extensions for VFP2, VFP3 and NEON.</p>
-
-<p>The assembler is Unified Syntax only (see ARM Architecural Reference Manual
- for details). While there is some, and growing, support for pre-unfied
- (divided) syntax, there are still significant gaps in that support.</p>
-
-</div>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="MIPS">MIPS Target Improvements</a>
-</h3>
-
-<div>
-
-<p>New features and major changes in the MIPS target include:</p>
-
-<ul>
- <li>Integrated assembler support:
- MIPS32 works for both PIC and static, known limitation is the PR14456 where
- R_MIPS_GPREL16 relocation is generated with the wrong addend.
- MIPS64 support is incomplete, for example exception handling is not working.</li>
- <li>Support for fast calling convention has been added.</li>
- <li>Support for Android MIPS toolchain has been added to clang driver.</li>
- <li>Added clang driver support for MIPS N32 ABI through "-mabi=n32" option.</li>
- <li>MIPS32 and MIPS64 disassembler has been implemented.</li>
- <li>Support for compiling programs with large GOTs (exceeding 64kB in size) has been added
- through llc option "-mxgot".</li>
- <li>Added experimental support for MIPS32 DSP intrinsics.</li>
- <li>Experimental support for MIPS16 with following limitations: only soft float is supported,
- C++ exceptions are not supported, large stack frames (> 32000 bytes) are not supported,
- direct object code emission is not supported only .s .</li>
- <li>Standalone assembler (llvm-mc): implementation is in progress and considered experimental.</li>
- <li>All classic JIT and MCJIT tests pass on Little and Big Endian MIPS32 platforms.</li>
- <li>Inline asm support: all common constraints and operand modifiers have been implemented.</li>
- <li>Added tail call optimization support, use llc option "-enable-mips-tail-calls"
- or clang options "-mllvm -enable-mips-tail-calls"to enable it.</li>
- <li>Improved register allocation by removing registers $fp, $gp, $ra and $at from the list of reserved registers.</li>
- <li>Long branch expansion pass has been implemented, which expands branch
- instructions with offsets that do not fit in the 16-bit field.</li>
- <li>Cavium Octeon II board is used for testing builds (llvm-mips-linux builder).</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="PowerPC">PowerPC Target Improvements</a>
-</h3>
-
-<div>
-
-<p>Many fixes and changes across LLVM (and Clang) for better compliance with
- the 64-bit PowerPC ELF Application Binary Interface, interoperability with
- GCC, and overall 64-bit PowerPC support. Some highlights include:</p>
-<ul>
- <li> MCJIT support added.</li>
- <li> PPC64 relocation support and (small code model) TOC handling
- added.</li>
- <li> Parameter passing and return value fixes (alignment issues,
- padding, varargs support, proper register usage, odd-sized
- structure support, float support, extension of return values
- for i32 return values).</li>
- <li> Fixes in spill and reload code for vector registers.</li>
- <li> C++ exception handling enabled.</li>
- <li> Changes to remediate double-rounding compatibility issues with
- respect to GCC behavior.</li>
- <li> Refactoring to disentangle ppc64-elf-linux ABI from Darwin
- ppc64 ABI support.</li>
- <li> Assorted new test cases and test case fixes (endian and word
- size issues).</li>
- <li> Fixes for big-endian codegen bugs, instruction encodings, and
- instruction constraints.</li>
- <li> Implemented -integrated-as support.</li>
- <li> Additional support for Altivec compare operations.</li>
- <li> IBM long double support.</li>
-</ul>
-<p>There have also been code generation improvements for both 32- and 64-bit
- code. Instruction scheduling support for the Freescale e500mc and e5500
- cores has been added.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="NVPTX">PTX/NVPTX Target Improvements</a>
-</h3>
-
-<div>
-
-<p>The PTX back-end has been replaced by the NVPTX back-end, which is based on
- the LLVM back-end used by NVIDIA in their CUDA (nvcc) and OpenCL compiler.
- Some highlights include:</p>
-<ul>
- <li>Compatibility with PTX 3.1 and SM 3.5</li>
- <li>Support for NVVM intrinsics as defined in the NVIDIA Compiler SDK</li>
- <li>Full compatibility with old PTX back-end, with much greater coverage of
- LLVM IR</li>
-</ul>
-
-<p>Please submit any back-end bugs to the LLVM Bugzilla site.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="OtherTS">Other Target Specific Improvements</a>
-</h3>
-
-<div>
-
-<ul>
- <li>Added support for custom names for library functions in TargetLibraryInfo.</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="changes">Major Changes and Removed Features</a>
-</h3>
-
-<div>
-
-<p>If you're already an LLVM user or developer with out-of-tree changes based on
- LLVM 3.2, this section lists some "gotchas" that you may run into upgrading
- from the previous release.</p>
-
-<ul>
-<li>llvm-ld and llvm-stub have been removed, llvm-ld functionality can be partially replaced by
- llvm-link | opt | {llc | as, llc -filetype=obj} | ld, or fully replaced by Clang. </li>
-<li>MCJIT: added support for inline assembly (requires asm parser), added faux remote target execution to lli option '-remote-mcjit'.</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="api_changes">Internal API Changes</a>
-</h3>
-
-<div>
-
-<p>In addition, many APIs have changed in this release. Some of the major
- LLVM API changes are:</p>
-
-<p> We've added a new interface for allowing IR-level passes to access
- target-specific information. A new IR-level pass, called
- "TargetTransformInfo" provides a number of low-level interfaces.
- LSR and LowerInvoke already use the new interface. </p>
-
-<p> The TargetData structure has been renamed to DataLayout and moved to VMCore
-to remove a dependency on Target. </p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="tools_changes">Tools Changes</a>
-</h3>
-
-<div>
-
-<p>In addition, some tools have changed in this release. Some of the changes are:</p>
-
-<ul>
-<li>opt: added support for '-mtriple' option.</li>
-<li>llvm-mc : - added '-disassemble' support for '-show-inst' and '-show-encoding' options, added '-edis' option to produce annotated
- disassembly output for X86 and ARM targets.</li>
-<li>libprofile: allows the profile data file name to be specified by the LLVMPROF_OUTPUT environment variable.</li>
-<li>llvm-objdump: has been changed to display available targets, '-arch' option accepts x86 and x86-64 as valid arch names.</li>
-<li>llc and opt: added FMA formation from pairs of FADD + FMUL or FSUB + FMUL enabled by option '-enable-excess-fp-precision' or option '-enable-unsafe-fp-math',
- option '-fp-contract' controls the creation by optimizations of fused FP by selecting Fast, Standard, or Strict mode.</li>
-<li>llc: object file output from llc is no longer considered experimental.</li>
-<li>gold plugin: handles Position Independent Executables.</li>
-</ul>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="knownproblems">Known Problems</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM is generally a production quality compiler, and is used by a broad range
- of applications and shipping in many products. That said, not every
- subsystem is as mature as the aggregate, particularly the more obscure
- targets. If you run into a problem, please check
- the <a href="http://llvm.org/bugs/">LLVM bug database</a> and submit a bug if
- there isn't already one or ask on
- the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev
- list</a>.</p>
-
- <p>Known problem areas include:</p>
-
-<ul>
- <li>The CellSPU, MSP430, and XCore backends are experimental, and the CellSPU backend will be removed in LLVM 3.3.</li>
-
- <li>The integrated assembler, disassembler, and JIT is not supported by
- several targets. If an integrated assembler is not supported, then a
- system assembler is required. For more details, see the <a
- href="CodeGenerator.html#targetfeatures">Target Features Matrix</a>.
- </li>
-</ul>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="additionalinfo">Additional Information</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>A wide variety of additional information is available on
- the <a href="http://llvm.org/">LLVM web page</a>, in particular in
- the <a href="http://llvm.org/docs/">documentation</a> section. The web page
- also contains versions of the API documentation which is up-to-date with the
- Subversion version of the source code. You can access versions of these
- documents specific to this release by going into the "<tt>llvm/doc/</tt>"
- directory in the LLVM tree.</p>
-
-<p>If you have any questions or comments about LLVM, please feel free to contact
- us via the <a href="http://llvm.org/docs/#maillist"> mailing lists</a>.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-12-19 11:50:28 +0100 (Wed, 19 Dec 2012) $
-</address>
-
-</body>
-</html>
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
new file mode 100644
index 000000000000..3ca556025494
--- /dev/null
+++ b/docs/ReleaseNotes.rst
@@ -0,0 +1,144 @@
+======================
+LLVM 3.3 Release Notes
+======================
+
+.. contents::
+ :local:
+
+.. warning::
+ These are in-progress notes for the upcoming LLVM 3.3 release. You may
+ prefer the `LLVM 3.2 Release Notes <http://llvm.org/releases/3.2/docs
+ /ReleaseNotes.html>`_.
+
+
+Introduction
+============
+
+This document contains the release notes for the LLVM Compiler Infrastructure,
+release 3.3. Here we describe the status of LLVM, including major improvements
+from the previous release, improvements in various subprojects of LLVM, and
+some of the current users of the code. All LLVM releases may be downloaded
+from the `LLVM releases web site <http://llvm.org/releases/>`_.
+
+For more information about LLVM, including information about the latest
+release, please check out the `main LLVM web site <http://llvm.org/>`_. If you
+have questions or comments, the `LLVM Developer's Mailing List
+<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ is a good place to send
+them.
+
+Note that if you are reading this file from a Subversion checkout or the main
+LLVM web page, this document applies to the *next* release, not the current
+one. To see the release notes for a specific release, please see the `releases
+page <http://llvm.org/releases/>`_.
+
+Non-comprehensive list of changes in this release
+=================================================
+
+.. NOTE
+ For small 1-3 sentence descriptions, just add an entry at the end of
+ this list. If your description won't fit comfortably in one bullet
+ point (e.g. maybe you would like to give an example of the
+ functionality, or simply have a lot to talk about), see the `NOTE` below
+ for adding a new subsection.
+
+* The CellSPU port has been removed. It can still be found in older versions.
+
+* The IR-level extended linker APIs (for example, to link bitcode files out of
+ archives) have been removed. Any existing clients of these features should
+ move to using a linker with integrated LTO support.
+
+* LLVM and Clang's documentation has been migrated to the `Sphinx
+ <http://sphinx-doc.org/>`_ documentation generation system which uses
+ easy-to-write reStructuredText. See `llvm/docs/README.txt` for more
+ information.
+
+* TargetTransformInfo (TTI) is a new interface that can be used by IR-level
+ passes to obtain target-specific information, such as the costs of
+ instructions. Only "Lowering" passes such as LSR and the vectorizer are
+ allowed to use the TTI infrastructure.
+
+* We've improved the X86 and ARM cost models.
+
+* The Attributes classes have been completely rewritten and expanded. They now
+ support not only enumerated attributes and alignments, but "string"
+ attributes, which are useful for passing information to code generation. See
+ :doc:`HowToUseAttributes` for more details.
+
+* TableGen's syntax for instruction selection patterns has been simplified.
+ Instead of specifying types indirectly with register classes, you should now
+ specify types directly in the input patterns. See ``SparcInstrInfo.td`` for
+ examples of the new syntax. The old syntax using register classes still
+ works, but it will be removed in a future LLVM release.
+
+* ... next change ...
+
+.. NOTE
+ If you would like to document a larger change, then you can add a
+ subsection about it right here. You can copy the following boilerplate
+ and un-indent it (the indentation causes it to be inside this comment).
+
+ Special New Feature
+ -------------------
+
+ Makes programs 10x faster by doing Special New Thing.
+
+AArch64 target
+--------------
+
+We've added support for AArch64, ARM's 64-bit architecture. Development is still
+in fairly early stages, but we expect successful compilation when:
+
+- compiling standard-compliant C99 and C++03 with Clang;
+- using Linux as a target platform;
+- where code + static data doesn't exceed 4GB in size (heap-allocated data has
+  no limitation).
+
+Some additional functionality is also implemented, notably DWARF debugging,
+GNU-style thread local storage and inline assembly.
+
+Hexagon Target
+--------------
+
+- Removed support for legacy hexagonv2 and hexagonv3 processor
+ architectures which are no longer in use. Currently supported
+ architectures are hexagonv4 and hexagonv5.
+
+Loop Vectorizer
+---------------
+
+We've continued the work on the loop vectorizer. The loop vectorizer now has
+the following features (see the example after this list):
+
+- Loops with unknown trip counts
+- Runtime checks of pointers
+- Reductions and inductions
+- If-conversion
+- Pointer induction variables
+- Reverse iterators
+- Vectorization of mixed types
+- Vectorization of function calls
+- Partial unrolling during vectorization
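+
+For illustration, here is a hypothetical loop (not taken from the LLVM
+sources) that combines two of the features above: a trip count known only at
+runtime, and pointers that carry no ``noalias`` annotation and are instead
+guarded by runtime checks:
+
+.. code-block:: c++
+
+  // 'n' is a runtime trip count, and 'A' and 'B' may overlap -- the
+  // vectorizer can guard the vector loop with runtime pointer checks.
+  void add_arrays(int *A, const int *B, int n) {
+    for (int i = 0; i < n; ++i)
+      A[i] = A[i] + B[i];
+  }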
+
+R600 Backend
+------------
+
+The R600 backend was added in this release; it supports AMD GPUs
+(HD2XXX - HD7XXX). This backend is used in AMD's open source
+graphics / compute drivers, which are developed as part of the `Mesa3D
+<http://www.mesa3d.org>`_ project.
+
+
+
+Additional Information
+======================
+
+A wide variety of additional information is available on the `LLVM web page
+<http://llvm.org/>`_, in particular in the `documentation
+<http://llvm.org/docs/>`_ section. The web page also contains versions of the
+API documentation which is up-to-date with the Subversion version of the source
+code. You can access versions of these documents specific to this release by
+going into the ``llvm/docs/`` directory in the LLVM tree.
+
+If you have any questions or comments about LLVM, please feel free to contact
+us via the `mailing lists <http://llvm.org/docs/#maillist>`_.
+
diff --git a/docs/SegmentedStacks.rst b/docs/SegmentedStacks.rst
index f97d62abda04..e44ce42313cb 100644
--- a/docs/SegmentedStacks.rst
+++ b/docs/SegmentedStacks.rst
@@ -1,5 +1,3 @@
-.. _segmented_stacks:
-
========================
Segmented Stacks in LLVM
========================
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
deleted file mode 100644
index 1dcee54f0bf9..000000000000
--- a/docs/SourceLevelDebugging.html
+++ /dev/null
@@ -1,2858 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>Source Level Debugging with LLVM</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>Source Level Debugging with LLVM</h1>
-
-<table class="layout" style="width:100%">
- <tr class="layout">
- <td class="left">
-<ul>
- <li><a href="#introduction">Introduction</a>
- <ol>
- <li><a href="#phil">Philosophy behind LLVM debugging information</a></li>
- <li><a href="#consumers">Debug information consumers</a></li>
- <li><a href="#debugopt">Debugging optimized code</a></li>
- </ol></li>
- <li><a href="#format">Debugging information format</a>
- <ol>
- <li><a href="#debug_info_descriptors">Debug information descriptors</a>
- <ul>
- <li><a href="#format_compile_units">Compile unit descriptors</a></li>
- <li><a href="#format_files">File descriptors</a></li>
- <li><a href="#format_global_variables">Global variable descriptors</a></li>
- <li><a href="#format_subprograms">Subprogram descriptors</a></li>
- <li><a href="#format_blocks">Block descriptors</a></li>
- <li><a href="#format_basic_type">Basic type descriptors</a></li>
- <li><a href="#format_derived_type">Derived type descriptors</a></li>
- <li><a href="#format_composite_type">Composite type descriptors</a></li>
- <li><a href="#format_subrange">Subrange descriptors</a></li>
- <li><a href="#format_enumeration">Enumerator descriptors</a></li>
- <li><a href="#format_variables">Local variables</a></li>
- </ul></li>
- <li><a href="#format_common_intrinsics">Debugger intrinsic functions</a>
- <ul>
- <li><a href="#format_common_declare">llvm.dbg.declare</a></li>
- <li><a href="#format_common_value">llvm.dbg.value</a></li>
- </ul></li>
- </ol></li>
- <li><a href="#format_common_lifetime">Object lifetimes and scoping</a></li>
- <li><a href="#ccxx_frontend">C/C++ front-end specific debug information</a>
- <ol>
- <li><a href="#ccxx_compile_units">C/C++ source file information</a></li>
- <li><a href="#ccxx_global_variable">C/C++ global variable information</a></li>
- <li><a href="#ccxx_subprogram">C/C++ function information</a></li>
- <li><a href="#ccxx_basic_types">C/C++ basic types</a></li>
- <li><a href="#ccxx_derived_types">C/C++ derived types</a></li>
- <li><a href="#ccxx_composite_types">C/C++ struct/union types</a></li>
- <li><a href="#ccxx_enumeration_types">C/C++ enumeration types</a></li>
- </ol></li>
- <li><a href="#llvmdwarfextension">LLVM Dwarf Extensions</a>
- <ol>
- <li><a href="#objcproperty">Debugging Information Extension
- for Objective C Properties</a>
- <ul>
- <li><a href="#objcpropertyintroduction">Introduction</a></li>
- <li><a href="#objcpropertyproposal">Proposal</a></li>
- <li><a href="#objcpropertynewattributes">New DWARF Attributes</a></li>
- <li><a href="#objcpropertynewconstants">New DWARF Constants</a></li>
- </ul>
- </li>
- <li><a href="#acceltable">Name Accelerator Tables</a>
- <ul>
- <li><a href="#acceltableintroduction">Introduction</a></li>
- <li><a href="#acceltablehashes">Hash Tables</a></li>
- <li><a href="#acceltabledetails">Details</a></li>
- <li><a href="#acceltablecontents">Contents</a></li>
- <li><a href="#acceltableextensions">Language Extensions and File Format Changes</a></li>
- </ul>
- </li>
- </ol>
- </li>
-</ul>
-</td>
-</tr></table>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
- and <a href="mailto:jlaskey@mac.com">Jim Laskey</a></p>
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2><a name="introduction">Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This document is the central repository for all information pertaining to
- debug information in LLVM. It describes the <a href="#format">actual format
- that the LLVM debug information</a> takes, which is useful for those
- interested in creating front-ends or dealing directly with the information.
- Further, this document provides specific examples of what debug information
- for C/C++ looks like.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="phil">Philosophy behind LLVM debugging information</a>
-</h3>
-
-<div>
-
-<p>The idea of the LLVM debugging information is to capture how the important
- pieces of the source-language's Abstract Syntax Tree map onto LLVM code.
- Several design aspects have shaped the solution that appears here. The
- important ones are:</p>
-
-<ul>
- <li>Debugging information should have very little impact on the rest of the
- compiler. No transformations, analyses, or code generators should need to
- be modified because of debugging information.</li>
-
- <li>LLVM optimizations should interact in <a href="#debugopt">well-defined and
- easily described ways</a> with the debugging information.</li>
-
- <li>Because LLVM is designed to support arbitrary programming languages,
- LLVM-to-LLVM tools should not need to know anything about the semantics of
- the source-level-language.</li>
-
- <li>Source-level languages are often <b>widely</b> different from one another.
-      LLVM should not put any restrictions on the flavor of the source-language,
- and the debugging information should work with any language.</li>
-
- <li>With code generator support, it should be possible to use an LLVM compiler
- to compile a program to native machine code and standard debugging
- formats. This allows compatibility with traditional machine-code level
- debuggers, like GDB or DBX.</li>
-</ul>
-
-<p>The approach used by the LLVM implementation is to use a small set
- of <a href="#format_common_intrinsics">intrinsic functions</a> to define a
- mapping between LLVM program objects and the source-level objects. The
- description of the source-level program is maintained in LLVM metadata
- in an <a href="#ccxx_frontend">implementation-defined format</a>
- (the C/C++ front-end currently uses working draft 7 of
- the <a href="http://www.eagercon.com/dwarf/dwarf3std.htm">DWARF 3
- standard</a>).</p>
-
-<p>When a program is being debugged, a debugger interacts with the user and
- turns the stored debug information into source-language specific information.
- As such, a debugger must be aware of the source-language, and is thus tied to
- a specific language or family of languages.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="consumers">Debug information consumers</a>
-</h3>
-
-<div>
-
-<p>The role of debug information is to provide meta information normally
- stripped away during the compilation process. This meta information provides
-   an LLVM user with a relationship between generated code and the original program
- source code.</p>
-
-<p>Currently, debug information is consumed by DwarfDebug to produce dwarf
- information used by the gdb debugger. Other targets could use the same
- information to produce stabs or other debug forms.</p>
-
-<p>It would also be reasonable to use debug information to feed profiling tools
- for analysis of generated code, or, tools for reconstructing the original
- source from generated code.</p>
-
-<p>TODO - expound a bit more.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="debugopt">Debugging optimized code</a>
-</h3>
-
-<div>
-
-<p>An extremely high priority of LLVM debugging information is to make it
- interact well with optimizations and analysis. In particular, the LLVM debug
- information provides the following guarantees:</p>
-
-<ul>
- <li>LLVM debug information <b>always provides information to accurately read
- the source-level state of the program</b>, regardless of which LLVM
- optimizations have been run, and without any modification to the
- optimizations themselves. However, some optimizations may impact the
- ability to modify the current state of the program with a debugger, such
- as setting program variables, or calling functions that have been
- deleted.</li>
-
- <li>As desired, LLVM optimizations can be upgraded to be aware of the LLVM
- debugging information, allowing them to update the debugging information
- as they perform aggressive optimizations. This means that, with effort,
- the LLVM optimizers could optimize debug code just as well as non-debug
- code.</li>
-
- <li>LLVM debug information does not prevent optimizations from
- happening (for example inlining, basic block reordering/merging/cleanup,
- tail duplication, etc).</li>
-
- <li>LLVM debug information is automatically optimized along with the rest of
- the program, using existing facilities. For example, duplicate
- information is automatically merged by the linker, and unused information
- is automatically removed.</li>
-</ul>
-
-<p>Basically, the debug information allows you to compile a program with
- "<tt>-O0 -g</tt>" and get full debug information, allowing you to arbitrarily
- modify the program as it executes from a debugger. Compiling a program with
- "<tt>-O3 -g</tt>" gives you full debug information that is always available
- and accurate for reading (e.g., you get accurate stack traces despite tail
- call elimination and inlining), but you might lose the ability to modify the
-   program and call functions that were optimized out of the program, or
- inlined away completely.</p>
-
-<p><a href="TestingGuide.html#quicktestsuite">LLVM test suite</a> provides a
- framework to test optimizer's handling of debugging information. It can be
- run like this:</p>
-
-<div class="doc_code">
-<pre>
-% cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level
-% make TEST=dbgopt
-</pre>
-</div>
-
-<p>This will test the impact of debugging information on optimization passes. If
-   debugging information influences optimization passes, then it will be reported
- as a failure. See <a href="TestingGuide.html">TestingGuide</a> for more
- information on LLVM test infrastructure and how to run various tests.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="format">Debugging information format</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM debugging information has been carefully designed to make it possible
- for the optimizer to optimize the program and debugging information without
- necessarily having to know anything about debugging information. In
- particular, the use of metadata avoids duplicated debugging information from
- the beginning, and the global dead code elimination pass automatically
- deletes debugging information for a function if it decides to delete the
- function. </p>
-
-<p>To do this, most of the debugging information (descriptors for types,
- variables, functions, source files, etc) is inserted by the language
- front-end in the form of LLVM metadata. </p>
-
-<p>Debug information is designed to be agnostic about the target debugger and
- debugging information representation (e.g. DWARF/Stabs/etc). It uses a
- generic pass to decode the information that represents variables, types,
- functions, namespaces, etc: this allows for arbitrary source-language
- semantics and type-systems to be used, as long as there is a module
- written for the target debugger to interpret the information. </p>
-
-<p>To provide basic functionality, the LLVM debugger does have to make some
- assumptions about the source-level language being debugged, though it keeps
- these to a minimum. The only common features that the LLVM debugger assumes
- exist are <a href="#format_files">source files</a>,
- and <a href="#format_global_variables">program objects</a>. These abstract
- objects are used by a debugger to form stack traces, show information about
- local variables, etc.</p>
-
-<p>This section of the documentation first describes the representation aspects
- common to any source-language. The <a href="#ccxx_frontend">next section</a>
- describes the data layout conventions used by the C and C++ front-ends.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="debug_info_descriptors">Debug information descriptors</a>
-</h3>
-
-<div>
-
-<p>In consideration of the complexity and volume of debug information, LLVM
-   provides a specification for well-formed debug descriptors. </p>
-
-<p>Consumers of LLVM debug information expect the descriptors for program
- objects to start in a canonical format, but the descriptors can include
- additional information appended at the end that is source-language
- specific. All LLVM debugging information is versioned, allowing backwards
- compatibility in the case that the core structures need to change in some
-   way. Also, every debugging information object starts with a tag to indicate
-   what type of object it is. The source-language is allowed to define its own
-   objects, by using unreserved tag numbers. We recommend using tags in
- the range 0x1000 through 0x2000 (there is a defined enum DW_TAG_user_base =
- 0x1000.)</p>
-
-<p>The fields of debug descriptors used internally by LLVM
- are restricted to only the simple data types <tt>i32</tt>, <tt>i1</tt>,
- <tt>float</tt>, <tt>double</tt>, <tt>mdstring</tt> and <tt>mdnode</tt>. </p>
-
-<div class="doc_code">
-<pre>
-!1 = metadata !{
- i32, ;; A tag
- ...
-}
-</pre>
-</div>
-
-<p><a name="LLVMDebugVersion">The first field of a descriptor is always an
- <tt>i32</tt> containing a tag value identifying the content of the
- descriptor. The remaining fields are specific to the descriptor. The values
- of tags are loosely bound to the tag values of DWARF information entries.
- However, that does not restrict the use of the information supplied to DWARF
- targets. To facilitate versioning of debug information, the tag is augmented
- with the current debug version (LLVMDebugVersion = 8 &lt;&lt; 16 or
- 0x80000 or 524288.)</a></p>
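-
-<p>As a worked example, the tag of a compile unit descriptor is computed as
-   follows (a sketch; the constants come from the definitions above):</p>
-
-<div class="doc_code">
-<pre>
-;; DW_TAG_compile_unit = 17
-;; LLVMDebugVersion    = 8 &lt;&lt; 16 = 524288
-;; Versioned tag       = 17 + 524288 = 524305
-!0 = metadata !{ i32 524305, ... }
-</pre>
-</div>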
-
-<p>The details of the various descriptors follow.</p>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_compile_units">Compile unit descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!0 = metadata !{
- i32, ;; Tag = 17 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_compile_unit)
- i32, ;; Unused field.
- i32, ;; DWARF language identifier (ex. DW_LANG_C89)
- metadata, ;; Source file name
- metadata, ;; Source file directory (includes trailing slash)
-  metadata, ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
- i1, ;; True if this is a main compile unit.
- i1, ;; True if this is optimized.
- metadata, ;; Flags
-  i32,      ;; Runtime version
-  metadata, ;; List of enum types
-  metadata, ;; List of retained types
-  metadata, ;; List of subprograms
-  metadata  ;; List of global variables
-}
-</pre>
-</div>
-
-<p>These descriptors contain a source language ID for the file (we use the DWARF
- 3.0 ID numbers, such as <tt>DW_LANG_C89</tt>, <tt>DW_LANG_C_plus_plus</tt>,
- <tt>DW_LANG_Cobol74</tt>, etc), three strings describing the filename,
- working directory of the compiler, and an identifier string for the compiler
- that produced it.</p>
-
-<p>Compile unit descriptors provide the root context for objects declared in a
- specific compilation unit. File descriptors are defined using this context.
- These descriptors are collected by a named metadata
-   <tt>!llvm.dbg.cu</tt>. The compile unit descriptor keeps track of subprograms,
-   global variables and type information.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_files">File descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!0 = metadata !{
- i32, ;; Tag = 41 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_file_type)
- metadata, ;; Source file name
- metadata, ;; Source file directory (includes trailing slash)
- metadata ;; Unused
-}
-</pre>
-</div>
-
-<p>These descriptors contain information for a file. Global variables and top
-   level functions would be defined using this context. File descriptors also
-   provide context for source line correspondence. </p>
-
-<p>Each input file is encoded as a separate file descriptor in LLVM debugging
- information output. </p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_global_variables">Global variable descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!1 = metadata !{
- i32, ;; Tag = 52 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_variable)
- i32, ;; Unused field.
- metadata, ;; Reference to context descriptor
- metadata, ;; Name
- metadata, ;; Display name (fully qualified C++ name)
- metadata, ;; MIPS linkage name (for C++)
- metadata, ;; Reference to file where defined
- i32, ;; Line number where defined
- metadata, ;; Reference to type descriptor
- i1, ;; True if the global is local to compile unit (static)
- i1, ;; True if the global is defined in the compile unit (not extern)
- {}* ;; Reference to the global variable
-}
-</pre>
-</div>
-
-<p>These descriptors provide debug information about global variables. They
-provide details such as name, type and where the variable is defined. All
-global variables are collected inside the named metadata
-<tt>!llvm.dbg.cu</tt>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_subprograms">Subprogram descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32, ;; Tag = 46 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_subprogram)
- i32, ;; Unused field.
- metadata, ;; Reference to context descriptor
- metadata, ;; Name
- metadata, ;; Display name (fully qualified C++ name)
- metadata, ;; MIPS linkage name (for C++)
- metadata, ;; Reference to file where defined
- i32, ;; Line number where defined
- metadata, ;; Reference to type descriptor
- i1, ;; True if the global is local to compile unit (static)
- i1, ;; True if the global is defined in the compile unit (not extern)
- i32, ;; Line number where the scope of the subprogram begins
-  i32,      ;; Virtuality, e.g. dwarf::DW_VIRTUALITY_virtual
- i32, ;; Index into a virtual function
- metadata, ;; indicates which base type contains the vtable pointer for the
- ;; derived class
-  i32,      ;; Flags - Artificial, Private, Protected, Explicit, Prototyped.
- i1, ;; isOptimized
- Function *,;; Pointer to LLVM function
- metadata, ;; Lists function template parameters
-  metadata, ;; Function declaration descriptor
-  metadata  ;; List of function variables
-}
-</pre>
-</div>
-
-<p>These descriptors provide debug information about functions, methods and
- subprograms. They provide details such as name, return types and the source
- location where the subprogram is defined.
-</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_blocks">Block descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!3 = metadata !{
- i32, ;; Tag = 11 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_lexical_block)
- metadata,;; Reference to context descriptor
- i32, ;; Line number
- i32, ;; Column number
- metadata,;; Reference to source file
- i32 ;; Unique ID to identify blocks from a template function
-}
-</pre>
-</div>
-
-<p>This descriptor provides debug information about nested blocks within a
-   subprogram. The line number and column numbers are used to distinguish
-   two lexical blocks at the same depth. </p>
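-
-<p>As a sketch of this first form (the metadata numbers are illustrative), a
-   block opening at line 4, column 3 of the file described by <tt>!1</tt>,
-   inside the subprogram <tt>!2</tt>, could be encoded as:</p>
-
-<div class="doc_code">
-<pre>
-;; Tag = 11 + LLVMDebugVersion = 524299 (DW_TAG_lexical_block)
-!3 = metadata !{i32 524299, metadata !2, i32 4, i32 3, metadata !1, i32 0}
-</pre>
-</div>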
-
-<div class="doc_code">
-<pre>
-!3 = metadata !{
- i32, ;; Tag = 11 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_lexical_block)
-  metadata,;; Reference to the scope we're annotating with a file change
-  metadata ;; Reference to the file the scope is enclosed in.
-}
-</pre>
-</div>
-
-<p>This descriptor provides a wrapper around a lexical scope to handle file
- changes in the middle of a lexical block.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_basic_type">Basic type descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!4 = metadata !{
- i32, ;; Tag = 36 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_base_type)
- metadata, ;; Reference to context
- metadata, ;; Name (may be "" for anonymous types)
- metadata, ;; Reference to file where defined (may be NULL)
- i32, ;; Line number where defined (may be 0)
- i64, ;; Size in bits
- i64, ;; Alignment in bits
- i64, ;; Offset in bits
- i32, ;; Flags
- i32 ;; DWARF type encoding
-}
-</pre>
-</div>
-
-<p>These descriptors define primitive types used in the code, for example
-   <tt>int</tt>, <tt>bool</tt> and <tt>float</tt>. The context provides the
-   scope of the type, which is usually the top level. Since basic types are
-   not usually user defined, the context
- and line number can be left as NULL and 0. The size, alignment and offset
- are expressed in bits and can be 64 bit values. The alignment is used to
- round the offset when embedded in a
- <a href="#format_composite_type">composite type</a> (example to keep float
- doubles on 64 bit boundaries.) The offset is the bit offset if embedded in
- a <a href="#format_composite_type">composite type</a>.</p>
-
-<p>The type encoding provides the details of the type. The values are typically
- one of the following:</p>
-
-<div class="doc_code">
-<pre>
-DW_ATE_address = 1
-DW_ATE_boolean = 2
-DW_ATE_float = 4
-DW_ATE_signed = 5
-DW_ATE_signed_char = 6
-DW_ATE_unsigned = 7
-DW_ATE_unsigned_char = 8
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_derived_type">Derived type descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!5 = metadata !{
- i32, ;; Tag (see below)
- metadata, ;; Reference to context
- metadata, ;; Name (may be "" for anonymous types)
- metadata, ;; Reference to file where defined (may be NULL)
- i32, ;; Line number where defined (may be 0)
- i64, ;; Size in bits
- i64, ;; Alignment in bits
- i64, ;; Offset in bits
- i32, ;; Flags to encode attributes, e.g. private
- metadata, ;; Reference to type derived from
- metadata, ;; (optional) Name of the Objective C property associated with
-            ;; an Objective-C ivar
- metadata, ;; (optional) Name of the Objective C property getter selector.
- metadata, ;; (optional) Name of the Objective C property setter selector.
- i32 ;; (optional) Objective C property attributes.
-}
-</pre>
-</div>
-
-<p>These descriptors are used to define types derived from other types. The
-value of the tag varies depending on the meaning. The following are possible
-tag values:</p>
-
-<div class="doc_code">
-<pre>
-DW_TAG_formal_parameter = 5
-DW_TAG_member = 13
-DW_TAG_pointer_type = 15
-DW_TAG_reference_type = 16
-DW_TAG_typedef = 22
-DW_TAG_const_type = 38
-DW_TAG_volatile_type = 53
-DW_TAG_restrict_type = 55
-</pre>
-</div>
-
-<p><tt>DW_TAG_member</tt> is used to define a member of
- a <a href="#format_composite_type">composite type</a>
- or <a href="#format_subprograms">subprogram</a>. The type of the member is
- the <a href="#format_derived_type">derived
- type</a>. <tt>DW_TAG_formal_parameter</tt> is used to define a member which
- is a formal argument of a subprogram.</p>
-
-<p><tt>DW_TAG_typedef</tt> is used to provide a name for the derived type.</p>
-
-<p><tt>DW_TAG_pointer_type</tt>, <tt>DW_TAG_reference_type</tt>,
- <tt>DW_TAG_const_type</tt>, <tt>DW_TAG_volatile_type</tt> and
- <tt>DW_TAG_restrict_type</tt> are used to qualify
- the <a href="#format_derived_type">derived type</a>. </p>
-
-<p><a href="#format_derived_type">Derived type</a> location can be determined
- from the context and line number. The size, alignment and offset are
- expressed in bits and can be 64 bit values. The alignment is used to round
- the offset when embedded in a <a href="#format_composite_type">composite
-   type</a> (for example, to keep doubles on 64 bit boundaries.) The offset is
- the bit offset if embedded in a <a href="#format_composite_type">composite
- type</a>.</p>
-
-<p>Note that the <tt>void *</tt> type is expressed as a type derived from NULL.
-</p>
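-
-<p>For example (a sketch with illustrative metadata numbers), <tt>void *</tt>
-   is a pointer type whose "derived from" field is <tt>null</tt>:</p>
-
-<div class="doc_code">
-<pre>
-;; Tag = 15 + LLVMDebugVersion = 524303 (DW_TAG_pointer_type)
-!4 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1,
-                i32 0, i64 64, i64 64, i64 0, i32 0, null}
-</pre>
-</div>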
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_composite_type">Composite type descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!6 = metadata !{
- i32, ;; Tag (see below)
- metadata, ;; Reference to context
- metadata, ;; Name (may be "" for anonymous types)
- metadata, ;; Reference to file where defined (may be NULL)
- i32, ;; Line number where defined (may be 0)
- i64, ;; Size in bits
- i64, ;; Alignment in bits
- i64, ;; Offset in bits
- i32, ;; Flags
- metadata, ;; Reference to type derived from
- metadata, ;; Reference to array of member descriptors
- i32 ;; Runtime languages
-}
-</pre>
-</div>
-
-<p>These descriptors are used to define types that are composed of 0 or more
-elements. The value of the tag varies depending on the meaning. The following
-are possible tag values:</p>
-
-<div class="doc_code">
-<pre>
-DW_TAG_array_type = 1
-DW_TAG_enumeration_type = 4
-DW_TAG_structure_type = 19
-DW_TAG_union_type = 23
-DW_TAG_vector_type = 259
-DW_TAG_subroutine_type = 21
-DW_TAG_inheritance = 28
-</pre>
-</div>
-
-<p>The vector flag indicates that an array type is a native packed vector.</p>
-
-<p>The members of array types (tag = <tt>DW_TAG_array_type</tt>) or vector types
- (tag = <tt>DW_TAG_vector_type</tt>) are <a href="#format_subrange">subrange
- descriptors</a>, each representing the range of subscripts at that level of
- indexing.</p>
-
-<p>The members of enumeration types (tag = <tt>DW_TAG_enumeration_type</tt>) are
- <a href="#format_enumeration">enumerator descriptors</a>, each representing
-   the definition of an enumeration value for the set. All enumeration type
- descriptors are collected inside the named metadata
- <tt>!llvm.dbg.cu</tt>.</p>
-
-<p>The members of structure (tag = <tt>DW_TAG_structure_type</tt>) or union (tag
- = <tt>DW_TAG_union_type</tt>) types are any one of
- the <a href="#format_basic_type">basic</a>,
- <a href="#format_derived_type">derived</a>
- or <a href="#format_composite_type">composite</a> type descriptors, each
- representing a field member of the structure or union.</p>
-
-<p>For C++ classes (tag = <tt>DW_TAG_structure_type</tt>), member descriptors
- provide information about base classes, static members and member
- functions. If a member is a <a href="#format_derived_type">derived type
- descriptor</a> and has a tag of <tt>DW_TAG_inheritance</tt>, then the type
-   represents a base class. If the member is
-   a <a href="#format_global_variables">global variable descriptor</a> then it
-   represents a static member. And, if the member is
-   a <a href="#format_subprograms">subprogram descriptor</a> then it represents
-   a member function. For static members and member
-   functions, <tt>getName()</tt> returns the member's linkage or C++ mangled
-   name, and <tt>getDisplayName()</tt> returns the simplified version of the name.</p>
-
-<p>The first member of subroutine (tag = <tt>DW_TAG_subroutine_type</tt>) type
- elements is the return type for the subroutine. The remaining elements are
- the formal arguments to the subroutine.</p>
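-
-<p>For example, a subroutine type for <tt>int (int)</tt> could be sketched as
-   follows (illustrative metadata numbers; <tt>!6</tt> is assumed to be an
-   <tt>int</tt> basic type descriptor):</p>
-
-<div class="doc_code">
-<pre>
-;; Tag = 21 + LLVMDebugVersion = 524309 (DW_TAG_subroutine_type)
-!8 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0,
-                i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0}
-;; Elements: the return type followed by the formal argument types.
-!9 = metadata !{metadata !6, metadata !6}
-</pre>
-</div>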
-
-<p><a href="#format_composite_type">Composite type</a> location can be
- determined from the context and line number. The size, alignment and
- offset are expressed in bits and can be 64 bit values. The alignment is used
- to round the offset when embedded in
- a <a href="#format_composite_type">composite type</a> (as an example, to keep
-   doubles on 64 bit boundaries.) The offset is the bit offset if embedded
- in a <a href="#format_composite_type">composite type</a>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_subrange">Subrange descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!42 = metadata !{
- i32, ;; Tag = 33 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_subrange_type)
- i64, ;; Low value
- i64 ;; High value
-}
-</pre>
-</div>
-
-<p>These descriptors are used to define ranges of array subscripts for an array
- <a href="#format_composite_type">composite type</a>. The low value defines
-   the lower bound, which is typically zero for C/C++. The high value is the
-   upper bound. Both values are 64 bit. High - low + 1 is the size of the
-   array. If low &gt; high, the array bounds are not included in generated
-   debugging information.
-</p>
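-
-<p>For example, an array declared as <tt>int X[10]</tt> in C would use a
-   subrange with a low value of 0 and a high value of 9 (the descriptor number
-   is illustrative):</p>
-
-<div class="doc_code">
-<pre>
-;; Tag = 33 + LLVMDebugVersion = 524321 (DW_TAG_subrange_type)
-!42 = metadata !{i32 524321, i64 0, i64 9}   ;; 9 - 0 + 1 = 10 elements
-</pre>
-</div>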
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_enumeration">Enumerator descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!6 = metadata !{
- i32, ;; Tag = 40 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_enumerator)
- metadata, ;; Name
- i64 ;; Value
-}
-</pre>
-</div>
-
-<p>These descriptors are used to define members of an
- enumeration <a href="#format_composite_type">composite type</a>, it
- associates the name to the value.</p>
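-
-<p>For example, an enumerator <tt>Spruce = 100</tt> is encoded as follows (this
-   matches the C/C++ enumeration example later in this document):</p>
-
-<div class="doc_code">
-<pre>
-;; Tag = 40 + LLVMDebugVersion = 524328 (DW_TAG_enumerator)
-!4 = metadata !{i32 524328, metadata !"Spruce", i64 100}
-</pre>
-</div>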
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_variables">Local variables</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!7 = metadata !{
- i32, ;; Tag (see below)
- metadata, ;; Context
- metadata, ;; Name
- metadata, ;; Reference to file where defined
- i32, ;; 24 bit - Line number where defined
- ;; 8 bit - Argument number. 1 indicates 1st argument.
- metadata, ;; Type descriptor
- i32, ;; flags
- metadata ;; (optional) Reference to inline location
-}
-</pre>
-</div>
-
-<p>These descriptors are used to define variables local to a sub program. The
- value of the tag depends on the usage of the variable:</p>
-
-<div class="doc_code">
-<pre>
-DW_TAG_auto_variable = 256
-DW_TAG_arg_variable = 257
-DW_TAG_return_variable = 258
-</pre>
-</div>
-
-<p>An auto variable is any variable declared in the body of the function. An
- argument variable is any variable that appears as a formal argument to the
- function. A return variable is used to track the result of a function and
- has no source correspondent.</p>
-
-<p>The context is either the subprogram or block where the variable is defined.
-   Name is the source variable name. Context and line indicate where the
- variable was defined. Type descriptor defines the declared type of the
- variable.</p>
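-
-<p>For example, the auto variable <tt>X</tt> in the scoping example later in
-   this section is described by the following descriptor (copied from that
-   example; <tt>!1</tt> is its lexical block and <tt>!6</tt> its type):</p>
-
-<div class="doc_code">
-<pre>
-!0 = metadata !{i32 459008, metadata !1, metadata !"X",
-                metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
-</pre>
-</div>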
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="format_common_intrinsics">Debugger intrinsic functions</a>
-</h3>
-
-<div>
-
-<p>LLVM uses several intrinsic functions (name prefixed with "llvm.dbg") to
- provide debug information at various points in generated code.</p>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_common_declare">llvm.dbg.declare</a>
-</h4>
-
-<div>
-<pre>
- void %<a href="#format_common_declare">llvm.dbg.declare</a>(metadata, metadata)
-</pre>
-
-<p>This intrinsic provides information about a local element (e.g., a variable). The
- first argument is metadata holding the alloca for the variable. The
- second argument is metadata containing a description of the variable.</p>
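-
-<p>For example, a front-end might emit the following for a local variable
-   <tt>X</tt> (a sketch; <tt>!0</tt> is assumed to be a local variable
-   descriptor for <tt>X</tt>):</p>
-
-<div class="doc_code">
-<pre>
-%X = alloca i32
-call void @llvm.dbg.declare(metadata !{i32* %X}, metadata !0)
-</pre>
-</div>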
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_common_value">llvm.dbg.value</a>
-</h4>
-
-<div>
-<pre>
- void %<a href="#format_common_value">llvm.dbg.value</a>(metadata, i64, metadata)
-</pre>
-
-<p>This intrinsic provides information when a user source variable is set to a
- new value. The first argument is the new value (wrapped as metadata). The
- second argument is the offset in the user source variable where the new value
- is written. The third argument is metadata containing a description of the
- user source variable.</p>
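-
-<p>For example, when <tt>X</tt> receives a new value, a front-end might emit
-   the following (a sketch; the offset of 0 means the value is written at the
-   start of the variable, and <tt>!0</tt> is assumed to be the descriptor
-   for <tt>X</tt>):</p>
-
-<div class="doc_code">
-<pre>
-%X.1 = add i32 %X.0, 1
-call void @llvm.dbg.value(metadata !{i32 %X.1}, i64 0, metadata !0)
-</pre>
-</div>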
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="format_common_lifetime">Object lifetimes and scoping</a>
-</h3>
-
-<div>
-<p>In many languages, the local variables in functions can have their lifetimes
- or scopes limited to a subset of a function. In the C family of languages,
- for example, variables are only live (readable and writable) within the
- source block that they are defined in. In functional languages, values are
- only readable after they have been defined. Though this is a very obvious
- concept, it is non-trivial to model in LLVM, because it has no notion of
- scoping in this sense, and does not want to be tied to a language's scoping
- rules.</p>
-
-<p>In order to handle this, the LLVM debug format uses the metadata attached to
-   LLVM instructions to encode line number and scoping information. Consider
- the following C fragment, for example:</p>
-
-<div class="doc_code">
-<pre>
-1. void foo() {
-2. int X = 21;
-3. int Y = 22;
-4. {
-5. int Z = 23;
-6.      Z = X + Y;
-7. }
-8. X = Y;
-9. }
-</pre>
-</div>
-
-<p>Compiled to LLVM, this function would be represented like this:</p>
-
-<div class="doc_code">
-<pre>
-define void @foo() nounwind ssp {
-entry:
- %X = alloca i32, align 4 ; &lt;i32*&gt; [#uses=4]
- %Y = alloca i32, align 4 ; &lt;i32*&gt; [#uses=4]
- %Z = alloca i32, align 4 ; &lt;i32*&gt; [#uses=3]
- %0 = bitcast i32* %X to {}* ; &lt;{}*&gt; [#uses=1]
- call void @llvm.dbg.declare(metadata !{i32 * %X}, metadata !0), !dbg !7
- store i32 21, i32* %X, !dbg !8
- %1 = bitcast i32* %Y to {}* ; &lt;{}*&gt; [#uses=1]
- call void @llvm.dbg.declare(metadata !{i32 * %Y}, metadata !9), !dbg !10
- store i32 22, i32* %Y, !dbg !11
- %2 = bitcast i32* %Z to {}* ; &lt;{}*&gt; [#uses=1]
- call void @llvm.dbg.declare(metadata !{i32 * %Z}, metadata !12), !dbg !14
- store i32 23, i32* %Z, !dbg !15
- %tmp = load i32* %X, !dbg !16 ; &lt;i32&gt; [#uses=1]
- %tmp1 = load i32* %Y, !dbg !16 ; &lt;i32&gt; [#uses=1]
- %add = add nsw i32 %tmp, %tmp1, !dbg !16 ; &lt;i32&gt; [#uses=1]
- store i32 %add, i32* %Z, !dbg !16
- %tmp2 = load i32* %Y, !dbg !17 ; &lt;i32&gt; [#uses=1]
- store i32 %tmp2, i32* %X, !dbg !17
- ret void, !dbg !18
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-!0 = metadata !{i32 459008, metadata !1, metadata !"X",
- metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo",
- metadata !"foo", metadata !3, i32 1, metadata !4,
- i1 false, i1 true}; [DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c",
- metadata !"/private/tmp", metadata !"clang 1.1", i1 true,
- i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
-!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0,
- i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
-!5 = metadata !{null}
-!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0,
- i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
-!7 = metadata !{i32 2, i32 7, metadata !1, null}
-!8 = metadata !{i32 2, i32 3, metadata !1, null}
-!9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3,
- metadata !6}; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 3, i32 7, metadata !1, null}
-!11 = metadata !{i32 3, i32 3, metadata !1, null}
-!12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5,
- metadata !6}; [ DW_TAG_auto_variable ]
-!13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
-!14 = metadata !{i32 5, i32 9, metadata !13, null}
-!15 = metadata !{i32 5, i32 5, metadata !13, null}
-!16 = metadata !{i32 6, i32 5, metadata !13, null}
-!17 = metadata !{i32 8, i32 3, metadata !1, null}
-!18 = metadata !{i32 9, i32 1, metadata !2, null}
-</pre>
-</div>
-
-<p>This example illustrates a few important details about LLVM debugging
- information. In particular, it shows how the <tt>llvm.dbg.declare</tt>
- intrinsic and location information, which are attached to an instruction,
- are applied together to allow a debugger to analyze the relationship between
- statements, variable definitions, and the code used to implement the
- function.</p>
-
-<div class="doc_code">
-<pre>
-call void @llvm.dbg.declare(metadata !{i32 * %X}, metadata !0), !dbg !7
-</pre>
-</div>
-
-<p>The first intrinsic
- <tt>%<a href="#format_common_declare">llvm.dbg.declare</a></tt>
- encodes debugging information for the variable <tt>X</tt>. The metadata
- <tt>!dbg !7</tt> attached to the intrinsic provides scope information for the
- variable <tt>X</tt>.</p>
-
-<div class="doc_code">
-<pre>
-!7 = metadata !{i32 2, i32 7, metadata !1, null}
-!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo",
- metadata !"foo", metadata !"foo", metadata !3, i32 1,
- metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
-</pre>
-</div>
-
-<p>Here <tt>!7</tt> is metadata providing location information. It has four
- fields: line number, column number, scope, and original scope. The original
-   scope represents the inline location if this instruction is inlined inside a
- caller, and is null otherwise. In this example, scope is encoded by
- <tt>!1</tt>. <tt>!1</tt> represents a lexical block inside the scope
- <tt>!2</tt>, where <tt>!2</tt> is a
- <a href="#format_subprograms">subprogram descriptor</a>. This way the
- location information attached to the intrinsics indicates that the
- variable <tt>X</tt> is declared at line number 2 at a function level scope in
- function <tt>foo</tt>.</p>
-
-<p>Now let's take another example.</p>
-
-<div class="doc_code">
-<pre>
-call void @llvm.dbg.declare(metadata !{i32 * %Z}, metadata !12), !dbg !14
-</pre>
-</div>
-
-<p>The second intrinsic
- <tt>%<a href="#format_common_declare">llvm.dbg.declare</a></tt>
- encodes debugging information for variable <tt>Z</tt>. The metadata
- <tt>!dbg !14</tt> attached to the intrinsic provides scope information for
- the variable <tt>Z</tt>.</p>
-
-<div class="doc_code">
-<pre>
-!13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
-!14 = metadata !{i32 5, i32 9, metadata !13, null}
-</pre>
-</div>
-
-<p>Here <tt>!14</tt> indicates that <tt>Z</tt> is declared at line number 5 and
- column number 9 inside of lexical scope <tt>!13</tt>. The lexical scope
- itself resides inside of lexical scope <tt>!1</tt> described above.</p>
-
-<p>The scope information attached with each instruction provides a
- straightforward way to find instructions covered by a scope.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="ccxx_frontend">C/C++ front-end specific debug information</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The C and C++ front-ends represent information about the program in a format
- that is effectively identical
- to <a href="http://www.eagercon.com/dwarf/dwarf3std.htm">DWARF 3.0</a> in
- terms of information content. This allows code generators to trivially
- support native debuggers by generating standard dwarf information, and
- contains enough information for non-dwarf targets to translate it as
- needed.</p>
-
-<p>This section describes the forms used to represent C and C++ programs. Other
- languages could pattern themselves after this (which itself is tuned to
- representing programs in the same way that DWARF 3 does), or they could
- choose to provide completely different forms if they don't fit into the DWARF
- model. As support for debugging information gets added to the various LLVM
- source-language front-ends, the information used should be documented
- here.</p>
-
-<p>The following sections provide examples of various C/C++ constructs and the
- debug information that would best describe those constructs.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_compile_units">C/C++ source file information</a>
-</h3>
-
-<div>
-
-<p>Given the source files <tt>MySource.cpp</tt> and <tt>MyHeader.h</tt> located
- in the directory <tt>/Users/mine/sources</tt>, the following code:</p>
-
-<div class="doc_code">
-<pre>
-#include "MyHeader.h"
-
-int main(int argc, char *argv[]) {
- return 0;
-}
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-...
-;;
-;; Define the compile unit for the main source file "/Users/mine/sources/MySource.cpp".
-;;
-!2 = metadata !{
- i32 524305, ;; Tag
- i32 0, ;; Unused
- i32 4, ;; Language Id
- metadata !"MySource.cpp",
- metadata !"/Users/mine/sources",
- metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)",
- i1 true, ;; Main Compile Unit
- i1 false, ;; Optimized compile unit
- metadata !"", ;; Compiler flags
- i32 0} ;; Runtime version
-
-;;
-;; Define the file for the file "/Users/mine/sources/MySource.cpp".
-;;
-!1 = metadata !{
- i32 524329, ;; Tag
- metadata !"MySource.cpp",
- metadata !"/Users/mine/sources",
- metadata !2 ;; Compile unit
-}
-
-;;
-;; Define the file for the file "/Users/mine/sources/Myheader.h"
-;;
-!3 = metadata !{
- i32 524329, ;; Tag
- metadata !"Myheader.h"
- metadata !"/Users/mine/sources",
- metadata !2 ;; Compile unit
-}
-
-...
-</pre>
-</div>
-
-<p><tt>llvm::Instruction</tt> provides easy access to metadata attached to an
-instruction. One can extract line number information encoded in LLVM IR
-using <tt>Instruction::getMetadata()</tt> and
-<tt>DILocation::getLineNumber()</tt>:</p>
-<pre>
- if (MDNode *N = I->getMetadata("dbg")) { // Here I is an LLVM instruction
- DILocation Loc(N); // DILocation is in DebugInfo.h
- unsigned Line = Loc.getLineNumber();
- StringRef File = Loc.getFilename();
- StringRef Dir = Loc.getDirectory();
- }
-</pre>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_global_variable">C/C++ global variable information</a>
-</h3>
-
-<div>
-
-<p>Given an integer global variable declared as follows:</p>
-
-<div class="doc_code">
-<pre>
-int MyGlobal = 100;
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-;;
-;; Define the global itself.
-;;
-%MyGlobal = global int 100
-...
-;;
-;; List of debug info of globals
-;;
-!llvm.dbg.cu = !{!0}
-
-;; Define the compile unit.
-!0 = metadata !{
- i32 786449, ;; Tag
- i32 0, ;; Context
- i32 4, ;; Language
- metadata !"foo.cpp", ;; File
- metadata !"/Volumes/Data/tmp", ;; Directory
- metadata !"clang version 3.1 ", ;; Producer
- i1 true, ;; Deprecated field
- i1 false, ;; "isOptimized"?
- metadata !"", ;; Flags
- i32 0, ;; Runtime Version
- metadata !1, ;; Enum Types
- metadata !1, ;; Retained Types
- metadata !1, ;; Subprograms
- metadata !3 ;; Global Variables
-} ; [ DW_TAG_compile_unit ]
-
-;; The Array of Global Variables
-!3 = metadata !{
- metadata !4
-}
-
-!4 = metadata !{
- metadata !5
-}
-
-;;
-;; Define the global variable itself.
-;;
-!5 = metadata !{
- i32 786484, ;; Tag
- i32 0, ;; Unused
- null, ;; Unused
- metadata !"MyGlobal", ;; Name
- metadata !"MyGlobal", ;; Display Name
- metadata !"", ;; Linkage Name
- metadata !6, ;; File
- i32 1, ;; Line
- metadata !7, ;; Type
- i32 0, ;; IsLocalToUnit
- i32 1, ;; IsDefinition
- i32* @MyGlobal ;; LLVM-IR Value
-} ; [ DW_TAG_variable ]
-
-;;
-;; Define the file
-;;
-!6 = metadata !{
- i32 786473, ;; Tag
- metadata !"foo.cpp", ;; File
- metadata !"/Volumes/Data/tmp", ;; Directory
- null ;; Unused
-} ; [ DW_TAG_file_type ]
-
-;;
-;; Define the type
-;;
-!7 = metadata !{
- i32 786468, ;; Tag
- null, ;; Unused
- metadata !"int", ;; Name
- null, ;; Unused
- i32 0, ;; Line
- i64 32, ;; Size in Bits
- i64 32, ;; Align in Bits
- i64 0, ;; Offset
- i32 0, ;; Flags
- i32 5 ;; Encoding
-} ; [ DW_TAG_base_type ]
-
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_subprogram">C/C++ function information</a>
-</h3>
-
-<div>
-
-<p>Given a function declared as follows:</p>
-
-<div class="doc_code">
-<pre>
-int main(int argc, char *argv[]) {
- return 0;
-}
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-;;
-;; Define the subprogram descriptor. Note that the tag encodes
-;; 46 + LLVMDebugVersion, where 46 is the tag for subprograms
-;; (46 = DW_TAG_subprogram.)
-;;
-!6 = metadata !{
- i32 524334, ;; Tag
- i32 0, ;; Unused
- metadata !1, ;; Context
- metadata !"main", ;; Name
- metadata !"main", ;; Display name
- metadata !"main", ;; Linkage name
- metadata !1, ;; File
- i32 1, ;; Line number
- metadata !4, ;; Type
- i1 false, ;; Is local
- i1 true, ;; Is definition
- i32 0, ;; Virtuality attribute, e.g. pure virtual function
- i32 0, ;; Index into virtual table for C++ methods
- i32 0, ;; Type that holds virtual table.
- i32 0, ;; Flags
- i1 false, ;; True if this function is optimized
- Function *, ;; Pointer to llvm::Function
- null ;; Function template parameters
-}
-;;
-;; Define the subprogram itself.
-;;
-define i32 @main(i32 %argc, i8** %argv) {
-...
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_basic_types">C/C++ basic types</a>
-</h3>
-
-<div>
-
-<p>The following are the basic type descriptors for C/C++ core types:</p>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_type_bool">bool</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"bool", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 8, ;; Size in Bits
- i64 8, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 2 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_char">char</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"char", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 8, ;; Size in Bits
- i64 8, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 6 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_unsigned_char">unsigned char</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"unsigned char",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 8, ;; Size in Bits
- i64 8, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 8 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_short">short</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"short int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 16, ;; Size in Bits
- i64 16, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 5 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_unsigned_short">unsigned short</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"short unsigned int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 16, ;; Size in Bits
- i64 16, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 7 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_int">int</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"int", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in Bits
- i64 32, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 5 ;; Encoding
-}
-</pre></div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_unsigned_int">unsigned int</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"unsigned int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in Bits
- i64 32, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 7 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_long_long">long long</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"long long int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 64, ;; Size in Bits
- i64 64, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 5 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_unsigned_long_long">unsigned long long</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"long long unsigned int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 64, ;; Size in Bits
- i64 64, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 7 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_float">float</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"float",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in Bits
- i64 32, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 4 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_double">double</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"double",;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 64, ;; Size in Bits
- i64 64, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 4 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_derived_types">C/C++ derived types</a>
-</h3>
-
-<div>
-
-<p>Given the following as an example of C/C++ derived type:</p>
-
-<div class="doc_code">
-<pre>
-typedef const int *IntPtr;
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-;;
-;; Define the typedef "IntPtr".
-;;
-!2 = metadata !{
- i32 524310, ;; Tag
- metadata !1, ;; Context
- metadata !"IntPtr", ;; Name
- metadata !3, ;; File
- i32 0, ;; Line number
- i64 0, ;; Size in bits
- i64 0, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- metadata !4 ;; Derived From type
-}
-
-;;
-;; Define the pointer type.
-;;
-!4 = metadata !{
- i32 524303, ;; Tag
- metadata !1, ;; Context
- metadata !"", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 64, ;; Size in bits
- i64 64, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- metadata !5 ;; Derived From type
-}
-;;
-;; Define the const type.
-;;
-!5 = metadata !{
- i32 524326, ;; Tag
- metadata !1, ;; Context
- metadata !"", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- metadata !6 ;; Derived From type
-}
-;;
-;; Define the int type.
-;;
-!6 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"int", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
-  i32 5       ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_composite_types">C/C++ struct/union types</a>
-</h3>
-
-<div>
-
-<p>Given the following as an example of C/C++ struct type:</p>
-
-<div class="doc_code">
-<pre>
-struct Color {
- unsigned Red;
- unsigned Green;
- unsigned Blue;
-};
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-;;
-;; Define basic type for unsigned int.
-;;
-!5 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"unsigned int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in Bits
- i64 32, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 7 ;; Encoding
-}
-;;
-;; Define composite type for struct Color.
-;;
-!2 = metadata !{
- i32 524307, ;; Tag
- metadata !1, ;; Context
- metadata !"Color", ;; Name
- metadata !1, ;; Compile unit
- i32 1, ;; Line number
- i64 96, ;; Size in bits
- i64 32, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- null, ;; Derived From
- metadata !3, ;; Elements
- i32 0 ;; Runtime Language
-}
-
-;;
-;; Define the Red field.
-;;
-!4 = metadata !{
- i32 524301, ;; Tag
- metadata !1, ;; Context
- metadata !"Red", ;; Name
- metadata !1, ;; File
- i32 2, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- metadata !5 ;; Derived From type
-}
-
-;;
-;; Define the Green field.
-;;
-!6 = metadata !{
- i32 524301, ;; Tag
- metadata !1, ;; Context
- metadata !"Green", ;; Name
- metadata !1, ;; File
- i32 3, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 32, ;; Offset in bits
- i32 0, ;; Flags
- metadata !5 ;; Derived From type
-}
-
-;;
-;; Define the Blue field.
-;;
-!7 = metadata !{
- i32 524301, ;; Tag
- metadata !1, ;; Context
- metadata !"Blue", ;; Name
- metadata !1, ;; File
- i32 4, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 64, ;; Offset in bits
- i32 0, ;; Flags
- metadata !5 ;; Derived From type
-}
-
-;;
-;; Define the array of fields used by the composite type Color.
-;;
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_enumeration_types">C/C++ enumeration types</a>
-</h3>
-
-<div>
-
-<p>Given the following as an example of C/C++ enumeration type:</p>
-
-<div class="doc_code">
-<pre>
-enum Trees {
- Spruce = 100,
- Oak = 200,
- Maple = 300
-};
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-;;
-;; Define composite type for enum Trees
-;;
-!2 = metadata !{
- i32 524292, ;; Tag
- metadata !1, ;; Context
- metadata !"Trees", ;; Name
- metadata !1, ;; File
- i32 1, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- null, ;; Derived From type
- metadata !3, ;; Elements
- i32 0 ;; Runtime language
-}
-
-;;
-;; Define the array of enumerators used by composite type Trees.
-;;
-!3 = metadata !{metadata !4, metadata !5, metadata !6}
-
-;;
-;; Define Spruce enumerator.
-;;
-!4 = metadata !{i32 524328, metadata !"Spruce", i64 100}
-
-;;
-;; Define Oak enumerator.
-;;
-!5 = metadata !{i32 524328, metadata !"Oak", i64 200}
-
-;;
-;; Define Maple enumerator.
-;;
-!6 = metadata !{i32 524328, metadata !"Maple", i64 300}
-
-</pre>
-</div>
-
-</div>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="llvmdwarfextension">Debugging information format</a>
-</h2>
-<!-- *********************************************************************** -->
-<div>
-<!-- ======================================================================= -->
-<h3>
- <a name="objcproperty">Debugging Information Extension for Objective C Properties</a>
-</h3>
-<div>
-<!-- *********************************************************************** -->
-<h4>
- <a name="objcpropertyintroduction">Introduction</a>
-</h4>
-<!-- *********************************************************************** -->
-
-<div>
-<p>Objective C provides a simpler way to declare and define accessor methods
-using declared properties. The language provides features to declare a
-property and to let the compiler synthesize accessor methods.
-</p>
-
-<p>The debugger lets developers inspect Objective C interfaces and their
-instance variables and class variables. However, the debugger does not know
-anything about the properties defined in Objective C interfaces. The debugger
-consumes information generated by the compiler in DWARF format. The format does
-not support encoding of Objective C properties. This proposal describes DWARF
-extensions to encode Objective C properties, which the debugger can use to let
-developers inspect Objective C properties.
-</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h4>
- <a name="objcpropertyproposal">Proposal</a>
-</h4>
-<!-- *********************************************************************** -->
-
-<div>
-<p>Objective C properties exist separately from class members. A property
-can be defined only by &quot;setter&quot; and &quot;getter&quot; selectors, and
-be calculated anew on each access. Or a property can just be a direct access
-to some declared ivar. Finally it can have an ivar &quot;automatically
-synthesized&quot; for it by the compiler, in which case the property can be
-referred to in user code directly using the standard C dereference syntax as
-well as through the property &quot;dot&quot; syntax, but there is no entry in
-the @interface declaration corresponding to this ivar.
-</p>
-<p>
-To facilitate debugging of these properties, we will add a new DWARF TAG into the
-DW_TAG_structure_type definition for the class to hold the description of a
-given property, and a set of DWARF attributes that provide said description.
-The property tag will also contain the name and declared type of the property.
-</p>
-<p>
-If there is a related ivar, there will also be a DWARF property attribute placed
-in the DW_TAG_member DIE for that ivar referring back to the property TAG for
-that property. And in the case where the compiler synthesizes the ivar directly,
-the compiler is expected to generate a DW_TAG_member for that ivar (with the
-DW_AT_artificial set to 1), whose name will be the name used to access this
-ivar directly in code, and with the property attribute pointing back to the
-property it is backing.
-</p>
-<p>
-The following examples will serve as illustration for our discussion:
-</p>
-
-<div class="doc_code">
-<pre>
-@interface I1 {
- int n2;
-}
-
-@property int p1;
-@property int p2;
-@end
-
-@implementation I1
-@synthesize p1;
-@synthesize p2 = n2;
-@end
-</pre>
-</div>
-
-<p>
-This produces the following DWARF (this is a &quot;pseudo dwarfdump&quot; output):
-</p>
-<div class="doc_code">
-<pre>
-0x00000100: TAG_structure_type [7] *
- AT_APPLE_runtime_class( 0x10 )
- AT_name( "I1" )
- AT_decl_file( "Objc_Property.m" )
- AT_decl_line( 3 )
-
-0x00000110 TAG_APPLE_property
- AT_name ( "p1" )
- AT_type ( {0x00000150} ( int ) )
-
-0x00000120: TAG_APPLE_property
- AT_name ( "p2" )
- AT_type ( {0x00000150} ( int ) )
-
-0x00000130: TAG_member [8]
- AT_name( "_p1" )
- AT_APPLE_property ( {0x00000110} "p1" )
- AT_type( {0x00000150} ( int ) )
- AT_artificial ( 0x1 )
-
-0x00000140: TAG_member [8]
- AT_name( "n2" )
- AT_APPLE_property ( {0x00000120} "p2" )
- AT_type( {0x00000150} ( int ) )
-
-0x00000150: AT_type( ( int ) )
-</pre>
-</div>
-
-<p> Note, the current convention is that the name of the ivar for an
-auto-synthesized property is the name of the property from which it derives with
-an underscore prepended, as is shown in the example.
-But we actually don't need to know this convention, since we are given the name
-of the ivar directly.
-</p>
-
-<p>
-Also, it is common practice in ObjC to have different property declarations in
-the @interface and @implementation - e.g. to provide a read-only property in
-the interface, and a read-write property in the implementation. In that case,
-the compiler should emit whichever property declaration will be in force in the
-current translation unit.
-</p>
-
-<p> Developers can decorate a property with attributes which are encoded using
-DW_AT_APPLE_property_attribute.
-</p>
-
-<div class="doc_code">
-<pre>
-@property (readonly, nonatomic) int pr;
-</pre>
-</div>
-<p>
-This produces a property tag:
-</p>
-<div class="doc_code">
-<pre>
-TAG_APPLE_property [8]
- AT_name( "pr" )
- AT_type ( {0x00000147} (int) )
- AT_APPLE_property_attribute (DW_APPLE_PROPERTY_readonly, DW_APPLE_PROPERTY_nonatomic)
-</pre>
-</div>
-
-<p> The setter and getter method names are attached to the property using
-DW_AT_APPLE_property_setter and DW_AT_APPLE_property_getter attributes.
-</p>
-<div class="doc_code">
-<pre>
-@interface I1
-@property (setter=myOwnP3Setter:) int p3;
--(void)myOwnP3Setter:(int)a;
-@end
-
-@implementation I1
-@synthesize p3;
--(void)myOwnP3Setter:(int)a{ }
-@end
-</pre>
-</div>
-
-<p>
-The DWARF for this would be:
-</p>
-<div class="doc_code">
-<pre>
-0x000003bd: TAG_structure_type [7] *
- AT_APPLE_runtime_class( 0x10 )
- AT_name( "I1" )
- AT_decl_file( "Objc_Property.m" )
- AT_decl_line( 3 )
-
-0x000003cd TAG_APPLE_property
- AT_name ( "p3" )
- AT_APPLE_property_setter ( "myOwnP3Setter:" )
- AT_type( {0x00000147} ( int ) )
-
-0x000003f3: TAG_member [8]
- AT_name( "_p3" )
- AT_type ( {0x00000147} ( int ) )
- AT_APPLE_property ( {0x000003cd} )
- AT_artificial ( 0x1 )
-</pre>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h4>
- <a name="objcpropertynewtags">New DWARF Tags</a>
-</h4>
-<!-- *********************************************************************** -->
-
-<div>
-<table border="1" cellspacing="0">
- <col width="200">
- <col width="200">
- <tr>
- <th>TAG</th>
- <th>Value</th>
- </tr>
- <tr>
- <td>DW_TAG_APPLE_property</td>
- <td>0x4200</td>
- </tr>
-</table>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h4>
- <a name="objcpropertynewattributes">New DWARF Attributes</a>
-</h4>
-<!-- *********************************************************************** -->
-
-<div>
-<table border="1" cellspacing="0">
- <col width="200">
- <col width="200">
- <col width="200">
- <tr>
- <th>Attribute</th>
- <th>Value</th>
- <th>Classes</th>
- </tr>
- <tr>
- <td>DW_AT_APPLE_property</td>
- <td>0x3fed</td>
- <td>Reference</td>
- </tr>
- <tr>
- <td>DW_AT_APPLE_property_getter</td>
- <td>0x3fe9</td>
- <td>String</td>
- </tr>
- <tr>
- <td>DW_AT_APPLE_property_setter</td>
- <td>0x3fea</td>
- <td>String</td>
- </tr>
- <tr>
- <td>DW_AT_APPLE_property_attribute</td>
- <td>0x3feb</td>
- <td>Constant</td>
- </tr>
-</table>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h4>
- <a name="objcpropertynewconstants">New DWARF Constants</a>
-</h4>
-<!-- *********************************************************************** -->
-
-<div>
-<table border="1" cellspacing="0">
- <col width="200">
- <col width="200">
- <tr>
- <th>Name</th>
- <th>Value</th>
- </tr>
- <tr>
-    <td>DW_APPLE_PROPERTY_readonly</td>
-    <td>0x1</td>
-  </tr>
-  <tr>
-    <td>DW_APPLE_PROPERTY_readwrite</td>
-    <td>0x2</td>
-  </tr>
-  <tr>
-    <td>DW_APPLE_PROPERTY_assign</td>
-    <td>0x4</td>
-  </tr>
-  <tr>
-    <td>DW_APPLE_PROPERTY_retain</td>
-    <td>0x8</td>
-  </tr>
-  <tr>
-    <td>DW_APPLE_PROPERTY_copy</td>
-    <td>0x10</td>
-  </tr>
-  <tr>
-    <td>DW_APPLE_PROPERTY_nonatomic</td>
- <td>0x20</td>
- </tr>
-</table>
-
-</div>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="acceltable">Name Accelerator Tables</a>
-</h3>
-<!-- ======================================================================= -->
-<div>
-<!-- ======================================================================= -->
-<h4>
- <a name="acceltableintroduction">Introduction</a>
-</h4>
-<!-- ======================================================================= -->
-<div>
-<p>The .debug_pubnames and .debug_pubtypes formats are not what a debugger
- needs. The "pub" in the section name indicates that the entries in the
- table are publicly visible names only. This means no static or hidden
- functions show up in the .debug_pubnames. No static variables or private class
-  variables are in the .debug_pubtypes. Many compilers add different things to
-  these tables, so we can't rely on their contents being consistent across gcc,
-  icc, or clang.</p>
-
-<p>The typical query given by users tends not to match up with the contents of
- these tables. For example, the DWARF spec states that "In the case of the
- name of a function member or static data member of a C++ structure, class or
- union, the name presented in the .debug_pubnames section is not the simple
- name given by the DW_AT_name attribute of the referenced debugging information
- entry, but rather the fully qualified name of the data or function member."
-  So the only names in these tables for complex C++ entries are fully
-  qualified names. Debugger users tend not to enter their search strings as
-  "a::b::c(int,const Foo&) const", but rather as "c", "b::c", or "a::b::c". So
- the name entered in the name table must be demangled in order to chop it up
- appropriately and additional names must be manually entered into the table
- to make it effective as a name lookup table for debuggers to use.</p>
-
-<p>All debuggers currently ignore the .debug_pubnames table as a result of
-  its inconsistent and useless public-only name content, which makes it a waste
-  of space in the object file. These tables, when they are written to disk, are
- not sorted in any way, leaving every debugger to do its own parsing
- and sorting. These tables also include an inlined copy of the string values
- in the table itself making the tables much larger than they need to be on
- disk, especially for large C++ programs.</p>
-
-<p>Can't we just fix the sections by adding all of the names we need to this
- table? No, because that is not what the tables are defined to contain and we
- won't know the difference between the old bad tables and the new good tables.
- At best we could make our own renamed sections that contain all of the data
- we need.</p>
-
-<p>These tables are also insufficient for what a debugger like LLDB needs.
- LLDB uses clang for its expression parsing where LLDB acts as a PCH. LLDB is
- then often asked to look for type "foo" or namespace "bar", or list items in
- namespace "baz". Namespaces are not included in the pubnames or pubtypes
- tables. Since clang asks a lot of questions when it is parsing an expression,
- we need to be very fast when looking up names, as it happens a lot. Having new
- accelerator tables that are optimized for very quick lookups will benefit
- this type of debugging experience greatly.</p>
-
-<p>We would like to generate name lookup tables that can be mapped into
-  memory from disk, and used as is, with little or no up-front parsing. We would
-  also like to be able to control the exact content of these different tables so
-  they contain exactly what we need. The Name Accelerator Tables were designed
-  to fix these issues. In order to do so, we need to:</p>
-
-<ul>
-  <li>Have a format that can be mapped into memory from disk and used as is</li>
-  <li>Make lookups very fast</li>
-  <li>Use an extensible table format so these tables can be made by many producers</li>
-  <li>Contain all of the names needed for typical lookups out of the box</li>
-  <li>Have strict rules for the contents of tables</li>
-</ul>
-
-<p>Table size is important and the accelerator table format should allow the
- reuse of strings from common string tables so the strings for the names are
- not duplicated. We also want to make sure the table is ready to be used as-is
- by simply mapping the table into memory with minimal header parsing.</p>
-
-<p>The name lookups need to be fast and optimized for the kinds of lookups
- that debuggers tend to do. Optimally we would like to touch as few parts of
- the mapped table as possible when doing a name lookup and be able to quickly
-  find the name entry we are looking for, or discover there are no matches. In
-  the case of debuggers, we optimize for lookups that fail most of the time.</p>
-
-<p>Each table that is defined should have strict rules on exactly what is in
-  the accelerator table, and these rules should be documented so clients can
-  rely on the content.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="acceltablehashes">Hash Tables</a>
-</h4>
-<!-- ======================================================================= -->
-
-<div>
-<h5>Standard Hash Tables</h5>
-
-<p>Typical hash tables have a header, buckets, and each bucket points to the
-bucket contents:
-</p>
-
-<div class="doc_code">
-<pre>
-.------------.
-| HEADER |
-|------------|
-| BUCKETS |
-|------------|
-| DATA |
-`------------'
-</pre>
-</div>
-
-<p>The BUCKETS are an array of offsets to DATA for each hash:</p>
-
-<div class="doc_code">
-<pre>
-.------------.
-| 0x00001000 | BUCKETS[0]
-| 0x00002000 | BUCKETS[1]
-| 0x00002200 | BUCKETS[2]
-| 0x000034f0 | BUCKETS[3]
-| | ...
-| 0xXXXXXXXX | BUCKETS[n_buckets]
-'------------'
-</pre>
-</div>
-
-<p>So for bucket[3] in the example above, we have an offset into the table
-  0x000034f0 which points to a chain of entries for the bucket. Each entry in
-  the chain must contain a next pointer, the full 32 bit hash value, the string
-  itself, and the data for the current string value.</p>
-
-<div class="doc_code">
-<pre>
- .------------.
-0x000034f0: | 0x00003500 | next pointer
- | 0x12345678 | 32 bit hash
- | "erase" | string value
- | data[n] | HashData for this bucket
- |------------|
-0x00003500: | 0x00003550 | next pointer
- | 0x29273623 | 32 bit hash
- | "dump" | string value
- | data[n] | HashData for this bucket
- |------------|
-0x00003550: | 0x00000000 | next pointer
- | 0x82638293 | 32 bit hash
- | "main" | string value
- | data[n] | HashData for this bucket
- `------------'
-</pre>
-</div>
-
-<p>The problem with this layout for debuggers is that we need to optimize for
- the negative lookup case where the symbol we're searching for is not present.
-  So if we were to look up "printf" in the table above, we would compute a 32
-  bit hash for "printf", which might match bucket[3]. We would need to go to the
-  offset 0x000034f0 and start looking to see if our 32 bit hash matches. To do
-  so, we need to read the next pointer, then read the hash, compare it, and skip
-  to the next entry. Each time we are skipping many bytes in memory and touching
-  new cache pages just to do the compare on the full 32 bit hash. All of these
- accesses then tell us that we didn't have a match.</p>
-
-<h5>Name Hash Tables</h5>
-
-<p>To solve the issues mentioned above we have structured the hash tables
- a bit differently: a header, buckets, an array of all unique 32 bit hash
- values, followed by an array of hash value data offsets, one for each hash
- value, then the data for all hash values:</p>
-
-<div class="doc_code">
-<pre>
-.-------------.
-| HEADER |
-|-------------|
-| BUCKETS |
-|-------------|
-| HASHES |
-|-------------|
-| OFFSETS |
-|-------------|
-| DATA |
-`-------------'
-</pre>
-</div>
-
-<p>The BUCKETS in the name tables are an index into the HASHES array. By
- making all of the full 32 bit hash values contiguous in memory, we allow
- ourselves to efficiently check for a match while touching as little
- memory as possible. Most often checking the 32 bit hash values is as far as
- the lookup goes. If it does match, it usually is a match with no collisions.
- So for a table with "n_buckets" buckets, and "n_hashes" unique 32 bit hash
- values, we can clarify the contents of the BUCKETS, HASHES and OFFSETS as:</p>
-
-<div class="doc_code">
-<pre>
-.-------------------------.
-| HEADER.magic | uint32_t
-| HEADER.version | uint16_t
-| HEADER.hash_function | uint16_t
-| HEADER.bucket_count | uint32_t
-| HEADER.hashes_count | uint32_t
-| HEADER.header_data_len | uint32_t
-| HEADER_DATA | HeaderData
-|-------------------------|
-| BUCKETS | uint32_t[bucket_count] // 32 bit hash indexes
-|-------------------------|
-| HASHES | uint32_t[hashes_count] // 32 bit hash values
-|-------------------------|
-| OFFSETS | uint32_t[hashes_count] // 32 bit offsets to hash value data
-|-------------------------|
-| ALL HASH DATA |
-`-------------------------'
-</pre>
-</div>
-
-<p>So taking the exact same data from the standard hash example above, we end up
- with:</p>
-
-<div class="doc_code">
-<pre>
- .------------.
- | HEADER |
- |------------|
- | 0 | BUCKETS[0]
- | 2 | BUCKETS[1]
- | 5 | BUCKETS[2]
- | 6 | BUCKETS[3]
- | | ...
- | ... | BUCKETS[n_buckets]
- |------------|
- | 0x........ | HASHES[0]
- | 0x........ | HASHES[1]
- | 0x........ | HASHES[2]
- | 0x........ | HASHES[3]
- | 0x........ | HASHES[4]
- | 0x........ | HASHES[5]
- | 0x12345678 | HASHES[6] hash for BUCKETS[3]
- | 0x29273623 | HASHES[7] hash for BUCKETS[3]
- | 0x82638293 | HASHES[8] hash for BUCKETS[3]
- | 0x........ | HASHES[9]
- | 0x........ | HASHES[10]
- | 0x........ | HASHES[11]
- | 0x........ | HASHES[12]
- | 0x........ | HASHES[13]
- | 0x........ | HASHES[n_hashes]
- |------------|
- | 0x........ | OFFSETS[0]
- | 0x........ | OFFSETS[1]
- | 0x........ | OFFSETS[2]
- | 0x........ | OFFSETS[3]
- | 0x........ | OFFSETS[4]
- | 0x........ | OFFSETS[5]
- | 0x000034f0 | OFFSETS[6] offset for BUCKETS[3]
- | 0x00003500 | OFFSETS[7] offset for BUCKETS[3]
- | 0x00003550 | OFFSETS[8] offset for BUCKETS[3]
- | 0x........ | OFFSETS[9]
- | 0x........ | OFFSETS[10]
- | 0x........ | OFFSETS[11]
- | 0x........ | OFFSETS[12]
- | 0x........ | OFFSETS[13]
- | 0x........ | OFFSETS[n_hashes]
- |------------|
- | |
- | |
- | |
- | |
- | |
- |------------|
-0x000034f0: | 0x00001203 | String offset into .debug_str ("erase")
- | 0x00000004 | A 32 bit array count - number of HashData with name "erase"
- | 0x........ | HashData[0]
- | 0x........ | HashData[1]
- | 0x........ | HashData[2]
- | 0x........ | HashData[3]
- | 0x00000000 | String offset into .debug_str (terminate data for hash)
- |------------|
-0x00003500: | 0x00001209 | String offset into .debug_str ("collision")
- | 0x00000002 | A 32 bit array count - number of HashData with name "collision"
- | 0x........ | HashData[0]
- | 0x........ | HashData[1]
-            | 0x00001213 | String offset into .debug_str ("dump")
- | 0x00000003 | A 32 bit array count - number of HashData with name "dump"
- | 0x........ | HashData[0]
- | 0x........ | HashData[1]
- | 0x........ | HashData[2]
- | 0x00000000 | String offset into .debug_str (terminate data for hash)
- |------------|
-0x00003550: | 0x00001218 | String offset into .debug_str ("main")
- | 0x00000009 | A 32 bit array count - number of HashData with name "main"
- | 0x........ | HashData[0]
- | 0x........ | HashData[1]
- | 0x........ | HashData[2]
- | 0x........ | HashData[3]
- | 0x........ | HashData[4]
- | 0x........ | HashData[5]
- | 0x........ | HashData[6]
- | 0x........ | HashData[7]
- | 0x........ | HashData[8]
- | 0x00000000 | String offset into .debug_str (terminate data for hash)
- `------------'
-</pre>
-</div>
-
-<p>So we still have all of the same data, we just organize it more efficiently
-  for debugger lookup. If we repeat the same "printf" lookup from above, we
-  would hash "printf" and find it matches BUCKETS[3] by taking the 32 bit hash
-  value modulo n_buckets. BUCKETS[3] contains "6" which is the index
-  into the HASHES table. We would then compare consecutive 32 bit hash
-  values in the HASHES array for as long as those hashes still belong to
-  BUCKETS[3]. We do this by verifying that each subsequent hash value modulo
-  n_buckets is still 3. In the case of a failed lookup we would access the
-  memory for BUCKETS[3], and then compare a few consecutive 32 bit hashes before
-  we know that we have no match. We don't end up marching through multiple words
-  of memory and we keep the number of processor data cache lines accessed as
-  small as possible.</p>
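-
-<p>The lookup just described can be sketched in C. The following is only an
-  illustration: it assumes the BUCKETS, HASHES and OFFSETS arrays have already
-  been located in the mapped table, and that hash() is the table's hash
-  function (all names here are hypothetical):</p>
-
-<div class="doc_code">
-<pre>
-#include &lt;stdint.h&gt;
-
-/* Assumed to have been read from the mapped table already. */
-extern uint32_t n_buckets, n_hashes;
-extern const uint32_t *buckets, *hashes, *offsets;
-extern uint32_t hash(const char *name);   /* the table's hash function */
-
-/* Returns the data offset for "name", or UINT32_MAX if not present. */
-uint32_t lookup(const char *name)
-{
-  uint32_t h = hash(name);
-  uint32_t bucket = h % n_buckets;
-  uint32_t idx = buckets[bucket];
-  if (idx == UINT32_MAX)
-    return UINT32_MAX;                    /* empty bucket */
-  /* Consecutive hashes belong to this bucket for as long as
-     hash % n_buckets stays equal to the bucket index. */
-  while (idx &lt; n_hashes &amp;&amp; hashes[idx] % n_buckets == bucket) {
-    if (hashes[idx] == h)
-      return offsets[idx];                /* offset of this hash's data */
-    ++idx;
-  }
-  return UINT32_MAX;                      /* negative lookup */
-}
-</pre>
-</div>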
-
-<p>The string hash that is used for these lookup tables is the Daniel J.
-  Bernstein hash, which is also used in the ELF GNU_HASH sections. It is a very
-  good hash for all kinds of names in programs, producing very few collisions.</p>
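-
-<p>For reference, the hash itself is tiny; the following is a sketch of the
-  classic Bernstein "times 33" function over a C string:</p>
-
-<div class="doc_code">
-<pre>
-#include &lt;stdint.h&gt;
-
-uint32_t djb_hash(const char *s)
-{
-  uint32_t h = 5381;
-  while (*s)
-    h = h * 33 + (unsigned char)*s++;   /* h = h * 33 + c */
-  return h;
-}
-</pre>
-</div>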
-
-<p>Empty buckets are designated by using an invalid hash index of UINT32_MAX.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="acceltabledetails">Details</a>
-</h4>
-<!-- ======================================================================= -->
-<div>
-<p>These name hash tables are designed to be generic where specializations of
- the table get to define additional data that goes into the header
- ("HeaderData"), how the string value is stored ("KeyType") and the content
- of the data for each hash value.</p>
-
-<h5>Header Layout</h5>
-<p>The header has a fixed part, and the specialized part. The exact format of
- the header is:</p>
-<div class="doc_code">
-<pre>
-struct Header
-{
- uint32_t magic; // 'HASH' magic value to allow endian detection
- uint16_t version; // Version number
- uint16_t hash_function; // The hash function enumeration that was used
- uint32_t bucket_count; // The number of buckets in this hash table
- uint32_t hashes_count; // The total number of unique hash values and hash data offsets in this table
- uint32_t header_data_len; // The bytes to skip to get to the hash indexes (buckets) for correct alignment
- // Specifically the length of the following HeaderData field - this does not
- // include the size of the preceding fields
- HeaderData header_data; // Implementation specific header data
-};
-</pre>
-</div>
-<p>The header starts with a 32 bit "magic" value which must be 'HASH' encoded as
- an ASCII integer. This allows the detection of the start of the hash table and
- also allows the table's byte order to be determined so the table can be
- correctly extracted. The "magic" value is followed by a 16 bit version number
- which allows the table to be revised and modified in the future. The current
- version number is 1. "hash_function" is a uint16_t enumeration that specifies
- which hash function was used to produce this table. The current values for the
- hash function enumerations include:</p>
-<div class="doc_code">
-<pre>
-enum HashFunctionType
-{
- eHashFunctionDJB = 0u, // Daniel J Bernstein hash function
-};
-</pre>
-</div>
-<p>"bucket_count" is a 32 bit unsigned integer that represents how many buckets
- are in the BUCKETS array. "hashes_count" is the number of unique 32 bit hash
-  values that are in the HASHES array, and is also the number of offsets
-  contained in the OFFSETS array. "header_data_len" specifies the size in
- bytes of the HeaderData that is filled in by specialized versions of this
- table.</p>
-
-<h5>Fixed Lookup</h5>
-<p>The header is followed by the buckets, hashes, offsets, and hash value
-  data.</p>
-<div class="doc_code">
-<pre>
-struct FixedTable
-{
- uint32_t buckets[Header.bucket_count]; // An array of hash indexes into the "hashes[]" array below
- uint32_t hashes [Header.hashes_count]; // Every unique 32 bit hash for the entire table is in this table
- uint32_t offsets[Header.hashes_count]; // An offset that corresponds to each item in the "hashes[]" array above
-};
-</pre>
-</div>
-<p>"buckets" is an array of 32 bit indexes into the "hashes" array. The
- "hashes" array contains all of the 32 bit hash values for all names in the
- hash table. Each hash in the "hashes" table has an offset in the "offsets"
- array that points to the data for the hash value.</p>
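-
-<p>Given the fixed header, the position of each array follows directly from the
-  mapped data. The following sketch of the pointer arithmetic assumes the
-  table's byte order matches the host:</p>
-
-<div class="doc_code">
-<pre>
-#include &lt;stdint.h&gt;
-
-/* "base" points at the first byte of the mapped table. The six fixed
-   header fields occupy 20 bytes, HeaderData occupies header_data_len
-   bytes, and the three arrays follow back to back. */
-void locate_arrays(const uint8_t *base, uint32_t bucket_count,
-                   uint32_t hashes_count, uint32_t header_data_len,
-                   const uint32_t **buckets, const uint32_t **hashes,
-                   const uint32_t **offsets)
-{
-  const uint8_t *p = base + 20 + header_data_len;
-  *buckets = (const uint32_t *)p;
-  *hashes  = *buckets + bucket_count;
-  *offsets = *hashes  + hashes_count;
-}
-</pre>
-</div>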
-
-<p>This table setup makes it very easy to repurpose these tables to contain
- different data, while keeping the lookup mechanism the same for all tables.
- This layout also makes it possible to save the table to disk and map it in
- later and do very efficient name lookups with little or no parsing.</p>
-
-<p>DWARF lookup tables can be implemented in a variety of ways and can store
- a lot of information for each name. We want to make the DWARF tables
- extensible and able to store the data efficiently so we have used some of the
- DWARF features that enable efficient data storage to define exactly what kind
- of data we store for each name.</p>
-
-<p>The "HeaderData" contains a definition of the contents of each HashData
- chunk. We might want to store an offset to all of the debug information
- entries (DIEs) for each name. To keep things extensible, we create a list of
- items, or Atoms, that are contained in the data for each name. First comes the
- type of the data in each atom:</p>
-<div class="doc_code">
-<pre>
-enum AtomType
-{
- eAtomTypeNULL = 0u,
- eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
-  eAtomTypeCUOffset = 2u, // DIE offset of the compile unit header that contains the item in question
- eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2
- eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
- eAtomTypeTypeFlags = 5u, // Flags from enum TypeFlags
-};
-</pre>
-</div>
-<p>The enumeration values and their meanings are:</p>
-<div class="doc_code">
-<pre>
- eAtomTypeNULL - a termination atom that specifies the end of the atom list
- eAtomTypeDIEOffset - an offset into the .debug_info section for the DWARF DIE for this name
- eAtomTypeCUOffset - an offset into the .debug_info section for the CU that contains the DIE
-  eAtomTypeTag - The DW_TAG_XXX enumeration value so you don't have to parse the DWARF to see what it is
- eAtomTypeNameFlags - Flags for functions and global variables (isFunction, isInlined, isExternal...)
- eAtomTypeTypeFlags - Flags for types (isCXXClass, isObjCClass, ...)
-</pre>
-</div>
-<p>Each atom then pairs an atom type with a DWARF form that describes how the
-  data for that atom is encoded:</p>
-<div class="doc_code">
-<pre>
-struct Atom
-{
- uint16_t type; // AtomType enum value
- uint16_t form; // DWARF DW_FORM_XXX defines
-};
-</pre>
-</div>
-<p>The "form" type above is from the DWARF specification and defines the
- exact encoding of the data for the Atom type. See the DWARF specification for
- the DW_FORM_ definitions.</p>
-<div class="doc_code">
-<pre>
-struct HeaderData
-{
- uint32_t die_offset_base;
- uint32_t atom_count;
-  Atom atoms[atom_count];
-};
-</pre>
-</div>
-<p>"HeaderData" defines the base DIE offset that should be added to any atoms
- that are encoded using the DW_FORM_ref1, DW_FORM_ref2, DW_FORM_ref4,
- DW_FORM_ref8 or DW_FORM_ref_udata. It also defines what is contained in
- each "HashData" object -- Atom.form tells us how large each field will be in
- the HashData and the Atom.type tells us how this data should be interpreted.</p>
-
-<p>For the current implementations of ".apple_names" (all functions + globals),
-  ".apple_types" (names of all types that are defined), and
-  ".apple_namespaces" (all namespaces), we set the Atom array to be:</p>
-<div class="doc_code">
-<pre>
-HeaderData.atom_count = 1;
-HeaderData.atoms[0].type = eAtomTypeDIEOffset;
-HeaderData.atoms[0].form = DW_FORM_data4;
-</pre>
-</div>
-<p>This defines the contents to be the DIE offset (eAtomTypeDIEOffset) that is
- encoded as a 32 bit value (DW_FORM_data4). This allows a single name to have
- multiple matching DIEs in a single file, which could come up with an inlined
- function for instance. Future tables could include more information about the
- DIE such as flags indicating if the DIE is a function, method, block,
- or inlined.</p>
-
-<p>The KeyType for the DWARF table is a 32 bit string table offset into the
- ".debug_str" table. The ".debug_str" is the string table for the DWARF which
- may already contain copies of all of the strings. This helps make sure, with
- help from the compiler, that we reuse the strings between all of the DWARF
-  sections and keeps the hash table size down. Another benefit of having the
-  compiler generate all strings as DW_FORM_strp in the debug info is that
-  DWARF parsing can be made much faster.</p>
-
-<p>After a lookup is made, we get an offset into the hash data. The hash data
- needs to be able to deal with 32 bit hash collisions, so the chunk of data
- at the offset in the hash data consists of a triple:</p>
-<div class="doc_code">
-<pre>
-uint32_t str_offset
-uint32_t hash_data_count
-HashData[hash_data_count]
-</pre>
-</div>
-<p>If "str_offset" is zero, then the bucket contents are done. 99.9% of the
- hash data chunks contain a single item (no 32 bit hash collision):</p>
-<div class="doc_code">
-<pre>
-.------------.
-| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
-| 0x00000004 | uint32_t HashData count
-| 0x........ | uint32_t HashData[0] DIE offset
-| 0x........ | uint32_t HashData[1] DIE offset
-| 0x........ | uint32_t HashData[2] DIE offset
-| 0x........ | uint32_t HashData[3] DIE offset
-| 0x00000000 | uint32_t KeyType (end of hash chain)
-`------------'
-</pre>
-</div>
-<p>If there are collisions, you will have multiple valid string offsets:</p>
-<div class="doc_code">
-<pre>
-.------------.
-| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
-| 0x00000004 | uint32_t HashData count
-| 0x........ | uint32_t HashData[0] DIE offset
-| 0x........ | uint32_t HashData[1] DIE offset
-| 0x........ | uint32_t HashData[2] DIE offset
-| 0x........ | uint32_t HashData[3] DIE offset
-| 0x00002023 | uint32_t KeyType (.debug_str[0x0002023] => "print")
-| 0x00000002 | uint32_t HashData count
-| 0x........ | uint32_t HashData[0] DIE offset
-| 0x........ | uint32_t HashData[1] DIE offset
-| 0x00000000 | uint32_t KeyType (end of hash chain)
-`------------'
-</pre>
-</div>
-<p>Current testing with real world C++ binaries has shown around one 32 bit
-  hash collision per 100,000 name entries.</p>
-</div>
-<!-- ======================================================================= -->
-<h4>
- <a name="acceltablecontents">Contents</a>
-</h4>
-<!-- ======================================================================= -->
-<div>
-<p>As we said, we want to strictly define exactly what is included in the
- different tables. For DWARF, we have 3 tables: ".apple_names", ".apple_types",
- and ".apple_namespaces".</p>
-
-<p>".apple_names" sections should contain an entry for each DWARF DIE whose
- DW_TAG is a DW_TAG_label, DW_TAG_inlined_subroutine, or DW_TAG_subprogram that
- has address attributes: DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges or
- DW_AT_entry_pc. It also contains DW_TAG_variable DIEs that have a DW_OP_addr
- in the location (global and static variables). All global and static variables
- should be included, including those scoped within functions and classes. For
- example using the following code:</p>
-<div class="doc_code">
-<pre>
-static int var = 0;
-
-void f ()
-{
- static int var = 0;
-}
-</pre>
-</div>
-<p>Both of the static "var" variables would be included in the table. All
- functions should emit both their full names and their basenames. For C or C++,
- the full name is the mangled name (if available) which is usually in the
- DW_AT_MIPS_linkage_name attribute, and the DW_AT_name contains the function
- basename. If global or static variables have a mangled name in a
- DW_AT_MIPS_linkage_name attribute, this should be emitted along with the
- simple name found in the DW_AT_name attribute.</p>
-
-<p>".apple_types" sections should contain an entry for each DWARF DIE whose
- tag is one of:</p>
-<ul>
- <li>DW_TAG_array_type</li>
- <li>DW_TAG_class_type</li>
- <li>DW_TAG_enumeration_type</li>
- <li>DW_TAG_pointer_type</li>
- <li>DW_TAG_reference_type</li>
- <li>DW_TAG_string_type</li>
- <li>DW_TAG_structure_type</li>
- <li>DW_TAG_subroutine_type</li>
- <li>DW_TAG_typedef</li>
- <li>DW_TAG_union_type</li>
- <li>DW_TAG_ptr_to_member_type</li>
- <li>DW_TAG_set_type</li>
- <li>DW_TAG_subrange_type</li>
- <li>DW_TAG_base_type</li>
- <li>DW_TAG_const_type</li>
- <li>DW_TAG_constant</li>
- <li>DW_TAG_file_type</li>
- <li>DW_TAG_namelist</li>
- <li>DW_TAG_packed_type</li>
- <li>DW_TAG_volatile_type</li>
- <li>DW_TAG_restrict_type</li>
- <li>DW_TAG_interface_type</li>
- <li>DW_TAG_unspecified_type</li>
- <li>DW_TAG_shared_type</li>
-</ul>
-<p>Only entries with a DW_AT_name attribute are included, and the entry must
- not be a forward declaration (DW_AT_declaration attribute with a non-zero value).
- For example, using the following code:</p>
-<div class="doc_code">
-<pre>
-int main ()
-{
- int *b = 0;
- return *b;
-}
-</pre>
-</div>
-<p>We get a few type DIEs:</p>
-<div class="doc_code">
-<pre>
-0x00000067: TAG_base_type [5]
- AT_encoding( DW_ATE_signed )
- AT_name( "int" )
- AT_byte_size( 0x04 )
-
-0x0000006e: TAG_pointer_type [6]
- AT_type( {0x00000067} ( int ) )
- AT_byte_size( 0x08 )
-</pre>
-</div>
-<p>The DW_TAG_pointer_type is not included because it does not have a DW_AT_name.</p>
-
-<p>".apple_namespaces" section should contain all DW_TAG_namespace DIEs. If
- we run into a namespace that has no name this is an anonymous namespace,
- and the name should be output as "(anonymous namespace)" (without the quotes).
- Why? This matches the output of the abi::cxa_demangle() that is in the standard
- C++ library that demangles mangled names.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="acceltableextensions">Language Extensions and File Format Changes</a>
-</h4>
-<!-- ======================================================================= -->
-<div>
-<h5>Objective-C Extensions</h5>
-<p>".apple_objc" section should contain all DW_TAG_subprogram DIEs for an
- Objective-C class. The name used in the hash table is the name of the
- Objective-C class itself. If the Objective-C class has a category, then an
- entry is made for both the class name without the category, and for the class
- name with the category. So if we have a DIE at offset 0x1234 with a name
- of method "-[NSString(my_additions) stringWithSpecialString:]", we would add
- an entry for "NSString" that points to DIE 0x1234, and an entry for
- "NSString(my_additions)" that points to 0x1234. This allows us to quickly
- track down all Objective-C methods for an Objective-C class when doing
- expressions. It is needed because of the dynamic nature of Objective-C where
-  anyone can add methods to a class. The DWARF for Objective-C methods is also
-  emitted differently from C++ classes: the methods are not usually
-  contained in the class definition, but are scattered across one or more
-  compile units. Categories can also be defined in different shared libraries.
- So we need to be able to quickly find all of the methods and class functions
- given the Objective-C class name, or quickly find all methods and class
-  functions for a class + category name. This table does not contain any selector
-  names; it just maps Objective-C class names (or class names + category) to all
-  of the methods and class functions. The selectors are added as function
-  basenames in the ".apple_names" section.</p>
-
-<p>In the ".apple_names" section for Objective-C functions, the full name is the
- entire function name with the brackets ("-[NSString stringWithCString:]") and the
- basename is the selector only ("stringWithCString:").</p>
-
-<h5>Mach-O Changes</h5>
-<p>The section names above for the apple hash tables apply to non-Mach-O files.
-  For Mach-O files, the sections should be contained in the "__DWARF" segment
-  with names as follows:</p>
-<ul>
- <li>".apple_names" -> "__apple_names"</li>
- <li>".apple_types" -> "__apple_types"</li>
- <li>".apple_namespaces" -> "__apple_namespac" (16 character limit)</li>
- <li> ".apple_objc" -> "__apple_objc"</li>
-</ul>
-</div>
-</div>
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-09 01:54:10 +0200 (Tue, 09 Oct 2012) $
-</address>
-
-</body>
-</html>
diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst
new file mode 100644
index 000000000000..857479508a5e
--- /dev/null
+++ b/docs/SourceLevelDebugging.rst
@@ -0,0 +1,2281 @@
+================================
+Source Level Debugging with LLVM
+================================
+
+.. contents::
+ :local:
+
+Introduction
+============
+
+This document is the central repository for all information pertaining to debug
+information in LLVM. It describes the :ref:`actual format that the LLVM debug
+information takes <format>`, which is useful for those interested in creating
+front-ends or dealing directly with the information. Further, this document
+provides specific examples of what debug information for C/C++ looks like.
+
+Philosophy behind LLVM debugging information
+--------------------------------------------
+
+The idea of the LLVM debugging information is to capture how the important
+pieces of the source-language's Abstract Syntax Tree map onto LLVM code.
+Several design aspects have shaped the solution that appears here. The
+important ones are:
+
+* Debugging information should have very little impact on the rest of the
+ compiler. No transformations, analyses, or code generators should need to
+ be modified because of debugging information.
+
+* LLVM optimizations should interact in :ref:`well-defined and easily described
+ ways <intro_debugopt>` with the debugging information.
+
+* Because LLVM is designed to support arbitrary programming languages,
+ LLVM-to-LLVM tools should not need to know anything about the semantics of
+ the source-level-language.
+
+* Source-level languages are often **widely** different from one another.
+  LLVM should not put any restrictions on the flavor of the source-language,
+ and the debugging information should work with any language.
+
+* With code generator support, it should be possible to use an LLVM compiler
+ to compile a program to native machine code and standard debugging
+ formats. This allows compatibility with traditional machine-code level
+ debuggers, like GDB or DBX.
+
+The approach used by the LLVM implementation is to use a small set of
+:ref:`intrinsic functions <format_common_intrinsics>` to define a mapping
+between LLVM program objects and the source-level objects. The description of
+the source-level program is maintained in LLVM metadata in an
+:ref:`implementation-defined format <ccxx_frontend>` (the C/C++ front-end
+currently uses working draft 7 of the `DWARF 3 standard
+<http://www.eagercon.com/dwarf/dwarf3std.htm>`_).
+
+When a program is being debugged, a debugger interacts with the user and turns
+the stored debug information into source-language specific information. As
+such, a debugger must be aware of the source-language, and is thus tied to a
+specific language or family of languages.
+
+Debug information consumers
+---------------------------
+
+The role of debug information is to provide meta information normally stripped
+away during the compilation process. This meta information provides an LLVM
+user a relationship between generated code and the original program source
+code.
+
+Currently, debug information is consumed by DwarfDebug to produce DWARF
+information used by the GDB debugger. Other targets could use the same
+information to produce stabs or other debug forms.
+
+It would also be reasonable to use debug information to feed profiling tools
+for analysis of generated code, or tools for reconstructing the original
+source from generated code.
+
+TODO - expound a bit more.
+
+.. _intro_debugopt:
+
+Debugging optimized code
+------------------------
+
+An extremely high priority of LLVM debugging information is to make it interact
+well with optimizations and analysis. In particular, the LLVM debug
+information provides the following guarantees:
+
+* LLVM debug information **always provides information to accurately read
+ the source-level state of the program**, regardless of which LLVM
+ optimizations have been run, and without any modification to the
+ optimizations themselves. However, some optimizations may impact the
+ ability to modify the current state of the program with a debugger, such
+ as setting program variables, or calling functions that have been
+ deleted.
+
+* As desired, LLVM optimizations can be upgraded to be aware of the LLVM
+ debugging information, allowing them to update the debugging information
+ as they perform aggressive optimizations. This means that, with effort,
+ the LLVM optimizers could optimize debug code just as well as non-debug
+ code.
+
+* LLVM debug information does not prevent optimizations from
+ happening (for example inlining, basic block reordering/merging/cleanup,
+ tail duplication, etc).
+
+* LLVM debug information is automatically optimized along with the rest of
+ the program, using existing facilities. For example, duplicate
+ information is automatically merged by the linker, and unused information
+ is automatically removed.
+
+Basically, the debug information allows you to compile a program with
+"``-O0 -g``" and get full debug information, allowing you to arbitrarily modify
+the program as it executes from a debugger. Compiling a program with
+"``-O3 -g``" gives you full debug information that is always available and
+accurate for reading (e.g., you get accurate stack traces despite tail call
+elimination and inlining), but you might lose the ability to modify the program
+and call functions that were optimized out of the program, or inlined away
+completely.
+
+The :ref:`LLVM test suite <test-suite-quickstart>` provides a framework to
+test an optimizer's handling of debugging information. It can be run like
+this:
+
+.. code-block:: bash
+
+ % cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level
+ % make TEST=dbgopt
+
+This will test the impact of debugging information on optimization passes. If
+debugging information influences an optimization pass then this is reported
+as a failure. See :doc:`TestingGuide` for more information on LLVM test
+infrastructure and how to run various tests.
+
+.. _format:
+
+Debugging information format
+============================
+
+LLVM debugging information has been carefully designed to make it possible for
+the optimizer to optimize the program and debugging information without
+necessarily having to know anything about debugging information. In
+particular, the use of metadata avoids duplicated debugging information from
+the beginning, and the global dead code elimination pass automatically deletes
+debugging information for a function if it decides to delete the function.
+
+To do this, most of the debugging information (descriptors for types,
+variables, functions, source files, etc) is inserted by the language front-end
+in the form of LLVM metadata.
+
+Debug information is designed to be agnostic about the target debugger and
+debugging information representation (e.g. DWARF/Stabs/etc). It uses a generic
+pass to decode the information that represents variables, types, functions,
+namespaces, etc: this allows for arbitrary source-language semantics and
+type-systems to be used, as long as there is a module written for the target
+debugger to interpret the information.
+
+To provide basic functionality, the LLVM debugger does have to make some
+assumptions about the source-level language being debugged, though it keeps
+these to a minimum. The only common features that the LLVM debugger assumes
+exist are :ref:`source files <format_files>`, and :ref:`program objects
+<format_global_variables>`. These abstract objects are used by a debugger to
+form stack traces, show information about local variables, etc.
+
+This section of the documentation first describes the representation aspects
+common to any source-language. :ref:`ccxx_frontend` describes the data layout
+conventions used by the C and C++ front-ends.
+
+Debug information descriptors
+-----------------------------
+
+In consideration of the complexity and volume of debug information, LLVM
+provides a specification for well formed debug descriptors.
+
+Consumers of LLVM debug information expect the descriptors for program objects
+to start in a canonical format, but the descriptors can include additional
+information appended at the end that is source-language specific. All LLVM
+debugging information is versioned, allowing backwards compatibility in the
+case that the core structures need to change in some way. Also, every debugging
+information object starts with a tag to indicate what type of object it is.
+The source-language is allowed to define its own objects, by using unreserved
+tag numbers. We recommend using tags in the range 0x1000 through 0x2000
+(there is a defined ``enum DW_TAG_user_base = 0x1000``).
+
+The fields of debug descriptors used internally by LLVM are restricted to only
+the simple data types ``i32``, ``i1``, ``float``, ``double``, ``mdstring`` and
+``mdnode``.
+
+.. code-block:: llvm
+
+ !1 = metadata !{
+ i32, ;; A tag
+ ...
+ }
+
+<a name="LLVMDebugVersion">The first field of a descriptor is always an
+``i32`` containing a tag value identifying the content of the descriptor.
+The remaining fields are specific to the descriptor. The values of tags are
+loosely bound to the tag values of DWARF information entries. However, that
+does not restrict the use of the information supplied to DWARF targets. To
+facilitate versioning of debug information, the tag is augmented with the
+current debug version (``LLVMDebugVersion = 8 << 16``, i.e. 0x80000 or
+524288).
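+
+For example, with the current version the tag field of a compile unit
+descriptor (``DW_TAG_compile_unit`` = 17) is emitted as:
+
+.. code-block:: llvm
+
+  i32 524305  ;; 17 (DW_TAG_compile_unit) + 524288 (LLVMDebugVersion)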
+
+The details of the various descriptors follow.
+
+Compile unit descriptors
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+  !0 = metadata !{
+    i32,       ;; Tag = 17 + LLVMDebugVersion (DW_TAG_compile_unit)
+    i32,       ;; Unused field.
+    i32,       ;; DWARF language identifier (ex. DW_LANG_C89)
+    metadata,  ;; Source file name
+    metadata,  ;; Source file directory (includes trailing slash)
+    metadata,  ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
+    i1,        ;; True if this is a main compile unit.
+    i1,        ;; True if this is optimized.
+    metadata,  ;; Flags
+    i32,       ;; Runtime version
+    metadata,  ;; List of enum types
+    metadata,  ;; List of retained types
+    metadata,  ;; List of subprograms
+    metadata   ;; List of global variables
+  }
+
+These descriptors contain a source language ID for the file (we use the DWARF
+3.0 ID numbers, such as ``DW_LANG_C89``, ``DW_LANG_C_plus_plus``,
+``DW_LANG_Cobol74``, etc), three strings describing the filename, working
+directory of the compiler, and an identifier string for the compiler that
+produced it.
+
+Compile unit descriptors provide the root context for objects declared in a
+specific compilation unit. File descriptors are defined using this context.
+These descriptors are collected by a named metadata ``!llvm.dbg.cu``. They
+keep track of subprograms, global variables and type information.
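+
+As an illustration, a compile unit descriptor for a hypothetical ``foo.c``
+compiled from ``/tmp`` might look like the following sketch (the producer
+string and the list nodes ``!1`` through ``!4`` are placeholders):
+
+.. code-block:: llvm
+
+  !0 = metadata !{
+    i32 524305,              ;; Tag = 17 + LLVMDebugVersion
+    i32 0,                   ;; Unused field.
+    i32 1,                   ;; DW_LANG_C89
+    metadata !"foo.c",       ;; Source file name
+    metadata !"/tmp/",       ;; Source file directory
+    metadata !"my compiler", ;; Producer
+    i1 true,                 ;; Main compile unit
+    i1 false,                ;; Not optimized
+    metadata !"",            ;; Flags
+    i32 0,                   ;; Runtime version
+    metadata !1,             ;; Enum types
+    metadata !2,             ;; Retained types
+    metadata !3,             ;; Subprograms
+    metadata !4              ;; Global variables
+  }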
+
+.. _format_files:
+
+File descriptors
+^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !0 = metadata !{
+ i32, ;; Tag = 41 + LLVMDebugVersion (DW_TAG_file_type)
+ metadata, ;; Source file name
+ metadata, ;; Source file directory (includes trailing slash)
+ metadata ;; Unused
+ }
+
+These descriptors contain information for a file. Global variables and top
+level functions would be defined using this context. File descriptors also
+provide context for source line correspondence.
+
+Each input file is encoded as a separate file descriptor in LLVM debugging
+information output.
+
+.. _format_global_variables:
+
+Global variable descriptors
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !1 = metadata !{
+ i32, ;; Tag = 52 + LLVMDebugVersion (DW_TAG_variable)
+ i32, ;; Unused field.
+ metadata, ;; Reference to context descriptor
+ metadata, ;; Name
+ metadata, ;; Display name (fully qualified C++ name)
+ metadata, ;; MIPS linkage name (for C++)
+ metadata, ;; Reference to file where defined
+ i32, ;; Line number where defined
+ metadata, ;; Reference to type descriptor
+ i1, ;; True if the global is local to compile unit (static)
+ i1, ;; True if the global is defined in the compile unit (not extern)
+ {}* ;; Reference to the global variable
+ }
+
+These descriptors provide debug information about global variables. They
+provide details such as name, type and where the variable is defined. All
+global variables are collected inside the named metadata ``!llvm.dbg.cu``.
+
+.. _format_subprograms:
+
+Subprogram descriptors
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32, ;; Tag = 46 + LLVMDebugVersion (DW_TAG_subprogram)
+ i32, ;; Unused field.
+ metadata, ;; Reference to context descriptor
+ metadata, ;; Name
+ metadata, ;; Display name (fully qualified C++ name)
+ metadata, ;; MIPS linkage name (for C++)
+ metadata, ;; Reference to file where defined
+ i32, ;; Line number where defined
+ metadata, ;; Reference to type descriptor
+ i1, ;; True if the global is local to compile unit (static)
+ i1, ;; True if the global is defined in the compile unit (not extern)
+    i32,      ;; Virtuality, e.g. dwarf::DW_VIRTUALITY_virtual
+    i32,      ;; Index into a virtual function
+    metadata, ;; indicates which base type contains the vtable pointer for the
+              ;; derived class
+    i32,      ;; Flags - Artificial, Private, Protected, Explicit, Prototyped.
+    i1,       ;; isOptimized
+    Function *, ;; Pointer to LLVM function
+ metadata, ;; Lists function template parameters
+ metadata, ;; Function declaration descriptor
+ metadata, ;; List of function variables
+ i32 ;; Line number where the scope of the subprogram begins
+ }
+
+These descriptors provide debug information about functions, methods and
+subprograms. They provide details such as name, return types and the source
+location where the subprogram is defined.
+
+Block descriptors
+^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !3 = metadata !{
+ i32, ;; Tag = 11 + LLVMDebugVersion (DW_TAG_lexical_block)
+ metadata,;; Reference to context descriptor
+ i32, ;; Line number
+ i32, ;; Column number
+ metadata,;; Reference to source file
+ i32 ;; Unique ID to identify blocks from a template function
+ }
+
+This descriptor provides debug information about nested blocks within a
+subprogram. The line and column numbers are used to distinguish two lexical
+blocks at the same depth.
+
+.. code-block:: llvm
+
+ !3 = metadata !{
+ i32, ;; Tag = 11 + LLVMDebugVersion (DW_TAG_lexical_block)
+    metadata, ;; Reference to the scope we're annotating with a file change
+    metadata  ;; Reference to the file the scope is enclosed in.
+ }
+
+This descriptor provides a wrapper around a lexical scope to handle file
+changes in the middle of a lexical block.
+
+.. _format_basic_type:
+
+Basic type descriptors
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !4 = metadata !{
+ i32, ;; Tag = 36 + LLVMDebugVersion (DW_TAG_base_type)
+ metadata, ;; Reference to context
+ metadata, ;; Name (may be "" for anonymous types)
+ metadata, ;; Reference to file where defined (may be NULL)
+ i32, ;; Line number where defined (may be 0)
+ i64, ;; Size in bits
+ i64, ;; Alignment in bits
+ i64, ;; Offset in bits
+ i32, ;; Flags
+ i32 ;; DWARF type encoding
+ }
+
+These descriptors define primitive types used in the code, for example ``int``,
+``bool`` and ``float``. The context provides the scope of the type, which is
+usually the top level. Since basic types are not usually user defined, the
+context and line number can be left as NULL and 0. The size, alignment and
+offset are expressed in bits and can be 64 bit values. The alignment is used
+to round the offset when embedded in a :ref:`composite type
+<format_composite_type>` (for example, to keep float doubles on 64 bit boundaries).
+The offset is the bit offset if embedded in a :ref:`composite type
+<format_composite_type>`.
+
+The type encoding provides the details of the type. The values are typically
+one of the following:
+
+.. code-block:: llvm
+
+ DW_ATE_address = 1
+ DW_ATE_boolean = 2
+ DW_ATE_float = 4
+ DW_ATE_signed = 5
+ DW_ATE_signed_char = 6
+ DW_ATE_unsigned = 7
+ DW_ATE_unsigned_char = 8
+
+.. _format_derived_type:
+
+Derived type descriptors
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !5 = metadata !{
+ i32, ;; Tag (see below)
+ metadata, ;; Reference to context
+ metadata, ;; Name (may be "" for anonymous types)
+ metadata, ;; Reference to file where defined (may be NULL)
+ i32, ;; Line number where defined (may be 0)
+ i64, ;; Size in bits
+ i64, ;; Alignment in bits
+ i64, ;; Offset in bits
+ i32, ;; Flags to encode attributes, e.g. private
+ metadata, ;; Reference to type derived from
+    metadata, ;; (optional) Name of the Objective-C property associated with
+              ;; an ivar, or, for a pointer-to-member, the type
+              ;; whose members it points to.
+ metadata, ;; (optional) Name of the Objective C property getter selector.
+ metadata, ;; (optional) Name of the Objective C property setter selector.
+ i32 ;; (optional) Objective C property attributes.
+ }
+
+These descriptors are used to define types derived from other types. The value
+of the tag varies depending on the meaning. The following are possible tag
+values:
+
+.. code-block:: llvm
+
+ DW_TAG_formal_parameter = 5
+ DW_TAG_member = 13
+ DW_TAG_pointer_type = 15
+ DW_TAG_reference_type = 16
+ DW_TAG_typedef = 22
+ DW_TAG_ptr_to_member_type = 31
+ DW_TAG_const_type = 38
+ DW_TAG_volatile_type = 53
+ DW_TAG_restrict_type = 55
+
+``DW_TAG_member`` is used to define a member of a :ref:`composite type
+<format_composite_type>` or :ref:`subprogram <format_subprograms>`. The type
+of the member is the :ref:`derived type <format_derived_type>`.
+``DW_TAG_formal_parameter`` is used to define a member which is a formal
+argument of a subprogram.
+
+``DW_TAG_typedef`` is used to provide a name for the derived type.
+
+``DW_TAG_pointer_type``, ``DW_TAG_reference_type``, ``DW_TAG_const_type``,
+``DW_TAG_volatile_type`` and ``DW_TAG_restrict_type`` are used to qualify the
+:ref:`derived type <format_derived_type>`.
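+
+As a sketch, a 64 bit ``int *`` type might be described with a
+``DW_TAG_pointer_type`` descriptor deriving from a basic type node ``!4``
+(a hypothetical node describing ``int``):
+
+.. code-block:: llvm
+
+  !5 = metadata !{
+    i32 524303,    ;; Tag = 15 + LLVMDebugVersion (DW_TAG_pointer_type)
+    null,          ;; Context
+    metadata !"",  ;; Name (anonymous)
+    null,          ;; File
+    i32 0,         ;; Line
+    i64 64,        ;; Size in bits
+    i64 64,        ;; Alignment in bits
+    i64 0,         ;; Offset in bits
+    i32 0,         ;; Flags
+    metadata !4    ;; Derived from "int"
+  }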
+
+:ref:`Derived type <format_derived_type>` location can be determined from the
+context and line number. The size, alignment and offset are expressed in bits
+and can be 64 bit values. The alignment is used to round the offset when
+embedded in a :ref:`composite type <format_composite_type>` (for example, to
+keep float doubles on 64 bit boundaries). The offset is the bit offset if embedded
+in a :ref:`composite type <format_composite_type>`.
+
+Note that the ``void *`` type is expressed as a type derived from NULL.
+
+.. _format_composite_type:
+
+Composite type descriptors
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !6 = metadata !{
+ i32, ;; Tag (see below)
+ metadata, ;; Reference to context
+ metadata, ;; Name (may be "" for anonymous types)
+ metadata, ;; Reference to file where defined (may be NULL)
+ i32, ;; Line number where defined (may be 0)
+ i64, ;; Size in bits
+ i64, ;; Alignment in bits
+ i64, ;; Offset in bits
+ i32, ;; Flags
+ metadata, ;; Reference to type derived from
+ metadata, ;; Reference to array of member descriptors
+ i32 ;; Runtime languages
+ }
+
+These descriptors are used to define types that are composed of 0 or more
+elements. The value of the tag varies depending on the meaning. The following
+are possible tag values:
+
+.. code-block:: llvm
+
+  DW_TAG_array_type = 1
+  DW_TAG_enumeration_type = 4
+  DW_TAG_structure_type = 19
+  DW_TAG_subroutine_type = 21
+  DW_TAG_union_type = 23
+  DW_TAG_inheritance = 28
+
+The vector flag indicates that an array type is a native packed vector.
+
+The members of array types (tag = ``DW_TAG_array_type``) are
+:ref:`subrange descriptors <format_subrange>`, each
+representing the range of subscripts at that level of indexing.
+
+The members of enumeration types (tag = ``DW_TAG_enumeration_type``) are
+:ref:`enumerator descriptors <format_enumerator>`, each representing the
+definition of an enumeration value for the set. All enumeration type descriptors
+are collected inside the named metadata ``!llvm.dbg.cu``.
+
+The members of structure (tag = ``DW_TAG_structure_type``) or union (tag =
+``DW_TAG_union_type``) types are any one of the :ref:`basic
+<format_basic_type>`, :ref:`derived <format_derived_type>` or :ref:`composite
+<format_composite_type>` type descriptors, each representing a field member of
+the structure or union.
+
+For C++ classes (tag = ``DW_TAG_structure_type``), member descriptors provide
+information about base classes, static members and member functions. If a
+member is a :ref:`derived type descriptor <format_derived_type>` and has a tag
+of ``DW_TAG_inheritance``, then the type represents a base class. If the member
+is a :ref:`global variable descriptor <format_global_variables>` then it
+represents a static member. And, if the member is a :ref:`subprogram
+descriptor <format_subprograms>` then it represents a member function. For
+static members and member functions, ``getName()`` returns the member's linkage
+or C++ mangled name, while ``getDisplayName()`` returns the simplified version
+of the name.
+
+The first member of subroutine (tag = ``DW_TAG_subroutine_type``) type elements
+is the return type for the subroutine. The remaining elements are the formal
+arguments to the subroutine.
+
+:ref:`Composite type <format_composite_type>` location can be determined from
+the context and line number. The size, alignment and offset are expressed in
+bits and can be 64 bit values. The alignment is used to round the offset when
+embedded in a :ref:`composite type <format_composite_type>` (as an example, to
+keep float doubles on 64 bit boundaries). The offset is the bit offset if
+embedded in a :ref:`composite type <format_composite_type>`.
+
+.. _format_subrange:
+
+Subrange descriptors
+^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !42 = metadata !{
+ i32, ;; Tag = 33 + LLVMDebugVersion (DW_TAG_subrange_type)
+ i64, ;; Low value
+ i64 ;; High value
+ }
+
+These descriptors are used to define ranges of array subscripts for an array
+:ref:`composite type <format_composite_type>`. The low value defines the lower
+bound, which is typically zero for C/C++. The high value is the upper bound.
+Both values are 64 bit. ``High - Low + 1`` is the size of the array. If
+``Low > High`` the array bounds are not included in generated debugging
+information.
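+
+For example, a C array declared as ``int a[10]`` would be described by a
+subrange with a low bound of 0 and a high bound of 9:
+
+.. code-block:: llvm
+
+  !42 = metadata !{
+    i32 524321,  ;; Tag = 33 + LLVMDebugVersion
+    i64 0,       ;; Low value
+    i64 9        ;; High value (9 - 0 + 1 = 10 elements)
+  }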
+
+.. _format_enumerator:
+
+Enumerator descriptors
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !6 = metadata !{
+ i32, ;; Tag = 40 + LLVMDebugVersion (DW_TAG_enumerator)
+ metadata, ;; Name
+ i64 ;; Value
+ }
+
+These descriptors are used to define members of an enumeration :ref:`composite
+type <format_composite_type>`; each one associates a name with a value.
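+
+For example, an enumerator ``Spruce = 100`` from an ``enum Trees`` would be
+encoded as:
+
+.. code-block:: llvm
+
+  !6 = metadata !{
+    i32 524328,         ;; Tag = 40 + LLVMDebugVersion
+    metadata !"Spruce", ;; Name
+    i64 100             ;; Value
+  }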
+
+Local variables
+^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !7 = metadata !{
+ i32, ;; Tag (see below)
+ metadata, ;; Context
+ metadata, ;; Name
+ metadata, ;; Reference to file where defined
+ i32, ;; 24 bit - Line number where defined
+ ;; 8 bit - Argument number. 1 indicates 1st argument.
+ metadata, ;; Type descriptor
+ i32, ;; flags
+ metadata ;; (optional) Reference to inline location
+ }
+
+These descriptors are used to define variables local to a subprogram. The
+value of the tag depends on the usage of the variable:
+
+.. code-block:: llvm
+
+ DW_TAG_auto_variable = 256
+ DW_TAG_arg_variable = 257
+
+An auto variable is any variable declared in the body of the function. An
+argument variable is any variable that appears as a formal argument to the
+function.
+
+The context is either the subprogram or block where the variable is defined.
+The name is the source variable name. The context and line indicate where
+the variable was defined. The type descriptor defines the declared type of
+the variable.
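+
+A minimal sketch of unpacking the combined line/argument field, assuming the
+line number occupies the low 24 bits and the argument number the high 8 bits
+(the authoritative accessors live on ``DIVariable`` in ``DebugInfo.h``):
+
+.. code-block:: c++
+
+  #include <cstdint>
+
+  // Extract the 24 bit line number from the combined field.
+  static unsigned getLineNumber(uint32_t Field) {
+    return Field & 0x00ffffffu;
+  }
+
+  // Extract the 8 bit argument number; 0 means the variable is not an
+  // argument, and 1 indicates the first argument.
+  static unsigned getArgNumber(uint32_t Field) {
+    return Field >> 24;
+  }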
+
+.. _format_common_intrinsics:
+
+Debugger intrinsic functions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+LLVM uses several intrinsic functions (names prefixed with "``llvm.dbg``")
+to provide debug information at various points in generated code.
+
+``llvm.dbg.declare``
+^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ void %llvm.dbg.declare(metadata, metadata)
+
+This intrinsic provides information about a local element (e.g., a variable).
+The first argument is metadata holding the alloca for the variable. The second
+argument is metadata containing a description of the variable.
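+
+A sketch of how a pass might inspect these intrinsics, using the
+``DbgDeclareInst`` wrapper (see ``llvm/IntrinsicInst.h``; the header
+location may vary between releases):
+
+.. code-block:: c++
+
+  #include "llvm/IntrinsicInst.h"
+  using namespace llvm;
+
+  // Here I is a pointer to an LLVM instruction.
+  if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+    Value *Storage = DDI->getAddress();    // typically the variable's alloca
+    MDNode *Variable = DDI->getVariable(); // the variable descriptor
+    (void)Storage;
+    (void)Variable;
+  }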
+
+``llvm.dbg.value``
+^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ void %llvm.dbg.value(metadata, i64, metadata)
+
+This intrinsic provides information when a user source variable is set to a new
+value. The first argument is the new value (wrapped as metadata). The second
+argument is the offset in the user source variable where the new value is
+written. The third argument is metadata containing a description of the user
+source variable.
+
+Object lifetimes and scoping
+============================
+
+In many languages, the local variables in functions can have their lifetimes or
+scopes limited to a subset of a function. In the C family of languages, for
+example, variables are only live (readable and writable) within the source
+block that they are defined in. In functional languages, values are only
+readable after they have been defined. Though this is a very obvious concept,
+it is non-trivial to model in LLVM, because LLVM has no notion of scoping in
+this sense, and does not want to be tied to a language's scoping rules.
+
+In order to handle this, the LLVM debug format uses the metadata attached to
+LLVM instructions to encode line number and scoping information. Consider the
+following C fragment, for example:
+
+.. code-block:: c
+
+ 1. void foo() {
+ 2. int X = 21;
+ 3. int Y = 22;
+ 4. {
+ 5. int Z = 23;
+ 6. Z = X;
+ 7. }
+ 8. X = Y;
+ 9. }
+
+Compiled to LLVM, this function would be represented like this:
+
+.. code-block:: llvm
+
+ define void @foo() nounwind ssp {
+ entry:
+ %X = alloca i32, align 4 ; <i32*> [#uses=4]
+ %Y = alloca i32, align 4 ; <i32*> [#uses=4]
+ %Z = alloca i32, align 4 ; <i32*> [#uses=3]
+ %0 = bitcast i32* %X to {}* ; <{}*> [#uses=1]
+ call void @llvm.dbg.declare(metadata !{i32 * %X}, metadata !0), !dbg !7
+ store i32 21, i32* %X, !dbg !8
+ %1 = bitcast i32* %Y to {}* ; <{}*> [#uses=1]
+ call void @llvm.dbg.declare(metadata !{i32 * %Y}, metadata !9), !dbg !10
+ store i32 22, i32* %Y, !dbg !11
+ %2 = bitcast i32* %Z to {}* ; <{}*> [#uses=1]
+ call void @llvm.dbg.declare(metadata !{i32 * %Z}, metadata !12), !dbg !14
+ store i32 23, i32* %Z, !dbg !15
+ %tmp = load i32* %X, !dbg !16 ; <i32> [#uses=1]
+ %tmp1 = load i32* %Y, !dbg !16 ; <i32> [#uses=1]
+ %add = add nsw i32 %tmp, %tmp1, !dbg !16 ; <i32> [#uses=1]
+ store i32 %add, i32* %Z, !dbg !16
+ %tmp2 = load i32* %Y, !dbg !17 ; <i32> [#uses=1]
+ store i32 %tmp2, i32* %X, !dbg !17
+ ret void, !dbg !18
+ }
+
+ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+ !0 = metadata !{i32 459008, metadata !1, metadata !"X",
+ metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
+ !1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
+ !2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo",
+ metadata !"foo", metadata !3, i32 1, metadata !4,
+ i1 false, i1 true}; [DW_TAG_subprogram ]
+ !3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c",
+ metadata !"/private/tmp", metadata !"clang 1.1", i1 true,
+ i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+ !4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0,
+ i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
+ !5 = metadata !{null}
+ !6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0,
+ i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
+ !7 = metadata !{i32 2, i32 7, metadata !1, null}
+ !8 = metadata !{i32 2, i32 3, metadata !1, null}
+ !9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3,
+ metadata !6}; [ DW_TAG_auto_variable ]
+ !10 = metadata !{i32 3, i32 7, metadata !1, null}
+ !11 = metadata !{i32 3, i32 3, metadata !1, null}
+ !12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5,
+ metadata !6}; [ DW_TAG_auto_variable ]
+ !13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
+ !14 = metadata !{i32 5, i32 9, metadata !13, null}
+ !15 = metadata !{i32 5, i32 5, metadata !13, null}
+ !16 = metadata !{i32 6, i32 5, metadata !13, null}
+ !17 = metadata !{i32 8, i32 3, metadata !1, null}
+ !18 = metadata !{i32 9, i32 1, metadata !2, null}
+
+This example illustrates a few important details about LLVM debugging
+information. In particular, it shows how the ``llvm.dbg.declare`` intrinsic and
+location information, which are attached to an instruction, are applied
+together to allow a debugger to analyze the relationship between statements,
+variable definitions, and the code used to implement the function.
+
+.. code-block:: llvm
+
+ call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7
+
+The first intrinsic ``%llvm.dbg.declare`` encodes debugging information for the
+variable ``X``. The metadata ``!dbg !7`` attached to the intrinsic provides
+scope information for the variable ``X``.
+
+.. code-block:: llvm
+
+ !7 = metadata !{i32 2, i32 7, metadata !1, null}
+ !1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
+ !2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo",
+ metadata !"foo", metadata !"foo", metadata !3, i32 1,
+ metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
+
+Here ``!7`` is metadata providing location information. It has four fields:
+line number, column number, scope, and original scope. The original scope
+represents the inline location if this instruction is inlined inside a
+caller, and is null otherwise. In this example, the scope is encoded by
+``!1``. ``!1``
+represents a lexical block inside the scope ``!2``, where ``!2`` is a
+:ref:`subprogram descriptor <format_subprograms>`. This way the location
+information attached to the intrinsics indicates that the variable ``X`` is
+declared at line number 2 at a function level scope in function ``foo``.
+
+Now let's look at another example.
+
+.. code-block:: llvm
+
+ call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14
+
+The second intrinsic ``%llvm.dbg.declare`` encodes debugging information for
+variable ``Z``. The metadata ``!dbg !14`` attached to the intrinsic provides
+scope information for the variable ``Z``.
+
+.. code-block:: llvm
+
+ !13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
+ !14 = metadata !{i32 5, i32 9, metadata !13, null}
+
+Here ``!14`` indicates that ``Z`` is declared at line number 5 and
+column number 9 inside of lexical scope ``!13``. The lexical scope itself
+resides inside of lexical scope ``!1`` described above.
+
+The scope information attached to each instruction provides a
+straightforward way to find instructions covered by a scope.
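+
+For example, here is a minimal sketch of collecting the instructions whose
+attached location names a given scope, assuming the ``DILocation`` wrapper
+from ``DebugInfo.h`` (an illustration, not the canonical API):
+
+.. code-block:: c++
+
+  #include "llvm/ADT/SmallVector.h"
+  #include "llvm/DebugInfo.h"
+  #include "llvm/IR/Function.h"
+  using namespace llvm;
+
+  // Collect every instruction in F whose attached location is in Scope.
+  static void instructionsInScope(Function &F, MDNode *Scope,
+                                  SmallVectorImpl<Instruction *> &Out) {
+    for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
+      for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I)
+        if (MDNode *N = I->getMetadata("dbg"))
+          if (static_cast<MDNode *>(DILocation(N).getScope()) == Scope)
+            Out.push_back(I);
+  }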
+
+.. _ccxx_frontend:
+
+C/C++ front-end specific debug information
+==========================================
+
+The C and C++ front-ends represent information about the program in a format
+that is effectively identical to `DWARF 3.0
+<http://www.eagercon.com/dwarf/dwarf3std.htm>`_ in terms of information
+content. This allows code generators to trivially support native debuggers
+by generating standard DWARF information, and contains enough information
+for non-DWARF targets to translate it as needed.
+
+This section describes the forms used to represent C and C++ programs. Other
+languages could pattern themselves after this (which itself is tuned to
+representing programs in the same way that DWARF 3 does), or they could choose
+to provide completely different forms if they don't fit into the DWARF model.
+As support for debugging information gets added to the various LLVM
+source-language front-ends, the information used should be documented here.
+
+The following sections provide examples of various C/C++ constructs and the
+debug information that would best describe those constructs.
+
+C/C++ source file information
+-----------------------------
+
+Given the source files ``MySource.cpp`` and ``MyHeader.h`` located in the
+directory ``/Users/mine/sources``, the following code:
+
+.. code-block:: c
+
+ #include "MyHeader.h"
+
+ int main(int argc, char *argv[]) {
+ return 0;
+ }
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ...
+ ;;
+ ;; Define the compile unit for the main source file "/Users/mine/sources/MySource.cpp".
+ ;;
+ !2 = metadata !{
+ i32 524305, ;; Tag
+ i32 0, ;; Unused
+ i32 4, ;; Language Id
+ metadata !"MySource.cpp",
+ metadata !"/Users/mine/sources",
+ metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)",
+ i1 true, ;; Main Compile Unit
+ i1 false, ;; Optimized compile unit
+ metadata !"", ;; Compiler flags
+ i32 0} ;; Runtime version
+
+ ;;
+ ;; Define the file for the file "/Users/mine/sources/MySource.cpp".
+ ;;
+ !1 = metadata !{
+ i32 524329, ;; Tag
+ metadata !"MySource.cpp",
+ metadata !"/Users/mine/sources",
+ metadata !2 ;; Compile unit
+ }
+
+ ;;
+  ;; Define the file for the file "/Users/mine/sources/MyHeader.h".
+ ;;
+ !3 = metadata !{
+ i32 524329, ;; Tag
+    metadata !"MyHeader.h",
+ metadata !"/Users/mine/sources",
+ metadata !2 ;; Compile unit
+ }
+
+ ...
+
+``llvm::Instruction`` provides easy access to metadata attached to an
+instruction. One can extract line number information encoded in LLVM IR using
+``Instruction::getMetadata()`` and ``DILocation::getLineNumber()``.
+
+.. code-block:: c++
+
+ if (MDNode *N = I->getMetadata("dbg")) { // Here I is an LLVM instruction
+ DILocation Loc(N); // DILocation is in DebugInfo.h
+ unsigned Line = Loc.getLineNumber();
+ StringRef File = Loc.getFilename();
+ StringRef Dir = Loc.getDirectory();
+ }
+
+C/C++ global variable information
+---------------------------------
+
+Given an integer global variable declared as follows:
+
+.. code-block:: c
+
+ int MyGlobal = 100;
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ;;
+ ;; Define the global itself.
+ ;;
+ %MyGlobal = global int 100
+ ...
+ ;;
+ ;; List of debug info of globals
+ ;;
+ !llvm.dbg.cu = !{!0}
+
+ ;; Define the compile unit.
+ !0 = metadata !{
+ i32 786449, ;; Tag
+ i32 0, ;; Context
+ i32 4, ;; Language
+ metadata !"foo.cpp", ;; File
+ metadata !"/Volumes/Data/tmp", ;; Directory
+ metadata !"clang version 3.1 ", ;; Producer
+ i1 true, ;; Deprecated field
+ i1 false, ;; "isOptimized"?
+ metadata !"", ;; Flags
+ i32 0, ;; Runtime Version
+ metadata !1, ;; Enum Types
+ metadata !1, ;; Retained Types
+ metadata !1, ;; Subprograms
+ metadata !3 ;; Global Variables
+ } ; [ DW_TAG_compile_unit ]
+
+ ;; The Array of Global Variables
+ !3 = metadata !{
+ metadata !4
+ }
+
+ !4 = metadata !{
+ metadata !5
+ }
+
+ ;;
+ ;; Define the global variable itself.
+ ;;
+ !5 = metadata !{
+ i32 786484, ;; Tag
+ i32 0, ;; Unused
+ null, ;; Unused
+ metadata !"MyGlobal", ;; Name
+ metadata !"MyGlobal", ;; Display Name
+ metadata !"", ;; Linkage Name
+ metadata !6, ;; File
+ i32 1, ;; Line
+ metadata !7, ;; Type
+ i32 0, ;; IsLocalToUnit
+ i32 1, ;; IsDefinition
+ i32* @MyGlobal ;; LLVM-IR Value
+ } ; [ DW_TAG_variable ]
+
+ ;;
+ ;; Define the file
+ ;;
+ !6 = metadata !{
+ i32 786473, ;; Tag
+ metadata !"foo.cpp", ;; File
+ metadata !"/Volumes/Data/tmp", ;; Directory
+ null ;; Unused
+ } ; [ DW_TAG_file_type ]
+
+ ;;
+ ;; Define the type
+ ;;
+ !7 = metadata !{
+ i32 786468, ;; Tag
+ null, ;; Unused
+ metadata !"int", ;; Name
+ null, ;; Unused
+ i32 0, ;; Line
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+ } ; [ DW_TAG_base_type ]
+
+C/C++ function information
+--------------------------
+
+Given a function declared as follows:
+
+.. code-block:: c
+
+ int main(int argc, char *argv[]) {
+ return 0;
+ }
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ;;
+ ;; Define the anchor for subprograms. Note that the second field of the
+ ;; anchor is 46, which is the same as the tag for subprograms
+ ;; (46 = DW_TAG_subprogram.)
+ ;;
+ !6 = metadata !{
+ i32 524334, ;; Tag
+ i32 0, ;; Unused
+ metadata !1, ;; Context
+ metadata !"main", ;; Name
+ metadata !"main", ;; Display name
+ metadata !"main", ;; Linkage name
+ metadata !1, ;; File
+ i32 1, ;; Line number
+ metadata !4, ;; Type
+ i1 false, ;; Is local
+ i1 true, ;; Is definition
+ i32 0, ;; Virtuality attribute, e.g. pure virtual function
+ i32 0, ;; Index into virtual table for C++ methods
+ i32 0, ;; Type that holds virtual table.
+ i32 0, ;; Flags
+ i1 false, ;; True if this function is optimized
+ Function *, ;; Pointer to llvm::Function
+ null ;; Function template parameters
+ }
+ ;;
+ ;; Define the subprogram itself.
+ ;;
+ define i32 @main(i32 %argc, i8** %argv) {
+ ...
+ }
+
+C/C++ basic types
+-----------------
+
+The following are the basic type descriptors for C/C++ core types:
+
+bool
+^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"bool", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 8, ;; Size in Bits
+ i64 8, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 2 ;; Encoding
+ }
+
+char
+^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"char", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 8, ;; Size in Bits
+ i64 8, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 6 ;; Encoding
+ }
+
+unsigned char
+^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"unsigned char",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 8, ;; Size in Bits
+ i64 8, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 8 ;; Encoding
+ }
+
+short
+^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"short int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 16, ;; Size in Bits
+ i64 16, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+ }
+
+unsigned short
+^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"short unsigned int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 16, ;; Size in Bits
+ i64 16, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+ }
+
+int
+^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"int", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+ }
+
+unsigned int
+^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"unsigned int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+ }
+
+long long
+^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"long long int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 64, ;; Size in Bits
+ i64 64, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+ }
+
+unsigned long long
+^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"long long unsigned int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 64, ;; Size in Bits
+ i64 64, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+ }
+
+float
+^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"float",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 4 ;; Encoding
+ }
+
+double
+^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"double",;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 64, ;; Size in Bits
+ i64 64, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 4 ;; Encoding
+ }
+
+C/C++ derived types
+-------------------
+
+Given the following as an example of C/C++ derived type:
+
+.. code-block:: c
+
+ typedef const int *IntPtr;
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ;;
+ ;; Define the typedef "IntPtr".
+ ;;
+ !2 = metadata !{
+ i32 524310, ;; Tag
+ metadata !1, ;; Context
+ metadata !"IntPtr", ;; Name
+ metadata !3, ;; File
+ i32 0, ;; Line number
+ i64 0, ;; Size in bits
+ i64 0, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !4 ;; Derived From type
+ }
+ ;;
+ ;; Define the pointer type.
+ ;;
+ !4 = metadata !{
+ i32 524303, ;; Tag
+ metadata !1, ;; Context
+ metadata !"", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 64, ;; Size in bits
+ i64 64, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+ }
+ ;;
+ ;; Define the const type.
+ ;;
+ !5 = metadata !{
+ i32 524326, ;; Tag
+ metadata !1, ;; Context
+ metadata !"", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !6 ;; Derived From type
+ }
+ ;;
+ ;; Define the int type.
+ ;;
+ !6 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"int", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ 5 ;; Encoding
+ }
+
+C/C++ struct/union types
+------------------------
+
+Given the following as an example of C/C++ struct type:
+
+.. code-block:: c
+
+ struct Color {
+ unsigned Red;
+ unsigned Green;
+ unsigned Blue;
+ };
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ;;
+ ;; Define basic type for unsigned int.
+ ;;
+ !5 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"unsigned int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+ }
+ ;;
+ ;; Define composite type for struct Color.
+ ;;
+ !2 = metadata !{
+ i32 524307, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Color", ;; Name
+ metadata !1, ;; Compile unit
+ i32 1, ;; Line number
+ i64 96, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ null, ;; Derived From
+ metadata !3, ;; Elements
+ i32 0 ;; Runtime Language
+ }
+
+ ;;
+ ;; Define the Red field.
+ ;;
+ !4 = metadata !{
+ i32 524301, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Red", ;; Name
+ metadata !1, ;; File
+ i32 2, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+ }
+
+ ;;
+ ;; Define the Green field.
+ ;;
+ !6 = metadata !{
+ i32 524301, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Green", ;; Name
+ metadata !1, ;; File
+ i32 3, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 32, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+ }
+
+ ;;
+ ;; Define the Blue field.
+ ;;
+ !7 = metadata !{
+ i32 524301, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Blue", ;; Name
+ metadata !1, ;; File
+ i32 4, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 64, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+ }
+
+ ;;
+ ;; Define the array of fields used by the composite type Color.
+ ;;
+ !3 = metadata !{metadata !4, metadata !6, metadata !7}
+
+C/C++ enumeration types
+-----------------------
+
+Given the following as an example of C/C++ enumeration type:
+
+.. code-block:: c
+
+ enum Trees {
+ Spruce = 100,
+ Oak = 200,
+ Maple = 300
+ };
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ;;
+ ;; Define composite type for enum Trees
+ ;;
+ !2 = metadata !{
+ i32 524292, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Trees", ;; Name
+ metadata !1, ;; File
+ i32 1, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ null, ;; Derived From type
+ metadata !3, ;; Elements
+ i32 0 ;; Runtime language
+ }
+
+ ;;
+ ;; Define the array of enumerators used by composite type Trees.
+ ;;
+ !3 = metadata !{metadata !4, metadata !5, metadata !6}
+
+ ;;
+ ;; Define Spruce enumerator.
+ ;;
+ !4 = metadata !{i32 524328, metadata !"Spruce", i64 100}
+
+ ;;
+ ;; Define Oak enumerator.
+ ;;
+ !5 = metadata !{i32 524328, metadata !"Oak", i64 200}
+
+ ;;
+ ;; Define Maple enumerator.
+ ;;
+ !6 = metadata !{i32 524328, metadata !"Maple", i64 300}
+
+Debugging information format
+============================
+
+Debugging Information Extension for Objective C Properties
+----------------------------------------------------------
+
+Introduction
+^^^^^^^^^^^^
+
+Objective C provides a simpler way to declare and define accessor methods
+using declared properties. The language provides features to declare a
+property and to let the compiler synthesize accessor methods.
+
+The debugger lets developers inspect Objective C interfaces and their
+instance variables and class variables. However, the debugger does not know
+anything about the properties defined in Objective C interfaces. The
+debugger consumes information generated by the compiler in DWARF format.
+The format does not support encoding of Objective C properties. This
+proposal describes DWARF extensions to encode Objective C properties, which
+the debugger can use to let developers inspect Objective C properties.
+
+Proposal
+^^^^^^^^
+
+Objective C properties exist separately from class members. A property can be
+defined only by "setter" and "getter" selectors, and be calculated anew on each
+access. Or a property can just be a direct access to some declared ivar.
+Finally, it can have an ivar "automatically synthesized" for it by the compiler,
+in which case the property can be referred to in user code directly using the
+standard C dereference syntax as well as through the property "dot" syntax, but
+there is no entry in the ``@interface`` declaration corresponding to this ivar.
+
+To facilitate debugging of these properties, we will add a new DWARF TAG into
+the ``DW_TAG_structure_type`` definition for the class to hold the description
+of a given property, and a set of DWARF attributes that provide said
+description. The property tag will also contain the name and declared type of
+the property.
+
+If there is a related ivar, there will also be a DWARF property attribute placed
+in the ``DW_TAG_member`` DIE for that ivar referring back to the property TAG
+for that property. And in the case where the compiler synthesizes the ivar
+directly, the compiler is expected to generate a ``DW_TAG_member`` for that
+ivar (with the ``DW_AT_artificial`` set to 1), whose name will be the name used
+to access this ivar directly in code, and with the property attribute pointing
+back to the property it is backing.
+
+The following examples will serve as illustration for our discussion:
+
+.. code-block:: objc
+
+ @interface I1 {
+ int n2;
+ }
+
+ @property int p1;
+ @property int p2;
+ @end
+
+ @implementation I1
+ @synthesize p1;
+ @synthesize p2 = n2;
+ @end
+
+This produces the following DWARF (this is a "pseudo dwarfdump" output):
+
+.. code-block:: none
+
+ 0x00000100: TAG_structure_type [7] *
+ AT_APPLE_runtime_class( 0x10 )
+ AT_name( "I1" )
+ AT_decl_file( "Objc_Property.m" )
+ AT_decl_line( 3 )
+
+  0x00000110: TAG_APPLE_property
+ AT_name ( "p1" )
+ AT_type ( {0x00000150} ( int ) )
+
+ 0x00000120: TAG_APPLE_property
+ AT_name ( "p2" )
+ AT_type ( {0x00000150} ( int ) )
+
+ 0x00000130: TAG_member [8]
+ AT_name( "_p1" )
+ AT_APPLE_property ( {0x00000110} "p1" )
+ AT_type( {0x00000150} ( int ) )
+ AT_artificial ( 0x1 )
+
+ 0x00000140: TAG_member [8]
+ AT_name( "n2" )
+ AT_APPLE_property ( {0x00000120} "p2" )
+ AT_type( {0x00000150} ( int ) )
+
+ 0x00000150: AT_type( ( int ) )
+
+Note that the current convention is that the name of the ivar for an
+auto-synthesized property is the name of the property from which it derives
+with an underscore prepended, as is shown in the example. But we actually
+don't need to know this convention, since we are given the name of the ivar
+directly.
+
+Also, it is common practice in ObjC to have different property declarations in
+the @interface and @implementation, e.g. to provide a read-only property in
+the interface and a read-write property in the implementation. In that case,
+the compiler should emit whichever property declaration will be in force in the
+current translation unit.
+
+Developers can decorate a property with attributes which are encoded using
+``DW_AT_APPLE_property_attribute``.
+
+.. code-block:: objc
+
+ @property (readonly, nonatomic) int pr;
+
+.. code-block:: none
+
+ TAG_APPLE_property [8]
+ AT_name( "pr" )
+ AT_type ( {0x00000147} (int) )
+ AT_APPLE_property_attribute (DW_APPLE_PROPERTY_readonly, DW_APPLE_PROPERTY_nonatomic)
+
+The setter and getter method names are attached to the property using
+``DW_AT_APPLE_property_setter`` and ``DW_AT_APPLE_property_getter`` attributes.
+
+.. code-block:: objc
+
+ @interface I1
+ @property (setter=myOwnP3Setter:) int p3;
+ -(void)myOwnP3Setter:(int)a;
+ @end
+
+ @implementation I1
+ @synthesize p3;
+ -(void)myOwnP3Setter:(int)a{ }
+ @end
+
+The DWARF for this would be:
+
+.. code-block:: none
+
+ 0x000003bd: TAG_structure_type [7] *
+ AT_APPLE_runtime_class( 0x10 )
+ AT_name( "I1" )
+ AT_decl_file( "Objc_Property.m" )
+ AT_decl_line( 3 )
+
+  0x000003cd: TAG_APPLE_property
+ AT_name ( "p3" )
+ AT_APPLE_property_setter ( "myOwnP3Setter:" )
+ AT_type( {0x00000147} ( int ) )
+
+ 0x000003f3: TAG_member [8]
+ AT_name( "_p3" )
+ AT_type ( {0x00000147} ( int ) )
+ AT_APPLE_property ( {0x000003cd} )
+ AT_artificial ( 0x1 )
+
+New DWARF Tags
+^^^^^^^^^^^^^^
+
++-----------------------+--------+
+| TAG | Value |
++=======================+========+
+| DW_TAG_APPLE_property | 0x4200 |
++-----------------------+--------+
+
+New DWARF Attributes
+^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------+--------+-----------+
+| Attribute | Value | Classes |
++================================+========+===========+
+| DW_AT_APPLE_property | 0x3fed | Reference |
++--------------------------------+--------+-----------+
+| DW_AT_APPLE_property_getter | 0x3fe9 | String |
++--------------------------------+--------+-----------+
+| DW_AT_APPLE_property_setter | 0x3fea | String |
++--------------------------------+--------+-----------+
+| DW_AT_APPLE_property_attribute | 0x3feb | Constant |
++--------------------------------+--------+-----------+
+
+New DWARF Constants
+^^^^^^^^^^^^^^^^^^^
+
++--------------------------------+-------+
+| Name | Value |
++================================+=======+
+| DW_AT_APPLE_PROPERTY_readonly | 0x1 |
++--------------------------------+-------+
+| DW_AT_APPLE_PROPERTY_readwrite | 0x2 |
++--------------------------------+-------+
+| DW_AT_APPLE_PROPERTY_assign | 0x4 |
++--------------------------------+-------+
+| DW_AT_APPLE_PROPERTY_retain | 0x8 |
++--------------------------------+-------+
+| DW_AT_APPLE_PROPERTY_copy | 0x10 |
++--------------------------------+-------+
+| DW_AT_APPLE_PROPERTY_nonatomic | 0x20 |
++--------------------------------+-------+
+
+Name Accelerator Tables
+-----------------------
+
+Introduction
+^^^^^^^^^^^^
+
+The "``.debug_pubnames``" and "``.debug_pubtypes``" formats are not what a
+debugger needs. The "``pub``" in the section name indicates that the entries
+in the table are publicly visible names only. This means no static or hidden
+functions show up in the "``.debug_pubnames``". No static variables or private
+class variables are in the "``.debug_pubtypes``". Many compilers add different
+things to these tables, so we can't rely upon consistent contents across gcc,
+icc, and clang.
+
+The typical query given by users tends not to match up with the contents of
+these tables. For example, the DWARF spec states that "In the case of the name
+of a function member or static data member of a C++ structure, class or union,
+the name presented in the "``.debug_pubnames``" section is not the simple name
+given by the ``DW_AT_name`` attribute of the referenced debugging information
+entry, but rather the fully qualified name of the data or function member."
+So the only names in these tables for complex C++ entries are fully qualified
+names. Debugger users tend not to enter their search strings as
+"``a::b::c(int,const Foo&) const``", but rather as "``c``", "``b::c``", or
+"``a::b::c``". So the name entered in the name table must be demangled in
+order to chop it up appropriately and additional names must be manually
+entered into the table to make it effective as a name lookup table for
+debuggers to use.
+
+All debuggers currently ignore the "``.debug_pubnames``" table as a result of
+its inconsistent and useless public-only name content, making it a waste of
+space in the object file. These tables, when they are written to disk, are not
+sorted in any way, leaving every debugger to do its own parsing and sorting.
+These tables also include an inlined copy of the string values in the table
+itself making the tables much larger than they need to be on disk, especially
+for large C++ programs.
+
+Can't we just fix the sections by adding all of the names we need to this
+table? No, because that is not what the tables are defined to contain and we
+won't know the difference between the old bad tables and the new good tables.
+At best we could make our own renamed sections that contain all of the data we
+need.
+
+These tables are also insufficient for what a debugger like LLDB needs. LLDB
+uses clang for its expression parsing, where LLDB acts as a PCH (precompiled
+header). LLDB is then often asked to look for type "``foo``" or namespace
+"``bar``", or list items in
+namespace "``baz``". Namespaces are not included in the pubnames or pubtypes
+tables. Since clang asks a lot of questions when it is parsing an expression,
+we need to be very fast when looking up names, as it happens a lot. Having new
+accelerator tables that are optimized for very quick lookups will benefit this
+type of debugging experience greatly.
+
+We would like to generate name lookup tables that can be mapped into memory
+from disk, and used as is, with little or no up-front parsing. We would also
+like to be able to control the exact content of these different tables so
+they contain
+exactly what we need. The Name Accelerator Tables were designed to fix these
+issues. To do so, we need to:
+
+* Have a format that can be mapped into memory from disk and used as is
+* Lookups should be very fast
+* Extensible table format so these tables can be made by many producers
+* Contain all of the names needed for typical lookups out of the box
+* Strict rules for the contents of tables
+
+Table size is important and the accelerator table format should allow the reuse
+of strings from common string tables so the strings for the names are not
+duplicated. We also want to make sure the table is ready to be used as-is by
+simply mapping the table into memory with minimal header parsing.
+
+The name lookups need to be fast and optimized for the kinds of lookups that
+debuggers tend to do. Optimally we would like to touch as few parts of the
+mapped table as possible when doing a name lookup and be able to quickly find
+the name entry we are looking for, or discover there are no matches. In the
+case of debuggers, we optimized for lookups that fail most of the time.
+
+Each table that is defined should have strict rules on exactly what is in
+the accelerator table, and these rules should be documented so clients can
+rely on the content.
+
+Hash Tables
+^^^^^^^^^^^
+
+Standard Hash Tables
+""""""""""""""""""""
+
+Typical hash tables have a header, buckets, and each bucket points to the
+bucket contents:
+
+.. code-block:: none
+
+ .------------.
+ | HEADER |
+ |------------|
+ | BUCKETS |
+ |------------|
+ | DATA |
+ `------------'
+
+The BUCKETS are an array of offsets to DATA for each hash:
+
+.. code-block:: none
+
+ .------------.
+ | 0x00001000 | BUCKETS[0]
+ | 0x00002000 | BUCKETS[1]
+ | 0x00002200 | BUCKETS[2]
+ | 0x000034f0 | BUCKETS[3]
+ | | ...
+ | 0xXXXXXXXX | BUCKETS[n_buckets]
+ '------------'
+
+So for ``bucket[3]`` in the example above, we have an offset into the table
+0x000034f0 which points to a chain of entries for the bucket. Each entry in
+the chain must contain a next pointer, the full 32 bit hash value, the string
+itself, and the data for the current string value.
+
+.. code-block:: none
+
+ .------------.
+ 0x000034f0: | 0x00003500 | next pointer
+ | 0x12345678 | 32 bit hash
+ | "erase" | string value
+ | data[n] | HashData for this bucket
+ |------------|
+ 0x00003500: | 0x00003550 | next pointer
+ | 0x29273623 | 32 bit hash
+ | "dump" | string value
+ | data[n] | HashData for this bucket
+ |------------|
+ 0x00003550: | 0x00000000 | next pointer
+ | 0x82638293 | 32 bit hash
+ | "main" | string value
+ | data[n] | HashData for this bucket
+ `------------'
+
+The problem with this layout for debuggers is that we need to optimize for the
+negative lookup case where the symbol we're searching for is not present. So
+if we were to look up "``printf``" in the table above, we would make a 32 bit
+hash for "``printf``", and it might match ``bucket[3]``. We would need to go
+to the offset 0x000034f0 and start looking to see if our 32 bit hash matches.
+To do
+so, we need to read the next pointer, then read the hash, compare it, and skip
+to the next bucket. Each time we are skipping many bytes in memory and
+touching new cache pages just to do the compare on the full 32 bit hash. All
+of these accesses then tell us that we didn't have a match.
+
+Name Hash Tables
+""""""""""""""""
+
+To solve the issues mentioned above we have structured the hash tables a bit
+differently: a header, buckets, an array of all unique 32 bit hash values,
+followed by an array of hash value data offsets, one for each hash value, then
+the data for all hash values:
+
+.. code-block:: none
+
+ .-------------.
+ | HEADER |
+ |-------------|
+ | BUCKETS |
+ |-------------|
+ | HASHES |
+ |-------------|
+ | OFFSETS |
+ |-------------|
+ | DATA |
+ `-------------'
+
+The ``BUCKETS`` in the name tables are an index into the ``HASHES`` array. By
+making all of the full 32 bit hash values contiguous in memory, we allow
+ourselves to efficiently check for a match while touching as little memory as
+possible. Most often checking the 32 bit hash values is as far as the lookup
+goes. If it does match, it usually is a match with no collisions. So for a
+table with "``n_buckets``" buckets, and "``n_hashes``" unique 32 bit hash
+values, we can clarify the contents of the ``BUCKETS``, ``HASHES`` and
+``OFFSETS`` as:
+
+.. code-block:: none
+
+ .-------------------------.
+ | HEADER.magic | uint32_t
+ | HEADER.version | uint16_t
+ | HEADER.hash_function | uint16_t
+ | HEADER.bucket_count | uint32_t
+ | HEADER.hashes_count | uint32_t
+ | HEADER.header_data_len | uint32_t
+ | HEADER_DATA | HeaderData
+ |-------------------------|
+ | BUCKETS | uint32_t[n_buckets] // 32 bit hash indexes
+ |-------------------------|
+ | HASHES | uint32_t[n_hashes] // 32 bit hash values
+ |-------------------------|
+ | OFFSETS | uint32_t[n_hashes] // 32 bit offsets to hash value data
+ |-------------------------|
+ | ALL HASH DATA |
+ `-------------------------'
+
+So taking the exact same data from the standard hash example above we end up
+with:
+
+.. code-block:: none
+
+ .------------.
+ | HEADER |
+ |------------|
+ | 0 | BUCKETS[0]
+ | 2 | BUCKETS[1]
+ | 5 | BUCKETS[2]
+ | 6 | BUCKETS[3]
+ | | ...
+ | ... | BUCKETS[n_buckets]
+ |------------|
+ | 0x........ | HASHES[0]
+ | 0x........ | HASHES[1]
+ | 0x........ | HASHES[2]
+ | 0x........ | HASHES[3]
+ | 0x........ | HASHES[4]
+ | 0x........ | HASHES[5]
+ | 0x12345678 | HASHES[6] hash for BUCKETS[3]
+ | 0x29273623 | HASHES[7] hash for BUCKETS[3]
+ | 0x82638293 | HASHES[8] hash for BUCKETS[3]
+ | 0x........ | HASHES[9]
+ | 0x........ | HASHES[10]
+ | 0x........ | HASHES[11]
+ | 0x........ | HASHES[12]
+ | 0x........ | HASHES[13]
+ | 0x........ | HASHES[n_hashes]
+ |------------|
+ | 0x........ | OFFSETS[0]
+ | 0x........ | OFFSETS[1]
+ | 0x........ | OFFSETS[2]
+ | 0x........ | OFFSETS[3]
+ | 0x........ | OFFSETS[4]
+ | 0x........ | OFFSETS[5]
+ | 0x000034f0 | OFFSETS[6] offset for BUCKETS[3]
+ | 0x00003500 | OFFSETS[7] offset for BUCKETS[3]
+ | 0x00003550 | OFFSETS[8] offset for BUCKETS[3]
+ | 0x........ | OFFSETS[9]
+ | 0x........ | OFFSETS[10]
+ | 0x........ | OFFSETS[11]
+ | 0x........ | OFFSETS[12]
+ | 0x........ | OFFSETS[13]
+ | 0x........ | OFFSETS[n_hashes]
+ |------------|
+ | |
+ | |
+ | |
+ | |
+ | |
+ |------------|
+  0x000034f0: | 0x00001203 | String offset into .debug_str ("erase")
+ | 0x00000004 | A 32 bit array count - number of HashData with name "erase"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x........ | HashData[2]
+ | 0x........ | HashData[3]
+ | 0x00000000 | String offset into .debug_str (terminate data for hash)
+ |------------|
+ 0x00003500: | 0x00001203 | String offset into .debug_str ("collision")
+ | 0x00000002 | A 32 bit array count - number of HashData with name "collision"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x00001203 | String offset into .debug_str ("dump")
+ | 0x00000003 | A 32 bit array count - number of HashData with name "dump"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x........ | HashData[2]
+ | 0x00000000 | String offset into .debug_str (terminate data for hash)
+ |------------|
+ 0x00003550: | 0x00001203 | String offset into .debug_str ("main")
+ | 0x00000009 | A 32 bit array count - number of HashData with name "main"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x........ | HashData[2]
+ | 0x........ | HashData[3]
+ | 0x........ | HashData[4]
+ | 0x........ | HashData[5]
+ | 0x........ | HashData[6]
+ | 0x........ | HashData[7]
+ | 0x........ | HashData[8]
+ | 0x00000000 | String offset into .debug_str (terminate data for hash)
+ `------------'
+
+So we still have all of the same data; we just organize it more efficiently for
+debugger lookup. If we repeat the same "``printf``" lookup from above, we
+would hash "``printf``" and find it matches ``BUCKETS[3]`` by taking the 32
+bit hash value modulo ``n_buckets``. ``BUCKETS[3]`` contains "6", which
+is the index into the ``HASHES`` table. We would then compare any consecutive
+32 bit hash values in the ``HASHES`` array as long as the hashes would be in
+``BUCKETS[3]``. We do this by verifying that each subsequent hash value modulo
+``n_buckets`` is still 3. In the case of a failed lookup we would access the
+memory for ``BUCKETS[3]``, and then compare a few consecutive 32 bit hashes
+before we know that we have no match. We don't end up marching through
+multiple words of memory and we really keep the number of processor data cache
+lines being accessed as small as possible.
+
+The string hash that is used for these lookup tables is the Daniel J.
+Bernstein hash, which is also used in the ELF ``GNU_HASH`` sections. It is a
+very good hash for all kinds of names in programs with very few hash
+collisions.
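+
+A sketch of that hash function (the classic "times 33" form, shown here for
+illustration):
+
+.. code-block:: c++
+
+  #include <stdint.h>
+
+  // The Bernstein string hash: h = h * 33 + c, starting from 5381.
+  static uint32_t hashDJB(const char *Str) {
+    uint32_t H = 5381;
+    for (const unsigned char *C = (const unsigned char *)Str; *C; ++C)
+      H = H * 33 + *C;
+    return H;
+  }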
+
+Empty buckets are designated by using an invalid hash index of ``UINT32_MAX``.
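+
+Putting the pieces together, the lookup just described can be sketched as
+follows (function and parameter names are hypothetical; a real reader must
+also honor the header's byte order):
+
+.. code-block:: c++
+
+  #include <stdint.h>
+
+  // Returns the offset of the hash data for Hash, or 0xffffffff if the
+  // name is not present in the table.
+  uint32_t lookupHash(const uint32_t *Buckets, uint32_t NumBuckets,
+                      const uint32_t *Hashes, uint32_t NumHashes,
+                      const uint32_t *Offsets, uint32_t Hash) {
+    const uint32_t Invalid = 0xffffffffu; // UINT32_MAX marks empty buckets
+    uint32_t Bucket = Hash % NumBuckets;
+    uint32_t Idx = Buckets[Bucket];
+    if (Idx == Invalid)
+      return Invalid;                     // empty bucket, no match
+    // Compare consecutive hash values for as long as they still belong
+    // to this bucket (hash % NumBuckets == Bucket).
+    for (; Idx < NumHashes && Hashes[Idx] % NumBuckets == Bucket; ++Idx)
+      if (Hashes[Idx] == Hash)
+        return Offsets[Idx];              // offset to the hash data
+    return Invalid;                       // negative lookup
+  }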
+
+Details
+^^^^^^^
+
+These name hash tables are designed to be generic, with specializations of the
+table defining the additional data that goes into the header ("``HeaderData``"),
+how the string value is stored ("``KeyType``"), and the content of the data for
+each hash value.
+
+Header Layout
+"""""""""""""
+
+The header has a fixed part, and the specialized part. The exact format of the
+header is:
+
+.. code-block:: c
+
+ struct Header
+ {
+ uint32_t magic; // 'HASH' magic value to allow endian detection
+ uint16_t version; // Version number
+ uint16_t hash_function; // The hash function enumeration that was used
+ uint32_t bucket_count; // The number of buckets in this hash table
+ uint32_t hashes_count; // The total number of unique hash values and hash data offsets in this table
+ uint32_t header_data_len; // The bytes to skip to get to the hash indexes (buckets) for correct alignment
+ // Specifically the length of the following HeaderData field - this does not
+ // include the size of the preceding fields
+ HeaderData header_data; // Implementation specific header data
+ };
+
+The header starts with a 32 bit "``magic``" value which must be ``'HASH'``
+encoded as an ASCII integer. This allows the detection of the start of the
+hash table and also allows the table's byte order to be determined so the table
+can be correctly extracted. The "``magic``" value is followed by a 16 bit
+``version`` number which allows the table to be revised and modified in the
+future. The current version number is 1. ``hash_function`` is a ``uint16_t``
+enumeration that specifies which hash function was used to produce this table.
+The current values for the hash function enumerations include:
+
+.. code-block:: c
+
+ enum HashFunctionType
+ {
+ eHashFunctionDJB = 0u, // Daniel J Bernstein hash function
+ };
+
+``bucket_count`` is a 32 bit unsigned integer that represents how many buckets
+are in the ``BUCKETS`` array. ``hashes_count`` is the number of unique 32 bit
+hash values that are in the ``HASHES`` array, and is the same as the number of
+offsets contained in the ``OFFSETS`` array. ``header_data_len`` specifies the
+size in bytes of the ``HeaderData`` that is filled in by specialized versions
+of this table.
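+
+As an illustration, here is a sketch of validating these fixed fields when
+mapping a table (the helper names are hypothetical):
+
+.. code-block:: c++
+
+  #include <stdint.h>
+  #include <string.h>
+
+  // 'HASH' encoded as an ASCII integer.
+  static const uint32_t HashMagic = 0x48415348u;
+
+  static uint32_t byteSwap32(uint32_t V) {
+    return (V << 24) | ((V & 0xff00u) << 8) |
+           ((V >> 8) & 0xff00u) | (V >> 24);
+  }
+
+  // Check the magic value of a mapped table and detect whether it was
+  // written in the opposite byte order.
+  bool validateHashTableHeader(const uint8_t *Data, bool &NeedsByteSwap) {
+    uint32_t Magic;
+    memcpy(&Magic, Data, sizeof(Magic));
+    if (Magic == HashMagic) {
+      NeedsByteSwap = false;
+      return true;
+    }
+    if (byteSwap32(Magic) == HashMagic) {
+      NeedsByteSwap = true;  // table produced with the other byte order
+      return true;
+    }
+    return false;            // not a hash table header
+  }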
+
+Fixed Lookup
+""""""""""""
+
+The header is followed by the buckets, hashes, offsets, and hash value data.
+
+.. code-block:: c
+
+ struct FixedTable
+ {
+ uint32_t buckets[Header.bucket_count]; // An array of hash indexes into the "hashes[]" array below
+ uint32_t hashes [Header.hashes_count]; // Every unique 32 bit hash for the entire table is in this table
+ uint32_t offsets[Header.hashes_count]; // An offset that corresponds to each item in the "hashes[]" array above
+ };
+
+``buckets`` is an array of 32 bit indexes into the ``hashes`` array. The
+``hashes`` array contains all of the 32 bit hash values for all names in the
+hash table. Each hash in the ``hashes`` table has an offset in the ``offsets``
+array that points to the data for the hash value.
+
+This table setup makes it very easy to repurpose these tables to contain
+different data, while keeping the lookup mechanism the same for all tables.
+This layout also makes it possible to save the table to disk and map it in
+later and do very efficient name lookups with little or no parsing.
+
+DWARF lookup tables can be implemented in a variety of ways and can store a lot
+of information for each name. We want to make the DWARF tables extensible and
+able to store the data efficiently so we have used some of the DWARF features
+that enable efficient data storage to define exactly what kind of data we store
+for each name.
+
+The ``HeaderData`` contains a definition of the contents of each HashData chunk.
+We might want to store an offset to all of the debug information entries (DIEs)
+for each name. To keep things extensible, we create a list of items, or
+Atoms, that are contained in the data for each name. First comes the type of
+the data in each atom:
+
+.. code-block:: c
+
+ enum AtomType
+ {
+ eAtomTypeNULL = 0u,
+ eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
+    eAtomTypeCUOffset = 2u, // DIE offset of the compile unit header that contains the item in question
+ eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2
+ eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
+ eAtomTypeTypeFlags = 5u, // Flags from enum TypeFlags
+ };
+
+The enumeration values and their meanings are:
+
+.. code-block:: none
+
+ eAtomTypeNULL - a termination atom that specifies the end of the atom list
+ eAtomTypeDIEOffset - an offset into the .debug_info section for the DWARF DIE for this name
+ eAtomTypeCUOffset - an offset into the .debug_info section for the CU that contains the DIE
+  eAtomTypeTag - The DW_TAG_XXX enumeration value so you don't have to parse the DWARF to see what it is
+ eAtomTypeNameFlags - Flags for functions and global variables (isFunction, isInlined, isExternal...)
+ eAtomTypeTypeFlags - Flags for types (isCXXClass, isObjCClass, ...)
+
+Then we allow each atom to define its type and how the data for that atom is
+encoded:
+
+.. code-block:: c
+
+ struct Atom
+ {
+ uint16_t type; // AtomType enum value
+ uint16_t form; // DWARF DW_FORM_XXX defines
+ };
+
+The ``form`` type above is from the DWARF specification and defines the exact
+encoding of the data for the Atom type. See the DWARF specification for the
+``DW_FORM_`` definitions.
+
+.. code-block:: c
+
+ struct HeaderData
+ {
+ uint32_t die_offset_base;
+ uint32_t atom_count;
+    Atom atoms[atom_count];
+ };
+
+``HeaderData`` defines the base DIE offset that should be added to any atoms
+that are encoded using the ``DW_FORM_ref1``, ``DW_FORM_ref2``,
+``DW_FORM_ref4``, ``DW_FORM_ref8`` or ``DW_FORM_ref_udata``. It also defines
+what is contained in each ``HashData`` object -- ``Atom.form`` tells us how large
+each field will be in the ``HashData`` and the ``Atom.type`` tells us how this data
+should be interpreted.
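+
+For fixed-size forms this makes the size of each ``HashData`` item easy to
+compute. A sketch, assuming the ``Atom`` struct above and the standard
+``DW_FORM_`` constants are in scope:
+
+.. code-block:: c++
+
+  #include <stdint.h>
+
+  // Byte size of one HashData item when every atom uses a fixed-size
+  // form; a real reader must handle every form it is willing to accept.
+  uint32_t hashDataItemSize(const Atom *Atoms, uint32_t AtomCount) {
+    uint32_t Size = 0;
+    for (uint32_t i = 0; i < AtomCount; ++i) {
+      switch (Atoms[i].form) {
+      case DW_FORM_data1: Size += 1; break;
+      case DW_FORM_data2: Size += 2; break;
+      case DW_FORM_data4: Size += 4; break;
+      case DW_FORM_data8: Size += 8; break;
+      default: return 0; // variable-length or unsupported form
+      }
+    }
+    return Size;
+  }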
+
+For the current implementations of the "``.apple_names``" (all functions +
+globals), the "``.apple_types``" (names of all types that are defined), and
+the "``.apple_namespaces``" (all namespaces), we currently set the ``Atom``
+array to be:
+
+.. code-block:: c
+
+ HeaderData.atom_count = 1;
+ HeaderData.atoms[0].type = eAtomTypeDIEOffset;
+ HeaderData.atoms[0].form = DW_FORM_data4;
+
+This defines the contents to be the DIE offset (eAtomTypeDIEOffset) that is
+encoded as a 32 bit value (DW_FORM_data4). This allows a single name to have
+multiple matching DIEs in a single file, as can happen with an inlined
+function, for instance. Future tables could include more information about the
+DIE such as flags indicating if the DIE is a function, method, block,
+or inlined.
+
+The ``KeyType`` for the DWARF table is a 32 bit string table offset into the
+".debug_str" table. The ".debug_str" table is the string table for the DWARF,
+which may already contain copies of all of the strings. This helps make sure,
+with help from the compiler, that we reuse the strings between all of the
+DWARF sections and keeps the hash table size down. Another benefit of having
+the compiler generate all strings as DW_FORM_strp in the debug info is that
+DWARF parsing can be made much faster.
+
+After a lookup is made, we get an offset into the hash data. The hash data
+needs to be able to deal with 32 bit hash collisions, so the chunk of data
+at the offset in the hash data consists of a triple:
+
+.. code-block:: c
+
+ uint32_t str_offset
+ uint32_t hash_data_count
+ HashData[hash_data_count]
+
+If "str_offset" is zero, then the bucket contents are done. 99.9% of the
+hash data chunks contain a single item (no 32 bit hash collision):
+
+.. code-block:: none
+
+ .------------.
+ | 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
+ | 0x00000004 | uint32_t HashData count
+ | 0x........ | uint32_t HashData[0] DIE offset
+ | 0x........ | uint32_t HashData[1] DIE offset
+ | 0x........ | uint32_t HashData[2] DIE offset
+ | 0x........ | uint32_t HashData[3] DIE offset
+ | 0x00000000 | uint32_t KeyType (end of hash chain)
+ `------------'
+
+If there are collisions, you will have multiple valid string offsets:
+
+.. code-block:: none
+
+ .------------.
+ | 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
+ | 0x00000004 | uint32_t HashData count
+ | 0x........ | uint32_t HashData[0] DIE offset
+ | 0x........ | uint32_t HashData[1] DIE offset
+ | 0x........ | uint32_t HashData[2] DIE offset
+ | 0x........ | uint32_t HashData[3] DIE offset
+ | 0x00002023 | uint32_t KeyType (.debug_str[0x0002023] => "print")
+ | 0x00000002 | uint32_t HashData count
+ | 0x........ | uint32_t HashData[0] DIE offset
+ | 0x........ | uint32_t HashData[1] DIE offset
+ | 0x00000000 | uint32_t KeyType (end of hash chain)
+ `------------'
+
+Current testing with real world C++ binaries has shown that there is around
+one 32 bit hash collision per 100,000 name entries.
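+
+A sketch of consuming such a chunk, under the single ``DW_FORM_data4`` atom
+layout used above (``getDebugStr`` is a hypothetical helper that resolves an
+offset into the ".debug_str" table):
+
+.. code-block:: c++
+
+  #include <stdint.h>
+  #include <string.h>
+  #include <vector>
+
+  const char *getDebugStr(uint32_t StrOffset); // hypothetical helper
+
+  // Walk the hash data at Data, collecting the DIE offsets of entries
+  // whose string equals Name. A zero string offset ends the chain.
+  void collectDIEOffsets(const uint32_t *Data, const char *Name,
+                         std::vector<uint32_t> &Out) {
+    for (;;) {
+      uint32_t StrOffset = *Data++;
+      if (StrOffset == 0)
+        return;                    // end of data for this hash value
+      uint32_t Count = *Data++;    // number of HashData items following
+      bool Match = strcmp(getDebugStr(StrOffset), Name) == 0;
+      for (uint32_t i = 0; i != Count; ++i, ++Data)
+        if (Match)
+          Out.push_back(*Data);    // each item is one 32 bit DIE offset
+    }
+  }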
+
+Contents
+^^^^^^^^
+
+As we said, we want to strictly define exactly what is included in the
+different tables. For DWARF, we have 3 tables: "``.apple_names``",
+"``.apple_types``", and "``.apple_namespaces``".
+
+"``.apple_names``" sections should contain an entry for each DWARF DIE whose
+``DW_TAG`` is a ``DW_TAG_label``, ``DW_TAG_inlined_subroutine``, or
+``DW_TAG_subprogram`` that has address attributes: ``DW_AT_low_pc``,
+``DW_AT_high_pc``, ``DW_AT_ranges`` or ``DW_AT_entry_pc``. It also contains
+``DW_TAG_variable`` DIEs that have a ``DW_OP_addr`` in the location (global and
+static variables). All global and static variables should be included,
+including those scoped within functions and classes. For example using the
+following code:
+
+.. code-block:: c
+
+ static int var = 0;
+
+ void f ()
+ {
+ static int var = 0;
+ }
+
+Both of the static ``var`` variables would be included in the table. All
+functions should emit both their full names and their basenames. For C or C++,
+the full name is the mangled name (if available) which is usually in the
+``DW_AT_MIPS_linkage_name`` attribute, and the ``DW_AT_name`` contains the
+function basename. If global or static variables have a mangled name in a
+``DW_AT_MIPS_linkage_name`` attribute, this should be emitted along with the
+simple name found in the ``DW_AT_name`` attribute.
+
+"``.apple_types``" sections should contain an entry for each DWARF DIE whose
+tag is one of:
+
+* DW_TAG_array_type
+* DW_TAG_class_type
+* DW_TAG_enumeration_type
+* DW_TAG_pointer_type
+* DW_TAG_reference_type
+* DW_TAG_string_type
+* DW_TAG_structure_type
+* DW_TAG_subroutine_type
+* DW_TAG_typedef
+* DW_TAG_union_type
+* DW_TAG_ptr_to_member_type
+* DW_TAG_set_type
+* DW_TAG_subrange_type
+* DW_TAG_base_type
+* DW_TAG_const_type
+* DW_TAG_constant
+* DW_TAG_file_type
+* DW_TAG_namelist
+* DW_TAG_packed_type
+* DW_TAG_volatile_type
+* DW_TAG_restrict_type
+* DW_TAG_interface_type
+* DW_TAG_unspecified_type
+* DW_TAG_shared_type
+
+Only entries with a ``DW_AT_name`` attribute are included, and the entry must
+not be a forward declaration (``DW_AT_declaration`` attribute with a non-zero
+value). For example, using the following code:
+
+.. code-block:: c
+
+ int main ()
+ {
+ int *b = 0;
+ return *b;
+ }
+
+We get a few type DIEs:
+
+.. code-block:: none
+
+ 0x00000067: TAG_base_type [5]
+ AT_encoding( DW_ATE_signed )
+ AT_name( "int" )
+ AT_byte_size( 0x04 )
+
+ 0x0000006e: TAG_pointer_type [6]
+ AT_type( {0x00000067} ( int ) )
+ AT_byte_size( 0x08 )
+
+The DW_TAG_pointer_type is not included because it does not have a ``DW_AT_name``.
+
+The "``.apple_namespaces``" section should contain all ``DW_TAG_namespace``
+DIEs. If we run into a namespace that has no name, this is an anonymous
+namespace, and the name should be output as "``(anonymous namespace)``"
+(without the quotes). Why? This matches the output of the
+``abi::__cxa_demangle()`` function in the standard C++ library, which
+demangles mangled names.
+
+
+Language Extensions and File Format Changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Objective-C Extensions
+""""""""""""""""""""""
+
+The "``.apple_objc``" section should contain all ``DW_TAG_subprogram`` DIEs
+for an Objective-C class. The name used in the hash table is the name of the
+Objective-C class itself. If the Objective-C class has a category, then an
+entry is made for both the class name without the category, and for the class
+name with the category. So if we have a DIE at offset 0x1234 with a name of
+method "``-[NSString(my_additions) stringWithSpecialString:]``", we would add
+an entry for "``NSString``" that points to DIE 0x1234, and an entry for
+"``NSString(my_additions)``" that points to 0x1234. This allows us to quickly
+track down all Objective-C methods for an Objective-C class when doing
+expressions. It is needed because of the dynamic nature of Objective-C,
+where anyone can add methods to a class. The DWARF for Objective-C methods
+is also emitted differently from C++ classes: the methods are not usually
+contained in the class definition, but are scattered across one or more
+compile units. Categories can also be defined in different shared libraries.
+So we need to be able to quickly find all of the methods and class functions
+given the Objective-C class name, or quickly find all methods and class
+functions for a class + category name. This table does not contain any
+selector names; it just maps Objective-C class names (or class names +
+category) to all of the methods and class functions. The selectors are added
+as function basenames in the "``.debug_names``" section.
+
+In the "``.apple_names``" section for Objective-C functions, the full name is
+the entire function name with the brackets ("``-[NSString
+stringWithCString:]``") and the basename is the selector only
+("``stringWithCString:``").
+
+Mach-O Changes
+""""""""""""""
+
+The section names above for the Apple hash tables are for non-Mach-O files.
+For Mach-O files, the sections should be contained in the ``__DWARF`` segment
+with names as follows:
+
+* "``.apple_names``" -> "``__apple_names``"
+* "``.apple_types``" -> "``__apple_types``"
+* "``.apple_namespaces``" -> "``__apple_namespac``" (16 character limit)
+* "``.apple_objc``" -> "``__apple_objc``"
+
diff --git a/docs/SphinxQuickstartTemplate.rst b/docs/SphinxQuickstartTemplate.rst
index 75d916368e33..fe6e44a27cea 100644
--- a/docs/SphinxQuickstartTemplate.rst
+++ b/docs/SphinxQuickstartTemplate.rst
@@ -2,8 +2,6 @@
Sphinx Quickstart Template
==========================
-.. sectionauthor:: Sean Silva <silvas@purdue.edu>
-
Introduction and Quickstart
===========================
@@ -24,7 +22,8 @@ reStructuredText syntax is useful when writing the document, so the last
~half of this document (starting with `Example Section`_) gives examples
which should cover 99% of use cases.
-Let me say that again: focus on *content*.
+Let me say that again: focus on *content*. But if you really need to verify
+Sphinx's output, see ``docs/README.txt`` for information.
Once you have finished with the content, please send the ``.rst`` file to
llvm-commits for review.
@@ -65,7 +64,7 @@ Your text can be *emphasized*, **bold**, or ``monospace``.
Use blank lines to separate paragraphs.
-Headings (like ``Example Section`` just above) give your document
+Headings (like ``Example Section`` just above) give your document its
structure. Use the same kind of adornments (e.g. ``======`` vs. ``------``)
as are used in this document. The adornment must be the same length as the
text above it. For Vim users, variations of ``yypVr=`` might be handy.
@@ -86,7 +85,7 @@ Lists can be made like this:
#. This is a second list element.
- #. They nest too.
+ #. Use indentation to create nested lists.
You can also use unordered lists.
@@ -104,18 +103,54 @@ You can make blocks of code like this:
.. code-block:: c++
int main() {
- return 0
+ return 0;
}
-For a shell session, use a ``bash`` code block:
+For a shell session, use a ``console`` code block (some existing docs use
+``bash``):
-.. code-block:: bash
+.. code-block:: console
$ echo "Goodbye cruel world!"
$ rm -rf /
If you need to show LLVM IR use the ``llvm`` code block.
+.. code-block:: llvm
+
+ define i32 @test1() {
+ entry:
+ ret i32 0
+ }
+
+Some other common code blocks you might need are ``c``, ``objc``, ``make``,
+and ``cmake``. If you need something beyond that, you can look at the `full
+list`_ of supported code blocks.
+
+.. _`full list`: http://pygments.org/docs/lexers/
+
+However, don't waste time fiddling with syntax highlighting when you could
+be adding meaningful content. When in doubt, show preformatted text
+without any syntax highlighting like this:
+
+::
+
+ .
+ +:.
+ ..:: ::
+ .++:+:: ::+:.:.
+ .:+ :
+ ::.::..:: .+.
+ ..:+ :: :
+ ......+:. ..
+ :++. .. :
+ .+:::+:: :
+ .. . .+ ::
+ +.: .::+.
+ ...+. .: .
+ .++:..
+ ...
+
Hopefully you won't need to be this deep
""""""""""""""""""""""""""""""""""""""""
diff --git a/docs/SystemLibrary.html b/docs/SystemLibrary.html
deleted file mode 100644
index 4b09e7cba13d..000000000000
--- a/docs/SystemLibrary.html
+++ /dev/null
@@ -1,316 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>System Library</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>System Library</h1>
-<ul>
- <li><a href="#abstract">Abstract</a></li>
- <li><a href="#requirements">Keeping LLVM Portable</a>
- <ol>
- <li><a href="#headers">Don't Include System Headers</a></li>
- <li><a href="#expose">Don't Expose System Headers</a></li>
- <li><a href="#c_headers">Allow Standard C Header Files</a></li>
- <li><a href="#cpp_headers">Allow Standard C++ Header Files</a></li>
- <li><a href="#highlev">High-Level Interface</a></li>
- <li><a href="#nofunc">No Exposed Functions</a></li>
- <li><a href="#nodata">No Exposed Data</a></li>
- <li><a href="#nodupl">No Duplicate Implementations</a></li>
- <li><a href="#nounused">No Unused Functionality</a></li>
- <li><a href="#virtuals">No Virtual Methods</a></li>
- <li><a href="#softerrors">Minimize Soft Errors</a></li>
- <li><a href="#throw_spec">No throw() Specifications</a></li>
- <li><a href="#organization">Code Organization</a></li>
- <li><a href="#semantics">Consistent Semantics</a></li>
- <li><a href="#bug">Tracking Bugzilla Bug: 351</a></li>
- </ol></li>
-</ul>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a></p>
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2><a name="abstract">Abstract</a></h2>
-<div>
- <p>This document provides some details on LLVM's System Library, located in
- the source at <tt>lib/System</tt> and <tt>include/llvm/System</tt>. The
- library's purpose is to shield LLVM from the differences between operating
- systems for the few services LLVM needs from the operating system. Much of
- LLVM is written using portability features of standard C++. However, in a few
- areas, system dependent facilities are needed and the System Library is the
- wrapper around those system calls.</p>
- <p>By centralizing LLVM's use of operating system interfaces, we make it
- possible for the LLVM tool chain and runtime libraries to be more easily
- ported to new platforms since (theoretically) only <tt>lib/System</tt> needs
- to be ported. This library also unclutters the rest of LLVM from #ifdef use
- and special cases for specific operating systems. Such uses are replaced
- with simple calls to the interfaces provided in <tt>include/llvm/System</tt>.
- </p>
- <p>Note that the System Library is not intended to be a complete operating
- system wrapper (such as the Adaptive Communications Environment (ACE) or
- Apache Portable Runtime (APR)), but only provides the functionality necessary
- to support LLVM.
- <p>The System Library was written by Reid Spencer who formulated the
- design based on similar work originating from the eXtensible Programming
- System (XPS). Several people helped with the effort; especially,
- Jeff Cohen and Henrik Bach on the Win32 port.</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="requirements">Keeping LLVM Portable</a>
-</h2>
-<div>
- <p>In order to keep LLVM portable, LLVM developers should adhere to a set of
- portability rules associated with the System Library. Adherence to these rules
- should help the System Library achieve its goal of shielding LLVM from the
- variations in operating system interfaces and doing so efficiently. The
- following sections define the rules needed to fulfill this objective.</p>
-
-<!-- ======================================================================= -->
-<h3><a name="headers">Don't Include System Headers</a></h3>
-<div>
- <p>Except in <tt>lib/System</tt>, no LLVM source code should directly
- <tt>#include</tt> a system header. Care has been taken to remove all such
- <tt>#includes</tt> from LLVM while <tt>lib/System</tt> was being
- developed. Specifically this means that header files like "unistd.h",
- "windows.h", "stdio.h", and "string.h" are forbidden to be included by LLVM
- source code outside the implementation of <tt>lib/System</tt>.</p>
- <p>To obtain system-dependent functionality, existing interfaces to the system
- found in <tt>include/llvm/System</tt> should be used. If an appropriate
- interface is not available, it should be added to <tt>include/llvm/System</tt>
- and implemented in <tt>lib/System</tt> for all supported platforms.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="expose">Don't Expose System Headers</a></h3>
-<div>
- <p>The System Library must shield LLVM from <em>all</em> system headers. To
- obtain system level functionality, LLVM source must
- <tt>#include "llvm/System/Thing.h"</tt> and nothing else. This means that
- <tt>Thing.h</tt> cannot expose any system header files. This protects LLVM
- from accidentally using system specific functionality and only allows it
- via the <tt>lib/System</tt> interface.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="c_headers">Use Standard C Headers</a></h3>
-<div>
- <p>The <em>standard</em> C headers (the ones beginning with "c") are allowed
- to be exposed through the <tt>lib/System</tt> interface. These headers and
- the things they declare are considered to be platform agnostic. LLVM source
- files may include them directly or obtain their inclusion through
- <tt>lib/System</tt> interfaces.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="cpp_headers">Use Standard C++ Headers</a></h3>
-<div>
- <p>The <em>standard</em> C++ headers from the standard C++ library and
- standard template library may be exposed through the <tt>lib/System</tt>
- interface. These headers and the things they declare are considered to be
- platform agnostic. LLVM source files may include them or obtain their
- inclusion through lib/System interfaces.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="highlev">High Level Interface</a></h3>
-<div>
- <p>The entry points specified in the interface of lib/System must be aimed at
- completing some reasonably high level task needed by LLVM. We do not want to
- simply wrap each operating system call. It would be preferable to wrap several
- operating system calls that are always used in conjunction with one another by
- LLVM.</p>
- <p>For example, consider what is needed to execute a program, wait for it to
- complete, and return its result code. On Unix, this involves the following
- operating system calls: <tt>getenv, fork, execve,</tt> and <tt>wait</tt>. The
- correct thing for lib/System to provide is a function, say
- <tt>ExecuteProgramAndWait</tt>, that implements the functionality completely.
- what we don't want is wrappers for the operating system calls involved.</p>
- <p>There must <em>not</em> be a one-to-one relationship between operating
- system calls and the System library's interface. Any such interface function
- will be suspicious.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="nounused">No Unused Functionality</a></h3>
-<div>
- <p>There must be no functionality specified in the interface of lib/System
- that isn't actually used by LLVM. We're not writing a general purpose
- operating system wrapper here, just enough to satisfy LLVM's needs. And, LLVM
- doesn't need much. This design goal aims to keep the lib/System interface
- small and understandable which should foster its actual use and adoption.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="nodupl">No Duplicate Implementations</a></h3>
-<div>
- <p>The implementation of a function for a given platform must be written
- exactly once. This implies that it must be possible to apply a function's
- implementation to multiple operating systems if those operating systems can
- share the same implementation. This rule applies to the set of operating
- systems supported for a given class of operating system (e.g. Unix, Win32).
- </p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="virtuals">No Virtual Methods</a></h3>
-<div>
- <p>The System Library interfaces can be called quite frequently by LLVM. In
- order to make those calls as efficient as possible, we discourage the use of
- virtual methods. There is no need to use inheritance for implementation
- differences, it just adds complexity. The <tt>#include</tt> mechanism works
- just fine.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="nofunc">No Exposed Functions</a></h3>
-<div>
- <p>Any functions defined by system libraries (i.e. not defined by lib/System)
- must not be exposed through the lib/System interface, even if the header file
- for that function is not exposed. This prevents inadvertent use of system
- specific functionality.</p>
- <p>For example, the <tt>stat</tt> system call is notorious for having
- variations in the data it provides. <tt>lib/System</tt> must not declare
- <tt>stat</tt> nor allow it to be declared. Instead it should provide its own
- interface to discovering information about files and directories. Those
- interfaces may be implemented in terms of <tt>stat</tt> but that is strictly
- an implementation detail. The interface provided by the System Library must
- be implemented on all platforms (even those without <tt>stat</tt>).</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="nodata">No Exposed Data</a></h3>
-<div>
- <p>Any data defined by system libraries (i.e. not defined by lib/System) must
- not be exposed through the lib/System interface, even if the header file for
- that function is not exposed. As with functions, this prevents inadvertent use
- of data that might not exist on all platforms.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="softerrors">Minimize Soft Errors</a></h3>
-<div>
- <p>Operating system interfaces will generally provide error results for every
- little thing that could go wrong. In almost all cases, you can divide these
- error results into two groups: normal/good/soft and abnormal/bad/hard. That
- is, some of the errors are simply information like "file not found",
- "insufficient privileges", etc. while other errors are much harder like
- "out of space", "bad disk sector", or "system call interrupted". We'll call
- the first group "<i>soft</i>" errors and the second group "<i>hard</i>"
- errors.<p>
- <p>lib/System must always attempt to minimize soft errors.
- This is a design requirement because the
- minimization of soft errors can affect the granularity and the nature of the
- interface. In general, if you find that you're wanting to throw soft errors,
- you must review the granularity of the interface because it is likely you're
- trying to implement something that is too low level. The rule of thumb is to
- provide interface functions that <em>can't</em> fail, except when faced with
- hard errors.</p>
- <p>For a trivial example, suppose we wanted to add an "OpenFileForWriting"
- function. For many operating systems, if the file doesn't exist, attempting
- to open the file will produce an error. However, lib/System should not
- simply throw that error if it occurs because its a soft error. The problem
- is that the interface function, OpenFileForWriting is too low level. It should
- be OpenOrCreateFileForWriting. In the case of the soft "doesn't exist" error,
- this function would just create it and then open it for writing.</p>
- <p>This design principle needs to be maintained in lib/System because it
- avoids the propagation of soft error handling throughout the rest of LLVM.
- Hard errors will generally just cause a termination for an LLVM tool so don't
- be bashful about throwing them.</p>
- <p>Rules of thumb:</p>
- <ol>
- <li>Don't throw soft errors, only hard errors.</li>
- <li>If you're tempted to throw a soft error, re-think the interface.</li>
- <li>Handle internally the most common normal/good/soft error conditions
- so the rest of LLVM doesn't have to.</li>
- </ol>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="throw_spec">No throw Specifications</a></h3>
-<div>
- <p>None of the lib/System interface functions may be declared with C++
- <tt>throw()</tt> specifications on them. This requirement makes sure that the
- compiler does not insert additional exception handling code into the interface
- functions. This is a performance consideration: lib/System functions are at
- the bottom of many call chains and as such can be frequently called. We
- need them to be as efficient as possible. However, no routines in the
- system library should actually throw exceptions.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="organization">Code Organization</a></h3>
-<div>
- <p>Implementations of the System Library interface are separated by their
- general class of operating system. Currently only Unix and Win32 classes are
- defined but more could be added for other operating system classifications.
- To distinguish which implementation to compile, the code in lib/System uses
- the LLVM_ON_UNIX and LLVM_ON_WIN32 #defines provided via configure through the
- llvm/Config/config.h file. Each source file in lib/System, after implementing
- the generic (operating system independent) functionality needs to include the
- correct implementation using a set of <tt>#if defined(LLVM_ON_XYZ)</tt>
- directives. For example, if we had lib/System/File.cpp, we'd expect to see in
- that file:</p>
- <pre><tt>
- #if defined(LLVM_ON_UNIX)
- #include "Unix/File.cpp"
- #endif
- #if defined(LLVM_ON_WIN32)
- #include "Win32/File.cpp"
- #endif
- </tt></pre>
- <p>The implementation in lib/System/Unix/File.cpp should handle all Unix
- variants. The implementation in lib/System/Win32/File.cpp should handle all
- Win32 variants. What this does is quickly differentiate the basic class of
- operating system that will provide the implementation. The specific details
- for a given platform must still be determined through the use of
- <tt>#ifdef</tt>.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="semantics">Consistent Semantics</a></h3>
-<div>
- <p>The implementation of a lib/System interface can vary drastically between
- platforms. That's okay as long as the end result of the interface function
- is the same. For example, a function to create a directory is pretty straight
- forward on all operating system. System V IPC on the other hand isn't even
- supported on all platforms. Instead of "supporting" System V IPC, lib/System
- should provide an interface to the basic concept of inter-process
- communications. The implementations might use System V IPC if that was
- available or named pipes, or whatever gets the job done effectively for a
- given operating system. In all cases, the interface and the implementation
- must be semantically consistent. </p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="bug">Bug 351</a></h3>
-<div>
- <p>See <a href="http://llvm.org/PR351">bug 351</a>
- for further details on the progress of this work</p>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
- <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-04-19 22:20:34 +0200 (Thu, 19 Apr 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/SystemLibrary.rst b/docs/SystemLibrary.rst
new file mode 100644
index 000000000000..0d0f4fa99482
--- /dev/null
+++ b/docs/SystemLibrary.rst
@@ -0,0 +1,247 @@
+==============
+System Library
+==============
+
+Abstract
+========
+
+This document provides some details on LLVM's System Library, located in the
+source at ``lib/System`` and ``include/llvm/System``. The library's purpose is
+to shield LLVM from the differences between operating systems for the few
+services LLVM needs from the operating system. Much of LLVM is written using
+portability features of standard C++. However, in a few areas, system dependent
+facilities are needed and the System Library is the wrapper around those system
+calls.
+
+By centralizing LLVM's use of operating system interfaces, we make it possible
+for the LLVM tool chain and runtime libraries to be more easily ported to new
+platforms since (theoretically) only ``lib/System`` needs to be ported. This
+library also unclutters the rest of LLVM by removing ``#ifdef`` use and
+special cases for specific operating systems. Such uses are replaced with
+simple calls to the interfaces provided in ``include/llvm/System``.
+
+Note that the System Library is not intended to be a complete operating system
+wrapper (such as the Adaptive Communications Environment (ACE) or Apache
+Portable Runtime (APR)), but only provides the functionality necessary to
+support LLVM.
+
+The System Library was written by Reid Spencer who formulated the design based
+on similar work originating from the eXtensible Programming System (XPS).
+Several people helped with the effort; especially, Jeff Cohen and Henrik Bach
+on the Win32 port.
+
+Keeping LLVM Portable
+=====================
+
+In order to keep LLVM portable, LLVM developers should adhere to a set of
+portability rules associated with the System Library. Adherence to these rules
+should help the System Library achieve its goal of shielding LLVM from the
+variations in operating system interfaces and doing so efficiently. The
+following sections define the rules needed to fulfill this objective.
+
+Don't Include System Headers
+----------------------------
+
+Except in ``lib/System``, no LLVM source code should directly ``#include`` a
+system header. Care has been taken to remove all such ``#includes`` from LLVM
+while ``lib/System`` was being developed. Specifically this means that header
+files like "``unistd.h``", "``windows.h``", "``stdio.h``", and "``string.h``"
+are forbidden to be included by LLVM source code outside the implementation of
+``lib/System``.
+
+To obtain system-dependent functionality, existing interfaces to the system
+found in ``include/llvm/System`` should be used. If an appropriate interface is
+not available, it should be added to ``include/llvm/System`` and implemented in
+``lib/System`` for all supported platforms.
+
+Don't Expose System Headers
+---------------------------
+
+The System Library must shield LLVM from **all** system headers. To obtain
+system level functionality, LLVM source must ``#include "llvm/System/Thing.h"``
+and nothing else. This means that ``Thing.h`` cannot expose any system header
+files. This protects LLVM from accidentally using system specific functionality
+and only allows it via the ``lib/System`` interface.
+
+Use Standard C Headers
+----------------------
+
+The **standard** C headers (the ones beginning with "c") are allowed to be
+exposed through the ``lib/System`` interface. These headers and the things they
+declare are considered to be platform agnostic. LLVM source files may include
+them directly or obtain their inclusion through ``lib/System`` interfaces.
+
+Use Standard C++ Headers
+------------------------
+
+The **standard** C++ headers from the standard C++ library and standard
+template library may be exposed through the ``lib/System`` interface. These
+headers and the things they declare are considered to be platform agnostic.
+LLVM source files may include them or obtain their inclusion through
+``lib/System`` interfaces.
+
+High Level Interface
+--------------------
+
+The entry points specified in the interface of ``lib/System`` must be aimed at
+completing some reasonably high level task needed by LLVM. We do not want to
+simply wrap each operating system call. It would be preferable to wrap several
+operating system calls that are always used in conjunction with one another by
+LLVM.
+
+For example, consider what is needed to execute a program, wait for it to
+complete, and return its result code. On Unix, this involves the following
+operating system calls: ``getenv``, ``fork``, ``execve``, and ``wait``. The
+correct thing for ``lib/System`` to provide is a function, say
+``ExecuteProgramAndWait``, that implements the functionality completely. What
+we don't want is wrappers for the individual operating system calls involved.
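+
+A minimal sketch of the kind of declaration this rule calls for (the name and
+signature here are illustrative, not the actual ``lib/System`` API):
+
+.. code-block:: c++
+
+  namespace llvm {
+  namespace sys {
+
+  /// Run the program at Path with the given null-terminated argument list,
+  /// wait for it to finish, and return its exit code. On Unix this might be
+  /// implemented with getenv/fork/execve/wait, but none of those calls are
+  /// visible to the caller.
+  int ExecuteProgramAndWait(const char *Path, const char **Args);
+
+  } // end namespace sys
+  } // end namespace llvm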
+
+There must **not** be a one-to-one relationship between operating system
+calls and the System library's interface. Any such interface function will be
+suspicious.
+
+No Unused Functionality
+-----------------------
+
+There must be no functionality specified in the interface of ``lib/System``
+that isn't actually used by LLVM. We're not writing a general-purpose
+operating system wrapper here, just enough to satisfy LLVM's needs, and LLVM
+doesn't need much. This design goal aims to keep the ``lib/System`` interface
+small and understandable, which should foster its actual use and adoption.
+
+No Duplicate Implementations
+----------------------------
+
+The implementation of a function for a given platform must be written exactly
+once. This implies that it must be possible to apply a function's
+implementation to multiple operating systems if those operating systems can
+share the same implementation. This rule applies to the set of operating
+systems supported for a given class of operating system (e.g. Unix, Win32).
+
+No Virtual Methods
+------------------
+
+The System Library interfaces can be called quite frequently by LLVM. In order
+to make those calls as efficient as possible, we discourage the use of virtual
+methods. There is no need to use inheritance for implementation differences;
+it just adds complexity. The ``#include`` mechanism works just fine.
+
+No Exposed Functions
+--------------------
+
+Any functions defined by system libraries (i.e. not defined by ``lib/System``)
+must not be exposed through the ``lib/System`` interface, even if the header
+file for that function is not exposed. This prevents inadvertent use of system
+specific functionality.
+
+For example, the ``stat`` system call is notorious for having variations in the
+data it provides. ``lib/System`` must not declare ``stat`` nor allow it to be
+declared. Instead, it should provide its own interface for discovering
+information about files and directories. Those interfaces may be implemented in
+terms of ``stat`` but that is strictly an implementation detail. The interface
+provided by the System Library must be implemented on all platforms (even those
+without ``stat``).
+
+No Exposed Data
+---------------
+
+Any data defined by system libraries (i.e. not defined by ``lib/System``) must
+not be exposed through the ``lib/System`` interface, even if the header file
+for that function is not exposed. As with functions, this prevents inadvertent
+use of data that might not exist on all platforms.
+
+Minimize Soft Errors
+--------------------
+
+Operating system interfaces will generally provide error results for every
+little thing that could go wrong. In almost all cases, you can divide these
+error results into two groups: normal/good/soft and abnormal/bad/hard. That is,
+some of the errors are simply informational, like "file not found" or
+"insufficient privileges", while other errors are much harder, like "out of
+space", "bad disk sector", or "system call interrupted". We'll call the first
+group "*soft*" errors and the second group "*hard*" errors.
+
+``lib/System`` must always attempt to minimize soft errors. This is a design
+requirement because the minimization of soft errors can affect the granularity
+and the nature of the interface. In general, if you find yourself wanting to
+throw soft errors, you must review the granularity of the interface because it
+is likely you're trying to implement something that is too low level. The rule
+of thumb is to provide interface functions that **can't** fail, except when
+faced with hard errors.
+
+For a trivial example, suppose we wanted to add an "``OpenFileForWriting``"
+function. For many operating systems, if the file doesn't exist, attempting to
+open the file will produce an error. However, ``lib/System`` should not simply
+throw that error if it occurs, because it's a soft error. The problem is that
+the interface function ``OpenFileForWriting`` is too low level. It should be
+``OpenOrCreateFileForWriting``. In the case of the soft "doesn't exist" error,
+this function would simply create the file and then open it for writing.
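+
+A sketch of the contrast (both declarations are illustrative, not the actual
+interface):
+
+.. code-block:: c++
+
+  namespace llvm {
+  namespace sys {
+
+  // Too low level: callers must handle the soft "doesn't exist" error.
+  // bool OpenFileForWriting(const char *Path, int &FD);
+
+  /// Better granularity: the "doesn't exist" case is absorbed by the
+  /// interface, which creates the file; only hard errors remain.
+  bool OpenOrCreateFileForWriting(const char *Path, int &FD);
+
+  } // end namespace sys
+  } // end namespace llvm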
+
+This design principle needs to be maintained in ``lib/System`` because it
+avoids the propagation of soft error handling throughout the rest of LLVM.
+Hard errors will generally just cause a termination for an LLVM tool, so don't
+be bashful about throwing them.
+
+Rules of thumb:
+
+#. Don't throw soft errors, only hard errors.
+
+#. If you're tempted to throw a soft error, re-think the interface.
+
+#. Handle internally the most common normal/good/soft error conditions
+ so the rest of LLVM doesn't have to.
+
+No throw Specifications
+-----------------------
+
+None of the ``lib/System`` interface functions may be declared with C++
+``throw()`` specifications on them. This requirement makes sure that the
+compiler does not insert additional exception handling code into the interface
+functions. This is a performance consideration: ``lib/System`` functions are at
+the bottom of many call chains and as such can be frequently called. We need
+them to be as efficient as possible. However, no routines in the system
+library should actually throw exceptions.
+
+Code Organization
+-----------------
+
+Implementations of the System Library interface are separated by their general
+class of operating system. Currently only Unix and Win32 classes are defined
+but more could be added for other operating system classifications. To
+distinguish which implementation to compile, the code in ``lib/System`` uses
+the ``LLVM_ON_UNIX`` and ``LLVM_ON_WIN32`` ``#defines`` provided via configure
+through the ``llvm/Config/config.h`` file. Each source file in ``lib/System``,
+after implementing the generic (operating system independent) functionality,
+needs to include the correct implementation using a set of
+``#if defined(LLVM_ON_XYZ)`` directives. For example, if we had
+``lib/System/File.cpp``, we'd expect to see in that file:
+
+.. code-block:: c++
+
+ #if defined(LLVM_ON_UNIX)
+ #include "Unix/File.cpp"
+ #endif
+ #if defined(LLVM_ON_WIN32)
+ #include "Win32/File.cpp"
+ #endif
+
+The implementation in ``lib/System/Unix/File.cpp`` should handle all Unix
+variants. The implementation in ``lib/System/Win32/File.cpp`` should handle all
+Win32 variants. This scheme quickly selects the basic class of operating
+system that will provide the implementation. The specific details for
+a given platform must still be determined through the use of ``#ifdef``.
+
+Consistent Semantics
+--------------------
+
+The implementation of a ``lib/System`` interface can vary drastically between
+platforms. That's okay as long as the end result of the interface function is
+the same. For example, a function to create a directory is pretty
+straightforward on all operating systems. System V IPC, on the other hand,
+isn't even
+supported on all platforms. Instead of "supporting" System V IPC,
+``lib/System`` should provide an interface to the basic concept of
+inter-process communications. The implementations might use System V IPC if
+it is available, or named pipes, or whatever gets the job done effectively
+for a given operating system. In all cases, the interface and the
+implementation must be semantically consistent.
+
diff --git a/docs/TableGen/LangRef.rst b/docs/TableGen/LangRef.rst
new file mode 100644
index 000000000000..bd28a9031d74
--- /dev/null
+++ b/docs/TableGen/LangRef.rst
@@ -0,0 +1,383 @@
+===========================
+TableGen Language Reference
+===========================
+
+.. sectionauthor:: Sean Silva <silvas@purdue.edu>
+
+.. contents::
+ :local:
+
+.. warning::
+ This document is extremely rough. If you find something lacking, please
+ fix it, file a documentation bug, or ask about it on llvmdev.
+
+Introduction
+============
+
+This document is meant to be a normative spec about the TableGen language
+in and of itself (i.e. how to understand a given construct in terms of how
+it affects the final set of records represented by the TableGen file). If
+you are unsure if this document is really what you are looking for, please
+read :doc:`/TableGenFundamentals` first.
+
+Notation
+========
+
+The lexical and syntax notation used here is intended to imitate
+`Python's`_. In particular, for lexical definitions, the productions
+operate at the character level and there is no implied whitespace between
+elements. The syntax definitions operate at the token level, so there is
+implied whitespace between tokens.
+
+.. _`Python's`: http://docs.python.org/py3k/reference/introduction.html#notation
+
+Lexical Analysis
+================
+
+TableGen supports BCPL (``// ...``) and nestable C-style (``/* ... */``)
+comments.
+
+The following is a listing of the basic punctuation tokens::
+
+ - + [ ] { } ( ) < > : ; . = ? #
+
+Numeric literals take one of the following forms:
+
+.. TableGen actually will lex some pretty strange sequences and interpret
+ them as numbers. What is shown here is an attempt to approximate what it
+ "should" accept.
+
+.. productionlist::
+ TokInteger: `DecimalInteger` | `HexInteger` | `BinInteger`
+ DecimalInteger: ["+" | "-"] ("0"..."9")+
+ HexInteger: "0x" ("0"..."9" | "a"..."f" | "A"..."F")+
+ BinInteger: "0b" ("0" | "1")+
+
+One aspect to note is that the :token:`DecimalInteger` token *includes* the
+``+`` or ``-``, as opposed to having ``+`` and ``-`` be unary operators as
+most languages do.
+
+TableGen has identifier-like tokens:
+
+.. productionlist::
+ ualpha: "a"..."z" | "A"..."Z" | "_"
+ TokIdentifier: ("0"..."9")* `ualpha` (`ualpha` | "0"..."9")*
+ TokVarName: "$" `ualpha` (`ualpha` | "0"..."9")*
+
+Note that unlike most languages, TableGen allows :token:`TokIdentifier` to
+begin with a number. In case of ambiguity, a token will be interpreted as a
+numeric literal rather than an identifier.
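+
+For example (an illustrative sketch following the grammar above)::
+
+  def 2x2;           // "2x2" lexes as a single identifier
+  def X {
+    int A = 0x2A;    // "0x2A" also matches the identifier rule, but the
+                     // numeric interpretation (hex 42) wins
+  }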
+
+TableGen also has two string-like literals:
+
+.. productionlist::
+ TokString: '"' <non-'"' characters and C-like escapes> '"'
+ TokCodeFragment: "[{" <shortest text not containing "}]"> "}]"
+
+.. note::
+ The current implementation accepts the following C-like escapes::
+
+ \\ \' \" \t \n
+
+TableGen also has the following keywords::
+
+ bit bits class code dag
+ def foreach defm field in
+ int let list multiclass string
+
+TableGen also has "bang operators", which have a wide variety of meanings:
+
+.. productionlist::
+ BangOperator: one of
+ :!eq !if !head !tail !con
+ :!add !shl !sra !srl
+ :!cast !empty !subst !foreach !strconcat
+
+Syntax
+======
+
+TableGen has an ``include`` mechanism. It does not play a role in the
+syntax per se, since it is lexically replaced with the contents of the
+included file.
+
+.. productionlist::
+ IncludeDirective: "include" `TokString`
+
+TableGen's top-level production consists of "objects".
+
+.. productionlist::
+ TableGenFile: `Object`*
+ Object: `Class` | `Def` | `Defm` | `Let` | `MultiClass` | `Foreach`
+
+``class``\es
+------------
+
+.. productionlist::
+ Class: "class" `TokIdentifier` [`TemplateArgList`] `ObjectBody`
+
+A ``class`` declaration creates a record which other records can inherit
+from. A class can be parametrized by a list of "template arguments", whose
+values can be used in the class body.
+
+A given class can only be defined once. A ``class`` declaration is
+considered to define the class if any of the following is true:
+
+.. break ObjectBody into its constituents so that they are present here?
+
+#. The :token:`TemplateArgList` is present.
+#. The :token:`Body` in the :token:`ObjectBody` is present and is not empty.
+#. The :token:`BaseClassList` in the :token:`ObjectBody` is present.
+
+You can declare an empty class by giving an empty :token:`TemplateArgList`
+and an empty :token:`ObjectBody`. This can serve as a restricted form of
+forward declaration: note that records deriving from the forward-declared
+class will inherit no fields from it since the record expansion is done
+when the record is parsed.
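+
+For example (an illustrative sketch)::
+
+  class Foo;        // forward declaration: no template args, empty body
+  def Bar : Foo;    // valid, but Bar inherits no fields from Foo
+  class Foo {       // the actual definition
+    int X = 4;
+  }
+
+Here ``Bar`` does not receive the field ``X``, since ``Bar`` was expanded
+before ``Foo``'s definition was parsed.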
+
+.. productionlist::
+ TemplateArgList: "<" `Declaration` ("," `Declaration`)* ">"
+
+Declarations
+------------
+
+.. Omitting mention of arcane "field" prefix to discourage its use.
+
+The declaration syntax is pretty much what you would expect as a C++
+programmer.
+
+.. productionlist::
+ Declaration: `Type` `TokIdentifier` ["=" `Value`]
+
+It assigns the value, if present, to the identifier.
+
+Types
+-----
+
+.. productionlist::
+ Type: "string" | "code" | "bit" | "int" | "dag"
+ :| "bits" "<" `TokInteger` ">"
+ :| "list" "<" `Type` ">"
+ :| `ClassID`
+ ClassID: `TokIdentifier`
+
+Both ``string`` and ``code`` correspond to the string type; the difference
+is purely to indicate programmer intention.
+
+The :token:`ClassID` must identify a class that has been previously
+declared or defined.
+
+Values
+------
+
+.. productionlist::
+ Value: `SimpleValue` `ValueSuffix`*
+ ValueSuffix: "{" `RangeList` "}"
+ :| "[" `RangeList` "]"
+ :| "." `TokIdentifier`
+ RangeList: `RangePiece` ("," `RangePiece`)*
+ RangePiece: `TokInteger`
+ :| `TokInteger` "-" `TokInteger`
+ :| `TokInteger` `TokInteger`
+
+The peculiar last form of :token:`RangePiece` is due to the fact that the
+"``-``" is included in the :token:`TokInteger`, hence ``1-5`` gets lexed as
+two consecutive :token:`TokInteger`'s, with values ``1`` and ``-5``,
+instead of "1", "-", and "5".
+The :token:`RangeList` can be thought of as specifying a "list slice" in some
+contexts.
+
+:token:`SimpleValue` has a number of forms:
+
+.. productionlist::
+ SimpleValue: `TokIdentifier`
+
+The value will be the variable referenced by the identifier. It can be one
+of:
+
+.. The code for this is exceptionally abstruse. These examples are a
+ best-effort attempt.
+
+* name of a ``def``, such as the use of ``Bar`` in::
+
+ def Bar : SomeClass {
+ int X = 5;
+ }
+
+ def Foo {
+ SomeClass Baz = Bar;
+ }
+
+* value local to a ``def``, such as the use of ``Bar`` in::
+
+ def Foo {
+ int Bar = 5;
+ int Baz = Bar;
+ }
+
+* a template arg of a ``class``, such as the use of ``Bar`` in::
+
+ class Foo<int Bar> {
+ int Baz = Bar;
+ }
+
+* value local to a ``multiclass``, such as the use of ``Bar`` in::
+
+ multiclass Foo {
+ int Bar = 5;
+ int Baz = Bar;
+ }
+
+* a template arg to a ``multiclass``, such as the use of ``Bar`` in::
+
+ multiclass Foo<int Bar> {
+ int Baz = Bar;
+ }
+
+.. productionlist::
+ SimpleValue: `TokInteger`
+
+This represents the numeric value of the integer.
+
+.. productionlist::
+ SimpleValue: `TokString`+
+
+Multiple adjacent string literals are concatenated like in C/C++. The value
+is the concatenation of the strings.
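+
+For example (illustrative)::
+
+  def X {
+    string S = "Hello, " "World!";   // S is "Hello, World!"
+  }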
+
+.. productionlist::
+ SimpleValue: `TokCodeFragment`
+
+The value is the string value of the code fragment.
+
+.. productionlist::
+ SimpleValue: "?"
+
+``?`` represents an "unset" initializer.
+
+.. productionlist::
+ SimpleValue: "{" `ValueList` "}"
+ ValueList: [`ValueListNE`]
+ ValueListNE: `Value` ("," `Value`)*
+
+This represents a sequence of bits, as would be used to initialize a
+``bits<n>`` field (where ``n`` is the number of bits).
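+
+For example (illustrative)::
+
+  def X {
+    bits<4> B = { 0, 1, 0, 1 };   // a 4-bit field initialized bit by bit
+  }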
+
+.. productionlist::
+ SimpleValue: `ClassID` "<" `ValueListNE` ">"
+
+This generates a new anonymous record definition (as would be created by an
+unnamed ``def`` inheriting from the given class with the given template
+arguments) and the value is the value of that record definition.
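+
+For example (an illustrative sketch)::
+
+  class Pair<int A, int B> {
+    int Sum = !add(A, B);
+  }
+  def X {
+    Pair P = Pair<1, 2>;   // anonymous record with Sum = 3
+  }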
+
+.. productionlist::
+ SimpleValue: "[" `ValueList` "]" ["<" `Type` ">"]
+
+A list initializer. The optional :token:`Type` can be used to indicate a
+specific element type, otherwise the element type will be deduced from the
+given values.
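+
+For example (illustrative)::
+
+  def X {
+    list<int> Ints = [1, 2, 3];
+  }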
+
+.. The initial `DagArg` of the dag must start with an identifier or
+ !cast, but this is more of an implementation detail and so for now just
+ leave it out.
+
+.. productionlist::
+ SimpleValue: "(" `DagArg` `DagArgList` ")"
+ DagArgList: `DagArg` ("," `DagArg`)*
+ DagArg: `Value` [":" `TokVarName`] | `TokVarName`
+
+The initial :token:`DagArg` is called the "operator" of the dag.
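+
+For example (an illustrative sketch)::
+
+  def op;
+  def X {
+    dag D = (op 1:$lhs, 2:$rhs);   // "op" is the operator of the dag
+  }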
+
+.. productionlist::
+ SimpleValue: `BangOperator` ["<" `Type` ">"] "(" `ValueListNE` ")"
+
+Bodies
+------
+
+.. productionlist::
+ ObjectBody: `BaseClassList` `Body`
+ BaseClassList: [":" `BaseClassListNE`]
+ BaseClassListNE: `SubClassRef` ("," `SubClassRef`)*
+ SubClassRef: (`ClassID` | `MultiClassID`) ["<" `ValueList` ">"]
+ DefmID: `TokIdentifier`
+
+The version with the :token:`MultiClassID` is only valid in the
+:token:`BaseClassList` of a ``defm``.
+The :token:`MultiClassID` should be the name of a ``multiclass``.
+
+.. put this somewhere else
+
+It is after parsing the base class list that the "let stack" is applied.
+
+.. productionlist::
+ Body: ";" | "{" BodyList "}"
+ BodyList: BodyItem*
+ BodyItem: `Declaration` ";"
+ :| "let" `TokIdentifier` [`RangeList`] "=" `Value` ";"
+
+The ``let`` form allows overriding the value of an inherited field.
+
+``def``
+-------
+
+.. TODO::
+ There can be pastes in the names here, like ``#NAME#``. Look into that
+ and document it (it boils down to ParseIDValue with IDParseMode ==
+ ParseNameMode). ParseObjectName calls into the general ParseValue, with
+   the only difference from "arbitrary expression parsing" being IDParseMode
+ == Mode.
+
+.. productionlist::
+ Def: "def" `TokIdentifier` `ObjectBody`
+
+Defines a record whose name is given by the :token:`TokIdentifier`. The
+fields of the record are inherited from the base classes and defined in the
+body.
+
+Special handling occurs if this ``def`` appears inside a ``multiclass`` or
+a ``foreach``.
+
+``defm``
+--------
+
+.. productionlist::
+ Defm: "defm" `TokIdentifier` ":" `BaseClassListNE` ";"
+
+Note that in the :token:`BaseClassList`, all of the ``multiclass``\es must
+precede any ``class``\es that appear.
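+
+For example (an illustrative sketch)::
+
+  multiclass Multi {
+    def _a;
+    def _b;
+  }
+  defm X : Multi;    // defines records X_a and X_b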
+
+``foreach``
+-----------
+
+.. productionlist::
+ Foreach: "foreach" `Declaration` "in" "{" `Object`* "}"
+ :| "foreach" `Declaration` "in" `Object`
+
+The value assigned to the variable in the declaration is iterated over, and
+the object or object list is re-evaluated with the variable bound to each
+iterated value.
+
+Top-Level ``let``
+-----------------
+
+.. productionlist::
+ Let: "let" `LetList` "in" "{" `Object`* "}"
+ :| "let" `LetList` "in" `Object`
+ LetList: `LetItem` ("," `LetItem`)*
+ LetItem: `TokIdentifier` [`RangeList`] "=" `Value`
+
+This is effectively equivalent to ``let`` inside the body of a record
+except that it applies to multiple records at a time. The bindings are
+applied at the end of parsing the base classes of a record.
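+
+For example (an illustrative sketch)::
+
+  class C {
+    int V = 0;
+  }
+  let V = 4 in {
+    def X : C;    // X.V is 4
+    def Y : C;    // Y.V is 4
+  }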
+
+``multiclass``
+--------------
+
+.. productionlist::
+ MultiClass: "multiclass" `TokIdentifier` [`TemplateArgList`]
+ : [":" `BaseMultiClassList`] "{" `MultiClassObject`+ "}"
+ BaseMultiClassList: `MultiClassID` ("," `MultiClassID`)*
+ MultiClassID: `TokIdentifier`
+ MultiClassObject: `Def` | `Defm` | `Let` | `Foreach`
diff --git a/docs/TableGenFundamentals.rst b/docs/TableGenFundamentals.rst
index bfb2618998a9..4fe4bb986a2f 100644
--- a/docs/TableGenFundamentals.rst
+++ b/docs/TableGenFundamentals.rst
@@ -1,5 +1,3 @@
-.. _tablegen:
-
=====================
TableGen Fundamentals
=====================
@@ -120,16 +118,16 @@ this (at the time of this writing):
}
...
-This definition corresponds to a 32-bit register-register add instruction in the
-X86. The string after the '``def``' string indicates the name of the
-record---"``ADD32rr``" in this case---and the comment at the end of the line
-indicates the superclasses of the definition. The body of the record contains
-all of the data that TableGen assembled for the record, indicating that the
-instruction is part of the "X86" namespace, the pattern indicating how the the
-instruction should be emitted into the assembly file, that it is a two-address
-instruction, has a particular encoding, etc. The contents and semantics of the
-information in the record is specific to the needs of the X86 backend, and is
-only shown as an example.
+This definition corresponds to the 32-bit register-register ``add`` instruction
+of the x86 architecture. ``def ADD32rr`` defines a record named
+``ADD32rr``, and the comment at the end of the line indicates the superclasses
+of the definition. The body of the record contains all of the data that
+TableGen assembled for the record, indicating that the instruction is part of
+the "X86" namespace, the pattern indicating how the instruction should be
+emitted into the assembly file, that it is a two-address instruction, has a
+particular encoding, etc. The contents and semantics of the information in the
+record are specific to the needs of the X86 backend, and are only shown as an
+example.
As you can see, a lot of information is needed for every instruction supported
by the code generator, and specifying it all manually would be unmaintainable,
@@ -152,13 +150,12 @@ factor out the common features that instructions of its class share. A key
feature of TableGen is that it allows the end-user to define the abstractions
they prefer to use when describing their information.
-Each def record has a special entry called "``NAME``." This is the name of the
-def ("``ADD32rr``" above). In the general case def names can be formed from
-various kinds of string processing expressions and ``NAME`` resolves to the
+Each ``def`` record has a special entry called "NAME". This is the name of the
+record ("``ADD32rr``" above). In the general case ``def`` names can be formed
+from various kinds of string processing expressions and ``NAME`` resolves to the
final value obtained after resolving all of those expressions. The user may
-refer to ``NAME`` anywhere she desires to use the ultimate name of the def.
-``NAME`` should not be defined anywhere else in user code to avoid conflict
-problems.
+refer to ``NAME`` anywhere she desires to use the ultimate name of the ``def``.
+``NAME`` should not be defined anywhere else in user code to avoid conflicts.
Running TableGen
----------------
@@ -794,6 +791,10 @@ Expressions used by code generator to describe instructions and isel patterns:
TableGen backends
=================
+Until we get a step-by-step HowTo for writing TableGen backends, you can at
+least grab the boilerplate (build system, new files, etc.) from Clang's
+r173931.
+
TODO: How they work, how to write one. This section should not contain details
about any particular backend, except maybe ``-print-enums`` as an example. This
should highlight the APIs in ``TableGen/Record.h``.
diff --git a/docs/TestSuiteMakefileGuide.html b/docs/TestSuiteMakefileGuide.html
deleted file mode 100644
index 1b24250380fb..000000000000
--- a/docs/TestSuiteMakefileGuide.html
+++ /dev/null
@@ -1,351 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>LLVM test-suite Makefile Guide</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>
- LLVM test-suite Makefile Guide
-</h1>
-
-<ol>
- <li><a href="#overview">Overview</a></li>
- <li><a href="#testsuitestructure">Test suite structure</a></li>
- <li><a href="#testsuiterun">Running the test suite</a>
- <ul>
- <li><a href="#testsuiteexternal">Configuring External Tests</a></li>
- <li><a href="#testsuitetests">Running different tests</a></li>
- <li><a href="#testsuiteoutput">Generating test output</a></li>
- <li><a href="#testsuitecustom">Writing custom tests for test-suite</a></li>
- </ul>
- </li>
-</ol>
-
-<div class="doc_author">
- <p>Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner</p>
-</div>
-
-<!--=========================================================================-->
-<h2><a name="overview">Overview</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>This document describes the features of the Makefile-based LLVM
-test-suite. This way of interacting with the test-suite is deprecated in favor
-of running the test-suite using LNT, but may continue to prove useful for some
-users. See the Testing
-Guide's <a href="TestingGuide.html#testsuitequickstart">test-suite
-Quickstart</a> section for more information.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="testsuitestructure">Test suite Structure</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>The <tt>test-suite</tt> module contains a number of programs that can be compiled
-with LLVM and executed. These programs are compiled using the native compiler
-and various LLVM backends. The output from the program compiled with the
-native compiler is assumed correct; the results from the other programs are
-compared to the native program output and pass if they match.</p>
-
-<p>When executing tests, it is usually a good idea to start out with a subset of
-the available tests or programs. This makes test run times smaller at first and
-later on this is useful to investigate individual test failures. To run some
-test only on a subset of programs, simply change directory to the programs you
-want tested and run <tt>gmake</tt> there. Alternatively, you can run a different
-test using the <tt>TEST</tt> variable to change what tests or run on the
-selected programs (see below for more info).</p>
-
-<p>In addition for testing correctness, the <tt>test-suite</tt> directory also
-performs timing tests of various LLVM optimizations. It also records
-compilation times for the compilers and the JIT. This information can be
-used to compare the effectiveness of LLVM's optimizations and code
-generation.</p>
-
-<p><tt>test-suite</tt> tests are divided into three types of tests: MultiSource,
-SingleSource, and External.</p>
-
-<ul>
-<li><tt>test-suite/SingleSource</tt>
-<p>The SingleSource directory contains test programs that are only a single
-source file in size. These are usually small benchmark programs or small
-programs that calculate a particular value. Several such programs are grouped
-together in each directory.</p></li>
-
-<li><tt>test-suite/MultiSource</tt>
-<p>The MultiSource directory contains subdirectories which contain entire
-programs with multiple source files. Large benchmarks and whole applications
-go here.</p></li>
-
-<li><tt>test-suite/External</tt>
-<p>The External directory contains Makefiles for building code that is external
-to (i.e., not distributed with) LLVM. The most prominent members of this
-directory are the SPEC 95 and SPEC 2000 benchmark suites. The <tt>External</tt>
-directory does not contain these actual tests, but only the Makefiles that know
-how to properly compile these programs from somewhere else. The presence and
-location of these external programs is configured by the test-suite
-<tt>configure</tt> script.</p></li>
-</ul>
-
-<p>Each tree is then subdivided into several categories, including applications,
-benchmarks, regression tests, code that is strange grammatically, etc. These
-organizations should be relatively self explanatory.</p>
-
-<p>Some tests are known to fail. Some are bugs that we have not fixed yet;
-others are features that we haven't added yet (or may never add). In the
-regression tests, the result for such tests will be XFAIL (eXpected FAILure).
-In this way, you can tell the difference between an expected and unexpected
-failure.</p>
-
-<p>The tests in the test suite have no such feature at this time. If the
-test passes, only warnings and other miscellaneous output will be generated. If
-a test fails, a large &lt;program&gt; FAILED message will be displayed. This
-will help you separate benign warnings from actual test failures.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="testsuiterun">Running the test suite</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>First, all tests are executed within the LLVM object directory tree. They
-<i>are not</i> executed inside of the LLVM source tree. This is because the
-test suite creates temporary files during execution.</p>
-
-<p>To run the test suite, you need to use the following steps:</p>
-
-<ol>
- <li><tt>cd</tt> into the <tt>llvm/projects</tt> directory in your source tree.
- </li>
-
- <li><p>Check out the <tt>test-suite</tt> module with:</p>
-
-<div class="doc_code">
-<pre>
-% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
-</pre>
-</div>
- <p>This will get the test suite into <tt>llvm/projects/test-suite</tt>.</p>
- </li>
- <li><p>Configure and build <tt>llvm</tt>.</p></li>
- <li><p>Configure and build <tt>llvm-gcc</tt>.</p></li>
- <li><p>Install <tt>llvm-gcc</tt> somewhere.</p></li>
- <li><p><em>Re-configure</em> <tt>llvm</tt> from the top level of
- each build tree (LLVM object directory tree) in which you want
- to run the test suite, just as you do before building LLVM.</p>
- <p>During the <em>re-configuration</em>, you must either: (1)
- have <tt>llvm-gcc</tt> you just built in your path, or (2)
- specify the directory where your just-built <tt>llvm-gcc</tt> is
- installed using <tt>--with-llvmgccdir=$LLVM_GCC_DIR</tt>.</p>
- <p>You must also tell the configure machinery that the test suite
- is available so it can be configured for your build tree:</p>
-<div class="doc_code">
-<pre>
-% cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR]
-</pre>
-</div>
- <p>[Remember that <tt>$LLVM_GCC_DIR</tt> is the directory where you
- <em>installed</em> llvm-gcc, not its src or obj directory.]</p>
- </li>
-
- <li><p>You can now run the test suite from your build tree as follows:</p>
-<div class="doc_code">
-<pre>
-% cd $LLVM_OBJ_ROOT/projects/test-suite
-% make
-</pre>
-</div>
- </li>
-</ol>
-<p>Note that the second and third steps only need to be done once. After you
-have the suite checked out and configured, you don't need to do it again (unless
-the test code or configure script changes).</p>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuiteexternal">Configuring External Tests</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-<p>In order to run the External tests in the <tt>test-suite</tt>
- module, you must specify <i>--with-externals</i>. This
- must be done during the <em>re-configuration</em> step (see above),
- and the <tt>llvm</tt> re-configuration must recognize the
- previously-built <tt>llvm-gcc</tt>. If any of these is missing or
- neglected, the External tests won't work.</p>
-<dl>
-<dt><i>--with-externals</i></dt>
-<dt><i>--with-externals=&lt;<tt>directory</tt>&gt;</i></dt>
-</dl>
- This tells LLVM where to find any external tests. They are expected to be
- in specifically named subdirectories of &lt;<tt>directory</tt>&gt;.
- If <tt>directory</tt> is left unspecified,
- <tt>configure</tt> uses the default value
- <tt>/home/vadve/shared/benchmarks/speccpu2000/benchspec</tt>.
- Subdirectory names known to LLVM include:
- <dl>
- <dt>spec95</dt>
- <dt>speccpu2000</dt>
- <dt>speccpu2006</dt>
- <dt>povray31</dt>
- </dl>
- Others are added from time to time, and can be determined from
- <tt>configure</tt>.
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuitetests">Running different tests</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-<div>
-<p>In addition to the regular "whole program" tests, the <tt>test-suite</tt>
-module also provides a mechanism for compiling the programs in different ways.
-If the variable TEST is defined on the <tt>gmake</tt> command line, the test system will
-include a Makefile named <tt>TEST.&lt;value of TEST variable&gt;.Makefile</tt>.
-This Makefile can modify build rules to yield different results.</p>
-
-<p>For example, the LLVM nightly tester uses <tt>TEST.nightly.Makefile</tt> to
-create the nightly test reports. To run the nightly tests, run <tt>gmake
-TEST=nightly</tt>.</p>
-
-<p>There are several TEST Makefiles available in the tree. Some of them are
-designed for internal LLVM research and will not work outside of the LLVM
-research group. They may still be valuable, however, as a guide to writing your
-own TEST Makefile for any optimization or analysis passes that you develop with
-LLVM.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuiteoutput">Generating test output</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-<div>
- <p>There are a number of ways to run the tests and generate output. The most
- simple one is simply running <tt>gmake</tt> with no arguments. This will
- compile and run all programs in the tree using a number of different methods
- and compare results. Any failures are reported in the output, but are likely
- drowned in the other output. Passes are not reported explicitly.</p>
-
- <p>Somewhat better is running <tt>gmake TEST=sometest test</tt>, which runs
- the specified test and usually adds per-program summaries to the output
- (depending on which sometest you use). For example, the <tt>nightly</tt> test
- explicitly outputs TEST-PASS or TEST-FAIL for every test after each program.
- Though these lines are still drowned in the output, it's easy to grep the
- output logs in the Output directories.</p>
-
- <p>Even better are the <tt>report</tt> and <tt>report.format</tt> targets
- (where <tt>format</tt> is one of <tt>html</tt>, <tt>csv</tt>, <tt>text</tt> or
- <tt>graphs</tt>). The exact contents of the report are dependent on which
- <tt>TEST</tt> you are running, but the text results are always shown at the
- end of the run and the results are always stored in the
- <tt>report.&lt;type&gt;.format</tt> file (when running with
- <tt>TEST=&lt;type&gt;</tt>).
-
- The <tt>report</tt> also generate a file called
- <tt>report.&lt;type&gt;.raw.out</tt> containing the output of the entire test
- run.
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuitecustom">Writing custom tests for the test suite</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>Assuming you can run the test suite (e.g. "<tt>gmake TEST=nightly report</tt>"
-should work), it is really easy to run optimizations or code generator
-components against every program in the tree, collecting statistics or running
-custom checks for correctness. At base, this is how the nightly tester works;
-it's just one example of a general framework.</p>
-
-<p>Let's say that you have an LLVM optimization pass, and you want to see how
-many times it triggers. The first thing you should do is add an LLVM
-<a href="ProgrammersManual.html#Statistic">statistic</a> to your pass, which
-will tally counts of things you care about.</p>
-
-<p>Following this, you can set up a test and a report that collects these and
-formats them for easy viewing. This consists of two files, a
-"<tt>test-suite/TEST.XXX.Makefile</tt>" fragment (where XXX is the name of your
-test) and a "<tt>test-suite/TEST.XXX.report</tt>" file that indicates how to
-format the output into a table. There are many example reports of various
-levels of sophistication included with the test suite, and the framework is very
-general.</p>
-
-<p>If you are interested in testing an optimization pass, check out the
-"libcalls" test as an example. It can be run like this:</p>
-
-<div class="doc_code">
-<pre>
-% cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level
-% make TEST=libcalls report
-</pre>
-</div>
-
-<p>This will do a bunch of stuff, then eventually print a table like this:</p>
-
-<div class="doc_code">
-<pre>
-Name | total | #exit |
-...
-FreeBench/analyzer/analyzer | 51 | 6 |
-FreeBench/fourinarow/fourinarow | 1 | 1 |
-FreeBench/neural/neural | 19 | 9 |
-FreeBench/pifft/pifft | 5 | 3 |
-MallocBench/cfrac/cfrac | 1 | * |
-MallocBench/espresso/espresso | 52 | 12 |
-MallocBench/gs/gs | 4 | * |
-Prolangs-C/TimberWolfMC/timberwolfmc | 302 | * |
-Prolangs-C/agrep/agrep | 33 | 12 |
-Prolangs-C/allroots/allroots | * | * |
-Prolangs-C/assembler/assembler | 47 | * |
-Prolangs-C/bison/mybison | 74 | * |
-...
-</pre>
-</div>
-
-<p>This basically greps the <tt>-stats</tt> output and displays it in a table.
-You can also use the "TEST=libcalls report.html" target to get the table in HTML
-form, and similarly for report.csv and report.tex.</p>
-
-<p>The source for this is in test-suite/TEST.libcalls.*. The format is pretty
-simple: the Makefile indicates how to run the test (in this case,
-"<tt>opt -simplify-libcalls -stats</tt>"), and the report contains one line for
-each column of the output. The first value is the header for the column and the
-second is the regular expression used to grep the command's output. There are
-lots of example reports that can do fancy stuff.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner<br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/TestSuiteMakefileGuide.rst b/docs/TestSuiteMakefileGuide.rst
new file mode 100644
index 000000000000..e2852a073518
--- /dev/null
+++ b/docs/TestSuiteMakefileGuide.rst
@@ -0,0 +1,276 @@
+==============================
+LLVM test-suite Makefile Guide
+==============================
+
+.. contents::
+ :local:
+
+Overview
+========
+
+This document describes the features of the Makefile-based LLVM
+test-suite. This way of interacting with the test-suite is deprecated in
+favor of running the test-suite using LNT, but may continue to prove
+useful for some users. See the Testing Guide's :ref:`test-suite Quickstart
+<test-suite-quickstart>` section for more information.
+
+Test suite Structure
+====================
+
+The ``test-suite`` module contains a number of programs that can be
+compiled with LLVM and executed. These programs are compiled using the
+native compiler and various LLVM backends. The output from the program
+compiled with the native compiler is assumed correct; the results from
+the other programs are compared to the native program output and pass if
+they match.
+
+When executing tests, it is usually a good idea to start out with a
+subset of the available tests or programs. This keeps test run times
+small at first, and is useful later for investigating individual test
+failures. To run a test on only a subset of programs, simply change
+directory to the programs you want tested and run ``gmake`` there.
+Alternatively, you can use the ``TEST`` variable to change which tests
+are run on the selected programs (see below for more info).
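+
+For example, to test only the programs under ``SingleSource`` (one
+possible subset; any other subdirectory works the same way):
+
+.. code-block:: bash
+
+   % cd $LLVM_OBJ_ROOT/projects/test-suite/SingleSource
+   % gmake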
+
+In addition to testing correctness, the ``test-suite`` directory also
+performs timing tests of various LLVM optimizations. It also records
+compilation times for the compilers and the JIT. This information can be
+used to compare the effectiveness of LLVM's optimizations and code
+generation.
+
+``test-suite`` tests are divided into three types of tests: MultiSource,
+SingleSource, and External.
+
+- ``test-suite/SingleSource``
+
+ The SingleSource directory contains test programs that are only a
+ single source file in size. These are usually small benchmark
+ programs or small programs that calculate a particular value. Several
+ such programs are grouped together in each directory.
+
+- ``test-suite/MultiSource``
+
+ The MultiSource directory contains subdirectories which contain
+ entire programs with multiple source files. Large benchmarks and
+ whole applications go here.
+
+- ``test-suite/External``
+
+ The External directory contains Makefiles for building code that is
+ external to (i.e., not distributed with) LLVM. The most prominent
+ members of this directory are the SPEC 95 and SPEC 2000 benchmark
+ suites. The ``External`` directory does not contain these actual
+ tests, but only the Makefiles that know how to properly compile these
+ programs from somewhere else. The presence and location of these
+ external programs is configured by the test-suite ``configure``
+ script.
+
+Each tree is then subdivided into several categories, including
+applications, benchmarks, regression tests, code that is strange
+grammatically, etc. These organizations should be relatively
+self-explanatory.
+
+Some tests are known to fail. Some are bugs that we have not fixed yet;
+others are features that we haven't added yet (or may never add). In the
+regression tests, the result for such tests will be XFAIL (eXpected
+FAILure). In this way, you can tell the difference between an expected
+and unexpected failure.
+
+The tests in the test suite have no such feature at this time. If the
+test passes, only warnings and other miscellaneous output will be
+generated. If a test fails, a large ``<program> FAILED`` message will
+be displayed. This will help you separate benign warnings from actual
+test failures.
+
+Running the test suite
+======================
+
+First, all tests are executed within the LLVM object directory tree.
+They *are not* executed inside of the LLVM source tree. This is because
+the test suite creates temporary files during execution.
+
+To run the test suite, you need to use the following steps:
+
+#. ``cd`` into the ``llvm/projects`` directory in your source tree.
+#. Check out the ``test-suite`` module with:
+
+ .. code-block:: bash
+
+ % svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
+
+ This will get the test suite into ``llvm/projects/test-suite``.
+
+#. Configure and build ``llvm``.
+
+#. Configure and build ``llvm-gcc``.
+
+#. Install ``llvm-gcc`` somewhere.
+
+#. *Re-configure* ``llvm`` from the top level of each build tree (LLVM
+ object directory tree) in which you want to run the test suite, just
+ as you do before building LLVM.
+
+   During the *re-configuration*, you must either: (1) have the
+   ``llvm-gcc`` you just built in your path, or (2) specify the
+   directory where your just-built ``llvm-gcc`` is installed using
+   ``--with-llvmgccdir=$LLVM_GCC_DIR``.
+
+ You must also tell the configure machinery that the test suite is
+ available so it can be configured for your build tree:
+
+ .. code-block:: bash
+
+ % cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR]
+
+ [Remember that ``$LLVM_GCC_DIR`` is the directory where you
+ *installed* llvm-gcc, not its src or obj directory.]
+
+#. You can now run the test suite from your build tree as follows:
+
+ .. code-block:: bash
+
+ % cd $LLVM_OBJ_ROOT/projects/test-suite
+ % make
+
+Note that the checkout and configure steps only need to be done once.
+After you have the suite checked out and configured, you don't need to
+do it again (unless the test code or configure script changes).
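+
+For reference, a complete first-time session might look like the
+following sketch, assuming ``llvm-gcc`` has already been built and
+installed into ``$LLVM_GCC_DIR``:
+
+.. code-block:: bash
+
+   % cd $LLVM_SRC_ROOT/projects
+   % svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
+   % cd $LLVM_OBJ_ROOT
+   % $LLVM_SRC_ROOT/configure --with-llvmgccdir=$LLVM_GCC_DIR
+   % cd projects/test-suite
+   % gmake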
+
+Configuring External Tests
+--------------------------
+
+In order to run the External tests in the ``test-suite`` module, you
+must specify *--with-externals*. This must be done during the
+*re-configuration* step (see above), and the ``llvm`` re-configuration
+must recognize the previously-built ``llvm-gcc``. If any of these is
+missing or neglected, the External tests won't work.
+
+* *--with-externals*
+
+* *--with-externals=<directory>*
+
+This tells LLVM where to find any external tests. They are expected to
+be in specifically named subdirectories of ``<directory>``. If
+``directory`` is left unspecified, ``configure`` uses the default value
+``/home/vadve/shared/benchmarks/speccpu2000/benchspec``. Subdirectory
+names known to LLVM include:
+
+* spec95
+
+* speccpu2000
+
+* speccpu2006
+
+* povray31
+
+Others are added from time to time, and can be determined from
+``configure``.
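+
+For example, a re-configuration that enables the External tests from a
+local benchmark directory might look like this (the benchmark path here
+is purely hypothetical):
+
+.. code-block:: bash
+
+   % cd $LLVM_OBJ_ROOT
+   % $LLVM_SRC_ROOT/configure --with-llvmgccdir=$LLVM_GCC_DIR \
+         --with-externals=/opt/benchmarks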
+
+Running different tests
+-----------------------
+
+In addition to the regular "whole program" tests, the ``test-suite``
+module also provides a mechanism for compiling the programs in different
+ways. If the variable ``TEST`` is defined on the ``gmake`` command line,
+the test system will include a Makefile named
+``TEST.<value of TEST variable>.Makefile``. This Makefile can modify
+build rules to yield different results.
+
+For example, the LLVM nightly tester uses ``TEST.nightly.Makefile`` to
+create the nightly test reports. To run the nightly tests, run
+``gmake TEST=nightly``.
+
+There are several TEST Makefiles available in the tree. Some of them are
+designed for internal LLVM research and will not work outside of the
+LLVM research group. They may still be valuable, however, as a guide to
+writing your own TEST Makefile for any optimization or analysis passes
+that you develop with LLVM.
+
+Generating test output
+----------------------
+
+There are a number of ways to run the tests and generate output. The
+simplest is to run ``gmake`` with no arguments. This will compile and
+run all programs in the tree using a number of different methods and
+compare results. Any failures are reported in the output, but are
+likely drowned out by the other output. Passes are not reported
+explicitly.
+
+Somewhat better is running ``gmake TEST=sometest test``, which runs the
+specified test and usually adds per-program summaries to the output
+(depending on which test you use). For example, the ``nightly`` test
+explicitly outputs TEST-PASS or TEST-FAIL for every test after each
+program. Though these lines are still drowned out by the other output,
+it's easy to grep for them in the ``Output`` directories, as shown below.
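+
+For example, after a ``gmake TEST=nightly test`` run, you might search
+the per-program logs for failures like this (a sketch; each program
+directory has its own ``Output`` subdirectory and the exact log names
+vary):
+
+.. code-block:: bash
+
+   % grep -r TEST-FAIL Output/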
+
+Even better are the ``report`` and ``report.format`` targets (where
+``format`` is one of ``html``, ``csv``, ``text`` or ``graphs``). The
+exact contents of the report are dependent on which ``TEST`` you are
+running, but the text results are always shown at the end of the run and
+the results are always stored in the ``report.<type>.format`` file (when
+running with ``TEST=<type>``). The ``report`` targets also generate a
+file called ``report.<type>.raw.out`` containing the output of the
+entire test run.
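+
+For example, to produce an HTML report for the nightly test (following
+the naming convention described above):
+
+.. code-block:: bash
+
+   % gmake TEST=nightly report.html
+   # produces report.nightly.html and report.nightly.raw.out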
+
+Writing custom tests for the test suite
+---------------------------------------
+
+Assuming you can run the test suite (e.g.
+"``gmake TEST=nightly report``" should work), it is really easy to run
+optimizations or code generator components against every program in the
+tree, collecting statistics or running custom checks for correctness. At
+base, this is how the nightly tester works; it's just one example of a
+general framework.
+
+Let's say that you have an LLVM optimization pass, and you want to see
+how many times it triggers. The first thing you should do is add an LLVM
+`statistic <ProgrammersManual.html#Statistic>`_ to your pass, which will
+tally counts of things you care about.
+
+Following this, you can set up a test and a report that collects these
+and formats them for easy viewing. This consists of two files, a
+"``test-suite/TEST.XXX.Makefile``" fragment (where XXX is the name of
+your test) and a "``test-suite/TEST.XXX.report``" file that indicates
+how to format the output into a table. There are many example reports of
+various levels of sophistication included with the test suite, and the
+framework is very general.
+
+If you are interested in testing an optimization pass, check out the
+"libcalls" test as an example. It can be run like this:
+
+.. code-block:: bash
+
+ % cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level
+ % make TEST=libcalls report
+
+This will do a bunch of stuff, then eventually print a table like this:
+
+::
+
+ Name | total | #exit |
+ ...
+ FreeBench/analyzer/analyzer | 51 | 6 |
+ FreeBench/fourinarow/fourinarow | 1 | 1 |
+ FreeBench/neural/neural | 19 | 9 |
+ FreeBench/pifft/pifft | 5 | 3 |
+ MallocBench/cfrac/cfrac | 1 | * |
+ MallocBench/espresso/espresso | 52 | 12 |
+ MallocBench/gs/gs | 4 | * |
+ Prolangs-C/TimberWolfMC/timberwolfmc | 302 | * |
+ Prolangs-C/agrep/agrep | 33 | 12 |
+ Prolangs-C/allroots/allroots | * | * |
+ Prolangs-C/assembler/assembler | 47 | * |
+ Prolangs-C/bison/mybison | 74 | * |
+ ...
+
+This basically greps the ``-stats`` output and displays it in a
+table. You can also use the "TEST=libcalls report.html" target to get
+the table in HTML form, and similarly for report.csv and report.tex.
+
+The source for this is in ``test-suite/TEST.libcalls.*``. The format is
+pretty simple: the Makefile indicates how to run the test (in this case,
+"``opt -simplify-libcalls -stats``"), and the report contains one line
+for each column of the output. The first value is the header for the
+column and the second is the regular expression used to grep the
+command's output. There are lots of example reports that can do fancy
+stuff.
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
deleted file mode 100644
index c313083fa76a..000000000000
--- a/docs/TestingGuide.html
+++ /dev/null
@@ -1,916 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>LLVM Testing Infrastructure Guide</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>
- LLVM Testing Infrastructure Guide
-</h1>
-
-<ol>
- <li><a href="#overview">Overview</a></li>
- <li><a href="#requirements">Requirements</a></li>
- <li><a href="#org">LLVM testing infrastructure organization</a>
- <ul>
- <li><a href="#regressiontests">Regression tests</a></li>
- <li><a href="#testsuite"><tt>test-suite</tt></a></li>
- <li><a href="#debuginfotests">Debugging Information tests</a></li>
- </ul>
- </li>
- <li><a href="#quick">Quick start</a>
- <ul>
- <li><a href="#quickregressiontests">Regression tests</a></li>
- <li><a href="#quickdebuginfotests">Debugging Information tests</a></li>
- </ul>
- </li>
- <li><a href="#rtstructure">Regression test structure</a>
- <ul>
- <li><a href="#rtcustom">Writing new regression tests</a></li>
- <li><a href="#FileCheck">The FileCheck utility</a></li>
- <li><a href="#rtvars">Variables and substitutions</a></li>
- <li><a href="#rtfeatures">Other features</a></li>
- </ul>
- </li>
- <li><a href="#testsuiteoverview"><tt>test-suite</tt> Overview</a>
- <ul>
- <li><a href="#testsuitequickstart"><tt>test-suite</tt> Quickstart</a></li>
- <li><a href="#testsuitemakefiles"><tt>test-suite</tt> Makefiles</a></li>
- </ul>
- </li>
-</ol>
-
-<div class="doc_author">
- <p>Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner</p>
-</div>
-
-<!--=========================================================================-->
-<h2><a name="overview">Overview</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>This document is the reference manual for the LLVM testing infrastructure. It
-documents the structure of the LLVM testing infrastructure, the tools needed to
-use it, and how to add and run tests.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="requirements">Requirements</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>In order to use the LLVM testing infrastructure, you will need all of the
-software required to build LLVM, as well
-as <a href="http://python.org">Python</a> 2.4 or later.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="org">LLVM testing infrastructure organization</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>The LLVM testing infrastructure contains two major categories of tests:
-regression tests and whole programs. The regression tests are contained inside
-the LLVM repository itself under <tt>llvm/test</tt> and are expected to always
-pass -- they should be run before every commit.</p>
-
-<p>The whole programs tests are referred to as the "LLVM test suite" (or
-"test-suite") and are in the <tt>test-suite</tt> module in subversion. For
-historical reasons, these tests are also referred to as the "nightly tests" in
-places, which is less ambiguous than "test-suite" and remains in use although we
-run them much more often than nightly.</p>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="regressiontests">Regression tests</a></h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>The regression tests are small pieces of code that test a specific feature of
-LLVM or trigger a specific bug in LLVM. They are usually written in LLVM
-assembly language, but can be written in other languages if the test targets a
-particular language front end (and the appropriate <tt>--with-llvmgcc</tt>
-options were used at <tt>configure</tt> time of the <tt>llvm</tt> module). These
-tests are driven by the 'lit' testing tool, which is part of LLVM.</p>
-
-<p>These code fragments are not complete programs. The code generated
-from them is never executed to determine correct behavior.</p>
-
-<p>These code fragment tests are located in the <tt>llvm/test</tt>
-directory.</p>
-
-<p>Typically when a bug is found in LLVM, a regression test containing
-just enough code to reproduce the problem should be written and placed
-somewhere underneath this directory. In most cases, this will be a small
-piece of LLVM assembly language code, often distilled from an actual
-application or benchmark.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="testsuite"><tt>test-suite</tt></a></h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>The test suite contains whole programs, which are pieces of code which can be
-compiled and linked into a stand-alone program that can be executed. These
-programs are generally written in high level languages such as C or C++.</p>
-
-<p>These programs are compiled using a user specified compiler and set of flags,
-and then executed to capture the program output and timing information. The
-output of these programs is compared to a reference output to ensure that the
-program is being compiled correctly.</p>
-
-<p>In addition to compiling and executing programs, whole program tests serve as
-a way of benchmarking LLVM performance, both in terms of the efficiency of the
-programs generated as well as the speed with which LLVM compiles, optimizes, and
-generates code.</p>
-
-<p>The test-suite is located in the <tt>test-suite</tt> Subversion module.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="debuginfotests">Debugging Information tests</a></h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>The test suite contains tests to check the quality of debugging information.
-The tests are written in C based languages or in LLVM assembly language.</p>
-
-<p>These tests are compiled and run under a debugger. The debugger output
-is checked to validate the debugging information. See README.txt in the
-test suite for more information. This test suite is located in the
-<tt>debuginfo-tests</tt> Subversion module.</p>
-
-</div>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="quick">Quick start</a></h2>
-<!--=========================================================================-->
-
-<div>
-
- <p>The tests are located in two separate Subversion modules. The regressions
- tests are in the main "llvm" module under the directory
- <tt>llvm/test</tt> (so you get these tests for free with the main llvm
- tree). Use "make check-all" to run the regression tests after building
- LLVM.</p>
-
- <p>The more comprehensive test suite that includes whole programs in C and C++
- is in the <tt>test-suite</tt>
- module. See <a href="#testsuitequickstart"><tt>test-suite</tt> Quickstart</a>
- for more information on running these tests.</p>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="quickregressiontests">Regression tests</a></h3>
-<div>
-<!-- _______________________________________________________________________ -->
-<p>To run all of the LLVM regression tests, use the master Makefile in
- the <tt>llvm/test</tt> directory:</p>
-
-<div class="doc_code">
-<pre>
-% gmake -C llvm/test
-</pre>
-</div>
-
-<p>or</p>
-
-<div class="doc_code">
-<pre>
-% gmake check
-</pre>
-</div>
-
-<p>If you have <a href="http://clang.llvm.org/">Clang</a> checked out and built,
-you can run the LLVM and Clang tests simultaneously using:</p>
-
-<div class="doc_code">
-<pre>
-% gmake check-all
-</pre>
-</div>
-
-<p>To run the tests with Valgrind (Memcheck by default), just append
-<tt>VG=1</tt> to the commands above, e.g.:</p>
-
-<div class="doc_code">
-<pre>
-% gmake check VG=1
-</pre>
-</div>
-
-<p>To run individual tests or subsets of tests, you can use the 'llvm-lit'
-script which is built as part of LLVM. For example, to run the
-'Integer/BitPacked.ll' test by itself you can run:</p>
-
-<div class="doc_code">
-<pre>
-% llvm-lit ~/llvm/test/Integer/BitPacked.ll
-</pre>
-</div>
-
-<p>or to run all of the ARM CodeGen tests:</p>
-
-<div class="doc_code">
-<pre>
-% llvm-lit ~/llvm/test/CodeGen/ARM
-</pre>
-</div>
-
-<p>For more information on using the 'lit' tool, see 'llvm-lit --help' or the
-'lit' man page.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="quickdebuginfotests">Debugging Information tests</a></h3>
-<div>
-<!-- _______________________________________________________________________ -->
-<div>
-
-<p>To run the debugging information tests, simply check out the tests inside
-the clang/test directory.</p>
-
-<div class="doc_code">
-<pre>
-% cd clang/test
-% svn co http://llvm.org/svn/llvm-project/debuginfo-tests/trunk debuginfo-tests
-</pre>
-</div>
-
-<p> These tests are already set up to run as part of clang regression tests.</p>
-
-</div>
-
-</div>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="rtstructure">Regression test structure</a></h2>
-<!--=========================================================================-->
-<div>
- <p>The LLVM regression tests are driven by 'lit' and are located in
- the <tt>llvm/test</tt> directory.</p>
-
- <p>This directory contains a large array of small tests
- that exercise various features of LLVM and ensure that regressions do not
- occur. The directory is broken into several sub-directories, each focused on
- a particular area of LLVM. A few of the important ones are:</p>
-
- <ul>
- <li><tt>Analysis</tt>: checks Analysis passes.</li>
- <li><tt>Archive</tt>: checks the Archive library.</li>
- <li><tt>Assembler</tt>: checks Assembly reader/writer functionality.</li>
- <li><tt>Bitcode</tt>: checks Bitcode reader/writer functionality.</li>
- <li><tt>CodeGen</tt>: checks code generation and each target.</li>
- <li><tt>Features</tt>: checks various features of the LLVM language.</li>
- <li><tt>Linker</tt>: tests bitcode linking.</li>
- <li><tt>Transforms</tt>: tests each of the scalar, IPO, and utility
- transforms to ensure they make the right transformations.</li>
- <li><tt>Verifier</tt>: tests the IR verifier.</li>
- </ul>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="rtcustom">Writing new regression tests</a></h3>
-<!-- _______________________________________________________________________ -->
-<div>
- <p>The regression test structure is very simple, but does require some
- information to be set. This information is gathered via <tt>configure</tt> and
- is written to a file, <tt>lit.site.cfg</tt>
- in <tt>llvm/test</tt>. The <tt>llvm/test</tt> Makefile does this work for
- you.</p>
-
- <p>In order for the regression tests to work, each directory of tests must
- have a <tt>lit.local.cfg</tt> file. Lit looks for this file to determine how
- to run the tests. This file is just Python code and thus is very flexible,
- but we've standardized it for the LLVM regression tests. If you're adding a
- directory of tests, just copy <tt>lit.local.cfg</tt> from another directory to
- get running. The standard <tt>lit.local.cfg</tt> simply specifies which files
- to look in for tests. Any directory that contains only directories does not
- need the <tt>lit.local.cfg</tt> file. Read the
- <a href="http://llvm.org/cmds/lit.html">Lit documentation</a> for more
- information. </p>
-
- <p>The <tt>llvm-runtests</tt> function looks at each file that is passed to
- it and gathers together any lines that match "RUN:". These are the "RUN" lines
- that specify how the test is to be run. So, each test script must contain
- RUN lines if it is to do anything. If there are no RUN lines, the
- <tt>llvm-runtests</tt> function will issue an error and the test will
- fail.</p>
-
- <p>RUN lines are specified in the comments of the test program using the
- keyword <tt>RUN</tt> followed by a colon, and lastly the command (pipeline)
- to execute. Together, these lines form the "script" that
- <tt>llvm-runtests</tt> executes to run the test case. The syntax of the
- RUN lines is similar to a shell's syntax for pipelines including I/O
- redirection and variable substitution. However, even though these lines
- may <i>look</i> like a shell script, they are not. RUN lines are interpreted
- directly by the Tcl <tt>exec</tt> command. They are never executed by a
- shell. Consequently the syntax differs from normal shell script syntax in a
- few ways. You can specify as many RUN lines as needed.</p>
-
- <p>lit performs substitution on each RUN line to replace LLVM tool
- names with the full paths to the executable built for each tool (in
- $(LLVM_OBJ_ROOT)/$(BuildMode)/bin). This ensures that lit does not
- invoke any stray LLVM tools in the user's path during testing.</p>
-
- <p>Each RUN line is executed on its own, distinct from other lines unless
- its last character is <tt>\</tt>. This continuation character causes the RUN
- line to be concatenated with the next one. In this way you can build up long
- pipelines of commands without making huge line lengths. The lines ending in
- <tt>\</tt> are concatenated until a RUN line that doesn't end in <tt>\</tt> is
- found. This concatenated set of RUN lines then constitutes one execution.
- Tcl will substitute variables and arrange for the pipeline to be executed. If
- any process in the pipeline fails, the entire line (and test case) fails too.
- </p>
-
- <p> Below is an example of legal RUN lines in a <tt>.ll</tt> file:</p>
-
-<div class="doc_code">
-<pre>
-; RUN: llvm-as &lt; %s | llvm-dis &gt; %t1
-; RUN: llvm-dis &lt; %s.bc-13 &gt; %t2
-; RUN: diff %t1 %t2
-</pre>
-</div>
-
- <p>As with a Unix shell, the RUN: lines permit pipelines and I/O redirection
- to be used. However, the usage is slightly different than for Bash. To check
- what's legal, see the documentation for the
- <a href="http://www.tcl.tk/man/tcl8.5/TclCmd/exec.htm#M2">Tcl exec</a>
- command and the
- <a href="http://www.tcl.tk/man/tcl8.5/tutorial/Tcl26.html">tutorial</a>.
- The major differences are:</p>
- <ul>
- <li>You can't do <tt>2&gt;&amp;1</tt>. That will cause Tcl to write to a
- file named <tt>&amp;1</tt>. Usually this is done to get stderr to go through
- a pipe. You can do that in tcl with <tt>|&amp;</tt> so replace this idiom:
- <tt>... 2&gt;&amp;1 | grep</tt> with <tt>... |&amp; grep</tt></li>
- <li>You can only redirect to a file, not to another descriptor and not from
- a here document.</li>
- <li>tcl supports redirecting to open files with the @ syntax but you
- shouldn't use that here.</li>
- </ul>
-
- <p>There are some quoting rules that you must pay attention to when writing
- your RUN lines. In general nothing needs to be quoted. Tcl won't strip off any
- quote characters so they will get passed to the invoked program. For
- example:</p>
-
-<div class="doc_code">
-<pre>
-... | grep 'find this string'
-</pre>
-</div>
-
- <p>This will fail because the ' characters are passed to grep. This would
- instruct grep to look for <tt>'find</tt> in the files <tt>this</tt> and
- <tt>string'</tt>. To avoid this, use curly braces to tell Tcl that it should
- treat everything enclosed as one value. So our example would become:</p>
-
-<div class="doc_code">
-<pre>
-... | grep {find this string}
-</pre>
-</div>
-
- <p>Additionally, the characters <tt>[</tt> and <tt>]</tt> are treated
- specially by Tcl. They tell Tcl to interpret the content as a command to
- execute. Since these characters are often used in regular expressions this can
- have disastrous results and cause the entire test run in a directory to fail.
- For example, a common idiom is to look for some basicblock number:</p>
-
-<div class="doc_code">
-<pre>
-... | grep bb[2-8]
-</pre>
-</div>
-
- <p>This, however, will cause Tcl to fail because it's going to try to execute
- a program named "2-8". Instead, what you want is this:</p>
-
-<div class="doc_code">
-<pre>
-... | grep {bb\[2-8\]}
-</pre>
-</div>
-
- <p>Finally, if you need to pass the <tt>\</tt> character down to a program,
- then it must be doubled. This is another Tcl special character. So, suppose
- you had:</p>
-
-<div class="doc_code">
-<pre>
-... | grep 'i32\*'
-</pre>
-</div>
-
- <p>This will fail to match what you want (a pointer to i32). First, the
- <tt>'</tt> characters do not get stripped off. Second, the <tt>\</tt> gets
- stripped off by Tcl so what grep sees is: <tt>'i32*'</tt>. That's not likely
- to match anything. To resolve this, you must use <tt>\\</tt> and the
- <tt>{}</tt>, like this:</p>
-
-<div class="doc_code">
-<pre>
-... | grep {i32\\*}
-</pre>
-</div>
-
-<p>If your system includes GNU <tt>grep</tt>, make sure
-that <tt>GREP_OPTIONS</tt> is not set in your environment. Otherwise,
-you may get invalid results (both false positives and false
-negatives).</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="FileCheck">The FileCheck utility</a></h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>A powerful feature of the RUN: lines is that they allow arbitrary commands
-   to be executed as part of the test harness. While standard (portable) unix
-   tools like 'grep' work fine on run lines, as you see above, there are a lot
-   of caveats due to interaction with Tcl syntax, and we want to make sure the
-   run lines are portable to a wide range of systems. Another major problem is
-   that grep is not very good at verifying that the output of a tool
-   contains a series of different outputs in a specific order. The FileCheck
-   tool was designed to help with these problems.</p>
-
-<p>FileCheck (whose basic command line arguments are described in <a
-   href="http://llvm.org/cmds/FileCheck.html">the FileCheck man page</a>) is
- designed to read a file to check from standard input, and the set of things
- to verify from a file specified as a command line argument. A simple example
- of using FileCheck from a RUN line looks like this:</p>
-
-<div class="doc_code">
-<pre>
-; RUN: llvm-as &lt; %s | llc -march=x86-64 | <b>FileCheck %s</b>
-</pre>
-</div>
-
-<p>This syntax says to pipe the current file ("%s") into llvm-as, pipe that into
-llc, then pipe the output of llc into FileCheck. This means that FileCheck will
-be verifying its standard input (the llc output) against the filename argument
-specified (the original .ll file specified by "%s"). To see how this works,
-let's look at the rest of the .ll file (after the RUN line):</p>
-
-<div class="doc_code">
-<pre>
-define void @sub1(i32* %p, i32 %v) {
-entry:
-; <b>CHECK: sub1:</b>
-; <b>CHECK: subl</b>
- %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
- ret void
-}
-
-define void @inc4(i64* %p) {
-entry:
-; <b>CHECK: inc4:</b>
-; <b>CHECK: incq</b>
- %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
- ret void
-}
-</pre>
-</div>
-
-<p>Here you can see some "CHECK:" lines specified in comments. Now you can see
-how the file is piped into llvm-as, then llc, and the machine code output is
-what we are verifying. FileCheck checks the machine code output to verify that
-it matches what the "CHECK:" lines specify.</p>
-
-<p>The syntax of the CHECK: lines is very simple: they are fixed strings that
-must occur in order. FileCheck defaults to ignoring horizontal whitespace
-differences (e.g. a space is allowed to match a tab) but otherwise, the contents
-of the CHECK: line are required to match something in the test file exactly.</p>
-
-<p>One nice thing about FileCheck (compared to grep) is that it allows merging
-test cases together into logical groups. For example, because the test above
-is checking for the "sub1:" and "inc4:" labels, it will not match unless there
-is a "subl" in between those labels. If it existed somewhere else in the file,
-that would not count: "grep subl" matches if subl exists anywhere in the
-file.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="FileCheck-check-prefix">The FileCheck -check-prefix option</a>
-</h4>
-
-<div>
-
-<p>The FileCheck -check-prefix option allows multiple test configurations to be
-driven from one .ll file. This is useful in many circumstances, for example,
-testing different architectural variants with llc. Here's a simple example:</p>
-
-<div class="doc_code">
-<pre>
-; RUN: llvm-as &lt; %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \
-; RUN: | <b>FileCheck %s -check-prefix=X32</b>
-; RUN: llvm-as &lt; %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 \
-; RUN: | <b>FileCheck %s -check-prefix=X64</b>
-
-define &lt;4 x i32&gt; @pinsrd_1(i32 %s, &lt;4 x i32&gt; %tmp) nounwind {
- %tmp1 = insertelement &lt;4 x i32&gt; %tmp, i32 %s, i32 1
- ret &lt;4 x i32&gt; %tmp1
-; <b>X32:</b> pinsrd_1:
-; <b>X32:</b> pinsrd $1, 4(%esp), %xmm0
-
-; <b>X64:</b> pinsrd_1:
-; <b>X64:</b> pinsrd $1, %edi, %xmm0
-}
-</pre>
-</div>
-
-<p>In this case, we're testing that we get the expected code generation with
-both 32-bit and 64-bit code generation.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="FileCheck-CHECK-NEXT">The "CHECK-NEXT:" directive</a>
-</h4>
-
-<div>
-
-<p>Sometimes you want to match lines and would like to verify that matches
-happen on exactly consecutive lines with no other lines in between them. In
-this case, you can use CHECK: and CHECK-NEXT: directives to specify this. If
-you specified a custom check prefix, just use "&lt;PREFIX&gt;-NEXT:". For
-example, something like this works as you'd expect:</p>
-
-<div class="doc_code">
-<pre>
-define void @t2(&lt;2 x double&gt;* %r, &lt;2 x double&gt;* %A, double %B) {
- %tmp3 = load &lt;2 x double&gt;* %A, align 16
- %tmp7 = insertelement &lt;2 x double&gt; undef, double %B, i32 0
- %tmp9 = shufflevector &lt;2 x double&gt; %tmp3,
- &lt;2 x double&gt; %tmp7,
- &lt;2 x i32&gt; &lt; i32 0, i32 2 &gt;
- store &lt;2 x double&gt; %tmp9, &lt;2 x double&gt;* %r, align 16
- ret void
-
-; <b>CHECK:</b> t2:
-; <b>CHECK:</b> movl 8(%esp), %eax
-; <b>CHECK-NEXT:</b> movapd (%eax), %xmm0
-; <b>CHECK-NEXT:</b> movhpd 12(%esp), %xmm0
-; <b>CHECK-NEXT:</b> movl 4(%esp), %eax
-; <b>CHECK-NEXT:</b> movapd %xmm0, (%eax)
-; <b>CHECK-NEXT:</b> ret
-}
-</pre>
-</div>
-
-<p>CHECK-NEXT: directives reject the input unless there is exactly one newline
-between it and the previous directive. A CHECK-NEXT cannot be the first
-directive in a file.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="FileCheck-CHECK-NOT">The "CHECK-NOT:" directive</a>
-</h4>
-
-<div>
-
-<p>The CHECK-NOT: directive is used to verify that a string doesn't occur
-between two matches (or the first match and the beginning of the file). For
-example, to verify that a load is removed by a transformation, a test like this
-can be used:</p>
-
-<div class="doc_code">
-<pre>
-define i8 @coerce_offset0(i32 %V, i32* %P) {
- store i32 %V, i32* %P
-
- %P2 = bitcast i32* %P to i8*
- %P3 = getelementptr i8* %P2, i32 2
-
- %A = load i8* %P3
- ret i8 %A
-; <b>CHECK:</b> @coerce_offset0
-; <b>CHECK-NOT:</b> load
-; <b>CHECK:</b> ret i8
-}
-</pre>
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="FileCheck-Matching">FileCheck Pattern Matching Syntax</a>
-</h4>
-
-<div>
-
-<!-- {% raw %} -->
-
-<p>The CHECK: and CHECK-NOT: directives both take a pattern to match. For most
-uses of FileCheck, fixed string matching is perfectly sufficient. For some
-things, a more flexible form of matching is desired. To support this, FileCheck
-allows you to specify regular expressions in matching strings, surrounded by
-double braces: <b>{{yourregex}}</b>. Because we want to use fixed string
-matching for a majority of what we do, FileCheck has been designed to support
-mixing and matching fixed string matching with regular expressions. This allows
-you to write things like this:</p>
-
-<div class="doc_code">
-<pre>
-; CHECK: movhpd <b>{{[0-9]+}}</b>(%esp), <b>{{%xmm[0-7]}}</b>
-</pre>
-</div>
-
-<p>In this case, any offset from the ESP register will be allowed, and any xmm
-register will be allowed.</p>
-
-<p>Because regular expressions are enclosed with double braces, they are
-visually distinct, and you don't need to use escape characters within the double
-braces like you would in C. In the rare case that you want to match double
-braces explicitly from the input, you can use something ugly like
-<b>{{[{][{]}}</b> as your pattern.</p>
-
-<!-- {% endraw %} -->
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="FileCheck-Variables">FileCheck Variables</a>
-</h4>
-
-<div>
-
-
-<!-- {% raw %} -->
-
-<p>It is often useful to match a pattern and then verify that it occurs again
-later in the file. For codegen tests, this can be useful to allow any register,
-but verify that that register is used consistently later. To do this, FileCheck
-allows named variables to be defined and substituted into patterns. Here is a
-simple example:</p>
-
-<div class="doc_code">
-<pre>
-; CHECK: test5:
-; CHECK: notw <b>[[REGISTER:%[a-z]+]]</b>
-; CHECK: andw {{.*}}<b>[[REGISTER]]</b>
-</pre>
-</div>
-
-<p>The first check line matches a regex (<tt>%[a-z]+</tt>) and captures it into
-the variable "REGISTER". The second line verifies that whatever is in REGISTER
-occurs later in the file after an "andw". FileCheck variable references are
-always contained in <tt>[[ ]]</tt> pairs, are named, and their names can be
-formed with the regex "<tt>[a-zA-Z][a-zA-Z0-9]*</tt>". If a colon follows the
-name, then it is a definition of the variable, if not, it is a use.</p>
-
-<p>FileCheck variables can be defined multiple times, and uses always get the
-latest value. Note that variables are all read at the start of a "CHECK" line
-and are all defined at the end. This means that if you have something like
-"<tt>CHECK: [[XYZ:.*]]x[[XYZ]]</tt>", the check line will read the previous
-value of the XYZ variable and define a new one after the match is performed. If
-you need to do something like this you can probably take advantage of the fact
-that FileCheck is not actually line-oriented when it matches; this allows you to
-define two separate CHECK lines that match on the same line.
-</p>
-
-<!-- {% endraw %} -->
-
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="rtvars">Variables and substitutions</a></h3>
-<!-- _______________________________________________________________________ -->
-<div>
- <p>With a RUN line there are a number of substitutions that are permitted. In
- general, any Tcl variable that is available in the <tt>substitute</tt>
- function (in <tt>test/lib/llvm.exp</tt>) can be substituted into a RUN line.
- To make a substitution just write the variable's name preceded by a $.
- Additionally, for compatibility reasons with previous versions of the test
- library, certain names can be accessed with an alternate syntax: a % prefix.
- These alternates are deprecated and may go away in a future version.
- </p>
- <p>Here are the available variable names. The alternate syntax is listed in
- parentheses.</p>
-
- <dl style="margin-left: 25px">
- <dt><b>$test</b> (%s)</dt>
- <dd>The full path to the test case's source. This is suitable for passing
- on the command line as the input to an llvm tool.</dd>
-
- <dt><b>$srcdir</b></dt>
- <dd>The source directory from where the "<tt>make check</tt>" was run.</dd>
-
- <dt><b>objdir</b></dt>
- <dd>The object directory that corresponds to the <tt>$srcdir</tt>.</dd>
-
- <dt><b>subdir</b></dt>
- <dd>A partial path from the <tt>test</tt> directory that contains the
- sub-directory that contains the test source being executed.</dd>
-
- <dt><b>srcroot</b></dt>
- <dd>The root directory of the LLVM src tree.</dd>
-
- <dt><b>objroot</b></dt>
- <dd>The root directory of the LLVM object tree. This could be the same
- as the srcroot.</dd>
-
- <dt><b>path</b></dt>
- <dd>The path to the directory that contains the test case source. This is
- for locating any supporting files that are not generated by the test, but
- used by the test.</dd>
-
- <dt><b>tmp</b></dt>
- <dd>The path to a temporary file name that could be used for this test case.
- The file name won't conflict with other test cases. You can append to it if
- you need multiple temporaries. This is useful as the destination of some
- redirected output.</dd>
-
- <dt><b>target_triplet</b> (%target_triplet)</dt>
- <dd>The target triplet that corresponds to the current host machine (the one
- running the test cases). This should probably be called "host".</dd>
-
- <dt><b>link</b> (%link)</dt>
- <dd>The full link command used to link LLVM executables. This has all the
- configured -I, -L and -l options.</dd>
-
- <dt><b>shlibext</b> (%shlibext)</dt>
- <dd>The suffix for the host platform's shared library (dll) files. This
- includes the period as the first character.</dd>
- </dl>
- <p>To add more variables, two things need to be changed. First, add a line in
- the <tt>test/Makefile</tt> that creates the <tt>site.exp</tt> file. This will
- "set" the variable as a global in the site.exp file. Second, in the
- <tt>test/lib/llvm.exp</tt> file, in the substitute proc, add the variable name
- to the list of "global" declarations at the beginning of the proc. That's it,
- the variable can then be used in test scripts.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="rtfeatures">Other Features</a></h3>
-<!-- _______________________________________________________________________ -->
-<div>
- <p>To make RUN line writing easier, there are several shell scripts located
- in the <tt>llvm/test/Scripts</tt> directory. This directory is in the PATH
- when running tests, so you can just call these scripts using their name. For
- example:</p>
- <dl>
- <dt><b>ignore</b></dt>
- <dd>This script runs its arguments and then always returns 0. This is useful
- in cases where the test needs to cause a tool to generate an error (e.g. to
- check the error output). However, any program in a pipeline that returns a
- non-zero result will cause the test to fail. This script overcomes that
- issue and nicely documents that the test case is purposefully ignoring the
- result code of the tool.</dd>
-
- <dt><b>not</b></dt>
- <dd>This script runs its arguments and then inverts the result code from
- it. Zero result codes become 1. Non-zero result codes become 0. This is
- useful to invert the result of a grep. For example "not grep X" means
- succeed only if you don't find X in the input.</dd>
- </dl>
-
- <p>Sometimes it is necessary to mark a test case as "expected fail" or XFAIL.
- You can easily mark a test as XFAIL just by including <tt>XFAIL: </tt> on a
- line near the top of the file. This signals that the test case should succeed
- if the test fails. Such test cases are counted separately by the testing
- tool. To specify an expected fail, use the XFAIL keyword in the comments of
- the test program followed by a colon and one or more failure patterns. Each
- failure pattern can be either '*' (to specify fail everywhere), or a part of a
- target triple (indicating the test should fail on that platform), or the name
- of a configurable feature (for example, "loadable_module"). If there is a
- match, the test is expected to fail. If not, the test is expected to
- succeed. To XFAIL everywhere just specify <tt>XFAIL: *</tt>. Here is an
- example of an <tt>XFAIL</tt> line:</p>
-
-<div class="doc_code">
-<pre>
-; XFAIL: darwin,sun
-</pre>
-</div>
-
- <p>To make the output more useful, the <tt>llvm_runtest</tt> function will
- scan the lines of the test case for ones that contain a pattern that matches
- PR[0-9]+. This is the syntax for specifying a PR (Problem Report) number that
- is related to the test case. The number after "PR" specifies the LLVM bugzilla
- number. When a PR number is specified, it will be used in the pass/fail
- reporting. This is useful to quickly get some context when a test fails.</p>
-
- <p>Finally, any line that contains "END." will cause the special
- interpretation of lines to terminate. This is generally done right after the
- last RUN: line. This has two side effects: (a) it prevents special
- interpretation of lines that are part of the test program, not the
- instructions to the test case, and (b) it speeds things up for really big test
- cases by avoiding interpretation of the remainder of the file.</p>
-
-</div>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="testsuiteoverview"><tt>test-suite</tt> Overview</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>The <tt>test-suite</tt> module contains a number of programs that can be
-compiled and executed. The <tt>test-suite</tt> includes reference outputs for
-all of the programs, so that the output of the executed program can be checked
-for correctness.</p>
-
-<p><tt>test-suite</tt> tests are divided into three types of tests: MultiSource,
-SingleSource, and External.</p>
-
-<ul>
-<li><tt>test-suite/SingleSource</tt>
-<p>The SingleSource directory contains test programs that are only a single
-source file in size. These are usually small benchmark programs or small
-programs that calculate a particular value. Several such programs are grouped
-together in each directory.</p></li>
-
-<li><tt>test-suite/MultiSource</tt>
-<p>The MultiSource directory contains subdirectories which contain entire
-programs with multiple source files. Large benchmarks and whole applications
-go here.</p></li>
-
-<li><tt>test-suite/External</tt>
-<p>The External directory contains Makefiles for building code that is external
-to (i.e., not distributed with) LLVM. The most prominent members of this
-directory are the SPEC 95 and SPEC 2000 benchmark suites. The <tt>External</tt>
-directory does not contain these actual tests, but only the Makefiles that know
-how to properly compile these programs from somewhere else. When
-using <tt>LNT</tt>, use the <tt>--test-externals</tt> option to include these
-tests in the results.</p></li>
-</ul>
-</div>
-
-<!--=========================================================================-->
-<h2><a name="testsuitequickstart"><tt>test-suite</tt> Quickstart</a></h2>
-<!--=========================================================================-->
-
-<div>
-<p>The modern way of running the <tt>test-suite</tt> is focused on testing and
-benchmarking complete compilers using
-the <a href="http://llvm.org/docs/lnt">LNT</a> testing infrastructure.</p>
-
-<p>For more information on using LNT to execute the <tt>test-suite</tt>, please
-see the <a href="http://llvm.org/docs/lnt/quickstart.html">LNT Quickstart</a>
-documentation.</p>
-</div>
-
-<!--=========================================================================-->
-<h2><a name="testsuitemakefiles"><tt>test-suite</tt> Makefiles</a></h2>
-<!--=========================================================================-->
-
-<div>
-<p>Historically, the <tt>test-suite</tt> was executed using a complicated setup
-of Makefiles. The LNT based approach above is recommended for most users, but
-there are some testing scenarios which are not supported by the LNT approach. In
-addition, LNT currently uses the Makefile setup under the covers and so
-developers who are interested in how LNT works under the hood may want to
-understand the Makefile based setup.</p>
-
-<p>For more information on the <tt>test-suite</tt> Makefile setup, please see
-the <a href="TestSuiteMakefileGuide.html">Test Suite Makefile Guide.</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner<br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-11-07 18:00:18 +0100 (Wed, 07 Nov 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/TestingGuide.rst b/docs/TestingGuide.rst
new file mode 100644
index 000000000000..79cedee764f7
--- /dev/null
+++ b/docs/TestingGuide.rst
@@ -0,0 +1,455 @@
+=================================
+LLVM Testing Infrastructure Guide
+=================================
+
+.. contents::
+ :local:
+
+.. toctree::
+ :hidden:
+
+ TestSuiteMakefileGuide
+
+Overview
+========
+
+This document is the reference manual for the LLVM testing
+infrastructure. It documents the structure of the LLVM testing
+infrastructure, the tools needed to use it, and how to add and run
+tests.
+
+Requirements
+============
+
+In order to use the LLVM testing infrastructure, you will need all of
+the software required to build LLVM, as well as
+`Python <http://python.org>`_ 2.4 or later.
+
+LLVM testing infrastructure organization
+========================================
+
+The LLVM testing infrastructure contains two major categories of tests:
+regression tests and whole programs. The regression tests are contained
+inside the LLVM repository itself under ``llvm/test`` and are expected
+to always pass -- they should be run before every commit.
+
+The whole programs tests are referred to as the "LLVM test suite" (or
+"test-suite") and are in the ``test-suite`` module in subversion. For
+historical reasons, these tests are also referred to as the "nightly
+tests" in places, which is less ambiguous than "test-suite" and remains
+in use although we run them much more often than nightly.
+
+Regression tests
+----------------
+
+The regression tests are small pieces of code that test a specific
+feature of LLVM or trigger a specific bug in LLVM. The language they are
+written in depends on the part of LLVM being tested. These tests are driven by
+the :doc:`Lit <CommandGuide/lit>` testing tool (which is part of LLVM), and
+are located in the ``llvm/test`` directory.
+
+Typically when a bug is found in LLVM, a regression test containing just
+enough code to reproduce the problem should be written and placed
+somewhere underneath this directory. For example, it can be a small
+piece of LLVM IR distilled from an actual application or benchmark.
+
+``test-suite``
+--------------
+
+The test suite contains whole programs, which are pieces of code which
+can be compiled and linked into a stand-alone program that can be
+executed. These programs are generally written in high level languages
+such as C or C++.
+
+These programs are compiled using a user specified compiler and set of
+flags, and then executed to capture the program output and timing
+information. The output of these programs is compared to a reference
+output to ensure that the program is being compiled correctly.
+
+In addition to compiling and executing programs, whole program tests
+serve as a way of benchmarking LLVM performance, both in terms of the
+efficiency of the programs generated as well as the speed with which
+LLVM compiles, optimizes, and generates code.
+
+The test-suite is located in the ``test-suite`` Subversion module.
+
+Debugging Information tests
+---------------------------
+
+The test suite contains tests to check the quality of debugging information.
+The tests are written in C based languages or in LLVM assembly language.
+
+These tests are compiled and run under a debugger. The debugger output
+is checked to validate the debugging information. See README.txt in the
+test suite for more information. This test suite is located in the
+``debuginfo-tests`` Subversion module.
+
+Quick start
+===========
+
+The tests are located in two separate Subversion modules. The
+regressions tests are in the main "llvm" module under the directory
+``llvm/test`` (so you get these tests for free with the main LLVM tree).
+Use ``make check-all`` to run the regression tests after building LLVM.
+
+The more comprehensive test suite that includes whole programs in C and C++
+is in the ``test-suite`` module. See :ref:`test-suite Quickstart
+<test-suite-quickstart>` for more information on running these tests.
+
+Regression tests
+----------------
+
+To run all of the LLVM regression tests, use the master Makefile in the
+``llvm/test`` directory. LLVM Makefiles require GNU Make (read the :doc:`LLVM
+Makefile Guide <MakefileGuide>` for more details):
+
+.. code-block:: bash
+
+ % make -C llvm/test
+
+or:
+
+.. code-block:: bash
+
+ % make check
+
+If you have `Clang <http://clang.llvm.org/>`_ checked out and built, you
+can run the LLVM and Clang tests simultaneously using:
+
+.. code-block:: bash
+
+ % make check-all
+
+To run the tests with Valgrind (Memcheck by default), just append
+``VG=1`` to the commands above, e.g.:
+
+.. code-block:: bash
+
+ % make check VG=1
+
+To run individual tests or subsets of tests, you can use the ``llvm-lit``
+script which is built as part of LLVM. For example, to run the
+``Integer/BitPacked.ll`` test by itself you can run:
+
+.. code-block:: bash
+
+ % llvm-lit ~/llvm/test/Integer/BitPacked.ll
+
+or to run all of the ARM CodeGen tests:
+
+.. code-block:: bash
+
+ % llvm-lit ~/llvm/test/CodeGen/ARM
+
+For more information on using the :program:`lit` tool, see ``llvm-lit --help``
+or the :doc:`lit man page <CommandGuide/lit>`.
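+
+For example, the ``-v`` flag of :program:`lit` shows the output of
+failing tests as they run:
+
+.. code-block:: bash
+
+   % llvm-lit -v ~/llvm/test/CodeGen/ARM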
+
+Debugging Information tests
+---------------------------
+
+To run the debugging information tests, simply check out the tests inside
+the clang/test directory:
+
+.. code-block:: bash
+
+ % cd clang/test
+ % svn co http://llvm.org/svn/llvm-project/debuginfo-tests/trunk debuginfo-tests
+
+These tests are already set up to run as part of clang regression tests.
+
+Regression test structure
+=========================
+
+The LLVM regression tests are driven by :program:`lit` and are located in the
+``llvm/test`` directory.
+
+This directory contains a large array of small tests that exercise
+various features of LLVM and ensure that regressions do not occur.
+The directory is broken into several sub-directories, each focused on a
+particular area of LLVM.
+
+Writing new regression tests
+----------------------------
+
+The regression test structure is very simple, but does require some
+information to be set. This information is gathered via ``configure``
+and is written to a file, ``test/lit.site.cfg``, in the build directory.
+The ``llvm/test`` Makefile does this work for you.
+
+In order for the regression tests to work, each directory of tests must
+have a ``lit.local.cfg`` file. :program:`lit` looks for this file to determine
+how to run the tests. This file is just Python code and thus is very
+flexible, but we've standardized it for the LLVM regression tests. If
+you're adding a directory of tests, just copy ``lit.local.cfg`` from
+another directory to get running. The standard ``lit.local.cfg`` simply
+specifies which files to look in for tests. Any directory that contains
+only directories does not need the ``lit.local.cfg`` file. Read the :doc:`Lit
+documentation <CommandGuide/lit>` for more information.
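+
+As an illustration, a minimal ``lit.local.cfg`` for a directory that
+contains only LLVM IR tests might look like this (a sketch, not the exact
+contents of any particular directory):
+
+.. code-block:: python
+
+ # Treat only files with these suffixes as tests in this directory.
+ config.suffixes = ['.ll']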
+
+Each test file must contain lines starting with "RUN:" that tell :program:`lit`
+how to run it. If there are no RUN lines, :program:`lit` will issue an error
+while running a test.
+
+RUN lines are specified in the comments of the test program using the
+keyword ``RUN`` followed by a colon, and lastly the command (pipeline)
+to execute. Together, these lines form the "script" that :program:`lit`
+executes to run the test case. The syntax of the RUN lines is similar to a
+shell's syntax for pipelines including I/O redirection and variable
+substitution. However, even though these lines may *look* like a shell
+script, they are not. RUN lines are interpreted by :program:`lit`.
+Consequently, the syntax differs from shell in a few ways. You can specify
+as many RUN lines as needed.
+
+:program:`lit` performs substitution on each RUN line to replace LLVM tool names
+with the full path to the executable built for each tool (in
+``$(LLVM_OBJ_ROOT)/$(BuildMode)/bin``). This ensures that :program:`lit` does
+not invoke any stray LLVM tools in the user's path during testing.
+
+Each RUN line is executed on its own, distinct from other lines unless
+its last character is ``\``. This continuation character causes the RUN
+line to be concatenated with the next one. In this way you can build up
+long pipelines of commands without making huge line lengths. The lines
+ending in ``\`` are concatenated until a RUN line that doesn't end in
+``\`` is found. This concatenated set of RUN lines then constitutes one
+execution. :program:`lit` will substitute variables and arrange for the pipeline
+to be executed. If any process in the pipeline fails, the entire line (and
+test case) fails too.
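+
+For example, the following two physical lines form a single logical RUN
+line (a hypothetical sketch):
+
+.. code-block:: llvm
+
+ ; RUN: llvm-as < %s | opt -instcombine | \
+ ; RUN: llvm-dis | FileCheck %s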
+
+Below is an example of legal RUN lines in a ``.ll`` file:
+
+.. code-block:: llvm
+
+ ; RUN: llvm-as < %s | llvm-dis > %t1
+ ; RUN: llvm-dis < %s.bc-13 > %t2
+ ; RUN: diff %t1 %t2
+
+As with a Unix shell, the RUN lines permit pipelines and I/O
+redirection to be used.
+
+There are some quoting rules that you must pay attention to when writing
+your RUN lines. In general nothing needs to be quoted. :program:`lit` won't
+strip off any quote characters so they will get passed to the invoked program.
+To avoid this, use curly braces to tell :program:`lit` that it should treat
+everything enclosed as one value.
+
+In general, you should strive to keep your RUN lines as simple as possible,
+using them only to run tools that generate textual output you can then examine.
+The recommended way to examine output to figure out if the test passes is using
+the :doc:`FileCheck tool <CommandGuide/FileCheck>`. *[The usage of grep in RUN
+lines is deprecated - please do not send or commit patches that use it.]*
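+
+For instance, a small FileCheck-based test might look like the following
+sketch (the function and check line here are illustrative, not taken from
+an actual test):
+
+.. code-block:: llvm
+
+ ; RUN: opt < %s -instcombine -S | FileCheck %s
+
+ define i32 @test(i32 %x) {
+ ; CHECK: ret i32 %x
+   %y = add i32 %x, 0
+   ret i32 %y
+ }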
+
+Fragile tests
+-------------
+
+It is easy to write a fragile test that would fail spuriously if the tool being
+tested outputs a full path to the input file. For example, :program:`opt` by
+default outputs a ``ModuleID``:
+
+.. code-block:: console
+
+ $ cat example.ll
+ define i32 @main() nounwind {
+ ret i32 0
+ }
+
+ $ opt -S /path/to/example.ll
+ ; ModuleID = '/path/to/example.ll'
+
+ define i32 @main() nounwind {
+ ret i32 0
+ }
+
+``ModuleID`` can unexpectedly match against ``CHECK`` lines. For example:
+
+.. code-block:: llvm
+
+ ; RUN: opt -S %s | FileCheck %s
+
+ define i32 @main() nounwind {
+ ; CHECK-NOT: load
+ ret i32 0
+ }
+
+This test will fail if placed into a ``download`` directory, because the
+``ModuleID`` comment then contains the input path, and the string ``load``
+appears inside the word ``download``, matching the ``CHECK-NOT: load`` line.
+
+To make your tests robust, always use ``opt ... < %s`` in the RUN line.
+:program:`opt` does not output a ``ModuleID`` when input comes from stdin.
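+
+Rewritten this way, the example above is no longer sensitive to the
+location of the test file:
+
+.. code-block:: llvm
+
+ ; RUN: opt -S < %s | FileCheck %s
+
+ define i32 @main() nounwind {
+ ; CHECK-NOT: load
+   ret i32 0
+ }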
+
+Variables and substitutions
+---------------------------
+
+With a RUN line there are a number of substitutions that are permitted.
+To make a substitution just write the variable's name preceded by a ``$``.
+Additionally, for compatibility reasons with previous versions of the
+test library, certain names can be accessed with an alternate syntax: a
+% prefix. These alternates are deprecated and may go away in a future
+version.
+
+Here are the available variable names. The alternate syntax is listed in
+parentheses.
+
+``$test`` (``%s``)
+ The full path to the test case's source. This is suitable for passing on
+ the command line as the input to an LLVM tool.
+
+``%(line)``, ``%(line+<number>)``, ``%(line-<number>)``
+ The number of the line where this variable is used, with an optional
+ integer offset. This can be used in tests with multiple RUN lines,
+ which reference the test file's line numbers.
+
+``$srcdir``
+ The source directory from which ``make check`` was run.
+
+``$objdir``
+ The object directory that corresponds to ``$srcdir``.
+
+``$subdir``
+ A partial path from the ``test`` directory down to the sub-directory
+ that contains the test source being executed.
+
+``$srcroot``
+ The root directory of the LLVM source tree.
+
+``$objroot``
+ The root directory of the LLVM object tree. This could be the same as
+ ``$srcroot``.
+
+``$path``
+ The path to the directory that contains the test case source. This is
+ for locating any supporting files that are not generated by the test,
+ but used by the test.
+
+``$tmp``
+ The path to a temporary file name that could be used for this test case.
+ The file name won't conflict with other test cases. You can append to it
+ if you need multiple temporaries. This is useful as the destination of
+ some redirected output.
+
+``$target_triplet`` (``%target_triplet``)
+ The target triplet that corresponds to the current host machine (the one
+ running the test cases). This should probably be called "host".
+
+``$link`` (``%link``)
+ The full link command used to link LLVM executables. This has all the
+ configured ``-I``, ``-L`` and ``-l`` options.
+
+``$shlibext`` (``%shlibext``)
+ The suffix for the host platform's shared library (DLL) files. This
+ includes the period as the first character.
+
+To add more variables, look at ``test/lit.cfg``.
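+
+For example, a pair of RUN lines that use both the ``%s`` and temporary
+file substitutions might look like this (a sketch):
+
+.. code-block:: llvm
+
+ ; RUN: llvm-as < %s > %t.bc
+ ; RUN: llvm-dis < %t.bc | FileCheck %s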
+
+Other Features
+--------------
+
+To make RUN line writing easier, there are several helper scripts and programs
+in the ``llvm/test/Scripts`` directory. This directory is in the PATH
+when running tests, so you can just call these scripts using their name.
+For example:
+
+``ignore``
+ This script runs its arguments and then always returns 0. This is useful
+ in cases where the test needs to cause a tool to generate an error (e.g.
+ to check the error output). However, any program in a pipeline that
+ returns a non-zero result will cause the test to fail. This script
+ overcomes that issue and nicely documents that the test case is
+ purposefully ignoring the result code of the tool.
+``not``
+ This script runs its arguments and then inverts the result code.
+ Zero result codes become 1. Non-zero result codes become 0.
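+
+For example, a test that expects a tool to reject its input might be
+written as follows (a hypothetical sketch; the exact error text depends
+on the tool):
+
+.. code-block:: llvm
+
+ ; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+ ; CHECK: error
+
+ ; The malformed definition below makes llvm-as fail, and "not" inverts
+ ; that failure into a passing result code.
+ define void @broken(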
+
+Sometimes it is necessary to mark a test case as "expected fail" or
+XFAIL. You can easily mark a test as XFAIL just by including ``XFAIL:``
+on a line near the top of the file. This signals that the test case is
+expected to fail, so a failure is not counted as an error. Such test
+cases are counted separately by the testing tool.
+in the comments of the test program followed by a colon and one or more
+failure patterns. Each failure pattern can be either ``*`` (to specify
+fail everywhere), or a part of a target triple (indicating the test
+should fail on that platform), or the name of a configurable feature
+(for example, ``loadable_module``). If there is a match, the test is
+expected to fail. If not, the test is expected to succeed. To XFAIL
+everywhere just specify ``XFAIL: *``. Here is an example of an ``XFAIL``
+line:
+
+.. code-block:: llvm
+
+ ; XFAIL: darwin,sun
+
+To make the output more useful, :program:`lit` will scan
+the lines of the test case for ones that contain a pattern that matches
+``PR[0-9]+``. This is the syntax for specifying a PR (Problem Report) number
+that is related to the test case. The number after "PR" specifies the
+LLVM bugzilla number. When a PR number is specified, it will be used in
+the pass/fail reporting. This is useful to quickly get some context when
+a test fails.
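+
+For example (the PR number here is hypothetical):
+
+.. code-block:: llvm
+
+ ; RUN: opt < %s -instcombine -S | FileCheck %s
+ ; PR1234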
+
+Finally, any line that contains "END." will cause the special
+interpretation of lines to terminate. This is generally done right after
+the last RUN: line. This has two side effects:
+
+(a) it prevents special interpretation of lines that are part of the test
+ program, not the instructions to the test case, and
+
+(b) it speeds things up for really big test cases by avoiding
+ interpretation of the remainder of the file.
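+
+For example (a sketch):
+
+.. code-block:: llvm
+
+ ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+ ; END.
+ ; Nothing below this line, including any stray "RUN:" or "XFAIL:" text,
+ ; is interpreted by lit.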
+
+``test-suite`` Overview
+=======================
+
+The ``test-suite`` module contains a number of programs that can be
+compiled and executed. The ``test-suite`` includes reference outputs for
+all of the programs, so that the output of the executed program can be
+checked for correctness.
+
+``test-suite`` tests are divided into three types of tests: MultiSource,
+SingleSource, and External.
+
+- ``test-suite/SingleSource``
+
+ The SingleSource directory contains test programs that are only a
+ single source file in size. These are usually small benchmark
+ programs or small programs that calculate a particular value. Several
+ such programs are grouped together in each directory.
+
+- ``test-suite/MultiSource``
+
+ The MultiSource directory contains subdirectories which contain
+ entire programs with multiple source files. Large benchmarks and
+ whole applications go here.
+
+- ``test-suite/External``
+
+ The External directory contains Makefiles for building code that is
+ external to (i.e., not distributed with) LLVM. The most prominent
+ members of this directory are the SPEC 95 and SPEC 2000 benchmark
+ suites. The ``External`` directory does not contain these actual
+ tests, but only the Makefiles that know how to properly compile these
+ programs from somewhere else. When using ``LNT``, use the
+ ``--test-externals`` option to include these tests in the results.
+
+.. _test-suite-quickstart:
+
+``test-suite`` Quickstart
+-------------------------
+
+The modern way of running the ``test-suite`` is focused on testing and
+benchmarking complete compilers using the
+`LNT <http://llvm.org/docs/lnt>`_ testing infrastructure.
+
+For more information on using LNT to execute the ``test-suite``, please
+see the `LNT Quickstart <http://llvm.org/docs/lnt/quickstart.html>`_
+documentation.
+
+``test-suite`` Makefiles
+------------------------
+
+Historically, the ``test-suite`` was executed using a complicated setup
+of Makefiles. The LNT based approach above is recommended for most
+users, but there are some testing scenarios which are not supported by
+the LNT approach. In addition, LNT currently uses the Makefile setup
+under the hood, so developers who are interested in how LNT works may
+want to understand the Makefile based setup.
+
+For more information on the ``test-suite`` Makefile setup, please see
+the :doc:`Test Suite Makefile Guide <TestSuiteMakefileGuide>`.
diff --git a/docs/Vectorizers.rst b/docs/Vectorizers.rst
new file mode 100644
index 000000000000..e2d3667bc116
--- /dev/null
+++ b/docs/Vectorizers.rst
@@ -0,0 +1,338 @@
+==========================
+Auto-Vectorization in LLVM
+==========================
+
+.. contents::
+ :local:
+
+LLVM has two vectorizers: the :ref:`Loop Vectorizer <loop-vectorizer>`,
+which operates on loops, and the :ref:`Basic Block Vectorizer
+<bb-vectorizer>`, which optimizes straight-line code. These vectorizers
+focus on different optimization opportunities and use different techniques.
+The BB vectorizer merges multiple scalars that are found in the code into
+vectors while the Loop Vectorizer widens instructions in the original loop
+to operate on multiple consecutive loop iterations.
+
+.. _loop-vectorizer:
+
+The Loop Vectorizer
+===================
+
+Usage
+-----
+
+LLVM's Loop Vectorizer is available for general use. It is not enabled by
+default, but it can be enabled through clang using the command line flag:
+
+.. code-block:: console
+
+ $ clang -fvectorize -O3 file.c
+
+If the ``-fvectorize`` flag is used then the loop vectorizer will be enabled
+when running with ``-O3`` or ``-O2``. When ``-Os`` is used, the loop vectorizer
+will only vectorize loops that do not require a major increase in code size.
+
+We plan to enable the Loop Vectorizer by default as part of the LLVM 3.3 release.
+
+Command line flags
+^^^^^^^^^^^^^^^^^^
+
+The loop vectorizer uses a cost model to decide on the optimal vectorization
+factor and unroll factor. However, users of the vectorizer can force it to use
+specific values. Both ``clang`` and ``opt`` support the flags below.
+
+Users can control the vectorization SIMD width using the command line flag
+``-force-vector-width``:
+
+.. code-block:: console
+
+ $ clang -mllvm -force-vector-width=8 ...
+ $ opt -loop-vectorize -force-vector-width=8 ...
+
+Users can control the unroll factor using the command line flag
+``-force-vector-unroll``:
+
+.. code-block:: console
+
+ $ clang -mllvm -force-vector-unroll=2 ...
+ $ opt -loop-vectorize -force-vector-unroll=2 ...
+
+Features
+--------
+
+The LLVM Loop Vectorizer has a number of features that allow it to vectorize
+complex loops.
+
+Loops with unknown trip count
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Loop Vectorizer supports loops with an unknown trip count.
+In the loop below, the iteration ``start`` and ``end`` points are unknown,
+and the Loop Vectorizer has a mechanism to vectorize loops that do not start
+at zero. In this example, the trip count (``end - start``) may not be a
+multiple of the vector width, and the vectorizer has to execute the last few
+iterations as scalar code. Keeping a scalar copy of the loop increases the
+code size.
+
+.. code-block:: c++
+
+ void bar(float *A, float* B, float K, int start, int end) {
+ for (int i = start; i < end; ++i)
+ A[i] *= B[i] + K;
+ }
+
+Runtime Checks of Pointers
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In the example below, if the pointers A and B point to consecutive addresses,
+then it is illegal to vectorize the code because some elements of A will be
+written before they are read from array B.
+
+Some programmers use the ``restrict`` keyword to notify the compiler that the
+pointers are disjoint, but in our example, the Loop Vectorizer has no way of
+knowing that the pointers A and B are unique. The Loop Vectorizer handles this
+loop by placing code that checks, at runtime, if the arrays A and B point to
+disjoint memory locations. If arrays A and B overlap, then the scalar version
+of the loop is executed.
+
+.. code-block:: c++
+
+ void bar(float *A, float* B, float K, int n) {
+ for (int i = 0; i < n; ++i)
+ A[i] *= B[i] + K;
+ }
+
+
+Reductions
+^^^^^^^^^^
+
+In this example the ``sum`` variable is used by consecutive iterations of
+the loop. Normally, this would prevent vectorization, but the vectorizer can
+detect that ``sum`` is a reduction variable. The variable ``sum`` becomes a
+vector of integers, and at the end of the loop the elements of the vector are
+added together to create the correct result. We support a number of different
+reduction operations, such as addition, multiplication, XOR, AND and OR.
+
+.. code-block:: c++
+
+ int foo(int *A, int *B, int n) {
+ unsigned sum = 0;
+ for (int i = 0; i < n; ++i)
+ sum += A[i] + 5;
+ return sum;
+ }
+
+We support floating point reduction operations when ``-ffast-math`` is used.
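+
+For example, the floating point reduction below (a sketch) can be
+vectorized when ``-ffast-math`` permits the additions to be reassociated:
+
+.. code-block:: c++
+
+ float foo(float *A, int n) {
+   float sum = 0;
+   for (int i = 0; i < n; ++i)
+     sum += A[i];
+   return sum;
+ }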
+
+Inductions
+^^^^^^^^^^
+
+In this example the value of the induction variable ``i`` is saved into an
+array. The Loop Vectorizer knows how to vectorize induction variables.
+
+.. code-block:: c++
+
+ void bar(float *A, float* B, float K, int n) {
+ for (int i = 0; i < n; ++i)
+ A[i] = i;
+ }
+
+If Conversion
+^^^^^^^^^^^^^
+
+The Loop Vectorizer is able to "flatten" the IF statement in the code and
+generate a single stream of instructions. The Loop Vectorizer supports any
+control flow in the innermost loop. The innermost loop may contain complex
+nesting of IFs, ELSEs and even GOTOs.
+
+.. code-block:: c++
+
+ int foo(int *A, int *B, int n) {
+ unsigned sum = 0;
+ for (int i = 0; i < n; ++i)
+ if (A[i] > B[i])
+ sum += A[i] + 5;
+ return sum;
+ }
+
+Pointer Induction Variables
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This example uses the ``accumulate`` function of the standard C++ library. This
+loop uses C++ iterators, which are pointers, and not integer indices.
+The Loop Vectorizer detects pointer induction variables and can vectorize
+this loop. This feature is important because many C++ programs use iterators.
+
+.. code-block:: c++
+
+ int baz(int *A, int n) {
+ return std::accumulate(A, A + n, 0);
+ }
+
+Reverse Iterators
+^^^^^^^^^^^^^^^^^
+
+The Loop Vectorizer can vectorize loops that count backwards.
+
+.. code-block:: c++
+
+ void foo(int *A, int *B, int n) {
+   for (int i = n; i > 0; --i)
+     A[i] += 1;
+ }
+
+Scatter / Gather
+^^^^^^^^^^^^^^^^
+
+The Loop Vectorizer can vectorize code that becomes a sequence of scalar
+instructions that scatter/gather memory.
+
+.. code-block:: c++
+
+ void foo(int *A, int *B, int n, int k) {
+   for (int i = 0; i < n; ++i)
+     A[i*7] += B[i*k];
+ }
+
+Vectorization of Mixed Types
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Loop Vectorizer can vectorize programs with mixed types. The Vectorizer
+cost model can estimate the cost of the type conversion and decide if
+vectorization is profitable.
+
+.. code-block:: c++
+
+ void foo(int *A, char *B, int n, int k) {
+   for (int i = 0; i < n; ++i)
+     A[i] += 4 * B[i];
+ }
+
+Global Structures Alias Analysis
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Access to global structures can also be vectorized, with alias analysis being
+used to make sure accesses don't alias. Run-time checks can also be added on
+pointer access to structure members.
+
+Many variations are supported, but some that rely on undefined behaviour being
+ignored (as other compilers do) are still left unvectorized.
+
+.. code-block:: c++
+
+ struct { int A[100], K, B[100]; } Foo;
+
+ void foo() {
+   for (int i = 0; i < 100; ++i)
+     Foo.A[i] = Foo.B[i] + 100;
+ }
+
+Vectorization of function calls
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Loop Vectorizer can vectorize intrinsic math functions.
+See the table below for a list of these functions.
+
++------+-------+-----------+
+| pow  | exp   | exp2      |
++------+-------+-----------+
+| sin  | cos   | sqrt      |
++------+-------+-----------+
+| log  | log2  | log10     |
++------+-------+-----------+
+| fabs | floor | ceil      |
++------+-------+-----------+
+| fma  | trunc | nearbyint |
++------+-------+-----------+
+|      |       | fmuladd   |
++------+-------+-----------+
+
+The loop vectorizer knows about special instructions on the target and will
+vectorize a loop containing a function call that maps to the instructions. For
+example, the loop below will be vectorized on Intel x86 if the SSE4.1 roundps
+instruction is available.
+
+.. code-block:: c++
+
+ void foo(float *f) {
+ for (int i = 0; i != 1024; ++i)
+ f[i] = floorf(f[i]);
+ }
+
+Partial unrolling during vectorization
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Modern processors feature multiple execution units, and only programs that contain a
+high degree of parallelism can fully utilize the entire width of the machine.
+The Loop Vectorizer increases the instruction level parallelism (ILP) by
+performing partial-unrolling of loops.
+
+In the example below the entire array is accumulated into the variable
+``sum``. This is inefficient because only a single execution port can be
+used by the processor.
+By unrolling the code the Loop Vectorizer allows two or more execution ports
+to be used simultaneously.
+
+.. code-block:: c++
+
+ int foo(int *A, int *B, int n) {
+ unsigned sum = 0;
+ for (int i = 0; i < n; ++i)
+ sum += A[i];
+ return sum;
+ }
+
+The Loop Vectorizer uses a cost model to decide when it is profitable to unroll loops.
+The decision to unroll the loop depends on the register pressure and the generated code size.
+
+Performance
+-----------
+
+This section shows the execution time of Clang on a simple benchmark:
+`gcc-loops <http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/UnitTests/Vectorizer/>`_.
+This benchmark is a collection of loops from the GCC autovectorization
+`page <http://gcc.gnu.org/projects/tree-ssa/vectorization.html>`_ by Dorit Nuzman.
+
+The chart below compares GCC-4.7, ICC-13, and Clang-SVN with and without loop
+vectorization at -O3, tuned for "corei7-avx", running on a Sandybridge iMac.
+The Y-axis shows the time in msec. Lower is better. The last column shows the
+geomean of all the kernels.
+
+.. image:: gcc-loops.png
+
+The next chart shows Linpack-pc with the same configuration. Results are in
+MFLOPS; higher is better.
+
+.. image:: linpack-pc.png
+
+.. _bb-vectorizer:
+
+The Basic Block Vectorizer
+==========================
+
+Usage
+-----
+
+The Basic Block Vectorizer is not enabled by default, but it can be enabled
+through clang using the command line flag:
+
+.. code-block:: console
+
+ $ clang -fslp-vectorize file.c
+
+Details
+-------
+
+The goal of basic-block vectorization (a.k.a. superword-level parallelism) is
+to combine similar independent instructions within simple control-flow regions
+into vector instructions. Memory accesses, arithmetic operations, comparison
+operations and some math functions can all be vectorized using this technique
+(subject to the capabilities of the target architecture).
+
+For example, the following function performs very similar operations on its
+inputs (a1, b1) and (a2, b2). The basic-block vectorizer may combine these
+into vector operations.
+
+.. code-block:: c++
+
+ int foo(int a1, int a2, int b1, int b2) {
+ int r1 = a1*(a1 + b1)/b1 + 50*b1/a1;
+ int r2 = a2*(a2 + b2)/b2 + 50*b2/a2;
+ return r1 + r2;
+ }
+
+
diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
deleted file mode 100644
index b7fdce490472..000000000000
--- a/docs/WritingAnLLVMBackend.html
+++ /dev/null
@@ -1,2557 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>Writing an LLVM Compiler Backend</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>
- Writing an LLVM Compiler Backend
-</h1>
-
-<ol>
- <li><a href="#intro">Introduction</a>
- <ul>
- <li><a href="#Audience">Audience</a></li>
- <li><a href="#Prerequisite">Prerequisite Reading</a></li>
- <li><a href="#Basic">Basic Steps</a></li>
- <li><a href="#Preliminaries">Preliminaries</a></li>
- </ul>
- <li><a href="#TargetMachine">Target Machine</a></li>
- <li><a href="#TargetRegistration">Target Registration</a></li>
- <li><a href="#RegisterSet">Register Set and Register Classes</a>
- <ul>
- <li><a href="#RegisterDef">Defining a Register</a></li>
- <li><a href="#RegisterClassDef">Defining a Register Class</a></li>
- <li><a href="#implementRegister">Implement a subclass of TargetRegisterInfo</a></li>
- </ul></li>
- <li><a href="#InstructionSet">Instruction Set</a>
- <ul>
- <li><a href="#operandMapping">Instruction Operand Mapping</a></li>
- <li><a href="#relationMapping">Instruction Relation Mapping</a></li>
- <li><a href="#implementInstr">Implement a subclass of TargetInstrInfo</a></li>
- <li><a href="#branchFolding">Branch Folding and If Conversion</a></li>
- </ul></li>
- <li><a href="#InstructionSelector">Instruction Selector</a>
- <ul>
- <li><a href="#LegalizePhase">The SelectionDAG Legalize Phase</a>
- <ul>
- <li><a href="#promote">Promote</a></li>
- <li><a href="#expand">Expand</a></li>
- <li><a href="#custom">Custom</a></li>
- <li><a href="#legal">Legal</a></li>
- </ul></li>
- <li><a href="#callingConventions">Calling Conventions</a></li>
- </ul></li>
- <li><a href="#assemblyPrinter">Assembly Printer</a></li>
- <li><a href="#subtargetSupport">Subtarget Support</a></li>
- <li><a href="#jitSupport">JIT Support</a>
- <ul>
- <li><a href="#mce">Machine Code Emitter</a></li>
- <li><a href="#targetJITInfo">Target JIT Info</a></li>
- </ul></li>
-</ol>
-
-<div class="doc_author">
- <p>Written by <a href="http://www.woo.com">Mason Woo</a> and
- <a href="http://misha.brukman.net">Misha Brukman</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="intro">Introduction</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-This document describes techniques for writing compiler backends that convert
-the LLVM Intermediate Representation (IR) to code for a specified machine or
-other languages. Code intended for a specific machine can take the form of
-either assembly code or binary code (usable for a JIT compiler).
-</p>
-
-<p>
-The backend of LLVM features a target-independent code generator that may create
-output for several types of target CPUs &mdash; including X86, PowerPC, ARM,
-and SPARC. The backend may also be used to generate code targeted at SPUs of the
-Cell processor or GPUs to support the execution of compute kernels.
-</p>
-
-<p>
-The document focuses on existing examples found in subdirectories
-of <tt>llvm/lib/Target</tt> in a downloaded LLVM release. In particular, this
-document focuses on the example of creating a static compiler (one that emits
-text assembly) for a SPARC target, because SPARC has fairly standard
-characteristics, such as a RISC instruction set and straightforward calling
-conventions.
-</p>
-
-<h3>
- <a name="Audience">Audience</a>
-</h3>
-
-<div>
-
-<p>
-The audience for this document is anyone who needs to write an LLVM backend to
-generate code for a specific hardware or software target.
-</p>
-
-</div>
-
-<h3>
- <a name="Prerequisite">Prerequisite Reading</a>
-</h3>
-
-<div>
-
-<p>
-These essential documents must be read before reading this document:
-</p>
-
-<ul>
-<li><i><a href="LangRef.html">LLVM Language Reference
- Manual</a></i> &mdash; a reference manual for the LLVM assembly language.</li>
-
-<li><i><a href="CodeGenerator.html">The LLVM
- Target-Independent Code Generator</a></i> &mdash; a guide to the components
- (classes and code generation algorithms) for translating the LLVM internal
- representation into machine code for a specified target. Pay particular
- attention to the descriptions of code generation stages: Instruction
- Selection, Scheduling and Formation, SSA-based Optimization, Register
- Allocation, Prolog/Epilog Code Insertion, Late Machine Code Optimizations,
- and Code Emission.</li>
-
-<li><i><a href="TableGenFundamentals.html">TableGen
- Fundamentals</a></i> &mdash;a document that describes the TableGen
- (<tt>tblgen</tt>) application that manages domain-specific information to
- support LLVM code generation. TableGen processes input from a target
- description file (<tt>.td</tt> suffix) and generates C++ code that can be
- used for code generation.</li>
-
-<li><i><a href="WritingAnLLVMPass.html">Writing an LLVM
- Pass</a></i> &mdash; The assembly printer is a <tt>FunctionPass</tt>, as are
- several SelectionDAG processing steps.</li>
-</ul>
-
-<p>
-To follow the SPARC examples in this document, have a copy of
-<i><a href="http://www.sparc.org/standards/V8.pdf">The SPARC Architecture
-Manual, Version 8</a></i> for reference. For details about the ARM instruction
-set, refer to the <i><a href="http://infocenter.arm.com/">ARM Architecture
-Reference Manual</a></i>. For more about the GNU Assembler format
-(<tt>GAS</tt>), see
-<i><a href="http://sourceware.org/binutils/docs/as/index.html">Using As</a></i>,
-especially for the assembly printer. <i>Using As</i> contains a list of target
-machine dependent features.
-</p>
-
-</div>
-
-<h3>
- <a name="Basic">Basic Steps</a>
-</h3>
-
-<div>
-
-<p>
-To write a compiler backend for LLVM that converts the LLVM IR to code for a
-specified target (machine or other language), follow these steps:
-</p>
-
-<ul>
-<li>Create a subclass of the TargetMachine class that describes characteristics
- of your target machine. Copy existing examples of specific TargetMachine
- class and header files; for example, start with
- <tt>SparcTargetMachine.cpp</tt> and <tt>SparcTargetMachine.h</tt>, but
- change the file names for your target. Similarly, change code that
- references "Sparc" to reference your target. </li>
-
-<li>Describe the register set of the target. Use TableGen to generate code for
- register definition, register aliases, and register classes from a
- target-specific <tt>RegisterInfo.td</tt> input file. You should also write
- additional code for a subclass of the TargetRegisterInfo class that
- represents the class register file data used for register allocation and
- also describes the interactions between registers.</li>
-
-<li>Describe the instruction set of the target. Use TableGen to generate code
- for target-specific instructions from target-specific versions of
- <tt>TargetInstrFormats.td</tt> and <tt>TargetInstrInfo.td</tt>. You should
- write additional code for a subclass of the TargetInstrInfo class to
- represent machine instructions supported by the target machine. </li>
-
-<li>Describe the selection and conversion of the LLVM IR from a Directed Acyclic
- Graph (DAG) representation of instructions to native target-specific
- instructions. Use TableGen to generate code that matches patterns and
- selects instructions based on additional information in a target-specific
- version of <tt>TargetInstrInfo.td</tt>. Write code
- for <tt>XXXISelDAGToDAG.cpp</tt>, where XXX identifies the specific target,
- to perform pattern matching and DAG-to-DAG instruction selection. Also write
- code in <tt>XXXISelLowering.cpp</tt> to replace or remove operations and
- data types that are not supported natively in a SelectionDAG. </li>
-
-<li>Write code for an assembly printer that converts LLVM IR to a GAS format for
- your target machine. You should add assembly strings to the instructions
- defined in your target-specific version of <tt>TargetInstrInfo.td</tt>. You
- should also write code for a subclass of AsmPrinter that performs the
- LLVM-to-assembly conversion and a trivial subclass of TargetAsmInfo.</li>
-
-<li>Optionally, add support for subtargets (i.e., variants with different
- capabilities). You should also write code for a subclass of the
- TargetSubtarget class, which allows you to use the <tt>-mcpu=</tt>
- and <tt>-mattr=</tt> command-line options.</li>
-
-<li>Optionally, add JIT support and create a machine code emitter (subclass of
- TargetJITInfo) that is used to emit binary code directly into memory. </li>
-</ul>
-
-<p>
-In the <tt>.cpp</tt> and <tt>.h</tt>. files, initially stub up these methods and
-then implement them later. Initially, you may not know which private members
-that the class will need and which components will need to be subclassed.
-</p>
-
-</div>
-
-<h3>
- <a name="Preliminaries">Preliminaries</a>
-</h3>
-
-<div>
-
-<p>
-To actually create your compiler backend, you need to create and modify a few
-files. The absolute minimum is discussed here. But to actually use the LLVM
-target-independent code generator, you must perform the steps described in
-the <a href="CodeGenerator.html">LLVM
-Target-Independent Code Generator</a> document.
-</p>
-
-<p>
-First, you should create a subdirectory under <tt>lib/Target</tt> to hold all
-the files related to your target. If your target is called "Dummy," create the
-directory <tt>lib/Target/Dummy</tt>.
-</p>
-
-<p>
-In this new
-directory, create a <tt>Makefile</tt>. It is easiest to copy a
-<tt>Makefile</tt> of another target and modify it. It should at least contain
-the <tt>LEVEL</tt>, <tt>LIBRARYNAME</tt> and <tt>TARGET</tt> variables, and then
-include <tt>$(LEVEL)/Makefile.common</tt>. The library can be
-named <tt>LLVMDummy</tt> (for example, see the MIPS target). Alternatively, you
-can split the library into <tt>LLVMDummyCodeGen</tt>
-and <tt>LLVMDummyAsmPrinter</tt>, the latter of which should be implemented in a
-subdirectory below <tt>lib/Target/Dummy</tt> (for example, see the PowerPC
-target).
-</p>
-
-<p>
-Note that these two naming schemes are hardcoded into <tt>llvm-config</tt>.
-Using any other naming scheme will confuse <tt>llvm-config</tt> and produce a
-lot of (seemingly unrelated) linker errors when linking <tt>llc</tt>.
-</p>
-
-<p>
-To make your target actually do something, you need to implement a subclass of
-<tt>TargetMachine</tt>. This implementation should typically be in the file
-<tt>lib/Target/DummyTargetMachine.cpp</tt>, but any file in
-the <tt>lib/Target</tt> directory will be built and should work. To use LLVM's
-target independent code generator, you should do what all current machine
-backends do: create a subclass of <tt>LLVMTargetMachine</tt>. (To create a
-target from scratch, create a subclass of <tt>TargetMachine</tt>.)
-</p>
-
-<p>
-To get LLVM to actually build and link your target, you need to add it to
-the <tt>TARGETS_TO_BUILD</tt> variable. To do this, you modify the configure
-script to know about your target when parsing the <tt>--enable-targets</tt>
-option. Search the configure script for <tt>TARGETS_TO_BUILD</tt>, add your
-target to the lists there (some creativity required), and then
-reconfigure. Alternatively, you can change <tt>autotools/configure.ac</tt> and
-regenerate configure by running <tt>./autoconf/AutoRegen.sh</tt>.
-</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="TargetMachine">Target Machine</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-<tt>LLVMTargetMachine</tt> is designed as a base class for targets implemented
-with the LLVM target-independent code generator. The <tt>LLVMTargetMachine</tt>
-class should be specialized by a concrete target class that implements the
-various virtual methods. <tt>LLVMTargetMachine</tt> is defined as a subclass of
-<tt>TargetMachine</tt> in <tt>include/llvm/Target/TargetMachine.h</tt>. The
-<tt>TargetMachine</tt> class implementation (<tt>TargetMachine.cpp</tt>) also
-processes numerous command-line options.
-</p>
-
-<p>
-To create a concrete target-specific subclass of <tt>LLVMTargetMachine</tt>,
-start by copying an existing <tt>TargetMachine</tt> class and header. You
-should name the files that you create to reflect your specific target. For
-instance, for the SPARC target, name the files <tt>SparcTargetMachine.h</tt> and
-<tt>SparcTargetMachine.cpp</tt>.
-</p>
-
-<p>
-For a target machine <tt>XXX</tt>, the implementation of
-<tt>XXXTargetMachine</tt> must have access methods to obtain objects that
-represent target components. These methods are named <tt>get*Info</tt>, and are
-intended to obtain the instruction set (<tt>getInstrInfo</tt>), register set
-(<tt>getRegisterInfo</tt>), stack frame layout (<tt>getFrameInfo</tt>), and
-similar information. <tt>XXXTargetMachine</tt> must also implement the
-<tt>getDataLayout</tt> method to access an object with target-specific data
-characteristics, such as data type size and alignment requirements.
-</p>
-
-<p>
-For instance, for the SPARC target, the header file
-<tt>SparcTargetMachine.h</tt> declares prototypes for several <tt>get*Info</tt>
-and <tt>getDataLayout</tt> methods that simply return a class member.
-</p>
-
-<div class="doc_code">
-<pre>
-namespace llvm {
-
-class Module;
-
-class SparcTargetMachine : public LLVMTargetMachine {
- const DataLayout DataLayout; // Calculates type size &amp; alignment
- SparcSubtarget Subtarget;
- SparcInstrInfo InstrInfo;
- TargetFrameInfo FrameInfo;
-
-protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
-public:
- SparcTargetMachine(const Module &amp;M, const std::string &amp;FS);
-
- virtual const SparcInstrInfo *getInstrInfo() const {return &amp;InstrInfo; }
- virtual const TargetFrameInfo *getFrameInfo() const {return &amp;FrameInfo; }
- virtual const TargetSubtarget *getSubtargetImpl() const{return &amp;Subtarget; }
- virtual const TargetRegisterInfo *getRegisterInfo() const {
- return &amp;InstrInfo.getRegisterInfo();
- }
- virtual const DataLayout *getDataLayout() const { return &amp;DataLayout; }
- static unsigned getModuleMatchQuality(const Module &amp;M);
-
- // Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &amp;PM, bool Fast);
- virtual bool addPreEmitPass(PassManagerBase &amp;PM, bool Fast);
-};
-
-} // end namespace llvm
-</pre>
-</div>
-
-<ul>
-<li><tt>getInstrInfo()</tt></li>
-<li><tt>getRegisterInfo()</tt></li>
-<li><tt>getFrameInfo()</tt></li>
-<li><tt>getDataLayout()</tt></li>
-<li><tt>getSubtargetImpl()</tt></li>
-</ul>
-
-<p>For some targets, you also need to support the following methods:</p>
-
-<ul>
-<li><tt>getTargetLowering()</tt></li>
-<li><tt>getJITInfo()</tt></li>
-</ul>
-
-<p>
-In addition, the <tt>XXXTargetMachine</tt> constructor should specify a
-<tt>TargetDescription</tt> string that determines the data layout for the target
-machine, including characteristics such as pointer size, alignment, and
-endianness. For example, the constructor for SparcTargetMachine contains the
-following:
-</p>
-
-<div class="doc_code">
-<pre>
-SparcTargetMachine::SparcTargetMachine(const Module &amp;M, const std::string &amp;FS)
- : DataLayout("E-p:32:32-f128:128:128"),
- Subtarget(M, FS), InstrInfo(Subtarget),
- FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
-}
-</pre>
-</div>
-
-<p>Hyphens separate portions of the <tt>TargetDescription</tt> string.</p>
-
-<ul>
-<li>An upper-case "<tt>E</tt>" in the string indicates a big-endian target data
- model. a lower-case "<tt>e</tt>" indicates little-endian.</li>
-
-<li>"<tt>p:</tt>" is followed by pointer information: size, ABI alignment, and
- preferred alignment. If only two figures follow "<tt>p:</tt>", then the
- first value is pointer size, and the second value is both ABI and preferred
- alignment.</li>
-
-<li>Then a letter for numeric type alignment: "<tt>i</tt>", "<tt>f</tt>",
- "<tt>v</tt>", or "<tt>a</tt>" (corresponding to integer, floating point,
- vector, or aggregate). "<tt>i</tt>", "<tt>v</tt>", or "<tt>a</tt>" are
- followed by ABI alignment and preferred alignment. "<tt>f</tt>" is followed
- by three values: the first indicates the size of a long double, then ABI
- alignment, and then ABI preferred alignment.</li>
-</ul>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="TargetRegistration">Target Registration</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-You must also register your target with the <tt>TargetRegistry</tt>, which is
-what other LLVM tools use to be able to lookup and use your target at
-runtime. The <tt>TargetRegistry</tt> can be used directly, but for most targets
-there are helper templates which should take care of the work for you.</p>
-
-<p>
-All targets should declare a global <tt>Target</tt> object which is used to
-represent the target during registration. Then, in the target's TargetInfo
-library, the target should define that object and use
-the <tt>RegisterTarget</tt> template to register the target. For example, the Sparc registration code looks like this:
-</p>
-
-<div class="doc_code">
-<pre>
-Target llvm::TheSparcTarget;
-
-extern "C" void LLVMInitializeSparcTargetInfo() {
- RegisterTarget&lt;Triple::sparc, /*HasJIT=*/false&gt;
- X(TheSparcTarget, "sparc", "Sparc");
-}
-</pre>
-</div>
-
-<p>
-This allows the <tt>TargetRegistry</tt> to look up the target by name or by
-target triple. In addition, most targets will also register additional features
-which are available in separate libraries. These registration steps are
-separate, because some clients may wish to only link in some parts of the target
--- the JIT code generator does not require the use of the assembler printer, for
-example. Here is an example of registering the Sparc assembly printer:
-</p>
-
-<div class="doc_code">
-<pre>
-extern "C" void LLVMInitializeSparcAsmPrinter() {
- RegisterAsmPrinter&lt;SparcAsmPrinter&gt; X(TheSparcTarget);
-}
-</pre>
-</div>
-
-<p>
-For more information, see
-"<a href="/doxygen/TargetRegistry_8h-source.html">llvm/Target/TargetRegistry.h</a>".
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="RegisterSet">Register Set and Register Classes</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-You should describe a concrete target-specific class that represents the
-register file of a target machine. This class is called <tt>XXXRegisterInfo</tt>
-(where <tt>XXX</tt> identifies the target) and represents the class register
-file data that is used for register allocation. It also describes the
-interactions between registers.
-</p>
-
-<p>
-You also need to define register classes to categorize related registers. A
-register class should be added for groups of registers that are all treated the
-same way for some instruction. Typical examples are register classes for
-integer, floating-point, or vector registers. A register allocator allows an
-instruction to use any register in a specified register class to perform the
-instruction in a similar manner. Register classes allocate virtual registers to
-instructions from these sets, and register classes let the target-independent
-register allocator automatically choose the actual registers.
-</p>
-
-<p>
-Much of the code for registers, including register definition, register aliases,
-and register classes, is generated by TableGen from <tt>XXXRegisterInfo.td</tt>
-input files and placed in <tt>XXXGenRegisterInfo.h.inc</tt> and
-<tt>XXXGenRegisterInfo.inc</tt> output files. Some of the code in the
-implementation of <tt>XXXRegisterInfo</tt> requires hand-coding.
-</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="RegisterDef">Defining a Register</a>
-</h3>
-
-<div>
-
-<p>
-The <tt>XXXRegisterInfo.td</tt> file typically starts with register definitions
-for a target machine. The <tt>Register</tt> class (specified
-in <tt>Target.td</tt>) is used to define an object for each register. The
-specified string <tt>n</tt> becomes the <tt>Name</tt> of the register. The
-basic <tt>Register</tt> object does not have any subregisters and does not
-specify any aliases.
-</p>
-
-<div class="doc_code">
-<pre>
-class Register&lt;string n&gt; {
- string Namespace = "";
- string AsmName = n;
- string Name = n;
- int SpillSize = 0;
- int SpillAlignment = 0;
- list&lt;Register&gt; Aliases = [];
- list&lt;Register&gt; SubRegs = [];
- list&lt;int&gt; DwarfNumbers = [];
-}
-</pre>
-</div>
-
-<p>
-For example, in the <tt>X86RegisterInfo.td</tt> file, there are register
-definitions that utilize the Register class, such as:
-</p>
-
-<div class="doc_code">
-<pre>
-def AL : Register&lt;"AL"&gt;, DwarfRegNum&lt;[0, 0, 0]&gt;;
-</pre>
-</div>
-
-<p>
-This defines the register <tt>AL</tt> and assigns it values (with
-<tt>DwarfRegNum</tt>) that are used by <tt>gcc</tt>, <tt>gdb</tt>, or a debug
-information writer to identify a register. For register
-<tt>AL</tt>, <tt>DwarfRegNum</tt> takes an array of 3 values representing 3
-different modes: the first element is for X86-64, the second for exception
-handling (EH) on X86-32, and the third is generic. -1 is a special Dwarf number
-that indicates the gcc number is undefined, and -2 indicates the register number
-is invalid for this mode.
-</p>
-
-<p>
-From the previously described line in the <tt>X86RegisterInfo.td</tt> file,
-TableGen generates this code in the <tt>X86GenRegisterInfo.inc</tt> file:
-</p>
-
-<div class="doc_code">
-<pre>
-static const unsigned GR8[] = { X86::AL, ... };
-
-const unsigned AL_AliasSet[] = { X86::AX, X86::EAX, X86::RAX, 0 };
-
-const TargetRegisterDesc RegisterDescriptors[] = {
- ...
-{ "AL", "AL", AL_AliasSet, Empty_SubRegsSet, Empty_SubRegsSet, AL_SuperRegsSet }, ...
-</pre>
-</div>
-
-<p>
-From the register info file, TableGen generates a <tt>TargetRegisterDesc</tt>
-object for each register. <tt>TargetRegisterDesc</tt> is defined in
-<tt>include/llvm/Target/TargetRegisterInfo.h</tt> with the following fields:
-</p>
-
-<div class="doc_code">
-<pre>
-struct TargetRegisterDesc {
- const char *AsmName; // Assembly language name for the register
- const char *Name; // Printable name for the reg (for debugging)
- const unsigned *AliasSet; // Register Alias Set
- const unsigned *SubRegs; // Sub-register set
- const unsigned *ImmSubRegs; // Immediate sub-register set
- const unsigned *SuperRegs; // Super-register set
-};</pre>
-</div>
-
-<p>
-TableGen uses the entire target description file (<tt>.td</tt>) to determine
-text names for the register (in the <tt>AsmName</tt> and <tt>Name</tt> fields of
-<tt>TargetRegisterDesc</tt>) and the relationships of other registers to the
-defined register (in the other <tt>TargetRegisterDesc</tt> fields). In this
-example, other definitions establish the registers "<tt>AX</tt>",
-"<tt>EAX</tt>", and "<tt>RAX</tt>" as aliases for one another, so TableGen
-generates a null-terminated array (<tt>AL_AliasSet</tt>) for this register alias
-set.
-</p>
-
-<p>
-The <tt>Register</tt> class is commonly used as a base class for more complex
-classes. In <tt>Target.td</tt>, the <tt>Register</tt> class is the base for the
-<tt>RegisterWithSubRegs</tt> class that is used to define registers that need to
-specify subregisters in the <tt>SubRegs</tt> list, as shown here:
-</p>
-
-<div class="doc_code">
-<pre>
-class RegisterWithSubRegs&lt;string n,
-list&lt;Register&gt; subregs&gt; : Register&lt;n&gt; {
- let SubRegs = subregs;
-}
-</pre>
-</div>
-
-<p>
-In <tt>SparcRegisterInfo.td</tt>, additional register classes are defined for
-SPARC: a Register subclass, SparcReg, and further subclasses: <tt>Ri</tt>,
-<tt>Rf</tt>, and <tt>Rd</tt>. SPARC registers are identified by 5-bit ID
-numbers, which is a feature common to these subclasses. Note the use of
-'<tt>let</tt>' expressions to override values that are initially defined in a
-superclass (such as <tt>SubRegs</tt> field in the <tt>Rd</tt> class).
-</p>
-
-<div class="doc_code">
-<pre>
-class SparcReg&lt;string n&gt; : Register&lt;n&gt; {
- field bits&lt;5&gt; Num;
- let Namespace = "SP";
-}
-// Ri - 32-bit integer registers
-class Ri&lt;bits&lt;5&gt; num, string n&gt; :
-SparcReg&lt;n&gt; {
- let Num = num;
-}
-// Rf - 32-bit floating-point registers
-class Rf&lt;bits&lt;5&gt; num, string n&gt; :
-SparcReg&lt;n&gt; {
- let Num = num;
-}
-// Rd - Slots in the FP register file for 64-bit
-floating-point values.
-class Rd&lt;bits&lt;5&gt; num, string n,
-list&lt;Register&gt; subregs&gt; : SparcReg&lt;n&gt; {
- let Num = num;
- let SubRegs = subregs;
-}
-</pre>
-</div>
-
-<p>
-In the <tt>SparcRegisterInfo.td</tt> file, there are register definitions that
-utilize these subclasses of <tt>Register</tt>, such as:
-</p>
-
-<div class="doc_code">
-<pre>
-def G0 : Ri&lt; 0, "G0"&gt;,
-DwarfRegNum&lt;[0]&gt;;
-def G1 : Ri&lt; 1, "G1"&gt;, DwarfRegNum&lt;[1]&gt;;
-...
-def F0 : Rf&lt; 0, "F0"&gt;,
-DwarfRegNum&lt;[32]&gt;;
-def F1 : Rf&lt; 1, "F1"&gt;,
-DwarfRegNum&lt;[33]&gt;;
-...
-def D0 : Rd&lt; 0, "F0", [F0, F1]&gt;,
-DwarfRegNum&lt;[32]&gt;;
-def D1 : Rd&lt; 2, "F2", [F2, F3]&gt;,
-DwarfRegNum&lt;[34]&gt;;
-</pre>
-</div>
-
-<p>
-The last two registers shown above (<tt>D0</tt> and <tt>D1</tt>) are
-double-precision floating-point registers that are aliases for pairs of
-single-precision floating-point sub-registers. In addition to aliases, the
-sub-register and super-register relationships of the defined register are in
-fields of a register's TargetRegisterDesc.
-</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="RegisterClassDef">Defining a Register Class</a>
-</h3>
-
-<div>
-
-<p>
-The <tt>RegisterClass</tt> class (specified in <tt>Target.td</tt>) is used to
-define an object that represents a group of related registers and also defines
-the default allocation order of the registers. A target description file
-<tt>XXXRegisterInfo.td</tt> that uses <tt>Target.td</tt> can construct register
-classes using the following class:
-</p>
-
-<div class="doc_code">
-<pre>
-class RegisterClass&lt;string namespace,
-list&lt;ValueType&gt; regTypes, int alignment, dag regList&gt; {
- string Namespace = namespace;
- list&lt;ValueType&gt; RegTypes = regTypes;
- int Size = 0; // spill size, in bits; zero lets tblgen pick the size
- int Alignment = alignment;
-
- // CopyCost is the cost of copying a value between two registers
- // default value 1 means a single instruction
- // A negative value means copying is extremely expensive or impossible
- int CopyCost = 1;
- dag MemberList = regList;
-
- // for register classes that are subregisters of this class
- list&lt;RegisterClass&gt; SubRegClassList = [];
-
- code MethodProtos = [{}]; // to insert arbitrary code
- code MethodBodies = [{}];
-}
-</pre>
-</div>
-
-<p>To define a RegisterClass, use the following 4 arguments:</p>
-
-<ul>
-<li>The first argument of the definition is the name of the namespace.</li>
-
-<li>The second argument is a list of <tt>ValueType</tt> register type values
- that are defined in <tt>include/llvm/CodeGen/ValueTypes.td</tt>. Defined
- values include integer types (such as <tt>i16</tt>, <tt>i32</tt>,
- and <tt>i1</tt> for Boolean), floating-point types
- (<tt>f32</tt>, <tt>f64</tt>), and vector types (for example, <tt>v8i16</tt>
- for an <tt>8 x i16</tt> vector). All registers in a <tt>RegisterClass</tt>
- must have the same <tt>ValueType</tt>, but some registers may store vector
- data in different configurations. For example a register that can process a
- 128-bit vector may be able to handle 16 8-bit integer elements, 8 16-bit
- integers, 4 32-bit integers, and so on. </li>
-
-<li>The third argument of the <tt>RegisterClass</tt> definition specifies the
- alignment required of the registers when they are stored or loaded to
- memory.</li>
-
-<li>The final argument, <tt>regList</tt>, specifies which registers are in this
- class. If an alternative allocation order method is not specified, then
- <tt>regList</tt> also defines the order of allocation used by the register
- allocator. Besides simply listing registers with <tt>(add R0, R1, ...)</tt>,
- more advanced set operators are available. See
- <tt>include/llvm/Target/Target.td</tt> for more information.</li>
-</ul>
-
-<p>
-In <tt>SparcRegisterInfo.td</tt>, three RegisterClass objects are defined:
-<tt>FPRegs</tt>, <tt>DFPRegs</tt>, and <tt>IntRegs</tt>. For all three register
-classes, the first argument defines the namespace with the string
-'<tt>SP</tt>'. <tt>FPRegs</tt> defines a group of 32 single-precision
-floating-point registers (<tt>F0</tt> to <tt>F31</tt>); <tt>DFPRegs</tt> defines
-a group of 16 double-precision registers
-(<tt>D0-D15</tt>).
-</p>
-
-<div class="doc_code">
-<pre>
-// F0, F1, F2, ..., F31
-def FPRegs : RegisterClass&lt;"SP", [f32], 32, (sequence "F%u", 0, 31)&gt;;
-
-def DFPRegs : RegisterClass&lt;"SP", [f64], 64,
- (add D0, D1, D2, D3, D4, D5, D6, D7, D8,
- D9, D10, D11, D12, D13, D14, D15)&gt;;
-&nbsp;
-def IntRegs : RegisterClass&lt;"SP", [i32], 32,
- (add L0, L1, L2, L3, L4, L5, L6, L7,
- I0, I1, I2, I3, I4, I5,
- O0, O1, O2, O3, O4, O5, O7,
- G1,
- // Non-allocatable regs:
- G2, G3, G4,
- O6, // stack ptr
- I6, // frame ptr
- I7, // return address
- G0, // constant zero
- G5, G6, G7 // reserved for kernel
- )&gt;;
-</pre>
-</div>
-
-<p>
-Using <tt>SparcRegisterInfo.td</tt> with TableGen generates several output files
-that are intended for inclusion in other source code that you write.
-<tt>SparcRegisterInfo.td</tt> generates <tt>SparcGenRegisterInfo.h.inc</tt>,
-which should be included in the header file for the implementation of the SPARC
-register implementation that you write (<tt>SparcRegisterInfo.h</tt>). In
-<tt>SparcGenRegisterInfo.h.inc</tt> a new structure is defined called
-<tt>SparcGenRegisterInfo</tt> that uses <tt>TargetRegisterInfo</tt> as its
-base. It also specifies types, based upon the defined register
-classes: <tt>DFPRegsClass</tt>, <tt>FPRegsClass</tt>, and <tt>IntRegsClass</tt>.
-</p>
-
-<p>
-<tt>SparcRegisterInfo.td</tt> also generates <tt>SparcGenRegisterInfo.inc</tt>,
-which is included at the bottom of <tt>SparcRegisterInfo.cpp</tt>, the SPARC
-register implementation. The code below shows only the generated integer
-registers and associated register classes. The order of registers
-in <tt>IntRegs</tt> reflects the order in the definition of <tt>IntRegs</tt> in
-the target description file.
-</p>
-
-<div class="doc_code">
-<pre> // IntRegs Register Class...
- static const unsigned IntRegs[] = {
- SP::L0, SP::L1, SP::L2, SP::L3, SP::L4, SP::L5,
- SP::L6, SP::L7, SP::I0, SP::I1, SP::I2, SP::I3,
- SP::I4, SP::I5, SP::O0, SP::O1, SP::O2, SP::O3,
- SP::O4, SP::O5, SP::O7, SP::G1, SP::G2, SP::G3,
- SP::G4, SP::O6, SP::I6, SP::I7, SP::G0, SP::G5,
- SP::G6, SP::G7,
- };
-
- // IntRegsVTs Register Class Value Types...
- static const MVT::ValueType IntRegsVTs[] = {
- MVT::i32, MVT::Other
- };
-
-namespace SP { // Register class instances
- DFPRegsClass&nbsp;&nbsp;&nbsp; DFPRegsRegClass;
- FPRegsClass&nbsp;&nbsp;&nbsp;&nbsp; FPRegsRegClass;
- IntRegsClass&nbsp;&nbsp;&nbsp; IntRegsRegClass;
-...
- // IntRegs Sub-register Classess...
- static const TargetRegisterClass* const IntRegsSubRegClasses [] = {
- NULL
- };
-...
- // IntRegs Super-register Classess...
- static const TargetRegisterClass* const IntRegsSuperRegClasses [] = {
- NULL
- };
-...
- // IntRegs Register Class sub-classes...
- static const TargetRegisterClass* const IntRegsSubclasses [] = {
- NULL
- };
-...
- // IntRegs Register Class super-classes...
- static const TargetRegisterClass* const IntRegsSuperclasses [] = {
- NULL
- };
-
- IntRegsClass::IntRegsClass() : TargetRegisterClass(IntRegsRegClassID,
- IntRegsVTs, IntRegsSubclasses, IntRegsSuperclasses, IntRegsSubRegClasses,
- IntRegsSuperRegClasses, 4, 4, 1, IntRegs, IntRegs + 32) {}
-}
-</pre>
-</div>
-
-<p>
-The register allocators will avoid using reserved registers, and callee saved
-registers are not used until all the volatile registers have been used. That
-is usually good enough, but in some cases it may be necessary to provide custom
-allocation orders.
-</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="implementRegister">Implement a subclass of</a>
- <a href="CodeGenerator.html#targetregisterinfo">TargetRegisterInfo</a>
-</h3>
-
-<div>
-
-<p>
-The final step is to hand code portions of <tt>XXXRegisterInfo</tt>, which
-implements the interface described in <tt>TargetRegisterInfo.h</tt>. These
-functions return <tt>0</tt>, <tt>NULL</tt>, or <tt>false</tt>, unless
-overridden. Here is a list of functions that are overridden for the SPARC
-implementation in <tt>SparcRegisterInfo.cpp</tt>:
-</p>
-
-<ul>
-<li><tt>getCalleeSavedRegs</tt> &mdash; Returns a list of callee-saved registers
- in the order of the desired callee-save stack frame offset.</li>
-
-<li><tt>getReservedRegs</tt> &mdash; Returns a bitset indexed by physical
- register numbers, indicating if a particular register is unavailable.</li>
-
-<li><tt>hasFP</tt> &mdash; Return a Boolean indicating if a function should have
- a dedicated frame pointer register.</li>
-
-<li><tt>eliminateCallFramePseudoInstr</tt> &mdash; If call frame setup or
- destroy pseudo instructions are used, this can be called to eliminate
- them.</li>
-
-<li><tt>eliminateFrameIndex</tt> &mdash; Eliminate abstract frame indices from
- instructions that may use them.</li>
-
-<li><tt>emitPrologue</tt> &mdash; Insert prologue code into the function.</li>
-
-<li><tt>emitEpilogue</tt> &mdash; Insert epilogue code into the function.</li>
-</ul>
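-
-<p>
-For illustration, the following is a minimal sketch of
-<tt>getReservedRegs</tt> in the style of the SPARC backend. The exact set of
-reserved registers shown here is an assumption; consult
-<tt>SparcRegisterInfo.cpp</tt> for the authoritative list.
-</p>
-
-<div class="doc_code">
-<pre>
-BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &amp;MF) const {
-  BitVector Reserved(getNumRegs());
-  Reserved.set(SP::G0);   // %g0 reads as zero and cannot be written
-  Reserved.set(SP::O6);   // %sp, the stack pointer
-  Reserved.set(SP::I6);   // %fp, the frame pointer
-  Reserved.set(SP::I7);   // holds the return address
-  return Reserved;
-}
-</pre>
-</div>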
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="InstructionSet">Instruction Set</a>
-</h2>
-
-<!-- *********************************************************************** -->
-<div>
-
-<p>
-During the early stages of code generation, the LLVM IR code is converted to a
-<tt>SelectionDAG</tt> with nodes that are instances of the <tt>SDNode</tt> class
-containing target instructions. An <tt>SDNode</tt> has an opcode, operands, type
-requirements, and operation properties: for example, whether an operation is
-commutative, or whether it loads from memory. The various operation node
-types are described in the <tt>include/llvm/CodeGen/SelectionDAGNodes.h</tt>
-file (values of the <tt>NodeType</tt> enum in the <tt>ISD</tt> namespace).
-</p>
-
-<p>
-TableGen uses the following target description (<tt>.td</tt>) input files to
-generate much of the code for instruction definition:
-</p>
-
-<ul>
-<li><tt>Target.td</tt> &mdash; Where the <tt>Instruction</tt>, <tt>Operand</tt>,
- <tt>InstrInfo</tt>, and other fundamental classes are defined.</li>
-
-<li><tt>TargetSelectionDAG.td</tt> &mdash; Used by <tt>SelectionDAG</tt>
- instruction selection generators, contains <tt>SDTC*</tt> classes (selection
- DAG type constraint), definitions of <tt>SelectionDAG</tt> nodes (such as
- <tt>imm</tt>, <tt>cond</tt>, <tt>bb</tt>, <tt>add</tt>, <tt>fadd</tt>,
- <tt>sub</tt>), and pattern support (<tt>Pattern</tt>, <tt>Pat</tt>,
- <tt>PatFrag</tt>, <tt>PatLeaf</tt>, <tt>ComplexPattern</tt>).</li>
-
-<li><tt>XXXInstrFormats.td</tt> &mdash; Patterns for definitions of
- target-specific instructions.</li>
-
-<li><tt>XXXInstrInfo.td</tt> &mdash; Target-specific definitions of instruction
- templates, condition codes, and instructions of an instruction set. For
- architecture modifications, a different file name may be used. For example,
- for Pentium with SSE instructions, this file is <tt>X86InstrSSE.td</tt>, and
- for Pentium with MMX, this file is <tt>X86InstrMMX.td</tt>.</li>
-</ul>
-
-<p>
-There is also a target-specific <tt>XXX.td</tt> file, where <tt>XXX</tt> is the
-name of the target. The <tt>XXX.td</tt> file includes the other <tt>.td</tt>
-input files, but its contents are only directly important for subtargets.
-</p>
-
-<p>
-You should describe a concrete target-specific class <tt>XXXInstrInfo</tt> that
-represents machine instructions supported by a target machine.
-<tt>XXXInstrInfo</tt> contains an array of <tt>XXXInstrDescriptor</tt> objects,
-each of which describes one instruction. An instruction descriptor defines:</p>
-
-<ul>
-<li>Opcode mnemonic</li>
-
-<li>Number of operands</li>
-
-<li>List of implicit register definitions and uses</li>
-
-<li>Target-independent properties (such as memory access, is commutable)</li>
-
-<li>Target-specific flags </li>
-</ul>
-
-<p>
-The Instruction class (defined in <tt>Target.td</tt>) is mostly used as a base
-for more complex instruction classes.
-</p>
-
-<div class="doc_code">
-<pre>class Instruction {
- string Namespace = "";
- dag OutOperandList; // A dag containing the MI def operand list.
- dag InOperandList; // A dag containing the MI use operand list.
- string AsmString = ""; // The .s format to print the instruction with.
- list&lt;dag&gt; Pattern; // Set to the DAG pattern for this instruction
- list&lt;Register&gt; Uses = [];
- list&lt;Register&gt; Defs = [];
- list&lt;Predicate&gt; Predicates = []; // predicates turned into isel match code
- ... remainder not shown for space ...
-}
-</pre>
-</div>
-
-<p>
-A <tt>SelectionDAG</tt> node (<tt>SDNode</tt>) should contain an object
-representing a target-specific instruction that is defined
-in <tt>XXXInstrInfo.td</tt>. The instruction objects should represent
-instructions from the architecture manual of the target machine (such as the
-SPARC Architecture Manual for the SPARC target).
-</p>
-
-<p>
-A single instruction from the architecture manual is often modeled as multiple
-target instructions, depending upon its operands. For example, a manual might
-describe an add instruction that takes a register or an immediate operand. An
-LLVM target could model this with two instructions named <tt>ADDri</tt> and
-<tt>ADDrr</tt>.
-</p>
-
-<p>
-You should define a class for each instruction category and define each opcode
-as a subclass of the category with appropriate parameters such as the fixed
-binary encoding of opcodes and extended opcodes. You should map the register
-bits to the bits of the instruction in which they are encoded (for the
-JIT). You should also specify how the instruction should be printed when the
-automatic assembly printer is used.
-</p>
-
-<p>
-As is described in the SPARC Architecture Manual, Version 8, there are three
-major 32-bit formats for instructions. Format 1 is only for the <tt>CALL</tt>
-instruction. Format 2 is for branch on condition codes and <tt>SETHI</tt> (set
-high bits of a register) instructions. Format 3 is for other instructions.
-</p>
-
-<p>
-Each of these formats has corresponding classes in <tt>SparcInstrFormats.td</tt>.
-<tt>InstSP</tt> is a base class for other instruction classes. Additional base
-classes are specified for more precise formats: for example
-in <tt>SparcInstrFormats.td</tt>, <tt>F2_1</tt> is for <tt>SETHI</tt>,
-and <tt>F2_2</tt> is for branches. There are three other base
-classes: <tt>F3_1</tt> for register/register operations, <tt>F3_2</tt> for
-register/immediate operations, and <tt>F3_3</tt> for floating-point
-operations. <tt>SparcInstrInfo.td</tt> also adds the base class <tt>Pseudo</tt>
-for synthetic SPARC instructions.
-</p>
-
-<p>
-<tt>SparcInstrInfo.td</tt> largely consists of operand and instruction
-definitions for the SPARC target. In <tt>SparcInstrInfo.td</tt>, the following
-target description file entry, <tt>LDrr</tt>, defines the Load Integer
-instruction for a Word (the <tt>LD</tt> SPARC opcode) from a memory address to a
-register. The first parameter, the value 3 (<tt>11<sub>2</sub></tt>), is the
-operation value for this category of operation. The second parameter
-(<tt>000000<sub>2</sub></tt>) is the specific operation value
-for <tt>LD</tt>/Load Word. The third parameter is the output destination, which
-is a register operand and is defined in the <tt>Register</tt> target description
-file (<tt>IntRegs</tt>).
-</p>
-
-<div class="doc_code">
-<pre>def LDrr : F3_1 &lt;3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr),
- "ld [$addr], $dst",
- [(set IntRegs:$dst, (load ADDRrr:$addr))]&gt;;
-</pre>
-</div>
-
-<p>
-The fourth parameter is the input source, which uses the address
-operand <tt>MEMrr</tt> that is defined earlier in <tt>SparcInstrInfo.td</tt>:
-</p>
-
-<div class="doc_code">
-<pre>def MEMrr : Operand&lt;i32&gt; {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-</pre>
-</div>
-
-<p>
-The fifth parameter is a string that is used by the assembly printer and can be
-left as an empty string until the assembly printer interface is implemented. The
-sixth and final parameter is the pattern used to match the instruction during
-the SelectionDAG Select Phase described in
-<a href="CodeGenerator.html">The LLVM
-Target-Independent Code Generator</a>. This parameter is detailed in the next
-section, <a href="#InstructionSelector">Instruction Selector</a>.
-</p>
-
-<p>
-Instruction class definitions are not overloaded for different operand types, so
-separate versions of instructions are needed for register, memory, or immediate
-value operands. For example, to perform a Load Integer instruction for a Word
-from an immediate operand to a register, the following instruction class is
-defined:
-</p>
-
-<div class="doc_code">
-<pre>def LDri : F3_2 &lt;3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr),
- "ld [$addr], $dst",
- [(set IntRegs:$dst, (load ADDRri:$addr))]&gt;;
-</pre>
-</div>
-
-<p>
-Writing these definitions for so many similar instructions can involve a lot of
-cut and paste. In <tt>.td</tt> files, the <tt>multiclass</tt> directive enables the
-creation of templates to define several instruction classes at once (using
-the <tt>defm</tt> directive). For example, in <tt>SparcInstrInfo.td</tt>, the
-<tt>multiclass</tt> pattern <tt>F3_12</tt> is defined to create two instruction
-classes each time <tt>F3_12</tt> is invoked:
-</p>
-
-<div class="doc_code">
-<pre>multiclass F3_12 &lt;string OpcStr, bits&lt;6&gt; Op3Val, SDNode OpNode&gt; {
- def rr : F3_1 &lt;2, Op3Val,
- (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- !strconcat(OpcStr, " $b, $c, $dst"),
- [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]&gt;;
- def ri : F3_2 &lt;2, Op3Val,
- (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $b, $c, $dst"),
- [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]&gt;;
-}
-</pre>
-</div>
-
-<p>
-So when the <tt>defm</tt> directive is used for the <tt>XOR</tt>
-and <tt>ADD</tt> instructions, as seen below, it creates four instruction
-objects: <tt>XORrr</tt>, <tt>XORri</tt>, <tt>ADDrr</tt>, and <tt>ADDri</tt>.
-</p>
-
-<div class="doc_code">
-<pre>
-defm XOR : F3_12&lt;"xor", 0b000011, xor&gt;;
-defm ADD : F3_12&lt;"add", 0b000000, add&gt;;
-</pre>
-</div>
-
-<p>
-<tt>SparcInstrInfo.td</tt> also includes definitions for condition codes that
-are referenced by branch instructions. The following definitions
-in <tt>SparcInstrInfo.td</tt> indicate the bit location of the SPARC condition
-code. For example, the 10<sup>th</sup> bit represents the 'greater than'
-condition for integers, and the 22<sup>nd</sup> bit represents the 'greater
-than' condition for floats.
-</p>
-
-<div class="doc_code">
-<pre>
-def ICC_NE : ICC_VAL&lt; 9&gt;; // Not Equal
-def ICC_E : ICC_VAL&lt; 1&gt;; // Equal
-def ICC_G : ICC_VAL&lt;10&gt;; // Greater
-...
-def FCC_U : FCC_VAL&lt;23&gt;; // Unordered
-def FCC_G : FCC_VAL&lt;22&gt;; // Greater
-def FCC_UG : FCC_VAL&lt;21&gt;; // Unordered or Greater
-...
-</pre>
-</div>
-
-<p>
-(Note that <tt>Sparc.h</tt> also defines enums that correspond to the same SPARC
-condition codes. Care must be taken to ensure the values in <tt>Sparc.h</tt>
-correspond to the values in <tt>SparcInstrInfo.td</tt>. I.e.,
-<tt>SPCC::ICC_NE = 9</tt>, <tt>SPCC::FCC_U = 23</tt> and so on.)
-</p>
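-
-<p>
-A corresponding excerpt from <tt>Sparc.h</tt> might look like the following
-(abbreviated here for illustration; the actual header covers every integer
-and floating-point condition code):
-</p>
-
-<div class="doc_code">
-<pre>
-namespace SPCC {
-  enum CondCodes {
-    ICC_NE = 9,    // must match ICC_VAL&lt;9&gt; in SparcInstrInfo.td
-    ICC_E  = 1,
-    ICC_G  = 10,
-    ...
-    FCC_U  = 23,
-    FCC_G  = 22,
-    FCC_UG = 21,
-    ...
-  };
-}
-</pre>
-</div>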
-
-<!-- ======================================================================= -->
-<h3>
- <a name="operandMapping">Instruction Operand Mapping</a>
-</h3>
-
-<div>
-
-<p>
-The code generator backend maps instruction operands to fields in the
-instruction. Operands are assigned to unbound fields in the instruction in the
-order they are defined. Fields are bound when they are assigned a value. For
-example, the Sparc target defines the <tt>XNORrr</tt> instruction as
-a <tt>F3_1</tt> format instruction having three operands.
-</p>
-
-<div class="doc_code">
-<pre>
-def XNORrr : F3_1&lt;2, 0b000111,
- (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- "xnor $b, $c, $dst",
- [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]&gt;;
-</pre>
-</div>
-
-<p>
-The instruction templates in <tt>SparcInstrFormats.td</tt> show that
-<tt>F3_1</tt> ultimately derives from the base class <tt>InstSP</tt>.
-</p>
-
-<div class="doc_code">
-<pre>
-class InstSP&lt;dag outs, dag ins, string asmstr, list&lt;dag&gt; pattern&gt; : Instruction {
- field bits&lt;32&gt; Inst;
- let Namespace = "SP";
- bits&lt;2&gt; op;
- let Inst{31-30} = op;
- dag OutOperandList = outs;
- dag InOperandList = ins;
- let AsmString = asmstr;
- let Pattern = pattern;
-}
-</pre>
-</div>
-
-<p><tt>InstSP</tt> leaves the <tt>op</tt> field unbound.</p>
-
-<div class="doc_code">
-<pre>
-class F3&lt;dag outs, dag ins, string asmstr, list&lt;dag&gt; pattern&gt;
- : InstSP&lt;outs, ins, asmstr, pattern&gt; {
- bits&lt;5&gt; rd;
- bits&lt;6&gt; op3;
- bits&lt;5&gt; rs1;
- let op{1} = 1; // Op = 2 or 3
- let Inst{29-25} = rd;
- let Inst{24-19} = op3;
- let Inst{18-14} = rs1;
-}
-</pre>
-</div>
-
-<p>
-<tt>F3</tt> binds the <tt>op</tt> field and defines the <tt>rd</tt>,
-<tt>op3</tt>, and <tt>rs1</tt> fields. Instructions in the <tt>F3</tt> format
-bind operands to the <tt>rd</tt>, <tt>op3</tt>, and <tt>rs1</tt> fields.
-</p>
-
-<div class="doc_code">
-<pre>
-class F3_1&lt;bits&lt;2&gt; opVal, bits&lt;6&gt; op3val, dag outs, dag ins,
- string asmstr, list&lt;dag&gt; pattern&gt; : F3&lt;outs, ins, asmstr, pattern&gt; {
- bits&lt;8&gt; asi = 0; // asi not currently used
- bits&lt;5&gt; rs2;
- let op = opVal;
- let op3 = op3val;
- let Inst{13} = 0; // i field = 0
- let Inst{12-5} = asi; // address space identifier
- let Inst{4-0} = rs2;
-}
-</pre>
-</div>
-
-<p>
-<tt>F3_1</tt> binds the <tt>op3</tt> field and defines the <tt>rs2</tt>
-field. <tt>F3_1</tt> format instructions will bind the operands to the <tt>rd</tt>,
-<tt>rs1</tt>, and <tt>rs2</tt> fields. This results in the <tt>XNORrr</tt>
-instruction binding <tt>$dst</tt>, <tt>$b</tt>, and <tt>$c</tt> operands to
-the <tt>rd</tt>, <tt>rs1</tt>, and <tt>rs2</tt> fields respectively.
-</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="relationMapping">Instruction Relation Mapping</a>
-</h3>
-
-<div>
-
-<p>
-This TableGen feature is used to relate instructions with each other. It is
-particularly useful when you have multiple instruction formats and need to
-switch between them after instruction selection. This entire feature is driven
-by relation models which can be defined in <tt>XXXInstrInfo.td</tt> files
-according to the target-specific instruction set. Relation models are defined
-using the <tt>InstrMapping</tt> class as a base. TableGen parses all the models
-and generates instruction relation maps using the specified information.
-Relation maps are emitted as tables in the <tt>XXXGenInstrInfo.inc</tt> file
-along with the functions to query them. For detailed information on how to
-use this feature, please refer to the
-<a href="HowToUseInstrMappings.html">How to add Instruction Mappings</a>
-document.
-</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="implementInstr">Implement a subclass of </a>
- <a href="CodeGenerator.html#targetinstrinfo">TargetInstrInfo</a>
-</h3>
-
-<div>
-
-<p>
-The final step is to hand code portions of <tt>XXXInstrInfo</tt>, which
-implements the interface described in <tt>TargetInstrInfo.h</tt>. These
-functions return <tt>0</tt> or a Boolean or they assert, unless
-overridden. Here's a list of functions that are overridden for the SPARC
-implementation in <tt>SparcInstrInfo.cpp</tt>:
-</p>
-
-<ul>
-<li><tt>isLoadFromStackSlot</tt> &mdash; If the specified machine instruction is
- a direct load from a stack slot, return the register number of the
- destination and the <tt>FrameIndex</tt> of the stack slot.</li>
-
-<li><tt>isStoreToStackSlot</tt> &mdash; If the specified machine instruction is
- a direct store to a stack slot, return the register number of the source
- and the <tt>FrameIndex</tt> of the stack slot.</li>
-
-<li><tt>copyPhysReg</tt> &mdash; Copy values between a pair of physical
- registers.</li>
-
-<li><tt>storeRegToStackSlot</tt> &mdash; Store a register value to a stack
- slot.</li>
-
-<li><tt>loadRegFromStackSlot</tt> &mdash; Load a register value from a stack
- slot.</li>
-
-<li><tt>storeRegToAddr</tt> &mdash; Store a register value to memory.</li>
-
-<li><tt>loadRegFromAddr</tt> &mdash; Load a register value from memory.</li>
-
-<li><tt>foldMemoryOperand</tt> &mdash; Attempt to fold a load or store of the
- specified operand(s) into an instruction, producing a single new
- instruction.</li>
-</ul>
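-
-<p>
-As an illustration, the following is a simplified sketch of
-<tt>isLoadFromStackSlot</tt>; it is a reduced form of the SPARC code (the
-real implementation also handles the floating-point load opcodes):
-</p>
-
-<div class="doc_code">
-<pre>
-unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
-                                             int &amp;FrameIndex) const {
-  if (MI-&gt;getOpcode() == SP::LDri &amp;&amp;
-      MI-&gt;getOperand(1).isFI() &amp;&amp;   // address is a frame index
-      MI-&gt;getOperand(2).isImm() &amp;&amp; MI-&gt;getOperand(2).getImm() == 0) {
-    FrameIndex = MI-&gt;getOperand(1).getIndex();
-    return MI-&gt;getOperand(0).getReg();   // the destination register
-  }
-  return 0;
-}
-</pre>
-</div>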
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="branchFolding">Branch Folding and If Conversion</a>
-</h3>
-<div>
-
-<p>
-Performance can be improved by combining instructions or by eliminating
-instructions that are never reached. The <tt>AnalyzeBranch</tt> method
-in <tt>XXXInstrInfo</tt> may be implemented to examine conditional instructions
-and remove unnecessary instructions. <tt>AnalyzeBranch</tt> looks at the end of
-a machine basic block (MBB) for opportunities for improvement, such as branch
-folding and if conversion. The <tt>BranchFolder</tt> and <tt>IfConverter</tt>
-machine function passes (see the source files <tt>BranchFolding.cpp</tt> and
-<tt>IfConversion.cpp</tt> in the <tt>lib/CodeGen</tt> directory) call
-<tt>AnalyzeBranch</tt> to improve the control flow graph that represents the
-instructions.
-</p>
-
-<p>
-Several implementations of <tt>AnalyzeBranch</tt> (for ARM, Alpha, and X86) can
-be examined as models for your own <tt>AnalyzeBranch</tt> implementation. Since
-SPARC does not implement a useful <tt>AnalyzeBranch</tt>, the ARM target
-implementation is shown below.
-</p>
-
-<p><tt>AnalyzeBranch</tt> returns a Boolean value and takes four parameters:</p>
-
-<ul>
-<li><tt>MachineBasicBlock &amp;MBB</tt> &mdash; The incoming block to be
- examined.</li>
-
-<li><tt>MachineBasicBlock *&amp;TBB</tt> &mdash; A destination block that is
- returned. For a conditional branch that evaluates to true, <tt>TBB</tt> is
- the destination.</li>
-
-<li><tt>MachineBasicBlock *&amp;FBB</tt> &mdash; For a conditional branch that
- evaluates to false, <tt>FBB</tt> is returned as the destination.</li>
-
-<li><tt>std::vector&lt;MachineOperand&gt; &amp;Cond</tt> &mdash; List of
- operands to evaluate a condition for a conditional branch.</li>
-</ul>
-
-<p>
-In the simplest case, if a block ends without a branch, then it falls through to
-the successor block. No destination blocks are specified for either <tt>TBB</tt>
-or <tt>FBB</tt>, so both parameters return <tt>NULL</tt>. The start of
-the <tt>AnalyzeBranch</tt> (see code below for the ARM target) shows the
-function parameters and the code for the simplest case.
-</p>
-
-<div class="doc_code">
-<pre>bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &amp;MBB,
- MachineBasicBlock *&amp;TBB, MachineBasicBlock *&amp;FBB,
- std::vector&lt;MachineOperand&gt; &amp;Cond) const
-{
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
- return false;
-</pre>
-</div>
-
-<p>
-If a block ends with a single unconditional branch instruction, then
-<tt>AnalyzeBranch</tt> (shown below) should return the destination of that
-branch in the <tt>TBB</tt> parameter.
-</p>
-
-<div class="doc_code">
-<pre>
- if (LastOpc == ARM::B || LastOpc == ARM::tB) {
- TBB = LastInst-&gt;getOperand(0).getMBB();
- return false;
- }
-</pre>
-</div>
-
-<p>
-If a block ends with two unconditional branches, then the second branch is never
-reached. In that situation, as shown below, remove the last branch instruction
-and return the penultimate branch in the <tt>TBB</tt> parameter.
-</p>
-
-<div class="doc_code">
-<pre>
- if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) &amp;&amp;
- (LastOpc == ARM::B || LastOpc == ARM::tB)) {
- TBB = SecondLastInst-&gt;getOperand(0).getMBB();
- I = LastInst;
- I-&gt;eraseFromParent();
- return false;
- }
-</pre>
-</div>
-
-<p>
-A block may end with a single conditional branch instruction that falls through
-to the successor block if the condition evaluates to false. In that case,
-<tt>AnalyzeBranch</tt> (shown below) should return the destination of that
-conditional branch in the <tt>TBB</tt> parameter and a list of operands in
-the <tt>Cond</tt> parameter to evaluate the condition.
-</p>
-
-<div class="doc_code">
-<pre>
- if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
- // Block ends with fall-through condbranch.
- TBB = LastInst-&gt;getOperand(0).getMBB();
- Cond.push_back(LastInst-&gt;getOperand(1));
- Cond.push_back(LastInst-&gt;getOperand(2));
- return false;
- }
-</pre>
-</div>
-
-<p>
-If a block ends with both a conditional branch and an ensuing unconditional
-branch, then <tt>AnalyzeBranch</tt> (shown below) should return the conditional
-branch destination (assuming it corresponds to a conditional evaluation of
-'<tt>true</tt>') in the <tt>TBB</tt> parameter and the unconditional branch
-destination in the <tt>FBB</tt> (corresponding to a conditional evaluation of
-'<tt>false</tt>'). A list of operands to evaluate the condition should be
-returned in the <tt>Cond</tt> parameter.
-</p>
-
-<div class="doc_code">
-<pre>
- unsigned SecondLastOpc = SecondLastInst-&gt;getOpcode();
-
- if ((SecondLastOpc == ARM::Bcc &amp;&amp; LastOpc == ARM::B) ||
- (SecondLastOpc == ARM::tBcc &amp;&amp; LastOpc == ARM::tB)) {
- TBB = SecondLastInst-&gt;getOperand(0).getMBB();
- Cond.push_back(SecondLastInst-&gt;getOperand(1));
- Cond.push_back(SecondLastInst-&gt;getOperand(2));
- FBB = LastInst-&gt;getOperand(0).getMBB();
- return false;
- }
-</pre>
-</div>
-
-<p>
-For the last two cases (ending with a single conditional branch or ending with
-one conditional and one unconditional branch), the operands returned in
-the <tt>Cond</tt> parameter can be passed to methods of other instructions to
-create new branches or perform other operations. An implementation
-of <tt>AnalyzeBranch</tt> requires the helper methods <tt>RemoveBranch</tt>
-and <tt>InsertBranch</tt> to manage subsequent operations.
-</p>
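-
-<p>
-As a rough sketch, an ARM-style <tt>InsertBranch</tt> mirrors the cases that
-<tt>AnalyzeBranch</tt> recognizes. (The exact method signatures have shifted
-between LLVM releases, so treat this as schematic rather than authoritative.)
-</p>
-
-<div class="doc_code">
-<pre>
-unsigned ARMInstrInfo::InsertBranch(MachineBasicBlock &amp;MBB,
-    MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-    const std::vector&lt;MachineOperand&gt; &amp;Cond) const {
-  if (FBB == 0) {             // one-branch cases
-    if (Cond.empty())         // unconditional branch
-      BuildMI(&amp;MBB, get(ARM::B)).addMBB(TBB);
-    else                      // conditional branch that falls through
-      BuildMI(&amp;MBB, get(ARM::Bcc)).addMBB(TBB)
-        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
-    return 1;
-  }
-  // conditional branch to TBB, then unconditional branch to FBB
-  BuildMI(&amp;MBB, get(ARM::Bcc)).addMBB(TBB)
-    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
-  BuildMI(&amp;MBB, get(ARM::B)).addMBB(FBB);
-  return 2;
-}
-</pre>
-</div>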
-
-<p>
-<tt>AnalyzeBranch</tt> should return false, indicating success, in most
-circumstances. It should return true only when it cannot determine what to do:
-for example, if a block ends with three terminating branches, or with a
-terminator it cannot handle, such as an indirect branch.
-</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="InstructionSelector">Instruction Selector</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-LLVM uses a <tt>SelectionDAG</tt> to represent LLVM IR instructions, and nodes
-of the <tt>SelectionDAG</tt> ideally represent native target
-instructions. During code generation, instruction selection passes are performed
-to convert non-native DAG instructions into native target-specific
-instructions. The pass described in <tt>XXXISelDAGToDAG.cpp</tt> is used to
-match patterns and perform DAG-to-DAG instruction selection. Optionally, a pass
-may be defined (in <tt>XXXBranchSelector.cpp</tt>) to perform similar DAG-to-DAG
-operations for branch instructions. Later, the code in
-<tt>XXXISelLowering.cpp</tt> legalizes the <tt>SelectionDAG</tt>: it replaces or
-removes operations and data types that are not supported natively.
-</p>
-
-<p>
-TableGen generates code for instruction selection using the following target
-description input files:
-</p>
-
-<ul>
-<li><tt>XXXInstrInfo.td</tt> &mdash; Contains definitions of instructions in a
- target-specific instruction set, generates <tt>XXXGenDAGISel.inc</tt>, which
- is included in <tt>XXXISelDAGToDAG.cpp</tt>.</li>
-
-<li><tt>XXXCallingConv.td</tt> &mdash; Contains the calling and return value
- conventions for the target architecture, and it generates
- <tt>XXXGenCallingConv.inc</tt>, which is included in
- <tt>XXXISelLowering.cpp</tt>.</li>
-</ul>
-
-<p>
-The implementation of an instruction selection pass must include a header that
-declares the <tt>FunctionPass</tt> class or a subclass of <tt>FunctionPass</tt>. In
-<tt>XXXTargetMachine.cpp</tt>, a Pass Manager (PM) should add each instruction
-selection pass into the queue of passes to run.
-</p>
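-
-<p>
-For example, a hypothetical hook in <tt>SparcTargetMachine.cpp</tt> might add
-the DAG-to-DAG pass as shown below (the hook signature varies across LLVM
-releases, so this is a sketch rather than a fixed interface):
-</p>
-
-<div class="doc_code">
-<pre>
-bool SparcTargetMachine::addInstSelector(PassManagerBase &amp;PM) {
-  // Queue the DAG-to-DAG instruction selection pass.
-  PM.add(createSparcISelDag(*this));
-  return false;
-}
-</pre>
-</div>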
-
-<p>
-The LLVM static compiler (<tt>llc</tt>) is an excellent tool for visualizing the
-contents of DAGs. To display the <tt>SelectionDAG</tt> before or after specific
-processing phases, use the command line options for <tt>llc</tt>, described
-at <a href="CodeGenerator.html#selectiondag_process">
-SelectionDAG Instruction Selection Process</a>.
-</p>
-
-<p>
-To describe instruction selector behavior, you should add patterns for lowering
-LLVM code into a <tt>SelectionDAG</tt> as the last parameter of the instruction
-definitions in <tt>XXXInstrInfo.td</tt>. For example, in
-<tt>SparcInstrInfo.td</tt>, this entry defines a register store operation, and
-the last parameter describes a pattern with the store DAG operator.
-</p>
-
-<div class="doc_code">
-<pre>
-def STrr : F3_1&lt; 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src),
- "st $src, [$addr]", [(store IntRegs:$src, ADDRrr:$addr)]&gt;;
-</pre>
-</div>
-
-<p>
-<tt>ADDRrr</tt> is a memory mode that is also defined in
-<tt>SparcInstrInfo.td</tt>:
-</p>
-
-<div class="doc_code">
-<pre>
-def ADDRrr : ComplexPattern&lt;i32, 2, "SelectADDRrr", [], []&gt;;
-</pre>
-</div>
-
-<p>
-The definition of <tt>ADDRrr</tt> refers to <tt>SelectADDRrr</tt>, which is a
-function defined in an implementation of the Instruction Selector (such
-as <tt>SparcISelDAGToDAG.cpp</tt>).
-</p>
-
-<p>
-In <tt>lib/Target/TargetSelectionDAG.td</tt>, the DAG operator for store is
-defined below:
-</p>
-
-<div class="doc_code">
-<pre>
-def store : PatFrag&lt;(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast&lt;StoreSDNode&gt;(N))
- return !ST-&gt;isTruncatingStore() &amp;&amp;
- ST-&gt;getAddressingMode() == ISD::UNINDEXED;
- return false;
-}]&gt;;
-</pre>
-</div>
-
-<p>
-<tt>XXXInstrInfo.td</tt> also generates (in <tt>XXXGenDAGISel.inc</tt>) the
-<tt>SelectCode</tt> method that is used to call the appropriate processing
-method for an instruction. In this example, <tt>SelectCode</tt>
-calls <tt>Select_ISD_STORE</tt> for the <tt>ISD::STORE</tt> opcode.
-</p>
-
-<div class="doc_code">
-<pre>
-SDNode *SelectCode(SDValue N) {
- ...
- MVT::ValueType NVT = N.getNode()-&gt;getValueType(0);
- switch (N.getOpcode()) {
- case ISD::STORE: {
- switch (NVT) {
- default:
- return Select_ISD_STORE(N);
- break;
- }
- break;
- }
- ...
-</pre>
-</div>
-
-<p>
-The pattern for <tt>STrr</tt> is matched, so elsewhere in
-<tt>XXXGenDAGISel.inc</tt>, code for <tt>STrr</tt> is created for
-<tt>Select_ISD_STORE</tt>. The <tt>Emit_22</tt> method is also generated
-in <tt>XXXGenDAGISel.inc</tt> to complete the processing of this
-instruction.
-</p>
-
-<div class="doc_code">
-<pre>
-SDNode *Select_ISD_STORE(const SDValue &amp;N) {
- SDValue Chain = N.getOperand(0);
- if (Predicate_store(N.getNode())) {
- SDValue N1 = N.getOperand(1);
- SDValue N2 = N.getOperand(2);
- SDValue CPTmp0;
- SDValue CPTmp1;
-
- // Pattern: (st:void IntRegs:i32:$src,
- // ADDRrr:i32:$addr)&lt;&lt;P:Predicate_store&gt;&gt;
- // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src)
- // Pattern complexity = 13 cost = 1 size = 0
- if (SelectADDRrr(N, N2, CPTmp0, CPTmp1) &amp;&amp;
- N1.getNode()-&gt;getValueType(0) == MVT::i32 &amp;&amp;
- N2.getNode()-&gt;getValueType(0) == MVT::i32) {
- return Emit_22(N, SP::STrr, CPTmp0, CPTmp1);
- }
-...
-</pre>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="LegalizePhase">The SelectionDAG Legalize Phase</a>
-</h3>
-
-<div>
-
-<p>
-The Legalize phase converts a DAG to use types and operations that are natively
-supported by the target. For natively unsupported types and operations, you need
-to add code to the target-specific XXXTargetLowering implementation to convert
-unsupported types and operations to supported ones.
-</p>
-
-<p>
-In the constructor for the <tt>XXXTargetLowering</tt> class, first use the
-<tt>addRegisterClass</tt> method to specify which types are supported and which
-register classes are associated with them. The code for the register classes is
-generated by TableGen from <tt>XXXRegisterInfo.td</tt> and placed
-in <tt>XXXGenRegisterInfo.h.inc</tt>. For example, the implementation of the
-constructor for the SparcTargetLowering class (in
-<tt>SparcISelLowering.cpp</tt>) starts with the following code:
-</p>
-
-<div class="doc_code">
-<pre>
-addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
-addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
-addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass);
-</pre>
-</div>
-
-<p>
-You should examine the node types in the <tt>ISD</tt> namespace
-(<tt>include/llvm/CodeGen/SelectionDAGNodes.h</tt>) and determine which
-operations the target natively supports. For operations that do <b>not</b> have
-native support, add a callback to the constructor for the XXXTargetLowering
-class, so the instruction selection process knows what to do. The TargetLowering
-class callback methods (declared in <tt>llvm/Target/TargetLowering.h</tt>) are:
-</p>
-
-<ul>
-<li><tt>setOperationAction</tt> &mdash; General operation.</li>
-
-<li><tt>setLoadExtAction</tt> &mdash; Load with extension.</li>
-
-<li><tt>setTruncStoreAction</tt> &mdash; Truncating store.</li>
-
-<li><tt>setIndexedLoadAction</tt> &mdash; Indexed load.</li>
-
-<li><tt>setIndexedStoreAction</tt> &mdash; Indexed store.</li>
-
-<li><tt>setConvertAction</tt> &mdash; Type conversion.</li>
-
-<li><tt>setCondCodeAction</tt> &mdash; Support for a given condition code.</li>
-</ul>
-
-<p>
-Note: on older releases, <tt>setLoadXAction</tt> is used instead
-of <tt>setLoadExtAction</tt>. Also, on older releases,
-<tt>setCondCodeAction</tt> may not be supported. Examine your release
-to see what methods are specifically supported.
-</p>
-
-<p>
-These callbacks are used to determine whether an operation works with a
-specified type (or types). In all cases, the third parameter is
-a <tt>LegalizeAction</tt> enum value: <tt>Promote</tt>, <tt>Expand</tt>,
-<tt>Custom</tt>, or <tt>Legal</tt>. <tt>SparcISelLowering.cpp</tt>
-contains examples of all four <tt>LegalizeAction</tt> values.
-</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="promote">Promote</a>
-</h4>
-
-<div>
-
-<p>
-For an operation without native support for a given type, the specified type may
-be promoted to a larger type that is supported. For example, SPARC does not
-support a sign-extending load for Boolean values (<tt>i1</tt> type), so
-in <tt>SparcISelLowering.cpp</tt> the third parameter below, <tt>Promote</tt>,
-changes <tt>i1</tt> type values to a larger type before loading.
-</p>
-
-<div class="doc_code">
-<pre>
-setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
-</pre>
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="expand">Expand</a>
-</h4>
-
-<div>
-
-<p>
-For a type without native support, a value may need to be broken down further,
-rather than promoted. For an operation without native support, a combination of
-other operations may be used to similar effect. In SPARC, the floating-point
-sine and cosine trig operations are supported by expansion to other operations,
-as indicated by the third parameter, <tt>Expand</tt>, to
-<tt>setOperationAction</tt>:
-</p>
-
-<div class="doc_code">
-<pre>
-setOperationAction(ISD::FSIN, MVT::f32, Expand);
-setOperationAction(ISD::FCOS, MVT::f32, Expand);
-</pre>
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="custom">Custom</a>
-</h4>
-
-<div>
-
-<p>
-For some operations, simple type promotion or operation expansion may be
-insufficient. In some cases, a special intrinsic function must be implemented.
-</p>
-
-<p>
-For example, a constant value may require special treatment, or an operation may
-require spilling and restoring registers in the stack and working with register
-allocators.
-</p>
-
-<p>
-As seen in <tt>SparcISelLowering.cpp</tt> code below, to perform a type
-conversion from a floating-point value to a signed integer, first
-<tt>setOperationAction</tt> should be called with <tt>Custom</tt> as the third
-parameter:
-</p>
-
-<div class="doc_code">
-<pre>
-setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
-</pre>
-</div>
-
-<p>
-In the <tt>LowerOperation</tt> method, for each <tt>Custom</tt> operation, a
-case statement should be added to indicate what function to call. In the
-following code, an <tt>FP_TO_SINT</tt> opcode will call
-the <tt>LowerFP_TO_SINT</tt> method:
-</p>
-
-<div class="doc_code">
-<pre>
-SDValue SparcTargetLowering::LowerOperation(SDValue Op, SelectionDAG &amp;DAG) {
- switch (Op.getOpcode()) {
- case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
- ...
- }
-}
-</pre>
-</div>
-
-<p>
-Finally, the <tt>LowerFP_TO_SINT</tt> method is implemented, using an FP
-register to convert the floating-point value to an integer.
-</p>
-
-<div class="doc_code">
-<pre>
-static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &amp;DAG) {
- assert(Op.getValueType() == MVT::i32);
- Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0));
- return DAG.getNode(ISD::BITCAST, MVT::i32, Op);
-}
-</pre>
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="legal">Legal</a>
-</h4>
-
-<div>
-
-<p>
-The <tt>Legal</tt> LegalizeAction enum value simply indicates that an
-operation <b>is</b> natively supported. <tt>Legal</tt> represents the default
-condition, so it is rarely used. In <tt>SparcISelLowering.cpp</tt>, the action
-for <tt>CTPOP</tt> (an operation to count the bits set in an integer) is
-natively supported only for SPARC v9. The following code enables
-the <tt>Expand</tt> conversion technique for non-v9 SPARC implementations.
-</p>
-
-<div class="doc_code">
-<pre>
-setOperationAction(ISD::CTPOP, MVT::i32, Expand);
-...
-if (TM.getSubtarget&lt;SparcSubtarget&gt;().isV9())
- setOperationAction(ISD::CTPOP, MVT::i32, Legal);
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="callingConventions">Calling Conventions</a>
-</h3>
-
-<div>
-
-<p>
-To support target-specific calling conventions, <tt>XXXCallingConv.td</tt>
-uses interfaces (such as <tt>CCIfType</tt> and <tt>CCAssignToReg</tt>) that are
-defined in <tt>lib/Target/TargetCallingConv.td</tt>. TableGen can take the
-target descriptor file <tt>XXXCallingConv.td</tt> and generate the header
-file <tt>XXXGenCallingConv.inc</tt>, which is typically included
-in <tt>XXXISelLowering.cpp</tt>. You can use the interfaces in
-<tt>TargetCallingConv.td</tt> to specify:
-</p>
-
-<ul>
-<li>The order of parameter allocation.</li>
-
-<li>Where parameters and return values are placed (that is, on the stack or in
- registers).</li>
-
-<li>Which registers may be used.</li>
-
-<li>Whether the caller or callee unwinds the stack.</li>
-</ul>
-
-<p>
-The following example demonstrates the use of the <tt>CCIfType</tt> and
-<tt>CCAssignToReg</tt> interfaces. If the <tt>CCIfType</tt> predicate is true
-(that is, if the current argument is of type <tt>f32</tt> or <tt>f64</tt>), then
-the action is performed. In this case, the <tt>CCAssignToReg</tt> action assigns
-the argument value to the first available register: either <tt>R0</tt>
-or <tt>R1</tt>.
-</p>
-
-<div class="doc_code">
-<pre>
-CCIfType&lt;[f32,f64], CCAssignToReg&lt;[R0, R1]&gt;&gt;
-</pre>
-</div>
-
-<p>
-<tt>SparcCallingConv.td</tt> contains definitions for a target-specific
-return-value calling convention (RetCC_Sparc32) and a basic 32-bit C calling
-convention (<tt>CC_Sparc32</tt>). The definition of <tt>RetCC_Sparc32</tt>
-(shown below) indicates which registers are used for specified scalar return
-types. A single-precision float is returned to register <tt>F0</tt>, and a
-double-precision float goes to register <tt>D0</tt>. A 32-bit integer is
-returned in register <tt>I0</tt> or <tt>I1</tt>.
-</p>
-
-<div class="doc_code">
-<pre>
-def RetCC_Sparc32 : CallingConv&lt;[
- CCIfType&lt;[i32], CCAssignToReg&lt;[I0, I1]&gt;&gt;,
- CCIfType&lt;[f32], CCAssignToReg&lt;[F0]&gt;&gt;,
- CCIfType&lt;[f64], CCAssignToReg&lt;[D0]&gt;&gt;
-]&gt;;
-</pre>
-</div>
-
-<p>
-The definition of <tt>CC_Sparc32</tt> in <tt>SparcCallingConv.td</tt> introduces
-<tt>CCAssignToStack</tt>, which assigns the value to a stack slot with the
-specified size and alignment. In the example below, the first parameter, 4,
-indicates the size of the slot, and the second parameter, also 4, indicates the
-stack alignment in bytes. (Special cases: if size is zero, then the
-ABI size is used; if alignment is zero, then the ABI alignment is used.)
-</p>
-
-<div class="doc_code">
-<pre>
-def CC_Sparc32 : CallingConv&lt;[
- // All arguments get passed in integer registers if there is space.
- CCIfType&lt;[i32, f32, f64], CCAssignToReg&lt;[I0, I1, I2, I3, I4, I5]&gt;&gt;,
- CCAssignToStack&lt;4, 4&gt;
-]&gt;;
-</pre>
-</div>
-
-<p>
-<tt>CCDelegateTo</tt> is another commonly used interface, which tries to find a
-specified sub-calling convention, and, if a match is found, it is invoked. In
-the following example (in <tt>X86CallingConv.td</tt>), the definition of
-<tt>RetCC_X86_32_C</tt> ends with <tt>CCDelegateTo</tt>. After the current value
-is assigned to the register <tt>ST0</tt> or <tt>ST1</tt>,
-the <tt>RetCC_X86Common</tt> is invoked.
-</p>
-
-<div class="doc_code">
-<pre>
-def RetCC_X86_32_C : CallingConv&lt;[
- CCIfType&lt;[f32], CCAssignToReg&lt;[ST0, ST1]&gt;&gt;,
- CCIfType&lt;[f64], CCAssignToReg&lt;[ST0, ST1]&gt;&gt;,
- CCDelegateTo&lt;RetCC_X86Common&gt;
-]&gt;;
-</pre>
-</div>
-
-<p>
-<tt>CCIfCC</tt> is an interface that attempts to match the given name to the
-current calling convention. If the name identifies the current calling
-convention, then a specified action is invoked. In the following example (in
-<tt>X86CallingConv.td</tt>), if the <tt>Fast</tt> calling convention is in use,
-then <tt>RetCC_X86_32_Fast</tt> is invoked. If the <tt>SSECall</tt> calling
-convention is in use, then <tt>RetCC_X86_32_SSE</tt> is invoked.
-</p>
-
-<div class="doc_code">
-<pre>
-def RetCC_X86_32 : CallingConv&lt;[
- CCIfCC&lt;"CallingConv::Fast", CCDelegateTo&lt;RetCC_X86_32_Fast&gt;&gt;,
- CCIfCC&lt;"CallingConv::X86_SSECall", CCDelegateTo&lt;RetCC_X86_32_SSE&gt;&gt;,
- CCDelegateTo&lt;RetCC_X86_32_C&gt;
-]&gt;;
-</pre>
-</div>
-
-<p>Other calling convention interfaces include:</p>
-
-<ul>
-<li><tt>CCIf &lt;predicate, action&gt;</tt> &mdash; If the predicate matches,
- apply the action.</li>
-
-<li><tt>CCIfInReg &lt;action&gt;</tt> &mdash; If the argument is marked with the
- '<tt>inreg</tt>' attribute, then apply the action.</li>
-
-<li><tt>CCIfNest &lt;action&gt;</tt> &mdash; If the argument is marked with the
- '<tt>nest</tt>' attribute, then apply the action.</li>
-
-<li><tt>CCIfNotVarArg &lt;action&gt;</tt> &mdash; If the current function does
- not take a variable number of arguments, apply the action.</li>
-
-<li><tt>CCAssignToRegWithShadow &lt;registerList, shadowList&gt;</tt> &mdash;
- Similar to <tt>CCAssignToReg</tt>, but with a shadow list of registers.</li>
-
-<li><tt>CCPassByVal &lt;size, align&gt;</tt> &mdash; Assign value to a stack
- slot with the minimum specified size and alignment.</li>
-
-<li><tt>CCPromoteToType &lt;type&gt;</tt> &mdash; Promote the current value to
- the specified type.</li>
-
-<li><tt>CallingConv &lt;[actions]&gt;</tt> &mdash; Define each calling
- convention that is supported.</li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="assemblyPrinter">Assembly Printer</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-During the code emission stage, the code generator may utilize an LLVM pass to
-produce assembly output. To do this, you want to implement the code for a
-printer that converts LLVM IR to a GAS-format assembly language for your target
-machine, using the following steps:
-</p>
-
-<ul>
-<li>Define all the assembly strings for your target, adding them to the
- instructions defined in the <tt>XXXInstrInfo.td</tt> file.
- (See <a href="#InstructionSet">Instruction Set</a>.) TableGen will produce
- an output file (<tt>XXXGenAsmWriter.inc</tt>) with an implementation of
- the <tt>printInstruction</tt> method for the XXXAsmPrinter class.</li>
-
-<li>Write <tt>XXXTargetAsmInfo.h</tt>, which contains the bare-bones declaration
- of the <tt>XXXTargetAsmInfo</tt> class (a subclass
- of <tt>TargetAsmInfo</tt>).</li>
-
-<li>Write <tt>XXXTargetAsmInfo.cpp</tt>, which contains target-specific values
- for <tt>TargetAsmInfo</tt> properties and sometimes new implementations for
- methods.</li>
-
-<li>Write <tt>XXXAsmPrinter.cpp</tt>, which implements the <tt>AsmPrinter</tt>
- class that performs the LLVM-to-assembly conversion.</li>
-</ul>
-
-<p>
-The code in <tt>XXXTargetAsmInfo.h</tt> is usually a trivial declaration of the
-<tt>XXXTargetAsmInfo</tt> class for use in <tt>XXXTargetAsmInfo.cpp</tt>.
-Similarly, <tt>XXXTargetAsmInfo.cpp</tt> usually has a few declarations of
-<tt>XXXTargetAsmInfo</tt> replacement values that override the default values
-in <tt>TargetAsmInfo.cpp</tt>. For example in <tt>SparcTargetAsmInfo.cpp</tt>:
-</p>
-
-<div class="doc_code">
-<pre>
-SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &amp;TM) {
- Data16bitsDirective = "\t.half\t";
- Data32bitsDirective = "\t.word\t";
- Data64bitsDirective = 0; // .xword is only supported by V9.
- ZeroDirective = "\t.skip\t";
- CommentString = "!";
- ConstantPoolSection = "\t.section \".rodata\",#alloc\n";
-}
-</pre>
-</div>
-
-<p>
-The X86 assembly printer implementation (<tt>X86TargetAsmInfo</tt>) is an
-example where the target-specific <tt>TargetAsmInfo</tt> class uses an
-overridden method: <tt>ExpandInlineAsm</tt>.
-</p>
-
-<p>
-A target-specific implementation of <tt>AsmPrinter</tt> is written in
-<tt>XXXAsmPrinter.cpp</tt>, which implements the <tt>AsmPrinter</tt> class that
-converts the LLVM IR to printable assembly. The implementation must include the
-following headers that have declarations for the <tt>AsmPrinter</tt> and
-<tt>MachineFunctionPass</tt> classes. The <tt>MachineFunctionPass</tt> is a
-subclass of <tt>FunctionPass</tt>.
-</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-</pre>
-</div>
-
-<p>
-As a <tt>FunctionPass</tt>, <tt>AsmPrinter</tt> first
-calls <tt>doInitialization</tt> to set up the <tt>AsmPrinter</tt>. In
-<tt>SparcAsmPrinter</tt>, a <tt>Mangler</tt> object is instantiated to process
-variable names.
-</p>
-
-<p>
-In <tt>XXXAsmPrinter.cpp</tt>, the <tt>runOnMachineFunction</tt> method
-(declared in <tt>MachineFunctionPass</tt>) must be implemented
-for <tt>XXXAsmPrinter</tt>. In <tt>MachineFunctionPass</tt>,
-the <tt>runOnFunction</tt> method invokes <tt>runOnMachineFunction</tt>.
-Target-specific implementations of <tt>runOnMachineFunction</tt> differ, but
-generally do the following to process each machine function:
-</p>
-
-<ul>
-<li>Call <tt>SetupMachineFunction</tt> to perform initialization.</li>
-
-<li>Call <tt>EmitConstantPool</tt> to print out (to the output stream) constants
- which have been spilled to memory.</li>
-
-<li>Call <tt>EmitJumpTableInfo</tt> to print out jump tables used by the current
- function.</li>
-
-<li>Print out the label for the current function.</li>
-
-<li>Print out the code for the function, including basic block labels and the
- assembly for each instruction (using <tt>printInstruction</tt>).</li>
-</ul>
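-
-<p>
-Put together, a schematic <tt>runOnMachineFunction</tt> that follows the steps
-above might look like this (simplified, and not tied to any particular
-release):
-</p>
-
-<div class="doc_code">
-<pre>
-bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &amp;MF) {
-  SetupMachineFunction(MF);                       // initialization
-  EmitConstantPool(MF.getConstantPool());         // spilled constants
-  EmitJumpTableInfo(MF.getJumpTableInfo(), MF);   // jump tables
-  // ... print the label for the current function ...
-  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
-       I != E; ++I) {
-    // ... print the label for the basic block ...
-    for (MachineBasicBlock::const_iterator II = I-&gt;begin(), IE = I-&gt;end();
-         II != IE; ++II)
-      printInstruction(II);   // generated in XXXGenAsmWriter.inc
-  }
-  return false;
-}
-</pre>
-</div>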
-
-<p>
-The <tt>XXXAsmPrinter</tt> implementation must also include the code generated
-by TableGen that is output in the <tt>XXXGenAsmWriter.inc</tt> file. The code
-in <tt>XXXGenAsmWriter.inc</tt> contains an implementation of the
-<tt>printInstruction</tt> method that may call these methods:
-</p>
-
-<ul>
-<li><tt>printOperand</tt></li>
-
-<li><tt>printMemOperand</tt></li>
-
-<li><tt>printCCOperand</tt> (for conditional statements)</li>
-
-<li><tt>printDataDirective</tt></li>
-
-<li><tt>printDeclare</tt></li>
-
-<li><tt>printImplicitDef</tt></li>
-
-<li><tt>printInlineAsm</tt></li>
-</ul>
-
-<p>
-The implementations of <tt>printDeclare</tt>, <tt>printImplicitDef</tt>,
-<tt>printInlineAsm</tt>, and <tt>printLabel</tt> in <tt>AsmPrinter.cpp</tt> are
-generally adequate for printing assembly and do not need to be
-overridden.
-</p>
-
-<p>
-The <tt>printOperand</tt> method is implemented with a long switch/case
-statement for the type of operand: register, immediate, basic block, external
-symbol, global address, constant pool index, or jump table index. For an
-instruction with a memory address operand, the <tt>printMemOperand</tt> method
-should be implemented to generate the proper output. Similarly,
-<tt>printCCOperand</tt> should be used to print a conditional operand.
-</p>
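-
-<p>
-A condensed sketch of such a <tt>printOperand</tt> switch is shown below,
-abbreviated to three operand kinds; the output stream name <tt>O</tt> and the
-exact case structure are assumptions for illustration:
-</p>
-
-<div class="doc_code">
-<pre>
-void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
-  const MachineOperand &amp;MO = MI-&gt;getOperand(opNum);
-  switch (MO.getType()) {
-  case MachineOperand::MO_Register:     // print "%reg"
-    O &lt;&lt; "%" &lt;&lt; getRegisterName(MO.getReg());
-    break;
-  case MachineOperand::MO_Immediate:    // print the constant
-    O &lt;&lt; (int)MO.getImm();
-    break;
-  case MachineOperand::MO_MachineBasicBlock:
-    O &lt;&lt; *MO.getMBB()-&gt;getSymbol();     // basic block label
-    break;
-  ...
-  }
-}
-</pre>
-</div>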
-
-<p><tt>doFinalization</tt> should be overridden in <tt>XXXAsmPrinter</tt>, and
-it should be called to shut down the assembly printer. During
-<tt>doFinalization</tt>, global variables and constants are printed to
-output.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="subtargetSupport">Subtarget Support</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Subtarget support is used to inform the code generation process of instruction
-set variations for a given chip set. For example, the LLVM SPARC implementation
-covers three major versions of the SPARC microprocessor architecture:
-Version 8 (V8, which is a 32-bit architecture), Version 9 (V9, a 64-bit
-architecture), and the UltraSPARC architecture. V8 has 16 double-precision
-floating-point registers that are also usable as either 32 single-precision or 8
-quad-precision registers. V8 is also purely big-endian. V9 has 32
-double-precision floating-point registers that are also usable as 16
-quad-precision registers, but cannot be used as single-precision registers. The
-UltraSPARC architecture combines V9 with UltraSPARC Visual Instruction Set
-extensions.
-</p>
-
-<p>
-If subtarget support is needed, you should implement a target-specific
-XXXSubtarget class for your architecture. This class should process the
-command-line options <tt>-mcpu=</tt> and <tt>-mattr=</tt>.
-</p>
-
-<p>
-TableGen uses definitions in the <tt>Target.td</tt> and <tt>Sparc.td</tt> files
-to generate code in <tt>SparcGenSubtarget.inc</tt>. In <tt>Target.td</tt>, shown
-below, the <tt>SubtargetFeature</tt> interface is defined. The first 4 string
-parameters of the <tt>SubtargetFeature</tt> interface are a feature name, an
-attribute set by the feature, the value of the attribute, and a description of
-the feature. (The fifth parameter is a list of features whose presence is
-implied, and its default value is an empty array.)
-</p>
-
-<div class="doc_code">
-<pre>
-class SubtargetFeature&lt;string n, string a, string v, string d,
- list&lt;SubtargetFeature&gt; i = []&gt; {
- string Name = n;
- string Attribute = a;
- string Value = v;
- string Desc = d;
- list&lt;SubtargetFeature&gt; Implies = i;
-}
-</pre>
-</div>
-
-<p>
-In the <tt>Sparc.td</tt> file, the SubtargetFeature is used to define the
-following features.
-</p>
-
-<div class="doc_code">
-<pre>
-def FeatureV9 : SubtargetFeature&lt;"v9", "IsV9", "true",
- "Enable SPARC-V9 instructions"&gt;;
-def FeatureV8Deprecated : SubtargetFeature&lt;"deprecated-v8",
- "V8DeprecatedInsts", "true",
- "Enable deprecated V8 instructions in V9 mode"&gt;;
-def FeatureVIS : SubtargetFeature&lt;"vis", "IsVIS", "true",
- "Enable UltraSPARC Visual Instruction Set extensions"&gt;;
-</pre>
-</div>
-
-<p>
-Elsewhere in <tt>Sparc.td</tt>, the Proc class is defined and then is used to
-define particular SPARC processor subtypes that may have the previously
-described features.
-</p>
-
-<div class="doc_code">
-<pre>
-class Proc&lt;string Name, list&lt;SubtargetFeature&gt; Features&gt;
- : Processor&lt;Name, NoItineraries, Features&gt;;
-
-def : Proc&lt;"generic", []&gt;;
-def : Proc&lt;"v8", []&gt;;
-def : Proc&lt;"supersparc", []&gt;;
-def : Proc&lt;"sparclite", []&gt;;
-def : Proc&lt;"f934", []&gt;;
-def : Proc&lt;"hypersparc", []&gt;;
-def : Proc&lt;"sparclite86x", []&gt;;
-def : Proc&lt;"sparclet", []&gt;;
-def : Proc&lt;"tsc701", []&gt;;
-def : Proc&lt;"v9", [FeatureV9]&gt;;
-def : Proc&lt;"ultrasparc", [FeatureV9, FeatureV8Deprecated]&gt;;
-def : Proc&lt;"ultrasparc3", [FeatureV9, FeatureV8Deprecated]&gt;;
-def : Proc&lt;"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]&gt;;
-</pre>
-</div>
-
-<p>
-From the <tt>Target.td</tt> and <tt>Sparc.td</tt> files, the resulting
-<tt>SparcGenSubtarget.inc</tt> specifies enum values to identify the features,
-arrays of constants to represent the CPU features and CPU subtypes, and the
-<tt>ParseSubtargetFeatures</tt> method that parses the features string and sets
-the specified subtarget options. The generated <tt>SparcGenSubtarget.inc</tt> file
-should be included in <tt>SparcSubtarget.cpp</tt>. The target-specific
-implementation of the <tt>XXXSubtarget</tt> constructor should follow this pseudocode:
-</p>
-
-<div class="doc_code">
-<pre>
-XXXSubtarget::XXXSubtarget(const Module &amp;M, const std::string &amp;FS) {
- // Set the default features
- // Determine default and user specified characteristics of the CPU
- // Call ParseSubtargetFeatures(FS, CPU) to parse the features string
- // Perform any additional operations
-}
-</pre>
-</div>
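-
-<p>
-A concrete rendering of that pseudocode for SPARC might look like the
-following; the member names here are assumptions that mirror the attribute
-strings given to <tt>SubtargetFeature</tt> above:
-</p>
-
-<div class="doc_code">
-<pre>
-SparcSubtarget::SparcSubtarget(const Module &amp;M, const std::string &amp;FS)
-  : IsV9(false), V8DeprecatedInsts(false), IsVIS(false) {
-  // Pick a default CPU, then let the feature string override it.
-  std::string CPU = "generic";
-  ParseSubtargetFeatures(FS, CPU);
-}
-</pre>
-</div>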
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="jitSupport">JIT Support</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The implementation of a target machine optionally includes a Just-In-Time (JIT)
-code generator that emits machine code and auxiliary structures as binary output
-that can be written directly to memory. To do this, implement JIT code
-generation by performing the following steps:
-</p>
-
-<ul>
-<li>Write an <tt>XXXCodeEmitter.cpp</tt> file that contains a machine function
- pass that transforms target-machine instructions into relocatable machine
- code.</li>
-
-<li>Write an <tt>XXXJITInfo.cpp</tt> file that implements the JIT interfaces for
- target-specific code-generation activities, such as emitting machine code
- and stubs.</li>
-
-<li>Modify <tt>XXXTargetMachine</tt> so that it provides a
- <tt>TargetJITInfo</tt> object through its <tt>getJITInfo</tt> method.</li>
-</ul>
-
-<p>
-There are several different approaches to writing the JIT support code. For
-instance, TableGen and target descriptor files may be used for creating a JIT
-code generator, but are not mandatory. For the Alpha and PowerPC target
-machines, TableGen is used to generate <tt>XXXGenCodeEmitter.inc</tt>, which
-contains the binary coding of machine instructions and the
-<tt>getBinaryCodeForInstr</tt> method to access those codes. Other JIT
-implementations do not use TableGen at all.
-</p>
-
-<p>
-Both <tt>XXXJITInfo.cpp</tt> and <tt>XXXCodeEmitter.cpp</tt> must include the
-<tt>llvm/CodeGen/MachineCodeEmitter.h</tt> header file that defines the
-<tt>MachineCodeEmitter</tt> class containing code for several callback functions
-that write data (in bytes, words, strings, etc.) to the output stream.
-</p>
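-
-<p>
-For instance, an emitter typically writes instruction encodings through
-callbacks such as <tt>emitByte</tt> and <tt>emitWordLE</tt>. The fragment
-below is purely illustrative:
-</p>
-
-<div class="doc_code">
-<pre>
-void Emitter::emitInstruction(const MachineInstr &amp;MI) {
-  ...
-  MCE.emitByte(BaseOpcode);   // write a single byte
-  MCE.emitWordLE(Binary);     // write a 32-bit little-endian word
-  ...
-}
-</pre>
-</div>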
-
-<!-- ======================================================================= -->
-<h3>
- <a name="mce">Machine Code Emitter</a>
-</h3>
-
-<div>
-
-<p>
-In <tt>XXXCodeEmitter.cpp</tt>, a target-specific version of the <tt>Emitter</tt>
-class is implemented as a function pass (subclass
-of <tt>MachineFunctionPass</tt>). The target-specific implementation
-of <tt>runOnMachineFunction</tt> (invoked by
-<tt>runOnFunction</tt> in <tt>MachineFunctionPass</tt>) iterates through each
-<tt>MachineBasicBlock</tt> and calls <tt>emitInstruction</tt> to process each
-instruction and emit binary code. <tt>emitInstruction</tt> is largely
-implemented with case statements on the instruction types defined in
-<tt>XXXInstrInfo.h</tt>. For example, in <tt>X86CodeEmitter.cpp</tt>,
-the <tt>emitInstruction</tt> method is built around the following switch/case
-statements:
-</p>
-
-<div class="doc_code">
-<pre>
-switch (Desc-&gt;TSFlags &amp; X86::FormMask) {
-case X86II::Pseudo: // for not yet implemented instructions
- ... // or pseudo-instructions
- break;
-case X86II::RawFrm: // for instructions with a fixed opcode value
- ...
- break;
-case X86II::AddRegFrm: // for instructions that have one register operand
- ... // added to their opcode
- break;
-case X86II::MRMDestReg:// for instructions that use the Mod/RM byte
- ... // to specify a destination (register)
- break;
-case X86II::MRMDestMem:// for instructions that use the Mod/RM byte
- ... // to specify a destination (memory)
- break;
-case X86II::MRMSrcReg: // for instructions that use the Mod/RM byte
- ... // to specify a source (register)
- break;
-case X86II::MRMSrcMem: // for instructions that use the Mod/RM byte
- ... // to specify a source (memory)
- break;
-case X86II::MRM0r: case X86II::MRM1r: // for instructions that operate on
-case X86II::MRM2r: case X86II::MRM3r: // a REGISTER r/m operand and
-case X86II::MRM4r: case X86II::MRM5r: // use the Mod/RM byte and a field
-case X86II::MRM6r: case X86II::MRM7r: // to hold extended opcode data
- ...
- break;
-case X86II::MRM0m: case X86II::MRM1m: // for instructions that operate on
-case X86II::MRM2m: case X86II::MRM3m: // a MEMORY r/m operand and
-case X86II::MRM4m: case X86II::MRM5m: // use the Mod/RM byte and a field
-case X86II::MRM6m: case X86II::MRM7m: // to hold extended opcode data
- ...
- break;
-case X86II::MRMInitReg: // for instructions whose source and
- ... // destination are the same register
- break;
-}
-</pre>
-</div>
-
-<p>
-The implementations of these case statements often first emit the opcode and
-then get the operand(s). Then depending upon the operand, helper methods may be
-called to process the operand(s). For example, in <tt>X86CodeEmitter.cpp</tt>,
-for the <tt>X86II::AddRegFrm</tt> case, the first data emitted
-(by <tt>emitByte</tt>) is the opcode added to the register operand. Then an
-object representing the machine operand, <tt>MO1</tt>, is extracted. The helper
-methods such as <tt>isImmediate</tt>,
-<tt>isGlobalAddress</tt>, <tt>isExternalSymbol</tt>, <tt>isConstantPoolIndex</tt>, and
-<tt>isJumpTableIndex</tt> determine the operand
-type. (<tt>X86CodeEmitter.cpp</tt> also has private methods such
-as <tt>emitConstant</tt>, <tt>emitGlobalAddress</tt>,
-<tt>emitExternalSymbolAddress</tt>, <tt>emitConstPoolAddress</tt>,
-and <tt>emitJumpTableAddress</tt> that emit the data into the output stream.)
-</p>
-
-<div class="doc_code">
-<pre>
-case X86II::AddRegFrm:
- MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
-
- if (CurOp != NumOps) {
- const MachineOperand &amp;MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86InstrInfo::sizeOfImm(Desc);
- if (MO1.isImmediate())
- emitConstant(MO1.getImm(), Size);
- else {
- unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
- : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
- if (Opcode == X86::MOV64ri)
- rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
- if (MO1.isGlobalAddress()) {
- bool NeedStub = isa&lt;Function&gt;(MO1.getGlobal());
- bool isLazy = gvNeedsLazyPtr(MO1.getGlobal());
- emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- NeedStub, isLazy);
- } else if (MO1.isExternalSymbol())
- emitExternalSymbolAddress(MO1.getSymbolName(), rt);
- else if (MO1.isConstantPoolIndex())
- emitConstPoolAddress(MO1.getIndex(), rt);
- else if (MO1.isJumpTableIndex())
- emitJumpTableAddress(MO1.getIndex(), rt);
- }
- }
- break;
-</pre>
-</div>
-
-<p>
-In the previous example, <tt>XXXCodeEmitter.cpp</tt> uses the
-variable <tt>rt</tt>, which is a RelocationType enum that may be used to
-relocate addresses (for example, a global address with a PIC base offset). The
-<tt>RelocationType</tt> enum for that target is defined in the short
-target-specific <tt>XXXRelocations.h</tt> file. The <tt>RelocationType</tt> is used by
-the <tt>relocate</tt> method defined in <tt>XXXJITInfo.cpp</tt> to rewrite
-addresses for referenced global symbols.
-</p>
-
-<p>
-For example, <tt>X86Relocations.h</tt> specifies the following relocation types
-for the X86 addresses. In all four cases, the relocated value is added to the
-value already in memory. For <tt>reloc_pcrel_word</tt>
-and <tt>reloc_picrel_word</tt>, there is an additional initial adjustment.
-</p>
-
-<div class="doc_code">
-<pre>
-enum RelocationType {
- reloc_pcrel_word = 0, // add reloc value after adjusting for the PC loc
- reloc_picrel_word = 1, // add reloc value after adjusting for the PIC base
- reloc_absolute_word = 2, // absolute relocation; no additional adjustment
- reloc_absolute_dword = 3 // absolute relocation; no additional adjustment
-};
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="targetJITInfo">Target JIT Info</a>
-</h3>
-
-<div>
-
-<p>
-<tt>XXXJITInfo.cpp</tt> implements the JIT interfaces for target-specific
-code-generation activities, such as emitting machine code and stubs. At minimum,
-a target-specific version of <tt>XXXJITInfo</tt> implements the following:
-</p>
-
-<ul>
-<li><tt>getLazyResolverFunction</tt> &mdash; Initializes the JIT and gives the
- target a function that is used for compilation.</li>
-
-<li><tt>emitFunctionStub</tt> &mdash; Returns a native function with a specified
- address for a callback function.</li>
-
-<li><tt>relocate</tt> &mdash; Changes the addresses of referenced globals, based
- on relocation types.</li>
-
-<li>A callback function that wraps a function stub, which is used when the
- real target is not initially known.</li>
-</ul>
-
-<p>
-<tt>getLazyResolverFunction</tt> is generally trivial to implement. It stores
-the incoming parameter as the global <tt>JITCompilerFunction</tt> and returns
-the callback function that will be used as a function wrapper. For the Alpha target
-(in <tt>AlphaJITInfo.cpp</tt>), the <tt>getLazyResolverFunction</tt>
-implementation is simply:
-</p>
-
-<div class="doc_code">
-<pre>
-TargetJITInfo::LazyResolverFn AlphaJITInfo::getLazyResolverFunction(
- JITCompilerFn F) {
- JITCompilerFunction = F;
- return AlphaCompilationCallback;
-}
-</pre>
-</div>
-
-<p>
-For the X86 target, the <tt>getLazyResolverFunction</tt> implementation is a
-little more complicated, because it returns a different callback function for
-processors with SSE instructions and XMM registers.
-</p>
-
-<p>
-The callback function initially saves and later restores the callee register
-values, incoming arguments, and frame and return address. The callback function
-needs low-level access to the registers or stack, so it is typically implemented
-with assembler.
-</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="http://www.woo.com">Mason Woo</a> and <a href="http://misha.brukman.net">Misha Brukman</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
- <br>
- Last modified: $Date: 2012-10-25 17:54:06 +0200 (Thu, 25 Oct 2012) $
-</address>
-
-</body>
-</html>
diff --git a/docs/WritingAnLLVMBackend.rst b/docs/WritingAnLLVMBackend.rst
new file mode 100644
index 000000000000..a03a5e42c22d
--- /dev/null
+++ b/docs/WritingAnLLVMBackend.rst
@@ -0,0 +1,1838 @@
+================================
+Writing an LLVM Compiler Backend
+================================
+
+.. toctree::
+ :hidden:
+
+ HowToUseInstrMappings
+
+.. contents::
+ :local:
+
+Introduction
+============
+
+This document describes techniques for writing compiler backends that convert
+the LLVM Intermediate Representation (IR) to code for a specified machine or
+other languages. Code intended for a specific machine can take the form of
+either assembly code or binary code (usable for a JIT compiler).
+
+The backend of LLVM features a target-independent code generator that may
+create output for several types of target CPUs --- including X86, PowerPC,
+ARM, and SPARC. The backend may also be used to generate code targeted at SPUs
+of the Cell processor or GPUs to support the execution of compute kernels.
+
+The document focuses on existing examples found in subdirectories of
+``llvm/lib/Target`` in a downloaded LLVM release. In particular, this document
+focuses on the example of creating a static compiler (one that emits text
+assembly) for a SPARC target, because SPARC has fairly standard
+characteristics, such as a RISC instruction set and straightforward calling
+conventions.
+
+Audience
+--------
+
+The audience for this document is anyone who needs to write an LLVM backend to
+generate code for a specific hardware or software target.
+
+Prerequisite Reading
+--------------------
+
+These essential documents must be read before reading this document:
+
+* `LLVM Language Reference Manual <LangRef.html>`_ --- a reference manual for
+ the LLVM assembly language.
+
+* :doc:`CodeGenerator` --- a guide to the components (classes and code
+ generation algorithms) for translating the LLVM internal representation into
+ machine code for a specified target. Pay particular attention to the
+ descriptions of code generation stages: Instruction Selection, Scheduling and
+ Formation, SSA-based Optimization, Register Allocation, Prolog/Epilog Code
+ Insertion, Late Machine Code Optimizations, and Code Emission.
+
+* :doc:`TableGenFundamentals` --- a document that describes the TableGen
+ (``tblgen``) application that manages domain-specific information to support
+ LLVM code generation. TableGen processes input from a target description
+ file (``.td`` suffix) and generates C++ code that can be used for code
+ generation.
+
+* :doc:`WritingAnLLVMPass` --- The assembly printer is a ``FunctionPass``, as
+ are several ``SelectionDAG`` processing steps.
+
+To follow the SPARC examples in this document, have a copy of `The SPARC
+Architecture Manual, Version 8 <http://www.sparc.org/standards/V8.pdf>`_ for
+reference. For details about the ARM instruction set, refer to the `ARM
+Architecture Reference Manual <http://infocenter.arm.com/>`_. For more about
+the GNU Assembler format (``GAS``), see `Using As
+<http://sourceware.org/binutils/docs/as/index.html>`_, especially for the
+assembly printer. "Using As" contains a list of target machine dependent
+features.
+
+Basic Steps
+-----------
+
+To write a compiler backend for LLVM that converts the LLVM IR to code for a
+specified target (machine or other language), follow these steps:
+
+* Create a subclass of the ``TargetMachine`` class that describes
+ characteristics of your target machine. Copy existing examples of specific
+ ``TargetMachine`` class and header files; for example, start with
+ ``SparcTargetMachine.cpp`` and ``SparcTargetMachine.h``, but change the file
+ names for your target. Similarly, change code that references "``Sparc``" to
+ reference your target.
+
+* Describe the register set of the target. Use TableGen to generate code for
+ register definition, register aliases, and register classes from a
+  target-specific ``RegisterInfo.td`` input file. You should also write
+  additional code for a subclass of the ``TargetRegisterInfo`` class that
+  represents the register file data used for register allocation and also
+  describes the interactions between registers.
+
+* Describe the instruction set of the target. Use TableGen to generate code
+ for target-specific instructions from target-specific versions of
+ ``TargetInstrFormats.td`` and ``TargetInstrInfo.td``. You should write
+ additional code for a subclass of the ``TargetInstrInfo`` class to represent
+ machine instructions supported by the target machine.
+
+* Describe the selection and conversion of the LLVM IR from a Directed Acyclic
+ Graph (DAG) representation of instructions to native target-specific
+ instructions. Use TableGen to generate code that matches patterns and
+ selects instructions based on additional information in a target-specific
+ version of ``TargetInstrInfo.td``. Write code for ``XXXISelDAGToDAG.cpp``,
+ where ``XXX`` identifies the specific target, to perform pattern matching and
+ DAG-to-DAG instruction selection. Also write code in ``XXXISelLowering.cpp``
+ to replace or remove operations and data types that are not supported
+ natively in a SelectionDAG.
+
+* Write code for an assembly printer that converts LLVM IR to a GAS format for
+ your target machine. You should add assembly strings to the instructions
+ defined in your target-specific version of ``TargetInstrInfo.td``. You
+ should also write code for a subclass of ``AsmPrinter`` that performs the
+ LLVM-to-assembly conversion and a trivial subclass of ``TargetAsmInfo``.
+
+* Optionally, add support for subtargets (i.e., variants with different
+ capabilities). You should also write code for a subclass of the
+ ``TargetSubtarget`` class, which allows you to use the ``-mcpu=`` and
+ ``-mattr=`` command-line options.
+
+* Optionally, add JIT support and create a machine code emitter (subclass of
+ ``TargetJITInfo``) that is used to emit binary code directly into memory.
+
+In the ``.cpp`` and ``.h`` files, initially stub up these methods and then
+implement them later. Initially, you may not know which private members the
+class will need and which components will need to be subclassed.
+
+Preliminaries
+-------------
+
+To actually create your compiler backend, you need to create and modify a few
+files. The absolute minimum is discussed here. But to actually use the LLVM
+target-independent code generator, you must perform the steps described in the
+:doc:`LLVM Target-Independent Code Generator <CodeGenerator>` document.
+
+First, you should create a subdirectory under ``lib/Target`` to hold all the
+files related to your target. If your target is called "Dummy", create the
+directory ``lib/Target/Dummy``.
+
+In this new directory, create a ``Makefile``. It is easiest to copy a
+``Makefile`` of another target and modify it. It should at least contain the
+``LEVEL``, ``LIBRARYNAME`` and ``TARGET`` variables, and then include
+``$(LEVEL)/Makefile.common``. The library can be named ``LLVMDummy`` (for
+example, see the MIPS target). Alternatively, you can split the library into
+``LLVMDummyCodeGen`` and ``LLVMDummyAsmPrinter``, the latter of which should be
+implemented in a subdirectory below ``lib/Target/Dummy`` (for example, see the
+PowerPC target).
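+
+A minimal sketch of such a ``Makefile`` for the hypothetical "Dummy" target
+follows; it sets only the variables described above, so consult an existing
+target's ``Makefile`` for anything else your target needs (such as
+``BUILT_SOURCES`` for TableGen output).
+
+.. code-block:: make
+
+  # lib/Target/Dummy/Makefile (sketch)
+  LEVEL = ../../..
+  LIBRARYNAME = LLVMDummy
+  TARGET = Dummy
+
+  include $(LEVEL)/Makefile.common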
+
+Note that these two naming schemes are hardcoded into ``llvm-config``. Using
+any other naming scheme will confuse ``llvm-config`` and produce a lot of
+(seemingly unrelated) linker errors when linking ``llc``.
+
+To make your target actually do something, you need to implement a subclass of
+``TargetMachine``. This implementation should typically be in the file
+``lib/Target/DummyTargetMachine.cpp``, but any file in the ``lib/Target``
+directory will be built and should work. To use LLVM's target-independent code
+generator, you should do what all current machine backends do: create a
+subclass of ``LLVMTargetMachine``. (To create a target from scratch, create a
+subclass of ``TargetMachine``.)
+
+To get LLVM to actually build and link your target, you need to add it to the
+``TARGETS_TO_BUILD`` variable. To do this, you modify the configure script to
+know about your target when parsing the ``--enable-targets`` option. Search
+the configure script for ``TARGETS_TO_BUILD``, add your target to the lists
+there (some creativity required), and then reconfigure. Alternatively, you can
+change ``autoconf/configure.ac`` and regenerate configure by running
+``./autoconf/AutoRegen.sh``.
+
+Target Machine
+==============
+
+``LLVMTargetMachine`` is designed as a base class for targets implemented with
+the LLVM target-independent code generator. The ``LLVMTargetMachine`` class
+should be specialized by a concrete target class that implements the various
+virtual methods. ``LLVMTargetMachine`` is defined as a subclass of
+``TargetMachine`` in ``include/llvm/Target/TargetMachine.h``. The
+``TargetMachine`` class implementation (``TargetMachine.cpp``) also processes
+numerous command-line options.
+
+To create a concrete target-specific subclass of ``LLVMTargetMachine``, start
+by copying an existing ``TargetMachine`` class and header. You should name the
+files that you create to reflect your specific target. For instance, for the
+SPARC target, name the files ``SparcTargetMachine.h`` and
+``SparcTargetMachine.cpp``.
+
+For a target machine ``XXX``, the implementation of ``XXXTargetMachine`` must
+have access methods to obtain objects that represent target components. These
+methods are named ``get*Info``, and are intended to obtain the instruction set
+(``getInstrInfo``), register set (``getRegisterInfo``), stack frame layout
+(``getFrameInfo``), and similar information. ``XXXTargetMachine`` must also
+implement the ``getDataLayout`` method to access an object with target-specific
+data characteristics, such as data type size and alignment requirements.
+
+For instance, for the SPARC target, the header file ``SparcTargetMachine.h``
+declares prototypes for several ``get*Info`` and ``getDataLayout`` methods that
+simply return a class member.
+
+.. code-block:: c++
+
+ namespace llvm {
+
+ class Module;
+
+ class SparcTargetMachine : public LLVMTargetMachine {
+ const DataLayout DataLayout; // Calculates type size & alignment
+ SparcSubtarget Subtarget;
+ SparcInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+
+ protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+ public:
+ SparcTargetMachine(const Module &M, const std::string &FS);
+
+ virtual const SparcInstrInfo *getInstrInfo() const {return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const {return &FrameInfo; }
+ virtual const TargetSubtarget *getSubtargetImpl() const{return &Subtarget; }
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const DataLayout *getDataLayout() const { return &DataLayout; }
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, bool Fast);
+ virtual bool addPreEmitPass(PassManagerBase &PM, bool Fast);
+ };
+
+ } // end namespace llvm
+
+At a minimum, ``XXXTargetMachine`` should implement the following access
+methods:
+
+* ``getInstrInfo()``
+* ``getRegisterInfo()``
+* ``getFrameInfo()``
+* ``getDataLayout()``
+* ``getSubtargetImpl()``
+
+For some targets, you also need to support the following methods:
+
+* ``getTargetLowering()``
+* ``getJITInfo()``
+
+In addition, the ``XXXTargetMachine`` constructor should specify a
+``TargetDescription`` string that determines the data layout for the target
+machine, including characteristics such as pointer size, alignment, and
+endianness. For example, the constructor for ``SparcTargetMachine`` contains
+the following:
+
+.. code-block:: c++
+
+ SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout("E-p:32:32-f128:128:128"),
+ Subtarget(M, FS), InstrInfo(Subtarget),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+ }
+
+Hyphens separate portions of the ``TargetDescription`` string.
+
+* An upper-case "``E``" in the string indicates a big-endian target data model.
+ A lower-case "``e``" indicates little-endian.
+
+* "``p:``" is followed by pointer information: size, ABI alignment, and
+ preferred alignment. If only two figures follow "``p:``", then the first
+ value is pointer size, and the second value is both ABI and preferred
+ alignment.
+
+* Then a letter for numeric type alignment: "``i``", "``f``", "``v``", or
+  "``a``" (corresponding to integer, floating point, vector, or aggregate).
+  "``i``", "``v``", or "``a``" are followed by ABI alignment and preferred
+  alignment. "``f``" is followed by three values: the first indicates the size
+  of a long double, then ABI alignment, and then preferred alignment.
+
+In the SPARC string above, "``E``" selects big-endian, "``p:32:32``" specifies
+32-bit pointers with 32-bit ABI and preferred alignment, and "``f128:128:128``"
+specifies a 128-bit long double with 128-bit ABI and preferred alignment.
+
+Target Registration
+===================
+
+You must also register your target with the ``TargetRegistry``, which is what
+other LLVM tools use to look up and use your target at runtime. The
+``TargetRegistry`` can be used directly, but for most targets there are helper
+templates which should take care of the work for you.
+
+All targets should declare a global ``Target`` object which is used to
+represent the target during registration. Then, in the target's ``TargetInfo``
+library, the target should define that object and use the ``RegisterTarget``
+template to register the target. For example, the Sparc registration code
+looks like this:
+
+.. code-block:: c++
+
+ Target llvm::TheSparcTarget;
+
+ extern "C" void LLVMInitializeSparcTargetInfo() {
+ RegisterTarget<Triple::sparc, /*HasJIT=*/false>
+ X(TheSparcTarget, "sparc", "Sparc");
+ }
+
+This allows the ``TargetRegistry`` to look up the target by name or by target
+triple. In addition, most targets will also register additional features which
+are available in separate libraries. These registration steps are separate,
+because some clients may wish to only link in some parts of the target --- the
+JIT code generator does not require the use of the assembly printer, for
+example. Here is an example of registering the Sparc assembly printer:
+
+.. code-block:: c++
+
+ extern "C" void LLVMInitializeSparcAsmPrinter() {
+ RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
+ }
+
+For more information, see "`llvm/Target/TargetRegistry.h
+</doxygen/TargetRegistry_8h-source.html>`_".
+
+Register Set and Register Classes
+=================================
+
+You should describe a concrete target-specific class that represents the
+register file of a target machine. This class is called ``XXXRegisterInfo``
+(where ``XXX`` identifies the target) and represents the register file data
+that is used for register allocation. It also describes the interactions
+between registers.
+
+You also need to define register classes to categorize related registers. A
+register class should be added for groups of registers that are all treated the
+same way for some instruction. Typical examples are register classes for
+integer, floating-point, or vector registers. The register allocator may then
+use any register in a specified register class to perform an instruction.
+Register classes supply the sets from which virtual registers are allocated to
+instructions, and they let the target-independent register allocator
+automatically choose the actual physical registers.
+
+Much of the code for registers, including register definition, register
+aliases, and register classes, is generated by TableGen from
+``XXXRegisterInfo.td`` input files and placed in ``XXXGenRegisterInfo.h.inc``
+and ``XXXGenRegisterInfo.inc`` output files. Some of the code in the
+implementation of ``XXXRegisterInfo`` requires hand-coding.
+
+Defining a Register
+-------------------
+
+The ``XXXRegisterInfo.td`` file typically starts with register definitions for
+a target machine. The ``Register`` class (specified in ``Target.td``) is used
+to define an object for each register. The specified string ``n`` becomes the
+``Name`` of the register. The basic ``Register`` object does not have any
+subregisters and does not specify any aliases.
+
+.. code-block:: llvm
+
+ class Register<string n> {
+ string Namespace = "";
+ string AsmName = n;
+ string Name = n;
+ int SpillSize = 0;
+ int SpillAlignment = 0;
+ list<Register> Aliases = [];
+ list<Register> SubRegs = [];
+ list<int> DwarfNumbers = [];
+ }
+
+For example, in the ``X86RegisterInfo.td`` file, there are register definitions
+that utilize the ``Register`` class, such as:
+
+.. code-block:: llvm
+
+ def AL : Register<"AL">, DwarfRegNum<[0, 0, 0]>;
+
+This defines the register ``AL`` and assigns it values (with ``DwarfRegNum``)
+that are used by ``gcc``, ``gdb``, or a debug information writer to identify a
+register. For register ``AL``, ``DwarfRegNum`` takes an array of 3 values
+representing 3 different modes: the first element is for X86-64, the second for
+exception handling (EH) on X86-32, and the third is generic. -1 is a special
+Dwarf number that indicates the gcc number is undefined, and -2 indicates the
+register number is invalid for this mode.
+
+From the previously described line in the ``X86RegisterInfo.td`` file, TableGen
+generates this code in the ``X86GenRegisterInfo.inc`` file:
+
+.. code-block:: c++
+
+ static const unsigned GR8[] = { X86::AL, ... };
+
+ const unsigned AL_AliasSet[] = { X86::AX, X86::EAX, X86::RAX, 0 };
+
+ const TargetRegisterDesc RegisterDescriptors[] = {
+ ...
+ { "AL", "AL", AL_AliasSet, Empty_SubRegsSet, Empty_SubRegsSet, AL_SuperRegsSet }, ...
+
+From the register info file, TableGen generates a ``TargetRegisterDesc`` object
+for each register. ``TargetRegisterDesc`` is defined in
+``include/llvm/Target/TargetRegisterInfo.h`` with the following fields:
+
+.. code-block:: c++
+
+ struct TargetRegisterDesc {
+ const char *AsmName; // Assembly language name for the register
+ const char *Name; // Printable name for the reg (for debugging)
+ const unsigned *AliasSet; // Register Alias Set
+ const unsigned *SubRegs; // Sub-register set
+ const unsigned *ImmSubRegs; // Immediate sub-register set
+ const unsigned *SuperRegs; // Super-register set
+ };
+
+TableGen uses the entire target description file (``.td``) to determine text
+names for the register (in the ``AsmName`` and ``Name`` fields of
+``TargetRegisterDesc``) and the relationships of other registers to the defined
+register (in the other ``TargetRegisterDesc`` fields). In this example, other
+definitions establish the registers "``AX``", "``EAX``", and "``RAX``" as
+aliases for one another, so TableGen generates a null-terminated array
+(``AL_AliasSet``) for this register alias set.
+
+The ``Register`` class is commonly used as a base class for more complex
+classes. In ``Target.td``, the ``Register`` class is the base for the
+``RegisterWithSubRegs`` class that is used to define registers that need to
+specify subregisters in the ``SubRegs`` list, as shown here:
+
+.. code-block:: llvm
+
+ class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
+ let SubRegs = subregs;
+ }
+
+In ``SparcRegisterInfo.td``, additional register classes are defined for SPARC:
+a ``Register`` subclass, ``SparcReg``, and further subclasses: ``Ri``, ``Rf``,
+and ``Rd``. SPARC registers are identified by 5-bit ID numbers, which is a
+feature common to these subclasses. Note the use of "``let``" expressions to
+override values that are initially defined in a superclass (such as the
+``SubRegs`` field in the ``Rd`` class).
+
+.. code-block:: llvm
+
+ class SparcReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "SP";
+ }
+ // Ri - 32-bit integer registers
+ class Ri<bits<5> num, string n> :
+ SparcReg<n> {
+ let Num = num;
+ }
+ // Rf - 32-bit floating-point registers
+ class Rf<bits<5> num, string n> :
+ SparcReg<n> {
+ let Num = num;
+ }
+ // Rd - Slots in the FP register file for 64-bit floating-point values.
+ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
+ let Num = num;
+ let SubRegs = subregs;
+ }
+
+In the ``SparcRegisterInfo.td`` file, there are register definitions that
+utilize these subclasses of ``Register``, such as:
+
+.. code-block:: llvm
+
+ def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
+ def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
+ ...
+ def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>;
+ def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>;
+ ...
+ def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>;
+ def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>;
+
+The last two registers shown above (``D0`` and ``D1``) are double-precision
+floating-point registers that are aliases for pairs of single-precision
+floating-point sub-registers. In addition to aliases, the sub-register and
+super-register relationships of the defined register are in fields of a
+register's ``TargetRegisterDesc``.
+
+Defining a Register Class
+-------------------------
+
+The ``RegisterClass`` class (specified in ``Target.td``) is used to define an
+object that represents a group of related registers and also defines the
+default allocation order of the registers. A target description file
+``XXXRegisterInfo.td`` that uses ``Target.td`` can construct register classes
+using the following class:
+
+.. code-block:: llvm
+
+ class RegisterClass<string namespace,
+ list<ValueType> regTypes, int alignment, dag regList> {
+ string Namespace = namespace;
+ list<ValueType> RegTypes = regTypes;
+ int Size = 0; // spill size, in bits; zero lets tblgen pick the size
+ int Alignment = alignment;
+
+ // CopyCost is the cost of copying a value between two registers
+ // default value 1 means a single instruction
+ // A negative value means copying is extremely expensive or impossible
+ int CopyCost = 1;
+ dag MemberList = regList;
+
+ // for register classes that are subregisters of this class
+ list<RegisterClass> SubRegClassList = [];
+
+ code MethodProtos = [{}]; // to insert arbitrary code
+ code MethodBodies = [{}];
+ }
+
+To define a ``RegisterClass``, use the following 4 arguments:
+
+* The first argument of the definition is the name of the namespace.
+
+* The second argument is a list of ``ValueType`` register type values that are
+ defined in ``include/llvm/CodeGen/ValueTypes.td``. Defined values include
+ integer types (such as ``i16``, ``i32``, and ``i1`` for Boolean),
+ floating-point types (``f32``, ``f64``), and vector types (for example,
+ ``v8i16`` for an ``8 x i16`` vector). All registers in a ``RegisterClass``
+ must have the same ``ValueType``, but some registers may store vector data in
+  different configurations. For example, a register that can process a 128-bit
+ vector may be able to handle 16 8-bit integer elements, 8 16-bit integers, 4
+ 32-bit integers, and so on.
+
+* The third argument of the ``RegisterClass`` definition specifies the
+ alignment required of the registers when they are stored or loaded to
+ memory.
+
+* The final argument, ``regList``, specifies which registers are in this class.
+ If an alternative allocation order method is not specified, then ``regList``
+ also defines the order of allocation used by the register allocator. Besides
+ simply listing registers with ``(add R0, R1, ...)``, more advanced set
+ operators are available. See ``include/llvm/Target/Target.td`` for more
+ information.
+
+In ``SparcRegisterInfo.td``, three ``RegisterClass`` objects are defined:
+``FPRegs``, ``DFPRegs``, and ``IntRegs``. For all three register classes, the
+first argument defines the namespace with the string "``SP``". ``FPRegs``
+defines a group of 32 single-precision floating-point registers (``F0`` to
+``F31``); ``DFPRegs`` defines a group of 16 double-precision registers
+(``D0-D15``).
+
+.. code-block:: llvm
+
+ // F0, F1, F2, ..., F31
+ def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
+
+ def DFPRegs : RegisterClass<"SP", [f64], 64,
+ (add D0, D1, D2, D3, D4, D5, D6, D7, D8,
+ D9, D10, D11, D12, D13, D14, D15)>;
+
+ def IntRegs : RegisterClass<"SP", [i32], 32,
+ (add L0, L1, L2, L3, L4, L5, L6, L7,
+ I0, I1, I2, I3, I4, I5,
+ O0, O1, O2, O3, O4, O5, O7,
+ G1,
+ // Non-allocatable regs:
+ G2, G3, G4,
+ O6, // stack ptr
+ I6, // frame ptr
+ I7, // return address
+ G0, // constant zero
+ G5, G6, G7 // reserved for kernel
+ )>;
+
+Using ``SparcRegisterInfo.td`` with TableGen generates several output files
+that are intended for inclusion in other source code that you write.
+``SparcRegisterInfo.td`` generates ``SparcGenRegisterInfo.h.inc``, which should
+be included in the header file of the SPARC register implementation that you
+write (``SparcRegisterInfo.h``). In
+``SparcGenRegisterInfo.h.inc`` a new structure is defined called
+``SparcGenRegisterInfo`` that uses ``TargetRegisterInfo`` as its base. It also
+specifies types, based upon the defined register classes: ``DFPRegsClass``,
+``FPRegsClass``, and ``IntRegsClass``.
+
+``SparcRegisterInfo.td`` also generates ``SparcGenRegisterInfo.inc``, which is
+included at the bottom of ``SparcRegisterInfo.cpp``, the SPARC register
+implementation. The code below shows only the generated integer registers and
+associated register classes. The order of registers in ``IntRegs`` reflects
+the order in the definition of ``IntRegs`` in the target description file.
+
+.. code-block:: c++
+
+ // IntRegs Register Class...
+ static const unsigned IntRegs[] = {
+ SP::L0, SP::L1, SP::L2, SP::L3, SP::L4, SP::L5,
+ SP::L6, SP::L7, SP::I0, SP::I1, SP::I2, SP::I3,
+ SP::I4, SP::I5, SP::O0, SP::O1, SP::O2, SP::O3,
+ SP::O4, SP::O5, SP::O7, SP::G1, SP::G2, SP::G3,
+ SP::G4, SP::O6, SP::I6, SP::I7, SP::G0, SP::G5,
+ SP::G6, SP::G7,
+ };
+
+ // IntRegsVTs Register Class Value Types...
+ static const MVT::ValueType IntRegsVTs[] = {
+ MVT::i32, MVT::Other
+ };
+
+ namespace SP { // Register class instances
+ DFPRegsClass DFPRegsRegClass;
+ FPRegsClass FPRegsRegClass;
+ IntRegsClass IntRegsRegClass;
+ ...
+ // IntRegs Sub-register Classess...
+ static const TargetRegisterClass* const IntRegsSubRegClasses [] = {
+ NULL
+ };
+ ...
+ // IntRegs Super-register Classess...
+ static const TargetRegisterClass* const IntRegsSuperRegClasses [] = {
+ NULL
+ };
+ ...
+ // IntRegs Register Class sub-classes...
+ static const TargetRegisterClass* const IntRegsSubclasses [] = {
+ NULL
+ };
+ ...
+ // IntRegs Register Class super-classes...
+ static const TargetRegisterClass* const IntRegsSuperclasses [] = {
+ NULL
+ };
+
+ IntRegsClass::IntRegsClass() : TargetRegisterClass(IntRegsRegClassID,
+ IntRegsVTs, IntRegsSubclasses, IntRegsSuperclasses, IntRegsSubRegClasses,
+ IntRegsSuperRegClasses, 4, 4, 1, IntRegs, IntRegs + 32) {}
+ }
+
+The register allocators will avoid using reserved registers, and callee-saved
+registers are not used until all the volatile registers have been used. That
+is usually good enough, but in some cases it may be necessary to provide custom
+allocation orders, as sketched below.
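+
+The following is a hypothetical sketch of a custom allocation order, using the
+``AltOrders`` and ``AltOrderSelect`` fields that ``Target.td`` provides for
+this purpose; the rotated order and the always-alternate selection logic are
+invented for illustration.
+
+.. code-block:: llvm
+
+  def IntRegs : RegisterClass<"SP", [i32], 32, (add L0, L1, ...)> {
+    // One alternative order: the same registers, rotated left by eight.
+    let AltOrders = [(rotl IntRegs, 8)];
+    // Return the index of the order to use (0 is the default MemberList).
+    let AltOrderSelect = [{
+      return 1;
+    }];
+  }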
+
+Implement a subclass of ``TargetRegisterInfo``
+----------------------------------------------
+
+The final step is to hand code portions of ``XXXRegisterInfo``, which
+implements the interface described in ``TargetRegisterInfo.h`` (see
+:ref:`TargetRegisterInfo`). These functions return ``0``, ``NULL``, or
+``false``, unless overridden. Here is a list of functions that are overridden
+for the SPARC implementation in ``SparcRegisterInfo.cpp``; a sketch of the
+first one follows the list:
+
+* ``getCalleeSavedRegs`` --- Returns a list of callee-saved registers in the
+ order of the desired callee-save stack frame offset.
+
+* ``getReservedRegs`` --- Returns a bitset indexed by physical register
+ numbers, indicating if a particular register is unavailable.
+
+* ``hasFP`` --- Returns a Boolean indicating if a function should have a
+  dedicated frame pointer register.
+
+* ``eliminateCallFramePseudoInstr`` --- If call frame setup or destroy pseudo
+ instructions are used, this can be called to eliminate them.
+
+* ``eliminateFrameIndex`` --- Eliminate abstract frame indices from
+ instructions that may use them.
+
+* ``emitPrologue`` --- Insert prologue code into the function.
+
+* ``emitEpilogue`` --- Insert epilogue code into the function.
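+
+As a sketch of the first function in the list, the SPARC implementation of
+``getCalleeSavedRegs`` is nearly empty, because SPARC register windows save
+and restore registers implicitly; note that the exact signature and return
+type vary across LLVM releases.
+
+.. code-block:: c++
+
+  const unsigned *
+  SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+    // No registers need explicit saves; the list is null-terminated.
+    static const unsigned CalleeSavedRegs[] = { 0 };
+    return CalleeSavedRegs;
+  }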
+
+.. _instruction-set:
+
+Instruction Set
+===============
+
+During the early stages of code generation, the LLVM IR code is converted to a
+``SelectionDAG`` with nodes that are instances of the ``SDNode`` class
+containing target instructions. An ``SDNode`` has an opcode, operands, type
+requirements, and operation properties; for example, whether an operation is
+commutative or whether it loads from memory. The various operation node
+types are described in the ``include/llvm/CodeGen/SelectionDAGNodes.h`` file
+(values of the ``NodeType`` enum in the ``ISD`` namespace).
+
+TableGen uses the following target description (``.td``) input files to
+generate much of the code for instruction definition:
+
+* ``Target.td`` --- Where the ``Instruction``, ``Operand``, ``InstrInfo``, and
+ other fundamental classes are defined.
+
+* ``TargetSelectionDAG.td`` --- Used by ``SelectionDAG`` instruction selection
+ generators, contains ``SDTC*`` classes (selection DAG type constraint),
+ definitions of ``SelectionDAG`` nodes (such as ``imm``, ``cond``, ``bb``,
+ ``add``, ``fadd``, ``sub``), and pattern support (``Pattern``, ``Pat``,
+  ``PatFrag``, ``PatLeaf``, ``ComplexPattern``).
+
+* ``XXXInstrFormats.td`` --- Patterns for definitions of target-specific
+ instructions.
+
+* ``XXXInstrInfo.td`` --- Target-specific definitions of instruction templates,
+ condition codes, and instructions of an instruction set. For architecture
+ modifications, a different file name may be used. For example, for Pentium
+  with SSE instructions, this file is ``X86InstrSSE.td``, and for Pentium with
+ MMX, this file is ``X86InstrMMX.td``.
+
+There is also a target-specific ``XXX.td`` file, where ``XXX`` is the name of
+the target. The ``XXX.td`` file includes the other ``.td`` input files, but
+its contents are only directly important for subtargets; a sketch of such a
+file appears below.
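+
+The following sketch shows the general shape of such a file for the SPARC
+target; the include list and contents of the real ``Sparc.td`` differ in the
+details (subtarget features are omitted here).
+
+.. code-block:: llvm
+
+  include "llvm/Target/Target.td"
+
+  // Register, calling-convention, and instruction descriptions.
+  include "SparcRegisterInfo.td"
+  include "SparcCallingConv.td"
+  include "SparcInstrInfo.td"
+
+  def SparcInstrInfo : InstrInfo;
+
+  def Sparc : Target {
+    // The instruction set described in SparcInstrInfo.td.
+    let InstructionSet = SparcInstrInfo;
+  }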
+
+You should describe a concrete target-specific class ``XXXInstrInfo`` that
+represents machine instructions supported by a target machine.
+``XXXInstrInfo`` contains an array of ``XXXInstrDescriptor`` objects, each of
+which describes one instruction. An instruction descriptor defines:
+
+* Opcode mnemonic
+* Number of operands
+* List of implicit register definitions and uses
+* Target-independent properties (such as memory access, is commutable)
+* Target-specific flags
+
+The ``Instruction`` class (defined in ``Target.td``) is mostly used as a base
+for more complex instruction classes.
+
+.. code-block:: llvm
+
+ class Instruction {
+ string Namespace = "";
+ dag OutOperandList; // A dag containing the MI def operand list.
+ dag InOperandList; // A dag containing the MI use operand list.
+ string AsmString = ""; // The .s format to print the instruction with.
+ list<dag> Pattern; // Set to the DAG pattern for this instruction.
+ list<Register> Uses = [];
+ list<Register> Defs = [];
+ list<Predicate> Predicates = []; // predicates turned into isel match code
+ ... remainder not shown for space ...
+ }
+
+A ``SelectionDAG`` node (``SDNode``) should contain an object representing a
+target-specific instruction that is defined in ``XXXInstrInfo.td``. The
+instruction objects should represent instructions from the architecture manual
+of the target machine (such as the SPARC Architecture Manual for the SPARC
+target).
+
+A single instruction from the architecture manual is often modeled as multiple
+target instructions, depending upon its operands. For example, a manual might
+describe an add instruction that takes a register or an immediate operand. An
+LLVM target could model this with two instructions named ``ADDri`` and
+``ADDrr``.
+
+You should define a class for each instruction category and define each opcode
+as a subclass of the category with appropriate parameters such as the fixed
+binary encoding of opcodes and extended opcodes. You should map the register
+bits to the bits of the instruction in which they are encoded (for the JIT).
+You should also specify how the instruction should be printed when the
+automatic assembly printer is used.
+
+As is described in the SPARC Architecture Manual, Version 8, there are three
+major 32-bit formats for instructions. Format 1 is only for the ``CALL``
+instruction. Format 2 is for branch on condition codes and ``SETHI`` (set high
+bits of a register) instructions. Format 3 is for other instructions.
+
+Each of these formats has corresponding classes in ``SparcInstrFormats.td``.
+``InstSP`` is a base class for other instruction classes. Additional base
+classes are specified for more precise formats: for example in
+``SparcInstrFormats.td``, ``F2_1`` is for ``SETHI``, and ``F2_2`` is for
+branches. There are three other base classes: ``F3_1`` for register/register
+operations, ``F3_2`` for register/immediate operations, and ``F3_3`` for
+floating-point operations. ``SparcInstrInfo.td`` also adds the base class
+``Pseudo`` for synthetic SPARC instructions.
+
+``SparcInstrInfo.td`` largely consists of operand and instruction definitions
+for the SPARC target. In ``SparcInstrInfo.td``, the following target
+description file entry, ``LDrr``, defines the Load Integer instruction for a
+Word (the ``LD`` SPARC opcode) from a memory address to a register. The first
+parameter, the value 3 (``11``\ :sub:`2`), is the operation value for this
+category of operation. The second parameter (``000000``\ :sub:`2`) is the
+specific operation value for ``LD``/Load Word. The third parameter is the
+output destination, which is a register operand defined in the register
+target description file (``IntRegs``).
+
+.. code-block:: llvm
+
+ def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set i32:$dst, (load ADDRrr:$addr))]>;
+
+The fourth parameter is the input source, which uses the address operand
+``MEMrr`` that is defined earlier in ``SparcInstrInfo.td``:
+
+.. code-block:: llvm
+
+ def MEMrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+ }
+
+The fifth parameter is a string that is used by the assembly printer and can be
+left as an empty string until the assembly printer interface is implemented.
+The sixth and final parameter is the pattern used to match the instruction
+during the SelectionDAG Select Phase described in :doc:`CodeGenerator`.
+This parameter is detailed in the next section, :ref:`instruction-selector`.
+
+Instruction class definitions are not overloaded for different operand types,
+so separate versions of instructions are needed for register, memory, or
+immediate value operands. For example, to perform a Load Integer instruction
+for a Word from an immediate operand to a register, the following instruction
+class is defined:
+
+.. code-block:: llvm
+
+ def LDri : F3_2 <3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set i32:$dst, (load ADDRri:$addr))]>;
+
+Writing these definitions for so many similar instructions can involve a lot of
+cut and paste. In ``.td`` files, the ``multiclass`` directive enables the
+creation of templates to define several instruction classes at once (using the
+``defm`` directive). For example in ``SparcInstrInfo.td``, the ``multiclass``
+pattern ``F3_12`` is defined to create 2 instruction classes each time
+``F3_12`` is invoked:
+
+.. code-block:: llvm
+
+ multiclass F3_12 <string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+ def rr : F3_1 <2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set i32:$dst, (OpNode i32:$b, i32:$c))]>;
+ def ri : F3_2 <2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set i32:$dst, (OpNode i32:$b, simm13:$c))]>;
+ }
+
+So when the ``defm`` directive is used for the ``XOR`` and ``ADD``
+instructions, as seen below, it creates four instruction objects: ``XORrr``,
+``XORri``, ``ADDrr``, and ``ADDri``.
+
+.. code-block:: llvm
+
+ defm XOR : F3_12<"xor", 0b000011, xor>;
+ defm ADD : F3_12<"add", 0b000000, add>;
+
+``SparcInstrInfo.td`` also includes definitions for condition codes that are
+referenced by branch instructions. The following definitions in
+``SparcInstrInfo.td`` indicate the values of the SPARC condition codes. For
+example, the value 10 represents the "greater than" condition for integers,
+and the value 22 represents the "greater than" condition for floats.
+
+.. code-block:: llvm
+
+ def ICC_NE : ICC_VAL< 9>; // Not Equal
+ def ICC_E : ICC_VAL< 1>; // Equal
+ def ICC_G : ICC_VAL<10>; // Greater
+ ...
+ def FCC_U : FCC_VAL<23>; // Unordered
+ def FCC_G : FCC_VAL<22>; // Greater
+ def FCC_UG : FCC_VAL<21>; // Unordered or Greater
+ ...
+
+(Note that ``Sparc.h`` also defines enums that correspond to the same SPARC
+condition codes. Care must be taken to ensure the values in ``Sparc.h``
+correspond to the values in ``SparcInstrInfo.td``. I.e., ``SPCC::ICC_NE = 9``,
+``SPCC::FCC_U = 23`` and so on.)
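+
+A sketch of the corresponding enum in ``Sparc.h``, showing only the values
+quoted above:
+
+.. code-block:: c++
+
+  namespace SPCC {
+    enum CondCodes {
+      ICC_NE = 9,    // must stay in sync with def ICC_NE : ICC_VAL< 9>
+      ICC_E  = 1,
+      ICC_G  = 10,
+      FCC_U  = 23,   // must stay in sync with def FCC_U : FCC_VAL<23>
+      FCC_G  = 22,
+      FCC_UG = 21
+    };
+  }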
+
+Instruction Operand Mapping
+---------------------------
+
+The code generator backend maps instruction operands to fields in the
+instruction. Operands are assigned to unbound fields in the instruction in the
+order they are defined. Fields are bound when they are assigned a value. For
+example, the Sparc target defines the ``XNORrr`` instruction as a ``F3_1``
+format instruction having three operands.
+
+.. code-block:: llvm
+
+ def XNORrr : F3_1<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "xnor $b, $c, $dst",
+ [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>;
+
+The instruction templates in ``SparcInstrFormats.td`` show that the base class
+for ``F3_1`` is ``InstSP``.
+
+.. code-block:: llvm
+
+ class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+ field bits<32> Inst;
+ let Namespace = "SP";
+ bits<2> op;
+ let Inst{31-30} = op;
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ }
+
+``InstSP`` leaves the ``op`` field unbound.
+
+.. code-block:: llvm
+
+ class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+ bits<6> op3;
+ bits<5> rs1;
+ let op{1} = 1; // Op = 2 or 3
+ let Inst{29-25} = rd;
+ let Inst{24-19} = op3;
+ let Inst{18-14} = rs1;
+ }
+
+``F3`` binds the ``op`` field and defines the ``rd``, ``op3``, and ``rs1``
+fields. ``F3`` format instructions will bind the operands to the ``rd``,
+``op3``, and ``rs1`` fields.
+
+.. code-block:: llvm
+
+ class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<8> asi = 0; // asi not currently used
+ bits<5> rs2;
+ let op = opVal;
+ let op3 = op3val;
+ let Inst{13} = 0; // i field = 0
+ let Inst{12-5} = asi; // address space identifier
+ let Inst{4-0} = rs2;
+ }
+
+``F3_1`` binds the ``op3`` field and defines the ``rs2`` field. ``F3_1``
+format instructions will bind the operands to the ``rd``, ``rs1``, and ``rs2``
+fields. This results in the ``XNORrr`` instruction binding ``$dst``, ``$b``,
+and ``$c`` operands to the ``rd``, ``rs1``, and ``rs2`` fields respectively.
+
+Instruction Relation Mapping
+----------------------------
+
+This TableGen feature is used to relate instructions with each other. It is
+particularly useful when you have multiple instruction formats and need to
+switch between them after instruction selection. This entire feature is driven
+by relation models which can be defined in ``XXXInstrInfo.td`` files
+according to the target-specific instruction set. Relation models are defined
+using ``InstrMapping`` class as a base. TableGen parses all the models
+and generates instruction relation maps using the specified information.
+Relation maps are emitted as tables in the ``XXXGenInstrInfo.inc`` file
+along with the functions to query them. For the detailed information on how to
+use this feature, please refer to :doc:`HowToUseInstrMappings`.
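+
+As a taste of what a relation model looks like, here is a hypothetical mapping
+between unpredicated and predicated instruction forms; the field names come
+from the ``InstrMapping`` class, while the filter class and the row and column
+values are invented for illustration.
+
+.. code-block:: llvm
+
+  def getPredicatedOpcode : InstrMapping {
+    // Relate only instructions that derive from this class.
+    let FilterClass = "PredRel";
+    // Instructions with the same BaseOpcode field form one row.
+    let RowFields = ["BaseOpcode"];
+    // Within a row, columns are distinguished by the PredSense field.
+    let ColFields = ["PredSense"];
+    // The key column is the unpredicated form ...
+    let KeyCol = ["none"];
+    // ... and it maps to the true- and false-predicated columns.
+    let ValueCols = [["true"], ["false"]];
+  }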
+
+Implement a subclass of ``TargetInstrInfo``
+-------------------------------------------
+
+The final step is to hand code portions of ``XXXInstrInfo``, which implements
+the interface described in ``TargetInstrInfo.h`` (see :ref:`TargetInstrInfo`).
+These functions return ``0`` or a Boolean or they assert, unless overridden.
+Here's a list of functions that are overridden for the SPARC implementation in
+``SparcInstrInfo.cpp``:
+
+* ``isLoadFromStackSlot`` --- If the specified machine instruction is a direct
+ load from a stack slot, return the register number of the destination and the
+ ``FrameIndex`` of the stack slot.
+
+* ``isStoreToStackSlot`` --- If the specified machine instruction is a direct
+  store to a stack slot, return the register number of the source register and
+  the ``FrameIndex`` of the stack slot.
+
+* ``copyPhysReg`` --- Copy values between a pair of physical registers.
+
+* ``storeRegToStackSlot`` --- Store a register value to a stack slot.
+
+* ``loadRegFromStackSlot`` --- Load a register value from a stack slot.
+
+* ``storeRegToAddr`` --- Store a register value to memory.
+
+* ``loadRegFromAddr`` --- Load a register value from memory.
+
+* ``foldMemoryOperand`` --- Attempt to fold a load or store of the specified
+  stack slot into the specified machine instruction for the specified
+  operand(s).
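+
+As a sketch of the first function in the list, a simplified version of the
+SPARC ``isLoadFromStackSlot`` is shown below; the real implementation also
+handles the floating-point load opcodes.
+
+.. code-block:: c++
+
+  unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                               int &FrameIndex) const {
+    if (MI->getOpcode() == SP::LDri &&
+        MI->getOperand(1).isFI() &&        // base address is a frame index
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) { // at offset zero into the slot
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();   // the destination register
+    }
+    return 0;                              // not a direct stack-slot load
+  }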
+
+Branch Folding and If Conversion
+--------------------------------
+
+Performance can be improved by combining instructions or by eliminating
+instructions that are never reached. The ``AnalyzeBranch`` method in
+``XXXInstrInfo`` may be implemented to examine conditional instructions and
+remove unnecessary instructions. ``AnalyzeBranch`` looks at the end of a
+machine basic block (MBB) for opportunities for improvement, such as branch
+folding and if conversion. The ``BranchFolder`` and ``IfConverter`` machine
+function passes (see the source files ``BranchFolding.cpp`` and
+``IfConversion.cpp`` in the ``lib/CodeGen`` directory) call ``AnalyzeBranch``
+to improve the control flow graph that represents the instructions.
+
+Several implementations of ``AnalyzeBranch`` (for ARM, Alpha, and X86) can be
+examined as models for your own ``AnalyzeBranch`` implementation. Since SPARC
+does not implement a useful ``AnalyzeBranch``, the ARM target implementation is
+shown below.
+
+``AnalyzeBranch`` returns a Boolean value and takes four parameters:
+
+* ``MachineBasicBlock &MBB`` --- The incoming block to be examined.
+
+* ``MachineBasicBlock *&TBB`` --- A destination block that is returned. For a
+ conditional branch that evaluates to true, ``TBB`` is the destination.
+
+* ``MachineBasicBlock *&FBB`` --- For a conditional branch that evaluates to
+ false, ``FBB`` is returned as the destination.
+
+* ``std::vector<MachineOperand> &Cond`` --- List of operands to evaluate a
+ condition for a conditional branch.
+
+In the simplest case, if a block ends without a branch, then it falls through
+to the successor block. No destination blocks are specified for either ``TBB``
+or ``FBB``, so both parameters return ``NULL``. The start of the
+``AnalyzeBranch`` (see code below for the ARM target) shows the function
+parameters and the code for the simplest case.
+
+.. code-block:: c++
+
+ bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ std::vector<MachineOperand> &Cond) const
+ {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+If a block ends with a single unconditional branch instruction, then
+``AnalyzeBranch`` (shown below) should return the destination of that branch in
+the ``TBB`` parameter.
+
+.. code-block:: c++
+
+ if (LastOpc == ARM::B || LastOpc == ARM::tB) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+If a block ends with two unconditional branches, then the second branch is
+never reached. In that situation, as shown below, remove the last branch
+instruction and return the destination of the penultimate branch in the
+``TBB`` parameter.
+
+.. code-block:: c++
+
+ if ((SecondLastOpc == ARM::B || SecondLastOpc == ARM::tB) &&
+ (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ I->eraseFromParent();
+ return false;
+ }
+
+A block may end with a single conditional branch instruction that falls through
+to the successor block if the condition evaluates to false. In that case,
+``AnalyzeBranch`` (shown below) should return the destination of that
+conditional branch in the ``TBB`` parameter and a list of operands in the
+``Cond`` parameter to evaluate the condition.
+
+.. code-block:: c++
+
+ if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
+ return false;
+ }
+
+If a block ends with both a conditional branch and an ensuing unconditional
+branch, then ``AnalyzeBranch`` (shown below) should return the conditional
+branch destination (assuming it corresponds to a conditional evaluation of
+"``true``") in the ``TBB`` parameter and the unconditional branch destination
+in the ``FBB`` (corresponding to a conditional evaluation of "``false``"). A
+list of operands to evaluate the condition should be returned in the ``Cond``
+parameter.
+
+.. code-block:: c++
+
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
+ (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(1));
+ Cond.push_back(SecondLastInst->getOperand(2));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+For the last two cases (ending with a single conditional branch or ending with
+one conditional and one unconditional branch), the operands returned in the
+``Cond`` parameter can be passed to methods of other instructions to create new
+branches or perform other operations. An implementation of ``AnalyzeBranch``
+requires the helper methods ``RemoveBranch`` and ``InsertBranch`` to manage
+subsequent operations; a simplified sketch of ``RemoveBranch`` follows.
+
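+A simplified sketch of ``RemoveBranch``, in the same style as the ARM excerpts
+above, erases up to two terminating branches and returns the number of
+instructions removed (the real code also handles the Thumb opcodes):
+
+.. code-block:: c++
+
+  unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+    MachineBasicBlock::iterator I = MBB.end();
+    if (I == MBB.begin()) return 0;
+    --I;
+    if (I->getOpcode() != ARM::B && I->getOpcode() != ARM::Bcc)
+      return 0;                 // the block does not end in a branch
+
+    I->eraseFromParent();       // remove the last branch
+    I = MBB.end();
+    if (I == MBB.begin()) return 1;
+    --I;
+    if (I->getOpcode() != ARM::Bcc)
+      return 1;                 // no conditional branch precedes it
+
+    I->eraseFromParent();       // remove the conditional branch too
+    return 2;
+  }
+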
+``AnalyzeBranch`` should return false, indicating success, in most
+circumstances. It should return true only when the method is stumped about
+what to do, for example, if a block has three terminating branches, or if it
+encounters a terminator it cannot handle, such as an indirect branch.
+
+.. _instruction-selector:
+
+Instruction Selector
+====================
+
+LLVM uses a ``SelectionDAG`` to represent LLVM IR instructions, and nodes of
+the ``SelectionDAG`` ideally represent native target instructions. During code
+generation, instruction selection passes are performed to convert non-native
+DAG instructions into native target-specific instructions. The pass described
+in ``XXXISelDAGToDAG.cpp`` is used to match patterns and perform DAG-to-DAG
+instruction selection. Optionally, a pass may be defined (in
+``XXXBranchSelector.cpp``) to perform similar DAG-to-DAG operations for branch
+instructions. Later, the code in ``XXXISelLowering.cpp`` legalizes the
+``SelectionDAG``: it replaces or removes operations and data types that the
+target does not support natively.
+
+TableGen generates code for instruction selection using the following target
+description input files:
+
+* ``XXXInstrInfo.td`` --- Contains definitions of instructions in a
+ target-specific instruction set, generates ``XXXGenDAGISel.inc``, which is
+ included in ``XXXISelDAGToDAG.cpp``.
+
+* ``XXXCallingConv.td`` --- Contains the calling and return value conventions
+ for the target architecture, and it generates ``XXXGenCallingConv.inc``,
+ which is included in ``XXXISelLowering.cpp``.
+
+The implementation of an instruction selection pass must include a header that
+declares the ``FunctionPass`` class or a subclass of ``FunctionPass``. In
+``XXXTargetMachine.cpp``, a Pass Manager (PM) should add each instruction
+selection pass into the queue of passes to run, as sketched below.
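+
+For example, a minimal ``addInstSelector`` for the SPARC target might look
+like the following; ``createSparcISelDag`` is the factory function for the
+pass defined in ``SparcISelDAGToDAG.cpp``.
+
+.. code-block:: c++
+
+  bool SparcTargetMachine::addInstSelector(PassManagerBase &PM, bool Fast) {
+    // Queue the DAG-to-DAG instruction selection pass.
+    PM.add(createSparcISelDag(*this));
+    return false;
+  }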
+
+The LLVM static compiler (``llc``) is an excellent tool for visualizing the
+contents of DAGs. To display the ``SelectionDAG`` before or after specific
+processing phases, use the command line options for ``llc``, described at
+:ref:`SelectionDAG-Process`.
+
+To describe instruction selector behavior, you should add patterns for lowering
+LLVM code into a ``SelectionDAG`` as the last parameter of the instruction
+definitions in ``XXXInstrInfo.td``. For example, in ``SparcInstrInfo.td``,
+this entry defines a register store operation, and the last parameter describes
+a pattern with the store DAG operator.
+
+.. code-block:: llvm
+
+ def STrr : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "st $src, [$addr]", [(store i32:$src, ADDRrr:$addr)]>;
+
+``ADDRrr`` is a memory mode that is also defined in ``SparcInstrInfo.td``:
+
+.. code-block:: llvm
+
+ def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+
+The definition of ``ADDRrr`` refers to ``SelectADDRrr``, which is a function
+defined in an implementation of the Instruction Selector (such as
+``SparcISelDAGToDAG.cpp``).
+
+In ``lib/Target/TargetSelectionDAG.td``, the DAG operator for store is defined
+below:
+
+.. code-block:: llvm
+
+ def store : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return !ST->isTruncatingStore() &&
+ ST->getAddressingMode() == ISD::UNINDEXED;
+ return false;
+ }]>;
+
+``XXXInstrInfo.td`` also generates (in ``XXXGenDAGISel.inc``) the
+``SelectCode`` method that is used to call the appropriate processing method
+for an instruction. In this example, ``SelectCode`` calls ``Select_ISD_STORE``
+for the ``ISD::STORE`` opcode.
+
+.. code-block:: c++
+
+ SDNode *SelectCode(SDValue N) {
+ ...
+ MVT::ValueType NVT = N.getNode()->getValueType(0);
+ switch (N.getOpcode()) {
+ case ISD::STORE: {
+ switch (NVT) {
+ default:
+ return Select_ISD_STORE(N);
+ break;
+ }
+ break;
+ }
+ ...
+
+The pattern for ``STrr`` is matched, so elsewhere in ``XXXGenDAGISel.inc``,
+code for ``STrr`` is created for ``Select_ISD_STORE``. The ``Emit_22`` method
+is also generated in ``XXXGenDAGISel.inc`` to complete the processing of this
+instruction.
+
+.. code-block:: c++
+
+ SDNode *Select_ISD_STORE(const SDValue &N) {
+ SDValue Chain = N.getOperand(0);
+ if (Predicate_store(N.getNode())) {
+ SDValue N1 = N.getOperand(1);
+ SDValue N2 = N.getOperand(2);
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+
+ // Pattern: (st:void i32:i32:$src,
+ // ADDRrr:i32:$addr)<<P:Predicate_store>>
+ // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src)
+ // Pattern complexity = 13 cost = 1 size = 0
+ if (SelectADDRrr(N, N2, CPTmp0, CPTmp1) &&
+ N1.getNode()->getValueType(0) == MVT::i32 &&
+ N2.getNode()->getValueType(0) == MVT::i32) {
+ return Emit_22(N, SP::STrr, CPTmp0, CPTmp1);
+ }
+ ...
+
+The SelectionDAG Legalize Phase
+-------------------------------
+
+The Legalize phase converts a DAG to use types and operations that are natively
+supported by the target. For natively unsupported types and operations, you
+need to add code to the target-specific ``XXXTargetLowering`` implementation to
+convert unsupported types and operations to supported ones.
+
+In the constructor for the ``XXXTargetLowering`` class, first use the
+``addRegisterClass`` method to specify which types are supported and which
+register classes are associated with them. The code for the register classes
+is generated by TableGen from ``XXXRegisterInfo.td`` and placed in
+``XXXGenRegisterInfo.h.inc``. For example, the implementation of the
+constructor for the ``SparcTargetLowering`` class (in ``SparcISelLowering.cpp``)
+starts with the following code:
+
+.. code-block:: c++
+
+ addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
+ addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
+ addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass);
+
+You should examine the node types in the ``ISD`` namespace
+(``include/llvm/CodeGen/SelectionDAGNodes.h``) and determine which operations
+the target natively supports. For operations that do **not** have native
+support, add a callback to the constructor for the ``XXXTargetLowering`` class,
+so the instruction selection process knows what to do. The ``TargetLowering``
+class callback methods (declared in ``llvm/Target/TargetLowering.h``) are:
+
+* ``setOperationAction`` --- General operation.
+* ``setLoadExtAction`` --- Load with extension.
+* ``setTruncStoreAction`` --- Truncating store.
+* ``setIndexedLoadAction`` --- Indexed load.
+* ``setIndexedStoreAction`` --- Indexed store.
+* ``setConvertAction`` --- Type conversion.
+* ``setCondCodeAction`` --- Support for a given condition code.
+
+Note: on older releases, ``setLoadXAction`` is used instead of
+``setLoadExtAction``. Also, on older releases, ``setCondCodeAction`` may not
+be supported. Examine your release to see what methods are specifically
+supported.
+
+These callbacks are used to determine that an operation does or does not work
+with a specified type (or types). In all cases, the third parameter is a
+``LegalizeAction`` type enum value: ``Promote``, ``Expand``, ``Custom``, or
+``Legal``. ``SparcISelLowering.cpp`` contains examples of all four
+``LegalizeAction`` values.
+
+Promote
+^^^^^^^
+
+For an operation without native support for a given type, the specified type
+may be promoted to a larger type that is supported. For example, SPARC does
+not support a sign-extending load for Boolean values (``i1`` type), so in
+``SparcISelLowering.cpp`` the third parameter below, ``Promote``, changes
+``i1`` type values to a larger type before loading.
+
+.. code-block:: c++
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+Expand
+^^^^^^
+
+For a type without native support, a value may need to be broken down further,
+rather than promoted. For an operation without native support, a combination
+of other operations may be used to similar effect. In SPARC, the
+floating-point sine and cosine trig operations are supported by expansion to
+other operations, as indicated by the third parameter, ``Expand``, to
+``setOperationAction``:
+
+.. code-block:: c++
+
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+
+Custom
+^^^^^^
+
+For some operations, simple type promotion or operation expansion may be
+insufficient. In some cases, a special intrinsic function must be implemented.
+
+For example, a constant value may require special treatment, or an operation
+may require spilling and restoring registers on the stack and working with
+register allocators.
+
+As seen in the ``SparcISelLowering.cpp`` code below, to perform a type
+conversion from a floating-point value to a signed integer, first call
+``setOperationAction`` with ``Custom`` as the third parameter:
+
+.. code-block:: c++
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+In the ``LowerOperation`` method, for each ``Custom`` operation, a case
+statement should be added to indicate what function to call. In the following
+code, an ``FP_TO_SINT`` opcode will call the ``LowerFP_TO_SINT`` method:
+
+.. code-block:: c++
+
+ SDValue SparcTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ ...
+ }
+ }
+
+Finally, the ``LowerFP_TO_SINT`` method is implemented, using an FP register to
+convert the floating-point value to an integer.
+
+.. code-block:: c++
+
+ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getValueType() == MVT::i32);
+ Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, MVT::i32, Op);
+ }
+
+Legal
+^^^^^
+
+The ``Legal`` ``LegalizeAction`` enum value simply indicates that an operation
+**is** natively supported. ``Legal`` represents the default condition, so it
+is rarely used. In ``SparcISelLowering.cpp``, ``CTPOP`` (an operation that
+counts the bits set in an integer) is natively supported only on SPARC v9.
+The following code enables the ``Expand`` conversion technique for non-v9
+SPARC implementations:
+
+.. code-block:: c++
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ ...
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+
+Calling Conventions
+-------------------
+
+To support target-specific calling conventions, ``XXXCallingConv.td`` uses
+interfaces (such as ``CCIfType`` and ``CCAssignToReg``) that are defined in
+``lib/Target/TargetCallingConv.td``. TableGen can take the target descriptor
+file ``XXXCallingConv.td`` and generate the header file
+``XXXGenCallingConv.inc``, which is typically included in
+``XXXISelLowering.cpp``. You can use the interfaces in
+``TargetCallingConv.td`` to specify:
+
+* The order of parameter allocation.
+
+* Where parameters and return values are placed (that is, on the stack or in
+ registers).
+
+* Which registers may be used.
+
+* Whether the caller or callee unwinds the stack.
+
+The following example demonstrates the use of the ``CCIfType`` and
+``CCAssignToReg`` interfaces. If the ``CCIfType`` predicate is true (that is,
+if the current argument is of type ``f32`` or ``f64``), then the action is
+performed. In this case, the ``CCAssignToReg`` action assigns the argument
+value to the first available register: either ``R0`` or ``R1``.
+
+.. code-block:: llvm
+
+ CCIfType<[f32,f64], CCAssignToReg<[R0, R1]>>
+
+``SparcCallingConv.td`` contains definitions for a target-specific return-value
+calling convention (``RetCC_Sparc32``) and a basic 32-bit C calling convention
+(``CC_Sparc32``). The definition of ``RetCC_Sparc32`` (shown below) indicates
+which registers are used for specified scalar return types. A single-precision
+float is returned in register ``F0``, a double-precision float in register
+``D0``, and a 32-bit integer in register ``I0`` or ``I1``.
+
+.. code-block:: llvm
+
+ def RetCC_Sparc32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[I0, I1]>>,
+ CCIfType<[f32], CCAssignToReg<[F0]>>,
+ CCIfType<[f64], CCAssignToReg<[D0]>>
+ ]>;
+
+The definition of ``CC_Sparc32`` in ``SparcCallingConv.td`` introduces
+``CCAssignToStack``, which assigns the value to a stack slot with the specified
+size and alignment. In the example below, the first parameter, 4, indicates
+the size of the slot in bytes, and the second parameter, also 4, indicates
+that the slot is aligned on a 4-byte boundary. (Special cases: if the size is
+zero, then the ABI size is used; if the alignment is zero, then the ABI
+alignment is used.)
+
+.. code-block:: llvm
+
+ def CC_Sparc32 : CallingConv<[
+ // All arguments get passed in integer registers if there is space.
+ CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ CCAssignToStack<4, 4>
+ ]>;
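+
+Once TableGen has generated ``SparcGenCallingConv.inc`` from these
+definitions, the generated ``CC_Sparc32`` function can be passed to a
+``CCState`` object to analyze arguments. The following minimal sketch shows
+the usual pattern inside a ``LowerFormalArguments`` implementation (the
+``CallConv``, ``isVarArg``, ``DAG``, and ``Ins`` names are the conventional
+parameters of that method):
+
+.. code-block:: c++
+
+  // Gather the locations assigned to each incoming argument.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);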
+
+``CCDelegateTo`` is another commonly used interface, which invokes a specified
+sub-calling convention to continue the assignment. In the following example
+(in ``X86CallingConv.td``), the definition of ``RetCC_X86_32_C`` ends with
+``CCDelegateTo``. If the current value is not assigned to register ``ST0`` or
+``ST1`` by the preceding rules, ``RetCC_X86Common`` is invoked.
+
+.. code-block:: llvm
+
+ def RetCC_X86_32_C : CallingConv<[
+ CCIfType<[f32], CCAssignToReg<[ST0, ST1]>>,
+ CCIfType<[f64], CCAssignToReg<[ST0, ST1]>>,
+ CCDelegateTo<RetCC_X86Common>
+ ]>;
+
+``CCIfCC`` is an interface that attempts to match the given name to the current
+calling convention. If the name identifies the current calling convention,
+then a specified action is invoked. In the following example (in
+``X86CallingConv.td``), if the ``Fast`` calling convention is in use, then
+``RetCC_X86_32_Fast`` is invoked. If the ``SSECall`` calling convention is in
+use, then ``RetCC_X86_32_SSE`` is invoked.
+
+.. code-block:: llvm
+
+ def RetCC_X86_32 : CallingConv<[
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ CCIfCC<"CallingConv::X86_SSECall", CCDelegateTo<RetCC_X86_32_SSE>>,
+ CCDelegateTo<RetCC_X86_32_C>
+ ]>;
+
+Other calling convention interfaces include:
+
+* ``CCIf <predicate, action>`` --- If the predicate matches, apply the action.
+
+* ``CCIfInReg <action>`` --- If the argument is marked with the "``inreg``"
+ attribute, then apply the action.
+
+* ``CCIfNest <action>`` --- If the argument is marked with the "``nest``"
+ attribute, then apply the action.
+
+* ``CCIfNotVarArg <action>`` --- If the current function does not take a
+ variable number of arguments, apply the action.
+
+* ``CCAssignToRegWithShadow <registerList, shadowList>`` --- Similar to
+  ``CCAssignToReg``, but with a shadow list of registers.
+
+* ``CCPassByVal <size, align>`` --- Assign value to a stack slot with the
+ minimum specified size and alignment.
+
+* ``CCPromoteToType <type>`` --- Promote the current value to the specified
+ type.
+
+* ``CallingConv <[actions]>`` --- Define each calling convention that is
+ supported.
+
+Assembly Printer
+================
+
+During the code emission stage, the code generator may utilize an LLVM pass to
+produce assembly output. To do this, implement a printer that converts LLVM IR
+to GAS-format assembly language for your target machine, using the following
+steps:
+
+* Define all the assembly strings for your target, adding them to the
+ instructions defined in the ``XXXInstrInfo.td`` file. (See
+ :ref:`instruction-set`.) TableGen will produce an output file
+ (``XXXGenAsmWriter.inc``) with an implementation of the ``printInstruction``
+ method for the ``XXXAsmPrinter`` class.
+
+* Write ``XXXTargetAsmInfo.h``, which contains the bare-bones declaration of
+ the ``XXXTargetAsmInfo`` class (a subclass of ``TargetAsmInfo``).
+
+* Write ``XXXTargetAsmInfo.cpp``, which contains target-specific values for
+ ``TargetAsmInfo`` properties and sometimes new implementations for methods.
+
+* Write ``XXXAsmPrinter.cpp``, which implements the ``AsmPrinter`` class that
+ performs the LLVM-to-assembly conversion.
+
+The code in ``XXXTargetAsmInfo.h`` is usually a trivial declaration of the
+``XXXTargetAsmInfo`` class for use in ``XXXTargetAsmInfo.cpp``. Similarly,
+``XXXTargetAsmInfo.cpp`` usually has a few declarations of ``XXXTargetAsmInfo``
+replacement values that override the default values in ``TargetAsmInfo.cpp``.
+For example in ``SparcTargetAsmInfo.cpp``:
+
+.. code-block:: c++
+
+ SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &TM) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0; // .xword is only supported by V9.
+ ZeroDirective = "\t.skip\t";
+ CommentString = "!";
+ ConstantPoolSection = "\t.section \".rodata\",#alloc\n";
+ }
+
+The X86 assembly printer implementation (``X86TargetAsmInfo``) is an example
+where the target-specific ``TargetAsmInfo`` class overrides a method:
+``ExpandInlineAsm``.
+
+A target-specific implementation of ``AsmPrinter`` is written in
+``XXXAsmPrinter.cpp``, which implements the ``AsmPrinter`` class that converts
+the LLVM IR to printable assembly. The implementation must include the following
+headers that have declarations for the ``AsmPrinter`` and
+``MachineFunctionPass`` classes. The ``MachineFunctionPass`` is a subclass of
+``FunctionPass``.
+
+.. code-block:: c++
+
+ #include "llvm/CodeGen/AsmPrinter.h"
+ #include "llvm/CodeGen/MachineFunctionPass.h"
+
+As a ``FunctionPass``, ``AsmPrinter`` first calls ``doInitialization`` to set
+up the ``AsmPrinter``. In ``SparcAsmPrinter``, a ``Mangler`` object is
+instantiated to process variable names.
+
+In ``XXXAsmPrinter.cpp``, the ``runOnMachineFunction`` method (declared in
+``MachineFunctionPass``) must be implemented for ``XXXAsmPrinter``. In
+``MachineFunctionPass``, the ``runOnFunction`` method invokes
+``runOnMachineFunction``. Target-specific implementations of
+``runOnMachineFunction`` differ, but generally do the following to process each
+machine function:
+
+* Call ``SetupMachineFunction`` to perform initialization.
+
+* Call ``EmitConstantPool`` to print out (to the output stream) constants which
+ have been spilled to memory.
+
+* Call ``EmitJumpTableInfo`` to print out jump tables used by the current
+ function.
+
+* Print out the label for the current function.
+
+* Print out the code for the function, including basic block labels and the
+  assembly for the instructions (using ``printInstruction``).
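+
+A skeleton of such a ``runOnMachineFunction`` (a sketch for a hypothetical
+``XXXAsmPrinter``, not code from an in-tree target) has the following shape:
+
+.. code-block:: c++
+
+  bool XXXAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+    SetupMachineFunction(MF);  // per-function initialization
+    EmitConstantPool();        // constants spilled to memory
+    EmitJumpTableInfo();       // jump tables for this function
+    // Emit the function label, then walk the basic blocks, printing a
+    // label for each block and its instructions via printInstruction.
+    ...
+    return false;              // printing does not modify the function
+  }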
+
+The ``XXXAsmPrinter`` implementation must also include the code generated by
+TableGen that is output in the ``XXXGenAsmWriter.inc`` file. The code in
+``XXXGenAsmWriter.inc`` contains an implementation of the ``printInstruction``
+method that may call these methods:
+
+* ``printOperand``
+* ``printMemOperand``
+* ``printCCOperand`` (for conditional statements)
+* ``printDataDirective``
+* ``printDeclare``
+* ``printImplicitDef``
+* ``printInlineAsm``
+
+The implementations of ``printDeclare``, ``printImplicitDef``,
+``printInlineAsm``, and ``printLabel`` in ``AsmPrinter.cpp`` are generally
+adequate for printing assembly and do not need to be overridden.
+
+The ``printOperand`` method is implemented with a long ``switch``/``case``
+statement for the type of operand: register, immediate, basic block, external
+symbol, global address, constant pool index, or jump table index. For an
+instruction with a memory address operand, the ``printMemOperand`` method
+should be implemented to generate the proper output. Similarly,
+``printCCOperand`` should be used to print a conditional operand.
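+
+An abbreviated sketch of such a ``printOperand`` for a hypothetical
+``XXXAsmPrinter`` follows (``getRegisterName`` is the TableGen-generated
+helper; the remaining operand kinds are elided):
+
+.. code-block:: c++
+
+  void XXXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                   raw_ostream &O) {
+    const MachineOperand &MO = MI->getOperand(opNum);
+    switch (MO.getType()) {
+    case MachineOperand::MO_Register:
+      O << "%" << getRegisterName(MO.getReg());
+      break;
+    case MachineOperand::MO_Immediate:
+      O << (int)MO.getImm();
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      O << *MO.getMBB()->getSymbol();  // the basic block label
+      break;
+    ...  // global addresses, external symbols, constant pool and
+         // jump table indices are handled similarly
+    }
+  }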
+
+``doFinalization`` should be overridden in ``XXXAsmPrinter``; it is called to
+shut down the assembly printer. During ``doFinalization``, global variables
+and constants are printed to the output.
+
+Subtarget Support
+=================
+
+Subtarget support is used to inform the code generation process of instruction
+set variations for a given chip set. For example, the LLVM SPARC
+implementation covers three major versions of the SPARC microprocessor
+architecture: Version 8 (V8, which is a 32-bit architecture), Version 9 (V9, a
+64-bit architecture), and the UltraSPARC architecture. V8 has 16
+double-precision floating-point registers that are also usable as either 32
+single-precision or 8 quad-precision registers. V8 is also purely big-endian.
+V9 has 32 double-precision floating-point registers that are also usable as 16
+quad-precision registers, but cannot be used as single-precision registers.
+The UltraSPARC architecture combines V9 with UltraSPARC Visual Instruction Set
+extensions.
+
+If subtarget support is needed, you should implement a target-specific
+``XXXSubtarget`` class for your architecture. This class should process the
+command-line options ``-mcpu=`` and ``-mattr=``.
+
+TableGen uses definitions in the ``Target.td`` and ``Sparc.td`` files to
+generate code in ``SparcGenSubtarget.inc``. In ``Target.td``, shown below, the
+``SubtargetFeature`` interface is defined. The first four string parameters of
+the ``SubtargetFeature`` interface are a feature name, an attribute set by the
+feature, the value of the attribute, and a description of the feature. (The
+fifth parameter is a list of features whose presence is implied, and its
+default value is an empty array.)
+
+.. code-block:: llvm
+
+ class SubtargetFeature<string n, string a, string v, string d,
+ list<SubtargetFeature> i = []> {
+ string Name = n;
+ string Attribute = a;
+ string Value = v;
+ string Desc = d;
+ list<SubtargetFeature> Implies = i;
+ }
+
+In the ``Sparc.td`` file, the ``SubtargetFeature`` is used to define the
+following features.
+
+.. code-block:: llvm
+
+ def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true",
+ "Enable SPARC-V9 instructions">;
+ def FeatureV8Deprecated : SubtargetFeature<"deprecated-v8",
+ "V8DeprecatedInsts", "true",
+ "Enable deprecated V8 instructions in V9 mode">;
+ def FeatureVIS : SubtargetFeature<"vis", "IsVIS", "true",
+ "Enable UltraSPARC Visual Instruction Set extensions">;
+
+Elsewhere in ``Sparc.td``, the ``Proc`` class is defined and then is used to
+define particular SPARC processor subtypes that may have the previously
+described features.
+
+.. code-block:: llvm
+
+ class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+ def : Proc<"generic", []>;
+ def : Proc<"v8", []>;
+ def : Proc<"supersparc", []>;
+ def : Proc<"sparclite", []>;
+ def : Proc<"f934", []>;
+ def : Proc<"hypersparc", []>;
+ def : Proc<"sparclite86x", []>;
+ def : Proc<"sparclet", []>;
+ def : Proc<"tsc701", []>;
+ def : Proc<"v9", [FeatureV9]>;
+ def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated]>;
+ def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated]>;
+ def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+From the ``Target.td`` and ``Sparc.td`` files, the resulting
+``SparcGenSubtarget.inc`` specifies enum values to identify the features,
+arrays of constants to represent the CPU features and CPU subtypes, and the
+``ParseSubtargetFeatures`` method that parses the features string and sets the
+specified subtarget options. The generated ``SparcGenSubtarget.inc`` file
+should be included in ``SparcSubtarget.cpp``. The target-specific
+implementation of the ``XXXSubtarget`` constructor should follow this
+pseudocode:
+
+.. code-block:: c++
+
+ XXXSubtarget::XXXSubtarget(const Module &M, const std::string &FS) {
+ // Set the default features
+ // Determine default and user specified characteristics of the CPU
+ // Call ParseSubtargetFeatures(FS, CPU) to parse the features string
+ // Perform any additional operations
+ }
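+
+Filled in for a SPARC-like target, that pseudocode might look as follows.
+This is only a sketch: the attribute fields correspond to the
+``SubtargetFeature`` definitions shown earlier, and the default CPU name is
+illustrative.
+
+.. code-block:: c++
+
+  XXXSubtarget::XXXSubtarget(const Module &M, const std::string &FS)
+    : IsV9(false), V8DeprecatedInsts(false), IsVIS(false) {
+    // Default CPU used when -mcpu= is not given on the command line.
+    std::string CPU = "generic";
+
+    // Parse the features string; this sets the attribute fields
+    // (IsV9, ...) named by the SubtargetFeature records.
+    ParseSubtargetFeatures(FS, CPU);
+  }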
+
+JIT Support
+===========
+
+The implementation of a target machine optionally includes a Just-In-Time (JIT)
+code generator that emits machine code and auxiliary structures as binary
+output that can be written directly to memory. To add JIT support, perform the
+following steps:
+
+* Write an ``XXXCodeEmitter.cpp`` file that contains a machine function pass
+ that transforms target-machine instructions into relocatable machine
+ code.
+
+* Write an ``XXXJITInfo.cpp`` file that implements the JIT interfaces for
+ target-specific code-generation activities, such as emitting machine code and
+ stubs.
+
+* Modify ``XXXTargetMachine`` so that it provides a ``TargetJITInfo`` object
+ through its ``getJITInfo`` method.
+
+There are several different approaches to writing the JIT support code. For
+instance, TableGen and target descriptor files may be used for creating a JIT
+code generator, but are not mandatory. For the Alpha and PowerPC target
+machines, TableGen is used to generate ``XXXGenCodeEmitter.inc``, which
+contains the binary coding of machine instructions and the
+``getBinaryCodeForInstr`` method to access those codes. Other JIT
+implementations do not use TableGen for this purpose.
+
+Both ``XXXJITInfo.cpp`` and ``XXXCodeEmitter.cpp`` must include the
+``llvm/CodeGen/MachineCodeEmitter.h`` header file that defines the
+``MachineCodeEmitter`` class containing code for several callback functions
+that write data (in bytes, words, strings, etc.) to the output stream.
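+
+For example, a code emitter can write raw bytes and words through calls such
+as the following (``MCE`` is a ``MachineCodeEmitter`` reference; the values
+are illustrative):
+
+.. code-block:: c++
+
+  MCE.emitByte(0x90);          // write a single byte
+  MCE.emitWordLE(0x12345678);  // write a 32-bit little-endian word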
+
+Machine Code Emitter
+--------------------
+
+In ``XXXCodeEmitter.cpp``, a target-specific version of the ``Emitter`` class
+is implemented as a function pass (a subclass of ``MachineFunctionPass``). The
+target-specific implementation of ``runOnMachineFunction`` (invoked by
+``runOnFunction`` in ``MachineFunctionPass``) iterates through each
+``MachineBasicBlock`` and calls ``emitInstruction`` to process each instruction
+and emit binary code. ``emitInstruction`` is largely implemented with case
+statements on the instruction types defined in ``XXXInstrInfo.h``. For
+example, in ``X86CodeEmitter.cpp``, the ``emitInstruction`` method is built
+around the following ``switch``/``case`` statements:
+
+.. code-block:: c++
+
+ switch (Desc->TSFlags & X86::FormMask) {
+ case X86II::Pseudo: // for not yet implemented instructions
+ ... // or pseudo-instructions
+ break;
+ case X86II::RawFrm: // for instructions with a fixed opcode value
+ ...
+ break;
+ case X86II::AddRegFrm: // for instructions that have one register operand
+ ... // added to their opcode
+ break;
+ case X86II::MRMDestReg:// for instructions that use the Mod/RM byte
+ ... // to specify a destination (register)
+ break;
+ case X86II::MRMDestMem:// for instructions that use the Mod/RM byte
+ ... // to specify a destination (memory)
+ break;
+ case X86II::MRMSrcReg: // for instructions that use the Mod/RM byte
+ ... // to specify a source (register)
+ break;
+ case X86II::MRMSrcMem: // for instructions that use the Mod/RM byte
+ ... // to specify a source (memory)
+ break;
+ case X86II::MRM0r: case X86II::MRM1r: // for instructions that operate on
+ case X86II::MRM2r: case X86II::MRM3r: // a REGISTER r/m operand and
+ case X86II::MRM4r: case X86II::MRM5r: // use the Mod/RM byte and a field
+ case X86II::MRM6r: case X86II::MRM7r: // to hold extended opcode data
+ ...
+ break;
+ case X86II::MRM0m: case X86II::MRM1m: // for instructions that operate on
+ case X86II::MRM2m: case X86II::MRM3m: // a MEMORY r/m operand and
+ case X86II::MRM4m: case X86II::MRM5m: // use the Mod/RM byte and a field
+ case X86II::MRM6m: case X86II::MRM7m: // to hold extended opcode data
+ ...
+ break;
+ case X86II::MRMInitReg: // for instructions whose source and
+ ... // destination are the same register
+ break;
+ }
+
+The implementations of these case statements often first emit the opcode and
+then get the operand(s). Then depending upon the operand, helper methods may
+be called to process the operand(s). For example, in ``X86CodeEmitter.cpp``,
+for the ``X86II::AddRegFrm`` case, the first data emitted (by ``emitByte``) is
+the opcode added to the register operand. Then an object representing the
+machine operand, ``MO1``, is extracted. The helper methods such as
+``isImmediate``, ``isGlobalAddress``, ``isExternalSymbol``,
+``isConstantPoolIndex``, and ``isJumpTableIndex`` determine the operand type.
+(``X86CodeEmitter.cpp`` also has private methods such as ``emitConstant``,
+``emitGlobalAddress``, ``emitExternalSymbolAddress``, ``emitConstPoolAddress``,
+and ``emitJumpTableAddress`` that emit the data into the output stream.)
+
+.. code-block:: c++
+
+ case X86II::AddRegFrm:
+ MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImmediate())
+ emitConstant(MO1.getImm(), Size);
+ else {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri)
+ rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
+ if (MO1.isGlobalAddress()) {
+ bool NeedStub = isa<Function>(MO1.getGlobal());
+ bool isLazy = gvNeedsLazyPtr(MO1.getGlobal());
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ NeedStub, isLazy);
+ } else if (MO1.isExternalSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isConstantPoolIndex())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJumpTableIndex())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ }
+ }
+ break;
+
+In the previous example, ``XXXCodeEmitter.cpp`` uses the variable ``rt``, which
+is a ``RelocationType`` enum value that may be used to relocate addresses (for
+example, a global address with a PIC base offset). The ``RelocationType`` enum
+for that target is defined in the short target-specific ``XXXRelocations.h``
+file. The ``RelocationType`` is used by the ``relocate`` method defined in
+``XXXJITInfo.cpp`` to rewrite addresses for referenced global symbols.
+
+For example, ``X86Relocations.h`` specifies the following relocation types for
+X86 addresses. In all four cases, the relocated value is added to the
+value already in memory. For ``reloc_pcrel_word`` and ``reloc_picrel_word``,
+there is an additional initial adjustment.
+
+.. code-block:: c++
+
+ enum RelocationType {
+ reloc_pcrel_word = 0, // add reloc value after adjusting for the PC loc
+ reloc_picrel_word = 1, // add reloc value after adjusting for the PIC base
+ reloc_absolute_word = 2, // absolute relocation; no additional adjustment
+ reloc_absolute_dword = 3 // absolute relocation; no additional adjustment
+ };
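+
+An abbreviated sketch of the corresponding ``relocate`` method, loosely
+following the X86 implementation in ``X86JITInfo.cpp`` (the PC-relative and
+PIC-relative cases, which first adjust the result for the PC location or the
+PIC base, are elided):
+
+.. code-block:: c++
+
+  void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
+                            unsigned NumRelocs, unsigned char *GOTBase) {
+    for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+      void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+      intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+      switch ((X86::RelocationType)MR->getRelocationType()) {
+      case X86::reloc_absolute_word:
+        // Absolute relocation: add the result to the value in memory.
+        *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+        break;
+      ...  // remaining relocation types
+      }
+    }
+  }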
+
+Target JIT Info
+---------------
+
+``XXXJITInfo.cpp`` implements the JIT interfaces for target-specific
+code-generation activities, such as emitting machine code and stubs. At
+minimum, a target-specific version of ``XXXJITInfo`` implements the following:
+
+* ``getLazyResolverFunction`` --- Initializes the JIT and gives the target a
+  function that is used for compilation.
+
+* ``emitFunctionStub`` --- Returns a native function with a specified address
+ for a callback function.
+
+* ``relocate`` --- Changes the addresses of referenced globals, based on
+ relocation types.
+
+* Callback functions that are wrappers for a function stub, used when the
+  real target is not initially known.
+
+``getLazyResolverFunction`` is generally trivial to implement. It stores the
+incoming parameter in the global ``JITCompilerFunction`` and returns the
+callback function that will be used as a function wrapper. For the Alpha target
+(in ``AlphaJITInfo.cpp``), the ``getLazyResolverFunction`` implementation is
+simply:
+
+.. code-block:: c++
+
+ TargetJITInfo::LazyResolverFn AlphaJITInfo::getLazyResolverFunction(
+ JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return AlphaCompilationCallback;
+ }
+
+For the X86 target, the ``getLazyResolverFunction`` implementation is a little
+more complicated, because it returns a different callback function for
+processors with SSE instructions and XMM registers.
+
+The callback function initially saves and later restores the callee register
+values, incoming arguments, and frame and return address. The callback
+function needs low-level access to the registers or stack, so it is typically
+implemented with assembler.
+
diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html
deleted file mode 100644
index 149b103097ff..000000000000
--- a/docs/WritingAnLLVMPass.html
+++ /dev/null
@@ -1,1954 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>Writing an LLVM Pass</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>
- Writing an LLVM Pass
-</h1>
-
-<ol>
- <li><a href="#introduction">Introduction - What is a pass?</a></li>
- <li><a href="#quickstart">Quick Start - Writing hello world</a>
- <ul>
- <li><a href="#makefile">Setting up the build environment</a></li>
- <li><a href="#basiccode">Basic code required</a></li>
- <li><a href="#running">Running a pass with <tt>opt</tt></a></li>
- </ul></li>
- <li><a href="#passtype">Pass classes and requirements</a>
- <ul>
- <li><a href="#ImmutablePass">The <tt>ImmutablePass</tt> class</a></li>
- <li><a href="#ModulePass">The <tt>ModulePass</tt> class</a>
- <ul>
- <li><a href="#runOnModule">The <tt>runOnModule</tt> method</a></li>
- </ul></li>
- <li><a href="#CallGraphSCCPass">The <tt>CallGraphSCCPass</tt> class</a>
- <ul>
- <li><a href="#doInitialization_scc">The <tt>doInitialization(CallGraph
- &amp;)</tt> method</a></li>
- <li><a href="#runOnSCC">The <tt>runOnSCC</tt> method</a></li>
- <li><a href="#doFinalization_scc">The <tt>doFinalization(CallGraph
- &amp;)</tt> method</a></li>
- </ul></li>
- <li><a href="#FunctionPass">The <tt>FunctionPass</tt> class</a>
- <ul>
- <li><a href="#doInitialization_mod">The <tt>doInitialization(Module
- &amp;)</tt> method</a></li>
- <li><a href="#runOnFunction">The <tt>runOnFunction</tt> method</a></li>
- <li><a href="#doFinalization_mod">The <tt>doFinalization(Module
- &amp;)</tt> method</a></li>
- </ul></li>
- <li><a href="#LoopPass">The <tt>LoopPass</tt> class</a>
- <ul>
- <li><a href="#doInitialization_loop">The <tt>doInitialization(Loop *,
- LPPassManager &amp;)</tt> method</a></li>
- <li><a href="#runOnLoop">The <tt>runOnLoop</tt> method</a></li>
- <li><a href="#doFinalization_loop">The <tt>doFinalization()
- </tt> method</a></li>
- </ul></li>
- <li><a href="#RegionPass">The <tt>RegionPass</tt> class</a>
- <ul>
- <li><a href="#doInitialization_region">The <tt>doInitialization(Region *,
- RGPassManager &amp;)</tt> method</a></li>
- <li><a href="#runOnRegion">The <tt>runOnRegion</tt> method</a></li>
- <li><a href="#doFinalization_region">The <tt>doFinalization()
- </tt> method</a></li>
- </ul></li>
- <li><a href="#BasicBlockPass">The <tt>BasicBlockPass</tt> class</a>
- <ul>
- <li><a href="#doInitialization_fn">The <tt>doInitialization(Function
- &amp;)</tt> method</a></li>
- <li><a href="#runOnBasicBlock">The <tt>runOnBasicBlock</tt>
- method</a></li>
- <li><a href="#doFinalization_fn">The <tt>doFinalization(Function
- &amp;)</tt> method</a></li>
- </ul></li>
- <li><a href="#MachineFunctionPass">The <tt>MachineFunctionPass</tt>
- class</a>
- <ul>
- <li><a href="#runOnMachineFunction">The
- <tt>runOnMachineFunction(MachineFunction &amp;)</tt> method</a></li>
- </ul></li>
- </ul>
- <li><a href="#registration">Pass Registration</a>
- <ul>
- <li><a href="#print">The <tt>print</tt> method</a></li>
- </ul></li>
- <li><a href="#interaction">Specifying interactions between passes</a>
- <ul>
- <li><a href="#getAnalysisUsage">The <tt>getAnalysisUsage</tt>
- method</a></li>
- <li><a href="#AU::addRequired">The <tt>AnalysisUsage::addRequired&lt;&gt;</tt> and <tt>AnalysisUsage::addRequiredTransitive&lt;&gt;</tt> methods</a></li>
- <li><a href="#AU::addPreserved">The <tt>AnalysisUsage::addPreserved&lt;&gt;</tt> method</a></li>
- <li><a href="#AU::examples">Example implementations of <tt>getAnalysisUsage</tt></a></li>
- <li><a href="#getAnalysis">The <tt>getAnalysis&lt;&gt;</tt> and
-<tt>getAnalysisIfAvailable&lt;&gt;</tt> methods</a></li>
- </ul></li>
- <li><a href="#analysisgroup">Implementing Analysis Groups</a>
- <ul>
- <li><a href="#agconcepts">Analysis Group Concepts</a></li>
- <li><a href="#registerag">Using <tt>RegisterAnalysisGroup</tt></a></li>
- </ul></li>
- <li><a href="#passStatistics">Pass Statistics</a>
- <li><a href="#passmanager">What PassManager does</a>
- <ul>
- <li><a href="#releaseMemory">The <tt>releaseMemory</tt> method</a></li>
- </ul></li>
- <li><a href="#registering">Registering dynamically loaded passes</a>
- <ul>
- <li><a href="#registering_existing">Using existing registries</a></li>
- <li><a href="#registering_new">Creating new registries</a></li>
- </ul></li>
- <li><a href="#debughints">Using GDB with dynamically loaded passes</a>
- <ul>
- <li><a href="#breakpoint">Setting a breakpoint in your pass</a></li>
- <li><a href="#debugmisc">Miscellaneous Problems</a></li>
- </ul></li>
- <li><a href="#future">Future extensions planned</a>
- <ul>
- <li><a href="#SMP">Multithreaded LLVM</a></li>
- </ul></li>
-</ol>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a> and
- <a href="mailto:jlaskey@mac.com">Jim Laskey</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="introduction">Introduction - What is a pass?</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The LLVM Pass Framework is an important part of the LLVM system, because LLVM
-passes are where most of the interesting parts of the compiler exist. Passes
-perform the transformations and optimizations that make up the compiler, they
-build the analysis results that are used by these transformations, and they are,
-above all, a structuring technique for compiler code.</p>
-
-<p>All LLVM passes are subclasses of the <tt><a
-href="http://llvm.org/doxygen/classllvm_1_1Pass.html">Pass</a></tt>
-class, which implement functionality by overriding virtual methods inherited
-from <tt>Pass</tt>. Depending on how your pass works, you should inherit from
-the <tt><a href="#ModulePass">ModulePass</a></tt>, <tt><a
-href="#CallGraphSCCPass">CallGraphSCCPass</a></tt>, <tt><a
-href="#FunctionPass">FunctionPass</a></tt>, or <tt><a
-href="#LoopPass">LoopPass</a></tt>, or <tt><a
-href="#RegionPass">RegionPass</a></tt>, or <tt><a
-href="#BasicBlockPass">BasicBlockPass</a></tt> classes, which gives the system
-more information about what your pass does, and how it can be combined with
-other passes. One of the main features of the LLVM Pass Framework is that it
-schedules passes to run in an efficient way based on the constraints that your
-pass meets (which are indicated by which class they derive from).</p>
-
-<p>We start by showing you how to construct a pass, everything from setting up
-the code, to compiling, loading, and executing it. After the basics are down,
-more advanced features are discussed.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="quickstart">Quick Start - Writing hello world</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Here we describe how to write the "hello world" of passes. The "Hello" pass
-is designed to simply print out the name of non-external functions that exist in
-the program being compiled. It does not modify the program at all, it just
-inspects it. The source code and files for this pass are available in the LLVM
-source tree in the <tt>lib/Transforms/Hello</tt> directory.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="makefile">Setting up the build environment</a>
-</h3>
-
-<div>
-
- <p>First, configure and build LLVM. This needs to be done directly inside the
- LLVM source tree rather than in a separate objects directory.
- Next, you need to create a new directory somewhere in the LLVM source
- base. For this example, we'll assume that you made
- <tt>lib/Transforms/Hello</tt>. Finally, you must set up a build script
- (Makefile) that will compile the source code for the new pass. To do this,
- copy the following into <tt>Makefile</tt>:</p>
- <hr>
-
-<div class="doc_code"><pre>
-# Makefile for hello pass
-
-# Path to top level of LLVM hierarchy
-LEVEL = ../../..
-
-# Name of the library to build
-LIBRARYNAME = Hello
-
-# Make the shared library become a loadable module so the tools can
-# dlopen/dlsym on the resulting library.
-LOADABLE_MODULE = 1
-
-# Include the makefile implementation stuff
-include $(LEVEL)/Makefile.common
-</pre></div>
-
-<p>This makefile specifies that all of the <tt>.cpp</tt> files in the current
-directory are to be compiled and linked together into a shared object
-<tt>$(LEVEL)/Debug+Asserts/lib/Hello.so</tt> that can be dynamically loaded by
-the <tt>opt</tt> or <tt>bugpoint</tt> tools via their <tt>-load</tt> options.
-If your operating system uses a suffix other than .so (such as windows or
-Mac OS/X), the appropriate extension will be used.</p>
-
-<p>If you are used CMake to build LLVM, see
-<a href="CMake.html#passdev">Developing an LLVM pass with CMake</a>.</p>
-
-<p>Now that we have the build scripts set up, we just need to write the code for
-the pass itself.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="basiccode">Basic code required</a>
-</h3>
-
-<div>
-
-<p>Now that we have a way to compile our new pass, we just have to write it.
-Start out with:</p>
-
-<div class="doc_code">
-<pre>
-<b>#include</b> "<a href="http://llvm.org/doxygen/Pass_8h-source.html">llvm/Pass.h</a>"
-<b>#include</b> "<a href="http://llvm.org/doxygen/Function_8h-source.html">llvm/Function.h</a>"
-<b>#include</b> "<a href="http://llvm.org/doxygen/raw__ostream_8h.html">llvm/Support/raw_ostream.h</a>"
-</pre>
-</div>
-
-<p>Which are needed because we are writing a <tt><a
-href="http://llvm.org/doxygen/classllvm_1_1Pass.html">Pass</a></tt>,
-we are operating on <tt><a
-href="http://llvm.org/doxygen/classllvm_1_1Function.html">Function</a></tt>'s,
-and we will be doing some printing.</p>
-
-<p>Next we have:</p>
-
-<div class="doc_code">
-<pre>
-<b>using namespace llvm;</b>
-</pre>
-</div>
-
-<p>... which is required because the functions from the include files
-live in the llvm namespace.</p>
-
-<p>Next we have:</p>
-
-<div class="doc_code">
-<pre>
-<b>namespace</b> {
-</pre>
-</div>
-
-<p>... which starts out an anonymous namespace. Anonymous namespaces are to C++
-what the "<tt>static</tt>" keyword is to C (at global scope). It makes the
-things declared inside of the anonymous namespace visible only to the current
-file. If you're not familiar with them, consult a decent C++ book for more
-information.</p>
-
-<p>Next, we declare our pass itself:</p>
-
-<div class="doc_code">
-<pre>
- <b>struct</b> Hello : <b>public</b> <a href="#FunctionPass">FunctionPass</a> {
-</pre>
-</div>
-
-<p>This declares a "<tt>Hello</tt>" class that is a subclass of <tt><a
-href="http://llvm.org/doxygen/classllvm_1_1FunctionPass.html">FunctionPass</a></tt>.
-The different builtin pass subclasses are described in detail <a
-href="#passtype">later</a>, but for now, know that <a
-href="#FunctionPass"><tt>FunctionPass</tt></a>'s operate on a function at a
-time.</p>
-
-<div class="doc_code">
-<pre>
- static char ID;
- Hello() : FunctionPass(ID) {}
-</pre>
-</div>
-
-<p>This declares pass identifier used by LLVM to identify pass. This allows LLVM
-to avoid using expensive C++ runtime information.</p>
-
-<div class="doc_code">
-<pre>
- <b>virtual bool</b> <a href="#runOnFunction">runOnFunction</a>(Function &amp;F) {
- errs() &lt;&lt; "<i>Hello: </i>";
- errs().write_escaped(F.getName()) &lt;&lt; "\n";
- <b>return false</b>;
- }
- }; <i>// end of struct Hello</i>
-} <i>// end of anonymous namespace</i>
-</pre>
-</div>
-
-<p>We declare a "<a href="#runOnFunction"><tt>runOnFunction</tt></a>" method,
-which overloads an abstract virtual method inherited from <a
-href="#FunctionPass"><tt>FunctionPass</tt></a>. This is where we are supposed
-to do our thing, so we just print out our message with the name of each
-function.</p>
-
-<div class="doc_code">
-<pre>
-char Hello::ID = 0;
-</pre>
-</div>
-
-<p>We initialize pass ID here. LLVM uses ID's address to identify a pass, so
-initialization value is not important.</p>
-
-<div class="doc_code">
-<pre>
-static RegisterPass&lt;Hello&gt; X("<i>hello</i>", "<i>Hello World Pass</i>",
- false /* Only looks at CFG */,
- false /* Analysis Pass */);
-</pre>
-</div>
-
-<p>Lastly, we <a href="#registration">register our class</a> <tt>Hello</tt>,
-giving it a command line argument "<tt>hello</tt>", and a name "<tt>Hello World
-Pass</tt>". The last two arguments describe its behavior: if a pass walks CFG
-without modifying it then the third argument is set to <tt>true</tt>; if a pass
-is an analysis pass, for example dominator tree pass, then <tt>true</tt> is
-supplied as the fourth argument.</p>
-
-<p>As a whole, the <tt>.cpp</tt> file looks like:</p>
-
-<div class="doc_code">
-<pre>
-<b>#include</b> "<a href="http://llvm.org/doxygen/Pass_8h-source.html">llvm/Pass.h</a>"
-<b>#include</b> "<a href="http://llvm.org/doxygen/Function_8h-source.html">llvm/Function.h</a>"
-<b>#include</b> "<a href="http://llvm.org/doxygen/raw__ostream_8h.html">llvm/Support/raw_ostream.h</a>"
-
-<b>using namespace llvm;</b>
-
-<b>namespace</b> {
- <b>struct Hello</b> : <b>public</b> <a href="#FunctionPass">FunctionPass</a> {
-
- static char ID;
- Hello() : FunctionPass(ID) {}
-
- <b>virtual bool</b> <a href="#runOnFunction">runOnFunction</a>(Function &amp;F) {
- errs() &lt;&lt; "<i>Hello: </i>";
- errs().write_escaped(F.getName()) &lt;&lt; '\n';
- <b>return false</b>;
- }
-
- };
-}
-
-char Hello::ID = 0;
-static RegisterPass&lt;Hello&gt; X("hello", "Hello World Pass", false, false);
-</pre>
-</div>
-
-<p>Now that it's all together, compile the file with a simple "<tt>gmake</tt>"
-command in the local directory and you should get a new file
-"<tt>Debug+Asserts/lib/Hello.so</tt>" under the top level directory of the LLVM
-source tree (not in the local directory). Note that everything in this file is
-contained in an anonymous namespace &mdash; this reflects the fact that passes
-are self contained units that do not need external interfaces (although they can
-have them) to be useful.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="running">Running a pass with <tt>opt</tt></a>
-</h3>
-
-<div>
-
-<p>Now that you have a brand new shiny shared object file, we can use the
-<tt>opt</tt> command to run an LLVM program through your pass. Because you
-registered your pass with <tt>RegisterPass</tt>, you will be able to
-use the <tt>opt</tt> tool to access it, once loaded.</p>
-
-<p>To test it, follow the example at the end of the <a
-href="GettingStarted.html">Getting Started Guide</a> to compile "Hello World" to
-LLVM. We can now run the bitcode file (<tt>hello.bc</tt>) for the program
-through our transformation like this (or course, any bitcode file will
-work):</p>
-
-<div class="doc_code"><pre>
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -hello &lt; hello.bc &gt; /dev/null
-Hello: __main
-Hello: puts
-Hello: main
-</pre></div>
-
-<p>The '<tt>-load</tt>' option specifies that '<tt>opt</tt>' should load your
-pass as a shared object, which makes '<tt>-hello</tt>' a valid command line
-argument (which is one reason you need to <a href="#registration">register your
-pass</a>). Because the hello pass does not modify the program in any
-interesting way, we just throw away the result of <tt>opt</tt> (sending it to
-<tt>/dev/null</tt>).</p>
-
-<p>To see what happened to the other string you registered, try running
-<tt>opt</tt> with the <tt>-help</tt> option:</p>
-
-<div class="doc_code"><pre>
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -help
-OVERVIEW: llvm .bc -&gt; .bc modular optimizer
-
-USAGE: opt [options] &lt;input bitcode&gt;
-
-OPTIONS:
- Optimizations available:
-...
- -globalopt - Global Variable Optimizer
- -globalsmodref-aa - Simple mod/ref analysis for globals
- -gvn - Global Value Numbering
- <b>-hello - Hello World Pass</b>
- -indvars - Induction Variable Simplification
- -inline - Function Integration/Inlining
- -insert-edge-profiling - Insert instrumentation for edge profiling
-...
-</pre></div>
-
-<p>The pass name gets added as the information string for your pass, giving some
-documentation to users of <tt>opt</tt>. Now that you have a working pass, you
-would go ahead and make it do the cool transformations you want. Once you get
-it all working and tested, it may become useful to find out how fast your pass
-is. The <a href="#passManager"><tt>PassManager</tt></a> provides a nice command
-line option (<tt>--time-passes</tt>) that allows you to get information about
-the execution time of your pass along with the other passes you queue up. For
-example:</p>
-
-<div class="doc_code"><pre>
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -hello -time-passes &lt; hello.bc &gt; /dev/null
-Hello: __main
-Hello: puts
-Hello: main
-===============================================================================
- ... Pass execution timing report ...
-===============================================================================
- Total Execution Time: 0.02 seconds (0.0479059 wall clock)
-
- ---User Time--- --System Time-- --User+System-- ---Wall Time--- --- Pass Name ---
- 0.0100 (100.0%) 0.0000 ( 0.0%) 0.0100 ( 50.0%) 0.0402 ( 84.0%) Bitcode Writer
- 0.0000 ( 0.0%) 0.0100 (100.0%) 0.0100 ( 50.0%) 0.0031 ( 6.4%) Dominator Set Construction
- 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0013 ( 2.7%) Module Verifier
- <b> 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0033 ( 6.9%) Hello World Pass</b>
- 0.0100 (100.0%) 0.0100 (100.0%) 0.0200 (100.0%) 0.0479 (100.0%) TOTAL
-</pre></div>
-
-<p>As you can see, our implementation above is pretty fast :). The additional
-passes listed are automatically inserted by the '<tt>opt</tt>' tool to verify
-that the LLVM emitted by your pass is still valid and well formed LLVM, which
-hasn't been broken somehow.</p>
-
-<p>Now that you have seen the basics of the mechanics behind passes, we can talk
-about some more details of how they work and how to use them.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="passtype">Pass classes and requirements</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>One of the first things that you should do when designing a new pass is to
-decide what class you should subclass for your pass. The <a
-href="#basiccode">Hello World</a> example uses the <tt><a
-href="#FunctionPass">FunctionPass</a></tt> class for its implementation, but we
-did not discuss why or when this should occur. Here we talk about the classes
-available, from the most general to the most specific.</p>
-
-<p>When choosing a superclass for your Pass, you should choose the <b>most
-specific</b> class possible, while still being able to meet the requirements
-listed. This gives the LLVM Pass Infrastructure information necessary to
-optimize how passes are run, so that the resultant compiler isn't unnecessarily
-slow.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ImmutablePass">The <tt>ImmutablePass</tt> class</a>
-</h3>
-
-<div>
-
-<p>The most plain and boring type of pass is the "<tt><a
-href="http://llvm.org/doxygen/classllvm_1_1ImmutablePass.html">ImmutablePass</a></tt>"
-class. This pass type is used for passes that do not have to be run, do not
-change state, and never need to be updated. This is not a normal type of
-transformation or analysis, but can provide information about the current
-compiler configuration.</p>
-
-<p>Although this pass class is very infrequently used, it is important for
-providing information about the current target machine being compiled for, and
-other static information that can affect the various transformations.</p>
-
-<p><tt>ImmutablePass</tt>es never invalidate other transformations, are never
-invalidated, and are never "run".</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ModulePass">The <tt>ModulePass</tt> class</a>
-</h3>
-
-<div>
-
-<p>The "<tt><a
-href="http://llvm.org/doxygen/classllvm_1_1ModulePass.html">ModulePass</a></tt>"
-class is the most general of all superclasses that you can use. Deriving from
-<tt>ModulePass</tt> indicates that your pass uses the entire program as a unit,
-referring to function bodies in no predictable order, or adding and removing
-functions. Because nothing is known about the behavior of <tt>ModulePass</tt>
-subclasses, no optimization can be done for their execution.</p>
-
-<p>A module pass can use function level passes (e.g. dominators) using
-the getAnalysis interface
-<tt>getAnalysis&lt;DominatorTree&gt;(llvm::Function *)</tt> to provide the
-function to retrieve analysis result for, if the function pass does not require
-any module or immutable passes. Note that this can only be done for functions for which the
-analysis ran, e.g. in the case of dominators you should only ask for the
-DominatorTree for function definitions, not declarations.</p>
-
-<p>To write a correct <tt>ModulePass</tt> subclass, derive from
-<tt>ModulePass</tt> and overload the <tt>runOnModule</tt> method with the
-following signature:</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="runOnModule">The <tt>runOnModule</tt> method</a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> runOnModule(Module &amp;M) = 0;
-</pre></div>
-
-<p>The <tt>runOnModule</tt> method performs the interesting work of the pass.
-It should return true if the module was modified by the transformation and
-false otherwise.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="CallGraphSCCPass">The <tt>CallGraphSCCPass</tt> class</a>
-</h3>
-
-<div>
-
-<p>The "<tt><a
-href="http://llvm.org/doxygen/classllvm_1_1CallGraphSCCPass.html">CallGraphSCCPass</a></tt>"
-is used by passes that need to traverse the program bottom-up on the call graph
-(callees before callers). Deriving from CallGraphSCCPass provides some
-mechanics for building and traversing the CallGraph, but also allows the system
-to optimize execution of CallGraphSCCPass's. If your pass meets the
-requirements outlined below, and doesn't meet the requirements of a <tt><a
-href="#FunctionPass">FunctionPass</a></tt> or <tt><a
-href="#BasicBlockPass">BasicBlockPass</a></tt>, you should derive from
-<tt>CallGraphSCCPass</tt>.</p>
-
-<p><b>TODO</b>: explain briefly what SCC, Tarjan's algo, and B-U mean.</p>
-
-<p>To be explicit, <tt>CallGraphSCCPass</tt> subclasses are:</p>
-
-<ol>
-
-<li>... <em>not allowed</em> to inspect or modify any <tt>Function</tt>s other
-than those in the current SCC and the direct callers and direct callees of the
-SCC.</li>
-
-<li>... <em>required</em> to preserve the current CallGraph object, updating it
-to reflect any changes made to the program.</li>
-
-<li>... <em>not allowed</em> to add or remove SCC's from the current Module,
-though they may change the contents of an SCC.</li>
-
-<li>... <em>allowed</em> to add or remove global variables from the current
-Module.</li>
-
-<li>... <em>allowed</em> to maintain state across invocations of
- <a href="#runOnSCC"><tt>runOnSCC</tt></a> (including global data).</li>
-</ol>
-
-<p>Implementing a <tt>CallGraphSCCPass</tt> is slightly tricky in some cases
-because it has to handle SCCs with more than one node in it. All of the virtual
-methods described below should return true if they modified the program, or
-false if they didn't.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="doInitialization_scc">
- The <tt>doInitialization(CallGraph &amp;)</tt> method
- </a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> doInitialization(CallGraph &amp;CG);
-</pre></div>
-
-<p>The <tt>doIninitialize</tt> method is allowed to do most of the things that
-<tt>CallGraphSCCPass</tt>'s are not allowed to do. They can add and remove
-functions, get pointers to functions, etc. The <tt>doInitialization</tt> method
-is designed to do simple initialization type of stuff that does not depend on
-the SCCs being processed. The <tt>doInitialization</tt> method call is not
-scheduled to overlap with any other pass executions (thus it should be very
-fast).</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="runOnSCC">The <tt>runOnSCC</tt> method</a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> runOnSCC(CallGraphSCC &amp;SCC) = 0;
-</pre></div>
-
-<p>The <tt>runOnSCC</tt> method performs the interesting work of the pass, and
-should return true if the module was modified by the transformation, false
-otherwise.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="doFinalization_scc">
- The <tt>doFinalization(CallGraph &amp;)</tt> method
- </a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> doFinalization(CallGraph &amp;CG);
-</pre></div>
-
-<p>The <tt>doFinalization</tt> method is an infrequently used method that is
-called when the pass framework has finished calling <a
-href="#runOnFunction"><tt>runOnFunction</tt></a> for every function in the
-program being compiled.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="FunctionPass">The <tt>FunctionPass</tt> class</a>
-</h3>
-
-<div>
-
-<p>In contrast to <tt>ModulePass</tt> subclasses, <tt><a
-href="http://llvm.org/doxygen/classllvm_1_1Pass.html">FunctionPass</a></tt>
-subclasses do have a predictable, local behavior that can be expected by the
-system. All <tt>FunctionPass</tt> execute on each function in the program
-independent of all of the other functions in the program.
-<tt>FunctionPass</tt>'s do not require that they are executed in a particular
-order, and <tt>FunctionPass</tt>'s do not modify external functions.</p>
-
-<p>To be explicit, <tt>FunctionPass</tt> subclasses are not allowed to:</p>
-
-<ol>
-<li>Modify a Function other than the one currently being processed.</li>
-<li>Add or remove Function's from the current Module.</li>
-<li>Add or remove global variables from the current Module.</li>
-<li>Maintain state across invocations of
- <a href="#runOnFunction"><tt>runOnFunction</tt></a> (including global data)</li>
-</ol>
-
-<p>Implementing a <tt>FunctionPass</tt> is usually straightforward (See the <a
-href="#basiccode">Hello World</a> pass for example). <tt>FunctionPass</tt>'s
-may overload three virtual methods to do their work. All of these methods
-should return true if they modified the program, or false if they didn't.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="doInitialization_mod">
- The <tt>doInitialization(Module &amp;)</tt> method
- </a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> doInitialization(Module &amp;M);
-</pre></div>
-
-<p>The <tt>doIninitialize</tt> method is allowed to do most of the things that
-<tt>FunctionPass</tt>'s are not allowed to do. They can add and remove
-functions, get pointers to functions, etc. The <tt>doInitialization</tt> method
-is designed to do simple initialization type of stuff that does not depend on
-the functions being processed. The <tt>doInitialization</tt> method call is not
-scheduled to overlap with any other pass executions (thus it should be very
-fast).</p>
-
-<p>A good example of how this method should be used is the <a
-href="http://llvm.org/doxygen/LowerAllocations_8cpp-source.html">LowerAllocations</a>
-pass. This pass converts <tt>malloc</tt> and <tt>free</tt> instructions into
-platform dependent <tt>malloc()</tt> and <tt>free()</tt> function calls. It
-uses the <tt>doInitialization</tt> method to get a reference to the malloc and
-free functions that it needs, adding prototypes to the module if necessary.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="runOnFunction">The <tt>runOnFunction</tt> method</a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> runOnFunction(Function &amp;F) = 0;
-</pre></div><p>
-
-<p>The <tt>runOnFunction</tt> method must be implemented by your subclass to do
-the transformation or analysis work of your pass. As usual, a true value should
-be returned if the function is modified.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="doFinalization_mod">
- The <tt>doFinalization(Module &amp;)</tt> method
- </a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> doFinalization(Module &amp;M);
-</pre></div>
-
-<p>The <tt>doFinalization</tt> method is an infrequently used method that is
-called when the pass framework has finished calling <a
-href="#runOnFunction"><tt>runOnFunction</tt></a> for every function in the
-program being compiled.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="LoopPass">The <tt>LoopPass</tt> class </a>
-</h3>
-
-<div>
-
-<p> All <tt>LoopPass</tt> execute on each loop in the function independent of
-all of the other loops in the function. <tt>LoopPass</tt> processes loops in
-loop nest order such that outer most loop is processed last. </p>
-
-<p> <tt>LoopPass</tt> subclasses are allowed to update loop nest using
-<tt>LPPassManager</tt> interface. Implementing a loop pass is usually
-straightforward. <tt>LoopPass</tt>'s may overload three virtual methods to
-do their work. All these methods should return true if they modified the
-program, or false if they didn't. </p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="doInitialization_loop">
- The <tt>doInitialization(Loop *,LPPassManager &amp;)</tt> method
- </a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> doInitialization(Loop *, LPPassManager &amp;LPM);
-</pre></div>
-
-<p>The <tt>doInitialization</tt> method is designed to do simple initialization
-type of stuff that does not depend on the functions being processed. The
-<tt>doInitialization</tt> method call is not scheduled to overlap with any
-other pass executions (thus it should be very fast). LPPassManager
-interface should be used to access Function or Module level analysis
-information.</p>
-
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="runOnLoop">The <tt>runOnLoop</tt> method</a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> runOnLoop(Loop *, LPPassManager &amp;LPM) = 0;
-</pre></div><p>
-
-<p>The <tt>runOnLoop</tt> method must be implemented by your subclass to do
-the transformation or analysis work of your pass. As usual, a true value should
-be returned if the function is modified. <tt>LPPassManager</tt> interface
-should be used to update loop nest.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="doFinalization_loop">The <tt>doFinalization()</tt> method</a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> doFinalization();
-</pre></div>
-
-<p>The <tt>doFinalization</tt> method is an infrequently used method that is
-called when the pass framework has finished calling <a
-href="#runOnLoop"><tt>runOnLoop</tt></a> for every loop in the
-program being compiled. </p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="RegionPass">The <tt>RegionPass</tt> class </a>
-</h3>
-
-<div>
-
-<p> <tt>RegionPass</tt> is similar to <a href="#LoopPass"><tt>LoopPass</tt></a>,
-but executes on each single entry single exit region in the function.
-<tt>RegionPass</tt> processes regions in nested order such that the outer most
-region is processed last. </p>
-
-<p> <tt>RegionPass</tt> subclasses are allowed to update the region tree by using
-the <tt>RGPassManager</tt> interface. You may overload three virtual methods of
-<tt>RegionPass</tt> to implement your own region pass. All these
-methods should return true if they modified the program, or false if they didn not.
-</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="doInitialization_region">
- The <tt>doInitialization(Region *, RGPassManager &amp;)</tt> method
- </a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> doInitialization(Region *, RGPassManager &amp;RGM);
-</pre></div>
-
-<p>The <tt>doInitialization</tt> method is designed to do simple initialization
-that does not depend on the functions being processed. The
-<tt>doInitialization</tt> method call is not scheduled to overlap with any
-other pass executions (thus it should be very fast). The <tt>RGPassManager</tt>
-interface should be used to access <tt>Function</tt> or <tt>Module</tt> level
-analysis information.</p>
-
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="runOnRegion">The <tt>runOnRegion</tt> method</a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> runOnRegion(Region *, RGPassManager &amp;RGM) = 0;
-</pre></div>
-
-<p>The <tt>runOnRegion</tt> method must be implemented by your subclass to do
-the transformation or analysis work of your pass. As usual, a true value
-should be returned if the region is modified. The <tt>RGPassManager</tt>
-interface should be used to update the region tree.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="doFinalization_region">The <tt>doFinalization()</tt> method</a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> doFinalization();
-</pre></div>
-
-<p>The <tt>doFinalization</tt> method is an infrequently used method that is
-called when the pass framework has finished calling <a
-href="#runOnRegion"><tt>runOnRegion</tt></a> for every region in the
-program being compiled. </p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="BasicBlockPass">The <tt>BasicBlockPass</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>BasicBlockPass</tt>'s are just like <a
-href="#FunctionPass"><tt>FunctionPass</tt></a>'s, except that they must limit
-their scope of inspection and modification to a single basic block at a time.
-As such, they are <b>not</b> allowed to do any of the following:</p>
-
-<ol>
-<li>Modify or inspect any basic blocks outside of the current one</li>
-<li>Maintain state across invocations of
- <a href="#runOnBasicBlock"><tt>runOnBasicBlock</tt></a></li>
-<li>Modify the control flow graph (by altering terminator instructions)</li>
-<li>Any of the things forbidden for
- <a href="#FunctionPass"><tt>FunctionPass</tt></a>es.</li>
-</ol>
-
-<p><tt>BasicBlockPass</tt>es are useful for traditional local and "peephole"
-optimizations. They may override the same <a
-href="#doInitialization_mod"><tt>doInitialization(Module &amp;)</tt></a> and <a
-href="#doFinalization_mod"><tt>doFinalization(Module &amp;)</tt></a> methods that <a
-href="#FunctionPass"><tt>FunctionPass</tt></a>es have, and may also implement
-the virtual methods described below.</p>
-
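-<p>As a sketch (the <tt>CountInsts</tt> name is hypothetical), a minimal
-<tt>BasicBlockPass</tt> that only inspects its block might look like this:</p>
-
-<div class="doc_code"><pre>
-<b>namespace</b> {
-  <i>// Sketch: walks the instructions of a single block, modifies nothing.</i>
-  <b>struct</b> CountInsts : <b>public</b> BasicBlockPass {
-    <b>static char</b> ID;
-    CountInsts() : BasicBlockPass(ID) {}
-
-    <b>virtual bool</b> runOnBasicBlock(BasicBlock &amp;BB) {
-      <b>for</b> (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
-        ;  <i>// a real peephole pass would inspect or rewrite *I here</i>
-      <b>return false</b>;  <i>// nothing was modified</i>
-    }
-  };
-}
-<b>char</b> CountInsts::ID = 0;
-</pre></div>
-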
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="doInitialization_fn">
- The <tt>doInitialization(Function &amp;)</tt> method
- </a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> doInitialization(Function &amp;F);
-</pre></div>
-
-<p>The <tt>doInitialization</tt> method is allowed to do most of the things that
-<tt>BasicBlockPass</tt>es are not allowed to do, but that
-<tt>FunctionPass</tt>es can. The <tt>doInitialization</tt> method is designed
-to do simple initialization that does not depend on the
-BasicBlocks being processed. The <tt>doInitialization</tt> method call is not
-scheduled to overlap with any other pass executions (thus it should be very
-fast).</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="runOnBasicBlock">The <tt>runOnBasicBlock</tt> method</a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> runOnBasicBlock(BasicBlock &amp;BB) = 0;
-</pre></div>
-
-<p>Override this function to do the work of the <tt>BasicBlockPass</tt>. This
-function is not allowed to inspect or modify basic blocks other than the
-parameter, and is not allowed to modify the CFG. A true value must be returned
-if the basic block is modified.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="doFinalization_fn">
- The <tt>doFinalization(Function &amp;)</tt> method
- </a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> doFinalization(Function &amp;F);
-</pre></div>
-
-<p>The <tt>doFinalization</tt> method is an infrequently used method that is
-called when the pass framework has finished calling <a
-href="#runOnBasicBlock"><tt>runOnBasicBlock</tt></a> for every BasicBlock in the
-program being compiled. This can be used to perform per-function
-finalization.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="MachineFunctionPass">The <tt>MachineFunctionPass</tt> class</a>
-</h3>
-
-<div>
-
-<p>A <tt>MachineFunctionPass</tt> is a part of the LLVM code generator that
-executes on the machine-dependent representation of each LLVM function in the
-program.</p>
-
-<p>Code generator passes are registered and initialized specially by
-<tt>TargetMachine::addPassesToEmitFile</tt> and similar routines, so they
-cannot generally be run from the <tt>opt</tt> or <tt>bugpoint</tt>
-commands.</p>
-
-<p>A <tt>MachineFunctionPass</tt> is also a <tt>FunctionPass</tt>, so all
-the restrictions that apply to a <tt>FunctionPass</tt> also apply to it.
-<tt>MachineFunctionPass</tt>es also have additional restrictions. In particular,
-<tt>MachineFunctionPass</tt>es are not allowed to do any of the following:</p>
-
-<ol>
-<li>Modify or create any LLVM IR Instructions, BasicBlocks, Arguments,
- Functions, GlobalVariables, GlobalAliases, or Modules.</li>
-<li>Modify a MachineFunction other than the one currently being processed.</li>
-<li>Maintain state across invocations of <a
-href="#runOnMachineFunction"><tt>runOnMachineFunction</tt></a> (including global
-data)</li>
-</ol>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="runOnMachineFunction">
- The <tt>runOnMachineFunction(MachineFunction &amp;MF)</tt> method
- </a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual bool</b> runOnMachineFunction(MachineFunction &amp;MF) = 0;
-</pre></div>
-
-<p><tt>runOnMachineFunction</tt> can be considered the main entry point of a
-<tt>MachineFunctionPass</tt>; that is, you should override this method to do the
-work of your <tt>MachineFunctionPass</tt>.</p>
-
-<p>The <tt>runOnMachineFunction</tt> method is called on every
-<tt>MachineFunction</tt> in a <tt>Module</tt>, so that the
-<tt>MachineFunctionPass</tt> may perform optimizations on the machine-dependent
-representation of the function. If you want to get at the LLVM <tt>Function</tt>
-for the <tt>MachineFunction</tt> you're working on, use
-<tt>MachineFunction</tt>'s <tt>getFunction()</tt> accessor method -- but
-remember, you may not modify the LLVM <tt>Function</tt> or its contents from a
-<tt>MachineFunctionPass</tt>.</p>
-
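-<p>For example, a sketch of read-only access to the IR function from a
-hypothetical pass:</p>
-
-<div class="doc_code"><pre>
-<b>bool</b> MyMachinePass::runOnMachineFunction(MachineFunction &amp;MF) {
-  <i>// Read-only access to the corresponding LLVM IR function.</i>
-  <b>const</b> Function *F = MF.getFunction();
-  errs() &lt;&lt; "Processing " &lt;&lt; F-&gt;getName() &lt;&lt; "\n";
-  <b>return false</b>;
-}
-</pre></div>
-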
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="registration">Pass registration</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>In the <a href="#basiccode">Hello World</a> example pass we illustrated how
-pass registration works, and discussed some of the reasons that it is used and
-what it does. Here we discuss how and why passes are registered.</p>
-
-<p>As we saw above, passes are registered with the <b><tt>RegisterPass</tt></b>
-template. The template parameter is the class of the pass being registered.
-The first constructor argument is the name of the pass that is to be used on
-the command line to specify that the pass should be added to a program (for
-example, with <tt>opt</tt> or <tt>bugpoint</tt>). The second argument is the
-name of the pass, which is to be used for the <tt>-help</tt> output of
-programs, as well as for debug output generated by the <tt>--debug-pass</tt>
-option.</p>
-
-<p>If you want your pass to be easily dumpable, you should
-implement the virtual <tt>print</tt> method:</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="print">The <tt>print</tt> method</a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual void</b> print(std::ostream &amp;O, <b>const</b> Module *M) <b>const</b>;
-</pre></div>
-
-<p>The <tt>print</tt> method must be implemented by "analyses" in order to print
-a human readable version of the analysis results. This is useful for debugging
-an analysis itself, as well as for other people to figure out how an analysis
-works. Use the <tt>opt -analyze</tt> argument to invoke this method.</p>
-
-<p>The <tt>std::ostream</tt> parameter specifies the stream to write the results
-to, and the <tt>Module</tt> parameter gives a pointer to the top level module of
-the program that has been analyzed. Note however that this pointer may be null
-in certain circumstances (such as calling the <tt>Pass::dump()</tt> method from
-a debugger), so it should only be used to enhance debug output; it should not be
-depended on.</p>
-
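-<p>A sketch of a <tt>print</tt> implementation (the pass and its
-<tt>NumResults</tt> member are hypothetical):</p>
-
-<div class="doc_code"><pre>
-<b>void</b> MyAnalysis::print(std::ostream &amp;O, <b>const</b> Module *M) <b>const</b> {
-  O &lt;&lt; "MyAnalysis computed " &lt;&lt; NumResults &lt;&lt; " results\n";
-  <b>if</b> (M)  <i>// may be null, e.g. when called from Pass::dump()</i>
-    O &lt;&lt; "for module " &lt;&lt; M-&gt;getModuleIdentifier() &lt;&lt; "\n";
-}
-</pre></div>
-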
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="interaction">Specifying interactions between passes</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>One of the main responsibilities of the <tt>PassManager</tt> is to make sure
-that passes interact with each other correctly. Because <tt>PassManager</tt>
-tries to <a href="#passmanager">optimize the execution of passes</a> it must
-know how the passes interact with each other and what dependencies exist between
-the various passes. To track this, each pass can declare the set of passes that
-are required to be executed before the current pass, and the passes which are
-invalidated by the current pass.</p>
-
-<p>Typically this functionality is used to require that analysis results are
-computed before your pass is run. Running arbitrary transformation passes can
-invalidate the computed analysis results, which is what the invalidation set
-specifies. If a pass does not implement the <tt><a
-href="#getAnalysisUsage">getAnalysisUsage</a></tt> method, it defaults to not
-having any prerequisite passes, and invalidating <b>all</b> other passes.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="getAnalysisUsage">The <tt>getAnalysisUsage</tt> method</a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<b>virtual void</b> getAnalysisUsage(AnalysisUsage &amp;Info) <b>const</b>;
-</pre></div>
-
-<p>By implementing the <tt>getAnalysisUsage</tt> method, the required and
-invalidated sets may be specified for your transformation. The implementation
-should fill in the <tt><a
-href="http://llvm.org/doxygen/classllvm_1_1AnalysisUsage.html">AnalysisUsage</a></tt>
-object with information about which passes are required and not invalidated. To
-do this, a pass may call any of the following methods on the AnalysisUsage
-object:</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="AU::addRequired">
- The <tt>AnalysisUsage::addRequired&lt;&gt;</tt>
- and <tt>AnalysisUsage::addRequiredTransitive&lt;&gt;</tt> methods
- </a>
-</h4>
-
-<div>
-<p>
-If your pass requires a previous pass to be executed (an analysis for example),
-it can use one of these methods to arrange for it to be run before your pass.
-LLVM has many different types of analyses and passes that can be required,
-spanning the range from <tt>DominatorSet</tt> to <tt>BreakCriticalEdges</tt>.
-Requiring <tt>BreakCriticalEdges</tt>, for example, guarantees that there will
-be no critical edges in the CFG when your pass has been run.
-</p>
-
-<p>
-Some analyses chain to other analyses to do their job. For example, an <a
-href="AliasAnalysis.html">AliasAnalysis</a> implementation is required to <a
-href="AliasAnalysis.html#chaining">chain</a> to other alias analysis passes. In
-cases where analyses chain, the <tt>addRequiredTransitive</tt> method should be
-used instead of the <tt>addRequired</tt> method. This informs the PassManager
-that the transitively required pass should be alive as long as the requiring
-pass is.
-</p>
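-
-<p>For example, a pass that queries alias analysis (a chained analysis) might
-declare the following (a sketch):</p>
-
-<div class="doc_code"><pre>
-<b>void</b> MyPass::getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
-  AU.addRequiredTransitive&lt;AliasAnalysis&gt;();  <i>// keep AA alive while this pass is</i>
-}
-</pre></div>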
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="AU::addPreserved">
- The <tt>AnalysisUsage::addPreserved&lt;&gt;</tt> method
- </a>
-</h4>
-
-<div>
-<p>
-One of the jobs of the PassManager is to optimize how and when analyses are run.
-In particular, it attempts to avoid recomputing data unless it needs to. For
-this reason, passes are allowed to declare that they preserve (i.e., they don't
-invalidate) an existing analysis if it's available. For example, a simple
-constant folding pass would not modify the CFG, so it can't possibly affect the
-results of dominator analysis. By default, all passes are assumed to invalidate
-all others.
-</p>
-
-<p>
-The <tt>AnalysisUsage</tt> class provides several methods which are useful in
-certain circumstances that are related to <tt>addPreserved</tt>. In particular,
-the <tt>setPreservesAll</tt> method can be called to indicate that the pass does
-not modify the LLVM program at all (which is true for analyses), and the
-<tt>setPreservesCFG</tt> method can be used by transformations that change
-instructions in the program but do not modify the CFG or terminator instructions
-(note that this property is implicitly set for <a
-href="#BasicBlockPass">BasicBlockPass</a>'s).
-</p>
-
-<p>
-<tt>addPreserved</tt> is particularly useful for transformations like
-<tt>BreakCriticalEdges</tt>. This pass knows how to update a small set of loop
-and dominator related analyses if they exist, so it can preserve them, despite
-the fact that it hacks on the CFG.
-</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="AU::examples">
- Example implementations of <tt>getAnalysisUsage</tt>
- </a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
-<i>// This example modifies the program, but does not modify the CFG</i>
-<b>void</b> <a href="http://llvm.org/doxygen/structLICM.html">LICM</a>::getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
- AU.setPreservesCFG();
- AU.addRequired&lt;<a href="http://llvm.org/doxygen/classllvm_1_1LoopInfo.html">LoopInfo</a>&gt;();
-}
-</pre></div>
-
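-<p>As a second, hypothetical sketch: a transformation that changes instructions
-but keeps the CFG, and knows how to keep <tt>DominatorTree</tt> up to date:</p>
-
-<div class="doc_code"><pre>
-<i>// This example preserves the CFG and explicitly preserves dominators</i>
-<b>void</b> MyTransform::getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
-  AU.setPreservesCFG();
-  AU.addPreserved&lt;DominatorTree&gt;();
-}
-</pre></div>
-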
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="getAnalysis">
- The <tt>getAnalysis&lt;&gt;</tt> and
- <tt>getAnalysisIfAvailable&lt;&gt;</tt> methods
- </a>
-</h4>
-
-<div>
-
-<p>The <tt>Pass::getAnalysis&lt;&gt;</tt> method is automatically inherited by
-your class, providing you with access to the passes that you declared that you
-required with the <a href="#getAnalysisUsage"><tt>getAnalysisUsage</tt></a>
-method. It takes a single template argument that specifies which pass class you
-want, and returns a reference to that pass. For example:</p>
-
-<div class="doc_code"><pre>
-bool LICM::runOnFunction(Function &amp;F) {
- LoopInfo &amp;LI = getAnalysis&lt;LoopInfo&gt;();
- ...
-}
-</pre></div>
-
-<p>This method call returns a reference to the pass desired. You may get a
-runtime assertion failure if you attempt to get an analysis that you did not
-declare as required in your <a
-href="#getAnalysisUsage"><tt>getAnalysisUsage</tt></a> implementation. This
-method can be called by your <tt>run*</tt> method implementation, or by any
-other local method invoked by your <tt>run*</tt> method.</p>
-
-<p>A module level pass can use function level analysis information through this
-interface. For example:</p>
-
-<div class="doc_code"><pre>
-bool ModuleLevelPass::runOnModule(Module &amp;M) {
- ...
- DominatorTree &amp;DT = getAnalysis&lt;DominatorTree&gt;(Func);
- ...
-}
-</pre></div>
-
-<p>In the above example, <tt>runOnFunction</tt> for <tt>DominatorTree</tt> is
-called by the pass manager before returning a reference to the desired pass.</p>
-
-<p>
-If your pass is capable of updating analyses if they exist (e.g.,
-<tt>BreakCriticalEdges</tt>, as described above), you can use the
-<tt>getAnalysisIfAvailable</tt> method, which returns a pointer to the analysis
-if it is active. For example:</p>
-
-<div class="doc_code"><pre>
-...
-if (DominatorSet *DS = getAnalysisIfAvailable&lt;DominatorSet&gt;()) {
- <i>// A DominatorSet is active. This code will update it.</i>
-}
-...
-</pre></div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="analysisgroup">Implementing Analysis Groups</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we understand the basics of how passes are defined, how they are
-used, and how they are required from other passes, it's time to get a little bit
-fancier. All of the pass relationships that we have seen so far are very
-simple: one pass depends on one other specific pass to be run before it can run.
-For many applications, this is great, for others, more flexibility is
-required.</p>
-
-<p>In particular, some analyses are defined such that there is a single simple
-interface to the analysis results, but multiple ways of calculating them.
-Consider alias analysis for example. The most trivial alias analysis returns
-"may alias" for any alias query. The most sophisticated analysis a
-flow-sensitive, context-sensitive interprocedural analysis that can take a
-significant amount of time to execute (and obviously, there is a lot of room
-between these two extremes for other implementations). To cleanly support
-situations like this, the LLVM Pass Infrastructure supports the notion of
-Analysis Groups.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="agconcepts">Analysis Group Concepts</a>
-</h4>
-
-<div>
-
-<p>An Analysis Group is a single simple interface that may be implemented by
-multiple different passes. Analysis Groups can be given human readable names
-just like passes, but unlike passes, they need not derive from the <tt>Pass</tt>
-class. An analysis group may have one or more implementations, one of which is
-the "default" implementation.</p>
-
-<p>Analysis groups are used by client passes just like other passes are: through
-the <tt>AnalysisUsage::addRequired()</tt> and <tt>Pass::getAnalysis()</tt> methods.
-In order to resolve this requirement, the <a href="#passmanager">PassManager</a>
-scans the available passes to see if any implementations of the analysis group
-are available. If none is available, the default implementation is created for
-the pass to use. All standard rules for <A href="#interaction">interaction
-between passes</a> still apply.</p>
-
-<p>Although <a href="#registration">Pass Registration</a> is optional for normal
-passes, all analysis group implementations must be registered, and must use the
-<A href="#registerag"><tt>INITIALIZE_AG_PASS</tt></a> template to join the
-implementation pool. Also, a default implementation of the interface
-<b>must</b> be registered with <A
-href="#registerag"><tt>RegisterAnalysisGroup</tt></a>.</p>
-
-<p>As a concrete example of an Analysis Group in action, consider the <a
-href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>
-analysis group. The default implementation of the alias analysis interface (the
-<tt><a
-href="http://llvm.org/doxygen/structBasicAliasAnalysis.html">basicaa</a></tt>
-pass) just does a few simple checks that don't require significant analysis to
-compute (such as: two different globals can never alias each other, etc).
-Passes that use the <tt><a
-href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a></tt>
-interface (for example the <tt><a
-href="http://llvm.org/doxygen/structGCSE.html">gcse</a></tt> pass), do
-not care which implementation of alias analysis is actually provided, they just
-use the designated interface.</p>
-
-<p>From the user's perspective, commands work just like normal. Issuing the
-command '<tt>opt -gcse ...</tt>' will cause the <tt>basicaa</tt> class to be
-instantiated and added to the pass sequence. Issuing the command '<tt>opt
--somefancyaa -gcse ...</tt>' will cause the <tt>gcse</tt> pass to use the
-<tt>somefancyaa</tt> alias analysis (which doesn't actually exist, it's just a
-hypothetical example) instead.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="registerag">Using <tt>RegisterAnalysisGroup</tt></a>
-</h4>
-
-<div>
-
-<p>The <tt>RegisterAnalysisGroup</tt> template is used to register the analysis
-group itself, while the <tt>INITIALIZE_AG_PASS</tt> is used to add pass
-implementations to the analysis group. First,
-an analysis group should be registered, with a human readable name
-provided for it.
-Unlike registration of passes, there is no command line argument to be specified
-for the Analysis Group Interface itself, because it is "abstract":</p>
-
-<div class="doc_code"><pre>
-<b>static</b> RegisterAnalysisGroup&lt;<a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>&gt; A("<i>Alias Analysis</i>");
-</pre></div>
-
-<p>Once the analysis is registered, passes can declare that they are valid
-implementations of the interface by using the following code:</p>
-
-<div class="doc_code"><pre>
-<b>namespace</b> {
- //<i> Declare that we implement the AliasAnalysis interface</i>
- INITIALIZE_AG_PASS(FancyAA, <a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>, "<i>somefancyaa</i>",
- "<i>A more complex alias analysis implementation</i>",
- false, // <i>Is CFG Only?</i>
- true, // <i>Is Analysis?</i>
- false); // <i>Is default Analysis Group implementation?</i>
-}
-</pre></div>
-
-<p>This just shows a class <tt>FancyAA</tt> that
-uses the <tt>INITIALIZE_AG_PASS</tt> macro both to register and
-to "join" the <tt><a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a></tt>
-analysis group. Every implementation of an analysis group should join using
-this macro.</p>
-
-<div class="doc_code"><pre>
-<b>namespace</b> {
- //<i> Declare that we implement the AliasAnalysis interface</i>
- INITIALIZE_AG_PASS(BasicAA, <a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>, "<i>basicaa</i>",
- "<i>Basic Alias Analysis (default AA impl)</i>",
- false, // <i>Is CFG Only?</i>
- true, // <i>Is Analysis?</i>
- true); // <i>Is default Analysis Group implementation?</i>
-}
-</pre></div>
-
-<p>Here we show how the default implementation is specified (using the final
-argument to the <tt>INITIALIZE_AG_PASS</tt> template). There must be exactly
-one default implementation available at all times for an Analysis Group to be
-used. Only the default implementation can derive from <tt>ImmutablePass</tt>.
-Here we declare that the
- <tt><a href="http://llvm.org/doxygen/structBasicAliasAnalysis.html">BasicAliasAnalysis</a></tt>
-pass is the default implementation for the interface.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="passStatistics">Pass Statistics</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p>The <a
-href="http://llvm.org/doxygen/Statistic_8h-source.html"><tt>Statistic</tt></a>
-class is designed to be an easy way to expose various success metrics from
-passes. These statistics are printed at the end of a run when the
-<tt>-stats</tt> command line option is enabled. See the <a
-href="http://llvm.org/docs/ProgrammersManual.html#Statistic">Statistics
-section</a> in the Programmer's Manual for details.</p>
-
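-<p>A sketch of the usual pattern (the statistic name is hypothetical):</p>
-
-<div class="doc_code"><pre>
-#define DEBUG_TYPE "<i>mypass</i>"
-#include "llvm/ADT/Statistic.h"
-
-STATISTIC(NumRewritten, "<i>Number of instructions rewritten</i>");
-
-<i>// ... then, wherever the interesting event happens:</i>
-++NumRewritten;
-</pre></div>
-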
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="passmanager">What PassManager does</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The <a
-href="http://llvm.org/doxygen/PassManager_8h-source.html"><tt>PassManager</tt></a>
-<a
-href="http://llvm.org/doxygen/classllvm_1_1PassManager.html">class</a>
-takes a list of passes, ensures their <a href="#interaction">prerequisites</a>
-are set up correctly, and then schedules passes to run efficiently. All of the
-LLVM tools that run passes use the <tt>PassManager</tt> for execution of these
-passes.</p>
-
-<p>The <tt>PassManager</tt> does two main things to try to reduce the execution
-time of a series of passes:</p>
-
-<ol>
-<li><b>Share analysis results</b> - The PassManager attempts to avoid
-recomputing analysis results as much as possible. This means keeping track of
-which analyses are already available, which analyses get invalidated, and which
-analyses need to be run for a pass. An important part of this work is that the
-<tt>PassManager</tt> tracks the exact lifetime of all analysis results, allowing
-it to <a href="#releaseMemory">free memory</a> allocated to holding analysis
-results as soon as they are no longer needed.</li>
-
-<li><b>Pipeline the execution of passes on the program</b> - The
-<tt>PassManager</tt> attempts to get better cache and memory usage behavior out
-of a series of passes by pipelining the passes together. This means that, given
-a series of consecutive <a href="#FunctionPass"><tt>FunctionPass</tt></a>'s, it
-will execute all of the <a href="#FunctionPass"><tt>FunctionPass</tt></a>'s on
-the first function, then all of the <a
-href="#FunctionPass"><tt>FunctionPass</tt></a>es on the second function,
-etc... until the entire program has been run through the passes.
-
-<p>This improves the cache behavior of the compiler, because it is only touching
-the LLVM program representation for a single function at a time, instead of
-traversing the entire program. It reduces the memory consumption of the compiler,
-because, for example, only one <a
-href="http://llvm.org/doxygen/classllvm_1_1DominatorSet.html"><tt>DominatorSet</tt></a>
-needs to be calculated at a time. This also makes it possible to implement
-some <a
-href="#SMP">interesting enhancements</a> in the future.</p></li>
-
-</ol>
-
-<p>The effectiveness of the <tt>PassManager</tt> is influenced directly by how
-much information it has about the behaviors of the passes it is scheduling. For
-example, the "preserved" set is intentionally conservative in the face of an
-unimplemented <a href="#getAnalysisUsage"><tt>getAnalysisUsage</tt></a> method.
-Failing to implement it when it should be will prevent any analysis results
-from living across the execution of your pass.</p>
-
-<p>The <tt>PassManager</tt> class exposes a <tt>--debug-pass</tt> command line
-option that is useful for debugging pass execution, seeing how things work, and
-diagnosing when you should be preserving more analyses than you currently are.
-(To get information about all of the variants of the <tt>--debug-pass</tt>
-option, just type '<tt>opt -help-hidden</tt>'.)</p>
-
-<p>By using the <tt>--debug-pass=Structure</tt> option, for example, we can see
-how our <a href="#basiccode">Hello World</a> pass interacts with other passes.
-Let's try it out with the <tt>gcse</tt> and <tt>licm</tt> passes:</p>
-
-<div class="doc_code"><pre>
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -licm --debug-pass=Structure &lt; hello.bc &gt; /dev/null
-Module Pass Manager
- Function Pass Manager
- Dominator Set Construction
- Immediate Dominators Construction
- Global Common Subexpression Elimination
--- Immediate Dominators Construction
--- Global Common Subexpression Elimination
- Natural Loop Construction
- Loop Invariant Code Motion
--- Natural Loop Construction
--- Loop Invariant Code Motion
- Module Verifier
--- Dominator Set Construction
--- Module Verifier
- Bitcode Writer
---Bitcode Writer
-</pre></div>
-
-<p>This output shows us when passes are constructed and when the analysis
-results are known to be dead (prefixed with '<tt>--</tt>'). Here we see that
-GCSE uses dominator and immediate dominator information to do its job. The LICM
-pass uses natural loop information, which uses dominator sets, but not immediate
-dominators. Because immediate dominators are no longer useful after the GCSE
-pass, it is immediately destroyed. The dominator sets are then reused to
-compute natural loop information, which is then used by the LICM pass.</p>
-
-<p>After the LICM pass, the module verifier runs (which is automatically added
-by the '<tt>opt</tt>' tool), which uses the dominator set to check that the
-resultant LLVM code is well formed. After it finishes, the dominator set
-information is destroyed, after being computed once, and shared by three
-passes.</p>
-
-<p>Let's see how this changes when we run the <a href="#basiccode">Hello
-World</a> pass in between the two passes:</p>
-
-<div class="doc_code"><pre>
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure &lt; hello.bc &gt; /dev/null
-Module Pass Manager
- Function Pass Manager
- Dominator Set Construction
- Immediate Dominators Construction
- Global Common Subexpression Elimination
-<b>-- Dominator Set Construction</b>
--- Immediate Dominators Construction
--- Global Common Subexpression Elimination
-<b> Hello World Pass
--- Hello World Pass
- Dominator Set Construction</b>
- Natural Loop Construction
- Loop Invariant Code Motion
--- Natural Loop Construction
--- Loop Invariant Code Motion
- Module Verifier
--- Dominator Set Construction
--- Module Verifier
- Bitcode Writer
---Bitcode Writer
-Hello: __main
-Hello: puts
-Hello: main
-</pre></div>
-
-<p>Here we see that the <a href="#basiccode">Hello World</a> pass has killed the
-Dominator Set pass, even though it doesn't modify the code at all! To fix this,
-we need to add the following <a
-href="#getAnalysisUsage"><tt>getAnalysisUsage</tt></a> method to our pass:</p>
-
-<div class="doc_code"><pre>
-<i>// We don't modify the program, so we preserve all analyses</i>
-<b>virtual void</b> getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
- AU.setPreservesAll();
-}
-</pre></div>
-
-<p>Now when we run our pass, we get this output:</p>
-
-<div class="doc_code"><pre>
-$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure &lt; hello.bc &gt; /dev/null
-Pass Arguments: -gcse -hello -licm
-Module Pass Manager
- Function Pass Manager
- Dominator Set Construction
- Immediate Dominators Construction
- Global Common Subexpression Elimination
--- Immediate Dominators Construction
--- Global Common Subexpression Elimination
- Hello World Pass
--- Hello World Pass
- Natural Loop Construction
- Loop Invariant Code Motion
--- Loop Invariant Code Motion
--- Natural Loop Construction
- Module Verifier
--- Dominator Set Construction
--- Module Verifier
- Bitcode Writer
---Bitcode Writer
-Hello: __main
-Hello: puts
-Hello: main
-</pre></div>
-
-<p>This shows that we don't accidentally invalidate dominator information
-anymore, and therefore do not have to compute it twice.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="releaseMemory">The <tt>releaseMemory</tt> method</a>
-</h4>
-
-<div>
-
-<div class="doc_code"><pre>
- <b>virtual void</b> releaseMemory();
-</pre></div>
-
-<p>The <tt>PassManager</tt> automatically determines when to compute analysis
-results, and how long to keep them around for. Because the lifetime of the pass
-object itself is effectively the entire duration of the compilation process, we
-need some way to free analysis results when they are no longer useful. The
-<tt>releaseMemory</tt> virtual method is the way to do this.</p>
-
-<p>If you are writing an analysis or any other pass that retains a significant
-amount of state (for use by another pass which "requires" your pass and uses the
-<a href="#getAnalysis">getAnalysis</a> method) you should implement
-<tt>releaseMemory</tt> to, well, release the memory allocated to maintain this
-internal state. This method is called after the <tt>run*</tt> method for the
-class, before the next call of <tt>run*</tt> in your pass.</p>
-
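-<p>A sketch, assuming a hypothetical analysis that caches its results in a
-map:</p>
-
-<div class="doc_code"><pre>
-<b>virtual void</b> releaseMemory() {
-  ResultMap.clear();  <i>// drop the cached per-function results</i>
-}
-</pre></div>
-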
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="registering">Registering dynamically loaded passes</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p><i>Size matters</i> when constructing production quality tools using LLVM,
-both for the purposes of distribution and for regulating the resident code size
-when running on the target system. Therefore, it becomes desirable to
-selectively use some passes, while omitting others, and to maintain the
-flexibility to change configurations later on. You want to be able to do all
-this, and to provide feedback to the user. This is where pass registration
-comes into play.</p>
-
-<p>The fundamental mechanisms for pass registration are the
-<tt>MachinePassRegistry</tt> class and subclasses of
-<tt>MachinePassRegistryNode</tt>.</p>
-
-<p>An instance of <tt>MachinePassRegistry</tt> is used to maintain a list of
-<tt>MachinePassRegistryNode</tt> objects. This instance maintains the list and
-communicates additions and deletions to the command line interface.</p>
-
-<p>An instance of <tt>MachinePassRegistryNode</tt> subclass is used to maintain
-information provided about a particular pass. This information includes the
-command line name, the command help string and the address of the function used
-to create an instance of the pass. A global static constructor of one of these
-instances <i>registers</i> with a corresponding <tt>MachinePassRegistry</tt>,
-the static destructor <i>unregisters</i>. Thus a pass that is statically linked
-in the tool will be registered at start up. A dynamically loaded pass will
-register on load and unregister at unload.</p>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="registering_existing">Using existing registries</a>
-</h3>
-
-<div>
-
-<p>There are predefined registries to track instruction scheduling
-(<tt>RegisterScheduler</tt>) and register allocation (<tt>RegisterRegAlloc</tt>)
-machine passes. Here we will describe how to <i>register</i> a register
-allocator machine pass.</p>
-
-<p>Implement your register allocator machine pass. In your register allocator
-<tt>.cpp</tt> file, add the following include:</p>
-
-<div class="doc_code"><pre>
-#include "llvm/CodeGen/RegAllocRegistry.h"
-</pre></div>
-
-<p>Also in your register allocator <tt>.cpp</tt> file, define a creator function
-in the following form:</p>
-
-<div class="doc_code"><pre>
-FunctionPass *createMyRegisterAllocator() {
- return new MyRegisterAllocator();
-}
-</pre></div>
-
-<p>Note that the signature of this function should match the type of
-<tt>RegisterRegAlloc::FunctionPassCtor</tt>. In the same file add the
-"installing" declaration, in the form;</p>
-
-<div class="doc_code"><pre>
-static RegisterRegAlloc myRegAlloc("myregalloc",
- "my register allocator help string",
- createMyRegisterAllocator);
-</pre></div>
-
-<p>Note that the two spaces prior to the help string produce a tidy result in
-the <tt>-help</tt> output:</p>
-
-<div class="doc_code"><pre>
-$ llc -help
- ...
- -regalloc - Register allocator to use (default=linearscan)
- =linearscan - linear scan register allocator
- =local - local register allocator
- =simple - simple register allocator
- =myregalloc - my register allocator help string
- ...
-</pre></div>
-
-<p>And that's it. The user is now free to use <tt>-regalloc=myregalloc</tt> as
-an option. Registering instruction schedulers is similar except use the
-<tt>RegisterScheduler</tt> class. Note that the
-<tt>RegisterScheduler::FunctionPassCtor</tt> is significantly different from
-<tt>RegisterRegAlloc::FunctionPassCtor</tt>.</p>
-
-<p>To force the load/linking of your register allocator into the llc/lli tools,
-add your creator function's global declaration to "Passes.h" and add a "pseudo"
-call line to <tt>llvm/CodeGen/LinkAllCodegenComponents.h</tt>.</p>
-
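-<p>The "pseudo" call is just a reference that keeps the linker from stripping
-your allocator out of the tool; a sketch of the usual pattern:</p>
-
-<div class="doc_code"><pre>
-(<b>void</b>) llvm::createMyRegisterAllocator();
-</pre></div>
-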
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="registering_new">Creating new registries</a>
-</h3>
-
-<div>
-
-<p>The easiest way to get started is to clone one of the existing registries; we
-recommend <tt>llvm/CodeGen/RegAllocRegistry.h</tt>. The key things to modify
-are the class name and the <tt>FunctionPassCtor</tt> type.</p>
-
-<p>Then you need to declare the registry. Example: if your pass registry is
-<tt>RegisterMyPasses</tt>, then define:</p>
-
-<div class="doc_code"><pre>
-MachinePassRegistry RegisterMyPasses::Registry;
-</pre></div>
-
-<p>And finally, declare the command line option for your passes. Example:</p>
-
-<div class="doc_code"><pre>
-cl::opt&lt;RegisterMyPasses::FunctionPassCtor, false,
- RegisterPassParser&lt;RegisterMyPasses&gt; &gt;
-MyPassOpt("mypass",
- cl::init(&amp;createDefaultMyPass),
- cl::desc("my pass option help"));
-</pre></div>
-
-<p>Here the command line option is "<tt>mypass</tt>", with
-<tt>createDefaultMyPass</tt> as the default creator.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="debughints">Using GDB with dynamically loaded passes</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Unfortunately, using GDB with dynamically loaded passes is not as easy as it
-should be. First of all, you can't set a breakpoint in a shared object that has
-not been loaded yet, and second of all there are problems with inlined functions
-in shared objects. Here are some suggestions for debugging your pass with
-GDB.</p>
-
-<p>For the sake of discussion, I'm going to assume that you are debugging a
-transformation invoked by <tt>opt</tt>, although nothing described here depends
-on that.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="breakpoint">Setting a breakpoint in your pass</a>
-</h4>
-
-<div>
-
-<p>The first thing to do is to start <tt>gdb</tt> on the <tt>opt</tt> process:</p>
-
-<div class="doc_code"><pre>
-$ <b>gdb opt</b>
-GNU gdb 5.0
-Copyright 2000 Free Software Foundation, Inc.
-GDB is free software, covered by the GNU General Public License, and you are
-welcome to change it and/or distribute copies of it under certain conditions.
-Type "show copying" to see the conditions.
-There is absolutely no warranty for GDB. Type "show warranty" for details.
-This GDB was configured as "sparc-sun-solaris2.6"...
-(gdb)
-</pre></div>
-
-<p>Note that <tt>opt</tt> has a lot of debugging information in it, so it takes
-time to load. Be patient. Since we cannot set a breakpoint in our pass yet
-(the shared object isn't loaded until runtime), we must execute the process, and
-have it stop before it invokes our pass, but after it has loaded the shared
-object. The most foolproof way of doing this is to set a breakpoint in
-<tt>PassManager::run</tt> and then run the process with the arguments you
-want:</p>
-
-<div class="doc_code"><pre>
-(gdb) <b>break llvm::PassManager::run</b>
-Breakpoint 1 at 0x2413bc: file Pass.cpp, line 70.
-(gdb) <b>run test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]</b>
-Starting program: opt test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]
-Breakpoint 1, PassManager::run (this=0xffbef174, M=@0x70b298) at Pass.cpp:70
-70 bool PassManager::run(Module &amp;M) { return PM-&gt;run(M); }
-(gdb)
-</pre></div>
-
-<p>Once <tt>opt</tt> stops in the <tt>PassManager::run</tt> method, you are
-now free to set breakpoints in your pass so that you can trace through execution
-or do other standard debugging stuff.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="debugmisc">Miscellaneous Problems</a>
-</h4>
-
-<div>
-
-<p>Once you have the basics down, there are a couple of problems that GDB has,
-some with solutions, some without.</p>
-
-<ul>
-<li>Inline functions have bogus stack information. In general, GDB does a
-pretty good job getting stack traces and stepping through inline functions.
-When a pass is dynamically loaded however, it somehow completely loses this
-capability. The only solution I know of is to de-inline a function (move it
-from the body of a class to a .cpp file).</li>
-
-<li>Restarting the program breaks breakpoints. After following the information
-above, you have succeeded in getting some breakpoints planted in your pass. Next
-thing you know, you restart the program (i.e., you type '<tt>run</tt>' again),
-and you start getting errors about breakpoints being unsettable. The only way I
-have found to "fix" this problem is to <tt>delete</tt> the breakpoints that are
-already set in your pass, run the program, and re-set the breakpoints once
-execution stops in <tt>PassManager::run</tt>.</li>
-
-</ul>
-
-<p>Hopefully these tips will help with common case debugging situations. If
-you'd like to contribute some tips of your own, just contact <a
-href="mailto:sabre@nondot.org">Chris</a>.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="future">Future extensions planned</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Although the LLVM Pass Infrastructure is very capable as it stands, and does
-some nifty stuff, there are things we'd like to add in the future. Here is
-where we are going:</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="SMP">Multithreaded LLVM</a>
-</h4>
-
-<div>
-
-<p>Multiple CPU machines are becoming more common and compilation can never be
-fast enough: obviously we should allow for a multithreaded compiler. Because of
-the semantics defined for passes above (specifically they cannot maintain state
-across invocations of their <tt>run*</tt> methods), a nice clean way to
-implement a multithreaded compiler would be for the <tt>PassManager</tt> class
-to create multiple instances of each pass object, and allow the separate
-instances to be hacking on different parts of the program at the same time.</p>
-
-<p>This implementation would prevent each of the passes from having to implement
-multithreaded constructs, requiring only the LLVM core to have locking in a few
-places (for global resources). Although this is a simple extension, we simply
-haven't had time (or multiprocessor machines, thus a reason) to implement this.
-Despite that, we have kept the LLVM passes SMP ready, and you should too.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-04-19 22:20:34 +0200 (Thu, 19 Apr 2012) $
-</address>
-
-</body>
-</html>
diff --git a/docs/WritingAnLLVMPass.rst b/docs/WritingAnLLVMPass.rst
new file mode 100644
index 000000000000..b10d98f87e2b
--- /dev/null
+++ b/docs/WritingAnLLVMPass.rst
@@ -0,0 +1,1436 @@
+====================
+Writing an LLVM Pass
+====================
+
+.. contents::
+ :local:
+
+Introduction --- What is a pass?
+================================
+
+The LLVM Pass Framework is an important part of the LLVM system, because LLVM
+passes are where most of the interesting parts of the compiler exist. Passes
+perform the transformations and optimizations that make up the compiler, they
+build the analysis results that are used by these transformations, and they
+are, above all, a structuring technique for compiler code.
+
+All LLVM passes are subclasses of the `Pass
+<http://llvm.org/doxygen/classllvm_1_1Pass.html>`_ class, and implement
+functionality by overriding virtual methods inherited from ``Pass``. Depending
+on how your pass works, you should inherit from the :ref:`ModulePass
+<writing-an-llvm-pass-ModulePass>`, :ref:`CallGraphSCCPass
+<writing-an-llvm-pass-CallGraphSCCPass>`, :ref:`FunctionPass
+<writing-an-llvm-pass-FunctionPass>`, :ref:`LoopPass
+<writing-an-llvm-pass-LoopPass>`, :ref:`RegionPass
+<writing-an-llvm-pass-RegionPass>`, or :ref:`BasicBlockPass
+<writing-an-llvm-pass-BasicBlockPass>` class, which gives the system more
+information about what your pass does, and how it can be combined with other
+passes. One of the main features of the LLVM Pass Framework is that it
+schedules passes to run in an efficient way based on the constraints that your
+pass meets (which are indicated by which class it derives from).
+
+We start by showing you how to construct a pass, everything from setting up the
+code, to compiling, loading, and executing it. After the basics are down, more
+advanced features are discussed.
+
+Quick Start --- Writing hello world
+===================================
+
+Here we describe how to write the "hello world" of passes. The "Hello" pass is
+designed to simply print out the name of non-external functions that exist in
+the program being compiled. It does not modify the program at all, it just
+inspects it. The source code and files for this pass are available in the LLVM
+source tree in the ``lib/Transforms/Hello`` directory.
+
+.. _writing-an-llvm-pass-makefile:
+
+Setting up the build environment
+--------------------------------
+
+.. FIXME: Why does this recommend to build in-tree?
+
+First, configure and build LLVM. This needs to be done directly inside the
+LLVM source tree rather than in a separate objects directory. Next, you need
+to create a new directory somewhere in the LLVM source base. For this example,
+we'll assume that you made ``lib/Transforms/Hello``. Finally, you must set up
+a build script (``Makefile``) that will compile the source code for the new
+pass. To do this, copy the following into ``Makefile``:
+
+.. code-block:: make
+
+ # Makefile for hello pass
+
+ # Path to top level of LLVM hierarchy
+ LEVEL = ../../..
+
+ # Name of the library to build
+ LIBRARYNAME = Hello
+
+ # Make the shared library become a loadable module so the tools can
+ # dlopen/dlsym on the resulting library.
+ LOADABLE_MODULE = 1
+
+ # Include the makefile implementation stuff
+ include $(LEVEL)/Makefile.common
+
+This makefile specifies that all of the ``.cpp`` files in the current directory
+are to be compiled and linked together into a shared object
+``$(LEVEL)/Debug+Asserts/lib/Hello.so`` that can be dynamically loaded by the
+:program:`opt` or :program:`bugpoint` tools via their :option:`-load` options.
+If your operating system uses a suffix other than ``.so`` (such as Windows or Mac
+OS X), the appropriate extension will be used.
+
+If you are using CMake to build LLVM, see :ref:`cmake-out-of-source-pass`.
+
+Now that we have the build scripts set up, we just need to write the code for
+the pass itself.
+
+.. _writing-an-llvm-pass-basiccode:
+
+Basic code required
+-------------------
+
+Now that we have a way to compile our new pass, we just have to write it.
+Start out with:
+
+.. code-block:: c++
+
+ #include "llvm/Pass.h"
+ #include "llvm/Function.h"
+ #include "llvm/Support/raw_ostream.h"
+
+Which are needed because we are writing a `Pass
+<http://llvm.org/doxygen/classllvm_1_1Pass.html>`_, we are operating on
+`Function <http://llvm.org/doxygen/classllvm_1_1Function.html>`_\ s, and we will
+be doing some printing.
+
+Next we have:
+
+.. code-block:: c++
+
+ using namespace llvm;
+
+... which is required because the functions from the include files live in the
+llvm namespace.
+
+Next we have:
+
+.. code-block:: c++
+
+ namespace {
+
+... which starts out an anonymous namespace. Anonymous namespaces are to C++
+what the "``static``" keyword is to C (at global scope). It makes the things
+declared inside of the anonymous namespace visible only to the current file.
+If you're not familiar with them, consult a decent C++ book for more
+information.
+
+Next, we declare our pass itself:
+
+.. code-block:: c++
+
+ struct Hello : public FunctionPass {
+
+This declares a "``Hello``" class that is a subclass of :ref:`FunctionPass
+<writing-an-llvm-pass-FunctionPass>`. The different builtin pass subclasses
+are described in detail :ref:`later <writing-an-llvm-pass-pass-classes>`, but
+for now, know that ``FunctionPass`` operates on a function at a time.
+
+.. code-block:: c++
+
+ static char ID;
+ Hello() : FunctionPass(ID) {}
+
+This declares the pass identifier used by LLVM to identify the pass. This
+allows LLVM to avoid using expensive C++ runtime type information.
+
+.. code-block:: c++
+
+ virtual bool runOnFunction(Function &F) {
+ errs() << "Hello: ";
+ errs().write_escaped(F.getName()) << "\n";
+ return false;
+ }
+ }; // end of struct Hello
+ } // end of anonymous namespace
+
+We declare a :ref:`runOnFunction <writing-an-llvm-pass-runOnFunction>` method,
+which overrides an abstract virtual method inherited from :ref:`FunctionPass
+<writing-an-llvm-pass-FunctionPass>`. This is where we are supposed to do our
+thing, so we just print out our message with the name of each function.
+
+.. code-block:: c++
+
+ char Hello::ID = 0;
+
+We initialize the pass ID here. LLVM uses the address of ``ID`` to identify
+the pass, so the initialization value is not important.
+
+.. code-block:: c++
+
+ static RegisterPass<Hello> X("hello", "Hello World Pass",
+ false /* Only looks at CFG */,
+ false /* Analysis Pass */);
+
+Lastly, we :ref:`register our class <writing-an-llvm-pass-registration>`
+``Hello``, giving it a command line argument "``hello``", and a name "Hello
+World Pass". The last two arguments describe its behavior: if a pass walks CFG
+without modifying it then the third argument is set to ``true``; if a pass is
+an analysis pass, for example dominator tree pass, then ``true`` is supplied as
+the fourth argument.
+
+As a whole, the ``.cpp`` file looks like:
+
+.. code-block:: c++
+
+ #include "llvm/Pass.h"
+ #include "llvm/Function.h"
+ #include "llvm/Support/raw_ostream.h"
+
+ using namespace llvm;
+
+ namespace {
+ struct Hello : public FunctionPass {
+ static char ID;
+ Hello() : FunctionPass(ID) {}
+
+ virtual bool runOnFunction(Function &F) {
+ errs() << "Hello: ";
+ errs().write_escaped(F.getName()) << '\n';
+ return false;
+ }
+ };
+ }
+
+ char Hello::ID = 0;
+ static RegisterPass<Hello> X("hello", "Hello World Pass", false, false);
+
+Now that it's all together, compile the file with a simple "``gmake``" command
+in the local directory and you should get a new file
+"``Debug+Asserts/lib/Hello.so``" under the top level directory of the LLVM
+source tree (not in the local directory). Note that everything in this file is
+contained in an anonymous namespace --- this reflects the fact that passes
+are self contained units that do not need external interfaces (although they
+can have them) to be useful.
+
+Running a pass with ``opt``
+---------------------------
+
+Now that you have a brand new shiny shared object file, we can use the
+:program:`opt` command to run an LLVM program through your pass. Because you
+registered your pass with ``RegisterPass``, you will be able to use the
+:program:`opt` tool to access it, once loaded.
+
+To test it, follow the example at the end of the :doc:`GettingStarted` to
+compile "Hello World" to LLVM. We can now run the bitcode file (hello.bc) for
+the program through our transformation like this (of course, any bitcode file
+will work):
+
+.. code-block:: console
+
+ $ opt -load ../../../Debug+Asserts/lib/Hello.so -hello < hello.bc > /dev/null
+ Hello: __main
+ Hello: puts
+ Hello: main
+
+The :option:`-load` option specifies that :program:`opt` should load your pass
+as a shared object, which makes "``-hello``" a valid command line argument
+(which is one reason you need to :ref:`register your pass
+<writing-an-llvm-pass-registration>`). Because the Hello pass does not modify
+the program in any interesting way, we just throw away the result of
+:program:`opt` (sending it to ``/dev/null``).
+
+To see what happened to the other string you registered, try running
+:program:`opt` with the :option:`-help` option:
+
+.. code-block:: console
+
+ $ opt -load ../../../Debug+Asserts/lib/Hello.so -help
+ OVERVIEW: llvm .bc -> .bc modular optimizer
+
+ USAGE: opt [options] <input bitcode>
+
+ OPTIONS:
+ Optimizations available:
+ ...
+ -globalopt - Global Variable Optimizer
+ -globalsmodref-aa - Simple mod/ref analysis for globals
+ -gvn - Global Value Numbering
+ -hello - Hello World Pass
+ -indvars - Induction Variable Simplification
+ -inline - Function Integration/Inlining
+ -insert-edge-profiling - Insert instrumentation for edge profiling
+ ...
+
+The pass name gets added as the information string for your pass, giving some
+documentation to users of :program:`opt`. Now that you have a working pass,
+you would go ahead and make it do the cool transformations you want. Once you
+get it all working and tested, it may become useful to find out how fast your
+pass is. The :ref:`PassManager <writing-an-llvm-pass-passmanager>` provides a
+nice command line option (:option:`--time-passes`) that allows you to get
+information about the execution time of your pass along with the other passes
+you queue up. For example:
+
+.. code-block:: console
+
+ $ opt -load ../../../Debug+Asserts/lib/Hello.so -hello -time-passes < hello.bc > /dev/null
+ Hello: __main
+ Hello: puts
+ Hello: main
+ ===============================================================================
+ ... Pass execution timing report ...
+ ===============================================================================
+ Total Execution Time: 0.02 seconds (0.0479059 wall clock)
+
+ ---User Time--- --System Time-- --User+System-- ---Wall Time--- --- Pass Name ---
+ 0.0100 (100.0%) 0.0000 ( 0.0%) 0.0100 ( 50.0%) 0.0402 ( 84.0%) Bitcode Writer
+ 0.0000 ( 0.0%) 0.0100 (100.0%) 0.0100 ( 50.0%) 0.0031 ( 6.4%) Dominator Set Construction
+ 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0013 ( 2.7%) Module Verifier
+ 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0000 ( 0.0%) 0.0033 ( 6.9%) Hello World Pass
+ 0.0100 (100.0%) 0.0100 (100.0%) 0.0200 (100.0%) 0.0479 (100.0%) TOTAL
+
+As you can see, our implementation above is pretty fast. The additional
+passes listed are automatically inserted by the :program:`opt` tool to verify
+that the LLVM emitted by your pass is still valid and well formed LLVM, which
+hasn't been broken somehow.
+
+Now that you have seen the basics of the mechanics behind passes, we can talk
+about some more details of how they work and how to use them.
+
+.. _writing-an-llvm-pass-pass-classes:
+
+Pass classes and requirements
+=============================
+
+One of the first things that you should do when designing a new pass is to
+decide what class you should subclass for your pass. The :ref:`Hello World
+<writing-an-llvm-pass-basiccode>` example uses the :ref:`FunctionPass
+<writing-an-llvm-pass-FunctionPass>` class for its implementation, but we did
+not discuss why or when this should occur. Here we talk about the classes
+available, from the most general to the most specific.
+
+When choosing a superclass for your ``Pass``, you should choose the **most
+specific** class possible, while still being able to meet the requirements
+listed. This gives the LLVM Pass Infrastructure information necessary to
+optimize how passes are run, so that the resultant compiler isn't unnecessarily
+slow.
+
+The ``ImmutablePass`` class
+---------------------------
+
+The most plain and boring type of pass is the "`ImmutablePass
+<http://llvm.org/doxygen/classllvm_1_1ImmutablePass.html>`_" class. This pass
+type is used for passes that do not have to be run, do not change state, and
+never need to be updated. This is not a normal type of transformation or
+analysis, but can provide information about the current compiler configuration.
+
+Although this pass class is very infrequently used, it is important for
+providing information about the current target machine being compiled for, and
+other static information that can affect the various transformations.
+
+``ImmutablePass``\ es never invalidate other transformations, are never
+invalidated, and are never "run".
+
+.. _writing-an-llvm-pass-ModulePass:
+
+The ``ModulePass`` class
+------------------------
+
+The `ModulePass <http://llvm.org/doxygen/classllvm_1_1ModulePass.html>`_ class
+is the most general of all superclasses that you can use. Deriving from
+``ModulePass`` indicates that your pass uses the entire program as a unit,
+referring to function bodies in no predictable order, or adding and removing
+functions. Because nothing is known about the behavior of ``ModulePass``
+subclasses, no optimization can be done for their execution.
+
+A module pass can use function level passes (e.g. dominators) through the
+``getAnalysis`` interface ``getAnalysis<DominatorTree>(llvm::Function *)``,
+passing in the function for which the analysis result should be retrieved,
+provided the function pass does not require any module or immutable passes.
+Note that this can only be done for functions for which the analysis has been
+run, e.g. in the case of dominators you should only ask for the
+``DominatorTree`` of function definitions, not declarations.
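+
+For illustration, a small sketch (the ``MyModulePass`` class is hypothetical,
+and it assumes ``DominatorTree`` was declared via ``addRequired`` in
+``getAnalysisUsage``) that asks for the ``DominatorTree`` of every function
+definition in the module:
+
+.. code-block:: c++
+
+  bool MyModulePass::runOnModule(Module &M) {
+    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+      if (F->isDeclaration())
+        continue;  // No dominator information exists for declarations.
+      DominatorTree &DT = getAnalysis<DominatorTree>(*F);
+      (void)DT;  // Use DT here...
+    }
+    return false;  // The module was not modified.
+  }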
+
+To write a correct ``ModulePass`` subclass, derive from ``ModulePass`` and
+overload the ``runOnModule`` method with the following signature:
+
+The ``runOnModule`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool runOnModule(Module &M) = 0;
+
+The ``runOnModule`` method performs the interesting work of the pass. It
+should return ``true`` if the module was modified by the transformation and
+``false`` otherwise.
+
+.. _writing-an-llvm-pass-CallGraphSCCPass:
+
+The ``CallGraphSCCPass`` class
+------------------------------
+
+The `CallGraphSCCPass
+<http://llvm.org/doxygen/classllvm_1_1CallGraphSCCPass.html>`_ is used by
+passes that need to traverse the program bottom-up on the call graph (callees
+before callers). Deriving from ``CallGraphSCCPass`` provides some mechanics
+for building and traversing the ``CallGraph``, but also allows the system to
+optimize execution of ``CallGraphSCCPass``\ es. If your pass meets the
+requirements outlined below, and doesn't meet the requirements of a
+:ref:`FunctionPass <writing-an-llvm-pass-FunctionPass>` or :ref:`BasicBlockPass
+<writing-an-llvm-pass-BasicBlockPass>`, you should derive from
+``CallGraphSCCPass``.
+
+(Briefly: an SCC, or strongly connected component, of the call graph is a
+maximal set of functions that transitively call each other. Tarjan's
+algorithm is used to find the SCCs, and "bottom-up" (B-U) means that callees
+are visited before their callers.)
+
+To be explicit, ``CallGraphSCCPass`` subclasses are:
+
+#. ... *not allowed* to inspect or modify any ``Function``\ s other than those
+ in the current SCC and the direct callers and direct callees of the SCC.
+#. ... *required* to preserve the current ``CallGraph`` object, updating it to
+ reflect any changes made to the program.
+#. ... *not allowed* to add or remove SCCs from the current Module, though
+ they may change the contents of an SCC.
+#. ... *allowed* to add or remove global variables from the current Module.
+#. ... *allowed* to maintain state across invocations of :ref:`runOnSCC
+ <writing-an-llvm-pass-runOnSCC>` (including global data).
+
+Implementing a ``CallGraphSCCPass`` is slightly tricky in some cases because it
+has to handle SCCs with more than one node in it. All of the virtual methods
+described below should return ``true`` if they modified the program, or
+``false`` if they didn't.
+
+The ``doInitialization(CallGraph &)`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool doInitialization(CallGraph &CG);
+
+The ``doInitialization`` method is allowed to do most of the things that
+``CallGraphSCCPass``\ es are not allowed to do. They can add and remove
+functions, get pointers to functions, etc. The ``doInitialization`` method is
+designed to do simple initialization type of stuff that does not depend on the
+SCCs being processed. The ``doInitialization`` method call is not scheduled to
+overlap with any other pass executions (thus it should be very fast).
+
+.. _writing-an-llvm-pass-runOnSCC:
+
+The ``runOnSCC`` method
+^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool runOnSCC(CallGraphSCC &SCC) = 0;
+
+The ``runOnSCC`` method performs the interesting work of the pass, and should
+return ``true`` if the module was modified by the transformation, ``false``
+otherwise.
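+
+A hedged sketch of a ``runOnSCC`` implementation (the ``MySCCPass`` class is
+hypothetical) that visits every function in the current SCC:
+
+.. code-block:: c++
+
+  bool MySCCPass::runOnSCC(CallGraphSCC &SCC) {
+    bool Changed = false;
+    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+      Function *F = (*I)->getFunction();  // May be null for external nodes.
+      if (F && !F->isDeclaration()) {
+        // Inspect or transform F here, setting Changed if anything is
+        // modified.
+      }
+    }
+    return Changed;
+  }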
+
+The ``doFinalization(CallGraph &)`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool doFinalization(CallGraph &CG);
+
+The ``doFinalization`` method is an infrequently used method that is called
+when the pass framework has finished calling :ref:`runOnSCC
+<writing-an-llvm-pass-runOnSCC>` for every SCC in the program being
+compiled.
+
+.. _writing-an-llvm-pass-FunctionPass:
+
+The ``FunctionPass`` class
+--------------------------
+
+In contrast to ``ModulePass`` subclasses, `FunctionPass
+<http://llvm.org/doxygen/classllvm_1_1FunctionPass.html>`_ subclasses do have
+a predictable, local behavior that can be expected by the system. All
+``FunctionPass``\ es execute on each function in the program independent of
+all of the other functions in the program. ``FunctionPass``\ es do not
+require that they are executed in a particular order, and they do not modify
+external functions.
+
+To be explicit, ``FunctionPass`` subclasses are not allowed to:
+
+#. Modify a ``Function`` other than the one currently being processed.
+#. Add or remove ``Function``\ s from the current ``Module``.
+#. Add or remove global variables from the current ``Module``.
+#. Maintain state across invocations of :ref:`runOnFunction
+ <writing-an-llvm-pass-runOnFunction>` (including global data).
+
+Implementing a ``FunctionPass`` is usually straightforward (See the :ref:`Hello
+World <writing-an-llvm-pass-basiccode>` pass for example).
+``FunctionPass``\ es may overload three virtual methods to do their work. All
+of these methods should return ``true`` if they modified the program, or
+``false`` if they didn't.
+
+.. _writing-an-llvm-pass-doInitialization-mod:
+
+The ``doInitialization(Module &)`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool doInitialization(Module &M);
+
+The ``doInitialization`` method is allowed to do most of the things that
+``FunctionPass``\ es are not allowed to do. They can add and remove functions,
+get pointers to functions, etc. The ``doInitialization`` method is designed to
+do simple initialization type of stuff that does not depend on the functions
+being processed. The ``doInitialization`` method call is not scheduled to
+overlap with any other pass executions (thus it should be very fast).
+
+A good example of how this method should be used is the `LowerAllocations
+<http://llvm.org/doxygen/LowerAllocations_8cpp-source.html>`_ pass. This pass
+converts ``malloc`` and ``free`` instructions into platform dependent
+``malloc()`` and ``free()`` function calls. It uses the ``doInitialization``
+method to get a reference to the ``malloc`` and ``free`` functions that it
+needs, adding prototypes to the module if necessary.
+
+.. _writing-an-llvm-pass-runOnFunction:
+
+The ``runOnFunction`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool runOnFunction(Function &F) = 0;
+
+The ``runOnFunction`` method must be implemented by your subclass to do the
+transformation or analysis work of your pass. As usual, a ``true`` value
+should be returned if the function is modified.
+
+.. _writing-an-llvm-pass-doFinalization-mod:
+
+The ``doFinalization(Module &)`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool doFinalization(Module &M);
+
+The ``doFinalization`` method is an infrequently used method that is called
+when the pass framework has finished calling :ref:`runOnFunction
+<writing-an-llvm-pass-runOnFunction>` for every function in the program being
+compiled.
+
+.. _writing-an-llvm-pass-LoopPass:
+
+The ``LoopPass`` class
+----------------------
+
+All ``LoopPass``\ es execute on each loop in the function independent of all
+of the other loops in the function. ``LoopPass``\ es process loops in loop
+nest order such that the outermost loop is processed last.
+
+``LoopPass`` subclasses are allowed to update the loop nest using the
+``LPPassManager`` interface. Implementing a loop pass is usually
+straightforward.
+``LoopPass``\ es may overload three virtual methods to do their work. All
+these methods should return ``true`` if they modified the program, or ``false``
+if they didn't.
+
+The ``doInitialization(Loop *, LPPassManager &)`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool doInitialization(Loop *, LPPassManager &LPM);
+
+The ``doInitialization`` method is designed to do simple initialization type of
+stuff that does not depend on the functions being processed. The
+``doInitialization`` method call is not scheduled to overlap with any other
+pass executions (thus it should be very fast). The ``LPPassManager``
+interface should be used to access ``Function`` or ``Module`` level analysis
+information.
+
+.. _writing-an-llvm-pass-runOnLoop:
+
+The ``runOnLoop`` method
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool runOnLoop(Loop *, LPPassManager &LPM) = 0;
+
+The ``runOnLoop`` method must be implemented by your subclass to do the
+transformation or analysis work of your pass. As usual, a ``true`` value
+should be returned if the loop is modified. The ``LPPassManager`` interface
+should be used to update the loop nest.
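+
+A minimal sketch of a ``runOnLoop`` implementation (the ``MyLoopPass`` class
+and its output are hypothetical) that inspects the loop without changing it:
+
+.. code-block:: c++
+
+  bool MyLoopPass::runOnLoop(Loop *L, LPPassManager &LPM) {
+    errs() << "Loop at depth " << L->getLoopDepth() << " has "
+           << L->getBlocks().size() << " blocks\n";
+    return false;  // The loop was not modified.
+  }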
+
+The ``doFinalization()`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool doFinalization();
+
+The ``doFinalization`` method is an infrequently used method that is called
+when the pass framework has finished calling :ref:`runOnLoop
+<writing-an-llvm-pass-runOnLoop>` for every loop in the program being compiled.
+
+.. _writing-an-llvm-pass-RegionPass:
+
+The ``RegionPass`` class
+------------------------
+
+``RegionPass`` is similar to :ref:`LoopPass <writing-an-llvm-pass-LoopPass>`,
+but executes on each single entry single exit region in the function.
+``RegionPass`` processes regions in nested order such that the outermost
+region is processed last.
+
+``RegionPass`` subclasses are allowed to update the region tree by using the
+``RGPassManager`` interface. You may overload three virtual methods of
+``RegionPass`` to implement your own region pass. All these methods should
+return ``true`` if they modified the program, or ``false`` if they did not.
+
+The ``doInitialization(Region *, RGPassManager &)`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool doInitialization(Region *, RGPassManager &RGM);
+
+The ``doInitialization`` method is designed to do simple initialization type of
+stuff that does not depend on the functions being processed. The
+``doInitialization`` method call is not scheduled to overlap with any other
+pass executions (thus it should be very fast). The ``RGPassManager``
+interface should be used to access ``Function`` or ``Module`` level analysis
+information.
+
+.. _writing-an-llvm-pass-runOnRegion:
+
+The ``runOnRegion`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool runOnRegion(Region *, RGPassManager &RGM) = 0;
+
+The ``runOnRegion`` method must be implemented by your subclass to do the
+transformation or analysis work of your pass. As usual, a ``true`` value
+should be returned if the region is modified. The ``RGPassManager`` interface
+should be used to update the region tree.
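+
+A minimal sketch of a ``runOnRegion`` implementation (the ``MyRegionPass``
+class is hypothetical) that inspects the region without changing it:
+
+.. code-block:: c++
+
+  bool MyRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
+    errs() << "Region entry block: " << R->getEntry()->getName() << "\n";
+    return false;  // The region was not modified.
+  }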
+
+The ``doFinalization()`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool doFinalization();
+
+The ``doFinalization`` method is an infrequently used method that is called
+when the pass framework has finished calling :ref:`runOnRegion
+<writing-an-llvm-pass-runOnRegion>` for every region in the program being
+compiled.
+
+.. _writing-an-llvm-pass-BasicBlockPass:
+
+The ``BasicBlockPass`` class
+----------------------------
+
+``BasicBlockPass``\ es are just like :ref:`FunctionPasses
+<writing-an-llvm-pass-FunctionPass>`, except that they must limit their scope
+of inspection and modification to a single basic block at a time. As such,
+they are **not** allowed to do any of the following:
+
+#. Modify or inspect any basic blocks outside of the current one.
+#. Maintain state across invocations of :ref:`runOnBasicBlock
+ <writing-an-llvm-pass-runOnBasicBlock>`.
+#. Modify the control flow graph (by altering terminator instructions).
+#. Any of the things forbidden for :ref:`FunctionPasses
+ <writing-an-llvm-pass-FunctionPass>`.
+
+``BasicBlockPass``\ es are useful for traditional local and "peephole"
+optimizations. They may override the same :ref:`doInitialization(Module &)
+<writing-an-llvm-pass-doInitialization-mod>` and :ref:`doFinalization(Module &)
+<writing-an-llvm-pass-doFinalization-mod>` methods that :ref:`FunctionPasses
+<writing-an-llvm-pass-FunctionPass>` have, but also have the following virtual
+methods that may be implemented:
+
+The ``doInitialization(Function &)`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool doInitialization(Function &F);
+
+The ``doInitialization`` method is allowed to do most of the things that
+``BasicBlockPass``\ es are not allowed to do, but that ``FunctionPass``\ es
+can. The ``doInitialization`` method is designed to do simple initialization
+that does not depend on the ``BasicBlock``\ s being processed. The
+``doInitialization`` method call is not scheduled to overlap with any other
+pass executions (thus it should be very fast).
+
+.. _writing-an-llvm-pass-runOnBasicBlock:
+
+The ``runOnBasicBlock`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool runOnBasicBlock(BasicBlock &BB) = 0;
+
+Override this function to do the work of the ``BasicBlockPass``. This
+function is not allowed to inspect or modify basic blocks other than the
+parameter, and is not allowed to modify the CFG. A ``true`` value must be
+returned if the basic block is modified.
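+
+A hedged sketch of a peephole-style ``runOnBasicBlock`` (the
+``MyPeepholePass`` class is hypothetical):
+
+.. code-block:: c++
+
+  bool MyPeepholePass::runOnBasicBlock(BasicBlock &BB) {
+    bool Changed = false;
+    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+      // Inspect *I and perform local rewrites here, setting Changed when
+      // an instruction is modified. The terminator must not be changed in
+      // a way that alters the CFG.
+    }
+    return Changed;
+  }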
+
+The ``doFinalization(Function &)`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool doFinalization(Function &F);
+
+The ``doFinalization`` method is an infrequently used method that is called
+when the pass framework has finished calling :ref:`runOnBasicBlock
+<writing-an-llvm-pass-runOnBasicBlock>` for every ``BasicBlock`` in the program
+being compiled. This can be used to perform per-function finalization.
+
+The ``MachineFunctionPass`` class
+---------------------------------
+
+A ``MachineFunctionPass`` is a part of the LLVM code generator that executes on
+the machine-dependent representation of each LLVM function in the program.
+
+Code generator passes are registered and initialized specially by
+``TargetMachine::addPassesToEmitFile`` and similar routines, so they cannot
+generally be run from the :program:`opt` or :program:`bugpoint` commands.
+
+A ``MachineFunctionPass`` is also a ``FunctionPass``, so all the restrictions
+that apply to a ``FunctionPass`` also apply to it. ``MachineFunctionPass``\ es
+also have additional restrictions. In particular, ``MachineFunctionPass``\ es
+are not allowed to do any of the following:
+
+#. Modify or create any LLVM IR ``Instruction``\ s, ``BasicBlock``\ s,
+ ``Argument``\ s, ``Function``\ s, ``GlobalVariable``\ s,
+ ``GlobalAlias``\ es, or ``Module``\ s.
+#. Modify a ``MachineFunction`` other than the one currently being processed.
+#. Maintain state across invocations of :ref:`runOnMachineFunction
+ <writing-an-llvm-pass-runOnMachineFunction>` (including global data).
+
+.. _writing-an-llvm-pass-runOnMachineFunction:
+
+The ``runOnMachineFunction(MachineFunction &MF)`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) = 0;
+
+``runOnMachineFunction`` can be considered the main entry point of a
+``MachineFunctionPass``; that is, you should override this method to do the
+work of your ``MachineFunctionPass``.
+
+The ``runOnMachineFunction`` method is called on every ``MachineFunction`` in a
+``Module``, so that the ``MachineFunctionPass`` may perform optimizations on
+the machine-dependent representation of the function. If you want to get at
+the LLVM ``Function`` for the ``MachineFunction`` you're working on, use
+``MachineFunction``'s ``getFunction()`` accessor method --- but remember, you
+may not modify the LLVM ``Function`` or its contents from a
+``MachineFunctionPass``.
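+
+A minimal sketch of a ``runOnMachineFunction`` implementation (the
+``MyMachinePass`` class is hypothetical):
+
+.. code-block:: c++
+
+  bool MyMachinePass::runOnMachineFunction(MachineFunction &MF) {
+    errs() << "Processing " << MF.getFunction()->getName() << "\n";
+    // Transform the machine-level representation here; remember that the
+    // underlying LLVM Function may not be modified.
+    return false;  // Nothing changed yet in this sketch.
+  }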
+
+.. _writing-an-llvm-pass-registration:
+
+Pass registration
+-----------------
+
+In the :ref:`Hello World <writing-an-llvm-pass-basiccode>` example pass we
+illustrated how pass registration works, and discussed some of the reasons that
+it is used and what it does. Here we discuss how and why passes are
+registered.
+
+As we saw above, passes are registered with the ``RegisterPass`` template.
+The template parameter is your pass class. The first constructor argument is
+the name of the pass that is to be used on the command line to specify that
+the pass should be added to a program (for example, with :program:`opt` or
+:program:`bugpoint`). The second argument is the descriptive name of the
+pass, which is to be used for the :option:`-help` output of programs, as well
+as for debug output generated by the :option:`--debug-pass` option.
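+
+As a concrete reminder, a registration in this style looks like the following
+sketch, mirroring the :ref:`Hello World <writing-an-llvm-pass-basiccode>`
+example:
+
+.. code-block:: c++
+
+  // Template parameter: the pass class. First argument: the command line
+  // name. Second argument: the descriptive name shown by -help.
+  char Hello::ID = 0;
+  static RegisterPass<Hello> X("hello", "Hello World Pass");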
+
+If you want your pass to be easily dumpable, you should implement the virtual
+print method:
+
+The ``print`` method
+^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual void print(llvm::raw_ostream &O, const Module *M) const;
+
+The ``print`` method must be implemented by "analyses" in order to print a
+human readable version of the analysis results. This is useful for debugging
+an analysis itself, as well as for other people to figure out how an analysis
+works. Use the :program:`opt` ``-analyze`` argument to invoke this method.
+
+The ``llvm::raw_ostream`` parameter specifies the stream to write the results
+on, and the ``Module`` parameter gives a pointer to the top level module of the
+program that has been analyzed. Note however that this pointer may be ``NULL``
+in certain circumstances (such as calling the ``Pass::dump()`` from a
+debugger), so it should only be used to enhance debug output, it should not be
+depended on.
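+
+A minimal sketch of a ``print`` implementation (the ``MyAnalysis`` class and
+its output are hypothetical):
+
+.. code-block:: c++
+
+  void MyAnalysis::print(llvm::raw_ostream &O, const Module *M) const {
+    O << "MyAnalysis results:\n";
+    // Print the analysis results here. Remember that M may be NULL, so it
+    // should only be used to enhance the output.
+    if (M)
+      O << "  (analyzed module: " << M->getModuleIdentifier() << ")\n";
+  }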
+
+.. _writing-an-llvm-pass-interaction:
+
+Specifying interactions between passes
+--------------------------------------
+
+One of the main responsibilities of the ``PassManager`` is to make sure that
+passes interact with each other correctly. Because ``PassManager`` tries to
+:ref:`optimize the execution of passes <writing-an-llvm-pass-passmanager>` it
+must know how the passes interact with each other and what dependencies exist
+between the various passes. To track this, each pass can declare the set of
+passes that are required to be executed before the current pass, and the passes
+which are invalidated by the current pass.
+
+Typically this functionality is used to require that analysis results are
+computed before your pass is run. Running arbitrary transformation passes can
+invalidate the computed analysis results, which is what the invalidation set
+specifies. If a pass does not implement the :ref:`getAnalysisUsage
+<writing-an-llvm-pass-getAnalysisUsage>` method, it defaults to not having any
+prerequisite passes, and invalidating **all** other passes.
+
+.. _writing-an-llvm-pass-getAnalysisUsage:
+
+The ``getAnalysisUsage`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual void getAnalysisUsage(AnalysisUsage &Info) const;
+
+By implementing the ``getAnalysisUsage`` method, the required and invalidated
+sets may be specified for your transformation. The implementation should fill
+in the `AnalysisUsage
+<http://llvm.org/doxygen/classllvm_1_1AnalysisUsage.html>`_ object with
+information about which passes are required and not invalidated. To do this, a
+pass may call any of the following methods on the ``AnalysisUsage`` object:
+
+The ``AnalysisUsage::addRequired<>`` and ``AnalysisUsage::addRequiredTransitive<>`` methods
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If your pass requires a previous pass to be executed (an analysis for example),
+it can use one of these methods to arrange for it to be run before your pass.
+LLVM has many different types of analyses and passes that can be required,
+spanning the range from ``DominatorSet`` to ``BreakCriticalEdges``. Requiring
+``BreakCriticalEdges``, for example, guarantees that there will be no critical
+edges in the CFG when your pass has been run.
+
+Some analyses chain to other analyses to do their job. For example, an
+:doc:`AliasAnalysis <AliasAnalysis>` implementation is required to :ref:`chain
+<aliasanalysis-chaining>` to other alias analysis passes. In cases where
+analyses chain, the ``addRequiredTransitive`` method should be used instead of
+the ``addRequired`` method. This informs the ``PassManager`` that the
+transitively required pass should be alive as long as the requiring pass is.
+
+The ``AnalysisUsage::addPreserved<>`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+One of the jobs of the ``PassManager`` is to optimize how and when analyses are
+run. In particular, it attempts to avoid recomputing data unless it needs to.
+For this reason, passes are allowed to declare that they preserve (i.e., they
+don't invalidate) an existing analysis if it's available. For example, a
+simple constant folding pass would not modify the CFG, so it can't possibly
+affect the results of dominator analysis. By default, all passes are assumed
+to invalidate all others.
+
+The ``AnalysisUsage`` class provides several methods which are useful in
+certain circumstances that are related to ``addPreserved``. In particular, the
+``setPreservesAll`` method can be called to indicate that the pass does not
+modify the LLVM program at all (which is true for analyses), and the
+``setPreservesCFG`` method can be used by transformations that change
+instructions in the program but do not modify the CFG or terminator
+instructions (note that this property is implicitly set for
+:ref:`BasicBlockPass <writing-an-llvm-pass-BasicBlockPass>`\ es).
+
+``addPreserved`` is particularly useful for transformations like
+``BreakCriticalEdges``. This pass knows how to update a small set of loop and
+dominator related analyses if they exist, so it can preserve them, despite the
+fact that it hacks on the CFG.
+
+Example implementations of ``getAnalysisUsage``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ // This example modifies the program, but does not modify the CFG
+ void LICM::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LoopInfo>();
+ }
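+
+Two further hedged sketches (the class names are hypothetical): an analysis
+that computes information without changing the program at all, and a
+transformation that knows how to keep ``DominatorTree`` up to date when it is
+available:
+
+.. code-block:: c++
+
+  // An analysis never modifies the program, so it preserves everything.
+  void MyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+
+  // A transformation that updates dominator information as it works can
+  // declare that it preserves it.
+  void MyTransform::getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addPreserved<DominatorTree>();
+  }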
+
+.. _writing-an-llvm-pass-getAnalysis:
+
+The ``getAnalysis<>`` and ``getAnalysisIfAvailable<>`` methods
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``Pass::getAnalysis<>`` method is automatically inherited by your class,
+providing you with access to the passes that you declared that you required
+with the :ref:`getAnalysisUsage <writing-an-llvm-pass-getAnalysisUsage>`
+method. It takes a single template argument that specifies which pass class
+you want, and returns a reference to that pass. For example:
+
+.. code-block:: c++
+
+ bool LICM::runOnFunction(Function &F) {
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+ //...
+ }
+
+This method call returns a reference to the pass desired. You may get a
+runtime assertion failure if you attempt to get an analysis that you did not
+declare as required in your :ref:`getAnalysisUsage
+<writing-an-llvm-pass-getAnalysisUsage>` implementation. This method can be
+called by your ``run*`` method implementation, or by any other local method
+invoked by your ``run*`` method.
+
+A module level pass can use function level analysis info using this interface.
+For example:
+
+.. code-block:: c++
+
+  bool ModuleLevelPass::runOnModule(Module &M) {
+    //...
+    // Func is a Function (definition) contained in M.
+    DominatorTree &DT = getAnalysis<DominatorTree>(Func);
+    //...
+  }
+
+In the above example, ``runOnFunction`` for ``DominatorTree`` is called by the
+pass manager before returning a reference to the desired pass.
+
+If your pass is capable of updating analyses if they exist (e.g.,
+``BreakCriticalEdges``, as described above), you can use the
+``getAnalysisIfAvailable`` method, which returns a pointer to the analysis if
+it is active. For example:
+
+.. code-block:: c++
+
+ if (DominatorSet *DS = getAnalysisIfAvailable<DominatorSet>()) {
+ // A DominatorSet is active. This code will update it.
+ }
+
+Implementing Analysis Groups
+----------------------------
+
+Now that we understand the basics of how passes are defined, how they are used,
+and how they are required from other passes, it's time to get a little bit
+fancier. All of the pass relationships that we have seen so far are very
+simple: one pass depends on one other specific pass to be run before it can
+run. For many applications, this is great, for others, more flexibility is
+required.
+
+In particular, some analyses are defined such that there is a single simple
+interface to the analysis results, but multiple ways of calculating them.
+Consider alias analysis for example. The most trivial alias analysis returns
+"may alias" for any alias query. The most sophisticated analysis a
+flow-sensitive, context-sensitive interprocedural analysis that can take a
+significant amount of time to execute (and obviously, there is a lot of room
+between these two extremes for other implementations). To cleanly support
+situations like this, the LLVM Pass Infrastructure supports the notion of
+Analysis Groups.
+
+Analysis Group Concepts
+^^^^^^^^^^^^^^^^^^^^^^^
+
+An Analysis Group is a single simple interface that may be implemented by
+multiple different passes. Analysis Groups can be given human readable names
+just like passes, but unlike passes, they need not derive from the ``Pass``
+class. An analysis group may have one or more implementations, one of which is
+the "default" implementation.
+
+Analysis groups are used by client passes just like other passes are: through
+the ``AnalysisUsage::addRequired()`` and ``Pass::getAnalysis()`` methods. In order
+to resolve this requirement, the :ref:`PassManager
+<writing-an-llvm-pass-passmanager>` scans the available passes to see if any
+implementations of the analysis group are available. If none is available, the
+default implementation is created for the pass to use. All standard rules for
+:ref:`interaction between passes <writing-an-llvm-pass-interaction>` still
+apply.
+
+Although :ref:`Pass Registration <writing-an-llvm-pass-registration>` is
+optional for normal passes, all analysis group implementations must be
+registered, and must use the :ref:`INITIALIZE_AG_PASS
+<writing-an-llvm-pass-RegisterAnalysisGroup>` template to join the
+implementation pool. Also, a default implementation of the interface **must**
+be registered with :ref:`RegisterAnalysisGroup
+<writing-an-llvm-pass-RegisterAnalysisGroup>`.
+
+As a concrete example of an Analysis Group in action, consider the
+`AliasAnalysis <http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html>`_
+analysis group. The default implementation of the alias analysis interface
+(the `basicaa <http://llvm.org/doxygen/structBasicAliasAnalysis.html>`_ pass)
+just does a few simple checks that don't require significant analysis to
+compute (such as: two different globals can never alias each other, etc).
+Passes that use the `AliasAnalysis
+<http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html>`_ interface (for
+example the `gcse <http://llvm.org/doxygen/structGCSE.html>`_ pass), do not
+care which implementation of alias analysis is actually provided, they just use
+the designated interface.
+
+From the user's perspective, commands work just like normal. Issuing the
+command ``opt -gcse ...`` will cause the ``basicaa`` class to be instantiated
+and added to the pass sequence. Issuing the command ``opt -somefancyaa -gcse
+...`` will cause the ``gcse`` pass to use the ``somefancyaa`` alias analysis
+(which doesn't actually exist, it's just a hypothetical example) instead.
+
+.. _writing-an-llvm-pass-RegisterAnalysisGroup:
+
+Using ``RegisterAnalysisGroup``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``RegisterAnalysisGroup`` template is used to register the analysis group
+itself, while the ``INITIALIZE_AG_PASS`` is used to add pass implementations to
+the analysis group. First, an analysis group should be registered, with a
+human readable name provided for it. Unlike registration of passes, there is
+no command line argument to be specified for the Analysis Group Interface
+itself, because it is "abstract":
+
+.. code-block:: c++
+
+ static RegisterAnalysisGroup<AliasAnalysis> A("Alias Analysis");
+
+Once the analysis is registered, passes can declare that they are valid
+implementations of the interface by using the following code:
+
+.. code-block:: c++
+
+ namespace {
+ // Declare that we implement the AliasAnalysis interface
+ INITIALIZE_AG_PASS(FancyAA, AliasAnalysis, "somefancyaa",
+ "A more complex alias analysis implementation",
+ false, // Is CFG Only?
+ true, // Is Analysis?
+ false); // Is default Analysis Group implementation?
+ }
+
+This just shows a class ``FancyAA`` that uses the ``INITIALIZE_AG_PASS`` macro
+both to register and to "join" the `AliasAnalysis
+<http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html>`_ analysis group.
+Every implementation of an analysis group should join using this macro.
+
+.. code-block:: c++
+
+ namespace {
+ // Declare that we implement the AliasAnalysis interface
+ INITIALIZE_AG_PASS(BasicAA, AliasAnalysis, "basicaa",
+ "Basic Alias Analysis (default AA impl)",
+ false, // Is CFG Only?
+ true, // Is Analysis?
+ true); // Is default Analysis Group implementation?
+ }
+
+Here we show how the default implementation is specified (using the final
+argument to the ``INITIALIZE_AG_PASS`` template). There must be exactly one
+default implementation available at all times for an Analysis Group to be used.
+Only the default implementation can derive from ``ImmutablePass``. Here we
+declare
+that the `BasicAliasAnalysis
+<http://llvm.org/doxygen/structBasicAliasAnalysis.html>`_ pass is the default
+implementation for the interface.
+
+Pass Statistics
+===============
+
+The `Statistic <http://llvm.org/doxygen/Statistic_8h-source.html>`_ class is
+designed to be an easy way to expose various success metrics from passes.
+These statistics are printed at the end of a run, when the :option:`-stats`
+command line option is enabled on the command line. See the :ref:`Statistics
+section <Statistic>` in the Programmer's Manual for details.
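+
+A minimal sketch of declaring and updating a statistic (the counter name and
+description are hypothetical):
+
+.. code-block:: c++
+
+  #define DEBUG_TYPE "mypass"
+  #include "llvm/ADT/Statistic.h"
+
+  STATISTIC(NumTransformed, "Number of instructions transformed");
+
+  // ...then, inside the pass's run method, increment it as work happens:
+  // ++NumTransformed;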
+
+.. _writing-an-llvm-pass-passmanager:
+
+What PassManager does
+---------------------
+
+The `PassManager <http://llvm.org/doxygen/PassManager_8h-source.html>`_ `class
+<http://llvm.org/doxygen/classllvm_1_1PassManager.html>`_ takes a list of
+passes, ensures their :ref:`prerequisites <writing-an-llvm-pass-interaction>`
+are set up correctly, and then schedules passes to run efficiently. All of the
+LLVM tools that run passes use the PassManager for execution of these passes.
+
+The PassManager does two main things to try to reduce the execution time of a
+series of passes:
+
+#. **Share analysis results.** The ``PassManager`` attempts to avoid
+ recomputing analysis results as much as possible. This means keeping track
+ of which analyses are available already, which analyses get invalidated, and
+   which analyses need to be run for a pass. An important part of this work
+   is that the ``PassManager`` tracks the exact lifetime of all analysis
+ results, allowing it to :ref:`free memory
+ <writing-an-llvm-pass-releaseMemory>` allocated to holding analysis results
+ as soon as they are no longer needed.
+
+#. **Pipeline the execution of passes on the program.** The ``PassManager``
+ attempts to get better cache and memory usage behavior out of a series of
+ passes by pipelining the passes together. This means that, given a series
+   of consecutive :ref:`FunctionPasses <writing-an-llvm-pass-FunctionPass>`, it
+   will execute all of the :ref:`FunctionPasses
+   <writing-an-llvm-pass-FunctionPass>` on the first function, then all of the
+ :ref:`FunctionPasses <writing-an-llvm-pass-FunctionPass>` on the second
+ function, etc... until the entire program has been run through the passes.
+
+ This improves the cache behavior of the compiler, because it is only
+ touching the LLVM program representation for a single function at a time,
+ instead of traversing the entire program. It reduces the memory consumption
+   of the compiler, because, for example, only one `DominatorSet
+ <http://llvm.org/doxygen/classllvm_1_1DominatorSet.html>`_ needs to be
+ calculated at a time. This also makes it possible to implement some
+ :ref:`interesting enhancements <writing-an-llvm-pass-SMP>` in the future.
+
+The effectiveness of the ``PassManager`` is influenced directly by how much
+information it has about the behaviors of the passes it is scheduling. For
+example, the "preserved" set is intentionally conservative in the face of an
+unimplemented :ref:`getAnalysisUsage <writing-an-llvm-pass-getAnalysisUsage>`
+method. Failing to implement it when it should be implemented has the effect
+of not allowing any analysis results to live across the execution of your
+pass.
+
+The ``PassManager`` class exposes a ``--debug-pass`` command line option that
+is useful for debugging pass execution, seeing how things work, and diagnosing
+when you should be preserving more analyses than you currently are. (To get
+information about all of the variants of the ``--debug-pass`` option, just type
+"``opt -help-hidden``").
+
+By using the ``--debug-pass=Structure`` option, for example, we can see how
+our :ref:`Hello World <writing-an-llvm-pass-basiccode>` pass interacts with
+other passes. Let's try it out with the ``gcse`` and ``licm`` passes:
+
+.. code-block:: console
+
+ $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -licm --debug-pass=Structure < hello.bc > /dev/null
+ Module Pass Manager
+ Function Pass Manager
+ Dominator Set Construction
+ Immediate Dominators Construction
+ Global Common Subexpression Elimination
+ -- Immediate Dominators Construction
+ -- Global Common Subexpression Elimination
+ Natural Loop Construction
+ Loop Invariant Code Motion
+ -- Natural Loop Construction
+ -- Loop Invariant Code Motion
+ Module Verifier
+ -- Dominator Set Construction
+ -- Module Verifier
+ Bitcode Writer
+ --Bitcode Writer
+
+This output shows us when passes are constructed and when the analysis results
+are known to be dead (prefixed with "``--``"). Here we see that GCSE uses
+dominator and immediate dominator information to do its job. The LICM pass
+uses natural loop information, which uses dominator sets, but not immediate
+dominators. Because immediate dominators are no longer useful after the GCSE
+pass, it is immediately destroyed. The dominator sets are then reused to
+compute natural loop information, which is then used by the LICM pass.
+
+After the LICM pass, the module verifier runs (which is automatically added by
+the :program:`opt` tool), which uses the dominator set to check that the
+resultant LLVM code is well formed. After it finishes, the dominator set
+information is destroyed, after being computed once, and shared by three
+passes.
+
+Let's see how this changes when we run the :ref:`Hello World
+<writing-an-llvm-pass-basiccode>` pass in between the two passes:
+
+.. code-block:: console
+
+ $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null
+ Module Pass Manager
+ Function Pass Manager
+ Dominator Set Construction
+ Immediate Dominators Construction
+ Global Common Subexpression Elimination
+ -- Dominator Set Construction
+ -- Immediate Dominators Construction
+ -- Global Common Subexpression Elimination
+ Hello World Pass
+ -- Hello World Pass
+ Dominator Set Construction
+ Natural Loop Construction
+ Loop Invariant Code Motion
+ -- Natural Loop Construction
+ -- Loop Invariant Code Motion
+ Module Verifier
+ -- Dominator Set Construction
+ -- Module Verifier
+ Bitcode Writer
+ --Bitcode Writer
+ Hello: __main
+ Hello: puts
+ Hello: main
+
+Here we see that the :ref:`Hello World <writing-an-llvm-pass-basiccode>` pass
+has killed the Dominator Set pass, even though it doesn't modify the code at
+all! To fix this, we need to add the following :ref:`getAnalysisUsage
+<writing-an-llvm-pass-getAnalysisUsage>` method to our pass:
+
+.. code-block:: c++
+
+ // We don't modify the program, so we preserve all analyses
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+Now when we run our pass, we get this output:
+
+.. code-block:: console
+
+ $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null
+ Pass Arguments: -gcse -hello -licm
+ Module Pass Manager
+ Function Pass Manager
+ Dominator Set Construction
+ Immediate Dominators Construction
+ Global Common Subexpression Elimination
+ -- Immediate Dominators Construction
+ -- Global Common Subexpression Elimination
+ Hello World Pass
+ -- Hello World Pass
+ Natural Loop Construction
+ Loop Invariant Code Motion
+ -- Loop Invariant Code Motion
+ -- Natural Loop Construction
+ Module Verifier
+ -- Dominator Set Construction
+ -- Module Verifier
+ Bitcode Writer
+ --Bitcode Writer
+ Hello: __main
+ Hello: puts
+ Hello: main
+
+This shows that we no longer accidentally invalidate dominator information,
+and therefore do not have to compute it twice.
+
+.. _writing-an-llvm-pass-releaseMemory:
+
+The ``releaseMemory`` method
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: c++
+
+ virtual void releaseMemory();
+
+The ``PassManager`` automatically determines when to compute analysis results,
+and how long to keep them around for. Because the lifetime of the pass object
+itself is effectively the entire duration of the compilation process, we need
+some way to free analysis results when they are no longer useful. The
+``releaseMemory`` virtual method is the way to do this.
+
+If you are writing an analysis or any other pass that retains a significant
+amount of state (for use by another pass which "requires" your pass and uses
+the :ref:`getAnalysis <writing-an-llvm-pass-getAnalysis>` method) you should
+implement ``releaseMemory`` to, well, release the memory allocated to maintain
+this internal state. This method is called after the ``run*`` method for the
+class, before the next call of ``run*`` in your pass.
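+
+A hedged sketch of a typical ``releaseMemory`` implementation (the
+``CachedResults`` member is hypothetical):
+
+.. code-block:: c++
+
+  virtual void releaseMemory() {
+    // Drop the cached per-function results held by this analysis; they
+    // will be recomputed if the analysis is requested again.
+    CachedResults.clear();
+  }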
+
+Registering dynamically loaded passes
+=====================================
+
+*Size matters* when constructing production quality tools using LLVM, both for
+the purposes of distribution, and for regulating the resident code size when
+running on the target system. Therefore, it becomes desirable to selectively
+use some passes, while omitting others, and to maintain the flexibility to
+change configurations later on. You want to be able to do all this and still
+provide feedback to the user. This is where pass registration comes into
+play.
+
+The fundamental mechanisms for pass registration are the
+``MachinePassRegistry`` class and subclasses of ``MachinePassRegistryNode``.
+
+An instance of ``MachinePassRegistry`` is used to maintain a list of
+``MachinePassRegistryNode`` objects. This instance maintains the list and
+communicates additions and deletions to the command line interface.
+
+An instance of ``MachinePassRegistryNode`` subclass is used to maintain
+information provided about a particular pass. This information includes the
+command line name, the command help string and the address of the function used
+to create an instance of the pass. A global static constructor of one of these
+instances *registers* with a corresponding ``MachinePassRegistry``; the static
+destructor *unregisters*. Thus a pass that is statically linked in the tool
+will be registered at start up. A dynamically loaded pass will register on
+load and unregister at unload.
+
+Using existing registries
+-------------------------
+
+There are predefined registries to track instruction scheduling
+(``RegisterScheduler``) and register allocation (``RegisterRegAlloc``) machine
+passes. Here we will describe how to *register* a register allocator machine
+pass.
+
+Implement your register allocator machine pass. In your register allocator
+``.cpp`` file add the following include:
+
+.. code-block:: c++
+
+ #include "llvm/CodeGen/RegAllocRegistry.h"
+
+Also in your register allocator ``.cpp`` file, define a creator function in the
+form:
+
+.. code-block:: c++
+
+ FunctionPass *createMyRegisterAllocator() {
+ return new MyRegisterAllocator();
+ }
+
+Note that the signature of this function should match the type of
+``RegisterRegAlloc::FunctionPassCtor``. In the same file add the "installing"
+declaration, in the form:
+
+.. code-block:: c++
+
+ static RegisterRegAlloc myRegAlloc("myregalloc",
+ "my register allocator help string",
+ createMyRegisterAllocator);
+
+Note that the two spaces prior to the help string produce a tidy result on the
+:option:`-help` query.
+
+.. code-block:: console
+
+ $ llc -help
+ ...
+ -regalloc - Register allocator to use (default=linearscan)
+ =linearscan - linear scan register allocator
+ =local - local register allocator
+ =simple - simple register allocator
+ =myregalloc - my register allocator help string
+ ...
+
+And that's it. The user is now free to use ``-regalloc=myregalloc`` as an
+option. Registering instruction schedulers is similar except use the
+``RegisterScheduler`` class. Note that the
+``RegisterScheduler::FunctionPassCtor`` is significantly different from
+``RegisterRegAlloc::FunctionPassCtor``.
+
+To force the load/linking of your register allocator into the
+:program:`llc`/:program:`lli` tools, add your creator function's global
+declaration to ``Passes.h`` and add a "pseudo" call line to
+``llvm/CodeGen/LinkAllCodegenComponents.h``.
+
+Creating new registries
+-----------------------
+
+The easiest way to get started is to clone one of the existing registries; we
+recommend ``llvm/CodeGen/RegAllocRegistry.h``. The key things to modify are
+the class name and the ``FunctionPassCtor`` type.
+
+Then you need to declare the registry. Example: if your pass registry is
+``RegisterMyPasses`` then define:
+
+.. code-block:: c++
+
+ MachinePassRegistry RegisterMyPasses::Registry;
+
+And finally, declare the command line option for your passes. Example:
+
+.. code-block:: c++
+
+ cl::opt<RegisterMyPasses::FunctionPassCtor, false,
+ RegisterPassParser<RegisterMyPasses> >
+ MyPassOpt("mypass",
+ cl::init(&createDefaultMyPass),
+ cl::desc("my pass option help"));
+
+Here the command option is "``mypass``", with ``createDefaultMyPass`` as the
+default creator.
+
+Using GDB with dynamically loaded passes
+----------------------------------------
+
+Unfortunately, using GDB with dynamically loaded passes is not as easy as it
+should be. First of all, you can't set a breakpoint in a shared object that
+has not been loaded yet, and second of all there are problems with inlined
+functions in shared objects. Here are some suggestions for debugging your pass
+with GDB.
+
+For the sake of discussion, I'm going to assume that you are debugging a
+transformation invoked by :program:`opt`, although nothing described here
+depends on that.
+
+Setting a breakpoint in your pass
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The first thing you do is start :program:`gdb` on the :program:`opt` process:
+
+.. code-block:: console
+
+ $ gdb opt
+ GNU gdb 5.0
+ Copyright 2000 Free Software Foundation, Inc.
+ GDB is free software, covered by the GNU General Public License, and you are
+ welcome to change it and/or distribute copies of it under certain conditions.
+ Type "show copying" to see the conditions.
+ There is absolutely no warranty for GDB. Type "show warranty" for details.
+ This GDB was configured as "sparc-sun-solaris2.6"...
+ (gdb)
+
+Note that :program:`opt` has a lot of debugging information in it, so it takes
+time to load. Be patient. Since we cannot set a breakpoint in our pass yet
+(the shared object isn't loaded until runtime), we must execute the process,
+and have it stop before it invokes our pass, but after it has loaded the shared
+object. The most foolproof way of doing this is to set a breakpoint in
+``PassManager::run`` and then run the process with the arguments you want:
+
+.. code-block:: console
+
+ (gdb) break llvm::PassManager::run
+ Breakpoint 1 at 0x2413bc: file Pass.cpp, line 70.
+ (gdb) run test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]
+ Starting program: opt test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]
+ Breakpoint 1, PassManager::run (this=0xffbef174, M=@0x70b298) at Pass.cpp:70
+ 70 bool PassManager::run(Module &M) { return PM->run(M); }
+ (gdb)
+
+Once :program:`opt` stops in the ``PassManager::run`` method, you are now
+free to set breakpoints in your pass so that you can trace through execution or
+do other standard debugging stuff.
+
+Miscellaneous Problems
+^^^^^^^^^^^^^^^^^^^^^^
+
+Once you have the basics down, there are a couple of problems that GDB has,
+some with solutions, some without.
+
+* Inline functions have bogus stack information. In general, GDB does a pretty
+ good job getting stack traces and stepping through inline functions. When a
+ pass is dynamically loaded however, it somehow completely loses this
+ capability. The only solution I know of is to de-inline a function (move it
+ from the body of a class to a ``.cpp`` file).
+
+* Restarting the program breaks breakpoints. After following the information
+ above, you have succeeded in getting some breakpoints planted in your pass.
+  Next thing you know, you restart the program (i.e., you type "``run``" again),
+ and you start getting errors about breakpoints being unsettable. The only
+ way I have found to "fix" this problem is to delete the breakpoints that are
+ already set in your pass, run the program, and re-set the breakpoints once
+ execution stops in ``PassManager::run``.
+
+Hopefully these tips will help with common case debugging situations. If you'd
+like to contribute some tips of your own, just contact `Chris
+<mailto:sabre@nondot.org>`_.
+
+Future extensions planned
+-------------------------
+
+Although the LLVM Pass Infrastructure is very capable as it stands, and does
+some nifty stuff, there are things we'd like to add in the future. Here is
+where we are going:
+
+.. _writing-an-llvm-pass-SMP:
+
+Multithreaded LLVM
+^^^^^^^^^^^^^^^^^^
+
+Multiple CPU machines are becoming more common and compilation can never be
+fast enough: obviously we should allow for a multithreaded compiler. Because
+of the semantics defined for passes above (specifically they cannot maintain
+state across invocations of their ``run*`` methods), a nice clean way to
+implement a multithreaded compiler would be for the ``PassManager`` class to
+create multiple instances of each pass object, and allow the separate instances
+to be hacking on different parts of the program at the same time.
+
+This implementation would prevent each of the passes from having to implement
+multithreaded constructs, requiring only the LLVM core to have locking in a few
+places (for global resources). Although this is a simple extension, we simply
+haven't had time (or multiprocessor machines, thus a reason) to implement this.
+Despite that, we have kept the LLVM passes SMP ready, and you should too.
+
diff --git a/docs/YamlIO.rst b/docs/YamlIO.rst
new file mode 100644
index 000000000000..ac50292f4a81
--- /dev/null
+++ b/docs/YamlIO.rst
@@ -0,0 +1,860 @@
+=====================
+YAML I/O
+=====================
+
+.. contents::
+ :local:
+
+Introduction to YAML
+====================
+
+YAML is a human readable data serialization language. The full YAML language
+spec can be read at `yaml.org
+<http://www.yaml.org/spec/1.2/spec.html#Introduction>`_. The simplest form of
+yaml is just "scalars", "mappings", and "sequences". A scalar is any number
+or string. The pound/hash symbol (#) begins a comment line. A mapping is
+a set of key-value pairs where the key ends with a colon. For example:
+
+.. code-block:: yaml
+
+ # a mapping
+ name: Tom
+ hat-size: 7
+
+A sequence is a list of items where each item starts with a leading dash ('-').
+For example:
+
+.. code-block:: yaml
+
+ # a sequence
+ - x86
+ - x86_64
+ - PowerPC
+
+You can combine mappings and sequences by indenting. For example a sequence
+of mappings in which one of the mapping values is itself a sequence:
+
+.. code-block:: yaml
+
+ # a sequence of mappings with one key's value being a sequence
+ - name: Tom
+ cpus:
+ - x86
+ - x86_64
+ - name: Bob
+ cpus:
+ - x86
+ - name: Dan
+ cpus:
+ - PowerPC
+ - x86
+
+Sometimes sequences are known to be short and the one-entry-per-line format is
+too verbose, so YAML offers an alternate syntax for sequences called a "Flow
+Sequence" in which you put comma separated sequence elements into square
+brackets. The above example could then be simplified to:
+
+
+.. code-block:: yaml
+
+ # a sequence of mappings with one key's value being a flow sequence
+ - name: Tom
+ cpus: [ x86, x86_64 ]
+ - name: Bob
+ cpus: [ x86 ]
+ - name: Dan
+ cpus: [ PowerPC, x86 ]
+
+
+Introduction to YAML I/O
+========================
+
+The use of indenting makes the YAML easy for a human to read and understand,
+but having a program read and write YAML involves a lot of tedious details.
+The YAML I/O library structures and simplifies reading and writing YAML
+documents.
+
+YAML I/O assumes you have some "native" data structures which you want to be
+able to dump as YAML and recreate from YAML. The first step is to try
+writing example YAML for your data structures. You may find after looking at
+possible YAML representations that a direct mapping of your data structures
+to YAML is not very readable. Often the fields are not in the order that
+a human would find readable. Or the same information is replicated in multiple
+locations, making it hard for a human to write such YAML correctly.
+
+In relational database theory there is a design step called normalization in
+which you reorganize fields and tables. The same considerations need to
+go into the design of your YAML encoding. But, you may not want to change
+your existing native data structures. Therefore, when writing out YAML
+there may be a normalization step, and when reading YAML there would be a
+corresponding denormalization step.
+
+YAML I/O uses a non-invasive, traits based design. YAML I/O defines some
+abstract base templates. You specialize those templates on your data types.
+For instance, if you have an enumerated type FooBar you could specialize
+ScalarEnumerationTraits on that type and define the enumeration() method:
+
+.. code-block:: c++
+
+ using llvm::yaml::ScalarEnumerationTraits;
+ using llvm::yaml::IO;
+
+ template <>
+ struct ScalarEnumerationTraits<FooBar> {
+ static void enumeration(IO &io, FooBar &value) {
+ ...
+ }
+ };
+
+
+As with all YAML I/O template specializations, the ScalarEnumerationTraits is
+used for both reading and writing YAML. That is, the mapping between in-memory
+enum values and the YAML string representation is defined in only one place.
+This assures that the code for writing and parsing of YAML stays in sync.
+
+To specify a YAML mapping, you define a specialization on
+llvm::yaml::MappingTraits.
+If your native data structure happens to be a struct that is already normalized,
+then the specialization is simple. For example:
+
+.. code-block:: c++
+
+ using llvm::yaml::MappingTraits;
+ using llvm::yaml::IO;
+
+ template <>
+ struct MappingTraits<Person> {
+ static void mapping(IO &io, Person &info) {
+ io.mapRequired("name", info.name);
+ io.mapOptional("hat-size", info.hatSize);
+ }
+ };
+
+
+A YAML sequence is automatically inferred if your data type has begin()/end()
+iterators and a push_back() method. Therefore any of the STL containers
+(such as std::vector<>) will automatically translate to YAML sequences.
+
+Once you have defined specializations for your data types, you can
+programmatically use YAML I/O to write a YAML document:
+
+.. code-block:: c++
+
+ using llvm::yaml::Output;
+
+ Person tom;
+ tom.name = "Tom";
+ tom.hatSize = 8;
+ Person dan;
+ dan.name = "Dan";
+ dan.hatSize = 7;
+ std::vector<Person> persons;
+ persons.push_back(tom);
+ persons.push_back(dan);
+
+ Output yout(llvm::outs());
+ yout << persons;
+
+This would write the following:
+
+.. code-block:: yaml
+
+ - name: Tom
+ hat-size: 8
+ - name: Dan
+ hat-size: 7
+
+And you can also read such YAML documents with the following code:
+
+.. code-block:: c++
+
+ using llvm::yaml::Input;
+
+ typedef std::vector<Person> PersonList;
+ std::vector<PersonList> docs;
+
+  // "document" is assumed to be a MemoryBuffer holding the YAML input.
+  Input yin(document.getBuffer());
+ yin >> docs;
+
+ if ( yin.error() )
+ return;
+
+ // Process read document
+ for ( PersonList &pl : docs ) {
+ for ( Person &person : pl ) {
+      llvm::outs() << "name=" << person.name;
+ }
+ }
+
+One other feature of YAML is the ability to define multiple documents in a
+single file. That is why reading YAML produces a vector of your document type.
+
+
+
+Error Handling
+==============
+
+When parsing a YAML document, if the input does not match your schema (as
+expressed in your XxxTraits<> specializations), YAML I/O
+will print out an error message and your Input object's error() method will
+return true. For instance, the following document:
+
+.. code-block:: yaml
+
+ - name: Tom
+ shoe-size: 12
+ - name: Dan
+ hat-size: 7
+
+has a key (shoe-size) that is not defined in the schema. YAML I/O will
+automatically generate this error:
+
+.. code-block:: yaml
+
+ YAML:2:2: error: unknown key 'shoe-size'
+ shoe-size: 12
+ ^~~~~~~~~
+
+Similar errors are produced for other input not conforming to the schema.
+
+
+Scalars
+=======
+
+YAML scalars are just strings (i.e. not a sequence or mapping). The YAML I/O
+library provides support for translating between YAML scalars and specific
+C++ types.
+
+
+Built-in types
+--------------
+The following types have built-in support in YAML I/O:
+
+* bool
+* float
+* double
+* StringRef
+* int64_t
+* int32_t
+* int16_t
+* int8_t
+* uint64_t
+* uint32_t
+* uint16_t
+* uint8_t
+
+That is, you can use those types in fields of MappingTraits or as the element
+type of a sequence. When reading, YAML I/O will validate that the string found
+is convertible to that type and error out if not.
+
+
+Unique types
+------------
+Given that YAML I/O is trait based, the selection of how to convert your data
+to YAML is based on the type of your data. But in C++ type matching, typedefs
+do not generate unique type names. That means if you have two typedefs of
+unsigned int, to YAML I/O both types look exactly like unsigned int. To
+facilitate making unique type names, YAML I/O provides a macro which is used
+like a typedef on built-in types, but expands to create a class with conversion
+operators to and from the base type. For example:
+
+.. code-block:: c++
+
+ LLVM_YAML_STRONG_TYPEDEF(uint32_t, MyFooFlags)
+ LLVM_YAML_STRONG_TYPEDEF(uint32_t, MyBarFlags)
+
+This generates two classes MyFooFlags and MyBarFlags which you can use in your
+native data structures instead of uint32_t. They are implicitly
+converted to and from uint32_t. The point of creating these unique types
+is that you can now specify traits on them to get different YAML conversions.
+
+Hex types
+---------
+An example use of a unique type is that YAML I/O provides fixed sized unsigned
+integers that are written with YAML I/O as hexadecimal instead of the decimal
+format used by the built-in integer types:
+
+* Hex64
+* Hex32
+* Hex16
+* Hex8
+
+You can use llvm::yaml::Hex32 instead of uint32_t and the only difference will
+be that when YAML I/O writes out that type it will be formatted in hexadecimal.
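+
+For example, a sketch (the Header struct and its fields are hypothetical):
+
+.. code-block:: c++
+
+  using llvm::yaml::Hex32;
+  using llvm::yaml::MappingTraits;
+  using llvm::yaml::IO;
+
+  struct Header {
+    Hex32    magic;  // will be written like 0xD34DB33F
+    uint32_t size;   // will be written in decimal
+  };
+
+  template <>
+  struct MappingTraits<Header> {
+    static void mapping(IO &io, Header &hdr) {
+      io.mapRequired("magic", hdr.magic);
+      io.mapRequired("size", hdr.size);
+    }
+  };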
+
+
+ScalarEnumerationTraits
+-----------------------
+YAML I/O supports translating between in-memory enumerations and a set of string
+values in YAML documents. This is done by specializing ScalarEnumerationTraits<>
+on your enumeration type and defining an enumeration() method.
+For instance, suppose you had an enumeration of CPUs and a struct with it as
+a field:
+
+.. code-block:: c++
+
+ enum CPUs {
+ cpu_x86_64 = 5,
+ cpu_x86 = 7,
+ cpu_PowerPC = 8
+ };
+
+ struct Info {
+ CPUs cpu;
+ uint32_t flags;
+ };
+
+To support reading and writing of this enumeration, you can define a
+ScalarEnumerationTraits specialization on CPUs, which can then be used
+as a field type:
+
+.. code-block:: c++
+
+ using llvm::yaml::ScalarEnumerationTraits;
+ using llvm::yaml::MappingTraits;
+ using llvm::yaml::IO;
+
+ template <>
+ struct ScalarEnumerationTraits<CPUs> {
+ static void enumeration(IO &io, CPUs &value) {
+ io.enumCase(value, "x86_64", cpu_x86_64);
+ io.enumCase(value, "x86", cpu_x86);
+ io.enumCase(value, "PowerPC", cpu_PowerPC);
+ }
+ };
+
+ template <>
+ struct MappingTraits<Info> {
+ static void mapping(IO &io, Info &info) {
+ io.mapRequired("cpu", info.cpu);
+ io.mapOptional("flags", info.flags, 0);
+ }
+ };
+
+When reading YAML, if the string found does not match any of the strings
+specified by the enumCase() methods, an error is automatically generated.
+When writing YAML, if the value being written does not match any of the values
+specified by the enumCase() methods, a runtime assertion is triggered.
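+
+With the above traits, one possible round-trip for an Info value whose cpu
+field is cpu_x86_64 is the YAML:
+
+.. code-block:: yaml
+
+  cpu:   x86_64
+  flags: 8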
+
+
+BitValue
+--------
+Another common data representation in C++ is a field where each bit has a unique
+meaning. This is often used in a "flags" field. YAML I/O has support for
+converting such fields to a flow sequence. For instance, suppose you
+had the following bit flags defined:
+
+.. code-block:: c++
+
+  enum {
+    flagsPointy = 1,
+    flagsHollow = 2,
+    flagsFlat   = 4,
+    flagsRound  = 8
+  };
+
+  LLVM_YAML_STRONG_TYPEDEF(uint32_t, MyFlags)
+
+To support reading and writing of MyFlags, you specialize ScalarBitSetTraits<>
+on MyFlags and provide the bit values and their names.
+
+.. code-block:: c++
+
+ using llvm::yaml::ScalarBitSetTraits;
+ using llvm::yaml::MappingTraits;
+ using llvm::yaml::IO;
+
+ template <>
+ struct ScalarBitSetTraits<MyFlags> {
+ static void bitset(IO &io, MyFlags &value) {
+ io.bitSetCase(value, "hollow", flagHollow);
+ io.bitSetCase(value, "flat", flagFlat);
+ io.bitSetCase(value, "round", flagRound);
+ io.bitSetCase(value, "pointy", flagPointy);
+ }
+ };
+
+ struct Info {
+ StringRef name;
+ MyFlags flags;
+ };
+
+ template <>
+ struct MappingTraits<Info> {
+ static void mapping(IO &io, Info& info) {
+ io.mapRequired("name", info.name);
+ io.mapRequired("flags", info.flags);
+ }
+ };
+
+With the above, when writing, YAML I/O will test each value in the
+bitset trait against the flags field, and each one that matches will
+cause the corresponding string to be added to the flow sequence. The opposite
+is done when reading, and any unknown string value will result in an error. With
+the above schema, a sample valid YAML document is:
+
+.. code-block:: yaml
+
+ name: Tom
+ flags: [ pointy, flat ]
+
+
+Custom Scalar
+-------------
+Sometimes for readability a scalar needs to be formatted in a custom way. For
+instance, your internal data structure may use an integer for time (seconds since
+some epoch), but in YAML it would be much nicer to express that integer in
+some time format (e.g. 4-May-2012 10:30pm). YAML I/O has a way to support
+custom formatting and parsing of scalar types by specializing ScalarTraits<> on
+your data type. When writing, YAML I/O will provide the native type and
+your specialization must write out its text form. When reading,
+YAML I/O will provide an llvm::StringRef of the scalar and your specialization
+must convert that to your native data type. An outline of a custom scalar type
+looks like:
+
+.. code-block:: c++
+
+ using llvm::yaml::ScalarTraits;
+ using llvm::yaml::IO;
+
+ template <>
+ struct ScalarTraits<MyCustomType> {
+    static void output(const MyCustomType &value, llvm::raw_ostream &out) {
+ out << value; // do custom formatting here
+ }
+    static StringRef input(StringRef scalar, MyCustomType &value) {
+ // do custom parsing here. Return the empty string on success,
+ // or an error message on failure.
+ return StringRef();
+ }
+ };
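+
+As a concrete (hypothetical) sketch, the following formats a strong typedef
+of uint32_t as a number with a trailing "s" suffix; the MySeconds type and
+the suffix format are invented for illustration:
+
+.. code-block:: c++
+
+  LLVM_YAML_STRONG_TYPEDEF(uint32_t, MySeconds)
+
+  template <>
+  struct ScalarTraits<MySeconds> {
+    static void output(const MySeconds &value, llvm::raw_ostream &out) {
+      out << (uint32_t)value << "s";   // e.g. "90s"
+    }
+    static StringRef input(StringRef scalar, MySeconds &value) {
+      if (!scalar.endswith("s"))
+        return "expected a number with a trailing 's'";
+      uint32_t n;
+      if (scalar.substr(0, scalar.size() - 1).getAsInteger(10, n))
+        return "not a number";
+      value = n;
+      return StringRef();              // empty string means success
+    }
+  };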
+
+
+Mappings
+========
+
+For your type T to be translated to or from a YAML mapping, you must specialize
+llvm::yaml::MappingTraits on T and implement the "void mapping(IO &io, T&)"
+method. If your native data structures use pointers to a class everywhere,
+you can specialize on the class pointer. Examples:
+
+.. code-block:: c++
+
+ using llvm::yaml::MappingTraits;
+ using llvm::yaml::IO;
+
+ // Example of struct Foo which is used by value
+ template <>
+ struct MappingTraits<Foo> {
+ static void mapping(IO &io, Foo &foo) {
+ io.mapOptional("size", foo.size);
+ ...
+ }
+ };
+
+ // Example of struct Bar which is natively always a pointer
+ template <>
+ struct MappingTraits<Bar*> {
+ static void mapping(IO &io, Bar *&bar) {
+ io.mapOptional("size", bar->size);
+ ...
+ }
+ };
+
+
+No Normalization
+----------------
+
+The mapping() method is responsible, if needed, for normalizing and
+denormalizing. In a simple case where the native data structure requires no
+normalization, the mapping method just uses mapOptional() or mapRequired() to
+bind the struct's fields to YAML key names. For example:
+
+.. code-block:: c++
+
+ using llvm::yaml::MappingTraits;
+ using llvm::yaml::IO;
+
+ template <>
+ struct MappingTraits<Person> {
+ static void mapping(IO &io, Person &info) {
+ io.mapRequired("name", info.name);
+ io.mapOptional("hat-size", info.hatSize);
+ }
+ };
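+
+With a hypothetical Person struct holding a name string and a hatSize integer,
+the corresponding YAML would look like:
+
+.. code-block:: yaml
+
+  name:     Tom
+  hat-size: 7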
+
+
+Normalization
+----------------
+
+When [de]normalization is required, the mapping() method needs a way to access
+normalized values as fields. To help with this, there is
+a template MappingNormalization<> which you can then use to automatically
+do the normalization and denormalization. The template is used to create
+a local variable in your mapping() method which contains the normalized keys.
+
+Suppose you have a native data type
+Polar which specifies a position in polar coordinates (distance, angle):
+
+.. code-block:: c++
+
+ struct Polar {
+ float distance;
+ float angle;
+ };
+
+but you've decided the normalized YAML form should be in x,y coordinates. That
+is, you want the YAML to look like:
+
+.. code-block:: yaml
+
+ x: 10.3
+ y: -4.7
+
+You can support this by defining a MappingTraits that normalizes the polar
+coordinates to x,y coordinates when writing YAML and denormalizes x,y
+coordinates into polar when reading YAML.
+
+.. code-block:: c++
+
+ using llvm::yaml::MappingTraits;
+ using llvm::yaml::IO;
+
+ template <>
+ struct MappingTraits<Polar> {
+
+ class NormalizedPolar {
+ public:
+ NormalizedPolar(IO &io)
+ : x(0.0), y(0.0) {
+ }
+ NormalizedPolar(IO &, Polar &polar)
+ : x(polar.distance * cos(polar.angle)),
+ y(polar.distance * sin(polar.angle)) {
+ }
+ Polar denormalize(IO &) {
+        return Polar(sqrt(x*x + y*y), atan2(y, x));
+ }
+
+ float x;
+ float y;
+ };
+
+ static void mapping(IO &io, Polar &polar) {
+ MappingNormalization<NormalizedPolar, Polar> keys(io, polar);
+
+ io.mapRequired("x", keys->x);
+ io.mapRequired("y", keys->y);
+ }
+ };
+
+When writing YAML, the local variable "keys" will be a stack allocated
+instance of NormalizedPolar, constructed from the supplied polar object, which
+initializes its x and y fields. The mapRequired() methods then write out the x
+and y values as key/value pairs.
+
+When reading YAML, the local variable "keys" will be a stack allocated instance
+of NormalizedPolar, constructed by the empty constructor. The mapRequired()
+methods will find the matching keys in the YAML document and fill in the x and y
+fields of the NormalizedPolar object keys. At the end of the mapping() method,
+when the local keys variable goes out of scope, the denormalize() method is
+automatically called to convert the read values back to polar coordinates,
+which are then assigned back to the second parameter to mapping().
+
+In some cases, the normalized class may be a subclass of the native type and
+could be returned by the denormalize() method, except that the temporary
+normalized instance is stack allocated. In these cases, the utility template
+MappingNormalizationHeap<> can be used instead. It is just like
+MappingNormalization<> except that it heap allocates the normalized object
+when reading YAML. It never destroys the normalized object. The denormalize()
+method can then return "this".
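+
+A minimal sketch of that pattern follows; the Shape class hierarchy and the
+field copying are hypothetical, and the elided mapRequired() calls would bind
+the normalized fields just as in the Polar example:
+
+.. code-block:: c++
+
+  template <>
+  struct MappingTraits<Shape *> {
+
+    class NormalizedShape : public Shape {
+    public:
+      NormalizedShape(IO &io) { }
+      NormalizedShape(IO &io, Shape *&shape) { /* copy fields from *shape */ }
+      Shape *denormalize(IO &) { return this; }  // safe: heap allocated
+    };
+
+    static void mapping(IO &io, Shape *&shape) {
+      MappingNormalizationHeap<NormalizedShape, Shape *> keys(io, shape);
+      // io.mapRequired(...) calls on keys-> fields go here
+    }
+  };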
+
+
+Default values
+--------------
+Within a mapping() method, calls to io.mapRequired() mean that the key is
+required to exist when parsing YAML documents; otherwise, YAML I/O will issue an
+error.
+
+On the other hand, keys registered with io.mapOptional() are allowed to not
+exist in the YAML document being read. So what value is put in the field
+for those optional keys?
+There are two steps to how those optional fields are filled in. First, the
+second parameter to the mapping() method is a reference to a native class. That
+native class must have a default constructor. Whatever value the default
+constructor initially sets for an optional field will be that field's value.
+Second, the mapOptional() method has an optional third parameter. If provided
+it is the value that mapOptional() should set that field to if the YAML document
+does not have that key.
+
+There is one important difference between those two ways (default constructor
+and third parameter to mapOptional). When YAML I/O generates a YAML document,
+if the third parameter to mapOptional() is used and the actual value being
+written equals (using ==) the default value, then that key/value pair is not
+written.
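+
+For example, given the earlier mapping call:
+
+.. code-block:: c++
+
+  io.mapOptional("flags", info.flags, 0);
+
+when writing an Info whose flags field is 0, no "flags" key is emitted; when
+reading a document with no "flags" key, info.flags is set to 0.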
+
+
+Order of Keys
+--------------
+
+When writing out a YAML document, the keys are written in the order that the
+calls to mapRequired()/mapOptional() are made in the mapping() method. This
+gives you a chance to write the fields in an order that a human reader of
+the YAML document would find natural. This may be different from the order
+of the fields in the native class.
+
+When reading in a YAML document, the keys in the document can be in any order,
+but they are processed in the order that the calls to mapRequired()/mapOptional()
+are made in the mapping() method. That enables some interesting
+functionality. For instance, if the first field bound is the cpu and the second
+field bound is flags, and the flags are cpu-specific, you can programmatically
+switch how the flags are converted to and from YAML based on the cpu.
+This works for both reading and writing. For example:
+
+.. code-block:: c++
+
+ using llvm::yaml::MappingTraits;
+ using llvm::yaml::IO;
+
+ struct Info {
+ CPUs cpu;
+ uint32_t flags;
+ };
+
+ template <>
+ struct MappingTraits<Info> {
+ static void mapping(IO &io, Info &info) {
+ io.mapRequired("cpu", info.cpu);
+ // flags must come after cpu for this to work when reading yaml
+      if ( info.cpu == cpu_x86_64 )
+        io.mapRequired("flags", *(My86_64Flags *)&info.flags);
+      else
+        io.mapRequired("flags", *(My86Flags *)&info.flags);
+ }
+ };
+
+
+Sequence
+========
+
+For your type T to be translated to or from a YAML sequence, you must specialize
+llvm::yaml::SequenceTraits on T and implement two methods:
+``size_t size(IO &io, T&)`` and
+``T::value_type& element(IO &io, T&, size_t index)``. For example:
+
+.. code-block:: c++
+
+ template <>
+ struct SequenceTraits<MySeq> {
+ static size_t size(IO &io, MySeq &list) { ... }
+    static MySeqEl &element(IO &io, MySeq &list, size_t index) { ... }
+ };
+
+The size() method returns how many elements are currently in your sequence.
+The element() method returns a reference to the i'th element in the sequence.
+When parsing YAML, the element() method may be called with an index one bigger
+than the current size. Your element() method should allocate space for one
+more element (using the default constructor if the element is a C++ object) and
+return a reference to that newly allocated space.
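+
+A minimal sketch of that grow-on-demand behavior, assuming MySeq wraps a
+std::vector<MySeqEl> in a member named entries (both hypothetical):
+
+.. code-block:: c++
+
+  template <>
+  struct SequenceTraits<MySeq> {
+    static size_t size(IO &io, MySeq &seq) {
+      return seq.entries.size();
+    }
+    static MySeqEl &element(IO &io, MySeq &seq, size_t index) {
+      if (index >= seq.entries.size())
+        seq.entries.resize(index + 1);  // default-construct the new element
+      return seq.entries[index];
+    }
+  };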
+
+
+Flow Sequence
+-------------
+A YAML "flow sequence" is a sequence that when written to YAML it uses the
+inline notation (e.g [ foo, bar ] ). To specify that a sequence type should
+be written in YAML as a flow sequence, your SequenceTraits specialization should
+add "static const bool flow = true;". For instance:
+
+.. code-block:: c++
+
+ template <>
+ struct SequenceTraits<MyList> {
+ static size_t size(IO &io, MyList &list) { ... }
+    static MyListEl &element(IO &io, MyList &list, size_t index) { ... }
+
+ // The existence of this member causes YAML I/O to use a flow sequence
+ static const bool flow = true;
+ };
+
+With the above, if you used MyList as the data type in your native data
+structures, then when it is converted to YAML, a flow sequence of its elements
+will be used (e.g. [ 10, -3, 4 ]).
+
+
+Utility Macros
+--------------
+Since a common source of sequences is std::vector<>, YAML I/O provides two
+macros, LLVM_YAML_IS_SEQUENCE_VECTOR() and LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(),
+which can be used to easily specify SequenceTraits<> on a std::vector type. YAML
+I/O does not partially specialize SequenceTraits on std::vector<> because that
+would force all vectors to be sequences. An example use of the macros:
+
+.. code-block:: c++
+
+ std::vector<MyType1>;
+ std::vector<MyType2>;
+ LLVM_YAML_IS_SEQUENCE_VECTOR(MyType1)
+ LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(MyType2)
+
+
+
+Document List
+=============
+
+YAML allows you to define multiple "documents" in a single YAML file. Each
+new document starts with a left aligned "---" token. The end of all documents
+is denoted with a left aligned "..." token. Many users of YAML will never
+have need for multiple documents. The top level node in their YAML schema
+will be a mapping or sequence. For those cases, the following is not needed.
+But for cases where you do want multiple documents, you can specify a
+trait for your document list type. The trait has the same methods as
+SequenceTraits but is named DocumentListTraits. For example:
+
+.. code-block:: c++
+
+ template <>
+ struct DocumentListTraits<MyDocList> {
+ static size_t size(IO &io, MyDocList &list) { ... }
+    static MyDocType &element(IO &io, MyDocList &list, size_t index) { ... }
+ };
+
+
+User Context Data
+=================
+When an llvm::yaml::Input or llvm::yaml::Output object is created, its
+constructor takes an optional "context" parameter. This is a pointer to
+whatever state information you might need.
+
+For instance, in a previous example we showed how the conversion type for a
+flags field could be determined at runtime based on the value of another field
+in the mapping. But what if an inner mapping needs to know some field value
+of an outer mapping? That is where the "context" parameter comes in. You
+can set values in the context in the outer map's mapping() method and
+retrieve those values in the inner map's mapping() method.
+
+The context value is just a void*. All of your traits which use the context
+and operate on your native data types need to agree on what the context value
+actually is. It could be a pointer to an object or struct which your various
+traits use to share context-sensitive information.
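+
+A minimal sketch of the pattern, assuming the Input/Output object was created
+with a pointer to a ContextInfo struct (the types and field names here are
+invented for illustration):
+
+.. code-block:: c++
+
+  struct ContextInfo {
+    CPUs cpu;               // shared between outer and inner mappings
+  };
+
+  template <>
+  struct MappingTraits<OuterInfo> {
+    static void mapping(IO &io, OuterInfo &outer) {
+      io.mapRequired("cpu", outer.cpu);
+      // Record the cpu so inner mappings can see it.
+      static_cast<ContextInfo *>(io.getContext())->cpu = outer.cpu;
+      io.mapRequired("inner", outer.inner);
+    }
+  };
+
+  template <>
+  struct MappingTraits<InnerInfo> {
+    static void mapping(IO &io, InnerInfo &inner) {
+      CPUs cpu = static_cast<ContextInfo *>(io.getContext())->cpu;
+      // ... choose how to map inner's fields based on cpu ...
+    }
+  };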
+
+
+Output
+======
+
+The llvm::yaml::Output class is used to generate a YAML document from your
+in-memory data structures, using traits defined on your data types.
+To instantiate an Output object you need an llvm::raw_ostream, and optionally
+a context pointer:
+
+.. code-block:: c++
+
+ class Output : public IO {
+ public:
+ Output(llvm::raw_ostream &, void *context=NULL);
+
+Once you have an Output object, you can use the C++ stream operator on it
+to write your native data as YAML. One thing to recall is that a YAML file
+can contain multiple "documents". If the top level data structure you are
+streaming as YAML is a mapping, scalar, or sequence, then Output assumes you
+are generating one document and wraps the output
+with a leading "``---``" and trailing "``...``".
+
+.. code-block:: c++
+
+ using llvm::yaml::Output;
+
+ void dumpMyMapDoc(const MyMapType &info) {
+ Output yout(llvm::outs());
+ yout << info;
+ }
+
+The above could produce output like:
+
+.. code-block:: yaml
+
+ ---
+ name: Tom
+ hat-size: 7
+ ...
+
+On the other hand, if the top level data structure you are streaming as YAML
+has a DocumentListTraits specialization, then Output walks through each element
+of your DocumentList and generates a "---" before the start of each element
+and ends with a "...".
+
+.. code-block:: c++
+
+ using llvm::yaml::Output;
+
+  void dumpMyDocList(const MyDocListType &docList) {
+ Output yout(llvm::outs());
+ yout << docList;
+ }
+
+The above could produce output like:
+
+.. code-block:: yaml
+
+ ---
+ name: Tom
+ hat-size: 7
+ ---
+ name: Tom
+ shoe-size: 11
+ ...
+
+Input
+=====
+
+The llvm::yaml::Input class is used to parse YAML document(s) into your native
+data structures. To instantiate an Input
+object you need a StringRef to the entire YAML file, and optionally a context
+pointer:
+
+.. code-block:: c++
+
+ class Input : public IO {
+ public:
+ Input(StringRef inputContent, void *context=NULL);
+
+Once you have an Input object, you can use the C++ stream operator to read
+the document(s). If you expect there might be multiple YAML documents in
+one file, you'll need to specialize DocumentListTraits on a list of your
+document type and stream in that document list type. Otherwise you can
+just stream in the document type. Also, you can check whether there were
+any syntax errors in the YAML by calling the error() method on the Input
+object. For example:
+
+.. code-block:: c++
+
+ // Reading a single document
+ using llvm::yaml::Input;
+
+ Input yin(mb.getBuffer());
+
+ // Parse the YAML file
+ MyDocType theDoc;
+ yin >> theDoc;
+
+ // Check for error
+ if ( yin.error() )
+ return;
+
+
+.. code-block:: c++
+
+ // Reading multiple documents in one file
+ using llvm::yaml::Input;
+
+  LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(MyDocType)
+
+ Input yin(mb.getBuffer());
+
+ // Parse the YAML file
+ std::vector<MyDocType> theDocList;
+ yin >> theDocList;
+
+ // Check for error
+ if ( yin.error() )
+ return;
+
+
diff --git a/docs/conf.py b/docs/conf.py
index a1e9b5f6e286..0ac3b7836b9e 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -40,16 +40,16 @@ master_doc = 'index'
# General information about the project.
project = u'LLVM'
-copyright = u'2012, LLVM Project'
+copyright = u'2003-2013, LLVM Project'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
-version = '3.2'
+version = '3.3'
# The full version, including alpha/beta/rc tags.
-release = '3.2'
+release = '3.3'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@@ -95,7 +95,7 @@ html_theme = 'llvm-theme'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
-#html_theme_options = {}
+html_theme_options = { "nosidebar": True }
# Add any paths that contain custom themes here, relative to this directory.
html_theme_path = ["_themes"]
diff --git a/docs/design_and_overview.rst b/docs/design_and_overview.rst
deleted file mode 100644
index ea684155e00f..000000000000
--- a/docs/design_and_overview.rst
+++ /dev/null
@@ -1,36 +0,0 @@
-.. _design_and_overview:
-
-LLVM Design & Overview
-======================
-
-.. toctree::
- :hidden:
-
- GetElementPtr
-
-* `LLVM Language Reference Manual <LangRef.html>`_
-
- Defines the LLVM intermediate representation.
-
-* `Introduction to the LLVM Compiler <http://llvm.org/pubs/2008-10-04-ACAT-LLVM-Intro.html>`_
-
- Presentation providing a users introduction to LLVM.
-
-* `Intro to LLVM <http://www.aosabook.org/en/llvm.html>`_
-
- Book chapter providing a compiler hacker's introduction to LLVM.
-
-* `LLVM: A Compilation Framework forLifelong Program Analysis & Transformation
- <http://llvm.org/pubs/2004-01-30-CGO-LLVM.html>`_
-
- Design overview.
-
-* `LLVM: An Infrastructure for Multi-Stage Optimization
- <http://llvm.org/pubs/2002-12-LattnerMSThesis.html>`_
-
- More details (quite old now).
-
-* :ref:`gep`
-
- Answers to some very frequent questions about LLVM's most frequently
- misunderstood instruction.
diff --git a/docs/development_process.rst b/docs/development_process.rst
deleted file mode 100644
index 4fc20b34129d..000000000000
--- a/docs/development_process.rst
+++ /dev/null
@@ -1,30 +0,0 @@
-.. _development_process:
-
-Development Process Documentation
-=================================
-
-.. toctree::
- :hidden:
-
- MakefileGuide
- Projects
-
-* :ref:`projects`
-
- How-to guide and templates for new projects that *use* the LLVM
- infrastructure. The templates (directory organization, Makefiles, and test
- tree) allow the project code to be located outside (or inside) the ``llvm/``
- tree, while using LLVM header files and libraries.
-
-* `LLVMBuild Documentation <LLVMBuild.html>`_
-
- Describes the LLVMBuild organization and files used by LLVM to specify
- component descriptions.
-
-* :ref:`makefile_guide`
-
- Describes how the LLVM makefiles work and how to use them.
-
-* `How To Release LLVM To The Public <HowToReleaseLLVM.html>`_
-
- This is a guide to preparing LLVM releases. Most developers can ignore it.
diff --git a/docs/doxygen.footer b/docs/doxygen.footer
index c492e7df6cba..95d5434f6712 100644
--- a/docs/doxygen.footer
+++ b/docs/doxygen.footer
@@ -3,7 +3,7 @@
Generated on $datetime for <a href="http://llvm.org/">$projectname</a> by
<a href="http://www.doxygen.org"><img src="doxygen.png" alt="Doxygen"
align="middle" border="0"/>$doxygenversion</a><br>
-Copyright &copy; 2003-2012 University of Illinois at Urbana-Champaign.
+Copyright &copy; 2003-2013 University of Illinois at Urbana-Champaign.
All Rights Reserved.</p>
<hr>
diff --git a/docs/gcc-loops.png b/docs/gcc-loops.png
new file mode 100644
index 000000000000..8923a3115325
--- /dev/null
+++ b/docs/gcc-loops.png
Binary files differ
diff --git a/docs/index.rst b/docs/index.rst
index d406b5257440..c3bb8089da30 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,5 +1,3 @@
-.. _contents:
-
Overview
========
@@ -15,54 +13,396 @@ research projects.
Similarly, documentation is broken down into several high-level groupings
targeted at different audiences:
-* **Design & Overview**
+LLVM Design & Overview
+======================
+
+Several introductory papers and presentations.
+
+.. toctree::
+ :hidden:
+
+ LangRef
+
+:doc:`LangRef`
+ Defines the LLVM intermediate representation.
+
+`Introduction to the LLVM Compiler`__
+  Presentation providing a user's introduction to LLVM.
+
+ .. __: http://llvm.org/pubs/2008-10-04-ACAT-LLVM-Intro.html
+
+`Intro to LLVM`__
+ Book chapter providing a compiler hacker's introduction to LLVM.
+
+ .. __: http://www.aosabook.org/en/llvm.html
+
+
+`LLVM: A Compilation Framework for Lifelong Program Analysis & Transformation`__
+ Design overview.
+
+ .. __: http://llvm.org/pubs/2004-01-30-CGO-LLVM.html
+
+`LLVM: An Infrastructure for Multi-Stage Optimization`__
+ More details (quite old now).
+
+ .. __: http://llvm.org/pubs/2002-12-LattnerMSThesis.html
+
+`Publications mentioning LLVM <http://llvm.org/pubs>`_
+ ..
+
+User Guides
+===========
+
+For those new to the LLVM system.
+
+NOTE: If you are a user who is only interested in using LLVM-based
+compilers, you should look into `Clang <http://clang.llvm.org>`_ or
+`DragonEgg <http://dragonegg.llvm.org>`_ instead. The documentation here is
+intended for users who have a need to work with the intermediate LLVM
+representation.
+
+.. toctree::
+ :hidden:
+
+ CMake
+ HowToBuildOnARM
+ CommandGuide/index
+ GettingStarted
+ GettingStartedVS
+ FAQ
+ Lexicon
+ HowToAddABuilder
+ yaml2obj
+ HowToSubmitABug
+ SphinxQuickstartTemplate
+ Phabricator
+ TestingGuide
+ tutorial/index
+ ReleaseNotes
+ Passes
+ YamlIO
+ GetElementPtr
+
+:doc:`GettingStarted`
+ Discusses how to get up and running quickly with the LLVM infrastructure.
+ Everything from unpacking and compilation of the distribution to execution
+ of some tools.
+
+:doc:`CMake`
+ An addendum to the main Getting Started guide for those using the `CMake
+ build system <http://www.cmake.org>`_.
+
+:doc:`HowToBuildOnARM`
+ Notes on building and testing LLVM/Clang on ARM.
- Several introductory papers and presentations are available at
- :ref:`design_and_overview`.
+:doc:`GettingStartedVS`
+ An addendum to the main Getting Started guide for those using Visual Studio
+ on Windows.
-* **Publications**
+:doc:`tutorial/index`
+ Tutorials about using LLVM. Includes a tutorial about making a custom
+ language with LLVM.
- The list of `publications <http://llvm.org/pubs>`_ based on LLVM.
+:doc:`LLVM Command Guide <CommandGuide/index>`
+ A reference manual for the LLVM command line utilities ("man" pages for LLVM
+ tools).
-* **User Guides**
+:doc:`Passes`
+ A list of optimizations and analyses implemented in LLVM.
- Those new to the LLVM system should first visit the :ref:`userguides`.
+:doc:`FAQ`
+ A list of common questions and problems and their solutions.
- NOTE: If you are a user who is only interested in using LLVM-based
- compilers, you should look into `Clang <http://clang.llvm.org>`_ or
- `DragonEgg <http://dragonegg.llvm.org>`_ instead. The documentation here is
- intended for users who have a need to work with the intermediate LLVM
- representation.
+:doc:`Release notes for the current release <ReleaseNotes>`
+ This describes new features, known bugs, and other limitations.
-* **API Clients**
+:doc:`HowToSubmitABug`
+ Instructions for properly submitting information about any bugs you run into
+ in the LLVM system.
- Developers of applications which use LLVM as a library should visit the
- :ref:`programming`.
+:doc:`SphinxQuickstartTemplate`
+ A template + tutorial for writing new Sphinx documentation. It is meant
+ to be read in source form.
-* **Subsystems**
+:doc:`LLVM Testing Infrastructure Guide <TestingGuide>`
+ A reference manual for using the LLVM testing infrastructure.
- API clients and LLVM developers may be interested in the
- :ref:`subsystems` documentation.
+`How to build the C, C++, ObjC, and ObjC++ front end`__
+ Instructions for building the clang front-end from source.
-* **Development Process**
+ .. __: http://clang.llvm.org/get_started.html
- Additional documentation on the LLVM project can be found at
- :ref:`development_process`.
+:doc:`Lexicon`
+ Definition of acronyms, terms and concepts used in LLVM.
-* **Mailing Lists**
+:doc:`HowToAddABuilder`
+  Instructions for adding a new builder to the LLVM buildbot master.
- For more information, consider consulting the LLVM :ref:`mailing_lists`.
+:doc:`YamlIO`
+ A reference guide for using LLVM's YAML I/O library.
+
+:doc:`GetElementPtr`
+ Answers to some very frequent questions about LLVM's most frequently
+ misunderstood instruction.
+
+Programming Documentation
+=========================
+
+For developers of applications which use LLVM as a library.
.. toctree::
- :maxdepth: 2
-
- design_and_overview
- userguides
- programming
- subsystems
- development_process
- mailing_lists
-
+ :hidden:
+
+ Atomics
+ CodingStandards
+ CommandLine
+ CompilerWriterInfo
+ ExtendingLLVM
+ HowToSetUpLLVMStyleRTTI
+ ProgrammersManual
+
+:doc:`LLVM Language Reference Manual <LangRef>`
+ Defines the LLVM intermediate representation and the assembly form of the
+ different nodes.
+
+:doc:`Atomics`
+ Information about LLVM's concurrency model.
+
+:doc:`ProgrammersManual`
+ Introduction to the general layout of the LLVM sourcebase, important classes
+ and APIs, and some tips & tricks.
+
+:doc:`CommandLine`
+ Provides information on using the command line parsing library.
+
+:doc:`CodingStandards`
+ Details the LLVM coding standards and provides useful information on writing
+ efficient C++ code.
+
+:doc:`HowToSetUpLLVMStyleRTTI`
+ How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your
+ class hierarchy.
+
+:doc:`ExtendingLLVM`
+ Look here to see how to add instructions and intrinsics to LLVM.
+
+`Doxygen generated documentation <http://llvm.org/doxygen/>`_
+ (`classes <http://llvm.org/doxygen/inherits.html>`_)
+ (`tarball <http://llvm.org/doxygen/doxygen.tar.gz>`_)
+
+`ViewVC Repository Browser <http://llvm.org/viewvc/>`_
+ ..
+
+:doc:`CompilerWriterInfo`
+ A list of helpful links for compiler writers.
+
+Subsystem Documentation
+=======================
+
+For API clients and LLVM developers.
+
+.. toctree::
+ :hidden:
+
+ AliasAnalysis
+ BitCodeFormat
+ BranchWeightMetadata
+ Bugpoint
+ CodeGenerator
+ ExceptionHandling
+ LinkTimeOptimization
+ SegmentedStacks
+ TableGenFundamentals
+ DebuggingJITedCode
+ GoldPlugin
+ MarkedUpDisassembly
+ SystemLibrary
+ SourceLevelDebugging
+ Vectorizers
+ WritingAnLLVMBackend
+ GarbageCollection
+ WritingAnLLVMPass
+ TableGen/LangRef
+ HowToUseAttributes
+ NVPTXUsage
+
+:doc:`WritingAnLLVMPass`
+ Information on how to write LLVM transformations and analyses.
+
+:doc:`WritingAnLLVMBackend`
+ Information on how to write LLVM backends for machine targets.
+
+:doc:`CodeGenerator`
+ The design and implementation of the LLVM code generator. Useful if you are
+  working on retargeting LLVM to a new architecture, designing a new codegen
+ pass, or enhancing existing components.
+
+:doc:`TableGenFundamentals`
+ Describes the TableGen tool, which is used heavily by the LLVM code
+ generator.
+
+:doc:`AliasAnalysis`
+ Information on how to write a new alias analysis implementation or how to
+ use existing analyses.
+
+:doc:`GarbageCollection`
+ The interfaces source-language compilers should use for compiling GC'd
+ programs.
+
+:doc:`Source Level Debugging with LLVM <SourceLevelDebugging>`
+ This document describes the design and philosophy behind the LLVM
+ source-level debugger.
+
+:doc:`Vectorizers`
+ This document describes the current status of vectorization in LLVM.
+
+:doc:`ExceptionHandling`
+ This document describes the design and implementation of exception handling
+ in LLVM.
+
+:doc:`Bugpoint`
+ Automatic bug finder and test-case reducer description and usage
+ information.
+
+:doc:`BitCodeFormat`
+ This describes the file format and encoding used for LLVM "bc" files.
+
+:doc:`System Library <SystemLibrary>`
+  This document describes the LLVM System Library (``lib/System``) and
+  how to keep LLVM source code portable.
+
+:doc:`LinkTimeOptimization`
+  This document describes the interface between the LLVM intermodular
+  optimizer and the linker, and its design.
+
+:doc:`GoldPlugin`
+ How to build your programs with link-time optimization on Linux.
+
+:doc:`DebuggingJITedCode`
+ How to debug JITed code with GDB.
+
+:doc:`BranchWeightMetadata`
+ Provides information about Branch Prediction Information.
+
+:doc:`SegmentedStacks`
+ This document describes segmented stacks and how they are used in LLVM.
+
+:doc:`MarkedUpDisassembly`
+ This document describes the optional rich disassembly output syntax.
+
+:doc:`HowToUseAttributes`
+ Answers some questions about the new Attributes infrastructure.
+
+:doc:`NVPTXUsage`
+ This document describes using the NVPTX back-end to compile GPU kernels.
+
+
+Development Process Documentation
+=================================
+
+Information about LLVM's development process.
+
+.. toctree::
+ :hidden:
+
+ DeveloperPolicy
+ MakefileGuide
+ Projects
+ LLVMBuild
+ HowToReleaseLLVM
+ Packaging
+
+:doc:`DeveloperPolicy`
+ The LLVM project's policy towards developers and their contributions.
+
+:doc:`Projects`
+ How-to guide and templates for new projects that *use* the LLVM
+ infrastructure. The templates (directory organization, Makefiles, and test
+ tree) allow the project code to be located outside (or inside) the ``llvm/``
+ tree, while using LLVM header files and libraries.
+
+:doc:`LLVMBuild`
+ Describes the LLVMBuild organization and files used by LLVM to specify
+ component descriptions.
+
+:doc:`MakefileGuide`
+ Describes how the LLVM makefiles work and how to use them.
+
+:doc:`HowToReleaseLLVM`
+ This is a guide to preparing LLVM releases. Most developers can ignore it.
+
+:doc:`Packaging`
+ Advice on packaging LLVM into a distribution.
+
+Community
+=========
+
+LLVM has a thriving community of friendly and helpful developers.
+The two primary communication mechanisms in the LLVM community are mailing
+lists and IRC.
+
+Mailing Lists
+-------------
+
+If you can't find what you need in these docs, try consulting the mailing
+lists.
+
+`Developer's List (llvmdev)`__
+ This list is for people who want to be included in technical discussions of
+ LLVM. People post to this list when they have questions about writing code
+ for or using the LLVM tools. It is relatively low volume.
+
+ .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
+
+`Commits Archive (llvm-commits)`__
+ This list contains all commit messages that are made when LLVM developers
+ commit code changes to the repository. It also serves as a forum for
+ patch review (i.e. send patches here). It is useful for those who want to
+ stay on the bleeding edge of LLVM development. This list is very high
+ volume.
+
+ .. __: http://lists.cs.uiuc.edu/pipermail/llvm-commits/
+
+`Bugs & Patches Archive (llvmbugs)`__
+ This list gets emailed every time a bug is opened and closed. It is
+ higher volume than the LLVMdev list.
+
+ .. __: http://lists.cs.uiuc.edu/pipermail/llvmbugs/
+
+`Test Results Archive (llvm-testresults)`__
+ A message is automatically sent to this list by every active nightly tester
+ when it completes. As such, this list gets email several times each day,
+ making it a high volume list.
+
+ .. __: http://lists.cs.uiuc.edu/pipermail/llvm-testresults/
+
+`LLVM Announcements List (llvm-announce)`__
+ This is a low volume list that provides important announcements regarding
+ LLVM. It gets email about once a month.
+
+ .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvm-announce
+
+IRC
+---
+
+Users and developers of the LLVM project (including subprojects such as Clang)
+can be found in #llvm on `irc.oftc.net <irc://irc.oftc.net/llvm>`_.
+
+This channel has several bots.
+
+* Buildbot reporters
+
+ * llvmbb - Bot for the main LLVM buildbot master.
+ http://lab.llvm.org:8011/console
+ * bb-chapuni - An individually run buildbot master. http://bb.pgr.jp/console
+ * smooshlab - Apple's internal buildbot master.
+
+* robot - Bugzilla linker. %bug <number>
+
+* clang-bot - A `geordi <http://www.eelis.net/geordi/>`_ instance running
+ near-trunk clang instead of gcc.
+
+
Indices and tables
==================
diff --git a/docs/linpack-pc.png b/docs/linpack-pc.png
new file mode 100644
index 000000000000..bbbee7d67ef9
--- /dev/null
+++ b/docs/linpack-pc.png
Binary files differ
diff --git a/docs/mailing_lists.rst b/docs/mailing_lists.rst
deleted file mode 100644
index 106f1da48f89..000000000000
--- a/docs/mailing_lists.rst
+++ /dev/null
@@ -1,35 +0,0 @@
-.. _mailing_lists:
-
-Mailing Lists
-=============
-
- * `LLVM Announcements List
- <http://lists.cs.uiuc.edu/mailman/listinfo/llvm-announce>`_
-
- This is a low volume list that provides important announcements regarding
- LLVM. It gets email about once a month.
-
- * `Developer's List <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_
-
- This list is for people who want to be included in technical discussions of
- LLVM. People post to this list when they have questions about writing code
- for or using the LLVM tools. It is relatively low volume.
-
- * `Bugs & Patches Archive <http://lists.cs.uiuc.edu/pipermail/llvmbugs/>`_
-
- This list gets emailed every time a bug is opened and closed, and when people
- submit patches to be included in LLVM. It is higher volume than the LLVMdev
- list.
-
- * `Commits Archive <http://lists.cs.uiuc.edu/pipermail/llvm-commits/>`_
-
- This list contains all commit messages that are made when LLVM developers
- commit code changes to the repository. It is useful for those who want to
- stay on the bleeding edge of LLVM development. This list is very high volume.
-
- * `Test Results Archive
- <http://lists.cs.uiuc.edu/pipermail/llvm-testresults/>`_
-
- A message is automatically sent to this list by every active nightly tester
- when it completes. As such, this list gets email several times each day,
- making it a high volume list.
diff --git a/docs/programming.rst b/docs/programming.rst
deleted file mode 100644
index c4eec59417e8..000000000000
--- a/docs/programming.rst
+++ /dev/null
@@ -1,57 +0,0 @@
-.. _programming:
-
-Programming Documentation
-=========================
-
-.. toctree::
- :hidden:
-
- Atomics
- CodingStandards
- CommandLine
- CompilerWriterInfo
- ExtendingLLVM
- HowToSetUpLLVMStyleRTTI
-
-* `LLVM Language Reference Manual <LangRef.html>`_
-
- Defines the LLVM intermediate representation and the assembly form of the
- different nodes.
-
-* :ref:`atomics`
-
- Information about LLVM's concurrency model.
-
-* `The LLVM Programmers Manual <ProgrammersManual.html>`_
-
- Introduction to the general layout of the LLVM sourcebase, important classes
- and APIs, and some tips & tricks.
-
-* :ref:`commandline`
-
- Provides information on using the command line parsing library.
-
-* :ref:`coding_standards`
-
- Details the LLVM coding standards and provides useful information on writing
- efficient C++ code.
-
-* :doc:`HowToSetUpLLVMStyleRTTI`
-
- How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your
- class hierarchy.
-
-* :ref:`extending_llvm`
-
- Look here to see how to add instructions and intrinsics to LLVM.
-
-* `Doxygen generated documentation <http://llvm.org/doxygen/>`_
-
- (`classes <http://llvm.org/doxygen/inherits.html>`_)
- (`tarball <http://llvm.org/doxygen/doxygen.tar.gz>`_)
-
-* `ViewVC Repository Browser <http://llvm.org/viewvc/>`_
-
-* :ref:`compiler_writer_info`
-
- A list of helpful links for compiler writers.
diff --git a/docs/subsystems.rst b/docs/subsystems.rst
deleted file mode 100644
index 80d0eed66339..000000000000
--- a/docs/subsystems.rst
+++ /dev/null
@@ -1,106 +0,0 @@
-.. _subsystems:
-
-Subsystem Documentation
-=======================
-
-.. toctree::
- :hidden:
-
- AliasAnalysis
- BitCodeFormat
- BranchWeightMetadata
- Bugpoint
- CodeGenerator
- ExceptionHandling
- LinkTimeOptimization
- SegmentedStacks
- TableGenFundamentals
- DebuggingJITedCode
- GoldPlugin
- MarkedUpDisassembly
-
-* `Writing an LLVM Pass <WritingAnLLVMPass.html>`_
-
- Information on how to write LLVM transformations and analyses.
-
-* `Writing an LLVM Backend <WritingAnLLVMBackend.html>`_
-
- Information on how to write LLVM backends for machine targets.
-
-* :ref:`code_generator`
-
- The design and implementation of the LLVM code generator. Useful if you are
- working on retargetting LLVM to a new architecture, designing a new codegen
- pass, or enhancing existing components.
-
-* :ref:`tablegen`
-
- Describes the TableGen tool, which is used heavily by the LLVM code
- generator.
-
-* :ref:`alias_analysis`
-
- Information on how to write a new alias analysis implementation or how to
- use existing analyses.
-
-* `Accurate Garbage Collection with LLVM <GarbageCollection.html>`_
-
- The interfaces source-language compilers should use for compiling GC'd
- programs.
-
-* `Source Level Debugging with LLVM <SourceLevelDebugging.html>`_
-
- This document describes the design and philosophy behind the LLVM
- source-level debugger.
-
-* :ref:`exception_handling`
-
- This document describes the design and implementation of exception handling
- in LLVM.
-
-* :ref:`bugpoint`
-
- Automatic bug finder and test-case reducer description and usage
- information.
-
-* :ref:`bitcode_format`
-
- This describes the file format and encoding used for LLVM "bc" files.
-
-* `System Library <SystemLibrary.html>`_
-
- This document describes the LLVM System Library (<tt>lib/System</tt>) and
- how to keep LLVM source code portable
-
-* :ref:`lto`
-
- This document describes the interface between LLVM intermodular optimizer
- and the linker and its design
-
-* :ref:`gold-plugin`
-
- How to build your programs with link-time optimization on Linux.
-
-* :ref:`debugging-jited-code`
-
- How to debug JITed code with GDB.
-
-* :ref:`branch_weight`
-
- Provides information about Branch Prediction Information.
-
-* :ref:`segmented_stacks`
-
- This document describes segmented stacks and how they are used in LLVM.
-
-* `Howto: Implementing LLVM Integrated Assembler`_
-
- A simple guide for how to implement an LLVM integrated assembler for an
- architecture.
-
-.. _`Howto: Implementing LLVM Integrated Assembler`: http://www.embecosm.com/download/ean10.html
-
-* :ref:`marked_up_disassembly`
-
- This document describes the optional rich disassembly output syntax.
-
diff --git a/docs/tutorial/LangImpl1.html b/docs/tutorial/LangImpl1.html
deleted file mode 100644
index 717454f392e5..000000000000
--- a/docs/tutorial/LangImpl1.html
+++ /dev/null
@@ -1,348 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Tutorial Introduction and the Lexer</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Tutorial Introduction and the Lexer</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 1
- <ol>
- <li><a href="#intro">Tutorial Introduction</a></li>
- <li><a href="#language">The Basic Language</a></li>
- <li><a href="#lexer">The Lexer</a></li>
- </ol>
-</li>
-<li><a href="LangImpl2.html">Chapter 2</a>: Implementing a Parser and AST</li>
-</ul>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Tutorial Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to the "Implementing a language with LLVM" tutorial. This tutorial
-runs through the implementation of a simple language, showing how fun and
-easy it can be. This tutorial will get you up and started as well as help to
-build a framework you can extend to other languages. The code in this tutorial
-can also be used as a playground to hack on other LLVM specific things.
-</p>
-
-<p>
-The goal of this tutorial is to progressively unveil our language, describing
-how it is built up over time. This will let us cover a fairly broad range of
-language design and LLVM-specific usage issues, showing and explaining the code
-for it all along the way, without overwhelming you with tons of details up
-front.</p>
-
-<p>It is useful to point out ahead of time that this tutorial is really about
-teaching compiler techniques and LLVM specifically, <em>not</em> about teaching
-modern and sane software engineering principles. In practice, this means that
-we'll take a number of shortcuts to simplify the exposition. For example, the
-code leaks memory, uses global variables all over the place, doesn't use nice
-design patterns like <a
-href="http://en.wikipedia.org/wiki/Visitor_pattern">visitors</a>, etc... but it
-is very simple. If you dig in and use the code as a basis for future projects,
-fixing these deficiencies shouldn't be hard.</p>
-
-<p>I've tried to put this tutorial together in a way that makes chapters easy to
-skip over if you are already familiar with or are uninterested in the various
-pieces. The structure of the tutorial is:
-</p>
-
-<ul>
-<li><b><a href="#language">Chapter #1</a>: Introduction to the Kaleidoscope
-language, and the definition of its Lexer</b> - This shows where we are going
-and the basic functionality that we want it to do. In order to make this
-tutorial maximally understandable and hackable, we choose to implement
-everything in C++ instead of using lexer and parser generators. LLVM obviously
-works just fine with such tools, feel free to use one if you prefer.</li>
-<li><b><a href="LangImpl2.html">Chapter #2</a>: Implementing a Parser and
-AST</b> - With the lexer in place, we can talk about parsing techniques and
-basic AST construction. This tutorial describes recursive descent parsing and
-operator precedence parsing. Nothing in Chapters 1 or 2 is LLVM-specific,
-the code doesn't even link in LLVM at this point. :)</li>
-<li><b><a href="LangImpl3.html">Chapter #3</a>: Code generation to LLVM IR</b> -
-With the AST ready, we can show off how easy generation of LLVM IR really
-is.</li>
-<li><b><a href="LangImpl4.html">Chapter #4</a>: Adding JIT and Optimizer
-Support</b> - Because a lot of people are interested in using LLVM as a JIT,
-we'll dive right into it and show you the 3 lines it takes to add JIT support.
-LLVM is also useful in many other ways, but this is one simple and "sexy" way
-to shows off its power. :)</li>
-<li><b><a href="LangImpl5.html">Chapter #5</a>: Extending the Language: Control
-Flow</b> - With the language up and running, we show how to extend it with
-control flow operations (if/then/else and a 'for' loop). This gives us a chance
-to talk about simple SSA construction and control flow.</li>
-<li><b><a href="LangImpl6.html">Chapter #6</a>: Extending the Language:
-User-defined Operators</b> - This is a silly but fun chapter that talks about
-extending the language to let the user program define their own arbitrary
-unary and binary operators (with assignable precedence!). This lets us build a
-significant piece of the "language" as library routines.</li>
-<li><b><a href="LangImpl7.html">Chapter #7</a>: Extending the Language: Mutable
-Variables</b> - This chapter talks about adding user-defined local variables
-along with an assignment operator. The interesting part about this is how
-easy and trivial it is to construct SSA form in LLVM: no, LLVM does <em>not</em>
-require your front-end to construct SSA form!</li>
-<li><b><a href="LangImpl8.html">Chapter #8</a>: Conclusion and other useful LLVM
-tidbits</b> - This chapter wraps up the series by talking about potential
-ways to extend the language, but also includes a bunch of pointers to info about
-"special topics" like adding garbage collection support, exceptions, debugging,
-support for "spaghetti stacks", and a bunch of other tips and tricks.</li>
-
-</ul>
-
-<p>By the end of the tutorial, we'll have written a bit less than 700 lines of
-non-comment, non-blank, lines of code. With this small amount of code, we'll
-have built up a very reasonable compiler for a non-trivial language including
-a hand-written lexer, parser, AST, as well as code generation support with a JIT
-compiler. While other systems may have interesting "hello world" tutorials,
-I think the breadth of this tutorial is a great testament to the strengths of
-LLVM and why you should consider it if you're interested in language or compiler
-design.</p>
-
-<p>A note about this tutorial: we expect you to extend the language and play
-with it on your own. Take the code and go crazy hacking away at it, compilers
-don't need to be scary creatures - it can be a lot of fun to play with
-languages!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="language">The Basic Language</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This tutorial will be illustrated with a toy language that we'll call
-"<a href="http://en.wikipedia.org/wiki/Kaleidoscope">Kaleidoscope</a>" (derived
-from "meaning beautiful, form, and view").
-Kaleidoscope is a procedural language that allows you to define functions, use
-conditionals, math, etc. Over the course of the tutorial, we'll extend
-Kaleidoscope to support the if/then/else construct, a for loop, user defined
-operators, JIT compilation with a simple command line interface, etc.</p>
-
-<p>Because we want to keep things simple, the only datatype in Kaleidoscope is a
-64-bit floating point type (aka 'double' in C parlance). As such, all values
-are implicitly double precision and the language doesn't require type
-declarations. This gives the language a very nice and simple syntax. For
-example, the following simple example computes <a
-href="http://en.wikipedia.org/wiki/Fibonacci_number">Fibonacci numbers:</a></p>
-
-<div class="doc_code">
-<pre>
-# Compute the x'th fibonacci number.
-def fib(x)
- if x &lt; 3 then
- 1
- else
- fib(x-1)+fib(x-2)
-
-# This expression will compute the 40th number.
-fib(40)
-</pre>
-</div>
-
-<p>We also allow Kaleidoscope to call into standard library functions (the LLVM
-JIT makes this completely trivial). This means that you can use the 'extern'
-keyword to define a function before you use it (this is also useful for mutually
-recursive functions). For example:</p>
-
-<div class="doc_code">
-<pre>
-extern sin(arg);
-extern cos(arg);
-extern atan2(arg1 arg2);
-
-atan2(sin(.4), cos(42))
-</pre>
-</div>
-
-<p>A more interesting example is included in Chapter 6 where we write a little
-Kaleidoscope application that <a href="LangImpl6.html#example">displays
-a Mandelbrot Set</a> at various levels of magnification.</p>
-
-<p>Lets dive into the implementation of this language!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="lexer">The Lexer</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>When it comes to implementing a language, the first thing needed is
-the ability to process a text file and recognize what it says. The traditional
-way to do this is to use a "<a
-href="http://en.wikipedia.org/wiki/Lexical_analysis">lexer</a>" (aka 'scanner')
-to break the input up into "tokens". Each token returned by the lexer includes
-a token code and potentially some metadata (e.g. the numeric value of a number).
-First, we define the possibilities:
-</p>
-
-<div class="doc_code">
-<pre>
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
- tok_eof = -1,
-
- // commands
- tok_def = -2, tok_extern = -3,
-
- // primary
- tok_identifier = -4, tok_number = -5,
-};
-
-static std::string IdentifierStr; // Filled in if tok_identifier
-static double NumVal; // Filled in if tok_number
-</pre>
-</div>
-
-<p>Each token returned by our lexer will either be one of the Token enum values
-or it will be an 'unknown' character like '+', which is returned as its ASCII
-value. If the current token is an identifier, the <tt>IdentifierStr</tt>
-global variable holds the name of the identifier. If the current token is a
-numeric literal (like 1.0), <tt>NumVal</tt> holds its value. Note that we use
-global variables for simplicity, this is not the best choice for a real language
-implementation :).
-</p>
-
-<p>The actual implementation of the lexer is a single function named
-<tt>gettok</tt>. The <tt>gettok</tt> function is called to return the next token
-from standard input. Its definition starts as:</p>
-
-<div class="doc_code">
-<pre>
-/// gettok - Return the next token from standard input.
-static int gettok() {
- static int LastChar = ' ';
-
- // Skip any whitespace.
- while (isspace(LastChar))
- LastChar = getchar();
-</pre>
-</div>
-
-<p>
-<tt>gettok</tt> works by calling the C <tt>getchar()</tt> function to read
-characters one at a time from standard input. It eats them as it recognizes
-them and stores the last character read, but not processed, in LastChar. The
-first thing that it has to do is ignore whitespace between tokens. This is
-accomplished with the loop above.</p>
-
-<p>The next thing <tt>gettok</tt> needs to do is recognize identifiers and
-specific keywords like "def". Kaleidoscope does this with this simple loop:</p>
-
-<div class="doc_code">
-<pre>
- if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
- IdentifierStr = LastChar;
- while (isalnum((LastChar = getchar())))
- IdentifierStr += LastChar;
-
- if (IdentifierStr == "def") return tok_def;
- if (IdentifierStr == "extern") return tok_extern;
- return tok_identifier;
- }
-</pre>
-</div>
-
-<p>Note that this code sets the '<tt>IdentifierStr</tt>' global whenever it
-lexes an identifier. Also, since language keywords are matched by the same
-loop, we handle them here inline. Numeric values are similar:</p>
-
-<div class="doc_code">
-<pre>
- if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
- std::string NumStr;
- do {
- NumStr += LastChar;
- LastChar = getchar();
- } while (isdigit(LastChar) || LastChar == '.');
-
- NumVal = strtod(NumStr.c_str(), 0);
- return tok_number;
- }
-</pre>
-</div>
-
-<p>This is all pretty straight-forward code for processing input. When reading
-a numeric value from input, we use the C <tt>strtod</tt> function to convert it
-to a numeric value that we store in <tt>NumVal</tt>. Note that this isn't doing
-sufficient error checking: it will incorrectly read "1.23.45.67" and handle it as
-if you typed in "1.23". Feel free to extend it :). Next we handle comments:
-</p>
-
-<div class="doc_code">
-<pre>
- if (LastChar == '#') {
- // Comment until end of line.
- do LastChar = getchar();
- while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-
- if (LastChar != EOF)
- return gettok();
- }
-</pre>
-</div>
-
-<p>We handle comments by skipping to the end of the line and then return the
-next token. Finally, if the input doesn't match one of the above cases, it is
-either an operator character like '+' or the end of the file. These are handled
-with this code:</p>
-
-<div class="doc_code">
-<pre>
- // Check for end of file. Don't eat the EOF.
- if (LastChar == EOF)
- return tok_eof;
-
- // Otherwise, just return the character as its ascii value.
- int ThisChar = LastChar;
- LastChar = getchar();
- return ThisChar;
-}
-</pre>
-</div>
-
-<p>With this, we have the complete lexer for the basic Kaleidoscope language
-(the <a href="LangImpl2.html#code">full code listing</a> for the Lexer is
-available in the <a href="LangImpl2.html">next chapter</a> of the tutorial).
-Next we'll <a href="LangImpl2.html">build a simple parser that uses this to
-build an Abstract Syntax Tree</a>. When we have that, we'll include a driver
-so that you can use the lexer and parser together.
-</p>
-
-<a href="LangImpl2.html">Next: Implementing a Parser and AST</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-05-03 00:46:36 +0200 (Thu, 03 May 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl1.rst b/docs/tutorial/LangImpl1.rst
new file mode 100644
index 000000000000..aa619cf19f26
--- /dev/null
+++ b/docs/tutorial/LangImpl1.rst
@@ -0,0 +1,278 @@
+=================================================
+Kaleidoscope: Tutorial Introduction and the Lexer
+=================================================
+
+.. contents::
+ :local:
+
+Tutorial Introduction
+=====================
+
+Welcome to the "Implementing a language with LLVM" tutorial. This
+tutorial runs through the implementation of a simple language, showing
+how fun and easy it can be. This tutorial will get you up and running as
+well as help to build a framework you can extend to other languages. The
+code in this tutorial can also be used as a playground to hack on other
+LLVM specific things.
+
+The goal of this tutorial is to progressively unveil our language,
+describing how it is built up over time. This will let us cover a fairly
+broad range of language design and LLVM-specific usage issues, showing
+and explaining the code for it all along the way, without overwhelming
+you with tons of details up front.
+
+It is useful to point out ahead of time that this tutorial is really
+about teaching compiler techniques and LLVM specifically, *not* about
+teaching modern and sane software engineering principles. In practice,
+this means that we'll take a number of shortcuts to simplify the
+exposition. For example, the code leaks memory, uses global variables
+all over the place, doesn't use nice design patterns like
+`visitors <http://en.wikipedia.org/wiki/Visitor_pattern>`_, etc... but
+it is very simple. If you dig in and use the code as a basis for future
+projects, fixing these deficiencies shouldn't be hard.
+
+I've tried to put this tutorial together in a way that makes chapters
+easy to skip over if you are already familiar with or are uninterested
+in the various pieces. The structure of the tutorial is:
+
+- `Chapter #1 <#language>`_: Introduction to the Kaleidoscope
+ language, and the definition of its Lexer - This shows where we are
+ going and the basic functionality that we want it to do. In order to
+ make this tutorial maximally understandable and hackable, we choose
+ to implement everything in C++ instead of using lexer and parser
+ generators. LLVM obviously works just fine with such tools, feel free
+ to use one if you prefer.
+- `Chapter #2 <LangImpl2.html>`_: Implementing a Parser and AST -
+ With the lexer in place, we can talk about parsing techniques and
+ basic AST construction. This tutorial describes recursive descent
+ parsing and operator precedence parsing. Nothing in Chapters 1 or 2
+ is LLVM-specific; the code doesn't even link in LLVM at this point.
+ :)
+- `Chapter #3 <LangImpl3.html>`_: Code generation to LLVM IR - With
+ the AST ready, we can show off how easy generation of LLVM IR really
+ is.
+- `Chapter #4 <LangImpl4.html>`_: Adding JIT and Optimizer Support
+ - Because a lot of people are interested in using LLVM as a JIT,
+ we'll dive right into it and show you the 3 lines it takes to add JIT
+ support. LLVM is also useful in many other ways, but this is one
+ simple and "sexy" way to show off its power. :)
+- `Chapter #5 <LangImpl5.html>`_: Extending the Language: Control
+ Flow - With the language up and running, we show how to extend it
+ with control flow operations (if/then/else and a 'for' loop). This
+ gives us a chance to talk about simple SSA construction and control
+ flow.
+- `Chapter #6 <LangImpl6.html>`_: Extending the Language:
+ User-defined Operators - This is a silly but fun chapter that talks
+ about extending the language to let the user define their own
+ arbitrary unary and binary operators (with assignable precedence!).
+ This lets us build a significant piece of the "language" as library
+ routines.
+- `Chapter #7 <LangImpl7.html>`_: Extending the Language: Mutable
+ Variables - This chapter talks about adding user-defined local
+ variables along with an assignment operator. The interesting part
+ about this is how easy and trivial it is to construct SSA form in
+ LLVM: no, LLVM does *not* require your front-end to construct SSA
+ form!
+- `Chapter #8 <LangImpl8.html>`_: Conclusion and other useful LLVM
+ tidbits - This chapter wraps up the series by talking about
+ potential ways to extend the language, but also includes a bunch of
+ pointers to info about "special topics" like adding garbage
+ collection support, exceptions, debugging, support for "spaghetti
+ stacks", and a bunch of other tips and tricks.
+
+By the end of the tutorial, we'll have written a bit less than 700
+non-comment, non-blank lines of code. With this small amount of
+code, we'll have built up a very reasonable compiler for a non-trivial
+language including a hand-written lexer, parser, AST, as well as code
+generation support with a JIT compiler. While other systems may have
+interesting "hello world" tutorials, I think the breadth of this
+tutorial is a great testament to the strengths of LLVM and why you
+should consider it if you're interested in language or compiler design.
+
+A note about this tutorial: we expect you to extend the language and
+play with it on your own. Take the code and go crazy hacking away at it;
+compilers don't need to be scary creatures, and it can be a lot of fun to
+play with languages!
+
+The Basic Language
+==================
+
+This tutorial will be illustrated with a toy language that we'll call
+"`Kaleidoscope <http://en.wikipedia.org/wiki/Kaleidoscope>`_" (derived
+from "meaning beautiful, form, and view"). Kaleidoscope is a procedural
+language that allows you to define functions, use conditionals, math,
+etc. Over the course of the tutorial, we'll extend Kaleidoscope to
+support the if/then/else construct, a for loop, user-defined operators,
+JIT compilation with a simple command line interface, etc.
+
+Because we want to keep things simple, the only datatype in Kaleidoscope
+is a 64-bit floating point type (aka 'double' in C parlance). As such,
+all values are implicitly double precision and the language doesn't
+require type declarations. This gives the language a very nice and
+simple syntax. For example, the following function computes
+`Fibonacci numbers <http://en.wikipedia.org/wiki/Fibonacci_number>`_:
+
+::
+
+ # Compute the x'th fibonacci number.
+ def fib(x)
+ if x < 3 then
+ 1
+ else
+ fib(x-1)+fib(x-2)
+
+ # This expression will compute the 40th number.
+ fib(40)
+
+We also allow Kaleidoscope to call into standard library functions (the
+LLVM JIT makes this completely trivial). This means that you can use the
+'extern' keyword to declare a function before you use it (this is also
+useful for mutually recursive functions). For example:
+
+::
+
+ extern sin(arg);
+ extern cos(arg);
+ extern atan2(arg1 arg2);
+
+ atan2(sin(.4), cos(42))
+
+A more interesting example is included in Chapter 6 where we write a
+little Kaleidoscope application that `displays a Mandelbrot
+Set <LangImpl6.html#example>`_ at various levels of magnification.
+
+Let's dive into the implementation of this language!
+
+The Lexer
+=========
+
+When it comes to implementing a language, the first thing needed is the
+ability to process a text file and recognize what it says. The
+traditional way to do this is to use a
+"`lexer <http://en.wikipedia.org/wiki/Lexical_analysis>`_" (aka
+'scanner') to break the input up into "tokens". Each token returned by
+the lexer includes a token code and potentially some metadata (e.g. the
+numeric value of a number). First, we define the possibilities:
+
+.. code-block:: c++
+
+ // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+ // of these for known things.
+ enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5,
+ };
+
+ static std::string IdentifierStr; // Filled in if tok_identifier
+ static double NumVal; // Filled in if tok_number
+
+Each token returned by our lexer will either be one of the Token enum
+values or it will be an 'unknown' character like '+', which is returned
+as its ASCII value. If the current token is an identifier, the
+``IdentifierStr`` global variable holds the name of the identifier. If
+the current token is a numeric literal (like 1.0), ``NumVal`` holds its
+value. Note that we use global variables for simplicity; this is not the
+best choice for a real language implementation :).
+
+The actual implementation of the lexer is a single function named
+``gettok``. The ``gettok`` function is called to return the next token
+from standard input. Its definition starts as:
+
+.. code-block:: c++
+
+ /// gettok - Return the next token from standard input.
+ static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+``gettok`` works by calling the C ``getchar()`` function to read
+characters one at a time from standard input. It eats them as it
+recognizes them and stores the last character read, but not processed,
+in ``LastChar``. The first thing it has to do is ignore whitespace
+between tokens. This is accomplished with the loop above.
+
+The next thing ``gettok`` needs to do is recognize identifiers and
+specific keywords like "def". Kaleidoscope handles this with the
+following simple loop:
+
+.. code-block:: c++
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ return tok_identifier;
+ }
+
+Note that this code sets the '``IdentifierStr``' global whenever it
+lexes an identifier. Also, since language keywords are matched by the
+same loop, we handle them here inline. Numeric values are similar:
+
+.. code-block:: c++
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+This is all pretty straight-forward code for processing input. When
+reading a numeric value from input, we use the C ``strtod`` function to
+convert it to a numeric value that we store in ``NumVal``. Note that
+this isn't doing sufficient error checking: it will incorrectly read
+"1.23.45.67" and handle it as if you typed in "1.23". Feel free to
+extend it :); one possible tightening is sketched below.
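+
+The following sketch is ours, not part of the tutorial's code: it
+tracks whether a decimal point has already been seen and reports
+malformed literals like "1.23.45" instead of silently truncating them.
+It assumes the surrounding lexer definitions from above:
+
+.. code-block:: c++
+
+    if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+      std::string NumStr;
+      bool SeenDot = false;
+      do {
+        if (LastChar == '.' && SeenDot) {
+          // A second '.' makes the literal malformed: report it, skip the
+          // rest of the run, and continue with the next token.
+          fprintf(stderr, "Error: malformed number\n");
+          do LastChar = getchar();
+          while (isdigit(LastChar) || LastChar == '.');
+          return gettok();
+        }
+        SeenDot |= (LastChar == '.');
+        NumStr += LastChar;
+        LastChar = getchar();
+      } while (isdigit(LastChar) || LastChar == '.');
+
+      NumVal = strtod(NumStr.c_str(), 0);
+      return tok_number;
+    }
+
+Next we handle comments: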
+
+.. code-block:: c++
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+We handle comments by skipping to the end of the line and then returning
+the next token. Finally, if the input doesn't match one of the above
+cases, it is either an operator character like '+' or the end of the
+file. These are handled with this code:
+
+.. code-block:: c++
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+ }
+
+With this, we have the complete lexer for the basic Kaleidoscope
+language (the `full code listing <LangImpl2.html#code>`_ for the Lexer
+is available in the `next chapter <LangImpl2.html>`_ of the tutorial).
+Next we'll `build a simple parser that uses this to build an Abstract
+Syntax Tree <LangImpl2.html>`_. When we have that, we'll include a
+driver so that you can use the lexer and parser together; in the
+meantime, the sketch below shows one way to try the lexer by itself.
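+
+This test driver is our own scaffolding, not part of the tutorial's
+code: it just calls ``gettok`` in a loop and prints each token it gets
+back, which is a handy way to sanity-check the lexer before the parser
+exists:
+
+.. code-block:: c++
+
+    // Assumes the lexer definitions above (and <cstdio>) are in the same file.
+    int main() {
+      while (1) {
+        int Tok = gettok();
+        switch (Tok) {
+        case tok_eof:        return 0;
+        case tok_def:        fprintf(stderr, "def\n"); break;
+        case tok_extern:     fprintf(stderr, "extern\n"); break;
+        case tok_identifier: fprintf(stderr, "identifier: %s\n",
+                                     IdentifierStr.c_str()); break;
+        case tok_number:     fprintf(stderr, "number: %f\n", NumVal); break;
+        default:             fprintf(stderr, "char: '%c'\n", Tok); break;
+        }
+      }
+    }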
+
+`Next: Implementing a Parser and AST <LangImpl2.html>`_
+
diff --git a/docs/tutorial/LangImpl2.html b/docs/tutorial/LangImpl2.html
deleted file mode 100644
index 694f7342d38b..000000000000
--- a/docs/tutorial/LangImpl2.html
+++ /dev/null
@@ -1,1231 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Implementing a Parser and AST</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Implementing a Parser and AST</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 2
- <ol>
- <li><a href="#intro">Chapter 2 Introduction</a></li>
- <li><a href="#ast">The Abstract Syntax Tree (AST)</a></li>
- <li><a href="#parserbasics">Parser Basics</a></li>
- <li><a href="#parserprimexprs">Basic Expression Parsing</a></li>
- <li><a href="#parserbinops">Binary Expression Parsing</a></li>
- <li><a href="#parsertop">Parsing the Rest</a></li>
- <li><a href="#driver">The Driver</a></li>
- <li><a href="#conclusions">Conclusions</a></li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="LangImpl3.html">Chapter 3</a>: Code generation to LLVM IR</li>
-</ul>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 2 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 2 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial. This chapter shows you how to use the lexer, built in
-<a href="LangImpl1.html">Chapter 1</a>, to build a full <a
-href="http://en.wikipedia.org/wiki/Parsing">parser</a> for
-our Kaleidoscope language. Once we have a parser, we'll define and build an <a
-href="http://en.wikipedia.org/wiki/Abstract_syntax_tree">Abstract Syntax
-Tree</a> (AST).</p>
-
-<p>The parser we will build uses a combination of <a
-href="http://en.wikipedia.org/wiki/Recursive_descent_parser">Recursive Descent
-Parsing</a> and <a href=
-"http://en.wikipedia.org/wiki/Operator-precedence_parser">Operator-Precedence
-Parsing</a> to parse the Kaleidoscope language (the latter for
-binary expressions and the former for everything else). Before we get to
-parsing though, lets talk about the output of the parser: the Abstract Syntax
-Tree.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="ast">The Abstract Syntax Tree (AST)</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The AST for a program captures its behavior in such a way that it is easy for
-later stages of the compiler (e.g. code generation) to interpret. We basically
-want one object for each construct in the language, and the AST should closely
-model the language. In Kaleidoscope, we have expressions, a prototype, and a
-function object. We'll start with expressions first:</p>
-
-<div class="doc_code">
-<pre>
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
- virtual ~ExprAST() {}
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
- double Val;
-public:
- NumberExprAST(double val) : Val(val) {}
-};
-</pre>
-</div>
-
-<p>The code above shows the definition of the base ExprAST class and one
-subclass which we use for numeric literals. The important thing to note about
-this code is that the NumberExprAST class captures the numeric value of the
-literal as an instance variable. This allows later phases of the compiler to
-know what the stored numeric value is.</p>
-
-<p>Right now we only create the AST, so there are no useful accessor methods on
-them. It would be very easy to add a virtual method to pretty print the code,
-for example. Here are the other expression AST node definitions that we'll use
-in the basic form of the Kaleidoscope language:
-</p>
-
-<div class="doc_code">
-<pre>
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
- std::string Name;
-public:
- VariableExprAST(const std::string &amp;name) : Name(name) {}
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
- char Op;
- ExprAST *LHS, *RHS;
-public:
- BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
- : Op(op), LHS(lhs), RHS(rhs) {}
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
- std::string Callee;
- std::vector&lt;ExprAST*&gt; Args;
-public:
- CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
- : Callee(callee), Args(args) {}
-};
-</pre>
-</div>
-
-<p>This is all (intentionally) rather straight-forward: variables capture the
-variable name, binary operators capture their opcode (e.g. '+'), and calls
-capture a function name as well as a list of any argument expressions. One thing
-that is nice about our AST is that it captures the language features without
-talking about the syntax of the language. Note that there is no discussion about
-precedence of binary operators, lexical structure, etc.</p>
-
-<p>For our basic language, these are all of the expression nodes we'll define.
-Because it doesn't have conditional control flow, it isn't Turing-complete;
-we'll fix that in a later installment. The two things we need next are a way
-to talk about the interface to a function, and a way to talk about functions
-themselves:</p>
-
-<div class="doc_code">
-<pre>
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
- std::string Name;
- std::vector&lt;std::string&gt; Args;
-public:
- PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args)
- : Name(name), Args(args) {}
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
- PrototypeAST *Proto;
- ExprAST *Body;
-public:
- FunctionAST(PrototypeAST *proto, ExprAST *body)
- : Proto(proto), Body(body) {}
-};
-</pre>
-</div>
-
-<p>In Kaleidoscope, functions are typed with just a count of their arguments.
-Since all values are double precision floating point, the type of each argument
-doesn't need to be stored anywhere. In a more aggressive and realistic
-language, the "ExprAST" class would probably have a type field.</p>
-
-<p>With this scaffolding, we can now talk about parsing expressions and function
-bodies in Kaleidoscope.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserbasics">Parser Basics</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we have an AST to build, we need to define the parser code to build
-it. The idea here is that we want to parse something like "x+y" (which is
-returned as three tokens by the lexer) into an AST that could be generated with
-calls like this:</p>
-
-<div class="doc_code">
-<pre>
- ExprAST *X = new VariableExprAST("x");
- ExprAST *Y = new VariableExprAST("y");
- ExprAST *Result = new BinaryExprAST('+', X, Y);
-</pre>
-</div>
-
-<p>In order to do this, we'll start by defining some basic helper routines:</p>
-
-<div class="doc_code">
-<pre>
-/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser is looking at. getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
- return CurTok = gettok();
-}
-</pre>
-</div>
-
-<p>
-This implements a simple token buffer around the lexer. This allows
-us to look one token ahead at what the lexer is returning. Every function in
-our parser will assume that CurTok is the current token that needs to be
-parsed.</p>
-
-<div class="doc_code">
-<pre>
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-</pre>
-</div>
-
-<p>
-The <tt>Error</tt> routines are simple helper routines that our parser will use
-to handle errors. The error recovery in our parser will not be the best and
-is not particular user-friendly, but it will be enough for our tutorial. These
-routines make it easier to handle errors in routines that have various return
-types: they always return null.</p>
-
-<p>With these basic helper functions, we can implement the first
-piece of our grammar: numeric literals.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserprimexprs">Basic Expression Parsing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>We start with numeric literals, because they are the simplest to process.
-For each production in our grammar, we'll define a function which parses that
-production. For numeric literals, we have:
-</p>
-
-<div class="doc_code">
-<pre>
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
- ExprAST *Result = new NumberExprAST(NumVal);
- getNextToken(); // consume the number
- return Result;
-}
-</pre>
-</div>
-
-<p>This routine is very simple: it expects to be called when the current token
-is a <tt>tok_number</tt> token. It takes the current number value, creates
-a <tt>NumberExprAST</tt> node, advances the lexer to the next token, and finally
-returns.</p>
-
-<p>There are some interesting aspects to this. The most important one is that
-this routine eats all of the tokens that correspond to the production and
-returns the lexer buffer with the next token (which is not part of the grammar
-production) ready to go. This is a fairly standard way to go for recursive
-descent parsers. For a better example, the parenthesis operator is defined like
-this:</p>
-
-<div class="doc_code">
-<pre>
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
- getNextToken(); // eat (.
- ExprAST *V = ParseExpression();
- if (!V) return 0;
-
- if (CurTok != ')')
- return Error("expected ')'");
- getNextToken(); // eat ).
- return V;
-}
-</pre>
-</div>
-
-<p>This function illustrates a number of interesting things about the
-parser:</p>
-
-<p>
-1) It shows how we use the Error routines. When called, this function expects
-that the current token is a '(' token, but after parsing the subexpression, it
-is possible that there is no ')' waiting. For example, if the user types in
-"(4 x" instead of "(4)", the parser should emit an error. Because errors can
-occur, the parser needs a way to indicate that they happened: in our parser, we
-return null on an error.</p>
-
-<p>2) Another interesting aspect of this function is that it uses recursion by
-calling <tt>ParseExpression</tt> (we will soon see that <tt>ParseExpression</tt> can call
-<tt>ParseParenExpr</tt>). This is powerful because it allows us to handle
-recursive grammars, and keeps each production very simple. Note that
-parentheses do not cause construction of AST nodes themselves. While we could
-do it this way, the most important role of parentheses are to guide the parser
-and provide grouping. Once the parser constructs the AST, parentheses are not
-needed.</p>
-
-<p>The next simple production is for handling variable references and function
-calls:</p>
-
-<div class="doc_code">
-<pre>
-/// identifierexpr
-/// ::= identifier
-/// ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
- std::string IdName = IdentifierStr;
-
- getNextToken(); // eat identifier.
-
- if (CurTok != '(') // Simple variable ref.
- return new VariableExprAST(IdName);
-
- // Call.
- getNextToken(); // eat (
- std::vector&lt;ExprAST*&gt; Args;
- if (CurTok != ')') {
- while (1) {
- ExprAST *Arg = ParseExpression();
- if (!Arg) return 0;
- Args.push_back(Arg);
-
- if (CurTok == ')') break;
-
- if (CurTok != ',')
- return Error("Expected ')' or ',' in argument list");
- getNextToken();
- }
- }
-
- // Eat the ')'.
- getNextToken();
-
- return new CallExprAST(IdName, Args);
-}
-</pre>
-</div>
-
-<p>This routine follows the same style as the other routines. (It expects to be
-called if the current token is a <tt>tok_identifier</tt> token). It also has
-recursion and error handling. One interesting aspect of this is that it uses
-<em>look-ahead</em> to determine if the current identifier is a stand alone
-variable reference or if it is a function call expression. It handles this by
-checking to see if the token after the identifier is a '(' token, constructing
-either a <tt>VariableExprAST</tt> or <tt>CallExprAST</tt> node as appropriate.
-</p>
-
-<p>Now that we have all of our simple expression-parsing logic in place, we can
-define a helper function to wrap it together into one entry point. We call this
-class of expressions "primary" expressions, for reasons that will become more
-clear <a href="LangImpl6.html#unary">later in the tutorial</a>. In order to
-parse an arbitrary primary expression, we need to determine what sort of
-expression it is:</p>
-
-<div class="doc_code">
-<pre>
-/// primary
-/// ::= identifierexpr
-/// ::= numberexpr
-/// ::= parenexpr
-static ExprAST *ParsePrimary() {
- switch (CurTok) {
- default: return Error("unknown token when expecting an expression");
- case tok_identifier: return ParseIdentifierExpr();
- case tok_number: return ParseNumberExpr();
- case '(': return ParseParenExpr();
- }
-}
-</pre>
-</div>
-
-<p>Now that you see the definition of this function, it is more obvious why we
-can assume the state of CurTok in the various functions. This uses look-ahead
-to determine which sort of expression is being inspected, and then parses it
-with a function call.</p>
-
-<p>Now that basic expressions are handled, we need to handle binary expressions.
-They are a bit more complex.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserbinops">Binary Expression Parsing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Binary expressions are significantly harder to parse because they are often
-ambiguous. For example, when given the string "x+y*z", the parser can choose
-to parse it as either "(x+y)*z" or "x+(y*z)". With common definitions from
-mathematics, we expect the later parse, because "*" (multiplication) has
-higher <em>precedence</em> than "+" (addition).</p>
-
-<p>There are many ways to handle this, but an elegant and efficient way is to
-use <a href=
-"http://en.wikipedia.org/wiki/Operator-precedence_parser">Operator-Precedence
-Parsing</a>. This parsing technique uses the precedence of binary operators to
-guide recursion. To start with, we need a table of precedences:</p>
-
-<div class="doc_code">
-<pre>
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
- if (!isascii(CurTok))
- return -1;
-
- // Make sure it's a declared binop.
- int TokPrec = BinopPrecedence[CurTok];
- if (TokPrec &lt;= 0) return -1;
- return TokPrec;
-}
-
-int main() {
- // Install standard binary operators.
- // 1 is lowest precedence.
- BinopPrecedence['&lt;'] = 10;
- BinopPrecedence['+'] = 20;
- BinopPrecedence['-'] = 20;
- BinopPrecedence['*'] = 40; // highest.
- ...
-}
-</pre>
-</div>
-
-<p>For the basic form of Kaleidoscope, we will only support 4 binary operators
-(this can obviously be extended by you, our brave and intrepid reader). The
-<tt>GetTokPrecedence</tt> function returns the precedence for the current token,
-or -1 if the token is not a binary operator. Having a map makes it easy to add
-new operators and makes it clear that the algorithm doesn't depend on the
-specific operators involved, but it would be easy enough to eliminate the map
-and do the comparisons in the <tt>GetTokPrecedence</tt> function. (Or just use
-a fixed-size array).</p>
-
-<p>With the helper above defined, we can now start parsing binary expressions.
-The basic idea of operator precedence parsing is to break down an expression
-with potentially ambiguous binary operators into pieces. Consider ,for example,
-the expression "a+b+(c+d)*e*f+g". Operator precedence parsing considers this
-as a stream of primary expressions separated by binary operators. As such,
-it will first parse the leading primary expression "a", then it will see the
-pairs [+, b] [+, (c+d)] [*, e] [*, f] and [+, g]. Note that because parentheses
-are primary expressions, the binary expression parser doesn't need to worry
-about nested subexpressions like (c+d) at all.
-</p>
-
-<p>
-To start, an expression is a primary expression potentially followed by a
-sequence of [binop,primaryexpr] pairs:</p>
-
-<div class="doc_code">
-<pre>
-/// expression
-/// ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
- ExprAST *LHS = ParsePrimary();
- if (!LHS) return 0;
-
- return ParseBinOpRHS(0, LHS);
-}
-</pre>
-</div>
-
-<p><tt>ParseBinOpRHS</tt> is the function that parses the sequence of pairs for
-us. It takes a precedence and a pointer to an expression for the part that has been
-parsed so far. Note that "x" is a perfectly valid expression: As such, "binoprhs" is
-allowed to be empty, in which case it returns the expression that is passed into
-it. In our example above, the code passes the expression for "a" into
-<tt>ParseBinOpRHS</tt> and the current token is "+".</p>
-
-<p>The precedence value passed into <tt>ParseBinOpRHS</tt> indicates the <em>
-minimal operator precedence</em> that the function is allowed to eat. For
-example, if the current pair stream is [+, x] and <tt>ParseBinOpRHS</tt> is
-passed in a precedence of 40, it will not consume any tokens (because the
-precedence of '+' is only 20). With this in mind, <tt>ParseBinOpRHS</tt> starts
-with:</p>
-
-<div class="doc_code">
-<pre>
-/// binoprhs
-/// ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
- // If this is a binop, find its precedence.
- while (1) {
- int TokPrec = GetTokPrecedence();
-
- // If this is a binop that binds at least as tightly as the current binop,
- // consume it, otherwise we are done.
- if (TokPrec &lt; ExprPrec)
- return LHS;
-</pre>
-</div>
-
-<p>This code gets the precedence of the current token and checks to see if if is
-too low. Because we defined invalid tokens to have a precedence of -1, this
-check implicitly knows that the pair-stream ends when the token stream runs out
-of binary operators. If this check succeeds, we know that the token is a binary
-operator and that it will be included in this expression:</p>
-
-<div class="doc_code">
-<pre>
- // Okay, we know this is a binop.
- int BinOp = CurTok;
- getNextToken(); // eat binop
-
- // Parse the primary expression after the binary operator.
- ExprAST *RHS = ParsePrimary();
- if (!RHS) return 0;
-</pre>
-</div>
-
-<p>As such, this code eats (and remembers) the binary operator and then parses
-the primary expression that follows. This builds up the whole pair, the first of
-which is [+, b] for the running example.</p>
-
-<p>Now that we parsed the left-hand side of an expression and one pair of the
-RHS sequence, we have to decide which way the expression associates. In
-particular, we could have "(a+b) binop unparsed" or "a + (b binop unparsed)".
-To determine this, we look ahead at "binop" to determine its precedence and
-compare it to BinOp's precedence (which is '+' in this case):</p>
-
-<div class="doc_code">
-<pre>
- // If BinOp binds less tightly with RHS than the operator after RHS, let
- // the pending operator take RHS as its LHS.
- int NextPrec = GetTokPrecedence();
- if (TokPrec &lt; NextPrec) {
-</pre>
-</div>
-
-<p>If the precedence of the binop to the right of "RHS" is lower or equal to the
-precedence of our current operator, then we know that the parentheses associate
-as "(a+b) binop ...". In our example, the current operator is "+" and the next
-operator is "+", we know that they have the same precedence. In this case we'll
-create the AST node for "a+b", and then continue parsing:</p>
-
-<div class="doc_code">
-<pre>
- ... if body omitted ...
- }
-
- // Merge LHS/RHS.
- LHS = new BinaryExprAST(BinOp, LHS, RHS);
- } // loop around to the top of the while loop.
-}
-</pre>
-</div>
-
-<p>In our example above, this will turn "a+b+" into "(a+b)" and execute the next
-iteration of the loop, with "+" as the current token. The code above will eat,
-remember, and parse "(c+d)" as the primary expression, which makes the
-current pair equal to [+, (c+d)]. It will then evaluate the 'if' conditional above with
-"*" as the binop to the right of the primary. In this case, the precedence of "*" is
-higher than the precedence of "+" so the if condition will be entered.</p>
-
-<p>The critical question left here is "how can the if condition parse the right
-hand side in full"? In particular, to build the AST correctly for our example,
-it needs to get all of "(c+d)*e*f" as the RHS expression variable. The code to
-do this is surprisingly simple (code from the above two blocks duplicated for
-context):</p>
-
-<div class="doc_code">
-<pre>
- // If BinOp binds less tightly with RHS than the operator after RHS, let
- // the pending operator take RHS as its LHS.
- int NextPrec = GetTokPrecedence();
- if (TokPrec &lt; NextPrec) {
- <b>RHS = ParseBinOpRHS(TokPrec+1, RHS);
- if (RHS == 0) return 0;</b>
- }
- // Merge LHS/RHS.
- LHS = new BinaryExprAST(BinOp, LHS, RHS);
- } // loop around to the top of the while loop.
-}
-</pre>
-</div>
-
-<p>At this point, we know that the binary operator to the RHS of our primary
-has higher precedence than the binop we are currently parsing. As such, we know
-that any sequence of pairs whose operators are all higher precedence than "+"
-should be parsed together and returned as "RHS". To do this, we recursively
-invoke the <tt>ParseBinOpRHS</tt> function specifying "TokPrec+1" as the minimum
-precedence required for it to continue. In our example above, this will cause
-it to return the AST node for "(c+d)*e*f" as RHS, which is then set as the RHS
-of the '+' expression.</p>
-
-<p>Finally, on the next iteration of the while loop, the "+g" piece is parsed
-and added to the AST. With this little bit of code (14 non-trivial lines), we
-correctly handle fully general binary expression parsing in a very elegant way.
-This was a whirlwind tour of this code, and it is somewhat subtle. I recommend
-running through it with a few tough examples to see how it works.
-</p>
-
-<p>This wraps up handling of expressions. At this point, we can point the
-parser at an arbitrary token stream and build an expression from it, stopping
-at the first token that is not part of the expression. Next up we need to
-handle function definitions, etc.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parsertop">Parsing the Rest</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The next thing missing is handling of function prototypes. In Kaleidoscope,
-these are used both for 'extern' function declarations as well as function body
-definitions. The code to do this is straight-forward and not very interesting
-(once you've survived expressions):
-</p>
-
-<div class="doc_code">
-<pre>
-/// prototype
-/// ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
- if (CurTok != tok_identifier)
- return ErrorP("Expected function name in prototype");
-
- std::string FnName = IdentifierStr;
- getNextToken();
-
- if (CurTok != '(')
- return ErrorP("Expected '(' in prototype");
-
- // Read the list of argument names.
- std::vector&lt;std::string&gt; ArgNames;
- while (getNextToken() == tok_identifier)
- ArgNames.push_back(IdentifierStr);
- if (CurTok != ')')
- return ErrorP("Expected ')' in prototype");
-
- // success.
- getNextToken(); // eat ')'.
-
- return new PrototypeAST(FnName, ArgNames);
-}
-</pre>
-</div>
-
-<p>Given this, a function definition is very simple, just a prototype plus
-an expression to implement the body:</p>
-
-<div class="doc_code">
-<pre>
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
- getNextToken(); // eat def.
- PrototypeAST *Proto = ParsePrototype();
- if (Proto == 0) return 0;
-
- if (ExprAST *E = ParseExpression())
- return new FunctionAST(Proto, E);
- return 0;
-}
-</pre>
-</div>
-
-<p>In addition, we support 'extern' to declare functions like 'sin' and 'cos' as
-well as to support forward declaration of user functions. These 'extern's are just
-prototypes with no body:</p>
-
-<div class="doc_code">
-<pre>
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
- getNextToken(); // eat extern.
- return ParsePrototype();
-}
-</pre>
-</div>
-
-<p>Finally, we'll also let the user type in arbitrary top-level expressions and
-evaluate them on the fly. We will handle this by defining anonymous nullary
-(zero argument) functions for them:</p>
-
-<div class="doc_code">
-<pre>
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
- if (ExprAST *E = ParseExpression()) {
- // Make an anonymous proto.
- PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
- return new FunctionAST(Proto, E);
- }
- return 0;
-}
-</pre>
-</div>
-
-<p>Now that we have all the pieces, let's build a little driver that will let us
-actually <em>execute</em> this code we've built!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="driver">The Driver</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The driver for this simply invokes all of the parsing pieces with a top-level
-dispatch loop. There isn't much interesting here, so I'll just include the
-top-level loop. See <a href="#code">below</a> for full code in the "Top-Level
-Parsing" section.</p>
-
-<div class="doc_code">
-<pre>
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
- while (1) {
- fprintf(stderr, "ready&gt; ");
- switch (CurTok) {
- case tok_eof: return;
- case ';': getNextToken(); break; // ignore top-level semicolons.
- case tok_def: HandleDefinition(); break;
- case tok_extern: HandleExtern(); break;
- default: HandleTopLevelExpression(); break;
- }
- }
-}
-</pre>
-</div>
-
-<p>The most interesting part of this is that we ignore top-level semicolons.
-Why is this, you ask? The basic reason is that if you type "4 + 5" at the
-command line, the parser doesn't know whether that is the end of what you will type
-or not. For example, on the next line you could type "def foo..." in which case
-4+5 is the end of a top-level expression. Alternatively you could type "* 6",
-which would continue the expression. Having top-level semicolons allows you to
-type "4+5;", and the parser will know you are done.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="conclusions">Conclusions</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>With just under 400 lines of commented code (240 lines of non-comment,
-non-blank code), we fully defined our minimal language, including a lexer,
-parser, and AST builder. With this done, the executable will validate
-Kaleidoscope code and tell us if it is grammatically invalid. For
-example, here is a sample interaction:</p>
-
-<div class="doc_code">
-<pre>
-$ <b>./a.out</b>
-ready&gt; <b>def foo(x y) x+foo(y, 4.0);</b>
-Parsed a function definition.
-ready&gt; <b>def foo(x y) x+y y;</b>
-Parsed a function definition.
-Parsed a top-level expr
-ready&gt; <b>def foo(x y) x+y );</b>
-Parsed a function definition.
-Error: unknown token when expecting an expression
-ready&gt; <b>extern sin(a);</b>
-ready&gt; Parsed an extern
-ready&gt; <b>^D</b>
-$
-</pre>
-</div>
-
-<p>There is a lot of room for extension here. You can define new AST nodes,
-extend the language in many ways, etc. In the <a href="LangImpl3.html">next
-installment</a>, we will describe how to generate LLVM Intermediate
-Representation (IR) from the AST.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for this and the previous chapter.
-Note that it is fully self-contained: you don't need LLVM or any external
-libraries at all for this. (Besides the C and C++ standard libraries, of
-course.) To build this, just compile with:</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g -O3 toy.cpp
-# Run
-./a.out
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-#include &lt;cstdio&gt;
-#include &lt;cstdlib&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
- tok_eof = -1,
-
- // commands
- tok_def = -2, tok_extern = -3,
-
- // primary
- tok_identifier = -4, tok_number = -5
-};
-
-static std::string IdentifierStr; // Filled in if tok_identifier
-static double NumVal; // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
- static int LastChar = ' ';
-
- // Skip any whitespace.
- while (isspace(LastChar))
- LastChar = getchar();
-
- if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
- IdentifierStr = LastChar;
- while (isalnum((LastChar = getchar())))
- IdentifierStr += LastChar;
-
- if (IdentifierStr == "def") return tok_def;
- if (IdentifierStr == "extern") return tok_extern;
- return tok_identifier;
- }
-
- if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
- std::string NumStr;
- do {
- NumStr += LastChar;
- LastChar = getchar();
- } while (isdigit(LastChar) || LastChar == '.');
-
- NumVal = strtod(NumStr.c_str(), 0);
- return tok_number;
- }
-
- if (LastChar == '#') {
- // Comment until end of line.
- do LastChar = getchar();
- while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-
- if (LastChar != EOF)
- return gettok();
- }
-
- // Check for end of file. Don't eat the EOF.
- if (LastChar == EOF)
- return tok_eof;
-
- // Otherwise, just return the character as its ascii value.
- int ThisChar = LastChar;
- LastChar = getchar();
- return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
- virtual ~ExprAST() {}
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
- double Val;
-public:
- NumberExprAST(double val) : Val(val) {}
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
- std::string Name;
-public:
- VariableExprAST(const std::string &amp;name) : Name(name) {}
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
- char Op;
- ExprAST *LHS, *RHS;
-public:
- BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
- : Op(op), LHS(lhs), RHS(rhs) {}
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
- std::string Callee;
- std::vector&lt;ExprAST*&gt; Args;
-public:
- CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
- : Callee(callee), Args(args) {}
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
- std::string Name;
- std::vector&lt;std::string&gt; Args;
-public:
- PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args)
- : Name(name), Args(args) {}
-
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
- PrototypeAST *Proto;
- ExprAST *Body;
-public:
- FunctionAST(PrototypeAST *proto, ExprAST *body)
- : Proto(proto), Body(body) {}
-
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser is looking at. getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
- return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
- if (!isascii(CurTok))
- return -1;
-
- // Make sure it's a declared binop.
- int TokPrec = BinopPrecedence[CurTok];
- if (TokPrec &lt;= 0) return -1;
- return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-/// ::= identifier
-/// ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
- std::string IdName = IdentifierStr;
-
- getNextToken(); // eat identifier.
-
- if (CurTok != '(') // Simple variable ref.
- return new VariableExprAST(IdName);
-
- // Call.
- getNextToken(); // eat (
- std::vector&lt;ExprAST*&gt; Args;
- if (CurTok != ')') {
- while (1) {
- ExprAST *Arg = ParseExpression();
- if (!Arg) return 0;
- Args.push_back(Arg);
-
- if (CurTok == ')') break;
-
- if (CurTok != ',')
- return Error("Expected ')' or ',' in argument list");
- getNextToken();
- }
- }
-
- // Eat the ')'.
- getNextToken();
-
- return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
- ExprAST *Result = new NumberExprAST(NumVal);
- getNextToken(); // consume the number
- return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
- getNextToken(); // eat (.
- ExprAST *V = ParseExpression();
- if (!V) return 0;
-
- if (CurTok != ')')
- return Error("expected ')'");
- getNextToken(); // eat ).
- return V;
-}
-
-/// primary
-/// ::= identifierexpr
-/// ::= numberexpr
-/// ::= parenexpr
-static ExprAST *ParsePrimary() {
- switch (CurTok) {
- default: return Error("unknown token when expecting an expression");
- case tok_identifier: return ParseIdentifierExpr();
- case tok_number: return ParseNumberExpr();
- case '(': return ParseParenExpr();
- }
-}
-
-/// binoprhs
-/// ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
- // If this is a binop, find its precedence.
- while (1) {
- int TokPrec = GetTokPrecedence();
-
- // If this is a binop that binds at least as tightly as the current binop,
- // consume it, otherwise we are done.
- if (TokPrec &lt; ExprPrec)
- return LHS;
-
- // Okay, we know this is a binop.
- int BinOp = CurTok;
- getNextToken(); // eat binop
-
- // Parse the primary expression after the binary operator.
- ExprAST *RHS = ParsePrimary();
- if (!RHS) return 0;
-
- // If BinOp binds less tightly with RHS than the operator after RHS, let
- // the pending operator take RHS as its LHS.
- int NextPrec = GetTokPrecedence();
- if (TokPrec &lt; NextPrec) {
- RHS = ParseBinOpRHS(TokPrec+1, RHS);
- if (RHS == 0) return 0;
- }
-
- // Merge LHS/RHS.
- LHS = new BinaryExprAST(BinOp, LHS, RHS);
- }
-}
-
-/// expression
-/// ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
- ExprAST *LHS = ParsePrimary();
- if (!LHS) return 0;
-
- return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-/// ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
- if (CurTok != tok_identifier)
- return ErrorP("Expected function name in prototype");
-
- std::string FnName = IdentifierStr;
- getNextToken();
-
- if (CurTok != '(')
- return ErrorP("Expected '(' in prototype");
-
- std::vector&lt;std::string&gt; ArgNames;
- while (getNextToken() == tok_identifier)
- ArgNames.push_back(IdentifierStr);
- if (CurTok != ')')
- return ErrorP("Expected ')' in prototype");
-
- // success.
- getNextToken(); // eat ')'.
-
- return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
- getNextToken(); // eat def.
- PrototypeAST *Proto = ParsePrototype();
- if (Proto == 0) return 0;
-
- if (ExprAST *E = ParseExpression())
- return new FunctionAST(Proto, E);
- return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
- if (ExprAST *E = ParseExpression()) {
- // Make an anonymous proto.
- PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
- return new FunctionAST(Proto, E);
- }
- return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
- getNextToken(); // eat extern.
- return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing
-//===----------------------------------------------------------------------===//
-
-static void HandleDefinition() {
- if (ParseDefinition()) {
- fprintf(stderr, "Parsed a function definition.\n");
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleExtern() {
- if (ParseExtern()) {
- fprintf(stderr, "Parsed an extern\n");
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleTopLevelExpression() {
- // Evaluate a top-level expression into an anonymous function.
- if (ParseTopLevelExpr()) {
- fprintf(stderr, "Parsed a top-level expr\n");
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
- while (1) {
- fprintf(stderr, "ready&gt; ");
- switch (CurTok) {
- case tok_eof: return;
- case ';': getNextToken(); break; // ignore top-level semicolons.
- case tok_def: HandleDefinition(); break;
- case tok_extern: HandleExtern(); break;
- default: HandleTopLevelExpression(); break;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
- // Install standard binary operators.
- // 1 is lowest precedence.
- BinopPrecedence['&lt;'] = 10;
- BinopPrecedence['+'] = 20;
- BinopPrecedence['-'] = 20;
- BinopPrecedence['*'] = 40; // highest.
-
- // Prime the first token.
- fprintf(stderr, "ready&gt; ");
- getNextToken();
-
- // Run the main "interpreter loop" now.
- MainLoop();
-
- return 0;
-}
-</pre>
-</div>
-<a href="LangImpl3.html">Next: Implementing Code Generation to LLVM IR</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-05-03 00:46:36 +0200 (Thu, 03 May 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl2.rst b/docs/tutorial/LangImpl2.rst
new file mode 100644
index 000000000000..7262afa8f374
--- /dev/null
+++ b/docs/tutorial/LangImpl2.rst
@@ -0,0 +1,1096 @@
+===========================================
+Kaleidoscope: Implementing a Parser and AST
+===========================================
+
+.. contents::
+ :local:
+
+Chapter 2 Introduction
+======================
+
+Welcome to Chapter 2 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. This chapter shows you how to use the
+lexer, built in `Chapter 1 <LangImpl1.html>`_, to build a full
+`parser <http://en.wikipedia.org/wiki/Parsing>`_ for our Kaleidoscope
+language. Once we have a parser, we'll define and build an `Abstract
+Syntax Tree <http://en.wikipedia.org/wiki/Abstract_syntax_tree>`_ (AST).
+
+The parser we will build uses a combination of `Recursive Descent
+Parsing <http://en.wikipedia.org/wiki/Recursive_descent_parser>`_ and
+`Operator-Precedence
+Parsing <http://en.wikipedia.org/wiki/Operator-precedence_parser>`_ to
+parse the Kaleidoscope language (the latter for binary expressions and
+the former for everything else). Before we get to parsing, though, let's
+talk about the output of the parser: the Abstract Syntax Tree.
+
+The Abstract Syntax Tree (AST)
+==============================
+
+The AST for a program captures its behavior in such a way that it is
+easy for later stages of the compiler (e.g. code generation) to
+interpret. We basically want one object for each construct in the
+language, and the AST should closely model the language. In
+Kaleidoscope, we have expressions, a prototype, and a function object.
+We'll start with expressions first:
+
+.. code-block:: c++
+
+ /// ExprAST - Base class for all expression nodes.
+ class ExprAST {
+ public:
+ virtual ~ExprAST() {}
+ };
+
+ /// NumberExprAST - Expression class for numeric literals like "1.0".
+ class NumberExprAST : public ExprAST {
+ double Val;
+ public:
+ NumberExprAST(double val) : Val(val) {}
+ };
+
+The code above shows the definition of the base ExprAST class and one
+subclass which we use for numeric literals. The important thing to note
+about this code is that the NumberExprAST class captures the numeric
+value of the literal as an instance variable. This allows later phases
+of the compiler to know what the stored numeric value is.
+
+Right now we only build the AST nodes, so there are no useful accessor
+methods on them. It would be very easy to add a virtual method to
+pretty-print the code, for example (a sketch of this appears below).
+Here are the other expression AST node definitions that we'll use in
+the basic form of the Kaleidoscope language:
+
+.. code-block:: c++
+
+ /// VariableExprAST - Expression class for referencing a variable, like "a".
+ class VariableExprAST : public ExprAST {
+ std::string Name;
+ public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ };
+
+ /// BinaryExprAST - Expression class for a binary operator.
+ class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+ public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ };
+
+ /// CallExprAST - Expression class for function calls.
+ class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+ public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ };
+
+This is all (intentionally) rather straight-forward: variables capture
+the variable name, binary operators capture their opcode (e.g. '+'), and
+calls capture a function name as well as a list of any argument
+expressions. One thing that is nice about our AST is that it captures
+the language features without talking about the syntax of the language.
+Note that there is no discussion about precedence of binary operators,
+lexical structure, etc.
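+
+As a concrete illustration of the pretty-printing idea mentioned
+earlier, here is one shape it could take. This is our own sketch, not
+code from the tutorial: a virtual ``print`` hook on the base class,
+with only the ``BinaryExprAST`` override shown:
+
+.. code-block:: c++
+
+    #include <cstdio>  // for FILE/fprintf (assumed by this sketch)
+
+    /// ExprAST - The base class from above, extended with a printing hook.
+    class ExprAST {
+    public:
+      virtual ~ExprAST() {}
+      virtual void print(FILE *Out) const {}  // default: print nothing
+    };
+
+    /// BinaryExprAST - The node from above, with a pretty-print override
+    /// that emits the expression fully parenthesized.
+    class BinaryExprAST : public ExprAST {
+      char Op;
+      ExprAST *LHS, *RHS;
+    public:
+      BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+        : Op(op), LHS(lhs), RHS(rhs) {}
+      virtual void print(FILE *Out) const {
+        fprintf(Out, "(");
+        LHS->print(Out);
+        fprintf(Out, " %c ", Op);
+        RHS->print(Out);
+        fprintf(Out, ")");
+      }
+    };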
+
+For our basic language, these are all of the expression nodes we'll
+define. Because it doesn't have conditional control flow, it isn't
+Turing-complete; we'll fix that in a later installment. The two things
+we need next are a way to talk about the interface to a function, and a
+way to talk about functions themselves:
+
+.. code-block:: c++
+
+ /// PrototypeAST - This class represents the "prototype" for a function,
+ /// which captures its name, and its argument names (thus implicitly the number
+ /// of arguments the function takes).
+ class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+ public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+ : Name(name), Args(args) {}
+ };
+
+ /// FunctionAST - This class represents a function definition itself.
+ class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+ public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+ };
+
+In Kaleidoscope, functions are typed with just a count of their
+arguments. Since all values are double precision floating point, the
+type of each argument doesn't need to be stored anywhere. In a more
+aggressive and realistic language, the "ExprAST" class would probably
+have a type field.
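+
+To make the shape of these classes concrete, here is how the AST for a
+trivial definition like "def foo(x) x" could be assembled by hand (our
+own illustration, using only the constructors defined above):
+
+.. code-block:: c++
+
+    // Build the AST for "def foo(x) x": a one-argument prototype whose
+    // body is simply a reference to that argument.
+    std::vector<std::string> ArgNames;
+    ArgNames.push_back("x");
+    PrototypeAST *Proto = new PrototypeAST("foo", ArgNames);
+    ExprAST *Body = new VariableExprAST("x");
+    FunctionAST *F = new FunctionAST(Proto, Body);
+
+Of course, writing these calls by hand for every input is exactly what
+the parser will automate.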
+
+With this scaffolding, we can now talk about parsing expressions and
+function bodies in Kaleidoscope.
+
+Parser Basics
+=============
+
+Now that we have an AST to build, we need to define the parser code to
+build it. The idea here is that we want to parse something like "x+y"
+(which is returned as three tokens by the lexer) into an AST that could
+be generated with calls like this:
+
+.. code-block:: c++
+
+ ExprAST *X = new VariableExprAST("x");
+ ExprAST *Y = new VariableExprAST("y");
+ ExprAST *Result = new BinaryExprAST('+', X, Y);
+
+In order to do this, we'll start by defining some basic helper routines:
+
+.. code-block:: c++
+
+ /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+ /// token the parser is looking at. getNextToken reads another token from the
+ /// lexer and updates CurTok with its results.
+ static int CurTok;
+ static int getNextToken() {
+ return CurTok = gettok();
+ }
+
+This implements a simple token buffer around the lexer. This allows us
+to look one token ahead at what the lexer is returning. Every function
+in our parser will assume that CurTok is the current token that needs to
+be parsed.
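+
+As a small usage sketch of our own (not code from the tutorial), the
+peek/consume pattern this buffer enables looks like this:
+
+.. code-block:: c++
+
+    /// ConsumeIf - A hypothetical helper: peek at CurTok and, if it
+    /// matches, consume it by advancing to the next token.
+    static bool ConsumeIf(int Tok) {
+      if (CurTok != Tok)
+        return false;    // Peek only; CurTok is left untouched.
+      getNextToken();    // Consume: CurTok now holds the following token.
+      return true;
+    }
+
+Every parser routine below follows this discipline implicitly: inspect
+``CurTok``, and call ``getNextToken()`` exactly when a token has been
+accepted.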
+
+.. code-block:: c++
+
+
+ /// Error* - These are little helper functions for error handling.
+ ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+ PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+ FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+The ``Error`` routines are simple helper routines that our parser will
+use to handle errors. The error recovery in our parser will not be the
+best and is not particularly user-friendly, but it will be enough for our
+tutorial. These routines make it easier to handle errors in routines
+that have various return types: they always return null.
+
+With these basic helper functions, we can implement the first piece of
+our grammar: numeric literals.
+
+Basic Expression Parsing
+========================
+
+We start with numeric literals, because they are the simplest to
+process. For each production in our grammar, we'll define a function
+which parses that production. For numeric literals, we have:
+
+.. code-block:: c++
+
+ /// numberexpr ::= number
+ static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+ }
+
+This routine is very simple: it expects to be called when the current
+token is a ``tok_number`` token. It takes the current number value,
+creates a ``NumberExprAST`` node, advances the lexer to the next token,
+and finally returns.
+
+There are some interesting aspects to this. The most important one is
+that this routine eats all of the tokens that correspond to the
+production and returns the lexer buffer with the next token (which is
+not part of the grammar production) ready to go. This is a fairly
+standard way to go for recursive descent parsers. For a better example,
+the parenthesis operator is defined like this:
+
+.. code-block:: c++
+
+ /// parenexpr ::= '(' expression ')'
+ static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+ }
+
+This function illustrates a number of interesting things about the
+parser:
+
+1) It shows how we use the Error routines. When called, this function
+expects that the current token is a '(' token, but after parsing the
+subexpression, it is possible that there is no ')' waiting. For example,
+if the user types in "(4 x" instead of "(4)", the parser should emit an
+error. Because errors can occur, the parser needs a way to indicate that
+they happened: in our parser, we return null on an error.
+
+2) Another interesting aspect of this function is that it uses recursion
+by calling ``ParseExpression`` (we will soon see that
+``ParseExpression`` can call ``ParseParenExpr``). This is powerful
+because it allows us to handle recursive grammars, and keeps each
+production very simple. Note that parentheses do not cause construction
+of AST nodes themselves. While we could do it this way, the most
+important role of parentheses is to guide the parser and provide
+grouping. Once the parser constructs the AST, parentheses are not
+needed: parsing "(x)" produces exactly the same AST as parsing "x".
+
+The next simple production is for handling variable references and
+function calls:
+
+.. code-block:: c++
+
+ /// identifierexpr
+ /// ::= identifier
+ /// ::= identifier '(' expression* ')'
+ static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+ }
+
+This routine follows the same style as the other routines. (It expects
+to be called if the current token is a ``tok_identifier`` token.) It
+also has recursion and error handling. One interesting aspect of this
+is that it uses *look-ahead* to determine if the current identifier is
+a standalone variable reference or if it is a function call expression.
+It handles this by checking to see if the token after the identifier is
+a '(' token, constructing either a ``VariableExprAST`` or
+``CallExprAST`` node as appropriate.
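+
+For example, here is a hedged sketch (hand-constructed for
+illustration, not tutorial code) of the nodes this routine builds for
+the inputs "x" and "foo(y)":
+
+.. code-block:: c++
+
+    // "x": no '(' follows the identifier, so we get a variable reference.
+    ExprAST *Ref = new VariableExprAST("x");
+
+    // "foo(y)": a '(' follows, so we get a call with one argument.
+    std::vector<ExprAST*> Args;
+    Args.push_back(new VariableExprAST("y"));
+    ExprAST *Call = new CallExprAST("foo", Args);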
+
+Now that we have all of our simple expression-parsing logic in place, we
+can define a helper function to wrap it together into one entry point.
+We call this class of expressions "primary" expressions, for reasons
+that will become clearer `later in the
+tutorial <LangImpl6.html#unary>`_. In order to parse an arbitrary
+primary expression, we need to determine what sort of expression it is:
+
+.. code-block:: c++
+
+ /// primary
+ /// ::= identifierexpr
+ /// ::= numberexpr
+ /// ::= parenexpr
+ static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ }
+ }
+
+Now that you see the definition of this function, it is more obvious why
+we can assume the state of CurTok in the various functions. This uses
+look-ahead to determine which sort of expression is being inspected, and
+then parses it with a function call.
+
+Now that basic expressions are handled, we need to handle binary
+expressions. They are a bit more complex.
+
+Binary Expression Parsing
+=========================
+
+Binary expressions are significantly harder to parse because they are
+often ambiguous. For example, when given the string "x+y\*z", the parser
+can choose to parse it as either "(x+y)\*z" or "x+(y\*z)". With common
+definitions from mathematics, we expect the latter parse, because "\*"
+(multiplication) has higher *precedence* than "+" (addition).
+
+There are many ways to handle this, but an elegant and efficient way is
+to use `Operator-Precedence
+Parsing <http://en.wikipedia.org/wiki/Operator-precedence_parser>`_.
+This parsing technique uses the precedence of binary operators to guide
+recursion. To start with, we need a table of precedences:
+
+.. code-block:: c++
+
+ /// BinopPrecedence - This holds the precedence for each binary operator that is
+ /// defined.
+ static std::map<char, int> BinopPrecedence;
+
+ /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+ static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+ }
+
+ int main() {
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+ ...
+ }
+
+For the basic form of Kaleidoscope, we will only support 4 binary
+operators (this can obviously be extended by you, our brave and intrepid
+reader). The ``GetTokPrecedence`` function returns the precedence for
+the current token, or -1 if the token is not a binary operator. Having a
+map makes it easy to add new operators and makes it clear that the
+algorithm doesn't depend on the specific operators involved, but it
+would be easy enough to eliminate the map and do the comparisons in the
+``GetTokPrecedence`` function. (Or just use a fixed-size array).
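+
+For illustration, a hedged sketch of that map-free variant (it behaves
+the same as the table installed in ``main`` above):
+
+.. code-block:: c++
+
+    // Alternative GetTokPrecedence without the map: hard-code the
+    // comparisons. Sketch only; the tutorial keeps the map version.
+    static int GetTokPrecedence() {
+      switch (CurTok) {
+      default:  return -1; // not a declared binary operator.
+      case '<': return 10;
+      case '+':
+      case '-': return 20;
+      case '*': return 40;
+      }
+    }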
+
+With the helper above defined, we can now start parsing binary
+expressions. The basic idea of operator precedence parsing is to break
+down an expression with potentially ambiguous binary operators into
+pieces. Consider, for example, the expression "a+b+(c+d)\*e\*f+g".
+Operator precedence parsing considers this as a stream of primary
+expressions separated by binary operators. As such, it will first parse
+the leading primary expression "a", then it will see the pairs [+, b]
+[+, (c+d)] [\*, e] [\*, f] and [+, g]. Note that because parentheses are
+primary expressions, the binary expression parser doesn't need to worry
+about nested subexpressions like (c+d) at all.
+
+To start, an expression is a primary expression potentially followed by
+a sequence of [binop,primaryexpr] pairs:
+
+.. code-block:: c++
+
+ /// expression
+ /// ::= primary binoprhs
+ ///
+ static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParsePrimary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+ }
+
+``ParseBinOpRHS`` is the function that parses the sequence of pairs for
+us. It takes a precedence and a pointer to an expression for the part
+that has been parsed so far. Note that "x" is a perfectly valid
+expression: as such, "binoprhs" is allowed to be empty, in which case it
+returns the expression that is passed into it. In our example above, the
+code passes the expression for "a" into ``ParseBinOpRHS`` and the
+current token is "+".
+
+The precedence value passed into ``ParseBinOpRHS`` indicates the
+*minimal operator precedence* that the function is allowed to eat. For
+example, if the current pair stream is [+, x] and ``ParseBinOpRHS`` is
+passed in a precedence of 40, it will not consume any tokens (because
+the precedence of '+' is only 20). With this in mind, ``ParseBinOpRHS``
+starts with:
+
+.. code-block:: c++
+
+ /// binoprhs
+ /// ::= ('+' primary)*
+ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+This code gets the precedence of the current token and checks to see if
+it is too low. Because we defined invalid tokens to have a precedence of
+-1, this check implicitly knows that the pair-stream ends when the token
+stream runs out of binary operators. If this check succeeds, we know
+that the token is a binary operator and that it will be included in this
+expression:
+
+.. code-block:: c++
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the primary expression after the binary operator.
+ ExprAST *RHS = ParsePrimary();
+ if (!RHS) return 0;
+
+As such, this code eats (and remembers) the binary operator and then
+parses the primary expression that follows. This builds up the whole
+pair, the first of which is [+, b] for the running example.
+
+Now that we parsed the left-hand side of an expression and one pair of
+the RHS sequence, we have to decide which way the expression associates.
+In particular, we could have "(a+b) binop unparsed" or "a + (b binop
+unparsed)". To determine this, we look ahead at "binop" to determine its
+precedence and compare it to BinOp's precedence (which is '+' in this
+case):
+
+.. code-block:: c++
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+
+If the precedence of the binop to the right of "RHS" is lower than or
+equal to the precedence of our current operator, then we know that the
+parentheses associate as "(a+b) binop ...". In our example, the current
+operator is "+" and the next operator is "+", so we know that they have
+the same precedence. In this case we'll create the AST node for "a+b",
+and then continue parsing:
+
+.. code-block:: c++
+
+ ... if body omitted ...
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ } // loop around to the top of the while loop.
+ }
+
+In our example above, this will turn "a+b+" into "(a+b)" and execute the
+next iteration of the loop, with "+" as the current token. The code
+above will eat, remember, and parse "(c+d)" as the primary expression,
+which makes the current pair equal to [+, (c+d)]. It will then evaluate
+the 'if' conditional above with "\*" as the binop to the right of the
+primary. In this case, the precedence of "\*" is higher than the
+precedence of "+" so the if condition will be entered.
+
+The critical question left here is: how can the if condition parse the
+right-hand side in full? In particular, to build the AST correctly for
+our example, it needs to get all of "(c+d)\*e\*f" as the RHS expression
+variable. The code to do this is surprisingly simple (code from the
+above two blocks duplicated for context):
+
+.. code-block:: c++
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ } // loop around to the top of the while loop.
+ }
+
+At this point, we know that the binary operator to the RHS of our
+primary has higher precedence than the binop we are currently parsing.
+As such, we know that any sequence of pairs whose operators are all
+higher precedence than "+" should be parsed together and returned as
+"RHS". To do this, we recursively invoke the ``ParseBinOpRHS`` function
+specifying "TokPrec+1" as the minimum precedence required for it to
+continue. In our example above, this will cause it to return the AST
+node for "(c+d)\*e\*f" as RHS, which is then set as the RHS of the '+'
+expression.
+
+Finally, on the next iteration of the while loop, the "+g" piece is
+parsed and added to the AST. With this little bit of code (14
+non-trivial lines), we correctly handle fully general binary expression
+parsing in a very elegant way. This was a whirlwind tour of this code,
+and it is somewhat subtle. I recommend running through it with a few
+tough examples to see how it works.
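+
+As a starting point, here is a hand-written trace (a hedged sketch, not
+program output) of how the calls unfold for "a+b*c", using the
+precedences installed in ``main``:
+
+.. code-block:: c++
+
+    // ParseExpression()                    input: "a+b*c"
+    //   LHS = ParsePrimary()               // parses "a"
+    //   return ParseBinOpRHS(0, a)
+    //     TokPrec('+') = 20 >= 0           // eat '+'
+    //     RHS = ParsePrimary()             // parses "b"
+    //     NextPrec('*') = 40 > 20          // '*' binds tighter, recurse:
+    //     RHS = ParseBinOpRHS(21, b)
+    //       TokPrec('*') = 40 >= 21        // eat '*'
+    //       RHS = ParsePrimary()           // parses "c"
+    //       NextPrec = -1 (no more binops) // no further recursion
+    //       merge: (b*c)
+    //       TokPrec = -1 < 21              // return (b*c)
+    //     merge: LHS = (a+(b*c))
+    //     TokPrec = -1 < 0                 // return (a+(b*c))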
+
+This wraps up handling of expressions. At this point, we can point the
+parser at an arbitrary token stream and build an expression from it,
+stopping at the first token that is not part of the expression. Next up
+we need to handle function definitions, etc.
+
+Parsing the Rest
+================
+
+The next thing missing is handling of function prototypes. In
+Kaleidoscope, these are used both for 'extern' function declarations
+and for function body definitions. The code to do this is
+straightforward and not very interesting (once you've survived
+expressions):
+
+.. code-block:: c++
+
+ /// prototype
+ /// ::= id '(' id* ')'
+ static PrototypeAST *ParsePrototype() {
+ if (CurTok != tok_identifier)
+ return ErrorP("Expected function name in prototype");
+
+ std::string FnName = IdentifierStr;
+ getNextToken();
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ // Read the list of argument names.
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ return new PrototypeAST(FnName, ArgNames);
+ }
+
+Given this, a function definition is very simple, just a prototype plus
+an expression to implement the body:
+
+.. code-block:: c++
+
+ /// definition ::= 'def' prototype expression
+ static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+ }
+
+In addition, we support 'extern' to declare functions like 'sin' and
+'cos', and to allow forward declaration of user functions. These
+'extern's are just prototypes with no body:
+
+.. code-block:: c++
+
+ /// external ::= 'extern' prototype
+ static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+ }
+
+Finally, we'll also let the user type in arbitrary top-level expressions
+and evaluate them on the fly. We will handle this by defining anonymous
+nullary (zero argument) functions for them:
+
+.. code-block:: c++
+
+ /// toplevelexpr ::= expression
+ static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+ }
+
+Now that we have all the pieces, let's build a little driver that will
+let us actually *execute* this code we've built!
+
+The Driver
+==========
+
+The driver for this simply invokes all of the parsing pieces with a
+top-level dispatch loop. There isn't much interesting here, so I'll
+just include the top-level loop; the ``Handle*`` functions it calls
+appear in the "Top-Level Parsing" section of the full code listing
+`below <#code>`_.
+
+.. code-block:: c++
+
+ /// top ::= definition | external | expression | ';'
+ static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+ }
+
+The most interesting part of this is that we ignore top-level
+semicolons. Why is this, you ask? The basic reason is that if you type
+"4 + 5" at the command line, the parser doesn't know whether that is the
+end of what you will type or not. For example, on the next line you
+could type "def foo..." in which case 4+5 is the end of a top-level
+expression. Alternatively you could type "\* 6", which would continue
+the expression. Having top-level semicolons allows you to type "4+5;",
+and the parser will know you are done.
+
+Conclusions
+===========
+
+With just under 400 lines of commented code (240 lines of non-comment,
+non-blank code), we fully defined our minimal language, including a
+lexer, parser, and AST builder. With this done, the executable will
+validate Kaleidoscope code and tell us if it is grammatically invalid.
+For example, here is a sample interaction:
+
+.. code-block:: bash
+
+ $ ./a.out
+ ready> def foo(x y) x+foo(y, 4.0);
+ Parsed a function definition.
+ ready> def foo(x y) x+y y;
+ Parsed a function definition.
+ Parsed a top-level expr
+ ready> def foo(x y) x+y );
+ Parsed a function definition.
+ Error: unknown token when expecting an expression
+ ready> extern sin(a);
+ ready> Parsed an extern
+ ready> ^D
+ $
+
+There is a lot of room for extension here. You can define new AST nodes,
+extend the language in many ways, etc. In the `next
+installment <LangImpl3.html>`_, we will describe how to generate LLVM
+Intermediate Representation (IR) from the AST.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for this and the previous chapter.
+Note that it is fully self-contained: you don't need LLVM or any
+external libraries at all for this. (Besides the C and C++ standard
+libraries, of course.) To build this, just compile with:
+
+.. code-block:: bash
+
+ # Compile
+ clang++ -g -O3 toy.cpp
+ # Run
+ ./a.out
+
+Here is the code:
+
+.. code-block:: c++
+
+ #include <cctype>
+ #include <cstdio>
+ #include <cstdlib>
+ #include <string>
+ #include <map>
+ #include <vector>
+
+ //===----------------------------------------------------------------------===//
+ // Lexer
+ //===----------------------------------------------------------------------===//
+
+ // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+ // of these for known things.
+ enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5
+ };
+
+ static std::string IdentifierStr; // Filled in if tok_identifier
+ static double NumVal; // Filled in if tok_number
+
+ /// gettok - Return the next token from standard input.
+ static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Abstract Syntax Tree (aka Parse Tree)
+ //===----------------------------------------------------------------------===//
+
+ /// ExprAST - Base class for all expression nodes.
+ class ExprAST {
+ public:
+ virtual ~ExprAST() {}
+ };
+
+ /// NumberExprAST - Expression class for numeric literals like "1.0".
+ class NumberExprAST : public ExprAST {
+ double Val;
+ public:
+ NumberExprAST(double val) : Val(val) {}
+ };
+
+ /// VariableExprAST - Expression class for referencing a variable, like "a".
+ class VariableExprAST : public ExprAST {
+ std::string Name;
+ public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ };
+
+ /// BinaryExprAST - Expression class for a binary operator.
+ class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+ public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ };
+
+ /// CallExprAST - Expression class for function calls.
+ class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+ public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ };
+
+ /// PrototypeAST - This class represents the "prototype" for a function,
+ /// which captures its name, and its argument names (thus implicitly the number
+ /// of arguments the function takes).
+ class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+ public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+ : Name(name), Args(args) {}
+ };
+
+ /// FunctionAST - This class represents a function definition itself.
+ class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+ public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+ };
+
+ //===----------------------------------------------------------------------===//
+ // Parser
+ //===----------------------------------------------------------------------===//
+
+ /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+ /// token the parser is looking at. getNextToken reads another token from the
+ /// lexer and updates CurTok with its results.
+ static int CurTok;
+ static int getNextToken() {
+ return CurTok = gettok();
+ }
+
+ /// BinopPrecedence - This holds the precedence for each binary operator that is
+ /// defined.
+ static std::map<char, int> BinopPrecedence;
+
+ /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+ static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+ }
+
+ /// Error* - These are little helper functions for error handling.
+ ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+ PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+ FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+ static ExprAST *ParseExpression();
+
+ /// identifierexpr
+ /// ::= identifier
+ /// ::= identifier '(' expression* ')'
+ static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+ }
+
+ /// numberexpr ::= number
+ static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+ }
+
+ /// parenexpr ::= '(' expression ')'
+ static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+ }
+
+ /// primary
+ /// ::= identifierexpr
+ /// ::= numberexpr
+ /// ::= parenexpr
+ static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ }
+ }
+
+ /// binoprhs
+ /// ::= ('+' primary)*
+ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the primary expression after the binary operator.
+ ExprAST *RHS = ParsePrimary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+ }
+
+ /// expression
+ /// ::= primary binoprhs
+ ///
+ static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParsePrimary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+ }
+
+ /// prototype
+ /// ::= id '(' id* ')'
+ static PrototypeAST *ParsePrototype() {
+ if (CurTok != tok_identifier)
+ return ErrorP("Expected function name in prototype");
+
+ std::string FnName = IdentifierStr;
+ getNextToken();
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ return new PrototypeAST(FnName, ArgNames);
+ }
+
+ /// definition ::= 'def' prototype expression
+ static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+ }
+
+ /// toplevelexpr ::= expression
+ static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+ }
+
+ /// external ::= 'extern' prototype
+ static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Top-Level parsing
+ //===----------------------------------------------------------------------===//
+
+ static void HandleDefinition() {
+ if (ParseDefinition()) {
+ fprintf(stderr, "Parsed a function definition.\n");
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleExtern() {
+ if (ParseExtern()) {
+ fprintf(stderr, "Parsed an extern\n");
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (ParseTopLevelExpr()) {
+ fprintf(stderr, "Parsed a top-level expr\n");
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ /// top ::= definition | external | expression | ';'
+ static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Main driver code.
+ //===----------------------------------------------------------------------===//
+
+ int main() {
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ return 0;
+ }
+
+`Next: Implementing Code Generation to LLVM IR <LangImpl3.html>`_
+
diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html
deleted file mode 100644
index 1390153ee7cf..000000000000
--- a/docs/tutorial/LangImpl3.html
+++ /dev/null
@@ -1,1268 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Implementing code generation to LLVM IR</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Code generation to LLVM IR</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 3
- <ol>
- <li><a href="#intro">Chapter 3 Introduction</a></li>
- <li><a href="#basics">Code Generation Setup</a></li>
- <li><a href="#exprs">Expression Code Generation</a></li>
- <li><a href="#funcs">Function Code Generation</a></li>
- <li><a href="#driver">Driver Changes and Closing Thoughts</a></li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="LangImpl4.html">Chapter 4</a>: Adding JIT and Optimizer
-Support</li>
-</ul>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 3 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 3 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial. This chapter shows you how to transform the <a
-href="LangImpl2.html">Abstract Syntax Tree</a>, built in Chapter 2, into LLVM IR.
-This will teach you a little bit about how LLVM does things, as well as
-demonstrate how easy it is to use. It's much more work to build a lexer and
-parser than it is to generate LLVM IR code. :)
-</p>
-
-<p><b>Please note</b>: the code in this chapter and later require LLVM 2.2 or
-later. LLVM 2.1 and before will not work with it. Also note that you need
-to use a version of this tutorial that matches your LLVM release: If you are
-using an official LLVM release, use the version of the documentation included
-with your release or on the <a href="http://llvm.org/releases/">llvm.org
-releases page</a>.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="basics">Code Generation Setup</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-In order to generate LLVM IR, we want some simple setup to get started. First
-we define virtual code generation (codegen) methods in each AST class:</p>
-
-<div class="doc_code">
-<pre>
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
- virtual ~ExprAST() {}
- <b>virtual Value *Codegen() = 0;</b>
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
- double Val;
-public:
- NumberExprAST(double val) : Val(val) {}
- <b>virtual Value *Codegen();</b>
-};
-...
-</pre>
-</div>
-
-<p>The Codegen() method says to emit IR for that AST node along with all the things it
-depends on, and they all return an LLVM Value object.
-"Value" is the class used to represent a "<a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Static Single
-Assignment (SSA)</a> register" or "SSA value" in LLVM. The most distinct aspect
-of SSA values is that their value is computed as the related instruction
-executes, and it does not get a new value until (and if) the instruction
-re-executes. In other words, there is no way to "change" an SSA value. For
-more information, please read up on <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Static Single
-Assignment</a> - the concepts are really quite natural once you grok them.</p>
-
-<p>Note that instead of adding virtual methods to the ExprAST class hierarchy,
-it could also make sense to use a <a
-href="http://en.wikipedia.org/wiki/Visitor_pattern">visitor pattern</a> or some
-other way to model this. Again, this tutorial won't dwell on good software
-engineering practices: for our purposes, adding a virtual method is
-simplest.</p>
-
-<p>The
-second thing we want is an "Error" method like we used for the parser, which will
-be used to report errors found during code generation (for example, use of an
-undeclared parameter):</p>
-
-<div class="doc_code">
-<pre>
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-static Module *TheModule;
-static IRBuilder&lt;&gt; Builder(getGlobalContext());
-static std::map&lt;std::string, Value*&gt; NamedValues;
-</pre>
-</div>
-
-<p>The static variables will be used during code generation. <tt>TheModule</tt>
-is the LLVM construct that contains all of the functions and global variables in
-a chunk of code. In many ways, it is the top-level structure that the LLVM IR
-uses to contain code.</p>
-
-<p>The <tt>Builder</tt> object is a helper object that makes it easy to generate
-LLVM instructions. Instances of the <a
-href="http://llvm.org/doxygen/IRBuilder_8h-source.html"><tt>IRBuilder</tt></a>
-class template keep track of the current place to insert instructions and has
-methods to create new instructions.</p>
-
-<p>The <tt>NamedValues</tt> map keeps track of which values are defined in the
-current scope and what their LLVM representation is. (In other words, it is a
-symbol table for the code). In this form of Kaleidoscope, the only things that
-can be referenced are function parameters. As such, function parameters will
-be in this map when generating code for their function body.</p>
-
-<p>
-With these basics in place, we can start talking about how to generate code for
-each expression. Note that this assumes that the <tt>Builder</tt> has been set
-up to generate code <em>into</em> something. For now, we'll assume that this
-has already been done, and we'll just use it to emit code.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="exprs">Expression Code Generation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Generating LLVM code for expression nodes is very straightforward: less
-than 45 lines of commented code for all four of our expression nodes. First
-we'll do numeric literals:</p>
-
-<div class="doc_code">
-<pre>
-Value *NumberExprAST::Codegen() {
- return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-</pre>
-</div>
-
-<p>In the LLVM IR, numeric constants are represented with the
-<tt>ConstantFP</tt> class, which holds the numeric value in an <tt>APFloat</tt>
-internally (<tt>APFloat</tt> has the capability of holding floating point
-constants of <em>A</em>rbitrary <em>P</em>recision). This code basically just
-creates and returns a <tt>ConstantFP</tt>. Note that in the LLVM IR
-that constants are all uniqued together and shared. For this reason, the API
-uses the "foo::get(...)" idiom instead of "new foo(..)" or "foo::Create(..)".</p>
-
-<div class="doc_code">
-<pre>
-Value *VariableExprAST::Codegen() {
- // Look this variable up in the function.
- Value *V = NamedValues[Name];
- return V ? V : ErrorV("Unknown variable name");
-}
-</pre>
-</div>
-
-<p>References to variables are also quite simple using LLVM. In the simple version
-of Kaleidoscope, we assume that the variable has already been emitted somewhere
-and its value is available. In practice, the only values that can be in the
-<tt>NamedValues</tt> map are function arguments. This
-code simply checks to see that the specified name is in the map (if not, an
-unknown variable is being referenced) and returns the value for it. In future
-chapters, we'll add support for <a href="LangImpl5.html#for">loop induction
-variables</a> in the symbol table, and for <a
-href="LangImpl7.html#localvars">local variables</a>.</p>
-
-<div class="doc_code">
-<pre>
-Value *BinaryExprAST::Codegen() {
- Value *L = LHS-&gt;Codegen();
- Value *R = RHS-&gt;Codegen();
- if (L == 0 || R == 0) return 0;
-
- switch (Op) {
- case '+': return Builder.CreateFAdd(L, R, "addtmp");
- case '-': return Builder.CreateFSub(L, R, "subtmp");
- case '*': return Builder.CreateFMul(L, R, "multmp");
- case '&lt;':
- L = Builder.CreateFCmpULT(L, R, "cmptmp");
- // Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
- "booltmp");
- default: return ErrorV("invalid binary operator");
- }
-}
-</pre>
-</div>
-
-<p>Binary operators start to get more interesting. The basic idea here is that
-we recursively emit code for the left-hand side of the expression, then the
-right-hand side, then we compute the result of the binary expression. In this
-code, we do a simple switch on the opcode to create the right LLVM instruction.
-</p>
-
-<p>In the example above, the LLVM builder class is starting to show its value.
-IRBuilder knows where to insert the newly created instruction, all you have to
-do is specify what instruction to create (e.g. with <tt>CreateFAdd</tt>), which
-operands to use (<tt>L</tt> and <tt>R</tt> here) and optionally provide a name
-for the generated instruction.</p>
-
-<p>One nice thing about LLVM is that the name is just a hint. For instance, if
-the code above emits multiple "addtmp" variables, LLVM will automatically
-provide each one with an increasing, unique numeric suffix. Local value names
-for instructions are purely optional, but it makes it much easier to read the
-IR dumps.</p>
-
-<p><a href="../LangRef.html#instref">LLVM instructions</a> are constrained by
-strict rules: for example, the Left and Right operators of
-an <a href="../LangRef.html#i_add">add instruction</a> must have the same
-type, and the result type of the add must match the operand types. Because
-all values in Kaleidoscope are doubles, this makes for very simple code for add,
-sub and mul.</p>
-
-<p>On the other hand, LLVM specifies that the <a
-href="../LangRef.html#i_fcmp">fcmp instruction</a> always returns an 'i1' value
-(a one bit integer). The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value. In order to get these semantics, we combine the fcmp instruction with
-a <a href="../LangRef.html#i_uitofp">uitofp instruction</a>. This instruction
-converts its input integer into a floating point value by treating the input
-as an unsigned value. In contrast, if we used the <a
-href="../LangRef.html#i_sitofp">sitofp instruction</a>, the Kaleidoscope '&lt;'
-operator would return 0.0 and -1.0, depending on the input value.</p>
-
-<div class="doc_code">
-<pre>
-Value *CallExprAST::Codegen() {
- // Look up the name in the global module table.
- Function *CalleeF = TheModule-&gt;getFunction(Callee);
- if (CalleeF == 0)
- return ErrorV("Unknown function referenced");
-
- // If argument mismatch error.
- if (CalleeF-&gt;arg_size() != Args.size())
- return ErrorV("Incorrect # arguments passed");
-
- std::vector&lt;Value*&gt; ArgsV;
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- ArgsV.push_back(Args[i]-&gt;Codegen());
- if (ArgsV.back() == 0) return 0;
- }
-
- return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-</pre>
-</div>
-
-<p>Code generation for function calls is quite straightforward with LLVM. The
-code above initially does a function name lookup in the LLVM Module's symbol
-table. Recall that the LLVM Module is the container that holds all of the
-functions we are JIT'ing. By giving each function the same name as what the
-user specifies, we can use the LLVM symbol table to resolve function names for
-us.</p>
-
-<p>Once we have the function to call, we recursively codegen each argument that
-is to be passed in, and create an LLVM <a href="../LangRef.html#i_call">call
-instruction</a>. Note that LLVM uses the native C calling conventions by
-default, allowing these calls to also call into standard library functions like
-"sin" and "cos", with no additional effort.</p>
-
-<p>This wraps up our handling of the four basic expressions that we have so far
-in Kaleidoscope. Feel free to go in and add some more. For example, by
-browsing the <a href="../LangRef.html">LLVM language reference</a> you'll find
-several other interesting instructions that are really easy to plug into our
-basic framework.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="funcs">Function Code Generation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Code generation for prototypes and functions must handle a number of
-details, which make their code less beautiful than expression code
-generation, but allows us to illustrate some important points. First, lets
-talk about code generation for prototypes: they are used both for function
-bodies and external function declarations. The code starts with:</p>
-
-<div class="doc_code">
-<pre>
-Function *PrototypeAST::Codegen() {
- // Make the function type: double(double,double) etc.
- std::vector&lt;Type*&gt; Doubles(Args.size(),
- Type::getDoubleTy(getGlobalContext()));
- FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
- Doubles, false);
-
- Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-</pre>
-</div>
-
-<p>This code packs a lot of power into a few lines. Note first that this
-function returns a "Function*" instead of a "Value*". Because a "prototype"
-really talks about the external interface for a function (not the value computed
-by an expression), it makes sense for it to return the LLVM Function it
-corresponds to when codegen'd.</p>
-
-<p>The call to <tt>FunctionType::get</tt> creates
-the <tt>FunctionType</tt> that should be used for a given Prototype. Since all
-function arguments in Kaleidoscope are of type double, the first line creates
-a vector of "N" LLVM double types. It then uses the <tt>Functiontype::get</tt>
-method to create a function type that takes "N" doubles as arguments, returns
-one double as a result, and that is not vararg (the false parameter indicates
-this). Note that Types in LLVM are uniqued just like Constants are, so you
-don't "new" a type, you "get" it.</p>
-
-<p>The final line above actually creates the function that the prototype will
-correspond to. This indicates the type, linkage and name to use, as well as which
-module to insert into. "<a href="../LangRef.html#linkage">external linkage</a>"
-means that the function may be defined outside the current module and/or that it
-is callable by functions outside the module. The Name passed in is the name the
-user specified: since "<tt>TheModule</tt>" is specified, this name is registered
-in "<tt>TheModule</tt>"s symbol table, which is used by the function call code
-above.</p>
-
-<div class="doc_code">
-<pre>
- // If F conflicted, there was already something named 'Name'. If it has a
- // body, don't allow redefinition or reextern.
- if (F-&gt;getName() != Name) {
- // Delete the one we just made and get the existing one.
- F-&gt;eraseFromParent();
- F = TheModule-&gt;getFunction(Name);
-</pre>
-</div>
-
-<p>The Module symbol table works just like the Function symbol table when it
-comes to name conflicts: if a new function is created with a name that was previously
-added to the symbol table, the new function will get implicitly renamed when added to the
-Module. The code above exploits this fact to determine if there was a previous
-definition of this function.</p>
-
-<p>In Kaleidoscope, I choose to allow redefinitions of functions in two cases:
-first, we want to allow 'extern'ing a function more than once, as long as the
-prototypes for the externs match (since all arguments have the same type, we
-just have to check that the number of arguments match). Second, we want to
-allow 'extern'ing a function and then defining a body for it. This is useful
-when defining mutually recursive functions.</p>
-
-<p>In order to implement this, the code above first checks to see if there is
-a collision on the name of the function. If so, it deletes the function we just
-created (by calling <tt>eraseFromParent</tt>) and then calling
-<tt>getFunction</tt> to get the existing function with the specified name. Note
-that many APIs in LLVM have "erase" forms and "remove" forms. The "remove" form
-unlinks the object from its parent (e.g. a Function from a Module) and returns
-it. The "erase" form unlinks the object and then deletes it.</p>
-
-<div class="doc_code">
-<pre>
- // If F already has a body, reject this.
- if (!F-&gt;empty()) {
- ErrorF("redefinition of function");
- return 0;
- }
-
- // If F took a different number of args, reject.
- if (F-&gt;arg_size() != Args.size()) {
- ErrorF("redefinition of function with different # args");
- return 0;
- }
- }
-</pre>
-</div>
-
-<p>In order to verify the logic above, we first check to see if the pre-existing
-function is "empty". In this case, empty means that it has no basic blocks in
-it, which means it has no body. If it has no body, it is a forward
-declaration. Since we don't allow anything after a full definition of the
-function, the code rejects this case. If the previous reference to a function
-was an 'extern', we simply verify that the number of arguments for that
-definition and this one match up. If not, we emit an error.</p>
-
-<div class="doc_code">
-<pre>
- // Set names for all arguments.
- unsigned Idx = 0;
- for (Function::arg_iterator AI = F-&gt;arg_begin(); Idx != Args.size();
- ++AI, ++Idx) {
- AI-&gt;setName(Args[Idx]);
-
- // Add arguments to variable symbol table.
- NamedValues[Args[Idx]] = AI;
- }
- return F;
-}
-</pre>
-</div>
-
-<p>The last bit of code for prototypes loops over all of the arguments in the
-function, setting the name of the LLVM Argument objects to match, and registering
-the arguments in the <tt>NamedValues</tt> map for future use by the
-<tt>VariableExprAST</tt> AST node. Once this is set up, it returns the Function
-object to the caller. Note that we don't check for conflicting
-argument names here (e.g. "extern foo(a b a)"). Doing so would be very
-straight-forward with the mechanics we have already used above.</p>
-
-<div class="doc_code">
-<pre>
-Function *FunctionAST::Codegen() {
- NamedValues.clear();
-
- Function *TheFunction = Proto-&gt;Codegen();
- if (TheFunction == 0)
- return 0;
-</pre>
-</div>
-
-<p>Code generation for function definitions starts out simply enough: we just
-codegen the prototype (Proto) and verify that it is ok. We then clear out the
-<tt>NamedValues</tt> map to make sure that there isn't anything in it from the
-last function we compiled. Code generation of the prototype ensures that there
-is an LLVM Function object that is ready to go for us.</p>
-
-<div class="doc_code">
-<pre>
- // Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
- Builder.SetInsertPoint(BB);
-
- if (Value *RetVal = Body-&gt;Codegen()) {
-</pre>
-</div>
-
-<p>Now we get to the point where the <tt>Builder</tt> is set up. The first
-line creates a new <a href="http://en.wikipedia.org/wiki/Basic_block">basic
-block</a> (named "entry"), which is inserted into <tt>TheFunction</tt>. The
-second line then tells the builder that new instructions should be inserted into
-the end of the new basic block. Basic blocks in LLVM are an important part
-of functions that define the <a
-href="http://en.wikipedia.org/wiki/Control_flow_graph">Control Flow Graph</a>.
-Since we don't have any control flow, our functions will only contain one
-block at this point. We'll fix this in <a href="LangImpl5.html">Chapter 5</a> :).</p>
-
-<div class="doc_code">
-<pre>
- if (Value *RetVal = Body-&gt;Codegen()) {
- // Finish off the function.
- Builder.CreateRet(RetVal);
-
- // Validate the generated code, checking for consistency.
- verifyFunction(*TheFunction);
-
- return TheFunction;
- }
-</pre>
-</div>
-
-<p>Once the insertion point is set up, we call the <tt>CodeGen()</tt> method for
-the root expression of the function. If no error happens, this emits code to
-compute the expression into the entry block and returns the value that was
-computed. Assuming no error, we then create an LLVM <a
-href="../LangRef.html#i_ret">ret instruction</a>, which completes the function.
-Once the function is built, we call <tt>verifyFunction</tt>, which
-is provided by LLVM. This function does a variety of consistency checks on the
-generated code, to determine if our compiler is doing everything right. Using
-this is important: it can catch a lot of bugs. Once the function is finished
-and validated, we return it.</p>
-
-<div class="doc_code">
-<pre>
- // Error reading body, remove function.
- TheFunction-&gt;eraseFromParent();
- return 0;
-}
-</pre>
-</div>
-
-<p>The only piece left here is handling of the error case. For simplicity, we
-handle this by merely deleting the function we produced with the
-<tt>eraseFromParent</tt> method. This allows the user to redefine a function
-that they incorrectly typed in before: if we didn't delete it, it would live in
-the symbol table, with a body, preventing future redefinition.</p>
-
-<p>This code does have a bug, though. Since the <tt>PrototypeAST::Codegen</tt>
-can return a previously defined forward declaration, our code can actually delete
-a forward declaration. There are a number of ways to fix this bug, see what you
-can come up with! Here is a testcase:</p>
-
-<div class="doc_code">
-<pre>
-extern foo(a b); # ok, defines foo.
-def foo(a b) c; # error, 'c' is invalid.
-def bar() foo(1, 2); # error, unknown function "foo"
-</pre>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="driver">Driver Changes and Closing Thoughts</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-For now, code generation to LLVM doesn't really get us much, except that we can
-look at the pretty IR calls. The sample code inserts calls to Codegen into the
-"<tt>HandleDefinition</tt>", "<tt>HandleExtern</tt>" etc functions, and then
-dumps out the LLVM IR. This gives a nice way to look at the LLVM IR for simple
-functions. For example:
-</p>
-
-<div class="doc_code">
-<pre>
-ready> <b>4+5</b>;
-Read top-level expression:
-define double @0() {
-entry:
- ret double 9.000000e+00
-}
-</pre>
-</div>
-
-<p>Note how the parser turns the top-level expression into anonymous functions
-for us. This will be handy when we add <a href="LangImpl4.html#jit">JIT
-support</a> in the next chapter. Also note that the code is very literally
-transcribed, no optimizations are being performed except simple constant
-folding done by IRBuilder. We will
-<a href="LangImpl4.html#trivialconstfold">add optimizations</a> explicitly in
-the next chapter.</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def foo(a b) a*a + 2*a*b + b*b;</b>
-Read function definition:
-define double @foo(double %a, double %b) {
-entry:
- %multmp = fmul double %a, %a
- %multmp1 = fmul double 2.000000e+00, %a
- %multmp2 = fmul double %multmp1, %b
- %addtmp = fadd double %multmp, %multmp2
- %multmp3 = fmul double %b, %b
- %addtmp4 = fadd double %addtmp, %multmp3
- ret double %addtmp4
-}
-</pre>
-</div>
-
-<p>This shows some simple arithmetic. Notice the striking similarity to the
-LLVM builder calls that we use to create the instructions.</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def bar(a) foo(a, 4.0) + bar(31337);</b>
-Read function definition:
-define double @bar(double %a) {
-entry:
- %calltmp = call double @foo(double %a, double 4.000000e+00)
- %calltmp1 = call double @bar(double 3.133700e+04)
- %addtmp = fadd double %calltmp, %calltmp1
- ret double %addtmp
-}
-</pre>
-</div>
-
-<p>This shows some function calls. Note that this function will take a long
-time to execute if you call it. In the future we'll add conditional control
-flow to actually make recursion useful :).</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern cos(x);</b>
-Read extern:
-declare double @cos(double)
-
-ready&gt; <b>cos(1.234);</b>
-Read top-level expression:
-define double @1() {
-entry:
- %calltmp = call double @cos(double 1.234000e+00)
- ret double %calltmp
-}
-</pre>
-</div>
-
-<p>This shows an extern for the libm "cos" function, and a call to it.</p>
-
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>^D</b>
-; ModuleID = 'my cool jit'
-
-define double @0() {
-entry:
- %addtmp = fadd double 4.000000e+00, 5.000000e+00
- ret double %addtmp
-}
-
-define double @foo(double %a, double %b) {
-entry:
- %multmp = fmul double %a, %a
- %multmp1 = fmul double 2.000000e+00, %a
- %multmp2 = fmul double %multmp1, %b
- %addtmp = fadd double %multmp, %multmp2
- %multmp3 = fmul double %b, %b
- %addtmp4 = fadd double %addtmp, %multmp3
- ret double %addtmp4
-}
-
-define double @bar(double %a) {
-entry:
- %calltmp = call double @foo(double %a, double 4.000000e+00)
- %calltmp1 = call double @bar(double 3.133700e+04)
- %addtmp = fadd double %calltmp, %calltmp1
- ret double %addtmp
-}
-
-declare double @cos(double)
-
-define double @1() {
-entry:
- %calltmp = call double @cos(double 1.234000e+00)
- ret double %calltmp
-}
-</pre>
-</div>
-
-<p>When you quit the current demo, it dumps out the IR for the entire module
-generated. Here you can see the big picture with all the functions referencing
-each other.</p>
-
-<p>This wraps up the third chapter of the Kaleidoscope tutorial. Up next, we'll
-describe how to <a href="LangImpl4.html">add JIT codegen and optimizer
-support</a> to this so we can actually start running code!</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-LLVM code generator. Because this uses the LLVM libraries, we need to link
-them in. To do this, we use the <a
-href="http://llvm.org/cmds/llvm-config.html">llvm-config</a> tool to inform
-our makefile/command line about which options to use:</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g -O3 toy.cpp `llvm-config --cppflags --ldflags --libs core` -o toy
-# Run
-./toy
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-// To build this:
-// See example below.
-
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Analysis/Verifier.h"
-#include &lt;cstdio&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
- tok_eof = -1,
-
- // commands
- tok_def = -2, tok_extern = -3,
-
- // primary
- tok_identifier = -4, tok_number = -5
-};
-
-static std::string IdentifierStr; // Filled in if tok_identifier
-static double NumVal; // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
- static int LastChar = ' ';
-
- // Skip any whitespace.
- while (isspace(LastChar))
- LastChar = getchar();
-
- if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
- IdentifierStr = LastChar;
- while (isalnum((LastChar = getchar())))
- IdentifierStr += LastChar;
-
- if (IdentifierStr == "def") return tok_def;
- if (IdentifierStr == "extern") return tok_extern;
- return tok_identifier;
- }
-
- if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
- std::string NumStr;
- do {
- NumStr += LastChar;
- LastChar = getchar();
- } while (isdigit(LastChar) || LastChar == '.');
-
- NumVal = strtod(NumStr.c_str(), 0);
- return tok_number;
- }
-
- if (LastChar == '#') {
- // Comment until end of line.
- do LastChar = getchar();
- while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-
- if (LastChar != EOF)
- return gettok();
- }
-
- // Check for end of file. Don't eat the EOF.
- if (LastChar == EOF)
- return tok_eof;
-
- // Otherwise, just return the character as its ascii value.
- int ThisChar = LastChar;
- LastChar = getchar();
- return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
- virtual ~ExprAST() {}
- virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
- double Val;
-public:
- NumberExprAST(double val) : Val(val) {}
- virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
- std::string Name;
-public:
- VariableExprAST(const std::string &amp;name) : Name(name) {}
- virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
- char Op;
- ExprAST *LHS, *RHS;
-public:
- BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
- : Op(op), LHS(lhs), RHS(rhs) {}
- virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
- std::string Callee;
- std::vector&lt;ExprAST*&gt; Args;
-public:
- CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
- : Callee(callee), Args(args) {}
- virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
- std::string Name;
- std::vector&lt;std::string&gt; Args;
-public:
- PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args)
- : Name(name), Args(args) {}
-
- Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
- PrototypeAST *Proto;
- ExprAST *Body;
-public:
- FunctionAST(PrototypeAST *proto, ExprAST *body)
- : Proto(proto), Body(body) {}
-
- Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser is looking at. getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
- return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
- if (!isascii(CurTok))
- return -1;
-
- // Make sure it's a declared binop.
- int TokPrec = BinopPrecedence[CurTok];
- if (TokPrec &lt;= 0) return -1;
- return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-/// ::= identifier
-/// ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
- std::string IdName = IdentifierStr;
-
- getNextToken(); // eat identifier.
-
- if (CurTok != '(') // Simple variable ref.
- return new VariableExprAST(IdName);
-
- // Call.
- getNextToken(); // eat (
- std::vector&lt;ExprAST*&gt; Args;
- if (CurTok != ')') {
- while (1) {
- ExprAST *Arg = ParseExpression();
- if (!Arg) return 0;
- Args.push_back(Arg);
-
- if (CurTok == ')') break;
-
- if (CurTok != ',')
- return Error("Expected ')' or ',' in argument list");
- getNextToken();
- }
- }
-
- // Eat the ')'.
- getNextToken();
-
- return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
- ExprAST *Result = new NumberExprAST(NumVal);
- getNextToken(); // consume the number
- return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
- getNextToken(); // eat (.
- ExprAST *V = ParseExpression();
- if (!V) return 0;
-
- if (CurTok != ')')
- return Error("expected ')'");
- getNextToken(); // eat ).
- return V;
-}
-
-/// primary
-/// ::= identifierexpr
-/// ::= numberexpr
-/// ::= parenexpr
-static ExprAST *ParsePrimary() {
- switch (CurTok) {
- default: return Error("unknown token when expecting an expression");
- case tok_identifier: return ParseIdentifierExpr();
- case tok_number: return ParseNumberExpr();
- case '(': return ParseParenExpr();
- }
-}
-
-/// binoprhs
-/// ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
- // If this is a binop, find its precedence.
- while (1) {
- int TokPrec = GetTokPrecedence();
-
- // If this is a binop that binds at least as tightly as the current binop,
- // consume it, otherwise we are done.
- if (TokPrec &lt; ExprPrec)
- return LHS;
-
- // Okay, we know this is a binop.
- int BinOp = CurTok;
- getNextToken(); // eat binop
-
- // Parse the primary expression after the binary operator.
- ExprAST *RHS = ParsePrimary();
- if (!RHS) return 0;
-
- // If BinOp binds less tightly with RHS than the operator after RHS, let
- // the pending operator take RHS as its LHS.
- int NextPrec = GetTokPrecedence();
- if (TokPrec &lt; NextPrec) {
- RHS = ParseBinOpRHS(TokPrec+1, RHS);
- if (RHS == 0) return 0;
- }
-
- // Merge LHS/RHS.
- LHS = new BinaryExprAST(BinOp, LHS, RHS);
- }
-}
-
-/// expression
-/// ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
- ExprAST *LHS = ParsePrimary();
- if (!LHS) return 0;
-
- return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-/// ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
- if (CurTok != tok_identifier)
- return ErrorP("Expected function name in prototype");
-
- std::string FnName = IdentifierStr;
- getNextToken();
-
- if (CurTok != '(')
- return ErrorP("Expected '(' in prototype");
-
- std::vector&lt;std::string&gt; ArgNames;
- while (getNextToken() == tok_identifier)
- ArgNames.push_back(IdentifierStr);
- if (CurTok != ')')
- return ErrorP("Expected ')' in prototype");
-
- // success.
- getNextToken(); // eat ')'.
-
- return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
- getNextToken(); // eat def.
- PrototypeAST *Proto = ParsePrototype();
- if (Proto == 0) return 0;
-
- if (ExprAST *E = ParseExpression())
- return new FunctionAST(Proto, E);
- return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
- if (ExprAST *E = ParseExpression()) {
- // Make an anonymous proto.
- PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
- return new FunctionAST(Proto, E);
- }
- return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
- getNextToken(); // eat extern.
- return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder&lt;&gt; Builder(getGlobalContext());
-static std::map&lt;std::string, Value*&gt; NamedValues;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
- return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
- // Look this variable up in the function.
- Value *V = NamedValues[Name];
- return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *BinaryExprAST::Codegen() {
- Value *L = LHS-&gt;Codegen();
- Value *R = RHS-&gt;Codegen();
- if (L == 0 || R == 0) return 0;
-
- switch (Op) {
- case '+': return Builder.CreateFAdd(L, R, "addtmp");
- case '-': return Builder.CreateFSub(L, R, "subtmp");
- case '*': return Builder.CreateFMul(L, R, "multmp");
- case '&lt;':
- L = Builder.CreateFCmpULT(L, R, "cmptmp");
- // Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
- "booltmp");
- default: return ErrorV("invalid binary operator");
- }
-}
-
-Value *CallExprAST::Codegen() {
- // Look up the name in the global module table.
- Function *CalleeF = TheModule-&gt;getFunction(Callee);
- if (CalleeF == 0)
- return ErrorV("Unknown function referenced");
-
- // If argument mismatch error.
- if (CalleeF-&gt;arg_size() != Args.size())
- return ErrorV("Incorrect # arguments passed");
-
- std::vector&lt;Value*&gt; ArgsV;
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- ArgsV.push_back(Args[i]-&gt;Codegen());
- if (ArgsV.back() == 0) return 0;
- }
-
- return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Function *PrototypeAST::Codegen() {
- // Make the function type: double(double,double) etc.
- std::vector&lt;Type*&gt; Doubles(Args.size(),
- Type::getDoubleTy(getGlobalContext()));
- FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
- Doubles, false);
-
- Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-
- // If F conflicted, there was already something named 'Name'. If it has a
- // body, don't allow redefinition or reextern.
- if (F-&gt;getName() != Name) {
- // Delete the one we just made and get the existing one.
- F-&gt;eraseFromParent();
- F = TheModule-&gt;getFunction(Name);
-
- // If F already has a body, reject this.
- if (!F-&gt;empty()) {
- ErrorF("redefinition of function");
- return 0;
- }
-
- // If F took a different number of args, reject.
- if (F-&gt;arg_size() != Args.size()) {
- ErrorF("redefinition of function with different # args");
- return 0;
- }
- }
-
- // Set names for all arguments.
- unsigned Idx = 0;
- for (Function::arg_iterator AI = F-&gt;arg_begin(); Idx != Args.size();
- ++AI, ++Idx) {
- AI-&gt;setName(Args[Idx]);
-
- // Add arguments to variable symbol table.
- NamedValues[Args[Idx]] = AI;
- }
-
- return F;
-}
-
-Function *FunctionAST::Codegen() {
- NamedValues.clear();
-
- Function *TheFunction = Proto-&gt;Codegen();
- if (TheFunction == 0)
- return 0;
-
- // Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
- Builder.SetInsertPoint(BB);
-
- if (Value *RetVal = Body-&gt;Codegen()) {
- // Finish off the function.
- Builder.CreateRet(RetVal);
-
- // Validate the generated code, checking for consistency.
- verifyFunction(*TheFunction);
-
- return TheFunction;
- }
-
- // Error reading body, remove function.
- TheFunction-&gt;eraseFromParent();
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static void HandleDefinition() {
- if (FunctionAST *F = ParseDefinition()) {
- if (Function *LF = F-&gt;Codegen()) {
- fprintf(stderr, "Read function definition:");
- LF-&gt;dump();
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleExtern() {
- if (PrototypeAST *P = ParseExtern()) {
- if (Function *F = P-&gt;Codegen()) {
- fprintf(stderr, "Read extern: ");
- F-&gt;dump();
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleTopLevelExpression() {
- // Evaluate a top-level expression into an anonymous function.
- if (FunctionAST *F = ParseTopLevelExpr()) {
- if (Function *LF = F-&gt;Codegen()) {
- fprintf(stderr, "Read top-level expression:");
- LF-&gt;dump();
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
- while (1) {
- fprintf(stderr, "ready&gt; ");
- switch (CurTok) {
- case tok_eof: return;
- case ';': getNextToken(); break; // ignore top-level semicolons.
- case tok_def: HandleDefinition(); break;
- case tok_extern: HandleExtern(); break;
- default: HandleTopLevelExpression(); break;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C"
-double putchard(double X) {
- putchar((char)X);
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
- LLVMContext &amp;Context = getGlobalContext();
-
- // Install standard binary operators.
- // 1 is lowest precedence.
- BinopPrecedence['&lt;'] = 10;
- BinopPrecedence['+'] = 20;
- BinopPrecedence['-'] = 20;
- BinopPrecedence['*'] = 40; // highest.
-
- // Prime the first token.
- fprintf(stderr, "ready&gt; ");
- getNextToken();
-
- // Make the module, which holds all the code.
- TheModule = new Module("my cool jit", Context);
-
- // Run the main "interpreter loop" now.
- MainLoop();
-
- // Print out all of the generated code.
- TheModule-&gt;dump();
-
- return 0;
-}
-</pre>
-</div>
-<a href="LangImpl4.html">Next: Adding JIT and Optimizer Support</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-06-29 14:38:19 +0200 (Fri, 29 Jun 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl3.rst b/docs/tutorial/LangImpl3.rst
new file mode 100644
index 000000000000..9d5f90839edc
--- /dev/null
+++ b/docs/tutorial/LangImpl3.rst
@@ -0,0 +1,1160 @@
+========================================
+Kaleidoscope: Code generation to LLVM IR
+========================================
+
+.. contents::
+ :local:
+
+Chapter 3 Introduction
+======================
+
+Welcome to Chapter 3 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. This chapter shows you how to transform
+the `Abstract Syntax Tree <LangImpl2.html>`_, built in Chapter 2, into
+LLVM IR. This will teach you a little bit about how LLVM does things, as
+well as demonstrate how easy it is to use. It's much more work to build
+a lexer and parser than it is to generate LLVM IR code. :)
+
+**Please note**: the code in this chapter and later requires LLVM 2.2 or
+later; LLVM 2.1 and before will not work with it. Also note that you
+need to use a version of this tutorial that matches your LLVM release:
+If you are using an official LLVM release, use the version of the
+documentation included with your release or on the `llvm.org releases
+page <http://llvm.org/releases/>`_.
+
+Code Generation Setup
+=====================
+
+In order to generate LLVM IR, we want some simple setup to get started.
+First we define virtual code generation (codegen) methods in each AST
+class:
+
+.. code-block:: c++
+
+ /// ExprAST - Base class for all expression nodes.
+ class ExprAST {
+ public:
+ virtual ~ExprAST() {}
+ virtual Value *Codegen() = 0;
+ };
+
+ /// NumberExprAST - Expression class for numeric literals like "1.0".
+ class NumberExprAST : public ExprAST {
+ double Val;
+ public:
+ NumberExprAST(double val) : Val(val) {}
+ virtual Value *Codegen();
+ };
+ ...
+
+The Codegen() method emits IR for that AST node and everything it
+depends on, and each implementation returns an LLVM Value object.
+"Value" is the class used to represent a "`Static Single Assignment
+(SSA) <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+register" or "SSA value" in LLVM. The most distinct aspect of SSA values
+is that their value is computed as the related instruction executes, and
+it does not get a new value until (and if) the instruction re-executes.
+In other words, there is no way to "change" an SSA value. For more
+information, please read up on `Static Single
+Assignment <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+- the concepts are really quite natural once you grok them.
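+
+As a tiny, hand-written illustration (not output from the tutorial),
+each SSA name is defined exactly once:
+
+::
+
+    %sum = fadd double 1.000000e+00, 2.000000e+00  ; defines %sum, once
+    %sum = fadd double %sum, 1.000000e+00          ; illegal: cannot redefine %sum
+    %sum2 = fadd double %sum, 1.000000e+00         ; a new value needs a new name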
+
+Note that instead of adding virtual methods to the ExprAST class
+hierarchy, it could also make sense to use a `visitor
+pattern <http://en.wikipedia.org/wiki/Visitor_pattern>`_ or some other
+way to model this. Again, this tutorial won't dwell on good software
+engineering practices: for our purposes, adding a virtual method is
+simplest.
+
+The second thing we want is an "Error" method like we used for the
+parser, which will be used to report errors found during code generation
+(for example, use of an undeclared parameter):
+
+.. code-block:: c++
+
+ Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+ static Module *TheModule;
+ static IRBuilder<> Builder(getGlobalContext());
+ static std::map<std::string, Value*> NamedValues;
+
+The static variables will be used during code generation. ``TheModule``
+is the LLVM construct that contains all of the functions and global
+variables in a chunk of code. In many ways, it is the top-level
+structure that the LLVM IR uses to contain code.
+
+The ``Builder`` object is a helper object that makes it easy to generate
+LLVM instructions. Instances of the
+`IRBuilder <http://llvm.org/doxygen/IRBuilder_8h-source.html>`_
+class template keep track of the current place to insert instructions
+and have methods to create new instructions.
+
+The ``NamedValues`` map keeps track of which values are defined in the
+current scope and what their LLVM representation is. (In other words, it
+is a symbol table for the code). In this form of Kaleidoscope, the only
+things that can be referenced are function parameters. As such, function
+parameters will be in this map when generating code for their function
+body.
+
+With these basics in place, we can start talking about how to generate
+code for each expression. Note that this assumes that the ``Builder``
+has been set up to generate code *into* something. For now, we'll assume
+that this has already been done, and we'll just use it to emit code.
+
+Expression Code Generation
+==========================
+
+Generating LLVM code for expression nodes is very straightforward: less
+than 45 lines of commented code for all four of our expression nodes.
+First we'll do numeric literals:
+
+.. code-block:: c++
+
+ Value *NumberExprAST::Codegen() {
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
+ }
+
+In the LLVM IR, numeric constants are represented with the
+``ConstantFP`` class, which holds the numeric value in an ``APFloat``
+internally (``APFloat`` has the capability of holding floating point
+constants of Arbitrary Precision). This code basically just creates
+and returns a ``ConstantFP``. Note that in the LLVM IR, constants are
+all uniqued together and shared. For this reason, the API uses the
+"foo::get(...)" idiom instead of "new foo(..)" or "foo::Create(..)".
+
+.. code-block:: c++
+
+ Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ return V ? V : ErrorV("Unknown variable name");
+ }
+
+References to variables are also quite simple using LLVM. In the simple
+version of Kaleidoscope, we assume that the variable has already been
+emitted somewhere and its value is available. In practice, the only
+values that can be in the ``NamedValues`` map are function arguments.
+This code simply checks to see that the specified name is in the map (if
+not, an unknown variable is being referenced) and returns the value for
+it. In future chapters, we'll add support for `loop induction
+variables <LangImpl5.html#for>`_ in the symbol table, and for `local
+variables <LangImpl7.html#localvars>`_.
+
+.. code-block:: c++
+
+ Value *BinaryExprAST::Codegen() {
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateFAdd(L, R, "addtmp");
+ case '-': return Builder.CreateFSub(L, R, "subtmp");
+ case '*': return Builder.CreateFMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: return ErrorV("invalid binary operator");
+ }
+ }
+
+Binary operators start to get more interesting. The basic idea here is
+that we recursively emit code for the left-hand side of the expression,
+then the right-hand side, then we compute the result of the binary
+expression. In this code, we do a simple switch on the opcode to create
+the right LLVM instruction.
+
+In the example above, the LLVM builder class is starting to show its
+value. IRBuilder knows where to insert the newly created instruction,
+all you have to do is specify what instruction to create (e.g. with
+``CreateFAdd``), which operands to use (``L`` and ``R`` here) and
+optionally provide a name for the generated instruction.
+
+One nice thing about LLVM is that the name is just a hint. For instance,
+if the code above emits multiple "addtmp" variables, LLVM will
+automatically provide each one with an increasing, unique numeric
+suffix. Local value names for instructions are purely optional, but they
+make it much easier to read the IR dumps.
+
+`LLVM instructions <../LangRef.html#instref>`_ are constrained by strict
+rules: for example, the Left and Right operands of an `add
+instruction <../LangRef.html#i_add>`_ must have the same type, and the
+result type of the add must match the operand types. Because all values
+in Kaleidoscope are doubles, this makes for very simple code for add,
+sub and mul.
+
+On the other hand, LLVM specifies that the `fcmp
+instruction <../LangRef.html#i_fcmp>`_ always returns an 'i1' value (a
+one bit integer). The problem with this is that Kaleidoscope wants the
+value to be a 0.0 or 1.0 value. In order to get these semantics, we
+combine the fcmp instruction with a `uitofp
+instruction <../LangRef.html#i_uitofp>`_. This instruction converts its
+input integer into a floating point value by treating the input as an
+unsigned value. In contrast, if we used the `sitofp
+instruction <../LangRef.html#i_sitofp>`_, the Kaleidoscope '<' operator
+would return 0.0 and -1.0, depending on the input value.
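+
+Concretely, under these rules the Kaleidoscope expression ``x < y``
+lowers to a two-instruction sequence along these lines:
+
+::
+
+    %cmptmp = fcmp ult double %x, %y
+    %booltmp = uitofp i1 %cmptmp to double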
+
+.. code-block:: c++
+
+ Value *CallExprAST::Codegen() {
+ // Look up the name in the global module table.
+ Function *CalleeF = TheModule->getFunction(Callee);
+ if (CalleeF == 0)
+ return ErrorV("Unknown function referenced");
+
+ // If argument mismatch error.
+ if (CalleeF->arg_size() != Args.size())
+ return ErrorV("Incorrect # arguments passed");
+
+ std::vector<Value*> ArgsV;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ ArgsV.push_back(Args[i]->Codegen());
+ if (ArgsV.back() == 0) return 0;
+ }
+
+ return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+ }
+
+Code generation for function calls is quite straightforward with LLVM.
+The code above initially does a function name lookup in the LLVM
+Module's symbol table. Recall that the LLVM Module is the container that
+holds all of the functions we are JIT'ing. By giving each function the
+same name as what the user specifies, we can use the LLVM symbol table
+to resolve function names for us.
+
+Once we have the function to call, we recursively codegen each argument
+that is to be passed in, and create an LLVM `call
+instruction <../LangRef.html#i_call>`_. Note that LLVM uses the native C
+calling conventions by default, allowing these calls to also call into
+standard library functions like "sin" and "cos", with no additional
+effort.
+
+This wraps up our handling of the four basic expressions that we have so
+far in Kaleidoscope. Feel free to go in and add some more. For example,
+by browsing the `LLVM language reference <../LangRef.html>`_ you'll find
+several other interesting instructions that are really easy to plug into
+our basic framework.
+
+Function Code Generation
+========================
+
+Code generation for prototypes and functions must handle a number of
+details, which make their code less beautiful than expression code
+generation, but allow us to illustrate some important points. First,
+let's talk about code generation for prototypes: they are used both for
+function bodies and external function declarations. The code starts
+with:
+
+.. code-block:: c++
+
+ Function *PrototypeAST::Codegen() {
+ // Make the function type: double(double,double) etc.
+ std::vector<Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
+
+ Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+This code packs a lot of power into a few lines. Note first that this
+function returns a "Function\*" instead of a "Value\*". Because a
+"prototype" really talks about the external interface for a function
+(not the value computed by an expression), it makes sense for it to
+return the LLVM Function it corresponds to when codegen'd.
+
+The call to ``FunctionType::get`` creates the ``FunctionType`` that
+should be used for a given Prototype. Since all function arguments in
+Kaleidoscope are of type double, the first line creates a vector of "N"
+LLVM double types. It then uses the ``FunctionType::get`` method to
+create a function type that takes "N" doubles as arguments, returns one
+double as a result, and that is not vararg (the false parameter
+indicates this). Note that Types in LLVM are uniqued just like Constants
+are, so you don't "new" a type, you "get" it.
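+
+As with constants, the uniquing is easy to observe; here is a
+hypothetical check, not part of the tutorial code:
+
+.. code-block:: c++
+
+    // Repeated "get"s hand back the very same Type object for a given
+    // context, so pointer comparison suffices.
+    Type *T1 = Type::getDoubleTy(getGlobalContext());
+    Type *T2 = Type::getDoubleTy(getGlobalContext());
+    assert(T1 == T2 && "Types are uniqued, just like Constants");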
+
+The final line above actually creates the function that the prototype
+will correspond to. This indicates the type, linkage and name to use, as
+well as which module to insert into. "`external
+linkage <../LangRef.html#linkage>`_" means that the function may be
+defined outside the current module and/or that it is callable by
+functions outside the module. The Name passed in is the name the user
+specified: since "``TheModule``" is specified, this name is registered
+in "``TheModule``"s symbol table, which is used by the function call
+code above.
+
+.. code-block:: c++
+
+ // If F conflicted, there was already something named 'Name'. If it has a
+ // body, don't allow redefinition or reextern.
+ if (F->getName() != Name) {
+ // Delete the one we just made and get the existing one.
+ F->eraseFromParent();
+ F = TheModule->getFunction(Name);
+
+The Module symbol table works just like the Function symbol table when
+it comes to name conflicts: if a new function is created with a name
+that was previously added to the symbol table, the new function will get
+implicitly renamed when added to the Module. The code above exploits
+this fact to determine if there was a previous definition of this
+function.
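+
+A small sketch (hypothetical, not part of the tutorial code) of the
+renaming behavior being exploited, reusing ``FT`` and ``TheModule``
+from the surrounding code:
+
+.. code-block:: c++
+
+    // "foo" is taken by First, so Second is implicitly renamed to a
+    // unique name such as "foo1" when it is added to the Module.
+    Function *First = Function::Create(FT, Function::ExternalLinkage,
+                                       "foo", TheModule);
+    Function *Second = Function::Create(FT, Function::ExternalLinkage,
+                                        "foo", TheModule);
+    // Second->getName() != "foo" -- exactly the signal that the
+    // F->getName() != Name test above keys off of.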
+
+In Kaleidoscope, I choose to allow redefinitions of functions in two
+cases: first, we want to allow 'extern'ing a function more than once, as
+long as the prototypes for the externs match (since all arguments have
+the same type, we just have to check that the number of arguments
+match). Second, we want to allow 'extern'ing a function and then
+defining a body for it. This is useful when defining mutually recursive
+functions.
+
+In order to implement this, the code above first checks to see if there
+is a collision on the name of the function. If so, it deletes the
+function we just created (by calling ``eraseFromParent``) and then
+calls ``getFunction`` to get the existing function with the specified
+name. Note that many APIs in LLVM have "erase" forms and "remove" forms.
+The "remove" form unlinks the object from its parent (e.g. a Function
+from a Module) and returns it. The "erase" form unlinks the object and
+then deletes it.
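+
+A quick contrast of the two forms (sketch only; the calls are
+alternatives, not a sequence):
+
+.. code-block:: c++
+
+    F->removeFromParent(); // unlink F from its Module; F stays alive and
+                           // the caller now owns it
+    F->eraseFromParent();  // unlink F *and* delete it; F must not be
+                           // used afterwards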
+
+.. code-block:: c++
+
+ // If F already has a body, reject this.
+ if (!F->empty()) {
+ ErrorF("redefinition of function");
+ return 0;
+ }
+
+ // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) {
+ ErrorF("redefinition of function with different # args");
+ return 0;
+ }
+ }
+
+In order to verify the logic above, we first check to see if the
+pre-existing function is "empty". In this case, empty means that it has
+no basic blocks in it, which means it has no body. If it has no body, it
+is a forward declaration. Since we don't allow anything after a full
+definition of the function, the code rejects this case. If the previous
+reference to a function was an 'extern', we simply verify that the
+number of arguments for that definition and this one match up. If not,
+we emit an error.
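+
+In REPL terms, these rules play out like this (an illustrative session
+matching the semantics described, not captured output):
+
+::
+
+    ready> extern foo(a b);   # fine: first declaration
+    ready> extern foo(a b);   # fine: re-extern with matching arity
+    ready> def foo(a b) a+b;  # fine: supplies a body for the extern
+    ready> def foo(a b) a*b;  # error: redefinition of function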
+
+.. code-block:: c++
+
+ // Set names for all arguments.
+ unsigned Idx = 0;
+ for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+ ++AI, ++Idx) {
+ AI->setName(Args[Idx]);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = AI;
+ }
+ return F;
+ }
+
+The last bit of code for prototypes loops over all of the arguments in
+the function, setting the name of the LLVM Argument objects to match,
+and registering the arguments in the ``NamedValues`` map for future use
+by the ``VariableExprAST`` AST node. Once this is set up, it returns the
+Function object to the caller. Note that we don't check for conflicting
+argument names here (e.g. "extern foo(a b a)"). Doing so would be very
+straightforward with the mechanics we have already used above.
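+
+For instance, a hypothetical duplicate-argument check (not part of the
+tutorial code) could run just before the naming loop:
+
+.. code-block:: c++
+
+    // Reject prototypes like "extern foo(a b a)".  Requires <set>.
+    std::set<std::string> SeenNames;
+    for (unsigned i = 0, e = Args.size(); i != e; ++i)
+      if (!SeenNames.insert(Args[i]).second) {
+        ErrorF("conflicting argument names in prototype");
+        return 0;
+      }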
+
+.. code-block:: c++
+
+ Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+Code generation for function definitions starts out simply enough: we
+first clear out the ``NamedValues`` map to make sure that there isn't
+anything left in it from the last function we compiled, then codegen the
+prototype (Proto) and verify that it is ok. Code generation of the
+prototype ensures that there is an LLVM Function object that is ready to
+go for us.
+
+.. code-block:: c++
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ if (Value *RetVal = Body->Codegen()) {
+
+Now we get to the point where the ``Builder`` is set up. The first line
+creates a new `basic block <http://en.wikipedia.org/wiki/Basic_block>`_
+(named "entry"), which is inserted into ``TheFunction``. The second line
+then tells the builder that new instructions should be inserted into the
+end of the new basic block. Basic blocks in LLVM are an important part
+of functions that define the `Control Flow
+Graph <http://en.wikipedia.org/wiki/Control_flow_graph>`_. Since we
+don't have any control flow, our functions will only contain one block
+at this point. We'll fix this in `Chapter 5 <LangImpl5.html>`_ :).
+
+.. code-block:: c++
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ return TheFunction;
+ }
+
+Once the insertion point is set up, we call the ``Codegen()`` method for
+the root expression of the function. If no error happens, this emits
+code to compute the expression into the entry block and returns the
+value that was computed. Assuming no error, we then create an LLVM `ret
+instruction <../LangRef.html#i_ret>`_, which completes the function.
+Once the function is built, we call ``verifyFunction``, which is
+provided by LLVM. This function does a variety of consistency checks on
+the generated code, to determine if our compiler is doing everything
+right. Using this is important: it can catch a lot of bugs. Once the
+function is finished and validated, we return it.
+
+.. code-block:: c++
+
+ // Error reading body, remove function.
+ TheFunction->eraseFromParent();
+ return 0;
+ }
+
+The only piece left here is handling of the error case. For simplicity,
+we handle this by merely deleting the function we produced with the
+``eraseFromParent`` method. This allows the user to redefine a function
+that they incorrectly typed in before: if we didn't delete it, it would
+live in the symbol table, with a body, preventing future redefinition.
+
+This code does have a bug, though. Since ``PrototypeAST::Codegen`` can
+return a previously defined forward declaration, our code can actually
+delete a forward declaration. There are a number of ways to fix this
+bug; see what you can come up with! Here is a testcase:
+
+::
+
+ extern foo(a b); # ok, defines foo.
+ def foo(a b) c; # error, 'c' is invalid.
+ def bar() foo(1, 2); # error, unknown function "foo"
+
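+One possible fix, sketched here under the assumption that leaving a
+bare declaration behind on error is acceptable (this is not the
+tutorial's canonical answer): drop only the body we emitted instead of
+erasing the whole function, so a pre-existing forward declaration
+survives:
+
+.. code-block:: c++
+
+    // Error reading body: deleteBody() removes the basic blocks we
+    // added, turning the Function back into a declaration instead of
+    // destroying it outright.
+    TheFunction->deleteBody();
+    return 0;
+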
+Driver Changes and Closing Thoughts
+===================================
+
+For now, code generation to LLVM doesn't really get us much, except that
+we can look at the pretty IR calls. The sample code inserts calls to
+Codegen into the "``HandleDefinition``", "``HandleExtern``" etc
+functions, and then dumps out the LLVM IR. This gives a nice way to look
+at the LLVM IR for simple functions. For example:
+
+::
+
+ ready> 4+5;
+ Read top-level expression:
+ define double @0() {
+ entry:
+ ret double 9.000000e+00
+ }
+
+Note how the parser turns the top-level expression into an anonymous
+function for us. This will be handy when we add `JIT
+support <LangImpl4.html#jit>`_ in the next chapter. Also note that the
+code is transcribed very literally; no optimizations are performed
+except simple constant folding done by IRBuilder. We will `add
+optimizations <LangImpl4.html#trivialconstfold>`_ explicitly in the next
+chapter.
+
+::
+
+ ready> def foo(a b) a*a + 2*a*b + b*b;
+ Read function definition:
+ define double @foo(double %a, double %b) {
+ entry:
+ %multmp = fmul double %a, %a
+ %multmp1 = fmul double 2.000000e+00, %a
+ %multmp2 = fmul double %multmp1, %b
+ %addtmp = fadd double %multmp, %multmp2
+ %multmp3 = fmul double %b, %b
+ %addtmp4 = fadd double %addtmp, %multmp3
+ ret double %addtmp4
+ }
+
+This shows some simple arithmetic. Notice the striking similarity to the
+LLVM builder calls that we use to create the instructions.
+
+::
+
+ ready> def bar(a) foo(a, 4.0) + bar(31337);
+ Read function definition:
+ define double @bar(double %a) {
+ entry:
+ %calltmp = call double @foo(double %a, double 4.000000e+00)
+ %calltmp1 = call double @bar(double 3.133700e+04)
+ %addtmp = fadd double %calltmp, %calltmp1
+ ret double %addtmp
+ }
+
+This shows some function calls. Note that this function will take a long
+time to execute if you call it. In the future we'll add conditional
+control flow to actually make recursion useful :).
+
+::
+
+ ready> extern cos(x);
+ Read extern:
+ declare double @cos(double)
+
+ ready> cos(1.234);
+ Read top-level expression:
+ define double @1() {
+ entry:
+ %calltmp = call double @cos(double 1.234000e+00)
+ ret double %calltmp
+ }
+
+This shows an extern for the libm "cos" function, and a call to it.
+
+.. TODO:: Abandon Pygments' horrible `llvm` lexer. It just totally gives up
+ on highlighting this due to the first line.
+
+::
+
+ ready> ^D
+ ; ModuleID = 'my cool jit'
+
+ define double @0() {
+ entry:
+ %addtmp = fadd double 4.000000e+00, 5.000000e+00
+ ret double %addtmp
+ }
+
+ define double @foo(double %a, double %b) {
+ entry:
+ %multmp = fmul double %a, %a
+ %multmp1 = fmul double 2.000000e+00, %a
+ %multmp2 = fmul double %multmp1, %b
+ %addtmp = fadd double %multmp, %multmp2
+ %multmp3 = fmul double %b, %b
+ %addtmp4 = fadd double %addtmp, %multmp3
+ ret double %addtmp4
+ }
+
+ define double @bar(double %a) {
+ entry:
+ %calltmp = call double @foo(double %a, double 4.000000e+00)
+ %calltmp1 = call double @bar(double 3.133700e+04)
+ %addtmp = fadd double %calltmp, %calltmp1
+ ret double %addtmp
+ }
+
+ declare double @cos(double)
+
+ define double @1() {
+ entry:
+ %calltmp = call double @cos(double 1.234000e+00)
+ ret double %calltmp
+ }
+
+When you quit the current demo, it dumps out the IR for the entire
+module generated. Here you can see the big picture with all the
+functions referencing each other.
+
+This wraps up the third chapter of the Kaleidoscope tutorial. Up next,
+we'll describe how to `add JIT codegen and optimizer
+support <LangImpl4.html>`_ to this so we can actually start running
+code!
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the LLVM code generator. Because this uses the LLVM libraries, we need
+to link them in. To do this, we use the
+`llvm-config <http://llvm.org/cmds/llvm-config.html>`_ tool to inform
+our makefile/command line about which options to use:
+
+.. code-block:: bash
+
+ # Compile
+ clang++ -g -O3 toy.cpp `llvm-config --cppflags --ldflags --libs core` -o toy
+ # Run
+ ./toy
+
+Here is the code:
+
+.. code-block:: c++
+
+ // To build this:
+ // See example below.
+
+ #include "llvm/DerivedTypes.h"
+ #include "llvm/IRBuilder.h"
+ #include "llvm/LLVMContext.h"
+ #include "llvm/Module.h"
+ #include "llvm/Analysis/Verifier.h"
+ #include <cstdio>
+ #include <string>
+ #include <map>
+ #include <vector>
+ using namespace llvm;
+
+ //===----------------------------------------------------------------------===//
+ // Lexer
+ //===----------------------------------------------------------------------===//
+
+ // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+ // of these for known things.
+ enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5
+ };
+
+ static std::string IdentifierStr; // Filled in if tok_identifier
+ static double NumVal; // Filled in if tok_number
+
+ /// gettok - Return the next token from standard input.
+ static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Abstract Syntax Tree (aka Parse Tree)
+ //===----------------------------------------------------------------------===//
+
+ /// ExprAST - Base class for all expression nodes.
+ class ExprAST {
+ public:
+ virtual ~ExprAST() {}
+ virtual Value *Codegen() = 0;
+ };
+
+ /// NumberExprAST - Expression class for numeric literals like "1.0".
+ class NumberExprAST : public ExprAST {
+ double Val;
+ public:
+ NumberExprAST(double val) : Val(val) {}
+ virtual Value *Codegen();
+ };
+
+ /// VariableExprAST - Expression class for referencing a variable, like "a".
+ class VariableExprAST : public ExprAST {
+ std::string Name;
+ public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ virtual Value *Codegen();
+ };
+
+ /// BinaryExprAST - Expression class for a binary operator.
+ class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+ public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ virtual Value *Codegen();
+ };
+
+ /// CallExprAST - Expression class for function calls.
+ class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+ public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ virtual Value *Codegen();
+ };
+
+ /// PrototypeAST - This class represents the "prototype" for a function,
+ /// which captures its name, and its argument names (thus implicitly the number
+ /// of arguments the function takes).
+ class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+ public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+ : Name(name), Args(args) {}
+
+ Function *Codegen();
+ };
+
+ /// FunctionAST - This class represents a function definition itself.
+ class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+ public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+ Function *Codegen();
+ };
+
+ //===----------------------------------------------------------------------===//
+ // Parser
+ //===----------------------------------------------------------------------===//
+
+ /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+ /// token the parser is looking at. getNextToken reads another token from the
+ /// lexer and updates CurTok with its results.
+ static int CurTok;
+ static int getNextToken() {
+ return CurTok = gettok();
+ }
+
+ /// BinopPrecedence - This holds the precedence for each binary operator that is
+ /// defined.
+ static std::map<char, int> BinopPrecedence;
+
+ /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+ static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+ }
+
+ /// Error* - These are little helper functions for error handling.
+ ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+ PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+ FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+ static ExprAST *ParseExpression();
+
+ /// identifierexpr
+ /// ::= identifier
+ /// ::= identifier '(' expression* ')'
+ static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+ }
+
+ /// numberexpr ::= number
+ static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+ }
+
+ /// parenexpr ::= '(' expression ')'
+ static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+ }
+
+ /// primary
+ /// ::= identifierexpr
+ /// ::= numberexpr
+ /// ::= parenexpr
+ static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ }
+ }
+
+ /// binoprhs
+ /// ::= ('+' primary)*
+ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the primary expression after the binary operator.
+ ExprAST *RHS = ParsePrimary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+ }
+
+ /// expression
+ /// ::= primary binoprhs
+ ///
+ static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParsePrimary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+ }
+
+ /// prototype
+ /// ::= id '(' id* ')'
+ static PrototypeAST *ParsePrototype() {
+ if (CurTok != tok_identifier)
+ return ErrorP("Expected function name in prototype");
+
+ std::string FnName = IdentifierStr;
+ getNextToken();
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ return new PrototypeAST(FnName, ArgNames);
+ }
+
+ /// definition ::= 'def' prototype expression
+ static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+ }
+
+ /// toplevelexpr ::= expression
+ static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+ }
+
+ /// external ::= 'extern' prototype
+ static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Code Generation
+ //===----------------------------------------------------------------------===//
+
+ static Module *TheModule;
+ static IRBuilder<> Builder(getGlobalContext());
+ static std::map<std::string, Value*> NamedValues;
+
+ Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+ Value *NumberExprAST::Codegen() {
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
+ }
+
+ Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ return V ? V : ErrorV("Unknown variable name");
+ }
+
+ Value *BinaryExprAST::Codegen() {
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateFAdd(L, R, "addtmp");
+ case '-': return Builder.CreateFSub(L, R, "subtmp");
+ case '*': return Builder.CreateFMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: return ErrorV("invalid binary operator");
+ }
+ }
+
+ Value *CallExprAST::Codegen() {
+ // Look up the name in the global module table.
+ Function *CalleeF = TheModule->getFunction(Callee);
+ if (CalleeF == 0)
+ return ErrorV("Unknown function referenced");
+
+ // If argument mismatch error.
+ if (CalleeF->arg_size() != Args.size())
+ return ErrorV("Incorrect # arguments passed");
+
+ std::vector<Value*> ArgsV;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ ArgsV.push_back(Args[i]->Codegen());
+ if (ArgsV.back() == 0) return 0;
+ }
+
+ return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+ }
+
+ Function *PrototypeAST::Codegen() {
+ // Make the function type: double(double,double) etc.
+ std::vector<Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
+
+ Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+ // If F conflicted, there was already something named 'Name'. If it has a
+ // body, don't allow redefinition or reextern.
+ if (F->getName() != Name) {
+ // Delete the one we just made and get the existing one.
+ F->eraseFromParent();
+ F = TheModule->getFunction(Name);
+
+ // If F already has a body, reject this.
+ if (!F->empty()) {
+ ErrorF("redefinition of function");
+ return 0;
+ }
+
+ // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) {
+ ErrorF("redefinition of function with different # args");
+ return 0;
+ }
+ }
+
+ // Set names for all arguments.
+ unsigned Idx = 0;
+ for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+ ++AI, ++Idx) {
+ AI->setName(Args[Idx]);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = AI;
+ }
+
+ return F;
+ }
+
+ Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ return TheFunction;
+ }
+
+ // Error reading body, remove function.
+ TheFunction->eraseFromParent();
+ return 0;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Top-Level parsing and JIT Driver
+ //===----------------------------------------------------------------------===//
+
+ static void HandleDefinition() {
+ if (FunctionAST *F = ParseDefinition()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read function definition:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleExtern() {
+ if (PrototypeAST *P = ParseExtern()) {
+ if (Function *F = P->Codegen()) {
+ fprintf(stderr, "Read extern: ");
+ F->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read top-level expression:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ /// top ::= definition | external | expression | ';'
+ static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+ }
+
+ //===----------------------------------------------------------------------===//
+ // "Library" functions that can be "extern'd" from user code.
+ //===----------------------------------------------------------------------===//
+
+ /// putchard - putchar that takes a double and returns 0.
+ extern "C"
+ double putchard(double X) {
+ putchar((char)X);
+ return 0;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Main driver code.
+ //===----------------------------------------------------------------------===//
+
+ int main() {
+ LLVMContext &Context = getGlobalContext();
+
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context);
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ // Print out all of the generated code.
+ TheModule->dump();
+
+ return 0;
+ }
+
+`Next: Adding JIT and Optimizer Support <LangImpl4.html>`_
+
diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html
deleted file mode 100644
index 5e9c65676c9e..000000000000
--- a/docs/tutorial/LangImpl4.html
+++ /dev/null
@@ -1,1152 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Adding JIT and Optimizer Support</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Adding JIT and Optimizer Support</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 4
- <ol>
- <li><a href="#intro">Chapter 4 Introduction</a></li>
- <li><a href="#trivialconstfold">Trivial Constant Folding</a></li>
- <li><a href="#optimizerpasses">LLVM Optimization Passes</a></li>
- <li><a href="#jit">Adding a JIT Compiler</a></li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="LangImpl5.html">Chapter 5</a>: Extending the Language: Control
-Flow</li>
-</ul>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 4 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 4 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial. Chapters 1-3 described the implementation of a simple
-language and added support for generating LLVM IR. This chapter describes
-two new techniques: adding optimizer support to your language, and adding JIT
-compiler support. These additions will demonstrate how to get nice, efficient code
-for the Kaleidoscope language.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="trivialconstfold">Trivial Constant Folding</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Our demonstration for Chapter 3 is elegant and easy to extend. Unfortunately,
-it does not produce wonderful code. The IRBuilder, however, does give us
-obvious optimizations when compiling simple code:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) 1+2+x;</b>
-Read function definition:
-define double @test(double %x) {
-entry:
- %addtmp = fadd double 3.000000e+00, %x
- ret double %addtmp
-}
-</pre>
-</div>
-
-<p>This code is not a literal transcription of the AST built by parsing the
-input. That would be:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) 1+2+x;</b>
-Read function definition:
-define double @test(double %x) {
-entry:
- %addtmp = fadd double 2.000000e+00, 1.000000e+00
- %addtmp1 = fadd double %addtmp, %x
- ret double %addtmp1
-}
-</pre>
-</div>
-
-<p>Constant folding, as seen above, in particular, is a very common and very
-important optimization: so much so that many language implementors implement
-constant folding support in their AST representation.</p>
-
-<p>With LLVM, you don't need this support in the AST. Since all calls to build
-LLVM IR go through the LLVM IR builder, the builder itself checks to see if
-there is a constant folding opportunity when you call it. If so, it just does
-the constant fold and returns the constant instead of creating an instruction.</p>
-
-<p>Well, that was easy :). In practice, we recommend always using
-<tt>IRBuilder</tt> when generating code like this. It has no
-"syntactic overhead" for its use (you don't have to uglify your compiler with
-constant checks everywhere) and it can dramatically reduce the amount of
-LLVM IR that is generated in some cases (particular for languages with a macro
-preprocessor or that use a lot of constants).</p>
-
-<p>On the other hand, the <tt>IRBuilder</tt> is limited by the fact
-that it does all of its analysis inline with the code as it is built. If you
-take a slightly more complex example:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) (1+2+x)*(x+(1+2));</b>
-ready> Read function definition:
-define double @test(double %x) {
-entry:
- %addtmp = fadd double 3.000000e+00, %x
- %addtmp1 = fadd double %x, 3.000000e+00
- %multmp = fmul double %addtmp, %addtmp1
- ret double %multmp
-}
-</pre>
-</div>
-
-<p>In this case, the LHS and RHS of the multiplication are the same value. We'd
-really like to see this generate "<tt>tmp = x+3; result = tmp*tmp;</tt>" instead
-of computing "<tt>x+3</tt>" twice.</p>
-
-<p>Unfortunately, no amount of local analysis will be able to detect and correct
-this. This requires two transformations: reassociation of expressions (to
-make the add's lexically identical) and Common Subexpression Elimination (CSE)
-to delete the redundant add instruction. Fortunately, LLVM provides a broad
-range of optimizations that you can use, in the form of "passes".</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="optimizerpasses">LLVM Optimization Passes</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM provides many optimization passes, which do many different sorts of
-things and have different tradeoffs. Unlike other systems, LLVM doesn't hold
-to the mistaken notion that one set of optimizations is right for all languages
-and for all situations. LLVM allows a compiler implementor to make complete
-decisions about what optimizations to use, in which order, and in what
-situation.</p>
-
-<p>As a concrete example, LLVM supports both "whole module" passes, which look
-across as large of body of code as they can (often a whole file, but if run
-at link time, this can be a substantial portion of the whole program). It also
-supports and includes "per-function" passes which just operate on a single
-function at a time, without looking at other functions. For more information
-on passes and how they are run, see the <a href="../WritingAnLLVMPass.html">How
-to Write a Pass</a> document and the <a href="../Passes.html">List of LLVM
-Passes</a>.</p>
-
-<p>For Kaleidoscope, we are currently generating functions on the fly, one at
-a time, as the user types them in. We aren't shooting for the ultimate
-optimization experience in this setting, but we also want to catch the easy and
-quick stuff where possible. As such, we will choose to run a few per-function
-optimizations as the user types the function in. If we wanted to make a "static
-Kaleidoscope compiler", we would use exactly the code we have now, except that
-we would defer running the optimizer until the entire file has been parsed.</p>
-
-<p>In order to get per-function optimizations going, we need to set up a
-<a href="../WritingAnLLVMPass.html#passmanager">FunctionPassManager</a> to hold and
-organize the LLVM optimizations that we want to run. Once we have that, we can
-add a set of optimizations to run. The code looks like this:</p>
-
-<div class="doc_code">
-<pre>
- FunctionPassManager OurFPM(TheModule);
-
- // Set up the optimizer pipeline. Start with registering info about how the
- // target lays out data structures.
- OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
- // Provide basic AliasAnalysis support for GVN.
- OurFPM.add(createBasicAliasAnalysisPass());
- // Do simple "peephole" optimizations and bit-twiddling optzns.
- OurFPM.add(createInstructionCombiningPass());
- // Reassociate expressions.
- OurFPM.add(createReassociatePass());
- // Eliminate Common SubExpressions.
- OurFPM.add(createGVNPass());
- // Simplify the control flow graph (deleting unreachable blocks, etc).
- OurFPM.add(createCFGSimplificationPass());
-
- OurFPM.doInitialization();
-
- // Set the global so the code gen can use this.
- TheFPM = &amp;OurFPM;
-
- // Run the main "interpreter loop" now.
- MainLoop();
-</pre>
-</div>
-
-<p>This code defines a <tt>FunctionPassManager</tt>, "<tt>OurFPM</tt>". It
-requires a pointer to the <tt>Module</tt> to construct itself. Once it is set
-up, we use a series of "add" calls to add a bunch of LLVM passes. The first
-pass is basically boilerplate, it adds a pass so that later optimizations know
-how the data structures in the program are laid out. The
-"<tt>TheExecutionEngine</tt>" variable is related to the JIT, which we will get
-to in the next section.</p>
-
-<p>In this case, we choose to add 4 optimization passes. The passes we chose
-here are a pretty standard set of "cleanup" optimizations that are useful for
-a wide variety of code. I won't delve into what they do but, believe me,
-they are a good starting place :).</p>
-
-<p>Once the PassManager is set up, we need to make use of it. We do this by
-running it after our newly created function is constructed (in
-<tt>FunctionAST::Codegen</tt>), but before it is returned to the client:</p>
-
-<div class="doc_code">
-<pre>
- if (Value *RetVal = Body->Codegen()) {
- // Finish off the function.
- Builder.CreateRet(RetVal);
-
- // Validate the generated code, checking for consistency.
- verifyFunction(*TheFunction);
-
- <b>// Optimize the function.
- TheFPM-&gt;run(*TheFunction);</b>
-
- return TheFunction;
- }
-</pre>
-</div>
-
-<p>As you can see, this is pretty straightforward. The
-<tt>FunctionPassManager</tt> optimizes and updates the LLVM Function* in place,
-improving (hopefully) its body. With this in place, we can try our test above
-again:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) (1+2+x)*(x+(1+2));</b>
-ready> Read function definition:
-define double @test(double %x) {
-entry:
- %addtmp = fadd double %x, 3.000000e+00
- %multmp = fmul double %addtmp, %addtmp
- ret double %multmp
-}
-</pre>
-</div>
-
-<p>As expected, we now get our nicely optimized code, saving a floating point
-add instruction from every execution of this function.</p>
-
-<p>LLVM provides a wide variety of optimizations that can be used in certain
-circumstances. Some <a href="../Passes.html">documentation about the various
-passes</a> is available, but it isn't very complete. Another good source of
-ideas can come from looking at the passes that <tt>Clang</tt> runs to get
-started. The "<tt>opt</tt>" tool allows you to experiment with passes from the
-command line, so you can see if they do anything.</p>
-
-<p>Now that we have reasonable code coming out of our front-end, lets talk about
-executing it!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="jit">Adding a JIT Compiler</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Code that is available in LLVM IR can have a wide variety of tools
-applied to it. For example, you can run optimizations on it (as we did above),
-you can dump it out in textual or binary forms, you can compile the code to an
-assembly file (.s) for some target, or you can JIT compile it. The nice thing
-about the LLVM IR representation is that it is the "common currency" between
-many different parts of the compiler.
-</p>
-
-<p>In this section, we'll add JIT compiler support to our interpreter. The
-basic idea that we want for Kaleidoscope is to have the user enter function
-bodies as they do now, but immediately evaluate the top-level expressions they
-type in. For example, if they type in "1 + 2;", we should evaluate and print
-out 3. If they define a function, they should be able to call it from the
-command line.</p>
-
-<p>In order to do this, we first declare and initialize the JIT. This is done
-by adding a global variable and a call in <tt>main</tt>:</p>
-
-<div class="doc_code">
-<pre>
-<b>static ExecutionEngine *TheExecutionEngine;</b>
-...
-int main() {
- ..
- <b>// Create the JIT. This takes ownership of the module.
- TheExecutionEngine = EngineBuilder(TheModule).create();</b>
- ..
-}
-</pre>
-</div>
-
-<p>This creates an abstract "Execution Engine" which can be either a JIT
-compiler or the LLVM interpreter. LLVM will automatically pick a JIT compiler
-for you if one is available for your platform, otherwise it will fall back to
-the interpreter.</p>
-
-<p>Once the <tt>ExecutionEngine</tt> is created, the JIT is ready to be used.
-There are a variety of APIs that are useful, but the simplest one is the
-"<tt>getPointerToFunction(F)</tt>" method. This method JIT compiles the
-specified LLVM Function and returns a function pointer to the generated machine
-code. In our case, this means that we can change the code that parses a
-top-level expression to look like this:</p>
-
-<div class="doc_code">
-<pre>
-static void HandleTopLevelExpression() {
- // Evaluate a top-level expression into an anonymous function.
- if (FunctionAST *F = ParseTopLevelExpr()) {
- if (Function *LF = F-&gt;Codegen()) {
- LF->dump(); // Dump the function for exposition purposes.
-
- <b>// JIT the function, returning a function pointer.
- void *FPtr = TheExecutionEngine-&gt;getPointerToFunction(LF);
-
- // Cast it to the right type (takes no arguments, returns a double) so we
- // can call it as a native function.
- double (*FP)() = (double (*)())(intptr_t)FPtr;
- fprintf(stderr, "Evaluated to %f\n", FP());</b>
- }
-</pre>
-</div>
-
-<p>Recall that we compile top-level expressions into a self-contained LLVM
-function that takes no arguments and returns the computed double. Because the
-LLVM JIT compiler matches the native platform ABI, this means that you can just
-cast the result pointer to a function pointer of that type and call it directly.
-This means, there is no difference between JIT compiled code and native machine
-code that is statically linked into your application.</p>
-
-<p>With just these two changes, lets see how Kaleidoscope works now!</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>4+5;</b>
-Read top-level expression:
-define double @0() {
-entry:
- ret double 9.000000e+00
-}
-
-<em>Evaluated to 9.000000</em>
-</pre>
-</div>
-
-<p>Well this looks like it is basically working. The dump of the function
-shows the "no argument function that always returns double" that we synthesize
-for each top-level expression that is typed in. This demonstrates very basic
-functionality, but can we do more?</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def testfunc(x y) x + y*2; </b>
-Read function definition:
-define double @testfunc(double %x, double %y) {
-entry:
- %multmp = fmul double %y, 2.000000e+00
- %addtmp = fadd double %multmp, %x
- ret double %addtmp
-}
-
-ready&gt; <b>testfunc(4, 10);</b>
-Read top-level expression:
-define double @1() {
-entry:
- %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
- ret double %calltmp
-}
-
-<em>Evaluated to 24.000000</em>
-</pre>
-</div>
-
-<p>This illustrates that we can now call user code, but there is something a bit
-subtle going on here. Note that we only invoke the JIT on the anonymous
-functions that <em>call testfunc</em>, but we never invoked it
-on <em>testfunc</em> itself. What actually happened here is that the JIT
-scanned for all non-JIT'd functions transitively called from the anonymous
-function and compiled all of them before returning
-from <tt>getPointerToFunction()</tt>.</p>
-
-<p>The JIT provides a number of other more advanced interfaces for things like
-freeing allocated machine code, rejit'ing functions to update them, etc.
-However, even with this simple code, we get some surprisingly powerful
-capabilities - check this out (I removed the dump of the anonymous functions,
-you should get the idea by now :) :</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern sin(x);</b>
-Read extern:
-declare double @sin(double)
-
-ready&gt; <b>extern cos(x);</b>
-Read extern:
-declare double @cos(double)
-
-ready&gt; <b>sin(1.0);</b>
-Read top-level expression:
-define double @2() {
-entry:
- ret double 0x3FEAED548F090CEE
-}
-
-<em>Evaluated to 0.841471</em>
-
-ready&gt; <b>def foo(x) sin(x)*sin(x) + cos(x)*cos(x);</b>
-Read function definition:
-define double @foo(double %x) {
-entry:
- %calltmp = call double @sin(double %x)
- %multmp = fmul double %calltmp, %calltmp
- %calltmp2 = call double @cos(double %x)
- %multmp4 = fmul double %calltmp2, %calltmp2
- %addtmp = fadd double %multmp, %multmp4
- ret double %addtmp
-}
-
-ready&gt; <b>foo(4.0);</b>
-Read top-level expression:
-define double @3() {
-entry:
- %calltmp = call double @foo(double 4.000000e+00)
- ret double %calltmp
-}
-
-<em>Evaluated to 1.000000</em>
-</pre>
-</div>
-
-<p>Whoa, how does the JIT know about sin and cos? The answer is surprisingly
-simple: in this
-example, the JIT started execution of a function and got to a function call. It
-realized that the function was not yet JIT compiled and invoked the standard set
-of routines to resolve the function. In this case, there is no body defined
-for the function, so the JIT ended up calling "<tt>dlsym("sin")</tt>" on the
-Kaleidoscope process itself.
-Since "<tt>sin</tt>" is defined within the JIT's address space, it simply
-patches up calls in the module to call the libm version of <tt>sin</tt>
-directly.</p>
-
-<p>The LLVM JIT provides a number of interfaces (look in the
-<tt>ExecutionEngine.h</tt> file) for controlling how unknown functions get
-resolved. It allows you to establish explicit mappings between IR objects and
-addresses (useful for LLVM global variables that you want to map to static
-tables, for example), allows you to dynamically decide on the fly based on the
-function name, and even allows you to have the JIT compile functions lazily the
-first time they're called.</p>
-
-<p>One interesting application of this is that we can now extend the language
-by writing arbitrary C++ code to implement operations. For example, if we add:
-</p>
-
-<div class="doc_code">
-<pre>
-/// putchard - putchar that takes a double and returns 0.
-extern "C"
-double putchard(double X) {
- putchar((char)X);
- return 0;
-}
-</pre>
-</div>
-
-<p>Now we can produce simple output to the console by using things like:
-"<tt>extern putchard(x); putchard(120);</tt>", which prints a lowercase 'x' on
-the console (120 is the ASCII code for 'x'). Similar code could be used to
-implement file I/O, console input, and many other capabilities in
-Kaleidoscope.</p>
-
-<p>This completes the JIT and optimizer chapter of the Kaleidoscope tutorial. At
-this point, we can compile a non-Turing-complete programming language, optimize
-and JIT compile it in a user-driven way. Next up we'll look into <a
-href="LangImpl5.html">extending the language with control flow constructs</a>,
-tackling some interesting LLVM IR issues along the way.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-LLVM JIT and optimizer. To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-</pre>
-</div>
-
-<p>
-If you are compiling this on Linux, make sure to add the "-rdynamic" option
-as well. This makes sure that the external functions are resolved properly
-at runtime.</p>
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include &lt;cstdio&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
- tok_eof = -1,
-
- // commands
- tok_def = -2, tok_extern = -3,
-
- // primary
- tok_identifier = -4, tok_number = -5
-};
-
-static std::string IdentifierStr; // Filled in if tok_identifier
-static double NumVal; // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
- static int LastChar = ' ';
-
- // Skip any whitespace.
- while (isspace(LastChar))
- LastChar = getchar();
-
- if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
- IdentifierStr = LastChar;
- while (isalnum((LastChar = getchar())))
- IdentifierStr += LastChar;
-
- if (IdentifierStr == "def") return tok_def;
- if (IdentifierStr == "extern") return tok_extern;
- return tok_identifier;
- }
-
- if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
- std::string NumStr;
- do {
- NumStr += LastChar;
- LastChar = getchar();
- } while (isdigit(LastChar) || LastChar == '.');
-
- NumVal = strtod(NumStr.c_str(), 0);
- return tok_number;
- }
-
- if (LastChar == '#') {
- // Comment until end of line.
- do LastChar = getchar();
- while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-
- if (LastChar != EOF)
- return gettok();
- }
-
- // Check for end of file. Don't eat the EOF.
- if (LastChar == EOF)
- return tok_eof;
-
- // Otherwise, just return the character as its ascii value.
- int ThisChar = LastChar;
- LastChar = getchar();
- return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
- virtual ~ExprAST() {}
- virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
- double Val;
-public:
- NumberExprAST(double val) : Val(val) {}
- virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
- std::string Name;
-public:
- VariableExprAST(const std::string &amp;name) : Name(name) {}
- virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
- char Op;
- ExprAST *LHS, *RHS;
-public:
- BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
- : Op(op), LHS(lhs), RHS(rhs) {}
- virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
- std::string Callee;
- std::vector&lt;ExprAST*&gt; Args;
-public:
- CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
- : Callee(callee), Args(args) {}
- virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
- std::string Name;
- std::vector&lt;std::string&gt; Args;
-public:
- PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args)
- : Name(name), Args(args) {}
-
- Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
- PrototypeAST *Proto;
- ExprAST *Body;
-public:
- FunctionAST(PrototypeAST *proto, ExprAST *body)
- : Proto(proto), Body(body) {}
-
- Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser is looking at. getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
- return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
- if (!isascii(CurTok))
- return -1;
-
- // Make sure it's a declared binop.
- int TokPrec = BinopPrecedence[CurTok];
- if (TokPrec &lt;= 0) return -1;
- return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-/// ::= identifier
-/// ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
- std::string IdName = IdentifierStr;
-
- getNextToken(); // eat identifier.
-
- if (CurTok != '(') // Simple variable ref.
- return new VariableExprAST(IdName);
-
- // Call.
- getNextToken(); // eat (
- std::vector&lt;ExprAST*&gt; Args;
- if (CurTok != ')') {
- while (1) {
- ExprAST *Arg = ParseExpression();
- if (!Arg) return 0;
- Args.push_back(Arg);
-
- if (CurTok == ')') break;
-
- if (CurTok != ',')
- return Error("Expected ')' or ',' in argument list");
- getNextToken();
- }
- }
-
- // Eat the ')'.
- getNextToken();
-
- return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
- ExprAST *Result = new NumberExprAST(NumVal);
- getNextToken(); // consume the number
- return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
- getNextToken(); // eat (.
- ExprAST *V = ParseExpression();
- if (!V) return 0;
-
- if (CurTok != ')')
- return Error("expected ')'");
- getNextToken(); // eat ).
- return V;
-}
-
-/// primary
-/// ::= identifierexpr
-/// ::= numberexpr
-/// ::= parenexpr
-static ExprAST *ParsePrimary() {
- switch (CurTok) {
- default: return Error("unknown token when expecting an expression");
- case tok_identifier: return ParseIdentifierExpr();
- case tok_number: return ParseNumberExpr();
- case '(': return ParseParenExpr();
- }
-}
-
-/// binoprhs
-/// ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
- // If this is a binop, find its precedence.
- while (1) {
- int TokPrec = GetTokPrecedence();
-
- // If this is a binop that binds at least as tightly as the current binop,
- // consume it, otherwise we are done.
- if (TokPrec &lt; ExprPrec)
- return LHS;
-
- // Okay, we know this is a binop.
- int BinOp = CurTok;
- getNextToken(); // eat binop
-
- // Parse the primary expression after the binary operator.
- ExprAST *RHS = ParsePrimary();
- if (!RHS) return 0;
-
- // If BinOp binds less tightly with RHS than the operator after RHS, let
- // the pending operator take RHS as its LHS.
- int NextPrec = GetTokPrecedence();
- if (TokPrec &lt; NextPrec) {
- RHS = ParseBinOpRHS(TokPrec+1, RHS);
- if (RHS == 0) return 0;
- }
-
- // Merge LHS/RHS.
- LHS = new BinaryExprAST(BinOp, LHS, RHS);
- }
-}
-
-/// expression
-/// ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
- ExprAST *LHS = ParsePrimary();
- if (!LHS) return 0;
-
- return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-/// ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
- if (CurTok != tok_identifier)
- return ErrorP("Expected function name in prototype");
-
- std::string FnName = IdentifierStr;
- getNextToken();
-
- if (CurTok != '(')
- return ErrorP("Expected '(' in prototype");
-
- std::vector&lt;std::string&gt; ArgNames;
- while (getNextToken() == tok_identifier)
- ArgNames.push_back(IdentifierStr);
- if (CurTok != ')')
- return ErrorP("Expected ')' in prototype");
-
- // success.
- getNextToken(); // eat ')'.
-
- return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
- getNextToken(); // eat def.
- PrototypeAST *Proto = ParsePrototype();
- if (Proto == 0) return 0;
-
- if (ExprAST *E = ParseExpression())
- return new FunctionAST(Proto, E);
- return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
- if (ExprAST *E = ParseExpression()) {
- // Make an anonymous proto.
- PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
- return new FunctionAST(Proto, E);
- }
- return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
- getNextToken(); // eat extern.
- return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder&lt;&gt; Builder(getGlobalContext());
-static std::map&lt;std::string, Value*&gt; NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
- return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
- // Look this variable up in the function.
- Value *V = NamedValues[Name];
- return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *BinaryExprAST::Codegen() {
- Value *L = LHS-&gt;Codegen();
- Value *R = RHS-&gt;Codegen();
- if (L == 0 || R == 0) return 0;
-
- switch (Op) {
- case '+': return Builder.CreateFAdd(L, R, "addtmp");
- case '-': return Builder.CreateFSub(L, R, "subtmp");
- case '*': return Builder.CreateFMul(L, R, "multmp");
- case '&lt;':
- L = Builder.CreateFCmpULT(L, R, "cmptmp");
- // Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
- "booltmp");
- default: return ErrorV("invalid binary operator");
- }
-}
-
-Value *CallExprAST::Codegen() {
- // Look up the name in the global module table.
- Function *CalleeF = TheModule-&gt;getFunction(Callee);
- if (CalleeF == 0)
- return ErrorV("Unknown function referenced");
-
- // If argument mismatch error.
- if (CalleeF-&gt;arg_size() != Args.size())
- return ErrorV("Incorrect # arguments passed");
-
- std::vector&lt;Value*&gt; ArgsV;
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- ArgsV.push_back(Args[i]-&gt;Codegen());
- if (ArgsV.back() == 0) return 0;
- }
-
- return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Function *PrototypeAST::Codegen() {
- // Make the function type: double(double,double) etc.
- std::vector&lt;Type*&gt; Doubles(Args.size(),
- Type::getDoubleTy(getGlobalContext()));
- FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
- Doubles, false);
-
- Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-
- // If F conflicted, there was already something named 'Name'. If it has a
- // body, don't allow redefinition or reextern.
- if (F-&gt;getName() != Name) {
- // Delete the one we just made and get the existing one.
- F-&gt;eraseFromParent();
- F = TheModule-&gt;getFunction(Name);
-
- // If F already has a body, reject this.
- if (!F-&gt;empty()) {
- ErrorF("redefinition of function");
- return 0;
- }
-
- // If F took a different number of args, reject.
- if (F-&gt;arg_size() != Args.size()) {
- ErrorF("redefinition of function with different # args");
- return 0;
- }
- }
-
- // Set names for all arguments.
- unsigned Idx = 0;
- for (Function::arg_iterator AI = F-&gt;arg_begin(); Idx != Args.size();
- ++AI, ++Idx) {
- AI-&gt;setName(Args[Idx]);
-
- // Add arguments to variable symbol table.
- NamedValues[Args[Idx]] = AI;
- }
-
- return F;
-}
-
-Function *FunctionAST::Codegen() {
- NamedValues.clear();
-
- Function *TheFunction = Proto-&gt;Codegen();
- if (TheFunction == 0)
- return 0;
-
- // Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
- Builder.SetInsertPoint(BB);
-
- if (Value *RetVal = Body-&gt;Codegen()) {
- // Finish off the function.
- Builder.CreateRet(RetVal);
-
- // Validate the generated code, checking for consistency.
- verifyFunction(*TheFunction);
-
- // Optimize the function.
- TheFPM-&gt;run(*TheFunction);
-
- return TheFunction;
- }
-
- // Error reading body, remove function.
- TheFunction-&gt;eraseFromParent();
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-static void HandleDefinition() {
- if (FunctionAST *F = ParseDefinition()) {
- if (Function *LF = F-&gt;Codegen()) {
- fprintf(stderr, "Read function definition:");
- LF-&gt;dump();
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleExtern() {
- if (PrototypeAST *P = ParseExtern()) {
- if (Function *F = P-&gt;Codegen()) {
- fprintf(stderr, "Read extern: ");
- F-&gt;dump();
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleTopLevelExpression() {
- // Evaluate a top-level expression into an anonymous function.
- if (FunctionAST *F = ParseTopLevelExpr()) {
- if (Function *LF = F-&gt;Codegen()) {
- fprintf(stderr, "Read top-level expression:");
- LF->dump();
-
- // JIT the function, returning a function pointer.
- void *FPtr = TheExecutionEngine-&gt;getPointerToFunction(LF);
-
- // Cast it to the right type (takes no arguments, returns a double) so we
- // can call it as a native function.
- double (*FP)() = (double (*)())(intptr_t)FPtr;
- fprintf(stderr, "Evaluated to %f\n", FP());
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
- while (1) {
- fprintf(stderr, "ready&gt; ");
- switch (CurTok) {
- case tok_eof: return;
- case ';': getNextToken(); break; // ignore top-level semicolons.
- case tok_def: HandleDefinition(); break;
- case tok_extern: HandleExtern(); break;
- default: HandleTopLevelExpression(); break;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C"
-double putchard(double X) {
- putchar((char)X);
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
- InitializeNativeTarget();
- LLVMContext &amp;Context = getGlobalContext();
-
- // Install standard binary operators.
- // 1 is lowest precedence.
- BinopPrecedence['&lt;'] = 10;
- BinopPrecedence['+'] = 20;
- BinopPrecedence['-'] = 20;
- BinopPrecedence['*'] = 40; // highest.
-
- // Prime the first token.
- fprintf(stderr, "ready&gt; ");
- getNextToken();
-
- // Make the module, which holds all the code.
- TheModule = new Module("my cool jit", Context);
-
- // Create the JIT. This takes ownership of the module.
- std::string ErrStr;
- TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&amp;ErrStr).create();
- if (!TheExecutionEngine) {
- fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
- exit(1);
- }
-
- FunctionPassManager OurFPM(TheModule);
-
- // Set up the optimizer pipeline. Start with registering info about how the
- // target lays out data structures.
- OurFPM.add(new DataLayout(*TheExecutionEngine-&gt;getDataLayout()));
- // Provide basic AliasAnalysis support for GVN.
- OurFPM.add(createBasicAliasAnalysisPass());
- // Do simple "peephole" optimizations and bit-twiddling optzns.
- OurFPM.add(createInstructionCombiningPass());
- // Reassociate expressions.
- OurFPM.add(createReassociatePass());
- // Eliminate Common SubExpressions.
- OurFPM.add(createGVNPass());
- // Simplify the control flow graph (deleting unreachable blocks, etc).
- OurFPM.add(createCFGSimplificationPass());
-
- OurFPM.doInitialization();
-
- // Set the global so the code gen can use this.
- TheFPM = &amp;OurFPM;
-
- // Run the main "interpreter loop" now.
- MainLoop();
-
- TheFPM = 0;
-
- // Print out all of the generated code.
- TheModule-&gt;dump();
-
- return 0;
-}
-</pre>
-</div>
-
-<a href="LangImpl5.html">Next: Extending the language: control flow</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl4.rst b/docs/tutorial/LangImpl4.rst
new file mode 100644
index 000000000000..96c06d124ef1
--- /dev/null
+++ b/docs/tutorial/LangImpl4.rst
@@ -0,0 +1,1061 @@
+==============================================
+Kaleidoscope: Adding JIT and Optimizer Support
+==============================================
+
+.. contents::
+ :local:
+
+Chapter 4 Introduction
+======================
+
+Welcome to Chapter 4 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. Chapters 1-3 described the implementation
+of a simple language and added support for generating LLVM IR. This
+chapter describes two new techniques: adding optimizer support to your
+language, and adding JIT compiler support. These additions will
+demonstrate how to get nice, efficient code for the Kaleidoscope
+language.
+
+Trivial Constant Folding
+========================
+
+Our demonstration for Chapter 3 is elegant and easy to extend.
+Unfortunately, it does not produce wonderful code. The IRBuilder,
+however, does give us obvious optimizations when compiling simple code:
+
+::
+
+ ready> def test(x) 1+2+x;
+ Read function definition:
+ define double @test(double %x) {
+ entry:
+ %addtmp = fadd double 3.000000e+00, %x
+ ret double %addtmp
+ }
+
+This code is not a literal transcription of the AST built by parsing the
+input. That would be:
+
+::
+
+ ready> def test(x) 1+2+x;
+ Read function definition:
+ define double @test(double %x) {
+ entry:
+ %addtmp = fadd double 2.000000e+00, 1.000000e+00
+ %addtmp1 = fadd double %addtmp, %x
+ ret double %addtmp1
+ }
+
+Constant folding, as seen above, is a very common and very important
+optimization: so much so that many language implementors implement
+constant folding support in their AST representation.
+
+With LLVM, you don't need this support in the AST. Since all calls to
+build LLVM IR go through the LLVM IR builder, the builder itself checks
+to see if there is a constant folding opportunity when you call it. If
+so, it just does the constant fold and returns the constant instead of
+creating an instruction.
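+
+As a minimal sketch of what this looks like from the caller's side (a
+hypothetical standalone snippet, not code from the tutorial itself):
+when both operands to ``CreateFAdd`` are constants, the builder hands
+back a folded constant instead of emitting an instruction.
+
+.. code-block:: c++
+
+    // Both operands are ConstantFP, so the builder performs the fold
+    // itself: no fadd instruction is created.
+    Value *L = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+    Value *R = ConstantFP::get(getGlobalContext(), APFloat(2.0));
+    // Sum is a ConstantFP holding 3.0, not an Instruction.
+    Value *Sum = Builder.CreateFAdd(L, R, "addtmp");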
+
+Well, that was easy :). In practice, we recommend always using
+``IRBuilder`` when generating code like this. It has no "syntactic
+overhead" for its use (you don't have to uglify your compiler with
+constant checks everywhere) and it can dramatically reduce the amount of
+LLVM IR that is generated in some cases (particularly for languages with
+a macro preprocessor or that use a lot of constants).
+
+On the other hand, the ``IRBuilder`` is limited by the fact that it does
+all of its analysis inline with the code as it is built. If you take a
+slightly more complex example:
+
+::
+
+ ready> def test(x) (1+2+x)*(x+(1+2));
+ ready> Read function definition:
+ define double @test(double %x) {
+ entry:
+ %addtmp = fadd double 3.000000e+00, %x
+ %addtmp1 = fadd double %x, 3.000000e+00
+ %multmp = fmul double %addtmp, %addtmp1
+ ret double %multmp
+ }
+
+In this case, the LHS and RHS of the multiplication are the same value.
+We'd really like to see this generate "``tmp = x+3; result = tmp*tmp;``"
+instead of computing "``x+3``" twice.
+
+Unfortunately, no amount of local analysis will be able to detect and
+correct this. This requires two transformations: reassociation of
+expressions (to make the adds lexically identical) and Common
+Subexpression Elimination (CSE) to delete the redundant add instruction.
+Fortunately, LLVM provides a broad range of optimizations that you can
+use, in the form of "passes".
+
+LLVM Optimization Passes
+========================
+
+LLVM provides many optimization passes, which do many different sorts of
+things and have different tradeoffs. Unlike other systems, LLVM doesn't
+hold to the mistaken notion that one set of optimizations is right for
+all languages and for all situations. LLVM allows a compiler implementor
+to make complete decisions about what optimizations to use, in which
+order, and in what situation.
+
+As a concrete example, LLVM supports "whole module" passes, which look
+across as large a body of code as they can (often a whole file, but if
+run at link time, this can be a substantial portion of the whole
+program). It also supports "per-function" passes, which just
+operate on a single function at a time, without looking at other
+functions. For more information on passes and how they are run, see the
+`How to Write a Pass <../WritingAnLLVMPass.html>`_ document and the
+`List of LLVM Passes <../Passes.html>`_.
+
+For Kaleidoscope, we are currently generating functions on the fly, one
+at a time, as the user types them in. We aren't shooting for the
+ultimate optimization experience in this setting, but we also want to
+catch the easy and quick stuff where possible. As such, we will choose
+to run a few per-function optimizations as the user types the function
+in. If we wanted to make a "static Kaleidoscope compiler", we would use
+exactly the code we have now, except that we would defer running the
+optimizer until the entire file has been parsed.
+
+In order to get per-function optimizations going, we need to set up a
+`FunctionPassManager <../WritingAnLLVMPass.html#passmanager>`_ to hold
+and organize the LLVM optimizations that we want to run. Once we have
+that, we can add a set of optimizations to run. The code looks like
+this:
+
+.. code-block:: c++
+
+ FunctionPassManager OurFPM(TheModule);
+
+ // Set up the optimizer pipeline. Start with registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+ // Provide basic AliasAnalysis support for GVN.
+ OurFPM.add(createBasicAliasAnalysisPass());
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+This code defines a ``FunctionPassManager``, "``OurFPM``". It requires a
+pointer to the ``Module`` to construct itself. Once it is set up, we use
+a series of "add" calls to add a bunch of LLVM passes. The first pass is
+basically boilerplate: it adds a pass so that later optimizations know
+how the data structures in the program are laid out. The
+"``TheExecutionEngine``" variable is related to the JIT, which we will
+get to in the next section.
+
+In this case, we choose to add 4 optimization passes. The passes we
+chose here are a pretty standard set of "cleanup" optimizations that are
+useful for a wide variety of code. I won't delve into what they do but,
+believe me, they are a good starting place :).
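+
+Extending the pipeline later is just another "add" call. As a hedged
+aside (the tutorial itself does this in a later chapter, once the
+front-end starts emitting stack slots for mutable variables):
+
+.. code-block:: c++
+
+    // Promote allocas to SSA registers so the other passes can reason
+    // about the values stored in them.
+    OurFPM.add(createPromoteMemoryToRegisterPass());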
+
+Once the PassManager is set up, we need to make use of it. We do this by
+running it after our newly created function is constructed (in
+``FunctionAST::Codegen``), but before it is returned to the client:
+
+.. code-block:: c++
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ // Optimize the function.
+ TheFPM->run(*TheFunction);
+
+ return TheFunction;
+ }
+
+As you can see, this is pretty straightforward. The
+``FunctionPassManager`` optimizes and updates the LLVM Function\* in
+place, improving (hopefully) its body. With this in place, we can try
+our test above again:
+
+::
+
+ ready> def test(x) (1+2+x)*(x+(1+2));
+ ready> Read function definition:
+ define double @test(double %x) {
+ entry:
+ %addtmp = fadd double %x, 3.000000e+00
+ %multmp = fmul double %addtmp, %addtmp
+ ret double %multmp
+ }
+
+As expected, we now get our nicely optimized code, saving a floating
+point add instruction from every execution of this function.
+
+LLVM provides a wide variety of optimizations that can be used in
+certain circumstances. Some `documentation about the various
+passes <../Passes.html>`_ is available, but it isn't very complete.
+Another good source of ideas can come from looking at the passes that
+``Clang`` runs to get started. The "``opt``" tool allows you to
+experiment with passes from the command line, so you can see if they do
+anything.
+
+Now that we have reasonable code coming out of our front-end, let's talk
+about executing it!
+
+Adding a JIT Compiler
+=====================
+
+Code that is available in LLVM IR can have a wide variety of tools
+applied to it. For example, you can run optimizations on it (as we did
+above), you can dump it out in textual or binary forms, you can compile
+the code to an assembly file (.s) for some target, or you can JIT
+compile it. The nice thing about the LLVM IR representation is that it
+is the "common currency" between many different parts of the compiler.
+
+In this section, we'll add JIT compiler support to our interpreter. The
+basic idea that we want for Kaleidoscope is to have the user enter
+function bodies as they do now, but immediately evaluate the top-level
+expressions they type in. For example, if they type in "1 + 2;", we
+should evaluate and print out 3. If they define a function, they should
+be able to call it from the command line.
+
+In order to do this, we first declare and initialize the JIT. This is
+done by adding a global variable and a call in ``main``:
+
+.. code-block:: c++
+
+ static ExecutionEngine *TheExecutionEngine;
+ ...
+ int main() {
+ ..
+ // Create the JIT. This takes ownership of the module.
+ TheExecutionEngine = EngineBuilder(TheModule).create();
+ ..
+ }
+
+This creates an abstract "Execution Engine" which can be either a JIT
+compiler or the LLVM interpreter. LLVM will automatically pick a JIT
+compiler for you if one is available for your platform, otherwise it
+will fall back to the interpreter.
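+
+If you would rather not leave the choice to LLVM, ``EngineBuilder`` lets
+you request an engine kind explicitly. A hedged variant of the creation
+code (the full listing below uses ``setErrorStr`` the same way, but the
+explicit ``setEngineKind`` call is our own addition):
+
+.. code-block:: c++
+
+    // Ask specifically for a JIT, and capture a human-readable error
+    // message if engine creation fails.
+    std::string ErrStr;
+    TheExecutionEngine = EngineBuilder(TheModule)
+                             .setErrorStr(&ErrStr)
+                             .setEngineKind(EngineKind::JIT)
+                             .create();
+    if (!TheExecutionEngine) {
+      fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+      exit(1);
+    }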
+
+Once the ``ExecutionEngine`` is created, the JIT is ready to be used.
+There are a variety of APIs that are useful, but the simplest one is the
+"``getPointerToFunction(F)``" method. This method JIT compiles the
+specified LLVM Function and returns a function pointer to the generated
+machine code. In our case, this means that we can change the code that
+parses a top-level expression to look like this:
+
+.. code-block:: c++
+
+ static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (Function *LF = F->Codegen()) {
+ LF->dump(); // Dump the function for exposition purposes.
+
+ // JIT the function, returning a function pointer.
+ void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+ // Cast it to the right type (takes no arguments, returns a double) so we
+ // can call it as a native function.
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
+ fprintf(stderr, "Evaluated to %f\n", FP());
+ }
+
+Recall that we compile top-level expressions into a self-contained LLVM
+function that takes no arguments and returns the computed double.
+Because the LLVM JIT compiler matches the native platform ABI, this
+means that you can just cast the result pointer to a function pointer of
+that type and call it directly. This means there is no difference
+between JIT compiled code and native machine code that is statically
+linked into your application.
+
+With just these two changes, let's see how Kaleidoscope works now!
+
+::
+
+ ready> 4+5;
+ Read top-level expression:
+ define double @0() {
+ entry:
+ ret double 9.000000e+00
+ }
+
+ Evaluated to 9.000000
+
+Well, this looks like it is basically working. The dump of the function
+shows the "no argument function that always returns double" that we
+synthesize for each top-level expression that is typed in. This
+demonstrates very basic functionality, but can we do more?
+
+::
+
+ ready> def testfunc(x y) x + y*2;
+ Read function definition:
+ define double @testfunc(double %x, double %y) {
+ entry:
+ %multmp = fmul double %y, 2.000000e+00
+ %addtmp = fadd double %multmp, %x
+ ret double %addtmp
+ }
+
+ ready> testfunc(4, 10);
+ Read top-level expression:
+ define double @1() {
+ entry:
+ %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
+ ret double %calltmp
+ }
+
+ Evaluated to 24.000000
+
+This illustrates that we can now call user code, but there is something
+a bit subtle going on here. Note that we only invoke the JIT on the
+anonymous functions that *call testfunc*, but we never invoked it on
+*testfunc* itself. What actually happened here is that the JIT scanned
+for all non-JIT'd functions transitively called from the anonymous
+function and compiled all of them before returning from
+``getPointerToFunction()``.
+
+The JIT provides a number of other more advanced interfaces for things
+like freeing allocated machine code, rejit'ing functions to update them,
+etc. However, even with this simple code, we get some surprisingly
+powerful capabilities - check this out (I removed the dump of the
+anonymous functions; you should get the idea by now :) :
+
+::
+
+ ready> extern sin(x);
+ Read extern:
+ declare double @sin(double)
+
+ ready> extern cos(x);
+ Read extern:
+ declare double @cos(double)
+
+ ready> sin(1.0);
+ Read top-level expression:
+ define double @2() {
+ entry:
+ ret double 0x3FEAED548F090CEE
+ }
+
+ Evaluated to 0.841471
+
+ ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
+ Read function definition:
+ define double @foo(double %x) {
+ entry:
+ %calltmp = call double @sin(double %x)
+ %multmp = fmul double %calltmp, %calltmp
+ %calltmp2 = call double @cos(double %x)
+ %multmp4 = fmul double %calltmp2, %calltmp2
+ %addtmp = fadd double %multmp, %multmp4
+ ret double %addtmp
+ }
+
+ ready> foo(4.0);
+ Read top-level expression:
+ define double @3() {
+ entry:
+ %calltmp = call double @foo(double 4.000000e+00)
+ ret double %calltmp
+ }
+
+ Evaluated to 1.000000
+
+Whoa, how does the JIT know about sin and cos? The answer is
+surprisingly simple: in this example, the JIT started execution of a
+function and got to a function call. It realized that the function was
+not yet JIT compiled and invoked the standard set of routines to resolve
+the function. In this case, there is no body defined for the function,
+so the JIT ended up calling "``dlsym("sin")``" on the Kaleidoscope
+process itself. Since "``sin``" is defined within the JIT's address
+space, it simply patches up calls in the module to call the libm version
+of ``sin`` directly.
+
+The LLVM JIT provides a number of interfaces (look in the
+``ExecutionEngine.h`` file) for controlling how unknown functions get
+resolved. It allows you to establish explicit mappings between IR
+objects and addresses (useful for LLVM global variables that you want to
+map to static tables, for example), allows you to dynamically decide on
+the fly based on the function name, and even allows you to have the JIT
+compile functions lazily the first time they're called.
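+
+As a small illustration of the first of these facilities, here is a
+hedged sketch (hypothetical code, not part of the tutorial; ``mysin`` is
+a made-up helper) that binds an IR function to an address of our
+choosing with ``addGlobalMapping``:
+
+.. code-block:: c++
+
+    // Host-side replacement we want the JIT to call instead of letting
+    // it resolve "sin" through dlsym.
+    extern "C" double mysin(double X) { return sin(X); }
+    ...
+    // If the module declares "sin", point the JIT at our replacement.
+    if (Function *SinF = TheModule->getFunction("sin"))
+      TheExecutionEngine->addGlobalMapping(SinF, (void*)(intptr_t)mysin);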
+
+One interesting application of this is that we can now extend the
+language by writing arbitrary C++ code to implement operations. For
+example, if we add:
+
+.. code-block:: c++
+
+ /// putchard - putchar that takes a double and returns 0.
+ extern "C"
+ double putchard(double X) {
+ putchar((char)X);
+ return 0;
+ }
+
+Now we can produce simple output to the console by using things like:
+"``extern putchard(x); putchard(120);``", which prints a lowercase 'x'
+on the console (120 is the ASCII code for 'x'). Similar code could be
+used to implement file I/O, console input, and many other capabilities
+in Kaleidoscope.
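+
+A companion helper along the same lines (this particular one appears in
+later chapters of the tutorial) prints a full double value rather than a
+single character:
+
+.. code-block:: c++
+
+    /// printd - printf that takes a double and prints it with a newline,
+    /// returning 0. Usable from Kaleidoscope via "extern printd(x);".
+    extern "C"
+    double printd(double X) {
+      printf("%f\n", X);
+      return 0;
+    }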
+
+This completes the JIT and optimizer chapter of the Kaleidoscope
+tutorial. At this point, we can compile a non-Turing-complete
+programming language, optimize and JIT compile it in a user-driven way.
+Next up we'll look into `extending the language with control flow
+constructs <LangImpl5.html>`_, tackling some interesting LLVM IR issues
+along the way.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the LLVM JIT and optimizer. To build this example, use:
+
+.. code-block:: bash
+
+ # Compile
+ clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+ # Run
+ ./toy
+
+If you are compiling this on Linux, make sure to add the "-rdynamic"
+option as well. This makes sure that the external functions are resolved
+properly at runtime.
+
+Here is the code:
+
+.. code-block:: c++
+
+ #include "llvm/DerivedTypes.h"
+ #include "llvm/ExecutionEngine/ExecutionEngine.h"
+ #include "llvm/ExecutionEngine/JIT.h"
+ #include "llvm/IRBuilder.h"
+ #include "llvm/LLVMContext.h"
+ #include "llvm/Module.h"
+ #include "llvm/PassManager.h"
+ #include "llvm/Analysis/Verifier.h"
+ #include "llvm/Analysis/Passes.h"
+ #include "llvm/DataLayout.h"
+ #include "llvm/Transforms/Scalar.h"
+ #include "llvm/Support/TargetSelect.h"
+ #include <cstdio>
+ #include <string>
+ #include <map>
+ #include <vector>
+ using namespace llvm;
+
+ //===----------------------------------------------------------------------===//
+ // Lexer
+ //===----------------------------------------------------------------------===//
+
+ // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+ // of these for known things.
+ enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5
+ };
+
+ static std::string IdentifierStr; // Filled in if tok_identifier
+ static double NumVal; // Filled in if tok_number
+
+ /// gettok - Return the next token from standard input.
+ static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Abstract Syntax Tree (aka Parse Tree)
+ //===----------------------------------------------------------------------===//
+
+ /// ExprAST - Base class for all expression nodes.
+ class ExprAST {
+ public:
+ virtual ~ExprAST() {}
+ virtual Value *Codegen() = 0;
+ };
+
+ /// NumberExprAST - Expression class for numeric literals like "1.0".
+ class NumberExprAST : public ExprAST {
+ double Val;
+ public:
+ NumberExprAST(double val) : Val(val) {}
+ virtual Value *Codegen();
+ };
+
+ /// VariableExprAST - Expression class for referencing a variable, like "a".
+ class VariableExprAST : public ExprAST {
+ std::string Name;
+ public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ virtual Value *Codegen();
+ };
+
+ /// BinaryExprAST - Expression class for a binary operator.
+ class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+ public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ virtual Value *Codegen();
+ };
+
+ /// CallExprAST - Expression class for function calls.
+ class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+ public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ virtual Value *Codegen();
+ };
+
+ /// PrototypeAST - This class represents the "prototype" for a function,
+ /// which captures its name, and its argument names (thus implicitly the number
+ /// of arguments the function takes).
+ class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+ public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+ : Name(name), Args(args) {}
+
+ Function *Codegen();
+ };
+
+ /// FunctionAST - This class represents a function definition itself.
+ class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+ public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+ Function *Codegen();
+ };
+
+ //===----------------------------------------------------------------------===//
+ // Parser
+ //===----------------------------------------------------------------------===//
+
+ /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+ /// token the parser is looking at. getNextToken reads another token from the
+ /// lexer and updates CurTok with its results.
+ static int CurTok;
+ static int getNextToken() {
+ return CurTok = gettok();
+ }
+
+ /// BinopPrecedence - This holds the precedence for each binary operator that is
+ /// defined.
+ static std::map<char, int> BinopPrecedence;
+
+ /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+ static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+ }
+
+ /// Error* - These are little helper functions for error handling.
+ ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+ PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+ FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+ static ExprAST *ParseExpression();
+
+ /// identifierexpr
+ /// ::= identifier
+ /// ::= identifier '(' expression* ')'
+ static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+ }
+
+ /// numberexpr ::= number
+ static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+ }
+
+ /// parenexpr ::= '(' expression ')'
+ static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+ }
+
+ /// primary
+ /// ::= identifierexpr
+ /// ::= numberexpr
+ /// ::= parenexpr
+ static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ }
+ }
+
+ /// binoprhs
+ /// ::= ('+' primary)*
+ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the primary expression after the binary operator.
+ ExprAST *RHS = ParsePrimary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+ }
+
+ /// expression
+ /// ::= primary binoprhs
+ ///
+ static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParsePrimary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+ }
+
+ /// prototype
+ /// ::= id '(' id* ')'
+ static PrototypeAST *ParsePrototype() {
+ if (CurTok != tok_identifier)
+ return ErrorP("Expected function name in prototype");
+
+ std::string FnName = IdentifierStr;
+ getNextToken();
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ return new PrototypeAST(FnName, ArgNames);
+ }
+
+ /// definition ::= 'def' prototype expression
+ static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+ }
+
+ /// toplevelexpr ::= expression
+ static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+ }
+
+ /// external ::= 'extern' prototype
+ static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Code Generation
+ //===----------------------------------------------------------------------===//
+
+ static Module *TheModule;
+ static IRBuilder<> Builder(getGlobalContext());
+ static std::map<std::string, Value*> NamedValues;
+ static FunctionPassManager *TheFPM;
+
+ Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+ Value *NumberExprAST::Codegen() {
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
+ }
+
+ Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ return V ? V : ErrorV("Unknown variable name");
+ }
+
+ Value *BinaryExprAST::Codegen() {
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateFAdd(L, R, "addtmp");
+ case '-': return Builder.CreateFSub(L, R, "subtmp");
+ case '*': return Builder.CreateFMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: return ErrorV("invalid binary operator");
+ }
+ }
+
+ Value *CallExprAST::Codegen() {
+ // Look up the name in the global module table.
+ Function *CalleeF = TheModule->getFunction(Callee);
+ if (CalleeF == 0)
+ return ErrorV("Unknown function referenced");
+
+ // If argument mismatch error.
+ if (CalleeF->arg_size() != Args.size())
+ return ErrorV("Incorrect # arguments passed");
+
+ std::vector<Value*> ArgsV;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ ArgsV.push_back(Args[i]->Codegen());
+ if (ArgsV.back() == 0) return 0;
+ }
+
+ return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+ }
+
+ Function *PrototypeAST::Codegen() {
+ // Make the function type: double(double,double) etc.
+ std::vector<Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
+
+ Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+ // If F conflicted, there was already something named 'Name'. If it has a
+ // body, don't allow redefinition or reextern.
+ if (F->getName() != Name) {
+ // Delete the one we just made and get the existing one.
+ F->eraseFromParent();
+ F = TheModule->getFunction(Name);
+
+ // If F already has a body, reject this.
+ if (!F->empty()) {
+ ErrorF("redefinition of function");
+ return 0;
+ }
+
+ // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) {
+ ErrorF("redefinition of function with different # args");
+ return 0;
+ }
+ }
+
+ // Set names for all arguments.
+ unsigned Idx = 0;
+ for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+ ++AI, ++Idx) {
+ AI->setName(Args[Idx]);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = AI;
+ }
+
+ return F;
+ }
+
+ Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ // Optimize the function.
+ TheFPM->run(*TheFunction);
+
+ return TheFunction;
+ }
+
+ // Error reading body, remove function.
+ TheFunction->eraseFromParent();
+ return 0;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Top-Level parsing and JIT Driver
+ //===----------------------------------------------------------------------===//
+
+ static ExecutionEngine *TheExecutionEngine;
+
+ static void HandleDefinition() {
+ if (FunctionAST *F = ParseDefinition()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read function definition:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleExtern() {
+ if (PrototypeAST *P = ParseExtern()) {
+ if (Function *F = P->Codegen()) {
+ fprintf(stderr, "Read extern: ");
+ F->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read top-level expression:");
+ LF->dump();
+
+ // JIT the function, returning a function pointer.
+ void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+ // Cast it to the right type (takes no arguments, returns a double) so we
+ // can call it as a native function.
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
+ fprintf(stderr, "Evaluated to %f\n", FP());
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ /// top ::= definition | external | expression | ';'
+ static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+ }
+
+ //===----------------------------------------------------------------------===//
+ // "Library" functions that can be "extern'd" from user code.
+ //===----------------------------------------------------------------------===//
+
+ /// putchard - putchar that takes a double and returns 0.
+ extern "C"
+ double putchard(double X) {
+ putchar((char)X);
+ return 0;
+ }
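+
+ // Usage sketch (a hypothetical session, not part of the original listing):
+ //   ready> extern putchard(x);
+ //   ready> putchard(120);  # prints 'x' (ASCII 120); evaluates to 0.000000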
+
+ //===----------------------------------------------------------------------===//
+ // Main driver code.
+ //===----------------------------------------------------------------------===//
+
+ int main() {
+ InitializeNativeTarget();
+ LLVMContext &Context = getGlobalContext();
+
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context);
+
+ // Create the JIT. This takes ownership of the module.
+ std::string ErrStr;
+ TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+ if (!TheExecutionEngine) {
+ fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+ exit(1);
+ }
+
+ FunctionPassManager OurFPM(TheModule);
+
+ // Set up the optimizer pipeline. Start with registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+ // Provide basic AliasAnalysis support for GVN.
+ OurFPM.add(createBasicAliasAnalysisPass());
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule->dump();
+
+ return 0;
+ }
+
+`Next: Extending the language: control flow <LangImpl5.html>`_
+
diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html
deleted file mode 100644
index 9a9fd8c14e09..000000000000
--- a/docs/tutorial/LangImpl5.html
+++ /dev/null
@@ -1,1772 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Extending the Language: Control Flow</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: Control Flow</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 5
- <ol>
- <li><a href="#intro">Chapter 5 Introduction</a></li>
- <li><a href="#ifthen">If/Then/Else</a>
- <ol>
- <li><a href="#iflexer">Lexer Extensions</a></li>
- <li><a href="#ifast">AST Extensions</a></li>
- <li><a href="#ifparser">Parser Extensions</a></li>
- <li><a href="#ifir">LLVM IR</a></li>
- <li><a href="#ifcodegen">Code Generation</a></li>
- </ol>
- </li>
- <li><a href="#for">'for' Loop Expression</a>
- <ol>
- <li><a href="#forlexer">Lexer Extensions</a></li>
- <li><a href="#forast">AST Extensions</a></li>
- <li><a href="#forparser">Parser Extensions</a></li>
- <li><a href="#forir">LLVM IR</a></li>
- <li><a href="#forcodegen">Code Generation</a></li>
- </ol>
- </li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="LangImpl6.html">Chapter 6</a>: Extending the Language:
-User-defined Operators</li>
-</ul>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 5 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 5 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial. Parts 1-4 described the implementation of the simple
-Kaleidoscope language and included support for generating LLVM IR, followed by
-optimizations and a JIT compiler. Unfortunately, as presented, Kaleidoscope is
-mostly useless: it has no control flow other than call and return. This means
-that you can't have conditional branches in the code, significantly limiting its
-power. In this episode of "build that compiler", we'll extend Kaleidoscope to
-have an if/then/else expression plus a simple 'for' loop.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="ifthen">If/Then/Else</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Extending Kaleidoscope to support if/then/else is quite straightforward. It
-basically requires adding support for this "new" concept to the lexer,
-parser, AST, and LLVM code emitter. This example is nice, because it shows how
-easy it is to "grow" a language over time, incrementally extending it as new
-ideas are discovered.</p>
-
-<p>Before we get going on "how" we add this extension, let's talk about "what" we
-want. The basic idea is that we want to be able to write this sort of thing:
-</p>
-
-<div class="doc_code">
-<pre>
-def fib(x)
- if x &lt; 3 then
- 1
- else
- fib(x-1)+fib(x-2);
-</pre>
-</div>
-
-<p>In Kaleidoscope, every construct is an expression: there are no statements.
-As such, the if/then/else expression needs to return a value like any other.
-Since we're using a mostly functional form, we'll have it evaluate its
-conditional, then return the 'then' or 'else' value based on how the condition
-was resolved. This is very similar to the C "?:" expression.</p>
-
-<p>The semantics of the if/then/else expression is that it evaluates the
-condition to a boolean value: 0.0 is considered to be false and everything
-else is considered to be true. If the condition is true, the first
-subexpression is evaluated and returned; if the condition is false, the second
-subexpression is evaluated and returned. Since Kaleidoscope allows
-side-effects, this behavior is important to nail down.
-</p>
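-
-<p>For example (an illustrative session, in the same spirit as the fib example
-above), a C-style "max" can then be written directly as a single expression:</p>
-
-<div class="doc_code">
-<pre>
-def max(a b)
-  if a &lt; b then b else a;
-
-max(1, 2);  # evaluates to 2.0
-</pre>
-</div>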
-
-<p>Now that we know what we "want", let's break this down into its constituent
-pieces.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="iflexer">Lexer Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The lexer extensions are straightforward. First we add new enum values
-for the relevant tokens:</p>
-
-<div class="doc_code">
-<pre>
- // control
- tok_if = -6, tok_then = -7, tok_else = -8,
-</pre>
-</div>
-
-<p>Once we have that, we recognize the new keywords in the lexer. This is pretty simple
-stuff:</p>
-
-<div class="doc_code">
-<pre>
- ...
- if (IdentifierStr == "def") return tok_def;
- if (IdentifierStr == "extern") return tok_extern;
- <b>if (IdentifierStr == "if") return tok_if;
- if (IdentifierStr == "then") return tok_then;
- if (IdentifierStr == "else") return tok_else;</b>
- return tok_identifier;
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifast">AST Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>To represent the new expression we add a new AST node for it:</p>
-
-<div class="doc_code">
-<pre>
-/// IfExprAST - Expression class for if/then/else.
-class IfExprAST : public ExprAST {
- ExprAST *Cond, *Then, *Else;
-public:
- IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
- : Cond(cond), Then(then), Else(_else) {}
- virtual Value *Codegen();
-};
-</pre>
-</div>
-
-<p>The AST node just has pointers to the various subexpressions.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifparser">Parser Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now that we have the relevant tokens coming from the lexer and we have the
-AST node to build, our parsing logic is relatively straightforward. First we
-define a new parsing function:</p>
-
-<div class="doc_code">
-<pre>
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
- getNextToken(); // eat the if.
-
- // condition.
- ExprAST *Cond = ParseExpression();
- if (!Cond) return 0;
-
- if (CurTok != tok_then)
- return Error("expected then");
- getNextToken(); // eat the then
-
- ExprAST *Then = ParseExpression();
- if (Then == 0) return 0;
-
- if (CurTok != tok_else)
- return Error("expected else");
-
- getNextToken();
-
- ExprAST *Else = ParseExpression();
- if (!Else) return 0;
-
- return new IfExprAST(Cond, Then, Else);
-}
-</pre>
-</div>
-
-<p>Next we hook it up as a primary expression:</p>
-
-<div class="doc_code">
-<pre>
-static ExprAST *ParsePrimary() {
- switch (CurTok) {
- default: return Error("unknown token when expecting an expression");
- case tok_identifier: return ParseIdentifierExpr();
- case tok_number: return ParseNumberExpr();
- case '(': return ParseParenExpr();
- <b>case tok_if: return ParseIfExpr();</b>
- }
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifir">LLVM IR for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now that we have it parsing and building the AST, the final piece is adding
-LLVM code generation support. This is the most interesting part of the
-if/then/else example, because this is where it starts to introduce new concepts.
-All of the code above has been thoroughly described in previous chapters.
-</p>
-
-<p>To motivate the code we want to produce, let's take a look at a simple
-example. Consider:</p>
-
-<div class="doc_code">
-<pre>
-extern foo();
-extern bar();
-def baz(x) if x then foo() else bar();
-</pre>
-</div>
-
-<p>If you disable optimizations, the code you'll (soon) get from Kaleidoscope
-looks like this:</p>
-
-<div class="doc_code">
-<pre>
-declare double @foo()
-
-declare double @bar()
-
-define double @baz(double %x) {
-entry:
- %ifcond = fcmp one double %x, 0.000000e+00
- br i1 %ifcond, label %then, label %else
-
-then: ; preds = %entry
- %calltmp = call double @foo()
- br label %ifcont
-
-else: ; preds = %entry
- %calltmp1 = call double @bar()
- br label %ifcont
-
-ifcont: ; preds = %else, %then
- %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
- ret double %iftmp
-}
-</pre>
-</div>
-
-<p>To visualize the control flow graph, you can use a nifty feature of the LLVM
-'<a href="http://llvm.org/cmds/opt.html">opt</a>' tool. If you put this LLVM IR
-into "t.ll" and run "<tt>llvm-as &lt; t.ll | opt -analyze -view-cfg</tt>", <a
-href="../ProgrammersManual.html#ViewGraph">a window will pop up</a> and you'll
-see this graph:</p>
-
-<div style="text-align: center"><img src="LangImpl5-cfg.png" alt="Example CFG" width="423"
-height="315"></div>
-
-<p>Another way to get this is to call "<tt>F-&gt;viewCFG()</tt>" or
-"<tt>F-&gt;viewCFGOnly()</tt>" (where F is a "<tt>Function*</tt>") either by
-inserting actual calls into the code and recompiling or by calling these in the
-debugger. LLVM has many nice features for visualizing various graphs.</p>
-
-<p>Getting back to the generated code, it is fairly simple: the entry block
-evaluates the conditional expression ("x" in our case here) and compares the
-result to 0.0 with the "<tt><a href="../LangRef.html#i_fcmp">fcmp</a> one</tt>"
-instruction ('one' is "Ordered and Not Equal"). Based on the result of this
-expression, the code jumps to either the "then" or "else" blocks, which contain
-the expressions for the true/false cases.</p>
-
-<p>Once the then/else blocks are finished executing, they both branch back to the
-'ifcont' block to execute the code that happens after the if/then/else. In this
-case the only thing left to do is to return to the caller of the function. The
-question then becomes: how does the code know which expression to return?</p>
-
-<p>The answer to this question involves an important SSA operation: the
-<a href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Phi
-operation</a>. If you're not familiar with SSA, <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">the wikipedia
-article</a> is a good introduction and there are various other introductions to
-it available on your favorite search engine. The short version is that
-"execution" of the Phi operation requires "remembering" which block control came
-from. The Phi operation takes on the value corresponding to the input control
-block. In this case, if control comes in from the "then" block, it gets the
-value of "calltmp". If control comes from the "else" block, it gets the value
-of "calltmp1".</p>
-
-<p>At this point, you are probably starting to think "Oh no! This means my
-simple and elegant front-end will have to start generating SSA form in order to
-use LLVM!". Fortunately, this is not the case, and we strongly advise
-<em>not</em> implementing an SSA construction algorithm in your front-end
-unless there is an amazingly good reason to do so. In practice, there are two
-sorts of values that float around in code written for your average imperative
-programming language that might need Phi nodes:</p>
-
-<ol>
-<li>Code that involves user variables: <tt>x = 1; x = x + 1; </tt></li>
-<li>Values that are implicit in the structure of your AST, such as the Phi node
-in this case.</li>
-</ol>
-
-<p>In <a href="LangImpl7.html">Chapter 7</a> of this tutorial ("mutable
-variables"), we'll talk about #1 in depth. For now, just believe me that you
-don't need SSA construction to handle this case. For #2, you have the choice
-of using the techniques that we will describe for #1, or you can insert Phi
-nodes directly, if convenient. In this case, it is really easy to generate the
-Phi node, so we choose to do it directly.</p>
-
-<p>Okay, enough of the motivation and overview: let's generate code!</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifcodegen">Code Generation for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>In order to generate code for this, we implement the <tt>Codegen</tt> method
-for <tt>IfExprAST</tt>:</p>
-
-<div class="doc_code">
-<pre>
-Value *IfExprAST::Codegen() {
- Value *CondV = Cond-&gt;Codegen();
- if (CondV == 0) return 0;
-
- // Convert condition to a bool by comparing equal to 0.0.
- CondV = Builder.CreateFCmpONE(CondV,
- ConstantFP::get(getGlobalContext(), APFloat(0.0)),
- "ifcond");
-</pre>
-</div>
-
-<p>This code is straightforward and similar to what we saw before. We emit the
-expression for the condition, then compare that value to zero to get a truth
-value as a 1-bit (bool) value.</p>
-
-<div class="doc_code">
-<pre>
- Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-
- // Create blocks for the then and else cases. Insert the 'then' block at the
- // end of the function.
- BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
- BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
- BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-
- Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-</pre>
-</div>
-
-<p>This code creates the basic blocks that are related to the if/then/else
-statement, and correspond directly to the blocks in the example above. The
-first line gets the current Function object that is being built. It
-gets this by asking the builder for the current BasicBlock, and asking that
-block for its "parent" (the function it is currently embedded into).</p>
-
-<p>Once it has that, it creates three blocks. Note that it passes "TheFunction"
-into the constructor for the "then" block. This causes the constructor to
-automatically insert the new block into the end of the specified function. The
-other two blocks are created, but aren't yet inserted into the function.</p>
-
-<p>Once the blocks are created, we can emit the conditional branch that chooses
-between them. Note that creating new blocks does not implicitly affect the
-IRBuilder, so it is still inserting into the block that the condition
-went into. Also note that it is creating a branch to the "then" block and the
-"else" block, even though the "else" block isn't inserted into the function yet.
-This is all ok: it is the standard way that LLVM supports forward
-references.</p>
-
-<div class="doc_code">
-<pre>
- // Emit then value.
- Builder.SetInsertPoint(ThenBB);
-
- Value *ThenV = Then-&gt;Codegen();
- if (ThenV == 0) return 0;
-
- Builder.CreateBr(MergeBB);
- // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
- ThenBB = Builder.GetInsertBlock();
-</pre>
-</div>
-
-<p>After the conditional branch is inserted, we move the builder to start
-inserting into the "then" block. Strictly speaking, this call moves the
-insertion point to be at the end of the specified block. However, since the
-"then" block is empty, it also starts out by inserting at the beginning of the
-block. :)</p>
-
-<p>Once the insertion point is set, we recursively codegen the "then" expression
-from the AST. To finish off the "then" block, we create an unconditional branch
-to the merge block. One interesting (and very important) aspect of the LLVM IR
-is that it <a href="../LangRef.html#functionstructure">requires all basic blocks
-to be "terminated"</a> with a <a href="../LangRef.html#terminators">control flow
-instruction</a> such as return or branch. This means that all control flow,
-<em>including fall throughs</em>, must be made explicit in the LLVM IR. If you
-violate this rule, the verifier will emit an error.</p>
-
-<p>The final line here is quite subtle, but is very important. The basic issue
-is that when we create the Phi node in the merge block, we need to set up the
-block/value pairs that indicate how the Phi will work. Importantly, the Phi
-node expects to have an entry for each predecessor of the block in the CFG.
-Why, then, are we getting the current block when we just set it to ThenBB five
-lines above? The problem is that the "Then" expression may itself change the
-block that the Builder is emitting into if, for example, it contains a nested
-"if/then/else" expression. Because calling Codegen recursively could
-arbitrarily change the notion of the current block, we must query the builder
-for an up-to-date block to use when setting up the Phi node.</p>
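-
-<p>To make this concrete, consider a nested case like the following (an
-illustrative example): codegen of the outer "then" expression emits the inner
-if/then/else, which leaves the builder positioned in the inner merge block,
-not in the original "then" block:</p>
-
-<div class="doc_code">
-<pre>
-def nested(x)
-  if x then
-    (if x &lt; 2 then 1 else 2)  # inner 'if' moves the insertion point
-  else
-    3;
-</pre>
-</div>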
-
-<div class="doc_code">
-<pre>
- // Emit else block.
- TheFunction-&gt;getBasicBlockList().push_back(ElseBB);
- Builder.SetInsertPoint(ElseBB);
-
- Value *ElseV = Else-&gt;Codegen();
- if (ElseV == 0) return 0;
-
- Builder.CreateBr(MergeBB);
- // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
- ElseBB = Builder.GetInsertBlock();
-</pre>
-</div>
-
-<p>Code generation for the 'else' block is basically identical to codegen for
-the 'then' block. The only significant difference is the first line, which adds
-the 'else' block to the function. Recall that the 'else' block was created,
-but not added to the function. Now that the 'then' and 'else' blocks
-are emitted, we can finish up with the merge code:</p>
-
-<div class="doc_code">
-<pre>
- // Emit merge block.
- TheFunction->getBasicBlockList().push_back(MergeBB);
- Builder.SetInsertPoint(MergeBB);
- PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
- "iftmp");
-
- PN->addIncoming(ThenV, ThenBB);
- PN->addIncoming(ElseV, ElseBB);
- return PN;
-}
-</pre>
-</div>
-
-<p>The first two lines here are now familiar: the first adds the "merge" block
-to the Function object (it was previously floating, like the else block above).
-The second line changes the insertion point so that newly created code will go
-into the "merge" block. Once that is done, we need to create the PHI node and
-set up the block/value pairs for the PHI.</p>
-
-<p>Finally, the CodeGen function returns the phi node as the value computed by
-the if/then/else expression. In our example above, this returned value will
-feed into the code for the top-level function, which will create the return
-instruction.</p>
-
-<p>Overall, we now have the ability to execute conditional code in
-Kaleidoscope. With this extension, Kaleidoscope is a fairly complete language
-that can calculate a wide variety of numeric functions. Next up we'll add
-another useful expression that is familiar from non-functional languages...</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="for">'for' Loop Expression</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we know how to add basic control flow constructs to the language,
-we have the tools to add more powerful things. Let's add something more
-aggressive, a 'for' expression:</p>
-
-<div class="doc_code">
-<pre>
- extern putchard(char)
- def printstar(n)
- for i = 1, i &lt; n, 1.0 in
- putchard(42); # ascii 42 = '*'
-
- # print 100 '*' characters
- printstar(100);
-</pre>
-</div>
-
-<p>This expression defines a new variable ("i" in this case) which iterates from
-a starting value, while the condition ("i &lt; n" in this case) is true,
-incrementing by an optional step value ("1.0" in this case). If the step value
-is omitted, it defaults to 1.0. While the condition is true, the loop executes
-its body expression. Because we don't have anything better to return, we'll
-just define the loop as always returning 0.0. Once we have mutable variables,
-the loop will become more useful.</p>
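-
-<p>For example, since the step is optional, the loop above could equivalently
-be written without it (an illustrative variant):</p>
-
-<div class="doc_code">
-<pre>
-def printstar(n)
-  for i = 1, i &lt; n in  # step defaults to 1.0
-    putchard(42);
-</pre>
-</div>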
-
-<p>As before, let's talk about the changes we need to make to Kaleidoscope to
-support this.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="forlexer">Lexer Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The lexer extensions are the same sort of thing as for if/then/else:</p>
-
-<div class="doc_code">
-<pre>
- ... in enum Token ...
- // control
- tok_if = -6, tok_then = -7, tok_else = -8,
-<b> tok_for = -9, tok_in = -10</b>
-
- ... in gettok ...
- if (IdentifierStr == "def") return tok_def;
- if (IdentifierStr == "extern") return tok_extern;
- if (IdentifierStr == "if") return tok_if;
- if (IdentifierStr == "then") return tok_then;
- if (IdentifierStr == "else") return tok_else;
- <b>if (IdentifierStr == "for") return tok_for;
- if (IdentifierStr == "in") return tok_in;</b>
- return tok_identifier;
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forast">AST Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The AST node is just as simple. It basically boils down to capturing
-the variable name and the constituent expressions in the node.</p>
-
-<div class="doc_code">
-<pre>
-/// ForExprAST - Expression class for for/in.
-class ForExprAST : public ExprAST {
- std::string VarName;
- ExprAST *Start, *End, *Step, *Body;
-public:
- ForExprAST(const std::string &amp;varname, ExprAST *start, ExprAST *end,
- ExprAST *step, ExprAST *body)
- : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
- virtual Value *Codegen();
-};
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forparser">Parser Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The parser code is also fairly standard. The only interesting thing here is
-handling of the optional step value. The parser code handles it by checking to
-see if the second comma is present. If not, it sets the step value to null in
-the AST node:</p>
-
-<div class="doc_code">
-<pre>
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
- getNextToken(); // eat the for.
-
- if (CurTok != tok_identifier)
- return Error("expected identifier after for");
-
- std::string IdName = IdentifierStr;
- getNextToken(); // eat identifier.
-
- if (CurTok != '=')
- return Error("expected '=' after for");
- getNextToken(); // eat '='.
-
- ExprAST *Start = ParseExpression();
- if (Start == 0) return 0;
- if (CurTok != ',')
- return Error("expected ',' after for start value");
- getNextToken();
-
- ExprAST *End = ParseExpression();
- if (End == 0) return 0;
-
- // The step value is optional.
- ExprAST *Step = 0;
- if (CurTok == ',') {
- getNextToken();
- Step = ParseExpression();
- if (Step == 0) return 0;
- }
-
- if (CurTok != tok_in)
- return Error("expected 'in' after for");
- getNextToken(); // eat 'in'.
-
- ExprAST *Body = ParseExpression();
- if (Body == 0) return 0;
-
- return new ForExprAST(IdName, Start, End, Step, Body);
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forir">LLVM IR for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now we get to the good part: the LLVM IR we want to generate for this thing.
-With the simple example above, we get this LLVM IR (note that this dump is
-generated with optimizations disabled for clarity):
-</p>
-
-<div class="doc_code">
-<pre>
-declare double @putchard(double)
-
-define double @printstar(double %n) {
-entry:
- ; initial value = 1.0 (inlined into phi)
- br label %loop
-
-loop: ; preds = %loop, %entry
- %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
- ; body
- %calltmp = call double @putchard(double 4.200000e+01)
- ; increment
- %nextvar = fadd double %i, 1.000000e+00
-
- ; termination test
- %cmptmp = fcmp ult double %i, %n
- %booltmp = uitofp i1 %cmptmp to double
- %loopcond = fcmp one double %booltmp, 0.000000e+00
- br i1 %loopcond, label %loop, label %afterloop
-
-afterloop: ; preds = %loop
- ; loop always returns 0.0
- ret double 0.000000e+00
-}
-</pre>
-</div>
-
-<p>This loop contains all the same constructs we saw before: a phi node, several
-expressions, and some basic blocks. Let's see how this fits together.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forcodegen">Code Generation for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The first part of Codegen is very simple: we just output the start expression
-for the loop value:</p>
-
-<div class="doc_code">
-<pre>
-Value *ForExprAST::Codegen() {
- // Emit the start code first, without 'variable' in scope.
- Value *StartVal = Start-&gt;Codegen();
- if (StartVal == 0) return 0;
-</pre>
-</div>
-
-<p>With this out of the way, the next step is to set up the LLVM basic block
-for the start of the loop body. In the case above, the whole loop body is one
-block, but remember that the body code itself could consist of multiple blocks
-(e.g. if it contains an if/then/else or a for/in expression).</p>
-
-<div class="doc_code">
-<pre>
- // Make the new basic block for the loop header, inserting after current
- // block.
- Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
- BasicBlock *PreheaderBB = Builder.GetInsertBlock();
- BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
-
- // Insert an explicit fall through from the current block to the LoopBB.
- Builder.CreateBr(LoopBB);
-</pre>
-</div>
-
-<p>This code is similar to what we saw for if/then/else. Because we will need
-it to create the Phi node, we remember the block that falls through into the
-loop. Once we have that, we create the actual block that starts the loop and
-create an unconditional branch for the fall-through between the two blocks.</p>
-
-<div class="doc_code">
-<pre>
- // Start insertion in LoopBB.
- Builder.SetInsertPoint(LoopBB);
-
- // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
-                                        2, VarName.c_str());
- Variable-&gt;addIncoming(StartVal, PreheaderBB);
-</pre>
-</div>
-
-<p>Now that the "preheader" for the loop is set up, we switch to emitting code
-for the loop body. To begin with, we move the insertion point and create the
-PHI node for the loop induction variable. Since we already know the incoming
-value for the starting value, we add it to the Phi node. Note that the Phi will
-eventually get a second value for the backedge, but we can't set it up yet
-(because it doesn't exist!).</p>
-
-<div class="doc_code">
-<pre>
- // Within the loop, the variable is defined equal to the PHI node. If it
- // shadows an existing variable, we have to restore it, so save it now.
- Value *OldVal = NamedValues[VarName];
- NamedValues[VarName] = Variable;
-
- // Emit the body of the loop. This, like any other expr, can change the
- // current BB. Note that we ignore the value computed by the body, but don't
- // allow an error.
- if (Body-&gt;Codegen() == 0)
- return 0;
-</pre>
-</div>
-
-<p>Now the code starts to get more interesting. Our 'for' loop introduces a new
-variable to the symbol table. This means that our symbol table can now contain
-either function arguments or loop variables. To handle this, before we codegen
-the body of the loop, we add the loop variable as the current value for its
-name. Note that it is possible that there is a variable of the same name in the
-outer scope. It would be easy to make this an error (emit an error and return
-null if there is already an entry for VarName) but we choose to allow shadowing
-of variables. In order to handle this correctly, we remember the Value that
-we are potentially shadowing in <tt>OldVal</tt> (which will be null if there is
-no shadowed variable).</p>
-
-<p>Once the loop variable is set into the symbol table, the code recursively
-codegen's the body. This allows the body to use the loop variable: any
-references to it will naturally find it in the symbol table.</p>
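-
-<p>As a small illustration of shadowing (a hypothetical example), the loop
-variable "n" below shadows the function argument of the same name inside the
-loop body; the argument becomes visible again once the loop finishes:</p>
-
-<div class="doc_code">
-<pre>
-def shadow(n)
-  for n = 1, n &lt; 10 in  # this "n" shadows the argument "n"
-    putchard(n + 64);
-</pre>
-</div>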
-
-<div class="doc_code">
-<pre>
- // Emit the step value.
- Value *StepVal;
- if (Step) {
- StepVal = Step-&gt;Codegen();
- if (StepVal == 0) return 0;
- } else {
- // If not specified, use 1.0.
- StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
- }
-
- Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
-</pre>
-</div>
-
-<p>Now that the body is emitted, we compute the next value of the iteration
-variable by adding the step value, or 1.0 if it isn't present. '<tt>NextVar</tt>'
-will be the value of the loop variable on the next iteration of the loop.</p>
-
-<div class="doc_code">
-<pre>
- // Compute the end condition.
- Value *EndCond = End-&gt;Codegen();
- if (EndCond == 0) return EndCond;
-
- // Convert condition to a bool by comparing equal to 0.0.
- EndCond = Builder.CreateFCmpONE(EndCond,
- ConstantFP::get(getGlobalContext(), APFloat(0.0)),
- "loopcond");
-</pre>
-</div>
-
-<p>Finally, we evaluate the end condition of the loop, to determine whether the
-loop should exit. This mirrors the condition evaluation for the if/then/else
-expression.</p>
-
-<div class="doc_code">
-<pre>
- // Create the "after loop" block and insert it.
- BasicBlock *LoopEndBB = Builder.GetInsertBlock();
- BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
-
- // Insert the conditional branch into the end of LoopEndBB.
- Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-
- // Any new code will be inserted in AfterBB.
- Builder.SetInsertPoint(AfterBB);
-</pre>
-</div>
-
-<p>With the code for the body of the loop complete, we just need to finish up
-the control flow for it. This code remembers the end block (for the phi node),
-then creates the block for the loop exit ("afterloop"). Based on the value of
-the exit condition, it creates a conditional branch that chooses between
-executing the loop again and exiting the loop. Any future code is emitted in
-the "afterloop" block, so it sets the insertion position to it.</p>
-
-<div class="doc_code">
-<pre>
- // Add a new entry to the PHI node for the backedge.
- Variable-&gt;addIncoming(NextVar, LoopEndBB);
-
- // Restore the unshadowed variable.
- if (OldVal)
- NamedValues[VarName] = OldVal;
- else
- NamedValues.erase(VarName);
-
- // for expr always returns 0.0.
- return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-</pre>
-</div>
-
-<p>The final code handles various cleanups: now that we have the "NextVar"
-value, we can add the incoming value to the loop PHI node. After that, we
-remove the loop variable from the symbol table, so that it isn't in scope after
-the for loop. Finally, code generation of the for loop always returns 0.0, so
-that is what we return from <tt>ForExprAST::Codegen</tt>.</p>
-
-<p>With this, we conclude the "adding control flow to Kaleidoscope" chapter of
-the tutorial. In this chapter we added two control flow constructs, and used
-them to motivate a couple of aspects of the LLVM IR that are important for
-front-end implementors to know. In the next chapter of our saga, we will get a
-bit crazier and add
-<a href="LangImpl6.html">user-defined operators</a> to our poor innocent
-language.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-if/then/else and for expressions. To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include &lt;cstdio&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
- tok_eof = -1,
-
- // commands
- tok_def = -2, tok_extern = -3,
-
- // primary
- tok_identifier = -4, tok_number = -5,
-
- // control
- tok_if = -6, tok_then = -7, tok_else = -8,
- tok_for = -9, tok_in = -10
-};
-
-static std::string IdentifierStr; // Filled in if tok_identifier
-static double NumVal; // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
- static int LastChar = ' ';
-
- // Skip any whitespace.
- while (isspace(LastChar))
- LastChar = getchar();
-
- if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
- IdentifierStr = LastChar;
- while (isalnum((LastChar = getchar())))
- IdentifierStr += LastChar;
-
- if (IdentifierStr == "def") return tok_def;
- if (IdentifierStr == "extern") return tok_extern;
- if (IdentifierStr == "if") return tok_if;
- if (IdentifierStr == "then") return tok_then;
- if (IdentifierStr == "else") return tok_else;
- if (IdentifierStr == "for") return tok_for;
- if (IdentifierStr == "in") return tok_in;
- return tok_identifier;
- }
-
- if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
- std::string NumStr;
- do {
- NumStr += LastChar;
- LastChar = getchar();
- } while (isdigit(LastChar) || LastChar == '.');
-
- NumVal = strtod(NumStr.c_str(), 0);
- return tok_number;
- }
-
- if (LastChar == '#') {
- // Comment until end of line.
- do LastChar = getchar();
- while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-
- if (LastChar != EOF)
- return gettok();
- }
-
- // Check for end of file. Don't eat the EOF.
- if (LastChar == EOF)
- return tok_eof;
-
- // Otherwise, just return the character as its ascii value.
- int ThisChar = LastChar;
- LastChar = getchar();
- return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
- virtual ~ExprAST() {}
- virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
- double Val;
-public:
- NumberExprAST(double val) : Val(val) {}
- virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
- std::string Name;
-public:
- VariableExprAST(const std::string &amp;name) : Name(name) {}
- virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
- char Op;
- ExprAST *LHS, *RHS;
-public:
- BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
- : Op(op), LHS(lhs), RHS(rhs) {}
- virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
- std::string Callee;
- std::vector&lt;ExprAST*&gt; Args;
-public:
- CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
- : Callee(callee), Args(args) {}
- virtual Value *Codegen();
-};
-
-/// IfExprAST - Expression class for if/then/else.
-class IfExprAST : public ExprAST {
- ExprAST *Cond, *Then, *Else;
-public:
- IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
- : Cond(cond), Then(then), Else(_else) {}
- virtual Value *Codegen();
-};
-
-/// ForExprAST - Expression class for for/in.
-class ForExprAST : public ExprAST {
- std::string VarName;
- ExprAST *Start, *End, *Step, *Body;
-public:
- ForExprAST(const std::string &amp;varname, ExprAST *start, ExprAST *end,
- ExprAST *step, ExprAST *body)
- : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
- virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
- std::string Name;
- std::vector&lt;std::string&gt; Args;
-public:
- PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args)
- : Name(name), Args(args) {}
-
- Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
- PrototypeAST *Proto;
- ExprAST *Body;
-public:
- FunctionAST(PrototypeAST *proto, ExprAST *body)
- : Proto(proto), Body(body) {}
-
- Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser is looking at. getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
- return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
- if (!isascii(CurTok))
- return -1;
-
- // Make sure it's a declared binop.
- int TokPrec = BinopPrecedence[CurTok];
- if (TokPrec &lt;= 0) return -1;
- return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str); return 0; }
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-/// ::= identifier
-/// ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
- std::string IdName = IdentifierStr;
-
- getNextToken(); // eat identifier.
-
- if (CurTok != '(') // Simple variable ref.
- return new VariableExprAST(IdName);
-
- // Call.
- getNextToken(); // eat (
- std::vector&lt;ExprAST*&gt; Args;
- if (CurTok != ')') {
- while (1) {
- ExprAST *Arg = ParseExpression();
- if (!Arg) return 0;
- Args.push_back(Arg);
-
- if (CurTok == ')') break;
-
- if (CurTok != ',')
- return Error("Expected ')' or ',' in argument list");
- getNextToken();
- }
- }
-
- // Eat the ')'.
- getNextToken();
-
- return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
- ExprAST *Result = new NumberExprAST(NumVal);
- getNextToken(); // consume the number
- return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
- getNextToken(); // eat (.
- ExprAST *V = ParseExpression();
- if (!V) return 0;
-
- if (CurTok != ')')
- return Error("expected ')'");
- getNextToken(); // eat ).
- return V;
-}
-
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
- getNextToken(); // eat the if.
-
- // condition.
- ExprAST *Cond = ParseExpression();
- if (!Cond) return 0;
-
- if (CurTok != tok_then)
- return Error("expected then");
- getNextToken(); // eat the then
-
- ExprAST *Then = ParseExpression();
- if (Then == 0) return 0;
-
- if (CurTok != tok_else)
- return Error("expected else");
-
- getNextToken();
-
- ExprAST *Else = ParseExpression();
- if (!Else) return 0;
-
- return new IfExprAST(Cond, Then, Else);
-}
-
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
- getNextToken(); // eat the for.
-
- if (CurTok != tok_identifier)
- return Error("expected identifier after for");
-
- std::string IdName = IdentifierStr;
- getNextToken(); // eat identifier.
-
- if (CurTok != '=')
- return Error("expected '=' after for");
- getNextToken(); // eat '='.
-
- ExprAST *Start = ParseExpression();
- if (Start == 0) return 0;
- if (CurTok != ',')
- return Error("expected ',' after for start value");
- getNextToken();
-
- ExprAST *End = ParseExpression();
- if (End == 0) return 0;
-
- // The step value is optional.
- ExprAST *Step = 0;
- if (CurTok == ',') {
- getNextToken();
- Step = ParseExpression();
- if (Step == 0) return 0;
- }
-
- if (CurTok != tok_in)
- return Error("expected 'in' after for");
- getNextToken(); // eat 'in'.
-
- ExprAST *Body = ParseExpression();
- if (Body == 0) return 0;
-
- return new ForExprAST(IdName, Start, End, Step, Body);
-}
-
-/// primary
-/// ::= identifierexpr
-/// ::= numberexpr
-/// ::= parenexpr
-/// ::= ifexpr
-/// ::= forexpr
-static ExprAST *ParsePrimary() {
- switch (CurTok) {
- default: return Error("unknown token when expecting an expression");
- case tok_identifier: return ParseIdentifierExpr();
- case tok_number: return ParseNumberExpr();
- case '(': return ParseParenExpr();
- case tok_if: return ParseIfExpr();
- case tok_for: return ParseForExpr();
- }
-}
-
-/// binoprhs
-/// ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
- // If this is a binop, find its precedence.
- while (1) {
- int TokPrec = GetTokPrecedence();
-
- // If this is a binop that binds at least as tightly as the current binop,
- // consume it, otherwise we are done.
- if (TokPrec &lt; ExprPrec)
- return LHS;
-
- // Okay, we know this is a binop.
- int BinOp = CurTok;
- getNextToken(); // eat binop
-
- // Parse the primary expression after the binary operator.
- ExprAST *RHS = ParsePrimary();
- if (!RHS) return 0;
-
- // If BinOp binds less tightly with RHS than the operator after RHS, let
- // the pending operator take RHS as its LHS.
- int NextPrec = GetTokPrecedence();
- if (TokPrec &lt; NextPrec) {
- RHS = ParseBinOpRHS(TokPrec+1, RHS);
- if (RHS == 0) return 0;
- }
-
- // Merge LHS/RHS.
- LHS = new BinaryExprAST(BinOp, LHS, RHS);
- }
-}
-
-/// expression
-/// ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
- ExprAST *LHS = ParsePrimary();
- if (!LHS) return 0;
-
- return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-/// ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
- if (CurTok != tok_identifier)
- return ErrorP("Expected function name in prototype");
-
- std::string FnName = IdentifierStr;
- getNextToken();
-
- if (CurTok != '(')
- return ErrorP("Expected '(' in prototype");
-
- std::vector&lt;std::string&gt; ArgNames;
- while (getNextToken() == tok_identifier)
- ArgNames.push_back(IdentifierStr);
- if (CurTok != ')')
- return ErrorP("Expected ')' in prototype");
-
- // success.
- getNextToken(); // eat ')'.
-
- return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
- getNextToken(); // eat def.
- PrototypeAST *Proto = ParsePrototype();
- if (Proto == 0) return 0;
-
- if (ExprAST *E = ParseExpression())
- return new FunctionAST(Proto, E);
- return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
- if (ExprAST *E = ParseExpression()) {
- // Make an anonymous proto.
- PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
- return new FunctionAST(Proto, E);
- }
- return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
- getNextToken(); // eat extern.
- return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder&lt;&gt; Builder(getGlobalContext());
-static std::map&lt;std::string, Value*&gt; NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
- return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
- // Look this variable up in the function.
- Value *V = NamedValues[Name];
- return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *BinaryExprAST::Codegen() {
- Value *L = LHS-&gt;Codegen();
- Value *R = RHS-&gt;Codegen();
- if (L == 0 || R == 0) return 0;
-
- switch (Op) {
- case '+': return Builder.CreateFAdd(L, R, "addtmp");
- case '-': return Builder.CreateFSub(L, R, "subtmp");
- case '*': return Builder.CreateFMul(L, R, "multmp");
- case '&lt;':
- L = Builder.CreateFCmpULT(L, R, "cmptmp");
- // Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
- "booltmp");
- default: return ErrorV("invalid binary operator");
- }
-}
-
-Value *CallExprAST::Codegen() {
- // Look up the name in the global module table.
- Function *CalleeF = TheModule-&gt;getFunction(Callee);
- if (CalleeF == 0)
- return ErrorV("Unknown function referenced");
-
- // If argument mismatch error.
- if (CalleeF-&gt;arg_size() != Args.size())
- return ErrorV("Incorrect # arguments passed");
-
- std::vector&lt;Value*&gt; ArgsV;
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- ArgsV.push_back(Args[i]-&gt;Codegen());
- if (ArgsV.back() == 0) return 0;
- }
-
- return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Value *IfExprAST::Codegen() {
- Value *CondV = Cond-&gt;Codegen();
- if (CondV == 0) return 0;
-
- // Convert condition to a bool by comparing equal to 0.0.
- CondV = Builder.CreateFCmpONE(CondV,
- ConstantFP::get(getGlobalContext(), APFloat(0.0)),
- "ifcond");
-
- Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-
- // Create blocks for the then and else cases. Insert the 'then' block at the
- // end of the function.
- BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
- BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
- BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-
- Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-
- // Emit then value.
- Builder.SetInsertPoint(ThenBB);
-
- Value *ThenV = Then-&gt;Codegen();
- if (ThenV == 0) return 0;
-
- Builder.CreateBr(MergeBB);
- // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
- ThenBB = Builder.GetInsertBlock();
-
- // Emit else block.
- TheFunction-&gt;getBasicBlockList().push_back(ElseBB);
- Builder.SetInsertPoint(ElseBB);
-
- Value *ElseV = Else-&gt;Codegen();
- if (ElseV == 0) return 0;
-
- Builder.CreateBr(MergeBB);
- // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
- ElseBB = Builder.GetInsertBlock();
-
- // Emit merge block.
- TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
- Builder.SetInsertPoint(MergeBB);
- PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
- "iftmp");
-
- PN-&gt;addIncoming(ThenV, ThenBB);
- PN-&gt;addIncoming(ElseV, ElseBB);
- return PN;
-}
-
-Value *ForExprAST::Codegen() {
- // Output this as:
- // ...
- // start = startexpr
- // goto loop
- // loop:
- // variable = phi [start, loopheader], [nextvariable, loopend]
- // ...
- // bodyexpr
- // ...
- // loopend:
- // step = stepexpr
- // nextvariable = variable + step
- // endcond = endexpr
- // br endcond, loop, endloop
- // outloop:
-
- // Emit the start code first, without 'variable' in scope.
- Value *StartVal = Start-&gt;Codegen();
- if (StartVal == 0) return 0;
-
- // Make the new basic block for the loop header, inserting after current
- // block.
- Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
- BasicBlock *PreheaderBB = Builder.GetInsertBlock();
- BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
-
- // Insert an explicit fall through from the current block to the LoopBB.
- Builder.CreateBr(LoopBB);
-
- // Start insertion in LoopBB.
- Builder.SetInsertPoint(LoopBB);
-
- // Start the PHI node with an entry for Start.
- PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
- Variable-&gt;addIncoming(StartVal, PreheaderBB);
-
- // Within the loop, the variable is defined equal to the PHI node. If it
- // shadows an existing variable, we have to restore it, so save it now.
- Value *OldVal = NamedValues[VarName];
- NamedValues[VarName] = Variable;
-
- // Emit the body of the loop. This, like any other expr, can change the
- // current BB. Note that we ignore the value computed by the body, but don't
- // allow an error.
- if (Body-&gt;Codegen() == 0)
- return 0;
-
- // Emit the step value.
- Value *StepVal;
- if (Step) {
- StepVal = Step-&gt;Codegen();
- if (StepVal == 0) return 0;
- } else {
- // If not specified, use 1.0.
- StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
- }
-
- Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
-
- // Compute the end condition.
- Value *EndCond = End-&gt;Codegen();
- if (EndCond == 0) return EndCond;
-
- // Convert condition to a bool by comparing equal to 0.0.
- EndCond = Builder.CreateFCmpONE(EndCond,
- ConstantFP::get(getGlobalContext(), APFloat(0.0)),
- "loopcond");
-
- // Create the "after loop" block and insert it.
- BasicBlock *LoopEndBB = Builder.GetInsertBlock();
- BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
-
- // Insert the conditional branch into the end of LoopEndBB.
- Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-
- // Any new code will be inserted in AfterBB.
- Builder.SetInsertPoint(AfterBB);
-
- // Add a new entry to the PHI node for the backedge.
- Variable-&gt;addIncoming(NextVar, LoopEndBB);
-
- // Restore the unshadowed variable.
- if (OldVal)
- NamedValues[VarName] = OldVal;
- else
- NamedValues.erase(VarName);
-
-
- // for expr always returns 0.0.
- return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-
-Function *PrototypeAST::Codegen() {
- // Make the function type: double(double,double) etc.
- std::vector&lt;Type*&gt; Doubles(Args.size(),
- Type::getDoubleTy(getGlobalContext()));
- FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
- Doubles, false);
-
- Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-
- // If F conflicted, there was already something named 'Name'. If it has a
- // body, don't allow redefinition or reextern.
- if (F-&gt;getName() != Name) {
- // Delete the one we just made and get the existing one.
- F-&gt;eraseFromParent();
- F = TheModule-&gt;getFunction(Name);
-
- // If F already has a body, reject this.
- if (!F-&gt;empty()) {
- ErrorF("redefinition of function");
- return 0;
- }
-
- // If F took a different number of args, reject.
- if (F-&gt;arg_size() != Args.size()) {
- ErrorF("redefinition of function with different # args");
- return 0;
- }
- }
-
- // Set names for all arguments.
- unsigned Idx = 0;
- for (Function::arg_iterator AI = F-&gt;arg_begin(); Idx != Args.size();
- ++AI, ++Idx) {
- AI-&gt;setName(Args[Idx]);
-
- // Add arguments to variable symbol table.
- NamedValues[Args[Idx]] = AI;
- }
-
- return F;
-}
-
-Function *FunctionAST::Codegen() {
- NamedValues.clear();
-
- Function *TheFunction = Proto-&gt;Codegen();
- if (TheFunction == 0)
- return 0;
-
- // Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
- Builder.SetInsertPoint(BB);
-
- if (Value *RetVal = Body-&gt;Codegen()) {
- // Finish off the function.
- Builder.CreateRet(RetVal);
-
- // Validate the generated code, checking for consistency.
- verifyFunction(*TheFunction);
-
- // Optimize the function.
- TheFPM-&gt;run(*TheFunction);
-
- return TheFunction;
- }
-
- // Error reading body, remove function.
- TheFunction-&gt;eraseFromParent();
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-static void HandleDefinition() {
- if (FunctionAST *F = ParseDefinition()) {
- if (Function *LF = F-&gt;Codegen()) {
- fprintf(stderr, "Read function definition:");
- LF-&gt;dump();
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleExtern() {
- if (PrototypeAST *P = ParseExtern()) {
- if (Function *F = P-&gt;Codegen()) {
- fprintf(stderr, "Read extern: ");
- F-&gt;dump();
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleTopLevelExpression() {
- // Evaluate a top-level expression into an anonymous function.
- if (FunctionAST *F = ParseTopLevelExpr()) {
- if (Function *LF = F-&gt;Codegen()) {
- // JIT the function, returning a function pointer.
- void *FPtr = TheExecutionEngine-&gt;getPointerToFunction(LF);
-
- // Cast it to the right type (takes no arguments, returns a double) so we
- // can call it as a native function.
- double (*FP)() = (double (*)())(intptr_t)FPtr;
- fprintf(stderr, "Evaluated to %f\n", FP());
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
- while (1) {
- fprintf(stderr, "ready&gt; ");
- switch (CurTok) {
- case tok_eof: return;
- case ';': getNextToken(); break; // ignore top-level semicolons.
- case tok_def: HandleDefinition(); break;
- case tok_extern: HandleExtern(); break;
- default: HandleTopLevelExpression(); break;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C"
-double putchard(double X) {
- putchar((char)X);
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
- InitializeNativeTarget();
- LLVMContext &amp;Context = getGlobalContext();
-
- // Install standard binary operators.
- // 1 is lowest precedence.
- BinopPrecedence['&lt;'] = 10;
- BinopPrecedence['+'] = 20;
- BinopPrecedence['-'] = 20;
- BinopPrecedence['*'] = 40; // highest.
-
- // Prime the first token.
- fprintf(stderr, "ready&gt; ");
- getNextToken();
-
- // Make the module, which holds all the code.
- TheModule = new Module("my cool jit", Context);
-
- // Create the JIT. This takes ownership of the module.
- std::string ErrStr;
- TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&amp;ErrStr).create();
- if (!TheExecutionEngine) {
- fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
- exit(1);
- }
-
- FunctionPassManager OurFPM(TheModule);
-
- // Set up the optimizer pipeline. Start with registering info about how the
- // target lays out data structures.
- OurFPM.add(new DataLayout(*TheExecutionEngine-&gt;getDataLayout()));
- // Provide basic AliasAnalysis support for GVN.
- OurFPM.add(createBasicAliasAnalysisPass());
- // Do simple "peephole" optimizations and bit-twiddling optzns.
- OurFPM.add(createInstructionCombiningPass());
- // Reassociate expressions.
- OurFPM.add(createReassociatePass());
- // Eliminate Common SubExpressions.
- OurFPM.add(createGVNPass());
- // Simplify the control flow graph (deleting unreachable blocks, etc).
- OurFPM.add(createCFGSimplificationPass());
-
- OurFPM.doInitialization();
-
- // Set the global so the code gen can use this.
- TheFPM = &amp;OurFPM;
-
- // Run the main "interpreter loop" now.
- MainLoop();
-
- TheFPM = 0;
-
- // Print out all of the generated code.
- TheModule-&gt;dump();
-
- return 0;
-}
-</pre>
-</div>
-
-<a href="LangImpl6.html">Next: Extending the language: user-defined operators</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl5.rst b/docs/tutorial/LangImpl5.rst
new file mode 100644
index 000000000000..80d5f37bc4cd
--- /dev/null
+++ b/docs/tutorial/LangImpl5.rst
@@ -0,0 +1,1607 @@
+==================================================
+Kaleidoscope: Extending the Language: Control Flow
+==================================================
+
+.. contents::
+ :local:
+
+Chapter 5 Introduction
+======================
+
+Welcome to Chapter 5 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. Parts 1-4 described the implementation of
+the simple Kaleidoscope language and included support for generating
+LLVM IR, followed by optimizations and a JIT compiler. Unfortunately, as
+presented, Kaleidoscope is mostly useless: it has no control flow other
+than call and return. This means that you can't have conditional
+branches in the code, significantly limiting its power. In this episode
+of "build that compiler", we'll extend Kaleidoscope to have an
+if/then/else expression plus a simple 'for' loop.
+
+If/Then/Else
+============
+
+Extending Kaleidoscope to support if/then/else is quite straightforward.
+It basically requires adding support for this "new" concept to the
+lexer, parser, AST, and LLVM code emitter. This example is nice because
+it shows how easy it is to "grow" a language over time, incrementally
+extending it as new ideas are discovered.
+
+Before we get going on "how" we add this extension, let's talk about
+"what" we want. The basic idea is that we want to be able to write this
+sort of thing:
+
+::
+
+ def fib(x)
+ if x < 3 then
+ 1
+ else
+ fib(x-1)+fib(x-2);
+
+In Kaleidoscope, every construct is an expression: there are no
+statements. As such, the if/then/else expression needs to return a value
+like any other. Since we're using a mostly functional form, we'll have
+it evaluate its conditional, then return the 'then' or 'else' value
+based on how the condition was resolved. This is very similar to the C
+"?:" expression.
+
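+For comparison, here is what the body of ``fib`` above would look like
+using C++'s conditional operator (a hypothetical rendering for
+illustration only, not part of the tutorial code):
+
+.. code-block:: c++
+
+    double fib(double x) {
+      // The whole body is a single expression, just as in Kaleidoscope.
+      return x < 3 ? 1 : fib(x - 1) + fib(x - 2);
+    }
+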
+The semantics of the if/then/else expression are that it evaluates the
+condition to a boolean value: 0.0 is considered to be false and
+everything else is considered to be true. If the condition is true, the
+first subexpression is evaluated and returned; if the condition is
+false, the second subexpression is evaluated and returned. Since
+Kaleidoscope allows side-effects, this behavior is important to nail
+down.
+
+Now that we know what we "want", let's break this down into its
+constituent pieces.
+
+Lexer Extensions for If/Then/Else
+---------------------------------
+
+The lexer extensions are straightforward. First we add new enum values
+for the relevant tokens:
+
+.. code-block:: c++
+
+ // control
+ tok_if = -6, tok_then = -7, tok_else = -8,
+
+Once we have that, we recognize the new keywords in the lexer. This is
+pretty simple stuff:
+
+.. code-block:: c++
+
+ ...
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ if (IdentifierStr == "if") return tok_if;
+ if (IdentifierStr == "then") return tok_then;
+ if (IdentifierStr == "else") return tok_else;
+ return tok_identifier;
+
+AST Extensions for If/Then/Else
+-------------------------------
+
+To represent the new expression we add a new AST node for it:
+
+.. code-block:: c++
+
+ /// IfExprAST - Expression class for if/then/else.
+ class IfExprAST : public ExprAST {
+ ExprAST *Cond, *Then, *Else;
+ public:
+ IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+ : Cond(cond), Then(then), Else(_else) {}
+ virtual Value *Codegen();
+ };
+
+The AST node just has pointers to the various subexpressions.
+
+Parser Extensions for If/Then/Else
+----------------------------------
+
+Now that we have the relevant tokens coming from the lexer and we have
+the AST node to build, our parsing logic is relatively straightforward.
+First we define a new parsing function:
+
+.. code-block:: c++
+
+ /// ifexpr ::= 'if' expression 'then' expression 'else' expression
+ static ExprAST *ParseIfExpr() {
+ getNextToken(); // eat the if.
+
+ // condition.
+ ExprAST *Cond = ParseExpression();
+ if (!Cond) return 0;
+
+ if (CurTok != tok_then)
+ return Error("expected then");
+ getNextToken(); // eat the then
+
+ ExprAST *Then = ParseExpression();
+ if (Then == 0) return 0;
+
+ if (CurTok != tok_else)
+ return Error("expected else");
+
+ getNextToken();
+
+ ExprAST *Else = ParseExpression();
+ if (!Else) return 0;
+
+ return new IfExprAST(Cond, Then, Else);
+ }
+
+Next we hook it up as a primary expression:
+
+.. code-block:: c++
+
+ static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ case tok_if: return ParseIfExpr();
+ }
+ }
+
+LLVM IR for If/Then/Else
+------------------------
+
+Now that we have it parsing and building the AST, the final piece is
+adding LLVM code generation support. This is the most interesting part
+of the if/then/else example, because this is where it starts to
+introduce new concepts. All of the code above has been thoroughly
+described in previous chapters.
+
+To motivate the code we want to produce, let's take a look at a simple
+example. Consider:
+
+::
+
+ extern foo();
+ extern bar();
+ def baz(x) if x then foo() else bar();
+
+If you disable optimizations, the code you'll (soon) get from
+Kaleidoscope looks like this:
+
+.. code-block:: llvm
+
+ declare double @foo()
+
+ declare double @bar()
+
+ define double @baz(double %x) {
+ entry:
+ %ifcond = fcmp one double %x, 0.000000e+00
+ br i1 %ifcond, label %then, label %else
+
+ then: ; preds = %entry
+ %calltmp = call double @foo()
+ br label %ifcont
+
+ else: ; preds = %entry
+ %calltmp1 = call double @bar()
+ br label %ifcont
+
+ ifcont: ; preds = %else, %then
+ %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
+ ret double %iftmp
+ }
+
+To visualize the control flow graph, you can use a nifty feature of the
+LLVM '`opt <http://llvm.org/cmds/opt.html>`_' tool. If you put this LLVM
+IR into "t.ll" and run "``llvm-as < t.ll | opt -analyze -view-cfg``", `a
+window will pop up <../ProgrammersManual.html#ViewGraph>`_ and you'll
+see this graph:
+
+.. figure:: LangImpl5-cfg.png
+ :align: center
+ :alt: Example CFG
+
+ Example CFG
+
+Another way to get this is to call "``F->viewCFG()``" or
+"``F->viewCFGOnly()``" (where F is a "``Function*``") either by
+inserting actual calls into the code and recompiling or by calling these
+in the debugger. LLVM has many nice features for visualizing various
+graphs.
+
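+For example, one way to wire this up is a quick hack in this chapter's
+driver code (a sketch; the ``viewCFG`` call is the only addition):
+
+.. code-block:: c++
+
+    static void HandleDefinition() {
+      if (FunctionAST *F = ParseDefinition()) {
+        if (Function *LF = F->Codegen()) {
+          fprintf(stderr, "Read function definition:");
+          LF->dump();
+          LF->viewCFG(); // pop up a window showing this function's CFG
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+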
+Getting back to the generated code, it is fairly simple: the entry block
+evaluates the conditional expression ("x" in our case here) and compares
+the result to 0.0 with the "``fcmp one``" instruction ('one' is "Ordered
+and Not Equal"). Based on the result of this expression, the code jumps
+to either the "then" or "else" blocks, which contain the expressions for
+the true/false cases.
+
+Once the then/else blocks are finished executing, they both branch back
+to the 'ifcont' block to execute the code that happens after the
+if/then/else. In this case the only thing left to do is to return to the
+caller of the function. The question then becomes: how does the code
+know which expression to return?
+
+The answer to this question involves an important SSA operation: the
+`Phi
+operation <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+If you're not familiar with SSA, `the wikipedia
+article <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+is a good introduction and there are various other introductions to it
+available on your favorite search engine. The short version is that
+"execution" of the Phi operation requires "remembering" which block
+control came from. The Phi operation takes on the value corresponding to
+the input control block. In this case, if control comes in from the
+"then" block, it gets the value of "calltmp". If control comes from the
+"else" block, it gets the value of "calltmp1".
+
+At this point, you are probably starting to think "Oh no! This means my
+simple and elegant front-end will have to start generating SSA form in
+order to use LLVM!". Fortunately, this is not the case, and we strongly
+advise *not* implementing an SSA construction algorithm in your
+front-end unless there is an amazingly good reason to do so. In
+practice, there are two sorts of values that float around in code
+written for your average imperative programming language that might need
+Phi nodes:
+
+#. Code that involves user variables: ``x = 1; x = x + 1;``
+#. Values that are implicit in the structure of your AST, such as the
+ Phi node in this case.
+
+In `Chapter 7 <LangImpl7.html>`_ of this tutorial ("mutable variables"),
+we'll talk about #1 in depth. For now, just believe me that you don't
+need SSA construction to handle this case. For #2, you have the choice
+of using the techniques that we will describe for #1, or you can insert
+Phi nodes directly, if convenient. In this case, it is really really
+easy to generate the Phi node, so we choose to do it directly.
+
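+As a preview, here is the shape of that code, using the same names as
+the implementation below (``ThenV``/``ElseV`` are the values computed in
+the two blocks, and ``ThenBB``/``ElseBB`` are the blocks they come from):
+
+.. code-block:: c++
+
+    // With the insertion point in the merge block:
+    PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                    2, "iftmp");
+    PN->addIncoming(ThenV, ThenBB); // value if control came from 'then'
+    PN->addIncoming(ElseV, ElseBB); // value if control came from 'else'
+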
+Okay, enough of the motivation and overview; let's generate code!
+
+Code Generation for If/Then/Else
+--------------------------------
+
+In order to generate code for this, we implement the ``Codegen`` method
+for ``IfExprAST``:
+
+.. code-block:: c++
+
+ Value *IfExprAST::Codegen() {
+ Value *CondV = Cond->Codegen();
+ if (CondV == 0) return 0;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ CondV = Builder.CreateFCmpONE(CondV,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "ifcond");
+
+This code is straightforward and similar to what we saw before. We emit
+the expression for the condition, then compare that value to zero to get
+a truth value as a 1-bit (bool) value.
+
+.. code-block:: c++
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Create blocks for the then and else cases. Insert the 'then' block at the
+ // end of the function.
+ BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+ BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+ BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+ Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+This code creates the basic blocks that are related to the if/then/else
+expression, and they correspond directly to the blocks in the example above.
+The first line gets the current Function object that is being built. It
+gets this by asking the builder for the current BasicBlock, and asking
+that block for its "parent" (the function it is currently embedded
+into).
+
+Once it has that, it creates three blocks. Note that it passes
+"TheFunction" into the constructor for the "then" block. This causes the
+constructor to automatically insert the new block into the end of the
+specified function. The other two blocks are created, but aren't yet
+inserted into the function.
+
+Once the blocks are created, we can emit the conditional branch that
+chooses between them. Note that creating new blocks does not implicitly
+affect the IRBuilder, so it is still inserting into the block that the
+condition went into. Also note that it is creating a branch to the
+"then" block and the "else" block, even though the "else" block isn't
+inserted into the function yet. This is all ok: it is the standard way
+that LLVM supports forward references.
+
+.. code-block:: c++
+
+ // Emit then value.
+ Builder.SetInsertPoint(ThenBB);
+
+ Value *ThenV = Then->Codegen();
+ if (ThenV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+ ThenBB = Builder.GetInsertBlock();
+
+After the conditional branch is inserted, we move the builder to start
+inserting into the "then" block. Strictly speaking, this call moves the
+insertion point to be at the end of the specified block. However, since
+the "then" block is empty, it also starts out by inserting at the
+beginning of the block. :)
+
+Once the insertion point is set, we recursively codegen the "then"
+expression from the AST. To finish off the "then" block, we create an
+unconditional branch to the merge block. One interesting (and very
+important) aspect of the LLVM IR is that it `requires all basic blocks
+to be "terminated" <../LangRef.html#functionstructure>`_ with a `control
+flow instruction <../LangRef.html#terminators>`_ such as return or
+branch. This means that all control flow, *including fall throughs*, must
+be made explicit in the LLVM IR. If you violate this rule, the verifier
+will emit an error.
+
+The final line here is quite subtle, but is very important. The basic
+issue is that when we create the Phi node in the merge block, we need to
+set up the block/value pairs that indicate how the Phi will work.
+Importantly, the Phi node expects to have an entry for each predecessor
+of the block in the CFG. Why, then, are we getting the current block
+when we just set it to ThenBB five lines above? The problem is that the
+"Then" expression may itself change the block that the Builder is
+emitting into if, for example, it contains a nested "if/then/else"
+expression. Because calling Codegen recursively could arbitrarily change
+the notion of the current block, we must fetch an up-to-date value of
+the current block for the code that will set up the Phi node.
+
+.. code-block:: c++
+
+ // Emit else block.
+ TheFunction->getBasicBlockList().push_back(ElseBB);
+ Builder.SetInsertPoint(ElseBB);
+
+ Value *ElseV = Else->Codegen();
+ if (ElseV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+ ElseBB = Builder.GetInsertBlock();
+
+Code generation for the 'else' block is basically identical to codegen
+for the 'then' block. The only significant difference is the first line,
+which adds the 'else' block to the function. Recall that previously the
+'else' block was created but not added to the function. Now that the
+'then' and 'else' blocks are emitted, we can finish up with the merge
+code:
+
+.. code-block:: c++
+
+ // Emit merge block.
+ TheFunction->getBasicBlockList().push_back(MergeBB);
+ Builder.SetInsertPoint(MergeBB);
+ PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
+ "iftmp");
+
+ PN->addIncoming(ThenV, ThenBB);
+ PN->addIncoming(ElseV, ElseBB);
+ return PN;
+ }
+
+The first two lines here are now familiar: the first adds the "merge"
+block to the Function object (it was previously floating, like the else
+block above). The second line changes the insertion point so that newly
+created code will go into the "merge" block. Once that is done, we need
+to create the PHI node and set up the block/value pairs for the PHI.
+
+Finally, the ``Codegen`` method returns the phi node as the value computed
+by the if/then/else expression. In our example above, this returned
+value will feed into the code for the top-level function, which will
+create the return instruction.
+
+Overall, we now have the ability to execute conditional code in
+Kaleidoscope. With this extension, Kaleidoscope is a fairly complete
+language that can calculate a wide variety of numeric functions. Next up
+we'll add another useful expression that is familiar from non-functional
+languages...
+
+'for' Loop Expression
+=====================
+
+Now that we know how to add basic control flow constructs to the
+language, we have the tools to add more powerful things. Let's add
+something more aggressive, a 'for' expression:
+
+::
+
+ extern putchard(char)
+ def printstar(n)
+ for i = 1, i < n, 1.0 in
+ putchard(42); # ascii 42 = '*'
+
+ # print 100 '*' characters
+ printstar(100);
+
+This expression defines a new variable ("i" in this case) that iterates
+from a starting value while the condition ("i < n" in this case) is
+true, incrementing by an optional step value ("1.0" in this case). If
+the step value is omitted, it defaults to 1.0. While the condition is
+true, the loop executes its body expression. Because we don't have
+anything better to return, we'll just define the loop as always
+returning 0.0. In the future, when we have mutable variables, it will
+get more useful.
+
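+To pin these semantics down, here is a hypothetical C++ desugaring
+(``body`` and ``end`` are illustrative stand-ins for the body and
+end-condition expressions, not tutorial code). Two details follow from
+the code generation shown later in this chapter: the body always runs at
+least once, because the test comes after the body, and the end condition
+is evaluated with the variable's value from *before* the increment:
+
+.. code-block:: c++
+
+    double i = start;
+    while (true) {
+      body(i);                  // the body's value is ignored
+      double next = i + step;   // step defaults to 1.0 when omitted
+      if (end(i) == 0.0) break; // condition still sees the old i
+      i = next;
+    }
+    // The 'for' expression itself always evaluates to 0.0.
+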
+As before, let's talk about the changes that we need to make to
+Kaleidoscope to support this.
+
+Lexer Extensions for the 'for' Loop
+-----------------------------------
+
+The lexer extensions are the same sort of thing as for if/then/else:
+
+.. code-block:: c++
+
+ ... in enum Token ...
+ // control
+ tok_if = -6, tok_then = -7, tok_else = -8,
+ tok_for = -9, tok_in = -10
+
+ ... in gettok ...
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ if (IdentifierStr == "if") return tok_if;
+ if (IdentifierStr == "then") return tok_then;
+ if (IdentifierStr == "else") return tok_else;
+ if (IdentifierStr == "for") return tok_for;
+ if (IdentifierStr == "in") return tok_in;
+ return tok_identifier;
+
+AST Extensions for the 'for' Loop
+---------------------------------
+
+The AST node is just as simple. It basically boils down to capturing the
+variable name and the constituent expressions in the node.
+
+.. code-block:: c++
+
+ /// ForExprAST - Expression class for for/in.
+ class ForExprAST : public ExprAST {
+ std::string VarName;
+ ExprAST *Start, *End, *Step, *Body;
+ public:
+ ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+ ExprAST *step, ExprAST *body)
+ : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+ virtual Value *Codegen();
+ };
+
+Parser Extensions for the 'for' Loop
+------------------------------------
+
+The parser code is also fairly standard. The only interesting part is
+the handling of the optional step value: the parser checks whether the
+second comma is present and, if not, sets the step value to null in the
+AST node:
+
+.. code-block:: c++
+
+ /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+ static ExprAST *ParseForExpr() {
+ getNextToken(); // eat the for.
+
+ if (CurTok != tok_identifier)
+ return Error("expected identifier after for");
+
+ std::string IdName = IdentifierStr;
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '=')
+ return Error("expected '=' after for");
+ getNextToken(); // eat '='.
+
+ ExprAST *Start = ParseExpression();
+ if (Start == 0) return 0;
+ if (CurTok != ',')
+ return Error("expected ',' after for start value");
+ getNextToken();
+
+ ExprAST *End = ParseExpression();
+ if (End == 0) return 0;
+
+ // The step value is optional.
+ ExprAST *Step = 0;
+ if (CurTok == ',') {
+ getNextToken();
+ Step = ParseExpression();
+ if (Step == 0) return 0;
+ }
+
+ if (CurTok != tok_in)
+ return Error("expected 'in' after for");
+ getNextToken(); // eat 'in'.
+
+ ExprAST *Body = ParseExpression();
+ if (Body == 0) return 0;
+
+ return new ForExprAST(IdName, Start, End, Step, Body);
+ }
+
+LLVM IR for the 'for' Loop
+--------------------------
+
+Now we get to the good part: the LLVM IR we want to generate for this
+thing. With the simple example above, we get this LLVM IR (note that
+this dump is generated with optimizations disabled for clarity):
+
+.. code-block:: llvm
+
+ declare double @putchard(double)
+
+ define double @printstar(double %n) {
+ entry:
+ ; initial value = 1.0 (inlined into phi)
+ br label %loop
+
+ loop: ; preds = %loop, %entry
+ %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
+ ; body
+ %calltmp = call double @putchard(double 4.200000e+01)
+ ; increment
+ %nextvar = fadd double %i, 1.000000e+00
+
+ ; termination test
+ %cmptmp = fcmp ult double %i, %n
+ %booltmp = uitofp i1 %cmptmp to double
+ %loopcond = fcmp one double %booltmp, 0.000000e+00
+ br i1 %loopcond, label %loop, label %afterloop
+
+ afterloop: ; preds = %loop
+ ; loop always returns 0.0
+ ret double 0.000000e+00
+ }
+
+This loop contains all the same constructs we saw before: a phi node,
+several expressions, and some basic blocks. Let's see how this fits
+together.
+
+Code Generation for the 'for' Loop
+----------------------------------
+
+The first part of Codegen is very simple: we just output the start
+expression for the loop value:
+
+.. code-block:: c++
+
+ Value *ForExprAST::Codegen() {
+ // Emit the start code first, without 'variable' in scope.
+ Value *StartVal = Start->Codegen();
+ if (StartVal == 0) return 0;
+
+With this out of the way, the next step is to set up the LLVM basic
+block for the start of the loop body. In the case above, the whole loop
+body is one block, but remember that the body code itself could consist
+of multiple blocks (e.g. if it contains an if/then/else or a for/in
+expression).
+
+.. code-block:: c++
+
+ // Make the new basic block for the loop header, inserting after current
+ // block.
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+ BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+ BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+ // Insert an explicit fall through from the current block to the LoopBB.
+ Builder.CreateBr(LoopBB);
+
+This code is similar to what we saw for if/then/else. Because we will
+need it to create the Phi node, we remember the block that falls through
+into the loop. Once we have that, we create the actual block that starts
+the loop and create an unconditional branch for the fall-through between
+the two blocks.
+
+.. code-block:: c++
+
+ // Start insertion in LoopBB.
+ Builder.SetInsertPoint(LoopBB);
+
+ // Start the PHI node with an entry for Start.
+ PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
+ Variable->addIncoming(StartVal, PreheaderBB);
+
+Now that the "preheader" for the loop is set up, we switch to emitting
+code for the loop body. To begin with, we move the insertion point and
+create the PHI node for the loop induction variable. Since we already
+know the incoming value for the starting value, we add it to the Phi
+node. Note that the Phi will eventually get a second value for the
+backedge, but we can't set it up yet (because it doesn't exist!).
+
+.. code-block:: c++
+
+ // Within the loop, the variable is defined equal to the PHI node. If it
+ // shadows an existing variable, we have to restore it, so save it now.
+ Value *OldVal = NamedValues[VarName];
+ NamedValues[VarName] = Variable;
+
+ // Emit the body of the loop. This, like any other expr, can change the
+ // current BB. Note that we ignore the value computed by the body, but don't
+ // allow an error.
+ if (Body->Codegen() == 0)
+ return 0;
+
+Now the code starts to get more interesting. Our 'for' loop introduces a
+new variable to the symbol table. This means that our symbol table can
+now contain either function arguments or loop variables. To handle this,
+before we codegen the body of the loop, we add the loop variable as the
+current value for its name. Note that it is possible that there is a
+variable of the same name in the outer scope. It would be easy to make
+this an error (emit an error and return null if there is already an
+entry for VarName) but we choose to allow shadowing of variables. In
+order to handle this correctly, we remember the Value that we are
+potentially shadowing in ``OldVal`` (which will be null if there is no
+shadowed variable).
+
+Once the loop variable is set into the symbol table, the code
+recursively codegens the body. This allows the body to use the loop
+variable: any references to it will naturally find it in the symbol
+table.
+
+.. code-block:: c++
+
+ // Emit the step value.
+ Value *StepVal;
+ if (Step) {
+ StepVal = Step->Codegen();
+ if (StepVal == 0) return 0;
+ } else {
+ // If not specified, use 1.0.
+ StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+ }
+
+ Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
+
+Now that the body is emitted, we compute the next value of the iteration
+variable by adding the step value, or 1.0 if it isn't present.
+'``NextVar``' will be the value of the loop variable on the next
+iteration of the loop.
+
+.. code-block:: c++
+
+ // Compute the end condition.
+ Value *EndCond = End->Codegen();
+ if (EndCond == 0) return EndCond;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ EndCond = Builder.CreateFCmpONE(EndCond,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "loopcond");
+
+Finally, we evaluate the exit condition of the loop to determine whether
+the loop should exit. This mirrors the condition evaluation for the
+if/then/else expression.
+
+.. code-block:: c++
+
+ // Create the "after loop" block and insert it.
+ BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+ BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+ // Insert the conditional branch into the end of LoopEndBB.
+ Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+ // Any new code will be inserted in AfterBB.
+ Builder.SetInsertPoint(AfterBB);
+
+With the code for the body of the loop complete, we just need to finish
+up the control flow for it. This code remembers the end block (for the
+phi node), then creates the block for the loop exit ("afterloop"). Based
+on the value of the exit condition, it creates a conditional branch that
+chooses between executing the loop again and exiting the loop. Any
+future code is emitted in the "afterloop" block, so it sets the
+insertion position to it.
+
+.. code-block:: c++
+
+ // Add a new entry to the PHI node for the backedge.
+ Variable->addIncoming(NextVar, LoopEndBB);
+
+ // Restore the unshadowed variable.
+ if (OldVal)
+ NamedValues[VarName] = OldVal;
+ else
+ NamedValues.erase(VarName);
+
+ // for expr always returns 0.0.
+ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+ }
+
+The final code handles various cleanups: now that we have the "NextVar"
+value, we can add the incoming value to the loop PHI node. After that,
+we restore any shadowed variable, or remove the loop variable from the
+symbol table, so that it isn't in scope after the for loop. Finally,
+code generation of the for loop always returns 0.0, so that is what we
+return from ``ForExprAST::Codegen``.
+
+With this, we conclude the "adding control flow to Kaleidoscope" chapter
+of the tutorial. In this chapter we added two control flow constructs,
+and used them to motivate a couple of aspects of the LLVM IR that are
+important for front-end implementors to know. In the next chapter of our
+saga, we will get a bit crazier and add `user-defined
+operators <LangImpl6.html>`_ to our poor innocent language.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the if/then/else and for expressions. To build this example, use:
+
+.. code-block:: bash
+
+ # Compile
+ clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+ # Run
+ ./toy
+
+Here is the code:
+
+.. code-block:: c++
+
+ #include "llvm/DerivedTypes.h"
+ #include "llvm/ExecutionEngine/ExecutionEngine.h"
+ #include "llvm/ExecutionEngine/JIT.h"
+ #include "llvm/IRBuilder.h"
+ #include "llvm/LLVMContext.h"
+ #include "llvm/Module.h"
+ #include "llvm/PassManager.h"
+ #include "llvm/Analysis/Verifier.h"
+ #include "llvm/Analysis/Passes.h"
+ #include "llvm/DataLayout.h"
+ #include "llvm/Transforms/Scalar.h"
+ #include "llvm/Support/TargetSelect.h"
+ #include <cstdio>
+ #include <string>
+ #include <map>
+ #include <vector>
+ using namespace llvm;
+
+ //===----------------------------------------------------------------------===//
+ // Lexer
+ //===----------------------------------------------------------------------===//
+
+ // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+ // of these for known things.
+ enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5,
+
+ // control
+ tok_if = -6, tok_then = -7, tok_else = -8,
+ tok_for = -9, tok_in = -10
+ };
+
+ static std::string IdentifierStr; // Filled in if tok_identifier
+ static double NumVal; // Filled in if tok_number
+
+ /// gettok - Return the next token from standard input.
+ static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ if (IdentifierStr == "if") return tok_if;
+ if (IdentifierStr == "then") return tok_then;
+ if (IdentifierStr == "else") return tok_else;
+ if (IdentifierStr == "for") return tok_for;
+ if (IdentifierStr == "in") return tok_in;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Abstract Syntax Tree (aka Parse Tree)
+ //===----------------------------------------------------------------------===//
+
+ /// ExprAST - Base class for all expression nodes.
+ class ExprAST {
+ public:
+ virtual ~ExprAST() {}
+ virtual Value *Codegen() = 0;
+ };
+
+ /// NumberExprAST - Expression class for numeric literals like "1.0".
+ class NumberExprAST : public ExprAST {
+ double Val;
+ public:
+ NumberExprAST(double val) : Val(val) {}
+ virtual Value *Codegen();
+ };
+
+ /// VariableExprAST - Expression class for referencing a variable, like "a".
+ class VariableExprAST : public ExprAST {
+ std::string Name;
+ public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ virtual Value *Codegen();
+ };
+
+ /// BinaryExprAST - Expression class for a binary operator.
+ class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+ public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ virtual Value *Codegen();
+ };
+
+ /// CallExprAST - Expression class for function calls.
+ class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+ public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ virtual Value *Codegen();
+ };
+
+ /// IfExprAST - Expression class for if/then/else.
+ class IfExprAST : public ExprAST {
+ ExprAST *Cond, *Then, *Else;
+ public:
+ IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+ : Cond(cond), Then(then), Else(_else) {}
+ virtual Value *Codegen();
+ };
+
+ /// ForExprAST - Expression class for for/in.
+ class ForExprAST : public ExprAST {
+ std::string VarName;
+ ExprAST *Start, *End, *Step, *Body;
+ public:
+ ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+ ExprAST *step, ExprAST *body)
+ : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+ virtual Value *Codegen();
+ };
+
+ /// PrototypeAST - This class represents the "prototype" for a function,
+ /// which captures its name, and its argument names (thus implicitly the number
+ /// of arguments the function takes).
+ class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+ public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+ : Name(name), Args(args) {}
+
+ Function *Codegen();
+ };
+
+ /// FunctionAST - This class represents a function definition itself.
+ class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+ public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+ Function *Codegen();
+ };
+
+ //===----------------------------------------------------------------------===//
+ // Parser
+ //===----------------------------------------------------------------------===//
+
+ /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+ /// token the parser is looking at. getNextToken reads another token from the
+ /// lexer and updates CurTok with its results.
+ static int CurTok;
+ static int getNextToken() {
+ return CurTok = gettok();
+ }
+
+ /// BinopPrecedence - This holds the precedence for each binary operator that is
+ /// defined.
+ static std::map<char, int> BinopPrecedence;
+
+ /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+ static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+ }
+
+ /// Error* - These are little helper functions for error handling.
+ ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+ PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+ FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+ static ExprAST *ParseExpression();
+
+ /// identifierexpr
+ /// ::= identifier
+ /// ::= identifier '(' expression* ')'
+ static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+ }
+
+ /// numberexpr ::= number
+ static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+ }
+
+ /// parenexpr ::= '(' expression ')'
+ static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+ }
+
+ /// ifexpr ::= 'if' expression 'then' expression 'else' expression
+ static ExprAST *ParseIfExpr() {
+ getNextToken(); // eat the if.
+
+ // condition.
+ ExprAST *Cond = ParseExpression();
+ if (!Cond) return 0;
+
+ if (CurTok != tok_then)
+ return Error("expected then");
+ getNextToken(); // eat the then
+
+ ExprAST *Then = ParseExpression();
+ if (Then == 0) return 0;
+
+ if (CurTok != tok_else)
+ return Error("expected else");
+
+ getNextToken();
+
+ ExprAST *Else = ParseExpression();
+ if (!Else) return 0;
+
+ return new IfExprAST(Cond, Then, Else);
+ }
+
+ /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+ static ExprAST *ParseForExpr() {
+ getNextToken(); // eat the for.
+
+ if (CurTok != tok_identifier)
+ return Error("expected identifier after for");
+
+ std::string IdName = IdentifierStr;
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '=')
+ return Error("expected '=' after for");
+ getNextToken(); // eat '='.
+
+ ExprAST *Start = ParseExpression();
+ if (Start == 0) return 0;
+ if (CurTok != ',')
+ return Error("expected ',' after for start value");
+ getNextToken();
+
+ ExprAST *End = ParseExpression();
+ if (End == 0) return 0;
+
+ // The step value is optional.
+ ExprAST *Step = 0;
+ if (CurTok == ',') {
+ getNextToken();
+ Step = ParseExpression();
+ if (Step == 0) return 0;
+ }
+
+ if (CurTok != tok_in)
+ return Error("expected 'in' after for");
+ getNextToken(); // eat 'in'.
+
+ ExprAST *Body = ParseExpression();
+ if (Body == 0) return 0;
+
+ return new ForExprAST(IdName, Start, End, Step, Body);
+ }
+
+ /// primary
+ /// ::= identifierexpr
+ /// ::= numberexpr
+ /// ::= parenexpr
+ /// ::= ifexpr
+ /// ::= forexpr
+ static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ case tok_if: return ParseIfExpr();
+ case tok_for: return ParseForExpr();
+ }
+ }
+
+ /// binoprhs
+ /// ::= ('+' primary)*
+ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the primary expression after the binary operator.
+ ExprAST *RHS = ParsePrimary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+ }
+
+ /// expression
+ /// ::= primary binoprhs
+ ///
+ static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParsePrimary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+ }
+
+ /// prototype
+ /// ::= id '(' id* ')'
+ static PrototypeAST *ParsePrototype() {
+ if (CurTok != tok_identifier)
+ return ErrorP("Expected function name in prototype");
+
+ std::string FnName = IdentifierStr;
+ getNextToken();
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ return new PrototypeAST(FnName, ArgNames);
+ }
+
+ /// definition ::= 'def' prototype expression
+ static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+ }
+
+ /// toplevelexpr ::= expression
+ static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+ }
+
+ /// external ::= 'extern' prototype
+ static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Code Generation
+ //===----------------------------------------------------------------------===//
+
+ static Module *TheModule;
+ static IRBuilder<> Builder(getGlobalContext());
+ static std::map<std::string, Value*> NamedValues;
+ static FunctionPassManager *TheFPM;
+
+ Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+ Value *NumberExprAST::Codegen() {
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
+ }
+
+ Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ return V ? V : ErrorV("Unknown variable name");
+ }
+
+ Value *BinaryExprAST::Codegen() {
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateFAdd(L, R, "addtmp");
+ case '-': return Builder.CreateFSub(L, R, "subtmp");
+ case '*': return Builder.CreateFMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: return ErrorV("invalid binary operator");
+ }
+ }
+
+ Value *CallExprAST::Codegen() {
+ // Look up the name in the global module table.
+ Function *CalleeF = TheModule->getFunction(Callee);
+ if (CalleeF == 0)
+ return ErrorV("Unknown function referenced");
+
+ // If argument mismatch error.
+ if (CalleeF->arg_size() != Args.size())
+ return ErrorV("Incorrect # arguments passed");
+
+ std::vector<Value*> ArgsV;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ ArgsV.push_back(Args[i]->Codegen());
+ if (ArgsV.back() == 0) return 0;
+ }
+
+ return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+ }
+
+ Value *IfExprAST::Codegen() {
+ Value *CondV = Cond->Codegen();
+ if (CondV == 0) return 0;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ CondV = Builder.CreateFCmpONE(CondV,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "ifcond");
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Create blocks for the then and else cases. Insert the 'then' block at the
+ // end of the function.
+ BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+ BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+ BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+ Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+ // Emit then value.
+ Builder.SetInsertPoint(ThenBB);
+
+ Value *ThenV = Then->Codegen();
+ if (ThenV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+ ThenBB = Builder.GetInsertBlock();
+
+ // Emit else block.
+ TheFunction->getBasicBlockList().push_back(ElseBB);
+ Builder.SetInsertPoint(ElseBB);
+
+ Value *ElseV = Else->Codegen();
+ if (ElseV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+ ElseBB = Builder.GetInsertBlock();
+
+ // Emit merge block.
+ TheFunction->getBasicBlockList().push_back(MergeBB);
+ Builder.SetInsertPoint(MergeBB);
+ PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
+ "iftmp");
+
+ PN->addIncoming(ThenV, ThenBB);
+ PN->addIncoming(ElseV, ElseBB);
+ return PN;
+ }
+
+ Value *ForExprAST::Codegen() {
+ // Output this as:
+ // ...
+ // start = startexpr
+ // goto loop
+ // loop:
+ // variable = phi [start, loopheader], [nextvariable, loopend]
+ // ...
+ // bodyexpr
+ // ...
+ // loopend:
+ // step = stepexpr
+ // nextvariable = variable + step
+ // endcond = endexpr
+ // br endcond, loop, endloop
+ // outloop:
+
+ // Emit the start code first, without 'variable' in scope.
+ Value *StartVal = Start->Codegen();
+ if (StartVal == 0) return 0;
+
+ // Make the new basic block for the loop header, inserting after current
+ // block.
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+ BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+ BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+ // Insert an explicit fall through from the current block to the LoopBB.
+ Builder.CreateBr(LoopBB);
+
+ // Start insertion in LoopBB.
+ Builder.SetInsertPoint(LoopBB);
+
+ // Start the PHI node with an entry for Start.
+ PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
+ Variable->addIncoming(StartVal, PreheaderBB);
+
+ // Within the loop, the variable is defined equal to the PHI node. If it
+ // shadows an existing variable, we have to restore it, so save it now.
+ Value *OldVal = NamedValues[VarName];
+ NamedValues[VarName] = Variable;
+
+ // Emit the body of the loop. This, like any other expr, can change the
+ // current BB. Note that we ignore the value computed by the body, but don't
+ // allow an error.
+ if (Body->Codegen() == 0)
+ return 0;
+
+ // Emit the step value.
+ Value *StepVal;
+ if (Step) {
+ StepVal = Step->Codegen();
+ if (StepVal == 0) return 0;
+ } else {
+ // If not specified, use 1.0.
+ StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+ }
+
+ Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
+
+ // Compute the end condition.
+ Value *EndCond = End->Codegen();
+ if (EndCond == 0) return EndCond;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ EndCond = Builder.CreateFCmpONE(EndCond,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "loopcond");
+
+ // Create the "after loop" block and insert it.
+ BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+ BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+ // Insert the conditional branch into the end of LoopEndBB.
+ Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+ // Any new code will be inserted in AfterBB.
+ Builder.SetInsertPoint(AfterBB);
+
+ // Add a new entry to the PHI node for the backedge.
+ Variable->addIncoming(NextVar, LoopEndBB);
+
+ // Restore the unshadowed variable.
+ if (OldVal)
+ NamedValues[VarName] = OldVal;
+ else
+ NamedValues.erase(VarName);
+
+ // for expr always returns 0.0.
+ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+ }
+
+ Function *PrototypeAST::Codegen() {
+ // Make the function type: double(double,double) etc.
+ std::vector<Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
+
+ Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+ // If F conflicted, there was already something named 'Name'. If it has a
+ // body, don't allow redefinition or reextern.
+ if (F->getName() != Name) {
+ // Delete the one we just made and get the existing one.
+ F->eraseFromParent();
+ F = TheModule->getFunction(Name);
+
+ // If F already has a body, reject this.
+ if (!F->empty()) {
+ ErrorF("redefinition of function");
+ return 0;
+ }
+
+ // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) {
+ ErrorF("redefinition of function with different # args");
+ return 0;
+ }
+ }
+
+ // Set names for all arguments.
+ unsigned Idx = 0;
+ for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+ ++AI, ++Idx) {
+ AI->setName(Args[Idx]);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = AI;
+ }
+
+ return F;
+ }
+
+ Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ // Optimize the function.
+ TheFPM->run(*TheFunction);
+
+ return TheFunction;
+ }
+
+ // Error reading body, remove function.
+ TheFunction->eraseFromParent();
+ return 0;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Top-Level parsing and JIT Driver
+ //===----------------------------------------------------------------------===//
+
+ static ExecutionEngine *TheExecutionEngine;
+
+ static void HandleDefinition() {
+ if (FunctionAST *F = ParseDefinition()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read function definition:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleExtern() {
+ if (PrototypeAST *P = ParseExtern()) {
+ if (Function *F = P->Codegen()) {
+ fprintf(stderr, "Read extern: ");
+ F->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (Function *LF = F->Codegen()) {
+ // JIT the function, returning a function pointer.
+ void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+ // Cast it to the right type (takes no arguments, returns a double) so we
+ // can call it as a native function.
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
+ fprintf(stderr, "Evaluated to %f\n", FP());
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ /// top ::= definition | external | expression | ';'
+ static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+ }
+
+ //===----------------------------------------------------------------------===//
+ // "Library" functions that can be "extern'd" from user code.
+ //===----------------------------------------------------------------------===//
+
+ /// putchard - putchar that takes a double and returns 0.
+ extern "C"
+ double putchard(double X) {
+ putchar((char)X);
+ return 0;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Main driver code.
+ //===----------------------------------------------------------------------===//
+
+ int main() {
+ InitializeNativeTarget();
+ LLVMContext &Context = getGlobalContext();
+
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context);
+
+ // Create the JIT. This takes ownership of the module.
+ std::string ErrStr;
+ TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+ if (!TheExecutionEngine) {
+ fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+ exit(1);
+ }
+
+ FunctionPassManager OurFPM(TheModule);
+
+ // Set up the optimizer pipeline. Start with registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+ // Provide basic AliasAnalysis support for GVN.
+ OurFPM.add(createBasicAliasAnalysisPass());
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule->dump();
+
+ return 0;
+ }
+
+`Next: Extending the language: user-defined operators <LangImpl6.html>`_
+
diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html
deleted file mode 100644
index 7cd87da79229..000000000000
--- a/docs/tutorial/LangImpl6.html
+++ /dev/null
@@ -1,1829 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Extending the Language: User-defined Operators</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: User-defined Operators</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 6
- <ol>
- <li><a href="#intro">Chapter 6 Introduction</a></li>
- <li><a href="#idea">User-defined Operators: the Idea</a></li>
- <li><a href="#binary">User-defined Binary Operators</a></li>
- <li><a href="#unary">User-defined Unary Operators</a></li>
- <li><a href="#example">Kicking the Tires</a></li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="LangImpl7.html">Chapter 7</a>: Extending the Language: Mutable
-Variables / SSA Construction</li>
-</ul>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 6 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 6 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial. At this point, we have a fully
-functional language that is fairly minimal, but also useful. There
-is still one big problem with it, however: our language doesn't have many
-useful operators (like division, logical negation, or even any comparisons
-besides less-than).</p>
-
-<p>This chapter of the tutorial takes a wild digression into adding user-defined
-operators to the simple and beautiful Kaleidoscope language. This digression now gives
-us a simple and ugly language in some ways, but also a powerful one at the same time.
-One of the great things about creating your own language is that you get to
-decide what is good or bad. In this tutorial we'll assume that it is okay to
-use this as a way to show some interesting parsing techniques.</p>
-
-<p>At the end of this tutorial, we'll run through an example Kaleidoscope
-application that <a href="#example">renders the Mandelbrot set</a>. This gives
-an example of what you can build with Kaleidoscope and its feature set.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="idea">User-defined Operators: the Idea</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The "operator overloading" that we will add to Kaleidoscope is more general than
-languages like C++. In C++, you are only allowed to redefine existing
-operators: you can't programatically change the grammar, introduce new
-operators, change precedence levels, etc. In this chapter, we will add this
-capability to Kaleidoscope, which will let the user round out the set of
-operators that are supported.</p>
-
-<p>The point of going into user-defined operators in a tutorial like this is to
-show the power and flexibility of using a hand-written parser. Thus far, the parser
-we have been implementing uses recursive descent for most parts of the grammar and
-operator precedence parsing for the expressions. See <a
-href="LangImpl2.html">Chapter 2</a> for details. Without using operator
-precedence parsing, it would be very difficult to allow the programmer to
-introduce new operators into the grammar: the grammar is dynamically extensible
-as the JIT runs.</p>
-
-<p>The two specific features we'll add are programmable unary operators (right
-now, Kaleidoscope has no unary operators at all) as well as binary operators.
-An example of this is:</p>
-
-<div class="doc_code">
-<pre>
-# Logical unary not.
-def unary!(v)
- if v then
- 0
- else
- 1;
-
-# Define &gt; with the same precedence as &lt;.
-def binary&gt; 10 (LHS RHS)
- RHS &lt; LHS;
-
-# Binary "logical or", (note that it does not "short circuit")
-def binary| 5 (LHS RHS)
- if LHS then
- 1
- else if RHS then
- 1
- else
- 0;
-
-# Define = with slightly lower precedence than relationals.
-def binary= 9 (LHS RHS)
- !(LHS &lt; RHS | LHS &gt; RHS);
-</pre>
-</div>
-
-<p>Many languages aspire to being able to implement their standard runtime
-library in the language itself. In Kaleidoscope, we can implement significant
-parts of the language in the library!</p>
-
-<p>We will break down implementation of these features into two parts:
-implementing support for user-defined binary operators and adding unary
-operators.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="binary">User-defined Binary Operators</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Adding support for user-defined binary operators is pretty simple with our
-current framework. We'll first add support for the unary/binary keywords:</p>
-
-<div class="doc_code">
-<pre>
-enum Token {
- ...
- <b>// operators
- tok_binary = -11, tok_unary = -12</b>
-};
-...
-static int gettok() {
-...
- if (IdentifierStr == "for") return tok_for;
- if (IdentifierStr == "in") return tok_in;
- <b>if (IdentifierStr == "binary") return tok_binary;
- if (IdentifierStr == "unary") return tok_unary;</b>
- return tok_identifier;
-</pre>
-</div>
-
-<p>This just adds lexer support for the unary and binary keywords, like we
-did in <a href="LangImpl5.html#iflexer">previous chapters</a>. One nice thing
-about our current AST is that we represent binary operators with full generality
-by using their ASCII code as the opcode. For our extended operators, we'll use this
-same representation, so we don't need any new AST or parser support.</p>
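-
-<p>For instance, here is a minimal illustrative sketch (reusing the AST classes
-from this tutorial) of how a user-defined "@" flows through the very same
-<tt>char</tt>-sized opcode slot that the builtin operators use:</p>
-
-<div class="doc_code">
-<pre>
-// Illustrative sketch: '@' is stored as its ASCII code, just like '+'.
-ExprAST *LHS = new VariableExprAST("a");
-ExprAST *RHS = new VariableExprAST("b");
-ExprAST *AtExpr = new BinaryExprAST('@', LHS, RHS);
-</pre>
-</div>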
-
-<p>On the other hand, we have to be able to represent the definitions of these
-new operators in the "def binary| 5" part of the function definition. In our
-grammar so far, the "name" for the function definition is parsed as the
-"prototype" production and into the <tt>PrototypeAST</tt> AST node. To
-represent our new user-defined operators as prototypes, we have to extend
-the <tt>PrototypeAST</tt> AST node like this:</p>
-
-<div class="doc_code">
-<pre>
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its argument names as well as if it is an operator.
-class PrototypeAST {
- std::string Name;
- std::vector&lt;std::string&gt; Args;
- <b>bool isOperator;
- unsigned Precedence; // Precedence if a binary op.</b>
-public:
- PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args,
- <b>bool isoperator = false, unsigned prec = 0</b>)
- : Name(name), Args(args), <b>isOperator(isoperator), Precedence(prec)</b> {}
-
- <b>bool isUnaryOp() const { return isOperator &amp;&amp; Args.size() == 1; }
- bool isBinaryOp() const { return isOperator &amp;&amp; Args.size() == 2; }
-
- char getOperatorName() const {
- assert(isUnaryOp() || isBinaryOp());
- return Name[Name.size()-1];
- }
-
- unsigned getBinaryPrecedence() const { return Precedence; }</b>
-
- Function *Codegen();
-};
-</pre>
-</div>
-
-<p>Basically, in addition to knowing a name for the prototype, we now keep track
-of whether it was an operator, and if it was, what precedence level the operator
-is at. The precedence is only used for binary operators (as you'll see below,
-it just doesn't apply for unary operators). Now that we have a way to represent
-the prototype for a user-defined operator, we need to parse it:</p>
-
-<div class="doc_code">
-<pre>
-/// prototype
-/// ::= id '(' id* ')'
-<b>/// ::= binary LETTER number? (id, id)</b>
-static PrototypeAST *ParsePrototype() {
- std::string FnName;
-
- <b>unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
- unsigned BinaryPrecedence = 30;</b>
-
- switch (CurTok) {
- default:
- return ErrorP("Expected function name in prototype");
- case tok_identifier:
- FnName = IdentifierStr;
- Kind = 0;
- getNextToken();
- break;
- <b>case tok_binary:
- getNextToken();
- if (!isascii(CurTok))
- return ErrorP("Expected binary operator");
- FnName = "binary";
- FnName += (char)CurTok;
- Kind = 2;
- getNextToken();
-
- // Read the precedence if present.
- if (CurTok == tok_number) {
- if (NumVal &lt; 1 || NumVal &gt; 100)
-        return ErrorP("Invalid precedence: must be 1..100");
- BinaryPrecedence = (unsigned)NumVal;
- getNextToken();
- }
- break;</b>
- }
-
- if (CurTok != '(')
- return ErrorP("Expected '(' in prototype");
-
- std::vector&lt;std::string&gt; ArgNames;
- while (getNextToken() == tok_identifier)
- ArgNames.push_back(IdentifierStr);
- if (CurTok != ')')
- return ErrorP("Expected ')' in prototype");
-
- // success.
- getNextToken(); // eat ')'.
-
- <b>// Verify right number of names for operator.
- if (Kind &amp;&amp; ArgNames.size() != Kind)
- return ErrorP("Invalid number of operands for operator");
-
- return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);</b>
-}
-</pre>
-</div>
-
-<p>This is all fairly straightforward parsing code, and we have already seen
-a lot of similar code in the past. One interesting part of the code above is
-the couple of lines that set up <tt>FnName</tt> for binary operators. This builds names
-like "binary@" for a newly defined "@" operator. It takes advantage of the
-fact that symbol names in the LLVM symbol table are allowed to have any character in
-them, including embedded nul characters.</p>
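-
-<p>To make that round trip concrete, here is a minimal sketch (reusing the
-tutorial's <tt>TheModule</tt>) showing that the mangled name built at parse
-time is the same key that codegen later uses to find the function:</p>
-
-<div class="doc_code">
-<pre>
-// Illustrative sketch: build the mangled name for a user-defined '@'.
-std::string MangledName = std::string("binary") + '@';   // "binary@"
-// The same string is the lookup key for the operator's definition.
-Function *F = TheModule-&gt;getFunction(MangledName);
-</pre>
-</div>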
-
-<p>The next interesting thing to add is codegen support for these binary operators.
-Given our current structure, this is a simple addition of a default case for our
-existing binary operator node:</p>
-
-<div class="doc_code">
-<pre>
-Value *BinaryExprAST::Codegen() {
- Value *L = LHS-&gt;Codegen();
- Value *R = RHS-&gt;Codegen();
- if (L == 0 || R == 0) return 0;
-
- switch (Op) {
- case '+': return Builder.CreateFAdd(L, R, "addtmp");
- case '-': return Builder.CreateFSub(L, R, "subtmp");
- case '*': return Builder.CreateFMul(L, R, "multmp");
- case '&lt;':
- L = Builder.CreateFCmpULT(L, R, "cmptmp");
- // Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
- "booltmp");
- <b>default: break;</b>
- }
-
- <b>// If it wasn't a builtin binary operator, it must be a user defined one. Emit
- // a call to it.
- Function *F = TheModule-&gt;getFunction(std::string("binary")+Op);
- assert(F &amp;&amp; "binary operator not found!");
-
- Value *Ops[2] = { L, R };
- return Builder.CreateCall(F, Ops, "binop");</b>
-}
-
-</pre>
-</div>
-
-<p>As you can see above, the new code is actually really simple. It just does
-a lookup for the appropriate operator in the symbol table and generates a
-function call to it. Since user-defined operators are just built as normal
-functions (because the "prototype" boils down to a function with the right
-name), everything falls into place.</p>
-
-<p>The final piece of code we are missing is a bit of top-level magic:</p>
-
-<div class="doc_code">
-<pre>
-Function *FunctionAST::Codegen() {
- NamedValues.clear();
-
- Function *TheFunction = Proto->Codegen();
- if (TheFunction == 0)
- return 0;
-
- <b>// If this is an operator, install it.
- if (Proto-&gt;isBinaryOp())
- BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();</b>
-
- // Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
- Builder.SetInsertPoint(BB);
-
- if (Value *RetVal = Body-&gt;Codegen()) {
- ...
-</pre>
-</div>
-
-<p>Basically, before codegening a function, if it is a user-defined operator, we
-register it in the precedence table. This allows the binary operator parsing
-logic we already have in place to handle it. Since we are working on a fully-general operator precedence parser, this is all we need to do to "extend the grammar".</p>
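-
-<p>One subtlety is worth noting here: as the full code listing at the end of
-this chapter shows, if codegen of the operator's body later fails, the
-precedence-table registration is rolled back so that a broken definition
-doesn't leave a stale entry behind:</p>
-
-<div class="doc_code">
-<pre>
-  // Error reading body: remove the function, and undo the precedence
-  // registration made for a user-defined binary operator.
-  TheFunction-&gt;eraseFromParent();
-
-  if (Proto-&gt;isBinaryOp())
-    BinopPrecedence.erase(Proto-&gt;getOperatorName());
-  return 0;
-</pre>
-</div>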
-
-<p>Now we have useful user-defined binary operators. This builds a lot
-on the framework we previously built for other operators. Adding unary operators
-is a bit more challenging, because we don't have any framework for them yet - let's
-see what it takes.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="unary">User-defined Unary Operators</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Since we don't currently support unary operators in the Kaleidoscope
-language, we'll need to add everything to support them. Above, we added simple
-support for the 'unary' keyword to the lexer. In addition to that, we need an
-AST node:</p>
-
-<div class="doc_code">
-<pre>
-/// UnaryExprAST - Expression class for a unary operator.
-class UnaryExprAST : public ExprAST {
- char Opcode;
- ExprAST *Operand;
-public:
- UnaryExprAST(char opcode, ExprAST *operand)
- : Opcode(opcode), Operand(operand) {}
- virtual Value *Codegen();
-};
-</pre>
-</div>
-
-<p>This AST node is very simple and obvious by now. It directly mirrors the
-binary operator AST node, except that it only has one child. With this, we
-need to add the parsing logic. Parsing a unary operator is pretty simple: we'll
-add a new function to do it:</p>
-
-<div class="doc_code">
-<pre>
-/// unary
-/// ::= primary
-/// ::= '!' unary
-static ExprAST *ParseUnary() {
- // If the current token is not an operator, it must be a primary expr.
- if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
- return ParsePrimary();
-
- // If this is a unary operator, read it.
- int Opc = CurTok;
- getNextToken();
- if (ExprAST *Operand = ParseUnary())
- return new UnaryExprAST(Opc, Operand);
- return 0;
-}
-</pre>
-</div>
-
-<p>The grammar we add is pretty straightforward here. If we see a unary
-operator when parsing a primary expression, we eat the operator as a prefix and
-parse the remaining piece as another unary operator. This allows us to handle
-multiple unary operators (e.g. "!!x"). Note that unary operators can't have
-ambiguous parses like binary operators can, so there is no need for precedence
-information.</p>
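-
-<p>As a minimal sketch using the AST classes above, "!!x" therefore nests into
-two <tt>UnaryExprAST</tt> nodes around the variable reference:</p>
-
-<div class="doc_code">
-<pre>
-// Illustrative: ParseUnary recurses, so "!!x" builds this nested AST.
-ExprAST *X = new VariableExprAST("x");
-ExprAST *NotNotX = new UnaryExprAST('!', new UnaryExprAST('!', X));
-</pre>
-</div>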
-
-<p>The problem with this function is that we need to call ParseUnary from somewhere.
-To do this, we change previous callers of ParsePrimary to call ParseUnary
-instead:</p>
-
-<div class="doc_code">
-<pre>
-/// binoprhs
-/// ::= ('+' unary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
- ...
- <b>// Parse the unary expression after the binary operator.
- ExprAST *RHS = ParseUnary();
- if (!RHS) return 0;</b>
- ...
-}
-/// expression
-/// ::= unary binoprhs
-///
-static ExprAST *ParseExpression() {
- <b>ExprAST *LHS = ParseUnary();</b>
- if (!LHS) return 0;
-
- return ParseBinOpRHS(0, LHS);
-}
-</pre>
-</div>
-
-<p>With these two simple changes, we are now able to parse unary operators and build the
-AST for them. Next up, we need to add parser support for prototypes, to parse
-the unary operator prototype. We extend the binary operator code above
-with:</p>
-
-<div class="doc_code">
-<pre>
-/// prototype
-/// ::= id '(' id* ')'
-/// ::= binary LETTER number? (id, id)
-<b>/// ::= unary LETTER (id)</b>
-static PrototypeAST *ParsePrototype() {
- std::string FnName;
-
- unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
- unsigned BinaryPrecedence = 30;
-
- switch (CurTok) {
- default:
- return ErrorP("Expected function name in prototype");
- case tok_identifier:
- FnName = IdentifierStr;
- Kind = 0;
- getNextToken();
- break;
- <b>case tok_unary:
- getNextToken();
- if (!isascii(CurTok))
- return ErrorP("Expected unary operator");
- FnName = "unary";
- FnName += (char)CurTok;
- Kind = 1;
- getNextToken();
- break;</b>
- case tok_binary:
- ...
-</pre>
-</div>
-
-<p>As with binary operators, we name unary operators with a name that includes
-the operator character. This assists us at code generation time. Speaking of which,
-the final piece we need to add is codegen support for unary operators. It looks
-like this:</p>
-
-<div class="doc_code">
-<pre>
-Value *UnaryExprAST::Codegen() {
- Value *OperandV = Operand->Codegen();
- if (OperandV == 0) return 0;
-
- Function *F = TheModule->getFunction(std::string("unary")+Opcode);
- if (F == 0)
- return ErrorV("Unknown unary operator");
-
- return Builder.CreateCall(F, OperandV, "unop");
-}
-</pre>
-</div>
-
-<p>This code is similar to, but simpler than, the code for binary operators. It
-is simpler primarily because it doesn't need to handle any predefined operators.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="example">Kicking the Tires</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>It is somewhat hard to believe, but with a few simple extensions we've
-covered in the last chapters, we have grown a real-ish language. With this, we
-can do a lot of interesting things, including I/O, math, and a bunch of other
-things. For example, we can now add a nice sequencing operator (printd is
-defined to print out the specified value and a newline):</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern printd(x);</b>
-Read extern:
-declare double @printd(double)
-
-ready&gt; <b>def binary : 1 (x y) 0; # Low-precedence operator that ignores operands.</b>
-..
-ready&gt; <b>printd(123) : printd(456) : printd(789);</b>
-123.000000
-456.000000
-789.000000
-Evaluated to 0.000000
-</pre>
-</div>
-
-<p>We can also define a bunch of other "primitive" operations, such as:</p>
-
-<div class="doc_code">
-<pre>
-# Logical unary not.
-def unary!(v)
- if v then
- 0
- else
- 1;
-
-# Unary negate.
-def unary-(v)
- 0-v;
-
-# Define &gt; with the same precedence as &lt;.
-def binary&gt; 10 (LHS RHS)
- RHS &lt; LHS;
-
-# Binary logical or, which does not short circuit.
-def binary| 5 (LHS RHS)
- if LHS then
- 1
- else if RHS then
- 1
- else
- 0;
-
-# Binary logical and, which does not short circuit.
-def binary&amp; 6 (LHS RHS)
- if !LHS then
- 0
- else
- !!RHS;
-
-# Define = with slightly lower precedence than relationals.
-def binary = 9 (LHS RHS)
- !(LHS &lt; RHS | LHS &gt; RHS);
-
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-</pre>
-</div>
-
-
-<p>Given the previous if/then/else support, we can also define interesting
-functions for I/O. For example, the following prints out a character whose
-"density" reflects the value passed in: the lower the value, the denser the
-character:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt;
-<b>
-extern putchard(char)
-def printdensity(d)
- if d &gt; 8 then
- putchard(32) # ' '
- else if d &gt; 4 then
- putchard(46) # '.'
- else if d &gt; 2 then
- putchard(43) # '+'
- else
- putchard(42); # '*'</b>
-...
-ready&gt; <b>printdensity(1): printdensity(2): printdensity(3):
- printdensity(4): printdensity(5): printdensity(9):
- putchard(10);</b>
-**++.
-Evaluated to 0.000000
-</pre>
-</div>
-
-<p>Based on these simple primitive operations, we can start to define more
-interesting things. For example, here's a little function that solves for the
-number of iterations it takes a function in the complex plane to
-converge:</p>
-
-<div class="doc_code">
-<pre>
-# Determine whether the specific location diverges.
-# Solve for z = z^2 + c in the complex plane.
-def mandleconverger(real imag iters creal cimag)
- if iters &gt; 255 | (real*real + imag*imag &gt; 4) then
- iters
- else
- mandleconverger(real*real - imag*imag + creal,
- 2*real*imag + cimag,
- iters+1, creal, cimag);
-
-# Return the number of iterations required for the iteration to escape
-def mandleconverge(real imag)
- mandleconverger(real, imag, 0, real, imag);
-</pre>
-</div>
-
-<p>This "<code>z = z<sup>2</sup> + c</code>" function is a beautiful little
-creature that is the basis for computation of
-the <a href="http://en.wikipedia.org/wiki/Mandelbrot_set">Mandelbrot Set</a>.
-Our <tt>mandleconverge</tt> function returns the number of iterations that it
-takes for a complex orbit to escape, saturating to 255. This is not a very
-useful function by itself, but if you plot its value over a two-dimensional
-plane, you can see the Mandelbrot set. Given that we are limited to using
-putchard here, our amazing graphical output is limited, but we can whip together
-something using the density plotter above:</p>
-
-<div class="doc_code">
-<pre>
-# Compute and plot the Mandelbrot set with the specified 2-dimensional range
-# info.
-def mandelhelp(xmin xmax xstep ymin ymax ystep)
- for y = ymin, y &lt; ymax, ystep in (
- (for x = xmin, x &lt; xmax, xstep in
- printdensity(mandleconverge(x,y)))
- : putchard(10)
- )
-
-# mandel - This is a convenient helper function for plotting the Mandelbrot set
-# from the specified position with the specified magnification.
-def mandel(realstart imagstart realmag imagmag)
- mandelhelp(realstart, realstart+realmag*78, realmag,
- imagstart, imagstart+imagmag*40, imagmag);
-</pre>
-</div>
-
-<p>Given this, we can try plotting out the Mandelbrot set! Let's try it out:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>mandel(-2.3, -1.3, 0.05, 0.07);</b>
-*******************************+++++++++++*************************************
-*************************+++++++++++++++++++++++*******************************
-**********************+++++++++++++++++++++++++++++****************************
-*******************+++++++++++++++++++++.. ...++++++++*************************
-*****************++++++++++++++++++++++.... ...+++++++++***********************
-***************+++++++++++++++++++++++..... ...+++++++++*********************
-**************+++++++++++++++++++++++.... ....+++++++++********************
-*************++++++++++++++++++++++...... .....++++++++*******************
-************+++++++++++++++++++++....... .......+++++++******************
-***********+++++++++++++++++++.... ... .+++++++*****************
-**********+++++++++++++++++....... .+++++++****************
-*********++++++++++++++........... ...+++++++***************
-********++++++++++++............ ...++++++++**************
-********++++++++++... .......... .++++++++**************
-*******+++++++++..... .+++++++++*************
-*******++++++++...... ..+++++++++*************
-*******++++++....... ..+++++++++*************
-*******+++++...... ..+++++++++*************
-*******.... .... ...+++++++++*************
-*******.... . ...+++++++++*************
-*******+++++...... ...+++++++++*************
-*******++++++....... ..+++++++++*************
-*******++++++++...... .+++++++++*************
-*******+++++++++..... ..+++++++++*************
-********++++++++++... .......... .++++++++**************
-********++++++++++++............ ...++++++++**************
-*********++++++++++++++.......... ...+++++++***************
-**********++++++++++++++++........ .+++++++****************
-**********++++++++++++++++++++.... ... ..+++++++****************
-***********++++++++++++++++++++++....... .......++++++++*****************
-************+++++++++++++++++++++++...... ......++++++++******************
-**************+++++++++++++++++++++++.... ....++++++++********************
-***************+++++++++++++++++++++++..... ...+++++++++*********************
-*****************++++++++++++++++++++++.... ...++++++++***********************
-*******************+++++++++++++++++++++......++++++++*************************
-*********************++++++++++++++++++++++.++++++++***************************
-*************************+++++++++++++++++++++++*******************************
-******************************+++++++++++++************************************
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-Evaluated to 0.000000
-ready&gt; <b>mandel(-2, -1, 0.02, 0.04);</b>
-**************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
-***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-*********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
-*******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
-*****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
-***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
-**************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
-************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
-***********++++++++++++++++++++++++++++++++++++++++++++++++++........ .
-**********++++++++++++++++++++++++++++++++++++++++++++++.............
-********+++++++++++++++++++++++++++++++++++++++++++..................
-*******+++++++++++++++++++++++++++++++++++++++.......................
-******+++++++++++++++++++++++++++++++++++...........................
-*****++++++++++++++++++++++++++++++++............................
-*****++++++++++++++++++++++++++++...............................
-****++++++++++++++++++++++++++...... .........................
-***++++++++++++++++++++++++......... ...... ...........
-***++++++++++++++++++++++............
-**+++++++++++++++++++++..............
-**+++++++++++++++++++................
-*++++++++++++++++++.................
-*++++++++++++++++............ ...
-*++++++++++++++..............
-*+++....++++................
-*.......... ...........
-*
-*.......... ...........
-*+++....++++................
-*++++++++++++++..............
-*++++++++++++++++............ ...
-*++++++++++++++++++.................
-**+++++++++++++++++++................
-**+++++++++++++++++++++..............
-***++++++++++++++++++++++............
-***++++++++++++++++++++++++......... ...... ...........
-****++++++++++++++++++++++++++...... .........................
-*****++++++++++++++++++++++++++++...............................
-*****++++++++++++++++++++++++++++++++............................
-******+++++++++++++++++++++++++++++++++++...........................
-*******+++++++++++++++++++++++++++++++++++++++.......................
-********+++++++++++++++++++++++++++++++++++++++++++..................
-Evaluated to 0.000000
-ready&gt; <b>mandel(-0.9, -1.4, 0.02, 0.03);</b>
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-**********+++++++++++++++++++++************************************************
-*+++++++++++++++++++++++++++++++++++++++***************************************
-+++++++++++++++++++++++++++++++++++++++++++++**********************************
-++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
-++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
-+++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
-+++++++++++++++++++++++++++++++.... ......+++++++++++++++++++****************
-+++++++++++++++++++++++++++++....... ........+++++++++++++++++++**************
-++++++++++++++++++++++++++++........ ........++++++++++++++++++++************
-+++++++++++++++++++++++++++......... .. ...+++++++++++++++++++++**********
-++++++++++++++++++++++++++........... ....++++++++++++++++++++++********
-++++++++++++++++++++++++............. .......++++++++++++++++++++++******
-+++++++++++++++++++++++............. ........+++++++++++++++++++++++****
-++++++++++++++++++++++........... ..........++++++++++++++++++++++***
-++++++++++++++++++++........... .........++++++++++++++++++++++*
-++++++++++++++++++............ ...........++++++++++++++++++++
-++++++++++++++++............... .............++++++++++++++++++
-++++++++++++++................. ...............++++++++++++++++
-++++++++++++.................. .................++++++++++++++
-+++++++++.................. .................+++++++++++++
-++++++........ . ......... ..++++++++++++
-++............ ...... ....++++++++++
-.............. ...++++++++++
-.............. ....+++++++++
-.............. .....++++++++
-............. ......++++++++
-........... .......++++++++
-......... ........+++++++
-......... ........+++++++
-......... ....+++++++
-........ ...+++++++
-....... ...+++++++
- ....+++++++
- .....+++++++
- ....+++++++
- ....+++++++
- ....+++++++
-Evaluated to 0.000000
-ready&gt; <b>^D</b>
-</pre>
-</div>
-
-<p>At this point, you may be starting to realize that Kaleidoscope is a real
-and powerful language. It may not be self-similar :), but it can be used to
-plot things that are!</p>
-
-<p>With this, we conclude the "adding user-defined operators" chapter of the
-tutorial. We have successfully augmented our language, adding the ability to extend the
-language in the library, and we have shown how this can be used to build a simple but
-interesting end-user application in Kaleidoscope. At this point, Kaleidoscope
-can build a variety of applications that are functional and can call functions
-with side-effects, but it can't actually define and mutate a variable itself.
-</p>
-
-<p>Strikingly, variable mutation is an important feature of some
-languages, and it is not at all obvious how to <a href="LangImpl7.html">add
-support for mutable variables</a> without having to add an "SSA construction"
-phase to your front-end. In the next chapter, we will describe how you can
-add variable mutation without building SSA in your front-end.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with
-support for user-defined operators. To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-</pre>
-</div>
-
-<p>On some platforms, you will need to specify -rdynamic or -Wl,--export-dynamic
-when linking. This ensures that symbols defined in the main executable are
-exported to the dynamic linker and so are available for symbol resolution at
-run time. This is not needed if you compile your support code into a shared
-library, although doing that will cause problems on Windows.</p>
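-
-<p>For example, an illustrative link command with the export flag added would
-look like this:</p>
-
-<div class="doc_code">
-<pre>
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -rdynamic -O3 -o toy
-</pre>
-</div>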
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include &lt;cstdio&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
- tok_eof = -1,
-
- // commands
- tok_def = -2, tok_extern = -3,
-
- // primary
- tok_identifier = -4, tok_number = -5,
-
- // control
- tok_if = -6, tok_then = -7, tok_else = -8,
- tok_for = -9, tok_in = -10,
-
- // operators
- tok_binary = -11, tok_unary = -12
-};
-
-static std::string IdentifierStr; // Filled in if tok_identifier
-static double NumVal; // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
- static int LastChar = ' ';
-
- // Skip any whitespace.
- while (isspace(LastChar))
- LastChar = getchar();
-
- if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
- IdentifierStr = LastChar;
- while (isalnum((LastChar = getchar())))
- IdentifierStr += LastChar;
-
- if (IdentifierStr == "def") return tok_def;
- if (IdentifierStr == "extern") return tok_extern;
- if (IdentifierStr == "if") return tok_if;
- if (IdentifierStr == "then") return tok_then;
- if (IdentifierStr == "else") return tok_else;
- if (IdentifierStr == "for") return tok_for;
- if (IdentifierStr == "in") return tok_in;
- if (IdentifierStr == "binary") return tok_binary;
- if (IdentifierStr == "unary") return tok_unary;
- return tok_identifier;
- }
-
- if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
- std::string NumStr;
- do {
- NumStr += LastChar;
- LastChar = getchar();
- } while (isdigit(LastChar) || LastChar == '.');
-
- NumVal = strtod(NumStr.c_str(), 0);
- return tok_number;
- }
-
- if (LastChar == '#') {
- // Comment until end of line.
- do LastChar = getchar();
- while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-
- if (LastChar != EOF)
- return gettok();
- }
-
- // Check for end of file. Don't eat the EOF.
- if (LastChar == EOF)
- return tok_eof;
-
- // Otherwise, just return the character as its ascii value.
- int ThisChar = LastChar;
- LastChar = getchar();
- return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
- virtual ~ExprAST() {}
- virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
- double Val;
-public:
- NumberExprAST(double val) : Val(val) {}
- virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
- std::string Name;
-public:
- VariableExprAST(const std::string &amp;name) : Name(name) {}
- virtual Value *Codegen();
-};
-
-/// UnaryExprAST - Expression class for a unary operator.
-class UnaryExprAST : public ExprAST {
- char Opcode;
- ExprAST *Operand;
-public:
- UnaryExprAST(char opcode, ExprAST *operand)
- : Opcode(opcode), Operand(operand) {}
- virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
- char Op;
- ExprAST *LHS, *RHS;
-public:
- BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
- : Op(op), LHS(lhs), RHS(rhs) {}
- virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
- std::string Callee;
- std::vector&lt;ExprAST*&gt; Args;
-public:
- CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
- : Callee(callee), Args(args) {}
- virtual Value *Codegen();
-};
-
-/// IfExprAST - Expression class for if/then/else.
-class IfExprAST : public ExprAST {
- ExprAST *Cond, *Then, *Else;
-public:
- IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
- : Cond(cond), Then(then), Else(_else) {}
- virtual Value *Codegen();
-};
-
-/// ForExprAST - Expression class for for/in.
-class ForExprAST : public ExprAST {
- std::string VarName;
- ExprAST *Start, *End, *Step, *Body;
-public:
- ForExprAST(const std::string &amp;varname, ExprAST *start, ExprAST *end,
- ExprAST *step, ExprAST *body)
- : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
- virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes), as well as if it is an operator.
-class PrototypeAST {
- std::string Name;
- std::vector&lt;std::string&gt; Args;
- bool isOperator;
- unsigned Precedence; // Precedence if a binary op.
-public:
- PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args,
- bool isoperator = false, unsigned prec = 0)
- : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
-
- bool isUnaryOp() const { return isOperator &amp;&amp; Args.size() == 1; }
- bool isBinaryOp() const { return isOperator &amp;&amp; Args.size() == 2; }
-
- char getOperatorName() const {
- assert(isUnaryOp() || isBinaryOp());
- return Name[Name.size()-1];
- }
-
- unsigned getBinaryPrecedence() const { return Precedence; }
-
- Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
- PrototypeAST *Proto;
- ExprAST *Body;
-public:
- FunctionAST(PrototypeAST *proto, ExprAST *body)
- : Proto(proto), Body(body) {}
-
- Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser is looking at. getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
- return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
- if (!isascii(CurTok))
- return -1;
-
- // Make sure it's a declared binop.
- int TokPrec = BinopPrecedence[CurTok];
- if (TokPrec &lt;= 0) return -1;
- return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-/// ::= identifier
-/// ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
- std::string IdName = IdentifierStr;
-
- getNextToken(); // eat identifier.
-
- if (CurTok != '(') // Simple variable ref.
- return new VariableExprAST(IdName);
-
- // Call.
- getNextToken(); // eat (
- std::vector&lt;ExprAST*&gt; Args;
- if (CurTok != ')') {
- while (1) {
- ExprAST *Arg = ParseExpression();
- if (!Arg) return 0;
- Args.push_back(Arg);
-
- if (CurTok == ')') break;
-
- if (CurTok != ',')
- return Error("Expected ')' or ',' in argument list");
- getNextToken();
- }
- }
-
- // Eat the ')'.
- getNextToken();
-
- return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
- ExprAST *Result = new NumberExprAST(NumVal);
- getNextToken(); // consume the number
- return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
- getNextToken(); // eat (.
- ExprAST *V = ParseExpression();
- if (!V) return 0;
-
- if (CurTok != ')')
- return Error("expected ')'");
- getNextToken(); // eat ).
- return V;
-}
-
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
- getNextToken(); // eat the if.
-
- // condition.
- ExprAST *Cond = ParseExpression();
- if (!Cond) return 0;
-
- if (CurTok != tok_then)
- return Error("expected then");
- getNextToken(); // eat the then
-
- ExprAST *Then = ParseExpression();
- if (Then == 0) return 0;
-
- if (CurTok != tok_else)
- return Error("expected else");
-
- getNextToken();
-
- ExprAST *Else = ParseExpression();
- if (!Else) return 0;
-
- return new IfExprAST(Cond, Then, Else);
-}
-
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
- getNextToken(); // eat the for.
-
- if (CurTok != tok_identifier)
- return Error("expected identifier after for");
-
- std::string IdName = IdentifierStr;
- getNextToken(); // eat identifier.
-
- if (CurTok != '=')
- return Error("expected '=' after for");
- getNextToken(); // eat '='.
-
-
- ExprAST *Start = ParseExpression();
- if (Start == 0) return 0;
- if (CurTok != ',')
- return Error("expected ',' after for start value");
- getNextToken();
-
- ExprAST *End = ParseExpression();
- if (End == 0) return 0;
-
- // The step value is optional.
- ExprAST *Step = 0;
- if (CurTok == ',') {
- getNextToken();
- Step = ParseExpression();
- if (Step == 0) return 0;
- }
-
- if (CurTok != tok_in)
- return Error("expected 'in' after for");
- getNextToken(); // eat 'in'.
-
- ExprAST *Body = ParseExpression();
- if (Body == 0) return 0;
-
- return new ForExprAST(IdName, Start, End, Step, Body);
-}
-
-/// primary
-/// ::= identifierexpr
-/// ::= numberexpr
-/// ::= parenexpr
-/// ::= ifexpr
-/// ::= forexpr
-static ExprAST *ParsePrimary() {
- switch (CurTok) {
- default: return Error("unknown token when expecting an expression");
- case tok_identifier: return ParseIdentifierExpr();
- case tok_number: return ParseNumberExpr();
- case '(': return ParseParenExpr();
- case tok_if: return ParseIfExpr();
- case tok_for: return ParseForExpr();
- }
-}
-
-/// unary
-/// ::= primary
-/// ::= '!' unary
-static ExprAST *ParseUnary() {
- // If the current token is not an operator, it must be a primary expr.
- if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
- return ParsePrimary();
-
- // If this is a unary operator, read it.
- int Opc = CurTok;
- getNextToken();
- if (ExprAST *Operand = ParseUnary())
- return new UnaryExprAST(Opc, Operand);
- return 0;
-}
-
-/// binoprhs
-/// ::= ('+' unary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
- // If this is a binop, find its precedence.
- while (1) {
- int TokPrec = GetTokPrecedence();
-
- // If this is a binop that binds at least as tightly as the current binop,
- // consume it, otherwise we are done.
- if (TokPrec &lt; ExprPrec)
- return LHS;
-
- // Okay, we know this is a binop.
- int BinOp = CurTok;
- getNextToken(); // eat binop
-
- // Parse the unary expression after the binary operator.
- ExprAST *RHS = ParseUnary();
- if (!RHS) return 0;
-
- // If BinOp binds less tightly with RHS than the operator after RHS, let
- // the pending operator take RHS as its LHS.
- int NextPrec = GetTokPrecedence();
- if (TokPrec &lt; NextPrec) {
- RHS = ParseBinOpRHS(TokPrec+1, RHS);
- if (RHS == 0) return 0;
- }
-
- // Merge LHS/RHS.
- LHS = new BinaryExprAST(BinOp, LHS, RHS);
- }
-}
-
-/// expression
-/// ::= unary binoprhs
-///
-static ExprAST *ParseExpression() {
- ExprAST *LHS = ParseUnary();
- if (!LHS) return 0;
-
- return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-/// ::= id '(' id* ')'
-/// ::= binary LETTER number? (id, id)
-/// ::= unary LETTER (id)
-static PrototypeAST *ParsePrototype() {
- std::string FnName;
-
- unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
- unsigned BinaryPrecedence = 30;
-
- switch (CurTok) {
- default:
- return ErrorP("Expected function name in prototype");
- case tok_identifier:
- FnName = IdentifierStr;
- Kind = 0;
- getNextToken();
- break;
- case tok_unary:
- getNextToken();
- if (!isascii(CurTok))
- return ErrorP("Expected unary operator");
- FnName = "unary";
- FnName += (char)CurTok;
- Kind = 1;
- getNextToken();
- break;
- case tok_binary:
- getNextToken();
- if (!isascii(CurTok))
- return ErrorP("Expected binary operator");
- FnName = "binary";
- FnName += (char)CurTok;
- Kind = 2;
- getNextToken();
-
- // Read the precedence if present.
- if (CurTok == tok_number) {
- if (NumVal &lt; 1 || NumVal &gt; 100)
-        return ErrorP("Invalid precedence: must be 1..100");
- BinaryPrecedence = (unsigned)NumVal;
- getNextToken();
- }
- break;
- }
-
- if (CurTok != '(')
- return ErrorP("Expected '(' in prototype");
-
- std::vector&lt;std::string&gt; ArgNames;
- while (getNextToken() == tok_identifier)
- ArgNames.push_back(IdentifierStr);
- if (CurTok != ')')
- return ErrorP("Expected ')' in prototype");
-
- // success.
- getNextToken(); // eat ')'.
-
- // Verify right number of names for operator.
- if (Kind &amp;&amp; ArgNames.size() != Kind)
- return ErrorP("Invalid number of operands for operator");
-
- return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
- getNextToken(); // eat def.
- PrototypeAST *Proto = ParsePrototype();
- if (Proto == 0) return 0;
-
- if (ExprAST *E = ParseExpression())
- return new FunctionAST(Proto, E);
- return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
- if (ExprAST *E = ParseExpression()) {
- // Make an anonymous proto.
- PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
- return new FunctionAST(Proto, E);
- }
- return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
- getNextToken(); // eat extern.
- return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder&lt;&gt; Builder(getGlobalContext());
-static std::map&lt;std::string, Value*&gt; NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
- return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
- // Look this variable up in the function.
- Value *V = NamedValues[Name];
- return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *UnaryExprAST::Codegen() {
- Value *OperandV = Operand-&gt;Codegen();
- if (OperandV == 0) return 0;
-
- Function *F = TheModule-&gt;getFunction(std::string("unary")+Opcode);
- if (F == 0)
- return ErrorV("Unknown unary operator");
-
- return Builder.CreateCall(F, OperandV, "unop");
-}
-
-Value *BinaryExprAST::Codegen() {
- Value *L = LHS-&gt;Codegen();
- Value *R = RHS-&gt;Codegen();
- if (L == 0 || R == 0) return 0;
-
- switch (Op) {
- case '+': return Builder.CreateFAdd(L, R, "addtmp");
- case '-': return Builder.CreateFSub(L, R, "subtmp");
- case '*': return Builder.CreateFMul(L, R, "multmp");
- case '&lt;':
- L = Builder.CreateFCmpULT(L, R, "cmptmp");
- // Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
- "booltmp");
- default: break;
- }
-
- // If it wasn't a builtin binary operator, it must be a user defined one. Emit
- // a call to it.
- Function *F = TheModule-&gt;getFunction(std::string("binary")+Op);
- assert(F &amp;&amp; "binary operator not found!");
-
- Value *Ops[2] = { L, R };
- return Builder.CreateCall(F, Ops, "binop");
-}
-
-Value *CallExprAST::Codegen() {
- // Look up the name in the global module table.
- Function *CalleeF = TheModule-&gt;getFunction(Callee);
- if (CalleeF == 0)
- return ErrorV("Unknown function referenced");
-
- // If argument mismatch error.
- if (CalleeF-&gt;arg_size() != Args.size())
- return ErrorV("Incorrect # arguments passed");
-
- std::vector&lt;Value*&gt; ArgsV;
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- ArgsV.push_back(Args[i]-&gt;Codegen());
- if (ArgsV.back() == 0) return 0;
- }
-
- return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Value *IfExprAST::Codegen() {
- Value *CondV = Cond-&gt;Codegen();
- if (CondV == 0) return 0;
-
- // Convert condition to a bool by comparing equal to 0.0.
- CondV = Builder.CreateFCmpONE(CondV,
- ConstantFP::get(getGlobalContext(), APFloat(0.0)),
- "ifcond");
-
- Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-
- // Create blocks for the then and else cases. Insert the 'then' block at the
- // end of the function.
- BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
- BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
- BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-
- Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-
- // Emit then value.
- Builder.SetInsertPoint(ThenBB);
-
- Value *ThenV = Then-&gt;Codegen();
- if (ThenV == 0) return 0;
-
- Builder.CreateBr(MergeBB);
- // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
- ThenBB = Builder.GetInsertBlock();
-
- // Emit else block.
- TheFunction-&gt;getBasicBlockList().push_back(ElseBB);
- Builder.SetInsertPoint(ElseBB);
-
- Value *ElseV = Else-&gt;Codegen();
- if (ElseV == 0) return 0;
-
- Builder.CreateBr(MergeBB);
- // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
- ElseBB = Builder.GetInsertBlock();
-
- // Emit merge block.
- TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
- Builder.SetInsertPoint(MergeBB);
- PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
- "iftmp");
-
- PN-&gt;addIncoming(ThenV, ThenBB);
- PN-&gt;addIncoming(ElseV, ElseBB);
- return PN;
-}
-
-Value *ForExprAST::Codegen() {
- // Output this as:
- // ...
- // start = startexpr
- // goto loop
- // loop:
- // variable = phi [start, loopheader], [nextvariable, loopend]
- // ...
- // bodyexpr
- // ...
- // loopend:
- // step = stepexpr
- // nextvariable = variable + step
- // endcond = endexpr
-  //   br endcond, loop, outloop
- // outloop:
-
- // Emit the start code first, without 'variable' in scope.
- Value *StartVal = Start-&gt;Codegen();
- if (StartVal == 0) return 0;
-
- // Make the new basic block for the loop header, inserting after current
- // block.
- Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
- BasicBlock *PreheaderBB = Builder.GetInsertBlock();
- BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
-
- // Insert an explicit fall through from the current block to the LoopBB.
- Builder.CreateBr(LoopBB);
-
- // Start insertion in LoopBB.
- Builder.SetInsertPoint(LoopBB);
-
- // Start the PHI node with an entry for Start.
- PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
- Variable-&gt;addIncoming(StartVal, PreheaderBB);
-
- // Within the loop, the variable is defined equal to the PHI node. If it
- // shadows an existing variable, we have to restore it, so save it now.
- Value *OldVal = NamedValues[VarName];
- NamedValues[VarName] = Variable;
-
- // Emit the body of the loop. This, like any other expr, can change the
- // current BB. Note that we ignore the value computed by the body, but don't
- // allow an error.
- if (Body-&gt;Codegen() == 0)
- return 0;
-
- // Emit the step value.
- Value *StepVal;
- if (Step) {
- StepVal = Step-&gt;Codegen();
- if (StepVal == 0) return 0;
- } else {
- // If not specified, use 1.0.
- StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
- }
-
- Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
-
- // Compute the end condition.
- Value *EndCond = End-&gt;Codegen();
- if (EndCond == 0) return EndCond;
-
- // Convert condition to a bool by comparing equal to 0.0.
- EndCond = Builder.CreateFCmpONE(EndCond,
- ConstantFP::get(getGlobalContext(), APFloat(0.0)),
- "loopcond");
-
- // Create the "after loop" block and insert it.
- BasicBlock *LoopEndBB = Builder.GetInsertBlock();
- BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
-
- // Insert the conditional branch into the end of LoopEndBB.
- Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-
- // Any new code will be inserted in AfterBB.
- Builder.SetInsertPoint(AfterBB);
-
- // Add a new entry to the PHI node for the backedge.
- Variable-&gt;addIncoming(NextVar, LoopEndBB);
-
- // Restore the unshadowed variable.
- if (OldVal)
- NamedValues[VarName] = OldVal;
- else
- NamedValues.erase(VarName);
-
-
- // for expr always returns 0.0.
- return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-
-Function *PrototypeAST::Codegen() {
- // Make the function type: double(double,double) etc.
- std::vector&lt;Type*&gt; Doubles(Args.size(),
- Type::getDoubleTy(getGlobalContext()));
- FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
- Doubles, false);
-
- Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-
- // If F conflicted, there was already something named 'Name'. If it has a
- // body, don't allow redefinition or reextern.
- if (F-&gt;getName() != Name) {
- // Delete the one we just made and get the existing one.
- F-&gt;eraseFromParent();
- F = TheModule-&gt;getFunction(Name);
-
- // If F already has a body, reject this.
- if (!F-&gt;empty()) {
- ErrorF("redefinition of function");
- return 0;
- }
-
- // If F took a different number of args, reject.
- if (F-&gt;arg_size() != Args.size()) {
- ErrorF("redefinition of function with different # args");
- return 0;
- }
- }
-
- // Set names for all arguments.
- unsigned Idx = 0;
- for (Function::arg_iterator AI = F-&gt;arg_begin(); Idx != Args.size();
- ++AI, ++Idx) {
- AI-&gt;setName(Args[Idx]);
-
- // Add arguments to variable symbol table.
- NamedValues[Args[Idx]] = AI;
- }
-
- return F;
-}
-
-Function *FunctionAST::Codegen() {
- NamedValues.clear();
-
- Function *TheFunction = Proto-&gt;Codegen();
- if (TheFunction == 0)
- return 0;
-
- // If this is an operator, install it.
- if (Proto-&gt;isBinaryOp())
- BinopPrecedence[Proto-&gt;getOperatorName()] = Proto-&gt;getBinaryPrecedence();
-
- // Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
- Builder.SetInsertPoint(BB);
-
- if (Value *RetVal = Body-&gt;Codegen()) {
- // Finish off the function.
- Builder.CreateRet(RetVal);
-
- // Validate the generated code, checking for consistency.
- verifyFunction(*TheFunction);
-
- // Optimize the function.
- TheFPM-&gt;run(*TheFunction);
-
- return TheFunction;
- }
-
- // Error reading body, remove function.
- TheFunction-&gt;eraseFromParent();
-
- if (Proto-&gt;isBinaryOp())
- BinopPrecedence.erase(Proto-&gt;getOperatorName());
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-static void HandleDefinition() {
- if (FunctionAST *F = ParseDefinition()) {
- if (Function *LF = F-&gt;Codegen()) {
- fprintf(stderr, "Read function definition:");
- LF-&gt;dump();
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleExtern() {
- if (PrototypeAST *P = ParseExtern()) {
- if (Function *F = P-&gt;Codegen()) {
- fprintf(stderr, "Read extern: ");
- F-&gt;dump();
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleTopLevelExpression() {
- // Evaluate a top-level expression into an anonymous function.
- if (FunctionAST *F = ParseTopLevelExpr()) {
- if (Function *LF = F-&gt;Codegen()) {
- // JIT the function, returning a function pointer.
- void *FPtr = TheExecutionEngine-&gt;getPointerToFunction(LF);
-
- // Cast it to the right type (takes no arguments, returns a double) so we
- // can call it as a native function.
- double (*FP)() = (double (*)())(intptr_t)FPtr;
- fprintf(stderr, "Evaluated to %f\n", FP());
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
- while (1) {
- fprintf(stderr, "ready&gt; ");
- switch (CurTok) {
- case tok_eof: return;
- case ';': getNextToken(); break; // ignore top-level semicolons.
- case tok_def: HandleDefinition(); break;
- case tok_extern: HandleExtern(); break;
- default: HandleTopLevelExpression(); break;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C"
-double putchard(double X) {
- putchar((char)X);
- return 0;
-}
-
-/// printd - printf that takes a double prints it as "%f\n", returning 0.
-extern "C"
-double printd(double X) {
- printf("%f\n", X);
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
- InitializeNativeTarget();
- LLVMContext &amp;Context = getGlobalContext();
-
- // Install standard binary operators.
- // 1 is lowest precedence.
- BinopPrecedence['&lt;'] = 10;
- BinopPrecedence['+'] = 20;
- BinopPrecedence['-'] = 20;
- BinopPrecedence['*'] = 40; // highest.
-
- // Prime the first token.
- fprintf(stderr, "ready&gt; ");
- getNextToken();
-
- // Make the module, which holds all the code.
- TheModule = new Module("my cool jit", Context);
-
- // Create the JIT. This takes ownership of the module.
- std::string ErrStr;
- TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&amp;ErrStr).create();
- if (!TheExecutionEngine) {
- fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
- exit(1);
- }
-
- FunctionPassManager OurFPM(TheModule);
-
- // Set up the optimizer pipeline. Start with registering info about how the
- // target lays out data structures.
- OurFPM.add(new DataLayout(*TheExecutionEngine-&gt;getDataLayout()));
- // Provide basic AliasAnalysis support for GVN.
- OurFPM.add(createBasicAliasAnalysisPass());
- // Do simple "peephole" optimizations and bit-twiddling optzns.
- OurFPM.add(createInstructionCombiningPass());
- // Reassociate expressions.
- OurFPM.add(createReassociatePass());
- // Eliminate Common SubExpressions.
- OurFPM.add(createGVNPass());
- // Simplify the control flow graph (deleting unreachable blocks, etc).
- OurFPM.add(createCFGSimplificationPass());
-
- OurFPM.doInitialization();
-
- // Set the global so the code gen can use this.
- TheFPM = &amp;OurFPM;
-
- // Run the main "interpreter loop" now.
- MainLoop();
-
- TheFPM = 0;
-
- // Print out all of the generated code.
- TheModule-&gt;dump();
-
- return 0;
-}
-</pre>
-</div>
-
-<a href="LangImpl7.html">Next: Extending the language: mutable variables / SSA construction</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl6.rst b/docs/tutorial/LangImpl6.rst
new file mode 100644
index 000000000000..a5a60bffe04a
--- /dev/null
+++ b/docs/tutorial/LangImpl6.rst
@@ -0,0 +1,1726 @@
+============================================================
+Kaleidoscope: Extending the Language: User-defined Operators
+============================================================
+
+.. contents::
+ :local:
+
+Chapter 6 Introduction
+======================
+
+Welcome to Chapter 6 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. At this point in our tutorial, we now
+have a fully functional language that is fairly minimal, but also
+useful. There is still one big problem with it, however. Our language
+doesn't have many useful operators (like division, logical negation, or
+even any comparisons besides less-than).
+
+This chapter of the tutorial takes a wild digression into adding
+user-defined operators to the simple and beautiful Kaleidoscope
+language. This digression gives us a language that is in some ways
+simple and ugly, but at the same time a powerful one. One of the great
+things about creating your own language is that you get to decide what
+is good or bad. In this tutorial we'll assume that it is okay to use
+this as a way to show some interesting parsing techniques.
+
+At the end of this tutorial, we'll run through an example Kaleidoscope
+application that `renders the Mandelbrot set <#example>`_. This gives an
+example of what you can build with Kaleidoscope and its feature set.
+
+User-defined Operators: the Idea
+================================
+
+The "operator overloading" that we will add to Kaleidoscope is more
+general than languages like C++. In C++, you are only allowed to
+redefine existing operators: you can't programatically change the
+grammar, introduce new operators, change precedence levels, etc. In this
+chapter, we will add this capability to Kaleidoscope, which will let the
+user round out the set of operators that are supported.
+
+The point of going into user-defined operators in a tutorial like this
+is to show the power and flexibility of using a hand-written parser.
+Thus far, the parser we have been implementing uses recursive descent
+for most parts of the grammar and operator precedence parsing for the
+expressions. See `Chapter 2 <LangImpl2.html>`_ for details. Without
+using operator precedence parsing, it would be very difficult to allow
+the programmer to introduce new operators into the grammar: the grammar
+is dynamically extensible as the JIT runs.
+
+The two specific features we'll add are programmable unary operators
+(right now, Kaleidoscope has no unary operators at all) as well as
+binary operators. An example of this is:
+
+::
+
+ # Logical unary not.
+ def unary!(v)
+ if v then
+ 0
+ else
+ 1;
+
+ # Define > with the same precedence as <.
+ def binary> 10 (LHS RHS)
+ RHS < LHS;
+
+ # Binary "logical or", (note that it does not "short circuit")
+ def binary| 5 (LHS RHS)
+ if LHS then
+ 1
+ else if RHS then
+ 1
+ else
+ 0;
+
+ # Define = with slightly lower precedence than relationals.
+ def binary= 9 (LHS RHS)
+ !(LHS < RHS | LHS > RHS);
+
+Many languages aspire to being able to implement their standard runtime
+library in the language itself. In Kaleidoscope, we can implement
+significant parts of the language in the library!
+
+We will break down implementation of these features into two parts:
+implementing support for user-defined binary operators and adding unary
+operators.
+
+User-defined Binary Operators
+=============================
+
+Adding support for user-defined binary operators is pretty simple with
+our current framework. We'll first add support for the unary/binary
+keywords:
+
+.. code-block:: c++
+
+ enum Token {
+ ...
+ // operators
+ tok_binary = -11, tok_unary = -12
+ };
+ ...
+ static int gettok() {
+ ...
+ if (IdentifierStr == "for") return tok_for;
+ if (IdentifierStr == "in") return tok_in;
+ if (IdentifierStr == "binary") return tok_binary;
+ if (IdentifierStr == "unary") return tok_unary;
+ return tok_identifier;
+
+This just adds lexer support for the unary and binary keywords, like we
+did in `previous chapters <LangImpl5.html#iflexer>`_. One nice thing
+about our current AST is that we represent binary operators with full
+generalization by using their ASCII code as the opcode. For our extended
+operators, we'll use this same representation, so we don't need any new
+AST or parser support.
+
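+For concreteness, here is a sketch (not code from the listing) of what
+that means: a user-defined "@" reuses the existing binary AST node
+unchanged, with the character itself as the opcode. Here ``LHS`` and
+``RHS`` stand for already-parsed subexpressions.
+
+.. code-block:: c++
+
+    // '@' is carried around exactly like '+' or '<': as a char opcode.
+    ExprAST *E = new BinaryExprAST('@', LHS, RHS);
+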
+On the other hand, we have to be able to represent the definitions of
+these new operators in the "def binary\| 5" part of the function
+definition. In our grammar so far, the "name" for the function
+definition is parsed as the "prototype" production and into the
+``PrototypeAST`` AST node. To represent our new user-defined operators
+as prototypes, we have to extend the ``PrototypeAST`` AST node like
+this:
+
+.. code-block:: c++
+
+ /// PrototypeAST - This class represents the "prototype" for a function,
+ /// which captures its argument names as well as if it is an operator.
+ class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+ bool isOperator;
+ unsigned Precedence; // Precedence if a binary op.
+ public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+ bool isoperator = false, unsigned prec = 0)
+ : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+ bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+ bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+ char getOperatorName() const {
+ assert(isUnaryOp() || isBinaryOp());
+ return Name[Name.size()-1];
+ }
+
+ unsigned getBinaryPrecedence() const { return Precedence; }
+
+ Function *Codegen();
+ };
+
+Basically, in addition to knowing a name for the prototype, we now keep
+track of whether it was an operator, and if it was, what precedence
+level the operator is at. The precedence is only used for binary
+operators (as you'll see below, it just doesn't apply for unary
+operators). Now that we have a way to represent the prototype for a
+user-defined operator, we need to parse it:
+
+.. code-block:: c++
+
+ /// prototype
+ /// ::= id '(' id* ')'
+ /// ::= binary LETTER number? (id, id)
+ static PrototypeAST *ParsePrototype() {
+ std::string FnName;
+
+ unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+ unsigned BinaryPrecedence = 30;
+
+ switch (CurTok) {
+ default:
+ return ErrorP("Expected function name in prototype");
+ case tok_identifier:
+ FnName = IdentifierStr;
+ Kind = 0;
+ getNextToken();
+ break;
+ case tok_binary:
+ getNextToken();
+ if (!isascii(CurTok))
+ return ErrorP("Expected binary operator");
+ FnName = "binary";
+ FnName += (char)CurTok;
+ Kind = 2;
+ getNextToken();
+
+ // Read the precedence if present.
+ if (CurTok == tok_number) {
+ if (NumVal < 1 || NumVal > 100)
+ return ErrorP("Invalid precedecnce: must be 1..100");
+ BinaryPrecedence = (unsigned)NumVal;
+ getNextToken();
+ }
+ break;
+ }
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ // Verify right number of names for operator.
+ if (Kind && ArgNames.size() != Kind)
+ return ErrorP("Invalid number of operands for operator");
+
+ return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+ }
+
+This is all fairly straightforward parsing code, and we have already
+seen a lot of similar code in the past. One interesting part of the
+code above is the couple of lines that set up ``FnName`` for binary
+operators. This builds names like "binary@" for a newly defined "@"
+operator. This takes advantage of the fact that symbol names in the
+LLVM symbol table are allowed to have any character in them, including
+embedded nul characters.
+
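+As a minimal sketch of that convention (a hypothetical helper, not code
+from the listing), the mangled symbol is just the keyword glued onto the
+operator character:
+
+.. code-block:: c++
+
+    // Hypothetical helper illustrating the naming scheme: a user-defined
+    // '@' operator lives in the symbol table as "binary@".
+    std::string MangleBinaryOp(char Op) {
+      return std::string("binary") + Op;
+    }
+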
+The next interesting thing to add is codegen support for these binary
+operators. Given our current structure, this is a simple addition of a
+default case for our existing binary operator node:
+
+.. code-block:: c++
+
+ Value *BinaryExprAST::Codegen() {
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateFAdd(L, R, "addtmp");
+ case '-': return Builder.CreateFSub(L, R, "subtmp");
+ case '*': return Builder.CreateFMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: break;
+ }
+
+ // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+ // a call to it.
+ Function *F = TheModule->getFunction(std::string("binary")+Op);
+ assert(F && "binary operator not found!");
+
+ Value *Ops[2] = { L, R };
+ return Builder.CreateCall(F, Ops, "binop");
+ }
+
+As you can see above, the new code is quite simple. It just
+does a lookup for the appropriate operator in the symbol table and
+generates a function call to it. Since user-defined operators are just
+built as normal functions (because the "prototype" boils down to a
+function with the right name), everything falls into place.
+
+The final piece of code we are missing is a bit of top-level magic:
+
+.. code-block:: c++
+
+ Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+ // If this is an operator, install it.
+ if (Proto->isBinaryOp())
+ BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ if (Value *RetVal = Body->Codegen()) {
+ ...
+
+Basically, before codegening a function, if it is a user-defined
+operator, we register it in the precedence table. This allows the binary
+operator parsing logic we already have in place to handle it. Since we
+are working on a fully-general operator precedence parser, this is all
+we need to do to "extend the grammar".
+
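+To make the effect concrete, here is a sketch of what installing and
+consulting the table amounts to, using the names from the listing and a
+hypothetical "@" operator:
+
+.. code-block:: c++
+
+    // After codegen of "def binary@ 15 (LHS RHS) ...", the table holds:
+    BinopPrecedence['@'] = 15;  // installed by FunctionAST::Codegen
+
+    // ...so GetTokPrecedence now treats '@' like any builtin operator,
+    // and ParseBinOpRHS will happily consume it:
+    int Prec = BinopPrecedence['@'];  // 15
+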
+Now we have useful user-defined binary operators. This builds a lot on
+the framework we previously built for other operators. Adding unary
+operators is a bit more challenging, because we don't have any framework
+for them yet; let's see what it takes.
+
+User-defined Unary Operators
+============================
+
+Since we don't currently support unary operators in the Kaleidoscope
+language, we'll need to add everything to support them. Above, we added
+simple support for the 'unary' keyword to the lexer. In addition to
+that, we need an AST node:
+
+.. code-block:: c++
+
+ /// UnaryExprAST - Expression class for a unary operator.
+ class UnaryExprAST : public ExprAST {
+ char Opcode;
+ ExprAST *Operand;
+ public:
+ UnaryExprAST(char opcode, ExprAST *operand)
+ : Opcode(opcode), Operand(operand) {}
+ virtual Value *Codegen();
+ };
+
+This AST node is very simple and obvious by now. It directly mirrors the
+binary operator AST node, except that it only has one child. With this,
+we need to add the parsing logic. Parsing a unary operator is pretty
+simple: we'll add a new function to do it:
+
+.. code-block:: c++
+
+ /// unary
+ /// ::= primary
+ /// ::= '!' unary
+ static ExprAST *ParseUnary() {
+ // If the current token is not an operator, it must be a primary expr.
+ if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+ return ParsePrimary();
+
+ // If this is a unary operator, read it.
+ int Opc = CurTok;
+ getNextToken();
+ if (ExprAST *Operand = ParseUnary())
+ return new UnaryExprAST(Opc, Operand);
+ return 0;
+ }
+
+The grammar we add is pretty straightforward here. If we see a unary
+operator when parsing a primary expression, we eat the operator as a
+prefix and parse the remaining piece as another unary expression. This
+allows us to handle multiple unary operators (e.g. "!!x"). Note that
+unary operators can't have ambiguous parses like binary operators can,
+so there is no need for precedence information.
+
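+For example (a hand-built sketch using the AST classes above, not code
+from the listing), the input "!!x" nests two unary nodes around a
+variable reference:
+
+.. code-block:: c++
+
+    // "!!x" parses into UnaryExprAST('!', UnaryExprAST('!', x)):
+    ExprAST *X = new VariableExprAST("x");
+    ExprAST *Inner = new UnaryExprAST('!', X);     // innermost '!'
+    ExprAST *Outer = new UnaryExprAST('!', Inner); // outermost '!'
+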
+The problem with this function is that we need to call ParseUnary from
+somewhere. To do this, we change previous callers of ParsePrimary to
+call ParseUnary instead:
+
+.. code-block:: c++
+
+ /// binoprhs
+ /// ::= ('+' unary)*
+ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ ...
+ // Parse the unary expression after the binary operator.
+ ExprAST *RHS = ParseUnary();
+ if (!RHS) return 0;
+ ...
+ }
+ /// expression
+ /// ::= unary binoprhs
+ ///
+ static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParseUnary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+ }
+
+With these two simple changes, we are now able to parse unary operators
+and build the AST for them. Next up, we need to add parser support for
+prototypes, to parse the unary operator prototype. We extend the binary
+operator code above with:
+
+.. code-block:: c++
+
+ /// prototype
+ /// ::= id '(' id* ')'
+ /// ::= binary LETTER number? (id, id)
+ /// ::= unary LETTER (id)
+ static PrototypeAST *ParsePrototype() {
+ std::string FnName;
+
+ unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+ unsigned BinaryPrecedence = 30;
+
+ switch (CurTok) {
+ default:
+ return ErrorP("Expected function name in prototype");
+ case tok_identifier:
+ FnName = IdentifierStr;
+ Kind = 0;
+ getNextToken();
+ break;
+ case tok_unary:
+ getNextToken();
+ if (!isascii(CurTok))
+ return ErrorP("Expected unary operator");
+ FnName = "unary";
+ FnName += (char)CurTok;
+ Kind = 1;
+ getNextToken();
+ break;
+ case tok_binary:
+ ...
+
+As with binary operators, we give unary operators a name that includes
+the operator character. This assists us at code generation time.
+Speaking of which, the final piece we need to add is codegen support
+for unary operators. It looks like this:
+
+.. code-block:: c++
+
+ Value *UnaryExprAST::Codegen() {
+ Value *OperandV = Operand->Codegen();
+ if (OperandV == 0) return 0;
+
+ Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+ if (F == 0)
+ return ErrorV("Unknown unary operator");
+
+ return Builder.CreateCall(F, OperandV, "unop");
+ }
+
+This code is similar to, but simpler than, the code for binary
+operators. It is simpler primarily because it doesn't need to handle any
+predefined operators.
+
+Kicking the Tires
+=================
+
+It is somewhat hard to believe, but with the few simple extensions we've
+covered in the last chapters, we have grown a real-ish language. With
+this, we can do a lot of interesting things, including I/O, math, and a
+bunch of other things. For example, we can now add a nice sequencing
+operator (printd is defined to print out the specified value and a
+newline):
+
+::
+
+ ready> extern printd(x);
+ Read extern:
+ declare double @printd(double)
+
+ ready> def binary : 1 (x y) 0; # Low-precedence operator that ignores operands.
+ ..
+ ready> printd(123) : printd(456) : printd(789);
+ 123.000000
+ 456.000000
+ 789.000000
+ Evaluated to 0.000000
+
+We can also define a bunch of other "primitive" operations, such as:
+
+::
+
+ # Logical unary not.
+ def unary!(v)
+ if v then
+ 0
+ else
+ 1;
+
+ # Unary negate.
+ def unary-(v)
+ 0-v;
+
+ # Define > with the same precedence as <.
+ def binary> 10 (LHS RHS)
+ RHS < LHS;
+
+ # Binary logical or, which does not short circuit.
+ def binary| 5 (LHS RHS)
+ if LHS then
+ 1
+ else if RHS then
+ 1
+ else
+ 0;
+
+ # Binary logical and, which does not short circuit.
+ def binary& 6 (LHS RHS)
+ if !LHS then
+ 0
+ else
+ !!RHS;
+
+ # Define = with slightly lower precedence than relationals.
+ def binary = 9 (LHS RHS)
+ !(LHS < RHS | LHS > RHS);
+
+ # Define ':' for sequencing: as a low-precedence operator that ignores operands
+ # and just returns the RHS.
+ def binary : 1 (x y) y;
+
+Given the previous if/then/else support, we can also define interesting
+functions for I/O. For example, the following prints out a character
+whose "density" reflects the value passed in: the lower the value, the
+denser the character:
+
+::
+
+ ready>
+
+ extern putchard(char)
+ def printdensity(d)
+ if d > 8 then
+ putchard(32) # ' '
+ else if d > 4 then
+ putchard(46) # '.'
+ else if d > 2 then
+ putchard(43) # '+'
+ else
+ putchard(42); # '*'
+ ...
+ ready> printdensity(1): printdensity(2): printdensity(3):
+ printdensity(4): printdensity(5): printdensity(9):
+ putchard(10);
+ **++.
+ Evaluated to 0.000000
+
+Based on these simple primitive operations, we can start to define more
+interesting things. For example, here's a little function that computes
+the number of iterations it takes for a point in the complex plane to
+escape:
+
+::
+
+ # Determine whether the specific location diverges.
+ # Solve for z = z^2 + c in the complex plane.
+ def mandleconverger(real imag iters creal cimag)
+ if iters > 255 | (real*real + imag*imag > 4) then
+ iters
+ else
+ mandleconverger(real*real - imag*imag + creal,
+ 2*real*imag + cimag,
+ iters+1, creal, cimag);
+
+ # Return the number of iterations required for the iteration to escape
+ def mandleconverge(real imag)
+ mandleconverger(real, imag, 0, real, imag);
+
+This "``z = z2 + c``" function is a beautiful little creature that is
+the basis for computation of the `Mandelbrot
+Set <http://en.wikipedia.org/wiki/Mandelbrot_set>`_. Our
+``mandelconverge`` function returns the number of iterations that it
+takes for a complex orbit to escape, saturating to 255. This is not a
+very useful function by itself, but if you plot its value over a
+two-dimensional plane, you can see the Mandelbrot set. Given that we are
+limited to using putchard here, our amazing graphical output is limited,
+but we can whip together something using the density plotter above:
+
+::
+
+ # Compute and plot the mandelbrot set with the specified 2 dimensional range
+ # info.
+ def mandelhelp(xmin xmax xstep ymin ymax ystep)
+ for y = ymin, y < ymax, ystep in (
+ (for x = xmin, x < xmax, xstep in
+ printdensity(mandleconverge(x,y)))
+ : putchard(10)
+ )
+
+ # mandel - This is a convenient helper function for plotting the mandelbrot set
+ # from the specified position with the specified magnification.
+ def mandel(realstart imagstart realmag imagmag)
+ mandelhelp(realstart, realstart+realmag*78, realmag,
+ imagstart, imagstart+imagmag*40, imagmag);
+
+Given this, we can try plotting out the Mandelbrot set! Let's try it out:
+
+::
+
+ ready> mandel(-2.3, -1.3, 0.05, 0.07);
+ *******************************+++++++++++*************************************
+ *************************+++++++++++++++++++++++*******************************
+ **********************+++++++++++++++++++++++++++++****************************
+ *******************+++++++++++++++++++++.. ...++++++++*************************
+ *****************++++++++++++++++++++++.... ...+++++++++***********************
+ ***************+++++++++++++++++++++++..... ...+++++++++*********************
+ **************+++++++++++++++++++++++.... ....+++++++++********************
+ *************++++++++++++++++++++++...... .....++++++++*******************
+ ************+++++++++++++++++++++....... .......+++++++******************
+ ***********+++++++++++++++++++.... ... .+++++++*****************
+ **********+++++++++++++++++....... .+++++++****************
+ *********++++++++++++++........... ...+++++++***************
+ ********++++++++++++............ ...++++++++**************
+ ********++++++++++... .......... .++++++++**************
+ *******+++++++++..... .+++++++++*************
+ *******++++++++...... ..+++++++++*************
+ *******++++++....... ..+++++++++*************
+ *******+++++...... ..+++++++++*************
+ *******.... .... ...+++++++++*************
+ *******.... . ...+++++++++*************
+ *******+++++...... ...+++++++++*************
+ *******++++++....... ..+++++++++*************
+ *******++++++++...... .+++++++++*************
+ *******+++++++++..... ..+++++++++*************
+ ********++++++++++... .......... .++++++++**************
+ ********++++++++++++............ ...++++++++**************
+ *********++++++++++++++.......... ...+++++++***************
+ **********++++++++++++++++........ .+++++++****************
+ **********++++++++++++++++++++.... ... ..+++++++****************
+ ***********++++++++++++++++++++++....... .......++++++++*****************
+ ************+++++++++++++++++++++++...... ......++++++++******************
+ **************+++++++++++++++++++++++.... ....++++++++********************
+ ***************+++++++++++++++++++++++..... ...+++++++++*********************
+ *****************++++++++++++++++++++++.... ...++++++++***********************
+ *******************+++++++++++++++++++++......++++++++*************************
+ *********************++++++++++++++++++++++.++++++++***************************
+ *************************+++++++++++++++++++++++*******************************
+ ******************************+++++++++++++************************************
+ *******************************************************************************
+ *******************************************************************************
+ *******************************************************************************
+ Evaluated to 0.000000
+ ready> mandel(-2, -1, 0.02, 0.04);
+ **************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
+ ***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ *********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
+ *******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
+ *****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
+ ***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
+ **************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
+ ************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
+ ***********++++++++++++++++++++++++++++++++++++++++++++++++++........ .
+ **********++++++++++++++++++++++++++++++++++++++++++++++.............
+ ********+++++++++++++++++++++++++++++++++++++++++++..................
+ *******+++++++++++++++++++++++++++++++++++++++.......................
+ ******+++++++++++++++++++++++++++++++++++...........................
+ *****++++++++++++++++++++++++++++++++............................
+ *****++++++++++++++++++++++++++++...............................
+ ****++++++++++++++++++++++++++...... .........................
+ ***++++++++++++++++++++++++......... ...... ...........
+ ***++++++++++++++++++++++............
+ **+++++++++++++++++++++..............
+ **+++++++++++++++++++................
+ *++++++++++++++++++.................
+ *++++++++++++++++............ ...
+ *++++++++++++++..............
+ *+++....++++................
+ *.......... ...........
+ *
+ *.......... ...........
+ *+++....++++................
+ *++++++++++++++..............
+ *++++++++++++++++............ ...
+ *++++++++++++++++++.................
+ **+++++++++++++++++++................
+ **+++++++++++++++++++++..............
+ ***++++++++++++++++++++++............
+ ***++++++++++++++++++++++++......... ...... ...........
+ ****++++++++++++++++++++++++++...... .........................
+ *****++++++++++++++++++++++++++++...............................
+ *****++++++++++++++++++++++++++++++++............................
+ ******+++++++++++++++++++++++++++++++++++...........................
+ *******+++++++++++++++++++++++++++++++++++++++.......................
+ ********+++++++++++++++++++++++++++++++++++++++++++..................
+ Evaluated to 0.000000
+ ready> mandel(-0.9, -1.4, 0.02, 0.03);
+ *******************************************************************************
+ *******************************************************************************
+ *******************************************************************************
+ **********+++++++++++++++++++++************************************************
+ *+++++++++++++++++++++++++++++++++++++++***************************************
+ +++++++++++++++++++++++++++++++++++++++++++++**********************************
+ ++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
+ +++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
+ +++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
+ +++++++++++++++++++++++++++++++.... ......+++++++++++++++++++****************
+ +++++++++++++++++++++++++++++....... ........+++++++++++++++++++**************
+ ++++++++++++++++++++++++++++........ ........++++++++++++++++++++************
+ +++++++++++++++++++++++++++......... .. ...+++++++++++++++++++++**********
+ ++++++++++++++++++++++++++........... ....++++++++++++++++++++++********
+ ++++++++++++++++++++++++............. .......++++++++++++++++++++++******
+ +++++++++++++++++++++++............. ........+++++++++++++++++++++++****
+ ++++++++++++++++++++++........... ..........++++++++++++++++++++++***
+ ++++++++++++++++++++........... .........++++++++++++++++++++++*
+ ++++++++++++++++++............ ...........++++++++++++++++++++
+ ++++++++++++++++............... .............++++++++++++++++++
+ ++++++++++++++................. ...............++++++++++++++++
+ ++++++++++++.................. .................++++++++++++++
+ +++++++++.................. .................+++++++++++++
+ ++++++........ . ......... ..++++++++++++
+ ++............ ...... ....++++++++++
+ .............. ...++++++++++
+ .............. ....+++++++++
+ .............. .....++++++++
+ ............. ......++++++++
+ ........... .......++++++++
+ ......... ........+++++++
+ ......... ........+++++++
+ ......... ....+++++++
+ ........ ...+++++++
+ ....... ...+++++++
+ ....+++++++
+ .....+++++++
+ ....+++++++
+ ....+++++++
+ ....+++++++
+ Evaluated to 0.000000
+ ready> ^D
+
+At this point, you may be starting to realize that Kaleidoscope is a
+real and powerful language. It may not be self-similar :), but it can be
+used to plot things that are!
+
+With this, we conclude the "adding user-defined operators" chapter of
+the tutorial. We have successfully augmented our language, adding the
+ability to extend the language in the library, and we have shown how
+this can be used to build a simple but interesting end-user application
+in Kaleidoscope. At this point, Kaleidoscope can build a variety of
+applications that are functional and can call functions with
+side-effects, but it can't actually define and mutate a variable itself.
+
+Strikingly, variable mutation is an important feature of some languages,
+and it is not at all obvious how to `add support for mutable
+variables <LangImpl7.html>`_ without having to add an "SSA construction"
+phase to your front-end. In the next chapter, we will describe how you
+can add variable mutation without building SSA in your front-end.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+support for user-defined operators. To build this example, use:
+
+.. code-block:: bash
+
+ # Compile
+ clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+ # Run
+ ./toy
+
+On some platforms, you will need to specify -rdynamic or
+-Wl,--export-dynamic when linking. This ensures that symbols defined in
+the main executable are exported to the dynamic linker and so are
+available for symbol resolution at run time. This is not needed if you
+compile your support code into a shared library, although doing that
+will cause problems on Windows.
+
+Here is the code:
+
+.. code-block:: c++
+
+ #include "llvm/DerivedTypes.h"
+ #include "llvm/ExecutionEngine/ExecutionEngine.h"
+ #include "llvm/ExecutionEngine/JIT.h"
+ #include "llvm/IRBuilder.h"
+ #include "llvm/LLVMContext.h"
+ #include "llvm/Module.h"
+ #include "llvm/PassManager.h"
+ #include "llvm/Analysis/Verifier.h"
+ #include "llvm/Analysis/Passes.h"
+ #include "llvm/DataLayout.h"
+ #include "llvm/Transforms/Scalar.h"
+ #include "llvm/Support/TargetSelect.h"
+ #include <cstdio>
+ #include <string>
+ #include <map>
+ #include <vector>
+ using namespace llvm;
+
+ //===----------------------------------------------------------------------===//
+ // Lexer
+ //===----------------------------------------------------------------------===//
+
+ // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+ // of these for known things.
+ enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5,
+
+ // control
+ tok_if = -6, tok_then = -7, tok_else = -8,
+ tok_for = -9, tok_in = -10,
+
+ // operators
+ tok_binary = -11, tok_unary = -12
+ };
+
+ static std::string IdentifierStr; // Filled in if tok_identifier
+ static double NumVal; // Filled in if tok_number
+
+ /// gettok - Return the next token from standard input.
+ static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ if (IdentifierStr == "if") return tok_if;
+ if (IdentifierStr == "then") return tok_then;
+ if (IdentifierStr == "else") return tok_else;
+ if (IdentifierStr == "for") return tok_for;
+ if (IdentifierStr == "in") return tok_in;
+ if (IdentifierStr == "binary") return tok_binary;
+ if (IdentifierStr == "unary") return tok_unary;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Abstract Syntax Tree (aka Parse Tree)
+ //===----------------------------------------------------------------------===//
+
+ /// ExprAST - Base class for all expression nodes.
+ class ExprAST {
+ public:
+ virtual ~ExprAST() {}
+ virtual Value *Codegen() = 0;
+ };
+
+ /// NumberExprAST - Expression class for numeric literals like "1.0".
+ class NumberExprAST : public ExprAST {
+ double Val;
+ public:
+ NumberExprAST(double val) : Val(val) {}
+ virtual Value *Codegen();
+ };
+
+ /// VariableExprAST - Expression class for referencing a variable, like "a".
+ class VariableExprAST : public ExprAST {
+ std::string Name;
+ public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ virtual Value *Codegen();
+ };
+
+ /// UnaryExprAST - Expression class for a unary operator.
+ class UnaryExprAST : public ExprAST {
+ char Opcode;
+ ExprAST *Operand;
+ public:
+ UnaryExprAST(char opcode, ExprAST *operand)
+ : Opcode(opcode), Operand(operand) {}
+ virtual Value *Codegen();
+ };
+
+ /// BinaryExprAST - Expression class for a binary operator.
+ class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+ public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ virtual Value *Codegen();
+ };
+
+ /// CallExprAST - Expression class for function calls.
+ class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+ public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ virtual Value *Codegen();
+ };
+
+ /// IfExprAST - Expression class for if/then/else.
+ class IfExprAST : public ExprAST {
+ ExprAST *Cond, *Then, *Else;
+ public:
+ IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+ : Cond(cond), Then(then), Else(_else) {}
+ virtual Value *Codegen();
+ };
+
+ /// ForExprAST - Expression class for for/in.
+ class ForExprAST : public ExprAST {
+ std::string VarName;
+ ExprAST *Start, *End, *Step, *Body;
+ public:
+ ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+ ExprAST *step, ExprAST *body)
+ : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+ virtual Value *Codegen();
+ };
+
+ /// PrototypeAST - This class represents the "prototype" for a function,
+ /// which captures its name, and its argument names (thus implicitly the number
+ /// of arguments the function takes), as well as if it is an operator.
+ class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+ bool isOperator;
+ unsigned Precedence; // Precedence if a binary op.
+ public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+ bool isoperator = false, unsigned prec = 0)
+ : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+ bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+ bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+ char getOperatorName() const {
+ assert(isUnaryOp() || isBinaryOp());
+ return Name[Name.size()-1];
+ }
+
+ unsigned getBinaryPrecedence() const { return Precedence; }
+
+ Function *Codegen();
+ };
+
+ /// FunctionAST - This class represents a function definition itself.
+ class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+ public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+ Function *Codegen();
+ };
+
+ //===----------------------------------------------------------------------===//
+ // Parser
+ //===----------------------------------------------------------------------===//
+
+ /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+ /// token the parser is looking at. getNextToken reads another token from the
+ /// lexer and updates CurTok with its results.
+ static int CurTok;
+ static int getNextToken() {
+ return CurTok = gettok();
+ }
+
+ /// BinopPrecedence - This holds the precedence for each binary operator that is
+ /// defined.
+ static std::map<char, int> BinopPrecedence;
+
+ /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+ static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+ }
+
+ /// Error* - These are little helper functions for error handling.
+ ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+ PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+ FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+ static ExprAST *ParseExpression();
+
+ /// identifierexpr
+ /// ::= identifier
+ /// ::= identifier '(' expression* ')'
+ static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+ }
+
+ /// numberexpr ::= number
+ static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+ }
+
+ /// parenexpr ::= '(' expression ')'
+ static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+ }
+
+ /// ifexpr ::= 'if' expression 'then' expression 'else' expression
+ static ExprAST *ParseIfExpr() {
+ getNextToken(); // eat the if.
+
+ // condition.
+ ExprAST *Cond = ParseExpression();
+ if (!Cond) return 0;
+
+ if (CurTok != tok_then)
+ return Error("expected then");
+ getNextToken(); // eat the then
+
+ ExprAST *Then = ParseExpression();
+ if (Then == 0) return 0;
+
+ if (CurTok != tok_else)
+ return Error("expected else");
+
+ getNextToken();
+
+ ExprAST *Else = ParseExpression();
+ if (!Else) return 0;
+
+ return new IfExprAST(Cond, Then, Else);
+ }
+
+ /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+ static ExprAST *ParseForExpr() {
+ getNextToken(); // eat the for.
+
+ if (CurTok != tok_identifier)
+ return Error("expected identifier after for");
+
+ std::string IdName = IdentifierStr;
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '=')
+ return Error("expected '=' after for");
+ getNextToken(); // eat '='.
+
+
+ ExprAST *Start = ParseExpression();
+ if (Start == 0) return 0;
+ if (CurTok != ',')
+ return Error("expected ',' after for start value");
+ getNextToken();
+
+ ExprAST *End = ParseExpression();
+ if (End == 0) return 0;
+
+ // The step value is optional.
+ ExprAST *Step = 0;
+ if (CurTok == ',') {
+ getNextToken();
+ Step = ParseExpression();
+ if (Step == 0) return 0;
+ }
+
+ if (CurTok != tok_in)
+ return Error("expected 'in' after for");
+ getNextToken(); // eat 'in'.
+
+ ExprAST *Body = ParseExpression();
+ if (Body == 0) return 0;
+
+ return new ForExprAST(IdName, Start, End, Step, Body);
+ }
+
+ /// primary
+ /// ::= identifierexpr
+ /// ::= numberexpr
+ /// ::= parenexpr
+ /// ::= ifexpr
+ /// ::= forexpr
+ static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ case tok_if: return ParseIfExpr();
+ case tok_for: return ParseForExpr();
+ }
+ }
+
+ /// unary
+ /// ::= primary
+ /// ::= '!' unary
+ static ExprAST *ParseUnary() {
+ // If the current token is not an operator, it must be a primary expr.
+ if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+ return ParsePrimary();
+
+ // If this is a unary operator, read it.
+ int Opc = CurTok;
+ getNextToken();
+ if (ExprAST *Operand = ParseUnary())
+ return new UnaryExprAST(Opc, Operand);
+ return 0;
+ }
+
+ /// binoprhs
+ /// ::= ('+' unary)*
+ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the unary expression after the binary operator.
+ ExprAST *RHS = ParseUnary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+ }
+
+ /// expression
+ /// ::= unary binoprhs
+ ///
+ static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParseUnary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+ }
+
+ /// prototype
+ /// ::= id '(' id* ')'
+ /// ::= binary LETTER number? (id, id)
+ /// ::= unary LETTER (id)
+ static PrototypeAST *ParsePrototype() {
+ std::string FnName;
+
+ unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+ unsigned BinaryPrecedence = 30;
+
+ switch (CurTok) {
+ default:
+ return ErrorP("Expected function name in prototype");
+ case tok_identifier:
+ FnName = IdentifierStr;
+ Kind = 0;
+ getNextToken();
+ break;
+ case tok_unary:
+ getNextToken();
+ if (!isascii(CurTok))
+ return ErrorP("Expected unary operator");
+ FnName = "unary";
+ FnName += (char)CurTok;
+ Kind = 1;
+ getNextToken();
+ break;
+ case tok_binary:
+ getNextToken();
+ if (!isascii(CurTok))
+ return ErrorP("Expected binary operator");
+ FnName = "binary";
+ FnName += (char)CurTok;
+ Kind = 2;
+ getNextToken();
+
+ // Read the precedence if present.
+ if (CurTok == tok_number) {
+ if (NumVal < 1 || NumVal > 100)
+ return ErrorP("Invalid precedecnce: must be 1..100");
+ BinaryPrecedence = (unsigned)NumVal;
+ getNextToken();
+ }
+ break;
+ }
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ // Verify right number of names for operator.
+ if (Kind && ArgNames.size() != Kind)
+ return ErrorP("Invalid number of operands for operator");
+
+ return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+ }
+
+ /// definition ::= 'def' prototype expression
+ static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+ }
+
+ /// toplevelexpr ::= expression
+ static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+ }
+
+ /// external ::= 'extern' prototype
+ static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Code Generation
+ //===----------------------------------------------------------------------===//
+
+ static Module *TheModule;
+ static IRBuilder<> Builder(getGlobalContext());
+ static std::map<std::string, Value*> NamedValues;
+ static FunctionPassManager *TheFPM;
+
+ Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+ Value *NumberExprAST::Codegen() {
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
+ }
+
+ Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ return V ? V : ErrorV("Unknown variable name");
+ }
+
+ Value *UnaryExprAST::Codegen() {
+ Value *OperandV = Operand->Codegen();
+ if (OperandV == 0) return 0;
+
+ Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+ if (F == 0)
+ return ErrorV("Unknown unary operator");
+
+ return Builder.CreateCall(F, OperandV, "unop");
+ }
+
+ Value *BinaryExprAST::Codegen() {
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateFAdd(L, R, "addtmp");
+ case '-': return Builder.CreateFSub(L, R, "subtmp");
+ case '*': return Builder.CreateFMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: break;
+ }
+
+ // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+ // a call to it.
+ Function *F = TheModule->getFunction(std::string("binary")+Op);
+ assert(F && "binary operator not found!");
+
+ Value *Ops[2] = { L, R };
+ return Builder.CreateCall(F, Ops, "binop");
+ }
+
+ Value *CallExprAST::Codegen() {
+ // Look up the name in the global module table.
+ Function *CalleeF = TheModule->getFunction(Callee);
+ if (CalleeF == 0)
+ return ErrorV("Unknown function referenced");
+
+ // If argument mismatch error.
+ if (CalleeF->arg_size() != Args.size())
+ return ErrorV("Incorrect # arguments passed");
+
+ std::vector<Value*> ArgsV;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ ArgsV.push_back(Args[i]->Codegen());
+ if (ArgsV.back() == 0) return 0;
+ }
+
+ return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+ }
+
+ Value *IfExprAST::Codegen() {
+ Value *CondV = Cond->Codegen();
+ if (CondV == 0) return 0;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ CondV = Builder.CreateFCmpONE(CondV,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "ifcond");
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Create blocks for the then and else cases. Insert the 'then' block at the
+ // end of the function.
+ BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+ BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+ BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+ Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+ // Emit then value.
+ Builder.SetInsertPoint(ThenBB);
+
+ Value *ThenV = Then->Codegen();
+ if (ThenV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+ ThenBB = Builder.GetInsertBlock();
+
+ // Emit else block.
+ TheFunction->getBasicBlockList().push_back(ElseBB);
+ Builder.SetInsertPoint(ElseBB);
+
+ Value *ElseV = Else->Codegen();
+ if (ElseV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+ ElseBB = Builder.GetInsertBlock();
+
+ // Emit merge block.
+ TheFunction->getBasicBlockList().push_back(MergeBB);
+ Builder.SetInsertPoint(MergeBB);
+ PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
+ "iftmp");
+
+ PN->addIncoming(ThenV, ThenBB);
+ PN->addIncoming(ElseV, ElseBB);
+ return PN;
+ }
+
+ Value *ForExprAST::Codegen() {
+ // Output this as:
+ // ...
+ // start = startexpr
+ // goto loop
+ // loop:
+ // variable = phi [start, loopheader], [nextvariable, loopend]
+ // ...
+ // bodyexpr
+ // ...
+ // loopend:
+ // step = stepexpr
+ // nextvariable = variable + step
+ // endcond = endexpr
+ // br endcond, loop, endloop
+ // outloop:
+
+ // Emit the start code first, without 'variable' in scope.
+ Value *StartVal = Start->Codegen();
+ if (StartVal == 0) return 0;
+
+ // Make the new basic block for the loop header, inserting after current
+ // block.
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+ BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+ BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+ // Insert an explicit fall through from the current block to the LoopBB.
+ Builder.CreateBr(LoopBB);
+
+ // Start insertion in LoopBB.
+ Builder.SetInsertPoint(LoopBB);
+
+ // Start the PHI node with an entry for Start.
+ PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
+ Variable->addIncoming(StartVal, PreheaderBB);
+
+ // Within the loop, the variable is defined equal to the PHI node. If it
+ // shadows an existing variable, we have to restore it, so save it now.
+ Value *OldVal = NamedValues[VarName];
+ NamedValues[VarName] = Variable;
+
+ // Emit the body of the loop. This, like any other expr, can change the
+ // current BB. Note that we ignore the value computed by the body, but don't
+ // allow an error.
+ if (Body->Codegen() == 0)
+ return 0;
+
+ // Emit the step value.
+ Value *StepVal;
+ if (Step) {
+ StepVal = Step->Codegen();
+ if (StepVal == 0) return 0;
+ } else {
+ // If not specified, use 1.0.
+ StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+ }
+
+ Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
+
+ // Compute the end condition.
+ Value *EndCond = End->Codegen();
+ if (EndCond == 0) return EndCond;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ EndCond = Builder.CreateFCmpONE(EndCond,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "loopcond");
+
+ // Create the "after loop" block and insert it.
+ BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+ BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+ // Insert the conditional branch into the end of LoopEndBB.
+ Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+ // Any new code will be inserted in AfterBB.
+ Builder.SetInsertPoint(AfterBB);
+
+ // Add a new entry to the PHI node for the backedge.
+ Variable->addIncoming(NextVar, LoopEndBB);
+
+ // Restore the unshadowed variable.
+ if (OldVal)
+ NamedValues[VarName] = OldVal;
+ else
+ NamedValues.erase(VarName);
+
+ // for expr always returns 0.0.
+ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+ }
+
+ Function *PrototypeAST::Codegen() {
+ // Make the function type: double(double,double) etc.
+ std::vector<Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
+
+ Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+ // If F conflicted, there was already something named 'Name'. If it has a
+ // body, don't allow redefinition or reextern.
+ if (F->getName() != Name) {
+ // Delete the one we just made and get the existing one.
+ F->eraseFromParent();
+ F = TheModule->getFunction(Name);
+
+ // If F already has a body, reject this.
+ if (!F->empty()) {
+ ErrorF("redefinition of function");
+ return 0;
+ }
+
+ // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) {
+ ErrorF("redefinition of function with different # args");
+ return 0;
+ }
+ }
+
+ // Set names for all arguments.
+ unsigned Idx = 0;
+ for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+ ++AI, ++Idx) {
+ AI->setName(Args[Idx]);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = AI;
+ }
+
+ return F;
+ }
+
+ Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+ // If this is an operator, install it.
+ if (Proto->isBinaryOp())
+ BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ // Optimize the function.
+ TheFPM->run(*TheFunction);
+
+ return TheFunction;
+ }
+
+ // Error reading body, remove function.
+ TheFunction->eraseFromParent();
+
+ if (Proto->isBinaryOp())
+ BinopPrecedence.erase(Proto->getOperatorName());
+ return 0;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Top-Level parsing and JIT Driver
+ //===----------------------------------------------------------------------===//
+
+ static ExecutionEngine *TheExecutionEngine;
+
+ static void HandleDefinition() {
+ if (FunctionAST *F = ParseDefinition()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read function definition:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleExtern() {
+ if (PrototypeAST *P = ParseExtern()) {
+ if (Function *F = P->Codegen()) {
+ fprintf(stderr, "Read extern: ");
+ F->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (Function *LF = F->Codegen()) {
+ // JIT the function, returning a function pointer.
+ void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+ // Cast it to the right type (takes no arguments, returns a double) so we
+ // can call it as a native function.
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
+ fprintf(stderr, "Evaluated to %f\n", FP());
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ /// top ::= definition | external | expression | ';'
+ static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+ }
+
+ //===----------------------------------------------------------------------===//
+ // "Library" functions that can be "extern'd" from user code.
+ //===----------------------------------------------------------------------===//
+
+ /// putchard - putchar that takes a double and returns 0.
+ extern "C"
+ double putchard(double X) {
+ putchar((char)X);
+ return 0;
+ }
+
+ /// printd - printf that takes a double and prints it as "%f\n", returning 0.
+ extern "C"
+ double printd(double X) {
+ printf("%f\n", X);
+ return 0;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Main driver code.
+ //===----------------------------------------------------------------------===//
+
+ int main() {
+ InitializeNativeTarget();
+ LLVMContext &Context = getGlobalContext();
+
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context);
+
+ // Create the JIT. This takes ownership of the module.
+ std::string ErrStr;
+ TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+ if (!TheExecutionEngine) {
+ fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+ exit(1);
+ }
+
+ FunctionPassManager OurFPM(TheModule);
+
+ // Set up the optimizer pipeline. Start with registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+ // Provide basic AliasAnalysis support for GVN.
+ OurFPM.add(createBasicAliasAnalysisPass());
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule->dump();
+
+ return 0;
+ }
+
+`Next: Extending the language: mutable variables / SSA
+construction <LangImpl7.html>`_
+
diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html
deleted file mode 100644
index 4d5a4aa7e84a..000000000000
--- a/docs/tutorial/LangImpl7.html
+++ /dev/null
@@ -1,2164 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Extending the Language: Mutable Variables / SSA
- construction</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: Mutable Variables</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 7
- <ol>
- <li><a href="#intro">Chapter 7 Introduction</a></li>
- <li><a href="#why">Why is this a hard problem?</a></li>
- <li><a href="#memory">Memory in LLVM</a></li>
- <li><a href="#kalvars">Mutable Variables in Kaleidoscope</a></li>
- <li><a href="#adjustments">Adjusting Existing Variables for
- Mutation</a></li>
- <li><a href="#assignment">New Assignment Operator</a></li>
- <li><a href="#localvars">User-defined Local Variables</a></li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="LangImpl8.html">Chapter 8</a>: Conclusion and other useful LLVM
- tidbits</li>
-</ul>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 7 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 7 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial. In chapters 1 through 6, we've built a very
-respectable, albeit simple, <a
-href="http://en.wikipedia.org/wiki/Functional_programming">functional
-programming language</a>. In our journey, we learned some parsing techniques,
-how to build and represent an AST, how to build LLVM IR, and how to optimize
-the resultant code as well as JIT compile it.</p>
-
-<p>While Kaleidoscope is interesting as a functional language, the fact that it
-is functional makes it "too easy" to generate LLVM IR for it. In particular, a
-functional language makes it very easy to build LLVM IR directly in <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">SSA form</a>.
-Since LLVM requires that the input code be in SSA form, this is a very nice
-property. It is often unclear to newcomers, however, how to generate code for an
-imperative language with mutable variables.</p>
-
-<p>The short (and happy) summary of this chapter is that there is no need for
-your front-end to build SSA form: LLVM provides highly tuned and well tested
-support for this, though the way it works is a bit unexpected for some.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="why">Why is this a hard problem?</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-To understand why mutable variables cause complexities in SSA construction,
-consider this extremely simple C example:
-</p>
-
-<div class="doc_code">
-<pre>
-int G, H;
-int test(_Bool Condition) {
- int X;
- if (Condition)
- X = G;
- else
- X = H;
- return X;
-}
-</pre>
-</div>
-
-<p>In this case, we have the variable "X", whose value depends on the path
-executed in the program. Because there are two different possible values for X
-before the return instruction, a PHI node is inserted to merge the two values.
-The LLVM IR that we want for this example looks like this:</p>
-
-<div class="doc_code">
-<pre>
-@G = weak global i32 0 ; type of @G is i32*
-@H = weak global i32 0 ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
- br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
- %X.0 = load i32* @G
- br label %cond_next
-
-cond_false:
- %X.1 = load i32* @H
- br label %cond_next
-
-cond_next:
- %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
- ret i32 %X.2
-}
-</pre>
-</div>
-
-<p>In this example, the loads from the G and H global variables are explicit in
-the LLVM IR, and they live in the then/else branches of the if statement
-(cond_true/cond_false). In order to merge the incoming values, the X.2 phi node
-in the cond_next block selects the right value to use based on where control
-flow is coming from: if control flow comes from the cond_false block, X.2 gets
-the value of X.1. Alternatively, if control flow comes from cond_true, it gets
-the value of X.0. The intent of this chapter is not to explain the details of
-SSA form. For more information, see one of the many <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">online
-references</a>.</p>
-
-<p>The question for this article is "who places the phi nodes when lowering
-assignments to mutable variables?". The issue here is that LLVM
-<em>requires</em> that its IR be in SSA form: there is no "non-ssa" mode for it.
-However, SSA construction requires non-trivial algorithms and data structures,
-so it is inconvenient and wasteful for every front-end to have to reproduce this
-logic.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="memory">Memory in LLVM</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The 'trick' here is that while LLVM does require all register values to be
-in SSA form, it does not require (or permit) memory objects to be in SSA form.
-In the example above, note that the loads from G and H are direct accesses to
-G and H: they are not renamed or versioned. This differs from some other
-compiler systems, which do try to version memory objects. In LLVM, instead of
-encoding dataflow analysis of memory into the LLVM IR, it is handled with <a
-href="../WritingAnLLVMPass.html">Analysis Passes</a> which are computed on
-demand.</p>
-
-<p>
-With this in mind, the high-level idea is that we want to make a stack variable
-(which lives in memory, because it is on the stack) for each mutable object in
-a function. To take advantage of this trick, we need to talk about how LLVM
-represents stack variables.
-</p>
-
-<p>In LLVM, all memory accesses are explicit with load/store instructions, and
-it is carefully designed not to have (or need) an "address-of" operator. Notice
-how the type of the @G/@H global variables is actually "i32*" even though the
-variable is defined as "i32". What this means is that @G defines <em>space</em>
-for an i32 in the global data area, but its <em>name</em> actually refers to the
-address for that space. Stack variables work the same way, except that instead of
-being declared with global variable definitions, they are declared with the
-<a href="../LangRef.html#i_alloca">LLVM alloca instruction</a>:</p>
-
-<div class="doc_code">
-<pre>
-define i32 @example() {
-entry:
- %X = alloca i32 ; type of %X is i32*.
- ...
- %tmp = load i32* %X ; load the stack value %X from the stack.
- %tmp2 = add i32 %tmp, 1 ; increment it
- store i32 %tmp2, i32* %X ; store it back
- ...
-</pre>
-</div>
-
-<p>This code shows an example of how you can declare and manipulate a stack
-variable in the LLVM IR. Stack memory allocated with the alloca instruction is
-fully general: you can pass the address of the stack slot to functions, you can
-store it in other variables, etc. In our example above, we could rewrite the
-example to use the alloca technique to avoid using a PHI node:</p>
-
-<div class="doc_code">
-<pre>
-@G = weak global i32 0 ; type of @G is i32*
-@H = weak global i32 0 ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
- %X = alloca i32 ; type of %X is i32*.
- br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
- %X.0 = load i32* @G
- store i32 %X.0, i32* %X ; Update X
- br label %cond_next
-
-cond_false:
- %X.1 = load i32* @H
- store i32 %X.1, i32* %X ; Update X
- br label %cond_next
-
-cond_next:
- %X.2 = load i32* %X ; Read X
- ret i32 %X.2
-}
-</pre>
-</div>
-
-<p>With this, we have discovered a way to handle arbitrary mutable variables
-without the need to create Phi nodes at all:</p>
-
-<ol>
-<li>Each mutable variable becomes a stack allocation.</li>
-<li>Each read of the variable becomes a load from the stack.</li>
-<li>Each update of the variable becomes a store to the stack.</li>
-<li>Taking the address of a variable just uses the stack address directly.</li>
-</ol>
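-
-<p>The fourth point deserves a tiny illustration (a hypothetical snippet, not
-part of the tutorial): because the alloca's SSA value <em>is</em> the address
-of the variable, taking the address of a local in C maps onto using that value
-directly:</p>
-
-<div class="doc_code">
-<pre>
-; int X; int *P = &amp;X;  becomes:
-%X = alloca i32       ; %X is the address of the stack slot
-%P = alloca i32*
-store i32* %X, i32** %P
-</pre>
-</div>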
-
-<p>While this solution has solved our immediate problem, it introduced another
-one: we have now apparently created a lot of stack traffic for very simple
-and common operations, a major performance problem. Fortunately for us, the
-LLVM optimizer has a highly-tuned optimization pass named "mem2reg" that handles
-this case, promoting allocas like this into SSA registers, inserting Phi nodes
-as appropriate. If you run this example through the pass, for example, you'll
-get:</p>
-
-<div class="doc_code">
-<pre>
-$ <b>llvm-as &lt; example.ll | opt -mem2reg | llvm-dis</b>
-@G = weak global i32 0
-@H = weak global i32 0
-
-define i32 @test(i1 %Condition) {
-entry:
- br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
- %X.0 = load i32* @G
- br label %cond_next
-
-cond_false:
- %X.1 = load i32* @H
- br label %cond_next
-
-cond_next:
- %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
- ret i32 %X.01
-}
-</pre>
-</div>
-
-<p>The mem2reg pass implements the standard "iterated dominance frontier"
-algorithm for constructing SSA form and has a number of optimizations that speed
-up (very common) degenerate cases. The mem2reg optimization pass is the answer to dealing
-with mutable variables, and we highly recommend that you depend on it. Note that
-mem2reg only works on variables in certain circumstances:</p>
-
-<ol>
-<li>mem2reg is alloca-driven: it looks for allocas and if it can handle them, it
-promotes them. It does not apply to global variables or heap allocations.</li>
-
-<li>mem2reg only looks for alloca instructions in the entry block of the
-function. Being in the entry block guarantees that the alloca is only executed
-once, which makes analysis simpler.</li>
-
-<li>mem2reg only promotes allocas whose uses are direct loads and stores. If
-the address of the stack object is passed to a function, or if any funny pointer
-arithmetic is involved, the alloca will not be promoted.</li>
-
-<li>mem2reg only works on allocas of <a
-href="../LangRef.html#t_classifications">first class</a>
-values (such as pointers, scalars and vectors), and only if the array size
-of the allocation is 1 (or missing in the .ll file). mem2reg is not capable of
-promoting structs or arrays to registers. Note that the "scalarrepl" pass is
-more powerful and can promote structs, "unions", and arrays in many cases.</li>
-
-</ol>
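-
-<p>To make these restrictions concrete, here is a small hypothetical IR
-example (not from the tutorial) with one alloca that mem2reg can promote and
-one that it cannot, because its address escapes into a call:</p>
-
-<div class="doc_code">
-<pre>
-declare void @escape(i32*)
-
-define i32 @promotion() {
-entry:
-  %X = alloca i32             ; promotable: entry block, direct load/store only
-  %Y = alloca i32             ; not promotable: its address escapes below
-  store i32 1, i32* %X
-  call void @escape(i32* %Y)
-  %V = load i32* %X
-  ret i32 %V
-}
-</pre>
-</div>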
-
-<p>
-All of these properties are easy to satisfy for most imperative languages, and
-we'll illustrate it below with Kaleidoscope. The final question you may be
-asking is: should I bother with this nonsense for my front-end? Wouldn't it be
-better if I just did SSA construction directly, avoiding use of the mem2reg
-optimization pass? In short, we strongly recommend that you use this technique
-for building SSA form, unless there is an extremely good reason not to. Using
-this technique is:</p>
-
-<ul>
-<li>Proven and well tested: llvm-gcc and clang both use this technique for local
-mutable variables. As such, the most common clients of LLVM are using this to
-handle a bulk of their variables. You can be sure that bugs are found fast and
-fixed early.</li>
-
-<li>Extremely Fast: mem2reg has a number of special cases that make it fast in
-common cases as well as fully general. For example, it has fast-paths for
-variables that are only used in a single block, variables that only have one
-assignment point, good heuristics to avoid insertion of unneeded phi nodes, etc.
-</li>
-
-<li>Needed for debug info generation: <a href="../SourceLevelDebugging.html">
-Debug information in LLVM</a> relies on having the address of the variable
-exposed so that debug info can be attached to it. This technique dovetails
-very naturally with this style of debug info.</li>
-</ul>
-
-<p>If nothing else, this makes it much easier to get your front-end up and
-running, and is very simple to implement. Let's extend Kaleidoscope with mutable
-variables now!
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="kalvars">Mutable Variables in Kaleidoscope</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we know the sort of problem we want to tackle, let's see what this
-looks like in the context of our little Kaleidoscope language. We're going to
-add two features:</p>
-
-<ol>
-<li>The ability to mutate variables with the '=' operator.</li>
-<li>The ability to define new variables.</li>
-</ol>
-
-<p>While the first item is really what this is about, we only have variables
-for incoming arguments and induction variables, and redefining those only
-goes so far :). Also, the ability to define new variables is a
-useful thing regardless of whether you will be mutating them. Here's a
-motivating example that shows how we could use these:</p>
-
-<div class="doc_code">
-<pre>
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-# Recursive fib, we could do this before.
-def fib(x)
- if (x &lt; 3) then
- 1
- else
- fib(x-1)+fib(x-2);
-
-# Iterative fib.
-def fibi(x)
- <b>var a = 1, b = 1, c in</b>
- (for i = 3, i &lt; x in
- <b>c = a + b</b> :
- <b>a = b</b> :
- <b>b = c</b>) :
- b;
-
-# Call it.
-fibi(10);
-</pre>
-</div>
-
-<p>
-In order to mutate variables, we have to change our existing variables to use
-the "alloca trick". Once we have that, we'll add our new operator, then extend
-Kaleidoscope to support new variable definitions.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="adjustments">Adjusting Existing Variables for Mutation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The symbol table in Kaleidoscope is managed at code generation time by the
-'<tt>NamedValues</tt>' map. This map currently keeps track of the LLVM "Value*"
-that holds the double value for the named variable. In order to support
-mutation, we need to change this slightly, so that <tt>NamedValues</tt> holds
-the <em>memory location</em> of the variable in question. Note that this
-change is a refactoring: it changes the structure of the code, but does not
-(by itself) change the behavior of the compiler. All of these changes are
-isolated in the Kaleidoscope code generator.</p>
-
-<p>
-At this point in Kaleidoscope's development, it only supports variables for two
-things: incoming arguments to functions and the induction variable of 'for'
-loops. For consistency, we'll allow mutation of these variables in addition to
-other user-defined variables. This means that these will both need memory
-locations.
-</p>
-
-<p>To start our transformation of Kaleidoscope, we'll change the NamedValues
-map so that it maps to AllocaInst* instead of Value*. Once we do this, the C++
-compiler will tell us what parts of the code we need to update:</p>
-
-<div class="doc_code">
-<pre>
-static std::map&lt;std::string, AllocaInst*&gt; NamedValues;
-</pre>
-</div>
-
-<p>Also, since we will need to create these allocas, we'll use a helper
-function that ensures that the allocas are created in the entry block of the
-function:</p>
-
-<div class="doc_code">
-<pre>
-/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
-/// the function. This is used for mutable variables etc.
-static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
- const std::string &amp;VarName) {
- IRBuilder&lt;&gt; TmpB(&amp;TheFunction-&gt;getEntryBlock(),
- TheFunction-&gt;getEntryBlock().begin());
- return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
- VarName.c_str());
-}
-</pre>
-</div>
-
-<p>This funny-looking code creates an IRBuilder object that is pointing at
-the first instruction (.begin()) of the entry block. It then creates an alloca
-with the expected name and returns it. Because all values in Kaleidoscope are
-doubles, there is no need to pass in a type to use.</p>
-
-<p>With this in place, the first functionality change we want to make is to
-variable references. In our new scheme, variables live on the stack, so code
-generating a reference to them actually needs to produce a load from the stack
-slot:</p>
-
-<div class="doc_code">
-<pre>
-Value *VariableExprAST::Codegen() {
- // Look this variable up in the function.
- Value *V = NamedValues[Name];
- if (V == 0) return ErrorV("Unknown variable name");
-
- <b>// Load the value.
- return Builder.CreateLoad(V, Name.c_str());</b>
-}
-</pre>
-</div>
-
-<p>As you can see, this is pretty straightforward. Now we need to update the
-things that define the variables to set up the alloca. We'll start with
-<tt>ForExprAST::Codegen</tt> (see the <a href="#code">full code listing</a> for
-the unabridged code):</p>
-
-<div class="doc_code">
-<pre>
- Function *TheFunction = Builder.GetInsertBlock()->getParent();
-
- <b>// Create an alloca for the variable in the entry block.
- AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);</b>
-
- // Emit the start code first, without 'variable' in scope.
- Value *StartVal = Start-&gt;Codegen();
- if (StartVal == 0) return 0;
-
- <b>// Store the value into the alloca.
- Builder.CreateStore(StartVal, Alloca);</b>
- ...
-
- // Compute the end condition.
- Value *EndCond = End-&gt;Codegen();
- if (EndCond == 0) return EndCond;
-
- <b>// Reload, increment, and restore the alloca. This handles the case where
- // the body of the loop mutates the variable.
- Value *CurVar = Builder.CreateLoad(Alloca);
- Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
- Builder.CreateStore(NextVar, Alloca);</b>
- ...
-</pre>
-</div>
-
-<p>This code is virtually identical to the code <a
-href="LangImpl5.html#forcodegen">before we allowed mutable variables</a>. The
-big difference is that we no longer have to construct a PHI node, and we use
-load/store to access the variable as needed.</p>
-
-<p>To support mutable argument variables, we also need to make allocas for them.
-The code for this is also pretty simple:</p>
-
-<div class="doc_code">
-<pre>
-/// CreateArgumentAllocas - Create an alloca for each argument and register the
-/// argument in the symbol table so that references to it will succeed.
-void PrototypeAST::CreateArgumentAllocas(Function *F) {
- Function::arg_iterator AI = F-&gt;arg_begin();
- for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
- // Create an alloca for this variable.
- AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
-
- // Store the initial value into the alloca.
- Builder.CreateStore(AI, Alloca);
-
- // Add arguments to variable symbol table.
- NamedValues[Args[Idx]] = Alloca;
- }
-}
-</pre>
-</div>
-
-<p>For each argument, we make an alloca, store the input value to the function
-into the alloca, and register the alloca as the memory location for the
-argument. This method gets invoked by <tt>FunctionAST::Codegen</tt> right after
-it sets up the entry block for the function.</p>
-
-<p>The final missing piece is adding the mem2reg pass, which allows us to get
-good codegen once again:</p>
-
-<div class="doc_code">
-<pre>
- // Set up the optimizer pipeline. Start with registering info about how the
- // target lays out data structures.
- OurFPM.add(new DataLayout(*TheExecutionEngine-&gt;getDataLayout()));
- <b>// Promote allocas to registers.
- OurFPM.add(createPromoteMemoryToRegisterPass());</b>
- // Do simple "peephole" optimizations and bit-twiddling optzns.
- OurFPM.add(createInstructionCombiningPass());
- // Reassociate expressions.
- OurFPM.add(createReassociatePass());
-</pre>
-</div>
-
-<p>It is interesting to see what the code looks like before and after the
-mem2reg optimization runs. For example, this is the before/after code for our
-recursive fib function. Before the optimization:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
- <b>%x1 = alloca double
- store double %x, double* %x1
- %x2 = load double* %x1</b>
- %cmptmp = fcmp ult double %x2, 3.000000e+00
- %booltmp = uitofp i1 %cmptmp to double
- %ifcond = fcmp one double %booltmp, 0.000000e+00
- br i1 %ifcond, label %then, label %else
-
-then: ; preds = %entry
- br label %ifcont
-
-else: ; preds = %entry
- <b>%x3 = load double* %x1</b>
- %subtmp = fsub double %x3, 1.000000e+00
- %calltmp = call double @fib(double %subtmp)
- <b>%x4 = load double* %x1</b>
- %subtmp5 = fsub double %x4, 2.000000e+00
- %calltmp6 = call double @fib(double %subtmp5)
- %addtmp = fadd double %calltmp, %calltmp6
- br label %ifcont
-
-ifcont: ; preds = %else, %then
- %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
- ret double %iftmp
-}
-</pre>
-</div>
-
-<p>Here there is only one variable (x, the input argument), but you can still
-see the extremely simple-minded code generation strategy we are using. In the
-entry block, an alloca is created, and the initial input value is stored into
-it. Each reference to the variable does a reload from the stack. Also, note
-that we didn't modify the if/then/else expression, so it still inserts a PHI
-node. While we could make an alloca for it, it is actually easier to create a
-PHI node for it, so we still just make the PHI.</p>
-
-<p>Here is the code after the mem2reg pass runs:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
- %cmptmp = fcmp ult double <b>%x</b>, 3.000000e+00
- %booltmp = uitofp i1 %cmptmp to double
- %ifcond = fcmp one double %booltmp, 0.000000e+00
- br i1 %ifcond, label %then, label %else
-
-then:
- br label %ifcont
-
-else:
- %subtmp = fsub double <b>%x</b>, 1.000000e+00
- %calltmp = call double @fib(double %subtmp)
- %subtmp5 = fsub double <b>%x</b>, 2.000000e+00
- %calltmp6 = call double @fib(double %subtmp5)
- %addtmp = fadd double %calltmp, %calltmp6
- br label %ifcont
-
-ifcont: ; preds = %else, %then
- %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
- ret double %iftmp
-}
-</pre>
-</div>
-
-<p>This is a trivial case for mem2reg, since there are no redefinitions of the
-variable. The point of showing this is to calm your tension about inserting
-such blatant inefficiencies :).</p>
-
-<p>After the rest of the optimizers run, we get:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
- %cmptmp = fcmp ult double %x, 3.000000e+00
- %booltmp = uitofp i1 %cmptmp to double
- %ifcond = fcmp ueq double %booltmp, 0.000000e+00
- br i1 %ifcond, label %else, label %ifcont
-
-else:
- %subtmp = fsub double %x, 1.000000e+00
- %calltmp = call double @fib(double %subtmp)
- %subtmp5 = fsub double %x, 2.000000e+00
- %calltmp6 = call double @fib(double %subtmp5)
- %addtmp = fadd double %calltmp, %calltmp6
- ret double %addtmp
-
-ifcont:
- ret double 1.000000e+00
-}
-</pre>
-</div>
-
-<p>Here we see that the simplifycfg pass decided to clone the return instruction
-into the end of the 'else' block. This allowed it to eliminate some branches
-and the PHI node.</p>
-
-<p>Now that all symbol table references are updated to use stack variables,
-we'll add the assignment operator.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="assignment">New Assignment Operator</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>With our current framework, adding a new assignment operator is really
-simple. We will parse it just like any other binary operator, but handle it
-internally (instead of allowing the user to define it). The first step is to
-set a precedence:</p>
-
-<div class="doc_code">
-<pre>
- int main() {
- // Install standard binary operators.
- // 1 is lowest precedence.
- <b>BinopPrecedence['='] = 2;</b>
- BinopPrecedence['&lt;'] = 10;
- BinopPrecedence['+'] = 20;
- BinopPrecedence['-'] = 20;
-</pre>
-</div>
-
-<p>Now that the parser knows the precedence of the binary operator, it takes
-care of all the parsing and AST generation. We just need to implement codegen
-for the assignment operator. This looks like:</p>
-
-<div class="doc_code">
-<pre>
-Value *BinaryExprAST::Codegen() {
- // Special case '=' because we don't want to emit the LHS as an expression.
- if (Op == '=') {
- // Assignment requires the LHS to be an identifier.
- VariableExprAST *LHSE = dynamic_cast&lt;VariableExprAST*&gt;(LHS);
- if (!LHSE)
- return ErrorV("destination of '=' must be a variable");
-</pre>
-</div>
-
-<p>Unlike the rest of the binary operators, our assignment operator doesn't
-follow the "emit LHS, emit RHS, do computation" model. As such, it is handled
-as a special case before the other binary operators are processed. The other
-strange thing is that it requires the LHS to be a variable. It is invalid to
-have "(x+1) = expr" - only things like "x = expr" are allowed.
-</p>
-
-<div class="doc_code">
-<pre>
- // Codegen the RHS.
- Value *Val = RHS-&gt;Codegen();
- if (Val == 0) return 0;
-
- // Look up the name.
- Value *Variable = NamedValues[LHSE-&gt;getName()];
- if (Variable == 0) return ErrorV("Unknown variable name");
-
- Builder.CreateStore(Val, Variable);
- return Val;
- }
- ...
-</pre>
-</div>
-
-<p>Once we have the variable, codegen'ing the assignment is straightforward:
-we emit the RHS of the assignment, create a store, and return the computed
-value. Returning a value allows for chained assignments like "X = (Y = Z)".</p>
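-
-<p>To see chaining in action, here is a small hypothetical snippet (not part
-of the tutorial) that reuses the ':' sequencing operator and the
-<tt>printd</tt> "library" function:</p>
-
-<div class="doc_code">
-<pre>
-extern printd(x);
-def binary : 1 (x y) y;
-
-# 'y = 7' stores 7 and also evaluates to 7, which is then stored into 'x'.
-def chain(x y)
-  (x = (y = 7)) :
-  printd(x);   # prints 7.000000
-
-chain(1, 2);
-</pre>
-</div>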
-
-<p>Now that we have an assignment operator, we can mutate loop variables and
-arguments. For example, we can now run code like this:</p>
-
-<div class="doc_code">
-<pre>
-# Function to print a double.
-extern printd(x);
-
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-def test(x)
- printd(x) :
- x = 4 :
- printd(x);
-
-test(123);
-</pre>
-</div>
-
-<p>When run, this example prints "123" and then "4", showing that we did
-actually mutate the value! Okay, we have now officially implemented our goal:
-getting this to work requires SSA construction in the general case. However,
-to be really useful, we want the ability to define our own local variables; let's
-add this next!
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="localvars">User-defined Local Variables</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Adding var/in is just like the other extensions we made to
-Kaleidoscope: we extend the lexer, the parser, the AST and the code generator.
-The first step for adding our new 'var/in' construct is to extend the lexer.
-As before, this is pretty trivial; the code looks like this:</p>
-
-<div class="doc_code">
-<pre>
-enum Token {
- ...
- <b>// var definition
- tok_var = -13</b>
-...
-}
-...
-static int gettok() {
-...
- if (IdentifierStr == "in") return tok_in;
- if (IdentifierStr == "binary") return tok_binary;
- if (IdentifierStr == "unary") return tok_unary;
- <b>if (IdentifierStr == "var") return tok_var;</b>
- return tok_identifier;
-...
-</pre>
-</div>
-
-<p>The next step is to define the AST node that we will construct. For var/in,
-it looks like this:</p>
-
-<div class="doc_code">
-<pre>
-/// VarExprAST - Expression class for var/in
-class VarExprAST : public ExprAST {
- std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; VarNames;
- ExprAST *Body;
-public:
- VarExprAST(const std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; &amp;varnames,
- ExprAST *body)
- : VarNames(varnames), Body(body) {}
-
- virtual Value *Codegen();
-};
-</pre>
-</div>
-
-<p>var/in allows a list of names to be defined all at once, and each name can
-optionally have an initializer value. As such, we capture this information in
-the VarNames vector. Also, var/in has a body; this body is allowed to access
-the variables defined by the var/in.</p>
-
-<p>With this in place, we can define the parser pieces. The first thing we do is add
-it as a primary expression:</p>
-
-<div class="doc_code">
-<pre>
-/// primary
-/// ::= identifierexpr
-/// ::= numberexpr
-/// ::= parenexpr
-/// ::= ifexpr
-/// ::= forexpr
-<b>/// ::= varexpr</b>
-static ExprAST *ParsePrimary() {
- switch (CurTok) {
- default: return Error("unknown token when expecting an expression");
- case tok_identifier: return ParseIdentifierExpr();
- case tok_number: return ParseNumberExpr();
- case '(': return ParseParenExpr();
- case tok_if: return ParseIfExpr();
- case tok_for: return ParseForExpr();
- <b>case tok_var: return ParseVarExpr();</b>
- }
-}
-</pre>
-</div>
-
-<p>Next we define ParseVarExpr:</p>
-
-<div class="doc_code">
-<pre>
-/// varexpr ::= 'var' identifier ('=' expression)?
-// (',' identifier ('=' expression)?)* 'in' expression
-static ExprAST *ParseVarExpr() {
- getNextToken(); // eat the var.
-
- std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; VarNames;
-
- // At least one variable name is required.
- if (CurTok != tok_identifier)
- return Error("expected identifier after var");
-</pre>
-</div>
-
-<p>The first part of this code parses the list of identifier/expr pairs into the
-local <tt>VarNames</tt> vector.</p>
-
-<div class="doc_code">
-<pre>
- while (1) {
- std::string Name = IdentifierStr;
- getNextToken(); // eat identifier.
-
- // Read the optional initializer.
- ExprAST *Init = 0;
- if (CurTok == '=') {
- getNextToken(); // eat the '='.
-
- Init = ParseExpression();
- if (Init == 0) return 0;
- }
-
- VarNames.push_back(std::make_pair(Name, Init));
-
- // End of var list, exit loop.
- if (CurTok != ',') break;
- getNextToken(); // eat the ','.
-
- if (CurTok != tok_identifier)
- return Error("expected identifier list after var");
- }
-</pre>
-</div>
-
-<p>Once all the variables are parsed, we then parse the body and create the
-AST node:</p>
-
-<div class="doc_code">
-<pre>
- // At this point, we have to have 'in'.
- if (CurTok != tok_in)
- return Error("expected 'in' keyword after 'var'");
- getNextToken(); // eat 'in'.
-
- ExprAST *Body = ParseExpression();
- if (Body == 0) return 0;
-
- return new VarExprAST(VarNames, Body);
-}
-</pre>
-</div>
-
-<p>Now that we can parse and represent the code, we need to support emission of
-LLVM IR for it. This code starts out with:</p>
-
-<div class="doc_code">
-<pre>
-Value *VarExprAST::Codegen() {
- std::vector&lt;AllocaInst *&gt; OldBindings;
-
- Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-
- // Register all variables and emit their initializer.
- for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
- const std::string &amp;VarName = VarNames[i].first;
- ExprAST *Init = VarNames[i].second;
-</pre>
-</div>
-
-<p>Basically it loops over all the variables, installing them one at a time.
-For each variable we put into the symbol table, we remember the previous value
-that we replace in OldBindings.</p>
-
-<div class="doc_code">
-<pre>
- // Emit the initializer before adding the variable to scope, this prevents
- // the initializer from referencing the variable itself, and permits stuff
- // like this:
- // var a = 1 in
- // var a = a in ... # refers to outer 'a'.
- Value *InitVal;
- if (Init) {
- InitVal = Init-&gt;Codegen();
- if (InitVal == 0) return 0;
- } else { // If not specified, use 0.0.
- InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
- }
-
- AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
- Builder.CreateStore(InitVal, Alloca);
-
- // Remember the old variable binding so that we can restore the binding when
- // we unrecurse.
- OldBindings.push_back(NamedValues[VarName]);
-
- // Remember this binding.
- NamedValues[VarName] = Alloca;
- }
-</pre>
-</div>
-
-<p>There are more comments here than code. The basic idea is that we emit the
-initializer, create the alloca, then update the symbol table to point to it.
-Once all the variables are installed in the symbol table, we evaluate the body
-of the var/in expression:</p>
-
-<div class="doc_code">
-<pre>
- // Codegen the body, now that all vars are in scope.
- Value *BodyVal = Body-&gt;Codegen();
- if (BodyVal == 0) return 0;
-</pre>
-</div>
-
-<p>Finally, before returning, we restore the previous variable bindings:</p>
-
-<div class="doc_code">
-<pre>
- // Pop all our variables from scope.
- for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
- NamedValues[VarNames[i].first] = OldBindings[i];
-
- // Return the body computation.
- return BodyVal;
-}
-</pre>
-</div>
-
-<p>The end result of all of this is that we get properly scoped variable
-definitions, and we even (trivially) allow mutation of them :).</p>
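-
-<p>As a quick sanity check of the scoping rules, consider this hypothetical
-session (not part of the tutorial): the inner 'a' shadows the outer one only
-inside its own body, and its initializer still sees the outer binding:</p>
-
-<div class="doc_code">
-<pre>
-extern printd(x);
-def binary : 1 (x y) y;
-
-def test()
-  var a = 1 in
-    (var a = a + 10 in printd(a)) :  # prints 11.000000: initializer saw outer 'a'
-    printd(a);                       # prints 1.000000: outer binding is restored
-
-test();
-</pre>
-</div>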
-
-<p>With this, we completed what we set out to do. Our nice iterative fib
-example from the intro compiles and runs just fine. The mem2reg pass optimizes
-all of our stack variables into SSA registers, inserting PHI nodes where needed,
-and our front-end remains simple: no "iterated dominance frontier" computation
-anywhere in sight.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with mutable
-variables and var/in support. To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include &lt;cstdio&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
- tok_eof = -1,
-
- // commands
- tok_def = -2, tok_extern = -3,
-
- // primary
- tok_identifier = -4, tok_number = -5,
-
- // control
- tok_if = -6, tok_then = -7, tok_else = -8,
- tok_for = -9, tok_in = -10,
-
- // operators
- tok_binary = -11, tok_unary = -12,
-
- // var definition
- tok_var = -13
-};
-
-static std::string IdentifierStr; // Filled in if tok_identifier
-static double NumVal; // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
- static int LastChar = ' ';
-
- // Skip any whitespace.
- while (isspace(LastChar))
- LastChar = getchar();
-
- if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
- IdentifierStr = LastChar;
- while (isalnum((LastChar = getchar())))
- IdentifierStr += LastChar;
-
- if (IdentifierStr == "def") return tok_def;
- if (IdentifierStr == "extern") return tok_extern;
- if (IdentifierStr == "if") return tok_if;
- if (IdentifierStr == "then") return tok_then;
- if (IdentifierStr == "else") return tok_else;
- if (IdentifierStr == "for") return tok_for;
- if (IdentifierStr == "in") return tok_in;
- if (IdentifierStr == "binary") return tok_binary;
- if (IdentifierStr == "unary") return tok_unary;
- if (IdentifierStr == "var") return tok_var;
- return tok_identifier;
- }
-
- if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
- std::string NumStr;
- do {
- NumStr += LastChar;
- LastChar = getchar();
- } while (isdigit(LastChar) || LastChar == '.');
-
- NumVal = strtod(NumStr.c_str(), 0);
- return tok_number;
- }
-
- if (LastChar == '#') {
- // Comment until end of line.
- do LastChar = getchar();
- while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-
- if (LastChar != EOF)
- return gettok();
- }
-
- // Check for end of file. Don't eat the EOF.
- if (LastChar == EOF)
- return tok_eof;
-
- // Otherwise, just return the character as its ascii value.
- int ThisChar = LastChar;
- LastChar = getchar();
- return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
- virtual ~ExprAST() {}
- virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
- double Val;
-public:
- NumberExprAST(double val) : Val(val) {}
- virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
- std::string Name;
-public:
- VariableExprAST(const std::string &amp;name) : Name(name) {}
- const std::string &amp;getName() const { return Name; }
- virtual Value *Codegen();
-};
-
-/// UnaryExprAST - Expression class for a unary operator.
-class UnaryExprAST : public ExprAST {
- char Opcode;
- ExprAST *Operand;
-public:
- UnaryExprAST(char opcode, ExprAST *operand)
- : Opcode(opcode), Operand(operand) {}
- virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
- char Op;
- ExprAST *LHS, *RHS;
-public:
- BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
- : Op(op), LHS(lhs), RHS(rhs) {}
- virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
- std::string Callee;
- std::vector&lt;ExprAST*&gt; Args;
-public:
- CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
- : Callee(callee), Args(args) {}
- virtual Value *Codegen();
-};
-
-/// IfExprAST - Expression class for if/then/else.
-class IfExprAST : public ExprAST {
- ExprAST *Cond, *Then, *Else;
-public:
- IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
- : Cond(cond), Then(then), Else(_else) {}
- virtual Value *Codegen();
-};
-
-/// ForExprAST - Expression class for for/in.
-class ForExprAST : public ExprAST {
- std::string VarName;
- ExprAST *Start, *End, *Step, *Body;
-public:
- ForExprAST(const std::string &amp;varname, ExprAST *start, ExprAST *end,
- ExprAST *step, ExprAST *body)
- : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
- virtual Value *Codegen();
-};
-
-/// VarExprAST - Expression class for var/in
-class VarExprAST : public ExprAST {
- std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; VarNames;
- ExprAST *Body;
-public:
- VarExprAST(const std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; &amp;varnames,
- ExprAST *body)
- : VarNames(varnames), Body(body) {}
-
- virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes), as well as if it is an operator.
-class PrototypeAST {
- std::string Name;
- std::vector&lt;std::string&gt; Args;
- bool isOperator;
- unsigned Precedence; // Precedence if a binary op.
-public:
- PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args,
- bool isoperator = false, unsigned prec = 0)
- : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
-
- bool isUnaryOp() const { return isOperator &amp;&amp; Args.size() == 1; }
- bool isBinaryOp() const { return isOperator &amp;&amp; Args.size() == 2; }
-
- char getOperatorName() const {
- assert(isUnaryOp() || isBinaryOp());
- return Name[Name.size()-1];
- }
-
- unsigned getBinaryPrecedence() const { return Precedence; }
-
- Function *Codegen();
-
- void CreateArgumentAllocas(Function *F);
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
- PrototypeAST *Proto;
- ExprAST *Body;
-public:
- FunctionAST(PrototypeAST *proto, ExprAST *body)
- : Proto(proto), Body(body) {}
-
- Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
-/// token the parser is looking at. getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
- return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
- if (!isascii(CurTok))
- return -1;
-
- // Make sure it's a declared binop.
- int TokPrec = BinopPrecedence[CurTok];
- if (TokPrec &lt;= 0) return -1;
- return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-/// ::= identifier
-/// ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
- std::string IdName = IdentifierStr;
-
- getNextToken(); // eat identifier.
-
- if (CurTok != '(') // Simple variable ref.
- return new VariableExprAST(IdName);
-
- // Call.
- getNextToken(); // eat (
- std::vector&lt;ExprAST*&gt; Args;
- if (CurTok != ')') {
- while (1) {
- ExprAST *Arg = ParseExpression();
- if (!Arg) return 0;
- Args.push_back(Arg);
-
- if (CurTok == ')') break;
-
- if (CurTok != ',')
- return Error("Expected ')' or ',' in argument list");
- getNextToken();
- }
- }
-
- // Eat the ')'.
- getNextToken();
-
- return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
- ExprAST *Result = new NumberExprAST(NumVal);
- getNextToken(); // consume the number
- return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
- getNextToken(); // eat (.
- ExprAST *V = ParseExpression();
- if (!V) return 0;
-
- if (CurTok != ')')
- return Error("expected ')'");
- getNextToken(); // eat ).
- return V;
-}
-
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
- getNextToken(); // eat the if.
-
- // condition.
- ExprAST *Cond = ParseExpression();
- if (!Cond) return 0;
-
- if (CurTok != tok_then)
- return Error("expected then");
- getNextToken(); // eat the then
-
- ExprAST *Then = ParseExpression();
- if (Then == 0) return 0;
-
- if (CurTok != tok_else)
- return Error("expected else");
-
- getNextToken();
-
- ExprAST *Else = ParseExpression();
- if (!Else) return 0;
-
- return new IfExprAST(Cond, Then, Else);
-}
-
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
- getNextToken(); // eat the for.
-
- if (CurTok != tok_identifier)
- return Error("expected identifier after for");
-
- std::string IdName = IdentifierStr;
- getNextToken(); // eat identifier.
-
- if (CurTok != '=')
- return Error("expected '=' after for");
- getNextToken(); // eat '='.
-
-
- ExprAST *Start = ParseExpression();
- if (Start == 0) return 0;
- if (CurTok != ',')
- return Error("expected ',' after for start value");
- getNextToken();
-
- ExprAST *End = ParseExpression();
- if (End == 0) return 0;
-
- // The step value is optional.
- ExprAST *Step = 0;
- if (CurTok == ',') {
- getNextToken();
- Step = ParseExpression();
- if (Step == 0) return 0;
- }
-
- if (CurTok != tok_in)
- return Error("expected 'in' after for");
- getNextToken(); // eat 'in'.
-
- ExprAST *Body = ParseExpression();
- if (Body == 0) return 0;
-
- return new ForExprAST(IdName, Start, End, Step, Body);
-}
-
-/// varexpr ::= 'var' identifier ('=' expression)?
-// (',' identifier ('=' expression)?)* 'in' expression
-static ExprAST *ParseVarExpr() {
- getNextToken(); // eat the var.
-
- std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; VarNames;
-
- // At least one variable name is required.
- if (CurTok != tok_identifier)
- return Error("expected identifier after var");
-
- while (1) {
- std::string Name = IdentifierStr;
- getNextToken(); // eat identifier.
-
- // Read the optional initializer.
- ExprAST *Init = 0;
- if (CurTok == '=') {
- getNextToken(); // eat the '='.
-
- Init = ParseExpression();
- if (Init == 0) return 0;
- }
-
- VarNames.push_back(std::make_pair(Name, Init));
-
- // End of var list, exit loop.
- if (CurTok != ',') break;
- getNextToken(); // eat the ','.
-
- if (CurTok != tok_identifier)
- return Error("expected identifier list after var");
- }
-
- // At this point, we have to have 'in'.
- if (CurTok != tok_in)
- return Error("expected 'in' keyword after 'var'");
- getNextToken(); // eat 'in'.
-
- ExprAST *Body = ParseExpression();
- if (Body == 0) return 0;
-
- return new VarExprAST(VarNames, Body);
-}
-
-/// primary
-/// ::= identifierexpr
-/// ::= numberexpr
-/// ::= parenexpr
-/// ::= ifexpr
-/// ::= forexpr
-/// ::= varexpr
-static ExprAST *ParsePrimary() {
- switch (CurTok) {
- default: return Error("unknown token when expecting an expression");
- case tok_identifier: return ParseIdentifierExpr();
- case tok_number: return ParseNumberExpr();
- case '(': return ParseParenExpr();
- case tok_if: return ParseIfExpr();
- case tok_for: return ParseForExpr();
- case tok_var: return ParseVarExpr();
- }
-}
-
-/// unary
-/// ::= primary
-/// ::= '!' unary
-static ExprAST *ParseUnary() {
- // If the current token is not an operator, it must be a primary expr.
- if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
- return ParsePrimary();
-
- // If this is a unary operator, read it.
- int Opc = CurTok;
- getNextToken();
- if (ExprAST *Operand = ParseUnary())
- return new UnaryExprAST(Opc, Operand);
- return 0;
-}
-
-/// binoprhs
-/// ::= ('+' unary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
- // If this is a binop, find its precedence.
- while (1) {
- int TokPrec = GetTokPrecedence();
-
- // If this is a binop that binds at least as tightly as the current binop,
- // consume it, otherwise we are done.
- if (TokPrec &lt; ExprPrec)
- return LHS;
-
- // Okay, we know this is a binop.
- int BinOp = CurTok;
- getNextToken(); // eat binop
-
- // Parse the unary expression after the binary operator.
- ExprAST *RHS = ParseUnary();
- if (!RHS) return 0;
-
- // If BinOp binds less tightly with RHS than the operator after RHS, let
- // the pending operator take RHS as its LHS.
- int NextPrec = GetTokPrecedence();
- if (TokPrec &lt; NextPrec) {
- RHS = ParseBinOpRHS(TokPrec+1, RHS);
- if (RHS == 0) return 0;
- }
-
- // Merge LHS/RHS.
- LHS = new BinaryExprAST(BinOp, LHS, RHS);
- }
-}
-
-/// expression
-/// ::= unary binoprhs
-///
-static ExprAST *ParseExpression() {
- ExprAST *LHS = ParseUnary();
- if (!LHS) return 0;
-
- return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-/// ::= id '(' id* ')'
-/// ::= binary LETTER number? (id, id)
-/// ::= unary LETTER (id)
-static PrototypeAST *ParsePrototype() {
- std::string FnName;
-
- unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
- unsigned BinaryPrecedence = 30;
-
- switch (CurTok) {
- default:
- return ErrorP("Expected function name in prototype");
- case tok_identifier:
- FnName = IdentifierStr;
- Kind = 0;
- getNextToken();
- break;
- case tok_unary:
- getNextToken();
- if (!isascii(CurTok))
- return ErrorP("Expected unary operator");
- FnName = "unary";
- FnName += (char)CurTok;
- Kind = 1;
- getNextToken();
- break;
- case tok_binary:
- getNextToken();
- if (!isascii(CurTok))
- return ErrorP("Expected binary operator");
- FnName = "binary";
- FnName += (char)CurTok;
- Kind = 2;
- getNextToken();
-
- // Read the precedence if present.
- if (CurTok == tok_number) {
- if (NumVal &lt; 1 || NumVal &gt; 100)
-        return ErrorP("Invalid precedence: must be 1..100");
- BinaryPrecedence = (unsigned)NumVal;
- getNextToken();
- }
- break;
- }
-
- if (CurTok != '(')
- return ErrorP("Expected '(' in prototype");
-
- std::vector&lt;std::string&gt; ArgNames;
- while (getNextToken() == tok_identifier)
- ArgNames.push_back(IdentifierStr);
- if (CurTok != ')')
- return ErrorP("Expected ')' in prototype");
-
- // success.
- getNextToken(); // eat ')'.
-
- // Verify right number of names for operator.
- if (Kind &amp;&amp; ArgNames.size() != Kind)
- return ErrorP("Invalid number of operands for operator");
-
- return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
- getNextToken(); // eat def.
- PrototypeAST *Proto = ParsePrototype();
- if (Proto == 0) return 0;
-
- if (ExprAST *E = ParseExpression())
- return new FunctionAST(Proto, E);
- return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
- if (ExprAST *E = ParseExpression()) {
- // Make an anonymous proto.
- PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
- return new FunctionAST(Proto, E);
- }
- return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
- getNextToken(); // eat extern.
- return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder<> Builder(getGlobalContext());
-static std::map<std::string, AllocaInst*> NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
-/// the function. This is used for mutable variables etc.
-static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
- const std::string &VarName) {
- IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
- TheFunction->getEntryBlock().begin());
- return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
- VarName.c_str());
-}
-
-Value *NumberExprAST::Codegen() {
- return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
- // Look this variable up in the function.
- Value *V = NamedValues[Name];
- if (V == 0) return ErrorV("Unknown variable name");
-
- // Load the value.
- return Builder.CreateLoad(V, Name.c_str());
-}
-
-Value *UnaryExprAST::Codegen() {
- Value *OperandV = Operand->Codegen();
- if (OperandV == 0) return 0;
-
- Function *F = TheModule->getFunction(std::string("unary")+Opcode);
- if (F == 0)
- return ErrorV("Unknown unary operator");
-
- return Builder.CreateCall(F, OperandV, "unop");
-}
-
-Value *BinaryExprAST::Codegen() {
- // Special case '=' because we don't want to emit the LHS as an expression.
- if (Op == '=') {
- // Assignment requires the LHS to be an identifier.
- VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
- if (!LHSE)
- return ErrorV("destination of '=' must be a variable");
- // Codegen the RHS.
- Value *Val = RHS->Codegen();
- if (Val == 0) return 0;
-
- // Look up the name.
- Value *Variable = NamedValues[LHSE->getName()];
- if (Variable == 0) return ErrorV("Unknown variable name");
-
- Builder.CreateStore(Val, Variable);
- return Val;
- }
-
- Value *L = LHS->Codegen();
- Value *R = RHS->Codegen();
- if (L == 0 || R == 0) return 0;
-
- switch (Op) {
- case '+': return Builder.CreateFAdd(L, R, "addtmp");
- case '-': return Builder.CreateFSub(L, R, "subtmp");
- case '*': return Builder.CreateFMul(L, R, "multmp");
- case '<':
- L = Builder.CreateFCmpULT(L, R, "cmptmp");
- // Convert bool 0/1 to double 0.0 or 1.0
- return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
- "booltmp");
- default: break;
- }
-
- // If it wasn't a builtin binary operator, it must be a user defined one. Emit
- // a call to it.
- Function *F = TheModule->getFunction(std::string("binary")+Op);
- assert(F && "binary operator not found!");
-
- Value *Ops[2] = { L, R };
- return Builder.CreateCall(F, Ops, "binop");
-}
-
-Value *CallExprAST::Codegen() {
- // Look up the name in the global module table.
- Function *CalleeF = TheModule->getFunction(Callee);
- if (CalleeF == 0)
- return ErrorV("Unknown function referenced");
-
- // If argument mismatch error.
- if (CalleeF->arg_size() != Args.size())
- return ErrorV("Incorrect # arguments passed");
-
- std::vector<Value*> ArgsV;
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- ArgsV.push_back(Args[i]->Codegen());
- if (ArgsV.back() == 0) return 0;
- }
-
- return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Value *IfExprAST::Codegen() {
- Value *CondV = Cond->Codegen();
- if (CondV == 0) return 0;
-
- // Convert condition to a bool by comparing equal to 0.0.
- CondV = Builder.CreateFCmpONE(CondV,
- ConstantFP::get(getGlobalContext(), APFloat(0.0)),
- "ifcond");
-
- Function *TheFunction = Builder.GetInsertBlock()->getParent();
-
- // Create blocks for the then and else cases. Insert the 'then' block at the
- // end of the function.
- BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
- BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
- BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-
- Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-
- // Emit then value.
- Builder.SetInsertPoint(ThenBB);
-
- Value *ThenV = Then->Codegen();
- if (ThenV == 0) return 0;
-
- Builder.CreateBr(MergeBB);
- // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
- ThenBB = Builder.GetInsertBlock();
-
- // Emit else block.
- TheFunction->getBasicBlockList().push_back(ElseBB);
- Builder.SetInsertPoint(ElseBB);
-
- Value *ElseV = Else->Codegen();
- if (ElseV == 0) return 0;
-
- Builder.CreateBr(MergeBB);
- // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
- ElseBB = Builder.GetInsertBlock();
-
- // Emit merge block.
- TheFunction->getBasicBlockList().push_back(MergeBB);
- Builder.SetInsertPoint(MergeBB);
- PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
- "iftmp");
-
- PN->addIncoming(ThenV, ThenBB);
- PN->addIncoming(ElseV, ElseBB);
- return PN;
-}
-
-Value *ForExprAST::Codegen() {
- // Output this as:
- // var = alloca double
- // ...
- // start = startexpr
- // store start -> var
- // goto loop
- // loop:
- // ...
- // bodyexpr
- // ...
- // loopend:
- // step = stepexpr
- // endcond = endexpr
- //
- // curvar = load var
- // nextvar = curvar + step
- // store nextvar -> var
- // br endcond, loop, endloop
- // outloop:
-
- Function *TheFunction = Builder.GetInsertBlock()->getParent();
-
- // Create an alloca for the variable in the entry block.
- AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
-
- // Emit the start code first, without 'variable' in scope.
- Value *StartVal = Start->Codegen();
- if (StartVal == 0) return 0;
-
- // Store the value into the alloca.
- Builder.CreateStore(StartVal, Alloca);
-
- // Make the new basic block for the loop header, inserting after current
- // block.
- BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
-
- // Insert an explicit fall through from the current block to the LoopBB.
- Builder.CreateBr(LoopBB);
-
- // Start insertion in LoopBB.
- Builder.SetInsertPoint(LoopBB);
-
- // Within the loop, the variable is defined equal to the PHI node. If it
- // shadows an existing variable, we have to restore it, so save it now.
- AllocaInst *OldVal = NamedValues[VarName];
- NamedValues[VarName] = Alloca;
-
- // Emit the body of the loop. This, like any other expr, can change the
- // current BB. Note that we ignore the value computed by the body, but don't
- // allow an error.
- if (Body->Codegen() == 0)
- return 0;
-
- // Emit the step value.
- Value *StepVal;
- if (Step) {
- StepVal = Step->Codegen();
- if (StepVal == 0) return 0;
- } else {
- // If not specified, use 1.0.
- StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
- }
-
- // Compute the end condition.
- Value *EndCond = End->Codegen();
- if (EndCond == 0) return EndCond;
-
- // Reload, increment, and restore the alloca. This handles the case where
- // the body of the loop mutates the variable.
- Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
- Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
- Builder.CreateStore(NextVar, Alloca);
-
- // Convert condition to a bool by comparing equal to 0.0.
- EndCond = Builder.CreateFCmpONE(EndCond,
- ConstantFP::get(getGlobalContext(), APFloat(0.0)),
- "loopcond");
-
- // Create the "after loop" block and insert it.
- BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
-
- // Insert the conditional branch into the end of LoopEndBB.
- Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-
- // Any new code will be inserted in AfterBB.
- Builder.SetInsertPoint(AfterBB);
-
- // Restore the unshadowed variable.
- if (OldVal)
- NamedValues[VarName] = OldVal;
- else
- NamedValues.erase(VarName);
-
-
- // for expr always returns 0.0.
- return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-
-Value *VarExprAST::Codegen() {
- std::vector<AllocaInst *> OldBindings;
-
- Function *TheFunction = Builder.GetInsertBlock()->getParent();
-
- // Register all variables and emit their initializer.
- for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
- const std::string &VarName = VarNames[i].first;
- ExprAST *Init = VarNames[i].second;
-
- // Emit the initializer before adding the variable to scope, this prevents
- // the initializer from referencing the variable itself, and permits stuff
- // like this:
- // var a = 1 in
- // var a = a in ... # refers to outer 'a'.
- Value *InitVal;
- if (Init) {
- InitVal = Init->Codegen();
- if (InitVal == 0) return 0;
- } else { // If not specified, use 0.0.
- InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
- }
-
- AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
- Builder.CreateStore(InitVal, Alloca);
-
- // Remember the old variable binding so that we can restore the binding when
- // we unrecurse.
- OldBindings.push_back(NamedValues[VarName]);
-
- // Remember this binding.
- NamedValues[VarName] = Alloca;
- }
-
- // Codegen the body, now that all vars are in scope.
- Value *BodyVal = Body->Codegen();
- if (BodyVal == 0) return 0;
-
- // Pop all our variables from scope.
- for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
- NamedValues[VarNames[i].first] = OldBindings[i];
-
- // Return the body computation.
- return BodyVal;
-}
-
-Function *PrototypeAST::Codegen() {
- // Make the function type: double(double,double) etc.
- std::vector<Type*> Doubles(Args.size(),
- Type::getDoubleTy(getGlobalContext()));
- FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
- Doubles, false);
-
- Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-
- // If F conflicted, there was already something named 'Name'. If it has a
- // body, don't allow redefinition or reextern.
- if (F->getName() != Name) {
- // Delete the one we just made and get the existing one.
- F->eraseFromParent();
- F = TheModule->getFunction(Name);
-
- // If F already has a body, reject this.
- if (!F->empty()) {
- ErrorF("redefinition of function");
- return 0;
- }
-
- // If F took a different number of args, reject.
- if (F->arg_size() != Args.size()) {
- ErrorF("redefinition of function with different # args");
- return 0;
- }
- }
-
- // Set names for all arguments.
- unsigned Idx = 0;
- for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
- ++AI, ++Idx)
- AI->setName(Args[Idx]);
-
- return F;
-}
-
-/// CreateArgumentAllocas - Create an alloca for each argument and register the
-/// argument in the symbol table so that references to it will succeed.
-void PrototypeAST::CreateArgumentAllocas(Function *F) {
- Function::arg_iterator AI = F->arg_begin();
- for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
- // Create an alloca for this variable.
- AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
-
- // Store the initial value into the alloca.
- Builder.CreateStore(AI, Alloca);
-
- // Add arguments to variable symbol table.
- NamedValues[Args[Idx]] = Alloca;
- }
-}
-
-Function *FunctionAST::Codegen() {
- NamedValues.clear();
-
- Function *TheFunction = Proto->Codegen();
- if (TheFunction == 0)
- return 0;
-
- // If this is an operator, install it.
- if (Proto->isBinaryOp())
- BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
-
- // Create a new basic block to start insertion into.
- BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
- Builder.SetInsertPoint(BB);
-
- // Add all arguments to the symbol table and create their allocas.
- Proto->CreateArgumentAllocas(TheFunction);
-
- if (Value *RetVal = Body->Codegen()) {
- // Finish off the function.
- Builder.CreateRet(RetVal);
-
- // Validate the generated code, checking for consistency.
- verifyFunction(*TheFunction);
-
- // Optimize the function.
- TheFPM->run(*TheFunction);
-
- return TheFunction;
- }
-
- // Error reading body, remove function.
- TheFunction->eraseFromParent();
-
- if (Proto->isBinaryOp())
- BinopPrecedence.erase(Proto->getOperatorName());
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-static void HandleDefinition() {
- if (FunctionAST *F = ParseDefinition()) {
- if (Function *LF = F->Codegen()) {
- fprintf(stderr, "Read function definition:");
- LF->dump();
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleExtern() {
- if (PrototypeAST *P = ParseExtern()) {
- if (Function *F = P->Codegen()) {
- fprintf(stderr, "Read extern: ");
- F->dump();
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-static void HandleTopLevelExpression() {
- // Evaluate a top-level expression into an anonymous function.
- if (FunctionAST *F = ParseTopLevelExpr()) {
- if (Function *LF = F->Codegen()) {
- // JIT the function, returning a function pointer.
- void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
-
- // Cast it to the right type (takes no arguments, returns a double) so we
- // can call it as a native function.
- double (*FP)() = (double (*)())(intptr_t)FPtr;
- fprintf(stderr, "Evaluated to %f\n", FP());
- }
- } else {
- // Skip token for error recovery.
- getNextToken();
- }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
- while (1) {
- fprintf(stderr, "ready&gt; ");
- switch (CurTok) {
- case tok_eof: return;
- case ';': getNextToken(); break; // ignore top-level semicolons.
- case tok_def: HandleDefinition(); break;
- case tok_extern: HandleExtern(); break;
- default: HandleTopLevelExpression(); break;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C"
-double putchard(double X) {
- putchar((char)X);
- return 0;
-}
-
-/// printd - printf that takes a double prints it as "%f\n", returning 0.
-extern "C"
-double printd(double X) {
- printf("%f\n", X);
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
- InitializeNativeTarget();
- LLVMContext &Context = getGlobalContext();
-
- // Install standard binary operators.
- // 1 is lowest precedence.
- BinopPrecedence['='] = 2;
- BinopPrecedence['<'] = 10;
- BinopPrecedence['+'] = 20;
- BinopPrecedence['-'] = 20;
- BinopPrecedence['*'] = 40; // highest.
-
- // Prime the first token.
- fprintf(stderr, "ready&gt; ");
- getNextToken();
-
- // Make the module, which holds all the code.
- TheModule = new Module("my cool jit", Context);
-
- // Create the JIT. This takes ownership of the module.
- std::string ErrStr;
- TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
- if (!TheExecutionEngine) {
- fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
- exit(1);
- }
-
- FunctionPassManager OurFPM(TheModule);
-
- // Set up the optimizer pipeline. Start with registering info about how the
- // target lays out data structures.
- OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
- // Provide basic AliasAnalysis support for GVN.
- OurFPM.add(createBasicAliasAnalysisPass());
- // Promote allocas to registers.
- OurFPM.add(createPromoteMemoryToRegisterPass());
- // Do simple "peephole" optimizations and bit-twiddling optzns.
- OurFPM.add(createInstructionCombiningPass());
- // Reassociate expressions.
- OurFPM.add(createReassociatePass());
- // Eliminate Common SubExpressions.
- OurFPM.add(createGVNPass());
- // Simplify the control flow graph (deleting unreachable blocks, etc).
- OurFPM.add(createCFGSimplificationPass());
-
- OurFPM.doInitialization();
-
- // Set the global so the code gen can use this.
- TheFPM = &OurFPM;
-
- // Run the main "interpreter loop" now.
- MainLoop();
-
- TheFPM = 0;
-
- // Print out all of the generated code.
- TheModule->dump();
-
- return 0;
-}
-</pre>
-</div>
-
-<a href="LangImpl8.html">Next: Conclusion and other useful LLVM tidbits</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl7.rst b/docs/tutorial/LangImpl7.rst
new file mode 100644
index 000000000000..6dde2fe41d1a
--- /dev/null
+++ b/docs/tutorial/LangImpl7.rst
@@ -0,0 +1,2003 @@
+=======================================================
+Kaleidoscope: Extending the Language: Mutable Variables
+=======================================================
+
+.. contents::
+ :local:
+
+Chapter 7 Introduction
+======================
+
+Welcome to Chapter 7 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. In chapters 1 through 6, we've built a
+very respectable, albeit simple, `functional programming
+language <http://en.wikipedia.org/wiki/Functional_programming>`_. In our
+journey, we learned some parsing techniques, how to build and represent
+an AST, how to build LLVM IR, and how to optimize the resultant code as
+well as JIT compile it.
+
+While Kaleidoscope is interesting as a functional language, the fact
+that it is functional makes it "too easy" to generate LLVM IR for it. In
+particular, a functional language makes it very easy to build LLVM IR
+directly in `SSA
+form <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+Since LLVM requires that the input code be in SSA form, this is a very
+nice property; however, it is often unclear to newcomers how to generate
+code for an imperative language with mutable variables.
+
+The short (and happy) summary of this chapter is that there is no need
+for your front-end to build SSA form: LLVM provides highly tuned and
+well tested support for this, though the way it works is a bit
+unexpected for some.
+
+Why is this a hard problem?
+===========================
+
+To understand why mutable variables cause complexities in SSA
+construction, consider this extremely simple C example:
+
+.. code-block:: c
+
+ int G, H;
+ int test(_Bool Condition) {
+ int X;
+ if (Condition)
+ X = G;
+ else
+ X = H;
+ return X;
+ }
+
+In this case, we have the variable "X", whose value depends on the path
+executed in the program. Because there are two different possible values
+for X before the return instruction, a PHI node is inserted to merge the
+two values. The LLVM IR that we want for this example looks like this:
+
+.. code-block:: llvm
+
+ @G = weak global i32 0 ; type of @G is i32*
+ @H = weak global i32 0 ; type of @H is i32*
+
+ define i32 @test(i1 %Condition) {
+ entry:
+ br i1 %Condition, label %cond_true, label %cond_false
+
+ cond_true:
+ %X.0 = load i32* @G
+ br label %cond_next
+
+ cond_false:
+ %X.1 = load i32* @H
+ br label %cond_next
+
+ cond_next:
+ %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+ ret i32 %X.2
+ }
+
+In this example, the loads from the G and H global variables are
+explicit in the LLVM IR, and they live in the then/else branches of the
+if statement (cond\_true/cond\_false). In order to merge the incoming
+values, the X.2 phi node in the cond\_next block selects the right value
+to use based on where control flow is coming from: if control flow comes
+from the cond\_false block, X.2 gets the value of X.1. Alternatively, if
+control flow comes from cond\_true, it gets the value of X.0. The intent
+of this chapter is not to explain the details of SSA form. For more
+information, see one of the many `online
+references <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+
+The question for this article is "who places the phi nodes when lowering
+assignments to mutable variables?". The issue here is that LLVM
+*requires* that its IR be in SSA form: there is no "non-ssa" mode for
+it. However, SSA construction requires non-trivial algorithms and data
+structures, so it is inconvenient and wasteful for every front-end to
+have to reproduce this logic.
+
+Memory in LLVM
+==============
+
+The 'trick' here is that while LLVM does require all register values to
+be in SSA form, it does not require (or permit) memory objects to be in
+SSA form. In the example above, note that the loads from G and H are
+direct accesses to G and H: they are not renamed or versioned. This
+differs from some other compiler systems, which do try to version memory
+objects. In LLVM, instead of encoding dataflow analysis of memory into
+the LLVM IR, it is handled with `Analysis
+Passes <../WritingAnLLVMPass.html>`_ which are computed on demand.
+
+With this in mind, the high-level idea is that we want to make a stack
+variable (which lives in memory, because it is on the stack) for each
+mutable object in a function. To take advantage of this trick, we need
+to talk about how LLVM represents stack variables.
+
+In LLVM, all memory accesses are explicit with load/store instructions,
+and it is carefully designed not to have (or need) an "address-of"
+operator. Notice how the type of the @G/@H global variables is actually
+"i32\*" even though the variable is defined as "i32". What this means is
+that @G defines *space* for an i32 in the global data area, but its
+*name* actually refers to the address for that space. Stack variables
+work the same way, except that instead of being declared with global
+variable definitions, they are declared with the `LLVM alloca
+instruction <../LangRef.html#i_alloca>`_:
+
+.. code-block:: llvm
+
+ define i32 @example() {
+ entry:
+ %X = alloca i32 ; type of %X is i32*.
+ ...
+ %tmp = load i32* %X ; load the stack value %X from the stack.
+ %tmp2 = add i32 %tmp, 1 ; increment it
+ store i32 %tmp2, i32* %X ; store it back
+ ...
+
+This code shows an example of how you can declare and manipulate a stack
+variable in the LLVM IR. Stack memory allocated with the alloca
+instruction is fully general: you can pass the address of the stack slot
+to functions, you can store it in other variables, etc. In our example
+above, we could rewrite the example to use the alloca technique to avoid
+using a PHI node:
+
+.. code-block:: llvm
+
+ @G = weak global i32 0 ; type of @G is i32*
+ @H = weak global i32 0 ; type of @H is i32*
+
+ define i32 @test(i1 %Condition) {
+ entry:
+ %X = alloca i32 ; type of %X is i32*.
+ br i1 %Condition, label %cond_true, label %cond_false
+
+ cond_true:
+ %X.0 = load i32* @G
+ store i32 %X.0, i32* %X ; Update X
+ br label %cond_next
+
+ cond_false:
+ %X.1 = load i32* @H
+ store i32 %X.1, i32* %X ; Update X
+ br label %cond_next
+
+ cond_next:
+ %X.2 = load i32* %X ; Read X
+ ret i32 %X.2
+ }
+
+With this, we have discovered a way to handle arbitrary mutable
+variables without the need to create Phi nodes at all:
+
+#. Each mutable variable becomes a stack allocation.
+#. Each read of the variable becomes a load from the stack.
+#. Each update of the variable becomes a store to the stack.
+#. Taking the address of a variable just uses the stack address
+ directly.
+
+While this solution has solved our immediate problem, it introduced
+another one: we have now apparently introduced a lot of stack traffic
+for very simple and common operations, a major performance problem.
+Fortunately for us, the LLVM optimizer has a highly-tuned optimization
+pass named "mem2reg" that handles this case, promoting allocas like this
+into SSA registers, inserting Phi nodes as appropriate. If you run this
+example through the pass, for example, you'll get:
+
+.. code-block:: bash
+
+ $ llvm-as < example.ll | opt -mem2reg | llvm-dis
+ @G = weak global i32 0
+ @H = weak global i32 0
+
+ define i32 @test(i1 %Condition) {
+ entry:
+ br i1 %Condition, label %cond_true, label %cond_false
+
+ cond_true:
+ %X.0 = load i32* @G
+ br label %cond_next
+
+ cond_false:
+ %X.1 = load i32* @H
+ br label %cond_next
+
+ cond_next:
+ %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+ ret i32 %X.01
+ }
+
+The mem2reg pass implements the standard "iterated dominance frontier"
+algorithm for constructing SSA form and has a number of optimizations
+that speed up (very common) degenerate cases. The mem2reg optimization
+pass is the answer to dealing with mutable variables, and we highly
+recommend that you depend on it. Note that mem2reg only works on
+variables in certain circumstances:
+
+#. mem2reg is alloca-driven: it looks for allocas and if it can handle
+ them, it promotes them. It does not apply to global variables or heap
+ allocations.
+#. mem2reg only looks for alloca instructions in the entry block of the
+ function. Being in the entry block guarantees that the alloca is only
+ executed once, which makes analysis simpler.
+#. mem2reg only promotes allocas whose uses are direct loads and stores.
+ If the address of the stack object is passed to a function, or if any
+ funny pointer arithmetic is involved, the alloca will not be
+ promoted.
+#. mem2reg only works on allocas of `first
+ class <../LangRef.html#t_classifications>`_ values (such as pointers,
+ scalars and vectors), and only if the array size of the allocation is
+ 1 (or missing in the .ll file). mem2reg is not capable of promoting
+ structs or arrays to registers. Note that the "scalarrepl" pass is
+ more powerful and can promote structs, "unions", and arrays in many
+ cases.
+
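+To make the third restriction concrete, here is a minimal sketch (in the
+typed-pointer IR syntax used throughout this chapter, with a hypothetical
+@take_addr function) contrasting an alloca that mem2reg can promote with
+one that it cannot, because its address escapes:
+
+.. code-block:: llvm
+
+ declare void @take_addr(i32*)
+
+ define i32 @promotable_vs_not() {
+ entry:
+ %a = alloca i32 ; only loaded and stored directly: promotable
+ %b = alloca i32 ; its address is passed to a call: not promotable
+ store i32 1, i32* %a
+ store i32 2, i32* %b
+ call void @take_addr(i32* %b) ; %b's address escapes here
+ %v = load i32* %a
+ ret i32 %v
+ }
+
+Running ``opt -mem2reg`` on this function rewrites every use of %a as an
+SSA value but leaves the alloca, store and load of %b untouched.
+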
+All of these properties are easy to satisfy for most imperative
+languages, and we'll illustrate this below with Kaleidoscope. The final
+question you may be asking is: should I bother with this nonsense for my
+front-end? Wouldn't it be better if I just did SSA construction
+directly, avoiding use of the mem2reg optimization pass? In short, we
+strongly recommend that you use this technique for building SSA form,
+unless there is an extremely good reason not to. Using this technique
+is:
+
+- Proven and well tested: llvm-gcc and clang both use this technique
+ for local mutable variables. As such, the most common clients of LLVM
+ are using this to handle the bulk of their variables. You can be sure
+ that bugs are found fast and fixed early.
+- Extremely Fast: mem2reg has a number of special cases that make it
+ fast in common cases as well as fully general. For example, it has
+ fast-paths for variables that are only used in a single block,
+ variables that only have one assignment point, good heuristics to
+ avoid insertion of unneeded phi nodes, etc.
+- Needed for debug info generation: `Debug information in
+ LLVM <../SourceLevelDebugging.html>`_ relies on having the address of
+ the variable exposed so that debug info can be attached to it. This
+ technique dovetails very naturally with this style of debug info.
+
+If nothing else, this makes it much easier to get your front-end up and
+running, and is very simple to implement. Let's extend Kaleidoscope with
+mutable variables now!
+
+Mutable Variables in Kaleidoscope
+=================================
+
+Now that we know the sort of problem we want to tackle, let's see what
+this looks like in the context of our little Kaleidoscope language.
+We're going to add two features:
+
+#. The ability to mutate variables with the '=' operator.
+#. The ability to define new variables.
+
+While the first item is really what this is about, we only have
+variables for incoming arguments as well as for induction variables, and
+redefining those only goes so far :). Also, the ability to define new
+variables is a useful thing regardless of whether you will be mutating
+them. Here's a motivating example that shows how we could use these:
+
+::
+
+ # Define ':' for sequencing: as a low-precedence operator that ignores operands
+ # and just returns the RHS.
+ def binary : 1 (x y) y;
+
+ # Recursive fib, we could do this before.
+ def fib(x)
+ if (x < 3) then
+ 1
+ else
+ fib(x-1)+fib(x-2);
+
+ # Iterative fib.
+ def fibi(x)
+ var a = 1, b = 1, c in
+ (for i = 3, i < x in
+ c = a + b :
+ a = b :
+ b = c) :
+ b;
+
+ # Call it.
+ fibi(10);
+
+In order to mutate variables, we have to change our existing variables
+to use the "alloca trick". Once we have that, we'll add our new
+operator, then extend Kaleidoscope to support new variable definitions.
+
+Adjusting Existing Variables for Mutation
+=========================================
+
+The symbol table in Kaleidoscope is managed at code generation time by
+the '``NamedValues``' map. This map currently keeps track of the LLVM
+"Value\*" that holds the double value for the named variable. In order
+to support mutation, we need to change this slightly, so that
+``NamedValues`` holds the *memory location* of the variable in question.
+Note that this change is a refactoring: it changes the structure of the
+code, but does not (by itself) change the behavior of the compiler. All
+of these changes are isolated in the Kaleidoscope code generator.
+
+At this point in Kaleidoscope's development, it only supports variables
+for two things: incoming arguments to functions and the induction
+variable of 'for' loops. For consistency, we'll allow mutation of these
+variables in addition to other user-defined variables. This means that
+these will both need memory locations.
+
+To start our transformation of Kaleidoscope, we'll change the
+NamedValues map so that it maps to AllocaInst\* instead of Value\*. Once
+we do this, the C++ compiler will tell us what parts of the code we need
+to update:
+
+.. code-block:: c++
+
+ static std::map<std::string, AllocaInst*> NamedValues;
+
+Also, since we will need to create these alloca's, we'll use a helper
+function that ensures that the allocas are created in the entry block of
+the function:
+
+.. code-block:: c++
+
+ /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
+ /// the function. This is used for mutable variables etc.
+ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+ const std::string &VarName) {
+ IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
+ TheFunction->getEntryBlock().begin());
+ return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+ VarName.c_str());
+ }
+
+This funny-looking code creates an IRBuilder object that is pointing at
+the first instruction (.begin()) of the entry block. It then creates an
+alloca with the expected name and returns it. Because all values in
+Kaleidoscope are doubles, there is no need to pass in a type to use.
+
+With this in place, the first functionality change we want to make is to
+variable references. In our new scheme, variables live on the stack, so
+code generating a reference to them actually needs to produce a load
+from the stack slot:
+
+.. code-block:: c++
+
+ Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ if (V == 0) return ErrorV("Unknown variable name");
+
+ // Load the value.
+ return Builder.CreateLoad(V, Name.c_str());
+ }
+
+As you can see, this is pretty straightforward. Now we need to update
+the things that define the variables to set up the alloca. We'll start
+with ``ForExprAST::Codegen`` (see the `full code listing <#code>`_ for
+the unabridged code):
+
+.. code-block:: c++
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Create an alloca for the variable in the entry block.
+ AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+
+ // Emit the start code first, without 'variable' in scope.
+ Value *StartVal = Start->Codegen();
+ if (StartVal == 0) return 0;
+
+ // Store the value into the alloca.
+ Builder.CreateStore(StartVal, Alloca);
+ ...
+
+ // Compute the end condition.
+ Value *EndCond = End->Codegen();
+ if (EndCond == 0) return EndCond;
+
+ // Reload, increment, and restore the alloca. This handles the case where
+ // the body of the loop mutates the variable.
+ Value *CurVar = Builder.CreateLoad(Alloca);
+ Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
+ Builder.CreateStore(NextVar, Alloca);
+ ...
+
+This code is virtually identical to the code `before we allowed mutable
+variables <LangImpl5.html#forcodegen>`_. The big difference is that we
+no longer have to construct a PHI node, and we use load/store to access
+the variable as needed.
+
+To support mutable argument variables, we need to also make allocas for
+them. The code for this is also pretty simple:
+
+.. code-block:: c++
+
+ /// CreateArgumentAllocas - Create an alloca for each argument and register the
+ /// argument in the symbol table so that references to it will succeed.
+ void PrototypeAST::CreateArgumentAllocas(Function *F) {
+ Function::arg_iterator AI = F->arg_begin();
+ for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
+ // Create an alloca for this variable.
+ AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
+
+ // Store the initial value into the alloca.
+ Builder.CreateStore(AI, Alloca);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = Alloca;
+ }
+ }
+
+For each argument, we make an alloca, store the input value to the
+function into the alloca, and register the alloca as the memory location
+for the argument. This method gets invoked by ``FunctionAST::Codegen``
+right after it sets up the entry block for the function.
+
+The final missing piece is adding the mem2reg pass, which allows us to
+get good codegen once again:
+
+.. code-block:: c++
+
+ // Set up the optimizer pipeline. Start with registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+ // Promote allocas to registers.
+ OurFPM.add(createPromoteMemoryToRegisterPass());
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+
+It is interesting to see what the code looks like before and after the
+mem2reg optimization runs. For example, this is the before/after code
+for our recursive fib function. Before the optimization:
+
+.. code-block:: llvm
+
+ define double @fib(double %x) {
+ entry:
+ %x1 = alloca double
+ store double %x, double* %x1
+ %x2 = load double* %x1
+ %cmptmp = fcmp ult double %x2, 3.000000e+00
+ %booltmp = uitofp i1 %cmptmp to double
+ %ifcond = fcmp one double %booltmp, 0.000000e+00
+ br i1 %ifcond, label %then, label %else
+
+ then: ; preds = %entry
+ br label %ifcont
+
+ else: ; preds = %entry
+ %x3 = load double* %x1
+ %subtmp = fsub double %x3, 1.000000e+00
+ %calltmp = call double @fib(double %subtmp)
+ %x4 = load double* %x1
+ %subtmp5 = fsub double %x4, 2.000000e+00
+ %calltmp6 = call double @fib(double %subtmp5)
+ %addtmp = fadd double %calltmp, %calltmp6
+ br label %ifcont
+
+ ifcont: ; preds = %else, %then
+ %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+ ret double %iftmp
+ }
+
+Here there is only one variable (x, the input argument) but you can
+still see the extremely simple-minded code generation strategy we are
+using. In the entry block, an alloca is created, and the initial input
+value is stored into it. Each reference to the variable does a reload
+from the stack. Also, note that we didn't modify the if/then/else
+expression, so it still inserts a PHI node. While we could make an
+alloca for it, it is actually easier to create a PHI node for it, so we
+still just make the PHI.
+
+Here is the code after the mem2reg pass runs:
+
+.. code-block:: llvm
+
+ define double @fib(double %x) {
+ entry:
+ %cmptmp = fcmp ult double %x, 3.000000e+00
+ %booltmp = uitofp i1 %cmptmp to double
+ %ifcond = fcmp one double %booltmp, 0.000000e+00
+ br i1 %ifcond, label %then, label %else
+
+ then:
+ br label %ifcont
+
+ else:
+ %subtmp = fsub double %x, 1.000000e+00
+ %calltmp = call double @fib(double %subtmp)
+ %subtmp5 = fsub double %x, 2.000000e+00
+ %calltmp6 = call double @fib(double %subtmp5)
+ %addtmp = fadd double %calltmp, %calltmp6
+ br label %ifcont
+
+ ifcont: ; preds = %else, %then
+ %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+ ret double %iftmp
+ }
+
+This is a trivial case for mem2reg, since there are no redefinitions of
+the variable. The point of showing this is to calm your tension about
+inserting such blatant inefficiencies :).
+
+After the rest of the optimizers run, we get:
+
+.. code-block:: llvm
+
+ define double @fib(double %x) {
+ entry:
+ %cmptmp = fcmp ult double %x, 3.000000e+00
+ %booltmp = uitofp i1 %cmptmp to double
+ %ifcond = fcmp ueq double %booltmp, 0.000000e+00
+ br i1 %ifcond, label %else, label %ifcont
+
+ else:
+ %subtmp = fsub double %x, 1.000000e+00
+ %calltmp = call double @fib(double %subtmp)
+ %subtmp5 = fsub double %x, 2.000000e+00
+ %calltmp6 = call double @fib(double %subtmp5)
+ %addtmp = fadd double %calltmp, %calltmp6
+ ret double %addtmp
+
+ ifcont:
+ ret double 1.000000e+00
+ }
+
+Here we see that the simplifycfg pass decided to clone the return
+instruction into the end of the 'else' block. This allowed it to
+eliminate some branches and the PHI node.
+
+Now that all symbol table references are updated to use stack variables,
+we'll add the assignment operator.
+
+New Assignment Operator
+=======================
+
+With our current framework, adding a new assignment operator is really
+simple. We will parse it just like any other binary operator, but handle
+it internally (instead of allowing the user to define it). The first
+step is to set a precedence:
+
+.. code-block:: c++
+
+ int main() {
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['='] = 2;
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+
+Now that the parser knows the precedence of the binary operator, it
+takes care of all the parsing and AST generation. We just need to
+implement codegen for the assignment operator. This looks like:
+
+.. code-block:: c++
+
+ Value *BinaryExprAST::Codegen() {
+ // Special case '=' because we don't want to emit the LHS as an expression.
+ if (Op == '=') {
+ // Assignment requires the LHS to be an identifier.
+ VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
+ if (!LHSE)
+ return ErrorV("destination of '=' must be a variable");
+
+Unlike the rest of the binary operators, our assignment operator doesn't
+follow the "emit LHS, emit RHS, do computation" model. As such, it is
+handled as a special case before the other binary operators are handled.
+The other strange thing is that it requires the LHS to be a variable. It
+is invalid to have "(x+1) = expr" - only things like "x = expr" are
+allowed.
+
+.. code-block:: c++
+
+ // Codegen the RHS.
+ Value *Val = RHS->Codegen();
+ if (Val == 0) return 0;
+
+ // Look up the name.
+ Value *Variable = NamedValues[LHSE->getName()];
+ if (Variable == 0) return ErrorV("Unknown variable name");
+
+ Builder.CreateStore(Val, Variable);
+ return Val;
+ }
+ ...
+
+Once we have the variable, codegen'ing the assignment is
+straightforward: we emit the RHS of the assignment, create a store, and
+return the computed value. Returning a value allows for chained
+assignments like "X = (Y = Z)".
+
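+As a quick illustration (a hypothetical snippet, assuming the ':'
+sequencing operator and the printd extern shown in the next example), a
+chained assignment works because each '=' hands back the value it stored:
+
+::
+
+ # Both x and y end up as 4; chain(1, 2) prints "4" twice.
+ def chain(x y)
+ (x = (y = 4)) : printd(x) : printd(y);
+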
+Now that we have an assignment operator, we can mutate loop variables
+and arguments. For example, we can now run code like this:
+
+::
+
+ # Function to print a double.
+ extern printd(x);
+
+ # Define ':' for sequencing: as a low-precedence operator that ignores operands
+ # and just returns the RHS.
+ def binary : 1 (x y) y;
+
+ def test(x)
+ printd(x) :
+ x = 4 :
+ printd(x);
+
+ test(123);
+
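+For reference, a rough sketch of the resulting session (printd formats
+its argument with "%f\n", and the driver prints whatever the top-level
+call returns):
+
+.. code-block:: bash
+
+ ready> test(123);
+ 123.000000
+ 4.000000
+ Evaluated to 0.000000
+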
+When run, this example prints "123" and then "4", showing that we did
+actually mutate the value! Okay, we have now officially implemented our
+goal: getting this to work requires SSA construction in the general
+case. However, to be really useful, we want the ability to define our
+own local variables; let's add that next!
+
+User-defined Local Variables
+============================
+
+Adding var/in is just like any of the other extensions we made to
+Kaleidoscope: we extend the lexer, the parser, the AST and the code
+generator. The first step for adding our new 'var/in' construct is to
+extend the lexer. As before, this is pretty trivial; the code looks like
+this:
+
+.. code-block:: c++
+
+ enum Token {
+ ...
+ // var definition
+ tok_var = -13
+ ...
+ }
+ ...
+ static int gettok() {
+ ...
+ if (IdentifierStr == "in") return tok_in;
+ if (IdentifierStr == "binary") return tok_binary;
+ if (IdentifierStr == "unary") return tok_unary;
+ if (IdentifierStr == "var") return tok_var;
+ return tok_identifier;
+ ...
+
+The next step is to define the AST node that we will construct. For
+var/in, it looks like this:
+
+.. code-block:: c++
+
+ /// VarExprAST - Expression class for var/in
+ class VarExprAST : public ExprAST {
+ std::vector<std::pair<std::string, ExprAST*> > VarNames;
+ ExprAST *Body;
+ public:
+ VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
+ ExprAST *body)
+ : VarNames(varnames), Body(body) {}
+
+ virtual Value *Codegen();
+ };
+
+var/in allows a list of names to be defined all at once, and each name
+can optionally have an initializer value. As such, we capture this
+information in the VarNames vector. Also, var/in has a body; this body
+is allowed to access the variables defined by the var/in.
+
+With this in place, we can define the parser pieces. The first thing we
+do is add it as a primary expression:
+
+.. code-block:: c++
+
+ /// primary
+ /// ::= identifierexpr
+ /// ::= numberexpr
+ /// ::= parenexpr
+ /// ::= ifexpr
+ /// ::= forexpr
+ /// ::= varexpr
+ static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ case tok_if: return ParseIfExpr();
+ case tok_for: return ParseForExpr();
+ case tok_var: return ParseVarExpr();
+ }
+ }
+
+Next we define ParseVarExpr:
+
+.. code-block:: c++
+
+ /// varexpr ::= 'var' identifier ('=' expression)?
+ // (',' identifier ('=' expression)?)* 'in' expression
+ static ExprAST *ParseVarExpr() {
+ getNextToken(); // eat the var.
+
+ std::vector<std::pair<std::string, ExprAST*> > VarNames;
+
+ // At least one variable name is required.
+ if (CurTok != tok_identifier)
+ return Error("expected identifier after var");
+
+The first part of this code parses the list of identifier/expr pairs
+into the local ``VarNames`` vector.
+
+.. code-block:: c++
+
+ while (1) {
+ std::string Name = IdentifierStr;
+ getNextToken(); // eat identifier.
+
+ // Read the optional initializer.
+ ExprAST *Init = 0;
+ if (CurTok == '=') {
+ getNextToken(); // eat the '='.
+
+ Init = ParseExpression();
+ if (Init == 0) return 0;
+ }
+
+ VarNames.push_back(std::make_pair(Name, Init));
+
+ // End of var list, exit loop.
+ if (CurTok != ',') break;
+ getNextToken(); // eat the ','.
+
+ if (CurTok != tok_identifier)
+ return Error("expected identifier list after var");
+ }
+
+Once all the variables are parsed, we then parse the body and create the
+AST node:
+
+.. code-block:: c++
+
+ // At this point, we have to have 'in'.
+ if (CurTok != tok_in)
+ return Error("expected 'in' keyword after 'var'");
+ getNextToken(); // eat 'in'.
+
+ ExprAST *Body = ParseExpression();
+ if (Body == 0) return 0;
+
+ return new VarExprAST(VarNames, Body);
+ }
+
+Now that we can parse and represent the code, we need to support
+emission of LLVM IR for it. This code starts out with:
+
+.. code-block:: c++
+
+ Value *VarExprAST::Codegen() {
+ std::vector<AllocaInst *> OldBindings;
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Register all variables and emit their initializer.
+ for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+ const std::string &VarName = VarNames[i].first;
+ ExprAST *Init = VarNames[i].second;
+
+Basically it loops over all the variables, installing them one at a
+time. For each variable we put into the symbol table, we remember the
+previous value that we replace in OldBindings.
+
+.. code-block:: c++
+
+ // Emit the initializer before adding the variable to scope, this prevents
+ // the initializer from referencing the variable itself, and permits stuff
+ // like this:
+ // var a = 1 in
+ // var a = a in ... # refers to outer 'a'.
+ Value *InitVal;
+ if (Init) {
+ InitVal = Init->Codegen();
+ if (InitVal == 0) return 0;
+ } else { // If not specified, use 0.0.
+ InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
+ }
+
+ AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+ Builder.CreateStore(InitVal, Alloca);
+
+ // Remember the old variable binding so that we can restore the binding when
+ // we unrecurse.
+ OldBindings.push_back(NamedValues[VarName]);
+
+ // Remember this binding.
+ NamedValues[VarName] = Alloca;
+ }
+
+There are more comments here than code. The basic idea is that we emit
+the initializer, create the alloca, then update the symbol table to
+point to it. Once all the variables are installed in the symbol table,
+we evaluate the body of the var/in expression:
+
+.. code-block:: c++
+
+ // Codegen the body, now that all vars are in scope.
+ Value *BodyVal = Body->Codegen();
+ if (BodyVal == 0) return 0;
+
+Finally, before returning, we restore the previous variable bindings:
+
+.. code-block:: c++
+
+ // Pop all our variables from scope.
+ for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
+ NamedValues[VarNames[i].first] = OldBindings[i];
+
+ // Return the body computation.
+ return BodyVal;
+ }
+
+The end result of all of this is that we get properly scoped variable
+definitions, and we even (trivially) allow mutation of them :).
+
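+For example, this hypothetical snippet (again assuming the ':' operator
+and printd from earlier) shows both scoping and the restoration of
+shadowed bindings at work:
+
+::
+
+ # The inner 'a' initializer sees the outer 'a' (so the inner 'a' is 2),
+ # and the outer binding is restored once the inner var/in body ends.
+ def scopes()
+ var a = 1 in
+ (var a = a + 1 in printd(a)) : # prints 2
+ printd(a); # prints 1
+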
+With this, we completed what we set out to do. Our nice iterative fib
+example from the intro compiles and runs just fine. The mem2reg pass
+optimizes all of our stack variables into SSA registers, inserting PHI
+nodes where needed, and our front-end remains simple: no "iterated
+dominance frontier" computation anywhere in sight.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+mutable variables and var/in support. To build this example, use:
+
+.. code-block:: bash
+
+ # Compile
+ clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+ # Run
+ ./toy
+
+Here is the code:
+
+.. code-block:: c++
+
+ #include "llvm/DerivedTypes.h"
+ #include "llvm/ExecutionEngine/ExecutionEngine.h"
+ #include "llvm/ExecutionEngine/JIT.h"
+ #include "llvm/IRBuilder.h"
+ #include "llvm/LLVMContext.h"
+ #include "llvm/Module.h"
+ #include "llvm/PassManager.h"
+ #include "llvm/Analysis/Verifier.h"
+ #include "llvm/Analysis/Passes.h"
+ #include "llvm/DataLayout.h"
+ #include "llvm/Transforms/Scalar.h"
+ #include "llvm/Support/TargetSelect.h"
+ #include <cstdio>
+ #include <string>
+ #include <map>
+ #include <vector>
+ using namespace llvm;
+
+ //===----------------------------------------------------------------------===//
+ // Lexer
+ //===----------------------------------------------------------------------===//
+
+ // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+ // of these for known things.
+ enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5,
+
+ // control
+ tok_if = -6, tok_then = -7, tok_else = -8,
+ tok_for = -9, tok_in = -10,
+
+ // operators
+ tok_binary = -11, tok_unary = -12,
+
+ // var definition
+ tok_var = -13
+ };
+
+ static std::string IdentifierStr; // Filled in if tok_identifier
+ static double NumVal; // Filled in if tok_number
+
+ /// gettok - Return the next token from standard input.
+ static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ if (IdentifierStr == "if") return tok_if;
+ if (IdentifierStr == "then") return tok_then;
+ if (IdentifierStr == "else") return tok_else;
+ if (IdentifierStr == "for") return tok_for;
+ if (IdentifierStr == "in") return tok_in;
+ if (IdentifierStr == "binary") return tok_binary;
+ if (IdentifierStr == "unary") return tok_unary;
+ if (IdentifierStr == "var") return tok_var;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ascii value.
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Abstract Syntax Tree (aka Parse Tree)
+ //===----------------------------------------------------------------------===//
+
+ /// ExprAST - Base class for all expression nodes.
+ class ExprAST {
+ public:
+ virtual ~ExprAST() {}
+ virtual Value *Codegen() = 0;
+ };
+
+ /// NumberExprAST - Expression class for numeric literals like "1.0".
+ class NumberExprAST : public ExprAST {
+ double Val;
+ public:
+ NumberExprAST(double val) : Val(val) {}
+ virtual Value *Codegen();
+ };
+
+ /// VariableExprAST - Expression class for referencing a variable, like "a".
+ class VariableExprAST : public ExprAST {
+ std::string Name;
+ public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+ const std::string &getName() const { return Name; }
+ virtual Value *Codegen();
+ };
+
+ /// UnaryExprAST - Expression class for a unary operator.
+ class UnaryExprAST : public ExprAST {
+ char Opcode;
+ ExprAST *Operand;
+ public:
+ UnaryExprAST(char opcode, ExprAST *operand)
+ : Opcode(opcode), Operand(operand) {}
+ virtual Value *Codegen();
+ };
+
+ /// BinaryExprAST - Expression class for a binary operator.
+ class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+ public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+ virtual Value *Codegen();
+ };
+
+ /// CallExprAST - Expression class for function calls.
+ class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+ public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+ virtual Value *Codegen();
+ };
+
+ /// IfExprAST - Expression class for if/then/else.
+ class IfExprAST : public ExprAST {
+ ExprAST *Cond, *Then, *Else;
+ public:
+ IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+ : Cond(cond), Then(then), Else(_else) {}
+ virtual Value *Codegen();
+ };
+
+ /// ForExprAST - Expression class for for/in.
+ class ForExprAST : public ExprAST {
+ std::string VarName;
+ ExprAST *Start, *End, *Step, *Body;
+ public:
+ ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+ ExprAST *step, ExprAST *body)
+ : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+ virtual Value *Codegen();
+ };
+
+ /// VarExprAST - Expression class for var/in
+ class VarExprAST : public ExprAST {
+ std::vector<std::pair<std::string, ExprAST*> > VarNames;
+ ExprAST *Body;
+ public:
+ VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
+ ExprAST *body)
+ : VarNames(varnames), Body(body) {}
+
+ virtual Value *Codegen();
+ };
+
+ /// PrototypeAST - This class represents the "prototype" for a function,
+ /// which captures its name, and its argument names (thus implicitly the number
+ /// of arguments the function takes), as well as if it is an operator.
+ class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+ bool isOperator;
+ unsigned Precedence; // Precedence if a binary op.
+ public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+ bool isoperator = false, unsigned prec = 0)
+ : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+ bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+ bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+ char getOperatorName() const {
+ assert(isUnaryOp() || isBinaryOp());
+ return Name[Name.size()-1];
+ }
+
+ unsigned getBinaryPrecedence() const { return Precedence; }
+
+ Function *Codegen();
+
+ void CreateArgumentAllocas(Function *F);
+ };
+
+ /// FunctionAST - This class represents a function definition itself.
+ class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+ public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+ Function *Codegen();
+ };
+
+ //===----------------------------------------------------------------------===//
+ // Parser
+ //===----------------------------------------------------------------------===//
+
+ /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+ /// token the parser is looking at. getNextToken reads another token from the
+ /// lexer and updates CurTok with its results.
+ static int CurTok;
+ static int getNextToken() {
+ return CurTok = gettok();
+ }
+
+ /// BinopPrecedence - This holds the precedence for each binary operator that is
+ /// defined.
+ static std::map<char, int> BinopPrecedence;
+
+ /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+ static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+ }
+
+ /// Error* - These are little helper functions for error handling.
+ ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+ PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+ FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+ static ExprAST *ParseExpression();
+
+ /// identifierexpr
+ /// ::= identifier
+ /// ::= identifier '(' expression* ')'
+ static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+ }
+
+ /// numberexpr ::= number
+ static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+ }
+
+ /// parenexpr ::= '(' expression ')'
+ static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+ }
+
+ /// ifexpr ::= 'if' expression 'then' expression 'else' expression
+ static ExprAST *ParseIfExpr() {
+ getNextToken(); // eat the if.
+
+ // condition.
+ ExprAST *Cond = ParseExpression();
+ if (!Cond) return 0;
+
+ if (CurTok != tok_then)
+ return Error("expected then");
+ getNextToken(); // eat the then
+
+ ExprAST *Then = ParseExpression();
+ if (Then == 0) return 0;
+
+ if (CurTok != tok_else)
+ return Error("expected else");
+
+ getNextToken();
+
+ ExprAST *Else = ParseExpression();
+ if (!Else) return 0;
+
+ return new IfExprAST(Cond, Then, Else);
+ }
+
+ /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+ static ExprAST *ParseForExpr() {
+ getNextToken(); // eat the for.
+
+ if (CurTok != tok_identifier)
+ return Error("expected identifier after for");
+
+ std::string IdName = IdentifierStr;
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '=')
+ return Error("expected '=' after for");
+ getNextToken(); // eat '='.
+
+ ExprAST *Start = ParseExpression();
+ if (Start == 0) return 0;
+ if (CurTok != ',')
+ return Error("expected ',' after for start value");
+ getNextToken();
+
+ ExprAST *End = ParseExpression();
+ if (End == 0) return 0;
+
+ // The step value is optional.
+ ExprAST *Step = 0;
+ if (CurTok == ',') {
+ getNextToken();
+ Step = ParseExpression();
+ if (Step == 0) return 0;
+ }
+
+ if (CurTok != tok_in)
+ return Error("expected 'in' after for");
+ getNextToken(); // eat 'in'.
+
+ ExprAST *Body = ParseExpression();
+ if (Body == 0) return 0;
+
+ return new ForExprAST(IdName, Start, End, Step, Body);
+ }
+
+ /// varexpr ::= 'var' identifier ('=' expression)?
+ // (',' identifier ('=' expression)?)* 'in' expression
+ static ExprAST *ParseVarExpr() {
+ getNextToken(); // eat the var.
+
+ std::vector<std::pair<std::string, ExprAST*> > VarNames;
+
+ // At least one variable name is required.
+ if (CurTok != tok_identifier)
+ return Error("expected identifier after var");
+
+ while (1) {
+ std::string Name = IdentifierStr;
+ getNextToken(); // eat identifier.
+
+ // Read the optional initializer.
+ ExprAST *Init = 0;
+ if (CurTok == '=') {
+ getNextToken(); // eat the '='.
+
+ Init = ParseExpression();
+ if (Init == 0) return 0;
+ }
+
+ VarNames.push_back(std::make_pair(Name, Init));
+
+ // End of var list, exit loop.
+ if (CurTok != ',') break;
+ getNextToken(); // eat the ','.
+
+ if (CurTok != tok_identifier)
+ return Error("expected identifier list after var");
+ }
+
+ // At this point, we have to have 'in'.
+ if (CurTok != tok_in)
+ return Error("expected 'in' keyword after 'var'");
+ getNextToken(); // eat 'in'.
+
+ ExprAST *Body = ParseExpression();
+ if (Body == 0) return 0;
+
+ return new VarExprAST(VarNames, Body);
+ }
+
+ /// primary
+ /// ::= identifierexpr
+ /// ::= numberexpr
+ /// ::= parenexpr
+ /// ::= ifexpr
+ /// ::= forexpr
+ /// ::= varexpr
+ static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ case tok_if: return ParseIfExpr();
+ case tok_for: return ParseForExpr();
+ case tok_var: return ParseVarExpr();
+ }
+ }
+
+ /// unary
+ /// ::= primary
+ /// ::= '!' unary
+ static ExprAST *ParseUnary() {
+ // If the current token is not an operator, it must be a primary expr.
+ if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+ return ParsePrimary();
+
+ // If this is a unary operator, read it.
+ int Opc = CurTok;
+ getNextToken();
+ if (ExprAST *Operand = ParseUnary())
+ return new UnaryExprAST(Opc, Operand);
+ return 0;
+ }
+
+ /// binoprhs
+ /// ::= ('+' unary)*
+ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the unary expression after the binary operator.
+ ExprAST *RHS = ParseUnary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+ }
+
+ /// expression
+ /// ::= unary binoprhs
+ ///
+ static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParseUnary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+ }
+
+ /// prototype
+ /// ::= id '(' id* ')'
+ /// ::= binary LETTER number? (id, id)
+ /// ::= unary LETTER (id)
+ static PrototypeAST *ParsePrototype() {
+ std::string FnName;
+
+ unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+ unsigned BinaryPrecedence = 30;
+
+ switch (CurTok) {
+ default:
+ return ErrorP("Expected function name in prototype");
+ case tok_identifier:
+ FnName = IdentifierStr;
+ Kind = 0;
+ getNextToken();
+ break;
+ case tok_unary:
+ getNextToken();
+ if (!isascii(CurTok))
+ return ErrorP("Expected unary operator");
+ FnName = "unary";
+ FnName += (char)CurTok;
+ Kind = 1;
+ getNextToken();
+ break;
+ case tok_binary:
+ getNextToken();
+ if (!isascii(CurTok))
+ return ErrorP("Expected binary operator");
+ FnName = "binary";
+ FnName += (char)CurTok;
+ Kind = 2;
+ getNextToken();
+
+ // Read the precedence if present.
+ if (CurTok == tok_number) {
+ if (NumVal < 1 || NumVal > 100)
+ return ErrorP("Invalid precedecnce: must be 1..100");
+ BinaryPrecedence = (unsigned)NumVal;
+ getNextToken();
+ }
+ break;
+ }
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ // Verify right number of names for operator.
+ if (Kind && ArgNames.size() != Kind)
+ return ErrorP("Invalid number of operands for operator");
+
+ return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+ }
+
+ /// definition ::= 'def' prototype expression
+ static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+ }
+
+ /// toplevelexpr ::= expression
+ static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+ }
+
+ /// external ::= 'extern' prototype
+ static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
+ return ParsePrototype();
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Code Generation
+ //===----------------------------------------------------------------------===//
+
+ static Module *TheModule;
+ static IRBuilder<> Builder(getGlobalContext());
+ static std::map<std::string, AllocaInst*> NamedValues;
+ static FunctionPassManager *TheFPM;
+
+ Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+ /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
+ /// the function. This is used for mutable variables etc.
+ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+ const std::string &VarName) {
+ IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
+ TheFunction->getEntryBlock().begin());
+ return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+ VarName.c_str());
+ }
+
+ Value *NumberExprAST::Codegen() {
+ return ConstantFP::get(getGlobalContext(), APFloat(Val));
+ }
+
+ Value *VariableExprAST::Codegen() {
+ // Look this variable up in the function.
+ Value *V = NamedValues[Name];
+ if (V == 0) return ErrorV("Unknown variable name");
+
+ // Load the value.
+ return Builder.CreateLoad(V, Name.c_str());
+ }
+
+ Value *UnaryExprAST::Codegen() {
+ Value *OperandV = Operand->Codegen();
+ if (OperandV == 0) return 0;
+
+ Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+ if (F == 0)
+ return ErrorV("Unknown unary operator");
+
+ return Builder.CreateCall(F, OperandV, "unop");
+ }
+
+ Value *BinaryExprAST::Codegen() {
+ // Special case '=' because we don't want to emit the LHS as an expression.
+ if (Op == '=') {
+ // Assignment requires the LHS to be an identifier.
+ VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
+ if (!LHSE)
+ return ErrorV("destination of '=' must be a variable");
+ // Codegen the RHS.
+ Value *Val = RHS->Codegen();
+ if (Val == 0) return 0;
+
+ // Look up the name.
+ Value *Variable = NamedValues[LHSE->getName()];
+ if (Variable == 0) return ErrorV("Unknown variable name");
+
+ Builder.CreateStore(Val, Variable);
+ return Val;
+ }
+
+ Value *L = LHS->Codegen();
+ Value *R = RHS->Codegen();
+ if (L == 0 || R == 0) return 0;
+
+ switch (Op) {
+ case '+': return Builder.CreateFAdd(L, R, "addtmp");
+ case '-': return Builder.CreateFSub(L, R, "subtmp");
+ case '*': return Builder.CreateFMul(L, R, "multmp");
+ case '<':
+ L = Builder.CreateFCmpULT(L, R, "cmptmp");
+ // Convert bool 0/1 to double 0.0 or 1.0
+ return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+ "booltmp");
+ default: break;
+ }
+
+ // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+ // a call to it.
+ Function *F = TheModule->getFunction(std::string("binary")+Op);
+ assert(F && "binary operator not found!");
+
+ Value *Ops[2] = { L, R };
+ return Builder.CreateCall(F, Ops, "binop");
+ }
+
+ Value *CallExprAST::Codegen() {
+ // Look up the name in the global module table.
+ Function *CalleeF = TheModule->getFunction(Callee);
+ if (CalleeF == 0)
+ return ErrorV("Unknown function referenced");
+
+ // Check for an argument count mismatch.
+ if (CalleeF->arg_size() != Args.size())
+ return ErrorV("Incorrect # arguments passed");
+
+ std::vector<Value*> ArgsV;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ ArgsV.push_back(Args[i]->Codegen());
+ if (ArgsV.back() == 0) return 0;
+ }
+
+ return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+ }
+
+ Value *IfExprAST::Codegen() {
+ Value *CondV = Cond->Codegen();
+ if (CondV == 0) return 0;
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ CondV = Builder.CreateFCmpONE(CondV,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "ifcond");
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Create blocks for the then and else cases. Insert the 'then' block at the
+ // end of the function.
+ BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+ BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+ BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+ Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+ // Emit then value.
+ Builder.SetInsertPoint(ThenBB);
+
+ Value *ThenV = Then->Codegen();
+ if (ThenV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+ ThenBB = Builder.GetInsertBlock();
+
+ // Emit else block.
+ TheFunction->getBasicBlockList().push_back(ElseBB);
+ Builder.SetInsertPoint(ElseBB);
+
+ Value *ElseV = Else->Codegen();
+ if (ElseV == 0) return 0;
+
+ Builder.CreateBr(MergeBB);
+ // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+ ElseBB = Builder.GetInsertBlock();
+
+ // Emit merge block.
+ TheFunction->getBasicBlockList().push_back(MergeBB);
+ Builder.SetInsertPoint(MergeBB);
+ PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
+ "iftmp");
+
+ PN->addIncoming(ThenV, ThenBB);
+ PN->addIncoming(ElseV, ElseBB);
+ return PN;
+ }
+
+ Value *ForExprAST::Codegen() {
+ // Output this as:
+ // var = alloca double
+ // ...
+ // start = startexpr
+ // store start -> var
+ // goto loop
+ // loop:
+ // ...
+ // bodyexpr
+ // ...
+ // loopend:
+ // step = stepexpr
+ // endcond = endexpr
+ //
+ // curvar = load var
+ // nextvar = curvar + step
+ // store nextvar -> var
+ // br endcond, loop, outloop
+ // outloop:
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Create an alloca for the variable in the entry block.
+ AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+
+ // Emit the start code first, without 'variable' in scope.
+ Value *StartVal = Start->Codegen();
+ if (StartVal == 0) return 0;
+
+ // Store the value into the alloca.
+ Builder.CreateStore(StartVal, Alloca);
+
+ // Make the new basic block for the loop header, inserting after current
+ // block.
+ BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+ // Insert an explicit fall through from the current block to the LoopBB.
+ Builder.CreateBr(LoopBB);
+
+ // Start insertion in LoopBB.
+ Builder.SetInsertPoint(LoopBB);
+
+ // Within the loop, the variable is defined equal to the alloca. If it
+ // shadows an existing variable, we have to restore it, so save it now.
+ AllocaInst *OldVal = NamedValues[VarName];
+ NamedValues[VarName] = Alloca;
+
+ // Emit the body of the loop. This, like any other expr, can change the
+ // current BB. Note that we ignore the value computed by the body, but don't
+ // allow an error.
+ if (Body->Codegen() == 0)
+ return 0;
+
+ // Emit the step value.
+ Value *StepVal;
+ if (Step) {
+ StepVal = Step->Codegen();
+ if (StepVal == 0) return 0;
+ } else {
+ // If not specified, use 1.0.
+ StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+ }
+
+ // Compute the end condition.
+ Value *EndCond = End->Codegen();
+ if (EndCond == 0) return EndCond;
+
+ // Reload, increment, and restore the alloca. This handles the case where
+ // the body of the loop mutates the variable.
+ Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
+ Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
+ Builder.CreateStore(NextVar, Alloca);
+
+ // Convert condition to a bool by comparing equal to 0.0.
+ EndCond = Builder.CreateFCmpONE(EndCond,
+ ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+ "loopcond");
+
+ // Create the "after loop" block and insert it.
+ BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+ // Insert the conditional branch into the end of the loop-body block.
+ Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+ // Any new code will be inserted in AfterBB.
+ Builder.SetInsertPoint(AfterBB);
+
+ // Restore the unshadowed variable.
+ if (OldVal)
+ NamedValues[VarName] = OldVal;
+ else
+ NamedValues.erase(VarName);
+
+ // for expr always returns 0.0.
+ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+ }
+
+ Value *VarExprAST::Codegen() {
+ std::vector<AllocaInst *> OldBindings;
+
+ Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+ // Register all variables and emit their initializer.
+ for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+ const std::string &VarName = VarNames[i].first;
+ ExprAST *Init = VarNames[i].second;
+
+ // Emit the initializer before adding the variable to scope; this prevents
+ // the initializer from referencing the variable itself, and permits stuff
+ // like this:
+ // var a = 1 in
+ // var a = a in ... # refers to outer 'a'.
+ Value *InitVal;
+ if (Init) {
+ InitVal = Init->Codegen();
+ if (InitVal == 0) return 0;
+ } else { // If not specified, use 0.0.
+ InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
+ }
+
+ AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+ Builder.CreateStore(InitVal, Alloca);
+
+ // Remember the old variable binding so that we can restore the binding when
+ // we unrecurse.
+ OldBindings.push_back(NamedValues[VarName]);
+
+ // Remember this binding.
+ NamedValues[VarName] = Alloca;
+ }
+
+ // Codegen the body, now that all vars are in scope.
+ Value *BodyVal = Body->Codegen();
+ if (BodyVal == 0) return 0;
+
+ // Pop all our variables from scope.
+ for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
+ NamedValues[VarNames[i].first] = OldBindings[i];
+
+ // Return the body computation.
+ return BodyVal;
+ }
+
+ Function *PrototypeAST::Codegen() {
+ // Make the function type: double(double,double) etc.
+ std::vector<Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
+ FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+ Doubles, false);
+
+ Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+ // If F conflicted, there was already something named 'Name'. If it has a
+ // body, don't allow redefinition or reextern.
+ if (F->getName() != Name) {
+ // Delete the one we just made and get the existing one.
+ F->eraseFromParent();
+ F = TheModule->getFunction(Name);
+
+ // If F already has a body, reject this.
+ if (!F->empty()) {
+ ErrorF("redefinition of function");
+ return 0;
+ }
+
+ // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) {
+ ErrorF("redefinition of function with different # args");
+ return 0;
+ }
+ }
+
+ // Set names for all arguments.
+ unsigned Idx = 0;
+ for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+ ++AI, ++Idx)
+ AI->setName(Args[Idx]);
+
+ return F;
+ }
+
+ /// CreateArgumentAllocas - Create an alloca for each argument and register the
+ /// argument in the symbol table so that references to it will succeed.
+ void PrototypeAST::CreateArgumentAllocas(Function *F) {
+ Function::arg_iterator AI = F->arg_begin();
+ for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
+ // Create an alloca for this variable.
+ AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
+
+ // Store the initial value into the alloca.
+ Builder.CreateStore(AI, Alloca);
+
+ // Add arguments to variable symbol table.
+ NamedValues[Args[Idx]] = Alloca;
+ }
+ }
+
+ Function *FunctionAST::Codegen() {
+ NamedValues.clear();
+
+ Function *TheFunction = Proto->Codegen();
+ if (TheFunction == 0)
+ return 0;
+
+ // If this is an operator, install it.
+ if (Proto->isBinaryOp())
+ BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+
+ // Create a new basic block to start insertion into.
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+ Builder.SetInsertPoint(BB);
+
+ // Add all arguments to the symbol table and create their allocas.
+ Proto->CreateArgumentAllocas(TheFunction);
+
+ if (Value *RetVal = Body->Codegen()) {
+ // Finish off the function.
+ Builder.CreateRet(RetVal);
+
+ // Validate the generated code, checking for consistency.
+ verifyFunction(*TheFunction);
+
+ // Optimize the function.
+ TheFPM->run(*TheFunction);
+
+ return TheFunction;
+ }
+
+ // Error reading body, remove function.
+ TheFunction->eraseFromParent();
+
+ if (Proto->isBinaryOp())
+ BinopPrecedence.erase(Proto->getOperatorName());
+ return 0;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Top-Level parsing and JIT Driver
+ //===----------------------------------------------------------------------===//
+
+ static ExecutionEngine *TheExecutionEngine;
+
+ static void HandleDefinition() {
+ if (FunctionAST *F = ParseDefinition()) {
+ if (Function *LF = F->Codegen()) {
+ fprintf(stderr, "Read function definition:");
+ LF->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleExtern() {
+ if (PrototypeAST *P = ParseExtern()) {
+ if (Function *F = P->Codegen()) {
+ fprintf(stderr, "Read extern: ");
+ F->dump();
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ static void HandleTopLevelExpression() {
+ // Evaluate a top-level expression into an anonymous function.
+ if (FunctionAST *F = ParseTopLevelExpr()) {
+ if (Function *LF = F->Codegen()) {
+ // JIT the function, returning a function pointer.
+ void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+ // Cast it to the right type (takes no arguments, returns a double) so we
+ // can call it as a native function.
+ double (*FP)() = (double (*)())(intptr_t)FPtr;
+ fprintf(stderr, "Evaluated to %f\n", FP());
+ }
+ } else {
+ // Skip token for error recovery.
+ getNextToken();
+ }
+ }
+
+ /// top ::= definition | external | expression | ';'
+ static void MainLoop() {
+ while (1) {
+ fprintf(stderr, "ready> ");
+ switch (CurTok) {
+ case tok_eof: return;
+ case ';': getNextToken(); break; // ignore top-level semicolons.
+ case tok_def: HandleDefinition(); break;
+ case tok_extern: HandleExtern(); break;
+ default: HandleTopLevelExpression(); break;
+ }
+ }
+ }
+
+ //===----------------------------------------------------------------------===//
+ // "Library" functions that can be "extern'd" from user code.
+ //===----------------------------------------------------------------------===//
+
+ /// putchard - putchar that takes a double and returns 0.
+ extern "C"
+ double putchard(double X) {
+ putchar((char)X);
+ return 0;
+ }
+
+ /// printd - printf that takes a double and prints it as "%f\n", returning 0.
+ extern "C"
+ double printd(double X) {
+ printf("%f\n", X);
+ return 0;
+ }
+
+ //===----------------------------------------------------------------------===//
+ // Main driver code.
+ //===----------------------------------------------------------------------===//
+
+ int main() {
+ InitializeNativeTarget();
+ LLVMContext &Context = getGlobalContext();
+
+ // Install standard binary operators.
+ // 1 is lowest precedence.
+ BinopPrecedence['='] = 2;
+ BinopPrecedence['<'] = 10;
+ BinopPrecedence['+'] = 20;
+ BinopPrecedence['-'] = 20;
+ BinopPrecedence['*'] = 40; // highest.
+
+ // Prime the first token.
+ fprintf(stderr, "ready> ");
+ getNextToken();
+
+ // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context);
+
+ // Create the JIT. This takes ownership of the module.
+ std::string ErrStr;
+ TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+ if (!TheExecutionEngine) {
+ fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+ exit(1);
+ }
+
+ FunctionPassManager OurFPM(TheModule);
+
+ // Set up the optimizer pipeline. Start by registering info about how the
+ // target lays out data structures.
+ OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+ // Provide basic AliasAnalysis support for GVN.
+ OurFPM.add(createBasicAliasAnalysisPass());
+ // Promote allocas to registers.
+ OurFPM.add(createPromoteMemoryToRegisterPass());
+ // Do simple "peephole" optimizations and bit-twiddling optzns.
+ OurFPM.add(createInstructionCombiningPass());
+ // Reassociate expressions.
+ OurFPM.add(createReassociatePass());
+ // Eliminate Common SubExpressions.
+ OurFPM.add(createGVNPass());
+ // Simplify the control flow graph (deleting unreachable blocks, etc).
+ OurFPM.add(createCFGSimplificationPass());
+
+ OurFPM.doInitialization();
+
+ // Set the global so the code gen can use this.
+ TheFPM = &OurFPM;
+
+ // Run the main "interpreter loop" now.
+ MainLoop();
+
+ TheFPM = 0;
+
+ // Print out all of the generated code.
+ TheModule->dump();
+
+ return 0;
+ }
+
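+As a quick sanity check, here is a hypothetical, abridged session with
+the REPL built from this listing (the "Read extern:" IR dump that the
+driver also prints is omitted); it externs one of the helpers above and
+calls it::
+
+    ready> extern printd(x);
+    ready> printd(42);
+    42.000000
+    Evaluated to 0.000000
+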
+`Next: Conclusion and other useful LLVM tidbits <LangImpl8.html>`_
+
diff --git a/docs/tutorial/LangImpl8.html b/docs/tutorial/LangImpl8.html
deleted file mode 100644
index 50fcd8c6998f..000000000000
--- a/docs/tutorial/LangImpl8.html
+++ /dev/null
@@ -1,359 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Conclusion and other useful LLVM tidbits</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Conclusion and other useful LLVM tidbits</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 8
- <ol>
- <li><a href="#conclusion">Tutorial Conclusion</a></li>
- <li><a href="#llvmirproperties">Properties of LLVM IR</a>
- <ul>
- <li><a href="#targetindep">Target Independence</a></li>
- <li><a href="#safety">Safety Guarantees</a></li>
- <li><a href="#langspecific">Language-Specific Optimizations</a></li>
- </ul>
- </li>
- <li><a href="#tipsandtricks">Tips and Tricks</a>
- <ul>
- <li><a href="#offsetofsizeof">Implementing portable
- offsetof/sizeof</a></li>
- <li><a href="#gcstack">Garbage Collected Stack Frames</a></li>
- </ul>
- </li>
- </ol>
-</li>
-</ul>
-
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="conclusion">Tutorial Conclusion</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to the final chapter of the "<a href="index.html">Implementing a
-language with LLVM</a>" tutorial. In the course of this tutorial, we have grown
-our little Kaleidoscope language from being a useless toy, to being a
-semi-interesting (but probably still useless) toy. :)</p>
-
-<p>It is interesting to see how far we've come, and how little code it has
-taken. We built the entire lexer, parser, AST, code generator, and an
-interactive run-loop (with a JIT!) by-hand in under 700 lines of
-(non-comment/non-blank) code.</p>
-
-<p>Our little language supports a couple of interesting features: it supports
-user defined binary and unary operators, it uses JIT compilation for immediate
-evaluation, and it supports a few control flow constructs with SSA construction.
-</p>
-
-<p>Part of the idea of this tutorial was to show you how easy and fun it can be
-to define, build, and play with languages. Building a compiler need not be a
-scary or mystical process! Now that you've seen some of the basics, I strongly
-encourage you to take the code and hack on it. For example, try adding:</p>
-
-<ul>
-<li><b>global variables</b> - While global variables have questional value in
-modern software engineering, they are often useful when putting together quick
-little hacks like the Kaleidoscope compiler itself. Fortunately, our current
-setup makes it very easy to add global variables: just have value lookup check
-to see if an unresolved variable is in the global variable symbol table before
-rejecting it. To create a new global variable, make an instance of the LLVM
-<tt>GlobalVariable</tt> class.</li>
-
-<li><b>typed variables</b> - Kaleidoscope currently only supports variables of
-type double. This gives the language a very nice elegance, because only
-supporting one type means that you never have to specify types. Different
-languages have different ways of handling this. The easiest way is to require
-the user to specify types for every variable definition, and record the type
-of the variable in the symbol table along with its Value*.</li>
-
-<li><b>arrays, structs, vectors, etc</b> - Once you add types, you can start
-extending the type system in all sorts of interesting ways. Simple arrays are
-very easy and are quite useful for many different applications. Adding them is
-mostly an exercise in learning how the LLVM <a
-href="../LangRef.html#i_getelementptr">getelementptr</a> instruction works: it
-is so nifty/unconventional, it <a
-href="../GetElementPtr.html">has its own FAQ</a>! If you add support
-for recursive types (e.g. linked lists), make sure to read the <a
-href="../ProgrammersManual.html#TypeResolve">section in the LLVM
-Programmer's Manual</a> that describes how to construct them.</li>
-
-<li><b>standard runtime</b> - Our current language allows the user to access
-arbitrary external functions, and we use it for things like "printd" and
-"putchard". As you extend the language to add higher-level constructs, often
-these constructs make the most sense if they are lowered to calls into a
-language-supplied runtime. For example, if you add hash tables to the language,
-it would probably make sense to add the routines to a runtime, instead of
-inlining them all the way.</li>
-
-<li><b>memory management</b> - Currently we can only access the stack in
-Kaleidoscope. It would also be useful to be able to allocate heap memory,
-either with calls to the standard libc malloc/free interface or with a garbage
-collector. If you would like to use garbage collection, note that LLVM fully
-supports <a href="../GarbageCollection.html">Accurate Garbage Collection</a>
-including algorithms that move objects and need to scan/update the stack.</li>
-
-<li><b>debugger support</b> - LLVM supports generation of <a
-href="../SourceLevelDebugging.html">DWARF Debug info</a> which is understood by
-common debuggers like GDB. Adding support for debug info is fairly
-straightforward. The best way to understand it is to compile some C/C++ code
-with "<tt>llvm-gcc -g -O0</tt>" and taking a look at what it produces.</li>
-
-<li><b>exception handling support</b> - LLVM supports generation of <a
-href="../ExceptionHandling.html">zero cost exceptions</a> which interoperate
-with code compiled in other languages. You could also generate code by
-implicitly making every function return an error value and checking it. You
-could also make explicit use of setjmp/longjmp. There are many different ways
-to go here.</li>
-
-<li><b>object orientation, generics, database access, complex numbers,
-geometric programming, ...</b> - Really, there is
-no end of crazy features that you can add to the language.</li>
-
-<li><b>unusual domains</b> - We've been talking about applying LLVM to a domain
-that many people are interested in: building a compiler for a specific language.
-However, there are many other domains that can use compiler technology that are
-not typically considered. For example, LLVM has been used to implement OpenGL
-graphics acceleration, translate C++ code to ActionScript, and many other
-cute and clever things. Maybe you will be the first to JIT compile a regular
-expression interpreter into native code with LLVM?</li>
-
-</ul>
-
-<p>
-Have fun - try doing something crazy and unusual. Building a language like
-everyone else always has, is much less fun than trying something a little crazy
-or off the wall and seeing how it turns out. If you get stuck or want to talk
-about it, feel free to email the <a
-href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev mailing
-list</a>: it has lots of people who are interested in languages and are often
-willing to help out.
-</p>
-
-<p>Before we end this tutorial, I want to talk about some "tips and tricks" for generating
-LLVM IR. These are some of the more subtle things that may not be obvious, but
-are very useful if you want to take advantage of LLVM's capabilities.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="llvmirproperties">Properties of the LLVM IR</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>We have a couple common questions about code in the LLVM IR form - lets just
-get these out of the way right now, shall we?</p>
-
-<!-- ======================================================================= -->
-<h4><a name="targetindep">Target Independence</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Kaleidoscope is an example of a "portable language": any program written in
-Kaleidoscope will work the same way on any target that it runs on. Many other
-languages have this property, e.g. lisp, java, haskell, javascript, python, etc
-(note that while these languages are portable, not all their libraries are).</p>
-
-<p>One nice aspect of LLVM is that it is often capable of preserving target
-independence in the IR: you can take the LLVM IR for a Kaleidoscope-compiled
-program and run it on any target that LLVM supports, even emitting C code and
-compiling that on targets that LLVM doesn't support natively. You can trivially
-tell that the Kaleidoscope compiler generates target-independent code because it
-never queries for any target-specific information when generating code.</p>
-
-<p>The fact that LLVM provides a compact, target-independent, representation for
-code gets a lot of people excited. Unfortunately, these people are usually
-thinking about C or a language from the C family when they are asking questions
-about language portability. I say "unfortunately", because there is really no
-way to make (fully general) C code portable, other than shipping the source code
-around (and of course, C source code is not actually portable in general
-either - ever port a really old application from 32- to 64-bits?).</p>
-
-<p>The problem with C (again, in its full generality) is that it is heavily
-laden with target specific assumptions. As one simple example, the preprocessor
-often destructively removes target-independence from the code when it processes
-the input text:</p>
-
-<div class="doc_code">
-<pre>
-#ifdef __i386__
- int X = 1;
-#else
- int X = 42;
-#endif
-</pre>
-</div>
-
-<p>While it is possible to engineer more and more complex solutions to problems
-like this, it cannot be solved in full generality in a way that is better than shipping
-the actual source code.</p>
-
-<p>That said, there are interesting subsets of C that can be made portable. If
-you are willing to fix primitive types to a fixed size (say int = 32-bits,
-and long = 64-bits), don't care about ABI compatibility with existing binaries,
-and are willing to give up some other minor features, you can have portable
-code. This can make sense for specialized domains such as an
-in-kernel language.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="safety">Safety Guarantees</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Many of the languages above are also "safe" languages: it is impossible for
-a program written in Java to corrupt its address space and crash the process
-(assuming the JVM has no bugs).
-Safety is an interesting property that requires a combination of language
-design, runtime support, and often operating system support.</p>
-
-<p>It is certainly possible to implement a safe language in LLVM, but LLVM IR
-does not itself guarantee safety. The LLVM IR allows unsafe pointer casts,
-use after free bugs, buffer over-runs, and a variety of other problems. Safety
-needs to be implemented as a layer on top of LLVM and, conveniently, several
-groups have investigated this. Ask on the <a
-href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev mailing
-list</a> if you are interested in more details.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="langspecific">Language-Specific Optimizations</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>One thing about LLVM that turns off many people is that it does not solve all
-the world's problems in one system (sorry 'world hunger', someone else will have
-to solve you some other day). One specific complaint is that people perceive
-LLVM as being incapable of performing high-level language-specific optimization:
-LLVM "loses too much information".</p>
-
-<p>Unfortunately, this is really not the place to give you a full and unified
-version of "Chris Lattner's theory of compiler design". Instead, I'll make a
-few observations:</p>
-
-<p>First, you're right that LLVM does lose information. For example, as of this
-writing, there is no way to distinguish in the LLVM IR whether an SSA-value came
-from a C "int" or a C "long" on an ILP32 machine (other than debug info). Both
-get compiled down to an 'i32' value and the information about what it came from
-is lost. The more general issue here, is that the LLVM type system uses
-"structural equivalence" instead of "name equivalence". Another place this
-surprises people is if you have two types in a high-level language that have the
-same structure (e.g. two different structs that have a single int field): these
-types will compile down into a single LLVM type and it will be impossible to
-tell what it came from.</p>
-
-<p>Second, while LLVM does lose information, LLVM is not a fixed target: we
-continue to enhance and improve it in many different ways. In addition to
-adding new features (LLVM did not always support exceptions or debug info), we
-also extend the IR to capture important information for optimization (e.g.
-whether an argument is sign or zero extended, information about pointers
-aliasing, etc). Many of the enhancements are user-driven: people want LLVM to
-include some specific feature, so they go ahead and extend it.</p>
-
-<p>Third, it is <em>possible and easy</em> to add language-specific
-optimizations, and you have a number of choices in how to do it. As one trivial
-example, it is easy to add language-specific optimization passes that
-"know" things about code compiled for a language. In the case of the C family,
-there is an optimization pass that "knows" about the standard C library
-functions. If you call "exit(0)" in main(), it knows that it is safe to
-optimize that into "return 0;" because C specifies what the 'exit'
-function does.</p>
-
-<p>In addition to simple library knowledge, it is possible to embed a variety of
-other language-specific information into the LLVM IR. If you have a specific
-need and run into a wall, please bring the topic up on the llvmdev list. At the
-very worst, you can always treat LLVM as if it were a "dumb code generator" and
-implement the high-level optimizations you desire in your front-end, on the
-language-specific AST.
-</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="tipsandtricks">Tips and Tricks</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>There is a variety of useful tips and tricks that you come to know after
-working on/with LLVM that aren't obvious at first glance. Instead of letting
-everyone rediscover them, this section talks about some of these issues.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="offsetofsizeof">Implementing portable offsetof/sizeof</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>One interesting thing that comes up, if you are trying to keep the code
-generated by your compiler "target independent", is that you often need to know
-the size of some LLVM type or the offset of some field in an llvm structure.
-For example, you might need to pass the size of a type into a function that
-allocates memory.</p>
-
-<p>Unfortunately, this can vary widely across targets: for example the width of
-a pointer is trivially target-specific. However, there is a <a
-href="http://nondot.org/sabre/LLVMNotes/SizeOf-OffsetOf-VariableSizedStructs.txt">clever
-way to use the getelementptr instruction</a> that allows you to compute this
-in a portable way.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="gcstack">Garbage Collected Stack Frames</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Some languages want to explicitly manage their stack frames, often so that
-they are garbage collected or to allow easy implementation of closures. There
-are often better ways to implement these features than explicit stack frames,
-but <a
-href="http://nondot.org/sabre/LLVMNotes/ExplicitlyManagedStackFrames.txt">LLVM
-does support them,</a> if you want. It requires your front-end to convert the
-code into <a
-href="http://en.wikipedia.org/wiki/Continuation-passing_style">Continuation
-Passing Style</a> and the use of tail calls (which LLVM also supports).</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-07-23 10:51:15 +0200 (Mon, 23 Jul 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl8.rst b/docs/tutorial/LangImpl8.rst
new file mode 100644
index 000000000000..3534b2e0c931
--- /dev/null
+++ b/docs/tutorial/LangImpl8.rst
@@ -0,0 +1,267 @@
+======================================================
+Kaleidoscope: Conclusion and other useful LLVM tidbits
+======================================================
+
+.. contents::
+ :local:
+
+Tutorial Conclusion
+===================
+
+Welcome to the final chapter of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. In the course of this tutorial, we have
+grown our little Kaleidoscope language from being a useless toy, to
+being a semi-interesting (but probably still useless) toy. :)
+
+It is interesting to see how far we've come, and how little code it has
+taken. We built the entire lexer, parser, AST, code generator, and an
+interactive run-loop (with a JIT!) by-hand in under 700 lines of
+(non-comment/non-blank) code.
+
+Our little language supports a couple of interesting features: it
+supports user defined binary and unary operators, it uses JIT
+compilation for immediate evaluation, and it supports a few control flow
+constructs with SSA construction.
+
+Part of the idea of this tutorial was to show you how easy and fun it
+can be to define, build, and play with languages. Building a compiler
+need not be a scary or mystical process! Now that you've seen some of
+the basics, I strongly encourage you to take the code and hack on it.
+For example, try adding:
+
+- **global variables** - While global variables have questionable value
+ in modern software engineering, they are often useful when putting
+ together quick little hacks like the Kaleidoscope compiler itself.
+ Fortunately, our current setup makes it very easy to add global
+ variables: just have value lookup check to see if an unresolved
+ variable is in the global variable symbol table before rejecting it.
+ To create a new global variable, make an instance of the LLVM
+ ``GlobalVariable`` class.
+- **typed variables** - Kaleidoscope currently only supports variables
+ of type double. This gives the language a very nice elegance, because
+ only supporting one type means that you never have to specify types.
+ Different languages have different ways of handling this. The easiest
+ way is to require the user to specify types for every variable
+ definition, and record the type of the variable in the symbol table
+ along with its Value\*.
+- **arrays, structs, vectors, etc** - Once you add types, you can start
+ extending the type system in all sorts of interesting ways. Simple
+ arrays are very easy and are quite useful for many different
+ applications. Adding them is mostly an exercise in learning how the
+ LLVM `getelementptr <../LangRef.html#i_getelementptr>`_ instruction
+ works: it is so nifty/unconventional, it `has its own
+ FAQ <../GetElementPtr.html>`_! If you add support for recursive types
+ (e.g. linked lists), make sure to read the `section in the LLVM
+ Programmer's Manual <../ProgrammersManual.html#TypeResolve>`_ that
+ describes how to construct them.
+- **standard runtime** - Our current language allows the user to access
+ arbitrary external functions, and we use it for things like "printd"
+ and "putchard". As you extend the language to add higher-level
+ constructs, often these constructs make the most sense if they are
+ lowered to calls into a language-supplied runtime. For example, if
+ you add hash tables to the language, it would probably make sense to
+ add the routines to a runtime, instead of inlining them all the way.
+- **memory management** - Currently we can only access the stack in
+ Kaleidoscope. It would also be useful to be able to allocate heap
+ memory, either with calls to the standard libc malloc/free interface
+ or with a garbage collector. If you would like to use garbage
+ collection, note that LLVM fully supports `Accurate Garbage
+ Collection <../GarbageCollection.html>`_ including algorithms that
+ move objects and need to scan/update the stack.
+- **debugger support** - LLVM supports generation of `DWARF Debug
+ info <../SourceLevelDebugging.html>`_ which is understood by common
+ debuggers like GDB. Adding support for debug info is fairly
+ straightforward. The best way to understand it is to compile some
+ C/C++ code with "``llvm-gcc -g -O0``" and take a look at what it
+ produces.
+- **exception handling support** - LLVM supports generation of `zero
+ cost exceptions <../ExceptionHandling.html>`_ which interoperate with
+ code compiled in other languages. You could also generate code by
+ implicitly making every function return an error value and checking
+ it. You could also make explicit use of setjmp/longjmp. There are
+ many different ways to go here.
+- **object orientation, generics, database access, complex numbers,
+ geometric programming, ...** - Really, there is no end of crazy
+ features that you can add to the language.
+- **unusual domains** - We've been talking about applying LLVM to a
+ domain that many people are interested in: building a compiler for a
+ specific language. However, there are many other domains that can use
+ compiler technology that are not typically considered. For example,
+ LLVM has been used to implement OpenGL graphics acceleration,
+ translate C++ code to ActionScript, and many other cute and clever
+ things. Maybe you will be the first to JIT compile a regular
+ expression interpreter into native code with LLVM?
+
+Have fun - try doing something crazy and unusual. Building a language
+like everyone else always has is much less fun than trying something a
+little crazy or off the wall and seeing how it turns out. If you get
+stuck or want to talk about it, feel free to email the `llvmdev mailing
+list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_: it has lots
+of people who are interested in languages and are often willing to help
+out.
+
+Before we end this tutorial, I want to talk about some "tips and tricks"
+for generating LLVM IR. These are some of the more subtle things that
+may not be obvious, but are very useful if you want to take advantage of
+LLVM's capabilities.
+
+Properties of the LLVM IR
+=========================
+
+We have a couple of common questions about code in the LLVM IR form -
+let's just get these out of the way right now, shall we?
+
+Target Independence
+-------------------
+
+Kaleidoscope is an example of a "portable language": any program written
+in Kaleidoscope will work the same way on any target that it runs on.
+Many other languages have this property, e.g. Lisp, Java, Haskell,
+JavaScript, Python, etc. (note that while these languages are portable,
+not all their libraries are).
+
+One nice aspect of LLVM is that it is often capable of preserving target
+independence in the IR: you can take the LLVM IR for a
+Kaleidoscope-compiled program and run it on any target that LLVM
+supports, even emitting C code and compiling that on targets that LLVM
+doesn't support natively. You can trivially tell that the Kaleidoscope
+compiler generates target-independent code because it never queries for
+any target-specific information when generating code.
+
+The fact that LLVM provides a compact, target-independent,
+representation for code gets a lot of people excited. Unfortunately,
+these people are usually thinking about C or a language from the C
+family when they are asking questions about language portability. I say
+"unfortunately", because there is really no way to make (fully general)
+C code portable, other than shipping the source code around (and of
+course, C source code is not actually portable in general either - ever
+port a really old application from 32- to 64-bits?).
+
+The problem with C (again, in its full generality) is that it is heavily
+laden with target specific assumptions. As one simple example, the
+preprocessor often destructively removes target-independence from the
+code when it processes the input text:
+
+.. code-block:: c
+
+ #ifdef __i386__
+ int X = 1;
+ #else
+ int X = 42;
+ #endif
+
+While it is possible to engineer more and more complex solutions to
+problems like this, it cannot be solved in full generality in a way that
+is better than shipping the actual source code.
+
+That said, there are interesting subsets of C that can be made portable.
+If you are willing to fix primitive types to a fixed size (say int =
+32-bits, and long = 64-bits), don't care about ABI compatibility with
+existing binaries, and are willing to give up some other minor features,
+you can have portable code. This can make sense for specialized domains
+such as an in-kernel language.
+
+Safety Guarantees
+-----------------
+
+Many of the languages above are also "safe" languages: it is impossible
+for a program written in Java to corrupt its address space and crash the
+process (assuming the JVM has no bugs). Safety is an interesting
+property that requires a combination of language design, runtime
+support, and often operating system support.
+
+It is certainly possible to implement a safe language in LLVM, but LLVM
+IR does not itself guarantee safety. The LLVM IR allows unsafe pointer
+casts, use after free bugs, buffer over-runs, and a variety of other
+problems. Safety needs to be implemented as a layer on top of LLVM and,
+conveniently, several groups have investigated this. Ask on the `llvmdev
+mailing list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ if
+you are interested in more details.
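+
+It is worth seeing just how little the IR itself rules out. The
+following module (a minimal sketch written for this discussion, not
+taken from the tutorial) frees a buffer and then reads from it; the
+verifier is perfectly happy with it, because safety is simply not an
+IR-level property:
+
+.. code-block:: llvm
+
+    declare i8* @malloc(i64)
+    declare void @free(i8*)
+
+    define i8 @use_after_free() {
+      %p = call i8* @malloc(i64 1)
+      call void @free(i8* %p)
+      ; Well-formed IR, but an unsafe load at run time.
+      %v = load i8* %p
+      ret i8 %v
+    }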
+
+Language-Specific Optimizations
+-------------------------------
+
+One thing about LLVM that turns off many people is that it does not
+solve all the world's problems in one system (sorry 'world hunger',
+someone else will have to solve you some other day). One specific
+complaint is that people perceive LLVM as being incapable of performing
+high-level language-specific optimization: LLVM "loses too much
+information".
+
+Unfortunately, this is really not the place to give you a full and
+unified version of "Chris Lattner's theory of compiler design". Instead,
+I'll make a few observations:
+
+First, you're right that LLVM does lose information. For example, as of
+this writing, there is no way to distinguish in the LLVM IR whether an
+SSA-value came from a C "int" or a C "long" on an ILP32 machine (other
+than debug info). Both get compiled down to an 'i32' value and the
+information about what it came from is lost. The more general issue
+here is that the LLVM type system uses "structural equivalence" instead
+of "name equivalence". Another place this surprises people is if you
+have two types in a high-level language that have the same structure
+(e.g. two different structs that have a single int field): these types
+will compile down into a single LLVM type and it will be impossible to
+tell what it came from.
+
+Second, while LLVM does lose information, LLVM is not a fixed target: we
+continue to enhance and improve it in many different ways. In addition
+to adding new features (LLVM did not always support exceptions or debug
+info), we also extend the IR to capture important information for
+optimization (e.g. whether an argument is sign or zero extended,
+information about pointers aliasing, etc). Many of the enhancements are
+user-driven: people want LLVM to include some specific feature, so they
+go ahead and extend it.
+
+Third, it is *possible and easy* to add language-specific optimizations,
+and you have a number of choices in how to do it. As one trivial
+example, it is easy to add language-specific optimization passes that
+"know" things about code compiled for a language. In the case of the C
+family, there is an optimization pass that "knows" about the standard C
+library functions. If you call "exit(0)" in main(), it knows that it is
+safe to optimize that into "return 0;" because C specifies what the
+'exit' function does.
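+
+Sketched as a before/after pair (this is an illustration of the idea,
+not the pass's actual test case):
+
+.. code-block:: c
+
+    /* Before the library-aware pass runs: */
+    #include <stdlib.h>
+    int main() { exit(0); }
+
+    /* After, because C pins down exactly what exit() means: */
+    int main() { return 0; }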
+
+In addition to simple library knowledge, it is possible to embed a
+variety of other language-specific information into the LLVM IR. If you
+have a specific need and run into a wall, please bring the topic up on
+the llvmdev list. At the very worst, you can always treat LLVM as if it
+were a "dumb code generator" and implement the high-level optimizations
+you desire in your front-end, on the language-specific AST.
+
+Tips and Tricks
+===============
+
+There are a variety of useful tips and tricks that you come to know
+working on/with LLVM that aren't obvious at first glance. Instead of
+letting everyone rediscover them, this section talks about some of these
+issues.
+
+Implementing portable offsetof/sizeof
+-------------------------------------
+
+One interesting thing that comes up, if you are trying to keep the code
+generated by your compiler "target independent", is that you often need
+to know the size of some LLVM type or the offset of some field in an
+LLVM structure. For example, you might need to pass the size of a type
+into a function that allocates memory.
+
+Unfortunately, this can vary widely across targets: for example the
+width of a pointer is trivially target-specific. However, there is a
+`clever way to use the getelementptr
+instruction <http://nondot.org/sabre/LLVMNotes/SizeOf-OffsetOf-VariableSizedStructs.txt>`_
+that allows you to compute this in a portable way.
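+
+The heart of the trick is to index off a null pointer and turn the
+resulting address into an integer. A minimal sketch (the type
+``%struct.pair`` here is invented for the example):
+
+.. code-block:: llvm
+
+    %struct.pair = type { i32, double }
+
+    define i64 @portable_sizeof() {
+      ; The address of element 1 of a null pointer is sizeof(%struct.pair).
+      %end = getelementptr %struct.pair* null, i32 1
+      %size = ptrtoint %struct.pair* %end to i64
+      ret i64 %size
+    }
+
+    define i64 @portable_offsetof() {
+      ; The address of field 1 of a null pointer is that field's offset.
+      %fld = getelementptr %struct.pair* null, i32 0, i32 1
+      %off = ptrtoint double* %fld to i64
+      ret i64 %off
+    }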
+
+Garbage Collected Stack Frames
+------------------------------
+
+Some languages want to explicitly manage their stack frames, often so
+that they are garbage collected or to allow easy implementation of
+closures. There are often better ways to implement these features than
+explicit stack frames, but `LLVM does support
+them, <http://nondot.org/sabre/LLVMNotes/ExplicitlyManagedStackFrames.txt>`_
+if you want. It requires your front-end to convert the code into
+`Continuation Passing
+Style <http://en.wikipedia.org/wiki/Continuation-passing_style>`_ and
+the use of tail calls (which LLVM also supports).
+
diff --git a/docs/tutorial/OCamlLangImpl1.html b/docs/tutorial/OCamlLangImpl1.html
deleted file mode 100644
index 86a395a3a873..000000000000
--- a/docs/tutorial/OCamlLangImpl1.html
+++ /dev/null
@@ -1,365 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Tutorial Introduction and the Lexer</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <meta name="author" content="Erick Tryzelaar">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Tutorial Introduction and the Lexer</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 1
- <ol>
- <li><a href="#intro">Tutorial Introduction</a></li>
- <li><a href="#language">The Basic Language</a></li>
- <li><a href="#lexer">The Lexer</a></li>
- </ol>
-</li>
-<li><a href="OCamlLangImpl2.html">Chapter 2</a>: Implementing a Parser and
-AST</li>
-</ul>
-
-<div class="doc_author">
- <p>
- Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
- and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
- </p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Tutorial Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to the "Implementing a language with LLVM" tutorial. This tutorial
-runs through the implementation of a simple language, showing how fun and
-easy it can be. This tutorial will get you up and started as well as help to
-build a framework you can extend to other languages. The code in this tutorial
-can also be used as a playground to hack on other LLVM specific things.
-</p>
-
-<p>
-The goal of this tutorial is to progressively unveil our language, describing
-how it is built up over time. This will let us cover a fairly broad range of
-language design and LLVM-specific usage issues, showing and explaining the code
-for it all along the way, without overwhelming you with tons of details up
-front.</p>
-
-<p>It is useful to point out ahead of time that this tutorial is really about
-teaching compiler techniques and LLVM specifically, <em>not</em> about teaching
-modern and sane software engineering principles. In practice, this means that
-we'll take a number of shortcuts to simplify the exposition. For example, the
-code leaks memory, uses global variables all over the place, doesn't use nice
-design patterns like <a
-href="http://en.wikipedia.org/wiki/Visitor_pattern">visitors</a>, etc... but it
-is very simple. If you dig in and use the code as a basis for future projects,
-fixing these deficiencies shouldn't be hard.</p>
-
-<p>I've tried to put this tutorial together in a way that makes chapters easy to
-skip over if you are already familiar with or are uninterested in the various
-pieces. The structure of the tutorial is:
-</p>
-
-<ul>
-<li><b><a href="#language">Chapter #1</a>: Introduction to the Kaleidoscope
-language, and the definition of its Lexer</b> - This shows where we are going
-and the basic functionality that we want it to do. In order to make this
-tutorial maximally understandable and hackable, we choose to implement
-everything in Objective Caml instead of using lexer and parser generators.
-LLVM obviously works just fine with such tools, feel free to use one if you
-prefer.</li>
-<li><b><a href="OCamlLangImpl2.html">Chapter #2</a>: Implementing a Parser and
-AST</b> - With the lexer in place, we can talk about parsing techniques and
-basic AST construction. This tutorial describes recursive descent parsing and
-operator precedence parsing. Nothing in Chapters 1 or 2 is LLVM-specific,
-the code doesn't even link in LLVM at this point. :)</li>
-<li><b><a href="OCamlLangImpl3.html">Chapter #3</a>: Code generation to LLVM
-IR</b> - With the AST ready, we can show off how easy generation of LLVM IR
-really is.</li>
-<li><b><a href="OCamlLangImpl4.html">Chapter #4</a>: Adding JIT and Optimizer
-Support</b> - Because a lot of people are interested in using LLVM as a JIT,
-we'll dive right into it and show you the 3 lines it takes to add JIT support.
-LLVM is also useful in many other ways, but this is one simple and "sexy" way
-to shows off its power. :)</li>
-<li><b><a href="OCamlLangImpl5.html">Chapter #5</a>: Extending the Language:
-Control Flow</b> - With the language up and running, we show how to extend it
-with control flow operations (if/then/else and a 'for' loop). This gives us a
-chance to talk about simple SSA construction and control flow.</li>
-<li><b><a href="OCamlLangImpl6.html">Chapter #6</a>: Extending the Language:
-User-defined Operators</b> - This is a silly but fun chapter that talks about
-extending the language to let the user program define their own arbitrary
-unary and binary operators (with assignable precedence!). This lets us build a
-significant piece of the "language" as library routines.</li>
-<li><b><a href="OCamlLangImpl7.html">Chapter #7</a>: Extending the Language:
-Mutable Variables</b> - This chapter talks about adding user-defined local
-variables along with an assignment operator. The interesting part about this
-is how easy and trivial it is to construct SSA form in LLVM: no, LLVM does
-<em>not</em> require your front-end to construct SSA form!</li>
-<li><b><a href="OCamlLangImpl8.html">Chapter #8</a>: Conclusion and other
-useful LLVM tidbits</b> - This chapter wraps up the series by talking about
-potential ways to extend the language, but also includes a bunch of pointers to
-info about "special topics" like adding garbage collection support, exceptions,
-debugging, support for "spaghetti stacks", and a bunch of other tips and
-tricks.</li>
-
-</ul>
-
-<p>By the end of the tutorial, we'll have written a bit less than 700 lines of
-non-comment, non-blank, lines of code. With this small amount of code, we'll
-have built up a very reasonable compiler for a non-trivial language including
-a hand-written lexer, parser, AST, as well as code generation support with a JIT
-compiler. While other systems may have interesting "hello world" tutorials,
-I think the breadth of this tutorial is a great testament to the strengths of
-LLVM and why you should consider it if you're interested in language or compiler
-design.</p>
-
-<p>A note about this tutorial: we expect you to extend the language and play
-with it on your own. Take the code and go crazy hacking away at it, compilers
-don't need to be scary creatures - it can be a lot of fun to play with
-languages!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="language">The Basic Language</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This tutorial will be illustrated with a toy language that we'll call
-"<a href="http://en.wikipedia.org/wiki/Kaleidoscope">Kaleidoscope</a>" (derived
-from "meaning beautiful, form, and view").
-Kaleidoscope is a procedural language that allows you to define functions, use
-conditionals, math, etc. Over the course of the tutorial, we'll extend
-Kaleidoscope to support the if/then/else construct, a for loop, user defined
-operators, JIT compilation with a simple command line interface, etc.</p>
-
-<p>Because we want to keep things simple, the only datatype in Kaleidoscope is a
-64-bit floating point type (aka 'float' in O'Caml parlance). As such, all
-values are implicitly double precision and the language doesn't require type
-declarations. This gives the language a very nice and simple syntax. For
-example, the following simple example computes <a
-href="http://en.wikipedia.org/wiki/Fibonacci_number">Fibonacci numbers:</a></p>
-
-<div class="doc_code">
-<pre>
-# Compute the x'th fibonacci number.
-def fib(x)
- if x &lt; 3 then
- 1
- else
- fib(x-1)+fib(x-2)
-
-# This expression will compute the 40th number.
-fib(40)
-</pre>
-</div>
-
-<p>We also allow Kaleidoscope to call into standard library functions (the LLVM
-JIT makes this completely trivial). This means that you can use the 'extern'
-keyword to define a function before you use it (this is also useful for mutually
-recursive functions). For example:</p>
-
-<div class="doc_code">
-<pre>
-extern sin(arg);
-extern cos(arg);
-extern atan2(arg1 arg2);
-
-atan2(sin(.4), cos(42))
-</pre>
-</div>
-
-<p>A more interesting example is included in Chapter 6 where we write a little
-Kaleidoscope application that <a href="OCamlLangImpl6.html#example">displays
-a Mandelbrot Set</a> at various levels of magnification.</p>
-
-<p>Lets dive into the implementation of this language!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="lexer">The Lexer</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>When it comes to implementing a language, the first thing needed is
-the ability to process a text file and recognize what it says. The traditional
-way to do this is to use a "<a
-href="http://en.wikipedia.org/wiki/Lexical_analysis">lexer</a>" (aka 'scanner')
-to break the input up into "tokens". Each token returned by the lexer includes
-a token code and potentially some metadata (e.g. the numeric value of a number).
-First, we define the possibilities:
-</p>
-
-<div class="doc_code">
-<pre>
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
- (* commands *)
- | Def | Extern
-
- (* primary *)
- | Ident of string | Number of float
-
- (* unknown *)
- | Kwd of char
-</pre>
-</div>
-
-<p>Each token returned by our lexer will be one of the token variant values.
-An unknown character like '+' will be returned as <tt>Token.Kwd '+'</tt>. If
-the curr token is an identifier, the value will be <tt>Token.Ident s</tt>. If
-the current token is a numeric literal (like 1.0), the value will be
-<tt>Token.Number 1.0</tt>.
-</p>
-
-<p>The actual implementation of the lexer is a collection of functions driven
-by a function named <tt>Lexer.lex</tt>. The <tt>Lexer.lex</tt> function is
-called to return the next token from standard input. We will use
-<a href="http://caml.inria.fr/pub/docs/manual-camlp4/index.html">Camlp4</a>
-to simplify the tokenization of the standard input. Its definition starts
-as:</p>
-
-<div class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
- (* Skip any whitespace. *)
- | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-</pre>
-</div>
-
-<p>
-<tt>Lexer.lex</tt> works by recursing over a <tt>char Stream.t</tt> to read
-characters one at a time from the standard input. It eats them as it recognizes
-them and stores them in in a <tt>Token.token</tt> variant. The first thing that
-it has to do is ignore whitespace between tokens. This is accomplished with the
-recursive call above.</p>
-
-<p>The next thing <tt>Lexer.lex</tt> needs to do is recognize identifiers and
-specific keywords like "def". Kaleidoscope does this with a pattern match
-and a helper function.<p>
-
-<div class="doc_code">
-<pre>
- (* identifier: [a-zA-Z][a-zA-Z0-9] *)
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_ident buffer stream
-
-...
-
-and lex_ident buffer = parser
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_ident buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- match Buffer.contents buffer with
- | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
- | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
- | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-</pre>
-</div>
-
-<p>Numeric values are similar:</p>
-
-<div class="doc_code">
-<pre>
- (* number: [0-9.]+ *)
- | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_number buffer stream
-
-...
-
-and lex_number buffer = parser
- | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_number buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-</pre>
-</div>
-
-<p>This is all pretty straight-forward code for processing input. When reading
-a numeric value from input, we use the ocaml <tt>float_of_string</tt> function
-to convert it to a numeric value that we store in <tt>Token.Number</tt>. Note
-that this isn't doing sufficient error checking: it will raise <tt>Failure</tt>
-if the string "1.23.45.67". Feel free to extend it :). Next we handle
-comments:
-</p>
-
-<div class="doc_code">
-<pre>
- (* Comment until end of line. *)
- | [&lt; ' ('#'); stream &gt;] -&gt;
- lex_comment stream
-
-...
-
-and lex_comment = parser
- | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
- | [&lt; 'c; e=lex_comment &gt;] -&gt; e
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</div>
-
-<p>We handle comments by skipping to the end of the line and then return the
-next token. Finally, if the input doesn't match one of the above cases, it is
-either an operator character like '+' or the end of the file. These are handled
-with this code:</p>
-
-<div class="doc_code">
-<pre>
- (* Otherwise, just return the character as its ascii value. *)
- | [&lt; 'c; stream &gt;] -&gt;
- [&lt; 'Token.Kwd c; lex stream &gt;]
-
- (* end of stream. *)
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</div>
-
-<p>With this, we have the complete lexer for the basic Kaleidoscope language
-(the <a href="OCamlLangImpl2.html#code">full code listing</a> for the Lexer is
-available in the <a href="OCamlLangImpl2.html">next chapter</a> of the
-tutorial). Next we'll <a href="OCamlLangImpl2.html">build a simple parser that
-uses this to build an Abstract Syntax Tree</a>. When we have that, we'll
-include a driver so that you can use the lexer and parser together.
-</p>
-
-<a href="OCamlLangImpl2.html">Next: Implementing a Parser and AST</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-05-03 00:46:36 +0200 (Thu, 03 May 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl1.rst b/docs/tutorial/OCamlLangImpl1.rst
new file mode 100644
index 000000000000..94ca3a5aa4d3
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl1.rst
@@ -0,0 +1,285 @@
+=================================================
+Kaleidoscope: Tutorial Introduction and the Lexer
+=================================================
+
+.. contents::
+ :local:
+
+Tutorial Introduction
+=====================
+
+Welcome to the "Implementing a language with LLVM" tutorial. This
+tutorial runs through the implementation of a simple language, showing
+how fun and easy it can be. This tutorial will get you up and running as
+well as help you build a framework you can extend to other languages.
+The code in this tutorial can also be used as a playground to hack on
+other LLVM-specific things.
+
+The goal of this tutorial is to progressively unveil our language,
+describing how it is built up over time. This will let us cover a fairly
+broad range of language design and LLVM-specific usage issues, showing
+and explaining the code for it all along the way, without overwhelming
+you with tons of details up front.
+
+It is useful to point out ahead of time that this tutorial is really
+about teaching compiler techniques and LLVM specifically, *not* about
+teaching modern and sane software engineering principles. In practice,
+this means that we'll take a number of shortcuts to simplify the
+exposition. For example, the code leaks memory, uses global variables
+all over the place, doesn't use nice design patterns like
+`visitors <http://en.wikipedia.org/wiki/Visitor_pattern>`_, etc... but
+it is very simple. If you dig in and use the code as a basis for future
+projects, fixing these deficiencies shouldn't be hard.
+
+I've tried to put this tutorial together in a way that makes chapters
+easy to skip over if you are already familiar with or are uninterested
+in the various pieces. The structure of the tutorial is:
+
+- `Chapter #1 <#language>`_: Introduction to the Kaleidoscope
+  language, and the definition of its Lexer - This shows where we are
+  going and the basic functionality that we want the language to
+  support. In order to make this tutorial maximally understandable and
+  hackable, we choose to implement everything in Objective Caml instead
+  of using lexer and parser generators. LLVM obviously works just fine
+  with such tools; feel free to use one if you prefer.
+- `Chapter #2 <OCamlLangImpl2.html>`_: Implementing a Parser and
+ AST - With the lexer in place, we can talk about parsing techniques
+ and basic AST construction. This tutorial describes recursive descent
+ parsing and operator precedence parsing. Nothing in Chapters 1 or 2
+ is LLVM-specific, the code doesn't even link in LLVM at this point.
+ :)
+- `Chapter #3 <OCamlLangImpl3.html>`_: Code generation to LLVM IR -
+ With the AST ready, we can show off how easy generation of LLVM IR
+ really is.
+- `Chapter #4 <OCamlLangImpl4.html>`_: Adding JIT and Optimizer
+ Support - Because a lot of people are interested in using LLVM as a
+ JIT, we'll dive right into it and show you the 3 lines it takes to
+ add JIT support. LLVM is also useful in many other ways, but this is
+  one simple and "sexy" way to show off its power. :)
+- `Chapter #5 <OCamlLangImpl5.html>`_: Extending the Language:
+ Control Flow - With the language up and running, we show how to
+ extend it with control flow operations (if/then/else and a 'for'
+ loop). This gives us a chance to talk about simple SSA construction
+ and control flow.
+- `Chapter #6 <OCamlLangImpl6.html>`_: Extending the Language:
+ User-defined Operators - This is a silly but fun chapter that talks
+ about extending the language to let the user program define their own
+ arbitrary unary and binary operators (with assignable precedence!).
+ This lets us build a significant piece of the "language" as library
+ routines.
+- `Chapter #7 <OCamlLangImpl7.html>`_: Extending the Language:
+ Mutable Variables - This chapter talks about adding user-defined
+ local variables along with an assignment operator. The interesting
+ part about this is how easy and trivial it is to construct SSA form
+ in LLVM: no, LLVM does *not* require your front-end to construct SSA
+ form!
+- `Chapter #8 <OCamlLangImpl8.html>`_: Conclusion and other useful
+ LLVM tidbits - This chapter wraps up the series by talking about
+ potential ways to extend the language, but also includes a bunch of
+ pointers to info about "special topics" like adding garbage
+ collection support, exceptions, debugging, support for "spaghetti
+ stacks", and a bunch of other tips and tricks.
+
+By the end of the tutorial, we'll have written a bit less than 700
+non-comment, non-blank lines of code. With this small amount of
+code, we'll have built up a very reasonable compiler for a non-trivial
+language including a hand-written lexer, parser, AST, as well as code
+generation support with a JIT compiler. While other systems may have
+interesting "hello world" tutorials, I think the breadth of this
+tutorial is a great testament to the strengths of LLVM and why you
+should consider it if you're interested in language or compiler design.
+
+A note about this tutorial: we expect you to extend the language and
+play with it on your own. Take the code and go crazy hacking away at it;
+compilers don't need to be scary creatures, and it can be a lot of fun
+to play with languages!
+
+The Basic Language
+==================
+
+This tutorial will be illustrated with a toy language that we'll call
+"`Kaleidoscope <http://en.wikipedia.org/wiki/Kaleidoscope>`_" (derived
+from Greek roots meaning "beautiful, form, and view"). Kaleidoscope is a procedural
+language that allows you to define functions, use conditionals, math,
+etc. Over the course of the tutorial, we'll extend Kaleidoscope to
+support the if/then/else construct, a for loop, user defined operators,
+JIT compilation with a simple command line interface, etc.
+
+Because we want to keep things simple, the only datatype in Kaleidoscope
+is a 64-bit floating point type (aka 'float' in O'Caml parlance). As
+such, all values are implicitly double precision and the language
+doesn't require type declarations. This gives the language a very nice
+and simple syntax. For example, the following program computes
+`Fibonacci numbers <http://en.wikipedia.org/wiki/Fibonacci_number>`_:
+
+::
+
+ # Compute the x'th fibonacci number.
+ def fib(x)
+ if x < 3 then
+ 1
+ else
+ fib(x-1)+fib(x-2)
+
+ # This expression will compute the 40th number.
+ fib(40)
+
+We also allow Kaleidoscope to call into standard library functions (the
+LLVM JIT makes this completely trivial). This means that you can use the
+'extern' keyword to declare a function before you use it (this is also
+useful for mutually recursive functions). For example:
+
+::
+
+ extern sin(arg);
+ extern cos(arg);
+ extern atan2(arg1 arg2);
+
+ atan2(sin(.4), cos(42))
+
+A more interesting example is included in Chapter 6 where we write a
+little Kaleidoscope application that `displays a Mandelbrot
+Set <OCamlLangImpl6.html#example>`_ at various levels of magnification.
+
+Let's dive into the implementation of this language!
+
+The Lexer
+=========
+
+When it comes to implementing a language, the first thing needed is the
+ability to process a text file and recognize what it says. The
+traditional way to do this is to use a
+"`lexer <http://en.wikipedia.org/wiki/Lexical_analysis>`_" (aka
+'scanner') to break the input up into "tokens". Each token returned by
+the lexer includes a token code and potentially some metadata (e.g. the
+numeric value of a number). First, we define the possibilities:
+
+.. code-block:: ocaml
+
+    (* The lexer returns 'Kwd' for an unknown character, and one of the
+     * other variants for known things. *)
+ type token =
+ (* commands *)
+ | Def | Extern
+
+ (* primary *)
+ | Ident of string | Number of float
+
+ (* unknown *)
+ | Kwd of char
+
+Each token returned by our lexer will be one of the token variant
+values. An unknown character like '+' will be returned as
+``Token.Kwd '+'``. If the current token is an identifier, the value will be
+``Token.Ident s``. If the current token is a numeric literal (like 1.0),
+the value will be ``Token.Number 1.0``.
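+
+For example (this printer is our own illustration, not part of the
+tutorial code), a consumer of the lexer could render tokens like so:
+
+.. code-block:: ocaml
+
+    (* Hypothetical pretty-printer over the token variants. *)
+    let string_of_token = function
+      | Token.Def -> "def"
+      | Token.Extern -> "extern"
+      | Token.Ident s -> "ident(" ^ s ^ ")"
+      | Token.Number n -> Printf.sprintf "number(%f)" n
+      | Token.Kwd c -> Printf.sprintf "kwd(%c)" c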
+
+The actual implementation of the lexer is a collection of functions
+driven by a function named ``Lexer.lex``. The ``Lexer.lex`` function is
+called to return the next token from standard input. We will use
+`Camlp4 <http://caml.inria.fr/pub/docs/manual-camlp4/index.html>`_ to
+simplify the tokenization of the standard input. Its definition starts
+as:
+
+.. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+ let rec lex = parser
+ (* Skip any whitespace. *)
+ | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+``Lexer.lex`` works by recursing over a ``char Stream.t`` to read
+characters one at a time from the standard input. It eats them as it
+recognizes them and stores them in a ``Token.token`` variant. The
+first thing that it has to do is ignore whitespace between tokens. This
+is accomplished with the recursive call above.
+
+The next thing ``Lexer.lex`` needs to do is recognize identifiers and
+specific keywords like "def". Kaleidoscope does this with a pattern
+match and a helper function.
+
+.. code-block:: ocaml
+
+ (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+
+ ...
+
+ and lex_ident buffer = parser
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+ | [< stream=lex >] ->
+ match Buffer.contents buffer with
+ | "def" -> [< 'Token.Def; stream >]
+ | "extern" -> [< 'Token.Extern; stream >]
+ | id -> [< 'Token.Ident id; stream >]
+
+Numeric values are similar:
+
+.. code-block:: ocaml
+
+ (* number: [0-9.]+ *)
+ | [< ' ('0' .. '9' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+
+ ...
+
+ and lex_number buffer = parser
+ | [< ' ('0' .. '9' | '.' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+ | [< stream=lex >] ->
+ [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+This is all pretty straightforward code for processing input. When
+reading a numeric value from input, we use the OCaml ``float_of_string``
+function to convert it to a numeric value that we store in
+``Token.Number``. Note that this isn't doing sufficient error checking:
+it will raise ``Failure`` if given the string "1.23.45.67". Feel free to
+extend it :). One possible guard is sketched below.
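+
+Purely as an illustration (this helper is our own, not part of the
+tutorial code), the lexer could validate the buffered text before
+handing it to ``float_of_string``:
+
+.. code-block:: ocaml
+
+    (* Hypothetical guard: reject literals containing more than one '.'
+     * rather than letting float_of_string fail later. *)
+    let number_of_string s =
+      let dots = ref 0 in
+      String.iter (fun c -> if c = '.' then incr dots) s;
+      if !dots > 1 then raise (Failure ("malformed number: " ^ s));
+      float_of_string s
+
+Next we handle comments: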
+
+.. code-block:: ocaml
+
+ (* Comment until end of line. *)
+ | [< ' ('#'); stream >] ->
+ lex_comment stream
+
+ ...
+
+ and lex_comment = parser
+ | [< ' ('\n'); stream=lex >] -> stream
+ | [< 'c; e=lex_comment >] -> e
+ | [< >] -> [< >]
+
+We handle comments by skipping to the end of the line and then returning
+the next token. Finally, if the input doesn't match one of the above
+cases, it is either an operator character like '+' or the end of the
+file. These are handled with this code:
+
+.. code-block:: ocaml
+
+ (* Otherwise, just return the character as its ascii value. *)
+ | [< 'c; stream >] ->
+ [< 'Token.Kwd c; lex stream >]
+
+ (* end of stream. *)
+ | [< >] -> [< >]
+
+With this, we have the complete lexer for the basic Kaleidoscope
+language (the `full code listing <OCamlLangImpl2.html#code>`_ for the
+Lexer is available in the `next chapter <OCamlLangImpl2.html>`_ of the
+tutorial). Next we'll `build a simple parser that uses this to build an
+Abstract Syntax Tree <OCamlLangImpl2.html>`_. When we have that, we'll
+include a driver so that you can use the lexer and parser together.
+
+`Next: Implementing a Parser and AST <OCamlLangImpl2.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl2.html b/docs/tutorial/OCamlLangImpl2.html
deleted file mode 100644
index 9bb4c40361c5..000000000000
--- a/docs/tutorial/OCamlLangImpl2.html
+++ /dev/null
@@ -1,1043 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Implementing a Parser and AST</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <meta name="author" content="Erick Tryzelaar">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Implementing a Parser and AST</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 2
- <ol>
- <li><a href="#intro">Chapter 2 Introduction</a></li>
- <li><a href="#ast">The Abstract Syntax Tree (AST)</a></li>
- <li><a href="#parserbasics">Parser Basics</a></li>
- <li><a href="#parserprimexprs">Basic Expression Parsing</a></li>
- <li><a href="#parserbinops">Binary Expression Parsing</a></li>
- <li><a href="#parsertop">Parsing the Rest</a></li>
- <li><a href="#driver">The Driver</a></li>
- <li><a href="#conclusions">Conclusions</a></li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="OCamlLangImpl3.html">Chapter 3</a>: Code generation to LLVM IR</li>
-</ul>
-
-<div class="doc_author">
- <p>
- Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
- and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
- </p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 2 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 2 of the "<a href="index.html">Implementing a language
-with LLVM in Objective Caml</a>" tutorial. This chapter shows you how to use
-the lexer, built in <a href="OCamlLangImpl1.html">Chapter 1</a>, to build a
-full <a href="http://en.wikipedia.org/wiki/Parsing">parser</a> for our
-Kaleidoscope language. Once we have a parser, we'll define and build an <a
-href="http://en.wikipedia.org/wiki/Abstract_syntax_tree">Abstract Syntax
-Tree</a> (AST).</p>
-
-<p>The parser we will build uses a combination of <a
-href="http://en.wikipedia.org/wiki/Recursive_descent_parser">Recursive Descent
-Parsing</a> and <a href=
-"http://en.wikipedia.org/wiki/Operator-precedence_parser">Operator-Precedence
-Parsing</a> to parse the Kaleidoscope language (the latter for
-binary expressions and the former for everything else). Before we get to
-parsing though, lets talk about the output of the parser: the Abstract Syntax
-Tree.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="ast">The Abstract Syntax Tree (AST)</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The AST for a program captures its behavior in such a way that it is easy for
-later stages of the compiler (e.g. code generation) to interpret. We basically
-want one object for each construct in the language, and the AST should closely
-model the language. In Kaleidoscope, we have expressions, a prototype, and a
-function object. We'll start with expressions first:</p>
-
-<div class="doc_code">
-<pre>
-(* expr - Base type for all expression nodes. *)
-type expr =
- (* variant for numeric literals like "1.0". *)
- | Number of float
-</pre>
-</div>
-
-<p>The code above shows the definition of the base ExprAST class and one
-subclass which we use for numeric literals. The important thing to note about
-this code is that the Number variant captures the numeric value of the
-literal as an instance variable. This allows later phases of the compiler to
-know what the stored numeric value is.</p>
-
-<p>Right now we only create the AST, so there are no useful functions on
-them. It would be very easy to add a function to pretty print the code,
-for example. Here are the other expression AST node definitions that we'll use
-in the basic form of the Kaleidoscope language:
-</p>
-
-<div class="doc_code">
-<pre>
- (* variant for referencing a variable, like "a". *)
- | Variable of string
-
- (* variant for a binary operator. *)
- | Binary of char * expr * expr
-
- (* variant for function calls. *)
- | Call of string * expr array
-</pre>
-</div>
-
-<p>This is all (intentionally) rather straight-forward: variables capture the
-variable name, binary operators capture their opcode (e.g. '+'), and calls
-capture a function name as well as a list of any argument expressions. One thing
-that is nice about our AST is that it captures the language features without
-talking about the syntax of the language. Note that there is no discussion about
-precedence of binary operators, lexical structure, etc.</p>
-
-<p>For our basic language, these are all of the expression nodes we'll define.
-Because it doesn't have conditional control flow, it isn't Turing-complete;
-we'll fix that in a later installment. The two things we need next are a way
-to talk about the interface to a function, and a way to talk about functions
-themselves:</p>
-
-<div class="doc_code">
-<pre>
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</div>
-
-<p>In Kaleidoscope, functions are typed with just a count of their arguments.
-Since all values are double precision floating point, the type of each argument
-doesn't need to be stored anywhere. In a more aggressive and realistic
-language, the "expr" variants would probably have a type field.</p>
-
-<p>With this scaffolding, we can now talk about parsing expressions and function
-bodies in Kaleidoscope.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserbasics">Parser Basics</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we have an AST to build, we need to define the parser code to build
-it. The idea here is that we want to parse something like "x+y" (which is
-returned as three tokens by the lexer) into an AST that could be generated with
-calls like this:</p>
-
-<div class="doc_code">
-<pre>
- let x = Variable "x" in
- let y = Variable "y" in
- let result = Binary ('+', x, y) in
- ...
-</pre>
-</div>
-
-<p>
-The error handling routines make use of the builtin <tt>Stream.Failure</tt> and
-<tt>Stream.Error</tt>s. <tt>Stream.Failure</tt> is raised when the parser is
-unable to find any matching token in the first position of a pattern.
-<tt>Stream.Error</tt> is raised when the first token matches, but the rest do
-not. The error recovery in our parser will not be the best and is not
-particular user-friendly, but it will be enough for our tutorial. These
-exceptions make it easier to handle errors in routines that have various return
-types.</p>
-
-<p>With these basic types and exceptions, we can implement the first
-piece of our grammar: numeric literals.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserprimexprs">Basic Expression Parsing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>We start with numeric literals, because they are the simplest to process.
-For each production in our grammar, we'll define a function which parses that
-production. We call this class of expressions "primary" expressions, for
-reasons that will become more clear <a href="OCamlLangImpl6.html#unary">
-later in the tutorial</a>. In order to parse an arbitrary primary expression,
-we need to determine what sort of expression it is. For numeric literals, we
-have:</p>
-
-<div class="doc_code">
-<pre>
-(* primary
- * ::= identifier
- * ::= numberexpr
- * ::= parenexpr *)
-parse_primary = parser
- (* numberexpr ::= number *)
- | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-</pre>
-</div>
-
-<p>This routine is very simple: it expects to be called when the current token
-is a <tt>Token.Number</tt> token. It takes the current number value, creates
-a <tt>Ast.Number</tt> node, advances the lexer to the next token, and finally
-returns.</p>
-
-<p>There are some interesting aspects to this. The most important one is that
-this routine eats all of the tokens that correspond to the production and
-returns the lexer buffer with the next token (which is not part of the grammar
-production) ready to go. This is a fairly standard way to go for recursive
-descent parsers. For a better example, the parenthesis operator is defined like
-this:</p>
-
-<div class="doc_code">
-<pre>
- (* parenexpr ::= '(' expression ')' *)
- | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-</pre>
-</div>
-
-<p>This function illustrates a number of interesting things about the
-parser:</p>
-
-<p>
-1) It shows how we use the <tt>Stream.Error</tt> exception. When called, this
-function expects that the current token is a '(' token, but after parsing the
-subexpression, it is possible that there is no ')' waiting. For example, if
-the user types in "(4 x" instead of "(4)", the parser should emit an error.
-Because errors can occur, the parser needs a way to indicate that they
-happened. In our parser, we use the camlp4 shortcut syntax <tt>token ?? "parse
-error"</tt>, where if the token before the <tt>??</tt> does not match, then
-<tt>Stream.Error "parse error"</tt> will be raised.</p>
-
-<p>2) Another interesting aspect of this function is that it uses recursion by
-calling <tt>Parser.parse_primary</tt> (we will soon see that
-<tt>Parser.parse_primary</tt> can call <tt>Parser.parse_primary</tt>). This is
-powerful because it allows us to handle recursive grammars, and keeps each
-production very simple. Note that parentheses do not cause construction of AST
-nodes themselves. While we could do it this way, the most important role of
-parentheses are to guide the parser and provide grouping. Once the parser
-constructs the AST, parentheses are not needed.</p>
-
-<p>The next simple production is for handling variable references and function
-calls:</p>
-
-<div class="doc_code">
-<pre>
- (* identifierexpr
- * ::= identifier
- * ::= identifier '(' argumentexpr ')' *)
- | [&lt; 'Token.Ident id; stream &gt;] -&gt;
- let rec parse_args accumulator = parser
- | [&lt; e=parse_expr; stream &gt;] -&gt;
- begin parser
- | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; e :: accumulator
- end stream
- | [&lt; &gt;] -&gt; accumulator
- in
- let rec parse_ident id = parser
- (* Call. *)
- | [&lt; 'Token.Kwd '(';
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
- Ast.Call (id, Array.of_list (List.rev args))
-
- (* Simple variable ref. *)
- | [&lt; &gt;] -&gt; Ast.Variable id
- in
- parse_ident id stream
-</pre>
-</div>
-
-<p>This routine follows the same style as the other routines. (It expects to be
-called if the current token is a <tt>Token.Ident</tt> token). It also has
-recursion and error handling. One interesting aspect of this is that it uses
-<em>look-ahead</em> to determine if the current identifier is a stand alone
-variable reference or if it is a function call expression. It handles this by
-checking to see if the token after the identifier is a '(' token, constructing
-either a <tt>Ast.Variable</tt> or <tt>Ast.Call</tt> node as appropriate.
-</p>
-
-<p>We finish up by raising an exception if we received a token we didn't
-expect:</p>
-
-<div class="doc_code">
-<pre>
- | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-</pre>
-</div>
-
-<p>Now that basic expressions are handled, we need to handle binary expressions.
-They are a bit more complex.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserbinops">Binary Expression Parsing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Binary expressions are significantly harder to parse because they are often
-ambiguous. For example, when given the string "x+y*z", the parser can choose
-to parse it as either "(x+y)*z" or "x+(y*z)". With common definitions from
-mathematics, we expect the later parse, because "*" (multiplication) has
-higher <em>precedence</em> than "+" (addition).</p>
-
-<p>There are many ways to handle this, but an elegant and efficient way is to
-use <a href=
-"http://en.wikipedia.org/wiki/Operator-precedence_parser">Operator-Precedence
-Parsing</a>. This parsing technique uses the precedence of binary operators to
-guide recursion. To start with, we need a table of precedences:</p>
-
-<div class="doc_code">
-<pre>
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-...
-
-let main () =
- (* Install standard binary operators.
- * 1 is the lowest precedence. *)
- Hashtbl.add Parser.binop_precedence '&lt;' 10;
- Hashtbl.add Parser.binop_precedence '+' 20;
- Hashtbl.add Parser.binop_precedence '-' 20;
- Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
- ...
-</pre>
-</div>
-
-<p>For the basic form of Kaleidoscope, we will only support 4 binary operators
-(this can obviously be extended by you, our brave and intrepid reader). The
-<tt>Parser.precedence</tt> function returns the precedence for the current
-token, or -1 if the token is not a binary operator. Having a <tt>Hashtbl.t</tt>
-makes it easy to add new operators and makes it clear that the algorithm doesn't
-depend on the specific operators involved, but it would be easy enough to
-eliminate the <tt>Hashtbl.t</tt> and do the comparisons in the
-<tt>Parser.precedence</tt> function. (Or just use a fixed-size array).</p>
-
-<p>With the helper above defined, we can now start parsing binary expressions.
-The basic idea of operator precedence parsing is to break down an expression
-with potentially ambiguous binary operators into pieces. Consider ,for example,
-the expression "a+b+(c+d)*e*f+g". Operator precedence parsing considers this
-as a stream of primary expressions separated by binary operators. As such,
-it will first parse the leading primary expression "a", then it will see the
-pairs [+, b] [+, (c+d)] [*, e] [*, f] and [+, g]. Note that because parentheses
-are primary expressions, the binary expression parser doesn't need to worry
-about nested subexpressions like (c+d) at all.
-</p>
-
-<p>
-To start, an expression is a primary expression potentially followed by a
-sequence of [binop,primaryexpr] pairs:</p>
-
-<div class="doc_code">
-<pre>
-(* expression
- * ::= primary binoprhs *)
-and parse_expr = parser
- | [&lt; lhs=parse_primary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-</pre>
-</div>
-
-<p><tt>Parser.parse_bin_rhs</tt> is the function that parses the sequence of
-pairs for us. It takes a precedence and a pointer to an expression for the part
-that has been parsed so far. Note that "x" is a perfectly valid expression: As
-such, "binoprhs" is allowed to be empty, in which case it returns the expression
-that is passed into it. In our example above, the code passes the expression for
-"a" into <tt>Parser.parse_bin_rhs</tt> and the current token is "+".</p>
-
-<p>The precedence value passed into <tt>Parser.parse_bin_rhs</tt> indicates the
-<em>minimal operator precedence</em> that the function is allowed to eat. For
-example, if the current pair stream is [+, x] and <tt>Parser.parse_bin_rhs</tt>
-is passed in a precedence of 40, it will not consume any tokens (because the
-precedence of '+' is only 20). With this in mind, <tt>Parser.parse_bin_rhs</tt>
-starts with:</p>
-
-<div class="doc_code">
-<pre>
-(* binoprhs
- * ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
- match Stream.peek stream with
- (* If this is a binop, find its precedence. *)
- | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
- let token_prec = precedence c in
-
- (* If this is a binop that binds at least as tightly as the current binop,
- * consume it, otherwise we are done. *)
- if token_prec &lt; expr_prec then lhs else begin
-</pre>
-</div>
-
-<p>This code gets the precedence of the current token and checks to see if if is
-too low. Because we defined invalid tokens to have a precedence of -1, this
-check implicitly knows that the pair-stream ends when the token stream runs out
-of binary operators. If this check succeeds, we know that the token is a binary
-operator and that it will be included in this expression:</p>
-
-<div class="doc_code">
-<pre>
- (* Eat the binop. *)
- Stream.junk stream;
-
- (* Okay, we know this is a binop. *)
- let rhs =
- match Stream.peek stream with
- | Some (Token.Kwd c2) -&gt;
-</pre>
-</div>
-
-<p>As such, this code eats (and remembers) the binary operator and then parses
-the primary expression that follows. This builds up the whole pair, the first of
-which is [+, b] for the running example.</p>
-
-<p>Now that we parsed the left-hand side of an expression and one pair of the
-RHS sequence, we have to decide which way the expression associates. In
-particular, we could have "(a+b) binop unparsed" or "a + (b binop unparsed)".
-To determine this, we look ahead at "binop" to determine its precedence and
-compare it to BinOp's precedence (which is '+' in this case):</p>
-
-<div class="doc_code">
-<pre>
- (* If BinOp binds less tightly with rhs than the operator after
- * rhs, let the pending operator take rhs as its lhs. *)
- let next_prec = precedence c2 in
- if token_prec &lt; next_prec
-</pre>
-</div>
-
-<p>If the precedence of the binop to the right of "RHS" is lower or equal to the
-precedence of our current operator, then we know that the parentheses associate
-as "(a+b) binop ...". In our example, the current operator is "+" and the next
-operator is "+", we know that they have the same precedence. In this case we'll
-create the AST node for "a+b", and then continue parsing:</p>
-
-<div class="doc_code">
-<pre>
- ... if body omitted ...
- in
-
- (* Merge lhs/rhs. *)
- let lhs = Ast.Binary (c, lhs, rhs) in
- parse_bin_rhs expr_prec lhs stream
- end
-</pre>
-</div>
-
-<p>In our example above, this will turn "a+b+" into "(a+b)" and execute the next
-iteration of the loop, with "+" as the current token. The code above will eat,
-remember, and parse "(c+d)" as the primary expression, which makes the
-current pair equal to [+, (c+d)]. It will then evaluate the 'if' conditional above with
-"*" as the binop to the right of the primary. In this case, the precedence of "*" is
-higher than the precedence of "+" so the if condition will be entered.</p>
-
-<p>The critical question left here is "how can the if condition parse the right
-hand side in full"? In particular, to build the AST correctly for our example,
-it needs to get all of "(c+d)*e*f" as the RHS expression variable. The code to
-do this is surprisingly simple (code from the above two blocks duplicated for
-context):</p>
-
-<div class="doc_code">
-<pre>
- match Stream.peek stream with
- | Some (Token.Kwd c2) -&gt;
- (* If BinOp binds less tightly with rhs than the operator after
- * rhs, let the pending operator take rhs as its lhs. *)
- if token_prec &lt; precedence c2
- then <b>parse_bin_rhs (token_prec + 1) rhs stream</b>
- else rhs
- | _ -&gt; rhs
- in
-
- (* Merge lhs/rhs. *)
- let lhs = Ast.Binary (c, lhs, rhs) in
- parse_bin_rhs expr_prec lhs stream
- end
-</pre>
-</div>
-
-<p>At this point, we know that the binary operator to the RHS of our primary
-has higher precedence than the binop we are currently parsing. As such, we know
-that any sequence of pairs whose operators are all higher precedence than "+"
-should be parsed together and returned as "RHS". To do this, we recursively
-invoke the <tt>Parser.parse_bin_rhs</tt> function specifying "token_prec+1" as
-the minimum precedence required for it to continue. In our example above, this
-will cause it to return the AST node for "(c+d)*e*f" as RHS, which is then set
-as the RHS of the '+' expression.</p>
-
-<p>Finally, on the next iteration of the while loop, the "+g" piece is parsed
-and added to the AST. With this little bit of code (14 non-trivial lines), we
-correctly handle fully general binary expression parsing in a very elegant way.
-This was a whirlwind tour of this code, and it is somewhat subtle. I recommend
-running through it with a few tough examples to see how it works.
-</p>
-
-<p>This wraps up handling of expressions. At this point, we can point the
-parser at an arbitrary token stream and build an expression from it, stopping
-at the first token that is not part of the expression. Next up we need to
-handle function definitions, etc.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parsertop">Parsing the Rest</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The next thing missing is handling of function prototypes. In Kaleidoscope,
-these are used both for 'extern' function declarations as well as function body
-definitions. The code to do this is straight-forward and not very interesting
-(once you've survived expressions):
-</p>
-
-<div class="doc_code">
-<pre>
-(* prototype
- * ::= id '(' id* ')' *)
-let parse_prototype =
- let rec parse_args accumulator = parser
- | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; accumulator
- in
-
- parser
- | [&lt; 'Token.Ident id;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- (* success. *)
- Ast.Prototype (id, Array.of_list (List.rev args))
-
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected function name in prototype")
-</pre>
-</div>
-
-<p>Given this, a function definition is very simple, just a prototype plus
-an expression to implement the body:</p>
-
-<div class="doc_code">
-<pre>
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
- | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
- Ast.Function (p, e)
-</pre>
-</div>
-
-<p>In addition, we support 'extern' to declare functions like 'sin' and 'cos' as
-well as to support forward declaration of user functions. These 'extern's are just
-prototypes with no body:</p>
-
-<div class="doc_code">
-<pre>
-(* external ::= 'extern' prototype *)
-let parse_extern = parser
- | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</div>
-
-<p>Finally, we'll also let the user type in arbitrary top-level expressions and
-evaluate them on the fly. We will handle this by defining anonymous nullary
-(zero argument) functions for them:</p>
-
-<div class="doc_code">
-<pre>
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
- | [&lt; e=parse_expr &gt;] -&gt;
- (* Make an anonymous proto. *)
- Ast.Function (Ast.Prototype ("", [||]), e)
-</pre>
-</div>
-
-<p>Now that we have all the pieces, let's build a little driver that will let us
-actually <em>execute</em> this code we've built!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="driver">The Driver</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The driver for this simply invokes all of the parsing pieces with a top-level
-dispatch loop. There isn't much interesting here, so I'll just include the
-top-level loop. See <a href="#code">below</a> for full code in the "Top-Level
-Parsing" section.</p>
-
-<div class="doc_code">
-<pre>
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop stream =
- match Stream.peek stream with
- | None -&gt; ()
-
- (* ignore top-level semicolons. *)
- | Some (Token.Kwd ';') -&gt;
- Stream.junk stream;
- main_loop stream
-
- | Some token -&gt;
- begin
- try match token with
- | Token.Def -&gt;
- ignore(Parser.parse_definition stream);
- print_endline "parsed a function definition.";
- | Token.Extern -&gt;
- ignore(Parser.parse_extern stream);
- print_endline "parsed an extern.";
- | _ -&gt;
- (* Evaluate a top-level expression into an anonymous function. *)
- ignore(Parser.parse_toplevel stream);
- print_endline "parsed a top-level expr";
- with Stream.Error s -&gt;
- (* Skip token for error recovery. *)
- Stream.junk stream;
- print_endline s;
- end;
- print_string "ready&gt; "; flush stdout;
- main_loop stream
-</pre>
-</div>
-
-<p>The most interesting part of this is that we ignore top-level semicolons.
-Why is this, you ask? The basic reason is that if you type "4 + 5" at the
-command line, the parser doesn't know whether that is the end of what you will type
-or not. For example, on the next line you could type "def foo..." in which case
-4+5 is the end of a top-level expression. Alternatively you could type "* 6",
-which would continue the expression. Having top-level semicolons allows you to
-type "4+5;", and the parser will know you are done.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="conclusions">Conclusions</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>With just under 300 lines of commented code (240 lines of non-comment,
-non-blank code), we fully defined our minimal language, including a lexer,
-parser, and AST builder. With this done, the executable will validate
-Kaleidoscope code and tell us if it is grammatically invalid. For
-example, here is a sample interaction:</p>
-
-<div class="doc_code">
-<pre>
-$ <b>./toy.byte</b>
-ready&gt; <b>def foo(x y) x+foo(y, 4.0);</b>
-Parsed a function definition.
-ready&gt; <b>def foo(x y) x+y y;</b>
-Parsed a function definition.
-Parsed a top-level expr
-ready&gt; <b>def foo(x y) x+y );</b>
-Parsed a function definition.
-Error: unknown token when expecting an expression
-ready&gt; <b>extern sin(a);</b>
-ready&gt; Parsed an extern
-ready&gt; <b>^D</b>
-$
-</pre>
-</div>
-
-<p>There is a lot of room for extension here. You can define new AST nodes,
-extend the language in many ways, etc. In the <a href="OCamlLangImpl3.html">
-next installment</a>, we will describe how to generate LLVM Intermediate
-Representation (IR) from the AST.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for this and the previous chapter.
-Note that it is fully self-contained: you don't need LLVM or any external
-libraries at all for this. (Besides the ocaml standard libraries, of
-course.) To build this, just compile with:</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
- (* commands *)
- | Def | Extern
-
- (* primary *)
- | Ident of string | Number of float
-
- (* unknown *)
- | Kwd of char
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
- (* Skip any whitespace. *)
- | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
- (* identifier: [a-zA-Z][a-zA-Z0-9] *)
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_ident buffer stream
-
- (* number: [0-9.]+ *)
- | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_number buffer stream
-
- (* Comment until end of line. *)
- | [&lt; ' ('#'); stream &gt;] -&gt;
- lex_comment stream
-
- (* Otherwise, just return the character as its ascii value. *)
- | [&lt; 'c; stream &gt;] -&gt;
- [&lt; 'Token.Kwd c; lex stream &gt;]
-
- (* end of stream. *)
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
- | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_number buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_ident buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- match Buffer.contents buffer with
- | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
- | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
- | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
- | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
- | [&lt; 'c; e=lex_comment &gt;] -&gt; e
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
- (* variant for numeric literals like "1.0". *)
- | Number of float
-
- (* variant for referencing a variable, like "a". *)
- | Variable of string
-
- (* variant for a binary operator. *)
- | Binary of char * expr * expr
-
- (* variant for function calls. *)
- | Call of string * expr array
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- * ::= identifier
- * ::= numberexpr
- * ::= parenexpr *)
-let rec parse_primary = parser
- (* numberexpr ::= number *)
- | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
- (* parenexpr ::= '(' expression ')' *)
- | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
- (* identifierexpr
- * ::= identifier
- * ::= identifier '(' argumentexpr ')' *)
- | [&lt; 'Token.Ident id; stream &gt;] -&gt;
- let rec parse_args accumulator = parser
- | [&lt; e=parse_expr; stream &gt;] -&gt;
- begin parser
- | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; e :: accumulator
- end stream
- | [&lt; &gt;] -&gt; accumulator
- in
- let rec parse_ident id = parser
- (* Call. *)
- | [&lt; 'Token.Kwd '(';
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
- Ast.Call (id, Array.of_list (List.rev args))
-
- (* Simple variable ref. *)
- | [&lt; &gt;] -&gt; Ast.Variable id
- in
- parse_ident id stream
-
- | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- * ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
- match Stream.peek stream with
- (* If this is a binop, find its precedence. *)
- | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
- let token_prec = precedence c in
-
- (* If this is a binop that binds at least as tightly as the current binop,
- * consume it, otherwise we are done. *)
- if token_prec &lt; expr_prec then lhs else begin
- (* Eat the binop. *)
- Stream.junk stream;
-
- (* Parse the primary expression after the binary operator. *)
- let rhs = parse_primary stream in
-
- (* Okay, we know this is a binop. *)
- let rhs =
- match Stream.peek stream with
- | Some (Token.Kwd c2) -&gt;
- (* If BinOp binds less tightly with rhs than the operator after
- * rhs, let the pending operator take rhs as its lhs. *)
- let next_prec = precedence c2 in
- if token_prec &lt; next_prec
- then parse_bin_rhs (token_prec + 1) rhs stream
- else rhs
- | _ -&gt; rhs
- in
-
- (* Merge lhs/rhs. *)
- let lhs = Ast.Binary (c, lhs, rhs) in
- parse_bin_rhs expr_prec lhs stream
- end
- | _ -&gt; lhs
-
-(* expression
- * ::= primary binoprhs *)
-and parse_expr = parser
- | [&lt; lhs=parse_primary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- * ::= id '(' id* ')' *)
-let parse_prototype =
- let rec parse_args accumulator = parser
- | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; accumulator
- in
-
- parser
- | [&lt; 'Token.Ident id;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- (* success. *)
- Ast.Prototype (id, Array.of_list (List.rev args))
-
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
- | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
- Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
- | [&lt; e=parse_expr &gt;] -&gt;
- (* Make an anonymous proto. *)
- Ast.Function (Ast.Prototype ("", [||]), e)
-
-(* external ::= 'extern' prototype *)
-let parse_extern = parser
- | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop stream =
- match Stream.peek stream with
- | None -&gt; ()
-
- (* ignore top-level semicolons. *)
- | Some (Token.Kwd ';') -&gt;
- Stream.junk stream;
- main_loop stream
-
- | Some token -&gt;
- begin
- try match token with
- | Token.Def -&gt;
- ignore(Parser.parse_definition stream);
- print_endline "parsed a function definition.";
- | Token.Extern -&gt;
- ignore(Parser.parse_extern stream);
- print_endline "parsed an extern.";
- | _ -&gt;
- (* Evaluate a top-level expression into an anonymous function. *)
- ignore(Parser.parse_toplevel stream);
- print_endline "parsed a top-level expr";
- with Stream.Error s -&gt;
- (* Skip token for error recovery. *)
- Stream.junk stream;
- print_endline s;
- end;
- print_string "ready&gt; "; flush stdout;
- main_loop stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-let main () =
- (* Install standard binary operators.
- * 1 is the lowest precedence. *)
- Hashtbl.add Parser.binop_precedence '&lt;' 10;
- Hashtbl.add Parser.binop_precedence '+' 20;
- Hashtbl.add Parser.binop_precedence '-' 20;
- Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
-
- (* Prime the first token. *)
- print_string "ready&gt; "; flush stdout;
- let stream = Lexer.lex (Stream.of_channel stdin) in
-
- (* Run the main "interpreter loop" now. *)
- Toplevel.main_loop stream;
-;;
-
-main ()
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl3.html">Next: Implementing Code Generation to LLVM IR</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a>
- <a href="mailto:erickt@users.sourceforge.net">Erick Tryzelaar</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-05-03 00:46:36 +0200 (Thu, 03 May 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl2.rst b/docs/tutorial/OCamlLangImpl2.rst
new file mode 100644
index 000000000000..83a22ab22d4f
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl2.rst
@@ -0,0 +1,896 @@
+===========================================
+Kaleidoscope: Implementing a Parser and AST
+===========================================
+
+.. contents::
+ :local:
+
+Chapter 2 Introduction
+======================
+
+Welcome to Chapter 2 of the "`Implementing a language with LLVM in
+Objective Caml <index.html>`_" tutorial. This chapter shows you how to
+use the lexer, built in `Chapter 1 <OCamlLangImpl1.html>`_, to build a
+full `parser <http://en.wikipedia.org/wiki/Parsing>`_ for our
+Kaleidoscope language. Once we have a parser, we'll define and build an
+`Abstract Syntax
+Tree <http://en.wikipedia.org/wiki/Abstract_syntax_tree>`_ (AST).
+
+The parser we will build uses a combination of `Recursive Descent
+Parsing <http://en.wikipedia.org/wiki/Recursive_descent_parser>`_ and
+`Operator-Precedence
+Parsing <http://en.wikipedia.org/wiki/Operator-precedence_parser>`_ to
+parse the Kaleidoscope language (the latter for binary expressions and
+the former for everything else). Before we get to parsing though, let's
+talk about the output of the parser: the Abstract Syntax Tree.
+
+The Abstract Syntax Tree (AST)
+==============================
+
+The AST for a program captures its behavior in such a way that it is
+easy for later stages of the compiler (e.g. code generation) to
+interpret. We basically want one object for each construct in the
+language, and the AST should closely model the language. In
+Kaleidoscope, we have expressions, a prototype, and a function object.
+We'll start with expressions first:
+
+.. code-block:: ocaml
+
+ (* expr - Base type for all expression nodes. *)
+ type expr =
+ (* variant for numeric literals like "1.0". *)
+ | Number of float
+
+The code above shows the definition of the ``expr`` type and the first
+variant, which we use for numeric literals. The important thing to note
+about this code is that the ``Number`` variant captures the numeric value
+of the literal as its argument. This allows later phases of the compiler
+to know what the stored numeric value is.
+
+Right now we only create the AST, so there are no useful functions for
+working with it. It would be very easy to add a function to pretty print
+the code, for example (we sketch one below). Here are the other
+expression AST node definitions that we'll use in the basic form of the
+Kaleidoscope language:
+
+.. code-block:: ocaml
+
+ (* variant for referencing a variable, like "a". *)
+ | Variable of string
+
+ (* variant for a binary operator. *)
+ | Binary of char * expr * expr
+
+ (* variant for function calls. *)
+ | Call of string * expr array
+
+This is all (intentionally) rather straight-forward: variables capture
+the variable name, binary operators capture their opcode (e.g. '+'), and
+calls capture a function name as well as a list of any argument
+expressions. One thing that is nice about our AST is that it captures
+the language features without talking about the syntax of the language.
+Note that there is no discussion about precedence of binary operators,
+lexical structure, etc.
+
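+As an example of such a function (our own sketch, not part of the
+tutorial's code listing), a pretty printer over ``expr`` is a short
+recursive match:
+
+.. code-block:: ocaml
+
+ (* to_string - Render an expr back into Kaleidoscope-like source.
+  * This helper is illustrative only. *)
+ let rec to_string = function
+   | Number n -> string_of_float n
+   | Variable name -> name
+   | Binary (op, lhs, rhs) ->
+       "(" ^ to_string lhs ^ " " ^ String.make 1 op ^ " " ^ to_string rhs ^ ")"
+   | Call (callee, args) ->
+       let arg_strings = Array.to_list (Array.map to_string args) in
+       callee ^ "(" ^ String.concat ", " arg_strings ^ ")"
+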
+For our basic language, these are all of the expression nodes we'll
+define. Because it doesn't have conditional control flow, it isn't
+Turing-complete; we'll fix that in a later installment. The two things
+we need next are a way to talk about the interface to a function, and a
+way to talk about functions themselves:
+
+.. code-block:: ocaml
+
+ (* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+ type proto = Prototype of string * string array
+
+ (* func - This type represents a function definition itself. *)
+ type func = Function of proto * expr
+
+In Kaleidoscope, functions are typed with just a count of their
+arguments. Since all values are double precision floating point, the
+type of each argument doesn't need to be stored anywhere. In a more
+aggressive and realistic language, the "expr" variants would probably
+have a type field.
+
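+For example (a small illustration, not part of the listing), the
+prototype for "def foo(x y)" and a trivial body for it would be
+represented as:
+
+.. code-block:: ocaml
+
+ (* foo(x y): the name plus the argument names. *)
+ let proto = Prototype ("foo", [|"x"; "y"|])
+
+ (* def foo(x y) x: a prototype paired with its body expression. *)
+ let f = Function (proto, Variable "x")
+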
+With this scaffolding, we can now talk about parsing expressions and
+function bodies in Kaleidoscope.
+
+Parser Basics
+=============
+
+Now that we have an AST to build, we need to define the parser code to
+build it. The idea here is that we want to parse something like "x+y"
+(which is returned as three tokens by the lexer) into an AST that could
+be generated with calls like this:
+
+.. code-block:: ocaml
+
+ let x = Variable "x" in
+ let y = Variable "y" in
+ let result = Binary ('+', x, y) in
+ ...
+
+The error handling routines make use of the builtin ``Stream.Failure``
+and ``Stream.Error`` exceptions. ``Stream.Failure`` is raised when the
+parser is unable to find any matching token in the first position of a
+pattern. ``Stream.Error`` is raised when the first token matches, but
+the rest do not. The error recovery in our parser will not be the best
+and is not particularly user-friendly, but it will be enough for our
+tutorial. These exceptions make it easier to handle errors in routines
+that have various return types.
+
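+As a sketch of how these exceptions surface (the ``try_parse_expr``
+helper is ours, not part of the tutorial's code):
+
+.. code-block:: ocaml
+
+ (* try_parse_expr - Run the expression parser, mapping the two
+  * stream-parser exceptions to an option. *)
+ let try_parse_expr stream =
+   try Some (Parser.parse_expr stream) with
+   | Stream.Failure -> None (* no rule matched the first token *)
+   | Stream.Error msg -> print_endline msg; None (* a later token mismatched *)
+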
+With these basic types and exceptions, we can implement the first piece
+of our grammar: numeric literals.
+
+Basic Expression Parsing
+========================
+
+We start with numeric literals, because they are the simplest to
+process. For each production in our grammar, we'll define a function
+which parses that production. We call this class of expressions
+"primary" expressions, for reasons that will become more clear `later in
+the tutorial <OCamlLangImpl6.html#unary>`_. In order to parse an
+arbitrary primary expression, we need to determine what sort of
+expression it is. For numeric literals, we have:
+
+.. code-block:: ocaml
+
+ (* primary
+ * ::= identifier
+ * ::= numberexpr
+ * ::= parenexpr *)
+ let rec parse_primary = parser
+ (* numberexpr ::= number *)
+ | [< 'Token.Number n >] -> Ast.Number n
+
+This routine is very simple: it expects to be called when the current
+token is a ``Token.Number`` token. It takes the current number value,
+creates an ``Ast.Number`` node, advances the lexer to the next token, and
+finally returns.
+
+There are some interesting aspects to this. The most important one is
+that this routine eats all of the tokens that correspond to the
+production and returns the lexer buffer with the next token (which is
+not part of the grammar production) ready to go. This is a fairly
+standard way to go for recursive descent parsers. For a better example,
+the parenthesis operator is defined like this:
+
+.. code-block:: ocaml
+
+ (* parenexpr ::= '(' expression ')' *)
+ | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+This function illustrates a number of interesting things about the
+parser:
+
+1) It shows how we use the ``Stream.Error`` exception. When called, this
+function expects that the current token is a '(' token, but after
+parsing the subexpression, it is possible that there is no ')' waiting.
+For example, if the user types in "(4 x" instead of "(4)", the parser
+should emit an error. Because errors can occur, the parser needs a way
+to indicate that they happened. In our parser, we use the camlp4
+shortcut syntax ``token ?? "parse error"``, where if the token before
+the ``??`` does not match, then ``Stream.Error "parse error"`` will be
+raised.
+
+2) Another interesting aspect of this function is that it uses recursion
+by calling ``Parser.parse_expr`` (we will soon see that
+``Parser.parse_expr`` can call ``Parser.parse_primary``). This is
+powerful because it allows us to handle recursive grammars, and keeps
+each production very simple. Note that parentheses do not cause
+construction of AST nodes themselves. While we could do it this way, the
+most important role of parentheses is to guide the parser and provide
+grouping. Once the parser constructs the AST, parentheses are not
+needed.
+
+The next simple production is for handling variable references and
+function calls:
+
+.. code-block:: ocaml
+
+ (* identifierexpr
+ * ::= identifier
+ * ::= identifier '(' argumentexpr ')' *)
+ | [< 'Token.Ident id; stream >] ->
+ let rec parse_args accumulator = parser
+ | [< e=parse_expr; stream >] ->
+ begin parser
+ | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+ | [< >] -> e :: accumulator
+ end stream
+ | [< >] -> accumulator
+ in
+ let rec parse_ident id = parser
+ (* Call. *)
+ | [< 'Token.Kwd '(';
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')'">] ->
+ Ast.Call (id, Array.of_list (List.rev args))
+
+ (* Simple variable ref. *)
+ | [< >] -> Ast.Variable id
+ in
+ parse_ident id stream
+
+This routine follows the same style as the other routines. (It expects
+to be called if the current token is a ``Token.Ident`` token). It also
+has recursion and error handling. One interesting aspect of this is that
+it uses *look-ahead* to determine if the current identifier is a
+stand-alone variable reference or if it is a function call expression. It
+handles this by checking to see if the token after the identifier is a
+'(' token, constructing either a ``Ast.Variable`` or ``Ast.Call`` node
+as appropriate.
+
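+Concretely (an illustration of the two outcomes, not code from the
+listing), the two shapes produce different nodes:
+
+.. code-block:: ocaml
+
+ (* "a" alone becomes a variable reference... *)
+ let v = Ast.Variable "a"
+
+ (* ...while "a(b, 4.0)" becomes a call with its parsed arguments. *)
+ let c = Ast.Call ("a", [| Ast.Variable "b"; Ast.Number 4.0 |])
+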
+We finish up by raising an exception if we received a token we didn't
+expect:
+
+.. code-block:: ocaml
+
+ | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+Now that basic expressions are handled, we need to handle binary
+expressions. They are a bit more complex.
+
+Binary Expression Parsing
+=========================
+
+Binary expressions are significantly harder to parse because they are
+often ambiguous. For example, when given the string "x+y\*z", the parser
+can choose to parse it as either "(x+y)\*z" or "x+(y\*z)". With common
+definitions from mathematics, we expect the latter parse, because "\*"
+(multiplication) has higher *precedence* than "+" (addition).
+
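+In terms of our AST, the two readings correspond to two different trees
+(an illustration; only the first is the parse we want):
+
+.. code-block:: ocaml
+
+ (* "x+(y*z)": what we want, since '*' binds tighter than '+'. *)
+ let wanted =
+   Ast.Binary ('+', Ast.Variable "x",
+               Ast.Binary ('*', Ast.Variable "y", Ast.Variable "z"))
+
+ (* "(x+y)*z": what we do not want. *)
+ let unwanted =
+   Ast.Binary ('*',
+               Ast.Binary ('+', Ast.Variable "x", Ast.Variable "y"),
+               Ast.Variable "z")
+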
+There are many ways to handle this, but an elegant and efficient way is
+to use `Operator-Precedence
+Parsing <http://en.wikipedia.org/wiki/Operator-precedence_parser>`_.
+This parsing technique uses the precedence of binary operators to guide
+recursion. To start with, we need a table of precedences:
+
+.. code-block:: ocaml
+
+ (* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+ let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+ (* precedence - Get the precedence of the pending binary operator token. *)
+ let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+ ...
+
+ let main () =
+ (* Install standard binary operators.
+ * 1 is the lowest precedence. *)
+ Hashtbl.add Parser.binop_precedence '<' 10;
+ Hashtbl.add Parser.binop_precedence '+' 20;
+ Hashtbl.add Parser.binop_precedence '-' 20;
+ Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
+ ...
+
+For the basic form of Kaleidoscope, we will only support 4 binary
+operators (this can obviously be extended by you, our brave and intrepid
+reader). The ``Parser.precedence`` function returns the precedence for
+the current token, or -1 if the token is not a binary operator. Having a
+``Hashtbl.t`` makes it easy to add new operators and makes it clear that
+the algorithm doesn't depend on the specific operators involved, but it
+would be easy enough to eliminate the ``Hashtbl.t`` and do the
+comparisons in the ``Parser.precedence`` function. (Or just use a
+fixed-size array).
+
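+For instance, once ``main`` (shown above) has installed the operators,
+``precedence`` behaves like this (a usage sketch):
+
+.. code-block:: ocaml
+
+ let () = assert (Parser.precedence '*' = 40) (* installed above *)
+ let () = assert (Parser.precedence '+' = 20)
+ let () = assert (Parser.precedence '?' = -1) (* not a binary operator *)
+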
+With the helper above defined, we can now start parsing binary
+expressions. The basic idea of operator precedence parsing is to break
+down an expression with potentially ambiguous binary operators into
+pieces. Consider, for example, the expression "a+b+(c+d)\*e\*f+g".
+Operator precedence parsing considers this as a stream of primary
+expressions separated by binary operators. As such, it will first parse
+the leading primary expression "a", then it will see the pairs [+, b]
+[+, (c+d)] [\*, e] [\*, f] and [+, g]. Note that because parentheses are
+primary expressions, the binary expression parser doesn't need to worry
+about nested subexpressions like (c+d) at all.
+
+To start, an expression is a primary expression potentially followed by
+a sequence of [binop,primaryexpr] pairs:
+
+.. code-block:: ocaml
+
+ (* expression
+ * ::= primary binoprhs *)
+ and parse_expr = parser
+ | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+``Parser.parse_bin_rhs`` is the function that parses the sequence of
+pairs for us. It takes a precedence and the expression for the part
+that has been parsed so far. Note that "x" is a perfectly valid
+expression; as such, "binoprhs" is allowed to be empty, in which case it
+returns the expression that is passed into it. In our example above, the
+code passes the expression for "a" into ``Parser.parse_bin_rhs`` and the
+current token is "+".
+
+The precedence value passed into ``Parser.parse_bin_rhs`` indicates the
+*minimal operator precedence* that the function is allowed to eat. For
+example, if the current pair stream is [+, x] and
+``Parser.parse_bin_rhs`` is passed in a precedence of 40, it will not
+consume any tokens (because the precedence of '+' is only 20). With this
+in mind, ``Parser.parse_bin_rhs`` starts with:
+
+.. code-block:: ocaml
+
+ (* binoprhs
+ * ::= ('+' primary)* *)
+ and parse_bin_rhs expr_prec lhs stream =
+ match Stream.peek stream with
+ (* If this is a binop, find its precedence. *)
+ | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+ let token_prec = precedence c in
+
+ (* If this is a binop that binds at least as tightly as the current binop,
+ * consume it, otherwise we are done. *)
+ if token_prec < expr_prec then lhs else begin
+
+This code gets the precedence of the current token and checks to see if
+it is too low. Because we defined invalid tokens to have a precedence of
+-1, this check implicitly knows that the pair-stream ends when the token
+stream runs out of binary operators. If this check succeeds, we know
+that the token is a binary operator and that it will be included in this
+expression:
+
+.. code-block:: ocaml
+
+ (* Eat the binop. *)
+ Stream.junk stream;
+
+ (* Okay, we know this is a binop. *)
+ let rhs =
+ match Stream.peek stream with
+ | Some (Token.Kwd c2) ->
+
+As such, this code eats (and remembers) the binary operator and then
+parses the primary expression that follows. This builds up the whole
+pair, the first of which is [+, b] for the running example.
+
+Now that we parsed the left-hand side of an expression and one pair of
+the RHS sequence, we have to decide which way the expression associates.
+In particular, we could have "(a+b) binop unparsed" or "a + (b binop
+unparsed)". To determine this, we look ahead at "binop" to determine its
+precedence and compare it to BinOp's precedence (which is '+' in this
+case):
+
+.. code-block:: ocaml
+
+ (* If BinOp binds less tightly with rhs than the operator after
+ * rhs, let the pending operator take rhs as its lhs. *)
+ let next_prec = precedence c2 in
+ if token_prec < next_prec
+
+If the precedence of the binop to the right of "RHS" is lower or equal
+to the precedence of our current operator, then we know that the
+parentheses associate as "(a+b) binop ...". In our example, the current
+operator is "+" and the next operator is "+", so we know that they have the
+same precedence. In this case we'll create the AST node for "a+b", and
+then continue parsing:
+
+.. code-block:: ocaml
+
+ ... if body omitted ...
+ in
+
+ (* Merge lhs/rhs. *)
+ let lhs = Ast.Binary (c, lhs, rhs) in
+ parse_bin_rhs expr_prec lhs stream
+ end
+
+In our example above, this will turn "a+b+" into "(a+b)" and execute the
+next iteration of the loop, with "+" as the current token. The code
+above will eat, remember, and parse "(c+d)" as the primary expression,
+which makes the current pair equal to [+, (c+d)]. It will then evaluate
+the 'if' conditional above with "\*" as the binop to the right of the
+primary. In this case, the precedence of "\*" is higher than the
+precedence of "+" so the if condition will be entered.
+
+The critical question left here is "how can the if condition parse the
+right hand side in full"? In particular, to build the AST correctly for
+our example, it needs to get all of "(c+d)\*e\*f" as the RHS expression
+variable. The code to do this is surprisingly simple (code from the
+above two blocks duplicated for context):
+
+.. code-block:: ocaml
+
+ match Stream.peek stream with
+ | Some (Token.Kwd c2) ->
+ (* If BinOp binds less tightly with rhs than the operator after
+ * rhs, let the pending operator take rhs as its lhs. *)
+ if token_prec < precedence c2
+ then parse_bin_rhs (token_prec + 1) rhs stream
+ else rhs
+ | _ -> rhs
+ in
+
+ (* Merge lhs/rhs. *)
+ let lhs = Ast.Binary (c, lhs, rhs) in
+ parse_bin_rhs expr_prec lhs stream
+ end
+
+At this point, we know that the binary operator to the RHS of our
+primary has higher precedence than the binop we are currently parsing.
+As such, we know that any sequence of pairs whose operators are all
+higher precedence than "+" should be parsed together and returned as
+"RHS". To do this, we recursively invoke the ``Parser.parse_bin_rhs``
+function specifying "token\_prec+1" as the minimum precedence required
+for it to continue. In our example above, this will cause it to return
+the AST node for "(c+d)\*e\*f" as RHS, which is then set as the RHS of
+the '+' expression.
+
+Finally, on the next iteration of the loop, the "+g" piece is
+parsed and added to the AST. With this little bit of code (14
+non-trivial lines), we correctly handle fully general binary expression
+parsing in a very elegant way. This was a whirlwind tour of this code,
+and it is somewhat subtle. I recommend running through it with a few
+tough examples to see how it works.
+
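+As one such worked example, here is the tree our parser builds for the
+running example "a+b+(c+d)\*e\*f+g", written out as an AST value
+(left-associative '+', with the '\*' chain grouped under the second '+'):
+
+.. code-block:: ocaml
+
+ let tree =
+   Ast.Binary ('+',
+     Ast.Binary ('+',
+       Ast.Binary ('+', Ast.Variable "a", Ast.Variable "b"),
+       Ast.Binary ('*',
+         Ast.Binary ('*',
+           Ast.Binary ('+', Ast.Variable "c", Ast.Variable "d"),
+           Ast.Variable "e"),
+         Ast.Variable "f")),
+     Ast.Variable "g")
+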
+This wraps up handling of expressions. At this point, we can point the
+parser at an arbitrary token stream and build an expression from it,
+stopping at the first token that is not part of the expression. Next up
+we need to handle function definitions, etc.
+
+Parsing the Rest
+================
+
+The next thing missing is handling of function prototypes. In
+Kaleidoscope, these are used both for 'extern' function declarations as
+well as function body definitions. The code to do this is
+straight-forward and not very interesting (once you've survived
+expressions):
+
+.. code-block:: ocaml
+
+ (* prototype
+ * ::= id '(' id* ')' *)
+ let parse_prototype =
+ let rec parse_args accumulator = parser
+ | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+ | [< >] -> accumulator
+ in
+
+ parser
+ | [< 'Token.Ident id;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ (* success. *)
+ Ast.Prototype (id, Array.of_list (List.rev args))
+
+ | [< >] ->
+ raise (Stream.Error "expected function name in prototype")
+
+Given this, a function definition is very simple, just a prototype plus
+an expression to implement the body:
+
+.. code-block:: ocaml
+
+ (* definition ::= 'def' prototype expression *)
+ let parse_definition = parser
+ | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+ Ast.Function (p, e)
+
+In addition, we support 'extern' to declare functions like 'sin' and
+'cos' as well as to support forward declaration of user functions. These
+'extern's are just prototypes with no body:
+
+.. code-block:: ocaml
+
+ (* external ::= 'extern' prototype *)
+ let parse_extern = parser
+ | [< 'Token.Extern; e=parse_prototype >] -> e
+
+Finally, we'll also let the user type in arbitrary top-level expressions
+and evaluate them on the fly. We will handle this by defining anonymous
+nullary (zero argument) functions for them:
+
+.. code-block:: ocaml
+
+ (* toplevelexpr ::= expression *)
+ let parse_toplevel = parser
+ | [< e=parse_expr >] ->
+ (* Make an anonymous proto. *)
+ Ast.Function (Ast.Prototype ("", [||]), e)
+
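+For illustration, typing "4+5" at the top level yields this value (the
+``anon`` binding is ours):
+
+.. code-block:: ocaml
+
+ (* What parse_toplevel returns for "4+5". *)
+ let anon =
+   Ast.Function (Ast.Prototype ("", [||]),
+                 Ast.Binary ('+', Ast.Number 4.0, Ast.Number 5.0))
+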
+Now that we have all the pieces, let's build a little driver that will
+let us actually *execute* this code we've built!
+
+The Driver
+==========
+
+The driver for this simply invokes all of the parsing pieces with a
+top-level dispatch loop. There isn't much interesting here, so I'll just
+include the top-level loop. See `below <#code>`_ for full code in the
+"Top-Level Parsing" section.
+
+.. code-block:: ocaml
+
+ (* top ::= definition | external | expression | ';' *)
+ let rec main_loop stream =
+ match Stream.peek stream with
+ | None -> ()
+
+ (* ignore top-level semicolons. *)
+ | Some (Token.Kwd ';') ->
+ Stream.junk stream;
+ main_loop stream
+
+ | Some token ->
+ begin
+ try match token with
+ | Token.Def ->
+ ignore(Parser.parse_definition stream);
+ print_endline "parsed a function definition.";
+ | Token.Extern ->
+ ignore(Parser.parse_extern stream);
+ print_endline "parsed an extern.";
+ | _ ->
+ (* Evaluate a top-level expression into an anonymous function. *)
+ ignore(Parser.parse_toplevel stream);
+ print_endline "parsed a top-level expr";
+ with Stream.Error s ->
+ (* Skip token for error recovery. *)
+ Stream.junk stream;
+ print_endline s;
+ end;
+ print_string "ready> "; flush stdout;
+ main_loop stream
+
+The most interesting part of this is that we ignore top-level
+semicolons. Why is this, you ask? The basic reason is that if you type
+"4 + 5" at the command line, the parser doesn't know whether that is the
+end of what you will type or not. For example, on the next line you
+could type "def foo..." in which case 4+5 is the end of a top-level
+expression. Alternatively you could type "\* 6", which would continue
+the expression. Having top-level semicolons allows you to type "4+5;",
+and the parser will know you are done.
+
+Conclusions
+===========
+
+With just under 300 lines of commented code (240 lines of non-comment,
+non-blank code), we fully defined our minimal language, including a
+lexer, parser, and AST builder. With this done, the executable will
+validate Kaleidoscope code and tell us if it is grammatically invalid.
+For example, here is a sample interaction:
+
+.. code-block:: bash
+
+ $ ./toy.byte
+ ready> def foo(x y) x+foo(y, 4.0);
+ parsed a function definition.
+ ready> def foo(x y) x+y y;
+ parsed a function definition.
+ parsed a top-level expr
+ ready> def foo(x y) x+y );
+ parsed a function definition.
+ unknown token when expecting an expression.
+ ready> extern sin(a);
+ parsed an extern.
+ ready> ^D
+ $
+
+There is a lot of room for extension here. You can define new AST nodes,
+extend the language in many ways, etc. In the `next
+installment <OCamlLangImpl3.html>`_, we will describe how to generate
+LLVM Intermediate Representation (IR) from the AST.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for this and the previous chapter.
+Note that it is fully self-contained: you don't need LLVM or any
+external libraries at all for this. (Besides the OCaml standard
+libraries, of course.) To build this, just compile with:
+
+.. code-block:: bash
+
+ # Compile
+ ocamlbuild toy.byte
+ # Run
+ ./toy.byte
+
+Here is the code:
+
+\_tags:
+ ::
+
+ <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+
+token.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+ (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
+ * these others for known things. *)
+ type token =
+ (* commands *)
+ | Def | Extern
+
+ (* primary *)
+ | Ident of string | Number of float
+
+ (* unknown *)
+ | Kwd of char
+
+lexer.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+ let rec lex = parser
+ (* Skip any whitespace. *)
+ | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+ (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+
+ (* number: [0-9.]+ *)
+ | [< ' ('0' .. '9' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+
+ (* Comment until end of line. *)
+ | [< ' ('#'); stream >] ->
+ lex_comment stream
+
+ (* Otherwise, just return the character as its ascii value. *)
+ | [< 'c; stream >] ->
+ [< 'Token.Kwd c; lex stream >]
+
+ (* end of stream. *)
+ | [< >] -> [< >]
+
+ and lex_number buffer = parser
+ | [< ' ('0' .. '9' | '.' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+ | [< stream=lex >] ->
+ [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+ and lex_ident buffer = parser
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+ | [< stream=lex >] ->
+ match Buffer.contents buffer with
+ | "def" -> [< 'Token.Def; stream >]
+ | "extern" -> [< 'Token.Extern; stream >]
+ | id -> [< 'Token.Ident id; stream >]
+
+ and lex_comment = parser
+ | [< ' ('\n'); stream=lex >] -> stream
+ | [< 'c; e=lex_comment >] -> e
+ | [< >] -> [< >]
+
+ast.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+ (* expr - Base type for all expression nodes. *)
+ type expr =
+ (* variant for numeric literals like "1.0". *)
+ | Number of float
+
+ (* variant for referencing a variable, like "a". *)
+ | Variable of string
+
+ (* variant for a binary operator. *)
+ | Binary of char * expr * expr
+
+ (* variant for function calls. *)
+ | Call of string * expr array
+
+ (* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+ type proto = Prototype of string * string array
+
+ (* func - This type represents a function definition itself. *)
+ type func = Function of proto * expr
+
+parser.ml:
+ .. code-block:: ocaml
+
+ (*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+ (* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+ let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+ (* precedence - Get the precedence of the pending binary operator token. *)
+ let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+ (* primary
+ * ::= identifier
+ * ::= numberexpr
+ * ::= parenexpr *)
+ let rec parse_primary = parser
+ (* numberexpr ::= number *)
+ | [< 'Token.Number n >] -> Ast.Number n
+
+ (* parenexpr ::= '(' expression ')' *)
+ | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+ (* identifierexpr
+ * ::= identifier
+ * ::= identifier '(' argumentexpr ')' *)
+ | [< 'Token.Ident id; stream >] ->
+ let rec parse_args accumulator = parser
+ | [< e=parse_expr; stream >] ->
+ begin parser
+ | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+ | [< >] -> e :: accumulator
+ end stream
+ | [< >] -> accumulator
+ in
+ let rec parse_ident id = parser
+ (* Call. *)
+ | [< 'Token.Kwd '(';
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')'">] ->
+ Ast.Call (id, Array.of_list (List.rev args))
+
+ (* Simple variable ref. *)
+ | [< >] -> Ast.Variable id
+ in
+ parse_ident id stream
+
+ | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+ (* binoprhs
+ * ::= ('+' primary)* *)
+ and parse_bin_rhs expr_prec lhs stream =
+ match Stream.peek stream with
+ (* If this is a binop, find its precedence. *)
+ | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+ let token_prec = precedence c in
+
+ (* If this is a binop that binds at least as tightly as the current binop,
+ * consume it, otherwise we are done. *)
+ if token_prec < expr_prec then lhs else begin
+ (* Eat the binop. *)
+ Stream.junk stream;
+
+ (* Parse the primary expression after the binary operator. *)
+ let rhs = parse_primary stream in
+
+ (* Okay, we know this is a binop. *)
+ let rhs =
+ match Stream.peek stream with
+ | Some (Token.Kwd c2) ->
+ (* If BinOp binds less tightly with rhs than the operator after
+ * rhs, let the pending operator take rhs as its lhs. *)
+ let next_prec = precedence c2 in
+ if token_prec < next_prec
+ then parse_bin_rhs (token_prec + 1) rhs stream
+ else rhs
+ | _ -> rhs
+ in
+
+ (* Merge lhs/rhs. *)
+ let lhs = Ast.Binary (c, lhs, rhs) in
+ parse_bin_rhs expr_prec lhs stream
+ end
+ | _ -> lhs
+
+ (* expression
+ * ::= primary binoprhs *)
+ and parse_expr = parser
+ | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+ (* prototype
+ * ::= id '(' id* ')' *)
+ let parse_prototype =
+ let rec parse_args accumulator = parser
+ | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+ | [< >] -> accumulator
+ in
+
+ parser
+ | [< 'Token.Ident id;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ (* success. *)
+ Ast.Prototype (id, Array.of_list (List.rev args))
+
+ | [< >] ->
+ raise (Stream.Error "expected function name in prototype")
+
+ (* definition ::= 'def' prototype expression *)
+ let parse_definition = parser
+ | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+ Ast.Function (p, e)
+
+ (* toplevelexpr ::= expression *)
+ let parse_toplevel = parser
+ | [< e=parse_expr >] ->
+ (* Make an anonymous proto. *)
+ Ast.Function (Ast.Prototype ("", [||]), e)
+
+ (* external ::= 'extern' prototype *)
+ let parse_extern = parser
+ | [< 'Token.Extern; e=parse_prototype >] -> e
+
+toplevel.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+ (* top ::= definition | external | expression | ';' *)
+ let rec main_loop stream =
+ match Stream.peek stream with
+ | None -> ()
+
+ (* ignore top-level semicolons. *)
+ | Some (Token.Kwd ';') ->
+ Stream.junk stream;
+ main_loop stream
+
+ | Some token ->
+ begin
+ try match token with
+ | Token.Def ->
+ ignore(Parser.parse_definition stream);
+ print_endline "parsed a function definition.";
+ | Token.Extern ->
+ ignore(Parser.parse_extern stream);
+ print_endline "parsed an extern.";
+ | _ ->
+ (* Evaluate a top-level expression into an anonymous function. *)
+ ignore(Parser.parse_toplevel stream);
+ print_endline "parsed a top-level expr";
+ with Stream.Error s ->
+ (* Skip token for error recovery. *)
+ Stream.junk stream;
+ print_endline s;
+ end;
+ print_string "ready> "; flush stdout;
+ main_loop stream
+
+toy.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+ let main () =
+ (* Install standard binary operators.
+ * 1 is the lowest precedence. *)
+ Hashtbl.add Parser.binop_precedence '<' 10;
+ Hashtbl.add Parser.binop_precedence '+' 20;
+ Hashtbl.add Parser.binop_precedence '-' 20;
+ Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
+
+ (* Prime the first token. *)
+ print_string "ready> "; flush stdout;
+ let stream = Lexer.lex (Stream.of_channel stdin) in
+
+ (* Run the main "interpreter loop" now. *)
+ Toplevel.main_loop stream;
+ ;;
+
+ main ()
+
+`Next: Implementing Code Generation to LLVM IR <OCamlLangImpl3.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl3.html b/docs/tutorial/OCamlLangImpl3.html
deleted file mode 100644
index e6105e85f4d2..000000000000
--- a/docs/tutorial/OCamlLangImpl3.html
+++ /dev/null
@@ -1,1093 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Implementing code generation to LLVM IR</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <meta name="author" content="Erick Tryzelaar">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Code generation to LLVM IR</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 3
- <ol>
- <li><a href="#intro">Chapter 3 Introduction</a></li>
- <li><a href="#basics">Code Generation Setup</a></li>
- <li><a href="#exprs">Expression Code Generation</a></li>
- <li><a href="#funcs">Function Code Generation</a></li>
- <li><a href="#driver">Driver Changes and Closing Thoughts</a></li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="OCamlLangImpl4.html">Chapter 4</a>: Adding JIT and Optimizer
-Support</li>
-</ul>
-
-<div class="doc_author">
- <p>
- Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
- and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
- </p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 3 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 3 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial. This chapter shows you how to transform the <a
-href="OCamlLangImpl2.html">Abstract Syntax Tree</a>, built in Chapter 2, into
-LLVM IR. This will teach you a little bit about how LLVM does things, as well
-as demonstrate how easy it is to use. It's much more work to build a lexer and
-parser than it is to generate LLVM IR code. :)
-</p>
-
-<p><b>Please note</b>: the code in this chapter and later require LLVM 2.3 or
-LLVM SVN to work. LLVM 2.2 and before will not work with it.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="basics">Code Generation Setup</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-In order to generate LLVM IR, we want some simple setup to get started. First
-we define virtual code generation (codegen) methods in each AST class:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
- | Ast.Number n -&gt; ...
- | Ast.Variable name -&gt; ...
-</pre>
-</div>
-
-<p>The <tt>Codegen.codegen_expr</tt> function says to emit IR for that AST node
-along with all the things it depends on, and they all return an LLVM Value
-object. "Value" is the class used to represent a "<a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Static Single
-Assignment (SSA)</a> register" or "SSA value" in LLVM. The most distinct aspect
-of SSA values is that their value is computed as the related instruction
-executes, and it does not get a new value until (and if) the instruction
-re-executes. In other words, there is no way to "change" an SSA value. For
-more information, please read up on <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Static Single
-Assignment</a> - the concepts are really quite natural once you grok them.</p>
-
-<p>The
-second thing we want is an "Error" exception like we used for the parser, which
-will be used to report errors found during code generation (for example, use of
-an undeclared parameter):</p>
-
-<div class="doc_code">
-<pre>
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-</pre>
-</div>
-
-<p>The static variables will be used during code generation.
-<tt>Codgen.the_module</tt> is the LLVM construct that contains all of the
-functions and global variables in a chunk of code. In many ways, it is the
-top-level structure that the LLVM IR uses to contain code.</p>
-
-<p>The <tt>Codegen.builder</tt> object is a helper object that makes it easy to
-generate LLVM instructions. Instances of the <a
-href="http://llvm.org/doxygen/IRBuilder_8h-source.html"><tt>IRBuilder</tt></a>
-class keep track of the current place to insert instructions and has methods to
-create new instructions.</p>
-
-<p>The <tt>Codegen.named_values</tt> map keeps track of which values are defined
-in the current scope and what their LLVM representation is. (In other words, it
-is a symbol table for the code). In this form of Kaleidoscope, the only things
-that can be referenced are function parameters. As such, function parameters
-will be in this map when generating code for their function body.</p>
-
-<p>
-With these basics in place, we can start talking about how to generate code for
-each expression. Note that this assumes that the <tt>Codgen.builder</tt> has
-been set up to generate code <em>into</em> something. For now, we'll assume
-that this has already been done, and we'll just use it to emit code.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="exprs">Expression Code Generation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Generating LLVM code for expression nodes is very straightforward: less
-than 30 lines of commented code for all four of our expression nodes. First
-we'll do numeric literals:</p>
-
-<div class="doc_code">
-<pre>
- | Ast.Number n -&gt; const_float double_type n
-</pre>
-</div>
-
-<p>In the LLVM IR, numeric constants are represented with the
-<tt>ConstantFP</tt> class, which holds the numeric value in an <tt>APFloat</tt>
-internally (<tt>APFloat</tt> has the capability of holding floating point
-constants of <em>A</em>rbitrary <em>P</em>recision). This code basically just
-creates and returns a <tt>ConstantFP</tt>. Note that in the LLVM IR
-that constants are all uniqued together and shared. For this reason, the API
-uses "the foo::get(..)" idiom instead of "new foo(..)" or "foo::Create(..)".</p>
-
-<div class="doc_code">
-<pre>
- | Ast.Variable name -&gt;
- (try Hashtbl.find named_values name with
- | Not_found -&gt; raise (Error "unknown variable name"))
-</pre>
-</div>
-
-<p>References to variables are also quite simple using LLVM. In the simple
-version of Kaleidoscope, we assume that the variable has already been emitted
-somewhere and its value is available. In practice, the only values that can be
-in the <tt>Codegen.named_values</tt> map are function arguments. This code
-simply checks to see that the specified name is in the map (if not, an unknown
-variable is being referenced) and returns the value for it. In future chapters,
-we'll add support for <a href="LangImpl5.html#for">loop induction variables</a>
-in the symbol table, and for <a href="LangImpl7.html#localvars">local
-variables</a>.</p>
-
-<div class="doc_code">
-<pre>
- | Ast.Binary (op, lhs, rhs) -&gt;
- let lhs_val = codegen_expr lhs in
- let rhs_val = codegen_expr rhs in
- begin
- match op with
- | '+' -&gt; build_fadd lhs_val rhs_val "addtmp" builder
- | '-' -&gt; build_fsub lhs_val rhs_val "subtmp" builder
- | '*' -&gt; build_fmul lhs_val rhs_val "multmp" builder
- | '&lt;' -&gt;
- (* Convert bool 0/1 to double 0.0 or 1.0 *)
- let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
- build_uitofp i double_type "booltmp" builder
- | _ -&gt; raise (Error "invalid binary operator")
- end
-</pre>
-</div>
-
-<p>Binary operators start to get more interesting. The basic idea here is that
-we recursively emit code for the left-hand side of the expression, then the
-right-hand side, then we compute the result of the binary expression. In this
-code, we do a simple switch on the opcode to create the right LLVM instruction.
-</p>
-
-<p>In the example above, the LLVM builder class is starting to show its value.
-IRBuilder knows where to insert the newly created instruction, all you have to
-do is specify what instruction to create (e.g. with <tt>Llvm.create_add</tt>),
-which operands to use (<tt>lhs</tt> and <tt>rhs</tt> here) and optionally
-provide a name for the generated instruction.</p>
-
-<p>One nice thing about LLVM is that the name is just a hint. For instance, if
-the code above emits multiple "addtmp" variables, LLVM will automatically
-provide each one with an increasing, unique numeric suffix. Local value names
-for instructions are purely optional, but it makes it much easier to read the
-IR dumps.</p>
-
-<p><a href="../LangRef.html#instref">LLVM instructions</a> are constrained by
-strict rules: for example, the Left and Right operators of
-an <a href="../LangRef.html#i_add">add instruction</a> must have the same
-type, and the result type of the add must match the operand types. Because
-all values in Kaleidoscope are doubles, this makes for very simple code for add,
-sub and mul.</p>
-
-<p>On the other hand, LLVM specifies that the <a
-href="../LangRef.html#i_fcmp">fcmp instruction</a> always returns an 'i1' value
-(a one bit integer). The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value. In order to get these semantics, we combine the fcmp instruction with
-a <a href="../LangRef.html#i_uitofp">uitofp instruction</a>. This instruction
-converts its input integer into a floating point value by treating the input
-as an unsigned value. In contrast, if we used the <a
-href="../LangRef.html#i_sitofp">sitofp instruction</a>, the Kaleidoscope '&lt;'
-operator would return 0.0 and -1.0, depending on the input value.</p>
-
-<div class="doc_code">
-<pre>
- | Ast.Call (callee, args) -&gt;
- (* Look up the name in the module table. *)
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "unknown function referenced")
- in
- let params = params callee in
-
- (* If argument mismatch error. *)
- if Array.length params == Array.length args then () else
- raise (Error "incorrect # arguments passed");
- let args = Array.map codegen_expr args in
- build_call callee args "calltmp" builder
-</pre>
-</div>
-
-<p>Code generation for function calls is quite straightforward with LLVM. The
-code above initially does a function name lookup in the LLVM Module's symbol
-table. Recall that the LLVM Module is the container that holds all of the
-functions we are JIT'ing. By giving each function the same name as what the
-user specifies, we can use the LLVM symbol table to resolve function names for
-us.</p>
-
-<p>Once we have the function to call, we recursively codegen each argument that
-is to be passed in, and create an LLVM <a href="../LangRef.html#i_call">call
-instruction</a>. Note that LLVM uses the native C calling conventions by
-default, allowing these calls to also call into standard library functions like
-"sin" and "cos", with no additional effort.</p>
-
-<p>This wraps up our handling of the four basic expressions that we have so far
-in Kaleidoscope. Feel free to go in and add some more. For example, by
-browsing the <a href="../LangRef.html">LLVM language reference</a> you'll find
-several other interesting instructions that are really easy to plug into our
-basic framework.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="funcs">Function Code Generation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Code generation for prototypes and functions must handle a number of
-details, which make their code less beautiful than expression code
-generation, but allows us to illustrate some important points. First, lets
-talk about code generation for prototypes: they are used both for function
-bodies and external function declarations. The code starts with:</p>
-
-<div class="doc_code">
-<pre>
-let codegen_proto = function
- | Ast.Prototype (name, args) -&gt;
- (* Make the function type: double(double,double) etc. *)
- let doubles = Array.make (Array.length args) double_type in
- let ft = function_type double_type doubles in
- let f =
- match lookup_function name the_module with
-</pre>
-</div>
-
-<p>This code packs a lot of power into a few lines. Note first that this
-function returns a "Function*" instead of a "Value*" (although at the moment
-they both are modeled by <tt>llvalue</tt> in ocaml). Because a "prototype"
-really talks about the external interface for a function (not the value computed
-by an expression), it makes sense for it to return the LLVM Function it
-corresponds to when codegen'd.</p>
-
-<p>The call to <tt>Llvm.function_type</tt> creates the <tt>Llvm.llvalue</tt>
-that should be used for a given Prototype. Since all function arguments in
-Kaleidoscope are of type double, the first line creates a vector of "N" LLVM
-double types. It then uses the <tt>Llvm.function_type</tt> method to create a
-function type that takes "N" doubles as arguments, returns one double as a
-result, and that is not vararg (that uses the function
-<tt>Llvm.var_arg_function_type</tt>). Note that Types in LLVM are uniqued just
-like <tt>Constant</tt>s are, so you don't "new" a type, you "get" it.</p>
-
-<p>The final line above checks if the function has already been defined in
-<tt>Codegen.the_module</tt>. If not, we will create it.</p>
-
-<div class="doc_code">
-<pre>
- | None -&gt; declare_function name ft the_module
-</pre>
-</div>
-
-<p>This indicates the type and name to use, as well as which module to insert
-into. By default we assume a function has
-<tt>Llvm.Linkage.ExternalLinkage</tt>. "<a href="LangRef.html#linkage">external
-linkage</a>" means that the function may be defined outside the current module
-and/or that it is callable by functions outside the module. The "<tt>name</tt>"
-passed in is the name the user specified: this name is registered in
-"<tt>Codegen.the_module</tt>"s symbol table, which is used by the function call
-code above.</p>
-
-<p>In Kaleidoscope, I choose to allow redefinitions of functions in two cases:
-first, we want to allow 'extern'ing a function more than once, as long as the
-prototypes for the externs match (since all arguments have the same type, we
-just have to check that the number of arguments match). Second, we want to
-allow 'extern'ing a function and then defining a body for it. This is useful
-when defining mutually recursive functions.</p>
-
-<div class="doc_code">
-<pre>
- (* If 'f' conflicted, there was already something named 'name'. If it
- * has a body, don't allow redefinition or reextern. *)
- | Some f -&gt;
- (* If 'f' already has a body, reject this. *)
- if Array.length (basic_blocks f) == 0 then () else
- raise (Error "redefinition of function");
-
- (* If 'f' took a different number of arguments, reject. *)
- if Array.length (params f) == Array.length args then () else
- raise (Error "redefinition of function with different # args");
- f
- in
-</pre>
-</div>
-
-<p>In order to verify the logic above, we first check to see if the pre-existing
-function is "empty". In this case, empty means that it has no basic blocks in
-it, which means it has no body. If it has no body, it is a forward
-declaration. Since we don't allow anything after a full definition of the
-function, the code rejects this case. If the previous reference to a function
-was an 'extern', we simply verify that the number of arguments for that
-definition and this one match up. If not, we emit an error.</p>
-
-<div class="doc_code">
-<pre>
- (* Set names for all arguments. *)
- Array.iteri (fun i a -&gt;
- let n = args.(i) in
- set_value_name n a;
- Hashtbl.add named_values n a;
- ) (params f);
- f
-</pre>
-</div>
-
-<p>The last bit of code for prototypes loops over all of the arguments in the
-function, setting the name of the LLVM Argument objects to match, and registering
-the arguments in the <tt>Codegen.named_values</tt> map for future use by the
-<tt>Ast.Variable</tt> variant. Once this is set up, it returns the Function
-object to the caller. Note that we don't check for conflicting
-argument names here (e.g. "extern foo(a b a)"). Doing so would be very
-straight-forward with the mechanics we have already used above.</p>
-
-<div class="doc_code">
-<pre>
-let codegen_func = function
- | Ast.Function (proto, body) -&gt;
- Hashtbl.clear named_values;
- let the_function = codegen_proto proto in
-</pre>
-</div>
-
-<p>Code generation for function definitions starts out simply enough: we just
-codegen the prototype (Proto) and verify that it is ok. We then clear out the
-<tt>Codegen.named_values</tt> map to make sure that there isn't anything in it
-from the last function we compiled. Code generation of the prototype ensures
-that there is an LLVM Function object that is ready to go for us.</p>
-
-<div class="doc_code">
-<pre>
- (* Create a new basic block to start insertion into. *)
- let bb = append_block context "entry" the_function in
- position_at_end bb builder;
-
- try
- let ret_val = codegen_expr body in
-</pre>
-</div>
-
-<p>Now we get to the point where the <tt>Codegen.builder</tt> is set up. The
-first line creates a new
-<a href="http://en.wikipedia.org/wiki/Basic_block">basic block</a> (named
-"entry"), which is inserted into <tt>the_function</tt>. The second line then
-tells the builder that new instructions should be inserted into the end of the
-new basic block. Basic blocks in LLVM are an important part of functions that
-define the <a
-href="http://en.wikipedia.org/wiki/Control_flow_graph">Control Flow Graph</a>.
-Since we don't have any control flow, our functions will only contain one
-block at this point. We'll fix this in <a href="OCamlLangImpl5.html">Chapter
-5</a> :).</p>
-
-<div class="doc_code">
-<pre>
- let ret_val = codegen_expr body in
-
- (* Finish off the function. *)
- let _ = build_ret ret_val builder in
-
- (* Validate the generated code, checking for consistency. *)
- Llvm_analysis.assert_valid_function the_function;
-
- the_function
-</pre>
-</div>
-
-<p>Once the insertion point is set up, we call the <tt>Codegen.codegen_func</tt>
-method for the root expression of the function. If no error happens, this emits
-code to compute the expression into the entry block and returns the value that
-was computed. Assuming no error, we then create an LLVM <a
-href="../LangRef.html#i_ret">ret instruction</a>, which completes the function.
-Once the function is built, we call
-<tt>Llvm_analysis.assert_valid_function</tt>, which is provided by LLVM. This
-function does a variety of consistency checks on the generated code, to
-determine if our compiler is doing everything right. Using this is important:
-it can catch a lot of bugs. Once the function is finished and validated, we
-return it.</p>
-
-<div class="doc_code">
-<pre>
- with e -&gt;
- delete_function the_function;
- raise e
-</pre>
-</div>
-
-<p>The only piece left here is handling of the error case. For simplicity, we
-handle this by merely deleting the function we produced with the
-<tt>Llvm.delete_function</tt> method. This allows the user to redefine a
-function that they incorrectly typed in before: if we didn't delete it, it
-would live in the symbol table, with a body, preventing future redefinition.</p>
-
-<p>This code does have a bug, though. Since the <tt>Codegen.codegen_proto</tt>
-can return a previously defined forward declaration, our code can actually delete
-a forward declaration. There are a number of ways to fix this bug, see what you
-can come up with! Here is a testcase:</p>
-
-<div class="doc_code">
-<pre>
-extern foo(a b); # ok, defines foo.
-def foo(a b) c; # error, 'c' is invalid.
-def bar() foo(1, 2); # error, unknown function "foo"
-</pre>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="driver">Driver Changes and Closing Thoughts</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-For now, code generation to LLVM doesn't really get us much, except that we can
-look at the pretty IR calls. The sample code inserts calls to Codegen into the
-"<tt>Toplevel.main_loop</tt>", and then dumps out the LLVM IR. This gives a
-nice way to look at the LLVM IR for simple functions. For example:
-</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>4+5</b>;
-Read top-level expression:
-define double @""() {
-entry:
- %addtmp = fadd double 4.000000e+00, 5.000000e+00
- ret double %addtmp
-}
-</pre>
-</div>
-
-<p>Note how the parser turns the top-level expression into anonymous functions
-for us. This will be handy when we add <a href="OCamlLangImpl4.html#jit">JIT
-support</a> in the next chapter. Also note that the code is very literally
-transcribed, no optimizations are being performed. We will
-<a href="OCamlLangImpl4.html#trivialconstfold">add optimizations</a> explicitly
-in the next chapter.</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def foo(a b) a*a + 2*a*b + b*b;</b>
-Read function definition:
-define double @foo(double %a, double %b) {
-entry:
- %multmp = fmul double %a, %a
- %multmp1 = fmul double 2.000000e+00, %a
- %multmp2 = fmul double %multmp1, %b
- %addtmp = fadd double %multmp, %multmp2
- %multmp3 = fmul double %b, %b
- %addtmp4 = fadd double %addtmp, %multmp3
- ret double %addtmp4
-}
-</pre>
-</div>
-
-<p>This shows some simple arithmetic. Notice the striking similarity to the
-LLVM builder calls that we use to create the instructions.</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def bar(a) foo(a, 4.0) + bar(31337);</b>
-Read function definition:
-define double @bar(double %a) {
-entry:
- %calltmp = call double @foo(double %a, double 4.000000e+00)
- %calltmp1 = call double @bar(double 3.133700e+04)
- %addtmp = fadd double %calltmp, %calltmp1
- ret double %addtmp
-}
-</pre>
-</div>
-
-<p>This shows some function calls. Note that this function will take a long
-time to execute if you call it. In the future we'll add conditional control
-flow to actually make recursion useful :).</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern cos(x);</b>
-Read extern:
-declare double @cos(double)
-
-ready&gt; <b>cos(1.234);</b>
-Read top-level expression:
-define double @""() {
-entry:
- %calltmp = call double @cos(double 1.234000e+00)
- ret double %calltmp
-}
-</pre>
-</div>
-
-<p>This shows an extern for the libm "cos" function, and a call to it.</p>
-
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>^D</b>
-; ModuleID = 'my cool jit'
-
-define double @""() {
-entry:
- %addtmp = fadd double 4.000000e+00, 5.000000e+00
- ret double %addtmp
-}
-
-define double @foo(double %a, double %b) {
-entry:
- %multmp = fmul double %a, %a
- %multmp1 = fmul double 2.000000e+00, %a
- %multmp2 = fmul double %multmp1, %b
- %addtmp = fadd double %multmp, %multmp2
- %multmp3 = fmul double %b, %b
- %addtmp4 = fadd double %addtmp, %multmp3
- ret double %addtmp4
-}
-
-define double @bar(double %a) {
-entry:
- %calltmp = call double @foo(double %a, double 4.000000e+00)
- %calltmp1 = call double @bar(double 3.133700e+04)
- %addtmp = fadd double %calltmp, %calltmp1
- ret double %addtmp
-}
-
-declare double @cos(double)
-
-define double @""() {
-entry:
- %calltmp = call double @cos(double 1.234000e+00)
- ret double %calltmp
-}
-</pre>
-</div>
-
-<p>When you quit the current demo, it dumps out the IR for the entire module
-generated. Here you can see the big picture with all the functions referencing
-each other.</p>
-
-<p>This wraps up the third chapter of the Kaleidoscope tutorial. Up next, we'll
-describe how to <a href="OCamlLangImpl4.html">add JIT codegen and optimizer
-support</a> to this so we can actually start running code!</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-LLVM code generator. Because this uses the LLVM libraries, we need to link
-them in. To do this, we use the <a
-href="http://llvm.org/cmds/llvm-config.html">llvm-config</a> tool to inform
-our makefile/command line about which options to use:</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-&lt;*.{byte,native}&gt;: g++, use_llvm, use_llvm_analysis
-</pre>
-</dd>
-
-<dt>myocamlbuild.ml:</dt>
-<dd class="doc_code">
-<pre>
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
- (* commands *)
- | Def | Extern
-
- (* primary *)
- | Ident of string | Number of float
-
- (* unknown *)
- | Kwd of char
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
- (* Skip any whitespace. *)
- | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
- (* identifier: [a-zA-Z][a-zA-Z0-9] *)
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_ident buffer stream
-
- (* number: [0-9.]+ *)
- | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_number buffer stream
-
- (* Comment until end of line. *)
- | [&lt; ' ('#'); stream &gt;] -&gt;
- lex_comment stream
-
- (* Otherwise, just return the character as its ascii value. *)
- | [&lt; 'c; stream &gt;] -&gt;
- [&lt; 'Token.Kwd c; lex stream &gt;]
-
- (* end of stream. *)
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
- | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_number buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_ident buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- match Buffer.contents buffer with
- | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
- | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
- | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
- | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
- | [&lt; 'c; e=lex_comment &gt;] -&gt; e
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
- (* variant for numeric literals like "1.0". *)
- | Number of float
-
- (* variant for referencing a variable, like "a". *)
- | Variable of string
-
- (* variant for a binary operator. *)
- | Binary of char * expr * expr
-
- (* variant for function calls. *)
- | Call of string * expr array
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- * ::= identifier
- * ::= numberexpr
- * ::= parenexpr *)
-let rec parse_primary = parser
- (* numberexpr ::= number *)
- | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
- (* parenexpr ::= '(' expression ')' *)
- | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
- (* identifierexpr
- * ::= identifier
- * ::= identifier '(' argumentexpr ')' *)
- | [&lt; 'Token.Ident id; stream &gt;] -&gt;
- let rec parse_args accumulator = parser
- | [&lt; e=parse_expr; stream &gt;] -&gt;
- begin parser
- | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; e :: accumulator
- end stream
- | [&lt; &gt;] -&gt; accumulator
- in
- let rec parse_ident id = parser
- (* Call. *)
- | [&lt; 'Token.Kwd '(';
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
- Ast.Call (id, Array.of_list (List.rev args))
-
- (* Simple variable ref. *)
- | [&lt; &gt;] -&gt; Ast.Variable id
- in
- parse_ident id stream
-
- | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- * ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
- match Stream.peek stream with
- (* If this is a binop, find its precedence. *)
- | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
- let token_prec = precedence c in
-
- (* If this is a binop that binds at least as tightly as the current binop,
- * consume it, otherwise we are done. *)
- if token_prec &lt; expr_prec then lhs else begin
- (* Eat the binop. *)
- Stream.junk stream;
-
- (* Parse the primary expression after the binary operator. *)
- let rhs = parse_primary stream in
-
- (* Okay, we know this is a binop. *)
- let rhs =
- match Stream.peek stream with
- | Some (Token.Kwd c2) -&gt;
- (* If BinOp binds less tightly with rhs than the operator after
- * rhs, let the pending operator take rhs as its lhs. *)
- let next_prec = precedence c2 in
- if token_prec &lt; next_prec
- then parse_bin_rhs (token_prec + 1) rhs stream
- else rhs
- | _ -&gt; rhs
- in
-
- (* Merge lhs/rhs. *)
- let lhs = Ast.Binary (c, lhs, rhs) in
- parse_bin_rhs expr_prec lhs stream
- end
- | _ -&gt; lhs
-
-(* expression
- * ::= primary binoprhs *)
-and parse_expr = parser
- | [&lt; lhs=parse_primary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- * ::= id '(' id* ')' *)
-let parse_prototype =
- let rec parse_args accumulator = parser
- | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; accumulator
- in
-
- parser
- | [&lt; 'Token.Ident id;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- (* success. *)
- Ast.Prototype (id, Array.of_list (List.rev args))
-
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
- | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
- Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
- | [&lt; e=parse_expr &gt;] -&gt;
- (* Make an anonymous proto. *)
- Ast.Function (Ast.Prototype ("", [||]), e)
-
-(* external ::= 'extern' prototype *)
-let parse_extern = parser
- | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>codegen.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-let rec codegen_expr = function
- | Ast.Number n -&gt; const_float double_type n
- | Ast.Variable name -&gt;
- (try Hashtbl.find named_values name with
- | Not_found -&gt; raise (Error "unknown variable name"))
- | Ast.Binary (op, lhs, rhs) -&gt;
- let lhs_val = codegen_expr lhs in
- let rhs_val = codegen_expr rhs in
- begin
- match op with
- | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
- | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
- | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
- | '&lt;' -&gt;
- (* Convert bool 0/1 to double 0.0 or 1.0 *)
- let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
- build_uitofp i double_type "booltmp" builder
- | _ -&gt; raise (Error "invalid binary operator")
- end
- | Ast.Call (callee, args) -&gt;
- (* Look up the name in the module table. *)
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "unknown function referenced")
- in
- let params = params callee in
-
- (* If argument mismatch error. *)
- if Array.length params == Array.length args then () else
- raise (Error "incorrect # arguments passed");
- let args = Array.map codegen_expr args in
- build_call callee args "calltmp" builder
-
-let codegen_proto = function
- | Ast.Prototype (name, args) -&gt;
- (* Make the function type: double(double,double) etc. *)
- let doubles = Array.make (Array.length args) double_type in
- let ft = function_type double_type doubles in
- let f =
- match lookup_function name the_module with
- | None -&gt; declare_function name ft the_module
-
- (* If 'f' conflicted, there was already something named 'name'. If it
- * has a body, don't allow redefinition or reextern. *)
- | Some f -&gt;
- (* If 'f' already has a body, reject this. *)
- if block_begin f &lt;&gt; At_end f then
- raise (Error "redefinition of function");
-
- (* If 'f' took a different number of arguments, reject. *)
- if element_type (type_of f) &lt;&gt; ft then
- raise (Error "redefinition of function with different # args");
- f
- in
-
- (* Set names for all arguments. *)
- Array.iteri (fun i a -&gt;
- let n = args.(i) in
- set_value_name n a;
- Hashtbl.add named_values n a;
- ) (params f);
- f
-
-let codegen_func = function
- | Ast.Function (proto, body) -&gt;
- Hashtbl.clear named_values;
- let the_function = codegen_proto proto in
-
- (* Create a new basic block to start insertion into. *)
- let bb = append_block context "entry" the_function in
- position_at_end bb builder;
-
- try
- let ret_val = codegen_expr body in
-
- (* Finish off the function. *)
- let _ = build_ret ret_val builder in
-
- (* Validate the generated code, checking for consistency. *)
- Llvm_analysis.assert_valid_function the_function;
-
- the_function
- with e -&gt;
- delete_function the_function;
- raise e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop stream =
- match Stream.peek stream with
- | None -&gt; ()
-
- (* ignore top-level semicolons. *)
- | Some (Token.Kwd ';') -&gt;
- Stream.junk stream;
- main_loop stream
-
- | Some token -&gt;
- begin
- try match token with
- | Token.Def -&gt;
- let e = Parser.parse_definition stream in
- print_endline "parsed a function definition.";
- dump_value (Codegen.codegen_func e);
- | Token.Extern -&gt;
- let e = Parser.parse_extern stream in
- print_endline "parsed an extern.";
- dump_value (Codegen.codegen_proto e);
- | _ -&gt;
- (* Evaluate a top-level expression into an anonymous function. *)
- let e = Parser.parse_toplevel stream in
- print_endline "parsed a top-level expr";
- dump_value (Codegen.codegen_func e);
- with Stream.Error s | Codegen.Error s -&gt;
- (* Skip token for error recovery. *)
- Stream.junk stream;
- print_endline s;
- end;
- print_string "ready&gt; "; flush stdout;
- main_loop stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-let main () =
- (* Install standard binary operators.
- * 1 is the lowest precedence. *)
- Hashtbl.add Parser.binop_precedence '&lt;' 10;
- Hashtbl.add Parser.binop_precedence '+' 20;
- Hashtbl.add Parser.binop_precedence '-' 20;
- Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
-
- (* Prime the first token. *)
- print_string "ready&gt; "; flush stdout;
- let stream = Lexer.lex (Stream.of_channel stdin) in
-
- (* Run the main "interpreter loop" now. *)
- Toplevel.main_loop stream;
-
- (* Print out all the generated code. *)
- dump_module Codegen.the_module
-;;
-
-main ()
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl4.html">Next: Adding JIT and Optimizer Support</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-05-03 00:46:36 +0200 (Thu, 03 May 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl3.rst b/docs/tutorial/OCamlLangImpl3.rst
new file mode 100644
index 000000000000..fd9f0e5cd3f4
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl3.rst
@@ -0,0 +1,961 @@
+========================================
+Kaleidoscope: Code generation to LLVM IR
+========================================
+
+.. contents::
+ :local:
+
+Chapter 3 Introduction
+======================
+
+Welcome to Chapter 3 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. This chapter shows you how to transform
+the `Abstract Syntax Tree <OCamlLangImpl2.html>`_, built in Chapter 2,
+into LLVM IR. This will teach you a little bit about how LLVM does
+things, as well as demonstrate how easy it is to use. It's much more
+work to build a lexer and parser than it is to generate LLVM IR code. :)
+
+**Please note**: the code in this chapter and later requires LLVM 2.3 or
+LLVM SVN to work. LLVM 2.2 and before will not work with it.
+
+Code Generation Setup
+=====================
+
+In order to generate LLVM IR, we want some simple setup to get started.
+First we define a code generation (codegen) function that pattern
+matches on each AST variant:
+
+.. code-block:: ocaml
+
+ let rec codegen_expr = function
+ | Ast.Number n -> ...
+ | Ast.Variable name -> ...
+
+The ``Codegen.codegen_expr`` function emits IR for a given AST node
+along with everything it depends on, and returns an LLVM Value
+object. "Value" is the class used to represent a "`Static Single
+Assignment
+(SSA) <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+register" or "SSA value" in LLVM. The most distinct aspect of SSA values
+is that their value is computed as the related instruction executes, and
+it does not get a new value until (and if) the instruction re-executes.
+In other words, there is no way to "change" an SSA value. For more
+information, please read up on `Static Single
+Assignment <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+- the concepts are really quite natural once you grok them.
+
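+For instance (a hand-written illustration, not output from our compiler
+yet), computing ``(x+1)*(x+1)`` in SSA form gives every intermediate
+result exactly one assignment:
+
+::
+
+    %tmp = fadd double %x, 1.000000e+00
+    %result = fmul double %tmp, %tmp
+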
+The second thing we want is an "Error" exception like we used for the
+parser, which will be used to report errors found during code generation
+(for example, use of an undeclared parameter):
+
+.. code-block:: ocaml
+
+ exception Error of string
+
+ let context = global_context ()
+ let the_module = create_module context "my cool jit"
+ let builder = builder context
+ let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+ let double_type = double_type context
+
+The static variables will be used during code generation.
+``Codegen.the_module`` is the LLVM construct that contains all of the
+functions and global variables in a chunk of code. In many ways, it is
+the top-level structure that the LLVM IR uses to contain code.
+
+The ``Codegen.builder`` object is a helper object that makes it easy to
+generate LLVM instructions. Instances of the
+```IRBuilder`` <http://llvm.org/doxygen/IRBuilder_8h-source.html>`_
+class keep track of the current place to insert instructions and have
+methods to create new instructions.
+
+The ``Codegen.named_values`` map keeps track of which values are defined
+in the current scope and what their LLVM representation is. (In other
+words, it is a symbol table for the code). In this form of Kaleidoscope,
+the only things that can be referenced are function parameters. As such,
+function parameters will be in this map when generating code for their
+function body.
+
+With these basics in place, we can start talking about how to generate
+code for each expression. Note that this assumes that the
+``Codegen.builder`` has been set up to generate code *into* something.
+For now, we'll assume that this has already been done, and we'll just
+use it to emit code.
+
+Expression Code Generation
+==========================
+
+Generating LLVM code for expression nodes is very straightforward: less
+than 30 lines of commented code for all four of our expression nodes.
+First we'll do numeric literals:
+
+.. code-block:: ocaml
+
+ | Ast.Number n -> const_float double_type n
+
+In the LLVM IR, numeric constants are represented with the
+``ConstantFP`` class, which holds the numeric value in an ``APFloat``
+internally (``APFloat`` has the capability of holding floating point
+constants of Arbitrary Precision). This code basically just creates
+and returns a ``ConstantFP``. Note that in the LLVM IR, constants
+are all uniqued together and shared. For this reason, the API uses "the
+foo::get(..)" idiom instead of "new foo(..)" or "foo::Create(..)".
+
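+In the OCaml bindings this uniquing happens inside ``const_float``:
+calling it twice with the same type and value hands back the same
+underlying constant. A small sketch (``one`` and ``one'`` are just
+illustrative names):
+
+.. code-block:: ocaml
+
+    let one  = const_float double_type 1.0 in
+    let one' = const_float double_type 1.0 in
+    (* Both name the uniqued ConstantFP for 1.0 in this context; dumping
+     * either prints the same "double 1.000000e+00". *)
+    dump_value one;
+    dump_value one'
+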
+.. code-block:: ocaml
+
+ | Ast.Variable name ->
+ (try Hashtbl.find named_values name with
+ | Not_found -> raise (Error "unknown variable name"))
+
+References to variables are also quite simple using LLVM. In the simple
+version of Kaleidoscope, we assume that the variable has already been
+emitted somewhere and its value is available. In practice, the only
+values that can be in the ``Codegen.named_values`` map are function
+arguments. This code simply checks to see that the specified name is in
+the map (if not, an unknown variable is being referenced) and returns
+the value for it. In future chapters, we'll add support for `loop
+induction variables <OCamlLangImpl5.html#for>`_ in the symbol table, and for
+`local variables <OCamlLangImpl7.html#localvars>`_.
+
+.. code-block:: ocaml
+
+ | Ast.Binary (op, lhs, rhs) ->
+ let lhs_val = codegen_expr lhs in
+ let rhs_val = codegen_expr rhs in
+ begin
+ match op with
+ | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+ | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+ | '*' -> build_fmul lhs_val rhs_val "multmp" builder
+ | '<' ->
+ (* Convert bool 0/1 to double 0.0 or 1.0 *)
+ let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+ build_uitofp i double_type "booltmp" builder
+ | _ -> raise (Error "invalid binary operator")
+ end
+
+Binary operators start to get more interesting. The basic idea here is
+that we recursively emit code for the left-hand side of the expression,
+then the right-hand side, then we compute the result of the binary
+expression. In this code, we do a simple switch on the opcode to create
+the right LLVM instruction.
+
+In the example above, the LLVM builder class is starting to show its
+value. IRBuilder knows where to insert the newly created instruction;
+all you have to do is specify which instruction to create (e.g. with
+``Llvm.build_fadd``), which operands to use (``lhs`` and ``rhs`` here)
+and optionally provide a name for the generated instruction.
+
+One nice thing about LLVM is that the name is just a hint. For instance,
+if the code above emits multiple "addtmp" variables, LLVM will
+automatically provide each one with an increasing, unique numeric
+suffix. Local value names for instructions are purely optional, but they
+make the IR dumps much easier to read.
+
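+For example, a function containing two additions (say
+``def test(x) 1+2+x;``) dumps with automatically suffixed names along
+these lines:
+
+::
+
+    %addtmp = fadd double 1.000000e+00, 2.000000e+00
+    %addtmp1 = fadd double %addtmp, %x
+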
+`LLVM instructions <../LangRef.html#instref>`_ are constrained by strict
+rules: for example, the left and right operands of an `add
+instruction <../LangRef.html#i_add>`_ must have the same type, and the
+result type of the add must match the operand types. Because all values
+in Kaleidoscope are doubles, this makes for very simple code for add,
+sub and mul.
+
+On the other hand, LLVM specifies that the `fcmp
+instruction <../LangRef.html#i_fcmp>`_ always returns an 'i1' value (a
+one bit integer). The problem with this is that Kaleidoscope wants the
+value to be 0.0 or 1.0. In order to get these semantics, we
+combine the fcmp instruction with a `uitofp
+instruction <../LangRef.html#i_uitofp>`_. This instruction converts its
+input integer into a floating point value by treating the input as an
+unsigned value. In contrast, if we used the `sitofp
+instruction <../LangRef.html#i_sitofp>`_, the Kaleidoscope '<' operator
+would return 0.0 and -1.0, depending on the input value.
+
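+Concretely, the '<' case above emits a two-instruction sequence for
+``x < y`` (names follow the builder hints):
+
+::
+
+    %cmptmp = fcmp ult double %x, %y
+    %booltmp = uitofp i1 %cmptmp to double
+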
+.. code-block:: ocaml
+
+ | Ast.Call (callee, args) ->
+ (* Look up the name in the module table. *)
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "unknown function referenced")
+ in
+ let params = params callee in
+
+      (* Reject calls with the wrong number of arguments. *)
+ if Array.length params == Array.length args then () else
+ raise (Error "incorrect # arguments passed");
+ let args = Array.map codegen_expr args in
+ build_call callee args "calltmp" builder
+
+Code generation for function calls is quite straightforward with LLVM.
+The code above initially does a function name lookup in the LLVM
+Module's symbol table. Recall that the LLVM Module is the container that
+holds all of the functions we are JIT'ing. By giving each function the
+same name as what the user specifies, we can use the LLVM symbol table
+to resolve function names for us.
+
+Once we have the function to call, we recursively codegen each argument
+that is to be passed in, and create an LLVM `call
+instruction <../LangRef.html#i_call>`_. Note that LLVM uses the native C
+calling conventions by default, allowing these calls to also call into
+standard library functions like "sin" and "cos", with no additional
+effort.
+
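+For instance, once ``extern sin(x);`` has been seen, a call like
+``sin(1.0)`` compiles to a single call instruction, analogous to the
+"cos" example later in this chapter:
+
+::
+
+    %calltmp = call double @sin(double 1.000000e+00)
+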
+This wraps up our handling of the four basic expressions that we have so
+far in Kaleidoscope. Feel free to go in and add some more. For example,
+by browsing the `LLVM language reference <../LangRef.html>`_ you'll find
+several other interesting instructions that are really easy to plug into
+our basic framework.
+
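+As one illustrative extension (not part of the tutorial's code), a '/'
+operator would only take one more arm in the ``Ast.Binary`` match,
+using LLVM's floating point division builder:
+
+.. code-block:: ocaml
+
+    (* Hypothetical extra case for the 'match op with' in codegen_expr;
+     * the driver would also need a precedence entry for '/'. *)
+    | '/' -> build_fdiv lhs_val rhs_val "divtmp" builder
+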
+Function Code Generation
+========================
+
+Code generation for prototypes and functions must handle a number of
+details, which make their code less beautiful than expression code
+generation, but allow us to illustrate some important points. First,
+let's talk about code generation for prototypes: they are used both for
+function bodies and external function declarations. The code starts
+with:
+
+.. code-block:: ocaml
+
+ let codegen_proto = function
+ | Ast.Prototype (name, args) ->
+ (* Make the function type: double(double,double) etc. *)
+ let doubles = Array.make (Array.length args) double_type in
+ let ft = function_type double_type doubles in
+ let f =
+ match lookup_function name the_module with
+
+This code packs a lot of power into a few lines. Note first that this
+function returns a "Function\*" instead of a "Value\*" (although at the
+moment they are both modeled by ``llvalue`` in OCaml). Because a
+"prototype" really talks about the external interface for a function
+(not the value computed by an expression), it makes sense for it to
+return the LLVM Function it corresponds to when codegen'd.
+
+The call to ``Llvm.function_type`` creates the ``Llvm.lltype`` that
+should be used for a given Prototype. Since all function arguments in
+Kaleidoscope are of type double, the first line creates an array of "N"
+LLVM double types. It then uses the ``Llvm.function_type`` function to
+create a function type that takes "N" doubles as arguments, returns one
+double as a result, and that is not vararg (vararg function types are
+made with ``Llvm.var_arg_function_type``). Note that Types in LLVM are uniqued
+just like ``Constant``'s are, so you don't "new" a type, you "get" it.
+
+The final line above checks if the function has already been defined in
+``Codegen.the_module``. If not, we will create it.
+
+.. code-block:: ocaml
+
+ | None -> declare_function name ft the_module
+
+This indicates the type and name to use, as well as which module to
+insert into. By default we assume a function has
+``Llvm.Linkage.External``. "`external
+linkage <../LangRef.html#linkage>`_" means that the function may be defined
+outside the current module and/or that it is callable by functions
+outside the module. The "``name``" passed in is the name the user
+specified: this name is registered in the symbol table of
+"``Codegen.the_module``", which is used by the function call code above.
+
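+To make the types concrete: for a two-argument prototype like
+"foo(a b)", the ``ft`` computed above is ``double (double, double)``. A
+standalone equivalent, for illustration only:
+
+.. code-block:: ocaml
+
+    (* What 'ft' works out to for "def foo(a b) ...". *)
+    let ft = function_type double_type [| double_type; double_type |]
+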
+In Kaleidoscope, I choose to allow redefinitions of functions in two
+cases: first, we want to allow 'extern'ing a function more than once, as
+long as the prototypes for the externs match (since all arguments have
+the same type, we just have to check that the number of arguments
+match). Second, we want to allow 'extern'ing a function and then
+defining a body for it. This is useful when defining mutually recursive
+functions.
+
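+The second case is what makes input like the following work (an
+illustrative example; with no control flow yet the calls would never
+terminate, but they compile fine):
+
+::
+
+    extern bar(a);      # forward declare bar.
+    def foo(a) bar(a);  # ok, calls the extern'd bar.
+    def bar(a) foo(a);  # ok, gives the earlier extern a body.
+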
+.. code-block:: ocaml
+
+ (* If 'f' conflicted, there was already something named 'name'. If it
+ * has a body, don't allow redefinition or reextern. *)
+ | Some f ->
+ (* If 'f' already has a body, reject this. *)
+ if Array.length (basic_blocks f) == 0 then () else
+ raise (Error "redefinition of function");
+
+ (* If 'f' took a different number of arguments, reject. *)
+ if Array.length (params f) == Array.length args then () else
+ raise (Error "redefinition of function with different # args");
+ f
+ in
+
+To implement the logic above, we first check to see if the
+pre-existing function is "empty". In this case, empty means that it has
+no basic blocks in it, which means it has no body. If it has no body, it
+is a forward declaration. Since we don't allow redefinition after a full
+definition of the function, the code rejects this case. If the previous
+reference to a function was an 'extern', we simply verify that the
+number of arguments for that definition and this one match up. If not,
+we emit an error.
+
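+For example, this sequence exercises both checks (the error strings
+match the code above):
+
+::
+
+    extern foo(a b);  # ok, declares foo.
+    extern foo(a b);  # ok, matching re-extern.
+    extern foo(a);    # error, redefinition of function with different # args
+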
+.. code-block:: ocaml
+
+ (* Set names for all arguments. *)
+ Array.iteri (fun i a ->
+ let n = args.(i) in
+ set_value_name n a;
+ Hashtbl.add named_values n a;
+ ) (params f);
+ f
+
+The last bit of code for prototypes loops over all of the arguments in
+the function, setting the name of the LLVM Argument objects to match,
+and registering the arguments in the ``Codegen.named_values`` map for
+future use by the ``Ast.Variable`` variant. Once this is set up, it
+returns the Function object to the caller. Note that we don't check for
+conflicting argument names here (e.g. "extern foo(a b a)"). Doing so
+would be very straightforward with the mechanics we have already used
+above; a minimal sketch follows.
+
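+A minimal sketch of such a check, assuming it runs inside
+``codegen_proto`` where ``args`` and our ``Error`` exception are in
+scope:
+
+.. code-block:: ocaml
+
+    (* Reject duplicate parameter names such as "extern foo(a b a)". *)
+    let seen = Hashtbl.create 8 in
+    Array.iter (fun n ->
+      if Hashtbl.mem seen n then raise (Error "duplicate argument name");
+      Hashtbl.add seen n ()
+    ) args;
+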
+.. code-block:: ocaml
+
+ let codegen_func = function
+ | Ast.Function (proto, body) ->
+ Hashtbl.clear named_values;
+ let the_function = codegen_proto proto in
+
+Code generation for function definitions starts out simply enough: we
+first clear out the ``Codegen.named_values`` map to make sure that there
+isn't anything in it from the last function we compiled, then codegen
+the prototype (Proto) and verify that it is ok. Code generation of
+the prototype ensures that there is an LLVM Function object that is
+ready to go for us.
+
+.. code-block:: ocaml
+
+ (* Create a new basic block to start insertion into. *)
+ let bb = append_block context "entry" the_function in
+ position_at_end bb builder;
+
+ try
+ let ret_val = codegen_expr body in
+
+Now we get to the point where the ``Codegen.builder`` is set up. The
+first line creates a new `basic
+block <http://en.wikipedia.org/wiki/Basic_block>`_ (named "entry"),
+which is inserted into ``the_function``. The second line then tells the
+builder that new instructions should be inserted into the end of the new
+basic block. Basic blocks in LLVM are an important part of functions
+that define the `Control Flow
+Graph <http://en.wikipedia.org/wiki/Control_flow_graph>`_. Since we
+don't have any control flow, our functions will only contain one block
+at this point. We'll fix this in `Chapter 5 <OCamlLangImpl5.html>`_ :).
+
+.. code-block:: ocaml
+
+ let ret_val = codegen_expr body in
+
+ (* Finish off the function. *)
+ let _ = build_ret ret_val builder in
+
+ (* Validate the generated code, checking for consistency. *)
+ Llvm_analysis.assert_valid_function the_function;
+
+ the_function
+
+Once the insertion point is set up, we call the ``Codegen.codegen_expr``
+function for the root expression of the function. If no error happens,
+this emits code to compute the expression into the entry block and
+returns the value that was computed. Assuming no error, we then create
+an LLVM `ret instruction <../LangRef.html#i_ret>`_, which completes the
+function. Once the function is built, we call
+``Llvm_analysis.assert_valid_function``, which is provided by LLVM. This
+function does a variety of consistency checks on the generated code, to
+determine if our compiler is doing everything right. Using this is
+important: it can catch a lot of bugs. Once the function is finished and
+validated, we return it.
+
+.. code-block:: ocaml
+
+ with e ->
+ delete_function the_function;
+ raise e
+
+The only piece left here is handling of the error case. For simplicity,
+we handle this by merely deleting the function we produced with the
+``Llvm.delete_function`` method. This allows the user to redefine a
+function that they incorrectly typed in before: if we didn't delete it,
+it would live in the symbol table, with a body, preventing future
+redefinition.
+
+This code does have a bug, though. Since ``Codegen.codegen_proto`` can
+return a previously defined forward declaration, our code can actually
+delete a forward declaration. There are a number of ways to fix this
+bug; see what you can come up with (one possible sketch appears after
+the testcase)! Here is a testcase:
+
+::
+
+ extern foo(a b); # ok, defines foo.
+ def foo(a b) c; # error, 'c' is invalid.
+ def bar() foo(1, 2); # error, unknown function "foo"
+
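+One possible fix is sketched below; it assumes we only want to delete
+functions this call actually created, and a fuller fix would also erase
+a partially built body from a pre-existing declaration:
+
+.. code-block:: ocaml
+
+    let codegen_func = function
+      | Ast.Function (proto, body) ->
+          Hashtbl.clear named_values;
+          (* Remember whether the name was declared before codegen_proto. *)
+          let Ast.Prototype (name, _) = proto in
+          let preexisting = lookup_function name the_module <> None in
+          let the_function = codegen_proto proto in
+          let bb = append_block context "entry" the_function in
+          position_at_end bb builder;
+          try
+            let ret_val = codegen_expr body in
+            let _ = build_ret ret_val builder in
+            Llvm_analysis.assert_valid_function the_function;
+            the_function
+          with e ->
+            (* Only delete what we created; forward declarations survive. *)
+            if not preexisting then delete_function the_function;
+            raise e
+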
+Driver Changes and Closing Thoughts
+===================================
+
+For now, code generation to LLVM doesn't really get us much, except that
+we can look at the pretty IR. The sample code inserts calls to
+Codegen into the "``Toplevel.main_loop``", and then dumps out the LLVM
+IR. This gives a nice way to look at the LLVM IR for simple functions.
+For example:
+
+::
+
+ ready> 4+5;
+ Read top-level expression:
+ define double @""() {
+ entry:
+ %addtmp = fadd double 4.000000e+00, 5.000000e+00
+ ret double %addtmp
+ }
+
+Note how the parser turns the top-level expression into anonymous
+functions for us. This will be handy when we add `JIT
+support <OCamlLangImpl4.html#jit>`_ in the next chapter. Also note that
+the code is very literally transcribed, no optimizations are being
+performed. We will `add
+optimizations <OCamlLangImpl4.html#trivialconstfold>`_ explicitly in the
+next chapter.
+
+::
+
+ ready> def foo(a b) a*a + 2*a*b + b*b;
+ Read function definition:
+ define double @foo(double %a, double %b) {
+ entry:
+ %multmp = fmul double %a, %a
+ %multmp1 = fmul double 2.000000e+00, %a
+ %multmp2 = fmul double %multmp1, %b
+ %addtmp = fadd double %multmp, %multmp2
+ %multmp3 = fmul double %b, %b
+ %addtmp4 = fadd double %addtmp, %multmp3
+ ret double %addtmp4
+ }
+
+This shows some simple arithmetic. Notice the striking similarity to the
+LLVM builder calls that we use to create the instructions.
+
+::
+
+ ready> def bar(a) foo(a, 4.0) + bar(31337);
+ Read function definition:
+ define double @bar(double %a) {
+ entry:
+ %calltmp = call double @foo(double %a, double 4.000000e+00)
+ %calltmp1 = call double @bar(double 3.133700e+04)
+ %addtmp = fadd double %calltmp, %calltmp1
+ ret double %addtmp
+ }
+
+This shows some function calls. Note that this function will take a long
+time to execute if you call it. In the future we'll add conditional
+control flow to actually make recursion useful :).
+
+::
+
+ ready> extern cos(x);
+ Read extern:
+ declare double @cos(double)
+
+ ready> cos(1.234);
+ Read top-level expression:
+ define double @""() {
+ entry:
+ %calltmp = call double @cos(double 1.234000e+00)
+ ret double %calltmp
+ }
+
+This shows an extern for the libm "cos" function, and a call to it.
+
+::
+
+ ready> ^D
+ ; ModuleID = 'my cool jit'
+
+ define double @""() {
+ entry:
+ %addtmp = fadd double 4.000000e+00, 5.000000e+00
+ ret double %addtmp
+ }
+
+ define double @foo(double %a, double %b) {
+ entry:
+ %multmp = fmul double %a, %a
+ %multmp1 = fmul double 2.000000e+00, %a
+ %multmp2 = fmul double %multmp1, %b
+ %addtmp = fadd double %multmp, %multmp2
+ %multmp3 = fmul double %b, %b
+ %addtmp4 = fadd double %addtmp, %multmp3
+ ret double %addtmp4
+ }
+
+ define double @bar(double %a) {
+ entry:
+ %calltmp = call double @foo(double %a, double 4.000000e+00)
+ %calltmp1 = call double @bar(double 3.133700e+04)
+ %addtmp = fadd double %calltmp, %calltmp1
+ ret double %addtmp
+ }
+
+ declare double @cos(double)
+
+ define double @""() {
+ entry:
+ %calltmp = call double @cos(double 1.234000e+00)
+ ret double %calltmp
+ }
+
+When you quit the current demo, it dumps out the IR for the entire
+module generated. Here you can see the big picture with all the
+functions referencing each other.
+
+This wraps up the third chapter of the Kaleidoscope tutorial. Up next,
+we'll describe how to `add JIT codegen and optimizer
+support <OCamlLangImpl4.html>`_ to this so we can actually start running
+code!
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the LLVM code generator. Because this uses the LLVM libraries, we need
+to link them in. For the OCaml build, the ``_tags`` and
+``myocamlbuild.ml`` files below tell ocamlbuild which LLVM libraries to
+use (the `llvm-config <http://llvm.org/cmds/llvm-config.html>`_ tool
+serves the same purpose for makefile/command-line builds):
+
+.. code-block:: bash
+
+ # Compile
+ ocamlbuild toy.byte
+ # Run
+ ./toy.byte
+
+Here is the code:
+
+\_tags:
+ ::
+
+ <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+ <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+
+myocamlbuild.ml:
+ .. code-block:: ocaml
+
+ open Ocamlbuild_plugin;;
+
+ ocaml_lib ~extern:true "llvm";;
+ ocaml_lib ~extern:true "llvm_analysis";;
+
+ flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
+
+token.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+    (* The lexer returns 'Kwd' for an unknown character, otherwise one of
+     * these other tokens for known things. *)
+ type token =
+ (* commands *)
+ | Def | Extern
+
+ (* primary *)
+ | Ident of string | Number of float
+
+ (* unknown *)
+ | Kwd of char
+
+lexer.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+ let rec lex = parser
+ (* Skip any whitespace. *)
+ | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+ (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+
+ (* number: [0-9.]+ *)
+ | [< ' ('0' .. '9' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+
+ (* Comment until end of line. *)
+ | [< ' ('#'); stream >] ->
+ lex_comment stream
+
+ (* Otherwise, just return the character as its ascii value. *)
+ | [< 'c; stream >] ->
+ [< 'Token.Kwd c; lex stream >]
+
+ (* end of stream. *)
+ | [< >] -> [< >]
+
+ and lex_number buffer = parser
+ | [< ' ('0' .. '9' | '.' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+ | [< stream=lex >] ->
+ [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+ and lex_ident buffer = parser
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+ | [< stream=lex >] ->
+ match Buffer.contents buffer with
+ | "def" -> [< 'Token.Def; stream >]
+ | "extern" -> [< 'Token.Extern; stream >]
+ | id -> [< 'Token.Ident id; stream >]
+
+ and lex_comment = parser
+ | [< ' ('\n'); stream=lex >] -> stream
+ | [< 'c; e=lex_comment >] -> e
+ | [< >] -> [< >]
+
+ast.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+ (* expr - Base type for all expression nodes. *)
+ type expr =
+ (* variant for numeric literals like "1.0". *)
+ | Number of float
+
+ (* variant for referencing a variable, like "a". *)
+ | Variable of string
+
+ (* variant for a binary operator. *)
+ | Binary of char * expr * expr
+
+ (* variant for function calls. *)
+ | Call of string * expr array
+
+ (* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+ type proto = Prototype of string * string array
+
+ (* func - This type represents a function definition itself. *)
+ type func = Function of proto * expr
+
+parser.ml:
+ .. code-block:: ocaml
+
+ (*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+ (* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+ let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+ (* precedence - Get the precedence of the pending binary operator token. *)
+ let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+ (* primary
+ * ::= identifier
+ * ::= numberexpr
+ * ::= parenexpr *)
+ let rec parse_primary = parser
+ (* numberexpr ::= number *)
+ | [< 'Token.Number n >] -> Ast.Number n
+
+ (* parenexpr ::= '(' expression ')' *)
+ | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+ (* identifierexpr
+ * ::= identifier
+ * ::= identifier '(' argumentexpr ')' *)
+ | [< 'Token.Ident id; stream >] ->
+ let rec parse_args accumulator = parser
+ | [< e=parse_expr; stream >] ->
+ begin parser
+ | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+ | [< >] -> e :: accumulator
+ end stream
+ | [< >] -> accumulator
+ in
+ let rec parse_ident id = parser
+ (* Call. *)
+ | [< 'Token.Kwd '(';
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')'">] ->
+ Ast.Call (id, Array.of_list (List.rev args))
+
+ (* Simple variable ref. *)
+ | [< >] -> Ast.Variable id
+ in
+ parse_ident id stream
+
+ | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+ (* binoprhs
+ * ::= ('+' primary)* *)
+ and parse_bin_rhs expr_prec lhs stream =
+ match Stream.peek stream with
+ (* If this is a binop, find its precedence. *)
+ | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+ let token_prec = precedence c in
+
+ (* If this is a binop that binds at least as tightly as the current binop,
+ * consume it, otherwise we are done. *)
+ if token_prec < expr_prec then lhs else begin
+ (* Eat the binop. *)
+ Stream.junk stream;
+
+ (* Parse the primary expression after the binary operator. *)
+ let rhs = parse_primary stream in
+
+ (* Okay, we know this is a binop. *)
+ let rhs =
+ match Stream.peek stream with
+ | Some (Token.Kwd c2) ->
+ (* If BinOp binds less tightly with rhs than the operator after
+ * rhs, let the pending operator take rhs as its lhs. *)
+ let next_prec = precedence c2 in
+ if token_prec < next_prec
+ then parse_bin_rhs (token_prec + 1) rhs stream
+ else rhs
+ | _ -> rhs
+ in
+
+ (* Merge lhs/rhs. *)
+ let lhs = Ast.Binary (c, lhs, rhs) in
+ parse_bin_rhs expr_prec lhs stream
+ end
+ | _ -> lhs
+
+ (* expression
+ * ::= primary binoprhs *)
+ and parse_expr = parser
+ | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+ (* prototype
+ * ::= id '(' id* ')' *)
+ let parse_prototype =
+ let rec parse_args accumulator = parser
+ | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+ | [< >] -> accumulator
+ in
+
+ parser
+ | [< 'Token.Ident id;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ (* success. *)
+ Ast.Prototype (id, Array.of_list (List.rev args))
+
+ | [< >] ->
+ raise (Stream.Error "expected function name in prototype")
+
+ (* definition ::= 'def' prototype expression *)
+ let parse_definition = parser
+ | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+ Ast.Function (p, e)
+
+ (* toplevelexpr ::= expression *)
+ let parse_toplevel = parser
+ | [< e=parse_expr >] ->
+ (* Make an anonymous proto. *)
+ Ast.Function (Ast.Prototype ("", [||]), e)
+
+ (* external ::= 'extern' prototype *)
+ let parse_extern = parser
+ | [< 'Token.Extern; e=parse_prototype >] -> e
+
+codegen.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+
+ exception Error of string
+
+ let context = global_context ()
+ let the_module = create_module context "my cool jit"
+ let builder = builder context
+ let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+ let double_type = double_type context
+
+ let rec codegen_expr = function
+ | Ast.Number n -> const_float double_type n
+ | Ast.Variable name ->
+ (try Hashtbl.find named_values name with
+ | Not_found -> raise (Error "unknown variable name"))
+ | Ast.Binary (op, lhs, rhs) ->
+ let lhs_val = codegen_expr lhs in
+ let rhs_val = codegen_expr rhs in
+ begin
+ match op with
+        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
+ | '<' ->
+ (* Convert bool 0/1 to double 0.0 or 1.0 *)
+ let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+ build_uitofp i double_type "booltmp" builder
+ | _ -> raise (Error "invalid binary operator")
+ end
+ | Ast.Call (callee, args) ->
+ (* Look up the name in the module table. *)
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "unknown function referenced")
+ in
+ let params = params callee in
+
+      (* Reject calls with the wrong number of arguments. *)
+ if Array.length params == Array.length args then () else
+ raise (Error "incorrect # arguments passed");
+ let args = Array.map codegen_expr args in
+ build_call callee args "calltmp" builder
+
+ let codegen_proto = function
+ | Ast.Prototype (name, args) ->
+ (* Make the function type: double(double,double) etc. *)
+ let doubles = Array.make (Array.length args) double_type in
+ let ft = function_type double_type doubles in
+ let f =
+ match lookup_function name the_module with
+ | None -> declare_function name ft the_module
+
+ (* If 'f' conflicted, there was already something named 'name'. If it
+ * has a body, don't allow redefinition or reextern. *)
+ | Some f ->
+ (* If 'f' already has a body, reject this. *)
+ if block_begin f <> At_end f then
+ raise (Error "redefinition of function");
+
+ (* If 'f' took a different number of arguments, reject. *)
+ if element_type (type_of f) <> ft then
+ raise (Error "redefinition of function with different # args");
+ f
+ in
+
+ (* Set names for all arguments. *)
+ Array.iteri (fun i a ->
+ let n = args.(i) in
+ set_value_name n a;
+ Hashtbl.add named_values n a;
+ ) (params f);
+ f
+
+ let codegen_func = function
+ | Ast.Function (proto, body) ->
+ Hashtbl.clear named_values;
+ let the_function = codegen_proto proto in
+
+ (* Create a new basic block to start insertion into. *)
+ let bb = append_block context "entry" the_function in
+ position_at_end bb builder;
+
+ try
+ let ret_val = codegen_expr body in
+
+ (* Finish off the function. *)
+ let _ = build_ret ret_val builder in
+
+ (* Validate the generated code, checking for consistency. *)
+ Llvm_analysis.assert_valid_function the_function;
+
+ the_function
+ with e ->
+ delete_function the_function;
+ raise e
+
+toplevel.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+
+ (* top ::= definition | external | expression | ';' *)
+ let rec main_loop stream =
+ match Stream.peek stream with
+ | None -> ()
+
+ (* ignore top-level semicolons. *)
+ | Some (Token.Kwd ';') ->
+ Stream.junk stream;
+ main_loop stream
+
+ | Some token ->
+ begin
+ try match token with
+ | Token.Def ->
+ let e = Parser.parse_definition stream in
+ print_endline "parsed a function definition.";
+ dump_value (Codegen.codegen_func e);
+ | Token.Extern ->
+ let e = Parser.parse_extern stream in
+ print_endline "parsed an extern.";
+ dump_value (Codegen.codegen_proto e);
+ | _ ->
+ (* Evaluate a top-level expression into an anonymous function. *)
+ let e = Parser.parse_toplevel stream in
+ print_endline "parsed a top-level expr";
+ dump_value (Codegen.codegen_func e);
+ with Stream.Error s | Codegen.Error s ->
+ (* Skip token for error recovery. *)
+ Stream.junk stream;
+ print_endline s;
+ end;
+ print_string "ready> "; flush stdout;
+ main_loop stream
+
+toy.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+
+ let main () =
+ (* Install standard binary operators.
+ * 1 is the lowest precedence. *)
+ Hashtbl.add Parser.binop_precedence '<' 10;
+ Hashtbl.add Parser.binop_precedence '+' 20;
+ Hashtbl.add Parser.binop_precedence '-' 20;
+ Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
+
+ (* Prime the first token. *)
+ print_string "ready> "; flush stdout;
+ let stream = Lexer.lex (Stream.of_channel stdin) in
+
+ (* Run the main "interpreter loop" now. *)
+ Toplevel.main_loop stream;
+
+ (* Print out all the generated code. *)
+ dump_module Codegen.the_module
+ ;;
+
+ main ()
+
+`Next: Adding JIT and Optimizer Support <OCamlLangImpl4.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl4.html b/docs/tutorial/OCamlLangImpl4.html
deleted file mode 100644
index d3cfd3d6736a..000000000000
--- a/docs/tutorial/OCamlLangImpl4.html
+++ /dev/null
@@ -1,1026 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Adding JIT and Optimizer Support</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <meta name="author" content="Erick Tryzelaar">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Adding JIT and Optimizer Support</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 4
- <ol>
- <li><a href="#intro">Chapter 4 Introduction</a></li>
- <li><a href="#trivialconstfold">Trivial Constant Folding</a></li>
- <li><a href="#optimizerpasses">LLVM Optimization Passes</a></li>
- <li><a href="#jit">Adding a JIT Compiler</a></li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="OCamlLangImpl5.html">Chapter 5</a>: Extending the Language: Control
-Flow</li>
-</ul>
-
-<div class="doc_author">
- <p>
- Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
- and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
- </p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 4 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 4 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial. Chapters 1-3 described the implementation of a simple
-language and added support for generating LLVM IR. This chapter describes
-two new techniques: adding optimizer support to your language, and adding JIT
-compiler support. These additions will demonstrate how to get nice, efficient code
-for the Kaleidoscope language.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="trivialconstfold">Trivial Constant Folding</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p><b>Note:</b> the default <tt>IRBuilder</tt> now always includes the constant
-folding optimisations below.<p>
-
-<p>
-Our demonstration for Chapter 3 is elegant and easy to extend. Unfortunately,
-it does not produce wonderful code. For example, when compiling simple code,
-we don't get obvious optimizations:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) 1+2+x;</b>
-Read function definition:
-define double @test(double %x) {
-entry:
- %addtmp = fadd double 1.000000e+00, 2.000000e+00
- %addtmp1 = fadd double %addtmp, %x
- ret double %addtmp1
-}
-</pre>
-</div>
-
-<p>This code is a very, very literal transcription of the AST built by parsing
-the input. As such, this transcription lacks optimizations like constant folding
-(we'd like to get "<tt>add x, 3.0</tt>" in the example above) as well as other
-more important optimizations. Constant folding, in particular, is a very common
-and very important optimization: so much so that many language implementors
-implement constant folding support in their AST representation.</p>
-
-<p>With LLVM, you don't need this support in the AST. Since all calls to build
-LLVM IR go through the LLVM builder, it would be nice if the builder itself
-checked to see if there was a constant folding opportunity when you call it.
-If so, it could just do the constant fold and return the constant instead of
-creating an instruction. This is exactly what the <tt>LLVMFoldingBuilder</tt>
-class does.
-
-<p>All we did was switch from <tt>LLVMBuilder</tt> to
-<tt>LLVMFoldingBuilder</tt>. Though we change no other code, we now have all of our
-instructions implicitly constant folded without us having to do anything
-about it. For example, the input above now compiles to:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) 1+2+x;</b>
-Read function definition:
-define double @test(double %x) {
-entry:
- %addtmp = fadd double 3.000000e+00, %x
- ret double %addtmp
-}
-</pre>
-</div>
-
-<p>Well, that was easy :). In practice, we recommend always using
-<tt>LLVMFoldingBuilder</tt> when generating code like this. It has no
-"syntactic overhead" for its use (you don't have to uglify your compiler with
-constant checks everywhere) and it can dramatically reduce the amount of
-LLVM IR that is generated in some cases (particularly for languages with a macro
-preprocessor or that use a lot of constants).</p>
-
-<p>On the other hand, the <tt>LLVMFoldingBuilder</tt> is limited by the fact
-that it does all of its analysis inline with the code as it is built. If you
-take a slightly more complex example:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) (1+2+x)*(x+(1+2));</b>
-ready&gt; Read function definition:
-define double @test(double %x) {
-entry:
- %addtmp = fadd double 3.000000e+00, %x
- %addtmp1 = fadd double %x, 3.000000e+00
- %multmp = fmul double %addtmp, %addtmp1
- ret double %multmp
-}
-</pre>
-</div>
-
-<p>In this case, the LHS and RHS of the multiplication are the same value. We'd
-really like to see this generate "<tt>tmp = x+3; result = tmp*tmp;</tt>" instead
-of computing "<tt>x+3</tt>" twice.</p>
-
-<p>Unfortunately, no amount of local analysis will be able to detect and correct
-this. This requires two transformations: reassociation of expressions (to
-make the adds lexically identical) and Common Subexpression Elimination (CSE)
-to delete the redundant add instruction. Fortunately, LLVM provides a broad
-range of optimizations that you can use, in the form of "passes".</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="optimizerpasses">LLVM Optimization Passes</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM provides many optimization passes, which do many different sorts of
-things and have different tradeoffs. Unlike other systems, LLVM doesn't hold
-to the mistaken notion that one set of optimizations is right for all languages
-and for all situations. LLVM allows a compiler implementor to make complete
-decisions about what optimizations to use, in which order, and in what
-situation.</p>
-
-<p>As a concrete example, LLVM supports both "whole module" passes, which look
-across as large a body of code as they can (often a whole file, but if run
-at link time, this can be a substantial portion of the whole program). It also
-supports and includes "per-function" passes which just operate on a single
-function at a time, without looking at other functions. For more information
-on passes and how they are run, see the <a href="../WritingAnLLVMPass.html">How
-to Write a Pass</a> document and the <a href="../Passes.html">List of LLVM
-Passes</a>.</p>
-
-<p>For Kaleidoscope, we are currently generating functions on the fly, one at
-a time, as the user types them in. We aren't shooting for the ultimate
-optimization experience in this setting, but we also want to catch the easy and
-quick stuff where possible. As such, we will choose to run a few per-function
-optimizations as the user types the function in. If we wanted to make a "static
-Kaleidoscope compiler", we would use exactly the code we have now, except that
-we would defer running the optimizer until the entire file has been parsed.</p>
-
-<p>In order to get per-function optimizations going, we need to set up a
-<a href="../WritingAnLLVMPass.html#passmanager">Llvm.PassManager</a> to hold and
-organize the LLVM optimizations that we want to run. Once we have that, we can
-add a set of optimizations to run. The code looks like this:</p>
-
-<div class="doc_code">
-<pre>
- (* Create the JIT. *)
- let the_execution_engine = ExecutionEngine.create Codegen.the_module in
- let the_fpm = PassManager.create_function Codegen.the_module in
-
- (* Set up the optimizer pipeline. Start with registering info about how the
- * target lays out data structures. *)
- DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
- (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
- add_instruction_combination the_fpm;
-
- (* reassociate expressions. *)
- add_reassociation the_fpm;
-
- (* Eliminate Common SubExpressions. *)
- add_gvn the_fpm;
-
- (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
- add_cfg_simplification the_fpm;
-
- ignore (PassManager.initialize the_fpm);
-
- (* Run the main "interpreter loop" now. *)
- Toplevel.main_loop the_fpm the_execution_engine stream;
-</pre>
-</div>
-
-<p>The meat of the matter here is the definition of "<tt>the_fpm</tt>". It
-requires a pointer to <tt>the_module</tt> to construct itself. Once it is
-set up, we use a series of "add" calls to add a bunch of LLVM passes. The
-first pass is basically boilerplate: it adds a pass so that later optimizations
-know how the data structures in the program are laid out. The
-"<tt>the_execution_engine</tt>" variable is related to the JIT, which we will
-get to in the next section.</p>
-
-<p>In this case, we choose to add 4 optimization passes. The passes we chose
-here are a pretty standard set of "cleanup" optimizations that are useful for
-a wide variety of code. I won't delve into what they do but, believe me,
-they are a good starting place :).</p>
-
-<p>Once the <tt>Llvm.PassManager.t</tt> is set up, we need to make use of it.
-We do this by running it after our newly created function is constructed (in
-<tt>Codegen.codegen_func</tt>), but before it is returned to the client:</p>
-
-<div class="doc_code">
-<pre>
-let codegen_func the_fpm = function
- ...
- try
- let ret_val = codegen_expr body in
-
- (* Finish off the function. *)
- let _ = build_ret ret_val builder in
-
- (* Validate the generated code, checking for consistency. *)
- Llvm_analysis.assert_valid_function the_function;
-
- (* Optimize the function. *)
- let _ = PassManager.run_function the_function the_fpm in
-
- the_function
-</pre>
-</div>
-
-<p>As you can see, this is pretty straightforward. <tt>the_fpm</tt>
-optimizes and updates the LLVM Function* in place, improving (hopefully) its
-body. With this in place, we can try our test above again:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) (1+2+x)*(x+(1+2));</b>
-ready&gt; Read function definition:
-define double @test(double %x) {
-entry:
- %addtmp = fadd double %x, 3.000000e+00
- %multmp = fmul double %addtmp, %addtmp
- ret double %multmp
-}
-</pre>
-</div>
-
-<p>As expected, we now get our nicely optimized code, saving a floating point
-add instruction from every execution of this function.</p>
-
-<p>LLVM provides a wide variety of optimizations that can be used in certain
-circumstances. Some <a href="../Passes.html">documentation about the various
-passes</a> is available, but it isn't very complete. Another good source of
-ideas can come from looking at the passes that <tt>Clang</tt> runs to get
-started. The "<tt>opt</tt>" tool allows you to experiment with passes from the
-command line, so you can see if they do anything.</p>
-
-<p>Now that we have reasonable code coming out of our front-end, let's talk about
-executing it!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="jit">Adding a JIT Compiler</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Code that is available in LLVM IR can have a wide variety of tools
-applied to it. For example, you can run optimizations on it (as we did above),
-you can dump it out in textual or binary forms, you can compile the code to an
-assembly file (.s) for some target, or you can JIT compile it. The nice thing
-about the LLVM IR representation is that it is the "common currency" between
-many different parts of the compiler.
-</p>
-
-<p>In this section, we'll add JIT compiler support to our interpreter. The
-basic idea that we want for Kaleidoscope is to have the user enter function
-bodies as they do now, but immediately evaluate the top-level expressions they
-type in. For example, if they type in "1 + 2;", we should evaluate and print
-out 3. If they define a function, they should be able to call it from the
-command line.</p>
-
-<p>In order to do this, we first declare and initialize the JIT. This is done
-by adding a global variable and a call in <tt>main</tt>:</p>
-
-<div class="doc_code">
-<pre>
-...
-let main () =
- ...
- <b>(* Create the JIT. *)
- let the_execution_engine = ExecutionEngine.create Codegen.the_module in</b>
- ...
-</pre>
-</div>
-
-<p>This creates an abstract "Execution Engine" which can be either a JIT
-compiler or the LLVM interpreter. LLVM will automatically pick a JIT compiler
-for you if one is available for your platform, otherwise it will fall back to
-the interpreter.</p>
-
-<p>Once the <tt>Llvm_executionengine.ExecutionEngine.t</tt> is created, the JIT
-is ready to be used. There are a variety of APIs that are useful, but the
-simplest one is the "<tt>Llvm_executionengine.ExecutionEngine.run_function</tt>"
-function. This function JIT compiles the specified LLVM Function, executes it
-with the given arguments, and returns its result. In our case, this means that we
-can change the code that parses a top-level expression to look like this:</p>
-
-<div class="doc_code">
-<pre>
- (* Evaluate a top-level expression into an anonymous function. *)
- let e = Parser.parse_toplevel stream in
- print_endline "parsed a top-level expr";
- let the_function = Codegen.codegen_func the_fpm e in
- dump_value the_function;
-
- (* JIT the function, returning its result. *)
- let result = ExecutionEngine.run_function the_function [||]
- the_execution_engine in
-
- print_string "Evaluated to ";
- print_float (GenericValue.as_float Codegen.double_type result);
- print_newline ();
-</pre>
-</div>
-
-<p>Recall that we compile top-level expressions into a self-contained LLVM
-function that takes no arguments and returns the computed double. Because the
-LLVM JIT compiler matches the native platform ABI, this means that you can just
-cast the result pointer to a function pointer of that type and call it directly.
-This means there is no difference between JIT-compiled code and native machine
-code that is statically linked into your application.</p>
-
-<p>With just these two changes, let's see how Kaleidoscope works now!</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>4+5;</b>
-define double @""() {
-entry:
- ret double 9.000000e+00
-}
-
-<em>Evaluated to 9.000000</em>
-</pre>
-</div>
-
-<p>Well, this looks like it is basically working. The dump of the function
-shows the "no argument function that always returns double" that we synthesize
-for each top level expression that is typed in. This demonstrates very basic
-functionality, but can we do more?</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def testfunc(x y) x + y*2; </b>
-Read function definition:
-define double @testfunc(double %x, double %y) {
-entry:
- %multmp = fmul double %y, 2.000000e+00
- %addtmp = fadd double %multmp, %x
- ret double %addtmp
-}
-
-ready&gt; <b>testfunc(4, 10);</b>
-define double @""() {
-entry:
- %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
- ret double %calltmp
-}
-
-<em>Evaluated to 24.000000</em>
-</pre>
-</div>
-
-<p>This illustrates that we can now call user code, but there is something a bit
-subtle going on here. Note that we only invoke the JIT on the anonymous
-functions that <em>call testfunc</em>, but we never invoked it
-on <em>testfunc</em> itself. What actually happened here is that the JIT
-scanned for all non-JIT'd functions transitively called from the anonymous
-function and compiled all of them before returning
-from <tt>run_function</tt>.</p>
-
-<p>The JIT provides a number of other more advanced interfaces for things like
-freeing allocated machine code, rejit'ing functions to update them, etc.
-However, even with this simple code, we get some surprisingly powerful
-capabilities - check this out (I removed the dump of the anonymous functions,
-you should get the idea by now :) :</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern sin(x);</b>
-Read extern:
-declare double @sin(double)
-
-ready&gt; <b>extern cos(x);</b>
-Read extern:
-declare double @cos(double)
-
-ready&gt; <b>sin(1.0);</b>
-<em>Evaluated to 0.841471</em>
-
-ready&gt; <b>def foo(x) sin(x)*sin(x) + cos(x)*cos(x);</b>
-Read function definition:
-define double @foo(double %x) {
-entry:
- %calltmp = call double @sin(double %x)
- %multmp = fmul double %calltmp, %calltmp
- %calltmp2 = call double @cos(double %x)
- %multmp4 = fmul double %calltmp2, %calltmp2
- %addtmp = fadd double %multmp, %multmp4
- ret double %addtmp
-}
-
-ready&gt; <b>foo(4.0);</b>
-<em>Evaluated to 1.000000</em>
-</pre>
-</div>
-
-<p>Whoa, how does the JIT know about sin and cos? The answer is surprisingly
-simple: in this example, the JIT started execution of a function and got to a
-function call. It realized that the function was not yet JIT compiled and
-invoked the standard set of routines to resolve the function. In this case,
-there is no body defined for the function, so the JIT ended up calling
-"<tt>dlsym("sin")</tt>" on the Kaleidoscope process itself. Since
-"<tt>sin</tt>" is defined within the JIT's address space, it simply patches up
-calls in the module to call the libm version of <tt>sin</tt> directly.</p>
-
-<p>The LLVM JIT provides a number of interfaces (look in the
-<tt>llvm_executionengine.mli</tt> file) for controlling how unknown functions
-get resolved. It allows you to establish explicit mappings between IR objects
-and addresses (useful for LLVM global variables that you want to map to static
-tables, for example), allows you to dynamically decide on the fly based on the
-function name, and even allows you to have the JIT compile functions lazily the
-first time they're called.</p>
-
-<p>One interesting application of this is that we can now extend the language
-by writing arbitrary C code to implement operations. For example, if we add:
-</p>
-
-<div class="doc_code">
-<pre>
-/* putchard - putchar that takes a double and returns 0. */
-extern "C"
-double putchard(double X) {
- putchar((char)X);
- return 0;
-}
-</pre>
-</div>
-
-<p>Now we can produce simple output to the console by using things like:
-"<tt>extern putchard(x); putchard(120);</tt>", which prints a lowercase 'x' on
-the console (120 is the ASCII code for 'x'). Similar code could be used to
-implement file I/O, console input, and many other capabilities in
-Kaleidoscope.</p>
-
-<p>This completes the JIT and optimizer chapter of the Kaleidoscope tutorial. At
-this point, we can compile a non-Turing-complete programming language, optimize
-and JIT compile it in a user-driven way. Next up we'll look into <a
-href="OCamlLangImpl5.html">extending the language with control flow
-constructs</a>, tackling some interesting LLVM IR issues along the way.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-LLVM JIT and optimizer. To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-&lt;*.{byte,native}&gt;: g++, use_llvm, use_llvm_analysis
-&lt;*.{byte,native}&gt;: use_llvm_executionengine, use_llvm_target
-&lt;*.{byte,native}&gt;: use_llvm_scalar_opts, use_bindings
-</pre>
-</dd>
-
-<dt>myocamlbuild.ml:</dt>
-<dd class="doc_code">
-<pre>
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns a 'Kwd' token for an unknown character, otherwise one of
- * the other token types for known things. *)
-type token =
- (* commands *)
- | Def | Extern
-
- (* primary *)
- | Ident of string | Number of float
-
- (* unknown *)
- | Kwd of char
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
- (* Skip any whitespace. *)
- | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
- (* identifier: [a-zA-Z][a-zA-Z0-9]* *)
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_ident buffer stream
-
- (* number: [0-9.]+ *)
- | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_number buffer stream
-
- (* Comment until end of line. *)
- | [&lt; ' ('#'); stream &gt;] -&gt;
- lex_comment stream
-
- (* Otherwise, just return the character as its ascii value. *)
- | [&lt; 'c; stream &gt;] -&gt;
- [&lt; 'Token.Kwd c; lex stream &gt;]
-
- (* end of stream. *)
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
- | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_number buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_ident buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- match Buffer.contents buffer with
- | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
- | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
- | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
- | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
- | [&lt; 'c; e=lex_comment &gt;] -&gt; e
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
- (* variant for numeric literals like "1.0". *)
- | Number of float
-
- (* variant for referencing a variable, like "a". *)
- | Variable of string
-
- (* variant for a binary operator. *)
- | Binary of char * expr * expr
-
- (* variant for function calls. *)
- | Call of string * expr array
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- * ::= identifier
- * ::= numberexpr
- * ::= parenexpr *)
-let rec parse_primary = parser
- (* numberexpr ::= number *)
- | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
- (* parenexpr ::= '(' expression ')' *)
- | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
- (* identifierexpr
- * ::= identifier
- * ::= identifier '(' argumentexpr ')' *)
- | [&lt; 'Token.Ident id; stream &gt;] -&gt;
- let rec parse_args accumulator = parser
- | [&lt; e=parse_expr; stream &gt;] -&gt;
- begin parser
- | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; e :: accumulator
- end stream
- | [&lt; &gt;] -&gt; accumulator
- in
- let rec parse_ident id = parser
- (* Call. *)
- | [&lt; 'Token.Kwd '(';
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
- Ast.Call (id, Array.of_list (List.rev args))
-
- (* Simple variable ref. *)
- | [&lt; &gt;] -&gt; Ast.Variable id
- in
- parse_ident id stream
-
- | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- * ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
- match Stream.peek stream with
- (* If this is a binop, find its precedence. *)
- | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
- let token_prec = precedence c in
-
- (* If this is a binop that binds at least as tightly as the current binop,
- * consume it, otherwise we are done. *)
- if token_prec &lt; expr_prec then lhs else begin
- (* Eat the binop. *)
- Stream.junk stream;
-
- (* Parse the primary expression after the binary operator. *)
- let rhs = parse_primary stream in
-
- (* Okay, we know this is a binop. *)
- let rhs =
- match Stream.peek stream with
- | Some (Token.Kwd c2) -&gt;
- (* If BinOp binds less tightly with rhs than the operator after
- * rhs, let the pending operator take rhs as its lhs. *)
- let next_prec = precedence c2 in
- if token_prec &lt; next_prec
- then parse_bin_rhs (token_prec + 1) rhs stream
- else rhs
- | _ -&gt; rhs
- in
-
- (* Merge lhs/rhs. *)
- let lhs = Ast.Binary (c, lhs, rhs) in
- parse_bin_rhs expr_prec lhs stream
- end
- | _ -&gt; lhs
-
-(* expression
- * ::= primary binoprhs *)
-and parse_expr = parser
- | [&lt; lhs=parse_primary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- * ::= id '(' id* ')' *)
-let parse_prototype =
- let rec parse_args accumulator = parser
- | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; accumulator
- in
-
- parser
- | [&lt; 'Token.Ident id;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- (* success. *)
- Ast.Prototype (id, Array.of_list (List.rev args))
-
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
- | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
- Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
- | [&lt; e=parse_expr &gt;] -&gt;
- (* Make an anonymous proto. *)
- Ast.Function (Ast.Prototype ("", [||]), e)
-
-(* external ::= 'extern' prototype *)
-let parse_extern = parser
- | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>codegen.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-let rec codegen_expr = function
- | Ast.Number n -&gt; const_float double_type n
- | Ast.Variable name -&gt;
- (try Hashtbl.find named_values name with
- | Not_found -&gt; raise (Error "unknown variable name"))
- | Ast.Binary (op, lhs, rhs) -&gt;
- let lhs_val = codegen_expr lhs in
- let rhs_val = codegen_expr rhs in
- begin
- match op with
- | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
- | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
- | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
- | '&lt;' -&gt;
- (* Convert bool 0/1 to double 0.0 or 1.0 *)
- let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
- build_uitofp i double_type "booltmp" builder
- | _ -&gt; raise (Error "invalid binary operator")
- end
- | Ast.Call (callee, args) -&gt;
- (* Look up the name in the module table. *)
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "unknown function referenced")
- in
- let params = params callee in
-
- (* If argument mismatch error. *)
- if Array.length params == Array.length args then () else
- raise (Error "incorrect # arguments passed");
- let args = Array.map codegen_expr args in
- build_call callee args "calltmp" builder
-
-let codegen_proto = function
- | Ast.Prototype (name, args) -&gt;
- (* Make the function type: double(double,double) etc. *)
- let doubles = Array.make (Array.length args) double_type in
- let ft = function_type double_type doubles in
- let f =
- match lookup_function name the_module with
- | None -&gt; declare_function name ft the_module
-
- (* If 'f' conflicted, there was already something named 'name'. If it
- * has a body, don't allow redefinition or reextern. *)
- | Some f -&gt;
- (* If 'f' already has a body, reject this. *)
- if block_begin f &lt;&gt; At_end f then
- raise (Error "redefinition of function");
-
- (* If 'f' took a different number of arguments, reject. *)
- if element_type (type_of f) &lt;&gt; ft then
- raise (Error "redefinition of function with different # args");
- f
- in
-
- (* Set names for all arguments. *)
- Array.iteri (fun i a -&gt;
- let n = args.(i) in
- set_value_name n a;
- Hashtbl.add named_values n a;
- ) (params f);
- f
-
-let codegen_func the_fpm = function
- | Ast.Function (proto, body) -&gt;
- Hashtbl.clear named_values;
- let the_function = codegen_proto proto in
-
- (* Create a new basic block to start insertion into. *)
- let bb = append_block context "entry" the_function in
- position_at_end bb builder;
-
- try
- let ret_val = codegen_expr body in
-
- (* Finish off the function. *)
- let _ = build_ret ret_val builder in
-
- (* Validate the generated code, checking for consistency. *)
- Llvm_analysis.assert_valid_function the_function;
-
- (* Optimize the function. *)
- let _ = PassManager.run_function the_function the_fpm in
-
- the_function
- with e -&gt;
- delete_function the_function;
- raise e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
- match Stream.peek stream with
- | None -&gt; ()
-
- (* ignore top-level semicolons. *)
- | Some (Token.Kwd ';') -&gt;
- Stream.junk stream;
- main_loop the_fpm the_execution_engine stream
-
- | Some token -&gt;
- begin
- try match token with
- | Token.Def -&gt;
- let e = Parser.parse_definition stream in
- print_endline "parsed a function definition.";
- dump_value (Codegen.codegen_func the_fpm e);
- | Token.Extern -&gt;
- let e = Parser.parse_extern stream in
- print_endline "parsed an extern.";
- dump_value (Codegen.codegen_proto e);
- | _ -&gt;
- (* Evaluate a top-level expression into an anonymous function. *)
- let e = Parser.parse_toplevel stream in
- print_endline "parsed a top-level expr";
- let the_function = Codegen.codegen_func the_fpm e in
- dump_value the_function;
-
- (* JIT the function, returning its result. *)
- let result = ExecutionEngine.run_function the_function [||]
- the_execution_engine in
-
- print_string "Evaluated to ";
- print_float (GenericValue.as_float Codegen.double_type result);
- print_newline ();
- with Stream.Error s | Codegen.Error s -&gt;
- (* Skip token for error recovery. *)
- Stream.junk stream;
- print_endline s;
- end;
- print_string "ready&gt; "; flush stdout;
- main_loop the_fpm the_execution_engine stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-
-let main () =
- ignore (initialize_native_target ());
-
- (* Install standard binary operators.
- * 1 is the lowest precedence. *)
- Hashtbl.add Parser.binop_precedence '&lt;' 10;
- Hashtbl.add Parser.binop_precedence '+' 20;
- Hashtbl.add Parser.binop_precedence '-' 20;
- Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
-
- (* Prime the first token. *)
- print_string "ready&gt; "; flush stdout;
- let stream = Lexer.lex (Stream.of_channel stdin) in
-
- (* Create the JIT. *)
- let the_execution_engine = ExecutionEngine.create Codegen.the_module in
- let the_fpm = PassManager.create_function Codegen.the_module in
-
- (* Set up the optimizer pipeline. Start with registering info about how the
- * target lays out data structures. *)
- DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
- (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
- add_instruction_combination the_fpm;
-
- (* reassociate expressions. *)
- add_reassociation the_fpm;
-
- (* Eliminate Common SubExpressions. *)
- add_gvn the_fpm;
-
- (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
- add_cfg_simplification the_fpm;
-
- ignore (PassManager.initialize the_fpm);
-
- (* Run the main "interpreter loop" now. *)
- Toplevel.main_loop the_fpm the_execution_engine stream;
-
- (* Print out all the generated code. *)
- dump_module Codegen.the_module
-;;
-
-main ()
-</pre>
-</dd>
-
-<dt>bindings.c</dt>
-<dd class="doc_code">
-<pre>
-#include &lt;stdio.h&gt;
-
-/* putchard - putchar that takes a double and returns 0. */
-extern double putchard(double X) {
- putchar((char)X);
- return 0;
-}
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl5.html">Next: Extending the language: control flow</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl4.rst b/docs/tutorial/OCamlLangImpl4.rst
new file mode 100644
index 000000000000..b13b2afa8883
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl4.rst
@@ -0,0 +1,915 @@
+==============================================
+Kaleidoscope: Adding JIT and Optimizer Support
+==============================================
+
+.. contents::
+ :local:
+
+Chapter 4 Introduction
+======================
+
+Welcome to Chapter 4 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. Chapters 1-3 described the implementation
+of a simple language and added support for generating LLVM IR. This
+chapter describes two new techniques: adding optimizer support to your
+language, and adding JIT compiler support. These additions will
+demonstrate how to get nice, efficient code for the Kaleidoscope
+language.
+
+Trivial Constant Folding
+========================
+
+**Note:** the default ``IRBuilder`` now always includes the constant
+folding optimizations below.
+
+Our demonstration for Chapter 3 is elegant and easy to extend.
+Unfortunately, it does not produce wonderful code. For example, when
+compiling simple code, we don't get obvious optimizations:
+
+::
+
+ ready> def test(x) 1+2+x;
+ Read function definition:
+ define double @test(double %x) {
+ entry:
+ %addtmp = fadd double 1.000000e+00, 2.000000e+00
+ %addtmp1 = fadd double %addtmp, %x
+ ret double %addtmp1
+ }
+
+This code is a very, very literal transcription of the AST built by
+parsing the input. As such, this transcription lacks optimizations like
+constant folding (we'd like to get "``add x, 3.0``" in the example
+above) as well as other more important optimizations. Constant folding,
+in particular, is a very common and very important optimization: so much
+so that many language implementors implement constant folding support in
+their AST representation.
+
+With LLVM, you don't need this support in the AST. Since all calls to
+build LLVM IR go through the LLVM builder, it would be nice if the
+builder itself checked to see if there was a constant folding
+opportunity when you call it. If so, it could just do the constant fold
+and return the constant instead of creating an instruction. This is
+exactly what the ``LLVMFoldingBuilder`` class does.
+
+All we did was switch from ``LLVMBuilder`` to ``LLVMFoldingBuilder``.
+Though we changed no other code, we now have all of our instructions
+implicitly constant folded without us having to do anything about it.
+For example, the input above now compiles to:
+
+::
+
+ ready> def test(x) 1+2+x;
+ Read function definition:
+ define double @test(double %x) {
+ entry:
+ %addtmp = fadd double 3.000000e+00, %x
+ ret double %addtmp
+ }
+
+Well, that was easy :). In practice, we recommend always using
+``LLVMFoldingBuilder`` when generating code like this. It has no
+"syntactic overhead" for its use (you don't have to uglify your compiler
+with constant checks everywhere) and it can dramatically reduce the
+amount of LLVM IR that is generated in some cases (particularly for
+languages with a macro preprocessor or that use a lot of constants).
+
+On the other hand, the ``LLVMFoldingBuilder`` is limited by the fact
+that it does all of its analysis inline with the code as it is built. If
+you take a slightly more complex example:
+
+::
+
+ ready> def test(x) (1+2+x)*(x+(1+2));
+ ready> Read function definition:
+ define double @test(double %x) {
+ entry:
+ %addtmp = fadd double 3.000000e+00, %x
+ %addtmp1 = fadd double %x, 3.000000e+00
+ %multmp = fmul double %addtmp, %addtmp1
+ ret double %multmp
+ }
+
+In this case, the LHS and RHS of the multiplication are the same value.
+We'd really like to see this generate "``tmp = x+3; result = tmp*tmp;``"
+instead of computing "``x+3``" twice.
+
+Unfortunately, no amount of local analysis will be able to detect and
+correct this. This requires two transformations: reassociation of
+expressions (to make the adds lexically identical) and Common
+Subexpression Elimination (CSE) to delete the redundant add instruction.
+Fortunately, LLVM provides a broad range of optimizations that you can
+use, in the form of "passes".
+
+LLVM Optimization Passes
+========================
+
+LLVM provides many optimization passes, which do many different sorts of
+things and have different tradeoffs. Unlike other systems, LLVM doesn't
+hold to the mistaken notion that one set of optimizations is right for
+all languages and for all situations. LLVM allows a compiler implementor
+to make complete decisions about what optimizations to use, in which
+order, and in what situation.
+
+As a concrete example, LLVM supports both "whole module" passes, which
+look across as large a body of code as they can (often a whole file,
+but if run at link time, this can be a substantial portion of the whole
+program). It also supports and includes "per-function" passes which just
+operate on a single function at a time, without looking at other
+functions. For more information on passes and how they are run, see the
+`How to Write a Pass <../WritingAnLLVMPass.html>`_ document and the
+`List of LLVM Passes <../Passes.html>`_.
+
+For Kaleidoscope, we are currently generating functions on the fly, one
+at a time, as the user types them in. We aren't shooting for the
+ultimate optimization experience in this setting, but we also want to
+catch the easy and quick stuff where possible. As such, we will choose
+to run a few per-function optimizations as the user types the function
+in. If we wanted to make a "static Kaleidoscope compiler", we would use
+exactly the code we have now, except that we would defer running the
+optimizer until the entire file has been parsed.
+
+In order to get per-function optimizations going, we need to set up a
+`Llvm.PassManager <../WritingAnLLVMPass.html#passmanager>`_ to hold and
+organize the LLVM optimizations that we want to run. Once we have that,
+we can add a set of optimizations to run. The code looks like this:
+
+.. code-block:: ocaml
+
+ (* Create the JIT. *)
+ let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+ let the_fpm = PassManager.create_function Codegen.the_module in
+
+ (* Set up the optimizer pipeline. Start with registering info about how the
+ * target lays out data structures. *)
+ DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+ (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+ add_instruction_combination the_fpm;
+
+ (* reassociate expressions. *)
+ add_reassociation the_fpm;
+
+ (* Eliminate Common SubExpressions. *)
+ add_gvn the_fpm;
+
+ (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+ add_cfg_simplification the_fpm;
+
+ ignore (PassManager.initialize the_fpm);
+
+ (* Run the main "interpreter loop" now. *)
+ Toplevel.main_loop the_fpm the_execution_engine stream;
+
+The meat of the matter here is the definition of "``the_fpm``". It
+requires a pointer to ``the_module`` to construct itself. Once it is
+set up, we use a series of "add" calls to add a bunch of LLVM passes.
+The first pass is basically boilerplate: it adds a pass so that later
+optimizations know how the data structures in the program are laid out.
+The "``the_execution_engine``" variable is related to the JIT, which we
+will get to in the next section.
+
+In this case, we choose to add 4 optimization passes. The passes we
+chose here are a pretty standard set of "cleanup" optimizations that are
+useful for a wide variety of code. I won't delve into what they do but,
+believe me, they are a good starting place :).
+
+Once the ``Llvm.PassManager.t`` is set up, we need to make use of it. We
+do this by running it after our newly created function is constructed
+(in ``Codegen.codegen_func``), but before it is returned to the client:
+
+.. code-block:: ocaml
+
+ let codegen_func the_fpm = function
+ ...
+ try
+ let ret_val = codegen_expr body in
+
+ (* Finish off the function. *)
+ let _ = build_ret ret_val builder in
+
+ (* Validate the generated code, checking for consistency. *)
+ Llvm_analysis.assert_valid_function the_function;
+
+ (* Optimize the function. *)
+ let _ = PassManager.run_function the_function the_fpm in
+
+ the_function
+
+As you can see, this is pretty straightforward. ``the_fpm``
+optimizes and updates the LLVM Function\* in place, improving
+(hopefully) its body. With this in place, we can try our test above
+again:
+
+::
+
+ ready> def test(x) (1+2+x)*(x+(1+2));
+ ready> Read function definition:
+ define double @test(double %x) {
+ entry:
+ %addtmp = fadd double %x, 3.000000e+00
+ %multmp = fmul double %addtmp, %addtmp
+ ret double %multmp
+ }
+
+As expected, we now get our nicely optimized code, saving a floating
+point add instruction from every execution of this function.
+
+LLVM provides a wide variety of optimizations that can be used in
+certain circumstances. Some `documentation about the various
+passes <../Passes.html>`_ is available, but it isn't very complete.
+Another good source of ideas can come from looking at the passes that
+``Clang`` runs to get started. The "``opt``" tool allows you to
+experiment with passes from the command line, so you can see if they do
+anything.
+
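+If you find a pass you like, wiring it in takes just one more "add"
+call. Purely as an illustrative sketch (these two passes are not part of
+this chapter's pipeline; the names are the ones exposed by
+``Llvm_scalar_opts`` in these bindings):
+
+.. code-block:: ocaml
+
+    (* Illustrative only: two more passes one might experiment with. *)
+    add_memory_to_register_promotion the_fpm; (* the "mem2reg" pass *)
+    add_aggressive_dce the_fpm; (* aggressive dead code elimination *)
+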
+Now that we have reasonable code coming out of our front-end, let's talk
+about executing it!
+
+Adding a JIT Compiler
+=====================
+
+Code that is available in LLVM IR can have a wide variety of tools
+applied to it. For example, you can run optimizations on it (as we did
+above), you can dump it out in textual or binary forms, you can compile
+the code to an assembly file (.s) for some target, or you can JIT
+compile it. The nice thing about the LLVM IR representation is that it
+is the "common currency" between many different parts of the compiler.
+
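+For instance, emitting both the textual and the binary form of our
+module takes only two calls. This is just a sketch, not part of the
+tutorial's driver, and it assumes the ``llvm_bitwriter`` library is
+linked in alongside the ones we already use:
+
+.. code-block:: ocaml
+
+    (* Sketch: print the module as textual IR, then write the same
+     * module out in binary bitcode form as "toy.bc". *)
+    dump_module Codegen.the_module;
+    ignore (Llvm_bitwriter.write_bitcode_file Codegen.the_module "toy.bc")
+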
+In this section, we'll add JIT compiler support to our interpreter. The
+basic idea that we want for Kaleidoscope is to have the user enter
+function bodies as they do now, but immediately evaluate the top-level
+expressions they type in. For example, if they type in "1 + 2;", we
+should evaluate and print out 3. If they define a function, they should
+be able to call it from the command line.
+
+In order to do this, we first declare and initialize the JIT. This is
+done by adding a global variable and a call in ``main``:
+
+.. code-block:: ocaml
+
+ ...
+ let main () =
+ ...
+ (* Create the JIT. *)
+ let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+ ...
+
+This creates an abstract "Execution Engine" which can be either a JIT
+compiler or the LLVM interpreter. LLVM will automatically pick a JIT
+compiler for you if one is available for your platform, otherwise it
+will fall back to the interpreter.
+
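+If you would rather take that choice out of LLVM's hands, the bindings
+also expose constructors for one specific kind of engine. A hypothetical
+sketch (the tutorial itself just uses ``create``):
+
+.. code-block:: ocaml
+
+    (* Force the interpreter, even if a JIT is available. *)
+    let the_interpreter = ExecutionEngine.create_interpreter Codegen.the_module in
+
+    (* Ask for a JIT explicitly; the integer is the code generator's
+     * optimization level. *)
+    let the_jit = ExecutionEngine.create_jit Codegen.the_module 3 in
+    ...
+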
+Once the ``Llvm_executionengine.ExecutionEngine.t`` is created, the JIT
+is ready to be used. There are a variety of APIs that are useful, but
+the simplest one is the
+"``Llvm_executionengine.ExecutionEngine.run_function``" function. This
+function JIT compiles the specified LLVM Function, executes it with the
+given arguments, and returns its result. In our case, this means that we
+can change the code that parses a top-level expression to look like
+this:
+
+.. code-block:: ocaml
+
+ (* Evaluate a top-level expression into an anonymous function. *)
+ let e = Parser.parse_toplevel stream in
+ print_endline "parsed a top-level expr";
+ let the_function = Codegen.codegen_func the_fpm e in
+ dump_value the_function;
+
+ (* JIT the function, returning its result. *)
+ let result = ExecutionEngine.run_function the_function [||]
+ the_execution_engine in
+
+ print_string "Evaluated to ";
+ print_float (GenericValue.as_float Codegen.double_type result);
+ print_newline ();
+
+Recall that we compile top-level expressions into a self-contained LLVM
+function that takes no arguments and returns the computed double.
+Because the LLVM JIT compiler matches the native platform ABI, this
+means that you can just cast the result pointer to a function pointer of
+that type and call it directly. This means there is no difference
+between JIT-compiled code and native machine code that is statically
+linked into your application.
+
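+The same mechanism works for functions that take arguments:
+``run_function`` accepts an array of ``GenericValue.t`` values, one per
+parameter. As a hypothetical sketch (the tutorial's driver never calls
+named functions directly; this assumes the opens and
+``the_execution_engine`` from ``toplevel.ml``):
+
+.. code-block:: ocaml
+
+    (* Hypothetical helper: look up a one-argument function by name, box
+     * the argument as a GenericValue, run it, and unbox the result. *)
+    let call_unary name x =
+      match lookup_function name Codegen.the_module with
+      | None -> failwith ("unknown function " ^ name)
+      | Some f ->
+          let args = [| GenericValue.of_float Codegen.double_type x |] in
+          let result = ExecutionEngine.run_function f args the_execution_engine in
+          GenericValue.as_float Codegen.double_type result
+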
+With just these two changes, let's see how Kaleidoscope works now!
+
+::
+
+ ready> 4+5;
+ define double @""() {
+ entry:
+ ret double 9.000000e+00
+ }
+
+ Evaluated to 9.000000
+
+Well, this looks like it is basically working. The dump of the function
+shows the "no argument function that always returns double" that we
+synthesize for each top level expression that is typed in. This
+demonstrates very basic functionality, but can we do more?
+
+::
+
+ ready> def testfunc(x y) x + y*2;
+ Read function definition:
+ define double @testfunc(double %x, double %y) {
+ entry:
+ %multmp = fmul double %y, 2.000000e+00
+ %addtmp = fadd double %multmp, %x
+ ret double %addtmp
+ }
+
+ ready> testfunc(4, 10);
+ define double @""() {
+ entry:
+ %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
+ ret double %calltmp
+ }
+
+ Evaluated to 24.000000
+
+This illustrates that we can now call user code, but there is something
+a bit subtle going on here. Note that we only invoke the JIT on the
+anonymous functions that *call testfunc*, but we never invoked it on
+*testfunc* itself. What actually happened here is that the JIT scanned
+for all non-JIT'd functions transitively called from the anonymous
+function and compiled all of them before returning from
+``run_function``.
+
+The JIT provides a number of other more advanced interfaces for things
+like freeing allocated machine code, rejit'ing functions to update them,
+etc. However, even with this simple code, we get some surprisingly
+powerful capabilities - check this out (I removed the dump of the
+anonymous functions, you should get the idea by now :) :
+
+::
+
+ ready> extern sin(x);
+ Read extern:
+ declare double @sin(double)
+
+ ready> extern cos(x);
+ Read extern:
+ declare double @cos(double)
+
+ ready> sin(1.0);
+ Evaluated to 0.841471
+
+ ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
+ Read function definition:
+ define double @foo(double %x) {
+ entry:
+ %calltmp = call double @sin(double %x)
+ %multmp = fmul double %calltmp, %calltmp
+ %calltmp2 = call double @cos(double %x)
+ %multmp4 = fmul double %calltmp2, %calltmp2
+ %addtmp = fadd double %multmp, %multmp4
+ ret double %addtmp
+ }
+
+ ready> foo(4.0);
+ Evaluated to 1.000000
+
+Whoa, how does the JIT know about sin and cos? The answer is
+surprisingly simple: in this example, the JIT started execution of a
+function and got to a function call. It realized that the function was
+not yet JIT compiled and invoked the standard set of routines to resolve
+the function. In this case, there is no body defined for the function,
+so the JIT ended up calling "``dlsym("sin")``" on the Kaleidoscope
+process itself. Since "``sin``" is defined within the JIT's address
+space, it simply patches up calls in the module to call the libm version
+of ``sin`` directly.
+
+The LLVM JIT provides a number of interfaces (look in the
+``llvm_executionengine.mli`` file) for controlling how unknown functions
+get resolved. It allows you to establish explicit mappings between IR
+objects and addresses (useful for LLVM global variables that you want to
+map to static tables, for example), allows you to dynamically decide on
+the fly based on the function name, and even allows you to have the JIT
+compile functions lazily the first time they're called.
+
+One interesting application of this is that we can now extend the
+language by writing arbitrary C code to implement operations. For
+example, if we add:
+
+.. code-block:: c++
+
+ /* putchard - putchar that takes a double and returns 0. */
+ extern "C"
+ double putchard(double X) {
+ putchar((char)X);
+ return 0;
+ }
+
+Now we can produce simple output to the console by using things like:
+"``extern putchard(x); putchard(120);``", which prints a lowercase 'x'
+on the console (120 is the ASCII code for 'x'). Similar code could be
+used to implement file I/O, console input, and many other capabilities
+in Kaleidoscope.
+
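+As another illustration (this ``printd`` helper is not part of this
+chapter's ``bindings.c``, but follows the same pattern), a function that
+prints a double followed by a newline might look like:
+
+.. code-block:: c++
+
+    #include <stdio.h>
+
+    /* printd - printf that takes a double, prints it with a newline,
+     * and returns 0. */
+    extern "C"
+    double printd(double X) {
+      printf("%f\n", X);
+      return 0;
+    }
+
+Declared with "``extern printd(x);``", it could then be called like any
+other Kaleidoscope function.
+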
+This completes the JIT and optimizer chapter of the Kaleidoscope
+tutorial. At this point, we can compile a non-Turing-complete
+programming language, optimize and JIT compile it in a user-driven way.
+Next up we'll look into `extending the language with control flow
+constructs <OCamlLangImpl5.html>`_, tackling some interesting LLVM IR
+issues along the way.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the LLVM JIT and optimizer. To build this example, use:
+
+.. code-block:: bash
+
+ # Compile
+ ocamlbuild toy.byte
+ # Run
+ ./toy.byte
+
+Here is the code:
+
+\_tags:
+ ::
+
+ <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+ <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+ <*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+ <*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+myocamlbuild.ml:
+ .. code-block:: ocaml
+
+ open Ocamlbuild_plugin;;
+
+ ocaml_lib ~extern:true "llvm";;
+ ocaml_lib ~extern:true "llvm_analysis";;
+ ocaml_lib ~extern:true "llvm_executionengine";;
+ ocaml_lib ~extern:true "llvm_target";;
+ ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+ flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
+ dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+token.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+ (* The lexer returns a 'Kwd' token for an unknown character, otherwise one of
+ * the other token types for known things. *)
+ type token =
+ (* commands *)
+ | Def | Extern
+
+ (* primary *)
+ | Ident of string | Number of float
+
+ (* unknown *)
+ | Kwd of char
+
+lexer.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+ let rec lex = parser
+ (* Skip any whitespace. *)
+ | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+ (* identifier: [a-zA-Z][a-zA-Z0-9]* *)
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+
+ (* number: [0-9.]+ *)
+ | [< ' ('0' .. '9' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+
+ (* Comment until end of line. *)
+ | [< ' ('#'); stream >] ->
+ lex_comment stream
+
+ (* Otherwise, just return the character as its ascii value. *)
+ | [< 'c; stream >] ->
+ [< 'Token.Kwd c; lex stream >]
+
+ (* end of stream. *)
+ | [< >] -> [< >]
+
+ and lex_number buffer = parser
+ | [< ' ('0' .. '9' | '.' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+ | [< stream=lex >] ->
+ [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+ and lex_ident buffer = parser
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+ | [< stream=lex >] ->
+ match Buffer.contents buffer with
+ | "def" -> [< 'Token.Def; stream >]
+ | "extern" -> [< 'Token.Extern; stream >]
+ | id -> [< 'Token.Ident id; stream >]
+
+ and lex_comment = parser
+ | [< ' ('\n'); stream=lex >] -> stream
+ | [< 'c; e=lex_comment >] -> e
+ | [< >] -> [< >]
+
+ast.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+ (* expr - Base type for all expression nodes. *)
+ type expr =
+ (* variant for numeric literals like "1.0". *)
+ | Number of float
+
+ (* variant for referencing a variable, like "a". *)
+ | Variable of string
+
+ (* variant for a binary operator. *)
+ | Binary of char * expr * expr
+
+ (* variant for function calls. *)
+ | Call of string * expr array
+
+ (* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+ type proto = Prototype of string * string array
+
+ (* func - This type represents a function definition itself. *)
+ type func = Function of proto * expr
+
+parser.ml:
+ .. code-block:: ocaml
+
+ (*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+ (* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+ let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+ (* precedence - Get the precedence of the pending binary operator token. *)
+ let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+ (* primary
+ * ::= identifier
+ * ::= numberexpr
+ * ::= parenexpr *)
+ let rec parse_primary = parser
+ (* numberexpr ::= number *)
+ | [< 'Token.Number n >] -> Ast.Number n
+
+ (* parenexpr ::= '(' expression ')' *)
+ | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+ (* identifierexpr
+ * ::= identifier
+ * ::= identifier '(' argumentexpr ')' *)
+ | [< 'Token.Ident id; stream >] ->
+ let rec parse_args accumulator = parser
+ | [< e=parse_expr; stream >] ->
+ begin parser
+ | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+ | [< >] -> e :: accumulator
+ end stream
+ | [< >] -> accumulator
+ in
+ let rec parse_ident id = parser
+ (* Call. *)
+ | [< 'Token.Kwd '(';
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')'">] ->
+ Ast.Call (id, Array.of_list (List.rev args))
+
+ (* Simple variable ref. *)
+ | [< >] -> Ast.Variable id
+ in
+ parse_ident id stream
+
+ | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+ (* binoprhs
+ * ::= ('+' primary)* *)
+ and parse_bin_rhs expr_prec lhs stream =
+ match Stream.peek stream with
+ (* If this is a binop, find its precedence. *)
+ | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+ let token_prec = precedence c in
+
+ (* If this is a binop that binds at least as tightly as the current binop,
+ * consume it, otherwise we are done. *)
+ if token_prec < expr_prec then lhs else begin
+ (* Eat the binop. *)
+ Stream.junk stream;
+
+ (* Parse the primary expression after the binary operator. *)
+ let rhs = parse_primary stream in
+
+ (* Okay, we know this is a binop. *)
+ let rhs =
+ match Stream.peek stream with
+ | Some (Token.Kwd c2) ->
+ (* If BinOp binds less tightly with rhs than the operator after
+ * rhs, let the pending operator take rhs as its lhs. *)
+ let next_prec = precedence c2 in
+ if token_prec < next_prec
+ then parse_bin_rhs (token_prec + 1) rhs stream
+ else rhs
+ | _ -> rhs
+ in
+
+ (* Merge lhs/rhs. *)
+ let lhs = Ast.Binary (c, lhs, rhs) in
+ parse_bin_rhs expr_prec lhs stream
+ end
+ | _ -> lhs
+
+ (* expression
+ * ::= primary binoprhs *)
+ and parse_expr = parser
+ | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+ (* prototype
+ * ::= id '(' id* ')' *)
+ let parse_prototype =
+ let rec parse_args accumulator = parser
+ | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+ | [< >] -> accumulator
+ in
+
+ parser
+ | [< 'Token.Ident id;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ (* success. *)
+ Ast.Prototype (id, Array.of_list (List.rev args))
+
+ | [< >] ->
+ raise (Stream.Error "expected function name in prototype")
+
+ (* definition ::= 'def' prototype expression *)
+ let parse_definition = parser
+ | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+ Ast.Function (p, e)
+
+ (* toplevelexpr ::= expression *)
+ let parse_toplevel = parser
+ | [< e=parse_expr >] ->
+ (* Make an anonymous proto. *)
+ Ast.Function (Ast.Prototype ("", [||]), e)
+
+ (* external ::= 'extern' prototype *)
+ let parse_extern = parser
+ | [< 'Token.Extern; e=parse_prototype >] -> e
+
+codegen.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+
+ exception Error of string
+
+ let context = global_context ()
+ let the_module = create_module context "my cool jit"
+ let builder = builder context
+ let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+ let double_type = double_type context
+
+ let rec codegen_expr = function
+ | Ast.Number n -> const_float double_type n
+ | Ast.Variable name ->
+ (try Hashtbl.find named_values name with
+ | Not_found -> raise (Error "unknown variable name"))
+ | Ast.Binary (op, lhs, rhs) ->
+ let lhs_val = codegen_expr lhs in
+ let rhs_val = codegen_expr rhs in
+ begin
+ match op with
+ | '+' -> build_add lhs_val rhs_val "addtmp" builder
+ | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+ | '*' -> build_mul lhs_val rhs_val "multmp" builder
+ | '<' ->
+ (* Convert bool 0/1 to double 0.0 or 1.0 *)
+ let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+ build_uitofp i double_type "booltmp" builder
+ | _ -> raise (Error "invalid binary operator")
+ end
+ | Ast.Call (callee, args) ->
+ (* Look up the name in the module table. *)
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "unknown function referenced")
+ in
+ let params = params callee in
+
+ (* If argument mismatch error. *)
+ if Array.length params == Array.length args then () else
+ raise (Error "incorrect # arguments passed");
+ let args = Array.map codegen_expr args in
+ build_call callee args "calltmp" builder
+
+ let codegen_proto = function
+ | Ast.Prototype (name, args) ->
+ (* Make the function type: double(double,double) etc. *)
+ let doubles = Array.make (Array.length args) double_type in
+ let ft = function_type double_type doubles in
+ let f =
+ match lookup_function name the_module with
+ | None -> declare_function name ft the_module
+
+ (* If 'f' conflicted, there was already something named 'name'. If it
+ * has a body, don't allow redefinition or reextern. *)
+ | Some f ->
+ (* If 'f' already has a body, reject this. *)
+ if block_begin f <> At_end f then
+ raise (Error "redefinition of function");
+
+ (* If 'f' took a different number of arguments, reject. *)
+ if element_type (type_of f) <> ft then
+ raise (Error "redefinition of function with different # args");
+ f
+ in
+
+ (* Set names for all arguments. *)
+ Array.iteri (fun i a ->
+ let n = args.(i) in
+ set_value_name n a;
+ Hashtbl.add named_values n a;
+ ) (params f);
+ f
+
+ let codegen_func the_fpm = function
+ | Ast.Function (proto, body) ->
+ Hashtbl.clear named_values;
+ let the_function = codegen_proto proto in
+
+ (* Create a new basic block to start insertion into. *)
+ let bb = append_block context "entry" the_function in
+ position_at_end bb builder;
+
+ try
+ let ret_val = codegen_expr body in
+
+ (* Finish off the function. *)
+ let _ = build_ret ret_val builder in
+
+ (* Validate the generated code, checking for consistency. *)
+ Llvm_analysis.assert_valid_function the_function;
+
+ (* Optimize the function. *)
+ let _ = PassManager.run_function the_function the_fpm in
+
+ the_function
+ with e ->
+ delete_function the_function;
+ raise e
+
+toplevel.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+ open Llvm_executionengine
+
+ (* top ::= definition | external | expression | ';' *)
+ let rec main_loop the_fpm the_execution_engine stream =
+ match Stream.peek stream with
+ | None -> ()
+
+ (* ignore top-level semicolons. *)
+ | Some (Token.Kwd ';') ->
+ Stream.junk stream;
+ main_loop the_fpm the_execution_engine stream
+
+ | Some token ->
+ begin
+ try match token with
+ | Token.Def ->
+ let e = Parser.parse_definition stream in
+ print_endline "parsed a function definition.";
+ dump_value (Codegen.codegen_func the_fpm e);
+ | Token.Extern ->
+ let e = Parser.parse_extern stream in
+ print_endline "parsed an extern.";
+ dump_value (Codegen.codegen_proto e);
+ | _ ->
+ (* Evaluate a top-level expression into an anonymous function. *)
+ let e = Parser.parse_toplevel stream in
+ print_endline "parsed a top-level expr";
+ let the_function = Codegen.codegen_func the_fpm e in
+ dump_value the_function;
+
+ (* JIT the function, returning a function pointer. *)
+ let result = ExecutionEngine.run_function the_function [||]
+ the_execution_engine in
+
+ print_string "Evaluated to ";
+ print_float (GenericValue.as_float Codegen.double_type result);
+ print_newline ();
+ with Stream.Error s | Codegen.Error s ->
+ (* Skip token for error recovery. *)
+ Stream.junk stream;
+ print_endline s;
+ end;
+ print_string "ready> "; flush stdout;
+ main_loop the_fpm the_execution_engine stream
+
+toy.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+ open Llvm_executionengine
+ open Llvm_target
+ open Llvm_scalar_opts
+
+ let main () =
+ ignore (initialize_native_target ());
+
+ (* Install standard binary operators.
+ * 1 is the lowest precedence. *)
+ Hashtbl.add Parser.binop_precedence '<' 10;
+ Hashtbl.add Parser.binop_precedence '+' 20;
+ Hashtbl.add Parser.binop_precedence '-' 20;
+ Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
+
+ (* Prime the first token. *)
+ print_string "ready> "; flush stdout;
+ let stream = Lexer.lex (Stream.of_channel stdin) in
+
+ (* Create the JIT. *)
+ let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+ let the_fpm = PassManager.create_function Codegen.the_module in
+
+ (* Set up the optimizer pipeline. Start with registering info about how the
+ * target lays out data structures. *)
+ DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+ (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+ add_instruction_combination the_fpm;
+
+ (* reassociate expressions. *)
+ add_reassociation the_fpm;
+
+ (* Eliminate Common SubExpressions. *)
+ add_gvn the_fpm;
+
+ (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+ add_cfg_simplification the_fpm;
+
+ ignore (PassManager.initialize the_fpm);
+
+ (* Run the main "interpreter loop" now. *)
+ Toplevel.main_loop the_fpm the_execution_engine stream;
+
+ (* Print out all the generated code. *)
+ dump_module Codegen.the_module
+ ;;
+
+ main ()
+
+bindings.c:
+ .. code-block:: c
+
+ #include <stdio.h>
+
+ /* putchard - putchar that takes a double and returns 0. */
+ extern double putchard(double X) {
+ putchar((char)X);
+ return 0;
+ }
+
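+With this C helper linked in (the ``use_bindings`` tag in ``_tags`` makes the
+link depend on ``bindings.o``), Kaleidoscope programs can call straight into
+C. A small hypothetical session:
+
+.. code-block:: none
+
+    extern putchard(x);
+    putchard(120);  # prints 'x' (ASCII 120); the call itself returns 0.0
+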
+`Next: Extending the language: control flow <OCamlLangImpl5.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl5.html b/docs/tutorial/OCamlLangImpl5.html
deleted file mode 100644
index 0a759ac66d67..000000000000
--- a/docs/tutorial/OCamlLangImpl5.html
+++ /dev/null
@@ -1,1560 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Extending the Language: Control Flow</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <meta name="author" content="Erick Tryzelaar">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: Control Flow</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 5
- <ol>
- <li><a href="#intro">Chapter 5 Introduction</a></li>
- <li><a href="#ifthen">If/Then/Else</a>
- <ol>
- <li><a href="#iflexer">Lexer Extensions</a></li>
- <li><a href="#ifast">AST Extensions</a></li>
- <li><a href="#ifparser">Parser Extensions</a></li>
- <li><a href="#ifir">LLVM IR</a></li>
- <li><a href="#ifcodegen">Code Generation</a></li>
- </ol>
- </li>
- <li><a href="#for">'for' Loop Expression</a>
- <ol>
- <li><a href="#forlexer">Lexer Extensions</a></li>
- <li><a href="#forast">AST Extensions</a></li>
- <li><a href="#forparser">Parser Extensions</a></li>
- <li><a href="#forir">LLVM IR</a></li>
- <li><a href="#forcodegen">Code Generation</a></li>
- </ol>
- </li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="OCamlLangImpl6.html">Chapter 6</a>: Extending the Language:
-User-defined Operators</li>
-</ul>
-
-<div class="doc_author">
- <p>
- Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
- and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
- </p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 5 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 5 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial. Parts 1-4 described the implementation of the simple
-Kaleidoscope language and included support for generating LLVM IR, followed by
-optimizations and a JIT compiler. Unfortunately, as presented, Kaleidoscope is
-mostly useless: it has no control flow other than call and return. This means
-that you can't have conditional branches in the code, significantly limiting its
-power. In this episode of "build that compiler", we'll extend Kaleidoscope to
-have an if/then/else expression plus a simple 'for' loop.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="ifthen">If/Then/Else</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Extending Kaleidoscope to support if/then/else is quite straightforward. It
-basically requires adding support for this "new" concept to the lexer,
-parser, AST, and LLVM code emitter. This example is nice, because it shows how
-easy it is to "grow" a language over time, incrementally extending it as new
-ideas are discovered.</p>
-
-<p>Before we get going on "how" we add this extension, lets talk about "what" we
-want. The basic idea is that we want to be able to write this sort of thing:
-</p>
-
-<div class="doc_code">
-<pre>
-def fib(x)
- if x &lt; 3 then
- 1
- else
- fib(x-1)+fib(x-2);
-</pre>
-</div>
-
-<p>In Kaleidoscope, every construct is an expression: there are no statements.
-As such, the if/then/else expression needs to return a value like any other.
-Since we're using a mostly functional form, we'll have it evaluate its
-conditional, then return the 'then' or 'else' value based on how the condition
-was resolved. This is very similar to the C "?:" expression.</p>
-
-<p>The semantics of the if/then/else expression is that it evaluates the
-condition to a boolean equality value: 0.0 is considered to be false and
-everything else is considered to be true.
-If the condition is true, the first subexpression is evaluated and returned, if
-the condition is false, the second subexpression is evaluated and returned.
-Since Kaleidoscope allows side-effects, this behavior is important to nail down.
-</p>
-
-<p>Now that we know what we "want", lets break this down into its constituent
-pieces.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="iflexer">Lexer Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-
-<div>
-
-<p>The lexer extensions are straightforward. First we add new variants
-for the relevant tokens:</p>
-
-<div class="doc_code">
-<pre>
- (* control *)
- | If | Then | Else | For | In
-</pre>
-</div>
-
-<p>Once we have that, we recognize the new keywords in the lexer. This is pretty simple
-stuff:</p>
-
-<div class="doc_code">
-<pre>
- ...
- match Buffer.contents buffer with
- | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
- | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
- | "if" -&gt; [&lt; 'Token.If; stream &gt;]
- | "then" -&gt; [&lt; 'Token.Then; stream &gt;]
- | "else" -&gt; [&lt; 'Token.Else; stream &gt;]
- | "for" -&gt; [&lt; 'Token.For; stream &gt;]
- | "in" -&gt; [&lt; 'Token.In; stream &gt;]
- | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifast">AST Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>To represent the new expression we add a new AST variant for it:</p>
-
-<div class="doc_code">
-<pre>
-type expr =
- ...
- (* variant for if/then/else. *)
- | If of expr * expr * expr
-</pre>
-</div>
-
-<p>The AST variant just has pointers to the various subexpressions.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifparser">Parser Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now that we have the relevant tokens coming from the lexer and we have the
-AST node to build, our parsing logic is relatively straightforward. First we
-define a new parsing function:</p>
-
-<div class="doc_code">
-<pre>
-let rec parse_primary = parser
- ...
- (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
- | [&lt; 'Token.If; c=parse_expr;
- 'Token.Then ?? "expected 'then'"; t=parse_expr;
- 'Token.Else ?? "expected 'else'"; e=parse_expr &gt;] -&gt;
- Ast.If (c, t, e)
-</pre>
-</div>
-
-<p>Next we hook it up as a primary expression:</p>
-
-<div class="doc_code">
-<pre>
-let rec parse_primary = parser
- ...
- (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
- | [&lt; 'Token.If; c=parse_expr;
- 'Token.Then ?? "expected 'then'"; t=parse_expr;
- 'Token.Else ?? "expected 'else'"; e=parse_expr &gt;] -&gt;
- Ast.If (c, t, e)
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifir">LLVM IR for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now that we have it parsing and building the AST, the final piece is adding
-LLVM code generation support. This is the most interesting part of the
-if/then/else example, because this is where it starts to introduce new concepts.
-All of the code above has been thoroughly described in previous chapters.
-</p>
-
-<p>To motivate the code we want to produce, lets take a look at a simple
-example. Consider:</p>
-
-<div class="doc_code">
-<pre>
-extern foo();
-extern bar();
-def baz(x) if x then foo() else bar();
-</pre>
-</div>
-
-<p>If you disable optimizations, the code you'll (soon) get from Kaleidoscope
-looks like this:</p>
-
-<div class="doc_code">
-<pre>
-declare double @foo()
-
-declare double @bar()
-
-define double @baz(double %x) {
-entry:
- %ifcond = fcmp one double %x, 0.000000e+00
- br i1 %ifcond, label %then, label %else
-
-then: ; preds = %entry
- %calltmp = call double @foo()
- br label %ifcont
-
-else: ; preds = %entry
- %calltmp1 = call double @bar()
- br label %ifcont
-
-ifcont: ; preds = %else, %then
- %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
- ret double %iftmp
-}
-</pre>
-</div>
-
-<p>To visualize the control flow graph, you can use a nifty feature of the LLVM
-'<a href="http://llvm.org/cmds/opt.html">opt</a>' tool. If you put this LLVM IR
-into "t.ll" and run "<tt>llvm-as &lt; t.ll | opt -analyze -view-cfg</tt>", <a
-href="../ProgrammersManual.html#ViewGraph">a window will pop up</a> and you'll
-see this graph:</p>
-
-<div style="text-align: center"><img src="LangImpl5-cfg.png" alt="Example CFG" width="423"
-height="315"></div>
-
-<p>Another way to get this is to call "<tt>Llvm_analysis.view_function_cfg
-f</tt>" or "<tt>Llvm_analysis.view_function_cfg_only f</tt>" (where <tt>f</tt>
-is a "<tt>Function</tt>") either by inserting actual calls into the code and
-recompiling or by calling these in the debugger. LLVM has many nice features
-for visualizing various graphs.</p>
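-
-<p>For instance, a quick way to do this (a sketch: it assumes you are
-recompiling anyway, and that a graph viewer such as Graphviz is installed) is
-to temporarily add the call at the end of <tt>Codegen.codegen_func</tt>:</p>
-
-<div class="doc_code">
-<pre>
-(* Temporary debugging aid: pop up a CFG viewer for each generated
- * function. Remove it once you are done inspecting the graphs. *)
-Llvm_analysis.view_function_cfg the_function;
-</pre>
-</div>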
-
-<p>Getting back to the generated code, it is fairly simple: the entry block
-evaluates the conditional expression ("x" in our case here) and compares the
-result to 0.0 with the "<tt><a href="../LangRef.html#i_fcmp">fcmp</a> one</tt>"
-instruction ('one' is "Ordered and Not Equal"). Based on the result of this
-expression, the code jumps to either the "then" or "else" blocks, which contain
-the expressions for the true/false cases.</p>
-
-<p>Once the then/else blocks are finished executing, they both branch back to the
-'ifcont' block to execute the code that happens after the if/then/else. In this
-case the only thing left to do is to return to the caller of the function. The
-question then becomes: how does the code know which expression to return?</p>
-
-<p>The answer to this question involves an important SSA operation: the
-<a href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Phi
-operation</a>. If you're not familiar with SSA, <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">the wikipedia
-article</a> is a good introduction and there are various other introductions to
-it available on your favorite search engine. The short version is that
-"execution" of the Phi operation requires "remembering" which block control came
-from. The Phi operation takes on the value corresponding to the input control
-block. In this case, if control comes in from the "then" block, it gets the
-value of "calltmp". If control comes from the "else" block, it gets the value
-of "calltmp1".</p>
-
-<p>At this point, you are probably starting to think "Oh no! This means my
-simple and elegant front-end will have to start generating SSA form in order to
-use LLVM!". Fortunately, this is not the case, and we strongly advise
-<em>not</em> implementing an SSA construction algorithm in your front-end
-unless there is an amazingly good reason to do so. In practice, there are two
-sorts of values that float around in code written for your average imperative
-programming language that might need Phi nodes:</p>
-
-<ol>
-<li>Code that involves user variables: <tt>x = 1; x = x + 1; </tt></li>
-<li>Values that are implicit in the structure of your AST, such as the Phi node
-in this case.</li>
-</ol>
-
-<p>In <a href="OCamlLangImpl7.html">Chapter 7</a> of this tutorial ("mutable
-variables"), we'll talk about #1
-in depth. For now, just believe me that you don't need SSA construction to
-handle this case. For #2, you have the choice of using the techniques that we will
-describe for #1, or you can insert Phi nodes directly, if convenient. In this
-case, it is very easy to generate the Phi node, so we choose to do it
-directly.</p>
-
-<p>Okay, enough of the motivation and overview, lets generate code!</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifcodegen">Code Generation for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>In order to generate code for this, we implement the <tt>Codegen</tt> method
-for <tt>IfExprAST</tt>:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
- ...
- | Ast.If (cond, then_, else_) -&gt;
- let cond = codegen_expr cond in
-
- (* Convert condition to a bool by comparing equal to 0.0 *)
- let zero = const_float double_type 0.0 in
- let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-</pre>
-</div>
-
-<p>This code is straightforward and similar to what we saw before. We emit the
-expression for the condition, then compare that value to zero to get a truth
-value as a 1-bit (bool) value.</p>
-
-<div class="doc_code">
-<pre>
- (* Grab the first block so that we might later add the conditional branch
- * to it at the end of the function. *)
- let start_bb = insertion_block builder in
- let the_function = block_parent start_bb in
-
- let then_bb = append_block context "then" the_function in
- position_at_end then_bb builder;
-</pre>
-</div>
-
-<p>
-As opposed to the <a href="LangImpl5.html">C++ tutorial</a>, we have to build
-our basic blocks bottom up since we can't have dangling BasicBlocks. We start
-off by saving a pointer to the first block (which might not be the entry
-block), which we'll need to build a conditional branch later. We do this by
-asking the <tt>builder</tt> for the current BasicBlock. The fourth line
-gets the current Function object that is being built. It gets this by asking
-<tt>start_bb</tt> for its "parent" (the function it is currently embedded
-into).</p>
-
-<p>Once it has that, it creates one block. It is automatically appended into
-the function's list of blocks.</p>
-
-<div class="doc_code">
-<pre>
- (* Emit 'then' value. *)
- position_at_end then_bb builder;
- let then_val = codegen_expr then_ in
-
- (* Codegen of 'then' can change the current block, update then_bb for the
- * phi. We create a new name because one is used for the phi node, and the
- * other is used for the conditional branch. *)
- let new_then_bb = insertion_block builder in
-</pre>
-</div>
-
-<p>We move the builder to start inserting into the "then" block. Strictly
-speaking, this call moves the insertion point to be at the end of the specified
-block. However, since the "then" block is empty, it also starts out by
-inserting at the beginning of the block. :)</p>
-
-<p>Once the insertion point is set, we recursively codegen the "then" expression
-from the AST.</p>
-
-<p>The final line here is quite subtle, but is very important. The basic issue
-is that when we create the Phi node in the merge block, we need to set up the
-block/value pairs that indicate how the Phi will work. Importantly, the Phi
-node expects to have an entry for each predecessor of the block in the CFG. Why
-then, are we getting the current block when we just set it to <tt>then_bb</tt> 5 lines
-above? The problem is that the "Then" expression may actually itself change the
-block that the Builder is emitting into if, for example, it contains a nested
-"if/then/else" expression. Because calling Codegen recursively could
-arbitrarily change the notion of the current block, we are required to get an
-up-to-date value for code that will set up the Phi node.</p>
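-
-<p>A hypothetical input that exercises exactly this case is a nested
-conditional, such as:</p>
-
-<div class="doc_code">
-<pre>
-def sign(x)
-  if x &lt; 0 then
-    0-1
-  else
-    if x then 1 else 0;
-</pre>
-</div>
-
-<p>Codegen of the outer "else" value here creates new blocks for the inner
-if/then/else, so the block the builder ends up in is not the one we
-originally appended.</p>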
-
-<div class="doc_code">
-<pre>
- (* Emit 'else' value. *)
- let else_bb = append_block context "else" the_function in
- position_at_end else_bb builder;
- let else_val = codegen_expr else_ in
-
- (* Codegen of 'else' can change the current block, update else_bb for the
- * phi. *)
- let new_else_bb = insertion_block builder in
-</pre>
-</div>
-
-<p>Code generation for the 'else' block is basically identical to codegen for
-the 'then' block.</p>
-
-<div class="doc_code">
-<pre>
- (* Emit merge block. *)
- let merge_bb = append_block context "ifcont" the_function in
- position_at_end merge_bb builder;
- let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
- let phi = build_phi incoming "iftmp" builder in
-</pre>
-</div>
-
-<p>The first two lines here are now familiar: the first adds the "merge" block
-to the Function object. The second changes the insertion point so that
-newly created code will go into the "merge" block. Once that is done, we need
-to create the PHI node and set up the block/value pairs for the PHI.</p>
-
-<div class="doc_code">
-<pre>
- (* Return to the start block to add the conditional branch. *)
- position_at_end start_bb builder;
- ignore (build_cond_br cond_val then_bb else_bb builder);
-</pre>
-</div>
-
-<p>Once the blocks are created, we can emit the conditional branch that chooses
-between them. Note that creating new blocks does not implicitly affect the
-IRBuilder, so it is still inserting into the block that the condition
-went into. This is why we needed to save the "start" block.</p>
-
-<div class="doc_code">
-<pre>
-  (* Set an unconditional branch at the end of the 'then' block and the
- * 'else' block to the 'merge' block. *)
- position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
- position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
- (* Finally, set the builder to the end of the merge block. *)
- position_at_end merge_bb builder;
-
- phi
-</pre>
-</div>
-
-<p>To finish off the blocks, we create an unconditional branch
-to the merge block. One interesting (and very important) aspect of the LLVM IR
-is that it <a href="../LangRef.html#functionstructure">requires all basic blocks
-to be "terminated"</a> with a <a href="../LangRef.html#terminators">control flow
-instruction</a> such as return or branch. This means that all control flow,
-<em>including fall throughs</em> must be made explicit in the LLVM IR. If you
-violate this rule, the verifier will emit an error.
-
-<p>Finally, the CodeGen function returns the phi node as the value computed by
-the if/then/else expression. In our example above, this returned value will
-feed into the code for the top-level function, which will create the return
-instruction.</p>
-
-<p>Overall, we now have the ability to execute conditional code in
-Kaleidoscope. With this extension, Kaleidoscope is a fairly complete language
-that can calculate a wide variety of numeric functions. Next up we'll add
-another useful expression that is familiar from non-functional languages...</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="for">'for' Loop Expression</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we know how to add basic control flow constructs to the language,
-we have the tools to add more powerful things. Let's add something more
-aggressive, a 'for' expression:</p>
-
-<div class="doc_code">
-<pre>
- extern putchard(char);
- def printstar(n)
- for i = 1, i &lt; n, 1.0 in
- putchard(42); # ascii 42 = '*'
-
- # print 100 '*' characters
- printstar(100);
-</pre>
-</div>
-
-<p>This expression defines a new variable ("i" in this case) which iterates from
-a starting value, while the condition ("i &lt; n" in this case) is true,
-incrementing by an optional step value ("1.0" in this case). If the step value
-is omitted, it defaults to 1.0. While the condition is true, the loop executes its
-body expression. Because we don't have anything better to return, we'll just
-define the loop as always returning 0.0. In the future when we have mutable
-variables, it will get more useful.</p>
-
-<p>As before, lets talk about the changes that we need to Kaleidoscope to
-support this.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="forlexer">Lexer Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The lexer extensions are the same sort of thing as for if/then/else:</p>
-
-<div class="doc_code">
-<pre>
- ... in Token.token ...
- (* control *)
- | If | Then | Else
- <b>| For | In</b>
-
- ... in Lexer.lex_ident...
- match Buffer.contents buffer with
- | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
- | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
- | "if" -&gt; [&lt; 'Token.If; stream &gt;]
- | "then" -&gt; [&lt; 'Token.Then; stream &gt;]
- | "else" -&gt; [&lt; 'Token.Else; stream &gt;]
- <b>| "for" -&gt; [&lt; 'Token.For; stream &gt;]
- | "in" -&gt; [&lt; 'Token.In; stream &gt;]</b>
- | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forast">AST Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The AST variant is just as simple. It basically boils down to capturing
-the variable name and the constituent expressions in the node.</p>
-
-<div class="doc_code">
-<pre>
-type expr =
- ...
- (* variant for for/in. *)
- | For of string * expr * expr * expr option * expr
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forparser">Parser Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The parser code is also fairly standard. The only interesting thing here is
-handling of the optional step value. The parser code handles it by checking to
-see if the second comma is present. If not, it sets the step value to None in
-the AST node:</p>
-
-<div class="doc_code">
-<pre>
-let rec parse_primary = parser
- ...
- (* forexpr
- ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
- | [&lt; 'Token.For;
- 'Token.Ident id ?? "expected identifier after for";
- 'Token.Kwd '=' ?? "expected '=' after for";
- stream &gt;] -&gt;
- begin parser
- | [&lt;
- start=parse_expr;
- 'Token.Kwd ',' ?? "expected ',' after for";
- end_=parse_expr;
- stream &gt;] -&gt;
- let step =
- begin parser
- | [&lt; 'Token.Kwd ','; step=parse_expr &gt;] -&gt; Some step
- | [&lt; &gt;] -&gt; None
- end stream
- in
- begin parser
- | [&lt; 'Token.In; body=parse_expr &gt;] -&gt;
- Ast.For (id, start, end_, step, body)
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected 'in' after for")
- end stream
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected '=' after for")
- end stream
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forir">LLVM IR for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now we get to the good part: the LLVM IR we want to generate for this thing.
-With the simple example above, we get this LLVM IR (note that this dump is
-generated with optimizations disabled for clarity):
-</p>
-
-<div class="doc_code">
-<pre>
-declare double @putchard(double)
-
-define double @printstar(double %n) {
-entry:
- ; initial value = 1.0 (inlined into phi)
- br label %loop
-
-loop: ; preds = %loop, %entry
- %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
- ; body
- %calltmp = call double @putchard(double 4.200000e+01)
- ; increment
- %nextvar = fadd double %i, 1.000000e+00
-
- ; termination test
- %cmptmp = fcmp ult double %i, %n
- %booltmp = uitofp i1 %cmptmp to double
- %loopcond = fcmp one double %booltmp, 0.000000e+00
- br i1 %loopcond, label %loop, label %afterloop
-
-afterloop: ; preds = %loop
- ; loop always returns 0.0
- ret double 0.000000e+00
-}
-</pre>
-</div>
-
-<p>This loop contains all the same constructs we saw before: a phi node, several
-expressions, and some basic blocks. Let's see how this fits together.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forcodegen">Code Generation for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The first part of Codegen is very simple: we just output the start expression
-for the loop value:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
- ...
- | Ast.For (var_name, start, end_, step, body) -&gt;
- (* Emit the start code first, without 'variable' in scope. *)
- let start_val = codegen_expr start in
-</pre>
-</div>
-
-<p>With this out of the way, the next step is to set up the LLVM basic block
-for the start of the loop body. In the case above, the whole loop body is one
-block, but remember that the body code itself could consist of multiple blocks
-(e.g. if it contains an if/then/else or a for/in expression).</p>
-
-<div class="doc_code">
-<pre>
- (* Make the new basic block for the loop header, inserting after current
- * block. *)
- let preheader_bb = insertion_block builder in
- let the_function = block_parent preheader_bb in
- let loop_bb = append_block context "loop" the_function in
-
- (* Insert an explicit fall through from the current block to the
- * loop_bb. *)
- ignore (build_br loop_bb builder);
-</pre>
-</div>
-
-<p>This code is similar to what we saw for if/then/else. Because we will need
-it to create the Phi node, we remember the block that falls through into the
-loop. Once we have that, we create the actual block that starts the loop and
-create an unconditional branch for the fall-through between the two blocks.</p>
-
-<div class="doc_code">
-<pre>
- (* Start insertion in loop_bb. *)
- position_at_end loop_bb builder;
-
- (* Start the PHI node with an entry for start. *)
- let variable = build_phi [(start_val, preheader_bb)] var_name builder in
-</pre>
-</div>
-
-<p>Now that the "preheader" for the loop is set up, we switch to emitting code
-for the loop body. To begin with, we move the insertion point and create the
-PHI node for the loop induction variable. Since we already know the incoming
-value for the starting value, we add it to the Phi node. Note that the Phi will
-eventually get a second value for the backedge, but we can't set it up yet
-(because it doesn't exist!).</p>
-
-<div class="doc_code">
-<pre>
- (* Within the loop, the variable is defined equal to the PHI node. If it
- * shadows an existing variable, we have to restore it, so save it
- * now. *)
- let old_val =
- try Some (Hashtbl.find named_values var_name) with Not_found -&gt; None
- in
- Hashtbl.add named_values var_name variable;
-
- (* Emit the body of the loop. This, like any other expr, can change the
- * current BB. Note that we ignore the value computed by the body, but
- * don't allow an error *)
- ignore (codegen_expr body);
-</pre>
-</div>
-
-<p>Now the code starts to get more interesting. Our 'for' loop introduces a new
-variable to the symbol table. This means that our symbol table can now contain
-either function arguments or loop variables. To handle this, before we codegen
-the body of the loop, we add the loop variable as the current value for its
-name. Note that it is possible that there is a variable of the same name in the
-outer scope. It would be easy to make this an error (simply raise one if there
-is already an entry for <tt>var_name</tt>) but we choose to allow shadowing
-of variables. In order to handle this correctly, we remember the Value that
-we are potentially shadowing in <tt>old_val</tt> (which will be None if there is
-no shadowed variable).</p>
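-
-<p>As a hypothetical example of shadowing, the loop below reuses the name of
-the function argument; inside the loop "n" refers to the induction variable,
-and the argument becomes visible again after the loop:</p>
-
-<div class="doc_code">
-<pre>
-def count(n)
-  for n = 1, n &lt; 10 in
-    putchard(42);
-</pre>
-</div>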
-
-<p>Once the loop variable is set into the symbol table, the code recursively
-codegen's the body. This allows the body to use the loop variable: any
-references to it will naturally find it in the symbol table.</p>
-
-<div class="doc_code">
-<pre>
- (* Emit the step value. *)
- let step_val =
- match step with
- | Some step -&gt; codegen_expr step
- (* If not specified, use 1.0. *)
- | None -&gt; const_float double_type 1.0
- in
-
- let next_var = build_add variable step_val "nextvar" builder in
-</pre>
-</div>
-
-<p>Now that the body is emitted, we compute the next value of the iteration
-variable by adding the step value, or 1.0 if it isn't present.
-'<tt>next_var</tt>' will be the value of the loop variable on the next iteration
-of the loop.</p>
-
-<div class="doc_code">
-<pre>
- (* Compute the end condition. *)
- let end_cond = codegen_expr end_ in
-
- (* Convert condition to a bool by comparing equal to 0.0. *)
- let zero = const_float double_type 0.0 in
- let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-</pre>
-</div>
-
-<p>Finally, we evaluate the exit value of the loop, to determine whether the
-loop should exit. This mirrors the condition evaluation for the if/then/else
-statement.</p>
-
-<div class="doc_code">
-<pre>
- (* Create the "after loop" block and insert it. *)
- let loop_end_bb = insertion_block builder in
- let after_bb = append_block context "afterloop" the_function in
-
- (* Insert the conditional branch into the end of loop_end_bb. *)
- ignore (build_cond_br end_cond loop_bb after_bb builder);
-
- (* Any new code will be inserted in after_bb. *)
- position_at_end after_bb builder;
-</pre>
-</div>
-
-<p>With the code for the body of the loop complete, we just need to finish up
-the control flow for it. This code remembers the end block (for the phi node),
-then creates the block for the loop exit ("afterloop"). Based on the value of the
-exit condition, it creates a conditional branch that chooses between executing
-the loop again and exiting the loop. Any future code is emitted in the
-"afterloop" block, so it sets the insertion position to it.</p>
-
-<div class="doc_code">
-<pre>
- (* Add a new entry to the PHI node for the backedge. *)
- add_incoming (next_var, loop_end_bb) variable;
-
- (* Restore the unshadowed variable. *)
- begin match old_val with
- | Some old_val -&gt; Hashtbl.add named_values var_name old_val
- | None -&gt; ()
- end;
-
- (* for expr always returns 0.0. *)
- const_null double_type
-</pre>
-</div>
-
-<p>The final code handles various cleanups: now that we have the
-"<tt>next_var</tt>" value, we can add the incoming value to the loop PHI node.
-After that, we restore any shadowed variable, so that the loop variable isn't
-in scope after the for loop. Finally, code generation of the for loop always
-returns 0.0, so that is what we return from <tt>Codegen.codegen_expr</tt>.</p>
-
-<p>With this, we conclude the "adding control flow to Kaleidoscope" chapter of
-the tutorial. In this chapter we added two control flow constructs, and used
-them to motivate a couple of aspects of the LLVM IR that are important for
-front-end implementors to know. In the next chapter of our saga, we will get
-a bit crazier and add <a href="OCamlLangImpl6.html">user-defined operators</a>
-to our poor innocent language.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-if/then/else and for expressions. To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-&lt;*.{byte,native}&gt;: g++, use_llvm, use_llvm_analysis
-&lt;*.{byte,native}&gt;: use_llvm_executionengine, use_llvm_target
-&lt;*.{byte,native}&gt;: use_llvm_scalar_opts, use_bindings
-</pre>
-</dd>
-
-<dt>myocamlbuild.ml:</dt>
-<dd class="doc_code">
-<pre>
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
- (* commands *)
- | Def | Extern
-
- (* primary *)
- | Ident of string | Number of float
-
- (* unknown *)
- | Kwd of char
-
- (* control *)
- | If | Then | Else
- | For | In
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
- (* Skip any whitespace. *)
- | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
- (* identifier: [a-zA-Z][a-zA-Z0-9] *)
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_ident buffer stream
-
- (* number: [0-9.]+ *)
- | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_number buffer stream
-
- (* Comment until end of line. *)
- | [&lt; ' ('#'); stream &gt;] -&gt;
- lex_comment stream
-
- (* Otherwise, just return the character as its ascii value. *)
- | [&lt; 'c; stream &gt;] -&gt;
- [&lt; 'Token.Kwd c; lex stream &gt;]
-
- (* end of stream. *)
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
- | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_number buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_ident buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- match Buffer.contents buffer with
- | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
- | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
- | "if" -&gt; [&lt; 'Token.If; stream &gt;]
- | "then" -&gt; [&lt; 'Token.Then; stream &gt;]
- | "else" -&gt; [&lt; 'Token.Else; stream &gt;]
- | "for" -&gt; [&lt; 'Token.For; stream &gt;]
- | "in" -&gt; [&lt; 'Token.In; stream &gt;]
- | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
- | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
- | [&lt; 'c; e=lex_comment &gt;] -&gt; e
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
- (* variant for numeric literals like "1.0". *)
- | Number of float
-
- (* variant for referencing a variable, like "a". *)
- | Variable of string
-
- (* variant for a binary operator. *)
- | Binary of char * expr * expr
-
- (* variant for function calls. *)
- | Call of string * expr array
-
- (* variant for if/then/else. *)
- | If of expr * expr * expr
-
- (* variant for for/in. *)
- | For of string * expr * expr * expr option * expr
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- * ::= identifier
- * ::= numberexpr
- * ::= parenexpr
- * ::= ifexpr
- * ::= forexpr *)
-let rec parse_primary = parser
- (* numberexpr ::= number *)
- | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
- (* parenexpr ::= '(' expression ')' *)
- | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
- (* identifierexpr
- * ::= identifier
- * ::= identifier '(' argumentexpr ')' *)
- | [&lt; 'Token.Ident id; stream &gt;] -&gt;
- let rec parse_args accumulator = parser
- | [&lt; e=parse_expr; stream &gt;] -&gt;
- begin parser
- | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; e :: accumulator
- end stream
- | [&lt; &gt;] -&gt; accumulator
- in
- let rec parse_ident id = parser
- (* Call. *)
- | [&lt; 'Token.Kwd '(';
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
- Ast.Call (id, Array.of_list (List.rev args))
-
- (* Simple variable ref. *)
- | [&lt; &gt;] -&gt; Ast.Variable id
- in
- parse_ident id stream
-
- (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
- | [&lt; 'Token.If; c=parse_expr;
- 'Token.Then ?? "expected 'then'"; t=parse_expr;
- 'Token.Else ?? "expected 'else'"; e=parse_expr &gt;] -&gt;
- Ast.If (c, t, e)
-
- (* forexpr
- ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
- | [&lt; 'Token.For;
- 'Token.Ident id ?? "expected identifier after for";
- 'Token.Kwd '=' ?? "expected '=' after for";
- stream &gt;] -&gt;
- begin parser
- | [&lt;
- start=parse_expr;
- 'Token.Kwd ',' ?? "expected ',' after for";
- end_=parse_expr;
- stream &gt;] -&gt;
- let step =
- begin parser
- | [&lt; 'Token.Kwd ','; step=parse_expr &gt;] -&gt; Some step
- | [&lt; &gt;] -&gt; None
- end stream
- in
- begin parser
- | [&lt; 'Token.In; body=parse_expr &gt;] -&gt;
- Ast.For (id, start, end_, step, body)
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected 'in' after for")
- end stream
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected '=' after for")
- end stream
-
- | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- * ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
- match Stream.peek stream with
- (* If this is a binop, find its precedence. *)
- | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
- let token_prec = precedence c in
-
- (* If this is a binop that binds at least as tightly as the current binop,
- * consume it, otherwise we are done. *)
- if token_prec &lt; expr_prec then lhs else begin
- (* Eat the binop. *)
- Stream.junk stream;
-
- (* Parse the primary expression after the binary operator. *)
- let rhs = parse_primary stream in
-
- (* Okay, we know this is a binop. *)
- let rhs =
- match Stream.peek stream with
- | Some (Token.Kwd c2) -&gt;
- (* If BinOp binds less tightly with rhs than the operator after
- * rhs, let the pending operator take rhs as its lhs. *)
- let next_prec = precedence c2 in
- if token_prec &lt; next_prec
- then parse_bin_rhs (token_prec + 1) rhs stream
- else rhs
- | _ -&gt; rhs
- in
-
- (* Merge lhs/rhs. *)
- let lhs = Ast.Binary (c, lhs, rhs) in
- parse_bin_rhs expr_prec lhs stream
- end
- | _ -&gt; lhs
-
-(* expression
- * ::= primary binoprhs *)
-and parse_expr = parser
- | [&lt; lhs=parse_primary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- * ::= id '(' id* ')' *)
-let parse_prototype =
- let rec parse_args accumulator = parser
- | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; accumulator
- in
-
- parser
- | [&lt; 'Token.Ident id;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- (* success. *)
- Ast.Prototype (id, Array.of_list (List.rev args))
-
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
- | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
- Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
- | [&lt; e=parse_expr &gt;] -&gt;
- (* Make an anonymous proto. *)
- Ast.Function (Ast.Prototype ("", [||]), e)
-
-(* external ::= 'extern' prototype *)
-let parse_extern = parser
- | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>codegen.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-let rec codegen_expr = function
- | Ast.Number n -&gt; const_float double_type n
- | Ast.Variable name -&gt;
- (try Hashtbl.find named_values name with
- | Not_found -&gt; raise (Error "unknown variable name"))
- | Ast.Binary (op, lhs, rhs) -&gt;
- let lhs_val = codegen_expr lhs in
- let rhs_val = codegen_expr rhs in
- begin
- match op with
- | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
- | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
- | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
- | '&lt;' -&gt;
- (* Convert bool 0/1 to double 0.0 or 1.0 *)
- let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
- build_uitofp i double_type "booltmp" builder
- | _ -&gt; raise (Error "invalid binary operator")
- end
- | Ast.Call (callee, args) -&gt;
- (* Look up the name in the module table. *)
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "unknown function referenced")
- in
- let params = params callee in
-
- (* If argument mismatch error. *)
- if Array.length params == Array.length args then () else
- raise (Error "incorrect # arguments passed");
- let args = Array.map codegen_expr args in
- build_call callee args "calltmp" builder
- | Ast.If (cond, then_, else_) -&gt;
- let cond = codegen_expr cond in
-
- (* Convert condition to a bool by comparing equal to 0.0 *)
- let zero = const_float double_type 0.0 in
- let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-
- (* Grab the first block so that we might later add the conditional branch
- * to it at the end of the function. *)
- let start_bb = insertion_block builder in
- let the_function = block_parent start_bb in
-
- let then_bb = append_block context "then" the_function in
-
- (* Emit 'then' value. *)
- position_at_end then_bb builder;
- let then_val = codegen_expr then_ in
-
- (* Codegen of 'then' can change the current block, update then_bb for the
- * phi. We create a new name because one is used for the phi node, and the
- * other is used for the conditional branch. *)
- let new_then_bb = insertion_block builder in
-
- (* Emit 'else' value. *)
- let else_bb = append_block context "else" the_function in
- position_at_end else_bb builder;
- let else_val = codegen_expr else_ in
-
- (* Codegen of 'else' can change the current block, update else_bb for the
- * phi. *)
- let new_else_bb = insertion_block builder in
-
- (* Emit merge block. *)
- let merge_bb = append_block context "ifcont" the_function in
- position_at_end merge_bb builder;
- let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
- let phi = build_phi incoming "iftmp" builder in
-
- (* Return to the start block to add the conditional branch. *)
- position_at_end start_bb builder;
- ignore (build_cond_br cond_val then_bb else_bb builder);
-
-    (* Set an unconditional branch at the end of the 'then' block and the
- * 'else' block to the 'merge' block. *)
- position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
- position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
- (* Finally, set the builder to the end of the merge block. *)
- position_at_end merge_bb builder;
-
- phi
- | Ast.For (var_name, start, end_, step, body) -&gt;
- (* Emit the start code first, without 'variable' in scope. *)
- let start_val = codegen_expr start in
-
- (* Make the new basic block for the loop header, inserting after current
- * block. *)
- let preheader_bb = insertion_block builder in
- let the_function = block_parent preheader_bb in
- let loop_bb = append_block context "loop" the_function in
-
- (* Insert an explicit fall through from the current block to the
- * loop_bb. *)
- ignore (build_br loop_bb builder);
-
- (* Start insertion in loop_bb. *)
- position_at_end loop_bb builder;
-
- (* Start the PHI node with an entry for start. *)
- let variable = build_phi [(start_val, preheader_bb)] var_name builder in
-
- (* Within the loop, the variable is defined equal to the PHI node. If it
- * shadows an existing variable, we have to restore it, so save it
- * now. *)
- let old_val =
- try Some (Hashtbl.find named_values var_name) with Not_found -&gt; None
- in
- Hashtbl.add named_values var_name variable;
-
- (* Emit the body of the loop. This, like any other expr, can change the
- * current BB. Note that we ignore the value computed by the body, but
- * don't allow an error *)
- ignore (codegen_expr body);
-
- (* Emit the step value. *)
- let step_val =
- match step with
- | Some step -&gt; codegen_expr step
- (* If not specified, use 1.0. *)
- | None -&gt; const_float double_type 1.0
- in
-
- let next_var = build_add variable step_val "nextvar" builder in
-
- (* Compute the end condition. *)
- let end_cond = codegen_expr end_ in
-
- (* Convert condition to a bool by comparing equal to 0.0. *)
- let zero = const_float double_type 0.0 in
- let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-
- (* Create the "after loop" block and insert it. *)
- let loop_end_bb = insertion_block builder in
- let after_bb = append_block context "afterloop" the_function in
-
- (* Insert the conditional branch into the end of loop_end_bb. *)
- ignore (build_cond_br end_cond loop_bb after_bb builder);
-
- (* Any new code will be inserted in after_bb. *)
- position_at_end after_bb builder;
-
- (* Add a new entry to the PHI node for the backedge. *)
- add_incoming (next_var, loop_end_bb) variable;
-
- (* Restore the unshadowed variable. *)
- begin match old_val with
- | Some old_val -&gt; Hashtbl.add named_values var_name old_val
- | None -&gt; ()
- end;
-
- (* for expr always returns 0.0. *)
- const_null double_type
-
-let codegen_proto = function
- | Ast.Prototype (name, args) -&gt;
- (* Make the function type: double(double,double) etc. *)
- let doubles = Array.make (Array.length args) double_type in
- let ft = function_type double_type doubles in
- let f =
- match lookup_function name the_module with
- | None -&gt; declare_function name ft the_module
-
- (* If 'f' conflicted, there was already something named 'name'. If it
- * has a body, don't allow redefinition or reextern. *)
- | Some f -&gt;
- (* If 'f' already has a body, reject this. *)
- if block_begin f &lt;&gt; At_end f then
- raise (Error "redefinition of function");
-
- (* If 'f' took a different number of arguments, reject. *)
- if element_type (type_of f) &lt;&gt; ft then
- raise (Error "redefinition of function with different # args");
- f
- in
-
- (* Set names for all arguments. *)
- Array.iteri (fun i a -&gt;
- let n = args.(i) in
- set_value_name n a;
- Hashtbl.add named_values n a;
- ) (params f);
- f
-
-let codegen_func the_fpm = function
- | Ast.Function (proto, body) -&gt;
- Hashtbl.clear named_values;
- let the_function = codegen_proto proto in
-
- (* Create a new basic block to start insertion into. *)
- let bb = append_block context "entry" the_function in
- position_at_end bb builder;
-
- try
- let ret_val = codegen_expr body in
-
- (* Finish off the function. *)
- let _ = build_ret ret_val builder in
-
- (* Validate the generated code, checking for consistency. *)
- Llvm_analysis.assert_valid_function the_function;
-
- (* Optimize the function. *)
- let _ = PassManager.run_function the_function the_fpm in
-
- the_function
- with e -&gt;
- delete_function the_function;
- raise e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
- match Stream.peek stream with
- | None -&gt; ()
-
- (* ignore top-level semicolons. *)
- | Some (Token.Kwd ';') -&gt;
- Stream.junk stream;
- main_loop the_fpm the_execution_engine stream
-
- | Some token -&gt;
- begin
- try match token with
- | Token.Def -&gt;
- let e = Parser.parse_definition stream in
- print_endline "parsed a function definition.";
- dump_value (Codegen.codegen_func the_fpm e);
- | Token.Extern -&gt;
- let e = Parser.parse_extern stream in
- print_endline "parsed an extern.";
- dump_value (Codegen.codegen_proto e);
- | _ -&gt;
- (* Evaluate a top-level expression into an anonymous function. *)
- let e = Parser.parse_toplevel stream in
- print_endline "parsed a top-level expr";
- let the_function = Codegen.codegen_func the_fpm e in
- dump_value the_function;
-
- (* JIT the function, returning a function pointer. *)
- let result = ExecutionEngine.run_function the_function [||]
- the_execution_engine in
-
- print_string "Evaluated to ";
- print_float (GenericValue.as_float Codegen.double_type result);
- print_newline ();
- with Stream.Error s | Codegen.Error s -&gt;
- (* Skip token for error recovery. *)
- Stream.junk stream;
- print_endline s;
- end;
- print_string "ready&gt; "; flush stdout;
- main_loop the_fpm the_execution_engine stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-
-let main () =
- ignore (initialize_native_target ());
-
- (* Install standard binary operators.
- * 1 is the lowest precedence. *)
- Hashtbl.add Parser.binop_precedence '&lt;' 10;
- Hashtbl.add Parser.binop_precedence '+' 20;
- Hashtbl.add Parser.binop_precedence '-' 20;
- Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
-
- (* Prime the first token. *)
- print_string "ready&gt; "; flush stdout;
- let stream = Lexer.lex (Stream.of_channel stdin) in
-
- (* Create the JIT. *)
- let the_execution_engine = ExecutionEngine.create Codegen.the_module in
- let the_fpm = PassManager.create_function Codegen.the_module in
-
- (* Set up the optimizer pipeline. Start with registering info about how the
- * target lays out data structures. *)
- DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
- (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
- add_instruction_combination the_fpm;
-
- (* reassociate expressions. *)
- add_reassociation the_fpm;
-
- (* Eliminate Common SubExpressions. *)
- add_gvn the_fpm;
-
- (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
- add_cfg_simplification the_fpm;
-
- ignore (PassManager.initialize the_fpm);
-
- (* Run the main "interpreter loop" now. *)
- Toplevel.main_loop the_fpm the_execution_engine stream;
-
- (* Print out all the generated code. *)
- dump_module Codegen.the_module
-;;
-
-main ()
-</pre>
-</dd>
-
-<dt>bindings.c</dt>
-<dd class="doc_code">
-<pre>
-#include &lt;stdio.h&gt;
-
-/* putchard - putchar that takes a double and returns 0. */
-extern double putchard(double X) {
- putchar((char)X);
- return 0;
-}
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl6.html">Next: Extending the language: user-defined
-operators</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl5.rst b/docs/tutorial/OCamlLangImpl5.rst
new file mode 100644
index 000000000000..b8ae3c58ddff
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl5.rst
@@ -0,0 +1,1362 @@
+==================================================
+Kaleidoscope: Extending the Language: Control Flow
+==================================================
+
+.. contents::
+ :local:
+
+Chapter 5 Introduction
+======================
+
+Welcome to Chapter 5 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. Parts 1-4 described the implementation of
+the simple Kaleidoscope language and included support for generating
+LLVM IR, followed by optimizations and a JIT compiler. Unfortunately, as
+presented, Kaleidoscope is mostly useless: it has no control flow other
+than call and return. This means that you can't have conditional
+branches in the code, significantly limiting its power. In this episode
+of "build that compiler", we'll extend Kaleidoscope to have an
+if/then/else expression plus a simple 'for' loop.
+
+If/Then/Else
+============
+
+Extending Kaleidoscope to support if/then/else is quite straightforward.
+It basically requires adding support for this "new" concept to the
+lexer, parser, AST, and LLVM code emitter. This example is nice because
+it shows how easy it is to "grow" a language over time, incrementally
+extending it as new ideas are discovered.
+
+Before we get going on "how" we add this extension, let's talk about
+"what" we want. The basic idea is that we want to be able to write this
+sort of thing:
+
+::
+
+ def fib(x)
+ if x < 3 then
+ 1
+ else
+ fib(x-1)+fib(x-2);
+
+In Kaleidoscope, every construct is an expression: there are no
+statements. As such, the if/then/else expression needs to return a value
+like any other. Since we're using a mostly functional form, we'll have
+it evaluate its conditional, then return the 'then' or 'else' value
+based on how the condition was resolved. This is very similar to the C
+"?:" expression.
+
+The semantics of the if/then/else expression are that it evaluates the
+condition to a boolean equality value: 0.0 is considered to be false and
+everything else is considered to be true. If the condition is true, the
+first subexpression is evaluated and returned; if the condition is
+false, the second subexpression is evaluated and returned. Since
+Kaleidoscope allows side-effects, this behavior is important to nail
+down.
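+
+For example, since ``putchard`` (used later in this chapter) has a
+side-effect, only one of the branches below ever prints anything (a
+hypothetical session):
+
+::
+
+    ready> extern putchard(char);
+    ready> def choose(c) if c then putchard(84) else putchard(70);
+    ready> choose(1); # prints 'T' (ascii 84); putchard(70) never runs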
+
+Now that we know what we "want", let's break this down into its
+constituent pieces.
+
+Lexer Extensions for If/Then/Else
+---------------------------------
+
+The lexer extensions are straightforward. First we add new variants for
+the relevant tokens:
+
+.. code-block:: ocaml
+
+ (* control *)
+ | If | Then | Else | For | In
+
+Once we have that, we recognize the new keywords in the lexer. This is
+pretty simple stuff:
+
+.. code-block:: ocaml
+
+ ...
+ match Buffer.contents buffer with
+ | "def" -> [< 'Token.Def; stream >]
+ | "extern" -> [< 'Token.Extern; stream >]
+ | "if" -> [< 'Token.If; stream >]
+ | "then" -> [< 'Token.Then; stream >]
+ | "else" -> [< 'Token.Else; stream >]
+ | "for" -> [< 'Token.For; stream >]
+ | "in" -> [< 'Token.In; stream >]
+ | id -> [< 'Token.Ident id; stream >]
+
+AST Extensions for If/Then/Else
+-------------------------------
+
+To represent the new expression we add a new AST variant for it:
+
+.. code-block:: ocaml
+
+ type expr =
+ ...
+ (* variant for if/then/else. *)
+ | If of expr * expr * expr
+
+The AST variant just holds the various subexpressions.
+
+Parser Extensions for If/Then/Else
+----------------------------------
+
+Now that we have the relevant tokens coming from the lexer and we have
+the AST node to build, our parsing logic is relatively straightforward.
+Since an if/then/else is just another kind of primary expression,
+defining the parsing rule and hooking it up are one and the same: we add
+a new alternative to ``parse_primary``:
+
+.. code-block:: ocaml
+
+    let rec parse_primary = parser
+      ...
+      (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+      | [< 'Token.If; c=parse_expr;
+           'Token.Then ?? "expected 'then'"; t=parse_expr;
+           'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+          Ast.If (c, t, e)
+
+LLVM IR for If/Then/Else
+------------------------
+
+Now that we have it parsing and building the AST, the final piece is
+adding LLVM code generation support. This is the most interesting part
+of the if/then/else example, because this is where it starts to
+introduce new concepts. All of the code above has been thoroughly
+described in previous chapters.
+
+To motivate the code we want to produce, let's take a look at a simple
+example. Consider:
+
+::
+
+ extern foo();
+ extern bar();
+ def baz(x) if x then foo() else bar();
+
+If you disable optimizations, the code you'll (soon) get from
+Kaleidoscope looks like this:
+
+.. code-block:: llvm
+
+ declare double @foo()
+
+ declare double @bar()
+
+ define double @baz(double %x) {
+ entry:
+ %ifcond = fcmp one double %x, 0.000000e+00
+ br i1 %ifcond, label %then, label %else
+
+ then: ; preds = %entry
+ %calltmp = call double @foo()
+ br label %ifcont
+
+ else: ; preds = %entry
+ %calltmp1 = call double @bar()
+ br label %ifcont
+
+ ifcont: ; preds = %else, %then
+ %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
+ ret double %iftmp
+ }
+
+To visualize the control flow graph, you can use a nifty feature of the
+LLVM '`opt <http://llvm.org/cmds/opt.html>`_' tool. If you put this LLVM
+IR into "t.ll" and run "``llvm-as < t.ll | opt -analyze -view-cfg``", `a
+window will pop up <../ProgrammersManual.html#ViewGraph>`_ and you'll
+see this graph:
+
+.. figure:: LangImpl5-cfg.png
+ :align: center
+ :alt: Example CFG
+
+ Example CFG
+
+Another way to get this is to call
+"``Llvm_analysis.view_function_cfg f``" or
+"``Llvm_analysis.view_function_cfg_only f``" (where ``f`` is a
+"``Function``") either by inserting actual calls into the code and
+recompiling or by calling these in the debugger. LLVM has many nice
+features for visualizing various graphs.
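+
+For instance, a minimal sketch (a hypothetical helper, not part of the
+tutorial's code) that pops up the CFG of each function as it is compiled
+might look like the following; it assumes the Graphviz tools are
+installed, since the viewers shell out to them:
+
+.. code-block:: ocaml
+
+    (* Codegen a function, then display its control flow graph. *)
+    let codegen_and_view the_fpm func =
+      let f = Codegen.codegen_func the_fpm func in
+      (* view_function_cfg_only draws the blocks without their bodies. *)
+      Llvm_analysis.view_function_cfg_only f;
+      f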
+
+Getting back to the generated code, it is fairly simple: the entry block
+evaluates the conditional expression ("x" in our case here) and compares
+the result to 0.0 with the "``fcmp one``" instruction ('one' is "Ordered
+and Not Equal"). Based on the result of this expression, the code jumps
+to either the "then" or "else" blocks, which contain the expressions for
+the true/false cases.
+
+Once the then/else blocks are finished executing, they both branch back
+to the 'ifcont' block to execute the code that happens after the
+if/then/else. In this case the only thing left to do is to return to the
+caller of the function. The question then becomes: how does the code
+know which expression to return?
+
+The answer to this question involves an important SSA operation: the
+`Phi
+operation <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+If you're not familiar with SSA, `the wikipedia
+article <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+is a good introduction and there are various other introductions to it
+available on your favorite search engine. The short version is that
+"execution" of the Phi operation requires "remembering" which block
+control came from. The Phi operation takes on the value corresponding to
+the input control block. In this case, if control comes in from the
+"then" block, it gets the value of "calltmp". If control comes from the
+"else" block, it gets the value of "calltmp1".
+
+At this point, you are probably starting to think "Oh no! This means my
+simple and elegant front-end will have to start generating SSA form in
+order to use LLVM!". Fortunately, this is not the case, and we strongly
+advise *not* implementing an SSA construction algorithm in your
+front-end unless there is an amazingly good reason to do so. In
+practice, there are two sorts of values that float around in code
+written for your average imperative programming language that might need
+Phi nodes:
+
+#. Code that involves user variables: ``x = 1; x = x + 1;``
+#. Values that are implicit in the structure of your AST, such as the
+ Phi node in this case.
+
+In `Chapter 7 <OCamlLangImpl7.html>`_ of this tutorial ("mutable
+variables"), we'll talk about #1 in depth. For now, just believe me that
+you don't need SSA construction to handle this case. For #2, you have
+the choice of using the techniques that we will describe for #1, or you
+can insert Phi nodes directly, if convenient. In this case, it is really
+really easy to generate the Phi node, so we choose to do it directly.
+
+Okay, enough of the motivation and overview: let's generate code!
+
+Code Generation for If/Then/Else
+--------------------------------
+
+In order to generate code for this, we extend ``codegen_expr`` to
+handle the ``Ast.If`` case:
+
+.. code-block:: ocaml
+
+ let rec codegen_expr = function
+ ...
+ | Ast.If (cond, then_, else_) ->
+ let cond = codegen_expr cond in
+
+ (* Convert condition to a bool by comparing equal to 0.0 *)
+ let zero = const_float double_type 0.0 in
+ let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+This code is straightforward and similar to what we saw before. We emit
+the expression for the condition, then compare that value to zero to get
+a truth value as a 1-bit (bool) value.
+
+.. code-block:: ocaml
+
+ (* Grab the first block so that we might later add the conditional branch
+ * to it at the end of the function. *)
+ let start_bb = insertion_block builder in
+ let the_function = block_parent start_bb in
+
+ let then_bb = append_block context "then" the_function in
+ position_at_end then_bb builder;
+
+As opposed to the `C++ tutorial <LangImpl5.html>`_, we have to build our
+basic blocks bottom up since we can't have dangling BasicBlocks. We
+start off by saving a pointer to the first block (which might not be the
+entry block), which we'll need to build a conditional branch later. We
+do this by asking the ``builder`` for the current BasicBlock. The fourth
+line gets the current Function object that is being built. It gets this
+by asking the ``start_bb`` for its "parent" (the function it is
+currently embedded into).
+
+Once it has that, it creates one block, which is automatically appended
+to the function's list of blocks.
+
+.. code-block:: ocaml
+
+ (* Emit 'then' value. *)
+ position_at_end then_bb builder;
+ let then_val = codegen_expr then_ in
+
+ (* Codegen of 'then' can change the current block, update then_bb for the
+ * phi. We create a new name because one is used for the phi node, and the
+ * other is used for the conditional branch. *)
+ let new_then_bb = insertion_block builder in
+
+We move the builder to start inserting into the "then" block. Strictly
+speaking, this call moves the insertion point to be at the end of the
+specified block. However, since the "then" block is empty, it also
+starts out by inserting at the beginning of the block. :)
+
+Once the insertion point is set, we recursively codegen the "then"
+expression from the AST.
+
+The final line here is quite subtle, but is very important. The basic
+issue is that when we create the Phi node in the merge block, we need to
+set up the block/value pairs that indicate how the Phi will work.
+Importantly, the Phi node expects to have an entry for each predecessor
+of the block in the CFG. Why, then, are we getting the current block
+when we just set it to ``then_bb`` five lines above? The problem is that
+the "then" expression may actually itself change the block that the
+builder is emitting into if, for example, it contains a nested
+"if/then/else" expression. Because calling ``codegen_expr`` recursively
+could arbitrarily change the notion of the current block, we are
+required to get an up-to-date value for code that will set up the Phi
+node.
+
+.. code-block:: ocaml
+
+ (* Emit 'else' value. *)
+ let else_bb = append_block context "else" the_function in
+ position_at_end else_bb builder;
+ let else_val = codegen_expr else_ in
+
+ (* Codegen of 'else' can change the current block, update else_bb for the
+ * phi. *)
+ let new_else_bb = insertion_block builder in
+
+Code generation for the 'else' block is basically identical to codegen
+for the 'then' block.
+
+.. code-block:: ocaml
+
+ (* Emit merge block. *)
+ let merge_bb = append_block context "ifcont" the_function in
+ position_at_end merge_bb builder;
+ let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+ let phi = build_phi incoming "iftmp" builder in
+
+The first two lines here are now familiar: the first adds the "merge"
+block to the Function object. The second line changes the insertion
+point so that newly created code will go into the "merge" block. Once
+that is done, we need to create the PHI node and set up the block/value
+pairs for the PHI.
+
+.. code-block:: ocaml
+
+ (* Return to the start block to add the conditional branch. *)
+ position_at_end start_bb builder;
+ ignore (build_cond_br cond_val then_bb else_bb builder);
+
+Once the blocks are created, we can emit the conditional branch that
+chooses between them. Note that creating new blocks does not implicitly
+affect the IRBuilder, so it is still inserting into the block that the
+condition went into. This is why we needed to save the "start" block.
+
+.. code-block:: ocaml
+
+    (* Set an unconditional branch at the end of the 'then' block and the
+ * 'else' block to the 'merge' block. *)
+ position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+ position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+ (* Finally, set the builder to the end of the merge block. *)
+ position_at_end merge_bb builder;
+
+ phi
+
+To finish off the blocks, we create an unconditional branch to the merge
+block. One interesting (and very important) aspect of the LLVM IR is
+that it `requires all basic blocks to be
+"terminated" <../LangRef.html#functionstructure>`_ with a `control flow
+instruction <../LangRef.html#terminators>`_ such as return or branch.
+This means that all control flow, *including fall throughs*, must be made
+explicit in the LLVM IR. If you violate this rule, the verifier will
+emit an error.
+
+Finally, ``codegen_expr`` returns the phi node as the value computed
+by the if/then/else expression. In our example above, this returned
+value will feed into the code for the top-level function, which will
+create the return instruction.
+
+Overall, we now have the ability to execute conditional code in
+Kaleidoscope. With this extension, Kaleidoscope is a fairly complete
+language that can calculate a wide variety of numeric functions. Next up
+we'll add another useful expression that is familiar from non-functional
+languages...
+
+'for' Loop Expression
+=====================
+
+Now that we know how to add basic control flow constructs to the
+language, we have the tools to add more powerful things. Let's add
+something more aggressive: a 'for' expression:
+
+::
+
+ extern putchard(char);
+ def printstar(n)
+ for i = 1, i < n, 1.0 in
+ putchard(42); # ascii 42 = '*'
+
+ # print 100 '*' characters
+ printstar(100);
+
+This expression defines a new variable ("i" in this case) which iterates
+from a starting value, while the condition ("i < n" in this case) is
+true, incrementing by an optional step value ("1.0" in this case). If
+the step value is omitted, it defaults to 1.0. While the condition is
+true, the loop executes its body expression. Because we don't have
+anything better to return, we'll just define the loop as always
+returning 0.0. In the future, when we have mutable variables, it will
+become more useful.
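+
+As a quick illustration of the default step value (a hypothetical
+session, once this chapter's code is built):
+
+::
+
+    ready> extern putchard(char);
+    ready> def count(n) for i = 48, i < n in putchard(i);
+    ready> count(57); # step omitted, defaults to 1.0: prints 0123456789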
+
+As before, let's talk about the changes that we need to make to
+Kaleidoscope to support this.
+
+Lexer Extensions for the 'for' Loop
+-----------------------------------
+
+The lexer extensions are the same sort of thing as for if/then/else:
+
+.. code-block:: ocaml
+
+ ... in Token.token ...
+ (* control *)
+ | If | Then | Else
+ | For | In
+
+    ... in Lexer.lex_ident ...
+ match Buffer.contents buffer with
+ | "def" -> [< 'Token.Def; stream >]
+ | "extern" -> [< 'Token.Extern; stream >]
+ | "if" -> [< 'Token.If; stream >]
+ | "then" -> [< 'Token.Then; stream >]
+ | "else" -> [< 'Token.Else; stream >]
+ | "for" -> [< 'Token.For; stream >]
+ | "in" -> [< 'Token.In; stream >]
+ | id -> [< 'Token.Ident id; stream >]
+
+AST Extensions for the 'for' Loop
+---------------------------------
+
+The AST variant is just as simple. It basically boils down to capturing
+the variable name and the constituent expressions in the node.
+
+.. code-block:: ocaml
+
+ type expr =
+ ...
+ (* variant for for/in. *)
+ | For of string * expr * expr * expr option * expr
+
+Parser Extensions for the 'for' Loop
+------------------------------------
+
+The parser code is also fairly standard. The only interesting thing here
+is the handling of the optional step value. The parser code handles it
+by checking to see if the second comma is present. If not, it sets the
+step value to ``None`` in the AST node:
+
+.. code-block:: ocaml
+
+ let rec parse_primary = parser
+ ...
+ (* forexpr
+ ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+ | [< 'Token.For;
+ 'Token.Ident id ?? "expected identifier after for";
+ 'Token.Kwd '=' ?? "expected '=' after for";
+ stream >] ->
+ begin parser
+ | [<
+ start=parse_expr;
+ 'Token.Kwd ',' ?? "expected ',' after for";
+ end_=parse_expr;
+ stream >] ->
+ let step =
+ begin parser
+ | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+ | [< >] -> None
+ end stream
+ in
+ begin parser
+ | [< 'Token.In; body=parse_expr >] ->
+ Ast.For (id, start, end_, step, body)
+ | [< >] ->
+ raise (Stream.Error "expected 'in' after for")
+ end stream
+ | [< >] ->
+ raise (Stream.Error "expected '=' after for")
+ end stream
+
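+The nested ``begin parser ... end stream`` construct may look odd if you
+haven't used camlp4 stream parsers before: it builds an anonymous parser
+and immediately applies it to the current stream, which is how we match
+an optional element in the grammar. As a minimal sketch of the idiom in
+isolation (a hypothetical helper, assuming ``parse_expr`` is in scope
+and the camlp4 preprocessing from ``_tags`` below is used):
+
+.. code-block:: ocaml
+
+    (* Parse an optional ','-prefixed step expression; None if absent. *)
+    let parse_optional_step stream =
+      begin parser
+        | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+        | [< >] -> None
+      end stream
+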
+LLVM IR for the 'for' Loop
+--------------------------
+
+Now we get to the good part: the LLVM IR we want to generate for this
+thing. With the simple example above, we get this LLVM IR (note that
+this dump is generated with optimizations disabled for clarity):
+
+.. code-block:: llvm
+
+ declare double @putchard(double)
+
+ define double @printstar(double %n) {
+ entry:
+ ; initial value = 1.0 (inlined into phi)
+ br label %loop
+
+ loop: ; preds = %loop, %entry
+ %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
+ ; body
+ %calltmp = call double @putchard(double 4.200000e+01)
+ ; increment
+ %nextvar = fadd double %i, 1.000000e+00
+
+ ; termination test
+ %cmptmp = fcmp ult double %i, %n
+ %booltmp = uitofp i1 %cmptmp to double
+ %loopcond = fcmp one double %booltmp, 0.000000e+00
+ br i1 %loopcond, label %loop, label %afterloop
+
+ afterloop: ; preds = %loop
+ ; loop always returns 0.0
+ ret double 0.000000e+00
+ }
+
+This loop contains all the same constructs we saw before: a phi node,
+several expressions, and some basic blocks. Let's see how this fits
+together.
+
+Code Generation for the 'for' Loop
+----------------------------------
+
+The first part of the codegen is very simple: we just output the start
+expression for the loop value:
+
+.. code-block:: ocaml
+
+ let rec codegen_expr = function
+ ...
+ | Ast.For (var_name, start, end_, step, body) ->
+ (* Emit the start code first, without 'variable' in scope. *)
+ let start_val = codegen_expr start in
+
+With this out of the way, the next step is to set up the LLVM basic
+block for the start of the loop body. In the case above, the whole loop
+body is one block, but remember that the body code itself could consist
+of multiple blocks (e.g. if it contains an if/then/else or a for/in
+expression).
+
+.. code-block:: ocaml
+
+ (* Make the new basic block for the loop header, inserting after current
+ * block. *)
+ let preheader_bb = insertion_block builder in
+ let the_function = block_parent preheader_bb in
+ let loop_bb = append_block context "loop" the_function in
+
+ (* Insert an explicit fall through from the current block to the
+ * loop_bb. *)
+ ignore (build_br loop_bb builder);
+
+This code is similar to what we saw for if/then/else. Because we will
+need it to create the Phi node, we remember the block that falls through
+into the loop. Once we have that, we create the actual block that starts
+the loop and create an unconditional branch for the fall-through between
+the two blocks.
+
+.. code-block:: ocaml
+
+ (* Start insertion in loop_bb. *)
+ position_at_end loop_bb builder;
+
+ (* Start the PHI node with an entry for start. *)
+ let variable = build_phi [(start_val, preheader_bb)] var_name builder in
+
+Now that the "preheader" for the loop is set up, we switch to emitting
+code for the loop body. To begin with, we move the insertion point and
+create the PHI node for the loop induction variable. Since we already
+know the incoming value for the starting value, we add it to the Phi
+node. Note that the Phi will eventually get a second value for the
+backedge, but we can't set it up yet (because it doesn't exist!).
+
+.. code-block:: ocaml
+
+ (* Within the loop, the variable is defined equal to the PHI node. If it
+ * shadows an existing variable, we have to restore it, so save it
+ * now. *)
+ let old_val =
+ try Some (Hashtbl.find named_values var_name) with Not_found -> None
+ in
+ Hashtbl.add named_values var_name variable;
+
+ (* Emit the body of the loop. This, like any other expr, can change the
+ * current BB. Note that we ignore the value computed by the body, but
+ * don't allow an error *)
+ ignore (codegen_expr body);
+
+Now the code starts to get more interesting. Our 'for' loop introduces a
+new variable to the symbol table. This means that our symbol table can
+now contain either function arguments or loop variables. To handle this,
+before we codegen the body of the loop, we add the loop variable as the
+current value for its name. Note that it is possible that there is a
+variable of the same name in the outer scope. It would be easy to make
+this an error (raise an error if there is already an entry for
+``var_name``) but we choose to allow shadowing of variables. In
+order to handle this correctly, we remember the Value that we are
+potentially shadowing in ``old_val`` (which will be None if there is no
+shadowed variable).
+
+Once the loop variable is set into the symbol table, the code
+recursively codegens the body. This allows the body to use the loop
+variable: any references to it will naturally find it in the symbol
+table.
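+
+For example, the loop variable below shadows the function's argument of
+the same name in the loop body (a hypothetical session):
+
+::
+
+    ready> extern putchard(char);
+    ready> def shadow(i) for i = 65, i < 66 in putchard(i);
+    ready> shadow(999); # prints "AB" no matter what argument is passed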
+
+.. code-block:: ocaml
+
+ (* Emit the step value. *)
+ let step_val =
+ match step with
+ | Some step -> codegen_expr step
+ (* If not specified, use 1.0. *)
+ | None -> const_float double_type 1.0
+ in
+
+ let next_var = build_add variable step_val "nextvar" builder in
+
+Now that the body is emitted, we compute the next value of the iteration
+variable by adding the step value, or 1.0 if it isn't present.
+'``next_var``' will be the value of the loop variable on the next
+iteration of the loop.
+
+.. code-block:: ocaml
+
+ (* Compute the end condition. *)
+ let end_cond = codegen_expr end_ in
+
+ (* Convert condition to a bool by comparing equal to 0.0. *)
+ let zero = const_float double_type 0.0 in
+ let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+Finally, we evaluate the loop's end condition, to determine whether the
+loop should exit. This mirrors the condition evaluation for the
+if/then/else expression.
+
+.. code-block:: ocaml
+
+ (* Create the "after loop" block and insert it. *)
+ let loop_end_bb = insertion_block builder in
+ let after_bb = append_block context "afterloop" the_function in
+
+ (* Insert the conditional branch into the end of loop_end_bb. *)
+ ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+ (* Any new code will be inserted in after_bb. *)
+ position_at_end after_bb builder;
+
+With the code for the body of the loop complete, we just need to finish
+up the control flow for it. This code remembers the end block (for the
+phi node), then creates the block for the loop exit ("afterloop"). Based
+on the value of the exit condition, it creates a conditional branch that
+chooses between executing the loop again and exiting the loop. Any
+future code is emitted in the "afterloop" block, so it sets the
+insertion position to it.
+
+.. code-block:: ocaml
+
+ (* Add a new entry to the PHI node for the backedge. *)
+ add_incoming (next_var, loop_end_bb) variable;
+
+    (* Restore the unshadowed variable. If nothing was shadowed, remove the
+     * loop variable so that it is no longer in scope after the loop. *)
+    begin match old_val with
+    | Some old_val -> Hashtbl.add named_values var_name old_val
+    | None -> Hashtbl.remove named_values var_name
+    end;
+
+ (* for expr always returns 0.0. *)
+ const_null double_type
+
+The final code handles various cleanups: now that we have the
+"``next_var``" value, we can add the incoming value to the loop PHI
+node. After that, we remove the loop variable from the symbol table
+(restoring any binding it shadowed), so that it isn't in scope after
+the for loop. Finally, code generation of
+the for loop always returns 0.0, so that is what we return from
+``Codegen.codegen_expr``.
+
+With this, we conclude the "adding control flow to Kaleidoscope" chapter
+of the tutorial. In this chapter we added two control flow constructs,
+and used them to motivate a couple of aspects of the LLVM IR that are
+important for front-end implementors to know. In the next chapter of our
+saga, we will get a bit crazier and add `user-defined
+operators <OCamlLangImpl6.html>`_ to our poor innocent language.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the if/then/else and for expressions. To build this example, use:
+
+.. code-block:: bash
+
+ # Compile
+ ocamlbuild toy.byte
+ # Run
+ ./toy.byte
+
+Here is the code:
+
+\_tags:
+ ::
+
+ <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+ <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+ <*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+ <*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+myocamlbuild.ml:
+ .. code-block:: ocaml
+
+ open Ocamlbuild_plugin;;
+
+ ocaml_lib ~extern:true "llvm";;
+ ocaml_lib ~extern:true "llvm_analysis";;
+ ocaml_lib ~extern:true "llvm_executionengine";;
+ ocaml_lib ~extern:true "llvm_target";;
+ ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+ flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
+ dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+token.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+    (* The lexer returns a 'Kwd' token for an unknown character, otherwise one
+     * of these other tokens for known things. *)
+ type token =
+ (* commands *)
+ | Def | Extern
+
+ (* primary *)
+ | Ident of string | Number of float
+
+ (* unknown *)
+ | Kwd of char
+
+ (* control *)
+ | If | Then | Else
+ | For | In
+
+lexer.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+ let rec lex = parser
+ (* Skip any whitespace. *)
+ | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+    (* identifier: [a-zA-Z][a-zA-Z0-9]* *)
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+
+ (* number: [0-9.]+ *)
+ | [< ' ('0' .. '9' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+
+ (* Comment until end of line. *)
+ | [< ' ('#'); stream >] ->
+ lex_comment stream
+
+ (* Otherwise, just return the character as its ascii value. *)
+ | [< 'c; stream >] ->
+ [< 'Token.Kwd c; lex stream >]
+
+ (* end of stream. *)
+ | [< >] -> [< >]
+
+ and lex_number buffer = parser
+ | [< ' ('0' .. '9' | '.' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+ | [< stream=lex >] ->
+ [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+ and lex_ident buffer = parser
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+ | [< stream=lex >] ->
+ match Buffer.contents buffer with
+ | "def" -> [< 'Token.Def; stream >]
+ | "extern" -> [< 'Token.Extern; stream >]
+ | "if" -> [< 'Token.If; stream >]
+ | "then" -> [< 'Token.Then; stream >]
+ | "else" -> [< 'Token.Else; stream >]
+ | "for" -> [< 'Token.For; stream >]
+ | "in" -> [< 'Token.In; stream >]
+ | id -> [< 'Token.Ident id; stream >]
+
+ and lex_comment = parser
+ | [< ' ('\n'); stream=lex >] -> stream
+ | [< 'c; e=lex_comment >] -> e
+ | [< >] -> [< >]
+
+ast.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+ (* expr - Base type for all expression nodes. *)
+ type expr =
+ (* variant for numeric literals like "1.0". *)
+ | Number of float
+
+ (* variant for referencing a variable, like "a". *)
+ | Variable of string
+
+ (* variant for a binary operator. *)
+ | Binary of char * expr * expr
+
+ (* variant for function calls. *)
+ | Call of string * expr array
+
+ (* variant for if/then/else. *)
+ | If of expr * expr * expr
+
+ (* variant for for/in. *)
+ | For of string * expr * expr * expr option * expr
+
+ (* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+ type proto = Prototype of string * string array
+
+ (* func - This type represents a function definition itself. *)
+ type func = Function of proto * expr
+
+parser.ml:
+ .. code-block:: ocaml
+
+ (*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+ (* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+ let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+ (* precedence - Get the precedence of the pending binary operator token. *)
+ let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+ (* primary
+ * ::= identifier
+ * ::= numberexpr
+ * ::= parenexpr
+ * ::= ifexpr
+ * ::= forexpr *)
+ let rec parse_primary = parser
+ (* numberexpr ::= number *)
+ | [< 'Token.Number n >] -> Ast.Number n
+
+ (* parenexpr ::= '(' expression ')' *)
+ | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+ (* identifierexpr
+ * ::= identifier
+ * ::= identifier '(' argumentexpr ')' *)
+ | [< 'Token.Ident id; stream >] ->
+ let rec parse_args accumulator = parser
+ | [< e=parse_expr; stream >] ->
+ begin parser
+ | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+ | [< >] -> e :: accumulator
+ end stream
+ | [< >] -> accumulator
+ in
+ let rec parse_ident id = parser
+ (* Call. *)
+ | [< 'Token.Kwd '(';
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')'">] ->
+ Ast.Call (id, Array.of_list (List.rev args))
+
+ (* Simple variable ref. *)
+ | [< >] -> Ast.Variable id
+ in
+ parse_ident id stream
+
+ (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+ | [< 'Token.If; c=parse_expr;
+ 'Token.Then ?? "expected 'then'"; t=parse_expr;
+ 'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+ Ast.If (c, t, e)
+
+ (* forexpr
+ ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+ | [< 'Token.For;
+ 'Token.Ident id ?? "expected identifier after for";
+ 'Token.Kwd '=' ?? "expected '=' after for";
+ stream >] ->
+ begin parser
+ | [<
+ start=parse_expr;
+ 'Token.Kwd ',' ?? "expected ',' after for";
+ end_=parse_expr;
+ stream >] ->
+ let step =
+ begin parser
+ | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+ | [< >] -> None
+ end stream
+ in
+ begin parser
+ | [< 'Token.In; body=parse_expr >] ->
+ Ast.For (id, start, end_, step, body)
+ | [< >] ->
+ raise (Stream.Error "expected 'in' after for")
+ end stream
+ | [< >] ->
+ raise (Stream.Error "expected '=' after for")
+ end stream
+
+ | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+ (* binoprhs
+ * ::= ('+' primary)* *)
+ and parse_bin_rhs expr_prec lhs stream =
+ match Stream.peek stream with
+ (* If this is a binop, find its precedence. *)
+ | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+ let token_prec = precedence c in
+
+ (* If this is a binop that binds at least as tightly as the current binop,
+ * consume it, otherwise we are done. *)
+ if token_prec < expr_prec then lhs else begin
+ (* Eat the binop. *)
+ Stream.junk stream;
+
+ (* Parse the primary expression after the binary operator. *)
+ let rhs = parse_primary stream in
+
+ (* Okay, we know this is a binop. *)
+ let rhs =
+ match Stream.peek stream with
+ | Some (Token.Kwd c2) ->
+ (* If BinOp binds less tightly with rhs than the operator after
+ * rhs, let the pending operator take rhs as its lhs. *)
+ let next_prec = precedence c2 in
+ if token_prec < next_prec
+ then parse_bin_rhs (token_prec + 1) rhs stream
+ else rhs
+ | _ -> rhs
+ in
+
+ (* Merge lhs/rhs. *)
+ let lhs = Ast.Binary (c, lhs, rhs) in
+ parse_bin_rhs expr_prec lhs stream
+ end
+ | _ -> lhs
+
+ (* expression
+ * ::= primary binoprhs *)
+ and parse_expr = parser
+ | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+ (* prototype
+ * ::= id '(' id* ')' *)
+ let parse_prototype =
+ let rec parse_args accumulator = parser
+ | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+ | [< >] -> accumulator
+ in
+
+ parser
+ | [< 'Token.Ident id;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ (* success. *)
+ Ast.Prototype (id, Array.of_list (List.rev args))
+
+ | [< >] ->
+ raise (Stream.Error "expected function name in prototype")
+
+ (* definition ::= 'def' prototype expression *)
+ let parse_definition = parser
+ | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+ Ast.Function (p, e)
+
+ (* toplevelexpr ::= expression *)
+ let parse_toplevel = parser
+ | [< e=parse_expr >] ->
+ (* Make an anonymous proto. *)
+ Ast.Function (Ast.Prototype ("", [||]), e)
+
+ (* external ::= 'extern' prototype *)
+ let parse_extern = parser
+ | [< 'Token.Extern; e=parse_prototype >] -> e
+
+codegen.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+
+ exception Error of string
+
+ let context = global_context ()
+ let the_module = create_module context "my cool jit"
+ let builder = builder context
+ let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+ let double_type = double_type context
+
+ let rec codegen_expr = function
+ | Ast.Number n -> const_float double_type n
+ | Ast.Variable name ->
+ (try Hashtbl.find named_values name with
+ | Not_found -> raise (Error "unknown variable name"))
+ | Ast.Binary (op, lhs, rhs) ->
+ let lhs_val = codegen_expr lhs in
+ let rhs_val = codegen_expr rhs in
+ begin
+ match op with
+ | '+' -> build_add lhs_val rhs_val "addtmp" builder
+ | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+ | '*' -> build_mul lhs_val rhs_val "multmp" builder
+ | '<' ->
+ (* Convert bool 0/1 to double 0.0 or 1.0 *)
+ let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+ build_uitofp i double_type "booltmp" builder
+ | _ -> raise (Error "invalid binary operator")
+ end
+ | Ast.Call (callee, args) ->
+ (* Look up the name in the module table. *)
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "unknown function referenced")
+ in
+ let params = params callee in
+
+ (* If argument mismatch error. *)
+ if Array.length params == Array.length args then () else
+ raise (Error "incorrect # arguments passed");
+ let args = Array.map codegen_expr args in
+ build_call callee args "calltmp" builder
+ | Ast.If (cond, then_, else_) ->
+ let cond = codegen_expr cond in
+
+ (* Convert condition to a bool by comparing equal to 0.0 *)
+ let zero = const_float double_type 0.0 in
+ let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+ (* Grab the first block so that we might later add the conditional branch
+ * to it at the end of the function. *)
+ let start_bb = insertion_block builder in
+ let the_function = block_parent start_bb in
+
+ let then_bb = append_block context "then" the_function in
+
+ (* Emit 'then' value. *)
+ position_at_end then_bb builder;
+ let then_val = codegen_expr then_ in
+
+ (* Codegen of 'then' can change the current block, update then_bb for the
+ * phi. We create a new name because one is used for the phi node, and the
+ * other is used for the conditional branch. *)
+ let new_then_bb = insertion_block builder in
+
+ (* Emit 'else' value. *)
+ let else_bb = append_block context "else" the_function in
+ position_at_end else_bb builder;
+ let else_val = codegen_expr else_ in
+
+ (* Codegen of 'else' can change the current block, update else_bb for the
+ * phi. *)
+ let new_else_bb = insertion_block builder in
+
+ (* Emit merge block. *)
+ let merge_bb = append_block context "ifcont" the_function in
+ position_at_end merge_bb builder;
+ let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+ let phi = build_phi incoming "iftmp" builder in
+
+ (* Return to the start block to add the conditional branch. *)
+ position_at_end start_bb builder;
+ ignore (build_cond_br cond_val then_bb else_bb builder);
+
+      (* Set an unconditional branch at the end of the 'then' block and the
+ * 'else' block to the 'merge' block. *)
+ position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+ position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+ (* Finally, set the builder to the end of the merge block. *)
+ position_at_end merge_bb builder;
+
+ phi
+ | Ast.For (var_name, start, end_, step, body) ->
+ (* Emit the start code first, without 'variable' in scope. *)
+ let start_val = codegen_expr start in
+
+ (* Make the new basic block for the loop header, inserting after current
+ * block. *)
+ let preheader_bb = insertion_block builder in
+ let the_function = block_parent preheader_bb in
+ let loop_bb = append_block context "loop" the_function in
+
+ (* Insert an explicit fall through from the current block to the
+ * loop_bb. *)
+ ignore (build_br loop_bb builder);
+
+ (* Start insertion in loop_bb. *)
+ position_at_end loop_bb builder;
+
+ (* Start the PHI node with an entry for start. *)
+ let variable = build_phi [(start_val, preheader_bb)] var_name builder in
+
+ (* Within the loop, the variable is defined equal to the PHI node. If it
+ * shadows an existing variable, we have to restore it, so save it
+ * now. *)
+ let old_val =
+ try Some (Hashtbl.find named_values var_name) with Not_found -> None
+ in
+ Hashtbl.add named_values var_name variable;
+
+ (* Emit the body of the loop. This, like any other expr, can change the
+ * current BB. Note that we ignore the value computed by the body, but
+ * don't allow an error *)
+ ignore (codegen_expr body);
+
+ (* Emit the step value. *)
+ let step_val =
+ match step with
+ | Some step -> codegen_expr step
+ (* If not specified, use 1.0. *)
+ | None -> const_float double_type 1.0
+ in
+
+ let next_var = build_add variable step_val "nextvar" builder in
+
+ (* Compute the end condition. *)
+ let end_cond = codegen_expr end_ in
+
+ (* Convert condition to a bool by comparing equal to 0.0. *)
+ let zero = const_float double_type 0.0 in
+ let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+ (* Create the "after loop" block and insert it. *)
+ let loop_end_bb = insertion_block builder in
+ let after_bb = append_block context "afterloop" the_function in
+
+ (* Insert the conditional branch into the end of loop_end_bb. *)
+ ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+ (* Any new code will be inserted in after_bb. *)
+ position_at_end after_bb builder;
+
+ (* Add a new entry to the PHI node for the backedge. *)
+ add_incoming (next_var, loop_end_bb) variable;
+
+      (* Restore the unshadowed variable. If nothing was shadowed, remove the
+       * loop variable so that it is no longer in scope after the loop. *)
+      begin match old_val with
+      | Some old_val -> Hashtbl.add named_values var_name old_val
+      | None -> Hashtbl.remove named_values var_name
+      end;
+
+ (* for expr always returns 0.0. *)
+ const_null double_type
+
+ let codegen_proto = function
+ | Ast.Prototype (name, args) ->
+ (* Make the function type: double(double,double) etc. *)
+ let doubles = Array.make (Array.length args) double_type in
+ let ft = function_type double_type doubles in
+ let f =
+ match lookup_function name the_module with
+ | None -> declare_function name ft the_module
+
+ (* If 'f' conflicted, there was already something named 'name'. If it
+ * has a body, don't allow redefinition or reextern. *)
+ | Some f ->
+ (* If 'f' already has a body, reject this. *)
+ if block_begin f <> At_end f then
+ raise (Error "redefinition of function");
+
+ (* If 'f' took a different number of arguments, reject. *)
+ if element_type (type_of f) <> ft then
+ raise (Error "redefinition of function with different # args");
+ f
+ in
+
+ (* Set names for all arguments. *)
+ Array.iteri (fun i a ->
+ let n = args.(i) in
+ set_value_name n a;
+ Hashtbl.add named_values n a;
+ ) (params f);
+ f
+
+ let codegen_func the_fpm = function
+ | Ast.Function (proto, body) ->
+ Hashtbl.clear named_values;
+ let the_function = codegen_proto proto in
+
+ (* Create a new basic block to start insertion into. *)
+ let bb = append_block context "entry" the_function in
+ position_at_end bb builder;
+
+ try
+ let ret_val = codegen_expr body in
+
+ (* Finish off the function. *)
+ let _ = build_ret ret_val builder in
+
+ (* Validate the generated code, checking for consistency. *)
+ Llvm_analysis.assert_valid_function the_function;
+
+ (* Optimize the function. *)
+ let _ = PassManager.run_function the_function the_fpm in
+
+ the_function
+ with e ->
+ delete_function the_function;
+ raise e
+
+toplevel.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+ open Llvm_executionengine
+
+ (* top ::= definition | external | expression | ';' *)
+ let rec main_loop the_fpm the_execution_engine stream =
+ match Stream.peek stream with
+ | None -> ()
+
+ (* ignore top-level semicolons. *)
+ | Some (Token.Kwd ';') ->
+ Stream.junk stream;
+ main_loop the_fpm the_execution_engine stream
+
+ | Some token ->
+ begin
+ try match token with
+ | Token.Def ->
+ let e = Parser.parse_definition stream in
+ print_endline "parsed a function definition.";
+ dump_value (Codegen.codegen_func the_fpm e);
+ | Token.Extern ->
+ let e = Parser.parse_extern stream in
+ print_endline "parsed an extern.";
+ dump_value (Codegen.codegen_proto e);
+ | _ ->
+ (* Evaluate a top-level expression into an anonymous function. *)
+ let e = Parser.parse_toplevel stream in
+ print_endline "parsed a top-level expr";
+ let the_function = Codegen.codegen_func the_fpm e in
+ dump_value the_function;
+
+ (* JIT the function, returning a function pointer. *)
+ let result = ExecutionEngine.run_function the_function [||]
+ the_execution_engine in
+
+ print_string "Evaluated to ";
+ print_float (GenericValue.as_float Codegen.double_type result);
+ print_newline ();
+ with Stream.Error s | Codegen.Error s ->
+ (* Skip token for error recovery. *)
+ Stream.junk stream;
+ print_endline s;
+ end;
+ print_string "ready> "; flush stdout;
+ main_loop the_fpm the_execution_engine stream
+
+toy.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+ open Llvm_executionengine
+ open Llvm_target
+ open Llvm_scalar_opts
+
+ let main () =
+ ignore (initialize_native_target ());
+
+ (* Install standard binary operators.
+ * 1 is the lowest precedence. *)
+ Hashtbl.add Parser.binop_precedence '<' 10;
+ Hashtbl.add Parser.binop_precedence '+' 20;
+ Hashtbl.add Parser.binop_precedence '-' 20;
+ Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
+
+ (* Prime the first token. *)
+ print_string "ready> "; flush stdout;
+ let stream = Lexer.lex (Stream.of_channel stdin) in
+
+ (* Create the JIT. *)
+ let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+ let the_fpm = PassManager.create_function Codegen.the_module in
+
+ (* Set up the optimizer pipeline. Start with registering info about how the
+ * target lays out data structures. *)
+ DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+ (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+ add_instruction_combination the_fpm;
+
+ (* reassociate expressions. *)
+ add_reassociation the_fpm;
+
+ (* Eliminate Common SubExpressions. *)
+ add_gvn the_fpm;
+
+ (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+ add_cfg_simplification the_fpm;
+
+ ignore (PassManager.initialize the_fpm);
+
+ (* Run the main "interpreter loop" now. *)
+ Toplevel.main_loop the_fpm the_execution_engine stream;
+
+ (* Print out all the generated code. *)
+ dump_module Codegen.the_module
+ ;;
+
+ main ()
+
+bindings.c:
+ .. code-block:: c
+
+ #include <stdio.h>
+
+ /* putchard - putchar that takes a double and returns 0. */
+ extern double putchard(double X) {
+ putchar((char)X);
+ return 0;
+ }
+
+`Next: Extending the language: user-defined
+operators <OCamlLangImpl6.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl6.html b/docs/tutorial/OCamlLangImpl6.html
deleted file mode 100644
index db252406fed7..000000000000
--- a/docs/tutorial/OCamlLangImpl6.html
+++ /dev/null
@@ -1,1574 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Extending the Language: User-defined Operators</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <meta name="author" content="Erick Tryzelaar">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: User-defined Operators</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 6
- <ol>
- <li><a href="#intro">Chapter 6 Introduction</a></li>
- <li><a href="#idea">User-defined Operators: the Idea</a></li>
- <li><a href="#binary">User-defined Binary Operators</a></li>
- <li><a href="#unary">User-defined Unary Operators</a></li>
- <li><a href="#example">Kicking the Tires</a></li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="OCamlLangImpl7.html">Chapter 7</a>: Extending the Language: Mutable
-Variables / SSA Construction</li>
-</ul>
-
-<div class="doc_author">
- <p>
- Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
- and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
- </p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 6 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 6 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial. At this point in our tutorial, we now have a fully
-functional language that is fairly minimal, but also useful. There
-is still one big problem with it, however. Our language doesn't have many
-useful operators (like division, logical negation, or even any comparisons
-besides less-than).</p>
-
-<p>This chapter of the tutorial takes a wild digression into adding user-defined
-operators to the simple and beautiful Kaleidoscope language. This digression now
-gives us a simple and ugly language in some ways, but also a powerful one at the
-same time. One of the great things about creating your own language is that you
-get to decide what is good or bad. In this tutorial we'll assume that it is
-okay to use this as a way to show some interesting parsing techniques.</p>
-
-<p>At the end of this tutorial, we'll run through an example Kaleidoscope
-application that <a href="#example">renders the Mandelbrot set</a>. This gives
-an example of what you can build with Kaleidoscope and its feature set.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="idea">User-defined Operators: the Idea</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The "operator overloading" that we will add to Kaleidoscope is more general than
-languages like C++. In C++, you are only allowed to redefine existing
-operators: you can't programatically change the grammar, introduce new
-operators, change precedence levels, etc. In this chapter, we will add this
-capability to Kaleidoscope, which will let the user round out the set of
-operators that are supported.</p>
-
-<p>The point of going into user-defined operators in a tutorial like this is to
-show the power and flexibility of using a hand-written parser. Thus far, the parser
-we have been implementing uses recursive descent for most parts of the grammar and
-operator precedence parsing for the expressions. See <a
-href="OCamlLangImpl2.html">Chapter 2</a> for details. Without using operator
-precedence parsing, it would be very difficult to allow the programmer to
-introduce new operators into the grammar: the grammar is dynamically extensible
-as the JIT runs.</p>
-
-<p>The two specific features we'll add are programmable unary operators (right
-now, Kaleidoscope has no unary operators at all) as well as binary operators.
-An example of this is:</p>
-
-<div class="doc_code">
-<pre>
-# Logical unary not.
-def unary!(v)
- if v then
- 0
- else
- 1;
-
-# Define &gt; with the same precedence as &lt;.
-def binary&gt; 10 (LHS RHS)
- RHS &lt; LHS;
-
-# Binary "logical or", (note that it does not "short circuit")
-def binary| 5 (LHS RHS)
- if LHS then
- 1
- else if RHS then
- 1
- else
- 0;
-
-# Define = with slightly lower precedence than relationals.
-def binary= 9 (LHS RHS)
- !(LHS &lt; RHS | LHS &gt; RHS);
-</pre>
-</div>
-
-<p>Many languages aspire to being able to implement their standard runtime
-library in the language itself. In Kaleidoscope, we can implement significant
-parts of the language in the library!</p>
-
-<p>We will break down implementation of these features into two parts:
-implementing support for user-defined binary operators and adding unary
-operators.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="binary">User-defined Binary Operators</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Adding support for user-defined binary operators is pretty simple with our
-current framework. We'll first add support for the unary/binary keywords:</p>
-
-<div class="doc_code">
-<pre>
-type token =
- ...
- <b>(* operators *)
- | Binary | Unary</b>
-
-...
-
-and lex_ident buffer = parser
- ...
- | "for" -&gt; [&lt; 'Token.For; stream &gt;]
- | "in" -&gt; [&lt; 'Token.In; stream &gt;]
- <b>| "binary" -&gt; [&lt; 'Token.Binary; stream &gt;]
- | "unary" -&gt; [&lt; 'Token.Unary; stream &gt;]</b>
-</pre>
-</div>
-
-<p>This just adds lexer support for the unary and binary keywords, like we
-did in <a href="OCamlLangImpl5.html#iflexer">previous chapters</a>. One nice
-thing about our current AST, is that we represent binary operators with full
-generalisation by using their ASCII code as the opcode. For our extended
-operators, we'll use this same representation, so we don't need any new AST or
-parser support.</p>
-
-<p>On the other hand, we have to be able to represent the definitions of these
-new operators, in the "def binary| 5" part of the function definition. In our
-grammar so far, the "name" for the function definition is parsed as the
-"prototype" production and into the <tt>Ast.Prototype</tt> AST node. To
-represent our new user-defined operators as prototypes, we have to extend
-the <tt>Ast.Prototype</tt> AST node like this:</p>
-
-<div class="doc_code">
-<pre>
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto =
- | Prototype of string * string array
- <b>| BinOpPrototype of string * string array * int</b>
-</pre>
-</div>
-
-<p>Basically, in addition to knowing a name for the prototype, we now keep track
-of whether it was an operator, and if it was, what precedence level the operator
-is at. The precedence is only used for binary operators (as you'll see below,
-it just doesn't apply for unary operators). Now that we have a way to represent
-the prototype for a user-defined operator, we need to parse it:</p>
-
-<div class="doc_code">
-<pre>
-(* prototype
- * ::= id '(' id* ')'
- <b>* ::= binary LETTER number? (id, id)
- * ::= unary LETTER number? (id) *)</b>
-let parse_prototype =
- let rec parse_args accumulator = parser
- | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; accumulator
- in
- let parse_operator = parser
- | [&lt; 'Token.Unary &gt;] -&gt; "unary", 1
- | [&lt; 'Token.Binary &gt;] -&gt; "binary", 2
- in
- let parse_binary_precedence = parser
- | [&lt; 'Token.Number n &gt;] -&gt; int_of_float n
- | [&lt; &gt;] -&gt; 30
- in
- parser
- | [&lt; 'Token.Ident id;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- (* success. *)
- Ast.Prototype (id, Array.of_list (List.rev args))
- <b>| [&lt; (prefix, kind)=parse_operator;
- 'Token.Kwd op ?? "expected an operator";
- (* Read the precedence if present. *)
- binary_precedence=parse_binary_precedence;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- let name = prefix ^ (String.make 1 op) in
- let args = Array.of_list (List.rev args) in
-
- (* Verify right number of arguments for operator. *)
- if Array.length args != kind
- then raise (Stream.Error "invalid number of operands for operator")
- else
- if kind == 1 then
- Ast.Prototype (name, args)
- else
- Ast.BinOpPrototype (name, args, binary_precedence)</b>
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected function name in prototype")
-</pre>
-</div>
-
-<p>This is all fairly straightforward parsing code, and we have already seen
-a lot of similar code in the past. One interesting part about the code above is
-the couple of lines that set up <tt>name</tt> for binary operators. This builds
-names like "binary@" for a newly defined "@" operator. This then takes
-advantage of the fact that symbol names in the LLVM symbol table are allowed to
-have any character in them, including embedded nul characters.</p>
-
-<p>The next interesting thing to add is codegen support for these binary
-operators. Given our current structure, this is a simple addition of a default
-case for our existing binary operator node:</p>
-
-<div class="doc_code">
-<pre>
-let codegen_expr = function
- ...
- | Ast.Binary (op, lhs, rhs) -&gt;
- let lhs_val = codegen_expr lhs in
- let rhs_val = codegen_expr rhs in
- begin
- match op with
- | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
- | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
- | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
- | '&lt;' -&gt;
- (* Convert bool 0/1 to double 0.0 or 1.0 *)
- let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
- build_uitofp i double_type "booltmp" builder
- <b>| _ -&gt;
- (* If it wasn't a builtin binary operator, it must be a user defined
- * one. Emit a call to it. *)
- let callee = "binary" ^ (String.make 1 op) in
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "binary operator not found!")
- in
- build_call callee [|lhs_val; rhs_val|] "binop" builder</b>
- end
-</pre>
-</div>
-
-<p>As you can see above, the new code is actually really simple. It just does
-a lookup for the appropriate operator in the symbol table and generates a
-function call to it. Since user-defined operators are just built as normal
-functions (because the "prototype" boils down to a function with the right
-name), everything falls into place.</p>
-
-<p>The final piece of code we are missing is a bit of top-level magic:</p>
-
-<div class="doc_code">
-<pre>
-let codegen_func the_fpm = function
- | Ast.Function (proto, body) -&gt;
- Hashtbl.clear named_values;
- let the_function = codegen_proto proto in
-
- <b>(* If this is an operator, install it. *)
- begin match proto with
- | Ast.BinOpPrototype (name, args, prec) -&gt;
- let op = name.[String.length name - 1] in
- Hashtbl.add Parser.binop_precedence op prec;
- | _ -&gt; ()
- end;</b>
-
- (* Create a new basic block to start insertion into. *)
- let bb = append_block context "entry" the_function in
- position_at_end bb builder;
- ...
-</pre>
-</div>
-
-<p>Basically, before codegening a function, if it is a user-defined operator, we
-register it in the precedence table. This allows the binary operator parsing
-logic we already have in place to handle it. Since we are working on a
-fully-general operator precedence parser, this is all we need to do to "extend
-the grammar".</p>
-
-<p>Now we have useful user-defined binary operators. This builds a lot
-on the previous framework we built for other operators. Adding unary operators
-is a bit more challenging, because we don't have any framework for it yet - let's
-see what it takes.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="unary">User-defined Unary Operators</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Since we don't currently support unary operators in the Kaleidoscope
-language, we'll need to add everything to support them. Above, we added simple
-support for the 'unary' keyword to the lexer. In addition to that, we need an
-AST node:</p>
-
-<div class="doc_code">
-<pre>
-type expr =
- ...
- (* variant for a unary operator. *)
- | Unary of char * expr
- ...
-</pre>
-</div>
-
-<p>This AST node is very simple and obvious by now. It directly mirrors the
-binary operator AST node, except that it only has one child. With this, we
-need to add the parsing logic. Parsing a unary operator is pretty simple: we'll
-add a new function to do it:</p>
-
-<div class="doc_code">
-<pre>
-(* unary
- * ::= primary
- * ::= '!' unary *)
-and parse_unary = parser
- (* If this is a unary operator, read it. *)
- | [&lt; 'Token.Kwd op when op != '(' &amp;&amp; op != ')'; operand=parse_expr &gt;] -&gt;
- Ast.Unary (op, operand)
-
- (* If the current token is not an operator, it must be a primary expr. *)
- | [&lt; stream &gt;] -&gt; parse_primary stream
-</pre>
-</div>
-
-<p>The grammar we add is pretty straightforward here. If we see a unary
-operator when parsing a primary expression, we eat the operator as a prefix and
-parse the remaining piece as another unary operator. This allows us to handle
-multiple unary operators (e.g. "!!x"). Note that unary operators can't have
-ambiguous parses like binary operators can, so there is no need for precedence
-information.</p>
-
-<p>The problem with this function is that we need to call <tt>parse_unary</tt>
-from somewhere. To do this, we change previous callers of
-<tt>parse_primary</tt> to call <tt>parse_unary</tt> instead:</p>
-
-<div class="doc_code">
-<pre>
-(* binoprhs
- * ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
- ...
- <b>(* Parse the unary expression after the binary operator. *)
- let rhs = parse_unary stream in</b>
- ...
-
-...
-
-(* expression
- * ::= primary binoprhs *)
-and parse_expr = parser
- | [&lt; lhs=<b>parse_unary</b>; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-</pre>
-</div>
-
-<p>With these two simple changes, we are now able to parse unary operators and build the
-AST for them. Next up, we need to add parser support for prototypes, to parse
-the unary operator prototype. We extend the binary operator code above
-with:</p>
-
-<div class="doc_code">
-<pre>
-(* prototype
- * ::= id '(' id* ')'
- * ::= binary LETTER number? (id, id)
- <b>* ::= unary LETTER number? (id)</b> *)
-let parse_prototype =
- let rec parse_args accumulator = parser
- | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; accumulator
- in
- <b>let parse_operator = parser
- | [&lt; 'Token.Unary &gt;] -&gt; "unary", 1
- | [&lt; 'Token.Binary &gt;] -&gt; "binary", 2
- in</b>
- let parse_binary_precedence = parser
- | [&lt; 'Token.Number n &gt;] -&gt; int_of_float n
- | [&lt; &gt;] -&gt; 30
- in
- parser
- | [&lt; 'Token.Ident id;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- (* success. *)
- Ast.Prototype (id, Array.of_list (List.rev args))
- <b>| [&lt; (prefix, kind)=parse_operator;
- 'Token.Kwd op ?? "expected an operator";
- (* Read the precedence if present. *)
- binary_precedence=parse_binary_precedence;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- let name = prefix ^ (String.make 1 op) in
- let args = Array.of_list (List.rev args) in
-
- (* Verify right number of arguments for operator. *)
- if Array.length args != kind
- then raise (Stream.Error "invalid number of operands for operator")
- else
- if kind == 1 then
- Ast.Prototype (name, args)
- else
- Ast.BinOpPrototype (name, args, binary_precedence)</b>
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected function name in prototype")
-</pre>
-</div>
-
-<p>As with binary operators, we give unary operators a name that includes
-the operator character. This assists us at code generation time. Speaking of
-which, the final piece we need to add is codegen support for unary operators.
-It looks like this:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
- ...
- | Ast.Unary (op, operand) -&gt;
- let operand = codegen_expr operand in
- let callee = "unary" ^ (String.make 1 op) in
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "unknown unary operator")
- in
- build_call callee [|operand|] "unop" builder
-</pre>
-</div>
-
-<p>This code is similar to, but simpler than, the code for binary operators. It
-is simpler primarily because it doesn't need to handle any predefined operators.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="example">Kicking the Tires</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>It is somewhat hard to believe, but with a few simple extensions we've
-covered in the last chapters, we have grown a real-ish language. With this, we
-can do a lot of interesting things, including I/O, math, and a bunch of other
-things. For example, we can now add a nice sequencing operator (printd is
-defined to print out the specified value and a newline):</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern printd(x);</b>
-Read extern: declare double @printd(double)
-ready&gt; <b>def binary : 1 (x y) 0; # Low-precedence operator that ignores operands.</b>
-..
-ready&gt; <b>printd(123) : printd(456) : printd(789);</b>
-123.000000
-456.000000
-789.000000
-Evaluated to 0.000000
-</pre>
-</div>
-
-<p>We can also define a bunch of other "primitive" operations, such as:</p>
-
-<div class="doc_code">
-<pre>
-# Logical unary not.
-def unary!(v)
- if v then
- 0
- else
- 1;
-
-# Unary negate.
-def unary-(v)
- 0-v;
-
-# Define &gt; with the same precedence as &lt;.
-def binary&gt; 10 (LHS RHS)
- RHS &lt; LHS;
-
-# Binary logical or, which does not short circuit.
-def binary| 5 (LHS RHS)
- if LHS then
- 1
- else if RHS then
- 1
- else
- 0;
-
-# Binary logical and, which does not short circuit.
-def binary&amp; 6 (LHS RHS)
- if !LHS then
- 0
- else
- !!RHS;
-
-# Define = with slightly lower precedence than relationals.
-def binary = 9 (LHS RHS)
- !(LHS &lt; RHS | LHS &gt; RHS);
-
-</pre>
-</div>
-
-
-<p>Given the previous if/then/else support, we can also define interesting
-functions for I/O. For example, the following prints out a character whose
-"density" reflects the value passed in: the lower the value, the denser the
-character:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt;
-<b>
-extern putchard(char)
-def printdensity(d)
- if d &gt; 8 then
- putchard(32) # ' '
- else if d &gt; 4 then
- putchard(46) # '.'
- else if d &gt; 2 then
- putchard(43) # '+'
- else
- putchard(42); # '*'</b>
-...
-ready&gt; <b>printdensity(1): printdensity(2): printdensity(3) :
- printdensity(4): printdensity(5): printdensity(9): putchard(10);</b>
-*++..
-Evaluated to 0.000000
-</pre>
-</div>
-
-<p>Based on these simple primitive operations, we can start to define more
-interesting things. For example, here's a little function that solves for the
-number of iterations it takes a function in the complex plane to
-converge:</p>
-
-<div class="doc_code">
-<pre>
-# determine whether the specific location diverges.
-# Solve for z = z^2 + c in the complex plane.
-def mandleconverger(real imag iters creal cimag)
- if iters &gt; 255 | (real*real + imag*imag &gt; 4) then
- iters
- else
- mandleconverger(real*real - imag*imag + creal,
- 2*real*imag + cimag,
- iters+1, creal, cimag);
-
-# return the number of iterations required for the iteration to escape
-def mandleconverge(real imag)
- mandleconverger(real, imag, 0, real, imag);
-</pre>
-</div>
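-
-<p>In case the update rule in <tt>mandleconverger</tt> looks mysterious: it is
-just "z*z + c" written out in real and imaginary components. A hypothetical
-helper (not part of the tutorial sources) makes the algebra explicit:</p>
-
-<div class="doc_code">
-<pre>
-(* For z = real + imag*i and c = creal + cimag*i:
- *   z*z + c = (real*real - imag*imag + creal) + (2*real*imag + cimag)*i
- * which is exactly the argument pair mandleconverger passes to itself. *)
-let next_z (real, imag) (creal, cimag) =
-  (real *. real -. imag *. imag +. creal,
-   2.0 *. real *. imag +. cimag)
-</pre>
-</div>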
-
-<p>This "z = z<sup>2</sup> + c" function is a beautiful little creature that is the basis
-for computation of the <a
-href="http://en.wikipedia.org/wiki/Mandelbrot_set">Mandelbrot Set</a>. Our
-<tt>mandelconverge</tt> function returns the number of iterations that it takes
-for a complex orbit to escape, saturating to 255. This is not a very useful
-function by itself, but if you plot its value over a two-dimensional plane,
-you can see the Mandelbrot set. Given that we are limited to using putchard
-here, our amazing graphical output is limited, but we can whip together
-something using the density plotter above:</p>
-
-<div class="doc_code">
-<pre>
-# compute and plot the Mandelbrot set with the specified 2 dimensional range
-# info.
-def mandelhelp(xmin xmax xstep ymin ymax ystep)
- for y = ymin, y &lt; ymax, ystep in (
- (for x = xmin, x &lt; xmax, xstep in
- printdensity(mandleconverge(x,y)))
- : putchard(10)
- )
-
-# mandel - This is a convenient helper function for plotting the Mandelbrot set
-# from the specified position with the specified magnification.
-def mandel(realstart imagstart realmag imagmag)
- mandelhelp(realstart, realstart+realmag*78, realmag,
- imagstart, imagstart+imagmag*40, imagmag);
-</pre>
-</div>
-
-<p>Given this, we can try plotting out the Mandelbrot set! Let's try it out:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>mandel(-2.3, -1.3, 0.05, 0.07);</b>
-*******************************+++++++++++*************************************
-*************************+++++++++++++++++++++++*******************************
-**********************+++++++++++++++++++++++++++++****************************
-*******************+++++++++++++++++++++.. ...++++++++*************************
-*****************++++++++++++++++++++++.... ...+++++++++***********************
-***************+++++++++++++++++++++++..... ...+++++++++*********************
-**************+++++++++++++++++++++++.... ....+++++++++********************
-*************++++++++++++++++++++++...... .....++++++++*******************
-************+++++++++++++++++++++....... .......+++++++******************
-***********+++++++++++++++++++.... ... .+++++++*****************
-**********+++++++++++++++++....... .+++++++****************
-*********++++++++++++++........... ...+++++++***************
-********++++++++++++............ ...++++++++**************
-********++++++++++... .......... .++++++++**************
-*******+++++++++..... .+++++++++*************
-*******++++++++...... ..+++++++++*************
-*******++++++....... ..+++++++++*************
-*******+++++...... ..+++++++++*************
-*******.... .... ...+++++++++*************
-*******.... . ...+++++++++*************
-*******+++++...... ...+++++++++*************
-*******++++++....... ..+++++++++*************
-*******++++++++...... .+++++++++*************
-*******+++++++++..... ..+++++++++*************
-********++++++++++... .......... .++++++++**************
-********++++++++++++............ ...++++++++**************
-*********++++++++++++++.......... ...+++++++***************
-**********++++++++++++++++........ .+++++++****************
-**********++++++++++++++++++++.... ... ..+++++++****************
-***********++++++++++++++++++++++....... .......++++++++*****************
-************+++++++++++++++++++++++...... ......++++++++******************
-**************+++++++++++++++++++++++.... ....++++++++********************
-***************+++++++++++++++++++++++..... ...+++++++++*********************
-*****************++++++++++++++++++++++.... ...++++++++***********************
-*******************+++++++++++++++++++++......++++++++*************************
-*********************++++++++++++++++++++++.++++++++***************************
-*************************+++++++++++++++++++++++*******************************
-******************************+++++++++++++************************************
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-Evaluated to 0.000000
-ready&gt; <b>mandel(-2, -1, 0.02, 0.04);</b>
-**************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
-***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-*********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
-*******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
-*****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
-***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
-**************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
-************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
-***********++++++++++++++++++++++++++++++++++++++++++++++++++........ .
-**********++++++++++++++++++++++++++++++++++++++++++++++.............
-********+++++++++++++++++++++++++++++++++++++++++++..................
-*******+++++++++++++++++++++++++++++++++++++++.......................
-******+++++++++++++++++++++++++++++++++++...........................
-*****++++++++++++++++++++++++++++++++............................
-*****++++++++++++++++++++++++++++...............................
-****++++++++++++++++++++++++++...... .........................
-***++++++++++++++++++++++++......... ...... ...........
-***++++++++++++++++++++++............
-**+++++++++++++++++++++..............
-**+++++++++++++++++++................
-*++++++++++++++++++.................
-*++++++++++++++++............ ...
-*++++++++++++++..............
-*+++....++++................
-*.......... ...........
-*
-*.......... ...........
-*+++....++++................
-*++++++++++++++..............
-*++++++++++++++++............ ...
-*++++++++++++++++++.................
-**+++++++++++++++++++................
-**+++++++++++++++++++++..............
-***++++++++++++++++++++++............
-***++++++++++++++++++++++++......... ...... ...........
-****++++++++++++++++++++++++++...... .........................
-*****++++++++++++++++++++++++++++...............................
-*****++++++++++++++++++++++++++++++++............................
-******+++++++++++++++++++++++++++++++++++...........................
-*******+++++++++++++++++++++++++++++++++++++++.......................
-********+++++++++++++++++++++++++++++++++++++++++++..................
-Evaluated to 0.000000
-ready&gt; <b>mandel(-0.9, -1.4, 0.02, 0.03);</b>
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-**********+++++++++++++++++++++************************************************
-*+++++++++++++++++++++++++++++++++++++++***************************************
-+++++++++++++++++++++++++++++++++++++++++++++**********************************
-++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
-++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
-+++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
-+++++++++++++++++++++++++++++++.... ......+++++++++++++++++++****************
-+++++++++++++++++++++++++++++....... ........+++++++++++++++++++**************
-++++++++++++++++++++++++++++........ ........++++++++++++++++++++************
-+++++++++++++++++++++++++++......... .. ...+++++++++++++++++++++**********
-++++++++++++++++++++++++++........... ....++++++++++++++++++++++********
-++++++++++++++++++++++++............. .......++++++++++++++++++++++******
-+++++++++++++++++++++++............. ........+++++++++++++++++++++++****
-++++++++++++++++++++++........... ..........++++++++++++++++++++++***
-++++++++++++++++++++........... .........++++++++++++++++++++++*
-++++++++++++++++++............ ...........++++++++++++++++++++
-++++++++++++++++............... .............++++++++++++++++++
-++++++++++++++................. ...............++++++++++++++++
-++++++++++++.................. .................++++++++++++++
-+++++++++.................. .................+++++++++++++
-++++++........ . ......... ..++++++++++++
-++............ ...... ....++++++++++
-.............. ...++++++++++
-.............. ....+++++++++
-.............. .....++++++++
-............. ......++++++++
-........... .......++++++++
-......... ........+++++++
-......... ........+++++++
-......... ....+++++++
-........ ...+++++++
-....... ...+++++++
- ....+++++++
- .....+++++++
- ....+++++++
- ....+++++++
- ....+++++++
-Evaluated to 0.000000
-ready&gt; <b>^D</b>
-</pre>
-</div>
-
-<p>At this point, you may be starting to realize that Kaleidoscope is a real
-and powerful language. It may not be self-similar :), but it can be used to
-plot things that are!</p>
-
-<p>With this, we conclude the "adding user-defined operators" chapter of the
-tutorial. We have successfully augmented our language, adding the ability to
-extend the language in the library, and we have shown how this can be used to
-build a simple but interesting end-user application in Kaleidoscope. At this
-point, Kaleidoscope can build a variety of applications that are functional and
-can call functions with side-effects, but it can't actually define and mutate a
-variable itself.</p>
-
-<p>Strikingly, variable mutation is an important feature of some
-languages, and it is not at all obvious how to <a href="OCamlLangImpl7.html">add
-support for mutable variables</a> without having to add an "SSA construction"
-phase to your front-end. In the next chapter, we will describe how you can
-add variable mutation without building SSA in your front-end.</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-support for user-defined operators. To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-&lt;*.{byte,native}&gt;: g++, use_llvm, use_llvm_analysis
-&lt;*.{byte,native}&gt;: use_llvm_executionengine, use_llvm_target
-&lt;*.{byte,native}&gt;: use_llvm_scalar_opts, use_bindings
-</pre>
-</dd>
-
-<dt>myocamlbuild.ml:</dt>
-<dd class="doc_code">
-<pre>
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
- (* commands *)
- | Def | Extern
-
- (* primary *)
- | Ident of string | Number of float
-
- (* unknown *)
- | Kwd of char
-
- (* control *)
- | If | Then | Else
- | For | In
-
- (* operators *)
- | Binary | Unary
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
- (* Skip any whitespace. *)
- | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
- (* identifier: [a-zA-Z][a-zA-Z0-9] *)
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_ident buffer stream
-
- (* number: [0-9.]+ *)
- | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_number buffer stream
-
- (* Comment until end of line. *)
- | [&lt; ' ('#'); stream &gt;] -&gt;
- lex_comment stream
-
- (* Otherwise, just return the character as its ascii value. *)
- | [&lt; 'c; stream &gt;] -&gt;
- [&lt; 'Token.Kwd c; lex stream &gt;]
-
- (* end of stream. *)
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
- | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_number buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_ident buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- match Buffer.contents buffer with
- | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
- | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
- | "if" -&gt; [&lt; 'Token.If; stream &gt;]
- | "then" -&gt; [&lt; 'Token.Then; stream &gt;]
- | "else" -&gt; [&lt; 'Token.Else; stream &gt;]
- | "for" -&gt; [&lt; 'Token.For; stream &gt;]
- | "in" -&gt; [&lt; 'Token.In; stream &gt;]
- | "binary" -&gt; [&lt; 'Token.Binary; stream &gt;]
- | "unary" -&gt; [&lt; 'Token.Unary; stream &gt;]
- | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
- | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
- | [&lt; 'c; e=lex_comment &gt;] -&gt; e
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
- (* variant for numeric literals like "1.0". *)
- | Number of float
-
- (* variant for referencing a variable, like "a". *)
- | Variable of string
-
- (* variant for a unary operator. *)
- | Unary of char * expr
-
- (* variant for a binary operator. *)
- | Binary of char * expr * expr
-
- (* variant for function calls. *)
- | Call of string * expr array
-
- (* variant for if/then/else. *)
- | If of expr * expr * expr
-
- (* variant for for/in. *)
- | For of string * expr * expr * expr option * expr
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto =
- | Prototype of string * string array
- | BinOpPrototype of string * string array * int
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- * ::= identifier
- * ::= numberexpr
- * ::= parenexpr
- * ::= ifexpr
- * ::= forexpr *)
-let rec parse_primary = parser
- (* numberexpr ::= number *)
- | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
- (* parenexpr ::= '(' expression ')' *)
- | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
- (* identifierexpr
- * ::= identifier
- * ::= identifier '(' argumentexpr ')' *)
- | [&lt; 'Token.Ident id; stream &gt;] -&gt;
- let rec parse_args accumulator = parser
- | [&lt; e=parse_expr; stream &gt;] -&gt;
- begin parser
- | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; e :: accumulator
- end stream
- | [&lt; &gt;] -&gt; accumulator
- in
- let rec parse_ident id = parser
- (* Call. *)
- | [&lt; 'Token.Kwd '(';
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
- Ast.Call (id, Array.of_list (List.rev args))
-
- (* Simple variable ref. *)
- | [&lt; &gt;] -&gt; Ast.Variable id
- in
- parse_ident id stream
-
- (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
- | [&lt; 'Token.If; c=parse_expr;
- 'Token.Then ?? "expected 'then'"; t=parse_expr;
- 'Token.Else ?? "expected 'else'"; e=parse_expr &gt;] -&gt;
- Ast.If (c, t, e)
-
- (* forexpr
- ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
- | [&lt; 'Token.For;
- 'Token.Ident id ?? "expected identifier after for";
- 'Token.Kwd '=' ?? "expected '=' after for";
- stream &gt;] -&gt;
- begin parser
- | [&lt;
- start=parse_expr;
- 'Token.Kwd ',' ?? "expected ',' after for";
- end_=parse_expr;
- stream &gt;] -&gt;
- let step =
- begin parser
- | [&lt; 'Token.Kwd ','; step=parse_expr &gt;] -&gt; Some step
- | [&lt; &gt;] -&gt; None
- end stream
- in
- begin parser
- | [&lt; 'Token.In; body=parse_expr &gt;] -&gt;
- Ast.For (id, start, end_, step, body)
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected 'in' after for")
- end stream
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected '=' after for")
- end stream
-
- | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* unary
- * ::= primary
- * ::= '!' unary *)
-and parse_unary = parser
- (* If this is a unary operator, read it. *)
- | [&lt; 'Token.Kwd op when op != '(' &amp;&amp; op != ')'; operand=parse_expr &gt;] -&gt;
- Ast.Unary (op, operand)
-
- (* If the current token is not an operator, it must be a primary expr. *)
- | [&lt; stream &gt;] -&gt; parse_primary stream
-
-(* binoprhs
- * ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
- match Stream.peek stream with
- (* If this is a binop, find its precedence. *)
- | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
- let token_prec = precedence c in
-
- (* If this is a binop that binds at least as tightly as the current binop,
- * consume it, otherwise we are done. *)
- if token_prec &lt; expr_prec then lhs else begin
- (* Eat the binop. *)
- Stream.junk stream;
-
- (* Parse the unary expression after the binary operator. *)
- let rhs = parse_unary stream in
-
- (* Okay, we know this is a binop. *)
- let rhs =
- match Stream.peek stream with
- | Some (Token.Kwd c2) -&gt;
- (* If BinOp binds less tightly with rhs than the operator after
- * rhs, let the pending operator take rhs as its lhs. *)
- let next_prec = precedence c2 in
- if token_prec &lt; next_prec
- then parse_bin_rhs (token_prec + 1) rhs stream
- else rhs
- | _ -&gt; rhs
- in
-
- (* Merge lhs/rhs. *)
- let lhs = Ast.Binary (c, lhs, rhs) in
- parse_bin_rhs expr_prec lhs stream
- end
- | _ -&gt; lhs
-
-(* expression
- * ::= primary binoprhs *)
-and parse_expr = parser
- | [&lt; lhs=parse_unary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- * ::= id '(' id* ')'
- * ::= binary LETTER number? (id, id)
- * ::= unary LETTER number? (id) *)
-let parse_prototype =
- let rec parse_args accumulator = parser
- | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; accumulator
- in
- let parse_operator = parser
- | [&lt; 'Token.Unary &gt;] -&gt; "unary", 1
- | [&lt; 'Token.Binary &gt;] -&gt; "binary", 2
- in
- let parse_binary_precedence = parser
- | [&lt; 'Token.Number n &gt;] -&gt; int_of_float n
- | [&lt; &gt;] -&gt; 30
- in
- parser
- | [&lt; 'Token.Ident id;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- (* success. *)
- Ast.Prototype (id, Array.of_list (List.rev args))
- | [&lt; (prefix, kind)=parse_operator;
- 'Token.Kwd op ?? "expected an operator";
- (* Read the precedence if present. *)
- binary_precedence=parse_binary_precedence;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- let name = prefix ^ (String.make 1 op) in
- let args = Array.of_list (List.rev args) in
-
- (* Verify right number of arguments for operator. *)
- if Array.length args != kind
- then raise (Stream.Error "invalid number of operands for operator")
- else
- if kind == 1 then
- Ast.Prototype (name, args)
- else
- Ast.BinOpPrototype (name, args, binary_precedence)
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
- | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
- Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
- | [&lt; e=parse_expr &gt;] -&gt;
- (* Make an anonymous proto. *)
- Ast.Function (Ast.Prototype ("", [||]), e)
-
-(* external ::= 'extern' prototype *)
-let parse_extern = parser
- | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>codegen.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-let rec codegen_expr = function
- | Ast.Number n -&gt; const_float double_type n
- | Ast.Variable name -&gt;
- (try Hashtbl.find named_values name with
- | Not_found -&gt; raise (Error "unknown variable name"))
- | Ast.Unary (op, operand) -&gt;
- let operand = codegen_expr operand in
- let callee = "unary" ^ (String.make 1 op) in
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "unknown unary operator")
- in
- build_call callee [|operand|] "unop" builder
- | Ast.Binary (op, lhs, rhs) -&gt;
- let lhs_val = codegen_expr lhs in
- let rhs_val = codegen_expr rhs in
- begin
- match op with
- | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
- | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
- | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
- | '&lt;' -&gt;
- (* Convert bool 0/1 to double 0.0 or 1.0 *)
- let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
- build_uitofp i double_type "booltmp" builder
- | _ -&gt;
- (* If it wasn't a builtin binary operator, it must be a user defined
- * one. Emit a call to it. *)
- let callee = "binary" ^ (String.make 1 op) in
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "binary operator not found!")
- in
- build_call callee [|lhs_val; rhs_val|] "binop" builder
- end
- | Ast.Call (callee, args) -&gt;
- (* Look up the name in the module table. *)
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "unknown function referenced")
- in
- let params = params callee in
-
- (* If argument mismatch error. *)
- if Array.length params == Array.length args then () else
- raise (Error "incorrect # arguments passed");
- let args = Array.map codegen_expr args in
- build_call callee args "calltmp" builder
- | Ast.If (cond, then_, else_) -&gt;
- let cond = codegen_expr cond in
-
- (* Convert condition to a bool by comparing equal to 0.0 *)
- let zero = const_float double_type 0.0 in
- let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-
- (* Grab the first block so that we might later add the conditional branch
- * to it at the end of the function. *)
- let start_bb = insertion_block builder in
- let the_function = block_parent start_bb in
-
- let then_bb = append_block context "then" the_function in
-
- (* Emit 'then' value. *)
- position_at_end then_bb builder;
- let then_val = codegen_expr then_ in
-
- (* Codegen of 'then' can change the current block, update then_bb for the
- * phi. We create a new name because one is used for the phi node, and the
- * other is used for the conditional branch. *)
- let new_then_bb = insertion_block builder in
-
- (* Emit 'else' value. *)
- let else_bb = append_block context "else" the_function in
- position_at_end else_bb builder;
- let else_val = codegen_expr else_ in
-
- (* Codegen of 'else' can change the current block, update else_bb for the
- * phi. *)
- let new_else_bb = insertion_block builder in
-
- (* Emit merge block. *)
- let merge_bb = append_block context "ifcont" the_function in
- position_at_end merge_bb builder;
- let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
- let phi = build_phi incoming "iftmp" builder in
-
- (* Return to the start block to add the conditional branch. *)
- position_at_end start_bb builder;
- ignore (build_cond_br cond_val then_bb else_bb builder);
-
- (* Set a unconditional branch at the end of the 'then' block and the
- * 'else' block to the 'merge' block. *)
- position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
- position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
- (* Finally, set the builder to the end of the merge block. *)
- position_at_end merge_bb builder;
-
- phi
- | Ast.For (var_name, start, end_, step, body) -&gt;
- (* Emit the start code first, without 'variable' in scope. *)
- let start_val = codegen_expr start in
-
- (* Make the new basic block for the loop header, inserting after current
- * block. *)
- let preheader_bb = insertion_block builder in
- let the_function = block_parent preheader_bb in
- let loop_bb = append_block context "loop" the_function in
-
- (* Insert an explicit fall through from the current block to the
- * loop_bb. *)
- ignore (build_br loop_bb builder);
-
- (* Start insertion in loop_bb. *)
- position_at_end loop_bb builder;
-
- (* Start the PHI node with an entry for start. *)
- let variable = build_phi [(start_val, preheader_bb)] var_name builder in
-
- (* Within the loop, the variable is defined equal to the PHI node. If it
- * shadows an existing variable, we have to restore it, so save it
- * now. *)
- let old_val =
- try Some (Hashtbl.find named_values var_name) with Not_found -&gt; None
- in
- Hashtbl.add named_values var_name variable;
-
- (* Emit the body of the loop. This, like any other expr, can change the
- * current BB. Note that we ignore the value computed by the body, but
- * don't allow an error *)
- ignore (codegen_expr body);
-
- (* Emit the step value. *)
- let step_val =
- match step with
- | Some step -&gt; codegen_expr step
- (* If not specified, use 1.0. *)
- | None -&gt; const_float double_type 1.0
- in
-
- let next_var = build_add variable step_val "nextvar" builder in
-
- (* Compute the end condition. *)
- let end_cond = codegen_expr end_ in
-
- (* Convert condition to a bool by comparing equal to 0.0. *)
- let zero = const_float double_type 0.0 in
- let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-
- (* Create the "after loop" block and insert it. *)
- let loop_end_bb = insertion_block builder in
- let after_bb = append_block context "afterloop" the_function in
-
- (* Insert the conditional branch into the end of loop_end_bb. *)
- ignore (build_cond_br end_cond loop_bb after_bb builder);
-
- (* Any new code will be inserted in after_bb. *)
- position_at_end after_bb builder;
-
- (* Add a new entry to the PHI node for the backedge. *)
- add_incoming (next_var, loop_end_bb) variable;
-
- (* Restore the unshadowed variable. *)
- begin match old_val with
- | Some old_val -&gt; Hashtbl.add named_values var_name old_val
- | None -&gt; ()
- end;
-
- (* for expr always returns 0.0. *)
- const_null double_type
-
-let codegen_proto = function
- | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -&gt;
- (* Make the function type: double(double,double) etc. *)
- let doubles = Array.make (Array.length args) double_type in
- let ft = function_type double_type doubles in
- let f =
- match lookup_function name the_module with
- | None -&gt; declare_function name ft the_module
-
- (* If 'f' conflicted, there was already something named 'name'. If it
- * has a body, don't allow redefinition or reextern. *)
- | Some f -&gt;
- (* If 'f' already has a body, reject this. *)
- if block_begin f &lt;&gt; At_end f then
- raise (Error "redefinition of function");
-
- (* If 'f' took a different number of arguments, reject. *)
- if element_type (type_of f) &lt;&gt; ft then
- raise (Error "redefinition of function with different # args");
- f
- in
-
- (* Set names for all arguments. *)
- Array.iteri (fun i a -&gt;
- let n = args.(i) in
- set_value_name n a;
- Hashtbl.add named_values n a;
- ) (params f);
- f
-
-let codegen_func the_fpm = function
- | Ast.Function (proto, body) -&gt;
- Hashtbl.clear named_values;
- let the_function = codegen_proto proto in
-
- (* If this is an operator, install it. *)
- begin match proto with
- | Ast.BinOpPrototype (name, args, prec) -&gt;
- let op = name.[String.length name - 1] in
- Hashtbl.add Parser.binop_precedence op prec;
- | _ -&gt; ()
- end;
-
- (* Create a new basic block to start insertion into. *)
- let bb = append_block context "entry" the_function in
- position_at_end bb builder;
-
- try
- let ret_val = codegen_expr body in
-
- (* Finish off the function. *)
- let _ = build_ret ret_val builder in
-
- (* Validate the generated code, checking for consistency. *)
- Llvm_analysis.assert_valid_function the_function;
-
- (* Optimize the function. *)
- let _ = PassManager.run_function the_function the_fpm in
-
- the_function
- with e -&gt;
- delete_function the_function;
- raise e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
- match Stream.peek stream with
- | None -&gt; ()
-
- (* ignore top-level semicolons. *)
- | Some (Token.Kwd ';') -&gt;
- Stream.junk stream;
- main_loop the_fpm the_execution_engine stream
-
- | Some token -&gt;
- begin
- try match token with
- | Token.Def -&gt;
- let e = Parser.parse_definition stream in
- print_endline "parsed a function definition.";
- dump_value (Codegen.codegen_func the_fpm e);
- | Token.Extern -&gt;
- let e = Parser.parse_extern stream in
- print_endline "parsed an extern.";
- dump_value (Codegen.codegen_proto e);
- | _ -&gt;
- (* Evaluate a top-level expression into an anonymous function. *)
- let e = Parser.parse_toplevel stream in
- print_endline "parsed a top-level expr";
- let the_function = Codegen.codegen_func the_fpm e in
- dump_value the_function;
-
- (* JIT the function, returning a function pointer. *)
- let result = ExecutionEngine.run_function the_function [||]
- the_execution_engine in
-
- print_string "Evaluated to ";
- print_float (GenericValue.as_float Codegen.double_type result);
- print_newline ();
- with Stream.Error s | Codegen.Error s -&gt;
- (* Skip token for error recovery. *)
- Stream.junk stream;
- print_endline s;
- end;
- print_string "ready&gt; "; flush stdout;
- main_loop the_fpm the_execution_engine stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-
-let main () =
- ignore (initialize_native_target ());
-
- (* Install standard binary operators.
- * 1 is the lowest precedence. *)
- Hashtbl.add Parser.binop_precedence '&lt;' 10;
- Hashtbl.add Parser.binop_precedence '+' 20;
- Hashtbl.add Parser.binop_precedence '-' 20;
- Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
-
- (* Prime the first token. *)
- print_string "ready&gt; "; flush stdout;
- let stream = Lexer.lex (Stream.of_channel stdin) in
-
- (* Create the JIT. *)
- let the_execution_engine = ExecutionEngine.create Codegen.the_module in
- let the_fpm = PassManager.create_function Codegen.the_module in
-
- (* Set up the optimizer pipeline. Start with registering info about how the
- * target lays out data structures. *)
- DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
- (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
- add_instruction_combination the_fpm;
-
- (* reassociate expressions. *)
- add_reassociation the_fpm;
-
- (* Eliminate Common SubExpressions. *)
- add_gvn the_fpm;
-
- (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
- add_cfg_simplification the_fpm;
-
- ignore (PassManager.initialize the_fpm);
-
- (* Run the main "interpreter loop" now. *)
- Toplevel.main_loop the_fpm the_execution_engine stream;
-
- (* Print out all the generated code. *)
- dump_module Codegen.the_module
-;;
-
-main ()
-</pre>
-</dd>
-
-<dt>bindings.c</dt>
-<dd class="doc_code">
-<pre>
-#include &lt;stdio.h&gt;
-
-/* putchard - putchar that takes a double and returns 0. */
-extern double putchard(double X) {
- putchar((char)X);
- return 0;
-}
-
-/* printd - printf that takes a double prints it as "%f\n", returning 0. */
-extern double printd(double X) {
- printf("%f\n", X);
- return 0;
-}
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl7.html">Next: Extending the language: mutable variables /
-SSA construction</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl6.rst b/docs/tutorial/OCamlLangImpl6.rst
new file mode 100644
index 000000000000..36bffa8e9696
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl6.rst
@@ -0,0 +1,1441 @@
+============================================================
+Kaleidoscope: Extending the Language: User-defined Operators
+============================================================
+
+.. contents::
+ :local:
+
+Chapter 6 Introduction
+======================
+
+Welcome to Chapter 6 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. At this point in our tutorial, we now
+have a fully functional language that is fairly minimal, but also
+useful. There is still one big problem with it, however. Our language
+doesn't have many useful operators (like division, logical negation, or
+even any comparisons besides less-than).
+
+This chapter of the tutorial takes a wild digression into adding
+user-defined operators to the simple and beautiful Kaleidoscope
+language. This digression gives us a language that is in some ways
+simple and ugly, but at the same time powerful. One of the great
+things about creating your own language is that you get to decide what
+is good or bad. In this tutorial we'll assume that it is okay to use
+this as a way to show some interesting parsing techniques.
+
+At the end of this tutorial, we'll run through an example Kaleidoscope
+application that `renders the Mandelbrot set <#example>`_. This gives an
+example of what you can build with Kaleidoscope and its feature set.
+
+User-defined Operators: the Idea
+================================
+
+The "operator overloading" that we will add to Kaleidoscope is more
+general than languages like C++. In C++, you are only allowed to
+redefine existing operators: you can't programatically change the
+grammar, introduce new operators, change precedence levels, etc. In this
+chapter, we will add this capability to Kaleidoscope, which will let the
+user round out the set of operators that are supported.
+
+The point of going into user-defined operators in a tutorial like this
+is to show the power and flexibility of using a hand-written parser.
+Thus far, the parser we have been implementing uses recursive descent
+for most parts of the grammar and operator precedence parsing for the
+expressions. See `Chapter 2 <OCamlLangImpl2.html>`_ for details. Without
+using operator precedence parsing, it would be very difficult to allow
+the programmer to introduce new operators into the grammar: the grammar
+is dynamically extensible as the JIT runs.
+
+The two specific features we'll add are programmable unary operators
+(right now, Kaleidoscope has no unary operators at all) as well as
+binary operators. An example of this is:
+
+::
+
+ # Logical unary not.
+ def unary!(v)
+ if v then
+ 0
+ else
+ 1;
+
+ # Define > with the same precedence as <.
+ def binary> 10 (LHS RHS)
+ RHS < LHS;
+
+ # Binary "logical or", (note that it does not "short circuit")
+ def binary| 5 (LHS RHS)
+ if LHS then
+ 1
+ else if RHS then
+ 1
+ else
+ 0;
+
+ # Define = with slightly lower precedence than relationals.
+ def binary= 9 (LHS RHS)
+ !(LHS < RHS | LHS > RHS);
+
+Many languages aspire to being able to implement their standard runtime
+library in the language itself. In Kaleidoscope, we can implement
+significant parts of the language in the library!
+
+We will break down implementation of these features into two parts:
+implementing support for user-defined binary operators and adding unary
+operators.
+
+User-defined Binary Operators
+=============================
+
+Adding support for user-defined binary operators is pretty simple with
+our current framework. We'll first add support for the unary/binary
+keywords:
+
+.. code-block:: ocaml
+
+ type token =
+ ...
+ (* operators *)
+ | Binary | Unary
+
+ ...
+
+ and lex_ident buffer = parser
+ ...
+ | "for" -> [< 'Token.For; stream >]
+ | "in" -> [< 'Token.In; stream >]
+ | "binary" -> [< 'Token.Binary; stream >]
+ | "unary" -> [< 'Token.Unary; stream >]
+
+This just adds lexer support for the unary and binary keywords, like we
+did in `previous chapters <OCamlLangImpl5.html#iflexer>`_. One nice
+thing about our current AST is that we represent binary operators with
+full generalisation by using their ASCII code as the opcode. For our
+extended operators, we'll use this same representation, so we don't need
+any new AST or parser support.
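+
+For instance (a hypothetical value, not from the tutorial sources), once a
+user defines "@", an expression like "x@y" needs nothing new in the AST: it
+reuses the existing ``Ast.Binary`` constructor with '@' as the opcode:
+
+.. code-block:: ocaml
+
+    (* Hypothetical: chars-as-opcodes lets a user-defined '@' reuse the
+     * existing Binary node with no AST changes. *)
+    let x_at_y : Ast.expr =
+      Ast.Binary ('@', Ast.Variable "x", Ast.Variable "y")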
+
+On the other hand, we have to be able to represent the definitions of
+these new operators, in the "def binary\| 5" part of the function
+definition. In our grammar so far, the "name" for the function
+definition is parsed as the "prototype" production and into the
+``Ast.Prototype`` AST node. To represent our new user-defined operators
+as prototypes, we have to extend the ``Ast.Prototype`` AST node like
+this:
+
+.. code-block:: ocaml
+
+ (* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+ type proto =
+ | Prototype of string * string array
+ | BinOpPrototype of string * string array * int
+
+Basically, in addition to knowing a name for the prototype, we now keep
+track of whether it was an operator, and if it was, what precedence
+level the operator is at. The precedence is only used for binary
+operators (as you'll see below, it just doesn't apply for unary
+operators). Now that we have a way to represent the prototype for a
+user-defined operator, we need to parse it:
+
+.. code-block:: ocaml
+
+ (* prototype
+ * ::= id '(' id* ')'
+ * ::= binary LETTER number? (id, id)
+ * ::= unary LETTER number? (id) *)
+ let parse_prototype =
+ let rec parse_args accumulator = parser
+ | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+ | [< >] -> accumulator
+ in
+ let parse_operator = parser
+ | [< 'Token.Unary >] -> "unary", 1
+ | [< 'Token.Binary >] -> "binary", 2
+ in
+ let parse_binary_precedence = parser
+ | [< 'Token.Number n >] -> int_of_float n
+ | [< >] -> 30
+ in
+ parser
+ | [< 'Token.Ident id;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ (* success. *)
+ Ast.Prototype (id, Array.of_list (List.rev args))
+ | [< (prefix, kind)=parse_operator;
+ 'Token.Kwd op ?? "expected an operator";
+ (* Read the precedence if present. *)
+ binary_precedence=parse_binary_precedence;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ let name = prefix ^ (String.make 1 op) in
+ let args = Array.of_list (List.rev args) in
+
+ (* Verify right number of arguments for operator. *)
+ if Array.length args != kind
+ then raise (Stream.Error "invalid number of operands for operator")
+ else
+ if kind == 1 then
+ Ast.Prototype (name, args)
+ else
+ Ast.BinOpPrototype (name, args, binary_precedence)
+ | [< >] ->
+ raise (Stream.Error "expected function name in prototype")
+
+This is all fairly straightforward parsing code, and we have already
+seen a lot of similar code in the past. One interesting part about the
+code above is the couple of lines that set up ``name`` for binary
+operators. This builds names like "binary@" for a newly defined "@"
+operator. This then takes advantage of the fact that symbol names in the
+LLVM symbol table are allowed to have any character in them, including
+embedded nul characters.
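+
+As a quick sketch (hypothetical, mirroring the parser code above), the
+mangling is plain string concatenation, so "def binary@ 5 ..." defines a
+function named "binary@" and "def unary! ..." one named "unary!":
+
+.. code-block:: ocaml
+
+    (* Hypothetical check of the names parse_prototype builds. *)
+    let () =
+      assert ("binary" ^ (String.make 1 '@') = "binary@");
+      assert ("unary" ^ (String.make 1 '!') = "unary!")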
+
+The next interesting thing to add is codegen support for these binary
+operators. Given our current structure, this is a simple addition of a
+default case for our existing binary operator node:
+
+.. code-block:: ocaml
+
+ let codegen_expr = function
+ ...
+ | Ast.Binary (op, lhs, rhs) ->
+ let lhs_val = codegen_expr lhs in
+ let rhs_val = codegen_expr rhs in
+ begin
+ match op with
+ | '+' -> build_add lhs_val rhs_val "addtmp" builder
+ | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+ | '*' -> build_mul lhs_val rhs_val "multmp" builder
+ | '<' ->
+ (* Convert bool 0/1 to double 0.0 or 1.0 *)
+ let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+ build_uitofp i double_type "booltmp" builder
+ | _ ->
+ (* If it wasn't a builtin binary operator, it must be a user defined
+ * one. Emit a call to it. *)
+ let callee = "binary" ^ (String.make 1 op) in
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "binary operator not found!")
+ in
+ build_call callee [|lhs_val; rhs_val|] "binop" builder
+ end
+
+As you can see above, the new code is actually really simple. It just
+does a lookup for the appropriate operator in the symbol table and
+generates a function call to it. Since user-defined operators are just
+built as normal functions (because the "prototype" boils down to a
+function with the right name), everything falls into place.
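+
+To make that concrete (a hypothetical snippet; ``Codegen.the_module`` and
+``Llvm.lookup_function`` are the module handle and API already used in this
+chapter), a compiled "def binary@ ..." shows up as an ordinary function:
+
+.. code-block:: ocaml
+
+    (* Hypothetical check: after compiling "def binary@ 5 (LHS RHS) ...",
+     * the module simply contains a function named "binary@". *)
+    let () =
+      match Llvm.lookup_function "binary@" Codegen.the_module with
+      | Some _ -> print_endline "binary@ is an ordinary function"
+      | None -> print_endline "binary@ has not been defined yet"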
+
+The final piece of code we are missing is a bit of top-level magic:
+
+.. code-block:: ocaml
+
+ let codegen_func the_fpm = function
+ | Ast.Function (proto, body) ->
+ Hashtbl.clear named_values;
+ let the_function = codegen_proto proto in
+
+ (* If this is an operator, install it. *)
+ begin match proto with
+ | Ast.BinOpPrototype (name, args, prec) ->
+ let op = name.[String.length name - 1] in
+ Hashtbl.add Parser.binop_precedence op prec;
+ | _ -> ()
+ end;
+
+ (* Create a new basic block to start insertion into. *)
+ let bb = append_block context "entry" the_function in
+ position_at_end bb builder;
+ ...
+
+Basically, before codegening a function, if it is a user-defined
+operator, we register it in the precedence table. This allows the binary
+operator parsing logic we already have in place to handle it. Since we
+are working on a fully-general operator precedence parser, this is all
+we need to do to "extend the grammar".
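+
+For example (a hypothetical check; ``Parser.binop_precedence`` is the same
+table the operator precedence parser consults), right after codegen of a
+"def binary@ 5 (LHS RHS)" definition the parser already knows how tightly
+'@' binds:
+
+.. code-block:: ocaml
+
+    (* Hypothetical: codegen_func has just installed '@' at precedence 5,
+     * so parse_bin_rhs will now consume '@' like any builtin operator. *)
+    let () =
+      assert (Hashtbl.find Parser.binop_precedence '@' = 5)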
+
+Now we have useful user-defined binary operators. This builds a lot on
+the previous framework we built for other operators. Adding unary
+operators is a bit more challenging, because we don't have any framework
+for it yet - let's see what it takes.
+
+User-defined Unary Operators
+============================
+
+Since we don't currently support unary operators in the Kaleidoscope
+language, we'll need to add everything to support them. Above, we added
+simple support for the 'unary' keyword to the lexer. In addition to
+that, we need an AST node:
+
+.. code-block:: ocaml
+
+ type expr =
+ ...
+ (* variant for a unary operator. *)
+ | Unary of char * expr
+ ...
+
+This AST node is very simple and obvious by now. It directly mirrors the
+binary operator AST node, except that it only has one child. With this,
+we need to add the parsing logic. Parsing a unary operator is pretty
+simple: we'll add a new function to do it:
+
+.. code-block:: ocaml
+
+ (* unary
+ * ::= primary
+ * ::= '!' unary *)
+ and parse_unary = parser
+ (* If this is a unary operator, read it. *)
+ | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_unary >] ->
+ Ast.Unary (op, operand)
+
+ (* If the current token is not an operator, it must be a primary expr. *)
+ | [< stream >] -> parse_primary stream
+
+The grammar we add is pretty straightforward here. If we see a unary
+operator while parsing a primary expression, we eat the operator as a
+prefix and parse the remaining piece as another unary expression. This
+allows us to handle multiple unary operators (e.g. "!!x"). Note that
+unary operators can't have ambiguous parses like binary operators can,
+so there is no need for precedence information.
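+
+For example (illustration only), the input "!!x" comes back from
+``parse_unary`` as nested ``Unary`` nodes wrapped around a variable
+reference:
+
+.. code-block:: ocaml
+
+ (* The AST built for "!!x". *)
+ Ast.Unary ('!', Ast.Unary ('!', Ast.Variable "x"))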
+
+The problem with this function is that we need to call ``parse_unary``
+from somewhere. To do this, we change previous callers of
+``parse_primary`` to call ``parse_unary`` instead:
+
+.. code-block:: ocaml
+
+ (* binoprhs
+ * ::= ('+' unary)* *)
+ and parse_bin_rhs expr_prec lhs stream =
+ ...
+ (* Parse the unary expression after the binary operator. *)
+ let rhs = parse_unary stream in
+ ...
+
+ ...
+
+ (* expression
+ * ::= unary binoprhs *)
+ and parse_expr = parser
+ | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
+
+With these two simple changes, we are now able to parse unary operators
+and build the AST for them. Next, we need to extend the prototype
+parser to handle unary operator prototypes, building on the binary
+operator code above:
+
+.. code-block:: ocaml
+
+ (* prototype
+ * ::= id '(' id* ')'
+ * ::= binary LETTER number? (id, id)
+ * ::= unary LETTER number? (id) *)
+ let parse_prototype =
+ let rec parse_args accumulator = parser
+ | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+ | [< >] -> accumulator
+ in
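+ (* "unary" introduces a one-operand operator and "binary" a two-operand
+  * one; the returned count ("kind") is checked against the argument
+  * list below. *)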
+ let parse_operator = parser
+ | [< 'Token.Unary >] -> "unary", 1
+ | [< 'Token.Binary >] -> "binary", 2
+ in
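+ (* The precedence is optional; it defaults to 30 when omitted. *)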
+ let parse_binary_precedence = parser
+ | [< 'Token.Number n >] -> int_of_float n
+ | [< >] -> 30
+ in
+ parser
+ | [< 'Token.Ident id;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ (* success. *)
+ Ast.Prototype (id, Array.of_list (List.rev args))
+ | [< (prefix, kind)=parse_operator;
+ 'Token.Kwd op ?? "expected an operator";
+ (* Read the precedence if present. *)
+ binary_precedence=parse_binary_precedence;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ let name = prefix ^ (String.make 1 op) in
+ let args = Array.of_list (List.rev args) in
+
+ (* Verify right number of arguments for operator. *)
+ if Array.length args != kind
+ then raise (Stream.Error "invalid number of operands for operator")
+ else
+ if kind == 1 then
+ Ast.Prototype (name, args)
+ else
+ Ast.BinOpPrototype (name, args, binary_precedence)
+ | [< >] ->
+ raise (Stream.Error "expected function name in prototype")
+
+As with binary operators, we give unary operators a name that includes
+the operator character, which assists us at code generation time.
+Speaking of which, the final piece we need to add is codegen support
+for unary operators. It looks like this:
+
+.. code-block:: ocaml
+
+ let rec codegen_expr = function
+ ...
+ | Ast.Unary (op, operand) ->
+ let operand = codegen_expr operand in
+ let callee = "unary" ^ (String.make 1 op) in
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "unknown unary operator")
+ in
+ build_call callee [|operand|] "unop" builder
+
+This code is similar to, but simpler than, the code for binary
+operators. It is simpler primarily because it doesn't need to handle any
+predefined operators.
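+
+As a quick sketch (assuming, hypothetically, that "def unary!(v) ..."
+has already been codegen'd and the builder is positioned inside a
+function), evaluating the AST for "!1" boils down to a single call:
+
+.. code-block:: ocaml
+
+ (* Emits something like: %unop = call double @"unary!"(double 1.0) *)
+ ignore (Codegen.codegen_expr (Ast.Unary ('!', Ast.Number 1.0)))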
+
+Kicking the Tires
+=================
+
+It is somewhat hard to believe, but with the few simple extensions
+we've covered in the last chapters, we have grown a real-ish language.
+With this, we can do a lot of interesting things, including I/O, math,
+and a bunch of other operations. For example, we can now add a nice sequencing
+operator (printd is defined to print out the specified value and a
+newline):
+
+::
+
+ ready> extern printd(x);
+ Read extern: declare double @printd(double)
+ ready> def binary : 1 (x y) 0; # Low-precedence operator that ignores operands.
+ ..
+ ready> printd(123) : printd(456) : printd(789);
+ 123.000000
+ 456.000000
+ 789.000000
+ Evaluated to 0.000000
+
+We can also define a bunch of other "primitive" operations, such as:
+
+::
+
+ # Logical unary not.
+ def unary!(v)
+ if v then
+ 0
+ else
+ 1;
+
+ # Unary negate.
+ def unary-(v)
+ 0-v;
+
+ # Define > with the same precedence as <.
+ def binary> 10 (LHS RHS)
+ RHS < LHS;
+
+ # Binary logical or, which does not short circuit.
+ def binary| 5 (LHS RHS)
+ if LHS then
+ 1
+ else if RHS then
+ 1
+ else
+ 0;
+
+ # Binary logical and, which does not short circuit.
+ def binary& 6 (LHS RHS)
+ if !LHS then
+ 0
+ else
+ !!RHS;
+
+ # Define = with slightly lower precedence than relationals.
+ def binary = 9 (LHS RHS)
+ !(LHS < RHS | LHS > RHS);
+
+Given the previous if/then/else support, we can also define interesting
+functions for I/O. For example, the following prints out a character
+whose "density" reflects the value passed in: the lower the value, the
+denser the character:
+
+::
+
+ ready>
+
+ extern putchard(char)
+ def printdensity(d)
+ if d > 8 then
+ putchard(32) # ' '
+ else if d > 4 then
+ putchard(46) # '.'
+ else if d > 2 then
+ putchard(43) # '+'
+ else
+ putchard(42); # '*'
+ ...
+ ready> printdensity(1): printdensity(2): printdensity(3) :
+ printdensity(4): printdensity(5): printdensity(9): putchard(10);
+ *++..
+ Evaluated to 0.000000
+
+Based on these simple primitive operations, we can start to define more
+interesting things. For example, here's a little function that solves
+for the number of iterations it takes a function in the complex plane to
+converge:
+
+::
+
+ # Determine whether the specified location diverges.
+ # Solve for z = z^2 + c in the complex plane.
+ def mandelconverger(real imag iters creal cimag)
+ if iters > 255 | (real*real + imag*imag > 4) then
+ iters
+ else
+ mandelconverger(real*real - imag*imag + creal,
+ 2*real*imag + cimag,
+ iters+1, creal, cimag);
+
+ # Return the number of iterations required for the orbit to escape.
+ def mandelconverge(real imag)
+ mandelconverger(real, imag, 0, real, imag);
+
+This "z = z\ :sup:`2`\ + c" function is a beautiful little creature
+that is the basis for computation of the `Mandelbrot
+Set <http://en.wikipedia.org/wiki/Mandelbrot_set>`_. Our
+``mandelconverge`` function returns the number of iterations that it
+takes for a complex orbit to escape, saturating to 255. This is not a
+very useful function by itself, but if you plot its value over a
+two-dimensional plane, you can see the Mandelbrot set. Given that we
+are restricted to using putchard here, our amazing graphical output is
+limited, but we can whip together something using the density plotter
+above:
+
+::
+
+ # Compute and plot the Mandelbrot set with the specified two-dimensional
+ # range info.
+ def mandelhelp(xmin xmax xstep ymin ymax ystep)
+ for y = ymin, y < ymax, ystep in (
+ (for x = xmin, x < xmax, xstep in
+ printdensity(mandelconverge(x,y)))
+ : putchard(10)
+ )
+
+ # mandel - This is a convenient helper function for plotting the Mandelbrot
+ # set from the specified position with the specified magnification.
+ def mandel(realstart imagstart realmag imagmag)
+ mandelhelp(realstart, realstart+realmag*78, realmag,
+ imagstart, imagstart+imagmag*40, imagmag);
+
+Given this, we can try plotting out the Mandelbrot set! Let's try it out:
+
+::
+
+ ready> mandel(-2.3, -1.3, 0.05, 0.07);
+ *******************************+++++++++++*************************************
+ *************************+++++++++++++++++++++++*******************************
+ **********************+++++++++++++++++++++++++++++****************************
+ *******************+++++++++++++++++++++.. ...++++++++*************************
+ *****************++++++++++++++++++++++.... ...+++++++++***********************
+ ***************+++++++++++++++++++++++..... ...+++++++++*********************
+ **************+++++++++++++++++++++++.... ....+++++++++********************
+ *************++++++++++++++++++++++...... .....++++++++*******************
+ ************+++++++++++++++++++++....... .......+++++++******************
+ ***********+++++++++++++++++++.... ... .+++++++*****************
+ **********+++++++++++++++++....... .+++++++****************
+ *********++++++++++++++........... ...+++++++***************
+ ********++++++++++++............ ...++++++++**************
+ ********++++++++++... .......... .++++++++**************
+ *******+++++++++..... .+++++++++*************
+ *******++++++++...... ..+++++++++*************
+ *******++++++....... ..+++++++++*************
+ *******+++++...... ..+++++++++*************
+ *******.... .... ...+++++++++*************
+ *******.... . ...+++++++++*************
+ *******+++++...... ...+++++++++*************
+ *******++++++....... ..+++++++++*************
+ *******++++++++...... .+++++++++*************
+ *******+++++++++..... ..+++++++++*************
+ ********++++++++++... .......... .++++++++**************
+ ********++++++++++++............ ...++++++++**************
+ *********++++++++++++++.......... ...+++++++***************
+ **********++++++++++++++++........ .+++++++****************
+ **********++++++++++++++++++++.... ... ..+++++++****************
+ ***********++++++++++++++++++++++....... .......++++++++*****************
+ ************+++++++++++++++++++++++...... ......++++++++******************
+ **************+++++++++++++++++++++++.... ....++++++++********************
+ ***************+++++++++++++++++++++++..... ...+++++++++*********************
+ *****************++++++++++++++++++++++.... ...++++++++***********************
+ *******************+++++++++++++++++++++......++++++++*************************
+ *********************++++++++++++++++++++++.++++++++***************************
+ *************************+++++++++++++++++++++++*******************************
+ ******************************+++++++++++++************************************
+ *******************************************************************************
+ *******************************************************************************
+ *******************************************************************************
+ Evaluated to 0.000000
+ ready> mandel(-2, -1, 0.02, 0.04);
+ **************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
+ ***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ *********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
+ *******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
+ *****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
+ ***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
+ **************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
+ ************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
+ ***********++++++++++++++++++++++++++++++++++++++++++++++++++........ .
+ **********++++++++++++++++++++++++++++++++++++++++++++++.............
+ ********+++++++++++++++++++++++++++++++++++++++++++..................
+ *******+++++++++++++++++++++++++++++++++++++++.......................
+ ******+++++++++++++++++++++++++++++++++++...........................
+ *****++++++++++++++++++++++++++++++++............................
+ *****++++++++++++++++++++++++++++...............................
+ ****++++++++++++++++++++++++++...... .........................
+ ***++++++++++++++++++++++++......... ...... ...........
+ ***++++++++++++++++++++++............
+ **+++++++++++++++++++++..............
+ **+++++++++++++++++++................
+ *++++++++++++++++++.................
+ *++++++++++++++++............ ...
+ *++++++++++++++..............
+ *+++....++++................
+ *.......... ...........
+ *
+ *.......... ...........
+ *+++....++++................
+ *++++++++++++++..............
+ *++++++++++++++++............ ...
+ *++++++++++++++++++.................
+ **+++++++++++++++++++................
+ **+++++++++++++++++++++..............
+ ***++++++++++++++++++++++............
+ ***++++++++++++++++++++++++......... ...... ...........
+ ****++++++++++++++++++++++++++...... .........................
+ *****++++++++++++++++++++++++++++...............................
+ *****++++++++++++++++++++++++++++++++............................
+ ******+++++++++++++++++++++++++++++++++++...........................
+ *******+++++++++++++++++++++++++++++++++++++++.......................
+ ********+++++++++++++++++++++++++++++++++++++++++++..................
+ Evaluated to 0.000000
+ ready> mandel(-0.9, -1.4, 0.02, 0.03);
+ *******************************************************************************
+ *******************************************************************************
+ *******************************************************************************
+ **********+++++++++++++++++++++************************************************
+ *+++++++++++++++++++++++++++++++++++++++***************************************
+ +++++++++++++++++++++++++++++++++++++++++++++**********************************
+ ++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
+ ++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
+ +++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
+ +++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
+ +++++++++++++++++++++++++++++++.... ......+++++++++++++++++++****************
+ +++++++++++++++++++++++++++++....... ........+++++++++++++++++++**************
+ ++++++++++++++++++++++++++++........ ........++++++++++++++++++++************
+ +++++++++++++++++++++++++++......... .. ...+++++++++++++++++++++**********
+ ++++++++++++++++++++++++++........... ....++++++++++++++++++++++********
+ ++++++++++++++++++++++++............. .......++++++++++++++++++++++******
+ +++++++++++++++++++++++............. ........+++++++++++++++++++++++****
+ ++++++++++++++++++++++........... ..........++++++++++++++++++++++***
+ ++++++++++++++++++++........... .........++++++++++++++++++++++*
+ ++++++++++++++++++............ ...........++++++++++++++++++++
+ ++++++++++++++++............... .............++++++++++++++++++
+ ++++++++++++++................. ...............++++++++++++++++
+ ++++++++++++.................. .................++++++++++++++
+ +++++++++.................. .................+++++++++++++
+ ++++++........ . ......... ..++++++++++++
+ ++............ ...... ....++++++++++
+ .............. ...++++++++++
+ .............. ....+++++++++
+ .............. .....++++++++
+ ............. ......++++++++
+ ........... .......++++++++
+ ......... ........+++++++
+ ......... ........+++++++
+ ......... ....+++++++
+ ........ ...+++++++
+ ....... ...+++++++
+ ....+++++++
+ .....+++++++
+ ....+++++++
+ ....+++++++
+ ....+++++++
+ Evaluated to 0.000000
+ ready> ^D
+
+At this point, you may be starting to realize that Kaleidoscope is a
+real and powerful language. It may not be self-similar :), but it can be
+used to plot things that are!
+
+With this, we conclude the "adding user-defined operators" chapter of
+the tutorial. We have successfully augmented our language, adding the
+ability to extend the language in the library, and we have shown how
+this can be used to build a simple but interesting end-user application
+in Kaleidoscope. At this point, Kaleidoscope can build a variety of
+applications that are functional and can call functions with
+side-effects, but it can't actually define and mutate a variable itself.
+
+Notably, variable mutation is an important feature of many languages,
+and it is not at all obvious how to `add support for mutable
+variables <OCamlLangImpl7.html>`_ without having to add an "SSA
+construction" phase to your front-end. In the next chapter, we will
+describe how you can add variable mutation without building SSA in your
+front-end.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced
+with support for user-defined operators. To build this example, use:
+
+.. code-block:: bash
+
+ # Compile
+ ocamlbuild toy.byte
+ # Run
+ ./toy.byte
+
+Here is the code:
+
+\_tags:
+ ::
+
+ <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+ <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+ <*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+ <*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+myocamlbuild.ml:
+ .. code-block:: ocaml
+
+ open Ocamlbuild_plugin;;
+
+ ocaml_lib ~extern:true "llvm";;
+ ocaml_lib ~extern:true "llvm_analysis";;
+ ocaml_lib ~extern:true "llvm_executionengine";;
+ ocaml_lib ~extern:true "llvm_target";;
+ ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+ flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
+ dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+token.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+ (* The lexer returns 'Kwd' if it is an unknown character, otherwise one
+ * of the other token variants for known things. *)
+ type token =
+ (* commands *)
+ | Def | Extern
+
+ (* primary *)
+ | Ident of string | Number of float
+
+ (* unknown *)
+ | Kwd of char
+
+ (* control *)
+ | If | Then | Else
+ | For | In
+
+ (* operators *)
+ | Binary | Unary
+
+lexer.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+ let rec lex = parser
+ (* Skip any whitespace. *)
+ | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+ (* identifier: [a-zA-Z][a-zA-Z0-9]* *)
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+
+ (* number: [0-9.]+ *)
+ | [< ' ('0' .. '9' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+
+ (* Comment until end of line. *)
+ | [< ' ('#'); stream >] ->
+ lex_comment stream
+
+ (* Otherwise, just return the character as its ascii value. *)
+ | [< 'c; stream >] ->
+ [< 'Token.Kwd c; lex stream >]
+
+ (* end of stream. *)
+ | [< >] -> [< >]
+
+ and lex_number buffer = parser
+ | [< ' ('0' .. '9' | '.' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+ | [< stream=lex >] ->
+ [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+ and lex_ident buffer = parser
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+ | [< stream=lex >] ->
+ match Buffer.contents buffer with
+ | "def" -> [< 'Token.Def; stream >]
+ | "extern" -> [< 'Token.Extern; stream >]
+ | "if" -> [< 'Token.If; stream >]
+ | "then" -> [< 'Token.Then; stream >]
+ | "else" -> [< 'Token.Else; stream >]
+ | "for" -> [< 'Token.For; stream >]
+ | "in" -> [< 'Token.In; stream >]
+ | "binary" -> [< 'Token.Binary; stream >]
+ | "unary" -> [< 'Token.Unary; stream >]
+ | id -> [< 'Token.Ident id; stream >]
+
+ and lex_comment = parser
+ | [< ' ('\n'); stream=lex >] -> stream
+ | [< 'c; e=lex_comment >] -> e
+ | [< >] -> [< >]
+
+ast.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+ (* expr - Base type for all expression nodes. *)
+ type expr =
+ (* variant for numeric literals like "1.0". *)
+ | Number of float
+
+ (* variant for referencing a variable, like "a". *)
+ | Variable of string
+
+ (* variant for a unary operator. *)
+ | Unary of char * expr
+
+ (* variant for a binary operator. *)
+ | Binary of char * expr * expr
+
+ (* variant for function calls. *)
+ | Call of string * expr array
+
+ (* variant for if/then/else. *)
+ | If of expr * expr * expr
+
+ (* variant for for/in. *)
+ | For of string * expr * expr * expr option * expr
+
+ (* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+ type proto =
+ | Prototype of string * string array
+ | BinOpPrototype of string * string array * int
+
+ (* func - This type represents a function definition itself. *)
+ type func = Function of proto * expr
+
+parser.ml:
+ .. code-block:: ocaml
+
+ (*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+ (* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+ let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+ (* precedence - Get the precedence of the pending binary operator token. *)
+ let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+ (* primary
+ * ::= identifier
+ * ::= numberexpr
+ * ::= parenexpr
+ * ::= ifexpr
+ * ::= forexpr *)
+ let rec parse_primary = parser
+ (* numberexpr ::= number *)
+ | [< 'Token.Number n >] -> Ast.Number n
+
+ (* parenexpr ::= '(' expression ')' *)
+ | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+ (* identifierexpr
+ * ::= identifier
+ * ::= identifier '(' argumentexpr ')' *)
+ | [< 'Token.Ident id; stream >] ->
+ let rec parse_args accumulator = parser
+ | [< e=parse_expr; stream >] ->
+ begin parser
+ | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+ | [< >] -> e :: accumulator
+ end stream
+ | [< >] -> accumulator
+ in
+ let rec parse_ident id = parser
+ (* Call. *)
+ | [< 'Token.Kwd '(';
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')'">] ->
+ Ast.Call (id, Array.of_list (List.rev args))
+
+ (* Simple variable ref. *)
+ | [< >] -> Ast.Variable id
+ in
+ parse_ident id stream
+
+ (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+ | [< 'Token.If; c=parse_expr;
+ 'Token.Then ?? "expected 'then'"; t=parse_expr;
+ 'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+ Ast.If (c, t, e)
+
+ (* forexpr
+ ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+ | [< 'Token.For;
+ 'Token.Ident id ?? "expected identifier after for";
+ 'Token.Kwd '=' ?? "expected '=' after for";
+ stream >] ->
+ begin parser
+ | [<
+ start=parse_expr;
+ 'Token.Kwd ',' ?? "expected ',' after for";
+ end_=parse_expr;
+ stream >] ->
+ let step =
+ begin parser
+ | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+ | [< >] -> None
+ end stream
+ in
+ begin parser
+ | [< 'Token.In; body=parse_expr >] ->
+ Ast.For (id, start, end_, step, body)
+ | [< >] ->
+ raise (Stream.Error "expected 'in' after for")
+ end stream
+ | [< >] ->
+ raise (Stream.Error "expected '=' after for")
+ end stream
+
+ | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+ (* unary
+ * ::= primary
+ * ::= '!' unary *)
+ and parse_unary = parser
+ (* If this is a unary operator, read it. *)
+ | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_unary >] ->
+ Ast.Unary (op, operand)
+
+ (* If the current token is not an operator, it must be a primary expr. *)
+ | [< stream >] -> parse_primary stream
+
+ (* binoprhs
+ * ::= ('+' unary)* *)
+ and parse_bin_rhs expr_prec lhs stream =
+ match Stream.peek stream with
+ (* If this is a binop, find its precedence. *)
+ | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+ let token_prec = precedence c in
+
+ (* If this is a binop that binds at least as tightly as the current binop,
+ * consume it, otherwise we are done. *)
+ if token_prec < expr_prec then lhs else begin
+ (* Eat the binop. *)
+ Stream.junk stream;
+
+ (* Parse the unary expression after the binary operator. *)
+ let rhs = parse_unary stream in
+
+ (* Okay, we know this is a binop. *)
+ let rhs =
+ match Stream.peek stream with
+ | Some (Token.Kwd c2) ->
+ (* If BinOp binds less tightly with rhs than the operator after
+ * rhs, let the pending operator take rhs as its lhs. *)
+ let next_prec = precedence c2 in
+ if token_prec < next_prec
+ then parse_bin_rhs (token_prec + 1) rhs stream
+ else rhs
+ | _ -> rhs
+ in
+
+ (* Merge lhs/rhs. *)
+ let lhs = Ast.Binary (c, lhs, rhs) in
+ parse_bin_rhs expr_prec lhs stream
+ end
+ | _ -> lhs
+
+ (* expression
+ * ::= unary binoprhs *)
+ and parse_expr = parser
+ | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
+
+ (* prototype
+ * ::= id '(' id* ')'
+ * ::= binary LETTER number? (id, id)
+ * ::= unary LETTER number? (id) *)
+ let parse_prototype =
+ let rec parse_args accumulator = parser
+ | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+ | [< >] -> accumulator
+ in
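+ (* "unary" introduces a one-operand operator and "binary" a two-operand
+  * one; the returned count ("kind") is checked against the argument
+  * list below. *)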
+ let parse_operator = parser
+ | [< 'Token.Unary >] -> "unary", 1
+ | [< 'Token.Binary >] -> "binary", 2
+ in
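+ (* The precedence is optional; it defaults to 30 when omitted. *)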
+ let parse_binary_precedence = parser
+ | [< 'Token.Number n >] -> int_of_float n
+ | [< >] -> 30
+ in
+ parser
+ | [< 'Token.Ident id;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ (* success. *)
+ Ast.Prototype (id, Array.of_list (List.rev args))
+ | [< (prefix, kind)=parse_operator;
+ 'Token.Kwd op ?? "expected an operator";
+ (* Read the precedence if present. *)
+ binary_precedence=parse_binary_precedence;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ let name = prefix ^ (String.make 1 op) in
+ let args = Array.of_list (List.rev args) in
+
+ (* Verify right number of arguments for operator. *)
+ if Array.length args != kind
+ then raise (Stream.Error "invalid number of operands for operator")
+ else
+ if kind == 1 then
+ Ast.Prototype (name, args)
+ else
+ Ast.BinOpPrototype (name, args, binary_precedence)
+ | [< >] ->
+ raise (Stream.Error "expected function name in prototype")
+
+ (* definition ::= 'def' prototype expression *)
+ let parse_definition = parser
+ | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+ Ast.Function (p, e)
+
+ (* toplevelexpr ::= expression *)
+ let parse_toplevel = parser
+ | [< e=parse_expr >] ->
+ (* Make an anonymous proto. *)
+ Ast.Function (Ast.Prototype ("", [||]), e)
+
+ (* external ::= 'extern' prototype *)
+ let parse_extern = parser
+ | [< 'Token.Extern; e=parse_prototype >] -> e
+
+codegen.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+
+ exception Error of string
+
+ let context = global_context ()
+ let the_module = create_module context "my cool jit"
+ let builder = builder context
+ let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+ let double_type = double_type context
+
+ let rec codegen_expr = function
+ | Ast.Number n -> const_float double_type n
+ | Ast.Variable name ->
+ (try Hashtbl.find named_values name with
+ | Not_found -> raise (Error "unknown variable name"))
+ | Ast.Unary (op, operand) ->
+ let operand = codegen_expr operand in
+ let callee = "unary" ^ (String.make 1 op) in
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "unknown unary operator")
+ in
+ build_call callee [|operand|] "unop" builder
+ | Ast.Binary (op, lhs, rhs) ->
+ let lhs_val = codegen_expr lhs in
+ let rhs_val = codegen_expr rhs in
+ begin
+ match op with
+ | '+' -> build_add lhs_val rhs_val "addtmp" builder
+ | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+ | '*' -> build_mul lhs_val rhs_val "multmp" builder
+ | '<' ->
+ (* Convert bool 0/1 to double 0.0 or 1.0 *)
+ let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+ build_uitofp i double_type "booltmp" builder
+ | _ ->
+ (* If it wasn't a builtin binary operator, it must be a user-defined
+ * one. Emit a call to it. *)
+ let callee = "binary" ^ (String.make 1 op) in
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "binary operator not found!")
+ in
+ build_call callee [|lhs_val; rhs_val|] "binop" builder
+ end
+ | Ast.Call (callee, args) ->
+ (* Look up the name in the module table. *)
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "unknown function referenced")
+ in
+ let params = params callee in
+
+ (* If argument mismatch error. *)
+ if Array.length params == Array.length args then () else
+ raise (Error "incorrect # arguments passed");
+ let args = Array.map codegen_expr args in
+ build_call callee args "calltmp" builder
+ | Ast.If (cond, then_, else_) ->
+ let cond = codegen_expr cond in
+
+ (* Convert condition to a bool by comparing equal to 0.0 *)
+ let zero = const_float double_type 0.0 in
+ let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+ (* Grab the first block so that we might later add the conditional branch
+ * to it at the end of the function. *)
+ let start_bb = insertion_block builder in
+ let the_function = block_parent start_bb in
+
+ let then_bb = append_block context "then" the_function in
+
+ (* Emit 'then' value. *)
+ position_at_end then_bb builder;
+ let then_val = codegen_expr then_ in
+
+ (* Codegen of 'then' can change the current block, update then_bb for the
+ * phi. We create a new name because one is used for the phi node, and the
+ * other is used for the conditional branch. *)
+ let new_then_bb = insertion_block builder in
+
+ (* Emit 'else' value. *)
+ let else_bb = append_block context "else" the_function in
+ position_at_end else_bb builder;
+ let else_val = codegen_expr else_ in
+
+ (* Codegen of 'else' can change the current block, update else_bb for the
+ * phi. *)
+ let new_else_bb = insertion_block builder in
+
+ (* Emit merge block. *)
+ let merge_bb = append_block context "ifcont" the_function in
+ position_at_end merge_bb builder;
+ let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+ let phi = build_phi incoming "iftmp" builder in
+
+ (* Return to the start block to add the conditional branch. *)
+ position_at_end start_bb builder;
+ ignore (build_cond_br cond_val then_bb else_bb builder);
+
+ (* Set an unconditional branch at the end of the 'then' block and the
+ * 'else' block to the 'merge' block. *)
+ position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+ position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+ (* Finally, set the builder to the end of the merge block. *)
+ position_at_end merge_bb builder;
+
+ phi
+ | Ast.For (var_name, start, end_, step, body) ->
+ (* Emit the start code first, without 'variable' in scope. *)
+ let start_val = codegen_expr start in
+
+ (* Make the new basic block for the loop header, inserting after current
+ * block. *)
+ let preheader_bb = insertion_block builder in
+ let the_function = block_parent preheader_bb in
+ let loop_bb = append_block context "loop" the_function in
+
+ (* Insert an explicit fall through from the current block to the
+ * loop_bb. *)
+ ignore (build_br loop_bb builder);
+
+ (* Start insertion in loop_bb. *)
+ position_at_end loop_bb builder;
+
+ (* Start the PHI node with an entry for start. *)
+ let variable = build_phi [(start_val, preheader_bb)] var_name builder in
+
+ (* Within the loop, the variable is defined equal to the PHI node. If it
+ * shadows an existing variable, we have to restore it, so save it
+ * now. *)
+ let old_val =
+ try Some (Hashtbl.find named_values var_name) with Not_found -> None
+ in
+ Hashtbl.add named_values var_name variable;
+
+ (* Emit the body of the loop. This, like any other expr, can change the
+ * current BB. Note that we ignore the value computed by the body, but
+ * don't allow an error *)
+ ignore (codegen_expr body);
+
+ (* Emit the step value. *)
+ let step_val =
+ match step with
+ | Some step -> codegen_expr step
+ (* If not specified, use 1.0. *)
+ | None -> const_float double_type 1.0
+ in
+
+ let next_var = build_add variable step_val "nextvar" builder in
+
+ (* Compute the end condition. *)
+ let end_cond = codegen_expr end_ in
+
+ (* Convert condition to a bool by comparing equal to 0.0. *)
+ let zero = const_float double_type 0.0 in
+ let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+ (* Create the "after loop" block and insert it. *)
+ let loop_end_bb = insertion_block builder in
+ let after_bb = append_block context "afterloop" the_function in
+
+ (* Insert the conditional branch into the end of loop_end_bb. *)
+ ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+ (* Any new code will be inserted in after_bb. *)
+ position_at_end after_bb builder;
+
+ (* Add a new entry to the PHI node for the backedge. *)
+ add_incoming (next_var, loop_end_bb) variable;
+
+ (* Restore the unshadowed variable. *)
+ begin match old_val with
+ | Some old_val -> Hashtbl.add named_values var_name old_val
+ | None -> ()
+ end;
+
+ (* for expr always returns 0.0. *)
+ const_null double_type
+
+ let codegen_proto = function
+ | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) ->
+ (* Make the function type: double(double,double) etc. *)
+ let doubles = Array.make (Array.length args) double_type in
+ let ft = function_type double_type doubles in
+ let f =
+ match lookup_function name the_module with
+ | None -> declare_function name ft the_module
+
+ (* If 'f' conflicted, there was already something named 'name'. If it
+ * has a body, don't allow redefinition or reextern. *)
+ | Some f ->
+ (* If 'f' already has a body, reject this. *)
+ if block_begin f <> At_end f then
+ raise (Error "redefinition of function");
+
+ (* If 'f' took a different number of arguments, reject. *)
+ if element_type (type_of f) <> ft then
+ raise (Error "redefinition of function with different # args");
+ f
+ in
+
+ (* Set names for all arguments. *)
+ Array.iteri (fun i a ->
+ let n = args.(i) in
+ set_value_name n a;
+ Hashtbl.add named_values n a;
+ ) (params f);
+ f
+
+ let codegen_func the_fpm = function
+ | Ast.Function (proto, body) ->
+ Hashtbl.clear named_values;
+ let the_function = codegen_proto proto in
+
+ (* If this is an operator, install it. *)
+ begin match proto with
+ | Ast.BinOpPrototype (name, args, prec) ->
+ let op = name.[String.length name - 1] in
+ Hashtbl.add Parser.binop_precedence op prec;
+ | _ -> ()
+ end;
+
+ (* Create a new basic block to start insertion into. *)
+ let bb = append_block context "entry" the_function in
+ position_at_end bb builder;
+
+ try
+ let ret_val = codegen_expr body in
+
+ (* Finish off the function. *)
+ let _ = build_ret ret_val builder in
+
+ (* Validate the generated code, checking for consistency. *)
+ Llvm_analysis.assert_valid_function the_function;
+
+ (* Optimize the function. *)
+ let _ = PassManager.run_function the_function the_fpm in
+
+ the_function
+ with e ->
+ delete_function the_function;
+ raise e
+
+toplevel.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+ open Llvm_executionengine
+
+ (* top ::= definition | external | expression | ';' *)
+ let rec main_loop the_fpm the_execution_engine stream =
+ match Stream.peek stream with
+ | None -> ()
+
+ (* ignore top-level semicolons. *)
+ | Some (Token.Kwd ';') ->
+ Stream.junk stream;
+ main_loop the_fpm the_execution_engine stream
+
+ | Some token ->
+ begin
+ try match token with
+ | Token.Def ->
+ let e = Parser.parse_definition stream in
+ print_endline "parsed a function definition.";
+ dump_value (Codegen.codegen_func the_fpm e);
+ | Token.Extern ->
+ let e = Parser.parse_extern stream in
+ print_endline "parsed an extern.";
+ dump_value (Codegen.codegen_proto e);
+ | _ ->
+ (* Evaluate a top-level expression into an anonymous function. *)
+ let e = Parser.parse_toplevel stream in
+ print_endline "parsed a top-level expr";
+ let the_function = Codegen.codegen_func the_fpm e in
+ dump_value the_function;
+
+ (* JIT the function, returning a function pointer. *)
+ let result = ExecutionEngine.run_function the_function [||]
+ the_execution_engine in
+
+ print_string "Evaluated to ";
+ print_float (GenericValue.as_float Codegen.double_type result);
+ print_newline ();
+ with Stream.Error s | Codegen.Error s ->
+ (* Skip token for error recovery. *)
+ Stream.junk stream;
+ print_endline s;
+ end;
+ print_string "ready> "; flush stdout;
+ main_loop the_fpm the_execution_engine stream
+
+toy.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+ open Llvm_executionengine
+ open Llvm_target
+ open Llvm_scalar_opts
+
+ let main () =
+ ignore (initialize_native_target ());
+
+ (* Install standard binary operators.
+ * 1 is the lowest precedence. *)
+ Hashtbl.add Parser.binop_precedence '<' 10;
+ Hashtbl.add Parser.binop_precedence '+' 20;
+ Hashtbl.add Parser.binop_precedence '-' 20;
+ Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
+
+ (* Prime the first token. *)
+ print_string "ready> "; flush stdout;
+ let stream = Lexer.lex (Stream.of_channel stdin) in
+
+ (* Create the JIT. *)
+ let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+ let the_fpm = PassManager.create_function Codegen.the_module in
+
+ (* Set up the optimizer pipeline. Start with registering info about how the
+ * target lays out data structures. *)
+ DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+ (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+ add_instruction_combination the_fpm;
+
+ (* reassociate expressions. *)
+ add_reassociation the_fpm;
+
+ (* Eliminate Common SubExpressions. *)
+ add_gvn the_fpm;
+
+ (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+ add_cfg_simplification the_fpm;
+
+ ignore (PassManager.initialize the_fpm);
+
+ (* Run the main "interpreter loop" now. *)
+ Toplevel.main_loop the_fpm the_execution_engine stream;
+
+ (* Print out all the generated code. *)
+ dump_module Codegen.the_module
+ ;;
+
+ main ()
+
+bindings.c:
+ .. code-block:: c
+
+ #include <stdio.h>
+
+ /* putchard - putchar that takes a double and returns 0. */
+ extern double putchard(double X) {
+ putchar((char)X);
+ return 0;
+ }
+
+ /* printd - printf that takes a double prints it as "%f\n", returning 0. */
+ extern double printd(double X) {
+ printf("%f\n", X);
+ return 0;
+ }
+
+`Next: Extending the language: mutable variables / SSA
+construction <OCamlLangImpl7.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl7.html b/docs/tutorial/OCamlLangImpl7.html
deleted file mode 100644
index aa30555a1d40..000000000000
--- a/docs/tutorial/OCamlLangImpl7.html
+++ /dev/null
@@ -1,1904 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Extending the Language: Mutable Variables / SSA
- construction</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <meta name="author" content="Erick Tryzelaar">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: Mutable Variables</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 7
- <ol>
- <li><a href="#intro">Chapter 7 Introduction</a></li>
- <li><a href="#why">Why is this a hard problem?</a></li>
- <li><a href="#memory">Memory in LLVM</a></li>
- <li><a href="#kalvars">Mutable Variables in Kaleidoscope</a></li>
- <li><a href="#adjustments">Adjusting Existing Variables for
- Mutation</a></li>
- <li><a href="#assignment">New Assignment Operator</a></li>
- <li><a href="#localvars">User-defined Local Variables</a></li>
- <li><a href="#code">Full Code Listing</a></li>
- </ol>
-</li>
-<li><a href="OCamlLangImpl8.html">Chapter 8</a>: Conclusion and other useful LLVM
- tidbits</li>
-</ul>
-
-<div class="doc_author">
- <p>
- Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
- and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
- </p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 7 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 7 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial. In chapters 1 through 6, we've built a very
-respectable, albeit simple, <a
-href="http://en.wikipedia.org/wiki/Functional_programming">functional
-programming language</a>. In our journey, we learned some parsing techniques,
-how to build and represent an AST, how to build LLVM IR, and how to optimize
-the resultant code as well as JIT compile it.</p>
-
-<p>While Kaleidoscope is interesting as a functional language, the fact that it
-is functional makes it "too easy" to generate LLVM IR for it. In particular, a
-functional language makes it very easy to build LLVM IR directly in <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">SSA form</a>.
-Since LLVM requires that the input code be in SSA form, this is a very nice
-property and it is often unclear to newcomers how to generate code for an
-imperative language with mutable variables.</p>
-
-<p>The short (and happy) summary of this chapter is that there is no need for
-your front-end to build SSA form: LLVM provides highly tuned and well tested
-support for this, though the way it works is a bit unexpected for some.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="why">Why is this a hard problem?</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-To understand why mutable variables cause complexities in SSA construction,
-consider this extremely simple C example:
-</p>
-
-<div class="doc_code">
-<pre>
-int G, H;
-int test(_Bool Condition) {
- int X;
- if (Condition)
- X = G;
- else
- X = H;
- return X;
-}
-</pre>
-</div>
-
-<p>In this case, we have the variable "X", whose value depends on the path
-executed in the program. Because there are two different possible values for X
-before the return instruction, a PHI node is inserted to merge the two values.
-The LLVM IR that we want for this example looks like this:</p>
-
-<div class="doc_code">
-<pre>
-@G = weak global i32 0 ; type of @G is i32*
-@H = weak global i32 0 ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
- br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
- %X.0 = load i32* @G
- br label %cond_next
-
-cond_false:
- %X.1 = load i32* @H
- br label %cond_next
-
-cond_next:
- %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
- ret i32 %X.2
-}
-</pre>
-</div>
-
-<p>In this example, the loads from the G and H global variables are explicit in
-the LLVM IR, and they live in the then/else branches of the if statement
-(cond_true/cond_false). In order to merge the incoming values, the X.2 phi node
-in the cond_next block selects the right value to use based on where control
-flow is coming from: if control flow comes from the cond_false block, X.2 gets
-the value of X.1. Alternatively, if control flow comes from cond_true, it gets
-the value of X.0. The intent of this chapter is not to explain the details of
-SSA form. For more information, see one of the many <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">online
-references</a>.</p>
-
-<p>The question for this article is "who places the phi nodes when lowering
-assignments to mutable variables?". The issue here is that LLVM
-<em>requires</em> that its IR be in SSA form: there is no "non-ssa" mode for it.
-However, SSA construction requires non-trivial algorithms and data structures,
-so it is inconvenient and wasteful for every front-end to have to reproduce this
-logic.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="memory">Memory in LLVM</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The 'trick' here is that while LLVM does require all register values to be
-in SSA form, it does not require (or permit) memory objects to be in SSA form.
-In the example above, note that the loads from G and H are direct accesses to
-G and H: they are not renamed or versioned. This differs from some other
-compiler systems, which do try to version memory objects. In LLVM, instead of
-encoding dataflow analysis of memory into the LLVM IR, it is handled with <a
-href="../WritingAnLLVMPass.html">Analysis Passes</a> which are computed on
-demand.</p>
-
-<p>
-With this in mind, the high-level idea is that we want to make a stack variable
-(which lives in memory, because it is on the stack) for each mutable object in
-a function. To take advantage of this trick, we need to talk about how LLVM
-represents stack variables.
-</p>
-
-<p>In LLVM, all memory accesses are explicit with load/store instructions, and
-it is carefully designed not to have (or need) an "address-of" operator. Notice
-how the type of the @G/@H global variables is actually "i32*" even though the
-variable is defined as "i32". What this means is that @G defines <em>space</em>
-for an i32 in the global data area, but its <em>name</em> actually refers to the
-address for that space. Stack variables work the same way, except that instead of
-being declared with global variable definitions, they are declared with the
-<a href="../LangRef.html#i_alloca">LLVM alloca instruction</a>:</p>
-
-<div class="doc_code">
-<pre>
-define i32 @example() {
-entry:
- %X = alloca i32 ; type of %X is i32*.
- ...
- %tmp = load i32* %X ; load the stack value %X from the stack.
- %tmp2 = add i32 %tmp, 1 ; increment it
- store i32 %tmp2, i32* %X ; store it back
- ...
-</pre>
-</div>
-
-<p>This code shows an example of how you can declare and manipulate a stack
-variable in the LLVM IR. Stack memory allocated with the alloca instruction is
-fully general: you can pass the address of the stack slot to functions, you can
-store it in other variables, etc. In our example above, we could rewrite the
-example to use the alloca technique to avoid using a PHI node:</p>
-
-<div class="doc_code">
-<pre>
-@G = weak global i32 0 ; type of @G is i32*
-@H = weak global i32 0 ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
- %X = alloca i32 ; type of %X is i32*.
- br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
- %X.0 = load i32* @G
- store i32 %X.0, i32* %X ; Update X
- br label %cond_next
-
-cond_false:
- %X.1 = load i32* @H
- store i32 %X.1, i32* %X ; Update X
- br label %cond_next
-
-cond_next:
- %X.2 = load i32* %X ; Read X
- ret i32 %X.2
-}
-</pre>
-</div>
-
-<p>With this, we have discovered a way to handle arbitrary mutable variables
-without the need to create Phi nodes at all:</p>
-
-<ol>
-<li>Each mutable variable becomes a stack allocation.</li>
-<li>Each read of the variable becomes a load from the stack.</li>
-<li>Each update of the variable becomes a store to the stack.</li>
-<li>Taking the address of a variable just uses the stack address directly.</li>
-</ol>
-
-<p>While this solution has solved our immediate problem, it introduced another
-one: we have now apparently introduced a lot of stack traffic for very simple
-and common operations, a major performance problem. Fortunately for us, the
-LLVM optimizer has a highly-tuned optimization pass named "mem2reg" that handles
-this case, promoting allocas like this into SSA registers, inserting Phi nodes
-as appropriate. If you run this example through the pass, for example, you'll
-get:</p>
-
-<div class="doc_code">
-<pre>
-$ <b>llvm-as &lt; example.ll | opt -mem2reg | llvm-dis</b>
-@G = weak global i32 0
-@H = weak global i32 0
-
-define i32 @test(i1 %Condition) {
-entry:
- br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
- %X.0 = load i32* @G
- br label %cond_next
-
-cond_false:
- %X.1 = load i32* @H
- br label %cond_next
-
-cond_next:
- %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
- ret i32 %X.01
-}
-</pre>
-</div>
-
-<p>The mem2reg pass implements the standard "iterated dominance frontier"
-algorithm for constructing SSA form and has a number of optimizations that speed
-up (very common) degenerate cases. The mem2reg optimization pass is the answer
-to dealing with mutable variables, and we highly recommend that you depend on
-it. Note that mem2reg only works on variables in certain circumstances:</p>
-
-<ol>
-<li>mem2reg is alloca-driven: it looks for allocas and if it can handle them, it
-promotes them. It does not apply to global variables or heap allocations.</li>
-
-<li>mem2reg only looks for alloca instructions in the entry block of the
-function. Being in the entry block guarantees that the alloca is only executed
-once, which makes analysis simpler.</li>
-
-<li>mem2reg only promotes allocas whose uses are direct loads and stores. If
-the address of the stack object is passed to a function, or if any funny pointer
-arithmetic is involved, the alloca will not be promoted.</li>
-
-<li>mem2reg only works on allocas of <a
-href="../LangRef.html#t_classifications">first class</a>
-values (such as pointers, scalars and vectors), and only if the array size
-of the allocation is 1 (or missing in the .ll file). mem2reg is not capable of
-promoting structs or arrays to registers. Note that the "scalarrepl" pass is
-more powerful and can promote structs, "unions", and arrays in many cases.</li>
-
-</ol>
-
-<p>
-All of these properties are easy to satisfy for most imperative languages, and
-we'll illustrate it below with Kaleidoscope. The final question you may be
-asking is: should I bother with this nonsense for my front-end? Wouldn't it be
-better if I just did SSA construction directly, avoiding use of the mem2reg
-optimization pass? In short, we strongly recommend that you use this technique
-for building SSA form, unless there is an extremely good reason not to. Using
-this technique is:</p>
-
-<ul>
-<li>Proven and well tested: llvm-gcc and clang both use this technique for local
-mutable variables. As such, the most common clients of LLVM are using this to
-handle a bulk of their variables. You can be sure that bugs are found fast and
-fixed early.</li>
-
-<li>Extremely Fast: mem2reg has a number of special cases that make it fast in
-common cases as well as fully general. For example, it has fast-paths for
-variables that are only used in a single block, variables that only have one
-assignment point, good heuristics to avoid insertion of unneeded phi nodes, etc.
-</li>
-
-<li>Needed for debug info generation: <a href="../SourceLevelDebugging.html">
-Debug information in LLVM</a> relies on having the address of the variable
-exposed so that debug info can be attached to it. This technique dovetails
-very naturally with this style of debug info.</li>
-</ul>
-
-<p>If nothing else, this makes it much easier to get your front-end up and
-running, and is very simple to implement. Lets extend Kaleidoscope with mutable
-variables now!
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="kalvars">Mutable Variables in Kaleidoscope</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we know the sort of problem we want to tackle, lets see what this
-looks like in the context of our little Kaleidoscope language. We're going to
-add two features:</p>
-
-<ol>
-<li>The ability to mutate variables with the '=' operator.</li>
-<li>The ability to define new variables.</li>
-</ol>
-
-<p>While the first item is really what this is about, we only have variables
-for incoming arguments as well as for induction variables, and redefining those only
-goes so far :). Also, the ability to define new variables is a
-useful thing regardless of whether you will be mutating them. Here's a
-motivating example that shows how we could use these:</p>
-
-<div class="doc_code">
-<pre>
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-# Recursive fib, we could do this before.
-def fib(x)
- if (x &lt; 3) then
- 1
- else
- fib(x-1)+fib(x-2);
-
-# Iterative fib.
-def fibi(x)
- <b>var a = 1, b = 1, c in</b>
- (for i = 3, i &lt; x in
- <b>c = a + b</b> :
- <b>a = b</b> :
- <b>b = c</b>) :
- b;
-
-# Call it.
-fibi(10);
-</pre>
-</div>
-
-<p>
-In order to mutate variables, we have to change our existing variables to use
-the "alloca trick". Once we have that, we'll add our new operator, then extend
-Kaleidoscope to support new variable definitions.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="adjustments">Adjusting Existing Variables for Mutation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The symbol table in Kaleidoscope is managed at code generation time by the
-'<tt>named_values</tt>' map. This map currently keeps track of the LLVM
-"Value*" that holds the double value for the named variable. In order to
-support mutation, we need to change this slightly, so that it
-<tt>named_values</tt> holds the <em>memory location</em> of the variable in
-question. Note that this change is a refactoring: it changes the structure of
-the code, but does not (by itself) change the behavior of the compiler. All of
-these changes are isolated in the Kaleidoscope code generator.</p>
-
-<p>
-At this point in Kaleidoscope's development, it only supports variables for two
-things: incoming arguments to functions and the induction variable of 'for'
-loops. For consistency, we'll allow mutation of these variables in addition to
-other user-defined variables. This means that these will both need memory
-locations.
-</p>
-
-<p>To start our transformation of Kaleidoscope, we'll change the
-<tt>named_values</tt> map so that it maps to AllocaInst* instead of Value*.
-Once we do this, the C++ compiler will tell us what parts of the code we need to
-update:</p>
-
-<p><b>Note:</b> the ocaml bindings currently model both <tt>Value*</tt>s and
-<tt>AllocInst*</tt>s as <tt>Llvm.llvalue</tt>s, but this may change in the
-future to be more type safe.</p>
-
-<div class="doc_code">
-<pre>
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-</pre>
-</div>
-
-<p>Also, since we will need to create these alloca's, we'll use a helper
-function that ensures that the allocas are created in the entry block of the
-function:</p>
-
-<div class="doc_code">
-<pre>
-(* Create an alloca instruction in the entry block of the function. This
- * is used for mutable variables etc. *)
-let create_entry_block_alloca the_function var_name =
- let builder = builder_at (instr_begin (entry_block the_function)) in
- build_alloca double_type var_name builder
-</pre>
-</div>
-
-<p>This funny looking code creates an <tt>Llvm.llbuilder</tt> object that is
-pointing at the first instruction of the entry block. It then creates an alloca
-with the expected name and returns it. Because all values in Kaleidoscope are
-doubles, there is no need to pass in a type to use.</p>
-
-<p>With this in place, the first functionality change we want to make is to
-variable references. In our new scheme, variables live on the stack, so code
-generating a reference to them actually needs to produce a load from the stack
-slot:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
- ...
- | Ast.Variable name -&gt;
- let v = try Hashtbl.find named_values name with
- | Not_found -&gt; raise (Error "unknown variable name")
- in
- <b>(* Load the value. *)
- build_load v name builder</b>
-</pre>
-</div>
-
-<p>As you can see, this is pretty straightforward. Now we need to update the
-things that define the variables to set up the alloca. We'll start with
-<tt>codegen_expr Ast.For ...</tt> (see the <a href="#code">full code listing</a>
-for the unabridged code):</p>
-
-<div class="doc_code">
-<pre>
- | Ast.For (var_name, start, end_, step, body) -&gt;
- let the_function = block_parent (insertion_block builder) in
-
- (* Create an alloca for the variable in the entry block. *)
- <b>let alloca = create_entry_block_alloca the_function var_name in</b>
-
- (* Emit the start code first, without 'variable' in scope. *)
- let start_val = codegen_expr start in
-
- <b>(* Store the value into the alloca. *)
- ignore(build_store start_val alloca builder);</b>
-
- ...
-
-  (* Within the loop, the variable is defined equal to the alloca. If it
- * shadows an existing variable, we have to restore it, so save it
- * now. *)
- let old_val =
- try Some (Hashtbl.find named_values var_name) with Not_found -&gt; None
- in
- <b>Hashtbl.add named_values var_name alloca;</b>
-
- ...
-
- (* Compute the end condition. *)
- let end_cond = codegen_expr end_ in
-
- <b>(* Reload, increment, and restore the alloca. This handles the case where
- * the body of the loop mutates the variable. *)
- let cur_var = build_load alloca var_name builder in
- let next_var = build_add cur_var step_val "nextvar" builder in
- ignore(build_store next_var alloca builder);</b>
- ...
-</pre>
-</div>
-
-<p>This code is virtually identical to the code <a
-href="OCamlLangImpl5.html#forcodegen">before we allowed mutable variables</a>.
-The big difference is that we no longer have to construct a PHI node, and we use
-load/store to access the variable as needed.</p>
-
-<p>To support mutable argument variables, we need to also make allocas for them.
-The code for this is also pretty simple:</p>
-
-<div class="doc_code">
-<pre>
-(* Create an alloca for each argument and register the argument in the symbol
- * table so that references to it will succeed. *)
-let create_argument_allocas the_function proto =
- let args = match proto with
- | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -&gt; args
- in
- Array.iteri (fun i ai -&gt;
- let var_name = args.(i) in
- (* Create an alloca for this variable. *)
- let alloca = create_entry_block_alloca the_function var_name in
-
- (* Store the initial value into the alloca. *)
- ignore(build_store ai alloca builder);
-
- (* Add arguments to variable symbol table. *)
- Hashtbl.add named_values var_name alloca;
- ) (params the_function)
-</pre>
-</div>
-
-<p>For each argument, we make an alloca, store the input value to the function
-into the alloca, and register the alloca as the memory location for the
-argument. This method gets invoked by <tt>Codegen.codegen_func</tt> right after
-it sets up the entry block for the function.</p>
-
-<p>The final missing piece is adding the mem2reg pass, which allows us to get
-good codegen once again:</p>
-
-<div class="doc_code">
-<pre>
-let main () =
- ...
- let the_fpm = PassManager.create_function Codegen.the_module in
-
- (* Set up the optimizer pipeline. Start with registering info about how the
- * target lays out data structures. *)
- DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
- <b>(* Promote allocas to registers. *)
- add_memory_to_register_promotion the_fpm;</b>
-
- (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
- add_instruction_combining the_fpm;
-
- (* reassociate expressions. *)
- add_reassociation the_fpm;
-</pre>
-</div>
-
-<p>It is interesting to see what the code looks like before and after the
-mem2reg optimization runs. For example, this is the before/after code for our
-recursive fib function. Before the optimization:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
- <b>%x1 = alloca double
- store double %x, double* %x1
- %x2 = load double* %x1</b>
- %cmptmp = fcmp ult double %x2, 3.000000e+00
- %booltmp = uitofp i1 %cmptmp to double
- %ifcond = fcmp one double %booltmp, 0.000000e+00
- br i1 %ifcond, label %then, label %else
-
-then: ; preds = %entry
- br label %ifcont
-
-else: ; preds = %entry
- <b>%x3 = load double* %x1</b>
- %subtmp = fsub double %x3, 1.000000e+00
- %calltmp = call double @fib(double %subtmp)
- <b>%x4 = load double* %x1</b>
- %subtmp5 = fsub double %x4, 2.000000e+00
- %calltmp6 = call double @fib(double %subtmp5)
- %addtmp = fadd double %calltmp, %calltmp6
- br label %ifcont
-
-ifcont: ; preds = %else, %then
- %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
- ret double %iftmp
-}
-</pre>
-</div>
-
-<p>Here there is only one variable (x, the input argument) but you can still
-see the extremely simple-minded code generation strategy we are using. In the
-entry block, an alloca is created, and the initial input value is stored into
-it. Each reference to the variable does a reload from the stack. Also, note
-that we didn't modify the if/then/else expression, so it still inserts a PHI
-node. While we could make an alloca for it, it is actually easier to create a
-PHI node for it, so we still just make the PHI.</p>
-
-<p>Here is the code after the mem2reg pass runs:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
- %cmptmp = fcmp ult double <b>%x</b>, 3.000000e+00
- %booltmp = uitofp i1 %cmptmp to double
- %ifcond = fcmp one double %booltmp, 0.000000e+00
- br i1 %ifcond, label %then, label %else
-
-then:
- br label %ifcont
-
-else:
- %subtmp = fsub double <b>%x</b>, 1.000000e+00
- %calltmp = call double @fib(double %subtmp)
- %subtmp5 = fsub double <b>%x</b>, 2.000000e+00
- %calltmp6 = call double @fib(double %subtmp5)
- %addtmp = fadd double %calltmp, %calltmp6
- br label %ifcont
-
-ifcont: ; preds = %else, %then
- %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
- ret double %iftmp
-}
-</pre>
-</div>
-
-<p>This is a trivial case for mem2reg, since there are no redefinitions of the
-variable. The point of showing this is to calm your tension about inserting
-such blatant inefficiencies :).</p>
-
-<p>After the rest of the optimizers run, we get:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
- %cmptmp = fcmp ult double %x, 3.000000e+00
- %booltmp = uitofp i1 %cmptmp to double
- %ifcond = fcmp ueq double %booltmp, 0.000000e+00
- br i1 %ifcond, label %else, label %ifcont
-
-else:
- %subtmp = fsub double %x, 1.000000e+00
- %calltmp = call double @fib(double %subtmp)
- %subtmp5 = fsub double %x, 2.000000e+00
- %calltmp6 = call double @fib(double %subtmp5)
- %addtmp = fadd double %calltmp, %calltmp6
- ret double %addtmp
-
-ifcont:
- ret double 1.000000e+00
-}
-</pre>
-</div>
-
-<p>Here we see that the simplifycfg pass decided to clone the return instruction
-into the end of the 'else' block. This allowed it to eliminate some branches
-and the PHI node.</p>
-
-<p>Now that all symbol table references are updated to use stack variables,
-we'll add the assignment operator.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="assignment">New Assignment Operator</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>With our current framework, adding a new assignment operator is really
-simple. We will parse it just like any other binary operator, but handle it
-internally (instead of allowing the user to define it). The first step is to
-set a precedence:</p>
-
-<div class="doc_code">
-<pre>
-let main () =
- (* Install standard binary operators.
- * 1 is the lowest precedence. *)
- <b>Hashtbl.add Parser.binop_precedence '=' 2;</b>
- Hashtbl.add Parser.binop_precedence '&lt;' 10;
- Hashtbl.add Parser.binop_precedence '+' 20;
- Hashtbl.add Parser.binop_precedence '-' 20;
- ...
-</pre>
-</div>
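-
-<p>Because '=' is installed with precedence 2, it binds more loosely than the
-comparison and arithmetic operators, so an assignment takes the whole
-expression to its right as its RHS. For example, this (purely illustrative)
-function parses as "x = (y &lt; 10)", not "(x = y) &lt; 10":</p>
-
-<div class="doc_code">
-<pre>
-# '=' binds loosest, so the comparison becomes the right-hand side.
-def small(x y)
-  x = y &lt; 10;
-</pre>
-</div>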
-
-<p>Now that the parser knows the precedence of the binary operator, it takes
-care of all the parsing and AST generation. We just need to implement codegen
-for the assignment operator. This looks like:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
-  ...
-  | Ast.Binary (op, lhs, rhs) -&gt;
-      begin match op with
-      | '=' -&gt;
-          (* Special case '=' because we don't want to emit the LHS as an
-           * expression. *)
-          let name =
-            match lhs with
-            | Ast.Variable name -&gt; name
-            | _ -&gt; raise (Error "destination of '=' must be a variable")
-          in
-</pre>
-</div>
-
-<p>Unlike the rest of the binary operators, our assignment operator doesn't
-follow the "emit LHS, emit RHS, do computation" model. As such, it is handled
-as a special case before the other binary operators are handled. The other
-strange thing is that it requires the LHS to be a variable. It is invalid to
-have "(x+1) = expr" - only things like "x = expr" are allowed.
-</p>
-
-
-<div class="doc_code">
-<pre>
-          (* Codegen the rhs. *)
-          let val_ = codegen_expr rhs in
-
-          (* Lookup the name. *)
-          let variable = try Hashtbl.find named_values name with
-            | Not_found -&gt; raise (Error "unknown variable name")
-          in
-          ignore(build_store val_ variable builder);
-          val_
-      | _ -&gt;
-          ...
-</pre>
-</div>
-
-<p>Once we have the variable, codegen'ing the assignment is straightforward:
-we emit the RHS of the assignment, create a store, and return the computed
-value. Returning a value allows for chained assignments like "X = (Y = Z)".</p>
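-
-<p>As a small illustration, the following (hypothetical) function stores 'z'
-into both of its other arguments and returns the stored value. Note that the
-parentheses are required: '=' is parsed here like any other binary operator,
-associating to the left, so "x = y = z" would parse as "(x = y) = z" and be
-rejected because its destination is not a variable.</p>
-
-<div class="doc_code">
-<pre>
-# The inner assignment yields its value, which feeds the outer one.
-def chain(x y z)
-  x = (y = z);
-</pre>
-</div>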
-
-<p>Now that we have an assignment operator, we can mutate loop variables and
-arguments. For example, we can now run code like this:</p>
-
-<div class="doc_code">
-<pre>
-# Function to print a double.
-extern printd(x);
-
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-def test(x)
- printd(x) :
- x = 4 :
- printd(x);
-
-test(123);
-</pre>
-</div>
-
-<p>When run, this example prints "123" and then "4", showing that we did
-actually mutate the value! Okay, we have now officially implemented our goal:
-getting this to work requires SSA construction in the general case. However,
-to be really useful, we want the ability to define our own local variables.
-Let's add this next!
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="localvars">User-defined Local Variables</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Adding var/in is just like the other extensions we made to
-Kaleidoscope: we extend the lexer, the parser, the AST and the code generator.
-The first step for adding our new 'var/in' construct is to extend the lexer.
-As before, this is pretty trivial; the code looks like this:</p>
-
-<div class="doc_code">
-<pre>
-type token =
- ...
- <b>(* var definition *)
- | Var</b>
-
-...
-
-and lex_ident buffer = parser
- ...
- | "in" -&gt; [&lt; 'Token.In; stream &gt;]
- | "binary" -&gt; [&lt; 'Token.Binary; stream &gt;]
- | "unary" -&gt; [&lt; 'Token.Unary; stream &gt;]
- <b>| "var" -&gt; [&lt; 'Token.Var; stream &gt;]</b>
- ...
-</pre>
-</div>
-
-<p>The next step is to define the AST node that we will construct. For var/in,
-it looks like this:</p>
-
-<div class="doc_code">
-<pre>
-type expr =
- ...
- (* variant for var/in. *)
- | Var of (string * expr option) array * expr
- ...
-</pre>
-</div>
-
-<p>var/in allows a list of names to be defined all at once, and each name can
-optionally have an initializer value. As such, we capture this information in
-the var_names array. Also, var/in has a body; this body is allowed to access
-the variables defined by the var/in.</p>
-
-<p>With this in place, we can define the parser pieces. The first thing we do
-is add it as a primary expression:</p>
-
-<div class="doc_code">
-<pre>
-(* primary
- * ::= identifier
- * ::= numberexpr
- * ::= parenexpr
- * ::= ifexpr
- * ::= forexpr
- <b>* ::= varexpr</b> *)
-let rec parse_primary = parser
- ...
- <b>(* varexpr
-   * ::= 'var' identifier ('=' expression)?
- * (',' identifier ('=' expression)?)* 'in' expression *)
- | [&lt; 'Token.Var;
- (* At least one variable name is required. *)
- 'Token.Ident id ?? "expected identifier after var";
- init=parse_var_init;
- var_names=parse_var_names [(id, init)];
- (* At this point, we have to have 'in'. *)
- 'Token.In ?? "expected 'in' keyword after 'var'";
- body=parse_expr &gt;] -&gt;
- Ast.Var (Array.of_list (List.rev var_names), body)</b>
-
-...
-
-and parse_var_init = parser
- (* read in the optional initializer. *)
- | [&lt; 'Token.Kwd '='; e=parse_expr &gt;] -&gt; Some e
- | [&lt; &gt;] -&gt; None
-
-and parse_var_names accumulator = parser
- | [&lt; 'Token.Kwd ',';
- 'Token.Ident id ?? "expected identifier list after var";
- init=parse_var_init;
- e=parse_var_names ((id, init) :: accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; accumulator
-</pre>
-</div>
-
-<p>Now that we can parse and represent the code, we need to support emission of
-LLVM IR for it. This code starts out with:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
- ...
-  | Ast.Var (var_names, body) -&gt;
- let old_bindings = ref [] in
-
- let the_function = block_parent (insertion_block builder) in
-
- (* Register all variables and emit their initializer. *)
- Array.iter (fun (var_name, init) -&gt;
-</pre>
-</div>
-
-<p>Basically it loops over all the variables, installing them one at a time.
-For each variable we put into the symbol table, we remember the previous value
-that we replace in <tt>old_bindings</tt>.</p>
-
-<div class="doc_code">
-<pre>
- (* Emit the initializer before adding the variable to scope, this
- * prevents the initializer from referencing the variable itself, and
- * permits stuff like this:
- * var a = 1 in
- * var a = a in ... # refers to outer 'a'. *)
- let init_val =
- match init with
- | Some init -&gt; codegen_expr init
- (* If not specified, use 0.0. *)
- | None -&gt; const_float double_type 0.0
- in
-
- let alloca = create_entry_block_alloca the_function var_name in
- ignore(build_store init_val alloca builder);
-
- (* Remember the old variable binding so that we can restore the binding
- * when we unrecurse. *)
-
- begin
- try
- let old_value = Hashtbl.find named_values var_name in
- old_bindings := (var_name, old_value) :: !old_bindings;
-    with Not_found -&gt; ()
- end;
-
- (* Remember this binding. *)
- Hashtbl.add named_values var_name alloca;
- ) var_names;
-</pre>
-</div>
-
-<p>There are more comments here than code. The basic idea is that we emit the
-initializer, create the alloca, then update the symbol table to point to it.
-Once all the variables are installed in the symbol table, we evaluate the body
-of the var/in expression:</p>
-
-<div class="doc_code">
-<pre>
- (* Codegen the body, now that all vars are in scope. *)
- let body_val = codegen_expr body in
-</pre>
-</div>
-
-<p>Finally, before returning, we restore the previous variable bindings:</p>
-
-<div class="doc_code">
-<pre>
- (* Pop all our variables from scope. *)
- List.iter (fun (var_name, old_value) -&gt;
- Hashtbl.add named_values var_name old_value
- ) !old_bindings;
-
- (* Return the body computation. *)
- body_val
-</pre>
-</div>
-
-<p>The end result of all of this is that we get properly scoped variable
-definitions, and we even (trivially) allow mutation of them :).</p>
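-
-<p>For example, here is a small (hypothetical) snippet that exercises the new
-scoping rules, reusing the ':' sequencing operator and the <tt>printd</tt>
-extern from the earlier examples. The inner 'a' shadows the outer one inside
-its body, the outer binding is restored afterwards, and 'b' falls back to the
-default initializer of 0.0, so this should print 2, then 1, then 0:</p>
-
-<div class="doc_code">
-<pre>
-var a = 1, b in
-  (var a = a + 1 in printd(a)) :  # inner 'a' shadows the outer one
-  printd(a) :                     # the outer 'a' is visible again
-  printd(b);                      # 'b' defaulted to 0.0
-</pre>
-</div>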
-
-<p>With this, we completed what we set out to do. Our nice iterative fib
-example from the intro compiles and runs just fine. The mem2reg pass optimizes
-all of our stack variables into SSA registers, inserting PHI nodes where needed,
-and our front-end remains simple: no "iterated dominance frontier" computation
-anywhere in sight.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with mutable
-variables and var/in support. To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-&lt;*.{byte,native}&gt;: g++, use_llvm, use_llvm_analysis
-&lt;*.{byte,native}&gt;: use_llvm_executionengine, use_llvm_target
-&lt;*.{byte,native}&gt;: use_llvm_scalar_opts, use_bindings
-</pre>
-</dd>
-
-<dt>myocamlbuild.ml:</dt>
-<dd class="doc_code">
-<pre>
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns 'Kwd' if it encounters an unknown character, otherwise one
- * of these other tokens for known things. *)
-type token =
- (* commands *)
- | Def | Extern
-
- (* primary *)
- | Ident of string | Number of float
-
- (* unknown *)
- | Kwd of char
-
- (* control *)
- | If | Then | Else
- | For | In
-
- (* operators *)
- | Binary | Unary
-
- (* var definition *)
- | Var
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
- (* Skip any whitespace. *)
- | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9]* *)
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_ident buffer stream
-
- (* number: [0-9.]+ *)
- | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
- let buffer = Buffer.create 1 in
- Buffer.add_char buffer c;
- lex_number buffer stream
-
- (* Comment until end of line. *)
- | [&lt; ' ('#'); stream &gt;] -&gt;
- lex_comment stream
-
- (* Otherwise, just return the character as its ascii value. *)
- | [&lt; 'c; stream &gt;] -&gt;
- [&lt; 'Token.Kwd c; lex stream &gt;]
-
- (* end of stream. *)
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
- | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_number buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
- | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
- Buffer.add_char buffer c;
- lex_ident buffer stream
- | [&lt; stream=lex &gt;] -&gt;
- match Buffer.contents buffer with
- | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
- | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
- | "if" -&gt; [&lt; 'Token.If; stream &gt;]
- | "then" -&gt; [&lt; 'Token.Then; stream &gt;]
- | "else" -&gt; [&lt; 'Token.Else; stream &gt;]
- | "for" -&gt; [&lt; 'Token.For; stream &gt;]
- | "in" -&gt; [&lt; 'Token.In; stream &gt;]
- | "binary" -&gt; [&lt; 'Token.Binary; stream &gt;]
- | "unary" -&gt; [&lt; 'Token.Unary; stream &gt;]
- | "var" -&gt; [&lt; 'Token.Var; stream &gt;]
- | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
- | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
- | [&lt; 'c; e=lex_comment &gt;] -&gt; e
- | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
- (* variant for numeric literals like "1.0". *)
- | Number of float
-
- (* variant for referencing a variable, like "a". *)
- | Variable of string
-
- (* variant for a unary operator. *)
- | Unary of char * expr
-
- (* variant for a binary operator. *)
- | Binary of char * expr * expr
-
- (* variant for function calls. *)
- | Call of string * expr array
-
- (* variant for if/then/else. *)
- | If of expr * expr * expr
-
- (* variant for for/in. *)
- | For of string * expr * expr * expr option * expr
-
- (* variant for var/in. *)
- | Var of (string * expr option) array * expr
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto =
- | Prototype of string * string array
- | BinOpPrototype of string * string array * int
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- * ::= identifier
- * ::= numberexpr
- * ::= parenexpr
- * ::= ifexpr
- * ::= forexpr
- * ::= varexpr *)
-let rec parse_primary = parser
- (* numberexpr ::= number *)
- | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
- (* parenexpr ::= '(' expression ')' *)
- | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
- (* identifierexpr
- * ::= identifier
- * ::= identifier '(' argumentexpr ')' *)
- | [&lt; 'Token.Ident id; stream &gt;] -&gt;
- let rec parse_args accumulator = parser
- | [&lt; e=parse_expr; stream &gt;] -&gt;
- begin parser
- | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; e :: accumulator
- end stream
- | [&lt; &gt;] -&gt; accumulator
- in
- let rec parse_ident id = parser
- (* Call. *)
- | [&lt; 'Token.Kwd '(';
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
- Ast.Call (id, Array.of_list (List.rev args))
-
- (* Simple variable ref. *)
- | [&lt; &gt;] -&gt; Ast.Variable id
- in
- parse_ident id stream
-
- (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
- | [&lt; 'Token.If; c=parse_expr;
- 'Token.Then ?? "expected 'then'"; t=parse_expr;
- 'Token.Else ?? "expected 'else'"; e=parse_expr &gt;] -&gt;
- Ast.If (c, t, e)
-
- (* forexpr
- ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
- | [&lt; 'Token.For;
- 'Token.Ident id ?? "expected identifier after for";
- 'Token.Kwd '=' ?? "expected '=' after for";
- stream &gt;] -&gt;
- begin parser
- | [&lt;
- start=parse_expr;
- 'Token.Kwd ',' ?? "expected ',' after for";
- end_=parse_expr;
- stream &gt;] -&gt;
- let step =
- begin parser
- | [&lt; 'Token.Kwd ','; step=parse_expr &gt;] -&gt; Some step
- | [&lt; &gt;] -&gt; None
- end stream
- in
- begin parser
- | [&lt; 'Token.In; body=parse_expr &gt;] -&gt;
- Ast.For (id, start, end_, step, body)
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected 'in' after for")
- end stream
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected '=' after for")
- end stream
-
- (* varexpr
-   * ::= 'var' identifier ('=' expression)?
- * (',' identifier ('=' expression)?)* 'in' expression *)
- | [&lt; 'Token.Var;
- (* At least one variable name is required. *)
- 'Token.Ident id ?? "expected identifier after var";
- init=parse_var_init;
- var_names=parse_var_names [(id, init)];
- (* At this point, we have to have 'in'. *)
- 'Token.In ?? "expected 'in' keyword after 'var'";
- body=parse_expr &gt;] -&gt;
- Ast.Var (Array.of_list (List.rev var_names), body)
-
- | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* unary
- * ::= primary
- * ::= '!' unary *)
-and parse_unary = parser
- (* If this is a unary operator, read it. *)
- | [&lt; 'Token.Kwd op when op != '(' &amp;&amp; op != ')'; operand=parse_expr &gt;] -&gt;
- Ast.Unary (op, operand)
-
- (* If the current token is not an operator, it must be a primary expr. *)
- | [&lt; stream &gt;] -&gt; parse_primary stream
-
-(* binoprhs
- * ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
- match Stream.peek stream with
- (* If this is a binop, find its precedence. *)
- | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
- let token_prec = precedence c in
-
- (* If this is a binop that binds at least as tightly as the current binop,
- * consume it, otherwise we are done. *)
- if token_prec &lt; expr_prec then lhs else begin
- (* Eat the binop. *)
- Stream.junk stream;
-
- (* Parse the primary expression after the binary operator. *)
- let rhs = parse_unary stream in
-
- (* Okay, we know this is a binop. *)
- let rhs =
- match Stream.peek stream with
- | Some (Token.Kwd c2) -&gt;
- (* If BinOp binds less tightly with rhs than the operator after
- * rhs, let the pending operator take rhs as its lhs. *)
- let next_prec = precedence c2 in
- if token_prec &lt; next_prec
- then parse_bin_rhs (token_prec + 1) rhs stream
- else rhs
- | _ -&gt; rhs
- in
-
- (* Merge lhs/rhs. *)
- let lhs = Ast.Binary (c, lhs, rhs) in
- parse_bin_rhs expr_prec lhs stream
- end
- | _ -&gt; lhs
-
-and parse_var_init = parser
- (* read in the optional initializer. *)
- | [&lt; 'Token.Kwd '='; e=parse_expr &gt;] -&gt; Some e
- | [&lt; &gt;] -&gt; None
-
-and parse_var_names accumulator = parser
- | [&lt; 'Token.Kwd ',';
- 'Token.Ident id ?? "expected identifier list after var";
- init=parse_var_init;
- e=parse_var_names ((id, init) :: accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; accumulator
-
-(* expression
- * ::= primary binoprhs *)
-and parse_expr = parser
- | [&lt; lhs=parse_unary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- * ::= id '(' id* ')'
- * ::= binary LETTER number? (id, id)
- * ::= unary LETTER number? (id) *)
-let parse_prototype =
- let rec parse_args accumulator = parser
- | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
- | [&lt; &gt;] -&gt; accumulator
- in
- let parse_operator = parser
- | [&lt; 'Token.Unary &gt;] -&gt; "unary", 1
- | [&lt; 'Token.Binary &gt;] -&gt; "binary", 2
- in
- let parse_binary_precedence = parser
- | [&lt; 'Token.Number n &gt;] -&gt; int_of_float n
- | [&lt; &gt;] -&gt; 30
- in
- parser
- | [&lt; 'Token.Ident id;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- (* success. *)
- Ast.Prototype (id, Array.of_list (List.rev args))
- | [&lt; (prefix, kind)=parse_operator;
- 'Token.Kwd op ?? "expected an operator";
- (* Read the precedence if present. *)
- binary_precedence=parse_binary_precedence;
- 'Token.Kwd '(' ?? "expected '(' in prototype";
- args=parse_args [];
- 'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
- let name = prefix ^ (String.make 1 op) in
- let args = Array.of_list (List.rev args) in
-
- (* Verify right number of arguments for operator. *)
- if Array.length args != kind
- then raise (Stream.Error "invalid number of operands for operator")
- else
- if kind == 1 then
- Ast.Prototype (name, args)
- else
- Ast.BinOpPrototype (name, args, binary_precedence)
- | [&lt; &gt;] -&gt;
- raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
- | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
- Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
- | [&lt; e=parse_expr &gt;] -&gt;
- (* Make an anonymous proto. *)
- Ast.Function (Ast.Prototype ("", [||]), e)
-
-(* external ::= 'extern' prototype *)
-let parse_extern = parser
- | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>codegen.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-(* Create an alloca instruction in the entry block of the function. This
- * is used for mutable variables etc. *)
-let create_entry_block_alloca the_function var_name =
- let builder = builder_at context (instr_begin (entry_block the_function)) in
- build_alloca double_type var_name builder
-
-let rec codegen_expr = function
- | Ast.Number n -&gt; const_float double_type n
- | Ast.Variable name -&gt;
- let v = try Hashtbl.find named_values name with
- | Not_found -&gt; raise (Error "unknown variable name")
- in
- (* Load the value. *)
- build_load v name builder
- | Ast.Unary (op, operand) -&gt;
- let operand = codegen_expr operand in
- let callee = "unary" ^ (String.make 1 op) in
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "unknown unary operator")
- in
- build_call callee [|operand|] "unop" builder
- | Ast.Binary (op, lhs, rhs) -&gt;
- begin match op with
- | '=' -&gt;
- (* Special case '=' because we don't want to emit the LHS as an
- * expression. *)
- let name =
- match lhs with
- | Ast.Variable name -&gt; name
- | _ -&gt; raise (Error "destination of '=' must be a variable")
- in
-
- (* Codegen the rhs. *)
- let val_ = codegen_expr rhs in
-
- (* Lookup the name. *)
- let variable = try Hashtbl.find named_values name with
- | Not_found -&gt; raise (Error "unknown variable name")
- in
- ignore(build_store val_ variable builder);
- val_
- | _ -&gt;
- let lhs_val = codegen_expr lhs in
- let rhs_val = codegen_expr rhs in
- begin
- match op with
- | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
- | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
- | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
- | '&lt;' -&gt;
- (* Convert bool 0/1 to double 0.0 or 1.0 *)
- let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
- build_uitofp i double_type "booltmp" builder
- | _ -&gt;
- (* If it wasn't a builtin binary operator, it must be a user defined
- * one. Emit a call to it. *)
- let callee = "binary" ^ (String.make 1 op) in
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "binary operator not found!")
- in
- build_call callee [|lhs_val; rhs_val|] "binop" builder
- end
- end
- | Ast.Call (callee, args) -&gt;
- (* Look up the name in the module table. *)
- let callee =
- match lookup_function callee the_module with
- | Some callee -&gt; callee
- | None -&gt; raise (Error "unknown function referenced")
- in
- let params = params callee in
-
- (* If argument mismatch error. *)
- if Array.length params == Array.length args then () else
- raise (Error "incorrect # arguments passed");
- let args = Array.map codegen_expr args in
- build_call callee args "calltmp" builder
- | Ast.If (cond, then_, else_) -&gt;
- let cond = codegen_expr cond in
-
- (* Convert condition to a bool by comparing equal to 0.0 *)
- let zero = const_float double_type 0.0 in
- let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-
- (* Grab the first block so that we might later add the conditional branch
- * to it at the end of the function. *)
- let start_bb = insertion_block builder in
- let the_function = block_parent start_bb in
-
- let then_bb = append_block context "then" the_function in
-
- (* Emit 'then' value. *)
- position_at_end then_bb builder;
- let then_val = codegen_expr then_ in
-
- (* Codegen of 'then' can change the current block, update then_bb for the
- * phi. We create a new name because one is used for the phi node, and the
- * other is used for the conditional branch. *)
- let new_then_bb = insertion_block builder in
-
- (* Emit 'else' value. *)
- let else_bb = append_block context "else" the_function in
- position_at_end else_bb builder;
- let else_val = codegen_expr else_ in
-
- (* Codegen of 'else' can change the current block, update else_bb for the
- * phi. *)
- let new_else_bb = insertion_block builder in
-
- (* Emit merge block. *)
- let merge_bb = append_block context "ifcont" the_function in
- position_at_end merge_bb builder;
- let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
- let phi = build_phi incoming "iftmp" builder in
-
- (* Return to the start block to add the conditional branch. *)
- position_at_end start_bb builder;
- ignore (build_cond_br cond_val then_bb else_bb builder);
-
-    (* Set an unconditional branch at the end of the 'then' block and the
- * 'else' block to the 'merge' block. *)
- position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
- position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
- (* Finally, set the builder to the end of the merge block. *)
- position_at_end merge_bb builder;
-
- phi
- | Ast.For (var_name, start, end_, step, body) -&gt;
- (* Output this as:
- * var = alloca double
- * ...
- * start = startexpr
- * store start -&gt; var
- * goto loop
- * loop:
- * ...
- * bodyexpr
- * ...
- * loopend:
- * step = stepexpr
- * endcond = endexpr
- *
- * curvar = load var
- * nextvar = curvar + step
- * store nextvar -&gt; var
- * br endcond, loop, endloop
- * outloop: *)
-
- let the_function = block_parent (insertion_block builder) in
-
- (* Create an alloca for the variable in the entry block. *)
- let alloca = create_entry_block_alloca the_function var_name in
-
- (* Emit the start code first, without 'variable' in scope. *)
- let start_val = codegen_expr start in
-
- (* Store the value into the alloca. *)
- ignore(build_store start_val alloca builder);
-
- (* Make the new basic block for the loop header, inserting after current
- * block. *)
- let loop_bb = append_block context "loop" the_function in
-
- (* Insert an explicit fall through from the current block to the
- * loop_bb. *)
- ignore (build_br loop_bb builder);
-
- (* Start insertion in loop_bb. *)
- position_at_end loop_bb builder;
-
-    (* Within the loop, the variable is defined equal to the alloca. If it
- * shadows an existing variable, we have to restore it, so save it
- * now. *)
- let old_val =
- try Some (Hashtbl.find named_values var_name) with Not_found -&gt; None
- in
- Hashtbl.add named_values var_name alloca;
-
- (* Emit the body of the loop. This, like any other expr, can change the
- * current BB. Note that we ignore the value computed by the body, but
- * don't allow an error *)
- ignore (codegen_expr body);
-
- (* Emit the step value. *)
- let step_val =
- match step with
- | Some step -&gt; codegen_expr step
- (* If not specified, use 1.0. *)
- | None -&gt; const_float double_type 1.0
- in
-
- (* Compute the end condition. *)
- let end_cond = codegen_expr end_ in
-
- (* Reload, increment, and restore the alloca. This handles the case where
- * the body of the loop mutates the variable. *)
- let cur_var = build_load alloca var_name builder in
- let next_var = build_add cur_var step_val "nextvar" builder in
- ignore(build_store next_var alloca builder);
-
- (* Convert condition to a bool by comparing equal to 0.0. *)
- let zero = const_float double_type 0.0 in
- let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-
- (* Create the "after loop" block and insert it. *)
- let after_bb = append_block context "afterloop" the_function in
-
- (* Insert the conditional branch into the end of loop_end_bb. *)
- ignore (build_cond_br end_cond loop_bb after_bb builder);
-
- (* Any new code will be inserted in after_bb. *)
- position_at_end after_bb builder;
-
- (* Restore the unshadowed variable. *)
- begin match old_val with
- | Some old_val -&gt; Hashtbl.add named_values var_name old_val
- | None -&gt; ()
- end;
-
- (* for expr always returns 0.0. *)
- const_null double_type
- | Ast.Var (var_names, body) -&gt;
- let old_bindings = ref [] in
-
- let the_function = block_parent (insertion_block builder) in
-
- (* Register all variables and emit their initializer. *)
- Array.iter (fun (var_name, init) -&gt;
- (* Emit the initializer before adding the variable to scope, this
- * prevents the initializer from referencing the variable itself, and
- * permits stuff like this:
- * var a = 1 in
- * var a = a in ... # refers to outer 'a'. *)
- let init_val =
- match init with
- | Some init -&gt; codegen_expr init
- (* If not specified, use 0.0. *)
- | None -&gt; const_float double_type 0.0
- in
-
- let alloca = create_entry_block_alloca the_function var_name in
- ignore(build_store init_val alloca builder);
-
- (* Remember the old variable binding so that we can restore the binding
- * when we unrecurse. *)
- begin
- try
- let old_value = Hashtbl.find named_values var_name in
- old_bindings := (var_name, old_value) :: !old_bindings;
- with Not_found -&gt; ()
- end;
-
- (* Remember this binding. *)
- Hashtbl.add named_values var_name alloca;
- ) var_names;
-
- (* Codegen the body, now that all vars are in scope. *)
- let body_val = codegen_expr body in
-
- (* Pop all our variables from scope. *)
- List.iter (fun (var_name, old_value) -&gt;
- Hashtbl.add named_values var_name old_value
- ) !old_bindings;
-
- (* Return the body computation. *)
- body_val
-
-let codegen_proto = function
- | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -&gt;
- (* Make the function type: double(double,double) etc. *)
- let doubles = Array.make (Array.length args) double_type in
- let ft = function_type double_type doubles in
- let f =
- match lookup_function name the_module with
- | None -&gt; declare_function name ft the_module
-
- (* If 'f' conflicted, there was already something named 'name'. If it
- * has a body, don't allow redefinition or reextern. *)
- | Some f -&gt;
- (* If 'f' already has a body, reject this. *)
- if block_begin f &lt;&gt; At_end f then
- raise (Error "redefinition of function");
-
- (* If 'f' took a different number of arguments, reject. *)
- if element_type (type_of f) &lt;&gt; ft then
- raise (Error "redefinition of function with different # args");
- f
- in
-
- (* Set names for all arguments. *)
- Array.iteri (fun i a -&gt;
- let n = args.(i) in
- set_value_name n a;
- Hashtbl.add named_values n a;
- ) (params f);
- f
-
-(* Create an alloca for each argument and register the argument in the symbol
- * table so that references to it will succeed. *)
-let create_argument_allocas the_function proto =
- let args = match proto with
- | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -&gt; args
- in
- Array.iteri (fun i ai -&gt;
- let var_name = args.(i) in
- (* Create an alloca for this variable. *)
- let alloca = create_entry_block_alloca the_function var_name in
-
- (* Store the initial value into the alloca. *)
- ignore(build_store ai alloca builder);
-
- (* Add arguments to variable symbol table. *)
- Hashtbl.add named_values var_name alloca;
- ) (params the_function)
-
-let codegen_func the_fpm = function
- | Ast.Function (proto, body) -&gt;
- Hashtbl.clear named_values;
- let the_function = codegen_proto proto in
-
- (* If this is an operator, install it. *)
- begin match proto with
- | Ast.BinOpPrototype (name, args, prec) -&gt;
- let op = name.[String.length name - 1] in
- Hashtbl.add Parser.binop_precedence op prec;
- | _ -&gt; ()
- end;
-
- (* Create a new basic block to start insertion into. *)
- let bb = append_block context "entry" the_function in
- position_at_end bb builder;
-
- try
- (* Add all arguments to the symbol table and create their allocas. *)
- create_argument_allocas the_function proto;
-
- let ret_val = codegen_expr body in
-
- (* Finish off the function. *)
- let _ = build_ret ret_val builder in
-
- (* Validate the generated code, checking for consistency. *)
- Llvm_analysis.assert_valid_function the_function;
-
- (* Optimize the function. *)
- let _ = PassManager.run_function the_function the_fpm in
-
- the_function
- with e -&gt;
- delete_function the_function;
- raise e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
- match Stream.peek stream with
- | None -&gt; ()
-
- (* ignore top-level semicolons. *)
- | Some (Token.Kwd ';') -&gt;
- Stream.junk stream;
- main_loop the_fpm the_execution_engine stream
-
- | Some token -&gt;
- begin
- try match token with
- | Token.Def -&gt;
- let e = Parser.parse_definition stream in
- print_endline "parsed a function definition.";
- dump_value (Codegen.codegen_func the_fpm e);
- | Token.Extern -&gt;
- let e = Parser.parse_extern stream in
- print_endline "parsed an extern.";
- dump_value (Codegen.codegen_proto e);
- | _ -&gt;
- (* Evaluate a top-level expression into an anonymous function. *)
- let e = Parser.parse_toplevel stream in
- print_endline "parsed a top-level expr";
- let the_function = Codegen.codegen_func the_fpm e in
- dump_value the_function;
-
- (* JIT the function, returning a function pointer. *)
- let result = ExecutionEngine.run_function the_function [||]
- the_execution_engine in
-
- print_string "Evaluated to ";
- print_float (GenericValue.as_float Codegen.double_type result);
- print_newline ();
- with Stream.Error s | Codegen.Error s -&gt;
- (* Skip token for error recovery. *)
- Stream.junk stream;
- print_endline s;
- end;
- print_string "ready&gt; "; flush stdout;
- main_loop the_fpm the_execution_engine stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-
-let main () =
- ignore (initialize_native_target ());
-
- (* Install standard binary operators.
- * 1 is the lowest precedence. *)
- Hashtbl.add Parser.binop_precedence '=' 2;
- Hashtbl.add Parser.binop_precedence '&lt;' 10;
- Hashtbl.add Parser.binop_precedence '+' 20;
- Hashtbl.add Parser.binop_precedence '-' 20;
- Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
-
- (* Prime the first token. *)
- print_string "ready&gt; "; flush stdout;
- let stream = Lexer.lex (Stream.of_channel stdin) in
-
- (* Create the JIT. *)
- let the_execution_engine = ExecutionEngine.create Codegen.the_module in
- let the_fpm = PassManager.create_function Codegen.the_module in
-
- (* Set up the optimizer pipeline. Start with registering info about how the
- * target lays out data structures. *)
- DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
- (* Promote allocas to registers. *)
- add_memory_to_register_promotion the_fpm;
-
- (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
- add_instruction_combination the_fpm;
-
- (* reassociate expressions. *)
- add_reassociation the_fpm;
-
- (* Eliminate Common SubExpressions. *)
- add_gvn the_fpm;
-
- (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
- add_cfg_simplification the_fpm;
-
- ignore (PassManager.initialize the_fpm);
-
- (* Run the main "interpreter loop" now. *)
- Toplevel.main_loop the_fpm the_execution_engine stream;
-
- (* Print out all the generated code. *)
- dump_module Codegen.the_module
-;;
-
-main ()
-</pre>
-</dd>
-
-<dt>bindings.c</dt>
-<dd class="doc_code">
-<pre>
-#include &lt;stdio.h&gt;
-
-/* putchard - putchar that takes a double and returns 0. */
-extern double putchard(double X) {
- putchar((char)X);
- return 0;
-}
-
-/* printd - printf that takes a double prints it as "%f\n", returning 0. */
-extern double printd(double X) {
- printf("%f\n", X);
- return 0;
-}
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl8.html">Next: Conclusion and other useful LLVM tidbits</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
- Last modified: $Date: 2012-10-08 18:39:34 +0200 (Mon, 08 Oct 2012) $
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl7.rst b/docs/tutorial/OCamlLangImpl7.rst
new file mode 100644
index 000000000000..cfb49312c50f
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl7.rst
@@ -0,0 +1,1723 @@
+=======================================================
+Kaleidoscope: Extending the Language: Mutable Variables
+=======================================================
+
+.. contents::
+ :local:
+
+Chapter 7 Introduction
+======================
+
+Welcome to Chapter 7 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. In chapters 1 through 6, we've built a
+very respectable, albeit simple, `functional programming
+language <http://en.wikipedia.org/wiki/Functional_programming>`_. In our
+journey, we learned some parsing techniques, how to build and represent
+an AST, how to build LLVM IR, and how to optimize the resultant code as
+well as JIT compile it.
+
+While Kaleidoscope is interesting as a functional language, the fact
+that it is functional makes it "too easy" to generate LLVM IR for it. In
+particular, a functional language makes it very easy to build LLVM IR
+directly in `SSA
+form <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+Since LLVM requires that the input code be in SSA form, this is a very
+nice property and it is often unclear to newcomers how to generate code
+for an imperative language with mutable variables.
+
+The short (and happy) summary of this chapter is that there is no need
+for your front-end to build SSA form: LLVM provides highly tuned and
+well tested support for this, though the way it works is a bit
+unexpected for some.
+
+Why is this a hard problem?
+===========================
+
+To understand why mutable variables cause complexities in SSA
+construction, consider this extremely simple C example:
+
+.. code-block:: c
+
+ int G, H;
+ int test(_Bool Condition) {
+ int X;
+ if (Condition)
+ X = G;
+ else
+ X = H;
+ return X;
+ }
+
+In this case, we have the variable "X", whose value depends on the path
+executed in the program. Because there are two different possible values
+for X before the return instruction, a PHI node is inserted to merge the
+two values. The LLVM IR that we want for this example looks like this:
+
+.. code-block:: llvm
+
+ @G = weak global i32 0 ; type of @G is i32*
+ @H = weak global i32 0 ; type of @H is i32*
+
+ define i32 @test(i1 %Condition) {
+ entry:
+ br i1 %Condition, label %cond_true, label %cond_false
+
+ cond_true:
+ %X.0 = load i32* @G
+ br label %cond_next
+
+ cond_false:
+ %X.1 = load i32* @H
+ br label %cond_next
+
+ cond_next:
+ %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+ ret i32 %X.2
+ }
+
+In this example, the loads from the G and H global variables are
+explicit in the LLVM IR, and they live in the then/else branches of the
+if statement (cond\_true/cond\_false). In order to merge the incoming
+values, the X.2 phi node in the cond\_next block selects the right value
+to use based on where control flow is coming from: if control flow comes
+from the cond\_false block, X.2 gets the value of X.1. Alternatively, if
+control flow comes from cond\_true, it gets the value of X.0. The intent
+of this chapter is not to explain the details of SSA form. For more
+information, see one of the many `online
+references <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+
+The question for this article is "who places the phi nodes when lowering
+assignments to mutable variables?". The issue here is that LLVM
+*requires* that its IR be in SSA form: there is no "non-ssa" mode for
+it. However, SSA construction requires non-trivial algorithms and data
+structures, so it is inconvenient and wasteful for every front-end to
+have to reproduce this logic.
+
+Memory in LLVM
+==============
+
+The 'trick' here is that while LLVM does require all register values to
+be in SSA form, it does not require (or permit) memory objects to be in
+SSA form. In the example above, note that the loads from G and H are
+direct accesses to G and H: they are not renamed or versioned. This
+differs from some other compiler systems, which do try to version memory
+objects. In LLVM, instead of encoding dataflow analysis of memory into
+the LLVM IR, it is handled with `Analysis
+Passes <../WritingAnLLVMPass.html>`_ which are computed on demand.
+
+With this in mind, the high-level idea is that we want to make a stack
+variable (which lives in memory, because it is on the stack) for each
+mutable object in a function. To take advantage of this trick, we need
+to talk about how LLVM represents stack variables.
+
+In LLVM, all memory accesses are explicit with load/store instructions,
+and it is carefully designed not to have (or need) an "address-of"
+operator. Notice how the type of the @G/@H global variables is actually
+"i32\*" even though the variable is defined as "i32". What this means is
+that @G defines *space* for an i32 in the global data area, but its
+*name* actually refers to the address for that space. Stack variables
+work the same way, except that instead of being declared with global
+variable definitions, they are declared with the `LLVM alloca
+instruction <../LangRef.html#i_alloca>`_:
+
+.. code-block:: llvm
+
+ define i32 @example() {
+ entry:
+ %X = alloca i32 ; type of %X is i32*.
+ ...
+ %tmp = load i32* %X ; load the stack value %X from the stack.
+ %tmp2 = add i32 %tmp, 1 ; increment it
+ store i32 %tmp2, i32* %X ; store it back
+ ...
+
+This code shows an example of how you can declare and manipulate a stack
+variable in the LLVM IR. Stack memory allocated with the alloca
+instruction is fully general: you can pass the address of the stack slot
+to functions, you can store it in other variables, etc. In our example
+above, we could rewrite the example to use the alloca technique to avoid
+using a PHI node:
+
+.. code-block:: llvm
+
+ @G = weak global i32 0 ; type of @G is i32*
+ @H = weak global i32 0 ; type of @H is i32*
+
+ define i32 @test(i1 %Condition) {
+ entry:
+ %X = alloca i32 ; type of %X is i32*.
+ br i1 %Condition, label %cond_true, label %cond_false
+
+ cond_true:
+ %X.0 = load i32* @G
+ store i32 %X.0, i32* %X ; Update X
+ br label %cond_next
+
+ cond_false:
+ %X.1 = load i32* @H
+ store i32 %X.1, i32* %X ; Update X
+ br label %cond_next
+
+ cond_next:
+ %X.2 = load i32* %X ; Read X
+ ret i32 %X.2
+ }
+
+With this, we have discovered a way to handle arbitrary mutable
+variables without the need to create Phi nodes at all:
+
+#. Each mutable variable becomes a stack allocation.
+#. Each read of the variable becomes a load from the stack.
+#. Each update of the variable becomes a store to the stack.
+#. Taking the address of a variable just uses the stack address
+ directly.
+
+While this solution has solved our immediate problem, it introduced
+another one: we have now apparently created a lot of stack traffic
+for very simple and common operations, a major performance problem.
+Fortunately for us, the LLVM optimizer has a highly-tuned optimization
+pass named "mem2reg" that handles this case, promoting allocas like this
+into SSA registers, inserting Phi nodes as appropriate. If you run this
+example through the pass, for example, you'll get:
+
+.. code-block:: bash
+
+ $ llvm-as < example.ll | opt -mem2reg | llvm-dis
+ @G = weak global i32 0
+ @H = weak global i32 0
+
+ define i32 @test(i1 %Condition) {
+ entry:
+ br i1 %Condition, label %cond_true, label %cond_false
+
+ cond_true:
+ %X.0 = load i32* @G
+ br label %cond_next
+
+ cond_false:
+ %X.1 = load i32* @H
+ br label %cond_next
+
+ cond_next:
+ %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+ ret i32 %X.01
+ }
+
+The mem2reg pass implements the standard "iterated dominance frontier"
+algorithm for constructing SSA form and has a number of optimizations
+that speed up (very common) degenerate cases. The mem2reg optimization
+pass is the answer to dealing with mutable variables, and we highly
+recommend that you depend on it. Note that mem2reg only works on
+variables in certain circumstances:
+
+#. mem2reg is alloca-driven: it looks for allocas and if it can handle
+ them, it promotes them. It does not apply to global variables or heap
+ allocations.
+#. mem2reg only looks for alloca instructions in the entry block of the
+ function. Being in the entry block guarantees that the alloca is only
+ executed once, which makes analysis simpler.
+#. mem2reg only promotes allocas whose uses are direct loads and stores.
+ If the address of the stack object is passed to a function, or if any
+ funny pointer arithmetic is involved, the alloca will not be
+ promoted.
+#. mem2reg only works on allocas of `first
+ class <../LangRef.html#t_classifications>`_ values (such as pointers,
+ scalars and vectors), and only if the array size of the allocation is
+ 1 (or missing in the .ll file). mem2reg is not capable of promoting
+ structs or arrays to registers. Note that the "scalarrepl" pass is
+ more powerful and can promote structs, "unions", and arrays in many
+ cases.
+
+All of these properties are easy to satisfy for most imperative
+languages, and we'll illustrate it below with Kaleidoscope. The final
+question you may be asking is: should I bother with this nonsense for my
+front-end? Wouldn't it be better if I just did SSA construction
+directly, avoiding use of the mem2reg optimization pass? In short, we
+strongly recommend that you use this technique for building SSA form,
+unless there is an extremely good reason not to. Using this technique
+is:
+
+- Proven and well tested: llvm-gcc and clang both use this technique
+ for local mutable variables. As such, the most common clients of LLVM
+  are using this to handle the bulk of their variables. You can be sure
+ that bugs are found fast and fixed early.
+- Extremely Fast: mem2reg has a number of special cases that make it
+ fast in common cases as well as fully general. For example, it has
+ fast-paths for variables that are only used in a single block,
+ variables that only have one assignment point, good heuristics to
+ avoid insertion of unneeded phi nodes, etc.
+- Needed for debug info generation: `Debug information in
+ LLVM <../SourceLevelDebugging.html>`_ relies on having the address of
+ the variable exposed so that debug info can be attached to it. This
+ technique dovetails very naturally with this style of debug info.
+
+If nothing else, this makes it much easier to get your front-end up and
+running, and is very simple to implement. Let's extend Kaleidoscope with
+mutable variables now!
+
+Mutable Variables in Kaleidoscope
+=================================
+
+Now that we know the sort of problem we want to tackle, let's see what
+this looks like in the context of our little Kaleidoscope language.
+We're going to add two features:
+
+#. The ability to mutate variables with the '=' operator.
+#. The ability to define new variables.
+
+While the first item is really what this is about, we only have
+variables for incoming arguments as well as for induction variables, and
+redefining those only goes so far :). Also, the ability to define new
+variables is a useful thing regardless of whether you will be mutating
+them. Here's a motivating example that shows how we could use these:
+
+::
+
+ # Define ':' for sequencing: as a low-precedence operator that ignores operands
+ # and just returns the RHS.
+ def binary : 1 (x y) y;
+
+ # Recursive fib, we could do this before.
+ def fib(x)
+ if (x < 3) then
+ 1
+ else
+ fib(x-1)+fib(x-2);
+
+ # Iterative fib.
+ def fibi(x)
+ var a = 1, b = 1, c in
+ (for i = 3, i < x in
+ c = a + b :
+ a = b :
+ b = c) :
+ b;
+
+ # Call it.
+ fibi(10);
+
+In order to mutate variables, we have to change our existing variables
+to use the "alloca trick". Once we have that, we'll add our new
+operator, then extend Kaleidoscope to support new variable definitions.
+
+Adjusting Existing Variables for Mutation
+=========================================
+
+The symbol table in Kaleidoscope is managed at code generation time by
+the '``named_values``' map. This map currently keeps track of the LLVM
+"Value\*" that holds the double value for the named variable. In order
+to support mutation, we need to change this slightly, so that
+``named_values`` holds the *memory location* of the variable in
+question. Note that this change is a refactoring: it changes the
+structure of the code, but does not (by itself) change the behavior of
+the compiler. All of these changes are isolated in the Kaleidoscope code
+generator.
+
+At this point in Kaleidoscope's development, it only supports variables
+for two things: incoming arguments to functions and the induction
+variable of 'for' loops. For consistency, we'll allow mutation of these
+variables in addition to other user-defined variables. This means that
+these will both need memory locations.
+
+To start our transformation of Kaleidoscope, we'll change the
+``named_values`` map so that it maps to AllocaInst\* instead of Value\*.
+Once we do this, the C++ compiler will tell us what parts of the code we
+need to update:
+
+**Note:** the OCaml bindings currently model both ``Value*``'s and
+``AllocaInst*``'s as ``Llvm.llvalue``'s, but this may change in the future
+to be more type safe.
+
+.. code-block:: ocaml
+
+ let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+
+Also, since we will need to create these allocas, we'll use a helper
+function that ensures that the allocas are created in the entry block of
+the function:
+
+.. code-block:: ocaml
+
+ (* Create an alloca instruction in the entry block of the function. This
+ * is used for mutable variables etc. *)
+ let create_entry_block_alloca the_function var_name =
+ let builder = builder_at context (instr_begin (entry_block the_function)) in
+ build_alloca double_type var_name builder
+
+This funny looking code creates an ``Llvm.llbuilder`` object that is
+pointing at the first instruction of the entry block. It then creates an
+alloca with the expected name and returns it. Because all values in
+Kaleidoscope are doubles, there is no need to pass in a type to use.
+
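+For reference, the two ``Llvm`` functions used here have signatures along
+these lines (check the ``Llvm`` module of your installation to confirm):
+
+.. code-block:: ocaml
+
+ val instr_begin : llbasicblock -> (llbasicblock, llvalue) llpos
+ val builder_at : llcontext -> (llbasicblock, llvalue) llpos -> llbuilder
+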
+With this in place, the first functionality change we want to make is to
+variable references. In our new scheme, variables live on the stack, so
+code generating a reference to them actually needs to produce a load
+from the stack slot:
+
+.. code-block:: ocaml
+
+ let rec codegen_expr = function
+ ...
+ | Ast.Variable name ->
+ let v = try Hashtbl.find named_values name with
+ | Not_found -> raise (Error "unknown variable name")
+ in
+ (* Load the value. *)
+ build_load v name builder
+
+As you can see, this is pretty straightforward. Now we need to update
+the things that define the variables to set up the alloca. We'll start
+with ``codegen_expr Ast.For ...`` (see the `full code listing <#code>`_
+for the unabridged code):
+
+.. code-block:: ocaml
+
+ | Ast.For (var_name, start, end_, step, body) ->
+ let the_function = block_parent (insertion_block builder) in
+
+ (* Create an alloca for the variable in the entry block. *)
+ let alloca = create_entry_block_alloca the_function var_name in
+
+ (* Emit the start code first, without 'variable' in scope. *)
+ let start_val = codegen_expr start in
+
+ (* Store the value into the alloca. *)
+ ignore(build_store start_val alloca builder);
+
+ ...
+
+ (* Within the loop, the variable now refers to the alloca. If it
+ * shadows an existing variable, we have to restore it, so save it
+ * now. *)
+ let old_val =
+ try Some (Hashtbl.find named_values var_name) with Not_found -> None
+ in
+ Hashtbl.add named_values var_name alloca;
+
+ ...
+
+ (* Compute the end condition. *)
+ let end_cond = codegen_expr end_ in
+
+ (* Reload, increment, and restore the alloca. This handles the case where
+ * the body of the loop mutates the variable. *)
+ let cur_var = build_load alloca var_name builder in
+ let next_var = build_add cur_var step_val "nextvar" builder in
+ ignore(build_store next_var alloca builder);
+ ...
+
+This code is virtually identical to the code `before we allowed mutable
+variables <OCamlLangImpl5.html#forcodegen>`_. The big difference is that
+we no longer have to construct a PHI node, and we use load/store to
+access the variable as needed.
+
+To support mutable argument variables, we also need to make allocas for
+them. The code for this is also pretty simple:
+
+.. code-block:: ocaml
+
+ (* Create an alloca for each argument and register the argument in the symbol
+ * table so that references to it will succeed. *)
+ let create_argument_allocas the_function proto =
+ let args = match proto with
+ | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args
+ in
+ Array.iteri (fun i ai ->
+ let var_name = args.(i) in
+ (* Create an alloca for this variable. *)
+ let alloca = create_entry_block_alloca the_function var_name in
+
+ (* Store the initial value into the alloca. *)
+ ignore(build_store ai alloca builder);
+
+ (* Add arguments to variable symbol table. *)
+ Hashtbl.add named_values var_name alloca;
+ ) (params the_function)
+
+For each argument, we make an alloca, store the input value to the
+function into the alloca, and register the alloca as the memory location
+for the argument. This helper is invoked by ``Codegen.codegen_func``
+right after it sets up the entry block for the function, as shown below.
+
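+Concretely, the call site (taken from the full code listing at the end of
+this chapter) looks like this:
+
+.. code-block:: ocaml
+
+ (* Create a new basic block to start insertion into. *)
+ let bb = append_block context "entry" the_function in
+ position_at_end bb builder;
+
+ (* Add all arguments to the symbol table and create their allocas. *)
+ create_argument_allocas the_function proto;
+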
+The final missing piece is adding the mem2reg pass, which allows us to
+get good codegen once again:
+
+.. code-block:: ocaml
+
+ let main () =
+ ...
+ let the_fpm = PassManager.create_function Codegen.the_module in
+
+ (* Set up the optimizer pipeline. Start with registering info about how the
+ * target lays out data structures. *)
+ DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+ (* Promote allocas to registers. *)
+ add_memory_to_register_promotion the_fpm;
+
+ (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+ add_instruction_combination the_fpm;
+
+ (* Reassociate expressions. *)
+ add_reassociation the_fpm;
+
+It is interesting to see what the code looks like before and after the
+mem2reg optimization runs. For example, this is the before/after code
+for our recursive fib function. Before the optimization:
+
+.. code-block:: llvm
+
+ define double @fib(double %x) {
+ entry:
+ %x1 = alloca double
+ store double %x, double* %x1
+ %x2 = load double* %x1
+ %cmptmp = fcmp ult double %x2, 3.000000e+00
+ %booltmp = uitofp i1 %cmptmp to double
+ %ifcond = fcmp one double %booltmp, 0.000000e+00
+ br i1 %ifcond, label %then, label %else
+
+ then: ; preds = %entry
+ br label %ifcont
+
+ else: ; preds = %entry
+ %x3 = load double* %x1
+ %subtmp = fsub double %x3, 1.000000e+00
+ %calltmp = call double @fib(double %subtmp)
+ %x4 = load double* %x1
+ %subtmp5 = fsub double %x4, 2.000000e+00
+ %calltmp6 = call double @fib(double %subtmp5)
+ %addtmp = fadd double %calltmp, %calltmp6
+ br label %ifcont
+
+ ifcont: ; preds = %else, %then
+ %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+ ret double %iftmp
+ }
+
+Here there is only one variable (x, the input argument) but you can
+still see the extremely simple-minded code generation strategy we are
+using. In the entry block, an alloca is created, and the initial input
+value is stored into it. Each reference to the variable does a reload
+from the stack. Also, note that we didn't modify the if/then/else
+expression, so it still inserts a PHI node. While we could make an
+alloca for it, it is actually easier to create a PHI node for it, so we
+still just make the PHI.
+
+Here is the code after the mem2reg pass runs:
+
+.. code-block:: llvm
+
+ define double @fib(double %x) {
+ entry:
+ %cmptmp = fcmp ult double %x, 3.000000e+00
+ %booltmp = uitofp i1 %cmptmp to double
+ %ifcond = fcmp one double %booltmp, 0.000000e+00
+ br i1 %ifcond, label %then, label %else
+
+ then:
+ br label %ifcont
+
+ else:
+ %subtmp = fsub double %x, 1.000000e+00
+ %calltmp = call double @fib(double %subtmp)
+ %subtmp5 = fsub double %x, 2.000000e+00
+ %calltmp6 = call double @fib(double %subtmp5)
+ %addtmp = fadd double %calltmp, %calltmp6
+ br label %ifcont
+
+ ifcont: ; preds = %else, %then
+ %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+ ret double %iftmp
+ }
+
+This is a trivial case for mem2reg, since there are no redefinitions of
+the variable. The point of showing this is to calm your tension about
+inserting such blatant inefficiencies :).
+
+After the rest of the optimizers run, we get:
+
+.. code-block:: llvm
+
+ define double @fib(double %x) {
+ entry:
+ %cmptmp = fcmp ult double %x, 3.000000e+00
+ %booltmp = uitofp i1 %cmptmp to double
+ %ifcond = fcmp ueq double %booltmp, 0.000000e+00
+ br i1 %ifcond, label %else, label %ifcont
+
+ else:
+ %subtmp = fsub double %x, 1.000000e+00
+ %calltmp = call double @fib(double %subtmp)
+ %subtmp5 = fsub double %x, 2.000000e+00
+ %calltmp6 = call double @fib(double %subtmp5)
+ %addtmp = fadd double %calltmp, %calltmp6
+ ret double %addtmp
+
+ ifcont:
+ ret double 1.000000e+00
+ }
+
+Here we see that the simplifycfg pass decided to clone the return
+instruction into the end of the 'else' block. This allowed it to
+eliminate some branches and the PHI node.
+
+Now that all symbol table references are updated to use stack variables,
+we'll add the assignment operator.
+
+New Assignment Operator
+=======================
+
+With our current framework, adding a new assignment operator is really
+simple. We will parse it just like any other binary operator, but handle
+it internally (instead of allowing the user to define it). The first
+step is to set a precedence:
+
+.. code-block:: ocaml
+
+ let main () =
+ (* Install standard binary operators.
+ * 1 is the lowest precedence. *)
+ Hashtbl.add Parser.binop_precedence '=' 2;
+ Hashtbl.add Parser.binop_precedence '<' 10;
+ Hashtbl.add Parser.binop_precedence '+' 20;
+ Hashtbl.add Parser.binop_precedence '-' 20;
+ ...
+
+Now that the parser knows the precedence of the binary operator, it
+takes care of all the parsing and AST generation. We just need to
+implement codegen for the assignment operator. This looks like:
+
+.. code-block:: ocaml
+
+ let rec codegen_expr = function
+ ...
+ | Ast.Binary (op, lhs, rhs) ->
+ begin match op with
+ | '=' ->
+ (* Special case '=' because we don't want to emit the LHS as an
+ * expression. *)
+ let name =
+ match lhs with
+ | Ast.Variable name -> name
+ | _ -> raise (Error "destination of '=' must be a variable")
+ in
+
+Unlike the rest of the binary operators, our assignment operator doesn't
+follow the "emit LHS, emit RHS, do computation" model. As such, it is
+handled as a special case before the other binary operators are handled.
+The other strange thing is that it requires the LHS to be a variable. It
+is invalid to have "(x+1) = expr" - only things like "x = expr" are
+allowed.
+
+.. code-block:: ocaml
+
+ (* Codegen the rhs. *)
+ let val_ = codegen_expr rhs in
+
+ (* Lookup the name. *)
+ let variable = try Hashtbl.find named_values name with
+ | Not_found -> raise (Error "unknown variable name")
+ in
+ ignore(build_store val_ variable builder);
+ val_
+ | _ ->
+ ...
+
+Once we have the variable, codegen'ing the assignment is
+straightforward: we emit the RHS of the assignment, create a store, and
+return the computed value. Returning a value allows for chained
+assignments like "X = (Y = Z)".
+
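+For instance, because '=' returns the value it stored, a hand-written
+snippet like this (assuming the ':' sequencing operator defined in the
+motivating example earlier) should evaluate to 8:
+
+::
+
+ # Hypothetical example: both 'a' and 'b' end up as 4, so the call
+ # should evaluate to 8.
+ def setboth(a b)
+ (a = (b = 4)) : a + b;
+
+ setboth(1, 2);
+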
+Now that we have an assignment operator, we can mutate loop variables
+and arguments. For example, we can now run code like this:
+
+::
+
+ # Function to print a double.
+ extern printd(x);
+
+ # Define ':' for sequencing: as a low-precedence operator that ignores operands
+ # and just returns the RHS.
+ def binary : 1 (x y) y;
+
+ def test(x)
+ printd(x) :
+ x = 4 :
+ printd(x);
+
+ test(123);
+
+When run, this example prints "123" and then "4", showing that we did
+actually mutate the value! Okay, we have now officially implemented our
+goal: getting this to work requires SSA construction in the general
+case. However, to be really useful, we want the ability to define our
+own local variables. Let's add this next!
+
+User-defined Local Variables
+============================
+
+Adding var/in is just like the other extensions we made to
+Kaleidoscope: we extend the lexer, the parser, the AST and the code
+generator. The first step for adding our new 'var/in' construct is to
+extend the lexer. As before, this is pretty trivial; the code looks like
+this:
+
+.. code-block:: ocaml
+
+ type token =
+ ...
+ (* var definition *)
+ | Var
+
+ ...
+
+ and lex_ident buffer = parser
+ ...
+ | "in" -> [< 'Token.In; stream >]
+ | "binary" -> [< 'Token.Binary; stream >]
+ | "unary" -> [< 'Token.Unary; stream >]
+ | "var" -> [< 'Token.Var; stream >]
+ ...
+
+The next step is to define the AST node that we will construct. For
+var/in, it looks like this:
+
+.. code-block:: ocaml
+
+ type expr =
+ ...
+ (* variant for var/in. *)
+ | Var of (string * expr option) array * expr
+ ...
+
+var/in allows a list of names to be defined all at once, and each name
+can optionally have an initializer value. As such, we capture this
+information in the ``var_names`` array. Also, var/in has a body; this body
+is allowed to access the variables defined by the var/in.
+
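+For instance, a declaration like ``var a = 1, b in ...`` would be captured
+as a value of this shape (a hand-written illustration, where ``body``
+stands for the expression after 'in'):
+
+.. code-block:: ocaml
+
+ (* Two names: "a" with an initializer, "b" without one. *)
+ Ast.Var ([| ("a", Some (Ast.Number 1.0)); ("b", None) |], body)
+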
+With this in place, we can define the parser pieces. The first thing we
+do is add it as a primary expression:
+
+.. code-block:: ocaml
+
+ (* primary
+ * ::= identifier
+ * ::= numberexpr
+ * ::= parenexpr
+ * ::= ifexpr
+ * ::= forexpr
+ * ::= varexpr *)
+ let rec parse_primary = parser
+ ...
+ (* varexpr
+ * ::= 'var' identifier ('=' expression)?
+ * (',' identifier ('=' expression)?)* 'in' expression *)
+ | [< 'Token.Var;
+ (* At least one variable name is required. *)
+ 'Token.Ident id ?? "expected identifier after var";
+ init=parse_var_init;
+ var_names=parse_var_names [(id, init)];
+ (* At this point, we have to have 'in'. *)
+ 'Token.In ?? "expected 'in' keyword after 'var'";
+ body=parse_expr >] ->
+ Ast.Var (Array.of_list (List.rev var_names), body)
+
+ ...
+
+ and parse_var_init = parser
+ (* read in the optional initializer. *)
+ | [< 'Token.Kwd '='; e=parse_expr >] -> Some e
+ | [< >] -> None
+
+ and parse_var_names accumulator = parser
+ | [< 'Token.Kwd ',';
+ 'Token.Ident id ?? "expected identifier list after var";
+ init=parse_var_init;
+ e=parse_var_names ((id, init) :: accumulator) >] -> e
+ | [< >] -> accumulator
+
+Now that we can parse and represent the code, we need to support
+emission of LLVM IR for it. This code starts out with:
+
+.. code-block:: ocaml
+
+ let rec codegen_expr = function
+ ...
+ | Ast.Var (var_names, body) ->
+ let old_bindings = ref [] in
+
+ let the_function = block_parent (insertion_block builder) in
+
+ (* Register all variables and emit their initializer. *)
+ Array.iter (fun (var_name, init) ->
+
+Basically it loops over all the variables, installing them one at a
+time. For each variable we put into the symbol table, we remember the
+previous value that we replace in ``old_bindings``.
+
+.. code-block:: ocaml
+
+ (* Emit the initializer before adding the variable to scope; this
+ * prevents the initializer from referencing the variable itself, and
+ * permits stuff like this:
+ * var a = 1 in
+ * var a = a in ... # refers to outer 'a'. *)
+ let init_val =
+ match init with
+ | Some init -> codegen_expr init
+ (* If not specified, use 0.0. *)
+ | None -> const_float double_type 0.0
+ in
+
+ let alloca = create_entry_block_alloca the_function var_name in
+ ignore(build_store init_val alloca builder);
+
+ (* Remember the old variable binding so that we can restore the binding
+ * when we unrecurse. *)
+ begin
+ try
+ let old_value = Hashtbl.find named_values var_name in
+ old_bindings := (var_name, old_value) :: !old_bindings;
+ with Not_found -> ()
+ end;
+
+ (* Remember this binding. *)
+ Hashtbl.add named_values var_name alloca;
+ ) var_names;
+
+There are more comments here than code. The basic idea is that we emit
+the initializer, create the alloca, then update the symbol table to
+point to it. Once all the variables are installed in the symbol table,
+we evaluate the body of the var/in expression:
+
+.. code-block:: ocaml
+
+ (* Codegen the body, now that all vars are in scope. *)
+ let body_val = codegen_expr body in
+
+Finally, before returning, we restore the previous variable bindings:
+
+.. code-block:: ocaml
+
+ (* Pop all our variables from scope. *)
+ List.iter (fun (var_name, old_value) ->
+ Hashtbl.add named_values var_name old_value
+ ) !old_bindings;
+
+ (* Return the body computation. *)
+ body_val
+
+The end result of all of this is that we get properly scoped variable
+definitions, and we even (trivially) allow mutation of them :).
+
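+As a final (hand-written) illustration of the scoping rules, the following
+snippet should evaluate to 2: because initializers are emitted before the
+new variable is added to scope, the inner 'a' starts at the outer 'a' plus
+one, and then shadows it in the body:
+
+::
+
+ var a = 1 in
+ (var a = a + 1 in a);
+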
+With this, we completed what we set out to do. Our nice iterative fib
+example from the intro compiles and runs just fine. The mem2reg pass
+optimizes all of our stack variables into SSA registers, inserting PHI
+nodes where needed, and our front-end remains simple: no "iterated
+dominance frontier" computation anywhere in sight.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+mutable variables and var/in support. To build this example, use:
+
+.. code-block:: bash
+
+ # Compile
+ ocamlbuild toy.byte
+ # Run
+ ./toy.byte
+
+Here is the code:
+
+\_tags:
+ ::
+
+ <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+ <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+ <*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+ <*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+myocamlbuild.ml:
+ .. code-block:: ocaml
+
+ open Ocamlbuild_plugin;;
+
+ ocaml_lib ~extern:true "llvm";;
+ ocaml_lib ~extern:true "llvm_analysis";;
+ ocaml_lib ~extern:true "llvm_executionengine";;
+ ocaml_lib ~extern:true "llvm_target";;
+ ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+ flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
+ dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+token.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+ (* The lexer returns a 'Kwd' token for an unknown character, otherwise one of
+ * these other tokens for known things. *)
+ type token =
+ (* commands *)
+ | Def | Extern
+
+ (* primary *)
+ | Ident of string | Number of float
+
+ (* unknown *)
+ | Kwd of char
+
+ (* control *)
+ | If | Then | Else
+ | For | In
+
+ (* operators *)
+ | Binary | Unary
+
+ (* var definition *)
+ | Var
+
+lexer.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+ let rec lex = parser
+ (* Skip any whitespace. *)
+ | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+ (* identifier: [a-zA-Z][a-zA-Z0-9]* *)
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+
+ (* number: [0-9.]+ *)
+ | [< ' ('0' .. '9' as c); stream >] ->
+ let buffer = Buffer.create 1 in
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+
+ (* Comment until end of line. *)
+ | [< ' ('#'); stream >] ->
+ lex_comment stream
+
+ (* Otherwise, just return the character as its ascii value. *)
+ | [< 'c; stream >] ->
+ [< 'Token.Kwd c; lex stream >]
+
+ (* end of stream. *)
+ | [< >] -> [< >]
+
+ and lex_number buffer = parser
+ | [< ' ('0' .. '9' | '.' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_number buffer stream
+ | [< stream=lex >] ->
+ [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+ and lex_ident buffer = parser
+ | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+ Buffer.add_char buffer c;
+ lex_ident buffer stream
+ | [< stream=lex >] ->
+ match Buffer.contents buffer with
+ | "def" -> [< 'Token.Def; stream >]
+ | "extern" -> [< 'Token.Extern; stream >]
+ | "if" -> [< 'Token.If; stream >]
+ | "then" -> [< 'Token.Then; stream >]
+ | "else" -> [< 'Token.Else; stream >]
+ | "for" -> [< 'Token.For; stream >]
+ | "in" -> [< 'Token.In; stream >]
+ | "binary" -> [< 'Token.Binary; stream >]
+ | "unary" -> [< 'Token.Unary; stream >]
+ | "var" -> [< 'Token.Var; stream >]
+ | id -> [< 'Token.Ident id; stream >]
+
+ and lex_comment = parser
+ | [< ' ('\n'); stream=lex >] -> stream
+ | [< 'c; e=lex_comment >] -> e
+ | [< >] -> [< >]
+
+ast.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+ (* expr - Base type for all expression nodes. *)
+ type expr =
+ (* variant for numeric literals like "1.0". *)
+ | Number of float
+
+ (* variant for referencing a variable, like "a". *)
+ | Variable of string
+
+ (* variant for a unary operator. *)
+ | Unary of char * expr
+
+ (* variant for a binary operator. *)
+ | Binary of char * expr * expr
+
+ (* variant for function calls. *)
+ | Call of string * expr array
+
+ (* variant for if/then/else. *)
+ | If of expr * expr * expr
+
+ (* variant for for/in. *)
+ | For of string * expr * expr * expr option * expr
+
+ (* variant for var/in. *)
+ | Var of (string * expr option) array * expr
+
+ (* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+ type proto =
+ | Prototype of string * string array
+ | BinOpPrototype of string * string array * int
+
+ (* func - This type represents a function definition itself. *)
+ type func = Function of proto * expr
+
+parser.ml:
+ .. code-block:: ocaml
+
+ (*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+ (* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+ let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+ (* precedence - Get the precedence of the pending binary operator token. *)
+ let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+ (* primary
+ * ::= identifier
+ * ::= numberexpr
+ * ::= parenexpr
+ * ::= ifexpr
+ * ::= forexpr
+ * ::= varexpr *)
+ let rec parse_primary = parser
+ (* numberexpr ::= number *)
+ | [< 'Token.Number n >] -> Ast.Number n
+
+ (* parenexpr ::= '(' expression ')' *)
+ | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+ (* identifierexpr
+ * ::= identifier
+ * ::= identifier '(' argumentexpr ')' *)
+ | [< 'Token.Ident id; stream >] ->
+ let rec parse_args accumulator = parser
+ | [< e=parse_expr; stream >] ->
+ begin parser
+ | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+ | [< >] -> e :: accumulator
+ end stream
+ | [< >] -> accumulator
+ in
+ let rec parse_ident id = parser
+ (* Call. *)
+ | [< 'Token.Kwd '(';
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')'">] ->
+ Ast.Call (id, Array.of_list (List.rev args))
+
+ (* Simple variable ref. *)
+ | [< >] -> Ast.Variable id
+ in
+ parse_ident id stream
+
+ (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+ | [< 'Token.If; c=parse_expr;
+ 'Token.Then ?? "expected 'then'"; t=parse_expr;
+ 'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+ Ast.If (c, t, e)
+
+ (* forexpr
+ ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+ | [< 'Token.For;
+ 'Token.Ident id ?? "expected identifier after for";
+ 'Token.Kwd '=' ?? "expected '=' after for";
+ stream >] ->
+ begin parser
+ | [<
+ start=parse_expr;
+ 'Token.Kwd ',' ?? "expected ',' after for";
+ end_=parse_expr;
+ stream >] ->
+ let step =
+ begin parser
+ | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+ | [< >] -> None
+ end stream
+ in
+ begin parser
+ | [< 'Token.In; body=parse_expr >] ->
+ Ast.For (id, start, end_, step, body)
+ | [< >] ->
+ raise (Stream.Error "expected 'in' after for")
+ end stream
+ | [< >] ->
+ raise (Stream.Error "expected '=' after for")
+ end stream
+
+ (* varexpr
+ * ::= 'var' identifier ('=' expression)?
+ * (',' identifier ('=' expression)?)* 'in' expression *)
+ | [< 'Token.Var;
+ (* At least one variable name is required. *)
+ 'Token.Ident id ?? "expected identifier after var";
+ init=parse_var_init;
+ var_names=parse_var_names [(id, init)];
+ (* At this point, we have to have 'in'. *)
+ 'Token.In ?? "expected 'in' keyword after 'var'";
+ body=parse_expr >] ->
+ Ast.Var (Array.of_list (List.rev var_names), body)
+
+ | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+ (* unary
+ * ::= primary
+ * ::= '!' unary *)
+ and parse_unary = parser
+ (* If this is a unary operator, read it. *)
+ | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_unary >] ->
+ Ast.Unary (op, operand)
+
+ (* If the current token is not an operator, it must be a primary expr. *)
+ | [< stream >] -> parse_primary stream
+
+ (* binoprhs
+ * ::= ('+' primary)* *)
+ and parse_bin_rhs expr_prec lhs stream =
+ match Stream.peek stream with
+ (* If this is a binop, find its precedence. *)
+ | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+ let token_prec = precedence c in
+
+ (* If this is a binop that binds at least as tightly as the current binop,
+ * consume it, otherwise we are done. *)
+ if token_prec < expr_prec then lhs else begin
+ (* Eat the binop. *)
+ Stream.junk stream;
+
+ (* Parse the primary expression after the binary operator. *)
+ let rhs = parse_unary stream in
+
+ (* Okay, we know this is a binop. *)
+ let rhs =
+ match Stream.peek stream with
+ | Some (Token.Kwd c2) ->
+ (* If BinOp binds less tightly with rhs than the operator after
+ * rhs, let the pending operator take rhs as its lhs. *)
+ let next_prec = precedence c2 in
+ if token_prec < next_prec
+ then parse_bin_rhs (token_prec + 1) rhs stream
+ else rhs
+ | _ -> rhs
+ in
+
+ (* Merge lhs/rhs. *)
+ let lhs = Ast.Binary (c, lhs, rhs) in
+ parse_bin_rhs expr_prec lhs stream
+ end
+ | _ -> lhs
+
+ and parse_var_init = parser
+ (* read in the optional initializer. *)
+ | [< 'Token.Kwd '='; e=parse_expr >] -> Some e
+ | [< >] -> None
+
+ and parse_var_names accumulator = parser
+ | [< 'Token.Kwd ',';
+ 'Token.Ident id ?? "expected identifier list after var";
+ init=parse_var_init;
+ e=parse_var_names ((id, init) :: accumulator) >] -> e
+ | [< >] -> accumulator
+
+ (* expression
+ * ::= primary binoprhs *)
+ and parse_expr = parser
+ | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
+
+ (* prototype
+ * ::= id '(' id* ')'
+ * ::= binary LETTER number? (id, id)
+ * ::= unary LETTER number? (id) *)
+ let parse_prototype =
+ let rec parse_args accumulator = parser
+ | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+ | [< >] -> accumulator
+ in
+ let parse_operator = parser
+ | [< 'Token.Unary >] -> "unary", 1
+ | [< 'Token.Binary >] -> "binary", 2
+ in
+ let parse_binary_precedence = parser
+ | [< 'Token.Number n >] -> int_of_float n
+ | [< >] -> 30
+ in
+ parser
+ | [< 'Token.Ident id;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ (* success. *)
+ Ast.Prototype (id, Array.of_list (List.rev args))
+ | [< (prefix, kind)=parse_operator;
+ 'Token.Kwd op ?? "expected an operator";
+ (* Read the precedence if present. *)
+ binary_precedence=parse_binary_precedence;
+ 'Token.Kwd '(' ?? "expected '(' in prototype";
+ args=parse_args [];
+ 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+ let name = prefix ^ (String.make 1 op) in
+ let args = Array.of_list (List.rev args) in
+
+ (* Verify right number of arguments for operator. *)
+ if Array.length args != kind
+ then raise (Stream.Error "invalid number of operands for operator")
+ else
+ if kind == 1 then
+ Ast.Prototype (name, args)
+ else
+ Ast.BinOpPrototype (name, args, binary_precedence)
+ | [< >] ->
+ raise (Stream.Error "expected function name in prototype")
+
+ (* definition ::= 'def' prototype expression *)
+ let parse_definition = parser
+ | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+ Ast.Function (p, e)
+
+ (* toplevelexpr ::= expression *)
+ let parse_toplevel = parser
+ | [< e=parse_expr >] ->
+ (* Make an anonymous proto. *)
+ Ast.Function (Ast.Prototype ("", [||]), e)
+
+ (* external ::= 'extern' prototype *)
+ let parse_extern = parser
+ | [< 'Token.Extern; e=parse_prototype >] -> e
+
+codegen.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+
+ exception Error of string
+
+ let context = global_context ()
+ let the_module = create_module context "my cool jit"
+ let builder = builder context
+ let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+ let double_type = double_type context
+
+ (* Create an alloca instruction in the entry block of the function. This
+ * is used for mutable variables etc. *)
+ let create_entry_block_alloca the_function var_name =
+ let builder = builder_at context (instr_begin (entry_block the_function)) in
+ build_alloca double_type var_name builder
+
+ let rec codegen_expr = function
+ | Ast.Number n -> const_float double_type n
+ | Ast.Variable name ->
+ let v = try Hashtbl.find named_values name with
+ | Not_found -> raise (Error "unknown variable name")
+ in
+ (* Load the value. *)
+ build_load v name builder
+ | Ast.Unary (op, operand) ->
+ let operand = codegen_expr operand in
+ let callee = "unary" ^ (String.make 1 op) in
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "unknown unary operator")
+ in
+ build_call callee [|operand|] "unop" builder
+ | Ast.Binary (op, lhs, rhs) ->
+ begin match op with
+ | '=' ->
+ (* Special case '=' because we don't want to emit the LHS as an
+ * expression. *)
+ let name =
+ match lhs with
+ | Ast.Variable name -> name
+ | _ -> raise (Error "destination of '=' must be a variable")
+ in
+
+ (* Codegen the rhs. *)
+ let val_ = codegen_expr rhs in
+
+ (* Lookup the name. *)
+ let variable = try Hashtbl.find named_values name with
+ | Not_found -> raise (Error "unknown variable name")
+ in
+ ignore(build_store val_ variable builder);
+ val_
+ | _ ->
+ let lhs_val = codegen_expr lhs in
+ let rhs_val = codegen_expr rhs in
+ begin
+ match op with
+ | '+' -> build_add lhs_val rhs_val "addtmp" builder
+ | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+ | '*' -> build_mul lhs_val rhs_val "multmp" builder
+ | '<' ->
+ (* Convert bool 0/1 to double 0.0 or 1.0 *)
+ let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+ build_uitofp i double_type "booltmp" builder
+ | _ ->
+ (* If it wasn't a builtin binary operator, it must be a user defined
+ * one. Emit a call to it. *)
+ let callee = "binary" ^ (String.make 1 op) in
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "binary operator not found!")
+ in
+ build_call callee [|lhs_val; rhs_val|] "binop" builder
+ end
+ end
+ | Ast.Call (callee, args) ->
+ (* Look up the name in the module table. *)
+ let callee =
+ match lookup_function callee the_module with
+ | Some callee -> callee
+ | None -> raise (Error "unknown function referenced")
+ in
+ let params = params callee in
+
+ (* Check for an argument count mismatch. *)
+ if Array.length params == Array.length args then () else
+ raise (Error "incorrect # arguments passed");
+ let args = Array.map codegen_expr args in
+ build_call callee args "calltmp" builder
+ | Ast.If (cond, then_, else_) ->
+ let cond = codegen_expr cond in
+
+ (* Convert condition to a bool by comparing equal to 0.0 *)
+ let zero = const_float double_type 0.0 in
+ let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+ (* Grab the first block so that we might later add the conditional branch
+ * to it at the end of the function. *)
+ let start_bb = insertion_block builder in
+ let the_function = block_parent start_bb in
+
+ let then_bb = append_block context "then" the_function in
+
+ (* Emit 'then' value. *)
+ position_at_end then_bb builder;
+ let then_val = codegen_expr then_ in
+
+ (* Codegen of 'then' can change the current block, update then_bb for the
+ * phi. We create a new name because one is used for the phi node, and the
+ * other is used for the conditional branch. *)
+ let new_then_bb = insertion_block builder in
+
+ (* Emit 'else' value. *)
+ let else_bb = append_block context "else" the_function in
+ position_at_end else_bb builder;
+ let else_val = codegen_expr else_ in
+
+ (* Codegen of 'else' can change the current block, update else_bb for the
+ * phi. *)
+ let new_else_bb = insertion_block builder in
+
+ (* Emit merge block. *)
+ let merge_bb = append_block context "ifcont" the_function in
+ position_at_end merge_bb builder;
+ let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+ let phi = build_phi incoming "iftmp" builder in
+
+ (* Return to the start block to add the conditional branch. *)
+ position_at_end start_bb builder;
+ ignore (build_cond_br cond_val then_bb else_bb builder);
+
+ (* Set an unconditional branch at the end of the 'then' block and the
+ * 'else' block to the 'merge' block. *)
+ position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+ position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+ (* Finally, set the builder to the end of the merge block. *)
+ position_at_end merge_bb builder;
+
+ phi
+ | Ast.For (var_name, start, end_, step, body) ->
+ (* Output this as:
+ * var = alloca double
+ * ...
+ * start = startexpr
+ * store start -> var
+ * goto loop
+ * loop:
+ * ...
+ * bodyexpr
+ * ...
+ * loopend:
+ * step = stepexpr
+ * endcond = endexpr
+ *
+ * curvar = load var
+ * nextvar = curvar + step
+ * store nextvar -> var
+ * br endcond, loop, endloop
+ * outloop: *)
+
+ let the_function = block_parent (insertion_block builder) in
+
+ (* Create an alloca for the variable in the entry block. *)
+ let alloca = create_entry_block_alloca the_function var_name in
+
+ (* Emit the start code first, without 'variable' in scope. *)
+ let start_val = codegen_expr start in
+
+ (* Store the value into the alloca. *)
+ ignore(build_store start_val alloca builder);
+
+ (* Make the new basic block for the loop header, inserting after current
+ * block. *)
+ let loop_bb = append_block context "loop" the_function in
+
+ (* Insert an explicit fall through from the current block to the
+ * loop_bb. *)
+ ignore (build_br loop_bb builder);
+
+ (* Start insertion in loop_bb. *)
+ position_at_end loop_bb builder;
+
+ (* Within the loop, the variable now refers to the alloca. If it
+ * shadows an existing variable, we have to restore it, so save it
+ * now. *)
+ let old_val =
+ try Some (Hashtbl.find named_values var_name) with Not_found -> None
+ in
+ Hashtbl.add named_values var_name alloca;
+
+ (* Emit the body of the loop. This, like any other expr, can change the
+ * current BB. Note that we ignore the value computed by the body, but
+ * don't allow an error. *)
+ ignore (codegen_expr body);
+
+ (* Emit the step value. *)
+ let step_val =
+ match step with
+ | Some step -> codegen_expr step
+ (* If not specified, use 1.0. *)
+ | None -> const_float double_type 1.0
+ in
+
+ (* Compute the end condition. *)
+ let end_cond = codegen_expr end_ in
+
+ (* Reload, increment, and restore the alloca. This handles the case where
+ * the body of the loop mutates the variable. *)
+ let cur_var = build_load alloca var_name builder in
+ let next_var = build_add cur_var step_val "nextvar" builder in
+ ignore(build_store next_var alloca builder);
+
+ (* Convert condition to a bool by comparing equal to 0.0. *)
+ let zero = const_float double_type 0.0 in
+ let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+ (* Create the "after loop" block and insert it. *)
+ let after_bb = append_block context "afterloop" the_function in
+
+ (* Insert the conditional branch into the end of loop_end_bb. *)
+ ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+ (* Any new code will be inserted in after_bb. *)
+ position_at_end after_bb builder;
+
+ (* Restore the unshadowed variable. *)
+ begin match old_val with
+ | Some old_val -> Hashtbl.add named_values var_name old_val
+ | None -> ()
+ end;
+
+ (* for expr always returns 0.0. *)
+ const_null double_type
+ | Ast.Var (var_names, body) ->
+ let old_bindings = ref [] in
+
+ let the_function = block_parent (insertion_block builder) in
+
+ (* Register all variables and emit their initializer. *)
+ Array.iter (fun (var_name, init) ->
+ (* Emit the initializer before adding the variable to scope; this
+ * prevents the initializer from referencing the variable itself, and
+ * permits stuff like this:
+ * var a = 1 in
+ * var a = a in ... # refers to outer 'a'. *)
+ let init_val =
+ match init with
+ | Some init -> codegen_expr init
+ (* If not specified, use 0.0. *)
+ | None -> const_float double_type 0.0
+ in
+
+ let alloca = create_entry_block_alloca the_function var_name in
+ ignore(build_store init_val alloca builder);
+
+ (* Remember the old variable binding so that we can restore the binding
+ * when we unrecurse. *)
+ begin
+ try
+ let old_value = Hashtbl.find named_values var_name in
+ old_bindings := (var_name, old_value) :: !old_bindings;
+ with Not_found -> ()
+ end;
+
+ (* Remember this binding. *)
+ Hashtbl.add named_values var_name alloca;
+ ) var_names;
+
+ (* Codegen the body, now that all vars are in scope. *)
+ let body_val = codegen_expr body in
+
+ (* Pop all our variables from scope. *)
+ List.iter (fun (var_name, old_value) ->
+ Hashtbl.add named_values var_name old_value
+ ) !old_bindings;
+
+ (* Return the body computation. *)
+ body_val
+
+ let codegen_proto = function
+ | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) ->
+ (* Make the function type: double(double,double) etc. *)
+ let doubles = Array.make (Array.length args) double_type in
+ let ft = function_type double_type doubles in
+ let f =
+ match lookup_function name the_module with
+ | None -> declare_function name ft the_module
+
+ (* If 'f' conflicted, there was already something named 'name'. If it
+ * has a body, don't allow redefinition or reextern. *)
+ | Some f ->
+ (* If 'f' already has a body, reject this. *)
+ if block_begin f <> At_end f then
+ raise (Error "redefinition of function");
+
+ (* If 'f' took a different number of arguments, reject. *)
+ if element_type (type_of f) <> ft then
+ raise (Error "redefinition of function with different # args");
+ f
+ in
+
+ (* Set names for all arguments. *)
+ Array.iteri (fun i a ->
+ let n = args.(i) in
+ set_value_name n a;
+ Hashtbl.add named_values n a;
+ ) (params f);
+ f
+
+ (* Create an alloca for each argument and register the argument in the symbol
+ * table so that references to it will succeed. *)
+ let create_argument_allocas the_function proto =
+ let args = match proto with
+ | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args
+ in
+ Array.iteri (fun i ai ->
+ let var_name = args.(i) in
+ (* Create an alloca for this variable. *)
+ let alloca = create_entry_block_alloca the_function var_name in
+
+ (* Store the initial value into the alloca. *)
+ ignore(build_store ai alloca builder);
+
+ (* Add arguments to variable symbol table. *)
+ Hashtbl.add named_values var_name alloca;
+ ) (params the_function)
+
+ let codegen_func the_fpm = function
+ | Ast.Function (proto, body) ->
+ Hashtbl.clear named_values;
+ let the_function = codegen_proto proto in
+
+ (* If this is an operator, install it. *)
+ begin match proto with
+ | Ast.BinOpPrototype (name, args, prec) ->
+ let op = name.[String.length name - 1] in
+ Hashtbl.add Parser.binop_precedence op prec;
+ | _ -> ()
+ end;
+
+ (* Create a new basic block to start insertion into. *)
+ let bb = append_block context "entry" the_function in
+ position_at_end bb builder;
+
+ try
+ (* Add all arguments to the symbol table and create their allocas. *)
+ create_argument_allocas the_function proto;
+
+ let ret_val = codegen_expr body in
+
+ (* Finish off the function. *)
+ let _ = build_ret ret_val builder in
+
+ (* Validate the generated code, checking for consistency. *)
+ Llvm_analysis.assert_valid_function the_function;
+
+ (* Optimize the function. *)
+ let _ = PassManager.run_function the_function the_fpm in
+
+ the_function
+ with e ->
+ delete_function the_function;
+ raise e
+
+toplevel.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+ open Llvm_executionengine
+
+ (* top ::= definition | external | expression | ';' *)
+ let rec main_loop the_fpm the_execution_engine stream =
+ match Stream.peek stream with
+ | None -> ()
+
+ (* ignore top-level semicolons. *)
+ | Some (Token.Kwd ';') ->
+ Stream.junk stream;
+ main_loop the_fpm the_execution_engine stream
+
+ | Some token ->
+ begin
+ try match token with
+ | Token.Def ->
+ let e = Parser.parse_definition stream in
+ print_endline "parsed a function definition.";
+ dump_value (Codegen.codegen_func the_fpm e);
+ | Token.Extern ->
+ let e = Parser.parse_extern stream in
+ print_endline "parsed an extern.";
+ dump_value (Codegen.codegen_proto e);
+ | _ ->
+ (* Evaluate a top-level expression into an anonymous function. *)
+ let e = Parser.parse_toplevel stream in
+ print_endline "parsed a top-level expr";
+ let the_function = Codegen.codegen_func the_fpm e in
+ dump_value the_function;
+
+ (* JIT the function, returning a function pointer. *)
+ let result = ExecutionEngine.run_function the_function [||]
+ the_execution_engine in
+
+ print_string "Evaluated to ";
+ print_float (GenericValue.as_float Codegen.double_type result);
+ print_newline ();
+ with Stream.Error s | Codegen.Error s ->
+ (* Skip token for error recovery. *)
+ Stream.junk stream;
+ print_endline s;
+ end;
+ print_string "ready> "; flush stdout;
+ main_loop the_fpm the_execution_engine stream
+
+toy.ml:
+ .. code-block:: ocaml
+
+ (*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+ open Llvm
+ open Llvm_executionengine
+ open Llvm_target
+ open Llvm_scalar_opts
+
+ let main () =
+ ignore (initialize_native_target ());
+
+ (* Install standard binary operators.
+ * 1 is the lowest precedence. *)
+ Hashtbl.add Parser.binop_precedence '=' 2;
+ Hashtbl.add Parser.binop_precedence '<' 10;
+ Hashtbl.add Parser.binop_precedence '+' 20;
+ Hashtbl.add Parser.binop_precedence '-' 20;
+ Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
+
+ (* Prime the first token. *)
+ print_string "ready> "; flush stdout;
+ let stream = Lexer.lex (Stream.of_channel stdin) in
+
+ (* Create the JIT. *)
+ let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+ let the_fpm = PassManager.create_function Codegen.the_module in
+
+ (* Set up the optimizer pipeline. Start with registering info about how the
+ * target lays out data structures. *)
+ DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+ (* Promote allocas to registers. *)
+ add_memory_to_register_promotion the_fpm;
+
+ (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+ add_instruction_combination the_fpm;
+
+ (* Reassociate expressions. *)
+ add_reassociation the_fpm;
+
+ (* Eliminate Common SubExpressions. *)
+ add_gvn the_fpm;
+
+ (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+ add_cfg_simplification the_fpm;
+
+ ignore (PassManager.initialize the_fpm);
+
+ (* Run the main "interpreter loop" now. *)
+ Toplevel.main_loop the_fpm the_execution_engine stream;
+
+ (* Print out all the generated code. *)
+ dump_module Codegen.the_module
+ ;;
+
+ main ()
+
+bindings.c:
+ .. code-block:: c
+
+ #include <stdio.h>
+
+ /* putchard - putchar that takes a double and returns 0. */
+ extern double putchard(double X) {
+ putchar((char)X);
+ return 0;
+ }
+
+ /* printd - printf that takes a double and prints it as "%f\n", returning 0. */
+ extern double printd(double X) {
+ printf("%f\n", X);
+ return 0;
+ }
+
+`Next: Conclusion and other useful LLVM tidbits <OCamlLangImpl8.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl8.html b/docs/tutorial/OCamlLangImpl8.html
deleted file mode 100644
index 7c1a500a21bf..000000000000
--- a/docs/tutorial/OCamlLangImpl8.html
+++ /dev/null
@@ -1,359 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
- <title>Kaleidoscope: Conclusion and other useful LLVM tidbits</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Chris Lattner">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Conclusion and other useful LLVM tidbits</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 8
- <ol>
- <li><a href="#conclusion">Tutorial Conclusion</a></li>
- <li><a href="#llvmirproperties">Properties of LLVM IR</a>
- <ul>
- <li><a href="#targetindep">Target Independence</a></li>
- <li><a href="#safety">Safety Guarantees</a></li>
- <li><a href="#langspecific">Language-Specific Optimizations</a></li>
- </ul>
- </li>
- <li><a href="#tipsandtricks">Tips and Tricks</a>
- <ul>
- <li><a href="#offsetofsizeof">Implementing portable
- offsetof/sizeof</a></li>
- <li><a href="#gcstack">Garbage Collected Stack Frames</a></li>
- </ul>
- </li>
- </ol>
-</li>
-</ul>
-
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="conclusion">Tutorial Conclusion</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to the final chapter of the "<a href="index.html">Implementing a
-language with LLVM</a>" tutorial. In the course of this tutorial, we have grown
-our little Kaleidoscope language from being a useless toy, to being a
-semi-interesting (but probably still useless) toy. :)</p>
-
-<p>It is interesting to see how far we've come, and how little code it has
-taken. We built the entire lexer, parser, AST, code generator, and an
-interactive run-loop (with a JIT!) by-hand in under 700 lines of
-(non-comment/non-blank) code.</p>
-
-<p>Our little language supports a couple of interesting features: it supports
-user defined binary and unary operators, it uses JIT compilation for immediate
-evaluation, and it supports a few control flow constructs with SSA construction.
-</p>
-
-<p>Part of the idea of this tutorial was to show you how easy and fun it can be
-to define, build, and play with languages. Building a compiler need not be a
-scary or mystical process! Now that you've seen some of the basics, I strongly
-encourage you to take the code and hack on it. For example, try adding:</p>
-
-<ul>
-<li><b>global variables</b> - While global variables have questional value in
-modern software engineering, they are often useful when putting together quick
-little hacks like the Kaleidoscope compiler itself. Fortunately, our current
-setup makes it very easy to add global variables: just have value lookup check
-to see if an unresolved variable is in the global variable symbol table before
-rejecting it. To create a new global variable, make an instance of the LLVM
-<tt>GlobalVariable</tt> class.</li>
-
-<li><b>typed variables</b> - Kaleidoscope currently only supports variables of
-type double. This gives the language a very nice elegance, because only
-supporting one type means that you never have to specify types. Different
-languages have different ways of handling this. The easiest way is to require
-the user to specify types for every variable definition, and record the type
-of the variable in the symbol table along with its Value*.</li>
-
-<li><b>arrays, structs, vectors, etc</b> - Once you add types, you can start
-extending the type system in all sorts of interesting ways. Simple arrays are
-very easy and are quite useful for many different applications. Adding them is
-mostly an exercise in learning how the LLVM <a
-href="../LangRef.html#i_getelementptr">getelementptr</a> instruction works: it
-is so nifty/unconventional, it <a
-href="../GetElementPtr.html">has its own FAQ</a>! If you add support
-for recursive types (e.g. linked lists), make sure to read the <a
-href="../ProgrammersManual.html#TypeResolve">section in the LLVM
-Programmer's Manual</a> that describes how to construct them.</li>
-
-<li><b>standard runtime</b> - Our current language allows the user to access
-arbitrary external functions, and we use it for things like "printd" and
-"putchard". As you extend the language to add higher-level constructs, often
-these constructs make the most sense if they are lowered to calls into a
-language-supplied runtime. For example, if you add hash tables to the language,
-it would probably make sense to add the routines to a runtime, instead of
-inlining them all the way.</li>
-
-<li><b>memory management</b> - Currently we can only access the stack in
-Kaleidoscope. It would also be useful to be able to allocate heap memory,
-either with calls to the standard libc malloc/free interface or with a garbage
-collector. If you would like to use garbage collection, note that LLVM fully
-supports <a href="../GarbageCollection.html">Accurate Garbage Collection</a>
-including algorithms that move objects and need to scan/update the stack.</li>
-
-<li><b>debugger support</b> - LLVM supports generation of <a
-href="../SourceLevelDebugging.html">DWARF Debug info</a> which is understood by
-common debuggers like GDB. Adding support for debug info is fairly
-straightforward. The best way to understand it is to compile some C/C++ code
-with "<tt>llvm-gcc -g -O0</tt>" and taking a look at what it produces.</li>
-
-<li><b>exception handling support</b> - LLVM supports generation of <a
-href="../ExceptionHandling.html">zero cost exceptions</a> which interoperate
-with code compiled in other languages. You could also generate code by
-implicitly making every function return an error value and checking it. You
-could also make explicit use of setjmp/longjmp. There are many different ways
-to go here.</li>
-
-<li><b>object orientation, generics, database access, complex numbers,
-geometric programming, ...</b> - Really, there is
-no end of crazy features that you can add to the language.</li>
-
-<li><b>unusual domains</b> - We've been talking about applying LLVM to a domain
-that many people are interested in: building a compiler for a specific language.
-However, there are many other domains that can use compiler technology that are
-not typically considered. For example, LLVM has been used to implement OpenGL
-graphics acceleration, translate C++ code to ActionScript, and many other
-cute and clever things. Maybe you will be the first to JIT compile a regular
-expression interpreter into native code with LLVM?</li>
-
-</ul>
-
-<p>
-Have fun - try doing something crazy and unusual. Building a language like
-everyone else always has, is much less fun than trying something a little crazy
-or off the wall and seeing how it turns out. If you get stuck or want to talk
-about it, feel free to email the <a
-href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev mailing
-list</a>: it has lots of people who are interested in languages and are often
-willing to help out.
-</p>
-
-<p>Before we end this tutorial, I want to talk about some "tips and tricks" for generating
-LLVM IR. These are some of the more subtle things that may not be obvious, but
-are very useful if you want to take advantage of LLVM's capabilities.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="llvmirproperties">Properties of the LLVM IR</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>We have a couple common questions about code in the LLVM IR form - lets just
-get these out of the way right now, shall we?</p>
-
-<!-- ======================================================================= -->
-<h4><a name="targetindep">Target Independence</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Kaleidoscope is an example of a "portable language": any program written in
-Kaleidoscope will work the same way on any target that it runs on. Many other
-languages have this property, e.g. lisp, java, haskell, javascript, python, etc
-(note that while these languages are portable, not all their libraries are).</p>
-
-<p>One nice aspect of LLVM is that it is often capable of preserving target
-independence in the IR: you can take the LLVM IR for a Kaleidoscope-compiled
-program and run it on any target that LLVM supports, even emitting C code and
-compiling that on targets that LLVM doesn't support natively. You can trivially
-tell that the Kaleidoscope compiler generates target-independent code because it
-never queries for any target-specific information when generating code.</p>
-
-<p>The fact that LLVM provides a compact, target-independent, representation for
-code gets a lot of people excited. Unfortunately, these people are usually
-thinking about C or a language from the C family when they are asking questions
-about language portability. I say "unfortunately", because there is really no
-way to make (fully general) C code portable, other than shipping the source code
-around (and of course, C source code is not actually portable in general
-either - ever port a really old application from 32- to 64-bits?).</p>
-
-<p>The problem with C (again, in its full generality) is that it is heavily
-laden with target specific assumptions. As one simple example, the preprocessor
-often destructively removes target-independence from the code when it processes
-the input text:</p>
-
-<div class="doc_code">
-<pre>
-#ifdef __i386__
- int X = 1;
-#else
- int X = 42;
-#endif
-</pre>
-</div>
-
-<p>While it is possible to engineer more and more complex solutions to problems
-like this, it cannot be solved in full generality in a way that is better than shipping
-the actual source code.</p>
-
-<p>That said, there are interesting subsets of C that can be made portable. If
-you are willing to fix primitive types to a fixed size (say int = 32-bits,
-and long = 64-bits), don't care about ABI compatibility with existing binaries,
-and are willing to give up some other minor features, you can have portable
-code. This can make sense for specialized domains such as an
-in-kernel language.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="safety">Safety Guarantees</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Many of the languages above are also "safe" languages: it is impossible for
-a program written in Java to corrupt its address space and crash the process
-(assuming the JVM has no bugs).
-Safety is an interesting property that requires a combination of language
-design, runtime support, and often operating system support.</p>
-
-<p>It is certainly possible to implement a safe language in LLVM, but LLVM IR
-does not itself guarantee safety. The LLVM IR allows unsafe pointer casts,
-use after free bugs, buffer over-runs, and a variety of other problems. Safety
-needs to be implemented as a layer on top of LLVM and, conveniently, several
-groups have investigated this. Ask on the <a
-href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev mailing
-list</a> if you are interested in more details.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="langspecific">Language-Specific Optimizations</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>One thing about LLVM that turns off many people is that it does not solve all
-the world's problems in one system (sorry 'world hunger', someone else will have
-to solve you some other day). One specific complaint is that people perceive
-LLVM as being incapable of performing high-level language-specific optimization:
-LLVM "loses too much information".</p>
-
-<p>Unfortunately, this is really not the place to give you a full and unified
-version of "Chris Lattner's theory of compiler design". Instead, I'll make a
-few observations:</p>
-
-<p>First, you're right that LLVM does lose information. For example, as of this
-writing, there is no way to distinguish in the LLVM IR whether an SSA-value came
-from a C "int" or a C "long" on an ILP32 machine (other than debug info). Both
-get compiled down to an 'i32' value and the information about what it came from
-is lost. The more general issue here, is that the LLVM type system uses
-"structural equivalence" instead of "name equivalence". Another place this
-surprises people is if you have two types in a high-level language that have the
-same structure (e.g. two different structs that have a single int field): these
-types will compile down into a single LLVM type and it will be impossible to
-tell what it came from.</p>
-
-<p>Second, while LLVM does lose information, LLVM is not a fixed target: we
-continue to enhance and improve it in many different ways. In addition to
-adding new features (LLVM did not always support exceptions or debug info), we
-also extend the IR to capture important information for optimization (e.g.
-whether an argument is sign or zero extended, information about pointers
-aliasing, etc). Many of the enhancements are user-driven: people want LLVM to
-include some specific feature, so they go ahead and extend it.</p>
-
-<p>Third, it is <em>possible and easy</em> to add language-specific
-optimizations, and you have a number of choices in how to do it. As one trivial
-example, it is easy to add language-specific optimization passes that
-"know" things about code compiled for a language. In the case of the C family,
-there is an optimization pass that "knows" about the standard C library
-functions. If you call "exit(0)" in main(), it knows that it is safe to
-optimize that into "return 0;" because C specifies what the 'exit'
-function does.</p>
-
-<p>In addition to simple library knowledge, it is possible to embed a variety of
-other language-specific information into the LLVM IR. If you have a specific
-need and run into a wall, please bring the topic up on the llvmdev list. At the
-very worst, you can always treat LLVM as if it were a "dumb code generator" and
-implement the high-level optimizations you desire in your front-end, on the
-language-specific AST.
-</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="tipsandtricks">Tips and Tricks</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>There is a variety of useful tips and tricks that you come to know after
-working on/with LLVM that aren't obvious at first glance. Instead of letting
-everyone rediscover them, this section talks about some of these issues.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="offsetofsizeof">Implementing portable offsetof/sizeof</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>One interesting thing that comes up, if you are trying to keep the code
-generated by your compiler "target independent", is that you often need to know
-the size of some LLVM type or the offset of some field in an llvm structure.
-For example, you might need to pass the size of a type into a function that
-allocates memory.</p>
-
-<p>Unfortunately, this can vary widely across targets: for example the width of
-a pointer is trivially target-specific. However, there is a <a
-href="http://nondot.org/sabre/LLVMNotes/SizeOf-OffsetOf-VariableSizedStructs.txt">clever
-way to use the getelementptr instruction</a> that allows you to compute this
-in a portable way.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="gcstack">Garbage Collected Stack Frames</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Some languages want to explicitly manage their stack frames, often so that
-they are garbage collected or to allow easy implementation of closures. There
-are often better ways to implement these features than explicit stack frames,
-but <a
-href="http://nondot.org/sabre/LLVMNotes/ExplicitlyManagedStackFrames.txt">LLVM
-does support them,</a> if you want. It requires your front-end to convert the
-code into <a
-href="http://en.wikipedia.org/wiki/Continuation-passing_style">Continuation
-Passing Style</a> and the use of tail calls (which LLVM also supports).</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl8.rst b/docs/tutorial/OCamlLangImpl8.rst
new file mode 100644
index 000000000000..3534b2e0c931
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl8.rst
@@ -0,0 +1,267 @@
+======================================================
+Kaleidoscope: Conclusion and other useful LLVM tidbits
+======================================================
+
+.. contents::
+ :local:
+
+Tutorial Conclusion
+===================
+
+Welcome to the final chapter of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. In the course of this tutorial, we have
+grown our little Kaleidoscope language from being a useless toy, to
+being a semi-interesting (but probably still useless) toy. :)
+
+It is interesting to see how far we've come, and how little code it has
+taken. We built the entire lexer, parser, AST, code generator, and an
+interactive run-loop (with a JIT!) by hand in under 700 lines of
+(non-comment/non-blank) code.
+
+Our little language supports a couple of interesting features: it
+supports user-defined binary and unary operators, it uses JIT
+compilation for immediate evaluation, and it supports a few control flow
+constructs with SSA construction.
+
+Part of the idea of this tutorial was to show you how easy and fun it
+can be to define, build, and play with languages. Building a compiler
+need not be a scary or mystical process! Now that you've seen some of
+the basics, I strongly encourage you to take the code and hack on it.
+For example, try adding:
+
+- **global variables** - While global variables have questionable value
+  in modern software engineering, they are often useful when putting
+  together quick little hacks like the Kaleidoscope compiler itself.
+  Fortunately, our current setup makes it very easy to add global
+  variables: just have value lookup check to see if an unresolved
+  variable is in the global variable symbol table before rejecting it.
+  To create a new global variable, make an instance of the LLVM
+  ``GlobalVariable`` class (see the sketch after this list).
+- **typed variables** - Kaleidoscope currently only supports variables
+ of type double. This gives the language a very nice elegance, because
+ only supporting one type means that you never have to specify types.
+ Different languages have different ways of handling this. The easiest
+ way is to require the user to specify types for every variable
+ definition, and record the type of the variable in the symbol table
+ along with its Value\*.
+- **arrays, structs, vectors, etc** - Once you add types, you can start
+ extending the type system in all sorts of interesting ways. Simple
+ arrays are very easy and are quite useful for many different
+ applications. Adding them is mostly an exercise in learning how the
+ LLVM `getelementptr <../LangRef.html#i_getelementptr>`_ instruction
+ works: it is so nifty/unconventional, it `has its own
+ FAQ <../GetElementPtr.html>`_! If you add support for recursive types
+ (e.g. linked lists), make sure to read the `section in the LLVM
+ Programmer's Manual <../ProgrammersManual.html#TypeResolve>`_ that
+ describes how to construct them.
+- **standard runtime** - Our current language allows the user to access
+ arbitrary external functions, and we use it for things like "printd"
+ and "putchard". As you extend the language to add higher-level
+ constructs, often these constructs make the most sense if they are
+ lowered to calls into a language-supplied runtime. For example, if
+ you add hash tables to the language, it would probably make sense to
+ add the routines to a runtime, instead of inlining them all the way.
+- **memory management** - Currently we can only access the stack in
+ Kaleidoscope. It would also be useful to be able to allocate heap
+ memory, either with calls to the standard libc malloc/free interface
+ or with a garbage collector. If you would like to use garbage
+ collection, note that LLVM fully supports `Accurate Garbage
+ Collection <../GarbageCollection.html>`_ including algorithms that
+ move objects and need to scan/update the stack.
+- **debugger support** - LLVM supports generation of `DWARF Debug
+ info <../SourceLevelDebugging.html>`_ which is understood by common
+  debuggers like GDB. Adding support for debug info is fairly
+  straightforward. The best way to understand it is to compile some
+  C/C++ code with "``llvm-gcc -g -O0``" and take a look at what it
+  produces.
+- **exception handling support** - LLVM supports generation of `zero
+ cost exceptions <../ExceptionHandling.html>`_ which interoperate with
+ code compiled in other languages. You could also generate code by
+ implicitly making every function return an error value and checking
+ it. You could also make explicit use of setjmp/longjmp. There are
+ many different ways to go here.
+- **object orientation, generics, database access, complex numbers,
+ geometric programming, ...** - Really, there is no end of crazy
+ features that you can add to the language.
+- **unusual domains** - We've been talking about applying LLVM to a
+ domain that many people are interested in: building a compiler for a
+ specific language. However, there are many other domains that can use
+ compiler technology that are not typically considered. For example,
+ LLVM has been used to implement OpenGL graphics acceleration,
+ translate C++ code to ActionScript, and many other cute and clever
+ things. Maybe you will be the first to JIT compile a regular
+ expression interpreter into native code with LLVM?
+
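+As a minimal sketch of the global-variable suggestion above (shown via
+the C++ API for concreteness; ``TheModule`` and ``Context`` are
+hypothetical stand-ins for your module and LLVMContext):
+
+.. code-block:: c++
+
+    // Create a mutable global double, initialized to 0.0. Value lookup
+    // can then fall back to TheModule->getGlobalVariable("myglobal").
+    llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+        *TheModule, llvm::Type::getDoubleTy(Context),
+        /*isConstant=*/false, llvm::GlobalValue::InternalLinkage,
+        llvm::ConstantFP::get(Context, llvm::APFloat(0.0)), "myglobal");
+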
+Have fun - try doing something crazy and unusual. Building a language
+like everyone else always has is much less fun than trying something a
+little crazy or off the wall and seeing how it turns out. If you get
+stuck or want to talk about it, feel free to email the `llvmdev mailing
+list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_: it has lots
+of people who are interested in languages and are often willing to help
+out.
+
+Before we end this tutorial, I want to talk about some "tips and tricks"
+for generating LLVM IR. These are some of the more subtle things that
+may not be obvious, but are very useful if you want to take advantage of
+LLVM's capabilities.
+
+Properties of the LLVM IR
+=========================
+
+We get a couple of common questions about code in LLVM IR form - let's
+just get these out of the way right now, shall we?
+
+Target Independence
+-------------------
+
+Kaleidoscope is an example of a "portable language": any program written
+in Kaleidoscope will work the same way on any target that it runs on.
+Many other languages have this property, e.g. lisp, java, haskell,
+javascript, python, etc. (note that while these languages are portable,
+not all their libraries are).
+
+One nice aspect of LLVM is that it is often capable of preserving target
+independence in the IR: you can take the LLVM IR for a
+Kaleidoscope-compiled program and run it on any target that LLVM
+supports, even emitting C code and compiling that on targets that LLVM
+doesn't support natively. You can trivially tell that the Kaleidoscope
+compiler generates target-independent code because it never queries for
+any target-specific information when generating code.
+
+The fact that LLVM provides a compact, target-independent
+representation for code gets a lot of people excited. Unfortunately,
+these people are usually thinking about C or a language from the C
+family when they are asking questions about language portability. I say
+"unfortunately", because there is really no way to make (fully general)
+C code portable, other than shipping the source code around (and of
+course, C source code is not actually portable in general either - ever
+port a really old application from 32- to 64-bits?).
+
+The problem with C (again, in its full generality) is that it is heavily
+laden with target-specific assumptions. As one simple example, the
+preprocessor often destructively removes target-independence from the
+code when it processes the input text:
+
+.. code-block:: c
+
+ #ifdef __i386__
+ int X = 1;
+ #else
+ int X = 42;
+ #endif
+
+While it is possible to engineer more and more complex solutions to
+problems like this, it cannot be solved in full generality in a way that
+is better than shipping the actual source code.
+
+That said, there are interesting subsets of C that can be made portable.
+If you are willing to pin primitive types to a fixed size (say int =
+32 bits and long = 64 bits), don't care about ABI compatibility with
+existing binaries, and are willing to give up some other minor features,
+you can have portable code. This can make sense for specialized domains
+such as an in-kernel language.
+
+Safety Guarantees
+-----------------
+
+Many of the languages above are also "safe" languages: it is impossible
+for a program written in Java to corrupt its address space and crash the
+process (assuming the JVM has no bugs). Safety is an interesting
+property that requires a combination of language design, runtime
+support, and often operating system support.
+
+It is certainly possible to implement a safe language in LLVM, but LLVM
+IR does not itself guarantee safety. The LLVM IR allows unsafe pointer
+casts, use-after-free bugs, buffer overruns, and a variety of other
+problems. Safety needs to be implemented as a layer on top of LLVM and,
+conveniently, several groups have investigated this. Ask on the `llvmdev
+mailing list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ if
+you are interested in more details.
+
+Language-Specific Optimizations
+-------------------------------
+
+One thing about LLVM that turns off many people is that it does not
+solve all the world's problems in one system (sorry 'world hunger',
+someone else will have to solve you some other day). One specific
+complaint is that people perceive LLVM as being incapable of performing
+high-level language-specific optimization: LLVM "loses too much
+information".
+
+Unfortunately, this is really not the place to give you a full and
+unified version of "Chris Lattner's theory of compiler design". Instead,
+I'll make a few observations:
+
+First, you're right that LLVM does lose information. For example, as of
+this writing, there is no way to distinguish in the LLVM IR whether an
+SSA value came from a C "int" or a C "long" on an ILP32 machine (other
+than debug info). Both get compiled down to an 'i32' value and the
+information about where it came from is lost. The more general issue
+here is that the LLVM type system uses "structural equivalence" instead
+of "name equivalence". Another place this surprises people is if you
+have two types in a high-level language that have the same structure
+(e.g. two different structs that have a single int field): these types
+will compile down into a single LLVM type and it will be impossible to
+tell which one a given value came from.
+
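+For instance, these two distinct source types (plain C++ shown for
+illustration) lower to one and the same LLVM type:
+
+.. code-block:: c++
+
+    struct Celsius    { int degrees; };  // becomes the LLVM type { i32 }
+    struct Fahrenheit { int degrees; };  // also becomes { i32 }
+    // From the IR alone there is no way to recover which struct a
+    // given value of type { i32 } originally was.
+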
+Second, while LLVM does lose information, LLVM is not a fixed target: we
+continue to enhance and improve it in many different ways. In addition
+to adding new features (LLVM did not always support exceptions or debug
+info), we also extend the IR to capture important information for
+optimization (e.g. whether an argument is sign or zero extended,
+information about pointer aliasing, etc.). Many of the enhancements are
+user-driven: people want LLVM to include some specific feature, so they
+go ahead and extend it.
+
+Third, it is *possible and easy* to add language-specific optimizations,
+and you have a number of choices in how to do it. As one trivial
+example, it is easy to add language-specific optimization passes that
+"know" things about code compiled for a language. In the case of the C
+family, there is an optimization pass that "knows" about the standard C
+library functions. If you call "exit(0)" in main(), it knows that it is
+safe to optimize that into "return 0;" because C specifies what the
+'exit' function does.
+
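+Concretely, the transformation described above amounts to the following
+(plain C++, shown only as an illustration of what such a pass is allowed
+to do):
+
+.. code-block:: c++
+
+    #include <cstdlib>
+
+    int main() {
+      exit(0);    // a libcall-aware pass may rewrite this to "return 0;"
+    }             // because the standard specifies what exit() does here
+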
+In addition to simple library knowledge, it is possible to embed a
+variety of other language-specific information into the LLVM IR. If you
+have a specific need and run into a wall, please bring the topic up on
+the llvmdev list. At the very worst, you can always treat LLVM as if it
+were a "dumb code generator" and implement the high-level optimizations
+you desire in your front-end, on the language-specific AST.
+
+Tips and Tricks
+===============
+
+There are a variety of useful tips and tricks that you come to know
+after working on/with LLVM that aren't obvious at first glance. Instead
+of letting everyone rediscover them, this section talks about some of
+these issues.
+
+Implementing portable offsetof/sizeof
+-------------------------------------
+
+One interesting thing that comes up, if you are trying to keep the code
+generated by your compiler "target independent", is that you often need
+to know the size of some LLVM type or the offset of some field in an
+LLVM structure. For example, you might need to pass the size of a type
+into a function that allocates memory.
+
+Unfortunately, this can vary widely across targets: for example the
+width of a pointer is trivially target-specific. However, there is a
+`clever way to use the getelementptr
+instruction <http://nondot.org/sabre/LLVMNotes/SizeOf-OffsetOf-VariableSizedStructs.txt>`_
+that allows you to compute this in a portable way.
+
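+A minimal sketch of that trick through the C++ API (``Ty`` and ``Ctx``
+are hypothetical stand-ins for the type being measured and your
+LLVMContext):
+
+.. code-block:: c++
+
+    // "sizeof(Ty)": compute the address of element 1 of a null Ty*,
+    // then convert that address to an integer. The expression stays
+    // target-independent in the IR and only folds to a concrete number
+    // once target information is available.
+    llvm::Constant *Null =
+        llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Ty));
+    llvm::Constant *One =
+        llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1);
+    llvm::Constant *SizeGEP = llvm::ConstantExpr::getGetElementPtr(Null, One);
+    llvm::Constant *Size =
+        llvm::ConstantExpr::getPtrToInt(SizeGEP, llvm::Type::getInt64Ty(Ctx));
+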
+Garbage Collected Stack Frames
+------------------------------
+
+Some languages want to explicitly manage their stack frames, often so
+that they are garbage collected or to allow easy implementation of
+closures. There are often better ways to implement these features than
+explicit stack frames, but `LLVM does support
+them <http://nondot.org/sabre/LLVMNotes/ExplicitlyManagedStackFrames.txt>`_,
+if you want. It requires your front-end to convert the code into
+`Continuation Passing
+Style <http://en.wikipedia.org/wiki/Continuation-passing_style>`_ and
+to use tail calls (which LLVM also supports).
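+
+For reference, marking a call as a tail call through the C++ API is a
+one-liner (``Builder``, ``Callee``, and ``Arg`` are hypothetical
+stand-ins, not code from this tutorial):
+
+.. code-block:: c++
+
+    // A tail call lets the backend replace the caller's frame with the
+    // callee's, which is what makes CPS code generation practical.
+    llvm::CallInst *CI = Builder.CreateCall(Callee, Arg);
+    CI->setTailCall();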
+
diff --git a/docs/tutorial/index.html b/docs/tutorial/index.html
deleted file mode 100644
index 2c11a9a48b35..000000000000
--- a/docs/tutorial/index.html
+++ /dev/null
@@ -1,48 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <title>LLVM Tutorial: Table of Contents</title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <meta name="author" content="Owen Anderson">
- <meta name="description"
- content="LLVM Tutorial: Table of Contents.">
- <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>LLVM Tutorial: Table of Contents</h1>
-
-<ol>
- <li>Kaleidoscope: Implementing a Language with LLVM
- <ol>
- <li><a href="LangImpl1.html">Tutorial Introduction and the Lexer</a></li>
- <li><a href="LangImpl2.html">Implementing a Parser and AST</a></li>
- <li><a href="LangImpl3.html">Implementing Code Generation to LLVM IR</a></li>
- <li><a href="LangImpl4.html">Adding JIT and Optimizer Support</a></li>
- <li><a href="LangImpl5.html">Extending the language: control flow</a></li>
- <li><a href="LangImpl6.html">Extending the language: user-defined operators</a></li>
- <li><a href="LangImpl7.html">Extending the language: mutable variables / SSA construction</a></li>
- <li><a href="LangImpl8.html">Conclusion and other useful LLVM tidbits</a></li>
- </ol></li>
- <li>Kaleidoscope: Implementing a Language with LLVM in Objective Caml
- <ol>
- <li><a href="OCamlLangImpl1.html">Tutorial Introduction and the Lexer</a></li>
- <li><a href="OCamlLangImpl2.html">Implementing a Parser and AST</a></li>
- <li><a href="OCamlLangImpl3.html">Implementing Code Generation to LLVM IR</a></li>
- <li><a href="OCamlLangImpl4.html">Adding JIT and Optimizer Support</a></li>
- <li><a href="OCamlLangImpl5.html">Extending the language: control flow</a></li>
- <li><a href="OCamlLangImpl6.html">Extending the language: user-defined operators</a></li>
- <li><a href="OCamlLangImpl7.html">Extending the language: mutable variables / SSA construction</a></li>
- <li><a href="OCamlLangImpl8.html">Conclusion and other useful LLVM tidbits</a></li>
- </ol></li>
- <li>Advanced Topics
- <ol>
- <li><a href="http://llvm.org/pubs/2004-09-22-LCPCLLVMTutorial.html">Writing
- an Optimization for LLVM</a></li>
- </ol></li>
-</ol>
-
-</body>
-</html>
diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst
new file mode 100644
index 000000000000..69a9aee0962a
--- /dev/null
+++ b/docs/tutorial/index.rst
@@ -0,0 +1,43 @@
+================================
+LLVM Tutorial: Table of Contents
+================================
+
+Kaleidoscope: Implementing a Language with LLVM
+===============================================
+
+.. toctree::
+ :titlesonly:
+ :glob:
+ :numbered:
+
+ LangImpl*
+
+Kaleidoscope: Implementing a Language with LLVM in Objective Caml
+=================================================================
+
+.. toctree::
+ :titlesonly:
+ :glob:
+ :numbered:
+
+ OCamlLangImpl*
+
+External Tutorials
+==================
+
+`Tutorial: Creating an LLVM Backend for the Cpu0 Architecture <http://jonathan2251.github.com/lbd/>`_
+ A step-by-step tutorial for developing an LLVM backend. Under
+ active development at `<https://github.com/Jonathan2251/lbd>`_ (please
+ contribute!).
+
+`Howto: Implementing LLVM Integrated Assembler`_
+ A simple guide for how to implement an LLVM integrated assembler for an
+ architecture.
+
+.. _`Howto: Implementing LLVM Integrated Assembler`: http://www.embecosm.com/download/ean10.html
+
+Advanced Topics
+===============
+
+#. `Writing an Optimization for LLVM <http://llvm.org/pubs/2004-09-22-LCPCLLVMTutorial.html>`_
+
diff --git a/docs/userguides.rst b/docs/userguides.rst
deleted file mode 100644
index 8c1554dfce9c..000000000000
--- a/docs/userguides.rst
+++ /dev/null
@@ -1,104 +0,0 @@
-.. _userguides:
-
-User Guides
-===========
-
-.. toctree::
- :hidden:
-
- CMake
- HowToBuildOnARM
- CommandGuide/index
- DeveloperPolicy
- GettingStarted
- GettingStartedVS
- FAQ
- Lexicon
- Packaging
- HowToAddABuilder
- yaml2obj
- HowToSubmitABug
- SphinxQuickstartTemplate
- Phabricator
-
-* :ref:`getting_started`
-
- Discusses how to get up and running quickly with the LLVM infrastructure.
- Everything from unpacking and compilation of the distribution to execution
- of some tools.
-
-* :ref:`building-with-cmake`
-
- An addendum to the main Getting Started guide for those using the `CMake
- build system <http://www.cmake.org>`_.
-
-* :ref:`how_to_build_on_arm`
-
- Notes on building and testing LLVM/Clang on ARM.
-
-* `Getting Started with the LLVM System using Microsoft Visual Studio
- <GettingStartedVS.html>`_
-
- An addendum to the main Getting Started guide for those using Visual Studio
- on Windows.
-
-* `LLVM Tutorial <tutorial/>`_
-
- A walk through the process of using LLVM for a custom language, and the
- facilities LLVM offers in tutorial form.
-
-* :ref:`developer_policy`
-
- The LLVM project's policy towards developers and their contributions.
-
-* :ref:`LLVM Command Guide <commands>`
-
- A reference manual for the LLVM command line utilities ("man" pages for LLVM
- tools).
-
-* `LLVM's Analysis and Transform Passes <Passes.html>`_
-
- A list of optimizations and analyses implemented in LLVM.
-
-* :ref:`faq`
-
- A list of common questions and problems and their solutions.
-
-* `Release notes for the current release <ReleaseNotes.html>`_
-
- This describes new features, known bugs, and other limitations.
-
-* :ref:`how-to-submit-a-bug-report`
-
- Instructions for properly submitting information about any bugs you run into
- in the LLVM system.
-* :doc:`SphinxQuickstartTemplate`
-
- A template + tutorial for writing new Sphinx documentation. It is meant
- to be read in source form.
-
-* `LLVM Testing Infrastructure Guide <TestingGuide.html>`_
-
- A reference manual for using the LLVM testing infrastructure.
-
-* `How to build the C, C++, ObjC, and ObjC++ front end <http://clang.llvm.org/get_started.html>`_
-
- Instructions for building the clang front-end from source.
-
-* :ref:`packaging`
-
- Advice on packaging LLVM into a distribution.
-
-* :ref:`lexicon`
-
- Definition of acronyms, terms and concepts used in LLVM.
-
-* :ref:`how_to_add_a_builder`
-
- Instructions for adding new builder to LLVM buildbot master.
-
-* **IRC** -- You can probably find help on the unofficial LLVM IRC.
-
- We often are on irc.oftc.net in the #llvm channel. If you are using the
- mozilla browser, and have chatzilla installed, you can `join #llvm on
- irc.oftc.net <irc://irc.oftc.net/llvm>`_.
diff --git a/docs/yaml2obj.rst b/docs/yaml2obj.rst
index d051e7e22c00..b269806e06f6 100644
--- a/docs/yaml2obj.rst
+++ b/docs/yaml2obj.rst
@@ -1,5 +1,3 @@
-.. _yaml2obj:
-
yaml2obj
========
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp
index b002d1f496d2..f8129b819e3a 100644
--- a/examples/BrainF/BrainF.cpp
+++ b/examples/BrainF/BrainF.cpp
@@ -24,10 +24,10 @@
//===--------------------------------------------------------------------===//
#include "BrainF.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include <iostream>
using namespace llvm;
diff --git a/examples/BrainF/BrainF.h b/examples/BrainF/BrainF.h
index c069feb51e72..15e9e0847141 100644
--- a/examples/BrainF/BrainF.h
+++ b/examples/BrainF/BrainF.h
@@ -15,9 +15,9 @@
#ifndef BRAINF_H
#define BRAINF_H
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
using namespace llvm;
diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp
index 58617b7f3809..cd6eabfdffaa 100644
--- a/examples/BrainF/BrainFDriver.cpp
+++ b/examples/BrainF/BrainFDriver.cpp
@@ -25,17 +25,17 @@
//===--------------------------------------------------------------------===//
#include "BrainF.h"
-#include "llvm/Constants.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
-#include <iostream>
#include <fstream>
+#include <iostream>
using namespace llvm;
//Command line options
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index 215cb4d3714f..264ef5481f57 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -41,27 +41,27 @@
// Cases -1 and 7 are caught by a C++ test harness where the validity of
// of a C++ catch(...) clause catching a generated exception with a
// type info type of 7 is explained by: example in rules 1.6.4 in
-// http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22)
+// http://mentorembedded.github.com/cxx-abi/abi-eh.html (v1.22)
//
// This code uses code from the llvm compiler-rt project and the llvm
// Kaleidoscope project.
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Verifier.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
// FIXME: Although all systems tested with (Linux, OS X), do not need this
// header file included. A user on ubuntu reported, undefined symbols
@@ -82,7 +82,7 @@
#endif
// System C++ ABI unwind types from:
-// http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22)
+// http://mentorembedded.github.com/cxx-abi/abi-eh.html (v1.22)
extern "C" {
@@ -151,7 +151,7 @@ struct OurExceptionType_t {
///
/// Note: The above unwind.h defines struct _Unwind_Exception to be aligned
/// on a double word boundary. This is necessary to match the standard:
-/// http://refspecs.freestandards.org/abi-eh-1.21.html
+/// http://mentorembedded.github.com/cxx-abi/abi-eh.html
struct OurBaseException_t {
struct OurExceptionType_t type;
@@ -339,7 +339,7 @@ void deleteOurException(OurUnwindException *expToDelete) {
/// This function is the struct _Unwind_Exception API mandated delete function
/// used by foreign exception handlers when deleting our exception
/// (OurException), instances.
-/// @param reason @link http://refspecs.freestandards.org/abi-eh-1.21.html
+/// @param reason @link http://mentorembedded.github.com/cxx-abi/abi-eh.html
/// @unlink
/// @param expToDelete exception instance to delete
void deleteFromUnwindOurException(_Unwind_Reason_Code reason,
@@ -512,7 +512,7 @@ static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
/// are supported. Filters are not supported.
/// See Variable Length Data in:
/// @link http://dwarfstd.org/Dwarf3.pdf @unlink
-/// Also see @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink
+/// Also see @link http://mentorembedded.github.com/cxx-abi/abi-eh.html @unlink
/// @param resultAction reference variable which will be set with result
/// @param classInfo our array of type info pointers (to globals)
/// @param actionEntry index into above type info array or 0 (clean up).
@@ -599,7 +599,7 @@ static bool handleActionValue(int64_t *resultAction,
/// Deals with the Language specific data portion of the emitted dwarf code.
-/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink
+/// See @link http://mentorembedded.github.com/cxx-abi/abi-eh.html @unlink
/// @param version unsupported (ignored), unwind version
/// @param lsda language specific data area
/// @param _Unwind_Action actions minimally supported unwind stage
@@ -667,8 +667,6 @@ static _Unwind_Reason_Code handleLsda(int version,
const uint8_t *actionTableStart = callSiteTableEnd;
const uint8_t *callSitePtr = callSiteTableStart;
- bool foreignException = false;
-
while (callSitePtr < callSiteTableEnd) {
uintptr_t start = readEncodedPointer(&callSitePtr,
callSiteEncoding);
@@ -684,7 +682,6 @@ static _Unwind_Reason_Code handleLsda(int version,
// We have been notified of a foreign exception being thrown,
// and we therefore need to execute cleanup landing pads
actionEntry = 0;
- foreignException = true;
}
if (landingPad == 0) {
@@ -786,7 +783,7 @@ static _Unwind_Reason_Code handleLsda(int version,
/// This is the personality function which is embedded (dwarf emitted), in the
/// dwarf unwind info block. Again see: JITDwarfEmitter.cpp.
-/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink
+/// See @link http://mentorembedded.github.com/cxx-abi/abi-eh.html @unlink
/// @param version unsupported (ignored), unwind version
/// @param _Unwind_Action actions minimally supported unwind stage
/// (forced specifically not supported)
@@ -834,7 +831,7 @@ _Unwind_Reason_Code ourPersonality(int version,
/// Generates our _Unwind_Exception class from a given character array.
/// thereby handling arbitrary lengths (not in standard), and handling
/// embedded \0s.
-/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink
+/// See @link http://mentorembedded.github.com/cxx-abi/abi-eh.html @unlink
/// @param classChars char array to encode. NULL values not checkedf
/// @param classCharsSize number of chars in classChars. Value is not checked.
/// @returns class value
@@ -1595,7 +1592,7 @@ void runExceptionThrow(llvm::ExecutionEngine *engine,
catch (...) {
// Catch all exceptions including our generated ones. This latter
// functionality works according to the example in rules 1.6.4 of
- // http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22),
+ // http://mentorembedded.github.com/cxx-abi/abi-eh.html (v1.22),
// given that these will be exceptions foreign to C++
// (the _Unwind_Exception::exception_class should be different from
// the one used by C++).
@@ -1687,7 +1684,6 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
std::vector<llvm::Constant*> structVals;
llvm::Constant *nextStruct;
- llvm::GlobalVariable *nextGlobal = NULL;
// Generate each type info
//
@@ -1702,7 +1698,6 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
typeInfoName = typeInfoNameBuilder.str();
// Note: Does not seem to work without allocation
- nextGlobal =
new llvm::GlobalVariable(module,
ourTypeInfoType,
true,
diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp
index 417ad6f4b602..8cbf7d159fc5 100644
--- a/examples/Fibonacci/fibonacci.cpp
+++ b/examples/Fibonacci/fibonacci.cpp
@@ -23,17 +23,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
#include "llvm/Analysis/Verifier.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/ExecutionEngine/Interpreter.h"
#include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static Function *CreateFibFunction(Module *M, LLVMContext &Context) {
diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp
index 5588e923df83..7125a1561045 100644
--- a/examples/HowToUseJIT/HowToUseJIT.cpp
+++ b/examples/HowToUseJIT/HowToUseJIT.cpp
@@ -34,17 +34,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/ExecutionEngine/Interpreter.h"
#include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/Support/TargetSelect.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/examples/Kaleidoscope/Chapter2/toy.cpp b/examples/Kaleidoscope/Chapter2/toy.cpp
index f4f09d0b351a..1cf6caacb6af 100644
--- a/examples/Kaleidoscope/Chapter2/toy.cpp
+++ b/examples/Kaleidoscope/Chapter2/toy.cpp
@@ -1,7 +1,7 @@
#include <cstdio>
#include <cstdlib>
-#include <string>
#include <map>
+#include <string>
#include <vector>
//===----------------------------------------------------------------------===//
diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp
index c1e34b2f09ad..48cfbe6decb2 100644
--- a/examples/Kaleidoscope/Chapter3/toy.cpp
+++ b/examples/Kaleidoscope/Chapter3/toy.cpp
@@ -1,11 +1,11 @@
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include <cstdio>
-#include <string>
#include <map>
+#include <string>
#include <vector>
using namespace llvm;
diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp
index bc6028c900e7..971a7c68b217 100644
--- a/examples/Kaleidoscope/Chapter4/toy.cpp
+++ b/examples/Kaleidoscope/Chapter4/toy.cpp
@@ -1,18 +1,18 @@
-#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
#include <cstdio>
-#include <string>
#include <map>
+#include <string>
#include <vector>
using namespace llvm;
diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp
index 2b0b9d54feb8..5558d08e1d0c 100644
--- a/examples/Kaleidoscope/Chapter5/toy.cpp
+++ b/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -1,18 +1,18 @@
-#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
#include <cstdio>
-#include <string>
#include <map>
+#include <string>
#include <vector>
using namespace llvm;
diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp
index b751e3516bf8..52926eb99f17 100644
--- a/examples/Kaleidoscope/Chapter6/toy.cpp
+++ b/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -1,18 +1,18 @@
-#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
#include <cstdio>
-#include <string>
#include <map>
+#include <string>
#include <vector>
using namespace llvm;
diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp
index 0ac099659064..ba192d6243cd 100644
--- a/examples/Kaleidoscope/Chapter7/toy.cpp
+++ b/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -1,18 +1,18 @@
-#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
#include <cstdio>
-#include <string>
#include <map>
+#include <string>
#include <vector>
using namespace llvm;
diff --git a/examples/ModuleMaker/ModuleMaker.cpp b/examples/ModuleMaker/ModuleMaker.cpp
index 6bc52c12a034..c931972f5b60 100644
--- a/examples/ModuleMaker/ModuleMaker.cpp
+++ b/examples/ModuleMaker/ModuleMaker.cpp
@@ -13,12 +13,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp
index 305cf1dde06f..64a388695ff2 100644
--- a/examples/ParallelJIT/ParallelJIT.cpp
+++ b/examples/ParallelJIT/ParallelJIT.cpp
@@ -17,17 +17,17 @@
// call into the JIT at the same time (or the best possible approximation of the
// same time). This test had assertion errors until I got the locking right.
-#include <pthread.h>
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/ExecutionEngine/Interpreter.h"
#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetSelect.h"
#include <iostream>
+#include <pthread.h>
using namespace llvm;
static Function* createAdd1(Module *M) {
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 620d0887be73..e85fb9750503 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -21,8 +21,8 @@
/* Need these includes to support the LLVM 'cast' template for the C++ 'wrap'
and 'unwrap' conversion functions. */
-#include "llvm/IRBuilder.h"
-#include "llvm/Module.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassRegistry.h"
extern "C" {
@@ -173,10 +173,11 @@ typedef enum {
LLVMUWTable = 1 << 30,
LLVMNonLazyBind = 1 << 31
- /* FIXME: This attribute is currently not included in the C API as
+ /* FIXME: These attributes are currently not included in the C API as
a temporary measure until the API/ABI impact to the C API is understood
and the path forward agreed upon.
- LLVMAddressSafety = 1ULL << 32
+ LLVMAddressSafety = 1ULL << 32,
+ LLVMStackProtectStrongAttribute = 1ULL<<33
*/
} LLVMAttribute;
@@ -357,6 +358,11 @@ typedef enum {
void LLVMInitializeCore(LLVMPassRegistryRef R);
+/** Deallocate and destroy all ManagedStatic variables.
+ @see llvm::llvm_shutdown
+ @see ManagedStatic */
+void LLVMShutdown();
+
/*===-- Error handling ----------------------------------------------------===*/
@@ -2547,6 +2553,13 @@ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(const char *Path,
char **OutMessage);
LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
char **OutMessage);
+LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange(const char *InputData,
+ size_t InputDataLength,
+ const char *BufferName,
+ LLVMBool RequiresNullTerminator);
+LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(const char *InputData,
+ size_t InputDataLength,
+ const char *BufferName);
void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf);
/**
@@ -2619,6 +2632,34 @@ void LLVMDisposePassManager(LLVMPassManagerRef PM);
*/
/**
+ * @defgroup LLVMCCoreThreading Threading
+ *
+ * Handle the structures needed to make LLVM safe for multithreading.
+ *
+ * @{
+ */
+
+/** Allocate and initialize structures needed to make LLVM safe for
+ multithreading. The return value indicates whether multithreaded
+ initialization succeeded. Must be executed in isolation from all
+ other LLVM API calls.
+ @see llvm::llvm_start_multithreaded */
+LLVMBool LLVMStartMultithreaded();
+
+/** Deallocate structures necessary to make LLVM safe for multithreading.
+ Must be executed in isolation from all other LLVM API calls.
+ @see llvm::llvm_stop_multithreaded */
+void LLVMStopMultithreaded();
+
+/** Check whether LLVM is executing in thread-safe mode or not.
+ @see llvm::llvm_is_multithreaded */
+LLVMBool LLVMIsMultithreaded();
+
+/**
+ * @}
+ */
+
+/**
* @}
*/
diff --git a/include/llvm-c/Disassembler.h b/include/llvm-c/Disassembler.h
index b8c4ad9ad738..df65a7b20846 100644
--- a/include/llvm-c/Disassembler.h
+++ b/include/llvm-c/Disassembler.h
@@ -139,13 +139,26 @@ extern "C" {
* by passing a block of information in the DisInfo parameter and specifying the
* TagType and callback functions as described above. These can all be passed
* as NULL. If successful, this returns a disassembler context. If not, it
- * returns NULL.
+ * returns NULL. This function is equivalent to calling LLVMCreateDisasmCPU()
+ * with an empty CPU name.
*/
LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
int TagType, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp);
/**
+ * Create a disassembler for the TripleName and a specific CPU. Symbolic
+ * disassembly is supported by passing a block of information in the DisInfo
+ * parameter and specifying the TagType and callback functions as described
+ * above. These can all be passed as NULL. If successful, this returns a
+ * disassembler context. If not, it returns NULL.
+ */
+LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
+ void *DisInfo, int TagType,
+ LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp);
+
+/**
* Set the disassembler's options. Returns 1 if it can set the Options and 0
* otherwise.
*/
@@ -153,6 +166,10 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DC, uint64_t Options);
/* The option to produce marked up assembly. */
#define LLVMDisassembler_Option_UseMarkup 1
+/* The option to print immediates as hex. */
+#define LLVMDisassembler_Option_PrintImmHex 2
+/* The option to use the other assembler printer variant. */
+#define LLVMDisassembler_Option_AsmPrinterVariant 4
/**
* Dispose of a disassembler context.
diff --git a/include/llvm-c/EnhancedDisassembly.h b/include/llvm-c/EnhancedDisassembly.h
deleted file mode 100644
index 71a0d496c028..000000000000
--- a/include/llvm-c/EnhancedDisassembly.h
+++ /dev/null
@@ -1,530 +0,0 @@
-/*===-- llvm-c/EnhancedDisassembly.h - Disassembler C Interface ---*- C -*-===*\
-|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header declares the C interface to EnhancedDisassembly.so, which *|
-|* implements a disassembler with the ability to extract operand values and *|
-|* individual tokens from assembly instructions. *|
-|* *|
-|* The header declares additional interfaces if the host compiler supports *|
-|* the blocks API. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_ENHANCEDDISASSEMBLY_H
-#define LLVM_C_ENHANCEDDISASSEMBLY_H
-
-#include "llvm/Support/DataTypes.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * @defgroup LLVMCEnhancedDisassembly Enhanced Disassembly
- * @ingroup LLVMC
- * @deprecated
- *
- * This module contains an interface to the Enhanced Disassembly (edis)
- * library. The edis library is deprecated and will likely disappear in
- * the near future. You should use the @ref LLVMCDisassembler interface
- * instead.
- *
- * @{
- */
-
-/*!
- @typedef EDByteReaderCallback
- Interface to memory from which instructions may be read.
- @param byte A pointer whose target should be filled in with the data returned.
- @param address The address of the byte to be read.
- @param arg An anonymous argument for client use.
- @result 0 on success; -1 otherwise.
- */
-typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
-
-/*!
- @typedef EDRegisterReaderCallback
- Interface to registers from which registers may be read.
- @param value A pointer whose target should be filled in with the value of the
- register.
- @param regID The LLVM register identifier for the register to read.
- @param arg An anonymous argument for client use.
- @result 0 if the register could be read; -1 otherwise.
- */
-typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
- void* arg);
-
-/*!
- @typedef EDAssemblySyntax_t
- An assembly syntax for use in tokenizing instructions.
- */
-enum {
-/*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
- kEDAssemblySyntaxX86Intel = 0,
-/*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
- kEDAssemblySyntaxX86ATT = 1,
- kEDAssemblySyntaxARMUAL = 2
-};
-typedef unsigned EDAssemblySyntax_t;
-
-/*!
- @typedef EDDisassemblerRef
- Encapsulates a disassembler for a single CPU architecture.
- */
-typedef void *EDDisassemblerRef;
-
-/*!
- @typedef EDInstRef
- Encapsulates a single disassembled instruction in one assembly syntax.
- */
-typedef void *EDInstRef;
-
-/*!
- @typedef EDTokenRef
- Encapsulates a token from the disassembly of an instruction.
- */
-typedef void *EDTokenRef;
-
-/*!
- @typedef EDOperandRef
- Encapsulates an operand of an instruction.
- */
-typedef void *EDOperandRef;
-
-/*!
- @functiongroup Getting a disassembler
- */
-
-/*!
- @function EDGetDisassembler
- Gets the disassembler for a given target.
- @param disassembler A pointer whose target will be filled in with the
- disassembler.
- @param triple Identifies the target. Example: "x86_64-apple-darwin10"
- @param syntax The assembly syntax to use when decoding instructions.
- @result 0 on success; -1 otherwise.
- */
-int EDGetDisassembler(EDDisassemblerRef *disassembler,
- const char *triple,
- EDAssemblySyntax_t syntax);
-
-/*!
- @functiongroup Generic architectural queries
- */
-
-/*!
- @function EDGetRegisterName
- Gets the human-readable name for a given register.
- @param regName A pointer whose target will be pointed at the name of the
- register. The name does not need to be deallocated and will be
- @param disassembler The disassembler to query for the name.
- @param regID The register identifier, as returned by EDRegisterTokenValue.
- @result 0 on success; -1 otherwise.
- */
-int EDGetRegisterName(const char** regName,
- EDDisassemblerRef disassembler,
- unsigned regID);
-
-/*!
- @function EDRegisterIsStackPointer
- Determines if a register is one of the platform's stack-pointer registers.
- @param disassembler The disassembler to query.
- @param regID The register identifier, as returned by EDRegisterTokenValue.
- @result 1 if true; 0 otherwise.
- */
-int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
- unsigned regID);
-
-/*!
- @function EDRegisterIsProgramCounter
- Determines if a register is one of the platform's stack-pointer registers.
- @param disassembler The disassembler to query.
- @param regID The register identifier, as returned by EDRegisterTokenValue.
- @result 1 if true; 0 otherwise.
- */
-int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
- unsigned regID);
-
-/*!
- @functiongroup Creating and querying instructions
- */
-
-/*!
- @function EDCreateInst
- Gets a set of contiguous instructions from a disassembler.
- @param insts A pointer to an array that will be filled in with the
- instructions. Must have at least count entries. Entries not filled in will
- be set to NULL.
- @param count The maximum number of instructions to fill in.
- @param disassembler The disassembler to use when decoding the instructions.
- @param byteReader The function to use when reading the instruction's machine
- code.
- @param address The address of the first byte of the instruction.
- @param arg An anonymous argument to be passed to byteReader.
- @result The number of instructions read on success; 0 otherwise.
- */
-unsigned int EDCreateInsts(EDInstRef *insts,
- unsigned int count,
- EDDisassemblerRef disassembler,
- EDByteReaderCallback byteReader,
- uint64_t address,
- void *arg);
-
-/*!
- @function EDReleaseInst
- Frees the memory for an instruction. The instruction can no longer be accessed
- after this call.
- @param inst The instruction to be freed.
- */
-void EDReleaseInst(EDInstRef inst);
-
-/*!
- @function EDInstByteSize
- @param inst The instruction to be queried.
- @result The number of bytes in the instruction's machine-code representation.
- */
-int EDInstByteSize(EDInstRef inst);
-
-/*!
- @function EDGetInstString
- Gets the disassembled text equivalent of the instruction.
- @param buf A pointer whose target will be filled in with a pointer to the
- string. (The string becomes invalid when the instruction is released.)
- @param inst The instruction to be queried.
- @result 0 on success; -1 otherwise.
- */
-int EDGetInstString(const char **buf,
- EDInstRef inst);
-
-/*!
- @function EDInstID
- @param instID A pointer whose target will be filled in with the LLVM identifier
- for the instruction.
- @param inst The instruction to be queried.
- @result 0 on success; -1 otherwise.
- */
-int EDInstID(unsigned *instID, EDInstRef inst);
-
-/*!
- @function EDInstIsBranch
- @param inst The instruction to be queried.
- @result 1 if the instruction is a branch instruction; 0 if it is some other
- type of instruction; -1 if there was an error.
- */
-int EDInstIsBranch(EDInstRef inst);
-
-/*!
- @function EDInstIsMove
- @param inst The instruction to be queried.
- @result 1 if the instruction is a move instruction; 0 if it is some other
- type of instruction; -1 if there was an error.
- */
-int EDInstIsMove(EDInstRef inst);
-
-/*!
- @function EDBranchTargetID
- @param inst The instruction to be queried.
- @result The ID of the branch target operand, suitable for use with
- EDCopyOperand. -1 if no such operand exists.
- */
-int EDBranchTargetID(EDInstRef inst);
-
-/*!
- @function EDMoveSourceID
- @param inst The instruction to be queried.
- @result The ID of the move source operand, suitable for use with
- EDCopyOperand. -1 if no such operand exists.
- */
-int EDMoveSourceID(EDInstRef inst);
-
-/*!
- @function EDMoveTargetID
- @param inst The instruction to be queried.
- @result The ID of the move source operand, suitable for use with
- EDCopyOperand. -1 if no such operand exists.
- */
-int EDMoveTargetID(EDInstRef inst);
-
-/*!
- @functiongroup Creating and querying tokens
- */
-
-/*!
- @function EDNumTokens
- @param inst The instruction to be queried.
- @result The number of tokens in the instruction, or -1 on error.
- */
-int EDNumTokens(EDInstRef inst);
-
-/*!
- @function EDGetToken
- Retrieves a token from an instruction. The token is valid until the
- instruction is released.
- @param token A pointer to be filled in with the token.
- @param inst The instruction to be queried.
- @param index The index of the token in the instruction.
- @result 0 on success; -1 otherwise.
- */
-int EDGetToken(EDTokenRef *token,
- EDInstRef inst,
- int index);
-
-/*!
- @function EDGetTokenString
- Gets the disassembled text for a token.
- @param buf A pointer whose target will be filled in with a pointer to the
- string. (The string becomes invalid when the token is released.)
- @param token The token to be queried.
- @result 0 on success; -1 otherwise.
- */
-int EDGetTokenString(const char **buf,
- EDTokenRef token);
-
-/*!
- @function EDOperandIndexForToken
- Returns the index of the operand to which a token belongs.
- @param token The token to be queried.
- @result The operand index on success; -1 otherwise.
- */
-int EDOperandIndexForToken(EDTokenRef token);
-
-/*!
- @function EDTokenIsWhitespace
- @param token The token to be queried.
- @result 1 if the token is whitespace; 0 if not; -1 on error.
- */
-int EDTokenIsWhitespace(EDTokenRef token);
-
-/*!
- @function EDTokenIsPunctuation
- @param token The token to be queried.
- @result 1 if the token is punctuation; 0 if not; -1 on error.
- */
-int EDTokenIsPunctuation(EDTokenRef token);
-
-/*!
- @function EDTokenIsOpcode
- @param token The token to be queried.
- @result 1 if the token is an opcode; 0 if not; -1 on error.
- */
-int EDTokenIsOpcode(EDTokenRef token);
-
-/*!
- @function EDTokenIsLiteral
- @param token The token to be queried.
- @result 1 if the token is a numeric literal; 0 if not; -1 on error.
- */
-int EDTokenIsLiteral(EDTokenRef token);
-
-/*!
- @function EDTokenIsRegister
- @param token The token to be queried.
- @result 1 if the token identifies a register; 0 if not; -1 on error.
- */
-int EDTokenIsRegister(EDTokenRef token);
-
-/*!
- @function EDTokenIsNegativeLiteral
- @param token The token to be queried.
- @result 1 if the token is a negative signed literal; 0 if not; -1 on error.
- */
-int EDTokenIsNegativeLiteral(EDTokenRef token);
-
-/*!
- @function EDLiteralTokenAbsoluteValue
- @param value A pointer whose target will be filled in with the absolute value
- of the literal.
- @param token The token to be queried.
- @result 0 on success; -1 otherwise.
- */
-int EDLiteralTokenAbsoluteValue(uint64_t *value,
- EDTokenRef token);
-
-/*!
- @function EDRegisterTokenValue
- @param registerID A pointer whose target will be filled in with the LLVM
- register identifier for the token.
- @param token The token to be queried.
- @result 0 on success; -1 otherwise.
- */
-int EDRegisterTokenValue(unsigned *registerID,
- EDTokenRef token);
-
-/*!
- @functiongroup Creating and querying operands
- */
-
-/*!
- @function EDNumOperands
- @param inst The instruction to be queried.
- @result The number of operands in the instruction, or -1 on error.
- */
-int EDNumOperands(EDInstRef inst);
-
-/*!
- @function EDGetOperand
- Retrieves an operand from an instruction. The operand is valid until the
- instruction is released.
- @param operand A pointer to be filled in with the operand.
- @param inst The instruction to be queried.
- @param index The index of the operand in the instruction.
- @result 0 on success; -1 otherwise.
- */
-int EDGetOperand(EDOperandRef *operand,
- EDInstRef inst,
- int index);
-
-/*!
- @function EDOperandIsRegister
- @param operand The operand to be queried.
- @result 1 if the operand names a register; 0 if not; -1 on error.
- */
-int EDOperandIsRegister(EDOperandRef operand);
-
-/*!
- @function EDOperandIsImmediate
- @param operand The operand to be queried.
- @result 1 if the operand specifies an immediate value; 0 if not; -1 on error.
- */
-int EDOperandIsImmediate(EDOperandRef operand);
-
-/*!
- @function EDOperandIsMemory
- @param operand The operand to be queried.
- @result 1 if the operand specifies a location in memory; 0 if not; -1 on error.
- */
-int EDOperandIsMemory(EDOperandRef operand);
-
-/*!
- @function EDRegisterOperandValue
- @param value A pointer whose target will be filled in with the LLVM register ID
- of the register named by the operand.
- @param operand The operand to be queried.
- @result 0 on success; -1 otherwise.
- */
-int EDRegisterOperandValue(unsigned *value,
- EDOperandRef operand);
-
-/*!
- @function EDImmediateOperandValue
- @param value A pointer whose target will be filled in with the value of the
- immediate.
- @param operand The operand to be queried.
- @result 0 on success; -1 otherwise.
- */
-int EDImmediateOperandValue(uint64_t *value,
- EDOperandRef operand);
-
-/*!
- @function EDEvaluateOperand
- Evaluates an operand using a client-supplied register state accessor. Register
- operands are evaluated by reading the value of the register; immediate operands
- are evaluated by reporting the immediate value; memory operands are evaluated
- by computing the target address (with only those relocations applied that were
- already applied to the original bytes).
- @param result A pointer whose target is to be filled with the result of
- evaluating the operand.
- @param operand The operand to be evaluated.
- @param regReader The function to use when reading registers from the register
- state.
- @param arg An anonymous argument for client use.
- @result 0 if the operand could be evaluated; -1 otherwise.
- */
-int EDEvaluateOperand(uint64_t *result,
- EDOperandRef operand,
- EDRegisterReaderCallback regReader,
- void *arg);
-
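The evaluation entry point above pairs naturally with a register reader. A hedged sketch, in which register values come from a caller-owned table indexed by LLVM register ID (purely illustrative):

/* Sketch: EDRegisterReaderCallback-compatible reader over a value table. */
static int tableRegReader(uint64_t *value, unsigned regID, void *arg) {
  const uint64_t *regTable = (const uint64_t *)arg; /* hypothetical state */
  *value = regTable[regID];
  return 0; /* 0 means the register could be read */
}

static void evalAllOperands(EDInstRef inst, const uint64_t *regTable) {
  int numOperands = EDNumOperands(inst); /* -1 on error: loop never runs */
  for (int i = 0; i < numOperands; ++i) {
    EDOperandRef op;
    uint64_t result;
    if (EDGetOperand(&op, inst, i) == 0 &&
        EDEvaluateOperand(&result, op, tableRegReader,
                          (void *)regTable) == 0) {
      /* result holds the register value, immediate, or target address */
    }
  }
}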
-#ifdef __BLOCKS__
-
-/*!
- @typedef EDByteBlock_t
- Block-based interface to memory from which instructions may be read.
- @param byte A pointer whose target should be filled in with the data returned.
- @param address The address of the byte to be read.
- @result 0 on success; -1 otherwise.
- */
-typedef int (^EDByteBlock_t)(uint8_t *byte, uint64_t address);
-
-/*!
- @typedef EDRegisterBlock_t
- Block-based interface to a register state from which register values may be
- read.
- @param value A pointer whose target should be filled in with the value of the
- register.
- @param regID The LLVM register identifier for the register to read.
- @result 0 if the register could be read; -1 otherwise.
- */
-typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
-
-/*!
- @typedef EDTokenVisitor_t
- Block-based handler for individual tokens.
- @param token The current token being read.
- @result 0 to continue; 1 to stop normally; -1 on error.
- */
-typedef int (^EDTokenVisitor_t)(EDTokenRef token);
-
-/*! @functiongroup Block-based interfaces */
-
-/*!
- @function EDBlockCreateInsts
- Gets a set of contiguous instructions from a disassembler, using a block to
- read memory.
- @param insts A pointer to an array that will be filled in with the
- instructions. Must have at least count entries. Entries not filled in will
- be set to NULL.
- @param count The maximum number of instructions to fill in.
- @param disassembler The disassembler to use when decoding the instructions.
- @param byteBlock The block to use when reading the instruction's machine
- code.
- @param address The address of the first byte of the first instruction.
- @result The number of instructions read on success; 0 otherwise.
- */
-unsigned int EDBlockCreateInsts(EDInstRef *insts,
- int count,
- EDDisassemblerRef disassembler,
- EDByteBlock_t byteBlock,
- uint64_t address);
-
-/*!
- @function EDBlockEvaluateOperand
- Evaluates an operand using a block to read registers.
- @param result A pointer whose target is to be filled with the result of
- evaluating the operand.
- @param operand The operand to be evaluated.
- @param regBlock The block to use when reading registers from the register
- state.
- @result 0 if the operand could be evaluated; -1 otherwise.
- */
-int EDBlockEvaluateOperand(uint64_t *result,
- EDOperandRef operand,
- EDRegisterBlock_t regBlock);
-
-/*!
- @function EDBlockVisitTokens
- Visits every token with a visitor.
- @param inst The instruction with the tokens to be visited.
- @param visitor The visitor.
- @result 0 if the visit ended normally; -1 if the visitor encountered an error
- or there was some other error.
- */
-int EDBlockVisitTokens(EDInstRef inst,
- EDTokenVisitor_t visitor);
-
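A sketch of token visitation with this block-based interface; it requires Clang's blocks extension (-fblocks), and printf stands in for whatever the client actually does with the token text:

#include <stdio.h>

static void printTokens(EDInstRef inst) {
  EDBlockVisitTokens(inst, ^int(EDTokenRef token) {
    const char *str;
    if (EDGetTokenString(&str, token) != 0)
      return -1;        /* abort the visit on error */
    printf("%s ", str); /* text stays valid until inst is released */
    return 0;           /* continue with the next token */
  });
}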
-/**
- * @}
- */
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/include/llvm-c/Initialization.h b/include/llvm-c/Initialization.h
index cb3ab9e3f393..ada473818da6 100644
--- a/include/llvm-c/Initialization.h
+++ b/include/llvm-c/Initialization.h
@@ -34,6 +34,7 @@ extern "C" {
void LLVMInitializeCore(LLVMPassRegistryRef R);
void LLVMInitializeTransformUtils(LLVMPassRegistryRef R);
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R);
+void LLVMInitializeObjCARCOpts(LLVMPassRegistryRef R);
void LLVMInitializeVectorization(LLVMPassRegistryRef R);
void LLVMInitializeInstCombine(LLVMPassRegistryRef R);
void LLVMInitializeIPO(LLVMPassRegistryRef R);
diff --git a/include/llvm-c/LinkTimeOptimizer.h b/include/llvm-c/LinkTimeOptimizer.h
index 5338d3fc4c85..7a0fbf65bedb 100644
--- a/include/llvm-c/LinkTimeOptimizer.h
+++ b/include/llvm-c/LinkTimeOptimizer.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef __LTO_CAPI_H__
-#define __LTO_CAPI_H__
+#ifndef LLVM_C_LINKTIMEOPTIMIZER_H
+#define LLVM_C_LINKTIMEOPTIMIZER_H
#ifdef __cplusplus
extern "C" {
diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h
index 29668de46529..691abdfcb47a 100644
--- a/include/llvm-c/TargetMachine.h
+++ b/include/llvm-c/TargetMachine.h
@@ -20,6 +20,7 @@
#define LLVM_C_TARGETMACHINE_H
#include "llvm-c/Core.h"
+#include "llvm-c/Target.h"
#ifdef __cplusplus
extern "C" {
diff --git a/include/llvm-c/Transforms/PassManagerBuilder.h b/include/llvm-c/Transforms/PassManagerBuilder.h
index cee6e5a0ee08..82e513d4905d 100644
--- a/include/llvm-c/Transforms/PassManagerBuilder.h
+++ b/include/llvm-c/Transforms/PassManagerBuilder.h
@@ -11,8 +11,8 @@
|* *|
\*===----------------------------------------------------------------------===*/
-#ifndef LLVM_C_PASSMANAGERBUILDER
-#define LLVM_C_PASSMANAGERBUILDER
+#ifndef LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H
+#define LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H
#include "llvm-c/Core.h"
@@ -77,8 +77,8 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
/** See llvm::PassManagerBuilder::populateLTOPassManager. */
void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM,
- bool Internalize,
- bool RunInliner);
+ LLVMBool Internalize,
+ LLVMBool RunInliner);
/**
* @}
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index f43d365e3dbe..40110fddfc13 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -13,8 +13,8 @@
|* *|
\*===----------------------------------------------------------------------===*/
-#ifndef LTO_H
-#define LTO_H 1
+#ifndef LLVM_C_LTO_H
+#define LLVM_C_LTO_H
#include <stdbool.h>
#include <stddef.h>
@@ -291,6 +291,13 @@ lto_codegen_compile_to_file(lto_code_gen_t cg, const char** name);
extern void
lto_codegen_debug_options(lto_code_gen_t cg, const char *);
+/**
+ * Initializes LLVM disassemblers.
+ * FIXME: This doesn't really belong here.
+ */
+extern void
+lto_initialize_disassembler(void);
+
#ifdef __cplusplus
}
#endif
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h
index 31c6e6adbfc6..14bcaef6d165 100644
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@@ -97,8 +97,8 @@
nexttoward.
*/
-#ifndef LLVM_FLOAT_H
-#define LLVM_FLOAT_H
+#ifndef LLVM_ADT_APFLOAT_H
+#define LLVM_ADT_APFLOAT_H
// APInt contains static functions implementing bignum arithmetic.
#include "llvm/ADT/APInt.h"
@@ -184,9 +184,9 @@ namespace llvm {
APFloat(const fltSemantics &, integerPart);
APFloat(const fltSemantics &, fltCategory, bool negative);
APFloat(const fltSemantics &, uninitializedTag);
+ APFloat(const fltSemantics &, const APInt &);
explicit APFloat(double d);
explicit APFloat(float f);
- explicit APFloat(const APInt &, bool isIEEE = false);
APFloat(const APFloat &);
~APFloat();
@@ -300,7 +300,7 @@ namespace llvm {
/* The definition of equality is not straightforward for floating point,
so we won't use operator==. Use one of the following, or write
whatever it is you really mean. */
- // bool operator==(const APFloat &) const; // DO NOT IMPLEMENT
+ bool operator==(const APFloat &) const LLVM_DELETED_FUNCTION;
/* IEEE comparison with another floating point number (NaNs
compare unordered, 0==-0). */
@@ -327,6 +327,7 @@ namespace llvm {
bool isNegative() const { return sign; }
bool isPosZero() const { return isZero() && !isNegative(); }
bool isNegZero() const { return isZero() && isNegative(); }
+ bool isDenormal() const;
APFloat& operator=(const APFloat &);
@@ -422,7 +423,7 @@ namespace llvm {
APInt convertQuadrupleAPFloatToAPInt() const;
APInt convertF80LongDoubleAPFloatToAPInt() const;
APInt convertPPCDoubleDoubleAPFloatToAPInt() const;
- void initFromAPInt(const APInt& api, bool isIEEE = false);
+ void initFromAPInt(const fltSemantics *Sem, const APInt& api);
void initFromHalfAPInt(const APInt& api);
void initFromFloatAPInt(const APInt& api);
void initFromDoubleAPInt(const APInt& api);
@@ -462,4 +463,4 @@ namespace llvm {
hash_code hash_value(const APFloat &Arg);
} /* namespace llvm */
-#endif /* LLVM_FLOAT_H */
+#endif /* LLVM_ADT_APFLOAT_H */
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index c7c8016b8339..3d8b72d9aaf4 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_APINT_H
-#define LLVM_APINT_H
+#ifndef LLVM_ADT_APINT_H
+#define LLVM_ADT_APINT_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Compiler.h"
@@ -274,7 +274,7 @@ public:
initSlowCase(that);
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
/// @brief Move Constructor.
APInt(APInt&& that) : BitWidth(that.BitWidth), VAL(that.VAL) {
that.BitWidth = 0;
@@ -427,7 +427,7 @@ public:
/// @returns the all-ones value for an APInt of the specified bit-width.
/// @brief Get the all-ones value.
static APInt getAllOnesValue(unsigned numBits) {
- return APInt(numBits, -1ULL, true);
+ return APInt(numBits, UINT64_MAX, true);
}
/// @returns the '0' value for an APInt of the specified bit-width.
@@ -498,13 +498,24 @@ public:
if (loBitsSet == 0)
return APInt(numBits, 0);
if (loBitsSet == APINT_BITS_PER_WORD)
- return APInt(numBits, -1ULL);
+ return APInt(numBits, UINT64_MAX);
// For small values, return quickly.
if (loBitsSet <= APINT_BITS_PER_WORD)
- return APInt(numBits, -1ULL >> (APINT_BITS_PER_WORD - loBitsSet));
+ return APInt(numBits, UINT64_MAX >> (APINT_BITS_PER_WORD - loBitsSet));
return getAllOnesValue(numBits).lshr(numBits - loBitsSet);
}
+ /// \brief Return a value containing V broadcasted over NewLen bits.
+ static APInt getSplat(unsigned NewLen, const APInt &V) {
+ assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
+
+ APInt Val = V.zextOrSelf(NewLen);
+ for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
+ Val |= Val << I;
+
+ return Val;
+ }
+
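To make the doubling loop in getSplat concrete: splatting an 8-bit pattern to 32 bits copies it four times. A small sketch (getSplat asserts that NewLen is at least the source width):

#include "llvm/ADT/APInt.h"

void splatExample() {
  llvm::APInt Byte(8, 0xAB);
  llvm::APInt Word = llvm::APInt::getSplat(32, Byte); // 0xABABABAB
  (void)Word;
}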
/// \brief Determine if two APInts have the same value, after zero-extending
/// one of them (if needed!) to ensure that the bit-widths match.
static bool isSameValue(const APInt &I1, const APInt &I2) {
@@ -601,7 +612,7 @@ public:
return AssignSlowCase(RHS);
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
/// @brief Move assignment operator.
APInt& operator=(APInt&& that) {
if (!isSingleWord())
@@ -799,16 +810,7 @@ public:
/// Signed divide this APInt by APInt RHS.
/// @brief Signed division function for APInt.
- APInt sdiv(const APInt &RHS) const {
- if (isNegative())
- if (RHS.isNegative())
- return (-(*this)).udiv(-RHS);
- else
- return -((-(*this)).udiv(RHS));
- else if (RHS.isNegative())
- return -(this->udiv(-RHS));
- return this->udiv(RHS);
- }
+ APInt sdiv(const APInt &RHS) const;
/// Perform an unsigned remainder operation on this APInt with RHS being the
/// divisor. Both this and RHS are treated as unsigned quantities for purposes
@@ -821,16 +823,7 @@ public:
/// Signed remainder operation on APInt.
/// @brief Function for signed remainder operation.
- APInt srem(const APInt &RHS) const {
- if (isNegative())
- if (RHS.isNegative())
- return -((-(*this)).urem(-RHS));
- else
- return -((-(*this)).urem(RHS));
- else if (RHS.isNegative())
- return this->urem(-RHS);
- return this->urem(RHS);
- }
+ APInt srem(const APInt &RHS) const;
/// Sometimes it is convenient to divide two APInt values and obtain both the
/// quotient and remainder. This function does both operations in the same
@@ -842,24 +835,9 @@ public:
APInt &Quotient, APInt &Remainder);
static void sdivrem(const APInt &LHS, const APInt &RHS,
- APInt &Quotient, APInt &Remainder) {
- if (LHS.isNegative()) {
- if (RHS.isNegative())
- APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
- else {
- APInt::udivrem(-LHS, RHS, Quotient, Remainder);
- Quotient = -Quotient;
- }
- Remainder = -Remainder;
- } else if (RHS.isNegative()) {
- APInt::udivrem(LHS, -RHS, Quotient, Remainder);
- Quotient = -Quotient;
- } else {
- APInt::udivrem(LHS, RHS, Quotient, Remainder);
- }
- }
-
-
+ APInt &Quotient, APInt &Remainder);
+
+
// Operations that return overflow indicators.
APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
@@ -1113,11 +1091,11 @@ public:
/// @brief Set every bit to 1.
void setAllBits() {
if (isSingleWord())
- VAL = -1ULL;
+ VAL = UINT64_MAX;
else {
// Set all the bits in all the words.
for (unsigned i = 0; i < getNumWords(); ++i)
- pVal[i] = -1ULL;
+ pVal[i] = UINT64_MAX;
}
// Clear the unused ones
clearUnusedBits();
@@ -1142,10 +1120,10 @@ public:
/// @brief Toggle every bit to its opposite value.
void flipAllBits() {
if (isSingleWord())
- VAL ^= -1ULL;
+ VAL ^= UINT64_MAX;
else {
for (unsigned i = 0; i < getNumWords(); ++i)
- pVal[i] ^= -1ULL;
+ pVal[i] ^= UINT64_MAX;
}
clearUnusedBits();
}
@@ -1191,7 +1169,8 @@ public:
/// APInt. This is used in conjunction with getActiveData to extract the raw
/// value of the APInt.
unsigned getActiveWords() const {
- return whichWord(getActiveBits()-1) + 1;
+ unsigned numActiveBits = getActiveBits();
+ return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
}
/// Computes the minimum bit width for this APInt while considering it to be
diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h
index 048c65ce2c77..11be4c513e2c 100644
--- a/include/llvm/ADT/APSInt.h
+++ b/include/llvm/ADT/APSInt.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_APSINT_H
-#define LLVM_APSINT_H
+#ifndef LLVM_ADT_APSINT_H
+#define LLVM_ADT_APSINT_H
#include "llvm/ADT/APInt.h"
@@ -23,7 +23,7 @@ class APSInt : public APInt {
bool IsUnsigned;
public:
/// Default constructor that creates an uninitialized APInt.
- explicit APSInt() {}
+ explicit APSInt() : IsUnsigned(false) {}
/// APSInt ctor - Create an APSInt with the specified width, default to
/// unsigned.
@@ -161,11 +161,11 @@ public:
}
APSInt& operator++() {
- static_cast<APInt&>(*this)++;
+ ++(static_cast<APInt&>(*this));
return *this;
}
APSInt& operator--() {
- static_cast<APInt&>(*this)--;
+ --(static_cast<APInt&>(*this));
return *this;
}
APSInt operator++(int) {
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
index 1e35d6279219..c555c1c2b1d5 100644
--- a/include/llvm/ADT/ArrayRef.h
+++ b/include/llvm/ADT/ArrayRef.h
@@ -33,6 +33,8 @@ namespace llvm {
typedef const T *const_iterator;
typedef size_t size_type;
+ typedef std::reverse_iterator<iterator> reverse_iterator;
+
private:
/// The start of the array, in an external buffer.
const T *Data;
@@ -84,6 +86,9 @@ namespace llvm {
iterator begin() const { return Data; }
iterator end() const { return Data + Length; }
+ reverse_iterator rbegin() const { return reverse_iterator(end()); }
+ reverse_iterator rend() const { return reverse_iterator(begin()); }
+
/// empty - Check if the array is empty.
bool empty() const { return Length == 0; }
@@ -171,41 +176,41 @@ namespace llvm {
/// Construct an empty ArrayRef.
/*implicit*/ MutableArrayRef() : ArrayRef<T>() {}
-
+
/// Construct a MutableArrayRef from a single element.
/*implicit*/ MutableArrayRef(T &OneElt) : ArrayRef<T>(OneElt) {}
-
+
/// Construct a MutableArrayRef from a pointer and length.
/*implicit*/ MutableArrayRef(T *data, size_t length)
: ArrayRef<T>(data, length) {}
-
+
/// Construct a MutableArrayRef from a range.
MutableArrayRef(T *begin, T *end) : ArrayRef<T>(begin, end) {}
-
+
/// Construct a MutableArrayRef from a SmallVector.
/*implicit*/ MutableArrayRef(SmallVectorImpl<T> &Vec)
: ArrayRef<T>(Vec) {}
-
+
/// Construct a MutableArrayRef from a std::vector.
/*implicit*/ MutableArrayRef(std::vector<T> &Vec)
: ArrayRef<T>(Vec) {}
-
+
/// Construct a MutableArrayRef from a C array.
template <size_t N>
/*implicit*/ MutableArrayRef(T (&Arr)[N])
: ArrayRef<T>(Arr) {}
-
+
T *data() const { return const_cast<T*>(ArrayRef<T>::data()); }
iterator begin() const { return data(); }
iterator end() const { return data() + this->size(); }
-
+
/// front - Get the first element.
T &front() const {
assert(!this->empty());
return data()[0];
}
-
+
/// back - Get the last element.
T &back() const {
assert(!this->empty());
@@ -217,14 +222,14 @@ namespace llvm {
assert(N <= this->size() && "Invalid specifier");
return MutableArrayRef<T>(data()+N, this->size()-N);
}
-
+
/// slice(n, m) - Chop off the first N elements of the array, and keep M
/// elements in the array.
MutableArrayRef<T> slice(unsigned N, unsigned M) const {
assert(N+M <= this->size() && "Invalid specifier");
return MutableArrayRef<T>(data()+N, M);
}
-
+
/// @}
/// @name Operator Overloads
/// @{
@@ -301,5 +306,5 @@ namespace llvm {
static const bool value = true;
};
}
-
+
#endif
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index 9d6388f7ee61..82cfdf437d4e 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -98,7 +98,7 @@ public:
std::memcpy(Bits, RHS.Bits, Capacity * sizeof(BitWord));
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
BitVector(BitVector &&RHS)
: Bits(RHS.Bits), Size(RHS.Size), Capacity(RHS.Capacity) {
RHS.Bits = 0;
@@ -452,7 +452,7 @@ public:
return *this;
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
const BitVector &operator=(BitVector &&RHS) {
if (this == &RHS) return *this;
diff --git a/include/llvm/ADT/DAGDeltaAlgorithm.h b/include/llvm/ADT/DAGDeltaAlgorithm.h
index 2dfed075dea5..3dd862c8b220 100644
--- a/include/llvm/ADT/DAGDeltaAlgorithm.h
+++ b/include/llvm/ADT/DAGDeltaAlgorithm.h
@@ -9,8 +9,8 @@
#ifndef LLVM_ADT_DAGDELTAALGORITHM_H
#define LLVM_ADT_DAGDELTAALGORITHM_H
-#include <vector>
#include <set>
+#include <vector>
namespace llvm {
diff --git a/include/llvm/ADT/DeltaAlgorithm.h b/include/llvm/ADT/DeltaAlgorithm.h
index 7bf7960c63a9..4d07e044781f 100644
--- a/include/llvm/ADT/DeltaAlgorithm.h
+++ b/include/llvm/ADT/DeltaAlgorithm.h
@@ -9,8 +9,8 @@
#ifndef LLVM_ADT_DELTAALGORITHM_H
#define LLVM_ADT_DELTAALGORITHM_H
-#include <vector>
#include <set>
+#include <vector>
namespace llvm {
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index ac4bdbd126c5..d41061996436 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -14,20 +14,20 @@
#ifndef LLVM_ADT_DENSEMAP_H
#define LLVM_ADT_DENSEMAP_H
-#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include "llvm/Support/type_traits.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include <algorithm>
-#include <iterator>
-#include <new>
-#include <utility>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstring>
+#include <iterator>
+#include <new>
+#include <utility>
namespace llvm {
@@ -75,7 +75,7 @@ public:
void clear() {
if (getNumEntries() == 0 && getNumTombstones() == 0) return;
-
+
// If the capacity of the array is huge, and the # elements used is small,
// shrink the array.
if (getNumEntries() * 4 < getNumBuckets() && getNumBuckets() > 64) {
@@ -159,6 +159,24 @@ public:
return std::make_pair(iterator(TheBucket, getBucketsEnd(), true), true);
}
+#if LLVM_HAS_RVALUE_REFERENCES
+ // Inserts a key/value pair into the map if the key isn't already in the map.
+ // If the key is already in the map, it returns false and doesn't update the
+ // value.
+ std::pair<iterator, bool> insert(std::pair<KeyT, ValueT> &&KV) {
+ BucketT *TheBucket;
+ if (LookupBucketFor(KV.first, TheBucket))
+ return std::make_pair(iterator(TheBucket, getBucketsEnd(), true),
+ false); // Already in map.
+
+ // Otherwise, insert the new element.
+ TheBucket = InsertIntoBucket(std::move(KV.first),
+ std::move(KV.second),
+ TheBucket);
+ return std::make_pair(iterator(TheBucket, getBucketsEnd(), true), true);
+ }
+#endif
+
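When the pair being inserted is an rvalue, the overload above moves both key and value into the bucket instead of copying them. A minimal sketch, assuming a C++11 build where LLVM_HAS_RVALUE_REFERENCES is set:

#include "llvm/ADT/DenseMap.h"
#include <string>
#include <utility>

void moveInsertExample(llvm::DenseMap<int, std::string> &Map) {
  std::string Big(1024, 'x');
  Map.insert(std::make_pair(1, std::move(Big))); // Big's buffer is moved,
                                                 // not copied
}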
/// insert - Range insertion of pairs.
template<typename InputIt>
void insert(InputIt I, InputIt E) {
@@ -198,7 +216,7 @@ public:
return FindAndConstruct(Key).second;
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
value_type& FindAndConstruct(KeyT &&Key) {
BucketT *TheBucket;
if (LookupBucketFor(Key, TheBucket))
@@ -383,7 +401,7 @@ private:
return TheBucket;
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
BucketT *InsertIntoBucket(const KeyT &Key, ValueT &&Value,
BucketT *TheBucket) {
TheBucket = InsertIntoBucketImpl(Key, TheBucket);
@@ -430,7 +448,8 @@ private:
incrementNumEntries();
// If we are writing over a tombstone, remember this.
- if (!KeyInfoT::isEqual(TheBucket->first, getEmptyKey()))
+ const KeyT EmptyKey = getEmptyKey();
+ if (!KeyInfoT::isEqual(TheBucket->first, EmptyKey))
decrementNumTombstones();
return TheBucket;
@@ -474,7 +493,6 @@ private:
if (KeyInfoT::isEqual(ThisBucket->first, EmptyKey)) {
// If we've already seen a tombstone while probing, fill it in instead
// of the empty bucket we eventually probed to.
- if (FoundTombstone) ThisBucket = FoundTombstone;
FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket;
return false;
}
@@ -531,13 +549,13 @@ public:
init(NumInitBuckets);
}
- DenseMap(const DenseMap &other) {
+ DenseMap(const DenseMap &other) : BaseT() {
init(0);
copyFrom(other);
}
-#if LLVM_USE_RVALUE_REFERENCES
- DenseMap(DenseMap &&other) {
+#if LLVM_HAS_RVALUE_REFERENCES
+ DenseMap(DenseMap &&other) : BaseT() {
init(0);
swap(other);
}
@@ -566,7 +584,7 @@ public:
return *this;
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
DenseMap& operator=(DenseMap &&other) {
this->destroyAll();
operator delete(Buckets);
@@ -700,7 +718,7 @@ public:
copyFrom(other);
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
SmallDenseMap(SmallDenseMap &&other) {
init(0);
swap(other);
@@ -795,7 +813,7 @@ public:
return *this;
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
SmallDenseMap& operator=(SmallDenseMap &&other) {
this->destroyAll();
deallocateBuckets();
@@ -1027,7 +1045,7 @@ private:
++Ptr;
}
};
-
+
template<typename KeyT, typename ValueT, typename KeyInfoT>
static inline size_t
capacity_in_bytes(const DenseMap<KeyT, ValueT, KeyInfoT> &X) {
diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h
index 8ab9a33200c3..d699ad51ada4 100644
--- a/include/llvm/ADT/DenseSet.h
+++ b/include/llvm/ADT/DenseSet.h
@@ -32,8 +32,10 @@ public:
bool empty() const { return TheMap.empty(); }
unsigned size() const { return TheMap.size(); }
+ size_t getMemorySize() const { return TheMap.getMemorySize(); }
- /// Grow the denseset so that it has at least Size buckets. Does not shrink
+ /// Grow the DenseSet so that it has at least Size buckets. Will not shrink
+ /// the size of the set.
void resize(size_t Size) { TheMap.resize(Size); }
void clear() {
diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h
index 519b18052b6d..644544253ab7 100644
--- a/include/llvm/ADT/DepthFirstIterator.h
+++ b/include/llvm/ADT/DepthFirstIterator.h
@@ -34,8 +34,8 @@
#define LLVM_ADT_DEPTHFIRSTITERATOR_H
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include <set>
#include <vector>
diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index 375d84abebdd..91794dea6981 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -16,9 +16,9 @@
#ifndef LLVM_ADT_FOLDINGSET_H
#define LLVM_ADT_FOLDINGSET_H
-#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
class APFloat;
diff --git a/include/llvm/ADT/ImmutableIntervalMap.h b/include/llvm/ADT/ImmutableIntervalMap.h
index fa7ccb975e52..6793c6b9c205 100644
--- a/include/llvm/ADT/ImmutableIntervalMap.h
+++ b/include/llvm/ADT/ImmutableIntervalMap.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_IMMUTABLE_INTERVAL_MAP_H
-#define LLVM_ADT_IMMUTABLE_INTERVAL_MAP_H
+#ifndef LLVM_ADT_IMMUTABLEINTERVALMAP_H
+#define LLVM_ADT_IMMUTABLEINTERVALMAP_H
#include "llvm/ADT/ImmutableMap.h"
diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h
index 20bdd903f7a5..7f0c239423bd 100644
--- a/include/llvm/ADT/ImmutableList.h
+++ b/include/llvm/ADT/ImmutableList.h
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_IMLIST_H
-#define LLVM_ADT_IMLIST_H
+#ifndef LLVM_ADT_IMMUTABLELIST_H
+#define LLVM_ADT_IMMUTABLELIST_H
-#include "llvm/Support/Allocator.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>
diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h
index 4883c5ba0a6b..a667479a4d17 100644
--- a/include/llvm/ADT/ImmutableMap.h
+++ b/include/llvm/ADT/ImmutableMap.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_IMMAP_H
-#define LLVM_ADT_IMMAP_H
+#ifndef LLVM_ADT_IMMUTABLEMAP_H
+#define LLVM_ADT_IMMUTABLEMAP_H
#include "llvm/ADT/ImmutableSet.h"
@@ -211,17 +211,22 @@ public:
friend class ImmutableMap;
public:
- value_type_ref operator*() const { return itr->getValue(); }
- value_type* operator->() const { return &itr->getValue(); }
+ typedef typename ImmutableMap<KeyT,ValT,ValInfo>::value_type value_type;
+ typedef typename ImmutableMap<KeyT,ValT,ValInfo>::value_type_ref reference;
+ typedef typename iterator::value_type *pointer;
+ typedef std::bidirectional_iterator_tag iterator_category;
+
+ typename iterator::reference operator*() const { return itr->getValue(); }
+ typename iterator::pointer operator->() const { return &itr->getValue(); }
key_type_ref getKey() const { return itr->getValue().first; }
data_type_ref getData() const { return itr->getValue().second; }
-
iterator& operator++() { ++itr; return *this; }
iterator operator++(int) { iterator tmp(*this); ++itr; return tmp; }
iterator& operator--() { --itr; return *this; }
iterator operator--(int) { iterator tmp(*this); --itr; return tmp; }
+
bool operator==(const iterator& RHS) const { return RHS.itr == itr; }
bool operator!=(const iterator& RHS) const { return RHS.itr != itr; }
};
@@ -288,6 +293,13 @@ public:
Factory(F) {
if (Root) { Root->retain(); }
}
+
+ explicit ImmutableMapRef(const ImmutableMap<KeyT, ValT> &X,
+ typename ImmutableMap<KeyT, ValT>::Factory &F)
+ : Root(X.getRootWithoutRetain()),
+ Factory(F.getTreeFactory()) {
+ if (Root) { Root->retain(); }
+ }
ImmutableMapRef(const ImmutableMapRef &X)
: Root(X.Root),
@@ -318,12 +330,20 @@ public:
return ImmutableMapRef(0, F);
}
- ImmutableMapRef add(key_type_ref K, data_type_ref D) {
+ void manualRetain() {
+ if (Root) Root->retain();
+ }
+
+ void manualRelease() {
+ if (Root) Root->release();
+ }
+
+ ImmutableMapRef add(key_type_ref K, data_type_ref D) const {
TreeTy *NewT = Factory->add(Root, std::pair<key_type, data_type>(K, D));
return ImmutableMapRef(NewT, Factory);
}
- ImmutableMapRef remove(key_type_ref K) {
+ ImmutableMapRef remove(key_type_ref K) const {
TreeTy *NewT = Factory->remove(Root, K);
return ImmutableMapRef(NewT, Factory);
}
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 3900f96be16a..fbdf066e61ab 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_IMSET_H
-#define LLVM_ADT_IMSET_H
+#ifndef LLVM_ADT_IMMUTABLESET_H
+#define LLVM_ADT_IMMUTABLESET_H
-#include "llvm/Support/Allocator.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
@@ -1054,18 +1054,27 @@ public:
class iterator {
typename TreeTy::iterator itr;
+
+ iterator() {}
iterator(TreeTy* t) : itr(t) {}
friend class ImmutableSet<ValT,ValInfo>;
+
public:
- iterator() {}
- inline value_type_ref operator*() const { return itr->getValue(); }
- inline iterator& operator++() { ++itr; return *this; }
- inline iterator operator++(int) { iterator tmp(*this); ++itr; return tmp; }
- inline iterator& operator--() { --itr; return *this; }
- inline iterator operator--(int) { iterator tmp(*this); --itr; return tmp; }
- inline bool operator==(const iterator& RHS) const { return RHS.itr == itr; }
- inline bool operator!=(const iterator& RHS) const { return RHS.itr != itr; }
- inline value_type *operator->() const { return &(operator*()); }
+ typedef typename ImmutableSet<ValT,ValInfo>::value_type value_type;
+ typedef typename ImmutableSet<ValT,ValInfo>::value_type_ref reference;
+ typedef typename iterator::value_type *pointer;
+ typedef std::bidirectional_iterator_tag iterator_category;
+
+ typename iterator::reference operator*() const { return itr->getValue(); }
+ typename iterator::pointer operator->() const { return &(operator*()); }
+
+ iterator& operator++() { ++itr; return *this; }
+ iterator operator++(int) { iterator tmp(*this); ++itr; return tmp; }
+ iterator& operator--() { --itr; return *this; }
+ iterator operator--(int) { iterator tmp(*this); --itr; return tmp; }
+
+ bool operator==(const iterator& RHS) const { return RHS.itr == itr; }
+ bool operator!=(const iterator& RHS) const { return RHS.itr != itr; }
};
iterator begin() const { return iterator(Root); }
diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h
index 931b67e40911..c4083eed6a99 100644
--- a/include/llvm/ADT/IntervalMap.h
+++ b/include/llvm/ADT/IntervalMap.h
@@ -99,8 +99,8 @@
#ifndef LLVM_ADT_INTERVALMAP_H
#define LLVM_ADT_INTERVALMAP_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/RecyclingAllocator.h"
#include <iterator>
@@ -151,6 +151,26 @@ struct IntervalMapInfo {
};
+template <typename T>
+struct IntervalMapHalfOpenInfo {
+
+ /// startLess - Return true if x is not in [a;b).
+ static inline bool startLess(const T &x, const T &a) {
+ return x < a;
+ }
+
+ /// stopLess - Return true if x is not in [a;b).
+ static inline bool stopLess(const T &b, const T &x) {
+ return b <= x;
+ }
+
+ /// adjacent - Return true when the intervals [x;a) and [b;y) can coalesce.
+ static inline bool adjacent(const T &a, const T &b) {
+ return a == b;
+ }
+
+};
+
/// IntervalMapImpl - Namespace used for IntervalMap implementation details.
/// It should be considered private to the implementation.
namespace IntervalMapImpl {
diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h
index a9724ee15447..b8b88619957e 100644
--- a/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -18,8 +18,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_INTRUSIVE_REF_CNT_PTR
-#define LLVM_ADT_INTRUSIVE_REF_CNT_PTR
+#ifndef LLVM_ADT_INTRUSIVEREFCNTPTR_H
+#define LLVM_ADT_INTRUSIVEREFCNTPTR_H
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
@@ -123,7 +123,7 @@ namespace llvm {
retain();
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
IntrusiveRefCntPtr(IntrusiveRefCntPtr&& S) : Obj(S.Obj) {
S.Obj = 0;
}
@@ -226,13 +226,13 @@ namespace llvm {
template<class T> struct simplify_type<IntrusiveRefCntPtr<T> > {
typedef T* SimpleType;
- static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr<T>& Val) {
+ static SimpleType getSimplifiedValue(IntrusiveRefCntPtr<T>& Val) {
return Val.getPtr();
}
};
template<class T> struct simplify_type<const IntrusiveRefCntPtr<T> > {
- typedef T* SimpleType;
+ typedef /*const*/ T* SimpleType;
static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr<T>& Val) {
return Val.getPtr();
}
@@ -240,4 +240,4 @@ namespace llvm {
} // end namespace llvm
-#endif // LLVM_ADT_INTRUSIVE_REF_CNT_PTR
+#endif // LLVM_ADT_INTRUSIVEREFCNTPTR_H
diff --git a/include/llvm/ADT/MapVector.h b/include/llvm/ADT/MapVector.h
index 6aacca5a6f0f..f6fcb0888de3 100644
--- a/include/llvm/ADT/MapVector.h
+++ b/include/llvm/ADT/MapVector.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include <vector>
namespace llvm {
@@ -63,6 +64,11 @@ public:
return Vector.empty();
}
+ std::pair<KeyT, ValueT> &front() { return Vector.front(); }
+ const std::pair<KeyT, ValueT> &front() const { return Vector.front(); }
+ std::pair<KeyT, ValueT> &back() { return Vector.back(); }
+ const std::pair<KeyT, ValueT> &back() const { return Vector.back(); }
+
void clear() {
Map.clear();
Vector.clear();
@@ -79,10 +85,46 @@ public:
return Vector[I].second;
}
+ ValueT lookup(const KeyT &Key) const {
+ typename MapType::const_iterator Pos = Map.find(Key);
+ return Pos == Map.end()? ValueT() : Vector[Pos->second].second;
+ }
+
+ std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &KV) {
+ std::pair<KeyT, unsigned> Pair = std::make_pair(KV.first, 0);
+ std::pair<typename MapType::iterator, bool> Result = Map.insert(Pair);
+ unsigned &I = Result.first->second;
+ if (Result.second) {
+ Vector.push_back(std::make_pair(KV.first, KV.second));
+ I = Vector.size() - 1;
+ return std::make_pair(llvm::prior(end()), true);
+ }
+ return std::make_pair(begin() + I, false);
+ }
+
unsigned count(const KeyT &Key) const {
typename MapType::const_iterator Pos = Map.find(Key);
return Pos == Map.end()? 0 : 1;
}
+
+ iterator find(const KeyT &Key) {
+ typename MapType::const_iterator Pos = Map.find(Key);
+ return Pos == Map.end()? Vector.end() :
+ (Vector.begin() + Pos->second);
+ }
+
+ const_iterator find(const KeyT &Key) const {
+ typename MapType::const_iterator Pos = Map.find(Key);
+ return Pos == Map.end()? Vector.end() :
+ (Vector.begin() + Pos->second);
+ }
+
+ /// \brief Remove the last element from the vector.
+ void pop_back() {
+ typename MapType::iterator Pos = Map.find(Vector.back().first);
+ Map.erase(Pos);
+ Vector.pop_back();
+ }
};
}
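MapVector pairs a DenseMap index with a vector of key/value pairs, so it keeps insertion order while the insert/find/lookup members added above give map-style access. A short usage sketch:

#include "llvm/ADT/MapVector.h"
#include <utility>

void mapVectorExample() {
  llvm::MapVector<unsigned, int> MV;
  MV.insert(std::make_pair(3u, 30)); // first insertion wins
  MV.insert(std::make_pair(1u, 10));
  MV.insert(std::make_pair(3u, 99)); // key present: value stays 30
  int V = MV.lookup(1u);             // 10; value-initialized if absent
  (void)V;
  MV.pop_back();                     // removes (1, 10), the last element
}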
diff --git a/include/llvm/ADT/None.h b/include/llvm/ADT/None.h
new file mode 100644
index 000000000000..5793bd2faef4
--- /dev/null
+++ b/include/llvm/ADT/None.h
@@ -0,0 +1,27 @@
+//===-- None.h - Simple null value for implicit construction ------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides None, an enumerator for use in implicit constructors
+// of various (usually templated) types to make such construction more
+// terse.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_NONE_H
+#define LLVM_ADT_NONE_H
+
+namespace llvm {
+/// \brief A simple null object to allow implicit construction of Optional<T>
+/// and similar types without having to spell out the specialization's name.
+enum NoneType {
+ None
+};
+}
+
+#endif
diff --git a/include/llvm/ADT/NullablePtr.h b/include/llvm/ADT/NullablePtr.h
index a9c47a138eca..8ddfd5d20abd 100644
--- a/include/llvm/ADT/NullablePtr.h
+++ b/include/llvm/ADT/NullablePtr.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_NULLABLE_PTR_H
-#define LLVM_ADT_NULLABLE_PTR_H
+#ifndef LLVM_ADT_NULLABLEPTR_H
+#define LLVM_ADT_NULLABLEPTR_H
#include <cassert>
#include <cstddef>
diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h
index f43aeb1bc4d9..194e53fac213 100644
--- a/include/llvm/ADT/Optional.h
+++ b/include/llvm/ADT/Optional.h
@@ -13,13 +13,15 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_OPTIONAL
-#define LLVM_ADT_OPTIONAL
+#ifndef LLVM_ADT_OPTIONAL_H
+#define LLVM_ADT_OPTIONAL_H
+#include "llvm/ADT/None.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/AlignOf.h"
#include <cassert>
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
#include <utility>
#endif
@@ -27,54 +29,116 @@ namespace llvm {
template<typename T>
class Optional {
- T x;
- unsigned hasVal : 1;
+ AlignedCharArrayUnion<T> storage;
+ bool hasVal;
public:
- explicit Optional() : x(), hasVal(false) {}
- Optional(const T &y) : x(y), hasVal(true) {}
+ Optional(NoneType) : hasVal(false) {}
+ explicit Optional() : hasVal(false) {}
+ Optional(const T &y) : hasVal(true) {
+ new (storage.buffer) T(y);
+ }
+ Optional(const Optional &O) : hasVal(O.hasVal) {
+ if (hasVal)
+ new (storage.buffer) T(*O);
+ }
-#if LLVM_USE_RVALUE_REFERENCES
- Optional(T &&y) : x(std::forward<T>(y)), hasVal(true) {}
+#if LLVM_HAS_RVALUE_REFERENCES
+ Optional(T &&y) : hasVal(true) {
+ new (storage.buffer) T(std::forward<T>(y));
+ }
+ Optional(Optional<T> &&O) : hasVal(O) {
+ if (O) {
+ new (storage.buffer) T(std::move(*O));
+ O.reset();
+ }
+ }
+ Optional &operator=(T &&y) {
+ if (hasVal)
+ **this = std::move(y);
+ else {
+ new (storage.buffer) T(std::move(y));
+ hasVal = true;
+ }
+ return *this;
+ }
+ Optional &operator=(Optional &&O) {
+ if (!O)
+ reset();
+ else {
+ *this = std::move(*O);
+ O.reset();
+ }
+ return *this;
+ }
#endif
static inline Optional create(const T* y) {
return y ? Optional(*y) : Optional();
}
+ // FIXME: these assignments (& the equivalent const T&/const Optional& ctors)
+ // could be made more efficient by passing by value, possibly unifying them
+ // with the rvalue versions above - but this could place a different set of
+ // requirements (notably: the existence of a default ctor) when implemented
+ // in that way. Careful SFINAE to avoid such pitfalls would be required.
Optional &operator=(const T &y) {
- x = y;
- hasVal = true;
+ if (hasVal)
+ **this = y;
+ else {
+ new (storage.buffer) T(y);
+ hasVal = true;
+ }
return *this;
}
-
- const T* getPointer() const { assert(hasVal); return &x; }
- const T& getValue() const { assert(hasVal); return x; }
- operator bool() const { return hasVal; }
- bool hasValue() const { return hasVal; }
- const T* operator->() const { return getPointer(); }
- const T& operator*() const { assert(hasVal); return x; }
-};
+ Optional &operator=(const Optional &O) {
+ if (!O)
+ reset();
+ else
+ *this = *O;
+ return *this;
+ }
-template<typename T> struct simplify_type;
+ void reset() {
+ if (hasVal) {
+ (**this).~T();
+ hasVal = false;
+ }
+ }
-template <typename T>
-struct simplify_type<const Optional<T> > {
- typedef const T* SimpleType;
- static SimpleType getSimplifiedValue(const Optional<T> &Val) {
- return Val.getPointer();
+ ~Optional() {
+ reset();
}
+
+ const T* getPointer() const { assert(hasVal); return reinterpret_cast<const T*>(storage.buffer); }
+ T* getPointer() { assert(hasVal); return reinterpret_cast<T*>(storage.buffer); }
+ const T& getValue() const LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); }
+ T& getValue() LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); }
+
+ LLVM_EXPLICIT operator bool() const { return hasVal; }
+ bool hasValue() const { return hasVal; }
+ const T* operator->() const { return getPointer(); }
+ T* operator->() { return getPointer(); }
+ const T& operator*() const LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); }
+ T& operator*() LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); }
+
+#if LLVM_HAS_RVALUE_REFERENCE_THIS
+ T&& getValue() && { assert(hasVal); return std::move(*getPointer()); }
+ T&& operator*() && { assert(hasVal); return std::move(*getPointer()); }
+#endif
};
-template <typename T>
-struct simplify_type<Optional<T> >
- : public simplify_type<const Optional<T> > {};
+template <typename T> struct isPodLike;
+template <typename T> struct isPodLike<Optional<T> > {
+ // An Optional<T> is pod-like if T is.
+ static const bool value = isPodLike<T>::value;
+};
/// \brief Poison comparison between two \c Optional objects. Clients need to
/// explicitly compare the underlying values and account for empty \c Optional
/// objects.
///
-/// This routine will never be defined. It returns \c void to help diagnose
+/// This routine will never be defined. It returns \c void to help diagnose
/// errors at compile time.
template<typename T, typename U>
void operator==(const Optional<T> &X, const Optional<U> &Y);
@@ -83,7 +147,7 @@ void operator==(const Optional<T> &X, const Optional<U> &Y);
/// explicitly compare the underlying values and account for empty \c Optional
/// objects.
///
-/// This routine will never be defined. It returns \c void to help diagnose
+/// This routine will never be defined. It returns \c void to help diagnose
/// errors at compile time.
template<typename T, typename U>
void operator!=(const Optional<T> &X, const Optional<U> &Y);
@@ -92,7 +156,7 @@ void operator!=(const Optional<T> &X, const Optional<U> &Y);
/// explicitly compare the underlying values and account for empty \c Optional
/// objects.
///
-/// This routine will never be defined. It returns \c void to help diagnose
+/// This routine will never be defined. It returns \c void to help diagnose
/// errors at compile time.
template<typename T, typename U>
void operator<(const Optional<T> &X, const Optional<U> &Y);
@@ -101,7 +165,7 @@ void operator<(const Optional<T> &X, const Optional<U> &Y);
/// explicitly compare the underlying values and account for empty \c Optional
/// objects.
///
-/// This routine will never be defined. It returns \c void to help diagnose
+/// This routine will never be defined. It returns \c void to help diagnose
/// errors at compile time.
template<typename T, typename U>
void operator<=(const Optional<T> &X, const Optional<U> &Y);
@@ -110,7 +174,7 @@ void operator<=(const Optional<T> &X, const Optional<U> &Y);
/// explicitly compare the underlying values and account for empty \c Optional
/// objects.
///
-/// This routine will never be defined. It returns \c void to help diagnose
+/// This routine will never be defined. It returns \c void to help diagnose
/// errors at compile time.
template<typename T, typename U>
void operator>=(const Optional<T> &X, const Optional<U> &Y);
@@ -119,7 +183,7 @@ void operator>=(const Optional<T> &X, const Optional<U> &Y);
/// explicitly compare the underlying values and account for empty \c Optional
/// objects.
///
-/// This routine will never be defined. It returns \c void to help diagnose
+/// This routine will never be defined. It returns \c void to help diagnose
/// errors at compile time.
template<typename T, typename U>
void operator>(const Optional<T> &X, const Optional<U> &Y);
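Taken together, the reworked Optional<T> constructs its value lazily inside aligned storage, and llvm::None (from the new None.h) gives a terse way to return an empty value. A usage sketch:

#include "llvm/ADT/Optional.h"
#include <string>

llvm::Optional<std::string> maybeName(bool HasName) {
  if (!HasName)
    return llvm::None;           // empty: no std::string is constructed
  return std::string("example"); // engaged: constructed in the storage buffer
}

void useOptional() {
  llvm::Optional<std::string> N = maybeName(true);
  if (N)        // contextual conversion to bool, same as hasValue()
    N->size();  // operator-> exposes the contained value
}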
diff --git a/include/llvm/ADT/OwningPtr.h b/include/llvm/ADT/OwningPtr.h
index 05bcd40d0862..86f9feee2cb4 100644
--- a/include/llvm/ADT/OwningPtr.h
+++ b/include/llvm/ADT/OwningPtr.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_OWNING_PTR_H
-#define LLVM_ADT_OWNING_PTR_H
+#ifndef LLVM_ADT_OWNINGPTR_H
+#define LLVM_ADT_OWNINGPTR_H
#include "llvm/Support/Compiler.h"
#include <cassert>
@@ -32,7 +32,7 @@ class OwningPtr {
public:
explicit OwningPtr(T *P = 0) : Ptr(P) {}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
OwningPtr(OwningPtr &&Other) : Ptr(Other.take()) {}
OwningPtr &operator=(OwningPtr &&Other) {
@@ -95,7 +95,7 @@ class OwningArrayPtr {
public:
explicit OwningArrayPtr(T *P = 0) : Ptr(P) {}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
OwningArrayPtr(OwningArrayPtr &&Other) : Ptr(Other.take()) {}
OwningArrayPtr &operator=(OwningArrayPtr &&Other) {
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index 71c379bad5a4..cce2efb6ac99 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -57,11 +57,13 @@ class PointerIntPair {
};
public:
PointerIntPair() : Value(0) {}
- PointerIntPair(PointerTy Ptr, IntType Int) : Value(0) {
+ PointerIntPair(PointerTy Ptr, IntType Int) {
assert(IntBits <= PtrTraits::NumLowBitsAvailable &&
"PointerIntPair formed with integer size too large for pointer");
- setPointer(Ptr);
- setInt(Int);
+ setPointerAndInt(Ptr, Int);
+ }
+ explicit PointerIntPair(PointerTy Ptr) {
+ initWithPointer(Ptr);
}
PointerTy getPointer() const {
@@ -91,6 +93,25 @@ public:
Value |= IntVal << IntShift; // Set new integer.
}
+ void initWithPointer(PointerTy Ptr) {
+ intptr_t PtrVal
+ = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
+ assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
+ "Pointer is not sufficiently aligned");
+ Value = PtrVal;
+ }
+
+ void setPointerAndInt(PointerTy Ptr, IntType Int) {
+ intptr_t PtrVal
+ = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
+ assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
+ "Pointer is not sufficiently aligned");
+ intptr_t IntVal = Int;
+ assert(IntVal < (1 << IntBits) && "Integer too large for field");
+
+ Value = PtrVal | (IntVal << IntShift);
+ }
+
PointerTy const *getAddrOfPointer() const {
return const_cast<PointerIntPair *>(this)->getAddrOfPointer();
}
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index a9e86d22002d..f42515ac77a7 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -95,15 +95,11 @@ namespace llvm {
public:
PointerUnion() {}
- PointerUnion(PT1 V) {
- Val.setPointer(
- const_cast<void *>(PointerLikeTypeTraits<PT1>::getAsVoidPointer(V)));
- Val.setInt(0);
+ PointerUnion(PT1 V) : Val(
+ const_cast<void *>(PointerLikeTypeTraits<PT1>::getAsVoidPointer(V))) {
}
- PointerUnion(PT2 V) {
- Val.setPointer(
- const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(V)));
- Val.setInt(1);
+ PointerUnion(PT2 V) : Val(
+ const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(V)), 1) {
}
/// isNull - Return true if the pointer held in the union is null,
@@ -160,15 +156,14 @@ namespace llvm {
/// Assignment operators - Allow assigning into this union from either
/// pointer type, setting the discriminator to remember what it came from.
const PointerUnion &operator=(const PT1 &RHS) {
- Val.setPointer(
+ Val.initWithPointer(
const_cast<void *>(PointerLikeTypeTraits<PT1>::getAsVoidPointer(RHS)));
- Val.setInt(0);
return *this;
}
const PointerUnion &operator=(const PT2 &RHS) {
- Val.setPointer(
- const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(RHS)));
- Val.setInt(1);
+ Val.setPointerAndInt(
+ const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(RHS)),
+ 1);
return *this;
}
diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index 7f6350e4443e..59fa3f39c91e 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -260,7 +260,7 @@ class ReversePostOrderTraversal {
typedef typename GT::NodeType NodeType;
std::vector<NodeType*> Blocks; // Block list in normal PO order
inline void Initialize(NodeType *BB) {
- copy(po_begin(BB), po_end(BB), back_inserter(Blocks));
+ std::copy(po_begin(BB), po_end(BB), std::back_inserter(Blocks));
}
public:
typedef typename std::vector<NodeType*>::reverse_iterator rpo_iterator;
diff --git a/include/llvm/ADT/PriorityQueue.h b/include/llvm/ADT/PriorityQueue.h
index bf8a68708163..827d0b346e59 100644
--- a/include/llvm/ADT/PriorityQueue.h
+++ b/include/llvm/ADT/PriorityQueue.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_PRIORITY_QUEUE_H
-#define LLVM_ADT_PRIORITY_QUEUE_H
+#ifndef LLVM_ADT_PRIORITYQUEUE_H
+#define LLVM_ADT_PRIORITYQUEUE_H
#include <algorithm>
#include <queue>
diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h
index 48436c667474..8ce4fd53bacd 100644
--- a/include/llvm/ADT/SCCIterator.h
+++ b/include/llvm/ADT/SCCIterator.h
@@ -21,8 +21,8 @@
#ifndef LLVM_ADT_SCCITERATOR_H
#define LLVM_ADT_SCCITERATOR_H
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/GraphTraits.h"
#include <vector>
namespace llvm {
diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h
index aee500d4fb6c..dacda3652129 100644
--- a/include/llvm/ADT/STLExtras.h
+++ b/include/llvm/ADT/STLExtras.h
@@ -246,10 +246,10 @@ inline int array_pod_sort_comparator(const void *P1, const void *P2) {
return 0;
}
-/// get_array_pad_sort_comparator - This is an internal helper function used to
+/// get_array_pod_sort_comparator - This is an internal helper function used to
/// get type deduction of T right.
template<typename T>
-inline int (*get_array_pad_sort_comparator(const T &))
+inline int (*get_array_pod_sort_comparator(const T &))
(const void*, const void*) {
return array_pod_sort_comparator<T>;
}
@@ -274,7 +274,7 @@ inline void array_pod_sort(IteratorTy Start, IteratorTy End) {
// Don't dereference start iterator of empty sequence.
if (Start == End) return;
qsort(&*Start, End-Start, sizeof(*Start),
- get_array_pad_sort_comparator(*Start));
+ get_array_pod_sort_comparator(*Start));
}
template<class IteratorTy>
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index a9cd54e13b38..652492a1538c 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -153,7 +153,7 @@ public:
switchToLarge(new BitVector(*RHS.getPointer()));
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
SmallBitVector(SmallBitVector &&RHS) : X(RHS.X) {
RHS.X = 1;
}
@@ -178,9 +178,9 @@ public:
unsigned count() const {
if (isSmall()) {
uintptr_t Bits = getSmallBits();
- if (sizeof(uintptr_t) * CHAR_BIT == 32)
+ if (NumBaseBits == 32)
return CountPopulation_32(Bits);
- if (sizeof(uintptr_t) * CHAR_BIT == 64)
+ if (NumBaseBits == 64)
return CountPopulation_64(Bits);
llvm_unreachable("Unsupported!");
}
@@ -215,9 +215,9 @@ public:
uintptr_t Bits = getSmallBits();
if (Bits == 0)
return -1;
- if (sizeof(uintptr_t) * CHAR_BIT == 32)
+ if (NumBaseBits == 32)
return CountTrailingZeros_32(Bits);
- if (sizeof(uintptr_t) * CHAR_BIT == 64)
+ if (NumBaseBits == 64)
return CountTrailingZeros_64(Bits);
llvm_unreachable("Unsupported!");
}
@@ -233,9 +233,9 @@ public:
Bits &= ~uintptr_t(0) << (Prev + 1);
if (Bits == 0 || Prev + 1 >= getSmallSize())
return -1;
- if (sizeof(uintptr_t) * CHAR_BIT == 32)
+ if (NumBaseBits == 32)
return CountTrailingZeros_32(Bits);
- if (sizeof(uintptr_t) * CHAR_BIT == 64)
+ if (NumBaseBits == 64)
return CountTrailingZeros_64(Bits);
llvm_unreachable("Unsupported!");
}
@@ -472,7 +472,7 @@ public:
return *this;
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
const SmallBitVector &operator=(SmallBitVector &&RHS) {
if (this != &RHS) {
clear();
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index 3bb883088c59..8c7304197f34 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -54,8 +54,6 @@ protected:
/// then the set is in 'small mode'.
const void **CurArray;
/// CurArraySize - The allocated size of CurArray, always a power of two.
- /// Note that CurArray points to an array that has CurArraySize+1 elements in
- /// it, so that the end iterator actually points to valid memory.
unsigned CurArraySize;
// If small, this is # elts allocated consecutively
@@ -68,9 +66,6 @@ protected:
SmallArray(SmallStorage), CurArray(SmallStorage), CurArraySize(SmallSize) {
assert(SmallSize && (SmallSize & (SmallSize-1)) == 0 &&
"Initial size must be a power of two!");
- // The end pointer, always valid, is set to a valid element to help the
- // iterator.
- CurArray[SmallSize] = 0;
clear();
}
~SmallPtrSetImpl();
@@ -147,9 +142,11 @@ protected:
class SmallPtrSetIteratorImpl {
protected:
const void *const *Bucket;
+ const void *const *End;
public:
- explicit SmallPtrSetIteratorImpl(const void *const *BP) : Bucket(BP) {
- AdvanceIfNotValid();
+ explicit SmallPtrSetIteratorImpl(const void *const *BP, const void*const *E)
+ : Bucket(BP), End(E) {
+ AdvanceIfNotValid();
}
bool operator==(const SmallPtrSetIteratorImpl &RHS) const {
@@ -164,8 +161,10 @@ protected:
/// that is. This is guaranteed to stop because the end() bucket is marked
/// valid.
void AdvanceIfNotValid() {
- while (*Bucket == SmallPtrSetImpl::getEmptyMarker() ||
- *Bucket == SmallPtrSetImpl::getTombstoneMarker())
+ assert(Bucket <= End);
+ while (Bucket != End &&
+ (*Bucket == SmallPtrSetImpl::getEmptyMarker() ||
+ *Bucket == SmallPtrSetImpl::getTombstoneMarker()))
++Bucket;
}
};
@@ -182,12 +181,13 @@ public:
typedef std::ptrdiff_t difference_type;
typedef std::forward_iterator_tag iterator_category;
- explicit SmallPtrSetIterator(const void *const *BP)
- : SmallPtrSetIteratorImpl(BP) {}
+ explicit SmallPtrSetIterator(const void *const *BP, const void *const *E)
+ : SmallPtrSetIteratorImpl(BP, E) {}
// Most methods provided by baseclass.
const PtrTy operator*() const {
+ assert(Bucket < End);
return PtrTraits::getFromVoidPointer(const_cast<void*>(*Bucket));
}
@@ -236,9 +236,8 @@ template<class PtrType, unsigned SmallSize>
class SmallPtrSet : public SmallPtrSetImpl {
// Make sure that SmallSize is a power of two, round up if not.
enum { SmallSizePowTwo = RoundUpToPowerOfTwo<SmallSize>::Val };
- /// SmallStorage - Fixed size storage used in 'small mode'. The extra element
- /// ensures that the end iterator actually points to valid memory.
- const void *SmallStorage[SmallSizePowTwo+1];
+ /// SmallStorage - Fixed size storage used in 'small mode'.
+ const void *SmallStorage[SmallSizePowTwo];
typedef PointerLikeTypeTraits<PtrType> PtrTraits;
public:
SmallPtrSet() : SmallPtrSetImpl(SmallStorage, SmallSizePowTwo) {}
@@ -275,10 +274,10 @@ public:
typedef SmallPtrSetIterator<PtrType> iterator;
typedef SmallPtrSetIterator<PtrType> const_iterator;
inline iterator begin() const {
- return iterator(CurArray);
+ return iterator(CurArray, CurArray+CurArraySize);
}
inline iterator end() const {
- return iterator(CurArray+CurArraySize);
+ return iterator(CurArray+CurArraySize, CurArray+CurArraySize);
}
// Allow assignment from any smallptrset with the same element type even if it
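A minimal iteration sketch (hypothetical values): with this change, begin()/end() carry an explicit End pointer, so no sentinel slot past the table is needed:

  #include "llvm/ADT/SmallPtrSet.h"

  unsigned demoIterate() {
    int X = 0, Y = 0;
    llvm::SmallPtrSet<int *, 4> Set;
    Set.insert(&X);
    Set.insert(&Y);
    unsigned N = 0;
    for (llvm::SmallPtrSet<int *, 4>::iterator I = Set.begin(),
         E = Set.end(); I != E; ++I)
      ++N;    // iteration stops at End rather than at a sentinel element
    return N; // 2
  }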
diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h
index cd117f59ba76..5dfe924f6d78 100644
--- a/include/llvm/ADT/SmallSet.h
+++ b/include/llvm/ADT/SmallSet.h
@@ -14,8 +14,8 @@
#ifndef LLVM_ADT_SMALLSET_H
#define LLVM_ADT_SMALLSET_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include <set>
namespace llvm {
@@ -55,6 +55,7 @@ public:
}
/// insert - Insert an element into the set if it isn't already there.
+ /// Returns true if the element is inserted (it was not in the set before).
bool insert(const T &V) {
if (!isSmall())
return Set.insert(V).second;
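A sketch of the documented return value (hypothetical caller):

  #include "llvm/ADT/SmallSet.h"

  bool recordOnce(llvm::SmallSet<unsigned, 8> &Seen, unsigned ID) {
    // true only on first insertion; false if ID was already present.
    return Seen.insert(ID);
  }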
diff --git a/include/llvm/ADT/SmallString.h b/include/llvm/ADT/SmallString.h
index 8da99d1c125c..2cfb5b9f2a9d 100644
--- a/include/llvm/ADT/SmallString.h
+++ b/include/llvm/ADT/SmallString.h
@@ -77,7 +77,7 @@ public:
void append(in_iter S, in_iter E) {
SmallVectorImpl<char>::append(S, E);
}
-
+
void append(size_t NumInputs, char Elt) {
SmallVectorImpl<char>::append(NumInputs, Elt);
}
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index 6e0fd94dfe67..7ba0a714bfc7 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -16,6 +16,7 @@
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/type_traits.h"
#include <algorithm>
#include <cassert>
@@ -145,16 +146,20 @@ public:
}
reference front() {
+ assert(!empty());
return begin()[0];
}
const_reference front() const {
+ assert(!empty());
return begin()[0];
}
reference back() {
+ assert(!empty());
return end()[-1];
}
const_reference back() const {
+ assert(!empty());
return end()[-1];
}
};
@@ -178,7 +183,7 @@ protected:
/// std::move, but not all stdlibs actually provide that.
template<typename It1, typename It2>
static It2 move(It1 I, It1 E, It2 Dest) {
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
for (; I != E; ++I, ++Dest)
*Dest = ::std::move(*I);
return Dest;
@@ -193,7 +198,7 @@ protected:
/// std::move_backward, but not all stdlibs actually provide that.
template<typename It1, typename It2>
static It2 move_backward(It1 I, It1 E, It2 Dest) {
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
while (I != E)
*--Dest = ::std::move(*--E);
return Dest;
@@ -206,7 +211,7 @@ protected:
/// memory starting with "Dest", constructing elements as needed.
template<typename It1, typename It2>
static void uninitialized_move(It1 I, It1 E, It2 Dest) {
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
for (; I != E; ++I, ++Dest)
::new ((void*) &*Dest) T(::std::move(*I));
#else
@@ -239,7 +244,7 @@ public:
goto Retry;
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
void push_back(T &&Elt) {
if (this->EndX < this->CapacityX) {
Retry:
@@ -263,7 +268,8 @@ template <typename T, bool isPodLike>
void SmallVectorTemplateBase<T, isPodLike>::grow(size_t MinSize) {
size_t CurCapacity = this->capacity();
size_t CurSize = this->size();
- size_t NewCapacity = 2*CurCapacity + 1; // Always grow, even from zero.
+ // Always grow, even from zero.
+ size_t NewCapacity = size_t(NextPowerOf2(CurCapacity+2));
if (NewCapacity < MinSize)
NewCapacity = MinSize;
T *NewElts = static_cast<T*>(malloc(NewCapacity*sizeof(T)));
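Illustrative capacity progression under the new policy, assuming NextPowerOf2(N) from MathExtras.h returns the smallest power of two strictly greater than N:

  CurCapacity 0 -> NextPowerOf2(2)  = 4
  CurCapacity 4 -> NextPowerOf2(6)  = 8
  CurCapacity 8 -> NextPowerOf2(10) = 16

whereas the old 2*CurCapacity+1 rule produced 1, 3, 7, 15, ..., which never lands on power-of-two allocation sizes.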
@@ -365,7 +371,7 @@ template <typename T>
class SmallVectorImpl : public SmallVectorTemplateBase<T, isPodLike<T>::value> {
typedef SmallVectorTemplateBase<T, isPodLike<T>::value > SuperClass;
- SmallVectorImpl(const SmallVectorImpl&); // DISABLED.
+ SmallVectorImpl(const SmallVectorImpl&) LLVM_DELETED_FUNCTION;
public:
typedef typename SuperClass::iterator iterator;
typedef typename SuperClass::size_type size_type;
@@ -422,7 +428,7 @@ public:
}
T pop_back_val() {
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
T Result = ::std::move(this->back());
#else
T Result = this->back();
@@ -495,7 +501,7 @@ public:
return(N);
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
iterator insert(iterator I, T &&Elt) {
if (I == this->end()) { // Important special case for empty vector.
this->push_back(::std::move(Elt));
@@ -667,7 +673,7 @@ public:
SmallVectorImpl &operator=(const SmallVectorImpl &RHS);
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
SmallVectorImpl &operator=(SmallVectorImpl &&RHS);
#endif
@@ -787,7 +793,7 @@ SmallVectorImpl<T> &SmallVectorImpl<T>::
return *this;
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
template <typename T>
SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
// Avoid self-assignment.
@@ -898,7 +904,7 @@ public:
return *this;
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
if (!RHS.empty())
SmallVectorImpl<T>::operator=(::std::move(RHS));
diff --git a/include/llvm/ADT/SparseMultiSet.h b/include/llvm/ADT/SparseMultiSet.h
new file mode 100644
index 000000000000..7f2a6f7d0bac
--- /dev/null
+++ b/include/llvm/ADT/SparseMultiSet.h
@@ -0,0 +1,526 @@
+//===--- llvm/ADT/SparseMultiSet.h - Sparse multiset ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SparseMultiSet class, which adds multiset behavior to
+// the SparseSet.
+//
+// A sparse multiset holds a small number of objects identified by integer keys
+// from a moderately sized universe. The sparse multiset uses more memory than
+// other containers in order to provide faster operations. Any key can map to
+// multiple values. A SparseMultiSetNode class is provided, which serves as a
+// convenient base class for the contents of a SparseMultiSet.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SPARSEMULTISET_H
+#define LLVM_ADT_SPARSEMULTISET_H
+
+#include "llvm/ADT/SparseSet.h"
+
+namespace llvm {
+
+/// Fast multiset implementation for objects that can be identified by small
+/// unsigned keys.
+///
+/// SparseMultiSet allocates memory proportional to the size of the key
+/// universe, so it is not recommended for building composite data structures.
+/// It is useful for algorithms that require a single set with fast operations.
+///
+/// Compared to DenseSet and DenseMap, SparseMultiSet provides a constant-time
+/// clear() that is as fast as a vector's. The find(), insert(), and erase()
+/// operations are all constant time, and typically faster than a hash table.
+/// The iteration order doesn't depend on numerical key values; it only depends
+/// on the order of insert() and erase() operations. Iteration order is the
+/// insertion order. Iteration is only provided over elements of equivalent
+/// keys, but iterators are bidirectional.
+///
+/// Compared to BitVector, SparseMultiSet<unsigned> uses 8x-40x more memory, but
+/// offers constant-time clear() and size() operations as well as fast iteration
+/// independent of the size of the universe.
+///
+/// SparseMultiSet contains a dense vector holding all the objects and a sparse
+/// array holding indexes into the dense vector. Most of the memory is used by
+/// the sparse array which is the size of the key universe. The SparseT template
+/// parameter provides a space/speed tradeoff for sets holding many elements.
+///
+/// When SparseT is uint32_t, find() only touches up to 3 cache lines, but the
+/// sparse array uses 4 x Universe bytes.
+///
+/// When SparseT is uint8_t (the default), find() touches up to 3+[N/256] cache
+/// lines, but the sparse array is 4x smaller. N is the number of elements in
+/// the set.
+///
+/// For sets that may grow to thousands of elements, SparseT should be set to
+/// uint16_t or uint32_t.
+///
+/// Multiset behavior is provided via doubly linked lists for values
+/// that are inlined in the dense vector. SparseMultiSet is a good choice when
+/// one desires a growable number of entries per key, as it will retain the
+/// SparseSet algorithmic properties despite being growable. Thus, it is often a
+/// better choice than a SparseSet of growable containers or a vector of
+/// vectors. SparseMultiSet also keeps iterators valid after erasure (provided
+/// the iterators don't point to the element erased), allowing for more
+/// intuitive and fast removal.
+///
+/// @tparam ValueT The type of objects in the set.
+/// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT.
+/// @tparam SparseT An unsigned integer type. See above.
+///
+template<typename ValueT,
+ typename KeyFunctorT = llvm::identity<unsigned>,
+ typename SparseT = uint8_t>
+class SparseMultiSet {
+ /// The actual data that's stored, as a doubly-linked list implemented via
+ /// indices into the DenseVector. The doubly linked list is implemented
+ /// circularly in Prev indices, and INVALID-terminated in Next indices. This
+ /// provides efficient access to list tails. These nodes can also be
+ /// tombstones, in which case they are actually nodes in a single-linked
+ /// freelist of recyclable slots.
+ struct SMSNode {
+ static const unsigned INVALID = ~0U;
+
+ ValueT Data;
+ unsigned Prev;
+ unsigned Next;
+
+ SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) { }
+
+ /// List tails have invalid Nexts.
+ bool isTail() const {
+ return Next == INVALID;
+ }
+
+ /// Whether this node is a tombstone node, and thus is in our freelist.
+ bool isTombstone() const {
+ return Prev == INVALID;
+ }
+
+ /// Since the list is circular in Prev, all non-tombstone nodes have a valid
+ /// Prev.
+ bool isValid() const { return Prev != INVALID; }
+ };
+
+ typedef typename KeyFunctorT::argument_type KeyT;
+ typedef SmallVector<SMSNode, 8> DenseT;
+ DenseT Dense;
+ SparseT *Sparse;
+ unsigned Universe;
+ KeyFunctorT KeyIndexOf;
+ SparseSetValFunctor<KeyT, ValueT, KeyFunctorT> ValIndexOf;
+
+ /// We have a built-in recycler for reusing tombstone slots. This recycler
+ /// puts a singly-linked free list into tombstone slots, allowing quick
+ /// erasure, iterator preservation, and a constant-time size().
+ unsigned FreelistIdx;
+ unsigned NumFree;
+
+ unsigned sparseIndex(const ValueT &Val) const {
+ assert(ValIndexOf(Val) < Universe &&
+ "Invalid key in set. Did object mutate?");
+ return ValIndexOf(Val);
+ }
+ unsigned sparseIndex(const SMSNode &N) const { return sparseIndex(N.Data); }
+
+ // Disable copy construction and assignment.
+ // This data structure is not meant to be used that way.
+ SparseMultiSet(const SparseMultiSet&) LLVM_DELETED_FUNCTION;
+ SparseMultiSet &operator=(const SparseMultiSet&) LLVM_DELETED_FUNCTION;
+
+ /// Whether the given entry is the head of the list. List heads' previous
+ /// pointers are to the tail of the list, allowing for efficient access to the
+ /// list tail. D must be a valid entry node.
+ bool isHead(const SMSNode &D) const {
+ assert(D.isValid() && "Invalid node for head");
+ return Dense[D.Prev].isTail();
+ }
+
+ /// Whether the given entry is a singleton entry, i.e. the only entry with
+ /// that key.
+ bool isSingleton(const SMSNode &N) const {
+ assert(N.isValid() && "Invalid node for singleton");
+ // Is N its own predecessor?
+ return &Dense[N.Prev] == &N;
+ }
+
+ /// Add in the given SMSNode. Uses a free entry in our freelist if
+ /// available. Returns the index of the added node.
+ unsigned addValue(const ValueT& V, unsigned Prev, unsigned Next) {
+ if (NumFree == 0) {
+ Dense.push_back(SMSNode(V, Prev, Next));
+ return Dense.size() - 1;
+ }
+
+ // Peel off a free slot
+ unsigned Idx = FreelistIdx;
+ unsigned NextFree = Dense[Idx].Next;
+ assert(Dense[Idx].isTombstone() && "Non-tombstone free?");
+
+ Dense[Idx] = SMSNode(V, Prev, Next);
+ FreelistIdx = NextFree;
+ --NumFree;
+ return Idx;
+ }
+
+ /// Make the current index a new tombstone. Pushes it onto the freelist.
+ void makeTombstone(unsigned Idx) {
+ Dense[Idx].Prev = SMSNode::INVALID;
+ Dense[Idx].Next = FreelistIdx;
+ FreelistIdx = Idx;
+ ++NumFree;
+ }
+
+public:
+ typedef ValueT value_type;
+ typedef ValueT &reference;
+ typedef const ValueT &const_reference;
+ typedef ValueT *pointer;
+ typedef const ValueT *const_pointer;
+
+ SparseMultiSet()
+ : Sparse(0), Universe(0), FreelistIdx(SMSNode::INVALID), NumFree(0) { }
+
+ ~SparseMultiSet() { free(Sparse); }
+
+ /// Set the universe size which determines the largest key the set can hold.
+ /// The universe must be sized before any elements can be added.
+ ///
+ /// @param U Universe size. All object keys must be less than U.
+ ///
+ void setUniverse(unsigned U) {
+ // It's not hard to resize the universe on a non-empty set, but it doesn't
+ // seem like a likely use case, so we can add that code when we need it.
+ assert(empty() && "Can only resize universe on an empty map");
+ // Hysteresis prevents needless reallocations.
+ if (U >= Universe/4 && U <= Universe)
+ return;
+ free(Sparse);
+ // The Sparse array doesn't actually need to be initialized, so malloc
+ // would be enough here, but that will cause tools like valgrind to
+ // complain about branching on uninitialized data.
+ Sparse = reinterpret_cast<SparseT*>(calloc(U, sizeof(SparseT)));
+ Universe = U;
+ }
+
+ /// Our iterators are iterators over the collection of objects that share a
+ /// key.
+ template<typename SMSPtrTy>
+ class iterator_base : public std::iterator<std::bidirectional_iterator_tag,
+ ValueT> {
+ friend class SparseMultiSet;
+ SMSPtrTy SMS;
+ unsigned Idx;
+ unsigned SparseIdx;
+
+ iterator_base(SMSPtrTy P, unsigned I, unsigned SI)
+ : SMS(P), Idx(I), SparseIdx(SI) { }
+
+ /// Whether our iterator has fallen outside our dense vector.
+ bool isEnd() const {
+ if (Idx == SMSNode::INVALID)
+ return true;
+
+ assert(Idx < SMS->Dense.size() && "Out of range, non-INVALID Idx?");
+ return false;
+ }
+
+ /// Whether our iterator is properly keyed, i.e. the SparseIdx is valid
+ bool isKeyed() const { return SparseIdx < SMS->Universe; }
+
+ unsigned Prev() const { return SMS->Dense[Idx].Prev; }
+ unsigned Next() const { return SMS->Dense[Idx].Next; }
+
+ void setPrev(unsigned P) { SMS->Dense[Idx].Prev = P; }
+ void setNext(unsigned N) { SMS->Dense[Idx].Next = N; }
+
+ public:
+ typedef std::iterator<std::bidirectional_iterator_tag, ValueT> super;
+ typedef typename super::value_type value_type;
+ typedef typename super::difference_type difference_type;
+ typedef typename super::pointer pointer;
+ typedef typename super::reference reference;
+
+ iterator_base(const iterator_base &RHS)
+ : SMS(RHS.SMS), Idx(RHS.Idx), SparseIdx(RHS.SparseIdx) { }
+
+ const iterator_base &operator=(const iterator_base &RHS) {
+ SMS = RHS.SMS;
+ Idx = RHS.Idx;
+ SparseIdx = RHS.SparseIdx;
+ return *this;
+ }
+
+ reference operator*() const {
+ assert(isKeyed() && SMS->sparseIndex(SMS->Dense[Idx].Data) == SparseIdx &&
+ "Dereferencing iterator of invalid key or index");
+
+ return SMS->Dense[Idx].Data;
+ }
+ pointer operator->() const { return &operator*(); }
+
+ /// Comparison operators
+ bool operator==(const iterator_base &RHS) const {
+ // end compares equal
+ if (SMS == RHS.SMS && Idx == RHS.Idx) {
+ assert((isEnd() || SparseIdx == RHS.SparseIdx) &&
+ "Same dense entry, but different keys?");
+ return true;
+ }
+
+ return false;
+ }
+
+ bool operator!=(const iterator_base &RHS) const {
+ return !operator==(RHS);
+ }
+
+ /// Increment and decrement operators
+ iterator_base &operator--() { // predecrement - Back up
+ assert(isKeyed() && "Decrementing an invalid iterator");
+ assert((isEnd() || !SMS->isHead(SMS->Dense[Idx])) &&
+ "Decrementing head of list");
+
+ // If we're at the end, then issue a new find()
+ if (isEnd())
+ Idx = SMS->findIndex(SparseIdx).Prev();
+ else
+ Idx = Prev();
+
+ return *this;
+ }
+ iterator_base &operator++() { // preincrement - Advance
+ assert(!isEnd() && isKeyed() && "Incrementing an invalid/end iterator");
+ Idx = Next();
+ return *this;
+ }
+ iterator_base operator--(int) { // postdecrement
+ iterator_base I(*this);
+ --*this;
+ return I;
+ }
+ iterator_base operator++(int) { // postincrement
+ iterator_base I(*this);
+ ++*this;
+ return I;
+ }
+ };
+ typedef iterator_base<SparseMultiSet *> iterator;
+ typedef iterator_base<const SparseMultiSet *> const_iterator;
+
+ // Convenience types
+ typedef std::pair<iterator, iterator> RangePair;
+
+ /// Returns an iterator past the end of this container. Such an iterator cannot
+ /// be decremented, but will compare equal to other end iterators.
+ iterator end() { return iterator(this, SMSNode::INVALID, SMSNode::INVALID); }
+ const_iterator end() const {
+ return const_iterator(this, SMSNode::INVALID, SMSNode::INVALID);
+ }
+
+ /// Returns true if the set is empty.
+ ///
+ /// This is not the same as BitVector::empty().
+ ///
+ bool empty() const { return size() == 0; }
+
+ /// Returns the number of elements in the set.
+ ///
+ /// This is not the same as BitVector::size() which returns the size of the
+ /// universe.
+ ///
+ unsigned size() const {
+ assert(NumFree <= Dense.size() && "Out-of-bounds free entries");
+ return Dense.size() - NumFree;
+ }
+
+ /// Clears the set. This is a very fast constant time operation.
+ ///
+ void clear() {
+ // Sparse does not need to be cleared, see find().
+ Dense.clear();
+ NumFree = 0;
+ FreelistIdx = SMSNode::INVALID;
+ }
+
+ /// Find an element by its index.
+ ///
+ /// @param Idx A valid index to find.
+ /// @returns An iterator to the element identified by key, or end().
+ ///
+ iterator findIndex(unsigned Idx) {
+ assert(Idx < Universe && "Key out of range");
+ assert(std::numeric_limits<SparseT>::is_integer &&
+ !std::numeric_limits<SparseT>::is_signed &&
+ "SparseT must be an unsigned integer type");
+ const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u;
+ for (unsigned i = Sparse[Idx], e = Dense.size(); i < e; i += Stride) {
+ const unsigned FoundIdx = sparseIndex(Dense[i]);
+ // Check that we're pointing at the correct entry and that it is the head
+ // of a valid list.
+ if (Idx == FoundIdx && Dense[i].isValid() && isHead(Dense[i]))
+ return iterator(this, i, Idx);
+ // Stride is 0 when SparseT is at least as wide as unsigned; no need to loop.
+ if (!Stride)
+ break;
+ }
+ return end();
+ }
+
+ /// Find an element by its key.
+ ///
+ /// @param Key A valid key to find.
+ /// @returns An iterator to the element identified by key, or end().
+ ///
+ iterator find(const KeyT &Key) {
+ return findIndex(KeyIndexOf(Key));
+ }
+
+ const_iterator find(const KeyT &Key) const {
+ iterator I = const_cast<SparseMultiSet*>(this)->findIndex(KeyIndexOf(Key));
+ return const_iterator(I.SMS, I.Idx, KeyIndexOf(Key));
+ }
+
+ /// Returns the number of elements identified by Key. This will be linear in
+ /// the number of elements with that key.
+ unsigned count(const KeyT &Key) const {
+ unsigned Ret = 0;
+ for (const_iterator It = find(Key); It != end(); ++It)
+ ++Ret;
+
+ return Ret;
+ }
+
+ /// Returns true if this set contains an element identified by Key.
+ bool contains(const KeyT &Key) const {
+ return find(Key) != end();
+ }
+
+ /// Return the head and tail of the subset's list, otherwise returns end().
+ iterator getHead(const KeyT &Key) { return find(Key); }
+ iterator getTail(const KeyT &Key) {
+ iterator I = find(Key);
+ if (I != end())
+ I = iterator(this, I.Prev(), KeyIndexOf(Key));
+ return I;
+ }
+
+ /// The bounds of the range of items sharing Key K. First member is the head
+ /// of the list, and the second member is a decrementable end iterator for
+ /// that key.
+ RangePair equal_range(const KeyT &K) {
+ iterator B = find(K);
+ iterator E = iterator(this, SMSNode::INVALID, B.SparseIdx);
+ return make_pair(B, E);
+ }
+
+ /// Insert a new element at the tail of the subset list. Returns an iterator
+ /// to the newly added entry.
+ iterator insert(const ValueT &Val) {
+ unsigned Idx = sparseIndex(Val);
+ iterator I = findIndex(Idx);
+
+ unsigned NodeIdx = addValue(Val, SMSNode::INVALID, SMSNode::INVALID);
+
+ if (I == end()) {
+ // Make a singleton list
+ Sparse[Idx] = NodeIdx;
+ Dense[NodeIdx].Prev = NodeIdx;
+ return iterator(this, NodeIdx, Idx);
+ }
+
+ // Stick it at the end.
+ unsigned HeadIdx = I.Idx;
+ unsigned TailIdx = I.Prev();
+ Dense[TailIdx].Next = NodeIdx;
+ Dense[HeadIdx].Prev = NodeIdx;
+ Dense[NodeIdx].Prev = TailIdx;
+
+ return iterator(this, NodeIdx, Idx);
+ }
+
+ /// Erases an existing element identified by a valid iterator.
+ ///
+ /// This invalidates iterators pointing at the same entry, but erase() returns
+ /// an iterator pointing to the next element in the subset's list. This makes
+ /// it possible to erase selected elements while iterating over the subset:
+ ///
+ /// tie(I, E) = Set.equal_range(Key);
+ /// while (I != E)
+ /// if (test(*I))
+ /// I = Set.erase(I);
+ /// else
+ /// ++I;
+ ///
+ /// Note that if the last element in the subset list is erased, this will
+ /// return an end iterator which can be decremented to get the new tail (if it
+ /// exists):
+ ///
+ /// tie(B, I) = Set.equal_range(Key);
+ /// for (bool isBegin = B == I; !isBegin; /* empty */) {
+ /// isBegin = (--I) == B;
+ ///   if (test(*I))
+ /// break;
+ /// I = erase(I);
+ /// }
+ iterator erase(iterator I) {
+ assert(I.isKeyed() && !I.isEnd() && !Dense[I.Idx].isTombstone() &&
+ "erasing invalid/end/tombstone iterator");
+
+ // First, unlink the node from its list. Then swap the node out with the
+ // dense vector's last entry
+ iterator NextI = unlink(Dense[I.Idx]);
+
+ // Put in a tombstone.
+ makeTombstone(I.Idx);
+
+ return NextI;
+ }
+
+ /// Erase all elements with the given key. This invalidates all
+ /// iterators of that key.
+ void eraseAll(const KeyT &K) {
+ for (iterator I = find(K); I != end(); /* empty */)
+ I = erase(I);
+ }
+
+private:
+ /// Unlink the node from its list. Returns the next node in the list.
+ iterator unlink(const SMSNode &N) {
+ if (isSingleton(N)) {
+ // Singleton is already unlinked
+ assert(N.Next == SMSNode::INVALID && "Singleton has next?");
+ return iterator(this, SMSNode::INVALID, ValIndexOf(N.Data));
+ }
+
+ if (isHead(N)) {
+ // If we're the head, then update the sparse array and our next.
+ Sparse[sparseIndex(N)] = N.Next;
+ Dense[N.Next].Prev = N.Prev;
+ return iterator(this, N.Next, ValIndexOf(N.Data));
+ }
+
+ if (N.isTail()) {
+ // If we're the tail, then update our head and our previous.
+ findIndex(sparseIndex(N)).setPrev(N.Prev);
+ Dense[N.Prev].Next = N.Next;
+
+ // Give back an end iterator that can be decremented
+ iterator I(this, N.Prev, ValIndexOf(N.Data));
+ return ++I;
+ }
+
+ // Otherwise, just drop us
+ Dense[N.Next].Prev = N.Prev;
+ Dense[N.Prev].Next = N.Next;
+ return iterator(this, N.Next, ValIndexOf(N.Data));
+ }
+};
+
+} // end namespace llvm
+
+#endif
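A minimal end-to-end sketch of the new container (hypothetical keys and universe size, not part of the import):

  #include "llvm/ADT/SparseMultiSet.h"

  unsigned demo() {
    llvm::SparseMultiSet<unsigned> Set;
    Set.setUniverse(128);        // all keys must be < 128
    Set.insert(5);
    Set.insert(5);               // multiset: duplicate keys are allowed
    Set.insert(17);
    unsigned N = Set.count(5);   // 2; linear in the number of 5's
    Set.eraseAll(5);             // invalidates iterators keyed on 5
    return N + Set.size();       // 2 + 1
  }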
diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h
index 063c6755c680..267a340a7581 100644
--- a/include/llvm/ADT/SparseSet.h
+++ b/include/llvm/ADT/SparseSet.h
@@ -20,8 +20,8 @@
#ifndef LLVM_ADT_SPARSESET_H
#define LLVM_ADT_SPARSESET_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataTypes.h"
#include <limits>
diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h
index b54d10b9dd33..26aac7bea627 100644
--- a/include/llvm/ADT/Statistic.h
+++ b/include/llvm/ADT/Statistic.h
@@ -51,7 +51,9 @@ public:
// Allow use of this class as the value itself.
operator unsigned() const { return Value; }
- const Statistic &operator=(unsigned Val) {
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+ const Statistic &operator=(unsigned Val) {
Value = Val;
return init();
}
@@ -106,6 +108,46 @@ public:
return init();
}
+#else // Statistics are disabled in release builds.
+
+ const Statistic &operator=(unsigned Val) {
+ return *this;
+ }
+
+ const Statistic &operator++() {
+ return *this;
+ }
+
+ unsigned operator++(int) {
+ return 0;
+ }
+
+ const Statistic &operator--() {
+ return *this;
+ }
+
+ unsigned operator--(int) {
+ return 0;
+ }
+
+ const Statistic &operator+=(const unsigned &V) {
+ return *this;
+ }
+
+ const Statistic &operator-=(const unsigned &V) {
+ return *this;
+ }
+
+ const Statistic &operator*=(const unsigned &V) {
+ return *this;
+ }
+
+ const Statistic &operator/=(const unsigned &V) {
+ return *this;
+ }
+
+#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+
protected:
Statistic &init() {
bool tmp = Initialized;
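Sketch of the effect at a use site (hypothetical statistic name; the STATISTIC macro is assumed to follow its usual DEBUG_TYPE convention): when NDEBUG is defined and LLVM_ENABLE_STATS is not, the no-op operators above compile the update away.

  #define DEBUG_TYPE "demo"   // hypothetical debug type for STATISTIC
  #include "llvm/ADT/Statistic.h"

  STATISTIC(NumWidgets, "Number of widgets processed");

  void noteWidget() {
    ++NumWidgets;  // free in release builds without LLVM_ENABLE_STATS
  }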
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index bf27c4313f82..d2887c5c2c56 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -14,8 +14,8 @@
#ifndef LLVM_ADT_STRINGEXTRAS_H
#define LLVM_ADT_STRINGEXTRAS_H
-#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
template<typename T> class SmallVectorImpl;
@@ -27,6 +27,17 @@ static inline char hexdigit(unsigned X, bool LowerCase = false) {
return X < 10 ? '0' + X : HexChar + X - 10;
}
+/// Interpret the given character \p C as a hexadecimal digit and return its
+/// value.
+///
+/// If \p C is not a valid hex digit, -1U is returned.
+static inline unsigned hexDigitValue(char C) {
+ if (C >= '0' && C <= '9') return C-'0';
+ if (C >= 'a' && C <= 'f') return C-'a'+10U;
+ if (C >= 'A' && C <= 'F') return C-'A'+10U;
+ return -1U;
+}
+
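Usage sketch (hypothetical input), e.g. decoding the byte "1A":

  unsigned Hi = llvm::hexDigitValue('1');   // 1
  unsigned Lo = llvm::hexDigitValue('A');   // 10
  unsigned Byte = (Hi << 4) | Lo;           // 0x1A
  unsigned Bad = llvm::hexDigitValue('g');  // -1U: not a hex digit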
/// utohex_buffer - Emit the specified number into the buffer specified by
/// BufferEnd, returning a pointer to the start of the string. This can be used
/// like this: (note that the buffer must be large enough to handle any number):
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index b4497a276d0e..d01437b61c2b 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -53,7 +53,7 @@ public:
class StringMapImpl {
protected:
// Array of NumBuckets pointers to entries, null pointers are holes.
- // TheTable[NumBuckets] contains a sentinel value for easy iteration. Follwed
+ // TheTable[NumBuckets] contains a sentinel value for easy iteration. It is followed
// by an array of the actual hash values as unsigned integers.
StringMapEntryBase **TheTable;
unsigned NumBuckets;
@@ -171,7 +171,6 @@ public:
return Create(KeyStart, KeyEnd, Allocator, 0);
}
-
/// Create - Create a StringMapEntry with normal malloc/free.
template<typename InitType>
static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd,
@@ -204,7 +203,6 @@ public:
return *reinterpret_cast<StringMapEntry*>(Ptr);
}
-
/// Destroy - Destroy this StringMapEntry, releasing memory back to the
/// specified allocator.
template<typename AllocatorTy>
@@ -239,6 +237,10 @@ public:
explicit StringMap(AllocatorTy A)
: StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))), Allocator(A) {}
+ StringMap(unsigned InitialSize, AllocatorTy A)
+ : StringMapImpl(InitialSize, static_cast<unsigned>(sizeof(MapEntryTy))),
+ Allocator(A) {}
+
StringMap(const StringMap &RHS)
: StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {
assert(RHS.empty() &&
@@ -290,7 +292,7 @@ public:
return const_iterator(TheTable+Bucket, true);
}
- /// lookup - Return the entry for the specified key, or a default
+ /// lookup - Return the entry for the specified key, or a default
/// constructed value if no such entry exists.
ValueTy lookup(StringRef Key) const {
const_iterator it = find(Key);
@@ -336,8 +338,8 @@ public:
StringMapEntryBase *&Bucket = TheTable[I];
if (Bucket && Bucket != getTombstoneVal()) {
static_cast<MapEntryTy*>(Bucket)->Destroy(Allocator);
- Bucket = 0;
}
+ Bucket = 0;
}
NumItems = 0;
@@ -427,7 +429,7 @@ public:
return Ptr != RHS.Ptr;
}
- inline StringMapConstIterator& operator++() { // Preincrement
+ inline StringMapConstIterator& operator++() { // Preincrement
++Ptr;
AdvancePastEmptyBuckets();
return *this;
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 292bde0cd900..224855e3f87c 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -11,7 +11,6 @@
#define LLVM_ADT_STRINGREF_H
#include "llvm/Support/type_traits.h"
-
#include <algorithm>
#include <cassert>
#include <cstring>
@@ -58,14 +57,14 @@ namespace llvm {
// integer works around this bug.
static size_t min(size_t a, size_t b) { return a < b ? a : b; }
static size_t max(size_t a, size_t b) { return a > b ? a : b; }
-
+
// Workaround memcmp issue with null pointers (undefined behavior)
// by providing a specialized version
static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
if (Length == 0) { return 0; }
return ::memcmp(Lhs,Rhs,Length);
}
-
+
public:
/// @name Constructors
/// @{
@@ -388,7 +387,7 @@ namespace llvm {
Start = min(Start, Length);
return StringRef(Data + Start, min(N, Length - Start));
}
-
+
/// Return a StringRef equal to 'this' but with the first \p N elements
/// dropped.
StringRef drop_front(unsigned N = 1) const {
@@ -536,7 +535,7 @@ namespace llvm {
return LHS.compare(RHS) != -1;
}
- inline std::string &operator+=(std::string &buffer, llvm::StringRef string) {
+ inline std::string &operator+=(std::string &buffer, StringRef string) {
return buffer.append(string.data(), string.size());
}
diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h
index b69a964a23ba..7bea577f34d3 100644
--- a/include/llvm/ADT/StringSet.h
+++ b/include/llvm/ADT/StringSet.h
@@ -18,23 +18,25 @@
namespace llvm {
- /// StringSet - A wrapper for StringMap that provides set-like
- /// functionality. Only insert() and count() methods are used by my
- /// code.
+ /// StringSet - A wrapper for StringMap that provides set-like functionality.
template <class AllocatorTy = llvm::MallocAllocator>
class StringSet : public llvm::StringMap<char, AllocatorTy> {
typedef llvm::StringMap<char, AllocatorTy> base;
public:
- bool insert(StringRef InLang) {
- assert(!InLang.empty());
- const char *KeyStart = InLang.data();
- const char *KeyEnd = KeyStart + InLang.size();
- llvm::StringMapEntry<char> *Entry = llvm::StringMapEntry<char>::
- Create(KeyStart, KeyEnd, base::getAllocator(), '+');
- if (!base::insert(Entry)) {
- Entry->Destroy(base::getAllocator());
+
+ /// insert - Insert the specified key into the set. If the key already
+ /// exists in the set, return false and ignore the request, otherwise insert
+ /// it and return true.
+ bool insert(StringRef Key) {
+ // Get or create the map entry for the key; if it didn't already exist, the
+ // value is default constructed, which is how we detect a fresh insert.
+ //
+ // We use '+' as the sentinel value in the map.
+ assert(!Key.empty());
+ StringMapEntry<char> &Entry = this->GetOrCreateValue(Key);
+ if (Entry.getValue() == '+')
return false;
- }
+ Entry.setValue('+');
return true;
}
};
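A sketch of the new contract (hypothetical names; keys must be non-empty per the assert):

  #include "llvm/ADT/StringSet.h"

  bool addName(llvm::StringSet<> &Names, llvm::StringRef Name) {
    // true on first insertion; false (and no change) if already present.
    return Names.insert(Name);
  }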
diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h
index d3d33b8adde1..cc0e7b63819c 100644
--- a/include/llvm/ADT/TinyPtrVector.h
+++ b/include/llvm/ADT/TinyPtrVector.h
@@ -70,7 +70,7 @@ public:
return *this;
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
TinyPtrVector(TinyPtrVector &&RHS) : Val(RHS.Val) {
RHS.Val = (EltTy)0;
}
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 408d70cf76f8..8fac222c13e3 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -44,7 +44,7 @@ public:
UnknownArch,
arm, // ARM; arm, armv.*, xscale
- cellspu, // CellSPU: spu, cellspu
+ aarch64, // AArch64: aarch64
hexagon, // Hexagon: hexagon
mips, // MIPS: mips, mipsallegrex
mipsel, // MIPSEL: mipsel, mipsallegrexel
@@ -101,8 +101,8 @@ public:
Haiku,
Minix,
RTEMS,
- NativeClient,
- CNK, // BG/P Compute-Node Kernel
+ NaCl, // Native Client
+ CNK, // BG/P Compute-Node Kernel
Bitrig,
AIX
};
@@ -112,6 +112,7 @@ public:
GNU,
GNUEABI,
GNUEABIHF,
+ GNUX32,
EABI,
MachO,
Android,
@@ -296,9 +297,14 @@ public:
return getOS() == Triple::Darwin || getOS() == Triple::MacOSX;
}
+ /// Is this an iOS triple.
+ bool isiOS() const {
+ return getOS() == Triple::IOS;
+ }
+
/// isOSDarwin - Is this a "Darwin" OS (OS X or iOS).
bool isOSDarwin() const {
- return isMacOSX() || getOS() == Triple::IOS;
+ return isMacOSX() || isiOS();
}
/// \brief Tests for either Cygwin or MinGW OS
@@ -311,6 +317,11 @@ public:
return getOS() == Triple::Win32 || isOSCygMing();
}
+ /// \brief Tests whether the OS is NaCl (Native Client)
+ bool isOSNaCl() const {
+ return getOS() == Triple::NaCl;
+ }
+
/// \brief Tests whether the OS uses the ELF binary format.
bool isOSBinFormatELF() const {
return !isOSDarwin() && !isOSWindows();
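Illustrative queries against the renamed and added predicates (hypothetical triple string; parsing of "nacl" as the OS component is an assumption based on the renamed enumerator):

  #include "llvm/ADT/Triple.h"

  bool isSandboxedELF() {
    llvm::Triple T("x86_64-unknown-nacl");
    // NaCl is neither Darwin nor Windows, so isOSBinFormatELF() holds.
    return T.isOSNaCl() && T.isOSBinFormatELF();
  }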
diff --git a/include/llvm/ADT/ValueMap.h b/include/llvm/ADT/ValueMap.h
index d23fccf3e8cc..b4fed7a0ebd2 100644
--- a/include/llvm/ADT/ValueMap.h
+++ b/include/llvm/ADT/ValueMap.h
@@ -27,10 +27,9 @@
#define LLVM_ADT_VALUEMAP_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Mutex.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/type_traits.h"
-#include "llvm/Support/Mutex.h"
-
#include <iterator>
namespace llvm {
diff --git a/include/llvm/ADT/VariadicFunction.h b/include/llvm/ADT/VariadicFunction.h
index a7f83a6bca9d..0497aa70887c 100644
--- a/include/llvm/ADT/VariadicFunction.h
+++ b/include/llvm/ADT/VariadicFunction.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_VARIADIC_FUNCTION_H
-#define LLVM_ADT_VARIADIC_FUNCTION_H
+#ifndef LLVM_ADT_VARIADICFUNCTION_H
+#define LLVM_ADT_VARIADICFUNCTION_H
#include "llvm/ADT/ArrayRef.h"
@@ -328,4 +328,4 @@ struct VariadicFunction3 {
} // end namespace llvm
-#endif // LLVM_ADT_VARIADIC_FUNCTION_H
+#endif // LLVM_ADT_VARIADICFUNCTION_H
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index 7f5cd1718142..71dab2ef551c 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -234,17 +234,17 @@ public:
pointer getNodePtrUnchecked() const { return NodePtr; }
};
-// do not implement. this is to catch errors when people try to use
-// them as random access iterators
+// These are to catch errors when people try to use them as random access
+// iterators.
template<typename T>
-void operator-(int, ilist_iterator<T>);
+void operator-(int, ilist_iterator<T>) LLVM_DELETED_FUNCTION;
template<typename T>
-void operator-(ilist_iterator<T>,int);
+void operator-(ilist_iterator<T>,int) LLVM_DELETED_FUNCTION;
template<typename T>
-void operator+(int, ilist_iterator<T>);
+void operator+(int, ilist_iterator<T>) LLVM_DELETED_FUNCTION;
template<typename T>
-void operator+(ilist_iterator<T>,int);
+void operator+(ilist_iterator<T>,int) LLVM_DELETED_FUNCTION;
// operator!=/operator== - Allow mixed comparisons without dereferencing
// the iterator, which could very likely be pointing to end().
@@ -274,12 +274,12 @@ template<typename From> struct simplify_type;
template<typename NodeTy> struct simplify_type<ilist_iterator<NodeTy> > {
typedef NodeTy* SimpleType;
- static SimpleType getSimplifiedValue(const ilist_iterator<NodeTy> &Node) {
+ static SimpleType getSimplifiedValue(ilist_iterator<NodeTy> &Node) {
return &*Node;
}
};
template<typename NodeTy> struct simplify_type<const ilist_iterator<NodeTy> > {
- typedef NodeTy* SimpleType;
+ typedef /*const*/ NodeTy* SimpleType;
static SimpleType getSimplifiedValue(const ilist_iterator<NodeTy> &Node) {
return &*Node;
@@ -465,6 +465,17 @@ public:
return where;
}
+ /// Remove all nodes from the list like clear(), but do not call
+ /// removeNodeFromList() or deleteNode().
+ ///
+ /// This should only be used immediately before freeing nodes in bulk to
+ /// avoid traversing the list and bringing all the nodes into cache.
+ void clearAndLeakNodesUnsafely() {
+ if (Head) {
+ Head = getTail();
+ this->setPrev(Head, Head);
+ }
+ }
private:
// transfer - The heart of the splice function. Move linked list nodes from
@@ -472,6 +483,10 @@ private:
//
void transfer(iterator position, iplist &L2, iterator first, iterator last) {
assert(first != last && "Should be checked by callers");
+ // Position cannot be contained in the range to be transferred.
+ // Check for the most common mistake.
+ assert(position != first &&
+ "Insertion point can't be one of the transferred nodes");
if (position != last) {
// Note: we have to be careful about the case when we move the first node
diff --git a/include/llvm/ADT/ilist_node.h b/include/llvm/ADT/ilist_node.h
index f0080035cb88..03612440e7ac 100644
--- a/include/llvm/ADT/ilist_node.h
+++ b/include/llvm/ADT/ilist_node.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_ILIST_NODE_H
-#define LLVM_ADT_ILIST_NODE_H
+#ifndef LLVM_ADT_ILISTNODE_H
+#define LLVM_ADT_ILISTNODE_H
namespace llvm {
diff --git a/include/llvm/AddressingMode.h b/include/llvm/AddressingMode.h
deleted file mode 100644
index 70b3c05238c5..000000000000
--- a/include/llvm/AddressingMode.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===--------- llvm/AddressingMode.h - Addressing Mode -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This file contains addressing mode data structures which are shared
-// between LSR and a number of places in the codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ADDRESSING_MODE_H
-#define LLVM_ADDRESSING_MODE_H
-
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-class GlobalValue;
-
-/// AddrMode - This represents an addressing mode of:
-/// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
-/// If BaseGV is null, there is no BaseGV.
-/// If BaseOffs is zero, there is no base offset.
-/// If HasBaseReg is false, there is no base register.
-/// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
-/// no scale.
-///
-struct AddrMode {
- GlobalValue *BaseGV;
- int64_t BaseOffs;
- bool HasBaseReg;
- int64_t Scale;
- AddrMode() : BaseGV(0), BaseOffs(0), HasBaseReg(false), Scale(0) {}
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index be274afd1552..d703f21c021c 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -34,11 +34,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_ALIAS_ANALYSIS_H
-#define LLVM_ANALYSIS_ALIAS_ANALYSIS_H
+#ifndef LLVM_ANALYSIS_ALIASANALYSIS_H
+#define LLVM_ANALYSIS_ALIASANALYSIS_H
-#include "llvm/Support/CallSite.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/CallSite.h"
namespace llvm {
@@ -373,7 +373,7 @@ public:
return getModRefInfo(I, Location(P, Size));
}
- /// getModRefInfo (for call sites) - Return whether information about whether
+ /// getModRefInfo (for call sites) - Return information about whether
/// a particular call site modifies or reads the specified memory location.
virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
const Location &Loc);
@@ -384,7 +384,7 @@ public:
return getModRefInfo(CS, Location(P, Size));
}
- /// getModRefInfo (for calls) - Return whether information about whether
+ /// getModRefInfo (for calls) - Return information about whether
/// a particular call modifies or reads the specified memory location.
ModRefResult getModRefInfo(const CallInst *C, const Location &Loc) {
return getModRefInfo(ImmutableCallSite(C), Loc);
@@ -395,7 +395,7 @@ public:
return getModRefInfo(C, Location(P, Size));
}
- /// getModRefInfo (for invokes) - Return whether information about whether
+ /// getModRefInfo (for invokes) - Return information about whether
/// a particular invoke modifies or reads the specified memory location.
ModRefResult getModRefInfo(const InvokeInst *I,
const Location &Loc) {
@@ -408,7 +408,7 @@ public:
return getModRefInfo(I, Location(P, Size));
}
- /// getModRefInfo (for loads) - Return whether information about whether
+ /// getModRefInfo (for loads) - Return information about whether
/// a particular load modifies or reads the specified memory location.
ModRefResult getModRefInfo(const LoadInst *L, const Location &Loc);
@@ -417,7 +417,7 @@ public:
return getModRefInfo(L, Location(P, Size));
}
- /// getModRefInfo (for stores) - Return whether information about whether
+ /// getModRefInfo (for stores) - Return information about whether
/// a particular store modifies or reads the specified memory location.
ModRefResult getModRefInfo(const StoreInst *S, const Location &Loc);
@@ -426,7 +426,7 @@ public:
return getModRefInfo(S, Location(P, Size));
}
- /// getModRefInfo (for fences) - Return whether information about whether
+ /// getModRefInfo (for fences) - Return information about whether
/// a particular fence modifies or reads the specified memory location.
ModRefResult getModRefInfo(const FenceInst *S, const Location &Loc) {
// Conservatively correct. (We could possibly be a bit smarter if
@@ -439,7 +439,7 @@ public:
return getModRefInfo(S, Location(P, Size));
}
- /// getModRefInfo (for cmpxchges) - Return whether information about whether
+ /// getModRefInfo (for cmpxchges) - Return information about whether
/// a particular cmpxchg modifies or reads the specified memory location.
ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc);
@@ -449,7 +449,7 @@ public:
return getModRefInfo(CX, Location(P, Size));
}
- /// getModRefInfo (for atomicrmws) - Return whether information about whether
+ /// getModRefInfo (for atomicrmws) - Return information about whether
/// a particular atomicrmw modifies or reads the specified memory location.
ModRefResult getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc);
@@ -459,7 +459,7 @@ public:
return getModRefInfo(RMW, Location(P, Size));
}
- /// getModRefInfo (for va_args) - Return whether information about whether
+ /// getModRefInfo (for va_args) - Return information about whether
/// a particular va_arg modifies or reads the specified memory location.
ModRefResult getModRefInfo(const VAArgInst* I, const Location &Loc);
@@ -587,17 +587,12 @@ bool isNoAliasCall(const Value *V);
/// isIdentifiedObject - Return true if this pointer refers to a distinct and
/// identifiable object. This returns true for:
/// Global Variables and Functions (but not Global Aliases)
-/// Allocas and Mallocs
+/// Allocas
/// ByVal and NoAlias Arguments
-/// NoAlias returns
+/// NoAlias returns (e.g. calls to malloc)
///
bool isIdentifiedObject(const Value *V);
-/// isKnownNonNull - Return true if this pointer couldn't possibly be null by
-/// its definition. This returns true for allocas, non-extern-weak globals and
-/// byval arguments.
-bool isKnownNonNull(const Value *V);
-
} // End llvm namespace
#endif
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 1e606c81d9c7..da007072e559 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -17,11 +17,10 @@
#ifndef LLVM_ANALYSIS_ALIASSETTRACKER_H
#define LLVM_ANALYSIS_ALIASSETTRACKER_H
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
+#include "llvm/Support/ValueHandle.h"
#include <vector>
namespace llvm {
diff --git a/include/llvm/Analysis/BlockFrequencyImpl.h b/include/llvm/Analysis/BlockFrequencyImpl.h
index 5168ab78729b..b3e2d18eb2c6 100644
--- a/include/llvm/Analysis/BlockFrequencyImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyImpl.h
@@ -14,17 +14,17 @@
#ifndef LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H
#define LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H
-#include "llvm/BasicBlock.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <vector>
#include <string>
+#include <vector>
namespace llvm {
@@ -271,7 +271,7 @@ class BlockFrequencyImpl {
BlockT *EntryBlock = fn->begin();
- copy(po_begin(EntryBlock), po_end(EntryBlock), back_inserter(POT));
+ std::copy(po_begin(EntryBlock), po_end(EntryBlock), std::back_inserter(POT));
unsigned RPOidx = 0;
for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) {
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index c0567daa3a5e..6c23f7c3aeb3 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -14,10 +14,10 @@
#ifndef LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H
#define LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
namespace llvm {
diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h
index 4704a929acf6..fa596c3a3c99 100644
--- a/include/llvm/Analysis/CFGPrinter.h
+++ b/include/llvm/Analysis/CFGPrinter.h
@@ -15,10 +15,10 @@
#ifndef LLVM_ANALYSIS_CFGPRINTER_H
#define LLVM_ANALYSIS_CFGPRINTER_H
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/GraphWriter.h"
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index 6a9ed310375a..591484dd2782 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -51,13 +51,13 @@
#ifndef LLVM_ANALYSIS_CALLGRAPH_H
#define LLVM_ANALYSIS_CALLGRAPH_H
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/IncludeFile.h"
+#include "llvm/Support/ValueHandle.h"
#include <map>
namespace llvm {
diff --git a/include/llvm/Analysis/CallGraphSCCPass.h b/include/llvm/Analysis/CallGraphSCCPass.h
new file mode 100644
index 000000000000..e609dac11891
--- /dev/null
+++ b/include/llvm/Analysis/CallGraphSCCPass.h
@@ -0,0 +1,107 @@
+//===- CallGraphSCCPass.h - Pass that operates BU on call graph -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CallGraphSCCPass class, which is used for passes which
+// are implemented as bottom-up traversals on the call graph. Because there may
+// be cycles in the call graph, passes of this type operate on the call-graph in
+// SCC order: that is, they process function bottom-up, except for recursive
+// functions, which they process all at once.
+//
+// These passes are inherently interprocedural, and are required to keep the
+// call graph up-to-date if they do anything which could modify it.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CALLGRAPHSCCPASS_H
+#define LLVM_ANALYSIS_CALLGRAPHSCCPASS_H
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+class CallGraphNode;
+class CallGraph;
+class PMStack;
+class CallGraphSCC;
+
+class CallGraphSCCPass : public Pass {
+public:
+ explicit CallGraphSCCPass(char &pid) : Pass(PT_CallGraphSCC, pid) {}
+
+ /// createPrinterPass - Get a pass that prints the Module
+ /// corresponding to a CallGraph.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
+
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
+
+ /// doInitialization - This method is called before the SCCs of the program
+ /// have been processed, allowing the pass to do initialization as necessary.
+ virtual bool doInitialization(CallGraph &CG) {
+ return false;
+ }
+
+ /// runOnSCC - This method should be implemented by the subclass to perform
+ /// whatever action is necessary for the specified SCC. Note that
+ /// non-recursive (or only self-recursive) functions will have an SCC size of
+ /// 1, whereas recursive portions of the call graph will have SCC size > 1.
+ ///
+ /// SCC passes that add functions to or delete functions from the SCC are
+ /// required to update the SCC list, or stale pointers may be dereferenced.
+ ///
+ virtual bool runOnSCC(CallGraphSCC &SCC) = 0;
+
+ /// doFinalization - This method is called after the SCCs of the program have
+ /// been processed, allowing the pass to do final cleanup as necessary.
+ virtual bool doFinalization(CallGraph &CG) {
+ return false;
+ }
+
+ /// Assign a pass manager to manage this pass.
+ virtual void assignPassManager(PMStack &PMS,
+ PassManagerType PMT);
+
+ /// Return what kind of Pass Manager can manage this pass.
+ virtual PassManagerType getPotentialPassManagerType() const {
+ return PMT_CallGraphPassManager;
+ }
+
+ /// getAnalysisUsage - For this class, we declare that we require and preserve
+ /// the call graph. If the derived class implements this method, it should
+ /// always explicitly call the implementation here.
+ virtual void getAnalysisUsage(AnalysisUsage &Info) const;
+};
+
+/// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
+class CallGraphSCC {
+ void *Context; // The CGPassManager object that is vending this.
+ std::vector<CallGraphNode*> Nodes;
+public:
+ CallGraphSCC(void *context) : Context(context) {}
+
+ void initialize(CallGraphNode*const*I, CallGraphNode*const*E) {
+ Nodes.assign(I, E);
+ }
+
+ bool isSingular() const { return Nodes.size() == 1; }
+ unsigned size() const { return Nodes.size(); }
+
+ /// ReplaceNode - This informs the SCC and the pass manager that the specified
+ /// Old node has been deleted, and New is to be used in its place.
+ void ReplaceNode(CallGraphNode *Old, CallGraphNode *New);
+
+ typedef std::vector<CallGraphNode*>::const_iterator iterator;
+ iterator begin() const { return Nodes.begin(); }
+ iterator end() const { return Nodes.end(); }
+};
+
+} // End llvm namespace
+
+#endif
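A skeleton subclass as a sketch (hypothetical pass; registration boilerplate elided, and CallGraphNode::getFunction() is assumed from CallGraph.h):

  #include "llvm/Analysis/CallGraphSCCPass.h"

  namespace {
  struct DemoSCCPass : public llvm::CallGraphSCCPass {
    static char ID;  // pass identification
    DemoSCCPass() : CallGraphSCCPass(ID) {}

    virtual bool runOnSCC(llvm::CallGraphSCC &SCC) {
      for (llvm::CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
           I != E; ++I)
        if (llvm::Function *F = (*I)->getFunction())
          (void)F;  // inspect or transform F; SCC size > 1 means recursion
      return false; // report whether the SCC was modified
    }
  };
  char DemoSCCPass::ID = 0;
  }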
diff --git a/include/llvm/Analysis/CallPrinter.h b/include/llvm/Analysis/CallPrinter.h
new file mode 100644
index 000000000000..5f5d160c3ca0
--- /dev/null
+++ b/include/llvm/Analysis/CallPrinter.h
@@ -0,0 +1,27 @@
+//===-- CallPrinter.h - Call graph printer external interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines external functions that can be called to explicitly
+// instantiate the call graph printer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CALLPRINTER_H
+#define LLVM_ANALYSIS_CALLPRINTER_H
+
+namespace llvm {
+
+ class ModulePass;
+
+ ModulePass *createCallGraphViewerPass();
+ ModulePass *createCallGraphPrinterPass();
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h
index 2889269b957a..8edabfe860a1 100644
--- a/include/llvm/Analysis/CaptureTracking.h
+++ b/include/llvm/Analysis/CaptureTracking.h
@@ -14,12 +14,11 @@
#ifndef LLVM_ANALYSIS_CAPTURETRACKING_H
#define LLVM_ANALYSIS_CAPTURETRACKING_H
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Support/CallSite.h"
-
namespace llvm {
+
+ class Value;
+ class Use;
+
/// PointerMayBeCaptured - Return true if this pointer value may be captured
/// by the enclosing function (which is required to exist). This routine can
/// be expensive, so consider caching the results. The boolean ReturnCaptures
diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h
index 4398faa20a7b..086934d0e69b 100644
--- a/include/llvm/Analysis/CodeMetrics.h
+++ b/include/llvm/Analysis/CodeMetrics.h
@@ -19,77 +19,75 @@
#include "llvm/Support/CallSite.h"
namespace llvm {
- class BasicBlock;
- class Function;
- class Instruction;
- class DataLayout;
- class Value;
+class BasicBlock;
+class Function;
+class Instruction;
+class DataLayout;
+class TargetTransformInfo;
+class Value;
+
+/// \brief Check whether a call will lower to something small.
+///
+/// This test checks whether this callsite will lower to something
+/// significantly cheaper than a traditional call, often a single
+/// instruction. Note that if isInstructionFree(CS.getInstruction()) would
+/// return true, so will this function.
+bool callIsSmall(ImmutableCallSite CS);
+
+/// \brief Utility to calculate the size and a few similar metrics for a set
+/// of basic blocks.
+struct CodeMetrics {
+ /// \brief True if this function contains a call to setjmp or other functions
+ /// with attribute "returns twice" without having the attribute itself.
+ bool exposesReturnsTwice;
+
+ /// \brief True if this function calls itself.
+ bool isRecursive;
+
+ /// \brief True if this function cannot be duplicated.
+ ///
+ /// True if this function contains one or more indirect branches, or it
+ /// contains one or more 'noduplicate' instructions.
+ bool notDuplicatable;
+
+ /// \brief True if this function calls alloca (in the C sense).
+ bool usesDynamicAlloca;
+
+ /// \brief Number of instructions in the analyzed blocks.
+ unsigned NumInsts;
- /// \brief Check whether an instruction is likely to be "free" when lowered.
- bool isInstructionFree(const Instruction *I, const DataLayout *TD = 0);
+ /// \brief Number of analyzed blocks.
+ unsigned NumBlocks;
- /// \brief Check whether a call will lower to something small.
+ /// \brief Keeps track of basic block code size estimates.
+ DenseMap<const BasicBlock *, unsigned> NumBBInsts;
+
+ /// \brief Keep track of the number of calls to 'big' functions.
+ unsigned NumCalls;
+
+ /// \brief The number of calls to internal functions with a single caller.
///
- /// This tests checks whether this callsite will lower to something
- /// significantly cheaper than a traditional call, often a single
- /// instruction. Note that if isInstructionFree(CS.getInstruction()) would
- /// return true, so will this function.
- bool callIsSmall(ImmutableCallSite CS);
-
- /// \brief Utility to calculate the size and a few similar metrics for a set
- /// of basic blocks.
- struct CodeMetrics {
- /// \brief True if this function contains a call to setjmp or other functions
- /// with attribute "returns twice" without having the attribute itself.
- bool exposesReturnsTwice;
-
- /// \brief True if this function calls itself.
- bool isRecursive;
-
- /// \brief True if this function contains one or more indirect branches.
- bool containsIndirectBr;
-
- /// \brief True if this function calls alloca (in the C sense).
- bool usesDynamicAlloca;
-
- /// \brief Number of instructions in the analyzed blocks.
- unsigned NumInsts;
-
- /// \brief Number of analyzed blocks.
- unsigned NumBlocks;
-
- /// \brief Keeps track of basic block code size estimates.
- DenseMap<const BasicBlock *, unsigned> NumBBInsts;
-
- /// \brief Keep track of the number of calls to 'big' functions.
- unsigned NumCalls;
-
- /// \brief The number of calls to internal functions with a single caller.
- ///
- /// These are likely targets for future inlining, likely exposed by
- /// interleaved devirtualization.
- unsigned NumInlineCandidates;
-
- /// \brief How many instructions produce vector values.
- ///
- /// The inliner is more aggressive with inlining vector kernels.
- unsigned NumVectorInsts;
-
- /// \brief How many 'ret' instructions the blocks contain.
- unsigned NumRets;
-
- CodeMetrics() : exposesReturnsTwice(false), isRecursive(false),
- containsIndirectBr(false), usesDynamicAlloca(false),
- NumInsts(0), NumBlocks(0), NumCalls(0),
- NumInlineCandidates(0), NumVectorInsts(0),
- NumRets(0) {}
-
- /// \brief Add information about a block to the current state.
- void analyzeBasicBlock(const BasicBlock *BB, const DataLayout *TD = 0);
-
- /// \brief Add information about a function to the current state.
- void analyzeFunction(Function *F, const DataLayout *TD = 0);
- };
+ /// These are likely targets for future inlining, likely exposed by
+ /// interleaved devirtualization.
+ unsigned NumInlineCandidates;
+
+ /// \brief How many instructions produce vector values.
+ ///
+ /// The inliner is more aggressive with inlining vector kernels.
+ unsigned NumVectorInsts;
+
+ /// \brief How many 'ret' instructions the blocks contain.
+ unsigned NumRets;
+
+ CodeMetrics()
+ : exposesReturnsTwice(false), isRecursive(false), notDuplicatable(false),
+ usesDynamicAlloca(false), NumInsts(0), NumBlocks(0), NumCalls(0),
+ NumInlineCandidates(0), NumVectorInsts(0), NumRets(0) {}
+
+ /// \brief Add information about a block to the current state.
+ void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI);
+};
+
}
#endif
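
A sketch of how the struct above is typically populated, block by block (F and TTI are assumed to come from the enclosing pass; analyzeBasicBlock accumulates into the counters rather than resetting them):

  llvm::CodeMetrics Metrics;
  for (llvm::Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
    Metrics.analyzeBasicBlock(&*BB, TTI); // TTI: the target's cost model
  if (Metrics.notDuplicatable)
    return false; // e.g. a code duplicator bails out on noduplicate/indirectbr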
diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h
index b701b8fca5d4..0fc1c2dc360d 100644
--- a/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -11,27 +11,25 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_DOT_GRAPHTRAITS_PASS_H
-#define LLVM_ANALYSIS_DOT_GRAPHTRAITS_PASS_H
+#ifndef LLVM_ANALYSIS_DOTGRAPHTRAITSPASS_H
+#define LLVM_ANALYSIS_DOTGRAPHTRAITSPASS_H
-#include "llvm/Pass.h"
#include "llvm/Analysis/CFGPrinter.h"
+#include "llvm/Pass.h"
namespace llvm {
-template <class Analysis, bool Simple>
-struct DOTGraphTraitsViewer : public FunctionPass {
- std::string Name;
- DOTGraphTraitsViewer(std::string GraphName, char &ID) : FunctionPass(ID) {
- Name = GraphName;
- }
+template <class Analysis, bool Simple>
+class DOTGraphTraitsViewer : public FunctionPass {
+public:
+ DOTGraphTraitsViewer(StringRef GraphName, char &ID)
+ : FunctionPass(ID), Name(GraphName) {}
virtual bool runOnFunction(Function &F) {
- Analysis *Graph;
- std::string Title, GraphName;
- Graph = &getAnalysis<Analysis>();
- GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
- Title = GraphName + " for '" + F.getName().str() + "' function";
+ Analysis *Graph = &getAnalysis<Analysis>();
+ std::string GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
+ std::string Title = GraphName + " for '" + F.getName().str() + "' function";
+
ViewGraph(Graph, Name, Simple, Title);
return false;
@@ -41,36 +39,92 @@ struct DOTGraphTraitsViewer : public FunctionPass {
AU.setPreservesAll();
AU.addRequired<Analysis>();
}
+
+private:
+ std::string Name;
};
template <class Analysis, bool Simple>
-struct DOTGraphTraitsPrinter : public FunctionPass {
+class DOTGraphTraitsPrinter : public FunctionPass {
+public:
+ DOTGraphTraitsPrinter(StringRef GraphName, char &ID)
+ : FunctionPass(ID), Name(GraphName) {}
+
+ virtual bool runOnFunction(Function &F) {
+ Analysis *Graph = &getAnalysis<Analysis>();
+ std::string Filename = Name + "." + F.getName().str() + ".dot";
+ std::string ErrorInfo;
+
+ errs() << "Writing '" << Filename << "'...";
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+ std::string GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
+ std::string Title = GraphName + " for '" + F.getName().str() + "' function";
+
+ if (ErrorInfo.empty())
+ WriteGraph(File, Graph, Simple, Title);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<Analysis>();
+ }
+
+private:
std::string Name;
+};
+
+template <class Analysis, bool Simple>
+class DOTGraphTraitsModuleViewer : public ModulePass {
+public:
+ DOTGraphTraitsModuleViewer(StringRef GraphName, char &ID)
+ : ModulePass(ID), Name(GraphName) {}
- DOTGraphTraitsPrinter(std::string GraphName, char &ID)
- : FunctionPass(ID) {
- Name = GraphName;
+ virtual bool runOnModule(Module &M) {
+ Analysis *Graph = &getAnalysis<Analysis>();
+ std::string Title = DOTGraphTraits<Analysis*>::getGraphName(Graph);
+
+ ViewGraph(Graph, Name, Simple, Title);
+
+ return false;
}
- virtual bool runOnFunction(Function &F) {
- Analysis *Graph;
- std::string Filename = Name + "." + F.getName().str() + ".dot";
- errs() << "Writing '" << Filename << "'...";
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<Analysis>();
+ }
+private:
+ std::string Name;
+};
+
+template <class Analysis, bool Simple>
+class DOTGraphTraitsModulePrinter : public ModulePass {
+public:
+ DOTGraphTraitsModulePrinter(StringRef GraphName, char &ID)
+ : ModulePass(ID), Name(GraphName) {}
+
+ virtual bool runOnModule(Module &M) {
+ Analysis *Graph = &getAnalysis<Analysis>();
+ std::string Filename = Name + ".dot";
std::string ErrorInfo;
- raw_fd_ostream File(Filename.c_str(), ErrorInfo);
- Graph = &getAnalysis<Analysis>();
- std::string Title, GraphName;
- GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
- Title = GraphName + " for '" + F.getName().str() + "' function";
+ errs() << "Writing '" << Filename << "'...";
+
+ raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+ std::string Title = DOTGraphTraits<Analysis*>::getGraphName(Graph);
if (ErrorInfo.empty())
WriteGraph(File, Graph, Simple, Title);
else
errs() << " error opening file for writing!";
errs() << "\n";
+
return false;
}
@@ -78,6 +132,11 @@ struct DOTGraphTraitsPrinter : public FunctionPass {
AU.setPreservesAll();
AU.addRequired<Analysis>();
}
+
+private:
+ std::string Name;
};
-}
+
+} // end namespace llvm
+
#endif
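
For illustration, a hypothetical "-dot-foo" pass built on the templates above; FooAnalysis and a DOTGraphTraits<FooAnalysis*> specialization are assumed to exist elsewhere:

  struct FooGraphPrinter
      : public llvm::DOTGraphTraitsPrinter<FooAnalysis, /*Simple=*/false> {
    static char ID;
    FooGraphPrinter()
        : llvm::DOTGraphTraitsPrinter<FooAnalysis, false>("foo", ID) {}
  };
  char FooGraphPrinter::ID = 0;
  // Per runOnFunction above, running this on a function F writes
  // "foo.<F's name>.dot".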
diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h
index b4327eeb0b1e..a78ac5919acb 100644
--- a/include/llvm/Analysis/DependenceAnalysis.h
+++ b/include/llvm/Analysis/DependenceAnalysis.h
@@ -18,6 +18,16 @@
// of memory references in a function, returning either NULL, for no dependence,
// or a more-or-less detailed description of the dependence between them.
//
+// This pass exists to support the DependenceGraph pass. There are two separate
+// passes because there's a useful separation of concerns. A dependence exists
+// if two conditions are met:
+//
+// 1) Two instructions reference the same memory location, and
+// 2) There is a flow of control leading from one instruction to the other.
+//
+// DependenceAnalysis attacks the first condition; DependenceGraph will attack
+// the second (it's not yet ready).
+//
// Please note that this is work in progress and the interface is subject to
// change.
//
@@ -30,9 +40,9 @@
#ifndef LLVM_ANALYSIS_DEPENDENCEANALYSIS_H
#define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
namespace llvm {
class AliasAnalysis;
@@ -53,8 +63,8 @@ namespace llvm {
/// input dependences are unordered.
class Dependence {
public:
- Dependence(const Instruction *Source,
- const Instruction *Destination) :
+ Dependence(Instruction *Source,
+ Instruction *Destination) :
Src(Source), Dst(Destination) {}
virtual ~Dependence() {}
@@ -82,11 +92,11 @@ namespace llvm {
/// getSrc - Returns the source instruction for this dependence.
///
- const Instruction *getSrc() const { return Src; }
+ Instruction *getSrc() const { return Src; }
/// getDst - Returns the destination instruction for this dependence.
///
- const Instruction *getDst() const { return Dst; }
+ Instruction *getDst() const { return Dst; }
/// isInput - Returns true if this is an input dependence.
///
@@ -158,14 +168,14 @@ namespace llvm {
///
void dump(raw_ostream &OS) const;
private:
- const Instruction *Src, *Dst;
+ Instruction *Src, *Dst;
friend class DependenceAnalysis;
};
/// FullDependence - This class represents a dependence between two memory
/// references in a function. It contains detailed information about the
- /// dependence (direction vectors, etc) and is used when the compiler is
+ /// dependence (direction vectors, etc.) and is used when the compiler is
/// able to accurately analyze the interaction of the references; that is,
/// it is not a confused dependence (see Dependence). In most cases
/// (for output, flow, and anti dependences), the dependence implies an
@@ -173,12 +183,12 @@ namespace llvm {
/// input dependences are unordered.
class FullDependence : public Dependence {
public:
- FullDependence(const Instruction *Src,
- const Instruction *Dst,
+ FullDependence(Instruction *Src,
+ Instruction *Dst,
bool LoopIndependent,
unsigned Levels);
~FullDependence() {
- delete DV;
+ delete[] DV;
}
/// isLoopIndependent - Returns true if this is a loop-independent
@@ -234,8 +244,8 @@ namespace llvm {
/// DependenceAnalysis - This class is the main dependence-analysis driver.
///
class DependenceAnalysis : public FunctionPass {
- void operator=(const DependenceAnalysis &); // do not implement
- DependenceAnalysis(const DependenceAnalysis &); // do not implement
+ void operator=(const DependenceAnalysis &) LLVM_DELETED_FUNCTION;
+ DependenceAnalysis(const DependenceAnalysis &) LLVM_DELETED_FUNCTION;
public:
/// depends - Tests for a dependence between the Src and Dst instructions.
/// Returns NULL if no dependence; otherwise, returns a Dependence (or a
@@ -243,11 +253,11 @@ namespace llvm {
/// The flag PossiblyLoopIndependent should be set by the caller
/// if it appears that control flow can reach from Src to Dst
/// without traversing a loop back edge.
- Dependence *depends(const Instruction *Src,
- const Instruction *Dst,
+ Dependence *depends(Instruction *Src,
+ Instruction *Dst,
bool PossiblyLoopIndependent);
- /// getSplitIteration - Give a dependence that's splitable at some
+ /// getSplitIteration - Give a dependence that's splittable at some
/// particular level, return the iteration that should be used to split
/// the loop.
///
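
A hedged sketch of the depends() query above, called from a FunctionPass that declared DependenceAnalysis in its getAnalysisUsage; Src and Dst are assumed memory instructions, and the delete mirrors how in-tree clients dispose of the result:

  llvm::DependenceAnalysis &DA = getAnalysis<llvm::DependenceAnalysis>();
  if (llvm::Dependence *D =
          DA.depends(Src, Dst, /*PossiblyLoopIndependent=*/true)) {
    if (D->isFlow()) {
      // Read-after-write: Dst may read what Src wrote.
    }
    delete D; // assumed caller-owned, as in-tree users treat it
  }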
diff --git a/include/llvm/Analysis/DominatorInternals.h b/include/llvm/Analysis/DominatorInternals.h
index 0c29236dde96..c0f95cbd9b9b 100644
--- a/include/llvm/Analysis/DominatorInternals.h
+++ b/include/llvm/Analysis/DominatorInternals.h
@@ -10,8 +10,8 @@
#ifndef LLVM_ANALYSIS_DOMINATOR_INTERNALS_H
#define LLVM_ANALYSIS_DOMINATOR_INTERNALS_H
-#include "llvm/Analysis/Dominators.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Dominators.h"
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index 8940971558a3..81c04bb6b0fa 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -15,13 +15,13 @@
#ifndef LLVM_ANALYSIS_DOMINATORS_H
#define LLVM_ANALYSIS_DOMINATORS_H
-#include "llvm/Pass.h"
-#include "llvm/Function.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
@@ -101,18 +101,18 @@ public:
Children.clear();
}
- bool compare(DomTreeNodeBase<NodeT> *Other) {
+ bool compare(const DomTreeNodeBase<NodeT> *Other) const {
if (getNumChildren() != Other->getNumChildren())
return true;
- SmallPtrSet<NodeT *, 4> OtherChildren;
- for (iterator I = Other->begin(), E = Other->end(); I != E; ++I) {
- NodeT *Nd = (*I)->getBlock();
+ SmallPtrSet<const NodeT *, 4> OtherChildren;
+ for (const_iterator I = Other->begin(), E = Other->end(); I != E; ++I) {
+ const NodeT *Nd = (*I)->getBlock();
OtherChildren.insert(Nd);
}
- for (iterator I = begin(), E = end(); I != E; ++I) {
- NodeT *N = (*I)->getBlock();
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ const NodeT *N = (*I)->getBlock();
if (OtherChildren.count(N) == 0)
return true;
}
@@ -663,8 +663,7 @@ public:
// Initialize the roots list
for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F),
E = TraitsTy::nodes_end(&F); I != E; ++I) {
- if (std::distance(TraitsTy::child_begin(I),
- TraitsTy::child_end(I)) == 0)
+ if (TraitsTy::child_begin(I) == TraitsTy::child_end(I))
addRoot(I);
// Prepopulate maps so that we don't get iterator invalidation issues later.
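
The tree built here is mostly consumed through simple queries; a minimal sketch, where A and B are assumed basic blocks of the function the pass is running on:

  llvm::DominatorTree &DT = getAnalysis<llvm::DominatorTree>();
  if (DT.dominates(A, B)) {
    // Every path from the entry block to B passes through A.
  }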
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 9b98013a1683..c9828015be29 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -24,7 +24,6 @@ namespace llvm {
class DominatorTree;
class Instruction;
class Value;
-class IVUsers;
class ScalarEvolution;
class SCEV;
class IVUsers;
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index a075db33427d..bc7924e10fdc 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -14,122 +14,130 @@
#ifndef LLVM_ANALYSIS_INLINECOST_H
#define LLVM_ANALYSIS_INLINECOST_H
-#include "llvm/Function.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include <cassert>
#include <climits>
-#include <vector>
namespace llvm {
+class CallSite;
+class DataLayout;
+class Function;
+class TargetTransformInfo;
+
+namespace InlineConstants {
+ // Various magic constants used to adjust heuristics.
+ const int InstrCost = 5;
+ const int IndirectCallThreshold = 100;
+ const int CallPenalty = 25;
+ const int LastCallToStaticBonus = -15000;
+ const int ColdccPenalty = 2000;
+ const int NoreturnPenalty = 10000;
+ /// Do not inline functions which allocate this many bytes on the stack
+ /// when the caller is recursive.
+ const unsigned TotalAllocaSizeRecursiveCaller = 1024;
+}
+
+/// \brief Represents the cost of inlining a function.
+///
+/// This supports special values for functions which should "always" or
+/// "never" be inlined. Otherwise, the cost represents a unitless amount;
+/// smaller values increase the likelihood of the function being inlined.
+///
+/// Objects of this type also provide the adjusted threshold for inlining
+/// based on the information available for a particular callsite. They can be
+/// directly tested to determine if inlining should occur given the cost and
+/// threshold for this cost metric.
+class InlineCost {
+ enum SentinelValues {
+ AlwaysInlineCost = INT_MIN,
+ NeverInlineCost = INT_MAX
+ };
+
+ /// \brief The estimated cost of inlining this callsite.
+ const int Cost;
+
+ /// \brief The adjusted threshold against which this cost was computed.
+ const int Threshold;
+
+ // Trivial constructor, interesting logic in the factory functions below.
+ InlineCost(int Cost, int Threshold) : Cost(Cost), Threshold(Threshold) {}
+
+public:
+ static InlineCost get(int Cost, int Threshold) {
+ assert(Cost > AlwaysInlineCost && "Cost crosses sentinel value");
+ assert(Cost < NeverInlineCost && "Cost crosses sentinel value");
+ return InlineCost(Cost, Threshold);
+ }
+ static InlineCost getAlways() {
+ return InlineCost(AlwaysInlineCost, 0);
+ }
+ static InlineCost getNever() {
+ return InlineCost(NeverInlineCost, 0);
+ }
- class CallSite;
- class DataLayout;
-
- namespace InlineConstants {
- // Various magic constants used to adjust heuristics.
- const int InstrCost = 5;
- const int IndirectCallThreshold = 100;
- const int CallPenalty = 25;
- const int LastCallToStaticBonus = -15000;
- const int ColdccPenalty = 2000;
- const int NoreturnPenalty = 10000;
- /// Do not inline functions which allocate this many bytes on the stack
- /// when the caller is recursive.
- const unsigned TotalAllocaSizeRecursiveCaller = 1024;
+ /// \brief Test whether the inline cost is low enough for inlining.
+ operator bool() const {
+ return Cost < Threshold;
}
- /// \brief Represents the cost of inlining a function.
+ bool isAlways() const { return Cost == AlwaysInlineCost; }
+ bool isNever() const { return Cost == NeverInlineCost; }
+ bool isVariable() const { return !isAlways() && !isNever(); }
+
+ /// \brief Get the inline cost estimate.
+ /// It is an error to call this on an "always" or "never" InlineCost.
+ int getCost() const {
+ assert(isVariable() && "Invalid access of InlineCost");
+ return Cost;
+ }
+
+ /// \brief Get the cost delta from the threshold for inlining.
+ /// Only valid if the cost is of the variable kind. Returns a negative
+ /// value if the cost is too high to inline.
+ int getCostDelta() const { return Threshold - getCost(); }
+};
+
+/// \brief Cost analyzer used by inliner.
+class InlineCostAnalysis : public CallGraphSCCPass {
+ const DataLayout *TD;
+ const TargetTransformInfo *TTI;
+
+public:
+ static char ID;
+
+ InlineCostAnalysis();
+ ~InlineCostAnalysis();
+
+ // Pass interface implementation.
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnSCC(CallGraphSCC &SCC);
+
+ /// \brief Get an InlineCost object representing the cost of inlining this
+ /// callsite.
///
- /// This supports special values for functions which should "always" or
- /// "never" be inlined. Otherwise, the cost represents a unitless amount;
- /// smaller values increase the likelihood of the function being inlined.
+ /// Note that threshold is passed into this function. Only costs below the
+ /// threshold are computed with any accuracy. The threshold can be used to
+ /// bound the computation necessary to determine whether the cost is
+ /// sufficiently low to warrant inlining.
///
- /// Objects of this type also provide the adjusted threshold for inlining
- /// based on the information available for a particular callsite. They can be
- /// directly tested to determine if inlining should occur given the cost and
- /// threshold for this cost metric.
- class InlineCost {
- enum SentinelValues {
- AlwaysInlineCost = INT_MIN,
- NeverInlineCost = INT_MAX
- };
-
- /// \brief The estimated cost of inlining this callsite.
- const int Cost;
-
- /// \brief The adjusted threshold against which this cost was computed.
- const int Threshold;
-
- // Trivial constructor, interesting logic in the factory functions below.
- InlineCost(int Cost, int Threshold)
- : Cost(Cost), Threshold(Threshold) {}
-
- public:
- static InlineCost get(int Cost, int Threshold) {
- assert(Cost > AlwaysInlineCost && "Cost crosses sentinel value");
- assert(Cost < NeverInlineCost && "Cost crosses sentinel value");
- return InlineCost(Cost, Threshold);
- }
- static InlineCost getAlways() {
- return InlineCost(AlwaysInlineCost, 0);
- }
- static InlineCost getNever() {
- return InlineCost(NeverInlineCost, 0);
- }
-
- /// \brief Test whether the inline cost is low enough for inlining.
- operator bool() const {
- return Cost < Threshold;
- }
-
- bool isAlways() const { return Cost == AlwaysInlineCost; }
- bool isNever() const { return Cost == NeverInlineCost; }
- bool isVariable() const { return !isAlways() && !isNever(); }
-
- /// \brief Get the inline cost estimate.
- /// It is an error to call this on an "always" or "never" InlineCost.
- int getCost() const {
- assert(isVariable() && "Invalid access of InlineCost");
- return Cost;
- }
-
- /// \brief Get the cost delta from the threshold for inlining.
- /// Only valid if the cost is of the variable kind. Returns a negative
- /// value if the cost is too high to inline.
- int getCostDelta() const { return Threshold - getCost(); }
- };
+ /// Also note that calling this function *dynamically* computes the cost of
+ /// inlining the callsite. It is an expensive, heavyweight call.
+ InlineCost getInlineCost(CallSite CS, int Threshold);
+
+ /// \brief Get an InlineCost with the callee explicitly specified.
+ /// This allows you to calculate the cost of inlining a function via a
+ /// pointer. This behaves exactly as the version with no explicit callee
+ /// parameter in all other respects.
+ //
+ // Note: This is used by out-of-tree passes, please do not remove without
+ // adding a replacement API.
+ InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold);
+
+ /// \brief Minimal filter to detect invalid constructs for inlining.
+ bool isInlineViable(Function &Callee);
+};
- /// InlineCostAnalyzer - Cost analyzer used by inliner.
- class InlineCostAnalyzer {
- // DataLayout if available, or null.
- const DataLayout *TD;
-
- public:
- InlineCostAnalyzer(): TD(0) {}
-
- void setDataLayout(const DataLayout *TData) { TD = TData; }
-
- /// \brief Get an InlineCost object representing the cost of inlining this
- /// callsite.
- ///
- /// Note that threshold is passed into this function. Only costs below the
- /// threshold are computed with any accuracy. The threshold can be used to
- /// bound the computation necessary to determine whether the cost is
- /// sufficiently low to warrant inlining.
- InlineCost getInlineCost(CallSite CS, int Threshold);
- /// getCalledFunction - The heuristic used to determine if we should inline
- /// the function call or not. The callee is explicitly specified, to allow
- /// you to calculate the cost of inlining a function via a pointer. This
- /// behaves exactly as the version with no explicit callee parameter in all
- /// other respects.
- //
- // Note: This is used by out-of-tree passes, please do not remove without
- // adding a replacement API.
- InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold);
- };
}
#endif
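
A sketch of how the sentinel queries and the boolean conversion above combine at a call site; CS comes from the surrounding inliner, and 225 is only an assumed threshold (it mirrors a common default, not a value this header prescribes):

  llvm::InlineCostAnalysis &ICA = getAnalysis<llvm::InlineCostAnalysis>();
  llvm::InlineCost IC = ICA.getInlineCost(CS, 225); // assumed threshold
  if (IC.isAlways()) {
    // Inline unconditionally (e.g. an alwaysinline callee).
  } else if (IC.isNever()) {
    // Refuse to inline.
  } else if (IC) { // variable cost strictly below the adjusted threshold
    int Slack = IC.getCostDelta(); // how far under the threshold we landed
    (void)Slack;
  }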
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index e561e3742b64..d760a4cba1cf 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -14,17 +14,33 @@
// ("and i32 %x, %x" -> "%x"). If the simplification is also an instruction
// then it dominates the original instruction.
//
+// These routines implicitly resolve undef uses. The easiest way to be safe when
+// using these routines to obtain simplified values for existing instructions is
+// to always replace all uses of the instructions with the resulting simplified
+// values. This will prevent other code from seeing the same undef uses and
+// resolving them to different values.
+//
+// These routines are designed to tolerate moderately incomplete IR, such as
+// instructions that are not connected to basic blocks yet. However, they do
+// require that all the IR that they encounter be valid. In particular, they
+// require that all non-constant values be defined in the same function, and the
+// same call context of that function (and not split between caller and callee
+// contexts of a directly recursive call, for example).
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
#define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
+#include "llvm/IR/User.h"
+
namespace llvm {
template<typename T>
class ArrayRef;
class DominatorTree;
class Instruction;
class DataLayout;
+ class FastMathFlags;
class TargetLibraryInfo;
class Type;
class Value;
@@ -43,6 +59,28 @@ namespace llvm {
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
+ /// Given operands for an FAdd, see if we can fold the result. If not, this
+ /// returns null.
+ Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+ const DataLayout *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
+
+ /// Given operands for an FSub, see if we can fold the result. If not, this
+ /// returns null.
+ Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+ const DataLayout *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
+
+ /// Given operands for an FMul, see if we can fold the result. If not, this
+ /// returns null.
+ Value *SimplifyFMulInst(Value *LHS, Value *RHS,
+ FastMathFlags FMF,
+ const DataLayout *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
+
/// SimplifyMulInst - Given operands for a Mul, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyMulInst(Value *LHS, Value *RHS, const DataLayout *TD = 0,
@@ -57,7 +95,7 @@ namespace llvm {
/// SimplifyUDivInst - Given operands for a UDiv, see if we can
/// fold the result. If not, this returns null.
- Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout *TD = 0,
+ Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
@@ -69,7 +107,7 @@ namespace llvm {
/// SimplifySRemInst - Given operands for an SRem, see if we can
/// fold the result. If not, this returns null.
- Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout *TD = 0,
+ Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
@@ -88,7 +126,7 @@ namespace llvm {
/// SimplifyShlInst - Given operands for a Shl, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout *TD = 0,
+ const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
@@ -127,14 +165,14 @@ namespace llvm {
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *TD = 0,
+ const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *TD = 0,
+ const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
@@ -178,10 +216,28 @@ namespace llvm {
/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const DataLayout *TD = 0,
+ const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
+ /// \brief Given a function and iterators over arguments, see if we can fold
+ /// the result.
+ ///
+  /// If this call could not be simplified, returns null.
+ Value *SimplifyCall(Value *V, User::op_iterator ArgBegin,
+ User::op_iterator ArgEnd, const DataLayout *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
+
+ /// \brief Given a function and set of arguments, see if we can fold the
+ /// result.
+ ///
+  /// If this call could not be simplified, returns null.
+ Value *SimplifyCall(Value *V, ArrayRef<Value *> Args,
+ const DataLayout *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
+
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
Value *SimplifyInstruction(Instruction *I, const DataLayout *TD = 0,
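
Following the caveat added to the header comment above, a minimal sketch of the safe usage pattern (I, TD, TLI and DT assumed available in the caller):

  if (llvm::Value *V = llvm::SimplifyInstruction(I, TD, TLI, DT)) {
    I->replaceAllUsesWith(V); // replace *all* uses so undef is resolved once
    I->eraseFromParent();
  }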
diff --git a/include/llvm/Analysis/Interval.h b/include/llvm/Analysis/Interval.h
index ca8ad73131a9..5ce1260eca1f 100644
--- a/include/llvm/Analysis/Interval.h
+++ b/include/llvm/Analysis/Interval.h
@@ -17,8 +17,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_INTERVAL_H
-#define LLVM_INTERVAL_H
+#ifndef LLVM_ANALYSIS_INTERVAL_H
+#define LLVM_ANALYSIS_INTERVAL_H
#include "llvm/ADT/GraphTraits.h"
#include <vector>
diff --git a/include/llvm/Analysis/IntervalIterator.h b/include/llvm/Analysis/IntervalIterator.h
index 0968c7468e68..22067c4f3c82 100644
--- a/include/llvm/Analysis/IntervalIterator.h
+++ b/include/llvm/Analysis/IntervalIterator.h
@@ -30,11 +30,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_INTERVAL_ITERATOR_H
-#define LLVM_INTERVAL_ITERATOR_H
+#ifndef LLVM_ANALYSIS_INTERVALITERATOR_H
+#define LLVM_ANALYSIS_INTERVALITERATOR_H
#include "llvm/Analysis/IntervalPartition.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CFG.h"
#include <algorithm>
#include <set>
@@ -157,7 +157,7 @@ public:
private:
// ProcessInterval - This method is used during the construction of the
// interval graph. It walks through the source graph, recursively creating
- // an interval per invokation until the entire graph is covered. This uses
+ // an interval per invocation until the entire graph is covered. This uses
// the ProcessNode method to add all of the nodes to the interval.
//
// This method is templated because it may operate on two different source
diff --git a/include/llvm/Analysis/IntervalPartition.h b/include/llvm/Analysis/IntervalPartition.h
index bce84be2f4fd..8cade58cd324 100644
--- a/include/llvm/Analysis/IntervalPartition.h
+++ b/include/llvm/Analysis/IntervalPartition.h
@@ -20,8 +20,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_INTERVAL_PARTITION_H
-#define LLVM_INTERVAL_PARTITION_H
+#ifndef LLVM_ANALYSIS_INTERVALPARTITION_H
+#define LLVM_ANALYSIS_INTERVALPARTITION_H
#include "llvm/Analysis/Interval.h"
#include "llvm/Pass.h"
diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h
index 243234b75635..c01b210acf4b 100644
--- a/include/llvm/Analysis/LibCallAliasAnalysis.h
+++ b/include/llvm/Analysis/LibCallAliasAnalysis.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_LIBCALL_AA_H
-#define LLVM_ANALYSIS_LIBCALL_AA_H
+#ifndef LLVM_ANALYSIS_LIBCALLALIASANALYSIS_H
+#define LLVM_ANALYSIS_LIBCALLALIASANALYSIS_H
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Pass.h"
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index afc90c2f7441..ebcb76254111 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -14,7 +14,7 @@
#ifndef LLVM_ANALYSIS_LOADS_H
#define LLVM_ANALYSIS_LOADS_H
-#include "llvm/BasicBlock.h"
+#include "llvm/IR/BasicBlock.h"
namespace llvm {
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index c5d7b0128e74..783e347522d4 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -27,21 +27,16 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_LOOP_INFO_H
-#define LLVM_ANALYSIS_LOOP_INFO_H
+#ifndef LLVM_ANALYSIS_LOOPINFO_H
+#define LLVM_ANALYSIS_LOOPINFO_H
-#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Pass.h"
#include <algorithm>
-#include <map>
namespace llvm {
@@ -56,6 +51,7 @@ class DominatorTree;
class LoopInfo;
class Loop;
class PHINode;
+class raw_ostream;
template<class N, class M> class LoopInfoBase;
template<class N, class M> class LoopBase;
@@ -151,10 +147,10 @@ public:
/// block that is outside of the current loop.
///
bool isLoopExiting(const BlockT *BB) const {
- typedef GraphTraits<BlockT*> BlockTraits;
+ typedef GraphTraits<const BlockT*> BlockTraits;
for (typename BlockTraits::ChildIteratorType SI =
- BlockTraits::child_begin(const_cast<BlockT*>(BB)),
- SE = BlockTraits::child_end(const_cast<BlockT*>(BB)); SI != SE; ++SI) {
+ BlockTraits::child_begin(BB),
+ SE = BlockTraits::child_end(BB); SI != SE; ++SI) {
if (!contains(*SI))
return true;
}
@@ -169,8 +165,8 @@ public:
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
for (typename InvBlockTraits::ChildIteratorType I =
- InvBlockTraits::child_begin(const_cast<BlockT*>(H)),
- E = InvBlockTraits::child_end(const_cast<BlockT*>(H)); I != E; ++I)
+ InvBlockTraits::child_begin(H),
+ E = InvBlockTraits::child_end(H); I != E; ++I)
if (contains(*I))
++NumBackEdges;
@@ -381,6 +377,20 @@ public:
/// isSafeToClone - Return true if the loop body is safe to clone in practice.
bool isSafeToClone() const;
+ /// Returns true if the loop is annotated parallel.
+ ///
+ /// A parallel loop can be assumed to not contain any dependencies between
+ /// iterations by the compiler. That is, any loop-carried dependency checking
+ /// can be skipped completely when parallelizing the loop on the target
+ /// machine. Thus, if the parallel loop information originates from the
+ /// programmer, e.g. via the OpenMP parallel for pragma, it is the
+ /// programmer's responsibility to ensure there are no loop-carried
+ /// dependencies. The final execution order of the instructions across
+  /// iterations is not guaranteed; thus, the end result might or might not
+ /// implement actual concurrent execution of instructions across multiple
+ /// iterations.
+ bool isAnnotatedParallel() const;
+
/// hasDedicatedExits - Return true if no exit block for the loop
/// has a predecessor that is outside the loop.
bool hasDedicatedExits() const;
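
A sketch of how a transform might consume the annotation documented above (L is an assumed Loop*):

  if (L->isAnnotatedParallel()) {
    // The annotation (e.g. from an OpenMP-style pragma) asserts there are
    // no loop-carried dependencies; cross-iteration checks can be skipped.
  } else {
    // Fall back to full loop-carried dependence analysis.
  }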
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 3bb96f96bf52..5485f3c0c04c 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -12,11 +12,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_LOOP_INFO_IMPL_H
-#define LLVM_ANALYSIS_LOOP_INFO_IMPL_H
+#ifndef LLVM_ANALYSIS_LOOPINFOIMPL_H
+#define LLVM_ANALYSIS_LOOPINFOIMPL_H
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/LoopInfo.h"
namespace llvm {
diff --git a/include/llvm/Analysis/LoopIterator.h b/include/llvm/Analysis/LoopIterator.h
index 68f25f74bc28..e3dd96354c65 100644
--- a/include/llvm/Analysis/LoopIterator.h
+++ b/include/llvm/Analysis/LoopIterator.h
@@ -21,10 +21,9 @@
// reachable from the loop header.
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_LOOP_ITERATOR_H
-#define LLVM_ANALYSIS_LOOP_ITERATOR_H
+#ifndef LLVM_ANALYSIS_LOOPITERATOR_H
+#define LLVM_ANALYSIS_LOOPITERATOR_H
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/LoopInfo.h"
diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h
index e6ed9bccee31..5767c1916b39 100644
--- a/include/llvm/Analysis/LoopPass.h
+++ b/include/llvm/Analysis/LoopPass.h
@@ -12,13 +12,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LOOP_PASS_H
-#define LLVM_LOOP_PASS_H
+#ifndef LLVM_ANALYSIS_LOOPPASS_H
+#define LLVM_ANALYSIS_LOOPPASS_H
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManagers.h"
-#include "llvm/Function.h"
#include <deque>
namespace llvm {
@@ -39,6 +38,9 @@ public:
// whatever action is necessary for the specified Loop.
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) = 0;
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
+
// Initialization and finalization hooks.
virtual bool doInitialization(Loop *L, LPPassManager &LPM) {
return false;
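
A skeletal loop pass against the interface above; the pass name is hypothetical, and note that the using-declarations added in this hunk keep the Pass-level doInitialization/doFinalization overloads visible alongside the Loop-level ones:

  struct CountLoopBlocks : public llvm::LoopPass { // hypothetical pass
    static char ID;
    CountLoopBlocks() : llvm::LoopPass(ID) {}
    virtual bool runOnLoop(llvm::Loop *L, llvm::LPPassManager &LPM) {
      unsigned N = L->getNumBlocks(); // analysis only, IR is untouched
      (void)N;
      return false; // no modification
    }
  };
  char CountLoopBlocks::ID = 0;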
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index a842898e4100..63262eb9a364 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -15,12 +15,12 @@
#ifndef LLVM_ANALYSIS_MEMORYBUILTINS_H
#define LLVM_ANALYSIS_MEMORYBUILTINS_H
-#include "llvm/IRBuilder.h"
-#include "llvm/Operator.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/InstVisitor.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/TargetFolder.h"
#include "llvm/Support/ValueHandle.h"
@@ -138,12 +138,22 @@ static inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) {
//
/// \brief Compute the size of the object pointed by Ptr. Returns true and the
-/// object size in Size if successful, and false otherwise.
+/// object size in Size if successful, and false otherwise. In this context, by
+/// object we mean the region of memory starting at Ptr and extending to the
+/// end of the underlying object pointed to by Ptr.
/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
/// byval arguments, and global variables.
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
const TargetLibraryInfo *TLI, bool RoundToAlign = false);
+/// \brief Compute the size of the underlying object pointed to by Ptr. Returns
+/// true and the object size in Size if successful, and false otherwise.
+/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
+/// byval arguments, and global variables.
+bool getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ bool RoundToAlign = false);
+
typedef std::pair<APInt, APInt> SizeOffsetType;
@@ -153,12 +163,14 @@ typedef std::pair<APInt, APInt> SizeOffsetType;
class ObjectSizeOffsetVisitor
: public InstVisitor<ObjectSizeOffsetVisitor, SizeOffsetType> {
+ typedef DenseMap<const Value*, SizeOffsetType> CacheMapTy;
+
const DataLayout *TD;
const TargetLibraryInfo *TLI;
bool RoundToAlign;
unsigned IntTyBits;
APInt Zero;
- SmallPtrSet<Instruction *, 8> SeenInsts;
+ CacheMapTy CacheMap;
APInt align(APInt Size, uint64_t Align);
@@ -191,6 +203,7 @@ public:
SizeOffsetType visitExtractElementInst(ExtractElementInst &I);
SizeOffsetType visitExtractValueInst(ExtractValueInst &I);
SizeOffsetType visitGEPOperator(GEPOperator &GEP);
+ SizeOffsetType visitGlobalAlias(GlobalAlias &GA);
SizeOffsetType visitGlobalVariable(GlobalVariable &GV);
SizeOffsetType visitIntToPtrInst(IntToPtrInst&);
SizeOffsetType visitLoadInst(LoadInst &I);
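
A sketch contrasting the two size queries above (Ptr, TD and TLI assumed from context); the distinction is exactly the one the new comments draw:

  uint64_t Size;
  if (llvm::getObjectSize(Ptr, Size, TD, TLI)) {
    // Size: bytes from Ptr to the end of the underlying object.
  }
  if (llvm::getUnderlyingObjectSize(Ptr, Size, TD, TLI)) {
    // Size: the whole underlying object, independent of Ptr's offset into it.
  }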
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index a715eaeee11c..47afd1b77b0e 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -11,17 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_MEMORY_DEPENDENCE_H
-#define LLVM_ANALYSIS_MEMORY_DEPENDENCE_H
+#ifndef LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H
+#define LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H
-#include "llvm/BasicBlock.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/ValueHandle.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ValueHandle.h"
namespace llvm {
class Function;
@@ -34,14 +34,14 @@ namespace llvm {
class PredIteratorCache;
class DominatorTree;
class PHITransAddr;
-
+
/// MemDepResult - A memory dependence query can return one of three different
/// answers, described below.
class MemDepResult {
enum DepType {
/// Invalid - Clients of MemDep never see this.
Invalid = 0,
-
+
/// Clobber - This is a dependence on the specified instruction which
/// clobbers the desired value. The pointer member of the MemDepResult
/// pair holds the instruction that clobbers the memory. For example,
@@ -72,7 +72,7 @@ namespace llvm {
/// and no intervening clobbers. No validation is done that the
/// operands to the calls are the same.
Def,
-
+
/// Other - This marker indicates that the query has no known dependency
/// in the specified block. More detailed state info is encoded in the
/// upper part of the pair (i.e. the Instruction*)
@@ -99,7 +99,7 @@ namespace llvm {
explicit MemDepResult(PairTy V) : Value(V) {}
public:
MemDepResult() : Value(0, Invalid) {}
-
+
/// get methods: These are static ctor methods for creating various
/// MemDepResult kinds.
static MemDepResult getDef(Instruction *Inst) {
@@ -130,7 +130,7 @@ namespace llvm {
/// isDef - Return true if this MemDepResult represents a query that is
/// an instruction definition dependency.
bool isDef() const { return Value.getInt() == Def; }
-
+
/// isNonLocal - Return true if this MemDepResult represents a query that
/// is transparent to the start of the block, but where a non-local hasn't
/// been done.
@@ -145,7 +145,7 @@ namespace llvm {
return Value.getInt() == Other
&& Value.getPointer() == reinterpret_cast<Instruction*>(NonFuncLocal);
}
-
+
/// isUnknown - Return true if this MemDepResult represents a query which
/// cannot and/or will not be computed.
bool isUnknown() const {
@@ -159,7 +159,7 @@ namespace llvm {
if (Value.getInt() == Other) return NULL;
return Value.getPointer();
}
-
+
bool operator==(const MemDepResult &M) const { return Value == M.Value; }
bool operator!=(const MemDepResult &M) const { return Value != M.Value; }
bool operator<(const MemDepResult &M) const { return Value < M.Value; }
@@ -175,11 +175,11 @@ namespace llvm {
/// In a default-constructed MemDepResult object, the type will be Dirty
/// and the instruction pointer will be null.
///
-
+
/// isDirty - Return true if this is a MemDepResult in its dirty/invalid
/// state.
bool isDirty() const { return Value.getInt() == Invalid; }
-
+
static MemDepResult getDirty(Instruction *Inst) {
return MemDepResult(PairTy(Inst, Invalid));
}
@@ -199,16 +199,16 @@ namespace llvm {
// BB is the sort key, it can't be changed.
BasicBlock *getBB() const { return BB; }
-
+
void setResult(const MemDepResult &R) { Result = R; }
const MemDepResult &getResult() const { return Result; }
-
+
bool operator<(const NonLocalDepEntry &RHS) const {
return BB < RHS.BB;
}
};
-
+
/// NonLocalDepResult - This is a result from a NonLocal dependence query.
/// For each BasicBlock (the BB entry) it keeps a MemDepResult and the
/// (potentially phi translated) address that was live in the block.
@@ -218,17 +218,17 @@ namespace llvm {
public:
NonLocalDepResult(BasicBlock *bb, MemDepResult result, Value *address)
: Entry(bb, result), Address(address) {}
-
+
// BB is the sort key, it can't be changed.
BasicBlock *getBB() const { return Entry.getBB(); }
-
+
void setResult(const MemDepResult &R, Value *Addr) {
Entry.setResult(R);
Address = Addr;
}
-
+
const MemDepResult &getResult() const { return Entry.getResult(); }
-
+
/// getAddress - Return the address of this pointer in this block. This can
/// be different than the address queried for the non-local result because
/// of phi translation. This returns null if the address was not available
@@ -238,7 +238,7 @@ namespace llvm {
/// The address is always null for a non-local 'call' dependence.
Value *getAddress() const { return Address; }
};
-
+
/// MemoryDependenceAnalysis - This is an analysis that determines, for a
/// given memory operation, what preceding memory operations it depends on.
/// It builds on alias analysis information, and tries to provide a lazy,
@@ -297,30 +297,30 @@ namespace llvm {
CachedNonLocalPointerInfo NonLocalPointerDeps;
// A map from instructions to their non-local pointer dependencies.
- typedef DenseMap<Instruction*,
+ typedef DenseMap<Instruction*,
SmallPtrSet<ValueIsLoadPair, 4> > ReverseNonLocalPtrDepTy;
ReverseNonLocalPtrDepTy ReverseNonLocalPtrDeps;
-
+
/// PerInstNLInfo - This is the instruction we keep for each cached access
/// that we have for an instruction. The pointer is an owning pointer and
/// the bool indicates whether we have any dirty bits in the set.
typedef std::pair<NonLocalDepInfo, bool> PerInstNLInfo;
-
+
// A map from instructions to their non-local dependencies.
typedef DenseMap<Instruction*, PerInstNLInfo> NonLocalDepMapType;
-
+
NonLocalDepMapType NonLocalDeps;
-
+
// A reverse mapping from dependencies to the dependees. This is
// used when removing instructions to keep the cache coherent.
typedef DenseMap<Instruction*,
SmallPtrSet<Instruction*, 4> > ReverseDepMapType;
ReverseDepMapType ReverseLocalDeps;
-
+
// A reverse mapping from dependencies to the non-local dependees.
ReverseDepMapType ReverseNonLocalDeps;
-
+
/// Current AA implementation, just a cache.
AliasAnalysis *AA;
DataLayout *TD;
@@ -333,15 +333,15 @@ namespace llvm {
/// Pass Implementation stuff. This doesn't do any analysis eagerly.
bool runOnFunction(Function &);
-
+
/// Clean up memory in between runs
void releaseMemory();
-
+
/// getAnalysisUsage - Does not modify anything. It uses Value Numbering
/// and Alias Analysis.
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
/// getDependency - Return the instruction on which a memory operation
/// depends. See the class comment for more details. It is illegal to call
/// this on non-memory instructions.
@@ -360,8 +360,8 @@ namespace llvm {
/// removed. Clients must copy this data if they want it around longer than
/// that.
const NonLocalDepInfo &getNonLocalCallDependency(CallSite QueryCS);
-
-
+
+
/// getNonLocalPointerDependency - Perform a full dependency query for an
/// access to the specified (non-volatile) memory location, returning the
/// set of instructions that either define or clobber the value.
@@ -374,7 +374,7 @@ namespace llvm {
/// removeInstruction - Remove an instruction from the dependence analysis,
/// updating the dependence of instructions that previously depended on it.
void removeInstruction(Instruction *InstToRemove);
-
+
/// invalidateCachedPointerInfo - This method is used to invalidate cached
/// information about the specified pointer, because it may be too
/// conservative in memdep. This is an optional call that can be used when
@@ -387,20 +387,23 @@ namespace llvm {
/// This needs to be done when the CFG changes, e.g., due to splitting
/// critical edges.
void invalidateCachedPredecessors();
-
+
/// getPointerDependencyFrom - Return the instruction on which a memory
/// location depends. If isLoad is true, this routine ignores may-aliases
/// with read-only operations. If isLoad is false, this routine ignores
- /// may-aliases with reads from read-only locations.
+ /// may-aliases with reads from read-only locations. If possible, pass
+ /// the query instruction as well; this function may take advantage of
+ /// the metadata annotated to the query instruction to refine the result.
///
/// Note that this is an uncached query, and thus may be inefficient.
///
MemDepResult getPointerDependencyFrom(const AliasAnalysis::Location &Loc,
- bool isLoad,
+ bool isLoad,
BasicBlock::iterator ScanIt,
- BasicBlock *BB);
-
-
+ BasicBlock *BB,
+ Instruction *QueryInst = 0);
+
+
/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
/// looks at a memory location for a load (specified by MemLocBase, Offs,
/// and Size) and compares it against a load. If the specified load could
@@ -413,7 +416,7 @@ namespace llvm {
unsigned MemLocSize,
const LoadInst *LI,
const DataLayout &TD);
-
+
private:
MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
BasicBlock::iterator ScanIt,
@@ -430,11 +433,11 @@ namespace llvm {
unsigned NumSortedEntries);
void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P);
-
+
/// verifyRemoved - Verify that the specified instruction does not occur
/// in our internal data structures.
void verifyRemoved(Instruction *Inst) const;
-
+
};
} // End llvm namespace
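
A hedged sketch of the query paths above, from a pass that required MemoryDependenceAnalysis; LI is an assumed load instruction in the current function:

  llvm::MemoryDependenceAnalysis &MDA =
      getAnalysis<llvm::MemoryDependenceAnalysis>();
  llvm::MemDepResult Dep = MDA.getDependency(LI); // cached, block-local query
  if (Dep.isDef() || Dep.isClobber()) {
    llvm::Instruction *DepInst = Dep.getInst(); // e.g. a store feeding LI
    (void)DepInst;
  } else if (Dep.isNonLocal()) {
    // Walk predecessors with getNonLocalPointerDependency() instead.
  }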
diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h
index 5a77fcebafa0..d7a3dd889a1b 100644
--- a/include/llvm/Analysis/PHITransAddr.h
+++ b/include/llvm/Analysis/PHITransAddr.h
@@ -14,8 +14,8 @@
#ifndef LLVM_ANALYSIS_PHITRANSADDR_H
#define LLVM_ANALYSIS_PHITRANSADDR_H
-#include "llvm/Instruction.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Instruction.h"
namespace llvm {
class DominatorTree;
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index 27726f49bcce..ae117135db93 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -198,9 +198,6 @@ namespace llvm {
// analyze.
FunctionPass *createInstCountPass();
- // print debug info intrinsics in human readable form
- FunctionPass *createDbgInfoPrinterPass();
-
//===--------------------------------------------------------------------===//
//
// createRegionInfoPass - This pass finds all single entry single exit regions
diff --git a/include/llvm/Analysis/PathNumbering.h b/include/llvm/Analysis/PathNumbering.h
index 7025e28484cc..400a37d8293f 100644
--- a/include/llvm/Analysis/PathNumbering.h
+++ b/include/llvm/Analysis/PathNumbering.h
@@ -23,14 +23,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_PATH_NUMBERING_H
-#define LLVM_PATH_NUMBERING_H
+#ifndef LLVM_ANALYSIS_PATHNUMBERING_H
+#define LLVM_ANALYSIS_PATHNUMBERING_H
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
#include <map>
#include <stack>
#include <vector>
diff --git a/include/llvm/Analysis/PathProfileInfo.h b/include/llvm/Analysis/PathProfileInfo.h
index cef6d2d2a6c8..4fce16ef0d56 100644
--- a/include/llvm/Analysis/PathProfileInfo.h
+++ b/include/llvm/Analysis/PathProfileInfo.h
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_PATHPROFILEINFO_H
-#define LLVM_PATHPROFILEINFO_H
+#ifndef LLVM_ANALYSIS_PATHPROFILEINFO_H
+#define LLVM_ANALYSIS_PATHPROFILEINFO_H
-#include "llvm/BasicBlock.h"
#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/IR/BasicBlock.h"
namespace llvm {
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index 0eddb9105e60..d082297454a1 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_POST_DOMINATORS_H
-#define LLVM_ANALYSIS_POST_DOMINATORS_H
+#ifndef LLVM_ANALYSIS_POSTDOMINATORS_H
+#define LLVM_ANALYSIS_POSTDOMINATORS_H
#include "llvm/Analysis/Dominators.h"
diff --git a/include/llvm/Analysis/ProfileDataLoader.h b/include/llvm/Analysis/ProfileDataLoader.h
index 9efbafcef41c..90097f79951d 100644
--- a/include/llvm/Analysis/ProfileDataLoader.h
+++ b/include/llvm/Analysis/ProfileDataLoader.h
@@ -16,6 +16,7 @@
#ifndef LLVM_ANALYSIS_PROFILEDATALOADER_H
#define LLVM_ANALYSIS_PROFILEDATALOADER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
diff --git a/include/llvm/Analysis/ProfileInfo.h b/include/llvm/Analysis/ProfileInfo.h
index 6c2e2732d344..5d17fa1220e1 100644
--- a/include/llvm/Analysis/ProfileInfo.h
+++ b/include/llvm/Analysis/ProfileInfo.h
@@ -26,9 +26,9 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
-#include <string>
#include <map>
#include <set>
+#include <string>
namespace llvm {
class Pass;
diff --git a/include/llvm/Analysis/ProfileInfoLoader.h b/include/llvm/Analysis/ProfileInfoLoader.h
index dcf3b38ddcd5..e0f49f3179bc 100644
--- a/include/llvm/Analysis/ProfileInfoLoader.h
+++ b/include/llvm/Analysis/ProfileInfoLoader.h
@@ -16,9 +16,9 @@
#ifndef LLVM_ANALYSIS_PROFILEINFOLOADER_H
#define LLVM_ANALYSIS_PROFILEINFOLOADER_H
-#include <vector>
#include <string>
#include <utility>
+#include <vector>
namespace llvm {
diff --git a/include/llvm/Analysis/PtrUseVisitor.h b/include/llvm/Analysis/PtrUseVisitor.h
new file mode 100644
index 000000000000..1802fe88e356
--- /dev/null
+++ b/include/llvm/Analysis/PtrUseVisitor.h
@@ -0,0 +1,285 @@
+//===- PtrUseVisitor.h - InstVisitors over a pointer's uses -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides a collection of visitors which walk the (instruction)
+/// uses of a pointer. These visitors all provide the same essential behavior
+/// as an InstVisitor with similar template-based flexibility and
+/// implementation strategies.
+///
+/// These can be used, for example, to quickly analyze the uses of an alloca,
+/// global variable, or function argument.
+///
+/// FIXME: Provide a variant which doesn't track offsets and is cheaper.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_PTRUSEVISITOR_H
+#define LLVM_ANALYSIS_PTRUSEVISITOR_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+namespace detail {
+/// \brief Implementation of non-dependent functionality for \c PtrUseVisitor.
+///
+/// See \c PtrUseVisitor for the public interface and detailed comments about
+/// usage. This class is just a helper base class which is not templated and
+/// contains all common code to be shared between different instantiations of
+/// PtrUseVisitor.
+class PtrUseVisitorBase {
+public:
+ /// \brief This class provides information about the result of a visit.
+ ///
+ /// After walking all the users (recursively) of a pointer, the basic
+ /// infrastructure records some commonly useful information such as escape
+ /// analysis and whether the visit completed or aborted early.
+ class PtrInfo {
+ public:
+ PtrInfo() : AbortedInfo(0, false), EscapedInfo(0, false) {}
+
+ /// \brief Reset the pointer info, clearing all state.
+ void reset() {
+ AbortedInfo.setPointer(0);
+ AbortedInfo.setInt(false);
+ EscapedInfo.setPointer(0);
+ EscapedInfo.setInt(false);
+ }
+
+ /// \brief Did we abort the visit early?
+ bool isAborted() const { return AbortedInfo.getInt(); }
+
+ /// \brief Is the pointer escaped at some point?
+ bool isEscaped() const { return EscapedInfo.getInt(); }
+
+ /// \brief Get the instruction causing the visit to abort.
+ /// \returns a pointer to the instruction causing the abort if one is
+ /// available; otherwise returns null.
+ Instruction *getAbortingInst() const { return AbortedInfo.getPointer(); }
+
+ /// \brief Get the instruction causing the pointer to escape.
+ /// \returns a pointer to the instruction which escapes the pointer if one
+ /// is available; otherwise returns null.
+ Instruction *getEscapingInst() const { return EscapedInfo.getPointer(); }
+
+ /// \brief Mark the visit as aborted. Intended for use in a void return.
+ /// \param I The instruction which caused the visit to abort, if available.
+ void setAborted(Instruction *I = 0) {
+ AbortedInfo.setInt(true);
+ AbortedInfo.setPointer(I);
+ }
+
+ /// \brief Mark the pointer as escaped. Intended for use in a void return.
+ /// \param I The instruction which escapes the pointer, if available.
+ void setEscaped(Instruction *I = 0) {
+ EscapedInfo.setInt(true);
+ EscapedInfo.setPointer(I);
+ }
+
+ /// \brief Mark the pointer as escaped, and the visit as aborted. Intended
+ /// for use in a void return.
+ /// \param I The instruction which both escapes the pointer and aborts the
+ /// visit, if available.
+ void setEscapedAndAborted(Instruction *I = 0) {
+ setEscaped(I);
+ setAborted(I);
+ }
+
+ private:
+ PointerIntPair<Instruction *, 1, bool> AbortedInfo, EscapedInfo;
+ };
+
+protected:
+ const DataLayout &DL;
+
+ /// \name Visitation infrastructure
+ /// @{
+
+ /// \brief The info collected about the pointer being visited thus far.
+ PtrInfo PI;
+
+ /// \brief A struct of the data needed to visit a particular use.
+ ///
+ /// This is used to maintain a worklist of to-visit uses. It allows the
+ /// visit to be iterative rather than recursive.
+ struct UseToVisit {
+ typedef PointerIntPair<Use *, 1, bool> UseAndIsOffsetKnownPair;
+ UseAndIsOffsetKnownPair UseAndIsOffsetKnown;
+ APInt Offset;
+ };
+
+ /// \brief The worklist of to-visit uses.
+ SmallVector<UseToVisit, 8> Worklist;
+
+ /// \brief A set of visited uses to break cycles in unreachable code.
+ SmallPtrSet<Use *, 8> VisitedUses;
+
+ /// @}
+
+
+ /// \name Per-visit state
+ /// This state is reset for each instruction visited.
+ /// @{
+
+ /// \brief The use currently being visited.
+ Use *U;
+
+ /// \brief True if we have a known constant offset for the use currently
+ /// being visited.
+ bool IsOffsetKnown;
+
+ /// \brief The constant offset of the use if that is known.
+ APInt Offset;
+
+ /// @}
+
+
+ /// Note that the constructor is protected because this class must be used
+ /// as a base class; instances of it cannot be created directly.
+ PtrUseVisitorBase(const DataLayout &DL) : DL(DL) {}
+
+ /// \brief Enqueue the users of this instruction in the visit worklist.
+ ///
+ /// This will visit the users with the same offset as the current visit
+ /// (including an unknown offset if that is the current state).
+ void enqueueUsers(Instruction &I);
+
+ /// \brief Walk the operands of a GEP and adjust the offset as appropriate.
+ ///
+ /// This routine does the heavy lifting of the pointer walk by computing
+ /// offsets and looking through GEPs.
+ bool adjustOffsetForGEP(GetElementPtrInst &GEPI);
+};
+} // end namespace detail
+
+/// \brief A base class for visitors over the uses of a pointer value.
+///
+/// Once constructed, a user can call \c visit on a pointer value, and this
+/// will walk its uses and visit each instruction using an InstVisitor. It also
+/// provides visit methods which will recurse through any pointer-to-pointer
+/// transformations such as GEPs and bitcasts.
+///
+/// During the visit, the current Use* being visited is available to the
+/// subclass, as well as the current offset from the original base pointer if
+/// known.
+///
+/// The recursive visit of uses is accomplished with a worklist, so the only
+/// ordering guarantee is that an instruction is visited before any uses of it
+/// are visited. Note that this does *not* mean before any of its users are
+/// visited! This is because users can be visited multiple times due to
+/// multiple, different uses of pointers derived from the same base.
+///
+/// A particular Use will only be visited once, but a User may be visited
+/// multiple times, once per Use. These visits may notably have different
+/// offsets.
+///
+/// All visit methods on the underlying InstVisitor return a boolean. This
+/// return value short-circuits the visit, stopping it immediately.
+///
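+/// As an illustrative sketch only (the derived visitor 'EscapeFinder' and the
+/// 'report' helper are hypothetical, not part of this header), a client might
+/// look like:
+///
+/// \code
+///   struct EscapeFinder : public PtrUseVisitor<EscapeFinder> {
+///     EscapeFinder(const DataLayout &DL) : PtrUseVisitor<EscapeFinder>(DL) {}
+///     void visitLoadInst(LoadInst &LI) {
+///       // Loads are safe to walk past; when IsOffsetKnown is true, Offset
+///       // holds the constant offset of this use from the base pointer.
+///     }
+///   };
+///
+///   EscapeFinder EF(DL);
+///   EscapeFinder::PtrInfo Info = EF.visitPtr(AI); // AI: e.g. an AllocaInst.
+///   if (Info.isEscaped())
+///     report(Info.getEscapingInst());
+/// \endcode
+///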
+/// FIXME: Generalize this for all values rather than just instructions.
+template <typename DerivedT>
+class PtrUseVisitor : protected InstVisitor<DerivedT>,
+ public detail::PtrUseVisitorBase {
+ friend class InstVisitor<DerivedT>;
+ typedef InstVisitor<DerivedT> Base;
+
+public:
+ PtrUseVisitor(const DataLayout &DL) : PtrUseVisitorBase(DL) {}
+
+ /// \brief Recursively visit the uses of the given pointer.
+ /// \returns An info struct about the pointer. See \c PtrInfo for details.
+ PtrInfo visitPtr(Instruction &I) {
+ // This must be a pointer type. Get an integer type suitable to hold
+ // offsets on this pointer.
+ // FIXME: Support a vector of pointers.
+ assert(I.getType()->isPointerTy());
+ IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(I.getType()));
+ IsOffsetKnown = true;
+ Offset = APInt(IntPtrTy->getBitWidth(), 0);
+ PI.reset();
+
+ // Enqueue the uses of this pointer.
+ enqueueUsers(I);
+
+ // Visit all the uses off the worklist until it is empty.
+ while (!Worklist.empty()) {
+ UseToVisit ToVisit = Worklist.pop_back_val();
+ U = ToVisit.UseAndIsOffsetKnown.getPointer();
+ IsOffsetKnown = ToVisit.UseAndIsOffsetKnown.getInt();
+ if (IsOffsetKnown)
+ Offset = llvm_move(ToVisit.Offset);
+
+ Instruction *I = cast<Instruction>(U->getUser());
+ static_cast<DerivedT*>(this)->visit(I);
+ if (PI.isAborted())
+ break;
+ }
+ return PI;
+ }
+
+protected:
+ void visitStoreInst(StoreInst &SI) {
+ if (SI.getValueOperand() == U->get())
+ PI.setEscaped(&SI);
+ }
+
+ void visitBitCastInst(BitCastInst &BC) {
+ enqueueUsers(BC);
+ }
+
+ void visitPtrToIntInst(PtrToIntInst &I) {
+ PI.setEscaped(&I);
+ }
+
+ void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ if (GEPI.use_empty())
+ return;
+
+ // If we can't walk the GEP, clear the offset.
+ if (!adjustOffsetForGEP(GEPI)) {
+ IsOffsetKnown = false;
+ Offset = APInt();
+ }
+
+ // Enqueue the users now that the offset has been adjusted.
+ enqueueUsers(GEPI);
+ }
+
+ // No-op intrinsics which we know don't escape the pointer to logic in
+ // some other function.
+ void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) {}
+ void visitMemIntrinsic(MemIntrinsic &I) {}
+ void visitIntrinsicInst(IntrinsicInst &II) {
+ switch (II.getIntrinsicID()) {
+ default:
+ return Base::visitIntrinsicInst(II);
+
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return; // No-op intrinsics.
+ }
+ }
+
+ // Generically, arguments to calls and invokes escape the pointer to some
+ // other function. Mark that.
+ void visitCallSite(CallSite CS) {
+ PI.setEscaped(CS.getInstruction());
+ Base::visitCallSite(CS);
+ }
+};
+
+}
+
+#endif
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index 48d7ee6b5476..69cc29381136 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -24,8 +24,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_REGION_INFO_H
-#define LLVM_ANALYSIS_REGION_INFO_H
+#ifndef LLVM_ANALYSIS_REGIONINFO_H
+#define LLVM_ANALYSIS_REGIONINFO_H
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Analysis/DominanceFrontier.h"
diff --git a/include/llvm/Analysis/RegionIterator.h b/include/llvm/Analysis/RegionIterator.h
index 7adc71ca82ac..8fd42637276e 100644
--- a/include/llvm/Analysis/RegionIterator.h
+++ b/include/llvm/Analysis/RegionIterator.h
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
// This file defines the iterators to iterate over the elements of a Region.
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_REGION_ITERATOR_H
-#define LLVM_ANALYSIS_REGION_ITERATOR_H
+#ifndef LLVM_ANALYSIS_REGIONITERATOR_H
+#define LLVM_ANALYSIS_REGIONITERATOR_H
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h
index 68f12012bcd1..0690ac5e34a7 100644
--- a/include/llvm/Analysis/RegionPass.h
+++ b/include/llvm/Analysis/RegionPass.h
@@ -13,15 +13,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_REGION_PASS_H
-#define LLVM_REGION_PASS_H
+#ifndef LLVM_ANALYSIS_REGIONPASS_H
+#define LLVM_ANALYSIS_REGIONPASS_H
#include "llvm/Analysis/RegionInfo.h"
-
+#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/PassManagers.h"
-#include "llvm/Function.h"
-
#include <deque>
namespace llvm {
@@ -59,6 +57,9 @@ public:
/// @return The pass to print the LLVM IR in the region.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
+
virtual bool doInitialization(Region *R, RGPassManager &RGM) { return false; }
virtual bool doFinalization() { return false; }
//@}
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 235adca02175..306549fba46c 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -21,16 +21,16 @@
#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_H
#define LLVM_ANALYSIS_SCALAREVOLUTION_H
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
-#include "llvm/Instructions.h"
-#include "llvm/Function.h"
-#include "llvm/Operator.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ConstantRange.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ValueHandle.h"
#include <map>
namespace llvm {
@@ -338,6 +338,10 @@ namespace llvm {
/// getMax - Get the max backedge taken count for the loop.
const SCEV *getMax(ScalarEvolution *SE) const;
+ /// Return true if any backedge taken count expressions refer to the given
+ /// subexpression.
+ bool hasOperand(const SCEV *S, ScalarEvolution *SE) const;
+
/// clear - Invalidate this result and free associated memory.
void clear();
};
@@ -831,7 +835,7 @@ namespace llvm {
/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
/// predicate Pred. Return true iff any changes were made. If the
- /// operands are provably equal or inequal, LHS and RHS are set to
+ /// operands are provably equal or unequal, LHS and RHS are set to
/// the same value and Pred is set to either ICMP_EQ or ICMP_NE.
///
bool SimplifyICmpOperands(ICmpInst::Predicate &Pred,
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 3f8f149cb420..00779fc329b1 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -11,18 +11,18 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H
-#define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H
+#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONEXPANDER_H
+#define LLVM_ANALYSIS_SCALAREVOLUTIONEXPANDER_H
-#include "llvm/IRBuilder.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/TargetFolder.h"
#include "llvm/Support/ValueHandle.h"
#include <set>
namespace llvm {
- class TargetLowering;
+ class TargetTransformInfo;
/// Return true if the given expression is safe to expand in the sense that
/// all materialized values are safe to speculate.
@@ -40,8 +40,10 @@ namespace llvm {
// New instructions receive a name that identifies them with the current pass.
const char* IVName;
- std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
+ // InsertedExpressions caches Values for reuse, so must track RAUW.
+ std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >
InsertedExpressions;
+ // InsertedValues only flags inserted instructions so needs no RAUW.
std::set<AssertingVH<Value> > InsertedValues;
std::set<AssertingVH<Value> > InsertedPostIncValues;
@@ -129,7 +131,7 @@ namespace llvm {
/// representative. Return the number of phis eliminated.
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
SmallVectorImpl<WeakVH> &DeadInsts,
- const TargetLowering *TLI = NULL);
+ const TargetTransformInfo *TTI = NULL);
/// expandCodeFor - Insert code to directly compute the specified SCEV
/// expression into the program. The inserted code is inserted into the
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 54db7d6bcf0d..eac91131ad53 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_EXPRESSIONS_H
-#define LLVM_ANALYSIS_SCALAREVOLUTION_EXPRESSIONS_H
+#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H
+#define LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -548,6 +548,151 @@ namespace llvm {
SCEVTraversal<SV> T(Visitor);
T.visitAll(Root);
}
+
+ /// The SCEVRewriter takes a scalar evolution expression and copies all its
+ /// components. The result after a rewrite is an identical SCEV.
+ struct SCEVRewriter
+ : public SCEVVisitor<SCEVRewriter, const SCEV*> {
+ public:
+ SCEVRewriter(ScalarEvolution &S) : SE(S) {}
+
+ virtual ~SCEVRewriter() {}
+
+ virtual const SCEV *visitConstant(const SCEVConstant *Constant) {
+ return Constant;
+ }
+
+ virtual const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+ const SCEV *Operand = visit(Expr->getOperand());
+ return SE.getTruncateExpr(Operand, Expr->getType());
+ }
+
+ virtual const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ const SCEV *Operand = visit(Expr->getOperand());
+ return SE.getZeroExtendExpr(Operand, Expr->getType());
+ }
+
+ virtual const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+ const SCEV *Operand = visit(Expr->getOperand());
+ return SE.getSignExtendExpr(Operand, Expr->getType());
+ }
+
+ virtual const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ Operands.push_back(visit(Expr->getOperand(i)));
+ return SE.getAddExpr(Operands);
+ }
+
+ virtual const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ Operands.push_back(visit(Expr->getOperand(i)));
+ return SE.getMulExpr(Operands);
+ }
+
+ virtual const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
+ return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
+ }
+
+ virtual const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ Operands.push_back(visit(Expr->getOperand(i)));
+ return SE.getAddRecExpr(Operands, Expr->getLoop(),
+ Expr->getNoWrapFlags());
+ }
+
+ virtual const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ Operands.push_back(visit(Expr->getOperand(i)));
+ return SE.getSMaxExpr(Operands);
+ }
+
+ virtual const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ Operands.push_back(visit(Expr->getOperand(i)));
+ return SE.getUMaxExpr(Operands);
+ }
+
+ virtual const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ return Expr;
+ }
+
+ virtual const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return Expr;
+ }
+
+ protected:
+ ScalarEvolution &SE;
+ };
+
+ typedef DenseMap<const Value*, Value*> ValueToValueMap;
+
+ /// The SCEVParameterRewriter takes a scalar evolution expression and updates
+ /// the SCEVUnknown components following the Map (Value -> Value).
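+ ///
+ /// A minimal sketch of its use ('S' and 'SE' are an in-scope SCEV pointer
+ /// and ScalarEvolution instance; 'A' and 'B' are hypothetical Value
+ /// pointers):
+ /// \code
+ ///   ValueToValueMap Map;
+ ///   Map[A] = B;
+ ///   const SCEV *Rewritten = SCEVParameterRewriter::rewrite(S, SE, Map);
+ /// \endcode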
+ struct SCEVParameterRewriter: public SCEVRewriter {
+ public:
+ static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
+ ValueToValueMap &Map) {
+ SCEVParameterRewriter Rewriter(SE, Map);
+ return Rewriter.visit(Scev);
+ }
+ SCEVParameterRewriter(ScalarEvolution &S, ValueToValueMap &M)
+ : SCEVRewriter(S), Map(M) {}
+
+ virtual const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ Value *V = Expr->getValue();
+ if (Map.count(V))
+ return SE.getUnknown(Map[V]);
+ return Expr;
+ }
+
+ private:
+ ValueToValueMap &Map;
+ };
+
+ typedef DenseMap<const Loop*, const SCEV*> LoopToScevMapT;
+
+ /// The SCEVApplyRewriter takes a scalar evolution expression and applies
+ /// the Map (Loop -> SCEV) to all AddRecExprs.
+ struct SCEVApplyRewriter: public SCEVRewriter {
+ public:
+ static const SCEV *rewrite(const SCEV *Scev, LoopToScevMapT &Map,
+ ScalarEvolution &SE) {
+ SCEVApplyRewriter Rewriter(SE, Map);
+ return Rewriter.visit(Scev);
+ }
+ SCEVApplyRewriter(ScalarEvolution &S, LoopToScevMapT &M)
+ : SCEVRewriter(S), Map(M) {}
+
+ virtual const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ Operands.push_back(visit(Expr->getOperand(i)));
+
+ const Loop *L = Expr->getLoop();
+ const SCEV *Res = SE.getAddRecExpr(Operands, L, Expr->getNoWrapFlags());
+
+ if (0 == Map.count(L))
+ return Res;
+
+ const SCEVAddRecExpr *Rec = (const SCEVAddRecExpr *) Res;
+ return Rec->evaluateAtIteration(Map[L], SE);
+ }
+
+ private:
+ LoopToScevMapT &Map;
+ };
+
+/// Applies the Map (Loop -> SCEV) to the given Scev.
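+///
+/// For example (a sketch; 'L', 'S', and 'SE' are assumed to be an in-scope
+/// Loop pointer, SCEV pointer, and ScalarEvolution instance), evaluating an
+/// expression at a loop's backedge-taken count could look like:
+/// \code
+///   LoopToScevMapT Map;
+///   Map[L] = SE.getBackedgeTakenCount(L);
+///   const SCEV *AtExit = apply(S, Map, SE);
+/// \endcode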
+static inline const SCEV *apply(const SCEV *Scev, LoopToScevMapT &Map,
+ ScalarEvolution &SE) {
+ return SCEVApplyRewriter::rewrite(Scev, Map, SE);
+}
+
}
#endif
diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h
index 342e5937891a..7c6423a21cfa 100644
--- a/include/llvm/Analysis/ScalarEvolutionNormalization.h
+++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -33,8 +33,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
-#define LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
+#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H
+#define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H
#include "llvm/ADT/SmallPtrSet.h"
diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h
index b758eca42e78..76c8ccf59c2b 100644
--- a/include/llvm/Analysis/SparsePropagation.h
+++ b/include/llvm/Analysis/SparsePropagation.h
@@ -12,13 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_SPARSE_PROPAGATION_H
-#define LLVM_ANALYSIS_SPARSE_PROPAGATION_H
+#ifndef LLVM_ANALYSIS_SPARSEPROPAGATION_H
+#define LLVM_ANALYSIS_SPARSEPROPAGATION_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include <vector>
#include <set>
+#include <vector>
namespace llvm {
class Value;
@@ -203,4 +203,4 @@ private:
} // end namespace llvm
-#endif // LLVM_ANALYSIS_SPARSE_PROPAGATION_H
+#endif // LLVM_ANALYSIS_SPARSEPROPAGATION_H
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
new file mode 100644
index 000000000000..a9d6725d86b0
--- /dev/null
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -0,0 +1,349 @@
+//===- llvm/Analysis/TargetTransformInfo.h ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass exposes codegen information to IR-level passes. Every
+// transformation that uses codegen information is broken into three parts:
+// 1. The IR-level analysis pass.
+// 2. The IR-level transformation interface which provides the needed
+// information.
+// 3. Codegen-level implementation which uses target-specific hooks.
+//
+// This file defines #2, which is the interface that IR-level transformations
+// use for querying the codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
+#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
+
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class GlobalValue;
+class Type;
+class User;
+class Value;
+
+/// TargetTransformInfo - This pass provides access to the codegen
+/// interfaces that are needed for IR-level transformations.
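+///
+/// A minimal sketch of how a client might query this analysis ('MyPass' is a
+/// hypothetical function pass, not part of LLVM):
+///
+/// \code
+///   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+///     AU.addRequired<TargetTransformInfo>();
+///   }
+///
+///   bool MyPass::runOnFunction(Function &F) {
+///     const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+///     unsigned Cost = 0; // Summed in TargetCostConstants units.
+///     for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
+///       for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+///         Cost += TTI.getUserCost(&*I);
+///     return false;
+///   }
+/// \endcode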
+class TargetTransformInfo {
+protected:
+ /// \brief The TTI instance one level down the stack.
+ ///
+ /// This is used to implement the default behavior of all the methods,
+ /// which is to delegate up through the stack of TTIs until one can answer
+ /// the query.
+ TargetTransformInfo *PrevTTI;
+
+ /// \brief The top of the stack of TTI analyses available.
+ ///
+ /// This is a convenience pointer, maintained as TTI analyses become
+ /// available, that complements the PrevTTI delegation chain. When one part
+ /// of an analysis pass wants to query another part of the analysis pass, it
+ /// can use this to start back at the top of the stack.
+ TargetTransformInfo *TopTTI;
+
+ /// All pass subclasses must in their initializePass routine call
+ /// pushTTIStack with themselves to update the pointers tracking the previous
+ /// TTI instance in the analysis group's stack, and the top of the analysis
+ /// group's stack.
+ void pushTTIStack(Pass *P);
+
+ /// All pass subclasses must in their finalizePass routine call popTTIStack
+ /// to update the pointers tracking the previous TTI instance in the analysis
+ /// group's stack, and the top of the analysis group's stack.
+ void popTTIStack();
+
+ /// All pass subclasses must call TargetTransformInfo::getAnalysisUsage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+public:
+ /// This class is intended to be subclassed by real implementations.
+ virtual ~TargetTransformInfo() = 0;
+
+ /// \name Generic Target Information
+ /// @{
+
+ /// \brief Underlying constants for 'cost' values in this interface.
+ ///
+ /// Many APIs in this interface return a cost. This enum defines the
+ /// fundamental values that should be used to interpret (and produce) those
+ /// costs. The costs are returned as an unsigned rather than a member of this
+ /// enumeration because it is expected that the cost of one IR instruction
+ /// may have a multiplicative factor to it or otherwise won't fit directly
+ /// into the enum. Moreover, it is common to sum or average costs, which
+ /// works better as simple integral values. Thus this enum only provides
+ /// constants.
+ ///
+ /// Note that these costs should usually reflect the intersection of code-size
+ /// cost and execution cost. A free instruction is typically one that folds
+ /// into another instruction. For example, reg-to-reg moves can often be
+ /// skipped by renaming the registers in the CPU, but they still are encoded
+ /// and thus wouldn't be considered 'free' here.
+ enum TargetCostConstants {
+ TCC_Free = 0, ///< Expected to fold away in lowering.
+ TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
+ TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
+ };
+
+ /// \brief Estimate the cost of a specific operation when lowered.
+ ///
+ /// Note that this is designed to work on an arbitrary synthetic opcode, and
+ /// thus work for hypothetical queries before an instruction has even been
+ /// formed. However, this does *not* work for GEPs, and must not be called
+ /// for a GEP instruction. Instead, use the dedicated getGEPCost interface,
+ /// as analyzing a GEP's cost requires more information.
+ ///
+ /// Typically only the result type is required, and the operand type can be
+ /// omitted. However, if the opcode is one of the cast instructions, the
+ /// operand type is required.
+ ///
+ /// The returned cost is defined in terms of \c TargetCostConstants, see its
+ /// comments for a detailed explanation of the cost values.
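+ ///
+ /// For instance (illustrative only; 'TTI' and 'Ctx' are assumed to be an
+ /// in-scope TargetTransformInfo reference and LLVMContext), the cost of a
+ /// 64-bit multiply might be queried as:
+ /// \code
+ ///   unsigned Cost = TTI.getOperationCost(Instruction::Mul,
+ ///                                        Type::getInt64Ty(Ctx));
+ /// \endcode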
+ virtual unsigned getOperationCost(unsigned Opcode, Type *Ty,
+ Type *OpTy = 0) const;
+
+ /// \brief Estimate the cost of a GEP operation when lowered.
+ ///
+ /// The contract for this function is the same as \c getOperationCost except
+ /// that it supports an interface that provides extra information specific to
+ /// the GEP operation.
+ virtual unsigned getGEPCost(const Value *Ptr,
+ ArrayRef<const Value *> Operands) const;
+
+ /// \brief Estimate the cost of a function call when lowered.
+ ///
+ /// The contract for this is the same as \c getOperationCost except that it
+ /// supports an interface that provides extra information specific to call
+ /// instructions.
+ ///
+ /// This is the most basic query for estimating call cost: it only knows the
+ /// function type and (potentially) the number of arguments at the call site.
+ /// The latter is only interesting for varargs function types.
+ virtual unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const;
+
+ /// \brief Estimate the cost of calling a specific function when lowered.
+ ///
+ /// This overload adds the ability to reason about the particular function
+ /// being called in the event it is a library call with special lowering.
+ virtual unsigned getCallCost(const Function *F, int NumArgs = -1) const;
+
+ /// \brief Estimate the cost of calling a specific function when lowered.
+ ///
+ /// This overload allows specifying a set of candidate argument values.
+ virtual unsigned getCallCost(const Function *F,
+ ArrayRef<const Value *> Arguments) const;
+
+ /// \brief Estimate the cost of an intrinsic when lowered.
+ ///
+ /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
+ virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys) const;
+
+ /// \brief Estimate the cost of an intrinsic when lowered.
+ ///
+ /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
+ virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments) const;
+
+ /// \brief Estimate the cost of a given IR user when lowered.
+ ///
+ /// This can estimate the cost of either a ConstantExpr or Instruction when
+ /// lowered. It has two primary advantages over the \c getOperationCost and
+ /// \c getGEPCost above, and one significant disadvantage: it can only be
+ /// used when the IR construct has already been formed.
+ ///
+ /// The advantages are that it can inspect the SSA use graph to reason more
+ /// accurately about the cost. For example, all-constant-GEPs can often be
+ /// folded into a load or other instruction, but if they are used in some
+ /// other context they may not be folded. This routine can distinguish such
+ /// cases.
+ ///
+ /// The returned cost is defined in terms of \c TargetCostConstants, see its
+ /// comments for a detailed explanation of the cost values.
+ virtual unsigned getUserCost(const User *U) const;
+
+ /// \brief Test whether calls to a function lower to actual program function
+ /// calls.
+ ///
+ /// The idea is to test whether the program is likely to require a 'call'
+ /// instruction or equivalent in order to call the given function.
+ ///
+ /// FIXME: It's not clear that this is a good or useful query API. Clients
+ /// should probably move to simpler cost metrics using the above.
+ /// Alternatively, we could split the cost interface into distinct code-size
+ /// and execution-speed costs. This would allow modelling the core of this
+ /// query more accurately, as a call is a single small instruction but
+ /// incurs significant execution cost.
+ virtual bool isLoweredToCall(const Function *F) const;
+
+ /// @}
+
+ /// \name Scalar Target Information
+ /// @{
+
+ /// \brief Flags indicating the kind of support for population count.
+ ///
+ /// Compared to the software implementation, hardware support is expected to
+ /// significantly boost performance when the population is dense, and it may
+ /// or may not degrade performance when the population is sparse. Hardware
+ /// support is considered "Fast" if it can outperform, or is on a par with,
+ /// the software implementation when the population is sparse; otherwise, it
+ /// is considered "Slow".
+ enum PopcntSupportKind {
+ PSK_Software,
+ PSK_SlowHardware,
+ PSK_FastHardware
+ };
+
+ /// isLegalAddImmediate - Return true if the specified immediate is a legal
+ /// add immediate, that is, the target has add instructions which can add
+ /// a register with the immediate without having to materialize the
+ /// immediate into a register.
+ virtual bool isLegalAddImmediate(int64_t Imm) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is a legal
+ /// icmp immediate, that is, the target has icmp instructions which can compare
+ /// a register against the immediate without having to materialize the
+ /// immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented by
+ /// AM is legal for this target, for a load/store of the specified type.
+ /// The type may be VoidTy, in which case only return true if the addressing
+ /// mode is legal for a load/store of any legal type.
+ /// TODO: Handle pre/postinc as well.
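+ ///
+ /// For example (a sketch; 'TTI' and 'Ctx' are assumed to be an in-scope
+ /// TargetTransformInfo reference and LLVMContext), asking whether a
+ /// [reg + 16] mode is legal for an i32 load/store:
+ /// \code
+ ///   bool OK = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
+ ///                                       /*BaseGV=*/0, /*BaseOffset=*/16,
+ ///                                       /*HasBaseReg=*/true, /*Scale=*/0);
+ /// \endcode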
+ virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) const;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2; e.g., on x86 it's free to truncate an i32 value in
+ /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+
+ /// Return true if this type is legal.
+ virtual bool isTypeLegal(Type *Ty) const;
+
+ /// getJumpBufAlignment - returns the target's jmp_buf alignment in bytes
+ virtual unsigned getJumpBufAlignment() const;
+
+ /// getJumpBufSize - returns the target's jmp_buf size in bytes.
+ virtual unsigned getJumpBufSize() const;
+
+ /// shouldBuildLookupTables - Return true if switches should be turned into
+ /// lookup tables for the target.
+ virtual bool shouldBuildLookupTables() const;
+
+ /// getPopcntSupport - Return hardware support for population count.
+ virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
+
+ /// getIntImmCost - Return the expected cost of materializing the given
+ /// integer immediate of the specified type.
+ virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
+
+ /// @}
+
+ /// \name Vector Target Information
+ /// @{
+
+ /// \brief The various kinds of shuffle patterns for vector queries.
+ enum ShuffleKind {
+ SK_Broadcast, ///< Broadcast element 0 to all other elements.
+ SK_Reverse, ///< Reverse the order of the vector.
+ SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
+ SK_ExtractSubvector ///< ExtractSubvector. Index indicates start offset.
+ };
+
+ /// \brief Additional information about an operand's possible values.
+ enum OperandValueKind {
+ OK_AnyValue, // Operand can have any value.
+ OK_UniformValue, // Operand is uniform (splat of a value).
+ OK_UniformConstantValue // Operand is uniform constant.
+ };
+
+ /// \return The number of scalar or vector registers that the target has.
+ /// If 'Vector' is true, it returns the number of vector registers. If it is
+ /// set to false, it returns the number of scalar registers.
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+
+ /// \return The width of the largest scalar or vector register type.
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+
+ /// \return The maximum unroll factor that the vectorizer should try to
+ /// perform for this target. This number depends on the level of parallelism
+ /// and the number of execution units in the CPU.
+ virtual unsigned getMaximumUnrollFactor() const;
+
+ /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
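+ ///
+ /// For example (a sketch; 'TTI' and 'VecTy' are an assumed in-scope
+ /// TargetTransformInfo reference and vector type), the cost of a shift by a
+ /// uniform constant amount could be queried as:
+ /// \code
+ ///   unsigned Cost = TTI.getArithmeticInstrCost(Instruction::Shl, VecTy,
+ ///                                              OK_AnyValue,
+ ///                                              OK_UniformConstantValue);
+ /// \endcode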
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Opd1Info = OK_AnyValue,
+ OperandValueKind Opd2Info = OK_AnyValue) const;
+
+ /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
+ /// The index and subtype parameters are used by the subvector insertion and
+ /// extraction shuffle kinds.
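+ ///
+ /// For instance (illustrative only; 'TTI' and 'Ctx' are an assumed in-scope
+ /// TargetTransformInfo reference and LLVMContext), the cost of reversing a
+ /// <4 x float> vector might be queried as:
+ /// \code
+ ///   Type *VecTy = VectorType::get(Type::getFloatTy(Ctx), 4);
+ ///   unsigned Cost = TTI.getShuffleCost(SK_Reverse, VecTy);
+ /// \endcode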
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
+ Type *SubTp = 0) const;
+
+ /// \return The expected cost of cast instructions, such as bitcast, trunc,
+ /// zext, etc.
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+
+ /// \return The expected cost of control-flow related instructions such as
+ /// Phi, Ret, Br.
+ virtual unsigned getCFInstrCost(unsigned Opcode) const;
+
+ /// \returns The expected cost of compare and select instructions.
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy = 0) const;
+
+ /// \return The expected cost of vector Insert and Extract.
+ /// Use -1 to indicate that there is no information on the index value.
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index = -1) const;
+
+ /// \return The cost of Load and Store instructions.
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// \returns The cost of Intrinsic instructions.
+ virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type *> Tys) const;
+
+ /// \returns The number of pieces into which the provided type must be
+ /// split during legalization. Zero is returned when the answer is unknown.
+ virtual unsigned getNumberOfParts(Type *Tp) const;
+
+ /// \returns The cost of the address computation. For most targets this can be
+ /// merged into the instruction indexing mode. Some targets might want to
+ /// distinguish between address computation for memory operations on vector
+ /// types and scalar types. Such targets should override this function.
+ virtual unsigned getAddressComputationCost(Type *Ty) const;
+
+ /// @}
+
+ /// Analysis group identification.
+ static char ID;
+};
+
+/// \brief Create the base case instance of a pass in the TTI analysis group.
+///
+/// This class provides the base case for the stack of TTI analyses. It doesn't
+/// delegate to anything and uses the STTI and VTTI objects passed in to
+/// satisfy the queries.
+ImmutablePass *createNoTargetTransformInfoPass();
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Analysis/Trace.h b/include/llvm/Analysis/Trace.h
index 99651e192d3b..bedd654c6521 100644
--- a/include/llvm/Analysis/Trace.h
+++ b/include/llvm/Analysis/Trace.h
@@ -18,8 +18,8 @@
#ifndef LLVM_ANALYSIS_TRACE_H
#define LLVM_ANALYSIS_TRACE_H
-#include <vector>
#include <cassert>
+#include <vector>
namespace llvm {
class BasicBlock;
@@ -116,4 +116,4 @@ public:
} // end namespace llvm
-#endif // TRACE_H
+#endif // LLVM_ANALYSIS_TRACE_H
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index a85752446bb0..3775ec9f07aa 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -45,13 +45,12 @@ namespace llvm {
void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
const DataLayout *TD = 0, unsigned Depth = 0);
- /// isPowerOfTwo - Return true if the given value is known to have exactly one
- /// bit set when defined. For vectors return true if every element is known to
- /// be a power of two when defined. Supports values with integer or pointer
- /// type and vectors of integers. If 'OrZero' is set then returns true if the
- /// given value is either a power of two or zero.
- bool isPowerOfTwo(Value *V, const DataLayout *TD = 0, bool OrZero = false,
- unsigned Depth = 0);
+ /// isKnownToBeAPowerOfTwo - Return true if the given value is known to have
+ /// exactly one bit set when defined. For vectors return true if every
+ /// element is known to be a power of two when defined. Supports values with
+ /// integer or pointer type and vectors of integers. If 'OrZero' is set then
+ /// returns true if the given value is either a power of two or zero.
+ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero = false, unsigned Depth = 0);
/// isKnownNonZero - Return true if the given value is known to be non-zero
/// when defined. For vectors return true if every element is known to be
@@ -118,10 +117,10 @@ namespace llvm {
/// it can be expressed as a base pointer plus a constant offset. Return the
/// base and offset to the caller.
Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout &TD);
+ const DataLayout *TD);
static inline const Value *
GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset,
- const DataLayout &TD) {
+ const DataLayout *TD) {
return GetPointerBaseWithConstantOffset(const_cast<Value*>(Ptr), Offset,TD);
}
@@ -184,6 +183,11 @@ namespace llvm {
bool isSafeToSpeculativelyExecute(const Value *V,
const DataLayout *TD = 0);
+ /// isKnownNonNull - Return true if this pointer couldn't possibly be null by
+ /// its definition. This returns true for allocas, non-extern-weak globals
+ /// and byval arguments.
+ bool isKnownNonNull(const Value *V);
+
} // end namespace llvm
#endif
diff --git a/include/llvm/Argument.h b/include/llvm/Argument.h
deleted file mode 100644
index b1c22185191d..000000000000
--- a/include/llvm/Argument.h
+++ /dev/null
@@ -1,91 +0,0 @@
-//===-- llvm/Argument.h - Definition of the Argument class ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Argument class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ARGUMENT_H
-#define LLVM_ARGUMENT_H
-
-#include "llvm/Value.h"
-#include "llvm/Attributes.h"
-#include "llvm/ADT/ilist_node.h"
-#include "llvm/ADT/Twine.h"
-
-namespace llvm {
-
-template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
-
-/// A class to represent an incoming formal argument to a Function. An argument
-/// is a very simple Value. It is essentially a named (optional) type. When used
-/// in the body of a function, it represents the value of the actual argument
-/// the function was called with.
-/// @brief LLVM Argument representation
-class Argument : public Value, public ilist_node<Argument> {
- virtual void anchor();
- Function *Parent;
-
- friend class SymbolTableListTraits<Argument, Function>;
- void setParent(Function *parent);
-
-public:
- /// Argument ctor - If Function argument is specified, this argument is
- /// inserted at the end of the argument list for the function.
- ///
- explicit Argument(Type *Ty, const Twine &Name = "", Function *F = 0);
-
- inline const Function *getParent() const { return Parent; }
- inline Function *getParent() { return Parent; }
-
- /// getArgNo - Return the index of this formal argument in its containing
- /// function. For example in "void foo(int a, float b)" a is 0 and b is 1.
- unsigned getArgNo() const;
-
- /// hasByValAttr - Return true if this argument has the byval attribute on it
- /// in its containing function.
- bool hasByValAttr() const;
-
- /// getParamAlignment - If this is a byval argument, return its alignment.
- unsigned getParamAlignment() const;
-
- /// hasNestAttr - Return true if this argument has the nest attribute on
- /// it in its containing function.
- bool hasNestAttr() const;
-
- /// hasNoAliasAttr - Return true if this argument has the noalias attribute on
- /// it in its containing function.
- bool hasNoAliasAttr() const;
-
- /// hasNoCaptureAttr - Return true if this argument has the nocapture
- /// attribute on it in its containing function.
- bool hasNoCaptureAttr() const;
-
- /// hasStructRetAttr - Return true if this argument has the sret attribute on
- /// it in its containing function.
- bool hasStructRetAttr() const;
-
- /// addAttr - Add a Attribute to an argument
- void addAttr(Attributes);
-
- /// removeAttr - Remove a Attribute from an argument
- void removeAttr(Attributes);
-
- /// classof - Methods for support type inquiry through isa, cast, and
- /// dyn_cast:
- ///
- static inline bool classof(const Value *V) {
- return V->getValueID() == ArgumentVal;
- }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Assembly/PrintModulePass.h b/include/llvm/Assembly/PrintModulePass.h
index 239fbcc0c8ca..02b9bd9be505 100644
--- a/include/llvm/Assembly/PrintModulePass.h
+++ b/include/llvm/Assembly/PrintModulePass.h
@@ -23,6 +23,7 @@
namespace llvm {
class FunctionPass;
class ModulePass;
+ class BasicBlockPass;
class raw_ostream;
/// createPrintModulePass - Create and return a pass that writes the
@@ -37,6 +38,11 @@ namespace llvm {
raw_ostream *OS,
bool DeleteStream=false);
+ /// createPrintBasicBlockPass - Create and return a pass that writes the
+ /// BB to the specified raw_ostream.
+ BasicBlockPass *createPrintBasicBlockPass(raw_ostream *OS,
+ bool DeleteStream=false,
+ const std::string &Banner = "");
} // End llvm namespace
#endif
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
deleted file mode 100644
index a9c2d743ff4a..000000000000
--- a/include/llvm/Attributes.h
+++ /dev/null
@@ -1,431 +0,0 @@
-//===-- llvm/Attributes.h - Container for Attributes ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the simple types necessary to represent the
-// attributes associated with functions and their calls.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ATTRIBUTES_H
-#define LLVM_ATTRIBUTES_H
-
-#include "llvm/Support/MathExtras.h"
-#include "llvm/ADT/ArrayRef.h"
-#include <cassert>
-#include <string>
-
-namespace llvm {
-
-class AttrBuilder;
-class AttributesImpl;
-class LLVMContext;
-class Type;
-
-/// Attributes - A bitset of attributes.
-class Attributes {
-public:
- /// Function parameters and results can have attributes to indicate how they
- /// should be treated by optimizations and code generation. This enumeration
- /// lists the attributes that can be associated with parameters, function
- /// results or the function itself.
- ///
- /// Note that uwtable is about the ABI or the user mandating an entry in the
- /// unwind table. The nounwind attribute is about an exception passing by the
- /// function.
- ///
- /// In a theoretical system that uses tables for profiling and sjlj for
- /// exceptions, they would be fully independent. In a normal system that uses
- /// tables for both, the semantics are:
- ///
- /// nil = Needs an entry because an exception might pass by.
- /// nounwind = No need for an entry
- /// uwtable = Needs an entry because the ABI says so and because
- /// an exception might pass by.
- /// uwtable + nounwind = Needs an entry because the ABI says so.
-
- enum AttrVal {
- // IR-Level Attributes
- None, ///< No attributes have been set
- AddressSafety, ///< Address safety checking is on.
- Alignment, ///< Alignment of parameter (5 bits)
- ///< stored as log2 of alignment with +1 bias
- ///< 0 means unaligned different from align 1
- AlwaysInline, ///< inline=always
- ByVal, ///< Pass structure by value
- InlineHint, ///< Source said inlining was desirable
- InReg, ///< Force argument to be passed in register
- MinSize, ///< Function must be optimized for size first
- Naked, ///< Naked function
- Nest, ///< Nested function static chain
- NoAlias, ///< Considered to not alias after call
- NoCapture, ///< Function creates no aliases of pointer
- NoImplicitFloat, ///< Disable implicit floating point insts
- NoInline, ///< inline=never
- NonLazyBind, ///< Function is called early and/or
- ///< often, so lazy binding isn't worthwhile
- NoRedZone, ///< Disable redzone
- NoReturn, ///< Mark the function as not returning
- NoUnwind, ///< Function doesn't unwind stack
- OptimizeForSize, ///< opt_size
- ReadNone, ///< Function does not access memory
- ReadOnly, ///< Function only reads from memory
- ReturnsTwice, ///< Function can return twice
- SExt, ///< Sign extended before/after call
- StackAlignment, ///< Alignment of stack for function (3 bits)
- ///< stored as log2 of alignment with +1 bias 0
- ///< means unaligned (different from
- ///< alignstack={1))
- StackProtect, ///< Stack protection.
- StackProtectReq, ///< Stack protection required.
- StructRet, ///< Hidden pointer to structure to return
- UWTable, ///< Function must be in a unwind table
- ZExt ///< Zero extended before/after call
- };
-private:
- AttributesImpl *Attrs;
- Attributes(AttributesImpl *A) : Attrs(A) {}
-public:
- Attributes() : Attrs(0) {}
- Attributes(const Attributes &A) : Attrs(A.Attrs) {}
- Attributes &operator=(const Attributes &A) {
- Attrs = A.Attrs;
- return *this;
- }
-
- /// get - Return a uniquified Attributes object. This takes the uniquified
- /// value from the Builder and wraps it in the Attributes class.
- static Attributes get(LLVMContext &Context, ArrayRef<AttrVal> Vals);
- static Attributes get(LLVMContext &Context, AttrBuilder &B);
-
- /// @brief Return true if the attribute is present.
- bool hasAttribute(AttrVal Val) const;
-
- /// @brief Return true if attributes exist
- bool hasAttributes() const;
-
- /// @brief Return true if the attributes are a non-null intersection.
- bool hasAttributes(const Attributes &A) const;
-
- /// @brief Returns the alignment field of an attribute as a byte alignment
- /// value.
- unsigned getAlignment() const;
-
- /// @brief Returns the stack alignment field of an attribute as a byte
- /// alignment value.
- unsigned getStackAlignment() const;
-
- /// @brief Parameter attributes that do not apply to vararg call arguments.
- bool hasIncompatibleWithVarArgsAttrs() const {
- return hasAttribute(Attributes::StructRet);
- }
-
- /// @brief Attributes that only apply to function parameters.
- bool hasParameterOnlyAttrs() const {
- return hasAttribute(Attributes::ByVal) ||
- hasAttribute(Attributes::Nest) ||
- hasAttribute(Attributes::StructRet) ||
- hasAttribute(Attributes::NoCapture);
- }
-
- /// @brief Attributes that may be applied to the function itself. These cannot
- /// be used on return values or function parameters.
- bool hasFunctionOnlyAttrs() const {
- return hasAttribute(Attributes::NoReturn) ||
- hasAttribute(Attributes::NoUnwind) ||
- hasAttribute(Attributes::ReadNone) ||
- hasAttribute(Attributes::ReadOnly) ||
- hasAttribute(Attributes::NoInline) ||
- hasAttribute(Attributes::AlwaysInline) ||
- hasAttribute(Attributes::OptimizeForSize) ||
- hasAttribute(Attributes::StackProtect) ||
- hasAttribute(Attributes::StackProtectReq) ||
- hasAttribute(Attributes::NoRedZone) ||
- hasAttribute(Attributes::NoImplicitFloat) ||
- hasAttribute(Attributes::Naked) ||
- hasAttribute(Attributes::InlineHint) ||
- hasAttribute(Attributes::StackAlignment) ||
- hasAttribute(Attributes::UWTable) ||
- hasAttribute(Attributes::NonLazyBind) ||
- hasAttribute(Attributes::ReturnsTwice) ||
- hasAttribute(Attributes::AddressSafety) ||
- hasAttribute(Attributes::MinSize);
- }
-
- bool operator==(const Attributes &A) const {
- return Attrs == A.Attrs;
- }
- bool operator!=(const Attributes &A) const {
- return Attrs != A.Attrs;
- }
-
- uint64_t Raw() const;
-
- /// @brief Which attributes cannot be applied to a type.
- static Attributes typeIncompatible(Type *Ty);
-
- /// encodeLLVMAttributesForBitcode - This returns an integer containing an
- /// encoding of all the LLVM attributes found in the given attribute bitset.
- /// Any change to this encoding is a breaking change to bitcode compatibility.
- static uint64_t encodeLLVMAttributesForBitcode(Attributes Attrs);
-
- /// decodeLLVMAttributesForBitcode - This returns an attribute bitset
- /// containing the LLVM attributes that have been decoded from the given
- /// integer. This function must stay in sync with
- /// 'encodeLLVMAttributesForBitcode'.
- static Attributes decodeLLVMAttributesForBitcode(LLVMContext &C,
- uint64_t EncodedAttrs);
-
- /// getAsString - The set of Attributes set in Attributes is converted to a
- /// string of equivalent mnemonics. This is, presumably, for writing out the
- /// mnemonics for the assembly writer.
- /// @brief Convert attribute bits to text
- std::string getAsString() const;
-};
-
-//===----------------------------------------------------------------------===//
-/// AttrBuilder - This class is used in conjunction with the Attributes::get
-/// method to create an Attributes object. The object itself is uniquified. The
-/// Builder's value, however, is not. So this can be used as a quick way to test
-/// for equality, presence of attributes, etc.
-class AttrBuilder {
- uint64_t Bits;
-public:
- AttrBuilder() : Bits(0) {}
- explicit AttrBuilder(uint64_t B) : Bits(B) {}
- AttrBuilder(const Attributes &A) : Bits(A.Raw()) {}
- AttrBuilder(const AttrBuilder &B) : Bits(B.Bits) {}
-
- void clear() { Bits = 0; }
-
- /// addAttribute - Add an attribute to the builder.
- AttrBuilder &addAttribute(Attributes::AttrVal Val);
-
- /// removeAttribute - Remove an attribute from the builder.
- AttrBuilder &removeAttribute(Attributes::AttrVal Val);
-
- /// addAttribute - Add the attributes from A to the builder.
- AttrBuilder &addAttributes(const Attributes &A);
-
- /// removeAttribute - Remove the attributes from A from the builder.
- AttrBuilder &removeAttributes(const Attributes &A);
-
- /// hasAttribute - Return true if the builder has the specified attribute.
- bool hasAttribute(Attributes::AttrVal A) const;
-
- /// hasAttributes - Return true if the builder has IR-level attributes.
- bool hasAttributes() const;
-
- /// hasAttributes - Return true if the builder has any attribute that's in the
- /// specified attribute.
- bool hasAttributes(const Attributes &A) const;
-
- /// hasAlignmentAttr - Return true if the builder has an alignment attribute.
- bool hasAlignmentAttr() const;
-
- /// getAlignment - Retrieve the alignment attribute, if it exists.
- uint64_t getAlignment() const;
-
- /// getStackAlignment - Retrieve the stack alignment attribute, if it exists.
- uint64_t getStackAlignment() const;
-
- /// addAlignmentAttr - This turns an int alignment (which must be a power of
- /// 2) into the form used internally in Attributes.
- AttrBuilder &addAlignmentAttr(unsigned Align);
-
- /// addStackAlignmentAttr - This turns an int stack alignment (which must be a
- /// power of 2) into the form used internally in Attributes.
- AttrBuilder &addStackAlignmentAttr(unsigned Align);
-
- /// addRawValue - Add the raw value to the internal representation.
- /// N.B. This should be used ONLY for decoding LLVM bitcode!
- AttrBuilder &addRawValue(uint64_t Val);
-
- /// @brief Remove attributes that are used on functions only.
- void removeFunctionOnlyAttrs() {
- removeAttribute(Attributes::NoReturn)
- .removeAttribute(Attributes::NoUnwind)
- .removeAttribute(Attributes::ReadNone)
- .removeAttribute(Attributes::ReadOnly)
- .removeAttribute(Attributes::NoInline)
- .removeAttribute(Attributes::AlwaysInline)
- .removeAttribute(Attributes::OptimizeForSize)
- .removeAttribute(Attributes::StackProtect)
- .removeAttribute(Attributes::StackProtectReq)
- .removeAttribute(Attributes::NoRedZone)
- .removeAttribute(Attributes::NoImplicitFloat)
- .removeAttribute(Attributes::Naked)
- .removeAttribute(Attributes::InlineHint)
- .removeAttribute(Attributes::StackAlignment)
- .removeAttribute(Attributes::UWTable)
- .removeAttribute(Attributes::NonLazyBind)
- .removeAttribute(Attributes::ReturnsTwice)
- .removeAttribute(Attributes::AddressSafety)
- .removeAttribute(Attributes::MinSize);
- }
-
- uint64_t Raw() const { return Bits; }
-
- bool operator==(const AttrBuilder &B) {
- return Bits == B.Bits;
- }
- bool operator!=(const AttrBuilder &B) {
- return Bits != B.Bits;
- }
-};
-
-//===----------------------------------------------------------------------===//
-// AttributeWithIndex
-//===----------------------------------------------------------------------===//
-
-/// AttributeWithIndex - This is just a pair of values to associate a set of
-/// attributes with an index.
-struct AttributeWithIndex {
- Attributes Attrs; ///< The attributes that are set, or'd together.
- unsigned Index; ///< Index of the parameter for which the attributes apply.
- ///< Index 0 is used for return value attributes.
- ///< Index ~0U is used for function attributes.
-
- static AttributeWithIndex get(LLVMContext &C, unsigned Idx,
- ArrayRef<Attributes::AttrVal> Attrs) {
- return get(Idx, Attributes::get(C, Attrs));
- }
- static AttributeWithIndex get(unsigned Idx, Attributes Attrs) {
- AttributeWithIndex P;
- P.Index = Idx;
- P.Attrs = Attrs;
- return P;
- }
-};
-
-//===----------------------------------------------------------------------===//
-// AttrListPtr Smart Pointer
-//===----------------------------------------------------------------------===//
-
-class AttributeListImpl;
-
-/// AttrListPtr - This class manages the ref count for the opaque
-/// AttributeListImpl object and provides accessors for it.
-class AttrListPtr {
-public:
- enum AttrIndex {
- ReturnIndex = 0U,
- FunctionIndex = ~0U
- };
-private:
- /// @brief The attributes that we are managing. This can be null to represent
- /// the empty attributes list.
- AttributeListImpl *AttrList;
-
- /// @brief The attributes for the specified index are returned. Attributes
- /// for the result are denoted with Idx = 0.
- Attributes getAttributes(unsigned Idx) const;
-
- explicit AttrListPtr(AttributeListImpl *LI) : AttrList(LI) {}
-public:
- AttrListPtr() : AttrList(0) {}
- AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {}
- const AttrListPtr &operator=(const AttrListPtr &RHS);
-
- //===--------------------------------------------------------------------===//
- // Attribute List Construction and Mutation
- //===--------------------------------------------------------------------===//
-
- /// get - Return a Attributes list with the specified parameters in it.
- static AttrListPtr get(LLVMContext &C, ArrayRef<AttributeWithIndex> Attrs);
-
- /// addAttr - Add the specified attribute at the specified index to this
- /// attribute list. Since attribute lists are immutable, this
- /// returns the new list.
- AttrListPtr addAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const;
-
- /// removeAttr - Remove the specified attribute at the specified index from
- /// this attribute list. Since attribute lists are immutable, this
- /// returns the new list.
- AttrListPtr removeAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const;
-
- //===--------------------------------------------------------------------===//
- // Attribute List Accessors
- //===--------------------------------------------------------------------===//
- /// getParamAttributes - The attributes for the specified index are
- /// returned.
- Attributes getParamAttributes(unsigned Idx) const {
- return getAttributes(Idx);
- }
-
- /// getRetAttributes - The attributes for the ret value are
- /// returned.
- Attributes getRetAttributes() const {
- return getAttributes(ReturnIndex);
- }
-
- /// getFnAttributes - The function attributes are returned.
- Attributes getFnAttributes() const {
- return getAttributes(FunctionIndex);
- }
-
- /// paramHasAttr - Return true if the specified parameter index has the
- /// specified attribute set.
- bool paramHasAttr(unsigned Idx, Attributes Attr) const {
- return getAttributes(Idx).hasAttributes(Attr);
- }
-
- /// getParamAlignment - Return the alignment for the specified function
- /// parameter.
- unsigned getParamAlignment(unsigned Idx) const {
- return getAttributes(Idx).getAlignment();
- }
-
- /// hasAttrSomewhere - Return true if the specified attribute is set for at
- /// least one parameter or for the return value.
- bool hasAttrSomewhere(Attributes::AttrVal Attr) const;
-
- unsigned getNumAttrs() const;
- Attributes &getAttributesAtIndex(unsigned i) const;
-
- /// operator==/!= - Provide equality predicates.
- bool operator==(const AttrListPtr &RHS) const
- { return AttrList == RHS.AttrList; }
- bool operator!=(const AttrListPtr &RHS) const
- { return AttrList != RHS.AttrList; }
-
- //===--------------------------------------------------------------------===//
- // Attribute List Introspection
- //===--------------------------------------------------------------------===//
-
- /// getRawPointer - Return a raw pointer that uniquely identifies this
- /// attribute list.
- void *getRawPointer() const {
- return AttrList;
- }
-
- // Attributes are stored as a dense set of slots, where there is one
- // slot for each argument that has an attribute. This allows walking over the
- // dense set instead of walking the sparse list of attributes.
-
- /// isEmpty - Return true if there are no attributes.
- ///
- bool isEmpty() const {
- return AttrList == 0;
- }
-
- /// getNumSlots - Return the number of slots used in this attribute list.
- /// This is the number of arguments that have an attribute set on them
- /// (including the function itself).
- unsigned getNumSlots() const;
-
- /// getSlot - Return the AttributeWithIndex at the specified slot. This
- /// holds an index number plus a set of attributes.
- const AttributeWithIndex &getSlot(unsigned Slot) const;
-
- void dump() const;
-};
-
-} // End llvm namespace
-
-#endif
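
For reference, a minimal sketch of how the attribute API deleted above was
typically driven, assuming the pre-import AttrListPtr / AttributeWithIndex
names; since attribute lists are immutable, addAttr hands back a fresh list
rather than mutating in place:

    // Sketch only: uses the old AttrListPtr API removed by this import.
    AttrListPtr addNoUnwind(LLVMContext &C, AttrListPtr PAL) {
      Attributes NU = Attributes::get(C, Attributes::NoUnwind);
      // FunctionIndex (~0U) targets the function itself; index 0 targets
      // the return value, and 1..N target the parameters.
      return PAL.addAttr(C, AttrListPtr::FunctionIndex, NU);
    }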
diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h
deleted file mode 100644
index 02c2a96b6c64..000000000000
--- a/include/llvm/BasicBlock.h
+++ /dev/null
@@ -1,290 +0,0 @@
-//===-- llvm/BasicBlock.h - Represent a basic block in the VM ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the BasicBlock class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_BASICBLOCK_H
-#define LLVM_BASICBLOCK_H
-
-#include "llvm/Instruction.h"
-#include "llvm/SymbolTableListTraits.h"
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-class LandingPadInst;
-class TerminatorInst;
-class LLVMContext;
-class BlockAddress;
-
-template<> struct ilist_traits<Instruction>
- : public SymbolTableListTraits<Instruction, BasicBlock> {
- // createSentinel is used to get hold of a node that marks the end of
- // the list...
- // The sentinel is relative to this instance, so we use a non-static
- // method.
- Instruction *createSentinel() const {
- // since i(p)lists always publicly derive from the corresponding
- // traits, placing a data member in this class will augment i(p)list.
- // But since the NodeTy is expected to publicly derive from
- // ilist_node<NodeTy>, there is a legal viable downcast from it
- // to NodeTy. We use this trick to superpose i(p)list with a "ghostly"
- // NodeTy, which becomes the sentinel. Dereferencing the sentinel is
- // forbidden (save the ilist_node<NodeTy>) so no one will ever notice
- // the superposition.
- return static_cast<Instruction*>(&Sentinel);
- }
- static void destroySentinel(Instruction*) {}
-
- Instruction *provideInitialHead() const { return createSentinel(); }
- Instruction *ensureHead(Instruction*) const { return createSentinel(); }
- static void noteHead(Instruction*, Instruction*) {}
-private:
- mutable ilist_half_node<Instruction> Sentinel;
-};
-
-/// This represents a single basic block in LLVM. A basic block is simply a
-/// container of instructions that execute sequentially. Basic blocks are Values
-/// because they are referenced by instructions such as branches and switch
-/// tables. The type of a BasicBlock is "Type::LabelTy" because the basic block
-/// represents a label to which a branch can jump.
-///
-/// A well formed basic block consists of a list of non-terminating
-/// instructions followed by a single TerminatorInst instruction.
-/// TerminatorInsts may not occur in the middle of basic blocks, and must
-/// terminate the blocks. The BasicBlock class allows malformed basic blocks to
-/// occur because it may be useful in the intermediate stage of constructing or
-/// modifying a program. However, the verifier will ensure that basic blocks
-/// are "well formed".
-/// @brief LLVM Basic Block Representation
-class BasicBlock : public Value, // Basic blocks are data objects also
- public ilist_node<BasicBlock> {
- friend class BlockAddress;
-public:
- typedef iplist<Instruction> InstListType;
-private:
- InstListType InstList;
- Function *Parent;
-
- void setParent(Function *parent);
- friend class SymbolTableListTraits<BasicBlock, Function>;
-
- BasicBlock(const BasicBlock &) LLVM_DELETED_FUNCTION;
- void operator=(const BasicBlock &) LLVM_DELETED_FUNCTION;
-
- /// BasicBlock ctor - If the function parameter is specified, the basic block
- /// is automatically inserted at either the end of the function (if
- /// InsertBefore is null), or before the specified basic block.
- ///
- explicit BasicBlock(LLVMContext &C, const Twine &Name = "",
- Function *Parent = 0, BasicBlock *InsertBefore = 0);
-public:
- /// getContext - Get the context in which this basic block lives.
- LLVMContext &getContext() const;
-
- /// Instruction iterators...
- typedef InstListType::iterator iterator;
- typedef InstListType::const_iterator const_iterator;
-
- /// Create - Creates a new BasicBlock. If the Parent parameter is specified,
- /// the basic block is automatically inserted at either the end of the
- /// function (if InsertBefore is 0), or before the specified basic block.
- static BasicBlock *Create(LLVMContext &Context, const Twine &Name = "",
- Function *Parent = 0,BasicBlock *InsertBefore = 0) {
- return new BasicBlock(Context, Name, Parent, InsertBefore);
- }
- ~BasicBlock();
-
- /// getParent - Return the enclosing method, or null if none
- ///
- const Function *getParent() const { return Parent; }
- Function *getParent() { return Parent; }
-
- /// getTerminator() - If this is a well formed basic block, then this returns
- /// a pointer to the terminator instruction. If it is not, then you get a
- /// null pointer back.
- ///
- TerminatorInst *getTerminator();
- const TerminatorInst *getTerminator() const;
-
- /// Returns a pointer to the first instruction in this block that is not a
- /// PHINode instruction. When adding instructions to the beginning of the
- /// basic block, they should be added before the returned value, not before
- /// the first instruction, which might be a PHI.
- /// Returns 0 if there's no non-PHI instruction.
- Instruction* getFirstNonPHI();
- const Instruction* getFirstNonPHI() const {
- return const_cast<BasicBlock*>(this)->getFirstNonPHI();
- }
-
- // Same as above, but also skip debug intrinsics.
- Instruction* getFirstNonPHIOrDbg();
- const Instruction* getFirstNonPHIOrDbg() const {
- return const_cast<BasicBlock*>(this)->getFirstNonPHIOrDbg();
- }
-
- // Same as above, but also skip lifetime intrinsics.
- Instruction* getFirstNonPHIOrDbgOrLifetime();
- const Instruction* getFirstNonPHIOrDbgOrLifetime() const {
- return const_cast<BasicBlock*>(this)->getFirstNonPHIOrDbgOrLifetime();
- }
-
- /// getFirstInsertionPt - Returns an iterator to the first instruction in this
- /// block that is suitable for inserting a non-PHI instruction. In particular,
- /// it skips all PHIs and LandingPad instructions.
- iterator getFirstInsertionPt();
- const_iterator getFirstInsertionPt() const {
- return const_cast<BasicBlock*>(this)->getFirstInsertionPt();
- }
-
- /// removeFromParent - This method unlinks 'this' from the containing
- /// function, but does not delete it.
- ///
- void removeFromParent();
-
- /// eraseFromParent - This method unlinks 'this' from the containing function
- /// and deletes it.
- ///
- void eraseFromParent();
-
- /// moveBefore - Unlink this basic block from its current function and
- /// insert it into the function that MovePos lives in, right before MovePos.
- void moveBefore(BasicBlock *MovePos);
-
- /// moveAfter - Unlink this basic block from its current function and
- /// insert it into the function that MovePos lives in, right after MovePos.
- void moveAfter(BasicBlock *MovePos);
-
-
- /// getSinglePredecessor - If this basic block has a single predecessor block,
- /// return the block, otherwise return a null pointer.
- BasicBlock *getSinglePredecessor();
- const BasicBlock *getSinglePredecessor() const {
- return const_cast<BasicBlock*>(this)->getSinglePredecessor();
- }
-
- /// getUniquePredecessor - If this basic block has a unique predecessor block,
- /// return the block, otherwise return a null pointer.
- /// Note that a unique predecessor doesn't mean a single edge; there can be
- /// multiple edges from the unique predecessor to this block (for example,
- /// a switch statement with multiple cases having the same destination).
- BasicBlock *getUniquePredecessor();
- const BasicBlock *getUniquePredecessor() const {
- return const_cast<BasicBlock*>(this)->getUniquePredecessor();
- }
-
- //===--------------------------------------------------------------------===//
- /// Instruction iterator methods
- ///
- inline iterator begin() { return InstList.begin(); }
- inline const_iterator begin() const { return InstList.begin(); }
- inline iterator end () { return InstList.end(); }
- inline const_iterator end () const { return InstList.end(); }
-
- inline size_t size() const { return InstList.size(); }
- inline bool empty() const { return InstList.empty(); }
- inline const Instruction &front() const { return InstList.front(); }
- inline Instruction &front() { return InstList.front(); }
- inline const Instruction &back() const { return InstList.back(); }
- inline Instruction &back() { return InstList.back(); }
-
- /// getInstList() - Return the underlying instruction list container. You
- /// need to access it directly if you want to modify it.
- ///
- const InstListType &getInstList() const { return InstList; }
- InstListType &getInstList() { return InstList; }
-
- /// getSublistAccess() - returns pointer to member of instruction list
- static iplist<Instruction> BasicBlock::*getSublistAccess(Instruction*) {
- return &BasicBlock::InstList;
- }
-
- /// getValueSymbolTable() - returns pointer to symbol table (if any)
- ValueSymbolTable *getValueSymbolTable();
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
- return V->getValueID() == Value::BasicBlockVal;
- }
-
- /// dropAllReferences() - This function causes all the subinstructions to "let
- /// go" of all references that they are maintaining. This allows one to
- /// 'delete' a whole class at a time, even though there may be circular
- /// references... first all references are dropped, and all use counts go to
- /// zero. Then everything is delete'd for real. Note that no operations are
- /// valid on an object that has "dropped all references", except operator
- /// delete.
- ///
- void dropAllReferences();
-
- /// removePredecessor - This method is used to notify a BasicBlock that the
- /// specified Predecessor of the block is no longer able to reach it. This is
- /// not used to update the predecessor list itself, but rather to
- /// update the PHI nodes that reside in the block. Note that this should be
- /// called while the predecessor still refers to this block.
- ///
- void removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs = false);
-
- /// splitBasicBlock - This splits a basic block into two at the specified
- /// instruction. Note that all instructions BEFORE the specified iterator
- /// stay as part of the original basic block, an unconditional branch is added
- /// to the original BB, and the rest of the instructions in the BB are moved
- /// to the new BB, including the old terminator. The newly formed BasicBlock
- /// is returned. This function invalidates the specified iterator.
- ///
- /// Note that this only works on well formed basic blocks (must have a
- /// terminator), and 'I' must not be the end of instruction list (which would
- /// cause a degenerate basic block to be formed, having a terminator inside of
- /// the basic block).
- ///
- /// Also note that this doesn't preserve any passes. To split blocks while
- /// keeping loop information consistent, use the SplitBlock utility function.
- ///
- BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = "");
-
- /// hasAddressTaken - returns true if there are any uses of this basic block
- /// other than direct branches, switches, etc. to it.
- bool hasAddressTaken() const { return getSubclassDataFromValue() != 0; }
-
- /// replaceSuccessorsPhiUsesWith - Update all phi nodes in all our successors
- /// to refer to basic block New instead of to us.
- void replaceSuccessorsPhiUsesWith(BasicBlock *New);
-
- /// isLandingPad - Return true if this basic block is a landing pad. I.e.,
- /// it's the destination of the 'unwind' edge of an invoke instruction.
- bool isLandingPad() const;
-
- /// getLandingPadInst() - Return the landingpad instruction associated with
- /// the landing pad.
- LandingPadInst *getLandingPadInst();
- const LandingPadInst *getLandingPadInst() const;
-
-private:
- /// AdjustBlockAddressRefCount - BasicBlock stores the number of BlockAddress
- /// objects using it. This is almost always 0, sometimes one, possibly but
- /// almost never 2, and inconceivably 3 or more.
- void AdjustBlockAddressRefCount(int Amt) {
- setValueSubclassData(getSubclassDataFromValue()+Amt);
- assert((int)(signed char)getSubclassDataFromValue() >= 0 &&
- "Refcount wrap-around");
- }
- // Shadow Value::setValueSubclassData with a private forwarding method so that
- // any future subclasses cannot accidentally use it.
- void setValueSubclassData(unsigned short D) {
- Value::setValueSubclassData(D);
- }
-};
-
-} // End llvm namespace
-
-#endif
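
As a hedged illustration of the BasicBlock interface documented above (the
declaration itself lives on under include/llvm/IR after this reorganisation,
as the include changes further down show), splitting a block at its first
legal insertion point might look like:

    // Sketch: split BB after its PHI/landingpad prefix. splitBasicBlock
    // invalidates the iterator it is given, so it is not reused afterwards.
    void splitAfterPrologue(BasicBlock *BB) {
      BasicBlock::iterator I = BB->getFirstInsertionPt();
      BasicBlock *Tail = BB->splitBasicBlock(I, "tail");
      (void)Tail; // The original BB now ends in an unconditional branch.
    }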
diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h
index 4fd4b5d90a9e..7b30c7e458fa 100644
--- a/include/llvm/Bitcode/Archive.h
+++ b/include/llvm/Bitcode/Archive.h
@@ -50,10 +50,10 @@ class ArchiveMember : public ilist_node<ArchiveMember> {
SVR4SymbolTableFlag = 1, ///< Member is a SVR4 symbol table
BSD4SymbolTableFlag = 2, ///< Member is a BSD4 symbol table
LLVMSymbolTableFlag = 4, ///< Member is an LLVM symbol table
- BitcodeFlag = 8, ///< Member is bitcode
- HasPathFlag = 16, ///< Member has a full or partial path
+ BitcodeFlag = 8, ///< Member is bitcode
+ HasPathFlag = 16, ///< Member has a full or partial path
HasLongFilenameFlag = 32, ///< Member uses the long filename syntax
- StringTableFlag = 64 ///< Member is an ar(1) format string table
+ StringTableFlag = 64 ///< Member is an ar(1) format string table
};
/// @}
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h
index 28e1ab1c8711..b510daf33147 100644
--- a/include/llvm/Bitcode/BitCodes.h
+++ b/include/llvm/Bitcode/BitCodes.h
@@ -26,8 +26,8 @@
namespace llvm {
namespace bitc {
enum StandardWidths {
- BlockIDWidth = 8, // We use VBR-8 for block IDs.
- CodeLenWidth = 4, // Codelen are VBR-4.
+ BlockIDWidth = 8, // We use VBR-8 for block IDs.
+ CodeLenWidth = 4, // Codelen are VBR-4.
BlockSizeWidth = 32 // BlockSize up to 2^32 32-bit words = 16GB per block.
};
@@ -69,10 +69,11 @@ namespace bitc {
enum BlockInfoCodes {
// DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd
// block, instead of the BlockInfo block.
-
- BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#]
- BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name]
- BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME: [id, name]
+
+ BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#]
+ BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name]
+ BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME:
+ // [id, name]
};
} // End bitc namespace
@@ -99,7 +100,7 @@ public:
explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0)
: Val(Data), IsLiteral(false), Enc(E) {}
- bool isLiteral() const { return IsLiteral; }
+ bool isLiteral() const { return IsLiteral; }
bool isEncoding() const { return !IsLiteral; }
// Accessors for literals.
@@ -138,18 +139,18 @@ public:
if (C >= 'a' && C <= 'z') return C-'a';
if (C >= 'A' && C <= 'Z') return C-'A'+26;
if (C >= '0' && C <= '9') return C-'0'+26+26;
- if (C == '.') return 62;
- if (C == '_') return 63;
+ if (C == '.') return 62;
+ if (C == '_') return 63;
llvm_unreachable("Not a value Char6 character!");
}
static char DecodeChar6(unsigned V) {
assert((V & ~63) == 0 && "Not a Char6 encoded character!");
- if (V < 26) return V+'a';
- if (V < 26+26) return V-26+'A';
+ if (V < 26) return V+'a';
+ if (V < 26+26) return V-26+'A';
if (V < 26+26+10) return V-26-26+'0';
- if (V == 62) return '.';
- if (V == 63) return '_';
+ if (V == 62) return '.';
+ if (V == 63) return '_';
llvm_unreachable("Not a value Char6 character!");
}
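
The Char6 scheme above packs the 64-character alphabet [a-zA-Z0-9._] into six
bits ('a'-'z' map to 0-25, 'A'-'Z' to 26-51, '0'-'9' to 52-61, '.' to 62 and
'_' to 63), so encode and decode are exact inverses. A quick round-trip check,
assuming the companion EncodeChar6 helper whose body appears above:

    // Sanity check: DecodeChar6/EncodeChar6 must round-trip all 64 values.
    for (unsigned V = 0; V != 64; ++V)
      assert(BitCodeAbbrevOp::EncodeChar6(BitCodeAbbrevOp::DecodeChar6(V)) == V);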
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 840f57e7526d..f3139739cd18 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef BITSTREAM_READER_H
-#define BITSTREAM_READER_H
+#ifndef LLVM_BITCODE_BITSTREAMREADER_H
+#define LLVM_BITCODE_BITSTREAMREADER_H
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Bitcode/BitCodes.h"
@@ -27,6 +27,11 @@ namespace llvm {
class Deserializer;
+/// BitstreamReader - This class is used to read from an LLVM bitcode stream,
+/// maintaining information that is global to decoding the entire file. While
+/// a file is being read, multiple cursors can be independently advanced or
+/// skipped around within the file. These are represented by the
+/// BitstreamCursor class.
class BitstreamReader {
public:
/// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
@@ -35,12 +40,12 @@ public:
unsigned BlockID;
std::vector<BitCodeAbbrev*> Abbrevs;
std::string Name;
-
+
std::vector<std::pair<unsigned, std::string> > RecordNames;
};
private:
OwningPtr<StreamableMemoryObject> BitcodeBytes;
-
+
std::vector<BlockInfo> BlockInfoRecords;
/// IgnoreBlockInfoNames - This is set to true if we don't care about the
@@ -86,7 +91,7 @@ public:
/// name information.
void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
-
+
//===--------------------------------------------------------------------===//
// Block Manipulation
//===--------------------------------------------------------------------===//
@@ -95,7 +100,7 @@ public:
/// block info block for this Bitstream. We only process it for the first
/// cursor that walks over it.
bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
-
+
/// getBlockInfo - If there is block info for the specified ID, return it,
/// otherwise return null.
const BlockInfo *getBlockInfo(unsigned BlockID) const {
@@ -119,113 +124,114 @@ public:
BlockInfoRecords.back().BlockID = BlockID;
return BlockInfoRecords.back();
}
+};
+
+
+/// BitstreamEntry - When advancing through a bitstream cursor, each advance can
+/// discover a few different kinds of entries:
+/// Error - Malformed bitcode was found.
+/// EndBlock - We've reached the end of the current block (or the end of the
+/// file, which is treated like a series of EndBlock records).
+/// SubBlock - This is the start of a new subblock of a specific ID.
+/// Record - This is a record with a specific AbbrevID.
+///
+struct BitstreamEntry {
+ enum {
+ Error,
+ EndBlock,
+ SubBlock,
+ Record
+ } Kind;
+
+ unsigned ID;
+ static BitstreamEntry getError() {
+ BitstreamEntry E; E.Kind = Error; return E;
+ }
+ static BitstreamEntry getEndBlock() {
+ BitstreamEntry E; E.Kind = EndBlock; return E;
+ }
+ static BitstreamEntry getSubBlock(unsigned ID) {
+ BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
+ }
+ static BitstreamEntry getRecord(unsigned AbbrevID) {
+ BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
+ }
};
+/// BitstreamCursor - This represents a position within a bitcode file. There
+/// may be multiple independent cursors reading within one bitstream, each
+/// maintaining its own local state.
+///
+/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
+/// be passed by value.
class BitstreamCursor {
friend class Deserializer;
BitstreamReader *BitStream;
size_t NextChar;
-
- /// CurWord - This is the current data we have pulled from the stream but have
- /// not returned to the client.
- uint32_t CurWord;
-
+
+
+ /// CurWord/word_t - This is the current data we have pulled from the stream
+ /// but have not returned to the client. This is specifically and
+ /// intentionally defined to follow the word size of the host machine for
+ /// efficiency. We use word_t in places that are aware of this to make it
+ /// perfectly explicit what is going on.
+ typedef uint32_t word_t;
+ word_t CurWord;
+
/// BitsInCurWord - This is the number of bits in CurWord that are valid. This
- /// is always from [0...31] inclusive.
+ /// is always from [0...31/63] inclusive (depending on word size).
unsigned BitsInCurWord;
-
+
// CurCodeSize - This is the declared size of code values used for the current
// block, in bits.
unsigned CurCodeSize;
-
+
/// CurAbbrevs - Abbrevs installed in this block.
std::vector<BitCodeAbbrev*> CurAbbrevs;
-
+
struct Block {
unsigned PrevCodeSize;
std::vector<BitCodeAbbrev*> PrevAbbrevs;
explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
};
-
+
/// BlockScope - This tracks the codesize of parent blocks.
SmallVector<Block, 8> BlockScope;
-
+
+
public:
BitstreamCursor() : BitStream(0), NextChar(0) {
}
BitstreamCursor(const BitstreamCursor &RHS) : BitStream(0), NextChar(0) {
operator=(RHS);
}
-
+
explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
NextChar = 0;
CurWord = 0;
BitsInCurWord = 0;
CurCodeSize = 2;
}
-
+
void init(BitstreamReader &R) {
freeState();
-
+
BitStream = &R;
NextChar = 0;
CurWord = 0;
BitsInCurWord = 0;
CurCodeSize = 2;
}
-
+
~BitstreamCursor() {
freeState();
}
-
- void operator=(const BitstreamCursor &RHS) {
- freeState();
-
- BitStream = RHS.BitStream;
- NextChar = RHS.NextChar;
- CurWord = RHS.CurWord;
- BitsInCurWord = RHS.BitsInCurWord;
- CurCodeSize = RHS.CurCodeSize;
-
- // Copy abbreviations, and bump ref counts.
- CurAbbrevs = RHS.CurAbbrevs;
- for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
- i != e; ++i)
- CurAbbrevs[i]->addRef();
-
- // Copy block scope and bump ref counts.
- BlockScope = RHS.BlockScope;
- for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
- S != e; ++S) {
- std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
- for (unsigned i = 0, e = static_cast<unsigned>(Abbrevs.size());
- i != e; ++i)
- Abbrevs[i]->addRef();
- }
- }
-
- void freeState() {
- // Free all the Abbrevs.
- for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
- i != e; ++i)
- CurAbbrevs[i]->dropRef();
- CurAbbrevs.clear();
-
- // Free all the Abbrevs in the block scope.
- for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
- S != e; ++S) {
- std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
- for (unsigned i = 0, e = static_cast<unsigned>(Abbrevs.size());
- i != e; ++i)
- Abbrevs[i]->dropRef();
- }
- BlockScope.clear();
- }
-
- /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
- unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
-
+
+ void operator=(const BitstreamCursor &RHS);
+
+ void freeState();
+
bool isEndPos(size_t pos) {
return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
}
@@ -236,61 +242,113 @@ public:
static_cast<uint64_t>(pos - 1));
}
- unsigned char getByte(size_t pos) {
- uint8_t byte = -1;
- BitStream->getBitcodeBytes().readByte(pos, &byte);
- return byte;
- }
-
uint32_t getWord(size_t pos) {
- uint8_t buf[sizeof(uint32_t)];
- memset(buf, 0xFF, sizeof(buf));
- BitStream->getBitcodeBytes().readBytes(pos,
- sizeof(buf),
- buf,
- NULL);
+ uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
+ BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf, NULL);
return *reinterpret_cast<support::ulittle32_t *>(buf);
}
bool AtEndOfStream() {
- return isEndPos(NextChar) && BitsInCurWord == 0;
+ return BitsInCurWord == 0 && isEndPos(NextChar);
}
-
+
+ /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
+ unsigned getAbbrevIDWidth() const { return CurCodeSize; }
+
/// GetCurrentBitNo - Return the bit # of the bit we are reading.
uint64_t GetCurrentBitNo() const {
return NextChar*CHAR_BIT - BitsInCurWord;
}
-
+
BitstreamReader *getBitStreamReader() {
return BitStream;
}
const BitstreamReader *getBitStreamReader() const {
return BitStream;
}
-
-
+
+ /// Flags that modify the behavior of advance().
+ enum {
+ /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does
+ /// not automatically pop the block scope when the end of a block is
+ /// reached.
+ AF_DontPopBlockAtEnd = 1,
+
+ /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are
+ /// returned just like normal records.
+ AF_DontAutoprocessAbbrevs = 2
+ };
+
+ /// advance - Advance the current bitstream, returning the next entry in the
+ /// stream.
+ BitstreamEntry advance(unsigned Flags = 0) {
+ while (1) {
+ unsigned Code = ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ // Pop the end of the block unless Flags tells us not to.
+ if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
+ return BitstreamEntry::getError();
+ return BitstreamEntry::getEndBlock();
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK)
+ return BitstreamEntry::getSubBlock(ReadSubBlockID());
+
+ if (Code == bitc::DEFINE_ABBREV &&
+ !(Flags & AF_DontAutoprocessAbbrevs)) {
+ // We read and accumulate abbrevs; the client can't do anything with
+ // them anyway.
+ ReadAbbrevRecord();
+ continue;
+ }
+
+ return BitstreamEntry::getRecord(Code);
+ }
+ }
+
+ /// advanceSkippingSubblocks - This is a convenience function for clients that
+ /// don't expect any subblocks. This just skips over them automatically.
+ BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
+ while (1) {
+ // If we found a normal entry, return it.
+ BitstreamEntry Entry = advance(Flags);
+ if (Entry.Kind != BitstreamEntry::SubBlock)
+ return Entry;
+
+ // If we found a sub-block, just skip over it and check the next entry.
+ if (SkipBlock())
+ return BitstreamEntry::getError();
+ }
+ }
+
/// JumpToBit - Reset the stream to the specified bit number.
void JumpToBit(uint64_t BitNo) {
- uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;
- uintptr_t WordBitNo = uintptr_t(BitNo) & 31;
+ uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1);
+ unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
assert(canSkipToPos(ByteNo) && "Invalid location");
-
+
// Move the cursor to the right word.
NextChar = ByteNo;
BitsInCurWord = 0;
CurWord = 0;
-
+
// Skip over any bits that are already consumed.
- if (WordBitNo)
- Read(static_cast<unsigned>(WordBitNo));
+ if (WordBitNo) {
+ if (sizeof(word_t) > 4)
+ Read64(WordBitNo);
+ else
+ Read(WordBitNo);
+ }
}
-
-
+
+
uint32_t Read(unsigned NumBits) {
- assert(NumBits <= 32 && "Cannot return more than 32 bits!");
+ assert(NumBits && NumBits <= 32 &&
+ "Cannot return zero or more than 32 bits!");
+
// If the field is fully contained by CurWord, return it quickly.
if (BitsInCurWord >= NumBits) {
- uint32_t R = CurWord & ((1U << NumBits)-1);
+ uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits));
CurWord >>= NumBits;
BitsInCurWord -= NumBits;
return R;
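
Taken together, the new cursor API replaces hand-rolled ReadCode()/switch
loops with the entry-based pattern below. This is a minimal sketch, assuming
a BitstreamReader already wraps the bitcode buffer and eliding real record
handling:

    // Sketch: walk a stream, skipping every sub-block and reading each record.
    void walkStream(BitstreamReader &Reader) {
      BitstreamCursor Cursor(Reader);
      while (!Cursor.AtEndOfStream()) {
        BitstreamEntry Entry = Cursor.advance();
        switch (Entry.Kind) {
        case BitstreamEntry::Error:
          return;                 // Malformed bitcode.
        case BitstreamEntry::EndBlock:
          break;                  // advance() already popped the block scope.
        case BitstreamEntry::SubBlock:
          if (Cursor.SkipBlock()) // Not interested in this block's contents.
            return;
          break;
        case BitstreamEntry::Record: {
          SmallVector<uint64_t, 64> Vals;
          unsigned Code = Cursor.readRecord(Entry.ID, Vals);
          (void)Code;             // A real client would dispatch on Code.
          break;
        }
        }
      }
    }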
@@ -303,24 +361,37 @@ public:
return 0;
}
- unsigned R = CurWord;
+ uint32_t R = uint32_t(CurWord);
// Read the next word from the stream.
- CurWord = getWord(NextChar);
- NextChar += 4;
+ uint8_t Array[sizeof(word_t)] = {0};
+
+ BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array),
+ Array, NULL);
+
+ // Handle big-endian byte-swapping if necessary.
+ support::detail::packed_endian_specific_integral
+ <word_t, support::little, support::unaligned> EndianValue;
+ memcpy(&EndianValue, Array, sizeof(Array));
+
+ CurWord = EndianValue;
+
+ NextChar += sizeof(word_t);
// Extract NumBits-BitsInCurWord from what we just read.
unsigned BitsLeft = NumBits-BitsInCurWord;
- // Be careful here, BitsLeft is in the range [1..32] inclusive.
- R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord;
+ // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive.
+ R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft)))
+ << BitsInCurWord);
- // BitsLeft bits have just been used up from CurWord.
- if (BitsLeft != 32)
+ // BitsLeft bits have just been used up from CurWord. BitsLeft is in the
+ // range [1..32]/[1..64] so be careful how we shift.
+ if (BitsLeft != sizeof(word_t)*8)
CurWord >>= BitsLeft;
else
CurWord = 0;
- BitsInCurWord = 32-BitsLeft;
+ BitsInCurWord = sizeof(word_t)*8-BitsLeft;
return R;
}
@@ -369,10 +440,21 @@ public:
}
}
- void SkipToWord() {
+private:
+ void SkipToFourByteBoundary() {
+ // If word_t is 64 bits and we've read less than 32 bits, just dump
+ // the bits we have up to the next 32-bit boundary.
+ if (sizeof(word_t) > 4 &&
+ BitsInCurWord >= 32) {
+ CurWord >>= BitsInCurWord-32;
+ BitsInCurWord = 32;
+ return;
+ }
+
BitsInCurWord = 0;
CurWord = 0;
}
+public:
unsigned ReadCode() {
return Read(CurCodeSize);
@@ -395,62 +477,37 @@ public:
// Read and ignore the codelen value. Since we are skipping this block, we
// don't care what code widths are used inside of it.
ReadVBR(bitc::CodeLenWidth);
- SkipToWord();
- unsigned NumWords = Read(bitc::BlockSizeWidth);
+ SkipToFourByteBoundary();
+ unsigned NumFourBytes = Read(bitc::BlockSizeWidth);
// Check that the block wasn't partially defined, and that the offset isn't
// bogus.
- size_t SkipTo = NextChar + NumWords*4;
- if (AtEndOfStream() || !canSkipToPos(SkipTo))
+ size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
+ if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
return true;
- NextChar = SkipTo;
+ JumpToBit(SkipTo);
return false;
}
/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
/// the block, and return true if the block has an error.
- bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0) {
- // Save the current block's state on BlockScope.
- BlockScope.push_back(Block(CurCodeSize));
- BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
-
- // Add the abbrevs specific to this block to the CurAbbrevs list.
- if (const BitstreamReader::BlockInfo *Info =
- BitStream->getBlockInfo(BlockID)) {
- for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
- i != e; ++i) {
- CurAbbrevs.push_back(Info->Abbrevs[i]);
- CurAbbrevs.back()->addRef();
- }
- }
-
- // Get the codesize of this block.
- CurCodeSize = ReadVBR(bitc::CodeLenWidth);
- SkipToWord();
- unsigned NumWords = Read(bitc::BlockSizeWidth);
- if (NumWordsP) *NumWordsP = NumWords;
-
- // Validate that this block is sane.
- if (CurCodeSize == 0 || AtEndOfStream())
- return true;
-
- return false;
- }
+ bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0);
bool ReadBlockEnd() {
if (BlockScope.empty()) return true;
// Block tail:
// [END_BLOCK, <align4bytes>]
- SkipToWord();
+ SkipToFourByteBoundary();
- PopBlockScope();
+ popBlockScope();
return false;
}
private:
- void PopBlockScope() {
+
+ void popBlockScope() {
CurCodeSize = BlockScope.back().PrevCodeSize;
// Delete abbrevs from popped scope.
@@ -462,207 +519,40 @@ private:
BlockScope.pop_back();
}
- //===--------------------------------------------------------------------===//
+ //===--------------------------------------------------------------------===//
// Record Processing
//===--------------------------------------------------------------------===//
private:
- void ReadAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
- SmallVectorImpl<uint64_t> &Vals) {
- assert(Op.isLiteral() && "Not a literal");
- // If the abbrev specifies the literal value to use, use it.
- Vals.push_back(Op.getLiteralValue());
- }
-
- void ReadAbbreviatedField(const BitCodeAbbrevOp &Op,
- SmallVectorImpl<uint64_t> &Vals) {
- assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
-
- // Decode the value as we are commanded.
- switch (Op.getEncoding()) {
- default: llvm_unreachable("Unknown encoding!");
- case BitCodeAbbrevOp::Fixed:
- Vals.push_back(Read((unsigned)Op.getEncodingData()));
- break;
- case BitCodeAbbrevOp::VBR:
- Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData()));
- break;
- case BitCodeAbbrevOp::Char6:
- Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6)));
- break;
- }
- }
+ void readAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
+ SmallVectorImpl<uint64_t> &Vals);
+ void readAbbreviatedField(const BitCodeAbbrevOp &Op,
+ SmallVectorImpl<uint64_t> &Vals);
+ void skipAbbreviatedField(const BitCodeAbbrevOp &Op);
+
public:
- /// getAbbrev - Return the abbreviation for the specified AbbrevId.
+ /// getAbbrev - Return the abbreviation for the specified AbbrevId.
const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
return CurAbbrevs[AbbrevNo];
}
-
- unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
- const char **BlobStart = 0, unsigned *BlobLen = 0) {
- if (AbbrevID == bitc::UNABBREV_RECORD) {
- unsigned Code = ReadVBR(6);
- unsigned NumElts = ReadVBR(6);
- for (unsigned i = 0; i != NumElts; ++i)
- Vals.push_back(ReadVBR64(6));
- return Code;
- }
- const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
-
- for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
- const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
- if (Op.isLiteral()) {
- ReadAbbreviatedLiteral(Op, Vals);
- } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
- // Array case. Read the number of elements as a vbr6.
- unsigned NumElts = ReadVBR(6);
-
- // Get the element encoding.
- assert(i+2 == e && "array op not second to last?");
- const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
-
- // Read all the elements.
- for (; NumElts; --NumElts)
- ReadAbbreviatedField(EltEnc, Vals);
- } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
- // Blob case. Read the number of bytes as a vbr6.
- unsigned NumElts = ReadVBR(6);
- SkipToWord(); // 32-bit alignment
-
- // Figure out where the end of this blob will be including tail padding.
- size_t NewEnd = NextChar+((NumElts+3)&~3);
-
- // If this would read off the end of the bitcode file, just set the
- // record to empty and return.
- if (!canSkipToPos(NewEnd)) {
- Vals.append(NumElts, 0);
- NextChar = BitStream->getBitcodeBytes().getExtent();
- break;
- }
-
- // Otherwise, read the number of bytes. If we can return a reference to
- // the data, do so to avoid copying it.
- if (BlobStart) {
- *BlobStart = (const char*)BitStream->getBitcodeBytes().getPointer(
- NextChar, NumElts);
- *BlobLen = NumElts;
- } else {
- for (; NumElts; ++NextChar, --NumElts)
- Vals.push_back(getByte(NextChar));
- }
- // Skip over tail padding.
- NextChar = NewEnd;
- } else {
- ReadAbbreviatedField(Op, Vals);
- }
- }
-
- unsigned Code = (unsigned)Vals[0];
- Vals.erase(Vals.begin());
- return Code;
- }
+ /// skipRecord - Read the current record and discard it.
+ void skipRecord(unsigned AbbrevID);
- unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
- const char *&BlobStart, unsigned &BlobLen) {
- return ReadRecord(AbbrevID, Vals, &BlobStart, &BlobLen);
- }
+ unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
+ StringRef *Blob = 0);
-
//===--------------------------------------------------------------------===//
// Abbrev Processing
//===--------------------------------------------------------------------===//
+ void ReadAbbrevRecord();
- void ReadAbbrevRecord() {
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
- unsigned NumOpInfo = ReadVBR(5);
- for (unsigned i = 0; i != NumOpInfo; ++i) {
- bool IsLiteral = Read(1) ? true : false;
- if (IsLiteral) {
- Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8)));
- continue;
- }
-
- BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3);
- if (BitCodeAbbrevOp::hasEncodingData(E))
- Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5)));
- else
- Abbv->Add(BitCodeAbbrevOp(E));
- }
- CurAbbrevs.push_back(Abbv);
- }
-
-public:
-
- bool ReadBlockInfoBlock() {
- // If this is the second stream to get to the block info block, skip it.
- if (BitStream->hasBlockInfoRecords())
- return SkipBlock();
-
- if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
-
- SmallVector<uint64_t, 64> Record;
- BitstreamReader::BlockInfo *CurBlockInfo = 0;
-
- // Read all the records for this module.
- while (1) {
- unsigned Code = ReadCode();
- if (Code == bitc::END_BLOCK)
- return ReadBlockEnd();
- if (Code == bitc::ENTER_SUBBLOCK) {
- ReadSubBlockID();
- if (SkipBlock()) return true;
- continue;
- }
-
- // Read abbrev records, associate them with CurBID.
- if (Code == bitc::DEFINE_ABBREV) {
- if (!CurBlockInfo) return true;
- ReadAbbrevRecord();
-
- // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
- // appropriate BlockInfo.
- BitCodeAbbrev *Abbv = CurAbbrevs.back();
- CurAbbrevs.pop_back();
- CurBlockInfo->Abbrevs.push_back(Abbv);
- continue;
- }
-
- // Read a record.
- Record.clear();
- switch (ReadRecord(Code, Record)) {
- default: break; // Default behavior, ignore unknown content.
- case bitc::BLOCKINFO_CODE_SETBID:
- if (Record.size() < 1) return true;
- CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
- break;
- case bitc::BLOCKINFO_CODE_BLOCKNAME: {
- if (!CurBlockInfo) return true;
- if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name.
- std::string Name;
- for (unsigned i = 0, e = Record.size(); i != e; ++i)
- Name += (char)Record[i];
- CurBlockInfo->Name = Name;
- break;
- }
- case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
- if (!CurBlockInfo) return true;
- if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name.
- std::string Name;
- for (unsigned i = 1, e = Record.size(); i != e; ++i)
- Name += (char)Record[i];
- CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
- Name));
- break;
- }
- }
- }
- }
+ bool ReadBlockInfoBlock();
};
-
+
} // End llvm namespace
#endif
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index dea118f98ed2..a837211875f5 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -12,11 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef BITSTREAM_WRITER_H
-#define BITSTREAM_WRITER_H
+#ifndef LLVM_BITCODE_BITSTREAMWRITER_H
+#define LLVM_BITCODE_BITSTREAMWRITER_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitCodes.h"
#include <vector>
@@ -273,7 +273,7 @@ public:
private:
/// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
- /// record. This is a no-op, since the abbrev specifies the literal to use.
+ /// record. This is a no-op, since the abbrev specifies the literal to use.
template<typename uintty>
void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
assert(Op.isLiteral() && "Not a literal");
@@ -282,13 +282,13 @@ private:
assert(V == Op.getLiteralValue() &&
"Invalid abbrev for record!");
}
-
+
/// EmitAbbreviatedField - Emit a single scalar field value with the specified
/// encoding.
template<typename uintty>
void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
-
+
// Encode the value as we are commanded.
switch (Op.getEncoding()) {
default: llvm_unreachable("Unknown encoding!");
@@ -305,7 +305,7 @@ private:
break;
}
}
-
+
/// EmitRecordWithAbbrevImpl - This is the core implementation of the record
/// emission code. If BlobData is non-null, then it specifies an array of
/// data that should be emitted as part of the Blob or Array operand that is
@@ -341,11 +341,11 @@ private:
"Blob data and record entries specified for array!");
// Emit a vbr6 to indicate the number of elements present.
EmitVBR(static_cast<uint32_t>(BlobLen), 6);
-
+
// Emit each field.
for (unsigned i = 0; i != BlobLen; ++i)
EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
-
+
// Know that blob data is consumed for assertion below.
BlobData = 0;
} else {
@@ -359,7 +359,7 @@ private:
} else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
// If this record has blob data, emit it, otherwise we must have record
// entries to encode this way.
-
+
// Emit a vbr6 to indicate the number of elements present.
if (BlobData) {
EmitVBR(static_cast<uint32_t>(BlobLen), 6);
@@ -368,7 +368,7 @@ private:
} else {
EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
}
-
+
// Flush to a 32-bit alignment boundary.
FlushToWord();
@@ -376,7 +376,7 @@ private:
if (BlobData) {
for (unsigned i = 0; i != BlobLen; ++i)
WriteByte((unsigned char)BlobData[i]);
-
+
// Know that blob data is consumed for assertion below.
BlobData = 0;
} else {
@@ -399,7 +399,7 @@ private:
assert(BlobData == 0 &&
"Blob data specified for record that doesn't use it!");
}
-
+
public:
/// EmitRecord - Emit the specified record to the stream, using an abbrev if
@@ -420,10 +420,10 @@ public:
// Insert the code into Vals to treat it uniformly.
Vals.insert(Vals.begin(), Code);
-
+
EmitRecordWithAbbrev(Abbrev, Vals);
}
-
+
/// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
/// Unlike EmitRecord, the code for the record should be included in Vals as
/// the first entry.
@@ -431,7 +431,7 @@ public:
void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef());
}
-
+
/// EmitRecordWithBlob - Emit the specified record to the stream, using an
/// abbrev that includes a blob at the end. The blob data to emit is
/// specified by the pointer and length specified at the end. In contrast to
@@ -458,10 +458,10 @@ public:
template<typename uintty>
void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
const char *ArrayData, unsigned ArrayLen) {
- return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
+ return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
ArrayLen));
}
-
+
//===--------------------------------------------------------------------===//
// Abbrev Emission
//===--------------------------------------------------------------------===//
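
On the writer side, the blob path described above pairs an abbrev whose last
operand is a Blob with EmitRecordWithBlob. A hedged sketch, with a made-up
record code (42) and the stream assumed to be positioned inside the right
block:

    // Sketch: emit one blob record through a freshly defined abbreviation.
    void emitBlobRecord(BitstreamWriter &Stream, StringRef Data) {
      BitCodeAbbrev *Abbv = new BitCodeAbbrev();
      Abbv->Add(BitCodeAbbrevOp(42));                    // Literal record code.
      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Payload bytes.
      unsigned AbbrevID = Stream.EmitAbbrev(Abbv);

      SmallVector<uint64_t, 1> Vals;
      Vals.push_back(42); // EmitRecordWithBlob expects the code first.
      Stream.EmitRecordWithBlob(AbbrevID, Vals, Data);
    }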
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index c1dc190304c2..f9690d5b779c 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -29,18 +29,17 @@ namespace bitc {
// Module sub-block id's.
PARAMATTR_BLOCK_ID,
+ PARAMATTR_GROUP_BLOCK_ID,
- UNUSED_ID1,
-
CONSTANTS_BLOCK_ID,
FUNCTION_BLOCK_ID,
-
- UNUSED_ID2,
-
+
+ UNUSED_ID1,
+
VALUE_SYMTAB_BLOCK_ID,
METADATA_BLOCK_ID,
METADATA_ATTACHMENT_ID,
-
+
TYPE_BLOCK_ID_NEW,
USELIST_BLOCK_ID
@@ -54,6 +53,8 @@ namespace bitc {
MODULE_CODE_DATALAYOUT = 3, // DATALAYOUT: [strchr x N]
MODULE_CODE_ASM = 4, // ASM: [strchr x N]
MODULE_CODE_SECTIONNAME = 5, // SECTIONNAME: [strchr x N]
+
+ // FIXME: Remove DEPLIB in 4.0.
MODULE_CODE_DEPLIB = 6, // DEPLIB: [strchr x N]
// GLOBALVAR: [pointer type, isconst, initid,
@@ -67,7 +68,7 @@ namespace bitc {
// ALIAS: [alias type, aliasee val#, linkage, visibility]
MODULE_CODE_ALIAS = 9,
- /// MODULE_CODE_PURGEVALS: [numvals]
+ // MODULE_CODE_PURGEVALS: [numvals]
MODULE_CODE_PURGEVALS = 10,
MODULE_CODE_GCNAME = 11 // GCNAME: [strchr x N]
@@ -75,7 +76,12 @@ namespace bitc {
/// PARAMATTR blocks have code for defining a parameter attribute set.
enum AttributeCodes {
- PARAMATTR_CODE_ENTRY = 1 // ENTRY: [paramidx0, attr0, paramidx1, attr1...]
+ // FIXME: Remove `PARAMATTR_CODE_ENTRY_OLD' in 4.0
+ PARAMATTR_CODE_ENTRY_OLD = 1, // ENTRY: [paramidx0, attr0,
+ // paramidx1, attr1...]
+ PARAMATTR_CODE_ENTRY = 2, // ENTRY: [paramidx0, attrgrp0,
+ // paramidx1, attrgrp1, ...]
+ PARAMATTR_GRP_CODE_ENTRY = 3 // ENTRY: [id, attr0, att1, ...]
};
/// TYPE blocks have codes for each type primitive they use.
@@ -93,9 +99,9 @@ namespace bitc {
TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty,
// paramty x N]
-
+
TYPE_CODE_HALF = 10, // HALF
-
+
TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty]
TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty]
@@ -109,7 +115,7 @@ namespace bitc {
TYPE_CODE_METADATA = 16, // METADATA
TYPE_CODE_X86_MMX = 17, // X86 MMX
-
+
TYPE_CODE_STRUCT_ANON = 18, // STRUCT_ANON: [ispacked, eltty x N]
TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N]
TYPE_CODE_STRUCT_NAMED = 20,// STRUCT_NAMED: [ispacked, eltty x N]
@@ -141,6 +147,7 @@ namespace bitc {
METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes]
METADATA_ATTACHMENT = 11 // [m x [value, [n x [id, mdnode]]]
};
+
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
// constant and maintains an implicit current type value.
enum ConstantsCodes {
@@ -234,7 +241,7 @@ namespace bitc {
OBO_NO_SIGNED_WRAP = 1
};
- /// PossiblyExactOperatorOptionalFlags - Flags for serializing
+ /// PossiblyExactOperatorOptionalFlags - Flags for serializing
/// PossiblyExactOperator's SubclassOptionalData contents.
enum PossiblyExactOperatorOptionalFlags {
PEO_EXACT = 0
diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h
index dd96b043fc95..78f40ca17e61 100644
--- a/include/llvm/Bitcode/ReaderWriter.h
+++ b/include/llvm/Bitcode/ReaderWriter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_BITCODE_H
-#define LLVM_BITCODE_H
+#ifndef LLVM_BITCODE_READERWRITER_H
+#define LLVM_BITCODE_READERWRITER_H
#include <string>
diff --git a/include/llvm/CMakeLists.txt b/include/llvm/CMakeLists.txt
index f8cb4250584c..32ffca75bb7a 100644
--- a/include/llvm/CMakeLists.txt
+++ b/include/llvm/CMakeLists.txt
@@ -1,10 +1,4 @@
-set(LLVM_TARGET_DEFINITIONS Intrinsics.td)
-
-tablegen(LLVM Intrinsics.gen -gen-intrinsic)
-
-add_custom_target(intrinsics_gen ALL
- DEPENDS ${llvm_builded_incs_dir}/Intrinsics.gen)
-set_target_properties(intrinsics_gen PROPERTIES FOLDER "Tablegenning")
+add_subdirectory(IR)
if( MSVC_IDE OR XCODE )
# Creates a dummy target containing all headers for the benefit of
diff --git a/include/llvm/CallGraphSCCPass.h b/include/llvm/CallGraphSCCPass.h
deleted file mode 100644
index 7154aa3259d2..000000000000
--- a/include/llvm/CallGraphSCCPass.h
+++ /dev/null
@@ -1,104 +0,0 @@
-//===- CallGraphSCCPass.h - Pass that operates BU on call graph -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the CallGraphSCCPass class, which is used for passes which
-// are implemented as bottom-up traversals on the call graph. Because there may
-// be cycles in the call graph, passes of this type operate on the call-graph in
-// SCC order: that is, they process functions bottom-up, except for recursive
-// functions, which they process all at once.
-//
-// These passes are inherently interprocedural, and are required to keep the
-// call graph up-to-date if they do anything which could modify it.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CALL_GRAPH_SCC_PASS_H
-#define LLVM_CALL_GRAPH_SCC_PASS_H
-
-#include "llvm/Pass.h"
-#include "llvm/Analysis/CallGraph.h"
-
-namespace llvm {
-
-class CallGraphNode;
-class CallGraph;
-class PMStack;
-class CallGraphSCC;
-
-class CallGraphSCCPass : public Pass {
-public:
- explicit CallGraphSCCPass(char &pid) : Pass(PT_CallGraphSCC, pid) {}
-
- /// createPrinterPass - Get a pass that prints the Module
- /// corresponding to a CallGraph.
- Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
-
- /// doInitialization - This method is called before the SCCs of the program
- /// have been processed, allowing the pass to do initialization as necessary.
- virtual bool doInitialization(CallGraph &CG) {
- return false;
- }
-
- /// runOnSCC - This method should be implemented by the subclass to perform
- /// whatever action is necessary for the specified SCC. Note that
- /// non-recursive (or only self-recursive) functions will have an SCC size of
- /// 1, whereas recursive portions of the call graph will have SCC size > 1.
- ///
- /// SCC passes that add functions to or delete functions from the SCC are
- /// required to update the SCC list; otherwise stale pointers may be
- /// dereferenced.
- ///
- virtual bool runOnSCC(CallGraphSCC &SCC) = 0;
-
- /// doFinalization - This method is called after the SCCs of the program have
- /// been processed, allowing the pass to do final cleanup as necessary.
- virtual bool doFinalization(CallGraph &CG) {
- return false;
- }
-
- /// Assign pass manager to manage this pass.
- virtual void assignPassManager(PMStack &PMS,
- PassManagerType PMT);
-
- /// Return what kind of Pass Manager can manage this pass.
- virtual PassManagerType getPotentialPassManagerType() const {
- return PMT_CallGraphPassManager;
- }
-
- /// getAnalysisUsage - For this class, we declare that we require and preserve
- /// the call graph. If the derived class implements this method, it should
- /// always explicitly call the implementation here.
- virtual void getAnalysisUsage(AnalysisUsage &Info) const;
-};
-
-/// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
-class CallGraphSCC {
- void *Context; // The CGPassManager object that is vending this.
- std::vector<CallGraphNode*> Nodes;
-public:
- CallGraphSCC(void *context) : Context(context) {}
-
- void initialize(CallGraphNode*const*I, CallGraphNode*const*E) {
- Nodes.assign(I, E);
- }
-
- bool isSingular() const { return Nodes.size() == 1; }
- unsigned size() const { return Nodes.size(); }
-
- /// ReplaceNode - This informs the SCC and the pass manager that the specified
- /// Old node has been deleted, and New is to be used in its place.
- void ReplaceNode(CallGraphNode *Old, CallGraphNode *New);
-
- typedef std::vector<CallGraphNode*>::const_iterator iterator;
- iterator begin() const { return Nodes.begin(); }
- iterator end() const { return Nodes.end(); }
-};
-
-} // End llvm namespace
-
-#endif
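
The shape of a pass against this interface is small; a sketch, omitting pass
registration and using the usual static-ID idiom (the pass name is made up):

    // Sketch: a do-nothing bottom-up SCC pass. runOnSCC returns false to
    // signal that neither the functions nor the call graph were modified.
    namespace {
    struct CountSCCs : public CallGraphSCCPass {
      static char ID;
      CountSCCs() : CallGraphSCCPass(ID) {}
      virtual bool runOnSCC(CallGraphSCC &SCC) {
        unsigned Size = 0;
        for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
          ++Size; // One node per function in this strongly connected component.
        (void)Size;
        return false;
      }
    };
    }
    char CountSCCs::ID = 0;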
diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h
deleted file mode 100644
index 053f4eb326f9..000000000000
--- a/include/llvm/CallingConv.h
+++ /dev/null
@@ -1,125 +0,0 @@
-//===-- llvm/CallingConv.h - LLVM Calling Conventions -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines LLVM's set of calling conventions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CALLINGCONV_H
-#define LLVM_CALLINGCONV_H
-
-namespace llvm {
-
-/// CallingConv Namespace - This namespace contains an enum with a value for
-/// each of the well-known calling conventions.
-///
-namespace CallingConv {
- /// A set of enums which specify the assigned numeric values for known llvm
- /// calling conventions.
- /// @brief LLVM Calling Convention Representation
- enum ID {
- /// C - The default llvm calling convention, compatible with C. This
- /// convention is the only calling convention that supports varargs calls.
- /// As with typical C calling conventions, the callee/caller have to
- /// tolerate certain amounts of prototype mismatch.
- C = 0,
-
- // Generic LLVM calling conventions. None of these calling conventions
- // support varargs calls, and all assume that the caller and callee
- // prototype exactly match.
-
- /// Fast - This calling convention attempts to make calls as fast as
- /// possible (e.g. by passing things in registers).
- Fast = 8,
-
- // Cold - This calling convention attempts to make code in the caller as
- // efficient as possible under the assumption that the call is not commonly
- // executed. As such, these calls often preserve all registers so that the
- // call does not break any live ranges in the caller side.
- Cold = 9,
-
- // GHC - Calling convention used by the Glasgow Haskell Compiler (GHC).
- GHC = 10,
-
- // Target - This is the start of the target-specific calling conventions,
- // e.g. fastcall and thiscall on X86.
- FirstTargetCC = 64,
-
- /// X86_StdCall - stdcall is the calling convention mostly used by the
- /// Win32 API. It is basically the same as the C convention, with the
- /// difference that the callee is responsible for popping the arguments
- /// from the stack.
- X86_StdCall = 64,
-
- /// X86_FastCall - 'fast' analog of X86_StdCall. Passes the first two
- /// arguments in the ECX and EDX registers, the rest via the stack. The
- /// callee is responsible for stack cleanup.
- X86_FastCall = 65,
-
- /// ARM_APCS - ARM Procedure Calling Standard calling convention (obsolete,
- /// but still used on some targets).
- ARM_APCS = 66,
-
- /// ARM_AAPCS - ARM Architecture Procedure Calling Standard calling
- /// convention (aka EABI). Soft float variant.
- ARM_AAPCS = 67,
-
- /// ARM_AAPCS_VFP - Same as ARM_AAPCS, but uses hard floating point ABI.
- ARM_AAPCS_VFP = 68,
-
- /// MSP430_INTR - Calling convention used for MSP430 interrupt routines.
- MSP430_INTR = 69,
-
- /// X86_ThisCall - Similar to X86_StdCall. Passes first argument in ECX,
- /// others via stack. Callee is responsible for stack cleaning. MSVC uses
- /// this by default for methods in its ABI.
- X86_ThisCall = 70,
-
- /// PTX_Kernel - Call to a PTX kernel.
- /// Passes all arguments in parameter space.
- PTX_Kernel = 71,
-
- /// PTX_Device - Call to a PTX device function.
- /// Passes all arguments in register or parameter space.
- PTX_Device = 72,
-
- /// MBLAZE_INTR - Calling convention used for MBlaze interrupt routines.
- MBLAZE_INTR = 73,
-
- /// MBLAZE_SVOL - Calling convention used for MBlaze interrupt support
- /// routines (i.e. GCC's save_volatiles attribute).
- MBLAZE_SVOL = 74,
-
- /// SPIR_FUNC - Calling convention for SPIR non-kernel device functions.
- /// No lowering or expansion of arguments.
- /// Structures are passed as a pointer to a struct with the byval attribute.
- /// Functions can only call SPIR_FUNC and SPIR_KERNEL functions.
- /// Functions can only have zero or one return value.
- /// Variable arguments are not allowed, except for printf.
- /// How arguments/return values are lowered is not specified.
- /// Functions are only visible to the devices.
- SPIR_FUNC = 75,
-
- /// SPIR_KERNEL - Calling convention for SPIR kernel functions.
- /// Inherits the restrictions of SPIR_FUNC, except
- /// Cannot have non-void return values.
- /// Cannot have variable arguments.
- /// Can also be called by the host.
- /// Is externally visible.
- SPIR_KERNEL = 76,
-
- /// Intel_OCL_BI - Calling convention for Intel OpenCL built-ins
- Intel_OCL_BI = 77
-
- };
-} // End CallingConv namespace
-
-} // End llvm namespace
-
-#endif
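
Setting one of these conventions on a function or call site is a one-liner;
a sketch against the IR API (the enum itself lives on as
include/llvm/IR/CallingConv.h — see the CallingConvLower.h hunk further down,
which now includes it from there):

    // Sketch: mark a function and a call with the fastcc convention.
    void useFastCC(Function *F, CallInst *CI) {
      F->setCallingConv(CallingConv::Fast);
      CI->setCallingConv(CallingConv::Fast); // Must match the callee's CC.
    }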
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h
index 0b609ed6586e..ce9ca0a0583a 100644
--- a/include/llvm/CodeGen/Analysis.h
+++ b/include/llvm/CodeGen/Analysis.h
@@ -14,12 +14,12 @@
#ifndef LLVM_CODEGEN_ANALYSIS_H
#define LLVM_CODEGEN_ANALYSIS_H
-#include "llvm/Instructions.h"
-#include "llvm/InlineAsm.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/CallSite.h"
namespace llvm {
@@ -86,11 +86,7 @@ ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred);
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
- const TargetLowering &TLI);
-
-bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
- SDValue &Chain, const TargetLowering &TLI);
+bool isInTailCallPosition(ImmutableCallSite CS, const TargetLowering &TLI);
} // End llvm namespace
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index a92b85939f37..e0a6e3f4027a 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -17,7 +17,7 @@
#define LLVM_CODEGEN_ASMPRINTER_H
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/InlineAsm.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
@@ -385,10 +385,8 @@ namespace llvm {
/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
unsigned GetSizeOfEncodedValue(unsigned Encoding) const;
- /// EmitReference - Emit a reference to a label with a specified encoding.
- ///
- void EmitReference(const MCSymbol *Sym, unsigned Encoding) const;
- void EmitReference(const GlobalValue *GV, unsigned Encoding) const;
+  /// EmitTTypeReference - Emit a reference to a ttype global with a specified encoding.
+ void EmitTTypeReference(const GlobalValue *GV, unsigned Encoding) const;
/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of
/// its section. This can be done with a special directive if the target
diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h
index 2f76a6cc5583..9cd2decfacff 100644
--- a/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/include/llvm/CodeGen/CalcSpillWeights.h
@@ -11,8 +11,8 @@
#ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H
#define LLVM_CODEGEN_CALCSPILLWEIGHTS_H
-#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SlotIndexes.h"
namespace llvm {
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index 436918b1eb33..c035e0777cce 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -16,11 +16,11 @@
#define LLVM_CODEGEN_CALLINGCONVLOWER_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetCallingConv.h"
-#include "llvm/CallingConv.h"
namespace llvm {
class TargetRegisterInfo;
@@ -50,10 +50,10 @@ private:
unsigned Loc;
/// isMem - True if this is a memory loc, false if it is a register loc.
- bool isMem : 1;
+ unsigned isMem : 1;
/// isCustom - True if this arg/retval requires special handling.
- bool isCustom : 1;
+ unsigned isCustom : 1;
/// Information about how the value is assigned.
LocInfo HTP : 6;
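
One plausible motivation for the bool -> unsigned switch (illustrative, not
stated in the patch): some ABIs, MSVC in particular, only merge adjacent
bit-fields of the same underlying type into one storage unit, so declaring
every one-bit flag as unsigned lets the flags and the 6-bit LocInfo pack
together:

    struct Mixed  { bool     isMem : 1; unsigned HTP : 6; };  // may split units
    struct Packed { unsigned isMem : 1; unsigned HTP : 6; };  // packs into one
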
diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h
index 90ee23424498..9a27661b5190 100644
--- a/include/llvm/CodeGen/CommandFlags.h
+++ b/include/llvm/CodeGen/CommandFlags.h
@@ -1,4 +1,4 @@
-//===-- CommandFlags.h - Register Coalescing Interface ----------*- C++ -*-===//
+//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,13 +13,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_COMMAND_LINE_FLAGS_H
-#define LLVM_CODEGEN_COMMAND_LINE_FLAGS_H
+#ifndef LLVM_CODEGEN_COMMANDFLAGS_H
+#define LLVM_CODEGEN_COMMANDFLAGS_H
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
-
#include <string>
using namespace llvm;
diff --git a/include/llvm/CodeGen/DAGCombine.h b/include/llvm/CodeGen/DAGCombine.h
new file mode 100644
index 000000000000..8b5919005451
--- /dev/null
+++ b/include/llvm/CodeGen/DAGCombine.h
@@ -0,0 +1,25 @@
+//===-- llvm/CodeGen/DAGCombine.h ------- SelectionDAG Nodes ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_CODEGEN_DAGCOMBINE_H
+#define LLVM_CODEGEN_DAGCOMBINE_H
+
+namespace llvm {
+
+enum CombineLevel {
+ BeforeLegalizeTypes,
+ AfterLegalizeTypes,
+ AfterLegalizeVectorOps,
+ AfterLegalizeDAG
+};
+
+} // end llvm namespace
+
+#endif
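
A hedged sketch of how a target combine hook might consult CombineLevel; the
hook name and the SelectionDAG types are assumptions, not part of this header:

    SDValue PerformMyCombine(SDNode *N, CombineLevel Level, SelectionDAG &DAG) {
      // Only run once types are legal, per the enum's phase ordering.
      if (Level < AfterLegalizeTypes)
        return SDValue();
      // ... target-specific rewrite of N ...
      return SDValue();
    }
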
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
index 2d2db78144a4..9d25fd377b7e 100644
--- a/include/llvm/CodeGen/DFAPacketizer.h
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -26,8 +26,8 @@
#ifndef LLVM_CODEGEN_DFAPACKETIZER_H
#define LLVM_CODEGEN_DFAPACKETIZER_H
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include <map>
namespace llvm {
@@ -135,7 +135,7 @@ public:
// initPacketizerState - perform initialization before packetizing
 // an instruction. This function is supposed to be overridden by
// the target dependent packetizer.
- virtual void initPacketizerState(void) { return; }
+ virtual void initPacketizerState() { return; }
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
virtual bool ignorePseudoInstruction(MachineInstr *I,
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 7c24e36092b4..705db7e64340 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -15,8 +15,8 @@
#define LLVM_CODEGEN_FASTISEL_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ValueTypes.h"
namespace llvm {
@@ -90,6 +90,11 @@ public:
/// getCurDebugLoc() - Return current debug location information.
DebugLoc getCurDebugLoc() const { return DL; }
+
+ /// LowerArguments - Do "fast" instruction selection for function arguments
+ /// and append machine instructions to the current block. Return true if
+ /// it is successful.
+ bool LowerArguments();
/// SelectInstruction - Do "fast" instruction selection for the given
/// LLVM IR instruction, and append generated machine instructions to
@@ -131,6 +136,10 @@ public:
/// into the current block.
void recomputeInsertPt();
+  /// removeDeadCode - Remove all dead instructions between I and E.
+ void removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E);
+
struct SavePoint {
MachineBasicBlock::iterator InsertPt;
DebugLoc DL;
@@ -156,6 +165,11 @@ protected:
///
virtual bool
TargetSelectInstruction(const Instruction *I) = 0;
+
+ /// FastLowerArguments - This method is called by target-independent code to
+  /// do target-specific argument lowering. It returns true if it was
+ /// successful.
+ virtual bool FastLowerArguments();
/// FastEmit_r - This method is called by target-independent code
/// to request that an instruction with the given type and opcode
@@ -395,10 +409,6 @@ private:
/// hasTrivialKill - Test whether the given value has exactly one use.
bool hasTrivialKill(const Value *V) const;
-
- /// removeDeadCode - Remove all dead instructions between the I and E.
- void removeDeadCode(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator E);
};
}
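
A sketch of a target plugging into the new argument fast path; the class is
hypothetical, but the contract (return true only on success) is the one
documented above:

    bool MyTargetFastISel::FastLowerArguments() {
      if (!FuncInfo.CanLowerReturn)
        return false;              // fall back to SelectionDAG lowering
      // ... emit copies from fixed argument registers into vregs ...
      return true;                 // LowerArguments() keeps the fast path
    }
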
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index 8cf22eca4fa6..ea6cb27b7b13 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -15,19 +15,15 @@
#ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
#define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <vector>
@@ -35,6 +31,7 @@ namespace llvm {
class AllocaInst;
class BasicBlock;
+class BranchProbabilityInfo;
class CallInst;
class Function;
class GlobalVariable;
@@ -136,7 +133,7 @@ public:
return ValueMap.count(V);
}
- unsigned CreateReg(EVT VT);
+ unsigned CreateReg(MVT VT);
unsigned CreateRegs(Type *Ty);
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index 076f6f39fe2c..1070d29f7381 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -33,9 +33,9 @@
#ifndef LLVM_CODEGEN_GCMETADATA_H
#define LLVM_CODEGEN_GCMETADATA_H
-#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Pass.h"
#include "llvm/Support/DebugLoc.h"
namespace llvm {
@@ -180,7 +180,8 @@ namespace llvm {
GCModuleInfo();
~GCModuleInfo();
- /// clear - Resets the pass. The metadata deleter pass calls this.
+  /// clear - Resets the pass. Any pass that uses GCModuleInfo should call
+  /// it in doFinalization().
///
void clear();
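
Per the updated contract, a pass that uses GCModuleInfo resets it in its own
doFinalization(); a minimal sketch (the pass name is hypothetical):

    bool MyGCLoweringPass::doFinalization(Module &M) {
      if (GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>())
        GMI->clear();              // drop per-module GC metadata
      return false;
    }
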
diff --git a/include/llvm/CodeGen/GCs.h b/include/llvm/CodeGen/GCs.h
index c407b6167485..456d2dcb51a6 100644
--- a/include/llvm/CodeGen/GCs.h
+++ b/include/llvm/CodeGen/GCs.h
@@ -26,6 +26,12 @@ namespace llvm {
/// Creates an ocaml-compatible metadata printer.
void linkOcamlGCPrinter();
+
+ /// Creates an erlang-compatible garbage collector.
+ void linkErlangGC();
+
+ /// Creates an erlang-compatible metadata printer.
+ void linkErlangGCPrinter();
/// Creates a shadow stack garbage collector. This collector requires no code
/// generator support.
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 5d0a3b4c7067..442729b5d775 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -311,8 +311,10 @@ namespace ISD {
/// the shift amount can be any type, but care must be taken to ensure it is
/// large enough. TLI.getShiftAmountTy() is i8 on some targets, but before
/// legalization, types like i1024 can occur and i8 doesn't have enough bits
- /// to represent the shift amount. By convention, DAGCombine and
- /// SelectionDAGBuilder forces these shift amounts to i32 for simplicity.
+ /// to represent the shift amount.
+  /// When the first operand is a vector, the shift amount must have the same
+  /// vector type. (TLI.getShiftAmountTy() returns that same vector type when
+  /// the input type is a vector.)
SHL, SRA, SRL, ROTL, ROTR,
/// Byte Swap and Counting operators.
@@ -455,6 +457,9 @@ namespace ISD {
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
FLOG, FLOG2, FLOG10, FEXP, FEXP2,
FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR,
+
+ /// FSINCOS - Compute both fsin and fcos as a single operation.
+ FSINCOS,
/// LOAD and STORE have token chains as their first operand, then the same
/// operands as an LLVM load/store instruction, then an offset node that
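
The vector-shift rule above, as a sketch (DAG, dl, and a v4i32 operand Vec
are assumed to exist):

    EVT VT = MVT::v4i32;
    SDValue Amt = DAG.getConstant(3, VT);   // amount in the SAME vector type
    SDValue Shl = DAG.getNode(ISD::SHL, dl, VT, Vec, Amt);
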
diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h
index 5a3fb4b1a3df..68389dde494f 100644
--- a/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/include/llvm/CodeGen/IntrinsicLowering.h
@@ -16,7 +16,7 @@
#ifndef LLVM_CODEGEN_INTRINSICLOWERING_H
#define LLVM_CODEGEN_INTRINSICLOWERING_H
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/Intrinsics.h"
namespace llvm {
class CallInst;
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index 89f00e91f78e..9a7321418698 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -17,11 +17,11 @@
#ifndef LLVM_CODEGEN_JITCODEEMITTER_H
#define LLVM_CODEGEN_JITCODEEMITTER_H
-#include <string>
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include "llvm/ADT/DenseMap.h"
+#include <string>
namespace llvm {
@@ -207,8 +207,7 @@ public:
/// emitString - This callback is invoked when a String needs to be
/// written to the output stream.
void emitString(const std::string &String) {
- for (unsigned i = 0, N = static_cast<unsigned>(String.size());
- i < N; ++i) {
+ for (size_t i = 0, N = String.size(); i < N; ++i) {
uint8_t C = String[i];
emitByte(C);
}
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h
index 8fb31aa8a6d1..d454347d0b82 100644
--- a/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LATENCY_PRIORITY_QUEUE_H
-#define LATENCY_PRIORITY_QUEUE_H
+#ifndef LLVM_CODEGEN_LATENCYPRIORITYQUEUE_H
+#define LLVM_CODEGEN_LATENCYPRIORITYQUEUE_H
#include "llvm/CodeGen/ScheduleDAG.h"
diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h
index 8414c64544e5..ff65db4ee4ce 100644
--- a/include/llvm/CodeGen/LexicalScopes.h
+++ b/include/llvm/CodeGen/LexicalScopes.h
@@ -17,11 +17,11 @@
#ifndef LLVM_CODEGEN_LEXICALSCOPES_H
#define LLVM_CODEGEN_LEXICALSCOPES_H
-#include "llvm/Metadata.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/ValueHandle.h"
#include <utility>
@@ -159,9 +159,6 @@ public:
LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A)
: Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A),
LastInsn(0), FirstInsn(0), DFSIn(0), DFSOut(0) {
-#ifndef NDEBUG
- IndentLevel = 0;
-#endif
if (Parent)
Parent->addChild(this);
}
@@ -228,7 +225,7 @@ public:
void setDFSIn(unsigned I) { DFSIn = I; }
/// dump - print lexical scope.
- void dump() const;
+ void dump(unsigned Indent = 0) const;
private:
LexicalScope *Parent; // Parent to this scope.
@@ -244,9 +241,6 @@ private:
const MachineInstr *FirstInsn; // First instruction of this scope.
   unsigned DFSIn, DFSOut;         // In & Out depth used to determine
// scope nesting.
-#ifndef NDEBUG
- mutable unsigned IndentLevel; // Private state for dump()
-#endif
};
} // end llvm namespace
diff --git a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index 7d1b1fe477a5..c3046da90b8d 100644
--- a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -29,6 +29,7 @@ namespace {
return;
llvm::linkOcamlGCPrinter();
+ llvm::linkErlangGCPrinter();
}
} ForceAsmWriterLinking; // Force link by creating a global definition.
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 46dd004609f5..916c0f233ef8 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -15,9 +15,9 @@
#ifndef LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H
#define LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H
+#include "llvm/CodeGen/GCs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/CodeGen/GCs.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
@@ -37,6 +37,7 @@ namespace {
(void) llvm::createDefaultPBQPRegisterAllocator();
llvm::linkOcamlGC();
+ llvm::linkErlangGC();
llvm::linkShadowStackGC();
(void) llvm::createBURRListDAGScheduler(NULL, llvm::CodeGenOpt::Default);
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 185e414ae2cd..244be9c50155 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -22,9 +22,9 @@
#define LLVM_CODEGEN_LIVEINTERVAL_H
#include "llvm/ADT/IntEqClasses.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/AlignOf.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
#include <cassert>
#include <climits>
@@ -86,9 +86,10 @@ namespace llvm {
SlotIndex end; // End point of the interval (exclusive)
VNInfo *valno; // identifier for the value contained in this interval.
+ LiveRange() : valno(0) {}
+
LiveRange(SlotIndex S, SlotIndex E, VNInfo *V)
: start(S), end(E), valno(V) {
-
assert(S < E && "Cannot create empty or backwards range");
}
@@ -373,8 +374,8 @@ namespace llvm {
/// addRange - Add the specified LiveRange to this interval, merging
/// intervals as appropriate. This returns an iterator to the inserted live
     /// range (which may have grown since it was inserted).
- void addRange(LiveRange LR) {
- addRangeFrom(LR, ranges.begin());
+ iterator addRange(LiveRange LR) {
+ return addRangeFrom(LR, ranges.begin());
}
/// extendInBlock - If this interval is live before Kill in the basic block
@@ -460,9 +461,6 @@ namespace llvm {
void extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd);
Ranges::iterator extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStr);
void markValNoForDeletion(VNInfo *V);
- void mergeIntervalRanges(const LiveInterval &RHS,
- VNInfo *LHSValNo = 0,
- const VNInfo *RHSValNo = 0);
LiveInterval& operator=(const LiveInterval& rhs) LLVM_DELETED_FUNCTION;
@@ -473,6 +471,64 @@ namespace llvm {
return OS;
}
+ /// Helper class for performant LiveInterval bulk updates.
+ ///
+ /// Calling LiveInterval::addRange() repeatedly can be expensive on large
+ /// live ranges because segments after the insertion point may need to be
+ /// shifted. The LiveRangeUpdater class can defer the shifting when adding
+ /// many segments in order.
+ ///
+ /// The LiveInterval will be in an invalid state until flush() is called.
+ class LiveRangeUpdater {
+ LiveInterval *LI;
+ SlotIndex LastStart;
+ LiveInterval::iterator WriteI;
+ LiveInterval::iterator ReadI;
+ SmallVector<LiveRange, 16> Spills;
+ void mergeSpills();
+
+ public:
+ /// Create a LiveRangeUpdater for adding segments to LI.
+ /// LI will temporarily be in an invalid state until flush() is called.
+ LiveRangeUpdater(LiveInterval *li = 0) : LI(li) {}
+
+ ~LiveRangeUpdater() { flush(); }
+
+ /// Add a segment to LI and coalesce when possible, just like LI.addRange().
+ /// Segments should be added in increasing start order for best performance.
+ void add(LiveRange);
+
+ void add(SlotIndex Start, SlotIndex End, VNInfo *VNI) {
+ add(LiveRange(Start, End, VNI));
+ }
+
+ /// Return true if the LI is currently in an invalid state, and flush()
+ /// needs to be called.
+ bool isDirty() const { return LastStart.isValid(); }
+
+ /// Flush the updater state to LI so it is valid and contains all added
+ /// segments.
+ void flush();
+
+ /// Select a different destination live range.
+ void setDest(LiveInterval *li) {
+ if (LI != li && isDirty())
+ flush();
+ LI = li;
+ }
+
+ /// Get the current destination live range.
+ LiveInterval *getDest() const { return LI; }
+
+ void dump() const;
+ void print(raw_ostream&) const;
+ };
+
+ inline raw_ostream &operator<<(raw_ostream &OS, const LiveRangeUpdater &X) {
+ X.print(OS);
+ return OS;
+ }
+
/// LiveRangeQuery - Query information about a live range around a given
/// instruction. This class hides the implementation details of live ranges,
/// and it should be used as the primary interface for examining live ranges
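
Intended use of LiveRangeUpdater, per its comment (LI and the segment data
are assumed):

    LiveRangeUpdater Updater(&LI);
    for (unsigned i = 0; i != NumSegs; ++i)
      Updater.add(Starts[i], Ends[i], VNI);  // add in increasing start order
    // Updater's destructor calls flush(), so LI is valid again here.
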
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index b421753dd536..7d72f37255b4 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -20,22 +20,21 @@
#ifndef LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
#define LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <cmath>
#include <iterator>
namespace llvm {
class AliasAnalysis;
+ class BitVector;
class LiveRangeCalc;
class LiveVariables;
class MachineDominatorTree;
@@ -53,7 +52,6 @@ namespace llvm {
const TargetRegisterInfo* TRI;
const TargetInstrInfo* TII;
AliasAnalysis *AA;
- LiveVariables* LV;
SlotIndexes* Indexes;
MachineDominatorTree *DomTree;
LiveRangeCalc *LRCalc;
@@ -215,6 +213,13 @@ namespace llvm {
return Indexes->getMBBFromIndex(index);
}
+ void insertMBBInMaps(MachineBasicBlock *MBB) {
+ Indexes->insertMBBInMaps(MBB);
+ assert(unsigned(MBB->getNumber()) == RegMaskBlocks.size() &&
+ "Blocks must be added in order.");
+ RegMaskBlocks.push_back(std::make_pair(RegMaskSlots.size(), 0));
+ }
+
SlotIndex InsertMachineInstrInMaps(MachineInstr *MI) {
return Indexes->insertMachineInstrInMaps(MI);
}
@@ -275,6 +280,21 @@ namespace llvm {
void handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart,
bool UpdateFlags = false);
+ /// repairIntervalsInRange - Update live intervals for instructions in a
+ /// range of iterators. It is intended for use after target hooks that may
+ /// insert or remove instructions, and is only efficient for a small number
+ /// of instructions.
+ ///
+ /// OrigRegs is a vector of registers that were originally used by the
+ /// instructions in the range between the two iterators.
+ ///
+  /// Currently, the only changes that are supported are simple removal
+ /// and addition of uses.
+ void repairIntervalsInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ ArrayRef<unsigned> OrigRegs);
+
// Register mask functions.
//
// Machine instructions may use a register mask operand to indicate that a
@@ -347,37 +367,17 @@ namespace llvm {
return RegUnitIntervals[Unit];
}
- private:
- /// computeIntervals - Compute live intervals.
- void computeIntervals();
+ const LiveInterval *getCachedRegUnit(unsigned Unit) const {
+ return RegUnitIntervals[Unit];
+ }
+ private:
/// Compute live intervals for all virtual registers.
void computeVirtRegs();
/// Compute RegMaskSlots and RegMaskBits.
void computeRegMasks();
- /// handleRegisterDef - update intervals for a register def
- /// (calls handleVirtualRegisterDef)
- void handleRegisterDef(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator MI,
- SlotIndex MIIdx,
- MachineOperand& MO, unsigned MOIdx);
-
- /// isPartialRedef - Return true if the specified def at the specific index
- /// is partially re-defining the specified live interval. A common case of
- /// this is a definition of the sub-register.
- bool isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
- LiveInterval &interval);
-
- /// handleVirtualRegisterDef - update intervals for a virtual
- /// register def
- void handleVirtualRegisterDef(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator MI,
- SlotIndex MIIdx, MachineOperand& MO,
- unsigned MOIdx,
- LiveInterval& interval);
-
static LiveInterval* createInterval(unsigned Reg);
void printInstrs(raw_ostream &O) const;
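
Sketch of the repairIntervalsInRange() call pattern around a target hook
that may insert or remove instructions (all names illustrative):

    SmallVector<unsigned, 8> OrigRegs;  // regs used in [Begin, End) beforehand
    // ... target hook runs, possibly rewriting the range ...
    LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs);
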
diff --git a/include/llvm/CodeGen/LiveIntervalUnion.h b/include/llvm/CodeGen/LiveIntervalUnion.h
new file mode 100644
index 000000000000..615b339bd79c
--- /dev/null
+++ b/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -0,0 +1,205 @@
+//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion is a union of live segments across multiple live virtual
+// registers. This may be used during coalescing to represent a congruence
+// class, or during register allocation to model liveness of a physical
+// register.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEINTERVALUNION_H
+#define LLVM_CODEGEN_LIVEINTERVALUNION_H
+
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+class TargetRegisterInfo;
+
+#ifndef NDEBUG
+// forward declaration
+template <unsigned Element> class SparseBitVector;
+typedef SparseBitVector<128> LiveVirtRegBitSet;
+#endif
+
+/// Compare a live virtual register segment to a LiveIntervalUnion segment.
+inline bool
+overlap(const LiveRange &VRSeg,
+ const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) {
+ return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end;
+}
+
+/// Union of live intervals that are strong candidates for coalescing into a
+/// single register (either physical or virtual depending on the context). We
+/// expect the constituent live intervals to be disjoint, although we may
+/// eventually make exceptions to handle value-based interference.
+class LiveIntervalUnion {
+ // A set of live virtual register segments that supports fast insertion,
+ // intersection, and removal.
+ // Mapping SlotIndex intervals to virtual register numbers.
+ typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments;
+
+public:
+ // SegmentIter can advance to the next segment ordered by starting position
+ // which may belong to a different live virtual register. We also must be able
+ // to reach the current segment's containing virtual register.
+ typedef LiveSegments::iterator SegmentIter;
+
+ // LiveIntervalUnions share an external allocator.
+ typedef LiveSegments::Allocator Allocator;
+
+ class Query;
+
+private:
+ unsigned Tag; // unique tag for current contents.
+ LiveSegments Segments; // union of virtual reg segments
+
+public:
+ explicit LiveIntervalUnion(Allocator &a) : Tag(0), Segments(a) {}
+
+ // Iterate over all segments in the union of live virtual registers ordered
+ // by their starting position.
+ SegmentIter begin() { return Segments.begin(); }
+ SegmentIter end() { return Segments.end(); }
+ SegmentIter find(SlotIndex x) { return Segments.find(x); }
+ bool empty() const { return Segments.empty(); }
+ SlotIndex startIndex() const { return Segments.start(); }
+
+ // Provide public access to the underlying map to allow overlap iteration.
+ typedef LiveSegments Map;
+ const Map &getMap() { return Segments; }
+
+ /// getTag - Return an opaque tag representing the current state of the union.
+ unsigned getTag() const { return Tag; }
+
+  /// changedSince - Return true if the union has changed since getTag returned tag.
+ bool changedSince(unsigned tag) const { return tag != Tag; }
+
+ // Add a live virtual register to this union and merge its segments.
+ void unify(LiveInterval &VirtReg);
+
+ // Remove a live virtual register's segments from this union.
+ void extract(LiveInterval &VirtReg);
+
+ // Remove all inserted virtual registers.
+ void clear() { Segments.clear(); ++Tag; }
+
+ // Print union, using TRI to translate register names
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+
+#ifndef NDEBUG
+ // Verify the live intervals in this union and add them to the visited set.
+ void verify(LiveVirtRegBitSet& VisitedVRegs);
+#endif
+
+ /// Query interferences between a single live virtual register and a live
+ /// interval union.
+ class Query {
+ LiveIntervalUnion *LiveUnion;
+ LiveInterval *VirtReg;
+ LiveInterval::iterator VirtRegI; // current position in VirtReg
+ SegmentIter LiveUnionI; // current position in LiveUnion
+ SmallVector<LiveInterval*,4> InterferingVRegs;
+ bool CheckedFirstInterference;
+ bool SeenAllInterferences;
+ bool SeenUnspillableVReg;
+ unsigned Tag, UserTag;
+
+ public:
+ Query(): LiveUnion(), VirtReg(), Tag(0), UserTag(0) {}
+
+ Query(LiveInterval *VReg, LiveIntervalUnion *LIU):
+ LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false),
+ SeenAllInterferences(false), SeenUnspillableVReg(false)
+ {}
+
+ void clear() {
+ LiveUnion = NULL;
+ VirtReg = NULL;
+ InterferingVRegs.clear();
+ CheckedFirstInterference = false;
+ SeenAllInterferences = false;
+ SeenUnspillableVReg = false;
+ Tag = 0;
+ UserTag = 0;
+ }
+
+ void init(unsigned UTag, LiveInterval *VReg, LiveIntervalUnion *LIU) {
+ assert(VReg && LIU && "Invalid arguments");
+ if (UserTag == UTag && VirtReg == VReg &&
+ LiveUnion == LIU && !LIU->changedSince(Tag)) {
+ // Retain cached results, e.g. firstInterference.
+ return;
+ }
+ clear();
+ LiveUnion = LIU;
+ VirtReg = VReg;
+ Tag = LIU->getTag();
+ UserTag = UTag;
+ }
+
+ LiveInterval &virtReg() const {
+ assert(VirtReg && "uninitialized");
+ return *VirtReg;
+ }
+
+ // Does this live virtual register interfere with the union?
+ bool checkInterference() { return collectInterferingVRegs(1); }
+
+ // Count the virtual registers in this union that interfere with this
+ // query's live virtual register, up to maxInterferingRegs.
+ unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
+
+ // Was this virtual register visited during collectInterferingVRegs?
+ bool isSeenInterference(LiveInterval *VReg) const;
+
+ // Did collectInterferingVRegs collect all interferences?
+ bool seenAllInterferences() const { return SeenAllInterferences; }
+
+ // Did collectInterferingVRegs encounter an unspillable vreg?
+ bool seenUnspillableVReg() const { return SeenUnspillableVReg; }
+
+ // Vector generated by collectInterferingVRegs.
+ const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
+ return InterferingVRegs;
+ }
+
+ private:
+ Query(const Query&) LLVM_DELETED_FUNCTION;
+ void operator=(const Query&) LLVM_DELETED_FUNCTION;
+ };
+
+ // Array of LiveIntervalUnions.
+ class Array {
+ unsigned Size;
+ LiveIntervalUnion *LIUs;
+ public:
+ Array() : Size(0), LIUs(0) {}
+ ~Array() { clear(); }
+
+ // Initialize the array to have Size entries.
+ // Reuse an existing allocation if the size matches.
+ void init(LiveIntervalUnion::Allocator&, unsigned Size);
+
+ unsigned size() const { return Size; }
+
+ void clear();
+
+ LiveIntervalUnion& operator[](unsigned idx) {
+ assert(idx < Size && "idx out of bounds");
+ return LIUs[idx];
+ }
+ };
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION_H)
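
A hedged sketch of a one-off interference query against a single union
(VirtReg and LIU assumed):

    LiveIntervalUnion::Query Q(&VirtReg, &LIU);
    if (Q.checkInterference()) {
      Q.collectInterferingVRegs(8);       // cap the search
      const SmallVectorImpl<LiveInterval*> &IVR = Q.interferingVRegs();
      // ... consider evicting the vregs in IVR ...
    }
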
diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h
index def7b00ce761..8a32a3c11a82 100644
--- a/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/include/llvm/CodeGen/LiveRangeEdit.h
@@ -83,7 +83,7 @@ private:
/// allUsesAvailableAt - Return true if all registers used by OrigMI at
/// OrigIdx are also available with the same value at UseIdx.
bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
- SlotIndex UseIdx);
+ SlotIndex UseIdx) const;
/// foldAsLoad - If LI has a single use and a single def that can be folded as
/// a load, eliminate the register by folding the def into the use.
diff --git a/include/llvm/CodeGen/LiveRegMatrix.h b/include/llvm/CodeGen/LiveRegMatrix.h
new file mode 100644
index 000000000000..7a3e9e8347f4
--- /dev/null
+++ b/include/llvm/CodeGen/LiveRegMatrix.h
@@ -0,0 +1,148 @@
+//===-- LiveRegMatrix.h - Track register interference ---------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRegMatrix analysis pass keeps track of virtual register interference
+// along two dimensions: Slot indexes and register units. The matrix is used by
+// register allocators to ensure that no interfering virtual registers get
+// assigned to overlapping physical registers.
+//
+// Register units are defined in MCRegisterInfo.h; they represent the smallest
+// unit of interference when dealing with overlapping physical registers. The
+// LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When
+// a virtual register is assigned to a physical register, the live range for
+// the virtual register is inserted into the LiveIntervalUnion for each regunit
+// in the physreg.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEREGMATRIX_H
+#define LLVM_CODEGEN_LIVEREGMATRIX_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class LiveInterval;
+class LiveIntervalAnalysis;
+class MachineRegisterInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
+
+class LiveRegMatrix : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+ VirtRegMap *VRM;
+
+ // UserTag changes whenever virtual registers have been modified.
+ unsigned UserTag;
+
+ // The matrix is represented as a LiveIntervalUnion per register unit.
+ LiveIntervalUnion::Allocator LIUAlloc;
+ LiveIntervalUnion::Array Matrix;
+
+ // Cached queries per register unit.
+ OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+
+ // Cached register mask interference info.
+ unsigned RegMaskTag;
+ unsigned RegMaskVirtReg;
+ BitVector RegMaskUsable;
+
+ // MachineFunctionPass boilerplate.
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ virtual bool runOnMachineFunction(MachineFunction&);
+ virtual void releaseMemory();
+public:
+ static char ID;
+ LiveRegMatrix();
+
+ //===--------------------------------------------------------------------===//
+ // High-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Check for interference before assigning virtual registers to physical
+ // registers.
+ //
+
+ /// Invalidate cached interference queries after modifying virtual register
+ /// live ranges. Interference checks may return stale information unless
+ /// caches are invalidated.
+ void invalidateVirtRegs() { ++UserTag; }
+
+ enum InterferenceKind {
+ /// No interference, go ahead and assign.
+ IK_Free = 0,
+
+ /// Virtual register interference. There are interfering virtual registers
+ /// assigned to PhysReg or its aliases. This interference could be resolved
+ /// by unassigning those other virtual registers.
+ IK_VirtReg,
+
+ /// Register unit interference. A fixed live range is in the way, typically
+ /// argument registers for a call. This can't be resolved by unassigning
+ /// other virtual registers.
+ IK_RegUnit,
+
+ /// RegMask interference. The live range is crossing an instruction with a
+ /// regmask operand that doesn't preserve PhysReg. This typically means
+ /// VirtReg is live across a call, and PhysReg isn't call-preserved.
+ IK_RegMask
+ };
+
+ /// Check for interference before assigning VirtReg to PhysReg.
+ /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg).
+ /// When there is more than one kind of interference, the InterferenceKind
+ /// with the highest enum value is returned.
+ InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// Assign VirtReg to PhysReg.
+ /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
+ /// update VirtRegMap. The live range is expected to be available in PhysReg.
+ void assign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// Unassign VirtReg from its PhysReg.
+ /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes
+ /// the assignment and updates VirtRegMap accordingly.
+ void unassign(LiveInterval &VirtReg);
+
+ //===--------------------------------------------------------------------===//
+ // Low-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Provide access to the underlying LiveIntervalUnions.
+ //
+
+ /// Check for regmask interference only.
+ /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg.
+ /// If PhysReg is null, check if VirtReg crosses any regmask operands.
+ bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0);
+
+ /// Check for regunit interference only.
+  /// Return true if VirtReg overlaps a fixed assignment of one of PhysReg's
+ /// register units.
+ bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// Query a line of the assigned virtual register matrix directly.
+ /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
+ /// This returns a reference to an internal Query data structure that is only
+ /// valid until the next query() call.
+ LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit);
+
+ /// Directly access the live interval unions per regunit.
+ /// This returns an array indexed by the regunit number.
+ LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEREGMATRIX_H
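
The high-level interface suggests an assignment loop of roughly this shape
(Matrix, VirtReg, and the Order vector are assumed):

    for (unsigned i = 0; i != Order.size(); ++i) {
      unsigned PhysReg = Order[i];        // hypothetical allocation order
      if (Matrix->checkInterference(VirtReg, PhysReg) == LiveRegMatrix::IK_Free) {
        Matrix->assign(VirtReg, PhysReg); // recorded per register unit
        break;
      }
    }
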
diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h
index 86c4d7c11067..92c35f784d4c 100644
--- a/include/llvm/CodeGen/LiveStackAnalysis.h
+++ b/include/llvm/CodeGen/LiveStackAnalysis.h
@@ -13,13 +13,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_LIVESTACK_ANALYSIS_H
-#define LLVM_CODEGEN_LIVESTACK_ANALYSIS_H
+#ifndef LLVM_CODEGEN_LIVESTACKANALYSIS_H
+#define LLVM_CODEGEN_LIVESTACKANALYSIS_H
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <map>
namespace llvm {
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index 3bb134b8fb2a..6628fd278e45 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -29,21 +29,19 @@
#ifndef LLVM_CODEGEN_LIVEVARIABLES_H
#define LLVM_CODEGEN_LIVEVARIABLES_H
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
+class MachineBasicBlock;
class MachineRegisterInfo;
-class TargetRegisterInfo;
class LiveVariables : public MachineFunctionPass {
public:
diff --git a/include/llvm/CodeGen/MachORelocation.h b/include/llvm/CodeGen/MachORelocation.h
index 21fe74f8e1cd..8c9b7a84e5b8 100644
--- a/include/llvm/CodeGen/MachORelocation.h
+++ b/include/llvm/CodeGen/MachORelocation.h
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_MACHO_RELOCATION_H
-#define LLVM_CODEGEN_MACHO_RELOCATION_H
+#ifndef LLVM_CODEGEN_MACHORELOCATION_H
+#define LLVM_CODEGEN_MACHORELOCATION_H
#include "llvm/Support/DataTypes.h"
@@ -53,4 +53,4 @@ namespace llvm {
} // end llvm namespace
-#endif // LLVM_CODEGEN_MACHO_RELOCATION_H
+#endif // LLVM_CODEGEN_MACHORELOCATION_H
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 97c39458d93d..492a3ff49f8c 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -14,8 +14,8 @@
#ifndef LLVM_CODEGEN_MACHINEBASICBLOCK_H
#define LLVM_CODEGEN_MACHINEBASICBLOCK_H
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/ADT/GraphTraits.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/DataTypes.h"
#include <functional>
@@ -146,11 +146,11 @@ public:
bundle_iterator(IterTy mii) : MII(mii) {}
bundle_iterator(Ty &mi) : MII(mi) {
- assert(!mi.isInsideBundle() &&
+ assert(!mi.isBundledWithPred() &&
"It's not legal to initialize bundle_iterator with a bundled MI");
}
bundle_iterator(Ty *mi) : MII(mi) {
- assert((!mi || !mi->isInsideBundle()) &&
+ assert((!mi || !mi->isBundledWithPred()) &&
"It's not legal to initialize bundle_iterator with a bundled MI");
}
// Template allows conversion from const to nonconst.
@@ -174,13 +174,13 @@ public:
// Increment and decrement operators...
bundle_iterator &operator--() { // predecrement - Back up
do --MII;
- while (MII->isInsideBundle());
+ while (MII->isBundledWithPred());
return *this;
}
bundle_iterator &operator++() { // preincrement - Advance
- IterTy E = MII->getParent()->instr_end();
- do ++MII;
- while (MII != E && MII->isInsideBundle());
+ while (MII->isBundledWithSucc())
+ ++MII;
+ ++MII;
return *this;
}
bundle_iterator operator--(int) { // postdecrement operators...
@@ -441,80 +441,107 @@ public:
void pop_back() { Insts.pop_back(); }
void push_back(MachineInstr *MI) { Insts.push_back(MI); }
- template<typename IT>
- void insert(instr_iterator I, IT S, IT E) {
- Insts.insert(I, S, E);
- }
- instr_iterator insert(instr_iterator I, MachineInstr *M) {
- return Insts.insert(I, M);
- }
- instr_iterator insertAfter(instr_iterator I, MachineInstr *M) {
- return Insts.insertAfter(I, M);
- }
+ /// Insert MI into the instruction list before I, possibly inside a bundle.
+ ///
+ /// If the insertion point is inside a bundle, MI will be added to the bundle,
+ /// otherwise MI will not be added to any bundle. That means this function
+ /// alone can't be used to prepend or append instructions to bundles. See
+ /// MIBundleBuilder::insert() for a more reliable way of doing that.
+ instr_iterator insert(instr_iterator I, MachineInstr *M);
+ /// Insert a range of instructions into the instruction list before I.
template<typename IT>
void insert(iterator I, IT S, IT E) {
Insts.insert(I.getInstrIterator(), S, E);
}
- iterator insert(iterator I, MachineInstr *M) {
- return Insts.insert(I.getInstrIterator(), M);
+
+ /// Insert MI into the instruction list before I.
+ iterator insert(iterator I, MachineInstr *MI) {
+ assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
+ "Cannot insert instruction with bundle flags");
+ return Insts.insert(I.getInstrIterator(), MI);
}
- iterator insertAfter(iterator I, MachineInstr *M) {
- return Insts.insertAfter(I.getInstrIterator(), M);
+
+ /// Insert MI into the instruction list after I.
+ iterator insertAfter(iterator I, MachineInstr *MI) {
+ assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
+ "Cannot insert instruction with bundle flags");
+ return Insts.insertAfter(I.getInstrIterator(), MI);
}
- /// erase - Remove the specified element or range from the instruction list.
- /// These functions delete any instructions removed.
+ /// Remove an instruction from the instruction list and delete it.
///
- instr_iterator erase(instr_iterator I) {
- return Insts.erase(I);
- }
- instr_iterator erase(instr_iterator I, instr_iterator E) {
- return Insts.erase(I, E);
- }
+ /// If the instruction is part of a bundle, the other instructions in the
+ /// bundle will still be bundled after removing the single instruction.
+ instr_iterator erase(instr_iterator I);
+
+ /// Remove an instruction from the instruction list and delete it.
+ ///
+ /// If the instruction is part of a bundle, the other instructions in the
+ /// bundle will still be bundled after removing the single instruction.
instr_iterator erase_instr(MachineInstr *I) {
- instr_iterator MII(I);
- return erase(MII);
+ return erase(instr_iterator(I));
}
- iterator erase(iterator I);
+ /// Remove a range of instructions from the instruction list and delete them.
iterator erase(iterator I, iterator E) {
return Insts.erase(I.getInstrIterator(), E.getInstrIterator());
}
+
+ /// Remove an instruction or bundle from the instruction list and delete it.
+ ///
+ /// If I points to a bundle of instructions, they are all erased.
+ iterator erase(iterator I) {
+ return erase(I, llvm::next(I));
+ }
+
+ /// Remove an instruction from the instruction list and delete it.
+ ///
+ /// If I is the head of a bundle of instructions, the whole bundle will be
+ /// erased.
iterator erase(MachineInstr *I) {
- iterator MII(I);
- return erase(MII);
+ return erase(iterator(I));
}
- /// remove - Remove the instruction from the instruction list. This function
- /// does not delete the instruction. WARNING: Note, if the specified
- /// instruction is a bundle this function will remove all the bundled
- /// instructions as well. It is up to the caller to keep a list of the
- /// bundled instructions and re-insert them if desired. This function is
- /// *not recommended* for manipulating instructions with bundles. Use
- /// splice instead.
- MachineInstr *remove(MachineInstr *I);
+ /// Remove the unbundled instruction from the instruction list without
+ /// deleting it.
+ ///
+  /// This function cannot be used to remove bundled instructions; use
+ /// remove_instr to remove individual instructions from a bundle.
+ MachineInstr *remove(MachineInstr *I) {
+ assert(!I->isBundled() && "Cannot remove bundled instructions");
+ return Insts.remove(I);
+ }
+
+ /// Remove the possibly bundled instruction from the instruction list
+ /// without deleting it.
+ ///
+ /// If the instruction is part of a bundle, the other instructions in the
+ /// bundle will still be bundled after removing the single instruction.
+ MachineInstr *remove_instr(MachineInstr *I);
+
void clear() {
Insts.clear();
}
- /// splice - Take an instruction from MBB 'Other' at the position From,
- /// and insert it into this MBB right before 'where'.
- void splice(instr_iterator where, MachineBasicBlock *Other,
- instr_iterator From) {
- Insts.splice(where, Other->Insts, From);
+ /// Take an instruction from MBB 'Other' at the position From, and insert it
+ /// into this MBB right before 'Where'.
+ ///
+ /// If From points to a bundle of instructions, the whole bundle is moved.
+ void splice(iterator Where, MachineBasicBlock *Other, iterator From) {
+ // The range splice() doesn't allow noop moves, but this one does.
+ if (Where != From)
+ splice(Where, Other, From, llvm::next(From));
}
- void splice(iterator where, MachineBasicBlock *Other, iterator From);
- /// splice - Take a block of instructions from MBB 'Other' in the range [From,
- /// To), and insert them into this MBB right before 'where'.
- void splice(instr_iterator where, MachineBasicBlock *Other, instr_iterator From,
- instr_iterator To) {
- Insts.splice(where, Other->Insts, From, To);
- }
- void splice(iterator where, MachineBasicBlock *Other, iterator From,
- iterator To) {
- Insts.splice(where.getInstrIterator(), Other->Insts,
+ /// Take a block of instructions from MBB 'Other' in the range [From, To),
+ /// and insert them into this MBB right before 'Where'.
+ ///
+ /// The instruction at 'Where' must not be included in the range of
+ /// instructions to move.
+ void splice(iterator Where, MachineBasicBlock *Other,
+ iterator From, iterator To) {
+ Insts.splice(Where.getInstrIterator(), Other->Insts,
From.getInstrIterator(), To.getInstrIterator());
}
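
The bundle-aware semantics spelled out above, condensed into a sketch (MBB,
Other, and the instructions/iterators are assumed):

    MBB->erase(MI);                   // MI heads a bundle: the whole bundle goes
    MachineInstr *One = MBB->remove_instr(MI2);  // one MI out, rest stay bundled
    MBB->splice(Where, Other, From);  // moves From's entire bundle before Where
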
diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 12189ceb7f16..98dd03b45cf7 100644
--- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -1,4 +1,3 @@
-
//==- MachineBranchProbabilityInfo.h - Machine Branch Probability Analysis -==//
//
// The LLVM Compiler Infrastructure
@@ -15,8 +14,8 @@
#ifndef LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H
#define LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H
-#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include <climits>
diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h
index 86e8f27877e2..9e41e6e9c1ee 100644
--- a/include/llvm/CodeGen/MachineCodeEmitter.h
+++ b/include/llvm/CodeGen/MachineCodeEmitter.h
@@ -19,7 +19,6 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/DebugLoc.h"
-
#include <string>
namespace llvm {
diff --git a/include/llvm/CodeGen/MachineCodeInfo.h b/include/llvm/CodeGen/MachineCodeInfo.h
index c5c0c4450454..ba9dfab91a7e 100644
--- a/include/llvm/CodeGen/MachineCodeInfo.h
+++ b/include/llvm/CodeGen/MachineCodeInfo.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef EE_MACHINE_CODE_INFO_H
-#define EE_MACHINE_CODE_INFO_H
+#ifndef LLVM_CODEGEN_MACHINECODEINFO_H
+#define LLVM_CODEGEN_MACHINECODEINFO_H
#include "llvm/Support/DataTypes.h"
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index 82a4ac821b69..e41d206da65c 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -15,11 +15,11 @@
#ifndef LLVM_CODEGEN_MACHINEDOMINATORS_H
#define LLVM_CODEGEN_MACHINEDOMINATORS_H
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/DominatorInternals.h"
namespace llvm {
@@ -41,15 +41,15 @@ class MachineDominatorTree : public MachineFunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
DominatorTreeBase<MachineBasicBlock>* DT;
-
+
MachineDominatorTree();
-
+
~MachineDominatorTree();
-
+
DominatorTreeBase<MachineBasicBlock>& getBase() { return *DT; }
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
/// getRoots - Return the root blocks of the current CFG. This may include
/// multiple blocks if we are computing post dominators. For forward
/// dominators, this will always be a single block (the entry node).
@@ -57,33 +57,35 @@ public:
inline const std::vector<MachineBasicBlock*> &getRoots() const {
return DT->getRoots();
}
-
+
inline MachineBasicBlock *getRoot() const {
return DT->getRoot();
}
-
+
inline MachineDomTreeNode *getRootNode() const {
return DT->getRootNode();
}
-
+
virtual bool runOnMachineFunction(MachineFunction &F);
-
- inline bool dominates(MachineDomTreeNode* A, MachineDomTreeNode* B) const {
+
+ inline bool dominates(const MachineDomTreeNode* A,
+ const MachineDomTreeNode* B) const {
return DT->dominates(A, B);
}
-
- inline bool dominates(MachineBasicBlock* A, MachineBasicBlock* B) const {
+
+ inline bool dominates(const MachineBasicBlock* A,
+ const MachineBasicBlock* B) const {
return DT->dominates(A, B);
}
-
+
// dominates - Return true if A dominates B. This performs the
// special checks necessary if A and B are in the same basic block.
- bool dominates(MachineInstr *A, MachineInstr *B) const {
- MachineBasicBlock *BBA = A->getParent(), *BBB = B->getParent();
+ bool dominates(const MachineInstr *A, const MachineInstr *B) const {
+ const MachineBasicBlock *BBA = A->getParent(), *BBB = B->getParent();
if (BBA != BBB) return DT->dominates(BBA, BBB);
// Loop through the basic block until we find A or B.
- MachineBasicBlock::iterator I = BBA->begin();
+ MachineBasicBlock::const_iterator I = BBA->begin();
for (; &*I != A && &*I != B; ++I)
/*empty*/ ;
@@ -95,43 +97,43 @@ public:
// return &*I == B;
//}
}
-
+
inline bool properlyDominates(const MachineDomTreeNode* A,
- MachineDomTreeNode* B) const {
+ const MachineDomTreeNode* B) const {
return DT->properlyDominates(A, B);
}
-
- inline bool properlyDominates(MachineBasicBlock* A,
- MachineBasicBlock* B) const {
+
+ inline bool properlyDominates(const MachineBasicBlock* A,
+ const MachineBasicBlock* B) const {
return DT->properlyDominates(A, B);
}
-
+
/// findNearestCommonDominator - Find nearest common dominator basic block
/// for basic block A and B. If there is no such block then return NULL.
inline MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A,
MachineBasicBlock *B) {
return DT->findNearestCommonDominator(A, B);
}
-
+
inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
return DT->getNode(BB);
}
-
+
/// getNode - return the (Post)DominatorTree node for the specified basic
/// block. This is the same as using operator[] on this class.
///
inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
return DT->getNode(BB);
}
-
+
/// addNewBlock - Add a new node to the dominator tree information. This
- /// creates a new node as a child of DomBB dominator node,linking it into
+  /// creates a new node as a child of the DomBB dominator node, linking it into
/// the children list of the immediate dominator.
inline MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB,
MachineBasicBlock *DomBB) {
return DT->addNewBlock(BB, DomBB);
}
-
+
/// changeImmediateDominator - This method is used to update the dominator
/// tree information when a node's immediate dominator changes.
///
@@ -139,19 +141,19 @@ public:
MachineBasicBlock* NewIDom) {
DT->changeImmediateDominator(N, NewIDom);
}
-
+
inline void changeImmediateDominator(MachineDomTreeNode *N,
MachineDomTreeNode* NewIDom) {
DT->changeImmediateDominator(N, NewIDom);
}
-
+
/// eraseNode - Removes a node from the dominator tree. Block must not
/// dominate any other blocks. Removes node from its immediate dominator's
/// children list. Deletes dominator node associated with basic block BB.
inline void eraseNode(MachineBasicBlock *BB) {
DT->eraseNode(BB);
}
-
+
/// splitBlock - BB is split and now it has one successor. Update dominator
/// tree to reflect this change.
inline void splitBlock(MachineBasicBlock* NewBB) {
@@ -160,12 +162,12 @@ public:
/// isReachableFromEntry - Return true if A is dominated by the entry
/// block of the function containing it.
- bool isReachableFromEntry(MachineBasicBlock *A) {
+ bool isReachableFromEntry(const MachineBasicBlock *A) {
return DT->isReachableFromEntry(A);
}
virtual void releaseMemory();
-
+
virtual void print(raw_ostream &OS, const Module*) const;
};
@@ -179,7 +181,7 @@ template<class T> struct GraphTraits;
template <> struct GraphTraits<MachineDomTreeNode *> {
typedef MachineDomTreeNode NodeType;
typedef NodeType::iterator ChildIteratorType;
-
+
static NodeType *getEntryNode(NodeType *N) {
return N;
}
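
With the const overloads, a read-only client can query dominance directly
(MDT, A, and B assumed; when A and B share a block, the linear walk shown
above decides the answer):

    // A and B are const MachineInstr pointers obtained elsewhere.
    if (MDT->dominates(A, B)) {
      // A executes before B on every path through the function.
    }
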
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 0e4e132e40d9..cdec7e663708 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -221,8 +221,11 @@ class MachineFrameInfo {
/// just allocate them normally.
bool UseLocalStackAllocationBlock;
+ /// Whether the "realign-stack" option is on.
+ bool RealignOption;
public:
- explicit MachineFrameInfo(const TargetFrameLowering &tfi) : TFI(tfi) {
+ explicit MachineFrameInfo(const TargetFrameLowering &tfi, bool RealignOpt)
+ : TFI(tfi), RealignOption(RealignOpt) {
StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0;
HasVarSizedObjects = false;
FrameAddressTaken = false;
@@ -416,6 +419,9 @@ public:
///
void setStackSize(uint64_t Size) { StackSize = Size; }
+ /// Estimate and return the size of the stack frame.
+ unsigned estimateStackSize(const MachineFunction &MF) const;
+
/// getOffsetAdjustment - Return the correction for frame offsets.
///
int getOffsetAdjustment() const { return OffsetAdjustment; }
@@ -432,9 +438,7 @@ public:
/// ensureMaxAlignment - Make sure the function is at least Align bytes
/// aligned.
- void ensureMaxAlignment(unsigned Align) {
- if (MaxAlignment < Align) MaxAlignment = Align;
- }
+ void ensureMaxAlignment(unsigned Align);
/// AdjustsStack - Return true if this function adjusts the stack -- e.g.,
/// when calling another function. This is only valid during and after
@@ -496,26 +500,13 @@ public:
/// a nonnegative identifier to represent it.
///
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
- bool MayNeedSP = false, const AllocaInst *Alloca = 0) {
- assert(Size != 0 && "Cannot allocate zero size stack objects!");
- Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
- Alloca));
- int Index = (int)Objects.size() - NumFixedObjects - 1;
- assert(Index >= 0 && "Bad frame index!");
- ensureMaxAlignment(Alignment);
- return Index;
- }
+ bool MayNeedSP = false, const AllocaInst *Alloca = 0);
/// CreateSpillStackObject - Create a new statically sized stack object that
/// represents a spill slot, returning a nonnegative identifier to represent
/// it.
///
- int CreateSpillStackObject(uint64_t Size, unsigned Alignment) {
- CreateStackObject(Size, Alignment, true, false);
- int Index = (int)Objects.size() - NumFixedObjects - 1;
- ensureMaxAlignment(Alignment);
- return Index;
- }
+ int CreateSpillStackObject(uint64_t Size, unsigned Alignment);
/// RemoveStackObject - Remove or mark dead a statically sized stack object.
///
@@ -529,12 +520,7 @@ public:
/// variable sized object is created, whether or not the index returned is
/// actually used.
///
- int CreateVariableSizedObject(unsigned Alignment) {
- HasVarSizedObjects = true;
- Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0));
- ensureMaxAlignment(Alignment);
- return (int)Objects.size()-NumFixedObjects-1;
- }
+ int CreateVariableSizedObject(unsigned Alignment);
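
The creator bodies move out of line alongside the new RealignOption flag, presumably so they can consult the "realign-stack" setting when clamping requested alignments. A minimal usage sketch, assuming MFI is the current function's MachineFrameInfo and the size/alignment values are placeholders:

  // Reserve an 8-byte spill slot with 8-byte alignment. Non-fixed stack
  // objects always receive a nonnegative frame index.
  int FI = MFI->CreateSpillStackObject(8, 8);
  assert(FI >= 0 && "expected a non-fixed stack object index");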
/// getCalleeSavedInfo - Returns a reference to call saved info vector for the
/// current function.
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 025e18a9dde0..82c4cd659840 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -18,10 +18,11 @@
#ifndef LLVM_CODEGEN_MACHINEFUNCTION_H
#define LLVM_CODEGEN_MACHINEFUNCTION_H
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/ilist.h"
-#include "llvm/Support/DebugLoc.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ArrayRecycler.h"
+#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/Recycler.h"
namespace llvm {
@@ -105,6 +106,9 @@ class MachineFunction {
// Allocation management for instructions in function.
Recycler<MachineInstr> InstructionRecycler;
+ // Allocation management for operand arrays on instructions.
+ ArrayRecycler<MachineOperand> OperandRecycler;
+
// Allocation management for basic blocks in function.
Recycler<MachineBasicBlock> BasicBlockRecycler;
@@ -127,6 +131,9 @@ class MachineFunction {
/// about the control flow of such functions.
bool ExposesReturnsTwice;
+ /// True if the function includes MS-style inline assembly.
+ bool HasMSInlineAsm;
+
MachineFunction(const MachineFunction &) LLVM_DELETED_FUNCTION;
void operator=(const MachineFunction&) LLVM_DELETED_FUNCTION;
public:
@@ -210,6 +217,17 @@ public:
void setExposesReturnsTwice(bool B) {
ExposesReturnsTwice = B;
}
+
+ /// Returns true if the function contains any MS-style inline assembly.
+ bool hasMSInlineAsm() const {
+ return HasMSInlineAsm;
+ }
+
+ /// Set a flag that indicates that the function contains MS-style inline
+ /// assembly.
+ void setHasMSInlineAsm(bool B) {
+ HasMSInlineAsm = B;
+ }
/// getInfo - Keep track of various per-function pieces of information for
/// backends that would like to do so.
@@ -394,6 +412,21 @@ public:
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size);
+ typedef ArrayRecycler<MachineOperand>::Capacity OperandCapacity;
+
+ /// Allocate an array of MachineOperands. This is only intended for use by
+ /// internal MachineInstr functions.
+ MachineOperand *allocateOperandArray(OperandCapacity Cap) {
+ return OperandRecycler.allocate(Cap, Allocator);
+ }
+
+ /// Deallocate an array of MachineOperands and recycle the memory. This is
+ /// only intended for use by internal MachineInstr functions.
+ /// Cap must be the same capacity that was used to allocate the array.
+ void deallocateOperandArray(OperandCapacity Cap, MachineOperand *Array) {
+ OperandRecycler.deallocate(Cap, Array);
+ }
+
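A sketch of the intended pairing, assuming MF is the owning MachineFunction and NumOps is a placeholder count; as the comments above say, this is meant for MachineInstr internals only:

  MachineFunction::OperandCapacity Cap =
      MachineFunction::OperandCapacity::get(NumOps);
  MachineOperand *Ops = MF.allocateOperandArray(Cap);
  // ... construct operands in Ops[0 .. NumOps-1] ...
  MF.deallocateOperandArray(Cap, Ops); // Must pass back the same capacity.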
/// allocateMemRefsArray - Allocate an array to hold MachineMemOperand
/// pointers. This array is owned by the MachineFunction.
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num);
diff --git a/include/llvm/CodeGen/MachineFunctionAnalysis.h b/include/llvm/CodeGen/MachineFunctionAnalysis.h
index 50ea2062f30c..112f07ea50d7 100644
--- a/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ b/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -11,15 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_MACHINE_FUNCTION_ANALYSIS_H
-#define LLVM_CODEGEN_MACHINE_FUNCTION_ANALYSIS_H
+#ifndef LLVM_CODEGEN_MACHINEFUNCTIONANALYSIS_H
+#define LLVM_CODEGEN_MACHINEFUNCTIONANALYSIS_H
#include "llvm/Pass.h"
-#include "llvm/Target/TargetMachine.h"
namespace llvm {
class MachineFunction;
+class TargetMachine;
/// MachineFunctionAnalysis - This class is a Pass that manages a
/// MachineFunction object.
diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h
index b7bf0a36c447..04881e52ca7f 100644
--- a/include/llvm/CodeGen/MachineFunctionPass.h
+++ b/include/llvm/CodeGen/MachineFunctionPass.h
@@ -16,8 +16,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_MACHINE_FUNCTION_PASS_H
-#define LLVM_CODEGEN_MACHINE_FUNCTION_PASS_H
+#ifndef LLVM_CODEGEN_MACHINEFUNCTIONPASS_H
+#define LLVM_CODEGEN_MACHINEFUNCTIONPASS_H
#include "llvm/Pass.h"
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 7eb03a93012d..195cce7a64d7 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -16,17 +16,18 @@
#ifndef LLVM_CODEGEN_MACHINEINSTR_H
#define LLVM_CODEGEN_MACHINEINSTR_H
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Target/TargetOpcodes.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/DenseMapInfo.h"
-#include "llvm/InlineAsm.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/DebugLoc.h"
+#include "llvm/Target/TargetOpcodes.h"
#include <vector>
namespace llvm {
@@ -42,6 +43,10 @@ class MachineMemOperand;
//===----------------------------------------------------------------------===//
/// MachineInstr - Representation of each machine instruction.
///
+/// This class isn't a POD type, but it must have a trivial destructor. When a
+/// MachineFunction is deleted, all the contained MachineInstrs are deallocated
+/// without having their destructor called.
+///
class MachineInstr : public ilist_node<MachineInstr> {
public:
typedef MachineMemOperand **mmo_iterator;
@@ -58,11 +63,18 @@ public:
NoFlags = 0,
FrameSetup = 1 << 0, // Instruction is used as a part of
// function frame setup code.
- InsideBundle = 1 << 1 // Instruction is inside a bundle (not
- // the first MI in a bundle)
+ BundledPred = 1 << 1, // Instruction has bundled predecessors.
+ BundledSucc = 1 << 2 // Instruction has bundled successors.
};
private:
const MCInstrDesc *MCID; // Instruction descriptor.
+ MachineBasicBlock *Parent; // Pointer to the owning basic block.
+
+ // Operands are allocated by an ArrayRecycler.
+ MachineOperand *Operands; // Pointer to the first operand.
+ unsigned NumOperands; // Number of operands on instruction.
+ typedef ArrayRecycler<MachineOperand>::Capacity OperandCapacity;
+ OperandCapacity CapOperands; // Capacity of the Operands array.
uint8_t Flags; // Various bits of additional
// information about machine
@@ -75,15 +87,15 @@ private:
// anything other than to convey comment
// information to AsmPrinter.
- uint16_t NumMemRefs; // information on memory references
+ uint8_t NumMemRefs; // Information on memory references.
mmo_iterator MemRefs;
- std::vector<MachineOperand> Operands; // the operands
- MachineBasicBlock *Parent; // Pointer to the owning basic block.
DebugLoc debugLoc; // Source line information.
MachineInstr(const MachineInstr&) LLVM_DELETED_FUNCTION;
void operator=(const MachineInstr&) LLVM_DELETED_FUNCTION;
+ // Use MachineFunction::DeleteMachineInstr() instead.
+ ~MachineInstr() LLVM_DELETED_FUNCTION;
// Intrusive list support
friend struct ilist_traits<MachineInstr>;
@@ -94,22 +106,11 @@ private:
/// MachineInstr in the given MachineFunction.
MachineInstr(MachineFunction &, const MachineInstr &);
- /// MachineInstr ctor - This constructor creates a dummy MachineInstr with
- /// MCID NULL and no operands.
- MachineInstr();
-
/// MachineInstr ctor - This constructor create a MachineInstr and add the
/// implicit operands. It reserves space for number of operands specified by
/// MCInstrDesc. An explicit DebugLoc is supplied.
- MachineInstr(const MCInstrDesc &MCID, const DebugLoc dl, bool NoImp = false);
-
- /// MachineInstr ctor - Work exactly the same as the ctor above, except that
- /// the MachineInstr is created and added to the end of the specified basic
- /// block.
- MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
- const MCInstrDesc &MCID);
-
- ~MachineInstr();
+ MachineInstr(MachineFunction&, const MCInstrDesc &MCID,
+ const DebugLoc dl, bool NoImp = false);
// MachineInstrs are pool-allocated and owned by MachineFunction.
friend class MachineFunction;
@@ -160,7 +161,9 @@ public:
}
void setFlags(unsigned flags) {
- Flags = flags;
+ // Filter out the automatically maintained flags.
+ unsigned Mask = BundledPred | BundledSucc;
+ Flags = (Flags & Mask) | (flags & ~Mask);
}
/// clearFlag - Clear a MI flag.
@@ -205,21 +208,36 @@ public:
/// The first instruction has the special opcode "BUNDLE". It's not "inside"
/// a bundle, but the next three MIs are.
bool isInsideBundle() const {
- return getFlag(InsideBundle);
- }
-
- /// setIsInsideBundle - Set InsideBundle bit.
- ///
- void setIsInsideBundle(bool Val = true) {
- if (Val)
- setFlag(InsideBundle);
- else
- clearFlag(InsideBundle);
+ return getFlag(BundledPred);
}
  /// isBundled - Return true if this instruction is part of a bundle. This is
  /// true if it is bundled with its predecessor or its successor.
- bool isBundled() const;
+ bool isBundled() const {
+ return isBundledWithPred() || isBundledWithSucc();
+ }
+
+ /// Return true if this instruction is part of a bundle, and it is not the
+ /// first instruction in the bundle.
+ bool isBundledWithPred() const { return getFlag(BundledPred); }
+
+ /// Return true if this instruction is part of a bundle, and it is not the
+ /// last instruction in the bundle.
+ bool isBundledWithSucc() const { return getFlag(BundledSucc); }
+
+ /// Bundle this instruction with its predecessor. This can be an unbundled
+ /// instruction, or it can be the first instruction in a bundle.
+ void bundleWithPred();
+
+ /// Bundle this instruction with its successor. This can be an unbundled
+ /// instruction, or it can be the last instruction in a bundle.
+ void bundleWithSucc();
+
+ /// Break bundle above this instruction.
+ void unbundleFromPred();
+
+ /// Break bundle below this instruction.
+ void unbundleFromSucc();
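
The flags stay symmetric across neighbors, so one call suffices to join two instructions. A minimal sketch, assuming FirstMI and SecondMI are adjacent instructions in the same basic block:

  SecondMI->bundleWithPred();            // Also sets BundledSucc on FirstMI.
  assert(FirstMI->isBundledWithSucc());
  assert(SecondMI->isBundledWithPred());
  assert(!FirstMI->isBundledWithPred()); // FirstMI remains the bundle head.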
/// getDebugLoc - Returns the debug location id of this MachineInstr.
///
@@ -244,7 +262,7 @@ public:
/// Access to explicit operands of the instruction.
///
- unsigned getNumOperands() const { return (unsigned)Operands.size(); }
+ unsigned getNumOperands() const { return NumOperands; }
const MachineOperand& getOperand(unsigned i) const {
assert(i < getNumOperands() && "getOperand() out of range!");
@@ -260,14 +278,14 @@ public:
unsigned getNumExplicitOperands() const;
/// iterator/begin/end - Iterate over all operands of a machine instruction.
- typedef std::vector<MachineOperand>::iterator mop_iterator;
- typedef std::vector<MachineOperand>::const_iterator const_mop_iterator;
+ typedef MachineOperand *mop_iterator;
+ typedef const MachineOperand *const_mop_iterator;
- mop_iterator operands_begin() { return Operands.begin(); }
- mop_iterator operands_end() { return Operands.end(); }
+ mop_iterator operands_begin() { return Operands; }
+ mop_iterator operands_end() { return Operands + NumOperands; }
- const_mop_iterator operands_begin() const { return Operands.begin(); }
- const_mop_iterator operands_end() const { return Operands.end(); }
+ const_mop_iterator operands_begin() const { return Operands; }
+ const_mop_iterator operands_end() const { return Operands + NumOperands; }
/// Access to memory operands of the instruction
mmo_iterator memoperands_begin() const { return MemRefs; }
@@ -295,11 +313,11 @@ public:
/// The second argument indicates whether the query should look inside
/// instruction bundles.
bool hasProperty(unsigned MCFlag, QueryType Type = AnyInBundle) const {
- // Inline the fast path.
- if (Type == IgnoreBundle || !isBundle())
+ // Inline the fast path for unbundled or bundle-internal instructions.
+ if (Type == IgnoreBundle || !isBundled() || isBundledWithPred())
return getDesc().getFlags() & (1 << MCFlag);
- // If we have a bundle, take the slow path.
+ // If this is the first instruction in a bundle, take the slow path.
return hasPropertyInBundle(1 << MCFlag, Type);
}
@@ -578,14 +596,33 @@ public:
bool isIdenticalTo(const MachineInstr *Other,
MICheckType Check = CheckDefs) const;
- /// removeFromParent - This method unlinks 'this' from the containing basic
- /// block, and returns it, but does not delete it.
+ /// Unlink 'this' from the containing basic block, and return it without
+ /// deleting it.
+ ///
+ /// This function cannot be used on bundled instructions; use
+ /// removeFromBundle() to remove individual instructions from a bundle.
MachineInstr *removeFromParent();
- /// eraseFromParent - This method unlinks 'this' from the containing basic
- /// block and deletes it.
+ /// Unlink this instruction from its basic block and return it without
+ /// deleting it.
+ ///
+ /// If the instruction is part of a bundle, the other instructions in the
+ /// bundle remain bundled.
+ MachineInstr *removeFromBundle();
+
+ /// Unlink 'this' from the containing basic block and delete it.
+ ///
+ /// If this instruction is the header of a bundle, the whole bundle is erased.
+ /// This function cannot be used for instructions inside a bundle; use
+ /// eraseFromBundle() to erase individual bundled instructions.
void eraseFromParent();
+ /// Unlink 'this' from its basic block and delete it.
+ ///
+ /// If the instruction is part of a bundle, the other instructions in the
+ /// bundle remain bundled.
+ void eraseFromBundle();
+
/// isLabel - Returns true if the MachineInstr represents a label.
///
bool isLabel() const {
@@ -605,6 +642,9 @@ public:
bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; }
+ bool isMSInlineAsm() const {
+ return getOpcode() == TargetOpcode::INLINEASM && getInlineAsmDialect();
+ }
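
The test works because InlineAsm::AsmDialect encodes AT&T as 0, so any nonzero dialect marks MS-style (Intel) assembly. An equivalent spelled-out form, with MI a placeholder MachineInstr pointer:

  bool IsMS = MI->isInlineAsm() &&
              MI->getInlineAsmDialect() == InlineAsm::AD_Intel;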
bool isStackAligningInlineAsm() const;
InlineAsm::AsmDialect getInlineAsmDialect() const;
bool isInsertSubreg() const {
@@ -662,7 +702,11 @@ public:
}
}
- /// getBundleSize - Return the number of instructions inside the MI bundle.
+ /// Return the number of instructions inside the MI bundle, excluding the
+ /// bundle header.
+ ///
+ /// This is the number of instructions that MachineBasicBlock::iterator
+ /// skips, 0 for unbundled instructions.
unsigned getBundleSize() const;
/// readsRegister - Return true if the MachineInstr reads the specified
@@ -821,13 +865,6 @@ public:
///
void clearKillInfo();
- /// copyKillDeadInfo - Copies kill / dead operand properties from MI.
- ///
- void copyKillDeadInfo(const MachineInstr *MI);
-
- /// copyPredicates - Copies predicate operand(s) from MI.
- void copyPredicates(const MachineInstr *MI);
-
/// substituteRegister - Replace all occurrences of FromReg with ToReg:SubIdx,
/// properly composing subreg indices where necessary.
void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx,
@@ -909,21 +946,35 @@ public:
/// copyImplicitOps - Copy implicit register operands from specified
/// instruction to this instruction.
- void copyImplicitOps(const MachineInstr *MI);
+ void copyImplicitOps(MachineFunction &MF, const MachineInstr *MI);
//
// Debugging support
//
- void print(raw_ostream &OS, const TargetMachine *TM = 0) const;
+ void print(raw_ostream &OS, const TargetMachine *TM = 0,
+ bool SkipOpers = false) const;
void dump() const;
//===--------------------------------------------------------------------===//
// Accessors used to build up machine instructions.
- /// addOperand - Add the specified operand to the instruction. If it is an
- /// implicit operand, it is added to the end of the operand list. If it is
- /// an explicit operand it is added at the end of the explicit operand list
+ /// Add the specified operand to the instruction. If it is an implicit
+ /// operand, it is added to the end of the operand list. If it is an
+ /// explicit operand, it is added at the end of the explicit operand list
/// (before the first implicit operand).
+ ///
+ /// MF must be the machine function that was used to allocate this
+ /// instruction.
+ ///
+ /// MachineInstrBuilder provides a more convenient interface for creating
+ /// instructions and adding operands.
+ void addOperand(MachineFunction &MF, const MachineOperand &Op);
+
+ /// Add an operand without providing an MF reference. This only works for
+ /// instructions that are inserted in a basic block.
+ ///
+ /// MachineInstrBuilder and the two-argument addOperand(MF, MO) should be
+ /// preferred.
void addOperand(const MachineOperand &Op);
/// setDesc - Replace the instruction descriptor (thus opcode) of
@@ -950,7 +1001,8 @@ public:
/// list. This does not transfer ownership.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
MemRefs = NewMemRefs;
- NumMemRefs = NewMemRefsEnd - NewMemRefs;
+ NumMemRefs = uint8_t(NewMemRefsEnd - NewMemRefs);
+ assert(NumMemRefs == NewMemRefsEnd - NewMemRefs && "Too many memrefs");
}
private:
@@ -970,7 +1022,7 @@ private:
/// addImplicitDefUseOperands - Add all implicit def and use operands to
/// this instruction.
- void addImplicitDefUseOperands();
+ void addImplicitDefUseOperands(MachineFunction &MF);
/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
/// this instruction from their respective use lists. This requires that the
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 770685358aba..92c8da991ca4 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -18,6 +18,7 @@
#define LLVM_CODEGEN_MACHINEINSTRBUILDER_H
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -42,10 +43,14 @@ namespace RegState {
}
class MachineInstrBuilder {
+ MachineFunction *MF;
MachineInstr *MI;
public:
- MachineInstrBuilder() : MI(0) {}
- explicit MachineInstrBuilder(MachineInstr *mi) : MI(mi) {}
+ MachineInstrBuilder() : MF(0), MI(0) {}
+
+ /// Create a MachineInstrBuilder for manipulating an existing instruction.
+ /// F must be the machine function that was used to allocate I.
+ MachineInstrBuilder(MachineFunction &F, MachineInstr *I) : MF(&F), MI(I) {}
/// Allow automatic conversion to the machine instruction we are working on.
///
@@ -60,86 +65,94 @@ public:
unsigned SubReg = 0) const {
assert((flags & 0x1) == 0 &&
"Passing in 'true' to addReg is forbidden! Use enums instead.");
- MI->addOperand(MachineOperand::CreateReg(RegNo,
- flags & RegState::Define,
- flags & RegState::Implicit,
- flags & RegState::Kill,
- flags & RegState::Dead,
- flags & RegState::Undef,
- flags & RegState::EarlyClobber,
- SubReg,
- flags & RegState::Debug,
- flags & RegState::InternalRead));
+ MI->addOperand(*MF, MachineOperand::CreateReg(RegNo,
+ flags & RegState::Define,
+ flags & RegState::Implicit,
+ flags & RegState::Kill,
+ flags & RegState::Dead,
+ flags & RegState::Undef,
+ flags & RegState::EarlyClobber,
+ SubReg,
+ flags & RegState::Debug,
+ flags & RegState::InternalRead));
return *this;
}
/// addImm - Add a new immediate operand.
///
const MachineInstrBuilder &addImm(int64_t Val) const {
- MI->addOperand(MachineOperand::CreateImm(Val));
+ MI->addOperand(*MF, MachineOperand::CreateImm(Val));
return *this;
}
const MachineInstrBuilder &addCImm(const ConstantInt *Val) const {
- MI->addOperand(MachineOperand::CreateCImm(Val));
+ MI->addOperand(*MF, MachineOperand::CreateCImm(Val));
return *this;
}
const MachineInstrBuilder &addFPImm(const ConstantFP *Val) const {
- MI->addOperand(MachineOperand::CreateFPImm(Val));
+ MI->addOperand(*MF, MachineOperand::CreateFPImm(Val));
return *this;
}
const MachineInstrBuilder &addMBB(MachineBasicBlock *MBB,
unsigned char TargetFlags = 0) const {
- MI->addOperand(MachineOperand::CreateMBB(MBB, TargetFlags));
+ MI->addOperand(*MF, MachineOperand::CreateMBB(MBB, TargetFlags));
return *this;
}
const MachineInstrBuilder &addFrameIndex(int Idx) const {
- MI->addOperand(MachineOperand::CreateFI(Idx));
+ MI->addOperand(*MF, MachineOperand::CreateFI(Idx));
return *this;
}
const MachineInstrBuilder &addConstantPoolIndex(unsigned Idx,
int Offset = 0,
unsigned char TargetFlags = 0) const {
- MI->addOperand(MachineOperand::CreateCPI(Idx, Offset, TargetFlags));
+ MI->addOperand(*MF, MachineOperand::CreateCPI(Idx, Offset, TargetFlags));
return *this;
}
const MachineInstrBuilder &addTargetIndex(unsigned Idx, int64_t Offset = 0,
unsigned char TargetFlags = 0) const {
- MI->addOperand(MachineOperand::CreateTargetIndex(Idx, Offset, TargetFlags));
+ MI->addOperand(*MF, MachineOperand::CreateTargetIndex(Idx, Offset,
+ TargetFlags));
return *this;
}
const MachineInstrBuilder &addJumpTableIndex(unsigned Idx,
unsigned char TargetFlags = 0) const {
- MI->addOperand(MachineOperand::CreateJTI(Idx, TargetFlags));
+ MI->addOperand(*MF, MachineOperand::CreateJTI(Idx, TargetFlags));
return *this;
}
const MachineInstrBuilder &addGlobalAddress(const GlobalValue *GV,
int64_t Offset = 0,
unsigned char TargetFlags = 0) const {
- MI->addOperand(MachineOperand::CreateGA(GV, Offset, TargetFlags));
+ MI->addOperand(*MF, MachineOperand::CreateGA(GV, Offset, TargetFlags));
return *this;
}
const MachineInstrBuilder &addExternalSymbol(const char *FnName,
unsigned char TargetFlags = 0) const {
- MI->addOperand(MachineOperand::CreateES(FnName, TargetFlags));
+ MI->addOperand(*MF, MachineOperand::CreateES(FnName, TargetFlags));
+ return *this;
+ }
+
+ const MachineInstrBuilder &addBlockAddress(const BlockAddress *BA,
+ int64_t Offset = 0,
+ unsigned char TargetFlags = 0) const {
+ MI->addOperand(*MF, MachineOperand::CreateBA(BA, Offset, TargetFlags));
return *this;
}
const MachineInstrBuilder &addRegMask(const uint32_t *Mask) const {
- MI->addOperand(MachineOperand::CreateRegMask(Mask));
+ MI->addOperand(*MF, MachineOperand::CreateRegMask(Mask));
return *this;
}
const MachineInstrBuilder &addMemOperand(MachineMemOperand *MMO) const {
- MI->addMemOperand(*MI->getParent()->getParent(), MMO);
+ MI->addMemOperand(*MF, MMO);
return *this;
}
@@ -151,17 +164,17 @@ public:
const MachineInstrBuilder &addOperand(const MachineOperand &MO) const {
- MI->addOperand(MO);
+ MI->addOperand(*MF, MO);
return *this;
}
const MachineInstrBuilder &addMetadata(const MDNode *MD) const {
- MI->addOperand(MachineOperand::CreateMetadata(MD));
+ MI->addOperand(*MF, MachineOperand::CreateMetadata(MD));
return *this;
}
const MachineInstrBuilder &addSym(MCSymbol *Sym) const {
- MI->addOperand(MachineOperand::CreateMCSymbol(Sym));
+ MI->addOperand(*MF, MachineOperand::CreateMCSymbol(Sym));
return *this;
}
@@ -196,6 +209,12 @@ public:
}
}
}
+
+ /// Copy all the implicit operands from OtherMI onto this one.
+ const MachineInstrBuilder &copyImplicitOps(const MachineInstr *OtherMI) {
+ MI->copyImplicitOps(*MF, OtherMI);
+ return *this;
+ }
};
/// BuildMI - Builder interface. Specify how to create the initial instruction
@@ -204,7 +223,7 @@ public:
inline MachineInstrBuilder BuildMI(MachineFunction &MF,
DebugLoc DL,
const MCInstrDesc &MCID) {
- return MachineInstrBuilder(MF.CreateMachineInstr(MCID, DL));
+ return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL));
}
/// BuildMI - This version of the builder sets up the first operand as a
@@ -214,7 +233,7 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF,
DebugLoc DL,
const MCInstrDesc &MCID,
unsigned DestReg) {
- return MachineInstrBuilder(MF.CreateMachineInstr(MCID, DL))
+ return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL))
.addReg(DestReg, RegState::Define);
}
@@ -227,9 +246,10 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
DebugLoc DL,
const MCInstrDesc &MCID,
unsigned DestReg) {
- MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL);
+ MachineFunction &MF = *BB.getParent();
+ MachineInstr *MI = MF.CreateMachineInstr(MCID, DL);
BB.insert(I, MI);
- return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define);
+ return MachineInstrBuilder(MF, MI).addReg(DestReg, RegState::Define);
}
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
@@ -237,9 +257,10 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
DebugLoc DL,
const MCInstrDesc &MCID,
unsigned DestReg) {
- MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL);
+ MachineFunction &MF = *BB.getParent();
+ MachineInstr *MI = MF.CreateMachineInstr(MCID, DL);
BB.insert(I, MI);
- return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define);
+ return MachineInstrBuilder(MF, MI).addReg(DestReg, RegState::Define);
}
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
@@ -264,18 +285,20 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
DebugLoc DL,
const MCInstrDesc &MCID) {
- MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL);
+ MachineFunction &MF = *BB.getParent();
+ MachineInstr *MI = MF.CreateMachineInstr(MCID, DL);
BB.insert(I, MI);
- return MachineInstrBuilder(MI);
+ return MachineInstrBuilder(MF, MI);
}
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::instr_iterator I,
DebugLoc DL,
const MCInstrDesc &MCID) {
- MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL);
+ MachineFunction &MF = *BB.getParent();
+ MachineInstr *MI = MF.CreateMachineInstr(MCID, DL);
BB.insert(I, MI);
- return MachineInstrBuilder(MI);
+ return MachineInstrBuilder(MF, MI);
}
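
Call sites are unchanged: the builder now just remembers the MachineFunction so every add* method can route through the two-argument addOperand(MF, MO). A sketch, assuming MBB, I, DL, TII, DestReg, and SrcReg are in scope:

  BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), DestReg)
      .addReg(SrcReg, RegState::Kill);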
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
@@ -330,6 +353,94 @@ inline unsigned getUndefRegState(bool B) {
inline unsigned getInternalReadRegState(bool B) {
return B ? RegState::InternalRead : 0;
}
+inline unsigned getDebugRegState(bool B) {
+ return B ? RegState::Debug : 0;
+}
+
+
+/// Helper class for constructing bundles of MachineInstrs.
+///
+/// MIBundleBuilder can create a bundle from scratch by inserting new
+/// MachineInstrs one at a time, or it can create a bundle from a sequence of
+/// existing MachineInstrs in a basic block.
+class MIBundleBuilder {
+ MachineBasicBlock &MBB;
+ MachineBasicBlock::instr_iterator Begin;
+ MachineBasicBlock::instr_iterator End;
+
+public:
+ /// Create an MIBundleBuilder that inserts instructions into a new bundle in
+ /// BB above the bundle or instruction at Pos.
+ MIBundleBuilder(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator Pos)
+ : MBB(BB), Begin(Pos.getInstrIterator()), End(Begin) {}
+
+ /// Create a bundle from the sequence of instructions between B and E.
+ MIBundleBuilder(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator B,
+ MachineBasicBlock::iterator E)
+ : MBB(BB), Begin(B.getInstrIterator()), End(E.getInstrIterator()) {
+ assert(B != E && "No instructions to bundle");
+ ++B;
+ while (B != E) {
+ MachineInstr *MI = B;
+ ++B;
+ MI->bundleWithPred();
+ }
+ }
+
+ /// Create an MIBundleBuilder representing an existing instruction or bundle
+ /// that has MI as its head.
+ explicit MIBundleBuilder(MachineInstr *MI)
+ : MBB(*MI->getParent()), Begin(MI), End(getBundleEnd(MI)) {}
+
+ /// Return a reference to the basic block containing this bundle.
+ MachineBasicBlock &getMBB() const { return MBB; }
+
+ /// Return true if no instructions have been inserted in this bundle yet.
+ /// Empty bundles aren't representable in a MachineBasicBlock.
+ bool empty() const { return Begin == End; }
+
+ /// Return an iterator to the first bundled instruction.
+ MachineBasicBlock::instr_iterator begin() const { return Begin; }
+
+ /// Return an iterator beyond the last bundled instruction.
+ MachineBasicBlock::instr_iterator end() const { return End; }
+
+ /// Insert MI into this bundle before I which must point to an instruction in
+ /// the bundle, or end().
+ MIBundleBuilder &insert(MachineBasicBlock::instr_iterator I,
+ MachineInstr *MI) {
+ MBB.insert(I, MI);
+ if (I == Begin) {
+ if (!empty())
+ MI->bundleWithSucc();
+ Begin = MI;
+ return *this;
+ }
+ if (I == End) {
+ MI->bundleWithPred();
+ return *this;
+ }
+ // MI was inserted in the middle of the bundle, so its neighbors' flags are
+ // already fine. Update MI's bundle flags manually.
+ MI->setFlag(MachineInstr::BundledPred);
+ MI->setFlag(MachineInstr::BundledSucc);
+ return *this;
+ }
+
+ /// Insert MI into MBB by prepending it to the instructions in the bundle.
+ /// MI will become the first instruction in the bundle.
+ MIBundleBuilder &prepend(MachineInstr *MI) {
+ return insert(begin(), MI);
+ }
+
+ /// Insert MI into MBB by appending it to the instructions in the bundle.
+ /// MI will become the last instruction in the bundle.
+ MIBundleBuilder &append(MachineInstr *MI) {
+ return insert(end(), MI);
+ }
+};
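
A usage sketch, with MBB, Pos, MI0, and MI1 as placeholders: start an empty bundle above Pos and grow it by appending. (If a BUNDLE header instruction is needed, finalizeBundle() can presumably still be run over the result.)

  MIBundleBuilder Bundle(MBB, Pos);
  Bundle.append(MI0);                   // MI0 becomes the bundle head.
  Bundle.append(MI1);                   // MI1 is bundled with MI0.
  assert(&*Bundle.begin() == MI0 && !Bundle.empty());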
} // End llvm namespace
diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h
index 854ba06209cd..9519edb3ebae 100644
--- a/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/include/llvm/CodeGen/MachineInstrBundle.h
@@ -45,18 +45,36 @@ bool finalizeBundles(MachineFunction &MF);
///
inline MachineInstr *getBundleStart(MachineInstr *MI) {
MachineBasicBlock::instr_iterator I = MI;
- while (I->isInsideBundle())
+ while (I->isBundledWithPred())
--I;
return I;
}
inline const MachineInstr *getBundleStart(const MachineInstr *MI) {
MachineBasicBlock::const_instr_iterator I = MI;
- while (I->isInsideBundle())
+ while (I->isBundledWithPred())
--I;
return I;
}
+/// Return an iterator pointing beyond the bundle containing MI.
+inline MachineBasicBlock::instr_iterator
+getBundleEnd(MachineInstr *MI) {
+ MachineBasicBlock::instr_iterator I = MI;
+ while (I->isBundledWithSucc())
+ ++I;
+ return ++I;
+}
+
+/// Return an iterator pointing beyond the bundle containing MI.
+inline MachineBasicBlock::const_instr_iterator
+getBundleEnd(const MachineInstr *MI) {
+ MachineBasicBlock::const_instr_iterator I = MI;
+ while (I->isBundledWithSucc())
+ ++I;
+ return ++I;
+}
+
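Together with getBundleStart() above, this supports whole-bundle iteration. A sketch, with MI assumed to point anywhere into a bundle:

  MachineBasicBlock::instr_iterator I = getBundleStart(MI);
  MachineBasicBlock::instr_iterator E = getBundleEnd(MI);
  for (; I != E; ++I)
    I->dump(); // Per-instruction work goes here.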
//===----------------------------------------------------------------------===//
// MachineOperand iterator
//
@@ -149,16 +167,13 @@ public:
/// PhysRegInfo - Information about a physical register used by a set of
/// operands.
struct PhysRegInfo {
- /// Clobbers - Reg or an overlapping register is defined, or a regmask
+ /// Clobbers - Reg or an overlapping register is defined, or a regmask
/// clobbers Reg.
bool Clobbers;
/// Defines - Reg or a super-register is defined.
bool Defines;
- /// DefinesOverlap - Reg or an overlapping register is defined.
- bool DefinesOverlap;
-
  /// Reads - Reg or a super-register is read.
bool Reads;
diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h
index 928145d279b6..adcd1d0de63d 100644
--- a/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -20,8 +20,8 @@
#ifndef LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H
#define LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H
-#include <vector>
#include <cassert>
+#include <vector>
namespace llvm {
diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h
index d53f041128ac..b058ecb4c279 100644
--- a/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/include/llvm/CodeGen/MachineLoopInfo.h
@@ -27,11 +27,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_MACHINE_LOOP_INFO_H
-#define LLVM_CODEGEN_MACHINE_LOOP_INFO_H
+#ifndef LLVM_CODEGEN_MACHINELOOPINFO_H
+#define LLVM_CODEGEN_MACHINELOOPINFO_H
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
diff --git a/include/llvm/CodeGen/MachineLoopRanges.h b/include/llvm/CodeGen/MachineLoopRanges.h
deleted file mode 100644
index 6a30e8b53c09..000000000000
--- a/include/llvm/CodeGen/MachineLoopRanges.h
+++ /dev/null
@@ -1,112 +0,0 @@
-//===- MachineLoopRanges.h - Ranges of machine loops -----------*- c++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the interface to the MachineLoopRanges analysis.
-//
-// Provide on-demand information about the ranges of machine instructions
-// covered by a loop.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_MACHINELOOPRANGES_H
-#define LLVM_CODEGEN_MACHINELOOPRANGES_H
-
-#include "llvm/ADT/IntervalMap.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-
-namespace llvm {
-
-class MachineLoop;
-class MachineLoopInfo;
-class raw_ostream;
-
-/// MachineLoopRange - Range information for a single loop.
-class MachineLoopRange {
- friend class MachineLoopRanges;
-
-public:
- typedef IntervalMap<SlotIndex, unsigned, 4> Map;
- typedef Map::Allocator Allocator;
-
-private:
- /// The mapped loop.
- const MachineLoop *const Loop;
-
- /// Map intervals to a bit mask.
- /// Bit 0 = inside loop block.
- Map Intervals;
-
- /// Loop area as measured by SlotIndex::distance.
- unsigned Area;
-
- /// Create a MachineLoopRange, only accessible to MachineLoopRanges.
- MachineLoopRange(const MachineLoop*, Allocator&, SlotIndexes&);
-
-public:
- /// getLoop - Return the mapped machine loop.
- const MachineLoop *getLoop() const { return Loop; }
-
- /// overlaps - Return true if this loop overlaps the given range of machine
- /// instructions.
- bool overlaps(SlotIndex Start, SlotIndex Stop);
-
- /// getNumber - Return the loop number. This is the same as the number of the
- /// header block.
- unsigned getNumber() const;
-
- /// getArea - Return the loop area. This number is approximately proportional
- /// to the number of instructions in the loop.
- unsigned getArea() const { return Area; }
-
- /// getMap - Allow public read-only access for IntervalMapOverlaps.
- const Map &getMap() { return Intervals; }
-
- /// print - Print loop ranges on OS.
- void print(raw_ostream&) const;
-
- /// byNumber - Comparator for array_pod_sort that sorts a list of
- /// MachineLoopRange pointers by number.
- static int byNumber(const void*, const void*);
-
- /// byAreaDesc - Comparator for array_pod_sort that sorts a list of
- /// MachineLoopRange pointers by descending area, then by number.
- static int byAreaDesc(const void*, const void*);
-};
-
-raw_ostream &operator<<(raw_ostream&, const MachineLoopRange&);
-
-/// MachineLoopRanges - Analysis pass that provides on-demand per-loop range
-/// information.
-class MachineLoopRanges : public MachineFunctionPass {
- typedef DenseMap<const MachineLoop*, MachineLoopRange*> CacheMap;
- typedef MachineLoopRange::Allocator MapAllocator;
-
- MapAllocator Allocator;
- SlotIndexes *Indexes;
- CacheMap Cache;
-
-public:
- static char ID; // Pass identification, replacement for typeid
-
- MachineLoopRanges() : MachineFunctionPass(ID), Indexes(0) {}
- ~MachineLoopRanges() { releaseMemory(); }
-
- /// getLoopRange - Return the range of loop.
- MachineLoopRange *getLoopRange(const MachineLoop *Loop);
-
-private:
- virtual bool runOnMachineFunction(MachineFunction&);
- virtual void releaseMemory();
- virtual void getAnalysisUsage(AnalysisUsage&) const;
-};
-
-
-} // end namespace llvm
-
-#endif // LLVM_CODEGEN_MACHINELOOPRANGES_H
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 6b88d4a9499b..a3acec809547 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -31,19 +31,18 @@
#ifndef LLVM_CODEGEN_MACHINEMODULEINFO_H
#define LLVM_CODEGEN_MACHINEMODULEINFO_H
-#include "llvm/Pass.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Metadata.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/DebugLoc.h"
-#include "llvm/Support/ValueHandle.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ValueHandle.h"
namespace llvm {
@@ -180,8 +179,9 @@ public:
const MCObjectFileInfo *MOFI);
~MachineModuleInfo();
- bool doInitialization();
- bool doFinalization();
+ // Initialization and Finalization
+ virtual bool doInitialization(Module &);
+ virtual bool doFinalization(Module &);
/// EndFunction - Discard function meta information.
///
@@ -295,7 +295,7 @@ public:
  /// isUsedFunction - Return true if the function is in the llvm.used list. This
/// does not return true for things in llvm.compiler.used unless they are also
/// in llvm.used.
- bool isUsedFunction(const Function *F) {
+ bool isUsedFunction(const Function *F) const {
return UsedFunctions.count(F);
}
@@ -372,7 +372,7 @@ public:
/// getCurrentCallSite - Get the call site currently being processed, if any.
/// return zero if none.
- unsigned getCurrentCallSite(void) { return CurCallSite; }
+ unsigned getCurrentCallSite() { return CurCallSite; }
/// getTypeInfos - Return a reference to the C++ typeinfo for the current
/// function.
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 606833cd4081..414770b9ecf0 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -35,6 +35,11 @@ class MCSymbol;
/// MachineOperand class - Representation of each machine instruction operand.
///
+/// This class isn't a POD type because it has a private constructor, but its
+/// destructor must be trivial. Functions like MachineInstr::addOperand(),
+/// MachineRegisterInfo::moveOperands(), and MF::DeleteMachineInstr() depend on
+/// not having to call the MachineOperand destructor.
+///
class MachineOperand {
public:
enum MachineOperandType {
@@ -60,15 +65,11 @@ private:
/// union.
unsigned char OpKind; // MachineOperandType
- // This union is discriminated by OpKind.
- union {
- /// SubReg - Subregister number, only valid for MO_Register. A value of 0
- /// indicates the MO_Register has no subReg.
- unsigned char SubReg;
-
- /// TargetFlags - This is a set of target-specific operand flags.
- unsigned char TargetFlags;
- };
+ /// Subregister number for MO_Register. A value of 0 indicates the
+ /// MO_Register has no subReg.
+ ///
+ /// For all other kinds of operands, this field holds target-specific flags.
+ unsigned SubReg_TargetFlags : 12;
/// TiedTo - Non-zero when this register operand is tied to another register
/// operand. The encoding of this field is described in the block comment
@@ -176,24 +177,25 @@ private:
} OffsetedInfo;
} Contents;
- explicit MachineOperand(MachineOperandType K) : OpKind(K), ParentMI(0) {
- TargetFlags = 0;
- }
+ explicit MachineOperand(MachineOperandType K)
+ : OpKind(K), SubReg_TargetFlags(0), ParentMI(0) {}
public:
/// getType - Returns the MachineOperandType for this operand.
///
MachineOperandType getType() const { return (MachineOperandType)OpKind; }
- unsigned char getTargetFlags() const {
- return isReg() ? 0 : TargetFlags;
+ unsigned getTargetFlags() const {
+ return isReg() ? 0 : SubReg_TargetFlags;
}
- void setTargetFlags(unsigned char F) {
+ void setTargetFlags(unsigned F) {
assert(!isReg() && "Register operands can't have target flags");
- TargetFlags = F;
+ SubReg_TargetFlags = F;
+ assert(SubReg_TargetFlags == F && "Target flags out of range");
}
- void addTargetFlag(unsigned char F) {
+ void addTargetFlag(unsigned F) {
assert(!isReg() && "Register operands can't have target flags");
- TargetFlags |= F;
+ SubReg_TargetFlags |= F;
+ assert((SubReg_TargetFlags & F) && "Target flags out of range");
}
@@ -261,7 +263,7 @@ public:
unsigned getSubReg() const {
assert(isReg() && "Wrong MachineOperand accessor");
- return (unsigned)SubReg;
+ return SubReg_TargetFlags;
}
bool isUse() const {
@@ -336,7 +338,8 @@ public:
void setSubReg(unsigned subReg) {
assert(isReg() && "Wrong MachineOperand accessor");
- SubReg = (unsigned char)subReg;
+ SubReg_TargetFlags = subReg;
+ assert(SubReg_TargetFlags == subReg && "SubReg out of range");
}
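
Widening the field from 8 to 12 bits raises the subregister-index limit from 255 to 4095, and the new assertions catch silent truncation. A sketch, with MO a placeholder register operand:

  MO.setSubReg(300);            // Fits in 12 bits; would have truncated at 8.
  assert(MO.getSubReg() == 300);
  // MO.setSubReg(5000);        // Would now trip "SubReg out of range".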
/// substVirtReg - Substitute the current register with the virtual
@@ -574,7 +577,7 @@ public:
Op.SmallContents.RegNo = Reg;
Op.Contents.Reg.Prev = 0;
Op.Contents.Reg.Next = 0;
- Op.SubReg = SubReg;
+ Op.setSubReg(SubReg);
return Op;
}
static MachineOperand CreateMBB(MachineBasicBlock *MBB,
diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h
index a9fc8434abee..ca09aef82616 100644
--- a/include/llvm/CodeGen/MachinePostDominators.h
+++ b/include/llvm/CodeGen/MachinePostDominators.h
@@ -15,10 +15,9 @@
#ifndef LLVM_CODEGEN_MACHINEPOSTDOMINATORS_H
#define LLVM_CODEGEN_MACHINEPOSTDOMINATORS_H
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
@@ -55,26 +54,27 @@ public:
return DT->getNode(BB);
}
- bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ bool dominates(const MachineDomTreeNode *A,
+ const MachineDomTreeNode *B) const {
return DT->dominates(A, B);
}
- bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const {
return DT->dominates(A, B);
}
- bool
- properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ bool properlyDominates(const MachineDomTreeNode *A,
+ const MachineDomTreeNode *B) const {
return DT->properlyDominates(A, B);
}
- bool
- properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ bool properlyDominates(const MachineBasicBlock *A,
+ const MachineBasicBlock *B) const {
return DT->properlyDominates(A, B);
}
MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A,
- MachineBasicBlock *B) {
+ MachineBasicBlock *B) {
return DT->findNearestCommonDominator(A, B);
}
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 4e86363f071a..4b43cc10951a 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -14,10 +14,10 @@
#ifndef LLVM_CODEGEN_MACHINEREGISTERINFO_H
#define LLVM_CODEGEN_MACHINEREGISTERINFO_H
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <vector>
namespace llvm {
@@ -99,13 +99,11 @@ class MachineRegisterInfo {
/// started.
BitVector ReservedRegs;
- /// LiveIns/LiveOuts - Keep track of the physical registers that are
- /// livein/liveout of the function. Live in values are typically arguments in
- /// registers, live out values are typically return values in registers.
- /// LiveIn values are allowed to have virtual registers associated with them,
- /// stored in the second element.
+ /// Keep track of the physical registers that are live in to the function.
+ /// Live in values are typically arguments in registers. LiveIn values are
+ /// allowed to have virtual registers associated with them, stored in the
+ /// second element.
std::vector<std::pair<unsigned, unsigned> > LiveIns;
- std::vector<unsigned> LiveOuts;
MachineRegisterInfo(const MachineRegisterInfo&) LLVM_DELETED_FUNCTION;
void operator=(const MachineRegisterInfo&) LLVM_DELETED_FUNCTION;
@@ -156,6 +154,9 @@ public:
// Strictly for use by MachineInstr.cpp.
void removeRegOperandFromUseList(MachineOperand *MO);
+ // Strictly for use by MachineInstr.cpp.
+ void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps);
+
/// reg_begin/reg_end - Provide iteration support to walk over all definitions
/// and uses of a register within the MachineFunction that corresponds to this
/// MachineRegisterInfo object.
@@ -376,6 +377,12 @@ public:
return false;
}
+ /// Mark the specified register unit as used in this function.
+ /// This should only be called during and after register allocation.
+ void setRegUnitUsed(unsigned RegUnit) {
+ UsedRegUnits.set(RegUnit);
+ }
+
/// setPhysRegUsed - Mark the specified register used in this function.
/// This should only be called during and after register allocation.
void setPhysRegUsed(unsigned Reg) {
@@ -457,30 +464,24 @@ public:
}
//===--------------------------------------------------------------------===//
- // LiveIn/LiveOut Management
+ // LiveIn Management
//===--------------------------------------------------------------------===//
- /// addLiveIn/Out - Add the specified register as a live in/out. Note that it
+ /// addLiveIn - Add the specified register as a live-in. Note that it
/// is an error to add the same register to the same set more than once.
void addLiveIn(unsigned Reg, unsigned vreg = 0) {
LiveIns.push_back(std::make_pair(Reg, vreg));
}
- void addLiveOut(unsigned Reg) { LiveOuts.push_back(Reg); }
- // Iteration support for live in/out sets. These sets are kept in sorted
- // order by their register number.
+ // Iteration support for the live-ins set. It's kept in sorted order
+ // by register number.
typedef std::vector<std::pair<unsigned,unsigned> >::const_iterator
livein_iterator;
- typedef std::vector<unsigned>::const_iterator liveout_iterator;
livein_iterator livein_begin() const { return LiveIns.begin(); }
livein_iterator livein_end() const { return LiveIns.end(); }
bool livein_empty() const { return LiveIns.empty(); }
- liveout_iterator liveout_begin() const { return LiveOuts.begin(); }
- liveout_iterator liveout_end() const { return LiveOuts.end(); }
- bool liveout_empty() const { return LiveOuts.empty(); }
bool isLiveIn(unsigned Reg) const;
- bool isLiveOut(unsigned Reg) const;
/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
/// corresponding live-in physical register.
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 31bd606f9320..57febe77464c 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -24,8 +24,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MACHINESCHEDULER_H
-#define MACHINESCHEDULER_H
+#ifndef LLVM_CODEGEN_MACHINESCHEDULER_H
+#define LLVM_CODEGEN_MACHINESCHEDULER_H
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/RegisterPressure.h"
@@ -43,6 +43,7 @@ class MachineDominatorTree;
class MachineLoopInfo;
class RegisterClassInfo;
class ScheduleDAGInstrs;
+class SchedDFSResult;
/// MachineSchedContext provides enough context from the MachineScheduler pass
/// for the target to instantiate a scheduler.
@@ -119,6 +120,9 @@ public:
/// be scheduled at the bottom.
virtual SUnit *pickNode(bool &IsTopNode) = 0;
+ /// \brief Scheduler callback to notify that a new subtree is scheduled.
+ virtual void scheduleTree(unsigned SubtreeID) {}
+
/// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an
/// instruction and updated scheduled/remaining flags in the DAG nodes.
virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
@@ -164,6 +168,8 @@ public:
iterator end() { return Queue.end(); }
+ ArrayRef<SUnit*> elements() { return Queue; }
+
iterator find(SUnit *SU) {
return std::find(Queue.begin(), Queue.end(), SU);
}
@@ -181,7 +187,7 @@ public:
return Queue.begin() + idx;
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump();
#endif
};
@@ -202,6 +208,15 @@ protected:
RegisterClassInfo *RegClassInfo;
MachineSchedStrategy *SchedImpl;
+ /// Information about DAG subtrees. If DFSResult is NULL, then ScheduledTrees
+ /// will be empty.
+ SchedDFSResult *DFSResult;
+ BitVector ScheduledTrees;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
/// Ordered list of DAG postprocessing steps.
std::vector<ScheduleDAGMutation*> Mutations;
@@ -226,6 +241,10 @@ protected:
IntervalPressure BotPressure;
RegPressureTracker BotRPTracker;
+ /// Record the next node in a scheduled cluster.
+ const SUnit *NextClusterPred;
+ const SUnit *NextClusterSucc;
+
#ifndef NDEBUG
/// The number of instructions scheduled so far. Used to cut off the
/// scheduler at the point determined by misched-cutoff.
@@ -235,25 +254,33 @@ protected:
public:
ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
- AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
- RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
- CurrentBottom(), BotRPTracker(BotPressure) {
+ AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), DFSResult(0),
+ Topo(SUnits, &ExitSU), RPTracker(RegPressure), CurrentTop(),
+ TopRPTracker(TopPressure), CurrentBottom(), BotRPTracker(BotPressure),
+ NextClusterPred(NULL), NextClusterSucc(NULL) {
#ifndef NDEBUG
NumInstrsScheduled = 0;
#endif
}
- virtual ~ScheduleDAGMI() {
- delete SchedImpl;
- }
+ virtual ~ScheduleDAGMI();
/// Add a postprocessing step to the DAG builder.
/// Mutations are applied in the order that they are added after normal DAG
/// building and before MachineSchedStrategy initialization.
+ ///
+ /// ScheduleDAGMI takes ownership of the Mutation object.
void addMutation(ScheduleDAGMutation *Mutation) {
Mutations.push_back(Mutation);
}
+ /// \brief Add a DAG edge to the given SU with the given predecessor
+ /// dependence data.
+ ///
+ /// \returns true if the edge may be added without creating a cycle OR if an
+ /// equivalent edge already existed (false indicates failure).
+ bool addEdge(SUnit *SuccSU, const SDep &PredDep);
+
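A sketch of how a strategy might use this, assuming DAG is the ScheduleDAGMI and SUa/SUb are nodes it owns; SDep::Cluster is the weak edge kind used for instruction clustering:

  bool Linked = DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
  // Linked is false only if the edge would have introduced a cycle.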
MachineBasicBlock::iterator top() const { return CurrentTop; }
MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
@@ -285,6 +312,22 @@ public:
return RegionCriticalPSets;
}
+ const SUnit *getNextClusterPred() const { return NextClusterPred; }
+
+ const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
+
+ /// Compute a DFSResult after DAG building is complete, and before any
+ /// queue comparisons.
+ void computeDFSResult();
+
+ /// Return a non-null DFS result if the scheduling strategy initialized it.
+ const SchedDFSResult *getDFSResult() const { return DFSResult; }
+
+ BitVector &getScheduledTrees() { return ScheduledTrees; }
+
+ void viewGraph(const Twine &Name, const Twine &Title) LLVM_OVERRIDE;
+ void viewGraph() LLVM_OVERRIDE;
+
protected:
// Top-Level entry points for the schedule() driver...
@@ -298,8 +341,8 @@ protected:
/// instances of ScheduleDAGMI to perform custom DAG postprocessing.
void postprocessDAG();
- /// Identify DAG roots and setup scheduler queues.
- void initQueues();
+ /// Release ExitSU predecessors and setup scheduler queues.
+ void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
/// Move an instruction and update register pressure.
void scheduleMI(SUnit *SU, bool IsTopNode);
@@ -317,12 +360,13 @@ protected:
void initRegPressure();
- void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
+ void updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure);
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
bool checkSchedLimit();
- void releaseRoots();
+ void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
+ SmallVectorImpl<SUnit*> &BotRoots);
void releaseSucc(SUnit *SU, SDep *SuccEdge);
void releaseSuccessors(SUnit *SU);
diff --git a/include/llvm/CodeGen/MachineTraceMetrics.h b/include/llvm/CodeGen/MachineTraceMetrics.h
new file mode 100644
index 000000000000..2775a0485821
--- /dev/null
+++ b/include/llvm/CodeGen/MachineTraceMetrics.h
@@ -0,0 +1,388 @@
+//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the MachineTraceMetrics analysis pass
+// that estimates CPU resource usage and critical data dependency paths through
+// preferred traces. This is useful for super-scalar CPUs where execution speed
+// can be limited both by data dependencies and by limited execution resources.
+//
+// Out-of-order CPUs will often be executing instructions from multiple basic
+// blocks at the same time. This makes it difficult to estimate the resource
+// usage accurately in a single basic block. Resources can be estimated better
+// by looking at a trace through the current basic block.
+//
+// For every block, the MachineTraceMetrics pass will pick a preferred trace
+// that passes through the block. The trace is chosen based on loop structure,
+// branch probabilities, and resource usage. The intention is to pick likely
+// traces that would be the most affected by code transformations.
+//
+// It is expensive to compute a full arbitrary trace for every block, so to
+// save some computations, traces are chosen to be convergent. This means that
+// if the traces through basic blocks A and B ever cross when moving away from
+// A and B, they never diverge again. This applies in both directions: if the
+// traces meet above A and B, they won't diverge when going further back.
+//
+// Traces tend to align with loops. The trace through a block in an inner loop
+// will begin at the loop entry block and end at a back edge. If there are
+// nested loops, the trace may begin and end at those instead.
+//
+// For each trace, we compute the critical path length, which is the number of
+// cycles required to execute the trace when execution is limited by data
+// dependencies only. We also compute the resource height, which is the number
+// of cycles required to execute all instructions in the trace when ignoring
+// data dependencies.
+//
+// Every instruction in the current block has a slack - the number of cycles
+// execution of the instruction can be delayed without extending the critical
+// path.
+//
+//===----------------------------------------------------------------------===//
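
A worked example of the slack notion above, assuming slack is computed as the critical path length minus the sum of an instruction's depth and height: on a trace with a 12-cycle critical path, an instruction at depth 4 and height 5 can be delayed by 12 - (4 + 5) = 3 cycles.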
+
+#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+
+namespace llvm {
+
+class InstrItineraryData;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class raw_ostream;
+
+class MachineTraceMetrics : public MachineFunctionPass {
+ const MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const MachineLoopInfo *Loops;
+ TargetSchedModel SchedModel;
+
+public:
+ class Ensemble;
+ class Trace;
+ static char ID;
+ MachineTraceMetrics();
+ void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
+ void releaseMemory();
+ void verifyAnalysis() const;
+
+ friend class Ensemble;
+ friend class Trace;
+
+ /// Per-basic block information that doesn't depend on the trace through the
+ /// block.
+ struct FixedBlockInfo {
+ /// The number of non-trivial instructions in the block.
+ /// Doesn't count PHI and COPY instructions that are likely to be removed.
+ unsigned InstrCount;
+
+ /// True when the block contains calls.
+ bool HasCalls;
+
+ FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
+
+ /// Returns true when resource information for this block has been computed.
+ bool hasResources() const { return InstrCount != ~0u; }
+
+ /// Invalidate resource information.
+ void invalidate() { InstrCount = ~0u; }
+ };
+
+ /// Get the fixed resource information about MBB. Compute it on demand.
+ const FixedBlockInfo *getResources(const MachineBasicBlock*);
+
+ /// Get the scaled number of cycles used per processor resource in MBB.
+ /// This is an array with SchedModel.getNumProcResourceKinds() entries.
+ /// The getResources() function above must have been called first.
+ ///
+ /// These numbers have already been scaled by SchedModel.getResourceFactor().
+ ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
+
+ /// A virtual register or regunit required by a basic block or its trace
+ /// successors.
+ struct LiveInReg {
+ /// The virtual register required, or a register unit.
+ unsigned Reg;
+
+ /// For virtual registers: Minimum height of the defining instruction.
+ /// For regunits: Height of the highest user in the trace.
+ unsigned Height;
+
+ LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
+ };
+
+ /// Per-basic block information that relates to a specific trace through the
+  /// block. Trace convergence means that only one of these is required per
+ /// block in a trace ensemble.
+ struct TraceBlockInfo {
+ /// Trace predecessor, or NULL for the first block in the trace.
+ /// Valid when hasValidDepth().
+ const MachineBasicBlock *Pred;
+
+ /// Trace successor, or NULL for the last block in the trace.
+ /// Valid when hasValidHeight().
+ const MachineBasicBlock *Succ;
+
+ /// The block number of the head of the trace. (When hasValidDepth()).
+ unsigned Head;
+
+ /// The block number of the tail of the trace. (When hasValidHeight()).
+ unsigned Tail;
+
+ /// Accumulated number of instructions in the trace above this block.
+ /// Does not include instructions in this block.
+ unsigned InstrDepth;
+
+ /// Accumulated number of instructions in the trace below this block.
+ /// Includes instructions in this block.
+ unsigned InstrHeight;
+
+ TraceBlockInfo() :
+ Pred(0), Succ(0),
+ InstrDepth(~0u), InstrHeight(~0u),
+ HasValidInstrDepths(false), HasValidInstrHeights(false) {}
+
+ /// Returns true if the depth resources have been computed from the trace
+ /// above this block.
+ bool hasValidDepth() const { return InstrDepth != ~0u; }
+
+ /// Returns true if the height resources have been computed from the trace
+ /// below this block.
+ bool hasValidHeight() const { return InstrHeight != ~0u; }
+
+ /// Invalidate depth resources when some block above this one has changed.
+ void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
+
+ /// Invalidate height resources when a block below this one has changed.
+ void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+
+ /// Assuming that this is a dominator of TBI, determine if it contains
+ /// useful instruction depths. A dominating block can be above the current
+ /// trace head, and any dependencies from such a far away dominator are not
+ /// expected to affect the critical path.
+ ///
+ /// Also returns true when TBI == this.
+ bool isUsefulDominator(const TraceBlockInfo &TBI) const {
+ // The trace for TBI may not even be calculated yet.
+ if (!hasValidDepth() || !TBI.hasValidDepth())
+ return false;
+ // Instruction depths are only comparable if the traces share a head.
+ if (Head != TBI.Head)
+ return false;
+      // It is almost always the case that TBI belongs to the same trace as
+      // this block. But in rare convoluted cases involving irreducible
+      // control flow, a dominator may share a trace head without actually
+      // being on the same trace as TBI. This is not a big problem as long
+      // as it doesn't increase the instruction depth.
+ return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth;
+ }
+
+ // Data-dependency-related information. Per-instruction depth and height
+ // are computed from data dependencies in the current trace, using
+ // itinerary data.
+
+ /// Instruction depths have been computed. This implies hasValidDepth().
+ bool HasValidInstrDepths;
+
+ /// Instruction heights have been computed. This implies hasValidHeight().
+ bool HasValidInstrHeights;
+
+ /// Critical path length. This is the number of cycles in the longest data
+ /// dependency chain through the trace. This is only valid when both
+ /// HasValidInstrDepths and HasValidInstrHeights are set.
+ unsigned CriticalPath;
+
+ /// Live-in registers. These registers are defined above the current block
+ /// and used by this block or a block below it.
+ /// This does not include PHI uses in the current block, but it does
+ /// include PHI uses in deeper blocks.
+ SmallVector<LiveInReg, 4> LiveIns;
+
+ void print(raw_ostream&) const;
+ };
+
+ /// InstrCycles represents the cycle height and depth of an instruction in a
+ /// trace.
+ struct InstrCycles {
+ /// Earliest issue cycle as determined by data dependencies and instruction
+ /// latencies from the beginning of the trace. Data dependencies from
+ /// before the trace are not included.
+ unsigned Depth;
+
+    /// Minimum number of cycles from when this instruction is issued to the
+    /// end of the trace, as determined by data dependencies and instruction
+    /// latencies.
+ unsigned Height;
+ };
+
+ /// A trace represents a plausible sequence of executed basic blocks that
+  /// passes through the current basic block. The Trace class serves as a
+ /// handle to internal cached data structures.
+ class Trace {
+ Ensemble &TE;
+ TraceBlockInfo &TBI;
+
+ unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
+
+ public:
+ explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
+ void print(raw_ostream&) const;
+
+ /// Compute the total number of instructions in the trace.
+ unsigned getInstrCount() const {
+ return TBI.InstrDepth + TBI.InstrHeight;
+ }
+
+ /// Return the resource depth of the top/bottom of the trace center block.
+ /// This is the number of cycles required to execute all instructions from
+ /// the trace head to the trace center block. The resource depth only
+ /// considers execution resources, it ignores data dependencies.
+ /// When Bottom is set, instructions in the trace center block are included.
+ unsigned getResourceDepth(bool Bottom) const;
+
+ /// Return the resource length of the trace. This is the number of cycles
+ /// required to execute the instructions in the trace if they were all
+ /// independent, exposing the maximum instruction-level parallelism.
+ ///
+ /// Any blocks in Extrablocks are included as if they were part of the
+ /// trace.
+ unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks =
+ ArrayRef<const MachineBasicBlock*>()) const;
+
+ /// Return the length of the (data dependency) critical path through the
+ /// trace.
+ unsigned getCriticalPath() const { return TBI.CriticalPath; }
+
+ /// Return the depth and height of MI. The depth is only valid for
+ /// instructions in or above the trace center block. The height is only
+ /// valid for instructions in or below the trace center block.
+ InstrCycles getInstrCycles(const MachineInstr *MI) const {
+ return TE.Cycles.lookup(MI);
+ }
+
+ /// Return the slack of MI. This is the number of cycles MI can be delayed
+ /// before the critical path becomes longer.
+ /// MI must be an instruction in the trace center block.
+ unsigned getInstrSlack(const MachineInstr *MI) const;
+
+ /// Return the Depth of a PHI instruction in a trace center block successor.
+ /// The PHI does not have to be part of the trace.
+ unsigned getPHIDepth(const MachineInstr *PHI) const;
+ };
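+
+  // Illustrative client usage (a sketch; assumes a pass that required this
+  // analysis in its getAnalysisUsage, with MBB and MI chosen by the client):
+  //
+  //   MachineTraceMetrics &MTM = getAnalysis<MachineTraceMetrics>();
+  //   MachineTraceMetrics::Ensemble *E =
+  //       MTM.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+  //   MachineTraceMetrics::Trace T = E->getTrace(MBB);
+  //   unsigned Slack = T.getInstrSlack(MI); // MI in the trace center block.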
+
+ /// A trace ensemble is a collection of traces selected using the same
+ /// strategy, for example 'minimum resource height'. There is one trace for
+ /// every block in the function.
+ class Ensemble {
+ SmallVector<TraceBlockInfo, 4> BlockInfo;
+ DenseMap<const MachineInstr*, InstrCycles> Cycles;
+ SmallVector<unsigned, 0> ProcResourceDepths;
+ SmallVector<unsigned, 0> ProcResourceHeights;
+ friend class Trace;
+
+ void computeTrace(const MachineBasicBlock*);
+ void computeDepthResources(const MachineBasicBlock*);
+ void computeHeightResources(const MachineBasicBlock*);
+ unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
+ void computeInstrDepths(const MachineBasicBlock*);
+ void computeInstrHeights(const MachineBasicBlock*);
+ void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
+ ArrayRef<const MachineBasicBlock*> Trace);
+
+ protected:
+ MachineTraceMetrics &MTM;
+ virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
+ virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
+ explicit Ensemble(MachineTraceMetrics*);
+ const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
+ const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
+ const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
+ ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
+ ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
+
+ public:
+ virtual ~Ensemble();
+ virtual const char *getName() const =0;
+ void print(raw_ostream&) const;
+ void invalidate(const MachineBasicBlock *MBB);
+ void verify() const;
+
+ /// Get the trace that passes through MBB.
+ /// The trace is computed on demand.
+ Trace getTrace(const MachineBasicBlock *MBB);
+ };
+
+ /// Strategies for selecting traces.
+ enum Strategy {
+ /// Select the trace through a block that has the fewest instructions.
+ TS_MinInstrCount,
+
+ TS_NumStrategies
+ };
+
+ /// Get the trace ensemble representing the given trace selection strategy.
+ /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
+ /// and valid for the lifetime of the analysis pass.
+ Ensemble *getEnsemble(Strategy);
+
+ /// Invalidate cached information about MBB. This must be called *before* MBB
+ /// is erased, or the CFG is otherwise changed.
+ ///
+ /// This invalidates per-block information about resource usage for MBB only,
+ /// and it invalidates per-trace information for any trace that passes
+ /// through MBB.
+ ///
+ /// Call Ensemble::getTrace() again to update any trace handles.
+ void invalidate(const MachineBasicBlock *MBB);
+
+private:
+ // One entry per basic block, indexed by block number.
+ SmallVector<FixedBlockInfo, 4> BlockInfo;
+
+ // Cycles consumed on each processor resource per block.
+ // The number of processor resource kinds is constant for a given subtarget,
+ // but it is not known at compile time. The number of cycles consumed by
+ // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
+ // where Kinds = SchedModel.getNumProcResourceKinds().
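+  // For example, with Kinds == 4, the scaled cycles consumed by block 7 on
+  // resource kind 2 are at ProcResourceCycles[7*4 + 2].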
+ SmallVector<unsigned, 0> ProcResourceCycles;
+
+ // One ensemble per strategy.
+ Ensemble* Ensembles[TS_NumStrategies];
+
+ // Convert scaled resource usage to a cycle count that can be compared with
+ // latencies.
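+  // For example, with a latency factor of 2, a scaled value of 5 rounds up
+  // to (5 + 2 - 1) / 2 == 3 cycles.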
+ unsigned getCycles(unsigned Scaled) {
+ unsigned Factor = SchedModel.getLatencyFactor();
+ return (Scaled + Factor - 1) / Factor;
+ }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+ const MachineTraceMetrics::Trace &Tr) {
+ Tr.print(OS);
+ return OS;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+ const MachineTraceMetrics::Ensemble &En) {
+ En.print(OS);
+ return OS;
+}
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h
index 83c379b48cba..85bf511d6022 100644
--- a/include/llvm/CodeGen/PBQP/Graph.h
+++ b/include/llvm/CodeGen/PBQP/Graph.h
@@ -16,10 +16,10 @@
#define LLVM_CODEGEN_PBQP_GRAPH_H
#include "Math.h"
-
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
#include <list>
#include <map>
-#include <llvm/ADT/ilist.h>
namespace PBQP {
diff --git a/include/llvm/CodeGen/PBQP/HeuristicSolver.h b/include/llvm/CodeGen/PBQP/HeuristicSolver.h
index 35514f967478..47e15b27e744 100644
--- a/include/llvm/CodeGen/PBQP/HeuristicSolver.h
+++ b/include/llvm/CodeGen/PBQP/HeuristicSolver.h
@@ -18,8 +18,8 @@
#include "Graph.h"
#include "Solution.h"
-#include <vector>
#include <limits>
+#include <vector>
namespace PBQP {
diff --git a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
index a859e5899f06..307d81e1d161 100644
--- a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
@@ -18,9 +18,8 @@
#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
-#include "../HeuristicSolver.h"
#include "../HeuristicBase.h"
-
+#include "../HeuristicSolver.h"
#include <limits>
namespace PBQP {
diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h
index e7598bf3e3f1..08f8b981ae27 100644
--- a/include/llvm/CodeGen/PBQP/Math.h
+++ b/include/llvm/CodeGen/PBQP/Math.h
@@ -7,11 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_PBQP_MATH_H
+#ifndef LLVM_CODEGEN_PBQP_MATH_H
#define LLVM_CODEGEN_PBQP_MATH_H
-#include <cassert>
#include <algorithm>
+#include <cassert>
#include <functional>
namespace PBQP {
diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h
index 57d9b95fc3b1..b9f288bbeeb4 100644
--- a/include/llvm/CodeGen/PBQP/Solution.h
+++ b/include/llvm/CodeGen/PBQP/Solution.h
@@ -14,9 +14,8 @@
#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
#define LLVM_CODEGEN_PBQP_SOLUTION_H
-#include "Math.h"
#include "Graph.h"
-
+#include "Math.h"
#include <map>
namespace PBQP {
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 7bd576494ef7..fc8aa75ddfeb 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -25,6 +25,7 @@ namespace llvm {
class MachineFunctionPass;
class PassInfo;
class PassManagerBase;
+ class TargetLoweringBase;
class TargetLowering;
class TargetRegisterClass;
class raw_ostream;
@@ -141,6 +142,10 @@ public:
/// Add passes to lower exception handling for the code generator.
void addPassesToHandleExceptions();
+  /// Add a pass to prepare the LLVM IR for code generation. This should be
+  /// done before exception handling preparation passes.
+ virtual void addCodeGenPrepare();
+
/// Add common passes that perform LLVM IR to IR transforms in preparation for
/// instruction selection.
virtual void addISelPrepare();
@@ -176,6 +181,16 @@ protected:
/// instructions in SSA form.
virtual void addMachineSSAOptimization();
+ /// Add passes that optimize instruction level parallelism for out-of-order
+ /// targets. These passes are run while the machine code is still in SSA
+ /// form, so they can use MachineTraceMetrics to control their heuristics.
+ ///
+ /// All passes added here should preserve the MachineDominatorTree,
+ /// MachineLoopInfo, and MachineTraceMetrics analyses.
+ virtual bool addILPOpts() {
+ return false;
+ }
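+
+  // An override might look like this (a sketch; "MyTargetPassConfig" is
+  // hypothetical, and EarlyIfConverterID is one example of a suitable pass):
+  //
+  //   bool MyTargetPassConfig::addILPOpts() {
+  //     addPass(&EarlyIfConverterID);
+  //     return true;
+  //   }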
+
/// addPreRegAlloc - This method may be implemented by targets that want to
/// run passes immediately before register allocation. This should return
/// true if -print-machineinstrs should print after these passes.
@@ -237,6 +252,11 @@ protected:
return false;
}
+ /// addGCPasses - Add late codegen passes that analyze code for garbage
+ /// collection. This should return true if GC info should be printed after
+ /// these passes.
+ virtual bool addGCPasses();
+
/// Add standard basic block placement passes.
virtual void addBlockPlacement();
@@ -271,6 +291,13 @@ protected:
/// List of target independent CodeGen pass IDs.
namespace llvm {
+ /// \brief Create a basic TargetTransformInfo analysis pass.
+ ///
+ /// This pass implements the target transform info analysis using the target
+ /// independent information available to the LLVM code generator.
+ ImmutablePass *
+ createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI);
+
/// createUnreachableBlockEliminationPass - The LLVM code generator does not
/// work well with unreachable basic blocks (what live ranges make sense for a
/// block that cannot be reached?). As such, a code generator should either
@@ -288,9 +315,6 @@ namespace llvm {
/// MachineLoopInfo - This pass is a loop analysis pass.
extern char &MachineLoopInfoID;
- /// MachineLoopRanges - This pass is an on-demand loop coverage analysis.
- extern char &MachineLoopRangesID;
-
/// MachineDominators - This pass is a machine dominators analysis pass.
extern char &MachineDominatorsID;
@@ -420,10 +444,6 @@ namespace llvm {
/// information.
extern char &MachineBlockPlacementStatsID;
- /// Code Placement - This pass optimize code placement and aligns loop
- /// headers to target specific alignment boundary.
- extern char &CodePlacementOptID;
-
/// GCLowering Pass - Performs target-independent LLVM IR transformations for
/// highly portable strategies.
///
@@ -435,10 +455,6 @@ namespace llvm {
/// branch folding).
extern char &GCMachineCodeAnalysisID;
- /// Deleter Pass - Releases GC metadata.
- ///
- FunctionPass *createGCInfoDeleter();
-
/// Creates a pass to print GC metadata.
///
FunctionPass *createGCInfoPrinter(raw_ostream &OS);
@@ -469,7 +485,7 @@ namespace llvm {
/// createStackProtectorPass - This pass adds stack protectors to functions.
///
- FunctionPass *createStackProtectorPass(const TargetLowering *tli);
+ FunctionPass *createStackProtectorPass(const TargetLoweringBase *tli);
   /// createMachineVerifierPass - This pass verifies generated machine code
/// instructions for correctness.
@@ -483,7 +499,7 @@ namespace llvm {
/// createSjLjEHPreparePass - This pass adapts exception handling code to use
   /// the GCC-style builtin setjmp/longjmp (sjlj) to handle EH control flow.
///
- FunctionPass *createSjLjEHPreparePass(const TargetLowering *tli);
+ FunctionPass *createSjLjEHPreparePass(const TargetLoweringBase *tli);
/// LocalStackSlotAllocation - This pass assigns local frame indices to stack
/// slots relative to one another and allocates base registers to access them
diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h
index 8f52d3bf47d2..df74d08888bb 100644
--- a/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/include/llvm/CodeGen/PseudoSourceValue.h
@@ -14,7 +14,7 @@
#ifndef LLVM_CODEGEN_PSEUDOSOURCEVALUE_H
#define LLVM_CODEGEN_PSEUDOSOURCEVALUE_H
-#include "llvm/Value.h"
+#include "llvm/IR/Value.h"
namespace llvm {
class MachineFrameInfo;
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
index acfc07dd31a2..b617c145585c 100644
--- a/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/PBQP/Graph.h"
#include "llvm/CodeGen/PBQP/Solution.h"
-
#include <map>
#include <set>
@@ -29,6 +28,7 @@ namespace llvm {
class LiveIntervals;
class MachineFunction;
class MachineLoopInfo;
+ class TargetRegisterInfo;
/// This class wraps up a PBQP instance representing a register allocation
/// problem, plus the structures necessary to map back from the PBQP solution
diff --git a/include/llvm/CodeGen/RegAllocRegistry.h b/include/llvm/CodeGen/RegAllocRegistry.h
index 100e357654fb..ca495778446f 100644
--- a/include/llvm/CodeGen/RegAllocRegistry.h
+++ b/include/llvm/CodeGen/RegAllocRegistry.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGENREGALLOCREGISTRY_H
-#define LLVM_CODEGENREGALLOCREGISTRY_H
+#ifndef LLVM_CODEGEN_REGALLOCREGISTRY_H
+#define LLVM_CODEGEN_REGALLOCREGISTRY_H
#include "llvm/CodeGen/MachinePassRegistry.h"
diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h
index 4467b62f2370..3ad22e65c8c7 100644
--- a/include/llvm/CodeGen/RegisterClassInfo.h
+++ b/include/llvm/CodeGen/RegisterClassInfo.h
@@ -29,10 +29,15 @@ class RegisterClassInfo {
unsigned Tag;
unsigned NumRegs;
bool ProperSubClass;
- OwningArrayPtr<unsigned> Order;
+ uint8_t MinCost;
+ uint16_t LastCostChange;
+ OwningArrayPtr<MCPhysReg> Order;
- RCInfo() : Tag(0), NumRegs(0), ProperSubClass(false) {}
- operator ArrayRef<unsigned>() const {
+ RCInfo()
+ : Tag(0), NumRegs(0), ProperSubClass(false), MinCost(0),
+ LastCostChange(0) {}
+
+ operator ArrayRef<MCPhysReg>() const {
return makeArrayRef(Order.get(), NumRegs);
}
};
@@ -84,7 +89,7 @@ public:
/// getOrder - Returns the preferred allocation order for RC. The order
/// contains no reserved registers, and registers that alias callee saved
/// registers come last.
- ArrayRef<unsigned> getOrder(const TargetRegisterClass *RC) const {
+ ArrayRef<MCPhysReg> getOrder(const TargetRegisterClass *RC) const {
return get(RC);
}
@@ -106,6 +111,21 @@ public:
return CalleeSaved[N-1];
return 0;
}
+
+ /// Get the minimum register cost in RC's allocation order.
+ /// This is the smallest value returned by TRI->getCostPerUse(Reg) for all
+ /// the registers in getOrder(RC).
+ unsigned getMinCost(const TargetRegisterClass *RC) {
+ return get(RC).MinCost;
+ }
+
+ /// Get the position of the last cost change in getOrder(RC).
+ ///
+ /// All registers in getOrder(RC).slice(getLastCostChange(RC)) will have the
+ /// same cost according to TRI->getCostPerUse().
+ unsigned getLastCostChange(const TargetRegisterClass *RC) {
+ return get(RC).LastCostChange;
+ }
};
} // end namespace llvm
diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h
index 30326d05df04..267018074677 100644
--- a/include/llvm/CodeGen/RegisterPressure.h
+++ b/include/llvm/CodeGen/RegisterPressure.h
@@ -15,13 +15,14 @@
#ifndef LLVM_CODEGEN_REGISTERPRESSURE_H
#define LLVM_CODEGEN_REGISTERPRESSURE_H
+#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SparseSet.h"
namespace llvm {
class LiveIntervals;
+class LiveInterval;
class RegisterClassInfo;
class MachineInstr;
@@ -30,18 +31,24 @@ struct RegisterPressure {
/// Map of max reg pressure indexed by pressure set ID, not class ID.
std::vector<unsigned> MaxSetPressure;
- /// List of live in registers.
+ /// List of live in virtual registers or physical register units.
SmallVector<unsigned,8> LiveInRegs;
SmallVector<unsigned,8> LiveOutRegs;
/// Increase register pressure for each pressure set impacted by this register
/// class. Normally called by RegPressureTracker, but may be called manually
/// to account for live through (global liveness).
- void increase(const TargetRegisterClass *RC, const TargetRegisterInfo *TRI);
+ ///
+ /// \param Reg is either a virtual register number or register unit number.
+ void increase(unsigned Reg, const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI);
/// Decrease register pressure for each pressure set impacted by this register
/// class. This is only useful to account for spilling or rematerialization.
- void decrease(const TargetRegisterClass *RC, const TargetRegisterInfo *TRI);
+ ///
+ /// \param Reg is either a virtual register number or register unit number.
+ void decrease(unsigned Reg, const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI);
void dump(const TargetRegisterInfo *TRI) const;
};
@@ -116,6 +123,33 @@ struct RegPressureDelta {
RegPressureDelta() {}
};
+/// \brief A set of live virtual registers and physical register units.
+///
+/// Virtual and physical register numbers require separate sparse sets, but most
+/// of the RegPressureTracker logic handles them uniformly.
+struct LiveRegSet {
+ SparseSet<unsigned> PhysRegs;
+ SparseSet<unsigned, VirtReg2IndexFunctor> VirtRegs;
+
+ bool contains(unsigned Reg) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return VirtRegs.count(Reg);
+ return PhysRegs.count(Reg);
+ }
+
+ bool insert(unsigned Reg) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return VirtRegs.insert(Reg).second;
+ return PhysRegs.insert(Reg).second;
+ }
+
+ bool erase(unsigned Reg) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return VirtRegs.erase(Reg);
+ return PhysRegs.erase(Reg);
+ }
+};
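+//
+// A minimal usage sketch (markDiscovered is a hypothetical client callback;
+// insert() returns true only when Reg was not already live, whether Reg names
+// a virtual register or a physical register unit):
+//
+//   if (LiveRegs.insert(Reg))
+//     markDiscovered(Reg);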
+
/// Track the current register pressure at some position in the instruction
/// stream, and remember the high water mark within the region traversed. This
/// does not automatically consider live-through ranges. The client may
@@ -150,15 +184,15 @@ class RegPressureTracker {
bool RequireIntervals;
/// Register pressure corresponds to liveness before this instruction
- /// iterator. It may point to the end of the block rather than an instruction.
+ /// iterator. It may point to the end of the block or a DebugValue rather than
+ /// an instruction.
MachineBasicBlock::const_iterator CurrPos;
/// Pressure map indexed by pressure set ID, not class ID.
std::vector<unsigned> CurrSetPressure;
- /// List of live registers.
- SparseSet<unsigned> LivePhysRegs;
- SparseSet<unsigned, VirtReg2IndexFunctor> LiveVirtRegs;
+ /// Set of live registers.
+ LiveRegSet LiveRegs;
public:
RegPressureTracker(IntervalPressure &rp) :
@@ -171,8 +205,9 @@ public:
const LiveIntervals *lis, const MachineBasicBlock *mbb,
MachineBasicBlock::const_iterator pos);
- /// Force liveness of registers. Particularly useful to initialize the
- /// livein/out state of the tracker before the first call to advance/recede.
+ /// Force liveness of virtual registers or physical register
+ /// units. Particularly useful to initialize the livein/out state of the
+ /// tracker before the first call to advance/recede.
void addLiveRegs(ArrayRef<unsigned> Regs);
/// Get the MI position corresponding to this register pressure.
@@ -184,6 +219,10 @@ public:
// position changes while pressure does not.
void setPos(MachineBasicBlock::const_iterator Pos) { CurrPos = Pos; }
+  /// \brief Get the SlotIndex for the first nondebug instruction at or after
+  /// the current position.
+ SlotIndex getCurrSlot() const;
+
/// Recede across the previous instruction.
bool recede();
@@ -203,11 +242,8 @@ public:
/// than the pressure across the traversed region.
std::vector<unsigned> &getRegSetPressureAtPos() { return CurrSetPressure; }
- void discoverPhysLiveIn(unsigned Reg);
- void discoverPhysLiveOut(unsigned Reg);
-
- void discoverVirtLiveIn(unsigned Reg);
- void discoverVirtLiveOut(unsigned Reg);
+ void discoverLiveOut(unsigned Reg);
+ void discoverLiveIn(unsigned Reg);
bool isTopClosed() const;
bool isBottomClosed() const;
@@ -268,12 +304,13 @@ public:
return getDownwardPressure(MI, PressureResult, MaxPressureResult);
}
+ void dump() const;
+
protected:
- void increasePhysRegPressure(ArrayRef<unsigned> Regs);
- void decreasePhysRegPressure(ArrayRef<unsigned> Regs);
+ const LiveInterval *getInterval(unsigned Reg) const;
- void increaseVirtRegPressure(ArrayRef<unsigned> Regs);
- void decreaseVirtRegPressure(ArrayRef<unsigned> Regs);
+ void increaseRegPressure(ArrayRef<unsigned> Regs);
+ void decreaseRegPressure(ArrayRef<unsigned> Regs);
void bumpUpwardPressure(const MachineInstr *MI);
void bumpDownwardPressure(const MachineInstr *MI);
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 08d316992ec5..95bf29167c20 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -14,12 +14,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_REGISTER_SCAVENGING_H
-#define LLVM_CODEGEN_REGISTER_SCAVENGING_H
+#ifndef LLVM_CODEGEN_REGISTERSCAVENGING_H
+#define LLVM_CODEGEN_REGISTERSCAVENGING_H
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
namespace llvm {
@@ -40,21 +40,23 @@ class RegScavenger {
/// registers.
bool Tracking;
- /// ScavengingFrameIndex - Special spill slot used for scavenging a register
- /// post register allocation.
- int ScavengingFrameIndex;
+ /// Information on scavenged registers (held in a spill slot).
+ struct ScavengedInfo {
+ ScavengedInfo(int FI = -1) : FrameIndex(FI), Reg(0), Restore(NULL) {}
- /// ScavengedReg - If none zero, the specific register is currently being
- /// scavenged. That is, it is spilled to the special scavenging stack slot.
- unsigned ScavengedReg;
+ /// A spill slot used for scavenging a register post register allocation.
+ int FrameIndex;
- /// ScavengedRC - Register class of the scavenged register.
- ///
- const TargetRegisterClass *ScavengedRC;
+ /// If non-zero, the specific register is currently being
+ /// scavenged. That is, it is spilled to this scavenging stack slot.
+ unsigned Reg;
- /// ScavengeRestore - Instruction that restores the scavenged register from
- /// stack.
- const MachineInstr *ScavengeRestore;
+ /// The instruction that restores the scavenged register from stack.
+ const MachineInstr *Restore;
+ };
+
+ /// A vector of information on scavenged registers.
+ SmallVector<ScavengedInfo, 2> Scavenged;
   /// CalleeSavedRegs - A bitvector of callee saved registers for the target.
///
@@ -71,8 +73,7 @@ class RegScavenger {
public:
RegScavenger()
- : MBB(NULL), NumPhysRegs(0), Tracking(false),
- ScavengingFrameIndex(-1), ScavengedReg(0), ScavengedRC(NULL) {}
+ : MBB(NULL), NumPhysRegs(0), Tracking(false) {}
   /// enterBasicBlock - Start tracking liveness from the beginning of the
   /// specified basic block.
@@ -92,9 +93,25 @@ public:
while (MBBI != I) forward();
}
+ /// Invert the behavior of forward() on the current instruction (undo the
+ /// changes to the available registers made by forward()).
+ void unprocess();
+
+ /// Unprocess instructions until you reach the provided iterator.
+ void unprocess(MachineBasicBlock::iterator I) {
+ while (MBBI != I) unprocess();
+ }
+
/// skipTo - Move the internal MBB iterator but do not update register states.
- ///
- void skipTo(MachineBasicBlock::iterator I) { MBBI = I; }
+ void skipTo(MachineBasicBlock::iterator I) {
+ if (I == MachineBasicBlock::iterator(NULL))
+ Tracking = false;
+ MBBI = I;
+ }
+
+ MachineBasicBlock::iterator getCurrentPosition() const {
+ return MBBI;
+ }
   /// getRegsUsed - Return all registers currently in use in the used bitvector.
void getRegsUsed(BitVector &used, bool includeReserved);
@@ -107,10 +124,28 @@ public:
/// Return 0 if none is found.
unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const;
- /// setScavengingFrameIndex / getScavengingFrameIndex - accessor and setter of
- /// ScavengingFrameIndex.
- void setScavengingFrameIndex(int FI) { ScavengingFrameIndex = FI; }
- int getScavengingFrameIndex() const { return ScavengingFrameIndex; }
+ /// Add a scavenging frame index.
+ void addScavengingFrameIndex(int FI) {
+ Scavenged.push_back(ScavengedInfo(FI));
+ }
+
+ /// Query whether a frame index is a scavenging frame index.
+ bool isScavengingFrameIndex(int FI) const {
+ for (SmallVector<ScavengedInfo, 2>::const_iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I)
+ if (I->FrameIndex == FI)
+ return true;
+
+ return false;
+ }
+
+ /// Get an array of scavenging frame indices.
+ void getScavengingFrameIndices(SmallVectorImpl<int> &A) const {
+ for (SmallVector<ScavengedInfo, 2>::const_iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I)
+ if (I->FrameIndex >= 0)
+ A.push_back(I->FrameIndex);
+ }
/// scavengeRegister - Make a register of the specific register class
/// available and do the appropriate bookkeeping. SPAdj is the stack
@@ -129,10 +164,12 @@ private:
/// isReserved - Returns true if a register is reserved. It is never "unused".
bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); }
- /// isUsed / isUnused - Test if a register is currently being used.
+ /// isUsed - Test if a register is currently being used. When called by the
+ /// isAliasUsed function, we only check isReserved if this is the original
+ /// register, not an alias register.
///
- bool isUsed(unsigned Reg) const {
- return !RegsAvailable.test(Reg) || isReserved(Reg);
+ bool isUsed(unsigned Reg, bool CheckReserved = true) const {
+ return !RegsAvailable.test(Reg) || (CheckReserved && isReserved(Reg));
}
/// isAliasUsed - Is Reg or an alias currently in use?
@@ -147,6 +184,10 @@ private:
RegsAvailable |= Regs;
}
+  /// Process the current instruction and fill the KillRegs and DefRegs bit
+ /// vectors.
+ void determineKillsAndDefs();
+
/// Add Reg and all its sub-registers to BV.
void addRegWithSubRegs(BitVector &BV, unsigned Reg);
diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h
index 56b5855c01c9..f20a9fce2ae8 100644
--- a/include/llvm/CodeGen/ResourcePriorityQueue.h
+++ b/include/llvm/CodeGen/ResourcePriorityQueue.h
@@ -14,12 +14,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef RESOURCE_PRIORITY_QUEUE_H
-#define RESOURCE_PRIORITY_QUEUE_H
+#ifndef LLVM_CODEGEN_RESOURCEPRIORITYQUEUE_H
+#define LLVM_CODEGEN_RESOURCEPRIORITYQUEUE_H
#include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 4bfd4ab530d1..41289a42c438 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -86,100 +86,131 @@ namespace RTLIB {
ADD_F32,
ADD_F64,
ADD_F80,
+ ADD_F128,
ADD_PPCF128,
SUB_F32,
SUB_F64,
SUB_F80,
+ SUB_F128,
SUB_PPCF128,
MUL_F32,
MUL_F64,
MUL_F80,
+ MUL_F128,
MUL_PPCF128,
DIV_F32,
DIV_F64,
DIV_F80,
+ DIV_F128,
DIV_PPCF128,
REM_F32,
REM_F64,
REM_F80,
+ REM_F128,
REM_PPCF128,
FMA_F32,
FMA_F64,
FMA_F80,
+ FMA_F128,
FMA_PPCF128,
POWI_F32,
POWI_F64,
POWI_F80,
+ POWI_F128,
POWI_PPCF128,
SQRT_F32,
SQRT_F64,
SQRT_F80,
+ SQRT_F128,
SQRT_PPCF128,
LOG_F32,
LOG_F64,
LOG_F80,
+ LOG_F128,
LOG_PPCF128,
LOG2_F32,
LOG2_F64,
LOG2_F80,
+ LOG2_F128,
LOG2_PPCF128,
LOG10_F32,
LOG10_F64,
LOG10_F80,
+ LOG10_F128,
LOG10_PPCF128,
EXP_F32,
EXP_F64,
EXP_F80,
+ EXP_F128,
EXP_PPCF128,
EXP2_F32,
EXP2_F64,
EXP2_F80,
+ EXP2_F128,
EXP2_PPCF128,
SIN_F32,
SIN_F64,
SIN_F80,
+ SIN_F128,
SIN_PPCF128,
COS_F32,
COS_F64,
COS_F80,
+ COS_F128,
COS_PPCF128,
+ SINCOS_F32,
+ SINCOS_F64,
+ SINCOS_F80,
+ SINCOS_F128,
+ SINCOS_PPCF128,
POW_F32,
POW_F64,
POW_F80,
+ POW_F128,
POW_PPCF128,
CEIL_F32,
CEIL_F64,
CEIL_F80,
+ CEIL_F128,
CEIL_PPCF128,
TRUNC_F32,
TRUNC_F64,
TRUNC_F80,
+ TRUNC_F128,
TRUNC_PPCF128,
RINT_F32,
RINT_F64,
RINT_F80,
+ RINT_F128,
RINT_PPCF128,
NEARBYINT_F32,
NEARBYINT_F64,
NEARBYINT_F80,
+ NEARBYINT_F128,
NEARBYINT_PPCF128,
FLOOR_F32,
FLOOR_F64,
FLOOR_F80,
+ FLOOR_F128,
FLOOR_PPCF128,
COPYSIGN_F32,
COPYSIGN_F64,
COPYSIGN_F80,
+ COPYSIGN_F128,
COPYSIGN_PPCF128,
// CONVERSION
+ FPEXT_F64_F128,
+ FPEXT_F32_F128,
FPEXT_F32_F64,
FPEXT_F16_F32,
FPROUND_F32_F16,
FPROUND_F64_F32,
FPROUND_F80_F32,
+ FPROUND_F128_F32,
FPROUND_PPCF128_F32,
FPROUND_F80_F64,
+ FPROUND_F128_F64,
FPROUND_PPCF128_F64,
FPTOSINT_F32_I8,
FPTOSINT_F32_I16,
@@ -194,6 +225,9 @@ namespace RTLIB {
FPTOSINT_F80_I32,
FPTOSINT_F80_I64,
FPTOSINT_F80_I128,
+ FPTOSINT_F128_I32,
+ FPTOSINT_F128_I64,
+ FPTOSINT_F128_I128,
FPTOSINT_PPCF128_I32,
FPTOSINT_PPCF128_I64,
FPTOSINT_PPCF128_I128,
@@ -210,51 +244,68 @@ namespace RTLIB {
FPTOUINT_F80_I32,
FPTOUINT_F80_I64,
FPTOUINT_F80_I128,
+ FPTOUINT_F128_I32,
+ FPTOUINT_F128_I64,
+ FPTOUINT_F128_I128,
FPTOUINT_PPCF128_I32,
FPTOUINT_PPCF128_I64,
FPTOUINT_PPCF128_I128,
SINTTOFP_I32_F32,
SINTTOFP_I32_F64,
SINTTOFP_I32_F80,
+ SINTTOFP_I32_F128,
SINTTOFP_I32_PPCF128,
SINTTOFP_I64_F32,
SINTTOFP_I64_F64,
SINTTOFP_I64_F80,
+ SINTTOFP_I64_F128,
SINTTOFP_I64_PPCF128,
SINTTOFP_I128_F32,
SINTTOFP_I128_F64,
SINTTOFP_I128_F80,
+ SINTTOFP_I128_F128,
SINTTOFP_I128_PPCF128,
UINTTOFP_I32_F32,
UINTTOFP_I32_F64,
UINTTOFP_I32_F80,
+ UINTTOFP_I32_F128,
UINTTOFP_I32_PPCF128,
UINTTOFP_I64_F32,
UINTTOFP_I64_F64,
UINTTOFP_I64_F80,
+ UINTTOFP_I64_F128,
UINTTOFP_I64_PPCF128,
UINTTOFP_I128_F32,
UINTTOFP_I128_F64,
UINTTOFP_I128_F80,
+ UINTTOFP_I128_F128,
UINTTOFP_I128_PPCF128,
// COMPARISON
OEQ_F32,
OEQ_F64,
+ OEQ_F128,
UNE_F32,
UNE_F64,
+ UNE_F128,
OGE_F32,
OGE_F64,
+ OGE_F128,
OLT_F32,
OLT_F64,
+ OLT_F128,
OLE_F32,
OLE_F64,
+ OLE_F128,
OGT_F32,
OGT_F64,
+ OGT_F128,
UO_F32,
UO_F64,
+ UO_F128,
O_F32,
O_F64,
+ O_F128,
// MEMORY
MEMCPY,
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 7e0ca1478e5f..8c959da696d8 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -16,13 +16,12 @@
#ifndef LLVM_CODEGEN_SCHEDULEDAG_H
#define LLVM_CODEGEN_SCHEDULEDAG_H
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
class AliasAnalysis;
@@ -53,11 +52,22 @@ namespace llvm {
Order ///< Any other ordering dependency.
};
+ // Strong dependencies must be respected by the scheduler. Artificial
+ // dependencies may be removed only if they are redundant with another
+    // strong dependence.
+ //
+ // Weak dependencies may be violated by the scheduling strategy, but only if
+ // the strategy can prove it is correct to do so.
+ //
+ // Strong OrderKinds must occur before "Weak".
+    // Weak OrderKinds must occur at or after "Weak".
enum OrderKind {
Barrier, ///< An unknown scheduling barrier.
       MayAliasMem,  ///< Nonvolatile load/store instructions that may alias.
       MustAliasMem, ///< Nonvolatile load/store instructions that must alias.
- Artificial ///< Arbitrary weak DAG edge (no actual dependence).
+ Artificial, ///< Arbitrary strong DAG edge (no real dependence).
+ Weak, ///< Arbitrary weak DAG edge.
+ Cluster ///< Weak DAG edge linking a chain of clustered instrs.
};
private:
@@ -200,12 +210,26 @@ namespace llvm {
return getKind() == Order && Contents.OrdKind == MustAliasMem;
}
+ /// isWeak - Test if this a weak dependence. Weak dependencies are
+ /// considered DAG edges for height computation and other heuristics, but do
+ /// not force ordering. Breaking a weak edge may require the scheduler to
+ /// compensate, for example by inserting a copy.
+ bool isWeak() const {
+ return getKind() == Order && Contents.OrdKind >= Weak;
+ }
+
/// isArtificial - Test if this is an Order dependence that is marked
/// as "artificial", meaning it isn't necessary for correctness.
bool isArtificial() const {
return getKind() == Order && Contents.OrdKind == Artificial;
}
+ /// isCluster - Test if this is an Order dependence that is marked
+ /// as "cluster", meaning it is artificial and wants to be adjacent.
+ bool isCluster() const {
+ return getKind() == Order && Contents.OrdKind == Cluster;
+ }
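+
+    // A weak cluster edge might be created like this (a sketch; SUa and SUb
+    // are hypothetical units that a strategy wants to keep adjacent):
+    //
+    //   SUb->addPred(SDep(SUa, SDep::Cluster), /*Required=*/false);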
+
/// isAssignedRegDep - Test if this is a Data dependence that is
/// associated with a register.
bool isAssignedRegDep() const {
@@ -243,6 +267,8 @@ namespace llvm {
/// SUnit - Scheduling unit. This is a node in the scheduling DAG.
class SUnit {
private:
+ enum { BoundaryID = ~0u };
+
SDNode *Node; // Representative node.
MachineInstr *Instr; // Alternatively, a MachineInstr.
public:
@@ -267,6 +293,8 @@ namespace llvm {
     unsigned NumSuccs;                  // # of SDep::Data succs.
unsigned NumPredsLeft; // # of preds not scheduled.
unsigned NumSuccsLeft; // # of succs not scheduled.
+ unsigned WeakPredsLeft; // # of weak preds not scheduled.
+ unsigned WeakSuccsLeft; // # of weak succs not scheduled.
unsigned short NumRegDefsLeft; // # of reg defs with no scheduled use.
unsigned short Latency; // Node latency.
bool isVRegCycle : 1; // May use and def the same vreg.
@@ -301,12 +329,12 @@ namespace llvm {
SUnit(SDNode *node, unsigned nodenum)
: Node(node), Instr(0), OrigNode(0), SchedClass(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
- NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
- isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
- isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
- isPending(false), isAvailable(false), isScheduled(false),
- isScheduleHigh(false), isScheduleLow(false), isCloned(false),
- SchedulingPref(Sched::None),
+ NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
+ Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
+ isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
+ hasPhysRegClobbers(false), isPending(false), isAvailable(false),
+ isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
+ isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
@@ -315,28 +343,37 @@ namespace llvm {
SUnit(MachineInstr *instr, unsigned nodenum)
: Node(0), Instr(instr), OrigNode(0), SchedClass(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
- NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
- isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
- isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
- isPending(false), isAvailable(false), isScheduled(false),
- isScheduleHigh(false), isScheduleLow(false), isCloned(false),
- SchedulingPref(Sched::None),
+ NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
+ Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
+ isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
+ hasPhysRegClobbers(false), isPending(false), isAvailable(false),
+ isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
+ isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
/// SUnit - Construct a placeholder SUnit.
SUnit()
- : Node(0), Instr(0), OrigNode(0), SchedClass(0), NodeNum(~0u),
+ : Node(0), Instr(0), OrigNode(0), SchedClass(0), NodeNum(BoundaryID),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
- NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
- isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
- isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
- isPending(false), isAvailable(false), isScheduled(false),
- isScheduleHigh(false), isScheduleLow(false), isCloned(false),
- SchedulingPref(Sched::None),
+ NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
+ Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
+ isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
+ hasPhysRegClobbers(false), isPending(false), isAvailable(false),
+ isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
+ isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+ /// \brief Boundary nodes are placeholders for the boundary of the
+ /// scheduling region.
+ ///
+ /// BoundaryNodes can have DAG edges, including Data edges, but they do not
+ /// correspond to schedulable entities (e.g. instructions) and do not have a
+ /// valid ID. Consequently, always check for boundary nodes before accessing
+    /// an associative data structure keyed on node ID.
+    bool isBoundaryNode() const { return NodeNum == BoundaryID; }
+
/// setNode - Assign the representative SDNode for this SUnit.
/// This may be used during pre-regalloc scheduling.
void setNode(SDNode *N) {
@@ -372,7 +409,7 @@ namespace llvm {
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
- bool addPred(const SDep &D);
+ bool addPred(const SDep &D, bool Required = true);
/// removePred - This removes the specified edge as a pred of the current
/// node if it exists. It also removes the current node as a successor of
@@ -438,6 +475,10 @@ namespace llvm {
return NumSuccsLeft == 0;
}
+ /// \brief Order this node's predecessor edges such that the critical path
+ /// edge occurs first.
+ void biasCriticalPath();
+
void dump(const ScheduleDAG *G) const;
void dumpAll(const ScheduleDAG *G) const;
void print(raw_ostream &O, const ScheduleDAG *G) const;
@@ -546,8 +587,8 @@ namespace llvm {
/// viewGraph - Pop up a GraphViz/gv window with the ScheduleDAG rendered
/// using 'dot'.
///
- void viewGraph(const Twine &Name, const Twine &Title);
- void viewGraph();
+ virtual void viewGraph(const Twine &Name, const Twine &Title);
+ virtual void viewGraph();
virtual void dumpNode(const SUnit *SU) const = 0;
@@ -654,6 +695,7 @@ namespace llvm {
class ScheduleDAGTopologicalSort {
/// SUnits - A reference to the ScheduleDAG's SUnits.
std::vector<SUnit> &SUnits;
+ SUnit *ExitSU;
/// Index2Node - Maps topological index to the node number.
std::vector<int> Index2Node;
@@ -675,7 +717,7 @@ namespace llvm {
void Allocate(int n, int index);
public:
- explicit ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits);
+ ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits, SUnit *ExitSU);
/// InitDAGTopologicalSorting - create the initial topological
/// ordering from the DAG to be scheduled.
diff --git a/include/llvm/CodeGen/ScheduleDAGILP.h b/include/llvm/CodeGen/ScheduleDAGILP.h
deleted file mode 100644
index 1aa405842173..000000000000
--- a/include/llvm/CodeGen/ScheduleDAGILP.h
+++ /dev/null
@@ -1,86 +0,0 @@
-//===- ScheduleDAGILP.h - ILP metric for ScheduleDAGInstrs ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Definition of an ILP metric for machine level instruction scheduling.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SCHEDULEDAGILP_H
-#define LLVM_CODEGEN_SCHEDULEDAGILP_H
-
-#include "llvm/Support/DataTypes.h"
-#include <vector>
-
-namespace llvm {
-
-class raw_ostream;
-class ScheduleDAGInstrs;
-class SUnit;
-
-/// \brief Represent the ILP of the subDAG rooted at a DAG node.
-struct ILPValue {
- unsigned InstrCount;
- unsigned Cycles;
-
- ILPValue(): InstrCount(0), Cycles(0) {}
-
- ILPValue(unsigned count, unsigned cycles):
- InstrCount(count), Cycles(cycles) {}
-
- bool isValid() const { return Cycles > 0; }
-
- // Order by the ILP metric's value.
- bool operator<(ILPValue RHS) const {
- return (uint64_t)InstrCount * RHS.Cycles
- < (uint64_t)Cycles * RHS.InstrCount;
- }
- bool operator>(ILPValue RHS) const {
- return RHS < *this;
- }
- bool operator<=(ILPValue RHS) const {
- return (uint64_t)InstrCount * RHS.Cycles
- <= (uint64_t)Cycles * RHS.InstrCount;
- }
- bool operator>=(ILPValue RHS) const {
- return RHS <= *this;
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- void print(raw_ostream &OS) const;
-
- void dump() const;
-#endif
-};
-
-/// \brief Compute the values of each DAG node for an ILP metric.
-///
-/// This metric assumes that the DAG is a forest of trees with roots at the
-/// bottom of the schedule.
-class ScheduleDAGILP {
- bool IsBottomUp;
- std::vector<ILPValue> ILPValues;
-
-public:
- ScheduleDAGILP(bool IsBU): IsBottomUp(IsBU) {}
-
- /// \brief Initialize the result data with the size of the DAG.
- void resize(unsigned NumSUnits);
-
- /// \brief Compute the ILP metric for the subDAG at this root.
- void computeILP(const SUnit *Root);
-
- /// \brief Get the ILP value for a DAG node.
- ILPValue getILP(const SUnit *SU);
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val);
-
-} // namespace llvm
-
-#endif
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 4bcd35a834c3..2219520ca19f 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -12,20 +12,18 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SCHEDULEDAGINSTRS_H
-#define SCHEDULEDAGINSTRS_H
+#ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
+#define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/SparseMultiSet.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SparseSet.h"
-#include <map>
namespace llvm {
+ class MachineFrameInfo;
class MachineLoopInfo;
class MachineDominatorTree;
class LiveIntervals;
@@ -48,56 +46,18 @@ namespace llvm {
struct PhysRegSUOper {
SUnit *SU;
int OpIdx;
+ unsigned Reg;
- PhysRegSUOper(SUnit *su, int op): SU(su), OpIdx(op) {}
- };
-
- /// Combine a SparseSet with a 1x1 vector to track physical registers.
- /// The SparseSet allows iterating over the (few) live registers for quickly
- /// comparing against a regmask or clearing the set.
- ///
- /// Storage for the map is allocated once for the pass. The map can be
- /// cleared between scheduling regions without freeing unused entries.
- class Reg2SUnitsMap {
- SparseSet<unsigned> PhysRegSet;
- std::vector<std::vector<PhysRegSUOper> > SUnits;
- public:
- typedef SparseSet<unsigned>::const_iterator const_iterator;
-
- // Allow iteration over register numbers (keys) in the map. If needed, we
- // can provide an iterator over SUnits (values) as well.
- const_iterator reg_begin() const { return PhysRegSet.begin(); }
- const_iterator reg_end() const { return PhysRegSet.end(); }
-
- /// Initialize the map with the number of registers.
- /// If the map is already large enough, no allocation occurs.
- /// For simplicity we expect the map to be empty().
- void setRegLimit(unsigned Limit);
+ PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {}
- /// Returns true if the map is empty.
- bool empty() const { return PhysRegSet.empty(); }
-
- /// Clear the map without deallocating storage.
- void clear();
-
- bool contains(unsigned Reg) const { return PhysRegSet.count(Reg); }
-
- /// If this register is mapped, return its existing SUnits vector.
- /// Otherwise map the register and return an empty SUnits vector.
- std::vector<PhysRegSUOper> &operator[](unsigned Reg) {
- bool New = PhysRegSet.insert(Reg).second;
- assert((!New || SUnits[Reg].empty()) && "stale SUnits vector");
- (void)New;
- return SUnits[Reg];
- }
-
- /// Erase an existing element without freeing memory.
- void erase(unsigned Reg) {
- PhysRegSet.erase(Reg);
- SUnits[Reg].clear();
- }
+ unsigned getSparseSetIndex() const { return Reg; }
};
+ /// Use a SparseMultiSet to track physical registers. Storage is only
+ /// allocated once for the pass. It can be cleared in constant time and reused
+ /// without any frees.
+ typedef SparseMultiSet<PhysRegSUOper, llvm::identity<unsigned>, uint16_t> Reg2SUnitsMap;
+
/// Use SparseSet as a SparseMap by relying on the fact that it never
/// compares ValueT's, only unsigned keys. This allows the set to be cleared
/// between scheduling regions in constant time as long as ValueT does not
diff --git a/include/llvm/CodeGen/ScheduleDFS.h b/include/llvm/CodeGen/ScheduleDFS.h
new file mode 100644
index 000000000000..73ce99f4713d
--- /dev/null
+++ b/include/llvm/CodeGen/ScheduleDFS.h
@@ -0,0 +1,196 @@
+//===- ScheduleDFS.h - ILP metric for ScheduleDAGInstrs ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of an ILP metric for machine level instruction scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SCHEDULEDFS_H
+#define LLVM_CODEGEN_SCHEDULEDFS_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/DataTypes.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+class IntEqClasses;
+class ScheduleDAGInstrs;
+class SUnit;
+
+/// \brief Represent the ILP of the subDAG rooted at a DAG node.
+///
+/// ILPValues summarize the DAG subtree rooted at each node. ILPValues are
+/// valid for all nodes regardless of their subtree membership.
+///
+/// When computed using bottom-up DFS, this metric assumes that the DAG is a
+/// forest of trees with roots at the bottom of the schedule branching upward.
+struct ILPValue {
+ unsigned InstrCount;
+  /// Length may correspond to either depth or height, depending on direction,
+  /// and to cycles or nodes, depending on context.
+ unsigned Length;
+
+ ILPValue(unsigned count, unsigned length):
+ InstrCount(count), Length(length) {}
+
+ // Order by the ILP metric's value.
+ bool operator<(ILPValue RHS) const {
+ return (uint64_t)InstrCount * RHS.Length
+ < (uint64_t)Length * RHS.InstrCount;
+ }
+ bool operator>(ILPValue RHS) const {
+ return RHS < *this;
+ }
+ bool operator<=(ILPValue RHS) const {
+ return (uint64_t)InstrCount * RHS.Length
+ <= (uint64_t)Length * RHS.InstrCount;
+ }
+ bool operator>=(ILPValue RHS) const {
+ return RHS <= *this;
+ }
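+
+  // The cross-multiplications above avoid integer division; e.g. 5 instrs
+  // over 2 cycles vs. 7 instrs over 3 cycles compares 5*3 = 15 against
+  // 2*7 = 14, so 5/2 > 7/3 as expected.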
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void print(raw_ostream &OS) const;
+
+ void dump() const;
+#endif
+};
+
+/// \brief Compute the values of each DAG node for various metrics during DFS.
+class SchedDFSResult {
+ friend class SchedDFSImpl;
+
+ static const unsigned InvalidSubtreeID = ~0u;
+
+ /// \brief Per-SUnit data computed during DFS for various metrics.
+ ///
+ /// A node's SubtreeID is set to itself when it is visited to indicate that it
+ /// is the root of a subtree. Later it is set to its parent to indicate an
+ /// interior node. Finally, it is set to a representative subtree ID during
+ /// finalization.
+ struct NodeData {
+ unsigned InstrCount;
+ unsigned SubtreeID;
+
+ NodeData(): InstrCount(0), SubtreeID(InvalidSubtreeID) {}
+ };
+
+ /// \brief Per-Subtree data computed during DFS.
+ struct TreeData {
+ unsigned ParentTreeID;
+ unsigned SubInstrCount;
+
+ TreeData(): ParentTreeID(InvalidSubtreeID), SubInstrCount(0) {}
+ };
+
+ /// \brief Record a connection between subtrees and the connection level.
+ struct Connection {
+ unsigned TreeID;
+ unsigned Level;
+
+ Connection(unsigned tree, unsigned level): TreeID(tree), Level(level) {}
+ };
+
+ bool IsBottomUp;
+ unsigned SubtreeLimit;
+ /// DFS results for each SUnit in this DAG.
+ std::vector<NodeData> DFSNodeData;
+
+  // Store per-tree data indexed on tree ID.
+ SmallVector<TreeData, 16> DFSTreeData;
+
+ // For each subtree discovered during DFS, record its connections to other
+ // subtrees.
+ std::vector<SmallVector<Connection, 4> > SubtreeConnections;
+
+ /// Cache the current connection level of each subtree.
+ /// This mutable array is updated during scheduling.
+ std::vector<unsigned> SubtreeConnectLevels;
+
+public:
+ SchedDFSResult(bool IsBU, unsigned lim)
+ : IsBottomUp(IsBU), SubtreeLimit(lim) {}
+
+ /// \brief Get the node cutoff before subtrees are considered significant.
+ unsigned getSubtreeLimit() const { return SubtreeLimit; }
+
+ /// \brief Return true if this DFSResult is uninitialized.
+ ///
+ /// resize() initializes DFSResult, while compute() populates it.
+ bool empty() const { return DFSNodeData.empty(); }
+
+ /// \brief Clear the results.
+ void clear() {
+ DFSNodeData.clear();
+ DFSTreeData.clear();
+ SubtreeConnections.clear();
+ SubtreeConnectLevels.clear();
+ }
+
+ /// \brief Initialize the result data with the size of the DAG.
+ void resize(unsigned NumSUnits) {
+ DFSNodeData.resize(NumSUnits);
+ }
+
+ /// \brief Compute various metrics for the DAG with given roots.
+ void compute(ArrayRef<SUnit> SUnits);
+
+ /// \brief Get the number of instructions in the given subtree and its
+ /// children.
+ unsigned getNumInstrs(const SUnit *SU) const {
+ return DFSNodeData[SU->NodeNum].InstrCount;
+ }
+
+ /// \brief Get the number of instructions in the given subtree not including
+ /// children.
+ unsigned getNumSubInstrs(unsigned SubtreeID) const {
+ return DFSTreeData[SubtreeID].SubInstrCount;
+ }
+
+ /// \brief Get the ILP value for a DAG node.
+ ///
+ /// A leaf node has an ILP of 1/1.
+ ILPValue getILP(const SUnit *SU) const {
+ return ILPValue(DFSNodeData[SU->NodeNum].InstrCount, 1 + SU->getDepth());
+ }
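+  // For example, a 6-instruction subtree whose root is at depth 2 yields
+  // ILPValue(6, 3).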
+
+ /// \brief The number of subtrees detected in this DAG.
+ unsigned getNumSubtrees() const { return SubtreeConnectLevels.size(); }
+
+ /// \brief Get the ID of the subtree the given DAG node belongs to.
+ ///
+ /// For convenience, if DFSResults have not been computed yet, give everything
+ /// tree ID 0.
+ unsigned getSubtreeID(const SUnit *SU) const {
+ if (empty())
+ return 0;
+ assert(SU->NodeNum < DFSNodeData.size() && "New Node");
+ return DFSNodeData[SU->NodeNum].SubtreeID;
+ }
+
+ /// \brief Get the connection level of a subtree.
+ ///
+ /// For bottom-up trees, the connection level is the latency depth (in cycles)
+ /// of the deepest connection to another subtree.
+ unsigned getSubtreeLevel(unsigned SubtreeID) const {
+ return SubtreeConnectLevels[SubtreeID];
+ }
+
+ /// \brief Scheduler callback to update SubtreeConnectLevels when a tree is
+ /// initially scheduled.
+ void scheduleTree(unsigned SubtreeID);
+};
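+
+// Illustrative use from a scheduler (a sketch; the subtree limit of 8 is an
+// arbitrary choice and SUnits is the DAG's node vector):
+//
+//   SchedDFSResult DFS(/*IsBottomUp=*/true, /*SubtreeLimit=*/8);
+//   DFS.resize(SUnits.size());
+//   DFS.compute(SUnits);
+//   ILPValue RootILP = DFS.getILP(&SUnits.back());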
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val);
+
+} // namespace llvm
+
+#endif
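The SchedDFSResult interface above is consumed by the machine scheduler's ILP heuristics. As a rough illustration only (the `pickCandidate` helper and its comparison policy are hypothetical, not part of this patch, and the ILPValue relational operators are assumed from earlier in this header), a bottom-up heuristic might combine subtree levels and ILP values like this:

```c++
#include "llvm/CodeGen/ScheduleDFS.h"

using namespace llvm;

// Hedged sketch: prefer the candidate whose subtree connects deeper into
// the DAG; break ties with the ILP ratio (instruction count over depth).
static const SUnit *pickCandidate(const SchedDFSResult &DFS,
                                  const SUnit *A, const SUnit *B) {
  unsigned LevelA = DFS.getSubtreeLevel(DFS.getSubtreeID(A));
  unsigned LevelB = DFS.getSubtreeLevel(DFS.getSubtreeID(B));
  if (LevelA != LevelB)
    return LevelA > LevelB ? A : B;
  // ILPValue comparisons evaluate the fractions without dividing.
  return DFS.getILP(A) > DFS.getILP(B) ? A : B;
}
```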
diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h
index 836b73a15a2f..51ac7f28527f 100644
--- a/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/include/llvm/CodeGen/SchedulerRegistry.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGENSCHEDULERREGISTRY_H
-#define LLVM_CODEGENSCHEDULERREGISTRY_H
+#ifndef LLVM_CODEGEN_SCHEDULERREGISTRY_H
+#define LLVM_CODEGEN_SCHEDULERREGISTRY_H
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
index 060e89a3fdc7..c2103fb233f8 100644
--- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Support/DataTypes.h"
-
#include <cassert>
#include <cstring>
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 619ee699430d..e5adf6724931 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -15,16 +15,17 @@
#ifndef LLVM_CODEGEN_SELECTIONDAG_H
#define LLVM_CODEGEN_SELECTIONDAG_H
-#include "llvm/ADT/ilist.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
-#include <vector>
#include <map>
#include <string>
+#include <vector>
namespace llvm {
@@ -36,6 +37,7 @@ class SDNodeOrdering;
class SDDbgValue;
class TargetLowering;
class TargetSelectionDAGInfo;
+class TargetTransformInfo;
template<> struct ilist_traits<SDNode> : public ilist_default_traits<SDNode> {
private:
@@ -111,13 +113,6 @@ public:
DbgIterator ByvalParmDbgEnd() { return ByvalParmDbgValues.end(); }
};
-enum CombineLevel {
- BeforeLegalizeTypes,
- AfterLegalizeTypes,
- AfterLegalizeVectorOps,
- AfterLegalizeDAG
-};
-
class SelectionDAG;
void checkForCycles(const SDNode *N);
void checkForCycles(const SelectionDAG *DAG);
@@ -137,6 +132,7 @@ class SelectionDAG {
const TargetMachine &TM;
const TargetLowering &TLI;
const TargetSelectionDAGInfo &TSI;
+ const TargetTransformInfo *TTI;
MachineFunction *MF;
LLVMContext *Context;
CodeGenOpt::Level OptLevel;
@@ -232,7 +228,7 @@ public:
/// init - Prepare this SelectionDAG to process code in the given
/// MachineFunction.
///
- void init(MachineFunction &mf);
+ void init(MachineFunction &mf, const TargetTransformInfo *TTI);
/// clear - Clear state and free memory necessary to make this
/// SelectionDAG ready to process a new block.
@@ -243,6 +239,7 @@ public:
const TargetMachine &getTarget() const { return TM; }
const TargetLowering &getTargetLoweringInfo() const { return TLI; }
const TargetSelectionDAGInfo &getSelectionDAGInfo() const { return TSI; }
+ const TargetTransformInfo *getTargetTransformInfo() const { return TTI; }
LLVMContext *getContext() const {return Context; }
/// viewGraph - Pop up a GraphViz/gv window with the DAG rendered using 'dot'.
@@ -570,7 +567,7 @@ public:
SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT,
const SDValue *Ops, unsigned NumOps);
SDValue getNode(unsigned Opcode, DebugLoc DL,
- const std::vector<EVT> &ResultTys,
+ ArrayRef<EVT> ResultTys,
const SDValue *Ops, unsigned NumOps);
SDValue getNode(unsigned Opcode, DebugLoc DL, const EVT *VTs, unsigned NumVTs,
const SDValue *Ops, unsigned NumOps);
@@ -834,7 +831,7 @@ public:
MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
EVT VT3, EVT VT4, const SDValue *Ops, unsigned NumOps);
MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl,
- const std::vector<EVT> &ResultTys, const SDValue *Ops,
+ ArrayRef<EVT> ResultTys, const SDValue *Ops,
unsigned NumOps);
MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, SDVTList VTs,
const SDValue *Ops, unsigned NumOps);
@@ -938,6 +935,20 @@ public:
}
}
+ /// Returns an APFloat semantics tag appropriate for the given type. If VT is
+ /// a vector type, the element semantics are returned.
+ static const fltSemantics &EVTToAPFloatSemantics(EVT VT) {
+ switch (VT.getScalarType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP format");
+ case MVT::f16: return APFloat::IEEEhalf;
+ case MVT::f32: return APFloat::IEEEsingle;
+ case MVT::f64: return APFloat::IEEEdouble;
+ case MVT::f80: return APFloat::x87DoubleExtended;
+ case MVT::f128: return APFloat::IEEEquad;
+ case MVT::ppcf128: return APFloat::PPCDoubleDouble;
+ }
+ }
+
/// AssignOrdering - Assign an order to the SDNode.
void AssignOrdering(const SDNode *SD, unsigned Order);
@@ -981,10 +992,8 @@ public:
SDValue CreateStackTemporary(EVT VT1, EVT VT2);
/// FoldConstantArithmetic -
- SDValue FoldConstantArithmetic(unsigned Opcode,
- EVT VT,
- ConstantSDNode *Cst1,
- ConstantSDNode *Cst2);
+ SDValue FoldConstantArithmetic(unsigned Opcode, EVT VT,
+ SDNode *Cst1, SDNode *Cst2);
/// FoldSetCC - Constant fold a setcc to true or false.
SDValue FoldSetCC(EVT VT, SDValue N1,
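The new static `EVTToAPFloatSemantics` helper centralizes the EVT-to-fltSemantics mapping that FP constant folding needs. A minimal sketch of the kind of call site it enables (the `zeroForVT` wrapper is hypothetical):

```c++
#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Build a positive zero with the semantics matching VT; for vector types
// the helper returns the element semantics, so this works for v4f32 too.
static APFloat zeroForVT(EVT VT) {
  return APFloat::getZero(SelectionDAG::EVTToAPFloatSemantics(VT));
}
```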
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index c42f6558007b..5f503deff10e 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -12,13 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_SELECTIONDAG_ISEL_H
-#define LLVM_CODEGEN_SELECTIONDAG_ISEL_H
+#ifndef LLVM_CODEGEN_SELECTIONDAGISEL_H
+#define LLVM_CODEGEN_SELECTIONDAGISEL_H
-#include "llvm/BasicBlock.h"
-#include "llvm/Pass.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/Pass.h"
namespace llvm {
class FastISel;
@@ -31,6 +31,7 @@ namespace llvm {
class TargetLowering;
class TargetLibraryInfo;
class TargetInstrInfo;
+ class TargetTransformInfo;
class FunctionLoweringInfo;
class ScheduleHazardRecognizer;
class GCFunctionInfo;
@@ -44,6 +45,7 @@ public:
const TargetMachine &TM;
const TargetLowering &TLI;
const TargetLibraryInfo *LibInfo;
+ const TargetTransformInfo *TTI;
FunctionLoweringInfo *FuncInfo;
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
@@ -247,16 +249,26 @@ private:
const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo);
void PrepareEHLandingPad();
+
+ /// \brief Perform instruction selection on all basic blocks in the function.
void SelectAllBasicBlocks(const Function &Fn);
- bool TryToFoldFastISelLoad(const LoadInst *LI, const Instruction *FoldInst,
- FastISel *FastIS);
- void FinishBasicBlock();
+ /// \brief Perform instruction selection on a single basic block, for
+ /// instructions between \p Begin and \p End. \p HadTailCall will be set
+ /// to true if a call in the block was translated as a tail call.
void SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall);
+
+ bool TryToFoldFastISelLoad(const LoadInst *LI, const Instruction *FoldInst,
+ FastISel *FastIS);
+ void FinishBasicBlock();
+
void CodeGenAndEmitDAG();
- void LowerArguments(const BasicBlock *BB);
+
+ /// \brief Generate instructions for lowering the incoming arguments of the
+ /// given function.
+ void LowerArguments(const Function &F);
void ComputeLiveOutVRegInfo();
@@ -279,4 +291,4 @@ private:
}
-#endif /* LLVM_CODEGEN_SELECTIONDAG_ISEL_H */
+#endif /* LLVM_CODEGEN_SELECTIONDAGISEL_H */
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 362e9afd225a..fef567f56bce 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -19,20 +19,20 @@
#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
#define LLVM_CODEGEN_SELECTIONDAGNODES_H
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ilist_node.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/MathExtras.h"
#include <cassert>
namespace llvm {
@@ -49,7 +49,7 @@ template <typename T> struct simplify_type;
template <typename T> struct ilist_traits;
void checkForCycles(const SDNode *N);
-
+
/// SDVTList - This represents a list of ValueType's that has been intern'd by
/// a SelectionDAG. Instances of this simple value class are returned by
/// SelectionDAG::getVTList(...).
@@ -108,7 +108,7 @@ public:
void setNode(SDNode *N) { Node = N; }
inline SDNode *operator->() const { return Node; }
-
+
bool operator==(const SDValue &O) const {
return Node == O.Node && ResNo == O.ResNo;
}
@@ -130,6 +130,11 @@ public:
///
inline EVT getValueType() const;
+ /// Return the simple ValueType of the referenced return value.
+ MVT getSimpleValueType() const {
+ return getValueType().getSimpleVT();
+ }
+
/// getValueSizeInBits - Returns the size of the value in bits.
///
unsigned getValueSizeInBits() const {
@@ -191,14 +196,14 @@ template <> struct isPodLike<SDValue> { static const bool value = true; };
/// SDValues as if they were SDNode*'s.
template<> struct simplify_type<SDValue> {
typedef SDNode* SimpleType;
- static SimpleType getSimplifiedValue(const SDValue &Val) {
- return static_cast<SimpleType>(Val.getNode());
+ static SimpleType getSimplifiedValue(SDValue &Val) {
+ return Val.getNode();
}
};
template<> struct simplify_type<const SDValue> {
- typedef SDNode* SimpleType;
+ typedef /*const*/ SDNode* SimpleType;
static SimpleType getSimplifiedValue(const SDValue &Val) {
- return static_cast<SimpleType>(Val.getNode());
+ return Val.getNode();
}
};
@@ -290,14 +295,8 @@ private:
/// SDValues as if they were SDNode*'s.
template<> struct simplify_type<SDUse> {
typedef SDNode* SimpleType;
- static SimpleType getSimplifiedValue(const SDUse &Val) {
- return static_cast<SimpleType>(Val.getNode());
- }
-};
-template<> struct simplify_type<const SDUse> {
- typedef SDNode* SimpleType;
- static SimpleType getSimplifiedValue(const SDUse &Val) {
- return static_cast<SimpleType>(Val.getNode());
+ static SimpleType getSimplifiedValue(SDUse &Val) {
+ return Val.getNode();
}
};
@@ -525,7 +524,7 @@ public:
/// NOTE: This is still very expensive. Use carefully.
bool hasPredecessorHelper(const SDNode *N,
SmallPtrSet<const SDNode *, 32> &Visited,
- SmallVector<const SDNode *, 16> &Worklist) const;
+ SmallVector<const SDNode *, 16> &Worklist) const;
/// getNumOperands - Return the number of values used by this operation.
///
@@ -595,6 +594,12 @@ public:
return ValueList[ResNo];
}
+ /// Return the type of a specified result as a simple type.
+ ///
+ MVT getSimpleValueType(unsigned ResNo) const {
+ return getValueType(ResNo).getSimpleVT();
+ }
+
/// getValueSizeInBits - Returns MVT::getSizeInBits(getValueType(ResNo)).
///
unsigned getValueSizeInBits(unsigned ResNo) const {
@@ -1287,7 +1292,7 @@ class ConstantPoolSDNode : public SDNode {
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool,
DebugLoc(),
getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) {
- assert((int)Offset >= 0 && "Offset is too large");
+ assert(Offset >= 0 && "Offset is too large");
Val.ConstVal = c;
}
ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,
@@ -1295,7 +1300,7 @@ class ConstantPoolSDNode : public SDNode {
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool,
DebugLoc(),
getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) {
- assert((int)Offset >= 0 && "Offset is too large");
+ assert(Offset >= 0 && "Offset is too large");
Val.MachineCPVal = v;
Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
}
@@ -1303,7 +1308,7 @@ public:
bool isMachineConstantPoolEntry() const {
- return (int)Offset < 0;
+ return Offset < 0;
}
const Constant *getConstVal() const {
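The `getSimpleValueType` accessors added to SDValue and SDNode let post-legalization code work directly in MVT terms. A hedged sketch (the predicate is illustrative, not from the patch):

```c++
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// After type legalization every in-tree type is simple, so the MVT
// accessor avoids the extended-type handling in EVT::getSimpleVT().
static bool isLegal128BitVector(SDValue V) {
  MVT VT = V.getSimpleValueType();
  return VT.isVector() && VT.is128BitVector();
}
```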
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index c52599b0f6f9..a27708046686 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -19,13 +19,14 @@
#ifndef LLVM_CODEGEN_SLOTINDEXES_H
#define LLVM_CODEGEN_SLOTINDEXES_H
-#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/ADT/ilist.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/Support/Allocator.h"
namespace llvm {
@@ -111,7 +112,7 @@ namespace llvm {
return lie.getPointer();
}
- int getIndex() const {
+ unsigned getIndex() const {
return listEntry()->getIndex() | getSlot();
}
@@ -359,6 +360,11 @@ namespace llvm {
/// Renumber the index list, providing space for new instructions.
void renumberIndexes();
+ /// Repair indexes after adding and removing instructions.
+ void repairIndexesInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End);
+
/// Returns the zero index for this analysis.
SlotIndex getZeroIndex() {
assert(indexList.front().getIndex() == 0 && "First index is not 0?");
@@ -390,12 +396,16 @@ namespace llvm {
return index.isValid() ? index.listEntry()->getInstr() : 0;
}
- /// Returns the next non-null index.
- SlotIndex getNextNonNullIndex(SlotIndex index) {
- IndexList::iterator itr(index.listEntry());
- ++itr;
- while (itr != indexList.end() && itr->getInstr() == 0) { ++itr; }
- return SlotIndex(itr, index.getSlot());
+ /// Returns the next non-null index, if one exists.
+ /// Otherwise returns getLastIndex().
+ SlotIndex getNextNonNullIndex(SlotIndex Index) {
+ IndexList::iterator I = Index.listEntry();
+ IndexList::iterator E = indexList.end();
+ while (++I != E)
+ if (I->getInstr())
+ return SlotIndex(I, Index.getSlot());
+ // We reached the end of the function.
+ return getLastIndex();
}
/// getIndexBefore - Returns the index of the last indexed instruction
@@ -601,29 +611,35 @@ namespace llvm {
void insertMBBInMaps(MachineBasicBlock *mbb) {
MachineFunction::iterator nextMBB =
llvm::next(MachineFunction::iterator(mbb));
- IndexListEntry *startEntry = createEntry(0, 0);
- IndexListEntry *stopEntry = createEntry(0, 0);
- IndexListEntry *nextEntry = 0;
+ IndexListEntry *startEntry = 0;
+ IndexListEntry *endEntry = 0;
+ IndexList::iterator newItr;
if (nextMBB == mbb->getParent()->end()) {
- nextEntry = indexList.end();
+ startEntry = &indexList.back();
+ endEntry = createEntry(0, 0);
+ newItr = indexList.insertAfter(startEntry, endEntry);
} else {
- nextEntry = getMBBStartIdx(nextMBB).listEntry();
+ startEntry = createEntry(0, 0);
+ endEntry = getMBBStartIdx(nextMBB).listEntry();
+ newItr = indexList.insert(endEntry, startEntry);
}
- indexList.insert(nextEntry, startEntry);
- indexList.insert(nextEntry, stopEntry);
-
SlotIndex startIdx(startEntry, SlotIndex::Slot_Block);
- SlotIndex endIdx(nextEntry, SlotIndex::Slot_Block);
+ SlotIndex endIdx(endEntry, SlotIndex::Slot_Block);
+
+ MachineFunction::iterator prevMBB(mbb);
+ assert(prevMBB != mbb->getParent()->end() &&
+ "Can't insert a new block at the beginning of a function.");
+ --prevMBB;
+ MBBRanges[prevMBB->getNumber()].second = startIdx;
assert(unsigned(mbb->getNumber()) == MBBRanges.size() &&
"Blocks must be added in order");
MBBRanges.push_back(std::make_pair(startIdx, endIdx));
-
idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb));
- renumberIndexes();
+ renumberIndexes(newItr);
std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
}
@@ -631,17 +647,8 @@ namespace llvm {
// Specialize IntervalMapInfo for half-open slot index intervals.
- template <typename> struct IntervalMapInfo;
- template <> struct IntervalMapInfo<SlotIndex> {
- static inline bool startLess(const SlotIndex &x, const SlotIndex &a) {
- return x < a;
- }
- static inline bool stopLess(const SlotIndex &b, const SlotIndex &x) {
- return b <= x;
- }
- static inline bool adjacent(const SlotIndex &a, const SlotIndex &b) {
- return a == b;
- }
+ template <>
+ struct IntervalMapInfo<SlotIndex> : IntervalMapHalfOpenInfo<SlotIndex> {
};
}
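With the specialization above reduced to `IntervalMapHalfOpenInfo`, any `IntervalMap` keyed on SlotIndex treats its ranges as half-open [start, stop). A hedged sketch of such a map (the unsigned payload and helper are arbitrary illustrations):

```c++
#include "llvm/ADT/IntervalMap.h"
#include "llvm/CodeGen/SlotIndexes.h"

using namespace llvm;

typedef IntervalMap<SlotIndex, unsigned> SlotRangeMap;

// Record that [Start, Stop) carries Tag; coalescing of adjacent equal
// ranges comes for free from IntervalMap via the half-open traits.
static void recordRange(SlotRangeMap &Map, SlotIndex Start, SlotIndex Stop,
                        unsigned Tag) {
  Map.insert(Start, Stop, Tag);
}
```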
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 9849e92f7dec..e7098e48bf06 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -15,9 +15,9 @@
#ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
#define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/StringRef.h"
namespace llvm {
class MachineModuleInfo;
@@ -55,13 +55,12 @@ public:
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const;
- /// getExprForDwarfGlobalReference - Return an MCExpr to use for a reference
- /// to the specified global variable from exception handling information.
- ///
+ /// getTTypeGlobalReference - Return an MCExpr to use for a reference to the
+ /// specified type info global variable from exception handling information.
virtual const MCExpr *
- getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const;
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
// getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
virtual MCSymbol *
@@ -103,12 +102,12 @@ public:
virtual bool shouldEmitUsedDirectiveFor(const GlobalValue *GV,
Mangler *) const;
- /// getExprForDwarfGlobalReference - The mach-o version of this method
+ /// getTTypeGlobalReference - The mach-o version of this method
/// defaults to returning a stub reference.
virtual const MCExpr *
- getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const;
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
// getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
virtual MCSymbol *
diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h
index 88e6105a7de2..3e22252eeac1 100644
--- a/include/llvm/CodeGen/TargetSchedule.h
+++ b/include/llvm/CodeGen/TargetSchedule.h
@@ -13,13 +13,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_TARGETSCHEDMODEL_H
-#define LLVM_TARGET_TARGETSCHEDMODEL_H
+#ifndef LLVM_CODEGEN_TARGETSCHEDULE_H
+#define LLVM_CODEGEN_TARGETSCHEDULE_H
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCSchedule.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
namespace llvm {
@@ -84,6 +84,9 @@ public:
/// \brief Maximum number of micro-ops that may be scheduled per cycle.
unsigned getIssueWidth() const { return SchedModel.IssueWidth; }
+ /// \brief Number of cycles the OOO processor is expected to hide.
+ unsigned getILPWindow() const { return SchedModel.ILPWindow; }
+
/// \brief Return the number of issue slots required for this MI.
unsigned getNumMicroOps(const MachineInstr *MI,
const MCSchedClassDesc *SC = 0) const;
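`getILPWindow` gives heuristics a per-subtarget estimate of how much latency the out-of-order core hides on its own. A hedged illustration (the predicate is hypothetical):

```c++
#include "llvm/CodeGen/TargetSchedule.h"

using namespace llvm;

// Treat a dependence chain as effectively free when the expected
// out-of-order window covers its length in cycles.
static bool latencyIsHidden(const TargetSchedModel &SchedModel,
                            unsigned ChainCycles) {
  return ChainCycles <= SchedModel.getILPWindow();
}
```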
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 240199291ae9..ec48b67b993c 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -33,6 +33,10 @@ namespace llvm {
class MVT {
public:
enum SimpleValueType {
+ // INVALID_SIMPLE_VALUE_TYPE - Simple value types less than zero are
+ // considered extended value types.
+ INVALID_SIMPLE_VALUE_TYPE = -1,
+
// If you change this numbering, you must change the values in
// ValueTypes.td as well!
Other = 0, // This is a non-standard value
@@ -60,52 +64,61 @@ namespace llvm {
v4i1 = 14, // 4 x i1
v8i1 = 15, // 8 x i1
v16i1 = 16, // 16 x i1
- v2i8 = 17, // 2 x i8
- v4i8 = 18, // 4 x i8
- v8i8 = 19, // 8 x i8
- v16i8 = 20, // 16 x i8
- v32i8 = 21, // 32 x i8
- v1i16 = 22, // 1 x i16
- v2i16 = 23, // 2 x i16
- v4i16 = 24, // 4 x i16
- v8i16 = 25, // 8 x i16
- v16i16 = 26, // 16 x i16
- v1i32 = 27, // 1 x i32
- v2i32 = 28, // 2 x i32
- v4i32 = 29, // 4 x i32
- v8i32 = 30, // 8 x i32
- v16i32 = 31, // 16 x i32
- v1i64 = 32, // 1 x i64
- v2i64 = 33, // 2 x i64
- v4i64 = 34, // 4 x i64
- v8i64 = 35, // 8 x i64
- v16i64 = 36, // 16 x i64
-
- v2f16 = 37, // 2 x f16
- v2f32 = 38, // 2 x f32
- v4f32 = 39, // 4 x f32
- v8f32 = 40, // 8 x f32
- v2f64 = 41, // 2 x f64
- v4f64 = 42, // 4 x f64
+ v32i1 = 17, // 32 x i1
+ v64i1 = 18, // 64 x i1
+
+ v2i8 = 19, // 2 x i8
+ v4i8 = 20, // 4 x i8
+ v8i8 = 21, // 8 x i8
+ v16i8 = 22, // 16 x i8
+ v32i8 = 23, // 32 x i8
+ v64i8 = 24, // 64 x i8
+ v1i16 = 25, // 1 x i16
+ v2i16 = 26, // 2 x i16
+ v4i16 = 27, // 4 x i16
+ v8i16 = 28, // 8 x i16
+ v16i16 = 29, // 16 x i16
+ v32i16 = 30, // 32 x i16
+ v1i32 = 31, // 1 x i32
+ v2i32 = 32, // 2 x i32
+ v4i32 = 33, // 4 x i32
+ v8i32 = 34, // 8 x i32
+ v16i32 = 35, // 16 x i32
+ v1i64 = 36, // 1 x i64
+ v2i64 = 37, // 2 x i64
+ v4i64 = 38, // 4 x i64
+ v8i64 = 39, // 8 x i64
+ v16i64 = 40, // 16 x i64
- FIRST_VECTOR_VALUETYPE = v2i1,
- LAST_VECTOR_VALUETYPE = v4f64,
FIRST_INTEGER_VECTOR_VALUETYPE = v2i1,
LAST_INTEGER_VECTOR_VALUETYPE = v16i64,
+
+ v2f16 = 41, // 2 x f16
+ v2f32 = 42, // 2 x f32
+ v4f32 = 43, // 4 x f32
+ v8f32 = 44, // 8 x f32
+ v16f32 = 45, // 16 x f32
+ v2f64 = 46, // 2 x f64
+ v4f64 = 47, // 4 x f64
+ v8f64 = 48, // 8 x f64
+
FIRST_FP_VECTOR_VALUETYPE = v2f16,
- LAST_FP_VECTOR_VALUETYPE = v4f64,
+ LAST_FP_VECTOR_VALUETYPE = v8f64,
- x86mmx = 43, // This is an X86 MMX value
+ FIRST_VECTOR_VALUETYPE = v2i1,
+ LAST_VECTOR_VALUETYPE = v8f64,
+
+ x86mmx = 49, // This is an X86 MMX value
- Glue = 44, // This glues nodes together during pre-RA sched
+ Glue = 50, // This glues nodes together during pre-RA sched
- isVoid = 45, // This has no value
+ isVoid = 51, // This has no value
- Untyped = 46, // This value takes a register, but has
+ Untyped = 52, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
- LAST_VALUETYPE = 47, // This always remains at the end of the list.
+ LAST_VALUETYPE = 53, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@@ -137,14 +150,7 @@ namespace llvm {
// iPTR - An int value the size of the pointer of the current
// target. This should only be used internal to tblgen!
- iPTR = 255,
-
- // LastSimpleValueType - The greatest valid SimpleValueType value.
- LastSimpleValueType = 255,
-
- // INVALID_SIMPLE_VALUE_TYPE - Simple value types greater than or equal
- // to this are considered extended value types.
- INVALID_SIMPLE_VALUE_TYPE = LastSimpleValueType + 1
+ iPTR = 255
};
SimpleValueType SimpleTy;
@@ -216,7 +222,9 @@ namespace llvm {
/// is512BitVector - Return true if this is a 512-bit vector type.
bool is512BitVector() const {
- return (SimpleTy == MVT::v8i64 || SimpleTy == MVT::v16i32);
+ return (SimpleTy == MVT::v8f64 || SimpleTy == MVT::v16f32 ||
+ SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 ||
+ SimpleTy == MVT::v8i64 || SimpleTy == MVT::v16i32);
}
/// is1024BitVector - Return true if this is a 1024-bit vector type.
@@ -254,17 +262,21 @@ namespace llvm {
case v2i1 :
case v4i1 :
case v8i1 :
- case v16i1: return i1;
+ case v16i1 :
+ case v32i1 :
+ case v64i1: return i1;
case v2i8 :
case v4i8 :
case v8i8 :
case v16i8:
- case v32i8: return i8;
+ case v32i8:
+ case v64i8: return i8;
case v1i16:
case v2i16:
case v4i16:
case v8i16:
- case v16i16: return i16;
+ case v16i16:
+ case v32i16: return i16;
case v1i32:
case v2i32:
case v4i32:
@@ -278,9 +290,11 @@ namespace llvm {
case v2f16: return f16;
case v2f32:
case v4f32:
- case v8f32: return f32;
+ case v8f32:
+ case v16f32: return f32;
case v2f64:
- case v4f64: return f64;
+ case v4f64:
+ case v8f64: return f64;
}
}
@@ -288,18 +302,24 @@ namespace llvm {
switch (SimpleTy) {
default:
llvm_unreachable("Not a vector MVT!");
- case v32i8: return 32;
+ case v32i1:
+ case v32i8:
+ case v32i16: return 32;
+ case v64i1:
+ case v64i8: return 64;
case v16i1:
case v16i8:
case v16i16:
case v16i32:
- case v16i64:return 16;
- case v8i1:
+ case v16i64:
+ case v16f32: return 16;
+ case v8i1 :
case v8i8 :
case v8i16:
case v8i32:
case v8i64:
- case v8f32: return 8;
+ case v8f32:
+ case v8f64: return 8;
case v4i1:
case v4i8:
case v4i16:
@@ -328,7 +348,10 @@ namespace llvm {
case iPTRAny:
case iAny:
case fAny:
+ case vAny:
llvm_unreachable("Value type is overloaded.");
+ case Metadata:
+ llvm_unreachable("Value type is metadata.");
default:
llvm_unreachable("getSizeInBits called on extended MVT.");
case i1 : return 1;
@@ -343,13 +366,15 @@ namespace llvm {
case v1i16: return 16;
case f32 :
case i32 :
+ case v32i1:
case v4i8:
case v2i16:
- case v2f16:
+ case v2f16:
case v1i32: return 32;
case x86mmx:
case f64 :
case i64 :
+ case v64i1:
case v8i8:
case v4i16:
case v2i32:
@@ -371,8 +396,12 @@ namespace llvm {
case v4i64:
case v8f32:
case v4f64: return 256;
+ case v64i8:
+ case v32i16:
case v16i32:
- case v8i64: return 512;
+ case v8i64:
+ case v16f32:
+ case v8f64: return 512;
case v16i64:return 1024;
}
}
@@ -389,6 +418,27 @@ namespace llvm {
return getStoreSize() * 8;
}
+ /// Return true if this has more bits than VT.
+ bool bitsGT(MVT VT) const {
+ return getSizeInBits() > VT.getSizeInBits();
+ }
+
+ /// Return true if this has no fewer bits than VT.
+ bool bitsGE(MVT VT) const {
+ return getSizeInBits() >= VT.getSizeInBits();
+ }
+
+ /// Return true if this has less bits than VT.
+ bool bitsLT(MVT VT) const {
+ return getSizeInBits() < VT.getSizeInBits();
+ }
+
+ /// Return true if this has no more bits than VT.
+ bool bitsLE(MVT VT) const {
+ return getSizeInBits() <= VT.getSizeInBits();
+ }
+
+
static MVT getFloatingPointVT(unsigned BitWidth) {
switch (BitWidth) {
default:
@@ -434,6 +484,8 @@ namespace llvm {
if (NumElements == 4) return MVT::v4i1;
if (NumElements == 8) return MVT::v8i1;
if (NumElements == 16) return MVT::v16i1;
+ if (NumElements == 32) return MVT::v32i1;
+ if (NumElements == 64) return MVT::v64i1;
break;
case MVT::i8:
if (NumElements == 2) return MVT::v2i8;
@@ -441,6 +493,7 @@ namespace llvm {
if (NumElements == 8) return MVT::v8i8;
if (NumElements == 16) return MVT::v16i8;
if (NumElements == 32) return MVT::v32i8;
+ if (NumElements == 64) return MVT::v64i8;
break;
case MVT::i16:
if (NumElements == 1) return MVT::v1i16;
@@ -448,6 +501,7 @@ namespace llvm {
if (NumElements == 4) return MVT::v4i16;
if (NumElements == 8) return MVT::v8i16;
if (NumElements == 16) return MVT::v16i16;
+ if (NumElements == 32) return MVT::v32i16;
break;
case MVT::i32:
if (NumElements == 1) return MVT::v1i32;
@@ -470,14 +524,22 @@ namespace llvm {
if (NumElements == 2) return MVT::v2f32;
if (NumElements == 4) return MVT::v4f32;
if (NumElements == 8) return MVT::v8f32;
+ if (NumElements == 16) return MVT::v16f32;
break;
case MVT::f64:
if (NumElements == 2) return MVT::v2f64;
if (NumElements == 4) return MVT::v4f64;
+ if (NumElements == 8) return MVT::v8f64;
break;
}
return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
+
+ /// Return the value type corresponding to the specified type. This returns
+ /// all pointers as iPTR. If HandleUnknown is true, unknown types are
+ /// returned as Other, otherwise they are invalid.
+ static MVT getVT(Type *Ty, bool HandleUnknown = false);
+
};
@@ -501,7 +563,7 @@ namespace llvm {
bool operator!=(EVT VT) const {
if (V.SimpleTy != VT.V.SimpleTy)
return true;
- if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ if (V.SimpleTy < 0)
return LLVMTy != VT.LLVMTy;
return false;
}
@@ -517,7 +579,7 @@ namespace llvm {
/// number of bits.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) {
MVT M = MVT::getIntegerVT(BitWidth);
- if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ if (M.SimpleTy >= 0)
return M;
return getExtendedIntegerVT(Context, BitWidth);
}
@@ -526,7 +588,7 @@ namespace llvm {
/// length, where each element is of type VT.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements) {
MVT M = MVT::getVectorVT(VT.V, NumElements);
- if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ if (M.SimpleTy >= 0)
return M;
return getExtendedVectorVT(Context, VT, NumElements);
}
@@ -541,7 +603,7 @@ namespace llvm {
unsigned BitWidth = EltTy.getSizeInBits();
MVT IntTy = MVT::getIntegerVT(BitWidth);
MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements());
- assert(VecTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&
+ assert(VecTy.SimpleTy >= 0 &&
"Simple vector VT not representable by simple integer vector VT!");
return VecTy;
}
@@ -549,7 +611,7 @@ namespace llvm {
/// isSimple - Test if the given EVT is simple (as opposed to being
/// extended).
bool isSimple() const {
- return V.SimpleTy <= MVT::LastSimpleValueType;
+ return V.SimpleTy >= 0;
}
/// isExtended - Test if the given EVT is extended (as opposed to
@@ -765,7 +827,7 @@ namespace llvm {
/// types are returned as Other, otherwise they are invalid.
static EVT getEVT(Type *Ty, bool HandleUnknown = false);
- intptr_t getRawBits() {
+ intptr_t getRawBits() const {
if (isSimple())
return V.SimpleTy;
else
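The widened MVT enum and the new `bitsGT`/`bitsGE`/`bitsLT`/`bitsLE` helpers make the 512-bit types directly reachable from the MVT factory functions. A small hedged demonstration (names illustrative):

```c++
#include "llvm/CodeGen/ValueTypes.h"

using namespace llvm;

static bool demoNewVectorTypes() {
  MVT Full = MVT::getVectorVT(MVT::f32, 16); // MVT::v16f32, new in this patch
  MVT Half = MVT::getVectorVT(MVT::f32, 8);  // MVT::v8f32
  // Both checks hold: v16f32 is 512 bits wide and strictly wider than v8f32.
  return Full.is512BitVector() && Half.bitsLT(Full);
}
```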
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index a707f887aaf4..76df6ac8e65b 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -37,39 +37,45 @@ def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value
def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value
def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value
def v16i1 : ValueType<16, 16>; // 16 x i1 vector value
-def v2i8 : ValueType<16 , 17>; // 2 x i8 vector value
-def v4i8 : ValueType<32 , 18>; // 4 x i8 vector value
-def v8i8 : ValueType<64 , 19>; // 8 x i8 vector value
-def v16i8 : ValueType<128, 20>; // 16 x i8 vector value
-def v32i8 : ValueType<256, 21>; // 32 x i8 vector value
-def v1i16 : ValueType<16 , 22>; // 1 x i16 vector value
-def v2i16 : ValueType<32 , 23>; // 2 x i16 vector value
-def v4i16 : ValueType<64 , 24>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 25>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 26>; // 16 x i16 vector value
-def v1i32 : ValueType<32 , 27>; // 1 x i32 vector value
-def v2i32 : ValueType<64 , 28>; // 2 x i32 vector value
-def v4i32 : ValueType<128, 29>; // 4 x i32 vector value
-def v8i32 : ValueType<256, 30>; // 8 x i32 vector value
-def v16i32 : ValueType<512, 31>; // 16 x i32 vector value
-def v1i64 : ValueType<64 , 32>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 33>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 34>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 35>; // 8 x i64 vector value
-def v16i64 : ValueType<1024,36>; // 16 x i64 vector value
+def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value
+def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value
+def v2i8 : ValueType<16 , 19>; // 2 x i8 vector value
+def v4i8 : ValueType<32 , 20>; // 4 x i8 vector value
+def v8i8 : ValueType<64 , 21>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 22>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 23>; // 32 x i8 vector value
+def v64i8 : ValueType<512, 24>; // 64 x i8 vector value
+def v1i16 : ValueType<16 , 25>; // 1 x i16 vector value
+def v2i16 : ValueType<32 , 26>; // 2 x i16 vector value
+def v4i16 : ValueType<64 , 27>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 28>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 29>; // 16 x i16 vector value
+def v32i16 : ValueType<512, 30>; // 32 x i16 vector value
+def v1i32 : ValueType<32 , 31>; // 1 x i32 vector value
+def v2i32 : ValueType<64 , 32>; // 2 x i32 vector value
+def v4i32 : ValueType<128, 33>; // 4 x i32 vector value
+def v8i32 : ValueType<256, 34>; // 8 x i32 vector value
+def v16i32 : ValueType<512, 35>; // 16 x i32 vector value
+def v1i64 : ValueType<64 , 36>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 37>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 38>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 39>; // 8 x i64 vector value
+def v16i64 : ValueType<1024,40>; // 16 x i64 vector value
-def v2f16 : ValueType<32 , 37>; // 2 x f16 vector value
-def v2f32 : ValueType<64 , 38>; // 2 x f32 vector value
-def v4f32 : ValueType<128, 39>; // 4 x f32 vector value
-def v8f32 : ValueType<256, 40>; // 8 x f32 vector value
-def v2f64 : ValueType<128, 41>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 42>; // 4 x f64 vector value
+def v2f16 : ValueType<32 , 41>; // 2 x f16 vector value
+def v2f32 : ValueType<64 , 42>; // 2 x f32 vector value
+def v4f32 : ValueType<128, 43>; // 4 x f32 vector value
+def v8f32 : ValueType<256, 44>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 45>; // 16 x f32 vector value
+def v2f64 : ValueType<128, 46>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 47>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 48>; // 8 x f64 vector value
-def x86mmx : ValueType<64 , 43>; // X86 MMX value
-def FlagVT : ValueType<0 , 44>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 45>; // Produces no value
-def untyped: ValueType<8 , 46>; // Produces an untyped value
+def x86mmx : ValueType<64 , 49>; // X86 MMX value
+def FlagVT : ValueType<0 , 50>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 51>; // Produces no value
+def untyped: ValueType<8 , 52>; // Produces an untyped value
def MetadataVT: ValueType<0, 250>; // Metadata
// Pseudo valuetype mapped to the current pointer size to any address space.
diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h
new file mode 100644
index 000000000000..3bc6ebd563f2
--- /dev/null
+++ b/include/llvm/CodeGen/VirtRegMap.h
@@ -0,0 +1,190 @@
+//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a virtual register map. This maps virtual registers to
+// physical registers and virtual registers to stack slots. It is created and
+// updated by a register allocator and then used by a machine code rewriter that
+// adds spill code and rewrites virtual into physical register references.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGMAP_H
+#define LLVM_CODEGEN_VIRTREGMAP_H
+
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class MachineInstr;
+ class MachineFunction;
+ class MachineRegisterInfo;
+ class TargetInstrInfo;
+ class raw_ostream;
+ class SlotIndexes;
+
+ class VirtRegMap : public MachineFunctionPass {
+ public:
+ enum {
+ NO_PHYS_REG = 0,
+ NO_STACK_SLOT = (1L << 30)-1,
+ MAX_STACK_SLOT = (1L << 18)-1
+ };
+
+ private:
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineFunction *MF;
+
+ /// Virt2PhysMap - This is a virtual to physical register
+ /// mapping. Each virtual register is required to have an entry in
+ /// it; even spilled virtual registers (the register mapped to a
+ /// spilled register is the temporary used to load it from the
+ /// stack).
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
+
+ /// Virt2StackSlotMap - This is a virtual register to stack slot
+ /// mapping. Each spilled virtual register has an entry in it
+ /// which corresponds to the stack slot this register is spilled
+ /// at.
+ IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
+
+ /// Virt2SplitMap - This is a virtual register to split virtual register
+ /// mapping.
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
+
+ /// createSpillSlot - Allocate a spill slot for RC from MFI.
+ unsigned createSpillSlot(const TargetRegisterClass *RC);
+
+ VirtRegMap(const VirtRegMap&) LLVM_DELETED_FUNCTION;
+ void operator=(const VirtRegMap&) LLVM_DELETED_FUNCTION;
+
+ public:
+ static char ID;
+ VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
+ Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunction &getMachineFunction() const {
+ assert(MF && "getMachineFunction called before runOnMachineFunction");
+ return *MF;
+ }
+
+ MachineRegisterInfo &getRegInfo() const { return *MRI; }
+ const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; }
+
+ void grow();
+
+ /// @brief returns true if the specified virtual register is
+ /// mapped to a physical register
+ bool hasPhys(unsigned virtReg) const {
+ return getPhys(virtReg) != NO_PHYS_REG;
+ }
+
+ /// @brief returns the physical register mapped to the specified
+ /// virtual register
+ unsigned getPhys(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2PhysMap[virtReg];
+ }
+
+ /// @brief creates a mapping for the specified virtual register to
+ /// the specified physical register
+ void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(physReg));
+ assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+ "attempt to assign physical register to already mapped "
+ "virtual register");
+ Virt2PhysMap[virtReg] = physReg;
+ }
+
+ /// @brief clears the specified virtual register's physical
+ /// register mapping
+ void clearVirt(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
+ "attempt to clear a not assigned virtual register");
+ Virt2PhysMap[virtReg] = NO_PHYS_REG;
+ }
+
+ /// @brief clears all virtual to physical register mappings
+ void clearAllVirt() {
+ Virt2PhysMap.clear();
+ grow();
+ }
+
+ /// @brief returns true if VirtReg is assigned to its preferred physreg.
+ bool hasPreferredPhys(unsigned VirtReg);
+
+ /// @brief returns true if VirtReg has a known preferred register.
+ /// This returns false if VirtReg has a preference that is a virtual
+ /// register that hasn't been assigned yet.
+ bool hasKnownPreference(unsigned VirtReg);
+
+ /// @brief records that virtReg is a split live interval from SReg.
+ void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
+ Virt2SplitMap[virtReg] = SReg;
+ }
+
+ /// @brief returns the live interval virtReg is split from.
+ unsigned getPreSplitReg(unsigned virtReg) const {
+ return Virt2SplitMap[virtReg];
+ }
+
+ /// getOriginal - Return the original virtual register that VirtReg descends
+ /// from through splitting.
+ /// A register that was not created by splitting is its own original.
+ /// This operation is idempotent.
+ unsigned getOriginal(unsigned VirtReg) const {
+ unsigned Orig = getPreSplitReg(VirtReg);
+ return Orig ? Orig : VirtReg;
+ }
+
+ /// @brief returns true if the specified virtual register is not
+ /// mapped to a stack slot or rematerialized.
+ bool isAssignedReg(unsigned virtReg) const {
+ if (getStackSlot(virtReg) == NO_STACK_SLOT)
+ return true;
+ // A split register can be assigned a physical register as well as a
+ // stack slot or remat id.
+ return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
+ }
+
+ /// @brief returns the stack slot mapped to the specified virtual
+ /// register
+ int getStackSlot(unsigned virtReg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ return Virt2StackSlotMap[virtReg];
+ }
+
+ /// @brief create a mapping for the specified virtual register to
+ /// the next available stack slot
+ int assignVirt2StackSlot(unsigned virtReg);
+ /// @brief create a mapping for the specified virtual register to
+ /// the specified stack slot
+ void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
+
+ void print(raw_ostream &OS, const Module* M = 0) const;
+ void dump() const;
+ };
+
+ inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) {
+ VRM.print(OS);
+ return OS;
+ }
+} // End llvm namespace
+
+#endif
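A hedged sketch of how a diagnostic pass might query the map (the `reportAssignment` helper is hypothetical; the real consumers are the register allocators and the rewriter):

```c++
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Print where a virtual register ended up: a physreg, a stack slot, or both
// (split registers may legitimately have a physreg and a slot).
static void reportAssignment(const VirtRegMap &VRM, unsigned VirtReg) {
  if (VRM.hasPhys(VirtReg))
    outs() << "physreg: "
           << VRM.getTargetRegInfo().getName(VRM.getPhys(VirtReg)) << "\n";
  if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT)
    outs() << "stack slot: " << VRM.getStackSlot(VirtReg) << "\n";
}
```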
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index ca6412472991..0a2685739782 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -6,6 +6,9 @@
/* Bug report URL. */
#define BUG_REPORT_URL "${BUG_REPORT_URL}"
+/* Define if we have libxml2 */
+#cmakedefine CLANG_HAVE_LIBXML ${CLANG_HAVE_LIBXML}
+
/* Relative directory for resource files */
#define CLANG_RESOURCE_DIR "${CLANG_RESOURCE_DIR}"
@@ -69,7 +72,7 @@
/* Define to 1 if you have the <CrashReporterClient.h> header file. */
#undef HAVE_CRASHREPORTERCLIENT_H
-/* Define if __crashreporter_info__ exists. */
+/* can use __crashreporter_info__ */
#undef HAVE_CRASHREPORTER_INFO
/* Define to 1 if you have the <ctype.h> header file. */
@@ -143,6 +146,24 @@
/* Define to 1 if you have the `floorf' function. */
#cmakedefine HAVE_FLOORF ${HAVE_FLOORF}
+/* Define to 1 if you have the `log' function. */
+#cmakedefine HAVE_LOG ${HAVE_LOG}
+
+/* Define to 1 if you have the `log2' function. */
+#cmakedefine HAVE_LOG2 ${HAVE_LOG2}
+
+/* Define to 1 if you have the `log10' function. */
+#cmakedefine HAVE_LOG10 ${HAVE_LOG10}
+
+/* Define to 1 if you have the `exp' function. */
+#cmakedefine HAVE_EXP ${HAVE_EXP}
+
+/* Define to 1 if you have the `exp2' function. */
+#cmakedefine HAVE_EXP2 ${HAVE_EXP2}
+
+/* Define to 1 if you have the `exp10' function. */
+#cmakedefine HAVE_EXP10 ${HAVE_EXP10}
+
/* Define to 1 if you have the `fmodf' function. */
#cmakedefine HAVE_FMODF ${HAVE_FMODF}
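These probes are consumed with ordinary preprocessor guards; a hedged sketch of the usual fallback pattern (the wrapper is illustrative, not from the patch):

```c++
#include "llvm/Config/config.h"
#include <cmath>

// Prefer the native log2 when configure detected one; otherwise derive
// it from the natural log, which C89 guarantees.
static double portableLog2(double X) {
#if defined(HAVE_LOG2)
  return log2(X);
#else
  return std::log(X) / 0.69314718055994530942; // ln(2)
#endif
}
```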
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index a4f8af4db028..5a3d02c553ee 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -72,12 +72,20 @@
/* Define to 1 if you have the <CrashReporterClient.h> header file. */
#undef HAVE_CRASHREPORTERCLIENT_H
-/* Define if __crashreporter_info__ exists. */
+/* can use __crashreporter_info__ */
#undef HAVE_CRASHREPORTER_INFO
/* Define to 1 if you have the <ctype.h> header file. */
#undef HAVE_CTYPE_H
+/* Define to 1 if you have the declaration of `FE_ALL_EXCEPT', and to 0 if you
+ don't. */
+#undef HAVE_DECL_FE_ALL_EXCEPT
+
+/* Define to 1 if you have the declaration of `FE_INEXACT', and to 0 if you
+ don't. */
+#undef HAVE_DECL_FE_INEXACT
+
/* Define to 1 if you have the declaration of `strerror_s', and to 0 if you
don't. */
#undef HAVE_DECL_STRERROR_S
@@ -122,6 +130,12 @@
/* Define to 1 if you have the <execinfo.h> header file. */
#undef HAVE_EXECINFO_H
+/* Define to 1 if you have the `exp' function. */
+#undef HAVE_EXP
+
+/* Define to 1 if you have the `exp2' function. */
+#undef HAVE_EXP2
+
/* Define to 1 if you have the <fcntl.h> header file. */
#undef HAVE_FCNTL_H
@@ -225,6 +239,15 @@
the current directory to the dynamic linker search path. */
#undef HAVE_LINK_R
+/* Define to 1 if you have the `log' function. */
+#undef HAVE_LOG
+
+/* Define to 1 if you have the `log10' function. */
+#undef HAVE_LOG10
+
+/* Define to 1 if you have the `log2' function. */
+#undef HAVE_LOG2
+
/* Define to 1 if you have the `longjmp' function. */
#undef HAVE_LONGJMP
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
index 39442926dc9b..eda17ee4a62b 100644
--- a/include/llvm/Config/llvm-config.h.cmake
+++ b/include/llvm/Config/llvm-config.h.cmake
@@ -112,10 +112,19 @@
/* Installation prefix directory */
#cmakedefine LLVM_PREFIX "${LLVM_PREFIX}"
+/* Define if we have the Intel JIT API runtime support library */
+#cmakedefine LLVM_USE_INTEL_JITEVENTS 1
+
+/* Define if we have the oprofile JIT-support library */
+#cmakedefine LLVM_USE_OPROFILE 1
+
/* Major version of the LLVM API */
#cmakedefine LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
/* Minor version of the LLVM API */
#cmakedefine LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR}
+/* Define to 1 if you have the <sanitizer/msan_interface.h> header file. */
+#cmakedefine HAVE_SANITIZER_MSAN_INTERFACE_H ${HAVE_SANITIZER_MSAN_INTERFACE_H}
+
#endif
diff --git a/include/llvm/Config/llvm-config.h.in b/include/llvm/Config/llvm-config.h.in
index 9489dfe01633..af3a32485525 100644
--- a/include/llvm/Config/llvm-config.h.in
+++ b/include/llvm/Config/llvm-config.h.in
@@ -112,6 +112,12 @@
/* Installation prefix directory */
#undef LLVM_PREFIX
+/* Define if we have the Intel JIT API runtime support library */
+#undef LLVM_USE_INTEL_JITEVENTS
+
+/* Define if we have the oprofile JIT-support library */
+#undef LLVM_USE_OPROFILE
+
/* Major version of the LLVM API */
#undef LLVM_VERSION_MAJOR
diff --git a/include/llvm/Constant.h b/include/llvm/Constant.h
deleted file mode 100644
index 0ddd1db6c010..000000000000
--- a/include/llvm/Constant.h
+++ /dev/null
@@ -1,159 +0,0 @@
-//===-- llvm/Constant.h - Constant class definition -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the Constant class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CONSTANT_H
-#define LLVM_CONSTANT_H
-
-#include "llvm/User.h"
-
-namespace llvm {
- class APInt;
-
- template<typename T> class SmallVectorImpl;
-
-/// This is an important base class in LLVM. It provides the common facilities
-/// of all constant values in an LLVM program. A constant is a value that is
-/// immutable at runtime. Functions are constants because their address is
-/// immutable. Same with global variables.
-///
-/// All constants share the capabilities provided in this class. All constants
-/// can have a null value. They can have an operand list. Constants can be
-/// simple (integer and floating point values), complex (arrays and structures),
-/// or expression based (computations yielding a constant value composed of
-/// only certain operators and other constant values).
-///
-/// Note that Constants are immutable (once created they never change)
-/// and are fully shared by structural equivalence. This means that two
-/// structurally equivalent constants will always have the same address.
-/// Constants are created on demand as needed and never deleted: thus clients
-/// don't have to worry about the lifetime of the objects.
-/// @brief LLVM Constant Representation
-class Constant : public User {
- void operator=(const Constant &) LLVM_DELETED_FUNCTION;
- Constant(const Constant &) LLVM_DELETED_FUNCTION;
- virtual void anchor();
-
-protected:
- Constant(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps)
- : User(ty, vty, Ops, NumOps) {}
-
- void destroyConstantImpl();
-public:
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue.
- bool isNullValue() const;
-
- /// isAllOnesValue - Return true if this is the value that would be returned by
- /// getAllOnesValue.
- bool isAllOnesValue() const;
-
- /// isNegativeZeroValue - Return true if the value is what would be returned
- /// by getZeroValueForNegation.
- bool isNegativeZeroValue() const;
-
- /// canTrap - Return true if evaluation of this constant could trap. This is
- /// true for things like constant expressions that could divide by zero.
- bool canTrap() const;
-
- /// isThreadDependent - Return true if the value can vary between threads.
- bool isThreadDependent() const;
-
- /// isConstantUsed - Return true if the constant has users other than constant
- /// exprs and other dangling things.
- bool isConstantUsed() const;
-
- enum PossibleRelocationsTy {
- NoRelocation = 0,
- LocalRelocation = 1,
- GlobalRelocations = 2
- };
-
- /// getRelocationInfo - This method classifies the entry according to
- /// whether or not it may generate a relocation entry. This must be
- /// conservative, so if it might codegen to a relocatable entry, it should say
- /// so. The return values are:
- ///
- /// NoRelocation: This constant pool entry is guaranteed to never have a
- /// relocation applied to it (because it holds a simple constant like
- /// '4').
- /// LocalRelocation: This entry has relocations, but the entries are
- /// guaranteed to be resolvable by the static linker, so the dynamic
- /// linker will never see them.
- /// GlobalRelocations: This entry may have arbitrary relocations.
- ///
- /// FIXME: This really should not be in VMCore.
- PossibleRelocationsTy getRelocationInfo() const;
-
- /// getAggregateElement - For aggregates (struct/array/vector) return the
- /// constant that corresponds to the specified element if possible, or null if
- /// not. This can return null if the element index is a ConstantExpr, or if
- /// 'this' is a constant expr.
- Constant *getAggregateElement(unsigned Elt) const;
- Constant *getAggregateElement(Constant *Elt) const;
-
- /// destroyConstant - Called if some element of this constant is no longer
- /// valid. At this point only other constants may be on the use_list for this
- /// constant. Any constants on our Use list must also be destroy'd. The
- /// implementation must be sure to remove the constant from the list of
- /// available cached constants. Implementations should call
- /// destroyConstantImpl as the last thing they do, to destroy all users and
- /// delete this.
- virtual void destroyConstant() { llvm_unreachable("Not reached!"); }
-
- //// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
- return V->getValueID() >= ConstantFirstVal &&
- V->getValueID() <= ConstantLastVal;
- }
-
- /// replaceUsesOfWithOnConstant - This method is a special form of
- /// User::replaceUsesOfWith (which does not work on constants) that does work
- /// on constants. Basically this method goes through the trouble of building
- /// a new constant that is equivalent to the current one, with all uses of
- /// From replaced with uses of To. After this construction is completed, all
- /// of the users of 'this' are replaced to use the new constant, and then
- /// 'this' is deleted. In general, you should not call this method, instead,
- /// use Value::replaceAllUsesWith, which automatically dispatches to this
- /// method as needed.
- ///
- virtual void replaceUsesOfWithOnConstant(Value *, Value *, Use *) {
- // Provide a default implementation for constants (like integers) that
- // cannot use any other values. This cannot be called at runtime, but needs
- // to be here to avoid link errors.
- assert(getNumOperands() == 0 && "replaceUsesOfWithOnConstant must be "
- "implemented for all constants that have operands!");
- llvm_unreachable("Constants that do not have operands cannot be using "
- "'From'!");
- }
-
- static Constant *getNullValue(Type* Ty);
-
- /// @returns the value for an integer or vector of integer constant of the
- /// given type that has all its bits set to true.
- /// @brief Get the all ones value
- static Constant *getAllOnesValue(Type* Ty);
-
- /// getIntegerValue - Return the value for an integer or pointer constant,
- /// or a vector thereof, with the given scalar value.
- static Constant *getIntegerValue(Type* Ty, const APInt &V);
-
- /// removeDeadConstantUsers - If there are any dead constant users dangling
- /// off of this constant, remove them. This method is useful for clients
- /// that want to check to see if a global is unused, but don't want to deal
- /// with potentially dead constants hanging off of the globals.
- void removeDeadConstantUsers() const;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
deleted file mode 100644
index 7f94ef464ea4..000000000000
--- a/include/llvm/Constants.h
+++ /dev/null
@@ -1,1154 +0,0 @@
-//===-- llvm/Constants.h - Constant class subclass definitions --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// @file
-/// This file contains the declarations for the subclasses of Constant,
-/// which represent the different flavors of constant values that live in LLVM.
-/// Note that Constants are immutable (once created they never change) and are
-/// fully shared by structural equivalence. This means that two structurally
- /// equivalent constants will always have the same address. Constants are
-/// created on demand as needed and never deleted: thus clients don't have to
-/// worry about the lifetime of the objects.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CONSTANTS_H
-#define LLVM_CONSTANTS_H
-
-#include "llvm/Constant.h"
-#include "llvm/OperandTraits.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/ArrayRef.h"
-
-namespace llvm {
-
-class ArrayType;
-class IntegerType;
-class StructType;
-class PointerType;
-class VectorType;
-class SequentialType;
-
-template<class ConstantClass, class TypeClass, class ValType>
-struct ConstantCreator;
-template<class ConstantClass, class TypeClass>
-struct ConstantArrayCreator;
-template<class ConstantClass, class TypeClass>
-struct ConvertConstantType;
-
-//===----------------------------------------------------------------------===//
-/// This is the shared class of boolean and integer constants. This class
-/// represents both boolean and integral constants.
-/// @brief Class for constant integers.
-class ConstantInt : public Constant {
- virtual void anchor();
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- ConstantInt(const ConstantInt &) LLVM_DELETED_FUNCTION;
- ConstantInt(IntegerType *Ty, const APInt& V);
- APInt Val;
-protected:
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
-public:
- static ConstantInt *getTrue(LLVMContext &Context);
- static ConstantInt *getFalse(LLVMContext &Context);
- static Constant *getTrue(Type *Ty);
- static Constant *getFalse(Type *Ty);
-
- /// If Ty is a vector type, return a Constant with a splat of the given
- /// value. Otherwise return a ConstantInt for the given value.
- static Constant *get(Type *Ty, uint64_t V, bool isSigned = false);
-
- /// Return a ConstantInt with the specified integer value for the specified
- /// type. If the type is wider than 64 bits, the value will be zero-extended
- /// to fit the type, unless isSigned is true, in which case the value will
- /// be interpreted as a 64-bit signed integer and sign-extended to fit
- /// the type.
- /// @brief Get a ConstantInt for a specific value.
- static ConstantInt *get(IntegerType *Ty, uint64_t V,
- bool isSigned = false);
-
- /// Return a ConstantInt with the specified value for the specified type. The
- /// value V will be canonicalized to an unsigned APInt. Accessing it with
- /// either getSExtValue() or getZExtValue() will yield a correctly sized and
- /// signed value for the type Ty.
- /// @brief Get a ConstantInt for a specific signed value.
- static ConstantInt *getSigned(IntegerType *Ty, int64_t V);
- static Constant *getSigned(Type *Ty, int64_t V);
-
- /// Return a ConstantInt with the specified value and an implied Type. The
- /// type is the integer type that corresponds to the bit width of the value.
- static ConstantInt *get(LLVMContext &Context, const APInt &V);
-
- /// Return a ConstantInt constructed from the string strStart with the given
- /// radix.
- static ConstantInt *get(IntegerType *Ty, StringRef Str,
- uint8_t radix);
-
- /// If Ty is a vector type, return a Constant with a splat of the given
- /// value. Otherwise return a ConstantInt for the given value.
- static Constant *get(Type* Ty, const APInt& V);
-
- /// Return the constant as an APInt value reference. This allows clients to
-  /// obtain a copy of the value, with all its precision intact.
- /// @brief Return the constant's value.
- inline const APInt &getValue() const {
- return Val;
- }
-
- /// getBitWidth - Return the bitwidth of this constant.
- unsigned getBitWidth() const { return Val.getBitWidth(); }
-
- /// Return the constant as a 64-bit unsigned integer value after it
- /// has been zero extended as appropriate for the type of this constant. Note
- /// that this method can assert if the value does not fit in 64 bits.
- /// @deprecated
- /// @brief Return the zero extended value.
- inline uint64_t getZExtValue() const {
- return Val.getZExtValue();
- }
-
- /// Return the constant as a 64-bit integer value after it has been sign
- /// extended as appropriate for the type of this constant. Note that
- /// this method can assert if the value does not fit in 64 bits.
- /// @deprecated
- /// @brief Return the sign extended value.
- inline int64_t getSExtValue() const {
- return Val.getSExtValue();
- }
-
-  /// A helper method that can be used to determine if the value contained
-  /// within this constant is equal to the given 64-bit value. This only works
-  /// reliably for very small values, since those are representable by every
-  /// integer type.
-  /// @brief Determine if this constant's value is the same as the given value.
- bool equalsInt(uint64_t V) const {
- return Val == V;
- }
-
- /// getType - Specialize the getType() method to always return an IntegerType,
- /// which reduces the amount of casting needed in parts of the compiler.
- ///
- inline IntegerType *getType() const {
- return reinterpret_cast<IntegerType*>(Value::getType());
- }
-
- /// This static method returns true if the type Ty is big enough to
- /// represent the value V. This can be used to avoid having the get method
- /// assert when V is larger than Ty can represent. Note that there are two
- /// versions of this method, one for unsigned and one for signed integers.
- /// Although ConstantInt canonicalizes everything to an unsigned integer,
- /// the signed version avoids callers having to convert a signed quantity
- /// to the appropriate unsigned type before calling the method.
- /// @returns true if V is a valid value for type Ty
- /// @brief Determine if the value is in range for the given type.
- static bool isValueValidForType(Type *Ty, uint64_t V);
- static bool isValueValidForType(Type *Ty, int64_t V);
-
- bool isNegative() const { return Val.isNegative(); }
-
- /// This is just a convenience method to make client code smaller for a
-  /// common case. It also correctly performs the comparison without the
- /// potential for an assertion from getZExtValue().
- bool isZero() const {
- return Val == 0;
- }
-
- /// This is just a convenience method to make client code smaller for a
- /// common case. It also correctly performs the comparison without the
- /// potential for an assertion from getZExtValue().
- /// @brief Determine if the value is one.
- bool isOne() const {
- return Val == 1;
- }
-
- /// This function will return true iff every bit in this constant is set
- /// to true.
- /// @returns true iff this constant's bits are all set to true.
- /// @brief Determine if the value is all ones.
- bool isMinusOne() const {
- return Val.isAllOnesValue();
- }
-
- /// This function will return true iff this constant represents the largest
- /// value that may be represented by the constant's type.
- /// @returns true iff this is the largest value that may be represented
- /// by this type.
- /// @brief Determine if the value is maximal.
- bool isMaxValue(bool isSigned) const {
- if (isSigned)
- return Val.isMaxSignedValue();
- else
- return Val.isMaxValue();
- }
-
- /// This function will return true iff this constant represents the smallest
- /// value that may be represented by this constant's type.
- /// @returns true if this is the smallest value that may be represented by
- /// this type.
- /// @brief Determine if the value is minimal.
- bool isMinValue(bool isSigned) const {
- if (isSigned)
- return Val.isMinSignedValue();
- else
- return Val.isMinValue();
- }
-
-  /// This function will return true iff this constant represents a value with
-  /// more than 64 active bits, or a value greater than or equal to the given
-  /// uint64_t value.
-  /// @returns true iff this constant is greater than or equal to the given number.
-  /// @brief Determine if the value is greater than or equal to the given number.
- bool uge(uint64_t Num) const {
- return Val.getActiveBits() > 64 || Val.getZExtValue() >= Num;
- }
-
- /// getLimitedValue - If the value is smaller than the specified limit,
- /// return it, otherwise return the limit value. This causes the value
- /// to saturate to the limit.
- /// @returns the min of the value of the constant and the specified value
- /// @brief Get the constant's value with a saturation limit
- uint64_t getLimitedValue(uint64_t Limit = ~0ULL) const {
- return Val.getLimitedValue(Limit);
- }
-
- /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
- static bool classof(const Value *V) {
- return V->getValueID() == ConstantIntVal;
- }
-};
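
A quick usage sketch of the factory methods above (assuming the post-move
llvm/IR/ header locations of this import and an existing LLVMContext; the
helper function name is illustrative only):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include <cassert>

    using namespace llvm;

    void constantIntSketch(LLVMContext &Ctx) {
      // Constants are uniqued: two structurally equivalent gets return
      // the same pointer.
      ConstantInt *A = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
      ConstantInt *B = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
      assert(A == B && "equivalent constants are shared");

      // Negative values go through getSigned() so they are sign-extended.
      ConstantInt *M = ConstantInt::getSigned(Type::getInt32Ty(Ctx), -1);
      assert(M->isMinusOne() && M->getBitWidth() == 32);
    }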
-
-
-//===----------------------------------------------------------------------===//
-/// ConstantFP - Floating Point Values [float, double]
-///
-class ConstantFP : public Constant {
- APFloat Val;
- virtual void anchor();
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- ConstantFP(const ConstantFP &) LLVM_DELETED_FUNCTION;
- friend class LLVMContextImpl;
-protected:
- ConstantFP(Type *Ty, const APFloat& V);
-protected:
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
-public:
- /// Floating point negation must be implemented with f(x) = -0.0 - x. This
- /// method returns the negative zero constant for floating point or vector
- /// floating point types; for all other types, it returns the null value.
- static Constant *getZeroValueForNegation(Type *Ty);
-
- /// get() - This returns a ConstantFP, or a vector containing a splat of a
- /// ConstantFP, for the specified value in the specified type. This should
-  /// only be used for simple constant values like 2.0/1.0, etc., that are
- /// known-valid both as host double and as the target format.
- static Constant *get(Type* Ty, double V);
- static Constant *get(Type* Ty, StringRef Str);
- static ConstantFP *get(LLVMContext &Context, const APFloat &V);
- static ConstantFP *getNegativeZero(Type* Ty);
- static ConstantFP *getInfinity(Type *Ty, bool Negative = false);
-
- /// isValueValidForType - return true if Ty is big enough to represent V.
- static bool isValueValidForType(Type *Ty, const APFloat &V);
- inline const APFloat &getValueAPF() const { return Val; }
-
- /// isZero - Return true if the value is positive or negative zero.
- bool isZero() const { return Val.isZero(); }
-
- /// isNegative - Return true if the sign bit is set.
- bool isNegative() const { return Val.isNegative(); }
-
- /// isNaN - Return true if the value is a NaN.
- bool isNaN() const { return Val.isNaN(); }
-
- /// isExactlyValue - We don't rely on operator== working on double values, as
- /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
- /// As such, this method can be used to do an exact bit-for-bit comparison of
- /// two floating point values. The version with a double operand is retained
- /// because it's so convenient to write isExactlyValue(2.0), but please use
- /// it only for simple constants.
- bool isExactlyValue(const APFloat &V) const;
-
- bool isExactlyValue(double V) const {
- bool ignored;
- APFloat FV(V);
- FV.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &ignored);
- return isExactlyValue(FV);
- }
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Value *V) {
- return V->getValueID() == ConstantFPVal;
- }
-};
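
A short sketch of the ConstantFP factories, reusing the includes from the
ConstantInt sketch above:

    void constantFPSketch(LLVMContext &Ctx) {
      Constant *Two = ConstantFP::get(Type::getDoubleTy(Ctx), 2.0);
      ConstantFP *NegZero = ConstantFP::getNegativeZero(Type::getFloatTy(Ctx));
      assert(NegZero->isZero() && NegZero->isNegative());
      // isExactlyValue() is a bit-for-bit comparison, so -0.0 != +0.0 here.
      assert(!NegZero->isExactlyValue(+0.0));
      (void)Two;
    }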
-
-//===----------------------------------------------------------------------===//
-/// ConstantAggregateZero - All zero aggregate value
-///
-class ConstantAggregateZero : public Constant {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- ConstantAggregateZero(const ConstantAggregateZero &) LLVM_DELETED_FUNCTION;
-protected:
- explicit ConstantAggregateZero(Type *ty)
- : Constant(ty, ConstantAggregateZeroVal, 0, 0) {}
-protected:
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
-public:
- static ConstantAggregateZero *get(Type *Ty);
-
- virtual void destroyConstant();
-
- /// getSequentialElement - If this CAZ has array or vector type, return a zero
- /// with the right element type.
- Constant *getSequentialElement() const;
-
- /// getStructElement - If this CAZ has struct type, return a zero with the
- /// right element type for the specified element.
- Constant *getStructElement(unsigned Elt) const;
-
-  /// getElementValue - Return a zero of the right type for the specified GEP
- /// index.
- Constant *getElementValue(Constant *C) const;
-
-  /// getElementValue - Return a zero of the right type for the specified GEP
- /// index.
- Constant *getElementValue(unsigned Idx) const;
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- ///
- static bool classof(const Value *V) {
- return V->getValueID() == ConstantAggregateZeroVal;
- }
-};
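
A sketch of requesting a zeroinitializer for an aggregate type (same assumed
includes as above):

    void cazSketch(LLVMContext &Ctx) {
      ArrayType *ATy = ArrayType::get(Type::getInt64Ty(Ctx), 8);
      ConstantAggregateZero *CAZ = ConstantAggregateZero::get(ATy);
      // The per-element zero has the array's element type (i64 here).
      assert(CAZ->getSequentialElement()->isNullValue());
    }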
-
-
-//===----------------------------------------------------------------------===//
-/// ConstantArray - Constant Array Declarations
-///
-class ConstantArray : public Constant {
- friend struct ConstantArrayCreator<ConstantArray, ArrayType>;
- ConstantArray(const ConstantArray &) LLVM_DELETED_FUNCTION;
-protected:
- ConstantArray(ArrayType *T, ArrayRef<Constant *> Val);
-public:
- // ConstantArray accessors
- static Constant *get(ArrayType *T, ArrayRef<Constant*> V);
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
-
- /// getType - Specialize the getType() method to always return an ArrayType,
- /// which reduces the amount of casting needed in parts of the compiler.
- ///
- inline ArrayType *getType() const {
- return reinterpret_cast<ArrayType*>(Value::getType());
- }
-
- virtual void destroyConstant();
- virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Value *V) {
- return V->getValueID() == ConstantArrayVal;
- }
-};
-
-template <>
-struct OperandTraits<ConstantArray> :
- public VariadicOperandTraits<ConstantArray> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantArray, Constant)
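A sketch of building an array constant; note that get() may fold the result
into a more compact node such as a ConstantDataArray:

    void constantArraySketch(LLVMContext &Ctx) {
      Type *I32 = Type::getInt32Ty(Ctx);
      ArrayType *ATy = ArrayType::get(I32, 2);
      Constant *Elts[] = { ConstantInt::get(I32, 1), ConstantInt::get(I32, 2) };
      Constant *CA = ConstantArray::get(ATy, Elts);
      (void)CA; // not necessarily a ConstantArray after folding
    }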
-
-//===----------------------------------------------------------------------===//
-// ConstantStruct - Constant Struct Declarations
-//
-class ConstantStruct : public Constant {
- friend struct ConstantArrayCreator<ConstantStruct, StructType>;
- ConstantStruct(const ConstantStruct &) LLVM_DELETED_FUNCTION;
-protected:
- ConstantStruct(StructType *T, ArrayRef<Constant *> Val);
-public:
- // ConstantStruct accessors
- static Constant *get(StructType *T, ArrayRef<Constant*> V);
- static Constant *get(StructType *T, ...) END_WITH_NULL;
-
- /// getAnon - Return an anonymous struct that has the specified
- /// elements. If the struct is possibly empty, then you must specify a
- /// context.
- static Constant *getAnon(ArrayRef<Constant*> V, bool Packed = false) {
- return get(getTypeForElements(V, Packed), V);
- }
- static Constant *getAnon(LLVMContext &Ctx,
- ArrayRef<Constant*> V, bool Packed = false) {
- return get(getTypeForElements(Ctx, V, Packed), V);
- }
-
- /// getTypeForElements - Return an anonymous struct type to use for a constant
- /// with the specified set of elements. The list must not be empty.
- static StructType *getTypeForElements(ArrayRef<Constant*> V,
- bool Packed = false);
- /// getTypeForElements - This version of the method allows an empty list.
- static StructType *getTypeForElements(LLVMContext &Ctx,
- ArrayRef<Constant*> V,
- bool Packed = false);
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
-
- /// getType() specialization - Reduce amount of casting...
- ///
- inline StructType *getType() const {
- return reinterpret_cast<StructType*>(Value::getType());
- }
-
- virtual void destroyConstant();
- virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Value *V) {
- return V->getValueID() == ConstantStructVal;
- }
-};
-
-template <>
-struct OperandTraits<ConstantStruct> :
- public VariadicOperandTraits<ConstantStruct> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantStruct, Constant)
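A sketch of getAnon() for a literal struct constant; the element list is
non-empty, so no explicit context is needed:

    void constantStructSketch(LLVMContext &Ctx) {
      Constant *Fields[] = {
        ConstantInt::get(Type::getInt8Ty(Ctx), 7),
        ConstantFP::get(Type::getDoubleTy(Ctx), 3.5)
      };
      Constant *S = ConstantStruct::getAnon(Fields);
      StructType *STy = cast<StructType>(S->getType()); // anonymous struct type
      (void)STy;
    }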
-
-
-//===----------------------------------------------------------------------===//
-/// ConstantVector - Constant Vector Declarations
-///
-class ConstantVector : public Constant {
- friend struct ConstantArrayCreator<ConstantVector, VectorType>;
- ConstantVector(const ConstantVector &) LLVM_DELETED_FUNCTION;
-protected:
- ConstantVector(VectorType *T, ArrayRef<Constant *> Val);
-public:
- // ConstantVector accessors
- static Constant *get(ArrayRef<Constant*> V);
-
- /// getSplat - Return a ConstantVector with the specified constant in each
- /// element.
- static Constant *getSplat(unsigned NumElts, Constant *Elt);
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
-
- /// getType - Specialize the getType() method to always return a VectorType,
- /// which reduces the amount of casting needed in parts of the compiler.
- ///
- inline VectorType *getType() const {
- return reinterpret_cast<VectorType*>(Value::getType());
- }
-
- /// getSplatValue - If this is a splat constant, meaning that all of the
- /// elements have the same value, return that value. Otherwise return NULL.
- Constant *getSplatValue() const;
-
- virtual void destroyConstant();
- virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Value *V) {
- return V->getValueID() == ConstantVectorVal;
- }
-};
-
-template <>
-struct OperandTraits<ConstantVector> :
- public VariadicOperandTraits<ConstantVector> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantVector, Constant)
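A splat sketch; as with the other aggregate factories, getSplat() may fold the
result into a ConstantDataVector:

    void constantVectorSketch(LLVMContext &Ctx) {
      Constant *One = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
      Constant *V = ConstantVector::getSplat(4, One); // <4 x i32> of 1
      if (ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V))
        assert(CDV->getSplatValue() == One);
    }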
-
-//===----------------------------------------------------------------------===//
-/// ConstantPointerNull - a constant pointer value that points to null
-///
-class ConstantPointerNull : public Constant {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- ConstantPointerNull(const ConstantPointerNull &) LLVM_DELETED_FUNCTION;
-protected:
- explicit ConstantPointerNull(PointerType *T)
- : Constant(reinterpret_cast<Type*>(T),
- Value::ConstantPointerNullVal, 0, 0) {}
-
-protected:
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
-public:
- /// get() - Static factory methods - Return objects of the specified value
- static ConstantPointerNull *get(PointerType *T);
-
- virtual void destroyConstant();
-
-  /// getType - Specialize the getType() method to always return a PointerType,
- /// which reduces the amount of casting needed in parts of the compiler.
- ///
- inline PointerType *getType() const {
- return reinterpret_cast<PointerType*>(Value::getType());
- }
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Value *V) {
- return V->getValueID() == ConstantPointerNullVal;
- }
-};
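
A sketch of the null-pointer factory (illustrative helper name):

    Constant *nullOfI8Ptr(LLVMContext &Ctx) {
      PointerType *PTy = Type::getInt8PtrTy(Ctx); // i8*
      return ConstantPointerNull::get(PTy);
    }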
-
-//===----------------------------------------------------------------------===//
-/// ConstantDataSequential - A vector or array constant whose element type is a
-/// simple 1/2/4/8-byte integer or float/double, and whose elements are just
-/// simple data values (i.e. ConstantInt/ConstantFP). This Constant node has no
-/// operands because it stores all of the elements of the constant as densely
-/// packed data, instead of as Value*'s.
-///
-/// This is the common base class of ConstantDataArray and ConstantDataVector.
-///
-class ConstantDataSequential : public Constant {
- friend class LLVMContextImpl;
- /// DataElements - A pointer to the bytes underlying this constant (which is
- /// owned by the uniquing StringMap).
- const char *DataElements;
-
-  /// Next - This forms a linked list of ConstantDataSequential nodes that have
- /// the same value but different type. For example, 0,0,0,1 could be a 4
- /// element array of i8, or a 1-element array of i32. They'll both end up in
- /// the same StringMap bucket, linked up.
- ConstantDataSequential *Next;
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- ConstantDataSequential(const ConstantDataSequential &) LLVM_DELETED_FUNCTION;
-protected:
- explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data)
- : Constant(ty, VT, 0, 0), DataElements(Data), Next(0) {}
- ~ConstantDataSequential() { delete Next; }
-
- static Constant *getImpl(StringRef Bytes, Type *Ty);
-
-protected:
- // allocate space for exactly zero operands.
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
-public:
-
- /// isElementTypeCompatible - Return true if a ConstantDataSequential can be
- /// formed with a vector or array of the specified element type.
- /// ConstantDataArray only works with normal float and int types that are
- /// stored densely in memory, not with things like i42 or x86_f80.
- static bool isElementTypeCompatible(const Type *Ty);
-
- /// getElementAsInteger - If this is a sequential container of integers (of
- /// any size), return the specified element in the low bits of a uint64_t.
- uint64_t getElementAsInteger(unsigned i) const;
-
- /// getElementAsAPFloat - If this is a sequential container of floating point
- /// type, return the specified element as an APFloat.
- APFloat getElementAsAPFloat(unsigned i) const;
-
-  /// getElementAsFloat - If this is a sequential container of floats, return
- /// the specified element as a float.
- float getElementAsFloat(unsigned i) const;
-
-  /// getElementAsDouble - If this is a sequential container of doubles, return
- /// the specified element as a double.
- double getElementAsDouble(unsigned i) const;
-
- /// getElementAsConstant - Return a Constant for a specified index's element.
- /// Note that this has to compute a new constant to return, so it isn't as
- /// efficient as getElementAsInteger/Float/Double.
- Constant *getElementAsConstant(unsigned i) const;
-
- /// getType - Specialize the getType() method to always return a
- /// SequentialType, which reduces the amount of casting needed in parts of the
- /// compiler.
- inline SequentialType *getType() const {
- return reinterpret_cast<SequentialType*>(Value::getType());
- }
-
- /// getElementType - Return the element type of the array/vector.
- Type *getElementType() const;
-
- /// getNumElements - Return the number of elements in the array or vector.
- unsigned getNumElements() const;
-
- /// getElementByteSize - Return the size (in bytes) of each element in the
- /// array/vector. The size of the elements is known to be a multiple of one
- /// byte.
- uint64_t getElementByteSize() const;
-
-
- /// isString - This method returns true if this is an array of i8.
- bool isString() const;
-
- /// isCString - This method returns true if the array "isString", ends with a
-  /// nul byte, and does not contain any other nul bytes.
- bool isCString() const;
-
- /// getAsString - If this array is isString(), then this method returns the
- /// array as a StringRef. Otherwise, it asserts out.
- ///
- StringRef getAsString() const {
- assert(isString() && "Not a string");
- return getRawDataValues();
- }
-
- /// getAsCString - If this array is isCString(), then this method returns the
- /// array (without the trailing null byte) as a StringRef. Otherwise, it
- /// asserts out.
- ///
- StringRef getAsCString() const {
- assert(isCString() && "Isn't a C string");
- StringRef Str = getAsString();
- return Str.substr(0, Str.size()-1);
- }
-
- /// getRawDataValues - Return the raw, underlying, bytes of this data. Note
- /// that this is an extremely tricky thing to work with, as it exposes the
- /// host endianness of the data elements.
- StringRef getRawDataValues() const;
-
- virtual void destroyConstant();
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- ///
- static bool classof(const Value *V) {
- return V->getValueID() == ConstantDataArrayVal ||
- V->getValueID() == ConstantDataVectorVal;
- }
-private:
- const char *getElementPointer(unsigned Elt) const;
-};
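
A sketch of the string-oriented accessors; getString() itself is declared on
ConstantDataArray below:

    void cdsSketch(LLVMContext &Ctx) {
      Constant *C = ConstantDataArray::getString(Ctx, "hi"); // [3 x i8] "hi\0"
      ConstantDataSequential *CDS = cast<ConstantDataSequential>(C);
      assert(CDS->isCString() && CDS->getAsCString() == "hi");
      assert(CDS->getElementAsInteger(0) == 'h');
    }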
-
-//===----------------------------------------------------------------------===//
-/// ConstantDataArray - An array constant whose element type is a simple
-/// 1/2/4/8-byte integer or float/double, and whose elements are just simple
-/// data values (i.e. ConstantInt/ConstantFP). This Constant node has no
-/// operands because it stores all of the elements of the constant as densely
-/// packed data, instead of as Value*'s.
-class ConstantDataArray : public ConstantDataSequential {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- ConstantDataArray(const ConstantDataArray &) LLVM_DELETED_FUNCTION;
- virtual void anchor();
- friend class ConstantDataSequential;
- explicit ConstantDataArray(Type *ty, const char *Data)
- : ConstantDataSequential(ty, ConstantDataArrayVal, Data) {}
-protected:
- // allocate space for exactly zero operands.
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
-public:
-
- /// get() constructors - Return a constant with array type with an element
- /// count and element type matching the ArrayRef passed in. Note that this
- /// can return a ConstantAggregateZero object.
- static Constant *get(LLVMContext &Context, ArrayRef<uint8_t> Elts);
- static Constant *get(LLVMContext &Context, ArrayRef<uint16_t> Elts);
- static Constant *get(LLVMContext &Context, ArrayRef<uint32_t> Elts);
- static Constant *get(LLVMContext &Context, ArrayRef<uint64_t> Elts);
- static Constant *get(LLVMContext &Context, ArrayRef<float> Elts);
- static Constant *get(LLVMContext &Context, ArrayRef<double> Elts);
-
- /// getString - This method constructs a CDS and initializes it with a text
- /// string. The default behavior (AddNull==true) causes a null terminator to
- /// be placed at the end of the array (increasing the length of the string by
-  /// one more than the StringRef would normally indicate). Pass AddNull=false
- /// to disable this behavior.
- static Constant *getString(LLVMContext &Context, StringRef Initializer,
- bool AddNull = true);
-
- /// getType - Specialize the getType() method to always return an ArrayType,
- /// which reduces the amount of casting needed in parts of the compiler.
- ///
- inline ArrayType *getType() const {
- return reinterpret_cast<ArrayType*>(Value::getType());
- }
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- ///
- static bool classof(const Value *V) {
- return V->getValueID() == ConstantDataArrayVal;
- }
-};
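
A sketch of building dense arrays straight from host data; an all-zero input
folds to a ConstantAggregateZero, as noted above (uint32_t comes in via the
assumed includes):

    void cdaSketch(LLVMContext &Ctx) {
      uint32_t Vals[] = { 1, 2, 3, 4 };
      Constant *Arr = ConstantDataArray::get(Ctx, Vals); // [4 x i32]
      uint32_t Zeros[] = { 0, 0 };
      assert(isa<ConstantAggregateZero>(ConstantDataArray::get(Ctx, Zeros)));
      (void)Arr;
    }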
-
-//===----------------------------------------------------------------------===//
-/// ConstantDataVector - A vector constant whose element type is a simple
-/// 1/2/4/8-byte integer or float/double, and whose elements are just simple
-/// data values (i.e. ConstantInt/ConstantFP). This Constant node has no
-/// operands because it stores all of the elements of the constant as densely
-/// packed data, instead of as Value*'s.
-class ConstantDataVector : public ConstantDataSequential {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- ConstantDataVector(const ConstantDataVector &) LLVM_DELETED_FUNCTION;
- virtual void anchor();
- friend class ConstantDataSequential;
- explicit ConstantDataVector(Type *ty, const char *Data)
- : ConstantDataSequential(ty, ConstantDataVectorVal, Data) {}
-protected:
- // allocate space for exactly zero operands.
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
-public:
-
- /// get() constructors - Return a constant with vector type with an element
- /// count and element type matching the ArrayRef passed in. Note that this
- /// can return a ConstantAggregateZero object.
- static Constant *get(LLVMContext &Context, ArrayRef<uint8_t> Elts);
- static Constant *get(LLVMContext &Context, ArrayRef<uint16_t> Elts);
- static Constant *get(LLVMContext &Context, ArrayRef<uint32_t> Elts);
- static Constant *get(LLVMContext &Context, ArrayRef<uint64_t> Elts);
- static Constant *get(LLVMContext &Context, ArrayRef<float> Elts);
- static Constant *get(LLVMContext &Context, ArrayRef<double> Elts);
-
- /// getSplat - Return a ConstantVector with the specified constant in each
-  /// element. The specified constant has to be of a compatible type (i8/i16/
- /// i32/i64/float/double) and must be a ConstantFP or ConstantInt.
- static Constant *getSplat(unsigned NumElts, Constant *Elt);
-
- /// getSplatValue - If this is a splat constant, meaning that all of the
- /// elements have the same value, return that value. Otherwise return NULL.
- Constant *getSplatValue() const;
-
- /// getType - Specialize the getType() method to always return a VectorType,
- /// which reduces the amount of casting needed in parts of the compiler.
- ///
- inline VectorType *getType() const {
- return reinterpret_cast<VectorType*>(Value::getType());
- }
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- ///
- static bool classof(const Value *V) {
- return V->getValueID() == ConstantDataVectorVal;
- }
-};
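
A splat sketch with a floating point element:

    void cdvSketch(LLVMContext &Ctx) {
      Constant *Elt = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
      Constant *V = ConstantDataVector::getSplat(4, Elt); // <4 x float> of 1.0
      (void)V;
    }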
-
-
-
-/// BlockAddress - The address of a basic block.
-///
-class BlockAddress : public Constant {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- void *operator new(size_t s) { return User::operator new(s, 2); }
- BlockAddress(Function *F, BasicBlock *BB);
-public:
- /// get - Return a BlockAddress for the specified function and basic block.
- static BlockAddress *get(Function *F, BasicBlock *BB);
-
- /// get - Return a BlockAddress for the specified basic block. The basic
- /// block must be embedded into a function.
- static BlockAddress *get(BasicBlock *BB);
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- Function *getFunction() const { return (Function*)Op<0>().get(); }
- BasicBlock *getBasicBlock() const { return (BasicBlock*)Op<1>().get(); }
-
- virtual void destroyConstant();
- virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
- return V->getValueID() == BlockAddressVal;
- }
-};
-
-template <>
-struct OperandTraits<BlockAddress> :
- public FixedNumOperandTraits<BlockAddress, 2> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BlockAddress, Value)
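A sketch of taking a block's address, the pattern used with indirectbr; F and
BB are assumed to be an existing Function and one of its basic blocks:

    BlockAddress *takeAddress(Function *F, BasicBlock *BB) {
      BlockAddress *BA = BlockAddress::get(F, BB);
      assert(BA->getFunction() == F && BA->getBasicBlock() == BB);
      return BA;
    }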
-
-
-//===----------------------------------------------------------------------===//
-/// ConstantExpr - a constant value that is initialized with an expression using
-/// other constant values.
-///
-/// This class uses the standard Instruction opcodes to define the various
-/// constant expressions. The Opcode field for the ConstantExpr class is
-/// maintained in the Value::SubclassData field.
-class ConstantExpr : public Constant {
- friend struct ConstantCreator<ConstantExpr,Type,
- std::pair<unsigned, std::vector<Constant*> > >;
- friend struct ConvertConstantType<ConstantExpr, Type>;
-
-protected:
- ConstantExpr(Type *ty, unsigned Opcode, Use *Ops, unsigned NumOps)
- : Constant(ty, ConstantExprVal, Ops, NumOps) {
- // Operation type (an Instruction opcode) is stored as the SubclassData.
- setValueSubclassData(Opcode);
- }
-
-public:
- // Static methods to construct a ConstantExpr of different kinds. Note that
-  // these methods may return an object that is not an instance of the
- // ConstantExpr class, because they will attempt to fold the constant
- // expression into something simpler if possible.
-
- /// getAlignOf constant expr - computes the alignment of a type in a target
- /// independent way (Note: the return type is an i64).
- static Constant *getAlignOf(Type *Ty);
-
- /// getSizeOf constant expr - computes the (alloc) size of a type (in
- /// address-units, not bits) in a target independent way (Note: the return
- /// type is an i64).
- ///
- static Constant *getSizeOf(Type *Ty);
-
- /// getOffsetOf constant expr - computes the offset of a struct field in a
- /// target independent way (Note: the return type is an i64).
- ///
- static Constant *getOffsetOf(StructType *STy, unsigned FieldNo);
-
- /// getOffsetOf constant expr - This is a generalized form of getOffsetOf,
- /// which supports any aggregate type, and any Constant index.
- ///
- static Constant *getOffsetOf(Type *Ty, Constant *FieldNo);
-
- static Constant *getNeg(Constant *C, bool HasNUW = false, bool HasNSW =false);
- static Constant *getFNeg(Constant *C);
- static Constant *getNot(Constant *C);
- static Constant *getAdd(Constant *C1, Constant *C2,
- bool HasNUW = false, bool HasNSW = false);
- static Constant *getFAdd(Constant *C1, Constant *C2);
- static Constant *getSub(Constant *C1, Constant *C2,
- bool HasNUW = false, bool HasNSW = false);
- static Constant *getFSub(Constant *C1, Constant *C2);
- static Constant *getMul(Constant *C1, Constant *C2,
- bool HasNUW = false, bool HasNSW = false);
- static Constant *getFMul(Constant *C1, Constant *C2);
- static Constant *getUDiv(Constant *C1, Constant *C2, bool isExact = false);
- static Constant *getSDiv(Constant *C1, Constant *C2, bool isExact = false);
- static Constant *getFDiv(Constant *C1, Constant *C2);
- static Constant *getURem(Constant *C1, Constant *C2);
- static Constant *getSRem(Constant *C1, Constant *C2);
- static Constant *getFRem(Constant *C1, Constant *C2);
- static Constant *getAnd(Constant *C1, Constant *C2);
- static Constant *getOr(Constant *C1, Constant *C2);
- static Constant *getXor(Constant *C1, Constant *C2);
- static Constant *getShl(Constant *C1, Constant *C2,
- bool HasNUW = false, bool HasNSW = false);
- static Constant *getLShr(Constant *C1, Constant *C2, bool isExact = false);
- static Constant *getAShr(Constant *C1, Constant *C2, bool isExact = false);
- static Constant *getTrunc (Constant *C, Type *Ty);
- static Constant *getSExt (Constant *C, Type *Ty);
- static Constant *getZExt (Constant *C, Type *Ty);
- static Constant *getFPTrunc (Constant *C, Type *Ty);
- static Constant *getFPExtend(Constant *C, Type *Ty);
- static Constant *getUIToFP (Constant *C, Type *Ty);
- static Constant *getSIToFP (Constant *C, Type *Ty);
- static Constant *getFPToUI (Constant *C, Type *Ty);
- static Constant *getFPToSI (Constant *C, Type *Ty);
- static Constant *getPtrToInt(Constant *C, Type *Ty);
- static Constant *getIntToPtr(Constant *C, Type *Ty);
- static Constant *getBitCast (Constant *C, Type *Ty);
-
- static Constant *getNSWNeg(Constant *C) { return getNeg(C, false, true); }
- static Constant *getNUWNeg(Constant *C) { return getNeg(C, true, false); }
- static Constant *getNSWAdd(Constant *C1, Constant *C2) {
- return getAdd(C1, C2, false, true);
- }
- static Constant *getNUWAdd(Constant *C1, Constant *C2) {
- return getAdd(C1, C2, true, false);
- }
- static Constant *getNSWSub(Constant *C1, Constant *C2) {
- return getSub(C1, C2, false, true);
- }
- static Constant *getNUWSub(Constant *C1, Constant *C2) {
- return getSub(C1, C2, true, false);
- }
- static Constant *getNSWMul(Constant *C1, Constant *C2) {
- return getMul(C1, C2, false, true);
- }
- static Constant *getNUWMul(Constant *C1, Constant *C2) {
- return getMul(C1, C2, true, false);
- }
- static Constant *getNSWShl(Constant *C1, Constant *C2) {
- return getShl(C1, C2, false, true);
- }
- static Constant *getNUWShl(Constant *C1, Constant *C2) {
- return getShl(C1, C2, true, false);
- }
- static Constant *getExactSDiv(Constant *C1, Constant *C2) {
- return getSDiv(C1, C2, true);
- }
- static Constant *getExactUDiv(Constant *C1, Constant *C2) {
- return getUDiv(C1, C2, true);
- }
- static Constant *getExactAShr(Constant *C1, Constant *C2) {
- return getAShr(C1, C2, true);
- }
- static Constant *getExactLShr(Constant *C1, Constant *C2) {
- return getLShr(C1, C2, true);
- }
-
- /// getBinOpIdentity - Return the identity for the given binary operation,
- /// i.e. a constant C such that X op C = X and C op X = X for every X. It
- /// returns null if the operator doesn't have an identity.
- static Constant *getBinOpIdentity(unsigned Opcode, Type *Ty);
-
- /// getBinOpAbsorber - Return the absorbing element for the given binary
- /// operation, i.e. a constant C such that X op C = C and C op X = C for
- /// every X. For example, this returns zero for integer multiplication.
- /// It returns null if the operator doesn't have an absorbing element.
- static Constant *getBinOpAbsorber(unsigned Opcode, Type *Ty);
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
-
-  /// @brief Convenience function for getting one of the casting operations
-  /// using a CastOps opcode.
- static Constant *getCast(
- unsigned ops, ///< The opcode for the conversion
- Constant *C, ///< The constant to be converted
- Type *Ty ///< The type to which the constant is converted
- );
-
-  /// @brief Create a ZExt or BitCast cast constant expression
- static Constant *getZExtOrBitCast(
- Constant *C, ///< The constant to zext or bitcast
- Type *Ty ///< The type to zext or bitcast C to
- );
-
-  /// @brief Create a SExt or BitCast cast constant expression
- static Constant *getSExtOrBitCast(
- Constant *C, ///< The constant to sext or bitcast
- Type *Ty ///< The type to sext or bitcast C to
- );
-
-  /// @brief Create a Trunc or BitCast cast constant expression
- static Constant *getTruncOrBitCast(
- Constant *C, ///< The constant to trunc or bitcast
- Type *Ty ///< The type to trunc or bitcast C to
- );
-
- /// @brief Create a BitCast or a PtrToInt cast constant expression
- static Constant *getPointerCast(
- Constant *C, ///< The pointer value to be casted (operand 0)
- Type *Ty ///< The type to which cast should be made
- );
-
- /// @brief Create a ZExt, Bitcast or Trunc for integer -> integer casts
- static Constant *getIntegerCast(
- Constant *C, ///< The integer constant to be casted
- Type *Ty, ///< The integer type to cast to
- bool isSigned ///< Whether C should be treated as signed or not
- );
-
- /// @brief Create a FPExt, Bitcast or FPTrunc for fp -> fp casts
- static Constant *getFPCast(
-    Constant *C,      ///< The floating point constant to be casted
-    Type *Ty          ///< The floating point type to cast to
- );
-
- /// @brief Return true if this is a convert constant expression
- bool isCast() const;
-
- /// @brief Return true if this is a compare constant expression
- bool isCompare() const;
-
- /// @brief Return true if this is an insertvalue or extractvalue expression,
- /// and the getIndices() method may be used.
- bool hasIndices() const;
-
- /// @brief Return true if this is a getelementptr expression and all
- /// the index operands are compile-time known integers within the
- /// corresponding notional static array extents. Note that this is
-  /// not equivalent to, a subset of, or a superset of the "inbounds"
- /// property.
- bool isGEPWithNoNotionalOverIndexing() const;
-
- /// Select constant expr
- ///
- static Constant *getSelect(Constant *C, Constant *V1, Constant *V2);
-
- /// get - Return a binary or shift operator constant expression,
- /// folding if possible.
- ///
- static Constant *get(unsigned Opcode, Constant *C1, Constant *C2,
- unsigned Flags = 0);
-
- /// @brief Return an ICmp or FCmp comparison operator constant expression.
- static Constant *getCompare(unsigned short pred, Constant *C1, Constant *C2);
-
- /// get* - Return some common constants without having to
- /// specify the full Instruction::OPCODE identifier.
- ///
- static Constant *getICmp(unsigned short pred, Constant *LHS, Constant *RHS);
- static Constant *getFCmp(unsigned short pred, Constant *LHS, Constant *RHS);
-
- /// Getelementptr form. Value* is only accepted for convenience;
-  /// all elements must be Constants.
- ///
- static Constant *getGetElementPtr(Constant *C,
- ArrayRef<Constant *> IdxList,
- bool InBounds = false) {
- return getGetElementPtr(C, makeArrayRef((Value * const *)IdxList.data(),
- IdxList.size()),
- InBounds);
- }
- static Constant *getGetElementPtr(Constant *C,
- Constant *Idx,
- bool InBounds = false) {
- // This form of the function only exists to avoid ambiguous overload
- // warnings about whether to convert Idx to ArrayRef<Constant *> or
- // ArrayRef<Value *>.
- return getGetElementPtr(C, cast<Value>(Idx), InBounds);
- }
- static Constant *getGetElementPtr(Constant *C,
- ArrayRef<Value *> IdxList,
- bool InBounds = false);
-
- /// Create an "inbounds" getelementptr. See the documentation for the
- /// "inbounds" flag in LangRef.html for details.
- static Constant *getInBoundsGetElementPtr(Constant *C,
- ArrayRef<Constant *> IdxList) {
- return getGetElementPtr(C, IdxList, true);
- }
- static Constant *getInBoundsGetElementPtr(Constant *C,
- Constant *Idx) {
- // This form of the function only exists to avoid ambiguous overload
- // warnings about whether to convert Idx to ArrayRef<Constant *> or
- // ArrayRef<Value *>.
- return getGetElementPtr(C, Idx, true);
- }
- static Constant *getInBoundsGetElementPtr(Constant *C,
- ArrayRef<Value *> IdxList) {
- return getGetElementPtr(C, IdxList, true);
- }
-
- static Constant *getExtractElement(Constant *Vec, Constant *Idx);
- static Constant *getInsertElement(Constant *Vec, Constant *Elt,Constant *Idx);
- static Constant *getShuffleVector(Constant *V1, Constant *V2, Constant *Mask);
- static Constant *getExtractValue(Constant *Agg, ArrayRef<unsigned> Idxs);
- static Constant *getInsertValue(Constant *Agg, Constant *Val,
- ArrayRef<unsigned> Idxs);
-
- /// getOpcode - Return the opcode at the root of this constant expression
- unsigned getOpcode() const { return getSubclassDataFromValue(); }
-
- /// getPredicate - Return the ICMP or FCMP predicate value. Assert if this is
- /// not an ICMP or FCMP constant expression.
- unsigned getPredicate() const;
-
-  /// getIndices - Assert that this is an insertvalue or extractvalue
- /// expression and return the list of indices.
- ArrayRef<unsigned> getIndices() const;
-
- /// getOpcodeName - Return a string representation for an opcode.
- const char *getOpcodeName() const;
-
- /// getWithOperandReplaced - Return a constant expression identical to this
- /// one, but with the specified operand set to the specified value.
- Constant *getWithOperandReplaced(unsigned OpNo, Constant *Op) const;
-
- /// getWithOperands - This returns the current constant expression with the
- /// operands replaced with the specified values. The specified array must
- /// have the same number of operands as our current one.
- Constant *getWithOperands(ArrayRef<Constant*> Ops) const {
- return getWithOperands(Ops, getType());
- }
-
- /// getWithOperands - This returns the current constant expression with the
- /// operands replaced with the specified values and with the specified result
- /// type. The specified array must have the same number of operands as our
- /// current one.
- Constant *getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const;
-
- virtual void destroyConstant();
- virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
- return V->getValueID() == ConstantExprVal;
- }
-
-private:
- // Shadow Value::setValueSubclassData with a private forwarding method so that
- // subclasses cannot accidentally use it.
- void setValueSubclassData(unsigned short D) {
- Value::setValueSubclassData(D);
- }
-};
-
-template <>
-struct OperandTraits<ConstantExpr> :
- public VariadicOperandTraits<ConstantExpr, 1> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantExpr, Constant)
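A sketch showing that the ConstantExpr factories fold eagerly, so the result is
not necessarily a ConstantExpr node:

    void constantExprSketch(LLVMContext &Ctx) {
      Type *I32 = Type::getInt32Ty(Ctx);
      Constant *A = ConstantInt::get(I32, 6);
      Constant *B = ConstantInt::get(I32, 7);
      Constant *Sum = ConstantExpr::getAdd(A, B); // folds to i32 13
      assert(cast<ConstantInt>(Sum)->equalsInt(13));
      // A cast of a GlobalVariable address, by contrast, would remain a
      // genuine ConstantExpr until the address is known.
    }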
-
-//===----------------------------------------------------------------------===//
-/// UndefValue - 'undef' values are things that do not have specified contents.
-/// These are used for a variety of purposes, including global variable
-/// initializers and operands to instructions. 'undef' values can occur with
-/// any first-class type.
-///
-/// Undef values aren't exactly constants; if they have multiple uses, they
-/// can appear to have different bit patterns at each use. See
-/// LangRef.html#undefvalues for details.
-///
-class UndefValue : public Constant {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- UndefValue(const UndefValue &) LLVM_DELETED_FUNCTION;
-protected:
- explicit UndefValue(Type *T) : Constant(T, UndefValueVal, 0, 0) {}
-protected:
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
-public:
- /// get() - Static factory methods - Return an 'undef' object of the specified
- /// type.
- ///
- static UndefValue *get(Type *T);
-
-  /// getSequentialElement - If this Undef has array or vector type, return an
- /// undef with the right element type.
- UndefValue *getSequentialElement() const;
-
-  /// getStructElement - If this undef has struct type, return an undef with the
- /// right element type for the specified element.
- UndefValue *getStructElement(unsigned Elt) const;
-
-  /// getElementValue - Return an undef of the right type for the specified GEP
- /// index.
- UndefValue *getElementValue(Constant *C) const;
-
-  /// getElementValue - Return an undef of the right type for the specified GEP
- /// index.
- UndefValue *getElementValue(unsigned Idx) const;
-
- virtual void destroyConstant();
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Value *V) {
- return V->getValueID() == UndefValueVal;
- }
-};
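
A sketch of undef creation; like other constants, undefs are uniqued per type:

    void undefSketch(LLVMContext &Ctx) {
      UndefValue *U = UndefValue::get(Type::getInt32Ty(Ctx));
      assert(U == UndefValue::get(Type::getInt32Ty(Ctx)));
    }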
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/DIBuilder.h b/include/llvm/DIBuilder.h
index 2f0780068087..4f0aa07130ef 100644
--- a/include/llvm/DIBuilder.h
+++ b/include/llvm/DIBuilder.h
@@ -12,12 +12,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_DIBUILDER_H
-#define LLVM_ANALYSIS_DIBUILDER_H
+#ifndef LLVM_DIBUILDER_H
+#define LLVM_DIBUILDER_H
-#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
class BasicBlock;
@@ -28,6 +28,9 @@ namespace llvm {
class LLVMContext;
class MDNode;
class StringRef;
+ class DIBasicType;
+ class DICompositeType;
+ class DIDerivedType;
class DIDescriptor;
class DIFile;
class DIEnumerator;
@@ -88,9 +91,12 @@ namespace llvm {
/// by a tool analyzing generated debugging information.
/// @param RV This indicates runtime version for languages like
/// Objective-C.
+ /// @param SplitName The name of the file that we'll split debug info out
+ /// into.
void createCompileUnit(unsigned Lang, StringRef File, StringRef Dir,
- StringRef Producer,
- bool isOptimized, StringRef Flags, unsigned RV);
+ StringRef Producer, bool isOptimized,
+ StringRef Flags, unsigned RV,
+ StringRef SplitName = StringRef());
/// createFile - Create a file descriptor to hold debugging information
/// for a file.
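
A hedged sketch of the widened createCompileUnit() signature introduced by this
hunk; M is an existing llvm::Module and the trailing SplitName parameter is
left at its empty default:

    #include "llvm/DIBuilder.h"
    #include "llvm/Support/Dwarf.h"

    void emitCU(llvm::Module &M) {
      llvm::DIBuilder DIB(M);
      DIB.createCompileUnit(llvm::dwarf::DW_LANG_C99, "a.c", "/tmp",
                            "my-frontend", /*isOptimized=*/false,
                            /*Flags=*/"", /*RV=*/0);
      DIB.finalize(); // wrap up and attach the debug info to the module
    }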
@@ -108,27 +114,32 @@ namespace llvm {
/// @param SizeInBits Size of the type.
/// @param AlignInBits Type alignment.
/// @param Encoding DWARF encoding code, e.g. dwarf::DW_ATE_float.
- DIType createBasicType(StringRef Name, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Encoding);
+ DIBasicType createBasicType(StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Encoding);
/// createQualifiedType - Create debugging information entry for a qualified
/// type, e.g. 'const int'.
     /// @param Tag Tag identifying type, e.g. dwarf::TAG_volatile_type
/// @param FromTy Base Type.
- DIType createQualifiedType(unsigned Tag, DIType FromTy);
+ DIDerivedType createQualifiedType(unsigned Tag, DIType FromTy);
/// createPointerType - Create debugging information entry for a pointer.
/// @param PointeeTy Type pointed by this pointer.
/// @param SizeInBits Size.
/// @param AlignInBits Alignment. (optional)
/// @param Name Pointer type name. (optional)
- DIType createPointerType(DIType PointeeTy, uint64_t SizeInBits,
- uint64_t AlignInBits = 0,
- StringRef Name = StringRef());
+ DIDerivedType
+ createPointerType(DIType PointeeTy, uint64_t SizeInBits,
+ uint64_t AlignInBits = 0, StringRef Name = StringRef());
+
+ /// \brief Create debugging information entry for a pointer to member.
+ /// @param PointeeTy Type pointed to by this pointer.
+ /// @param Class Type for which this pointer points to members of.
+ DIDerivedType createMemberPointerType(DIType PointeeTy, DIType Class);
/// createReferenceType - Create debugging information entry for a c++
/// style reference or rvalue reference type.
- DIType createReferenceType(unsigned Tag, DIType RTy);
+ DIDerivedType createReferenceType(unsigned Tag, DIType RTy);
/// createTypedef - Create debugging information entry for a typedef.
/// @param Ty Original type.
@@ -136,8 +147,8 @@ namespace llvm {
/// @param File File where this type is defined.
/// @param LineNo Line number.
/// @param Context The surrounding context for the typedef.
- DIType createTypedef(DIType Ty, StringRef Name, DIFile File,
- unsigned LineNo, DIDescriptor Context);
+ DIDerivedType createTypedef(DIType Ty, StringRef Name, DIFile File,
+ unsigned LineNo, DIDescriptor Context);
/// createFriend - Create debugging information entry for a 'friend'.
DIType createFriend(DIType Ty, DIType FriendTy);
@@ -149,8 +160,8 @@ namespace llvm {
/// @param BaseOffset Base offset.
/// @param Flags Flags to describe inheritance attribute,
/// e.g. private
- DIType createInheritance(DIType Ty, DIType BaseTy, uint64_t BaseOffset,
- unsigned Flags);
+ DIDerivedType createInheritance(DIType Ty, DIType BaseTy,
+ uint64_t BaseOffset, unsigned Flags);
/// createMemberType - Create debugging information entry for a member.
/// @param Scope Member scope.
@@ -162,10 +173,23 @@ namespace llvm {
/// @param OffsetInBits Member offset.
/// @param Flags Flags to encode member attribute, e.g. private
/// @param Ty Parent type.
- DIType createMemberType(DIDescriptor Scope, StringRef Name, DIFile File,
- unsigned LineNo, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits,
- unsigned Flags, DIType Ty);
+ DIDerivedType
+ createMemberType(DIDescriptor Scope, StringRef Name, DIFile File,
+ unsigned LineNo, uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags, DIType Ty);
+
+ /// createStaticMemberType - Create debugging information entry for a
+ /// C++ static data member.
+ /// @param Scope Member scope.
+ /// @param Name Member name.
+ /// @param File File where this member is declared.
+ /// @param LineNo Line number.
+ /// @param Ty Type of the static member.
+ /// @param Flags Flags to encode member attribute, e.g. private.
+ /// @param Val Const initializer of the member.
+ DIType createStaticMemberType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNo, DIType Ty,
+ unsigned Flags, llvm::Value *Val);
/// createObjCIVar - Create debugging information entry for Objective-C
/// instance variable.
@@ -241,12 +265,13 @@ namespace llvm {
/// DW_AT_containing_type. See DWARF documentation
/// for more info.
/// @param TemplateParms Template type parameters.
- DIType createClassType(DIDescriptor Scope, StringRef Name, DIFile File,
- unsigned LineNumber, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits,
- unsigned Flags, DIType DerivedFrom,
- DIArray Elements, MDNode *VTableHolder = 0,
- MDNode *TemplateParms = 0);
+ DICompositeType createClassType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType DerivedFrom, DIArray Elements,
+ MDNode *VTableHolder = 0,
+ MDNode *TemplateParms = 0);
/// createStructType - Create debugging information entry for a struct.
/// @param Scope Scope in which this struct is defined.
@@ -258,10 +283,12 @@ namespace llvm {
/// @param Flags Flags to encode member attribute, e.g. private
/// @param Elements Struct elements.
/// @param RunTimeLang Optional parameter, Objective-C runtime version.
- DIType createStructType(DIDescriptor Scope, StringRef Name, DIFile File,
- unsigned LineNumber, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Flags,
- DIArray Elements, unsigned RunTimeLang = 0);
+ DICompositeType createStructType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ unsigned Flags, DIType DerivedFrom,
+ DIArray Elements, unsigned RunTimeLang = 0,
+ MDNode *VTableHolder = 0);
/// createUnionType - Create debugging information entry for an union.
/// @param Scope Scope in which this union is defined.
@@ -273,10 +300,10 @@ namespace llvm {
/// @param Flags Flags to encode member attribute, e.g. private
/// @param Elements Union elements.
/// @param RunTimeLang Optional parameter, Objective-C runtime version.
- DIType createUnionType(DIDescriptor Scope, StringRef Name, DIFile File,
- unsigned LineNumber, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Flags,
- DIArray Elements, unsigned RunTimeLang = 0);
+ DICompositeType createUnionType(
+ DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags,
+ DIArray Elements, unsigned RunTimeLang = 0);
/// createTemplateTypeParameter - Create debugging information for template
/// type parameter.
@@ -311,8 +338,8 @@ namespace llvm {
/// @param AlignInBits Alignment.
/// @param Ty Element type.
/// @param Subscripts Subscripts.
- DIType createArrayType(uint64_t Size, uint64_t AlignInBits,
- DIType Ty, DIArray Subscripts);
+ DICompositeType createArrayType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts);
/// createVectorType - Create debugging information entry for a vector type.
/// @param Size Array size.
@@ -331,16 +358,16 @@ namespace llvm {
/// @param SizeInBits Member size.
/// @param AlignInBits Member alignment.
/// @param Elements Enumeration elements.
- DIType createEnumerationType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- DIArray Elements, DIType ClassType);
+ DICompositeType createEnumerationType(
+ DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
+ DIType ClassType);
/// createSubroutineType - Create subroutine type.
/// @param File File in which this subroutine is defined.
/// @param ParameterTypes An array of subroutine parameter types. This
/// includes return type at 0th index.
- DIType createSubroutineType(DIFile File, DIArray ParameterTypes);
+ DICompositeType createSubroutineType(DIFile File, DIArray ParameterTypes);
/// createArtificialType - Create a new DIType with "artificial" flag set.
DIType createArtificialType(DIType Ty);
@@ -349,10 +376,6 @@ namespace llvm {
/// flag set.
DIType createObjectPointerType(DIType Ty);
- /// createTemporaryType - Create a temporary forward-declared type.
- DIType createTemporaryType();
- DIType createTemporaryType(DIFile F);
-
/// createForwardDecl - Create a temporary forward-declared type.
DIType createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
DIFile F, unsigned Line, unsigned RuntimeLang = 0,
@@ -371,7 +394,7 @@ namespace llvm {
/// getOrCreateSubrange - Create a descriptor for a value range. This
/// implicitly uniques the values returned.
- DISubrange getOrCreateSubrange(int64_t Lo, int64_t Hi);
+ DISubrange getOrCreateSubrange(int64_t Lo, int64_t Count);
/// createGlobalVariable - Create a new descriptor for the specified global.
/// @param Name Name of the variable.
@@ -385,6 +408,19 @@ namespace llvm {
createGlobalVariable(StringRef Name, DIFile File, unsigned LineNo,
DIType Ty, bool isLocalToUnit, llvm::Value *Val);
+ /// \brief Create a new descriptor for the specified global.
+ /// @param Name Name of the variable.
+ /// @param LinkageName Mangled variable name.
+ /// @param File File where this variable is defined.
+ /// @param LineNo Line number.
+ /// @param Ty Variable Type.
+  /// @param isLocalToUnit Boolean flag indicating whether this variable is
+ /// externally visible or not.
+ /// @param Val llvm::Value of the variable.
+ DIGlobalVariable
+ createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile File,
+ unsigned LineNo, DIType Ty, bool isLocalToUnit,
+ llvm::Value *Val);
/// createStaticVariable - Create a new descriptor for the specified
/// variable.
@@ -397,10 +433,12 @@ namespace llvm {
     /// @param isLocalToUnit Boolean flag indicating whether this variable is
/// externally visible or not.
/// @param Val llvm::Value of the variable.
+ /// @param Decl Reference to the corresponding declaration.
DIGlobalVariable
createStaticVariable(DIDescriptor Context, StringRef Name,
StringRef LinkageName, DIFile File, unsigned LineNo,
- DIType Ty, bool isLocalToUnit, llvm::Value *Val);
+ DIType Ty, bool isLocalToUnit, llvm::Value *Val,
+ MDNode *Decl = NULL);
/// createLocalVariable - Create a new descriptor for the specified
diff --git a/include/llvm/DataLayout.h b/include/llvm/DataLayout.h
deleted file mode 100644
index 24ad05f17f39..000000000000
--- a/include/llvm/DataLayout.h
+++ /dev/null
@@ -1,429 +0,0 @@
-//===--------- llvm/DataLayout.h - Data size & alignment info ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines layout properties related to datatype size/offset/alignment
-// information. It uses lazy annotations to cache information about how
-// structure types are laid out and used.
-//
-// This structure should be created once, filled in if the defaults are not
-// correct and then passed around by const&. None of the member functions
-// require modification to the object.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DATALAYOUT_H
-#define LLVM_DATALAYOUT_H
-
-#include "llvm/Pass.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-class Value;
-class Type;
-class IntegerType;
-class StructType;
-class StructLayout;
-class GlobalVariable;
-class LLVMContext;
-template<typename T>
-class ArrayRef;
-
-/// Enum used to categorize the alignment types stored by LayoutAlignElem
-enum AlignTypeEnum {
- INTEGER_ALIGN = 'i', ///< Integer type alignment
- VECTOR_ALIGN = 'v', ///< Vector type alignment
- FLOAT_ALIGN = 'f', ///< Floating point type alignment
- AGGREGATE_ALIGN = 'a', ///< Aggregate alignment
- STACK_ALIGN = 's' ///< Stack objects alignment
-};
-
-/// Layout alignment element.
-///
-/// Stores the alignment data associated with a given alignment type (integer,
-/// vector, float) and type bit width.
-///
-/// @note The unusual order of elements in the structure attempts to reduce
-/// padding and make the structure slightly more cache friendly.
-struct LayoutAlignElem {
- unsigned AlignType : 8; ///< Alignment type (AlignTypeEnum)
- unsigned TypeBitWidth : 24; ///< Type bit width
- unsigned ABIAlign : 16; ///< ABI alignment for this type/bitw
- unsigned PrefAlign : 16; ///< Pref. alignment for this type/bitw
-
- /// Initializer
- static LayoutAlignElem get(AlignTypeEnum align_type, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width);
- /// Equality predicate
- bool operator==(const LayoutAlignElem &rhs) const;
-};
-
-/// Layout pointer alignment element.
-///
-/// Stores the alignment data associated with a given pointer and address space.
-///
-/// @note The unusual order of elements in the structure attempts to reduce
-/// padding and make the structure slightly more cache friendly.
-struct PointerAlignElem {
- unsigned ABIAlign; ///< ABI alignment for this type/bitw
- unsigned PrefAlign; ///< Pref. alignment for this type/bitw
- uint32_t TypeBitWidth; ///< Type bit width
- uint32_t AddressSpace; ///< Address space for the pointer type
-
- /// Initializer
- static PointerAlignElem get(uint32_t addr_space, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width);
- /// Equality predicate
- bool operator==(const PointerAlignElem &rhs) const;
-};
-
-
-/// DataLayout - This class holds a parsed version of the target data layout
-/// string in a module and provides methods for querying it. The target data
-/// layout string is specified *by the target* - a frontend generating LLVM IR
-/// is required to generate the right target data for the target being codegen'd
-/// to. If some measure of portability is desired, an empty string may be
-/// specified in the module.
-class DataLayout : public ImmutablePass {
-private:
- bool LittleEndian; ///< Defaults to false
- unsigned StackNaturalAlign; ///< Stack natural alignment
-
- SmallVector<unsigned char, 8> LegalIntWidths; ///< Legal Integers.
-
-  /// Alignments - Where the primitive type alignment data is stored.
- ///
- /// @sa init().
- /// @note Could support multiple size pointer alignments, e.g., 32-bit
- /// pointers vs. 64-bit pointers by extending LayoutAlignment, but for now,
- /// we don't.
- SmallVector<LayoutAlignElem, 16> Alignments;
- DenseMap<unsigned, PointerAlignElem> Pointers;
-
- /// InvalidAlignmentElem - This member is a signal that a requested alignment
- /// type and bit width were not found in the SmallVector.
- static const LayoutAlignElem InvalidAlignmentElem;
-
- /// InvalidPointerElem - This member is a signal that a requested pointer
-  /// type and bit width were not found in the DenseMap.
- static const PointerAlignElem InvalidPointerElem;
-
- // The StructType -> StructLayout map.
- mutable void *LayoutMap;
-
- //! Set/initialize target alignments
- void setAlignment(AlignTypeEnum align_type, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width);
- unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width,
- bool ABIAlign, Type *Ty) const;
-
- //! Set/initialize pointer alignments
- void setPointerAlignment(uint32_t addr_space, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width);
-
- //! Internal helper method that returns requested alignment for type.
- unsigned getAlignment(Type *Ty, bool abi_or_pref) const;
-
- /// Valid alignment predicate.
- ///
- /// Predicate that tests a LayoutAlignElem reference returned by get() against
- /// InvalidAlignmentElem.
- bool validAlignment(const LayoutAlignElem &align) const {
- return &align != &InvalidAlignmentElem;
- }
-
- /// Valid pointer predicate.
- ///
- /// Predicate that tests a PointerAlignElem reference returned by get() against
- /// InvalidPointerElem.
- bool validPointer(const PointerAlignElem &align) const {
- return &align != &InvalidPointerElem;
- }
-
-  /// Initialise a DataLayout object with default values, and ensure that the
- /// target data pass is registered.
- void init();
-
-public:
- /// Default ctor.
- ///
- /// @note This has to exist, because this is a pass, but it should never be
- /// used.
- DataLayout();
-
- /// Constructs a DataLayout from a specification string. See init().
- explicit DataLayout(StringRef LayoutDescription)
- : ImmutablePass(ID) {
- std::string errMsg = parseSpecifier(LayoutDescription, this);
- assert(errMsg == "" && "Invalid target data layout string.");
- (void)errMsg;
- }
-
- /// Parses a target data specification string. Returns an error message
- /// if the string is malformed, or the empty string on success. Optionally
- /// initialises a DataLayout object if passed a non-null pointer.
- static std::string parseSpecifier(StringRef LayoutDescription,
- DataLayout* td = 0);
-
- /// Initialize target data from properties stored in the module.
- explicit DataLayout(const Module *M);
-
- DataLayout(const DataLayout &TD) :
- ImmutablePass(ID),
- LittleEndian(TD.isLittleEndian()),
- LegalIntWidths(TD.LegalIntWidths),
- Alignments(TD.Alignments),
- Pointers(TD.Pointers),
- LayoutMap(0)
- { }
-
- ~DataLayout(); // Not virtual, do not subclass this class
-
- /// Layout endianness...
- bool isLittleEndian() const { return LittleEndian; }
- bool isBigEndian() const { return !LittleEndian; }
-
- /// getStringRepresentation - Return the string representation of the
- /// DataLayout. This representation is in the same format accepted by the
- /// string constructor above.
- std::string getStringRepresentation() const;
-
- /// isLegalInteger - This function returns true if the specified type is
- /// known to be a native integer type supported by the CPU. For example,
- /// i64 is not native on most 32-bit CPUs and i37 is not native on any known
- /// one. This returns false if the integer width is not legal.
- ///
- /// The width is specified in bits.
- ///
- bool isLegalInteger(unsigned Width) const {
- for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
- if (LegalIntWidths[i] == Width)
- return true;
- return false;
- }
-
- bool isIllegalInteger(unsigned Width) const {
- return !isLegalInteger(Width);
- }
-
- /// Returns true if the given alignment exceeds the natural stack alignment.
- bool exceedsNaturalStackAlignment(unsigned Align) const {
- return (StackNaturalAlign != 0) && (Align > StackNaturalAlign);
- }
-
- /// fitsInLegalInteger - This function returns true if the specified type fits
- /// in a native integer type supported by the CPU. For example, if the CPU
- /// only supports i32 as a native integer type, then i27 fits in a legal
-  /// integer type but i45 does not.
- bool fitsInLegalInteger(unsigned Width) const {
- for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
- if (Width <= LegalIntWidths[i])
- return true;
- return false;
- }
-
- /// Layout pointer alignment
- /// FIXME: The defaults need to be removed once all of
- /// the backends/clients are updated.
- unsigned getPointerABIAlignment(unsigned AS = 0) const {
- DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
- if (val == Pointers.end()) {
- val = Pointers.find(0);
- }
- return val->second.ABIAlign;
- }
- /// Return target's alignment for stack-based pointers
- /// FIXME: The defaults need to be removed once all of
- /// the backends/clients are updated.
- unsigned getPointerPrefAlignment(unsigned AS = 0) const {
- DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
- if (val == Pointers.end()) {
- val = Pointers.find(0);
- }
- return val->second.PrefAlign;
- }
- /// Layout pointer size
- /// FIXME: The defaults need to be removed once all of
- /// the backends/clients are updated.
- unsigned getPointerSize(unsigned AS = 0) const {
- DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
- if (val == Pointers.end()) {
- val = Pointers.find(0);
- }
- return val->second.TypeBitWidth;
- }
- /// Layout pointer size, in bits
- /// FIXME: The defaults need to be removed once all of
- /// the backends/clients are updated.
- unsigned getPointerSizeInBits(unsigned AS = 0) const {
- return getPointerSize(AS) * 8;
- }
- /// Size examples:
- ///
- /// Type SizeInBits StoreSizeInBits AllocSizeInBits[*]
- /// ---- ---------- --------------- ---------------
- /// i1 1 8 8
- /// i8 8 8 8
- /// i19 19 24 32
- /// i32 32 32 32
- /// i100 100 104 128
- /// i128 128 128 128
- /// Float 32 32 32
- /// Double 64 64 64
- /// X86_FP80 80 80 96
- ///
- /// [*] The alloc size depends on the alignment, and thus on the target.
- /// These values are for x86-32 linux.
-
- /// getTypeSizeInBits - Return the number of bits necessary to hold the
- /// specified type. For example, returns 36 for i36 and 80 for x86_fp80.
- uint64_t getTypeSizeInBits(Type* Ty) const;
-
- /// getTypeStoreSize - Return the maximum number of bytes that may be
- /// overwritten by storing the specified type. For example, returns 5
- /// for i36 and 10 for x86_fp80.
- uint64_t getTypeStoreSize(Type *Ty) const {
- return (getTypeSizeInBits(Ty)+7)/8;
- }
-
- /// getTypeStoreSizeInBits - Return the maximum number of bits that may be
- /// overwritten by storing the specified type; always a multiple of 8. For
- /// example, returns 40 for i36 and 80 for x86_fp80.
- uint64_t getTypeStoreSizeInBits(Type *Ty) const {
- return 8*getTypeStoreSize(Ty);
- }
-
- /// getTypeAllocSize - Return the offset in bytes between successive objects
- /// of the specified type, including alignment padding. This is the amount
- /// that alloca reserves for this type. For example, returns 12 or 16 for
- /// x86_fp80, depending on alignment.
- uint64_t getTypeAllocSize(Type* Ty) const {
- // Round up to the next alignment boundary.
- return RoundUpAlignment(getTypeStoreSize(Ty), getABITypeAlignment(Ty));
- }
-
- /// getTypeAllocSizeInBits - Return the offset in bits between successive
- /// objects of the specified type, including alignment padding; always a
- /// multiple of 8. This is the amount that alloca reserves for this type.
- /// For example, returns 96 or 128 for x86_fp80, depending on alignment.
- uint64_t getTypeAllocSizeInBits(Type* Ty) const {
- return 8*getTypeAllocSize(Ty);
- }
-
- /// getABITypeAlignment - Return the minimum ABI-required alignment for the
- /// specified type.
- unsigned getABITypeAlignment(Type *Ty) const;
-
- /// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
- /// an integer type of the specified bitwidth.
- unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const;
-
-
- /// getCallFrameTypeAlignment - Return the minimum ABI-required alignment
- /// for the specified type when it is part of a call frame.
- unsigned getCallFrameTypeAlignment(Type *Ty) const;
-
-
- /// getPrefTypeAlignment - Return the preferred stack/global alignment for
- /// the specified type. This is always at least as good as the ABI alignment.
- unsigned getPrefTypeAlignment(Type *Ty) const;
-
- /// getPreferredTypeAlignmentShift - Return the preferred alignment for the
- /// specified type, returned as log2 of the value (a shift amount).
- ///
- unsigned getPreferredTypeAlignmentShift(Type *Ty) const;
-
- /// getIntPtrType - Return an integer type with size at least as big as that
- /// of a pointer in the given address space.
- IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace = 0) const;
-
- /// getIntPtrType - Return an integer (vector of integer) type with size at
- /// least as big as that of a pointer of the given pointer (vector of pointer)
- /// type.
- Type *getIntPtrType(Type *) const;
-
- /// getIndexedOffset - return the offset from the beginning of the type for
- /// the specified indices. This is used to implement getelementptr.
- ///
- uint64_t getIndexedOffset(Type *Ty, ArrayRef<Value *> Indices) const;
-
- /// getStructLayout - Return a StructLayout object, indicating the alignment
- /// of the struct, its size, and the offsets of its fields. Note that this
- /// information is lazily cached.
- const StructLayout *getStructLayout(StructType *Ty) const;
-
- /// getPreferredAlignment - Return the preferred alignment of the specified
- /// global. This includes an explicitly requested alignment (if the global
- /// has one).
- unsigned getPreferredAlignment(const GlobalVariable *GV) const;
-
- /// getPreferredAlignmentLog - Return the preferred alignment of the
- /// specified global, returned in log form. This includes an explicitly
- /// requested alignment (if the global has one).
- unsigned getPreferredAlignmentLog(const GlobalVariable *GV) const;
-
- /// RoundUpAlignment - Round the specified value up to the next alignment
- /// boundary specified by Alignment. For example, 7 rounded up to an
- /// alignment boundary of 4 is 8. 8 rounded up to the alignment boundary of 4
- /// is 8 because it is already aligned.
- template <typename UIntTy>
- static UIntTy RoundUpAlignment(UIntTy Val, unsigned Alignment) {
- assert((Alignment & (Alignment-1)) == 0 && "Alignment must be power of 2!");
- return (Val + (Alignment-1)) & ~UIntTy(Alignment-1);
- }
-
- static char ID; // Pass identification, replacement for typeid
-};
-
-/// StructLayout - used to lazily calculate structure layout information for a
-/// target machine, based on the DataLayout structure.
-///
-class StructLayout {
- uint64_t StructSize;
- unsigned StructAlignment;
- unsigned NumElements;
- uint64_t MemberOffsets[1]; // variable sized array!
-public:
-
- uint64_t getSizeInBytes() const {
- return StructSize;
- }
-
- uint64_t getSizeInBits() const {
- return 8*StructSize;
- }
-
- unsigned getAlignment() const {
- return StructAlignment;
- }
-
- /// getElementContainingOffset - Given a valid byte offset into the structure,
- /// return the structure index that contains it.
- ///
- unsigned getElementContainingOffset(uint64_t Offset) const;
-
- uint64_t getElementOffset(unsigned Idx) const {
- assert(Idx < NumElements && "Invalid element idx!");
- return MemberOffsets[Idx];
- }
-
- uint64_t getElementOffsetInBits(unsigned Idx) const {
- return getElementOffset(Idx)*8;
- }
-
-private:
- friend class DataLayout; // Only DataLayout can create this class
- StructLayout(StructType *ST, const DataLayout &TD);
-};
-
-} // End llvm namespace
-
-#endif
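To make the size/alignment arithmetic in the removed header concrete, here is a self-contained sketch (not LLVM code) reproducing RoundUpAlignment and the x86-32 x86_fp80 example from the comments above:

    #include <cassert>
    #include <cstdint>

    // Mirrors DataLayout::RoundUpAlignment: round Val up to the next
    // multiple of Alignment, which must be a power of two.
    template <typename UIntTy>
    static UIntTy roundUpAlignment(UIntTy Val, unsigned Alignment) {
      assert((Alignment & (Alignment - 1)) == 0 && "Alignment must be power of 2!");
      return (Val + (Alignment - 1)) & ~UIntTy(Alignment - 1);
    }

    int main() {
      assert(roundUpAlignment<uint64_t>(7, 4) == 8);   // 7 -> next 4-byte boundary
      assert(roundUpAlignment<uint64_t>(8, 4) == 8);   // already aligned
      // x86_fp80: the store size is 10 bytes; the alloc size is 12 or 16
      // depending on whether the ABI alignment is 4 or 16.
      assert(roundUpAlignment<uint64_t>(10, 4) == 12);
      assert(roundUpAlignment<uint64_t>(10, 16) == 16);
      return 0;
    }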
diff --git a/include/llvm/DebugInfo.h b/include/llvm/DebugInfo.h
index dae03ad10095..15f91870a574 100644
--- a/include/llvm/DebugInfo.h
+++ b/include/llvm/DebugInfo.h
@@ -14,11 +14,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_DEBUGINFO_H
-#define LLVM_ANALYSIS_DEBUGINFO_H
+#ifndef LLVM_DEBUGINFO_H
+#define LLVM_DEBUGINFO_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Dwarf.h"
@@ -61,7 +61,9 @@ namespace llvm {
FlagExplicit = 1 << 7,
FlagPrototyped = 1 << 8,
FlagObjcClassComplete = 1 << 9,
- FlagObjectPointer = 1 << 10
+ FlagObjectPointer = 1 << 10,
+ FlagVector = 1 << 11,
+ FlagStaticMember = 1 << 12
};
protected:
const MDNode *DbgNode;
@@ -71,6 +73,7 @@ namespace llvm {
return (unsigned)getUInt64Field(Elt);
}
uint64_t getUInt64Field(unsigned Elt) const;
+ int64_t getInt64Field(unsigned Elt) const;
DIDescriptor getDescriptorField(unsigned Elt) const;
template <typename DescTy>
@@ -93,15 +96,11 @@ namespace llvm {
explicit DIDescriptor(const DIVariable F);
explicit DIDescriptor(const DIType F);
- bool Verify() const { return DbgNode != 0; }
+ bool Verify() const;
operator MDNode *() const { return const_cast<MDNode*>(DbgNode); }
MDNode *operator ->() const { return const_cast<MDNode*>(DbgNode); }
- unsigned getVersion() const {
- return getUnsignedField(0) & LLVMDebugVersionMask;
- }
-
unsigned getTag() const {
return getUnsignedField(0) & ~LLVMDebugVersionMask;
}
@@ -141,8 +140,9 @@ namespace llvm {
public:
explicit DISubrange(const MDNode *N = 0) : DIDescriptor(N) {}
- uint64_t getLo() const { return getUInt64Field(1); }
- uint64_t getHi() const { return getUInt64Field(2); }
+ int64_t getLo() const { return getInt64Field(1); }
+ int64_t getCount() const { return getInt64Field(2); }
+ bool Verify() const;
};
/// DIArray - This descriptor holds an array of descriptors.
@@ -169,6 +169,18 @@ namespace llvm {
StringRef getDirectory() const;
};
+ /// DIFile - This is a wrapper for a file.
+ class DIFile : public DIScope {
+ friend class DIDescriptor;
+ public:
+ explicit DIFile(const MDNode *N = 0) : DIScope(N) {
+ if (DbgNode && !isFile())
+ DbgNode = 0;
+ }
+ MDNode *getFileNode() const;
+ bool Verify() const;
+ };
+
/// DICompileUnit - A wrapper for a compile unit.
class DICompileUnit : public DIScope {
friend class DIDescriptor;
@@ -176,51 +188,24 @@ namespace llvm {
public:
explicit DICompileUnit(const MDNode *N = 0) : DIScope(N) {}
- unsigned getLanguage() const { return getUnsignedField(2); }
- StringRef getFilename() const { return getStringField(3); }
- StringRef getDirectory() const { return getStringField(4); }
- StringRef getProducer() const { return getStringField(5); }
-
- /// isMain - Each input file is encoded as a separate compile unit in LLVM
- /// debugging information output. However, many target specific tool chains
- /// prefer to encode only one compile unit in an object file. In this
- /// situation, the LLVM code generator will include debugging information
- /// entities in the compile unit that is marked as main compile unit. The
- /// code generator accepts maximum one main compile unit per module. If a
- /// module does not contain any main compile unit then the code generator
- /// will emit multiple compile units in the output object file.
-
- bool isMain() const { return getUnsignedField(6) != 0; }
- bool isOptimized() const { return getUnsignedField(7) != 0; }
- StringRef getFlags() const { return getStringField(8); }
- unsigned getRunTimeVersion() const { return getUnsignedField(9); }
+ unsigned getLanguage() const { return getUnsignedField(2); }
+ StringRef getProducer() const { return getStringField(3); }
+
+ bool isOptimized() const { return getUnsignedField(4) != 0; }
+ StringRef getFlags() const { return getStringField(5); }
+ unsigned getRunTimeVersion() const { return getUnsignedField(6); }
DIArray getEnumTypes() const;
DIArray getRetainedTypes() const;
DIArray getSubprograms() const;
DIArray getGlobalVariables() const;
+ StringRef getSplitDebugFilename() const { return getStringField(11); }
+
/// Verify - Verify that a compile unit is well formed.
bool Verify() const;
};
- /// DIFile - This is a wrapper for a file.
- class DIFile : public DIScope {
- friend class DIDescriptor;
- void printInternal(raw_ostream &OS) const {} // FIXME: Output something?
- public:
- explicit DIFile(const MDNode *N = 0) : DIScope(N) {
- if (DbgNode && !isFile())
- DbgNode = 0;
- }
- StringRef getFilename() const { return getStringField(1); }
- StringRef getDirectory() const { return getStringField(2); }
- DICompileUnit getCompileUnit() const{
- assert (getVersion() <= LLVMDebugVersion10 && "Invalid CompileUnit!");
- return getFieldAs<DICompileUnit>(3);
- }
- };
-
/// DIEnumerator - A wrapper for an enumerator (e.g. X and Y in 'enum {X,Y}').
/// FIXME: it seems strange that this doesn't have either a reference to the
/// type/precision or a file/line pair for location info.
@@ -232,6 +217,7 @@ namespace llvm {
StringRef getName() const { return getStringField(1); }
uint64_t getEnumValue() const { return getUInt64Field(2); }
+ bool Verify() const;
};
/// DIType - This is a wrapper for a type.
@@ -250,16 +236,8 @@ namespace llvm {
explicit DIType(const MDNode *N);
explicit DIType() {}
- DIScope getContext() const { return getFieldAs<DIScope>(1); }
- StringRef getName() const { return getStringField(2); }
- DICompileUnit getCompileUnit() const{
- assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!");
- if (getVersion() == llvm::LLVMDebugVersion7)
- return getFieldAs<DICompileUnit>(3);
-
- return getFieldAs<DIFile>(3).getCompileUnit();
- }
- DIFile getFile() const { return getFieldAs<DIFile>(3); }
+ DIScope getContext() const { return getFieldAs<DIScope>(2); }
+ StringRef getName() const { return getStringField(3); }
unsigned getLineNumber() const { return getUnsignedField(4); }
uint64_t getSizeInBits() const { return getUInt64Field(5); }
uint64_t getAlignInBits() const { return getUInt64Field(6); }
@@ -295,20 +273,14 @@ namespace llvm {
bool isObjcClassComplete() const {
return (getFlags() & FlagObjcClassComplete) != 0;
}
- bool isValid() const {
- return DbgNode && (isBasicType() || isDerivedType() || isCompositeType());
+ bool isVector() const {
+ return (getFlags() & FlagVector) != 0;
}
- StringRef getDirectory() const {
- if (getVersion() == llvm::LLVMDebugVersion7)
- return getCompileUnit().getDirectory();
-
- return getFieldAs<DIFile>(3).getDirectory();
+ bool isStaticMember() const {
+ return (getFlags() & FlagStaticMember) != 0;
}
- StringRef getFilename() const {
- if (getVersion() == llvm::LLVMDebugVersion7)
- return getCompileUnit().getFilename();
-
- return getFieldAs<DIFile>(3).getFilename();
+ bool isValid() const {
+ return DbgNode && (isBasicType() || isDerivedType() || isCompositeType());
}
/// isUnsignedDIType - Return true if type encoding is unsigned.
@@ -332,7 +304,8 @@ namespace llvm {
};
/// DIDerivedType - A simple derived type, like a const qualified type,
- /// a typedef, a pointer or reference, etc.
+  /// a typedef, a pointer or reference, etc., or a data member of
+ /// a class/struct/union.
class DIDerivedType : public DIType {
friend class DIDescriptor;
void printInternal(raw_ostream &OS) const;
@@ -349,46 +322,18 @@ namespace llvm {
/// return base type size.
uint64_t getOriginalTypeSize() const;
- /// getObjCProperty - Return property node, if this ivar is
+ /// getObjCProperty - Return property node, if this ivar is
/// associated with one.
MDNode *getObjCProperty() const;
- StringRef getObjCPropertyName() const {
- if (getVersion() > LLVMDebugVersion11)
- return StringRef();
- return getStringField(10);
- }
- StringRef getObjCPropertyGetterName() const {
- assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
- return getStringField(11);
- }
- StringRef getObjCPropertySetterName() const {
- assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
- return getStringField(12);
- }
- bool isReadOnlyObjCProperty() {
- assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
- return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readonly) != 0;
- }
- bool isReadWriteObjCProperty() {
- assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
- return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readwrite) != 0;
- }
- bool isAssignObjCProperty() {
- assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
- return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_assign) != 0;
- }
- bool isRetainObjCProperty() {
- assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
- return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_retain) != 0;
- }
- bool isCopyObjCProperty() {
- assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
- return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_copy) != 0;
+ DIType getClassType() const {
+ assert(getTag() == dwarf::DW_TAG_ptr_to_member_type);
+ return getFieldAs<DIType>(10);
}
- bool isNonAtomicObjCProperty() {
- assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
- return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_nonatomic) != 0;
+
+ Constant *getConstant() const {
+ assert((getTag() == dwarf::DW_TAG_member) && isStaticMember());
+ return getConstantField(10);
}
/// Verify - Verify that a derived type descriptor is well formed.
@@ -409,10 +354,12 @@ namespace llvm {
}
DIArray getTypeArray() const { return getFieldAs<DIArray>(10); }
+ void setTypeArray(DIArray Elements, DIArray TParams = DIArray());
unsigned getRunTimeLang() const { return getUnsignedField(11); }
DICompositeType getContainingType() const {
return getFieldAs<DICompositeType>(12);
}
+ void setContainingType(DICompositeType ContainingType);
DIArray getTemplateParams() const { return getFieldAs<DIArray>(13); }
/// Verify - Verify that a composite type descriptor is well formed.
@@ -427,14 +374,15 @@ namespace llvm {
DIScope getContext() const { return getFieldAs<DIScope>(1); }
StringRef getName() const { return getStringField(2); }
DIType getType() const { return getFieldAs<DIType>(3); }
- StringRef getFilename() const {
+ StringRef getFilename() const {
return getFieldAs<DIFile>(4).getFilename();
}
- StringRef getDirectory() const {
+ StringRef getDirectory() const {
return getFieldAs<DIFile>(4).getDirectory();
}
unsigned getLineNumber() const { return getUnsignedField(5); }
unsigned getColumnNumber() const { return getUnsignedField(6); }
+ bool Verify() const;
};
/// DITemplateValueParameter - This is a wrapper for template value parameter.
@@ -446,14 +394,15 @@ namespace llvm {
StringRef getName() const { return getStringField(2); }
DIType getType() const { return getFieldAs<DIType>(3); }
uint64_t getValue() const { return getUInt64Field(4); }
- StringRef getFilename() const {
+ StringRef getFilename() const {
return getFieldAs<DIFile>(5).getFilename();
}
- StringRef getDirectory() const {
+ StringRef getDirectory() const {
return getFieldAs<DIFile>(5).getDirectory();
}
unsigned getLineNumber() const { return getUnsignedField(6); }
unsigned getColumnNumber() const { return getUnsignedField(7); }
+ bool Verify() const;
};
/// DISubprogram - This is a wrapper for a subprogram (e.g. a function).
@@ -467,93 +416,66 @@ namespace llvm {
StringRef getName() const { return getStringField(3); }
StringRef getDisplayName() const { return getStringField(4); }
StringRef getLinkageName() const { return getStringField(5); }
- DICompileUnit getCompileUnit() const{
- assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!");
- if (getVersion() == llvm::LLVMDebugVersion7)
- return getFieldAs<DICompileUnit>(6);
-
- return getFieldAs<DIFile>(6).getCompileUnit();
- }
- unsigned getLineNumber() const { return getUnsignedField(7); }
- DICompositeType getType() const { return getFieldAs<DICompositeType>(8); }
+ unsigned getLineNumber() const { return getUnsignedField(6); }
+ DICompositeType getType() const { return getFieldAs<DICompositeType>(7); }
/// getReturnTypeName - Subprogram return types are encoded either as
/// DIType or as DICompositeType.
StringRef getReturnTypeName() const {
- DICompositeType DCT(getFieldAs<DICompositeType>(8));
+ DICompositeType DCT(getFieldAs<DICompositeType>(7));
if (DCT.Verify()) {
DIArray A = DCT.getTypeArray();
DIType T(A.getElement(0));
return T.getName();
}
- DIType T(getFieldAs<DIType>(8));
+ DIType T(getFieldAs<DIType>(7));
return T.getName();
}
/// isLocalToUnit - Return true if this subprogram is local to the current
/// compile unit, like 'static' in C.
- unsigned isLocalToUnit() const { return getUnsignedField(9); }
- unsigned isDefinition() const { return getUnsignedField(10); }
+ unsigned isLocalToUnit() const { return getUnsignedField(8); }
+ unsigned isDefinition() const { return getUnsignedField(9); }
- unsigned getVirtuality() const { return getUnsignedField(11); }
- unsigned getVirtualIndex() const { return getUnsignedField(12); }
+ unsigned getVirtuality() const { return getUnsignedField(10); }
+ unsigned getVirtualIndex() const { return getUnsignedField(11); }
DICompositeType getContainingType() const {
- return getFieldAs<DICompositeType>(13);
+ return getFieldAs<DICompositeType>(12);
+ }
+
+ unsigned getFlags() const {
+ return getUnsignedField(13);
}
- unsigned isArtificial() const {
- if (getVersion() <= llvm::LLVMDebugVersion8)
- return getUnsignedField(14);
- return (getUnsignedField(14) & FlagArtificial) != 0;
+ unsigned isArtificial() const {
+ return (getUnsignedField(13) & FlagArtificial) != 0;
}
/// isPrivate - Return true if this subprogram has "private"
/// access specifier.
- bool isPrivate() const {
- if (getVersion() <= llvm::LLVMDebugVersion8)
- return false;
- return (getUnsignedField(14) & FlagPrivate) != 0;
+ bool isPrivate() const {
+ return (getUnsignedField(13) & FlagPrivate) != 0;
}
/// isProtected - Return true if this subprogram has "protected"
/// access specifier.
- bool isProtected() const {
- if (getVersion() <= llvm::LLVMDebugVersion8)
- return false;
- return (getUnsignedField(14) & FlagProtected) != 0;
+ bool isProtected() const {
+ return (getUnsignedField(13) & FlagProtected) != 0;
}
/// isExplicit - Return true if this subprogram is marked as explicit.
- bool isExplicit() const {
- if (getVersion() <= llvm::LLVMDebugVersion8)
- return false;
- return (getUnsignedField(14) & FlagExplicit) != 0;
+ bool isExplicit() const {
+ return (getUnsignedField(13) & FlagExplicit) != 0;
}
/// isPrototyped - Return true if this subprogram is prototyped.
- bool isPrototyped() const {
- if (getVersion() <= llvm::LLVMDebugVersion8)
- return false;
- return (getUnsignedField(14) & FlagPrototyped) != 0;
+ bool isPrototyped() const {
+ return (getUnsignedField(13) & FlagPrototyped) != 0;
}
unsigned isOptimized() const;
- StringRef getFilename() const {
- if (getVersion() == llvm::LLVMDebugVersion7)
- return getCompileUnit().getFilename();
-
- return getFieldAs<DIFile>(6).getFilename();
- }
-
- StringRef getDirectory() const {
- if (getVersion() == llvm::LLVMDebugVersion7)
- return getCompileUnit().getFilename();
-
- return getFieldAs<DIFile>(6).getDirectory();
- }
-
/// getScopeLineNumber - Get the beginning of the scope of the
/// function, not necessarily where the name of the program
/// starts.
- unsigned getScopeLineNumber() const { return getUnsignedField(20); }
+ unsigned getScopeLineNumber() const { return getUnsignedField(19); }
/// Verify - Verify that a subprogram descriptor is well formed.
bool Verify() const;
@@ -562,11 +484,11 @@ namespace llvm {
/// information for the function F.
bool describes(const Function *F);
- Function *getFunction() const { return getFunctionField(16); }
- void replaceFunction(Function *F) { replaceFunctionField(16, F); }
- DIArray getTemplateParams() const { return getFieldAs<DIArray>(17); }
+ Function *getFunction() const { return getFunctionField(15); }
+ void replaceFunction(Function *F) { replaceFunctionField(15, F); }
+ DIArray getTemplateParams() const { return getFieldAs<DIArray>(16); }
DISubprogram getFunctionDeclaration() const {
- return getFieldAs<DISubprogram>(18);
+ return getFieldAs<DISubprogram>(17);
}
MDNode *getVariablesNodes() const;
DIArray getVariables() const;
@@ -583,25 +505,13 @@ namespace llvm {
StringRef getName() const { return getStringField(3); }
StringRef getDisplayName() const { return getStringField(4); }
StringRef getLinkageName() const { return getStringField(5); }
- DICompileUnit getCompileUnit() const{
- assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!");
- if (getVersion() == llvm::LLVMDebugVersion7)
- return getFieldAs<DICompileUnit>(6);
-
- DIFile F = getFieldAs<DIFile>(6);
- return F.getCompileUnit();
- }
StringRef getFilename() const {
- if (getVersion() <= llvm::LLVMDebugVersion10)
- return getContext().getFilename();
return getFieldAs<DIFile>(6).getFilename();
- }
+ }
StringRef getDirectory() const {
- if (getVersion() <= llvm::LLVMDebugVersion10)
- return getContext().getDirectory();
return getFieldAs<DIFile>(6).getDirectory();
- }
+ }
unsigned getLineNumber() const { return getUnsignedField(7); }
DIType getType() const { return getFieldAs<DIType>(8); }
@@ -610,6 +520,9 @@ namespace llvm {
GlobalVariable *getGlobal() const { return getGlobalVariableField(11); }
Constant *getConstant() const { return getConstantField(11); }
+ DIDerivedType getStaticDataMemberDeclaration() const {
+ return getFieldAs<DIDerivedType>(12);
+ }
/// Verify - Verify that a global variable descriptor is well formed.
bool Verify() const;
@@ -626,27 +539,18 @@ namespace llvm {
DIScope getContext() const { return getFieldAs<DIScope>(1); }
StringRef getName() const { return getStringField(2); }
- DICompileUnit getCompileUnit() const {
- assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!");
- if (getVersion() == llvm::LLVMDebugVersion7)
- return getFieldAs<DICompileUnit>(3);
-
- DIFile F = getFieldAs<DIFile>(3);
- return F.getCompileUnit();
- }
- unsigned getLineNumber() const {
- return (getUnsignedField(4) << 8) >> 8;
+ DIFile getFile() const { return getFieldAs<DIFile>(3); }
+ unsigned getLineNumber() const {
+ return (getUnsignedField(4) << 8) >> 8;
}
unsigned getArgNumber() const {
- unsigned L = getUnsignedField(4);
+ unsigned L = getUnsignedField(4);
return L >> 24;
}
DIType getType() const { return getFieldAs<DIType>(5); }
-
+
/// isArtificial - Return true if this variable is marked as "artificial".
- bool isArtificial() const {
- if (getVersion() <= llvm::LLVMDebugVersion8)
- return false;
+ bool isArtificial() const {
return (getUnsignedField(6) & FlagArtificial) != 0;
}
@@ -666,12 +570,8 @@ namespace llvm {
}
unsigned getNumAddrElements() const;
-
+
uint64_t getAddrElement(unsigned Idx) const {
- if (getVersion() <= llvm::LLVMDebugVersion8)
- return getUInt64Field(Idx+6);
- if (getVersion() == llvm::LLVMDebugVersion9)
- return getUInt64Field(Idx+7);
return getUInt64Field(Idx+8);
}
@@ -681,7 +581,7 @@ namespace llvm {
return getType().isBlockByrefStruct();
}
- /// isInlinedFnArgument - Return trule if this variable provides debugging
+ /// isInlinedFnArgument - Return true if this variable provides debugging
    /// information for an inlined function argument.
bool isInlinedFnArgument(const Function *CurFn);
@@ -692,17 +592,10 @@ namespace llvm {
class DILexicalBlock : public DIScope {
public:
explicit DILexicalBlock(const MDNode *N = 0) : DIScope(N) {}
- DIScope getContext() const { return getFieldAs<DIScope>(1); }
- unsigned getLineNumber() const { return getUnsignedField(2); }
- unsigned getColumnNumber() const { return getUnsignedField(3); }
- StringRef getDirectory() const {
- StringRef dir = getFieldAs<DIFile>(4).getDirectory();
- return !dir.empty() ? dir : getContext().getDirectory();
- }
- StringRef getFilename() const {
- StringRef filename = getFieldAs<DIFile>(4).getFilename();
- return !filename.empty() ? filename : getContext().getFilename();
- }
+ DIScope getContext() const { return getFieldAs<DIScope>(2); }
+ unsigned getLineNumber() const { return getUnsignedField(3); }
+ unsigned getColumnNumber() const { return getUnsignedField(4); }
+ bool Verify() const;
};
/// DILexicalBlockFile - This is a wrapper for a lexical block with
@@ -710,40 +603,21 @@ namespace llvm {
class DILexicalBlockFile : public DIScope {
public:
explicit DILexicalBlockFile(const MDNode *N = 0) : DIScope(N) {}
- DIScope getContext() const { return getScope().getContext(); }
+    DIScope getContext() const {
+      if (getScope().isSubprogram())
+        return getScope();
+      return getScope().getContext();
+    }
unsigned getLineNumber() const { return getScope().getLineNumber(); }
unsigned getColumnNumber() const { return getScope().getColumnNumber(); }
- StringRef getDirectory() const {
- StringRef dir = getFieldAs<DIFile>(2).getDirectory();
- return !dir.empty() ? dir : getContext().getDirectory();
- }
- StringRef getFilename() const {
- StringRef filename = getFieldAs<DIFile>(2).getFilename();
- assert(!filename.empty() && "Why'd you create this then?");
- return filename;
- }
- DILexicalBlock getScope() const { return getFieldAs<DILexicalBlock>(1); }
+ DILexicalBlock getScope() const { return getFieldAs<DILexicalBlock>(2); }
+ bool Verify() const;
};
/// DINameSpace - A wrapper for a C++ style name space.
- class DINameSpace : public DIScope {
+ class DINameSpace : public DIScope {
+ friend class DIDescriptor;
+ void printInternal(raw_ostream &OS) const;
public:
explicit DINameSpace(const MDNode *N = 0) : DIScope(N) {}
- DIScope getContext() const { return getFieldAs<DIScope>(1); }
- StringRef getName() const { return getStringField(2); }
- StringRef getDirectory() const {
- return getFieldAs<DIFile>(3).getDirectory();
- }
- StringRef getFilename() const {
- return getFieldAs<DIFile>(3).getFilename();
- }
- DICompileUnit getCompileUnit() const{
- assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!");
- if (getVersion() == llvm::LLVMDebugVersion7)
- return getFieldAs<DICompileUnit>(3);
-
- return getFieldAs<DIFile>(3).getCompileUnit();
- }
+ DIScope getContext() const { return getFieldAs<DIScope>(2); }
+ StringRef getName() const { return getStringField(3); }
unsigned getLineNumber() const { return getUnsignedField(4); }
bool Verify() const;
};
@@ -818,7 +692,7 @@ namespace llvm {
/// to hold function specific information.
NamedMDNode *getOrInsertFnSpecificMDNode(Module &M, DISubprogram SP);
- /// getFnSpecificMDNode - Return a NameMDNode, if available, that is
+ /// getFnSpecificMDNode - Return a NameMDNode, if available, that is
/// suitable to hold function specific information.
NamedMDNode *getFnSpecificMDNode(const Module &M, DISubprogram SP);
@@ -836,7 +710,7 @@ namespace llvm {
public:
/// processModule - Process entire module and collect debug info
/// anchors.
- void processModule(Module &M);
+ void processModule(const Module &M);
private:
/// processType - Process DIType.
@@ -849,7 +723,7 @@ namespace llvm {
void processSubprogram(DISubprogram SP);
/// processDeclare - Process DbgDeclareInst.
- void processDeclare(DbgDeclareInst *DDI);
+ void processDeclare(const DbgDeclareInst *DDI);
/// processLocation - Process DILocation.
void processLocation(DILocation Loc);
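As a hedged sketch of how the new static-member support above might be consumed (DT is a hypothetical DIDerivedType pulled from module metadata; the accessors are the ones declared in this diff, and getConstant() asserts the tag/flag combination checked here):

    // Sketch only: DT is assumed to be a valid DIDerivedType.
    if (DT.getTag() == llvm::dwarf::DW_TAG_member && DT.isStaticMember()) {
      // For static data members, getConstant() returns the constant
      // initializer stored in field 10, or null if there is none.
      if (llvm::Constant *Init = DT.getConstant())
        (void)Init; // e.g. feed into DW_AT_const_value emission
    }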
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index 26bd1f627526..8fcd9e0b8246 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -16,9 +16,11 @@
#define LLVM_DEBUGINFO_DICONTEXT_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/RelocVisitor.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -56,6 +58,8 @@ public:
}
};
+typedef SmallVector<std::pair<uint64_t, DILineInfo>, 16> DILineInfoTable;
+
/// DIInliningInfo - a format-neutral container for inlined code description.
class DIInliningInfo {
SmallVector<DILineInfo, 4> Frames;
@@ -90,6 +94,24 @@ public:
}
};
+/// Selects which debug sections get dumped.
+enum DIDumpType {
+ DIDT_Null,
+ DIDT_All,
+ DIDT_Abbrev,
+ DIDT_AbbrevDwo,
+ DIDT_Aranges,
+ DIDT_Frames,
+ DIDT_Info,
+ DIDT_InfoDwo,
+ DIDT_Line,
+ DIDT_Ranges,
+ DIDT_Pubnames,
+ DIDT_Str,
+ DIDT_StrDwo,
+ DIDT_StrOffsetsDwo
+};
+
// In place of applying the relocations to the data we've read from disk, we
// use a separate mapping table on the side and check it at the locations in
// the dwarf where we expect relocated values. This adds a bit of complexity to the
@@ -102,19 +124,14 @@ public:
virtual ~DIContext();
/// getDWARFContext - get a context for binary DWARF data.
- static DIContext *getDWARFContext(bool isLittleEndian,
- StringRef infoSection,
- StringRef abbrevSection,
- StringRef aRangeSection = StringRef(),
- StringRef lineSection = StringRef(),
- StringRef stringSection = StringRef(),
- StringRef rangeSection = StringRef(),
- const RelocAddrMap &Map = RelocAddrMap());
+ static DIContext *getDWARFContext(object::ObjectFile *);
- virtual void dump(raw_ostream &OS) = 0;
+ virtual void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All) = 0;
virtual DILineInfo getLineInfoForAddress(uint64_t Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
+ virtual DILineInfoTable getLineInfoForAddressRange(uint64_t Address,
+ uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
};
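A minimal usage sketch for the reworked interface. Only getDWARFContext() and the new dump() overload are taken from the declarations above; the ObjectFile::createObjectFile(path) factory is an assumption about the era's loader entry point:

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/DebugInfo/DIContext.h"  // pulls in Object/ObjectFile.h per this diff
    #include "llvm/Support/raw_ostream.h"

    void dumpDebugInfo() {
      // Assumption: this factory existed with this shape at the time.
      llvm::object::ObjectFile *Obj =
          llvm::object::ObjectFile::createObjectFile("a.out");
      if (!Obj)
        return;
      llvm::OwningPtr<llvm::DIContext> Ctx(llvm::DIContext::getDWARFContext(Obj));
      Ctx->dump(llvm::outs(), llvm::DIDT_Info); // dump only .debug_info
    }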
diff --git a/include/llvm/DefaultPasses.h b/include/llvm/DefaultPasses.h
deleted file mode 100644
index 9f1ade86aba6..000000000000
--- a/include/llvm/DefaultPasses.h
+++ /dev/null
@@ -1,168 +0,0 @@
-//===- llvm/DefaultPasses.h - Default Pass Support code --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This file defines the infrastructure for registering the standard pass list.
-// This defines sets of standard optimizations that plugins can modify and
-// front ends can use.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEFAULT_PASS_SUPPORT_H
-#define LLVM_DEFAULT_PASS_SUPPORT_H
-
-#include "llvm/PassSupport.h"
-
-namespace llvm {
-
-class PassManagerBase;
-
-/// Unique identifiers for the default standard passes. The addresses of
-/// these symbols are used to uniquely identify passes from the default list.
-namespace DefaultStandardPasses {
-extern unsigned char AggressiveDCEID;
-extern unsigned char ArgumentPromotionID;
-extern unsigned char BasicAliasAnalysisID;
-extern unsigned char CFGSimplificationID;
-extern unsigned char ConstantMergeID;
-extern unsigned char CorrelatedValuePropagationID;
-extern unsigned char DeadArgEliminationID;
-extern unsigned char DeadStoreEliminationID;
-extern unsigned char EarlyCSEID;
-extern unsigned char FunctionAttrsID;
-extern unsigned char FunctionInliningID;
-extern unsigned char GVNID;
-extern unsigned char GlobalDCEID;
-extern unsigned char GlobalOptimizerID;
-extern unsigned char GlobalsModRefID;
-extern unsigned char IPSCCPID;
-extern unsigned char IndVarSimplifyID;
-extern unsigned char InlinerPlaceholderID;
-extern unsigned char InstructionCombiningID;
-extern unsigned char JumpThreadingID;
-extern unsigned char LICMID;
-extern unsigned char LoopDeletionID;
-extern unsigned char LoopIdiomID;
-extern unsigned char LoopRotateID;
-extern unsigned char LoopUnrollID;
-extern unsigned char LoopUnswitchID;
-extern unsigned char MemCpyOptID;
-extern unsigned char PruneEHID;
-extern unsigned char ReassociateID;
-extern unsigned char SCCPID;
-extern unsigned char ScalarReplAggregatesID;
-extern unsigned char SimplifyLibCallsID;
-extern unsigned char StripDeadPrototypesID;
-extern unsigned char TailCallEliminationID;
-extern unsigned char TypeBasedAliasAnalysisID;
-}
-
-/// StandardPass - The class responsible for maintaining the lists of standard
-class StandardPass {
- friend class RegisterStandardPassLists;
- public:
- /// Predefined standard sets of passes
- enum StandardSet {
- AliasAnalysis,
- Function,
- Module,
- LTO
- };
- /// Flags to specify whether a pass should be enabled. Passes registered
- /// with the standard sets may specify a minimum optimization level and one
- /// or more flags that must be set when constructing the set for the pass to
- /// be used.
- enum OptimizationFlags {
- /// Optimize for size was requested.
- OptimizeSize = 1<<0,
- /// Allow passes which may make global module changes.
- UnitAtATime = 1<<1,
- /// UnrollLoops - Allow loop unrolling.
- UnrollLoops = 1<<2,
- /// Allow library calls to be simplified.
- SimplifyLibCalls = 1<<3,
- /// Whether the module may have code using exceptions.
- HaveExceptions = 1<<4,
- // Run an inliner pass as part of this set.
- RunInliner = 1<<5
- };
- enum OptimizationFlagComponents {
- /// The low bits are used to store the optimization level. When requesting
- /// passes, this should store the requested optimisation level. When
- /// setting passes, this should set the minimum optimization level at which
- /// the pass will run.
- OptimizationLevelMask=0xf,
- /// The maximum optimisation level at which the pass is run.
- MaxOptimizationLevelMask=0xf0,
- // Flags that must be set
- RequiredFlagMask=0xff00,
- // Flags that may not be set.
- DisallowedFlagMask=0xff0000,
- MaxOptimizationLevelShift=4,
- RequiredFlagShift=8,
- DisallowedFlagShift=16
- };
- /// Returns the optimisation level from a set of flags.
- static unsigned OptimizationLevel(unsigned flags) {
- return flags & OptimizationLevelMask;
- }
- /// Returns the maximum optimization level for this set of flags
- static unsigned MaxOptimizationLevel(unsigned flags) {
- return (flags & MaxOptimizationLevelMask) >> 4;
- }
- /// Constructs a set of flags from the specified minimum and maximum
- /// optimisation level
- static unsigned OptimzationFlags(unsigned minLevel=0, unsigned maxLevel=0xf,
- unsigned requiredFlags=0, unsigned disallowedFlags=0) {
- return ((minLevel & OptimizationLevelMask) |
- ((maxLevel<<MaxOptimizationLevelShift) & MaxOptimizationLevelMask)
- | ((requiredFlags<<RequiredFlagShift) & RequiredFlagMask)
- | ((disallowedFlags<<DisallowedFlagShift) & DisallowedFlagMask));
- }
- /// Returns the flags that must be set for this to match
- static unsigned RequiredFlags(unsigned flags) {
- return (flags & RequiredFlagMask) >> RequiredFlagShift;
- }
- /// Returns the flags that must not be set for this to match
- static unsigned DisallowedFlags(unsigned flags) {
- return (flags & DisallowedFlagMask) >> DisallowedFlagShift;
- }
- /// Register a standard pass in the specified set. If flags is non-zero,
- /// then the pass will only be returned when the specified flags are set.
- template<typename passName>
- class RegisterStandardPass {
- public:
- RegisterStandardPass(StandardSet set, unsigned char *runBefore=0,
- unsigned flags=0, unsigned char *ID=0) {
- // Use the pass's ID if one is not specified
- RegisterDefaultPass(PassInfo::NormalCtor_t(callDefaultCtor<passName>),
- ID ? ID : (unsigned char*)&passName::ID, runBefore, set, flags);
- }
- };
- /// Adds the passes from the specified set to the provided pass manager
- static void AddPassesFromSet(PassManagerBase *PM,
- StandardSet set,
- unsigned flags=0,
- bool VerifyEach=false,
- Pass *inliner=0);
- private:
- /// Registers the default passes. This is set by RegisterStandardPassLists
- /// and is called lazily.
- static void (*RegisterDefaultPasses)(void);
- /// Creates the verifier pass that is inserted when a VerifyEach is passed to
- /// AddPassesFromSet()
- static Pass* (*CreateVerifierPass)(void);
- /// Registers the pass
- static void RegisterDefaultPass(PassInfo::NormalCtor_t constructor,
- unsigned char *newPass,
- unsigned char *oldPass,
- StandardSet set,
- unsigned flags=0);
-};
-
-} // namespace llvm
-
-#endif
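The flag packing in the removed StandardPass interface is easy to mis-read; here is a self-contained replica of the arithmetic (not LLVM code) showing how the four fields round-trip through the masks defined above:

    #include <cassert>

    // Bit layout from the deleted header: [3:0] min level, [7:4] max level,
    // [15:8] required flags, [23:16] disallowed flags.
    static unsigned packOptimizationFlags(unsigned minLevel, unsigned maxLevel,
                                          unsigned required, unsigned disallowed) {
      return (minLevel & 0xf) | ((maxLevel << 4) & 0xf0) |
             ((required << 8) & 0xff00) | ((disallowed << 16) & 0xff0000);
    }

    int main() {
      unsigned f = packOptimizationFlags(2, 3, 0x05, 0x02);
      assert((f & 0xf) == 2);                 // OptimizationLevel(f)
      assert(((f & 0xf0) >> 4) == 3);         // MaxOptimizationLevel(f)
      assert(((f & 0xff00) >> 8) == 0x05);    // RequiredFlags(f)
      assert(((f & 0xff0000) >> 16) == 0x02); // DisallowedFlags(f)
      return 0;
    }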
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
deleted file mode 100644
index c862c2c8bb20..000000000000
--- a/include/llvm/DerivedTypes.h
+++ /dev/null
@@ -1,455 +0,0 @@
-//===-- llvm/DerivedTypes.h - Classes for handling data types ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of classes that represent "derived
-// types". These are things like "arrays of x" or "structure of x, y, z" or
-// "function returning x taking (y,z) as parameters", etc...
-//
-// The implementations of these classes live in the Type.cpp file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DERIVED_TYPES_H
-#define LLVM_DERIVED_TYPES_H
-
-#include "llvm/Type.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-class Value;
-class APInt;
-class LLVMContext;
-template<typename T> class ArrayRef;
-class StringRef;
-
-/// Class to represent integer types. Note that this class is also used to
-/// represent the built-in integer types: Int1Ty, Int8Ty, Int16Ty, Int32Ty and
-/// Int64Ty.
-/// @brief Integer representation type
-class IntegerType : public Type {
- friend class LLVMContextImpl;
-
-protected:
- explicit IntegerType(LLVMContext &C, unsigned NumBits) : Type(C, IntegerTyID){
- setSubclassData(NumBits);
- }
-public:
- /// This enum is just used to hold constants we need for IntegerType.
- enum {
- MIN_INT_BITS = 1, ///< Minimum number of bits that can be specified
- MAX_INT_BITS = (1<<23)-1 ///< Maximum number of bits that can be specified
-    ///< Note that bit width is stored in the Type class's SubclassData field
- ///< which has 23 bits. This yields a maximum bit width of 8,388,607 bits.
- };
-
- /// This static method is the primary way of constructing an IntegerType.
- /// If an IntegerType with the same NumBits value was previously instantiated,
- /// that instance will be returned. Otherwise a new one will be created. Only
- /// one instance with a given NumBits value is ever created.
- /// @brief Get or create an IntegerType instance.
- static IntegerType *get(LLVMContext &C, unsigned NumBits);
-
- /// @brief Get the number of bits in this IntegerType
- unsigned getBitWidth() const { return getSubclassData(); }
-
- /// getBitMask - Return a bitmask with ones set for all of the bits
- /// that can be set by an unsigned version of this type. This is 0xFF for
- /// i8, 0xFFFF for i16, etc.
- uint64_t getBitMask() const {
- return ~uint64_t(0UL) >> (64-getBitWidth());
- }
-
- /// getSignBit - Return a uint64_t with just the most significant bit set (the
- /// sign bit, if the value is treated as a signed number).
- uint64_t getSignBit() const {
- return 1ULL << (getBitWidth()-1);
- }
-
- /// For example, this is 0xFF for an 8 bit integer, 0xFFFF for i16, etc.
- /// @returns a bit mask with ones set for all the bits of this type.
- /// @brief Get a bit mask for this type.
- APInt getMask() const;
-
- /// This method determines if the width of this IntegerType is a power-of-2
- /// in terms of 8 bit bytes.
- /// @returns true if this is a power-of-2 byte width.
-  /// @brief Is this a power-of-2 byte-width IntegerType?
- bool isPowerOf2ByteWidth() const;
-
- // Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
- return T->getTypeID() == IntegerTyID;
- }
-};
-
-
-/// FunctionType - Class to represent function types
-///
-class FunctionType : public Type {
- FunctionType(const FunctionType &) LLVM_DELETED_FUNCTION;
- const FunctionType &operator=(const FunctionType &) LLVM_DELETED_FUNCTION;
- FunctionType(Type *Result, ArrayRef<Type*> Params, bool IsVarArgs);
-
-public:
- /// FunctionType::get - This static method is the primary way of constructing
- /// a FunctionType.
- ///
- static FunctionType *get(Type *Result,
- ArrayRef<Type*> Params, bool isVarArg);
-
- /// FunctionType::get - Create a FunctionType taking no parameters.
- ///
- static FunctionType *get(Type *Result, bool isVarArg);
-
- /// isValidReturnType - Return true if the specified type is valid as a return
- /// type.
- static bool isValidReturnType(Type *RetTy);
-
- /// isValidArgumentType - Return true if the specified type is valid as an
- /// argument type.
- static bool isValidArgumentType(Type *ArgTy);
-
- bool isVarArg() const { return getSubclassData(); }
- Type *getReturnType() const { return ContainedTys[0]; }
-
- typedef Type::subtype_iterator param_iterator;
- param_iterator param_begin() const { return ContainedTys + 1; }
- param_iterator param_end() const { return &ContainedTys[NumContainedTys]; }
-
- // Parameter type accessors.
- Type *getParamType(unsigned i) const { return ContainedTys[i+1]; }
-
- /// getNumParams - Return the number of fixed parameters this function type
- /// requires. This does not consider varargs.
- ///
- unsigned getNumParams() const { return NumContainedTys - 1; }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
- return T->getTypeID() == FunctionTyID;
- }
-};
-
-
-/// CompositeType - Common super class of ArrayType, StructType, PointerType
-/// and VectorType.
-class CompositeType : public Type {
-protected:
- explicit CompositeType(LLVMContext &C, TypeID tid) : Type(C, tid) { }
-public:
-
- /// getTypeAtIndex - Given an index value into the type, return the type of
- /// the element.
- ///
- Type *getTypeAtIndex(const Value *V);
- Type *getTypeAtIndex(unsigned Idx);
- bool indexValid(const Value *V) const;
- bool indexValid(unsigned Idx) const;
-
- // Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
- return T->getTypeID() == ArrayTyID ||
- T->getTypeID() == StructTyID ||
- T->getTypeID() == PointerTyID ||
- T->getTypeID() == VectorTyID;
- }
-};
-
-
-/// StructType - Class to represent struct types. There are two different kinds
-/// of struct types: Literal structs and Identified structs.
-///
-/// Literal struct types (e.g. { i32, i32 }) are uniqued structurally, and must
-/// always have a body when created. You can get one of these by using one of
-/// the StructType::get() forms.
-///
-/// Identified structs (e.g. %foo or %42) may optionally have a name and are not
-/// uniqued. The names for identified structs are managed at the LLVMContext
-/// level, so there can only be a single identified struct with a given name in
-/// a particular LLVMContext. Identified structs may also optionally be opaque
-/// (have no body specified). You get one of these by using one of the
-/// StructType::create() forms.
-///
-/// Independent of what kind of struct you have, the body of a struct type are
-/// laid out in memory consecutively with the elements directly one after the
-/// other (if the struct is packed) or (if not packed) with padding between the
-/// elements as defined by DataLayout (which is required to match what the code
-/// generator for a target expects).
-///
-class StructType : public CompositeType {
- StructType(const StructType &) LLVM_DELETED_FUNCTION;
- const StructType &operator=(const StructType &) LLVM_DELETED_FUNCTION;
- StructType(LLVMContext &C)
- : CompositeType(C, StructTyID), SymbolTableEntry(0) {}
- enum {
- // This is the contents of the SubClassData field.
- SCDB_HasBody = 1,
- SCDB_Packed = 2,
- SCDB_IsLiteral = 4,
- SCDB_IsSized = 8
- };
-
- /// SymbolTableEntry - For a named struct that actually has a name, this is a
- /// pointer to the symbol table entry (maintained by LLVMContext) for the
-  /// struct. This is null if the type is a literal struct or if it is
-  /// an identified type that has an empty name.
- ///
- void *SymbolTableEntry;
-public:
- ~StructType() {
- delete [] ContainedTys; // Delete the body.
- }
-
- /// StructType::create - This creates an identified struct.
- static StructType *create(LLVMContext &Context, StringRef Name);
- static StructType *create(LLVMContext &Context);
-
- static StructType *create(ArrayRef<Type*> Elements,
- StringRef Name,
- bool isPacked = false);
- static StructType *create(ArrayRef<Type*> Elements);
- static StructType *create(LLVMContext &Context,
- ArrayRef<Type*> Elements,
- StringRef Name,
- bool isPacked = false);
- static StructType *create(LLVMContext &Context, ArrayRef<Type*> Elements);
- static StructType *create(StringRef Name, Type *elt1, ...) END_WITH_NULL;
-
- /// StructType::get - This static method is the primary way to create a
- /// literal StructType.
- static StructType *get(LLVMContext &Context, ArrayRef<Type*> Elements,
- bool isPacked = false);
-
- /// StructType::get - Create an empty structure type.
- ///
- static StructType *get(LLVMContext &Context, bool isPacked = false);
-
- /// StructType::get - This static method is a convenience method for creating
- /// structure types by specifying the elements as arguments. Note that this
- /// method always returns a non-packed struct, and requires at least one
- /// element type.
- static StructType *get(Type *elt1, ...) END_WITH_NULL;
-
- bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; }
-
- /// isLiteral - Return true if this type is uniqued by structural
- /// equivalence, false if it is a struct definition.
- bool isLiteral() const { return (getSubclassData() & SCDB_IsLiteral) != 0; }
-
- /// isOpaque - Return true if this is a type with an identity that has no body
-  /// specified yet. These print as 'opaque' in .ll files.
- bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; }
-
- /// isSized - Return true if this is a sized type.
- bool isSized() const;
-
- /// hasName - Return true if this is a named struct that has a non-empty name.
- bool hasName() const { return SymbolTableEntry != 0; }
-
- /// getName - Return the name for this struct type if it has an identity.
- /// This may return an empty string for an unnamed struct type. Do not call
-  /// this on a literal type.
- StringRef getName() const;
-
- /// setName - Change the name of this type to the specified name, or to a name
-  /// with a suffix if there is a collision. Do not call this on a literal
- /// type.
- void setName(StringRef Name);
-
- /// setBody - Specify a body for an opaque identified type.
- void setBody(ArrayRef<Type*> Elements, bool isPacked = false);
- void setBody(Type *elt1, ...) END_WITH_NULL;
-
-  /// isValidElementType - Return true if the specified type is valid as an
- /// element type.
- static bool isValidElementType(Type *ElemTy);
-
-
- // Iterator access to the elements.
- typedef Type::subtype_iterator element_iterator;
- element_iterator element_begin() const { return ContainedTys; }
- element_iterator element_end() const { return &ContainedTys[NumContainedTys];}
-
- /// isLayoutIdentical - Return true if this is layout identical to the
- /// specified struct.
- bool isLayoutIdentical(StructType *Other) const;
-
- // Random access to the elements
- unsigned getNumElements() const { return NumContainedTys; }
- Type *getElementType(unsigned N) const {
- assert(N < NumContainedTys && "Element number out of range!");
- return ContainedTys[N];
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
- return T->getTypeID() == StructTyID;
- }
-};
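To make the literal-vs-identified distinction above concrete, a short sketch using only the factory methods declared in this class (Ctx and Elts are assumed to be an existing llvm::LLVMContext and an ArrayRef<llvm::Type*> of element types):

    // Literal struct: uniqued structurally, body required at creation.
    llvm::StructType *Lit = llvm::StructType::get(Ctx, Elts, /*isPacked=*/false);

    // Identified struct: named, not uniqued; may start opaque and
    // receive its body later via setBody().
    llvm::StructType *Named = llvm::StructType::create(Ctx, "foo");
    Named->setBody(Elts, /*isPacked=*/false);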
-
-/// SequentialType - This is the superclass of the array, pointer and vector
-/// type classes. All of these represent "arrays" in memory. The array type
-/// represents a specifically sized array, pointer types are unsized/unknown
-/// size arrays, vector types represent specifically sized arrays that
-/// allow for use of SIMD instructions. SequentialType holds the common
-/// features of all, which stem from the fact that all three lay their
-/// components out in memory identically.
-///
-class SequentialType : public CompositeType {
- Type *ContainedType; ///< Storage for the single contained type.
- SequentialType(const SequentialType &) LLVM_DELETED_FUNCTION;
- const SequentialType &operator=(const SequentialType &) LLVM_DELETED_FUNCTION;
-
-protected:
- SequentialType(TypeID TID, Type *ElType)
- : CompositeType(ElType->getContext(), TID), ContainedType(ElType) {
- ContainedTys = &ContainedType;
- NumContainedTys = 1;
- }
-
-public:
- Type *getElementType() const { return ContainedTys[0]; }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
- return T->getTypeID() == ArrayTyID ||
- T->getTypeID() == PointerTyID ||
- T->getTypeID() == VectorTyID;
- }
-};
-
-
-/// ArrayType - Class to represent array types.
-///
-class ArrayType : public SequentialType {
- uint64_t NumElements;
-
- ArrayType(const ArrayType &) LLVM_DELETED_FUNCTION;
- const ArrayType &operator=(const ArrayType &) LLVM_DELETED_FUNCTION;
- ArrayType(Type *ElType, uint64_t NumEl);
-public:
- /// ArrayType::get - This static method is the primary way to construct an
- /// ArrayType
- ///
- static ArrayType *get(Type *ElementType, uint64_t NumElements);
-
- /// isValidElementType - Return true if the specified type is valid as an
- /// element type.
- static bool isValidElementType(Type *ElemTy);
-
- uint64_t getNumElements() const { return NumElements; }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
- return T->getTypeID() == ArrayTyID;
- }
-};
-
-/// VectorType - Class to represent vector types.
-///
-class VectorType : public SequentialType {
- unsigned NumElements;
-
- VectorType(const VectorType &) LLVM_DELETED_FUNCTION;
- const VectorType &operator=(const VectorType &) LLVM_DELETED_FUNCTION;
- VectorType(Type *ElType, unsigned NumEl);
-public:
- /// VectorType::get - This static method is the primary way to construct a
- /// VectorType.
- ///
- static VectorType *get(Type *ElementType, unsigned NumElements);
-
- /// VectorType::getInteger - This static method gets a VectorType with the
- /// same number of elements as the input type, and the element type is an
- /// integer type of the same width as the input element type.
- ///
- static VectorType *getInteger(VectorType *VTy) {
- unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- assert(EltBits && "Element size must be non-zero");
- Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
- return VectorType::get(EltTy, VTy->getNumElements());
- }
-
- /// VectorType::getExtendedElementVectorType - This static method is like
- /// getInteger except that the element types are twice as wide as the
- /// elements in the input type.
- ///
- static VectorType *getExtendedElementVectorType(VectorType *VTy) {
- unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2);
- return VectorType::get(EltTy, VTy->getNumElements());
- }
-
- /// VectorType::getTruncatedElementVectorType - This static method is like
- /// getInteger except that the element types are half as wide as the
- /// elements in the input type.
- ///
- static VectorType *getTruncatedElementVectorType(VectorType *VTy) {
- unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- assert((EltBits & 1) == 0 &&
- "Cannot truncate vector element with odd bit-width");
- Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
- return VectorType::get(EltTy, VTy->getNumElements());
- }
-
- /// isValidElementType - Return true if the specified type is valid as an
- /// element type.
- static bool isValidElementType(Type *ElemTy);
-
- /// @brief Return the number of elements in the Vector type.
- unsigned getNumElements() const { return NumElements; }
-
- /// @brief Return the number of bits in the Vector type.
- /// Returns zero when the vector is a vector of pointers.
- unsigned getBitWidth() const {
- return NumElements * getElementType()->getPrimitiveSizeInBits();
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
- return T->getTypeID() == VectorTyID;
- }
-};
-
-
-/// PointerType - Class to represent pointers.
-///
-class PointerType : public SequentialType {
- PointerType(const PointerType &) LLVM_DELETED_FUNCTION;
- const PointerType &operator=(const PointerType &) LLVM_DELETED_FUNCTION;
- explicit PointerType(Type *ElType, unsigned AddrSpace);
-public:
- /// PointerType::get - This constructs a pointer to an object of the specified
- /// type in a numbered address space.
- static PointerType *get(Type *ElementType, unsigned AddressSpace);
-
- /// PointerType::getUnqual - This constructs a pointer to an object of the
- /// specified type in the generic address space (address space zero).
- static PointerType *getUnqual(Type *ElementType) {
- return PointerType::get(ElementType, 0);
- }
-
- /// isValidElementType - Return true if the specified type is valid as an
- /// element type.
- static bool isValidElementType(Type *ElemTy);
-
- /// @brief Return the address space of the Pointer type.
- inline unsigned getAddressSpace() const { return getSubclassData(); }
-
- // Implement support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const Type *T) {
- return T->getTypeID() == PointerTyID;
- }
-};
-
-} // End llvm namespace
-
-#endif
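
For orientation, here is a minimal sketch of how the factory methods in the removed header fit together. It assumes the usual Type::get*Ty helpers and the pre-move include path, and is illustrative only, not part of the import:

    #include "llvm/DerivedTypes.h"  // pre-move path for these declarations

    using namespace llvm;

    void buildTypes(LLVMContext &Ctx) {
      // Identified struct: create the name first, attach the body later,
      // which allows self-referential types. setBody is variadic and
      // END_WITH_NULL, so it must be NULL-terminated.
      StructType *Node = StructType::create(Ctx, "Node");
      Node->setBody(Type::getInt32Ty(Ctx), PointerType::getUnqual(Node), NULL);

      // Literal struct: uniqued by structural equivalence (isLiteral()).
      Type *Fields[] = { Type::getInt8Ty(Ctx), Type::getInt64Ty(Ctx) };
      StructType *Pair = StructType::get(Ctx, Fields);

      // The three SequentialType subclasses share one element-type slot.
      ArrayType   *A = ArrayType::get(Type::getFloatTy(Ctx), 16);
      VectorType  *V = VectorType::get(Type::getInt32Ty(Ctx), 4);
      PointerType *P = PointerType::getUnqual(Pair);
      (void)A; (void)V; (void)P;
    }
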
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index 8073d8f92c51..3fd69e266b47 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -12,22 +12,22 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTION_ENGINE_H
-#define LLVM_EXECUTION_ENGINE_H
+#ifndef LLVM_EXECUTIONENGINE_EXECUTIONENGINE_H
+#define LLVM_EXECUTIONENGINE_EXECUTIONENGINE_H
-#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ValueMap.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include <vector>
#include <map>
#include <string>
+#include <vector>
namespace llvm {
diff --git a/include/llvm/ExecutionEngine/GenericValue.h b/include/llvm/ExecutionEngine/GenericValue.h
index a2fed98c150e..0e92f79eba8f 100644
--- a/include/llvm/ExecutionEngine/GenericValue.h
+++ b/include/llvm/ExecutionEngine/GenericValue.h
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
-#ifndef GENERIC_VALUE_H
-#define GENERIC_VALUE_H
+#ifndef LLVM_EXECUTIONENGINE_GENERICVALUE_H
+#define LLVM_EXECUTIONENGINE_GENERICVALUE_H
#include "llvm/ADT/APInt.h"
#include "llvm/Support/DataTypes.h"
@@ -24,21 +24,30 @@ typedef void* PointerTy;
class APInt;
struct GenericValue {
+ struct IntPair {
+ unsigned int first;
+ unsigned int second;
+ };
union {
double DoubleVal;
float FloatVal;
PointerTy PointerVal;
- struct { unsigned int first; unsigned int second; } UIntPairVal;
+ struct IntPair UIntPairVal;
unsigned char Untyped[8];
};
- APInt IntVal; // also used for long doubles
-
- GenericValue() : DoubleVal(0.0), IntVal(1,0) {}
+ APInt IntVal; // also used for long doubles.
+ // For aggregate data types.
+ std::vector<GenericValue> AggregateVal;
+
+ // To make the code faster, zero-initializing the GenericValue could be
+ // omitted, but that could potentially cause problems, since the
+ // GenericValue would then store garbage instead of zero.
+ GenericValue() : IntVal(1,0) {UIntPairVal.first = 0; UIntPairVal.second = 0;}
explicit GenericValue(void *V) : PointerVal(V), IntVal(1,0) { }
};
inline GenericValue PTOGV(void *P) { return GenericValue(P); }
inline void* GVTOP(const GenericValue &GV) { return GV.PointerVal; }
-} // End llvm namespace
+} // End llvm namespace.
#endif
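
A small sketch of the new AggregateVal member in use. The packing scheme shown here, one GenericValue per field, is how an interpreter would plausibly consume it; it is an assumption for illustration, not something this header mandates:

    #include "llvm/ExecutionEngine/GenericValue.h"

    using namespace llvm;

    // Pack a two-field aggregate {i32, float} into a single GenericValue.
    static GenericValue makePair(unsigned I, float F) {
      GenericValue E0, E1, Agg;
      E0.IntVal = APInt(32, I);       // integer field, width 32
      E1.FloatVal = F;                // floating-point field
      Agg.AggregateVal.push_back(E0); // element order mirrors field order
      Agg.AggregateVal.push_back(E1);
      return Agg;
    }
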
diff --git a/include/llvm/ExecutionEngine/Interpreter.h b/include/llvm/ExecutionEngine/Interpreter.h
index 72d97ef8e12b..f49d0c487fe9 100644
--- a/include/llvm/ExecutionEngine/Interpreter.h
+++ b/include/llvm/ExecutionEngine/Interpreter.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef EXECUTION_ENGINE_INTERPRETER_H
-#define EXECUTION_ENGINE_INTERPRETER_H
+#ifndef LLVM_EXECUTIONENGINE_INTERPRETER_H
+#define LLVM_EXECUTIONENGINE_INTERPRETER_H
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include <cstdlib>
diff --git a/include/llvm/ExecutionEngine/JIT.h b/include/llvm/ExecutionEngine/JIT.h
index b4cda1d513f1..581d6e6c35eb 100644
--- a/include/llvm/ExecutionEngine/JIT.h
+++ b/include/llvm/ExecutionEngine/JIT.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTION_ENGINE_JIT_H
-#define LLVM_EXECUTION_ENGINE_JIT_H
+#ifndef LLVM_EXECUTIONENGINE_JIT_H
+#define LLVM_EXECUTIONENGINE_JIT_H
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include <cstdlib>
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index e6586e778c19..ed66102d4696 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -12,13 +12,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
-#define LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
+#ifndef LLVM_EXECUTIONENGINE_JITEVENTLISTENER_H
+#define LLVM_EXECUTIONENGINE_JITEVENTLISTENER_H
-#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/DebugLoc.h"
-
#include <vector>
namespace llvm {
@@ -128,4 +127,4 @@ public:
} // end namespace llvm.
-#endif // defined LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
+#endif // defined LLVM_EXECUTIONENGINE_JITEVENTLISTENER_H
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 90896465018c..714a98055a42 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -7,12 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
-#define LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
+#ifndef LLVM_EXECUTIONENGINE_JITMEMORYMANAGER_H
+#define LLVM_EXECUTIONENGINE_JITMEMORYMANAGER_H
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Support/DataTypes.h"
-
#include <string>
namespace llvm {
diff --git a/include/llvm/ExecutionEngine/MCJIT.h b/include/llvm/ExecutionEngine/MCJIT.h
index ac16bdc7df17..66ddb7cdb875 100644
--- a/include/llvm/ExecutionEngine/MCJIT.h
+++ b/include/llvm/ExecutionEngine/MCJIT.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTION_ENGINE_MCJIT_H
-#define LLVM_EXECUTION_ENGINE_MCJIT_H
+#ifndef LLVM_EXECUTIONENGINE_MCJIT_H
+#define LLVM_EXECUTIONENGINE_MCJIT_H
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include <cstdlib>
diff --git a/include/llvm/ExecutionEngine/OProfileWrapper.h b/include/llvm/ExecutionEngine/OProfileWrapper.h
index ab7f25e9d03d..05da594a94a8 100644
--- a/include/llvm/ExecutionEngine/OProfileWrapper.h
+++ b/include/llvm/ExecutionEngine/OProfileWrapper.h
@@ -17,8 +17,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef OPROFILE_WRAPPER_H
-#define OPROFILE_WRAPPER_H
+#ifndef LLVM_EXECUTIONENGINE_OPROFILEWRAPPER_H
+#define LLVM_EXECUTIONENGINE_OPROFILEWRAPPER_H
#include "llvm/Support/DataTypes.h"
#include <opagent.h>
@@ -41,10 +41,10 @@ class OProfileWrapper {
typedef int (*op_unload_native_code_ptr_t)(op_agent_t, uint64_t);
// Also used for op_minor_version function which has the same signature
- typedef int (*op_major_version_ptr_t)(void);
+ typedef int (*op_major_version_ptr_t)();
// This is not a part of the opagent API, but is useful nonetheless
- typedef bool (*IsOProfileRunningPtrT)(void);
+ typedef bool (*IsOProfileRunningPtrT)();
op_agent_t Agent;
@@ -99,8 +99,8 @@ public:
size_t num_entries,
struct debug_line_info const* info);
int op_unload_native_code(uint64_t addr);
- int op_major_version(void);
- int op_minor_version(void);
+ int op_major_version();
+ int op_minor_version();
// Returns true if the oprofiled process is running, the opagent library is
// loaded and a connection to the agent has been established, and false
@@ -121,4 +121,4 @@ private:
} // namespace llvm
-#endif //OPROFILE_WRAPPER_H
+#endif // LLVM_EXECUTIONENGINE_OPROFILEWRAPPER_H
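
The typedef changes above are purely cosmetic: unlike C, C++ gives an empty parameter list the same meaning as (void), so both spellings name the same function-pointer type. A two-line illustration:

    typedef int (*op_version_a)(void);
    typedef int (*op_version_b)();
    op_version_a A = 0;
    op_version_b B = A;   // OK in C++: the two types are identical
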
diff --git a/include/llvm/ExecutionEngine/ObjectBuffer.h b/include/llvm/ExecutionEngine/ObjectBuffer.h
index a0a77b8ba888..96a48b28b847 100644
--- a/include/llvm/ExecutionEngine/ObjectBuffer.h
+++ b/include/llvm/ExecutionEngine/ObjectBuffer.h
@@ -1,80 +1,80 @@
-//===---- ObjectBuffer.h - Utility class to wrap object image memory -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares a wrapper class to hold the memory into which an
-// object will be generated.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_OBJECTBUFFER_H
-#define LLVM_EXECUTIONENGINE_OBJECTBUFFER_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/MemoryBuffer.h"
-
-namespace llvm {
-
-/// ObjectBuffer - This class acts as a container for the memory buffer used during
-/// generation and loading of executable objects using MCJIT and RuntimeDyld. The
-/// underlying memory for the object will be owned by the ObjectBuffer instance
-/// throughout its lifetime. The getMemBuffer() method provides a way to create a
-/// MemoryBuffer wrapper object instance to be owned by other classes (such as
-/// ObjectFile) as needed, but the MemoryBuffer instance returned does not own the
-/// actual memory it points to.
-class ObjectBuffer {
-public:
- ObjectBuffer() {}
- ObjectBuffer(MemoryBuffer* Buf) : Buffer(Buf) {}
- virtual ~ObjectBuffer() {}
-
- /// getMemBuffer - Like MemoryBuffer::getMemBuffer() this function
- /// returns a pointer to an object that is owned by the caller. However,
- /// the caller does not take ownership of the underlying memory.
- MemoryBuffer *getMemBuffer() const {
- return MemoryBuffer::getMemBuffer(Buffer->getBuffer(), "", false);
- }
-
- const char *getBufferStart() const { return Buffer->getBufferStart(); }
- size_t getBufferSize() const { return Buffer->getBufferSize(); }
-
-protected:
- // The memory contained in an ObjectBuffer
- OwningPtr<MemoryBuffer> Buffer;
-};
-
-/// ObjectBufferStream - This class encapsulates the SmallVector and
-/// raw_svector_ostream needed to generate an object using MC code emission
-/// while providing a common ObjectBuffer interface for access to the
-/// memory once the object has been generated.
-class ObjectBufferStream : public ObjectBuffer {
-public:
- ObjectBufferStream() : OS(SV) {}
- virtual ~ObjectBufferStream() {}
-
- raw_ostream &getOStream() { return OS; }
- void flush()
- {
- OS.flush();
-
- // Make the data accessible via the ObjectBuffer::Buffer
- Buffer.reset(MemoryBuffer::getMemBuffer(StringRef(SV.data(), SV.size()),
- "",
- false));
- }
-
-protected:
- SmallVector<char, 4096> SV; // Working buffer into which we JIT.
- raw_svector_ostream OS; // streaming wrapper
-};
-
-} // namespace llvm
-
-#endif
+//===---- ObjectBuffer.h - Utility class to wrap object image memory -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a wrapper class to hold the memory into which an
+// object will be generated.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_OBJECTBUFFER_H
+#define LLVM_EXECUTIONENGINE_OBJECTBUFFER_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// ObjectBuffer - This class acts as a container for the memory buffer used during
+/// generation and loading of executable objects using MCJIT and RuntimeDyld. The
+/// underlying memory for the object will be owned by the ObjectBuffer instance
+/// throughout its lifetime. The getMemBuffer() method provides a way to create a
+/// MemoryBuffer wrapper object instance to be owned by other classes (such as
+/// ObjectFile) as needed, but the MemoryBuffer instance returned does not own the
+/// actual memory it points to.
+class ObjectBuffer {
+public:
+ ObjectBuffer() {}
+ ObjectBuffer(MemoryBuffer* Buf) : Buffer(Buf) {}
+ virtual ~ObjectBuffer() {}
+
+ /// getMemBuffer - Like MemoryBuffer::getMemBuffer() this function
+ /// returns a pointer to an object that is owned by the caller. However,
+ /// the caller does not take ownership of the underlying memory.
+ MemoryBuffer *getMemBuffer() const {
+ return MemoryBuffer::getMemBuffer(Buffer->getBuffer(), "", false);
+ }
+
+ const char *getBufferStart() const { return Buffer->getBufferStart(); }
+ size_t getBufferSize() const { return Buffer->getBufferSize(); }
+
+protected:
+ // The memory contained in an ObjectBuffer
+ OwningPtr<MemoryBuffer> Buffer;
+};
+
+/// ObjectBufferStream - This class encapsulates the SmallVector and
+/// raw_svector_ostream needed to generate an object using MC code emission
+/// while providing a common ObjectBuffer interface for access to the
+/// memory once the object has been generated.
+class ObjectBufferStream : public ObjectBuffer {
+public:
+ ObjectBufferStream() : OS(SV) {}
+ virtual ~ObjectBufferStream() {}
+
+ raw_ostream &getOStream() { return OS; }
+ void flush()
+ {
+ OS.flush();
+
+ // Make the data accessible via the ObjectBuffer::Buffer
+ Buffer.reset(MemoryBuffer::getMemBuffer(StringRef(SV.data(), SV.size()),
+ "",
+ false));
+ }
+
+protected:
+ SmallVector<char, 4096> SV; // Working buffer into which we JIT.
+ raw_svector_ostream OS; // streaming wrapper
+};
+
+} // namespace llvm
+
+#endif
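
A sketch of the intended flow for ObjectBufferStream: stream the object bytes in, flush, then hand out non-owning MemoryBuffer views. emitObject is a hypothetical stand-in for MC-based emission:

    #include "llvm/ExecutionEngine/ObjectBuffer.h"

    using namespace llvm;

    void emitObject(raw_ostream &OS);   // assumed emitter, defined elsewhere

    ObjectBuffer *makeObject() {
      ObjectBufferStream *OB = new ObjectBufferStream();
      emitObject(OB->getOStream());     // write the object image
      OB->flush();                      // publish it via the Buffer member
      // getMemBuffer() now yields views that do not own the memory;
      // the ObjectBuffer itself keeps the bytes alive.
      return OB;
    }
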
diff --git a/include/llvm/ExecutionEngine/ObjectImage.h b/include/llvm/ExecutionEngine/ObjectImage.h
index 82549add62e8..9fddca7e33c8 100644
--- a/include/llvm/ExecutionEngine/ObjectImage.h
+++ b/include/llvm/ExecutionEngine/ObjectImage.h
@@ -1,61 +1,63 @@
-//===---- ObjectImage.h - Format independent executable object image -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares a file format independent ObjectImage class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
-#define LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
-
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/ExecutionEngine/ObjectBuffer.h"
-
-namespace llvm {
-
-
-/// ObjectImage - A container class that represents an ObjectFile that has been
-/// or is in the process of being loaded into memory for execution.
-class ObjectImage {
- ObjectImage() LLVM_DELETED_FUNCTION;
- ObjectImage(const ObjectImage &other) LLVM_DELETED_FUNCTION;
-
-protected:
- OwningPtr<ObjectBuffer> Buffer;
-
-public:
- ObjectImage(ObjectBuffer *Input) : Buffer(Input) {}
- virtual ~ObjectImage() {}
-
- virtual object::symbol_iterator begin_symbols() const = 0;
- virtual object::symbol_iterator end_symbols() const = 0;
-
- virtual object::section_iterator begin_sections() const = 0;
- virtual object::section_iterator end_sections() const = 0;
-
- virtual /* Triple::ArchType */ unsigned getArch() const = 0;
-
- // Subclasses can override these methods to update the image with loaded
- // addresses for sections and common symbols
- virtual void updateSectionAddress(const object::SectionRef &Sec,
- uint64_t Addr) = 0;
- virtual void updateSymbolAddress(const object::SymbolRef &Sym,
- uint64_t Addr) = 0;
-
- virtual StringRef getData() const = 0;
-
- // Subclasses can override these methods to provide JIT debugging support
- virtual void registerWithDebugger() = 0;
- virtual void deregisterWithDebugger() = 0;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H
-
+//===---- ObjectImage.h - Format independent executable object image -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a file format independent ObjectImage class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
+#define LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
+
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
+#include "llvm/Object/ObjectFile.h"
+
+namespace llvm {
+
+
+/// ObjectImage - A container class that represents an ObjectFile that has been
+/// or is in the process of being loaded into memory for execution.
+class ObjectImage {
+ ObjectImage() LLVM_DELETED_FUNCTION;
+ ObjectImage(const ObjectImage &other) LLVM_DELETED_FUNCTION;
+
+protected:
+ OwningPtr<ObjectBuffer> Buffer;
+
+public:
+ ObjectImage(ObjectBuffer *Input) : Buffer(Input) {}
+ virtual ~ObjectImage() {}
+
+ virtual object::symbol_iterator begin_symbols() const = 0;
+ virtual object::symbol_iterator end_symbols() const = 0;
+
+ virtual object::section_iterator begin_sections() const = 0;
+ virtual object::section_iterator end_sections() const = 0;
+
+ virtual /* Triple::ArchType */ unsigned getArch() const = 0;
+
+ // Subclasses can override these methods to update the image with loaded
+ // addresses for sections and common symbols
+ virtual void updateSectionAddress(const object::SectionRef &Sec,
+ uint64_t Addr) = 0;
+ virtual void updateSymbolAddress(const object::SymbolRef &Sym,
+ uint64_t Addr) = 0;
+
+ virtual StringRef getData() const = 0;
+
+ virtual object::ObjectFile* getObjectFile() const = 0;
+
+ // Subclasses can override these methods to provide JIT debugging support
+ virtual void registerWithDebugger() = 0;
+ virtual void deregisterWithDebugger() = 0;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
+
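
ObjectImage is abstract, so a consumer only sees the interface above. A minimal sketch of reading a loaded image back, including the getObjectFile() accessor this import adds:

    #include "llvm/ExecutionEngine/ObjectImage.h"

    using namespace llvm;

    void describe(ObjectImage &Img) {
      unsigned Arch = Img.getArch();                 // Triple::ArchType, widened
      StringRef Raw = Img.getData();                 // raw image bytes
      object::ObjectFile *OF = Img.getObjectFile();  // new in this import
      (void)Arch; (void)Raw; (void)OF;
    }
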
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index 891f534862f4..4222d5335bcc 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_RUNTIME_DYLD_H
-#define LLVM_RUNTIME_DYLD_H
+#ifndef LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H
+#define LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H
#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/ObjectBuffer.h"
@@ -36,29 +36,36 @@ public:
RTDyldMemoryManager() {}
virtual ~RTDyldMemoryManager();
- /// allocateCodeSection - Allocate a memory block of (at least) the given
- /// size suitable for executable code. The SectionID is a unique identifier
- /// assigned by the JIT engine, and optionally recorded by the memory manager
- /// to access a loaded section.
+ /// Allocate a memory block of (at least) the given size suitable for
+ /// executable code. The SectionID is a unique identifier assigned by the JIT
+ /// engine, and optionally recorded by the memory manager to access a loaded
+ /// section.
virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID) = 0;
- /// allocateDataSection - Allocate a memory block of (at least) the given
- /// size suitable for data. The SectionID is a unique identifier
- /// assigned by the JIT engine, and optionally recorded by the memory manager
- /// to access a loaded section.
+ /// Allocate a memory block of (at least) the given size suitable for data.
+ /// The SectionID is a unique identifier assigned by the JIT engine, and
+ /// optionally recorded by the memory manager to access a loaded section.
virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID) = 0;
+ unsigned SectionID, bool IsReadOnly) = 0;
- /// getPointerToNamedFunction - This method returns the address of the
- /// specified function. As such it is only useful for resolving library
- /// symbols, not code generated symbols.
+ /// This method returns the address of the specified function. As such it is
+ /// only useful for resolving library symbols, not code generated symbols.
///
/// If AbortOnFailure is false and no function with the given name is
/// found, this function returns a null pointer. Otherwise, it prints a
/// message to stderr and aborts.
virtual void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true) = 0;
+
+ /// This method is called when object loading is complete and section page
+ /// permissions can be applied. It is up to the memory manager implementation
+ /// to decide whether or not to act on this method. The memory manager will
+ /// typically allocate all sections as read-write and then apply specific
+ /// permissions when this method is called.
+ ///
+ /// Returns true if an error occurred, false otherwise.
+ virtual bool applyPermissions(std::string *ErrMsg = 0) = 0;
};
class RuntimeDyld {
@@ -77,10 +84,10 @@ public:
RuntimeDyld(RTDyldMemoryManager *);
~RuntimeDyld();
- /// loadObject - prepare the object contained in the input buffer for
- /// execution. Ownership of the input buffer is transferred to the
- /// ObjectImage instance returned from this function if successful.
- /// In the case of load failure, the input buffer will be deleted.
+ /// Prepare the object contained in the input buffer for execution.
+ /// Ownership of the input buffer is transferred to the ObjectImage
+ /// instance returned from this function if successful. In the case of load
+ /// failure, the input buffer will be deleted.
ObjectImage *loadObject(ObjectBuffer *InputBuffer);
/// Get the address of our local copy of the symbol. This may or may not
@@ -95,7 +102,7 @@ public:
/// Resolve the relocations for all symbols we currently know about.
void resolveRelocations();
- /// mapSectionAddress - map a section to its target address space value.
+ /// Map a section to its target address space value.
/// Map the address of a JIT section as returned from the memory manager
/// to the address in the target process as the running code will see it.
/// This is the address which will be used for relocation resolution.
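
Taken together, the revised interface implies the load sequence sketched below. The sketch assumes the getSymbolAddress accessor that the "local copy of the symbol" comment above refers to, and elides error reporting:

    #include "llvm/ExecutionEngine/RuntimeDyld.h"
    #include <string>

    using namespace llvm;

    void *loadAndFind(RTDyldMemoryManager *MM, ObjectBuffer *Buf,
                      StringRef Sym) {
      RuntimeDyld Dyld(MM);
      ObjectImage *Img = Dyld.loadObject(Buf); // takes ownership of Buf
      if (!Img)
        return 0;                              // on failure Buf was deleted
      Dyld.resolveRelocations();
      std::string Err;
      if (MM->applyPermissions(&Err))          // true means an error occurred
        return 0;
      return Dyld.getSymbolAddress(Sym);       // address of our local copy
    }
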
diff --git a/include/llvm/ExecutionEngine/SectionMemoryManager.h b/include/llvm/ExecutionEngine/SectionMemoryManager.h
new file mode 100644
index 000000000000..ae5004e130c0
--- /dev/null
+++ b/include/llvm/ExecutionEngine/SectionMemoryManager.h
@@ -0,0 +1,176 @@
+//===- SectionMemoryManager.h - Memory manager for MCJIT/RtDyld -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of a section-based memory manager used by
+// the MCJIT execution engine and RuntimeDyld.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_SECTIONMEMORYMANAGER_H
+#define LLVM_EXECUTIONENGINE_SECTIONMEMORYMANAGER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
+
+namespace llvm {
+
+/// This is a simple memory manager which implements the methods called by
+/// the RuntimeDyld class to allocate memory for section-based loading of
+/// objects, usually those generated by the MCJIT execution engine.
+///
+/// This memory manager allocates all section memory as read-write. The
+/// RuntimeDyld will copy JITed section memory into these allocated blocks
+/// and perform any necessary linking and relocations.
+///
+/// Any client using this memory manager MUST ensure that section-specific
+/// page permissions have been applied before attempting to execute functions
+/// in the JITed object. Permissions can be applied either by calling
+/// MCJIT::finalizeObject or by calling SectionMemoryManager::applyPermissions
+/// directly. Clients of MCJIT should call MCJIT::finalizeObject.
+class SectionMemoryManager : public JITMemoryManager {
+ SectionMemoryManager(const SectionMemoryManager&) LLVM_DELETED_FUNCTION;
+ void operator=(const SectionMemoryManager&) LLVM_DELETED_FUNCTION;
+
+public:
+ SectionMemoryManager() { }
+ virtual ~SectionMemoryManager();
+
+ /// \brief Allocates a memory block of (at least) the given size suitable for
+ /// executable code.
+ ///
+ /// The value of \p Alignment must be a power of two. If \p Alignment is zero
+ /// a default alignment of 16 will be used.
+ virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID);
+
+ /// \brief Allocates a memory block of (at least) the given size suitable for
+ /// data.
+ ///
+ /// The value of \p Alignment must be a power of two. If \p Alignment is zero
+ /// a default alignment of 16 will be used.
+ virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ bool isReadOnly);
+
+ /// \brief Applies section-specific memory permissions.
+ ///
+ /// This method is called when object loading is complete and section page
+ /// permissions can be applied. It is up to the memory manager implementation
+ /// to decide whether or not to act on this method. The memory manager will
+ /// typically allocate all sections as read-write and then apply specific
+ /// permissions when this method is called. Code sections cannot be executed
+ /// until this function has been called.
+ ///
+ /// \returns true if an error occurred, false otherwise.
+ virtual bool applyPermissions(std::string *ErrMsg = 0);
+
+ /// This method returns the address of the specified function. As such it is
+ /// only useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If \p AbortOnFailure is false and no function with the given name is
+ /// found, this function returns a null pointer. Otherwise, it prints a
+ /// message to stderr and aborts.
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
+ /// \brief Invalidate instruction cache for code sections.
+ ///
+ /// Some platforms with separate data cache and instruction cache require
+ /// explicit cache flush, otherwise JIT code manipulations (like resolved
+ /// relocations) will get to the data cache but not to the instruction cache.
+ ///
+ /// This method is not called by RuntimeDyld or MCJIT during the load
+ /// process. Clients may call this function when needed. See the lli
+ /// tool for example use.
+ virtual void invalidateInstructionCache();
+
+private:
+ struct MemoryGroup {
+ SmallVector<sys::MemoryBlock, 16> AllocatedMem;
+ SmallVector<sys::MemoryBlock, 16> FreeMem;
+ sys::MemoryBlock Near;
+ };
+
+ uint8_t *allocateSection(MemoryGroup &MemGroup, uintptr_t Size,
+ unsigned Alignment);
+
+ error_code applyMemoryGroupPermissions(MemoryGroup &MemGroup,
+ unsigned Permissions);
+
+ MemoryGroup CodeMem;
+ MemoryGroup RWDataMem;
+ MemoryGroup RODataMem;
+
+public:
+ ///
+ /// Functions below are not used by MCJIT or RuntimeDyld, but must be
+ /// implemented because they are declared as pure virtuals in the base class.
+ ///
+
+ virtual void setMemoryWritable() {
+ llvm_unreachable("Unexpected call!");
+ }
+ virtual void setMemoryExecutable() {
+ llvm_unreachable("Unexpected call!");
+ }
+ virtual void setPoisonMemory(bool poison) {
+ llvm_unreachable("Unexpected call!");
+ }
+ virtual void AllocateGOT() {
+ llvm_unreachable("Unexpected call!");
+ }
+ virtual uint8_t *getGOTBase() const {
+ llvm_unreachable("Unexpected call!");
+ return 0;
+ }
+ virtual uint8_t *startFunctionBody(const Function *F,
+ uintptr_t &ActualSize){
+ llvm_unreachable("Unexpected call!");
+ return 0;
+ }
+ virtual uint8_t *allocateStub(const GlobalValue *F, unsigned StubSize,
+ unsigned Alignment) {
+ llvm_unreachable("Unexpected call!");
+ return 0;
+ }
+ virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) {
+ llvm_unreachable("Unexpected call!");
+ }
+ virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
+ llvm_unreachable("Unexpected call!");
+ return 0;
+ }
+ virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
+ llvm_unreachable("Unexpected call!");
+ return 0;
+ }
+ virtual void deallocateFunctionBody(void *Body) {
+ llvm_unreachable("Unexpected call!");
+ }
+ virtual uint8_t *startExceptionTable(const Function *F,
+ uintptr_t &ActualSize) {
+ llvm_unreachable("Unexpected call!");
+ return 0;
+ }
+ virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t *FrameRegister) {
+ llvm_unreachable("Unexpected call!");
+ }
+ virtual void deallocateExceptionTable(void *ET) {
+ llvm_unreachable("Unexpected call!");
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_SECTIONMEMORYMANAGER_H
+
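
A usage sketch of the contract documented in the new header: JITed code may run only after applyPermissions, and split-cache targets also need the explicit cache flush. How the engine was constructed is elided here and assumed:

    #include "llvm/ExecutionEngine/SectionMemoryManager.h"
    #include <string>

    using namespace llvm;

    void finalizeForExecution(SectionMemoryManager &MM) {
      // ... MCJIT/RuntimeDyld have already filled the read-write sections
      //     via allocateCodeSection()/allocateDataSection() ...
      std::string Err;
      if (MM.applyPermissions(&Err))   // RX for code, RO for constant data
        report_fatal_error(Err);
      MM.invalidateInstructionCache(); // needed where I-cache != D-cache
      // Only now is it safe to call into the JITed code.
    }
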
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
deleted file mode 100644
index e211e9ab52a8..000000000000
--- a/include/llvm/Function.h
+++ /dev/null
@@ -1,455 +0,0 @@
-//===-- llvm/Function.h - Class to represent a single function --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the Function class, which represents a
-// single function/procedure in LLVM.
-//
-// A function basically consists of a list of basic blocks, a list of arguments,
-// and a symbol table.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_FUNCTION_H
-#define LLVM_FUNCTION_H
-
-#include "llvm/GlobalValue.h"
-#include "llvm/CallingConv.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Argument.h"
-#include "llvm/Attributes.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-class FunctionType;
-class LLVMContext;
-
-// Traits for intrusive list of basic blocks...
-template<> struct ilist_traits<BasicBlock>
- : public SymbolTableListTraits<BasicBlock, Function> {
-
- // createSentinel is used to get hold of the node that marks the end of the
- // list... (same trick used here as in ilist_traits<Instruction>)
- BasicBlock *createSentinel() const {
- return static_cast<BasicBlock*>(&Sentinel);
- }
- static void destroySentinel(BasicBlock*) {}
-
- BasicBlock *provideInitialHead() const { return createSentinel(); }
- BasicBlock *ensureHead(BasicBlock*) const { return createSentinel(); }
- static void noteHead(BasicBlock*, BasicBlock*) {}
-
- static ValueSymbolTable *getSymTab(Function *ItemParent);
-private:
- mutable ilist_half_node<BasicBlock> Sentinel;
-};
-
-template<> struct ilist_traits<Argument>
- : public SymbolTableListTraits<Argument, Function> {
-
- Argument *createSentinel() const {
- return static_cast<Argument*>(&Sentinel);
- }
- static void destroySentinel(Argument*) {}
-
- Argument *provideInitialHead() const { return createSentinel(); }
- Argument *ensureHead(Argument*) const { return createSentinel(); }
- static void noteHead(Argument*, Argument*) {}
-
- static ValueSymbolTable *getSymTab(Function *ItemParent);
-private:
- mutable ilist_half_node<Argument> Sentinel;
-};
-
-class Function : public GlobalValue,
- public ilist_node<Function> {
-public:
- typedef iplist<Argument> ArgumentListType;
- typedef iplist<BasicBlock> BasicBlockListType;
-
- // BasicBlock iterators...
- typedef BasicBlockListType::iterator iterator;
- typedef BasicBlockListType::const_iterator const_iterator;
-
- typedef ArgumentListType::iterator arg_iterator;
- typedef ArgumentListType::const_iterator const_arg_iterator;
-
-private:
- // Important things that make up a function!
- BasicBlockListType BasicBlocks; ///< The basic blocks
- mutable ArgumentListType ArgumentList; ///< The formal arguments
- ValueSymbolTable *SymTab; ///< Symbol table of args/instructions
- AttrListPtr AttributeList; ///< Parameter attributes
-
- // HasLazyArguments is stored in Value::SubclassData.
- /*bool HasLazyArguments;*/
-
- // The Calling Convention is stored in Value::SubclassData.
- /*CallingConv::ID CallingConvention;*/
-
- friend class SymbolTableListTraits<Function, Module>;
-
- void setParent(Module *parent);
-
- /// hasLazyArguments/CheckLazyArguments - The argument list of a function is
- /// built on demand, so that the list isn't allocated until the first client
- /// needs it. The hasLazyArguments predicate returns true if the arg list
- /// hasn't been set up yet.
- bool hasLazyArguments() const {
- return getSubclassDataFromValue() & 1;
- }
- void CheckLazyArguments() const {
- if (hasLazyArguments())
- BuildLazyArguments();
- }
- void BuildLazyArguments() const;
-
- Function(const Function&) LLVM_DELETED_FUNCTION;
- void operator=(const Function&) LLVM_DELETED_FUNCTION;
-
- /// Function ctor - If the (optional) Module argument is specified, the
- /// function is automatically inserted into the end of the function list for
- /// the module.
- ///
- Function(FunctionType *Ty, LinkageTypes Linkage,
- const Twine &N = "", Module *M = 0);
-
-public:
- static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
- const Twine &N = "", Module *M = 0) {
- return new(0) Function(Ty, Linkage, N, M);
- }
-
- ~Function();
-
- Type *getReturnType() const; // Return the type of the ret val
- FunctionType *getFunctionType() const; // Return the FunctionType for me
-
- /// getContext - Return a pointer to the LLVMContext associated with this
- /// function, or NULL if this function is not bound to a context yet.
- LLVMContext &getContext() const;
-
- /// isVarArg - Return true if this function takes a variable number of
- /// arguments.
- bool isVarArg() const;
-
- /// getIntrinsicID - This method returns the ID number of the specified
- /// function, or Intrinsic::not_intrinsic if the function is not an
- /// intrinsic, or if the pointer is null. This value is always defined to be
- /// zero to allow easy checking for whether a function is intrinsic or not.
- /// The particular intrinsic functions which correspond to this value are
- /// defined in llvm/Intrinsics.h.
- ///
- unsigned getIntrinsicID() const LLVM_READONLY;
- bool isIntrinsic() const { return getIntrinsicID() != 0; }
-
- /// getCallingConv()/setCallingConv(CC) - These method get and set the
- /// calling convention of this function. The enum values for the known
- /// calling conventions are defined in CallingConv.h.
- CallingConv::ID getCallingConv() const {
- return static_cast<CallingConv::ID>(getSubclassDataFromValue() >> 1);
- }
- void setCallingConv(CallingConv::ID CC) {
- setValueSubclassData((getSubclassDataFromValue() & 1) |
- (static_cast<unsigned>(CC) << 1));
- }
-
- /// getAttributes - Return the attribute list for this Function.
- ///
- const AttrListPtr &getAttributes() const { return AttributeList; }
-
- /// setAttributes - Set the attribute list for this Function.
- ///
- void setAttributes(const AttrListPtr &attrs) { AttributeList = attrs; }
-
- /// getFnAttributes - Return the function attributes for querying.
- ///
- Attributes getFnAttributes() const {
- return AttributeList.getFnAttributes();
- }
-
- /// addFnAttr - Add function attributes to this function.
- ///
- void addFnAttr(Attributes::AttrVal N) {
- // Function Attributes are stored at ~0 index
- addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), N));
- }
-
- /// removeFnAttr - Remove function attributes from this function.
- ///
- void removeFnAttr(Attributes N) {
- // Function Attributes are stored at ~0 index
- removeAttribute(~0U, N);
- }
-
- /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
- /// to use during code generation.
- bool hasGC() const;
- const char *getGC() const;
- void setGC(const char *Str);
- void clearGC();
-
-
- /// getRetAttributes - Return the return attributes for querying.
- Attributes getRetAttributes() const {
- return AttributeList.getRetAttributes();
- }
-
- /// getParamAttributes - Return the parameter attributes for querying.
- Attributes getParamAttributes(unsigned Idx) const {
- return AttributeList.getParamAttributes(Idx);
- }
-
- /// addAttribute - adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attributes attr);
-
- /// removeAttribute - removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, Attributes attr);
-
- /// @brief Extract the alignment for a call or parameter (0=unknown).
- unsigned getParamAlignment(unsigned i) const {
- return AttributeList.getParamAlignment(i);
- }
-
- /// @brief Determine if the function does not access memory.
- bool doesNotAccessMemory() const {
- return getFnAttributes().hasAttribute(Attributes::ReadNone);
- }
- void setDoesNotAccessMemory() {
- addFnAttr(Attributes::ReadNone);
- }
-
- /// @brief Determine if the function does not access or only reads memory.
- bool onlyReadsMemory() const {
- return doesNotAccessMemory() ||
- getFnAttributes().hasAttribute(Attributes::ReadOnly);
- }
- void setOnlyReadsMemory() {
- addFnAttr(Attributes::ReadOnly);
- }
-
- /// @brief Determine if the function cannot return.
- bool doesNotReturn() const {
- return getFnAttributes().hasAttribute(Attributes::NoReturn);
- }
- void setDoesNotReturn() {
- addFnAttr(Attributes::NoReturn);
- }
-
- /// @brief Determine if the function cannot unwind.
- bool doesNotThrow() const {
- return getFnAttributes().hasAttribute(Attributes::NoUnwind);
- }
- void setDoesNotThrow() {
- addFnAttr(Attributes::NoUnwind);
- }
-
- /// @brief True if the ABI mandates (or the user requested) that this
- /// function be in an unwind table.
- bool hasUWTable() const {
- return getFnAttributes().hasAttribute(Attributes::UWTable);
- }
- void setHasUWTable() {
- addFnAttr(Attributes::UWTable);
- }
-
- /// @brief True if this function needs an unwind table.
- bool needsUnwindTableEntry() const {
- return hasUWTable() || !doesNotThrow();
- }
-
- /// @brief Determine if the function returns a structure through first
- /// pointer argument.
- bool hasStructRetAttr() const {
- return getParamAttributes(1).hasAttribute(Attributes::StructRet);
- }
-
- /// @brief Determine if the parameter does not alias other parameters.
- /// @param n The parameter to check. 1 is the first parameter, 0 is the
- /// return value.
- bool doesNotAlias(unsigned n) const {
- return getParamAttributes(n).hasAttribute(Attributes::NoAlias);
- }
- void setDoesNotAlias(unsigned n) {
- addAttribute(n, Attributes::get(getContext(), Attributes::NoAlias));
- }
-
- /// @brief Determine if the parameter can be captured.
- /// @param n The parameter to check. 1 is the first parameter, 0 is the
- /// return value.
- bool doesNotCapture(unsigned n) const {
- return getParamAttributes(n).hasAttribute(Attributes::NoCapture);
- }
- void setDoesNotCapture(unsigned n) {
- addAttribute(n, Attributes::get(getContext(), Attributes::NoCapture));
- }
-
- /// copyAttributesFrom - copy all additional attributes (those not needed to
- /// create a Function) from the Function Src to this one.
- void copyAttributesFrom(const GlobalValue *Src);
-
- /// deleteBody - This method deletes the body of the function, and converts
- /// the linkage to external.
- ///
- void deleteBody() {
- dropAllReferences();
- setLinkage(ExternalLinkage);
- }
-
- /// removeFromParent - This method unlinks 'this' from the containing module,
- /// but does not delete it.
- ///
- virtual void removeFromParent();
-
- /// eraseFromParent - This method unlinks 'this' from the containing module
- /// and deletes it.
- ///
- virtual void eraseFromParent();
-
-
- /// Get the underlying elements of the Function... the basic block list is
- /// empty for external functions.
- ///
- const ArgumentListType &getArgumentList() const {
- CheckLazyArguments();
- return ArgumentList;
- }
- ArgumentListType &getArgumentList() {
- CheckLazyArguments();
- return ArgumentList;
- }
- static iplist<Argument> Function::*getSublistAccess(Argument*) {
- return &Function::ArgumentList;
- }
-
- const BasicBlockListType &getBasicBlockList() const { return BasicBlocks; }
- BasicBlockListType &getBasicBlockList() { return BasicBlocks; }
- static iplist<BasicBlock> Function::*getSublistAccess(BasicBlock*) {
- return &Function::BasicBlocks;
- }
-
- const BasicBlock &getEntryBlock() const { return front(); }
- BasicBlock &getEntryBlock() { return front(); }
-
- //===--------------------------------------------------------------------===//
- // Symbol Table Accessing functions...
-
- /// getSymbolTable() - Return the symbol table...
- ///
- inline ValueSymbolTable &getValueSymbolTable() { return *SymTab; }
- inline const ValueSymbolTable &getValueSymbolTable() const { return *SymTab; }
-
-
- //===--------------------------------------------------------------------===//
- // BasicBlock iterator forwarding functions
- //
- iterator begin() { return BasicBlocks.begin(); }
- const_iterator begin() const { return BasicBlocks.begin(); }
- iterator end () { return BasicBlocks.end(); }
- const_iterator end () const { return BasicBlocks.end(); }
-
- size_t size() const { return BasicBlocks.size(); }
- bool empty() const { return BasicBlocks.empty(); }
- const BasicBlock &front() const { return BasicBlocks.front(); }
- BasicBlock &front() { return BasicBlocks.front(); }
- const BasicBlock &back() const { return BasicBlocks.back(); }
- BasicBlock &back() { return BasicBlocks.back(); }
-
- //===--------------------------------------------------------------------===//
- // Argument iterator forwarding functions
- //
- arg_iterator arg_begin() {
- CheckLazyArguments();
- return ArgumentList.begin();
- }
- const_arg_iterator arg_begin() const {
- CheckLazyArguments();
- return ArgumentList.begin();
- }
- arg_iterator arg_end() {
- CheckLazyArguments();
- return ArgumentList.end();
- }
- const_arg_iterator arg_end() const {
- CheckLazyArguments();
- return ArgumentList.end();
- }
-
- size_t arg_size() const;
- bool arg_empty() const;
-
- /// viewCFG - This function is meant for use from the debugger. You can just
- /// say 'call F->viewCFG()' and a ghostview window should pop up from the
- /// program, displaying the CFG of the current function with the code for each
- /// basic block inside. This depends on there being a 'dot' and 'gv' program
- /// in your path.
- ///
- void viewCFG() const;
-
- /// viewCFGOnly - This function is meant for use from the debugger. It works
- /// just like viewCFG, but it does not include the contents of basic blocks
- /// into the nodes, just the label. If you are only interested in the CFG
- /// this can make the graph smaller.
- ///
- void viewCFGOnly() const;
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
- return V->getValueID() == Value::FunctionVal;
- }
-
- /// dropAllReferences() - This method causes all the subinstructions to "let
- /// go" of all references that they are maintaining. This allows one to
- /// 'delete' a whole module at a time, even though there may be circular
- /// references... first all references are dropped, and all use counts go to
- /// zero. Then everything is deleted for real. Note that no operations are
- /// valid on an object that has "dropped all references", except operator
- /// delete.
- ///
- /// Since no other object in the module can have references into the body of a
- /// function, dropping all references deletes the entire body of the function,
- /// including any contained basic blocks.
- ///
- void dropAllReferences();
-
- /// hasAddressTaken - returns true if there are any uses of this function
- /// other than direct calls or invokes to it, or blockaddress expressions.
- /// Optionally passes back an offending user for diagnostic purposes.
- ///
- bool hasAddressTaken(const User** = 0) const;
-
- /// isDefTriviallyDead - Return true if it is trivially safe to remove
- /// this function definition from the module (because it isn't externally
- /// visible, does not have its address taken, and has no callers). To make
- /// this more accurate, call removeDeadConstantUsers first.
- bool isDefTriviallyDead() const;
-
- /// callsFunctionThatReturnsTwice - Return true if the function has a call to
- /// setjmp or other function that gcc recognizes as "returning twice".
- bool callsFunctionThatReturnsTwice() const;
-
-private:
- // Shadow Value::setValueSubclassData with a private forwarding method so that
- // subclasses cannot accidentally use it.
- void setValueSubclassData(unsigned short D) {
- Value::setValueSubclassData(D);
- }
-};
-
-inline ValueSymbolTable *
-ilist_traits<BasicBlock>::getSymTab(Function *F) {
- return F ? &F->getValueSymbolTable() : 0;
-}
-
-inline ValueSymbolTable *
-ilist_traits<Argument>::getSymTab(Function *F) {
- return F ? &F->getValueSymbolTable() : 0;
-}
-
-} // End llvm namespace
-
-#endif
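
The class itself survives; elsewhere in this import it lives under include/llvm/IR/. As a reminder of the API the removed header declared, a small sketch (the IR/ include paths are the post-move locations):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Create an external void() function and mark it nounwind.
    Function *makeStub(Module &M, LLVMContext &Ctx) {
      FunctionType *FT = FunctionType::get(Type::getVoidTy(Ctx),
                                           /*isVarArg=*/false);
      Function *F = Function::Create(FT, GlobalValue::ExternalLinkage,
                                     "stub", &M);
      F->setDoesNotThrow();            // adds the NoUnwind attribute
      return F;
    }
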
diff --git a/include/llvm/GVMaterializer.h b/include/llvm/GVMaterializer.h
index c14355238867..1e5c4263d49b 100644
--- a/include/llvm/GVMaterializer.h
+++ b/include/llvm/GVMaterializer.h
@@ -15,8 +15,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef GVMATERIALIZER_H
-#define GVMATERIALIZER_H
+#ifndef LLVM_GVMATERIALIZER_H
+#define LLVM_GVMATERIALIZER_H
#include <string>
diff --git a/include/llvm/GlobalAlias.h b/include/llvm/GlobalAlias.h
deleted file mode 100644
index d0f014733fce..000000000000
--- a/include/llvm/GlobalAlias.h
+++ /dev/null
@@ -1,93 +0,0 @@
-//===-------- llvm/GlobalAlias.h - GlobalAlias class ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the GlobalAlias class, which
-// represents a single function or variable alias in the IR.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_GLOBAL_ALIAS_H
-#define LLVM_GLOBAL_ALIAS_H
-
-#include "llvm/GlobalValue.h"
-#include "llvm/OperandTraits.h"
-#include "llvm/ADT/ilist_node.h"
-#include "llvm/ADT/Twine.h"
-
-namespace llvm {
-
-class Module;
-template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
-
-class GlobalAlias : public GlobalValue, public ilist_node<GlobalAlias> {
- friend class SymbolTableListTraits<GlobalAlias, Module>;
- void operator=(const GlobalAlias &) LLVM_DELETED_FUNCTION;
- GlobalAlias(const GlobalAlias &) LLVM_DELETED_FUNCTION;
-
- void setParent(Module *parent);
-
-public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
- /// GlobalAlias ctor - If a parent module is specified, the alias is
- /// automatically inserted into the end of the specified module's alias list.
- GlobalAlias(Type *Ty, LinkageTypes Linkage, const Twine &Name = "",
- Constant* Aliasee = 0, Module *Parent = 0);
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
-
- /// removeFromParent - This method unlinks 'this' from the containing module,
- /// but does not delete it.
- ///
- virtual void removeFromParent();
-
- /// eraseFromParent - This method unlinks 'this' from the containing module
- /// and deletes it.
- ///
- virtual void eraseFromParent();
-
- /// set/getAliasee - These methods retrieve and set the alias target.
- void setAliasee(Constant *GV);
- const Constant *getAliasee() const {
- return getOperand(0);
- }
- Constant *getAliasee() {
- return getOperand(0);
- }
- /// getAliasedGlobal() - Aliasee can be either global or bitcast of
- /// global. This method retrieves the global for both aliasee flavours.
- const GlobalValue *getAliasedGlobal() const;
-
- /// resolveAliasedGlobal() - This method tries to ultimately resolve the alias
- /// by going through the aliasing chain and trying to find the very last
- /// global. Returns NULL if a cycle was found. If stopOnWeak is false, then
- /// the whole chain aliasing chain is traversed, otherwise - only strong
- /// aliases.
- const GlobalValue *resolveAliasedGlobal(bool stopOnWeak = true) const;
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
- return V->getValueID() == Value::GlobalAliasVal;
- }
-};
-
-template <>
-struct OperandTraits<GlobalAlias> :
- public FixedNumOperandTraits<GlobalAlias, 1> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalAlias, Constant)
-
-} // End llvm namespace
-
-#endif
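
As with Function.h, this class moves rather than disappears. A sketch of the removed header's API, using the constructor and resolver it declared:

    #include "llvm/IR/GlobalAlias.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Create an alias for F and resolve it back to the underlying global.
    const GlobalValue *addAlias(Module &M, Function *F) {
      GlobalAlias *GA = new GlobalAlias(F->getType(),
                                        GlobalValue::ExternalLinkage,
                                        "alias_name", F, &M);
      // Walks the aliasing chain; returns null if a cycle is found.
      return GA->resolveAliasedGlobal();
    }
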
diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h
deleted file mode 100644
index 7f7f74b1e2da..000000000000
--- a/include/llvm/GlobalValue.h
+++ /dev/null
@@ -1,299 +0,0 @@
-//===-- llvm/GlobalValue.h - Class to represent a global value --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a common base class of all globally definable objects. As such,
-// it is subclassed by GlobalVariable, GlobalAlias and by Function. This is
-// used because you can do certain things with these global objects that you
-// can't do to anything else. For example, use the address of one as a
-// constant.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_GLOBALVALUE_H
-#define LLVM_GLOBALVALUE_H
-
-#include "llvm/Constant.h"
-
-namespace llvm {
-
-class PointerType;
-class Module;
-
-class GlobalValue : public Constant {
- GlobalValue(const GlobalValue &) LLVM_DELETED_FUNCTION;
-public:
- /// @brief An enumeration for the kinds of linkage for global values.
- enum LinkageTypes {
- ExternalLinkage = 0,///< Externally visible function
- AvailableExternallyLinkage, ///< Available for inspection, not emission.
- LinkOnceAnyLinkage, ///< Keep one copy of function when linking (inline)
- LinkOnceODRLinkage, ///< Same, but only replaced by something equivalent.
- LinkOnceODRAutoHideLinkage, ///< Like LinkOnceODRLinkage but addr not taken.
- WeakAnyLinkage, ///< Keep one copy of named function when linking (weak)
- WeakODRLinkage, ///< Same, but only replaced by something equivalent.
- AppendingLinkage, ///< Special purpose, only applies to global arrays
- InternalLinkage, ///< Rename collisions when linking (static functions).
- PrivateLinkage, ///< Like Internal, but omit from symbol table.
- LinkerPrivateLinkage, ///< Like Private, but linker removes.
- LinkerPrivateWeakLinkage, ///< Like LinkerPrivate, but weak.
- DLLImportLinkage, ///< Function to be imported from DLL
- DLLExportLinkage, ///< Function to be accessible from DLL.
- ExternalWeakLinkage,///< ExternalWeak linkage description.
- CommonLinkage ///< Tentative definitions.
- };
-
- /// @brief An enumeration for the kinds of visibility of global values.
- enum VisibilityTypes {
- DefaultVisibility = 0, ///< The GV is visible
- HiddenVisibility, ///< The GV is hidden
- ProtectedVisibility ///< The GV is protected
- };
-
-protected:
- GlobalValue(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps,
- LinkageTypes linkage, const Twine &Name)
- : Constant(ty, vty, Ops, NumOps), Linkage(linkage),
- Visibility(DefaultVisibility), Alignment(0), UnnamedAddr(0), Parent(0) {
- setName(Name);
- }
-
- // Note: VC++ treats enums as signed, so an extra bit is required to prevent
- // Linkage and Visibility from turning into negative values.
- LinkageTypes Linkage : 5; // The linkage of this global
- unsigned Visibility : 2; // The visibility style of this global
- unsigned Alignment : 16; // Alignment of this symbol, must be power of two
- unsigned UnnamedAddr : 1; // This value's address is not significant
- Module *Parent; // The containing module.
- std::string Section; // Section to emit this into, empty means default
-public:
- ~GlobalValue() {
- removeDeadConstantUsers(); // remove any dead constants using this.
- }
-
- unsigned getAlignment() const {
- return (1u << Alignment) >> 1;
- }
- void setAlignment(unsigned Align);
-
- bool hasUnnamedAddr() const { return UnnamedAddr; }
- void setUnnamedAddr(bool Val) { UnnamedAddr = Val; }
-
- VisibilityTypes getVisibility() const { return VisibilityTypes(Visibility); }
- bool hasDefaultVisibility() const { return Visibility == DefaultVisibility; }
- bool hasHiddenVisibility() const { return Visibility == HiddenVisibility; }
- bool hasProtectedVisibility() const {
- return Visibility == ProtectedVisibility;
- }
- void setVisibility(VisibilityTypes V) { Visibility = V; }
-
- bool hasSection() const { return !Section.empty(); }
- const std::string &getSection() const { return Section; }
- void setSection(StringRef S) { Section = S; }
-
- /// If the usage is empty (except transitively dead constants), then this
- /// global value can be safely deleted since the destructor will
- /// delete the dead constants as well.
- /// @brief Determine if the usage of this global value is empty except
- /// for transitively dead constants.
- bool use_empty_except_constants();
-
- /// getType - Global values are always pointers.
- inline PointerType *getType() const {
- return reinterpret_cast<PointerType*>(User::getType());
- }
-
- static LinkageTypes getLinkOnceLinkage(bool ODR) {
- return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage;
- }
- static LinkageTypes getWeakLinkage(bool ODR) {
- return ODR ? WeakODRLinkage : WeakAnyLinkage;
- }
-
- static bool isExternalLinkage(LinkageTypes Linkage) {
- return Linkage == ExternalLinkage;
- }
- static bool isAvailableExternallyLinkage(LinkageTypes Linkage) {
- return Linkage == AvailableExternallyLinkage;
- }
- static bool isLinkOnceLinkage(LinkageTypes Linkage) {
- return Linkage == LinkOnceAnyLinkage ||
- Linkage == LinkOnceODRLinkage ||
- Linkage == LinkOnceODRAutoHideLinkage;
- }
- static bool isLinkOnceODRAutoHideLinkage(LinkageTypes Linkage) {
- return Linkage == LinkOnceODRAutoHideLinkage;
- }
- static bool isWeakLinkage(LinkageTypes Linkage) {
- return Linkage == WeakAnyLinkage || Linkage == WeakODRLinkage;
- }
- static bool isAppendingLinkage(LinkageTypes Linkage) {
- return Linkage == AppendingLinkage;
- }
- static bool isInternalLinkage(LinkageTypes Linkage) {
- return Linkage == InternalLinkage;
- }
- static bool isPrivateLinkage(LinkageTypes Linkage) {
- return Linkage == PrivateLinkage;
- }
- static bool isLinkerPrivateLinkage(LinkageTypes Linkage) {
- return Linkage == LinkerPrivateLinkage;
- }
- static bool isLinkerPrivateWeakLinkage(LinkageTypes Linkage) {
- return Linkage == LinkerPrivateWeakLinkage;
- }
- static bool isLocalLinkage(LinkageTypes Linkage) {
- return isInternalLinkage(Linkage) || isPrivateLinkage(Linkage) ||
- isLinkerPrivateLinkage(Linkage) || isLinkerPrivateWeakLinkage(Linkage);
- }
- static bool isDLLImportLinkage(LinkageTypes Linkage) {
- return Linkage == DLLImportLinkage;
- }
- static bool isDLLExportLinkage(LinkageTypes Linkage) {
- return Linkage == DLLExportLinkage;
- }
- static bool isExternalWeakLinkage(LinkageTypes Linkage) {
- return Linkage == ExternalWeakLinkage;
- }
- static bool isCommonLinkage(LinkageTypes Linkage) {
- return Linkage == CommonLinkage;
- }
-
- /// isDiscardableIfUnused - Whether the definition of this global may be
- /// discarded if it is not used in its compilation unit.
- static bool isDiscardableIfUnused(LinkageTypes Linkage) {
- return isLinkOnceLinkage(Linkage) || isLocalLinkage(Linkage);
- }
-
- /// mayBeOverridden - Whether the definition of this global may be replaced
- /// by something non-equivalent at link time. For example, if a function has
- /// weak linkage then the code defining it may be replaced by different code.
- static bool mayBeOverridden(LinkageTypes Linkage) {
- return Linkage == WeakAnyLinkage ||
- Linkage == LinkOnceAnyLinkage ||
- Linkage == CommonLinkage ||
- Linkage == ExternalWeakLinkage ||
- Linkage == LinkerPrivateWeakLinkage;
- }
-
- /// isWeakForLinker - Whether the definition of this global may be replaced at
- /// link time. NB: Using this method outside of the code generators is almost
- /// always a mistake: when working at the IR level use mayBeOverridden instead
- /// as it knows about ODR semantics.
- static bool isWeakForLinker(LinkageTypes Linkage) {
- return Linkage == AvailableExternallyLinkage ||
- Linkage == WeakAnyLinkage ||
- Linkage == WeakODRLinkage ||
- Linkage == LinkOnceAnyLinkage ||
- Linkage == LinkOnceODRLinkage ||
- Linkage == LinkOnceODRAutoHideLinkage ||
- Linkage == CommonLinkage ||
- Linkage == ExternalWeakLinkage ||
- Linkage == LinkerPrivateWeakLinkage;
- }
-
- bool hasExternalLinkage() const { return isExternalLinkage(Linkage); }
- bool hasAvailableExternallyLinkage() const {
- return isAvailableExternallyLinkage(Linkage);
- }
- bool hasLinkOnceLinkage() const {
- return isLinkOnceLinkage(Linkage);
- }
- bool hasLinkOnceODRAutoHideLinkage() const {
- return isLinkOnceODRAutoHideLinkage(Linkage);
- }
- bool hasWeakLinkage() const {
- return isWeakLinkage(Linkage);
- }
- bool hasAppendingLinkage() const { return isAppendingLinkage(Linkage); }
- bool hasInternalLinkage() const { return isInternalLinkage(Linkage); }
- bool hasPrivateLinkage() const { return isPrivateLinkage(Linkage); }
- bool hasLinkerPrivateLinkage() const { return isLinkerPrivateLinkage(Linkage); }
- bool hasLinkerPrivateWeakLinkage() const {
- return isLinkerPrivateWeakLinkage(Linkage);
- }
- bool hasLocalLinkage() const { return isLocalLinkage(Linkage); }
- bool hasDLLImportLinkage() const { return isDLLImportLinkage(Linkage); }
- bool hasDLLExportLinkage() const { return isDLLExportLinkage(Linkage); }
- bool hasExternalWeakLinkage() const { return isExternalWeakLinkage(Linkage); }
- bool hasCommonLinkage() const { return isCommonLinkage(Linkage); }
-
- void setLinkage(LinkageTypes LT) { Linkage = LT; }
- LinkageTypes getLinkage() const { return Linkage; }
-
- bool isDiscardableIfUnused() const {
- return isDiscardableIfUnused(Linkage);
- }
-
- bool mayBeOverridden() const { return mayBeOverridden(Linkage); }
-
- bool isWeakForLinker() const { return isWeakForLinker(Linkage); }
-
- /// copyAttributesFrom - copy all additional attributes (those not needed to
- /// create a GlobalValue) from the GlobalValue Src to this one.
- virtual void copyAttributesFrom(const GlobalValue *Src);
-
-/// @name Materialization
-/// Materialization is used to construct functions only as they're needed. This
-/// is useful to reduce memory usage in LLVM or parsing work done by the
-/// BitcodeReader to load the Module.
-/// @{
-
- /// isMaterializable - If this function's Module is lazily streaming in
- /// functions from disk or some other source, this method can be used to
- /// check whether the function has been read in yet.
- bool isMaterializable() const;
-
- /// isDematerializable - Returns true if this function was loaded from a
- /// GVMaterializer that's still attached to its Module and that knows how to
- /// dematerialize the function.
- bool isDematerializable() const;
-
- /// Materialize - make sure this GlobalValue is fully read. If the module is
- /// corrupt, this returns true and fills in the optional string with
- /// information about the problem. If successful, this returns false.
- bool Materialize(std::string *ErrInfo = 0);
-
- /// Dematerialize - If this GlobalValue is read in, and if the GVMaterializer
- /// supports it, release the memory for the function, and set it up to be
- /// materialized lazily. If !isDematerializable(), this method is a noop.
- void Dematerialize();
-
-/// @}
-
- /// Override from Constant class.
- virtual void destroyConstant();
-
- /// isDeclaration - Return true if the primary definition of this global
- /// value is outside of the current translation unit.
- bool isDeclaration() const;
-
- /// removeFromParent - This method unlinks 'this' from the containing module,
- /// but does not delete it.
- virtual void removeFromParent() = 0;
-
- /// eraseFromParent - This method unlinks 'this' from the containing module
- /// and deletes it.
- virtual void eraseFromParent() = 0;
-
- /// getParent - Get the module that contains this global value.
- inline Module *getParent() { return Parent; }
- inline const Module *getParent() const { return Parent; }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
- return V->getValueID() == Value::FunctionVal ||
- V->getValueID() == Value::GlobalVariableVal ||
- V->getValueID() == Value::GlobalAliasVal;
- }
-};
-
-} // End llvm namespace
-
-#endif
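[Editor's note: a minimal sketch of the linkage queries declared in the
header deleted above. `GV` is a hypothetical llvm::GlobalValue*;
Value::use_empty() is assumed from Value.h.]

    // At the IR level, prefer mayBeOverridden() over isWeakForLinker():
    // as noted above, only the former understands ODR semantics.
    if (!GV->mayBeOverridden()) {
      // No other module can substitute a non-equivalent definition at link
      // time, so this definition's body/initializer can be relied upon.
    }
    if (GV->isDiscardableIfUnused() && GV->use_empty())
      GV->eraseFromParent(); // unlink from the containing module and delete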
diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h
deleted file mode 100644
index b9d3f68642f4..000000000000
--- a/include/llvm/GlobalVariable.h
+++ /dev/null
@@ -1,191 +0,0 @@
-//===-- llvm/GlobalVariable.h - GlobalVariable class ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the GlobalVariable class, which
-// represents a single global variable (or constant) in the VM.
-//
-// Global variables are constant pointers that refer to hunks of space that are
-// allocated by either the VM, or by the linker in a static compiler. A global
- // variable may have an initial value, which is copied into the executable's .data
-// area. Global Constants are required to have initializers.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_GLOBAL_VARIABLE_H
-#define LLVM_GLOBAL_VARIABLE_H
-
-#include "llvm/GlobalValue.h"
-#include "llvm/OperandTraits.h"
-#include "llvm/ADT/ilist_node.h"
-#include "llvm/ADT/Twine.h"
-
-namespace llvm {
-
-class Module;
-class Constant;
-template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
-
-class GlobalVariable : public GlobalValue, public ilist_node<GlobalVariable> {
- friend class SymbolTableListTraits<GlobalVariable, Module>;
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- void operator=(const GlobalVariable &) LLVM_DELETED_FUNCTION;
- GlobalVariable(const GlobalVariable &) LLVM_DELETED_FUNCTION;
-
- void setParent(Module *parent);
-
- bool isConstantGlobal : 1; // Is this a global constant?
- unsigned threadLocalMode : 3; // Is this symbol "Thread Local",
- // if so, what is the desired model?
-
-public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
-
- enum ThreadLocalMode {
- NotThreadLocal = 0,
- GeneralDynamicTLSModel,
- LocalDynamicTLSModel,
- InitialExecTLSModel,
- LocalExecTLSModel
- };
-
- /// GlobalVariable ctor - If a parent module is specified, the global is
- /// automatically inserted at the end of the specified module's global list.
- GlobalVariable(Type *Ty, bool isConstant, LinkageTypes Linkage,
- Constant *Initializer = 0, const Twine &Name = "",
- ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0);
- /// GlobalVariable ctor - This creates a global and inserts it before the
- /// specified other global.
- GlobalVariable(Module &M, Type *Ty, bool isConstant,
- LinkageTypes Linkage, Constant *Initializer,
- const Twine &Name = "",
- GlobalVariable *InsertBefore = 0,
- ThreadLocalMode = NotThreadLocal,
- unsigned AddressSpace = 0);
-
- ~GlobalVariable() {
- NumOperands = 1; // FIXME: needed by operator delete
- }
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// hasInitializer - Unless a global variable is a declaration, it has an
- /// initializer. The initializer for the global variable/constant is held by
- /// Initializer if an initializer is specified.
- ///
- inline bool hasInitializer() const { return !isDeclaration(); }
-
- /// hasDefinitiveInitializer - Whether the global variable has an initializer,
- /// and any other instances of the global (this can happen due to weak
- /// linkage) are guaranteed to have the same initializer.
- ///
- /// Note that if you want to transform a global, you must use
- /// hasUniqueInitializer() instead, because of the *_odr linkage type.
- ///
- /// Example:
- ///
- /// @a = global SomeType* null - Initializer is both definitive and unique.
- ///
- /// @b = global weak SomeType* null - Initializer is neither definitive nor
- /// unique.
- ///
- /// @c = global weak_odr SomeType* null - Initializer is definitive, but not
- /// unique.
- inline bool hasDefinitiveInitializer() const {
- return hasInitializer() &&
- // The initializer of a global variable with weak linkage may change at
- // link time.
- !mayBeOverridden();
- }
-
- /// hasUniqueInitializer - Whether the global variable has an initializer, and
- /// any changes made to the initializer will turn up in the final executable.
- inline bool hasUniqueInitializer() const {
- return hasInitializer() &&
- // It's not safe to modify initializers of global variables with weak
- // linkage, because the linker might choose to discard the initializer and
- // use the initializer from another instance of the global variable
- // instead. It is wrong to modify the initializer of a global variable
- // with *_odr linkage because then different instances of the global may
- // have different initializers, breaking the One Definition Rule.
- !isWeakForLinker();
- }
-
- /// getInitializer - Return the initializer for this global variable. It is
- /// illegal to call this method if the global is external, because we cannot
- /// tell what the value is initialized to!
- ///
- inline const Constant *getInitializer() const {
- assert(hasInitializer() && "GV doesn't have initializer!");
- return static_cast<Constant*>(Op<0>().get());
- }
- inline Constant *getInitializer() {
- assert(hasInitializer() && "GV doesn't have initializer!");
- return static_cast<Constant*>(Op<0>().get());
- }
- /// setInitializer - Sets the initializer for this global variable, removing
- /// any existing initializer if InitVal==NULL. If this GV has type T*, the
- /// initializer must have type T.
- void setInitializer(Constant *InitVal);
-
- /// If the value is a global constant, its value is immutable throughout the
- /// runtime execution of the program. Assigning a value into the constant
- /// leads to undefined behavior.
- ///
- bool isConstant() const { return isConstantGlobal; }
- void setConstant(bool Val) { isConstantGlobal = Val; }
-
- /// If the value is "Thread Local", its value isn't shared by the threads.
- bool isThreadLocal() const { return threadLocalMode != NotThreadLocal; }
- void setThreadLocal(bool Val) {
- threadLocalMode = Val ? GeneralDynamicTLSModel : NotThreadLocal;
- }
- void setThreadLocalMode(ThreadLocalMode Val) { threadLocalMode = Val; }
- ThreadLocalMode getThreadLocalMode() const {
- return static_cast<ThreadLocalMode>(threadLocalMode);
- }
-
- /// copyAttributesFrom - copy all additional attributes (those not needed to
- /// create a GlobalVariable) from the GlobalVariable Src to this one.
- void copyAttributesFrom(const GlobalValue *Src);
-
- /// removeFromParent - This method unlinks 'this' from the containing module,
- /// but does not delete it.
- ///
- virtual void removeFromParent();
-
- /// eraseFromParent - This method unlinks 'this' from the containing module
- /// and deletes it.
- ///
- virtual void eraseFromParent();
-
- /// Override Constant's implementation of this method so we can
- /// replace constant initializers.
- virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
- return V->getValueID() == Value::GlobalVariableVal;
- }
-};
-
-template <>
-struct OperandTraits<GlobalVariable> :
- public OptionalOperandTraits<GlobalVariable> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalVariable, Value)
-
-} // End llvm namespace
-
-#endif
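[Editor's note: a minimal sketch of the constructor and initializer API
from the header deleted above. `M` is a hypothetical llvm::Module&;
Type::getInt32Ty and ConstantInt::get are assumed from Type.h and
Constants.h.]

    Type *I32 = Type::getInt32Ty(M.getContext());
    GlobalVariable *GV =
        new GlobalVariable(M, I32, /*isConstant=*/true,
                           GlobalValue::InternalLinkage,
                           ConstantInt::get(I32, 42), "answer");
    // Only rewrite the initializer when no other copy of the global can be
    // picked by the linker (see hasUniqueInitializer() above).
    if (GV->hasUniqueInitializer())
      GV->setInitializer(ConstantInt::get(I32, 7));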
diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h
new file mode 100644
index 000000000000..ef4e4fc7aa68
--- /dev/null
+++ b/include/llvm/IR/Argument.h
@@ -0,0 +1,96 @@
+//===-- llvm/Argument.h - Definition of the Argument class ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Argument class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_ARGUMENT_H
+#define LLVM_IR_ARGUMENT_H
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Value.h"
+
+namespace llvm {
+
+template<typename ValueSubClass, typename ItemParentClass>
+ class SymbolTableListTraits;
+
+/// \brief LLVM Argument representation
+///
+/// This class represents an incoming formal argument to a Function. A formal
+/// argument, since it is ``formal'', does not contain an actual value but
+/// instead represents the type, argument number, and attributes of an argument
+/// for a specific function. When used in the body of said function, the
+/// argument of course represents the value of the actual argument that the
+/// function was called with.
+class Argument : public Value, public ilist_node<Argument> {
+ virtual void anchor();
+ Function *Parent;
+
+ friend class SymbolTableListTraits<Argument, Function>;
+ void setParent(Function *parent);
+
+public:
+ /// \brief Constructor.
+ ///
+ /// If \p F is specified, the argument is inserted at the end of the argument
+ /// list for \p F.
+ explicit Argument(Type *Ty, const Twine &Name = "", Function *F = 0);
+
+ inline const Function *getParent() const { return Parent; }
+ inline Function *getParent() { return Parent; }
+
+ /// \brief Return the index of this formal argument in its containing
+ /// function.
+ ///
+ /// For example in "void foo(int a, float b)" a is 0 and b is 1.
+ unsigned getArgNo() const;
+
+ /// \brief Return true if this argument has the byval attribute on it in its
+ /// containing function.
+ bool hasByValAttr() const;
+
+ /// \brief If this is a byval argument, return its alignment.
+ unsigned getParamAlignment() const;
+
+ /// \brief Return true if this argument has the nest attribute on it in its
+ /// containing function.
+ bool hasNestAttr() const;
+
+ /// \brief Return true if this argument has the noalias attribute on it in its
+ /// containing function.
+ bool hasNoAliasAttr() const;
+
+ /// \brief Return true if this argument has the nocapture attribute on it in
+ /// its containing function.
+ bool hasNoCaptureAttr() const;
+
+ /// \brief Return true if this argument has the sret attribute on it in its
+ /// containing function.
+ bool hasStructRetAttr() const;
+
+ /// \brief Add an Attribute to an argument.
+ void addAttr(AttributeSet AS);
+
+ /// \brief Remove an Attribute from an argument.
+ void removeAttr(AttributeSet AS);
+
+ /// \brief Method for support type inquiry through isa, cast, and
+ /// dyn_cast.
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == ArgumentVal;
+ }
+};
+
+} // End llvm namespace
+
+#endif
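[Editor's note: a minimal sketch of the query methods added above. `F` is
a hypothetical llvm::Function*; arg_begin()/arg_end() and llvm::errs() are
assumed from Function.h and Support/raw_ostream.h.]

    for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
         AI != AE; ++AI) {
      Argument &A = *AI;
      if (A.hasByValAttr()) // structure passed by value, not by pointer
        errs() << "arg #" << A.getArgNo() << " is byval, align "
               << A.getParamAlignment() << "\n";
    }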
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
new file mode 100644
index 000000000000..074b38779ae8
--- /dev/null
+++ b/include/llvm/IR/Attributes.h
@@ -0,0 +1,499 @@
+//===-- llvm/Attributes.h - Container for Attributes ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the simple types necessary to represent the
+/// attributes associated with functions and their calls.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_ATTRIBUTES_H
+#define LLVM_IR_ATTRIBUTES_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include <bitset>
+#include <cassert>
+#include <map>
+#include <string>
+
+namespace llvm {
+
+class AttrBuilder;
+class AttributeImpl;
+class AttributeSetImpl;
+class AttributeSetNode;
+class Constant;
+template<typename T> struct DenseMapInfo;
+class LLVMContext;
+class Type;
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief Functions, function parameters, and return types can have attributes
+/// to indicate how they should be treated by optimizations and code
+/// generation. This class represents one of those attributes. It's lightweight
+/// and should be passed around by value.
+class Attribute {
+public:
+ /// This enumeration lists the attributes that can be associated with
+ /// parameters, function results, or the function itself.
+ ///
+ /// Note: The `uwtable' attribute is about the ABI or the user mandating an
+ /// entry in the unwind table. The `nounwind' attribute is about whether an
+ /// exception can pass by (i.e. propagate out of) the function.
+ ///
+ /// In a theoretical system that uses tables for profiling and SjLj for
+ /// exceptions, they would be fully independent. In a normal system that uses
+ /// tables for both, the semantics are:
+ ///
+ /// nil = Needs an entry because an exception might pass by.
+ /// nounwind = No need for an entry
+ /// uwtable = Needs an entry because the ABI says so and because
+ /// an exception might pass by.
+ /// uwtable + nounwind = Needs an entry because the ABI says so.
+
+ enum AttrKind {
+ // IR-Level Attributes
+ None, ///< No attributes have been set
+ Alignment, ///< Alignment of parameter (5 bits)
+ ///< stored as log2 of alignment with +1 bias
+ ///< 0 means unaligned (different from align(1))
+ AlwaysInline, ///< inline=always
+ ByVal, ///< Pass structure by value
+ InlineHint, ///< Source said inlining was desirable
+ InReg, ///< Force argument to be passed in register
+ MinSize, ///< Function must be optimized for size first
+ Naked, ///< Naked function
+ Nest, ///< Nested function static chain
+ NoAlias, ///< Considered to not alias after call
+ NoBuiltin, ///< Callee isn't recognized as a builtin
+ NoCapture, ///< Function creates no aliases of pointer
+ NoDuplicate, ///< Call cannot be duplicated
+ NoImplicitFloat, ///< Disable implicit floating point insts
+ NoInline, ///< inline=never
+ NonLazyBind, ///< Function is called early and/or
+ ///< often, so lazy binding isn't worthwhile
+ NoRedZone, ///< Disable redzone
+ NoReturn, ///< Mark the function as not returning
+ NoUnwind, ///< Function doesn't unwind stack
+ OptimizeForSize, ///< opt_size
+ ReadNone, ///< Function does not access memory
+ ReadOnly, ///< Function only reads from memory
+ ReturnsTwice, ///< Function can return twice
+ SExt, ///< Sign extended before/after call
+ StackAlignment, ///< Alignment of stack for function (3 bits)
+ ///< stored as log2 of alignment with +1 bias 0
+ ///< means unaligned (different from
+ ///< alignstack=(1))
+ StackProtect, ///< Stack protection.
+ StackProtectReq, ///< Stack protection required.
+ StackProtectStrong, ///< Strong Stack protection.
+ StructRet, ///< Hidden pointer to structure to return
+ SanitizeAddress, ///< AddressSanitizer is on.
+ SanitizeThread, ///< ThreadSanitizer is on.
+ SanitizeMemory, ///< MemorySanitizer is on.
+ UWTable, ///< Function must be in a unwind table
+ ZExt, ///< Zero extended before/after call
+
+ EndAttrKinds ///< Sentinel value useful for loops
+ };
+private:
+ AttributeImpl *pImpl;
+ Attribute(AttributeImpl *A) : pImpl(A) {}
+public:
+ Attribute() : pImpl(0) {}
+
+ //===--------------------------------------------------------------------===//
+ // Attribute Construction
+ //===--------------------------------------------------------------------===//
+
+ /// \brief Return a uniquified Attribute object.
+ static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0);
+ static Attribute get(LLVMContext &Context, StringRef Kind,
+ StringRef Val = StringRef());
+
+ /// \brief Return a uniquified Attribute object that has the specific
+ /// alignment set.
+ static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align);
+ static Attribute getWithStackAlignment(LLVMContext &Context, uint64_t Align);
+
+ //===--------------------------------------------------------------------===//
+ // Attribute Accessors
+ //===--------------------------------------------------------------------===//
+
+ /// \brief Return true if the attribute is an Attribute::AttrKind type.
+ bool isEnumAttribute() const;
+
+ /// \brief Return true if the attribute is an alignment attribute.
+ bool isAlignAttribute() const;
+
+ /// \brief Return true if the attribute is a string (target-dependent)
+ /// attribute.
+ bool isStringAttribute() const;
+
+ /// \brief Return true if the attribute is present.
+ bool hasAttribute(AttrKind Val) const;
+
+ /// \brief Return true if the target-dependent attribute is present.
+ bool hasAttribute(StringRef Val) const;
+
+ /// \brief Return the attribute's kind as an enum (Attribute::AttrKind). This
+ /// requires the attribute to be an enum or alignment attribute.
+ Attribute::AttrKind getKindAsEnum() const;
+
+ /// \brief Return the attribute's value as an integer. This requires that the
+ /// attribute be an alignment attribute.
+ uint64_t getValueAsInt() const;
+
+ /// \brief Return the attribute's kind as a string. This requires the
+ /// attribute to be a string attribute.
+ StringRef getKindAsString() const;
+
+ /// \brief Return the attribute's value as a string. This requires the
+ /// attribute to be a string attribute.
+ StringRef getValueAsString() const;
+
+ /// \brief Returns the alignment field of an attribute as a byte alignment
+ /// value.
+ unsigned getAlignment() const;
+
+ /// \brief Returns the stack alignment field of an attribute as a byte
+ /// alignment value.
+ unsigned getStackAlignment() const;
+
+ /// \brief The Attribute is converted to a string of the equivalent mnemonic.
+ /// This is, presumably, for writing out the mnemonics for the assembly writer.
+ std::string getAsString(bool InAttrGrp = false) const;
+
+ /// \brief Equality and non-equality operators.
+ bool operator==(Attribute A) const { return pImpl == A.pImpl; }
+ bool operator!=(Attribute A) const { return pImpl != A.pImpl; }
+
+ /// \brief Less-than operator. Useful for sorting the attributes list.
+ bool operator<(Attribute A) const;
+
+ void Profile(FoldingSetNodeID &ID) const {
+ ID.AddPointer(pImpl);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief This class holds the attributes for a function, its return value, and
+/// its parameters. You access the attributes for each of them via an index into
+/// the AttributeSet object. The function attributes are at index
+/// `AttributeSet::FunctionIndex', the return value is at index
+/// `AttributeSet::ReturnIndex', and the attributes for the parameters start at
+/// index `1'.
+class AttributeSet {
+public:
+ enum AttrIndex {
+ ReturnIndex = 0U,
+ FunctionIndex = ~0U
+ };
+private:
+ friend class AttrBuilder;
+ friend class AttributeSetImpl;
+ template <typename Ty> friend struct DenseMapInfo;
+
+ /// \brief The attributes that we are managing. This can be null to represent
+ /// the empty attributes list.
+ AttributeSetImpl *pImpl;
+
+ /// \brief The attributes for the specified index are returned.
+ AttributeSetNode *getAttributes(unsigned Idx) const;
+
+ /// \brief Create an AttributeSet with the specified parameters in it.
+ static AttributeSet get(LLVMContext &C,
+ ArrayRef<std::pair<unsigned, Attribute> > Attrs);
+ static AttributeSet get(LLVMContext &C,
+ ArrayRef<std::pair<unsigned,
+ AttributeSetNode*> > Attrs);
+
+ static AttributeSet getImpl(LLVMContext &C,
+ ArrayRef<std::pair<unsigned,
+ AttributeSetNode*> > Attrs);
+
+
+ explicit AttributeSet(AttributeSetImpl *LI) : pImpl(LI) {}
+public:
+ AttributeSet() : pImpl(0) {}
+
+ //===--------------------------------------------------------------------===//
+ // AttributeSet Construction and Mutation
+ //===--------------------------------------------------------------------===//
+
+ /// \brief Return an AttributeSet with the specified parameters in it.
+ static AttributeSet get(LLVMContext &C, ArrayRef<AttributeSet> Attrs);
+ static AttributeSet get(LLVMContext &C, unsigned Idx,
+ ArrayRef<Attribute::AttrKind> Kind);
+ static AttributeSet get(LLVMContext &C, unsigned Idx, AttrBuilder &B);
+
+ /// \brief Add an attribute to the attribute set at the given index. Since
+ /// attribute sets are immutable, this returns a new set.
+ AttributeSet addAttribute(LLVMContext &C, unsigned Idx,
+ Attribute::AttrKind Attr) const;
+
+ /// \brief Add an attribute to the attribute set at the given index. Since
+ /// attribute sets are immutable, this returns a new set.
+ AttributeSet addAttribute(LLVMContext &C, unsigned Idx,
+ StringRef Kind) const;
+
+ /// \brief Add attributes to the attribute set at the given index. Since
+ /// attribute sets are immutable, this returns a new set.
+ AttributeSet addAttributes(LLVMContext &C, unsigned Idx,
+ AttributeSet Attrs) const;
+
+ /// \brief Remove the specified attribute at the specified index from this
+ /// attribute list. Since attribute lists are immutable, this returns the new
+ /// list.
+ AttributeSet removeAttribute(LLVMContext &C, unsigned Idx,
+ Attribute::AttrKind Attr) const;
+
+ /// \brief Remove the specified attributes at the specified index from this
+ /// attribute list. Since attribute lists are immutable, this returns the new
+ /// list.
+ AttributeSet removeAttributes(LLVMContext &C, unsigned Idx,
+ AttributeSet Attrs) const;
+
+ //===--------------------------------------------------------------------===//
+ // AttributeSet Accessors
+ //===--------------------------------------------------------------------===//
+
+ /// \brief Retrieve the LLVM context.
+ LLVMContext &getContext() const;
+
+ /// \brief The attributes for the specified index are returned.
+ AttributeSet getParamAttributes(unsigned Idx) const;
+
+ /// \brief The attributes for the ret value are returned.
+ AttributeSet getRetAttributes() const;
+
+ /// \brief The function attributes are returned.
+ AttributeSet getFnAttributes() const;
+
+ /// \brief Return true if the attribute exists at the given index.
+ bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const;
+
+ /// \brief Return true if the attribute exists at the given index.
+ bool hasAttribute(unsigned Index, StringRef Kind) const;
+
+ /// \brief Return true if any attribute exists at the given index.
+ bool hasAttributes(unsigned Index) const;
+
+ /// \brief Return true if the specified attribute is set for at least one
+ /// parameter or for the return value.
+ bool hasAttrSomewhere(Attribute::AttrKind Attr) const;
+
+ /// \brief Return the attribute object that exists at the given index.
+ Attribute getAttribute(unsigned Index, Attribute::AttrKind Kind) const;
+
+ /// \brief Return the attribute object that exists at the given index.
+ Attribute getAttribute(unsigned Index, StringRef Kind) const;
+
+ /// \brief Return the alignment for the specified function parameter.
+ unsigned getParamAlignment(unsigned Idx) const;
+
+ /// \brief Get the stack alignment.
+ unsigned getStackAlignment(unsigned Index) const;
+
+ /// \brief Return the attributes at the index as a string.
+ std::string getAsString(unsigned Index, bool InAttrGrp = false) const;
+
+ typedef ArrayRef<Attribute>::iterator iterator;
+
+ iterator begin(unsigned Idx) const;
+ iterator end(unsigned Idx) const;
+
+ /// operator==/!= - Provide equality predicates.
+ bool operator==(const AttributeSet &RHS) const {
+ return pImpl == RHS.pImpl;
+ }
+ bool operator!=(const AttributeSet &RHS) const {
+ return pImpl != RHS.pImpl;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // AttributeSet Introspection
+ //===--------------------------------------------------------------------===//
+
+ // FIXME: Remove this.
+ uint64_t Raw(unsigned Index) const;
+
+ /// \brief Return a raw pointer that uniquely identifies this attribute list.
+ void *getRawPointer() const {
+ return pImpl;
+ }
+
+ /// \brief Return true if there are no attributes.
+ bool isEmpty() const {
+ return getNumSlots() == 0;
+ }
+
+ /// \brief Return the number of slots used in this attribute list. This is
+ /// the number of arguments that have an attribute set on them (including the
+ /// function itself).
+ unsigned getNumSlots() const;
+
+ /// \brief Return the index for the given slot.
+ uint64_t getSlotIndex(unsigned Slot) const;
+
+ /// \brief Return the attributes at the given slot.
+ AttributeSet getSlotAttributes(unsigned Slot) const;
+
+ void dump() const;
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief Provide DenseMapInfo for AttributeSet.
+template<> struct DenseMapInfo<AttributeSet> {
+ static inline AttributeSet getEmptyKey() {
+ uintptr_t Val = static_cast<uintptr_t>(-1);
+ Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
+ return AttributeSet(reinterpret_cast<AttributeSetImpl*>(Val));
+ }
+ static inline AttributeSet getTombstoneKey() {
+ uintptr_t Val = static_cast<uintptr_t>(-2);
+ Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
+ return AttributeSet(reinterpret_cast<AttributeSetImpl*>(Val));
+ }
+ static unsigned getHashValue(AttributeSet AS) {
+ return (unsigned((uintptr_t)AS.pImpl) >> 4) ^
+ (unsigned((uintptr_t)AS.pImpl) >> 9);
+ }
+ static bool isEqual(AttributeSet LHS, AttributeSet RHS) { return LHS == RHS; }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief This class is used in conjunction with the Attribute::get method to
+/// create an Attribute object. The object itself is uniquified. The Builder's
+/// value, however, is not. So this can be used as a quick way to test for
+/// equality, presence of attributes, etc.
+class AttrBuilder {
+ std::bitset<Attribute::EndAttrKinds> Attrs;
+ std::map<std::string, std::string> TargetDepAttrs;
+ uint64_t Alignment;
+ uint64_t StackAlignment;
+public:
+ AttrBuilder() : Attrs(0), Alignment(0), StackAlignment(0) {}
+ explicit AttrBuilder(uint64_t Val)
+ : Attrs(0), Alignment(0), StackAlignment(0) {
+ addRawValue(Val);
+ }
+ AttrBuilder(const Attribute &A) : Attrs(0), Alignment(0), StackAlignment(0) {
+ addAttribute(A);
+ }
+ AttrBuilder(AttributeSet AS, unsigned Idx);
+ AttrBuilder(const AttrBuilder &B)
+ : Attrs(B.Attrs),
+ TargetDepAttrs(B.TargetDepAttrs.begin(), B.TargetDepAttrs.end()),
+ Alignment(B.Alignment), StackAlignment(B.StackAlignment) {}
+
+ void clear();
+
+ /// \brief Add an attribute to the builder.
+ AttrBuilder &addAttribute(Attribute::AttrKind Val);
+
+ /// \brief Add the Attribute object to the builder.
+ AttrBuilder &addAttribute(Attribute A);
+
+ /// \brief Add the target-dependent attribute to the builder.
+ AttrBuilder &addAttribute(StringRef A, StringRef V = StringRef());
+
+ /// \brief Remove an attribute from the builder.
+ AttrBuilder &removeAttribute(Attribute::AttrKind Val);
+
+ /// \brief Remove the attributes from the builder.
+ AttrBuilder &removeAttributes(AttributeSet A, uint64_t Index);
+
+ /// \brief Remove the target-dependent attribute from the builder.
+ AttrBuilder &removeAttribute(StringRef A);
+
+ /// \brief Merge the attributes from builder \p B into this builder.
+ AttrBuilder &merge(const AttrBuilder &B);
+
+ /// \brief Return true if the builder has the specified attribute.
+ bool contains(Attribute::AttrKind A) const {
+ assert((unsigned)A < Attribute::EndAttrKinds && "Attribute out of range!");
+ return Attrs[A];
+ }
+
+ /// \brief Return true if the builder has the specified target-dependent
+ /// attribute.
+ bool contains(StringRef A) const;
+
+ /// \brief Return true if the builder has IR-level attributes.
+ bool hasAttributes() const;
+
+ /// \brief Return true if the builder has any attribute that's in the
+ /// specified attribute set at the given index.
+ bool hasAttributes(AttributeSet A, uint64_t Index) const;
+
+ /// \brief Return true if the builder has an alignment attribute.
+ bool hasAlignmentAttr() const;
+
+ /// \brief Retrieve the alignment attribute, if it exists.
+ uint64_t getAlignment() const { return Alignment; }
+
+ /// \brief Retrieve the stack alignment attribute, if it exists.
+ uint64_t getStackAlignment() const { return StackAlignment; }
+
+ /// \brief This turns an int alignment (which must be a power of 2) into the
+ /// form used internally in Attribute.
+ AttrBuilder &addAlignmentAttr(unsigned Align);
+
+ /// \brief This turns an int stack alignment (which must be a power of 2) into
+ /// the form used internally in Attribute.
+ AttrBuilder &addStackAlignmentAttr(unsigned Align);
+
+ /// \brief Return true if the builder contains no target-independent
+ /// attributes.
+ bool empty() const { return Attrs.none(); }
+
+ // Iterators for target-dependent attributes.
+ typedef std::pair<std::string, std::string> td_type;
+ typedef std::map<std::string, std::string>::iterator td_iterator;
+ typedef std::map<std::string, std::string>::const_iterator td_const_iterator;
+
+ td_iterator td_begin() { return TargetDepAttrs.begin(); }
+ td_iterator td_end() { return TargetDepAttrs.end(); }
+
+ td_const_iterator td_begin() const { return TargetDepAttrs.begin(); }
+ td_const_iterator td_end() const { return TargetDepAttrs.end(); }
+
+ bool td_empty() const { return TargetDepAttrs.empty(); }
+
+ /// \brief Remove attributes that are used on functions only.
+ void removeFunctionOnlyAttrs();
+
+ bool operator==(const AttrBuilder &B);
+ bool operator!=(const AttrBuilder &B) {
+ return !(*this == B);
+ }
+
+ // FIXME: Remove this in 4.0.
+
+ /// \brief Add the raw value to the internal representation.
+ AttrBuilder &addRawValue(uint64_t Val);
+};
+
+namespace AttributeFuncs {
+
+/// \brief Which attributes cannot be applied to a type.
+AttributeSet typeIncompatible(Type *Ty, uint64_t Index);
+
+} // end AttributeFuncs namespace
+
+} // end llvm namespace
+
+#endif
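[Editor's note: a minimal sketch of the AttrBuilder -> AttributeSet flow
declared above; everything used here is from this header except `Ctx`, a
hypothetical llvm::LLVMContext.]

    // Build function-level attributes. AttributeSets are immutable, so
    // each AttributeSet::get() call produces a new uniquified set.
    AttrBuilder FnB;
    FnB.addAttribute(Attribute::NoUnwind).addAttribute(Attribute::ReadOnly);
    AttributeSet FnAttrs =
        AttributeSet::get(Ctx, AttributeSet::FunctionIndex, FnB);
    (void)FnAttrs;

    // Parameter attributes start at index 1; index 0 is the return value.
    AttrBuilder ParamB;
    ParamB.addAttribute(Attribute::NoCapture).addAlignmentAttr(16);
    AttributeSet ParamAttrs = AttributeSet::get(Ctx, 1, ParamB);
    assert(ParamAttrs.getParamAlignment(1) == 16);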
diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h
new file mode 100644
index 000000000000..ea5695a9e640
--- /dev/null
+++ b/include/llvm/IR/BasicBlock.h
@@ -0,0 +1,303 @@
+//===-- llvm/BasicBlock.h - Represent a basic block in the VM ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the BasicBlock class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_BASICBLOCK_H
+#define LLVM_IR_BASICBLOCK_H
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/SymbolTableListTraits.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class LandingPadInst;
+class TerminatorInst;
+class LLVMContext;
+class BlockAddress;
+
+template<> struct ilist_traits<Instruction>
+ : public SymbolTableListTraits<Instruction, BasicBlock> {
+
+ /// \brief Return a node that marks the end of a list.
+ ///
+ /// The sentinel is relative to this instance, so we use a non-static
+ /// method.
+ Instruction *createSentinel() const {
+ // Since i(p)lists always publicly derive from their corresponding traits,
+ // placing a data member in this class will augment the i(p)list. But since
+ // the NodeTy is expected to publicly derive from ilist_node<NodeTy>,
+ // there is a legal viable downcast from it to NodeTy. We use this trick to
+ // superimpose an i(p)list with a "ghostly" NodeTy, which becomes the
+ // sentinel. Dereferencing the sentinel is forbidden (save the
+ // ilist_node<NodeTy>), so no one will ever notice the superposition.
+ return static_cast<Instruction*>(&Sentinel);
+ }
+ static void destroySentinel(Instruction*) {}
+
+ Instruction *provideInitialHead() const { return createSentinel(); }
+ Instruction *ensureHead(Instruction*) const { return createSentinel(); }
+ static void noteHead(Instruction*, Instruction*) {}
+private:
+ mutable ilist_half_node<Instruction> Sentinel;
+};
+
+/// \brief LLVM Basic Block Representation
+///
+/// This represents a single basic block in LLVM. A basic block is simply a
+/// container of instructions that execute sequentially. Basic blocks are Values
+/// because they are referenced by instructions such as branches and switch
+/// tables. The type of a BasicBlock is "Type::LabelTy" because the basic block
+/// represents a label to which a branch can jump.
+///
+/// A well formed basic block is formed of a list of non-terminating
+/// instructions followed by a single TerminatorInst instruction.
+/// TerminatorInst's may not occur in the middle of basic blocks, and must
+/// terminate the blocks. The BasicBlock class allows malformed basic blocks to
+/// occur because it may be useful in the intermediate stage of constructing or
+/// modifying a program. However, the verifier will ensure that basic blocks
+/// are "well formed".
+class BasicBlock : public Value, // Basic blocks are data objects also
+ public ilist_node<BasicBlock> {
+ friend class BlockAddress;
+public:
+ typedef iplist<Instruction> InstListType;
+private:
+ InstListType InstList;
+ Function *Parent;
+
+ void setParent(Function *parent);
+ friend class SymbolTableListTraits<BasicBlock, Function>;
+
+ BasicBlock(const BasicBlock &) LLVM_DELETED_FUNCTION;
+ void operator=(const BasicBlock &) LLVM_DELETED_FUNCTION;
+
+ /// \brief Constructor.
+ ///
+ /// If the function parameter is specified, the basic block is automatically
+ /// inserted at either the end of the function (if InsertBefore is null), or
+ /// before the specified basic block.
+ explicit BasicBlock(LLVMContext &C, const Twine &Name = "",
+ Function *Parent = 0, BasicBlock *InsertBefore = 0);
+public:
+ /// \brief Get the context in which this basic block lives.
+ LLVMContext &getContext() const;
+
+ /// Instruction iterators...
+ typedef InstListType::iterator iterator;
+ typedef InstListType::const_iterator const_iterator;
+ typedef InstListType::reverse_iterator reverse_iterator;
+ typedef InstListType::const_reverse_iterator const_reverse_iterator;
+
+ /// \brief Creates a new BasicBlock.
+ ///
+ /// If the Parent parameter is specified, the basic block is automatically
+ /// inserted at either the end of the function (if InsertBefore is 0), or
+ /// before the specified basic block.
+ static BasicBlock *Create(LLVMContext &Context, const Twine &Name = "",
+ Function *Parent = 0,BasicBlock *InsertBefore = 0) {
+ return new BasicBlock(Context, Name, Parent, InsertBefore);
+ }
+ ~BasicBlock();
+
+ /// \brief Return the enclosing method, or null if none.
+ const Function *getParent() const { return Parent; }
+ Function *getParent() { return Parent; }
+
+ /// \brief Returns the terminator instruction if the block is well formed or
+ /// null if the block is not well formed.
+ TerminatorInst *getTerminator();
+ const TerminatorInst *getTerminator() const;
+
+ /// \brief Returns a pointer to the first instruction in this block that is
+ /// not a PHINode instruction.
+ ///
+ /// When adding instructions to the beginning of the basic block, they should
+ /// be added before the returned value, not before the first instruction,
+ /// which might be PHI. Returns 0 if there's no non-PHI instruction.
+ Instruction* getFirstNonPHI();
+ const Instruction* getFirstNonPHI() const {
+ return const_cast<BasicBlock*>(this)->getFirstNonPHI();
+ }
+
+ /// \brief Returns a pointer to the first instruction in this block that is not
+ /// a PHINode or a debug intrinsic.
+ Instruction* getFirstNonPHIOrDbg();
+ const Instruction* getFirstNonPHIOrDbg() const {
+ return const_cast<BasicBlock*>(this)->getFirstNonPHIOrDbg();
+ }
+
+ /// \brief Returns a pointer to the first instruction in this block that is not
+ /// a PHINode, a debug intrinsic, or a lifetime intrinsic.
+ Instruction* getFirstNonPHIOrDbgOrLifetime();
+ const Instruction* getFirstNonPHIOrDbgOrLifetime() const {
+ return const_cast<BasicBlock*>(this)->getFirstNonPHIOrDbgOrLifetime();
+ }
+
+ /// \brief Returns an iterator to the first instruction in this block that is
+ /// suitable for inserting a non-PHI instruction.
+ ///
+ /// In particular, it skips all PHIs and LandingPad instructions.
+ iterator getFirstInsertionPt();
+ const_iterator getFirstInsertionPt() const {
+ return const_cast<BasicBlock*>(this)->getFirstInsertionPt();
+ }
+
+ /// \brief Unlink 'this' from the containing function, but do not delete it.
+ void removeFromParent();
+
+ /// \brief Unlink 'this' from the containing function and delete it.
+ void eraseFromParent();
+
+ /// \brief Unlink this basic block from its current function and insert it
+ /// into the function that \p MovePos lives in, right before \p MovePos.
+ void moveBefore(BasicBlock *MovePos);
+
+ /// \brief Unlink this basic block from its current function and insert it
+ /// right after \p MovePos in the function \p MovePos lives in.
+ void moveAfter(BasicBlock *MovePos);
+
+
+ /// \brief Return this block if it has a single predecessor block. Otherwise
+ /// return a null pointer.
+ BasicBlock *getSinglePredecessor();
+ const BasicBlock *getSinglePredecessor() const {
+ return const_cast<BasicBlock*>(this)->getSinglePredecessor();
+ }
+
+ /// \brief Return this block if it has a unique predecessor block. Otherwise
+ /// return a null pointer.
+ ///
+ /// Note that unique predecessor doesn't mean single edge, there can be
+ /// multiple edges from the unique predecessor to this block (for example a
+ /// switch statement with multiple cases having the same destination).
+ BasicBlock *getUniquePredecessor();
+ const BasicBlock *getUniquePredecessor() const {
+ return const_cast<BasicBlock*>(this)->getUniquePredecessor();
+ }
+
+ //===--------------------------------------------------------------------===//
+ /// Instruction iterator methods
+ ///
+ inline iterator begin() { return InstList.begin(); }
+ inline const_iterator begin() const { return InstList.begin(); }
+ inline iterator end () { return InstList.end(); }
+ inline const_iterator end () const { return InstList.end(); }
+
+ inline reverse_iterator rbegin() { return InstList.rbegin(); }
+ inline const_reverse_iterator rbegin() const { return InstList.rbegin(); }
+ inline reverse_iterator rend () { return InstList.rend(); }
+ inline const_reverse_iterator rend () const { return InstList.rend(); }
+
+ inline size_t size() const { return InstList.size(); }
+ inline bool empty() const { return InstList.empty(); }
+ inline const Instruction &front() const { return InstList.front(); }
+ inline Instruction &front() { return InstList.front(); }
+ inline const Instruction &back() const { return InstList.back(); }
+ inline Instruction &back() { return InstList.back(); }
+
+ /// \brief Return the underlying instruction list container.
+ ///
+ /// Currently you need to access the underlying instruction list container
+ /// directly if you want to modify it.
+ const InstListType &getInstList() const { return InstList; }
+ InstListType &getInstList() { return InstList; }
+
+ /// \brief Returns a pointer to a member of the instruction list.
+ static iplist<Instruction> BasicBlock::*getSublistAccess(Instruction*) {
+ return &BasicBlock::InstList;
+ }
+
+ /// \brief Returns a pointer to the symbol table if one exists.
+ ValueSymbolTable *getValueSymbolTable();
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == Value::BasicBlockVal;
+ }
+
+ /// \brief Cause all subinstructions to "let go" of all the references that
+ /// said subinstructions are maintaining.
+ ///
+ /// This allows one to 'delete' a whole class at a time, even though there may
+ /// be circular references... first all references are dropped, and all use
+ /// counts go to zero. Then everything is delete'd for real. Note that no
+ /// operations are valid on an object that has "dropped all references",
+ /// except operator delete.
+ void dropAllReferences();
+
+ /// \brief Notify the BasicBlock that the predecessor \p Pred is no longer
+ /// able to reach it.
+ ///
+ /// This is actually not used to update the Predecessor list, but is
+ /// used to update the PHI nodes that reside in the block. Note that this
+ /// should be called while the predecessor still refers to this block.
+ void removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs = false);
+
+ /// \brief Split the basic block into two basic blocks at the specified
+ /// instruction.
+ ///
+ /// Note that all instructions BEFORE the specified iterator stay as part of
+ /// the original basic block, an unconditional branch is added to the original
+ /// BB, and the rest of the instructions in the BB are moved to the new BB,
+ /// including the old terminator. The newly formed BasicBlock is returned.
+ /// This function invalidates the specified iterator.
+ ///
+ /// Note that this only works on well formed basic blocks (must have a
+ /// terminator), and 'I' must not be the end of instruction list (which would
+ /// cause a degenerate basic block to be formed, having a terminator inside of
+ /// the basic block).
+ ///
+ /// Also note that this doesn't preserve any passes. To split blocks while
+ /// keeping loop information consistent, use the SplitBlock utility function.
+ BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = "");
+
+ /// \brief Returns true if there are any uses of this basic block other than
+ /// direct branches, switches, etc. to it.
+ bool hasAddressTaken() const { return getSubclassDataFromValue() != 0; }
+
+ /// \brief Update all phi nodes in this basic block's successors to refer to
+ /// basic block \p New instead of to it.
+ void replaceSuccessorsPhiUsesWith(BasicBlock *New);
+
+ /// \brief Return true if this basic block is a landing pad.
+ ///
+ /// Being a ``landing pad'' means that the basic block is the destination of
+ /// the 'unwind' edge of an invoke instruction.
+ bool isLandingPad() const;
+
+ /// \brief Return the landingpad instruction associated with the landing pad.
+ LandingPadInst *getLandingPadInst();
+ const LandingPadInst *getLandingPadInst() const;
+
+private:
+ /// \brief Increment the internal refcount of the number of BlockAddresses
+ /// referencing this BasicBlock by \p Amt.
+ ///
+ /// This is almost always 0, sometimes 1, but almost never 2, and
+ /// inconceivably 3 or more.
+ void AdjustBlockAddressRefCount(int Amt) {
+ setValueSubclassData(getSubclassDataFromValue()+Amt);
+ assert((int)(signed char)getSubclassDataFromValue() >= 0 &&
+ "Refcount wrap-around");
+ }
+ /// \brief Shadow Value::setValueSubclassData with a private forwarding method
+ /// so that any future subclasses cannot accidentally use it.
+ void setValueSubclassData(unsigned short D) {
+ Value::setValueSubclassData(D);
+ }
+};
+
+} // End llvm namespace
+
+#endif
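[Editor's note: a minimal sketch of splitBasicBlock() and
getFirstInsertionPt() from the header above. `BB` is a hypothetical
well-formed llvm::BasicBlock* and `I` an iterator to an instruction in it.]

    // Everything from I onward, including the old terminator, moves to the
    // new block; BB gets an unconditional branch to it. I is invalidated.
    BasicBlock *Tail = BB->splitBasicBlock(I, "tail");

    // To insert new non-PHI instructions, skip PHIs and the landingpad.
    BasicBlock::iterator IP = Tail->getFirstInsertionPt();
    (void)IP; // would be the insertion point for new instructions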
diff --git a/include/llvm/IR/CMakeLists.txt b/include/llvm/IR/CMakeLists.txt
new file mode 100644
index 000000000000..2d52a89f9cd5
--- /dev/null
+++ b/include/llvm/IR/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(LLVM_TARGET_DEFINITIONS Intrinsics.td)
+
+tablegen(LLVM Intrinsics.gen -gen-intrinsic)
+
+add_custom_target(intrinsics_gen ALL
+ DEPENDS ${llvm_builded_incs_dir}/IR/Intrinsics.gen)
+set_target_properties(intrinsics_gen PROPERTIES FOLDER "Tablegenning")
diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h
new file mode 100644
index 000000000000..6f3ab2088655
--- /dev/null
+++ b/include/llvm/IR/CallingConv.h
@@ -0,0 +1,129 @@
+//===-- llvm/CallingConv.h - LLVM Calling Conventions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines LLVM's set of calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_CALLINGCONV_H
+#define LLVM_IR_CALLINGCONV_H
+
+namespace llvm {
+
+/// CallingConv Namespace - This namespace contains an enum with a value for
+/// each of the well-known calling conventions.
+///
+namespace CallingConv {
+ /// A set of enums which specify the assigned numeric values for known llvm
+ /// calling conventions.
+ /// @brief LLVM Calling Convention Representation
+ enum ID {
+ /// C - The default llvm calling convention, compatible with C. This
+ /// convention is the only calling convention that supports varargs calls.
+ /// As with typical C calling conventions, the callee/caller have to
+ /// tolerate certain amounts of prototype mismatch.
+ C = 0,
+
+ // Generic LLVM calling conventions. None of these calling conventions
+ // support varargs calls, and all assume that the caller and callee
+ // prototype exactly match.
+
+ /// Fast - This calling convention attempts to make calls as fast as
+ /// possible (e.g. by passing things in registers).
+ Fast = 8,
+
+ // Cold - This calling convention attempts to make code in the caller as
+ // efficient as possible under the assumption that the call is not commonly
+ // executed. As such, these calls often preserve all registers so that the
+ // call does not break any live ranges in the caller side.
+ Cold = 9,
+
+ // GHC - Calling convention used by the Glasgow Haskell Compiler (GHC).
+ GHC = 10,
+
+ // HiPE - Calling convention used by the High-Performance Erlang Compiler
+ // (HiPE).
+ HiPE = 11,
+
+ // Target - This is the start of the target-specific calling conventions,
+ // e.g. fastcall and thiscall on X86.
+ FirstTargetCC = 64,
+
+ /// X86_StdCall - stdcall is the calling convention mostly used by the
+ /// Win32 API. It is basically the same as the C convention, except that
+ /// the callee is responsible for popping the arguments from the stack.
+ X86_StdCall = 64,
+
+ /// X86_FastCall - 'fast' analog of X86_StdCall. Passes first two arguments
+ /// in ECX:EDX registers, the others via the stack. The callee is
+ /// responsible for cleaning the stack.
+ X86_FastCall = 65,
+
+ /// ARM_APCS - ARM Procedure Calling Standard calling convention (obsolete,
+ /// but still used on some targets).
+ ARM_APCS = 66,
+
+ /// ARM_AAPCS - ARM Architecture Procedure Calling Standard calling
+ /// convention (aka EABI). Soft float variant.
+ ARM_AAPCS = 67,
+
+ /// ARM_AAPCS_VFP - Same as ARM_AAPCS, but uses hard floating point ABI.
+ ARM_AAPCS_VFP = 68,
+
+ /// MSP430_INTR - Calling convention used for MSP430 interrupt routines.
+ MSP430_INTR = 69,
+
+ /// X86_ThisCall - Similar to X86_StdCall. Passes first argument in ECX,
+ /// others via stack. Callee is responsible for stack cleaning. MSVC uses
+ /// this by default for methods in its ABI.
+ X86_ThisCall = 70,
+
+ /// PTX_Kernel - Call to a PTX kernel.
+ /// Passes all arguments in parameter space.
+ PTX_Kernel = 71,
+
+ /// PTX_Device - Call to a PTX device function.
+ /// Passes all arguments in register or parameter space.
+ PTX_Device = 72,
+
+ /// MBLAZE_INTR - Calling convention used for MBlaze interrupt routines.
+ MBLAZE_INTR = 73,
+
+ /// MBLAZE_SVOL - Calling convention used for MBlaze interrupt support
+ /// routines (i.e. GCC's save_volatiles attribute).
+ MBLAZE_SVOL = 74,
+
+ /// SPIR_FUNC - Calling convention for SPIR non-kernel device functions.
+ /// No lowering or expansion of arguments.
+ /// Structures are passed as a pointer to a struct with the byval attribute.
+ /// Functions can only call SPIR_FUNC and SPIR_KERNEL functions.
+ /// Functions can only have zero or one return values.
+ /// Variable arguments are not allowed, except for printf.
+  /// How arguments/return values are lowered is not specified.
+ /// Functions are only visible to the devices.
+ SPIR_FUNC = 75,
+
+ /// SPIR_KERNEL - Calling convention for SPIR kernel functions.
+  /// Inherits the restrictions of SPIR_FUNC, except that it:
+ /// Cannot have non-void return values.
+ /// Cannot have variable arguments.
+ /// Can also be called by the host.
+ /// Is externally visible.
+ SPIR_KERNEL = 76,
+
+  /// Intel_OCL_BI - Calling convention for Intel OpenCL built-ins.
+ Intel_OCL_BI = 77
+
+ };
+} // End CallingConv namespace
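+
+// Illustrative usage (a sketch, not part of this header; F and CI are
+// hypothetical Function* and CallInst* values):
+//
+//   F->setCallingConv(CallingConv::Fast);
+//   CI->setCallingConv(CallingConv::Fast); // a call site's convention must
+//                                          // match the callee's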
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h
new file mode 100644
index 000000000000..26bad1dd1f79
--- /dev/null
+++ b/include/llvm/IR/Constant.h
@@ -0,0 +1,170 @@
+//===-- llvm/Constant.h - Constant class definition -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the Constant class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_CONSTANT_H
+#define LLVM_IR_CONSTANT_H
+
+#include "llvm/IR/User.h"
+
+namespace llvm {
+ class APInt;
+
+ template<typename T> class SmallVectorImpl;
+
+/// This is an important base class in LLVM. It provides the common facilities
+/// of all constant values in an LLVM program. A constant is a value that is
+/// immutable at runtime. Functions are constants because their address is
+/// immutable; the same is true of global variables.
+///
+/// All constants share the capabilities provided in this class. All constants
+/// can have a null value. They can have an operand list. Constants can be
+/// simple (integer and floating point values), complex (arrays and structures),
+/// or expression based (computations yielding a constant value composed of
+/// only certain operators and other constant values).
+///
+/// Note that Constants are immutable (once created they never change)
+/// and are fully shared by structural equivalence. This means that two
+/// structurally equivalent constants will always have the same address.
+/// Constants are created on demand as needed and never deleted: thus clients
+/// don't have to worry about the lifetime of the objects.
+/// @brief LLVM Constant Representation
+class Constant : public User {
+ void operator=(const Constant &) LLVM_DELETED_FUNCTION;
+ Constant(const Constant &) LLVM_DELETED_FUNCTION;
+ virtual void anchor();
+
+protected:
+ Constant(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps)
+ : User(ty, vty, Ops, NumOps) {}
+
+ void destroyConstantImpl();
+public:
+ /// isNullValue - Return true if this is the value that would be returned by
+ /// getNullValue.
+ bool isNullValue() const;
+
+ /// isAllOnesValue - Return true if this is the value that would be returned by
+ /// getAllOnesValue.
+ bool isAllOnesValue() const;
+
+ /// isNegativeZeroValue - Return true if the value is what would be returned
+ /// by getZeroValueForNegation.
+ bool isNegativeZeroValue() const;
+
+ /// Return true if the value is negative zero or null value.
+ bool isZeroValue() const;
+
+ /// canTrap - Return true if evaluation of this constant could trap. This is
+ /// true for things like constant expressions that could divide by zero.
+ bool canTrap() const;
+
+ /// isThreadDependent - Return true if the value can vary between threads.
+ bool isThreadDependent() const;
+
+ /// isConstantUsed - Return true if the constant has users other than constant
+ /// exprs and other dangling things.
+ bool isConstantUsed() const;
+
+ enum PossibleRelocationsTy {
+ NoRelocation = 0,
+ LocalRelocation = 1,
+ GlobalRelocations = 2
+ };
+
+ /// getRelocationInfo - This method classifies the entry according to
+ /// whether or not it may generate a relocation entry. This must be
+ /// conservative, so if it might codegen to a relocatable entry, it should say
+ /// so. The return values are:
+ ///
+ /// NoRelocation: This constant pool entry is guaranteed to never have a
+ /// relocation applied to it (because it holds a simple constant like
+ /// '4').
+ /// LocalRelocation: This entry has relocations, but the entries are
+ /// guaranteed to be resolvable by the static linker, so the dynamic
+ /// linker will never see them.
+ /// GlobalRelocations: This entry may have arbitrary relocations.
+ ///
+ /// FIXME: This really should not be in VMCore.
+ PossibleRelocationsTy getRelocationInfo() const;
+
+ /// getAggregateElement - For aggregates (struct/array/vector) return the
+ /// constant that corresponds to the specified element if possible, or null if
+ /// not. This can return null if the element index is a ConstantExpr, or if
+ /// 'this' is a constant expr.
+ Constant *getAggregateElement(unsigned Elt) const;
+ Constant *getAggregateElement(Constant *Elt) const;
+
+ /// getSplatValue - If this is a splat vector constant, meaning that all of
+ /// the elements have the same value, return that value. Otherwise return 0.
+ Constant *getSplatValue() const;
+
+ /// If C is a constant integer then return its value, otherwise C must be a
+ /// vector of constant integers, all equal, and the common value is returned.
+ const APInt &getUniqueInteger() const;
+
+ /// destroyConstant - Called if some element of this constant is no longer
+ /// valid. At this point only other constants may be on the use_list for this
+  /// constant. Any constants on our Use list must also be destroyed. The
+ /// implementation must be sure to remove the constant from the list of
+ /// available cached constants. Implementations should call
+ /// destroyConstantImpl as the last thing they do, to destroy all users and
+ /// delete this.
+ virtual void destroyConstant() { llvm_unreachable("Not reached!"); }
+
+ //// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Value *V) {
+ return V->getValueID() >= ConstantFirstVal &&
+ V->getValueID() <= ConstantLastVal;
+ }
+
+ /// replaceUsesOfWithOnConstant - This method is a special form of
+ /// User::replaceUsesOfWith (which does not work on constants) that does work
+ /// on constants. Basically this method goes through the trouble of building
+ /// a new constant that is equivalent to the current one, with all uses of
+ /// From replaced with uses of To. After this construction is completed, all
+ /// of the users of 'this' are replaced to use the new constant, and then
+  /// 'this' is deleted. In general, you should not call this method; instead,
+ /// use Value::replaceAllUsesWith, which automatically dispatches to this
+ /// method as needed.
+ ///
+ virtual void replaceUsesOfWithOnConstant(Value *, Value *, Use *) {
+ // Provide a default implementation for constants (like integers) that
+ // cannot use any other values. This cannot be called at runtime, but needs
+ // to be here to avoid link errors.
+ assert(getNumOperands() == 0 && "replaceUsesOfWithOnConstant must be "
+ "implemented for all constants that have operands!");
+ llvm_unreachable("Constants that do not have operands cannot be using "
+ "'From'!");
+ }
+
+ static Constant *getNullValue(Type* Ty);
+
+ /// @returns the value for an integer or vector of integer constant of the
+ /// given type that has all its bits set to true.
+ /// @brief Get the all ones value
+ static Constant *getAllOnesValue(Type* Ty);
+
+ /// getIntegerValue - Return the value for an integer or pointer constant,
+ /// or a vector thereof, with the given scalar value.
+ static Constant *getIntegerValue(Type* Ty, const APInt &V);
+
+ /// removeDeadConstantUsers - If there are any dead constant users dangling
+ /// off of this constant, remove them. This method is useful for clients
+ /// that want to check to see if a global is unused, but don't want to deal
+ /// with potentially dead constants hanging off of the globals.
+ void removeDeadConstantUsers() const;
+};
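+
+// Illustrative sketch of the uniquing guarantee described above (Ty is a
+// hypothetical Type*): structurally equivalent constants are the same object,
+// so pointer identity is a valid equality test.
+//
+//   Constant *A = Constant::getNullValue(Ty);
+//   Constant *B = Constant::getNullValue(Ty);
+//   assert(A == B && "constants are uniqued by structural equivalence");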
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h
new file mode 100644
index 000000000000..ad258f9aca4d
--- /dev/null
+++ b/include/llvm/IR/Constants.h
@@ -0,0 +1,1163 @@
+//===-- llvm/Constants.h - Constant class subclass definitions --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// This file contains the declarations for the subclasses of Constant,
+/// which represent the different flavors of constant values that live in LLVM.
+/// Note that Constants are immutable (once created they never change) and are
+/// fully shared by structural equivalence. This means that two structurally
+/// equivalent constants will always have the same address. Constants are
+/// created on demand as needed and never deleted: thus clients don't have to
+/// worry about the lifetime of the objects.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_CONSTANTS_H
+#define LLVM_IR_CONSTANTS_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/OperandTraits.h"
+
+namespace llvm {
+
+class ArrayType;
+class IntegerType;
+class StructType;
+class PointerType;
+class VectorType;
+class SequentialType;
+
+template<class ConstantClass, class TypeClass, class ValType>
+struct ConstantCreator;
+template<class ConstantClass, class TypeClass>
+struct ConstantArrayCreator;
+template<class ConstantClass, class TypeClass>
+struct ConvertConstantType;
+
+//===----------------------------------------------------------------------===//
+/// This is the shared class of boolean and integer constants. This class
+/// represents both boolean and integral constants.
+/// @brief Class for constant integers.
+class ConstantInt : public Constant {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ ConstantInt(const ConstantInt &) LLVM_DELETED_FUNCTION;
+ ConstantInt(IntegerType *Ty, const APInt& V);
+ APInt Val;
+protected:
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+public:
+ static ConstantInt *getTrue(LLVMContext &Context);
+ static ConstantInt *getFalse(LLVMContext &Context);
+ static Constant *getTrue(Type *Ty);
+ static Constant *getFalse(Type *Ty);
+
+ /// If Ty is a vector type, return a Constant with a splat of the given
+ /// value. Otherwise return a ConstantInt for the given value.
+ static Constant *get(Type *Ty, uint64_t V, bool isSigned = false);
+
+ /// Return a ConstantInt with the specified integer value for the specified
+ /// type. If the type is wider than 64 bits, the value will be zero-extended
+ /// to fit the type, unless isSigned is true, in which case the value will
+ /// be interpreted as a 64-bit signed integer and sign-extended to fit
+ /// the type.
+ /// @brief Get a ConstantInt for a specific value.
+ static ConstantInt *get(IntegerType *Ty, uint64_t V,
+ bool isSigned = false);
+
+ /// Return a ConstantInt with the specified value for the specified type. The
+  /// value V will be canonicalized to an unsigned APInt. Accessing it with
+ /// either getSExtValue() or getZExtValue() will yield a correctly sized and
+ /// signed value for the type Ty.
+ /// @brief Get a ConstantInt for a specific signed value.
+ static ConstantInt *getSigned(IntegerType *Ty, int64_t V);
+ static Constant *getSigned(Type *Ty, int64_t V);
+
+ /// Return a ConstantInt with the specified value and an implied Type. The
+ /// type is the integer type that corresponds to the bit width of the value.
+ static ConstantInt *get(LLVMContext &Context, const APInt &V);
+
+  /// Return a ConstantInt constructed from the string Str with the given
+ /// radix.
+ static ConstantInt *get(IntegerType *Ty, StringRef Str,
+ uint8_t radix);
+
+ /// If Ty is a vector type, return a Constant with a splat of the given
+ /// value. Otherwise return a ConstantInt for the given value.
+ static Constant *get(Type* Ty, const APInt& V);
+
+ /// Return the constant as an APInt value reference. This allows clients to
+  /// obtain a copy of the value, with all its precision intact.
+ /// @brief Return the constant's value.
+ inline const APInt &getValue() const {
+ return Val;
+ }
+
+ /// getBitWidth - Return the bitwidth of this constant.
+ unsigned getBitWidth() const { return Val.getBitWidth(); }
+
+ /// Return the constant as a 64-bit unsigned integer value after it
+ /// has been zero extended as appropriate for the type of this constant. Note
+ /// that this method can assert if the value does not fit in 64 bits.
+ /// @deprecated
+ /// @brief Return the zero extended value.
+ inline uint64_t getZExtValue() const {
+ return Val.getZExtValue();
+ }
+
+ /// Return the constant as a 64-bit integer value after it has been sign
+ /// extended as appropriate for the type of this constant. Note that
+ /// this method can assert if the value does not fit in 64 bits.
+ /// @deprecated
+ /// @brief Return the sign extended value.
+ inline int64_t getSExtValue() const {
+ return Val.getSExtValue();
+ }
+
+  /// A helper method that can be used to determine if the constant contained
+  /// within is equal to a given integer. This only works for very small values,
+  /// because this is all that can be represented with all types.
+  /// @brief Determine if this constant's value is the same as an unsigned char.
+ bool equalsInt(uint64_t V) const {
+ return Val == V;
+ }
+
+ /// getType - Specialize the getType() method to always return an IntegerType,
+ /// which reduces the amount of casting needed in parts of the compiler.
+ ///
+ inline IntegerType *getType() const {
+ return reinterpret_cast<IntegerType*>(Value::getType());
+ }
+
+ /// This static method returns true if the type Ty is big enough to
+ /// represent the value V. This can be used to avoid having the get method
+ /// assert when V is larger than Ty can represent. Note that there are two
+ /// versions of this method, one for unsigned and one for signed integers.
+ /// Although ConstantInt canonicalizes everything to an unsigned integer,
+ /// the signed version avoids callers having to convert a signed quantity
+ /// to the appropriate unsigned type before calling the method.
+ /// @returns true if V is a valid value for type Ty
+ /// @brief Determine if the value is in range for the given type.
+ static bool isValueValidForType(Type *Ty, uint64_t V);
+ static bool isValueValidForType(Type *Ty, int64_t V);
+
+ bool isNegative() const { return Val.isNegative(); }
+
+ /// This is just a convenience method to make client code smaller for a
+  /// common case. It also correctly performs the comparison without the
+ /// potential for an assertion from getZExtValue().
+ bool isZero() const {
+ return Val == 0;
+ }
+
+ /// This is just a convenience method to make client code smaller for a
+ /// common case. It also correctly performs the comparison without the
+ /// potential for an assertion from getZExtValue().
+ /// @brief Determine if the value is one.
+ bool isOne() const {
+ return Val == 1;
+ }
+
+ /// This function will return true iff every bit in this constant is set
+ /// to true.
+ /// @returns true iff this constant's bits are all set to true.
+ /// @brief Determine if the value is all ones.
+ bool isMinusOne() const {
+ return Val.isAllOnesValue();
+ }
+
+ /// This function will return true iff this constant represents the largest
+ /// value that may be represented by the constant's type.
+ /// @returns true iff this is the largest value that may be represented
+ /// by this type.
+ /// @brief Determine if the value is maximal.
+ bool isMaxValue(bool isSigned) const {
+ if (isSigned)
+ return Val.isMaxSignedValue();
+ else
+ return Val.isMaxValue();
+ }
+
+ /// This function will return true iff this constant represents the smallest
+ /// value that may be represented by this constant's type.
+ /// @returns true if this is the smallest value that may be represented by
+ /// this type.
+ /// @brief Determine if the value is minimal.
+ bool isMinValue(bool isSigned) const {
+ if (isSigned)
+ return Val.isMinSignedValue();
+ else
+ return Val.isMinValue();
+ }
+
+ /// This function will return true iff this constant represents a value with
+  /// active bits bigger than 64 bits or a value greater than or equal to the
+  /// given uint64_t value.
+  /// @returns true iff this constant is greater than or equal to the given
+  /// number.
+  /// @brief Determine if the value is greater than or equal to the given number.
+ bool uge(uint64_t Num) const {
+ return Val.getActiveBits() > 64 || Val.getZExtValue() >= Num;
+ }
+
+ /// getLimitedValue - If the value is smaller than the specified limit,
+ /// return it, otherwise return the limit value. This causes the value
+ /// to saturate to the limit.
+ /// @returns the min of the value of the constant and the specified value
+ /// @brief Get the constant's value with a saturation limit
+ uint64_t getLimitedValue(uint64_t Limit = ~0ULL) const {
+ return Val.getLimitedValue(Limit);
+ }
+
+ /// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantIntVal;
+ }
+};
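+
+// Illustrative usage (a sketch; Ctx is a hypothetical LLVMContext):
+//
+//   IntegerType *I32 = Type::getInt32Ty(Ctx);
+//   ConstantInt *CI = ConstantInt::get(I32, 42);
+//   assert(CI->getZExtValue() == 42 && !CI->isZero());
+//   ConstantInt *M1 = ConstantInt::getSigned(I32, -1);
+//   assert(M1->isMinusOne()); // -1 canonicalizes to the all-ones bit pattern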
+
+
+//===----------------------------------------------------------------------===//
+/// ConstantFP - Floating Point Values [float, double]
+///
+class ConstantFP : public Constant {
+ APFloat Val;
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ ConstantFP(const ConstantFP &) LLVM_DELETED_FUNCTION;
+ friend class LLVMContextImpl;
+protected:
+ ConstantFP(Type *Ty, const APFloat& V);
+protected:
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+public:
+ /// Floating point negation must be implemented with f(x) = -0.0 - x. This
+ /// method returns the negative zero constant for floating point or vector
+ /// floating point types; for all other types, it returns the null value.
+ static Constant *getZeroValueForNegation(Type *Ty);
+
+ /// get() - This returns a ConstantFP, or a vector containing a splat of a
+ /// ConstantFP, for the specified value in the specified type. This should
+ /// only be used for simple constant values like 2.0/1.0 etc, that are
+ /// known-valid both as host double and as the target format.
+ static Constant *get(Type* Ty, double V);
+ static Constant *get(Type* Ty, StringRef Str);
+ static ConstantFP *get(LLVMContext &Context, const APFloat &V);
+ static ConstantFP *getNegativeZero(Type* Ty);
+ static ConstantFP *getInfinity(Type *Ty, bool Negative = false);
+
+ /// isValueValidForType - return true if Ty is big enough to represent V.
+ static bool isValueValidForType(Type *Ty, const APFloat &V);
+ inline const APFloat &getValueAPF() const { return Val; }
+
+ /// isZero - Return true if the value is positive or negative zero.
+ bool isZero() const { return Val.isZero(); }
+
+ /// isNegative - Return true if the sign bit is set.
+ bool isNegative() const { return Val.isNegative(); }
+
+ /// isNaN - Return true if the value is a NaN.
+ bool isNaN() const { return Val.isNaN(); }
+
+ /// isExactlyValue - We don't rely on operator== working on double values, as
+ /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+ /// As such, this method can be used to do an exact bit-for-bit comparison of
+ /// two floating point values. The version with a double operand is retained
+ /// because it's so convenient to write isExactlyValue(2.0), but please use
+ /// it only for simple constants.
+ bool isExactlyValue(const APFloat &V) const;
+
+ bool isExactlyValue(double V) const {
+ bool ignored;
+ APFloat FV(V);
+ FV.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &ignored);
+ return isExactlyValue(FV);
+ }
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantFPVal;
+ }
+};
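+
+// Illustrative usage (a sketch; Ctx is a hypothetical LLVMContext). Note the
+// use of isExactlyValue() rather than operator== on doubles, as documented
+// above:
+//
+//   Constant *Two = ConstantFP::get(Type::getDoubleTy(Ctx), 2.0);
+//   if (ConstantFP *CFP = dyn_cast<ConstantFP>(Two))
+//     assert(CFP->isExactlyValue(2.0) && !CFP->isNaN());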
+
+//===----------------------------------------------------------------------===//
+/// ConstantAggregateZero - All zero aggregate value
+///
+class ConstantAggregateZero : public Constant {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ ConstantAggregateZero(const ConstantAggregateZero &) LLVM_DELETED_FUNCTION;
+protected:
+ explicit ConstantAggregateZero(Type *ty)
+ : Constant(ty, ConstantAggregateZeroVal, 0, 0) {}
+protected:
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+public:
+ static ConstantAggregateZero *get(Type *Ty);
+
+ virtual void destroyConstant();
+
+ /// getSequentialElement - If this CAZ has array or vector type, return a zero
+ /// with the right element type.
+ Constant *getSequentialElement() const;
+
+ /// getStructElement - If this CAZ has struct type, return a zero with the
+ /// right element type for the specified element.
+ Constant *getStructElement(unsigned Elt) const;
+
+ /// getElementValue - Return a zero of the right value for the specified GEP
+ /// index.
+ Constant *getElementValue(Constant *C) const;
+
+ /// getElementValue - Return a zero of the right value for the specified GEP
+ /// index.
+ Constant *getElementValue(unsigned Idx) const;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ ///
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantAggregateZeroVal;
+ }
+};
+
+
+//===----------------------------------------------------------------------===//
+/// ConstantArray - Constant Array Declarations
+///
+class ConstantArray : public Constant {
+ friend struct ConstantArrayCreator<ConstantArray, ArrayType>;
+ ConstantArray(const ConstantArray &) LLVM_DELETED_FUNCTION;
+protected:
+ ConstantArray(ArrayType *T, ArrayRef<Constant *> Val);
+public:
+ // ConstantArray accessors
+ static Constant *get(ArrayType *T, ArrayRef<Constant*> V);
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
+
+ /// getType - Specialize the getType() method to always return an ArrayType,
+ /// which reduces the amount of casting needed in parts of the compiler.
+ ///
+ inline ArrayType *getType() const {
+ return reinterpret_cast<ArrayType*>(Value::getType());
+ }
+
+ virtual void destroyConstant();
+ virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantArrayVal;
+ }
+};
+
+template <>
+struct OperandTraits<ConstantArray> :
+ public VariadicOperandTraits<ConstantArray> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantArray, Constant)
+
+//===----------------------------------------------------------------------===//
+// ConstantStruct - Constant Struct Declarations
+//
+class ConstantStruct : public Constant {
+ friend struct ConstantArrayCreator<ConstantStruct, StructType>;
+ ConstantStruct(const ConstantStruct &) LLVM_DELETED_FUNCTION;
+protected:
+ ConstantStruct(StructType *T, ArrayRef<Constant *> Val);
+public:
+ // ConstantStruct accessors
+ static Constant *get(StructType *T, ArrayRef<Constant*> V);
+ static Constant *get(StructType *T, ...) END_WITH_NULL;
+
+ /// getAnon - Return an anonymous struct that has the specified
+ /// elements. If the struct is possibly empty, then you must specify a
+ /// context.
+ static Constant *getAnon(ArrayRef<Constant*> V, bool Packed = false) {
+ return get(getTypeForElements(V, Packed), V);
+ }
+ static Constant *getAnon(LLVMContext &Ctx,
+ ArrayRef<Constant*> V, bool Packed = false) {
+ return get(getTypeForElements(Ctx, V, Packed), V);
+ }
+
+ /// getTypeForElements - Return an anonymous struct type to use for a constant
+ /// with the specified set of elements. The list must not be empty.
+ static StructType *getTypeForElements(ArrayRef<Constant*> V,
+ bool Packed = false);
+ /// getTypeForElements - This version of the method allows an empty list.
+ static StructType *getTypeForElements(LLVMContext &Ctx,
+ ArrayRef<Constant*> V,
+ bool Packed = false);
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
+
+ /// getType() specialization - Reduce amount of casting...
+ ///
+ inline StructType *getType() const {
+ return reinterpret_cast<StructType*>(Value::getType());
+ }
+
+ virtual void destroyConstant();
+ virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantStructVal;
+ }
+};
+
+template <>
+struct OperandTraits<ConstantStruct> :
+ public VariadicOperandTraits<ConstantStruct> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantStruct, Constant)
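+
+// Illustrative usage of getAnon (a sketch; A and B are hypothetical Constant*
+// values). The anonymous (literal) struct type is derived from the elements:
+//
+//   Constant *Fields[] = { A, B };
+//   Constant *S = ConstantStruct::getAnon(makeArrayRef(Fields));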
+
+
+//===----------------------------------------------------------------------===//
+/// ConstantVector - Constant Vector Declarations
+///
+class ConstantVector : public Constant {
+ friend struct ConstantArrayCreator<ConstantVector, VectorType>;
+ ConstantVector(const ConstantVector &) LLVM_DELETED_FUNCTION;
+protected:
+ ConstantVector(VectorType *T, ArrayRef<Constant *> Val);
+public:
+ // ConstantVector accessors
+ static Constant *get(ArrayRef<Constant*> V);
+
+ /// getSplat - Return a ConstantVector with the specified constant in each
+ /// element.
+ static Constant *getSplat(unsigned NumElts, Constant *Elt);
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
+
+ /// getType - Specialize the getType() method to always return a VectorType,
+ /// which reduces the amount of casting needed in parts of the compiler.
+ ///
+ inline VectorType *getType() const {
+ return reinterpret_cast<VectorType*>(Value::getType());
+ }
+
+ /// getSplatValue - If this is a splat constant, meaning that all of the
+ /// elements have the same value, return that value. Otherwise return NULL.
+ Constant *getSplatValue() const;
+
+ virtual void destroyConstant();
+ virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantVectorVal;
+ }
+};
+
+template <>
+struct OperandTraits<ConstantVector> :
+ public VariadicOperandTraits<ConstantVector> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantVector, Constant)
+
+//===----------------------------------------------------------------------===//
+/// ConstantPointerNull - a constant pointer value that points to null
+///
+class ConstantPointerNull : public Constant {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ ConstantPointerNull(const ConstantPointerNull &) LLVM_DELETED_FUNCTION;
+protected:
+ explicit ConstantPointerNull(PointerType *T)
+ : Constant(reinterpret_cast<Type*>(T),
+ Value::ConstantPointerNullVal, 0, 0) {}
+
+protected:
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+public:
+ /// get() - Static factory methods - Return objects of the specified value
+ static ConstantPointerNull *get(PointerType *T);
+
+ virtual void destroyConstant();
+
+  /// getType - Specialize the getType() method to always return a PointerType,
+ /// which reduces the amount of casting needed in parts of the compiler.
+ ///
+ inline PointerType *getType() const {
+ return reinterpret_cast<PointerType*>(Value::getType());
+ }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantPointerNullVal;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// ConstantDataSequential - A vector or array constant whose element type is a
+/// simple 1/2/4/8-byte integer or float/double, and whose elements are just
+/// simple data values (i.e. ConstantInt/ConstantFP). This Constant node has no
+/// operands because it stores all of the elements of the constant as densely
+/// packed data, instead of as Value*'s.
+///
+/// This is the common base class of ConstantDataArray and ConstantDataVector.
+///
+class ConstantDataSequential : public Constant {
+ friend class LLVMContextImpl;
+ /// DataElements - A pointer to the bytes underlying this constant (which is
+ /// owned by the uniquing StringMap).
+ const char *DataElements;
+
+  /// Next - This forms a linked list of ConstantDataSequential nodes that have
+  /// the same value but different type. For example, 0,0,0,1 could be a
+  /// 4-element array of i8, or a 1-element array of i32. They'll both end up
+  /// in the same StringMap bucket, linked up.
+ ConstantDataSequential *Next;
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ ConstantDataSequential(const ConstantDataSequential &) LLVM_DELETED_FUNCTION;
+protected:
+ explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data)
+ : Constant(ty, VT, 0, 0), DataElements(Data), Next(0) {}
+ ~ConstantDataSequential() { delete Next; }
+
+ static Constant *getImpl(StringRef Bytes, Type *Ty);
+
+protected:
+ // allocate space for exactly zero operands.
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+public:
+
+ /// isElementTypeCompatible - Return true if a ConstantDataSequential can be
+ /// formed with a vector or array of the specified element type.
+ /// ConstantDataArray only works with normal float and int types that are
+ /// stored densely in memory, not with things like i42 or x86_f80.
+ static bool isElementTypeCompatible(const Type *Ty);
+
+ /// getElementAsInteger - If this is a sequential container of integers (of
+ /// any size), return the specified element in the low bits of a uint64_t.
+ uint64_t getElementAsInteger(unsigned i) const;
+
+ /// getElementAsAPFloat - If this is a sequential container of floating point
+ /// type, return the specified element as an APFloat.
+ APFloat getElementAsAPFloat(unsigned i) const;
+
+  /// getElementAsFloat - If this is a sequential container of floats, return
+ /// the specified element as a float.
+ float getElementAsFloat(unsigned i) const;
+
+  /// getElementAsDouble - If this is a sequential container of doubles, return
+ /// the specified element as a double.
+ double getElementAsDouble(unsigned i) const;
+
+ /// getElementAsConstant - Return a Constant for a specified index's element.
+ /// Note that this has to compute a new constant to return, so it isn't as
+ /// efficient as getElementAsInteger/Float/Double.
+ Constant *getElementAsConstant(unsigned i) const;
+
+ /// getType - Specialize the getType() method to always return a
+ /// SequentialType, which reduces the amount of casting needed in parts of the
+ /// compiler.
+ inline SequentialType *getType() const {
+ return reinterpret_cast<SequentialType*>(Value::getType());
+ }
+
+ /// getElementType - Return the element type of the array/vector.
+ Type *getElementType() const;
+
+ /// getNumElements - Return the number of elements in the array or vector.
+ unsigned getNumElements() const;
+
+ /// getElementByteSize - Return the size (in bytes) of each element in the
+ /// array/vector. The size of the elements is known to be a multiple of one
+ /// byte.
+ uint64_t getElementByteSize() const;
+
+
+ /// isString - This method returns true if this is an array of i8.
+ bool isString() const;
+
+ /// isCString - This method returns true if the array "isString", ends with a
+  /// nul byte, and does not contain any other nul bytes.
+ bool isCString() const;
+
+ /// getAsString - If this array is isString(), then this method returns the
+ /// array as a StringRef. Otherwise, it asserts out.
+ ///
+ StringRef getAsString() const {
+ assert(isString() && "Not a string");
+ return getRawDataValues();
+ }
+
+ /// getAsCString - If this array is isCString(), then this method returns the
+ /// array (without the trailing null byte) as a StringRef. Otherwise, it
+ /// asserts out.
+ ///
+ StringRef getAsCString() const {
+ assert(isCString() && "Isn't a C string");
+ StringRef Str = getAsString();
+ return Str.substr(0, Str.size()-1);
+ }
+
+ /// getRawDataValues - Return the raw, underlying, bytes of this data. Note
+ /// that this is an extremely tricky thing to work with, as it exposes the
+ /// host endianness of the data elements.
+ StringRef getRawDataValues() const;
+
+ virtual void destroyConstant();
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ ///
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantDataArrayVal ||
+ V->getValueID() == ConstantDataVectorVal;
+ }
+private:
+ const char *getElementPointer(unsigned Elt) const;
+};
+
+//===----------------------------------------------------------------------===//
+/// ConstantDataArray - An array constant whose element type is a simple
+/// 1/2/4/8-byte integer or float/double, and whose elements are just simple
+/// data values (i.e. ConstantInt/ConstantFP). This Constant node has no
+/// operands because it stores all of the elements of the constant as densely
+/// packed data, instead of as Value*'s.
+class ConstantDataArray : public ConstantDataSequential {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ ConstantDataArray(const ConstantDataArray &) LLVM_DELETED_FUNCTION;
+ virtual void anchor();
+ friend class ConstantDataSequential;
+ explicit ConstantDataArray(Type *ty, const char *Data)
+ : ConstantDataSequential(ty, ConstantDataArrayVal, Data) {}
+protected:
+ // allocate space for exactly zero operands.
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+public:
+
+ /// get() constructors - Return a constant with array type with an element
+ /// count and element type matching the ArrayRef passed in. Note that this
+ /// can return a ConstantAggregateZero object.
+ static Constant *get(LLVMContext &Context, ArrayRef<uint8_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint16_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint32_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint64_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<float> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<double> Elts);
+
+ /// getString - This method constructs a CDS and initializes it with a text
+ /// string. The default behavior (AddNull==true) causes a null terminator to
+ /// be placed at the end of the array (increasing the length of the string by
+  /// one more than the StringRef would normally indicate). Pass AddNull=false
+ /// to disable this behavior.
+ static Constant *getString(LLVMContext &Context, StringRef Initializer,
+ bool AddNull = true);
+
+ /// getType - Specialize the getType() method to always return an ArrayType,
+ /// which reduces the amount of casting needed in parts of the compiler.
+ ///
+ inline ArrayType *getType() const {
+ return reinterpret_cast<ArrayType*>(Value::getType());
+ }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ ///
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantDataArrayVal;
+ }
+};
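+
+// Illustrative usage (a sketch; Ctx is a hypothetical LLVMContext). The
+// elements are stored as densely packed data rather than as Value*'s:
+//
+//   uint32_t Vals[] = { 1, 2, 3 };
+//   Constant *Arr = ConstantDataArray::get(Ctx, makeArrayRef(Vals));
+//   Constant *Str = ConstantDataArray::getString(Ctx, "hi"); // [3 x i8],
+//                                                            // nul terminated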
+
+//===----------------------------------------------------------------------===//
+/// ConstantDataVector - A vector constant whose element type is a simple
+/// 1/2/4/8-byte integer or float/double, and whose elements are just simple
+/// data values (i.e. ConstantInt/ConstantFP). This Constant node has no
+/// operands because it stores all of the elements of the constant as densely
+/// packed data, instead of as Value*'s.
+class ConstantDataVector : public ConstantDataSequential {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ ConstantDataVector(const ConstantDataVector &) LLVM_DELETED_FUNCTION;
+ virtual void anchor();
+ friend class ConstantDataSequential;
+ explicit ConstantDataVector(Type *ty, const char *Data)
+ : ConstantDataSequential(ty, ConstantDataVectorVal, Data) {}
+protected:
+ // allocate space for exactly zero operands.
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+public:
+
+ /// get() constructors - Return a constant with vector type with an element
+ /// count and element type matching the ArrayRef passed in. Note that this
+ /// can return a ConstantAggregateZero object.
+ static Constant *get(LLVMContext &Context, ArrayRef<uint8_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint16_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint32_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint64_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<float> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<double> Elts);
+
+ /// getSplat - Return a ConstantVector with the specified constant in each
+  /// element. The specified constant has to be of a compatible type (i8/i16/
+ /// i32/i64/float/double) and must be a ConstantFP or ConstantInt.
+ static Constant *getSplat(unsigned NumElts, Constant *Elt);
+
+ /// getSplatValue - If this is a splat constant, meaning that all of the
+ /// elements have the same value, return that value. Otherwise return NULL.
+ Constant *getSplatValue() const;
+
+ /// getType - Specialize the getType() method to always return a VectorType,
+ /// which reduces the amount of casting needed in parts of the compiler.
+ ///
+ inline VectorType *getType() const {
+ return reinterpret_cast<VectorType*>(Value::getType());
+ }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ ///
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantDataVectorVal;
+ }
+};
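+
+// Illustrative usage (a sketch; I32 is a hypothetical 32-bit IntegerType*):
+//
+//   Constant *Elt = ConstantInt::get(I32, 7);
+//   Constant *Splat = ConstantDataVector::getSplat(4, Elt); // <4 x i32> of 7s
+//   assert(Splat->getSplatValue() == Elt); // uniquing makes this hold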
+
+
+//===----------------------------------------------------------------------===//
+/// BlockAddress - The address of a basic block.
+///
+class BlockAddress : public Constant {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t s) { return User::operator new(s, 2); }
+ BlockAddress(Function *F, BasicBlock *BB);
+public:
+ /// get - Return a BlockAddress for the specified function and basic block.
+ static BlockAddress *get(Function *F, BasicBlock *BB);
+
+ /// get - Return a BlockAddress for the specified basic block. The basic
+ /// block must be embedded into a function.
+ static BlockAddress *get(BasicBlock *BB);
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ Function *getFunction() const { return (Function*)Op<0>().get(); }
+ BasicBlock *getBasicBlock() const { return (BasicBlock*)Op<1>().get(); }
+
+ virtual void destroyConstant();
+ virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == BlockAddressVal;
+ }
+};
+
+template <>
+struct OperandTraits<BlockAddress> :
+ public FixedNumOperandTraits<BlockAddress, 2> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BlockAddress, Value)
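+
+// Illustrative usage (a sketch; BB is a hypothetical BasicBlock* embedded in a
+// function). A BlockAddress is what the 'blockaddress(@fn, %bb)' IR construct
+// produces, typically consumed by an indirectbr instruction:
+//
+//   BlockAddress *BA = BlockAddress::get(BB);
+//   assert(BA->getBasicBlock() == BB);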
+
+
+//===----------------------------------------------------------------------===//
+/// ConstantExpr - a constant value that is initialized with an expression using
+/// other constant values.
+///
+/// This class uses the standard Instruction opcodes to define the various
+/// constant expressions. The Opcode field for the ConstantExpr class is
+/// maintained in the Value::SubclassData field.
+class ConstantExpr : public Constant {
+ friend struct ConstantCreator<ConstantExpr,Type,
+ std::pair<unsigned, std::vector<Constant*> > >;
+ friend struct ConvertConstantType<ConstantExpr, Type>;
+
+protected:
+ ConstantExpr(Type *ty, unsigned Opcode, Use *Ops, unsigned NumOps)
+ : Constant(ty, ConstantExprVal, Ops, NumOps) {
+ // Operation type (an Instruction opcode) is stored as the SubclassData.
+ setValueSubclassData(Opcode);
+ }
+
+public:
+ // Static methods to construct a ConstantExpr of different kinds. Note that
+  // these methods may return an object that is not an instance of the
+ // ConstantExpr class, because they will attempt to fold the constant
+ // expression into something simpler if possible.
+
+ /// getAlignOf constant expr - computes the alignment of a type in a target
+ /// independent way (Note: the return type is an i64).
+ static Constant *getAlignOf(Type *Ty);
+
+ /// getSizeOf constant expr - computes the (alloc) size of a type (in
+ /// address-units, not bits) in a target independent way (Note: the return
+ /// type is an i64).
+ ///
+ static Constant *getSizeOf(Type *Ty);
+
+ /// getOffsetOf constant expr - computes the offset of a struct field in a
+ /// target independent way (Note: the return type is an i64).
+ ///
+ static Constant *getOffsetOf(StructType *STy, unsigned FieldNo);
+
+ /// getOffsetOf constant expr - This is a generalized form of getOffsetOf,
+ /// which supports any aggregate type, and any Constant index.
+ ///
+ static Constant *getOffsetOf(Type *Ty, Constant *FieldNo);
+
+ static Constant *getNeg(Constant *C, bool HasNUW = false, bool HasNSW =false);
+ static Constant *getFNeg(Constant *C);
+ static Constant *getNot(Constant *C);
+ static Constant *getAdd(Constant *C1, Constant *C2,
+ bool HasNUW = false, bool HasNSW = false);
+ static Constant *getFAdd(Constant *C1, Constant *C2);
+ static Constant *getSub(Constant *C1, Constant *C2,
+ bool HasNUW = false, bool HasNSW = false);
+ static Constant *getFSub(Constant *C1, Constant *C2);
+ static Constant *getMul(Constant *C1, Constant *C2,
+ bool HasNUW = false, bool HasNSW = false);
+ static Constant *getFMul(Constant *C1, Constant *C2);
+ static Constant *getUDiv(Constant *C1, Constant *C2, bool isExact = false);
+ static Constant *getSDiv(Constant *C1, Constant *C2, bool isExact = false);
+ static Constant *getFDiv(Constant *C1, Constant *C2);
+ static Constant *getURem(Constant *C1, Constant *C2);
+ static Constant *getSRem(Constant *C1, Constant *C2);
+ static Constant *getFRem(Constant *C1, Constant *C2);
+ static Constant *getAnd(Constant *C1, Constant *C2);
+ static Constant *getOr(Constant *C1, Constant *C2);
+ static Constant *getXor(Constant *C1, Constant *C2);
+ static Constant *getShl(Constant *C1, Constant *C2,
+ bool HasNUW = false, bool HasNSW = false);
+ static Constant *getLShr(Constant *C1, Constant *C2, bool isExact = false);
+ static Constant *getAShr(Constant *C1, Constant *C2, bool isExact = false);
+ static Constant *getTrunc (Constant *C, Type *Ty);
+ static Constant *getSExt (Constant *C, Type *Ty);
+ static Constant *getZExt (Constant *C, Type *Ty);
+ static Constant *getFPTrunc (Constant *C, Type *Ty);
+ static Constant *getFPExtend(Constant *C, Type *Ty);
+ static Constant *getUIToFP (Constant *C, Type *Ty);
+ static Constant *getSIToFP (Constant *C, Type *Ty);
+ static Constant *getFPToUI (Constant *C, Type *Ty);
+ static Constant *getFPToSI (Constant *C, Type *Ty);
+ static Constant *getPtrToInt(Constant *C, Type *Ty);
+ static Constant *getIntToPtr(Constant *C, Type *Ty);
+ static Constant *getBitCast (Constant *C, Type *Ty);
+
+ static Constant *getNSWNeg(Constant *C) { return getNeg(C, false, true); }
+ static Constant *getNUWNeg(Constant *C) { return getNeg(C, true, false); }
+ static Constant *getNSWAdd(Constant *C1, Constant *C2) {
+ return getAdd(C1, C2, false, true);
+ }
+ static Constant *getNUWAdd(Constant *C1, Constant *C2) {
+ return getAdd(C1, C2, true, false);
+ }
+ static Constant *getNSWSub(Constant *C1, Constant *C2) {
+ return getSub(C1, C2, false, true);
+ }
+ static Constant *getNUWSub(Constant *C1, Constant *C2) {
+ return getSub(C1, C2, true, false);
+ }
+ static Constant *getNSWMul(Constant *C1, Constant *C2) {
+ return getMul(C1, C2, false, true);
+ }
+ static Constant *getNUWMul(Constant *C1, Constant *C2) {
+ return getMul(C1, C2, true, false);
+ }
+ static Constant *getNSWShl(Constant *C1, Constant *C2) {
+ return getShl(C1, C2, false, true);
+ }
+ static Constant *getNUWShl(Constant *C1, Constant *C2) {
+ return getShl(C1, C2, true, false);
+ }
+ static Constant *getExactSDiv(Constant *C1, Constant *C2) {
+ return getSDiv(C1, C2, true);
+ }
+ static Constant *getExactUDiv(Constant *C1, Constant *C2) {
+ return getUDiv(C1, C2, true);
+ }
+ static Constant *getExactAShr(Constant *C1, Constant *C2) {
+ return getAShr(C1, C2, true);
+ }
+ static Constant *getExactLShr(Constant *C1, Constant *C2) {
+ return getLShr(C1, C2, true);
+ }
+
+ /// getBinOpIdentity - Return the identity for the given binary operation,
+ /// i.e. a constant C such that X op C = X and C op X = X for every X. It
+ /// returns null if the operator doesn't have an identity.
+ static Constant *getBinOpIdentity(unsigned Opcode, Type *Ty);
+
+ /// getBinOpAbsorber - Return the absorbing element for the given binary
+ /// operation, i.e. a constant C such that X op C = C and C op X = C for
+ /// every X. For example, this returns zero for integer multiplication.
+ /// It returns null if the operator doesn't have an absorbing element.
+ static Constant *getBinOpAbsorber(unsigned Opcode, Type *Ty);
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
+
+  /// @brief Convenience function for getting one of the casting operations
+  /// using a CastOps opcode.
+ static Constant *getCast(
+ unsigned ops, ///< The opcode for the conversion
+ Constant *C, ///< The constant to be converted
+ Type *Ty ///< The type to which the constant is converted
+ );
+
+  /// @brief Create a ZExt or BitCast cast constant expression
+ static Constant *getZExtOrBitCast(
+ Constant *C, ///< The constant to zext or bitcast
+ Type *Ty ///< The type to zext or bitcast C to
+ );
+
+  /// @brief Create a SExt or BitCast cast constant expression
+ static Constant *getSExtOrBitCast(
+ Constant *C, ///< The constant to sext or bitcast
+ Type *Ty ///< The type to sext or bitcast C to
+ );
+
+  /// @brief Create a Trunc or BitCast cast constant expression
+ static Constant *getTruncOrBitCast(
+ Constant *C, ///< The constant to trunc or bitcast
+ Type *Ty ///< The type to trunc or bitcast C to
+ );
+
+ /// @brief Create a BitCast or a PtrToInt cast constant expression
+ static Constant *getPointerCast(
+ Constant *C, ///< The pointer value to be casted (operand 0)
+ Type *Ty ///< The type to which cast should be made
+ );
+
+ /// @brief Create a ZExt, Bitcast or Trunc for integer -> integer casts
+ static Constant *getIntegerCast(
+ Constant *C, ///< The integer constant to be casted
+ Type *Ty, ///< The integer type to cast to
+ bool isSigned ///< Whether C should be treated as signed or not
+ );
+
+ /// @brief Create a FPExt, Bitcast or FPTrunc for fp -> fp casts
+ static Constant *getFPCast(
+    Constant *C, ///< The floating point constant to be casted
+    Type *Ty ///< The floating point type to cast to
+ );
+
+ /// @brief Return true if this is a convert constant expression
+ bool isCast() const;
+
+ /// @brief Return true if this is a compare constant expression
+ bool isCompare() const;
+
+ /// @brief Return true if this is an insertvalue or extractvalue expression,
+ /// and the getIndices() method may be used.
+ bool hasIndices() const;
+
+ /// @brief Return true if this is a getelementptr expression and all
+ /// the index operands are compile-time known integers within the
+ /// corresponding notional static array extents. Note that this is
+  /// not equivalent to, a subset of, or a superset of the "inbounds"
+ /// property.
+ bool isGEPWithNoNotionalOverIndexing() const;
+
+ /// Select constant expr
+ ///
+ static Constant *getSelect(Constant *C, Constant *V1, Constant *V2);
+
+ /// get - Return a binary or shift operator constant expression,
+ /// folding if possible.
+ ///
+ static Constant *get(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags = 0);
+
+ /// @brief Return an ICmp or FCmp comparison operator constant expression.
+ static Constant *getCompare(unsigned short pred, Constant *C1, Constant *C2);
+
+ /// get* - Return some common constants without having to
+ /// specify the full Instruction::OPCODE identifier.
+ ///
+ static Constant *getICmp(unsigned short pred, Constant *LHS, Constant *RHS);
+ static Constant *getFCmp(unsigned short pred, Constant *LHS, Constant *RHS);
+
+ /// Getelementptr form. Value* is only accepted for convenience;
+ /// all elements must be Constant's.
+ ///
+ static Constant *getGetElementPtr(Constant *C,
+ ArrayRef<Constant *> IdxList,
+ bool InBounds = false) {
+ return getGetElementPtr(C, makeArrayRef((Value * const *)IdxList.data(),
+ IdxList.size()),
+ InBounds);
+ }
+ static Constant *getGetElementPtr(Constant *C,
+ Constant *Idx,
+ bool InBounds = false) {
+ // This form of the function only exists to avoid ambiguous overload
+ // warnings about whether to convert Idx to ArrayRef<Constant *> or
+ // ArrayRef<Value *>.
+ return getGetElementPtr(C, cast<Value>(Idx), InBounds);
+ }
+ static Constant *getGetElementPtr(Constant *C,
+ ArrayRef<Value *> IdxList,
+ bool InBounds = false);
+
+ /// Create an "inbounds" getelementptr. See the documentation for the
+ /// "inbounds" flag in LangRef.html for details.
+ static Constant *getInBoundsGetElementPtr(Constant *C,
+ ArrayRef<Constant *> IdxList) {
+ return getGetElementPtr(C, IdxList, true);
+ }
+ static Constant *getInBoundsGetElementPtr(Constant *C,
+ Constant *Idx) {
+ // This form of the function only exists to avoid ambiguous overload
+ // warnings about whether to convert Idx to ArrayRef<Constant *> or
+ // ArrayRef<Value *>.
+ return getGetElementPtr(C, Idx, true);
+ }
+ static Constant *getInBoundsGetElementPtr(Constant *C,
+ ArrayRef<Value *> IdxList) {
+ return getGetElementPtr(C, IdxList, true);
+ }
+
+ static Constant *getExtractElement(Constant *Vec, Constant *Idx);
+ static Constant *getInsertElement(Constant *Vec, Constant *Elt,Constant *Idx);
+ static Constant *getShuffleVector(Constant *V1, Constant *V2, Constant *Mask);
+ static Constant *getExtractValue(Constant *Agg, ArrayRef<unsigned> Idxs);
+ static Constant *getInsertValue(Constant *Agg, Constant *Val,
+ ArrayRef<unsigned> Idxs);
+
+ /// getOpcode - Return the opcode at the root of this constant expression
+ unsigned getOpcode() const { return getSubclassDataFromValue(); }
+
+ /// getPredicate - Return the ICMP or FCMP predicate value. Assert if this is
+ /// not an ICMP or FCMP constant expression.
+ unsigned getPredicate() const;
+
+  /// getIndices - Assert that this is an insertvalue or extractvalue
+ /// expression and return the list of indices.
+ ArrayRef<unsigned> getIndices() const;
+
+ /// getOpcodeName - Return a string representation for an opcode.
+ const char *getOpcodeName() const;
+
+ /// getWithOperandReplaced - Return a constant expression identical to this
+ /// one, but with the specified operand set to the specified value.
+ Constant *getWithOperandReplaced(unsigned OpNo, Constant *Op) const;
+
+ /// getWithOperands - This returns the current constant expression with the
+ /// operands replaced with the specified values. The specified array must
+ /// have the same number of operands as our current one.
+ Constant *getWithOperands(ArrayRef<Constant*> Ops) const {
+ return getWithOperands(Ops, getType());
+ }
+
+ /// getWithOperands - This returns the current constant expression with the
+ /// operands replaced with the specified values and with the specified result
+ /// type. The specified array must have the same number of operands as our
+ /// current one.
+ Constant *getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const;
+
+ /// getAsInstruction - Returns an Instruction which implements the same operation
+ /// as this ConstantExpr. The instruction is not linked to any basic block.
+ ///
+ /// A better approach to this could be to have a constructor for Instruction
+ /// which would take a ConstantExpr parameter, but that would have spread
+ /// implementation details of ConstantExpr outside of Constants.cpp, which
+ /// would make it harder to remove ConstantExprs altogether.
+ Instruction *getAsInstruction();
+
+ virtual void destroyConstant();
+ virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == ConstantExprVal;
+ }
+
+private:
+ // Shadow Value::setValueSubclassData with a private forwarding method so that
+ // subclasses cannot accidentally use it.
+ void setValueSubclassData(unsigned short D) {
+ Value::setValueSubclassData(D);
+ }
+};
+
+template <>
+struct OperandTraits<ConstantExpr> :
+ public VariadicOperandTraits<ConstantExpr, 1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantExpr, Constant)
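+
+// Illustrative sketch of the folding behavior noted above (I32 is a
+// hypothetical 32-bit IntegerType*). The static constructors may fold, so the
+// result is not necessarily a ConstantExpr:
+//
+//   Constant *One = ConstantInt::get(I32, 1);
+//   Constant *Two = ConstantInt::get(I32, 2);
+//   Constant *Sum = ConstantExpr::getAdd(One, Two);
+//   assert(isa<ConstantInt>(Sum)); // folded to the ConstantInt 3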
+
+//===----------------------------------------------------------------------===//
+/// UndefValue - 'undef' values are things that do not have specified contents.
+/// These are used for a variety of purposes, including global variable
+/// initializers and operands to instructions. 'undef' values can occur with
+/// any first-class type.
+///
+/// Undef values aren't exactly constants; if they have multiple uses, they
+/// can appear to have different bit patterns at each use. See
+/// LangRef.html#undefvalues for details.
+///
+class UndefValue : public Constant {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ UndefValue(const UndefValue &) LLVM_DELETED_FUNCTION;
+protected:
+ explicit UndefValue(Type *T) : Constant(T, UndefValueVal, 0, 0) {}
+protected:
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+public:
+ /// get() - Static factory methods - Return an 'undef' object of the specified
+ /// type.
+ ///
+ static UndefValue *get(Type *T);
+
+  /// getSequentialElement - If this Undef has array or vector type, return an
+  /// undef with the right element type.
+ UndefValue *getSequentialElement() const;
+
+  /// getStructElement - If this undef has struct type, return an undef with the
+ /// right element type for the specified element.
+ UndefValue *getStructElement(unsigned Elt) const;
+
+ /// getElementValue - Return an undef of the right value for the specified GEP
+ /// index.
+ UndefValue *getElementValue(Constant *C) const;
+
+ /// getElementValue - Return an undef of the right value for the specified GEP
+ /// index.
+ UndefValue *getElementValue(unsigned Idx) const;
+
+ virtual void destroyConstant();
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Value *V) {
+ return V->getValueID() == UndefValueVal;
+ }
+};
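+
+// Illustrative usage (a sketch; Ty is a hypothetical first-class Type*):
+//
+//   UndefValue *U = UndefValue::get(Ty);
+//   // Each use of U may observe a different bit pattern; see
+//   // LangRef.html#undefvalues.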
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h
new file mode 100644
index 000000000000..547d857b7b73
--- /dev/null
+++ b/include/llvm/IR/DataLayout.h
@@ -0,0 +1,477 @@
+//===--------- llvm/DataLayout.h - Data size & alignment info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines layout properties related to datatype size/offset/alignment
+// information. It uses lazy annotations to cache information about how
+// structure types are laid out and used.
+//
+// This structure should be created once, filled in if the defaults are not
+// correct and then passed around by const&. None of the member functions
+// require modification to the object.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_DATALAYOUT_H
+#define LLVM_IR_DATALAYOUT_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class Value;
+class Type;
+class IntegerType;
+class StructType;
+class StructLayout;
+class GlobalVariable;
+class LLVMContext;
+template<typename T>
+class ArrayRef;
+
+/// Enum used to categorize the alignment types stored by LayoutAlignElem
+enum AlignTypeEnum {
+ INVALID_ALIGN = 0, ///< An invalid alignment
+ INTEGER_ALIGN = 'i', ///< Integer type alignment
+ VECTOR_ALIGN = 'v', ///< Vector type alignment
+ FLOAT_ALIGN = 'f', ///< Floating point type alignment
+ AGGREGATE_ALIGN = 'a', ///< Aggregate alignment
+ STACK_ALIGN = 's' ///< Stack objects alignment
+};
+
+/// Layout alignment element.
+///
+/// Stores the alignment data associated with a given alignment type (integer,
+/// vector, float) and type bit width.
+///
+/// @note The unusual order of elements in the structure attempts to reduce
+/// padding and make the structure slightly more cache friendly.
+struct LayoutAlignElem {
+ unsigned AlignType : 8; ///< Alignment type (AlignTypeEnum)
+ unsigned TypeBitWidth : 24; ///< Type bit width
+ unsigned ABIAlign : 16; ///< ABI alignment for this type/bitw
+ unsigned PrefAlign : 16; ///< Pref. alignment for this type/bitw
+
+ /// Initializer
+ static LayoutAlignElem get(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width);
+ /// Equality predicate
+ bool operator==(const LayoutAlignElem &rhs) const;
+};
+
+/// Layout pointer alignment element.
+///
+/// Stores the alignment data associated with a given pointer and address space.
+///
+/// @note The unusual order of elements in the structure attempts to reduce
+/// padding and make the structure slightly more cache friendly.
+struct PointerAlignElem {
+ unsigned ABIAlign; ///< ABI alignment for this type/bitw
+ unsigned PrefAlign; ///< Pref. alignment for this type/bitw
+ uint32_t TypeBitWidth; ///< Type bit width
+ uint32_t AddressSpace; ///< Address space for the pointer type
+
+ /// Initializer
+ static PointerAlignElem get(uint32_t addr_space, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width);
+ /// Equality predicate
+ bool operator==(const PointerAlignElem &rhs) const;
+};
+
+
+/// DataLayout - This class holds a parsed version of the target data layout
+/// string in a module and provides methods for querying it. The target data
+/// layout string is specified *by the target* - a frontend generating LLVM IR
+/// is required to generate the right target data for the target being codegen'd
+/// to. If some measure of portability is desired, an empty string may be
+/// specified in the module.
+class DataLayout : public ImmutablePass {
+private:
+ bool LittleEndian; ///< Defaults to false
+ unsigned StackNaturalAlign; ///< Stack natural alignment
+
+ SmallVector<unsigned char, 8> LegalIntWidths; ///< Legal Integers.
+
+ /// Alignments - Where the primitive type alignment data is stored.
+ ///
+ /// @sa init().
+ /// @note Could support multiple size pointer alignments, e.g., 32-bit
+ /// pointers vs. 64-bit pointers by extending LayoutAlignment, but for now,
+ /// we don't.
+ SmallVector<LayoutAlignElem, 16> Alignments;
+ DenseMap<unsigned, PointerAlignElem> Pointers;
+
+ /// InvalidAlignmentElem - This member is a signal that a requested alignment
+ /// type and bit width were not found in the SmallVector.
+ static const LayoutAlignElem InvalidAlignmentElem;
+
+ /// InvalidPointerElem - This member is a signal that a requested pointer
+  /// type and bit width were not found in the DenseMap.
+ static const PointerAlignElem InvalidPointerElem;
+
+ // The StructType -> StructLayout map.
+ mutable void *LayoutMap;
+
+ //! Set/initialize target alignments
+ void setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width);
+ unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width,
+ bool ABIAlign, Type *Ty) const;
+
+ //! Set/initialize pointer alignments
+ void setPointerAlignment(uint32_t addr_space, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width);
+
+ //! Internal helper method that returns requested alignment for type.
+ unsigned getAlignment(Type *Ty, bool abi_or_pref) const;
+
+ /// Valid alignment predicate.
+ ///
+ /// Predicate that tests a LayoutAlignElem reference returned by get() against
+ /// InvalidAlignmentElem.
+ bool validAlignment(const LayoutAlignElem &align) const {
+ return &align != &InvalidAlignmentElem;
+ }
+
+ /// Valid pointer predicate.
+ ///
+ /// Predicate that tests a PointerAlignElem reference returned by get() against
+ /// InvalidPointerElem.
+ bool validPointer(const PointerAlignElem &align) const {
+ return &align != &InvalidPointerElem;
+ }
+
+  /// Parses a target data specification string. Asserts if the string is
+ /// malformed.
+ void parseSpecifier(StringRef LayoutDescription);
+
+public:
+ /// Default ctor.
+ ///
+ /// @note This has to exist, because this is a pass, but it should never be
+ /// used.
+ DataLayout();
+
+ /// Constructs a DataLayout from a specification string. See init().
+ explicit DataLayout(StringRef LayoutDescription)
+ : ImmutablePass(ID) {
+ init(LayoutDescription);
+ }
+
+ /// Initialize target data from properties stored in the module.
+ explicit DataLayout(const Module *M);
+
+ DataLayout(const DataLayout &TD) :
+ ImmutablePass(ID),
+ LittleEndian(TD.isLittleEndian()),
+ StackNaturalAlign(TD.StackNaturalAlign),
+ LegalIntWidths(TD.LegalIntWidths),
+ Alignments(TD.Alignments),
+ Pointers(TD.Pointers),
+ LayoutMap(0)
+ { }
+
+ ~DataLayout(); // Not virtual, do not subclass this class
+
+ /// DataLayout is an immutable pass, but holds state. This allows the pass
+ /// manager to clear its mutable state.
+ bool doFinalization(Module &M);
+
+ /// Parse a data layout string (with fallback to default values). Ensure that
+ /// the data layout pass is registered.
+ void init(StringRef LayoutDescription);
+
+ /// Layout endianness...
+ bool isLittleEndian() const { return LittleEndian; }
+ bool isBigEndian() const { return !LittleEndian; }
+
+ /// getStringRepresentation - Return the string representation of the
+ /// DataLayout. This representation is in the same format accepted by the
+ /// string constructor above.
+ std::string getStringRepresentation() const;
+
+ /// isLegalInteger - This function returns true if the specified type is
+ /// known to be a native integer type supported by the CPU. For example,
+ /// i64 is not native on most 32-bit CPUs and i37 is not native on any known
+ /// one. This returns false if the integer width is not legal.
+ ///
+ /// The width is specified in bits.
+ ///
+ bool isLegalInteger(unsigned Width) const {
+ for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
+ if (LegalIntWidths[i] == Width)
+ return true;
+ return false;
+ }
+
+ bool isIllegalInteger(unsigned Width) const {
+ return !isLegalInteger(Width);
+ }
+
+ /// Returns true if the given alignment exceeds the natural stack alignment.
+ bool exceedsNaturalStackAlignment(unsigned Align) const {
+ return (StackNaturalAlign != 0) && (Align > StackNaturalAlign);
+ }
+
+  /// fitsInLegalInteger - This function returns true if the specified type fits
+  /// in a native integer type supported by the CPU. For example, if the CPU
+  /// only supports i32 as a native integer type, then i27 fits in a legal
+  /// integer type but i45 does not.
+ bool fitsInLegalInteger(unsigned Width) const {
+ for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
+ if (Width <= LegalIntWidths[i])
+ return true;
+ return false;
+ }
+
+ /// Layout pointer alignment
+ /// FIXME: The defaults need to be removed once all of
+ /// the backends/clients are updated.
+ unsigned getPointerABIAlignment(unsigned AS = 0) const {
+ DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
+ if (val == Pointers.end()) {
+ val = Pointers.find(0);
+ }
+ return val->second.ABIAlign;
+ }
+  /// Return the target's preferred alignment for pointers in the given
+  /// address space.
+ /// FIXME: The defaults need to be removed once all of
+ /// the backends/clients are updated.
+ unsigned getPointerPrefAlignment(unsigned AS = 0) const {
+ DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
+ if (val == Pointers.end()) {
+ val = Pointers.find(0);
+ }
+ return val->second.PrefAlign;
+ }
+  /// Layout pointer size, in bytes
+ /// FIXME: The defaults need to be removed once all of
+ /// the backends/clients are updated.
+ unsigned getPointerSize(unsigned AS = 0) const {
+ DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
+ if (val == Pointers.end()) {
+ val = Pointers.find(0);
+ }
+ return val->second.TypeBitWidth;
+ }
+ /// Layout pointer size, in bits
+ /// FIXME: The defaults need to be removed once all of
+ /// the backends/clients are updated.
+ unsigned getPointerSizeInBits(unsigned AS = 0) const {
+ return getPointerSize(AS) * 8;
+ }
+ /// Size examples:
+ ///
+ /// Type SizeInBits StoreSizeInBits AllocSizeInBits[*]
+ /// ---- ---------- --------------- ---------------
+ /// i1 1 8 8
+ /// i8 8 8 8
+ /// i19 19 24 32
+ /// i32 32 32 32
+ /// i100 100 104 128
+ /// i128 128 128 128
+ /// Float 32 32 32
+ /// Double 64 64 64
+ /// X86_FP80 80 80 96
+ ///
+ /// [*] The alloc size depends on the alignment, and thus on the target.
+ /// These values are for x86-32 linux.
+
+ /// getTypeSizeInBits - Return the number of bits necessary to hold the
+ /// specified type. For example, returns 36 for i36 and 80 for x86_fp80.
+ /// The type passed must have a size (Type::isSized() must return true).
+ uint64_t getTypeSizeInBits(Type *Ty) const;
+
+ /// getTypeStoreSize - Return the maximum number of bytes that may be
+ /// overwritten by storing the specified type. For example, returns 5
+ /// for i36 and 10 for x86_fp80.
+ uint64_t getTypeStoreSize(Type *Ty) const {
+ return (getTypeSizeInBits(Ty)+7)/8;
+ }
+
+ /// getTypeStoreSizeInBits - Return the maximum number of bits that may be
+ /// overwritten by storing the specified type; always a multiple of 8. For
+ /// example, returns 40 for i36 and 80 for x86_fp80.
+ uint64_t getTypeStoreSizeInBits(Type *Ty) const {
+ return 8*getTypeStoreSize(Ty);
+ }
+
+ /// getTypeAllocSize - Return the offset in bytes between successive objects
+ /// of the specified type, including alignment padding. This is the amount
+ /// that alloca reserves for this type. For example, returns 12 or 16 for
+ /// x86_fp80, depending on alignment.
+ uint64_t getTypeAllocSize(Type *Ty) const {
+ // Round up to the next alignment boundary.
+ return RoundUpAlignment(getTypeStoreSize(Ty), getABITypeAlignment(Ty));
+ }
+
+ /// getTypeAllocSizeInBits - Return the offset in bits between successive
+ /// objects of the specified type, including alignment padding; always a
+ /// multiple of 8. This is the amount that alloca reserves for this type.
+ /// For example, returns 96 or 128 for x86_fp80, depending on alignment.
+ uint64_t getTypeAllocSizeInBits(Type *Ty) const {
+ return 8*getTypeAllocSize(Ty);
+ }
+
+ /// getABITypeAlignment - Return the minimum ABI-required alignment for the
+ /// specified type.
+ unsigned getABITypeAlignment(Type *Ty) const;
+
+ /// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
+ /// an integer type of the specified bitwidth.
+ unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const;
+
+ /// getCallFrameTypeAlignment - Return the minimum ABI-required alignment
+ /// for the specified type when it is part of a call frame.
+ unsigned getCallFrameTypeAlignment(Type *Ty) const;
+
+ /// getPrefTypeAlignment - Return the preferred stack/global alignment for
+ /// the specified type. This is always at least as good as the ABI alignment.
+ unsigned getPrefTypeAlignment(Type *Ty) const;
+
+ /// getPreferredTypeAlignmentShift - Return the preferred alignment for the
+ /// specified type, returned as log2 of the value (a shift amount).
+ unsigned getPreferredTypeAlignmentShift(Type *Ty) const;
+
+ /// getIntPtrType - Return an integer type with size at least as big as that
+ /// of a pointer in the given address space.
+ IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace = 0) const;
+
+ /// getIntPtrType - Return an integer (vector of integer) type with size at
+ /// least as big as that of a pointer of the given pointer (vector of pointer)
+ /// type.
+ Type *getIntPtrType(Type *) const;
+
+ /// getSmallestLegalIntType - Return the smallest integer type with size at
+ /// least as big as Width bits.
+ Type *getSmallestLegalIntType(LLVMContext &C, unsigned Width = 0) const;
+
+  /// getIndexedOffset - Return the offset from the beginning of the type for
+ /// the specified indices. This is used to implement getelementptr.
+ uint64_t getIndexedOffset(Type *Ty, ArrayRef<Value *> Indices) const;
+
+ /// getStructLayout - Return a StructLayout object, indicating the alignment
+ /// of the struct, its size, and the offsets of its fields. Note that this
+ /// information is lazily cached.
+ const StructLayout *getStructLayout(StructType *Ty) const;
+
+ /// getPreferredAlignment - Return the preferred alignment of the specified
+ /// global. This includes an explicitly requested alignment (if the global
+ /// has one).
+ unsigned getPreferredAlignment(const GlobalVariable *GV) const;
+
+ /// getPreferredAlignmentLog - Return the preferred alignment of the
+ /// specified global, returned in log form. This includes an explicitly
+ /// requested alignment (if the global has one).
+ unsigned getPreferredAlignmentLog(const GlobalVariable *GV) const;
+
+ /// RoundUpAlignment - Round the specified value up to the next alignment
+ /// boundary specified by Alignment. For example, 7 rounded up to an
+ /// alignment boundary of 4 is 8. 8 rounded up to the alignment boundary of 4
+ /// is 8 because it is already aligned.
+ template <typename UIntTy>
+ static UIntTy RoundUpAlignment(UIntTy Val, unsigned Alignment) {
+ assert((Alignment & (Alignment-1)) == 0 && "Alignment must be power of 2!");
+ return (Val + (Alignment-1)) & ~UIntTy(Alignment-1);
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+};
+
+/// StructLayout - Used to lazily calculate structure layout information for a
+/// target machine, based on the DataLayout structure.
+///
+class StructLayout {
+ uint64_t StructSize;
+ unsigned StructAlignment;
+ unsigned NumElements;
+  uint64_t MemberOffsets[1]; // variable-sized array!
+public:
+
+ uint64_t getSizeInBytes() const {
+ return StructSize;
+ }
+
+ uint64_t getSizeInBits() const {
+ return 8*StructSize;
+ }
+
+ unsigned getAlignment() const {
+ return StructAlignment;
+ }
+
+ /// getElementContainingOffset - Given a valid byte offset into the structure,
+ /// return the structure index that contains it.
+ ///
+ unsigned getElementContainingOffset(uint64_t Offset) const;
+
+ uint64_t getElementOffset(unsigned Idx) const {
+ assert(Idx < NumElements && "Invalid element idx!");
+ return MemberOffsets[Idx];
+ }
+
+ uint64_t getElementOffsetInBits(unsigned Idx) const {
+ return getElementOffset(Idx)*8;
+ }
+
+private:
+ friend class DataLayout; // Only DataLayout can create this class
+ StructLayout(StructType *ST, const DataLayout &TD);
+};
+
+
+// The implementation of this method is provided inline as it is particularly
+// well suited to constant folding when called on a specific Type subclass.
+inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ case Type::LabelTyID:
+ return getPointerSizeInBits(0);
+ case Type::PointerTyID:
+ return getPointerSizeInBits(cast<PointerType>(Ty)->getAddressSpace());
+ case Type::ArrayTyID: {
+ ArrayType *ATy = cast<ArrayType>(Ty);
+ return ATy->getNumElements() *
+ getTypeAllocSizeInBits(ATy->getElementType());
+ }
+ case Type::StructTyID:
+ // Get the layout annotation... which is lazily created on demand.
+ return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
+ case Type::IntegerTyID:
+ return cast<IntegerType>(Ty)->getBitWidth();
+ case Type::HalfTyID:
+ return 16;
+ case Type::FloatTyID:
+ return 32;
+ case Type::DoubleTyID:
+ case Type::X86_MMXTyID:
+ return 64;
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ return 128;
+ // In memory objects this is always aligned to a higher boundary, but
+ // only 80 bits contain information.
+ case Type::X86_FP80TyID:
+ return 80;
+ case Type::VectorTyID: {
+ VectorType *VTy = cast<VectorType>(Ty);
+ return VTy->getNumElements() * getTypeSizeInBits(VTy->getElementType());
+ }
+ default:
+ llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type");
+ }
+}
+
+} // End llvm namespace
+
+#endif
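
A short sketch of the DataLayout query API above. The layout string here is
an invented little-endian, 64-bit-pointer description used only for
illustration, not one taken from a real target:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    void layoutSketch(LLVMContext &Ctx) {
      DataLayout DL("e-p:64:64:64-i32:32:32");
      // A literal struct { i32, i8 }; the trailing NULL ends the vararg list.
      StructType *ST = StructType::get(Type::getInt32Ty(Ctx),
                                       Type::getInt8Ty(Ctx), NULL);
      const StructLayout *SL = DL.getStructLayout(ST); // lazily cached
      uint64_t Bytes = SL->getSizeInBytes();           // includes tail padding
      uint64_t Off = SL->getElementOffset(1);          // offset of the i8 field
      unsigned Align = DL.getABITypeAlignment(ST);     // minimum ABI alignment
      (void)Bytes; (void)Off; (void)Align;
    }
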
diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h
new file mode 100644
index 000000000000..6c00f596badc
--- /dev/null
+++ b/include/llvm/IR/DerivedTypes.h
@@ -0,0 +1,455 @@
+//===-- llvm/IR/DerivedTypes.h - Classes for handling data types -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of classes that represent "derived
+// types". These are things like "arrays of x" or "structure of x, y, z" or
+// "function returning x taking (y,z) as parameters", etc...
+//
+// The implementations of these classes live in the Type.cpp file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_DERIVEDTYPES_H
+#define LLVM_IR_DERIVEDTYPES_H
+
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class Value;
+class APInt;
+class LLVMContext;
+template<typename T> class ArrayRef;
+class StringRef;
+
+/// Class to represent integer types. Note that this class is also used to
+/// represent the built-in integer types: Int1Ty, Int8Ty, Int16Ty, Int32Ty and
+/// Int64Ty.
+/// @brief Integer representation type
+class IntegerType : public Type {
+ friend class LLVMContextImpl;
+
+protected:
+ explicit IntegerType(LLVMContext &C, unsigned NumBits) : Type(C, IntegerTyID){
+ setSubclassData(NumBits);
+ }
+public:
+ /// This enum is just used to hold constants we need for IntegerType.
+ enum {
+ MIN_INT_BITS = 1, ///< Minimum number of bits that can be specified
+ MAX_INT_BITS = (1<<23)-1 ///< Maximum number of bits that can be specified
+    ///< Note that bit width is stored in the Type class's SubclassData field,
+    ///< which has 23 bits. This yields a maximum bit width of 8,388,607 bits.
+ };
+
+ /// This static method is the primary way of constructing an IntegerType.
+ /// If an IntegerType with the same NumBits value was previously instantiated,
+ /// that instance will be returned. Otherwise a new one will be created. Only
+ /// one instance with a given NumBits value is ever created.
+ /// @brief Get or create an IntegerType instance.
+ static IntegerType *get(LLVMContext &C, unsigned NumBits);
+
+ /// @brief Get the number of bits in this IntegerType
+ unsigned getBitWidth() const { return getSubclassData(); }
+
+ /// getBitMask - Return a bitmask with ones set for all of the bits
+ /// that can be set by an unsigned version of this type. This is 0xFF for
+ /// i8, 0xFFFF for i16, etc.
+ uint64_t getBitMask() const {
+ return ~uint64_t(0UL) >> (64-getBitWidth());
+ }
+
+ /// getSignBit - Return a uint64_t with just the most significant bit set (the
+ /// sign bit, if the value is treated as a signed number).
+ uint64_t getSignBit() const {
+ return 1ULL << (getBitWidth()-1);
+ }
+
+  /// For example, this is 0xFF for an 8-bit integer, 0xFFFF for i16, etc.
+ /// @returns a bit mask with ones set for all the bits of this type.
+ /// @brief Get a bit mask for this type.
+ APInt getMask() const;
+
+ /// This method determines if the width of this IntegerType is a power-of-2
+ /// in terms of 8 bit bytes.
+ /// @returns true if this is a power-of-2 byte width.
+  /// @brief Is this a power-of-2 byte-width IntegerType?
+ bool isPowerOf2ByteWidth() const;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const Type *T) {
+ return T->getTypeID() == IntegerTyID;
+ }
+};
+
+
+/// FunctionType - Class to represent function types
+///
+class FunctionType : public Type {
+ FunctionType(const FunctionType &) LLVM_DELETED_FUNCTION;
+ const FunctionType &operator=(const FunctionType &) LLVM_DELETED_FUNCTION;
+ FunctionType(Type *Result, ArrayRef<Type*> Params, bool IsVarArgs);
+
+public:
+ /// FunctionType::get - This static method is the primary way of constructing
+ /// a FunctionType.
+ ///
+ static FunctionType *get(Type *Result,
+ ArrayRef<Type*> Params, bool isVarArg);
+
+ /// FunctionType::get - Create a FunctionType taking no parameters.
+ ///
+ static FunctionType *get(Type *Result, bool isVarArg);
+
+ /// isValidReturnType - Return true if the specified type is valid as a return
+ /// type.
+ static bool isValidReturnType(Type *RetTy);
+
+ /// isValidArgumentType - Return true if the specified type is valid as an
+ /// argument type.
+ static bool isValidArgumentType(Type *ArgTy);
+
+ bool isVarArg() const { return getSubclassData(); }
+ Type *getReturnType() const { return ContainedTys[0]; }
+
+ typedef Type::subtype_iterator param_iterator;
+ param_iterator param_begin() const { return ContainedTys + 1; }
+ param_iterator param_end() const { return &ContainedTys[NumContainedTys]; }
+
+ /// Parameter type accessors.
+ Type *getParamType(unsigned i) const { return ContainedTys[i+1]; }
+
+ /// getNumParams - Return the number of fixed parameters this function type
+ /// requires. This does not consider varargs.
+ ///
+ unsigned getNumParams() const { return NumContainedTys - 1; }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const Type *T) {
+ return T->getTypeID() == FunctionTyID;
+ }
+};
+
+
+/// CompositeType - Common super class of ArrayType, StructType, PointerType
+/// and VectorType.
+class CompositeType : public Type {
+protected:
+ explicit CompositeType(LLVMContext &C, TypeID tid) : Type(C, tid) { }
+public:
+
+ /// getTypeAtIndex - Given an index value into the type, return the type of
+ /// the element.
+ ///
+ Type *getTypeAtIndex(const Value *V);
+ Type *getTypeAtIndex(unsigned Idx);
+ bool indexValid(const Value *V) const;
+ bool indexValid(unsigned Idx) const;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const Type *T) {
+ return T->getTypeID() == ArrayTyID ||
+ T->getTypeID() == StructTyID ||
+ T->getTypeID() == PointerTyID ||
+ T->getTypeID() == VectorTyID;
+ }
+};
+
+
+/// StructType - Class to represent struct types. There are two different kinds
+/// of struct types: Literal structs and Identified structs.
+///
+/// Literal struct types (e.g. { i32, i32 }) are uniqued structurally, and must
+/// always have a body when created. You can get one of these by using one of
+/// the StructType::get() forms.
+///
+/// Identified structs (e.g. %foo or %42) may optionally have a name and are not
+/// uniqued. The names for identified structs are managed at the LLVMContext
+/// level, so there can only be a single identified struct with a given name in
+/// a particular LLVMContext. Identified structs may also optionally be opaque
+/// (have no body specified). You get one of these by using one of the
+/// StructType::create() forms.
+///
+/// Independent of what kind of struct you have, the elements of a struct type
+/// are laid out in memory consecutively, directly one after the
+/// other (if the struct is packed) or (if not packed) with padding between the
+/// elements as defined by DataLayout (which is required to match what the code
+/// generator for a target expects).
+///
+class StructType : public CompositeType {
+ StructType(const StructType &) LLVM_DELETED_FUNCTION;
+ const StructType &operator=(const StructType &) LLVM_DELETED_FUNCTION;
+ StructType(LLVMContext &C)
+ : CompositeType(C, StructTyID), SymbolTableEntry(0) {}
+ enum {
+ /// This is the contents of the SubClassData field.
+ SCDB_HasBody = 1,
+ SCDB_Packed = 2,
+ SCDB_IsLiteral = 4,
+ SCDB_IsSized = 8
+ };
+
+  /// SymbolTableEntry - For an identified struct that actually has a name, this
+  /// is a pointer to the symbol table entry (maintained by LLVMContext) for the
+  /// struct. This is null if the type is a literal struct or if it is
+  /// an identified type that has an empty name.
+ ///
+ void *SymbolTableEntry;
+public:
+ ~StructType() {
+ delete [] ContainedTys; // Delete the body.
+ }
+
+ /// StructType::create - This creates an identified struct.
+ static StructType *create(LLVMContext &Context, StringRef Name);
+ static StructType *create(LLVMContext &Context);
+
+ static StructType *create(ArrayRef<Type*> Elements,
+ StringRef Name,
+ bool isPacked = false);
+ static StructType *create(ArrayRef<Type*> Elements);
+ static StructType *create(LLVMContext &Context,
+ ArrayRef<Type*> Elements,
+ StringRef Name,
+ bool isPacked = false);
+ static StructType *create(LLVMContext &Context, ArrayRef<Type*> Elements);
+ static StructType *create(StringRef Name, Type *elt1, ...) END_WITH_NULL;
+
+ /// StructType::get - This static method is the primary way to create a
+ /// literal StructType.
+ static StructType *get(LLVMContext &Context, ArrayRef<Type*> Elements,
+ bool isPacked = false);
+
+ /// StructType::get - Create an empty structure type.
+ ///
+ static StructType *get(LLVMContext &Context, bool isPacked = false);
+
+ /// StructType::get - This static method is a convenience method for creating
+ /// structure types by specifying the elements as arguments. Note that this
+ /// method always returns a non-packed struct, and requires at least one
+ /// element type.
+ static StructType *get(Type *elt1, ...) END_WITH_NULL;
+
+ bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; }
+
+ /// isLiteral - Return true if this type is uniqued by structural
+ /// equivalence, false if it is a struct definition.
+ bool isLiteral() const { return (getSubclassData() & SCDB_IsLiteral) != 0; }
+
+ /// isOpaque - Return true if this is a type with an identity that has no body
+  /// specified yet. Such types print as 'opaque' in .ll files.
+ bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; }
+
+ /// isSized - Return true if this is a sized type.
+ bool isSized() const;
+
+ /// hasName - Return true if this is a named struct that has a non-empty name.
+ bool hasName() const { return SymbolTableEntry != 0; }
+
+ /// getName - Return the name for this struct type if it has an identity.
+ /// This may return an empty string for an unnamed struct type. Do not call
+ /// this on an literal type.
+ StringRef getName() const;
+
+ /// setName - Change the name of this type to the specified name, or to a name
+  /// with a suffix if there is a collision. Do not call this on a literal
+ /// type.
+ void setName(StringRef Name);
+
+ /// setBody - Specify a body for an opaque identified type.
+ void setBody(ArrayRef<Type*> Elements, bool isPacked = false);
+ void setBody(Type *elt1, ...) END_WITH_NULL;
+
+  /// isValidElementType - Return true if the specified type is valid as an
+  /// element type.
+ static bool isValidElementType(Type *ElemTy);
+
+
+ // Iterator access to the elements.
+ typedef Type::subtype_iterator element_iterator;
+ element_iterator element_begin() const { return ContainedTys; }
+ element_iterator element_end() const { return &ContainedTys[NumContainedTys];}
+
+ /// isLayoutIdentical - Return true if this is layout identical to the
+ /// specified struct.
+ bool isLayoutIdentical(StructType *Other) const;
+
+ /// Random access to the elements
+ unsigned getNumElements() const { return NumContainedTys; }
+ Type *getElementType(unsigned N) const {
+ assert(N < NumContainedTys && "Element number out of range!");
+ return ContainedTys[N];
+ }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const Type *T) {
+ return T->getTypeID() == StructTyID;
+ }
+};
+
+/// SequentialType - This is the superclass of the array, pointer and vector
+/// type classes. All of these represent "arrays" in memory. The array type
+/// represents a specifically sized array, pointer types are unsized/unknown
+/// size arrays, vector types represent specifically sized arrays that
+/// allow for use of SIMD instructions. SequentialType holds the common
+/// features of all, which stem from the fact that all three lay their
+/// components out in memory identically.
+///
+class SequentialType : public CompositeType {
+ Type *ContainedType; ///< Storage for the single contained type.
+ SequentialType(const SequentialType &) LLVM_DELETED_FUNCTION;
+ const SequentialType &operator=(const SequentialType &) LLVM_DELETED_FUNCTION;
+
+protected:
+ SequentialType(TypeID TID, Type *ElType)
+ : CompositeType(ElType->getContext(), TID), ContainedType(ElType) {
+ ContainedTys = &ContainedType;
+ NumContainedTys = 1;
+ }
+
+public:
+ Type *getElementType() const { return ContainedTys[0]; }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const Type *T) {
+ return T->getTypeID() == ArrayTyID ||
+ T->getTypeID() == PointerTyID ||
+ T->getTypeID() == VectorTyID;
+ }
+};
+
+
+/// ArrayType - Class to represent array types.
+///
+class ArrayType : public SequentialType {
+ uint64_t NumElements;
+
+ ArrayType(const ArrayType &) LLVM_DELETED_FUNCTION;
+ const ArrayType &operator=(const ArrayType &) LLVM_DELETED_FUNCTION;
+ ArrayType(Type *ElType, uint64_t NumEl);
+public:
+ /// ArrayType::get - This static method is the primary way to construct an
+ /// ArrayType
+ ///
+ static ArrayType *get(Type *ElementType, uint64_t NumElements);
+
+  /// isValidElementType - Return true if the specified type is valid as an
+  /// element type.
+ static bool isValidElementType(Type *ElemTy);
+
+ uint64_t getNumElements() const { return NumElements; }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const Type *T) {
+ return T->getTypeID() == ArrayTyID;
+ }
+};
+
+/// VectorType - Class to represent vector types.
+///
+class VectorType : public SequentialType {
+ unsigned NumElements;
+
+ VectorType(const VectorType &) LLVM_DELETED_FUNCTION;
+ const VectorType &operator=(const VectorType &) LLVM_DELETED_FUNCTION;
+ VectorType(Type *ElType, unsigned NumEl);
+public:
+  /// VectorType::get - This static method is the primary way to construct a
+  /// VectorType.
+ ///
+ static VectorType *get(Type *ElementType, unsigned NumElements);
+
+ /// VectorType::getInteger - This static method gets a VectorType with the
+ /// same number of elements as the input type, and the element type is an
+ /// integer type of the same width as the input element type.
+ ///
+ static VectorType *getInteger(VectorType *VTy) {
+ unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ assert(EltBits && "Element size must be of a non-zero size");
+ Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
+ return VectorType::get(EltTy, VTy->getNumElements());
+ }
+
+ /// VectorType::getExtendedElementVectorType - This static method is like
+ /// getInteger except that the element types are twice as wide as the
+ /// elements in the input type.
+ ///
+ static VectorType *getExtendedElementVectorType(VectorType *VTy) {
+ unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2);
+ return VectorType::get(EltTy, VTy->getNumElements());
+ }
+
+ /// VectorType::getTruncatedElementVectorType - This static method is like
+ /// getInteger except that the element types are half as wide as the
+ /// elements in the input type.
+ ///
+ static VectorType *getTruncatedElementVectorType(VectorType *VTy) {
+ unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ assert((EltBits & 1) == 0 &&
+ "Cannot truncate vector element with odd bit-width");
+ Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
+ return VectorType::get(EltTy, VTy->getNumElements());
+ }
+
+  /// isValidElementType - Return true if the specified type is valid as an
+  /// element type.
+ static bool isValidElementType(Type *ElemTy);
+
+ /// @brief Return the number of elements in the Vector type.
+ unsigned getNumElements() const { return NumElements; }
+
+ /// @brief Return the number of bits in the Vector type.
+ /// Returns zero when the vector is a vector of pointers.
+ unsigned getBitWidth() const {
+ return NumElements * getElementType()->getPrimitiveSizeInBits();
+ }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const Type *T) {
+ return T->getTypeID() == VectorTyID;
+ }
+};
+
+
+/// PointerType - Class to represent pointers.
+///
+class PointerType : public SequentialType {
+ PointerType(const PointerType &) LLVM_DELETED_FUNCTION;
+ const PointerType &operator=(const PointerType &) LLVM_DELETED_FUNCTION;
+ explicit PointerType(Type *ElType, unsigned AddrSpace);
+public:
+ /// PointerType::get - This constructs a pointer to an object of the specified
+ /// type in a numbered address space.
+ static PointerType *get(Type *ElementType, unsigned AddressSpace);
+
+ /// PointerType::getUnqual - This constructs a pointer to an object of the
+ /// specified type in the generic address space (address space zero).
+ static PointerType *getUnqual(Type *ElementType) {
+ return PointerType::get(ElementType, 0);
+ }
+
+  /// isValidElementType - Return true if the specified type is valid as an
+  /// element type.
+ static bool isValidElementType(Type *ElemTy);
+
+ /// @brief Return the address space of the Pointer type.
+ inline unsigned getAddressSpace() const { return getSubclassData(); }
+
+ /// Implement support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const Type *T) {
+ return T->getTypeID() == PointerTyID;
+ }
+};
+
+} // End llvm namespace
+
+#endif
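
A sketch of how the derived type classes above are typically constructed
(all variable names are invented for illustration):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include <vector>
    using namespace llvm;

    void typesSketch(LLVMContext &Ctx) {
      Type *I32 = Type::getInt32Ty(Ctx);
      // A function type: i32 (i32, i32)
      std::vector<Type*> Params(2, I32);
      FunctionType *FTy = FunctionType::get(I32, Params, /*isVarArg=*/false);
      VectorType *V4I32 = VectorType::get(I32, 4); // <4 x i32>
      VectorType *V4I64 =
          VectorType::getExtendedElementVectorType(V4I32); // <4 x i64>
      PointerType *PTy = PointerType::getUnqual(FTy); // address space 0
      (void)V4I64; (void)PTy;
    }
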
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
new file mode 100644
index 000000000000..f97929f65854
--- /dev/null
+++ b/include/llvm/IR/Function.h
@@ -0,0 +1,470 @@
+//===-- llvm/IR/Function.h - Class to represent a single function -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the Function class, which represents a
+// single function/procedure in LLVM.
+//
+// A function basically consists of a list of basic blocks, a list of arguments,
+// and a symbol table.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_FUNCTION_H
+#define LLVM_IR_FUNCTION_H
+
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class FunctionType;
+class LLVMContext;
+
+// Traits for intrusive list of basic blocks...
+template<> struct ilist_traits<BasicBlock>
+ : public SymbolTableListTraits<BasicBlock, Function> {
+
+ // createSentinel is used to get hold of the node that marks the end of the
+ // list... (same trick used here as in ilist_traits<Instruction>)
+ BasicBlock *createSentinel() const {
+ return static_cast<BasicBlock*>(&Sentinel);
+ }
+ static void destroySentinel(BasicBlock*) {}
+
+ BasicBlock *provideInitialHead() const { return createSentinel(); }
+ BasicBlock *ensureHead(BasicBlock*) const { return createSentinel(); }
+ static void noteHead(BasicBlock*, BasicBlock*) {}
+
+ static ValueSymbolTable *getSymTab(Function *ItemParent);
+private:
+ mutable ilist_half_node<BasicBlock> Sentinel;
+};
+
+template<> struct ilist_traits<Argument>
+ : public SymbolTableListTraits<Argument, Function> {
+
+ Argument *createSentinel() const {
+ return static_cast<Argument*>(&Sentinel);
+ }
+ static void destroySentinel(Argument*) {}
+
+ Argument *provideInitialHead() const { return createSentinel(); }
+ Argument *ensureHead(Argument*) const { return createSentinel(); }
+ static void noteHead(Argument*, Argument*) {}
+
+ static ValueSymbolTable *getSymTab(Function *ItemParent);
+private:
+ mutable ilist_half_node<Argument> Sentinel;
+};
+
+class Function : public GlobalValue,
+ public ilist_node<Function> {
+public:
+ typedef iplist<Argument> ArgumentListType;
+ typedef iplist<BasicBlock> BasicBlockListType;
+
+ // BasicBlock iterators...
+ typedef BasicBlockListType::iterator iterator;
+ typedef BasicBlockListType::const_iterator const_iterator;
+
+ typedef ArgumentListType::iterator arg_iterator;
+ typedef ArgumentListType::const_iterator const_arg_iterator;
+
+private:
+ // Important things that make up a function!
+ BasicBlockListType BasicBlocks; ///< The basic blocks
+ mutable ArgumentListType ArgumentList; ///< The formal arguments
+ ValueSymbolTable *SymTab; ///< Symbol table of args/instructions
+ AttributeSet AttributeSets; ///< Parameter attributes
+
+ // HasLazyArguments is stored in Value::SubclassData.
+ /*bool HasLazyArguments;*/
+
+ // The Calling Convention is stored in Value::SubclassData.
+ /*CallingConv::ID CallingConvention;*/
+
+ friend class SymbolTableListTraits<Function, Module>;
+
+ void setParent(Module *parent);
+
+ /// hasLazyArguments/CheckLazyArguments - The argument list of a function is
+ /// built on demand, so that the list isn't allocated until the first client
+ /// needs it. The hasLazyArguments predicate returns true if the arg list
+ /// hasn't been set up yet.
+ bool hasLazyArguments() const {
+ return getSubclassDataFromValue() & 1;
+ }
+ void CheckLazyArguments() const {
+ if (hasLazyArguments())
+ BuildLazyArguments();
+ }
+ void BuildLazyArguments() const;
+
+ Function(const Function&) LLVM_DELETED_FUNCTION;
+ void operator=(const Function&) LLVM_DELETED_FUNCTION;
+
+ /// Do the actual lookup of an intrinsic ID when the query could not be
+ /// answered from the cache.
+ unsigned lookupIntrinsicID() const LLVM_READONLY;
+
+ /// Function ctor - If the (optional) Module argument is specified, the
+ /// function is automatically inserted into the end of the function list for
+ /// the module.
+ ///
+ Function(FunctionType *Ty, LinkageTypes Linkage,
+ const Twine &N = "", Module *M = 0);
+
+public:
+ static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
+ const Twine &N = "", Module *M = 0) {
+ return new(0) Function(Ty, Linkage, N, M);
+ }
+
+ ~Function();
+
+ Type *getReturnType() const; // Return the type of the ret val
+ FunctionType *getFunctionType() const; // Return the FunctionType for me
+
+ /// getContext - Return a pointer to the LLVMContext associated with this
+ /// function, or NULL if this function is not bound to a context yet.
+ LLVMContext &getContext() const;
+
+ /// isVarArg - Return true if this function takes a variable number of
+ /// arguments.
+ bool isVarArg() const;
+
+ /// getIntrinsicID - This method returns the ID number of the specified
+ /// function, or Intrinsic::not_intrinsic if the function is not an
+ /// intrinsic, or if the pointer is null. This value is always defined to be
+ /// zero to allow easy checking for whether a function is intrinsic or not.
+ /// The particular intrinsic functions which correspond to this value are
+ /// defined in llvm/Intrinsics.h. Results are cached in the LLVM context,
+ /// subsequent requests for the same ID return results much faster from the
+ /// cache.
+ ///
+ unsigned getIntrinsicID() const LLVM_READONLY;
+ bool isIntrinsic() const { return getName().startswith("llvm."); }
+
+ /// getCallingConv()/setCallingConv(CC) - These method get and set the
+ /// calling convention of this function. The enum values for the known
+ /// calling conventions are defined in CallingConv.h.
+ CallingConv::ID getCallingConv() const {
+ return static_cast<CallingConv::ID>(getSubclassDataFromValue() >> 1);
+ }
+ void setCallingConv(CallingConv::ID CC) {
+ setValueSubclassData((getSubclassDataFromValue() & 1) |
+ (static_cast<unsigned>(CC) << 1));
+ }
+
+ /// getAttributes - Return the attribute list for this Function.
+ ///
+ AttributeSet getAttributes() const { return AttributeSets; }
+
+ /// setAttributes - Set the attribute list for this Function.
+ ///
+ void setAttributes(AttributeSet attrs) { AttributeSets = attrs; }
+
+ /// addFnAttr - Add function attributes to this function.
+ ///
+ void addFnAttr(Attribute::AttrKind N) {
+ setAttributes(AttributeSets.addAttribute(getContext(),
+ AttributeSet::FunctionIndex, N));
+ }
+
+ /// addFnAttr - Add function attributes to this function.
+ ///
+ void addFnAttr(StringRef Kind) {
+ setAttributes(
+ AttributeSets.addAttribute(getContext(),
+ AttributeSet::FunctionIndex, Kind));
+ }
+
+ /// \brief Return true if the function has the attribute.
+ bool hasFnAttribute(Attribute::AttrKind Kind) const {
+ return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, Kind);
+ }
+ bool hasFnAttribute(StringRef Kind) const {
+ return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, Kind);
+ }
+
+ /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
+ /// to use during code generation.
+ bool hasGC() const;
+ const char *getGC() const;
+ void setGC(const char *Str);
+ void clearGC();
+
+ /// @brief adds the attribute to the list of attributes.
+ void addAttribute(unsigned i, Attribute::AttrKind attr);
+
+ /// @brief adds the attributes to the list of attributes.
+ void addAttributes(unsigned i, AttributeSet attrs);
+
+ /// @brief removes the attributes from the list of attributes.
+ void removeAttributes(unsigned i, AttributeSet attr);
+
+ /// @brief Extract the alignment for a call or parameter (0=unknown).
+ unsigned getParamAlignment(unsigned i) const {
+ return AttributeSets.getParamAlignment(i);
+ }
+
+ /// @brief Determine if the function does not access memory.
+ bool doesNotAccessMemory() const {
+ return AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ReadNone);
+ }
+ void setDoesNotAccessMemory() {
+ addFnAttr(Attribute::ReadNone);
+ }
+
+ /// @brief Determine if the function does not access or only reads memory.
+ bool onlyReadsMemory() const {
+ return doesNotAccessMemory() ||
+ AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ReadOnly);
+ }
+ void setOnlyReadsMemory() {
+ addFnAttr(Attribute::ReadOnly);
+ }
+
+ /// @brief Determine if the function cannot return.
+ bool doesNotReturn() const {
+ return AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoReturn);
+ }
+ void setDoesNotReturn() {
+ addFnAttr(Attribute::NoReturn);
+ }
+
+ /// @brief Determine if the function cannot unwind.
+ bool doesNotThrow() const {
+ return AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ }
+ void setDoesNotThrow() {
+ addFnAttr(Attribute::NoUnwind);
+ }
+
+ /// @brief Determine if the call cannot be duplicated.
+ bool cannotDuplicate() const {
+ return AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoDuplicate);
+ }
+ void setCannotDuplicate() {
+ addFnAttr(Attribute::NoDuplicate);
+ }
+
+ /// @brief True if the ABI mandates (or the user requested) that this
+  /// function be in an unwind table.
+ bool hasUWTable() const {
+ return AttributeSets.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::UWTable);
+ }
+ void setHasUWTable() {
+ addFnAttr(Attribute::UWTable);
+ }
+
+ /// @brief True if this function needs an unwind table.
+ bool needsUnwindTableEntry() const {
+ return hasUWTable() || !doesNotThrow();
+ }
+
+  /// @brief Determine if the function returns a structure through its first
+  /// pointer argument.
+ bool hasStructRetAttr() const {
+ return AttributeSets.hasAttribute(1, Attribute::StructRet);
+ }
+
+ /// @brief Determine if the parameter does not alias other parameters.
+  /// @param n The parameter to check. 1 is the first parameter, 0 is the
+  /// return value.
+ bool doesNotAlias(unsigned n) const {
+ return AttributeSets.hasAttribute(n, Attribute::NoAlias);
+ }
+ void setDoesNotAlias(unsigned n) {
+ addAttribute(n, Attribute::NoAlias);
+ }
+
+ /// @brief Determine if the parameter can be captured.
+  /// @param n The parameter to check. 1 is the first parameter, 0 is the
+  /// return value.
+ bool doesNotCapture(unsigned n) const {
+ return AttributeSets.hasAttribute(n, Attribute::NoCapture);
+ }
+ void setDoesNotCapture(unsigned n) {
+ addAttribute(n, Attribute::NoCapture);
+ }
+
+ /// copyAttributesFrom - copy all additional attributes (those not needed to
+ /// create a Function) from the Function Src to this one.
+ void copyAttributesFrom(const GlobalValue *Src);
+
+ /// deleteBody - This method deletes the body of the function, and converts
+ /// the linkage to external.
+ ///
+ void deleteBody() {
+ dropAllReferences();
+ setLinkage(ExternalLinkage);
+ }
+
+ /// removeFromParent - This method unlinks 'this' from the containing module,
+ /// but does not delete it.
+ ///
+ virtual void removeFromParent();
+
+ /// eraseFromParent - This method unlinks 'this' from the containing module
+ /// and deletes it.
+ ///
+ virtual void eraseFromParent();
+
+
+ /// Get the underlying elements of the Function... the basic block list is
+ /// empty for external functions.
+ ///
+ const ArgumentListType &getArgumentList() const {
+ CheckLazyArguments();
+ return ArgumentList;
+ }
+ ArgumentListType &getArgumentList() {
+ CheckLazyArguments();
+ return ArgumentList;
+ }
+ static iplist<Argument> Function::*getSublistAccess(Argument*) {
+ return &Function::ArgumentList;
+ }
+
+ const BasicBlockListType &getBasicBlockList() const { return BasicBlocks; }
+ BasicBlockListType &getBasicBlockList() { return BasicBlocks; }
+ static iplist<BasicBlock> Function::*getSublistAccess(BasicBlock*) {
+ return &Function::BasicBlocks;
+ }
+
+ const BasicBlock &getEntryBlock() const { return front(); }
+ BasicBlock &getEntryBlock() { return front(); }
+
+ //===--------------------------------------------------------------------===//
+ // Symbol Table Accessing functions...
+
+  /// getValueSymbolTable() - Return the symbol table...
+ ///
+ inline ValueSymbolTable &getValueSymbolTable() { return *SymTab; }
+ inline const ValueSymbolTable &getValueSymbolTable() const { return *SymTab; }
+
+
+ //===--------------------------------------------------------------------===//
+ // BasicBlock iterator forwarding functions
+ //
+ iterator begin() { return BasicBlocks.begin(); }
+ const_iterator begin() const { return BasicBlocks.begin(); }
+ iterator end () { return BasicBlocks.end(); }
+ const_iterator end () const { return BasicBlocks.end(); }
+
+ size_t size() const { return BasicBlocks.size(); }
+ bool empty() const { return BasicBlocks.empty(); }
+ const BasicBlock &front() const { return BasicBlocks.front(); }
+ BasicBlock &front() { return BasicBlocks.front(); }
+ const BasicBlock &back() const { return BasicBlocks.back(); }
+ BasicBlock &back() { return BasicBlocks.back(); }
+
+ //===--------------------------------------------------------------------===//
+ // Argument iterator forwarding functions
+ //
+ arg_iterator arg_begin() {
+ CheckLazyArguments();
+ return ArgumentList.begin();
+ }
+ const_arg_iterator arg_begin() const {
+ CheckLazyArguments();
+ return ArgumentList.begin();
+ }
+ arg_iterator arg_end() {
+ CheckLazyArguments();
+ return ArgumentList.end();
+ }
+ const_arg_iterator arg_end() const {
+ CheckLazyArguments();
+ return ArgumentList.end();
+ }
+
+ size_t arg_size() const;
+ bool arg_empty() const;
+
+ /// viewCFG - This function is meant for use from the debugger. You can just
+ /// say 'call F->viewCFG()' and a ghostview window should pop up from the
+ /// program, displaying the CFG of the current function with the code for each
+ /// basic block inside. This depends on there being a 'dot' and 'gv' program
+ /// in your path.
+ ///
+ void viewCFG() const;
+
+ /// viewCFGOnly - This function is meant for use from the debugger. It works
+ /// just like viewCFG, but it does not include the contents of basic blocks
+  /// in the nodes, just the label. If you are only interested in the CFG
+ /// this can make the graph smaller.
+ ///
+ void viewCFGOnly() const;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == Value::FunctionVal;
+ }
+
+ /// dropAllReferences() - This method causes all the subinstructions to "let
+ /// go" of all references that they are maintaining. This allows one to
+ /// 'delete' a whole module at a time, even though there may be circular
+ /// references... first all references are dropped, and all use counts go to
+ /// zero. Then everything is deleted for real. Note that no operations are
+ /// valid on an object that has "dropped all references", except operator
+ /// delete.
+ ///
+ /// Since no other object in the module can have references into the body of a
+ /// function, dropping all references deletes the entire body of the function,
+ /// including any contained basic blocks.
+ ///
+ void dropAllReferences();
+
+  /// hasAddressTaken - Returns true if there are any uses of this function
+ /// other than direct calls or invokes to it, or blockaddress expressions.
+ /// Optionally passes back an offending user for diagnostic purposes.
+ ///
+ bool hasAddressTaken(const User** = 0) const;
+
+ /// isDefTriviallyDead - Return true if it is trivially safe to remove
+ /// this function definition from the module (because it isn't externally
+ /// visible, does not have its address taken, and has no callers). To make
+ /// this more accurate, call removeDeadConstantUsers first.
+ bool isDefTriviallyDead() const;
+
+ /// callsFunctionThatReturnsTwice - Return true if the function has a call to
+ /// setjmp or other function that gcc recognizes as "returning twice".
+ bool callsFunctionThatReturnsTwice() const;
+
+private:
+ // Shadow Value::setValueSubclassData with a private forwarding method so that
+ // subclasses cannot accidentally use it.
+ void setValueSubclassData(unsigned short D) {
+ Value::setValueSubclassData(D);
+ }
+};
+
+inline ValueSymbolTable *
+ilist_traits<BasicBlock>::getSymTab(Function *F) {
+ return F ? &F->getValueSymbolTable() : 0;
+}
+
+inline ValueSymbolTable *
+ilist_traits<Argument>::getSymTab(Function *F) {
+ return F ? &F->getValueSymbolTable() : 0;
+}
+
+} // End llvm namespace
+
+#endif
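
A hedged sketch of the Function API above: creating a function in a module,
setting attributes, and naming its arguments ("my_func" and "arg" are
invented names):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    #include <vector>
    using namespace llvm;

    void functionSketch(Module &M) {
      LLVMContext &Ctx = M.getContext();
      // void (i32)
      std::vector<Type*> Params(1, Type::getInt32Ty(Ctx));
      FunctionType *FTy =
          FunctionType::get(Type::getVoidTy(Ctx), Params, /*isVarArg=*/false);
      Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage,
                                     "my_func", &M); // appended to M
      F->setCallingConv(CallingConv::C);
      F->addFnAttr(Attribute::NoUnwind); // equivalent to setDoesNotThrow()
      for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
           I != E; ++I)
        I->setName("arg"); // lazily materializes the argument list
    }
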
diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h
new file mode 100644
index 000000000000..883814a32371
--- /dev/null
+++ b/include/llvm/IR/GlobalAlias.h
@@ -0,0 +1,93 @@
+//===----- llvm/IR/GlobalAlias.h - GlobalAlias class ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the GlobalAlias class, which
+// represents a single function or variable alias in the IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_GLOBALALIAS_H
+#define LLVM_IR_GLOBALALIAS_H
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/OperandTraits.h"
+
+namespace llvm {
+
+class Module;
+template<typename ValueSubClass, typename ItemParentClass>
+ class SymbolTableListTraits;
+
+class GlobalAlias : public GlobalValue, public ilist_node<GlobalAlias> {
+ friend class SymbolTableListTraits<GlobalAlias, Module>;
+ void operator=(const GlobalAlias &) LLVM_DELETED_FUNCTION;
+ GlobalAlias(const GlobalAlias &) LLVM_DELETED_FUNCTION;
+
+ void setParent(Module *parent);
+
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ /// GlobalAlias ctor - If a parent module is specified, the alias is
+ /// automatically inserted into the end of the specified module's alias list.
+ GlobalAlias(Type *Ty, LinkageTypes Linkage, const Twine &Name = "",
+ Constant* Aliasee = 0, Module *Parent = 0);
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
+
+ /// removeFromParent - This method unlinks 'this' from the containing module,
+ /// but does not delete it.
+ ///
+ virtual void removeFromParent();
+
+ /// eraseFromParent - This method unlinks 'this' from the containing module
+ /// and deletes it.
+ ///
+ virtual void eraseFromParent();
+
+  /// set/getAliasee - These methods retrieve and set the alias target.
+ void setAliasee(Constant *GV);
+ const Constant *getAliasee() const {
+ return getOperand(0);
+ }
+ Constant *getAliasee() {
+ return getOperand(0);
+ }
+  /// getAliasedGlobal() - The aliasee can be either a global or a bitcast of
+  /// a global. This method retrieves the global for both aliasee flavors.
+ const GlobalValue *getAliasedGlobal() const;
+
+ /// resolveAliasedGlobal() - This method tries to ultimately resolve the alias
+ /// by going through the aliasing chain and trying to find the very last
+  /// global. Returns NULL if a cycle was found. If stopOnWeak is false, the
+  /// whole aliasing chain is traversed; otherwise only strong aliases are
+  /// followed.
+ const GlobalValue *resolveAliasedGlobal(bool stopOnWeak = true) const;
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == Value::GlobalAliasVal;
+ }
+};
+
+template <>
+struct OperandTraits<GlobalAlias> :
+ public FixedNumOperandTraits<GlobalAlias, 1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalAlias, Constant)
+
+} // End llvm namespace
+
+#endif
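
A sketch of creating an alias for an existing function with the constructor
declared above ("my_alias" is an invented name):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/GlobalAlias.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    void aliasSketch(Module &M, Function *F) {
      GlobalAlias *GA =
          new GlobalAlias(F->getType(), GlobalValue::ExternalLinkage,
                          "my_alias", F, &M); // appended to M's alias list
      const GlobalValue *Target = GA->getAliasedGlobal(); // resolves to F
      (void)Target;
    }
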
diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h
new file mode 100644
index 000000000000..f398bc1b87ab
--- /dev/null
+++ b/include/llvm/IR/GlobalValue.h
@@ -0,0 +1,299 @@
+//===-- llvm/IR/GlobalValue.h - Class to represent a global value -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares GlobalValue, a common base class of all globally
+// definable objects. As such, it is subclassed by GlobalVariable, GlobalAlias
+// and Function. This base class is
+// used because you can do certain things with these global objects that you
+// can't do to anything else. For example, use the address of one as a
+// constant.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_GLOBALVALUE_H
+#define LLVM_IR_GLOBALVALUE_H
+
+#include "llvm/IR/Constant.h"
+
+namespace llvm {
+
+class PointerType;
+class Module;
+
+class GlobalValue : public Constant {
+ GlobalValue(const GlobalValue &) LLVM_DELETED_FUNCTION;
+public:
+ /// @brief An enumeration for the kinds of linkage for global values.
+ enum LinkageTypes {
+ ExternalLinkage = 0,///< Externally visible function
+ AvailableExternallyLinkage, ///< Available for inspection, not emission.
+ LinkOnceAnyLinkage, ///< Keep one copy of function when linking (inline)
+ LinkOnceODRLinkage, ///< Same, but only replaced by something equivalent.
+ LinkOnceODRAutoHideLinkage, ///< Like LinkOnceODRLinkage but addr not taken.
+ WeakAnyLinkage, ///< Keep one copy of named function when linking (weak)
+ WeakODRLinkage, ///< Same, but only replaced by something equivalent.
+ AppendingLinkage, ///< Special purpose, only applies to global arrays
+ InternalLinkage, ///< Rename collisions when linking (static functions).
+ PrivateLinkage, ///< Like Internal, but omit from symbol table.
+ LinkerPrivateLinkage, ///< Like Private, but linker removes.
+ LinkerPrivateWeakLinkage, ///< Like LinkerPrivate, but weak.
+ DLLImportLinkage, ///< Function to be imported from DLL
+ DLLExportLinkage, ///< Function to be accessible from DLL.
+ ExternalWeakLinkage,///< ExternalWeak linkage description.
+ CommonLinkage ///< Tentative definitions.
+ };
+
+ /// @brief An enumeration for the kinds of visibility of global values.
+ enum VisibilityTypes {
+ DefaultVisibility = 0, ///< The GV is visible
+ HiddenVisibility, ///< The GV is hidden
+ ProtectedVisibility ///< The GV is protected
+ };
+
+protected:
+ GlobalValue(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps,
+ LinkageTypes linkage, const Twine &Name)
+ : Constant(ty, vty, Ops, NumOps), Linkage(linkage),
+ Visibility(DefaultVisibility), Alignment(0), UnnamedAddr(0), Parent(0) {
+ setName(Name);
+ }
+
+ // Note: VC++ treats enums as signed, so an extra bit is required to prevent
+ // Linkage and Visibility from turning into negative values.
+ LinkageTypes Linkage : 5; // The linkage of this global
+ unsigned Visibility : 2; // The visibility style of this global
+ unsigned Alignment : 16; // Alignment of this symbol, must be power of two
+ unsigned UnnamedAddr : 1; // This value's address is not significant
+ Module *Parent; // The containing module.
+  std::string Section;         // Section to emit this into; empty means default
+public:
+ ~GlobalValue() {
+ removeDeadConstantUsers(); // remove any dead constants using this.
+ }
+
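+  /// getAlignment - Return the alignment of this value in bytes. As the
+  /// decoding below shows, the Alignment bitfield stores log2(alignment)+1,
+  /// so a stored value of zero means "no alignment specified".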
+ unsigned getAlignment() const {
+ return (1u << Alignment) >> 1;
+ }
+ void setAlignment(unsigned Align);
+
+ bool hasUnnamedAddr() const { return UnnamedAddr; }
+ void setUnnamedAddr(bool Val) { UnnamedAddr = Val; }
+
+ VisibilityTypes getVisibility() const { return VisibilityTypes(Visibility); }
+ bool hasDefaultVisibility() const { return Visibility == DefaultVisibility; }
+ bool hasHiddenVisibility() const { return Visibility == HiddenVisibility; }
+ bool hasProtectedVisibility() const {
+ return Visibility == ProtectedVisibility;
+ }
+ void setVisibility(VisibilityTypes V) { Visibility = V; }
+
+ bool hasSection() const { return !Section.empty(); }
+ const std::string &getSection() const { return Section; }
+ void setSection(StringRef S) { Section = S; }
+
+ /// If the usage is empty (except transitively dead constants), then this
+ /// global value can be safely deleted since the destructor will
+ /// delete the dead constants as well.
+ /// @brief Determine if the usage of this global value is empty except
+ /// for transitively dead constants.
+ bool use_empty_except_constants();
+
+ /// getType - Global values are always pointers.
+ inline PointerType *getType() const {
+ return reinterpret_cast<PointerType*>(User::getType());
+ }
+
+ static LinkageTypes getLinkOnceLinkage(bool ODR) {
+ return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage;
+ }
+ static LinkageTypes getWeakLinkage(bool ODR) {
+ return ODR ? WeakODRLinkage : WeakAnyLinkage;
+ }
+
+ static bool isExternalLinkage(LinkageTypes Linkage) {
+ return Linkage == ExternalLinkage;
+ }
+ static bool isAvailableExternallyLinkage(LinkageTypes Linkage) {
+ return Linkage == AvailableExternallyLinkage;
+ }
+ static bool isLinkOnceLinkage(LinkageTypes Linkage) {
+ return Linkage == LinkOnceAnyLinkage ||
+ Linkage == LinkOnceODRLinkage ||
+ Linkage == LinkOnceODRAutoHideLinkage;
+ }
+ static bool isLinkOnceODRAutoHideLinkage(LinkageTypes Linkage) {
+ return Linkage == LinkOnceODRAutoHideLinkage;
+ }
+ static bool isWeakLinkage(LinkageTypes Linkage) {
+ return Linkage == WeakAnyLinkage || Linkage == WeakODRLinkage;
+ }
+ static bool isAppendingLinkage(LinkageTypes Linkage) {
+ return Linkage == AppendingLinkage;
+ }
+ static bool isInternalLinkage(LinkageTypes Linkage) {
+ return Linkage == InternalLinkage;
+ }
+ static bool isPrivateLinkage(LinkageTypes Linkage) {
+ return Linkage == PrivateLinkage;
+ }
+ static bool isLinkerPrivateLinkage(LinkageTypes Linkage) {
+ return Linkage == LinkerPrivateLinkage;
+ }
+ static bool isLinkerPrivateWeakLinkage(LinkageTypes Linkage) {
+ return Linkage == LinkerPrivateWeakLinkage;
+ }
+ static bool isLocalLinkage(LinkageTypes Linkage) {
+ return isInternalLinkage(Linkage) || isPrivateLinkage(Linkage) ||
+ isLinkerPrivateLinkage(Linkage) || isLinkerPrivateWeakLinkage(Linkage);
+ }
+ static bool isDLLImportLinkage(LinkageTypes Linkage) {
+ return Linkage == DLLImportLinkage;
+ }
+ static bool isDLLExportLinkage(LinkageTypes Linkage) {
+ return Linkage == DLLExportLinkage;
+ }
+ static bool isExternalWeakLinkage(LinkageTypes Linkage) {
+ return Linkage == ExternalWeakLinkage;
+ }
+ static bool isCommonLinkage(LinkageTypes Linkage) {
+ return Linkage == CommonLinkage;
+ }
+
+ /// isDiscardableIfUnused - Whether the definition of this global may be
+ /// discarded if it is not used in its compilation unit.
+ static bool isDiscardableIfUnused(LinkageTypes Linkage) {
+ return isLinkOnceLinkage(Linkage) || isLocalLinkage(Linkage);
+ }
+
+ /// mayBeOverridden - Whether the definition of this global may be replaced
+ /// by something non-equivalent at link time. For example, if a function has
+ /// weak linkage then the code defining it may be replaced by different code.
+ static bool mayBeOverridden(LinkageTypes Linkage) {
+ return Linkage == WeakAnyLinkage ||
+ Linkage == LinkOnceAnyLinkage ||
+ Linkage == CommonLinkage ||
+ Linkage == ExternalWeakLinkage ||
+ Linkage == LinkerPrivateWeakLinkage;
+ }
+
+ /// isWeakForLinker - Whether the definition of this global may be replaced at
+ /// link time. NB: Using this method outside of the code generators is almost
+ /// always a mistake: when working at the IR level use mayBeOverridden instead
+ /// as it knows about ODR semantics.
+ static bool isWeakForLinker(LinkageTypes Linkage) {
+ return Linkage == AvailableExternallyLinkage ||
+ Linkage == WeakAnyLinkage ||
+ Linkage == WeakODRLinkage ||
+ Linkage == LinkOnceAnyLinkage ||
+ Linkage == LinkOnceODRLinkage ||
+ Linkage == LinkOnceODRAutoHideLinkage ||
+ Linkage == CommonLinkage ||
+ Linkage == ExternalWeakLinkage ||
+ Linkage == LinkerPrivateWeakLinkage;
+ }
+
+ bool hasExternalLinkage() const { return isExternalLinkage(Linkage); }
+ bool hasAvailableExternallyLinkage() const {
+ return isAvailableExternallyLinkage(Linkage);
+ }
+ bool hasLinkOnceLinkage() const {
+ return isLinkOnceLinkage(Linkage);
+ }
+ bool hasLinkOnceODRAutoHideLinkage() const {
+ return isLinkOnceODRAutoHideLinkage(Linkage);
+ }
+ bool hasWeakLinkage() const {
+ return isWeakLinkage(Linkage);
+ }
+ bool hasAppendingLinkage() const { return isAppendingLinkage(Linkage); }
+ bool hasInternalLinkage() const { return isInternalLinkage(Linkage); }
+ bool hasPrivateLinkage() const { return isPrivateLinkage(Linkage); }
+ bool hasLinkerPrivateLinkage() const { return isLinkerPrivateLinkage(Linkage); }
+ bool hasLinkerPrivateWeakLinkage() const {
+ return isLinkerPrivateWeakLinkage(Linkage);
+ }
+ bool hasLocalLinkage() const { return isLocalLinkage(Linkage); }
+ bool hasDLLImportLinkage() const { return isDLLImportLinkage(Linkage); }
+ bool hasDLLExportLinkage() const { return isDLLExportLinkage(Linkage); }
+ bool hasExternalWeakLinkage() const { return isExternalWeakLinkage(Linkage); }
+ bool hasCommonLinkage() const { return isCommonLinkage(Linkage); }
+
+ void setLinkage(LinkageTypes LT) { Linkage = LT; }
+ LinkageTypes getLinkage() const { return Linkage; }
+
+ bool isDiscardableIfUnused() const {
+ return isDiscardableIfUnused(Linkage);
+ }
+
+ bool mayBeOverridden() const { return mayBeOverridden(Linkage); }
+
+ bool isWeakForLinker() const { return isWeakForLinker(Linkage); }
+
+ /// copyAttributesFrom - copy all additional attributes (those not needed to
+ /// create a GlobalValue) from the GlobalValue Src to this one.
+ virtual void copyAttributesFrom(const GlobalValue *Src);
+
+/// @name Materialization
+/// Materialization is used to construct functions only as they're needed.
+/// This is useful to reduce memory usage in LLVM and to limit the parsing
+/// work done by the BitcodeReader to load the Module.
+/// @{
+
+  /// isMaterializable - If this function's Module is lazily streaming in
+  /// functions from disk or some other source, this method can be used to
+  /// check whether the function has been read in yet.
+ bool isMaterializable() const;
+
+ /// isDematerializable - Returns true if this function was loaded from a
+ /// GVMaterializer that's still attached to its Module and that knows how to
+ /// dematerialize the function.
+ bool isDematerializable() const;
+
+ /// Materialize - make sure this GlobalValue is fully read. If the module is
+ /// corrupt, this returns true and fills in the optional string with
+ /// information about the problem. If successful, this returns false.
+ bool Materialize(std::string *ErrInfo = 0);
+
+ /// Dematerialize - If this GlobalValue is read in, and if the GVMaterializer
+ /// supports it, release the memory for the function, and set it up to be
+ /// materialized lazily. If !isDematerializable(), this method is a noop.
+ void Dematerialize();
+
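+  /// A minimal sketch of the intended calling pattern (GV is an assumed
+  /// GlobalValue*; Materialize() returns true on failure):
+  /// \code
+  ///   if (GV->isMaterializable()) {
+  ///     std::string Err;
+  ///     if (GV->Materialize(&Err))
+  ///       report_fatal_error(Err);   // or handle the error some other way
+  ///   }
+  ///   // ... use GV ...
+  ///   if (GV->isDematerializable())
+  ///     GV->Dematerialize();
+  /// \endcode
+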
+/// @}
+
+ /// Override from Constant class.
+ virtual void destroyConstant();
+
+ /// isDeclaration - Return true if the primary definition of this global
+ /// value is outside of the current translation unit.
+ bool isDeclaration() const;
+
+ /// removeFromParent - This method unlinks 'this' from the containing module,
+ /// but does not delete it.
+ virtual void removeFromParent() = 0;
+
+ /// eraseFromParent - This method unlinks 'this' from the containing module
+ /// and deletes it.
+ virtual void eraseFromParent() = 0;
+
+  /// getParent - Get the module that this global value is contained in.
+ inline Module *getParent() { return Parent; }
+ inline const Module *getParent() const { return Parent; }
+
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == Value::FunctionVal ||
+ V->getValueID() == Value::GlobalVariableVal ||
+ V->getValueID() == Value::GlobalAliasVal;
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h
new file mode 100644
index 000000000000..bfed50786ea0
--- /dev/null
+++ b/include/llvm/IR/GlobalVariable.h
@@ -0,0 +1,210 @@
+//===-- llvm/GlobalVariable.h - GlobalVariable class ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the GlobalVariable class, which
+// represents a single global variable (or constant) in the VM.
+//
+// Global variables are constant pointers that refer to hunks of space that are
+// allocated by either the VM, or by the linker in a static compiler. A global
+// variable may have an initial value, which is copied into the executable's .data
+// area. Global Constants are required to have initializers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_GLOBALVARIABLE_H
+#define LLVM_IR_GLOBALVARIABLE_H
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/OperandTraits.h"
+
+namespace llvm {
+
+class Module;
+class Constant;
+template<typename ValueSubClass, typename ItemParentClass>
+ class SymbolTableListTraits;
+
+class GlobalVariable : public GlobalValue, public ilist_node<GlobalVariable> {
+ friend class SymbolTableListTraits<GlobalVariable, Module>;
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void operator=(const GlobalVariable &) LLVM_DELETED_FUNCTION;
+ GlobalVariable(const GlobalVariable &) LLVM_DELETED_FUNCTION;
+
+ void setParent(Module *parent);
+
+ bool isConstantGlobal : 1; // Is this a global constant?
+  unsigned threadLocalMode : 3;             // Is this symbol thread-local
+                                            // and, if so, what is the
+                                            // desired TLS model?
+ bool isExternallyInitializedConstant : 1; // Is this a global whose value
+ // can change from its initial
+ // value before global
+ // initializers are run?
+
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+
+ enum ThreadLocalMode {
+ NotThreadLocal = 0,
+ GeneralDynamicTLSModel,
+ LocalDynamicTLSModel,
+ InitialExecTLSModel,
+ LocalExecTLSModel
+ };
+
+  /// GlobalVariable ctor - If a parent module is specified, the global is
+  /// automatically appended to the end of the specified module's global list.
+ GlobalVariable(Type *Ty, bool isConstant, LinkageTypes Linkage,
+ Constant *Initializer = 0, const Twine &Name = "",
+ ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0,
+ bool isExternallyInitialized = false);
+ /// GlobalVariable ctor - This creates a global and inserts it before the
+ /// specified other global.
+ GlobalVariable(Module &M, Type *Ty, bool isConstant,
+ LinkageTypes Linkage, Constant *Initializer,
+ const Twine &Name = "", GlobalVariable *InsertBefore = 0,
+ ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0,
+ bool isExternallyInitialized = false);
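+
+  /// A typical construction, sketched (assuming a Module M and an
+  /// LLVMContext Ctx are in scope):
+  /// \code
+  ///   GlobalVariable *GV =
+  ///     new GlobalVariable(M, Type::getInt32Ty(Ctx), /*isConstant=*/true,
+  ///                        GlobalValue::InternalLinkage,
+  ///                        ConstantInt::get(Type::getInt32Ty(Ctx), 42),
+  ///                        "answer");
+  /// \endcode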
+
+ ~GlobalVariable() {
+ NumOperands = 1; // FIXME: needed by operator delete
+ }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  /// hasInitializer - Unless a global variable is a declaration, it has an
+  /// initializer. The initializer for the global variable/constant is held
+  /// as the single operand if an initializer is specified.
+ ///
+ inline bool hasInitializer() const { return !isDeclaration(); }
+
+ /// hasDefinitiveInitializer - Whether the global variable has an initializer,
+ /// and any other instances of the global (this can happen due to weak
+ /// linkage) are guaranteed to have the same initializer.
+ ///
+ /// Note that if you want to transform a global, you must use
+ /// hasUniqueInitializer() instead, because of the *_odr linkage type.
+ ///
+ /// Example:
+ ///
+ /// @a = global SomeType* null - Initializer is both definitive and unique.
+ ///
+ /// @b = global weak SomeType* null - Initializer is neither definitive nor
+ /// unique.
+ ///
+ /// @c = global weak_odr SomeType* null - Initializer is definitive, but not
+ /// unique.
+ inline bool hasDefinitiveInitializer() const {
+ return hasInitializer() &&
+ // The initializer of a global variable with weak linkage may change at
+ // link time.
+ !mayBeOverridden() &&
+ // The initializer of a global variable with the externally_initialized
+ // marker may change at runtime before C++ initializers are evaluated.
+ !isExternallyInitialized();
+ }
+
+ /// hasUniqueInitializer - Whether the global variable has an initializer, and
+ /// any changes made to the initializer will turn up in the final executable.
+ inline bool hasUniqueInitializer() const {
+ return hasInitializer() &&
+ // It's not safe to modify initializers of global variables with weak
+ // linkage, because the linker might choose to discard the initializer and
+ // use the initializer from another instance of the global variable
+ // instead. It is wrong to modify the initializer of a global variable
+ // with *_odr linkage because then different instances of the global may
+ // have different initializers, breaking the One Definition Rule.
+ !isWeakForLinker() &&
+ // It is not safe to modify initializers of global variables with the
+                 // externally_initialized marker since the value may be changed at
+ // before C++ initializers are evaluated.
+ !isExternallyInitialized();
+ }
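+
+  /// A transform that rewrites an initializer should therefore guard on the
+  /// stronger predicate (a sketch; NewInit is an assumed suitable Constant):
+  /// \code
+  ///   if (GV->hasUniqueInitializer())
+  ///     GV->setInitializer(NewInit);
+  /// \endcode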
+
+ /// getInitializer - Return the initializer for this global variable. It is
+ /// illegal to call this method if the global is external, because we cannot
+ /// tell what the value is initialized to!
+ ///
+ inline const Constant *getInitializer() const {
+ assert(hasInitializer() && "GV doesn't have initializer!");
+ return static_cast<Constant*>(Op<0>().get());
+ }
+ inline Constant *getInitializer() {
+ assert(hasInitializer() && "GV doesn't have initializer!");
+ return static_cast<Constant*>(Op<0>().get());
+ }
+ /// setInitializer - Sets the initializer for this global variable, removing
+ /// any existing initializer if InitVal==NULL. If this GV has type T*, the
+ /// initializer must have type T.
+ void setInitializer(Constant *InitVal);
+
+ /// If the value is a global constant, its value is immutable throughout the
+ /// runtime execution of the program. Assigning a value into the constant
+ /// leads to undefined behavior.
+ ///
+ bool isConstant() const { return isConstantGlobal; }
+ void setConstant(bool Val) { isConstantGlobal = Val; }
+
+  /// If the variable is "Thread Local", its value is not shared between threads.
+ bool isThreadLocal() const { return threadLocalMode != NotThreadLocal; }
+ void setThreadLocal(bool Val) {
+ threadLocalMode = Val ? GeneralDynamicTLSModel : NotThreadLocal;
+ }
+ void setThreadLocalMode(ThreadLocalMode Val) { threadLocalMode = Val; }
+ ThreadLocalMode getThreadLocalMode() const {
+ return static_cast<ThreadLocalMode>(threadLocalMode);
+ }
+
+ bool isExternallyInitialized() const {
+ return isExternallyInitializedConstant;
+ }
+ void setExternallyInitialized(bool Val) {
+ isExternallyInitializedConstant = Val;
+ }
+
+ /// copyAttributesFrom - copy all additional attributes (those not needed to
+ /// create a GlobalVariable) from the GlobalVariable Src to this one.
+ void copyAttributesFrom(const GlobalValue *Src);
+
+ /// removeFromParent - This method unlinks 'this' from the containing module,
+ /// but does not delete it.
+ ///
+ virtual void removeFromParent();
+
+ /// eraseFromParent - This method unlinks 'this' from the containing module
+ /// and deletes it.
+ ///
+ virtual void eraseFromParent();
+
+ /// Override Constant's implementation of this method so we can
+ /// replace constant initializers.
+ virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
+
+  // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == Value::GlobalVariableVal;
+ }
+};
+
+template <>
+struct OperandTraits<GlobalVariable> :
+ public OptionalOperandTraits<GlobalVariable> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalVariable, Value)
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
new file mode 100644
index 000000000000..1c71d0a90146
--- /dev/null
+++ b/include/llvm/IR/IRBuilder.h
@@ -0,0 +1,1401 @@
+//===---- llvm/IRBuilder.h - Builder for LLVM Instructions ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the IRBuilder class, which is used as a convenient way
+// to create LLVM instructions with a consistent and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_IRBUILDER_H
+#define LLVM_IR_IRBUILDER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/ConstantFolder.h"
+
+namespace llvm {
+ class MDNode;
+
+/// \brief This provides the default implementation of the IRBuilder
+/// 'InsertHelper' method that is called whenever an instruction is created by
+/// IRBuilder and needs to be inserted.
+///
+/// By default, this inserts the instruction at the insertion point.
+template <bool preserveNames = true>
+class IRBuilderDefaultInserter {
+protected:
+ void InsertHelper(Instruction *I, const Twine &Name,
+ BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+ if (BB) BB->getInstList().insert(InsertPt, I);
+ if (preserveNames)
+ I->setName(Name);
+ }
+};
+
+/// \brief Common base class shared among various IRBuilders.
+class IRBuilderBase {
+ DebugLoc CurDbgLocation;
+protected:
+ BasicBlock *BB;
+ BasicBlock::iterator InsertPt;
+ LLVMContext &Context;
+public:
+
+ IRBuilderBase(LLVMContext &context)
+ : Context(context) {
+ ClearInsertionPoint();
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Builder configuration methods
+ //===--------------------------------------------------------------------===//
+
+ /// \brief Clear the insertion point: created instructions will not be
+ /// inserted into a block.
+ void ClearInsertionPoint() {
+ BB = 0;
+ }
+
+ BasicBlock *GetInsertBlock() const { return BB; }
+ BasicBlock::iterator GetInsertPoint() const { return InsertPt; }
+ LLVMContext &getContext() const { return Context; }
+
+ /// \brief This specifies that created instructions should be appended to the
+ /// end of the specified block.
+ void SetInsertPoint(BasicBlock *TheBB) {
+ BB = TheBB;
+ InsertPt = BB->end();
+ }
+
+ /// \brief This specifies that created instructions should be inserted before
+ /// the specified instruction.
+ void SetInsertPoint(Instruction *I) {
+ BB = I->getParent();
+ InsertPt = I;
+ SetCurrentDebugLocation(I->getDebugLoc());
+ }
+
+ /// \brief This specifies that created instructions should be inserted at the
+ /// specified point.
+ void SetInsertPoint(BasicBlock *TheBB, BasicBlock::iterator IP) {
+ BB = TheBB;
+ InsertPt = IP;
+ }
+
+ /// \brief Find the nearest point that dominates this use, and specify that
+ /// created instructions should be inserted at this point.
+ void SetInsertPoint(Use &U) {
+ Instruction *UseInst = cast<Instruction>(U.getUser());
+ if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
+ BasicBlock *PredBB = Phi->getIncomingBlock(U);
+ assert(U != PredBB->getTerminator() && "critical edge not split");
+ SetInsertPoint(PredBB, PredBB->getTerminator());
+ return;
+ }
+ SetInsertPoint(UseInst);
+ }
+
+ /// \brief Set location information used by debugging information.
+ void SetCurrentDebugLocation(const DebugLoc &L) {
+ CurDbgLocation = L;
+ }
+
+ /// \brief Get location information used by debugging information.
+ DebugLoc getCurrentDebugLocation() const { return CurDbgLocation; }
+
+ /// \brief If this builder has a current debug location, set it on the
+ /// specified instruction.
+ void SetInstDebugLocation(Instruction *I) const {
+ if (!CurDbgLocation.isUnknown())
+ I->setDebugLoc(CurDbgLocation);
+ }
+
+ /// \brief Get the return type of the current function that we're emitting
+ /// into.
+ Type *getCurrentFunctionReturnType() const;
+
+ /// InsertPoint - A saved insertion point.
+ class InsertPoint {
+ BasicBlock *Block;
+ BasicBlock::iterator Point;
+
+ public:
+ /// \brief Creates a new insertion point which doesn't point to anything.
+ InsertPoint() : Block(0) {}
+
+ /// \brief Creates a new insertion point at the given location.
+ InsertPoint(BasicBlock *InsertBlock, BasicBlock::iterator InsertPoint)
+ : Block(InsertBlock), Point(InsertPoint) {}
+
+ /// \brief Returns true if this insert point is set.
+ bool isSet() const { return (Block != 0); }
+
+ llvm::BasicBlock *getBlock() const { return Block; }
+ llvm::BasicBlock::iterator getPoint() const { return Point; }
+ };
+
+ /// \brief Returns the current insert point.
+ InsertPoint saveIP() const {
+ return InsertPoint(GetInsertBlock(), GetInsertPoint());
+ }
+
+ /// \brief Returns the current insert point, clearing it in the process.
+ InsertPoint saveAndClearIP() {
+ InsertPoint IP(GetInsertBlock(), GetInsertPoint());
+ ClearInsertionPoint();
+ return IP;
+ }
+
+ /// \brief Sets the current insert point to a previously-saved location.
+ void restoreIP(InsertPoint IP) {
+ if (IP.isSet())
+ SetInsertPoint(IP.getBlock(), IP.getPoint());
+ else
+ ClearInsertionPoint();
+ }
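+
+  /// A common save/restore pattern, sketched (Builder is an assumed
+  /// IRBuilder and OtherBB an assumed BasicBlock*): temporarily emit into
+  /// another block, then resume where the builder left off.
+  /// \code
+  ///   IRBuilderBase::InsertPoint IP = Builder.saveIP();
+  ///   Builder.SetInsertPoint(OtherBB);
+  ///   // ... create instructions in OtherBB ...
+  ///   Builder.restoreIP(IP);
+  /// \endcode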
+
+ //===--------------------------------------------------------------------===//
+ // Miscellaneous creation methods.
+ //===--------------------------------------------------------------------===//
+
+  /// \brief Make a new global variable holding a string constant.
+  ///
+  /// Make a new global variable with an initializer of array-of-i8 type,
+  /// filled in with the null-terminated string value specified. The new
+  /// global variable will be marked mergeable with any others of the same
+  /// contents. If Name is specified, it is the name of the global variable
+  /// created.
+ Value *CreateGlobalString(StringRef Str, const Twine &Name = "");
+
+ /// \brief Get a constant value representing either true or false.
+ ConstantInt *getInt1(bool V) {
+ return ConstantInt::get(getInt1Ty(), V);
+ }
+
+ /// \brief Get the constant value for i1 true.
+ ConstantInt *getTrue() {
+ return ConstantInt::getTrue(Context);
+ }
+
+ /// \brief Get the constant value for i1 false.
+ ConstantInt *getFalse() {
+ return ConstantInt::getFalse(Context);
+ }
+
+ /// \brief Get a constant 8-bit value.
+ ConstantInt *getInt8(uint8_t C) {
+ return ConstantInt::get(getInt8Ty(), C);
+ }
+
+ /// \brief Get a constant 16-bit value.
+ ConstantInt *getInt16(uint16_t C) {
+ return ConstantInt::get(getInt16Ty(), C);
+ }
+
+ /// \brief Get a constant 32-bit value.
+ ConstantInt *getInt32(uint32_t C) {
+ return ConstantInt::get(getInt32Ty(), C);
+ }
+
+ /// \brief Get a constant 64-bit value.
+ ConstantInt *getInt64(uint64_t C) {
+ return ConstantInt::get(getInt64Ty(), C);
+ }
+
+ /// \brief Get a constant integer value.
+ ConstantInt *getInt(const APInt &AI) {
+ return ConstantInt::get(Context, AI);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Type creation methods
+ //===--------------------------------------------------------------------===//
+
+ /// \brief Fetch the type representing a single bit
+ IntegerType *getInt1Ty() {
+ return Type::getInt1Ty(Context);
+ }
+
+ /// \brief Fetch the type representing an 8-bit integer.
+ IntegerType *getInt8Ty() {
+ return Type::getInt8Ty(Context);
+ }
+
+ /// \brief Fetch the type representing a 16-bit integer.
+ IntegerType *getInt16Ty() {
+ return Type::getInt16Ty(Context);
+ }
+
+ /// \brief Fetch the type representing a 32-bit integer.
+ IntegerType *getInt32Ty() {
+ return Type::getInt32Ty(Context);
+ }
+
+ /// \brief Fetch the type representing a 64-bit integer.
+ IntegerType *getInt64Ty() {
+ return Type::getInt64Ty(Context);
+ }
+
+ /// \brief Fetch the type representing a 32-bit floating point value.
+ Type *getFloatTy() {
+ return Type::getFloatTy(Context);
+ }
+
+ /// \brief Fetch the type representing a 64-bit floating point value.
+ Type *getDoubleTy() {
+ return Type::getDoubleTy(Context);
+ }
+
+ /// \brief Fetch the type representing void.
+ Type *getVoidTy() {
+ return Type::getVoidTy(Context);
+ }
+
+ /// \brief Fetch the type representing a pointer to an 8-bit integer value.
+ PointerType *getInt8PtrTy(unsigned AddrSpace = 0) {
+ return Type::getInt8PtrTy(Context, AddrSpace);
+ }
+
+ /// \brief Fetch the type representing a pointer to an integer value.
+ IntegerType* getIntPtrTy(DataLayout *DL, unsigned AddrSpace = 0) {
+ return DL->getIntPtrType(Context, AddrSpace);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Intrinsic creation methods
+ //===--------------------------------------------------------------------===//
+
+ /// \brief Create and insert a memset to the specified pointer and the
+ /// specified value.
+ ///
+ /// If the pointer isn't an i8*, it will be converted. If a TBAA tag is
+ /// specified, it will be added to the instruction.
+ CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align,
+ bool isVolatile = false, MDNode *TBAATag = 0) {
+ return CreateMemSet(Ptr, Val, getInt64(Size), Align, isVolatile, TBAATag);
+ }
+
+ CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
+ bool isVolatile = false, MDNode *TBAATag = 0);
+
+ /// \brief Create and insert a memcpy between the specified pointers.
+ ///
+ /// If the pointers aren't i8*, they will be converted. If a TBAA tag is
+ /// specified, it will be added to the instruction.
+ CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
+ bool isVolatile = false, MDNode *TBAATag = 0,
+ MDNode *TBAAStructTag = 0) {
+ return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag,
+ TBAAStructTag);
+ }
+
+ CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile = false, MDNode *TBAATag = 0,
+ MDNode *TBAAStructTag = 0);
+
+ /// \brief Create and insert a memmove between the specified
+ /// pointers.
+ ///
+ /// If the pointers aren't i8*, they will be converted. If a TBAA tag is
+ /// specified, it will be added to the instruction.
+ CallInst *CreateMemMove(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
+ bool isVolatile = false, MDNode *TBAATag = 0) {
+ return CreateMemMove(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag);
+ }
+
+ CallInst *CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile = false, MDNode *TBAATag = 0);
+
+ /// \brief Create a lifetime.start intrinsic.
+ ///
+ /// If the pointer isn't i8* it will be converted.
+ CallInst *CreateLifetimeStart(Value *Ptr, ConstantInt *Size = 0);
+
+ /// \brief Create a lifetime.end intrinsic.
+ ///
+ /// If the pointer isn't i8* it will be converted.
+ CallInst *CreateLifetimeEnd(Value *Ptr, ConstantInt *Size = 0);
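+
+  /// For example, emitting a 64-byte copy through the CreateMemCpy helpers
+  /// above (a sketch; Dst and Src are assumed i8*-compatible Values and
+  /// Builder an assumed IRBuilder):
+  /// \code
+  ///   Builder.CreateMemCpy(Dst, Src, /*Size=*/64, /*Align=*/8);
+  /// \endcode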
+
+private:
+ Value *getCastedInt8PtrValue(Value *Ptr);
+};
+
+/// \brief This provides a uniform API for creating instructions and inserting
+/// them into a basic block: either at the end of a BasicBlock, or at a specific
+/// iterator location in a block.
+///
+/// Note that the builder does not expose the full generality of LLVM
+/// instructions. For access to extra instruction properties, use the mutators
+/// (e.g. setVolatile) on the instructions after they have been
+/// created. Convenience state exists to specify fast-math flags and fp-math
+/// tags.
+///
+/// The first template argument controls whether or not to preserve names in
+/// the final instruction output. This defaults to on. The second template
+/// argument specifies a class to use for creating constants. This defaults
+/// to creating minimally folded constants. The third template argument
+/// allows clients to specify custom insertion hooks that are called on every
+/// newly created instruction.
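+///
+/// A minimal usage sketch (assuming BB is a BasicBlock* and L, R are i32
+/// Values already in scope):
+/// \code
+///   IRBuilder<> Builder(BB);
+///   Value *Sum = Builder.CreateAdd(L, R, "sum");
+///   Builder.CreateRet(Sum);
+/// \endcode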
+template<bool preserveNames = true, typename T = ConstantFolder,
+ typename Inserter = IRBuilderDefaultInserter<preserveNames> >
+class IRBuilder : public IRBuilderBase, public Inserter {
+ T Folder;
+ MDNode *DefaultFPMathTag;
+ FastMathFlags FMF;
+public:
+ IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter(),
+ MDNode *FPMathTag = 0)
+ : IRBuilderBase(C), Inserter(I), Folder(F), DefaultFPMathTag(FPMathTag),
+ FMF() {
+ }
+
+ explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = 0)
+ : IRBuilderBase(C), Folder(), DefaultFPMathTag(FPMathTag), FMF() {
+ }
+
+ explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = 0)
+ : IRBuilderBase(TheBB->getContext()), Folder(F),
+ DefaultFPMathTag(FPMathTag), FMF() {
+ SetInsertPoint(TheBB);
+ }
+
+ explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = 0)
+ : IRBuilderBase(TheBB->getContext()), Folder(),
+ DefaultFPMathTag(FPMathTag), FMF() {
+ SetInsertPoint(TheBB);
+ }
+
+ explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = 0)
+ : IRBuilderBase(IP->getContext()), Folder(), DefaultFPMathTag(FPMathTag),
+ FMF() {
+ SetInsertPoint(IP);
+ SetCurrentDebugLocation(IP->getDebugLoc());
+ }
+
+ explicit IRBuilder(Use &U, MDNode *FPMathTag = 0)
+ : IRBuilderBase(U->getContext()), Folder(), DefaultFPMathTag(FPMathTag),
+ FMF() {
+ SetInsertPoint(U);
+ SetCurrentDebugLocation(cast<Instruction>(U.getUser())->getDebugLoc());
+ }
+
+ IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F,
+ MDNode *FPMathTag = 0)
+ : IRBuilderBase(TheBB->getContext()), Folder(F),
+ DefaultFPMathTag(FPMathTag), FMF() {
+ SetInsertPoint(TheBB, IP);
+ }
+
+ IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, MDNode *FPMathTag = 0)
+ : IRBuilderBase(TheBB->getContext()), Folder(),
+ DefaultFPMathTag(FPMathTag), FMF() {
+ SetInsertPoint(TheBB, IP);
+ }
+
+ /// \brief Get the constant folder being used.
+ const T &getFolder() { return Folder; }
+
+ /// \brief Get the floating point math metadata being used.
+ MDNode *getDefaultFPMathTag() const { return DefaultFPMathTag; }
+
+ /// \brief Get the flags to be applied to created floating point ops
+ FastMathFlags getFastMathFlags() const { return FMF; }
+
+ /// \brief Clear the fast-math flags.
+ void clearFastMathFlags() { FMF.clear(); }
+
+  /// \brief Set the floating point math metadata to be used.
+ void SetDefaultFPMathTag(MDNode *FPMathTag) { DefaultFPMathTag = FPMathTag; }
+
+ /// \brief Set the fast-math flags to be used with generated fp-math operators
+ void SetFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; }
+
+ /// \brief Return true if this builder is configured to actually add the
+ /// requested names to IR created through it.
+ bool isNamePreserving() const { return preserveNames; }
+
+ /// \brief Insert and return the specified instruction.
+ template<typename InstTy>
+ InstTy *Insert(InstTy *I, const Twine &Name = "") const {
+ this->InsertHelper(I, Name, BB, InsertPt);
+ this->SetInstDebugLocation(I);
+ return I;
+ }
+
+ /// \brief No-op overload to handle constants.
+ Constant *Insert(Constant *C, const Twine& = "") const {
+ return C;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Instruction creation methods: Terminators
+ //===--------------------------------------------------------------------===//
+
+private:
+ /// \brief Helper to add branch weight metadata onto an instruction.
+ /// \returns The annotated instruction.
+ template <typename InstTy>
+ InstTy *addBranchWeights(InstTy *I, MDNode *Weights) {
+ if (Weights)
+ I->setMetadata(LLVMContext::MD_prof, Weights);
+ return I;
+ }
+
+public:
+ /// \brief Create a 'ret void' instruction.
+ ReturnInst *CreateRetVoid() {
+ return Insert(ReturnInst::Create(Context));
+ }
+
+ /// \brief Create a 'ret <val>' instruction.
+ ReturnInst *CreateRet(Value *V) {
+ return Insert(ReturnInst::Create(Context, V));
+ }
+
+ /// \brief Create a sequence of N insertvalue instructions,
+  /// with one Value from the retVals array each, that build an aggregate
+ /// return value one value at a time, and a ret instruction to return
+ /// the resulting aggregate value.
+ ///
+ /// This is a convenience function for code that uses aggregate return values
+ /// as a vehicle for having multiple return values.
+ ReturnInst *CreateAggregateRet(Value *const *retVals, unsigned N) {
+ Value *V = UndefValue::get(getCurrentFunctionReturnType());
+ for (unsigned i = 0; i != N; ++i)
+ V = CreateInsertValue(V, retVals[i], i, "mrv");
+ return Insert(ReturnInst::Create(Context, V));
+ }
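+
+  /// For example, returning two values through an aggregate (a sketch; A and
+  /// B are assumed Values matching the fields of the function's struct
+  /// return type):
+  /// \code
+  ///   Value *RetVals[] = { A, B };
+  ///   Builder.CreateAggregateRet(RetVals, 2);
+  /// \endcode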
+
+ /// \brief Create an unconditional 'br label X' instruction.
+ BranchInst *CreateBr(BasicBlock *Dest) {
+ return Insert(BranchInst::Create(Dest));
+ }
+
+ /// \brief Create a conditional 'br Cond, TrueDest, FalseDest'
+ /// instruction.
+ BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False,
+ MDNode *BranchWeights = 0) {
+ return Insert(addBranchWeights(BranchInst::Create(True, False, Cond),
+ BranchWeights));
+ }
+
+ /// \brief Create a switch instruction with the specified value, default dest,
+ /// and with a hint for the number of cases that will be added (for efficient
+ /// allocation).
+ SwitchInst *CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases = 10,
+ MDNode *BranchWeights = 0) {
+ return Insert(addBranchWeights(SwitchInst::Create(V, Dest, NumCases),
+ BranchWeights));
+ }
+
+ /// \brief Create an indirect branch instruction with the specified address
+ /// operand, with an optional hint for the number of destinations that will be
+ /// added (for efficient allocation).
+ IndirectBrInst *CreateIndirectBr(Value *Addr, unsigned NumDests = 10) {
+ return Insert(IndirectBrInst::Create(Addr, NumDests));
+ }
+
+ InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest, const Twine &Name = "") {
+ return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest,
+ ArrayRef<Value *>()),
+ Name);
+ }
+ InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest, Value *Arg1,
+ const Twine &Name = "") {
+ return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Arg1),
+ Name);
+ }
+ InvokeInst *CreateInvoke3(Value *Callee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest, Value *Arg1,
+ Value *Arg2, Value *Arg3,
+ const Twine &Name = "") {
+ Value *Args[] = { Arg1, Arg2, Arg3 };
+ return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args),
+ Name);
+ }
+ /// \brief Create an invoke instruction.
+ InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
+ BasicBlock *UnwindDest, ArrayRef<Value *> Args,
+ const Twine &Name = "") {
+ return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args),
+ Name);
+ }
+
+ ResumeInst *CreateResume(Value *Exn) {
+ return Insert(ResumeInst::Create(Exn));
+ }
+
+ UnreachableInst *CreateUnreachable() {
+ return Insert(new UnreachableInst(Context));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Instruction creation methods: Binary Operators
+ //===--------------------------------------------------------------------===//
+private:
+ BinaryOperator *CreateInsertNUWNSWBinOp(BinaryOperator::BinaryOps Opc,
+ Value *LHS, Value *RHS,
+ const Twine &Name,
+ bool HasNUW, bool HasNSW) {
+ BinaryOperator *BO = Insert(BinaryOperator::Create(Opc, LHS, RHS), Name);
+ if (HasNUW) BO->setHasNoUnsignedWrap();
+ if (HasNSW) BO->setHasNoSignedWrap();
+ return BO;
+ }
+
+ Instruction *AddFPMathAttributes(Instruction *I,
+ MDNode *FPMathTag,
+ FastMathFlags FMF) const {
+ if (!FPMathTag)
+ FPMathTag = DefaultFPMathTag;
+ if (FPMathTag)
+ I->setMetadata(LLVMContext::MD_fpmath, FPMathTag);
+ I->setFastMathFlags(FMF);
+ return I;
+ }
+public:
+ Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
+ bool HasNUW = false, bool HasNSW = false) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateAdd(LC, RC, HasNUW, HasNSW), Name);
+ return CreateInsertNUWNSWBinOp(Instruction::Add, LHS, RHS, Name,
+ HasNUW, HasNSW);
+ }
+ Value *CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateAdd(LHS, RHS, Name, false, true);
+ }
+ Value *CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateAdd(LHS, RHS, Name, true, false);
+ }
+ Value *CreateFAdd(Value *LHS, Value *RHS, const Twine &Name = "",
+ MDNode *FPMathTag = 0) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateFAdd(LC, RC), Name);
+ return Insert(AddFPMathAttributes(BinaryOperator::CreateFAdd(LHS, RHS),
+ FPMathTag, FMF), Name);
+ }
+ Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "",
+ bool HasNUW = false, bool HasNSW = false) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateSub(LC, RC), Name);
+ return CreateInsertNUWNSWBinOp(Instruction::Sub, LHS, RHS, Name,
+ HasNUW, HasNSW);
+ }
+ Value *CreateNSWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateSub(LHS, RHS, Name, false, true);
+ }
+ Value *CreateNUWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateSub(LHS, RHS, Name, true, false);
+ }
+ Value *CreateFSub(Value *LHS, Value *RHS, const Twine &Name = "",
+ MDNode *FPMathTag = 0) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateFSub(LC, RC), Name);
+ return Insert(AddFPMathAttributes(BinaryOperator::CreateFSub(LHS, RHS),
+ FPMathTag, FMF), Name);
+ }
+ Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "",
+ bool HasNUW = false, bool HasNSW = false) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateMul(LC, RC), Name);
+ return CreateInsertNUWNSWBinOp(Instruction::Mul, LHS, RHS, Name,
+ HasNUW, HasNSW);
+ }
+ Value *CreateNSWMul(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateMul(LHS, RHS, Name, false, true);
+ }
+ Value *CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateMul(LHS, RHS, Name, true, false);
+ }
+ Value *CreateFMul(Value *LHS, Value *RHS, const Twine &Name = "",
+ MDNode *FPMathTag = 0) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateFMul(LC, RC), Name);
+ return Insert(AddFPMathAttributes(BinaryOperator::CreateFMul(LHS, RHS),
+ FPMathTag, FMF), Name);
+ }
+ Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "",
+ bool isExact = false) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateUDiv(LC, RC, isExact), Name);
+ if (!isExact)
+ return Insert(BinaryOperator::CreateUDiv(LHS, RHS), Name);
+ return Insert(BinaryOperator::CreateExactUDiv(LHS, RHS), Name);
+ }
+ Value *CreateExactUDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateUDiv(LHS, RHS, Name, true);
+ }
+ Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "",
+ bool isExact = false) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateSDiv(LC, RC, isExact), Name);
+ if (!isExact)
+ return Insert(BinaryOperator::CreateSDiv(LHS, RHS), Name);
+ return Insert(BinaryOperator::CreateExactSDiv(LHS, RHS), Name);
+ }
+ Value *CreateExactSDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateSDiv(LHS, RHS, Name, true);
+ }
+ Value *CreateFDiv(Value *LHS, Value *RHS, const Twine &Name = "",
+ MDNode *FPMathTag = 0) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateFDiv(LC, RC), Name);
+ return Insert(AddFPMathAttributes(BinaryOperator::CreateFDiv(LHS, RHS),
+ FPMathTag, FMF), Name);
+ }
+ Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateURem(LC, RC), Name);
+ return Insert(BinaryOperator::CreateURem(LHS, RHS), Name);
+ }
+ Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateSRem(LC, RC), Name);
+ return Insert(BinaryOperator::CreateSRem(LHS, RHS), Name);
+ }
+ Value *CreateFRem(Value *LHS, Value *RHS, const Twine &Name = "",
+ MDNode *FPMathTag = 0) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateFRem(LC, RC), Name);
+ return Insert(AddFPMathAttributes(BinaryOperator::CreateFRem(LHS, RHS),
+ FPMathTag, FMF), Name);
+ }
+
+ Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "",
+ bool HasNUW = false, bool HasNSW = false) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateShl(LC, RC, HasNUW, HasNSW), Name);
+ return CreateInsertNUWNSWBinOp(Instruction::Shl, LHS, RHS, Name,
+ HasNUW, HasNSW);
+ }
+ Value *CreateShl(Value *LHS, const APInt &RHS, const Twine &Name = "",
+ bool HasNUW = false, bool HasNSW = false) {
+ return CreateShl(LHS, ConstantInt::get(LHS->getType(), RHS), Name,
+ HasNUW, HasNSW);
+ }
+ Value *CreateShl(Value *LHS, uint64_t RHS, const Twine &Name = "",
+ bool HasNUW = false, bool HasNSW = false) {
+ return CreateShl(LHS, ConstantInt::get(LHS->getType(), RHS), Name,
+ HasNUW, HasNSW);
+ }
+
+ Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "",
+ bool isExact = false) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateLShr(LC, RC, isExact), Name);
+ if (!isExact)
+ return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name);
+ return Insert(BinaryOperator::CreateExactLShr(LHS, RHS), Name);
+ }
+ Value *CreateLShr(Value *LHS, const APInt &RHS, const Twine &Name = "",
+ bool isExact = false) {
+ return CreateLShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
+ }
+ Value *CreateLShr(Value *LHS, uint64_t RHS, const Twine &Name = "",
+ bool isExact = false) {
+ return CreateLShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
+ }
+
+ Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "",
+ bool isExact = false) {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateAShr(LC, RC, isExact), Name);
+ if (!isExact)
+ return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name);
+ return Insert(BinaryOperator::CreateExactAShr(LHS, RHS), Name);
+ }
+ Value *CreateAShr(Value *LHS, const APInt &RHS, const Twine &Name = "",
+ bool isExact = false) {
+ return CreateAShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
+ }
+ Value *CreateAShr(Value *LHS, uint64_t RHS, const Twine &Name = "",
+ bool isExact = false) {
+ return CreateAShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
+ }
+
+ Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
+ if (Constant *RC = dyn_cast<Constant>(RHS)) {
+ if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isAllOnesValue())
+ return LHS; // LHS & -1 -> LHS
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ return Insert(Folder.CreateAnd(LC, RC), Name);
+ }
+ return Insert(BinaryOperator::CreateAnd(LHS, RHS), Name);
+ }
+ Value *CreateAnd(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+ return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+ }
+ Value *CreateAnd(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+ return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+ }
+
+ Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
+ if (Constant *RC = dyn_cast<Constant>(RHS)) {
+ if (RC->isNullValue())
+ return LHS; // LHS | 0 -> LHS
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ return Insert(Folder.CreateOr(LC, RC), Name);
+ }
+ return Insert(BinaryOperator::CreateOr(LHS, RHS), Name);
+ }
+ Value *CreateOr(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+ return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+ }
+ Value *CreateOr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+ return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+ }
+
+ Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateXor(LC, RC), Name);
+ return Insert(BinaryOperator::CreateXor(LHS, RHS), Name);
+ }
+ Value *CreateXor(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+ return CreateXor(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+ }
+ Value *CreateXor(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+ return CreateXor(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+ }
+
+ Value *CreateBinOp(Instruction::BinaryOps Opc,
+ Value *LHS, Value *RHS, const Twine &Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateBinOp(Opc, LC, RC), Name);
+ return Insert(BinaryOperator::Create(Opc, LHS, RHS), Name);
+ }
+
+ Value *CreateNeg(Value *V, const Twine &Name = "",
+ bool HasNUW = false, bool HasNSW = false) {
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreateNeg(VC, HasNUW, HasNSW), Name);
+ BinaryOperator *BO = Insert(BinaryOperator::CreateNeg(V), Name);
+ if (HasNUW) BO->setHasNoUnsignedWrap();
+ if (HasNSW) BO->setHasNoSignedWrap();
+ return BO;
+ }
+ Value *CreateNSWNeg(Value *V, const Twine &Name = "") {
+ return CreateNeg(V, Name, false, true);
+ }
+ Value *CreateNUWNeg(Value *V, const Twine &Name = "") {
+ return CreateNeg(V, Name, true, false);
+ }
+ Value *CreateFNeg(Value *V, const Twine &Name = "", MDNode *FPMathTag = 0) {
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreateFNeg(VC), Name);
+ return Insert(AddFPMathAttributes(BinaryOperator::CreateFNeg(V),
+ FPMathTag, FMF), Name);
+ }
+ Value *CreateNot(Value *V, const Twine &Name = "") {
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreateNot(VC), Name);
+ return Insert(BinaryOperator::CreateNot(V), Name);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Instruction creation methods: Memory Instructions
+ //===--------------------------------------------------------------------===//
+
+ AllocaInst *CreateAlloca(Type *Ty, Value *ArraySize = 0,
+ const Twine &Name = "") {
+ return Insert(new AllocaInst(Ty, ArraySize), Name);
+ }
+ // \brief Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of
+ // converting the string to 'bool' for the isVolatile parameter.
+ LoadInst *CreateLoad(Value *Ptr, const char *Name) {
+ return Insert(new LoadInst(Ptr), Name);
+ }
+ LoadInst *CreateLoad(Value *Ptr, const Twine &Name = "") {
+ return Insert(new LoadInst(Ptr), Name);
+ }
+ LoadInst *CreateLoad(Value *Ptr, bool isVolatile, const Twine &Name = "") {
+ return Insert(new LoadInst(Ptr, 0, isVolatile), Name);
+ }
+ StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
+ return Insert(new StoreInst(Val, Ptr, isVolatile));
+ }
+ // \brief Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")'
+ // correctly, instead of converting the string to 'bool' for the isVolatile
+ // parameter.
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name) {
+ LoadInst *LI = CreateLoad(Ptr, Name);
+ LI->setAlignment(Align);
+ return LI;
+ }
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align,
+ const Twine &Name = "") {
+ LoadInst *LI = CreateLoad(Ptr, Name);
+ LI->setAlignment(Align);
+ return LI;
+ }
+ LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, bool isVolatile,
+ const Twine &Name = "") {
+ LoadInst *LI = CreateLoad(Ptr, isVolatile, Name);
+ LI->setAlignment(Align);
+ return LI;
+ }
+ StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align,
+ bool isVolatile = false) {
+ StoreInst *SI = CreateStore(Val, Ptr, isVolatile);
+ SI->setAlignment(Align);
+ return SI;
+ }
+ FenceInst *CreateFence(AtomicOrdering Ordering,
+ SynchronizationScope SynchScope = CrossThread) {
+ return Insert(new FenceInst(Context, Ordering, SynchScope));
+ }
+ AtomicCmpXchgInst *CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope = CrossThread) {
+ return Insert(new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, SynchScope));
+ }
+ AtomicRMWInst *CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope = CrossThread) {
+ return Insert(new AtomicRMWInst(Op, Ptr, Val, Ordering, SynchScope));
+ }
+ Value *CreateGEP(Value *Ptr, ArrayRef<Value *> IdxList,
+ const Twine &Name = "") {
+ if (Constant *PC = dyn_cast<Constant>(Ptr)) {
+ // Every index must be constant.
+ size_t i, e;
+ for (i = 0, e = IdxList.size(); i != e; ++i)
+ if (!isa<Constant>(IdxList[i]))
+ break;
+ if (i == e)
+ return Insert(Folder.CreateGetElementPtr(PC, IdxList), Name);
+ }
+ return Insert(GetElementPtrInst::Create(Ptr, IdxList), Name);
+ }
+ Value *CreateInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList,
+ const Twine &Name = "") {
+ if (Constant *PC = dyn_cast<Constant>(Ptr)) {
+ // Every index must be constant.
+ size_t i, e;
+ for (i = 0, e = IdxList.size(); i != e; ++i)
+ if (!isa<Constant>(IdxList[i]))
+ break;
+ if (i == e)
+ return Insert(Folder.CreateInBoundsGetElementPtr(PC, IdxList), Name);
+ }
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, IdxList), Name);
+ }
+ Value *CreateGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ if (Constant *IC = dyn_cast<Constant>(Idx))
+ return Insert(Folder.CreateGetElementPtr(PC, IC), Name);
+ return Insert(GetElementPtrInst::Create(Ptr, Idx), Name);
+ }
+ Value *CreateInBoundsGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ if (Constant *IC = dyn_cast<Constant>(Idx))
+ return Insert(Folder.CreateInBoundsGetElementPtr(PC, IC), Name);
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idx), Name);
+ }
+ Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name = "") {
+ Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Insert(Folder.CreateGetElementPtr(PC, Idx), Name);
+
+ return Insert(GetElementPtrInst::Create(Ptr, Idx), Name);
+ }
+ Value *CreateConstInBoundsGEP1_32(Value *Ptr, unsigned Idx0,
+ const Twine &Name = "") {
+ Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idx), Name);
+
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idx), Name);
+ }
+ Value *CreateConstGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
+ const Twine &Name = "") {
+ Value *Idxs[] = {
+ ConstantInt::get(Type::getInt32Ty(Context), Idx0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx1)
+ };
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Insert(Folder.CreateGetElementPtr(PC, Idxs), Name);
+
+ return Insert(GetElementPtrInst::Create(Ptr, Idxs), Name);
+ }
+ Value *CreateConstInBoundsGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
+ const Twine &Name = "") {
+ Value *Idxs[] = {
+ ConstantInt::get(Type::getInt32Ty(Context), Idx0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx1)
+ };
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idxs), Name);
+
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs), Name);
+ }
+ Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
+ Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Insert(Folder.CreateGetElementPtr(PC, Idx), Name);
+
+ return Insert(GetElementPtrInst::Create(Ptr, Idx), Name);
+ }
+ Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
+ const Twine &Name = "") {
+ Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idx), Name);
+
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idx), Name);
+ }
+ Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+ const Twine &Name = "") {
+ Value *Idxs[] = {
+ ConstantInt::get(Type::getInt64Ty(Context), Idx0),
+ ConstantInt::get(Type::getInt64Ty(Context), Idx1)
+ };
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Insert(Folder.CreateGetElementPtr(PC, Idxs), Name);
+
+ return Insert(GetElementPtrInst::Create(Ptr, Idxs), Name);
+ }
+ Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+ const Twine &Name = "") {
+ Value *Idxs[] = {
+ ConstantInt::get(Type::getInt64Ty(Context), Idx0),
+ ConstantInt::get(Type::getInt64Ty(Context), Idx1)
+ };
+
+ if (Constant *PC = dyn_cast<Constant>(Ptr))
+ return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idxs), Name);
+
+ return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs), Name);
+ }
+ Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = "") {
+ return CreateConstInBoundsGEP2_32(Ptr, 0, Idx, Name);
+ }
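+
+  /// For example, addressing field 1 of a pointed-to struct (a sketch;
+  /// StructPtr is an assumed Value of pointer-to-struct type):
+  /// \code
+  ///   Value *FieldAddr = Builder.CreateStructGEP(StructPtr, 1, "field1");
+  /// \endcode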
+
+ /// \brief Same as CreateGlobalString, but return a pointer with "i8*" type
+ /// instead of a pointer to array of i8.
+ Value *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "") {
+ Value *gv = CreateGlobalString(Str, Name);
+ Value *zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ Value *Args[] = { zero, zero };
+ return CreateInBoundsGEP(gv, Args, Name);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Instruction creation methods: Cast/Conversion Operators
+ //===--------------------------------------------------------------------===//
+
+ Value *CreateTrunc(Value *V, Type *DestTy, const Twine &Name = "") {
+ return CreateCast(Instruction::Trunc, V, DestTy, Name);
+ }
+ Value *CreateZExt(Value *V, Type *DestTy, const Twine &Name = "") {
+ return CreateCast(Instruction::ZExt, V, DestTy, Name);
+ }
+ Value *CreateSExt(Value *V, Type *DestTy, const Twine &Name = "") {
+ return CreateCast(Instruction::SExt, V, DestTy, Name);
+ }
+ /// \brief Create a ZExt or Trunc from the integer value V to DestTy. Return
+ /// the value untouched if the type of V is already DestTy.
+ Value *CreateZExtOrTrunc(Value *V, Type *DestTy,
+ const Twine &Name = "") {
+ assert(V->getType()->isIntOrIntVectorTy() &&
+ DestTy->isIntOrIntVectorTy() &&
+ "Can only zero extend/truncate integers!");
+ Type *VTy = V->getType();
+ if (VTy->getScalarSizeInBits() < DestTy->getScalarSizeInBits())
+ return CreateZExt(V, DestTy, Name);
+ if (VTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits())
+ return CreateTrunc(V, DestTy, Name);
+ return V;
+ }
+ /// \brief Create a SExt or Trunc from the integer value V to DestTy. Return
+ /// the value untouched if the type of V is already DestTy.
+ Value *CreateSExtOrTrunc(Value *V, Type *DestTy,
+ const Twine &Name = "") {
+ assert(V->getType()->isIntOrIntVectorTy() &&
+ DestTy->isIntOrIntVectorTy() &&
+ "Can only sign extend/truncate integers!");
+ Type *VTy = V->getType();
+ if (VTy->getScalarSizeInBits() < DestTy->getScalarSizeInBits())
+ return CreateSExt(V, DestTy, Name);
+ if (VTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits())
+ return CreateTrunc(V, DestTy, Name);
+ return V;
+ }
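+
+ // A small sketch of the width-based dispatch above (assuming an
+ // IRBuilder<> B and an i16 value V; names are illustrative):
+ //   B.CreateZExtOrTrunc(V, B.getInt32Ty()); // emits zext i16 -> i32
+ //   B.CreateZExtOrTrunc(V, B.getInt8Ty());  // emits trunc i16 -> i8
+ //   B.CreateZExtOrTrunc(V, B.getInt16Ty()); // returns V untouched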
+ Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = "") {
+ return CreateCast(Instruction::FPToUI, V, DestTy, Name);
+ }
+ Value *CreateFPToSI(Value *V, Type *DestTy, const Twine &Name = "") {
+ return CreateCast(Instruction::FPToSI, V, DestTy, Name);
+ }
+ Value *CreateUIToFP(Value *V, Type *DestTy, const Twine &Name = "") {
+ return CreateCast(Instruction::UIToFP, V, DestTy, Name);
+ }
+ Value *CreateSIToFP(Value *V, Type *DestTy, const Twine &Name = "") {
+ return CreateCast(Instruction::SIToFP, V, DestTy, Name);
+ }
+ Value *CreateFPTrunc(Value *V, Type *DestTy,
+ const Twine &Name = "") {
+ return CreateCast(Instruction::FPTrunc, V, DestTy, Name);
+ }
+ Value *CreateFPExt(Value *V, Type *DestTy, const Twine &Name = "") {
+ return CreateCast(Instruction::FPExt, V, DestTy, Name);
+ }
+ Value *CreatePtrToInt(Value *V, Type *DestTy,
+ const Twine &Name = "") {
+ return CreateCast(Instruction::PtrToInt, V, DestTy, Name);
+ }
+ Value *CreateIntToPtr(Value *V, Type *DestTy,
+ const Twine &Name = "") {
+ return CreateCast(Instruction::IntToPtr, V, DestTy, Name);
+ }
+ Value *CreateBitCast(Value *V, Type *DestTy,
+ const Twine &Name = "") {
+ return CreateCast(Instruction::BitCast, V, DestTy, Name);
+ }
+ Value *CreateZExtOrBitCast(Value *V, Type *DestTy,
+ const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreateZExtOrBitCast(VC, DestTy), Name);
+ return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name);
+ }
+ Value *CreateSExtOrBitCast(Value *V, Type *DestTy,
+ const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreateSExtOrBitCast(VC, DestTy), Name);
+ return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name);
+ }
+ Value *CreateTruncOrBitCast(Value *V, Type *DestTy,
+ const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreateTruncOrBitCast(VC, DestTy), Name);
+ return Insert(CastInst::CreateTruncOrBitCast(V, DestTy), Name);
+ }
+ Value *CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy,
+ const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreateCast(Op, VC, DestTy), Name);
+ return Insert(CastInst::Create(Op, V, DestTy), Name);
+ }
+ Value *CreatePointerCast(Value *V, Type *DestTy,
+ const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreatePointerCast(VC, DestTy), Name);
+ return Insert(CastInst::CreatePointerCast(V, DestTy), Name);
+ }
+ Value *CreateIntCast(Value *V, Type *DestTy, bool isSigned,
+ const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreateIntCast(VC, DestTy, isSigned), Name);
+ return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name);
+ }
+private:
+ // \brief Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a
+ // compile-time error instead of silently converting the string literal to
+ // bool for the isSigned parameter.
+ Value *CreateIntCast(Value *, Type *, const char *) LLVM_DELETED_FUNCTION;
+public:
+ Value *CreateFPCast(Value *V, Type *DestTy, const Twine &Name = "") {
+ if (V->getType() == DestTy)
+ return V;
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreateFPCast(VC, DestTy), Name);
+ return Insert(CastInst::CreateFPCast(V, DestTy), Name);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Instruction creation methods: Compare Instructions
+ //===--------------------------------------------------------------------===//
+
+ Value *CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateICmp(ICmpInst::ICMP_EQ, LHS, RHS, Name);
+ }
+ Value *CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateICmp(ICmpInst::ICMP_NE, LHS, RHS, Name);
+ }
+ Value *CreateICmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateICmp(ICmpInst::ICMP_UGT, LHS, RHS, Name);
+ }
+ Value *CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateICmp(ICmpInst::ICMP_UGE, LHS, RHS, Name);
+ }
+ Value *CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateICmp(ICmpInst::ICMP_ULT, LHS, RHS, Name);
+ }
+ Value *CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateICmp(ICmpInst::ICMP_ULE, LHS, RHS, Name);
+ }
+ Value *CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateICmp(ICmpInst::ICMP_SGT, LHS, RHS, Name);
+ }
+ Value *CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateICmp(ICmpInst::ICMP_SGE, LHS, RHS, Name);
+ }
+ Value *CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateICmp(ICmpInst::ICMP_SLT, LHS, RHS, Name);
+ }
+ Value *CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateICmp(ICmpInst::ICMP_SLE, LHS, RHS, Name);
+ }
+
+ Value *CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_OEQ, LHS, RHS, Name);
+ }
+ Value *CreateFCmpOGT(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_OGT, LHS, RHS, Name);
+ }
+ Value *CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_OGE, LHS, RHS, Name);
+ }
+ Value *CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_OLT, LHS, RHS, Name);
+ }
+ Value *CreateFCmpOLE(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_OLE, LHS, RHS, Name);
+ }
+ Value *CreateFCmpONE(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_ONE, LHS, RHS, Name);
+ }
+ Value *CreateFCmpORD(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_ORD, LHS, RHS, Name);
+ }
+ Value *CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_UNO, LHS, RHS, Name);
+ }
+ Value *CreateFCmpUEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_UEQ, LHS, RHS, Name);
+ }
+ Value *CreateFCmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_UGT, LHS, RHS, Name);
+ }
+ Value *CreateFCmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_UGE, LHS, RHS, Name);
+ }
+ Value *CreateFCmpULT(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_ULT, LHS, RHS, Name);
+ }
+ Value *CreateFCmpULE(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_ULE, LHS, RHS, Name);
+ }
+ Value *CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name = "") {
+ return CreateFCmp(FCmpInst::FCMP_UNE, LHS, RHS, Name);
+ }
+
+ Value *CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
+ const Twine &Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateICmp(P, LC, RC), Name);
+ return Insert(new ICmpInst(P, LHS, RHS), Name);
+ }
+ Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
+ const Twine &Name = "") {
+ if (Constant *LC = dyn_cast<Constant>(LHS))
+ if (Constant *RC = dyn_cast<Constant>(RHS))
+ return Insert(Folder.CreateFCmp(P, LC, RC), Name);
+ return Insert(new FCmpInst(P, LHS, RHS), Name);
+ }
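+
+ // Note the constant-folding path above: with two Constant operands no
+ // instruction is emitted. A sketch (assuming an IRBuilder<> B):
+ //   Value *T = B.CreateICmpULT(B.getInt32(1), B.getInt32(2));
+ // T is the folded i1 constant true, not an icmp instruction.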
+
+ //===--------------------------------------------------------------------===//
+ // Instruction creation methods: Other Instructions
+ //===--------------------------------------------------------------------===//
+
+ PHINode *CreatePHI(Type *Ty, unsigned NumReservedValues,
+ const Twine &Name = "") {
+ return Insert(PHINode::Create(Ty, NumReservedValues), Name);
+ }
+
+ CallInst *CreateCall(Value *Callee, const Twine &Name = "") {
+ return Insert(CallInst::Create(Callee), Name);
+ }
+ CallInst *CreateCall(Value *Callee, Value *Arg, const Twine &Name = "") {
+ return Insert(CallInst::Create(Callee, Arg), Name);
+ }
+ CallInst *CreateCall2(Value *Callee, Value *Arg1, Value *Arg2,
+ const Twine &Name = "") {
+ Value *Args[] = { Arg1, Arg2 };
+ return Insert(CallInst::Create(Callee, Args), Name);
+ }
+ CallInst *CreateCall3(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
+ const Twine &Name = "") {
+ Value *Args[] = { Arg1, Arg2, Arg3 };
+ return Insert(CallInst::Create(Callee, Args), Name);
+ }
+ CallInst *CreateCall4(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
+ Value *Arg4, const Twine &Name = "") {
+ Value *Args[] = { Arg1, Arg2, Arg3, Arg4 };
+ return Insert(CallInst::Create(Callee, Args), Name);
+ }
+ CallInst *CreateCall5(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
+ Value *Arg4, Value *Arg5, const Twine &Name = "") {
+ Value *Args[] = { Arg1, Arg2, Arg3, Arg4, Arg5 };
+ return Insert(CallInst::Create(Callee, Args), Name);
+ }
+
+ CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
+ const Twine &Name = "") {
+ return Insert(CallInst::Create(Callee, Args), Name);
+ }
+
+ Value *CreateSelect(Value *C, Value *True, Value *False,
+ const Twine &Name = "") {
+ if (Constant *CC = dyn_cast<Constant>(C))
+ if (Constant *TC = dyn_cast<Constant>(True))
+ if (Constant *FC = dyn_cast<Constant>(False))
+ return Insert(Folder.CreateSelect(CC, TC, FC), Name);
+ return Insert(SelectInst::Create(C, True, False), Name);
+ }
+
+ VAArgInst *CreateVAArg(Value *List, Type *Ty, const Twine &Name = "") {
+ return Insert(new VAArgInst(List, Ty), Name);
+ }
+
+ Value *CreateExtractElement(Value *Vec, Value *Idx,
+ const Twine &Name = "") {
+ if (Constant *VC = dyn_cast<Constant>(Vec))
+ if (Constant *IC = dyn_cast<Constant>(Idx))
+ return Insert(Folder.CreateExtractElement(VC, IC), Name);
+ return Insert(ExtractElementInst::Create(Vec, Idx), Name);
+ }
+
+ Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx,
+ const Twine &Name = "") {
+ if (Constant *VC = dyn_cast<Constant>(Vec))
+ if (Constant *NC = dyn_cast<Constant>(NewElt))
+ if (Constant *IC = dyn_cast<Constant>(Idx))
+ return Insert(Folder.CreateInsertElement(VC, NC, IC), Name);
+ return Insert(InsertElementInst::Create(Vec, NewElt, Idx), Name);
+ }
+
+ Value *CreateShuffleVector(Value *V1, Value *V2, Value *Mask,
+ const Twine &Name = "") {
+ if (Constant *V1C = dyn_cast<Constant>(V1))
+ if (Constant *V2C = dyn_cast<Constant>(V2))
+ if (Constant *MC = dyn_cast<Constant>(Mask))
+ return Insert(Folder.CreateShuffleVector(V1C, V2C, MC), Name);
+ return Insert(new ShuffleVectorInst(V1, V2, Mask), Name);
+ }
+
+ Value *CreateExtractValue(Value *Agg,
+ ArrayRef<unsigned> Idxs,
+ const Twine &Name = "") {
+ if (Constant *AggC = dyn_cast<Constant>(Agg))
+ return Insert(Folder.CreateExtractValue(AggC, Idxs), Name);
+ return Insert(ExtractValueInst::Create(Agg, Idxs), Name);
+ }
+
+ Value *CreateInsertValue(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const Twine &Name = "") {
+ if (Constant *AggC = dyn_cast<Constant>(Agg))
+ if (Constant *ValC = dyn_cast<Constant>(Val))
+ return Insert(Folder.CreateInsertValue(AggC, ValC, Idxs), Name);
+ return Insert(InsertValueInst::Create(Agg, Val, Idxs), Name);
+ }
+
+ LandingPadInst *CreateLandingPad(Type *Ty, Value *PersFn, unsigned NumClauses,
+ const Twine &Name = "") {
+ return Insert(LandingPadInst::Create(Ty, PersFn, NumClauses), Name);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Utility creation methods
+ //===--------------------------------------------------------------------===//
+
+ /// \brief Return an i1 value testing if \p Arg is null.
+ Value *CreateIsNull(Value *Arg, const Twine &Name = "") {
+ return CreateICmpEQ(Arg, Constant::getNullValue(Arg->getType()),
+ Name);
+ }
+
+ /// \brief Return an i1 value testing if \p Arg is not null.
+ Value *CreateIsNotNull(Value *Arg, const Twine &Name = "") {
+ return CreateICmpNE(Arg, Constant::getNullValue(Arg->getType()),
+ Name);
+ }
+
+ /// \brief Return the i64 difference between two pointer values, dividing out
+ /// the size of the pointed-to objects.
+ ///
+ /// This is intended to implement C-style pointer subtraction. As such, the
+ /// pointers must be appropriately aligned for their element types and must
+ /// point into the same object.
+ Value *CreatePtrDiff(Value *LHS, Value *RHS, const Twine &Name = "") {
+ assert(LHS->getType() == RHS->getType() &&
+ "Pointer subtraction operand types must match!");
+ PointerType *ArgType = cast<PointerType>(LHS->getType());
+ Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context));
+ Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context));
+ Value *Difference = CreateSub(LHS_int, RHS_int);
+ return CreateExactSDiv(Difference,
+ ConstantExpr::getSizeOf(ArgType->getElementType()),
+ Name);
+ }
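+
+ // A sketch of the IR this expands to (assuming i32* values %P and %Q;
+ // names are illustrative, and the sizeof constant is shown pre-folded):
+ //   %lhs = ptrtoint i32* %P to i64
+ //   %rhs = ptrtoint i32* %Q to i64
+ //   %diff = sub i64 %lhs, %rhs
+ //   %ptrdiff = sdiv exact i64 %diff, 4   ; 4 == sizeof(i32)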
+
+ /// \brief Return a vector value that contains \p V broadcast to \p
+ /// NumElts elements.
+ Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") {
+ assert(NumElts > 0 && "Cannot splat to an empty vector!");
+
+ // First insert it into an undef vector so we can shuffle it.
+ Type *I32Ty = getInt32Ty();
+ Value *Undef = UndefValue::get(VectorType::get(V->getType(), NumElts));
+ V = CreateInsertElement(Undef, V, ConstantInt::get(I32Ty, 0),
+ Name + ".splatinsert");
+
+ // Shuffle the value across the desired number of elements.
+ Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32Ty, NumElts));
+ return CreateShuffleVector(V, Undef, Zeros, Name + ".splat");
+ }
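+
+ // A sketch of the insert+shuffle idiom above for NumElts == 4 and an i32
+ // value %x, with Name == "v" (value names are illustrative):
+ //   %v.splatinsert = insertelement <4 x i32> undef, i32 %x, i32 0
+ //   %v.splat = shufflevector <4 x i32> %v.splatinsert, <4 x i32> undef,
+ //                            <4 x i32> zeroinitializer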
+};
+
+}
+
+#endif
diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h
new file mode 100644
index 000000000000..33e4ab8522d1
--- /dev/null
+++ b/include/llvm/IR/InlineAsm.h
@@ -0,0 +1,309 @@
+//===-- llvm/InlineAsm.h - Class to represent inline asm strings-*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents inline asm strings, which are Value*s used as the
+// callee operand of call instructions. InlineAsm values are uniqued like
+// constants, and created via InlineAsm::get(...).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_INLINEASM_H
+#define LLVM_IR_INLINEASM_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Value.h"
+#include <vector>
+
+namespace llvm {
+
+class PointerType;
+class FunctionType;
+class Module;
+struct InlineAsmKeyType;
+template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
+ bool HasLargeKey>
+class ConstantUniqueMap;
+template<class ConstantClass, class TypeClass, class ValType>
+struct ConstantCreator;
+
+class InlineAsm : public Value {
+public:
+ enum AsmDialect {
+ AD_ATT,
+ AD_Intel
+ };
+
+private:
+ friend struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType>;
+ friend class ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&,
+ PointerType, InlineAsm, false>;
+
+ InlineAsm(const InlineAsm &) LLVM_DELETED_FUNCTION;
+ void operator=(const InlineAsm&) LLVM_DELETED_FUNCTION;
+
+ std::string AsmString, Constraints;
+ bool HasSideEffects;
+ bool IsAlignStack;
+ AsmDialect Dialect;
+
+ InlineAsm(PointerType *Ty, const std::string &AsmString,
+ const std::string &Constraints, bool hasSideEffects,
+ bool isAlignStack, AsmDialect asmDialect);
+ virtual ~InlineAsm();
+
+ /// When the ConstantUniqueMap merges two types and makes two InlineAsms
+ /// identical, it destroys one of them with this method.
+ void destroyConstant();
+public:
+
+ /// InlineAsm::get - Return the specified uniqued inline asm string.
+ ///
+ static InlineAsm *get(FunctionType *Ty, StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
+ bool isAlignStack = false,
+ AsmDialect asmDialect = AD_ATT);
+
+ bool hasSideEffects() const { return HasSideEffects; }
+ bool isAlignStack() const { return IsAlignStack; }
+ AsmDialect getDialect() const { return Dialect; }
+
+ /// getType - InlineAsm values are always pointers.
+ ///
+ PointerType *getType() const {
+ return reinterpret_cast<PointerType*>(Value::getType());
+ }
+
+ /// getFunctionType - InlineAsm values are always pointers to functions.
+ ///
+ FunctionType *getFunctionType() const;
+
+ const std::string &getAsmString() const { return AsmString; }
+ const std::string &getConstraintString() const { return Constraints; }
+
+ /// Verify - This static method can be used by the parser to check to see if
+ /// the specified constraint string is legal for the type. This returns true
+ /// if legal, false if not.
+ ///
+ static bool Verify(FunctionType *Ty, StringRef Constraints);
+
+ // Constraint String Parsing
+ enum ConstraintPrefix {
+ isInput, // 'x'
+ isOutput, // '=x'
+ isClobber // '~x'
+ };
+
+ typedef std::vector<std::string> ConstraintCodeVector;
+
+ struct SubConstraintInfo {
+ /// MatchingInput - If this is not -1, this is an output constraint where an
+ /// input constraint is required to match it (e.g. "0"). The value is the
+ /// constraint number that matches this one (for example, if this is
+ /// constraint #0 and constraint #4 has the value "0", this will be 4).
+ signed char MatchingInput;
+ /// Code - The constraint code, either the register name (in braces) or the
+ /// constraint letter/number.
+ ConstraintCodeVector Codes;
+ /// Default constructor.
+ SubConstraintInfo() : MatchingInput(-1) {}
+ };
+
+ typedef std::vector<SubConstraintInfo> SubConstraintInfoVector;
+ struct ConstraintInfo;
+ typedef std::vector<ConstraintInfo> ConstraintInfoVector;
+
+ struct ConstraintInfo {
+ /// Type - The basic type of the constraint: input/output/clobber
+ ///
+ ConstraintPrefix Type;
+
+ /// isEarlyClobber - "&": output operand writes result before inputs are all
+ /// read. This is only ever set for an output operand.
+ bool isEarlyClobber;
+
+ /// MatchingInput - If this is not -1, this is an output constraint where an
+ /// input constraint is required to match it (e.g. "0"). The value is the
+ /// constraint number that matches this one (for example, if this is
+ /// constraint #0 and constraint #4 has the value "0", this will be 4).
+ signed char MatchingInput;
+
+ /// hasMatchingInput - Return true if this is an output constraint that has
+ /// a matching input constraint.
+ bool hasMatchingInput() const { return MatchingInput != -1; }
+
+ /// isCommutative - This is set to true for a constraint that is commutative
+ /// with the next operand.
+ bool isCommutative;
+
+ /// isIndirect - True if this operand is an indirect operand. This means
+ /// that the address of the source or destination is present in the call
+ /// instruction, instead of it being returned or passed in explicitly. This
+ /// is represented with a '*' in the asm string.
+ bool isIndirect;
+
+ /// Code - The constraint code, either the register name (in braces) or the
+ /// constraint letter/number.
+ ConstraintCodeVector Codes;
+
+ /// isMultipleAlternative - '|': has multiple-alternative constraints.
+ bool isMultipleAlternative;
+
+ /// multipleAlternatives - If there are multiple alternative constraints,
+ /// this array will contain them. Otherwise it will be empty.
+ SubConstraintInfoVector multipleAlternatives;
+
+ /// The currently selected alternative constraint index.
+ unsigned currentAlternativeIndex;
+
+ /// Default constructor.
+ ConstraintInfo();
+
+ /// Copy constructor.
+ ConstraintInfo(const ConstraintInfo &other);
+
+ /// Parse - Analyze the specified string (e.g. "=*&{eax}") and fill in the
+ /// fields in this structure. If the constraint string is not understood,
+ /// return true, otherwise return false.
+ bool Parse(StringRef Str, ConstraintInfoVector &ConstraintsSoFar);
+
+ /// selectAlternative - Point this constraint to the alternative constraint
+ /// indicated by the index.
+ void selectAlternative(unsigned index);
+ };
+
+ /// ParseConstraints - Split up the constraint string into the specific
+ /// constraints and their prefixes. If this returns an empty vector and
+ /// the constraint string itself isn't empty, there was an error parsing.
+ static ConstraintInfoVector ParseConstraints(StringRef ConstraintString);
+
+ /// ParseConstraints - Parse the constraints of this InlineAsm object,
+ /// returning them the same way that ParseConstraints(str) does.
+ ConstraintInfoVector ParseConstraints() const {
+ return ParseConstraints(Constraints);
+ }
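+
+ // A rough sketch of the expected result (based on the constraint grammar
+ // described above, not on code in this header): parsing "=r,r,~{memory}"
+ // yields three ConstraintInfos: an isOutput with code "r", an isInput
+ // with code "r", and an isClobber with code "{memory}".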
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == Value::InlineAsmVal;
+ }
+
+
+ // These are helper methods for dealing with flags in the INLINEASM SDNode
+ // in the backend.
+
+ enum {
+ // Fixed operands on an INLINEASM SDNode.
+ Op_InputChain = 0,
+ Op_AsmString = 1,
+ Op_MDNode = 2,
+ Op_ExtraInfo = 3, // HasSideEffects, IsAlignStack, AsmDialect.
+ Op_FirstOperand = 4,
+
+ // Fixed operands on an INLINEASM MachineInstr.
+ MIOp_AsmString = 0,
+ MIOp_ExtraInfo = 1, // HasSideEffects, IsAlignStack, AsmDialect.
+ MIOp_FirstOperand = 2,
+
+ // Interpretation of the MIOp_ExtraInfo bit field.
+ Extra_HasSideEffects = 1,
+ Extra_IsAlignStack = 2,
+ Extra_AsmDialect = 4,
+ Extra_MayLoad = 8,
+ Extra_MayStore = 16,
+
+ // Inline asm operands map to multiple SDNode / MachineInstr operands.
+ // The first operand is an immediate describing the asm operand; the low
+ // bits give the kind:
+ Kind_RegUse = 1, // Input register, "r".
+ Kind_RegDef = 2, // Output register, "=r".
+ Kind_RegDefEarlyClobber = 3, // Early-clobber output register, "=&r".
+ Kind_Clobber = 4, // Clobbered register, "~r".
+ Kind_Imm = 5, // Immediate.
+ Kind_Mem = 6, // Memory operand, "m".
+
+ Flag_MatchingOperand = 0x80000000
+ };
+
+ static unsigned getFlagWord(unsigned Kind, unsigned NumOps) {
+ assert(((NumOps << 3) & ~0xffff) == 0 && "Too many inline asm operands!");
+ assert(Kind >= Kind_RegUse && Kind <= Kind_Mem && "Invalid Kind");
+ return Kind | (NumOps << 3);
+ }
+
+ /// getFlagWordForMatchingOp - Augment an existing flag word returned by
+ /// getFlagWord with information indicating that this input operand is tied
+ /// to a previous output operand.
+ static unsigned getFlagWordForMatchingOp(unsigned InputFlag,
+ unsigned MatchedOperandNo) {
+ assert(MatchedOperandNo <= 0x7fff && "Too big matched operand");
+ assert((InputFlag & ~0xffff) == 0 && "High bits already contain data");
+ return InputFlag | Flag_MatchingOperand | (MatchedOperandNo << 16);
+ }
+
+ /// getFlagWordForRegClass - Augment an existing flag word returned by
+ /// getFlagWord with the required register class for the following register
+ /// operands.
+ /// A tied use operand cannot have a register class; use the register class
+ /// from the def operand instead.
+ static unsigned getFlagWordForRegClass(unsigned InputFlag, unsigned RC) {
+ // Store RC + 1, reserve the value 0 to mean 'no register class'.
+ ++RC;
+ assert(RC <= 0x7fff && "Too large register class ID");
+ assert((InputFlag & ~0xffff) == 0 && "High bits already contain data");
+ return InputFlag | (RC << 16);
+ }
+
+ static unsigned getKind(unsigned Flags) {
+ return Flags & 7;
+ }
+
+ static bool isRegDefKind(unsigned Flag) { return getKind(Flag) == Kind_RegDef; }
+ static bool isImmKind(unsigned Flag) { return getKind(Flag) == Kind_Imm; }
+ static bool isMemKind(unsigned Flag) { return getKind(Flag) == Kind_Mem; }
+ static bool isRegDefEarlyClobberKind(unsigned Flag) {
+ return getKind(Flag) == Kind_RegDefEarlyClobber;
+ }
+ static bool isClobberKind(unsigned Flag) {
+ return getKind(Flag) == Kind_Clobber;
+ }
+
+ /// getNumOperandRegisters - Extract the number of registers field from the
+ /// inline asm operand flag.
+ static unsigned getNumOperandRegisters(unsigned Flag) {
+ return (Flag & 0xffff) >> 3;
+ }
+
+ /// isUseOperandTiedToDef - Return true if the flag of the inline asm
+ /// operand indicates it is a use operand that's matched to a def operand.
+ static bool isUseOperandTiedToDef(unsigned Flag, unsigned &Idx) {
+ if ((Flag & Flag_MatchingOperand) == 0)
+ return false;
+ Idx = (Flag & ~Flag_MatchingOperand) >> 16;
+ return true;
+ }
+
+ /// hasRegClassConstraint - Returns true if the flag contains a register
+ /// class constraint. Sets RC to the register class ID.
+ static bool hasRegClassConstraint(unsigned Flag, unsigned &RC) {
+ if (Flag & Flag_MatchingOperand)
+ return false;
+ unsigned High = Flag >> 16;
+ // getFlagWordForRegClass() uses 0 to mean no register class, and otherwise
+ // stores RC + 1.
+ if (!High)
+ return false;
+ RC = High - 1;
+ return true;
+ }
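+
+ // A round-trip sketch of the flag-word encoding defined above:
+ //   unsigned F = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 2);
+ //   assert(InlineAsm::getKind(F) == InlineAsm::Kind_RegUse); // low 3 bits
+ //   assert(InlineAsm::getNumOperandRegisters(F) == 2);       // bits 3..15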
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h
new file mode 100644
index 000000000000..3e6903cb52d7
--- /dev/null
+++ b/include/llvm/IR/InstrTypes.h
@@ -0,0 +1,851 @@
+//===-- llvm/InstrTypes.h - Important Instruction subclasses ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various meta classes of instructions that exist in the VM
+// representation. Specific concrete subclasses of these may be found in the
+// i*.h files...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_INSTRTYPES_H
+#define LLVM_IR_INSTRTYPES_H
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/OperandTraits.h"
+
+namespace llvm {
+
+class LLVMContext;
+
+//===----------------------------------------------------------------------===//
+// TerminatorInst Class
+//===----------------------------------------------------------------------===//
+
+/// TerminatorInst - Subclasses of this class are all able to terminate a basic
+/// block. Thus, these are all the flow-control operations.
+///
+class TerminatorInst : public Instruction {
+protected:
+ TerminatorInst(Type *Ty, Instruction::TermOps iType,
+ Use *Ops, unsigned NumOps,
+ Instruction *InsertBefore = 0)
+ : Instruction(Ty, iType, Ops, NumOps, InsertBefore) {}
+
+ TerminatorInst(Type *Ty, Instruction::TermOps iType,
+ Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd)
+ : Instruction(Ty, iType, Ops, NumOps, InsertAtEnd) {}
+
+ // Out of line virtual method, so the vtable, etc. has a home.
+ ~TerminatorInst();
+
+ /// Virtual methods - Terminators should overload these and provide inline
+ /// overrides of non-V methods.
+ virtual BasicBlock *getSuccessorV(unsigned idx) const = 0;
+ virtual unsigned getNumSuccessorsV() const = 0;
+ virtual void setSuccessorV(unsigned idx, BasicBlock *B) = 0;
+ virtual TerminatorInst *clone_impl() const = 0;
+public:
+
+ /// getNumSuccessors - Return the number of successors that this terminator
+ /// has.
+ unsigned getNumSuccessors() const {
+ return getNumSuccessorsV();
+ }
+
+ /// getSuccessor - Return the specified successor.
+ ///
+ BasicBlock *getSuccessor(unsigned idx) const {
+ return getSuccessorV(idx);
+ }
+
+ /// setSuccessor - Update the specified successor to point at the provided
+ /// block.
+ void setSuccessor(unsigned idx, BasicBlock *B) {
+ setSuccessorV(idx, B);
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->isTerminator();
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+
+//===----------------------------------------------------------------------===//
+// UnaryInstruction Class
+//===----------------------------------------------------------------------===//
+
+class UnaryInstruction : public Instruction {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+
+protected:
+ UnaryInstruction(Type *Ty, unsigned iType, Value *V,
+ Instruction *IB = 0)
+ : Instruction(Ty, iType, &Op<0>(), 1, IB) {
+ Op<0>() = V;
+ }
+ UnaryInstruction(Type *Ty, unsigned iType, Value *V, BasicBlock *IAE)
+ : Instruction(Ty, iType, &Op<0>(), 1, IAE) {
+ Op<0>() = V;
+ }
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+
+ // Out of line virtual method, so the vtable, etc. has a home.
+ ~UnaryInstruction();
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Alloca ||
+ I->getOpcode() == Instruction::Load ||
+ I->getOpcode() == Instruction::VAArg ||
+ I->getOpcode() == Instruction::ExtractValue ||
+ (I->getOpcode() >= CastOpsBegin && I->getOpcode() < CastOpsEnd);
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<UnaryInstruction> :
+ public FixedNumOperandTraits<UnaryInstruction, 1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value)
+
+//===----------------------------------------------------------------------===//
+// BinaryOperator Class
+//===----------------------------------------------------------------------===//
+
+class BinaryOperator : public Instruction {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+protected:
+ void init(BinaryOps iType);
+ BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty,
+ const Twine &Name, Instruction *InsertBefore);
+ BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty,
+ const Twine &Name, BasicBlock *InsertAtEnd);
+ virtual BinaryOperator *clone_impl() const LLVM_OVERRIDE;
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// Create() - Construct a binary instruction, given the opcode and the two
+ /// operands. Optionally (if InsertBefore is specified) insert the instruction
+ /// into a BasicBlock right before the specified instruction. The specified
+ /// Instruction is allowed to be a dereferenced end iterator.
+ ///
+ static BinaryOperator *Create(BinaryOps Op, Value *S1, Value *S2,
+ const Twine &Name = Twine(),
+ Instruction *InsertBefore = 0);
+
+ /// Create() - Construct a binary instruction, given the opcode and the two
+ /// operands. Also automatically insert this instruction to the end of the
+ /// BasicBlock specified.
+ ///
+ static BinaryOperator *Create(BinaryOps Op, Value *S1, Value *S2,
+ const Twine &Name, BasicBlock *InsertAtEnd);
+
+ /// Create* - These methods just forward to Create, and are useful when you
+ /// statically know what type of instruction you're going to create. These
+ /// helpers just save some typing.
+#define HANDLE_BINARY_INST(N, OPC, CLASS) \
+ static BinaryOperator *Create##OPC(Value *V1, Value *V2, \
+ const Twine &Name = "") {\
+ return Create(Instruction::OPC, V1, V2, Name);\
+ }
+#include "llvm/IR/Instruction.def"
+#define HANDLE_BINARY_INST(N, OPC, CLASS) \
+ static BinaryOperator *Create##OPC(Value *V1, Value *V2, \
+ const Twine &Name, BasicBlock *BB) {\
+ return Create(Instruction::OPC, V1, V2, Name, BB);\
+ }
+#include "llvm/IR/Instruction.def"
+#define HANDLE_BINARY_INST(N, OPC, CLASS) \
+ static BinaryOperator *Create##OPC(Value *V1, Value *V2, \
+ const Twine &Name, Instruction *I) {\
+ return Create(Instruction::OPC, V1, V2, Name, I);\
+ }
+#include "llvm/IR/Instruction.def"
+
+ static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
+ const Twine &Name = "") {
+ BinaryOperator *BO = Create(Opc, V1, V2, Name);
+ BO->setHasNoSignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
+ const Twine &Name, BasicBlock *BB) {
+ BinaryOperator *BO = Create(Opc, V1, V2, Name, BB);
+ BO->setHasNoSignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
+ const Twine &Name, Instruction *I) {
+ BinaryOperator *BO = Create(Opc, V1, V2, Name, I);
+ BO->setHasNoSignedWrap(true);
+ return BO;
+ }
+
+ static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2,
+ const Twine &Name = "") {
+ BinaryOperator *BO = Create(Opc, V1, V2, Name);
+ BO->setHasNoUnsignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2,
+ const Twine &Name, BasicBlock *BB) {
+ BinaryOperator *BO = Create(Opc, V1, V2, Name, BB);
+ BO->setHasNoUnsignedWrap(true);
+ return BO;
+ }
+ static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2,
+ const Twine &Name, Instruction *I) {
+ BinaryOperator *BO = Create(Opc, V1, V2, Name, I);
+ BO->setHasNoUnsignedWrap(true);
+ return BO;
+ }
+
+ static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2,
+ const Twine &Name = "") {
+ BinaryOperator *BO = Create(Opc, V1, V2, Name);
+ BO->setIsExact(true);
+ return BO;
+ }
+ static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2,
+ const Twine &Name, BasicBlock *BB) {
+ BinaryOperator *BO = Create(Opc, V1, V2, Name, BB);
+ BO->setIsExact(true);
+ return BO;
+ }
+ static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2,
+ const Twine &Name, Instruction *I) {
+ BinaryOperator *BO = Create(Opc, V1, V2, Name, I);
+ BO->setIsExact(true);
+ return BO;
+ }
+
+#define DEFINE_HELPERS(OPC, NUWNSWEXACT) \
+ static BinaryOperator *Create ## NUWNSWEXACT ## OPC \
+ (Value *V1, Value *V2, const Twine &Name = "") { \
+ return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name); \
+ } \
+ static BinaryOperator *Create ## NUWNSWEXACT ## OPC \
+ (Value *V1, Value *V2, const Twine &Name, BasicBlock *BB) { \
+ return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, BB); \
+ } \
+ static BinaryOperator *Create ## NUWNSWEXACT ## OPC \
+ (Value *V1, Value *V2, const Twine &Name, Instruction *I) { \
+ return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, I); \
+ }
+
+ DEFINE_HELPERS(Add, NSW) // CreateNSWAdd
+ DEFINE_HELPERS(Add, NUW) // CreateNUWAdd
+ DEFINE_HELPERS(Sub, NSW) // CreateNSWSub
+ DEFINE_HELPERS(Sub, NUW) // CreateNUWSub
+ DEFINE_HELPERS(Mul, NSW) // CreateNSWMul
+ DEFINE_HELPERS(Mul, NUW) // CreateNUWMul
+ DEFINE_HELPERS(Shl, NSW) // CreateNSWShl
+ DEFINE_HELPERS(Shl, NUW) // CreateNUWShl
+
+ DEFINE_HELPERS(SDiv, Exact) // CreateExactSDiv
+ DEFINE_HELPERS(UDiv, Exact) // CreateExactUDiv
+ DEFINE_HELPERS(AShr, Exact) // CreateExactAShr
+ DEFINE_HELPERS(LShr, Exact) // CreateExactLShr
+
+#undef DEFINE_HELPERS
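+
+ // A usage sketch of the generated helpers (assuming i32 Values A and B;
+ // names are illustrative):
+ //   BinaryOperator *Sum = BinaryOperator::CreateNSWAdd(A, B, "sum");
+ // expands to CreateNSW(Instruction::Add, A, B, "sum"), i.e. an Add with
+ // the no-signed-wrap flag set.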
+
+ /// Helper functions to construct and inspect unary operations (NEG and NOT)
+ /// via binary operators SUB and XOR:
+ ///
+ /// CreateNeg, CreateNot - Create the NEG and NOT
+ /// instructions out of SUB and XOR instructions.
+ ///
+ static BinaryOperator *CreateNeg(Value *Op, const Twine &Name = "",
+ Instruction *InsertBefore = 0);
+ static BinaryOperator *CreateNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd);
+ static BinaryOperator *CreateNSWNeg(Value *Op, const Twine &Name = "",
+ Instruction *InsertBefore = 0);
+ static BinaryOperator *CreateNSWNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd);
+ static BinaryOperator *CreateNUWNeg(Value *Op, const Twine &Name = "",
+ Instruction *InsertBefore = 0);
+ static BinaryOperator *CreateNUWNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd);
+ static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name = "",
+ Instruction *InsertBefore = 0);
+ static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd);
+ static BinaryOperator *CreateNot(Value *Op, const Twine &Name = "",
+ Instruction *InsertBefore = 0);
+ static BinaryOperator *CreateNot(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd);
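+
+ // A sketch of the binary-op lowering these helpers perform (for an i32
+ // value %x; the float case is shown for CreateFNeg):
+ //   CreateNeg(X)  emits: sub i32 0, %x
+ //   CreateNot(X)  emits: xor i32 %x, -1
+ //   CreateFNeg(X) emits: fsub float -0.0, %x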
+
+ /// isNeg, isFNeg, isNot - Check if the given Value is a
+ /// NEG, FNeg, or NOT instruction.
+ ///
+ static bool isNeg(const Value *V);
+ static bool isFNeg(const Value *V, bool IgnoreZeroSign = false);
+ static bool isNot(const Value *V);
+
+ /// getNegArgument, getNotArgument - Helper functions to extract the
+ /// unary argument of a NEG, FNEG or NOT operation implemented via
+ /// Sub, FSub, or Xor.
+ ///
+ static const Value *getNegArgument(const Value *BinOp);
+ static Value *getNegArgument( Value *BinOp);
+ static const Value *getFNegArgument(const Value *BinOp);
+ static Value *getFNegArgument( Value *BinOp);
+ static const Value *getNotArgument(const Value *BinOp);
+ static Value *getNotArgument( Value *BinOp);
+
+ BinaryOps getOpcode() const {
+ return static_cast<BinaryOps>(Instruction::getOpcode());
+ }
+
+ /// swapOperands - Exchange the two operands to this instruction.
+ /// This method is safe to use on any binary instruction and
+ /// does not modify the semantics of the instruction. If the instruction
+ /// cannot be reversed (i.e., it's a Div), then return true.
+ ///
+ bool swapOperands();
+
+ /// setHasNoUnsignedWrap - Set or clear the nuw flag on this instruction,
+ /// which must be an operator which supports this flag. See LangRef.html
+ /// for the meaning of this flag.
+ void setHasNoUnsignedWrap(bool b = true);
+
+ /// setHasNoSignedWrap - Set or clear the nsw flag on this instruction,
+ /// which must be an operator which supports this flag. See LangRef.html
+ /// for the meaning of this flag.
+ void setHasNoSignedWrap(bool b = true);
+
+ /// setIsExact - Set or clear the exact flag on this instruction,
+ /// which must be an operator which supports this flag. See LangRef.html
+ /// for the meaning of this flag.
+ void setIsExact(bool b = true);
+
+ /// hasNoUnsignedWrap - Determine whether the no unsigned wrap flag is set.
+ bool hasNoUnsignedWrap() const;
+
+ /// hasNoSignedWrap - Determine whether the no signed wrap flag is set.
+ bool hasNoSignedWrap() const;
+
+ /// isExact - Determine whether the exact flag is set.
+ bool isExact() const;
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->isBinaryOp();
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<BinaryOperator> :
+ public FixedNumOperandTraits<BinaryOperator, 2> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value)
+
+//===----------------------------------------------------------------------===//
+// CastInst Class
+//===----------------------------------------------------------------------===//
+
+/// CastInst - This is the base class for all instructions that perform data
+/// casts. It is simply provided so that instruction category testing
+/// can be performed with code like:
+///
+/// if (isa<CastInst>(Instr)) { ... }
+/// @brief Base class of casting instructions.
+class CastInst : public UnaryInstruction {
+ virtual void anchor() LLVM_OVERRIDE;
+protected:
+ /// @brief Constructor with insert-before-instruction semantics for subclasses
+ CastInst(Type *Ty, unsigned iType, Value *S,
+ const Twine &NameStr = "", Instruction *InsertBefore = 0)
+ : UnaryInstruction(Ty, iType, S, InsertBefore) {
+ setName(NameStr);
+ }
+ /// @brief Constructor with insert-at-end-of-block semantics for subclasses
+ CastInst(Type *Ty, unsigned iType, Value *S,
+ const Twine &NameStr, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(Ty, iType, S, InsertAtEnd) {
+ setName(NameStr);
+ }
+public:
+ /// Provides a way to construct any of the CastInst subclasses using an
+ /// opcode instead of the subclass's constructor. The opcode must be in the
+ /// CastOps category (Instruction::isCast(opcode) returns true). This
+ /// constructor has insert-before-instruction semantics to automatically
+ /// insert the new CastInst before InsertBefore (if it is non-null).
+ /// @brief Construct any of the CastInst subclasses
+ static CastInst *Create(
+ Instruction::CastOps, ///< The opcode of the cast instruction
+ Value *S, ///< The value to be casted (operand 0)
+ Type *Ty, ///< The type to which cast should be made
+ const Twine &Name = "", ///< Name for the instruction
+ Instruction *InsertBefore = 0 ///< Place to insert the instruction
+ );
+ /// Provides a way to construct any of the CastInst subclasses using an
+ /// opcode instead of the subclass's constructor. The opcode must be in the
+ /// CastOps category. This constructor has insert-at-end-of-block semantics
+ /// to automatically insert the new CastInst at the end of InsertAtEnd (if
+ /// its non-null).
+ /// @brief Construct any of the CastInst subclasses
+ static CastInst *Create(
+ Instruction::CastOps, ///< The opcode for the cast instruction
+ Value *S, ///< The value to be casted (operand 0)
+ Type *Ty, ///< The type to which operand is casted
+ const Twine &Name, ///< The name for the instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// @brief Create a ZExt or BitCast cast instruction
+ static CastInst *CreateZExtOrBitCast(
+ Value *S, ///< The value to be casted (operand 0)
+ Type *Ty, ///< The type to which cast should be made
+ const Twine &Name = "", ///< Name for the instruction
+ Instruction *InsertBefore = 0 ///< Place to insert the instruction
+ );
+
+ /// @brief Create a ZExt or BitCast cast instruction
+ static CastInst *CreateZExtOrBitCast(
+ Value *S, ///< The value to be casted (operand 0)
+ Type *Ty, ///< The type to which operand is casted
+ const Twine &Name, ///< The name for the instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// @brief Create a SExt or BitCast cast instruction
+ static CastInst *CreateSExtOrBitCast(
+ Value *S, ///< The value to be casted (operand 0)
+ Type *Ty, ///< The type to which cast should be made
+ const Twine &Name = "", ///< Name for the instruction
+ Instruction *InsertBefore = 0 ///< Place to insert the instruction
+ );
+
+ /// @brief Create a SExt or BitCast cast instruction
+ static CastInst *CreateSExtOrBitCast(
+ Value *S, ///< The value to be casted (operand 0)
+ Type *Ty, ///< The type to which operand is casted
+ const Twine &Name, ///< The name for the instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// @brief Create a BitCast or a PtrToInt cast instruction
+ static CastInst *CreatePointerCast(
+ Value *S, ///< The pointer value to be casted (operand 0)
+ Type *Ty, ///< The type to which operand is casted
+ const Twine &Name, ///< The name for the instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// @brief Create a BitCast or a PtrToInt cast instruction
+ static CastInst *CreatePointerCast(
+ Value *S, ///< The pointer value to be casted (operand 0)
+ Type *Ty, ///< The type to which cast should be made
+ const Twine &Name = "", ///< Name for the instruction
+ Instruction *InsertBefore = 0 ///< Place to insert the instruction
+ );
+
+ /// @brief Create a ZExt, BitCast, or Trunc for int -> int casts.
+ static CastInst *CreateIntegerCast(
+ Value *S, ///< The integer value to be casted (operand 0)
+ Type *Ty, ///< The type to which cast should be made
+ bool isSigned, ///< Whether to regard S as signed or not
+ const Twine &Name = "", ///< Name for the instruction
+ Instruction *InsertBefore = 0 ///< Place to insert the instruction
+ );
+
+ /// @brief Create a ZExt, BitCast, or Trunc for int -> int casts.
+ static CastInst *CreateIntegerCast(
+ Value *S, ///< The integer value to be casted (operand 0)
+ Type *Ty, ///< The integer type to which operand is casted
+ bool isSigned, ///< Whether to regard S as signed or not
+ const Twine &Name, ///< The name for the instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// @brief Create an FPExt, BitCast, or FPTrunc for fp -> fp casts
+ static CastInst *CreateFPCast(
+ Value *S, ///< The floating point value to be casted
+ Type *Ty, ///< The floating point type to cast to
+ const Twine &Name = "", ///< Name for the instruction
+ Instruction *InsertBefore = 0 ///< Place to insert the instruction
+ );
+
+ /// @brief Create an FPExt, BitCast, or FPTrunc for fp -> fp casts
+ static CastInst *CreateFPCast(
+ Value *S, ///< The floating point value to be casted
+ Type *Ty, ///< The floating point type to cast to
+ const Twine &Name, ///< The name for the instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// @brief Create a Trunc or BitCast cast instruction
+ static CastInst *CreateTruncOrBitCast(
+ Value *S, ///< The value to be casted (operand 0)
+ Type *Ty, ///< The type to which cast should be made
+ const Twine &Name = "", ///< Name for the instruction
+ Instruction *InsertBefore = 0 ///< Place to insert the instruction
+ );
+
+ /// @brief Create a Trunc or BitCast cast instruction
+ static CastInst *CreateTruncOrBitCast(
+ Value *S, ///< The value to be casted (operand 0)
+ Type *Ty, ///< The type to which operand is casted
+ const Twine &Name, ///< The name for the instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// @brief Check whether it is valid to call getCastOpcode for these types.
+ static bool isCastable(
+ Type *SrcTy, ///< The Type from which the value should be cast.
+ Type *DestTy ///< The Type to which the value should be cast.
+ );
+
+ /// Returns the opcode necessary to cast Val into Ty using usual casting
+ /// rules.
+ /// @brief Infer the opcode for cast operand and type
+ static Instruction::CastOps getCastOpcode(
+ const Value *Val, ///< The value to cast
+ bool SrcIsSigned, ///< Whether to treat the source as signed
+ Type *Ty, ///< The Type to which the value should be casted
+ bool DstIsSigned ///< Whether to treat the dest. as signed
+ );
+
+ /// There are several places where we need to know if a cast instruction
+ /// only deals with integer source and destination types. To simplify that
+ /// logic, this method is provided.
+ /// @returns true iff the cast's operand and destination types are integers.
+ /// @brief Determine if this is an integer-only cast.
+ bool isIntegerCast() const;
+
+ /// A lossless cast is one that does not alter the basic value. It implies
+ /// a no-op cast but is more stringent, preventing things like int->float,
+ /// long->double, or int->ptr.
+ /// @returns true iff the cast is lossless.
+ /// @brief Determine if this is a lossless cast.
+ bool isLosslessCast() const;
+
+ /// A no-op cast is one that can be effected without changing any bits.
+ /// It implies that the source and destination types are the same size. The
+ /// IntPtrTy argument is used to make accurate determinations for casts
+ /// involving Integer and Pointer types. They are no-op casts if the integer
+ /// is the same size as the pointer. However, pointer size varies with
+ /// platform. Generally, the result of DataLayout::getIntPtrType() should be
+ /// passed in. If that's not available, use Type::Int64Ty, which will make
+ /// the isNoopCast call conservative.
+ /// @brief Determine if the described cast is a no-op cast.
+ static bool isNoopCast(
+ Instruction::CastOps Opcode, ///< Opcode of cast
+ Type *SrcTy, ///< SrcTy of cast
+ Type *DstTy, ///< DstTy of cast
+ Type *IntPtrTy ///< Integer type corresponding to Ptr types, or null
+ );
+
+ /// @brief Determine if this cast is a no-op cast.
+ bool isNoopCast(
+ Type *IntPtrTy ///< Integer type corresponding to pointer
+ ) const;
+
+ /// Determine how a pair of casts can be eliminated, if they can be at all.
+ /// This is a helper function for both CastInst and ConstantExpr.
+ /// @returns 0 if the CastInst pair can't be eliminated; otherwise
+ /// returns the Instruction::CastOps value for a cast that can replace
+ /// the pair, casting SrcTy to DstTy.
+ /// @brief Determine if a cast pair is eliminable
+ static unsigned isEliminableCastPair(
+ Instruction::CastOps firstOpcode, ///< Opcode of first cast
+ Instruction::CastOps secondOpcode, ///< Opcode of second cast
+ Type *SrcTy, ///< SrcTy of 1st cast
+ Type *MidTy, ///< DstTy of 1st cast & SrcTy of 2nd cast
+ Type *DstTy, ///< DstTy of 2nd cast
+ Type *SrcIntPtrTy, ///< Integer type corresponding to Ptr SrcTy, or null
+ Type *MidIntPtrTy, ///< Integer type corresponding to Ptr MidTy, or null
+ Type *DstIntPtrTy ///< Integer type corresponding to Ptr DstTy, or null
+ );
+
+ /// @brief Return the opcode of this CastInst
+ Instruction::CastOps getOpcode() const {
+ return Instruction::CastOps(Instruction::getOpcode());
+ }
+
+ /// @brief Return the source type, as a convenience
+ Type* getSrcTy() const { return getOperand(0)->getType(); }
+ /// @brief Return the destination type, as a convenience
+ Type* getDestTy() const { return getType(); }
+
+ /// This method can be used to determine if a cast from S to DstTy using
+ /// Opcode op is valid or not.
+ /// @returns true iff the proposed cast is valid.
+ /// @brief Determine if a cast is valid without creating one.
+ static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy);
+
+ /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->isCast();
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// CmpInst Class
+//===----------------------------------------------------------------------===//
+
+/// This class is the base class for the comparison instructions.
+/// @brief Abstract base class of comparison instructions.
+class CmpInst : public Instruction {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ CmpInst() LLVM_DELETED_FUNCTION;
+protected:
+ CmpInst(Type *ty, Instruction::OtherOps op, unsigned short pred,
+ Value *LHS, Value *RHS, const Twine &Name = "",
+ Instruction *InsertBefore = 0);
+
+ CmpInst(Type *ty, Instruction::OtherOps op, unsigned short pred,
+ Value *LHS, Value *RHS, const Twine &Name,
+ BasicBlock *InsertAtEnd);
+
+ virtual void anchor() LLVM_OVERRIDE; // Out of line virtual method.
+public:
+ /// This enumeration lists the possible predicates for CmpInst subclasses.
+ /// Values in the range 0-31 are reserved for FCmpInst, while values in the
+ /// range 32-64 are reserved for ICmpInst. This is necessary to ensure the
+ /// predicate values do not overlap between the classes.
+ enum Predicate {
+ // Opcode U L G E Intuitive operation
+ FCMP_FALSE = 0, ///< 0 0 0 0 Always false (always folded)
+ FCMP_OEQ = 1, ///< 0 0 0 1 True if ordered and equal
+ FCMP_OGT = 2, ///< 0 0 1 0 True if ordered and greater than
+ FCMP_OGE = 3, ///< 0 0 1 1 True if ordered and greater than or equal
+ FCMP_OLT = 4, ///< 0 1 0 0 True if ordered and less than
+ FCMP_OLE = 5, ///< 0 1 0 1 True if ordered and less than or equal
+ FCMP_ONE = 6, ///< 0 1 1 0 True if ordered and operands are unequal
+ FCMP_ORD = 7, ///< 0 1 1 1 True if ordered (no nans)
+ FCMP_UNO = 8, ///< 1 0 0 0 True if unordered: isnan(X) | isnan(Y)
+ FCMP_UEQ = 9, ///< 1 0 0 1 True if unordered or equal
+ FCMP_UGT = 10, ///< 1 0 1 0 True if unordered or greater than
+ FCMP_UGE = 11, ///< 1 0 1 1 True if unordered, greater than, or equal
+ FCMP_ULT = 12, ///< 1 1 0 0 True if unordered or less than
+ FCMP_ULE = 13, ///< 1 1 0 1 True if unordered, less than, or equal
+ FCMP_UNE = 14, ///< 1 1 1 0 True if unordered or not equal
+ FCMP_TRUE = 15, ///< 1 1 1 1 Always true (always folded)
+ FIRST_FCMP_PREDICATE = FCMP_FALSE,
+ LAST_FCMP_PREDICATE = FCMP_TRUE,
+ BAD_FCMP_PREDICATE = FCMP_TRUE + 1,
+ ICMP_EQ = 32, ///< equal
+ ICMP_NE = 33, ///< not equal
+ ICMP_UGT = 34, ///< unsigned greater than
+ ICMP_UGE = 35, ///< unsigned greater or equal
+ ICMP_ULT = 36, ///< unsigned less than
+ ICMP_ULE = 37, ///< unsigned less or equal
+ ICMP_SGT = 38, ///< signed greater than
+ ICMP_SGE = 39, ///< signed greater or equal
+ ICMP_SLT = 40, ///< signed less than
+ ICMP_SLE = 41, ///< signed less or equal
+ FIRST_ICMP_PREDICATE = ICMP_EQ,
+ LAST_ICMP_PREDICATE = ICMP_SLE,
+ BAD_ICMP_PREDICATE = ICMP_SLE + 1
+ };
+
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ /// Construct a compare instruction, given the opcode, the predicate and
+ /// the two operands. Optionally (if InsertBefore is specified) insert the
+ /// instruction into a BasicBlock right before the specified instruction.
+ /// The specified Instruction is allowed to be a dereferenced end iterator.
+ /// @brief Create a CmpInst
+ static CmpInst *Create(OtherOps Op,
+ unsigned short predicate, Value *S1,
+ Value *S2, const Twine &Name = "",
+ Instruction *InsertBefore = 0);
+
+ /// Construct a compare instruction, given the opcode, the predicate and the
+ /// two operands. Also automatically insert this instruction to the end of
+ /// the BasicBlock specified.
+ /// @brief Create a CmpInst
+ static CmpInst *Create(OtherOps Op, unsigned short predicate, Value *S1,
+ Value *S2, const Twine &Name, BasicBlock *InsertAtEnd);
+
+ /// @brief Get the opcode casted to the right type
+ OtherOps getOpcode() const {
+ return static_cast<OtherOps>(Instruction::getOpcode());
+ }
+
+ /// @brief Return the predicate for this instruction.
+ Predicate getPredicate() const {
+ return Predicate(getSubclassDataFromInstruction());
+ }
+
+ /// @brief Set the predicate for this instruction to the specified value.
+ void setPredicate(Predicate P) { setInstructionSubclassData(P); }
+
+ static bool isFPPredicate(Predicate P) {
+ return P >= FIRST_FCMP_PREDICATE && P <= LAST_FCMP_PREDICATE;
+ }
+
+ static bool isIntPredicate(Predicate P) {
+ return P >= FIRST_ICMP_PREDICATE && P <= LAST_ICMP_PREDICATE;
+ }
+
+ bool isFPPredicate() const { return isFPPredicate(getPredicate()); }
+ bool isIntPredicate() const { return isIntPredicate(getPredicate()); }
+
+
+ /// For example, EQ -> NE, UGT -> ULE, SLT -> SGE,
+ /// OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
+ /// @returns the inverse predicate for the instruction's current predicate.
+ /// @brief Return the inverse of the instruction's predicate.
+ Predicate getInversePredicate() const {
+ return getInversePredicate(getPredicate());
+ }
+
+ /// For example, EQ -> NE, UGT -> ULE, SLT -> SGE,
+ /// OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
+ /// @returns the inverse predicate for predicate provided in \p pred.
+ /// @brief Return the inverse of a given predicate
+ static Predicate getInversePredicate(Predicate pred);
+
+ /// For example, EQ->EQ, SLE->SGE, ULT->UGT,
+ /// OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
+ /// @returns the predicate that would be the result of exchanging the two
+ /// operands of the CmpInst instruction without changing the result
+ /// produced.
+ /// @brief Return the predicate as if the operands were swapped
+ Predicate getSwappedPredicate() const {
+ return getSwappedPredicate(getPredicate());
+ }
+
+ /// This is a static version that you can use without an instruction
+ /// available.
+ /// @brief Return the predicate as if the operands were swapped.
+ static Predicate getSwappedPredicate(Predicate pred);
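+
+ // A small sketch contrasting the two transforms: for icmp sgt %a, %b the
+ // inverse predicate is ICMP_SLE (negates the result), while the swapped
+ // predicate is ICMP_SLT (same result with %a and %b exchanged).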
+
+ /// @brief Provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// This is just a convenience that dispatches to the subclasses.
+ /// @brief Swap the operands and adjust predicate accordingly to retain
+ /// the same comparison.
+ void swapOperands();
+
+ /// This is just a convenience that dispatches to the subclasses.
+ /// @brief Determine if this CmpInst is commutative.
+ bool isCommutative() const;
+
+ /// This is just a convenience that dispatches to the subclasses.
+ /// @brief Determine if this is an equals/not equals predicate.
+ bool isEquality() const;
+
+ /// @returns true if the comparison is signed, false otherwise.
+ /// @brief Determine if this instruction is using a signed comparison.
+ bool isSigned() const {
+ return isSigned(getPredicate());
+ }
+
+ /// @returns true if the comparison is unsigned, false otherwise.
+ /// @brief Determine if this instruction is using an unsigned comparison.
+ bool isUnsigned() const {
+ return isUnsigned(getPredicate());
+ }
+
+ /// This is just a convenience.
+ /// @brief Determine if this is true when both operands are the same.
+ bool isTrueWhenEqual() const {
+ return isTrueWhenEqual(getPredicate());
+ }
+
+ /// This is just a convenience.
+ /// @brief Determine if this is false when both operands are the same.
+ bool isFalseWhenEqual() const {
+ return isFalseWhenEqual(getPredicate());
+ }
+
+ /// @returns true if the predicate is unsigned, false otherwise.
+ /// @brief Determine if the predicate is an unsigned operation.
+ static bool isUnsigned(unsigned short predicate);
+
+ /// @returns true if the predicate is signed, false otherwise.
+ /// @brief Determine if the predicate is a signed operation.
+ static bool isSigned(unsigned short predicate);
+
+ /// @brief Determine if the predicate is an ordered operation.
+ static bool isOrdered(unsigned short predicate);
+
+ /// @brief Determine if the predicate is an unordered operation.
+ static bool isUnordered(unsigned short predicate);
+
+ /// Determine if the predicate is true when comparing a value with itself.
+ static bool isTrueWhenEqual(unsigned short predicate);
+
+ /// Determine if the predicate is false when comparing a value with itself.
+ static bool isFalseWhenEqual(unsigned short predicate);
+
+ /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::ICmp ||
+ I->getOpcode() == Instruction::FCmp;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+
+ /// @brief Create a result type for fcmp/icmp
+ static Type* makeCmpResultType(Type* opnd_type) {
+ if (VectorType* vt = dyn_cast<VectorType>(opnd_type)) {
+ return VectorType::get(Type::getInt1Ty(opnd_type->getContext()),
+ vt->getNumElements());
+ }
+ return Type::getInt1Ty(opnd_type->getContext());
+ }
+private:
+ // Shadow Value::setValueSubclassData with a private forwarding method so that
+ // subclasses cannot accidentally use it.
+ void setValueSubclassData(unsigned short D) {
+ Value::setValueSubclassData(D);
+ }
+};
+
+
+// FIXME: these are redundant if CmpInst < BinaryOperator
+template <>
+struct OperandTraits<CmpInst> : public FixedNumOperandTraits<CmpInst, 2> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value)
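+
+// A minimal usage sketch (illustrative only): build a comparison with
+// CmpInst::Create, then reason about it with the predicate helpers above;
+// 'A', 'B', and 'InsertPt' are hypothetical caller-supplied values.
+//
+//   CmpInst *C = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
+//                                A, B, "cmp", InsertPt);
+//   assert(C->isSigned() && !C->isEquality());
+//   C->setPredicate(C->getInversePredicate()); // ICMP_SLT becomes ICMP_SGE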
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Instruction.def b/include/llvm/IR/Instruction.def
index e59a0528e90f..e59a0528e90f 100644
--- a/include/llvm/Instruction.def
+++ b/include/llvm/IR/Instruction.def
diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
new file mode 100644
index 000000000000..5721d8f2f3fb
--- /dev/null
+++ b/include/llvm/IR/Instruction.h
@@ -0,0 +1,467 @@
+//===-- llvm/IR/Instruction.h - Instruction class definition ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the Instruction class, which is the
+// base class for all of the LLVM instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_INSTRUCTION_H
+#define LLVM_IR_INSTRUCTION_H
+
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/IR/User.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class FastMathFlags;
+class LLVMContext;
+class MDNode;
+
+template<typename ValueSubClass, typename ItemParentClass>
+ class SymbolTableListTraits;
+
+class Instruction : public User, public ilist_node<Instruction> {
+ void operator=(const Instruction &) LLVM_DELETED_FUNCTION;
+ Instruction(const Instruction &) LLVM_DELETED_FUNCTION;
+
+ BasicBlock *Parent;
+ DebugLoc DbgLoc; // 'dbg' Metadata cache.
+
+ enum {
+ /// HasMetadataBit - This is a bit stored in the SubClassData field which
+ /// indicates whether this instruction has metadata attached to it or not.
+ HasMetadataBit = 1 << 15
+ };
+public:
+ // Out of line virtual method, so the vtable, etc has a home.
+ ~Instruction();
+
+ /// use_back - Specialize the methods defined in Value, as we know that an
+ /// instruction can only be used by other instructions.
+ Instruction *use_back() { return cast<Instruction>(*use_begin());}
+ const Instruction *use_back() const { return cast<Instruction>(*use_begin());}
+
+ inline const BasicBlock *getParent() const { return Parent; }
+ inline BasicBlock *getParent() { return Parent; }
+
+ /// removeFromParent - This method unlinks 'this' from the containing basic
+ /// block, but does not delete it.
+ ///
+ void removeFromParent();
+
+ /// eraseFromParent - This method unlinks 'this' from the containing basic
+ /// block and deletes it.
+ ///
+ void eraseFromParent();
+
+ /// insertBefore - Insert an unlinked instruction into a basic block
+ /// immediately before the specified instruction.
+ void insertBefore(Instruction *InsertPos);
+
+ /// insertAfter - Insert an unlinked instruction into a basic block
+ /// immediately after the specified instruction.
+ void insertAfter(Instruction *InsertPos);
+
+ /// moveBefore - Unlink this instruction from its current basic block and
+ /// insert it into the basic block that MovePos lives in, right before
+ /// MovePos.
+ void moveBefore(Instruction *MovePos);
+
+ //===--------------------------------------------------------------------===//
+ // Subclass classification.
+ //===--------------------------------------------------------------------===//
+
+ /// getOpcode() returns a member of one of the enums like Instruction::Add.
+ unsigned getOpcode() const { return getValueID() - InstructionVal; }
+
+ const char *getOpcodeName() const { return getOpcodeName(getOpcode()); }
+ bool isTerminator() const { return isTerminator(getOpcode()); }
+ bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
+ bool isShift() const { return isShift(getOpcode()); }
+ bool isCast() const { return isCast(getOpcode()); }
+
+ static const char* getOpcodeName(unsigned OpCode);
+
+ static inline bool isTerminator(unsigned OpCode) {
+ return OpCode >= TermOpsBegin && OpCode < TermOpsEnd;
+ }
+
+ static inline bool isBinaryOp(unsigned Opcode) {
+ return Opcode >= BinaryOpsBegin && Opcode < BinaryOpsEnd;
+ }
+
+ /// @brief Determine if the Opcode is one of the shift instructions.
+ static inline bool isShift(unsigned Opcode) {
+ return Opcode >= Shl && Opcode <= AShr;
+ }
+
+ /// isLogicalShift - Return true if this is a logical shift left or a logical
+ /// shift right.
+ inline bool isLogicalShift() const {
+ return getOpcode() == Shl || getOpcode() == LShr;
+ }
+
+ /// isArithmeticShift - Return true if this is an arithmetic shift right.
+ inline bool isArithmeticShift() const {
+ return getOpcode() == AShr;
+ }
+
+ /// @brief Determine if the OpCode is one of the CastInst instructions.
+ static inline bool isCast(unsigned OpCode) {
+ return OpCode >= CastOpsBegin && OpCode < CastOpsEnd;
+ }
+
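+ // Illustrative sketch: the static forms classify a raw opcode while the
+ // member forms classify this instance ('I' is a hypothetical Instruction*):
+ //
+ //   if (I->isShift() && I->isLogicalShift())
+ //     ...;                                    // a Shl or LShr
+ //   if (Instruction::isCast(I->getOpcode()))  // same as I->isCast()
+ //     ...;
+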
+ //===--------------------------------------------------------------------===//
+ // Metadata manipulation.
+ //===--------------------------------------------------------------------===//
+
+ /// hasMetadata() - Return true if this instruction has any metadata attached
+ /// to it.
+ bool hasMetadata() const {
+ return !DbgLoc.isUnknown() || hasMetadataHashEntry();
+ }
+
+ /// hasMetadataOtherThanDebugLoc - Return true if this instruction has
+ /// metadata attached to it other than a debug location.
+ bool hasMetadataOtherThanDebugLoc() const {
+ return hasMetadataHashEntry();
+ }
+
+ /// getMetadata - Get the metadata of given kind attached to this Instruction.
+ /// If the metadata is not found then return null.
+ MDNode *getMetadata(unsigned KindID) const {
+ if (!hasMetadata()) return 0;
+ return getMetadataImpl(KindID);
+ }
+
+ /// getMetadata - Get the metadata of given kind attached to this Instruction.
+ /// If the metadata is not found then return null.
+ MDNode *getMetadata(StringRef Kind) const {
+ if (!hasMetadata()) return 0;
+ return getMetadataImpl(Kind);
+ }
+
+ /// getAllMetadata - Get all metadata attached to this Instruction. The first
+ /// element of each pair returned is the KindID, the second element is the
+ /// metadata value. This list is returned sorted by the KindID.
+ void getAllMetadata(SmallVectorImpl<std::pair<unsigned, MDNode*> > &MDs)const{
+ if (hasMetadata())
+ getAllMetadataImpl(MDs);
+ }
+
+ /// getAllMetadataOtherThanDebugLoc - This does the same thing as
+ /// getAllMetadata, except that it filters out the debug location.
+ void getAllMetadataOtherThanDebugLoc(SmallVectorImpl<std::pair<unsigned,
+ MDNode*> > &MDs) const {
+ if (hasMetadataOtherThanDebugLoc())
+ getAllMetadataOtherThanDebugLocImpl(MDs);
+ }
+
+ /// setMetadata - Set the metadata of the specified kind to the specified
+ /// node. This updates/replaces metadata if already present, or removes it if
+ /// Node is null.
+ void setMetadata(unsigned KindID, MDNode *Node);
+ void setMetadata(StringRef Kind, MDNode *Node);
+
+ /// setDebugLoc - Set the debug location information for this instruction.
+ void setDebugLoc(const DebugLoc &Loc) { DbgLoc = Loc; }
+
+ /// getDebugLoc - Return the debug location for this node as a DebugLoc.
+ const DebugLoc &getDebugLoc() const { return DbgLoc; }
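+
+ // Illustrative sketch: attaching, querying, and detaching metadata by kind
+ // ('I' and 'Node' are hypothetical; kind IDs come from the LLVMContext):
+ //
+ //   unsigned KindID = I->getContext().getMDKindID("my.annotation");
+ //   I->setMetadata(KindID, Node);              // attach or replace
+ //   if (MDNode *MD = I->getMetadata(KindID))   // null when absent
+ //     ...;
+ //   I->setMetadata(KindID, 0);                 // detach again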
+
+ /// Set or clear the unsafe-algebra flag on this instruction, which must be an
+ /// operator which supports this flag. See LangRef.html for the meaning of
+ /// this flag.
+ void setHasUnsafeAlgebra(bool B);
+
+ /// Set or clear the no-nans flag on this instruction, which must be an
+ /// operator which supports this flag. See LangRef.html for the meaning of
+ /// this flag.
+ void setHasNoNaNs(bool B);
+
+ /// Set or clear the no-infs flag on this instruction, which must be an
+ /// operator which supports this flag. See LangRef.html for the meaning of
+ /// this flag.
+ void setHasNoInfs(bool B);
+
+ /// Set or clear the no-signed-zeros flag on this instruction, which must be
+ /// an operator which supports this flag. See LangRef.html for the meaning of
+ /// this flag.
+ void setHasNoSignedZeros(bool B);
+
+ /// Set or clear the allow-reciprocal flag on this instruction, which must be
+ /// an operator which supports this flag. See LangRef.html for the meaning of
+ /// this flag.
+ void setHasAllowReciprocal(bool B);
+
+ /// Convenience function for setting all the fast-math flags on this
+ /// instruction, which must be an operator which supports these flags. See
+ /// LangRef.html for the meaning of these flags.
+ void setFastMathFlags(FastMathFlags FMF);
+
+ /// Determine whether the unsafe-algebra flag is set.
+ bool hasUnsafeAlgebra() const;
+
+ /// Determine whether the no-NaNs flag is set.
+ bool hasNoNaNs() const;
+
+ /// Determine whether the no-infs flag is set.
+ bool hasNoInfs() const;
+
+ /// Determine whether the no-signed-zeros flag is set.
+ bool hasNoSignedZeros() const;
+
+ /// Determine whether the allow-reciprocal flag is set.
+ bool hasAllowReciprocal() const;
+
+ /// Convenience function for getting all the fast-math flags. This
+ /// instruction must be an operator which supports these flags. See
+ /// LangRef.html for the meaning of these flags.
+ FastMathFlags getFastMathFlags() const;
+
+ /// Copy I's fast-math flags
+ void copyFastMathFlags(const Instruction *I);
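+
+ // Illustrative sketch (assuming the FastMathFlags helpers in Operator.h):
+ // enabling fast-math on a floating-point operator, where 'FAdd' is a
+ // hypothetical BinaryOperator performing an fadd:
+ //
+ //   FastMathFlags FMF;
+ //   FMF.setUnsafeAlgebra();        // implies the other fast-math flags
+ //   FAdd->setFastMathFlags(FMF);
+ //   assert(FAdd->hasNoNaNs());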
+
+private:
+ /// hasMetadataHashEntry - Return true if we have an entry in the on-the-side
+ /// metadata hash.
+ bool hasMetadataHashEntry() const {
+ return (getSubclassDataFromValue() & HasMetadataBit) != 0;
+ }
+
+ // These are all implemented in Metadata.cpp.
+ MDNode *getMetadataImpl(unsigned KindID) const;
+ MDNode *getMetadataImpl(StringRef Kind) const;
+ void getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,MDNode*> > &)const;
+ void getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
+ MDNode*> > &) const;
+ void clearMetadataHashEntries();
+public:
+ //===--------------------------------------------------------------------===//
+ // Predicates and helper methods.
+ //===--------------------------------------------------------------------===//
+
+
+ /// isAssociative - Return true if the instruction is associative:
+ ///
+ /// Associative operators satisfy: x op (y op z) === (x op y) op z
+ ///
+ /// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
+ ///
+ bool isAssociative() const;
+ static bool isAssociative(unsigned op);
+
+ /// isCommutative - Return true if the instruction is commutative:
+ ///
+ /// Commutative operators satisfy: (x op y) === (y op x)
+ ///
+ /// In LLVM, these are the associative operators. (Equality comparisons are
+ /// also commutative, but those are handled by CmpInst::isCommutative.)
+ ///
+ bool isCommutative() const { return isCommutative(getOpcode()); }
+ static bool isCommutative(unsigned op);
+
+ /// isIdempotent - Return true if the instruction is idempotent:
+ ///
+ /// Idempotent operators satisfy: x op x === x
+ ///
+ /// In LLVM, the And and Or operators are idempotent.
+ ///
+ bool isIdempotent() const { return isIdempotent(getOpcode()); }
+ static bool isIdempotent(unsigned op);
+
+ /// isNilpotent - Return true if the instruction is nilpotent:
+ ///
+ /// Nilpotent operators satisfy: x op x === Id,
+ ///
+ /// where Id is the identity for the operator, i.e. a constant such that
+ /// x op Id === x and Id op x === x for all x.
+ ///
+ /// In LLVM, the Xor operator is nilpotent.
+ ///
+ bool isNilpotent() const { return isNilpotent(getOpcode()); }
+ static bool isNilpotent(unsigned op);
+
+ /// mayWriteToMemory - Return true if this instruction may modify memory.
+ ///
+ bool mayWriteToMemory() const;
+
+ /// mayReadFromMemory - Return true if this instruction may read memory.
+ ///
+ bool mayReadFromMemory() const;
+
+ /// mayReadOrWriteMemory - Return true if this instruction may read or
+ /// write memory.
+ ///
+ bool mayReadOrWriteMemory() const {
+ return mayReadFromMemory() || mayWriteToMemory();
+ }
+
+ /// mayThrow - Return true if this instruction may throw an exception.
+ ///
+ bool mayThrow() const;
+
+ /// mayReturn - Return true if this instruction may return, i.e. if control
+ /// flow can continue past it. This is true for all normal instructions; the
+ /// only exception is a call to a function marked with the 'noreturn'
+ /// attribute.
+ ///
+ bool mayReturn() const;
+
+ /// mayHaveSideEffects - Return true if the instruction may have side effects.
+ ///
+ /// Note that this does not consider malloc and alloca to have side
+ /// effects because the newly allocated memory is completely invisible to
+ /// instructions which don't use the returned value. For cases where this
+ /// matters, isSafeToSpeculativelyExecute may be more appropriate.
+ bool mayHaveSideEffects() const {
+ return mayWriteToMemory() || mayThrow() || !mayReturn();
+ }
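+
+ // Illustrative sketch: a classic triviality test assembled from the
+ // predicates above ('I' is a hypothetical Instruction*; real passes layer
+ // further checks on top of this):
+ //
+ //   bool Dead = I->use_empty() && !I->isTerminator() &&
+ //               !I->mayHaveSideEffects();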
+
+ /// clone() - Create a copy of 'this' instruction that is identical in all
+ /// ways except the following:
+ /// * The instruction has no parent
+ /// * The instruction has no name
+ ///
+ Instruction *clone() const;
+
+ /// isIdenticalTo - Return true if the specified instruction is exactly
+ /// identical to the current one. This means that all operands match and any
+ /// extra information (e.g. load is volatile) agree.
+ bool isIdenticalTo(const Instruction *I) const;
+
+ /// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
+ /// ignores the SubclassOptionalData flags, which specify conditions
+ /// under which the instruction's result is undefined.
+ bool isIdenticalToWhenDefined(const Instruction *I) const;
+
+ /// When checking for operation equivalence (using isSameOperationAs) it is
+ /// sometimes useful to ignore certain attributes.
+ enum OperationEquivalenceFlags {
+ /// Check for equivalence ignoring load/store alignment.
+ CompareIgnoringAlignment = 1<<0,
+ /// Check for equivalence treating a type and a vector of that type
+ /// as equivalent.
+ CompareUsingScalarTypes = 1<<1
+ };
+
+ /// This function determines if the specified instruction executes the same
+ /// operation as the current one. This means that the opcodes, type, operand
+ /// types and any other factors affecting the operation must be the same. This
+ /// is similar to isIdenticalTo except the operands themselves don't have to
+ /// be identical.
+ /// @returns true if the specified instruction is the same operation as
+ /// the current one.
+ /// @brief Determine if one instruction is the same operation as another.
+ bool isSameOperationAs(const Instruction *I, unsigned flags = 0) const;
+
+ /// isUsedOutsideOfBlock - Return true if there are any uses of this
+ /// instruction in blocks other than the specified block. Note that PHI nodes
+ /// are considered to evaluate their operands in the corresponding predecessor
+ /// block.
+ bool isUsedOutsideOfBlock(const BasicBlock *BB) const;
+
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Value *V) {
+ return V->getValueID() >= Value::InstructionVal;
+ }
+
+ //----------------------------------------------------------------------
+ // Exported enumerations.
+ //
+ enum TermOps { // These terminate basic blocks
+#define FIRST_TERM_INST(N) TermOpsBegin = N,
+#define HANDLE_TERM_INST(N, OPC, CLASS) OPC = N,
+#define LAST_TERM_INST(N) TermOpsEnd = N+1
+#include "llvm/IR/Instruction.def"
+ };
+
+ enum BinaryOps {
+#define FIRST_BINARY_INST(N) BinaryOpsBegin = N,
+#define HANDLE_BINARY_INST(N, OPC, CLASS) OPC = N,
+#define LAST_BINARY_INST(N) BinaryOpsEnd = N+1
+#include "llvm/IR/Instruction.def"
+ };
+
+ enum MemoryOps {
+#define FIRST_MEMORY_INST(N) MemoryOpsBegin = N,
+#define HANDLE_MEMORY_INST(N, OPC, CLASS) OPC = N,
+#define LAST_MEMORY_INST(N) MemoryOpsEnd = N+1
+#include "llvm/IR/Instruction.def"
+ };
+
+ enum CastOps {
+#define FIRST_CAST_INST(N) CastOpsBegin = N,
+#define HANDLE_CAST_INST(N, OPC, CLASS) OPC = N,
+#define LAST_CAST_INST(N) CastOpsEnd = N+1
+#include "llvm/IR/Instruction.def"
+ };
+
+ enum OtherOps {
+#define FIRST_OTHER_INST(N) OtherOpsBegin = N,
+#define HANDLE_OTHER_INST(N, OPC, CLASS) OPC = N,
+#define LAST_OTHER_INST(N) OtherOpsEnd = N+1
+#include "llvm/IR/Instruction.def"
+ };
+private:
+ // Shadow Value::setValueSubclassData with a private forwarding method so that
+ // subclasses cannot accidentally use it.
+ void setValueSubclassData(unsigned short D) {
+ Value::setValueSubclassData(D);
+ }
+ unsigned short getSubclassDataFromValue() const {
+ return Value::getSubclassDataFromValue();
+ }
+
+ void setHasMetadataHashEntry(bool V) {
+ setValueSubclassData((getSubclassDataFromValue() & ~HasMetadataBit) |
+ (V ? HasMetadataBit : 0));
+ }
+
+ friend class SymbolTableListTraits<Instruction, BasicBlock>;
+ void setParent(BasicBlock *P);
+protected:
+ // Instruction subclasses can stick up to 15 bits of stuff into the
+ // SubclassData field of the instruction with these members.
+
+ // Verify that only the low 15 bits are used.
+ void setInstructionSubclassData(unsigned short D) {
+ assert((D & HasMetadataBit) == 0 && "Out of range value put into field");
+ setValueSubclassData((getSubclassDataFromValue() & HasMetadataBit) | D);
+ }
+
+ unsigned getSubclassDataFromInstruction() const {
+ return getSubclassDataFromValue() & ~HasMetadataBit;
+ }
+
+ Instruction(Type *Ty, unsigned iType, Use *Ops, unsigned NumOps,
+ Instruction *InsertBefore = 0);
+ Instruction(Type *Ty, unsigned iType, Use *Ops, unsigned NumOps,
+ BasicBlock *InsertAtEnd);
+ virtual Instruction *clone_impl() const = 0;
+
+};
+
+// Instruction* is only 4-byte aligned.
+template<>
+class PointerLikeTypeTraits<Instruction*> {
+ typedef Instruction* PT;
+public:
+ static inline void *getAsVoidPointer(PT P) { return P; }
+ static inline PT getFromVoidPointer(void *P) {
+ return static_cast<PT>(P);
+ }
+ enum { NumLowBitsAvailable = 2 };
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
new file mode 100644
index 000000000000..7e29699f73d9
--- /dev/null
+++ b/include/llvm/IR/Instructions.h
@@ -0,0 +1,3716 @@
+//===- llvm/IR/Instructions.h - Instruction subclass definitions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes the class definitions of all of the subclasses of the
+// Instruction class. This is meant to be an easy way to get access to all
+// instruction subclasses.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_INSTRUCTIONS_H
+#define LLVM_IR_INSTRUCTIONS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IntegersSubset.h"
+#include "llvm/Support/IntegersSubsetMapping.h"
+#include <iterator>
+
+namespace llvm {
+
+class APInt;
+class ConstantInt;
+class ConstantRange;
+class DataLayout;
+class LLVMContext;
+
+enum AtomicOrdering {
+ NotAtomic = 0,
+ Unordered = 1,
+ Monotonic = 2,
+ // Consume = 3, // Not specified yet.
+ Acquire = 4,
+ Release = 5,
+ AcquireRelease = 6,
+ SequentiallyConsistent = 7
+};
+
+enum SynchronizationScope {
+ SingleThread = 0,
+ CrossThread = 1
+};
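+
+// Illustrative sketch: the two enums above parameterize the atomic memory
+// instructions declared below, e.g. an acquire load visible to all threads
+// ('Ptr' and 'InsertPt' are hypothetical):
+//
+//   LoadInst *LI = new LoadInst(Ptr, "val", /*isVolatile=*/false,
+//                               /*Align=*/4, Acquire, CrossThread, InsertPt);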
+
+//===----------------------------------------------------------------------===//
+// AllocaInst Class
+//===----------------------------------------------------------------------===//
+
+/// AllocaInst - an instruction to allocate memory on the stack
+///
+class AllocaInst : public UnaryInstruction {
+protected:
+ virtual AllocaInst *clone_impl() const;
+public:
+ explicit AllocaInst(Type *Ty, Value *ArraySize = 0,
+ const Twine &Name = "", Instruction *InsertBefore = 0);
+ AllocaInst(Type *Ty, Value *ArraySize,
+ const Twine &Name, BasicBlock *InsertAtEnd);
+
+ AllocaInst(Type *Ty, const Twine &Name, Instruction *InsertBefore = 0);
+ AllocaInst(Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd);
+
+ AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name = "", Instruction *InsertBefore = 0);
+ AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, BasicBlock *InsertAtEnd);
+
+ // Out of line virtual method, so the vtable, etc. has a home.
+ virtual ~AllocaInst();
+
+ /// isArrayAllocation - Return true if there is an allocation size parameter
+ /// to the allocation instruction that is not 1.
+ ///
+ bool isArrayAllocation() const;
+
+ /// getArraySize - Get the number of elements allocated. For a simple
+ /// allocation of a single element, this will return a constant 1 value.
+ ///
+ const Value *getArraySize() const { return getOperand(0); }
+ Value *getArraySize() { return getOperand(0); }
+
+ /// getType - Overload to return most specific pointer type
+ ///
+ PointerType *getType() const {
+ return cast<PointerType>(Instruction::getType());
+ }
+
+ /// getAllocatedType - Return the type that is being allocated by the
+ /// instruction.
+ ///
+ Type *getAllocatedType() const;
+
+ /// getAlignment - Return the alignment of the memory that is being allocated
+ /// by the instruction.
+ ///
+ unsigned getAlignment() const {
+ return (1u << getSubclassDataFromInstruction()) >> 1;
+ }
+ void setAlignment(unsigned Align);
+
+ /// isStaticAlloca - Return true if this alloca is in the entry block of the
+ /// function and is a constant size. If so, the code generator will fold it
+ /// into the prolog/epilog code, so it is basically free.
+ bool isStaticAlloca() const;
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return (I->getOpcode() == Instruction::Alloca);
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ // Shadow Instruction::setInstructionSubclassData with a private forwarding
+ // method so that subclasses cannot accidentally use it.
+ void setInstructionSubclassData(unsigned short D) {
+ Instruction::setInstructionSubclassData(D);
+ }
+};
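+
+// Illustrative sketch: reserving stack space for a single i32 ('Ctx' is a
+// hypothetical LLVMContext and 'EntryBB' the function's entry block):
+//
+//   AllocaInst *Slot = new AllocaInst(Type::getInt32Ty(Ctx), "slot", EntryBB);
+//   assert(!Slot->isArrayAllocation() && Slot->isStaticAlloca());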
+
+
+//===----------------------------------------------------------------------===//
+// LoadInst Class
+//===----------------------------------------------------------------------===//
+
+/// LoadInst - an instruction for reading from memory. This uses the
+/// SubclassData field in Value to store whether or not the load is volatile.
+///
+class LoadInst : public UnaryInstruction {
+ void AssertOK();
+protected:
+ virtual LoadInst *clone_impl() const;
+public:
+ LoadInst(Value *Ptr, const Twine &NameStr, Instruction *InsertBefore);
+ LoadInst(Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd);
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile = false,
+ Instruction *InsertBefore = 0);
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ BasicBlock *InsertAtEnd);
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ unsigned Align, Instruction *InsertBefore = 0);
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAtEnd);
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope = CrossThread,
+ Instruction *InsertBefore = 0);
+ LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd);
+
+ LoadInst(Value *Ptr, const char *NameStr, Instruction *InsertBefore);
+ LoadInst(Value *Ptr, const char *NameStr, BasicBlock *InsertAtEnd);
+ explicit LoadInst(Value *Ptr, const char *NameStr = 0,
+ bool isVolatile = false, Instruction *InsertBefore = 0);
+ LoadInst(Value *Ptr, const char *NameStr, bool isVolatile,
+ BasicBlock *InsertAtEnd);
+
+ /// isVolatile - Return true if this is a load from a volatile memory
+ /// location.
+ ///
+ bool isVolatile() const { return getSubclassDataFromInstruction() & 1; }
+
+ /// setVolatile - Specify whether this is a volatile load or not.
+ ///
+ void setVolatile(bool V) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
+ (V ? 1 : 0));
+ }
+
+ /// getAlignment - Return the alignment of the access that is being performed
+ ///
+ unsigned getAlignment() const {
+ return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1;
+ }
+
+ void setAlignment(unsigned Align);
+
+ /// Returns the ordering effect of this load.
+ AtomicOrdering getOrdering() const {
+ return AtomicOrdering((getSubclassDataFromInstruction() >> 7) & 7);
+ }
+
+ /// Set the ordering constraint on this load. May not be Release or
+ /// AcquireRelease.
+ void setOrdering(AtomicOrdering Ordering) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~(7 << 7)) |
+ (Ordering << 7));
+ }
+
+ SynchronizationScope getSynchScope() const {
+ return SynchronizationScope((getSubclassDataFromInstruction() >> 6) & 1);
+ }
+
+ /// Specify whether this load is ordered with respect to all
+ /// concurrently executing threads, or only with respect to signal handlers
+ /// executing in the same thread.
+ void setSynchScope(SynchronizationScope xthread) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~(1 << 6)) |
+ (xthread << 6));
+ }
+
+ bool isAtomic() const { return getOrdering() != NotAtomic; }
+ void setAtomic(AtomicOrdering Ordering,
+ SynchronizationScope SynchScope = CrossThread) {
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+ }
+
+ bool isSimple() const { return !isAtomic() && !isVolatile(); }
+ bool isUnordered() const {
+ return getOrdering() <= Unordered && !isVolatile();
+ }
+
+ Value *getPointerOperand() { return getOperand(0); }
+ const Value *getPointerOperand() const { return getOperand(0); }
+ static unsigned getPointerOperandIndex() { return 0U; }
+
+ /// \brief Returns the address space of the pointer operand.
+ unsigned getPointerAddressSpace() const {
+ return getPointerOperand()->getType()->getPointerAddressSpace();
+ }
+
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Load;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ // Shadow Instruction::setInstructionSubclassData with a private forwarding
+ // method so that subclasses cannot accidentally use it.
+ void setInstructionSubclassData(unsigned short D) {
+ Instruction::setInstructionSubclassData(D);
+ }
+};
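+
+// Illustrative sketch: distinguishing the load flavors defined above ('LI' is
+// a hypothetical LoadInst*):
+//
+//   if (LI->isSimple())          // neither atomic nor volatile
+//     ...;
+//   else if (LI->isUnordered())  // at most Unordered, and not volatile
+//     ...;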
+
+
+//===----------------------------------------------------------------------===//
+// StoreInst Class
+//===----------------------------------------------------------------------===//
+
+/// StoreInst - an instruction for storing to memory
+///
+class StoreInst : public Instruction {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void AssertOK();
+protected:
+ virtual StoreInst *clone_impl() const;
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ StoreInst(Value *Val, Value *Ptr, Instruction *InsertBefore);
+ StoreInst(Value *Val, Value *Ptr, BasicBlock *InsertAtEnd);
+ StoreInst(Value *Val, Value *Ptr, bool isVolatile = false,
+ Instruction *InsertBefore = 0);
+ StoreInst(Value *Val, Value *Ptr, bool isVolatile, BasicBlock *InsertAtEnd);
+ StoreInst(Value *Val, Value *Ptr, bool isVolatile,
+ unsigned Align, Instruction *InsertBefore = 0);
+ StoreInst(Value *Val, Value *Ptr, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAtEnd);
+ StoreInst(Value *Val, Value *Ptr, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope = CrossThread,
+ Instruction *InsertBefore = 0);
+ StoreInst(Value *Val, Value *Ptr, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd);
+
+
+ /// isVolatile - Return true if this is a store to a volatile memory
+ /// location.
+ ///
+ bool isVolatile() const { return getSubclassDataFromInstruction() & 1; }
+
+ /// setVolatile - Specify whether this is a volatile store or not.
+ ///
+ void setVolatile(bool V) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
+ (V ? 1 : 0));
+ }
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// getAlignment - Return the alignment of the access that is being performed
+ ///
+ unsigned getAlignment() const {
+ return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1;
+ }
+
+ void setAlignment(unsigned Align);
+
+ /// Returns the ordering effect of this store.
+ AtomicOrdering getOrdering() const {
+ return AtomicOrdering((getSubclassDataFromInstruction() >> 7) & 7);
+ }
+
+ /// Set the ordering constraint on this store. May not be Acquire or
+ /// AcquireRelease.
+ void setOrdering(AtomicOrdering Ordering) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~(7 << 7)) |
+ (Ordering << 7));
+ }
+
+ SynchronizationScope getSynchScope() const {
+ return SynchronizationScope((getSubclassDataFromInstruction() >> 6) & 1);
+ }
+
+ /// Specify whether this store instruction is ordered with respect to all
+ /// concurrently executing threads, or only with respect to signal handlers
+ /// executing in the same thread.
+ void setSynchScope(SynchronizationScope xthread) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~(1 << 6)) |
+ (xthread << 6));
+ }
+
+ bool isAtomic() const { return getOrdering() != NotAtomic; }
+ void setAtomic(AtomicOrdering Ordering,
+ SynchronizationScope SynchScope = CrossThread) {
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+ }
+
+ bool isSimple() const { return !isAtomic() && !isVolatile(); }
+ bool isUnordered() const {
+ return getOrdering() <= Unordered && !isVolatile();
+ }
+
+ Value *getValueOperand() { return getOperand(0); }
+ const Value *getValueOperand() const { return getOperand(0); }
+
+ Value *getPointerOperand() { return getOperand(1); }
+ const Value *getPointerOperand() const { return getOperand(1); }
+ static unsigned getPointerOperandIndex() { return 1U; }
+
+ /// \brief Returns the address space of the pointer operand.
+ unsigned getPointerAddressSpace() const {
+ return getPointerOperand()->getType()->getPointerAddressSpace();
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Store;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ // Shadow Instruction::setInstructionSubclassData with a private forwarding
+ // method so that subclasses cannot accidentally use it.
+ void setInstructionSubclassData(unsigned short D) {
+ Instruction::setInstructionSubclassData(D);
+ }
+};
+
+template <>
+struct OperandTraits<StoreInst> : public FixedNumOperandTraits<StoreInst, 2> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value)
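+
+// Illustrative sketch: storing a value and reading back the two operands
+// ('Val', 'Ptr', and 'InsertPt' are hypothetical):
+//
+//   StoreInst *SI = new StoreInst(Val, Ptr, /*isVolatile=*/false, InsertPt);
+//   assert(SI->getValueOperand() == Val && SI->getPointerOperand() == Ptr);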
+
+//===----------------------------------------------------------------------===//
+// FenceInst Class
+//===----------------------------------------------------------------------===//
+
+/// FenceInst - an instruction for ordering other memory operations
+///
+class FenceInst : public Instruction {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void Init(AtomicOrdering Ordering, SynchronizationScope SynchScope);
+protected:
+ virtual FenceInst *clone_impl() const;
+public:
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+
+ // Ordering may only be Acquire, Release, AcquireRelease, or
+ // SequentiallyConsistent.
+ FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+ SynchronizationScope SynchScope = CrossThread,
+ Instruction *InsertBefore = 0);
+ FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd);
+
+ /// Returns the ordering effect of this fence.
+ AtomicOrdering getOrdering() const {
+ return AtomicOrdering(getSubclassDataFromInstruction() >> 1);
+ }
+
+ /// Set the ordering constraint on this fence. May only be Acquire, Release,
+ /// AcquireRelease, or SequentiallyConsistent.
+ void setOrdering(AtomicOrdering Ordering) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
+ (Ordering << 1));
+ }
+
+ SynchronizationScope getSynchScope() const {
+ return SynchronizationScope(getSubclassDataFromInstruction() & 1);
+ }
+
+ /// Specify whether this fence orders other operations with respect to all
+ /// concurrently executing threads, or only with respect to signal handlers
+ /// executing in the same thread.
+ void setSynchScope(SynchronizationScope xthread) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
+ xthread);
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Fence;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ // Shadow Instruction::setInstructionSubclassData with a private forwarding
+ // method so that subclasses cannot accidentally use it.
+ void setInstructionSubclassData(unsigned short D) {
+ Instruction::setInstructionSubclassData(D);
+ }
+};
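+
+// Illustrative sketch: a full barrier ordering memory operations across all
+// threads ('Ctx' and 'InsertPt' are hypothetical):
+//
+//   new FenceInst(Ctx, SequentiallyConsistent, CrossThread, InsertPt);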
+
+//===----------------------------------------------------------------------===//
+// AtomicCmpXchgInst Class
+//===----------------------------------------------------------------------===//
+
+/// AtomicCmpXchgInst - an instruction that atomically checks whether a
+/// specified value is in a memory location, and, if it is, stores a new value
+/// there. Returns the value that was loaded.
+///
+class AtomicCmpXchgInst : public Instruction {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void Init(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering, SynchronizationScope SynchScope);
+protected:
+ virtual AtomicCmpXchgInst *clone_impl() const;
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering, SynchronizationScope SynchScope,
+ Instruction *InsertBefore = 0);
+ AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering, SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd);
+
+ /// isVolatile - Return true if this is a cmpxchg on a volatile memory
+ /// location.
+ ///
+ bool isVolatile() const {
+ return getSubclassDataFromInstruction() & 1;
+ }
+
+ /// setVolatile - Specify whether this is a volatile cmpxchg.
+ ///
+ void setVolatile(bool V) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
+ (unsigned)V);
+ }
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// Set the ordering constraint on this cmpxchg.
+ void setOrdering(AtomicOrdering Ordering) {
+ assert(Ordering != NotAtomic &&
+ "CmpXchg instructions can only be atomic.");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & 3) |
+ (Ordering << 2));
+ }
+
+ /// Specify whether this cmpxchg is atomic and orders other operations with
+ /// respect to all concurrently executing threads, or only with respect to
+ /// signal handlers executing in the same thread.
+ void setSynchScope(SynchronizationScope SynchScope) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) |
+ (SynchScope << 1));
+ }
+
+ /// Returns the ordering constraint on this cmpxchg.
+ AtomicOrdering getOrdering() const {
+ return AtomicOrdering(getSubclassDataFromInstruction() >> 2);
+ }
+
+ /// Returns whether this cmpxchg is atomic between threads or only within a
+ /// single thread.
+ SynchronizationScope getSynchScope() const {
+ return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1);
+ }
+
+ Value *getPointerOperand() { return getOperand(0); }
+ const Value *getPointerOperand() const { return getOperand(0); }
+ static unsigned getPointerOperandIndex() { return 0U; }
+
+ Value *getCompareOperand() { return getOperand(1); }
+ const Value *getCompareOperand() const { return getOperand(1); }
+
+ Value *getNewValOperand() { return getOperand(2); }
+ const Value *getNewValOperand() const { return getOperand(2); }
+
+ /// \brief Returns the address space of the pointer operand.
+ unsigned getPointerAddressSpace() const {
+ return getPointerOperand()->getType()->getPointerAddressSpace();
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::AtomicCmpXchg;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ // Shadow Instruction::setInstructionSubclassData with a private forwarding
+ // method so that subclasses cannot accidentally use it.
+ void setInstructionSubclassData(unsigned short D) {
+ Instruction::setInstructionSubclassData(D);
+ }
+};
+
+template <>
+struct OperandTraits<AtomicCmpXchgInst> :
+ public FixedNumOperandTraits<AtomicCmpXchgInst, 3> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicCmpXchgInst, Value)
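+
+// Illustrative sketch: emit a cmpxchg and name its operands ('Ptr', 'Old',
+// 'New', and 'InsertPt' are hypothetical); the instruction's own result is
+// the value loaded from memory:
+//
+//   AtomicCmpXchgInst *CX =
+//       new AtomicCmpXchgInst(Ptr, Old, New, SequentiallyConsistent,
+//                             CrossThread, InsertPt);
+//   assert(CX->getCompareOperand() == Old && CX->getNewValOperand() == New);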
+
+//===----------------------------------------------------------------------===//
+// AtomicRMWInst Class
+//===----------------------------------------------------------------------===//
+
+/// AtomicRMWInst - an instruction that atomically reads a memory location,
+/// combines it with another value, and then stores the result back. Returns
+/// the old value.
+///
+class AtomicRMWInst : public Instruction {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+protected:
+ virtual AtomicRMWInst *clone_impl() const;
+public:
+ /// This enumeration lists the possible modifications atomicrmw can make. In
+ /// the descriptions, 'p' is the pointer to the instruction's memory location,
+ /// 'old' is the initial value of *p, and 'v' is the other value passed to the
+ /// instruction. These instructions always return 'old'.
+ enum BinOp {
+ /// *p = v
+ Xchg,
+ /// *p = old + v
+ Add,
+ /// *p = old - v
+ Sub,
+ /// *p = old & v
+ And,
+ /// *p = ~old & v
+ Nand,
+ /// *p = old | v
+ Or,
+ /// *p = old ^ v
+ Xor,
+ /// *p = old >signed v ? old : v
+ Max,
+ /// *p = old <signed v ? old : v
+ Min,
+ /// *p = old >unsigned v ? old : v
+ UMax,
+ /// *p = old <unsigned v ? old : v
+ UMin,
+
+ FIRST_BINOP = Xchg,
+ LAST_BINOP = UMin,
+ BAD_BINOP
+ };
+
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering, SynchronizationScope SynchScope,
+ Instruction *InsertBefore = 0);
+ AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering, SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd);
+
+ BinOp getOperation() const {
+ return static_cast<BinOp>(getSubclassDataFromInstruction() >> 5);
+ }
+
+ void setOperation(BinOp Operation) {
+ unsigned short SubclassData = getSubclassDataFromInstruction();
+ setInstructionSubclassData((SubclassData & 31) |
+ (Operation << 5));
+ }
+
+ /// isVolatile - Return true if this is a RMW on a volatile memory location.
+ ///
+ bool isVolatile() const {
+ return getSubclassDataFromInstruction() & 1;
+ }
+
+ /// setVolatile - Specify whether this is a volatile RMW or not.
+ ///
+ void setVolatile(bool V) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
+ (unsigned)V);
+ }
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// Set the ordering constraint on this RMW.
+ void setOrdering(AtomicOrdering Ordering) {
+ assert(Ordering != NotAtomic &&
+ "atomicrmw instructions can only be atomic.");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~(7 << 2)) |
+ (Ordering << 2));
+ }
+
+ /// Specify whether this RMW orders other operations with respect to all
+ /// concurrently executing threads, or only with respect to signal handlers
+ /// executing in the same thread.
+ void setSynchScope(SynchronizationScope SynchScope) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) |
+ (SynchScope << 1));
+ }
+
+ /// Returns the ordering constraint on this RMW.
+ AtomicOrdering getOrdering() const {
+ return AtomicOrdering((getSubclassDataFromInstruction() >> 2) & 7);
+ }
+
+ /// Returns whether this RMW is atomic between threads or only within a
+ /// single thread.
+ SynchronizationScope getSynchScope() const {
+ return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1);
+ }
+
+ Value *getPointerOperand() { return getOperand(0); }
+ const Value *getPointerOperand() const { return getOperand(0); }
+ static unsigned getPointerOperandIndex() { return 0U; }
+
+ Value *getValOperand() { return getOperand(1); }
+ const Value *getValOperand() const { return getOperand(1); }
+
+ /// \brief Returns the address space of the pointer operand.
+ unsigned getPointerAddressSpace() const {
+ return getPointerOperand()->getType()->getPointerAddressSpace();
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::AtomicRMW;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ void Init(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering, SynchronizationScope SynchScope);
+ // Shadow Instruction::setInstructionSubclassData with a private forwarding
+ // method so that subclasses cannot accidentally use it.
+ void setInstructionSubclassData(unsigned short D) {
+ Instruction::setInstructionSubclassData(D);
+ }
+};
+
+template <>
+struct OperandTraits<AtomicRMWInst>
+ : public FixedNumOperandTraits<AtomicRMWInst,2> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicRMWInst, Value)
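+
+// Illustrative sketch: an atomic fetch-and-add whose result is the *old*
+// value at Ptr ('Ptr', 'One', and 'InsertPt' are hypothetical):
+//
+//   AtomicRMWInst *RMW =
+//       new AtomicRMWInst(AtomicRMWInst::Add, Ptr, One,
+//                         SequentiallyConsistent, CrossThread, InsertPt);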
+
+//===----------------------------------------------------------------------===//
+// GetElementPtrInst Class
+//===----------------------------------------------------------------------===//
+
+// checkGEPType - Simple wrapper function to give a better assertion failure
+// message on bad indexes for a gep instruction.
+//
+inline Type *checkGEPType(Type *Ty) {
+ assert(Ty && "Invalid GetElementPtrInst indices for type!");
+ return Ty;
+}
+
+/// GetElementPtrInst - an instruction for type-safe pointer arithmetic to
+/// access elements of arrays and structs
+///
+class GetElementPtrInst : public Instruction {
+ GetElementPtrInst(const GetElementPtrInst &GEPI);
+ void init(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr);
+
+ /// Constructors - Create a getelementptr instruction with a base pointer and
+ /// a list of indices. The first ctor can optionally insert before an existing
+ /// instruction, the second appends the new instruction to the specified
+ /// BasicBlock.
+ inline GetElementPtrInst(Value *Ptr, ArrayRef<Value *> IdxList,
+ unsigned Values, const Twine &NameStr,
+ Instruction *InsertBefore);
+ inline GetElementPtrInst(Value *Ptr, ArrayRef<Value *> IdxList,
+ unsigned Values, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
+protected:
+ virtual GetElementPtrInst *clone_impl() const;
+public:
+ static GetElementPtrInst *Create(Value *Ptr, ArrayRef<Value *> IdxList,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ unsigned Values = 1 + unsigned(IdxList.size());
+ return new(Values)
+ GetElementPtrInst(Ptr, IdxList, Values, NameStr, InsertBefore);
+ }
+ static GetElementPtrInst *Create(Value *Ptr, ArrayRef<Value *> IdxList,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ unsigned Values = 1 + unsigned(IdxList.size());
+ return new(Values)
+ GetElementPtrInst(Ptr, IdxList, Values, NameStr, InsertAtEnd);
+ }
+
+ /// Create an "inbounds" getelementptr. See the documentation for the
+ /// "inbounds" flag in LangRef.html for details.
+ static GetElementPtrInst *CreateInBounds(Value *Ptr,
+ ArrayRef<Value *> IdxList,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ GetElementPtrInst *GEP = Create(Ptr, IdxList, NameStr, InsertBefore);
+ GEP->setIsInBounds(true);
+ return GEP;
+ }
+ static GetElementPtrInst *CreateInBounds(Value *Ptr,
+ ArrayRef<Value *> IdxList,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ GetElementPtrInst *GEP = Create(Ptr, IdxList, NameStr, InsertAtEnd);
+ GEP->setIsInBounds(true);
+ return GEP;
+ }
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ // getType - Overload to return most specific sequential type.
+ SequentialType *getType() const {
+ return cast<SequentialType>(Instruction::getType());
+ }
+
+ /// \brief Returns the address space of this instruction's pointer type.
+ unsigned getAddressSpace() const {
+ // Note that this is always the same as the pointer operand's address space
+ // and that is cheaper to compute, so cheat here.
+ return getPointerAddressSpace();
+ }
+
+ /// getIndexedType - Returns the type of the element that would be loaded with
+ /// a load instruction with the specified parameters.
+ ///
+ /// Null is returned if the indices are invalid for the specified
+ /// pointer type.
+ ///
+ static Type *getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList);
+ static Type *getIndexedType(Type *Ptr, ArrayRef<Constant *> IdxList);
+ static Type *getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList);
+
+ inline op_iterator idx_begin() { return op_begin()+1; }
+ inline const_op_iterator idx_begin() const { return op_begin()+1; }
+ inline op_iterator idx_end() { return op_end(); }
+ inline const_op_iterator idx_end() const { return op_end(); }
+
+ Value *getPointerOperand() {
+ return getOperand(0);
+ }
+ const Value *getPointerOperand() const {
+ return getOperand(0);
+ }
+ static unsigned getPointerOperandIndex() {
+ return 0U; // get index for modifying correct operand.
+ }
+
+ /// getPointerOperandType - Method to return the pointer operand as a
+ /// PointerType.
+ Type *getPointerOperandType() const {
+ return getPointerOperand()->getType();
+ }
+
+ /// \brief Returns the address space of the pointer operand.
+ unsigned getPointerAddressSpace() const {
+ return getPointerOperandType()->getPointerAddressSpace();
+ }
+
+ /// getGEPReturnType - Returns the pointer type returned by the GEP
+ /// instruction, which may be a vector of pointers.
+ static Type *getGEPReturnType(Value *Ptr, ArrayRef<Value *> IdxList) {
+ Type *PtrTy = PointerType::get(checkGEPType(
+ getIndexedType(Ptr->getType(), IdxList)),
+ Ptr->getType()->getPointerAddressSpace());
+ // Vector GEP
+ if (Ptr->getType()->isVectorTy()) {
+ unsigned NumElem = cast<VectorType>(Ptr->getType())->getNumElements();
+ return VectorType::get(PtrTy, NumElem);
+ }
+
+ // Scalar GEP
+ return PtrTy;
+ }
+
+ unsigned getNumIndices() const { // Note: always non-negative
+ return getNumOperands() - 1;
+ }
+
+ bool hasIndices() const {
+ return getNumOperands() > 1;
+ }
+
+ /// hasAllZeroIndices - Return true if all of the indices of this GEP are
+ /// zeros. If so, the result pointer and the first operand have the same
+ /// value, just potentially different types.
+ bool hasAllZeroIndices() const;
+
+ /// hasAllConstantIndices - Return true if all of the indices of this GEP are
+ /// constant integers. If so, the result pointer and the first operand have
+ /// a constant offset between them.
+ bool hasAllConstantIndices() const;
+
+ /// setIsInBounds - Set or clear the inbounds flag on this GEP instruction.
+ /// See LangRef.html for the meaning of inbounds on a getelementptr.
+ void setIsInBounds(bool b = true);
+
+ /// isInBounds - Determine whether the GEP has the inbounds flag.
+ bool isInBounds() const;
+
+ /// \brief Accumulate the constant address offset of this GEP if possible.
+ ///
+ /// This routine accepts an APInt into which it will accumulate the constant
+ /// offset of this GEP if the GEP is in fact constant. If the GEP is not
+ /// all-constant, it returns false and the value of the offset APInt is
+ /// undefined (it is *not* preserved!). The APInt passed into this routine
+ /// must be at least as wide as the IntPtr type for the address space of
+ /// the base GEP pointer.
+ bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const;
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return (I->getOpcode() == Instruction::GetElementPtr);
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<GetElementPtrInst> :
+ public VariadicOperandTraits<GetElementPtrInst, 1> {
+};
+
+GetElementPtrInst::GetElementPtrInst(Value *Ptr,
+ ArrayRef<Value *> IdxList,
+ unsigned Values,
+ const Twine &NameStr,
+ Instruction *InsertBefore)
+ : Instruction(getGEPReturnType(Ptr, IdxList),
+ GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this) - Values,
+ Values, InsertBefore) {
+ init(Ptr, IdxList, NameStr);
+}
+GetElementPtrInst::GetElementPtrInst(Value *Ptr,
+ ArrayRef<Value *> IdxList,
+ unsigned Values,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : Instruction(getGEPReturnType(Ptr, IdxList),
+ GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this) - Values,
+ Values, InsertAtEnd) {
+ init(Ptr, IdxList, NameStr);
+}
+
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
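+
+// Illustrative sketch: computing the address of element [0][Idx] of a nested
+// array with an inbounds GEP ('Base', 'Zero', 'Idx', and 'InsertPt' are
+// hypothetical):
+//
+//   Value *Indices[] = { Zero, Idx };
+//   GetElementPtrInst *GEP =
+//       GetElementPtrInst::CreateInBounds(Base, Indices, "elt", InsertPt);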
+
+
+//===----------------------------------------------------------------------===//
+// ICmpInst Class
+//===----------------------------------------------------------------------===//
+
+/// This instruction compares its operands according to the predicate given
+/// to the constructor. It only operates on integers or pointers. The operands
+/// must be identical types.
+/// \brief Represent an integer comparison operator.
+class ICmpInst: public CmpInst {
+protected:
+ /// \brief Clone an identical ICmpInst
+ virtual ICmpInst *clone_impl() const;
+public:
+ /// \brief Constructor with insert-before-instruction semantics.
+ ICmpInst(
+ Instruction *InsertBefore, ///< Where to insert
+ Predicate pred, ///< The predicate to use for the comparison
+ Value *LHS, ///< The left-hand-side of the expression
+ Value *RHS, ///< The right-hand-side of the expression
+ const Twine &NameStr = "" ///< Name of the instruction
+ ) : CmpInst(makeCmpResultType(LHS->getType()),
+ Instruction::ICmp, pred, LHS, RHS, NameStr,
+ InsertBefore) {
+ assert(pred >= CmpInst::FIRST_ICMP_PREDICATE &&
+ pred <= CmpInst::LAST_ICMP_PREDICATE &&
+ "Invalid ICmp predicate value");
+ assert(getOperand(0)->getType() == getOperand(1)->getType() &&
+ "Both operands to ICmp instruction are not of the same type!");
+ // Check that the operands are the right type
+ assert((getOperand(0)->getType()->isIntOrIntVectorTy() ||
+ getOperand(0)->getType()->getScalarType()->isPointerTy()) &&
+ "Invalid operand types for ICmp instruction");
+ }
+
+ /// \brief Constructor with insert-at-end semantics.
+ ICmpInst(
+ BasicBlock &InsertAtEnd, ///< Block to insert into.
+ Predicate pred, ///< The predicate to use for the comparison
+ Value *LHS, ///< The left-hand-side of the expression
+ Value *RHS, ///< The right-hand-side of the expression
+ const Twine &NameStr = "" ///< Name of the instruction
+ ) : CmpInst(makeCmpResultType(LHS->getType()),
+ Instruction::ICmp, pred, LHS, RHS, NameStr,
+ &InsertAtEnd) {
+ assert(pred >= CmpInst::FIRST_ICMP_PREDICATE &&
+ pred <= CmpInst::LAST_ICMP_PREDICATE &&
+ "Invalid ICmp predicate value");
+ assert(getOperand(0)->getType() == getOperand(1)->getType() &&
+ "Both operands to ICmp instruction are not of the same type!");
+ // Check that the operands are the right type
+ assert((getOperand(0)->getType()->isIntOrIntVectorTy() ||
+ getOperand(0)->getType()->getScalarType()->isPointerTy()) &&
+ "Invalid operand types for ICmp instruction");
+ }
+
+ /// \brief Constructor with no-insertion semantics
+ ICmpInst(
+ Predicate pred, ///< The predicate to use for the comparison
+ Value *LHS, ///< The left-hand-side of the expression
+ Value *RHS, ///< The right-hand-side of the expression
+ const Twine &NameStr = "" ///< Name of the instruction
+ ) : CmpInst(makeCmpResultType(LHS->getType()),
+ Instruction::ICmp, pred, LHS, RHS, NameStr) {
+ assert(pred >= CmpInst::FIRST_ICMP_PREDICATE &&
+ pred <= CmpInst::LAST_ICMP_PREDICATE &&
+ "Invalid ICmp predicate value");
+ assert(getOperand(0)->getType() == getOperand(1)->getType() &&
+ "Both operands to ICmp instruction are not of the same type!");
+ // Check that the operands are the right type
+ assert((getOperand(0)->getType()->isIntOrIntVectorTy() ||
+ getOperand(0)->getType()->getScalarType()->isPointerTy()) &&
+ "Invalid operand types for ICmp instruction");
+ }
+
+ /// For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
+ /// @returns the predicate that would be the result if the operands were
+ /// regarded as signed.
+ /// \brief Return the signed version of the predicate
+ Predicate getSignedPredicate() const {
+ return getSignedPredicate(getPredicate());
+ }
+
+ /// This is a static version that you can use without an instruction.
+ /// \brief Return the signed version of the predicate.
+ static Predicate getSignedPredicate(Predicate pred);
+
+ /// For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
+ /// @returns the predicate that would be the result if the operands were
+ /// regarded as unsigned.
+ /// \brief Return the unsigned version of the predicate
+ Predicate getUnsignedPredicate() const {
+ return getUnsignedPredicate(getPredicate());
+ }
+
+ /// This is a static version that you can use without an instruction.
+ /// \brief Return the unsigned version of the predicate.
+ static Predicate getUnsignedPredicate(Predicate pred);
+
+ /// isEquality - Return true if this predicate is either EQ or NE. This also
+ /// tests for commutativity.
+ static bool isEquality(Predicate P) {
+ return P == ICMP_EQ || P == ICMP_NE;
+ }
+
+ /// isEquality - Return true if this predicate is either EQ or NE. This also
+ /// tests for commutativity.
+ bool isEquality() const {
+ return isEquality(getPredicate());
+ }
+
+ /// @returns true if the predicate of this ICmpInst is commutative
+ /// \brief Determine if this relation is commutative.
+ bool isCommutative() const { return isEquality(); }
+
+ /// isRelational - Return true if the predicate is relational (not EQ or NE).
+ ///
+ bool isRelational() const {
+ return !isEquality();
+ }
+
+ /// isRelational - Return true if the predicate is relational (not EQ or NE).
+ ///
+ static bool isRelational(Predicate P) {
+ return !isEquality(P);
+ }
+
+ /// Build the set of all values that satisfy the predicate when compared
+ /// with the constant C.
+ /// \brief Make a ConstantRange for a relation with a constant value.
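+ /// A hedged sketch of the intended use (the exact endpoints shown are an
+ /// illustrative assumption):
+ /// \code
+ ///   // All 32-bit values X with (X u< 8), i.e. the half-open range [0, 8).
+ ///   ConstantRange R =
+ ///       ICmpInst::makeConstantRange(ICmpInst::ICMP_ULT, APInt(32, 8));
+ /// \endcode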
+ static ConstantRange makeConstantRange(Predicate pred, const APInt &C);
+
+ /// Exchange the two operands to this instruction in such a way that it does
+ /// not modify the semantics of the instruction. The predicate value may be
+ /// changed to retain the same result if the predicate is order dependent
+ /// (e.g. ult).
+ /// \brief Swap operands and adjust predicate.
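+ /// For instance, a sketch (assuming \p A and \p B are existing i32 Values):
+ /// \code
+ ///   ICmpInst *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, A, B); // A u< B
+ ///   Cmp->swapOperands(); // now ICMP_UGT on (B, A); the result is unchanged
+ /// \endcode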
+ void swapOperands() {
+ setPredicate(getSwappedPredicate());
+ Op<0>().swap(Op<1>());
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::ICmp;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+
+};
+
+//===----------------------------------------------------------------------===//
+// FCmpInst Class
+//===----------------------------------------------------------------------===//
+
+/// This instruction compares its operands according to the predicate given
+/// to the constructor. It only operates on floating point values or packed
+/// vectors of floating point values. The operands must have identical types.
+/// \brief Represents a floating point comparison operator.
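+///
+/// A minimal creation sketch (assuming \p X and \p Y are float Values and
+/// \p InsertPt an existing Instruction):
+/// \code
+///   FCmpInst *Cmp = new FCmpInst(InsertPt, FCmpInst::FCMP_OLT, X, Y, "lt");
+/// \endcode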
+class FCmpInst: public CmpInst {
+protected:
+ /// \brief Clone an identical FCmpInst
+ virtual FCmpInst *clone_impl() const;
+public:
+ /// \brief Constructor with insert-before-instruction semantics.
+ FCmpInst(
+ Instruction *InsertBefore, ///< Where to insert
+ Predicate pred, ///< The predicate to use for the comparison
+ Value *LHS, ///< The left-hand-side of the expression
+ Value *RHS, ///< The right-hand-side of the expression
+ const Twine &NameStr = "" ///< Name of the instruction
+ ) : CmpInst(makeCmpResultType(LHS->getType()),
+ Instruction::FCmp, pred, LHS, RHS, NameStr,
+ InsertBefore) {
+ assert(pred <= FCmpInst::LAST_FCMP_PREDICATE &&
+ "Invalid FCmp predicate value");
+ assert(getOperand(0)->getType() == getOperand(1)->getType() &&
+ "Both operands to FCmp instruction are not of the same type!");
+ // Check that the operands are the right type
+ assert(getOperand(0)->getType()->isFPOrFPVectorTy() &&
+ "Invalid operand types for FCmp instruction");
+ }
+
+ /// \brief Constructor with insert-at-end semantics.
+ FCmpInst(
+ BasicBlock &InsertAtEnd, ///< Block to insert into.
+ Predicate pred, ///< The predicate to use for the comparison
+ Value *LHS, ///< The left-hand-side of the expression
+ Value *RHS, ///< The right-hand-side of the expression
+ const Twine &NameStr = "" ///< Name of the instruction
+ ) : CmpInst(makeCmpResultType(LHS->getType()),
+ Instruction::FCmp, pred, LHS, RHS, NameStr,
+ &InsertAtEnd) {
+ assert(pred <= FCmpInst::LAST_FCMP_PREDICATE &&
+ "Invalid FCmp predicate value");
+ assert(getOperand(0)->getType() == getOperand(1)->getType() &&
+ "Both operands to FCmp instruction are not of the same type!");
+ // Check that the operands are the right type
+ assert(getOperand(0)->getType()->isFPOrFPVectorTy() &&
+ "Invalid operand types for FCmp instruction");
+ }
+
+ /// \brief Constructor with no-insertion semantics
+ FCmpInst(
+ Predicate pred, ///< The predicate to use for the comparison
+ Value *LHS, ///< The left-hand-side of the expression
+ Value *RHS, ///< The right-hand-side of the expression
+ const Twine &NameStr = "" ///< Name of the instruction
+ ) : CmpInst(makeCmpResultType(LHS->getType()),
+ Instruction::FCmp, pred, LHS, RHS, NameStr) {
+ assert(pred <= FCmpInst::LAST_FCMP_PREDICATE &&
+ "Invalid FCmp predicate value");
+ assert(getOperand(0)->getType() == getOperand(1)->getType() &&
+ "Both operands to FCmp instruction are not of the same type!");
+ // Check that the operands are the right type
+ assert(getOperand(0)->getType()->isFPOrFPVectorTy() &&
+ "Invalid operand types for FCmp instruction");
+ }
+
+ /// @returns true if the predicate of this instruction is EQ or NE.
+ /// \brief Determine if this is an equality predicate.
+ bool isEquality() const {
+ return getPredicate() == FCMP_OEQ || getPredicate() == FCMP_ONE ||
+ getPredicate() == FCMP_UEQ || getPredicate() == FCMP_UNE;
+ }
+
+ /// @returns true if the predicate of this instruction is commutative.
+ /// \brief Determine if this is a commutative predicate.
+ bool isCommutative() const {
+ return isEquality() ||
+ getPredicate() == FCMP_FALSE ||
+ getPredicate() == FCMP_TRUE ||
+ getPredicate() == FCMP_ORD ||
+ getPredicate() == FCMP_UNO;
+ }
+
+ /// @returns true if the predicate is relational (not EQ or NE).
+ /// \brief Determine if this a relational predicate.
+ bool isRelational() const { return !isEquality(); }
+
+ /// Exchange the two operands to this instruction in such a way that it does
+ /// not modify the semantics of the instruction. The predicate value may be
+ /// changed to retain the same result if the predicate is order dependent
+ /// (e.g. ult).
+ /// \brief Swap operands and adjust predicate.
+ void swapOperands() {
+ setPredicate(getSwappedPredicate());
+ Op<0>().swap(Op<1>());
+ }
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::FCmp;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// CallInst - This class represents a function call, abstracting a target
+/// machine's calling convention. This class uses the low bit of the
+/// SubclassData field to indicate whether or not this is a tail call. The
+/// rest of the bits hold the calling convention of the call.
+///
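+/// A usage sketch (assuming \p Callee is a Function* and \p Args a suitable
+/// ArrayRef<Value*> of arguments):
+/// \code
+///   CallInst *CI = CallInst::Create(Callee, Args, "call", InsertPt);
+///   CI->setTailCall();
+///   CI->setCallingConv(CallingConv::Fast);
+/// \endcode
+///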
+class CallInst : public Instruction {
+ AttributeSet AttributeList; ///< parameter attributes for call
+ CallInst(const CallInst &CI);
+ void init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr);
+ void init(Value *Func, const Twine &NameStr);
+
+ /// Construct a CallInst given a range of arguments.
+ /// \brief Construct a CallInst from a range of arguments
+ inline CallInst(Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, Instruction *InsertBefore);
+
+ /// Construct a CallInst given a range of arguments.
+ /// \brief Construct a CallInst from a range of arguments
+ inline CallInst(Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+ CallInst(Value *F, Value *Actual, const Twine &NameStr,
+ Instruction *InsertBefore);
+ CallInst(Value *F, Value *Actual, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
+ explicit CallInst(Value *F, const Twine &NameStr,
+ Instruction *InsertBefore);
+ CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd);
+protected:
+ virtual CallInst *clone_impl() const;
+public:
+ static CallInst *Create(Value *Func,
+ ArrayRef<Value *> Args,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ return new(unsigned(Args.size() + 1))
+ CallInst(Func, Args, NameStr, InsertBefore);
+ }
+ static CallInst *Create(Value *Func,
+ ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return new(unsigned(Args.size() + 1))
+ CallInst(Func, Args, NameStr, InsertAtEnd);
+ }
+ static CallInst *Create(Value *F, const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ return new(1) CallInst(F, NameStr, InsertBefore);
+ }
+ static CallInst *Create(Value *F, const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new(1) CallInst(F, NameStr, InsertAtEnd);
+ }
+ /// CreateMalloc - Generate the IR for a call to malloc:
+ /// 1. Compute the malloc call's argument as the specified type's size,
+ /// possibly multiplied by the array size if the array size is not
+ /// constant 1.
+ /// 2. Call malloc with that argument.
+ /// 3. Bitcast the result of the malloc call to the specified type.
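+ /// A hedged sketch (all operand names here are placeholder assumptions;
+ /// \p AllocSize must be the size of \p AllocTy as an \p IntPtrTy value):
+ /// \code
+ ///   // %m = bitcast i8* malloc(AllocSize * N) to i32*
+ ///   Instruction *M = CallInst::CreateMalloc(InsertPt, IntPtrTy,
+ ///                                           Int32Ty, AllocSize, N);
+ /// \endcode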
+ static Instruction *CreateMalloc(Instruction *InsertBefore,
+ Type *IntPtrTy, Type *AllocTy,
+ Value *AllocSize, Value *ArraySize = 0,
+ Function* MallocF = 0,
+ const Twine &Name = "");
+ static Instruction *CreateMalloc(BasicBlock *InsertAtEnd,
+ Type *IntPtrTy, Type *AllocTy,
+ Value *AllocSize, Value *ArraySize = 0,
+ Function* MallocF = 0,
+ const Twine &Name = "");
+ /// CreateFree - Generate the IR for a call to the builtin free function.
+ static Instruction* CreateFree(Value* Source, Instruction *InsertBefore);
+ static Instruction* CreateFree(Value* Source, BasicBlock *InsertAtEnd);
+
+ ~CallInst();
+
+ bool isTailCall() const { return getSubclassDataFromInstruction() & 1; }
+ void setTailCall(bool isTC = true) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
+ unsigned(isTC));
+ }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// getNumArgOperands - Return the number of call arguments.
+ ///
+ unsigned getNumArgOperands() const { return getNumOperands() - 1; }
+
+ /// getArgOperand/setArgOperand - Return/set the i-th call argument.
+ ///
+ Value *getArgOperand(unsigned i) const { return getOperand(i); }
+ void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
+
+ /// getCallingConv/setCallingConv - Get or set the calling convention of this
+ /// function call.
+ CallingConv::ID getCallingConv() const {
+ return static_cast<CallingConv::ID>(getSubclassDataFromInstruction() >> 1);
+ }
+ void setCallingConv(CallingConv::ID CC) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
+ (static_cast<unsigned>(CC) << 1));
+ }
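+
+ // Subclass-data layout used by the two accessors above: bit 0 holds the
+ // tail-call flag; the remaining bits hold the calling convention, shifted
+ // left by one.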
+
+ /// getAttributes - Return the parameter attributes for this call.
+ ///
+ const AttributeSet &getAttributes() const { return AttributeList; }
+
+ /// setAttributes - Set the parameter attributes for this call.
+ ///
+ void setAttributes(const AttributeSet &Attrs) { AttributeList = Attrs; }
+
+ /// addAttribute - adds the attribute to the list of attributes.
+ void addAttribute(unsigned i, Attribute::AttrKind attr);
+
+ /// removeAttribute - removes the attribute from the list of attributes.
+ void removeAttribute(unsigned i, Attribute attr);
+
+ /// \brief Determine whether this call has the given attribute.
+ bool hasFnAttr(Attribute::AttrKind A) const;
+
+ /// \brief Determine whether the call or the callee has the given attributes.
+ bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
+
+ /// \brief Extract the alignment for a call or parameter (0=unknown).
+ unsigned getParamAlignment(unsigned i) const {
+ return AttributeList.getParamAlignment(i);
+ }
+
+ /// \brief Return true if the call should not be inlined.
+ bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
+ void setIsNoInline() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::NoInline);
+ }
+
+ /// \brief Return true if the call can return twice
+ bool canReturnTwice() const {
+ return hasFnAttr(Attribute::ReturnsTwice);
+ }
+ void setCanReturnTwice() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::ReturnsTwice);
+ }
+
+ /// \brief Determine if the call does not access memory.
+ bool doesNotAccessMemory() const {
+ return hasFnAttr(Attribute::ReadNone);
+ }
+ void setDoesNotAccessMemory() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
+ }
+
+ /// \brief Determine if the call does not access or only reads memory.
+ bool onlyReadsMemory() const {
+ return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
+ }
+ void setOnlyReadsMemory() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly);
+ }
+
+ /// \brief Determine if the call cannot return.
+ bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
+ void setDoesNotReturn() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::NoReturn);
+ }
+
+ /// \brief Determine if the call cannot unwind.
+ bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
+ void setDoesNotThrow() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
+ }
+
+ /// \brief Determine if the call cannot be duplicated.
+ bool cannotDuplicate() const {return hasFnAttr(Attribute::NoDuplicate); }
+ void setCannotDuplicate() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::NoDuplicate);
+ }
+
+ /// \brief Determine if the call returns a structure through its first
+ /// pointer argument.
+ bool hasStructRetAttr() const {
+ // Be friendly and also check the callee.
+ return paramHasAttr(1, Attribute::StructRet);
+ }
+
+ /// \brief Determine if any call argument is an aggregate passed by value.
+ bool hasByValArgument() const {
+ return AttributeList.hasAttrSomewhere(Attribute::ByVal);
+ }
+
+ /// getCalledFunction - Return the function called, or null if this is an
+ /// indirect function invocation.
+ ///
+ Function *getCalledFunction() const {
+ return dyn_cast<Function>(Op<-1>());
+ }
+
+ /// getCalledValue - Get a pointer to the function that is invoked by this
+ /// instruction.
+ const Value *getCalledValue() const { return Op<-1>(); }
+ Value *getCalledValue() { return Op<-1>(); }
+
+ /// setCalledFunction - Set the function called.
+ void setCalledFunction(Value* Fn) {
+ Op<-1>() = Fn;
+ }
+
+ /// isInlineAsm - Check if this call is an inline asm statement.
+ bool isInlineAsm() const {
+ return isa<InlineAsm>(Op<-1>());
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Call;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ // Shadow Instruction::setInstructionSubclassData with a private forwarding
+ // method so that subclasses cannot accidentally use it.
+ void setInstructionSubclassData(unsigned short D) {
+ Instruction::setInstructionSubclassData(D);
+ }
+};
+
+template <>
+struct OperandTraits<CallInst> : public VariadicOperandTraits<CallInst, 1> {
+};
+
+CallInst::CallInst(Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, BasicBlock *InsertAtEnd)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - (Args.size() + 1),
+ unsigned(Args.size() + 1), InsertAtEnd) {
+ init(Func, Args, NameStr);
+}
+
+CallInst::CallInst(Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, Instruction *InsertBefore)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - (Args.size() + 1),
+ unsigned(Args.size() + 1), InsertBefore) {
+ init(Func, Args, NameStr);
+}
+
+
+// Note: if you get compile errors about private methods then
+// please update your code to use the high-level operand
+// interfaces. See line 943 above.
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CallInst, Value)
+
+//===----------------------------------------------------------------------===//
+// SelectInst Class
+//===----------------------------------------------------------------------===//
+
+/// SelectInst - This class represents the LLVM 'select' instruction.
+///
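+/// A creation sketch (assuming \p Cond is an i1 Value and \p A / \p B share
+/// a type):
+/// \code
+///   SelectInst *Sel = SelectInst::Create(Cond, A, B, "sel", InsertPt);
+///   // Sel yields A when Cond is true, otherwise B.
+/// \endcode
+///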
+class SelectInst : public Instruction {
+ void init(Value *C, Value *S1, Value *S2) {
+ assert(!areInvalidOperands(C, S1, S2) && "Invalid operands for select");
+ Op<0>() = C;
+ Op<1>() = S1;
+ Op<2>() = S2;
+ }
+
+ SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr,
+ Instruction *InsertBefore)
+ : Instruction(S1->getType(), Instruction::Select,
+ &Op<0>(), 3, InsertBefore) {
+ init(C, S1, S2);
+ setName(NameStr);
+ }
+ SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : Instruction(S1->getType(), Instruction::Select,
+ &Op<0>(), 3, InsertAtEnd) {
+ init(C, S1, S2);
+ setName(NameStr);
+ }
+protected:
+ virtual SelectInst *clone_impl() const;
+public:
+ static SelectInst *Create(Value *C, Value *S1, Value *S2,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ return new(3) SelectInst(C, S1, S2, NameStr, InsertBefore);
+ }
+ static SelectInst *Create(Value *C, Value *S1, Value *S2,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new(3) SelectInst(C, S1, S2, NameStr, InsertAtEnd);
+ }
+
+ const Value *getCondition() const { return Op<0>(); }
+ const Value *getTrueValue() const { return Op<1>(); }
+ const Value *getFalseValue() const { return Op<2>(); }
+ Value *getCondition() { return Op<0>(); }
+ Value *getTrueValue() { return Op<1>(); }
+ Value *getFalseValue() { return Op<2>(); }
+
+ /// areInvalidOperands - Return a string if the specified operands are invalid
+ /// for a select operation; otherwise return null.
+ static const char *areInvalidOperands(Value *Cond, Value *True, Value *False);
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ OtherOps getOpcode() const {
+ return static_cast<OtherOps>(Instruction::getOpcode());
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Select;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<SelectInst> : public FixedNumOperandTraits<SelectInst, 3> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value)
+
+//===----------------------------------------------------------------------===//
+// VAArgInst Class
+//===----------------------------------------------------------------------===//
+
+/// VAArgInst - This class represents the va_arg LLVM instruction, which
+/// returns an argument of the specified type given a va_list and increments
+/// that list.
+///
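+/// A sketch (assuming \p VAList points to a target-specific va_list):
+/// \code
+///   Value *Arg = new VAArgInst(VAList, Int32Ty, "arg", InsertPt);
+/// \endcode
+///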
+class VAArgInst : public UnaryInstruction {
+protected:
+ virtual VAArgInst *clone_impl() const;
+
+public:
+ VAArgInst(Value *List, Type *Ty, const Twine &NameStr = "",
+ Instruction *InsertBefore = 0)
+ : UnaryInstruction(Ty, VAArg, List, InsertBefore) {
+ setName(NameStr);
+ }
+ VAArgInst(Value *List, Type *Ty, const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : UnaryInstruction(Ty, VAArg, List, InsertAtEnd) {
+ setName(NameStr);
+ }
+
+ Value *getPointerOperand() { return getOperand(0); }
+ const Value *getPointerOperand() const { return getOperand(0); }
+ static unsigned getPointerOperandIndex() { return 0U; }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == VAArg;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// ExtractElementInst Class
+//===----------------------------------------------------------------------===//
+
+/// ExtractElementInst - This instruction extracts a single (scalar)
+/// element from a VectorType value.
+///
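+/// A sketch (assuming \p Vec is a <4 x i32> Value and \p Idx an i32 Value):
+/// \code
+///   Value *Elt = ExtractElementInst::Create(Vec, Idx, "elt", InsertPt);
+/// \endcode
+///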
+class ExtractElementInst : public Instruction {
+ ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr = "",
+ Instruction *InsertBefore = 0);
+ ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
+protected:
+ virtual ExtractElementInst *clone_impl() const;
+
+public:
+ static ExtractElementInst *Create(Value *Vec, Value *Idx,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertBefore);
+ }
+ static ExtractElementInst *Create(Value *Vec, Value *Idx,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertAtEnd);
+ }
+
+ /// isValidOperands - Return true if an extractelement instruction can be
+ /// formed with the specified operands.
+ static bool isValidOperands(const Value *Vec, const Value *Idx);
+
+ Value *getVectorOperand() { return Op<0>(); }
+ Value *getIndexOperand() { return Op<1>(); }
+ const Value *getVectorOperand() const { return Op<0>(); }
+ const Value *getIndexOperand() const { return Op<1>(); }
+
+ VectorType *getVectorOperandType() const {
+ return cast<VectorType>(getVectorOperand()->getType());
+ }
+
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::ExtractElement;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<ExtractElementInst> :
+ public FixedNumOperandTraits<ExtractElementInst, 2> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementInst, Value)
+
+//===----------------------------------------------------------------------===//
+// InsertElementInst Class
+//===----------------------------------------------------------------------===//
+
+/// InsertElementInst - This instruction inserts a single (scalar)
+/// element into a VectorType value.
+///
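+/// A sketch (assuming element and index types that pass isValidOperands):
+/// \code
+///   Value *NewVec =
+///       InsertElementInst::Create(Vec, Elt, Idx, "ins", InsertPt);
+/// \endcode
+///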
+class InsertElementInst : public Instruction {
+ InsertElementInst(Value *Vec, Value *NewElt, Value *Idx,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0);
+ InsertElementInst(Value *Vec, Value *NewElt, Value *Idx,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+protected:
+ virtual InsertElementInst *clone_impl() const;
+
+public:
+ static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertBefore);
+ }
+ static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertAtEnd);
+ }
+
+ /// isValidOperands - Return true if an insertelement instruction can be
+ /// formed with the specified operands.
+ static bool isValidOperands(const Value *Vec, const Value *NewElt,
+ const Value *Idx);
+
+ /// getType - Overload to return most specific vector type.
+ ///
+ VectorType *getType() const {
+ return cast<VectorType>(Instruction::getType());
+ }
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::InsertElement;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<InsertElementInst> :
+ public FixedNumOperandTraits<InsertElementInst, 3> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)
+
+//===----------------------------------------------------------------------===//
+// ShuffleVectorInst Class
+//===----------------------------------------------------------------------===//
+
+/// ShuffleVectorInst - This instruction constructs a fixed permutation of two
+/// input vectors.
+///
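+/// A sketch (assuming \p V1 and \p V2 are <2 x i32> Values; mask indices of
+/// 2 or more select from \p V2):
+/// \code
+///   SmallVector<Constant*, 2> MaskElts;
+///   MaskElts.push_back(ConstantInt::get(Int32Ty, 0)); // V1[0]
+///   MaskElts.push_back(ConstantInt::get(Int32Ty, 2)); // V2[0]
+///   Value *Shuf = new ShuffleVectorInst(V1, V2,
+///                     ConstantVector::get(MaskElts), "shuf", InsertPt);
+/// \endcode
+///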
+class ShuffleVectorInst : public Instruction {
+protected:
+ virtual ShuffleVectorInst *clone_impl() const;
+
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0);
+ ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+ /// isValidOperands - Return true if a shufflevector instruction can be
+ /// formed with the specified operands.
+ static bool isValidOperands(const Value *V1, const Value *V2,
+ const Value *Mask);
+
+ /// getType - Overload to return most specific vector type.
+ ///
+ VectorType *getType() const {
+ return cast<VectorType>(Instruction::getType());
+ }
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ Constant *getMask() const {
+ return cast<Constant>(getOperand(2));
+ }
+
+ /// getMaskValue - Return the index from the shuffle mask for the specified
+ /// output result. This is either -1 if the element is undef or a number less
+ /// than 2*numelements.
+ static int getMaskValue(Constant *Mask, unsigned i);
+
+ int getMaskValue(unsigned i) const {
+ return getMaskValue(getMask(), i);
+ }
+
+ /// getShuffleMask - Return the full mask for this instruction, where each
+ /// element is the element number and undefs are returned as -1.
+ static void getShuffleMask(Constant *Mask, SmallVectorImpl<int> &Result);
+
+ void getShuffleMask(SmallVectorImpl<int> &Result) const {
+ return getShuffleMask(getMask(), Result);
+ }
+
+ SmallVector<int, 16> getShuffleMask() const {
+ SmallVector<int, 16> Mask;
+ getShuffleMask(Mask);
+ return Mask;
+ }
+
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::ShuffleVector;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<ShuffleVectorInst> :
+ public FixedNumOperandTraits<ShuffleVectorInst, 3> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorInst, Value)
+
+//===----------------------------------------------------------------------===//
+// ExtractValueInst Class
+//===----------------------------------------------------------------------===//
+
+/// ExtractValueInst - This instruction extracts a struct member or array
+/// element value from an aggregate value.
+///
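+/// A sketch (assuming \p Agg has type { i32, { float, i8 } }):
+/// \code
+///   unsigned Idxs[] = { 1, 0 }; // address the nested float field
+///   Value *F = ExtractValueInst::Create(Agg, Idxs, "f", InsertPt);
+/// \endcode
+///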
+class ExtractValueInst : public UnaryInstruction {
+ SmallVector<unsigned, 4> Indices;
+
+ ExtractValueInst(const ExtractValueInst &EVI);
+ void init(ArrayRef<unsigned> Idxs, const Twine &NameStr);
+
+ /// Constructors - Create an extractvalue instruction with a base aggregate
+ /// value and a list of indices. The first ctor can optionally insert before
+ /// an existing instruction; the second appends the new instruction to the
+ /// specified BasicBlock.
+ inline ExtractValueInst(Value *Agg,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr,
+ Instruction *InsertBefore);
+ inline ExtractValueInst(Value *Agg,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+protected:
+ virtual ExtractValueInst *clone_impl() const;
+
+public:
+ static ExtractValueInst *Create(Value *Agg,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ return new
+ ExtractValueInst(Agg, Idxs, NameStr, InsertBefore);
+ }
+ static ExtractValueInst *Create(Value *Agg,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new ExtractValueInst(Agg, Idxs, NameStr, InsertAtEnd);
+ }
+
+ /// getIndexedType - Returns the type of the element that would be extracted
+ /// with an extractvalue instruction with the specified parameters.
+ ///
+ /// Null is returned if the indices are invalid for the specified type.
+ static Type *getIndexedType(Type *Agg, ArrayRef<unsigned> Idxs);
+
+ typedef const unsigned* idx_iterator;
+ inline idx_iterator idx_begin() const { return Indices.begin(); }
+ inline idx_iterator idx_end() const { return Indices.end(); }
+
+ Value *getAggregateOperand() {
+ return getOperand(0);
+ }
+ const Value *getAggregateOperand() const {
+ return getOperand(0);
+ }
+ static unsigned getAggregateOperandIndex() {
+ return 0U; // get index for modifying correct operand
+ }
+
+ ArrayRef<unsigned> getIndices() const {
+ return Indices;
+ }
+
+ unsigned getNumIndices() const {
+ return (unsigned)Indices.size();
+ }
+
+ bool hasIndices() const {
+ return true;
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::ExtractValue;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+ExtractValueInst::ExtractValueInst(Value *Agg,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr,
+ Instruction *InsertBefore)
+ : UnaryInstruction(checkGEPType(getIndexedType(Agg->getType(), Idxs)),
+ ExtractValue, Agg, InsertBefore) {
+ init(Idxs, NameStr);
+}
+ExtractValueInst::ExtractValueInst(Value *Agg,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : UnaryInstruction(checkGEPType(getIndexedType(Agg->getType(), Idxs)),
+ ExtractValue, Agg, InsertAtEnd) {
+ init(Idxs, NameStr);
+}
+
+
+//===----------------------------------------------------------------------===//
+// InsertValueInst Class
+//===----------------------------------------------------------------------===//
+
+/// InsertValueInst - This instruction inserts a struct field or array element
+/// value into an aggregate value.
+///
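+/// A sketch (assuming \p Agg has type { i32, float } and \p V is a float):
+/// \code
+///   Value *NewAgg = InsertValueInst::Create(Agg, V, 1, "agg", InsertPt);
+/// \endcode
+///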
+class InsertValueInst : public Instruction {
+ SmallVector<unsigned, 4> Indices;
+
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ InsertValueInst(const InsertValueInst &IVI);
+ void init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
+ const Twine &NameStr);
+
+ /// Constructors - Create an insertvalue instruction with a base aggregate
+ /// value, a value to insert, and a list of indices. The first ctor can
+ /// optionally insert before an existing instruction; the second appends
+ /// the new instruction to the specified BasicBlock.
+ inline InsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr,
+ Instruction *InsertBefore);
+ inline InsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+ /// Constructors - These two constructors are convenience methods because one-
+ /// and two-index insertvalue instructions are so common.
+ InsertValueInst(Value *Agg, Value *Val,
+ unsigned Idx, const Twine &NameStr = "",
+ Instruction *InsertBefore = 0);
+ InsertValueInst(Value *Agg, Value *Val, unsigned Idx,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+protected:
+ virtual InsertValueInst *clone_impl() const;
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+
+ static InsertValueInst *Create(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ return new InsertValueInst(Agg, Val, Idxs, NameStr, InsertBefore);
+ }
+ static InsertValueInst *Create(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new InsertValueInst(Agg, Val, Idxs, NameStr, InsertAtEnd);
+ }
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ typedef const unsigned* idx_iterator;
+ inline idx_iterator idx_begin() const { return Indices.begin(); }
+ inline idx_iterator idx_end() const { return Indices.end(); }
+
+ Value *getAggregateOperand() {
+ return getOperand(0);
+ }
+ const Value *getAggregateOperand() const {
+ return getOperand(0);
+ }
+ static unsigned getAggregateOperandIndex() {
+ return 0U; // get index for modifying correct operand
+ }
+
+ Value *getInsertedValueOperand() {
+ return getOperand(1);
+ }
+ const Value *getInsertedValueOperand() const {
+ return getOperand(1);
+ }
+ static unsigned getInsertedValueOperandIndex() {
+ return 1U; // get index for modifying correct operand
+ }
+
+ ArrayRef<unsigned> getIndices() const {
+ return Indices;
+ }
+
+ unsigned getNumIndices() const {
+ return (unsigned)Indices.size();
+ }
+
+ bool hasIndices() const {
+ return true;
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::InsertValue;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<InsertValueInst> :
+ public FixedNumOperandTraits<InsertValueInst, 2> {
+};
+
+InsertValueInst::InsertValueInst(Value *Agg,
+ Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr,
+ Instruction *InsertBefore)
+ : Instruction(Agg->getType(), InsertValue,
+ OperandTraits<InsertValueInst>::op_begin(this),
+ 2, InsertBefore) {
+ init(Agg, Val, Idxs, NameStr);
+}
+InsertValueInst::InsertValueInst(Value *Agg,
+ Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Agg->getType(), InsertValue,
+ OperandTraits<InsertValueInst>::op_begin(this),
+ 2, InsertAtEnd) {
+ init(Agg, Val, Idxs, NameStr);
+}
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueInst, Value)
+
+//===----------------------------------------------------------------------===//
+// PHINode Class
+//===----------------------------------------------------------------------===//
+
+// PHINode - The PHINode class is used to represent the magical mystical PHI
+// node, which cannot exist in nature but can be synthesized in a computer
+// scientist's overactive imagination.
+//
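+// A creation sketch (assuming BB1/BB2 are predecessor blocks producing the
+// i32 values V1/V2):
+//   PHINode *PN = PHINode::Create(Ty, /*NumReservedValues=*/2, "p", InsertPt);
+//   PN->addIncoming(V1, BB1);
+//   PN->addIncoming(V2, BB2);
+//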
+class PHINode : public Instruction {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ /// ReservedSpace - The number of operands actually allocated. NumOperands is
+ /// the number actually in use.
+ unsigned ReservedSpace;
+ PHINode(const PHINode &PN);
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+ explicit PHINode(Type *Ty, unsigned NumReservedValues,
+ const Twine &NameStr = "", Instruction *InsertBefore = 0)
+ : Instruction(Ty, Instruction::PHI, 0, 0, InsertBefore),
+ ReservedSpace(NumReservedValues) {
+ setName(NameStr);
+ OperandList = allocHungoffUses(ReservedSpace);
+ }
+
+ PHINode(Type *Ty, unsigned NumReservedValues, const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Ty, Instruction::PHI, 0, 0, InsertAtEnd),
+ ReservedSpace(NumReservedValues) {
+ setName(NameStr);
+ OperandList = allocHungoffUses(ReservedSpace);
+ }
+protected:
+ // allocHungoffUses - this is more complicated than the generic
+ // User::allocHungoffUses, because we have to allocate Uses for the incoming
+ // values and pointers to the incoming blocks, all in one allocation.
+ Use *allocHungoffUses(unsigned) const;
+
+ virtual PHINode *clone_impl() const;
+public:
+ /// Constructors - NumReservedValues is a hint for the number of incoming
+ /// edges that this phi node will have (use 0 if you really have no idea).
+ static PHINode *Create(Type *Ty, unsigned NumReservedValues,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ return new PHINode(Ty, NumReservedValues, NameStr, InsertBefore);
+ }
+ static PHINode *Create(Type *Ty, unsigned NumReservedValues,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return new PHINode(Ty, NumReservedValues, NameStr, InsertAtEnd);
+ }
+ ~PHINode();
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ // Block iterator interface. This provides access to the list of incoming
+ // basic blocks, which parallels the list of incoming values.
+
+ typedef BasicBlock **block_iterator;
+ typedef BasicBlock * const *const_block_iterator;
+
+ block_iterator block_begin() {
+ Use::UserRef *ref =
+ reinterpret_cast<Use::UserRef*>(op_begin() + ReservedSpace);
+ return reinterpret_cast<block_iterator>(ref + 1);
+ }
+
+ const_block_iterator block_begin() const {
+ const Use::UserRef *ref =
+ reinterpret_cast<const Use::UserRef*>(op_begin() + ReservedSpace);
+ return reinterpret_cast<const_block_iterator>(ref + 1);
+ }
+
+ block_iterator block_end() {
+ return block_begin() + getNumOperands();
+ }
+
+ const_block_iterator block_end() const {
+ return block_begin() + getNumOperands();
+ }
+
+ /// getNumIncomingValues - Return the number of incoming edges
+ ///
+ unsigned getNumIncomingValues() const { return getNumOperands(); }
+
+ /// getIncomingValue - Return incoming value number @p i.
+ ///
+ Value *getIncomingValue(unsigned i) const {
+ return getOperand(i);
+ }
+ void setIncomingValue(unsigned i, Value *V) {
+ setOperand(i, V);
+ }
+ static unsigned getOperandNumForIncomingValue(unsigned i) {
+ return i;
+ }
+ static unsigned getIncomingValueNumForOperand(unsigned i) {
+ return i;
+ }
+
+ /// getIncomingBlock - Return incoming basic block number @p i.
+ ///
+ BasicBlock *getIncomingBlock(unsigned i) const {
+ return block_begin()[i];
+ }
+
+ /// getIncomingBlock - Return incoming basic block corresponding
+ /// to an operand of the PHI.
+ ///
+ BasicBlock *getIncomingBlock(const Use &U) const {
+ assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?");
+ return getIncomingBlock(unsigned(&U - op_begin()));
+ }
+
+ /// getIncomingBlock - Return incoming basic block corresponding
+ /// to value use iterator.
+ ///
+ template <typename U>
+ BasicBlock *getIncomingBlock(value_use_iterator<U> I) const {
+ return getIncomingBlock(I.getUse());
+ }
+
+ void setIncomingBlock(unsigned i, BasicBlock *BB) {
+ block_begin()[i] = BB;
+ }
+
+ /// addIncoming - Add an incoming value to the end of the PHI list
+ ///
+ void addIncoming(Value *V, BasicBlock *BB) {
+ assert(V && "PHI node got a null value!");
+ assert(BB && "PHI node got a null basic block!");
+ assert(getType() == V->getType() &&
+ "All operands to PHI node must be the same type as the PHI node!");
+ if (NumOperands == ReservedSpace)
+ growOperands(); // Get more space!
+ // Initialize some new operands.
+ ++NumOperands;
+ setIncomingValue(NumOperands - 1, V);
+ setIncomingBlock(NumOperands - 1, BB);
+ }
+
+ /// removeIncomingValue - Remove an incoming value. This is useful if a
+ /// predecessor basic block is deleted. The value removed is returned.
+ ///
+ /// If the last incoming value for a PHI node is removed (and DeletePHIIfEmpty
+ /// is true), the PHI node is destroyed and any uses of it are replaced with
+ /// dummy values. The only time there should be zero incoming values to a PHI
+ /// node is when the block is dead, so this strategy is sound.
+ ///
+ Value *removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty = true);
+
+ Value *removeIncomingValue(const BasicBlock *BB, bool DeletePHIIfEmpty=true) {
+ int Idx = getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "Invalid basic block argument to remove!");
+ return removeIncomingValue(Idx, DeletePHIIfEmpty);
+ }
+
+ /// getBasicBlockIndex - Return the first index of the specified basic
+ /// block in the value list for this PHI. Returns -1 if no instance.
+ ///
+ int getBasicBlockIndex(const BasicBlock *BB) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (block_begin()[i] == BB)
+ return i;
+ return -1;
+ }
+
+ Value *getIncomingValueForBlock(const BasicBlock *BB) const {
+ int Idx = getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "Invalid basic block argument!");
+ return getIncomingValue(Idx);
+ }
+
+ /// hasConstantValue - If the specified PHI node always merges together the
+ /// same value, return the value, otherwise return null.
+ Value *hasConstantValue() const;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::PHI;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+ private:
+ void growOperands();
+};
+
+template <>
+struct OperandTraits<PHINode> : public HungoffOperandTraits<2> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(PHINode, Value)
+
+//===----------------------------------------------------------------------===//
+// LandingPadInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// LandingPadInst - The landingpad instruction holds all of the information
+/// necessary to generate correct exception handling. The landingpad instruction
+/// cannot be moved from the top of a landing pad block, which itself is
+/// accessible only from the 'unwind' edge of an invoke. This uses the
+/// SubclassData field in Value to store whether or not the landingpad is a
+/// cleanup.
+///
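+/// A creation sketch (assuming a personality function \p PersFn, a result
+/// type \p ExnTy such as { i8*, i32 }, and a type-info global \p TypeInfo):
+/// \code
+///   LandingPadInst *LP =
+///       LandingPadInst::Create(ExnTy, PersFn, 1, "lpad", InsertPt);
+///   LP->addClause(TypeInfo); // a catch clause
+///   LP->setCleanup(true);
+/// \endcode
+///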
+class LandingPadInst : public Instruction {
+ /// ReservedSpace - The number of operands actually allocated. NumOperands is
+ /// the number actually in use.
+ unsigned ReservedSpace;
+ LandingPadInst(const LandingPadInst &LP);
+public:
+ enum ClauseType { Catch, Filter };
+private:
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ // Allocate space for exactly zero operands.
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+ void growOperands(unsigned Size);
+ void init(Value *PersFn, unsigned NumReservedValues, const Twine &NameStr);
+
+ explicit LandingPadInst(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedValues, const Twine &NameStr,
+ Instruction *InsertBefore);
+ explicit LandingPadInst(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedValues, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
+protected:
+ virtual LandingPadInst *clone_impl() const;
+public:
+ /// Constructors - NumReservedClauses is a hint for the number of incoming
+ /// clauses that this landingpad will have (use 0 if you really have no idea).
+ static LandingPadInst *Create(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedClauses,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = 0);
+ static LandingPadInst *Create(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedClauses,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+ ~LandingPadInst();
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// getPersonalityFn - Get the personality function associated with this
+ /// landing pad.
+ Value *getPersonalityFn() const { return getOperand(0); }
+
+ /// isCleanup - Return 'true' if this landingpad instruction is a
+ /// cleanup. I.e., it should be run when unwinding even if its landing pad
+ /// doesn't catch the exception.
+ bool isCleanup() const { return getSubclassDataFromInstruction() & 1; }
+
+ /// setCleanup - Indicate that this landingpad instruction is a cleanup.
+ void setCleanup(bool V) {
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
+ (V ? 1 : 0));
+ }
+
+ /// addClause - Add a catch or filter clause to the landing pad.
+ void addClause(Value *ClauseVal);
+
+ /// getClause - Get the value of the clause at index Idx. Use isCatch/isFilter
+ /// to determine what type of clause this is.
+ Value *getClause(unsigned Idx) const { return OperandList[Idx + 1]; }
+
+ /// isCatch - Return 'true' if the clause at index Idx is a catch clause.
+ bool isCatch(unsigned Idx) const {
+ return !isa<ArrayType>(OperandList[Idx + 1]->getType());
+ }
+
+ /// isFilter - Return 'true' if the clause at index Idx is a filter clause.
+ bool isFilter(unsigned Idx) const {
+ return isa<ArrayType>(OperandList[Idx + 1]->getType());
+ }
+
+ /// getNumClauses - Get the number of clauses for this landing pad.
+ unsigned getNumClauses() const { return getNumOperands() - 1; }
+
+ /// reserveClauses - Grow the size of the operand list to accommodate the new
+ /// number of clauses.
+ void reserveClauses(unsigned Size) { growOperands(Size); }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::LandingPad;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+template <>
+struct OperandTraits<LandingPadInst> : public HungoffOperandTraits<2> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(LandingPadInst, Value)
+
+//===----------------------------------------------------------------------===//
+// ReturnInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// ReturnInst - Return a value (possibly void), from a function. Execution
+/// does not continue in this function any longer.
+///
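+/// A creation sketch (assuming \p RetVal matches the enclosing function's
+/// return type):
+/// \code
+///   ReturnInst::Create(Ctx, RetVal, InsertPt); // 'ret <ty> RetVal'
+///   ReturnInst::Create(Ctx, 0, InsertPt);      // 'ret void'
+/// \endcode
+///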
+class ReturnInst : public TerminatorInst {
+ ReturnInst(const ReturnInst &RI);
+
+private:
+ // ReturnInst constructors:
+ // ReturnInst() - 'ret void' instruction
+ // ReturnInst( null) - 'ret void' instruction
+ // ReturnInst(Value* X) - 'ret X' instruction
+ // ReturnInst( null, Inst *I) - 'ret void' instruction, insert before I
+ // ReturnInst(Value* X, Inst *I) - 'ret X' instruction, insert before I
+ // ReturnInst( null, BB *B) - 'ret void' instruction, insert @ end of B
+ // ReturnInst(Value* X, BB *B) - 'ret X' instruction, insert @ end of B
+ //
+ // NOTE: If the Value* passed is of type void then the constructor behaves as
+ // if it was passed NULL.
+ explicit ReturnInst(LLVMContext &C, Value *retVal = 0,
+ Instruction *InsertBefore = 0);
+ ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd);
+ explicit ReturnInst(LLVMContext &C, BasicBlock *InsertAtEnd);
+protected:
+ virtual ReturnInst *clone_impl() const;
+public:
+ static ReturnInst* Create(LLVMContext &C, Value *retVal = 0,
+ Instruction *InsertBefore = 0) {
+ return new(!!retVal) ReturnInst(C, retVal, InsertBefore);
+ }
+ static ReturnInst* Create(LLVMContext &C, Value *retVal,
+ BasicBlock *InsertAtEnd) {
+ return new(!!retVal) ReturnInst(C, retVal, InsertAtEnd);
+ }
+ static ReturnInst* Create(LLVMContext &C, BasicBlock *InsertAtEnd) {
+ return new(0) ReturnInst(C, InsertAtEnd);
+ }
+ virtual ~ReturnInst();
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// Convenience accessor. Returns null if there is no return value.
+ Value *getReturnValue() const {
+ return getNumOperands() != 0 ? getOperand(0) : 0;
+ }
+
+ unsigned getNumSuccessors() const { return 0; }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return (I->getOpcode() == Instruction::Ret);
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+ private:
+ virtual BasicBlock *getSuccessorV(unsigned idx) const;
+ virtual unsigned getNumSuccessorsV() const;
+ virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+};
+
+template <>
+struct OperandTraits<ReturnInst> : public VariadicOperandTraits<ReturnInst> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ReturnInst, Value)
+
+//===----------------------------------------------------------------------===//
+// BranchInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// BranchInst - Conditional or Unconditional Branch instruction.
+///
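+/// A creation sketch (assuming an i1 condition \p Cond and existing blocks
+/// \p Then, \p Else, and \p CurBB):
+/// \code
+///   BranchInst::Create(Then, CurBB);             // br label %Then
+///   BranchInst::Create(Then, Else, Cond, CurBB); // br i1 %Cond, ...
+/// \endcode
+///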
+class BranchInst : public TerminatorInst {
+ /// Ops list - Branches are strange. The operands are ordered:
+ /// [Cond, FalseDest,] TrueDest. This makes some accessors faster because
+ /// they don't have to check for cond/uncond branchness. These are mostly
+ /// accessed relative to op_end().
+ BranchInst(const BranchInst &BI);
+ void AssertOK();
+ // BranchInst constructors (where {B, T, F} are blocks, and C is a condition):
+ // BranchInst(BB *B) - 'br B'
+ // BranchInst(BB* T, BB *F, Value *C) - 'br C, T, F'
+ // BranchInst(BB* B, Inst *I) - 'br B' insert before I
+ // BranchInst(BB* T, BB *F, Value *C, Inst *I) - 'br C, T, F', insert before I
+ // BranchInst(BB* B, BB *I) - 'br B' insert at end
+ // BranchInst(BB* T, BB *F, Value *C, BB *I) - 'br C, T, F', insert at end
+ explicit BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore = 0);
+ BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ Instruction *InsertBefore = 0);
+ BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd);
+ BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ BasicBlock *InsertAtEnd);
+protected:
+ virtual BranchInst *clone_impl() const;
+public:
+ static BranchInst *Create(BasicBlock *IfTrue, Instruction *InsertBefore = 0) {
+ return new(1) BranchInst(IfTrue, InsertBefore);
+ }
+ static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse,
+ Value *Cond, Instruction *InsertBefore = 0) {
+ return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertBefore);
+ }
+ static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *InsertAtEnd) {
+ return new(1) BranchInst(IfTrue, InsertAtEnd);
+ }
+ static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse,
+ Value *Cond, BasicBlock *InsertAtEnd) {
+ return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertAtEnd);
+ }
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ bool isUnconditional() const { return getNumOperands() == 1; }
+ bool isConditional() const { return getNumOperands() == 3; }
+
+ Value *getCondition() const {
+ assert(isConditional() && "Cannot get condition of an uncond branch!");
+ return Op<-3>();
+ }
+
+ void setCondition(Value *V) {
+ assert(isConditional() && "Cannot set condition of unconditional branch!");
+ Op<-3>() = V;
+ }
+
+ unsigned getNumSuccessors() const { return 1+isConditional(); }
+
+ BasicBlock *getSuccessor(unsigned i) const {
+ assert(i < getNumSuccessors() && "Successor # out of range for Branch!");
+ return cast_or_null<BasicBlock>((&Op<-1>() - i)->get());
+ }
+
+ void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
+ assert(idx < getNumSuccessors() && "Successor # out of range for Branch!");
+ *(&Op<-1>() - idx) = (Value*)NewSucc;
+ }
+
+ /// \brief Swap the successors of this branch instruction.
+ ///
+ /// Swaps the successors of the branch instruction. This also swaps any
+ /// branch weight metadata associated with the instruction so that it
+ /// continues to map correctly to each operand.
+ void swapSuccessors();
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return (I->getOpcode() == Instruction::Br);
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ virtual BasicBlock *getSuccessorV(unsigned idx) const;
+ virtual unsigned getNumSuccessorsV() const;
+ virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+};
+
+template <>
+struct OperandTraits<BranchInst> : public VariadicOperandTraits<BranchInst, 1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value)
+
+//===----------------------------------------------------------------------===//
+// SwitchInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// SwitchInst - Multiway switch
+///
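+/// A creation sketch (assuming an i32 condition \p Cond and blocks \p DefBB
+/// and \p CaseBB):
+/// \code
+///   SwitchInst *SI = SwitchInst::Create(Cond, DefBB, 1, InsertPt);
+///   SI->addCase(ConstantInt::get(Int32Ty, 42), CaseBB);
+/// \endcode
+///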
+class SwitchInst : public TerminatorInst {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ unsigned ReservedSpace;
+ // Operands format:
+ // Operand[0] = Value to switch on
+ // Operand[1] = Default basic block destination
+ // Operand[2n ] = Value to match
+ // Operand[2n+1] = BasicBlock to go to on match
+
+ // Case values are stored separately from the operands list. We don't need
+ // the User-Use concept here: a case value is always a constant and is never
+ // reused by other instructions or values.
+ // Additionally:
+ // It lets us use a custom type for case values that is not inherited from
+ // Value. Since a case value is a complex type implementing a subset of the
+ // integers, we don't need to extract sub-constants through the slow
+ // getAggregateElement method.
+ // We use std::list for the case values for two reasons:
+ // 1. Cases can be added and removed without reallocating the whole
+ //    collection.
+ // 2. Random access is rarely needed.
+ // Currently case values are also stored in the operands list, but they will
+ // be moved out in future commits.
+ typedef std::list<IntegersSubset> Subsets;
+ typedef Subsets::iterator SubsetsIt;
+ typedef Subsets::const_iterator SubsetsConstIt;
+
+ Subsets TheSubsets;
+
+ SwitchInst(const SwitchInst &SI);
+ void init(Value *Value, BasicBlock *Default, unsigned NumReserved);
+ void growOperands();
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+ /// SwitchInst ctor - Create a new switch instruction, specifying a value to
+ /// switch on and a default destination. The number of additional cases can
+ /// be specified here to make memory allocation more efficient. This
+ /// constructor can also autoinsert before another instruction.
+ SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ Instruction *InsertBefore);
+
+ /// SwitchInst ctor - Create a new switch instruction, specifying a value to
+ /// switch on and a default destination. The number of additional cases can
+ /// be specified here to make memory allocation more efficient. This
+ /// constructor also autoinserts at the end of the specified BasicBlock.
+ SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ BasicBlock *InsertAtEnd);
+protected:
+ virtual SwitchInst *clone_impl() const;
+public:
+
+ // FIXME: Currently there are a lot of unclean template parameters;
+ // they need refactoring in the future.
+ // All these parameters are used to implement both iterator and const_iterator
+ // without code duplication.
+ // SwitchInstTy may be "const SwitchInst" or "SwitchInst"
+ // ConstantIntTy may be "const ConstantInt" or "ConstantInt"
+ // SubsetsItTy may be SubsetsConstIt or SubsetsIt
+ // BasicBlockTy may be "const BasicBlock" or "BasicBlock"
+ template <class SwitchInstTy, class ConstantIntTy,
+ class SubsetsItTy, class BasicBlockTy>
+ class CaseIteratorT;
+
+ typedef CaseIteratorT<const SwitchInst, const ConstantInt,
+ SubsetsConstIt, const BasicBlock> ConstCaseIt;
+ class CaseIt;
+
+ // The default case is addressed by a pseudo-index of -2 (as unsigned).
+ static const unsigned DefaultPseudoIndex = static_cast<unsigned>(~0L-1);
+
+ static SwitchInst *Create(Value *Value, BasicBlock *Default,
+ unsigned NumCases, Instruction *InsertBefore = 0) {
+ return new SwitchInst(Value, Default, NumCases, InsertBefore);
+ }
+ static SwitchInst *Create(Value *Value, BasicBlock *Default,
+ unsigned NumCases, BasicBlock *InsertAtEnd) {
+ return new SwitchInst(Value, Default, NumCases, InsertAtEnd);
+ }
+
+ ~SwitchInst();
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ // Accessor Methods for Switch stmt
+ Value *getCondition() const { return getOperand(0); }
+ void setCondition(Value *V) { setOperand(0, V); }
+
+ BasicBlock *getDefaultDest() const {
+ return cast<BasicBlock>(getOperand(1));
+ }
+
+ void setDefaultDest(BasicBlock *DefaultCase) {
+ setOperand(1, reinterpret_cast<Value*>(DefaultCase));
+ }
+
+ /// getNumCases - Return the number of 'cases' in this switch instruction,
+ /// excluding the default case.
+ unsigned getNumCases() const {
+ return getNumOperands()/2 - 1;
+ }
+
+ /// Returns a read/write iterator that points to the first
+ /// case in SwitchInst.
+ CaseIt case_begin() {
+ return CaseIt(this, 0, TheSubsets.begin());
+ }
+ /// Returns a read-only iterator that points to the first
+ /// case in the SwitchInst.
+ ConstCaseIt case_begin() const {
+ return ConstCaseIt(this, 0, TheSubsets.begin());
+ }
+
+ /// Returns a read/write iterator that points one past the last
+ /// case in the SwitchInst.
+ CaseIt case_end() {
+ return CaseIt(this, getNumCases(), TheSubsets.end());
+ }
+ /// Returns a read-only iterator that points one past the last
+ /// case in the SwitchInst.
+ ConstCaseIt case_end() const {
+ return ConstCaseIt(this, getNumCases(), TheSubsets.end());
+ }
+ /// Returns an iterator that points to the default case.
+ /// Note: this iterator can only resolve the successor; attempting to
+ /// resolve the case value triggers an assertion.
+ /// Also note that incrementing or decrementing it triggers an assertion
+ /// and invalidates the iterator.
+ CaseIt case_default() {
+ return CaseIt(this, DefaultPseudoIndex, TheSubsets.end());
+ }
+ ConstCaseIt case_default() const {
+ return ConstCaseIt(this, DefaultPseudoIndex, TheSubsets.end());
+ }
+
+ /// findCaseValue - Search all of the case values for the specified
+ /// constant. If it is explicitly handled, return the case iterator for it;
+ /// otherwise return the default case iterator to indicate that it is
+ /// handled by the default handler.
+ CaseIt findCaseValue(const ConstantInt *C) {
+ for (CaseIt i = case_begin(), e = case_end(); i != e; ++i)
+ if (i.getCaseValueEx().isSatisfies(IntItem::fromConstantInt(C)))
+ return i;
+ return case_default();
+ }
+ ConstCaseIt findCaseValue(const ConstantInt *C) const {
+ for (ConstCaseIt i = case_begin(), e = case_end(); i != e; ++i)
+ if (i.getCaseValueEx().isSatisfies(IntItem::fromConstantInt(C)))
+ return i;
+ return case_default();
+ }
+
+ /// findCaseDest - Finds the unique case value for a given successor. Returns
+ /// null if the successor is not found, not unique, or is the default case.
+ ConstantInt *findCaseDest(BasicBlock *BB) {
+ if (BB == getDefaultDest()) return NULL;
+
+ ConstantInt *CI = NULL;
+ for (CaseIt i = case_begin(), e = case_end(); i != e; ++i) {
+ if (i.getCaseSuccessor() == BB) {
+ if (CI) return NULL; // Multiple cases lead to BB.
+ else CI = i.getCaseValue();
+ }
+ }
+ return CI;
+ }
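+
+ // A minimal usage sketch (illustrative only, not part of this interface;
+ // 'SI' is assumed to be a valid SwitchInst*). It visits each case with the
+ // iterator types declared above:
+ //
+ //   for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ //        i != e; ++i) {
+ //     ConstantInt *CaseVal = i.getCaseValue();      // deprecated accessor
+ //     BasicBlock *CaseDest = i.getCaseSuccessor();  // case destination
+ //   }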
+
+ /// addCase - Add an entry to the switch instruction.
+ /// @deprecated
+ /// Note:
+ /// This action invalidates case_end(). The old case_end() iterator will
+ /// point to the added case.
+ void addCase(ConstantInt *OnVal, BasicBlock *Dest);
+
+ /// addCase - Add an entry to the switch instruction.
+ /// Note:
+ /// This action invalidates case_end(). The old case_end() iterator will
+ /// point to the added case.
+ void addCase(IntegersSubset& OnVal, BasicBlock *Dest);
+
+ /// removeCase - This method removes the specified case and its successor
+ /// from the switch instruction. Note that this operation may reorder the
+ /// remaining cases at and above the removed index.
+ /// Note:
+ /// This action invalidates iterators for all cases following the removed
+ /// one, including the case_end() iterator.
+ void removeCase(CaseIt& i);
+
+ unsigned getNumSuccessors() const { return getNumOperands()/2; }
+ BasicBlock *getSuccessor(unsigned idx) const {
+ assert(idx < getNumSuccessors() &&"Successor idx out of range for switch!");
+ return cast<BasicBlock>(getOperand(idx*2+1));
+ }
+ void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
+ assert(idx < getNumSuccessors() && "Successor # out of range for switch!");
+ setOperand(idx*2+1, (Value*)NewSucc);
+ }
+
+ uint16_t hash() const {
+ uint32_t NumberOfCases = (uint32_t)getNumCases();
+ uint16_t Hash = (0xFFFF & NumberOfCases) ^ (NumberOfCases >> 16);
+ for (ConstCaseIt i = case_begin(), e = case_end();
+ i != e; ++i) {
+ uint32_t NumItems = (uint32_t)i.getCaseValueEx().getNumItems();
+ Hash = (Hash << 1) ^ (0xFFFF & NumItems) ^ (NumItems >> 16);
+ }
+ return Hash;
+ }
+
+ // Case iterators definition.
+
+ template <class SwitchInstTy, class ConstantIntTy,
+ class SubsetsItTy, class BasicBlockTy>
+ class CaseIteratorT {
+ protected:
+
+ SwitchInstTy *SI;
+ unsigned Index;
+ SubsetsItTy SubsetIt;
+
+ /// Initializes the case iterator for the given SwitchInst and the given
+ /// case number.
+ friend class SwitchInst;
+ CaseIteratorT(SwitchInstTy *SI, unsigned SuccessorIndex,
+ SubsetsItTy CaseValueIt) {
+ this->SI = SI;
+ Index = SuccessorIndex;
+ this->SubsetIt = CaseValueIt;
+ }
+
+ public:
+ typedef typename SubsetsItTy::reference IntegersSubsetRef;
+ typedef CaseIteratorT<SwitchInstTy, ConstantIntTy,
+ SubsetsItTy, BasicBlockTy> Self;
+
+ CaseIteratorT(SwitchInstTy *SI, unsigned CaseNum) {
+ this->SI = SI;
+ Index = CaseNum;
+ SubsetIt = SI->TheSubsets.begin();
+ std::advance(SubsetIt, CaseNum);
+ }
+
+
+ /// Initializes the case iterator for the given SwitchInst and the given
+ /// TerminatorInst successor index.
+ static Self fromSuccessorIndex(SwitchInstTy *SI, unsigned SuccessorIndex) {
+ assert(SuccessorIndex < SI->getNumSuccessors() &&
+ "Successor index # out of range!");
+ return SuccessorIndex != 0 ?
+ Self(SI, SuccessorIndex - 1) :
+ Self(SI, DefaultPseudoIndex);
+ }
+
+ /// Resolves the case value for the current case.
+ /// @deprecated
+ ConstantIntTy *getCaseValue() {
+ assert(Index < SI->getNumCases() && "Index out of the number of cases.");
+ IntegersSubsetRef CaseRanges = *SubsetIt;
+
+ // FIXME: Currently we work with ConstantInt-based cases.
+ // So return the case value as a ConstantInt.
+ return CaseRanges.getSingleNumber(0).toConstantInt();
+ }
+
+ /// Resolves the case value for the current case.
+ IntegersSubsetRef getCaseValueEx() {
+ assert(Index < SI->getNumCases() && "Index out of the number of cases.");
+ return *SubsetIt;
+ }
+
+ /// Resolves the successor for the current case.
+ BasicBlockTy *getCaseSuccessor() {
+ assert((Index < SI->getNumCases() ||
+ Index == DefaultPseudoIndex) &&
+ "Index out the number of cases.");
+ return SI->getSuccessor(getSuccessorIndex());
+ }
+
+ /// Returns the index of the current case.
+ unsigned getCaseIndex() const { return Index; }
+
+ /// Returns the TerminatorInst successor index for the current case
+ /// successor.
+ unsigned getSuccessorIndex() const {
+ assert((Index == DefaultPseudoIndex || Index < SI->getNumCases()) &&
+ "Index out the number of cases.");
+ return Index != DefaultPseudoIndex ? Index + 1 : 0;
+ }
+
+ Self operator++() {
+ // Check index correctness after increment.
+ // Note: Index == getNumCases() means end().
+ assert(Index+1 <= SI->getNumCases() && "Index out of the number of cases.");
+ ++Index;
+ if (Index == 0)
+ SubsetIt = SI->TheSubsets.begin();
+ else
+ ++SubsetIt;
+ return *this;
+ }
+ Self operator++(int) {
+ Self tmp = *this;
+ ++(*this);
+ return tmp;
+ }
+ Self operator--() {
+ // Check index correctness after decrement.
+ // Note: Index == getNumCases() means end().
+ // Also allow the "-1" iterator here; it will become valid after ++.
+ unsigned NumCases = SI->getNumCases();
+ assert((Index == 0 || Index-1 <= NumCases) &&
+ "Index out the number of cases.");
+ --Index;
+ if (Index == NumCases) {
+ SubsetIt = SI->TheSubsets.end();
+ return *this;
+ }
+
+ if (Index != -1U)
+ --SubsetIt;
+
+ return *this;
+ }
+ Self operator--(int) {
+ Self tmp = *this;
+ --(*this);
+ return tmp;
+ }
+ bool operator==(const Self& RHS) const {
+ assert(RHS.SI == SI && "Incompatible iterators.");
+ return RHS.Index == Index;
+ }
+ bool operator!=(const Self& RHS) const {
+ assert(RHS.SI == SI && "Incompatible iterators.");
+ return RHS.Index != Index;
+ }
+ };
+
+ class CaseIt : public CaseIteratorT<SwitchInst, ConstantInt,
+ SubsetsIt, BasicBlock> {
+ typedef CaseIteratorT<SwitchInst, ConstantInt, SubsetsIt, BasicBlock>
+ ParentTy;
+
+ protected:
+ friend class SwitchInst;
+ CaseIt(SwitchInst *SI, unsigned CaseNum, SubsetsIt SubsetIt) :
+ ParentTy(SI, CaseNum, SubsetIt) {}
+
+ void updateCaseValueOperand(IntegersSubset& V) {
+ SI->setOperand(2 + Index*2, reinterpret_cast<Value*>((Constant*)V));
+ }
+
+ public:
+
+ CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {}
+
+ CaseIt(const ParentTy& Src) : ParentTy(Src) {}
+
+ /// Sets the new value for the current case.
+ /// @deprecated.
+ void setValue(ConstantInt *V) {
+ assert(Index < SI->getNumCases() && "Index out of the number of cases.");
+ IntegersSubsetToBB Mapping;
+ // FIXME: Currently we work with ConstantInt-based cases.
+ // So initialize the IntItem container directly from the ConstantInt.
+ Mapping.add(IntItem::fromConstantInt(V));
+ *SubsetIt = Mapping.getCase();
+ updateCaseValueOperand(*SubsetIt);
+ }
+
+ /// Sets the new value for the current case.
+ void setValueEx(IntegersSubset& V) {
+ assert(Index < SI->getNumCases() && "Index out of the number of cases.");
+ *SubsetIt = V;
+ updateCaseValueOperand(*SubsetIt);
+ }
+
+ /// Sets the new successor for the current case.
+ void setSuccessor(BasicBlock *S) {
+ SI->setSuccessor(getSuccessorIndex(), S);
+ }
+ };
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Switch;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ virtual BasicBlock *getSuccessorV(unsigned idx) const;
+ virtual unsigned getNumSuccessorsV() const;
+ virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+};
+
+template <>
+struct OperandTraits<SwitchInst> : public HungoffOperandTraits<2> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value)
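+
+// A minimal creation sketch (illustrative only; 'Cond', 'DefaultBB',
+// 'CaseBB', and 'Ctx' are assumed to exist in the caller's context):
+//
+//   SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, /*NumCases=*/1);
+//   SI->addCase(ConstantInt::get(Type::getInt32Ty(Ctx), 42), CaseBB);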
+
+
+//===----------------------------------------------------------------------===//
+// IndirectBrInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// IndirectBrInst - Indirect Branch Instruction.
+///
+class IndirectBrInst : public TerminatorInst {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ unsigned ReservedSpace;
+ // Operand[0] = Value to switch on
+ // Operand[1] = Default basic block destination
+ // Operand[2n ] = Value to match
+ // Operand[2n+1] = BasicBlock to go to on match
+ IndirectBrInst(const IndirectBrInst &IBI);
+ void init(Value *Address, unsigned NumDests);
+ void growOperands();
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+ /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
+ /// Address to jump to. The number of expected destinations can be specified
+ /// here to make memory allocation more efficient. This constructor can also
+ /// autoinsert before another instruction.
+ IndirectBrInst(Value *Address, unsigned NumDests, Instruction *InsertBefore);
+
+ /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
+ /// Address to jump to. The number of expected destinations can be specified
+ /// here to make memory allocation more efficient. This constructor also
+ /// autoinserts at the end of the specified BasicBlock.
+ IndirectBrInst(Value *Address, unsigned NumDests, BasicBlock *InsertAtEnd);
+protected:
+ virtual IndirectBrInst *clone_impl() const;
+public:
+ static IndirectBrInst *Create(Value *Address, unsigned NumDests,
+ Instruction *InsertBefore = 0) {
+ return new IndirectBrInst(Address, NumDests, InsertBefore);
+ }
+ static IndirectBrInst *Create(Value *Address, unsigned NumDests,
+ BasicBlock *InsertAtEnd) {
+ return new IndirectBrInst(Address, NumDests, InsertAtEnd);
+ }
+ ~IndirectBrInst();
+
+ /// Provide fast operand accessors.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ // Accessor Methods for IndirectBrInst instruction.
+ Value *getAddress() { return getOperand(0); }
+ const Value *getAddress() const { return getOperand(0); }
+ void setAddress(Value *V) { setOperand(0, V); }
+
+
+ /// getNumDestinations - return the number of possible destinations in this
+ /// indirectbr instruction.
+ unsigned getNumDestinations() const { return getNumOperands()-1; }
+
+ /// getDestination - Return the specified destination.
+ BasicBlock *getDestination(unsigned i) { return getSuccessor(i); }
+ const BasicBlock *getDestination(unsigned i) const { return getSuccessor(i); }
+
+ /// addDestination - Add a destination.
+ ///
+ void addDestination(BasicBlock *Dest);
+
+ /// removeDestination - This method removes the specified successor from the
+ /// indirectbr instruction.
+ void removeDestination(unsigned i);
+
+ unsigned getNumSuccessors() const { return getNumOperands()-1; }
+ BasicBlock *getSuccessor(unsigned i) const {
+ return cast<BasicBlock>(getOperand(i+1));
+ }
+ void setSuccessor(unsigned i, BasicBlock *NewSucc) {
+ setOperand(i+1, (Value*)NewSucc);
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::IndirectBr;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ virtual BasicBlock *getSuccessorV(unsigned idx) const;
+ virtual unsigned getNumSuccessorsV() const;
+ virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+};
+
+template <>
+struct OperandTraits<IndirectBrInst> : public HungoffOperandTraits<1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
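+
+// A minimal usage sketch (illustrative only; 'F' is an assumed Function*
+// containing the BasicBlocks 'BB1' and 'BB2', whose address is taken with a
+// BlockAddress constant):
+//
+//   Value *Addr = BlockAddress::get(F, BB1);
+//   IndirectBrInst *IBI = IndirectBrInst::Create(Addr, /*NumDests=*/2);
+//   IBI->addDestination(BB1);
+//   IBI->addDestination(BB2);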
+
+
+//===----------------------------------------------------------------------===//
+// InvokeInst Class
+//===----------------------------------------------------------------------===//
+
+/// InvokeInst - Invoke instruction. The SubclassData field is used to hold the
+/// calling convention of the call.
+///
+class InvokeInst : public TerminatorInst {
+ AttributeSet AttributeList;
+ InvokeInst(const InvokeInst &BI);
+ void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, const Twine &NameStr);
+
+ /// \brief Construct an InvokeInst given a range of arguments.
+ inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, Instruction *InsertBefore);
+
+ /// \brief Construct an InvokeInst given a range of arguments.
+ inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+protected:
+ virtual InvokeInst *clone_impl() const;
+public:
+ static InvokeInst *Create(Value *Func,
+ BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, const Twine &NameStr = "",
+ Instruction *InsertBefore = 0) {
+ unsigned Values = unsigned(Args.size()) + 3;
+ return new(Values) InvokeInst(Func, IfNormal, IfException, Args,
+ Values, NameStr, InsertBefore);
+ }
+ static InvokeInst *Create(Value *Func,
+ BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ unsigned Values = unsigned(Args.size()) + 3;
+ return new(Values) InvokeInst(Func, IfNormal, IfException, Args,
+ Values, NameStr, InsertAtEnd);
+ }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// getNumArgOperands - Return the number of invoke arguments.
+ ///
+ unsigned getNumArgOperands() const { return getNumOperands() - 3; }
+
+ /// getArgOperand/setArgOperand - Return/set the i-th invoke argument.
+ ///
+ Value *getArgOperand(unsigned i) const { return getOperand(i); }
+ void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
+
+ /// getCallingConv/setCallingConv - Get or set the calling convention of this
+ /// function call.
+ CallingConv::ID getCallingConv() const {
+ return static_cast<CallingConv::ID>(getSubclassDataFromInstruction());
+ }
+ void setCallingConv(CallingConv::ID CC) {
+ setInstructionSubclassData(static_cast<unsigned>(CC));
+ }
+
+ /// getAttributes - Return the parameter attributes for this invoke.
+ ///
+ const AttributeSet &getAttributes() const { return AttributeList; }
+
+ /// setAttributes - Set the parameter attributes for this invoke.
+ ///
+ void setAttributes(const AttributeSet &Attrs) { AttributeList = Attrs; }
+
+ /// addAttribute - adds the attribute to the list of attributes.
+ void addAttribute(unsigned i, Attribute::AttrKind attr);
+
+ /// removeAttribute - removes the attribute from the list of attributes.
+ void removeAttribute(unsigned i, Attribute attr);
+
+ /// \brief Determine whether this call has the given attribute.
+ bool hasFnAttr(Attribute::AttrKind A) const;
+
+ /// \brief Determine whether the call or the callee has the given attribute.
+ bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
+
+ /// \brief Extract the alignment for a call or parameter (0=unknown).
+ unsigned getParamAlignment(unsigned i) const {
+ return AttributeList.getParamAlignment(i);
+ }
+
+ /// \brief Return true if the call should not be inlined.
+ bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
+ void setIsNoInline() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::NoInline);
+ }
+
+ /// \brief Determine if the call does not access memory.
+ bool doesNotAccessMemory() const {
+ return hasFnAttr(Attribute::ReadNone);
+ }
+ void setDoesNotAccessMemory() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
+ }
+
+ /// \brief Determine if the call does not access or only reads memory.
+ bool onlyReadsMemory() const {
+ return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
+ }
+ void setOnlyReadsMemory() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly);
+ }
+
+ /// \brief Determine if the call cannot return.
+ bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
+ void setDoesNotReturn() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::NoReturn);
+ }
+
+ /// \brief Determine if the call cannot unwind.
+ bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
+ void setDoesNotThrow() {
+ addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
+ }
+
+ /// \brief Determine if the call returns a structure through its first
+ /// pointer argument.
+ bool hasStructRetAttr() const {
+ // Be friendly and also check the callee.
+ return paramHasAttr(1, Attribute::StructRet);
+ }
+
+ /// \brief Determine if any call argument is an aggregate passed by value.
+ bool hasByValArgument() const {
+ return AttributeList.hasAttrSomewhere(Attribute::ByVal);
+ }
+
+ /// getCalledFunction - Return the function called, or null if this is an
+ /// indirect function invocation.
+ ///
+ Function *getCalledFunction() const {
+ return dyn_cast<Function>(Op<-3>());
+ }
+
+ /// getCalledValue - Get a pointer to the function that is invoked by this
+ /// instruction.
+ const Value *getCalledValue() const { return Op<-3>(); }
+ Value *getCalledValue() { return Op<-3>(); }
+
+ /// setCalledFunction - Set the function called.
+ void setCalledFunction(Value* Fn) {
+ Op<-3>() = Fn;
+ }
+
+ // get*Dest - Return the destination basic blocks...
+ BasicBlock *getNormalDest() const {
+ return cast<BasicBlock>(Op<-2>());
+ }
+ BasicBlock *getUnwindDest() const {
+ return cast<BasicBlock>(Op<-1>());
+ }
+ void setNormalDest(BasicBlock *B) {
+ Op<-2>() = reinterpret_cast<Value*>(B);
+ }
+ void setUnwindDest(BasicBlock *B) {
+ Op<-1>() = reinterpret_cast<Value*>(B);
+ }
+
+ /// getLandingPadInst - Get the landingpad instruction from the landing pad
+ /// block (the unwind destination).
+ LandingPadInst *getLandingPadInst() const;
+
+ BasicBlock *getSuccessor(unsigned i) const {
+ assert(i < 2 && "Successor # out of range for invoke!");
+ return i == 0 ? getNormalDest() : getUnwindDest();
+ }
+
+ void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
+ assert(idx < 2 && "Successor # out of range for invoke!");
+ *(&Op<-2>() + idx) = reinterpret_cast<Value*>(NewSucc);
+ }
+
+ unsigned getNumSuccessors() const { return 2; }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return (I->getOpcode() == Instruction::Invoke);
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+
+private:
+ virtual BasicBlock *getSuccessorV(unsigned idx) const;
+ virtual unsigned getNumSuccessorsV() const;
+ virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+
+ // Shadow Instruction::setInstructionSubclassData with a private forwarding
+ // method so that subclasses cannot accidentally use it.
+ void setInstructionSubclassData(unsigned short D) {
+ Instruction::setInstructionSubclassData(D);
+ }
+};
+
+template <>
+struct OperandTraits<InvokeInst> : public VariadicOperandTraits<InvokeInst, 3> {
+};
+
+InvokeInst::InvokeInst(Value *Func,
+ BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, Instruction *InsertBefore)
+ : TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Invoke,
+ OperandTraits<InvokeInst>::op_end(this) - Values,
+ Values, InsertBefore) {
+ init(Func, IfNormal, IfException, Args, NameStr);
+}
+InvokeInst::InvokeInst(Value *Func,
+ BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, unsigned Values,
+ const Twine &NameStr, BasicBlock *InsertAtEnd)
+ : TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Invoke,
+ OperandTraits<InvokeInst>::op_end(this) - Values,
+ Values, InsertAtEnd) {
+ init(Func, IfNormal, IfException, Args, NameStr);
+}
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value)
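+
+// A minimal usage sketch (illustrative only; 'Callee', 'NormalBB',
+// 'UnwindBB', and the ArrayRef<Value*> 'Args' are assumed to exist in the
+// caller's context):
+//
+//   InvokeInst *II =
+//       InvokeInst::Create(Callee, NormalBB, UnwindBB, Args, "inv");
+//   II->setCallingConv(CallingConv::Fast);
+//   BasicBlock *Normal = II->getNormalDest();  // == NormalBB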
+
+//===----------------------------------------------------------------------===//
+// ResumeInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// ResumeInst - Resume the propagation of an exception.
+///
+class ResumeInst : public TerminatorInst {
+ ResumeInst(const ResumeInst &RI);
+
+ explicit ResumeInst(Value *Exn, Instruction *InsertBefore=0);
+ ResumeInst(Value *Exn, BasicBlock *InsertAtEnd);
+protected:
+ virtual ResumeInst *clone_impl() const;
+public:
+ static ResumeInst *Create(Value *Exn, Instruction *InsertBefore = 0) {
+ return new(1) ResumeInst(Exn, InsertBefore);
+ }
+ static ResumeInst *Create(Value *Exn, BasicBlock *InsertAtEnd) {
+ return new(1) ResumeInst(Exn, InsertAtEnd);
+ }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+ /// Convenience accessor.
+ Value *getValue() const { return Op<0>(); }
+
+ unsigned getNumSuccessors() const { return 0; }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Resume;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ virtual BasicBlock *getSuccessorV(unsigned idx) const;
+ virtual unsigned getNumSuccessorsV() const;
+ virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+};
+
+template <>
+struct OperandTraits<ResumeInst> :
+ public FixedNumOperandTraits<ResumeInst, 1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value)
+
+//===----------------------------------------------------------------------===//
+// UnreachableInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// UnreachableInst - This instruction has undefined behavior. In particular,
+/// its presence indicates some higher-level knowledge that the end of the
+/// block cannot be reached.
+///
+class UnreachableInst : public TerminatorInst {
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+protected:
+ virtual UnreachableInst *clone_impl() const;
+
+public:
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+ explicit UnreachableInst(LLVMContext &C, Instruction *InsertBefore = 0);
+ explicit UnreachableInst(LLVMContext &C, BasicBlock *InsertAtEnd);
+
+ unsigned getNumSuccessors() const { return 0; }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Unreachable;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+private:
+ virtual BasicBlock *getSuccessorV(unsigned idx) const;
+ virtual unsigned getNumSuccessorsV() const;
+ virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+};
+
+//===----------------------------------------------------------------------===//
+// TruncInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents a truncation of integer types.
+class TruncInst : public CastInst {
+protected:
+ /// \brief Clone an identical TruncInst
+ virtual TruncInst *clone_impl() const;
+
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ TruncInst(
+ Value *S, ///< The value to be truncated
+ Type *Ty, ///< The (smaller) type to truncate to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end-of-block semantics
+ TruncInst(
+ Value *S, ///< The value to be truncated
+ Type *Ty, ///< The (smaller) type to truncate to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Trunc;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
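+
+// A minimal usage sketch (illustrative only; 'V64' is an assumed i64-typed
+// Value*, 'Ctx' its LLVMContext, and 'InsertPt' an existing Instruction*).
+// The remaining CastInst subclasses below follow the same constructor
+// pattern:
+//
+//   TruncInst *TI =
+//       new TruncInst(V64, Type::getInt32Ty(Ctx), "lo32", InsertPt);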
+
+//===----------------------------------------------------------------------===//
+// ZExtInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents zero extension of integer types.
+class ZExtInst : public CastInst {
+protected:
+ /// \brief Clone an identical ZExtInst
+ virtual ZExtInst *clone_impl() const;
+
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ ZExtInst(
+ Value *S, ///< The value to be zero extended
+ Type *Ty, ///< The type to zero extend to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end semantics.
+ ZExtInst(
+ Value *S, ///< The value to be zero extended
+ Type *Ty, ///< The type to zero extend to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == ZExt;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// SExtInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents a sign extension of integer types.
+class SExtInst : public CastInst {
+protected:
+ /// \brief Clone an identical SExtInst
+ virtual SExtInst *clone_impl() const;
+
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ SExtInst(
+ Value *S, ///< The value to be sign extended
+ Type *Ty, ///< The type to sign extend to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end-of-block semantics
+ SExtInst(
+ Value *S, ///< The value to be sign extended
+ Type *Ty, ///< The type to sign extend to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == SExt;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// FPTruncInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents a truncation of floating point types.
+class FPTruncInst : public CastInst {
+protected:
+ /// \brief Clone an identical FPTruncInst
+ virtual FPTruncInst *clone_impl() const;
+
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ FPTruncInst(
+ Value *S, ///< The value to be truncated
+ Type *Ty, ///< The type to truncate to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end-of-block semantics
+ FPTruncInst(
+ Value *S, ///< The value to be truncated
+ Type *Ty, ///< The type to truncate to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == FPTrunc;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// FPExtInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents an extension of floating point types.
+class FPExtInst : public CastInst {
+protected:
+ /// \brief Clone an identical FPExtInst
+ virtual FPExtInst *clone_impl() const;
+
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ FPExtInst(
+ Value *S, ///< The value to be extended
+ Type *Ty, ///< The type to extend to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end-of-block semantics
+ FPExtInst(
+ Value *S, ///< The value to be extended
+ Type *Ty, ///< The type to extend to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == FPExt;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// UIToFPInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents a cast from unsigned integer to floating
+/// point.
+class UIToFPInst : public CastInst {
+protected:
+ /// \brief Clone an identical UIToFPInst
+ virtual UIToFPInst *clone_impl() const;
+
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ UIToFPInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end-of-block semantics
+ UIToFPInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == UIToFP;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// SIToFPInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents a cast from signed integer to floating point.
+class SIToFPInst : public CastInst {
+protected:
+ /// \brief Clone an identical SIToFPInst
+ virtual SIToFPInst *clone_impl() const;
+
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ SIToFPInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end-of-block semantics
+ SIToFPInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == SIToFP;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// FPToUIInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents a cast from floating point to unsigned
+/// integer.
+class FPToUIInst : public CastInst {
+protected:
+ /// \brief Clone an identical FPToUIInst
+ virtual FPToUIInst *clone_impl() const;
+
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ FPToUIInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end-of-block semantics
+ FPToUIInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == FPToUI;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// FPToSIInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents a cast from floating point to signed integer.
+class FPToSIInst : public CastInst {
+protected:
+ /// \brief Clone an identical FPToSIInst
+ virtual FPToSIInst *clone_impl() const;
+
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ FPToSIInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end-of-block semantics
+ FPToSIInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == FPToSI;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// IntToPtrInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents a cast from an integer to a pointer.
+class IntToPtrInst : public CastInst {
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ IntToPtrInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end-of-block semantics
+ IntToPtrInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// \brief Clone an identical IntToPtrInst
+ virtual IntToPtrInst *clone_impl() const;
+
+ /// \brief Returns the address space of this instruction's pointer type.
+ unsigned getAddressSpace() const {
+ return getType()->getPointerAddressSpace();
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == IntToPtr;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// PtrToIntInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents a cast from a pointer to an integer.
+class PtrToIntInst : public CastInst {
+protected:
+ /// \brief Clone an identical PtrToIntInst
+ virtual PtrToIntInst *clone_impl() const;
+
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ PtrToIntInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end-of-block semantics
+ PtrToIntInst(
+ Value *S, ///< The value to be converted
+ Type *Ty, ///< The type to convert to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ /// \brief Gets the pointer operand.
+ Value *getPointerOperand() { return getOperand(0); }
+ /// \brief Gets the pointer operand.
+ const Value *getPointerOperand() const { return getOperand(0); }
+ /// \brief Gets the operand index of the pointer operand.
+ static unsigned getPointerOperandIndex() { return 0U; }
+
+ /// \brief Returns the address space of the pointer operand.
+ unsigned getPointerAddressSpace() const {
+ return getPointerOperand()->getType()->getPointerAddressSpace();
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == PtrToInt;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// BitCastInst Class
+//===----------------------------------------------------------------------===//
+
+/// \brief This class represents a no-op cast from one type to another.
+class BitCastInst : public CastInst {
+protected:
+ /// \brief Clone an identical BitCastInst
+ virtual BitCastInst *clone_impl() const;
+
+public:
+ /// \brief Constructor with insert-before-instruction semantics
+ BitCastInst(
+ Value *S, ///< The value to be cast
+ Type *Ty, ///< The type to cast to
+ const Twine &NameStr = "", ///< A name for the new instruction
+ Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+ );
+
+ /// \brief Constructor with insert-at-end-of-block semantics
+ BitCastInst(
+ Value *S, ///< The value to be cast
+ Type *Ty, ///< The type to cast to
+ const Twine &NameStr, ///< A name for the new instruction
+ BasicBlock *InsertAtEnd ///< The block to insert the instruction into
+ );
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == BitCast;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h
new file mode 100644
index 000000000000..8344c56680aa
--- /dev/null
+++ b/include/llvm/IR/IntrinsicInst.h
@@ -0,0 +1,316 @@
+//===-- llvm/IntrinsicInst.h - Intrinsic Instruction Wrappers ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes that make it really easy to deal with intrinsic
+// functions with the isa/dyn_cast family of functions. In particular, this
+// allows you to do things like:
+//
+// if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(Inst))
+// ... MCI->getDest() ... MCI->getSource() ...
+//
+// All intrinsic function calls are instances of the call instruction, so these
+// are all subclasses of the CallInst class. Note that none of these classes
+// has state or virtual methods, which is an important part of making this
+// gross/neat hack work.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_INTRINSICINST_H
+#define LLVM_IR_INTRINSICINST_H
+
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+
+namespace llvm {
+ /// IntrinsicInst - A useful wrapper class for inspecting calls to intrinsic
+ /// functions. This allows the standard isa/dyn_cast/cast functionality to
+ /// work with calls to intrinsic functions.
+ class IntrinsicInst : public CallInst {
+ IntrinsicInst() LLVM_DELETED_FUNCTION;
+ IntrinsicInst(const IntrinsicInst&) LLVM_DELETED_FUNCTION;
+ void operator=(const IntrinsicInst&) LLVM_DELETED_FUNCTION;
+ public:
+ /// getIntrinsicID - Return the intrinsic ID of this intrinsic.
+ ///
+ Intrinsic::ID getIntrinsicID() const {
+ return (Intrinsic::ID)getCalledFunction()->getIntrinsicID();
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const CallInst *I) {
+ if (const Function *CF = I->getCalledFunction())
+ return CF->isIntrinsic();
+ return false;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<CallInst>(V) && classof(cast<CallInst>(V));
+ }
+ };
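+
+ // A minimal usage sketch (illustrative only; 'I' is an assumed
+ // Instruction*): dispatching on the intrinsic ID.
+ //
+ //   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ //     switch (II->getIntrinsicID()) {
+ //     case Intrinsic::memset: /* ... */ break;
+ //     default: break;
+ //     }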
+
+ /// DbgInfoIntrinsic - This is the common base class for debug info
+ /// intrinsics.
+ ///
+ class DbgInfoIntrinsic : public IntrinsicInst {
+ public:
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ return true;
+ default: return false;
+ }
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+
+ static Value *StripCast(Value *C);
+ };
+
+ /// DbgDeclareInst - This represents the llvm.dbg.declare instruction.
+ ///
+ class DbgDeclareInst : public DbgInfoIntrinsic {
+ public:
+ Value *getAddress() const;
+ MDNode *getVariable() const { return cast<MDNode>(getArgOperand(1)); }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::dbg_declare;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
+ /// DbgValueInst - This represents the llvm.dbg.value instruction.
+ ///
+ class DbgValueInst : public DbgInfoIntrinsic {
+ public:
+ const Value *getValue() const;
+ Value *getValue();
+ uint64_t getOffset() const {
+ return cast<ConstantInt>(
+ const_cast<Value*>(getArgOperand(1)))->getZExtValue();
+ }
+ MDNode *getVariable() const { return cast<MDNode>(getArgOperand(2)); }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::dbg_value;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
+ /// MemIntrinsic - This is the common base class for memset/memcpy/memmove.
+ ///
+ class MemIntrinsic : public IntrinsicInst {
+ public:
+ Value *getRawDest() const { return const_cast<Value*>(getArgOperand(0)); }
+
+ Value *getLength() const { return const_cast<Value*>(getArgOperand(2)); }
+ ConstantInt *getAlignmentCst() const {
+ return cast<ConstantInt>(const_cast<Value*>(getArgOperand(3)));
+ }
+
+ unsigned getAlignment() const {
+ return getAlignmentCst()->getZExtValue();
+ }
+
+ ConstantInt *getVolatileCst() const {
+ return cast<ConstantInt>(const_cast<Value*>(getArgOperand(4)));
+ }
+ bool isVolatile() const {
+ return !getVolatileCst()->isZero();
+ }
+
+ unsigned getDestAddressSpace() const {
+ return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
+ }
+
+ /// getDest - This is just like getRawDest, but it strips off any cast
+ /// instructions that feed it, giving the original input. The returned
+ /// value is guaranteed to be a pointer.
+ Value *getDest() const { return getRawDest()->stripPointerCasts(); }
+
+ /// set* - Set the specified arguments of the instruction.
+ ///
+ void setDest(Value *Ptr) {
+ assert(getRawDest()->getType() == Ptr->getType() &&
+ "setDest called with pointer of wrong type!");
+ setArgOperand(0, Ptr);
+ }
+
+ void setLength(Value *L) {
+ assert(getLength()->getType() == L->getType() &&
+ "setLength called with value of wrong type!");
+ setArgOperand(2, L);
+ }
+
+ void setAlignment(Constant* A) {
+ setArgOperand(3, A);
+ }
+
+ void setVolatile(Constant* V) {
+ setArgOperand(4, V);
+ }
+
+ Type *getAlignmentType() const {
+ return getArgOperand(3)->getType();
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ return true;
+ default: return false;
+ }
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
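+
+ // A minimal usage sketch (illustrative only; 'MI' is assumed to be a
+ // valid MemIntrinsic*): reading the operands shared by memset, memcpy,
+ // and memmove.
+ //
+ //   Value *Dst = MI->getDest();    // destination, casts stripped
+ //   Value *Len = MI->getLength();  // byte count
+ //   if (!MI->isVolatile()) { /* non-volatile transfer */ }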
+
+ /// MemSetInst - This class wraps the llvm.memset intrinsic.
+ ///
+ class MemSetInst : public MemIntrinsic {
+ public:
+ /// get* - Return the arguments to the instruction.
+ ///
+ Value *getValue() const { return const_cast<Value*>(getArgOperand(1)); }
+
+ void setValue(Value *Val) {
+ assert(getValue()->getType() == Val->getType() &&
+ "setValue called with value of wrong type!");
+ setArgOperand(1, Val);
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::memset;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
+ /// MemTransferInst - This class wraps the llvm.memcpy/memmove intrinsics.
+ ///
+ class MemTransferInst : public MemIntrinsic {
+ public:
+ /// get* - Return the arguments to the instruction.
+ ///
+ Value *getRawSource() const { return const_cast<Value*>(getArgOperand(1)); }
+
+ /// getSource - This is just like getRawSource, but it strips off any cast
+ /// instructions that feed it, giving the original input. The returned
+ /// value is guaranteed to be a pointer.
+ Value *getSource() const { return getRawSource()->stripPointerCasts(); }
+
+ unsigned getSourceAddressSpace() const {
+ return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
+ }
+
+ void setSource(Value *Ptr) {
+ assert(getRawSource()->getType() == Ptr->getType() &&
+ "setSource called with pointer of wrong type!");
+ setArgOperand(1, Ptr);
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::memcpy ||
+ I->getIntrinsicID() == Intrinsic::memmove;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
+
+ /// MemCpyInst - This class wraps the llvm.memcpy intrinsic.
+ ///
+ class MemCpyInst : public MemTransferInst {
+ public:
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::memcpy;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
+ /// MemMoveInst - This class wraps the llvm.memmove intrinsic.
+ ///
+ class MemMoveInst : public MemTransferInst {
+ public:
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::memmove;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
+ /// VAStartInst - This represents the llvm.va_start intrinsic.
+ ///
+ class VAStartInst : public IntrinsicInst {
+ public:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::vastart;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+
+ Value *getArgList() const { return const_cast<Value*>(getArgOperand(0)); }
+ };
+
+ /// VAEndInst - This represents the llvm.va_end intrinsic.
+ ///
+ class VAEndInst : public IntrinsicInst {
+ public:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::vaend;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+
+ Value *getArgList() const { return const_cast<Value*>(getArgOperand(0)); }
+ };
+
+ /// VACopyInst - This represents the llvm.va_copy intrinsic.
+ ///
+ class VACopyInst : public IntrinsicInst {
+ public:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::vacopy;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+
+ Value *getDest() const { return const_cast<Value*>(getArgOperand(0)); }
+ Value *getSrc() const { return const_cast<Value*>(getArgOperand(1)); }
+ };
+
+}
+
+#endif
diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
new file mode 100644
index 000000000000..c97cd91d73a9
--- /dev/null
+++ b/include/llvm/IR/Intrinsics.h
@@ -0,0 +1,128 @@
+//===-- llvm/Intrinsics.h - LLVM Intrinsic Function Handling ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a set of enums which allow processing of intrinsic
+// functions. Values of these enum types are returned by
+// Function::getIntrinsicID.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_INTRINSICS_H
+#define LLVM_IR_INTRINSICS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include <string>
+
+namespace llvm {
+
+class Type;
+class FunctionType;
+class Function;
+class LLVMContext;
+class Module;
+class AttributeSet;
+
+/// Intrinsic Namespace - This namespace contains an enum with a value for
+/// every intrinsic/builtin function known by LLVM. These enum values are
+/// returned by Function::getIntrinsicID().
+///
+namespace Intrinsic {
+ enum ID {
+ not_intrinsic = 0, // Must be zero
+
+ // Get the intrinsic enums generated from Intrinsics.td
+#define GET_INTRINSIC_ENUM_VALUES
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_intrinsics
+ };
+
+ /// Intrinsic::getName(ID) - Return the LLVM name for an intrinsic, such as
+ /// "llvm.ppc.altivec.lvx".
+ std::string getName(ID id, ArrayRef<Type*> Tys = ArrayRef<Type*>());
+
+ /// Intrinsic::getType(ID) - Return the function type for an intrinsic.
+ ///
+ FunctionType *getType(LLVMContext &Context, ID id,
+ ArrayRef<Type*> Tys = ArrayRef<Type*>());
+
+ /// Intrinsic::isOverloaded(ID) - Returns true if the intrinsic can be
+ /// overloaded.
+ bool isOverloaded(ID id);
+
+ /// Intrinsic::getAttributes(ID) - Return the attributes for an intrinsic.
+ ///
+ AttributeSet getAttributes(LLVMContext &C, ID id);
+
+ /// Intrinsic::getDeclaration(M, ID) - Create or insert an LLVM Function
+ /// declaration for an intrinsic, and return it.
+ ///
+ /// The Tys parameter is for intrinsics with overloaded types (e.g., those
+ /// using iAny, fAny, vAny, or iPTRAny). For a declaration of an overloaded
+ /// intrinsic, Tys must provide exactly one type for each overloaded type in
+ /// the intrinsic.
+ Function *getDeclaration(Module *M, ID id,
+ ArrayRef<Type*> Tys = ArrayRef<Type*>());
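+
+ // A minimal usage sketch (illustrative only; 'M' is an assumed Module*
+ // and 'Ctx' its LLVMContext): declaring the overloaded llvm.ctlz intrinsic
+ // for i32.
+ //
+ //   Type *Tys[] = { Type::getInt32Ty(Ctx) };
+ //   Function *Ctlz = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys);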
+
+ /// Map a GCC builtin name to an intrinsic ID.
+ ID getIntrinsicForGCCBuiltin(const char *Prefix, const char *BuiltinName);
+
+ /// IITDescriptor - This is a type descriptor which explains the type
+ /// requirements of an intrinsic. This is returned by
+ /// getIntrinsicInfoTableEntries.
+ struct IITDescriptor {
+ enum IITDescriptorKind {
+ Void, MMX, Metadata, Half, Float, Double,
+ Integer, Vector, Pointer, Struct,
+ Argument, ExtendVecArgument, TruncVecArgument
+ } Kind;
+
+ union {
+ unsigned Integer_Width;
+ unsigned Float_Width;
+ unsigned Vector_Width;
+ unsigned Pointer_AddressSpace;
+ unsigned Struct_NumElements;
+ unsigned Argument_Info;
+ };
+
+ enum ArgKind {
+ AK_AnyInteger,
+ AK_AnyFloat,
+ AK_AnyVector,
+ AK_AnyPointer
+ };
+ unsigned getArgumentNumber() const {
+ assert(Kind == Argument || Kind == ExtendVecArgument ||
+ Kind == TruncVecArgument);
+ return Argument_Info >> 2;
+ }
+ ArgKind getArgumentKind() const {
+ assert(Kind == Argument || Kind == ExtendVecArgument ||
+ Kind == TruncVecArgument);
+ return (ArgKind)(Argument_Info&3);
+ }
+
+ static IITDescriptor get(IITDescriptorKind K, unsigned Field) {
+ IITDescriptor Result = { K, { Field } };
+ return Result;
+ }
+ };
+
+ /// getIntrinsicInfoTableEntries - Decode the IIT table descriptor for the
+ /// specified intrinsic into an array of IITDescriptors.
+ ///
+ void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl<IITDescriptor> &T);
+
+} // End Intrinsic namespace
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
new file mode 100644
index 000000000000..e252664e45cf
--- /dev/null
+++ b/include/llvm/IR/Intrinsics.td
@@ -0,0 +1,484 @@
+//===- Intrinsics.td - Defines all LLVM intrinsics ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines properties of all LLVM intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/CodeGen/ValueTypes.td"
+
+//===----------------------------------------------------------------------===//
+// Properties we keep track of for intrinsics.
+//===----------------------------------------------------------------------===//
+
+class IntrinsicProperty;
+
+// Intr*Mem - Memory properties. An intrinsic is allowed to have at most one of
+// these properties set. They are listed from the most aggressive (best to use
+// if correct) to the least aggressive. If no property is set, the worst case
+// is assumed (it may read and write any memory it can get access to and it may
+// have other side effects).
+
+// IntrNoMem - The intrinsic does not access memory or have any other side
+// effects. It may be CSE'd, deleted if dead, etc.
+def IntrNoMem : IntrinsicProperty;
+
+// IntrReadArgMem - This intrinsic reads only from memory that one of its
+// pointer-typed arguments points to, but may read an unspecified amount.
+def IntrReadArgMem : IntrinsicProperty;
+
+// IntrReadMem - This intrinsic reads from unspecified memory, so it cannot be
+// moved across stores. However, it can be reordered otherwise and can be
+// deleted if dead.
+def IntrReadMem : IntrinsicProperty;
+
+// IntrReadWriteArgMem - This intrinsic reads from and writes to only the
+// memory that one of its pointer-typed arguments points to, but it may access
+// an unspecified amount. The reads and writes may be volatile, but apart from
+// that the intrinsic has no other side effects.
+def IntrReadWriteArgMem : IntrinsicProperty;
+
+// Commutative - This intrinsic is commutative: X op Y == Y op X.
+def Commutative : IntrinsicProperty;
+
+// Throws - This intrinsic can throw.
+def Throws : IntrinsicProperty;
+
+// NoCapture - The specified argument pointer is not captured by the intrinsic.
+class NoCapture<int argNo> : IntrinsicProperty {
+ int ArgNo = argNo;
+}
+
+def IntrNoReturn : IntrinsicProperty;
+
+//===----------------------------------------------------------------------===//
+// Types used by intrinsics.
+//===----------------------------------------------------------------------===//
+
+class LLVMType<ValueType vt> {
+ ValueType VT = vt;
+}
+
+class LLVMQualPointerType<LLVMType elty, int addrspace>
+ : LLVMType<iPTR>{
+ LLVMType ElTy = elty;
+ int AddrSpace = addrspace;
+}
+
+class LLVMPointerType<LLVMType elty>
+ : LLVMQualPointerType<elty, 0>;
+
+class LLVMAnyPointerType<LLVMType elty>
+ : LLVMType<iPTRAny>{
+ LLVMType ElTy = elty;
+}
+
+// Match the type of another intrinsic parameter. Number is an index into the
+// list of overloaded types for the intrinsic, excluding all the fixed types.
+// The Number value must refer to a previously listed type. For example:
+// Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyfloat_ty, LLVMMatchType<0>]>
+// has two overloaded types, the 2nd and 3rd arguments. LLVMMatchType<0>
+// refers to the first overloaded type, which is the 2nd argument.
+class LLVMMatchType<int num>
+ : LLVMType<OtherVT>{
+ int Number = num;
+}
+
+// Match the type of another intrinsic parameter that is expected to be
+// an integral vector type, but change the element size to be twice as wide
+// or half as wide as the other type. This is only useful when the intrinsic
+// is overloaded, so the matched type should be declared as iAny.
+class LLVMExtendedElementVectorType<int num> : LLVMMatchType<num>;
+class LLVMTruncatedElementVectorType<int num> : LLVMMatchType<num>;
+
+def llvm_void_ty : LLVMType<isVoid>;
+def llvm_anyint_ty : LLVMType<iAny>;
+def llvm_anyfloat_ty : LLVMType<fAny>;
+def llvm_anyvector_ty : LLVMType<vAny>;
+def llvm_i1_ty : LLVMType<i1>;
+def llvm_i8_ty : LLVMType<i8>;
+def llvm_i16_ty : LLVMType<i16>;
+def llvm_i32_ty : LLVMType<i32>;
+def llvm_i64_ty : LLVMType<i64>;
+def llvm_half_ty : LLVMType<f16>;
+def llvm_float_ty : LLVMType<f32>;
+def llvm_double_ty : LLVMType<f64>;
+def llvm_f80_ty : LLVMType<f80>;
+def llvm_f128_ty : LLVMType<f128>;
+def llvm_ppcf128_ty : LLVMType<ppcf128>;
+def llvm_ptr_ty : LLVMPointerType<llvm_i8_ty>; // i8*
+def llvm_ptrptr_ty : LLVMPointerType<llvm_ptr_ty>; // i8**
+def llvm_anyptr_ty : LLVMAnyPointerType<llvm_i8_ty>; // (space)i8*
+def llvm_empty_ty : LLVMType<OtherVT>; // { }
+def llvm_descriptor_ty : LLVMPointerType<llvm_empty_ty>; // { }*
+def llvm_metadata_ty : LLVMType<MetadataVT>; // !{...}
+
+def llvm_x86mmx_ty : LLVMType<x86mmx>;
+def llvm_ptrx86mmx_ty : LLVMPointerType<llvm_x86mmx_ty>; // <1 x i64>*
+
+def llvm_v2i1_ty : LLVMType<v2i1>; // 2 x i1
+def llvm_v4i1_ty : LLVMType<v4i1>; // 4 x i1
+def llvm_v8i1_ty : LLVMType<v8i1>; // 8 x i1
+def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
+def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1
+def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1
+def llvm_v2i8_ty : LLVMType<v2i8>; // 2 x i8
+def llvm_v4i8_ty : LLVMType<v4i8>; // 4 x i8
+def llvm_v8i8_ty : LLVMType<v8i8>; // 8 x i8
+def llvm_v16i8_ty : LLVMType<v16i8>; // 16 x i8
+def llvm_v32i8_ty : LLVMType<v32i8>; // 32 x i8
+def llvm_v64i8_ty : LLVMType<v64i8>; // 64 x i8
+
+def llvm_v1i16_ty : LLVMType<v1i16>; // 1 x i16
+def llvm_v2i16_ty : LLVMType<v2i16>; // 2 x i16
+def llvm_v4i16_ty : LLVMType<v4i16>; // 4 x i16
+def llvm_v8i16_ty : LLVMType<v8i16>; // 8 x i16
+def llvm_v16i16_ty : LLVMType<v16i16>; // 16 x i16
+def llvm_v32i16_ty : LLVMType<v32i16>; // 32 x i16
+
+def llvm_v1i32_ty : LLVMType<v1i32>; // 1 x i32
+def llvm_v2i32_ty : LLVMType<v2i32>; // 2 x i32
+def llvm_v4i32_ty : LLVMType<v4i32>; // 4 x i32
+def llvm_v8i32_ty : LLVMType<v8i32>; // 8 x i32
+def llvm_v16i32_ty : LLVMType<v16i32>; // 16 x i32
+def llvm_v1i64_ty : LLVMType<v1i64>; // 1 x i64
+def llvm_v2i64_ty : LLVMType<v2i64>; // 2 x i64
+def llvm_v4i64_ty : LLVMType<v4i64>; // 4 x i64
+def llvm_v8i64_ty : LLVMType<v8i64>; // 8 x i64
+def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64
+
+def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float
+def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float
+def llvm_v8f32_ty : LLVMType<v8f32>; // 8 x float
+def llvm_v16f32_ty : LLVMType<v16f32>; // 16 x float
+def llvm_v2f64_ty : LLVMType<v2f64>; // 2 x double
+def llvm_v4f64_ty : LLVMType<v4f64>; // 4 x double
+def llvm_v8f64_ty : LLVMType<v8f64>; // 8 x double
+
+def llvm_vararg_ty : LLVMType<isVoid>; // this means vararg here
+
+
+//===----------------------------------------------------------------------===//
+// Intrinsic Definitions.
+//===----------------------------------------------------------------------===//
+
+// Intrinsic class - This is used to define one LLVM intrinsic. The name of the
+// intrinsic definition should start with "int_", then match the LLVM intrinsic
+// name with the "llvm." prefix removed, and all "."s turned into "_"s. For
+// example, llvm.bswap.i16 -> int_bswap_i16.
+//
+// * RetTypes is a list containing the return types expected for the
+// intrinsic.
+// * ParamTypes is a list containing the parameter types expected for the
+// intrinsic.
+// * Properties can be set to describe the behavior of the intrinsic.
+//
+class SDPatternOperator;
+class Intrinsic<list<LLVMType> ret_types,
+ list<LLVMType> param_types = [],
+ list<IntrinsicProperty> properties = [],
+ string name = ""> : SDPatternOperator {
+ string LLVMName = name;
+ string TargetPrefix = ""; // Set to a prefix for target-specific intrinsics.
+ list<LLVMType> RetTypes = ret_types;
+ list<LLVMType> ParamTypes = param_types;
+ list<IntrinsicProperty> Properties = properties;
+
+ bit isTarget = 0;
+}
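+
+// Purely illustrative (LLVM defines no such intrinsic): a hypothetical
+// llvm.example.select.any over one overloaded integer type could be written
+//   def int_example_select_any
+//       : Intrinsic<[llvm_anyint_ty],
+//                   [llvm_i1_ty, LLVMMatchType<0>, LLVMMatchType<0>],
+//                   [IntrNoMem]>;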
+
+/// GCCBuiltin - If this intrinsic exactly corresponds to a GCC builtin, this
+/// specifies the name of the builtin. This provides automatic CBE and CFE
+/// support.
+class GCCBuiltin<string name> {
+ string GCCBuiltinName = name;
+}
+
+
+//===--------------- Variable Argument Handling Intrinsics ----------------===//
+//
+
+def int_vastart : Intrinsic<[], [llvm_ptr_ty], [], "llvm.va_start">;
+def int_vacopy : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [],
+ "llvm.va_copy">;
+def int_vaend : Intrinsic<[], [llvm_ptr_ty], [], "llvm.va_end">;
+
+//===------------------- Garbage Collection Intrinsics --------------------===//
+//
+def int_gcroot : Intrinsic<[],
+ [llvm_ptrptr_ty, llvm_ptr_ty]>;
+def int_gcread : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_ptrptr_ty],
+ [IntrReadArgMem]>;
+def int_gcwrite : Intrinsic<[],
+ [llvm_ptr_ty, llvm_ptr_ty, llvm_ptrptr_ty],
+ [IntrReadWriteArgMem, NoCapture<1>, NoCapture<2>]>;
+
+//===--------------------- Code Generator Intrinsics ----------------------===//
+//
+def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+// Note: we treat stacksave/stackrestore as writemem because we don't otherwise
+// model their dependencies on allocas.
+def int_stacksave : Intrinsic<[llvm_ptr_ty]>,
+ GCCBuiltin<"__builtin_stack_save">;
+def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>,
+ GCCBuiltin<"__builtin_stack_restore">;
+
+// IntrReadWriteArgMem is more pessimistic than strictly necessary for
+// prefetch; however, it conveniently prevents the prefetch from being
+// reordered with respect to nearby accesses to the same memory.
+def int_prefetch : Intrinsic<[],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrReadWriteArgMem, NoCapture<0>]>;
+def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>;
+
+def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
+
+// Stack Protector Intrinsic - The stackprotector intrinsic writes the stack
+// guard to the correct place on the stack frame.
+def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
+
+//===------------------- Standard C Library Intrinsics --------------------===//
+//
+
+def int_memcpy : Intrinsic<[],
+ [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
+ llvm_i32_ty, llvm_i1_ty],
+ [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>]>;
+def int_memmove : Intrinsic<[],
+ [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
+ llvm_i32_ty, llvm_i1_ty],
+ [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>]>;
+def int_memset : Intrinsic<[],
+ [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
+ llvm_i32_ty, llvm_i1_ty],
+ [IntrReadWriteArgMem, NoCapture<0>]>;
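+
+// Illustrative IR for one memcpy overload: the mangled suffix encodes the
+// overloaded pointer and length types, and the trailing operands are the
+// alignment and the volatile flag.
+//   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len,
+//                                        i32 1, i1 false)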
+
+// These functions do not actually read memory, but they are sensitive to the
+// rounding mode. This needs to be modelled separately; in the meantime
+// declaring them as reading memory is conservatively correct.
+let Properties = [IntrReadMem] in {
+ def int_sqrt : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_powi : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>;
+ def int_sin : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_cos : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_pow : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>]>;
+ def int_log : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_log10: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_log2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_floor : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_ceil : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_trunc : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_rint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_nearbyint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+}
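+
+// Overload mangling, illustrated as IR: each anyfloat definition above yields
+// one intrinsic per concrete floating point type, e.g.
+//   %r = call float @llvm.sqrt.f32(float %x)
+//   %d = call double @llvm.sqrt.f64(double %y)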
+
+let Properties = [IntrNoMem] in {
+ def int_fma : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>]>;
+
+ def int_fmuladd : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>]>;
+}
+
+// NOTE: these are internal interfaces.
+def int_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
+def int_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
+def int_sigsetjmp : Intrinsic<[llvm_i32_ty] , [llvm_ptr_ty, llvm_i32_ty]>;
+def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
+
+// Internal interface for object size checking
+def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i1_ty],
+ [IntrNoMem]>,
+ GCCBuiltin<"__builtin_object_size">;
+
+//===------------------------- Expect Intrinsics --------------------------===//
+//
+def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>], [IntrNoMem]>;
+
+//===-------------------- Bit Manipulation Intrinsics ---------------------===//
+//
+
+// None of these intrinsics accesses memory at all.
+let Properties = [IntrNoMem] in {
+ def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+ def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+ def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+ def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+}
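+
+// For ctlz and cttz, the i1 operand is a constant flag: when true, a zero
+// input produces an undefined result, which lets targets use their fastest
+// count instructions.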
+
+//===------------------------ Debugger Intrinsics -------------------------===//
+//
+
+// None of these intrinsics accesses memory at all...but that doesn't mean the
+// optimizers can change them aggressively. Special handling is needed in a
+// few places.
+let Properties = [IntrNoMem] in {
+ def int_dbg_declare : Intrinsic<[],
+ [llvm_metadata_ty, llvm_metadata_ty]>;
+ def int_dbg_value : Intrinsic<[],
+ [llvm_metadata_ty, llvm_i64_ty,
+ llvm_metadata_ty]>;
+}
+
+//===------------------ Exception Handling Intrinsics----------------------===//
+//
+
+// The result of eh.typeid.for depends on the enclosing function, but inside a
+// given function it is 'const' and may be CSE'd, etc.
+def int_eh_typeid_for : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
+
+def int_eh_return_i32 : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty]>;
+def int_eh_return_i64 : Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty]>;
+
+def int_eh_unwind_init: Intrinsic<[]>,
+ GCCBuiltin<"__builtin_unwind_init">;
+
+def int_eh_dwarf_cfa : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty]>;
+
+let Properties = [IntrNoMem] in {
+ def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty]>;
+ def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty]>;
+}
+def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>;
+def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
+def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty], [IntrNoReturn]>;
+
+//===---------------- Generic Variable Attribute Intrinsics----------------===//
+//
+def int_var_annotation : Intrinsic<[],
+ [llvm_ptr_ty, llvm_ptr_ty,
+ llvm_ptr_ty, llvm_i32_ty],
+ [], "llvm.var.annotation">;
+def int_ptr_annotation : Intrinsic<[LLVMAnyPointerType<llvm_anyint_ty>],
+ [LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty,
+ llvm_i32_ty],
+ [], "llvm.ptr.annotation">;
+def int_annotation : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_ptr_ty,
+ llvm_ptr_ty, llvm_i32_ty],
+ [], "llvm.annotation">;
+
+//===------------------------ Trampoline Intrinsics -----------------------===//
+//
+def int_init_trampoline : Intrinsic<[],
+ [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
+ [IntrReadWriteArgMem, NoCapture<0>]>,
+ GCCBuiltin<"__builtin_init_trampoline">;
+
+def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
+ [IntrReadArgMem]>,
+ GCCBuiltin<"__builtin_adjust_trampoline">;
+
+//===------------------------ Overflow Intrinsics -------------------------===//
+//
+
+// Each of these intrinsics returns the arithmetic result together with an i1
+// flag indicating whether the operation overflowed.
+def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
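+
+// Illustrative IR: each of these returns a {result, overflow} pair that is
+// unpacked with extractvalue.
+//   %pair = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+//   %sum  = extractvalue {i32, i1} %pair, 0
+//   %ovf  = extractvalue {i32, i1} %pair, 1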
+
+//===------------------------- Memory Use Markers -------------------------===//
+//
+def int_lifetime_start : Intrinsic<[],
+ [llvm_i64_ty, llvm_ptr_ty],
+ [IntrReadWriteArgMem, NoCapture<1>]>;
+def int_lifetime_end : Intrinsic<[],
+ [llvm_i64_ty, llvm_ptr_ty],
+ [IntrReadWriteArgMem, NoCapture<1>]>;
+def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
+ [llvm_i64_ty, llvm_ptr_ty],
+ [IntrReadWriteArgMem, NoCapture<1>]>;
+def int_invariant_end : Intrinsic<[],
+ [llvm_descriptor_ty, llvm_i64_ty,
+ llvm_ptr_ty],
+ [IntrReadWriteArgMem, NoCapture<2>]>;
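+
+// Illustrative IR: marking a 4-byte object live between two program points;
+// the i64 operand is the object size in bytes.
+//   call void @llvm.lifetime.start(i64 4, i8* %p)
+//   ...
+//   call void @llvm.lifetime.end(i64 4, i8* %p)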
+
+//===-------------------------- Other Intrinsics --------------------------===//
+//
+def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
+ GCCBuiltin<"__builtin_flt_rounds">;
+def int_trap : Intrinsic<[], [], [IntrNoReturn]>,
+ GCCBuiltin<"__builtin_trap">;
+def int_debugtrap : Intrinsic<[]>,
+ GCCBuiltin<"__builtin_debugtrap">;
+
+// NOP: calls/invokes to this intrinsic are removed by codegen
+def int_donothing : Intrinsic<[], [], [IntrNoMem]>;
+
+// Intrinsics to support half precision floating point format
+let Properties = [IntrNoMem] in {
+def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_float_ty]>,
+ GCCBuiltin<"__gnu_f2h_ieee">;
+def int_convert_from_fp16 : Intrinsic<[llvm_float_ty], [llvm_i16_ty]>,
+ GCCBuiltin<"__gnu_h2f_ieee">;
+}
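+
+// Illustrative IR: round-tripping a float through the IEEE half encoding.
+//   %h = call i16 @llvm.convert.to.fp16(float %x)
+//   %f = call float @llvm.convert.from.fp16(i16 %h)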
+
+// These convert intrinsics support conversions between various types with
+// rounding and saturation. NOTE: avoid using these intrinsics, as they may be
+// removed in the future and most targets don't support them.
+def int_convertff : Intrinsic<[llvm_anyfloat_ty],
+ [llvm_anyfloat_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertfsi : Intrinsic<[llvm_anyfloat_ty],
+ [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertfui : Intrinsic<[llvm_anyfloat_ty],
+ [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertsif : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyfloat_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertuif : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyfloat_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertss : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertsu : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertus : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertuu : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+
+//===----------------------------------------------------------------------===//
+// Target-specific intrinsics
+//===----------------------------------------------------------------------===//
+
+include "llvm/IR/IntrinsicsPowerPC.td"
+include "llvm/IR/IntrinsicsX86.td"
+include "llvm/IR/IntrinsicsARM.td"
+include "llvm/IR/IntrinsicsXCore.td"
+include "llvm/IR/IntrinsicsHexagon.td"
+include "llvm/IR/IntrinsicsNVVM.td"
+include "llvm/IR/IntrinsicsMips.td"
+include "llvm/IR/IntrinsicsR600.td"
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td
index 93b1ae1dc887..93b1ae1dc887 100644
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
diff --git a/include/llvm/IntrinsicsHexagon.td b/include/llvm/IR/IntrinsicsHexagon.td
index 8a8872931f36..8a8872931f36 100644
--- a/include/llvm/IntrinsicsHexagon.td
+++ b/include/llvm/IR/IntrinsicsHexagon.td
diff --git a/include/llvm/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td
index e40e162a158d..e40e162a158d 100644
--- a/include/llvm/IntrinsicsMips.td
+++ b/include/llvm/IR/IntrinsicsMips.td
diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
new file mode 100644
index 000000000000..ebfd03e48492
--- /dev/null
+++ b/include/llvm/IR/IntrinsicsNVVM.td
@@ -0,0 +1,962 @@
+//===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the NVVM-specific intrinsics for use with NVPTX.
+//
+//===----------------------------------------------------------------------===//
+
+def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
+
+//
+// MISC
+//
+
+ def int_nvvm_clz_i : GCCBuiltin<"__nvvm_clz_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_clz_ll : GCCBuiltin<"__nvvm_clz_ll">,
+ Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+ def int_nvvm_popc_i : GCCBuiltin<"__nvvm_popc_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_popc_ll : GCCBuiltin<"__nvvm_popc_ll">,
+ Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+ def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Min Max
+//
+
+ def int_nvvm_min_i : GCCBuiltin<"__nvvm_min_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_min_ui : GCCBuiltin<"__nvvm_min_ui">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_min_ll : GCCBuiltin<"__nvvm_min_ll">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_min_ull : GCCBuiltin<"__nvvm_min_ull">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_max_i : GCCBuiltin<"__nvvm_max_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_max_ui : GCCBuiltin<"__nvvm_max_ui">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_max_ll : GCCBuiltin<"__nvvm_max_ll">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_max_ull : GCCBuiltin<"__nvvm_max_ull">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">,
+      Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+      [IntrNoMem, Commutative]>;
+ def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Multiplication
+//
+
+ def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Div
+//
+
+ def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Brev
+//
+
+ def int_nvvm_brev32 : GCCBuiltin<"__nvvm_brev32">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_brev64 : GCCBuiltin<"__nvvm_brev64">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+//
+// Sad
+//
+
+ def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Floor Ceil
+//
+
+ def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Abs
+//
+
+ def int_nvvm_abs_i : GCCBuiltin<"__nvvm_abs_i">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_abs_ll : GCCBuiltin<"__nvvm_abs_ll">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+ def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Round
+//
+
+ def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Trunc
+//
+
+ def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Saturate
+//
+
+ def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Exp2 Log2
+//
+
+ def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Sin Cos
+//
+
+ def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+//
+// Fma
+//
+
+ def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">,
+ Intrinsic<[llvm_double_ty],
+ [llvm_double_ty, llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">,
+ Intrinsic<[llvm_double_ty],
+ [llvm_double_ty, llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">,
+ Intrinsic<[llvm_double_ty],
+ [llvm_double_ty, llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">,
+ Intrinsic<[llvm_double_ty],
+ [llvm_double_ty, llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Rcp
+//
+
+ def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Sqrt
+//
+
+ def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Rsqrt
+//
+
+ def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+//
+// Add
+//
+
+ def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+ def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, Commutative]>;
+
+//
+// Convert
+//
+
+ def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">,
+ Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+
+ def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">,
+ Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">,
+ Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+
+ def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">,
+ Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+ def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+
+ def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">,
+ Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">,
+ Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
+
+ def int_nvvm_h2f : GCCBuiltin<"__nvvm_h2f">,
+ Intrinsic<[llvm_float_ty], [llvm_i16_ty], [IntrNoMem]>;
+
+//
+// Bitcast
+//
+
+ def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">,
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">,
+ Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
+ Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+
+
+// Atomic operations not available as generic llvm intrinsics.
+ def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
+ [LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty],
+ [IntrReadWriteArgMem, NoCapture<0>]>;
+ def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
+ [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
+ [IntrReadWriteArgMem, NoCapture<0>]>;
+ def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
+ [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
+ [IntrReadWriteArgMem, NoCapture<0>]>;
+
+// Bar.Sync
+ def int_cuda_syncthreads : GCCBuiltin<"__syncthreads">,
+ Intrinsic<[], [], []>;
+ def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">,
+ Intrinsic<[], [], []>;
+ def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+
+ // Membar
+ def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
+ Intrinsic<[], [], []>;
+ def int_nvvm_membar_gl : GCCBuiltin<"__nvvm_membar_gl">,
+ Intrinsic<[], [], []>;
+ def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
+ Intrinsic<[], [], []>;
+
+
+// Accessing special registers
+ def int_nvvm_read_ptx_sreg_tid_x :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_tid_x">;
+ def int_nvvm_read_ptx_sreg_tid_y :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_tid_y">;
+ def int_nvvm_read_ptx_sreg_tid_z :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_tid_z">;
+
+ def int_nvvm_read_ptx_sreg_ntid_x :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_x">;
+ def int_nvvm_read_ptx_sreg_ntid_y :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_y">;
+ def int_nvvm_read_ptx_sreg_ntid_z :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_z">;
+
+ def int_nvvm_read_ptx_sreg_ctaid_x :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_x">;
+ def int_nvvm_read_ptx_sreg_ctaid_y :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_y">;
+ def int_nvvm_read_ptx_sreg_ctaid_z :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_z">;
+
+ def int_nvvm_read_ptx_sreg_nctaid_x :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_x">;
+ def int_nvvm_read_ptx_sreg_nctaid_y :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_y">;
+ def int_nvvm_read_ptx_sreg_nctaid_z :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_z">;
+
+ def int_nvvm_read_ptx_sreg_warpsize :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_warpsize">;
+
+
+// Generated within nvvm. Use for ldu on sm_20 or later
+// @TODO: Revisit this; changed LLVMAnyPointerType to LLVMPointerType.
+def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldu.global.i">;
+def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldu.global.f">;
+def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldu.global.p">;
+
+// Generated within nvvm. Use for ldg on sm_35 or later
+def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldg.global.i">;
+def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldg.global.f">;
+def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
+ [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
+ "llvm.nvvm.ldg.global.p">;
+
+// Use for generic pointers
+// - These intrinsics are used to convert address spaces.
+// - The input pointer and output pointer must have the same type, except for
+//   the address space. (This restriction is not enforced here, as there is
+//   currently no way to describe it.)
+// - This complements the llvm bitcast, which can be used to cast one type
+// of pointer to another type of pointer, while the address space remains
+// the same.
+def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem],
+ "llvm.nvvm.ptr.local.to.gen">;
+def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem],
+ "llvm.nvvm.ptr.shared.to.gen">;
+def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem],
+ "llvm.nvvm.ptr.global.to.gen">;
+def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem],
+ "llvm.nvvm.ptr.constant.to.gen">;
+
+def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem],
+ "llvm.nvvm.ptr.gen.to.global">;
+def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem],
+ "llvm.nvvm.ptr.gen.to.shared">;
+def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem],
+ "llvm.nvvm.ptr.gen.to.local">;
+def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem],
+ "llvm.nvvm.ptr.gen.to.constant">;
+
+// Used in nvvm internally to help address space optimization and PTX code
+// generation. This is for params that are passed by-value to kernel
+// functions via a pointer.
+def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty],
+ [IntrNoMem],
+ "llvm.nvvm.ptr.gen.to.param">;
+
+// Move intrinsics, used in nvvm internally
+
+def int_nvvm_move_i8 : Intrinsic<[llvm_i8_ty], [llvm_i8_ty], [IntrNoMem],
+ "llvm.nvvm.move.i8">;
+def int_nvvm_move_i16 : Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem],
+ "llvm.nvvm.move.i16">;
+def int_nvvm_move_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem],
+ "llvm.nvvm.move.i32">;
+def int_nvvm_move_i64 : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem],
+ "llvm.nvvm.move.i64">;
+def int_nvvm_move_float : Intrinsic<[llvm_float_ty], [llvm_float_ty],
+ [IntrNoMem], "llvm.nvvm.move.float">;
+def int_nvvm_move_double : Intrinsic<[llvm_double_ty], [llvm_double_ty],
+ [IntrNoMem], "llvm.nvvm.move.double">;
+def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
+ [IntrNoMem, NoCapture<0>], "llvm.nvvm.move.ptr">;
+
+
+/// Error / Warn
+def int_nvvm_compiler_error :
+ Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
+def int_nvvm_compiler_warn :
+ Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
+
+
+// Old PTX back-end intrinsics retained here for backwards-compatibility
+
+multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
+// FIXME: Do we need the 128-bit integer type version?
+// def _r128 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
+
+// FIXME: Enable this once v4i32 support is enabled in back-end.
+// def _v4i32 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
+
+ def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<!strconcat(prefix, "_x")>;
+ def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<!strconcat(prefix, "_y")>;
+ def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<!strconcat(prefix, "_z")>;
+ def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<!strconcat(prefix, "_w")>;
+}
+
+class PTXReadSpecialRegisterIntrinsic_r32<string name>
+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<name>;
+
+class PTXReadSpecialRegisterIntrinsic_r64<string name>
+ : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
+ GCCBuiltin<name>;
+
+defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
+ <"__builtin_ptx_read_tid">;
+defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32
+ <"__builtin_ptx_read_ntid">;
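+
+// For example, the defm above expands to int_ptx_read_tid_x through
+// int_ptx_read_tid_w, i.e. llvm.ptx.read.tid.{x,y,z,w}, each mapped to the
+// corresponding __builtin_ptx_read_tid_{x,y,z,w} GCC builtin.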
+
+def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_laneid">;
+def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_warpid">;
+def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_nwarpid">;
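+  // Note: psll_dq and psrl_dq below take the shift count in bits, while the
+  // _bs ("byteshift") variants take it in bytes (inferred from the builtin
+  // names and noted here only for illustration).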
+
+defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32
+ <"__builtin_ptx_read_ctaid">;
+defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32
+ <"__builtin_ptx_read_nctaid">;
+
+def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_smid">;
+def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_nsmid">;
+def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_gridid">;
+
+def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_lanemask_eq">;
+def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_lanemask_le">;
+def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_lanemask_lt">;
+def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_lanemask_ge">;
+def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_lanemask_gt">;
+
+def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_clock">;
+def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64
+ <"__builtin_ptx_read_clock64">;
+
+def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_pm0">;
+def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_pm1">;
+def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_pm2">;
+def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32
+ <"__builtin_ptx_read_pm3">;
+
+def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>,
+ GCCBuiltin<"__builtin_ptx_bar_sync">;
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
new file mode 100644
index 000000000000..cde39ccd3c52
--- /dev/null
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -0,0 +1,466 @@
+//===- IntrinsicsPowerPC.td - Defines PowerPC intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the PowerPC-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Definitions for all PowerPC intrinsics.
+//
+
+// Non-altivec intrinsics.
+let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
+  // dcba/dcbf/dcbi/dcbst/dcbt/dcbtst/dcbz/dcbzl(PPC970) instructions.
+ def int_ppc_dcba : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbf : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty],
+ [IntrReadWriteArgMem, NoCapture<0>]>;
+ def int_ppc_dcbtst: Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
+
+ // sync instruction
+ def int_ppc_sync : Intrinsic<[], [], []>;
+}
+
+
+let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
+ /// PowerPC_Vec_Intrinsic - Base class for all altivec intrinsics.
+ class PowerPC_Vec_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
+ list<LLVMType> param_types,
+ list<IntrinsicProperty> properties>
+ : GCCBuiltin<!strconcat("__builtin_altivec_", GCCIntSuffix)>,
+ Intrinsic<ret_types, param_types, properties>;
+}
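+
+// For example, PowerPC_Vec_FFF_Intrinsic<"vmaxfp"> (defined below) resolves
+// to an intrinsic named llvm.ppc.altivec.vmaxfp that takes two v4f32 vectors,
+// returns one, and is mapped to the GCC builtin __builtin_altivec_vmaxfp.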
+
+//===----------------------------------------------------------------------===//
+// PowerPC Altivec Intrinsic Class Definitions.
+//
+
+/// PowerPC_Vec_FF_Intrinsic - A PowerPC intrinsic that takes one v4f32
+/// vector and returns one. These intrinsics have no side effects.
+class PowerPC_Vec_FF_Intrinsic<string GCCIntSuffix>
+ : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+ [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+
+/// PowerPC_Vec_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f32
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_Vec_FFF_Intrinsic<string GCCIntSuffix>
+ : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+ [llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+
+/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16i8
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_Vec_BBB_Intrinsic<string GCCIntSuffix>
+ : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+ [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+
+/// PowerPC_Vec_HHH_Intrinsic - A PowerPC intrinsic that takes two v8i16
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_Vec_HHH_Intrinsic<string GCCIntSuffix>
+ : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+ [llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+
+/// PowerPC_Vec_WWW_Intrinsic - A PowerPC intrinsic that takes two v4i32
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
+ : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+ [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Altivec Intrinsic Definitions.
+
+let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
+ // Data Stream Control.
+ def int_ppc_altivec_dss : GCCBuiltin<"__builtin_altivec_dss">,
+ Intrinsic<[], [llvm_i32_ty], []>;
+ def int_ppc_altivec_dssall : GCCBuiltin<"__builtin_altivec_dssall">,
+ Intrinsic<[], [], []>;
+ def int_ppc_altivec_dst : GCCBuiltin<"__builtin_altivec_dst">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ []>;
+ def int_ppc_altivec_dstt : GCCBuiltin<"__builtin_altivec_dstt">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ []>;
+ def int_ppc_altivec_dstst : GCCBuiltin<"__builtin_altivec_dstst">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ []>;
+ def int_ppc_altivec_dststt : GCCBuiltin<"__builtin_altivec_dststt">,
+ Intrinsic<[],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ []>;
+
+ // VSCR access.
+ def int_ppc_altivec_mfvscr : GCCBuiltin<"__builtin_altivec_mfvscr">,
+ Intrinsic<[llvm_v8i16_ty], [], [IntrReadMem]>;
+ def int_ppc_altivec_mtvscr : GCCBuiltin<"__builtin_altivec_mtvscr">,
+ Intrinsic<[], [llvm_v4i32_ty], []>;
+
+
+ // Loads. These don't map directly to GCC builtins because they represent the
+ // source address with a single pointer.
+ def int_ppc_altivec_lvx :
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_ppc_altivec_lvxl :
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_ppc_altivec_lvebx :
+ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_ppc_altivec_lvehx :
+ Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_ppc_altivec_lvewx :
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+
+  // Stores. These don't map directly to GCC builtins because they represent
+  // the destination address with a single pointer.
+ def int_ppc_altivec_stvx :
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
+ def int_ppc_altivec_stvxl :
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
+ def int_ppc_altivec_stvebx :
+ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty], []>;
+ def int_ppc_altivec_stvehx :
+ Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty], []>;
+ def int_ppc_altivec_stvewx :
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
+
+ // Comparisons setting a vector.
+ def int_ppc_altivec_vcmpbfp : GCCBuiltin<"__builtin_altivec_vcmpbfp">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpeqfp : GCCBuiltin<"__builtin_altivec_vcmpeqfp">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgefp : GCCBuiltin<"__builtin_altivec_vcmpgefp">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtfp : GCCBuiltin<"__builtin_altivec_vcmpgtfp">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+
+ def int_ppc_altivec_vcmpequw : GCCBuiltin<"__builtin_altivec_vcmpequw">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtsw : GCCBuiltin<"__builtin_altivec_vcmpgtsw">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtuw : GCCBuiltin<"__builtin_altivec_vcmpgtuw">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+
+ def int_ppc_altivec_vcmpequh : GCCBuiltin<"__builtin_altivec_vcmpequh">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtsh : GCCBuiltin<"__builtin_altivec_vcmpgtsh">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtuh : GCCBuiltin<"__builtin_altivec_vcmpgtuh">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+
+ def int_ppc_altivec_vcmpequb : GCCBuiltin<"__builtin_altivec_vcmpequb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtsb : GCCBuiltin<"__builtin_altivec_vcmpgtsb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtub : GCCBuiltin<"__builtin_altivec_vcmpgtub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+
+ // Predicate Comparisons. The first operand specifies interpretation of CR6.
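+  // (For reference, Clang's altivec.h encodes these as __CR6_EQ = 0,
+  // __CR6_EQ_REV = 1, __CR6_LT = 2 and __CR6_LT_REV = 3; the exact encoding
+  // is owned by the front end and is cited here only for illustration.)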
+ def int_ppc_altivec_vcmpbfp_p : GCCBuiltin<"__builtin_altivec_vcmpbfp_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpeqfp_p : GCCBuiltin<"__builtin_altivec_vcmpeqfp_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgefp_p : GCCBuiltin<"__builtin_altivec_vcmpgefp_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtfp_p : GCCBuiltin<"__builtin_altivec_vcmpgtfp_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
+ [IntrNoMem]>;
+
+ def int_ppc_altivec_vcmpequw_p : GCCBuiltin<"__builtin_altivec_vcmpequw_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtsw_p : GCCBuiltin<"__builtin_altivec_vcmpgtsw_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtuw_p : GCCBuiltin<"__builtin_altivec_vcmpgtuw_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
+ [IntrNoMem]>;
+
+ def int_ppc_altivec_vcmpequh_p : GCCBuiltin<"__builtin_altivec_vcmpequh_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtsh_p : GCCBuiltin<"__builtin_altivec_vcmpgtsh_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtuh_p : GCCBuiltin<"__builtin_altivec_vcmpgtuh_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
+ [IntrNoMem]>;
+
+ def int_ppc_altivec_vcmpequb_p : GCCBuiltin<"__builtin_altivec_vcmpequb_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtsb_p : GCCBuiltin<"__builtin_altivec_vcmpgtsb_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpgtub_p : GCCBuiltin<"__builtin_altivec_vcmpgtub_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
+ [IntrNoMem]>;
+}
+
+// Vector average.
+def int_ppc_altivec_vavgsb : PowerPC_Vec_BBB_Intrinsic<"vavgsb">;
+def int_ppc_altivec_vavgsh : PowerPC_Vec_HHH_Intrinsic<"vavgsh">;
+def int_ppc_altivec_vavgsw : PowerPC_Vec_WWW_Intrinsic<"vavgsw">;
+def int_ppc_altivec_vavgub : PowerPC_Vec_BBB_Intrinsic<"vavgub">;
+def int_ppc_altivec_vavguh : PowerPC_Vec_HHH_Intrinsic<"vavguh">;
+def int_ppc_altivec_vavguw : PowerPC_Vec_WWW_Intrinsic<"vavguw">;
+
+// Vector maximum.
+def int_ppc_altivec_vmaxfp : PowerPC_Vec_FFF_Intrinsic<"vmaxfp">;
+def int_ppc_altivec_vmaxsb : PowerPC_Vec_BBB_Intrinsic<"vmaxsb">;
+def int_ppc_altivec_vmaxsh : PowerPC_Vec_HHH_Intrinsic<"vmaxsh">;
+def int_ppc_altivec_vmaxsw : PowerPC_Vec_WWW_Intrinsic<"vmaxsw">;
+def int_ppc_altivec_vmaxub : PowerPC_Vec_BBB_Intrinsic<"vmaxub">;
+def int_ppc_altivec_vmaxuh : PowerPC_Vec_HHH_Intrinsic<"vmaxuh">;
+def int_ppc_altivec_vmaxuw : PowerPC_Vec_WWW_Intrinsic<"vmaxuw">;
+
+// Vector minimum.
+def int_ppc_altivec_vminfp : PowerPC_Vec_FFF_Intrinsic<"vminfp">;
+def int_ppc_altivec_vminsb : PowerPC_Vec_BBB_Intrinsic<"vminsb">;
+def int_ppc_altivec_vminsh : PowerPC_Vec_HHH_Intrinsic<"vminsh">;
+def int_ppc_altivec_vminsw : PowerPC_Vec_WWW_Intrinsic<"vminsw">;
+def int_ppc_altivec_vminub : PowerPC_Vec_BBB_Intrinsic<"vminub">;
+def int_ppc_altivec_vminuh : PowerPC_Vec_HHH_Intrinsic<"vminuh">;
+def int_ppc_altivec_vminuw : PowerPC_Vec_WWW_Intrinsic<"vminuw">;
+
+// Saturating adds.
+def int_ppc_altivec_vaddubs : PowerPC_Vec_BBB_Intrinsic<"vaddubs">;
+def int_ppc_altivec_vaddsbs : PowerPC_Vec_BBB_Intrinsic<"vaddsbs">;
+def int_ppc_altivec_vadduhs : PowerPC_Vec_HHH_Intrinsic<"vadduhs">;
+def int_ppc_altivec_vaddshs : PowerPC_Vec_HHH_Intrinsic<"vaddshs">;
+def int_ppc_altivec_vadduws : PowerPC_Vec_WWW_Intrinsic<"vadduws">;
+def int_ppc_altivec_vaddsws : PowerPC_Vec_WWW_Intrinsic<"vaddsws">;
+def int_ppc_altivec_vaddcuw : PowerPC_Vec_WWW_Intrinsic<"vaddcuw">;
+
+// Saturating subs.
+def int_ppc_altivec_vsububs : PowerPC_Vec_BBB_Intrinsic<"vsububs">;
+def int_ppc_altivec_vsubsbs : PowerPC_Vec_BBB_Intrinsic<"vsubsbs">;
+def int_ppc_altivec_vsubuhs : PowerPC_Vec_HHH_Intrinsic<"vsubuhs">;
+def int_ppc_altivec_vsubshs : PowerPC_Vec_HHH_Intrinsic<"vsubshs">;
+def int_ppc_altivec_vsubuws : PowerPC_Vec_WWW_Intrinsic<"vsubuws">;
+def int_ppc_altivec_vsubsws : PowerPC_Vec_WWW_Intrinsic<"vsubsws">;
+def int_ppc_altivec_vsubcuw : PowerPC_Vec_WWW_Intrinsic<"vsubcuw">;
+
+let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
+ // Saturating multiply-adds.
+ def int_ppc_altivec_vmhaddshs : GCCBuiltin<"__builtin_altivec_vmhaddshs">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vmhraddshs : GCCBuiltin<"__builtin_altivec_vmhraddshs">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+
+ def int_ppc_altivec_vmaddfp : GCCBuiltin<"__builtin_altivec_vmaddfp">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vnmsubfp : GCCBuiltin<"__builtin_altivec_vnmsubfp">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+
+  // Vector Multiply Sum Instructions.
+ def int_ppc_altivec_vmsummbm : GCCBuiltin<"__builtin_altivec_vmsummbm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vmsumshm : GCCBuiltin<"__builtin_altivec_vmsumshm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+
+  // Vector Multiply Instructions.
+ def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vmulesh : GCCBuiltin<"__builtin_altivec_vmulesh">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vmuleuh : GCCBuiltin<"__builtin_altivec_vmuleuh">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+
+ def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vmulosh : GCCBuiltin<"__builtin_altivec_vmulosh">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vmulouh : GCCBuiltin<"__builtin_altivec_vmulouh">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+
+  // Vector Sum Instructions.
+ def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vsum2sws : GCCBuiltin<"__builtin_altivec_vsum2sws">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vsum4sbs : GCCBuiltin<"__builtin_altivec_vsum4sbs">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vsum4shs : GCCBuiltin<"__builtin_altivec_vsum4shs">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vsum4ubs : GCCBuiltin<"__builtin_altivec_vsum4ubs">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+
+ // Other multiplies.
+ def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+
+ // Packs.
+ def int_ppc_altivec_vpkpx : GCCBuiltin<"__builtin_altivec_vpkpx">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vpkshss : GCCBuiltin<"__builtin_altivec_vpkshss">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vpkshus : GCCBuiltin<"__builtin_altivec_vpkshus">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ // vpkuhum is lowered to a shuffle.
+ def int_ppc_altivec_vpkuhus : GCCBuiltin<"__builtin_altivec_vpkuhus">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ // vpkuwum is lowered to a shuffle.
+ def int_ppc_altivec_vpkuwus : GCCBuiltin<"__builtin_altivec_vpkuwus">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+
+ // Unpacks.
+ def int_ppc_altivec_vupkhpx : GCCBuiltin<"__builtin_altivec_vupkhpx">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vupkhsb : GCCBuiltin<"__builtin_altivec_vupkhsb">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vupkhsh : GCCBuiltin<"__builtin_altivec_vupkhsh">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vupklpx : GCCBuiltin<"__builtin_altivec_vupklpx">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vupklsb : GCCBuiltin<"__builtin_altivec_vupklsb">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vupklsh : GCCBuiltin<"__builtin_altivec_vupklsh">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+
+
+ // FP <-> integer conversion.
+ def int_ppc_altivec_vcfsx : GCCBuiltin<"__builtin_altivec_vcfsx">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcfux : GCCBuiltin<"__builtin_altivec_vcfux">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vctsxs : GCCBuiltin<"__builtin_altivec_vctsxs">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vctuxs : GCCBuiltin<"__builtin_altivec_vctuxs">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_ppc_altivec_vrfim : GCCBuiltin<"__builtin_altivec_vrfim">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vrfin : GCCBuiltin<"__builtin_altivec_vrfin">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vrfip : GCCBuiltin<"__builtin_altivec_vrfip">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vrfiz : GCCBuiltin<"__builtin_altivec_vrfiz">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+}
+
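+// Left Shifts.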
+def int_ppc_altivec_vsl : PowerPC_Vec_WWW_Intrinsic<"vsl">;
+def int_ppc_altivec_vslo : PowerPC_Vec_WWW_Intrinsic<"vslo">;
+
+def int_ppc_altivec_vslb : PowerPC_Vec_BBB_Intrinsic<"vslb">;
+def int_ppc_altivec_vslh : PowerPC_Vec_HHH_Intrinsic<"vslh">;
+def int_ppc_altivec_vslw : PowerPC_Vec_WWW_Intrinsic<"vslw">;
+
+// Right Shifts.
+def int_ppc_altivec_vsr : PowerPC_Vec_WWW_Intrinsic<"vsr">;
+def int_ppc_altivec_vsro : PowerPC_Vec_WWW_Intrinsic<"vsro">;
+
+def int_ppc_altivec_vsrb : PowerPC_Vec_BBB_Intrinsic<"vsrb">;
+def int_ppc_altivec_vsrh : PowerPC_Vec_HHH_Intrinsic<"vsrh">;
+def int_ppc_altivec_vsrw : PowerPC_Vec_WWW_Intrinsic<"vsrw">;
+def int_ppc_altivec_vsrab : PowerPC_Vec_BBB_Intrinsic<"vsrab">;
+def int_ppc_altivec_vsrah : PowerPC_Vec_HHH_Intrinsic<"vsrah">;
+def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
+
+// Rotates.
+def int_ppc_altivec_vrlb : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
+def int_ppc_altivec_vrlh : PowerPC_Vec_HHH_Intrinsic<"vrlh">;
+def int_ppc_altivec_vrlw : PowerPC_Vec_WWW_Intrinsic<"vrlw">;
+
+let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
+ // Miscellaneous.
+ def int_ppc_altivec_lvsl :
+ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>;
+ def int_ppc_altivec_lvsr :
+ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>;
+
+ def int_ppc_altivec_vperm : GCCBuiltin<"__builtin_altivec_vperm_4si">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+}
+
+def int_ppc_altivec_vexptefp : PowerPC_Vec_FF_Intrinsic<"vexptefp">;
+def int_ppc_altivec_vlogefp : PowerPC_Vec_FF_Intrinsic<"vlogefp">;
+def int_ppc_altivec_vrefp : PowerPC_Vec_FF_Intrinsic<"vrefp">;
+def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">;
diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
new file mode 100644
index 000000000000..ecb5668d8e95
--- /dev/null
+++ b/include/llvm/IR/IntrinsicsR600.td
@@ -0,0 +1,36 @@
+//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the R600-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "r600" in {
+
+class R600ReadPreloadRegisterIntrinsic<string name>
+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<name>;
+
+multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
+ def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
+ def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
+ def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
+}
+
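+// For example, the first defm below expands to int_r600_read_global_size_x,
+// _y and _z, i.e. llvm.r600.read.global.size.{x,y,z}, each mapped to the
+// builtin __builtin_r600_read_global_size_{x,y,z}.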
+defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_r600_read_global_size">;
+defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_r600_read_local_size">;
+defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_r600_read_ngroups">;
+defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_r600_read_tgid">;
+defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_r600_read_tidig">;
+} // End TargetPrefix = "r600"
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
new file mode 100644
index 000000000000..69e0ab4fa2ed
--- /dev/null
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -0,0 +1,2580 @@
+//===- IntrinsicsX86.td - Defines X86 intrinsics -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the X86-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Interrupt traps
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_int : Intrinsic<[], [llvm_i8_ty]>;
+}
+
+//===----------------------------------------------------------------------===//
+// 3DNow!
+
+let TargetPrefix = "x86" in {
+ def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnow_pfrsqit1 : GCCBuiltin<"__builtin_ia32_pfrsqit1">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// 3DNow! extensions
+
+let TargetPrefix = "x86" in {
+ def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_3dnowa_pswapd :
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE1
+
+// Arithmetic ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse_add_ss : GCCBuiltin<"__builtin_ia32_addss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_sub_ss : GCCBuiltin<"__builtin_ia32_subss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_mul_ss : GCCBuiltin<"__builtin_ia32_mulss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_div_ss : GCCBuiltin<"__builtin_ia32_divss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse_sqrt_ps : GCCBuiltin<"__builtin_ia32_sqrtps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse_rcp_ss : GCCBuiltin<"__builtin_ia32_rcpss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse_rcp_ps : GCCBuiltin<"__builtin_ia32_rcpps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse_rsqrt_ss : GCCBuiltin<"__builtin_ia32_rsqrtss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse_rsqrt_ps : GCCBuiltin<"__builtin_ia32_rsqrtps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse_min_ss : GCCBuiltin<"__builtin_ia32_minss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_min_ps : GCCBuiltin<"__builtin_ia32_minps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_max_ss : GCCBuiltin<"__builtin_ia32_maxss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_max_ps : GCCBuiltin<"__builtin_ia32_maxps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+}
+
+// Comparison ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_sse_cmp_ps : GCCBuiltin<"__builtin_ia32_cmpps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
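+  // The trailing i8 immediate selects the comparison predicate, following
+  // the CMPSS/CMPPS encoding (0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT,
+  // 6=NLE, 7=ORD).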
+ def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_comilt_ss : GCCBuiltin<"__builtin_ia32_comilt">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_comile_ss : GCCBuiltin<"__builtin_ia32_comile">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_comigt_ss : GCCBuiltin<"__builtin_ia32_comigt">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_comige_ss : GCCBuiltin<"__builtin_ia32_comige">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_comineq_ss : GCCBuiltin<"__builtin_ia32_comineq">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_ucomieq_ss : GCCBuiltin<"__builtin_ia32_ucomieq">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_ucomilt_ss : GCCBuiltin<"__builtin_ia32_ucomilt">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_ucomile_ss : GCCBuiltin<"__builtin_ia32_ucomile">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_ucomigt_ss : GCCBuiltin<"__builtin_ia32_ucomigt">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_ucomige_ss : GCCBuiltin<"__builtin_ia32_ucomige">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_ucomineq_ss : GCCBuiltin<"__builtin_ia32_ucomineq">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+}
+
+
+// Conversion ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse_cvtss2si : GCCBuiltin<"__builtin_ia32_cvtss2si">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_cvtss2si64 : GCCBuiltin<"__builtin_ia32_cvtss2si64">,
+ Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_cvttss2si : GCCBuiltin<"__builtin_ia32_cvttss2si">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_cvttss2si64 : GCCBuiltin<"__builtin_ia32_cvttss2si64">,
+ Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_cvtsi2ss : GCCBuiltin<"__builtin_ia32_cvtsi2ss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse_cvtsi642ss : GCCBuiltin<"__builtin_ia32_cvtsi642ss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_cvtpi2ps : GCCBuiltin<"__builtin_ia32_cvtpi2ps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+}
+
+// SIMD store ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse_storeu_ps : GCCBuiltin<"__builtin_ia32_storeups">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+}
+
+// Cacheability support ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse_sfence : GCCBuiltin<"__builtin_ia32_sfence">,
+ Intrinsic<[], [], []>;
+}
+
+// Control register.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse_stmxcsr :
+ Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_x86_sse_ldmxcsr :
+ Intrinsic<[], [llvm_ptr_ty], []>;
+}
+
+// Misc.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse_movmsk_ps : GCCBuiltin<"__builtin_ia32_movmskps">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE2
+
+// FP arithmetic ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse2_add_sd : GCCBuiltin<"__builtin_ia32_addsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_sub_sd : GCCBuiltin<"__builtin_ia32_subsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_mul_sd : GCCBuiltin<"__builtin_ia32_mulsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_div_sd : GCCBuiltin<"__builtin_ia32_divsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_sse2_sqrt_pd : GCCBuiltin<"__builtin_ia32_sqrtpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_sse2_min_sd : GCCBuiltin<"__builtin_ia32_minsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_min_pd : GCCBuiltin<"__builtin_ia32_minpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_max_sd : GCCBuiltin<"__builtin_ia32_maxsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_max_pd : GCCBuiltin<"__builtin_ia32_maxpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+}
+
+// FP comparison ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_sse2_cmp_pd : GCCBuiltin<"__builtin_ia32_cmppd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_comilt_sd : GCCBuiltin<"__builtin_ia32_comisdlt">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_comile_sd : GCCBuiltin<"__builtin_ia32_comisdle">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_comigt_sd : GCCBuiltin<"__builtin_ia32_comisdgt">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_comige_sd : GCCBuiltin<"__builtin_ia32_comisdge">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_comineq_sd : GCCBuiltin<"__builtin_ia32_comisdneq">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_ucomieq_sd : GCCBuiltin<"__builtin_ia32_ucomisdeq">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_ucomilt_sd : GCCBuiltin<"__builtin_ia32_ucomisdlt">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_ucomile_sd : GCCBuiltin<"__builtin_ia32_ucomisdle">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_ucomigt_sd : GCCBuiltin<"__builtin_ia32_ucomisdgt">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_ucomige_sd : GCCBuiltin<"__builtin_ia32_ucomisdge">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_ucomineq_sd : GCCBuiltin<"__builtin_ia32_ucomisdneq">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+}
+
+// Integer arithmetic ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_sse2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_sse2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_sse2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem, Commutative]>;
+}
+
+// Integer shift ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse2_psll_w : GCCBuiltin<"__builtin_ia32_psllw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_sse2_psll_d : GCCBuiltin<"__builtin_ia32_pslld128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_psll_q : GCCBuiltin<"__builtin_ia32_psllq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_sse2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_sse2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+
+ def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi128_byteshift">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi128_byteshift">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+}
+
+// Conversion ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse2_cvtdq2pd : GCCBuiltin<"__builtin_ia32_cvtdq2pd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvttpd2dq : GCCBuiltin<"__builtin_ia32_cvttpd2dq">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvtpd2ps : GCCBuiltin<"__builtin_ia32_cvtpd2ps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvtps2pd : GCCBuiltin<"__builtin_ia32_cvtps2pd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
+ Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvttsd2si : GCCBuiltin<"__builtin_ia32_cvttsd2si">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_cvttsd2si64">,
+ Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvtsi2sd : GCCBuiltin<"__builtin_ia32_cvtsi2sd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvtsi642sd : GCCBuiltin<"__builtin_ia32_cvtsi642sd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_cvtss2sd : GCCBuiltin<"__builtin_ia32_cvtss2sd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse_cvtpd2pi : GCCBuiltin<"__builtin_ia32_cvtpd2pi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse_cvttpd2pi: GCCBuiltin<"__builtin_ia32_cvttpd2pi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse_cvtpi2pd : GCCBuiltin<"__builtin_ia32_cvtpi2pd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+}
+
+// SIMD store ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse2_storeu_pd : GCCBuiltin<"__builtin_ia32_storeupd">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v2f64_ty], [IntrReadWriteArgMem]>;
+ def int_x86_sse2_storeu_dq : GCCBuiltin<"__builtin_ia32_storedqu">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v16i8_ty], [IntrReadWriteArgMem]>;
+ def int_x86_sse2_storel_dq : GCCBuiltin<"__builtin_ia32_storelv4si">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v4i32_ty], [IntrReadWriteArgMem]>;
+}
+
+// Misc.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse2_packsswb_128 : GCCBuiltin<"__builtin_ia32_packsswb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_sse2_packssdw_128 : GCCBuiltin<"__builtin_ia32_packssdw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_sse2_packuswb_128 : GCCBuiltin<"__builtin_ia32_packuswb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_sse2_movmsk_pd : GCCBuiltin<"__builtin_ia32_movmskpd">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse2_pmovmskb_128 : GCCBuiltin<"__builtin_ia32_pmovmskb128">,
+ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_sse2_maskmov_dqu : GCCBuiltin<"__builtin_ia32_maskmovdqu">,
+ Intrinsic<[], [llvm_v16i8_ty,
+ llvm_v16i8_ty, llvm_ptr_ty], []>;
+ def int_x86_sse2_clflush : GCCBuiltin<"__builtin_ia32_clflush">,
+ Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_x86_sse2_lfence : GCCBuiltin<"__builtin_ia32_lfence">,
+ Intrinsic<[], [], []>;
+ def int_x86_sse2_mfence : GCCBuiltin<"__builtin_ia32_mfence">,
+ Intrinsic<[], [], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE3
+
+// Addition / subtraction ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse3_addsub_ps : GCCBuiltin<"__builtin_ia32_addsubps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse3_addsub_pd : GCCBuiltin<"__builtin_ia32_addsubpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+}
+
+// Horizontal ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse3_hadd_ps : GCCBuiltin<"__builtin_ia32_haddps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse3_hadd_pd : GCCBuiltin<"__builtin_ia32_haddpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_sse3_hsub_ps : GCCBuiltin<"__builtin_ia32_hsubps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_sse3_hsub_pd : GCCBuiltin<"__builtin_ia32_hsubpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+}
+
+// Specialized unaligned load.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse3_ldu_dq : GCCBuiltin<"__builtin_ia32_lddqu">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
+}
+
+// Thread synchronization ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse3_monitor : GCCBuiltin<"__builtin_ia32_monitor">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_sse3_mwait : GCCBuiltin<"__builtin_ia32_mwait">,
+ Intrinsic<[], [llvm_i32_ty,
+ llvm_i32_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSSE3
+
+// Horizontal arithmetic ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_ssse3_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_phadd_w_128 : GCCBuiltin<"__builtin_ia32_phaddw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+
+ def int_x86_ssse3_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_phadd_d_128 : GCCBuiltin<"__builtin_ia32_phaddd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+
+ def int_x86_ssse3_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_phadd_sw_128 : GCCBuiltin<"__builtin_ia32_phaddsw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+
+ def int_x86_ssse3_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_phsub_w_128 : GCCBuiltin<"__builtin_ia32_phsubw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+
+ def int_x86_ssse3_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_phsub_d_128 : GCCBuiltin<"__builtin_ia32_phsubd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+
+ def int_x86_ssse3_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_phsub_sw_128 : GCCBuiltin<"__builtin_ia32_phsubsw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+
+ def int_x86_ssse3_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem]>;
+}
+
+// Packed multiply high with round and scale
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_ssse3_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_ssse3_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem, Commutative]>;
+}
+
+// Shuffle ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_ssse3_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+}
+
+// Sign ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_ssse3_psign_b : GCCBuiltin<"__builtin_ia32_psignb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_psign_b_128 : GCCBuiltin<"__builtin_ia32_psignb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem]>;
+
+ def int_x86_ssse3_psign_w : GCCBuiltin<"__builtin_ia32_psignw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_psign_w_128 : GCCBuiltin<"__builtin_ia32_psignw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+
+ def int_x86_ssse3_psign_d : GCCBuiltin<"__builtin_ia32_psignd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_psign_d_128 : GCCBuiltin<"__builtin_ia32_psignd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+}
+
+// Absolute value ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_ssse3_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_pabs_b_128 : GCCBuiltin<"__builtin_ia32_pabsb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+
+ def int_x86_ssse3_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_pabs_w_128 : GCCBuiltin<"__builtin_ia32_pabsw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+
+ def int_x86_ssse3_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_ssse3_pabs_d_128 : GCCBuiltin<"__builtin_ia32_pabsd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.1
+
+// FP rounding ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+}
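+
+// The trailing i32 is the SSE4.1 rounding-control immediate. Illustrative
+// sketch (value 4 = _MM_FROUND_CUR_DIRECTION, i.e. round using the current
+// MXCSR mode):
+//   %r = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a, i32 4)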
+
+// Vector sign and zero extend
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+}
+
+// Vector min element
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_phminposuw : GCCBuiltin<"__builtin_ia32_phminposuw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+}
+
+// Vector compare, min, max
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_pmaxsb : GCCBuiltin<"__builtin_ia32_pmaxsb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, Commutative]>;
+ def int_x86_sse41_pmaxsd : GCCBuiltin<"__builtin_ia32_pmaxsd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_x86_sse41_pmaxud : GCCBuiltin<"__builtin_ia32_pmaxud128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_x86_sse41_pmaxuw : GCCBuiltin<"__builtin_ia32_pmaxuw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem, Commutative]>;
+ def int_x86_sse41_pminsb : GCCBuiltin<"__builtin_ia32_pminsb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, Commutative]>;
+ def int_x86_sse41_pminsd : GCCBuiltin<"__builtin_ia32_pminsd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_x86_sse41_pminud : GCCBuiltin<"__builtin_ia32_pminud128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_x86_sse41_pminuw : GCCBuiltin<"__builtin_ia32_pminuw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem, Commutative]>;
+}
+
+// Advanced Encryption Standard (AES) Instructions
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_aesni_aesimc : GCCBuiltin<"__builtin_ia32_aesimc128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_aesni_aesenc : GCCBuiltin<"__builtin_ia32_aesenc128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_aesni_aesenclast : GCCBuiltin<"__builtin_ia32_aesenclast128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_aesni_aesdec : GCCBuiltin<"__builtin_ia32_aesdec128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_aesni_aesdeclast : GCCBuiltin<"__builtin_ia32_aesdeclast128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_aesni_aeskeygenassist :
+ GCCBuiltin<"__builtin_ia32_aeskeygenassist128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+}
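+
+// The AES definitions use v2i64 as a generic 128-bit state/key type; one
+// encryption round in IR might look like (illustrative sketch):
+//   %s1 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %state, <2 x i64> %roundkey)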
+
+// PCLMUL instruction
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_pclmulqdq : GCCBuiltin<"__builtin_ia32_pclmulqdq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+}
+
+// Vector pack
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_packusdw : GCCBuiltin<"__builtin_ia32_packusdw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+}
+
+// Vector multiply
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_pmuldq : GCCBuiltin<"__builtin_ia32_pmuldq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem, Commutative]>;
+}
+
+// Vector extract
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_pextrb :
+ Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pextrd :
+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pextrq :
+ Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_extractps : GCCBuiltin<"__builtin_ia32_extractps128">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+}
+
+// Vector insert
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+}
+
+// Vector blend
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_blendvps : GCCBuiltin<"__builtin_ia32_blendvps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+}
+
+// Vector dot product
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+}
+
+// Vector sum of absolute differences
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+}
+
+// Cacheability support ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
+}
+
+// Test instructions with bitwise comparison (PTEST).
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE4.2
+
+// Miscellaneous
+// CRC32 instructions
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse42_crc32_32_8 : GCCBuiltin<"__builtin_ia32_crc32qi">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_crc32_32_16 : GCCBuiltin<"__builtin_ia32_crc32hi">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_crc32_32_32 : GCCBuiltin<"__builtin_ia32_crc32si">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_crc32_64_8 :
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_crc32_64_64 : GCCBuiltin<"__builtin_ia32_crc32di">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+}
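+
+// Each CRC32 form folds one more chunk into a running checksum, e.g.
+// (illustrative sketch):
+//   %crc1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %crc0, i8 %byte)
+// Note that crc32_64_8 has no GCCBuiltin mapping, so it is reachable only
+// through its llvm.x86.* name.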
+
+// String/text processing ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
+ llvm_i8_ty],
+ [IntrNoMem]>;
+}
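+
+// The explicit-length (pcmpestr*) forms take (a, len_a, b, len_b, imm8
+// control); e.g. control 0 selects an unsigned-byte, equal-any comparison.
+// Illustrative sketch:
+//   %i = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a, i32 %la,
+//                                              <16 x i8> %b, i32 %lb, i8 0)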
+
+//===----------------------------------------------------------------------===//
+// SSE4A
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_sse4a_extrqi : GCCBuiltin<"__builtin_ia32_extrqi">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_sse4a_extrq : GCCBuiltin<"__builtin_ia32_extrq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], [IntrNoMem]>;
+
+ def int_x86_sse4a_insertqi : GCCBuiltin<"__builtin_ia32_insertqi">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_i8_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+ def int_x86_sse4a_movnt_ss : GCCBuiltin<"__builtin_ia32_movntss">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty], []>;
+ def int_x86_sse4a_movnt_sd : GCCBuiltin<"__builtin_ia32_movntsd">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty], []>;
+}
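+
+// movntss/movntsd are non-temporal scalar stores. Note the empty attribute
+// list: unlike the IntrNoMem definitions above, they write memory and must
+// be treated conservatively by the optimizer.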
+
+//===----------------------------------------------------------------------===//
+// AVX
+
+// Arithmetic ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_addsub_pd_256 : GCCBuiltin<"__builtin_ia32_addsubpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_addsub_ps_256 : GCCBuiltin<"__builtin_ia32_addsubps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_max_pd_256 : GCCBuiltin<"__builtin_ia32_maxpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_max_ps_256 : GCCBuiltin<"__builtin_ia32_maxps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_rcp_ps_256 : GCCBuiltin<"__builtin_ia32_rcpps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx_round_ps_256 : GCCBuiltin<"__builtin_ia32_roundps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+}
+
+// Horizontal ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_hadd_pd_256 : GCCBuiltin<"__builtin_ia32_haddpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_hsub_ps_256 : GCCBuiltin<"__builtin_ia32_hsubps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_hsub_pd_256 : GCCBuiltin<"__builtin_ia32_hsubpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_hadd_ps_256 : GCCBuiltin<"__builtin_ia32_haddps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+}
+
+// Vector permutation
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_vpermilvar_pd : GCCBuiltin<"__builtin_ia32_vpermilvarpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx_vpermilvar_ps : GCCBuiltin<"__builtin_ia32_vpermilvarps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_vpermilvar_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+ def int_x86_avx_vpermilvar_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_vperm2f128_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vperm2f128_pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vperm2f128_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vperm2f128_ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vperm2f128_si_256 :
+ GCCBuiltin<"__builtin_ia32_vperm2f128_si256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+}
+
+// Vector blend
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_blendv_ps_256 : GCCBuiltin<"__builtin_ia32_blendvps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+}
+
+// Vector dot product
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
+}
+
+// Vector compare
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_cmp_pd_256 : GCCBuiltin<"__builtin_ia32_cmppd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_cmp_ps_256 : GCCBuiltin<"__builtin_ia32_cmpps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+}
+
+// Vector extract and insert
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_vextractf128_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vextractf128_pd256">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vextractf128_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vextractf128_ps256">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vextractf128_si_256 :
+ GCCBuiltin<"__builtin_ia32_vextractf128_si256">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx_vinsertf128_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vinsertf128_pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vinsertf128_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vinsertf128_ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vinsertf128_si_256 :
+ GCCBuiltin<"__builtin_ia32_vinsertf128_si256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+}
+
+// Vector convert
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_cvtdq2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtdq2pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_cvt_ps2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtps2pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+}
+
+// Vector bit test
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_vtestz_pd : GCCBuiltin<"__builtin_ia32_vtestzpd">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestc_pd : GCCBuiltin<"__builtin_ia32_vtestcpd">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestnzc_pd : GCCBuiltin<"__builtin_ia32_vtestnzcpd">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestz_ps : GCCBuiltin<"__builtin_ia32_vtestzps">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestc_ps : GCCBuiltin<"__builtin_ia32_vtestcps">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestnzc_ps : GCCBuiltin<"__builtin_ia32_vtestnzcps">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestz_pd_256 : GCCBuiltin<"__builtin_ia32_vtestzpd256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestcpd256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestnzc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestnzcpd256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestz_ps_256 : GCCBuiltin<"__builtin_ia32_vtestzps256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestcps256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestnzc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestnzcps256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_ptestz_256 : GCCBuiltin<"__builtin_ia32_ptestz256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty], [IntrNoMem]>;
+ def int_x86_avx_ptestc_256 : GCCBuiltin<"__builtin_ia32_ptestc256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty], [IntrNoMem]>;
+ def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty], [IntrNoMem]>;
+}
+
+// Vector extract sign mask
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_movmsk_pd_256 : GCCBuiltin<"__builtin_ia32_movmskpd256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_movmsk_ps_256 : GCCBuiltin<"__builtin_ia32_movmskps256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+}
+
+// Vector zero
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_vzeroall : GCCBuiltin<"__builtin_ia32_vzeroall">,
+ Intrinsic<[], [], []>;
+ def int_x86_avx_vzeroupper : GCCBuiltin<"__builtin_ia32_vzeroupper">,
+ Intrinsic<[], [], []>;
+}
+
+// Vector load with broadcast
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_vbroadcast_ss :
+ GCCBuiltin<"__builtin_ia32_vbroadcastss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_x86_avx_vbroadcast_sd_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_x86_avx_vbroadcast_ss_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_x86_avx_vbroadcastf128_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastf128_pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_x86_avx_vbroadcastf128_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastf128_ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
+}
+
+// SIMD load ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
+}
+
+// SIMD store ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_storeu_pd_256 : GCCBuiltin<"__builtin_ia32_storeupd256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>;
+ def int_x86_avx_storeu_ps_256 : GCCBuiltin<"__builtin_ia32_storeups256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
+ def int_x86_avx_storeu_dq_256 : GCCBuiltin<"__builtin_ia32_storedqu256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty], [IntrReadWriteArgMem]>;
+}
+
+// Conditional load ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty],
+ [IntrReadArgMem]>;
+ def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty],
+ [IntrReadArgMem]>;
+ def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty],
+ [IntrReadArgMem]>;
+ def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty],
+ [IntrReadArgMem]>;
+}
+
+// Conditional store ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v2f64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>;
+ def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v4f32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+ def int_x86_avx_maskstore_pd_256 :
+ GCCBuiltin<"__builtin_ia32_maskstorepd256">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v4f64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>;
+ def int_x86_avx_maskstore_ps_256 :
+ GCCBuiltin<"__builtin_ia32_maskstoreps256">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v8f32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
+}
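+
+// Mask semantics (sketch): the sign bit of each mask element selects its
+// lane; masked-off lanes read as zero on load and are left untouched on
+// store. An illustrative load/store pair in IR:
+//   %v = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %p, <4 x float> %m)
+//   call void @llvm.x86.avx.maskstore.ps(i8* %p, <4 x float> %m, <4 x float> %v)
+// IntrReadArgMem/IntrReadWriteArgMem record that memory is accessed only
+// through the pointer argument.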
+
+//===----------------------------------------------------------------------===//
+// AVX2
+
+// Integer arithmetic ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmul_dq : GCCBuiltin<"__builtin_ia32_pmuldq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+}
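+
+// Commutative permits operand swapping during instruction selection, e.g.
+// to fold a load into either source of an illustrative call such as:
+//   %r = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b)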
+
+// Vector min, max
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmaxu_w : GCCBuiltin<"__builtin_ia32_pmaxuw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmaxs_b : GCCBuiltin<"__builtin_ia32_pmaxsb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pminu_w : GCCBuiltin<"__builtin_ia32_pminuw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pminu_d : GCCBuiltin<"__builtin_ia32_pminud256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmins_b : GCCBuiltin<"__builtin_ia32_pminsb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+}
+
+// Integer shift ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psll_q : GCCBuiltin<"__builtin_ia32_psllq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx2_psra_w : GCCBuiltin<"__builtin_ia32_psraw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_psra_d : GCCBuiltin<"__builtin_ia32_psrad256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psll_dq_bs :
+ GCCBuiltin<"__builtin_ia32_pslldqi256_byteshift">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrl_dq_bs :
+ GCCBuiltin<"__builtin_ia32_psrldqi256_byteshift">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+}
+
+// Pack ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_packsswb : GCCBuiltin<"__builtin_ia32_packsswb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_packssdw : GCCBuiltin<"__builtin_ia32_packssdw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_packuswb : GCCBuiltin<"__builtin_ia32_packuswb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_packusdw : GCCBuiltin<"__builtin_ia32_packusdw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem]>;
+}
+
+// Absolute value ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+}
+
+// Horizontal arithmetic ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+}
+
+// Sign ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_psign_b : GCCBuiltin<"__builtin_ia32_psignb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_psign_w : GCCBuiltin<"__builtin_ia32_psignw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_psign_d : GCCBuiltin<"__builtin_ia32_psignd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem]>;
+}
+
+// Packed multiply high with round and scale
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+}
+
+// Vector sign and zero extend
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+}
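+
+// These widen the low part of the source vector: pmovzxbw zero-extends all
+// 16 source bytes to words, while pmovzxbq uses only the low 4 bytes.
+// Illustrative sketch:
+//   %r = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a)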
+
+// Vector blend
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+}
+
+// Vector load with broadcast
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_vbroadcast_ss_ps :
+ GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx2_vbroadcast_sd_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_avx2_vbroadcast_ss_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx2_vbroadcasti128 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastsi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_x86_avx2_pbroadcastb_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastb_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastw_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastw_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastd_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastd_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastq_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastq_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+}
+
+// Vector permutation
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_permd : GCCBuiltin<"__builtin_ia32_permvarsi256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+}
+
+// Vector extract and insert
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_vextracti128 : GCCBuiltin<"__builtin_ia32_extract128i256">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+}
+
+// Conditional load ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty],
+ [IntrReadArgMem]>;
+ def int_x86_avx2_maskload_q : GCCBuiltin<"__builtin_ia32_maskloadq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty],
+ [IntrReadArgMem]>;
+ def int_x86_avx2_maskload_d_256 : GCCBuiltin<"__builtin_ia32_maskloadd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty],
+ [IntrReadArgMem]>;
+ def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty],
+ [IntrReadArgMem]>;
+}
+
+// Conditional store ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_maskstore_d : GCCBuiltin<"__builtin_ia32_maskstored">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx2_maskstore_q : GCCBuiltin<"__builtin_ia32_maskstoreq">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx2_maskstore_d_256 :
+ GCCBuiltin<"__builtin_ia32_maskstored256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrReadWriteArgMem]>;
+ def int_x86_avx2_maskstore_q_256 :
+ GCCBuiltin<"__builtin_ia32_maskstoreq256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrReadWriteArgMem]>;
+}
+
+// Variable bit shift ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_psllv_d : GCCBuiltin<"__builtin_ia32_psllv4si">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psllv_d_256 : GCCBuiltin<"__builtin_ia32_psllv8si">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psllv_q : GCCBuiltin<"__builtin_ia32_psllv2di">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psllv_q_256 : GCCBuiltin<"__builtin_ia32_psllv4di">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx2_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv4si">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psrlv_d_256 : GCCBuiltin<"__builtin_ia32_psrlv8si">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psrlv_q : GCCBuiltin<"__builtin_ia32_psrlv2di">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psrlv_q_256 : GCCBuiltin<"__builtin_ia32_psrlv4di">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx2_psrav_d : GCCBuiltin<"__builtin_ia32_psrav4si">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psrav_d_256 : GCCBuiltin<"__builtin_ia32_psrav8si">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+}
+
+// Gather ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+
+ def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+ def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrReadMem]>;
+}
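+
+// Gather operand layout: (passthrough, base pointer, index vector, mask,
+// scale imm8); masked-off lanes keep the passthrough value. Illustrative
+// sketch:
+//   %g = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %src,
+//            i8* %base, <4 x i32> %idx, <4 x float> %mask, i8 4)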
+
+// Misc.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+ llvm_i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// FMA3 and FMA4
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_fma_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmadd_ps : GCCBuiltin<"__builtin_ia32_vfmaddps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmadd_pd : GCCBuiltin<"__builtin_ia32_vfmaddpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmsub_sd : GCCBuiltin<"__builtin_ia32_vfmsubsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmsub_ps : GCCBuiltin<"__builtin_ia32_vfmsubps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmsub_pd : GCCBuiltin<"__builtin_ia32_vfmsubpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmadd_sd : GCCBuiltin<"__builtin_ia32_vfnmaddsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmadd_ps : GCCBuiltin<"__builtin_ia32_vfnmaddps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmadd_pd : GCCBuiltin<"__builtin_ia32_vfnmaddpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmsub_sd : GCCBuiltin<"__builtin_ia32_vfnmsubsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmsub_ps : GCCBuiltin<"__builtin_ia32_vfnmsubps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmsub_pd : GCCBuiltin<"__builtin_ia32_vfnmsubpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmaddsub_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmaddsub_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmsubadd_pd : GCCBuiltin<"__builtin_ia32_vfmsubaddpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmsubadd_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma_vfmsubadd_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+}
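
A minimal usage sketch (assuming an FMA4-capable compile, e.g. -mfma4): each
GCCBuiltin<...> string above names the Clang/GCC builtin that lowers to the
corresponding llvm.x86.fma.* intrinsic.

    typedef float v4sf __attribute__((vector_size(16)));

    // Computes (a * b) + c as one fused operation; through the GCCBuiltin
    // mapping above this becomes a call to @llvm.x86.fma.vfmadd.ps.
    v4sf fused_madd(v4sf a, v4sf b, v4sf c) {
      return __builtin_ia32_vfmaddps(a, b, c);
    }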
+
+//===----------------------------------------------------------------------===//
+// XOP
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_xop_vpermil2pd_256 :
+ GCCBuiltin<"__builtin_ia32_vpermil2pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpermil2ps_256 :
+ GCCBuiltin<"__builtin_ia32_vpermil2ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_xop_vfrcz_ps : GCCBuiltin<"__builtin_ia32_vfrczps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_xop_vfrcz_sd : GCCBuiltin<"__builtin_ia32_vfrczsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_xop_vfrcz_ss : GCCBuiltin<"__builtin_ia32_vfrczss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_xop_vfrcz_pd_256 : GCCBuiltin<"__builtin_ia32_vfrczpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+ def int_x86_xop_vpcmov :
+ GCCBuiltin<"__builtin_ia32_vpcmov">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_256 :
+ GCCBuiltin<"__builtin_ia32_vpcmov_256">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+
+ def int_x86_xop_vpcomb : GCCBuiltin<"__builtin_ia32_vpcomb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vpcomw : GCCBuiltin<"__builtin_ia32_vpcomw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vpcomd : GCCBuiltin<"__builtin_ia32_vpcomd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vpcomq : GCCBuiltin<"__builtin_ia32_vpcomq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vpcomub : GCCBuiltin<"__builtin_ia32_vpcomub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vpcomuw : GCCBuiltin<"__builtin_ia32_vpcomuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vpcomud : GCCBuiltin<"__builtin_ia32_vpcomud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vpcomuq : GCCBuiltin<"__builtin_ia32_vpcomuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_xop_vphaddbd :
+ GCCBuiltin<"__builtin_ia32_vphaddbd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddbq :
+ GCCBuiltin<"__builtin_ia32_vphaddbq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddbw :
+ GCCBuiltin<"__builtin_ia32_vphaddbw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphadddq :
+ GCCBuiltin<"__builtin_ia32_vphadddq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddubd :
+ GCCBuiltin<"__builtin_ia32_vphaddubd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddubq :
+ GCCBuiltin<"__builtin_ia32_vphaddubq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddubw :
+ GCCBuiltin<"__builtin_ia32_vphaddubw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddudq :
+ GCCBuiltin<"__builtin_ia32_vphaddudq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_xop_vphadduwd :
+ GCCBuiltin<"__builtin_ia32_vphadduwd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphadduwq :
+ GCCBuiltin<"__builtin_ia32_vphadduwq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddwd :
+ GCCBuiltin<"__builtin_ia32_vphaddwd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddwq :
+ GCCBuiltin<"__builtin_ia32_vphaddwq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphsubbw :
+ GCCBuiltin<"__builtin_ia32_vphsubbw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphsubdq :
+ GCCBuiltin<"__builtin_ia32_vphsubdq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_xop_vphsubwd :
+ GCCBuiltin<"__builtin_ia32_vphsubwd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vpmacsdd :
+ GCCBuiltin<"__builtin_ia32_vpmacsdd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsdqh :
+ GCCBuiltin<"__builtin_ia32_vpmacsdqh">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsdql :
+ GCCBuiltin<"__builtin_ia32_vpmacsdql">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssdd :
+ GCCBuiltin<"__builtin_ia32_vpmacssdd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssdqh :
+ GCCBuiltin<"__builtin_ia32_vpmacssdqh">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssdql :
+ GCCBuiltin<"__builtin_ia32_vpmacssdql">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsswd :
+ GCCBuiltin<"__builtin_ia32_vpmacsswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssww :
+ GCCBuiltin<"__builtin_ia32_vpmacssww">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacswd :
+ GCCBuiltin<"__builtin_ia32_vpmacswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsww :
+ GCCBuiltin<"__builtin_ia32_vpmacsww">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmadcsswd :
+ GCCBuiltin<"__builtin_ia32_vpmadcsswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmadcswd :
+ GCCBuiltin<"__builtin_ia32_vpmadcswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpperm :
+ GCCBuiltin<"__builtin_ia32_vpperm">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_xop_vprotb : GCCBuiltin<"__builtin_ia32_vprotb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotd : GCCBuiltin<"__builtin_ia32_vprotd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotq : GCCBuiltin<"__builtin_ia32_vprotq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotw : GCCBuiltin<"__builtin_ia32_vprotw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotbi : GCCBuiltin<"__builtin_ia32_vprotbi">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotdi : GCCBuiltin<"__builtin_ia32_vprotdi">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotqi : GCCBuiltin<"__builtin_ia32_vprotqi">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotwi : GCCBuiltin<"__builtin_ia32_vprotwi">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
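The vprot* definitions come in a variable form (per-element rotate counts taken
from a second vector) and an immediate form (a single i8 count). A short sketch
of the immediate form, assuming an XOP-capable compile (-mxop):

    typedef int v4si __attribute__((vector_size(16)));

    // Rotates every 32-bit lane left by 8 bits; the builtin maps to
    // @llvm.x86.xop.vprotdi per the GCCBuiltin<> entry above.
    v4si rotate_lanes_by_8(v4si x) {
      return __builtin_ia32_vprotdi(x, 8);
    }
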
+ def int_x86_xop_vpshab :
+ GCCBuiltin<"__builtin_ia32_vpshab">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshad :
+ GCCBuiltin<"__builtin_ia32_vpshad">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshaq :
+ GCCBuiltin<"__builtin_ia32_vpshaq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshaw :
+ GCCBuiltin<"__builtin_ia32_vpshaw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshlb :
+ GCCBuiltin<"__builtin_ia32_vpshlb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshld :
+ GCCBuiltin<"__builtin_ia32_vpshld">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshlq :
+ GCCBuiltin<"__builtin_ia32_vpshlq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshlw :
+ GCCBuiltin<"__builtin_ia32_vpshlw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// MMX
+
+// Empty MMX state op.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_mmx_emms : GCCBuiltin<"__builtin_ia32_emms">,
+ Intrinsic<[], [], []>;
+ def int_x86_mmx_femms : GCCBuiltin<"__builtin_ia32_femms">,
+ Intrinsic<[], [], []>;
+}
+
+// Integer arithmetic ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ // Addition
+ def int_x86_mmx_padd_b : GCCBuiltin<"__builtin_ia32_paddb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_padd_w : GCCBuiltin<"__builtin_ia32_paddw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_padd_d : GCCBuiltin<"__builtin_ia32_paddd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_padd_q : GCCBuiltin<"__builtin_ia32_paddq">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+
+ def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_padds_w : GCCBuiltin<"__builtin_ia32_paddsw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+
+ def int_x86_mmx_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+
+ // Subtraction
+ def int_x86_mmx_psub_b : GCCBuiltin<"__builtin_ia32_psubb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_psub_w : GCCBuiltin<"__builtin_ia32_psubw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_psub_d : GCCBuiltin<"__builtin_ia32_psubd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_psub_q : GCCBuiltin<"__builtin_ia32_psubq">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+
+ def int_x86_mmx_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_mmx_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+
+ def int_x86_mmx_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_mmx_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+
+ // Multiplication
+ def int_x86_mmx_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_pmull_w : GCCBuiltin<"__builtin_ia32_pmullw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+
+ // Bitwise operations
+ def int_x86_mmx_pand : GCCBuiltin<"__builtin_ia32_pand">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_pandn : GCCBuiltin<"__builtin_ia32_pandn">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_por : GCCBuiltin<"__builtin_ia32_por">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_pxor : GCCBuiltin<"__builtin_ia32_pxor">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+
+ // Averages
+ def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+
+ // Maximum
+ def int_x86_mmx_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+
+ // Minimum
+ def int_x86_mmx_pminu_b : GCCBuiltin<"__builtin_ia32_pminub">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+
+ // Packed sum of absolute differences
+ def int_x86_mmx_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+}
+
+// Integer shift ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ // Shift left logical
+ def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+
+ def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+
+ def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+
+ def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_mmx_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_mmx_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_mmx_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_mmx_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_mmx_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_mmx_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_mmx_psrai_d : GCCBuiltin<"__builtin_ia32_psradi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+}
+
+// Pack ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_mmx_packsswb : GCCBuiltin<"__builtin_ia32_packsswb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_mmx_packssdw : GCCBuiltin<"__builtin_ia32_packssdw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_mmx_packuswb : GCCBuiltin<"__builtin_ia32_packuswb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+}
+
+// Unpacking ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_mmx_punpckhbw : GCCBuiltin<"__builtin_ia32_punpckhbw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_punpckhwd : GCCBuiltin<"__builtin_ia32_punpckhwd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_punpckhdq : GCCBuiltin<"__builtin_ia32_punpckhdq">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_punpcklbw : GCCBuiltin<"__builtin_ia32_punpcklbw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_punpcklwd : GCCBuiltin<"__builtin_ia32_punpcklwd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_punpckldq : GCCBuiltin<"__builtin_ia32_punpckldq">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
+ [IntrNoMem]>;
+}
+
+// Integer comparison ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_mmx_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
+
+ def int_x86_mmx_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_mmx_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+ def int_x86_mmx_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty], [IntrNoMem]>;
+}
+
+// Misc.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_mmx_maskmovq : GCCBuiltin<"__builtin_ia32_maskmovq">,
+ Intrinsic<[], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_ptr_ty], []>;
+
+ def int_x86_mmx_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb">,
+ Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
+
+ def int_x86_mmx_movnt_dq : GCCBuiltin<"__builtin_ia32_movntq">,
+ Intrinsic<[], [llvm_ptrx86mmx_ty, llvm_x86mmx_ty], []>;
+
+ def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">,
+ Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">,
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// BMI
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_bmi_bextr_32 : GCCBuiltin<"__builtin_ia32_bextr_u32">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_bmi_bextr_64 : GCCBuiltin<"__builtin_ia32_bextr_u64">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_bmi_bzhi_32 : GCCBuiltin<"__builtin_ia32_bzhi_si">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_bmi_bzhi_64 : GCCBuiltin<"__builtin_ia32_bzhi_di">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_bmi_pdep_32 : GCCBuiltin<"__builtin_ia32_pdep_si">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_bmi_pdep_64 : GCCBuiltin<"__builtin_ia32_pdep_di">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_bmi_pext_32 : GCCBuiltin<"__builtin_ia32_pext_si">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_bmi_pext_64 : GCCBuiltin<"__builtin_ia32_pext_di">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+}
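
Of the definitions above, pdep scatters the low-order bits of its first operand
into the bit positions selected by the mask, and pext gathers the selected bits
down to the low end. A sketch using the immintrin.h wrappers (assuming a BMI2
target, -mbmi2):

    #include <immintrin.h>

    // pext collects the bits of x selected by the mask into the low bits:
    // x = 0xB2 (1011'0010), mask = 0xF0 -> result 0xB (1011).
    unsigned extract_high_nibble(unsigned x) {
      return _pext_u32(x, 0xF0u);
    }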
+
+//===----------------------------------------------------------------------===//
+// FS/GS Base
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_rdfsbase_32 : GCCBuiltin<"__builtin_ia32_rdfsbase32">,
+ Intrinsic<[llvm_i32_ty], []>;
+ def int_x86_rdgsbase_32 : GCCBuiltin<"__builtin_ia32_rdgsbase32">,
+ Intrinsic<[llvm_i32_ty], []>;
+ def int_x86_rdfsbase_64 : GCCBuiltin<"__builtin_ia32_rdfsbase64">,
+ Intrinsic<[llvm_i64_ty], []>;
+ def int_x86_rdgsbase_64 : GCCBuiltin<"__builtin_ia32_rdgsbase64">,
+ Intrinsic<[llvm_i64_ty], []>;
+ def int_x86_wrfsbase_32 : GCCBuiltin<"__builtin_ia32_wrfsbase32">,
+ Intrinsic<[], [llvm_i32_ty]>;
+ def int_x86_wrgsbase_32 : GCCBuiltin<"__builtin_ia32_wrgsbase32">,
+ Intrinsic<[], [llvm_i32_ty]>;
+ def int_x86_wrfsbase_64 : GCCBuiltin<"__builtin_ia32_wrfsbase64">,
+ Intrinsic<[], [llvm_i64_ty]>;
+ def int_x86_wrgsbase_64 : GCCBuiltin<"__builtin_ia32_wrgsbase64">,
+ Intrinsic<[], [llvm_i64_ty]>;
+}
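
These correspond to the FSGSBASE instructions for reading and writing the FS/GS
segment base registers from user mode. A sketch via the immintrin.h wrapper
(assuming -mfsgsbase and a kernel that sets CR4.FSGSBASE):

    #include <immintrin.h>

    // Reads the current 64-bit FS base; lowers to RDFSBASE.
    unsigned long long current_fs_base(void) {
      return _readfsbase_u64();
    }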
+
+//===----------------------------------------------------------------------===//
+// Half float conversion
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+}
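
The i32 operand of the vcvtps2ph definitions is an immediate selecting the
rounding mode. A sketch through the F16C wrappers (assuming -mf16c):

    #include <immintrin.h>

    // Narrows four floats to four half-precision values, packed into the
    // low 64 bits of the result; imm 0 selects round-to-nearest-even.
    __m128i floats_to_halfs(__m128 v) {
      return _mm_cvtps_ph(v, 0);
    }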
+
+//===----------------------------------------------------------------------===//
+// RDRAND intrinsics - Return a random value and whether it is valid.
+// RDSEED intrinsics - Return a NIST SP800-90B & C compliant random value and
+// whether it is valid.
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ // These are declared side-effecting so they don't get eliminated by CSE or
+ // LICM.
+ def int_x86_rdrand_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>;
+ def int_x86_rdrand_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>;
+ def int_x86_rdrand_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
+ def int_x86_rdseed_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>;
+ def int_x86_rdseed_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>;
+ def int_x86_rdseed_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
+}
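
Each definition above returns two values: the random data and an i32 validity
flag mirroring the instruction's carry-flag output. A sketch through the
immintrin.h wrapper (assuming -mrdrnd), which surfaces that flag as its return
value:

    #include <immintrin.h>

    // Retries until the hardware RNG reports success; _rdrand32_step
    // returns nonzero once *out holds a valid random value.
    unsigned int hardware_rand32(void) {
      unsigned int out;
      while (!_rdrand32_step(&out)) {
        // Transient DRNG underflow; simply try again.
      }
      return out;
    }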
+
+//===----------------------------------------------------------------------===//
+// RTM intrinsics. Transactional Memory support.
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_xbegin : GCCBuiltin<"__builtin_ia32_xbegin">,
+ Intrinsic<[llvm_i32_ty], [], []>;
+ def int_x86_xend : GCCBuiltin<"__builtin_ia32_xend">,
+ Intrinsic<[], [], []>;
+ def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">,
+ Intrinsic<[], [llvm_i8_ty], [IntrNoReturn]>;
+ def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">,
+ Intrinsic<[llvm_i32_ty], [], []>;
+}
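
A sketch of the intended xbegin/xend pairing through the immintrin.h wrappers
(assuming -mrtm): _xbegin returns _XBEGIN_STARTED when the transaction begins,
and any other value is an abort status delivered at the fallback point.

    #include <immintrin.h>

    // Attempts one transactional increment; returns false on abort so the
    // caller can take a lock-based fallback path.
    bool try_transactional_increment(int &shared) {
      if (_xbegin() == _XBEGIN_STARTED) {
        ++shared;   // speculative; committed atomically by _xend()
        _xend();
        return true;
      }
      return false;
    }
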
diff --git a/include/llvm/IntrinsicsXCore.td b/include/llvm/IR/IntrinsicsXCore.td
index a4813135da8d..a4813135da8d 100644
--- a/include/llvm/IntrinsicsXCore.td
+++ b/include/llvm/IR/IntrinsicsXCore.td
diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h
new file mode 100644
index 000000000000..ae81e5b1c3bc
--- /dev/null
+++ b/include/llvm/IR/LLVMContext.h
@@ -0,0 +1,114 @@
+//===-- llvm/LLVMContext.h - Class for managing "global" state --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares LLVMContext, a container of "global" state in LLVM, such
+// as the global type and constant uniquing tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_LLVMCONTEXT_H
+#define LLVM_IR_LLVMCONTEXT_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class LLVMContextImpl;
+class StringRef;
+class Twine;
+class Instruction;
+class Module;
+class SMDiagnostic;
+template <typename T> class SmallVectorImpl;
+
+/// This is an important class for using LLVM in a threaded context. It
+/// (opaquely) owns and manages the core "global" data of LLVM's core
+/// infrastructure, including the type and constant uniquing tables.
+/// LLVMContext itself provides no locking guarantees, so you should be careful
+/// to have one context per thread.
+class LLVMContext {
+public:
+ LLVMContextImpl *const pImpl;
+ LLVMContext();
+ ~LLVMContext();
+
+ // Pinned metadata names, which always have the same value. This is a
+ // compile-time performance optimization, not a correctness optimization.
+ enum {
+ MD_dbg = 0, // "dbg"
+ MD_tbaa = 1, // "tbaa"
+ MD_prof = 2, // "prof"
+ MD_fpmath = 3, // "fpmath"
+ MD_range = 4, // "range"
+ MD_tbaa_struct = 5, // "tbaa.struct"
+ MD_invariant_load = 6 // "invariant.load"
+ };
+
+ /// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
+ /// This ID is uniqued across modules in the current LLVMContext.
+ unsigned getMDKindID(StringRef Name) const;
+
+ /// getMDKindNames - Populate client supplied SmallVector with the name for
+ /// custom metadata IDs registered in this LLVMContext.
+ void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
+
+
+ typedef void (*InlineAsmDiagHandlerTy)(const SMDiagnostic&, void *Context,
+ unsigned LocCookie);
+
+ /// setInlineAsmDiagnosticHandler - This method sets a handler that is invoked
+ /// when problems with inline asm are detected by the backend. The first
+ /// argument is a function pointer and the second is a context pointer that
+ /// gets passed into the DiagHandler.
+ ///
+ /// LLVMContext doesn't take ownership or interpret either of these
+ /// pointers.
+ void setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler,
+ void *DiagContext = 0);
+
+ /// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by
+ /// setInlineAsmDiagnosticHandler.
+ InlineAsmDiagHandlerTy getInlineAsmDiagnosticHandler() const;
+
+ /// getInlineAsmDiagnosticContext - Return the diagnostic context set by
+ /// setInlineAsmDiagnosticHandler.
+ void *getInlineAsmDiagnosticContext() const;
+
+
+ /// emitError - Emit an error message to the currently installed error handler
+ /// with optional location information. This function returns, so code should
+ /// be prepared to drop the erroneous construct on the floor and "not crash".
+ /// The generated code need not be correct. The error message will be
+ /// implicitly prefixed with "error: " and should not end with a ".".
+ void emitError(unsigned LocCookie, const Twine &ErrorStr);
+ void emitError(const Instruction *I, const Twine &ErrorStr);
+ void emitError(const Twine &ErrorStr);
+
+private:
+ LLVMContext(LLVMContext&) LLVM_DELETED_FUNCTION;
+ void operator=(LLVMContext&) LLVM_DELETED_FUNCTION;
+
+ /// addModule - Register a module as being instantiated in this context. If
+ /// the context is deleted, the module will be deleted as well.
+ void addModule(Module*);
+
+ /// removeModule - Unregister a module from this context.
+ void removeModule(Module*);
+
+ // Module needs access to the add/removeModule methods.
+ friend class Module;
+};
+
+/// getGlobalContext - Returns a global context. This is for LLVM clients that
+/// only care about operating on a single thread.
+extern LLVMContext &getGlobalContext();
+
+}
+
+#endif
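
Given the class comment's warning that LLVMContext provides no locking, the
intended pattern is one context per thread, with getGlobalContext() reserved
for single-threaded clients. A minimal sketch against this header's API:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    void buildPerThreadModule() {
      llvm::LLVMContext Ctx;          // per-thread "global" state
      llvm::Module M("worker", Ctx);  // tied to this context for its lifetime
      // Pinned kinds need no registration; this returns MD_range (4).
      unsigned RangeID = Ctx.getMDKindID("range");
      (void)RangeID;
    }
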
diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h
new file mode 100644
index 000000000000..a1e3fb1966ed
--- /dev/null
+++ b/include/llvm/IR/MDBuilder.h
@@ -0,0 +1,186 @@
+//===---- llvm/MDBuilder.h - Builder for LLVM metadata ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MDBuilder class, which is used as a convenient way to
+// create LLVM metadata with a consistent and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_MDBUILDER_H
+#define LLVM_IR_MDBUILDER_H
+
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Metadata.h"
+
+namespace llvm {
+
+class APInt;
+class LLVMContext;
+
+class MDBuilder {
+ LLVMContext &Context;
+
+public:
+ MDBuilder(LLVMContext &context) : Context(context) {}
+
+ /// \brief Return the given string as metadata.
+ MDString *createString(StringRef Str) {
+ return MDString::get(Context, Str);
+ }
+
+ //===------------------------------------------------------------------===//
+ // FPMath metadata.
+ //===------------------------------------------------------------------===//
+
+ /// \brief Return metadata with the given settings. The special value 0.0
+ /// for the Accuracy parameter indicates the default (maximal precision)
+ /// setting.
+ MDNode *createFPMath(float Accuracy) {
+ if (Accuracy == 0.0)
+ return 0;
+ assert(Accuracy > 0.0 && "Invalid fpmath accuracy!");
+ Value *Op = ConstantFP::get(Type::getFloatTy(Context), Accuracy);
+ return MDNode::get(Context, Op);
+ }
+
+ //===------------------------------------------------------------------===//
+ // Prof metadata.
+ //===------------------------------------------------------------------===//
+
+ /// \brief Return metadata containing two branch weights.
+ MDNode *createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight) {
+ uint32_t Weights[] = { TrueWeight, FalseWeight };
+ return createBranchWeights(Weights);
+ }
+
+ /// \brief Return metadata containing a number of branch weights.
+ MDNode *createBranchWeights(ArrayRef<uint32_t> Weights) {
+ assert(Weights.size() >= 2 && "Need at least two branch weights!");
+
+ SmallVector<Value *, 4> Vals(Weights.size()+1);
+ Vals[0] = createString("branch_weights");
+
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ for (unsigned i = 0, e = Weights.size(); i != e; ++i)
+ Vals[i+1] = ConstantInt::get(Int32Ty, Weights[i]);
+
+ return MDNode::get(Context, Vals);
+ }
+
+ //===------------------------------------------------------------------===//
+ // Range metadata.
+ //===------------------------------------------------------------------===//
+
+ /// \brief Return metadata describing the range [Lo, Hi).
+ MDNode *createRange(const APInt &Lo, const APInt &Hi) {
+ assert(Lo.getBitWidth() == Hi.getBitWidth() && "Mismatched bitwidths!");
+ // If the range is everything then it is useless.
+ if (Hi == Lo)
+ return 0;
+
+ // Return the range [Lo, Hi).
+ Type *Ty = IntegerType::get(Context, Lo.getBitWidth());
+ Value *Range[2] = { ConstantInt::get(Ty, Lo), ConstantInt::get(Ty, Hi) };
+ return MDNode::get(Context, Range);
+ }
+
+
+ //===------------------------------------------------------------------===//
+ // TBAA metadata.
+ //===------------------------------------------------------------------===//
+
+ /// \brief Return metadata appropriate for a TBAA root node. Each returned
+ /// node is distinct from all other metadata and will never be identified
+ /// (uniqued) with anything else.
+ MDNode *createAnonymousTBAARoot() {
+ // To ensure uniqueness the root node is self-referential.
+ MDNode *Dummy = MDNode::getTemporary(Context, ArrayRef<Value*>());
+ MDNode *Root = MDNode::get(Context, Dummy);
+ // At this point we have
+ // !0 = metadata !{} <- dummy
+ // !1 = metadata !{metadata !0} <- root
+ // Replace the dummy operand with the root node itself and delete the dummy.
+ Root->replaceOperandWith(0, Root);
+ MDNode::deleteTemporary(Dummy);
+ // We now have
+ // !1 = metadata !{metadata !1} <- self-referential root
+ return Root;
+ }
+
+ /// \brief Return metadata appropriate for a TBAA root node with the given
+ /// name. This may be identified (uniqued) with other roots with the same
+ /// name.
+ MDNode *createTBAARoot(StringRef Name) {
+ return MDNode::get(Context, createString(Name));
+ }
+
+ /// \brief Return metadata for a non-root TBAA node with the given name,
+ /// parent in the TBAA tree, and value for 'pointsToConstantMemory'.
+ MDNode *createTBAANode(StringRef Name, MDNode *Parent,
+ bool isConstant = false) {
+ if (isConstant) {
+ Constant *Flags = ConstantInt::get(Type::getInt64Ty(Context), 1);
+ Value *Ops[3] = { createString(Name), Parent, Flags };
+ return MDNode::get(Context, Ops);
+ } else {
+ Value *Ops[2] = { createString(Name), Parent };
+ return MDNode::get(Context, Ops);
+ }
+ }
+
+ struct TBAAStructField {
+ uint64_t Offset;
+ uint64_t Size;
+ MDNode *TBAA;
+ TBAAStructField(uint64_t Offset, uint64_t Size, MDNode *TBAA) :
+ Offset(Offset), Size(Size), TBAA(TBAA) {}
+ };
+
+ /// \brief Return metadata for a tbaa.struct node with the given
+ /// struct field descriptions.
+ MDNode *createTBAAStructNode(ArrayRef<TBAAStructField> Fields) {
+ SmallVector<Value *, 4> Vals(Fields.size() * 3);
+ Type *Int64 = IntegerType::get(Context, 64);
+ for (unsigned i = 0, e = Fields.size(); i != e; ++i) {
+ Vals[i * 3 + 0] = ConstantInt::get(Int64, Fields[i].Offset);
+ Vals[i * 3 + 1] = ConstantInt::get(Int64, Fields[i].Size);
+ Vals[i * 3 + 2] = Fields[i].TBAA;
+ }
+ return MDNode::get(Context, Vals);
+ }
+
+ /// \brief Return metadata for a TBAA struct node in the type DAG
+  /// with the given name and (offset, parent) field pairs in the TBAA DAG.
+ MDNode *createTBAAStructTypeNode(StringRef Name,
+ ArrayRef<std::pair<uint64_t, MDNode*> > Fields) {
+ SmallVector<Value *, 4> Ops(Fields.size() * 2 + 1);
+ Type *Int64 = IntegerType::get(Context, 64);
+ Ops[0] = createString(Name);
+ for (unsigned i = 0, e = Fields.size(); i != e; ++i) {
+ Ops[i * 2 + 1] = ConstantInt::get(Int64, Fields[i].first);
+ Ops[i * 2 + 2] = Fields[i].second;
+ }
+ return MDNode::get(Context, Ops);
+ }
+
+ /// \brief Return metadata for a TBAA tag node with the given
+ /// base type, access type and offset relative to the base type.
+ MDNode *createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType,
+ uint64_t Offset) {
+ Type *Int64 = IntegerType::get(Context, 64);
+ Value *Ops[3] = { BaseType, AccessType, ConstantInt::get(Int64, Offset) };
+ return MDNode::get(Context, Ops);
+ }
+
+};
+
+} // end namespace llvm
+
+#endif
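
A short sketch of the builder in use, attaching branch weights to an existing
conditional branch (the BranchInst *Br is an assumed input) through the MD_prof
kind pinned in LLVMContext:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"

    void annotateLikelyTaken(llvm::BranchInst *Br) {
      llvm::MDBuilder MDB(Br->getContext());
      // 9:1 odds in favor of the true successor.
      llvm::MDNode *Weights = MDB.createBranchWeights(90, 10);
      Br->setMetadata(llvm::LLVMContext::MD_prof, Weights);
    }
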
diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h
new file mode 100644
index 000000000000..8c2cfac235d2
--- /dev/null
+++ b/include/llvm/IR/Metadata.h
@@ -0,0 +1,242 @@
+//===-- llvm/Metadata.h - Metadata definitions ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// This file contains the declarations for metadata subclasses.
+/// They represent the different flavors of metadata that live in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_METADATA_H
+#define LLVM_IR_METADATA_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/IR/Value.h"
+
+namespace llvm {
+class Constant;
+class Instruction;
+class LLVMContext;
+class Module;
+template <typename T> class SmallVectorImpl;
+template<typename ValueSubClass, typename ItemParentClass>
+ class SymbolTableListTraits;
+
+
+//===----------------------------------------------------------------------===//
+/// MDString - a single uniqued string.
+/// These are used to efficiently contain a byte sequence for metadata.
+/// MDString is always unnamed.
+class MDString : public Value {
+ virtual void anchor();
+ MDString(const MDString &) LLVM_DELETED_FUNCTION;
+
+ explicit MDString(LLVMContext &C);
+public:
+ static MDString *get(LLVMContext &Context, StringRef Str);
+ static MDString *get(LLVMContext &Context, const char *Str) {
+ return get(Context, Str ? StringRef(Str) : StringRef());
+ }
+
+ StringRef getString() const { return getName(); }
+
+ unsigned getLength() const { return (unsigned)getName().size(); }
+
+ typedef StringRef::iterator iterator;
+
+ /// begin() - Pointer to the first byte of the string.
+ iterator begin() const { return getName().begin(); }
+
+ /// end() - Pointer to one byte past the end of the string.
+ iterator end() const { return getName().end(); }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Value *V) {
+ return V->getValueID() == MDStringVal;
+ }
+};
+
+
+class MDNodeOperand;
+
+//===----------------------------------------------------------------------===//
+/// MDNode - a tuple of other values.
+class MDNode : public Value, public FoldingSetNode {
+ MDNode(const MDNode &) LLVM_DELETED_FUNCTION;
+ void operator=(const MDNode &) LLVM_DELETED_FUNCTION;
+ friend class MDNodeOperand;
+ friend class LLVMContextImpl;
+ friend struct FoldingSetTrait<MDNode>;
+
+ /// Hash - If the MDNode is uniqued cache the hash to speed up lookup.
+ unsigned Hash;
+
+ /// NumOperands - This many 'MDNodeOperand' items are co-allocated onto the
+ /// end of this MDNode.
+ unsigned NumOperands;
+
+ // Subclass data enums.
+ enum {
+ /// FunctionLocalBit - This bit is set if this MDNode is function local.
+ /// This is true when it (potentially transitively) contains a reference to
+    /// something in a function, like an argument, basic block, or instruction.
+ FunctionLocalBit = 1 << 0,
+
+ /// NotUniquedBit - This is set on MDNodes that are not uniqued because they
+ /// have a null operand.
+ NotUniquedBit = 1 << 1,
+
+ /// DestroyFlag - This bit is set by destroy() so the destructor can assert
+ /// that the node isn't being destroyed with a plain 'delete'.
+ DestroyFlag = 1 << 2
+ };
+
+ // FunctionLocal enums.
+ enum FunctionLocalness {
+ FL_Unknown = -1,
+ FL_No = 0,
+ FL_Yes = 1
+ };
+
+  /// replaceOperand - Replace the operand referenced by Op with NewVal.
+ void replaceOperand(MDNodeOperand *Op, Value *NewVal);
+ ~MDNode();
+
+ MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal);
+
+ static MDNode *getMDNode(LLVMContext &C, ArrayRef<Value*> Vals,
+ FunctionLocalness FL, bool Insert = true);
+public:
+ // Constructors and destructors.
+ static MDNode *get(LLVMContext &Context, ArrayRef<Value*> Vals);
+ // getWhenValsUnresolved - Construct MDNode determining function-localness
+ // from isFunctionLocal argument, not by analyzing Vals.
+ static MDNode *getWhenValsUnresolved(LLVMContext &Context,
+ ArrayRef<Value*> Vals,
+ bool isFunctionLocal);
+
+ static MDNode *getIfExists(LLVMContext &Context, ArrayRef<Value*> Vals);
+
+ /// getTemporary - Return a temporary MDNode, for use in constructing
+ /// cyclic MDNode structures. A temporary MDNode is not uniqued,
+ /// may be RAUW'd, and must be manually deleted with deleteTemporary.
+ static MDNode *getTemporary(LLVMContext &Context, ArrayRef<Value*> Vals);
+
+ /// deleteTemporary - Deallocate a node created by getTemporary. The
+ /// node must not have any users.
+ static void deleteTemporary(MDNode *N);
+
+ /// replaceOperandWith - Replace a specific operand.
+ void replaceOperandWith(unsigned i, Value *NewVal);
+
+ /// getOperand - Return specified operand.
+ Value *getOperand(unsigned i) const;
+
+ /// getNumOperands - Return number of MDNode operands.
+ unsigned getNumOperands() const { return NumOperands; }
+
+ /// isFunctionLocal - Return whether MDNode is local to a function.
+ bool isFunctionLocal() const {
+ return (getSubclassDataFromValue() & FunctionLocalBit) != 0;
+ }
+
+ // getFunction - If this metadata is function-local and recursively has a
+ // function-local operand, return the first such operand's parent function.
+ // Otherwise, return null. getFunction() should not be used for performance-
+ // critical code because it recursively visits all the MDNode's operands.
+ const Function *getFunction() const;
+
+ /// Profile - calculate a unique identifier for this MDNode to collapse
+  /// duplicates.
+ void Profile(FoldingSetNodeID &ID) const;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Value *V) {
+ return V->getValueID() == MDNodeVal;
+ }
+
+ /// Methods for metadata merging.
+ static MDNode *getMostGenericTBAA(MDNode *A, MDNode *B);
+ static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B);
+ static MDNode *getMostGenericRange(MDNode *A, MDNode *B);
+private:
+ // destroy - Delete this node. Only when there are no uses.
+ void destroy();
+
+ bool isNotUniqued() const {
+ return (getSubclassDataFromValue() & NotUniquedBit) != 0;
+ }
+ void setIsNotUniqued();
+
+ // Shadow Value::setValueSubclassData with a private forwarding method so that
+ // any future subclasses cannot accidentally use it.
+ void setValueSubclassData(unsigned short D) {
+ Value::setValueSubclassData(D);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// NamedMDNode - a tuple of MDNodes. Despite its name, a NamedMDNode isn't
+/// itself an MDNode. NamedMDNodes belong to modules, have names, and contain
+/// lists of MDNodes.
+class NamedMDNode : public ilist_node<NamedMDNode> {
+ friend class SymbolTableListTraits<NamedMDNode, Module>;
+ friend struct ilist_traits<NamedMDNode>;
+ friend class LLVMContextImpl;
+ friend class Module;
+ NamedMDNode(const NamedMDNode &) LLVM_DELETED_FUNCTION;
+
+ std::string Name;
+ Module *Parent;
+ void *Operands; // SmallVector<TrackingVH<MDNode>, 4>
+
+ void setParent(Module *M) { Parent = M; }
+
+ explicit NamedMDNode(const Twine &N);
+
+public:
+ /// eraseFromParent - Drop all references and remove the node from parent
+ /// module.
+ void eraseFromParent();
+
+ /// dropAllReferences - Remove all uses and clear node vector.
+ void dropAllReferences();
+
+ /// ~NamedMDNode - Destroy NamedMDNode.
+ ~NamedMDNode();
+
+ /// getParent - Get the module that holds this named metadata collection.
+ inline Module *getParent() { return Parent; }
+ inline const Module *getParent() const { return Parent; }
+
+ /// getOperand - Return specified operand.
+ MDNode *getOperand(unsigned i) const;
+
+ /// getNumOperands - Return the number of NamedMDNode operands.
+ unsigned getNumOperands() const;
+
+ /// addOperand - Add metadata operand.
+ void addOperand(MDNode *M);
+
+ /// getName - Return a constant reference to this named metadata's name.
+ StringRef getName() const;
+
+ /// print - Implement operator<< on NamedMDNode.
+ void print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW = 0) const;
+
+ /// dump() - Allow printing of NamedMDNodes from the debugger.
+ void dump() const;
+};
+
+} // end llvm namespace
+
+#endif
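
The getTemporary/deleteTemporary pair above exists to build cyclic metadata
graphs; a condensed sketch of the pattern (the same sequence that
MDBuilder::createAnonymousTBAARoot walks through):

    #include "llvm/IR/Metadata.h"

    llvm::MDNode *makeSelfReferentialNode(llvm::LLVMContext &Ctx) {
      // The temporary stands in for the not-yet-complete cycle member;
      // it is not uniqued and may be RAUW'd.
      llvm::MDNode *Tmp =
          llvm::MDNode::getTemporary(Ctx, llvm::ArrayRef<llvm::Value*>());
      llvm::MDNode *Node = llvm::MDNode::get(Ctx, Tmp);
      Node->replaceOperandWith(0, Node);   // close the cycle
      llvm::MDNode::deleteTemporary(Tmp);  // the temporary has no users now
      return Node;
    }
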
diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h
new file mode 100644
index 000000000000..4460aa435b94
--- /dev/null
+++ b/include/llvm/IR/Module.h
@@ -0,0 +1,589 @@
+//===-- llvm/Module.h - C++ class to represent a VM module ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// Module.h This file contains the declarations for the Module class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_MODULE_H
+#define LLVM_IR_MODULE_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class FunctionType;
+class GVMaterializer;
+class LLVMContext;
+class StructType;
+template<typename T> struct DenseMapInfo;
+template<typename KeyT, typename ValueT, typename KeyInfoT> class DenseMap;
+
+template<> struct ilist_traits<Function>
+ : public SymbolTableListTraits<Function, Module> {
+
+ // createSentinel is used to get hold of the node that marks the end of the
+ // list... (same trick used here as in ilist_traits<Instruction>)
+ Function *createSentinel() const {
+ return static_cast<Function*>(&Sentinel);
+ }
+ static void destroySentinel(Function*) {}
+
+ Function *provideInitialHead() const { return createSentinel(); }
+ Function *ensureHead(Function*) const { return createSentinel(); }
+ static void noteHead(Function*, Function*) {}
+
+private:
+ mutable ilist_node<Function> Sentinel;
+};
+
+template<> struct ilist_traits<GlobalVariable>
+ : public SymbolTableListTraits<GlobalVariable, Module> {
+ // createSentinel is used to create a node that marks the end of the list.
+ GlobalVariable *createSentinel() const {
+ return static_cast<GlobalVariable*>(&Sentinel);
+ }
+ static void destroySentinel(GlobalVariable*) {}
+
+ GlobalVariable *provideInitialHead() const { return createSentinel(); }
+ GlobalVariable *ensureHead(GlobalVariable*) const { return createSentinel(); }
+ static void noteHead(GlobalVariable*, GlobalVariable*) {}
+private:
+ mutable ilist_node<GlobalVariable> Sentinel;
+};
+
+template<> struct ilist_traits<GlobalAlias>
+ : public SymbolTableListTraits<GlobalAlias, Module> {
+ // createSentinel is used to create a node that marks the end of the list.
+ GlobalAlias *createSentinel() const {
+ return static_cast<GlobalAlias*>(&Sentinel);
+ }
+ static void destroySentinel(GlobalAlias*) {}
+
+ GlobalAlias *provideInitialHead() const { return createSentinel(); }
+ GlobalAlias *ensureHead(GlobalAlias*) const { return createSentinel(); }
+ static void noteHead(GlobalAlias*, GlobalAlias*) {}
+private:
+ mutable ilist_node<GlobalAlias> Sentinel;
+};
+
+template<> struct ilist_traits<NamedMDNode>
+ : public ilist_default_traits<NamedMDNode> {
+ // createSentinel is used to get hold of a node that marks the end of
+ // the list...
+ NamedMDNode *createSentinel() const {
+ return static_cast<NamedMDNode*>(&Sentinel);
+ }
+ static void destroySentinel(NamedMDNode*) {}
+
+ NamedMDNode *provideInitialHead() const { return createSentinel(); }
+ NamedMDNode *ensureHead(NamedMDNode*) const { return createSentinel(); }
+ static void noteHead(NamedMDNode*, NamedMDNode*) {}
+ void addNodeToList(NamedMDNode *) {}
+ void removeNodeFromList(NamedMDNode *) {}
+private:
+ mutable ilist_node<NamedMDNode> Sentinel;
+};
+
+/// A Module instance is used to store all the information related to an
+/// LLVM module. Modules are the top level container of all other LLVM
+/// Intermediate Representation (IR) objects. Each module directly contains a
+/// list of global variables, a list of functions, a list of libraries (or
+/// other modules) this module depends on, a symbol table, and various data
+/// about the target's characteristics.
+///
+/// A module maintains a GlobalValRefMap object that is used to hold all
+/// constant references to global variables in the module. When a global
+/// variable is destroyed, it should have no entries in the GlobalValueRefMap.
+/// @brief The main container class for the LLVM Intermediate Representation.
+class Module {
+/// @name Types And Enumerations
+/// @{
+public:
+ /// The type for the list of global variables.
+ typedef iplist<GlobalVariable> GlobalListType;
+ /// The type for the list of functions.
+ typedef iplist<Function> FunctionListType;
+ /// The type for the list of aliases.
+ typedef iplist<GlobalAlias> AliasListType;
+ /// The type for the list of named metadata.
+ typedef ilist<NamedMDNode> NamedMDListType;
+
+ /// The Global Variable iterator.
+ typedef GlobalListType::iterator global_iterator;
+ /// The Global Variable constant iterator.
+ typedef GlobalListType::const_iterator const_global_iterator;
+
+ /// The Function iterators.
+ typedef FunctionListType::iterator iterator;
+ /// The Function constant iterator
+ typedef FunctionListType::const_iterator const_iterator;
+
+ /// The Global Alias iterators.
+ typedef AliasListType::iterator alias_iterator;
+ /// The Global Alias constant iterator
+ typedef AliasListType::const_iterator const_alias_iterator;
+
+ /// The named metadata iterators.
+ typedef NamedMDListType::iterator named_metadata_iterator;
+  /// The named metadata constant iterators.
+ typedef NamedMDListType::const_iterator const_named_metadata_iterator;
+
+  /// An enumeration for describing the endianness of the target machine.
+ enum Endianness { AnyEndianness, LittleEndian, BigEndian };
+
+ /// An enumeration for describing the size of a pointer on the target machine.
+ enum PointerSize { AnyPointerSize, Pointer32, Pointer64 };
+
+ /// This enumeration defines the supported behaviors of module flags.
+ enum ModFlagBehavior {
+ /// Emits an error if two values disagree, otherwise the resulting value is
+ /// that of the operands.
+ Error = 1,
+
+ /// Emits a warning if two values disagree. The result value will be the
+ /// operand for the flag from the first module being linked.
+ Warning = 2,
+
+ /// Adds a requirement that another module flag be present and have a
+ /// specified value after linking is performed. The value must be a metadata
+ /// pair, where the first element of the pair is the ID of the module flag
+ /// to be restricted, and the second element of the pair is the value the
+ /// module flag should be restricted to. This behavior can be used to
+ /// restrict the allowable results (via triggering of an error) of linking
+ /// IDs with the **Override** behavior.
+ Require = 3,
+
+ /// Uses the specified value, regardless of the behavior or value of the
+ /// other module. If both modules specify **Override**, but the values
+ /// differ, an error will be emitted.
+ Override = 4,
+
+ /// Appends the two values, which are required to be metadata nodes.
+ Append = 5,
+
+ /// Appends the two values, which are required to be metadata
+ /// nodes. However, duplicate entries in the second list are dropped
+ /// during the append operation.
+ AppendUnique = 6
+ };
+
+ struct ModuleFlagEntry {
+ ModFlagBehavior Behavior;
+ MDString *Key;
+ Value *Val;
+ ModuleFlagEntry(ModFlagBehavior B, MDString *K, Value *V)
+ : Behavior(B), Key(K), Val(V) {}
+ };
+
+/// @}
+/// @name Member Variables
+/// @{
+private:
+ LLVMContext &Context; ///< The LLVMContext from which types and
+ ///< constants are allocated.
+ GlobalListType GlobalList; ///< The Global Variables in the module
+ FunctionListType FunctionList; ///< The Functions in the module
+ AliasListType AliasList; ///< The Aliases in the module
+ NamedMDListType NamedMDList; ///< The named metadata in the module
+ std::string GlobalScopeAsm; ///< Inline Asm at global scope.
+ ValueSymbolTable *ValSymTab; ///< Symbol table for values
+ OwningPtr<GVMaterializer> Materializer; ///< Used to materialize GlobalValues
+ std::string ModuleID; ///< Human readable identifier for the module
+  std::string TargetTriple;       ///< Target triple the module is compiled for
+ std::string DataLayout; ///< Target data description
+ void *NamedMDSymTab; ///< NamedMDNode names.
+
+ friend class Constant;
+
+/// @}
+/// @name Constructors
+/// @{
+public:
+ /// The Module constructor. Note that there is no default constructor. You
+ /// must provide a name for the module upon construction.
+ explicit Module(StringRef ModuleID, LLVMContext& C);
+ /// The module destructor. This will dropAllReferences.
+ ~Module();
+
+/// @}
+/// @name Module Level Accessors
+/// @{
+
+ /// Get the module identifier which is, essentially, the name of the module.
+ /// @returns the module identifier as a string
+ const std::string &getModuleIdentifier() const { return ModuleID; }
+
+ /// Get the data layout string for the module's target platform. This encodes
+ /// the type sizes and alignments expected by this module.
+ /// @returns the data layout as a string
+ const std::string &getDataLayout() const { return DataLayout; }
+
+ /// Get the target triple which is a string describing the target host.
+ /// @returns a string containing the target triple.
+ const std::string &getTargetTriple() const { return TargetTriple; }
+
+ /// Get the target endian information.
+  /// @returns Endianness - an enumeration for the endianness of the target
+ Endianness getEndianness() const;
+
+ /// Get the target pointer size.
+ /// @returns PointerSize - an enumeration for the size of the target's pointer
+ PointerSize getPointerSize() const;
+
+ /// Get the global data context.
+ /// @returns LLVMContext - a container for LLVM's global information
+ LLVMContext &getContext() const { return Context; }
+
+ /// Get any module-scope inline assembly blocks.
+ /// @returns a string containing the module-scope inline assembly blocks.
+ const std::string &getModuleInlineAsm() const { return GlobalScopeAsm; }
+
+/// @}
+/// @name Module Level Mutators
+/// @{
+
+ /// Set the module identifier.
+ void setModuleIdentifier(StringRef ID) { ModuleID = ID; }
+
+ /// Set the data layout
+ void setDataLayout(StringRef DL) { DataLayout = DL; }
+
+ /// Set the target triple.
+ void setTargetTriple(StringRef T) { TargetTriple = T; }
+
+ /// Set the module-scope inline assembly blocks.
+ void setModuleInlineAsm(StringRef Asm) {
+ GlobalScopeAsm = Asm;
+ if (!GlobalScopeAsm.empty() &&
+ GlobalScopeAsm[GlobalScopeAsm.size()-1] != '\n')
+ GlobalScopeAsm += '\n';
+ }
+
+ /// Append to the module-scope inline assembly blocks, automatically inserting
+ /// a separating newline if necessary.
+ void appendModuleInlineAsm(StringRef Asm) {
+ GlobalScopeAsm += Asm;
+ if (!GlobalScopeAsm.empty() &&
+ GlobalScopeAsm[GlobalScopeAsm.size()-1] != '\n')
+ GlobalScopeAsm += '\n';
+ }
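+
+  // Usage sketch (editorial illustration, not part of the original header;
+  // "M" stands for any Module):
+  //   M.setModuleInlineAsm(".globl marker");
+  //   M.appendModuleInlineAsm("marker: .long 0");
+  //   // GlobalScopeAsm now holds both directives, each newline-terminated.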
+
+/// @}
+/// @name Generic Value Accessors
+/// @{
+
+ /// getNamedValue - Return the global value in the module with
+ /// the specified name, of arbitrary type. This method returns null
+ /// if a global with the specified name is not found.
+ GlobalValue *getNamedValue(StringRef Name) const;
+
+ /// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
+ /// This ID is uniqued across modules in the current LLVMContext.
+ unsigned getMDKindID(StringRef Name) const;
+
+ /// getMDKindNames - Populate client supplied SmallVector with the name for
+ /// custom metadata IDs registered in this LLVMContext.
+ void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
+
+ typedef DenseMap<StructType*, unsigned, DenseMapInfo<StructType*> >
+ NumeredTypesMapTy;
+
+ /// getTypeByName - Return the type with the specified name, or null if there
+ /// is none by that name.
+ StructType *getTypeByName(StringRef Name) const;
+
+/// @}
+/// @name Function Accessors
+/// @{
+
+ /// getOrInsertFunction - Look up the specified function in the module symbol
+ /// table. Four possibilities:
+ /// 1. If it does not exist, add a prototype for the function and return it.
+  /// 2. If it exists and has local linkage, the existing function is
+  ///    renamed and a new one is inserted.
+ /// 3. Otherwise, if the existing function has the correct prototype, return
+ /// the existing function.
+ /// 4. Finally, the function exists but has the wrong prototype: return the
+ /// function with a constantexpr cast to the right prototype.
+ Constant *getOrInsertFunction(StringRef Name, FunctionType *T,
+ AttributeSet AttributeList);
+
+ Constant *getOrInsertFunction(StringRef Name, FunctionType *T);
+
+ /// getOrInsertFunction - Look up the specified function in the module symbol
+ /// table. If it does not exist, add a prototype for the function and return
+  /// it. This function guarantees to return a constant pointer to the
+  /// specified function type, or a ConstantExpr BitCast of that type if the
+  /// named function has a different type. This version of the method takes a
+ /// null terminated list of function arguments, which makes it easier for
+ /// clients to use.
+ Constant *getOrInsertFunction(StringRef Name,
+ AttributeSet AttributeList,
+ Type *RetTy, ...) END_WITH_NULL;
+
+ /// getOrInsertFunction - Same as above, but without the attributes.
+ Constant *getOrInsertFunction(StringRef Name, Type *RetTy, ...)
+ END_WITH_NULL;
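+
+  // Usage sketch (editorial illustration; the printf prototype is just an
+  // example and "M" is any Module):
+  //   LLVMContext &Ctx = M.getContext();
+  //   Type *I8Ptr = Type::getInt8PtrTy(Ctx);
+  //   FunctionType *FT =
+  //       FunctionType::get(Type::getInt32Ty(Ctx), I8Ptr, /*isVarArg=*/true);
+  //   Constant *Printf = M.getOrInsertFunction("printf", FT);
+  //   // Printf is the existing function, a bitcast of it, or a fresh
+  //   // prototype, per the four cases documented above.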
+
+ Constant *getOrInsertTargetIntrinsic(StringRef Name,
+ FunctionType *Ty,
+ AttributeSet AttributeList);
+
+ /// getFunction - Look up the specified function in the module symbol table.
+ /// If it does not exist, return null.
+ Function *getFunction(StringRef Name) const;
+
+/// @}
+/// @name Global Variable Accessors
+/// @{
+
+ /// getGlobalVariable - Look up the specified global variable in the module
+ /// symbol table. If it does not exist, return null. If AllowInternal is set
+  /// to true, this function will also return globals that have InternalLinkage.
+  /// By default, such globals are not returned.
+ GlobalVariable *getGlobalVariable(StringRef Name,
+ bool AllowInternal = false) const;
+
+ /// getNamedGlobal - Return the global variable in the module with the
+ /// specified name, of arbitrary type. This method returns null if a global
+ /// with the specified name is not found.
+ GlobalVariable *getNamedGlobal(StringRef Name) const {
+ return getGlobalVariable(Name, true);
+ }
+
+ /// getOrInsertGlobal - Look up the specified global in the module symbol
+ /// table.
+  /// 1. If it does not exist, add a declaration of the global and return it.
+  /// 2. If the existing global has the correct declaration, return the
+  ///    existing global.
+  /// 3. Otherwise, the global exists but has the wrong type: return the
+  ///    global with a constantexpr cast to the right type.
+ Constant *getOrInsertGlobal(StringRef Name, Type *Ty);
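+
+  // Usage sketch (editorial illustration; "counter" is a made-up name):
+  //   Constant *Counter =
+  //       M.getOrInsertGlobal("counter", Type::getInt32Ty(M.getContext()));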
+
+/// @}
+/// @name Global Alias Accessors
+/// @{
+
+ /// getNamedAlias - Return the global alias in the module with the
+ /// specified name, of arbitrary type. This method returns null if a global
+ /// with the specified name is not found.
+ GlobalAlias *getNamedAlias(StringRef Name) const;
+
+/// @}
+/// @name Named Metadata Accessors
+/// @{
+
+ /// getNamedMetadata - Return the NamedMDNode in the module with the
+ /// specified name. This method returns null if a NamedMDNode with the
+ /// specified name is not found.
+ NamedMDNode *getNamedMetadata(const Twine &Name) const;
+
+ /// getOrInsertNamedMetadata - Return the named MDNode in the module
+ /// with the specified name. This method returns a new NamedMDNode if a
+ /// NamedMDNode with the specified name is not found.
+ NamedMDNode *getOrInsertNamedMetadata(StringRef Name);
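+
+  // Usage sketch (editorial illustration; "my.annotations" is a made-up name
+  // and "N" is any MDNode* you already hold):
+  //   NamedMDNode *NMD = M.getOrInsertNamedMetadata("my.annotations");
+  //   NMD->addOperand(N);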
+
+ /// eraseNamedMetadata - Remove the given NamedMDNode from this module
+ /// and delete it.
+ void eraseNamedMetadata(NamedMDNode *NMD);
+
+/// @}
+/// @name Module Flags Accessors
+/// @{
+
+ /// getModuleFlagsMetadata - Returns the module flags in the provided vector.
+ void getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const;
+
+ /// getModuleFlagsMetadata - Returns the NamedMDNode in the module that
+ /// represents module-level flags. This method returns null if there are no
+ /// module-level flags.
+ NamedMDNode *getModuleFlagsMetadata() const;
+
+ /// getOrInsertModuleFlagsMetadata - Returns the NamedMDNode in the module
+ /// that represents module-level flags. If module-level flags aren't found,
+ /// it creates the named metadata that contains them.
+ NamedMDNode *getOrInsertModuleFlagsMetadata();
+
+ /// addModuleFlag - Add a module-level flag to the module-level flags
+ /// metadata. It will create the module-level flags named metadata if it
+ /// doesn't already exist.
+ void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, Value *Val);
+ void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, uint32_t Val);
+ void addModuleFlag(MDNode *Node);
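+
+  // Usage sketch (editorial illustration; Module::Error is one of the
+  // ModFlagBehavior values declared above):
+  //   M.addModuleFlag(Module::Error, "wchar_size", 4);
+  //   // Creates the module-flags named metadata on first use and appends an
+  //   // (Error, "wchar_size", i32 4) entry to it.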
+
+/// @}
+/// @name Materialization
+/// @{
+
+ /// setMaterializer - Sets the GVMaterializer to GVM. This module must not
+ /// yet have a Materializer. To reset the materializer for a module that
+ /// already has one, call MaterializeAllPermanently first. Destroying this
+ /// module will destroy its materializer without materializing any more
+  /// GlobalValues. Short of destroying the Module, there is no way to detach
+  /// or destroy a materializer without materializing all the GVs it controls,
+  /// since doing so would leave orphaned, unmaterialized GVs behind.
+ void setMaterializer(GVMaterializer *GVM);
+ /// getMaterializer - Retrieves the GVMaterializer, if any, for this Module.
+ GVMaterializer *getMaterializer() const { return Materializer.get(); }
+
+ /// isMaterializable - True if the definition of GV has yet to be materialized
+ /// from the GVMaterializer.
+ bool isMaterializable(const GlobalValue *GV) const;
+ /// isDematerializable - Returns true if this GV was loaded from this Module's
+ /// GVMaterializer and the GVMaterializer knows how to dematerialize the GV.
+ bool isDematerializable(const GlobalValue *GV) const;
+
+ /// Materialize - Make sure the GlobalValue is fully read. If the module is
+ /// corrupt, this returns true and fills in the optional string with
+ /// information about the problem. If successful, this returns false.
+ bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0);
+ /// Dematerialize - If the GlobalValue is read in, and if the GVMaterializer
+ /// supports it, release the memory for the function, and set it up to be
+ /// materialized lazily. If !isDematerializable(), this method is a noop.
+ void Dematerialize(GlobalValue *GV);
+
+ /// MaterializeAll - Make sure all GlobalValues in this Module are fully read.
+ /// If the module is corrupt, this returns true and fills in the optional
+ /// string with information about the problem. If successful, this returns
+ /// false.
+ bool MaterializeAll(std::string *ErrInfo = 0);
+
+ /// MaterializeAllPermanently - Make sure all GlobalValues in this Module are
+ /// fully read and clear the Materializer. If the module is corrupt, this
+ /// returns true, fills in the optional string with information about the
+ /// problem, and DOES NOT clear the old Materializer. If successful, this
+ /// returns false.
+ bool MaterializeAllPermanently(std::string *ErrInfo = 0);
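+
+  // Usage sketch (editorial illustration; report_fatal_error comes from
+  // llvm/Support/ErrorHandling.h):
+  //   std::string Err;
+  //   if (M.MaterializeAllPermanently(&Err))
+  //     report_fatal_error("materialization failed: " + Err);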
+
+/// @}
+/// @name Direct access to the globals list, functions list, and symbol table
+/// @{
+
+ /// Get the Module's list of global variables (constant).
+ const GlobalListType &getGlobalList() const { return GlobalList; }
+ /// Get the Module's list of global variables.
+ GlobalListType &getGlobalList() { return GlobalList; }
+ static iplist<GlobalVariable> Module::*getSublistAccess(GlobalVariable*) {
+ return &Module::GlobalList;
+ }
+ /// Get the Module's list of functions (constant).
+ const FunctionListType &getFunctionList() const { return FunctionList; }
+ /// Get the Module's list of functions.
+ FunctionListType &getFunctionList() { return FunctionList; }
+ static iplist<Function> Module::*getSublistAccess(Function*) {
+ return &Module::FunctionList;
+ }
+ /// Get the Module's list of aliases (constant).
+ const AliasListType &getAliasList() const { return AliasList; }
+ /// Get the Module's list of aliases.
+ AliasListType &getAliasList() { return AliasList; }
+ static iplist<GlobalAlias> Module::*getSublistAccess(GlobalAlias*) {
+ return &Module::AliasList;
+ }
+ /// Get the Module's list of named metadata (constant).
+ const NamedMDListType &getNamedMDList() const { return NamedMDList; }
+ /// Get the Module's list of named metadata.
+ NamedMDListType &getNamedMDList() { return NamedMDList; }
+ static ilist<NamedMDNode> Module::*getSublistAccess(NamedMDNode*) {
+ return &Module::NamedMDList;
+ }
+  /// Get the Module's symbol table of global variable and function
+  /// identifiers (constant).
+ const ValueSymbolTable &getValueSymbolTable() const { return *ValSymTab; }
+ /// Get the Module's symbol table of global variable and function identifiers.
+ ValueSymbolTable &getValueSymbolTable() { return *ValSymTab; }
+
+/// @}
+/// @name Global Variable Iteration
+/// @{
+
+ global_iterator global_begin() { return GlobalList.begin(); }
+ const_global_iterator global_begin() const { return GlobalList.begin(); }
+ global_iterator global_end () { return GlobalList.end(); }
+ const_global_iterator global_end () const { return GlobalList.end(); }
+ bool global_empty() const { return GlobalList.empty(); }
+
+/// @}
+/// @name Function Iteration
+/// @{
+
+ iterator begin() { return FunctionList.begin(); }
+ const_iterator begin() const { return FunctionList.begin(); }
+ iterator end () { return FunctionList.end(); }
+ const_iterator end () const { return FunctionList.end(); }
+ size_t size() const { return FunctionList.size(); }
+ bool empty() const { return FunctionList.empty(); }
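+
+  // Iteration sketch (editorial illustration; counts function definitions):
+  //   unsigned NumDefs = 0;
+  //   for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
+  //     if (!F->isDeclaration())
+  //       ++NumDefs;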
+
+/// @}
+/// @name Alias Iteration
+/// @{
+
+ alias_iterator alias_begin() { return AliasList.begin(); }
+ const_alias_iterator alias_begin() const { return AliasList.begin(); }
+ alias_iterator alias_end () { return AliasList.end(); }
+ const_alias_iterator alias_end () const { return AliasList.end(); }
+ size_t alias_size () const { return AliasList.size(); }
+ bool alias_empty() const { return AliasList.empty(); }
+
+/// @}
+/// @name Named Metadata Iteration
+/// @{
+
+ named_metadata_iterator named_metadata_begin() { return NamedMDList.begin(); }
+ const_named_metadata_iterator named_metadata_begin() const {
+ return NamedMDList.begin();
+ }
+
+ named_metadata_iterator named_metadata_end() { return NamedMDList.end(); }
+ const_named_metadata_iterator named_metadata_end() const {
+ return NamedMDList.end();
+ }
+
+ size_t named_metadata_size() const { return NamedMDList.size(); }
+ bool named_metadata_empty() const { return NamedMDList.empty(); }
+
+/// @}
+/// @name Utility functions for printing and dumping Module objects
+/// @{
+
+ /// Print the module to an output stream with an optional
+ /// AssemblyAnnotationWriter.
+ void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const;
+
+ /// Dump the module to stderr (for debugging).
+ void dump() const;
+
+  /// This function causes all the subinstructions to "let go" of all the
+  /// references that they are maintaining. This allows one to 'delete' a whole
+  /// module at a time, even though there may be circular references: first all
+  /// references are dropped, and all use counts go to zero. Then everything
+  /// is deleted for real. Note that no operations are valid on an object
+  /// that has "dropped all references", except operator delete.
+ void dropAllReferences();
+/// @}
+};
+
+/// A raw_ostream inserter for modules.
+inline raw_ostream &operator<<(raw_ostream &O, const Module &M) {
+ M.print(O, 0);
+ return O;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/OperandTraits.h b/include/llvm/IR/OperandTraits.h
new file mode 100644
index 000000000000..0e4b1950f277
--- /dev/null
+++ b/include/llvm/IR/OperandTraits.h
@@ -0,0 +1,160 @@
+//===-- llvm/OperandTraits.h - OperandTraits class definition ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the traits classes that are handy for enforcing the correct
+// layout of various User subclasses. It also provides the means for accessing
+// the operands in the most efficient manner.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_OPERANDTRAITS_H
+#define LLVM_IR_OPERANDTRAITS_H
+
+#include "llvm/IR/User.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// FixedNumOperand Trait Class
+//===----------------------------------------------------------------------===//
+
+/// FixedNumOperandTraits - determine the allocation regime of the Use array
+/// when it is a prefix to the User object, and the number of Use objects is
+/// known at compile time.
+
+template <typename SubClass, unsigned ARITY>
+struct FixedNumOperandTraits {
+ static Use *op_begin(SubClass* U) {
+ return reinterpret_cast<Use*>(U) - ARITY;
+ }
+ static Use *op_end(SubClass* U) {
+ return reinterpret_cast<Use*>(U);
+ }
+ static unsigned operands(const User*) {
+ return ARITY;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// OptionalOperand Trait Class
+//===----------------------------------------------------------------------===//
+
+/// OptionalOperandTraits - when the number of operands may change at runtime.
+/// Naturally it may only decrease, because the allocation cannot be resized.
+
+template <typename SubClass, unsigned ARITY = 1>
+struct OptionalOperandTraits : public FixedNumOperandTraits<SubClass, ARITY> {
+ static unsigned operands(const User *U) {
+ return U->getNumOperands();
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// VariadicOperand Trait Class
+//===----------------------------------------------------------------------===//
+
+/// VariadicOperandTraits - determine the allocation regime of the Use array
+/// when it is a prefix to the User object, and the number of Use objects is
+/// only known at allocation time.
+
+template <typename SubClass, unsigned MINARITY = 0>
+struct VariadicOperandTraits {
+ static Use *op_begin(SubClass* U) {
+ return reinterpret_cast<Use*>(U) - static_cast<User*>(U)->getNumOperands();
+ }
+ static Use *op_end(SubClass* U) {
+ return reinterpret_cast<Use*>(U);
+ }
+ static unsigned operands(const User *U) {
+ return U->getNumOperands();
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// HungoffOperand Trait Class
+//===----------------------------------------------------------------------===//
+
+/// HungoffOperandTraits - determine the allocation regime of the Use array
+/// when it is not a prefix to the User object, but allocated at an unrelated
+/// heap address.
+/// Assumes that the User subclass that is determined by this traits class
+/// has an OperandList member of type User::op_iterator. [Note: this is now
+/// trivially satisfied, because User has that member for historic reasons.]
+///
+/// This is the traits class that is needed when the Use array must be
+/// resizable.
+
+template <unsigned MINARITY = 1>
+struct HungoffOperandTraits {
+ static Use *op_begin(User* U) {
+ return U->OperandList;
+ }
+ static Use *op_end(User* U) {
+ return U->OperandList + U->getNumOperands();
+ }
+ static unsigned operands(const User *U) {
+ return U->getNumOperands();
+ }
+};
+
+/// Macro for generating in-class operand accessor declarations.
+/// It should only be called in the public section of the interface.
+///
+#define DECLARE_TRANSPARENT_OPERAND_ACCESSORS(VALUECLASS) \
+ public: \
+ inline VALUECLASS *getOperand(unsigned) const; \
+ inline void setOperand(unsigned, VALUECLASS*); \
+ inline op_iterator op_begin(); \
+ inline const_op_iterator op_begin() const; \
+ inline op_iterator op_end(); \
+ inline const_op_iterator op_end() const; \
+ protected: \
+ template <int> inline Use &Op(); \
+ template <int> inline const Use &Op() const; \
+ public: \
+ inline unsigned getNumOperands() const
+
+/// Macro for generating out-of-class operand accessor definitions
+#define DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CLASS, VALUECLASS) \
+CLASS::op_iterator CLASS::op_begin() { \
+ return OperandTraits<CLASS>::op_begin(this); \
+} \
+CLASS::const_op_iterator CLASS::op_begin() const { \
+ return OperandTraits<CLASS>::op_begin(const_cast<CLASS*>(this)); \
+} \
+CLASS::op_iterator CLASS::op_end() { \
+ return OperandTraits<CLASS>::op_end(this); \
+} \
+CLASS::const_op_iterator CLASS::op_end() const { \
+ return OperandTraits<CLASS>::op_end(const_cast<CLASS*>(this)); \
+} \
+VALUECLASS *CLASS::getOperand(unsigned i_nocapture) const { \
+ assert(i_nocapture < OperandTraits<CLASS>::operands(this) \
+ && "getOperand() out of range!"); \
+ return cast_or_null<VALUECLASS>( \
+ OperandTraits<CLASS>::op_begin(const_cast<CLASS*>(this))[i_nocapture].get()); \
+} \
+void CLASS::setOperand(unsigned i_nocapture, VALUECLASS *Val_nocapture) { \
+ assert(i_nocapture < OperandTraits<CLASS>::operands(this) \
+ && "setOperand() out of range!"); \
+ OperandTraits<CLASS>::op_begin(this)[i_nocapture] = Val_nocapture; \
+} \
+unsigned CLASS::getNumOperands() const { \
+ return OperandTraits<CLASS>::operands(this); \
+} \
+template <int Idx_nocapture> Use &CLASS::Op() { \
+ return this->OpFrom<Idx_nocapture>(this); \
+} \
+template <int Idx_nocapture> const Use &CLASS::Op() const { \
+ return this->OpFrom<Idx_nocapture>(this); \
+}
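+
+// Typical wiring for a fixed-arity User subclass (editorial sketch; "MyBinOp"
+// is a made-up class name, but the pattern mirrors LLVM's own instruction
+// classes):
+//   class MyBinOp : public User {
+//   public:
+//     DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+//   };
+//   template <>
+//   struct OperandTraits<MyBinOp> : public FixedNumOperandTraits<MyBinOp, 2> {};
+//   DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MyBinOp, Value)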
+
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h
new file mode 100644
index 000000000000..13ab72cfefc8
--- /dev/null
+++ b/include/llvm/IR/Operator.h
@@ -0,0 +1,478 @@
+//===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various classes for working with Instructions and
+// ConstantExprs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_OPERATOR_H
+#define LLVM_IR_OPERATOR_H
+
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+
+namespace llvm {
+
+class GetElementPtrInst;
+class BinaryOperator;
+class ConstantExpr;
+
+/// Operator - This is a utility class that provides an abstraction for the
+/// common functionality between Instructions and ConstantExprs.
+///
+class Operator : public User {
+private:
+ // The Operator class is intended to be used as a utility, and is never itself
+ // instantiated.
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t s) LLVM_DELETED_FUNCTION;
+ Operator() LLVM_DELETED_FUNCTION;
+
+protected:
+ // NOTE: Cannot use LLVM_DELETED_FUNCTION because it's not legal to delete
+ // an overridden method that's not deleted in the base class. Cannot leave
+ // this unimplemented because that leads to an ODR-violation.
+ ~Operator();
+
+public:
+ /// getOpcode - Return the opcode for this Instruction or ConstantExpr.
+ ///
+ unsigned getOpcode() const {
+ if (const Instruction *I = dyn_cast<Instruction>(this))
+ return I->getOpcode();
+ return cast<ConstantExpr>(this)->getOpcode();
+ }
+
+ /// getOpcode - If V is an Instruction or ConstantExpr, return its
+ /// opcode. Otherwise return UserOp1.
+ ///
+ static unsigned getOpcode(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getOpcode();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ return CE->getOpcode();
+ return Instruction::UserOp1;
+ }
+
+ static inline bool classof(const Instruction *) { return true; }
+ static inline bool classof(const ConstantExpr *) { return true; }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) || isa<ConstantExpr>(V);
+ }
+};
+
+/// OverflowingBinaryOperator - Utility class for integer arithmetic operators
+/// which may exhibit overflow - Add, Sub, Mul, and Shl. It does not include
+/// SDiv, despite that operator having the potential for overflow.
+///
+class OverflowingBinaryOperator : public Operator {
+public:
+ enum {
+ NoUnsignedWrap = (1 << 0),
+ NoSignedWrap = (1 << 1)
+ };
+
+private:
+ friend class BinaryOperator;
+ friend class ConstantExpr;
+ void setHasNoUnsignedWrap(bool B) {
+ SubclassOptionalData =
+ (SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap);
+ }
+ void setHasNoSignedWrap(bool B) {
+ SubclassOptionalData =
+ (SubclassOptionalData & ~NoSignedWrap) | (B * NoSignedWrap);
+ }
+
+public:
+ /// hasNoUnsignedWrap - Test whether this operation is known to never
+ /// undergo unsigned overflow, aka the nuw property.
+ bool hasNoUnsignedWrap() const {
+ return SubclassOptionalData & NoUnsignedWrap;
+ }
+
+ /// hasNoSignedWrap - Test whether this operation is known to never
+ /// undergo signed overflow, aka the nsw property.
+ bool hasNoSignedWrap() const {
+ return (SubclassOptionalData & NoSignedWrap) != 0;
+ }
+
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Instruction::Add ||
+ I->getOpcode() == Instruction::Sub ||
+ I->getOpcode() == Instruction::Mul ||
+ I->getOpcode() == Instruction::Shl;
+ }
+ static inline bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Instruction::Add ||
+ CE->getOpcode() == Instruction::Sub ||
+ CE->getOpcode() == Instruction::Mul ||
+ CE->getOpcode() == Instruction::Shl;
+ }
+ static inline bool classof(const Value *V) {
+ return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
+ (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ }
+};
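+
+// Query sketch (editorial illustration; "V" is any Value*):
+//   if (const OverflowingBinaryOperator *OBO =
+//           dyn_cast<OverflowingBinaryOperator>(V))
+//     if (OBO->hasNoSignedWrap())
+//       ; // the add/sub/mul/shl is known not to wrap in the signed sense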
+
+/// PossiblyExactOperator - A udiv or sdiv instruction, which can be marked as
+/// "exact", indicating that no bits are destroyed.
+class PossiblyExactOperator : public Operator {
+public:
+ enum {
+ IsExact = (1 << 0)
+ };
+
+private:
+ friend class BinaryOperator;
+ friend class ConstantExpr;
+ void setIsExact(bool B) {
+ SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact);
+ }
+
+public:
+ /// isExact - Test whether this division is known to be exact, with
+ /// zero remainder.
+ bool isExact() const {
+ return SubclassOptionalData & IsExact;
+ }
+
+ static bool isPossiblyExactOpcode(unsigned OpC) {
+ return OpC == Instruction::SDiv ||
+ OpC == Instruction::UDiv ||
+ OpC == Instruction::AShr ||
+ OpC == Instruction::LShr;
+ }
+ static inline bool classof(const ConstantExpr *CE) {
+ return isPossiblyExactOpcode(CE->getOpcode());
+ }
+ static inline bool classof(const Instruction *I) {
+ return isPossiblyExactOpcode(I->getOpcode());
+ }
+ static inline bool classof(const Value *V) {
+ return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
+ (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ }
+};
+
+/// Convenience class for specifying and reasoning about fast-math flags.
+class FastMathFlags {
+private:
+ friend class FPMathOperator;
+ unsigned Flags;
+ FastMathFlags(unsigned F) : Flags(F) { }
+
+public:
+ enum {
+ UnsafeAlgebra = (1 << 0),
+ NoNaNs = (1 << 1),
+ NoInfs = (1 << 2),
+ NoSignedZeros = (1 << 3),
+ AllowReciprocal = (1 << 4)
+ };
+
+  FastMathFlags() : Flags(0) { }
+
+ /// Whether any flag is set
+ bool any() { return Flags != 0; }
+
+ /// Set all the flags to false
+ void clear() { Flags = 0; }
+
+ /// Flag queries
+ bool noNaNs() { return 0 != (Flags & NoNaNs); }
+ bool noInfs() { return 0 != (Flags & NoInfs); }
+ bool noSignedZeros() { return 0 != (Flags & NoSignedZeros); }
+ bool allowReciprocal() { return 0 != (Flags & AllowReciprocal); }
+ bool unsafeAlgebra() { return 0 != (Flags & UnsafeAlgebra); }
+
+ /// Flag setters
+ void setNoNaNs() { Flags |= NoNaNs; }
+ void setNoInfs() { Flags |= NoInfs; }
+ void setNoSignedZeros() { Flags |= NoSignedZeros; }
+ void setAllowReciprocal() { Flags |= AllowReciprocal; }
+ void setUnsafeAlgebra() {
+ Flags |= UnsafeAlgebra;
+ setNoNaNs();
+ setNoInfs();
+ setNoSignedZeros();
+ setAllowReciprocal();
+ }
+};
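+
+// Construction sketch (editorial illustration):
+//   FastMathFlags FMF;
+//   FMF.setUnsafeAlgebra(); // implies all the other fast-math flags
+//   assert(FMF.noNaNs() && FMF.allowReciprocal());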
+
+
+/// FPMathOperator - Utility class for floating point operations which can have
+/// information about relaxed accuracy requirements attached to them.
+class FPMathOperator : public Operator {
+private:
+ friend class Instruction;
+
+ void setHasUnsafeAlgebra(bool B) {
+ SubclassOptionalData =
+ (SubclassOptionalData & ~FastMathFlags::UnsafeAlgebra) |
+ (B * FastMathFlags::UnsafeAlgebra);
+
+ // Unsafe algebra implies all the others
+ if (B) {
+ setHasNoNaNs(true);
+ setHasNoInfs(true);
+ setHasNoSignedZeros(true);
+ setHasAllowReciprocal(true);
+ }
+ }
+ void setHasNoNaNs(bool B) {
+ SubclassOptionalData =
+ (SubclassOptionalData & ~FastMathFlags::NoNaNs) |
+ (B * FastMathFlags::NoNaNs);
+ }
+ void setHasNoInfs(bool B) {
+ SubclassOptionalData =
+ (SubclassOptionalData & ~FastMathFlags::NoInfs) |
+ (B * FastMathFlags::NoInfs);
+ }
+ void setHasNoSignedZeros(bool B) {
+ SubclassOptionalData =
+ (SubclassOptionalData & ~FastMathFlags::NoSignedZeros) |
+ (B * FastMathFlags::NoSignedZeros);
+ }
+ void setHasAllowReciprocal(bool B) {
+ SubclassOptionalData =
+ (SubclassOptionalData & ~FastMathFlags::AllowReciprocal) |
+ (B * FastMathFlags::AllowReciprocal);
+ }
+
+ /// Convenience function for setting all the fast-math flags
+ void setFastMathFlags(FastMathFlags FMF) {
+ SubclassOptionalData |= FMF.Flags;
+ }
+
+public:
+ /// Test whether this operation is permitted to be
+ /// algebraically transformed, aka the 'A' fast-math property.
+ bool hasUnsafeAlgebra() const {
+ return (SubclassOptionalData & FastMathFlags::UnsafeAlgebra) != 0;
+ }
+
+ /// Test whether this operation's arguments and results are to be
+ /// treated as non-NaN, aka the 'N' fast-math property.
+ bool hasNoNaNs() const {
+ return (SubclassOptionalData & FastMathFlags::NoNaNs) != 0;
+ }
+
+ /// Test whether this operation's arguments and results are to be
+  /// treated as non-Inf, aka the 'I' fast-math property.
+ bool hasNoInfs() const {
+ return (SubclassOptionalData & FastMathFlags::NoInfs) != 0;
+ }
+
+ /// Test whether this operation can treat the sign of zero
+ /// as insignificant, aka the 'S' fast-math property.
+ bool hasNoSignedZeros() const {
+ return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0;
+ }
+
+ /// Test whether this operation is permitted to use
+ /// reciprocal instead of division, aka the 'R' fast-math property.
+ bool hasAllowReciprocal() const {
+ return (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0;
+ }
+
+ /// Convenience function for getting all the fast-math flags
+ FastMathFlags getFastMathFlags() const {
+ return FastMathFlags(SubclassOptionalData);
+ }
+
+ /// \brief Get the maximum error permitted by this operation in ULPs. An
+ /// accuracy of 0.0 means that the operation should be performed with the
+ /// default precision.
+ float getFPAccuracy() const;
+
+ static inline bool classof(const Instruction *I) {
+ return I->getType()->isFPOrFPVectorTy();
+ }
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
+
+/// ConcreteOperator - A helper template for defining operators for individual
+/// opcodes.
+template<typename SuperClass, unsigned Opc>
+class ConcreteOperator : public SuperClass {
+public:
+ static inline bool classof(const Instruction *I) {
+ return I->getOpcode() == Opc;
+ }
+ static inline bool classof(const ConstantExpr *CE) {
+ return CE->getOpcode() == Opc;
+ }
+ static inline bool classof(const Value *V) {
+ return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
+ (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
+ }
+};
+
+class AddOperator
+ : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {
+};
+class SubOperator
+ : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {
+};
+class MulOperator
+ : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {
+};
+class ShlOperator
+ : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {
+};
+
+
+class SDivOperator
+ : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {
+};
+class UDivOperator
+ : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {
+};
+class AShrOperator
+ : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {
+};
+class LShrOperator
+ : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {
+};
+
+class GEPOperator
+ : public ConcreteOperator<Operator, Instruction::GetElementPtr> {
+ enum {
+ IsInBounds = (1 << 0)
+ };
+
+ friend class GetElementPtrInst;
+ friend class ConstantExpr;
+ void setIsInBounds(bool B) {
+ SubclassOptionalData =
+ (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds);
+ }
+
+public:
+ /// isInBounds - Test whether this is an inbounds GEP, as defined
+ /// by LangRef.html.
+ bool isInBounds() const {
+ return SubclassOptionalData & IsInBounds;
+ }
+
+ inline op_iterator idx_begin() { return op_begin()+1; }
+ inline const_op_iterator idx_begin() const { return op_begin()+1; }
+ inline op_iterator idx_end() { return op_end(); }
+ inline const_op_iterator idx_end() const { return op_end(); }
+
+ Value *getPointerOperand() {
+ return getOperand(0);
+ }
+ const Value *getPointerOperand() const {
+ return getOperand(0);
+ }
+ static unsigned getPointerOperandIndex() {
+ return 0U; // get index for modifying correct operand
+ }
+
+  /// getPointerOperandType - Method to return the type of the pointer operand.
+ Type *getPointerOperandType() const {
+ return getPointerOperand()->getType();
+ }
+
+ /// getPointerAddressSpace - Method to return the address space of the
+ /// pointer operand.
+ unsigned getPointerAddressSpace() const {
+ return cast<PointerType>(getPointerOperandType())->getAddressSpace();
+ }
+
+ unsigned getNumIndices() const { // Note: always non-negative
+ return getNumOperands() - 1;
+ }
+
+ bool hasIndices() const {
+ return getNumOperands() > 1;
+ }
+
+ /// hasAllZeroIndices - Return true if all of the indices of this GEP are
+ /// zeros. If so, the result pointer and the first operand have the same
+ /// value, just potentially different types.
+ bool hasAllZeroIndices() const {
+ for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
+ if (ConstantInt *C = dyn_cast<ConstantInt>(I))
+ if (C->isZero())
+ continue;
+ return false;
+ }
+ return true;
+ }
+
+ /// hasAllConstantIndices - Return true if all of the indices of this GEP are
+ /// constant integers. If so, the result pointer and the first operand have
+ /// a constant offset between them.
+ bool hasAllConstantIndices() const {
+ for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
+ if (!isa<ConstantInt>(I))
+ return false;
+ }
+ return true;
+ }
+
+ /// \brief Accumulate the constant address offset of this GEP if possible.
+ ///
+ /// This routine accepts an APInt into which it will accumulate the constant
+ /// offset of this GEP if the GEP is in fact constant. If the GEP is not
+ /// all-constant, it returns false and the value of the offset APInt is
+ /// undefined (it is *not* preserved!). The APInt passed into this routine
+ /// must be at least as wide as the IntPtr type for the address space of
+ /// the base GEP pointer.
+ bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const {
+ assert(Offset.getBitWidth() ==
+ DL.getPointerSizeInBits(getPointerAddressSpace()) &&
+ "The offset must have exactly as many bits as our pointer.");
+
+ for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ return false;
+ if (OpC->isZero())
+ continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = DL.getStructLayout(STy);
+ Offset += APInt(Offset.getBitWidth(),
+ SL->getElementOffset(ElementIdx));
+ continue;
+ }
+
+ // For array or vector indices, scale the index by the size of the type.
+ APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
+ Offset += Index * APInt(Offset.getBitWidth(),
+ DL.getTypeAllocSize(GTI.getIndexedType()));
+ }
+ return true;
+ }
+
+};
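+
+// Usage sketch (editorial illustration; "GEP" is a const GEPOperator* and
+// "DL" is a DataLayout for the current target):
+//   APInt Offset(DL.getPointerSizeInBits(GEP->getPointerAddressSpace()), 0);
+//   if (GEP->accumulateConstantOffset(DL, Offset))
+//     ; // Offset now holds the constant byte offset from the base pointer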
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/SymbolTableListTraits.h b/include/llvm/IR/SymbolTableListTraits.h
new file mode 100644
index 000000000000..561ce010c0e0
--- /dev/null
+++ b/include/llvm/IR/SymbolTableListTraits.h
@@ -0,0 +1,78 @@
+//===-- llvm/SymbolTableListTraits.h - Traits for iplist --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a generic class that is used to implement the automatic
+// symbol table manipulation that occurs when you put (for example) a named
+// instruction into a basic block.
+//
+// The way that this is implemented is by using a special traits class with the
+// intrusive list that makes up the list of instructions in a basic block. When
+// a new element is added to the list of instructions, the traits class is
+// notified, allowing the symbol table to be updated.
+//
+// This generic class implements the traits class. It must be generic so that
+// it can work for all of its users, which include lists of instructions, basic
+// blocks, arguments, functions, global variables, etc...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_SYMBOLTABLELISTTRAITS_H
+#define LLVM_IR_SYMBOLTABLELISTTRAITS_H
+
+#include "llvm/ADT/ilist.h"
+
+namespace llvm {
+class ValueSymbolTable;
+
+template<typename NodeTy> class ilist_iterator;
+template<typename NodeTy, typename Traits> class iplist;
+template<typename Ty> struct ilist_traits;
+
+// ValueSubClass - The type of objects that I hold, e.g. Instruction.
+// ItemParentClass - The type of object that owns the list, e.g. BasicBlock.
+//
+template<typename ValueSubClass, typename ItemParentClass>
+class SymbolTableListTraits : public ilist_default_traits<ValueSubClass> {
+ typedef ilist_traits<ValueSubClass> TraitsClass;
+public:
+ SymbolTableListTraits() {}
+
+ /// getListOwner - Return the object that owns this list. If this is a list
+ /// of instructions, it returns the BasicBlock that owns them.
+ ItemParentClass *getListOwner() {
+ size_t Offset(size_t(&((ItemParentClass*)0->*ItemParentClass::
+ getSublistAccess(static_cast<ValueSubClass*>(0)))));
+ iplist<ValueSubClass>* Anchor(static_cast<iplist<ValueSubClass>*>(this));
+ return reinterpret_cast<ItemParentClass*>(reinterpret_cast<char*>(Anchor)-
+ Offset);
+ }
+
+ static iplist<ValueSubClass> &getList(ItemParentClass *Par) {
+ return Par->*(Par->getSublistAccess((ValueSubClass*)0));
+ }
+
+ static ValueSymbolTable *getSymTab(ItemParentClass *Par) {
+ return Par ? toPtr(Par->getValueSymbolTable()) : 0;
+ }
+
+ void addNodeToList(ValueSubClass *V);
+ void removeNodeFromList(ValueSubClass *V);
+ void transferNodesFromList(ilist_traits<ValueSubClass> &L2,
+ ilist_iterator<ValueSubClass> first,
+ ilist_iterator<ValueSubClass> last);
+//private:
+ template<typename TPtr>
+ void setSymTabObject(TPtr *, TPtr);
+ static ValueSymbolTable *toPtr(ValueSymbolTable *P) { return P; }
+ static ValueSymbolTable *toPtr(ValueSymbolTable &R) { return &R; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h
new file mode 100644
index 000000000000..d89ae243f5e7
--- /dev/null
+++ b/include/llvm/IR/Type.h
@@ -0,0 +1,472 @@
+//===-- llvm/Type.h - Classes for handling data types -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the Type class. For more "Type"
+// stuff, look in DerivedTypes.h.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_TYPE_H
+#define LLVM_IR_TYPE_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+class PointerType;
+class IntegerType;
+class raw_ostream;
+class Module;
+class LLVMContext;
+class LLVMContextImpl;
+class StringRef;
+template<class GraphType> struct GraphTraits;
+
+/// The instances of the Type class are immutable: once they are created,
+/// they are never changed. Also note that only one instance of a particular
+/// type is ever created. Thus seeing if two types are equal is a matter of
+/// doing a trivial pointer comparison. To enforce that no two equal instances
+/// are created, Type instances can only be created via static factory methods
+/// in class Type and in derived classes. Once allocated, Types are never
+/// free'd.
+///
+class Type {
+public:
+ //===--------------------------------------------------------------------===//
+ /// Definitions of all of the base types for the Type system. Based on this
+ /// value, you can cast to a class defined in DerivedTypes.h.
+ /// Note: If you add an element to this, you need to add an element to the
+ /// Type::getPrimitiveType function, or else things will break!
+  /// Also update LLVMTypeKind and LLVMGetTypeKind() in the C binding.
+ ///
+ enum TypeID {
+ // PrimitiveTypes - make sure LastPrimitiveTyID stays up to date.
+ VoidTyID = 0, ///< 0: type with no size
+ HalfTyID, ///< 1: 16-bit floating point type
+ FloatTyID, ///< 2: 32-bit floating point type
+ DoubleTyID, ///< 3: 64-bit floating point type
+ X86_FP80TyID, ///< 4: 80-bit floating point type (X87)
+ FP128TyID, ///< 5: 128-bit floating point type (112-bit mantissa)
+ PPC_FP128TyID, ///< 6: 128-bit floating point type (two 64-bits, PowerPC)
+ LabelTyID, ///< 7: Labels
+ MetadataTyID, ///< 8: Metadata
+ X86_MMXTyID, ///< 9: MMX vectors (64 bits, X86 specific)
+
+ // Derived types... see DerivedTypes.h file.
+ // Make sure FirstDerivedTyID stays up to date!
+ IntegerTyID, ///< 10: Arbitrary bit width integers
+ FunctionTyID, ///< 11: Functions
+ StructTyID, ///< 12: Structures
+ ArrayTyID, ///< 13: Arrays
+ PointerTyID, ///< 14: Pointers
+ VectorTyID, ///< 15: SIMD 'packed' format, or other vector type
+
+ NumTypeIDs, // Must remain as last defined ID
+ LastPrimitiveTyID = X86_MMXTyID,
+ FirstDerivedTyID = IntegerTyID
+ };
+
+private:
+ /// Context - This refers to the LLVMContext in which this type was uniqued.
+ LLVMContext &Context;
+
+ // Due to Ubuntu GCC bug 910363:
+ // https://bugs.launchpad.net/ubuntu/+source/gcc-4.5/+bug/910363
+ // Bitpack ID and SubclassData manually.
+ // Note: TypeID : low 8 bit; SubclassData : high 24 bit.
+ uint32_t IDAndSubclassData;
+
+protected:
+ friend class LLVMContextImpl;
+ explicit Type(LLVMContext &C, TypeID tid)
+ : Context(C), IDAndSubclassData(0),
+ NumContainedTys(0), ContainedTys(0) {
+ setTypeID(tid);
+ }
+ ~Type() {}
+
+ void setTypeID(TypeID ID) {
+ IDAndSubclassData = (ID & 0xFF) | (IDAndSubclassData & 0xFFFFFF00);
+ assert(getTypeID() == ID && "TypeID data too large for field");
+ }
+
+ unsigned getSubclassData() const { return IDAndSubclassData >> 8; }
+
+ void setSubclassData(unsigned val) {
+ IDAndSubclassData = (IDAndSubclassData & 0xFF) | (val << 8);
+ // Ensure we don't have any accidental truncation.
+ assert(getSubclassData() == val && "Subclass data too large for field");
+ }
+
+ /// NumContainedTys - Keeps track of how many Type*'s there are in the
+ /// ContainedTys list.
+ unsigned NumContainedTys;
+
+ /// ContainedTys - A pointer to the array of Types contained by this Type.
+ /// For example, this includes the arguments of a function type, the elements
+ /// of a structure, the pointee of a pointer, the element type of an array,
+ /// etc. This pointer may be 0 for types that don't contain other types
+ /// (Integer, Double, Float).
+ Type * const *ContainedTys;
+
+public:
+ void print(raw_ostream &O) const;
+ void dump() const;
+
+ /// getContext - Return the LLVMContext in which this type was uniqued.
+ LLVMContext &getContext() const { return Context; }
+
+ //===--------------------------------------------------------------------===//
+ // Accessors for working with types.
+ //
+
+ /// getTypeID - Return the type id for the type. This will return one
+ /// of the TypeID enum elements defined above.
+ ///
+ TypeID getTypeID() const { return (TypeID)(IDAndSubclassData & 0xFF); }
+
+ /// isVoidTy - Return true if this is 'void'.
+ bool isVoidTy() const { return getTypeID() == VoidTyID; }
+
+ /// isHalfTy - Return true if this is 'half', a 16-bit IEEE fp type.
+ bool isHalfTy() const { return getTypeID() == HalfTyID; }
+
+ /// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type.
+ bool isFloatTy() const { return getTypeID() == FloatTyID; }
+
+ /// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type.
+ bool isDoubleTy() const { return getTypeID() == DoubleTyID; }
+
+ /// isX86_FP80Ty - Return true if this is x86 long double.
+ bool isX86_FP80Ty() const { return getTypeID() == X86_FP80TyID; }
+
+ /// isFP128Ty - Return true if this is 'fp128'.
+ bool isFP128Ty() const { return getTypeID() == FP128TyID; }
+
+ /// isPPC_FP128Ty - Return true if this is powerpc long double.
+ bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; }
+
+ /// isFloatingPointTy - Return true if this is one of the six floating point
+ /// types
+ bool isFloatingPointTy() const {
+ return getTypeID() == HalfTyID || getTypeID() == FloatTyID ||
+ getTypeID() == DoubleTyID ||
+ getTypeID() == X86_FP80TyID || getTypeID() == FP128TyID ||
+ getTypeID() == PPC_FP128TyID;
+ }
+
+ const fltSemantics &getFltSemantics() const {
+ switch (getTypeID()) {
+ case HalfTyID: return APFloat::IEEEhalf;
+ case FloatTyID: return APFloat::IEEEsingle;
+ case DoubleTyID: return APFloat::IEEEdouble;
+ case X86_FP80TyID: return APFloat::x87DoubleExtended;
+ case FP128TyID: return APFloat::IEEEquad;
+ case PPC_FP128TyID: return APFloat::PPCDoubleDouble;
+ default: llvm_unreachable("Invalid floating type");
+ }
+ }
+
+ /// isX86_MMXTy - Return true if this is X86 MMX.
+ bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; }
+
+ /// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP.
+ ///
+ bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); }
+
+ /// isLabelTy - Return true if this is 'label'.
+ bool isLabelTy() const { return getTypeID() == LabelTyID; }
+
+ /// isMetadataTy - Return true if this is 'metadata'.
+ bool isMetadataTy() const { return getTypeID() == MetadataTyID; }
+
+ /// isIntegerTy - True if this is an instance of IntegerType.
+ ///
+ bool isIntegerTy() const { return getTypeID() == IntegerTyID; }
+
+ /// isIntegerTy - Return true if this is an IntegerType of the given width.
+ bool isIntegerTy(unsigned Bitwidth) const;
+
+ /// isIntOrIntVectorTy - Return true if this is an integer type or a vector of
+ /// integer types.
+ ///
+ bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); }
+
+ /// isFunctionTy - True if this is an instance of FunctionType.
+ ///
+ bool isFunctionTy() const { return getTypeID() == FunctionTyID; }
+
+ /// isStructTy - True if this is an instance of StructType.
+ ///
+ bool isStructTy() const { return getTypeID() == StructTyID; }
+
+ /// isArrayTy - True if this is an instance of ArrayType.
+ ///
+ bool isArrayTy() const { return getTypeID() == ArrayTyID; }
+
+ /// isPointerTy - True if this is an instance of PointerType.
+ ///
+ bool isPointerTy() const { return getTypeID() == PointerTyID; }
+
+ /// isPtrOrPtrVectorTy - Return true if this is a pointer type or a vector of
+ /// pointer types.
+ ///
+ bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); }
+
+ /// isVectorTy - True if this is an instance of VectorType.
+ ///
+ bool isVectorTy() const { return getTypeID() == VectorTyID; }
+
+ /// canLosslesslyBitCastTo - Return true if this type could be converted
+ /// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts
+ /// are valid for types of the same size only where no re-interpretation of
+ /// the bits is done.
+ /// @brief Determine if this type could be losslessly bitcast to Ty
+ bool canLosslesslyBitCastTo(Type *Ty) const;
+
+ /// isEmptyTy - Return true if this type is empty, that is, it has no
+ /// elements or all its elements are empty.
+ bool isEmptyTy() const;
+
+  /// Here are some useful little methods to query what kind of derived type
+  /// this is. Note that for all other types, one can just compare to see if
+  /// this == Type::xxxTy.
+ ///
+ bool isPrimitiveType() const { return getTypeID() <= LastPrimitiveTyID; }
+ bool isDerivedType() const { return getTypeID() >= FirstDerivedTyID; }
+
+ /// isFirstClassType - Return true if the type is "first class", meaning it
+ /// is a valid type for a Value.
+ ///
+ bool isFirstClassType() const {
+ return getTypeID() != FunctionTyID && getTypeID() != VoidTyID;
+ }
+
+ /// isSingleValueType - Return true if the type is a valid type for a
+ /// register in codegen. This includes all first-class types except struct
+ /// and array types.
+ ///
+ bool isSingleValueType() const {
+ return (getTypeID() != VoidTyID && isPrimitiveType()) ||
+ getTypeID() == IntegerTyID || getTypeID() == PointerTyID ||
+ getTypeID() == VectorTyID;
+ }
+
+ /// isAggregateType - Return true if the type is an aggregate type. This
+ /// means it is valid as the first operand of an insertvalue or
+ /// extractvalue instruction. This includes struct and array types, but
+ /// does not include vector types.
+ ///
+ bool isAggregateType() const {
+ return getTypeID() == StructTyID || getTypeID() == ArrayTyID;
+ }
+
+  /// isSized - Return true if it makes sense to take the size of this type.
+  /// To get the actual size for a particular target, use the DataLayout
+  /// subsystem.
+ ///
+ bool isSized() const {
+ // If it's a primitive, it is always sized.
+ if (getTypeID() == IntegerTyID || isFloatingPointTy() ||
+ getTypeID() == PointerTyID ||
+ getTypeID() == X86_MMXTyID)
+ return true;
+ // If it is not something that can have a size (e.g. a function or label),
+ // it doesn't have a size.
+ if (getTypeID() != StructTyID && getTypeID() != ArrayTyID &&
+ getTypeID() != VectorTyID)
+ return false;
+ // Otherwise we have to try harder to decide.
+ return isSizedDerivedType();
+ }
+
+ /// getPrimitiveSizeInBits - Return the basic size of this type if it is a
+ /// primitive type. These are fixed by LLVM and are not target dependent.
+ /// This will return zero if the type does not have a size or is not a
+ /// primitive type.
+ ///
+ /// Note that this may not reflect the size of memory allocated for an
+ /// instance of the type or the number of bytes that are written when an
+ /// instance of the type is stored to memory. The DataLayout class provides
+ /// additional query functions to provide this information.
+ ///
+ unsigned getPrimitiveSizeInBits() const;
+
+ /// getScalarSizeInBits - If this is a vector type, return the
+ /// getPrimitiveSizeInBits value for the element type. Otherwise return the
+ /// getPrimitiveSizeInBits value for this type.
+ unsigned getScalarSizeInBits();
+
+ /// getFPMantissaWidth - Return the width of the mantissa of this type. This
+ /// is only valid on floating point types. If the FP type does not
+ /// have a stable mantissa (e.g. ppc long double), this method returns -1.
+ int getFPMantissaWidth() const;
+
+ /// getScalarType - If this is a vector type, return the element type,
+ /// otherwise return 'this'.
+ const Type *getScalarType() const;
+ Type *getScalarType();
+
+ //===--------------------------------------------------------------------===//
+ // Type Iteration support.
+ //
+ typedef Type * const *subtype_iterator;
+ subtype_iterator subtype_begin() const { return ContainedTys; }
+ subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];}
+
+ /// getContainedType - This method is used to implement the type iterator
+  /// (defined at the end of the file). For derived types, this returns the
+ /// types 'contained' in the derived type.
+ ///
+ Type *getContainedType(unsigned i) const {
+ assert(i < NumContainedTys && "Index out of range!");
+ return ContainedTys[i];
+ }
+
+ /// getNumContainedTypes - Return the number of types in the derived type.
+ ///
+ unsigned getNumContainedTypes() const { return NumContainedTys; }
+
+ //===--------------------------------------------------------------------===//
+ // Helper methods corresponding to subclass methods. This forces a cast to
+ // the specified subclass and calls its accessor. "getVectorNumElements" (for
+ // example) is shorthand for cast<VectorType>(Ty)->getNumElements(). This is
+  // only intended to cover the core methods that are frequently used; helper
+  // methods should not be added here.
+
+ unsigned getIntegerBitWidth() const;
+
+ Type *getFunctionParamType(unsigned i) const;
+ unsigned getFunctionNumParams() const;
+ bool isFunctionVarArg() const;
+
+ StringRef getStructName() const;
+ unsigned getStructNumElements() const;
+ Type *getStructElementType(unsigned N) const;
+
+ Type *getSequentialElementType() const;
+
+ uint64_t getArrayNumElements() const;
+ Type *getArrayElementType() const { return getSequentialElementType(); }
+
+ unsigned getVectorNumElements() const;
+ Type *getVectorElementType() const { return getSequentialElementType(); }
+
+ Type *getPointerElementType() const { return getSequentialElementType(); }
+
+ /// \brief Get the address space of this pointer or pointer vector type.
+ unsigned getPointerAddressSpace() const;
+
+ //===--------------------------------------------------------------------===//
+ // Static members exported by the Type class itself. Useful for getting
+ // instances of Type.
+ //
+
+ /// getPrimitiveType - Return a type based on an identifier.
+ static Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber);
+
+ //===--------------------------------------------------------------------===//
+ // These are the builtin types that are always available.
+ //
+ static Type *getVoidTy(LLVMContext &C);
+ static Type *getLabelTy(LLVMContext &C);
+ static Type *getHalfTy(LLVMContext &C);
+ static Type *getFloatTy(LLVMContext &C);
+ static Type *getDoubleTy(LLVMContext &C);
+ static Type *getMetadataTy(LLVMContext &C);
+ static Type *getX86_FP80Ty(LLVMContext &C);
+ static Type *getFP128Ty(LLVMContext &C);
+ static Type *getPPC_FP128Ty(LLVMContext &C);
+ static Type *getX86_MMXTy(LLVMContext &C);
+ static IntegerType *getIntNTy(LLVMContext &C, unsigned N);
+ static IntegerType *getInt1Ty(LLVMContext &C);
+ static IntegerType *getInt8Ty(LLVMContext &C);
+ static IntegerType *getInt16Ty(LLVMContext &C);
+ static IntegerType *getInt32Ty(LLVMContext &C);
+ static IntegerType *getInt64Ty(LLVMContext &C);
+
+ //===--------------------------------------------------------------------===//
+ // Convenience methods for getting pointer types with one of the above builtin
+ // types as pointee.
+ //
+ static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0);
+ static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
+
+ /// getPointerTo - Return a pointer to the current type. This is equivalent
+ /// to PointerType::get(Foo, AddrSpace).
+ PointerType *getPointerTo(unsigned AddrSpace = 0);
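+
+  // Construction sketch (editorial illustration; "Ctx" is any LLVMContext):
+  //   Type *I32 = Type::getInt32Ty(Ctx);
+  //   PointerType *I32Ptr = I32->getPointerTo();     // i32*
+  //   PointerType *I8Ptr  = Type::getInt8PtrTy(Ctx); // i8*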
+
+private:
+ /// isSizedDerivedType - Derived types like structures and arrays are sized
+ /// iff all of the members of the type are sized as well. Since asking for
+ /// their size is relatively uncommon, move this operation out of line.
+ bool isSizedDerivedType() const;
+};
+
+// Printing of types.
+static inline raw_ostream &operator<<(raw_ostream &OS, Type &T) {
+ T.print(OS);
+ return OS;
+}
+
+// Allow isa<PointerType>(x) to work without DerivedTypes.h included.
+template <> struct isa_impl<PointerType, Type> {
+ static inline bool doit(const Type &Ty) {
+ return Ty.getTypeID() == Type::PointerTyID;
+ }
+};
+
+
+//===----------------------------------------------------------------------===//
+// Provide specializations of GraphTraits to be able to treat a type as a
+// graph of sub types.
+
+
+template <> struct GraphTraits<Type*> {
+ typedef Type NodeType;
+ typedef Type::subtype_iterator ChildIteratorType;
+
+ static inline NodeType *getEntryNode(Type *T) { return T; }
+ static inline ChildIteratorType child_begin(NodeType *N) {
+ return N->subtype_begin();
+ }
+ static inline ChildIteratorType child_end(NodeType *N) {
+ return N->subtype_end();
+ }
+};
+
+template <> struct GraphTraits<const Type*> {
+ typedef const Type NodeType;
+ typedef Type::subtype_iterator ChildIteratorType;
+
+ static inline NodeType *getEntryNode(NodeType *T) { return T; }
+ static inline ChildIteratorType child_begin(NodeType *N) {
+ return N->subtype_begin();
+ }
+ static inline ChildIteratorType child_end(NodeType *N) {
+ return N->subtype_end();
+ }
+};
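+
+// Traversal sketch (editorial illustration; po_begin/po_end come from
+// llvm/ADT/PostOrderIterator.h and use the GraphTraits above):
+//   for (po_iterator<Type*> I = po_begin(T), E = po_end(T); I != E; ++I)
+//     (*I)->dump(); // visits T's contained types before T itself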
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/TypeBuilder.h b/include/llvm/IR/TypeBuilder.h
new file mode 100644
index 000000000000..80c60a080614
--- /dev/null
+++ b/include/llvm/IR/TypeBuilder.h
@@ -0,0 +1,399 @@
+//===---- llvm/TypeBuilder.h - Builder for LLVM types -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TypeBuilder class, which is used as a convenient way to
+// create LLVM types with a consistent and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_TYPEBUILDER_H
+#define LLVM_IR_TYPEBUILDER_H
+
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include <limits.h>
+
+namespace llvm {
+
+/// TypeBuilder - This provides a uniform API for looking up types
+/// known at compile time. To support cross-compilation, we define a
+/// series of tag types in the llvm::types namespace, like i<N>,
+/// ieee_float, ppc_fp128, etc. TypeBuilder<T, false> allows T to be
+/// any of these, a native C type (whose size may depend on the host
+/// compiler), or a pointer, function, or struct type built out of
+/// these. TypeBuilder<T, true> removes native C types from this set
+/// to guarantee that its result is suitable for cross-compilation.
+/// We define the primitive types, pointer types, and functions up to
+/// 5 arguments here, but to use this class with your own types,
+/// you'll need to specialize it. For example, say you want to call a
+/// function defined externally as:
+///
+/// struct MyType {
+/// int32 a;
+/// int32 *b;
+/// void *array[1]; // Intended as a flexible array.
+/// };
+/// int8 AFunction(struct MyType *value);
+///
+/// You'll want to use
+/// Function::Create(TypeBuilder<types::i<8>(MyType*), true>::get(Context), ...)
+/// to declare the function, but when you first try this, your compiler will
+/// complain that TypeBuilder<MyType, true>::get() doesn't exist. To fix this,
+/// write:
+///
+/// namespace llvm {
+/// template<bool xcompile> class TypeBuilder<MyType, xcompile> {
+/// public:
+/// static StructType *get(LLVMContext &Context) {
+/// // If you cache this result, be sure to cache it separately
+/// // for each LLVMContext.
+/// return StructType::get(
+/// TypeBuilder<types::i<32>, xcompile>::get(Context),
+/// TypeBuilder<types::i<32>*, xcompile>::get(Context),
+/// TypeBuilder<types::i<8>*[], xcompile>::get(Context),
+/// NULL);
+/// }
+///
+/// // You may find this a convenient place to put some constants
+/// // to help with getelementptr. They don't have any effect on
+/// // the operation of TypeBuilder.
+/// enum Fields {
+/// FIELD_A,
+/// FIELD_B,
+/// FIELD_ARRAY
+/// };
+/// }
+/// } // namespace llvm
+///
+/// TypeBuilder cannot handle recursive types or types you only know at runtime.
+/// If you try to give it a recursive type, it will deadlock, infinitely
+/// recurse, or do something similarly undesirable.
+template<typename T, bool cross_compilable> class TypeBuilder {};
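+
+// Illustrative sketch (editorial addition, not part of the original header):
+// given the specializations below, the cross-compilable type for a function
+// signature such as `i8 (i32, i8*)` can be obtained as:
+//
+//   LLVMContext Context;
+//   FunctionType *FT = TypeBuilder<types::i<8>(types::i<32>, types::i<8>*),
+//                                  true>::get(Context);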
+
+// Types for use with cross-compilable TypeBuilders. Each of these corresponds
+// exactly to an LLVM-native type.
+namespace types {
+/// i<N> corresponds to the LLVM IntegerType with N bits.
+template<uint32_t num_bits> class i {};
+
+// The following classes represent the LLVM floating types.
+class ieee_float {};
+class ieee_double {};
+class x86_fp80 {};
+class fp128 {};
+class ppc_fp128 {};
+// X86 MMX.
+class x86_mmx {};
+} // namespace types
+
+// LLVM doesn't have const or volatile types.
+template<typename T, bool cross> class TypeBuilder<const T, cross>
+ : public TypeBuilder<T, cross> {};
+template<typename T, bool cross> class TypeBuilder<volatile T, cross>
+ : public TypeBuilder<T, cross> {};
+template<typename T, bool cross> class TypeBuilder<const volatile T, cross>
+ : public TypeBuilder<T, cross> {};
+
+// Pointers
+template<typename T, bool cross> class TypeBuilder<T*, cross> {
+public:
+ static PointerType *get(LLVMContext &Context) {
+ return PointerType::getUnqual(TypeBuilder<T,cross>::get(Context));
+ }
+};
+
+/// There is no support for references.
+template<typename T, bool cross> class TypeBuilder<T&, cross> {};
+
+// Arrays
+template<typename T, size_t N, bool cross> class TypeBuilder<T[N], cross> {
+public:
+ static ArrayType *get(LLVMContext &Context) {
+ return ArrayType::get(TypeBuilder<T, cross>::get(Context), N);
+ }
+};
+/// LLVM uses an array of length 0 to represent an unknown-length array.
+template<typename T, bool cross> class TypeBuilder<T[], cross> {
+public:
+ static ArrayType *get(LLVMContext &Context) {
+ return ArrayType::get(TypeBuilder<T, cross>::get(Context), 0);
+ }
+};
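+
+// For example (editorial sketch): TypeBuilder<types::i<8>[5], true>::get(C)
+// yields the LLVM type [5 x i8], while TypeBuilder<types::i<8>[], true>
+// yields [0 x i8], per the unknown-length convention above.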
+
+// Define the C integral types only for TypeBuilder<T, false>.
+//
+// C integral types do not have a defined size. It would be nice to use the
+// stdint.h-defined typedefs that do have defined sizes, but we'd run into the
+// following problem:
+//
+// On an ILP32 machine, stdint.h might define:
+//
+// typedef int int32_t;
+// typedef long long int64_t;
+// typedef long size_t;
+//
+// If we defined TypeBuilder<int32_t> and TypeBuilder<int64_t>, then any use of
+// TypeBuilder<size_t> would fail. We couldn't define TypeBuilder<size_t> in
+// addition to the defined-size types because we'd get duplicate definitions on
+// platforms where stdint.h instead defines:
+//
+// typedef int int32_t;
+// typedef long long int64_t;
+// typedef int size_t;
+//
+// So we define all the primitive C types and nothing else.
+#define DEFINE_INTEGRAL_TYPEBUILDER(T) \
+template<> class TypeBuilder<T, false> { \
+public: \
+ static IntegerType *get(LLVMContext &Context) { \
+ return IntegerType::get(Context, sizeof(T) * CHAR_BIT); \
+ } \
+}; \
+template<> class TypeBuilder<T, true> { \
+ /* We provide a definition here so users don't accidentally */ \
+ /* define these types to work. */ \
+}
+DEFINE_INTEGRAL_TYPEBUILDER(char);
+DEFINE_INTEGRAL_TYPEBUILDER(signed char);
+DEFINE_INTEGRAL_TYPEBUILDER(unsigned char);
+DEFINE_INTEGRAL_TYPEBUILDER(short);
+DEFINE_INTEGRAL_TYPEBUILDER(unsigned short);
+DEFINE_INTEGRAL_TYPEBUILDER(int);
+DEFINE_INTEGRAL_TYPEBUILDER(unsigned int);
+DEFINE_INTEGRAL_TYPEBUILDER(long);
+DEFINE_INTEGRAL_TYPEBUILDER(unsigned long);
+#ifdef _MSC_VER
+DEFINE_INTEGRAL_TYPEBUILDER(__int64);
+DEFINE_INTEGRAL_TYPEBUILDER(unsigned __int64);
+#else /* _MSC_VER */
+DEFINE_INTEGRAL_TYPEBUILDER(long long);
+DEFINE_INTEGRAL_TYPEBUILDER(unsigned long long);
+#endif /* _MSC_VER */
+#undef DEFINE_INTEGRAL_TYPEBUILDER
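+
+// For example (editorial sketch): on a host where sizeof(int) == 4,
+// TypeBuilder<int, false>::get(C) returns the same IntegerType as
+// IntegerType::get(C, 32). TypeBuilder<int, true> is deliberately left empty,
+// so using a native C type in a cross-compilable context is a compile error
+// rather than a silent host dependency.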
+
+template<uint32_t num_bits, bool cross>
+class TypeBuilder<types::i<num_bits>, cross> {
+public:
+ static IntegerType *get(LLVMContext &C) {
+ return IntegerType::get(C, num_bits);
+ }
+};
+
+template<> class TypeBuilder<float, false> {
+public:
+ static Type *get(LLVMContext& C) {
+ return Type::getFloatTy(C);
+ }
+};
+template<> class TypeBuilder<float, true> {};
+
+template<> class TypeBuilder<double, false> {
+public:
+ static Type *get(LLVMContext& C) {
+ return Type::getDoubleTy(C);
+ }
+};
+template<> class TypeBuilder<double, true> {};
+
+template<bool cross> class TypeBuilder<types::ieee_float, cross> {
+public:
+ static Type *get(LLVMContext& C) { return Type::getFloatTy(C); }
+};
+template<bool cross> class TypeBuilder<types::ieee_double, cross> {
+public:
+ static Type *get(LLVMContext& C) { return Type::getDoubleTy(C); }
+};
+template<bool cross> class TypeBuilder<types::x86_fp80, cross> {
+public:
+ static Type *get(LLVMContext& C) { return Type::getX86_FP80Ty(C); }
+};
+template<bool cross> class TypeBuilder<types::fp128, cross> {
+public:
+ static Type *get(LLVMContext& C) { return Type::getFP128Ty(C); }
+};
+template<bool cross> class TypeBuilder<types::ppc_fp128, cross> {
+public:
+ static Type *get(LLVMContext& C) { return Type::getPPC_FP128Ty(C); }
+};
+template<bool cross> class TypeBuilder<types::x86_mmx, cross> {
+public:
+ static Type *get(LLVMContext& C) { return Type::getX86_MMXTy(C); }
+};
+
+template<bool cross> class TypeBuilder<void, cross> {
+public:
+ static Type *get(LLVMContext &C) {
+ return Type::getVoidTy(C);
+ }
+};
+
+/// void* is disallowed in LLVM types, but it occurs often enough in C code that
+/// we special case it.
+template<> class TypeBuilder<void*, false>
+ : public TypeBuilder<types::i<8>*, false> {};
+template<> class TypeBuilder<const void*, false>
+ : public TypeBuilder<types::i<8>*, false> {};
+template<> class TypeBuilder<volatile void*, false>
+ : public TypeBuilder<types::i<8>*, false> {};
+template<> class TypeBuilder<const volatile void*, false>
+ : public TypeBuilder<types::i<8>*, false> {};
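+
+// For example (editorial sketch): TypeBuilder<void*, false>::get(C) yields the
+// same i8* type as Type::getInt8PtrTy(C).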
+
+template<typename R, bool cross> class TypeBuilder<R(), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context), false);
+ }
+};
+template<typename R, typename A1, bool cross> class TypeBuilder<R(A1), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ Type *params[] = {
+ TypeBuilder<A1, cross>::get(Context),
+ };
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, false);
+ }
+};
+template<typename R, typename A1, typename A2, bool cross>
+class TypeBuilder<R(A1, A2), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ Type *params[] = {
+ TypeBuilder<A1, cross>::get(Context),
+ TypeBuilder<A2, cross>::get(Context),
+ };
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, false);
+ }
+};
+template<typename R, typename A1, typename A2, typename A3, bool cross>
+class TypeBuilder<R(A1, A2, A3), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ Type *params[] = {
+ TypeBuilder<A1, cross>::get(Context),
+ TypeBuilder<A2, cross>::get(Context),
+ TypeBuilder<A3, cross>::get(Context),
+ };
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, false);
+ }
+};
+
+template<typename R, typename A1, typename A2, typename A3, typename A4,
+ bool cross>
+class TypeBuilder<R(A1, A2, A3, A4), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ Type *params[] = {
+ TypeBuilder<A1, cross>::get(Context),
+ TypeBuilder<A2, cross>::get(Context),
+ TypeBuilder<A3, cross>::get(Context),
+ TypeBuilder<A4, cross>::get(Context),
+ };
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, false);
+ }
+};
+
+template<typename R, typename A1, typename A2, typename A3, typename A4,
+ typename A5, bool cross>
+class TypeBuilder<R(A1, A2, A3, A4, A5), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ Type *params[] = {
+ TypeBuilder<A1, cross>::get(Context),
+ TypeBuilder<A2, cross>::get(Context),
+ TypeBuilder<A3, cross>::get(Context),
+ TypeBuilder<A4, cross>::get(Context),
+ TypeBuilder<A5, cross>::get(Context),
+ };
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, false);
+ }
+};
+
+template<typename R, bool cross> class TypeBuilder<R(...), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context), true);
+ }
+};
+template<typename R, typename A1, bool cross>
+class TypeBuilder<R(A1, ...), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ Type *params[] = {
+ TypeBuilder<A1, cross>::get(Context),
+ };
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context), params, true);
+ }
+};
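+
+// For example (editorial sketch): a printf-like prototype `i32 (i8*, ...)` is
+// built with this specialization:
+//
+//   FunctionType *PrintfTy =
+//       TypeBuilder<types::i<32>(types::i<8>*, ...), true>::get(Context);
+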
+template<typename R, typename A1, typename A2, bool cross>
+class TypeBuilder<R(A1, A2, ...), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ Type *params[] = {
+ TypeBuilder<A1, cross>::get(Context),
+ TypeBuilder<A2, cross>::get(Context),
+ };
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, true);
+ }
+};
+template<typename R, typename A1, typename A2, typename A3, bool cross>
+class TypeBuilder<R(A1, A2, A3, ...), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ Type *params[] = {
+ TypeBuilder<A1, cross>::get(Context),
+ TypeBuilder<A2, cross>::get(Context),
+ TypeBuilder<A3, cross>::get(Context),
+ };
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, true);
+ }
+};
+
+template<typename R, typename A1, typename A2, typename A3, typename A4,
+ bool cross>
+class TypeBuilder<R(A1, A2, A3, A4, ...), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ Type *params[] = {
+ TypeBuilder<A1, cross>::get(Context),
+ TypeBuilder<A2, cross>::get(Context),
+ TypeBuilder<A3, cross>::get(Context),
+ TypeBuilder<A4, cross>::get(Context),
+ };
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, true);
+ }
+};
+
+template<typename R, typename A1, typename A2, typename A3, typename A4,
+ typename A5, bool cross>
+class TypeBuilder<R(A1, A2, A3, A4, A5, ...), cross> {
+public:
+ static FunctionType *get(LLVMContext &Context) {
+ Type *params[] = {
+ TypeBuilder<A1, cross>::get(Context),
+ TypeBuilder<A2, cross>::get(Context),
+ TypeBuilder<A3, cross>::get(Context),
+ TypeBuilder<A4, cross>::get(Context),
+ TypeBuilder<A5, cross>::get(Context),
+ };
+ return FunctionType::get(TypeBuilder<R, cross>::get(Context),
+ params, true);
+ }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/IR/TypeFinder.h b/include/llvm/IR/TypeFinder.h
new file mode 100644
index 000000000000..cea66a4ab069
--- /dev/null
+++ b/include/llvm/IR/TypeFinder.h
@@ -0,0 +1,78 @@
+//===-- llvm/IR/TypeFinder.h - Class to find used struct types --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the TypeFinder class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_TYPEFINDER_H
+#define LLVM_IR_TYPEFINDER_H
+
+#include "llvm/ADT/DenseSet.h"
+#include <vector>
+
+namespace llvm {
+
+class MDNode;
+class Module;
+class StructType;
+class Type;
+class Value;
+
+/// TypeFinder - Walk over a module, identifying all of the types that are
+/// used by the module.
+class TypeFinder {
+ // To avoid walking constant expressions multiple times and other IR
+ // objects, we keep several helper maps.
+ DenseSet<const Value*> VisitedConstants;
+ DenseSet<Type*> VisitedTypes;
+
+ std::vector<StructType*> StructTypes;
+ bool OnlyNamed;
+
+public:
+ TypeFinder() : OnlyNamed(false) {}
+
+ void run(const Module &M, bool onlyNamed);
+ void clear();
+
+ typedef std::vector<StructType*>::iterator iterator;
+ typedef std::vector<StructType*>::const_iterator const_iterator;
+
+ iterator begin() { return StructTypes.begin(); }
+ iterator end() { return StructTypes.end(); }
+
+ const_iterator begin() const { return StructTypes.begin(); }
+ const_iterator end() const { return StructTypes.end(); }
+
+ bool empty() const { return StructTypes.empty(); }
+ size_t size() const { return StructTypes.size(); }
+ iterator erase(iterator I, iterator E) { return StructTypes.erase(I, E); }
+
+ StructType *&operator[](unsigned Idx) { return StructTypes[Idx]; }
+
+private:
+ /// incorporateType - This method adds the type to the list of used
+ /// structures if it's not in there already.
+ void incorporateType(Type *Ty);
+
+ /// incorporateValue - This method is used to walk operand lists finding types
+ /// hiding in constant expressions and other operands that won't be walked in
+ /// other ways. GlobalValues, basic blocks, instructions, and inst operands
+ /// are all explicitly enumerated.
+ void incorporateValue(const Value *V);
+
+ /// incorporateMDNode - This method is used to walk the operands of an MDNode
+ /// to find types hiding within.
+ void incorporateMDNode(const MDNode *V);
+};
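+
+// Illustrative usage (editorial addition, not part of the original header):
+//
+//   TypeFinder Finder;
+//   Finder.run(M, /*onlyNamed=*/true);  // M is a Module.
+//   for (TypeFinder::iterator I = Finder.begin(), E = Finder.end();
+//        I != E; ++I)
+//     (*I)->dump();  // Each named StructType used anywhere in M.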
+
+} // end llvm namespace
+
+#endif
diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h
new file mode 100644
index 000000000000..4bc7ce500058
--- /dev/null
+++ b/include/llvm/IR/Use.h
@@ -0,0 +1,219 @@
+//===-- llvm/Use.h - Definition of the Use class ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines the Use class. The Use class represents the operand of an
+// instruction or some other User instance which refers to a Value. The Use
+// class keeps the "use list" of the referenced value up to date.
+//
+// Pointer tagging is used to efficiently find the User corresponding
+// to a Use without having to store a User pointer in every Use. A
+// User is preceded in memory by all the Uses corresponding to its
+// operands, and the low bits of one of the fields (Prev) of the Use
+// class are used to encode offsets to be able to find that User given
+// a pointer to any Use. For details, see:
+//
+// http://www.llvm.org/docs/ProgrammersManual.html#UserLayout
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_USE_H
+#define LLVM_IR_USE_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Compiler.h"
+#include <cstddef>
+#include <iterator>
+
+namespace llvm {
+
+class Value;
+class User;
+class Use;
+template<typename>
+struct simplify_type;
+
+// Use** is only 4-byte aligned.
+template<>
+class PointerLikeTypeTraits<Use**> {
+public:
+ static inline void *getAsVoidPointer(Use** P) { return P; }
+ static inline Use **getFromVoidPointer(void *P) {
+ return static_cast<Use**>(P);
+ }
+ enum { NumLowBitsAvailable = 2 };
+};
+
+//===----------------------------------------------------------------------===//
+// Use Class
+//===----------------------------------------------------------------------===//
+
+/// Use is here to make keeping the "use" list of a Value up-to-date really
+/// easy.
+class Use {
+public:
+ /// swap - Provide a fast substitute for std::swap<Use>
+ /// that also works with less standard-compliant compilers.
+ void swap(Use &RHS);
+
+ // A type for the word following an array of hung-off Uses in memory, which is
+ // a pointer back to their User with the bottom bit set.
+ typedef PointerIntPair<User*, 1, unsigned> UserRef;
+
+private:
+ Use(const Use &U) LLVM_DELETED_FUNCTION;
+
+ /// Destructor - Only for zap()
+ ~Use() {
+ if (Val) removeFromList();
+ }
+
+ enum PrevPtrTag { zeroDigitTag
+ , oneDigitTag
+ , stopTag
+ , fullStopTag };
+
+ /// Constructor
+ Use(PrevPtrTag tag) : Val(0) {
+ Prev.setInt(tag);
+ }
+
+public:
+ /// Normally a Use will just implicitly convert to the Value* that it holds.
+ operator Value*() const { return Val; }
+
+ /// If implicit conversion to Value* doesn't work, the get() method returns
+ /// the Value*.
+ Value *get() const { return Val; }
+
+ /// getUser - This returns the User that contains this Use. For an
+ /// instruction operand, for example, this will return the instruction.
+ User *getUser() const;
+
+ inline void set(Value *Val);
+
+ Value *operator=(Value *RHS) {
+ set(RHS);
+ return RHS;
+ }
+ const Use &operator=(const Use &RHS) {
+ set(RHS.Val);
+ return *this;
+ }
+
+ Value *operator->() { return Val; }
+ const Value *operator->() const { return Val; }
+
+ Use *getNext() const { return Next; }
+
+
+ /// initTags - initialize the waymarking tags on an array of Uses, so that
+ /// getUser() can find the User from any of those Uses.
+ static Use *initTags(Use *Start, Use *Stop);
+
+ /// zap - This is used to destroy Use operands when the number of operands of
+ /// a User changes.
+ static void zap(Use *Start, const Use *Stop, bool del = false);
+
+private:
+ const Use* getImpliedUser() const;
+
+ Value *Val;
+ Use *Next;
+ PointerIntPair<Use**, 2, PrevPtrTag> Prev;
+
+ void setPrev(Use **NewPrev) {
+ Prev.setPointer(NewPrev);
+ }
+ void addToList(Use **List) {
+ Next = *List;
+ if (Next) Next->setPrev(&Next);
+ setPrev(List);
+ *List = this;
+ }
+ void removeFromList() {
+ Use **StrippedPrev = Prev.getPointer();
+ *StrippedPrev = Next;
+ if (Next) Next->setPrev(StrippedPrev);
+ }
+
+ friend class Value;
+};
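+
+// Illustrative sketch (editorial addition): because operator= forwards to
+// set(), writing a Value through a Use keeps the referenced value's use list
+// consistent automatically, e.g.
+//
+//   Use &U = I->getOperandUse(0);  // I is a User; see User.h.
+//   U = NewV;                      // Unlinks the old value, links NewV.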
+
+// simplify_type - Allow clients to treat uses just like values when using
+// casting operators.
+template<> struct simplify_type<Use> {
+ typedef Value* SimpleType;
+ static SimpleType getSimplifiedValue(Use &Val) {
+ return Val.get();
+ }
+};
+template<> struct simplify_type<const Use> {
+ typedef /*const*/ Value* SimpleType;
+ static SimpleType getSimplifiedValue(const Use &Val) {
+ return Val.get();
+ }
+};
+
+
+
+template<typename UserTy> // UserTy == 'User' or 'const User'
+class value_use_iterator : public std::iterator<std::forward_iterator_tag,
+ UserTy*, ptrdiff_t> {
+ typedef std::iterator<std::forward_iterator_tag, UserTy*, ptrdiff_t> super;
+ typedef value_use_iterator<UserTy> _Self;
+
+ Use *U;
+ explicit value_use_iterator(Use *u) : U(u) {}
+ friend class Value;
+public:
+ typedef typename super::reference reference;
+ typedef typename super::pointer pointer;
+
+ value_use_iterator(const _Self &I) : U(I.U) {}
+ value_use_iterator() {}
+
+ bool operator==(const _Self &x) const {
+ return U == x.U;
+ }
+ bool operator!=(const _Self &x) const {
+ return !operator==(x);
+ }
+
+ /// atEnd - return true if this iterator is equal to use_end() on the value.
+ bool atEnd() const { return U == 0; }
+
+ // Iterator traversal: forward iteration only
+ _Self &operator++() { // Preincrement
+ assert(U && "Cannot increment end iterator!");
+ U = U->getNext();
+ return *this;
+ }
+ _Self operator++(int) { // Postincrement
+ _Self tmp = *this; ++*this; return tmp;
+ }
+
+ // Retrieve a pointer to the current User.
+ UserTy *operator*() const {
+ assert(U && "Cannot dereference end iterator!");
+ return U->getUser();
+ }
+
+ UserTy *operator->() const { return operator*(); }
+
+ Use &getUse() const { return *U; }
+
+ /// getOperandNo - Return the operand # of this use in its User. Defined in
+ /// User.h
+ ///
+ unsigned getOperandNo() const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h
new file mode 100644
index 000000000000..505bdeb178e9
--- /dev/null
+++ b/include/llvm/IR/User.h
@@ -0,0 +1,205 @@
+//===-- llvm/User.h - User class definition ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class defines the interface that one who uses a Value must implement.
+// Each instance of the Value class keeps track of which Users have handles
+// to it.
+//
+// * Instructions are the largest class of Users.
+// * Constants may be users of other constants (e.g. constant arrays and
+//   structs refer to their element constants)
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_USER_H
+#define LLVM_IR_USER_H
+
+#include "llvm/IR/Value.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// OperandTraits - Compile-time customization of
+/// operand-related allocators and accessors
+/// for use by the User class
+template <class>
+struct OperandTraits;
+
+class User : public Value {
+ User(const User &) LLVM_DELETED_FUNCTION;
+ void *operator new(size_t) LLVM_DELETED_FUNCTION;
+ template <unsigned>
+ friend struct HungoffOperandTraits;
+ virtual void anchor();
+protected:
+ /// OperandList - This is a pointer to the array of Uses for this User.
+ /// For nodes of fixed arity (e.g. a binary operator) this array will live
+ /// prefixed to some derived class instance. For nodes of resizable variable
+ /// arity (e.g. PHINodes, SwitchInst etc.), this memory will be dynamically
+ /// allocated and should be destroyed by the class's virtual dtor.
+ Use *OperandList;
+
+ /// NumOperands - The number of values used by this User.
+ ///
+ unsigned NumOperands;
+
+ void *operator new(size_t s, unsigned Us);
+ User(Type *ty, unsigned vty, Use *OpList, unsigned NumOps)
+ : Value(ty, vty), OperandList(OpList), NumOperands(NumOps) {}
+ Use *allocHungoffUses(unsigned) const;
+ void dropHungoffUses() {
+ Use::zap(OperandList, OperandList + NumOperands, true);
+ OperandList = 0;
+ // Reset NumOperands so User::operator delete() does the right thing.
+ NumOperands = 0;
+ }
+public:
+ ~User() {
+ Use::zap(OperandList, OperandList + NumOperands);
+ }
+ /// operator delete - free memory allocated for User and Use objects
+ void operator delete(void *Usr);
+ /// placement delete - required by std, but never called.
+ void operator delete(void*, unsigned) {
+ llvm_unreachable("Constructor throws?");
+ }
+ /// placement delete - required by std, but never called.
+ void operator delete(void*, unsigned, bool) {
+ llvm_unreachable("Constructor throws?");
+ }
+protected:
+ template <int Idx, typename U> static Use &OpFrom(const U *that) {
+ return Idx < 0
+ ? OperandTraits<U>::op_end(const_cast<U*>(that))[Idx]
+ : OperandTraits<U>::op_begin(const_cast<U*>(that))[Idx];
+ }
+ template <int Idx> Use &Op() {
+ return OpFrom<Idx>(this);
+ }
+ template <int Idx> const Use &Op() const {
+ return OpFrom<Idx>(this);
+ }
+public:
+ Value *getOperand(unsigned i) const {
+ assert(i < NumOperands && "getOperand() out of range!");
+ return OperandList[i];
+ }
+ void setOperand(unsigned i, Value *Val) {
+ assert(i < NumOperands && "setOperand() out of range!");
+ assert((!isa<Constant>((const Value*)this) ||
+ isa<GlobalValue>((const Value*)this)) &&
+ "Cannot mutate a constant with setOperand!");
+ OperandList[i] = Val;
+ }
+ const Use &getOperandUse(unsigned i) const {
+ assert(i < NumOperands && "getOperandUse() out of range!");
+ return OperandList[i];
+ }
+ Use &getOperandUse(unsigned i) {
+ assert(i < NumOperands && "getOperandUse() out of range!");
+ return OperandList[i];
+ }
+
+ unsigned getNumOperands() const { return NumOperands; }
+
+ // ---------------------------------------------------------------------------
+ // Operand Iterator interface...
+ //
+ typedef Use* op_iterator;
+ typedef const Use* const_op_iterator;
+
+ inline op_iterator op_begin() { return OperandList; }
+ inline const_op_iterator op_begin() const { return OperandList; }
+ inline op_iterator op_end() { return OperandList+NumOperands; }
+ inline const_op_iterator op_end() const { return OperandList+NumOperands; }
+
+ /// Convenience iterator for directly iterating over the Values in the
+ /// OperandList
+ class value_op_iterator : public std::iterator<std::forward_iterator_tag,
+ Value*> {
+ op_iterator OI;
+ public:
+ explicit value_op_iterator(Use *U) : OI(U) {}
+
+ bool operator==(const value_op_iterator &x) const {
+ return OI == x.OI;
+ }
+ bool operator!=(const value_op_iterator &x) const {
+ return !operator==(x);
+ }
+
+ /// Iterator traversal: forward iteration only
+ value_op_iterator &operator++() { // Preincrement
+ ++OI;
+ return *this;
+ }
+ value_op_iterator operator++(int) { // Postincrement
+ value_op_iterator tmp = *this; ++*this; return tmp;
+ }
+
+ /// Retrieve a pointer to the current Value.
+ Value *operator*() const {
+ return *OI;
+ }
+
+ Value *operator->() const { return operator*(); }
+ };
+
+ inline value_op_iterator value_op_begin() {
+ return value_op_iterator(op_begin());
+ }
+ inline value_op_iterator value_op_end() {
+ return value_op_iterator(op_end());
+ }
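+
+ // Illustrative sketch (editorial addition): visiting every operand Value of
+ // a User U.
+ //
+ //   for (User::value_op_iterator I = U->value_op_begin(),
+ //                                E = U->value_op_end(); I != E; ++I)
+ //     if (Value *V = *I)
+ //       V->dump();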
+
+ // dropAllReferences() - This function is in charge of "letting go" of all
+ // objects that this User refers to. This allows one to
+ // 'delete' a whole cycle of mutually referencing objects at once, even though
+ // there may be circular references... First all references are dropped, and
+ // all use counts go to zero. Then everything is deleted for real. Note that
+ // no operations are valid on an object that has "dropped all references",
+ // except operator delete.
+ //
+ void dropAllReferences() {
+ for (op_iterator i = op_begin(), e = op_end(); i != e; ++i)
+ i->set(0);
+ }
+
+ /// replaceUsesOfWith - Replaces all references to the "From" definition with
+ /// references to the "To" definition.
+ ///
+ void replaceUsesOfWith(Value *From, Value *To);
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const Value *V) {
+ return isa<Instruction>(V) || isa<Constant>(V);
+ }
+};
+
+template<> struct simplify_type<User::op_iterator> {
+ typedef Value* SimpleType;
+ static SimpleType getSimplifiedValue(User::op_iterator &Val) {
+ return Val->get();
+ }
+};
+template<> struct simplify_type<User::const_op_iterator> {
+ typedef /*const*/ Value* SimpleType;
+ static SimpleType getSimplifiedValue(User::const_op_iterator &Val) {
+ return Val->get();
+ }
+};
+
+// value_use_iterator::getOperandNo - Requires the definition of the User class.
+template<typename UserTy>
+unsigned value_use_iterator<UserTy>::getOperandNo() const {
+ return U - U->getUser()->op_begin();
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h
new file mode 100644
index 000000000000..a4f78627a84d
--- /dev/null
+++ b/include/llvm/IR/Value.h
@@ -0,0 +1,411 @@
+//===-- llvm/Value.h - Definition of the Value class ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_VALUE_H
+#define LLVM_IR_VALUE_H
+
+#include "llvm/IR/Use.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class Constant;
+class Argument;
+class Instruction;
+class BasicBlock;
+class GlobalValue;
+class Function;
+class GlobalVariable;
+class GlobalAlias;
+class InlineAsm;
+class ValueSymbolTable;
+template<typename ValueTy> class StringMapEntry;
+typedef StringMapEntry<Value*> ValueName;
+class raw_ostream;
+class AssemblyAnnotationWriter;
+class ValueHandleBase;
+class LLVMContext;
+class Twine;
+class MDNode;
+class Type;
+class StringRef;
+
+//===----------------------------------------------------------------------===//
+// Value Class
+//===----------------------------------------------------------------------===//
+
+/// This is a very important LLVM class. It is the base class of all values
+/// computed by a program that may be used as operands to other values. Value is
+/// the super class of other important classes such as Instruction and Function.
+/// All Values have a Type. Type is not a subclass of Value. Some values can
+/// have a name and they belong to some Module. Setting the name on the Value
+/// automatically updates the module's symbol table.
+///
+/// Every value has a "use list" that keeps track of which other Values are
+/// using this Value. A Value can also have an arbitrary number of ValueHandle
+/// objects that watch it and listen to RAUW and Destroy events. See
+/// llvm/Support/ValueHandle.h for details.
+///
+/// @brief LLVM Value Representation
+class Value {
+ const unsigned char SubclassID; // Subclass identifier (for isa/dyn_cast)
+ unsigned char HasValueHandle : 1; // Has a ValueHandle pointing to this?
+protected:
+ /// SubclassOptionalData - This member is similar to SubclassData; however,
+ /// it holds information which may be used to aid optimization, but
+ /// which may be cleared to zero without affecting conservative
+ /// interpretation.
+ unsigned char SubclassOptionalData : 7;
+
+private:
+ /// SubclassData - This member is defined by this class, but is not used by
+ /// Value itself. Subclasses can use it to hold whatever state they find
+ /// useful. This field is initialized to zero by the ctor.
+ unsigned short SubclassData;
+
+ Type *VTy;
+ Use *UseList;
+
+ friend class ValueSymbolTable; // Allow ValueSymbolTable to directly mod Name.
+ friend class ValueHandleBase;
+ ValueName *Name;
+
+ void operator=(const Value &) LLVM_DELETED_FUNCTION;
+ Value(const Value &) LLVM_DELETED_FUNCTION;
+
+protected:
+ /// printCustom - Value subclasses can override this to implement custom
+ /// printing behavior.
+ virtual void printCustom(raw_ostream &O) const;
+
+ Value(Type *Ty, unsigned scid);
+public:
+ virtual ~Value();
+
+ /// dump - Support for debugging, callable in GDB: V->dump()
+ //
+ void dump() const;
+
+ /// print - Implement operator<< on Value.
+ ///
+ void print(raw_ostream &O, AssemblyAnnotationWriter *AAW = 0) const;
+
+ /// All values are typed, get the type of this value.
+ ///
+ Type *getType() const { return VTy; }
+
+ /// All values hold a context through their type.
+ LLVMContext &getContext() const;
+
+ // All values can potentially be named.
+ bool hasName() const { return Name != 0 && SubclassID != MDStringVal; }
+ ValueName *getValueName() const { return Name; }
+ void setValueName(ValueName *VN) { Name = VN; }
+
+ /// getName() - Return a constant reference to the value's name. This is cheap
+ /// and guaranteed to return the same reference as long as the value is not
+ /// modified.
+ StringRef getName() const;
+
+ /// setName() - Change the name of the value, choosing a new unique name if
+ /// the provided name is taken.
+ ///
+ /// \param Name The new name; or "" if the value's name should be removed.
+ void setName(const Twine &Name);
+
+
+ /// takeName - transfer the name from V to this value, setting V's name to
+ /// empty. It is an error to call V->takeName(V).
+ void takeName(Value *V);
+
+ /// replaceAllUsesWith - Go through the uses list for this definition and make
+ /// each use point to "V" instead of "this". After this completes, 'this's
+ /// use list is guaranteed to be empty.
+ ///
+ void replaceAllUsesWith(Value *V);
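+
+ // For example (editorial sketch):
+ //   OldV->replaceAllUsesWith(NewV);
+ //   assert(OldV->use_empty() && "every use now refers to NewV");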
+
+ //----------------------------------------------------------------------
+ // Methods for handling the chain of uses of this Value.
+ //
+ typedef value_use_iterator<User> use_iterator;
+ typedef value_use_iterator<const User> const_use_iterator;
+
+ bool use_empty() const { return UseList == 0; }
+ use_iterator use_begin() { return use_iterator(UseList); }
+ const_use_iterator use_begin() const { return const_use_iterator(UseList); }
+ use_iterator use_end() { return use_iterator(0); }
+ const_use_iterator use_end() const { return const_use_iterator(0); }
+ User *use_back() { return *use_begin(); }
+ const User *use_back() const { return *use_begin(); }
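+
+ // Illustrative sketch (editorial addition): walking all users of a Value *V.
+ //
+ //   for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ //        UI != E; ++UI) {
+ //     User *U = *UI;                      // The using User.
+ //     unsigned OpNo = UI.getOperandNo();  // Which operand refers to V.
+ //   }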
+
+ /// hasOneUse - Return true if there is exactly one user of this value. This
+ /// is specialized because it is a common request and does not require
+ /// traversing the whole use list.
+ ///
+ bool hasOneUse() const {
+ const_use_iterator I = use_begin(), E = use_end();
+ if (I == E) return false;
+ return ++I == E;
+ }
+
+ /// hasNUses - Return true if this Value has exactly N users.
+ ///
+ bool hasNUses(unsigned N) const;
+
+ /// hasNUsesOrMore - Return true if this value has N users or more. This is
+ /// logically equivalent to getNumUses() >= N.
+ ///
+ bool hasNUsesOrMore(unsigned N) const;
+
+ bool isUsedInBasicBlock(const BasicBlock *BB) const;
+
+ /// getNumUses - This method computes the number of uses of this Value. This
+ /// is a linear time operation. Use hasOneUse, hasNUses, or hasNUsesOrMore
+ /// to check for specific values.
+ unsigned getNumUses() const;
+
+ /// addUse - This method should only be used by the Use class.
+ ///
+ void addUse(Use &U) { U.addToList(&UseList); }
+
+ /// An enumeration for keeping track of the concrete subclass of Value that
+ /// is actually instantiated. Values of this enumeration are kept in the
+ /// Value class's SubclassID field. They are used for concrete type
+ /// identification.
+ enum ValueTy {
+ ArgumentVal, // This is an instance of Argument
+ BasicBlockVal, // This is an instance of BasicBlock
+ FunctionVal, // This is an instance of Function
+ GlobalAliasVal, // This is an instance of GlobalAlias
+ GlobalVariableVal, // This is an instance of GlobalVariable
+ UndefValueVal, // This is an instance of UndefValue
+ BlockAddressVal, // This is an instance of BlockAddress
+ ConstantExprVal, // This is an instance of ConstantExpr
+ ConstantAggregateZeroVal, // This is an instance of ConstantAggregateZero
+ ConstantDataArrayVal, // This is an instance of ConstantDataArray
+ ConstantDataVectorVal, // This is an instance of ConstantDataVector
+ ConstantIntVal, // This is an instance of ConstantInt
+ ConstantFPVal, // This is an instance of ConstantFP
+ ConstantArrayVal, // This is an instance of ConstantArray
+ ConstantStructVal, // This is an instance of ConstantStruct
+ ConstantVectorVal, // This is an instance of ConstantVector
+ ConstantPointerNullVal, // This is an instance of ConstantPointerNull
+ MDNodeVal, // This is an instance of MDNode
+ MDStringVal, // This is an instance of MDString
+ InlineAsmVal, // This is an instance of InlineAsm
+ PseudoSourceValueVal, // This is an instance of PseudoSourceValue
+ FixedStackPseudoSourceValueVal, // This is an instance of
+ // FixedStackPseudoSourceValue
+ InstructionVal, // This is an instance of Instruction
+ // Enum values starting at InstructionVal are used for Instructions;
+ // don't add new values here!
+
+ // Markers:
+ ConstantFirstVal = FunctionVal,
+ ConstantLastVal = ConstantPointerNullVal
+ };
+
+ /// getValueID - Return an ID for the concrete type of this object. This is
+ /// used to implement the classof checks. This should not be used for any
+ /// other purpose, as the values may change as LLVM evolves. Also, note that
+ /// for instructions, the Instruction's opcode is added to InstructionVal. So
+ /// this means three things:
+ /// # there is no value with code InstructionVal (no opcode==0).
+ /// # there are more possible values for the value type than in ValueTy enum.
+ /// # the InstructionVal enumerator must be the highest valued enumerator in
+ /// the ValueTy enum.
+ unsigned getValueID() const {
+ return SubclassID;
+ }
+
+ /// getRawSubclassOptionalData - Return the raw optional flags value
+ /// contained in this value. This should only be used when testing two
+ /// Values for equivalence.
+ unsigned getRawSubclassOptionalData() const {
+ return SubclassOptionalData;
+ }
+
+ /// clearSubclassOptionalData - Clear the optional flags contained in
+ /// this value.
+ void clearSubclassOptionalData() {
+ SubclassOptionalData = 0;
+ }
+
+ /// hasSameSubclassOptionalData - Test whether the optional flags contained
+ /// in this value are equal to the optional flags in the given value.
+ bool hasSameSubclassOptionalData(const Value *V) const {
+ return SubclassOptionalData == V->SubclassOptionalData;
+ }
+
+ /// intersectOptionalDataWith - Clear any optional flags in this value
+ /// that are not also set in the given value.
+ void intersectOptionalDataWith(const Value *V) {
+ SubclassOptionalData &= V->SubclassOptionalData;
+ }
+
+ /// hasValueHandle - Return true if there is a value handle associated with
+ /// this value.
+ bool hasValueHandle() const { return HasValueHandle; }
+
+ /// stripPointerCasts - This method strips off any unneeded pointer casts and
+ /// all-zero GEPs from the specified value, returning the original uncasted
+ /// value. If this is called on a non-pointer value, it returns 'this'.
+ Value *stripPointerCasts();
+ const Value *stripPointerCasts() const {
+ return const_cast<Value*>(this)->stripPointerCasts();
+ }
+
+ /// stripInBoundsConstantOffsets - This method strips off unneeded pointer
+ /// casts and all-constant GEPs from the specified value, returning the
+ /// original pointer value. If this is called on a non-pointer value, it
+ /// returns 'this'.
+ Value *stripInBoundsConstantOffsets();
+ const Value *stripInBoundsConstantOffsets() const {
+ return const_cast<Value*>(this)->stripInBoundsConstantOffsets();
+ }
+
+ /// stripInBoundsOffsets - This method strips off unneeded pointer casts and
+ /// any in-bounds Offsets from the specified value, returning the original
+ /// pointer value. If this is called on a non-pointer value, it returns
+ /// 'this'.
+ Value *stripInBoundsOffsets();
+ const Value *stripInBoundsOffsets() const {
+ return const_cast<Value*>(this)->stripInBoundsOffsets();
+ }
+
+ /// isDereferenceablePointer - Test if this value is always a pointer to
+ /// allocated and suitably aligned memory for a simple load or store.
+ bool isDereferenceablePointer() const;
+
+ /// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
+ /// return the value in the PHI node corresponding to PredBB. If not, return
+ /// the value itself. This is useful if you want to know the value something
+ /// has in a predecessor block.
+ Value *DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB);
+
+ const Value *DoPHITranslation(const BasicBlock *CurBB,
+ const BasicBlock *PredBB) const{
+ return const_cast<Value*>(this)->DoPHITranslation(CurBB, PredBB);
+ }
+
+ /// MaximumAlignment - This is the greatest alignment value supported by
+ /// load, store, and alloca instructions, and global values.
+ static const unsigned MaximumAlignment = 1u << 29;
+
+ /// mutateType - Mutate the type of this Value to be of the specified type.
+ /// Note that this is an extremely dangerous operation which can create
+ /// completely invalid IR very easily. It is strongly recommended that you
+ /// recreate IR objects with the right types instead of mutating them in
+ /// place.
+ void mutateType(Type *Ty) {
+ VTy = Ty;
+ }
+
+protected:
+ unsigned short getSubclassDataFromValue() const { return SubclassData; }
+ void setValueSubclassData(unsigned short D) { SubclassData = D; }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const Value &V) {
+ V.print(OS);
+ return OS;
+}
+
+void Use::set(Value *V) {
+ if (Val) removeFromList();
+ Val = V;
+ if (V) V->addUse(*this);
+}
+
+
+// isa - Provide some specializations of isa so that we don't have to include
+// the subtype header files to test to see if the value is a subclass...
+//
+template <> struct isa_impl<Constant, Value> {
+ static inline bool doit(const Value &Val) {
+ return Val.getValueID() >= Value::ConstantFirstVal &&
+ Val.getValueID() <= Value::ConstantLastVal;
+ }
+};
+
+template <> struct isa_impl<Argument, Value> {
+ static inline bool doit (const Value &Val) {
+ return Val.getValueID() == Value::ArgumentVal;
+ }
+};
+
+template <> struct isa_impl<InlineAsm, Value> {
+ static inline bool doit(const Value &Val) {
+ return Val.getValueID() == Value::InlineAsmVal;
+ }
+};
+
+template <> struct isa_impl<Instruction, Value> {
+ static inline bool doit(const Value &Val) {
+ return Val.getValueID() >= Value::InstructionVal;
+ }
+};
+
+template <> struct isa_impl<BasicBlock, Value> {
+ static inline bool doit(const Value &Val) {
+ return Val.getValueID() == Value::BasicBlockVal;
+ }
+};
+
+template <> struct isa_impl<Function, Value> {
+ static inline bool doit(const Value &Val) {
+ return Val.getValueID() == Value::FunctionVal;
+ }
+};
+
+template <> struct isa_impl<GlobalVariable, Value> {
+ static inline bool doit(const Value &Val) {
+ return Val.getValueID() == Value::GlobalVariableVal;
+ }
+};
+
+template <> struct isa_impl<GlobalAlias, Value> {
+ static inline bool doit(const Value &Val) {
+ return Val.getValueID() == Value::GlobalAliasVal;
+ }
+};
+
+template <> struct isa_impl<GlobalValue, Value> {
+ static inline bool doit(const Value &Val) {
+ return isa<GlobalVariable>(Val) || isa<Function>(Val) ||
+ isa<GlobalAlias>(Val);
+ }
+};
+
+template <> struct isa_impl<MDNode, Value> {
+ static inline bool doit(const Value &Val) {
+ return Val.getValueID() == Value::MDNodeVal;
+ }
+};
+
+// Value* is only 4-byte aligned.
+template<>
+class PointerLikeTypeTraits<Value*> {
+ typedef Value* PT;
+public:
+ static inline void *getAsVoidPointer(PT P) { return P; }
+ static inline PT getFromVoidPointer(void *P) {
+ return static_cast<PT>(P);
+ }
+ enum { NumLowBitsAvailable = 2 };
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IR/ValueSymbolTable.h b/include/llvm/IR/ValueSymbolTable.h
new file mode 100644
index 000000000000..bf1fade1ccef
--- /dev/null
+++ b/include/llvm/IR/ValueSymbolTable.h
@@ -0,0 +1,133 @@
+//===-- llvm/ValueSymbolTable.h - Implement a Value Symtab ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the name/Value symbol table for LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_VALUESYMBOLTABLE_H
+#define LLVM_IR_VALUESYMBOLTABLE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+ template<typename ValueSubClass, typename ItemParentClass>
+ class SymbolTableListTraits;
+ class BasicBlock;
+ class Function;
+ class NamedMDNode;
+ class Module;
+ class StringRef;
+
+/// This class provides a symbol table of name/value pairs. It is essentially
+/// a std::map<std::string,Value*>, but it has a controlled interface provided
+/// by LLVM and also ensures the uniqueness of names.
+///
+class ValueSymbolTable {
+ friend class Value;
+ friend class SymbolTableListTraits<Argument, Function>;
+ friend class SymbolTableListTraits<BasicBlock, Function>;
+ friend class SymbolTableListTraits<Instruction, BasicBlock>;
+ friend class SymbolTableListTraits<Function, Module>;
+ friend class SymbolTableListTraits<GlobalVariable, Module>;
+ friend class SymbolTableListTraits<GlobalAlias, Module>;
+/// @name Types
+/// @{
+public:
+ /// @brief A mapping of names to values.
+ typedef StringMap<Value*> ValueMap;
+
+ /// @brief An iterator over a ValueMap.
+ typedef ValueMap::iterator iterator;
+
+ /// @brief A const_iterator over a ValueMap.
+ typedef ValueMap::const_iterator const_iterator;
+
+/// @}
+/// @name Constructors
+/// @{
+public:
+
+ ValueSymbolTable() : vmap(0), LastUnique(0) {}
+ ~ValueSymbolTable();
+
+/// @}
+/// @name Accessors
+/// @{
+public:
+
+ /// This method finds the value with the given \p Name in the
+ /// symbol table.
+ /// @returns the value associated with the \p Name
+ /// @brief Lookup a named Value.
+ Value *lookup(StringRef Name) const { return vmap.lookup(Name); }
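+
+ /// For example (editorial sketch): in a module-level table,
+ /// Value *MainF = VST.lookup("main"); yields the value named "main",
+ /// or null if no such name exists.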
+
+ /// @returns true iff the symbol table is empty
+ /// @brief Determine if the symbol table is empty
+ inline bool empty() const { return vmap.empty(); }
+
+ /// @brief The number of name/value pairs is returned.
+ inline unsigned size() const { return unsigned(vmap.size()); }
+
+ /// This function can be used from the debugger to display the
+ /// content of the symbol table while debugging.
+ /// @brief Print out symbol table on stderr
+ void dump() const;
+
+/// @}
+/// @name Iteration
+/// @{
+public:
+ /// @brief Get an iterator that starts at the beginning of the symbol table.
+ inline iterator begin() { return vmap.begin(); }
+
+ /// @brief Get a const_iterator that starts at the beginning of the symbol table.
+ inline const_iterator begin() const { return vmap.begin(); }
+
+ /// @brief Get an iterator to the end of the symbol table.
+ inline iterator end() { return vmap.end(); }
+
+ /// @brief Get a const_iterator to the end of the symbol table.
+ inline const_iterator end() const { return vmap.end(); }
+
+/// @}
+/// @name Mutators
+/// @{
+private:
+ /// This method adds the provided value \p V to the symbol table. The Value
+ /// must have a name which is used to place the value in the symbol table.
+ /// If the inserted name conflicts, this renames the value.
+ /// @brief Add a named value to the symbol table
+ void reinsertValue(Value *V);
+
+ /// createValueName - This method attempts to create a value name and insert
+ /// it into the symbol table with the specified name. If it conflicts, it
+ /// auto-renames the name and returns that instead.
+ ValueName *createValueName(StringRef Name, Value *V);
+
+ /// This method removes a value from the symbol table. It leaves the
+ /// ValueName attached to the value, but it is no longer inserted in the
+ /// symtab.
+ void removeValueName(ValueName *V);
+
+/// @}
+/// @name Internal Data
+/// @{
+private:
+ ValueMap vmap; ///< The map that holds the symbol table.
+ mutable uint32_t LastUnique; ///< Counter for tracking unique names
+
+/// @}
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/IRBuilder.h b/include/llvm/IRBuilder.h
deleted file mode 100644
index f63a16051e30..000000000000
--- a/include/llvm/IRBuilder.h
+++ /dev/null
@@ -1,1353 +0,0 @@
-//===---- llvm/IRBuilder.h - Builder for LLVM Instructions ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the IRBuilder class, which is used as a convenient way
-// to create LLVM instructions with a consistent and simplified interface.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_IRBUILDER_H
-#define LLVM_IRBUILDER_H
-
-#include "llvm/Instructions.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/DataLayout.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/ConstantFolder.h"
-
-namespace llvm {
- class MDNode;
-
-/// IRBuilderDefaultInserter - This provides the default implementation of the
-/// IRBuilder 'InsertHelper' method that is called whenever an instruction is
-/// created by IRBuilder and needs to be inserted. By default, this inserts the
-/// instruction at the insertion point.
-template <bool preserveNames = true>
-class IRBuilderDefaultInserter {
-protected:
- void InsertHelper(Instruction *I, const Twine &Name,
- BasicBlock *BB, BasicBlock::iterator InsertPt) const {
- if (BB) BB->getInstList().insert(InsertPt, I);
- if (preserveNames)
- I->setName(Name);
- }
-};
-
-/// IRBuilderBase - Common base class shared among various IRBuilders.
-class IRBuilderBase {
- DebugLoc CurDbgLocation;
-protected:
- BasicBlock *BB;
- BasicBlock::iterator InsertPt;
- LLVMContext &Context;
-public:
-
- IRBuilderBase(LLVMContext &context)
- : Context(context) {
- ClearInsertionPoint();
- }
-
- //===--------------------------------------------------------------------===//
- // Builder configuration methods
- //===--------------------------------------------------------------------===//
-
- /// ClearInsertionPoint - Clear the insertion point: created instructions will
- /// not be inserted into a block.
- void ClearInsertionPoint() {
- BB = 0;
- }
-
- BasicBlock *GetInsertBlock() const { return BB; }
- BasicBlock::iterator GetInsertPoint() const { return InsertPt; }
- LLVMContext &getContext() const { return Context; }
-
- /// SetInsertPoint - This specifies that created instructions should be
- /// appended to the end of the specified block.
- void SetInsertPoint(BasicBlock *TheBB) {
- BB = TheBB;
- InsertPt = BB->end();
- }
-
- /// SetInsertPoint - This specifies that created instructions should be
- /// inserted before the specified instruction.
- void SetInsertPoint(Instruction *I) {
- BB = I->getParent();
- InsertPt = I;
- SetCurrentDebugLocation(I->getDebugLoc());
- }
-
- /// SetInsertPoint - This specifies that created instructions should be
- /// inserted at the specified point.
- void SetInsertPoint(BasicBlock *TheBB, BasicBlock::iterator IP) {
- BB = TheBB;
- InsertPt = IP;
- }
-
- /// SetInsertPoint(Use) - Find the nearest point that dominates this use, and
- /// specify that created instructions should be inserted at this point.
- void SetInsertPoint(Use &U) {
- Instruction *UseInst = cast<Instruction>(U.getUser());
- if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
- BasicBlock *PredBB = Phi->getIncomingBlock(U);
- assert(U != PredBB->getTerminator() && "critical edge not split");
- SetInsertPoint(PredBB, PredBB->getTerminator());
- return;
- }
- SetInsertPoint(UseInst);
- }
-
- /// SetCurrentDebugLocation - Set location information used by debugging
- /// information.
- void SetCurrentDebugLocation(const DebugLoc &L) {
- CurDbgLocation = L;
- }
-
- /// getCurrentDebugLocation - Get location information used by debugging
- /// information.
- DebugLoc getCurrentDebugLocation() const { return CurDbgLocation; }
-
- /// SetInstDebugLocation - If this builder has a current debug location, set
- /// it on the specified instruction.
- void SetInstDebugLocation(Instruction *I) const {
- if (!CurDbgLocation.isUnknown())
- I->setDebugLoc(CurDbgLocation);
- }
-
- /// getCurrentFunctionReturnType - Get the return type of the current function
- /// that we're emitting into.
- Type *getCurrentFunctionReturnType() const;
-
- /// InsertPoint - A saved insertion point.
- class InsertPoint {
- BasicBlock *Block;
- BasicBlock::iterator Point;
-
- public:
- /// Creates a new insertion point which doesn't point to anything.
- InsertPoint() : Block(0) {}
-
- /// Creates a new insertion point at the given location.
- InsertPoint(BasicBlock *InsertBlock, BasicBlock::iterator InsertPoint)
- : Block(InsertBlock), Point(InsertPoint) {}
-
- /// isSet - Returns true if this insert point is set.
- bool isSet() const { return (Block != 0); }
-
- llvm::BasicBlock *getBlock() const { return Block; }
- llvm::BasicBlock::iterator getPoint() const { return Point; }
- };
-
- /// saveIP - Returns the current insert point.
- InsertPoint saveIP() const {
- return InsertPoint(GetInsertBlock(), GetInsertPoint());
- }
-
- /// saveAndClearIP - Returns the current insert point, clearing it
- /// in the process.
- InsertPoint saveAndClearIP() {
- InsertPoint IP(GetInsertBlock(), GetInsertPoint());
- ClearInsertionPoint();
- return IP;
- }
-
- /// restoreIP - Sets the current insert point to a previously-saved
- /// location.
- void restoreIP(InsertPoint IP) {
- if (IP.isSet())
- SetInsertPoint(IP.getBlock(), IP.getPoint());
- else
- ClearInsertionPoint();
- }
-
- //===--------------------------------------------------------------------===//
- // Miscellaneous creation methods.
- //===--------------------------------------------------------------------===//
-
- /// CreateGlobalString - Make a new global variable with an initializer that
- /// has array of i8 type filled in with the nul terminated string value
- /// specified. The new global variable will be marked mergable with any
- /// others of the same contents. If Name is specified, it is the name of the
- /// global variable created.
- Value *CreateGlobalString(StringRef Str, const Twine &Name = "");
-
- /// getInt1 - Get a constant value representing either true or false.
- ConstantInt *getInt1(bool V) {
- return ConstantInt::get(getInt1Ty(), V);
- }
-
- /// getTrue - Get the constant value for i1 true.
- ConstantInt *getTrue() {
- return ConstantInt::getTrue(Context);
- }
-
- /// getFalse - Get the constant value for i1 false.
- ConstantInt *getFalse() {
- return ConstantInt::getFalse(Context);
- }
-
- /// getInt8 - Get a constant 8-bit value.
- ConstantInt *getInt8(uint8_t C) {
- return ConstantInt::get(getInt8Ty(), C);
- }
-
- /// getInt16 - Get a constant 16-bit value.
- ConstantInt *getInt16(uint16_t C) {
- return ConstantInt::get(getInt16Ty(), C);
- }
-
- /// getInt32 - Get a constant 32-bit value.
- ConstantInt *getInt32(uint32_t C) {
- return ConstantInt::get(getInt32Ty(), C);
- }
-
- /// getInt64 - Get a constant 64-bit value.
- ConstantInt *getInt64(uint64_t C) {
- return ConstantInt::get(getInt64Ty(), C);
- }
-
- /// getInt - Get a constant integer value.
- ConstantInt *getInt(const APInt &AI) {
- return ConstantInt::get(Context, AI);
- }
-
- //===--------------------------------------------------------------------===//
- // Type creation methods
- //===--------------------------------------------------------------------===//
-
- /// getInt1Ty - Fetch the type representing a single bit
- IntegerType *getInt1Ty() {
- return Type::getInt1Ty(Context);
- }
-
- /// getInt8Ty - Fetch the type representing an 8-bit integer.
- IntegerType *getInt8Ty() {
- return Type::getInt8Ty(Context);
- }
-
- /// getInt16Ty - Fetch the type representing a 16-bit integer.
- IntegerType *getInt16Ty() {
- return Type::getInt16Ty(Context);
- }
-
- /// getInt32Ty - Fetch the type representing a 32-bit integer.
- IntegerType *getInt32Ty() {
- return Type::getInt32Ty(Context);
- }
-
- /// getInt64Ty - Fetch the type representing a 64-bit integer.
- IntegerType *getInt64Ty() {
- return Type::getInt64Ty(Context);
- }
-
- /// getFloatTy - Fetch the type representing a 32-bit floating point value.
- Type *getFloatTy() {
- return Type::getFloatTy(Context);
- }
-
- /// getDoubleTy - Fetch the type representing a 64-bit floating point value.
- Type *getDoubleTy() {
- return Type::getDoubleTy(Context);
- }
-
- /// getVoidTy - Fetch the type representing void.
- Type *getVoidTy() {
- return Type::getVoidTy(Context);
- }
-
- PointerType *getInt8PtrTy(unsigned AddrSpace = 0) {
- return Type::getInt8PtrTy(Context, AddrSpace);
- }
-
- IntegerType* getIntPtrTy(DataLayout *DL, unsigned AddrSpace = 0) {
- return DL->getIntPtrType(Context, AddrSpace);
- }
-
- //===--------------------------------------------------------------------===//
- // Intrinsic creation methods
- //===--------------------------------------------------------------------===//
-
- /// CreateMemSet - Create and insert a memset that fills the memory at the
- /// specified pointer with the specified value. If the pointer isn't an i8*,
- /// it will be converted. If a TBAA tag is specified, it will be added to the
- /// instruction.
- CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = 0) {
- return CreateMemSet(Ptr, Val, getInt64(Size), Align, isVolatile, TBAATag);
- }
-
- CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = 0);
-
- /// CreateMemCpy - Create and insert a memcpy between the specified pointers.
- /// If the pointers aren't i8*, they will be converted. If a TBAA tag is
- /// specified, it will be added to the instruction.
- CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = 0,
- MDNode *TBAAStructTag = 0) {
- return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag,
- TBAAStructTag);
- }
-
- CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = 0,
- MDNode *TBAAStructTag = 0);
-
- /// CreateMemMove - Create and insert a memmove between the specified
- /// pointers. If the pointers aren't i8*, they will be converted. If a TBAA
- /// tag is specified, it will be added to the instruction.
- CallInst *CreateMemMove(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = 0) {
- return CreateMemMove(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag);
- }
-
- CallInst *CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = 0);
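
A hedged sketch of the convenience overloads above (zeroAndCopy is a hypothetical helper; an alignment of 1 is chosen only for simplicity):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Zero a buffer, then copy it; the pointers may be any pointer type,
    // since the builder converts them to i8* internally.
    void zeroAndCopy(IRBuilder<> &B, Value *Dst, Value *Src, uint64_t Bytes) {
      B.CreateMemSet(Src, B.getInt8(0), Bytes, /*Align=*/1);
      B.CreateMemCpy(Dst, Src, Bytes, /*Align=*/1);
    }
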
-
- /// CreateLifetimeStart - Create a lifetime.start intrinsic. If the pointer
- /// isn't i8* it will be converted.
- CallInst *CreateLifetimeStart(Value *Ptr, ConstantInt *Size = 0);
-
- /// CreateLifetimeEnd - Create a lifetime.end intrinsic. If the pointer isn't
- /// i8* it will be converted.
- CallInst *CreateLifetimeEnd(Value *Ptr, ConstantInt *Size = 0);
-
-private:
- Value *getCastedInt8PtrValue(Value *Ptr);
-};
-
-/// IRBuilder - This provides a uniform API for creating instructions and
-/// inserting them into a basic block: either at the end of a BasicBlock, or
-/// at a specific iterator location in a block.
-///
-/// Note that the builder does not expose the full generality of LLVM
-/// instructions. For access to extra instruction properties, use the mutators
-/// (e.g. setVolatile) on the instructions after they have been created.
-/// The first template argument handles whether or not to preserve names in the
-/// final instruction output. This defaults to on. The second template argument
-/// specifies a class to use for creating constants. This defaults to creating
-/// minimally folded constants. The third template argument allows clients to
-/// specify custom insertion hooks that are called on every newly inserted
-/// instruction.
-template<bool preserveNames = true, typename T = ConstantFolder,
- typename Inserter = IRBuilderDefaultInserter<preserveNames> >
-class IRBuilder : public IRBuilderBase, public Inserter {
- T Folder;
- MDNode *DefaultFPMathTag;
-public:
- IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter(),
- MDNode *FPMathTag = 0)
- : IRBuilderBase(C), Inserter(I), Folder(F), DefaultFPMathTag(FPMathTag) {
- }
-
- explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = 0) : IRBuilderBase(C),
- Folder(), DefaultFPMathTag(FPMathTag) {
- }
-
- explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = 0)
- : IRBuilderBase(TheBB->getContext()), Folder(F),
- DefaultFPMathTag(FPMathTag) {
- SetInsertPoint(TheBB);
- }
-
- explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = 0)
- : IRBuilderBase(TheBB->getContext()), Folder(),
- DefaultFPMathTag(FPMathTag) {
- SetInsertPoint(TheBB);
- }
-
- explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = 0)
- : IRBuilderBase(IP->getContext()), Folder(), DefaultFPMathTag(FPMathTag) {
- SetInsertPoint(IP);
- SetCurrentDebugLocation(IP->getDebugLoc());
- }
-
- explicit IRBuilder(Use &U, MDNode *FPMathTag = 0)
- : IRBuilderBase(U->getContext()), Folder(), DefaultFPMathTag(FPMathTag) {
- SetInsertPoint(U);
- SetCurrentDebugLocation(cast<Instruction>(U.getUser())->getDebugLoc());
- }
-
- IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F,
- MDNode *FPMathTag = 0)
- : IRBuilderBase(TheBB->getContext()), Folder(F),
- DefaultFPMathTag(FPMathTag) {
- SetInsertPoint(TheBB, IP);
- }
-
- IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, MDNode *FPMathTag = 0)
- : IRBuilderBase(TheBB->getContext()), Folder(),
- DefaultFPMathTag(FPMathTag) {
- SetInsertPoint(TheBB, IP);
- }
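
As a sketch, the most common construction pattern: position a builder at the end of a freshly created block (emitConstantReturn is a hypothetical helper):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Build at the end of a new entry block and return a constant.
    Value *emitConstantReturn(Function *F) {
      BasicBlock *BB = BasicBlock::Create(F->getContext(), "entry", F);
      IRBuilder<> Builder(BB);  // insert at the end of BB
      return Builder.CreateRet(Builder.getInt32(0));
    }
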
-
- /// getFolder - Get the constant folder being used.
- const T &getFolder() { return Folder; }
-
- /// getDefaultFPMathTag - Get the floating point math metadata being used.
- MDNode *getDefaultFPMathTag() const { return DefaultFPMathTag; }
-
- /// SetDefaultFPMathTag - Set the floating point math metadata to be used.
- void SetDefaultFPMathTag(MDNode *FPMathTag) { DefaultFPMathTag = FPMathTag; }
-
- /// isNamePreserving - Return true if this builder is configured to actually
- /// add the requested names to IR created through it.
- bool isNamePreserving() const { return preserveNames; }
-
- /// Insert - Insert and return the specified instruction.
- template<typename InstTy>
- InstTy *Insert(InstTy *I, const Twine &Name = "") const {
- this->InsertHelper(I, Name, BB, InsertPt);
- if (!getCurrentDebugLocation().isUnknown())
- this->SetInstDebugLocation(I);
- return I;
- }
-
- /// Insert - No-op overload to handle constants.
- Constant *Insert(Constant *C, const Twine& = "") const {
- return C;
- }
-
- //===--------------------------------------------------------------------===//
- // Instruction creation methods: Terminators
- //===--------------------------------------------------------------------===//
-
-private:
- /// \brief Helper to add branch weight metadata onto an instruction.
- /// \returns The annotated instruction.
- template <typename InstTy>
- InstTy *addBranchWeights(InstTy *I, MDNode *Weights) {
- if (Weights)
- I->setMetadata(LLVMContext::MD_prof, Weights);
- return I;
- }
-
-public:
- /// CreateRetVoid - Create a 'ret void' instruction.
- ReturnInst *CreateRetVoid() {
- return Insert(ReturnInst::Create(Context));
- }
-
- /// @verbatim
- /// CreateRet - Create a 'ret <val>' instruction.
- /// @endverbatim
- ReturnInst *CreateRet(Value *V) {
- return Insert(ReturnInst::Create(Context, V));
- }
-
- /// CreateAggregateRet - Create a sequence of N insertvalue instructions,
- /// with one Value from the retVals array each, that build an aggregate
- /// return value one value at a time, and a ret instruction to return
- /// the resulting aggregate value. This is a convenience function for
- /// code that uses aggregate return values as a vehicle for having
- /// multiple return values.
- ///
- ReturnInst *CreateAggregateRet(Value *const *retVals, unsigned N) {
- Value *V = UndefValue::get(getCurrentFunctionReturnType());
- for (unsigned i = 0; i != N; ++i)
- V = CreateInsertValue(V, retVals[i], i, "mrv");
- return Insert(ReturnInst::Create(Context, V));
- }
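
A minimal sketch of the multiple-return-value idiom described above (returnPair is hypothetical, and the enclosing function is assumed to have a {i32, i32} struct return type):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Return two values at once through an aggregate return type.
    ReturnInst *returnPair(IRBuilder<> &B, Value *A, Value *C) {
      Value *Vals[] = { A, C };
      return B.CreateAggregateRet(Vals, 2);  // insertvalue x2, then ret
    }
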
-
- /// CreateBr - Create an unconditional 'br label X' instruction.
- BranchInst *CreateBr(BasicBlock *Dest) {
- return Insert(BranchInst::Create(Dest));
- }
-
- /// CreateCondBr - Create a conditional 'br Cond, TrueDest, FalseDest'
- /// instruction.
- BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False,
- MDNode *BranchWeights = 0) {
- return Insert(addBranchWeights(BranchInst::Create(True, False, Cond),
- BranchWeights));
- }
-
- /// CreateSwitch - Create a switch instruction with the specified value,
- /// default dest, and with a hint for the number of cases that will be added
- /// (for efficient allocation).
- SwitchInst *CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases = 10,
- MDNode *BranchWeights = 0) {
- return Insert(addBranchWeights(SwitchInst::Create(V, Dest, NumCases),
- BranchWeights));
- }
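
A sketch of attaching branch weights via the optional parameter, assuming MDBuilder::createBranchWeights from llvm/IR/MDBuilder.h (emitBiasedBranch and the 1000:1 weights are illustrative):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/MDBuilder.h"
    using namespace llvm;

    // Emit a conditional branch annotated as strongly biased toward ThenBB.
    void emitBiasedBranch(IRBuilder<> &B, Value *Cond,
                          BasicBlock *ThenBB, BasicBlock *ElseBB) {
      MDNode *Weights = MDBuilder(B.getContext()).createBranchWeights(1000, 1);
      B.CreateCondBr(Cond, ThenBB, ElseBB, Weights);
    }
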
-
- /// CreateIndirectBr - Create an indirect branch instruction with the
- /// specified address operand, with an optional hint for the number of
- /// destinations that will be added (for efficient allocation).
- IndirectBrInst *CreateIndirectBr(Value *Addr, unsigned NumDests = 10) {
- return Insert(IndirectBrInst::Create(Addr, NumDests));
- }
-
- InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
- BasicBlock *UnwindDest, const Twine &Name = "") {
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest,
- ArrayRef<Value *>()),
- Name);
- }
- InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
- BasicBlock *UnwindDest, Value *Arg1,
- const Twine &Name = "") {
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Arg1),
- Name);
- }
- InvokeInst *CreateInvoke3(Value *Callee, BasicBlock *NormalDest,
- BasicBlock *UnwindDest, Value *Arg1,
- Value *Arg2, Value *Arg3,
- const Twine &Name = "") {
- Value *Args[] = { Arg1, Arg2, Arg3 };
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args),
- Name);
- }
- /// CreateInvoke - Create an invoke instruction.
- InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
- BasicBlock *UnwindDest, ArrayRef<Value *> Args,
- const Twine &Name = "") {
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args),
- Name);
- }
-
- ResumeInst *CreateResume(Value *Exn) {
- return Insert(ResumeInst::Create(Exn));
- }
-
- UnreachableInst *CreateUnreachable() {
- return Insert(new UnreachableInst(Context));
- }
-
- //===--------------------------------------------------------------------===//
- // Instruction creation methods: Binary Operators
- //===--------------------------------------------------------------------===//
-private:
- BinaryOperator *CreateInsertNUWNSWBinOp(BinaryOperator::BinaryOps Opc,
- Value *LHS, Value *RHS,
- const Twine &Name,
- bool HasNUW, bool HasNSW) {
- BinaryOperator *BO = Insert(BinaryOperator::Create(Opc, LHS, RHS), Name);
- if (HasNUW) BO->setHasNoUnsignedWrap();
- if (HasNSW) BO->setHasNoSignedWrap();
- return BO;
- }
-
- Instruction *AddFPMathTag(Instruction *I, MDNode *FPMathTag) const {
- if (!FPMathTag)
- FPMathTag = DefaultFPMathTag;
- if (FPMathTag)
- I->setMetadata(LLVMContext::MD_fpmath, FPMathTag);
- return I;
- }
-public:
- Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
- bool HasNUW = false, bool HasNSW = false) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateAdd(LC, RC, HasNUW, HasNSW), Name);
- return CreateInsertNUWNSWBinOp(Instruction::Add, LHS, RHS, Name,
- HasNUW, HasNSW);
- }
- Value *CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateAdd(LHS, RHS, Name, false, true);
- }
- Value *CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateAdd(LHS, RHS, Name, true, false);
- }
- Value *CreateFAdd(Value *LHS, Value *RHS, const Twine &Name = "",
- MDNode *FPMathTag = 0) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateFAdd(LC, RC), Name);
- return Insert(AddFPMathTag(BinaryOperator::CreateFAdd(LHS, RHS),
- FPMathTag), Name);
- }
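
A sketch of setting a builder-wide !fpmath default instead of passing FPMathTag on each call, assuming MDBuilder::createFPMath (the 2.5 ULP bound is illustrative):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/MDBuilder.h"
    using namespace llvm;

    // Allow 2.5 ULPs of error on every FP operation made through this
    // builder, rather than tagging each CreateF* call individually.
    void setRelaxedFPMath(IRBuilder<> &B) {
      MDNode *FPMath = MDBuilder(B.getContext()).createFPMath(2.5f);
      B.SetDefaultFPMathTag(FPMath);  // subsequent CreateFAdd etc. get !fpmath
    }
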
- Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "",
- bool HasNUW = false, bool HasNSW = false) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateSub(LC, RC), Name);
- return CreateInsertNUWNSWBinOp(Instruction::Sub, LHS, RHS, Name,
- HasNUW, HasNSW);
- }
- Value *CreateNSWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateSub(LHS, RHS, Name, false, true);
- }
- Value *CreateNUWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateSub(LHS, RHS, Name, true, false);
- }
- Value *CreateFSub(Value *LHS, Value *RHS, const Twine &Name = "",
- MDNode *FPMathTag = 0) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateFSub(LC, RC), Name);
- return Insert(AddFPMathTag(BinaryOperator::CreateFSub(LHS, RHS),
- FPMathTag), Name);
- }
- Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "",
- bool HasNUW = false, bool HasNSW = false) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateMul(LC, RC), Name);
- return CreateInsertNUWNSWBinOp(Instruction::Mul, LHS, RHS, Name,
- HasNUW, HasNSW);
- }
- Value *CreateNSWMul(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateMul(LHS, RHS, Name, false, true);
- }
- Value *CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateMul(LHS, RHS, Name, true, false);
- }
- Value *CreateFMul(Value *LHS, Value *RHS, const Twine &Name = "",
- MDNode *FPMathTag = 0) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateFMul(LC, RC), Name);
- return Insert(AddFPMathTag(BinaryOperator::CreateFMul(LHS, RHS),
- FPMathTag), Name);
- }
- Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "",
- bool isExact = false) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateUDiv(LC, RC, isExact), Name);
- if (!isExact)
- return Insert(BinaryOperator::CreateUDiv(LHS, RHS), Name);
- return Insert(BinaryOperator::CreateExactUDiv(LHS, RHS), Name);
- }
- Value *CreateExactUDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateUDiv(LHS, RHS, Name, true);
- }
- Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "",
- bool isExact = false) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateSDiv(LC, RC, isExact), Name);
- if (!isExact)
- return Insert(BinaryOperator::CreateSDiv(LHS, RHS), Name);
- return Insert(BinaryOperator::CreateExactSDiv(LHS, RHS), Name);
- }
- Value *CreateExactSDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateSDiv(LHS, RHS, Name, true);
- }
- Value *CreateFDiv(Value *LHS, Value *RHS, const Twine &Name = "",
- MDNode *FPMathTag = 0) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateFDiv(LC, RC), Name);
- return Insert(AddFPMathTag(BinaryOperator::CreateFDiv(LHS, RHS),
- FPMathTag), Name);
- }
- Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateURem(LC, RC), Name);
- return Insert(BinaryOperator::CreateURem(LHS, RHS), Name);
- }
- Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateSRem(LC, RC), Name);
- return Insert(BinaryOperator::CreateSRem(LHS, RHS), Name);
- }
- Value *CreateFRem(Value *LHS, Value *RHS, const Twine &Name = "",
- MDNode *FPMathTag = 0) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateFRem(LC, RC), Name);
- return Insert(AddFPMathTag(BinaryOperator::CreateFRem(LHS, RHS),
- FPMathTag), Name);
- }
-
- Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "",
- bool HasNUW = false, bool HasNSW = false) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateShl(LC, RC, HasNUW, HasNSW), Name);
- return CreateInsertNUWNSWBinOp(Instruction::Shl, LHS, RHS, Name,
- HasNUW, HasNSW);
- }
- Value *CreateShl(Value *LHS, const APInt &RHS, const Twine &Name = "",
- bool HasNUW = false, bool HasNSW = false) {
- return CreateShl(LHS, ConstantInt::get(LHS->getType(), RHS), Name,
- HasNUW, HasNSW);
- }
- Value *CreateShl(Value *LHS, uint64_t RHS, const Twine &Name = "",
- bool HasNUW = false, bool HasNSW = false) {
- return CreateShl(LHS, ConstantInt::get(LHS->getType(), RHS), Name,
- HasNUW, HasNSW);
- }
-
- Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "",
- bool isExact = false) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateLShr(LC, RC, isExact), Name);
- if (!isExact)
- return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name);
- return Insert(BinaryOperator::CreateExactLShr(LHS, RHS), Name);
- }
- Value *CreateLShr(Value *LHS, const APInt &RHS, const Twine &Name = "",
- bool isExact = false) {
- return CreateLShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name, isExact);
- }
- Value *CreateLShr(Value *LHS, uint64_t RHS, const Twine &Name = "",
- bool isExact = false) {
- return CreateLShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name, isExact);
- }
-
- Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "",
- bool isExact = false) {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateAShr(LC, RC, isExact), Name);
- if (!isExact)
- return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name);
- return Insert(BinaryOperator::CreateExactAShr(LHS, RHS), Name);
- }
- Value *CreateAShr(Value *LHS, const APInt &RHS, const Twine &Name = "",
- bool isExact = false) {
- return CreateAShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name, isExact);
- }
- Value *CreateAShr(Value *LHS, uint64_t RHS, const Twine &Name = "",
- bool isExact = false) {
- return CreateAShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name, isExact);
- }
-
- Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (Constant *RC = dyn_cast<Constant>(RHS)) {
- if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isAllOnesValue())
- return LHS; // LHS & -1 -> LHS
- if (Constant *LC = dyn_cast<Constant>(LHS))
- return Insert(Folder.CreateAnd(LC, RC), Name);
- }
- return Insert(BinaryOperator::CreateAnd(LHS, RHS), Name);
- }
- Value *CreateAnd(Value *LHS, const APInt &RHS, const Twine &Name = "") {
- return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
- }
- Value *CreateAnd(Value *LHS, uint64_t RHS, const Twine &Name = "") {
- return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
- }
-
- Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (Constant *RC = dyn_cast<Constant>(RHS)) {
- if (RC->isNullValue())
- return LHS; // LHS | 0 -> LHS
- if (Constant *LC = dyn_cast<Constant>(LHS))
- return Insert(Folder.CreateOr(LC, RC), Name);
- }
- return Insert(BinaryOperator::CreateOr(LHS, RHS), Name);
- }
- Value *CreateOr(Value *LHS, const APInt &RHS, const Twine &Name = "") {
- return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
- }
- Value *CreateOr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
- return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
- }
-
- Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateXor(LC, RC), Name);
- return Insert(BinaryOperator::CreateXor(LHS, RHS), Name);
- }
- Value *CreateXor(Value *LHS, const APInt &RHS, const Twine &Name = "") {
- return CreateXor(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
- }
- Value *CreateXor(Value *LHS, uint64_t RHS, const Twine &Name = "") {
- return CreateXor(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
- }
-
- Value *CreateBinOp(Instruction::BinaryOps Opc,
- Value *LHS, Value *RHS, const Twine &Name = "") {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateBinOp(Opc, LC, RC), Name);
- return Insert(BinaryOperator::Create(Opc, LHS, RHS), Name);
- }
-
- Value *CreateNeg(Value *V, const Twine &Name = "",
- bool HasNUW = false, bool HasNSW = false) {
- if (Constant *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreateNeg(VC, HasNUW, HasNSW), Name);
- BinaryOperator *BO = Insert(BinaryOperator::CreateNeg(V), Name);
- if (HasNUW) BO->setHasNoUnsignedWrap();
- if (HasNSW) BO->setHasNoSignedWrap();
- return BO;
- }
- Value *CreateNSWNeg(Value *V, const Twine &Name = "") {
- return CreateNeg(V, Name, false, true);
- }
- Value *CreateNUWNeg(Value *V, const Twine &Name = "") {
- return CreateNeg(V, Name, true, false);
- }
- Value *CreateFNeg(Value *V, const Twine &Name = "", MDNode *FPMathTag = 0) {
- if (Constant *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreateFNeg(VC), Name);
- return Insert(AddFPMathTag(BinaryOperator::CreateFNeg(V), FPMathTag), Name);
- }
- Value *CreateNot(Value *V, const Twine &Name = "") {
- if (Constant *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreateNot(VC), Name);
- return Insert(BinaryOperator::CreateNot(V), Name);
- }
-
- //===--------------------------------------------------------------------===//
- // Instruction creation methods: Memory Instructions
- //===--------------------------------------------------------------------===//
-
- AllocaInst *CreateAlloca(Type *Ty, Value *ArraySize = 0,
- const Twine &Name = "") {
- return Insert(new AllocaInst(Ty, ArraySize), Name);
- }
- // Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of
- // converting the string to 'bool' for the isVolatile parameter.
- LoadInst *CreateLoad(Value *Ptr, const char *Name) {
- return Insert(new LoadInst(Ptr), Name);
- }
- LoadInst *CreateLoad(Value *Ptr, const Twine &Name = "") {
- return Insert(new LoadInst(Ptr), Name);
- }
- LoadInst *CreateLoad(Value *Ptr, bool isVolatile, const Twine &Name = "") {
- return Insert(new LoadInst(Ptr, 0, isVolatile), Name);
- }
- StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
- return Insert(new StoreInst(Val, Ptr, isVolatile));
- }
- // Provided to resolve 'CreateAlignedLoad(Ptr, Align, "...")' correctly,
- // instead of converting the string to 'bool' for the isVolatile parameter.
- LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name) {
- LoadInst *LI = CreateLoad(Ptr, Name);
- LI->setAlignment(Align);
- return LI;
- }
- LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align,
- const Twine &Name = "") {
- LoadInst *LI = CreateLoad(Ptr, Name);
- LI->setAlignment(Align);
- return LI;
- }
- LoadInst *CreateAlignedLoad(Value *Ptr, unsigned Align, bool isVolatile,
- const Twine &Name = "") {
- LoadInst *LI = CreateLoad(Ptr, isVolatile, Name);
- LI->setAlignment(Align);
- return LI;
- }
- StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align,
- bool isVolatile = false) {
- StoreInst *SI = CreateStore(Val, Ptr, isVolatile);
- SI->setAlignment(Align);
- return SI;
- }
- FenceInst *CreateFence(AtomicOrdering Ordering,
- SynchronizationScope SynchScope = CrossThread) {
- return Insert(new FenceInst(Context, Ordering, SynchScope));
- }
- AtomicCmpXchgInst *CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New,
- AtomicOrdering Ordering,
- SynchronizationScope SynchScope = CrossThread) {
- return Insert(new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, SynchScope));
- }
- AtomicRMWInst *CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val,
- AtomicOrdering Ordering,
- SynchronizationScope SynchScope = CrossThread) {
- return Insert(new AtomicRMWInst(Op, Ptr, Val, Ordering, SynchScope));
- }
- Value *CreateGEP(Value *Ptr, ArrayRef<Value *> IdxList,
- const Twine &Name = "") {
- if (Constant *PC = dyn_cast<Constant>(Ptr)) {
- // Every index must be constant.
- size_t i, e;
- for (i = 0, e = IdxList.size(); i != e; ++i)
- if (!isa<Constant>(IdxList[i]))
- break;
- if (i == e)
- return Insert(Folder.CreateGetElementPtr(PC, IdxList), Name);
- }
- return Insert(GetElementPtrInst::Create(Ptr, IdxList), Name);
- }
- Value *CreateInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList,
- const Twine &Name = "") {
- if (Constant *PC = dyn_cast<Constant>(Ptr)) {
- // Every index must be constant.
- size_t i, e;
- for (i = 0, e = IdxList.size(); i != e; ++i)
- if (!isa<Constant>(IdxList[i]))
- break;
- if (i == e)
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, IdxList), Name);
- }
- return Insert(GetElementPtrInst::CreateInBounds(Ptr, IdxList), Name);
- }
- Value *CreateGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
- if (Constant *PC = dyn_cast<Constant>(Ptr))
- if (Constant *IC = dyn_cast<Constant>(Idx))
- return Insert(Folder.CreateGetElementPtr(PC, IC), Name);
- return Insert(GetElementPtrInst::Create(Ptr, Idx), Name);
- }
- Value *CreateInBoundsGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
- if (Constant *PC = dyn_cast<Constant>(Ptr))
- if (Constant *IC = dyn_cast<Constant>(Idx))
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, IC), Name);
- return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idx), Name);
- }
- Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name = "") {
- Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
-
- if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(PC, Idx), Name);
-
- return Insert(GetElementPtrInst::Create(Ptr, Idx), Name);
- }
- Value *CreateConstInBoundsGEP1_32(Value *Ptr, unsigned Idx0,
- const Twine &Name = "") {
- Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
-
- if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idx), Name);
-
- return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idx), Name);
- }
- Value *CreateConstGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
- const Twine &Name = "") {
- Value *Idxs[] = {
- ConstantInt::get(Type::getInt32Ty(Context), Idx0),
- ConstantInt::get(Type::getInt32Ty(Context), Idx1)
- };
-
- if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(PC, Idxs), Name);
-
- return Insert(GetElementPtrInst::Create(Ptr, Idxs), Name);
- }
- Value *CreateConstInBoundsGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
- const Twine &Name = "") {
- Value *Idxs[] = {
- ConstantInt::get(Type::getInt32Ty(Context), Idx0),
- ConstantInt::get(Type::getInt32Ty(Context), Idx1)
- };
-
- if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idxs), Name);
-
- return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs), Name);
- }
- Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
- Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
-
- if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(PC, Idx), Name);
-
- return Insert(GetElementPtrInst::Create(Ptr, Idx), Name);
- }
- Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
- const Twine &Name = "") {
- Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
-
- if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idx), Name);
-
- return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idx), Name);
- }
- Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
- const Twine &Name = "") {
- Value *Idxs[] = {
- ConstantInt::get(Type::getInt64Ty(Context), Idx0),
- ConstantInt::get(Type::getInt64Ty(Context), Idx1)
- };
-
- if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateGetElementPtr(PC, Idxs), Name);
-
- return Insert(GetElementPtrInst::Create(Ptr, Idxs), Name);
- }
- Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
- const Twine &Name = "") {
- Value *Idxs[] = {
- ConstantInt::get(Type::getInt64Ty(Context), Idx0),
- ConstantInt::get(Type::getInt64Ty(Context), Idx1)
- };
-
- if (Constant *PC = dyn_cast<Constant>(Ptr))
- return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idxs), Name);
-
- return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs), Name);
- }
- Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = "") {
- return CreateConstInBoundsGEP2_32(Ptr, 0, Idx, Name);
- }
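
A sketch of the struct-field addressing shorthand above (fieldAddr is hypothetical; field index 2 is illustrative):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Address field #2 of a struct through a pointer: equivalent to
    // getelementptr inbounds %T* %Obj, i32 0, i32 2.
    Value *fieldAddr(IRBuilder<> &B, Value *Obj) {
      return B.CreateStructGEP(Obj, 2, "field.addr");
    }
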
-
- /// CreateGlobalStringPtr - Same as CreateGlobalString, but return a pointer
- /// with "i8*" type instead of a pointer to an array of i8.
- Value *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "") {
- Value *gv = CreateGlobalString(Str, Name);
- Value *zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
- Value *Args[] = { zero, zero };
- return CreateInBoundsGEP(gv, Args, Name);
- }
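
A sketch combining this helper with CreateCall (emitGreeting is hypothetical, and Printf is assumed to be a previously declared callee of a matching type):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Pass a C string literal to a printf-style callee.
    CallInst *emitGreeting(IRBuilder<> &B, Value *Printf) {
      Value *Msg = B.CreateGlobalStringPtr("hello\n");  // i8* to the literal
      return B.CreateCall(Printf, Msg, "call");
    }
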
-
- //===--------------------------------------------------------------------===//
- // Instruction creation methods: Cast/Conversion Operators
- //===--------------------------------------------------------------------===//
-
- Value *CreateTrunc(Value *V, Type *DestTy, const Twine &Name = "") {
- return CreateCast(Instruction::Trunc, V, DestTy, Name);
- }
- Value *CreateZExt(Value *V, Type *DestTy, const Twine &Name = "") {
- return CreateCast(Instruction::ZExt, V, DestTy, Name);
- }
- Value *CreateSExt(Value *V, Type *DestTy, const Twine &Name = "") {
- return CreateCast(Instruction::SExt, V, DestTy, Name);
- }
- /// CreateZExtOrTrunc - Create a ZExt or Trunc from the integer value V to
- /// DestTy. Return the value untouched if the type of V is already DestTy.
- Value *CreateZExtOrTrunc(Value *V, IntegerType *DestTy,
- const Twine &Name = "") {
- assert(isa<IntegerType>(V->getType()) && "Can only zero extend integers!");
- IntegerType *IntTy = cast<IntegerType>(V->getType());
- if (IntTy->getBitWidth() < DestTy->getBitWidth())
- return CreateZExt(V, DestTy, Name);
- if (IntTy->getBitWidth() > DestTy->getBitWidth())
- return CreateTrunc(V, DestTy, Name);
- return V;
- }
- /// CreateSExtOrTrunc - Create a SExt or Trunc from the integer value V to
- /// DestTy. Return the value untouched if the type of V is already DestTy.
- Value *CreateSExtOrTrunc(Value *V, IntegerType *DestTy,
- const Twine &Name = "") {
- assert(isa<IntegerType>(V->getType()) && "Can only sign extend integers!");
- IntegerType *IntTy = cast<IntegerType>(V->getType());
- if (IntTy->getBitWidth() < DestTy->getBitWidth())
- return CreateSExt(V, DestTy, Name);
- if (IntTy->getBitWidth() > DestTy->getBitWidth())
- return CreateTrunc(V, DestTy, Name);
- return V;
- }
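
A sketch of the resize-to-a-fixed-width idiom these helpers support (toInt32 is a hypothetical helper):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Normalize any integer to i32: widens with zext, narrows with trunc,
    // and is a no-op when V is already i32.
    Value *toInt32(IRBuilder<> &B, Value *V) {
      return B.CreateZExtOrTrunc(V, B.getInt32Ty(), "as.i32");
    }
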
- Value *CreateFPToUI(Value *V, Type *DestTy, const Twine &Name = ""){
- return CreateCast(Instruction::FPToUI, V, DestTy, Name);
- }
- Value *CreateFPToSI(Value *V, Type *DestTy, const Twine &Name = ""){
- return CreateCast(Instruction::FPToSI, V, DestTy, Name);
- }
- Value *CreateUIToFP(Value *V, Type *DestTy, const Twine &Name = ""){
- return CreateCast(Instruction::UIToFP, V, DestTy, Name);
- }
- Value *CreateSIToFP(Value *V, Type *DestTy, const Twine &Name = ""){
- return CreateCast(Instruction::SIToFP, V, DestTy, Name);
- }
- Value *CreateFPTrunc(Value *V, Type *DestTy,
- const Twine &Name = "") {
- return CreateCast(Instruction::FPTrunc, V, DestTy, Name);
- }
- Value *CreateFPExt(Value *V, Type *DestTy, const Twine &Name = "") {
- return CreateCast(Instruction::FPExt, V, DestTy, Name);
- }
- Value *CreatePtrToInt(Value *V, Type *DestTy,
- const Twine &Name = "") {
- return CreateCast(Instruction::PtrToInt, V, DestTy, Name);
- }
- Value *CreateIntToPtr(Value *V, Type *DestTy,
- const Twine &Name = "") {
- return CreateCast(Instruction::IntToPtr, V, DestTy, Name);
- }
- Value *CreateBitCast(Value *V, Type *DestTy,
- const Twine &Name = "") {
- return CreateCast(Instruction::BitCast, V, DestTy, Name);
- }
- Value *CreateZExtOrBitCast(Value *V, Type *DestTy,
- const Twine &Name = "") {
- if (V->getType() == DestTy)
- return V;
- if (Constant *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreateZExtOrBitCast(VC, DestTy), Name);
- return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name);
- }
- Value *CreateSExtOrBitCast(Value *V, Type *DestTy,
- const Twine &Name = "") {
- if (V->getType() == DestTy)
- return V;
- if (Constant *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreateSExtOrBitCast(VC, DestTy), Name);
- return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name);
- }
- Value *CreateTruncOrBitCast(Value *V, Type *DestTy,
- const Twine &Name = "") {
- if (V->getType() == DestTy)
- return V;
- if (Constant *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreateTruncOrBitCast(VC, DestTy), Name);
- return Insert(CastInst::CreateTruncOrBitCast(V, DestTy), Name);
- }
- Value *CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy,
- const Twine &Name = "") {
- if (V->getType() == DestTy)
- return V;
- if (Constant *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreateCast(Op, VC, DestTy), Name);
- return Insert(CastInst::Create(Op, V, DestTy), Name);
- }
- Value *CreatePointerCast(Value *V, Type *DestTy,
- const Twine &Name = "") {
- if (V->getType() == DestTy)
- return V;
- if (Constant *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreatePointerCast(VC, DestTy), Name);
- return Insert(CastInst::CreatePointerCast(V, DestTy), Name);
- }
- Value *CreateIntCast(Value *V, Type *DestTy, bool isSigned,
- const Twine &Name = "") {
- if (V->getType() == DestTy)
- return V;
- if (Constant *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreateIntCast(VC, DestTy, isSigned), Name);
- return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name);
- }
-private:
- // Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a compile time
- // error, instead of converting the string to bool for the isSigned parameter.
- Value *CreateIntCast(Value *, Type *, const char *) LLVM_DELETED_FUNCTION;
-public:
- Value *CreateFPCast(Value *V, Type *DestTy, const Twine &Name = "") {
- if (V->getType() == DestTy)
- return V;
- if (Constant *VC = dyn_cast<Constant>(V))
- return Insert(Folder.CreateFPCast(VC, DestTy), Name);
- return Insert(CastInst::CreateFPCast(V, DestTy), Name);
- }
-
- //===--------------------------------------------------------------------===//
- // Instruction creation methods: Compare Instructions
- //===--------------------------------------------------------------------===//
-
- Value *CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateICmp(ICmpInst::ICMP_EQ, LHS, RHS, Name);
- }
- Value *CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateICmp(ICmpInst::ICMP_NE, LHS, RHS, Name);
- }
- Value *CreateICmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateICmp(ICmpInst::ICMP_UGT, LHS, RHS, Name);
- }
- Value *CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateICmp(ICmpInst::ICMP_UGE, LHS, RHS, Name);
- }
- Value *CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateICmp(ICmpInst::ICMP_ULT, LHS, RHS, Name);
- }
- Value *CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateICmp(ICmpInst::ICMP_ULE, LHS, RHS, Name);
- }
- Value *CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateICmp(ICmpInst::ICMP_SGT, LHS, RHS, Name);
- }
- Value *CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateICmp(ICmpInst::ICMP_SGE, LHS, RHS, Name);
- }
- Value *CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateICmp(ICmpInst::ICMP_SLT, LHS, RHS, Name);
- }
- Value *CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateICmp(ICmpInst::ICMP_SLE, LHS, RHS, Name);
- }
-
- Value *CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_OEQ, LHS, RHS, Name);
- }
- Value *CreateFCmpOGT(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_OGT, LHS, RHS, Name);
- }
- Value *CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_OGE, LHS, RHS, Name);
- }
- Value *CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_OLT, LHS, RHS, Name);
- }
- Value *CreateFCmpOLE(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_OLE, LHS, RHS, Name);
- }
- Value *CreateFCmpONE(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_ONE, LHS, RHS, Name);
- }
- Value *CreateFCmpORD(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_ORD, LHS, RHS, Name);
- }
- Value *CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_UNO, LHS, RHS, Name);
- }
- Value *CreateFCmpUEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_UEQ, LHS, RHS, Name);
- }
- Value *CreateFCmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_UGT, LHS, RHS, Name);
- }
- Value *CreateFCmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_UGE, LHS, RHS, Name);
- }
- Value *CreateFCmpULT(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_ULT, LHS, RHS, Name);
- }
- Value *CreateFCmpULE(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_ULE, LHS, RHS, Name);
- }
- Value *CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name = "") {
- return CreateFCmp(FCmpInst::FCMP_UNE, LHS, RHS, Name);
- }
-
- Value *CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
- const Twine &Name = "") {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateICmp(P, LC, RC), Name);
- return Insert(new ICmpInst(P, LHS, RHS), Name);
- }
- Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
- const Twine &Name = "") {
- if (Constant *LC = dyn_cast<Constant>(LHS))
- if (Constant *RC = dyn_cast<Constant>(RHS))
- return Insert(Folder.CreateFCmp(P, LC, RC), Name);
- return Insert(new FCmpInst(P, LHS, RHS), Name);
- }
-
- //===--------------------------------------------------------------------===//
- // Instruction creation methods: Other Instructions
- //===--------------------------------------------------------------------===//
-
- PHINode *CreatePHI(Type *Ty, unsigned NumReservedValues,
- const Twine &Name = "") {
- return Insert(PHINode::Create(Ty, NumReservedValues), Name);
- }
-
- CallInst *CreateCall(Value *Callee, const Twine &Name = "") {
- return Insert(CallInst::Create(Callee), Name);
- }
- CallInst *CreateCall(Value *Callee, Value *Arg, const Twine &Name = "") {
- return Insert(CallInst::Create(Callee, Arg), Name);
- }
- CallInst *CreateCall2(Value *Callee, Value *Arg1, Value *Arg2,
- const Twine &Name = "") {
- Value *Args[] = { Arg1, Arg2 };
- return Insert(CallInst::Create(Callee, Args), Name);
- }
- CallInst *CreateCall3(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
- const Twine &Name = "") {
- Value *Args[] = { Arg1, Arg2, Arg3 };
- return Insert(CallInst::Create(Callee, Args), Name);
- }
- CallInst *CreateCall4(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
- Value *Arg4, const Twine &Name = "") {
- Value *Args[] = { Arg1, Arg2, Arg3, Arg4 };
- return Insert(CallInst::Create(Callee, Args), Name);
- }
- CallInst *CreateCall5(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
- Value *Arg4, Value *Arg5, const Twine &Name = "") {
- Value *Args[] = { Arg1, Arg2, Arg3, Arg4, Arg5 };
- return Insert(CallInst::Create(Callee, Args), Name);
- }
-
- CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
- const Twine &Name = "") {
- return Insert(CallInst::Create(Callee, Args), Name);
- }
-
- Value *CreateSelect(Value *C, Value *True, Value *False,
- const Twine &Name = "") {
- if (Constant *CC = dyn_cast<Constant>(C))
- if (Constant *TC = dyn_cast<Constant>(True))
- if (Constant *FC = dyn_cast<Constant>(False))
- return Insert(Folder.CreateSelect(CC, TC, FC), Name);
- return Insert(SelectInst::Create(C, True, False), Name);
- }
-
- VAArgInst *CreateVAArg(Value *List, Type *Ty, const Twine &Name = "") {
- return Insert(new VAArgInst(List, Ty), Name);
- }
-
- Value *CreateExtractElement(Value *Vec, Value *Idx,
- const Twine &Name = "") {
- if (Constant *VC = dyn_cast<Constant>(Vec))
- if (Constant *IC = dyn_cast<Constant>(Idx))
- return Insert(Folder.CreateExtractElement(VC, IC), Name);
- return Insert(ExtractElementInst::Create(Vec, Idx), Name);
- }
-
- Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx,
- const Twine &Name = "") {
- if (Constant *VC = dyn_cast<Constant>(Vec))
- if (Constant *NC = dyn_cast<Constant>(NewElt))
- if (Constant *IC = dyn_cast<Constant>(Idx))
- return Insert(Folder.CreateInsertElement(VC, NC, IC), Name);
- return Insert(InsertElementInst::Create(Vec, NewElt, Idx), Name);
- }
-
- Value *CreateShuffleVector(Value *V1, Value *V2, Value *Mask,
- const Twine &Name = "") {
- if (Constant *V1C = dyn_cast<Constant>(V1))
- if (Constant *V2C = dyn_cast<Constant>(V2))
- if (Constant *MC = dyn_cast<Constant>(Mask))
- return Insert(Folder.CreateShuffleVector(V1C, V2C, MC), Name);
- return Insert(new ShuffleVectorInst(V1, V2, Mask), Name);
- }
-
- Value *CreateExtractValue(Value *Agg,
- ArrayRef<unsigned> Idxs,
- const Twine &Name = "") {
- if (Constant *AggC = dyn_cast<Constant>(Agg))
- return Insert(Folder.CreateExtractValue(AggC, Idxs), Name);
- return Insert(ExtractValueInst::Create(Agg, Idxs), Name);
- }
-
- Value *CreateInsertValue(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs,
- const Twine &Name = "") {
- if (Constant *AggC = dyn_cast<Constant>(Agg))
- if (Constant *ValC = dyn_cast<Constant>(Val))
- return Insert(Folder.CreateInsertValue(AggC, ValC, Idxs), Name);
- return Insert(InsertValueInst::Create(Agg, Val, Idxs), Name);
- }
-
- LandingPadInst *CreateLandingPad(Type *Ty, Value *PersFn, unsigned NumClauses,
- const Twine &Name = "") {
- return Insert(LandingPadInst::Create(Ty, PersFn, NumClauses, Name));
- }
-
- //===--------------------------------------------------------------------===//
- // Utility creation methods
- //===--------------------------------------------------------------------===//
-
- /// CreateIsNull - Return an i1 value testing if \p Arg is null.
- Value *CreateIsNull(Value *Arg, const Twine &Name = "") {
- return CreateICmpEQ(Arg, Constant::getNullValue(Arg->getType()),
- Name);
- }
-
- /// CreateIsNotNull - Return an i1 value testing if \p Arg is not null.
- Value *CreateIsNotNull(Value *Arg, const Twine &Name = "") {
- return CreateICmpNE(Arg, Constant::getNullValue(Arg->getType()),
- Name);
- }
-
- /// CreatePtrDiff - Return the i64 difference between two pointer values,
- /// dividing out the size of the pointed-to objects. This is intended to
- /// implement C-style pointer subtraction. As such, the pointers must be
- /// appropriately aligned for their element types and pointing into the
- /// same object.
- Value *CreatePtrDiff(Value *LHS, Value *RHS, const Twine &Name = "") {
- assert(LHS->getType() == RHS->getType() &&
- "Pointer subtraction operand types must match!");
- PointerType *ArgType = cast<PointerType>(LHS->getType());
- Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context));
- Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context));
- Value *Difference = CreateSub(LHS_int, RHS_int);
- return CreateExactSDiv(Difference,
- ConstantExpr::getSizeOf(ArgType->getElementType()),
- Name);
- }
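
A sketch of C-style pointer subtraction through this helper (elementCount is hypothetical; both pointers are assumed to share an element type, e.g. i32*):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // (End - Begin) in elements, as an i64.
    Value *elementCount(IRBuilder<> &B, Value *Begin, Value *End) {
      return B.CreatePtrDiff(End, Begin, "count");
    }
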
-};
-
-}
-
-#endif
diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h
new file mode 100644
index 000000000000..e2ae5f7164b2
--- /dev/null
+++ b/include/llvm/IRReader/IRReader.h
@@ -0,0 +1,55 @@
+//===---- llvm/IRReader/IRReader.h - Reader for LLVM IR files ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions for reading LLVM IR. They support both
+// Bitcode and Assembly, automatically detecting the input format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IRREADER_IRREADER_H
+#define LLVM_IRREADER_IRREADER_H
+
+#include <string>
+
+namespace llvm {
+
+class Module;
+class MemoryBuffer;
+class SMDiagnostic;
+class LLVMContext;
+
+/// If the given MemoryBuffer holds a bitcode image, return a Module for it
+/// which does lazy deserialization of function bodies. Otherwise, attempt to
+/// parse it as LLVM Assembly and return a fully populated Module. This
+/// function *always* takes ownership of the given MemoryBuffer.
+Module *getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err,
+ LLVMContext &Context);
+
+/// If the given file holds a bitcode image, return a Module
+/// for it which does lazy deserialization of function bodies. Otherwise,
+/// attempt to parse it as LLVM Assembly and return a fully populated
+/// Module.
+Module *getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err,
+ LLVMContext &Context);
+
+/// If the given MemoryBuffer holds a bitcode image, return a Module
+/// for it. Otherwise, attempt to parse it as LLVM Assembly and return
+/// a Module for it. This function *always* takes ownership of the given
+/// MemoryBuffer.
+Module *ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, LLVMContext &Context);
+
+/// If the given file holds a bitcode image, return a Module for it.
+/// Otherwise, attempt to parse it as LLVM Assembly and return a Module
+/// for it.
+Module *ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
+ LLVMContext &Context);
+
+}
+
+#endif
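
A hedged sketch of typical driver usage of these entry points (loadModule and the program name "loader" are hypothetical; error reporting via SMDiagnostic::print is assumed):

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IRReader/IRReader.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Load either bitcode or textual IR; the reader detects the format.
    Module *loadModule(const char *Path, LLVMContext &Context) {
      SMDiagnostic Err;
      Module *M = ParseIRFile(Path, Err, Context);
      if (!M)
        Err.print("loader", errs());  // report parse/bitcode errors
      return M;
    }
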
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 8c164eb91984..9cc194b4248a 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -31,6 +31,10 @@ void initializeTransformUtils(PassRegistry&);
/// ScalarOpts library.
void initializeScalarOpts(PassRegistry&);
+/// initializeObjCARCOpts - Initialize all passes linked into the ObjCARCOpts
+/// library.
+void initializeObjCARCOpts(PassRegistry&);
+
/// initializeVectorization - Initialize all passes linked into the
/// Vectorize library.
void initializeVectorization(PassRegistry&);
@@ -69,6 +73,7 @@ void initializeArgPromotionPass(PassRegistry&);
void initializeBarrierNoopPass(PassRegistry&);
void initializeBasicAliasAnalysisPass(PassRegistry&);
void initializeBasicCallGraphPass(PassRegistry&);
+void initializeBasicTTIPass(PassRegistry&);
void initializeBlockExtractorPassPass(PassRegistry&);
void initializeBlockFrequencyInfoPass(PassRegistry&);
void initializeBlockPlacementPass(PassRegistry&);
@@ -76,6 +81,8 @@ void initializeBoundsCheckingPass(PassRegistry&);
void initializeBranchFolderPassPass(PassRegistry&);
void initializeBranchProbabilityInfoPass(PassRegistry&);
void initializeBreakCriticalEdgesPass(PassRegistry&);
+void initializeCallGraphPrinterPass(PassRegistry&);
+void initializeCallGraphViewerPass(PassRegistry&);
void initializeCFGOnlyPrinterPass(PassRegistry&);
void initializeCFGOnlyViewerPass(PassRegistry&);
void initializeCFGPrinterPass(PassRegistry&);
@@ -84,7 +91,6 @@ void initializeCFGViewerPass(PassRegistry&);
void initializeCalculateSpillWeightsPass(PassRegistry&);
void initializeCallGraphAnalysisGroup(PassRegistry&);
void initializeCodeGenPreparePass(PassRegistry&);
-void initializeCodePlacementOptPass(PassRegistry&);
void initializeConstantMergePass(PassRegistry&);
void initializeConstantPropagationPass(PassRegistry&);
void initializeMachineCopyPropagationPass(PassRegistry&);
@@ -110,12 +116,13 @@ void initializeExpandPostRAPass(PassRegistry&);
void initializePathProfilerPass(PassRegistry&);
void initializeGCOVProfilerPass(PassRegistry&);
void initializeAddressSanitizerPass(PassRegistry&);
+void initializeAddressSanitizerModulePass(PassRegistry&);
+void initializeMemorySanitizerPass(PassRegistry&);
void initializeThreadSanitizerPass(PassRegistry&);
void initializeEarlyCSEPass(PassRegistry&);
void initializeExpandISelPseudosPass(PassRegistry&);
void initializeFindUsedTypesPass(PassRegistry&);
void initializeFunctionAttrsPass(PassRegistry&);
-void initializeGCInfoDeleterPass(PassRegistry&);
void initializeGCMachineCodeAnalysisPass(PassRegistry&);
void initializeGCModuleInfoPass(PassRegistry&);
void initializeGVNPass(PassRegistry&);
@@ -127,6 +134,7 @@ void initializeIPSCCPPass(PassRegistry&);
void initializeIVUsersPass(PassRegistry&);
void initializeIfConverterPass(PassRegistry&);
void initializeIndVarSimplifyPass(PassRegistry&);
+void initializeInlineCostAnalysisPass(PassRegistry&);
void initializeInstCombinerPass(PassRegistry&);
void initializeInstCountPass(PassRegistry&);
void initializeInstNamerPass(PassRegistry&);
@@ -172,7 +180,6 @@ void initializeMachineDominatorTreePass(PassRegistry&);
void initializeMachinePostDominatorTreePass(PassRegistry&);
void initializeMachineLICMPass(PassRegistry&);
void initializeMachineLoopInfoPass(PassRegistry&);
-void initializeMachineLoopRangesPass(PassRegistry&);
void initializeMachineModuleInfoPass(PassRegistry&);
void initializeMachineSchedulerPass(PassRegistry&);
void initializeMachineSinkingPass(PassRegistry&);
@@ -205,9 +212,9 @@ void initializePostDomViewerPass(PassRegistry&);
void initializePostDominatorTreePass(PassRegistry&);
void initializePostRASchedulerPass(PassRegistry&);
void initializePreVerifierPass(PassRegistry&);
-void initializePrintDbgInfoPass(PassRegistry&);
void initializePrintFunctionPassPass(PassRegistry&);
void initializePrintModulePassPass(PassRegistry&);
+void initializePrintBasicBlockPassPass(PassRegistry&);
void initializeProcessImplicitDefsPass(PassRegistry&);
void initializeProfileEstimatorPassPass(PassRegistry&);
void initializeProfileInfoAnalysisGroup(PassRegistry&);
@@ -249,7 +256,8 @@ void initializeTailCallElimPass(PassRegistry&);
void initializeTailDuplicatePassPass(PassRegistry&);
void initializeTargetPassConfigPass(PassRegistry&);
void initializeDataLayoutPass(PassRegistry&);
-void initializeTargetTransformInfoPass(PassRegistry&);
+void initializeTargetTransformInfoAnalysisGroup(PassRegistry&);
+void initializeNoTTIPass(PassRegistry&);
void initializeTargetLibraryInfoPass(PassRegistry&);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
void initializeTypeBasedAliasAnalysisPass(PassRegistry&);
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
deleted file mode 100644
index b5e0fd4effd6..000000000000
--- a/include/llvm/InlineAsm.h
+++ /dev/null
@@ -1,309 +0,0 @@
-//===-- llvm/InlineAsm.h - Class to represent inline asm strings-*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class represents the inline asm strings, which are Value*s that are
-// used as the callee operand of call instructions. InlineAsms are uniqued
-// like constants, and created via InlineAsm::get(...).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_INLINEASM_H
-#define LLVM_INLINEASM_H
-
-#include "llvm/Value.h"
-#include "llvm/ADT/StringRef.h"
-#include <vector>
-
-namespace llvm {
-
-class PointerType;
-class FunctionType;
-class Module;
-struct InlineAsmKeyType;
-template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
- bool HasLargeKey>
-class ConstantUniqueMap;
-template<class ConstantClass, class TypeClass, class ValType>
-struct ConstantCreator;
-
-class InlineAsm : public Value {
-public:
- enum AsmDialect {
- AD_ATT,
- AD_Intel
- };
-
-private:
- friend struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType>;
- friend class ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&,
- PointerType, InlineAsm, false>;
-
- InlineAsm(const InlineAsm &) LLVM_DELETED_FUNCTION;
- void operator=(const InlineAsm&) LLVM_DELETED_FUNCTION;
-
- std::string AsmString, Constraints;
- bool HasSideEffects;
- bool IsAlignStack;
- AsmDialect Dialect;
-
- InlineAsm(PointerType *Ty, const std::string &AsmString,
- const std::string &Constraints, bool hasSideEffects,
- bool isAlignStack, AsmDialect asmDialect);
- virtual ~InlineAsm();
-
- /// When the ConstantUniqueMap merges two types and makes two InlineAsms
- /// identical, it destroys one of them with this method.
- void destroyConstant();
-public:
-
- /// InlineAsm::get - Return the specified uniqued inline asm string.
- ///
- static InlineAsm *get(FunctionType *Ty, StringRef AsmString,
- StringRef Constraints, bool hasSideEffects,
- bool isAlignStack = false,
- AsmDialect asmDialect = AD_ATT);
-
- bool hasSideEffects() const { return HasSideEffects; }
- bool isAlignStack() const { return IsAlignStack; }
- AsmDialect getDialect() const { return Dialect; }
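
A sketch of creating and calling a uniqued inline asm value (emitNop is hypothetical; the "nop" string and empty constraints are illustrative):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/InlineAsm.h"
    using namespace llvm;

    // Emit a side-effecting no-op via inline asm (AT&T dialect by default).
    void emitNop(IRBuilder<> &B) {
      FunctionType *FTy = FunctionType::get(B.getVoidTy(), /*isVarArg=*/false);
      InlineAsm *IA = InlineAsm::get(FTy, "nop", "", /*hasSideEffects=*/true);
      B.CreateCall(IA);
    }
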
-
- /// getType - InlineAsm's are always pointers.
- ///
- PointerType *getType() const {
- return reinterpret_cast<PointerType*>(Value::getType());
- }
-
- /// getFunctionType - InlineAsm's are always pointers to functions.
- ///
- FunctionType *getFunctionType() const;
-
- const std::string &getAsmString() const { return AsmString; }
- const std::string &getConstraintString() const { return Constraints; }
-
- /// Verify - This static method can be used by the parser to check to see if
- /// the specified constraint string is legal for the type. This returns true
- /// if legal, false if not.
- ///
- static bool Verify(FunctionType *Ty, StringRef Constraints);
-
- // Constraint String Parsing
- enum ConstraintPrefix {
- isInput, // 'x'
- isOutput, // '=x'
- isClobber // '~x'
- };
-
- typedef std::vector<std::string> ConstraintCodeVector;
-
- struct SubConstraintInfo {
- /// MatchingInput - If this is not -1, this is an output constraint where an
- /// input constraint is required to match it (e.g. "0"). The value is the
- /// constraint number that matches this one (for example, if this is
- /// constraint #0 and constraint #4 has the value "0", this will be 4).
- signed char MatchingInput;
- /// Code - The constraint code, either the register name (in braces) or the
- /// constraint letter/number.
- ConstraintCodeVector Codes;
- /// Default constructor.
- SubConstraintInfo() : MatchingInput(-1) {}
- };
-
- typedef std::vector<SubConstraintInfo> SubConstraintInfoVector;
- struct ConstraintInfo;
- typedef std::vector<ConstraintInfo> ConstraintInfoVector;
-
- struct ConstraintInfo {
- /// Type - The basic type of the constraint: input/output/clobber
- ///
- ConstraintPrefix Type;
-
- /// isEarlyClobber - "&": output operand writes result before inputs are all
- /// read. This is only ever set for an output operand.
- bool isEarlyClobber;
-
- /// MatchingInput - If this is not -1, this is an output constraint where an
- /// input constraint is required to match it (e.g. "0"). The value is the
- /// constraint number that matches this one (for example, if this is
- /// constraint #0 and constraint #4 has the value "0", this will be 4).
- signed char MatchingInput;
-
- /// hasMatchingInput - Return true if this is an output constraint that has
- /// a matching input constraint.
- bool hasMatchingInput() const { return MatchingInput != -1; }
-
- /// isCommutative - This is set to true for a constraint that is commutative
- /// with the next operand.
- bool isCommutative;
-
- /// isIndirect - True if this operand is an indirect operand. This means
- /// that the address of the source or destination is present in the call
- /// instruction, instead of it being returned or passed in explicitly. This
- /// is represented with a '*' in the asm string.
- bool isIndirect;
-
- /// Code - The constraint code, either the register name (in braces) or the
- /// constraint letter/number.
- ConstraintCodeVector Codes;
-
- /// isMultipleAlternative - '|': has multiple-alternative constraints.
- bool isMultipleAlternative;
-
- /// multipleAlternatives - If there are multiple alternative constraints,
- /// this array will contain them. Otherwise it will be empty.
- SubConstraintInfoVector multipleAlternatives;
-
- /// The currently selected alternative constraint index.
- unsigned currentAlternativeIndex;
-
- /// Default constructor.
- ConstraintInfo();
-
- /// Copy constructor.
- ConstraintInfo(const ConstraintInfo &other);
-
- /// Parse - Analyze the specified string (e.g. "=*&{eax}") and fill in the
- /// fields in this structure. If the constraint string is not understood,
- /// return true, otherwise return false.
- bool Parse(StringRef Str, ConstraintInfoVector &ConstraintsSoFar);
-
- /// selectAlternative - Point this constraint to the alternative constraint
- /// indicated by the index.
- void selectAlternative(unsigned index);
- };
-
- /// ParseConstraints - Split up the constraint string into the specific
- /// constraints and their prefixes. If this returns an empty vector even
- /// though the constraint string itself isn't empty, there was an error
- /// parsing.
- static ConstraintInfoVector ParseConstraints(StringRef ConstraintString);
-
- /// ParseConstraints - Parse the constraints of this inlineasm object,
- /// returning them the same way that ParseConstraints(str) does.
- ConstraintInfoVector ParseConstraints() const {
- return ParseConstraints(Constraints);
- }
-
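
To make the parsing API above concrete, here is a minimal sketch that walks the parsed constraints of an InlineAsm value and switches on their prefixes. Hedged: the include path reflects this import's move of the header under llvm/IR/, and dumpConstraintKinds is an illustrative name, not part of the API.

    // Illustrative only; assumes the headers from this import are on the
    // include path (the header now lives at llvm/IR/InlineAsm.h).
    #include "llvm/IR/InlineAsm.h"
    using namespace llvm;

    void dumpConstraintKinds(const InlineAsm *IA) {
      InlineAsm::ConstraintInfoVector CV = IA->ParseConstraints();
      for (unsigned i = 0, e = CV.size(); i != e; ++i) {
        switch (CV[i].Type) {
        case InlineAsm::isInput:   break; // e.g. "r"
        case InlineAsm::isOutput:  break; // e.g. "=r"
        case InlineAsm::isClobber: break; // e.g. "~{memory}"
        }
      }
    }
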
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
- return V->getValueID() == Value::InlineAsmVal;
- }
-
-
- // These are helper methods for dealing with flags in the INLINEASM SDNode
- // in the backend.
-
- enum {
- // Fixed operands on an INLINEASM SDNode.
- Op_InputChain = 0,
- Op_AsmString = 1,
- Op_MDNode = 2,
- Op_ExtraInfo = 3, // HasSideEffects, IsAlignStack, AsmDialect.
- Op_FirstOperand = 4,
-
- // Fixed operands on an INLINEASM MachineInstr.
- MIOp_AsmString = 0,
- MIOp_ExtraInfo = 1, // HasSideEffects, IsAlignStack, AsmDialect.
- MIOp_FirstOperand = 2,
-
- // Interpretation of the MIOp_ExtraInfo bit field.
- Extra_HasSideEffects = 1,
- Extra_IsAlignStack = 2,
- Extra_AsmDialect = 4,
- Extra_MayLoad = 8,
- Extra_MayStore = 16,
-
- // Inline asm operands map to multiple SDNode / MachineInstr operands.
-    // The first operand is an immediate describing the asm operand; the low
-    // bits encode the kind:
- Kind_RegUse = 1, // Input register, "r".
- Kind_RegDef = 2, // Output register, "=r".
- Kind_RegDefEarlyClobber = 3, // Early-clobber output register, "=&r".
- Kind_Clobber = 4, // Clobbered register, "~r".
- Kind_Imm = 5, // Immediate.
- Kind_Mem = 6, // Memory operand, "m".
-
- Flag_MatchingOperand = 0x80000000
- };
-
- static unsigned getFlagWord(unsigned Kind, unsigned NumOps) {
- assert(((NumOps << 3) & ~0xffff) == 0 && "Too many inline asm operands!");
- assert(Kind >= Kind_RegUse && Kind <= Kind_Mem && "Invalid Kind");
- return Kind | (NumOps << 3);
- }
-
- /// getFlagWordForMatchingOp - Augment an existing flag word returned by
- /// getFlagWord with information indicating that this input operand is tied
- /// to a previous output operand.
- static unsigned getFlagWordForMatchingOp(unsigned InputFlag,
- unsigned MatchedOperandNo) {
- assert(MatchedOperandNo <= 0x7fff && "Too big matched operand");
- assert((InputFlag & ~0xffff) == 0 && "High bits already contain data");
- return InputFlag | Flag_MatchingOperand | (MatchedOperandNo << 16);
- }
-
- /// getFlagWordForRegClass - Augment an existing flag word returned by
- /// getFlagWord with the required register class for the following register
- /// operands.
-  /// A tied use operand cannot have a register class; use the register class
- /// from the def operand instead.
- static unsigned getFlagWordForRegClass(unsigned InputFlag, unsigned RC) {
- // Store RC + 1, reserve the value 0 to mean 'no register class'.
- ++RC;
- assert(RC <= 0x7fff && "Too large register class ID");
- assert((InputFlag & ~0xffff) == 0 && "High bits already contain data");
- return InputFlag | (RC << 16);
- }
-
- static unsigned getKind(unsigned Flags) {
- return Flags & 7;
- }
-
- static bool isRegDefKind(unsigned Flag){ return getKind(Flag) == Kind_RegDef;}
- static bool isImmKind(unsigned Flag) { return getKind(Flag) == Kind_Imm; }
- static bool isMemKind(unsigned Flag) { return getKind(Flag) == Kind_Mem; }
- static bool isRegDefEarlyClobberKind(unsigned Flag) {
- return getKind(Flag) == Kind_RegDefEarlyClobber;
- }
- static bool isClobberKind(unsigned Flag) {
- return getKind(Flag) == Kind_Clobber;
- }
-
- /// getNumOperandRegisters - Extract the number of registers field from the
- /// inline asm operand flag.
- static unsigned getNumOperandRegisters(unsigned Flag) {
- return (Flag & 0xffff) >> 3;
- }
-
- /// isUseOperandTiedToDef - Return true if the flag of the inline asm
-  /// operand indicates it is a use operand that's matched to a def operand.
- static bool isUseOperandTiedToDef(unsigned Flag, unsigned &Idx) {
- if ((Flag & Flag_MatchingOperand) == 0)
- return false;
- Idx = (Flag & ~Flag_MatchingOperand) >> 16;
- return true;
- }
-
- /// hasRegClassConstraint - Returns true if the flag contains a register
- /// class constraint. Sets RC to the register class ID.
- static bool hasRegClassConstraint(unsigned Flag, unsigned &RC) {
- if (Flag & Flag_MatchingOperand)
- return false;
- unsigned High = Flag >> 16;
- // getFlagWordForRegClass() uses 0 to mean no register class, and otherwise
- // stores RC + 1.
- if (!High)
- return false;
- RC = High - 1;
- return true;
- }
-
-};
-
-} // End llvm namespace
-
-#endif
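
The flag-word helpers removed above pack an operand kind into bits 0-2, an operand count into bits 3-15, and either a matched-operand number or a register class ID (stored as RC+1) into bits 16-31. A minimal standalone sketch of that layout, reimplemented in plain C++ purely for illustration (the names mirror the helpers above, but nothing here is LLVM itself):

    #include <cassert>
    #include <cstdio>

    enum { Kind_RegDef = 2 };

    // Kind in bits 0-2, NumOps in bits 3-15 (mirrors getFlagWord above).
    static unsigned getFlagWord(unsigned Kind, unsigned NumOps) {
      assert(((NumOps << 3) & ~0xffffu) == 0 && "Too many operands");
      return Kind | (NumOps << 3);
    }

    // RC+1 in bits 16-31; 0 is reserved to mean "no register class".
    static unsigned withRegClass(unsigned Flag, unsigned RC) {
      assert(RC + 1 <= 0x7fffu && "Too large register class ID");
      return Flag | ((RC + 1) << 16);
    }

    int main() {
      unsigned Flag = withRegClass(getFlagWord(Kind_RegDef, 2), 5);
      std::printf("kind=%u numops=%u rc=%u\n",
                  Flag & 7,             // -> 2 (Kind_RegDef)
                  (Flag & 0xffff) >> 3, // -> 2
                  (Flag >> 16) - 1);    // -> 5
      return 0;
    }
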
diff --git a/include/llvm/InstVisitor.h b/include/llvm/InstVisitor.h
new file mode 100644
index 000000000000..291170334c0a
--- /dev/null
+++ b/include/llvm/InstVisitor.h
@@ -0,0 +1,288 @@
+//===- llvm/InstVisitor.h - Instruction visitor templates -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_INSTVISITOR_H
+#define LLVM_INSTVISITOR_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+// We operate on opaque instruction classes, so forward declare all instruction
+// types now...
+//
+#define HANDLE_INST(NUM, OPCODE, CLASS) class CLASS;
+#include "llvm/IR/Instruction.def"
+
+#define DELEGATE(CLASS_TO_VISIT) \
+ return static_cast<SubClass*>(this)-> \
+ visit##CLASS_TO_VISIT(static_cast<CLASS_TO_VISIT&>(I))
+
+
+/// @brief Base class for instruction visitors
+///
+/// Instruction visitors are used when you want to perform different actions
+/// for different kinds of instructions without having to use lots of casts
+/// and a big switch statement (in your code, that is).
+///
+/// To define your own visitor, inherit from this class, specifying your
+/// new type for the 'SubClass' template parameter, and "override" visitXXX
+/// functions in your class. I say "override" because this class is defined
+/// in terms of statically resolved overloading, not virtual functions.
+///
+/// For example, here is a visitor that counts the number of alloca
+/// instructions processed:
+///
+/// /// Declare the class. Note that we derive from InstVisitor instantiated
+/// /// with _our new subclass's_ type.
+/// ///
+/// struct CountAllocaVisitor : public InstVisitor<CountAllocaVisitor> {
+/// unsigned Count;
+/// CountAllocaVisitor() : Count(0) {}
+///
+/// void visitAllocaInst(AllocaInst &AI) { ++Count; }
+/// };
+///
+/// And this class would be used like this:
+/// CountAllocaVisitor CAV;
+/// CAV.visit(function);
+/// NumAllocas = CAV.Count;
+///
+/// The defined class has 'visit' methods for Instruction, and also for
+/// Function, and Module, which recursively process all contained instructions.
+///
+/// Note that if you don't implement visitXXX for some instruction type,
+/// the visitXXX method for the instruction's superclass will be invoked. So
+/// if instructions are added in the future, they will be automatically
+/// supported, if you handle one of their superclasses.
+///
+/// The optional second template argument specifies the type that instruction
+/// visitation functions should return. If you specify this, you *MUST* provide
+/// an implementation of visitInstruction, though!
+///
+/// Note that this class is specifically designed as a template to avoid
+/// virtual function call overhead. Defining and using an InstVisitor is just
+/// as efficient as having your own switch statement over the instruction
+/// opcode.
+template<typename SubClass, typename RetTy=void>
+class InstVisitor {
+ //===--------------------------------------------------------------------===//
+ // Interface code - This is the public interface of the InstVisitor that you
+ // use to visit instructions...
+ //
+
+public:
+ // Generic visit method - Allow visitation to all instructions in a range
+ template<class Iterator>
+ void visit(Iterator Start, Iterator End) {
+ while (Start != End)
+ static_cast<SubClass*>(this)->visit(*Start++);
+ }
+
+ // Define visitors for functions and basic blocks...
+ //
+ void visit(Module &M) {
+ static_cast<SubClass*>(this)->visitModule(M);
+ visit(M.begin(), M.end());
+ }
+ void visit(Function &F) {
+ static_cast<SubClass*>(this)->visitFunction(F);
+ visit(F.begin(), F.end());
+ }
+ void visit(BasicBlock &BB) {
+ static_cast<SubClass*>(this)->visitBasicBlock(BB);
+ visit(BB.begin(), BB.end());
+ }
+
+ // Forwarding functions so that the user can visit with pointers AND refs.
+ void visit(Module *M) { visit(*M); }
+ void visit(Function *F) { visit(*F); }
+ void visit(BasicBlock *BB) { visit(*BB); }
+ RetTy visit(Instruction *I) { return visit(*I); }
+
+ // visit - Finally, code to visit an instruction...
+ //
+ RetTy visit(Instruction &I) {
+ switch (I.getOpcode()) {
+ default: llvm_unreachable("Unknown instruction type encountered!");
+ // Build the switch statement using the Instruction.def file...
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: return \
+ static_cast<SubClass*>(this)-> \
+ visit##OPCODE(static_cast<CLASS&>(I));
+#include "llvm/IR/Instruction.def"
+ }
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Visitation functions... these functions provide default fallbacks in case
+ // the user does not specify what to do for a particular instruction type.
+  // The default behavior is to generalize the instruction type to its
+  // superclass and try visiting that. All of this should be inlined perfectly,
+ // because there are no virtual functions to get in the way.
+ //
+
+ // When visiting a module, function or basic block directly, these methods get
+ // called to indicate when transitioning into a new unit.
+ //
+ void visitModule (Module &M) {}
+ void visitFunction (Function &F) {}
+ void visitBasicBlock(BasicBlock &BB) {}
+
+  // Define instruction-specific visitor functions that can be overridden to
+  // handle SPECIFIC instructions. These functions automatically define, for
+  // instance, visitMul to proxy to visitBinaryOperator in case the user does
+  // not need this generality.
+ //
+ // These functions can also implement fan-out, when a single opcode and
+ // instruction have multiple more specific Instruction subclasses. The Call
+  // instruction currently supports this. We implement it by redirecting the
+  // instruction to a special delegation helper.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ RetTy visit##OPCODE(CLASS &I) { \
+ if (NUM == Instruction::Call) \
+ return delegateCallInst(I); \
+ else \
+ DELEGATE(CLASS); \
+ }
+#include "llvm/IR/Instruction.def"
+
+ // Specific Instruction type classes... note that all of the casts are
+ // necessary because we use the instruction classes as opaque types...
+ //
+ RetTy visitReturnInst(ReturnInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitBranchInst(BranchInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitSwitchInst(SwitchInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitResumeInst(ResumeInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);}
+ RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);}
+ RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);}
+ RetTy visitLoadInst(LoadInst &I) { DELEGATE(UnaryInstruction);}
+ RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction);}
+ RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { DELEGATE(Instruction);}
+ RetTy visitAtomicRMWInst(AtomicRMWInst &I) { DELEGATE(Instruction);}
+ RetTy visitFenceInst(FenceInst &I) { DELEGATE(Instruction);}
+ RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction);}
+ RetTy visitPHINode(PHINode &I) { DELEGATE(Instruction);}
+ RetTy visitTruncInst(TruncInst &I) { DELEGATE(CastInst);}
+ RetTy visitZExtInst(ZExtInst &I) { DELEGATE(CastInst);}
+ RetTy visitSExtInst(SExtInst &I) { DELEGATE(CastInst);}
+ RetTy visitFPTruncInst(FPTruncInst &I) { DELEGATE(CastInst);}
+ RetTy visitFPExtInst(FPExtInst &I) { DELEGATE(CastInst);}
+ RetTy visitFPToUIInst(FPToUIInst &I) { DELEGATE(CastInst);}
+ RetTy visitFPToSIInst(FPToSIInst &I) { DELEGATE(CastInst);}
+ RetTy visitUIToFPInst(UIToFPInst &I) { DELEGATE(CastInst);}
+ RetTy visitSIToFPInst(SIToFPInst &I) { DELEGATE(CastInst);}
+ RetTy visitPtrToIntInst(PtrToIntInst &I) { DELEGATE(CastInst);}
+ RetTy visitIntToPtrInst(IntToPtrInst &I) { DELEGATE(CastInst);}
+ RetTy visitBitCastInst(BitCastInst &I) { DELEGATE(CastInst);}
+ RetTy visitSelectInst(SelectInst &I) { DELEGATE(Instruction);}
+ RetTy visitVAArgInst(VAArgInst &I) { DELEGATE(UnaryInstruction);}
+ RetTy visitExtractElementInst(ExtractElementInst &I) { DELEGATE(Instruction);}
+ RetTy visitInsertElementInst(InsertElementInst &I) { DELEGATE(Instruction);}
+ RetTy visitShuffleVectorInst(ShuffleVectorInst &I) { DELEGATE(Instruction);}
+ RetTy visitExtractValueInst(ExtractValueInst &I){ DELEGATE(UnaryInstruction);}
+ RetTy visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); }
+ RetTy visitLandingPadInst(LandingPadInst &I) { DELEGATE(Instruction); }
+
+  // Handle the special intrinsic instruction classes.
+ RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgInfoIntrinsic);}
+ RetTy visitDbgValueInst(DbgValueInst &I) { DELEGATE(DbgInfoIntrinsic);}
+ RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { DELEGATE(IntrinsicInst); }
+ RetTy visitMemSetInst(MemSetInst &I) { DELEGATE(MemIntrinsic); }
+ RetTy visitMemCpyInst(MemCpyInst &I) { DELEGATE(MemTransferInst); }
+ RetTy visitMemMoveInst(MemMoveInst &I) { DELEGATE(MemTransferInst); }
+ RetTy visitMemTransferInst(MemTransferInst &I) { DELEGATE(MemIntrinsic); }
+ RetTy visitMemIntrinsic(MemIntrinsic &I) { DELEGATE(IntrinsicInst); }
+ RetTy visitVAStartInst(VAStartInst &I) { DELEGATE(IntrinsicInst); }
+ RetTy visitVAEndInst(VAEndInst &I) { DELEGATE(IntrinsicInst); }
+ RetTy visitVACopyInst(VACopyInst &I) { DELEGATE(IntrinsicInst); }
+ RetTy visitIntrinsicInst(IntrinsicInst &I) { DELEGATE(CallInst); }
+
+ // Call and Invoke are slightly different as they delegate first through
+ // a generic CallSite visitor.
+ RetTy visitCallInst(CallInst &I) {
+ return static_cast<SubClass*>(this)->visitCallSite(&I);
+ }
+ RetTy visitInvokeInst(InvokeInst &I) {
+ return static_cast<SubClass*>(this)->visitCallSite(&I);
+ }
+
+ // Next level propagators: If the user does not overload a specific
+ // instruction type, they can overload one of these to get the whole class
+ // of instructions...
+ //
+ RetTy visitCastInst(CastInst &I) { DELEGATE(UnaryInstruction);}
+ RetTy visitBinaryOperator(BinaryOperator &I) { DELEGATE(Instruction);}
+ RetTy visitCmpInst(CmpInst &I) { DELEGATE(Instruction);}
+ RetTy visitTerminatorInst(TerminatorInst &I) { DELEGATE(Instruction);}
+ RetTy visitUnaryInstruction(UnaryInstruction &I){ DELEGATE(Instruction);}
+
+ // Provide a special visitor for a 'callsite' that visits both calls and
+ // invokes. When unimplemented, properly delegates to either the terminator or
+ // regular instruction visitor.
+ RetTy visitCallSite(CallSite CS) {
+ assert(CS);
+ Instruction &I = *CS.getInstruction();
+ if (CS.isCall())
+ DELEGATE(Instruction);
+
+ assert(CS.isInvoke());
+ DELEGATE(TerminatorInst);
+ }
+
+ // If the user wants a 'default' case, they can choose to override this
+ // function. If this function is not overloaded in the user's subclass, then
+ // this instruction just gets ignored.
+ //
+ // Note that you MUST override this function if your return type is not void.
+ //
+ void visitInstruction(Instruction &I) {} // Ignore unhandled instructions
+
+private:
+ // Special helper function to delegate to CallInst subclass visitors.
+ RetTy delegateCallInst(CallInst &I) {
+ if (const Function *F = I.getCalledFunction()) {
+ switch ((Intrinsic::ID)F->getIntrinsicID()) {
+ default: DELEGATE(IntrinsicInst);
+ case Intrinsic::dbg_declare: DELEGATE(DbgDeclareInst);
+ case Intrinsic::dbg_value: DELEGATE(DbgValueInst);
+ case Intrinsic::memcpy: DELEGATE(MemCpyInst);
+ case Intrinsic::memmove: DELEGATE(MemMoveInst);
+ case Intrinsic::memset: DELEGATE(MemSetInst);
+ case Intrinsic::vastart: DELEGATE(VAStartInst);
+ case Intrinsic::vaend: DELEGATE(VAEndInst);
+ case Intrinsic::vacopy: DELEGATE(VACopyInst);
+ case Intrinsic::not_intrinsic: break;
+ }
+ }
+ DELEGATE(CallInst);
+ }
+
+ // An overload that will never actually be called, it is used only from dead
+ // code in the dispatching from opcodes to instruction subclasses.
+ RetTy delegateCallInst(Instruction &I) {
+ llvm_unreachable("delegateCallInst called for non-CallInst");
+ }
+};
+
+#undef DELEGATE
+
+} // End llvm namespace
+
+#endif
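
A short usage sketch for the new visitor header. Hedged: MemOpCounter and countMemOps are illustrative names of our own, and the snippet assumes the headers from this import are available; it simply counts loads and stores, letting everything else fall through to the default visitInstruction.

    #include "llvm/InstVisitor.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Counts loads and stores; all other instructions fall through to the
    // default visitInstruction, which ignores them.
    struct MemOpCounter : InstVisitor<MemOpCounter> {
      unsigned Loads, Stores;
      MemOpCounter() : Loads(0), Stores(0) {}
      void visitLoadInst(LoadInst &)   { ++Loads; }  // resolved statically
      void visitStoreInst(StoreInst &) { ++Stores; }
    };

    unsigned countMemOps(Function &F) {
      MemOpCounter C;
      C.visit(F);  // visits every instruction in every block of F
      return C.Loads + C.Stores;
    }
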
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
deleted file mode 100644
index da17f3b80d7b..000000000000
--- a/include/llvm/InstrTypes.h
+++ /dev/null
@@ -1,851 +0,0 @@
-//===-- llvm/InstrTypes.h - Important Instruction subclasses ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines various meta classes of instructions that exist in the VM
-// representation. Specific concrete subclasses of these may be found in the
-// i*.h files...
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_INSTRUCTION_TYPES_H
-#define LLVM_INSTRUCTION_TYPES_H
-
-#include "llvm/Instruction.h"
-#include "llvm/OperandTraits.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/ADT/Twine.h"
-
-namespace llvm {
-
-class LLVMContext;
-
-//===----------------------------------------------------------------------===//
-// TerminatorInst Class
-//===----------------------------------------------------------------------===//
-
-/// TerminatorInst - Subclasses of this class are all able to terminate a basic
-/// block. Thus, these are all the flow-control operations.
-///
-class TerminatorInst : public Instruction {
-protected:
- TerminatorInst(Type *Ty, Instruction::TermOps iType,
- Use *Ops, unsigned NumOps,
- Instruction *InsertBefore = 0)
- : Instruction(Ty, iType, Ops, NumOps, InsertBefore) {}
-
- TerminatorInst(Type *Ty, Instruction::TermOps iType,
- Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd)
- : Instruction(Ty, iType, Ops, NumOps, InsertAtEnd) {}
-
-  // Out of line virtual method, so the vtable, etc. has a home.
- ~TerminatorInst();
-
- /// Virtual methods - Terminators should overload these and provide inline
- /// overrides of non-V methods.
- virtual BasicBlock *getSuccessorV(unsigned idx) const = 0;
- virtual unsigned getNumSuccessorsV() const = 0;
- virtual void setSuccessorV(unsigned idx, BasicBlock *B) = 0;
- virtual TerminatorInst *clone_impl() const = 0;
-public:
-
- /// getNumSuccessors - Return the number of successors that this terminator
- /// has.
- unsigned getNumSuccessors() const {
- return getNumSuccessorsV();
- }
-
- /// getSuccessor - Return the specified successor.
- ///
- BasicBlock *getSuccessor(unsigned idx) const {
- return getSuccessorV(idx);
- }
-
- /// setSuccessor - Update the specified successor to point at the provided
- /// block.
- void setSuccessor(unsigned idx, BasicBlock *B) {
- setSuccessorV(idx, B);
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->isTerminator();
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
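
The successor accessors above are how CFG walks are typically written; a minimal sketch (listSuccessors is an illustrative name, and the include paths assume this import's llvm/IR/ layout):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    void listSuccessors(const BasicBlock *BB) {
      const TerminatorInst *T = BB->getTerminator();
      for (unsigned i = 0, e = T->getNumSuccessors(); i != e; ++i) {
        BasicBlock *Succ = T->getSuccessor(i);
        (void)Succ; // e.g. push onto a worklist
      }
    }
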
-
-//===----------------------------------------------------------------------===//
-// UnaryInstruction Class
-//===----------------------------------------------------------------------===//
-
-class UnaryInstruction : public Instruction {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-
-protected:
- UnaryInstruction(Type *Ty, unsigned iType, Value *V,
- Instruction *IB = 0)
- : Instruction(Ty, iType, &Op<0>(), 1, IB) {
- Op<0>() = V;
- }
- UnaryInstruction(Type *Ty, unsigned iType, Value *V, BasicBlock *IAE)
- : Instruction(Ty, iType, &Op<0>(), 1, IAE) {
- Op<0>() = V;
- }
-public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
-
-  // Out of line virtual method, so the vtable, etc. has a home.
- ~UnaryInstruction();
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Alloca ||
- I->getOpcode() == Instruction::Load ||
- I->getOpcode() == Instruction::VAArg ||
- I->getOpcode() == Instruction::ExtractValue ||
- (I->getOpcode() >= CastOpsBegin && I->getOpcode() < CastOpsEnd);
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-template <>
-struct OperandTraits<UnaryInstruction> :
- public FixedNumOperandTraits<UnaryInstruction, 1> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value)
-
-//===----------------------------------------------------------------------===//
-// BinaryOperator Class
-//===----------------------------------------------------------------------===//
-
-class BinaryOperator : public Instruction {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-protected:
- void init(BinaryOps iType);
- BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty,
- const Twine &Name, Instruction *InsertBefore);
- BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty,
- const Twine &Name, BasicBlock *InsertAtEnd);
- virtual BinaryOperator *clone_impl() const LLVM_OVERRIDE;
-public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// Create() - Construct a binary instruction, given the opcode and the two
-  /// operands. Optionally (if InsertBefore is specified) insert the instruction
- /// into a BasicBlock right before the specified instruction. The specified
- /// Instruction is allowed to be a dereferenced end iterator.
- ///
- static BinaryOperator *Create(BinaryOps Op, Value *S1, Value *S2,
- const Twine &Name = Twine(),
- Instruction *InsertBefore = 0);
-
- /// Create() - Construct a binary instruction, given the opcode and the two
- /// operands. Also automatically insert this instruction to the end of the
- /// BasicBlock specified.
- ///
- static BinaryOperator *Create(BinaryOps Op, Value *S1, Value *S2,
- const Twine &Name, BasicBlock *InsertAtEnd);
-
- /// Create* - These methods just forward to Create, and are useful when you
- /// statically know what type of instruction you're going to create. These
- /// helpers just save some typing.
-#define HANDLE_BINARY_INST(N, OPC, CLASS) \
- static BinaryOperator *Create##OPC(Value *V1, Value *V2, \
- const Twine &Name = "") {\
- return Create(Instruction::OPC, V1, V2, Name);\
- }
-#include "llvm/Instruction.def"
-#define HANDLE_BINARY_INST(N, OPC, CLASS) \
- static BinaryOperator *Create##OPC(Value *V1, Value *V2, \
- const Twine &Name, BasicBlock *BB) {\
- return Create(Instruction::OPC, V1, V2, Name, BB);\
- }
-#include "llvm/Instruction.def"
-#define HANDLE_BINARY_INST(N, OPC, CLASS) \
- static BinaryOperator *Create##OPC(Value *V1, Value *V2, \
- const Twine &Name, Instruction *I) {\
- return Create(Instruction::OPC, V1, V2, Name, I);\
- }
-#include "llvm/Instruction.def"
-
- static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
- const Twine &Name = "") {
- BinaryOperator *BO = Create(Opc, V1, V2, Name);
- BO->setHasNoSignedWrap(true);
- return BO;
- }
- static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
- const Twine &Name, BasicBlock *BB) {
- BinaryOperator *BO = Create(Opc, V1, V2, Name, BB);
- BO->setHasNoSignedWrap(true);
- return BO;
- }
- static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
- const Twine &Name, Instruction *I) {
- BinaryOperator *BO = Create(Opc, V1, V2, Name, I);
- BO->setHasNoSignedWrap(true);
- return BO;
- }
-
- static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2,
- const Twine &Name = "") {
- BinaryOperator *BO = Create(Opc, V1, V2, Name);
- BO->setHasNoUnsignedWrap(true);
- return BO;
- }
- static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2,
- const Twine &Name, BasicBlock *BB) {
- BinaryOperator *BO = Create(Opc, V1, V2, Name, BB);
- BO->setHasNoUnsignedWrap(true);
- return BO;
- }
- static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2,
- const Twine &Name, Instruction *I) {
- BinaryOperator *BO = Create(Opc, V1, V2, Name, I);
- BO->setHasNoUnsignedWrap(true);
- return BO;
- }
-
- static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2,
- const Twine &Name = "") {
- BinaryOperator *BO = Create(Opc, V1, V2, Name);
- BO->setIsExact(true);
- return BO;
- }
- static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2,
- const Twine &Name, BasicBlock *BB) {
- BinaryOperator *BO = Create(Opc, V1, V2, Name, BB);
- BO->setIsExact(true);
- return BO;
- }
- static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2,
- const Twine &Name, Instruction *I) {
- BinaryOperator *BO = Create(Opc, V1, V2, Name, I);
- BO->setIsExact(true);
- return BO;
- }
-
-#define DEFINE_HELPERS(OPC, NUWNSWEXACT) \
- static BinaryOperator *Create ## NUWNSWEXACT ## OPC \
- (Value *V1, Value *V2, const Twine &Name = "") { \
- return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name); \
- } \
- static BinaryOperator *Create ## NUWNSWEXACT ## OPC \
- (Value *V1, Value *V2, const Twine &Name, BasicBlock *BB) { \
- return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, BB); \
- } \
- static BinaryOperator *Create ## NUWNSWEXACT ## OPC \
- (Value *V1, Value *V2, const Twine &Name, Instruction *I) { \
- return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, I); \
- }
-
- DEFINE_HELPERS(Add, NSW) // CreateNSWAdd
- DEFINE_HELPERS(Add, NUW) // CreateNUWAdd
- DEFINE_HELPERS(Sub, NSW) // CreateNSWSub
- DEFINE_HELPERS(Sub, NUW) // CreateNUWSub
- DEFINE_HELPERS(Mul, NSW) // CreateNSWMul
- DEFINE_HELPERS(Mul, NUW) // CreateNUWMul
- DEFINE_HELPERS(Shl, NSW) // CreateNSWShl
- DEFINE_HELPERS(Shl, NUW) // CreateNUWShl
-
- DEFINE_HELPERS(SDiv, Exact) // CreateExactSDiv
- DEFINE_HELPERS(UDiv, Exact) // CreateExactUDiv
- DEFINE_HELPERS(AShr, Exact) // CreateExactAShr
- DEFINE_HELPERS(LShr, Exact) // CreateExactLShr
-
-#undef DEFINE_HELPERS
-
- /// Helper functions to construct and inspect unary operations (NEG and NOT)
- /// via binary operators SUB and XOR:
- ///
- /// CreateNeg, CreateNot - Create the NEG and NOT
- /// instructions out of SUB and XOR instructions.
- ///
- static BinaryOperator *CreateNeg(Value *Op, const Twine &Name = "",
- Instruction *InsertBefore = 0);
- static BinaryOperator *CreateNeg(Value *Op, const Twine &Name,
- BasicBlock *InsertAtEnd);
- static BinaryOperator *CreateNSWNeg(Value *Op, const Twine &Name = "",
- Instruction *InsertBefore = 0);
- static BinaryOperator *CreateNSWNeg(Value *Op, const Twine &Name,
- BasicBlock *InsertAtEnd);
- static BinaryOperator *CreateNUWNeg(Value *Op, const Twine &Name = "",
- Instruction *InsertBefore = 0);
- static BinaryOperator *CreateNUWNeg(Value *Op, const Twine &Name,
- BasicBlock *InsertAtEnd);
- static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name = "",
- Instruction *InsertBefore = 0);
- static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name,
- BasicBlock *InsertAtEnd);
- static BinaryOperator *CreateNot(Value *Op, const Twine &Name = "",
- Instruction *InsertBefore = 0);
- static BinaryOperator *CreateNot(Value *Op, const Twine &Name,
- BasicBlock *InsertAtEnd);
-
- /// isNeg, isFNeg, isNot - Check if the given Value is a
- /// NEG, FNeg, or NOT instruction.
- ///
- static bool isNeg(const Value *V);
- static bool isFNeg(const Value *V);
- static bool isNot(const Value *V);
-
- /// getNegArgument, getNotArgument - Helper functions to extract the
- /// unary argument of a NEG, FNEG or NOT operation implemented via
- /// Sub, FSub, or Xor.
- ///
- static const Value *getNegArgument(const Value *BinOp);
- static Value *getNegArgument( Value *BinOp);
- static const Value *getFNegArgument(const Value *BinOp);
- static Value *getFNegArgument( Value *BinOp);
- static const Value *getNotArgument(const Value *BinOp);
- static Value *getNotArgument( Value *BinOp);
-
- BinaryOps getOpcode() const {
- return static_cast<BinaryOps>(Instruction::getOpcode());
- }
-
- /// swapOperands - Exchange the two operands to this instruction.
-  /// This method is safe to use on any binary instruction and
-  /// does not modify the semantics of the instruction. If the instruction
-  /// cannot be reversed (i.e., it's a Div), then return true.
- ///
- bool swapOperands();
-
-  /// setHasNoUnsignedWrap - Set or clear the nuw flag on this instruction,
- /// which must be an operator which supports this flag. See LangRef.html
- /// for the meaning of this flag.
- void setHasNoUnsignedWrap(bool b = true);
-
- /// setHasNoSignedWrap - Set or clear the nsw flag on this instruction,
- /// which must be an operator which supports this flag. See LangRef.html
- /// for the meaning of this flag.
- void setHasNoSignedWrap(bool b = true);
-
- /// setIsExact - Set or clear the exact flag on this instruction,
- /// which must be an operator which supports this flag. See LangRef.html
- /// for the meaning of this flag.
- void setIsExact(bool b = true);
-
- /// hasNoUnsignedWrap - Determine whether the no unsigned wrap flag is set.
- bool hasNoUnsignedWrap() const;
-
- /// hasNoSignedWrap - Determine whether the no signed wrap flag is set.
- bool hasNoSignedWrap() const;
-
- /// isExact - Determine whether the exact flag is set.
- bool isExact() const;
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->isBinaryOp();
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-template <>
-struct OperandTraits<BinaryOperator> :
- public FixedNumOperandTraits<BinaryOperator, 2> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value)
-
-//===----------------------------------------------------------------------===//
-// CastInst Class
-//===----------------------------------------------------------------------===//
-
-/// CastInst - This is the base class for all instructions that perform data
-/// casts. It is simply provided so that instruction category testing
-/// can be performed with code like:
-///
-/// if (isa<CastInst>(Instr)) { ... }
-/// @brief Base class of casting instructions.
-class CastInst : public UnaryInstruction {
- virtual void anchor() LLVM_OVERRIDE;
-protected:
- /// @brief Constructor with insert-before-instruction semantics for subclasses
- CastInst(Type *Ty, unsigned iType, Value *S,
- const Twine &NameStr = "", Instruction *InsertBefore = 0)
- : UnaryInstruction(Ty, iType, S, InsertBefore) {
- setName(NameStr);
- }
- /// @brief Constructor with insert-at-end-of-block semantics for subclasses
- CastInst(Type *Ty, unsigned iType, Value *S,
- const Twine &NameStr, BasicBlock *InsertAtEnd)
- : UnaryInstruction(Ty, iType, S, InsertAtEnd) {
- setName(NameStr);
- }
-public:
- /// Provides a way to construct any of the CastInst subclasses using an
- /// opcode instead of the subclass's constructor. The opcode must be in the
- /// CastOps category (Instruction::isCast(opcode) returns true). This
- /// constructor has insert-before-instruction semantics to automatically
- /// insert the new CastInst before InsertBefore (if it is non-null).
- /// @brief Construct any of the CastInst subclasses
- static CastInst *Create(
- Instruction::CastOps, ///< The opcode of the cast instruction
- Value *S, ///< The value to be casted (operand 0)
- Type *Ty, ///< The type to which cast should be made
- const Twine &Name = "", ///< Name for the instruction
- Instruction *InsertBefore = 0 ///< Place to insert the instruction
- );
- /// Provides a way to construct any of the CastInst subclasses using an
- /// opcode instead of the subclass's constructor. The opcode must be in the
- /// CastOps category. This constructor has insert-at-end-of-block semantics
- /// to automatically insert the new CastInst at the end of InsertAtEnd (if
-  /// it is non-null).
- /// @brief Construct any of the CastInst subclasses
- static CastInst *Create(
- Instruction::CastOps, ///< The opcode for the cast instruction
- Value *S, ///< The value to be casted (operand 0)
- Type *Ty, ///< The type to which operand is casted
- const Twine &Name, ///< The name for the instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// @brief Create a ZExt or BitCast cast instruction
- static CastInst *CreateZExtOrBitCast(
- Value *S, ///< The value to be casted (operand 0)
- Type *Ty, ///< The type to which cast should be made
- const Twine &Name = "", ///< Name for the instruction
- Instruction *InsertBefore = 0 ///< Place to insert the instruction
- );
-
- /// @brief Create a ZExt or BitCast cast instruction
- static CastInst *CreateZExtOrBitCast(
- Value *S, ///< The value to be casted (operand 0)
- Type *Ty, ///< The type to which operand is casted
- const Twine &Name, ///< The name for the instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// @brief Create a SExt or BitCast cast instruction
- static CastInst *CreateSExtOrBitCast(
- Value *S, ///< The value to be casted (operand 0)
- Type *Ty, ///< The type to which cast should be made
- const Twine &Name = "", ///< Name for the instruction
- Instruction *InsertBefore = 0 ///< Place to insert the instruction
- );
-
- /// @brief Create a SExt or BitCast cast instruction
- static CastInst *CreateSExtOrBitCast(
- Value *S, ///< The value to be casted (operand 0)
- Type *Ty, ///< The type to which operand is casted
- const Twine &Name, ///< The name for the instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// @brief Create a BitCast or a PtrToInt cast instruction
- static CastInst *CreatePointerCast(
- Value *S, ///< The pointer value to be casted (operand 0)
- Type *Ty, ///< The type to which operand is casted
- const Twine &Name, ///< The name for the instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// @brief Create a BitCast or a PtrToInt cast instruction
- static CastInst *CreatePointerCast(
- Value *S, ///< The pointer value to be casted (operand 0)
- Type *Ty, ///< The type to which cast should be made
- const Twine &Name = "", ///< Name for the instruction
- Instruction *InsertBefore = 0 ///< Place to insert the instruction
- );
-
- /// @brief Create a ZExt, BitCast, or Trunc for int -> int casts.
- static CastInst *CreateIntegerCast(
-    Value *S,                     ///< The integer value to be casted (operand 0)
- Type *Ty, ///< The type to which cast should be made
- bool isSigned, ///< Whether to regard S as signed or not
- const Twine &Name = "", ///< Name for the instruction
- Instruction *InsertBefore = 0 ///< Place to insert the instruction
- );
-
- /// @brief Create a ZExt, BitCast, or Trunc for int -> int casts.
- static CastInst *CreateIntegerCast(
- Value *S, ///< The integer value to be casted (operand 0)
- Type *Ty, ///< The integer type to which operand is casted
- bool isSigned, ///< Whether to regard S as signed or not
- const Twine &Name, ///< The name for the instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// @brief Create an FPExt, BitCast, or FPTrunc for fp -> fp casts
- static CastInst *CreateFPCast(
- Value *S, ///< The floating point value to be casted
- Type *Ty, ///< The floating point type to cast to
- const Twine &Name = "", ///< Name for the instruction
- Instruction *InsertBefore = 0 ///< Place to insert the instruction
- );
-
- /// @brief Create an FPExt, BitCast, or FPTrunc for fp -> fp casts
- static CastInst *CreateFPCast(
- Value *S, ///< The floating point value to be casted
- Type *Ty, ///< The floating point type to cast to
- const Twine &Name, ///< The name for the instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// @brief Create a Trunc or BitCast cast instruction
- static CastInst *CreateTruncOrBitCast(
- Value *S, ///< The value to be casted (operand 0)
- Type *Ty, ///< The type to which cast should be made
- const Twine &Name = "", ///< Name for the instruction
- Instruction *InsertBefore = 0 ///< Place to insert the instruction
- );
-
- /// @brief Create a Trunc or BitCast cast instruction
- static CastInst *CreateTruncOrBitCast(
- Value *S, ///< The value to be casted (operand 0)
- Type *Ty, ///< The type to which operand is casted
- const Twine &Name, ///< The name for the instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// @brief Check whether it is valid to call getCastOpcode for these types.
- static bool isCastable(
- Type *SrcTy, ///< The Type from which the value should be cast.
- Type *DestTy ///< The Type to which the value should be cast.
- );
-
- /// Returns the opcode necessary to cast Val into Ty using usual casting
- /// rules.
- /// @brief Infer the opcode for cast operand and type
- static Instruction::CastOps getCastOpcode(
- const Value *Val, ///< The value to cast
- bool SrcIsSigned, ///< Whether to treat the source as signed
- Type *Ty, ///< The Type to which the value should be casted
-    bool DstIsSigned  ///< Whether to treat the dest. as signed
- );
-
- /// There are several places where we need to know if a cast instruction
- /// only deals with integer source and destination types. To simplify that
- /// logic, this method is provided.
-  /// @returns true iff the cast has integer-typed operand and dest types.
- /// @brief Determine if this is an integer-only cast.
- bool isIntegerCast() const;
-
- /// A lossless cast is one that does not alter the basic value. It implies
- /// a no-op cast but is more stringent, preventing things like int->float,
- /// long->double, or int->ptr.
- /// @returns true iff the cast is lossless.
- /// @brief Determine if this is a lossless cast.
- bool isLosslessCast() const;
-
- /// A no-op cast is one that can be effected without changing any bits.
- /// It implies that the source and destination types are the same size. The
- /// IntPtrTy argument is used to make accurate determinations for casts
- /// involving Integer and Pointer types. They are no-op casts if the integer
- /// is the same size as the pointer. However, pointer size varies with
- /// platform. Generally, the result of DataLayout::getIntPtrType() should be
- /// passed in. If that's not available, use Type::Int64Ty, which will make
- /// the isNoopCast call conservative.
- /// @brief Determine if the described cast is a no-op cast.
- static bool isNoopCast(
- Instruction::CastOps Opcode, ///< Opcode of cast
- Type *SrcTy, ///< SrcTy of cast
- Type *DstTy, ///< DstTy of cast
- Type *IntPtrTy ///< Integer type corresponding to Ptr types, or null
- );
-
- /// @brief Determine if this cast is a no-op cast.
- bool isNoopCast(
- Type *IntPtrTy ///< Integer type corresponding to pointer
- ) const;
-
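
To make the IntPtrTy contract above concrete, here is a hedged sketch that asks whether a ptrtoint to i64 is a no-op for a given target. isPtrToI64Noop is an illustrative name, and the include paths assume this import's llvm/IR/ layout; the result is true exactly when pointers are 64 bits wide under DL.

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    bool isPtrToI64Noop(LLVMContext &Ctx, const DataLayout &DL) {
      Type *SrcTy = Type::getInt8PtrTy(Ctx); // i8*
      Type *DstTy = Type::getInt64Ty(Ctx);   // i64
      // Pass the target's pointer-sized integer type, as recommended above.
      return CastInst::isNoopCast(Instruction::PtrToInt, SrcTy, DstTy,
                                  DL.getIntPtrType(Ctx));
    }
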
- /// Determine how a pair of casts can be eliminated, if they can be at all.
- /// This is a helper function for both CastInst and ConstantExpr.
- /// @returns 0 if the CastInst pair can't be eliminated, otherwise
- /// returns Instruction::CastOps value for a cast that can replace
- /// the pair, casting SrcTy to DstTy.
- /// @brief Determine if a cast pair is eliminable
- static unsigned isEliminableCastPair(
- Instruction::CastOps firstOpcode, ///< Opcode of first cast
- Instruction::CastOps secondOpcode, ///< Opcode of second cast
- Type *SrcTy, ///< SrcTy of 1st cast
- Type *MidTy, ///< DstTy of 1st cast & SrcTy of 2nd cast
- Type *DstTy, ///< DstTy of 2nd cast
- Type *SrcIntPtrTy, ///< Integer type corresponding to Ptr SrcTy, or null
- Type *MidIntPtrTy, ///< Integer type corresponding to Ptr MidTy, or null
- Type *DstIntPtrTy ///< Integer type corresponding to Ptr DstTy, or null
- );
-
- /// @brief Return the opcode of this CastInst
- Instruction::CastOps getOpcode() const {
- return Instruction::CastOps(Instruction::getOpcode());
- }
-
- /// @brief Return the source type, as a convenience
- Type* getSrcTy() const { return getOperand(0)->getType(); }
- /// @brief Return the destination type, as a convenience
- Type* getDestTy() const { return getType(); }
-
- /// This method can be used to determine if a cast from S to DstTy using
- /// Opcode op is valid or not.
- /// @returns true iff the proposed cast is valid.
- /// @brief Determine if a cast is valid without creating one.
- static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy);
-
- /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->isCast();
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// CmpInst Class
-//===----------------------------------------------------------------------===//
-
-/// This class is the base class for the comparison instructions.
-/// @brief Abstract base class of comparison instructions.
-class CmpInst : public Instruction {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- CmpInst() LLVM_DELETED_FUNCTION;
-protected:
- CmpInst(Type *ty, Instruction::OtherOps op, unsigned short pred,
- Value *LHS, Value *RHS, const Twine &Name = "",
- Instruction *InsertBefore = 0);
-
- CmpInst(Type *ty, Instruction::OtherOps op, unsigned short pred,
- Value *LHS, Value *RHS, const Twine &Name,
- BasicBlock *InsertAtEnd);
-
- virtual void anchor() LLVM_OVERRIDE; // Out of line virtual method.
-public:
- /// This enumeration lists the possible predicates for CmpInst subclasses.
- /// Values in the range 0-31 are reserved for FCmpInst, while values in the
- /// range 32-64 are reserved for ICmpInst. This is necessary to ensure the
-  /// predicate values do not overlap between the classes.
- enum Predicate {
- // Opcode U L G E Intuitive operation
- FCMP_FALSE = 0, ///< 0 0 0 0 Always false (always folded)
- FCMP_OEQ = 1, ///< 0 0 0 1 True if ordered and equal
- FCMP_OGT = 2, ///< 0 0 1 0 True if ordered and greater than
- FCMP_OGE = 3, ///< 0 0 1 1 True if ordered and greater than or equal
- FCMP_OLT = 4, ///< 0 1 0 0 True if ordered and less than
- FCMP_OLE = 5, ///< 0 1 0 1 True if ordered and less than or equal
- FCMP_ONE = 6, ///< 0 1 1 0 True if ordered and operands are unequal
- FCMP_ORD = 7, ///< 0 1 1 1 True if ordered (no nans)
- FCMP_UNO = 8, ///< 1 0 0 0 True if unordered: isnan(X) | isnan(Y)
- FCMP_UEQ = 9, ///< 1 0 0 1 True if unordered or equal
- FCMP_UGT = 10, ///< 1 0 1 0 True if unordered or greater than
- FCMP_UGE = 11, ///< 1 0 1 1 True if unordered, greater than, or equal
- FCMP_ULT = 12, ///< 1 1 0 0 True if unordered or less than
- FCMP_ULE = 13, ///< 1 1 0 1 True if unordered, less than, or equal
- FCMP_UNE = 14, ///< 1 1 1 0 True if unordered or not equal
- FCMP_TRUE = 15, ///< 1 1 1 1 Always true (always folded)
- FIRST_FCMP_PREDICATE = FCMP_FALSE,
- LAST_FCMP_PREDICATE = FCMP_TRUE,
- BAD_FCMP_PREDICATE = FCMP_TRUE + 1,
- ICMP_EQ = 32, ///< equal
- ICMP_NE = 33, ///< not equal
- ICMP_UGT = 34, ///< unsigned greater than
- ICMP_UGE = 35, ///< unsigned greater or equal
- ICMP_ULT = 36, ///< unsigned less than
- ICMP_ULE = 37, ///< unsigned less or equal
- ICMP_SGT = 38, ///< signed greater than
- ICMP_SGE = 39, ///< signed greater or equal
- ICMP_SLT = 40, ///< signed less than
- ICMP_SLE = 41, ///< signed less or equal
- FIRST_ICMP_PREDICATE = ICMP_EQ,
- LAST_ICMP_PREDICATE = ICMP_SLE,
- BAD_ICMP_PREDICATE = ICMP_SLE + 1
- };
-
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
- /// Construct a compare instruction, given the opcode, the predicate and
-  /// the two operands. Optionally (if InsertBefore is specified) insert the
- /// instruction into a BasicBlock right before the specified instruction.
- /// The specified Instruction is allowed to be a dereferenced end iterator.
- /// @brief Create a CmpInst
- static CmpInst *Create(OtherOps Op,
- unsigned short predicate, Value *S1,
- Value *S2, const Twine &Name = "",
- Instruction *InsertBefore = 0);
-
- /// Construct a compare instruction, given the opcode, the predicate and the
- /// two operands. Also automatically insert this instruction to the end of
- /// the BasicBlock specified.
- /// @brief Create a CmpInst
- static CmpInst *Create(OtherOps Op, unsigned short predicate, Value *S1,
- Value *S2, const Twine &Name, BasicBlock *InsertAtEnd);
-
- /// @brief Get the opcode casted to the right type
- OtherOps getOpcode() const {
- return static_cast<OtherOps>(Instruction::getOpcode());
- }
-
- /// @brief Return the predicate for this instruction.
- Predicate getPredicate() const {
- return Predicate(getSubclassDataFromInstruction());
- }
-
- /// @brief Set the predicate for this instruction to the specified value.
- void setPredicate(Predicate P) { setInstructionSubclassData(P); }
-
- static bool isFPPredicate(Predicate P) {
- return P >= FIRST_FCMP_PREDICATE && P <= LAST_FCMP_PREDICATE;
- }
-
- static bool isIntPredicate(Predicate P) {
- return P >= FIRST_ICMP_PREDICATE && P <= LAST_ICMP_PREDICATE;
- }
-
- bool isFPPredicate() const { return isFPPredicate(getPredicate()); }
- bool isIntPredicate() const { return isIntPredicate(getPredicate()); }
-
-
- /// For example, EQ -> NE, UGT -> ULE, SLT -> SGE,
- /// OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
- /// @returns the inverse predicate for the instruction's current predicate.
- /// @brief Return the inverse of the instruction's predicate.
- Predicate getInversePredicate() const {
- return getInversePredicate(getPredicate());
- }
-
- /// For example, EQ -> NE, UGT -> ULE, SLT -> SGE,
- /// OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
-  /// @returns the inverse predicate for the predicate provided in \p pred.
- /// @brief Return the inverse of a given predicate
- static Predicate getInversePredicate(Predicate pred);
-
- /// For example, EQ->EQ, SLE->SGE, ULT->UGT,
- /// OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
- /// @returns the predicate that would be the result of exchanging the two
- /// operands of the CmpInst instruction without changing the result
- /// produced.
- /// @brief Return the predicate as if the operands were swapped
- Predicate getSwappedPredicate() const {
- return getSwappedPredicate(getPredicate());
- }
-
- /// This is a static version that you can use without an instruction
- /// available.
- /// @brief Return the predicate as if the operands were swapped.
- static Predicate getSwappedPredicate(Predicate pred);
-
- /// @brief Provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// This is just a convenience that dispatches to the subclasses.
- /// @brief Swap the operands and adjust predicate accordingly to retain
- /// the same comparison.
- void swapOperands();
-
- /// This is just a convenience that dispatches to the subclasses.
- /// @brief Determine if this CmpInst is commutative.
- bool isCommutative() const;
-
- /// This is just a convenience that dispatches to the subclasses.
- /// @brief Determine if this is an equals/not equals predicate.
- bool isEquality() const;
-
- /// @returns true if the comparison is signed, false otherwise.
- /// @brief Determine if this instruction is using a signed comparison.
- bool isSigned() const {
- return isSigned(getPredicate());
- }
-
- /// @returns true if the comparison is unsigned, false otherwise.
- /// @brief Determine if this instruction is using an unsigned comparison.
- bool isUnsigned() const {
- return isUnsigned(getPredicate());
- }
-
- /// This is just a convenience.
- /// @brief Determine if this is true when both operands are the same.
- bool isTrueWhenEqual() const {
- return isTrueWhenEqual(getPredicate());
- }
-
- /// This is just a convenience.
- /// @brief Determine if this is false when both operands are the same.
- bool isFalseWhenEqual() const {
- return isFalseWhenEqual(getPredicate());
- }
-
- /// @returns true if the predicate is unsigned, false otherwise.
- /// @brief Determine if the predicate is an unsigned operation.
- static bool isUnsigned(unsigned short predicate);
-
- /// @returns true if the predicate is signed, false otherwise.
-  /// @brief Determine if the predicate is a signed operation.
- static bool isSigned(unsigned short predicate);
-
- /// @brief Determine if the predicate is an ordered operation.
- static bool isOrdered(unsigned short predicate);
-
- /// @brief Determine if the predicate is an unordered operation.
- static bool isUnordered(unsigned short predicate);
-
- /// Determine if the predicate is true when comparing a value with itself.
- static bool isTrueWhenEqual(unsigned short predicate);
-
- /// Determine if the predicate is false when comparing a value with itself.
- static bool isFalseWhenEqual(unsigned short predicate);
-
- /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::ICmp ||
- I->getOpcode() == Instruction::FCmp;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-
- /// @brief Create a result type for fcmp/icmp
- static Type* makeCmpResultType(Type* opnd_type) {
- if (VectorType* vt = dyn_cast<VectorType>(opnd_type)) {
- return VectorType::get(Type::getInt1Ty(opnd_type->getContext()),
- vt->getNumElements());
- }
- return Type::getInt1Ty(opnd_type->getContext());
- }
-private:
- // Shadow Value::setValueSubclassData with a private forwarding method so that
- // subclasses cannot accidentally use it.
- void setValueSubclassData(unsigned short D) {
- Value::setValueSubclassData(D);
- }
-};
-
-
-// FIXME: these are redundant if CmpInst < BinaryOperator
-template <>
-struct OperandTraits<CmpInst> : public FixedNumOperandTraits<CmpInst, 2> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value)
-
-} // End llvm namespace
-
-#endif
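
Tying the creation helpers together, a hedged sketch that appends an nsw add and an integer compare to the end of a block. emitExample and the value names are illustrative, and the include paths assume this import's llvm/IR/ layout.

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    void emitExample(BasicBlock *BB, Value *X, Value *Y) {
      // "sum = add nsw X, Y", inserted at the end of BB (via DEFINE_HELPERS).
      BinaryOperator *Sum = BinaryOperator::CreateNSWAdd(X, Y, "sum", BB);
      // "eq = icmp eq X, Y", also inserted at the end of BB.
      CmpInst *Eq = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
                                    X, Y, "eq", BB);
      (void)Sum; (void)Eq;
    }
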
diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h
deleted file mode 100644
index 8aa8a56bf825..000000000000
--- a/include/llvm/Instruction.h
+++ /dev/null
@@ -1,407 +0,0 @@
-//===-- llvm/Instruction.h - Instruction class definition -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the Instruction class, which is the
-// base class for all of the LLVM instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_INSTRUCTION_H
-#define LLVM_INSTRUCTION_H
-
-#include "llvm/User.h"
-#include "llvm/ADT/ilist_node.h"
-#include "llvm/Support/DebugLoc.h"
-
-namespace llvm {
-
-class LLVMContext;
-class MDNode;
-
-template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
-
-class Instruction : public User, public ilist_node<Instruction> {
- void operator=(const Instruction &) LLVM_DELETED_FUNCTION;
- Instruction(const Instruction &) LLVM_DELETED_FUNCTION;
-
- BasicBlock *Parent;
- DebugLoc DbgLoc; // 'dbg' Metadata cache.
-
- enum {
- /// HasMetadataBit - This is a bit stored in the SubClassData field which
- /// indicates whether this instruction has metadata attached to it or not.
- HasMetadataBit = 1 << 15
- };
-public:
-  // Out of line virtual method, so the vtable, etc. has a home.
- ~Instruction();
-
- /// use_back - Specialize the methods defined in Value, as we know that an
- /// instruction can only be used by other instructions.
- Instruction *use_back() { return cast<Instruction>(*use_begin());}
- const Instruction *use_back() const { return cast<Instruction>(*use_begin());}
-
- inline const BasicBlock *getParent() const { return Parent; }
- inline BasicBlock *getParent() { return Parent; }
-
- /// removeFromParent - This method unlinks 'this' from the containing basic
- /// block, but does not delete it.
- ///
- void removeFromParent();
-
- /// eraseFromParent - This method unlinks 'this' from the containing basic
- /// block and deletes it.
- ///
- void eraseFromParent();
-
-  /// insertBefore - Insert an unlinked instruction into a basic block
- /// immediately before the specified instruction.
- void insertBefore(Instruction *InsertPos);
-
-  /// insertAfter - Insert an unlinked instruction into a basic block
- /// immediately after the specified instruction.
- void insertAfter(Instruction *InsertPos);
-
- /// moveBefore - Unlink this instruction from its current basic block and
- /// insert it into the basic block that MovePos lives in, right before
- /// MovePos.
- void moveBefore(Instruction *MovePos);
-
- //===--------------------------------------------------------------------===//
- // Subclass classification.
- //===--------------------------------------------------------------------===//
-
- /// getOpcode() returns a member of one of the enums like Instruction::Add.
- unsigned getOpcode() const { return getValueID() - InstructionVal; }
-
- const char *getOpcodeName() const { return getOpcodeName(getOpcode()); }
- bool isTerminator() const { return isTerminator(getOpcode()); }
- bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
-  bool isShift() const { return isShift(getOpcode()); }
- bool isCast() const { return isCast(getOpcode()); }
-
- static const char* getOpcodeName(unsigned OpCode);
-
- static inline bool isTerminator(unsigned OpCode) {
- return OpCode >= TermOpsBegin && OpCode < TermOpsEnd;
- }
-
- static inline bool isBinaryOp(unsigned Opcode) {
- return Opcode >= BinaryOpsBegin && Opcode < BinaryOpsEnd;
- }
-
- /// @brief Determine if the Opcode is one of the shift instructions.
- static inline bool isShift(unsigned Opcode) {
- return Opcode >= Shl && Opcode <= AShr;
- }
-
- /// isLogicalShift - Return true if this is a logical shift left or a logical
- /// shift right.
- inline bool isLogicalShift() const {
- return getOpcode() == Shl || getOpcode() == LShr;
- }
-
- /// isArithmeticShift - Return true if this is an arithmetic shift right.
- inline bool isArithmeticShift() const {
- return getOpcode() == AShr;
- }
-
- /// @brief Determine if the OpCode is one of the CastInst instructions.
- static inline bool isCast(unsigned OpCode) {
- return OpCode >= CastOpsBegin && OpCode < CastOpsEnd;
- }
-
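
The opcode predicates above compose into simple classifiers; a hedged sketch (classify is an illustrative name; isShift is called through its static form, which works on a const reference):

    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    const char *classify(const Instruction &I) {
      if (I.isTerminator()) return "terminator";
      if (I.isBinaryOp())   return "binary op";
      if (I.isCast())       return "cast";
      if (Instruction::isShift(I.getOpcode())) return "shift";
      return I.getOpcodeName(); // fall back to the opcode's own name
    }
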
- //===--------------------------------------------------------------------===//
- // Metadata manipulation.
- //===--------------------------------------------------------------------===//
-
- /// hasMetadata() - Return true if this instruction has any metadata attached
- /// to it.
- bool hasMetadata() const {
- return !DbgLoc.isUnknown() || hasMetadataHashEntry();
- }
-
- /// hasMetadataOtherThanDebugLoc - Return true if this instruction has
- /// metadata attached to it other than a debug location.
- bool hasMetadataOtherThanDebugLoc() const {
- return hasMetadataHashEntry();
- }
-
- /// getMetadata - Get the metadata of given kind attached to this Instruction.
- /// If the metadata is not found then return null.
- MDNode *getMetadata(unsigned KindID) const {
- if (!hasMetadata()) return 0;
- return getMetadataImpl(KindID);
- }
-
- /// getMetadata - Get the metadata of given kind attached to this Instruction.
- /// If the metadata is not found then return null.
- MDNode *getMetadata(StringRef Kind) const {
- if (!hasMetadata()) return 0;
- return getMetadataImpl(Kind);
- }
-
- /// getAllMetadata - Get all metadata attached to this Instruction. The first
- /// element of each pair returned is the KindID, the second element is the
- /// metadata value. This list is returned sorted by the KindID.
- void getAllMetadata(
-     SmallVectorImpl<std::pair<unsigned, MDNode*> > &MDs) const {
- if (hasMetadata())
- getAllMetadataImpl(MDs);
- }
-
- /// getAllMetadataOtherThanDebugLoc - This does the same thing as
- /// getAllMetadata, except that it filters out the debug location.
- void getAllMetadataOtherThanDebugLoc(SmallVectorImpl<std::pair<unsigned,
- MDNode*> > &MDs) const {
- if (hasMetadataOtherThanDebugLoc())
- getAllMetadataOtherThanDebugLocImpl(MDs);
- }
-
- /// setMetadata - Set the metadata of the specified kind to the specified
- /// node. This updates/replaces metadata if already present, or removes it if
- /// Node is null.
- void setMetadata(unsigned KindID, MDNode *Node);
- void setMetadata(StringRef Kind, MDNode *Node);
-
- /// setDebugLoc - Set the debug location information for this instruction.
- void setDebugLoc(const DebugLoc &Loc) { DbgLoc = Loc; }
-
- /// getDebugLoc - Return the debug location for this node as a DebugLoc.
- const DebugLoc &getDebugLoc() const { return DbgLoc; }
-
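- // Usage sketch (Ctx, I, and SomeNode are assumed to exist): metadata is
- // keyed by kind IDs obtained from the LLVMContext:
- //   unsigned KindID = Ctx.getMDKindID("my.annotation");
- //   I->setMetadata(KindID, SomeNode);        // attach or replace
- //   if (MDNode *MD = I->getMetadata(KindID)) { /* ... */ }
- //   I->setMetadata(KindID, 0);               // remove (null node)
-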
-private:
- /// hasMetadataHashEntry - Return true if we have an entry in the on-the-side
- /// metadata hash.
- bool hasMetadataHashEntry() const {
- return (getSubclassDataFromValue() & HasMetadataBit) != 0;
- }
-
- // These are all implemented in Metadata.cpp.
- MDNode *getMetadataImpl(unsigned KindID) const;
- MDNode *getMetadataImpl(StringRef Kind) const;
- void getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,MDNode*> > &)const;
- void getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
- MDNode*> > &) const;
- void clearMetadataHashEntries();
-public:
- //===--------------------------------------------------------------------===//
- // Predicates and helper methods.
- //===--------------------------------------------------------------------===//
-
-
- /// isAssociative - Return true if the instruction is associative:
- ///
- /// Associative operators satisfy: x op (y op z) === (x op y) op z
- ///
- /// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
- ///
- bool isAssociative() const { return isAssociative(getOpcode()); }
- static bool isAssociative(unsigned op);
-
- /// isCommutative - Return true if the instruction is commutative:
- ///
- /// Commutative operators satisfy: (x op y) === (y op x)
- ///
- /// In LLVM, these are the associative operators, plus the equality
- /// comparisons (icmp/fcmp eq and ne), when applied to any type.
- ///
- bool isCommutative() const { return isCommutative(getOpcode()); }
- static bool isCommutative(unsigned op);
-
- /// isIdempotent - Return true if the instruction is idempotent:
- ///
- /// Idempotent operators satisfy: x op x === x
- ///
- /// In LLVM, the And and Or operators are idempotent.
- ///
- bool isIdempotent() const { return isIdempotent(getOpcode()); }
- static bool isIdempotent(unsigned op);
-
- /// isNilpotent - Return true if the instruction is nilpotent:
- ///
- /// Nilpotent operators satisfy: x op x === Id,
- ///
- /// where Id is the identity for the operator, i.e. a constant such that
- /// x op Id === x and Id op x === x for all x.
- ///
- /// In LLVM, the Xor operator is nilpotent.
- ///
- bool isNilpotent() const { return isNilpotent(getOpcode()); }
- static bool isNilpotent(unsigned op);
-
- /// mayWriteToMemory - Return true if this instruction may modify memory.
- ///
- bool mayWriteToMemory() const;
-
- /// mayReadFromMemory - Return true if this instruction may read memory.
- ///
- bool mayReadFromMemory() const;
-
- /// mayReadOrWriteMemory - Return true if this instruction may read or
- /// write memory.
- ///
- bool mayReadOrWriteMemory() const {
- return mayReadFromMemory() || mayWriteToMemory();
- }
-
- /// mayThrow - Return true if this instruction may throw an exception.
- ///
- bool mayThrow() const;
-
- /// mayHaveSideEffects - Return true if the instruction may have side effects.
- ///
- /// Note that this does not consider malloc and alloca to have side
- /// effects because the newly allocated memory is completely invisible to
- /// instructions which don't use the returned value. For cases where this
- /// matters, isSafeToSpeculativelyExecute may be more appropriate.
- bool mayHaveSideEffects() const {
- return mayWriteToMemory() || mayThrow();
- }
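-
- // E.g. (sketch): speculation and hoisting decisions typically start from
- // these predicates:
- //   bool speculatable = !I->mayHaveSideEffects() && !I->mayReadFromMemory();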
-
- /// clone() - Create a copy of 'this' instruction that is identical in all
- /// ways except the following:
- /// * The instruction has no parent
- /// * The instruction has no name
- ///
- Instruction *clone() const;
-
- /// isIdenticalTo - Return true if the specified instruction is exactly
- /// identical to the current one. This means that all operands match and any
- /// extra information (e.g. load is volatile) agree.
- bool isIdenticalTo(const Instruction *I) const;
-
- /// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
- /// ignores the SubclassOptionalData flags, which specify conditions
- /// under which the instruction's result is undefined.
- bool isIdenticalToWhenDefined(const Instruction *I) const;
-
- /// When checking for operation equivalence (using isSameOperationAs) it is
- /// sometimes useful to ignore certain attributes.
- enum OperationEquivalenceFlags {
- /// Check for equivalence ignoring load/store alignment.
- CompareIgnoringAlignment = 1<<0,
- /// Check for equivalence treating a type and a vector of that type
- /// as equivalent.
- CompareUsingScalarTypes = 1<<1
- };
-
- /// This function determines if the specified instruction executes the same
- /// operation as the current one. This means that the opcodes, type, operand
- /// types and any other factors affecting the operation must be the same. This
- /// is similar to isIdenticalTo except the operands themselves don't have to
- /// be identical.
- /// @returns true if the specified instruction is the same operation as
- /// the current one.
- /// @brief Determine if one instruction is the same operation as another.
- bool isSameOperationAs(const Instruction *I, unsigned flags = 0) const;
-
- /// isUsedOutsideOfBlock - Return true if there are any uses of this
- /// instruction in blocks other than the specified block. Note that PHI nodes
- /// are considered to evaluate their operands in the corresponding predecessor
- /// block.
- bool isUsedOutsideOfBlock(const BasicBlock *BB) const;
-
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
- return V->getValueID() >= Value::InstructionVal;
- }
-
- //----------------------------------------------------------------------
- // Exported enumerations.
- //
- enum TermOps { // These terminate basic blocks
-#define FIRST_TERM_INST(N) TermOpsBegin = N,
-#define HANDLE_TERM_INST(N, OPC, CLASS) OPC = N,
-#define LAST_TERM_INST(N) TermOpsEnd = N+1
-#include "llvm/Instruction.def"
- };
-
- enum BinaryOps {
-#define FIRST_BINARY_INST(N) BinaryOpsBegin = N,
-#define HANDLE_BINARY_INST(N, OPC, CLASS) OPC = N,
-#define LAST_BINARY_INST(N) BinaryOpsEnd = N+1
-#include "llvm/Instruction.def"
- };
-
- enum MemoryOps {
-#define FIRST_MEMORY_INST(N) MemoryOpsBegin = N,
-#define HANDLE_MEMORY_INST(N, OPC, CLASS) OPC = N,
-#define LAST_MEMORY_INST(N) MemoryOpsEnd = N+1
-#include "llvm/Instruction.def"
- };
-
- enum CastOps {
-#define FIRST_CAST_INST(N) CastOpsBegin = N,
-#define HANDLE_CAST_INST(N, OPC, CLASS) OPC = N,
-#define LAST_CAST_INST(N) CastOpsEnd = N+1
-#include "llvm/Instruction.def"
- };
-
- enum OtherOps {
-#define FIRST_OTHER_INST(N) OtherOpsBegin = N,
-#define HANDLE_OTHER_INST(N, OPC, CLASS) OPC = N,
-#define LAST_OTHER_INST(N) OtherOpsEnd = N+1
-#include "llvm/Instruction.def"
- };
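-
- // Each block above is expanded from llvm/Instruction.def; e.g. the TermOps
- // enum comes out roughly as (exact numbering is owned by the .def file):
- //   enum TermOps { TermOpsBegin = 1, Ret = 1, Br = 2, /* ... */ TermOpsEnd };
-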
-private:
- // Shadow Value::setValueSubclassData with a private forwarding method so that
- // subclasses cannot accidentally use it.
- void setValueSubclassData(unsigned short D) {
- Value::setValueSubclassData(D);
- }
- unsigned short getSubclassDataFromValue() const {
- return Value::getSubclassDataFromValue();
- }
-
- void setHasMetadataHashEntry(bool V) {
- setValueSubclassData((getSubclassDataFromValue() & ~HasMetadataBit) |
- (V ? HasMetadataBit : 0));
- }
-
- friend class SymbolTableListTraits<Instruction, BasicBlock>;
- void setParent(BasicBlock *P);
-protected:
- // Instruction subclasses can stick up to 15 bits of stuff into the
- // SubclassData field of the instruction with these members.
-
- // Verify that only the low 15 bits are used.
- void setInstructionSubclassData(unsigned short D) {
- assert((D & HasMetadataBit) == 0 && "Out of range value put into field");
- setValueSubclassData((getSubclassDataFromValue() & HasMetadataBit) | D);
- }
-
- unsigned getSubclassDataFromInstruction() const {
- return getSubclassDataFromValue() & ~HasMetadataBit;
- }
-
- Instruction(Type *Ty, unsigned iType, Use *Ops, unsigned NumOps,
- Instruction *InsertBefore = 0);
- Instruction(Type *Ty, unsigned iType, Use *Ops, unsigned NumOps,
- BasicBlock *InsertAtEnd);
- virtual Instruction *clone_impl() const = 0;
-
-};
-
-// Instruction* is only 4-byte aligned.
-template<>
-class PointerLikeTypeTraits<Instruction*> {
- typedef Instruction* PT;
-public:
- static inline void *getAsVoidPointer(PT P) { return P; }
- static inline PT getFromVoidPointer(void *P) {
- return static_cast<PT>(P);
- }
- enum { NumLowBitsAvailable = 2 };
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
deleted file mode 100644
index 69593b48c1f1..000000000000
--- a/include/llvm/Instructions.h
+++ /dev/null
@@ -1,3716 +0,0 @@
-//===-- llvm/Instructions.h - Instruction subclass definitions --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file exposes the class definitions of all of the subclasses of the
-// Instruction class. This is meant to be an easy way to get access to all
-// instruction subclasses.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_INSTRUCTIONS_H
-#define LLVM_INSTRUCTIONS_H
-
-#include "llvm/InstrTypes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Attributes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Support/IntegersSubset.h"
-#include "llvm/Support/IntegersSubsetMapping.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <iterator>
-
-namespace llvm {
-
-class ConstantInt;
-class ConstantRange;
-class APInt;
-class LLVMContext;
-
-enum AtomicOrdering {
- NotAtomic = 0,
- Unordered = 1,
- Monotonic = 2,
- // Consume = 3, // Not specified yet.
- Acquire = 4,
- Release = 5,
- AcquireRelease = 6,
- SequentiallyConsistent = 7
-};
-
-enum SynchronizationScope {
- SingleThread = 0,
- CrossThread = 1
-};
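-
-// (These orderings roughly parallel the C++11 memory model: Monotonic
-// corresponds to memory_order_relaxed, and Acquire/Release/AcquireRelease/
-// SequentiallyConsistent to memory_order_acquire/_release/_acq_rel/_seq_cst;
-// Unordered is weaker than any C++11 order and exists for racy loads and
-// stores in languages like Java. SingleThread restricts the ordering to
-// signal-handler synchronization within one thread.)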
-
-//===----------------------------------------------------------------------===//
-// AllocaInst Class
-//===----------------------------------------------------------------------===//
-
-/// AllocaInst - an instruction to allocate memory on the stack
-///
-class AllocaInst : public UnaryInstruction {
-protected:
- virtual AllocaInst *clone_impl() const;
-public:
- explicit AllocaInst(Type *Ty, Value *ArraySize = 0,
- const Twine &Name = "", Instruction *InsertBefore = 0);
- AllocaInst(Type *Ty, Value *ArraySize,
- const Twine &Name, BasicBlock *InsertAtEnd);
-
- AllocaInst(Type *Ty, const Twine &Name, Instruction *InsertBefore = 0);
- AllocaInst(Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd);
-
- AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
- const Twine &Name = "", Instruction *InsertBefore = 0);
- AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
- const Twine &Name, BasicBlock *InsertAtEnd);
-
- // Out of line virtual method, so the vtable, etc. has a home.
- virtual ~AllocaInst();
-
- /// isArrayAllocation - Return true if there is an allocation size parameter
- /// to the allocation instruction that is not 1.
- ///
- bool isArrayAllocation() const;
-
- /// getArraySize - Get the number of elements allocated. For a simple
- /// allocation of a single element, this will return a constant 1 value.
- ///
- const Value *getArraySize() const { return getOperand(0); }
- Value *getArraySize() { return getOperand(0); }
-
- /// getType - Overload to return most specific pointer type
- ///
- PointerType *getType() const {
- return reinterpret_cast<PointerType*>(Instruction::getType());
- }
-
- /// getAllocatedType - Return the type that is being allocated by the
- /// instruction.
- ///
- Type *getAllocatedType() const;
-
- /// getAlignment - Return the alignment of the memory that is being allocated
- /// by the instruction.
- ///
- unsigned getAlignment() const {
- return (1u << getSubclassDataFromInstruction()) >> 1;
- }
- void setAlignment(unsigned Align);
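- // (The alignment is encoded in the subclass data as Log2(Alignment)+1, with
- // 0 meaning "no alignment specified"; e.g. a stored value of 4 decodes via
- // (1u << 4) >> 1 to an alignment of 8 bytes.)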
-
- /// isStaticAlloca - Return true if this alloca is in the entry block of the
- /// function and is a constant size. If so, the code generator will fold it
- /// into the prolog/epilog code, so it is basically free.
- bool isStaticAlloca() const;
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return (I->getOpcode() == Instruction::Alloca);
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- // Shadow Instruction::setInstructionSubclassData with a private forwarding
- // method so that subclasses cannot accidentally use it.
- void setInstructionSubclassData(unsigned short D) {
- Instruction::setInstructionSubclassData(D);
- }
-};
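-
-// Usage sketch (Ctx and EntryBB are assumed to exist): allocate one i32 slot
-// at the end of the entry block and pin its alignment:
-//   AllocaInst *Slot = new AllocaInst(Type::getInt32Ty(Ctx), "slot", EntryBB);
-//   Slot->setAlignment(4);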
-
-
-//===----------------------------------------------------------------------===//
-// LoadInst Class
-//===----------------------------------------------------------------------===//
-
-/// LoadInst - an instruction for reading from memory. This uses the
-/// SubclassData field in Value to store whether or not the load is volatile.
-///
-class LoadInst : public UnaryInstruction {
- void AssertOK();
-protected:
- virtual LoadInst *clone_impl() const;
-public:
- LoadInst(Value *Ptr, const Twine &NameStr, Instruction *InsertBefore);
- LoadInst(Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile = false,
- Instruction *InsertBefore = 0);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
- BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
- unsigned Align, Instruction *InsertBefore = 0);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
- unsigned Align, BasicBlock *InsertAtEnd);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope = CrossThread,
- Instruction *InsertBefore = 0);
- LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope,
- BasicBlock *InsertAtEnd);
-
- LoadInst(Value *Ptr, const char *NameStr, Instruction *InsertBefore);
- LoadInst(Value *Ptr, const char *NameStr, BasicBlock *InsertAtEnd);
- explicit LoadInst(Value *Ptr, const char *NameStr = 0,
- bool isVolatile = false, Instruction *InsertBefore = 0);
- LoadInst(Value *Ptr, const char *NameStr, bool isVolatile,
- BasicBlock *InsertAtEnd);
-
- /// isVolatile - Return true if this is a load from a volatile memory
- /// location.
- ///
- bool isVolatile() const { return getSubclassDataFromInstruction() & 1; }
-
- /// setVolatile - Specify whether this is a volatile load or not.
- ///
- void setVolatile(bool V) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
- (V ? 1 : 0));
- }
-
- /// getAlignment - Return the alignment of the access that is being performed
- ///
- unsigned getAlignment() const {
- return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1;
- }
-
- void setAlignment(unsigned Align);
-
- /// Returns the ordering effect of this load.
- AtomicOrdering getOrdering() const {
- return AtomicOrdering((getSubclassDataFromInstruction() >> 7) & 7);
- }
-
- /// Set the ordering constraint on this load. May not be Release or
- /// AcquireRelease.
- void setOrdering(AtomicOrdering Ordering) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~(7 << 7)) |
- (Ordering << 7));
- }
-
- SynchronizationScope getSynchScope() const {
- return SynchronizationScope((getSubclassDataFromInstruction() >> 6) & 1);
- }
-
- /// Specify whether this load is ordered with respect to all
- /// concurrently executing threads, or only with respect to signal handlers
- /// executing in the same thread.
- void setSynchScope(SynchronizationScope xthread) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~(1 << 6)) |
- (xthread << 6));
- }
-
- bool isAtomic() const { return getOrdering() != NotAtomic; }
- void setAtomic(AtomicOrdering Ordering,
- SynchronizationScope SynchScope = CrossThread) {
- setOrdering(Ordering);
- setSynchScope(SynchScope);
- }
-
- bool isSimple() const { return !isAtomic() && !isVolatile(); }
- bool isUnordered() const {
- return getOrdering() <= Unordered && !isVolatile();
- }
-
- Value *getPointerOperand() { return getOperand(0); }
- const Value *getPointerOperand() const { return getOperand(0); }
- static unsigned getPointerOperandIndex() { return 0U; }
-
- /// \brief Returns the address space of the pointer operand.
- unsigned getPointerAddressSpace() const {
- return getPointerOperand()->getType()->getPointerAddressSpace();
- }
-
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Load;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- // Shadow Instruction::setInstructionSubclassData with a private forwarding
- // method so that subclasses cannot accidentally use it.
- void setInstructionSubclassData(unsigned short D) {
- Instruction::setInstructionSubclassData(D);
- }
-};
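-
-// Usage sketch (Ptr and InsertPt assumed): an acquire load, 4-byte aligned,
-// built with the ctor overload above:
-//   LoadInst *LI = new LoadInst(Ptr, "v", /*isVolatile=*/false, /*Align=*/4,
-//                               Acquire, CrossThread, InsertPt);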
-
-
-//===----------------------------------------------------------------------===//
-// StoreInst Class
-//===----------------------------------------------------------------------===//
-
-/// StoreInst - an instruction for storing to memory
-///
-class StoreInst : public Instruction {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- void AssertOK();
-protected:
- virtual StoreInst *clone_impl() const;
-public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
- StoreInst(Value *Val, Value *Ptr, Instruction *InsertBefore);
- StoreInst(Value *Val, Value *Ptr, BasicBlock *InsertAtEnd);
- StoreInst(Value *Val, Value *Ptr, bool isVolatile = false,
- Instruction *InsertBefore = 0);
- StoreInst(Value *Val, Value *Ptr, bool isVolatile, BasicBlock *InsertAtEnd);
- StoreInst(Value *Val, Value *Ptr, bool isVolatile,
- unsigned Align, Instruction *InsertBefore = 0);
- StoreInst(Value *Val, Value *Ptr, bool isVolatile,
- unsigned Align, BasicBlock *InsertAtEnd);
- StoreInst(Value *Val, Value *Ptr, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope = CrossThread,
- Instruction *InsertBefore = 0);
- StoreInst(Value *Val, Value *Ptr, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope,
- BasicBlock *InsertAtEnd);
-
-
- /// isVolatile - Return true if this is a store to a volatile memory
- /// location.
- ///
- bool isVolatile() const { return getSubclassDataFromInstruction() & 1; }
-
- /// setVolatile - Specify whether this is a volatile store or not.
- ///
- void setVolatile(bool V) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
- (V ? 1 : 0));
- }
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// getAlignment - Return the alignment of the access that is being performed
- ///
- unsigned getAlignment() const {
- return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1;
- }
-
- void setAlignment(unsigned Align);
-
- /// Returns the ordering effect of this store.
- AtomicOrdering getOrdering() const {
- return AtomicOrdering((getSubclassDataFromInstruction() >> 7) & 7);
- }
-
- /// Set the ordering constraint on this store. May not be Acquire or
- /// AcquireRelease.
- void setOrdering(AtomicOrdering Ordering) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~(7 << 7)) |
- (Ordering << 7));
- }
-
- SynchronizationScope getSynchScope() const {
- return SynchronizationScope((getSubclassDataFromInstruction() >> 6) & 1);
- }
-
- /// Specify whether this store instruction is ordered with respect to all
- /// concurrently executing threads, or only with respect to signal handlers
- /// executing in the same thread.
- void setSynchScope(SynchronizationScope xthread) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~(1 << 6)) |
- (xthread << 6));
- }
-
- bool isAtomic() const { return getOrdering() != NotAtomic; }
- void setAtomic(AtomicOrdering Ordering,
- SynchronizationScope SynchScope = CrossThread) {
- setOrdering(Ordering);
- setSynchScope(SynchScope);
- }
-
- bool isSimple() const { return !isAtomic() && !isVolatile(); }
- bool isUnordered() const {
- return getOrdering() <= Unordered && !isVolatile();
- }
-
- Value *getValueOperand() { return getOperand(0); }
- const Value *getValueOperand() const { return getOperand(0); }
-
- Value *getPointerOperand() { return getOperand(1); }
- const Value *getPointerOperand() const { return getOperand(1); }
- static unsigned getPointerOperandIndex() { return 1U; }
-
- /// \brief Returns the address space of the pointer operand.
- unsigned getPointerAddressSpace() const {
- return getPointerOperand()->getType()->getPointerAddressSpace();
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Store;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- // Shadow Instruction::setInstructionSubclassData with a private forwarding
- // method so that subclasses cannot accidentally use it.
- void setInstructionSubclassData(unsigned short D) {
- Instruction::setInstructionSubclassData(D);
- }
-};
-
-template <>
-struct OperandTraits<StoreInst> : public FixedNumOperandTraits<StoreInst, 2> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value)
-
-//===----------------------------------------------------------------------===//
-// FenceInst Class
-//===----------------------------------------------------------------------===//
-
-/// FenceInst - an instruction for ordering other memory operations
-///
-class FenceInst : public Instruction {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- void Init(AtomicOrdering Ordering, SynchronizationScope SynchScope);
-protected:
- virtual FenceInst *clone_impl() const;
-public:
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
-
- // Ordering may only be Acquire, Release, AcquireRelease, or
- // SequentiallyConsistent.
- FenceInst(LLVMContext &C, AtomicOrdering Ordering,
- SynchronizationScope SynchScope = CrossThread,
- Instruction *InsertBefore = 0);
- FenceInst(LLVMContext &C, AtomicOrdering Ordering,
- SynchronizationScope SynchScope,
- BasicBlock *InsertAtEnd);
-
- /// Returns the ordering effect of this fence.
- AtomicOrdering getOrdering() const {
- return AtomicOrdering(getSubclassDataFromInstruction() >> 1);
- }
-
- /// Set the ordering constraint on this fence. May only be Acquire, Release,
- /// AcquireRelease, or SequentiallyConsistent.
- void setOrdering(AtomicOrdering Ordering) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
- (Ordering << 1));
- }
-
- SynchronizationScope getSynchScope() const {
- return SynchronizationScope(getSubclassDataFromInstruction() & 1);
- }
-
- /// Specify whether this fence orders other operations with respect to all
- /// concurrently executing threads, or only with respect to signal handlers
- /// executing in the same thread.
- void setSynchScope(SynchronizationScope xthread) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
- xthread);
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Fence;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- // Shadow Instruction::setInstructionSubclassData with a private forwarding
- // method so that subclasses cannot accidentally use it.
- void setInstructionSubclassData(unsigned short D) {
- Instruction::setInstructionSubclassData(D);
- }
-};
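-
-// Usage sketch (Ctx and InsertPt assumed): a full cross-thread barrier:
-//   FenceInst *F = new FenceInst(Ctx, SequentiallyConsistent, CrossThread,
-//                                InsertPt);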
-
-//===----------------------------------------------------------------------===//
-// AtomicCmpXchgInst Class
-//===----------------------------------------------------------------------===//
-
-/// AtomicCmpXchgInst - an instruction that atomically checks whether a
-/// specified value is in a memory location, and, if it is, stores a new value
-/// there. Returns the value that was loaded.
-///
-class AtomicCmpXchgInst : public Instruction {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- void Init(Value *Ptr, Value *Cmp, Value *NewVal,
- AtomicOrdering Ordering, SynchronizationScope SynchScope);
-protected:
- virtual AtomicCmpXchgInst *clone_impl() const;
-public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
- AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
- AtomicOrdering Ordering, SynchronizationScope SynchScope,
- Instruction *InsertBefore = 0);
- AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
- AtomicOrdering Ordering, SynchronizationScope SynchScope,
- BasicBlock *InsertAtEnd);
-
- /// isVolatile - Return true if this is a cmpxchg on a volatile memory
- /// location.
- ///
- bool isVolatile() const {
- return getSubclassDataFromInstruction() & 1;
- }
-
- /// setVolatile - Specify whether this is a volatile cmpxchg.
- ///
- void setVolatile(bool V) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
- (unsigned)V);
- }
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// Set the ordering constraint on this cmpxchg.
- void setOrdering(AtomicOrdering Ordering) {
- assert(Ordering != NotAtomic &&
- "CmpXchg instructions can only be atomic.");
- setInstructionSubclassData((getSubclassDataFromInstruction() & 3) |
- (Ordering << 2));
- }
-
- /// Specify whether this cmpxchg is atomic and orders other operations with
- /// respect to all concurrently executing threads, or only with respect to
- /// signal handlers executing in the same thread.
- void setSynchScope(SynchronizationScope SynchScope) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) |
- (SynchScope << 1));
- }
-
- /// Returns the ordering constraint on this cmpxchg.
- AtomicOrdering getOrdering() const {
- return AtomicOrdering(getSubclassDataFromInstruction() >> 2);
- }
-
- /// Returns whether this cmpxchg is atomic between threads or only within a
- /// single thread.
- SynchronizationScope getSynchScope() const {
- return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1);
- }
-
- Value *getPointerOperand() { return getOperand(0); }
- const Value *getPointerOperand() const { return getOperand(0); }
- static unsigned getPointerOperandIndex() { return 0U; }
-
- Value *getCompareOperand() { return getOperand(1); }
- const Value *getCompareOperand() const { return getOperand(1); }
-
- Value *getNewValOperand() { return getOperand(2); }
- const Value *getNewValOperand() const { return getOperand(2); }
-
- /// \brief Returns the address space of the pointer operand.
- unsigned getPointerAddressSpace() const {
- return getPointerOperand()->getType()->getPointerAddressSpace();
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::AtomicCmpXchg;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- // Shadow Instruction::setInstructionSubclassData with a private forwarding
- // method so that subclasses cannot accidentally use it.
- void setInstructionSubclassData(unsigned short D) {
- Instruction::setInstructionSubclassData(D);
- }
-};
-
-template <>
-struct OperandTraits<AtomicCmpXchgInst> :
- public FixedNumOperandTraits<AtomicCmpXchgInst, 3> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicCmpXchgInst, Value)
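-
-// Usage sketch (Ptr, Expected, Desired, InsertPt assumed): emit a seq_cst
-// compare-and-swap; the instruction yields the value previously at *Ptr:
-//   AtomicCmpXchgInst *CAS =
-//       new AtomicCmpXchgInst(Ptr, Expected, Desired, SequentiallyConsistent,
-//                             CrossThread, InsertPt);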
-
-//===----------------------------------------------------------------------===//
-// AtomicRMWInst Class
-//===----------------------------------------------------------------------===//
-
-/// AtomicRMWInst - an instruction that atomically reads a memory location,
-/// combines it with another value, and then stores the result back. Returns
-/// the old value.
-///
-class AtomicRMWInst : public Instruction {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-protected:
- virtual AtomicRMWInst *clone_impl() const;
-public:
- /// This enumeration lists the possible modifications atomicrmw can make. In
- /// the descriptions, 'p' is the pointer to the instruction's memory location,
- /// 'old' is the initial value of *p, and 'v' is the other value passed to the
- /// instruction. These instructions always return 'old'.
- enum BinOp {
- /// *p = v
- Xchg,
- /// *p = old + v
- Add,
- /// *p = old - v
- Sub,
- /// *p = old & v
- And,
- /// *p = ~old & v
- Nand,
- /// *p = old | v
- Or,
- /// *p = old ^ v
- Xor,
- /// *p = old >signed v ? old : v
- Max,
- /// *p = old <signed v ? old : v
- Min,
- /// *p = old >unsigned v ? old : v
- UMax,
- /// *p = old <unsigned v ? old : v
- UMin,
-
- FIRST_BINOP = Xchg,
- LAST_BINOP = UMin,
- BAD_BINOP
- };
-
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
- AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
- AtomicOrdering Ordering, SynchronizationScope SynchScope,
- Instruction *InsertBefore = 0);
- AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
- AtomicOrdering Ordering, SynchronizationScope SynchScope,
- BasicBlock *InsertAtEnd);
-
- BinOp getOperation() const {
- return static_cast<BinOp>(getSubclassDataFromInstruction() >> 5);
- }
-
- void setOperation(BinOp Operation) {
- unsigned short SubclassData = getSubclassDataFromInstruction();
- setInstructionSubclassData((SubclassData & 31) |
- (Operation << 5));
- }
-
- /// isVolatile - Return true if this is a RMW on a volatile memory location.
- ///
- bool isVolatile() const {
- return getSubclassDataFromInstruction() & 1;
- }
-
- /// setVolatile - Specify whether this is a volatile RMW or not.
- ///
- void setVolatile(bool V) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
- (unsigned)V);
- }
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// Set the ordering constraint on this RMW.
- void setOrdering(AtomicOrdering Ordering) {
- assert(Ordering != NotAtomic &&
- "atomicrmw instructions can only be atomic.");
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~(7 << 2)) |
- (Ordering << 2));
- }
-
- /// Specify whether this RMW orders other operations with respect to all
- /// concurrently executing threads, or only with respect to signal handlers
- /// executing in the same thread.
- void setSynchScope(SynchronizationScope SynchScope) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) |
- (SynchScope << 1));
- }
-
- /// Returns the ordering constraint on this RMW.
- AtomicOrdering getOrdering() const {
- return AtomicOrdering((getSubclassDataFromInstruction() >> 2) & 7);
- }
-
- /// Returns whether this RMW is atomic between threads or only within a
- /// single thread.
- SynchronizationScope getSynchScope() const {
- return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1);
- }
-
- Value *getPointerOperand() { return getOperand(0); }
- const Value *getPointerOperand() const { return getOperand(0); }
- static unsigned getPointerOperandIndex() { return 0U; }
-
- Value *getValOperand() { return getOperand(1); }
- const Value *getValOperand() const { return getOperand(1); }
-
- /// \brief Returns the address space of the pointer operand.
- unsigned getPointerAddressSpace() const {
- return getPointerOperand()->getType()->getPointerAddressSpace();
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::AtomicRMW;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- void Init(BinOp Operation, Value *Ptr, Value *Val,
- AtomicOrdering Ordering, SynchronizationScope SynchScope);
- // Shadow Instruction::setInstructionSubclassData with a private forwarding
- // method so that subclasses cannot accidentally use it.
- void setInstructionSubclassData(unsigned short D) {
- Instruction::setInstructionSubclassData(D);
- }
-};
-
-template <>
-struct OperandTraits<AtomicRMWInst>
- : public FixedNumOperandTraits<AtomicRMWInst,2> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicRMWInst, Value)
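-
-// Usage sketch (Ptr, Val, InsertPt assumed): an atomic fetch-and-add, i.e.
-// { old = *Ptr; *Ptr = old + Val; return old; } performed atomically:
-//   AtomicRMWInst *RMW =
-//       new AtomicRMWInst(AtomicRMWInst::Add, Ptr, Val,
-//                         SequentiallyConsistent, CrossThread, InsertPt);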
-
-//===----------------------------------------------------------------------===//
-// GetElementPtrInst Class
-//===----------------------------------------------------------------------===//
-
-// checkGEPType - Simple wrapper function to give a better assertion failure
-// message on bad indexes for a gep instruction.
-//
-inline Type *checkGEPType(Type *Ty) {
- assert(Ty && "Invalid GetElementPtrInst indices for type!");
- return Ty;
-}
-
-/// GetElementPtrInst - an instruction for type-safe pointer arithmetic to
-/// access elements of arrays and structs
-///
-class GetElementPtrInst : public Instruction {
- GetElementPtrInst(const GetElementPtrInst &GEPI);
- void init(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr);
-
- /// Constructors - Create a getelementptr instruction with a base pointer and a
- /// list of indices. The first ctor can optionally insert before an existing
- /// instruction, the second appends the new instruction to the specified
- /// BasicBlock.
- inline GetElementPtrInst(Value *Ptr, ArrayRef<Value *> IdxList,
- unsigned Values, const Twine &NameStr,
- Instruction *InsertBefore);
- inline GetElementPtrInst(Value *Ptr, ArrayRef<Value *> IdxList,
- unsigned Values, const Twine &NameStr,
- BasicBlock *InsertAtEnd);
-protected:
- virtual GetElementPtrInst *clone_impl() const;
-public:
- static GetElementPtrInst *Create(Value *Ptr, ArrayRef<Value *> IdxList,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- unsigned Values = 1 + unsigned(IdxList.size());
- return new(Values)
- GetElementPtrInst(Ptr, IdxList, Values, NameStr, InsertBefore);
- }
- static GetElementPtrInst *Create(Value *Ptr, ArrayRef<Value *> IdxList,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- unsigned Values = 1 + unsigned(IdxList.size());
- return new(Values)
- GetElementPtrInst(Ptr, IdxList, Values, NameStr, InsertAtEnd);
- }
-
- /// Create an "inbounds" getelementptr. See the documentation for the
- /// "inbounds" flag in LangRef.html for details.
- static GetElementPtrInst *CreateInBounds(Value *Ptr,
- ArrayRef<Value *> IdxList,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- GetElementPtrInst *GEP = Create(Ptr, IdxList, NameStr, InsertBefore);
- GEP->setIsInBounds(true);
- return GEP;
- }
- static GetElementPtrInst *CreateInBounds(Value *Ptr,
- ArrayRef<Value *> IdxList,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- GetElementPtrInst *GEP = Create(Ptr, IdxList, NameStr, InsertAtEnd);
- GEP->setIsInBounds(true);
- return GEP;
- }
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- // getType - Overload to return most specific pointer type...
- PointerType *getType() const {
- return reinterpret_cast<PointerType*>(Instruction::getType());
- }
-
- /// \brief Returns the address space of this instruction's pointer type.
- unsigned getAddressSpace() const {
- // Note that this is always the same as the pointer operand's address space
- // and that is cheaper to compute, so cheat here.
- return getPointerAddressSpace();
- }
-
- /// getIndexedType - Returns the type of the element that would be loaded with
- /// a load instruction with the specified parameters.
- ///
- /// Null is returned if the indices are invalid for the specified
- /// pointer type.
- ///
- static Type *getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList);
- static Type *getIndexedType(Type *Ptr, ArrayRef<Constant *> IdxList);
- static Type *getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList);
-
- inline op_iterator idx_begin() { return op_begin()+1; }
- inline const_op_iterator idx_begin() const { return op_begin()+1; }
- inline op_iterator idx_end() { return op_end(); }
- inline const_op_iterator idx_end() const { return op_end(); }
-
- Value *getPointerOperand() {
- return getOperand(0);
- }
- const Value *getPointerOperand() const {
- return getOperand(0);
- }
- static unsigned getPointerOperandIndex() {
- return 0U; // get index for modifying correct operand.
- }
-
- /// getPointerOperandType - Method to return the pointer operand as a
- /// PointerType.
- Type *getPointerOperandType() const {
- return getPointerOperand()->getType();
- }
-
- /// \brief Returns the address space of the pointer operand.
- unsigned getPointerAddressSpace() const {
- return getPointerOperandType()->getPointerAddressSpace();
- }
-
- /// getGEPReturnType - Returns the pointer type returned by the GEP
- /// instruction, which may be a vector of pointers.
- static Type *getGEPReturnType(Value *Ptr, ArrayRef<Value *> IdxList) {
- Type *PtrTy = PointerType::get(checkGEPType(
- getIndexedType(Ptr->getType(), IdxList)),
- Ptr->getType()->getPointerAddressSpace());
- // Vector GEP
- if (Ptr->getType()->isVectorTy()) {
- unsigned NumElem = cast<VectorType>(Ptr->getType())->getNumElements();
- return VectorType::get(PtrTy, NumElem);
- }
-
- // Scalar GEP
- return PtrTy;
- }
-
- unsigned getNumIndices() const { // Note: always non-negative
- return getNumOperands() - 1;
- }
-
- bool hasIndices() const {
- return getNumOperands() > 1;
- }
-
- /// hasAllZeroIndices - Return true if all of the indices of this GEP are
- /// zeros. If so, the result pointer and the first operand have the same
- /// value, just potentially different types.
- bool hasAllZeroIndices() const;
-
- /// hasAllConstantIndices - Return true if all of the indices of this GEP are
- /// constant integers. If so, the result pointer and the first operand have
- /// a constant offset between them.
- bool hasAllConstantIndices() const;
-
- /// setIsInBounds - Set or clear the inbounds flag on this GEP instruction.
- /// See LangRef.html for the meaning of inbounds on a getelementptr.
- void setIsInBounds(bool b = true);
-
- /// isInBounds - Determine whether the GEP has the inbounds flag.
- bool isInBounds() const;
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return (I->getOpcode() == Instruction::GetElementPtr);
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-template <>
-struct OperandTraits<GetElementPtrInst> :
- public VariadicOperandTraits<GetElementPtrInst, 1> {
-};
-
-GetElementPtrInst::GetElementPtrInst(Value *Ptr,
- ArrayRef<Value *> IdxList,
- unsigned Values,
- const Twine &NameStr,
- Instruction *InsertBefore)
- : Instruction(getGEPReturnType(Ptr, IdxList),
- GetElementPtr,
- OperandTraits<GetElementPtrInst>::op_end(this) - Values,
- Values, InsertBefore) {
- init(Ptr, IdxList, NameStr);
-}
-GetElementPtrInst::GetElementPtrInst(Value *Ptr,
- ArrayRef<Value *> IdxList,
- unsigned Values,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd)
- : Instruction(getGEPReturnType(Ptr, IdxList),
- GetElementPtr,
- OperandTraits<GetElementPtrInst>::op_end(this) - Values,
- Values, InsertAtEnd) {
- init(Ptr, IdxList, NameStr);
-}
-
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
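-
-// Usage sketch (Base, Zero, Idx, InsertPt assumed): compute &Base[0][Idx]
-// with the inbounds flag set:
-//   Value *Idxs[] = { Zero, Idx };
-//   GetElementPtrInst *GEP =
-//       GetElementPtrInst::CreateInBounds(Base, Idxs, "elt", InsertPt);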
-
-
-//===----------------------------------------------------------------------===//
-// ICmpInst Class
-//===----------------------------------------------------------------------===//
-
-/// This instruction compares its operands according to the predicate given
-/// to the constructor. It only operates on integers or pointers. The operands
-/// must be identical types.
-/// \brief Represent an integer comparison operator.
-class ICmpInst: public CmpInst {
-protected:
- /// \brief Clone an identical ICmpInst
- virtual ICmpInst *clone_impl() const;
-public:
- /// \brief Constructor with insert-before-instruction semantics.
- ICmpInst(
- Instruction *InsertBefore, ///< Where to insert
- Predicate pred, ///< The predicate to use for the comparison
- Value *LHS, ///< The left-hand-side of the expression
- Value *RHS, ///< The right-hand-side of the expression
- const Twine &NameStr = "" ///< Name of the instruction
- ) : CmpInst(makeCmpResultType(LHS->getType()),
- Instruction::ICmp, pred, LHS, RHS, NameStr,
- InsertBefore) {
- assert(pred >= CmpInst::FIRST_ICMP_PREDICATE &&
- pred <= CmpInst::LAST_ICMP_PREDICATE &&
- "Invalid ICmp predicate value");
- assert(getOperand(0)->getType() == getOperand(1)->getType() &&
- "Both operands to ICmp instruction are not of the same type!");
- // Check that the operands are the right type
- assert((getOperand(0)->getType()->isIntOrIntVectorTy() ||
- getOperand(0)->getType()->getScalarType()->isPointerTy()) &&
- "Invalid operand types for ICmp instruction");
- }
-
- /// \brief Constructor with insert-at-end semantics.
- ICmpInst(
- BasicBlock &InsertAtEnd, ///< Block to insert into.
- Predicate pred, ///< The predicate to use for the comparison
- Value *LHS, ///< The left-hand-side of the expression
- Value *RHS, ///< The right-hand-side of the expression
- const Twine &NameStr = "" ///< Name of the instruction
- ) : CmpInst(makeCmpResultType(LHS->getType()),
- Instruction::ICmp, pred, LHS, RHS, NameStr,
- &InsertAtEnd) {
- assert(pred >= CmpInst::FIRST_ICMP_PREDICATE &&
- pred <= CmpInst::LAST_ICMP_PREDICATE &&
- "Invalid ICmp predicate value");
- assert(getOperand(0)->getType() == getOperand(1)->getType() &&
- "Both operands to ICmp instruction are not of the same type!");
- // Check that the operands are the right type
- assert((getOperand(0)->getType()->isIntOrIntVectorTy() ||
- getOperand(0)->getType()->isPointerTy()) &&
- "Invalid operand types for ICmp instruction");
- }
-
- /// \brief Constructor with no-insertion semantics
- ICmpInst(
- Predicate pred, ///< The predicate to use for the comparison
- Value *LHS, ///< The left-hand-side of the expression
- Value *RHS, ///< The right-hand-side of the expression
- const Twine &NameStr = "" ///< Name of the instruction
- ) : CmpInst(makeCmpResultType(LHS->getType()),
- Instruction::ICmp, pred, LHS, RHS, NameStr) {
- assert(pred >= CmpInst::FIRST_ICMP_PREDICATE &&
- pred <= CmpInst::LAST_ICMP_PREDICATE &&
- "Invalid ICmp predicate value");
- assert(getOperand(0)->getType() == getOperand(1)->getType() &&
- "Both operands to ICmp instruction are not of the same type!");
- // Check that the operands are the right type
- assert((getOperand(0)->getType()->isIntOrIntVectorTy() ||
- getOperand(0)->getType()->getScalarType()->isPointerTy()) &&
- "Invalid operand types for ICmp instruction");
- }
-
- /// For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
- /// @returns the predicate that would be the result if the operands were
- /// regarded as signed.
- /// \brief Return the signed version of the predicate
- Predicate getSignedPredicate() const {
- return getSignedPredicate(getPredicate());
- }
-
- /// This is a static version that you can use without an instruction.
- /// \brief Return the signed version of the predicate.
- static Predicate getSignedPredicate(Predicate pred);
-
- /// For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
- /// @returns the predicate that would be the result if the operands were
- /// regarded as unsigned.
- /// \brief Return the unsigned version of the predicate
- Predicate getUnsignedPredicate() const {
- return getUnsignedPredicate(getPredicate());
- }
-
- /// This is a static version that you can use without an instruction.
- /// \brief Return the unsigned version of the predicate.
- static Predicate getUnsignedPredicate(Predicate pred);
-
- /// isEquality - Return true if this predicate is either EQ or NE. This also
- /// tests for commutativity.
- static bool isEquality(Predicate P) {
- return P == ICMP_EQ || P == ICMP_NE;
- }
-
- /// isEquality - Return true if this predicate is either EQ or NE. This also
- /// tests for commutativity.
- bool isEquality() const {
- return isEquality(getPredicate());
- }
-
- /// @returns true if the predicate of this ICmpInst is commutative
- /// \brief Determine if this relation is commutative.
- bool isCommutative() const { return isEquality(); }
-
- /// isRelational - Return true if the predicate is relational (not EQ or NE).
- ///
- bool isRelational() const {
- return !isEquality();
- }
-
- /// isRelational - Return true if the predicate is relational (not EQ or NE).
- ///
- static bool isRelational(Predicate P) {
- return !isEquality(P);
- }
-
- /// Initialize a set of values that all satisfy the predicate with C.
- /// \brief Make a ConstantRange for a relation with a constant value.
- static ConstantRange makeConstantRange(Predicate pred, const APInt &C);
-
- /// Exchange the two operands to this instruction in such a way that it does
- /// not modify the semantics of the instruction. The predicate value may be
- /// changed to retain the same result if the predicate is order dependent
- /// (e.g. ult).
- /// \brief Swap operands and adjust predicate.
- void swapOperands() {
- setPredicate(getSwappedPredicate());
- Op<0>().swap(Op<1>());
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::ICmp;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-
-};
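-
-// Usage sketch (A and B assumed): build an unsigned compare, then ask for
-// its signed counterpart:
-//   ICmpInst *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, A, B, "cmp");
-//   CmpInst::Predicate SP = Cmp->getSignedPredicate();  // ICMP_SLT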
-
-//===----------------------------------------------------------------------===//
-// FCmpInst Class
-//===----------------------------------------------------------------------===//
-
-/// This instruction compares its operands according to the predicate given
-/// to the constructor. It only operates on floating point values or packed
-/// vectors of floating point values. The operands must be identical types.
-/// \brief Represents a floating point comparison operator.
-class FCmpInst: public CmpInst {
-protected:
- /// \brief Clone an identical FCmpInst
- virtual FCmpInst *clone_impl() const;
-public:
- /// \brief Constructor with insert-before-instruction semantics.
- FCmpInst(
- Instruction *InsertBefore, ///< Where to insert
- Predicate pred, ///< The predicate to use for the comparison
- Value *LHS, ///< The left-hand-side of the expression
- Value *RHS, ///< The right-hand-side of the expression
- const Twine &NameStr = "" ///< Name of the instruction
- ) : CmpInst(makeCmpResultType(LHS->getType()),
- Instruction::FCmp, pred, LHS, RHS, NameStr,
- InsertBefore) {
- assert(pred <= FCmpInst::LAST_FCMP_PREDICATE &&
- "Invalid FCmp predicate value");
- assert(getOperand(0)->getType() == getOperand(1)->getType() &&
- "Both operands to FCmp instruction are not of the same type!");
- // Check that the operands are the right type
- assert(getOperand(0)->getType()->isFPOrFPVectorTy() &&
- "Invalid operand types for FCmp instruction");
- }
-
- /// \brief Constructor with insert-at-end semantics.
- FCmpInst(
- BasicBlock &InsertAtEnd, ///< Block to insert into.
- Predicate pred, ///< The predicate to use for the comparison
- Value *LHS, ///< The left-hand-side of the expression
- Value *RHS, ///< The right-hand-side of the expression
- const Twine &NameStr = "" ///< Name of the instruction
- ) : CmpInst(makeCmpResultType(LHS->getType()),
- Instruction::FCmp, pred, LHS, RHS, NameStr,
- &InsertAtEnd) {
- assert(pred <= FCmpInst::LAST_FCMP_PREDICATE &&
- "Invalid FCmp predicate value");
- assert(getOperand(0)->getType() == getOperand(1)->getType() &&
- "Both operands to FCmp instruction are not of the same type!");
- // Check that the operands are the right type
- assert(getOperand(0)->getType()->isFPOrFPVectorTy() &&
- "Invalid operand types for FCmp instruction");
- }
-
- /// \brief Constructor with no-insertion semantics
- FCmpInst(
- Predicate pred, ///< The predicate to use for the comparison
- Value *LHS, ///< The left-hand-side of the expression
- Value *RHS, ///< The right-hand-side of the expression
- const Twine &NameStr = "" ///< Name of the instruction
- ) : CmpInst(makeCmpResultType(LHS->getType()),
- Instruction::FCmp, pred, LHS, RHS, NameStr) {
- assert(pred <= FCmpInst::LAST_FCMP_PREDICATE &&
- "Invalid FCmp predicate value");
- assert(getOperand(0)->getType() == getOperand(1)->getType() &&
- "Both operands to FCmp instruction are not of the same type!");
- // Check that the operands are the right type
- assert(getOperand(0)->getType()->isFPOrFPVectorTy() &&
- "Invalid operand types for FCmp instruction");
- }
-
- /// @returns true if the predicate of this instruction is EQ or NE.
- /// \brief Determine if this is an equality predicate.
- bool isEquality() const {
- return getPredicate() == FCMP_OEQ || getPredicate() == FCMP_ONE ||
- getPredicate() == FCMP_UEQ || getPredicate() == FCMP_UNE;
- }
-
- /// @returns true if the predicate of this instruction is commutative.
- /// \brief Determine if this is a commutative predicate.
- bool isCommutative() const {
- return isEquality() ||
- getPredicate() == FCMP_FALSE ||
- getPredicate() == FCMP_TRUE ||
- getPredicate() == FCMP_ORD ||
- getPredicate() == FCMP_UNO;
- }
-
- /// @returns true if the predicate is relational (not EQ or NE).
- /// \brief Determine if this a relational predicate.
- bool isRelational() const { return !isEquality(); }
-
- /// Exchange the two operands to this instruction in such a way that it does
- /// not modify the semantics of the instruction. The predicate value may be
- /// changed to retain the same result if the predicate is order dependent
- /// (e.g. ult).
- /// \brief Swap operands and adjust predicate.
- void swapOperands() {
- setPredicate(getSwappedPredicate());
- Op<0>().swap(Op<1>());
- }
-
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::FCmp;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-/// CallInst - This class represents a function call, abstracting a target
-/// machine's calling convention. This class uses the low bit of the SubClassData
-/// field to indicate whether or not this is a tail call. The rest of the bits
-/// hold the calling convention of the call.
-///
-class CallInst : public Instruction {
- AttrListPtr AttributeList; ///< parameter attributes for call
- CallInst(const CallInst &CI);
- void init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr);
- void init(Value *Func, const Twine &NameStr);
-
- /// Construct a CallInst given a range of arguments.
- /// \brief Construct a CallInst from a range of arguments
- inline CallInst(Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr, Instruction *InsertBefore);
-
- /// Construct a CallInst given a range of arguments.
- /// \brief Construct a CallInst from a range of arguments
- inline CallInst(Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
-
- CallInst(Value *F, Value *Actual, const Twine &NameStr,
- Instruction *InsertBefore);
- CallInst(Value *F, Value *Actual, const Twine &NameStr,
- BasicBlock *InsertAtEnd);
- explicit CallInst(Value *F, const Twine &NameStr,
- Instruction *InsertBefore);
- CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd);
-protected:
- virtual CallInst *clone_impl() const;
-public:
- static CallInst *Create(Value *Func,
- ArrayRef<Value *> Args,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- return new(unsigned(Args.size() + 1))
- CallInst(Func, Args, NameStr, InsertBefore);
- }
- static CallInst *Create(Value *Func,
- ArrayRef<Value *> Args,
- const Twine &NameStr, BasicBlock *InsertAtEnd) {
- return new(unsigned(Args.size() + 1))
- CallInst(Func, Args, NameStr, InsertAtEnd);
- }
- static CallInst *Create(Value *F, const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- return new(1) CallInst(F, NameStr, InsertBefore);
- }
- static CallInst *Create(Value *F, const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- return new(1) CallInst(F, NameStr, InsertAtEnd);
- }
- /// CreateMalloc - Generate the IR for a call to malloc:
- /// 1. Compute the malloc call's argument as the specified type's size,
- /// possibly multiplied by the array size if the array size is not
- /// constant 1.
- /// 2. Call malloc with that argument.
- /// 3. Bitcast the result of the malloc call to the specified type.
- static Instruction *CreateMalloc(Instruction *InsertBefore,
- Type *IntPtrTy, Type *AllocTy,
- Value *AllocSize, Value *ArraySize = 0,
- Function* MallocF = 0,
- const Twine &Name = "");
- static Instruction *CreateMalloc(BasicBlock *InsertAtEnd,
- Type *IntPtrTy, Type *AllocTy,
- Value *AllocSize, Value *ArraySize = 0,
- Function* MallocF = 0,
- const Twine &Name = "");
- /// CreateFree - Generate the IR for a call to the builtin free function.
- static Instruction* CreateFree(Value* Source, Instruction *InsertBefore);
- static Instruction* CreateFree(Value* Source, BasicBlock *InsertAtEnd);
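-
- // E.g. (sketch; IntPtrTy, AllocTy, AllocSize, InsertPt assumed): a malloc
- // of a single AllocTy object, already bitcast to AllocTy*:
- //   Instruction *M = CallInst::CreateMalloc(InsertPt, IntPtrTy, AllocTy,
- //                                           AllocSize);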
-
- ~CallInst();
-
- bool isTailCall() const { return getSubclassDataFromInstruction() & 1; }
- void setTailCall(bool isTC = true) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
- unsigned(isTC));
- }
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// getNumArgOperands - Return the number of call arguments.
- ///
- unsigned getNumArgOperands() const { return getNumOperands() - 1; }
-
- /// getArgOperand/setArgOperand - Return/set the i-th call argument.
- ///
- Value *getArgOperand(unsigned i) const { return getOperand(i); }
- void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
-
- /// getCallingConv/setCallingConv - Get or set the calling convention of this
- /// function call.
- CallingConv::ID getCallingConv() const {
- return static_cast<CallingConv::ID>(getSubclassDataFromInstruction() >> 1);
- }
- void setCallingConv(CallingConv::ID CC) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
- (static_cast<unsigned>(CC) << 1));
- }
-
- /// getAttributes - Return the parameter attributes for this call.
- ///
- const AttrListPtr &getAttributes() const { return AttributeList; }
-
- /// setAttributes - Set the parameter attributes for this call.
- ///
- void setAttributes(const AttrListPtr &Attrs) { AttributeList = Attrs; }
-
- /// addAttribute - adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attributes attr);
-
- /// removeAttribute - removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, Attributes attr);
-
- /// \brief Determine whether this call has the given attribute.
- bool hasFnAttr(Attributes::AttrVal A) const;
-
- /// \brief Determine whether the call or the callee has the given attributes.
- bool paramHasAttr(unsigned i, Attributes::AttrVal A) const;
-
- /// \brief Extract the alignment for a call or parameter (0=unknown).
- unsigned getParamAlignment(unsigned i) const {
- return AttributeList.getParamAlignment(i);
- }
-
- /// \brief Return true if the call should not be inlined.
- bool isNoInline() const { return hasFnAttr(Attributes::NoInline); }
- void setIsNoInline() {
- addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(getContext(), Attributes::NoInline));
- }
-
- /// \brief Return true if the call can return twice
- bool canReturnTwice() const {
- return hasFnAttr(Attributes::ReturnsTwice);
- }
- void setCanReturnTwice() {
- addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(getContext(), Attributes::ReturnsTwice));
- }
-
- /// \brief Determine if the call does not access memory.
- bool doesNotAccessMemory() const {
- return hasFnAttr(Attributes::ReadNone);
- }
- void setDoesNotAccessMemory() {
- addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(getContext(), Attributes::ReadNone));
- }
-
- /// \brief Determine if the call does not access or only reads memory.
- bool onlyReadsMemory() const {
- return doesNotAccessMemory() || hasFnAttr(Attributes::ReadOnly);
- }
- void setOnlyReadsMemory() {
- addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(getContext(), Attributes::ReadOnly));
- }
-
- /// \brief Determine if the call cannot return.
- bool doesNotReturn() const { return hasFnAttr(Attributes::NoReturn); }
- void setDoesNotReturn() {
- addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(getContext(), Attributes::NoReturn));
- }
-
- /// \brief Determine if the call cannot unwind.
- bool doesNotThrow() const { return hasFnAttr(Attributes::NoUnwind); }
- void setDoesNotThrow() {
- addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(getContext(), Attributes::NoUnwind));
- }
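- // A minimal sketch (hypothetical CallInst *CI): each setter above simply
- // adds the named attribute at AttrListPtr::FunctionIndex:
- //   CI->setOnlyReadsMemory(); // adds Attributes::ReadOnly
- //   CI->setDoesNotThrow();    // adds Attributes::NoUnwind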
-
- /// \brief Determine if the call returns a structure through first
- /// pointer argument.
- bool hasStructRetAttr() const {
- // Be friendly and also check the callee.
- return paramHasAttr(1, Attributes::StructRet);
- }
-
- /// \brief Determine if any call argument is an aggregate passed by value.
- bool hasByValArgument() const {
- for (unsigned I = 0, E = AttributeList.getNumAttrs(); I != E; ++I)
- if (AttributeList.getAttributesAtIndex(I).hasAttribute(Attributes::ByVal))
- return true;
- return false;
- }
-
- /// getCalledFunction - Return the function called, or null if this is an
- /// indirect function invocation.
- ///
- Function *getCalledFunction() const {
- return dyn_cast<Function>(Op<-1>());
- }
-
- /// getCalledValue - Get a pointer to the function that is invoked by this
- /// instruction.
- const Value *getCalledValue() const { return Op<-1>(); }
- Value *getCalledValue() { return Op<-1>(); }
-
- /// setCalledFunction - Set the function called.
- void setCalledFunction(Value* Fn) {
- Op<-1>() = Fn;
- }
-
- /// isInlineAsm - Check if this call is an inline asm statement.
- bool isInlineAsm() const {
- return isa<InlineAsm>(Op<-1>());
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Call;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- // Shadow Instruction::setInstructionSubclassData with a private forwarding
- // method so that subclasses cannot accidentally use it.
- void setInstructionSubclassData(unsigned short D) {
- Instruction::setInstructionSubclassData(D);
- }
-};
-
-template <>
-struct OperandTraits<CallInst> : public VariadicOperandTraits<CallInst, 1> {
-};
-
-CallInst::CallInst(Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr, BasicBlock *InsertAtEnd)
- : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
- ->getElementType())->getReturnType(),
- Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - (Args.size() + 1),
- unsigned(Args.size() + 1), InsertAtEnd) {
- init(Func, Args, NameStr);
-}
-
-CallInst::CallInst(Value *Func, ArrayRef<Value *> Args,
- const Twine &NameStr, Instruction *InsertBefore)
- : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
- ->getElementType())->getReturnType(),
- Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - (Args.size() + 1),
- unsigned(Args.size() + 1), InsertBefore) {
- init(Func, Args, NameStr);
-}
-
-
-// Note: if you get compile errors about private methods then
-// please update your code to use the high-level operand
-// interfaces. See line 943 above.
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CallInst, Value)
-
-//===----------------------------------------------------------------------===//
-// SelectInst Class
-//===----------------------------------------------------------------------===//
-
-/// SelectInst - This class represents the LLVM 'select' instruction.
-///
-class SelectInst : public Instruction {
- void init(Value *C, Value *S1, Value *S2) {
- assert(!areInvalidOperands(C, S1, S2) && "Invalid operands for select");
- Op<0>() = C;
- Op<1>() = S1;
- Op<2>() = S2;
- }
-
- SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr,
- Instruction *InsertBefore)
- : Instruction(S1->getType(), Instruction::Select,
- &Op<0>(), 3, InsertBefore) {
- init(C, S1, S2);
- setName(NameStr);
- }
- SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr,
- BasicBlock *InsertAtEnd)
- : Instruction(S1->getType(), Instruction::Select,
- &Op<0>(), 3, InsertAtEnd) {
- init(C, S1, S2);
- setName(NameStr);
- }
-protected:
- virtual SelectInst *clone_impl() const;
-public:
- static SelectInst *Create(Value *C, Value *S1, Value *S2,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- return new(3) SelectInst(C, S1, S2, NameStr, InsertBefore);
- }
- static SelectInst *Create(Value *C, Value *S1, Value *S2,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- return new(3) SelectInst(C, S1, S2, NameStr, InsertAtEnd);
- }
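- // A minimal sketch (hypothetical i1 Cond, values A and B of one type, and
- // insertion point InsertPt):
- //   SelectInst *Sel = SelectInst::Create(Cond, A, B, "sel", InsertPt);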
-
- const Value *getCondition() const { return Op<0>(); }
- const Value *getTrueValue() const { return Op<1>(); }
- const Value *getFalseValue() const { return Op<2>(); }
- Value *getCondition() { return Op<0>(); }
- Value *getTrueValue() { return Op<1>(); }
- Value *getFalseValue() { return Op<2>(); }
-
- /// areInvalidOperands - Return a string if the specified operands are invalid
- /// for a select operation, otherwise return null.
- static const char *areInvalidOperands(Value *Cond, Value *True, Value *False);
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- OtherOps getOpcode() const {
- return static_cast<OtherOps>(Instruction::getOpcode());
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Select;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-template <>
-struct OperandTraits<SelectInst> : public FixedNumOperandTraits<SelectInst, 3> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value)
-
-//===----------------------------------------------------------------------===//
-// VAArgInst Class
-//===----------------------------------------------------------------------===//
-
- /// VAArgInst - This class represents the LLVM va_arg instruction, which
- /// returns an argument of the specified type given a va_list, and increments
- /// that list.
-///
-class VAArgInst : public UnaryInstruction {
-protected:
- virtual VAArgInst *clone_impl() const;
-
-public:
- VAArgInst(Value *List, Type *Ty, const Twine &NameStr = "",
- Instruction *InsertBefore = 0)
- : UnaryInstruction(Ty, VAArg, List, InsertBefore) {
- setName(NameStr);
- }
- VAArgInst(Value *List, Type *Ty, const Twine &NameStr,
- BasicBlock *InsertAtEnd)
- : UnaryInstruction(Ty, VAArg, List, InsertAtEnd) {
- setName(NameStr);
- }
-
- Value *getPointerOperand() { return getOperand(0); }
- const Value *getPointerOperand() const { return getOperand(0); }
- static unsigned getPointerOperandIndex() { return 0U; }
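- // A minimal sketch (hypothetical va_list pointer VAList, context Ctx, and
- // insertion point InsertPt), reading the next argument as an i32:
- //   VAArgInst *VA = new VAArgInst(VAList, Type::getInt32Ty(Ctx), "va",
- //                                 InsertPt);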
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == VAArg;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// ExtractElementInst Class
-//===----------------------------------------------------------------------===//
-
-/// ExtractElementInst - This instruction extracts a single (scalar)
-/// element from a VectorType value
-///
-class ExtractElementInst : public Instruction {
- ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr = "",
- Instruction *InsertBefore = 0);
- ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr,
- BasicBlock *InsertAtEnd);
-protected:
- virtual ExtractElementInst *clone_impl() const;
-
-public:
- static ExtractElementInst *Create(Value *Vec, Value *Idx,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertBefore);
- }
- static ExtractElementInst *Create(Value *Vec, Value *Idx,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertAtEnd);
- }
-
- /// isValidOperands - Return true if an extractelement instruction can be
- /// formed with the specified operands.
- static bool isValidOperands(const Value *Vec, const Value *Idx);
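- // A minimal sketch (hypothetical vector Vec, i32 index Idx, and insertion
- // point InsertPt):
- //   if (ExtractElementInst::isValidOperands(Vec, Idx))
- //     Value *Elt = ExtractElementInst::Create(Vec, Idx, "elt", InsertPt);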
-
- Value *getVectorOperand() { return Op<0>(); }
- Value *getIndexOperand() { return Op<1>(); }
- const Value *getVectorOperand() const { return Op<0>(); }
- const Value *getIndexOperand() const { return Op<1>(); }
-
- VectorType *getVectorOperandType() const {
- return reinterpret_cast<VectorType*>(getVectorOperand()->getType());
- }
-
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::ExtractElement;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-template <>
-struct OperandTraits<ExtractElementInst> :
- public FixedNumOperandTraits<ExtractElementInst, 2> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementInst, Value)
-
-//===----------------------------------------------------------------------===//
-// InsertElementInst Class
-//===----------------------------------------------------------------------===//
-
-/// InsertElementInst - This instruction inserts a single (scalar)
-/// element into a VectorType value
-///
-class InsertElementInst : public Instruction {
- InsertElementInst(Value *Vec, Value *NewElt, Value *Idx,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0);
- InsertElementInst(Value *Vec, Value *NewElt, Value *Idx,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
-protected:
- virtual InsertElementInst *clone_impl() const;
-
-public:
- static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertBefore);
- }
- static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertAtEnd);
- }
-
- /// isValidOperands - Return true if an insertelement instruction can be
- /// formed with the specified operands.
- static bool isValidOperands(const Value *Vec, const Value *NewElt,
- const Value *Idx);
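- // A minimal sketch (hypothetical Vec, scalar NewElt, index Idx, and
- // insertion point InsertPt):
- //   if (InsertElementInst::isValidOperands(Vec, NewElt, Idx))
- //     Value *V = InsertElementInst::Create(Vec, NewElt, Idx, "v", InsertPt);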
-
- /// getType - Overload to return most specific vector type.
- ///
- VectorType *getType() const {
- return reinterpret_cast<VectorType*>(Instruction::getType());
- }
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::InsertElement;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-template <>
-struct OperandTraits<InsertElementInst> :
- public FixedNumOperandTraits<InsertElementInst, 3> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)
-
-//===----------------------------------------------------------------------===//
-// ShuffleVectorInst Class
-//===----------------------------------------------------------------------===//
-
-/// ShuffleVectorInst - This instruction constructs a fixed permutation of two
-/// input vectors.
-///
-class ShuffleVectorInst : public Instruction {
-protected:
- virtual ShuffleVectorInst *clone_impl() const;
-
-public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
- ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
- const Twine &NameStr = "",
- Instruction *InsertBefor = 0);
- ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
-
- /// isValidOperands - Return true if a shufflevector instruction can be
- /// formed with the specified operands.
- static bool isValidOperands(const Value *V1, const Value *V2,
- const Value *Mask);
-
- /// getType - Overload to return most specific vector type.
- ///
- VectorType *getType() const {
- return reinterpret_cast<VectorType*>(Instruction::getType());
- }
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- Constant *getMask() const {
- return reinterpret_cast<Constant*>(getOperand(2));
- }
-
- /// getMaskValue - Return the index from the shuffle mask for the specified
- /// output result. This is either -1 if the element is undef or a number less
- /// than 2*numelements.
- static int getMaskValue(Constant *Mask, unsigned i);
-
- int getMaskValue(unsigned i) const {
- return getMaskValue(getMask(), i);
- }
-
- /// getShuffleMask - Return the full mask for this instruction, where each
- /// element is the element number and undefs are returned as -1.
- static void getShuffleMask(Constant *Mask, SmallVectorImpl<int> &Result);
-
- void getShuffleMask(SmallVectorImpl<int> &Result) const {
- return getShuffleMask(getMask(), Result);
- }
-
- SmallVector<int, 16> getShuffleMask() const {
- SmallVector<int, 16> Mask;
- getShuffleMask(Mask);
- return Mask;
- }
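- // A minimal sketch (hypothetical ShuffleVectorInst *SVI): each mask element
- // is an input element number, or -1 for undef:
- //   SmallVector<int, 16> Mask = SVI->getShuffleMask();
- //   int First = SVI->getMaskValue(0); // -1 or < 2 * <input element count>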
-
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::ShuffleVector;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-template <>
-struct OperandTraits<ShuffleVectorInst> :
- public FixedNumOperandTraits<ShuffleVectorInst, 3> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorInst, Value)
-
-//===----------------------------------------------------------------------===//
-// ExtractValueInst Class
-//===----------------------------------------------------------------------===//
-
-/// ExtractValueInst - This instruction extracts a struct member or array
-/// element value from an aggregate value.
-///
-class ExtractValueInst : public UnaryInstruction {
- SmallVector<unsigned, 4> Indices;
-
- ExtractValueInst(const ExtractValueInst &EVI);
- void init(ArrayRef<unsigned> Idxs, const Twine &NameStr);
-
- /// Constructors - Create an extractvalue instruction with a base aggregate
- /// value and a list of indices. The first ctor can optionally insert before
- /// an existing instruction, the second appends the new instruction to the
- /// specified BasicBlock.
- inline ExtractValueInst(Value *Agg,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr,
- Instruction *InsertBefore);
- inline ExtractValueInst(Value *Agg,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
-
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
-protected:
- virtual ExtractValueInst *clone_impl() const;
-
-public:
- static ExtractValueInst *Create(Value *Agg,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- return new
- ExtractValueInst(Agg, Idxs, NameStr, InsertBefore);
- }
- static ExtractValueInst *Create(Value *Agg,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- return new ExtractValueInst(Agg, Idxs, NameStr, InsertAtEnd);
- }
-
- /// getIndexedType - Returns the type of the element that would be extracted
- /// with an extractvalue instruction with the specified parameters.
- ///
- /// Null is returned if the indices are invalid for the specified type.
- static Type *getIndexedType(Type *Agg, ArrayRef<unsigned> Idxs);
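- // A minimal sketch (hypothetical aggregate Agg of type {i32, [4 x i8]} and
- // insertion point InsertPt); indices {1, 2} name the third i8 of the array:
- //   unsigned Idxs[] = { 1, 2 };
- //   Value *Elt = ExtractValueInst::Create(Agg, Idxs, "elt", InsertPt);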
-
- typedef const unsigned* idx_iterator;
- inline idx_iterator idx_begin() const { return Indices.begin(); }
- inline idx_iterator idx_end() const { return Indices.end(); }
-
- Value *getAggregateOperand() {
- return getOperand(0);
- }
- const Value *getAggregateOperand() const {
- return getOperand(0);
- }
- static unsigned getAggregateOperandIndex() {
- return 0U; // get index for modifying correct operand
- }
-
- ArrayRef<unsigned> getIndices() const {
- return Indices;
- }
-
- unsigned getNumIndices() const {
- return (unsigned)Indices.size();
- }
-
- bool hasIndices() const {
- return true;
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::ExtractValue;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-ExtractValueInst::ExtractValueInst(Value *Agg,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr,
- Instruction *InsertBefore)
- : UnaryInstruction(checkGEPType(getIndexedType(Agg->getType(), Idxs)),
- ExtractValue, Agg, InsertBefore) {
- init(Idxs, NameStr);
-}
-ExtractValueInst::ExtractValueInst(Value *Agg,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd)
- : UnaryInstruction(checkGEPType(getIndexedType(Agg->getType(), Idxs)),
- ExtractValue, Agg, InsertAtEnd) {
- init(Idxs, NameStr);
-}
-
-
-//===----------------------------------------------------------------------===//
-// InsertValueInst Class
-//===----------------------------------------------------------------------===//
-
- /// InsertValueInst - This instruction inserts a struct field or array element
- /// value into an aggregate value.
-///
-class InsertValueInst : public Instruction {
- SmallVector<unsigned, 4> Indices;
-
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- InsertValueInst(const InsertValueInst &IVI);
- void init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
- const Twine &NameStr);
-
- /// Constructors - Create an insertvalue instruction with a base aggregate
- /// value, a value to insert, and a list of indices. The first ctor can
- /// optionally insert before an existing instruction, the second appends
- /// the new instruction to the specified BasicBlock.
- inline InsertValueInst(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr,
- Instruction *InsertBefore);
- inline InsertValueInst(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
-
- /// Constructors - These two constructors are convenience methods because one
- /// and two index insertvalue instructions are so common.
- InsertValueInst(Value *Agg, Value *Val,
- unsigned Idx, const Twine &NameStr = "",
- Instruction *InsertBefore = 0);
- InsertValueInst(Value *Agg, Value *Val, unsigned Idx,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
-protected:
- virtual InsertValueInst *clone_impl() const;
-public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
-
- static InsertValueInst *Create(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- return new InsertValueInst(Agg, Val, Idxs, NameStr, InsertBefore);
- }
- static InsertValueInst *Create(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- return new InsertValueInst(Agg, Val, Idxs, NameStr, InsertAtEnd);
- }
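- // A minimal sketch (hypothetical aggregate Agg, value Val, and insertion
- // point InsertPt), producing a copy of Agg with Val at index 0:
- //   unsigned Idxs[] = { 0 };
- //   Value *NewAgg = InsertValueInst::Create(Agg, Val, Idxs, "agg", InsertPt);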
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- typedef const unsigned* idx_iterator;
- inline idx_iterator idx_begin() const { return Indices.begin(); }
- inline idx_iterator idx_end() const { return Indices.end(); }
-
- Value *getAggregateOperand() {
- return getOperand(0);
- }
- const Value *getAggregateOperand() const {
- return getOperand(0);
- }
- static unsigned getAggregateOperandIndex() {
- return 0U; // get index for modifying correct operand
- }
-
- Value *getInsertedValueOperand() {
- return getOperand(1);
- }
- const Value *getInsertedValueOperand() const {
- return getOperand(1);
- }
- static unsigned getInsertedValueOperandIndex() {
- return 1U; // get index for modifying correct operand
- }
-
- ArrayRef<unsigned> getIndices() const {
- return Indices;
- }
-
- unsigned getNumIndices() const {
- return (unsigned)Indices.size();
- }
-
- bool hasIndices() const {
- return true;
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::InsertValue;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-template <>
-struct OperandTraits<InsertValueInst> :
- public FixedNumOperandTraits<InsertValueInst, 2> {
-};
-
-InsertValueInst::InsertValueInst(Value *Agg,
- Value *Val,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr,
- Instruction *InsertBefore)
- : Instruction(Agg->getType(), InsertValue,
- OperandTraits<InsertValueInst>::op_begin(this),
- 2, InsertBefore) {
- init(Agg, Val, Idxs, NameStr);
-}
-InsertValueInst::InsertValueInst(Value *Agg,
- Value *Val,
- ArrayRef<unsigned> Idxs,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd)
- : Instruction(Agg->getType(), InsertValue,
- OperandTraits<InsertValueInst>::op_begin(this),
- 2, InsertAtEnd) {
- init(Agg, Val, Idxs, NameStr);
-}
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueInst, Value)
-
-//===----------------------------------------------------------------------===//
-// PHINode Class
-//===----------------------------------------------------------------------===//
-
- // PHINode - The PHINode class is used to represent the magical mystical PHI
- // node, which cannot exist in nature, but can be synthesized in a computer
- // scientist's overactive imagination.
-//
-class PHINode : public Instruction {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- /// ReservedSpace - The number of operands actually allocated. NumOperands is
- /// the number actually in use.
- unsigned ReservedSpace;
- PHINode(const PHINode &PN);
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
- explicit PHINode(Type *Ty, unsigned NumReservedValues,
- const Twine &NameStr = "", Instruction *InsertBefore = 0)
- : Instruction(Ty, Instruction::PHI, 0, 0, InsertBefore),
- ReservedSpace(NumReservedValues) {
- setName(NameStr);
- OperandList = allocHungoffUses(ReservedSpace);
- }
-
- PHINode(Type *Ty, unsigned NumReservedValues, const Twine &NameStr,
- BasicBlock *InsertAtEnd)
- : Instruction(Ty, Instruction::PHI, 0, 0, InsertAtEnd),
- ReservedSpace(NumReservedValues) {
- setName(NameStr);
- OperandList = allocHungoffUses(ReservedSpace);
- }
-protected:
- // allocHungoffUses - this is more complicated than the generic
- // User::allocHungoffUses, because we have to allocate Uses for the incoming
- // values and pointers to the incoming blocks, all in one allocation.
- Use *allocHungoffUses(unsigned) const;
-
- virtual PHINode *clone_impl() const;
-public:
- /// Constructors - NumReservedValues is a hint for the number of incoming
- /// edges that this phi node will have (use 0 if you really have no idea).
- static PHINode *Create(Type *Ty, unsigned NumReservedValues,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- return new PHINode(Ty, NumReservedValues, NameStr, InsertBefore);
- }
- static PHINode *Create(Type *Ty, unsigned NumReservedValues,
- const Twine &NameStr, BasicBlock *InsertAtEnd) {
- return new PHINode(Ty, NumReservedValues, NameStr, InsertAtEnd);
- }
- ~PHINode();
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- // Block iterator interface. This provides access to the list of incoming
- // basic blocks, which parallels the list of incoming values.
-
- typedef BasicBlock **block_iterator;
- typedef BasicBlock * const *const_block_iterator;
-
- block_iterator block_begin() {
- Use::UserRef *ref =
- reinterpret_cast<Use::UserRef*>(op_begin() + ReservedSpace);
- return reinterpret_cast<block_iterator>(ref + 1);
- }
-
- const_block_iterator block_begin() const {
- const Use::UserRef *ref =
- reinterpret_cast<const Use::UserRef*>(op_begin() + ReservedSpace);
- return reinterpret_cast<const_block_iterator>(ref + 1);
- }
-
- block_iterator block_end() {
- return block_begin() + getNumOperands();
- }
-
- const_block_iterator block_end() const {
- return block_begin() + getNumOperands();
- }
-
- /// getNumIncomingValues - Return the number of incoming edges
- ///
- unsigned getNumIncomingValues() const { return getNumOperands(); }
-
- /// getIncomingValue - Return incoming value number x
- ///
- Value *getIncomingValue(unsigned i) const {
- return getOperand(i);
- }
- void setIncomingValue(unsigned i, Value *V) {
- setOperand(i, V);
- }
- static unsigned getOperandNumForIncomingValue(unsigned i) {
- return i;
- }
- static unsigned getIncomingValueNumForOperand(unsigned i) {
- return i;
- }
-
- /// getIncomingBlock - Return incoming basic block number @p i.
- ///
- BasicBlock *getIncomingBlock(unsigned i) const {
- return block_begin()[i];
- }
-
- /// getIncomingBlock - Return incoming basic block corresponding
- /// to an operand of the PHI.
- ///
- BasicBlock *getIncomingBlock(const Use &U) const {
- assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?");
- return getIncomingBlock(unsigned(&U - op_begin()));
- }
-
- /// getIncomingBlock - Return incoming basic block corresponding
- /// to value use iterator.
- ///
- template <typename U>
- BasicBlock *getIncomingBlock(value_use_iterator<U> I) const {
- return getIncomingBlock(I.getUse());
- }
-
- void setIncomingBlock(unsigned i, BasicBlock *BB) {
- block_begin()[i] = BB;
- }
-
- /// addIncoming - Add an incoming value to the end of the PHI list
- ///
- void addIncoming(Value *V, BasicBlock *BB) {
- assert(V && "PHI node got a null value!");
- assert(BB && "PHI node got a null basic block!");
- assert(getType() == V->getType() &&
- "All operands to PHI node must be the same type as the PHI node!");
- if (NumOperands == ReservedSpace)
- growOperands(); // Get more space!
- // Initialize some new operands.
- ++NumOperands;
- setIncomingValue(NumOperands - 1, V);
- setIncomingBlock(NumOperands - 1, BB);
- }
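- // A minimal sketch (hypothetical i32 values V1/V2 arriving from blocks
- // BB1/BB2, context Ctx, and insertion point InsertPt):
- //   PHINode *PN = PHINode::Create(Type::getInt32Ty(Ctx), 2, "phi", InsertPt);
- //   PN->addIncoming(V1, BB1);
- //   PN->addIncoming(V2, BB2);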
-
- /// removeIncomingValue - Remove an incoming value. This is useful if a
- /// predecessor basic block is deleted. The value removed is returned.
- ///
- /// If the last incoming value for a PHI node is removed (and DeletePHIIfEmpty
- /// is true), the PHI node is destroyed and any uses of it are replaced with
- /// dummy values. The only time there should be zero incoming values to a PHI
- /// node is when the block is dead, so this strategy is sound.
- ///
- Value *removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty = true);
-
- Value *removeIncomingValue(const BasicBlock *BB, bool DeletePHIIfEmpty=true) {
- int Idx = getBasicBlockIndex(BB);
- assert(Idx >= 0 && "Invalid basic block argument to remove!");
- return removeIncomingValue(Idx, DeletePHIIfEmpty);
- }
-
- /// getBasicBlockIndex - Return the first index of the specified basic
- /// block in the value list for this PHI. Returns -1 if no instance.
- ///
- int getBasicBlockIndex(const BasicBlock *BB) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (block_begin()[i] == BB)
- return i;
- return -1;
- }
-
- Value *getIncomingValueForBlock(const BasicBlock *BB) const {
- int Idx = getBasicBlockIndex(BB);
- assert(Idx >= 0 && "Invalid basic block argument!");
- return getIncomingValue(Idx);
- }
-
- /// hasConstantValue - If the specified PHI node always merges together the
- /// same value, return the value, otherwise return null.
- Value *hasConstantValue() const;
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::PHI;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
- private:
- void growOperands();
-};
-
-template <>
-struct OperandTraits<PHINode> : public HungoffOperandTraits<2> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(PHINode, Value)
-
-//===----------------------------------------------------------------------===//
-// LandingPadInst Class
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------------------------------------------
-/// LandingPadInst - The landingpad instruction holds all of the information
-/// necessary to generate correct exception handling. The landingpad instruction
-/// cannot be moved from the top of a landing pad block, which itself is
-/// accessible only from the 'unwind' edge of an invoke. This uses the
-/// SubclassData field in Value to store whether or not the landingpad is a
-/// cleanup.
-///
-class LandingPadInst : public Instruction {
- /// ReservedSpace - The number of operands actually allocated. NumOperands is
- /// the number actually in use.
- unsigned ReservedSpace;
- LandingPadInst(const LandingPadInst &LP);
-public:
- enum ClauseType { Catch, Filter };
-private:
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- // Allocate space for exactly zero operands.
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
- void growOperands(unsigned Size);
- void init(Value *PersFn, unsigned NumReservedValues, const Twine &NameStr);
-
- explicit LandingPadInst(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedValues, const Twine &NameStr,
- Instruction *InsertBefore);
- explicit LandingPadInst(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedValues, const Twine &NameStr,
- BasicBlock *InsertAtEnd);
-protected:
- virtual LandingPadInst *clone_impl() const;
-public:
- /// Constructors - NumReservedClauses is a hint for the number of incoming
- /// clauses that this landingpad will have (use 0 if you really have no idea).
- static LandingPadInst *Create(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedClauses,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0);
- static LandingPadInst *Create(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedClauses,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
- ~LandingPadInst();
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// getPersonalityFn - Get the personality function associated with this
- /// landing pad.
- Value *getPersonalityFn() const { return getOperand(0); }
-
- /// isCleanup - Return 'true' if this landingpad instruction is a
- /// cleanup. I.e., it should be run when unwinding even if its landing pad
- /// doesn't catch the exception.
- bool isCleanup() const { return getSubclassDataFromInstruction() & 1; }
-
- /// setCleanup - Indicate that this landingpad instruction is a cleanup.
- void setCleanup(bool V) {
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
- (V ? 1 : 0));
- }
-
- /// addClause - Add a catch or filter clause to the landing pad.
- void addClause(Value *ClauseVal);
-
- /// getClause - Get the value of the clause at index Idx. Use isCatch/isFilter
- /// to determine what type of clause this is.
- Value *getClause(unsigned Idx) const { return OperandList[Idx + 1]; }
-
- /// isCatch - Return 'true' if the clause at index Idx is a catch clause.
- bool isCatch(unsigned Idx) const {
- return !isa<ArrayType>(OperandList[Idx + 1]->getType());
- }
-
- /// isFilter - Return 'true' if the clause at index Idx is a filter clause.
- bool isFilter(unsigned Idx) const {
- return isa<ArrayType>(OperandList[Idx + 1]->getType());
- }
-
- /// getNumClauses - Get the number of clauses for this landing pad.
- unsigned getNumClauses() const { return getNumOperands() - 1; }
-
- /// reserveClauses - Grow the size of the operand list to accommodate the new
- /// number of clauses.
- void reserveClauses(unsigned Size) { growOperands(Size); }
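- // A minimal sketch (hypothetical RetTy, personality function PersFn,
- // type-info global TypeInfo, and insertion point InsertPt):
- //   LandingPadInst *LP = LandingPadInst::Create(RetTy, PersFn, 1, "lp",
- //                                               InsertPt);
- //   LP->addClause(TypeInfo); // non-array type, so a catch clause
- //   LP->setCleanup(true);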
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::LandingPad;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-template <>
-struct OperandTraits<LandingPadInst> : public HungoffOperandTraits<2> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(LandingPadInst, Value)
-
-//===----------------------------------------------------------------------===//
-// ReturnInst Class
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------------------------------------------
-/// ReturnInst - Return a value (possibly void), from a function. Execution
-/// does not continue in this function any longer.
-///
-class ReturnInst : public TerminatorInst {
- ReturnInst(const ReturnInst &RI);
-
-private:
- // ReturnInst constructors:
- // ReturnInst() - 'ret void' instruction
- // ReturnInst( null) - 'ret void' instruction
- // ReturnInst(Value* X) - 'ret X' instruction
- // ReturnInst( null, Inst *I) - 'ret void' instruction, insert before I
- // ReturnInst(Value* X, Inst *I) - 'ret X' instruction, insert before I
- // ReturnInst( null, BB *B) - 'ret void' instruction, insert @ end of B
- // ReturnInst(Value* X, BB *B) - 'ret X' instruction, insert @ end of B
- //
- // NOTE: If the Value* passed is of type void then the constructor behaves as
- // if it was passed NULL.
- explicit ReturnInst(LLVMContext &C, Value *retVal = 0,
- Instruction *InsertBefore = 0);
- ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd);
- explicit ReturnInst(LLVMContext &C, BasicBlock *InsertAtEnd);
-protected:
- virtual ReturnInst *clone_impl() const;
-public:
- static ReturnInst* Create(LLVMContext &C, Value *retVal = 0,
- Instruction *InsertBefore = 0) {
- return new(!!retVal) ReturnInst(C, retVal, InsertBefore);
- }
- static ReturnInst* Create(LLVMContext &C, Value *retVal,
- BasicBlock *InsertAtEnd) {
- return new(!!retVal) ReturnInst(C, retVal, InsertAtEnd);
- }
- static ReturnInst* Create(LLVMContext &C, BasicBlock *InsertAtEnd) {
- return new(0) ReturnInst(C, InsertAtEnd);
- }
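- // A minimal sketch (hypothetical context C, i32 value RetVal, and block BB):
- //   ReturnInst::Create(C, RetVal, BB); // 'ret i32 %RetVal'
- //   ReturnInst::Create(C, BB);         // 'ret void'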
- virtual ~ReturnInst();
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// Convenience accessor. Returns null if there is no return value.
- Value *getReturnValue() const {
- return getNumOperands() != 0 ? getOperand(0) : 0;
- }
-
- unsigned getNumSuccessors() const { return 0; }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return (I->getOpcode() == Instruction::Ret);
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
- private:
- virtual BasicBlock *getSuccessorV(unsigned idx) const;
- virtual unsigned getNumSuccessorsV() const;
- virtual void setSuccessorV(unsigned idx, BasicBlock *B);
-};
-
-template <>
-struct OperandTraits<ReturnInst> : public VariadicOperandTraits<ReturnInst> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ReturnInst, Value)
-
-//===----------------------------------------------------------------------===//
-// BranchInst Class
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------------------------------------------
-/// BranchInst - Conditional or Unconditional Branch instruction.
-///
-class BranchInst : public TerminatorInst {
- /// Ops list - Branches are strange. The operands are ordered:
- /// [Cond, FalseDest,] TrueDest. This makes some accessors faster because
- /// they don't have to check for cond/uncond branchness. These are mostly
- /// accessed relative to op_end().
- BranchInst(const BranchInst &BI);
- void AssertOK();
- // BranchInst constructors (where {B, T, F} are blocks, and C is a condition):
- // BranchInst(BB *B) - 'br B'
- // BranchInst(BB* T, BB *F, Value *C) - 'br C, T, F'
- // BranchInst(BB* B, Inst *I) - 'br B' insert before I
- // BranchInst(BB* T, BB *F, Value *C, Inst *I) - 'br C, T, F', insert before I
- // BranchInst(BB* B, BB *I) - 'br B' insert at end
- // BranchInst(BB* T, BB *F, Value *C, BB *I) - 'br C, T, F', insert at end
- explicit BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore = 0);
- BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
- Instruction *InsertBefore = 0);
- BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd);
- BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
- BasicBlock *InsertAtEnd);
-protected:
- virtual BranchInst *clone_impl() const;
-public:
- static BranchInst *Create(BasicBlock *IfTrue, Instruction *InsertBefore = 0) {
- return new(1) BranchInst(IfTrue, InsertBefore);
- }
- static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse,
- Value *Cond, Instruction *InsertBefore = 0) {
- return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertBefore);
- }
- static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *InsertAtEnd) {
- return new(1) BranchInst(IfTrue, InsertAtEnd);
- }
- static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse,
- Value *Cond, BasicBlock *InsertAtEnd) {
- return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertAtEnd);
- }
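- // A minimal sketch (hypothetical blocks Dest/Then/Else, i1 condition Cond,
- // and block BB to append to):
- //   BranchInst::Create(Dest, BB);             // unconditional 'br'
- //   BranchInst::Create(Then, Else, Cond, BB); // conditional 'br'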
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- bool isUnconditional() const { return getNumOperands() == 1; }
- bool isConditional() const { return getNumOperands() == 3; }
-
- Value *getCondition() const {
- assert(isConditional() && "Cannot get condition of an uncond branch!");
- return Op<-3>();
- }
-
- void setCondition(Value *V) {
- assert(isConditional() && "Cannot set condition of unconditional branch!");
- Op<-3>() = V;
- }
-
- unsigned getNumSuccessors() const { return 1+isConditional(); }
-
- BasicBlock *getSuccessor(unsigned i) const {
- assert(i < getNumSuccessors() && "Successor # out of range for Branch!");
- return cast_or_null<BasicBlock>((&Op<-1>() - i)->get());
- }
-
- void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
- assert(idx < getNumSuccessors() && "Successor # out of range for Branch!");
- *(&Op<-1>() - idx) = (Value*)NewSucc;
- }
-
- /// \brief Swap the successors of this branch instruction.
- ///
- /// Swaps the successors of the branch instruction. This also swaps any
- /// branch weight metadata associated with the instruction so that it
- /// continues to map correctly to each operand.
- void swapSuccessors();
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return (I->getOpcode() == Instruction::Br);
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- virtual BasicBlock *getSuccessorV(unsigned idx) const;
- virtual unsigned getNumSuccessorsV() const;
- virtual void setSuccessorV(unsigned idx, BasicBlock *B);
-};
-
-template <>
-struct OperandTraits<BranchInst> : public VariadicOperandTraits<BranchInst, 1> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value)
-
-//===----------------------------------------------------------------------===//
-// SwitchInst Class
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------------------------------------------
-/// SwitchInst - Multiway switch
-///
-class SwitchInst : public TerminatorInst {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- unsigned ReservedSpace;
- // Operands format:
- // Operand[0] = Value to switch on
- // Operand[1] = Default basic block destination
- // Operand[2n ] = Value to match
- // Operand[2n+1] = BasicBlock to go to on match
-
- // Store case values separately from the operands list. We don't need the
- // User-Use concept here: a case value is always a constant and cannot be
- // reused by other instructions/values.
- // Additionally:
- // It allows us to use a custom type for case values that is not inherited
- // from Value. Since a case value is a complex type implementing a subset
- // of the integers, we don't have to extract sub-constants with the slow
- // getAggregateElement method.
- // For case values we use std::list for two reasons:
- // 1. It allows adding/removing cases without reallocating the whole
- //    collection.
- // 2. In most cases we don't need random access.
- // Currently case values are also stored in the operands list, but they will
- // be moved out in future commits.
- typedef std::list<IntegersSubset> Subsets;
- typedef Subsets::iterator SubsetsIt;
- typedef Subsets::const_iterator SubsetsConstIt;
-
- Subsets TheSubsets;
-
- SwitchInst(const SwitchInst &SI);
- void init(Value *Value, BasicBlock *Default, unsigned NumReserved);
- void growOperands();
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
- /// SwitchInst ctor - Create a new switch instruction, specifying a value to
- /// switch on and a default destination. The number of additional cases can
- /// be specified here to make memory allocation more efficient. This
- /// constructor can also autoinsert before another instruction.
- SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
- Instruction *InsertBefore);
-
- /// SwitchInst ctor - Create a new switch instruction, specifying a value to
- /// switch on and a default destination. The number of additional cases can
- /// be specified here to make memory allocation more efficient. This
- /// constructor also autoinserts at the end of the specified BasicBlock.
- SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
- BasicBlock *InsertAtEnd);
-protected:
- virtual SwitchInst *clone_impl() const;
-public:
-
- // FIXME: Currently there are a lot of unclean template parameters;
- // this needs refactoring in the future.
- // All these parameters are used to implement both iterator and const_iterator
- // without code duplication.
- // SwitchInstTy may be "const SwitchInst" or "SwitchInst"
- // ConstantIntTy may be "const ConstantInt" or "ConstantInt"
- // SubsetsItTy may be SubsetsConstIt or SubsetsIt
- // BasicBlockTy may be "const BasicBlock" or "BasicBlock"
- template <class SwitchInstTy, class ConstantIntTy,
- class SubsetsItTy, class BasicBlockTy>
- class CaseIteratorT;
-
- typedef CaseIteratorT<const SwitchInst, const ConstantInt,
- SubsetsConstIt, const BasicBlock> ConstCaseIt;
- class CaseIt;
-
- // DefaultPseudoIndex is (unsigned)-2; it denotes the default case.
- static const unsigned DefaultPseudoIndex = static_cast<unsigned>(~0L-1);
-
- static SwitchInst *Create(Value *Value, BasicBlock *Default,
- unsigned NumCases, Instruction *InsertBefore = 0) {
- return new SwitchInst(Value, Default, NumCases, InsertBefore);
- }
- static SwitchInst *Create(Value *Value, BasicBlock *Default,
- unsigned NumCases, BasicBlock *InsertAtEnd) {
- return new SwitchInst(Value, Default, NumCases, InsertAtEnd);
- }
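- // A minimal sketch (hypothetical i32 condition Cond, blocks DefaultBB and
- // CaseBB, ConstantInt *Zero, and block BB to append to), using the
- // ConstantInt-based addCase declared below:
- //   SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, 1, BB);
- //   SI->addCase(Zero, CaseBB);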
-
- ~SwitchInst();
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- // Accessor Methods for Switch stmt
- Value *getCondition() const { return getOperand(0); }
- void setCondition(Value *V) { setOperand(0, V); }
-
- BasicBlock *getDefaultDest() const {
- return cast<BasicBlock>(getOperand(1));
- }
-
- void setDefaultDest(BasicBlock *DefaultCase) {
- setOperand(1, reinterpret_cast<Value*>(DefaultCase));
- }
-
- /// getNumCases - return the number of 'cases' in this switch instruction,
- /// excluding the default case.
- unsigned getNumCases() const {
- return getNumOperands()/2 - 1;
- }
-
- /// Returns a read/write iterator that points to the first
- /// case in SwitchInst.
- CaseIt case_begin() {
- return CaseIt(this, 0, TheSubsets.begin());
- }
- /// Returns a read-only iterator that points to the first
- /// case in the SwitchInst.
- ConstCaseIt case_begin() const {
- return ConstCaseIt(this, 0, TheSubsets.begin());
- }
-
- /// Returns a read/write iterator that points one past the last case
- /// in the SwitchInst.
- CaseIt case_end() {
- return CaseIt(this, getNumCases(), TheSubsets.end());
- }
- /// Returns a read-only iterator that points one past the last case
- /// in the SwitchInst.
- ConstCaseIt case_end() const {
- return ConstCaseIt(this, getNumCases(), TheSubsets.end());
- }
- /// Returns an iterator that points to the default case.
- /// Note: this iterator can only resolve the successor; attempting to
- /// resolve the case value triggers an assertion.
- /// Also note that incrementing or decrementing this iterator triggers an
- /// assertion and leaves it invalid.
- CaseIt case_default() {
- return CaseIt(this, DefaultPseudoIndex, TheSubsets.end());
- }
- ConstCaseIt case_default() const {
- return ConstCaseIt(this, DefaultPseudoIndex, TheSubsets.end());
- }
-
- /// findCaseValue - Search all of the case values for the specified constant.
- /// If it is explicitly handled, return its case iterator; otherwise return
- /// the default case iterator to indicate that it is handled by the default
- /// handler.
- CaseIt findCaseValue(const ConstantInt *C) {
- for (CaseIt i = case_begin(), e = case_end(); i != e; ++i)
- if (i.getCaseValueEx().isSatisfies(IntItem::fromConstantInt(C)))
- return i;
- return case_default();
- }
- ConstCaseIt findCaseValue(const ConstantInt *C) const {
- for (ConstCaseIt i = case_begin(), e = case_end(); i != e; ++i)
- if (i.getCaseValueEx().isSatisfies(IntItem::fromConstantInt(C)))
- return i;
- return case_default();
- }
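- // A minimal sketch (hypothetical SwitchInst *SI and ConstantInt *CV):
- //   CaseIt I = SI->findCaseValue(CV);
- //   BasicBlock *Dest = I.getCaseSuccessor(); // default dest if CV not found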
-
- /// findCaseDest - Finds the unique case value for a given successor. Returns
- /// null if the successor is not found, not unique, or is the default case.
- ConstantInt *findCaseDest(BasicBlock *BB) {
- if (BB == getDefaultDest()) return NULL;
-
- ConstantInt *CI = NULL;
- for (CaseIt i = case_begin(), e = case_end(); i != e; ++i) {
- if (i.getCaseSuccessor() == BB) {
- if (CI) return NULL; // Multiple cases lead to BB.
- else CI = i.getCaseValue();
- }
- }
- return CI;
- }
-
- /// addCase - Add an entry to the switch instruction.
- /// @deprecated
- /// Note:
- /// This action invalidates case_end(); the old case_end() iterator will
- /// point to the added case.
- void addCase(ConstantInt *OnVal, BasicBlock *Dest);
-
- /// addCase - Add an entry to the switch instruction.
- /// Note:
- /// This action invalidates case_end(); the old case_end() iterator will
- /// point to the added case.
- void addCase(IntegersSubset& OnVal, BasicBlock *Dest);
-
- /// removeCase - This method removes the specified case and its successor
- /// from the switch instruction. Note that this operation may reorder the
- /// remaining cases at index idx and above.
- /// Note:
- /// This action invalidates iterators for all cases following the one removed,
- /// including the case_end() iterator.
- void removeCase(CaseIt& i);
-
- unsigned getNumSuccessors() const { return getNumOperands()/2; }
- BasicBlock *getSuccessor(unsigned idx) const {
- assert(idx < getNumSuccessors() &&"Successor idx out of range for switch!");
- return cast<BasicBlock>(getOperand(idx*2+1));
- }
- void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
- assert(idx < getNumSuccessors() && "Successor # out of range for switch!");
- setOperand(idx*2+1, (Value*)NewSucc);
- }
-
- uint16_t hash() const {
- uint32_t NumberOfCases = (uint32_t)getNumCases();
- uint16_t Hash = (0xFFFF & NumberOfCases) ^ (NumberOfCases >> 16);
- for (ConstCaseIt i = case_begin(), e = case_end();
- i != e; ++i) {
- uint32_t NumItems = (uint32_t)i.getCaseValueEx().getNumItems();
- Hash = (Hash << 1) ^ (0xFFFF & NumItems) ^ (NumItems >> 16);
- }
- return Hash;
- }
-
- // Case iterators definition.
-
- template <class SwitchInstTy, class ConstantIntTy,
- class SubsetsItTy, class BasicBlockTy>
- class CaseIteratorT {
- protected:
-
- SwitchInstTy *SI;
- unsigned long Index;
- SubsetsItTy SubsetIt;
-
- /// Initializes a case iterator for the given SwitchInst and the given
- /// case number.
- friend class SwitchInst;
- CaseIteratorT(SwitchInstTy *SI, unsigned SuccessorIndex,
- SubsetsItTy CaseValueIt) {
- this->SI = SI;
- Index = SuccessorIndex;
- this->SubsetIt = CaseValueIt;
- }
-
- public:
- typedef typename SubsetsItTy::reference IntegersSubsetRef;
- typedef CaseIteratorT<SwitchInstTy, ConstantIntTy,
- SubsetsItTy, BasicBlockTy> Self;
-
- CaseIteratorT(SwitchInstTy *SI, unsigned CaseNum) {
- this->SI = SI;
- Index = CaseNum;
- SubsetIt = SI->TheSubsets.begin();
- std::advance(SubsetIt, CaseNum);
- }
-
-
- /// Initializes a case iterator for the given SwitchInst and the given
- /// TerminatorInst successor index.
- static Self fromSuccessorIndex(SwitchInstTy *SI, unsigned SuccessorIndex) {
- assert(SuccessorIndex < SI->getNumSuccessors() &&
- "Successor index # out of range!");
- return SuccessorIndex != 0 ?
- Self(SI, SuccessorIndex - 1) :
- Self(SI, DefaultPseudoIndex);
- }
-
- /// Resolves case value for current case.
- /// @deprecated
- ConstantIntTy *getCaseValue() {
- assert(Index < SI->getNumCases() && "Index exceeds the number of cases.");
- IntegersSubsetRef CaseRanges = *SubsetIt;
-
- // FIXME: Currently we work with ConstantInt-based cases.
- // So return the case value as a ConstantInt.
- return CaseRanges.getSingleNumber(0).toConstantInt();
- }
-
- /// Resolves case value for current case.
- IntegersSubsetRef getCaseValueEx() {
- assert(Index < SI->getNumCases() && "Index exceeds the number of cases.");
- return *SubsetIt;
- }
-
- /// Resolves successor for current case.
- BasicBlockTy *getCaseSuccessor() {
- assert((Index < SI->getNumCases() ||
- Index == DefaultPseudoIndex) &&
- "Index out the number of cases.");
- return SI->getSuccessor(getSuccessorIndex());
- }
-
- /// Returns number of current case.
- unsigned getCaseIndex() const { return Index; }
-
- /// Returns TerminatorInst's successor index for current case successor.
- unsigned getSuccessorIndex() const {
- assert((Index == DefaultPseudoIndex || Index < SI->getNumCases()) &&
- "Index out the number of cases.");
- return Index != DefaultPseudoIndex ? Index + 1 : 0;
- }
-
- Self operator++() {
- // Check index correctness after increment.
- // Note: Index == getNumCases() means end().
- assert(Index+1 <= SI->getNumCases() && "Index exceeds the number of cases.");
- ++Index;
- if (Index == 0)
- SubsetIt = SI->TheSubsets.begin();
- else
- ++SubsetIt;
- return *this;
- }
- Self operator++(int) {
- Self tmp = *this;
- ++(*this);
- return tmp;
- }
- Self operator--() {
- // Check index correctness after decrement.
- // Note: Index == getNumCases() means end().
- // Also allow "-1" iterator here. That will became valid after ++.
- unsigned NumCases = SI->getNumCases();
- assert((Index == 0 || Index-1 <= NumCases) &&
- "Index out the number of cases.");
- --Index;
- if (Index == NumCases) {
- SubsetIt = SI->TheSubsets.end();
- return *this;
- }
-
- if (Index != -1UL)
- --SubsetIt;
-
- return *this;
- }
- Self operator--(int) {
- Self tmp = *this;
- --(*this);
- return tmp;
- }
- bool operator==(const Self& RHS) const {
- assert(RHS.SI == SI && "Incompatible operators.");
- return RHS.Index == Index;
- }
- bool operator!=(const Self& RHS) const {
- assert(RHS.SI == SI && "Incompatible operators.");
- return RHS.Index != Index;
- }
- };
-
- class CaseIt : public CaseIteratorT<SwitchInst, ConstantInt,
- SubsetsIt, BasicBlock> {
- typedef CaseIteratorT<SwitchInst, ConstantInt, SubsetsIt, BasicBlock>
- ParentTy;
-
- protected:
- friend class SwitchInst;
- CaseIt(SwitchInst *SI, unsigned CaseNum, SubsetsIt SubsetIt) :
- ParentTy(SI, CaseNum, SubsetIt) {}
-
- void updateCaseValueOperand(IntegersSubset& V) {
- SI->setOperand(2 + Index*2, reinterpret_cast<Value*>((Constant*)V));
- }
-
- public:
-
- CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {}
-
- CaseIt(const ParentTy& Src) : ParentTy(Src) {}
-
- /// Sets the new value for current case.
- /// @deprecated.
- void setValue(ConstantInt *V) {
- assert(Index < SI->getNumCases() && "Index exceeds the number of cases.");
- IntegersSubsetToBB Mapping;
- // FIXME: Currently we work with ConstantInt-based cases.
- // So initialize the IntItem container directly from the ConstantInt.
- Mapping.add(IntItem::fromConstantInt(V));
- *SubsetIt = Mapping.getCase();
- updateCaseValueOperand(*SubsetIt);
- }
-
- /// Sets the new value for current case.
- void setValueEx(IntegersSubset& V) {
- assert(Index < SI->getNumCases() && "Index exceeds the number of cases.");
- *SubsetIt = V;
- updateCaseValueOperand(*SubsetIt);
- }
-
- /// Sets the new successor for current case.
- void setSuccessor(BasicBlock *S) {
- SI->setSuccessor(getSuccessorIndex(), S);
- }
- };
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
-
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Switch;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- virtual BasicBlock *getSuccessorV(unsigned idx) const;
- virtual unsigned getNumSuccessorsV() const;
- virtual void setSuccessorV(unsigned idx, BasicBlock *B);
-};
-
-template <>
-struct OperandTraits<SwitchInst> : public HungoffOperandTraits<2> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value)
-
-
-//===----------------------------------------------------------------------===//
-// IndirectBrInst Class
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------------------------------------------
-/// IndirectBrInst - Indirect Branch Instruction.
-///
-class IndirectBrInst : public TerminatorInst {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- unsigned ReservedSpace;
- // Operand[0]   = Address to jump to
- // Operand[i+1] = i-th destination BasicBlock
- IndirectBrInst(const IndirectBrInst &IBI);
- void init(Value *Address, unsigned NumDests);
- void growOperands();
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
- /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
- /// Address to jump to. The number of expected destinations can be specified
- /// here to make memory allocation more efficient. This constructor can also
- /// autoinsert before another instruction.
- IndirectBrInst(Value *Address, unsigned NumDests, Instruction *InsertBefore);
-
- /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
- /// Address to jump to. The number of expected destinations can be specified
- /// here to make memory allocation more efficient. This constructor also
- /// autoinserts at the end of the specified BasicBlock.
- IndirectBrInst(Value *Address, unsigned NumDests, BasicBlock *InsertAtEnd);
-protected:
- virtual IndirectBrInst *clone_impl() const;
-public:
- static IndirectBrInst *Create(Value *Address, unsigned NumDests,
- Instruction *InsertBefore = 0) {
- return new IndirectBrInst(Address, NumDests, InsertBefore);
- }
- static IndirectBrInst *Create(Value *Address, unsigned NumDests,
- BasicBlock *InsertAtEnd) {
- return new IndirectBrInst(Address, NumDests, InsertAtEnd);
- }
- ~IndirectBrInst();
-
- /// Provide fast operand accessors.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- // Accessor Methods for IndirectBrInst instruction.
- Value *getAddress() { return getOperand(0); }
- const Value *getAddress() const { return getOperand(0); }
- void setAddress(Value *V) { setOperand(0, V); }
-
-
- /// getNumDestinations - return the number of possible destinations in this
- /// indirectbr instruction.
- unsigned getNumDestinations() const { return getNumOperands()-1; }
-
- /// getDestination - Return the specified destination.
- BasicBlock *getDestination(unsigned i) { return getSuccessor(i); }
- const BasicBlock *getDestination(unsigned i) const { return getSuccessor(i); }
-
- /// addDestination - Add a destination.
- ///
- void addDestination(BasicBlock *Dest);
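- // A minimal sketch (hypothetical i8* Address, candidate blocks BB1/BB2, and
- // block BB to append to):
- //   IndirectBrInst *IBI = IndirectBrInst::Create(Address, 2, BB);
- //   IBI->addDestination(BB1);
- //   IBI->addDestination(BB2);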
-
- /// removeDestination - This method removes the specified successor from the
- /// indirectbr instruction.
- void removeDestination(unsigned i);
-
- unsigned getNumSuccessors() const { return getNumOperands()-1; }
- BasicBlock *getSuccessor(unsigned i) const {
- return cast<BasicBlock>(getOperand(i+1));
- }
- void setSuccessor(unsigned i, BasicBlock *NewSucc) {
- setOperand(i+1, (Value*)NewSucc);
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::IndirectBr;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- virtual BasicBlock *getSuccessorV(unsigned idx) const;
- virtual unsigned getNumSuccessorsV() const;
- virtual void setSuccessorV(unsigned idx, BasicBlock *B);
-};
-
-template <>
-struct OperandTraits<IndirectBrInst> : public HungoffOperandTraits<1> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
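-
-// Illustrative sketch (not part of this header): an indirectbr that may jump
-// to either of two labels, assuming a Function *F with BasicBlocks Entry,
-// Target1 and Target2 already exists.
-//
-//   Value *Addr = BlockAddress::get(F, Target1);  // the address jumped through
-//   IndirectBrInst *IBI =
-//       IndirectBrInst::Create(Addr, /*NumDests=*/2, /*InsertAtEnd=*/Entry);
-//   IBI->addDestination(Target1);
-//   IBI->addDestination(Target2);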
-
-
-//===----------------------------------------------------------------------===//
-// InvokeInst Class
-//===----------------------------------------------------------------------===//
-
-/// InvokeInst - Invoke instruction. The SubclassData field is used to hold the
-/// calling convention of the call.
-///
-class InvokeInst : public TerminatorInst {
- AttrListPtr AttributeList;
- InvokeInst(const InvokeInst &BI);
- void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, const Twine &NameStr);
-
- /// \brief Construct an InvokeInst given a range of arguments.
- inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, unsigned Values,
- const Twine &NameStr, Instruction *InsertBefore);
-
- /// \brief Construct an InvokeInst given a range of arguments.
- inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, unsigned Values,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
-protected:
- virtual InvokeInst *clone_impl() const;
-public:
- static InvokeInst *Create(Value *Func,
- BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- unsigned Values = unsigned(Args.size()) + 3;
- return new(Values) InvokeInst(Func, IfNormal, IfException, Args,
- Values, NameStr, InsertBefore);
- }
- static InvokeInst *Create(Value *Func,
- BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- unsigned Values = unsigned(Args.size()) + 3;
- return new(Values) InvokeInst(Func, IfNormal, IfException, Args,
- Values, NameStr, InsertAtEnd);
- }
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// getNumArgOperands - Return the number of invoke arguments.
- ///
- unsigned getNumArgOperands() const { return getNumOperands() - 3; }
-
- /// getArgOperand/setArgOperand - Return/set the i-th invoke argument.
- ///
- Value *getArgOperand(unsigned i) const { return getOperand(i); }
- void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
-
- /// getCallingConv/setCallingConv - Get or set the calling convention of this
- /// function call.
- CallingConv::ID getCallingConv() const {
- return static_cast<CallingConv::ID>(getSubclassDataFromInstruction());
- }
- void setCallingConv(CallingConv::ID CC) {
- setInstructionSubclassData(static_cast<unsigned>(CC));
- }
-
- /// getAttributes - Return the parameter attributes for this invoke.
- ///
- const AttrListPtr &getAttributes() const { return AttributeList; }
-
- /// setAttributes - Set the parameter attributes for this invoke.
- ///
- void setAttributes(const AttrListPtr &Attrs) { AttributeList = Attrs; }
-
- /// addAttribute - adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attributes attr);
-
- /// removeAttribute - removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, Attributes attr);
-
- /// \brief Determine whether this call has the given function attribute.
- bool hasFnAttr(Attributes::AttrVal A) const;
-
- /// \brief Determine whether the call or the callee has the given attributes.
- bool paramHasAttr(unsigned i, Attributes::AttrVal A) const;
-
- /// \brief Extract the alignment for a call or parameter (0=unknown).
- unsigned getParamAlignment(unsigned i) const {
- return AttributeList.getParamAlignment(i);
- }
-
- /// \brief Return true if the call should not be inlined.
- bool isNoInline() const { return hasFnAttr(Attributes::NoInline); }
- void setIsNoInline() {
- addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(getContext(), Attributes::NoInline));
- }
-
- /// \brief Determine if the call does not access memory.
- bool doesNotAccessMemory() const {
- return hasFnAttr(Attributes::ReadNone);
- }
- void setDoesNotAccessMemory() {
- addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(getContext(), Attributes::ReadNone));
- }
-
- /// \brief Determine if the call does not access or only reads memory.
- bool onlyReadsMemory() const {
- return doesNotAccessMemory() || hasFnAttr(Attributes::ReadOnly);
- }
- void setOnlyReadsMemory() {
- addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(getContext(), Attributes::ReadOnly));
- }
-
- /// \brief Determine if the call cannot return.
- bool doesNotReturn() const { return hasFnAttr(Attributes::NoReturn); }
- void setDoesNotReturn() {
- addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(getContext(), Attributes::NoReturn));
- }
-
- /// \brief Determine if the call cannot unwind.
- bool doesNotThrow() const { return hasFnAttr(Attributes::NoUnwind); }
- void setDoesNotThrow() {
- addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(getContext(), Attributes::NoUnwind));
- }
-
- /// \brief Determine if the call returns a structure through its first
- /// pointer argument.
- bool hasStructRetAttr() const {
- // Be friendly and also check the callee.
- return paramHasAttr(1, Attributes::StructRet);
- }
-
- /// \brief Determine if any call argument is an aggregate passed by value.
- bool hasByValArgument() const {
- for (unsigned I = 0, E = AttributeList.getNumAttrs(); I != E; ++I)
- if (AttributeList.getAttributesAtIndex(I).hasAttribute(Attributes::ByVal))
- return true;
- return false;
- }
-
- /// getCalledFunction - Return the function called, or null if this is an
- /// indirect function invocation.
- ///
- Function *getCalledFunction() const {
- return dyn_cast<Function>(Op<-3>());
- }
-
- /// getCalledValue - Get a pointer to the function that is invoked by this
- /// instruction
- const Value *getCalledValue() const { return Op<-3>(); }
- Value *getCalledValue() { return Op<-3>(); }
-
- /// setCalledFunction - Set the function called.
- void setCalledFunction(Value* Fn) {
- Op<-3>() = Fn;
- }
-
- // get*Dest - Return the destination basic blocks...
- BasicBlock *getNormalDest() const {
- return cast<BasicBlock>(Op<-2>());
- }
- BasicBlock *getUnwindDest() const {
- return cast<BasicBlock>(Op<-1>());
- }
- void setNormalDest(BasicBlock *B) {
- Op<-2>() = reinterpret_cast<Value*>(B);
- }
- void setUnwindDest(BasicBlock *B) {
- Op<-1>() = reinterpret_cast<Value*>(B);
- }
-
- /// getLandingPadInst - Get the landingpad instruction from the landing pad
- /// block (the unwind destination).
- LandingPadInst *getLandingPadInst() const;
-
- BasicBlock *getSuccessor(unsigned i) const {
- assert(i < 2 && "Successor # out of range for invoke!");
- return i == 0 ? getNormalDest() : getUnwindDest();
- }
-
- void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
- assert(idx < 2 && "Successor # out of range for invoke!");
- *(&Op<-2>() + idx) = reinterpret_cast<Value*>(NewSucc);
- }
-
- unsigned getNumSuccessors() const { return 2; }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return (I->getOpcode() == Instruction::Invoke);
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-
-private:
- virtual BasicBlock *getSuccessorV(unsigned idx) const;
- virtual unsigned getNumSuccessorsV() const;
- virtual void setSuccessorV(unsigned idx, BasicBlock *B);
-
- // Shadow Instruction::setInstructionSubclassData with a private forwarding
- // method so that subclasses cannot accidentally use it.
- void setInstructionSubclassData(unsigned short D) {
- Instruction::setInstructionSubclassData(D);
- }
-};
-
-template <>
-struct OperandTraits<InvokeInst> : public VariadicOperandTraits<InvokeInst, 3> {
-};
-
-InvokeInst::InvokeInst(Value *Func,
- BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, unsigned Values,
- const Twine &NameStr, Instruction *InsertBefore)
- : TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
- ->getElementType())->getReturnType(),
- Instruction::Invoke,
- OperandTraits<InvokeInst>::op_end(this) - Values,
- Values, InsertBefore) {
- init(Func, IfNormal, IfException, Args, NameStr);
-}
-InvokeInst::InvokeInst(Value *Func,
- BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, unsigned Values,
- const Twine &NameStr, BasicBlock *InsertAtEnd)
- : TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
- ->getElementType())->getReturnType(),
- Instruction::Invoke,
- OperandTraits<InvokeInst>::op_end(this) - Values,
- Values, InsertAtEnd) {
- init(Func, IfNormal, IfException, Args, NameStr);
-}
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value)
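-
-// Illustrative sketch (not part of this header): creating an invoke and
-// setting its calling convention, assuming Callee, an Args array, and
-// BasicBlocks NormalBB, UnwindBB and BB already exist.
-//
-//   InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args,
-//                                       "res", /*InsertAtEnd=*/BB);
-//   II->setCallingConv(CallingConv::Fast);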
-
-//===----------------------------------------------------------------------===//
-// ResumeInst Class
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------------------------------------------
-/// ResumeInst - Resume the propagation of an exception.
-///
-class ResumeInst : public TerminatorInst {
- ResumeInst(const ResumeInst &RI);
-
- explicit ResumeInst(Value *Exn, Instruction *InsertBefore=0);
- ResumeInst(Value *Exn, BasicBlock *InsertAtEnd);
-protected:
- virtual ResumeInst *clone_impl() const;
-public:
- static ResumeInst *Create(Value *Exn, Instruction *InsertBefore = 0) {
- return new(1) ResumeInst(Exn, InsertBefore);
- }
- static ResumeInst *Create(Value *Exn, BasicBlock *InsertAtEnd) {
- return new(1) ResumeInst(Exn, InsertAtEnd);
- }
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- /// Convenience accessor.
- Value *getValue() const { return Op<0>(); }
-
- unsigned getNumSuccessors() const { return 0; }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Resume;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- virtual BasicBlock *getSuccessorV(unsigned idx) const;
- virtual unsigned getNumSuccessorsV() const;
- virtual void setSuccessorV(unsigned idx, BasicBlock *B);
-};
-
-template <>
-struct OperandTraits<ResumeInst> :
- public FixedNumOperandTraits<ResumeInst, 1> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value)
-
-//===----------------------------------------------------------------------===//
-// UnreachableInst Class
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------------------------------------------
-/// UnreachableInst - Executing this instruction has undefined behavior. In
-/// particular, its presence indicates some higher-level knowledge that the
-/// end of the block cannot be reached.
-///
-class UnreachableInst : public TerminatorInst {
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-protected:
- virtual UnreachableInst *clone_impl() const;
-
-public:
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
- explicit UnreachableInst(LLVMContext &C, Instruction *InsertBefore = 0);
- explicit UnreachableInst(LLVMContext &C, BasicBlock *InsertAtEnd);
-
- unsigned getNumSuccessors() const { return 0; }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Unreachable;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- virtual BasicBlock *getSuccessorV(unsigned idx) const;
- virtual unsigned getNumSuccessorsV() const;
- virtual void setSuccessorV(unsigned idx, BasicBlock *B);
-};
-
-//===----------------------------------------------------------------------===//
-// TruncInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents a truncation of integer types.
-class TruncInst : public CastInst {
-protected:
- /// \brief Clone an identical TruncInst
- virtual TruncInst *clone_impl() const;
-
-public:
- /// \brief Constructor with insert-before-instruction semantics
- TruncInst(
- Value *S, ///< The value to be truncated
- Type *Ty, ///< The (smaller) type to truncate to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end-of-block semantics
- TruncInst(
- Value *S, ///< The value to be truncated
- Type *Ty, ///< The (smaller) type to truncate to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Trunc;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
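-
-// Illustrative sketch (not part of this header): truncating a 32-bit value
-// to 8 bits, assuming a Value *Wide of type i32, a context Ctx and an
-// insertion point InsertPt already exist. The other cast classes below are
-// used the same way.
-//
-//   TruncInst *Lo = new TruncInst(Wide, Type::getInt8Ty(Ctx), "lo", InsertPt);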
-
-//===----------------------------------------------------------------------===//
-// ZExtInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents zero extension of integer types.
-class ZExtInst : public CastInst {
-protected:
- /// \brief Clone an identical ZExtInst
- virtual ZExtInst *clone_impl() const;
-
-public:
- /// \brief Constructor with insert-before-instruction semantics
- ZExtInst(
- Value *S, ///< The value to be zero extended
- Type *Ty, ///< The type to zero extend to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end semantics.
- ZExtInst(
- Value *S, ///< The value to be zero extended
- Type *Ty, ///< The type to zero extend to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == ZExt;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// SExtInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents a sign extension of integer types.
-class SExtInst : public CastInst {
-protected:
- /// \brief Clone an identical SExtInst
- virtual SExtInst *clone_impl() const;
-
-public:
- /// \brief Constructor with insert-before-instruction semantics
- SExtInst(
- Value *S, ///< The value to be sign extended
- Type *Ty, ///< The type to sign extend to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end-of-block semantics
- SExtInst(
- Value *S, ///< The value to be sign extended
- Type *Ty, ///< The type to sign extend to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == SExt;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// FPTruncInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents a truncation of floating point types.
-class FPTruncInst : public CastInst {
-protected:
- /// \brief Clone an identical FPTruncInst
- virtual FPTruncInst *clone_impl() const;
-
-public:
- /// \brief Constructor with insert-before-instruction semantics
- FPTruncInst(
- Value *S, ///< The value to be truncated
- Type *Ty, ///< The type to truncate to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end-of-block semantics
- FPTruncInst(
- Value *S, ///< The value to be truncated
- Type *Ty, ///< The type to truncate to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == FPTrunc;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// FPExtInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents an extension of floating point types.
-class FPExtInst : public CastInst {
-protected:
- /// \brief Clone an identical FPExtInst
- virtual FPExtInst *clone_impl() const;
-
-public:
- /// \brief Constructor with insert-before-instruction semantics
- FPExtInst(
- Value *S, ///< The value to be extended
- Type *Ty, ///< The type to extend to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end-of-block semantics
- FPExtInst(
- Value *S, ///< The value to be extended
- Type *Ty, ///< The type to extend to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == FPExt;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// UIToFPInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents a cast from unsigned integer to floating point.
-class UIToFPInst : public CastInst {
-protected:
- /// \brief Clone an identical UIToFPInst
- virtual UIToFPInst *clone_impl() const;
-
-public:
- /// \brief Constructor with insert-before-instruction semantics
- UIToFPInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end-of-block semantics
- UIToFPInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == UIToFP;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// SIToFPInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents a cast from signed integer to floating point.
-class SIToFPInst : public CastInst {
-protected:
- /// \brief Clone an identical SIToFPInst
- virtual SIToFPInst *clone_impl() const;
-
-public:
- /// \brief Constructor with insert-before-instruction semantics
- SIToFPInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end-of-block semantics
- SIToFPInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == SIToFP;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// FPToUIInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents a cast from floating point to unsigned integer.
-class FPToUIInst : public CastInst {
-protected:
- /// \brief Clone an identical FPToUIInst
- virtual FPToUIInst *clone_impl() const;
-
-public:
- /// \brief Constructor with insert-before-instruction semantics
- FPToUIInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end-of-block semantics
- FPToUIInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< Where to insert the new instruction
- );
-
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == FPToUI;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// FPToSIInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents a cast from floating point to signed integer.
-class FPToSIInst : public CastInst {
-protected:
- /// \brief Clone an identical FPToSIInst
- virtual FPToSIInst *clone_impl() const;
-
-public:
- /// \brief Constructor with insert-before-instruction semantics
- FPToSIInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end-of-block semantics
- FPToSIInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == FPToSI;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// IntToPtrInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents a cast from an integer to a pointer.
-class IntToPtrInst : public CastInst {
-public:
- /// \brief Constructor with insert-before-instruction semantics
- IntToPtrInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end-of-block semantics
- IntToPtrInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// \brief Clone an identical IntToPtrInst
- virtual IntToPtrInst *clone_impl() const;
-
- /// \brief Returns the address space of this instruction's pointer type.
- unsigned getAddressSpace() const {
- return getType()->getPointerAddressSpace();
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == IntToPtr;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// PtrToIntInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents a cast from a pointer to an integer.
-class PtrToIntInst : public CastInst {
-protected:
- /// \brief Clone an identical PtrToIntInst
- virtual PtrToIntInst *clone_impl() const;
-
-public:
- /// \brief Constructor with insert-before-instruction semantics
- PtrToIntInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end-of-block semantics
- PtrToIntInst(
- Value *S, ///< The value to be converted
- Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- /// \brief Gets the pointer operand.
- Value *getPointerOperand() { return getOperand(0); }
- /// \brief Gets the pointer operand.
- const Value *getPointerOperand() const { return getOperand(0); }
- /// \brief Gets the operand index of the pointer operand.
- static unsigned getPointerOperandIndex() { return 0U; }
-
- /// \brief Returns the address space of the pointer operand.
- unsigned getPointerAddressSpace() const {
- return getPointerOperand()->getType()->getPointerAddressSpace();
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == PtrToInt;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// BitCastInst Class
-//===----------------------------------------------------------------------===//
-
-/// \brief This class represents a no-op cast from one type to another.
-class BitCastInst : public CastInst {
-protected:
- /// \brief Clone an identical BitCastInst
- virtual BitCastInst *clone_impl() const;
-
-public:
- /// \brief Constructor with insert-before-instruction semantics
- BitCastInst(
- Value *S, ///< The value to be cast
- Type *Ty, ///< The type to cast to
- const Twine &NameStr = "", ///< A name for the new instruction
- Instruction *InsertBefore = 0 ///< Where to insert the new instruction
- );
-
- /// \brief Constructor with insert-at-end-of-block semantics
- BitCastInst(
- Value *S, ///< The value to be cast
- Type *Ty, ///< The type to cast to
- const Twine &NameStr, ///< A name for the new instruction
- BasicBlock *InsertAtEnd ///< The block to insert the instruction into
- );
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == BitCast;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h
deleted file mode 100644
index 9b2afd56e05f..000000000000
--- a/include/llvm/IntrinsicInst.h
+++ /dev/null
@@ -1,316 +0,0 @@
-//===-- llvm/IntrinsicInst.h - Intrinsic Instruction Wrappers ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines classes that make it really easy to deal with intrinsic
-// functions with the isa/dyn_cast family of functions. In particular, this
-// allows you to do things like:
-//
-// if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(Inst))
-// ... MCI->getDest() ... MCI->getSource() ...
-//
-// All intrinsic function calls are instances of the call instruction, so these
-// are all subclasses of the CallInst class. Note that none of these classes
-// has state or virtual methods, which is an important part of this gross/neat
-// hack working.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_INTRINSICINST_H
-#define LLVM_INTRINSICINST_H
-
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-
-namespace llvm {
- /// IntrinsicInst - A useful wrapper class for inspecting calls to intrinsic
- /// functions. This allows the standard isa/dyn_cast/cast functionality to
- /// work with calls to intrinsic functions.
- class IntrinsicInst : public CallInst {
- IntrinsicInst() LLVM_DELETED_FUNCTION;
- IntrinsicInst(const IntrinsicInst&) LLVM_DELETED_FUNCTION;
- void operator=(const IntrinsicInst&) LLVM_DELETED_FUNCTION;
- public:
- /// getIntrinsicID - Return the intrinsic ID of this intrinsic.
- ///
- Intrinsic::ID getIntrinsicID() const {
- return (Intrinsic::ID)getCalledFunction()->getIntrinsicID();
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const CallInst *I) {
- if (const Function *CF = I->getCalledFunction())
- return CF->getIntrinsicID() != 0;
- return false;
- }
- static inline bool classof(const Value *V) {
- return isa<CallInst>(V) && classof(cast<CallInst>(V));
- }
- };
-
- /// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
- ///
- class DbgInfoIntrinsic : public IntrinsicInst {
- public:
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
- switch (I->getIntrinsicID()) {
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- return true;
- default: return false;
- }
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
-
- static Value *StripCast(Value *C);
- };
-
- /// DbgDeclareInst - This represents the llvm.dbg.declare instruction.
- ///
- class DbgDeclareInst : public DbgInfoIntrinsic {
- public:
- Value *getAddress() const;
- MDNode *getVariable() const { return cast<MDNode>(getArgOperand(1)); }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::dbg_declare;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
- /// DbgValueInst - This represents the llvm.dbg.value instruction.
- ///
- class DbgValueInst : public DbgInfoIntrinsic {
- public:
- const Value *getValue() const;
- Value *getValue();
- uint64_t getOffset() const {
- return cast<ConstantInt>(
- const_cast<Value*>(getArgOperand(1)))->getZExtValue();
- }
- MDNode *getVariable() const { return cast<MDNode>(getArgOperand(2)); }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::dbg_value;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
- /// MemIntrinsic - This is the common base class for memset/memcpy/memmove.
- ///
- class MemIntrinsic : public IntrinsicInst {
- public:
- Value *getRawDest() const { return const_cast<Value*>(getArgOperand(0)); }
-
- Value *getLength() const { return const_cast<Value*>(getArgOperand(2)); }
- ConstantInt *getAlignmentCst() const {
- return cast<ConstantInt>(const_cast<Value*>(getArgOperand(3)));
- }
-
- unsigned getAlignment() const {
- return getAlignmentCst()->getZExtValue();
- }
-
- ConstantInt *getVolatileCst() const {
- return cast<ConstantInt>(const_cast<Value*>(getArgOperand(4)));
- }
- bool isVolatile() const {
- return !getVolatileCst()->isZero();
- }
-
- unsigned getDestAddressSpace() const {
- return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
- }
-
- /// getDest - This is just like getRawDest, but it strips off any cast
- /// instructions that feed it, giving the original input. The returned
- /// value is guaranteed to be a pointer.
- Value *getDest() const { return getRawDest()->stripPointerCasts(); }
-
- /// set* - Set the specified arguments of the instruction.
- ///
- void setDest(Value *Ptr) {
- assert(getRawDest()->getType() == Ptr->getType() &&
- "setDest called with pointer of wrong type!");
- setArgOperand(0, Ptr);
- }
-
- void setLength(Value *L) {
- assert(getLength()->getType() == L->getType() &&
- "setLength called with value of wrong type!");
- setArgOperand(2, L);
- }
-
- void setAlignment(Constant* A) {
- setArgOperand(3, A);
- }
-
- void setVolatile(Constant* V) {
- setArgOperand(4, V);
- }
-
- Type *getAlignmentType() const {
- return getArgOperand(3)->getType();
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
- switch (I->getIntrinsicID()) {
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset:
- return true;
- default: return false;
- }
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
- /// MemSetInst - This class wraps the llvm.memset intrinsic.
- ///
- class MemSetInst : public MemIntrinsic {
- public:
- /// get* - Return the arguments to the instruction.
- ///
- Value *getValue() const { return const_cast<Value*>(getArgOperand(1)); }
-
- void setValue(Value *Val) {
- assert(getValue()->getType() == Val->getType() &&
- "setValue called with value of wrong type!");
- setArgOperand(1, Val);
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::memset;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
- /// MemTransferInst - This class wraps the llvm.memcpy/memmove intrinsics.
- ///
- class MemTransferInst : public MemIntrinsic {
- public:
- /// get* - Return the arguments to the instruction.
- ///
- Value *getRawSource() const { return const_cast<Value*>(getArgOperand(1)); }
-
- /// getSource - This is just like getRawSource, but it strips off any cast
- /// instructions that feed it, giving the original input. The returned
- /// value is guaranteed to be a pointer.
- Value *getSource() const { return getRawSource()->stripPointerCasts(); }
-
- unsigned getSourceAddressSpace() const {
- return cast<PointerType>(getRawSource()->getType())->getAddressSpace();
- }
-
- void setSource(Value *Ptr) {
- assert(getRawSource()->getType() == Ptr->getType() &&
- "setSource called with pointer of wrong type!");
- setArgOperand(1, Ptr);
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::memcpy ||
- I->getIntrinsicID() == Intrinsic::memmove;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
-
- /// MemCpyInst - This class wraps the llvm.memcpy intrinsic.
- ///
- class MemCpyInst : public MemTransferInst {
- public:
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::memcpy;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
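-
- // Illustrative sketch (not part of this header): inspecting a memcpy
- // through these wrappers, assuming an Instruction *Inst is in scope.
- //
- //   if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(Inst)) {
- //     Value *Dst = MCI->getDest();    // destination, casts stripped
- //     Value *Src = MCI->getSource();  // source, casts stripped
- //     Value *Len = MCI->getLength();
- //     bool Vol   = MCI->isVolatile();
- //   }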
-
- /// MemMoveInst - This class wraps the llvm.memmove intrinsic.
- ///
- class MemMoveInst : public MemTransferInst {
- public:
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::memmove;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
- /// VAStartInst - This represents the llvm.va_start intrinsic.
- ///
- class VAStartInst : public IntrinsicInst {
- public:
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::vastart;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
-
- Value *getArgList() const { return const_cast<Value*>(getArgOperand(0)); }
- };
-
- /// VAEndInst - This represents the llvm.va_end intrinsic.
- ///
- class VAEndInst : public IntrinsicInst {
- public:
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::vaend;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
-
- Value *getArgList() const { return const_cast<Value*>(getArgOperand(0)); }
- };
-
- /// VACopyInst - This represents the llvm.va_copy intrinsic.
- ///
- class VACopyInst : public IntrinsicInst {
- public:
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::vacopy;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
-
- Value *getDest() const { return const_cast<Value*>(getArgOperand(0)); }
- Value *getSrc() const { return const_cast<Value*>(getArgOperand(1)); }
- };
-
-}
-
-#endif
diff --git a/include/llvm/Intrinsics.h b/include/llvm/Intrinsics.h
deleted file mode 100644
index 3108a8e5251c..000000000000
--- a/include/llvm/Intrinsics.h
+++ /dev/null
@@ -1,128 +0,0 @@
-//===-- llvm/Intrinsics.h - LLVM Intrinsic Function Handling ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a set of enums which allow processing of intrinsic
-// functions. Values of these enum types are returned by
-// Function::getIntrinsicID.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_INTRINSICS_H
-#define LLVM_INTRINSICS_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include <string>
-
-namespace llvm {
-
-class Type;
-class FunctionType;
-class Function;
-class LLVMContext;
-class Module;
-class AttrListPtr;
-
-/// Intrinsic Namespace - This namespace contains an enum with a value for
-/// every intrinsic/builtin function known by LLVM. These enum values are
-/// returned by Function::getIntrinsicID().
-///
-namespace Intrinsic {
- enum ID {
- not_intrinsic = 0, // Must be zero
-
- // Get the intrinsic enums generated from Intrinsics.td
-#define GET_INTRINSIC_ENUM_VALUES
-#include "llvm/Intrinsics.gen"
-#undef GET_INTRINSIC_ENUM_VALUES
- , num_intrinsics
- };
-
- /// Intrinsic::getName(ID) - Return the LLVM name for an intrinsic, such as
- /// "llvm.ppc.altivec.lvx".
- std::string getName(ID id, ArrayRef<Type*> Tys = ArrayRef<Type*>());
-
- /// Intrinsic::getType(ID) - Return the function type for an intrinsic.
- ///
- FunctionType *getType(LLVMContext &Context, ID id,
- ArrayRef<Type*> Tys = ArrayRef<Type*>());
-
- /// Intrinsic::isOverloaded(ID) - Returns true if the intrinsic can be
- /// overloaded.
- bool isOverloaded(ID id);
-
- /// Intrinsic::getAttributes(ID) - Return the attributes for an intrinsic.
- ///
- AttrListPtr getAttributes(LLVMContext &C, ID id);
-
- /// Intrinsic::getDeclaration(M, ID) - Create or insert an LLVM Function
- /// declaration for an intrinsic, and return it.
- ///
- /// The Tys parameter is for intrinsics with overloaded types (e.g., those
- /// using iAny, fAny, vAny, or iPTRAny). For a declaration of an overloaded
- /// intrinsic, Tys must provide exactly one Type for each overloaded type in
- /// the intrinsic.
- Function *getDeclaration(Module *M, ID id,
- ArrayRef<Type*> Tys = ArrayRef<Type*>());
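-
- // For example (illustrative only, assuming PtrTy (i8*) and Int64Ty have
- // already been constructed), declaring a memcpy with a 64-bit length in
- // module M:
- //
- //   Type *Tys[] = { PtrTy, PtrTy, Int64Ty };
- //   Function *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);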
-
- /// Map a GCC builtin name to an intrinsic ID.
- ID getIntrinsicForGCCBuiltin(const char *Prefix, const char *BuiltinName);
-
- /// IITDescriptor - This is a type descriptor which explains the type
- /// requirements of an intrinsic. This is returned by
- /// getIntrinsicInfoTableEntries.
- struct IITDescriptor {
- enum IITDescriptorKind {
- Void, MMX, Metadata, Float, Double,
- Integer, Vector, Pointer, Struct,
- Argument, ExtendVecArgument, TruncVecArgument
- } Kind;
-
- union {
- unsigned Integer_Width;
- unsigned Float_Width;
- unsigned Vector_Width;
- unsigned Pointer_AddressSpace;
- unsigned Struct_NumElements;
- unsigned Argument_Info;
- };
-
- enum ArgKind {
- AK_AnyInteger,
- AK_AnyFloat,
- AK_AnyVector,
- AK_AnyPointer
- };
- unsigned getArgumentNumber() const {
- assert(Kind == Argument || Kind == ExtendVecArgument ||
- Kind == TruncVecArgument);
- return Argument_Info >> 2;
- }
- ArgKind getArgumentKind() const {
- assert(Kind == Argument || Kind == ExtendVecArgument ||
- Kind == TruncVecArgument);
- return (ArgKind)(Argument_Info&3);
- }
-
- static IITDescriptor get(IITDescriptorKind K, unsigned Field) {
- IITDescriptor Result = { K, { Field } };
- return Result;
- }
- };
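-
- // Illustrative note (derived from the accessors above): for Kind ==
- // Argument, Argument_Info packs (ArgNo << 2) | ArgKind, so a descriptor
- // meaning "argument 1, any integer" could be built as
- //
- //   IITDescriptor D = IITDescriptor::get(
- //       IITDescriptor::Argument, (1 << 2) | IITDescriptor::AK_AnyInteger);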
-
- /// getIntrinsicInfoTableEntries - Decode the IIT table entries for the
- /// specified intrinsic into an array of IITDescriptors.
- ///
- void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl<IITDescriptor> &T);
-
-} // End Intrinsic namespace
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
deleted file mode 100644
index 2e1597fe6f6b..000000000000
--- a/include/llvm/Intrinsics.td
+++ /dev/null
@@ -1,471 +0,0 @@
-//===- Intrinsics.td - Defines all LLVM intrinsics ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines properties of all LLVM intrinsics.
-//
-//===----------------------------------------------------------------------===//
-
-include "llvm/CodeGen/ValueTypes.td"
-
-//===----------------------------------------------------------------------===//
-// Properties we keep track of for intrinsics.
-//===----------------------------------------------------------------------===//
-
-class IntrinsicProperty;
-
-// Intr*Mem - Memory properties. An intrinsic is allowed to have at most one of
-// these properties set. They are listed from the most aggressive (best to use
-// if correct) to the least aggressive. If no property is set, the worst case
-// is assumed (it may read and write any memory it can get access to and it may
-// have other side effects).
-
-// IntrNoMem - The intrinsic does not access memory or have any other side
-// effects. It may be CSE'd, deleted if dead, etc.
-def IntrNoMem : IntrinsicProperty;
-
-// IntrReadArgMem - This intrinsic reads only from memory that one of its
-// pointer-typed arguments points to, but may read an unspecified amount.
-def IntrReadArgMem : IntrinsicProperty;
-
-// IntrReadMem - This intrinsic reads from unspecified memory, so it cannot be
-// moved across stores. However, it can be reordered otherwise and can be
-// deleted if dead.
-def IntrReadMem : IntrinsicProperty;
-
-// IntrReadWriteArgMem - This intrinsic reads and writes only from memory that
-// one of its arguments points to, but may access an unspecified amount. The
-// reads and writes may be volatile, but except for this it has no other side
-// effects.
-def IntrReadWriteArgMem : IntrinsicProperty;
-
-// Commutative - This intrinsic is commutative: X op Y == Y op X.
-def Commutative : IntrinsicProperty;
-
-// Throws - This intrinsic can throw.
-def Throws : IntrinsicProperty;
-
-// NoCapture - The specified argument pointer is not captured by the intrinsic.
-class NoCapture<int argNo> : IntrinsicProperty {
- int ArgNo = argNo;
-}
-
-def IntrNoReturn : IntrinsicProperty;
-
-//===----------------------------------------------------------------------===//
-// Types used by intrinsics.
-//===----------------------------------------------------------------------===//
-
-class LLVMType<ValueType vt> {
- ValueType VT = vt;
-}
-
-class LLVMQualPointerType<LLVMType elty, int addrspace>
- : LLVMType<iPTR>{
- LLVMType ElTy = elty;
- int AddrSpace = addrspace;
-}
-
-class LLVMPointerType<LLVMType elty>
- : LLVMQualPointerType<elty, 0>;
-
-class LLVMAnyPointerType<LLVMType elty>
- : LLVMType<iPTRAny>{
- LLVMType ElTy = elty;
-}
-
-// Match the type of another intrinsic parameter. Number is an index into the
-// list of overloaded types for the intrinsic, excluding all the fixed types.
-// The Number value must refer to a previously listed type. For example:
-// Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyfloat_ty, LLVMMatchType<0>]>
-// has two overloaded types, the 2nd and 3rd arguments. LLVMMatchType<0>
-// refers to the first overloaded type, which is the 2nd argument.
-class LLVMMatchType<int num>
- : LLVMType<OtherVT>{
- int Number = num;
-}
-
-// Match the type of another intrinsic parameter that is expected to be
-// an integral vector type, but change the element width to be twice or half
-// that of the matched type. This is only useful when the intrinsic is
-// overloaded, so the matched type should be declared as iAny.
-class LLVMExtendedElementVectorType<int num> : LLVMMatchType<num>;
-class LLVMTruncatedElementVectorType<int num> : LLVMMatchType<num>;
-
-def llvm_void_ty : LLVMType<isVoid>;
-def llvm_anyint_ty : LLVMType<iAny>;
-def llvm_anyfloat_ty : LLVMType<fAny>;
-def llvm_anyvector_ty : LLVMType<vAny>;
-def llvm_i1_ty : LLVMType<i1>;
-def llvm_i8_ty : LLVMType<i8>;
-def llvm_i16_ty : LLVMType<i16>;
-def llvm_i32_ty : LLVMType<i32>;
-def llvm_i64_ty : LLVMType<i64>;
-def llvm_float_ty : LLVMType<f32>;
-def llvm_double_ty : LLVMType<f64>;
-def llvm_f80_ty : LLVMType<f80>;
-def llvm_f128_ty : LLVMType<f128>;
-def llvm_ppcf128_ty : LLVMType<ppcf128>;
-def llvm_ptr_ty : LLVMPointerType<llvm_i8_ty>; // i8*
-def llvm_ptrptr_ty : LLVMPointerType<llvm_ptr_ty>; // i8**
-def llvm_anyptr_ty : LLVMAnyPointerType<llvm_i8_ty>; // (space)i8*
-def llvm_empty_ty : LLVMType<OtherVT>; // { }
-def llvm_descriptor_ty : LLVMPointerType<llvm_empty_ty>; // { }*
-def llvm_metadata_ty : LLVMType<MetadataVT>; // !{...}
-
-def llvm_x86mmx_ty : LLVMType<x86mmx>;
-def llvm_ptrx86mmx_ty : LLVMPointerType<llvm_x86mmx_ty>; // <1 x i64>*
-
-def llvm_v2i1_ty : LLVMType<v2i1>; // 2 x i1
-def llvm_v4i1_ty : LLVMType<v4i1>; // 4 x i1
-def llvm_v8i1_ty : LLVMType<v8i1>; // 8 x i1
-def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
-def llvm_v2i8_ty : LLVMType<v2i8>; // 2 x i8
-def llvm_v4i8_ty : LLVMType<v4i8>; // 4 x i8
-def llvm_v8i8_ty : LLVMType<v8i8>; // 8 x i8
-def llvm_v16i8_ty : LLVMType<v16i8>; // 16 x i8
-def llvm_v32i8_ty : LLVMType<v32i8>; // 32 x i8
-def llvm_v1i16_ty : LLVMType<v1i16>; // 1 x i16
-def llvm_v2i16_ty : LLVMType<v2i16>; // 2 x i16
-def llvm_v4i16_ty : LLVMType<v4i16>; // 4 x i16
-def llvm_v8i16_ty : LLVMType<v8i16>; // 8 x i16
-def llvm_v16i16_ty : LLVMType<v16i16>; // 16 x i16
-def llvm_v1i32_ty : LLVMType<v1i32>; // 1 x i32
-def llvm_v2i32_ty : LLVMType<v2i32>; // 2 x i32
-def llvm_v4i32_ty : LLVMType<v4i32>; // 4 x i32
-def llvm_v8i32_ty : LLVMType<v8i32>; // 8 x i32
-def llvm_v16i32_ty : LLVMType<v16i32>; // 16 x i32
-def llvm_v1i64_ty : LLVMType<v1i64>; // 1 x i64
-def llvm_v2i64_ty : LLVMType<v2i64>; // 2 x i64
-def llvm_v4i64_ty : LLVMType<v4i64>; // 4 x i64
-def llvm_v8i64_ty : LLVMType<v8i64>; // 8 x i64
-def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64
-
-def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float
-def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float
-def llvm_v8f32_ty : LLVMType<v8f32>; // 8 x float
-def llvm_v2f64_ty : LLVMType<v2f64>; // 2 x double
-def llvm_v4f64_ty : LLVMType<v4f64>; // 4 x double
-
-def llvm_vararg_ty : LLVMType<isVoid>; // this means vararg here
-
-
-//===----------------------------------------------------------------------===//
-// Intrinsic Definitions.
-//===----------------------------------------------------------------------===//
-
-// Intrinsic class - This is used to define one LLVM intrinsic. The name of the
-// intrinsic definition should start with "int_", then match the LLVM intrinsic
-// name with the "llvm." prefix removed, and all "."s turned into "_"s. For
-// example, llvm.bswap.i16 -> int_bswap_i16.
-//
-// * RetTypes is a list containing the return types expected for the
-// intrinsic.
-// * ParamTypes is a list containing the parameter types expected for the
-// intrinsic.
-// * Properties can be set to describe the behavior of the intrinsic.
-//
-class SDPatternOperator;
-class Intrinsic<list<LLVMType> ret_types,
- list<LLVMType> param_types = [],
- list<IntrinsicProperty> properties = [],
- string name = ""> : SDPatternOperator {
- string LLVMName = name;
- string TargetPrefix = ""; // Set to a prefix for target-specific intrinsics.
- list<LLVMType> RetTypes = ret_types;
- list<LLVMType> ParamTypes = param_types;
- list<IntrinsicProperty> Properties = properties;
-
- bit isTarget = 0;
-}
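-
-// For illustration (hypothetical, not an intrinsic defined in this file): an
-// overloaded, side-effect-free saturating add of two same-width integers
-// would be written as
-//
-//   def int_sadd_sat : Intrinsic<[llvm_anyint_ty],
-//                                [LLVMMatchType<0>, LLVMMatchType<0>],
-//                                [IntrNoMem, Commutative]>;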
-
-/// GCCBuiltin - If this intrinsic exactly corresponds to a GCC builtin, this
-/// specifies the name of the builtin. This provides automatic CBE and CFE
-/// support.
-class GCCBuiltin<string name> {
- string GCCBuiltinName = name;
-}
-
-
-//===--------------- Variable Argument Handling Intrinsics ----------------===//
-//
-
-def int_vastart : Intrinsic<[], [llvm_ptr_ty], [], "llvm.va_start">;
-def int_vacopy : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [],
- "llvm.va_copy">;
-def int_vaend : Intrinsic<[], [llvm_ptr_ty], [], "llvm.va_end">;
-
-//===------------------- Garbage Collection Intrinsics --------------------===//
-//
-def int_gcroot : Intrinsic<[],
- [llvm_ptrptr_ty, llvm_ptr_ty]>;
-def int_gcread : Intrinsic<[llvm_ptr_ty],
- [llvm_ptr_ty, llvm_ptrptr_ty],
- [IntrReadArgMem]>;
-def int_gcwrite : Intrinsic<[],
- [llvm_ptr_ty, llvm_ptr_ty, llvm_ptrptr_ty],
- [IntrReadWriteArgMem, NoCapture<1>, NoCapture<2>]>;
-
-//===--------------------- Code Generator Intrinsics ----------------------===//
-//
-def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
-def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
-
-// Note: we treat stacksave/stackrestore as writemem because we don't otherwise
-// model their dependencies on allocas.
-def int_stacksave : Intrinsic<[llvm_ptr_ty]>,
- GCCBuiltin<"__builtin_stack_save">;
-def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>,
- GCCBuiltin<"__builtin_stack_restore">;
-
-// IntrReadWriteArgMem is more pessimistic than strictly necessary for
-// prefetch; however, it does conveniently prevent the prefetch from being
-// reordered with respect to nearby accesses to the same memory.
-def int_prefetch : Intrinsic<[],
- [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty],
- [IntrReadWriteArgMem, NoCapture<0>]>;
-def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>;
-
-def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
-
-// Stack Protector Intrinsic - The stackprotector intrinsic writes the stack
-// guard to the correct place on the stack frame.
-def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
-
-//===------------------- Standard C Library Intrinsics --------------------===//
-//
-
-def int_memcpy : Intrinsic<[],
- [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
- llvm_i32_ty, llvm_i1_ty],
- [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>]>;
-def int_memmove : Intrinsic<[],
- [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
- llvm_i32_ty, llvm_i1_ty],
- [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>]>;
-def int_memset : Intrinsic<[],
- [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
- llvm_i32_ty, llvm_i1_ty],
- [IntrReadWriteArgMem, NoCapture<0>]>;
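-
-// For example (illustrative IR), an 8-byte, 4-aligned, non-volatile copy
-// through the memcpy intrinsic declared above:
-//
-//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src,
-//                                        i64 8, i32 4, i1 false)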
-
-// These functions do not actually read memory, but they are sensitive to the
-// rounding mode. This needs to be modelled separately; in the meantime
-// declaring them as reading memory is conservatively correct.
-let Properties = [IntrReadMem] in {
- def int_sqrt : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_powi : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>;
- def int_sin : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_cos : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_pow : Intrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>]>;
- def int_log : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_log10: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_log2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
- def int_floor : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
-}
-
-let Properties = [IntrNoMem] in {
- def int_fma : Intrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMMatchType<0>]>;
-
- def int_fmuladd : Intrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMMatchType<0>]>;
-}
-
-// NOTE: these are internal interfaces.
-def int_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
-def int_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
-def int_sigsetjmp : Intrinsic<[llvm_i32_ty] , [llvm_ptr_ty, llvm_i32_ty]>;
-def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
-
-// Internal interface for object size checking
-def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i1_ty],
- [IntrNoMem]>,
- GCCBuiltin<"__builtin_object_size">;
-
-//===------------------------- Expect Intrinsics --------------------------===//
-//
-def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>], [IntrNoMem]>;
-
-//===-------------------- Bit Manipulation Intrinsics ---------------------===//
-//
-
-// None of these intrinsics accesses memory at all.
-let Properties = [IntrNoMem] in {
- def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
- def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
- def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
- def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
-}
-
-//===------------------------ Debugger Intrinsics -------------------------===//
-//
-
-// None of these intrinsics accesses memory at all, but that doesn't mean the
-// optimizers can transform them aggressively. Special handling is needed in a
-// few places.
-let Properties = [IntrNoMem] in {
- def int_dbg_declare : Intrinsic<[],
- [llvm_metadata_ty, llvm_metadata_ty]>;
- def int_dbg_value : Intrinsic<[],
- [llvm_metadata_ty, llvm_i64_ty,
- llvm_metadata_ty]>;
-}
-
-//===------------------ Exception Handling Intrinsics ---------------------===//
-//
-
-// The result of eh.typeid.for depends on the enclosing function, but inside a
-// given function it is 'const' and may be CSE'd etc.
-def int_eh_typeid_for : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
-
-def int_eh_return_i32 : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty]>;
-def int_eh_return_i64 : Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty]>;
-
-def int_eh_unwind_init: Intrinsic<[]>,
- GCCBuiltin<"__builtin_unwind_init">;
-
-def int_eh_dwarf_cfa : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty]>;
-
-let Properties = [IntrNoMem] in {
- def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty]>;
- def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty]>;
-}
-def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>;
-def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
-def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty], [IntrNoReturn]>;
-
-//===---------------- Generic Variable Attribute Intrinsics ---------------===//
-//
-def int_var_annotation : Intrinsic<[],
- [llvm_ptr_ty, llvm_ptr_ty,
- llvm_ptr_ty, llvm_i32_ty],
- [], "llvm.var.annotation">;
-def int_ptr_annotation : Intrinsic<[LLVMAnyPointerType<llvm_anyint_ty>],
- [LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty,
- llvm_i32_ty],
- [], "llvm.ptr.annotation">;
-def int_annotation : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_ptr_ty,
- llvm_ptr_ty, llvm_i32_ty],
- [], "llvm.annotation">;
-
-//===------------------------ Trampoline Intrinsics -----------------------===//
-//
-def int_init_trampoline : Intrinsic<[],
- [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
- [IntrReadWriteArgMem, NoCapture<0>]>,
- GCCBuiltin<"__builtin_init_trampoline">;
-
-def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
- [IntrReadArgMem]>,
- GCCBuiltin<"__builtin_adjust_trampoline">;
-
-//===------------------------ Overflow Intrinsics -------------------------===//
-//
-
-// Expose the overflow flag from add, subtract, and multiply operations on
-// two integers (see the illustrative IR after these definitions).
-def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
-def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
-
-def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
-def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
-
-def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
-def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
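-
-// Illustrative IR usage (a sketch; %a and %b are placeholders): each
-// intrinsic returns the result and the overflow bit as a two-element struct.
-//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
-//   %sum = extractvalue { i32, i1 } %res, 0
-//   %obit = extractvalue { i32, i1 } %res, 1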
-
-//===------------------------- Memory Use Markers -------------------------===//
-//
-def int_lifetime_start : Intrinsic<[],
- [llvm_i64_ty, llvm_ptr_ty],
- [IntrReadWriteArgMem, NoCapture<1>]>;
-def int_lifetime_end : Intrinsic<[],
- [llvm_i64_ty, llvm_ptr_ty],
- [IntrReadWriteArgMem, NoCapture<1>]>;
-def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
- [llvm_i64_ty, llvm_ptr_ty],
- [IntrReadWriteArgMem, NoCapture<1>]>;
-def int_invariant_end : Intrinsic<[],
- [llvm_descriptor_ty, llvm_i64_ty,
- llvm_ptr_ty],
- [IntrReadWriteArgMem, NoCapture<2>]>;
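-
-// Illustrative IR usage (a sketch; %buf is a placeholder): the i64 operand
-// is the size of the object in bytes, bracketing its live range.
-//   call void @llvm.lifetime.start(i64 16, i8* %buf)
-//   ...
-//   call void @llvm.lifetime.end(i64 16, i8* %buf)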
-
-//===-------------------------- Other Intrinsics --------------------------===//
-//
-def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
- GCCBuiltin<"__builtin_flt_rounds">;
-def int_trap : Intrinsic<[], [], [IntrNoReturn]>,
- GCCBuiltin<"__builtin_trap">;
-def int_debugtrap : Intrinsic<[]>,
- GCCBuiltin<"__builtin_debugtrap">;
-
-// NOP: calls/invokes to this intrinsic are removed by codegen
-def int_donothing : Intrinsic<[], [], [IntrNoMem]>;
-
-// Intrinsics to support the half-precision floating-point format
-let Properties = [IntrNoMem] in {
-def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_float_ty]>,
- GCCBuiltin<"__gnu_f2h_ieee">;
-def int_convert_from_fp16 : Intrinsic<[llvm_float_ty], [llvm_i16_ty]>,
- GCCBuiltin<"__gnu_h2f_ieee">;
-}
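-
-// Illustrative IR usage (a sketch; %f is a placeholder): round-trip a float
-// through the i16 half-precision storage format.
-//   %h = call i16 @llvm.convert.to.fp16(float %f)
-//   %g = call float @llvm.convert.from.fp16(i16 %h)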
-
-// These convert intrinsics support conversions between various types with
-// rounding and saturation. NOTE: avoid using these intrinsics, as they may be
-// removed in the future and most targets do not support them.
-def int_convertff : Intrinsic<[llvm_anyfloat_ty],
- [llvm_anyfloat_ty, llvm_i32_ty, llvm_i32_ty]>;
-def int_convertfsi : Intrinsic<[llvm_anyfloat_ty],
- [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
-def int_convertfui : Intrinsic<[llvm_anyfloat_ty],
- [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
-def int_convertsif : Intrinsic<[llvm_anyint_ty],
- [llvm_anyfloat_ty, llvm_i32_ty, llvm_i32_ty]>;
-def int_convertuif : Intrinsic<[llvm_anyint_ty],
- [llvm_anyfloat_ty, llvm_i32_ty, llvm_i32_ty]>;
-def int_convertss : Intrinsic<[llvm_anyint_ty],
- [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
-def int_convertsu : Intrinsic<[llvm_anyint_ty],
- [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
-def int_convertus : Intrinsic<[llvm_anyint_ty],
- [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
-def int_convertuu : Intrinsic<[llvm_anyint_ty],
- [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
-
-//===----------------------------------------------------------------------===//
-// Target-specific intrinsics
-//===----------------------------------------------------------------------===//
-
-include "llvm/IntrinsicsPowerPC.td"
-include "llvm/IntrinsicsX86.td"
-include "llvm/IntrinsicsARM.td"
-include "llvm/IntrinsicsCellSPU.td"
-include "llvm/IntrinsicsXCore.td"
-include "llvm/IntrinsicsHexagon.td"
-include "llvm/IntrinsicsNVVM.td"
-include "llvm/IntrinsicsMips.td"
diff --git a/include/llvm/IntrinsicsCellSPU.td b/include/llvm/IntrinsicsCellSPU.td
deleted file mode 100644
index 1e311bbecbc6..000000000000
--- a/include/llvm/IntrinsicsCellSPU.td
+++ /dev/null
@@ -1,242 +0,0 @@
-//===- IntrinsicsCellSPU.td - Cell SDK intrinsics ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Cell SPU Instructions:
-//===----------------------------------------------------------------------===//
-// TODO Items (not urgent today, but would be nice, low priority)
-//
-// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
-// replicating the byte argument b four times, as "bbbb". Recognizing this bit
-// pattern in 16-bit and 32-bit constants could reduce instruction count.
-//===----------------------------------------------------------------------===//
-
-// 7-bit integer type, used as an immediate:
-def cell_i7_ty: LLVMType<i8>;
-def cell_i8_ty: LLVMType<i8>;
-
-// Keep this here until it's actually supported:
-def llvm_i128_ty : LLVMType<i128>;
-
-class v16i8_u7imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, cell_i7_ty],
- [IntrNoMem]>;
-
-class v16i8_u8imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
-class v16i8_s10imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v16i8_u16imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v16i8_rr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
-
-class v8i16_s10imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v8i16_u16imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v8i16_rr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
-
-class v4i32_rr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
-
-class v4i32_u7imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, cell_i7_ty],
- [IntrNoMem]>;
-
-class v4i32_s10imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v4i32_u16imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v4f32_rr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
-
-class v4f32_rrr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
-
-class v2f64_rr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
-
-// All Cell SPU intrinsics start with "llvm.spu.".
-let TargetPrefix = "spu" in {
- def int_spu_si_fsmbi : v8i16_u16imm<"fsmbi">;
- def int_spu_si_ah : v8i16_rr<"ah">;
- def int_spu_si_ahi : v8i16_s10imm<"ahi">;
- def int_spu_si_a : v4i32_rr<"a">;
- def int_spu_si_ai : v4i32_s10imm<"ai">;
- def int_spu_si_sfh : v8i16_rr<"sfh">;
- def int_spu_si_sfhi : v8i16_s10imm<"sfhi">;
- def int_spu_si_sf : v4i32_rr<"sf">;
- def int_spu_si_sfi : v4i32_s10imm<"sfi">;
- def int_spu_si_addx : v4i32_rr<"addx">;
- def int_spu_si_cg : v4i32_rr<"cg">;
- def int_spu_si_cgx : v4i32_rr<"cgx">;
- def int_spu_si_sfx : v4i32_rr<"sfx">;
- def int_spu_si_bg : v4i32_rr<"bg">;
- def int_spu_si_bgx : v4i32_rr<"bgx">;
- def int_spu_si_mpy : // This is special:
- GCCBuiltin<"__builtin_si_mpy">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyu : // This is special:
- GCCBuiltin<"__builtin_si_mpyu">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyi : // This is special:
- GCCBuiltin<"__builtin_si_mpyi">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyui : // This is special:
- GCCBuiltin<"__builtin_si_mpyui">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpya : // This is special:
- GCCBuiltin<"__builtin_si_mpya">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyh : // This is special:
- GCCBuiltin<"__builtin_si_mpyh">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpys : // This is special:
- GCCBuiltin<"__builtin_si_mpys">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyhh : // This is special:
- GCCBuiltin<"__builtin_si_mpyhh">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyhha : // This is special:
- GCCBuiltin<"__builtin_si_mpyhha">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyhhu : // This is special:
- GCCBuiltin<"__builtin_si_mpyhhu">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyhhau : // This is special:
- GCCBuiltin<"__builtin_si_mpyhhau">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
-
- def int_spu_si_shli: v4i32_u7imm<"shli">;
-
- def int_spu_si_shlqbi:
- GCCBuiltin<!strconcat("__builtin_si_", "shlqbi")>,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
- def int_spu_si_shlqbii: v16i8_u7imm<"shlqbii">;
- def int_spu_si_shlqby:
- GCCBuiltin<!strconcat("__builtin_si_", "shlqby")>,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_spu_si_shlqbyi: v16i8_u7imm<"shlqbyi">;
-
- def int_spu_si_ceq: v4i32_rr<"ceq">;
- def int_spu_si_ceqi: v4i32_s10imm<"ceqi">;
- def int_spu_si_ceqb: v16i8_rr<"ceqb">;
- def int_spu_si_ceqbi: v16i8_u8imm<"ceqbi">;
- def int_spu_si_ceqh: v8i16_rr<"ceqh">;
- def int_spu_si_ceqhi: v8i16_s10imm<"ceqhi">;
- def int_spu_si_cgt: v4i32_rr<"cgt">;
- def int_spu_si_cgti: v4i32_s10imm<"cgti">;
- def int_spu_si_cgtb: v16i8_rr<"cgtb">;
- def int_spu_si_cgtbi: v16i8_u8imm<"cgtbi">;
- def int_spu_si_cgth: v8i16_rr<"cgth">;
- def int_spu_si_cgthi: v8i16_s10imm<"cgthi">;
- def int_spu_si_clgtb: v16i8_rr<"clgtb">;
- def int_spu_si_clgtbi: v16i8_u8imm<"clgtbi">;
- def int_spu_si_clgth: v8i16_rr<"clgth">;
- def int_spu_si_clgthi: v8i16_s10imm<"clgthi">;
- def int_spu_si_clgt: v4i32_rr<"clgt">;
- def int_spu_si_clgti: v4i32_s10imm<"clgti">;
-
- def int_spu_si_and: v4i32_rr<"and">;
- def int_spu_si_andbi: v16i8_u8imm<"andbi">;
- def int_spu_si_andc: v4i32_rr<"andc">;
- def int_spu_si_andhi: v8i16_s10imm<"andhi">;
- def int_spu_si_andi: v4i32_s10imm<"andi">;
-
- def int_spu_si_or: v4i32_rr<"or">;
- def int_spu_si_orbi: v16i8_u8imm<"orbi">;
- def int_spu_si_orc: v4i32_rr<"orc">;
- def int_spu_si_orhi: v8i16_s10imm<"orhi">;
- def int_spu_si_ori: v4i32_s10imm<"ori">;
-
- def int_spu_si_xor: v4i32_rr<"xor">;
- def int_spu_si_xorbi: v16i8_u8imm<"xorbi">;
- def int_spu_si_xorhi: v8i16_s10imm<"xorhi">;
- def int_spu_si_xori: v4i32_s10imm<"xori">;
-
- def int_spu_si_nor: v4i32_rr<"nor">;
- def int_spu_si_nand: v4i32_rr<"nand">;
-
- def int_spu_si_fa: v4f32_rr<"fa">;
- def int_spu_si_fs: v4f32_rr<"fs">;
- def int_spu_si_fm: v4f32_rr<"fm">;
-
- def int_spu_si_fceq: v4f32_rr<"fceq">;
- def int_spu_si_fcmeq: v4f32_rr<"fcmeq">;
- def int_spu_si_fcgt: v4f32_rr<"fcgt">;
- def int_spu_si_fcmgt: v4f32_rr<"fcmgt">;
-
- def int_spu_si_fma: v4f32_rrr<"fma">;
- def int_spu_si_fnms: v4f32_rrr<"fnms">;
- def int_spu_si_fms: v4f32_rrr<"fms">;
-
- def int_spu_si_dfa: v2f64_rr<"dfa">;
- def int_spu_si_dfs: v2f64_rr<"dfs">;
- def int_spu_si_dfm: v2f64_rr<"dfm">;
-
-//def int_spu_si_dfceq: v2f64_rr<"dfceq">;
-//def int_spu_si_dfcmeq: v2f64_rr<"dfcmeq">;
-//def int_spu_si_dfcgt: v2f64_rr<"dfcgt">;
-//def int_spu_si_dfcmgt: v2f64_rr<"dfcmgt">;
-
- def int_spu_si_dfnma: v2f64_rr<"dfnma">;
- def int_spu_si_dfma: v2f64_rr<"dfma">;
- def int_spu_si_dfnms: v2f64_rr<"dfnms">;
- def int_spu_si_dfms: v2f64_rr<"dfms">;
-}
diff --git a/include/llvm/IntrinsicsNVVM.td b/include/llvm/IntrinsicsNVVM.td
deleted file mode 100644
index 1853c9988b47..000000000000
--- a/include/llvm/IntrinsicsNVVM.td
+++ /dev/null
@@ -1,952 +0,0 @@
-//===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines all of the NVVM-specific intrinsics for use with NVPTX.
-//
-//===----------------------------------------------------------------------===//
-
-def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
-
-//
-// MISC
-//
-
- def int_nvvm_clz_i : GCCBuiltin<"__nvvm_clz_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_clz_ll : GCCBuiltin<"__nvvm_clz_ll">,
- Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
-
- def int_nvvm_popc_i : GCCBuiltin<"__nvvm_popc_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_popc_ll : GCCBuiltin<"__nvvm_popc_ll">,
- Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
-
- def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
-
-//
-// Min Max
-//
-
- def int_nvvm_min_i : GCCBuiltin<"__nvvm_min_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_min_ui : GCCBuiltin<"__nvvm_min_ui">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_min_ll : GCCBuiltin<"__nvvm_min_ll">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_min_ull : GCCBuiltin<"__nvvm_min_ull">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_max_i : GCCBuiltin<"__nvvm_max_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_max_ui : GCCBuiltin<"__nvvm_max_ui">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_max_ll : GCCBuiltin<"__nvvm_max_ll">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_max_ull : GCCBuiltin<"__nvvm_max_ull">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]
- , [IntrNoMem, Commutative]>;
- def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
-
-//
-// Multiplication
-//
-
- def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
-
-//
-// Div
-//
-
- def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
-
-//
-// Brev
-//
-
- def int_nvvm_brev32 : GCCBuiltin<"__nvvm_brev32">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_brev64 : GCCBuiltin<"__nvvm_brev64">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
-
-//
-// Sad
-//
-
- def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
-
-//
-// Floor Ceil
-//
-
- def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
- def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
-//
-// Abs
-//
-
- def int_nvvm_abs_i : GCCBuiltin<"__nvvm_abs_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_abs_ll : GCCBuiltin<"__nvvm_abs_ll">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
-
- def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
-//
-// Round
-//
-
- def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
-//
-// Trunc
-//
-
- def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
-//
-// Saturate
-//
-
- def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
-//
-// Exp2 Log2
-//
-
- def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
- def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
-//
-// Sin Cos
-//
-
- def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
-//
-// Fma
-//
-
- def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">,
- Intrinsic<[llvm_double_ty],
- [llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">,
- Intrinsic<[llvm_double_ty],
- [llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">,
- Intrinsic<[llvm_double_ty],
- [llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">,
- Intrinsic<[llvm_double_ty],
- [llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
-
-//
-// Rcp
-//
-
- def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
- def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
-//
-// Sqrt
-//
-
- def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
-//
-// Rsqrt
-//
-
- def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
-
-//
-// Add
-//
-
- def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
- def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
-
-//
-// Convert
-//
-
- def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
-
- def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
-
- def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
-
- def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
-
- def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
-
- def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
-
- def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
-
- def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
-
- def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
-
- def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
-
- def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
-
- def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
-
- def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
-
- def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
-
- def int_nvvm_h2f : GCCBuiltin<"__nvvm_h2f">,
- Intrinsic<[llvm_float_ty], [llvm_i16_ty], [IntrNoMem]>;
-
-//
-// Bitcast
-//
-
- def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
-
- def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
- def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
-
-
-// Atomic operations not available as generic LLVM intrinsics.
- def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
- [LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty],
- [IntrReadWriteArgMem, NoCapture<0>]>;
- def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
- [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
- [IntrReadWriteArgMem, NoCapture<0>]>;
- def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
- [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
- [IntrReadWriteArgMem, NoCapture<0>]>;
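-
-// Illustrative IR usage (a sketch; %p and %v are placeholders, and the exact
-// overloaded suffix depends on the pointer's address space):
-//   %old = call float @llvm.nvvm.atomic.load.add.f32.p1f32(
-//              float addrspace(1)* %p, float %v)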
-
-// Bar.Sync
- def int_cuda_syncthreads : GCCBuiltin<"__syncthreads">,
- Intrinsic<[], [], []>;
- def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">,
- Intrinsic<[], [], []>;
- def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
- def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
- def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
-
- // Membar
- def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
- Intrinsic<[], [], []>;
- def int_nvvm_membar_gl : GCCBuiltin<"__nvvm_membar_gl">,
- Intrinsic<[], [], []>;
- def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
- Intrinsic<[], [], []>;
-
-
-// Accessing special registers
- def int_nvvm_read_ptx_sreg_tid_x :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_tid_x">;
- def int_nvvm_read_ptx_sreg_tid_y :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_tid_y">;
- def int_nvvm_read_ptx_sreg_tid_z :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_tid_z">;
-
- def int_nvvm_read_ptx_sreg_ntid_x :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_x">;
- def int_nvvm_read_ptx_sreg_ntid_y :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_y">;
- def int_nvvm_read_ptx_sreg_ntid_z :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_z">;
-
- def int_nvvm_read_ptx_sreg_ctaid_x :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_x">;
- def int_nvvm_read_ptx_sreg_ctaid_y :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_y">;
- def int_nvvm_read_ptx_sreg_ctaid_z :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_z">;
-
- def int_nvvm_read_ptx_sreg_nctaid_x :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_x">;
- def int_nvvm_read_ptx_sreg_nctaid_y :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_y">;
- def int_nvvm_read_ptx_sreg_nctaid_z :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_z">;
-
- def int_nvvm_read_ptx_sreg_warpsize :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<"__nvvm_read_ptx_sreg_warpsize">;
-
-
-// Generated within nvvm. Use for ldu on sm_20 or later.
-// @TODO: Revisit this; changed LLVMAnyPointerType to LLVMPointerType.
-def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
- [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
- "llvm.nvvm.ldu.global.i">;
-def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
- [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
- "llvm.nvvm.ldu.global.f">;
-def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
- [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>],
- "llvm.nvvm.ldu.global.p">;
-
-
-// Use for generic pointers
-// - These intrinsics are used to convert address spaces.
-// - The input pointer and output pointer must have the same type, except for
-//   the address space. (This restriction is not enforced here, as there is
-//   currently no way to describe it.)
-// - This complements the LLVM bitcast, which can cast one pointer type to
-//   another while the address space remains the same.
-def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
- "llvm.nvvm.ptr.local.to.gen">;
-def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
- "llvm.nvvm.ptr.shared.to.gen">;
-def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
- "llvm.nvvm.ptr.global.to.gen">;
-def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
- "llvm.nvvm.ptr.constant.to.gen">;
-
-def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
- "llvm.nvvm.ptr.gen.to.global">;
-def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
- "llvm.nvvm.ptr.gen.to.shared">;
-def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
- "llvm.nvvm.ptr.gen.to.local">;
-def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
- "llvm.nvvm.ptr.gen.to.constant">;
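-
-// Illustrative IR usage (a sketch; %g is a placeholder, and the exact
-// overloaded suffix depends on the pointer types involved):
-//   %gen = call i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(
-//              i8 addrspace(1)* %g)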
-
-// Used internally by nvvm to help address-space optimization and PTX code
-// generation. This is for params that are passed to kernel functions by
-// pointer by-value (byval).
-def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty],
- [IntrNoMem, NoCapture<0>],
- "llvm.nvvm.ptr.gen.to.param">;
-
-// Move intrinsics, used in nvvm internally
-
-def int_nvvm_move_i8 : Intrinsic<[llvm_i8_ty], [llvm_i8_ty], [IntrNoMem],
- "llvm.nvvm.move.i8">;
-def int_nvvm_move_i16 : Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem],
- "llvm.nvvm.move.i16">;
-def int_nvvm_move_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem],
- "llvm.nvvm.move.i32">;
-def int_nvvm_move_i64 : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem],
- "llvm.nvvm.move.i64">;
-def int_nvvm_move_float : Intrinsic<[llvm_float_ty], [llvm_float_ty],
- [IntrNoMem], "llvm.nvvm.move.float">;
-def int_nvvm_move_double : Intrinsic<[llvm_double_ty], [llvm_double_ty],
- [IntrNoMem], "llvm.nvvm.move.double">;
-def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
- [IntrNoMem, NoCapture<0>], "llvm.nvvm.move.ptr">;
-
-
-/// Error / Warn
-def int_nvvm_compiler_error :
- Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
-def int_nvvm_compiler_warn :
- Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
-
-
-// Old PTX back-end intrinsics retained here for backwards-compatibility
-
-multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
-// FIXME: Do we need the 128-bit integer type version?
-// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
-
-// FIXME: Enable this once v4i32 support is enabled in back-end.
-// def _v4i32 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
-
- def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<!strconcat(prefix, "_x")>;
- def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<!strconcat(prefix, "_y")>;
- def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<!strconcat(prefix, "_z")>;
- def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<!strconcat(prefix, "_w")>;
-}
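-
-// For illustration (a sketch of what TableGen generates): a defm such as
-//   defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
-//                           <"__builtin_ptx_read_tid">;
-// expands to int_ptx_read_tid_x/_y/_z/_w, each an i32 IntrNoMem intrinsic
-// bound to the matching __builtin_ptx_read_tid_* builtin.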
-
-class PTXReadSpecialRegisterIntrinsic_r32<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<name>;
-
-class PTXReadSpecialRegisterIntrinsic_r64<string name>
- : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
- GCCBuiltin<name>;
-
-defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
- <"__builtin_ptx_read_tid">;
-defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32
- <"__builtin_ptx_read_ntid">;
-
-def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_laneid">;
-def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_warpid">;
-def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_nwarpid">;
-
-defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32
- <"__builtin_ptx_read_ctaid">;
-defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32
- <"__builtin_ptx_read_nctaid">;
-
-def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_smid">;
-def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_nsmid">;
-def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_gridid">;
-
-def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_lanemask_eq">;
-def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_lanemask_le">;
-def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_lanemask_lt">;
-def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_lanemask_ge">;
-def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_lanemask_gt">;
-
-def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_clock">;
-def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64
- <"__builtin_ptx_read_clock64">;
-
-def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_pm0">;
-def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_pm1">;
-def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_pm2">;
-def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32
- <"__builtin_ptx_read_pm3">;
-
-def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>,
- GCCBuiltin<"__builtin_ptx_bar_sync">;
diff --git a/include/llvm/IntrinsicsPowerPC.td b/include/llvm/IntrinsicsPowerPC.td
deleted file mode 100644
index da85bfba8631..000000000000
--- a/include/llvm/IntrinsicsPowerPC.td
+++ /dev/null
@@ -1,465 +0,0 @@
-//===- IntrinsicsPowerPC.td - Defines PowerPC intrinsics ---*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines all of the PowerPC-specific intrinsics.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Definitions for all PowerPC intrinsics.
-//
-
-// Non-altivec intrinsics.
-let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
- // dcba/dcbf/dcbi/dcbst/dcbt/dcbtst/dcbz/dcbzl (PPC970) instructions.
- def int_ppc_dcba : Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbf : Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbtst: Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
-
- // sync instruction
- def int_ppc_sync : Intrinsic<[], [], []>;
-}
-
-
-let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
- /// PowerPC_Vec_Intrinsic - Base class for all altivec intrinsics.
- class PowerPC_Vec_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
- list<LLVMType> param_types,
- list<IntrinsicProperty> properties>
- : GCCBuiltin<!strconcat("__builtin_altivec_", GCCIntSuffix)>,
- Intrinsic<ret_types, param_types, properties>;
-}
-
-//===----------------------------------------------------------------------===//
-// PowerPC Altivec Intrinsic Class Definitions.
-//
-
-/// PowerPC_Vec_FF_Intrinsic - A PowerPC intrinsic that takes one v4f32
-/// vector and returns one. These intrinsics have no side effects.
-class PowerPC_Vec_FF_Intrinsic<string GCCIntSuffix>
- : PowerPC_Vec_Intrinsic<GCCIntSuffix,
- [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-
-/// PowerPC_Vec_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f32
-/// vectors and returns one. These intrinsics have no side effects.
-class PowerPC_Vec_FFF_Intrinsic<string GCCIntSuffix>
- : PowerPC_Vec_Intrinsic<GCCIntSuffix,
- [llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
-
-/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16i8
-/// vectors and returns one. These intrinsics have no side effects.
-class PowerPC_Vec_BBB_Intrinsic<string GCCIntSuffix>
- : PowerPC_Vec_Intrinsic<GCCIntSuffix,
- [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
-
-/// PowerPC_Vec_HHH_Intrinsic - A PowerPC intrinsic that takes two v8i16
-/// vectors and returns one. These intrinsics have no side effects.
-class PowerPC_Vec_HHH_Intrinsic<string GCCIntSuffix>
- : PowerPC_Vec_Intrinsic<GCCIntSuffix,
- [llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
-
-/// PowerPC_Vec_WWW_Intrinsic - A PowerPC intrinsic that takes two v4i32
-/// vectors and returns one. These intrinsics have no side effects.
-class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
- : PowerPC_Vec_Intrinsic<GCCIntSuffix,
- [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
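-
-// As an illustrative sketch of how the classes above are used (the defs
-// appear later in this file):
-//   def int_ppc_altivec_vmaxfp : PowerPC_Vec_FFF_Intrinsic<"vmaxfp">;
-// declares llvm.ppc.altivec.vmaxfp, which takes two v4f32 operands, returns
-// v4f32, has no side effects, and maps to __builtin_altivec_vmaxfp.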
-
-
-//===----------------------------------------------------------------------===//
-// PowerPC Altivec Intrinsic Definitions.
-
-let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
- // Data Stream Control.
- def int_ppc_altivec_dss : GCCBuiltin<"__builtin_altivec_dss">,
- Intrinsic<[], [llvm_i32_ty], []>;
- def int_ppc_altivec_dssall : GCCBuiltin<"__builtin_altivec_dssall">,
- Intrinsic<[], [], []>;
- def int_ppc_altivec_dst : GCCBuiltin<"__builtin_altivec_dst">,
- Intrinsic<[],
- [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
- []>;
- def int_ppc_altivec_dstt : GCCBuiltin<"__builtin_altivec_dstt">,
- Intrinsic<[],
- [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
- []>;
- def int_ppc_altivec_dstst : GCCBuiltin<"__builtin_altivec_dstst">,
- Intrinsic<[],
- [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
- []>;
- def int_ppc_altivec_dststt : GCCBuiltin<"__builtin_altivec_dststt">,
- Intrinsic<[],
- [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
- []>;
-
- // VSCR access.
- def int_ppc_altivec_mfvscr : GCCBuiltin<"__builtin_altivec_mfvscr">,
- Intrinsic<[llvm_v8i16_ty], [], [IntrReadMem]>;
- def int_ppc_altivec_mtvscr : GCCBuiltin<"__builtin_altivec_mtvscr">,
- Intrinsic<[], [llvm_v4i32_ty], []>;
-
-
- // Loads. These don't map directly to GCC builtins because they represent the
- // source address with a single pointer.
- def int_ppc_altivec_lvx :
- Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
- def int_ppc_altivec_lvxl :
- Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
- def int_ppc_altivec_lvebx :
- Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
- def int_ppc_altivec_lvehx :
- Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty], [IntrReadMem]>;
- def int_ppc_altivec_lvewx :
- Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
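- // Illustrative IR sketch (not taken from this file): an aligned vector
- // load is written as
- //   %v = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %addr)
- // while the corresponding GCC builtin takes a separate offset and base.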
-
- // Stores. These don't map directly to GCC builtins because they represent the
- // destination address with a single pointer.
- def int_ppc_altivec_stvx :
- Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
- def int_ppc_altivec_stvxl :
- Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
- def int_ppc_altivec_stvebx :
- Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty], []>;
- def int_ppc_altivec_stvehx :
- Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty], []>;
- def int_ppc_altivec_stvewx :
- Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
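- // Illustrative IR sketch (not taken from this file):
- //   call void @llvm.ppc.altivec.stvx(<4 x i32> %val, i8* %addr)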
-
- // Comparisons setting a vector.
- def int_ppc_altivec_vcmpbfp : GCCBuiltin<"__builtin_altivec_vcmpbfp">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpeqfp : GCCBuiltin<"__builtin_altivec_vcmpeqfp">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgefp : GCCBuiltin<"__builtin_altivec_vcmpgefp">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtfp : GCCBuiltin<"__builtin_altivec_vcmpgtfp">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
-
- def int_ppc_altivec_vcmpequw : GCCBuiltin<"__builtin_altivec_vcmpequw">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsw : GCCBuiltin<"__builtin_altivec_vcmpgtsw">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtuw : GCCBuiltin<"__builtin_altivec_vcmpgtuw">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
-
- def int_ppc_altivec_vcmpequh : GCCBuiltin<"__builtin_altivec_vcmpequh">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsh : GCCBuiltin<"__builtin_altivec_vcmpgtsh">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtuh : GCCBuiltin<"__builtin_altivec_vcmpgtuh">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
-
- def int_ppc_altivec_vcmpequb : GCCBuiltin<"__builtin_altivec_vcmpequb">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsb : GCCBuiltin<"__builtin_altivec_vcmpgtsb">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtub : GCCBuiltin<"__builtin_altivec_vcmpgtub">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
-
- // Predicate Comparisons. The first operand specifies interpretation of CR6.
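- // Illustrative IR sketch (the encoding of the leading i32 selector is
- // defined by the front end, e.g. the __CR6_* macros in altivec.h; the
- // constant below is an assumed placeholder):
- //   %res = call i32 @llvm.ppc.altivec.vcmpequw.p(i32 2, <4 x i32> %a,
- //                                                <4 x i32> %b)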
- def int_ppc_altivec_vcmpbfp_p : GCCBuiltin<"__builtin_altivec_vcmpbfp_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpeqfp_p : GCCBuiltin<"__builtin_altivec_vcmpeqfp_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgefp_p : GCCBuiltin<"__builtin_altivec_vcmpgefp_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtfp_p : GCCBuiltin<"__builtin_altivec_vcmpgtfp_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty],
- [IntrNoMem]>;
-
- def int_ppc_altivec_vcmpequw_p : GCCBuiltin<"__builtin_altivec_vcmpequw_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsw_p : GCCBuiltin<"__builtin_altivec_vcmpgtsw_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtuw_p : GCCBuiltin<"__builtin_altivec_vcmpgtuw_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
- [IntrNoMem]>;
-
- def int_ppc_altivec_vcmpequh_p : GCCBuiltin<"__builtin_altivec_vcmpequh_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsh_p : GCCBuiltin<"__builtin_altivec_vcmpgtsh_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtuh_p : GCCBuiltin<"__builtin_altivec_vcmpgtuh_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
- [IntrNoMem]>;
-
- def int_ppc_altivec_vcmpequb_p : GCCBuiltin<"__builtin_altivec_vcmpequb_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtsb_p : GCCBuiltin<"__builtin_altivec_vcmpgtsb_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcmpgtub_p : GCCBuiltin<"__builtin_altivec_vcmpgtub_p">,
- Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
- [IntrNoMem]>;
-}
-
-// Vector average.
-def int_ppc_altivec_vavgsb : PowerPC_Vec_BBB_Intrinsic<"vavgsb">;
-def int_ppc_altivec_vavgsh : PowerPC_Vec_HHH_Intrinsic<"vavgsh">;
-def int_ppc_altivec_vavgsw : PowerPC_Vec_WWW_Intrinsic<"vavgsw">;
-def int_ppc_altivec_vavgub : PowerPC_Vec_BBB_Intrinsic<"vavgub">;
-def int_ppc_altivec_vavguh : PowerPC_Vec_HHH_Intrinsic<"vavguh">;
-def int_ppc_altivec_vavguw : PowerPC_Vec_WWW_Intrinsic<"vavguw">;
-
-// Vector maximum.
-def int_ppc_altivec_vmaxfp : PowerPC_Vec_FFF_Intrinsic<"vmaxfp">;
-def int_ppc_altivec_vmaxsb : PowerPC_Vec_BBB_Intrinsic<"vmaxsb">;
-def int_ppc_altivec_vmaxsh : PowerPC_Vec_HHH_Intrinsic<"vmaxsh">;
-def int_ppc_altivec_vmaxsw : PowerPC_Vec_WWW_Intrinsic<"vmaxsw">;
-def int_ppc_altivec_vmaxub : PowerPC_Vec_BBB_Intrinsic<"vmaxub">;
-def int_ppc_altivec_vmaxuh : PowerPC_Vec_HHH_Intrinsic<"vmaxuh">;
-def int_ppc_altivec_vmaxuw : PowerPC_Vec_WWW_Intrinsic<"vmaxuw">;
-
-// Vector minimum.
-def int_ppc_altivec_vminfp : PowerPC_Vec_FFF_Intrinsic<"vminfp">;
-def int_ppc_altivec_vminsb : PowerPC_Vec_BBB_Intrinsic<"vminsb">;
-def int_ppc_altivec_vminsh : PowerPC_Vec_HHH_Intrinsic<"vminsh">;
-def int_ppc_altivec_vminsw : PowerPC_Vec_WWW_Intrinsic<"vminsw">;
-def int_ppc_altivec_vminub : PowerPC_Vec_BBB_Intrinsic<"vminub">;
-def int_ppc_altivec_vminuh : PowerPC_Vec_HHH_Intrinsic<"vminuh">;
-def int_ppc_altivec_vminuw : PowerPC_Vec_WWW_Intrinsic<"vminuw">;
-
-// Saturating adds.
-def int_ppc_altivec_vaddubs : PowerPC_Vec_BBB_Intrinsic<"vaddubs">;
-def int_ppc_altivec_vaddsbs : PowerPC_Vec_BBB_Intrinsic<"vaddsbs">;
-def int_ppc_altivec_vadduhs : PowerPC_Vec_HHH_Intrinsic<"vadduhs">;
-def int_ppc_altivec_vaddshs : PowerPC_Vec_HHH_Intrinsic<"vaddshs">;
-def int_ppc_altivec_vadduws : PowerPC_Vec_WWW_Intrinsic<"vadduws">;
-def int_ppc_altivec_vaddsws : PowerPC_Vec_WWW_Intrinsic<"vaddsws">;
-def int_ppc_altivec_vaddcuw : PowerPC_Vec_WWW_Intrinsic<"vaddcuw">;
-
-// Saturating subs.
-def int_ppc_altivec_vsububs : PowerPC_Vec_BBB_Intrinsic<"vsububs">;
-def int_ppc_altivec_vsubsbs : PowerPC_Vec_BBB_Intrinsic<"vsubsbs">;
-def int_ppc_altivec_vsubuhs : PowerPC_Vec_HHH_Intrinsic<"vsubuhs">;
-def int_ppc_altivec_vsubshs : PowerPC_Vec_HHH_Intrinsic<"vsubshs">;
-def int_ppc_altivec_vsubuws : PowerPC_Vec_WWW_Intrinsic<"vsubuws">;
-def int_ppc_altivec_vsubsws : PowerPC_Vec_WWW_Intrinsic<"vsubsws">;
-def int_ppc_altivec_vsubcuw : PowerPC_Vec_WWW_Intrinsic<"vsubcuw">;
-
-let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
- // Saturating multiply-adds.
- def int_ppc_altivec_vmhaddshs : GCCBuiltin<"__builtin_altivec_vmhaddshs">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vmhraddshs : GCCBuiltin<"__builtin_altivec_vmhraddshs">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
-
- def int_ppc_altivec_vmaddfp : GCCBuiltin<"__builtin_altivec_vmaddfp">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vnmsubfp : GCCBuiltin<"__builtin_altivec_vnmsubfp">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-
- // Vector Multiply Sum Instructions.
- def int_ppc_altivec_vmsummbm : GCCBuiltin<"__builtin_altivec_vmsummbm">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vmsumshm : GCCBuiltin<"__builtin_altivec_vmsumshm">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
-
- // Vector Multiply Instructions.
- def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vmulesh : GCCBuiltin<"__builtin_altivec_vmulesh">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vmuleuh : GCCBuiltin<"__builtin_altivec_vmuleuh">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
-
- def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vmulosh : GCCBuiltin<"__builtin_altivec_vmulosh">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vmulouh : GCCBuiltin<"__builtin_altivec_vmulouh">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
-
- // Vector Sum Instructions.
- def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vsum2sws : GCCBuiltin<"__builtin_altivec_vsum2sws">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vsum4sbs : GCCBuiltin<"__builtin_altivec_vsum4sbs">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vsum4shs : GCCBuiltin<"__builtin_altivec_vsum4shs">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vsum4ubs : GCCBuiltin<"__builtin_altivec_vsum4ubs">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
-
- // Other multiplies.
- def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
-
- // Packs.
- def int_ppc_altivec_vpkpx : GCCBuiltin<"__builtin_altivec_vpkpx">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vpkshss : GCCBuiltin<"__builtin_altivec_vpkshss">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vpkshus : GCCBuiltin<"__builtin_altivec_vpkshus">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- // vpkuhum is lowered to a shuffle.
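- // (Illustrative sketch, assuming big-endian lane numbering: the equivalent
- // shufflevector keeps the low byte of each halfword, i.e. indices
- // 1,3,5,...,31 of the concatenated 32-byte input.)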
- def int_ppc_altivec_vpkuhus : GCCBuiltin<"__builtin_altivec_vpkuhus">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- // vpkuwum is lowered to a shuffle.
- def int_ppc_altivec_vpkuwus : GCCBuiltin<"__builtin_altivec_vpkuwus">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
-
- // Unpacks.
- def int_ppc_altivec_vupkhpx : GCCBuiltin<"__builtin_altivec_vupkhpx">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupkhsb : GCCBuiltin<"__builtin_altivec_vupkhsb">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupkhsh : GCCBuiltin<"__builtin_altivec_vupkhsh">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupklpx : GCCBuiltin<"__builtin_altivec_vupklpx">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupklsb : GCCBuiltin<"__builtin_altivec_vupklsb">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vupklsh : GCCBuiltin<"__builtin_altivec_vupklsh">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-
-
- // FP <-> integer conversion.
- def int_ppc_altivec_vcfsx : GCCBuiltin<"__builtin_altivec_vcfsx">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vcfux : GCCBuiltin<"__builtin_altivec_vcfux">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vctsxs : GCCBuiltin<"__builtin_altivec_vctsxs">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_ppc_altivec_vctuxs : GCCBuiltin<"__builtin_altivec_vctuxs">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
- def int_ppc_altivec_vrfim : GCCBuiltin<"__builtin_altivec_vrfim">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vrfin : GCCBuiltin<"__builtin_altivec_vrfin">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vrfip : GCCBuiltin<"__builtin_altivec_vrfip">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_ppc_altivec_vrfiz : GCCBuiltin<"__builtin_altivec_vrfiz">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-}
-
-def int_ppc_altivec_vsl : PowerPC_Vec_WWW_Intrinsic<"vsl">;
-def int_ppc_altivec_vslo : PowerPC_Vec_WWW_Intrinsic<"vslo">;
-
-def int_ppc_altivec_vslb : PowerPC_Vec_BBB_Intrinsic<"vslb">;
-def int_ppc_altivec_vslh : PowerPC_Vec_HHH_Intrinsic<"vslh">;
-def int_ppc_altivec_vslw : PowerPC_Vec_WWW_Intrinsic<"vslw">;
-
-// Right Shifts.
-def int_ppc_altivec_vsr : PowerPC_Vec_WWW_Intrinsic<"vsr">;
-def int_ppc_altivec_vsro : PowerPC_Vec_WWW_Intrinsic<"vsro">;
-
-def int_ppc_altivec_vsrb : PowerPC_Vec_BBB_Intrinsic<"vsrb">;
-def int_ppc_altivec_vsrh : PowerPC_Vec_HHH_Intrinsic<"vsrh">;
-def int_ppc_altivec_vsrw : PowerPC_Vec_WWW_Intrinsic<"vsrw">;
-def int_ppc_altivec_vsrab : PowerPC_Vec_BBB_Intrinsic<"vsrab">;
-def int_ppc_altivec_vsrah : PowerPC_Vec_HHH_Intrinsic<"vsrah">;
-def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
-
-// Rotates.
-def int_ppc_altivec_vrlb : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
-def int_ppc_altivec_vrlh : PowerPC_Vec_HHH_Intrinsic<"vrlh">;
-def int_ppc_altivec_vrlw : PowerPC_Vec_WWW_Intrinsic<"vrlw">;
-
-let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
- // Miscellaneous.
- def int_ppc_altivec_lvsl :
- Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>;
- def int_ppc_altivec_lvsr :
- Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>;
-
- def int_ppc_altivec_vperm : GCCBuiltin<"__builtin_altivec_vperm_4si">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_v16i8_ty], [IntrNoMem]>;
- def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
-}
-
-def int_ppc_altivec_vexptefp : PowerPC_Vec_FF_Intrinsic<"vexptefp">;
-def int_ppc_altivec_vlogefp : PowerPC_Vec_FF_Intrinsic<"vlogefp">;
-def int_ppc_altivec_vrefp : PowerPC_Vec_FF_Intrinsic<"vrefp">;
-def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">;
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
deleted file mode 100644
index d2463c0efa14..000000000000
--- a/include/llvm/IntrinsicsX86.td
+++ /dev/null
@@ -1,2573 +0,0 @@
-//===- IntrinsicsX86.td - Defines X86 intrinsics -----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines all of the X86-specific intrinsics.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Interrupt traps
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_int : Intrinsic<[], [llvm_i8_ty]>;
-}
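-
-// Illustrative IR sketch (not taken from this file): raising interrupt
-// vector 3 (a software breakpoint) would be written as
-//   call void @llvm.x86.int(i8 3)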
-
-//===----------------------------------------------------------------------===//
-// 3DNow!
-
-let TargetPrefix = "x86" in {
- def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_3dnow_pfrsqit1 : GCCBuiltin<"__builtin_ia32_pfrsqit1">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
-}
-
-//===----------------------------------------------------------------------===//
-// 3DNow! extensions
-
-let TargetPrefix = "x86" in {
- def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_3dnowa_pswapd :
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-}
-
-//===----------------------------------------------------------------------===//
-// SSE1
-
-// Arithmetic ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_add_ss : GCCBuiltin<"__builtin_ia32_addss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_sub_ss : GCCBuiltin<"__builtin_ia32_subss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_mul_ss : GCCBuiltin<"__builtin_ia32_mulss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_div_ss : GCCBuiltin<"__builtin_ia32_divss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_sse_sqrt_ps : GCCBuiltin<"__builtin_ia32_sqrtps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_sse_rcp_ss : GCCBuiltin<"__builtin_ia32_rcpss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_sse_rcp_ps : GCCBuiltin<"__builtin_ia32_rcpps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_sse_rsqrt_ss : GCCBuiltin<"__builtin_ia32_rsqrtss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_sse_rsqrt_ps : GCCBuiltin<"__builtin_ia32_rsqrtps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_sse_min_ss : GCCBuiltin<"__builtin_ia32_minss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_min_ps : GCCBuiltin<"__builtin_ia32_minps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_max_ss : GCCBuiltin<"__builtin_ia32_maxss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_max_ps : GCCBuiltin<"__builtin_ia32_maxps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
-}
-
-// Comparison ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_sse_cmp_ps : GCCBuiltin<"__builtin_ia32_cmpps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_comilt_ss : GCCBuiltin<"__builtin_ia32_comilt">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_comile_ss : GCCBuiltin<"__builtin_ia32_comile">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_comigt_ss : GCCBuiltin<"__builtin_ia32_comigt">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_comige_ss : GCCBuiltin<"__builtin_ia32_comige">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_comineq_ss : GCCBuiltin<"__builtin_ia32_comineq">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomieq_ss : GCCBuiltin<"__builtin_ia32_ucomieq">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomilt_ss : GCCBuiltin<"__builtin_ia32_ucomilt">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomile_ss : GCCBuiltin<"__builtin_ia32_ucomile">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomigt_ss : GCCBuiltin<"__builtin_ia32_ucomigt">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomige_ss : GCCBuiltin<"__builtin_ia32_ucomige">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_ucomineq_ss : GCCBuiltin<"__builtin_ia32_ucomineq">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
-}
-
-
-// Conversion ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_cvtss2si : GCCBuiltin<"__builtin_ia32_cvtss2si">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtss2si64 : GCCBuiltin<"__builtin_ia32_cvtss2si64">,
- Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvttss2si : GCCBuiltin<"__builtin_ia32_cvttss2si">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvttss2si64 : GCCBuiltin<"__builtin_ia32_cvttss2si64">,
- Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtsi2ss : GCCBuiltin<"__builtin_ia32_cvtsi2ss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtsi642ss : GCCBuiltin<"__builtin_ia32_cvtsi642ss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i64_ty], [IntrNoMem]>;
- def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtpi2ps : GCCBuiltin<"__builtin_ia32_cvtpi2ps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
-}
-
-// SIMD store ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_storeu_ps : GCCBuiltin<"__builtin_ia32_storeups">,
- Intrinsic<[], [llvm_ptr_ty,
- llvm_v4f32_ty], [IntrReadWriteArgMem]>;
-}
-
-// Cacheability support ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_sfence : GCCBuiltin<"__builtin_ia32_sfence">,
- Intrinsic<[], [], []>;
-}
-
-// Control register.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_stmxcsr :
- Intrinsic<[], [llvm_ptr_ty], []>;
- def int_x86_sse_ldmxcsr :
- Intrinsic<[], [llvm_ptr_ty], []>;
-}
-
-// Misc.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_movmsk_ps : GCCBuiltin<"__builtin_ia32_movmskps">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-}
-
-//===----------------------------------------------------------------------===//
-// SSE2
-
-// FP arithmetic ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_add_sd : GCCBuiltin<"__builtin_ia32_addsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_sub_sd : GCCBuiltin<"__builtin_ia32_subsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_mul_sd : GCCBuiltin<"__builtin_ia32_mulsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_div_sd : GCCBuiltin<"__builtin_ia32_divsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_sse2_sqrt_pd : GCCBuiltin<"__builtin_ia32_sqrtpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_sse2_min_sd : GCCBuiltin<"__builtin_ia32_minsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_min_pd : GCCBuiltin<"__builtin_ia32_minpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_max_sd : GCCBuiltin<"__builtin_ia32_maxsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_max_pd : GCCBuiltin<"__builtin_ia32_maxpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
-}
-
-// FP comparison ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_sse2_cmp_pd : GCCBuiltin<"__builtin_ia32_cmppd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_comilt_sd : GCCBuiltin<"__builtin_ia32_comisdlt">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_comile_sd : GCCBuiltin<"__builtin_ia32_comisdle">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_comigt_sd : GCCBuiltin<"__builtin_ia32_comisdgt">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_comige_sd : GCCBuiltin<"__builtin_ia32_comisdge">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_comineq_sd : GCCBuiltin<"__builtin_ia32_comisdneq">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomieq_sd : GCCBuiltin<"__builtin_ia32_ucomisdeq">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomilt_sd : GCCBuiltin<"__builtin_ia32_ucomisdlt">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomile_sd : GCCBuiltin<"__builtin_ia32_ucomisdle">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomigt_sd : GCCBuiltin<"__builtin_ia32_ucomisdgt">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomige_sd : GCCBuiltin<"__builtin_ia32_ucomisdge">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_ucomineq_sd : GCCBuiltin<"__builtin_ia32_ucomisdneq">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
-}
-
-// Integer arithmetic ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem, Commutative]>;
-}
-
-// Integer shift ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_psll_w : GCCBuiltin<"__builtin_ia32_psllw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_psll_d : GCCBuiltin<"__builtin_ia32_pslld128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psll_q : GCCBuiltin<"__builtin_ia32_psllq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_sse2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
-
- def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi128_byteshift">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi128_byteshift">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
-}
-
-// Conversion ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_cvtdq2pd : GCCBuiltin<"__builtin_ia32_cvtdq2pd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvttpd2dq : GCCBuiltin<"__builtin_ia32_cvttpd2dq">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtpd2ps : GCCBuiltin<"__builtin_ia32_cvtpd2ps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtps2pd : GCCBuiltin<"__builtin_ia32_cvtps2pd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
- Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvttsd2si : GCCBuiltin<"__builtin_ia32_cvttsd2si">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_cvttsd2si64">,
- Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtsi2sd : GCCBuiltin<"__builtin_ia32_cvtsi2sd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtsi642sd : GCCBuiltin<"__builtin_ia32_cvtsi642sd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_i64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtss2sd : GCCBuiltin<"__builtin_ia32_cvtss2sd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtpd2pi : GCCBuiltin<"__builtin_ia32_cvtpd2pi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse_cvttpd2pi: GCCBuiltin<"__builtin_ia32_cvttpd2pi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse_cvtpi2pd : GCCBuiltin<"__builtin_ia32_cvtpi2pd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-}
-
-// SIMD store ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_storeu_pd : GCCBuiltin<"__builtin_ia32_storeupd">,
- Intrinsic<[], [llvm_ptr_ty,
- llvm_v2f64_ty], [IntrReadWriteArgMem]>;
- def int_x86_sse2_storeu_dq : GCCBuiltin<"__builtin_ia32_storedqu">,
- Intrinsic<[], [llvm_ptr_ty,
- llvm_v16i8_ty], [IntrReadWriteArgMem]>;
- def int_x86_sse2_storel_dq : GCCBuiltin<"__builtin_ia32_storelv4si">,
- Intrinsic<[], [llvm_ptr_ty,
- llvm_v4i32_ty], [IntrReadWriteArgMem]>;
-}
-
-// Misc.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_packsswb_128 : GCCBuiltin<"__builtin_ia32_packsswb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_packssdw_128 : GCCBuiltin<"__builtin_ia32_packssdw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sse2_packuswb_128 : GCCBuiltin<"__builtin_ia32_packuswb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_movmsk_pd : GCCBuiltin<"__builtin_ia32_movmskpd">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_pmovmskb_128 : GCCBuiltin<"__builtin_ia32_pmovmskb128">,
- Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse2_maskmov_dqu : GCCBuiltin<"__builtin_ia32_maskmovdqu">,
- Intrinsic<[], [llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_ptr_ty], []>;
- def int_x86_sse2_clflush : GCCBuiltin<"__builtin_ia32_clflush">,
- Intrinsic<[], [llvm_ptr_ty], []>;
- def int_x86_sse2_lfence : GCCBuiltin<"__builtin_ia32_lfence">,
- Intrinsic<[], [], []>;
- def int_x86_sse2_mfence : GCCBuiltin<"__builtin_ia32_mfence">,
- Intrinsic<[], [], []>;
-}
-
-//===----------------------------------------------------------------------===//
-// SSE3
-
-// Addition / subtraction ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse3_addsub_ps : GCCBuiltin<"__builtin_ia32_addsubps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse3_addsub_pd : GCCBuiltin<"__builtin_ia32_addsubpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
-}
-
-// Horizontal ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse3_hadd_ps : GCCBuiltin<"__builtin_ia32_haddps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse3_hadd_pd : GCCBuiltin<"__builtin_ia32_haddpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse3_hsub_ps : GCCBuiltin<"__builtin_ia32_hsubps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse3_hsub_pd : GCCBuiltin<"__builtin_ia32_hsubpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
-}
-
-// Specialized unaligned load.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse3_ldu_dq : GCCBuiltin<"__builtin_ia32_lddqu">,
- Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
-}
-
-// Thread synchronization ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse3_monitor : GCCBuiltin<"__builtin_ia32_monitor">,
- Intrinsic<[], [llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], []>;
- def int_x86_sse3_mwait : GCCBuiltin<"__builtin_ia32_mwait">,
- Intrinsic<[], [llvm_i32_ty,
- llvm_i32_ty], []>;
-}
-
-//===----------------------------------------------------------------------===//
-// SSSE3
-
-// Horizontal arithmetic ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phadd_w_128 : GCCBuiltin<"__builtin_ia32_phaddw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
-
- def int_x86_ssse3_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phadd_d_128 : GCCBuiltin<"__builtin_ia32_phaddd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
-
- def int_x86_ssse3_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phadd_sw_128 : GCCBuiltin<"__builtin_ia32_phaddsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
-
- def int_x86_ssse3_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_w_128 : GCCBuiltin<"__builtin_ia32_phsubw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
-
- def int_x86_ssse3_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_d_128 : GCCBuiltin<"__builtin_ia32_phsubd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
-
- def int_x86_ssse3_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_phsub_sw_128 : GCCBuiltin<"__builtin_ia32_phsubsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
-
- def int_x86_ssse3_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem]>;
-}
-
-// Packed multiply high with round and scale
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_ssse3_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem, Commutative]>;
-}
-
-// Shuffle ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
- [IntrNoMem]>;
-}
-
-// Sign ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_psign_b : GCCBuiltin<"__builtin_ia32_psignb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_psign_b_128 : GCCBuiltin<"__builtin_ia32_psignb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem]>;
-
- def int_x86_ssse3_psign_w : GCCBuiltin<"__builtin_ia32_psignw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_psign_w_128 : GCCBuiltin<"__builtin_ia32_psignw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
-
- def int_x86_ssse3_psign_d : GCCBuiltin<"__builtin_ia32_psignd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_psign_d_128 : GCCBuiltin<"__builtin_ia32_psignd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
-}
-
-// Absolute value ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_ssse3_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_pabs_b_128 : GCCBuiltin<"__builtin_ia32_pabsb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
-
- def int_x86_ssse3_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_pabs_w_128 : GCCBuiltin<"__builtin_ia32_pabsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
-
- def int_x86_ssse3_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_ssse3_pabs_d_128 : GCCBuiltin<"__builtin_ia32_pabsd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
-}
-
-//===----------------------------------------------------------------------===//
-// SSE4.1
-
-// FP rounding ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
-}
-
-// Vector sign and zero extend
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty],
- [IntrNoMem]>;
-}
-
-// Vector min element
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_phminposuw : GCCBuiltin<"__builtin_ia32_phminposuw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty],
- [IntrNoMem]>;
-}
-
-// Vector compare, min, max
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_pmaxsb : GCCBuiltin<"__builtin_ia32_pmaxsb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pmaxsd : GCCBuiltin<"__builtin_ia32_pmaxsd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pmaxud : GCCBuiltin<"__builtin_ia32_pmaxud128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pmaxuw : GCCBuiltin<"__builtin_ia32_pmaxuw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pminsb : GCCBuiltin<"__builtin_ia32_pminsb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pminsd : GCCBuiltin<"__builtin_ia32_pminsd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pminud : GCCBuiltin<"__builtin_ia32_pminud128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_pminuw : GCCBuiltin<"__builtin_ia32_pminuw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem, Commutative]>;
-}
-
-// Advanced Encryption Standard (AES) Instructions
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_aesni_aesimc : GCCBuiltin<"__builtin_ia32_aesimc128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_aesni_aesenc : GCCBuiltin<"__builtin_ia32_aesenc128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_aesni_aesenclast : GCCBuiltin<"__builtin_ia32_aesenclast128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_aesni_aesdec : GCCBuiltin<"__builtin_ia32_aesdec128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_aesni_aesdeclast : GCCBuiltin<"__builtin_ia32_aesdeclast128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_aesni_aeskeygenassist :
- GCCBuiltin<"__builtin_ia32_aeskeygenassist128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-}
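
A sketch of one AES encryption round through the aesenc intrinsic (placeholder names):

    declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>)
    %s1 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %state, <2 x i64> %roundkey)

AESENC applies ShiftRows, SubBytes, and MixColumns to the state and then XORs in the round key; aesenclast is the same but omits MixColumns.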
-
-// PCLMUL instruction
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_pclmulqdq : GCCBuiltin<"__builtin_ia32_pclmulqdq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-}
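
A carry-less multiply sketch: the i8 immediate selects which 64-bit half of each source to multiply (bit 0 for the first operand, bit 4 for the second), so i8 0 multiplies the two low quadwords (placeholder names):

    declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8)
    %p = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a, <2 x i64> %b, i8 0)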
-
-// Vector pack
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_packusdw : GCCBuiltin<"__builtin_ia32_packusdw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
-}
-
-// Vector multiply
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_pmuldq : GCCBuiltin<"__builtin_ia32_pmuldq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem, Commutative]>;
-}
-
-// Vector extract
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_pextrb :
- Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pextrd :
- Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pextrq :
- Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_sse41_extractps : GCCBuiltin<"__builtin_ia32_extractps128">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-}
-
-// Vector insert
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty],
- [IntrNoMem]>;
-}
-
-// Vector blend
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_sse41_blendvps : GCCBuiltin<"__builtin_ia32_blendvps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_v4f32_ty],
- [IntrNoMem]>;
-}
-
-// Vector dot product
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_i32_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty],
- [IntrNoMem, Commutative]>;
-}
-
-// Vector sum of absolute differences
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i32_ty],
- [IntrNoMem, Commutative]>;
-}
-
-// Cacheability support ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa">,
- Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
-}
-
-// Test instruction with bitwise comparison.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">,
- Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">,
- Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">,
- Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
-}
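
These return the PTEST flag results as i32 0/1: ptestz yields 1 when a AND b is all zeros (ZF), ptestc when (NOT a) AND b is all zeros (CF), ptestnzc when neither holds. A sketch with placeholder names:

    declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>)
    %z = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a, <2 x i64> %b)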
-
-//===----------------------------------------------------------------------===//
-// SSE4.2
-
-// CRC32 instructions
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse42_crc32_32_8 : GCCBuiltin<"__builtin_ia32_crc32qi">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_crc32_32_16 : GCCBuiltin<"__builtin_ia32_crc32hi">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i16_ty],
- [IntrNoMem]>;
- def int_x86_sse42_crc32_32_32 : GCCBuiltin<"__builtin_ia32_crc32si">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_sse42_crc32_64_8 :
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_crc32_64_64 : GCCBuiltin<"__builtin_ia32_crc32di">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem]>;
-}
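
Each of these folds one 8/16/32/64-bit chunk into a running CRC using the CRC-32C (Castagnoli) polynomial, e.g. one byte at a time (placeholder names):

    declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8)
    %crc1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %crc0, i8 %byte)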
-
-// String/text processing ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">,
- Intrinsic<[llvm_v16i8_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">,
- Intrinsic<[llvm_v16i8_ty],
- [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">,
- Intrinsic<[llvm_i32_ty],
- [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
- llvm_i8_ty],
- [IntrNoMem]>;
-}
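
All the pcmpistr/pcmpestr variants share one shape: the trailing i8 immediate encodes element format, aggregation operation, and polarity; pcmp*stri returns a match index, pcmp*strm a match mask, and the a/c/o/s/z forms return a single flag bit as i32. A sketch (placeholder names; immediate 0 selects unsigned bytes, equal-any aggregation):

    declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8)
    %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a, <16 x i8> %b, i8 0)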
-
-//===----------------------------------------------------------------------===//
-// SSE4A
-
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse4a_extrqi : GCCBuiltin<"__builtin_ia32_extrqi">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse4a_extrq : GCCBuiltin<"__builtin_ia32_extrq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], [IntrNoMem]>;
-
- def int_x86_sse4a_insertqi : GCCBuiltin<"__builtin_ia32_insertqi">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_i8_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
-
- def int_x86_sse4a_movnt_ss : GCCBuiltin<"__builtin_ia32_movntss">,
- Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty], []>;
- def int_x86_sse4a_movnt_sd : GCCBuiltin<"__builtin_ia32_movntsd">,
- Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty], []>;
-}
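
The movnt forms are the only SSE4A entries that touch memory, hence their empty attribute lists. A sketch of streaming the low float through a (hypothetical) pointer %p:

    declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>)
    call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %v)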
-
-//===----------------------------------------------------------------------===//
-// AVX
-
-// Arithmetic ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_addsub_pd_256 : GCCBuiltin<"__builtin_ia32_addsubpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_addsub_ps_256 : GCCBuiltin<"__builtin_ia32_addsubps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_max_pd_256 : GCCBuiltin<"__builtin_ia32_maxpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_max_ps_256 : GCCBuiltin<"__builtin_ia32_maxps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty], [IntrNoMem]>;
-
- def int_x86_avx_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-
- def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-
- def int_x86_avx_rcp_ps_256 : GCCBuiltin<"__builtin_ia32_rcpps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-
- def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx_round_ps_256 : GCCBuiltin<"__builtin_ia32_roundps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
-}
-
-// Horizontal ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_hadd_pd_256 : GCCBuiltin<"__builtin_ia32_haddpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_hsub_ps_256 : GCCBuiltin<"__builtin_ia32_hsubps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_hsub_pd_256 : GCCBuiltin<"__builtin_ia32_hsubpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_hadd_ps_256 : GCCBuiltin<"__builtin_ia32_haddps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty], [IntrNoMem]>;
-}
-
-// Vector permutation
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vpermilvar_pd : GCCBuiltin<"__builtin_ia32_vpermilvarpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx_vpermilvar_ps : GCCBuiltin<"__builtin_ia32_vpermilvarps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
-
- def int_x86_avx_vpermilvar_pd_256 :
- GCCBuiltin<"__builtin_ia32_vpermilvarpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx_vpermilvar_ps_256 :
- GCCBuiltin<"__builtin_ia32_vpermilvarps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>;
-
- def int_x86_avx_vperm2f128_pd_256 :
- GCCBuiltin<"__builtin_ia32_vperm2f128_pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vperm2f128_ps_256 :
- GCCBuiltin<"__builtin_ia32_vperm2f128_ps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vperm2f128_si_256 :
- GCCBuiltin<"__builtin_ia32_vperm2f128_si256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-}
-
-// Vector blend
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_blendv_ps_256 : GCCBuiltin<"__builtin_ia32_blendvps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
-}
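
The blendv forms select per element on the sign bit of the third operand: where a mask element's top bit is set the result takes the second source, otherwise the first. Sketch (placeholder names):

    declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
    %r = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %mask)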
-
-// Vector dot product
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
-}
-
-// Vector compare
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_cmp_pd_256 : GCCBuiltin<"__builtin_ia32_cmppd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_cmp_ps_256 : GCCBuiltin<"__builtin_ia32_cmpps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
-}
-
-// Vector extract and insert
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vextractf128_pd_256 :
- GCCBuiltin<"__builtin_ia32_vextractf128_pd256">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vextractf128_ps_256 :
- GCCBuiltin<"__builtin_ia32_vextractf128_ps256">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vextractf128_si_256 :
- GCCBuiltin<"__builtin_ia32_vextractf128_si256">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx_vinsertf128_pd_256 :
- GCCBuiltin<"__builtin_ia32_vinsertf128_pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vinsertf128_ps_256 :
- GCCBuiltin<"__builtin_ia32_vinsertf128_ps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vinsertf128_si_256 :
- GCCBuiltin<"__builtin_ia32_vinsertf128_si256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
-}
-
-// Vector convert
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_cvtdq2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtdq2pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_cvt_ps2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtps2pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-}
-
-// Vector bit test
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vtestz_pd : GCCBuiltin<"__builtin_ia32_vtestzpd">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestc_pd : GCCBuiltin<"__builtin_ia32_vtestcpd">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestnzc_pd : GCCBuiltin<"__builtin_ia32_vtestnzcpd">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestz_ps : GCCBuiltin<"__builtin_ia32_vtestzps">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx_vtestc_ps : GCCBuiltin<"__builtin_ia32_vtestcps">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx_vtestnzc_ps : GCCBuiltin<"__builtin_ia32_vtestnzcps">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx_vtestz_pd_256 : GCCBuiltin<"__builtin_ia32_vtestzpd256">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestcpd256">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestnzc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestnzcpd256">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_vtestz_ps_256 : GCCBuiltin<"__builtin_ia32_vtestzps256">,
- Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_vtestc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestcps256">,
- Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_vtestnzc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestnzcps256">,
- Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_avx_ptestz_256 : GCCBuiltin<"__builtin_ia32_ptestz256">,
- Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx_ptestc_256 : GCCBuiltin<"__builtin_ia32_ptestc256">,
- Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">,
- Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty], [IntrNoMem]>;
-}
-
-// Vector extract sign mask
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_movmsk_pd_256 : GCCBuiltin<"__builtin_ia32_movmskpd256">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_avx_movmsk_ps_256 : GCCBuiltin<"__builtin_ia32_movmskps256">,
- Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-}
-
-// Vector zero
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vzeroall : GCCBuiltin<"__builtin_ia32_vzeroall">,
- Intrinsic<[], [], []>;
- def int_x86_avx_vzeroupper : GCCBuiltin<"__builtin_ia32_vzeroupper">,
- Intrinsic<[], [], []>;
-}
-
-// Vector load with broadcast
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vbroadcast_ss :
- GCCBuiltin<"__builtin_ia32_vbroadcastss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_x86_avx_vbroadcast_sd_256 :
- GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_x86_avx_vbroadcast_ss_256 :
- GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_x86_avx_vbroadcastf128_pd_256 :
- GCCBuiltin<"__builtin_ia32_vbroadcastf128_pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_x86_avx_vbroadcastf128_ps_256 :
- GCCBuiltin<"__builtin_ia32_vbroadcastf128_ps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
-}
-
-// SIMD load ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
-}
-
-// SIMD store ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_storeu_pd_256 : GCCBuiltin<"__builtin_ia32_storeupd256">,
- Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>;
- def int_x86_avx_storeu_ps_256 : GCCBuiltin<"__builtin_ia32_storeups256">,
- Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
- def int_x86_avx_storeu_dq_256 : GCCBuiltin<"__builtin_ia32_storedqu256">,
- Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty], [IntrReadWriteArgMem]>;
-}
-
-// Conditional load ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty],
- [IntrReadArgMem]>;
- def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty],
- [IntrReadArgMem]>;
- def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty],
- [IntrReadArgMem]>;
- def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty],
- [IntrReadArgMem]>;
-}
-
-// Conditional store ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">,
- Intrinsic<[], [llvm_ptr_ty,
- llvm_v2f64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>;
- def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">,
- Intrinsic<[], [llvm_ptr_ty,
- llvm_v4f32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
- def int_x86_avx_maskstore_pd_256 :
- GCCBuiltin<"__builtin_ia32_maskstorepd256">,
- Intrinsic<[], [llvm_ptr_ty,
- llvm_v4f64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>;
- def int_x86_avx_maskstore_ps_256 :
- GCCBuiltin<"__builtin_ia32_maskstoreps256">,
- Intrinsic<[], [llvm_ptr_ty,
- llvm_v8f32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
-}
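
For both maskload and maskstore the top bit of each mask element gates its lane: masked-off loads yield zero, masked-off stores are suppressed, and masked-off lanes do not fault. Note the mask is typed as a float vector here, matching the defs above. Sketch (placeholder names):

    declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>)
    %v = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %p, <4 x double> %mask)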
-
-//===----------------------------------------------------------------------===//
-// AVX2
-
-// Integer arithmetic ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmul_dq : GCCBuiltin<"__builtin_ia32_pmuldq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
-}
-
-// Vector min, max
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmaxu_w : GCCBuiltin<"__builtin_ia32_pmaxuw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmaxs_b : GCCBuiltin<"__builtin_ia32_pmaxsb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pminu_w : GCCBuiltin<"__builtin_ia32_pminuw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pminu_d : GCCBuiltin<"__builtin_ia32_pminud256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmins_b : GCCBuiltin<"__builtin_ia32_pminsb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem, Commutative]>;
-}
-
-// Integer shift ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psll_q : GCCBuiltin<"__builtin_ia32_psllq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx2_psra_w : GCCBuiltin<"__builtin_ia32_psraw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx2_psra_d : GCCBuiltin<"__builtin_ia32_psrad256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
-
- def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx2_psll_dq_bs :
-              GCCBuiltin<"__builtin_ia32_pslldqi256_byteshift">,
-    Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
-      llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx2_psrl_dq_bs :
-              GCCBuiltin<"__builtin_ia32_psrldqi256_byteshift">,
-    Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
-      llvm_i32_ty], [IntrNoMem]>;
-}
-
-// Pack ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_packsswb : GCCBuiltin<"__builtin_ia32_packsswb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_packssdw : GCCBuiltin<"__builtin_ia32_packssdw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx2_packuswb : GCCBuiltin<"__builtin_ia32_packuswb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_packusdw : GCCBuiltin<"__builtin_ia32_packusdw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem]>;
-}
-
-// Absolute value ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
-}
-
-// Horizontal arithmetic ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx2_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem]>;
- def int_x86_avx2_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem]>;
-}
-
-// Sign ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_psign_b : GCCBuiltin<"__builtin_ia32_psignb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_psign_w : GCCBuiltin<"__builtin_ia32_psignw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem]>;
- def int_x86_avx2_psign_d : GCCBuiltin<"__builtin_ia32_psignd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty], [IntrNoMem]>;
-}
-
-// Packed multiply high with round and scale
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty], [IntrNoMem, Commutative]>;
-}
-
-// Vector sign and zero extend
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_avx2_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_avx2_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_avx2_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_avx2_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_avx2_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_avx2_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_avx2_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_avx2_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_avx2_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_avx2_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_avx2_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
- [IntrNoMem]>;
-}
-
-// Vector blend
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
-}
-
-// Vector broadcast (vbroadcasti128 loads from memory; the rest broadcast a register)
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_vbroadcast_ss_ps :
- GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx2_vbroadcast_sd_pd_256 :
- GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_avx2_vbroadcast_ss_ps_256 :
- GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx2_vbroadcasti128 :
- GCCBuiltin<"__builtin_ia32_vbroadcastsi256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_x86_avx2_pbroadcastb_128 :
- GCCBuiltin<"__builtin_ia32_pbroadcastb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastb_256 :
- GCCBuiltin<"__builtin_ia32_pbroadcastb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastw_128 :
- GCCBuiltin<"__builtin_ia32_pbroadcastw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastw_256 :
- GCCBuiltin<"__builtin_ia32_pbroadcastw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastd_128 :
- GCCBuiltin<"__builtin_ia32_pbroadcastd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastd_256 :
- GCCBuiltin<"__builtin_ia32_pbroadcastd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastq_128 :
- GCCBuiltin<"__builtin_ia32_pbroadcastq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx2_pbroadcastq_256 :
- GCCBuiltin<"__builtin_ia32_pbroadcastq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
-}
-
-// Vector permutation
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_permd : GCCBuiltin<"__builtin_ia32_permvarsi256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
- [IntrNoMem]>;
- def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
- [IntrNoMem]>;
- def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
-}
-
-// Vector extract and insert
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_vextracti128 : GCCBuiltin<"__builtin_ia32_extract128i256">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-}
-
-// Conditional load ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">,
- Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty],
- [IntrReadArgMem]>;
- def int_x86_avx2_maskload_q : GCCBuiltin<"__builtin_ia32_maskloadq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty],
- [IntrReadArgMem]>;
- def int_x86_avx2_maskload_d_256 : GCCBuiltin<"__builtin_ia32_maskloadd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty],
- [IntrReadArgMem]>;
- def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty],
- [IntrReadArgMem]>;
-}
-
-// Conditional store ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_maskstore_d : GCCBuiltin<"__builtin_ia32_maskstored">,
- Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrReadWriteArgMem]>;
- def int_x86_avx2_maskstore_q : GCCBuiltin<"__builtin_ia32_maskstoreq">,
- Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrReadWriteArgMem]>;
- def int_x86_avx2_maskstore_d_256 :
- GCCBuiltin<"__builtin_ia32_maskstored256">,
- Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty],
- [IntrReadWriteArgMem]>;
- def int_x86_avx2_maskstore_q_256 :
- GCCBuiltin<"__builtin_ia32_maskstoreq256">,
- Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty],
- [IntrReadWriteArgMem]>;
-}
-
-// Variable bit shift ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_psllv_d : GCCBuiltin<"__builtin_ia32_psllv4si">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_avx2_psllv_d_256 : GCCBuiltin<"__builtin_ia32_psllv8si">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
- [IntrNoMem]>;
- def int_x86_avx2_psllv_q : GCCBuiltin<"__builtin_ia32_psllv2di">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_avx2_psllv_q_256 : GCCBuiltin<"__builtin_ia32_psllv4di">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
- [IntrNoMem]>;
-
- def int_x86_avx2_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv4si">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_avx2_psrlv_d_256 : GCCBuiltin<"__builtin_ia32_psrlv8si">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
- [IntrNoMem]>;
- def int_x86_avx2_psrlv_q : GCCBuiltin<"__builtin_ia32_psrlv2di">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_avx2_psrlv_q_256 : GCCBuiltin<"__builtin_ia32_psrlv4di">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
- [IntrNoMem]>;
-
- def int_x86_avx2_psrav_d : GCCBuiltin<"__builtin_ia32_psrav4si">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_avx2_psrav_d_256 : GCCBuiltin<"__builtin_ia32_psrav8si">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
- [IntrNoMem]>;
-}
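
Unlike the uniform-count shifts earlier, these take a per-element count vector; for the logical forms any count at or above the element width zeroes that element, while psrav fills it with the sign bit. Sketch (placeholder names):

    declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>)
    %r = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a, <4 x i32> %counts)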
-
-// Gather ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem]>;
-
- def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
- Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
- Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
- Intrinsic<[llvm_v8i32_ty],
- [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
- def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem]>;
-}
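
All the gathers share one operand order: pass-through source, base pointer, index vector, mask, and an i8 scale immediate that must be 1, 2, 4, or 8. Lanes whose mask sign bit is clear keep the pass-through value. Sketch with placeholder names:

    declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8)
    %g = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %src, i8* %base,
                                                      <4 x i32> %idx, <2 x double> %mask, i8 8)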
-
-// Misc.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
- Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty], [IntrNoMem]>;
- def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_i32_ty], [IntrNoMem, Commutative]>;
- def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
-}
-
-//===----------------------------------------------------------------------===//
-// FMA3 and FMA4
-
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_fma_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmadd_ps : GCCBuiltin<"__builtin_ia32_vfmaddps">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmadd_pd : GCCBuiltin<"__builtin_ia32_vfmaddpd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmsub_sd : GCCBuiltin<"__builtin_ia32_vfmsubsd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmsub_ps : GCCBuiltin<"__builtin_ia32_vfmsubps">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmsub_pd : GCCBuiltin<"__builtin_ia32_vfmsubpd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmadd_sd : GCCBuiltin<"__builtin_ia32_vfnmaddsd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmadd_ps : GCCBuiltin<"__builtin_ia32_vfnmaddps">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmadd_pd : GCCBuiltin<"__builtin_ia32_vfnmaddpd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmsub_sd : GCCBuiltin<"__builtin_ia32_vfnmsubsd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmsub_ps : GCCBuiltin<"__builtin_ia32_vfnmsubps">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmsub_pd : GCCBuiltin<"__builtin_ia32_vfnmsubpd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmaddsub_ps_256 :
- GCCBuiltin<"__builtin_ia32_vfmaddsubps256">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmaddsub_pd_256 :
- GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmsubadd_pd : GCCBuiltin<"__builtin_ia32_vfmsubaddpd">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmsubadd_ps_256 :
- GCCBuiltin<"__builtin_ia32_vfmsubaddps256">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
- [IntrNoMem]>;
- def int_x86_fma_vfmsubadd_pd_256 :
- GCCBuiltin<"__builtin_ia32_vfmsubaddpd256">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
- [IntrNoMem]>;
-}
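
Every entry computes a fused a*b+c with a single rounding; the fms forms negate the addend, the fnm forms negate the product, and the scalar ss/sd forms operate on element 0 only while passing the upper elements of the first operand through. Sketch (placeholder names):

    declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
    %r = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c)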
-
-//===----------------------------------------------------------------------===//
-// XOP
-
-let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_xop_vpermil2pd_256 :
- GCCBuiltin<"__builtin_ia32_vpermil2pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpermil2ps_256 :
- GCCBuiltin<"__builtin_ia32_vpermil2ps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_xop_vfrcz_ps : GCCBuiltin<"__builtin_ia32_vfrczps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_xop_vfrcz_sd : GCCBuiltin<"__builtin_ia32_vfrczsd">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_xop_vfrcz_ss : GCCBuiltin<"__builtin_ia32_vfrczss">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_xop_vfrcz_pd_256 : GCCBuiltin<"__builtin_ia32_vfrczpd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
- def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
-
- def int_x86_xop_vpcmov :
- GCCBuiltin<"__builtin_ia32_vpcmov">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpcmov_256 :
- GCCBuiltin<"__builtin_ia32_vpcmov_256">,
- Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty],
- [IntrNoMem]>;
-
- def int_x86_xop_vpcomb : GCCBuiltin<"__builtin_ia32_vpcomb">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomw : GCCBuiltin<"__builtin_ia32_vpcomw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomd : GCCBuiltin<"__builtin_ia32_vpcomd">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomq : GCCBuiltin<"__builtin_ia32_vpcomq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomub : GCCBuiltin<"__builtin_ia32_vpcomub">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomuw : GCCBuiltin<"__builtin_ia32_vpcomuw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomud : GCCBuiltin<"__builtin_ia32_vpcomud">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomuq : GCCBuiltin<"__builtin_ia32_vpcomuq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
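
The vpcom family encodes its predicate in the i8 immediate (0 lt, 1 le, 2 gt, 3 ge, 4 eq, 5 neq, 6 false, 7 true) and produces an all-ones or all-zeros element per lane; the u-prefixed forms compare unsigned. Sketch with placeholder names (immediate 4 = equal):

    declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8)
    %m = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a, <16 x i8> %b, i8 4)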
-
- def int_x86_xop_vphaddbd :
- GCCBuiltin<"__builtin_ia32_vphaddbd">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_xop_vphaddbq :
- GCCBuiltin<"__builtin_ia32_vphaddbq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_xop_vphaddbw :
- GCCBuiltin<"__builtin_ia32_vphaddbw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_xop_vphadddq :
- GCCBuiltin<"__builtin_ia32_vphadddq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_xop_vphaddubd :
- GCCBuiltin<"__builtin_ia32_vphaddubd">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_xop_vphaddubq :
- GCCBuiltin<"__builtin_ia32_vphaddubq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_xop_vphaddubw :
- GCCBuiltin<"__builtin_ia32_vphaddubw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_xop_vphaddudq :
- GCCBuiltin<"__builtin_ia32_vphaddudq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_xop_vphadduwd :
- GCCBuiltin<"__builtin_ia32_vphadduwd">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_xop_vphadduwq :
- GCCBuiltin<"__builtin_ia32_vphadduwq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_xop_vphaddwd :
- GCCBuiltin<"__builtin_ia32_vphaddwd">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_xop_vphaddwq :
- GCCBuiltin<"__builtin_ia32_vphaddwq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_xop_vphsubbw :
- GCCBuiltin<"__builtin_ia32_vphsubbw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_xop_vphsubdq :
- GCCBuiltin<"__builtin_ia32_vphsubdq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_xop_vphsubwd :
- GCCBuiltin<"__builtin_ia32_vphsubwd">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_xop_vpmacsdd :
- GCCBuiltin<"__builtin_ia32_vpmacsdd">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpmacsdqh :
- GCCBuiltin<"__builtin_ia32_vpmacsdqh">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpmacsdql :
- GCCBuiltin<"__builtin_ia32_vpmacsdql">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpmacssdd :
- GCCBuiltin<"__builtin_ia32_vpmacssdd">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpmacssdqh :
- GCCBuiltin<"__builtin_ia32_vpmacssdqh">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpmacssdql :
- GCCBuiltin<"__builtin_ia32_vpmacssdql">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpmacsswd :
- GCCBuiltin<"__builtin_ia32_vpmacsswd">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpmacssww :
- GCCBuiltin<"__builtin_ia32_vpmacssww">,
- Intrinsic<[llvm_v8i16_ty],
- [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpmacswd :
- GCCBuiltin<"__builtin_ia32_vpmacswd">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpmacsww :
- GCCBuiltin<"__builtin_ia32_vpmacsww">,
- Intrinsic<[llvm_v8i16_ty],
- [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpmadcsswd :
- GCCBuiltin<"__builtin_ia32_vpmadcsswd">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpmadcswd :
- GCCBuiltin<"__builtin_ia32_vpmadcswd">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpperm :
- GCCBuiltin<"__builtin_ia32_vpperm">,
- Intrinsic<[llvm_v16i8_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
-
- def int_x86_xop_vprotb : GCCBuiltin<"__builtin_ia32_vprotb">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotd : GCCBuiltin<"__builtin_ia32_vprotd">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotq : GCCBuiltin<"__builtin_ia32_vprotq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotw : GCCBuiltin<"__builtin_ia32_vprotw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotbi : GCCBuiltin<"__builtin_ia32_vprotbi">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotdi : GCCBuiltin<"__builtin_ia32_vprotdi">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotqi : GCCBuiltin<"__builtin_ia32_vprotqi">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vprotwi : GCCBuiltin<"__builtin_ia32_vprotwi">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_xop_vpshab :
- GCCBuiltin<"__builtin_ia32_vpshab">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpshad :
- GCCBuiltin<"__builtin_ia32_vpshad">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpshaq :
- GCCBuiltin<"__builtin_ia32_vpshaq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpshaw :
- GCCBuiltin<"__builtin_ia32_vpshaw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpshlb :
- GCCBuiltin<"__builtin_ia32_vpshlb">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpshld :
- GCCBuiltin<"__builtin_ia32_vpshld">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpshlq :
- GCCBuiltin<"__builtin_ia32_vpshlq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
- def int_x86_xop_vpshlw :
- GCCBuiltin<"__builtin_ia32_vpshlw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
-
-//===----------------------------------------------------------------------===//
-// MMX
-
-// Empty MMX state op.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_emms : GCCBuiltin<"__builtin_ia32_emms">,
- Intrinsic<[], [], []>;
- def int_x86_mmx_femms : GCCBuiltin<"__builtin_ia32_femms">,
- Intrinsic<[], [], []>;
-}
-
-// Integer arithmetic ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- // Addition
- def int_x86_mmx_padd_b : GCCBuiltin<"__builtin_ia32_paddb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_padd_w : GCCBuiltin<"__builtin_ia32_paddw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_padd_d : GCCBuiltin<"__builtin_ia32_paddd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_padd_q : GCCBuiltin<"__builtin_ia32_paddq">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
-
- def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_padds_w : GCCBuiltin<"__builtin_ia32_paddsw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-
- def int_x86_mmx_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-
- // Subtraction
- def int_x86_mmx_psub_b : GCCBuiltin<"__builtin_ia32_psubb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_psub_w : GCCBuiltin<"__builtin_ia32_psubw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_psub_d : GCCBuiltin<"__builtin_ia32_psubd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_psub_q : GCCBuiltin<"__builtin_ia32_psubq">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
-
- def int_x86_mmx_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
-
- def int_x86_mmx_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
-
- // Multiplication
- def int_x86_mmx_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmull_w : GCCBuiltin<"__builtin_ia32_pmullw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-
- // Bitwise operations
- def int_x86_mmx_pand : GCCBuiltin<"__builtin_ia32_pand">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_pandn : GCCBuiltin<"__builtin_ia32_pandn">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_por : GCCBuiltin<"__builtin_ia32_por">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_pxor : GCCBuiltin<"__builtin_ia32_pxor">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
-
- // Averages
- def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-
- // Maximum
- def int_x86_mmx_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-
- // Minimum
- def int_x86_mmx_pminu_b : GCCBuiltin<"__builtin_ia32_pminub">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-
- // Packed sum of absolute differences
- def int_x86_mmx_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-}
-
-// Integer shift ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- // Shift left logical
- def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
-
- def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
-
- def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
-
- def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_mmx_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_mmx_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_mmx_psrai_d : GCCBuiltin<"__builtin_ia32_psradi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_i32_ty], [IntrNoMem]>;
-}
-
-// Pack ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_packsswb : GCCBuiltin<"__builtin_ia32_packsswb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_packssdw : GCCBuiltin<"__builtin_ia32_packssdw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_packuswb : GCCBuiltin<"__builtin_ia32_packuswb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
-}
-
-// Unpacking ops.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_punpckhbw : GCCBuiltin<"__builtin_ia32_punpckhbw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_punpckhwd : GCCBuiltin<"__builtin_ia32_punpckhwd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_punpckhdq : GCCBuiltin<"__builtin_ia32_punpckhdq">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_punpcklbw : GCCBuiltin<"__builtin_ia32_punpcklbw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_punpcklwd : GCCBuiltin<"__builtin_ia32_punpcklwd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
- def int_x86_mmx_punpckldq : GCCBuiltin<"__builtin_ia32_punpckldq">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
-}
-
-// Integer comparison ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
- def int_x86_mmx_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
-
- def int_x86_mmx_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
- def int_x86_mmx_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
-}
-
-// Misc.
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_mmx_maskmovq : GCCBuiltin<"__builtin_ia32_maskmovq">,
- Intrinsic<[], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_ptr_ty], []>;
-
- def int_x86_mmx_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb">,
- Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
-
- def int_x86_mmx_movnt_dq : GCCBuiltin<"__builtin_ia32_movntq">,
- Intrinsic<[], [llvm_ptrx86mmx_ty, llvm_x86mmx_ty], []>;
-
- def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">,
- Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
- def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">,
- Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-}
-
-//===----------------------------------------------------------------------===//
-// BMI
-
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_bmi_bextr_32 : GCCBuiltin<"__builtin_ia32_bextr_u32">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_bmi_bextr_64 : GCCBuiltin<"__builtin_ia32_bextr_u64">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_bmi_bzhi_32 : GCCBuiltin<"__builtin_ia32_bzhi_si">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_bmi_bzhi_64 : GCCBuiltin<"__builtin_ia32_bzhi_di">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_bmi_pdep_32 : GCCBuiltin<"__builtin_ia32_pdep_si">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_bmi_pdep_64 : GCCBuiltin<"__builtin_ia32_pdep_di">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_bmi_pext_32 : GCCBuiltin<"__builtin_ia32_pext_si">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_bmi_pext_64 : GCCBuiltin<"__builtin_ia32_pext_di">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
-}
-
-//===----------------------------------------------------------------------===//
-// FS/GS Base
-
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_rdfsbase_32 : GCCBuiltin<"__builtin_ia32_rdfsbase32">,
- Intrinsic<[llvm_i32_ty], []>;
- def int_x86_rdgsbase_32 : GCCBuiltin<"__builtin_ia32_rdgsbase32">,
- Intrinsic<[llvm_i32_ty], []>;
- def int_x86_rdfsbase_64 : GCCBuiltin<"__builtin_ia32_rdfsbase64">,
- Intrinsic<[llvm_i64_ty], []>;
- def int_x86_rdgsbase_64 : GCCBuiltin<"__builtin_ia32_rdgsbase64">,
- Intrinsic<[llvm_i64_ty], []>;
- def int_x86_wrfsbase_32 : GCCBuiltin<"__builtin_ia32_wrfsbase32">,
- Intrinsic<[], [llvm_i32_ty]>;
- def int_x86_wrgsbase_32 : GCCBuiltin<"__builtin_ia32_wrgsbase32">,
- Intrinsic<[], [llvm_i32_ty]>;
- def int_x86_wrfsbase_64 : GCCBuiltin<"__builtin_ia32_wrfsbase64">,
- Intrinsic<[], [llvm_i64_ty]>;
- def int_x86_wrgsbase_64 : GCCBuiltin<"__builtin_ia32_wrgsbase64">,
- Intrinsic<[], [llvm_i64_ty]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Half float conversion
-
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
-}
-
-//===----------------------------------------------------------------------===//
-// RDRAND intrinsics. Return a random value and whether it is valid.
-
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- // These are declared side-effecting so they don't get eliminated by CSE or
- // LICM.
- def int_x86_rdrand_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>;
- def int_x86_rdrand_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>;
- def int_x86_rdrand_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
-}
-
-//===----------------------------------------------------------------------===//
-// RTM intrinsics. Transactional Memory support.
-
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_xbegin : GCCBuiltin<"__builtin_ia32_xbegin">,
- Intrinsic<[llvm_i32_ty], [], []>;
- def int_x86_xend : GCCBuiltin<"__builtin_ia32_xend">,
- Intrinsic<[], [], []>;
- def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">,
- Intrinsic<[], [llvm_i8_ty], [IntrNoReturn]>;
-}
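
A minimal sketch of how the TableGen records above surface on the C++ side (assuming the post-move llvm/IR/ header layout; TableGen mangles int_x86_rdrand_32 into the enum value Intrinsic::x86_rdrand_32 — the helper below is illustrative, not code from this patch):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Emits "call { i32, i32 } @llvm.x86.rdrand.32()" and splits the pair into
    // the random value and the validity flag, matching the two result types
    // declared for int_x86_rdrand_32 above.
    static Value *emitRdRand32(Module &M, IRBuilder<> &B, Value *&Valid) {
      Function *RdRand = Intrinsic::getDeclaration(&M, Intrinsic::x86_rdrand_32);
      CallInst *Call = B.CreateCall(RdRand, "rd");
      Valid = B.CreateExtractValue(Call, 1, "rd.ok");  // nonzero if valid
      return B.CreateExtractValue(Call, 0, "rd.val");  // the random i32
    }
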
diff --git a/include/llvm/LLVMContext.h b/include/llvm/LLVMContext.h
deleted file mode 100644
index 5903e2e55e1f..000000000000
--- a/include/llvm/LLVMContext.h
+++ /dev/null
@@ -1,113 +0,0 @@
-//===-- llvm/LLVMContext.h - Class for managing "global" state --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares LLVMContext, a container of "global" state in LLVM, such
-// as the global type and constant uniquing tables.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LLVMCONTEXT_H
-#define LLVM_LLVMCONTEXT_H
-
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-class LLVMContextImpl;
-class StringRef;
-class Twine;
-class Instruction;
-class Module;
-class SMDiagnostic;
-template <typename T> class SmallVectorImpl;
-
-/// This is an important class for using LLVM in a threaded context. It
-/// (opaquely) owns and manages the core "global" data of LLVM's core
-/// infrastructure, including the type and constant uniquing tables.
-/// LLVMContext itself provides no locking guarantees, so you should be careful
-/// to have one context per thread.
-class LLVMContext {
-public:
- LLVMContextImpl *const pImpl;
- LLVMContext();
- ~LLVMContext();
-
- // Pinned metadata names, which always have the same value. This is a
- // compile-time performance optimization, not a correctness optimization.
- enum {
- MD_dbg = 0, // "dbg"
- MD_tbaa = 1, // "tbaa"
- MD_prof = 2, // "prof"
- MD_fpmath = 3, // "fpmath"
- MD_range = 4, // "range"
- MD_tbaa_struct = 5 // "tbaa.struct"
- };
-
- /// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
- /// This ID is uniqued across modules in the current LLVMContext.
- unsigned getMDKindID(StringRef Name) const;
-
- /// getMDKindNames - Populate client supplied SmallVector with the name for
- /// custom metadata IDs registered in this LLVMContext.
- void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
-
-
- typedef void (*InlineAsmDiagHandlerTy)(const SMDiagnostic&, void *Context,
- unsigned LocCookie);
-
- /// setInlineAsmDiagnosticHandler - This method sets a handler that is invoked
- /// when problems with inline asm are detected by the backend. The first
- /// argument is a function pointer and the second is a context pointer that
- /// gets passed into the DiagHandler.
- ///
- /// LLVMContext doesn't take ownership or interpret either of these
- /// pointers.
- void setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler,
- void *DiagContext = 0);
-
- /// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by
- /// setInlineAsmDiagnosticHandler.
- InlineAsmDiagHandlerTy getInlineAsmDiagnosticHandler() const;
-
- /// getInlineAsmDiagnosticContext - Return the diagnostic context set by
- /// setInlineAsmDiagnosticHandler.
- void *getInlineAsmDiagnosticContext() const;
-
-
- /// emitError - Emit an error message to the currently installed error handler
- /// with optional location information. This function returns, so code should
- /// be prepared to drop the erroneous construct on the floor and "not crash".
- /// The generated code need not be correct. The error message will be
- /// implicitly prefixed with "error: " and should not end with a ".".
- void emitError(unsigned LocCookie, const Twine &ErrorStr);
- void emitError(const Instruction *I, const Twine &ErrorStr);
- void emitError(const Twine &ErrorStr);
-
-private:
- LLVMContext(LLVMContext&) LLVM_DELETED_FUNCTION;
- void operator=(LLVMContext&) LLVM_DELETED_FUNCTION;
-
- /// addModule - Register a module as being instantiated in this context. If
- /// the context is deleted, the module will be deleted as well.
- void addModule(Module*);
-
- /// removeModule - Unregister a module from this context.
- void removeModule(Module*);
-
- // Module needs access to the add/removeModule methods.
- friend class Module;
-};
-
-/// getGlobalContext - Returns a global context. This is for LLVM clients that
-/// only care about operating on a single thread.
-extern LLVMContext &getGlobalContext();
-
-}
-
-#endif
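
The class comment above is the entire threading contract: no locking, one context per thread. A minimal sketch of that discipline, assuming the llvm/IR/LLVMContext.h location this file is being moved to:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    void perThreadWork() {
      llvm::LLVMContext Ctx;          // private to this thread, so no locking
      llvm::Module M("worker", Ctx);  // types/constants are uniqued within Ctx
      // Pinned metadata kinds resolve to the fixed IDs in the enum above.
      unsigned TBAA = Ctx.getMDKindID("tbaa");  // == LLVMContext::MD_tbaa
      (void)TBAA;
    }
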
diff --git a/include/llvm/LinkAllIR.h b/include/llvm/LinkAllIR.h
new file mode 100644
index 000000000000..4c1aaca7a385
--- /dev/null
+++ b/include/llvm/LinkAllIR.h
@@ -0,0 +1,53 @@
+//===----- LinkAllIR.h - Reference All VMCore Code --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file pulls in all the object modules of the VMCore library so
+// that tools like llc, opt, and lli can ensure they are linked with all symbols
+// from libVMCore.a. It should only be used from a tool's main program.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LINKALLIR_H
+#define LLVM_LINKALLIR_H
+
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/TimeValue.h"
+#include <cstdlib>
+
+namespace {
+ struct ForceVMCoreLinking {
+ ForceVMCoreLinking() {
+ // We must reference VMCore in such a way that compilers will not
+ // delete it all as dead code, even with whole program optimization,
+ // yet which is effectively a NO-OP. As the compiler isn't smart enough
+ // to know that getenv() never returns -1, this will do the job.
+ if (std::getenv("bar") != (char*) -1)
+ return;
+ (void)new llvm::Module("", llvm::getGlobalContext());
+ (void)new llvm::UnreachableInst(llvm::getGlobalContext());
+ (void) llvm::createVerifierPass();
+ }
+ } ForceVMCoreLinking;
+}
+
+#endif
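
As the header comment says, this belongs in a tool's main program only; including it anywhere else drags all of VMCore into that client. Assumed usage:

    // mytool.cpp (hypothetical driver): the include alone instantiates the
    // static ForceVMCoreLinking object above and defeats dead-stripping.
    #include "llvm/LinkAllIR.h"

    int main(int argc, char **argv) {
      // ... normal tool setup; no explicit VMCore reference is required.
      return 0;
    }
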
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 806e4b37b73d..1f017e471de5 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -16,23 +16,25 @@
#define LLVM_LINKALLPASSES_H
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/CallPrinter.h"
#include "llvm/Analysis/DomPrinter.h"
#include "llvm/Analysis/FindUsedTypes.h"
#include "llvm/Analysis/IntervalPartition.h"
+#include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/RegionPass.h"
#include "llvm/Analysis/RegionPrinter.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/Lint.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
-#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/IR/Function.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Vectorize.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Transforms/Vectorize.h"
#include <cstdlib>
namespace {
@@ -57,6 +59,8 @@ namespace {
(void) llvm::createBlockPlacementPass();
(void) llvm::createBoundsCheckingPass();
(void) llvm::createBreakCriticalEdgesPass();
+ (void) llvm::createCallGraphPrinterPass();
+ (void) llvm::createCallGraphViewerPass();
(void) llvm::createCFGSimplificationPass();
(void) llvm::createConstantMergePass();
(void) llvm::createConstantPropagationPass();
@@ -147,7 +151,7 @@ namespace {
(void) llvm::createMergeFunctionsPass();
(void) llvm::createPrintModulePass(0);
(void) llvm::createPrintFunctionPass("", 0);
- (void) llvm::createDbgInfoPrinterPass();
+ (void) llvm::createPrintBasicBlockPass(0);
(void) llvm::createModuleDebugInfoPrinterPass();
(void) llvm::createPartialInliningPass();
(void) llvm::createLintPass();
diff --git a/include/llvm/LinkAllVMCore.h b/include/llvm/LinkAllVMCore.h
deleted file mode 100644
index 83684c0fb65d..000000000000
--- a/include/llvm/LinkAllVMCore.h
+++ /dev/null
@@ -1,53 +0,0 @@
-//===- LinkAllVMCore.h - Reference All VMCore Code --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This header file pulls in all the object modules of the VMCore library so
-// that tools like llc, opt, and lli can ensure they are linked with all symbols
-// from libVMCore.a It should only be used from a tool's main program.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LINKALLVMCORE_H
-#define LLVM_LINKALLVMCORE_H
-
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/Program.h"
-#include "llvm/Support/Signals.h"
-#include "llvm/Support/TimeValue.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/MathExtras.h"
-#include <cstdlib>
-
-namespace {
- struct ForceVMCoreLinking {
- ForceVMCoreLinking() {
- // We must reference VMCore in such a way that compilers will not
- // delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough
- // to know that getenv() never returns -1, this will do the job.
- if (std::getenv("bar") != (char*) -1)
- return;
- (void)new llvm::Module("", llvm::getGlobalContext());
- (void)new llvm::UnreachableInst(llvm::getGlobalContext());
- (void) llvm::createVerifierPass();
- }
- } ForceVMCoreLinking;
-}
-
-#endif
diff --git a/include/llvm/Linker.h b/include/llvm/Linker.h
index 1ebcd6b53863..679638427d67 100644
--- a/include/llvm/Linker.h
+++ b/include/llvm/Linker.h
@@ -6,10 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file defines the interface to the module/file/archive linker.
-//
-//===----------------------------------------------------------------------===//
#ifndef LLVM_LINKER_H
#define LLVM_LINKER_H
@@ -19,7 +15,6 @@
#include <vector>
namespace llvm {
- namespace sys { class Path; }
class Module;
class LLVMContext;
@@ -31,26 +26,17 @@ class StringRef;
/// In this case the Linker still retains ownership of the Module. If the
/// releaseModule() method is used, the ownership of the Module is transferred
/// to the caller and the Linker object is only suitable for destruction.
-/// The Linker can link Modules from memory, bitcode files, or bitcode
-/// archives. It retains a set of search paths in which to find any libraries
-/// presented to it. By default, the linker will generate error and warning
-/// messages to stderr but this capability can be turned off with the
-/// QuietWarnings and QuietErrors flags. It can also be instructed to verbosely
-/// print out the linking actions it is taking with the Verbose flag.
+/// The Linker can link Modules from memory. By default, the linker
+/// will generate error and warning messages to stderr but this capability can
+/// be turned off with the QuietWarnings and QuietErrors flags. It can also be
+/// instructed to verbosely print out the linking actions it is taking with
+/// the Verbose flag.
/// @brief The LLVM Linker.
class Linker {
/// @name Types
/// @{
public:
- /// This type is used to pass the linkage items (libraries and files) to
- /// the LinkItems function. It is composed of string/bool pairs. The string
- /// provides the name of the file or library (as with the -l option). The
- /// bool should be true for libraries and false for files, signifying
- /// "isLibrary".
- /// @brief A list of linkage items
- typedef std::vector<std::pair<std::string,bool> > ItemList;
-
/// This enumeration is used to control various optional features of the
/// linker.
enum ControlFlags {
@@ -58,12 +44,12 @@ class Linker {
QuietWarnings = 2, ///< Don't print warnings to stderr.
QuietErrors = 4 ///< Don't print errors to stderr.
};
-
+
enum LinkerMode {
DestroySource = 0, // Allow source module to be destroyed.
PreserveSource = 1 // Preserve the source module.
};
-
+
/// @}
/// @name Constructors
/// @{
@@ -104,16 +90,10 @@ class Linker {
 /// must arrange for its destruction. After this method is called, the Linker
/// terminates the linking session for the returned Module. It will no
/// longer utilize the returned Module but instead resets itself for
- /// subsequent linking as if the constructor had been called. The Linker's
- /// LibPaths and flags to be reset, and memory will be released.
+ /// subsequent linking as if the constructor had been called.
/// @brief Release the linked/composite module.
Module* releaseModule();
- /// This method gets the list of libraries that form the path that the
- /// Linker will search when it is presented with a library name.
- /// @brief Get the Linkers library path
- const std::vector<sys::Path>& getLibPaths() const { return LibPaths; }
-
/// This method returns an error string suitable for printing to the user.
/// The return value will be empty unless an error occurred in one of the
/// LinkIn* methods. In those cases, the LinkIn* methods will have returned
@@ -128,130 +108,16 @@ class Linker {
/// @name Mutators
/// @{
public:
- /// Add a path to the list of paths that the Linker will search. The Linker
- /// accumulates the set of libraries added
- /// library paths for the target platform. The standard libraries will
- /// always be searched last. The added libraries will be searched in the
- /// order added.
- /// @brief Add a path.
- void addPath(const sys::Path& path);
-
- /// Add a set of paths to the list of paths that the linker will search. The
- /// Linker accumulates the set of libraries added. The \p paths will be
- /// added to the end of the Linker's list. Order will be retained.
- /// @brief Add a set of paths.
- void addPaths(const std::vector<std::string>& paths);
-
- /// This method augments the Linker's list of library paths with the system
- /// paths of the host operating system, include LLVM_LIB_SEARCH_PATH.
- /// @brief Add the system paths.
- void addSystemPaths();
-
- /// Control optional linker behavior by setting a group of flags. The flags
- /// are defined in the ControlFlags enumeration.
- /// @see ControlFlags
- /// @brief Set control flags.
- void setFlags(unsigned flags) { Flags = flags; }
-
- /// This method is the main interface to the linker. It can be used to
- /// link a set of linkage items into a module. A linkage item is either a
- /// file name with fully qualified path, or a library for which the Linker's
- /// LibraryPath will be utilized to locate the library. The bool value in
- /// the LinkItemKind should be set to true for libraries. This function
- /// allows linking to preserve the order of specification associated with
- /// the command line, or for other purposes. Each item will be linked in
- /// turn as it occurs in \p Items.
- /// @returns true if an error occurred, false otherwise
- /// @see LinkItemKind
- /// @see getLastError
- bool LinkInItems (
- const ItemList& Items, ///< Set of libraries/files to link in
- ItemList& NativeItems ///< Output list of native files/libs
- );
-
- /// This function links the bitcode \p Files into the composite module.
- /// Note that this does not do any linking of unresolved symbols. The \p
- /// Files are all completely linked into \p HeadModule regardless of
- /// unresolved symbols. This function just loads each bitcode file and
- /// calls LinkInModule on them.
- /// @returns true if an error occurs, false otherwise
- /// @see getLastError
- /// @brief Link in multiple files.
- bool LinkInFiles (
- const std::vector<sys::Path> & Files ///< Files to link in
- );
-
- /// This function links a single bitcode file, \p File, into the composite
- /// module. Note that this does not attempt to resolve symbols. This method
- /// just loads the bitcode file and calls LinkInModule on it. If an error
- /// occurs, the Linker's error string is set.
- /// @returns true if an error occurs, false otherwise
- /// @see getLastError
- /// @brief Link in a single file.
- bool LinkInFile(
- const sys::Path& File, ///< File to link in.
- bool &is_native ///< Indicates if the file is native object file
- );
-
- /// This function provides a way to selectively link in a set of modules,
- /// found in libraries, based on the unresolved symbols in the composite
- /// module. Each item in \p Libraries should be the base name of a library,
- /// as if given with the -l option of a linker tool. The Linker's LibPaths
- /// are searched for the \p Libraries and any found will be linked in with
- /// LinkInArchive. If an error occurs, the Linker's error string is set.
- /// @see LinkInArchive
- /// @see getLastError
- /// @returns true if an error occurs, false otherwise
- /// @brief Link libraries into the module
- bool LinkInLibraries (
- const std::vector<std::string> & Libraries ///< Libraries to link in
- );
-
- /// This function provides a way to selectively link in a set of modules,
- /// found in one library, based on the unresolved symbols in the composite
- /// module.The \p Library should be the base name of a library, as if given
- /// with the -l option of a linker tool. The Linker's LibPaths are searched
- /// for the \p Library and if found, it will be linked in with via the
- /// LinkInArchive method. If an error occurs, the Linker's error string is
- /// set.
- /// @see LinkInArchive
- /// @see getLastError
- /// @returns true if an error occurs, false otherwise
- /// @brief Link one library into the module
- bool LinkInLibrary (
- StringRef Library, ///< The library to link in
- bool& is_native ///< Indicates if lib a native library
- );
-
- /// This function links one bitcode archive, \p Filename, into the module.
- /// The archive is searched to resolve outstanding symbols. Any modules in
- /// the archive that resolve outstanding symbols will be linked in. The
- /// library is searched repeatedly until no more modules that resolve
- /// symbols can be found. If an error occurs, the error string is set.
- /// To speed up this function, ensure the archive has been processed
- /// llvm-ranlib or the S option was given to llvm-ar when the archive was
- /// created. These tools add a symbol table to the archive which makes the
- /// search for undefined symbols much faster.
- /// @see getLastError
- /// @returns true if an error occurs, otherwise false.
- /// @brief Link in one archive.
- bool LinkInArchive(
- const sys::Path& Filename, ///< Filename of the archive to link
- bool& is_native ///< Indicates if archive is a native archive
- );
-
/// This method links the \p Src module into the Linker's Composite module
- /// by calling LinkModules. All the other LinkIn* methods eventually
- /// result in calling this method to link a Module into the Linker's
- /// composite.
+ /// by calling LinkModules.
/// @see LinkModules
/// @returns True if an error occurs, false otherwise.
/// @brief Link in a module.
bool LinkInModule(
Module* Src, ///< Module linked into \p Dest
std::string* ErrorMsg = 0 /// Error/diagnostic string
- ) {
- return LinkModules(Composite, Src, Linker::DestroySource, ErrorMsg );
+ ) {
+ return LinkModules(Composite, Src, Linker::DestroySource, ErrorMsg);
}
/// This is the heart of the linker. This method will take unconditional
@@ -268,21 +134,10 @@ class Linker {
static bool LinkModules(Module* Dest, Module* Src, unsigned Mode,
std::string* ErrorMsg);
- /// This function looks through the Linker's LibPaths to find a library with
- /// the name \p Filename. If the library cannot be found, the returned path
- /// will be empty (i.e. sys::Path::isEmpty() will return true).
- /// @returns A sys::Path to the found library
- /// @brief Find a library from its short name.
- sys::Path FindLib(StringRef Filename);
-
/// @}
/// @name Implementation
/// @{
private:
- /// Read in and parse the bitcode file named by FN and return the
- /// Module it contains (wrapped in an auto_ptr), or 0 if an error occurs.
- std::auto_ptr<Module> LoadObject(const sys::Path& FN);
-
bool warning(StringRef message);
bool error(StringRef message);
void verbose(StringRef message);
@@ -293,7 +148,6 @@ class Linker {
private:
LLVMContext& Context; ///< The context for global information
Module* Composite; ///< The composite module linked together
- std::vector<sys::Path> LibPaths; ///< The library search paths
unsigned Flags; ///< Flags to control optional behavior.
std::string Error; ///< Text of error that occurred.
std::string ProgramName; ///< Name of the program being linked
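
With the library and archive plumbing gone, the one remaining entry point is the static LinkModules declared above. A sketch of a caller, using the signature exactly as declared here (the surrounding driver is assumed):

    #include "llvm/Linker.h"
    #include "llvm/Support/raw_ostream.h"

    // Merges Src into Dest while keeping Src alive; returns true on error,
    // matching the convention documented above.
    bool mergeModules(llvm::Module *Dest, llvm::Module *Src) {
      std::string Err;
      if (llvm::Linker::LinkModules(Dest, Src, llvm::Linker::PreserveSource,
                                    &Err)) {
        llvm::errs() << "link failed: " << Err << "\n";
        return true;
      }
      return false;
    }
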
diff --git a/include/llvm/MC/EDInstInfo.h b/include/llvm/MC/EDInstInfo.h
deleted file mode 100644
index 5b024675cdc8..000000000000
--- a/include/llvm/MC/EDInstInfo.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- llvm/MC/EDInstInfo.h - EDis instruction info ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-#ifndef EDINSTINFO_H
-#define EDINSTINFO_H
-
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-#define EDIS_MAX_OPERANDS 13
-#define EDIS_MAX_SYNTAXES 2
-
-struct EDInstInfo {
- uint8_t instructionType;
- uint8_t numOperands;
- uint8_t operandTypes[EDIS_MAX_OPERANDS];
- uint8_t operandFlags[EDIS_MAX_OPERANDS];
- const signed char operandOrders[EDIS_MAX_SYNTAXES][EDIS_MAX_OPERANDS];
-};
-
-} // namespace llvm
-
-#endif
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index 72ed1a317c55..9a6b70340808 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -22,7 +22,7 @@ class MCELFObjectTargetWriter;
struct MCFixupKindInfo;
class MCFragment;
class MCInst;
-class MCInstFragment;
+class MCRelaxableFragment;
class MCObjectWriter;
class MCSection;
class MCValue;
@@ -41,6 +41,9 @@ protected: // Can only create subclasses.
public:
virtual ~MCAsmBackend();
+ /// Lifetime management.
+ virtual void reset() { }
+
/// createObjectWriter - Create a new MCObjectWriter instance for use by the
/// assembler backend to emit the final object file.
virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const = 0;
@@ -127,7 +130,7 @@ public:
/// fixup requires the associated instruction to be relaxed.
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const = 0;
/// RelaxInstruction - Relax the instruction in the given fragment to the next
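
The MCInstFragment to MCRelaxableFragment rename ripples into every backend that implements this hook. A hypothetical backend override, sketched under the signature shown above (the class's other pure virtuals are omitted):

    #include "llvm/MC/MCAsmBackend.h"

    // Sketch only: MyAsmBackend is not a real target backend.
    class MyAsmBackend : public llvm::MCAsmBackend {
    public:
      virtual bool fixupNeedsRelaxation(const llvm::MCFixup &Fixup,
                                        uint64_t Value,
                                        const llvm::MCRelaxableFragment *DF,
                                        const llvm::MCAsmLayout &Layout) const {
        // Relax once the resolved value no longer fits the short form's
        // signed 8-bit field (a typical policy, not mandated by the API).
        return int64_t(Value) != int64_t(int8_t(Value));
      }
      // ... remaining MCAsmBackend pure virtuals omitted for brevity.
    };
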
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 97aad71fd955..28256b3677ef 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -13,11 +13,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_ASM_INFO_H
-#define LLVM_TARGET_ASM_INFO_H
+#ifndef LLVM_MC_MCASMINFO_H
+#define LLVM_MC_MCASMINFO_H
-#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MachineLocation.h"
#include <cassert>
#include <vector>
@@ -48,6 +48,11 @@ namespace llvm {
/// Default is 4.
unsigned PointerSize;
+ /// CalleeSaveStackSlotSize - Size of the stack slot reserved for
+ /// callee-saved registers, in bytes.
+ /// Default is same as pointer size.
+ unsigned CalleeSaveStackSlotSize;
+
/// IsLittleEndian - True if target is little endian.
/// Default is true.
bool IsLittleEndian;
@@ -102,6 +107,9 @@ namespace llvm {
/// LabelSuffix - This is appended to emitted labels.
const char *LabelSuffix; // Defaults to ":"
+ /// DebugLabelSuffix - This is appended to emitted debug labels.
+ const char *DebugLabelSuffix; // Defaults to ":"
+
/// GlobalPrefix - If this is set to a non-empty string, it is prepended
/// onto all global symbols. This is often used for "_" or ".".
const char *GlobalPrefix; // Defaults to ""
@@ -340,7 +348,13 @@ namespace llvm {
return PointerSize;
}
- /// islittleendian - True if the target is little endian.
+ /// getCalleeSaveStackSlotSize - Get the callee-saved register stack slot
+ /// size in bytes.
+ unsigned getCalleeSaveStackSlotSize() const {
+ return CalleeSaveStackSlotSize;
+ }
+
+ /// isLittleEndian - True if the target is little endian.
bool isLittleEndian() const {
return IsLittleEndian;
}
@@ -426,6 +440,11 @@ namespace llvm {
const char *getLabelSuffix() const {
return LabelSuffix;
}
+
+ const char *getDebugLabelSuffix() const {
+ return DebugLabelSuffix;
+ }
+
const char *getGlobalPrefix() const {
return GlobalPrefix;
}
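
The new CalleeSaveStackSlotSize field exists for targets whose callee-save slots differ from the pointer width; per the comment above it defaults to the pointer size. A hypothetical target AsmInfo setting both (the fields are protected members normally assigned in a subclass constructor, as real targets do):

    #include "llvm/MC/MCAsmInfo.h"

    // Sketch of a target's MCAsmInfo subclass, not a real target.
    struct MyTargetMCAsmInfo : public llvm::MCAsmInfo {
      MyTargetMCAsmInfo() {
        PointerSize = 8;              // 64-bit pointers
        CalleeSaveStackSlotSize = 8;  // usually the same as PointerSize
      }
    };
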
diff --git a/include/llvm/MC/MCAsmInfoCOFF.h b/include/llvm/MC/MCAsmInfoCOFF.h
index 0ff3e127ed0e..7286151760c0 100644
--- a/include/llvm/MC/MCAsmInfoCOFF.h
+++ b/include/llvm/MC/MCAsmInfoCOFF.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_COFF_TARGET_ASM_INFO_H
-#define LLVM_COFF_TARGET_ASM_INFO_H
+#ifndef LLVM_MC_MCASMINFOCOFF_H
+#define LLVM_MC_MCASMINFOCOFF_H
#include "llvm/MC/MCAsmInfo.h"
@@ -33,4 +33,4 @@ namespace llvm {
}
-#endif // LLVM_COFF_TARGET_ASM_INFO_H
+#endif // LLVM_MC_MCASMINFOCOFF_H
diff --git a/include/llvm/MC/MCAsmInfoDarwin.h b/include/llvm/MC/MCAsmInfoDarwin.h
index af552de6e690..3d249f93068d 100644
--- a/include/llvm/MC/MCAsmInfoDarwin.h
+++ b/include/llvm/MC/MCAsmInfoDarwin.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DARWIN_TARGET_ASM_INFO_H
-#define LLVM_DARWIN_TARGET_ASM_INFO_H
+#ifndef LLVM_MC_MCASMINFODARWIN_H
+#define LLVM_MC_MCASMINFODARWIN_H
#include "llvm/MC/MCAsmInfo.h"
@@ -26,4 +26,4 @@ namespace llvm {
}
-#endif // LLVM_DARWIN_TARGET_ASM_INFO_H
+#endif // LLVM_MC_MCASMINFODARWIN_H
diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h
index cf79216d076a..3058b7b48742 100644
--- a/include/llvm/MC/MCAsmLayout.h
+++ b/include/llvm/MC/MCAsmLayout.h
@@ -21,10 +21,10 @@ class MCSymbolData;
/// Encapsulates the layout of an assembly file at a particular point in time.
///
-/// Assembly may requiring compute multiple layouts for a particular assembly
+/// Assembly may require computing multiple layouts for a particular assembly
/// file as part of the relaxation process. This class encapsulates the layout
/// at a single point in time in such a way that it is always possible to
-/// efficiently compute the exact addresses of any symbol in the assembly file,
+/// efficiently compute the exact address of any symbol in the assembly file,
/// even during the relaxation process.
class MCAsmLayout {
public:
@@ -39,14 +39,20 @@ private:
/// The last fragment which was laid out, or 0 if nothing has been laid
/// out. Fragments are always laid out in order, so all fragments with a
- /// lower ordinal will be up to date.
- mutable DenseMap<const MCSectionData*, MCFragment *> LastValidFragment;
+ /// lower ordinal will be valid.
+ mutable DenseMap<const MCSectionData*, MCFragment*> LastValidFragment;
/// \brief Make sure that the layout for the given fragment is valid, lazily
/// computing it if necessary.
- void EnsureValid(const MCFragment *F) const;
+ void ensureValid(const MCFragment *F) const;
- bool isFragmentUpToDate(const MCFragment *F) const;
+ /// \brief Is the layout for this fragment valid?
+ bool isFragmentValid(const MCFragment *F) const;
+
+ /// \brief Compute the amount of padding required before this fragment to
+ /// obey bundling restrictions.
+ uint64_t computeBundlePadding(const MCFragment *F,
+ uint64_t FOffset, uint64_t FSize);
public:
MCAsmLayout(MCAssembler &_Assembler);
@@ -54,14 +60,15 @@ public:
/// Get the assembler object this is a layout for.
MCAssembler &getAssembler() const { return Assembler; }
- /// \brief Invalidate all following fragments because a fragment has been
- /// resized. The fragments size should have already been updated.
- void Invalidate(MCFragment *F);
+ /// \brief Invalidate the fragments starting with F because it has been
+ /// resized. The fragment's size should have already been updated, but
+ /// its bundle padding will be recomputed.
+ void invalidateFragmentsFrom(MCFragment *F);
/// \brief Perform layout for a single fragment, assuming that the previous
/// fragment has already been laid out correctly, and the parent section has
/// been initialized.
- void LayoutFragment(MCFragment *Fragment);
+ void layoutFragment(MCFragment *Fragment);
/// @name Section Access (in layout order)
/// @{
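
A sketch of how the renamed invalidation hook is driven (the caller is assumed; in-tree, the relaxation loop in MCAssembler plays this role):

    #include "llvm/MC/MCAsmLayout.h"

    // Per the doc comment above, the fragment's size must already be updated
    // before this call; F and every fragment after it are then stale, and
    // ensureValid() recomputes offsets and bundle padding lazily on the next
    // address query.
    static void fragmentResized(llvm::MCAsmLayout &Layout, llvm::MCFragment *F) {
      Layout.invalidateFragmentsFrom(F);
    }
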
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 5771415c81cc..43fbdc9301ac 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -10,13 +10,13 @@
#ifndef LLVM_MC_MCASSEMBLER_H
#define LLVM_MC_MCASSEMBLER_H
-#include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DataTypes.h"
#include <vector> // FIXME: Shouldn't be needed.
@@ -47,8 +47,9 @@ public:
enum FragmentType {
FT_Align,
FT_Data,
+ FT_CompactEncodedInst,
FT_Fill,
- FT_Inst,
+ FT_Relaxable,
FT_Org,
FT_Dwarf,
FT_DwarfFrame,
@@ -99,42 +100,139 @@ public:
unsigned getLayoutOrder() const { return LayoutOrder; }
void setLayoutOrder(unsigned Value) { LayoutOrder = Value; }
+ /// \brief Does this fragment have instructions emitted into it? By default
+ /// this is false, but specific fragment types may set it to true.
+ virtual bool hasInstructions() const { return false; }
+
+ /// \brief Should this fragment be placed at the end of an aligned bundle?
+ virtual bool alignToBundleEnd() const { return false; }
+ virtual void setAlignToBundleEnd(bool V) { }
+
+ /// \brief Get the padding size that must be inserted before this fragment.
+ /// Used for bundling. By default, no padding is inserted.
+ /// Note that padding size is restricted to 8 bits. This is an optimization
+ /// to reduce the amount of space used for each fragment. In practice, larger
+ /// padding should never be required.
+ virtual uint8_t getBundlePadding() const {
+ return 0;
+ }
+
+ /// \brief Set the padding size for this fragment. By default it's a no-op,
+ /// and only some fragments have a meaningful implementation.
+ virtual void setBundlePadding(uint8_t N) {
+ }
+
void dump();
};
-class MCDataFragment : public MCFragment {
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data.
+///
+class MCEncodedFragment : public MCFragment {
virtual void anchor();
- SmallString<32> Contents;
-
- /// Fixups - The list of fixups in this fragment.
- std::vector<MCFixup> Fixups;
+ uint8_t BundlePadding;
public:
- typedef std::vector<MCFixup>::const_iterator const_fixup_iterator;
- typedef std::vector<MCFixup>::iterator fixup_iterator;
+ MCEncodedFragment(MCFragment::FragmentType FType, MCSectionData *SD = 0)
+ : MCFragment(FType, SD), BundlePadding(0)
+ {
+ }
+ virtual ~MCEncodedFragment();
+
+ virtual SmallVectorImpl<char> &getContents() = 0;
+ virtual const SmallVectorImpl<char> &getContents() const = 0;
+
+ virtual uint8_t getBundlePadding() const {
+ return BundlePadding;
+ }
+
+ virtual void setBundlePadding(uint8_t N) {
+ BundlePadding = N;
+ }
+
+ static bool classof(const MCFragment *F) {
+ MCFragment::FragmentType Kind = F->getKind();
+ switch (Kind) {
+ default:
+ return false;
+ case MCFragment::FT_Relaxable:
+ case MCFragment::FT_CompactEncodedInst:
+ case MCFragment::FT_Data:
+ return true;
+ }
+ }
+};
+
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data and also have fixups registered.
+///
+class MCEncodedFragmentWithFixups : public MCEncodedFragment {
+ virtual void anchor();
public:
- MCDataFragment(MCSectionData *SD = 0) : MCFragment(FT_Data, SD) {}
+ MCEncodedFragmentWithFixups(MCFragment::FragmentType FType,
+ MCSectionData *SD = 0)
+ : MCEncodedFragment(FType, SD)
+ {
+ }
- /// @name Accessors
- /// @{
+ virtual ~MCEncodedFragmentWithFixups();
- SmallString<32> &getContents() { return Contents; }
- const SmallString<32> &getContents() const { return Contents; }
+ typedef SmallVectorImpl<MCFixup>::const_iterator const_fixup_iterator;
+ typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator;
- /// @}
- /// @name Fixup Access
- /// @{
+ virtual SmallVectorImpl<MCFixup> &getFixups() = 0;
+ virtual const SmallVectorImpl<MCFixup> &getFixups() const = 0;
+
+ virtual fixup_iterator fixup_begin() = 0;
+ virtual const_fixup_iterator fixup_begin() const = 0;
+ virtual fixup_iterator fixup_end() = 0;
+ virtual const_fixup_iterator fixup_end() const = 0;
+
+ static bool classof(const MCFragment *F) {
+ MCFragment::FragmentType Kind = F->getKind();
+ return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data;
+ }
+};
+
+/// Fragment for data and encoded instructions.
+///
+class MCDataFragment : public MCEncodedFragmentWithFixups {
+ virtual void anchor();
+
+ /// \brief Does this fragment contain encoded instructions anywhere in it?
+ bool HasInstructions;
+
+ /// \brief Should this fragment be aligned to the end of a bundle?
+ bool AlignToBundleEnd;
+
+ SmallVector<char, 32> Contents;
- void addFixup(MCFixup Fixup) {
- // Enforce invariant that fixups are in offset order.
- assert((Fixups.empty() || Fixup.getOffset() >= Fixups.back().getOffset()) &&
- "Fixups must be added in order!");
- Fixups.push_back(Fixup);
+ /// Fixups - The list of fixups in this fragment.
+ SmallVector<MCFixup, 4> Fixups;
+public:
+ MCDataFragment(MCSectionData *SD = 0)
+ : MCEncodedFragmentWithFixups(FT_Data, SD),
+ HasInstructions(false), AlignToBundleEnd(false)
+ {
}
- std::vector<MCFixup> &getFixups() { return Fixups; }
- const std::vector<MCFixup> &getFixups() const { return Fixups; }
+ virtual SmallVectorImpl<char> &getContents() { return Contents; }
+ virtual const SmallVectorImpl<char> &getContents() const { return Contents; }
+
+ SmallVectorImpl<MCFixup> &getFixups() {
+ return Fixups;
+ }
+
+ const SmallVectorImpl<MCFixup> &getFixups() const {
+ return Fixups;
+ }
+
+ virtual bool hasInstructions() const { return HasInstructions; }
+ virtual void setHasInstructions(bool V) { HasInstructions = V; }
+
+ virtual bool alignToBundleEnd() const { return AlignToBundleEnd; }
+ virtual void setAlignToBundleEnd(bool V) { AlignToBundleEnd = V; }
fixup_iterator fixup_begin() { return Fixups.begin(); }
const_fixup_iterator fixup_begin() const { return Fixups.begin(); }
@@ -142,60 +240,79 @@ public:
fixup_iterator fixup_end() {return Fixups.end();}
const_fixup_iterator fixup_end() const {return Fixups.end();}
- size_t fixup_size() const { return Fixups.size(); }
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_Data;
+ }
+};
- /// @}
+/// This is a compact (memory-size-wise) fragment for holding an encoded
+/// instruction (non-relaxable) that has no fixups registered. When applicable,
+/// it can be used instead of MCDataFragment and lead to lower memory
+/// consumption.
+///
+class MCCompactEncodedInstFragment : public MCEncodedFragment {
+ virtual void anchor();
+
+ /// \brief Should this fragment be aligned to the end of a bundle?
+ bool AlignToBundleEnd;
+
+ SmallVector<char, 4> Contents;
+public:
+ MCCompactEncodedInstFragment(MCSectionData *SD = 0)
+ : MCEncodedFragment(FT_CompactEncodedInst, SD), AlignToBundleEnd(false)
+ {
+ }
+
+ virtual bool hasInstructions() const {
+ return true;
+ }
+
+ virtual SmallVectorImpl<char> &getContents() { return Contents; }
+ virtual const SmallVectorImpl<char> &getContents() const { return Contents; }
+
+ virtual bool alignToBundleEnd() const { return AlignToBundleEnd; }
+ virtual void setAlignToBundleEnd(bool V) { AlignToBundleEnd = V; }
static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Data;
+ return F->getKind() == MCFragment::FT_CompactEncodedInst;
}
};
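
To make the memory tradeoff concrete, here is a sketch of how an emitter might
pick between the two fragment kinds; the helper is illustrative, not the
actual streamer logic:

    // Instructions without fixups fit the compact fragment; anything with
    // fixups must fall back to the heavier MCDataFragment.
    MCEncodedFragment *makeFragment(bool NeedsFixups, MCSectionData *SD) {
      if (NeedsFixups)
        return new MCDataFragment(SD);
      return new MCCompactEncodedInstFragment(SD);
    }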
-// FIXME: This current incarnation of MCInstFragment doesn't make much sense, as
-// it is almost entirely a duplicate of MCDataFragment. If we decide to stick
-// with this approach (as opposed to making MCInstFragment a very light weight
-// object with just the MCInst and a code size, then we should just change
-// MCDataFragment to have an optional MCInst at its end.
-class MCInstFragment : public MCFragment {
+/// A relaxable fragment holds on to its MCInst, since it may need to be
+/// relaxed during the assembler layout and relaxation stage.
+///
+class MCRelaxableFragment : public MCEncodedFragmentWithFixups {
virtual void anchor();
/// Inst - The instruction this is a fragment for.
MCInst Inst;
- /// Code - Binary data for the currently encoded instruction.
- SmallString<8> Code;
+ /// Contents - Binary data for the currently encoded instruction.
+ SmallVector<char, 8> Contents;
/// Fixups - The list of fixups in this fragment.
SmallVector<MCFixup, 1> Fixups;
public:
- typedef SmallVectorImpl<MCFixup>::const_iterator const_fixup_iterator;
- typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator;
-
-public:
- MCInstFragment(const MCInst &_Inst, MCSectionData *SD = 0)
- : MCFragment(FT_Inst, SD), Inst(_Inst) {
+ MCRelaxableFragment(const MCInst &_Inst, MCSectionData *SD = 0)
+ : MCEncodedFragmentWithFixups(FT_Relaxable, SD), Inst(_Inst) {
}
- /// @name Accessors
- /// @{
-
- SmallVectorImpl<char> &getCode() { return Code; }
- const SmallVectorImpl<char> &getCode() const { return Code; }
+ virtual SmallVectorImpl<char> &getContents() { return Contents; }
+ virtual const SmallVectorImpl<char> &getContents() const { return Contents; }
- unsigned getInstSize() const { return Code.size(); }
-
- MCInst &getInst() { return Inst; }
const MCInst &getInst() const { return Inst; }
-
void setInst(const MCInst& Value) { Inst = Value; }
- /// @}
- /// @name Fixup Access
- /// @{
+ SmallVectorImpl<MCFixup> &getFixups() {
+ return Fixups;
+ }
+
+ const SmallVectorImpl<MCFixup> &getFixups() const {
+ return Fixups;
+ }
- SmallVectorImpl<MCFixup> &getFixups() { return Fixups; }
- const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; }
+ virtual bool hasInstructions() const { return true; }
fixup_iterator fixup_begin() { return Fixups.begin(); }
const_fixup_iterator fixup_begin() const { return Fixups.begin(); }
@@ -203,12 +320,8 @@ public:
fixup_iterator fixup_end() {return Fixups.end();}
const_fixup_iterator fixup_end() const {return Fixups.end();}
- size_t fixup_size() const { return Fixups.size(); }
-
- /// @}
-
static bool classof(const MCFragment *F) {
- return F->getKind() == MCFragment::FT_Inst;
+ return F->getKind() == MCFragment::FT_Relaxable;
}
};
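
The retained MCInst is what makes relaxation possible: the assembler can ask
the backend for a wider encoding and store it back. A simplified sketch under
that assumption (the real logic lives in MCAssembler::relaxInstruction and
also re-encodes the contents and fixups):

    // One relaxation step: widen the instruction and remember the new form.
    void relaxOnce(MCRelaxableFragment &RF, MCAsmBackend &Backend) {
      MCInst Relaxed;
      Backend.relaxInstruction(RF.getInst(), Relaxed);
      RF.setInst(Relaxed); // later rounds start from the widened form
    }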
@@ -442,6 +555,12 @@ public:
typedef FragmentListType::const_reverse_iterator const_reverse_iterator;
typedef FragmentListType::reverse_iterator reverse_iterator;
+ /// \brief Express the state of bundle locked groups while emitting code.
+ enum BundleLockStateType {
+ NotBundleLocked,
+ BundleLocked,
+ BundleLockedAlignToEnd
+ };
private:
FragmentListType Fragments;
const MCSection *Section;
@@ -455,6 +574,13 @@ private:
/// Alignment - The maximum alignment seen in this section.
unsigned Alignment;
+ /// \brief Keeping track of bundle-locked state.
+ BundleLockStateType BundleLockState;
+
+ /// \brief We've seen a bundle_lock directive but not its first instruction
+ /// yet.
+ bool BundleGroupBeforeFirstInst;
+
/// @name Assembler Backend Data
/// @{
//
@@ -507,6 +633,26 @@ public:
bool empty() const { return Fragments.empty(); }
+ bool isBundleLocked() const {
+ return BundleLockState != NotBundleLocked;
+ }
+
+ BundleLockStateType getBundleLockState() const {
+ return BundleLockState;
+ }
+
+ void setBundleLockState(BundleLockStateType NewState) {
+ BundleLockState = NewState;
+ }
+
+ bool isBundleGroupBeforeFirstInst() const {
+ return BundleGroupBeforeFirstInst;
+ }
+
+ void setBundleGroupBeforeFirstInst(bool IsFirst) {
+ BundleGroupBeforeFirstInst = IsFirst;
+ }
+
void dump();
/// @}
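
Taken together, these accessors give the streamer a small per-section state
machine. A sketch of how a .bundle_lock directive could drive it (helper name
hypothetical, error handling elided):

    // Enter a bundle-locked group for this section.
    void startBundleGroup(MCSectionData &SD, bool AlignToEnd) {
      SD.setBundleLockState(AlignToEnd
                                ? MCSectionData::BundleLockedAlignToEnd
                                : MCSectionData::BundleLocked);
      SD.setBundleGroupBeforeFirstInst(true); // no instruction seen yet
    }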
@@ -703,6 +849,10 @@ private:
std::vector<IndirectSymbolData> IndirectSymbols;
std::vector<DataRegionData> DataRegions;
+
+ /// The list of linker options to propagate into the object file.
+ std::vector<std::vector<std::string> > LinkerOptions;
+
/// The set of function symbols for which a .thumb_func directive has
/// been seen.
//
@@ -712,10 +862,21 @@ private:
// refactoring too.
SmallPtrSet<const MCSymbol*, 64> ThumbFuncs;
+ /// \brief The bundle alignment size currently set in the assembler.
+ ///
+ /// By default it's 0, which means bundling is disabled.
+ unsigned BundleAlignSize;
+
unsigned RelaxAll : 1;
unsigned NoExecStack : 1;
unsigned SubsectionsViaSymbols : 1;
+ /// ELF specific e_header flags
+ // It would be good if there were an MCELFAssembler class to hold this.
+ // ELF header flags are used both by the integrated and standalone assemblers.
+ // Access to the flags is necessary in cases where assembler directives affect
+  // which flags are set.
+ unsigned ELFHeaderEFlags;
private:
/// Evaluate a fixup to a relocatable expression and the value which should be
/// placed into the fixup.
@@ -736,20 +897,22 @@ private:
/// Check whether a fixup can be satisfied, or whether it needs to be relaxed
/// (increased in size, in order to hold its value correctly).
- bool fixupNeedsRelaxation(const MCFixup &Fixup, const MCInstFragment *DF,
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
/// Check whether the given fragment needs relaxation.
- bool fragmentNeedsRelaxation(const MCInstFragment *IF,
+ bool fragmentNeedsRelaxation(const MCRelaxableFragment *IF,
const MCAsmLayout &Layout) const;
- /// layoutOnce - Perform one layout iteration and return true if any offsets
+ /// \brief Perform one layout iteration and return true if any offsets
/// were adjusted.
bool layoutOnce(MCAsmLayout &Layout);
+ /// \brief Perform one layout iteration of the given section and return true
+ /// if any offsets were adjusted.
bool layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD);
- bool relaxInstruction(MCAsmLayout &Layout, MCInstFragment &IF);
+ bool relaxInstruction(MCAsmLayout &Layout, MCRelaxableFragment &IF);
bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF);
@@ -791,6 +954,10 @@ public:
/// Flag a function symbol as the target of a .thumb_func directive.
void setIsThumbFunc(const MCSymbol *Func) { ThumbFuncs.insert(Func); }
+ /// ELF e_header flags
+ unsigned getELFHeaderEFlags() const {return ELFHeaderEFlags;}
+ void setELFHeaderEFlags(unsigned Flags) { ELFHeaderEFlags = Flags;}
+
public:
/// Construct a new assembler instance.
///
@@ -805,6 +972,10 @@ public:
raw_ostream &OS);
~MCAssembler();
+ /// Reuse an assembler instance
+ ///
+ void reset();
+
MCContext &getContext() const { return Context; }
MCAsmBackend &getBackend() const { return Backend; }
@@ -832,6 +1003,20 @@ public:
bool getNoExecStack() const { return NoExecStack; }
void setNoExecStack(bool Value) { NoExecStack = Value; }
+ bool isBundlingEnabled() const {
+ return BundleAlignSize != 0;
+ }
+
+ unsigned getBundleAlignSize() const {
+ return BundleAlignSize;
+ }
+
+ void setBundleAlignSize(unsigned Size) {
+ assert((Size == 0 || !(Size & (Size - 1))) &&
+ "Expect a power-of-two bundle align size");
+ BundleAlignSize = Size;
+ }
+
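
The assertion uses the usual bit trick: a power of two has exactly one bit
set, so Size & (Size - 1) is zero only for powers of two (and for zero, which
leaves bundling disabled). A sketch of the intended call site, where the value
comes from a .bundle_align_mode exponent (helper name hypothetical):

    // .bundle_align_mode N selects 2**N byte bundles, so the size passed
    // to setBundleAlignSize is a power of two by construction.
    void applyBundleAlignMode(MCAssembler &Asm, unsigned AlignPow2) {
      Asm.setBundleAlignSize(AlignPow2 ? 1u << AlignPow2 : 0);
    }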
/// @name Section List Access
/// @{
@@ -889,6 +1074,14 @@ public:
size_t indirect_symbol_size() const { return IndirectSymbols.size(); }
/// @}
+ /// @name Linker Option List Access
+ /// @{
+
+ std::vector<std::vector<std::string> > &getLinkerOptions() {
+ return LinkerOptions;
+ }
+
+ /// @}
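
Each linker option is itself a vector of strings, so a single directive can
carry several arguments. A sketch of how a front end or directive handler
might record one (helper name hypothetical; assumes llvm/ADT/ArrayRef.h):

    // Record a "-framework Foo" style option for the object writer to emit.
    void addLinkerOption(MCAssembler &Asm, ArrayRef<std::string> Args) {
      Asm.getLinkerOptions().push_back(
          std::vector<std::string>(Args.begin(), Args.end()));
    }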
/// @name Data Region List Access
/// @{
diff --git a/include/llvm/MC/MCAtom.h b/include/llvm/MC/MCAtom.h
index 682cf7cd76c6..ae5bf0bc2069 100644
--- a/include/llvm/MC/MCAtom.h
+++ b/include/llvm/MC/MCAtom.h
@@ -46,8 +46,8 @@ class MCAtom {
: Type(T), Parent(P), Begin(B), End(E) { }
public:
- bool isTextAtom() { return Type == TextAtom; }
- bool isDataAtom() { return Type == DataAtom; }
+ bool isTextAtom() const { return Type == TextAtom; }
+ bool isDataAtom() const { return Type == DataAtom; }
void addInst(const MCInst &I, uint64_t Address, unsigned Size);
void addData(const MCData &D);
diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h
index 057489090293..9bfa08eb5d01 100644
--- a/include/llvm/MC/MCCodeEmitter.h
+++ b/include/llvm/MC/MCCodeEmitter.h
@@ -29,6 +29,9 @@ protected: // Can only create subclasses.
public:
virtual ~MCCodeEmitter();
+ /// Lifetime management
+ virtual void reset() { }
+
/// EncodeInstruction - Encode the given \p Inst to bytes on the output
/// stream \p OS.
virtual void EncodeInstruction(const MCInst &Inst, raw_ostream &OS,
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 5a8830cb66ce..0db3dee2ff05 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -10,13 +10,15 @@
#ifndef LLVM_MC_MCCONTEXT_H
#define LLVM_MC_MCCONTEXT_H
-#include "llvm/MC/SectionKind.h"
-#include "llvm/MC/MCDwarf.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
+#include <map>
#include <vector> // FIXME: Shouldn't be needed.
namespace llvm {
@@ -94,9 +96,19 @@ namespace llvm {
/// .secure_log_reset appearing between them.
bool SecureLogUsed;
+ /// The compilation directory to use for DW_AT_comp_dir.
+ std::string CompilationDir;
+
+ /// The main file name if passed in explicitly.
+ std::string MainFileName;
+
/// The dwarf file and directory tables from the dwarf .file directive.
- std::vector<MCDwarfFile *> MCDwarfFiles;
- std::vector<StringRef> MCDwarfDirs;
+ /// We now emit a line table for each compile unit. To reduce the prologue
+ /// size of each line table, the files and directories used by each compile
+ /// unit are separated.
+ typedef std::map<unsigned, SmallVector<MCDwarfFile *, 4> > MCDwarfFilesMap;
+ MCDwarfFilesMap MCDwarfFilesCUMap;
+ std::map<unsigned, SmallVector<StringRef, 4> > MCDwarfDirsCUMap;
/// The current dwarf line information from the last dwarf .loc directive.
MCDwarfLoc CurrentDwarfLoc;
@@ -123,6 +135,10 @@ namespace llvm {
/// non-empty.
StringRef DwarfDebugFlags;
+ /// The string to embed in as the dwarf AT_producer for the compile unit, if
+ /// non-empty.
+ StringRef DwarfDebugProducer;
+
/// Honor temporary labels, this is useful for debugging semantic
/// differences between temporary and non-temporary labels (primarily on
/// Darwin).
@@ -134,14 +150,22 @@ namespace llvm {
/// We need a deterministic iteration order, so we remember the order
/// the elements were added.
std::vector<const MCSection *> MCLineSectionOrder;
+ /// The Compile Unit ID that we are currently processing.
+ unsigned DwarfCompileUnitID;
+ /// The line table start symbol for each Compile Unit.
+ DenseMap<unsigned, MCSymbol *> MCLineTableSymbols;
void *MachOUniquingMap, *ELFUniquingMap, *COFFUniquingMap;
+ /// Do automatic reset in destructor
+ bool AutoReset;
+
MCSymbol *CreateSymbol(StringRef Name);
public:
explicit MCContext(const MCAsmInfo &MAI, const MCRegisterInfo &MRI,
- const MCObjectFileInfo *MOFI, const SourceMgr *Mgr = 0);
+ const MCObjectFileInfo *MOFI, const SourceMgr *Mgr = 0,
+ bool DoAutoReset = true);
~MCContext();
const SourceMgr *getSourceManager() const { return SrcMgr; }
@@ -154,6 +178,15 @@ namespace llvm {
void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; }
+ /// @name Module Lifetime Management
+ /// @{
+
+    /// reset - return the object to the state it had right after
+    /// construction, in preparation for processing a new module.
+ void reset();
+
+ /// @}
+
/// @name Symbol Management
/// @{
@@ -235,21 +268,45 @@ namespace llvm {
/// @name Dwarf Management
/// @{
+ /// \brief Get the compilation directory for DW_AT_comp_dir
+ /// This can be overridden by clients which want to control the reported
+ /// compilation directory and have it be something other than the current
+ /// working directory.
+ const std::string &getCompilationDir() const { return CompilationDir; }
+
+ /// \brief Set the compilation directory for DW_AT_comp_dir
+ /// Override the default (CWD) compilation directory.
+ void setCompilationDir(StringRef S) { CompilationDir = S.str(); }
+
+ /// \brief Get the main file name for use in error messages and debug
+ /// info. This can be set to ensure we've got the correct file name
+ /// after preprocessing or for -save-temps.
+ const std::string &getMainFileName() const { return MainFileName; }
+
+ /// \brief Set the main file name and override the default.
+ void setMainFileName(StringRef S) { MainFileName = S.str(); }
+
/// GetDwarfFile - creates an entry in the dwarf file and directory tables.
unsigned GetDwarfFile(StringRef Directory, StringRef FileName,
- unsigned FileNumber);
+ unsigned FileNumber, unsigned CUID);
- bool isValidDwarfFileNumber(unsigned FileNumber);
+ bool isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID = 0);
bool hasDwarfFiles() const {
- return !MCDwarfFiles.empty();
+      // Traverse MCDwarfFilesCUMap and check whether any entry is non-empty.
+ MCDwarfFilesMap::const_iterator MapB, MapE;
+ for (MapB = MCDwarfFilesCUMap.begin(), MapE = MCDwarfFilesCUMap.end();
+ MapB != MapE; MapB++)
+ if (!MapB->second.empty())
+ return true;
+ return false;
}
- const std::vector<MCDwarfFile *> &getMCDwarfFiles() {
- return MCDwarfFiles;
+ const SmallVectorImpl<MCDwarfFile *> &getMCDwarfFiles(unsigned CUID = 0) {
+ return MCDwarfFilesCUMap[CUID];
}
- const std::vector<StringRef> &getMCDwarfDirs() {
- return MCDwarfDirs;
+ const SmallVectorImpl<StringRef> &getMCDwarfDirs(unsigned CUID = 0) {
+ return MCDwarfDirsCUMap[CUID];
}
const DenseMap<const MCSection *, MCLineSection *>
@@ -263,6 +320,25 @@ namespace llvm {
MCLineSections[Sec] = Line;
MCLineSectionOrder.push_back(Sec);
}
+ unsigned getDwarfCompileUnitID() {
+ return DwarfCompileUnitID;
+ }
+ void setDwarfCompileUnitID(unsigned CUIndex) {
+ DwarfCompileUnitID = CUIndex;
+ }
+ const DenseMap<unsigned, MCSymbol *> &getMCLineTableSymbols() const {
+ return MCLineTableSymbols;
+ }
+ MCSymbol *getMCLineTableSymbol(unsigned ID) const {
+ DenseMap<unsigned, MCSymbol *>::const_iterator CIter =
+ MCLineTableSymbols.find(ID);
+ if (CIter == MCLineTableSymbols.end())
+ return NULL;
+ return CIter->second;
+ }
+ void setMCLineTableSymbol(MCSymbol *Sym, unsigned ID) {
+ MCLineTableSymbols[ID] = Sym;
+ }
/// setCurrentDwarfLoc - saves the information from the currently parsed
/// dwarf .loc directive and sets DwarfLocSeen. When the next instruction
@@ -309,6 +385,9 @@ namespace llvm {
void setDwarfDebugFlags(StringRef S) { DwarfDebugFlags = S; }
StringRef getDwarfDebugFlags() { return DwarfDebugFlags; }
+ void setDwarfDebugProducer(StringRef S) { DwarfDebugProducer = S; }
+ StringRef getDwarfDebugProducer() { return DwarfDebugProducer; }
+
/// @}
char *getSecureLogFile() { return SecureLogFile; }
diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h
index 53a9ce0a3648..36fbcb02d9f6 100644
--- a/include/llvm/MC/MCDisassembler.h
+++ b/include/llvm/MC/MCDisassembler.h
@@ -6,11 +6,11 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-#ifndef MCDISASSEMBLER_H
-#define MCDISASSEMBLER_H
+#ifndef LLVM_MC_MCDISASSEMBLER_H
+#define LLVM_MC_MCDISASSEMBLER_H
-#include "llvm/Support/DataTypes.h"
#include "llvm-c/Disassembler.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -20,8 +20,6 @@ class MemoryObject;
class raw_ostream;
class MCContext;
-struct EDInstInfo;
-
/// MCDisassembler - Superclass for all disassemblers. Consumes a memory region
/// and provides an array of assembly instructions.
class MCDisassembler {
@@ -84,14 +82,6 @@ public:
raw_ostream &vStream,
raw_ostream &cStream) const = 0;
- /// getEDInfo - Returns the enhanced instruction information corresponding to
- /// the disassembler.
- ///
- /// @return - An array of instruction information, with one entry for
- /// each MCInst opcode this disassembler returns.
- /// NULL if there is no info for this target.
- virtual const EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; }
-
private:
//
// Hooks for symbolic disassembly via the public 'C' interface.
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 8fc437f3e691..1a392e8755ee 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -16,10 +16,10 @@
#define LLVM_MC_MCDWARF_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
#include <vector>
namespace llvm {
@@ -187,29 +187,43 @@ namespace llvm {
MCLineSection() {}
// addLineEntry - adds an entry to this MCLineSection's line entries
- void addLineEntry(const MCLineEntry &LineEntry) {
- MCLineEntries.push_back(LineEntry);
+ void addLineEntry(const MCLineEntry &LineEntry, unsigned CUID) {
+ MCLineDivisions[CUID].push_back(LineEntry);
}
typedef std::vector<MCLineEntry> MCLineEntryCollection;
typedef MCLineEntryCollection::iterator iterator;
typedef MCLineEntryCollection::const_iterator const_iterator;
+ typedef std::map<unsigned, MCLineEntryCollection> MCLineDivisionMap;
private:
- MCLineEntryCollection MCLineEntries;
+ // A collection of MCLineEntry for each Compile Unit ID.
+ MCLineDivisionMap MCLineDivisions;
public:
- const MCLineEntryCollection *getMCLineEntries() const {
- return &MCLineEntries;
+ // Returns whether MCLineSection contains entries for a given Compile
+ // Unit ID.
+ bool containEntriesForID(unsigned CUID) const {
+ return MCLineDivisions.count(CUID);
+ }
+ // Returns the collection of MCLineEntry for a given Compile Unit ID.
+ const MCLineEntryCollection &getMCLineEntries(unsigned CUID) const {
+ MCLineDivisionMap::const_iterator CIter = MCLineDivisions.find(CUID);
+ assert(CIter != MCLineDivisions.end());
+ return CIter->second;
}
};
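
Line entries are now partitioned by compile unit at insertion time, so
emission can walk one CU's entries without filtering. A sketch of the
insertion side (helper name hypothetical; the CU ID comes from the context
that saw the .loc directive):

    // Route a new line entry to the table of the currently active CU.
    void recordLineEntry(MCLineSection &LS, const MCLineEntry &Entry,
                         MCContext &Ctx) {
      LS.addLineEntry(Entry, Ctx.getDwarfCompileUnitID());
    }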
class MCDwarfFileTable {
public:
//
- // This emits the Dwarf file and the line tables.
+ // This emits the Dwarf file and the line tables for all Compile Units.
//
static const MCSymbol *Emit(MCStreamer *MCOS);
+ //
+ // This emits the Dwarf file and the line tables for a given Compile Unit.
+ //
+ static const MCSymbol *EmitCU(MCStreamer *MCOS, unsigned ID);
};
class MCDwarfLineAddr {
@@ -266,42 +280,115 @@ namespace llvm {
class MCCFIInstruction {
public:
- enum OpType { SameValue, RememberState, RestoreState, Move, RelMove, Escape,
- Restore};
+ enum OpType { OpSameValue, OpRememberState, OpRestoreState, OpOffset,
+ OpDefCfaRegister, OpDefCfaOffset, OpDefCfa, OpRelOffset,
+ OpAdjustCfaOffset, OpEscape, OpRestore, OpUndefined,
+ OpRegister };
private:
OpType Operation;
MCSymbol *Label;
- // Move to & from location.
- MachineLocation Destination;
- MachineLocation Source;
+ unsigned Register;
+ union {
+ int Offset;
+ unsigned Register2;
+ };
std::vector<char> Values;
+
+ MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, StringRef V) :
+ Operation(Op), Label(L), Register(R), Offset(O),
+ Values(V.begin(), V.end()) {
+ assert(Op != OpRegister);
+ }
+
+ MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R1, unsigned R2) :
+ Operation(Op), Label(L), Register(R1), Register2(R2) {
+ assert(Op == OpRegister);
+ }
+
public:
- MCCFIInstruction(OpType Op, MCSymbol *L)
- : Operation(Op), Label(L) {
- assert(Op == RememberState || Op == RestoreState);
+ static MCCFIInstruction
+ createOffset(MCSymbol *L, unsigned Register, int Offset) {
+ return MCCFIInstruction(OpOffset, L, Register, Offset, "");
+ }
+
+ static MCCFIInstruction
+ createDefCfaRegister(MCSymbol *L, unsigned Register) {
+ return MCCFIInstruction(OpDefCfaRegister, L, Register, 0, "");
+ }
+
+ static MCCFIInstruction createDefCfaOffset(MCSymbol *L, int Offset) {
+ return MCCFIInstruction(OpDefCfaOffset, L, 0, -Offset, "");
+ }
+
+ static MCCFIInstruction
+ createDefCfa(MCSymbol *L, unsigned Register, int Offset) {
+ return MCCFIInstruction(OpDefCfa, L, Register, -Offset, "");
+ }
+
+ static MCCFIInstruction createUndefined(MCSymbol *L, unsigned Register) {
+ return MCCFIInstruction(OpUndefined, L, Register, 0, "");
}
- MCCFIInstruction(OpType Op, MCSymbol *L, unsigned Register)
- : Operation(Op), Label(L), Destination(Register) {
- assert(Op == SameValue || Op == Restore);
+
+ static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register) {
+ return MCCFIInstruction(OpRestore, L, Register, 0, "");
+ }
+
+ static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register) {
+ return MCCFIInstruction(OpSameValue, L, Register, 0, "");
+ }
+
+ static MCCFIInstruction createRestoreState(MCSymbol *L) {
+ return MCCFIInstruction(OpRestoreState, L, 0, 0, "");
}
- MCCFIInstruction(MCSymbol *L, const MachineLocation &D,
- const MachineLocation &S)
- : Operation(Move), Label(L), Destination(D), Source(S) {
+
+ static MCCFIInstruction createRememberState(MCSymbol *L) {
+ return MCCFIInstruction(OpRememberState, L, 0, 0, "");
+ }
+
+ static MCCFIInstruction
+ createRelOffset(MCSymbol *L, unsigned Register, int Offset) {
+ return MCCFIInstruction(OpRelOffset, L, Register, Offset, "");
+ }
+
+ static MCCFIInstruction
+ createAdjustCfaOffset(MCSymbol *L, int Adjustment) {
+ return MCCFIInstruction(OpAdjustCfaOffset, L, 0, Adjustment, "");
}
- MCCFIInstruction(OpType Op, MCSymbol *L, const MachineLocation &D,
- const MachineLocation &S)
- : Operation(Op), Label(L), Destination(D), Source(S) {
- assert(Op == RelMove);
+
+ static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals) {
+ return MCCFIInstruction(OpEscape, L, 0, 0, Vals);
}
- MCCFIInstruction(OpType Op, MCSymbol *L, StringRef Vals)
- : Operation(Op), Label(L), Values(Vals.begin(), Vals.end()) {
- assert(Op == Escape);
+
+ static MCCFIInstruction
+ createRegister(MCSymbol *L, unsigned Register1, unsigned Register2) {
+ return MCCFIInstruction(OpRegister, L, Register1, Register2);
}
+
OpType getOperation() const { return Operation; }
MCSymbol *getLabel() const { return Label; }
- const MachineLocation &getDestination() const { return Destination; }
- const MachineLocation &getSource() const { return Source; }
+
+ unsigned getRegister() const {
+ assert(Operation == OpDefCfa || Operation == OpOffset ||
+ Operation == OpRestore || Operation == OpUndefined ||
+ Operation == OpSameValue || Operation == OpDefCfaRegister ||
+ Operation == OpRelOffset || Operation == OpRegister);
+ return Register;
+ }
+
+ unsigned getRegister2() const {
+ assert(Operation == OpRegister);
+ return Register2;
+ }
+
+ int getOffset() const {
+ assert(Operation == OpDefCfa || Operation == OpOffset ||
+ Operation == OpRelOffset || Operation == OpDefCfaOffset ||
+ Operation == OpAdjustCfaOffset);
+ return Offset;
+ }
+
const StringRef getValues() const {
+ assert(Operation == OpEscape);
return StringRef(&Values[0], Values.size());
}
};
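
The net effect is that call sites build CFI instructions through named
factories instead of MachineLocation pairs. A sketch with placeholder label
and register values; note from the code above that createDefCfa and
createDefCfaOffset store the negated offset:

    MCSymbol *L = 0; // some previously created label
    MCCFIInstruction DefCfa =
        MCCFIInstruction::createDefCfa(L, /*Register=*/6, /*Offset=*/16);
    MCCFIInstruction Save =
        MCCFIInstruction::createOffset(L, /*Register=*/6, /*Offset=*/-16);
    assert(DefCfa.getOffset() == -16 && Save.getOffset() == -16);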
diff --git a/include/llvm/MC/MCELF.h b/include/llvm/MC/MCELF.h
new file mode 100644
index 000000000000..7e59911a89c3
--- /dev/null
+++ b/include/llvm/MC/MCELF.h
@@ -0,0 +1,37 @@
+//===- llvm/MC/MCELF.h - ELF MC -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some support functions used by the ELF Streamer and
+// ObjectWriter.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELF_H
+#define LLVM_MC_MCELF_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+class MCSymbolData;
+
+class MCELF {
+ public:
+ static void SetBinding(MCSymbolData &SD, unsigned Binding);
+ static unsigned GetBinding(const MCSymbolData &SD);
+ static void SetType(MCSymbolData &SD, unsigned Type);
+ static unsigned GetType(const MCSymbolData &SD);
+ static void SetVisibility(MCSymbolData &SD, unsigned Visibility);
+ static unsigned GetVisibility(MCSymbolData &SD);
+ static void setOther(MCSymbolData &SD, unsigned Other);
+ static unsigned getOther(MCSymbolData &SD);
+};
+
+}
+
+#endif
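
These helpers pack ELF-specific symbol attributes into MCSymbolData flags. A
sketch of typical use from a streamer, assuming llvm/Support/ELF.h for the
STB_*/STT_* constants (helper name hypothetical):

    // Mark a symbol as a weak function, the way .weak handling would.
    void markWeakFunction(MCSymbolData &SD) {
      MCELF::SetBinding(SD, ELF::STB_WEAK);
      MCELF::SetType(SD, ELF::STT_FUNC);
    }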
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index 38cdc7293ba0..a59776d5cdaa 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -79,7 +79,6 @@ public:
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend) const = 0;
- virtual unsigned getEFlags() const;
virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
const MCValue &Target,
const MCFragment &F,
diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h
new file mode 100644
index 000000000000..6fb2d22be2e7
--- /dev/null
+++ b/include/llvm/MC/MCELFStreamer.h
@@ -0,0 +1,125 @@
+//===- MCELFStreamer.h - MCStreamer ELF Object File Interface ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELFSTREAMER_H
+#define LLVM_MC_MCELFSTREAMER_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/DataTypes.h"
+#include <vector>
+
+namespace llvm {
+class MCAsmBackend;
+class MCAssembler;
+class MCCodeEmitter;
+class MCExpr;
+class MCInst;
+class MCSymbol;
+class MCSymbolData;
+class raw_ostream;
+
+class MCELFStreamer : public MCObjectStreamer {
+protected:
+ MCELFStreamer(StreamerKind Kind, MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Kind, Context, TAB, OS, Emitter) {}
+
+public:
+ MCELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter)
+ : MCObjectStreamer(SK_ELFStreamer, Context, TAB, OS, Emitter) {}
+
+ MCELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, MCAssembler *Assembler)
+ : MCObjectStreamer(SK_ELFStreamer, Context, TAB, OS, Emitter,
+ Assembler) {}
+
+ virtual ~MCELFStreamer();
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void InitToTextSection();
+ virtual void ChangeSection(const MCSection *Section);
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitDebugLabel(MCSymbol *Symbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+
+ virtual MCSymbolData &getOrCreateSymbolData(MCSymbol *Symbol);
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ uint64_t Size = 0, unsigned ByteAlignment = 0);
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace);
+
+ virtual void EmitFileDirective(StringRef Filename);
+
+ virtual void EmitTCEntry(const MCSymbol &S);
+
+ virtual void EmitValueToAlignment(unsigned, int64_t, unsigned, unsigned);
+
+ virtual void FinishImpl();
+ /// @}
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_ELFStreamer || S->getKind() == SK_ARMELFStreamer;
+ }
+
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst);
+ virtual void EmitInstToData(const MCInst &Inst);
+
+ virtual void EmitBundleAlignMode(unsigned AlignPow2);
+ virtual void EmitBundleLock(bool AlignToEnd);
+ virtual void EmitBundleUnlock();
+
+ void fixSymbolsInTLSFixups(const MCExpr *expr);
+
+ struct LocalCommon {
+ MCSymbolData *SD;
+ uint64_t Size;
+ unsigned ByteAlignment;
+ };
+
+ std::vector<LocalCommon> LocalCommons;
+
+ SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
+
+
+ void SetSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind);
+ void SetSectionData();
+ void SetSectionText();
+ void SetSectionBss();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 1007aa526493..b5bfed18eca4 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -160,6 +160,7 @@ public:
VK_TLVP, // Mach-O thread local variable relocation
VK_SECREL,
// FIXME: We'd really like to use the generic Kinds listed above for these.
+ VK_ARM_NONE,
VK_ARM_PLT, // ARM-style PLT references. i.e., (PLT) instead of @PLT
VK_ARM_TLSGD, // ditto for TLSGD, GOT, GOTOFF, TPOFF and GOTTPOFF
VK_ARM_GOT,
@@ -168,15 +169,29 @@ public:
VK_ARM_GOTTPOFF,
VK_ARM_TARGET1,
VK_ARM_TARGET2,
+ VK_ARM_PREL31,
VK_PPC_TOC, // TOC base
VK_PPC_TOC_ENTRY, // TOC entry
VK_PPC_DARWIN_HA16, // ha16(symbol)
VK_PPC_DARWIN_LO16, // lo16(symbol)
VK_PPC_GAS_HA16, // symbol@ha
- VK_PPC_GAS_LO16, // symbol@l
+ VK_PPC_GAS_LO16, // symbol@l
VK_PPC_TPREL16_HA, // symbol@tprel@ha
VK_PPC_TPREL16_LO, // symbol@tprel@l
+ VK_PPC_DTPREL16_HA, // symbol@dtprel@ha
+ VK_PPC_DTPREL16_LO, // symbol@dtprel@l
+ VK_PPC_TOC16_HA, // symbol@toc@ha
+ VK_PPC_TOC16_LO, // symbol@toc@l
+ VK_PPC_GOT_TPREL16_HA, // symbol@got@tprel@ha
+ VK_PPC_GOT_TPREL16_LO, // symbol@got@tprel@l
+ VK_PPC_TLS, // symbol@tls
+ VK_PPC_GOT_TLSGD16_HA, // symbol@got@tlsgd@ha
+ VK_PPC_GOT_TLSGD16_LO, // symbol@got@tlsgd@l
+ VK_PPC_TLSGD, // symbol@tlsgd
+ VK_PPC_GOT_TLSLD16_HA, // symbol@got@tlsld@ha
+ VK_PPC_GOT_TLSLD16_LO, // symbol@got@tlsld@l
+ VK_PPC_TLSLD, // symbol@tlsld
VK_Mips_GPREL,
VK_Mips_GOT_CALL,
@@ -457,6 +472,8 @@ public:
virtual void AddValueSymbols(MCAssembler *) const = 0;
virtual const MCSection *FindAssociatedSection() const = 0;
+ virtual void fixELFSymbolsInTLSFixups(MCAssembler &) const = 0;
+
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
diff --git a/include/llvm/MC/MCFixedLenDisassembler.h b/include/llvm/MC/MCFixedLenDisassembler.h
index 22b3c32abde9..ad99943df2c3 100644
--- a/include/llvm/MC/MCFixedLenDisassembler.h
+++ b/include/llvm/MC/MCFixedLenDisassembler.h
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
// Fixed length disassembler decoder state machine driver.
//===----------------------------------------------------------------------===//
-#ifndef MCFIXEDLENDISASSEMBLER_H
-#define MCFIXEDLENDISASSEMBLER_H
+#ifndef LLVM_MC_MCFIXEDLENDISASSEMBLER_H
+#define LLVM_MC_MCFIXEDLENDISASSEMBLER_H
namespace llvm {
diff --git a/include/llvm/MC/MCInstBuilder.h b/include/llvm/MC/MCInstBuilder.h
new file mode 100644
index 000000000000..c5acb26eecac
--- /dev/null
+++ b/include/llvm/MC/MCInstBuilder.h
@@ -0,0 +1,68 @@
+//===-- llvm/MC/MCInstBuilder.h - Simplify creation of MCInsts --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MCInstBuilder class for convenient creation of
+// MCInsts.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCINSTBUILDER_H
+#define LLVM_MC_MCINSTBUILDER_H
+
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+
+class MCInstBuilder {
+ MCInst Inst;
+
+public:
+ /// \brief Create a new MCInstBuilder for an MCInst with a specific opcode.
+ MCInstBuilder(unsigned Opcode) {
+ Inst.setOpcode(Opcode);
+ }
+
+ /// \brief Add a new register operand.
+ MCInstBuilder &addReg(unsigned Reg) {
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return *this;
+ }
+
+ /// \brief Add a new integer immediate operand.
+ MCInstBuilder &addImm(int64_t Val) {
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return *this;
+ }
+
+ /// \brief Add a new floating point immediate operand.
+ MCInstBuilder &addFPImm(double Val) {
+ Inst.addOperand(MCOperand::CreateFPImm(Val));
+ return *this;
+ }
+
+ /// \brief Add a new MCExpr operand.
+ MCInstBuilder &addExpr(const MCExpr *Val) {
+ Inst.addOperand(MCOperand::CreateExpr(Val));
+ return *this;
+ }
+
+ /// \brief Add a new MCInst operand.
+ MCInstBuilder &addInst(const MCInst *Val) {
+ Inst.addOperand(MCOperand::CreateInst(Val));
+ return *this;
+ }
+
+ operator MCInst&() {
+ return Inst;
+ }
+};
+
+} // end namespace llvm
+
+#endif
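
Each add* method returns *this and the final operator MCInst& hands the
result to any API taking a const MCInst&, so an instruction can be built in a
single expression. A usage sketch; the opcode and register enums are
placeholders, not real target values:

    // Emit "OPC dst, src, 42" without naming a temporary MCInst.
    OutStreamer.EmitInstruction(MCInstBuilder(MyTarget::OPC)
                                    .addReg(MyTarget::DST)
                                    .addReg(MyTarget::SRC)
                                    .addImm(42));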
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
index 3b9420a40389..a18cbd94bbbf 100644
--- a/include/llvm/MC/MCInstPrinter.h
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -10,6 +10,9 @@
#ifndef LLVM_MC_MCINSTPRINTER_H
#define LLVM_MC_MCINSTPRINTER_H
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Format.h"
+
namespace llvm {
class MCInst;
class raw_ostream;
@@ -36,13 +39,16 @@ protected:
/// True if we are printing marked up assembly.
bool UseMarkup;
+ /// True if we are printing immediates as hex.
+ bool PrintImmHex;
+
/// Utility function for printing annotations.
void printAnnotation(raw_ostream &OS, StringRef Annot);
public:
MCInstPrinter(const MCAsmInfo &mai, const MCInstrInfo &mii,
const MCRegisterInfo &mri)
: CommentStream(0), MAI(mai), MII(mii), MRI(mri), AvailableFeatures(0),
- UseMarkup(0) {}
+ UseMarkup(0), PrintImmHex(0) {}
virtual ~MCInstPrinter();
@@ -70,6 +76,12 @@ public:
/// Utility functions to make adding mark ups simpler.
StringRef markup(StringRef s) const;
StringRef markup(StringRef a, StringRef b) const;
+
+ bool getPrintImmHex() const { return PrintImmHex; }
+ void setPrintImmHex(bool Value) { PrintImmHex = Value; }
+
+ /// Utility function to print immediates in decimal or hex.
+ format_object1<int64_t> formatImm(const int64_t Value) const;
};
} // namespace llvm
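
With the flag and helper in place, a target's operand printer no longer needs
to hard-code a radix. A sketch of the intended pattern inside an MCInstPrinter
subclass (class and method names are illustrative):

    // Honors setPrintImmHex(): decimal by default, hex when enabled.
    void MyInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
                                        raw_ostream &O) {
      O << formatImm(MI->getOperand(OpNo).getImm());
    }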
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index 02383f8bc658..9b5415add241 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -15,6 +15,8 @@
#ifndef LLVM_MC_MCINSTRDESC_H
#define LLVM_MC_MCINSTRDESC_H
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -144,7 +146,7 @@ public:
const uint16_t *ImplicitDefs; // Registers implicitly defined by this instr
const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands
- /// getOperandConstraint - Returns the value of the specific constraint if
+ /// \brief Returns the value of the specific constraint if
/// it is set. Returns -1 if it is not set.
int getOperandConstraint(unsigned OpNum,
MCOI::OperandConstraint Constraint) const {
@@ -156,12 +158,12 @@ public:
return -1;
}
- /// getOpcode - Return the opcode number for this descriptor.
+ /// \brief Return the opcode number for this descriptor.
unsigned getOpcode() const {
return Opcode;
}
- /// getNumOperands - Return the number of declared MachineOperands for this
+ /// \brief Return the number of declared MachineOperands for this
/// MachineInstruction. Note that variadic (isVariadic() returns true)
/// instructions may have additional operands at the end of the list, and note
/// that the machine instruction may include implicit register def/uses as
@@ -170,7 +172,7 @@ public:
return NumOperands;
}
- /// getNumDefs - Return the number of MachineOperands that are register
+ /// \brief Return the number of MachineOperands that are register
/// definitions. Register definitions always occur at the start of the
/// machine operand list. This is the number of "outs" in the .td file,
/// and does not include implicit defs.
@@ -178,11 +180,10 @@ public:
return NumDefs;
}
- /// getFlags - Return flags of this instruction.
- ///
+ /// \brief Return flags of this instruction.
unsigned getFlags() const { return Flags; }
- /// isVariadic - Return true if this instruction can have a variable number of
+ /// \brief Return true if this instruction can have a variable number of
/// operands. In this case, the variable operands will be after the normal
/// operands but before the implicit definitions and uses (if any are
/// present).
@@ -190,35 +191,37 @@ public:
return Flags & (1 << MCID::Variadic);
}
- /// hasOptionalDef - Set if this instruction has an optional definition, e.g.
+ /// \brief Set if this instruction has an optional definition, e.g.
/// ARM instructions which can set condition code if 's' bit is set.
bool hasOptionalDef() const {
return Flags & (1 << MCID::HasOptionalDef);
}
- /// isPseudo - Return true if this is a pseudo instruction that doesn't
+ /// \brief Return true if this is a pseudo instruction that doesn't
/// correspond to a real machine instruction.
///
bool isPseudo() const {
return Flags & (1 << MCID::Pseudo);
}
+ /// \brief Return true if the instruction is a return.
bool isReturn() const {
return Flags & (1 << MCID::Return);
}
+ /// \brief Return true if the instruction is a call.
bool isCall() const {
return Flags & (1 << MCID::Call);
}
- /// isBarrier - Returns true if the specified instruction stops control flow
+ /// \brief Returns true if the specified instruction stops control flow
/// from executing the instruction immediately following it. Examples include
/// unconditional branches and return instructions.
bool isBarrier() const {
return Flags & (1 << MCID::Barrier);
}
- /// isTerminator - Returns true if this instruction part of the terminator for
+  /// \brief Returns true if this instruction is part of the terminator for
/// a basic block. Typically this is things like return and branch
/// instructions.
///
@@ -228,7 +231,7 @@ public:
return Flags & (1 << MCID::Terminator);
}
- /// isBranch - Returns true if this is a conditional, unconditional, or
+ /// \brief Returns true if this is a conditional, unconditional, or
/// indirect branch. Predicates below can be used to discriminate between
/// these cases, and the TargetInstrInfo::AnalyzeBranch method can be used to
/// get more information.
@@ -236,13 +239,13 @@ public:
return Flags & (1 << MCID::Branch);
}
- /// isIndirectBranch - Return true if this is an indirect branch, such as a
+ /// \brief Return true if this is an indirect branch, such as a
/// branch through a register.
bool isIndirectBranch() const {
return Flags & (1 << MCID::IndirectBranch);
}
- /// isConditionalBranch - Return true if this is a branch which may fall
+ /// \brief Return true if this is a branch which may fall
/// through to the next instruction or may transfer control flow to some other
/// block. The TargetInstrInfo::AnalyzeBranch method can be used to get more
/// information about this branch.
@@ -250,7 +253,7 @@ public:
return isBranch() & !isBarrier() & !isIndirectBranch();
}
- /// isUnconditionalBranch - Return true if this is a branch which always
+ /// \brief Return true if this is a branch which always
/// transfers control flow to some other block. The
/// TargetInstrInfo::AnalyzeBranch method can be used to get more information
/// about this branch.
@@ -258,38 +261,47 @@ public:
return isBranch() & isBarrier() & !isIndirectBranch();
}
- // isPredicable - Return true if this instruction has a predicate operand that
- // controls execution. It may be set to 'always', or may be set to other
- /// values. There are various methods in TargetInstrInfo that can be used to
+ /// \brief Return true if this is a branch or an instruction which directly
+  /// writes to the program counter. This is 'may' affect rather than 'does'
+  /// affect, since things like predication are not taken into account.
+ bool mayAffectControlFlow(const MCInst &MI, const MCRegisterInfo &RI) const {
+ if (isBranch() || isCall() || isReturn() || isIndirectBranch())
+ return true;
+ unsigned PC = RI.getProgramCounter();
+ if (PC == 0) return false;
+ return hasDefOfPhysReg(MI, PC, RI);
+ }
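
This gives generic code a conservative "may end the straight-line path" test
that also catches direct writes to the PC register. A sketch of a client
(helper name hypothetical):

    // True if decoding should stop extending the current basic block.
    bool mayEndBlock(const MCInstrInfo &MII, const MCRegisterInfo &MRI,
                     const MCInst &MI) {
      return MII.get(MI.getOpcode()).mayAffectControlFlow(MI, MRI);
    }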
+
+ /// \brief Return true if this instruction has a predicate operand
+ /// that controls execution. It may be set to 'always', or may be set to other
+ /// values. There are various methods in TargetInstrInfo that can be used to
/// control and modify the predicate in this instruction.
bool isPredicable() const {
return Flags & (1 << MCID::Predicable);
}
- /// isCompare - Return true if this instruction is a comparison.
+ /// \brief Return true if this instruction is a comparison.
bool isCompare() const {
return Flags & (1 << MCID::Compare);
}
- /// isMoveImmediate - Return true if this instruction is a move immediate
+ /// \brief Return true if this instruction is a move immediate
/// (including conditional moves) instruction.
bool isMoveImmediate() const {
return Flags & (1 << MCID::MoveImm);
}
- /// isBitcast - Return true if this instruction is a bitcast instruction.
- ///
+ /// \brief Return true if this instruction is a bitcast instruction.
bool isBitcast() const {
return Flags & (1 << MCID::Bitcast);
}
- /// isSelect - Return true if this is a select instruction.
- ///
+ /// \brief Return true if this is a select instruction.
bool isSelect() const {
return Flags & (1 << MCID::Select);
}
- /// isNotDuplicable - Return true if this instruction cannot be safely
+ /// \brief Return true if this instruction cannot be safely
/// duplicated. For example, if the instruction has a unique labels attached
/// to it, duplicating it would cause multiple definition errors.
bool isNotDuplicable() const {
@@ -318,7 +330,7 @@ public:
// Side Effect Analysis
//===--------------------------------------------------------------------===//
- /// mayLoad - Return true if this instruction could possibly read memory.
+ /// \brief Return true if this instruction could possibly read memory.
/// Instructions with this flag set are not necessarily simple load
/// instructions, they may load a value and modify it, for example.
bool mayLoad() const {
@@ -326,7 +338,7 @@ public:
}
- /// mayStore - Return true if this instruction could possibly modify memory.
+ /// \brief Return true if this instruction could possibly modify memory.
/// Instructions with this flag set are not necessarily simple store
/// instructions, they may store a modified value based on their operands, or
/// may not actually modify anything, for example.
@@ -459,8 +471,7 @@ public:
return ImplicitUses;
}
- /// getNumImplicitUses - Return the number of implicit uses this instruction
- /// has.
+ /// \brief Return the number of implicit uses this instruction has.
unsigned getNumImplicitUses() const {
if (ImplicitUses == 0) return 0;
unsigned i = 0;
@@ -482,8 +493,7 @@ public:
return ImplicitDefs;
}
- /// getNumImplicitDefs - Return the number of implicit defs this instruction
- /// has.
+  /// \brief Return the number of implicit defs this instruction has.
unsigned getNumImplicitDefs() const {
if (ImplicitDefs == 0) return 0;
unsigned i = 0;
@@ -491,7 +501,7 @@ public:
return i;
}
- /// hasImplicitUseOfPhysReg - Return true if this instruction implicitly
+ /// \brief Return true if this instruction implicitly
/// uses the specified physical register.
bool hasImplicitUseOfPhysReg(unsigned Reg) const {
if (const uint16_t *ImpUses = ImplicitUses)
@@ -500,31 +510,43 @@ public:
return false;
}
- /// hasImplicitDefOfPhysReg - Return true if this instruction implicitly
+ /// \brief Return true if this instruction implicitly
/// defines the specified physical register.
- bool hasImplicitDefOfPhysReg(unsigned Reg) const {
+ bool hasImplicitDefOfPhysReg(unsigned Reg,
+ const MCRegisterInfo *MRI = 0) const {
if (const uint16_t *ImpDefs = ImplicitDefs)
for (; *ImpDefs; ++ImpDefs)
- if (*ImpDefs == Reg) return true;
+ if (*ImpDefs == Reg || (MRI && MRI->isSubRegister(Reg, *ImpDefs)))
+ return true;
return false;
}
- /// getSchedClass - Return the scheduling class for this instruction. The
+ /// \brief Return true if this instruction defines the specified physical
+ /// register, either explicitly or implicitly.
+ bool hasDefOfPhysReg(const MCInst &MI, unsigned Reg,
+ const MCRegisterInfo &RI) const {
+ for (int i = 0, e = NumDefs; i != e; ++i)
+ if (MI.getOperand(i).isReg() &&
+ RI.isSubRegisterEq(Reg, MI.getOperand(i).getReg()))
+ return true;
+ return hasImplicitDefOfPhysReg(Reg, &RI);
+ }
+
+ /// \brief Return the scheduling class for this instruction. The
/// scheduling class is an index into the InstrItineraryData table. This
/// returns zero if there is no known scheduling information for the
/// instruction.
- ///
unsigned getSchedClass() const {
return SchedClass;
}
- /// getSize - Return the number of bytes in the encoding of this instruction,
+ /// \brief Return the number of bytes in the encoding of this instruction,
/// or zero if the encoding size cannot be known from the opcode.
unsigned getSize() const {
return Size;
}
- /// findFirstPredOperandIdx() - Find the index of the first operand in the
+ /// \brief Find the index of the first operand in the
/// operand list that is used to represent the predicate. It returns -1 if
/// none is found.
int findFirstPredOperandIdx() const {
diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h
index efaabfb9e88b..3c9a588d0413 100644
--- a/include/llvm/MC/MCMachObjectWriter.h
+++ b/include/llvm/MC/MCMachObjectWriter.h
@@ -45,6 +45,13 @@ protected:
public:
virtual ~MCMachObjectTargetWriter();
+ /// @name Lifetime Management
+ /// @{
+
+  virtual void reset() {}
+
+ /// @}
+
/// @name Accessors
/// @{
@@ -111,6 +118,13 @@ public:
: MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) {
}
+ /// @name Lifetime management Methods
+ /// @{
+
+ virtual void reset();
+
+ /// @}
+
/// @name Utility Methods
/// @{
@@ -182,6 +196,8 @@ public:
void WriteLinkeditLoadCommand(uint32_t Type, uint32_t DataOffset,
uint32_t DataSize);
+ void WriteLinkerOptionsLoadCommand(const std::vector<std::string> &Options);
+
// FIXME: We really need to improve the relocation validation. Basically, we
// want to implement a separate computation which evaluates the relocation
// entry as the linker would, and verifies that the resultant fixup value is
@@ -223,8 +239,6 @@ public:
/// ComputeSymbolTable - Compute the symbol table data
///
/// \param StringTable [out] - The string table data.
- /// \param StringIndexMap [out] - Map from symbol names to offsets in the
- /// string table.
void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
std::vector<MachSymbolData> &LocalSymbolData,
std::vector<MachSymbolData> &ExternalSymbolData,
diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h
index 23e5513ae35e..c8d748420e31 100644
--- a/include/llvm/MC/MCObjectFileInfo.h
+++ b/include/llvm/MC/MCObjectFileInfo.h
@@ -84,15 +84,6 @@ protected:
/// this is the section to emit them into.
const MCSection *CompactUnwindSection;
- /// DwarfAccelNamesSection, DwarfAccelObjCSection,
- /// DwarfAccelNamespaceSection, DwarfAccelTypesSection -
- /// If we use the DWARF accelerated hash tables then we want toe emit these
- /// sections.
- const MCSection *DwarfAccelNamesSection;
- const MCSection *DwarfAccelObjCSection;
- const MCSection *DwarfAccelNamespaceSection;
- const MCSection *DwarfAccelTypesSection;
-
// Dwarf sections for debug info. If a target supports debug info, these must
// be set.
const MCSection *DwarfAbbrevSection;
@@ -106,6 +97,28 @@ protected:
const MCSection *DwarfARangesSection;
const MCSection *DwarfRangesSection;
const MCSection *DwarfMacroInfoSection;
+ // The pubnames section is no longer generated by default. The generation
+ // can be enabled by a compiler flag.
+ const MCSection *DwarfPubNamesSection;
+
+ // DWARF5 Experimental Debug Info Sections
+ /// DwarfAccelNamesSection, DwarfAccelObjCSection,
+ /// DwarfAccelNamespaceSection, DwarfAccelTypesSection -
+ /// If we use the DWARF accelerated hash tables then we want to emit these
+ /// sections.
+ const MCSection *DwarfAccelNamesSection;
+ const MCSection *DwarfAccelObjCSection;
+ const MCSection *DwarfAccelNamespaceSection;
+ const MCSection *DwarfAccelTypesSection;
+
+ /// These are used for the Fission separate debug information files.
+ const MCSection *DwarfInfoDWOSection;
+ const MCSection *DwarfAbbrevDWOSection;
+ const MCSection *DwarfStrDWOSection;
+ const MCSection *DwarfLineDWOSection;
+ const MCSection *DwarfLocDWOSection;
+ const MCSection *DwarfStrOffDWOSection;
+ const MCSection *DwarfAddrSection;
// Extra TLS Variable Data section. If the target needs to put additional
// information for a TLS variable, it'll go here.
@@ -195,22 +208,11 @@ public:
const MCSection *getCompactUnwindSection() const{
return CompactUnwindSection;
}
- const MCSection *getDwarfAccelNamesSection() const {
- return DwarfAccelNamesSection;
- }
- const MCSection *getDwarfAccelObjCSection() const {
- return DwarfAccelObjCSection;
- }
- const MCSection *getDwarfAccelNamespaceSection() const {
- return DwarfAccelNamespaceSection;
- }
- const MCSection *getDwarfAccelTypesSection() const {
- return DwarfAccelTypesSection;
- }
const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
const MCSection *getDwarfLineSection() const { return DwarfLineSection; }
const MCSection *getDwarfFrameSection() const { return DwarfFrameSection; }
+ const MCSection *getDwarfPubNamesSection() const{return DwarfPubNamesSection;}
const MCSection *getDwarfPubTypesSection() const{return DwarfPubTypesSection;}
const MCSection *getDwarfDebugInlineSection() const {
return DwarfDebugInlineSection;
@@ -222,6 +224,42 @@ public:
const MCSection *getDwarfMacroInfoSection() const {
return DwarfMacroInfoSection;
}
+
+ // DWARF5 Experimental Debug Info Sections
+ const MCSection *getDwarfAccelNamesSection() const {
+ return DwarfAccelNamesSection;
+ }
+ const MCSection *getDwarfAccelObjCSection() const {
+ return DwarfAccelObjCSection;
+ }
+ const MCSection *getDwarfAccelNamespaceSection() const {
+ return DwarfAccelNamespaceSection;
+ }
+ const MCSection *getDwarfAccelTypesSection() const {
+ return DwarfAccelTypesSection;
+ }
+ const MCSection *getDwarfInfoDWOSection() const {
+ return DwarfInfoDWOSection;
+ }
+ const MCSection *getDwarfAbbrevDWOSection() const {
+ return DwarfAbbrevDWOSection;
+ }
+ const MCSection *getDwarfStrDWOSection() const {
+ return DwarfStrDWOSection;
+ }
+ const MCSection *getDwarfLineDWOSection() const {
+ return DwarfLineDWOSection;
+ }
+ const MCSection *getDwarfLocDWOSection() const {
+ return DwarfLocDWOSection;
+ }
+ const MCSection *getDwarfStrOffDWOSection() const {
+ return DwarfStrOffDWOSection;
+ }
+ const MCSection *getDwarfAddrSection() const {
+ return DwarfAddrSection;
+ }
+
const MCSection *getTLSExtraDataSection() const {
return TLSExtraDataSection;
}
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index 08b00f1c478e..f06c49ff082a 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -38,13 +38,18 @@ class MCObjectStreamer : public MCStreamer {
virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame);
protected:
- MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
+ MCObjectStreamer(StreamerKind Kind, MCContext &Context, MCAsmBackend &TAB,
raw_ostream &_OS, MCCodeEmitter *_Emitter);
- MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
+ MCObjectStreamer(StreamerKind Kind, MCContext &Context, MCAsmBackend &TAB,
raw_ostream &_OS, MCCodeEmitter *_Emitter,
MCAssembler *_Assembler);
~MCObjectStreamer();
+public:
+  /// State management
+ virtual void reset();
+
+protected:
MCSectionData *getCurrentSectionData() const {
return CurSectionData;
}
@@ -64,6 +69,8 @@ public:
/// @{
virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitDebugLabel(MCSymbol *Symbol);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
unsigned AddrSpace);
virtual void EmitULEB128Value(const MCExpr *Value);
@@ -71,8 +78,15 @@ public:
virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
virtual void ChangeSection(const MCSection *Section);
virtual void EmitInstruction(const MCInst &Inst);
+
+ /// \brief Emit an instruction to a special fragment, because this instruction
+ /// can change its size during relaxation.
virtual void EmitInstToFragment(const MCInst &Inst);
- virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+
+ virtual void EmitBundleAlignMode(unsigned AlignPow2);
+ virtual void EmitBundleLock(bool AlignToEnd);
+ virtual void EmitBundleUnlock();
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace = 0);
virtual void EmitValueToAlignment(unsigned ByteAlignment,
int64_t Value = 0,
unsigned ValueSize = 1,
@@ -89,10 +103,14 @@ public:
virtual void EmitGPRel32Value(const MCExpr *Value);
virtual void EmitGPRel64Value(const MCExpr *Value);
virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
- unsigned AddrSpace);
+ unsigned AddrSpace = 0);
virtual void FinishImpl();
/// @}
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() >= SK_ELFStreamer && S->getKind() <= SK_WinCOFFStreamer;
+ }
};
} // end namespace llvm
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index 14fe75fd4c31..4939a3f1fb07 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -10,9 +10,10 @@
#ifndef LLVM_MC_MCOBJECTWRITER_H
#define LLVM_MC_MCOBJECTWRITER_H
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
namespace llvm {
@@ -51,6 +52,9 @@ protected: // Can only create subclasses.
public:
virtual ~MCObjectWriter();
+  /// Lifetime management
+ virtual void reset() { }
+
bool isLittleEndian() const { return IsLittleEndian; }
raw_ostream &getStream() { return OS; }
@@ -58,15 +62,15 @@ public:
/// @name High-Level API
/// @{
- /// Perform any late binding of symbols (for example, to assign symbol indices
- /// for use when generating relocations).
+ /// \brief Perform any late binding of symbols (for example, to assign symbol
+ /// indices for use when generating relocations).
///
/// This routine is called by the assembler after layout and relaxation is
/// complete.
virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) = 0;
- /// Record a relocation entry.
+ /// \brief Record a relocation entry.
///
/// This routine is called by the assembler after layout and relaxation, and
/// post layout binding. The implementation is responsible for storing
@@ -96,8 +100,7 @@ public:
bool InSet,
bool IsPCRel) const;
-
- /// Write the object file.
+ /// \brief Write the object file.
///
/// This routine is called by the assembler after layout and relaxation is
/// complete, fixups have been evaluated and applied, and relocations
@@ -173,7 +176,13 @@ public:
OS << StringRef(Zeros, N % 16);
}
+ void WriteBytes(const SmallVectorImpl<char> &ByteVec, unsigned ZeroFillSize = 0) {
+ WriteBytes(StringRef(ByteVec.data(), ByteVec.size()), ZeroFillSize);
+ }
+
void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
+ // TODO: this version may need to go away once all fragment contents are
+ // converted to SmallVector<char, N>
assert((ZeroFillSize == 0 || Str.size () <= ZeroFillSize) &&
"data size greater than fill size, unexpected large write will occur");
OS << Str;
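The SmallVectorImpl overload added above lets callers hand fragment contents straight to the writer; as the TODO notes, it simply wraps the bytes in a StringRef. A minimal sketch, assuming Contents holds the raw bytes to emit:

#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCObjectWriter.h"

// The overload forwards to the StringRef version, so this is equivalent to
// OW.WriteBytes(StringRef(Contents.data(), Contents.size())).
static void writeFragmentContents(llvm::MCObjectWriter &OW,
                                  const llvm::SmallVectorImpl<char> &Contents) {
  OW.WriteBytes(Contents);
}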
diff --git a/include/llvm/MC/MCParser/AsmCond.h b/include/llvm/MC/MCParser/AsmCond.h
index 92a115eb8038..a918b5600ed5 100644
--- a/include/llvm/MC/MCParser/AsmCond.h
+++ b/include/llvm/MC/MCParser/AsmCond.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ASMCOND_H
-#define ASMCOND_H
+#ifndef LLVM_MC_MCPARSER_ASMCOND_H
+#define LLVM_MC_MCPARSER_ASMCOND_H
namespace llvm {
diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h
index e102dfb82c4a..0dab31489fbb 100644
--- a/include/llvm/MC/MCParser/AsmLexer.h
+++ b/include/llvm/MC/MCParser/AsmLexer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ASMLEXER_H
-#define ASMLEXER_H
+#ifndef LLVM_MC_MCPARSER_ASMLEXER_H
+#define LLVM_MC_MCPARSER_ASMLEXER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index 0a961d6d0971..53b380f12f71 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_MC_MCASMLEXER_H
-#define LLVM_MC_MCASMLEXER_H
+#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
+#define LLVM_MC_MCPARSER_MCASMLEXER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
@@ -34,9 +34,6 @@ public:
// Real values.
Real,
- // Register values (stored in IntVal). Only used by MCTargetAsmLexer.
- Register,
-
// No-value.
EndOfStatement,
Colon,
@@ -104,13 +101,6 @@ public:
assert(Kind == Integer && "This token isn't an integer!");
return IntVal;
}
-
- /// getRegVal - Get the register number for the current token, which should
- /// be a register.
- unsigned getRegVal() const {
- assert(Kind == Register && "This token isn't a register!");
- return static_cast<unsigned>(IntVal);
- }
};
/// MCAsmLexer - Generic assembler lexer interface, for use by target specific
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index a71d3c321741..d7e3902ac478 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -7,14 +7,15 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_MC_MCASMPARSER_H
-#define LLVM_MC_MCASMPARSER_H
+#ifndef LLVM_MC_MCPARSER_MCASMPARSER_H
+#define LLVM_MC_MCPARSER_MCASMPARSER_H
-#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
-class AsmToken;
class MCAsmInfo;
class MCAsmLexer;
class MCAsmParserExtension;
@@ -22,13 +23,11 @@ class MCContext;
class MCExpr;
class MCInstPrinter;
class MCInstrInfo;
-class MCParsedAsmOperand;
class MCStreamer;
class MCTargetAsmParser;
class SMLoc;
class SMRange;
class SourceMgr;
-class StringRef;
class Twine;
/// MCAsmParserSemaCallback - Generic Sema callback for assembly parser.
@@ -36,16 +35,21 @@ class MCAsmParserSemaCallback {
public:
virtual ~MCAsmParserSemaCallback();
virtual void *LookupInlineAsmIdentifier(StringRef Name, void *Loc,
- unsigned &Size) = 0;
+ unsigned &Length, unsigned &Size,
+ unsigned &Type, bool &IsVarDecl) = 0;
+
virtual bool LookupInlineAsmField(StringRef Base, StringRef Member,
unsigned &Offset) = 0;
};
+
/// MCAsmParser - Generic assembler parser interface, for use by target specific
/// assembly parsers.
class MCAsmParser {
public:
typedef bool (*DirectiveHandler)(MCAsmParserExtension*, StringRef, SMLoc);
+ typedef std::pair<MCAsmParserExtension*, DirectiveHandler>
+ ExtensionDirectiveHandler;
private:
MCAsmParser(const MCAsmParser &) LLVM_DELETED_FUNCTION;
@@ -61,9 +65,8 @@ protected: // Can only create subclasses.
public:
virtual ~MCAsmParser();
- virtual void AddDirectiveHandler(MCAsmParserExtension *Object,
- StringRef Directive,
- DirectiveHandler Handler) = 0;
+ virtual void addDirectiveHandler(StringRef Directive,
+ ExtensionDirectiveHandler Handler) = 0;
virtual SourceMgr &getSourceManager() = 0;
@@ -89,8 +92,8 @@ public:
virtual void setParsingInlineAsm(bool V) = 0;
virtual bool isParsingInlineAsm() = 0;
- /// ParseMSInlineAsm - Parse ms-style inline assembly.
- virtual bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
+ /// parseMSInlineAsm - Parse MS-style inline assembly.
+ virtual bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned &NumOutputs, unsigned &NumInputs,
SmallVectorImpl<std::pair<void *, bool> > &OpDecls,
SmallVectorImpl<std::string> &Constraints,
@@ -123,42 +126,50 @@ public:
bool TokError(const Twine &Msg,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
- /// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
+ /// parseIdentifier - Parse an identifier or string (as a quoted identifier)
/// and set \p Res to the identifier contents.
- virtual bool ParseIdentifier(StringRef &Res) = 0;
+ virtual bool parseIdentifier(StringRef &Res) = 0;
/// \brief Parse up to the end of statement and return the contents from the
/// current token until the end of the statement; the current token on exit
/// will be either the EndOfStatement or EOF.
- virtual StringRef ParseStringToEndOfStatement() = 0;
+ virtual StringRef parseStringToEndOfStatement() = 0;
+
+ /// parseEscapedString - Parse the current token as a string which may include
+ /// escaped characters and return the string contents.
+ virtual bool parseEscapedString(std::string &Data) = 0;
- /// EatToEndOfStatement - Skip to the end of the current statement, for error
+ /// eatToEndOfStatement - Skip to the end of the current statement, for error
/// recovery.
- virtual void EatToEndOfStatement() = 0;
+ virtual void eatToEndOfStatement() = 0;
- /// ParseExpression - Parse an arbitrary expression.
+ /// parseExpression - Parse an arbitrary expression.
///
/// @param Res - The value of the expression. The result is undefined
/// on error.
/// @result - False on success.
- virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0;
- bool ParseExpression(const MCExpr *&Res);
+ virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0;
+ bool parseExpression(const MCExpr *&Res);
- /// ParseParenExpression - Parse an arbitrary expression, assuming that an
+ /// parseParenExpression - Parse an arbitrary expression, assuming that an
/// initial '(' has already been consumed.
///
/// @param Res - The value of the expression. The result is undefined
/// on error.
/// @result - False on success.
- virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0;
+ virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0;
- /// ParseAbsoluteExpression - Parse an expression which must evaluate to an
+ /// parseAbsoluteExpression - Parse an expression which must evaluate to an
/// absolute value.
///
/// @param Res - The value of the absolute expression. The result is undefined
/// on error.
/// @result - False on success.
- virtual bool ParseAbsoluteExpression(int64_t &Res) = 0;
+ virtual bool parseAbsoluteExpression(int64_t &Res) = 0;
+
+ /// checkForValidSection - Ensure that we have a valid section set in the
+ /// streamer. Otherwise, report an error and switch to .text.
+ virtual void checkForValidSection() = 0;
};
/// \brief Create an MCAsmParser instance.
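With addDirectiveHandler() taking an ExtensionDirectiveHandler, an extension now registers itself and its dispatch function as a single pair rather than as separate arguments. A sketch of the registration pattern, assuming the HandleDirective dispatch template that MCAsmParserExtension provides (the extension class and directive are hypothetical):

#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include <utility>

namespace {
class ExampleAsmParser : public llvm::MCAsmParserExtension {
  bool parseDirectiveFoo(llvm::StringRef, llvm::SMLoc) { return false; }

public:
  virtual void Initialize(llvm::MCAsmParser &Parser) {
    MCAsmParserExtension::Initialize(Parser);
    // The extension object and its handler travel together as one pair.
    getParser().addDirectiveHandler(
        ".foo",
        std::make_pair(this,
                       HandleDirective<ExampleAsmParser,
                                       &ExampleAsmParser::parseDirectiveFoo>));
  }
};
} // end anonymous namespace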
diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h
index 0918c93bdf3d..2eda3a9a2143 100644
--- a/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -7,11 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_MC_MCASMPARSEREXTENSION_H
-#define LLVM_MC_MCASMPARSEREXTENSION_H
+#ifndef LLVM_MC_MCPARSER_MCASMPARSEREXTENSION_H
+#define LLVM_MC_MCPARSER_MCASMPARSEREXTENSION_H
-#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/Support/SMLoc.h"
namespace llvm {
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 60e7887a5396..4650bf21be7e 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_MC_MCASMOPERAND_H
-#define LLVM_MC_MCASMOPERAND_H
+#ifndef LLVM_MC_MCPARSER_MCPARSEDASMOPERAND_H
+#define LLVM_MC_MCPARSER_MCPARSEDASMOPERAND_H
namespace llvm {
class SMLoc;
@@ -57,18 +57,15 @@ public:
/// isMem - Is this a memory operand?
virtual bool isMem() const = 0;
- virtual unsigned getMemSize() const { return 0; }
/// getStartLoc - Get the location of the first token of this operand.
virtual SMLoc getStartLoc() const = 0;
/// getEndLoc - Get the location of the last token of this operand.
virtual SMLoc getEndLoc() const = 0;
- /// needAsmRewrite - AsmRewrites happen in both the target-independent and
- /// target-dependent parsers. The target-independent parser calls this
- /// function to determine if the target-dependent parser has already taken
- /// care of the rewrites. Only valid when parsing MS-style inline assembly.
- virtual bool needAsmRewrite() const { return true; }
+ /// needAddressOf - Do we need to emit code to get the address of the
+ /// variable/label? Only valid when parsing MS-style inline assembly.
+ virtual bool needAddressOf() const { return false; }
/// isOffsetOf - Do we need to emit code to get the offset of the variable,
/// rather than the value of the variable? Only valid when parsing MS-style
@@ -78,10 +75,6 @@ public:
/// getOffsetOfLoc - Get the location of the offset operator.
virtual SMLoc getOffsetOfLoc() const { return SMLoc(); }
- /// needSizeDirective - Do we need to emit a sizing directive for this
- /// operand? Only valid when parsing MS-style inline assembly.
- virtual bool needSizeDirective() const { return false; }
-
/// print - Print a debug representation of the operand to the given stream.
virtual void print(raw_ostream &OS) const = 0;
/// dump - Print to the debug stream.
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
index f05baeaaf689..f5b4dddc5198 100644
--- a/include/llvm/MC/MCRegisterInfo.h
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -22,11 +22,15 @@
namespace llvm {
+/// An unsigned integer type large enough to represent all physical registers,
+/// but not necessarily virtual registers.
+typedef uint16_t MCPhysReg;
+
/// MCRegisterClass - Base class of TargetRegisterClass.
class MCRegisterClass {
public:
- typedef const uint16_t* iterator;
- typedef const uint16_t* const_iterator;
+ typedef const MCPhysReg* iterator;
+ typedef const MCPhysReg* const_iterator;
const char *Name;
const iterator RegsBegin;
@@ -148,11 +152,12 @@ private:
const MCRegisterDesc *Desc; // Pointer to the descriptor array
unsigned NumRegs; // Number of entries in the array
unsigned RAReg; // Return address register
+ unsigned PCReg; // Program counter register
const MCRegisterClass *Classes; // Pointer to the regclass array
unsigned NumClasses; // Number of entries in the array
unsigned NumRegUnits; // Number of regunits.
const uint16_t (*RegUnitRoots)[2]; // Pointer to regunit root table.
- const uint16_t *DiffLists; // Pointer to the difflists array
+ const MCPhysReg *DiffLists; // Pointer to the difflists array
const char *RegStrings; // Pointer to the string table.
const uint16_t *SubRegIndices; // Pointer to the subreg lookup
// array.
@@ -177,7 +182,7 @@ public:
/// defined below.
class DiffListIterator {
uint16_t Val;
- const uint16_t *List;
+ const MCPhysReg *List;
protected:
/// Create an invalid iterator. Call init() to point to something useful.
@@ -186,7 +191,7 @@ public:
/// init - Point the iterator to InitVal, decoding subsequent values from
/// DiffList. The iterator will initially point to InitVal, sub-classes are
/// responsible for skipping the seed value if it is not part of the list.
- void init(uint16_t InitVal, const uint16_t *DiffList) {
+ void init(MCPhysReg InitVal, const MCPhysReg *DiffList) {
Val = InitVal;
List = DiffList;
}
@@ -196,7 +201,7 @@ public:
/// is the caller's responsibility (by checking for a 0 return value).
unsigned advance() {
assert(isValid() && "Cannot move off the end of the list.");
- uint16_t D = *List++;
+ MCPhysReg D = *List++;
Val += D;
return D;
}
@@ -225,13 +230,14 @@ public:
friend class MCRegUnitIterator;
friend class MCRegUnitRootIterator;
- /// InitMCRegisterInfo - Initialize MCRegisterInfo, called by TableGen
+ /// \brief Initialize MCRegisterInfo, called by TableGen
/// auto-generated routines. *DO NOT USE*.
void InitMCRegisterInfo(const MCRegisterDesc *D, unsigned NR, unsigned RA,
+ unsigned PC,
const MCRegisterClass *C, unsigned NC,
const uint16_t (*RURoots)[2],
unsigned NRU,
- const uint16_t *DL,
+ const MCPhysReg *DL,
const char *Strings,
const uint16_t *SubIndices,
unsigned NumIndices,
@@ -239,6 +245,7 @@ public:
Desc = D;
NumRegs = NR;
RAReg = RA;
+ PCReg = PC;
Classes = C;
DiffLists = DL;
RegStrings = Strings;
@@ -250,7 +257,7 @@ public:
RegEncodingTable = RET;
}
- /// mapLLVMRegsToDwarfRegs - Used to initialize LLVM register to Dwarf
+ /// \brief Used to initialize LLVM register to Dwarf
/// register number mapping. Called by TableGen auto-generated routines.
/// *DO NOT USE*.
void mapLLVMRegsToDwarfRegs(const DwarfLLVMRegPair *Map, unsigned Size,
@@ -264,7 +271,7 @@ public:
}
}
- /// mapDwarfRegsToLLVMRegs - Used to initialize Dwarf register to LLVM
+ /// \brief Used to initialize Dwarf register to LLVM
/// register number mapping. Called by TableGen auto-generated routines.
/// *DO NOT USE*.
void mapDwarfRegsToLLVMRegs(const DwarfLLVMRegPair *Map, unsigned Size,
@@ -287,77 +294,80 @@ public:
L2SEHRegs[LLVMReg] = SEHReg;
}
- /// getRARegister - This method should return the register where the return
+ /// \brief This method should return the register where the return
/// address can be found.
unsigned getRARegister() const {
return RAReg;
}
+ /// Return the register which is the program counter.
+ unsigned getProgramCounter() const {
+ return PCReg;
+ }
+
const MCRegisterDesc &operator[](unsigned RegNo) const {
assert(RegNo < NumRegs &&
"Attempting to access record for invalid register number!");
return Desc[RegNo];
}
- /// Provide a get method, equivalent to [], but more useful if we have a
+ /// \brief Provide a get method, equivalent to [], but more useful with a
/// pointer to this object.
- ///
const MCRegisterDesc &get(unsigned RegNo) const {
return operator[](RegNo);
}
- /// getSubReg - Returns the physical register number of sub-register "Index"
+ /// \brief Returns the physical register number of sub-register "Index"
/// for physical register RegNo. Return zero if the sub-register does not
/// exist.
unsigned getSubReg(unsigned Reg, unsigned Idx) const;
- /// getMatchingSuperReg - Return a super-register of the specified register
+ /// \brief Return a super-register of the specified register
/// Reg so its sub-register of index SubIdx is Reg.
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
const MCRegisterClass *RC) const;
- /// getSubRegIndex - For a given register pair, return the sub-register index
+ /// \brief For a given register pair, return the sub-register index
/// if the second register is a sub-register of the first. Return zero
/// otherwise.
unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const;
- /// getName - Return the human-readable symbolic target-specific name for the
+ /// \brief Return the human-readable symbolic target-specific name for the
/// specified physical register.
const char *getName(unsigned RegNo) const {
return RegStrings + get(RegNo).Name;
}
- /// getNumRegs - Return the number of registers this target has (useful for
+ /// \brief Return the number of registers this target has (useful for
/// sizing arrays holding per register information)
unsigned getNumRegs() const {
return NumRegs;
}
- /// getNumSubRegIndices - Return the number of sub-register indices
+ /// \brief Return the number of sub-register indices
/// understood by the target. Index 0 is reserved for the no-op sub-register,
/// while 1 to getNumSubRegIndices() - 1 represent real sub-registers.
unsigned getNumSubRegIndices() const {
return NumSubRegIndices;
}
- /// getNumRegUnits - Return the number of (native) register units in the
+ /// \brief Return the number of (native) register units in the
/// target. Register units are numbered from 0 to getNumRegUnits() - 1. They
/// can be accessed through MCRegUnitIterator defined below.
unsigned getNumRegUnits() const {
return NumRegUnits;
}
- /// getDwarfRegNum - Map a target register to an equivalent dwarf register
+ /// \brief Map a target register to an equivalent dwarf register
/// number. Returns -1 if there is no equivalent value. The second
/// parameter allows targets to use different numberings for EH info and
/// debugging info.
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- /// getLLVMRegNum - Map a dwarf register back to a target register.
- ///
+ /// \brief Map a dwarf register back to a target register.
int getLLVMRegNum(unsigned RegNum, bool isEH) const;
- /// getSEHRegNum - Map a target register to an equivalent SEH register
+ /// \brief Map a target register to an equivalent SEH register
/// number. Returns LLVM register number if there is no equivalent value.
int getSEHRegNum(unsigned RegNum) const;
@@ -368,20 +378,39 @@ public:
return (unsigned)(regclass_end()-regclass_begin());
}
- /// getRegClass - Returns the register class associated with the enumeration
+ /// \brief Returns the register class associated with the enumeration
/// value. See class MCOperandInfo.
const MCRegisterClass& getRegClass(unsigned i) const {
assert(i < getNumRegClasses() && "Register Class ID out of range");
return Classes[i];
}
- /// getEncodingValue - Returns the encoding for RegNo
+ /// \brief Returns the encoding for RegNo
uint16_t getEncodingValue(unsigned RegNo) const {
assert(RegNo < NumRegs &&
"Attempting to get encoding for invalid register number!");
return RegEncodingTable[RegNo];
}
+ /// \brief Returns true if RegB is a sub-register of RegA.
+ bool isSubRegister(unsigned RegA, unsigned RegB) const {
+ return isSuperRegister(RegB, RegA);
+ }
+
+ /// \brief Returns true if RegB is a super-register of RegA.
+ bool isSuperRegister(unsigned RegA, unsigned RegB) const;
+
+ /// \brief Returns true if RegB is a sub-register of RegA or if RegB == RegA.
+ bool isSubRegisterEq(unsigned RegA, unsigned RegB) const {
+ return isSuperRegisterEq(RegB, RegA);
+ }
+
+ /// \brief Returns true if RegB is a super-register of RegA or if
+ /// RegB == RegA.
+ bool isSuperRegisterEq(unsigned RegA, unsigned RegB) const {
+ return RegA == RegB || isSuperRegister(RegA, RegB);
+ }
+
};
//===----------------------------------------------------------------------===//
@@ -422,6 +451,15 @@ public:
}
};
+// Definition for isSuperRegister. Put it down here since it needs the
+// iterator defined above in addition to the MCRegisterInfo class itself.
+inline bool MCRegisterInfo::isSuperRegister(unsigned RegA, unsigned RegB) const {
+ for (MCSuperRegIterator I(RegA, this); I.isValid(); ++I)
+ if (*I == RegB)
+ return true;
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Register Units
//===----------------------------------------------------------------------===//
@@ -441,6 +479,7 @@ public:
/// MCRegUnitIterator - Create an iterator that traverses the register units
/// in Reg.
MCRegUnitIterator(unsigned Reg, const MCRegisterInfo *MCRI) {
+ assert(Reg && "Null register has no regunits");
// Decode the RegUnits MCRegisterDesc field.
unsigned RU = MCRI->get(Reg).RegUnits;
unsigned Scale = RU & 15;
@@ -480,17 +519,17 @@ public:
Reg1 = MCRI->RegUnitRoots[RegUnit][1];
}
- /// Dereference to get the current root register.
+ /// \brief Dereference to get the current root register.
unsigned operator*() const {
return Reg0;
}
- /// isValid - Check if the iterator is at the end of the list.
+ /// \brief Check if the iterator is at the end of the list.
bool isValid() const {
return Reg0;
}
- /// Preincrement to move to the next root register.
+ /// \brief Preincrement to move to the next root register.
void operator++() {
assert(isValid() && "Cannot move off the end of the list.");
Reg0 = Reg1;
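MCPhysReg and the isSubRegister/isSuperRegister family make register-hierarchy queries available at the MC layer, without reaching for TargetRegisterInfo. A minimal sketch, assuming RegA and RegB are physical register numbers from a target's generated enum:

#include "llvm/MC/MCRegisterInfo.h"

// True if the registers coincide or one contains the other in the
// sub/super-register graph; built purely on the new MC-layer predicates.
static bool relatedOrEqual(const llvm::MCRegisterInfo &MRI,
                           unsigned RegA, unsigned RegB) {
  return MRI.isSuperRegisterEq(RegA, RegB) || MRI.isSubRegister(RegA, RegB);
}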
diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h
index 0c71ee513500..defa29903543 100644
--- a/include/llvm/MC/MCSchedule.h
+++ b/include/llvm/MC/MCSchedule.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_MC_MCSCHEDMODEL_H
-#define LLVM_MC_MCSCHEDMODEL_H
+#ifndef LLVM_MC_MCSCHEDULE_H
+#define LLVM_MC_MCSCHEDULE_H
#include "llvm/Support/DataTypes.h"
#include <cassert>
@@ -155,7 +155,7 @@ public:
// Optional InstrItinerary OperandCycles provides expected latency.
// TODO: can't yet specify both min and expected latency per operand.
int MinLatency;
- static const unsigned DefaultMinLatency = -1;
+ static const int DefaultMinLatency = -1;
// LoadLatency is the expected latency of load instructions.
//
@@ -172,6 +172,16 @@ public:
unsigned HighLatency;
static const unsigned DefaultHighLatency = 10;
+ // ILPWindow is the number of cycles that the scheduler effectively ignores
+ // before attempting to hide latency. This should be zero for in-order CPUs to
+ // always hide expected latency. For out-of-order CPUs, it may be tweaked as
+ // desired to roughly approximate instruction buffers. The actual threshold is
+ // not very important for an OOO processor, as long as it isn't too high. A
+ // nonzero value helps avoid rescheduling to hide latency when it is fairly
+ // obviously useless and makes register pressure heuristics more effective.
+ unsigned ILPWindow;
+ static const unsigned DefaultILPWindow = 0;
+
// MispredictPenalty is the typical number of extra cycles the processor
// takes to recover from a branch misprediction.
unsigned MispredictPenalty;
@@ -196,6 +206,7 @@ public:
MinLatency(DefaultMinLatency),
LoadLatency(DefaultLoadLatency),
HighLatency(DefaultHighLatency),
+ ILPWindow(DefaultILPWindow),
MispredictPenalty(DefaultMispredictPenalty),
ProcID(0), ProcResourceTable(0), SchedClassTable(0),
NumProcResourceKinds(0), NumSchedClasses(0),
@@ -205,12 +216,12 @@ public:
}
// Table-gen driven ctor.
- MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned mp,
- unsigned pi, const MCProcResourceDesc *pr,
+ MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned ilp,
+ unsigned mp, unsigned pi, const MCProcResourceDesc *pr,
const MCSchedClassDesc *sc, unsigned npr, unsigned nsc,
const InstrItinerary *ii):
IssueWidth(iw), MinLatency(ml), LoadLatency(ll), HighLatency(hl),
- MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr),
+ ILPWindow(ilp), MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr),
SchedClassTable(sc), NumProcResourceKinds(npr), NumSchedClasses(nsc),
InstrItineraries(ii) {}
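ILPWindow is a tuning knob rather than a precise machine model: the scheduler only works to hide a latency once it exceeds the window. An illustrative check, not code from this patch, of how a heuristic might consume it:

#include "llvm/MC/MCSchedule.h"

// With the in-order default of 0 every expected stall is worth hiding;
// a nonzero out-of-order window lets short stalls go unscheduled.
static bool worthHiding(const llvm::MCSchedModel &SM, unsigned StallCycles) {
  return StallCycles > SM.ILPWindow;
}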
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index 21fdb6bd39b8..e5754249e91b 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -14,6 +14,7 @@
#ifndef LLVM_MC_MCSECTION_H
#define LLVM_MC_MCSECTION_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Compiler.h"
@@ -49,6 +50,11 @@ namespace llvm {
virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
raw_ostream &OS) const = 0;
+ // Convenience routines to get label names for the beginning/end of a
+ // section.
+ virtual std::string getLabelBeginName() const = 0;
+ virtual std::string getLabelEndName() const = 0;
+
/// isBaseAddressKnownZero - Return true if we know that this section will
/// get a base address of zero. In cases where we know that this is true we
/// can emit section offsets as direct references to avoid a subtraction
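getLabelBeginName()/getLabelEndName() give every section format a predictable naming scheme for its begin/end labels, as the per-format overrides below show. A minimal sketch of turning such a name into a symbol, assuming the existing MCContext::GetOrCreateSymbol API:

#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSection.h"

// Materialize the begin label a section advertises for itself.
static llvm::MCSymbol *getSectionBegin(llvm::MCContext &Ctx,
                                       const llvm::MCSection &Sec) {
  return Ctx.GetOrCreateSymbol(llvm::Twine(Sec.getLabelBeginName()));
}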
diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h
index b050c0f442b6..07c47144cbdc 100644
--- a/include/llvm/MC/MCSectionCOFF.h
+++ b/include/llvm/MC/MCSectionCOFF.h
@@ -14,9 +14,9 @@
#ifndef LLVM_MC_MCSECTIONCOFF_H
#define LLVM_MC_MCSECTIONCOFF_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCSection.h"
#include "llvm/Support/COFF.h"
-#include "llvm/ADT/StringRef.h"
namespace llvm {
@@ -50,6 +50,12 @@ namespace llvm {
bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
StringRef getSectionName() const { return SectionName; }
+ virtual std::string getLabelBeginName() const {
+ return SectionName.str() + "_begin";
+ }
+ virtual std::string getLabelEndName() const {
+ return SectionName.str() + "_end";
+ }
unsigned getCharacteristics() const { return Characteristics; }
int getSelection () const { return Selection; }
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 4d54465760d4..4b8b849c79ed 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -14,9 +14,11 @@
#ifndef LLVM_MC_MCSECTIONELF_H
#define LLVM_MC_MCSECTIONELF_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
-#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -57,6 +59,11 @@ public:
bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
StringRef getSectionName() const { return SectionName; }
+ virtual std::string getLabelBeginName() const {
+ return SectionName.str() + "_begin";
+ }
+ virtual std::string getLabelEndName() const {
+ return SectionName.str() + "_end";
+ }
unsigned getType() const { return Type; }
unsigned getFlags() const { return Flags; }
unsigned getEntrySize() const { return EntrySize; }
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 71ea8f3e901d..898f5714907f 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -14,8 +14,8 @@
#ifndef LLVM_MC_MCSECTIONMACHO_H
#define LLVM_MC_MCSECTIONMACHO_H
-#include "llvm/MC/MCSection.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCSection.h"
namespace llvm {
@@ -145,6 +145,14 @@ public:
return StringRef(SectionName);
}
+ virtual std::string getLabelBeginName() const {
+ return getSegmentName().str() + getSectionName().str() + "_begin";
+ }
+
+ virtual std::string getLabelEndName() const {
+ return getSegmentName().str() + getSectionName().str() + "_end";
+ }
+
unsigned getTypeAndAttributes() const { return TypeAndAttributes; }
unsigned getStubSize() const { return Reserved2; }
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 230d27ef2ef0..a069a2b0cafa 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -14,12 +14,14 @@
#ifndef LLVM_MC_MCSTREAMER_H
#define LLVM_MC_MCSTREAMER_H
-#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCWin64EH.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include <string>
namespace llvm {
class MCAsmBackend;
@@ -45,6 +47,23 @@ namespace llvm {
/// a .s file, and implementations that write out .o files of various formats.
///
class MCStreamer {
+ public:
+ enum StreamerKind {
+ SK_AsmStreamer,
+ SK_NullStreamer,
+ SK_RecordStreamer,
+
+ // MCObjectStreamer subclasses.
+ SK_ELFStreamer,
+ SK_ARMELFStreamer,
+ SK_MachOStreamer,
+ SK_PureStreamer,
+ SK_MipsELFStreamer,
+ SK_WinCOFFStreamer
+ };
+
+ private:
+ const StreamerKind Kind;
MCContext &Context;
MCStreamer(const MCStreamer&) LLVM_DELETED_FUNCTION;
@@ -55,6 +74,7 @@ namespace llvm {
std::vector<MCDwarfFrameInfo> FrameInfos;
MCDwarfFrameInfo *getCurrentFrameInfo();
+ MCSymbol *EmitCFICommon();
void EnsureValidFrame();
std::vector<MCWin64EHUnwindInfo *> W64UnwindInfos;
@@ -69,8 +89,10 @@ namespace llvm {
SmallVector<std::pair<const MCSection *,
const MCSection *>, 4> SectionStack;
+ bool AutoInitSections;
+
protected:
- MCStreamer(MCContext &Ctx);
+ MCStreamer(StreamerKind Kind, MCContext &Ctx);
const MCExpr *BuildSymbolDiff(MCContext &Context, const MCSymbol *A,
const MCSymbol *B);
@@ -89,6 +111,12 @@ namespace llvm {
public:
virtual ~MCStreamer();
+ StreamerKind getKind() const { return Kind; }
+
+ /// State management
+ ///
+ virtual void reset();
+
MCContext &getContext() const { return Context; }
unsigned getNumFrameInfos() {
@@ -213,9 +241,23 @@ namespace llvm {
SectionStack.back().first = Section;
}
+ /// Initialize the streamer.
+ void InitStreamer() {
+ if (AutoInitSections)
+ InitSections();
+ }
+
+ /// Tell this MCStreamer to call InitSections upon initialization.
+ void setAutoInitSections(bool AutoInitSections) {
+ this->AutoInitSections = AutoInitSections;
+ }
+
/// InitSections - Create the default sections and set the initial one.
virtual void InitSections() = 0;
+ /// InitToTextSection - Create a text section and switch the streamer to it.
+ virtual void InitToTextSection() = 0;
+
/// EmitLabel - Emit a label for @p Symbol into the current section.
///
/// This corresponds to an assembler statement such as:
@@ -226,12 +268,18 @@ namespace llvm {
/// used in an assignment.
virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitDebugLabel(MCSymbol *Symbol);
+
virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol);
/// EmitAssemblerFlag - Note in the output the specified @p Flag.
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) = 0;
+ /// EmitLinkerOptions - Emit the given list @p Options of strings as linker
+ /// options into the output.
+ virtual void EmitLinkerOptions(ArrayRef<std::string> Options) {}
+
/// EmitDataRegion - Note in the output the specified region @p Kind.
virtual void EmitDataRegion(MCDataRegionType Kind) {}
@@ -239,6 +287,9 @@ namespace llvm {
/// a Thumb mode function (ARM target only).
virtual void EmitThumbFunc(MCSymbol *Func) = 0;
+ /// getOrCreateSymbolData - Get symbol data for given symbol.
+ virtual MCSymbolData &getOrCreateSymbolData(MCSymbol *Symbol);
+
/// EmitAssignment - Emit an assignment of @p Value to @p Symbol.
///
/// This corresponds to an assembler statement such as:
@@ -346,7 +397,7 @@ namespace llvm {
///
/// This is used to implement assembler directives such as .byte, .ascii,
/// etc.
- virtual void EmitBytes(StringRef Data, unsigned AddrSpace) = 0;
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace = 0) = 0;
/// EmitValue - Emit the expression @p Value into the output as a native
/// integer of the given @p Size bytes.
@@ -380,8 +431,8 @@ namespace llvm {
/// EmitULEB128IntValue - Special case of EmitULEB128Value that avoids the
/// client having to pass in an MCExpr for constant integers.
- void EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace = 0,
- unsigned Padding = 0);
+ void EmitULEB128IntValue(uint64_t Value, unsigned Padding = 0,
+ unsigned AddrSpace = 0);
/// EmitSLEB128IntValue - Special case of EmitSLEB128Value that avoids the
/// client having to pass in an MCExpr for constant integers.
@@ -409,15 +460,14 @@ namespace llvm {
/// EmitFill - Emit NumBytes bytes worth of the value specified by
/// FillValue. This implements directives such as '.space'.
virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
- unsigned AddrSpace);
+ unsigned AddrSpace = 0);
/// EmitZeros - Emit NumBytes worth of zeros. This is a convenience
/// function that just wraps EmitFill.
- void EmitZeros(uint64_t NumBytes, unsigned AddrSpace) {
+ void EmitZeros(uint64_t NumBytes, unsigned AddrSpace = 0) {
EmitFill(NumBytes, 0, AddrSpace);
}
-
/// EmitValueToAlignment - Emit some number of copies of @p Value until
/// the byte alignment @p ByteAlignment is reached.
///
@@ -475,7 +525,7 @@ namespace llvm {
/// file number. This implements the DWARF2 '.file 4 "foo.c"' assembler
/// directive.
virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
- StringRef Filename);
+ StringRef Filename, unsigned CUID = 0);
/// EmitDwarfLocDirective - This implements the DWARF2
// '.loc fileno lineno ...' assembler directive.
@@ -515,6 +565,8 @@ namespace llvm {
virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
virtual void EmitCFIEscape(StringRef Values);
virtual void EmitCFISignalFrame();
+ virtual void EmitCFIUndefined(int64_t Register);
+ virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
virtual void EmitWin64EHEndProc();
@@ -535,6 +587,20 @@ namespace llvm {
/// section.
virtual void EmitInstruction(const MCInst &Inst) = 0;
+ /// \brief Set the bundle alignment mode from now on in the section.
+ /// The argument is the power of 2 to which the alignment is set. The
+ /// value 0 means turn the bundle alignment off.
+ virtual void EmitBundleAlignMode(unsigned AlignPow2) = 0;
+
+ /// \brief The following instructions are a bundle-locked group.
+ ///
+ /// \param AlignToEnd - If true, the bundle-locked group will be aligned to
+ /// the end of a bundle.
+ virtual void EmitBundleLock(bool AlignToEnd) = 0;
+
+ /// \brief Ends a bundle-locked group.
+ virtual void EmitBundleUnlock() = 0;
+
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate. By default this aborts.
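The bundle interface is a bracketed protocol: pick an alignment mode for the section, then wrap any group that must not be split in a lock/unlock pair. A minimal sketch, assuming 16-byte NaCl-style bundles:

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"

// Emit two instructions that must land inside a single 16-byte bundle.
static void emitLockedPair(llvm::MCStreamer &Out,
                           const llvm::MCInst &A, const llvm::MCInst &B) {
  Out.EmitBundleAlignMode(4); // bundles are 2^4 = 16 bytes
  Out.EmitBundleLock(false);  // false: no align-to-end constraint
  Out.EmitInstruction(A);
  Out.EmitInstruction(B);
  Out.EmitBundleUnlock();
}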
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h
index 69213cd77d92..346fb2df0ffc 100644
--- a/include/llvm/MC/MCSubtargetInfo.h
+++ b/include/llvm/MC/MCSubtargetInfo.h
@@ -14,8 +14,8 @@
#ifndef LLVM_MC_MCSUBTARGET_H
#define LLVM_MC_MCSUBTARGET_H
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/SubtargetFeature.h"
#include <string>
namespace llvm {
diff --git a/include/llvm/MC/MCTargetAsmLexer.h b/include/llvm/MC/MCTargetAsmLexer.h
deleted file mode 100644
index b1cc546e1efa..000000000000
--- a/include/llvm/MC/MCTargetAsmLexer.h
+++ /dev/null
@@ -1,89 +0,0 @@
-//===-- llvm/MC/MCTargetAsmLexer.h - Target Assembly Lexer ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_MCTARGETASMLEXER_H
-#define LLVM_MC_MCTARGETASMLEXER_H
-
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-
-namespace llvm {
-class Target;
-
-/// MCTargetAsmLexer - Generic interface to target specific assembly lexers.
-class MCTargetAsmLexer {
- /// The current token
- AsmToken CurTok;
-
- /// The location and description of the current error
- SMLoc ErrLoc;
- std::string Err;
-
- MCTargetAsmLexer(const MCTargetAsmLexer &) LLVM_DELETED_FUNCTION;
- void operator=(const MCTargetAsmLexer &) LLVM_DELETED_FUNCTION;
-protected: // Can only create subclasses.
- MCTargetAsmLexer(const Target &);
-
- virtual AsmToken LexToken() = 0;
-
- void SetError(const SMLoc &errLoc, const std::string &err) {
- ErrLoc = errLoc;
- Err = err;
- }
-
- /// TheTarget - The Target that this machine was created for.
- const Target &TheTarget;
- MCAsmLexer *Lexer;
-
-public:
- virtual ~MCTargetAsmLexer();
-
- const Target &getTarget() const { return TheTarget; }
-
- /// InstallLexer - Set the lexer to get tokens from lower-level lexer \p L.
- void InstallLexer(MCAsmLexer &L) {
- Lexer = &L;
- }
-
- MCAsmLexer *getLexer() {
- return Lexer;
- }
-
- /// Lex - Consume the next token from the input stream and return it.
- const AsmToken &Lex() {
- return CurTok = LexToken();
- }
-
- /// getTok - Get the current (last) lexed token.
- const AsmToken &getTok() {
- return CurTok;
- }
-
- /// getErrLoc - Get the current error location
- const SMLoc &getErrLoc() {
- return ErrLoc;
- }
-
- /// getErr - Get the current error string
- const std::string &getErr() {
- return Err;
- }
-
- /// getKind - Get the kind of current token.
- AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
-
- /// is - Check if the current token has kind \p K.
- bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
-
- /// isNot - Check if the current token has kind \p K.
- bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index 483a80b3b595..4c5b17612569 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -22,6 +22,7 @@ class MCInst;
template <typename T> class SmallVectorImpl;
enum AsmRewriteKind {
+ AOK_Align, // Rewrite align as .align.
AOK_DotOperator, // Rewrite a dot operator expression as an immediate.
// E.g., [eax].foo.bar -> [eax].8
AOK_Emit, // Rewrite _emit as .byte.
@@ -142,6 +143,15 @@ public:
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm) = 0;
+ /// Allow a target to add special case operand matching for things that
+ /// tblgen doesn't/can't handle effectively. For example, literal
+ /// immediates on ARM. TableGen expects a token operand, but the parser
+ /// will recognize them as immediates.
+ virtual unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned Kind) {
+ return Match_InvalidOperand;
+ }
+
/// checkTargetMatchPredicate - Validate the instruction match against
/// any complex target predicates not expressible via match classes.
virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
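validateTargetOperandClass() gives a target parser a last chance to accept an operand the generated matcher rejected. A fragment-style sketch; MyTargetAsmParser, MyTargetOperand and MCK_FooImm are hypothetical names and the surrounding class is elided:

// Accept an immediate where tblgen expected a token-classified operand.
unsigned MyTargetAsmParser::validateTargetOperandClass(
    llvm::MCParsedAsmOperand *Op, unsigned Kind) {
  MyTargetOperand *MO = static_cast<MyTargetOperand *>(Op);
  if (Kind == MCK_FooImm && MO->isImm())
    return Match_Success; // matcher retries with the operand re-classified
  return Match_InvalidOperand;
}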
diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h
index f9af8bcfbf61..a4e730111484 100644
--- a/include/llvm/MC/MCValue.h
+++ b/include/llvm/MC/MCValue.h
@@ -14,8 +14,8 @@
#ifndef LLVM_MC_MCVALUE_H
#define LLVM_MC_MCVALUE_H
-#include "llvm/Support/DataTypes.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/DataTypes.h"
#include <cassert>
namespace llvm {
diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h
index 7a0b1ffaf0a0..11df5749d450 100644
--- a/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -11,6 +11,9 @@
#define LLVM_MC_MCWINCOFFOBJECTWRITER_H
namespace llvm {
+ class MCObjectWriter;
+ class raw_ostream;
+
class MCWinCOFFObjectTargetWriter {
const unsigned Machine;
diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h
index 57f0518cbf3a..37ae03b45ca2 100644
--- a/include/llvm/MC/SubtargetFeature.h
+++ b/include/llvm/MC/SubtargetFeature.h
@@ -18,9 +18,9 @@
#ifndef LLVM_MC_SUBTARGETFEATURE_H
#define LLVM_MC_SUBTARGETFEATURE_H
-#include <vector>
#include "llvm/ADT/Triple.h"
#include "llvm/Support/DataTypes.h"
+#include <vector>
namespace llvm {
class raw_ostream;
diff --git a/include/llvm/MDBuilder.h b/include/llvm/MDBuilder.h
deleted file mode 100644
index 1867a639236e..000000000000
--- a/include/llvm/MDBuilder.h
+++ /dev/null
@@ -1,162 +0,0 @@
-//===---- llvm/MDBuilder.h - Builder for LLVM metadata ----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the MDBuilder class, which is used as a convenient way to
-// create LLVM metadata with a consistent and simplified interface.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MDBUILDER_H
-#define LLVM_MDBUILDER_H
-
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/ADT/APInt.h"
-
-namespace llvm {
-
- class MDBuilder {
- LLVMContext &Context;
-
- public:
- MDBuilder(LLVMContext &context) : Context(context) {}
-
- /// \brief Return the given string as metadata.
- MDString *createString(StringRef Str) {
- return MDString::get(Context, Str);
- }
-
- //===------------------------------------------------------------------===//
- // FPMath metadata.
- //===------------------------------------------------------------------===//
-
- /// \brief Return metadata with the given settings. The special value 0.0
- /// for the Accuracy parameter indicates the default (maximal precision)
- /// setting.
- MDNode *createFPMath(float Accuracy) {
- if (Accuracy == 0.0)
- return 0;
- assert(Accuracy > 0.0 && "Invalid fpmath accuracy!");
- Value *Op = ConstantFP::get(Type::getFloatTy(Context), Accuracy);
- return MDNode::get(Context, Op);
- }
-
- //===------------------------------------------------------------------===//
- // Prof metadata.
- //===------------------------------------------------------------------===//
-
- /// \brief Return metadata containing two branch weights.
- MDNode *createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight) {
- uint32_t Weights[] = { TrueWeight, FalseWeight };
- return createBranchWeights(Weights);
- }
-
- /// \brief Return metadata containing a number of branch weights.
- MDNode *createBranchWeights(ArrayRef<uint32_t> Weights) {
- assert(Weights.size() >= 2 && "Need at least two branch weights!");
-
- SmallVector<Value *, 4> Vals(Weights.size()+1);
- Vals[0] = createString("branch_weights");
-
- Type *Int32Ty = Type::getInt32Ty(Context);
- for (unsigned i = 0, e = Weights.size(); i != e; ++i)
- Vals[i+1] = ConstantInt::get(Int32Ty, Weights[i]);
-
- return MDNode::get(Context, Vals);
- }
-
- //===------------------------------------------------------------------===//
- // Range metadata.
- //===------------------------------------------------------------------===//
-
- /// \brief Return metadata describing the range [Lo, Hi).
- MDNode *createRange(const APInt &Lo, const APInt &Hi) {
- assert(Lo.getBitWidth() == Hi.getBitWidth() && "Mismatched bitwidths!");
- // If the range is everything then it is useless.
- if (Hi == Lo)
- return 0;
-
- // Return the range [Lo, Hi).
- Type *Ty = IntegerType::get(Context, Lo.getBitWidth());
- Value *Range[2] = { ConstantInt::get(Ty, Lo), ConstantInt::get(Ty, Hi) };
- return MDNode::get(Context, Range);
- }
-
-
- //===------------------------------------------------------------------===//
- // TBAA metadata.
- //===------------------------------------------------------------------===//
-
- /// \brief Return metadata appropriate for a TBAA root node. Each returned
- /// node is distinct from all other metadata and will never be identified
- /// (uniqued) with anything else.
- MDNode *createAnonymousTBAARoot() {
- // To ensure uniqueness the root node is self-referential.
- MDNode *Dummy = MDNode::getTemporary(Context, ArrayRef<Value*>());
- MDNode *Root = MDNode::get(Context, Dummy);
- // At this point we have
- // !0 = metadata !{} <- dummy
- // !1 = metadata !{metadata !0} <- root
- // Replace the dummy operand with the root node itself and delete the dummy.
- Root->replaceOperandWith(0, Root);
- MDNode::deleteTemporary(Dummy);
- // We now have
- // !1 = metadata !{metadata !1} <- self-referential root
- return Root;
- }
-
- /// \brief Return metadata appropriate for a TBAA root node with the given
- /// name. This may be identified (uniqued) with other roots with the same
- /// name.
- MDNode *createTBAARoot(StringRef Name) {
- return MDNode::get(Context, createString(Name));
- }
-
- /// \brief Return metadata for a non-root TBAA node with the given name,
- /// parent in the TBAA tree, and value for 'pointsToConstantMemory'.
- MDNode *createTBAANode(StringRef Name, MDNode *Parent,
- bool isConstant = false) {
- if (isConstant) {
- Constant *Flags = ConstantInt::get(Type::getInt64Ty(Context), 1);
- Value *Ops[3] = { createString(Name), Parent, Flags };
- return MDNode::get(Context, Ops);
- } else {
- Value *Ops[2] = { createString(Name), Parent };
- return MDNode::get(Context, Ops);
- }
- }
-
- struct TBAAStructField {
- uint64_t Offset;
- uint64_t Size;
- MDNode *TBAA;
- TBAAStructField(uint64_t Offset, uint64_t Size, MDNode *TBAA) :
- Offset(Offset), Size(Size), TBAA(TBAA) {}
- };
-
- /// \brief Return metadata for a tbaa.struct node with the given
- /// struct field descriptions.
- MDNode *createTBAAStructNode(ArrayRef<TBAAStructField> Fields) {
- SmallVector<Value *, 4> Vals(Fields.size() * 3);
- Type *Int64 = IntegerType::get(Context, 64);
- for (unsigned i = 0, e = Fields.size(); i != e; ++i) {
- Vals[i * 3 + 0] = ConstantInt::get(Int64, Fields[i].Offset);
- Vals[i * 3 + 1] = ConstantInt::get(Int64, Fields[i].Size);
- Vals[i * 3 + 2] = Fields[i].TBAA;
- }
- return MDNode::get(Context, Vals);
- }
-
- };
-
-} // end namespace llvm
-
-#endif
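llvm/MDBuilder.h is not dropped outright: this import tracks the reorganization that moved the core IR headers under include/llvm/IR/ (an assumption based on that reorganization; the class itself is unchanged). A minimal usage sketch against the new path:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h" // formerly llvm/MDBuilder.h

// Build !{"branch_weights", i32 90, i32 10} profile metadata.
static llvm::MDNode *makeWeights(llvm::LLVMContext &Ctx) {
  llvm::MDBuilder MDB(Ctx);
  return MDB.createBranchWeights(/*TrueWeight=*/90, /*FalseWeight=*/10);
}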
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
deleted file mode 100644
index 0fbbb959888b..000000000000
--- a/include/llvm/Metadata.h
+++ /dev/null
@@ -1,242 +0,0 @@
-//===-- llvm/Metadata.h - Metadata definitions ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// @file
-/// This file contains the declarations for metadata subclasses.
-/// They represent the different flavors of metadata that live in LLVM.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_METADATA_H
-#define LLVM_METADATA_H
-
-#include "llvm/Value.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/ilist_node.h"
-
-namespace llvm {
-class Constant;
-class Instruction;
-class LLVMContext;
-class Module;
-template <typename T> class SmallVectorImpl;
-template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
-
-
-//===----------------------------------------------------------------------===//
-/// MDString - a single uniqued string.
-/// These are used to efficiently contain a byte sequence for metadata.
-/// MDString is always unnamed.
-class MDString : public Value {
- virtual void anchor();
- MDString(const MDString &) LLVM_DELETED_FUNCTION;
-
- explicit MDString(LLVMContext &C);
-public:
- static MDString *get(LLVMContext &Context, StringRef Str);
- static MDString *get(LLVMContext &Context, const char *Str) {
- return get(Context, Str ? StringRef(Str) : StringRef());
- }
-
- StringRef getString() const { return getName(); }
-
- unsigned getLength() const { return (unsigned)getName().size(); }
-
- typedef StringRef::iterator iterator;
-
- /// begin() - Pointer to the first byte of the string.
- iterator begin() const { return getName().begin(); }
-
- /// end() - Pointer to one byte past the end of the string.
- iterator end() const { return getName().end(); }
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Value *V) {
- return V->getValueID() == MDStringVal;
- }
-};
-
-
-class MDNodeOperand;
-
-//===----------------------------------------------------------------------===//
-/// MDNode - a tuple of other values.
-class MDNode : public Value, public FoldingSetNode {
- MDNode(const MDNode &) LLVM_DELETED_FUNCTION;
- void operator=(const MDNode &) LLVM_DELETED_FUNCTION;
- friend class MDNodeOperand;
- friend class LLVMContextImpl;
- friend struct FoldingSetTrait<MDNode>;
-
- /// Hash - If the MDNode is uniqued cache the hash to speed up lookup.
- unsigned Hash;
-
- /// NumOperands - This many 'MDNodeOperand' items are co-allocated onto the
- /// end of this MDNode.
- unsigned NumOperands;
-
- // Subclass data enums.
- enum {
- /// FunctionLocalBit - This bit is set if this MDNode is function local.
- /// This is true when it (potentially transitively) contains a reference to
- /// something in a function, like an argument, basicblock, or instruction.
- FunctionLocalBit = 1 << 0,
-
- /// NotUniquedBit - This is set on MDNodes that are not uniqued because they
- /// have a null operand.
- NotUniquedBit = 1 << 1,
-
- /// DestroyFlag - This bit is set by destroy() so the destructor can assert
- /// that the node isn't being destroyed with a plain 'delete'.
- DestroyFlag = 1 << 2
- };
-
- // FunctionLocal enums.
- enum FunctionLocalness {
- FL_Unknown = -1,
- FL_No = 0,
- FL_Yes = 1
- };
-
- /// replaceOperand - Replace each instance of F from the operand list of this
- /// node with T.
- void replaceOperand(MDNodeOperand *Op, Value *NewVal);
- ~MDNode();
-
- MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal);
-
- static MDNode *getMDNode(LLVMContext &C, ArrayRef<Value*> Vals,
- FunctionLocalness FL, bool Insert = true);
-public:
- // Constructors and destructors.
- static MDNode *get(LLVMContext &Context, ArrayRef<Value*> Vals);
- // getWhenValsUnresolved - Construct MDNode determining function-localness
- // from isFunctionLocal argument, not by analyzing Vals.
- static MDNode *getWhenValsUnresolved(LLVMContext &Context,
- ArrayRef<Value*> Vals,
- bool isFunctionLocal);
-
- static MDNode *getIfExists(LLVMContext &Context, ArrayRef<Value*> Vals);
-
- /// getTemporary - Return a temporary MDNode, for use in constructing
- /// cyclic MDNode structures. A temporary MDNode is not uniqued,
- /// may be RAUW'd, and must be manually deleted with deleteTemporary.
- static MDNode *getTemporary(LLVMContext &Context, ArrayRef<Value*> Vals);
-
- /// deleteTemporary - Deallocate a node created by getTemporary. The
- /// node must not have any users.
- static void deleteTemporary(MDNode *N);
-
- /// replaceOperandWith - Replace a specific operand.
- void replaceOperandWith(unsigned i, Value *NewVal);
-
- /// getOperand - Return specified operand.
- Value *getOperand(unsigned i) const;
-
- /// getNumOperands - Return number of MDNode operands.
- unsigned getNumOperands() const { return NumOperands; }
-
- /// isFunctionLocal - Return whether MDNode is local to a function.
- bool isFunctionLocal() const {
- return (getSubclassDataFromValue() & FunctionLocalBit) != 0;
- }
-
- // getFunction - If this metadata is function-local and recursively has a
- // function-local operand, return the first such operand's parent function.
- // Otherwise, return null. getFunction() should not be used for performance-
- // critical code because it recursively visits all the MDNode's operands.
- const Function *getFunction() const;
-
- /// Profile - calculate a unique identifier for this MDNode to collapse
- /// duplicates
- void Profile(FoldingSetNodeID &ID) const;
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Value *V) {
- return V->getValueID() == MDNodeVal;
- }
-
- /// Methods for metadata merging.
- static MDNode *getMostGenericTBAA(MDNode *A, MDNode *B);
- static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B);
- static MDNode *getMostGenericRange(MDNode *A, MDNode *B);
-private:
- // destroy - Delete this node. Only when there are no uses.
- void destroy();
-
- bool isNotUniqued() const {
- return (getSubclassDataFromValue() & NotUniquedBit) != 0;
- }
- void setIsNotUniqued();
-
- // Shadow Value::setValueSubclassData with a private forwarding method so that
- // any future subclasses cannot accidentally use it.
- void setValueSubclassData(unsigned short D) {
- Value::setValueSubclassData(D);
- }
-};
-
-//===----------------------------------------------------------------------===//
-/// NamedMDNode - a tuple of MDNodes. Despite its name, a NamedMDNode isn't
-/// itself an MDNode. NamedMDNodes belong to modules, have names, and contain
-/// lists of MDNodes.
-class NamedMDNode : public ilist_node<NamedMDNode> {
- friend class SymbolTableListTraits<NamedMDNode, Module>;
- friend struct ilist_traits<NamedMDNode>;
- friend class LLVMContextImpl;
- friend class Module;
- NamedMDNode(const NamedMDNode &) LLVM_DELETED_FUNCTION;
-
- std::string Name;
- Module *Parent;
- void *Operands; // SmallVector<TrackingVH<MDNode>, 4>
-
- void setParent(Module *M) { Parent = M; }
-
- explicit NamedMDNode(const Twine &N);
-
-public:
- /// eraseFromParent - Drop all references and remove the node from parent
- /// module.
- void eraseFromParent();
-
- /// dropAllReferences - Remove all uses and clear node vector.
- void dropAllReferences();
-
- /// ~NamedMDNode - Destroy NamedMDNode.
- ~NamedMDNode();
-
- /// getParent - Get the module that holds this named metadata collection.
- inline Module *getParent() { return Parent; }
- inline const Module *getParent() const { return Parent; }
-
- /// getOperand - Return specified operand.
- MDNode *getOperand(unsigned i) const;
-
- /// getNumOperands - Return the number of NamedMDNode operands.
- unsigned getNumOperands() const;
-
- /// addOperand - Add metadata operand.
- void addOperand(MDNode *M);
-
- /// getName - Return a constant reference to this named metadata's name.
- StringRef getName() const;
-
- /// print - Implement operator<< on NamedMDNode.
- void print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW = 0) const;
-
- /// dump() - Allow printing of NamedMDNodes from the debugger.
- void dump() const;
-};
-
-} // end llvm namespace
-
-#endif
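Likewise llvm/Metadata.h moves under include/llvm/IR/ (same assumption as above); the MDString/MDNode API is unchanged. A minimal sketch against the new path:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h" // formerly llvm/Metadata.h

// Create a one-operand MDNode wrapping a uniqued MDString.
static llvm::MDNode *makeTag(llvm::LLVMContext &Ctx) {
  llvm::Value *Ops[] = { llvm::MDString::get(Ctx, "example.tag") };
  return llvm::MDNode::get(Ctx, Ops);
}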
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
deleted file mode 100644
index e6303ac7752d..000000000000
--- a/include/llvm/Module.h
+++ /dev/null
@@ -1,605 +0,0 @@
-//===-- llvm/Module.h - C++ class to represent a VM module ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// @file
-/// Module.h This file contains the declarations for the Module class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MODULE_H
-#define LLVM_MODULE_H
-
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Metadata.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/DataTypes.h"
-#include <vector>
-
-namespace llvm {
-
-class FunctionType;
-class GVMaterializer;
-class LLVMContext;
-class StructType;
-template<typename T> struct DenseMapInfo;
-template<typename KeyT, typename ValueT, typename KeyInfoT> class DenseMap;
-
-template<> struct ilist_traits<Function>
- : public SymbolTableListTraits<Function, Module> {
-
- // createSentinel is used to get hold of the node that marks the end of the
- // list... (same trick used here as in ilist_traits<Instruction>)
- Function *createSentinel() const {
- return static_cast<Function*>(&Sentinel);
- }
- static void destroySentinel(Function*) {}
-
- Function *provideInitialHead() const { return createSentinel(); }
- Function *ensureHead(Function*) const { return createSentinel(); }
- static void noteHead(Function*, Function*) {}
-
-private:
- mutable ilist_node<Function> Sentinel;
-};
-
-template<> struct ilist_traits<GlobalVariable>
- : public SymbolTableListTraits<GlobalVariable, Module> {
- // createSentinel is used to create a node that marks the end of the list.
- GlobalVariable *createSentinel() const {
- return static_cast<GlobalVariable*>(&Sentinel);
- }
- static void destroySentinel(GlobalVariable*) {}
-
- GlobalVariable *provideInitialHead() const { return createSentinel(); }
- GlobalVariable *ensureHead(GlobalVariable*) const { return createSentinel(); }
- static void noteHead(GlobalVariable*, GlobalVariable*) {}
-private:
- mutable ilist_node<GlobalVariable> Sentinel;
-};
-
-template<> struct ilist_traits<GlobalAlias>
- : public SymbolTableListTraits<GlobalAlias, Module> {
- // createSentinel is used to create a node that marks the end of the list.
- GlobalAlias *createSentinel() const {
- return static_cast<GlobalAlias*>(&Sentinel);
- }
- static void destroySentinel(GlobalAlias*) {}
-
- GlobalAlias *provideInitialHead() const { return createSentinel(); }
- GlobalAlias *ensureHead(GlobalAlias*) const { return createSentinel(); }
- static void noteHead(GlobalAlias*, GlobalAlias*) {}
-private:
- mutable ilist_node<GlobalAlias> Sentinel;
-};
-
-template<> struct ilist_traits<NamedMDNode>
- : public ilist_default_traits<NamedMDNode> {
- // createSentinel is used to get hold of a node that marks the end of
- // the list...
- NamedMDNode *createSentinel() const {
- return static_cast<NamedMDNode*>(&Sentinel);
- }
- static void destroySentinel(NamedMDNode*) {}
-
- NamedMDNode *provideInitialHead() const { return createSentinel(); }
- NamedMDNode *ensureHead(NamedMDNode*) const { return createSentinel(); }
- static void noteHead(NamedMDNode*, NamedMDNode*) {}
- void addNodeToList(NamedMDNode *) {}
- void removeNodeFromList(NamedMDNode *) {}
-private:
- mutable ilist_node<NamedMDNode> Sentinel;
-};
-
-/// A Module instance is used to store all the information related to an
-/// LLVM module. Modules are the top level container of all other LLVM
-/// Intermediate Representation (IR) objects. Each module directly contains a
-/// list of global variables, a list of functions, a list of libraries (or
-/// other modules) this module depends on, a symbol table, and various data
-/// about the target's characteristics.
-///
-/// A module maintains a GlobalValueRefMap object that is used to hold all
-/// constant references to global variables in the module. When a global
-/// variable is destroyed, it should have no entries in the GlobalValueRefMap.
-/// @brief The main container class for the LLVM Intermediate Representation.
-class Module {
-/// @name Types And Enumerations
-/// @{
-public:
- /// The type for the list of global variables.
- typedef iplist<GlobalVariable> GlobalListType;
- /// The type for the list of functions.
- typedef iplist<Function> FunctionListType;
- /// The type for the list of aliases.
- typedef iplist<GlobalAlias> AliasListType;
- /// The type for the list of named metadata.
- typedef ilist<NamedMDNode> NamedMDListType;
-
- /// The type for the list of dependent libraries.
- typedef std::vector<std::string> LibraryListType;
-
- /// The Global Variable iterator.
- typedef GlobalListType::iterator global_iterator;
- /// The Global Variable constant iterator.
- typedef GlobalListType::const_iterator const_global_iterator;
-
- /// The Function iterators.
- typedef FunctionListType::iterator iterator;
- /// The Function constant iterator
- typedef FunctionListType::const_iterator const_iterator;
-
- /// The Global Alias iterators.
- typedef AliasListType::iterator alias_iterator;
- /// The Global Alias constant iterator
- typedef AliasListType::const_iterator const_alias_iterator;
-
- /// The named metadata iterators.
- typedef NamedMDListType::iterator named_metadata_iterator;
-  /// The named metadata constant iterators.
- typedef NamedMDListType::const_iterator const_named_metadata_iterator;
- /// The Library list iterator.
- typedef LibraryListType::const_iterator lib_iterator;
-
-  /// An enumeration for describing the endianness of the target machine.
- enum Endianness { AnyEndianness, LittleEndian, BigEndian };
-
- /// An enumeration for describing the size of a pointer on the target machine.
- enum PointerSize { AnyPointerSize, Pointer32, Pointer64 };
-
- /// An enumeration for the supported behaviors of module flags. The following
- /// module flags behavior values are supported:
- ///
- /// Value Behavior
- /// ----- --------
- /// 1 Error
- /// Emits an error if two values disagree.
- ///
- /// 2 Warning
- /// Emits a warning if two values disagree.
- ///
- /// 3 Require
- /// Emits an error when the specified value is not present
- /// or doesn't have the specified value. It is an error for
- /// two (or more) llvm.module.flags with the same ID to have
- /// the Require behavior but different values. There may be
- /// multiple Require flags per ID.
- ///
- /// 4 Override
- /// Uses the specified value if the two values disagree. It
- /// is an error for two (or more) llvm.module.flags with the
- /// same ID to have the Override behavior but different
- /// values.
- enum ModFlagBehavior { Error = 1, Warning = 2, Require = 3, Override = 4 };
-
- struct ModuleFlagEntry {
- ModFlagBehavior Behavior;
- MDString *Key;
- Value *Val;
- ModuleFlagEntry(ModFlagBehavior B, MDString *K, Value *V)
- : Behavior(B), Key(K), Val(V) {}
- };
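The behavior table above is exercised through addModuleFlag, declared under the Module Flags Accessors group further down. A minimal sketch, assuming an existing Module M; the flag name and value are purely illustrative:

    // Error: merging two modules that disagree on this flag is a hard error.
    M.addModuleFlag(llvm::Module::Error, "my-abi-version", 2);
    // Flags are stored as ordinary named metadata and can be inspected later:
    if (llvm::NamedMDNode *Flags = M.getModuleFlagsMetadata())
      Flags->dump();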
-
-/// @}
-/// @name Member Variables
-/// @{
-private:
- LLVMContext &Context; ///< The LLVMContext from which types and
- ///< constants are allocated.
- GlobalListType GlobalList; ///< The Global Variables in the module
- FunctionListType FunctionList; ///< The Functions in the module
- AliasListType AliasList; ///< The Aliases in the module
- LibraryListType LibraryList; ///< The Libraries needed by the module
- NamedMDListType NamedMDList; ///< The named metadata in the module
- std::string GlobalScopeAsm; ///< Inline Asm at global scope.
- ValueSymbolTable *ValSymTab; ///< Symbol table for values
- OwningPtr<GVMaterializer> Materializer; ///< Used to materialize GlobalValues
- std::string ModuleID; ///< Human readable identifier for the module
-  std::string TargetTriple;      ///< Platform target triple the module was compiled for
- std::string DataLayout; ///< Target data description
- void *NamedMDSymTab; ///< NamedMDNode names.
-
- friend class Constant;
-
-/// @}
-/// @name Constructors
-/// @{
-public:
- /// The Module constructor. Note that there is no default constructor. You
- /// must provide a name for the module upon construction.
- explicit Module(StringRef ModuleID, LLVMContext& C);
- /// The module destructor. This will dropAllReferences.
- ~Module();
-
-/// @}
-/// @name Module Level Accessors
-/// @{
-
- /// Get the module identifier which is, essentially, the name of the module.
- /// @returns the module identifier as a string
- const std::string &getModuleIdentifier() const { return ModuleID; }
-
- /// Get the data layout string for the module's target platform. This encodes
- /// the type sizes and alignments expected by this module.
- /// @returns the data layout as a string
- const std::string &getDataLayout() const { return DataLayout; }
-
- /// Get the target triple which is a string describing the target host.
- /// @returns a string containing the target triple.
- const std::string &getTargetTriple() const { return TargetTriple; }
-
- /// Get the target endian information.
-  /// @returns Endianness - an enumeration for the endianness of the target
- Endianness getEndianness() const;
-
- /// Get the target pointer size.
- /// @returns PointerSize - an enumeration for the size of the target's pointer
- PointerSize getPointerSize() const;
-
- /// Get the global data context.
- /// @returns LLVMContext - a container for LLVM's global information
- LLVMContext &getContext() const { return Context; }
-
- /// Get any module-scope inline assembly blocks.
- /// @returns a string containing the module-scope inline assembly blocks.
- const std::string &getModuleInlineAsm() const { return GlobalScopeAsm; }
-
-/// @}
-/// @name Module Level Mutators
-/// @{
-
- /// Set the module identifier.
- void setModuleIdentifier(StringRef ID) { ModuleID = ID; }
-
- /// Set the data layout
- void setDataLayout(StringRef DL) { DataLayout = DL; }
-
- /// Set the target triple.
- void setTargetTriple(StringRef T) { TargetTriple = T; }
-
- /// Set the module-scope inline assembly blocks.
- void setModuleInlineAsm(StringRef Asm) {
- GlobalScopeAsm = Asm;
- if (!GlobalScopeAsm.empty() &&
- GlobalScopeAsm[GlobalScopeAsm.size()-1] != '\n')
- GlobalScopeAsm += '\n';
- }
-
- /// Append to the module-scope inline assembly blocks, automatically inserting
- /// a separating newline if necessary.
- void appendModuleInlineAsm(StringRef Asm) {
- GlobalScopeAsm += Asm;
- if (!GlobalScopeAsm.empty() &&
- GlobalScopeAsm[GlobalScopeAsm.size()-1] != '\n')
- GlobalScopeAsm += '\n';
- }
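Both mutators normalize a trailing newline, so callers can chain fragments without tracking separators. A small sketch (Module M assumed):

    M.setModuleInlineAsm(".globl marker");   // stored as ".globl marker\n"
    M.appendModuleInlineAsm("marker: nop");  // a separating newline is added
    // M.getModuleInlineAsm() == ".globl marker\nmarker: nop\n"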
-
-/// @}
-/// @name Generic Value Accessors
-/// @{
-
- /// getNamedValue - Return the global value in the module with
- /// the specified name, of arbitrary type. This method returns null
- /// if a global with the specified name is not found.
- GlobalValue *getNamedValue(StringRef Name) const;
-
- /// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
- /// This ID is uniqued across modules in the current LLVMContext.
- unsigned getMDKindID(StringRef Name) const;
-
-  /// getMDKindNames - Populate the client-supplied SmallVector with the names
-  /// of custom metadata IDs registered in this LLVMContext.
- void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
-
-
- typedef DenseMap<StructType*, unsigned, DenseMapInfo<StructType*> >
- NumeredTypesMapTy;
-
- /// getTypeByName - Return the type with the specified name, or null if there
- /// is none by that name.
- StructType *getTypeByName(StringRef Name) const;
-
-/// @}
-/// @name Function Accessors
-/// @{
-
- /// getOrInsertFunction - Look up the specified function in the module symbol
- /// table. Four possibilities:
- /// 1. If it does not exist, add a prototype for the function and return it.
- /// 2. If it exists, and has a local linkage, the existing function is
- /// renamed and a new one is inserted.
- /// 3. Otherwise, if the existing function has the correct prototype, return
- /// the existing function.
- /// 4. Finally, the function exists but has the wrong prototype: return the
- /// function with a constantexpr cast to the right prototype.
- Constant *getOrInsertFunction(StringRef Name, FunctionType *T,
- AttrListPtr AttributeList);
-
- Constant *getOrInsertFunction(StringRef Name, FunctionType *T);
-
-  /// getOrInsertFunction - Look up the specified function in the module symbol
-  /// table. If it does not exist, add a prototype for the function and return
-  /// it. This function guarantees to return a constant pointer to the
-  /// specified function type, or a ConstantExpr BitCast of that type if the
-  /// named function has a different type. This version of the method takes a
-  /// null-terminated list of function arguments, which makes it easier for
-  /// clients to use.
- Constant *getOrInsertFunction(StringRef Name,
- AttrListPtr AttributeList,
- Type *RetTy, ...) END_WITH_NULL;
-
- /// getOrInsertFunction - Same as above, but without the attributes.
- Constant *getOrInsertFunction(StringRef Name, Type *RetTy, ...)
- END_WITH_NULL;
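A sketch of the usual calling pattern for these overloads, assuming a Module M and an LLVMContext Ctx; the function name "puts" is only illustrative:

    llvm::Constant *C = M.getOrInsertFunction(
        "puts", llvm::Type::getInt32Ty(Ctx), llvm::Type::getInt8PtrTy(Ctx),
        (llvm::Type *)0); // END_WITH_NULL sentinel terminates the parameters
    // Cases 1-3 yield a Function*; case 4 yields a ConstantExpr bitcast.
    if (llvm::Function *F = llvm::dyn_cast<llvm::Function>(C))
      F->setCallingConv(llvm::CallingConv::C);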
-
- Constant *getOrInsertTargetIntrinsic(StringRef Name,
- FunctionType *Ty,
- AttrListPtr AttributeList);
-
- /// getFunction - Look up the specified function in the module symbol table.
- /// If it does not exist, return null.
- Function *getFunction(StringRef Name) const;
-
-/// @}
-/// @name Global Variable Accessors
-/// @{
-
- /// getGlobalVariable - Look up the specified global variable in the module
- /// symbol table. If it does not exist, return null. If AllowInternal is set
-  /// to true, this function will also return globals that have InternalLinkage.
-  /// By default, such globals are not returned.
- GlobalVariable *getGlobalVariable(StringRef Name,
- bool AllowInternal = false) const;
-
- /// getNamedGlobal - Return the global variable in the module with the
- /// specified name, of arbitrary type. This method returns null if a global
- /// with the specified name is not found.
- GlobalVariable *getNamedGlobal(StringRef Name) const {
- return getGlobalVariable(Name, true);
- }
-
- /// getOrInsertGlobal - Look up the specified global in the module symbol
- /// table.
-  /// 1. If it does not exist, add a declaration of the global and return it.
-  /// 2. Else, if the existing global is the correct declaration, return the
-  ///    existing global.
-  /// 3. Finally, the global exists but has the wrong type: return it with a
-  ///    constantexpr cast to the right type.
- Constant *getOrInsertGlobal(StringRef Name, Type *Ty);
-
-/// @}
-/// @name Global Alias Accessors
-/// @{
-
- /// getNamedAlias - Return the global alias in the module with the
- /// specified name, of arbitrary type. This method returns null if a global
- /// with the specified name is not found.
- GlobalAlias *getNamedAlias(StringRef Name) const;
-
-/// @}
-/// @name Named Metadata Accessors
-/// @{
-
- /// getNamedMetadata - Return the NamedMDNode in the module with the
- /// specified name. This method returns null if a NamedMDNode with the
- /// specified name is not found.
- NamedMDNode *getNamedMetadata(const Twine &Name) const;
-
- /// getOrInsertNamedMetadata - Return the named MDNode in the module
- /// with the specified name. This method returns a new NamedMDNode if a
- /// NamedMDNode with the specified name is not found.
- NamedMDNode *getOrInsertNamedMetadata(StringRef Name);
-
- /// eraseNamedMetadata - Remove the given NamedMDNode from this module
- /// and delete it.
- void eraseNamedMetadata(NamedMDNode *NMD);
-
-/// @}
-/// @name Module Flags Accessors
-/// @{
-
- /// getModuleFlagsMetadata - Returns the module flags in the provided vector.
- void getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const;
-
- /// getModuleFlagsMetadata - Returns the NamedMDNode in the module that
- /// represents module-level flags. This method returns null if there are no
- /// module-level flags.
- NamedMDNode *getModuleFlagsMetadata() const;
-
- /// getOrInsertModuleFlagsMetadata - Returns the NamedMDNode in the module
- /// that represents module-level flags. If module-level flags aren't found,
- /// it creates the named metadata that contains them.
- NamedMDNode *getOrInsertModuleFlagsMetadata();
-
- /// addModuleFlag - Add a module-level flag to the module-level flags
- /// metadata. It will create the module-level flags named metadata if it
- /// doesn't already exist.
- void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, Value *Val);
- void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, uint32_t Val);
- void addModuleFlag(MDNode *Node);
-
-/// @}
-/// @name Materialization
-/// @{
-
-  /// setMaterializer - Sets the GVMaterializer to GVM. This module must not
-  /// yet have a Materializer. To reset the materializer for a module that
-  /// already has one, call MaterializeAllPermanently first. Destroying this
-  /// module will destroy its materializer without materializing any more
-  /// GlobalValues. Short of destroying the Module, there is no way to detach
-  /// or destroy a materializer without first materializing all the GVs it
-  /// controls; this avoids leaving behind orphaned unmaterialized GVs.
- void setMaterializer(GVMaterializer *GVM);
- /// getMaterializer - Retrieves the GVMaterializer, if any, for this Module.
- GVMaterializer *getMaterializer() const { return Materializer.get(); }
-
- /// isMaterializable - True if the definition of GV has yet to be materialized
- /// from the GVMaterializer.
- bool isMaterializable(const GlobalValue *GV) const;
- /// isDematerializable - Returns true if this GV was loaded from this Module's
- /// GVMaterializer and the GVMaterializer knows how to dematerialize the GV.
- bool isDematerializable(const GlobalValue *GV) const;
-
- /// Materialize - Make sure the GlobalValue is fully read. If the module is
- /// corrupt, this returns true and fills in the optional string with
- /// information about the problem. If successful, this returns false.
- bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0);
- /// Dematerialize - If the GlobalValue is read in, and if the GVMaterializer
- /// supports it, release the memory for the function, and set it up to be
- /// materialized lazily. If !isDematerializable(), this method is a noop.
- void Dematerialize(GlobalValue *GV);
-
- /// MaterializeAll - Make sure all GlobalValues in this Module are fully read.
- /// If the module is corrupt, this returns true and fills in the optional
- /// string with information about the problem. If successful, this returns
- /// false.
- bool MaterializeAll(std::string *ErrInfo = 0);
-
- /// MaterializeAllPermanently - Make sure all GlobalValues in this Module are
- /// fully read and clear the Materializer. If the module is corrupt, this
- /// returns true, fills in the optional string with information about the
- /// problem, and DOES NOT clear the old Materializer. If successful, this
- /// returns false.
- bool MaterializeAllPermanently(std::string *ErrInfo = 0);
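The materializer is typically installed by the lazy bitcode reader rather than by hand; a sketch of the lifecycle under that assumption (Buf is an assumed MemoryBuffer of bitcode, error handling abbreviated):

    std::string Err;
    llvm::Module *M = llvm::getLazyBitcodeModule(Buf, Ctx, &Err); // sets a GVMaterializer
    llvm::Function *F = M->getFunction("callee");
    if (F && M->isMaterializable(F))
      M->Materialize(F, &Err);           // read just this function body
    M->MaterializeAllPermanently(&Err);  // read the rest, then drop the materializer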
-
-/// @}
-/// @name Direct access to the globals list, functions list, and symbol table
-/// @{
-
- /// Get the Module's list of global variables (constant).
- const GlobalListType &getGlobalList() const { return GlobalList; }
- /// Get the Module's list of global variables.
- GlobalListType &getGlobalList() { return GlobalList; }
- static iplist<GlobalVariable> Module::*getSublistAccess(GlobalVariable*) {
- return &Module::GlobalList;
- }
- /// Get the Module's list of functions (constant).
- const FunctionListType &getFunctionList() const { return FunctionList; }
- /// Get the Module's list of functions.
- FunctionListType &getFunctionList() { return FunctionList; }
- static iplist<Function> Module::*getSublistAccess(Function*) {
- return &Module::FunctionList;
- }
- /// Get the Module's list of aliases (constant).
- const AliasListType &getAliasList() const { return AliasList; }
- /// Get the Module's list of aliases.
- AliasListType &getAliasList() { return AliasList; }
- static iplist<GlobalAlias> Module::*getSublistAccess(GlobalAlias*) {
- return &Module::AliasList;
- }
- /// Get the Module's list of named metadata (constant).
- const NamedMDListType &getNamedMDList() const { return NamedMDList; }
- /// Get the Module's list of named metadata.
- NamedMDListType &getNamedMDList() { return NamedMDList; }
- static ilist<NamedMDNode> Module::*getSublistAccess(NamedMDNode*) {
- return &Module::NamedMDList;
- }
-  /// Get the symbol table of global variable and function identifiers (constant).
- const ValueSymbolTable &getValueSymbolTable() const { return *ValSymTab; }
- /// Get the Module's symbol table of global variable and function identifiers.
- ValueSymbolTable &getValueSymbolTable() { return *ValSymTab; }
-
-/// @}
-/// @name Global Variable Iteration
-/// @{
-
- global_iterator global_begin() { return GlobalList.begin(); }
- const_global_iterator global_begin() const { return GlobalList.begin(); }
- global_iterator global_end () { return GlobalList.end(); }
- const_global_iterator global_end () const { return GlobalList.end(); }
- bool global_empty() const { return GlobalList.empty(); }
-
-/// @}
-/// @name Function Iteration
-/// @{
-
- iterator begin() { return FunctionList.begin(); }
- const_iterator begin() const { return FunctionList.begin(); }
- iterator end () { return FunctionList.end(); }
- const_iterator end () const { return FunctionList.end(); }
- size_t size() const { return FunctionList.size(); }
- bool empty() const { return FunctionList.empty(); }
-
-/// @}
-/// @name Dependent Library Iteration
-/// @{
-
- /// @brief Get a constant iterator to beginning of dependent library list.
- inline lib_iterator lib_begin() const { return LibraryList.begin(); }
- /// @brief Get a constant iterator to end of dependent library list.
- inline lib_iterator lib_end() const { return LibraryList.end(); }
- /// @brief Returns the number of items in the list of libraries.
- inline size_t lib_size() const { return LibraryList.size(); }
- /// @brief Add a library to the list of dependent libraries
- void addLibrary(StringRef Lib);
- /// @brief Remove a library from the list of dependent libraries
- void removeLibrary(StringRef Lib);
- /// @brief Get all the libraries
- inline const LibraryListType& getLibraries() const { return LibraryList; }
-
-/// @}
-/// @name Alias Iteration
-/// @{
-
- alias_iterator alias_begin() { return AliasList.begin(); }
- const_alias_iterator alias_begin() const { return AliasList.begin(); }
- alias_iterator alias_end () { return AliasList.end(); }
- const_alias_iterator alias_end () const { return AliasList.end(); }
- size_t alias_size () const { return AliasList.size(); }
- bool alias_empty() const { return AliasList.empty(); }
-
-
-/// @}
-/// @name Named Metadata Iteration
-/// @{
-
- named_metadata_iterator named_metadata_begin() { return NamedMDList.begin(); }
- const_named_metadata_iterator named_metadata_begin() const {
- return NamedMDList.begin();
- }
-
- named_metadata_iterator named_metadata_end() { return NamedMDList.end(); }
- const_named_metadata_iterator named_metadata_end() const {
- return NamedMDList.end();
- }
-
- size_t named_metadata_size() const { return NamedMDList.size(); }
- bool named_metadata_empty() const { return NamedMDList.empty(); }
-
-
-/// @}
-/// @name Utility functions for printing and dumping Module objects
-/// @{
-
- /// Print the module to an output stream with an optional
- /// AssemblyAnnotationWriter.
- void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const;
-
- /// Dump the module to stderr (for debugging).
- void dump() const;
-
-  /// This function causes all the contained values to "let go" of all
-  /// references that they are maintaining. This allows one to 'delete' a
-  /// whole module at a time, even though there may be circular references...
-  /// first all references are dropped, and all use counts go to zero. Then
-  /// everything is delete'd for real. Note that no operations are valid on an
-  /// object that has "dropped all references", except operator delete.
- void dropAllReferences();
-/// @}
-};
-
-/// A raw_ostream inserter for modules.
-inline raw_ostream &operator<<(raw_ostream &O, const Module &M) {
- M.print(O, 0);
- return O;
-}
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index f3d824960c2f..e2478f6754b0 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -14,22 +14,78 @@
#ifndef LLVM_OBJECT_ARCHIVE_H
#define LLVM_OBJECT_ARCHIVE_H
-#include "llvm/Object/Binary.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Object/Binary.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
namespace llvm {
namespace object {
+struct ArchiveMemberHeader {
+ char Name[16];
+ char LastModified[12];
+ char UID[6];
+ char GID[6];
+ char AccessMode[8];
+ char Size[10]; ///< Size of data, not including header or padding.
+ char Terminator[2];
+
+  /// Get the name without looking up long names.
+ llvm::StringRef getName() const {
+ char EndCond;
+ if (Name[0] == '/' || Name[0] == '#')
+ EndCond = ' ';
+ else
+ EndCond = '/';
+ llvm::StringRef::size_type end =
+ llvm::StringRef(Name, sizeof(Name)).find(EndCond);
+ if (end == llvm::StringRef::npos)
+ end = sizeof(Name);
+ assert(end <= sizeof(Name) && end > 0);
+ // Don't include the EndCond if there is one.
+ return llvm::StringRef(Name, end);
+ }
+
+ uint64_t getSize() const {
+ uint64_t ret;
+ if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, ret))
+ llvm_unreachable("Size is not an integer.");
+ return ret;
+ }
+};
+
+static const ArchiveMemberHeader *ToHeader(const char *base) {
+ return reinterpret_cast<const ArchiveMemberHeader *>(base);
+}
class Archive : public Binary {
virtual void anchor();
public:
class Child {
const Archive *Parent;
+ /// \brief Includes header but not padding byte.
StringRef Data;
+ /// \brief Offset from Data to the start of the file.
+ uint16_t StartOfFile;
public:
- Child(const Archive *p, StringRef d) : Parent(p), Data(d) {}
+ Child(const Archive *p, StringRef d) : Parent(p), Data(d) {
+ if (!p || d.empty())
+ return;
+      // Set up StartOfFile.
+ StartOfFile = sizeof(ArchiveMemberHeader);
+ // Don't include attached name.
+ StringRef Name = ToHeader(Data.data())->getName();
+ if (Name.startswith("#1/")) {
+ uint64_t NameSize;
+ if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize))
+ llvm_unreachable("Long name length is not an integer");
+ StartOfFile += NameSize;
+ }
+ }
bool operator ==(const Child &other) const {
return (Parent == other.Parent) && (Data.begin() == other.Data.begin());
@@ -39,16 +95,48 @@ public:
return Data.begin() < other.Data.begin();
}
- Child getNext() const;
+ Child getNext() const {
+ size_t SpaceToSkip = Data.size();
+      // Members are 2-byte aligned; if the size is odd, skip the padding byte.
+ if (SpaceToSkip & 1)
+ ++SpaceToSkip;
+
+ const char *NextLoc = Data.data() + SpaceToSkip;
+
+ // Check to see if this is past the end of the archive.
+ if (NextLoc >= Parent->Data->getBufferEnd())
+ return Child(Parent, StringRef(0, 0));
+
+ size_t NextSize =
+ sizeof(ArchiveMemberHeader) + ToHeader(NextLoc)->getSize();
+
+ return Child(Parent, StringRef(NextLoc, NextSize));
+ }
+
error_code getName(StringRef &Result) const;
int getLastModified() const;
int getUID() const;
int getGID() const;
int getAccessMode() const;
- ///! Return the size of the archive member without the header or padding.
- uint64_t getSize() const;
+ /// \return the size of the archive member without the header or padding.
+ uint64_t getSize() const { return Data.size() - StartOfFile; }
+
+ StringRef getBuffer() const {
+ return StringRef(Data.data() + StartOfFile, getSize());
+ }
+
+ error_code getMemoryBuffer(OwningPtr<MemoryBuffer> &Result,
+ bool FullPath = false) const {
+ StringRef Name;
+ if (error_code ec = getName(Name))
+ return ec;
+ SmallString<128> Path;
+ Result.reset(MemoryBuffer::getMemBuffer(
+ getBuffer(), FullPath ? (Twine(Parent->getFileName()) + "(" + Name +
+ ")").toStringRef(Path) : Name, false));
+ return error_code::success();
+ }
- MemoryBuffer *getBuffer() const;
error_code getAsBinary(OwningPtr<Binary> &Result) const;
};
@@ -122,6 +210,16 @@ public:
Archive(MemoryBuffer *source, error_code &ec);
+ enum Kind {
+ K_GNU,
+ K_BSD,
+ K_COFF
+ };
+
+ Kind kind() const {
+ return Format;
+ }
+
child_iterator begin_children(bool skip_internal = true) const;
child_iterator end_children() const;
@@ -133,9 +231,13 @@ public:
return v->isArchive();
}
+  // Check whether a symbol is in the archive.
+ child_iterator findSym(StringRef name) const;
+
private:
child_iterator SymbolTable;
child_iterator StringTable;
+ Kind Format;
};
}
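With the header parsing and long-name handling now inline, walking an archive touches only the mapped buffer. A sketch, assuming Buf holds an archive and errors are elided:

    llvm::error_code EC;
    llvm::object::Archive A(Buf, EC);
    for (llvm::object::Archive::child_iterator I = A.begin_children(),
                                               E = A.end_children();
         I != E; ++I) {
      llvm::StringRef Name;
      if (!I->getName(Name))
        llvm::outs() << Name << ": " << I->getSize() << " bytes\n";
    }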
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
index d555de3accc2..8bbcd8b4d4c6 100644
--- a/include/llvm/Object/Binary.h
+++ b/include/llvm/Object/Binary.h
@@ -49,8 +49,8 @@ protected:
ID_EndObjects
};
- static inline unsigned int getELFType(bool isLittleEndian, bool is64Bits) {
- if (isLittleEndian)
+ static inline unsigned int getELFType(bool isLE, bool is64Bits) {
+ if (isLE)
return is64Bits ? ID_ELF64L : ID_ELF32L;
else
return is64Bits ? ID_ELF64B : ID_ELF32B;
@@ -85,6 +85,10 @@ public:
bool isCOFF() const {
return TypeID == ID_COFF;
}
+
+ bool isLittleEndian() const {
+ return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B);
+ }
};
/// @brief Create a Binary from Source, autodetecting the file type.
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index 466de93a78b2..8ea5e46e09e8 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -14,11 +14,11 @@
#ifndef LLVM_OBJECT_ELF_H
#define LLVM_OBJECT_ELF_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
@@ -33,6 +33,21 @@
namespace llvm {
namespace object {
+using support::endianness;
+
+template<endianness target_endianness, std::size_t max_alignment, bool is64Bits>
+struct ELFType {
+ static const endianness TargetEndianness = target_endianness;
+ static const std::size_t MaxAlignment = max_alignment;
+ static const bool Is64Bits = is64Bits;
+};
+
+template<typename T, int max_align>
+struct MaximumAlignment {
+ enum {value = AlignOf<T>::Alignment > max_align ? max_align
+ : AlignOf<T>::Alignment};
+};
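ELFType folds what used to be two template parameters (plus the new alignment bound) into a single tag, so concrete configurations become one-line typedefs. A sketch of aliases a client might define; the names are illustrative, not part of this patch:

    typedef llvm::object::ELFType<llvm::support::little, 4, false> ELF32LE;
    typedef llvm::object::ELFType<llvm::support::big,    4, false> ELF32BE;
    typedef llvm::object::ELFType<llvm::support::little, 8, true>  ELF64LE;
    // MaximumAlignment clamps each field to the tag's bound, e.g. for data
    // read at unaligned offsets: MaximumAlignment<uint64_t, 2>::value == 2.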
+
// Subclasses of ELFObjectFile may need this for template instantiation
inline std::pair<unsigned char, unsigned char>
getElfArchType(MemoryBuffer *Object) {
@@ -43,69 +58,78 @@ getElfArchType(MemoryBuffer *Object) {
}
// Templates to choose Elf_Addr and Elf_Off depending on is64Bits.
-template<support::endianness target_endianness>
+template<endianness target_endianness, std::size_t max_alignment>
struct ELFDataTypeTypedefHelperCommon {
typedef support::detail::packed_endian_specific_integral
- <uint16_t, target_endianness, support::aligned> Elf_Half;
+ <uint16_t, target_endianness,
+ MaximumAlignment<uint16_t, max_alignment>::value> Elf_Half;
typedef support::detail::packed_endian_specific_integral
- <uint32_t, target_endianness, support::aligned> Elf_Word;
+ <uint32_t, target_endianness,
+ MaximumAlignment<uint32_t, max_alignment>::value> Elf_Word;
typedef support::detail::packed_endian_specific_integral
- <int32_t, target_endianness, support::aligned> Elf_Sword;
+ <int32_t, target_endianness,
+ MaximumAlignment<int32_t, max_alignment>::value> Elf_Sword;
typedef support::detail::packed_endian_specific_integral
- <uint64_t, target_endianness, support::aligned> Elf_Xword;
+ <uint64_t, target_endianness,
+ MaximumAlignment<uint64_t, max_alignment>::value> Elf_Xword;
typedef support::detail::packed_endian_specific_integral
- <int64_t, target_endianness, support::aligned> Elf_Sxword;
+ <int64_t, target_endianness,
+ MaximumAlignment<int64_t, max_alignment>::value> Elf_Sxword;
};
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
struct ELFDataTypeTypedefHelper;
/// ELF 32bit types.
-template<support::endianness target_endianness>
-struct ELFDataTypeTypedefHelper<target_endianness, false>
- : ELFDataTypeTypedefHelperCommon<target_endianness> {
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct ELFDataTypeTypedefHelper<ELFT<TargetEndianness, MaxAlign, false> >
+ : ELFDataTypeTypedefHelperCommon<TargetEndianness, MaxAlign> {
typedef uint32_t value_type;
typedef support::detail::packed_endian_specific_integral
- <value_type, target_endianness, support::aligned> Elf_Addr;
+ <value_type, TargetEndianness,
+ MaximumAlignment<value_type, MaxAlign>::value> Elf_Addr;
typedef support::detail::packed_endian_specific_integral
- <value_type, target_endianness, support::aligned> Elf_Off;
+ <value_type, TargetEndianness,
+ MaximumAlignment<value_type, MaxAlign>::value> Elf_Off;
};
/// ELF 64bit types.
-template<support::endianness target_endianness>
-struct ELFDataTypeTypedefHelper<target_endianness, true>
- : ELFDataTypeTypedefHelperCommon<target_endianness>{
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct ELFDataTypeTypedefHelper<ELFT<TargetEndianness, MaxAlign, true> >
+ : ELFDataTypeTypedefHelperCommon<TargetEndianness, MaxAlign> {
typedef uint64_t value_type;
typedef support::detail::packed_endian_specific_integral
- <value_type, target_endianness, support::aligned> Elf_Addr;
+ <value_type, TargetEndianness,
+ MaximumAlignment<value_type, MaxAlign>::value> Elf_Addr;
typedef support::detail::packed_endian_specific_integral
- <value_type, target_endianness, support::aligned> Elf_Off;
+ <value_type, TargetEndianness,
+ MaximumAlignment<value_type, MaxAlign>::value> Elf_Off;
};
// I really don't like doing this, but the alternative is copypasta.
-#define LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Addr Elf_Addr; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Off Elf_Off; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Half Elf_Half; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Word Elf_Word; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sword Elf_Sword; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Xword Elf_Xword; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sxword Elf_Sxword;
+#define LLVM_ELF_IMPORT_TYPES(ELFT) \
+typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Addr Elf_Addr; \
+typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Off Elf_Off; \
+typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Half Elf_Half; \
+typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Word Elf_Word; \
+typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Sword Elf_Sword; \
+typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Xword Elf_Xword; \
+typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Sxword Elf_Sxword;
+
+// This is required to get template types into a macro :(
+#define LLVM_ELF_COMMA ,
// Section header.
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
struct Elf_Shdr_Base;
-template<support::endianness target_endianness>
-struct Elf_Shdr_Base<target_endianness, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Shdr_Base<ELFT<TargetEndianness, MaxAlign, false> > {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA false>)
Elf_Word sh_name; // Section name (index into string table)
Elf_Word sh_type; // Section type (SHT_*)
Elf_Word sh_flags; // Section flags (SHF_*)
@@ -118,9 +142,11 @@ struct Elf_Shdr_Base<target_endianness, false> {
Elf_Word sh_entsize; // Size of records contained within the section
};
-template<support::endianness target_endianness>
-struct Elf_Shdr_Base<target_endianness, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Shdr_Base<ELFT<TargetEndianness, MaxAlign, true> > {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA true>)
Elf_Word sh_name; // Section name (index into string table)
Elf_Word sh_type; // Section type (SHT_*)
Elf_Xword sh_flags; // Section flags (SHF_*)
@@ -133,10 +159,10 @@ struct Elf_Shdr_Base<target_endianness, true> {
Elf_Xword sh_entsize; // Size of records contained within the section
};
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Shdr_Impl : Elf_Shdr_Base<target_endianness, is64Bits> {
- using Elf_Shdr_Base<target_endianness, is64Bits>::sh_entsize;
- using Elf_Shdr_Base<target_endianness, is64Bits>::sh_size;
+template<class ELFT>
+struct Elf_Shdr_Impl : Elf_Shdr_Base<ELFT> {
+ using Elf_Shdr_Base<ELFT>::sh_entsize;
+ using Elf_Shdr_Base<ELFT>::sh_size;
/// @brief Get the number of entities this section contains if it has any.
unsigned getEntityCount() const {
@@ -146,12 +172,14 @@ struct Elf_Shdr_Impl : Elf_Shdr_Base<target_endianness, is64Bits> {
}
};
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
struct Elf_Sym_Base;
-template<support::endianness target_endianness>
-struct Elf_Sym_Base<target_endianness, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Sym_Base<ELFT<TargetEndianness, MaxAlign, false> > {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA false>)
Elf_Word st_name; // Symbol name (index into string table)
Elf_Addr st_value; // Value or address associated with the symbol
Elf_Word st_size; // Size of the symbol
@@ -160,9 +188,11 @@ struct Elf_Sym_Base<target_endianness, false> {
Elf_Half st_shndx; // Which section (header table index) it's defined in
};
-template<support::endianness target_endianness>
-struct Elf_Sym_Base<target_endianness, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Sym_Base<ELFT<TargetEndianness, MaxAlign, true> > {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA true>)
Elf_Word st_name; // Symbol name (index into string table)
unsigned char st_info; // Symbol's type and binding attributes
unsigned char st_other; // Must be zero; reserved
@@ -171,9 +201,9 @@ struct Elf_Sym_Base<target_endianness, true> {
Elf_Xword st_size; // Size of the symbol
};
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> {
- using Elf_Sym_Base<target_endianness, is64Bits>::st_info;
+template<class ELFT>
+struct Elf_Sym_Impl : Elf_Sym_Base<ELFT> {
+ using Elf_Sym_Base<ELFT>::st_info;
// These accessors and mutators correspond to the ELF32_ST_BIND,
// ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification:
@@ -188,21 +218,21 @@ struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> {
/// Elf_Versym: This is the structure of entries in the SHT_GNU_versym section
/// (.gnu.version). This structure is identical for ELF32 and ELF64.
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
struct Elf_Versym_Impl {
- LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+ LLVM_ELF_IMPORT_TYPES(ELFT)
Elf_Half vs_index; // Version index with flags (e.g. VERSYM_HIDDEN)
};
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
struct Elf_Verdaux_Impl;
/// Elf_Verdef: This is the structure of entries in the SHT_GNU_verdef section
/// (.gnu.version_d). This structure is identical for ELF32 and ELF64.
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
struct Elf_Verdef_Impl {
- LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
- typedef Elf_Verdaux_Impl<target_endianness, is64Bits> Elf_Verdaux;
+ LLVM_ELF_IMPORT_TYPES(ELFT)
+ typedef Elf_Verdaux_Impl<ELFT> Elf_Verdaux;
Elf_Half vd_version; // Version of this structure (e.g. VER_DEF_CURRENT)
Elf_Half vd_flags; // Bitwise flags (VER_DEF_*)
Elf_Half vd_ndx; // Version index, used in .gnu.version entries
@@ -219,18 +249,18 @@ struct Elf_Verdef_Impl {
/// Elf_Verdaux: This is the structure of auxiliary data in the SHT_GNU_verdef
/// section (.gnu.version_d). This structure is identical for ELF32 and ELF64.
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
struct Elf_Verdaux_Impl {
- LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+ LLVM_ELF_IMPORT_TYPES(ELFT)
Elf_Word vda_name; // Version name (offset in string table)
Elf_Word vda_next; // Offset to next Verdaux entry (in bytes)
};
/// Elf_Verneed: This is the structure of entries in the SHT_GNU_verneed
/// section (.gnu.version_r). This structure is identical for ELF32 and ELF64.
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
struct Elf_Verneed_Impl {
- LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+ LLVM_ELF_IMPORT_TYPES(ELFT)
Elf_Half vn_version; // Version of this structure (e.g. VER_NEED_CURRENT)
Elf_Half vn_cnt; // Number of associated Vernaux entries
Elf_Word vn_file; // Library name (string table offset)
@@ -240,9 +270,9 @@ struct Elf_Verneed_Impl {
/// Elf_Vernaux: This is the structure of auxiliary data in SHT_GNU_verneed
/// section (.gnu.version_r). This structure is identical for ELF32 and ELF64.
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
struct Elf_Vernaux_Impl {
- LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+ LLVM_ELF_IMPORT_TYPES(ELFT)
Elf_Word vna_hash; // Hash of dependency name
Elf_Half vna_flags; // Bitwise Flags (VER_FLAG_*)
Elf_Half vna_other; // Version index, used in .gnu.version entries
@@ -252,12 +282,14 @@ struct Elf_Vernaux_Impl {
/// Elf_Dyn_Base: This structure matches the form of entries in the dynamic
/// table section (.dynamic) look like.
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
struct Elf_Dyn_Base;
-template<support::endianness target_endianness>
-struct Elf_Dyn_Base<target_endianness, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Dyn_Base<ELFT<TargetEndianness, MaxAlign, false> > {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA false>)
Elf_Sword d_tag;
union {
Elf_Word d_val;
@@ -265,9 +297,11 @@ struct Elf_Dyn_Base<target_endianness, false> {
} d_un;
};
-template<support::endianness target_endianness>
-struct Elf_Dyn_Base<target_endianness, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Dyn_Base<ELFT<TargetEndianness, MaxAlign, true> > {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA true>)
Elf_Sxword d_tag;
union {
Elf_Xword d_val;
@@ -276,120 +310,154 @@ struct Elf_Dyn_Base<target_endianness, true> {
};
/// Elf_Dyn_Impl: This inherits from Elf_Dyn_Base, adding getters and setters.
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Dyn_Impl : Elf_Dyn_Base<target_endianness, is64Bits> {
- using Elf_Dyn_Base<target_endianness, is64Bits>::d_tag;
- using Elf_Dyn_Base<target_endianness, is64Bits>::d_un;
+template<class ELFT>
+struct Elf_Dyn_Impl : Elf_Dyn_Base<ELFT> {
+ using Elf_Dyn_Base<ELFT>::d_tag;
+ using Elf_Dyn_Base<ELFT>::d_un;
int64_t getTag() const { return d_tag; }
uint64_t getVal() const { return d_un.d_val; }
uint64_t getPtr() const { return d_un.ptr; }
};
-template<support::endianness target_endianness, bool is64Bits>
-class ELFObjectFile;
-
-// DynRefImpl: Reference to an entry in the dynamic table
-// This is an ELF-specific interface.
-template<support::endianness target_endianness, bool is64Bits>
-class DynRefImpl {
- typedef Elf_Dyn_Impl<target_endianness, is64Bits> Elf_Dyn;
- typedef ELFObjectFile<target_endianness, is64Bits> OwningType;
-
- DataRefImpl DynPimpl;
- const OwningType *OwningObject;
-
-public:
- DynRefImpl() : OwningObject(NULL) { }
-
- DynRefImpl(DataRefImpl DynP, const OwningType *Owner);
-
- bool operator==(const DynRefImpl &Other) const;
- bool operator <(const DynRefImpl &Other) const;
-
- error_code getNext(DynRefImpl &Result) const;
- int64_t getTag() const;
- uint64_t getVal() const;
- uint64_t getPtr() const;
-
- DataRefImpl getRawDataRefImpl() const;
-};
-
// Elf_Rel: Elf Relocation
-template<support::endianness target_endianness, bool is64Bits, bool isRela>
+template<class ELFT, bool isRela>
struct Elf_Rel_Base;
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, false, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, false> {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA false>)
Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
Elf_Word r_info; // Symbol table index and type of relocation to apply
+
+ uint32_t getRInfo(bool isMips64EL) const {
+ assert(!isMips64EL);
+ return r_info;
+ }
+ void setRInfo(uint32_t R) {
+ r_info = R;
+ }
};
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, true, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, false> {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA true>)
Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
Elf_Xword r_info; // Symbol table index and type of relocation to apply
+
+ uint64_t getRInfo(bool isMips64EL) const {
+ uint64_t t = r_info;
+ if (!isMips64EL)
+ return t;
+    // Mips64 little endian has a "special" encoding of r_info. Instead of one
+    // 64 bit little endian number, it is a little endian 32 bit number
+    // followed by a 32 bit big endian number.
+    return (t << 32) | ((t >> 8) & 0xff000000) | ((t >> 24) & 0x00ff0000) |
+           ((t >> 40) & 0x0000ff00) | ((t >> 56) & 0x000000ff);
+ }
+ void setRInfo(uint64_t R) {
+ // FIXME: Add mips64el support.
+ r_info = R;
+ }
};
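The swizzle above reassembles r_info from the Mips64 EL layout: a little endian 32 bit symbol index followed by a big endian 32 bit type field. A worked example with illustrative values:

    // File bytes: 78 56 34 12 | 00 00 00 2d  (symbol 0x12345678, type 0x2d)
    uint64_t t = 0x2d00000012345678ULL; // the raw little endian 64 bit read
    uint64_t fixed = (t << 32) | ((t >> 8) & 0xff000000) |
                     ((t >> 24) & 0x00ff0000) | ((t >> 40) & 0x0000ff00) |
                     ((t >> 56) & 0x000000ff);
    // fixed == 0x123456780000002d: symbol (>> 32) is 0x12345678, type is 0x2d.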
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, false, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, true> {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA false>)
Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
Elf_Word r_info; // Symbol table index and type of relocation to apply
Elf_Sword r_addend; // Compute value for relocatable field by adding this
+
+ uint32_t getRInfo(bool isMips64EL) const {
+ assert(!isMips64EL);
+ return r_info;
+ }
+ void setRInfo(uint32_t R) {
+ r_info = R;
+ }
};
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, true, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, true> {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA true>)
Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
Elf_Xword r_info; // Symbol table index and type of relocation to apply
Elf_Sxword r_addend; // Compute value for relocatable field by adding this.
+
+ uint64_t getRInfo(bool isMips64EL) const {
+    // Mips64 little endian has a "special" encoding of r_info. Instead of one
+    // 64 bit little endian number, it is a little endian 32 bit number
+    // followed by a 32 bit big endian number.
+ uint64_t t = r_info;
+ if (!isMips64EL)
+ return t;
+ return (t << 32) | ((t >> 8) & 0xff000000) | ((t >> 24) & 0x00ff0000) |
+ ((t >> 40) & 0x0000ff00) | ((t >> 56) & 0x000000ff);
+ }
+ void setRInfo(uint64_t R) {
+ // FIXME: Add mips64el support.
+ r_info = R;
+ }
};
-template<support::endianness target_endianness, bool is64Bits, bool isRela>
+template<class ELFT, bool isRela>
struct Elf_Rel_Impl;
-template<support::endianness target_endianness, bool isRela>
-struct Elf_Rel_Impl<target_endianness, true, isRela>
- : Elf_Rel_Base<target_endianness, true, isRela> {
- using Elf_Rel_Base<target_endianness, true, isRela>::r_info;
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
+struct Elf_Rel_Impl<ELFT<TargetEndianness, MaxAlign, true>, isRela>
+ : Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, isRela> {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA true>)
// These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
// and ELF64_R_INFO macros defined in the ELF specification:
- uint64_t getSymbol() const { return (r_info >> 32); }
- unsigned char getType() const {
- return (unsigned char) (r_info & 0xffffffffL);
+ uint32_t getSymbol(bool isMips64EL) const {
+ return (uint32_t) (this->getRInfo(isMips64EL) >> 32);
}
- void setSymbol(uint64_t s) { setSymbolAndType(s, getType()); }
- void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
- void setSymbolAndType(uint64_t s, unsigned char t) {
- r_info = (s << 32) + (t&0xffffffffL);
+ uint32_t getType(bool isMips64EL) const {
+ return (uint32_t) (this->getRInfo(isMips64EL) & 0xffffffffL);
+ }
+  // FIXME: Plumb isMips64EL through these setters, mirroring the getters.
+  void setSymbol(uint32_t s) { setSymbolAndType(s, getType(false)); }
+  void setType(uint32_t t) { setSymbolAndType(getSymbol(false), t); }
+ void setSymbolAndType(uint32_t s, uint32_t t) {
+ this->setRInfo(((uint64_t)s << 32) + (t&0xffffffffL));
}
};
-template<support::endianness target_endianness, bool isRela>
-struct Elf_Rel_Impl<target_endianness, false, isRela>
- : Elf_Rel_Base<target_endianness, false, isRela> {
- using Elf_Rel_Base<target_endianness, false, isRela>::r_info;
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
+struct Elf_Rel_Impl<ELFT<TargetEndianness, MaxAlign, false>, isRela>
+ : Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, isRela> {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA false>)
// These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
// and ELF32_R_INFO macros defined in the ELF specification:
- uint32_t getSymbol() const { return (r_info >> 8); }
- unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
+ uint32_t getSymbol(bool isMips64EL) const {
+ return this->getRInfo(isMips64EL) >> 8;
+ }
+ unsigned char getType(bool isMips64EL) const {
+ return (unsigned char) (this->getRInfo(isMips64EL) & 0x0ff);
+ }
void setSymbol(uint32_t s) { setSymbolAndType(s, getType()); }
void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
void setSymbolAndType(uint32_t s, unsigned char t) {
- r_info = (s << 8) + t;
+ this->setRInfo((s << 8) + t);
}
};
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
struct Elf_Ehdr_Impl {
- LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+ LLVM_ELF_IMPORT_TYPES(ELFT)
unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes
Elf_Half e_type; // Type of file (see ET_*)
Elf_Half e_machine; // Required architecture for this file (see EM_*)
@@ -412,15 +480,17 @@ struct Elf_Ehdr_Impl {
unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
};
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Phdr;
+template<class ELFT>
+struct Elf_Phdr_Impl;
-template<support::endianness target_endianness>
-struct Elf_Phdr<target_endianness, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Phdr_Impl<ELFT<TargetEndianness, MaxAlign, false> > {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA false>)
Elf_Word p_type; // Type of segment
Elf_Off p_offset; // FileOffset where segment is located, in bytes
- Elf_Addr p_vaddr; // Virtual Address of beginning of segment
+ Elf_Addr p_vaddr; // Virtual Address of beginning of segment
Elf_Addr p_paddr; // Physical address of beginning of segment (OS-specific)
Elf_Word p_filesz; // Num. of bytes in file image of segment (may be zero)
Elf_Word p_memsz; // Num. of bytes in mem image of segment (may be zero)
@@ -428,121 +498,140 @@ struct Elf_Phdr<target_endianness, false> {
Elf_Word p_align; // Segment alignment constraint
};
-template<support::endianness target_endianness>
-struct Elf_Phdr<target_endianness, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+template<template<endianness, std::size_t, bool> class ELFT,
+ endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Phdr_Impl<ELFT<TargetEndianness, MaxAlign, true> > {
+ LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
+ MaxAlign LLVM_ELF_COMMA true>)
Elf_Word p_type; // Type of segment
Elf_Word p_flags; // Segment flags
Elf_Off p_offset; // FileOffset where segment is located, in bytes
- Elf_Addr p_vaddr; // Virtual Address of beginning of segment
+ Elf_Addr p_vaddr; // Virtual Address of beginning of segment
Elf_Addr p_paddr; // Physical address of beginning of segment (OS-specific)
- Elf_Word p_filesz; // Num. of bytes in file image of segment (may be zero)
- Elf_Word p_memsz; // Num. of bytes in mem image of segment (may be zero)
- Elf_Word p_align; // Segment alignment constraint
+ Elf_Xword p_filesz; // Num. of bytes in file image of segment (may be zero)
+ Elf_Xword p_memsz; // Num. of bytes in mem image of segment (may be zero)
+ Elf_Xword p_align; // Segment alignment constraint
};
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
class ELFObjectFile : public ObjectFile {
- LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
-
- typedef Elf_Ehdr_Impl<target_endianness, is64Bits> Elf_Ehdr;
- typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
- typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
- typedef Elf_Dyn_Impl<target_endianness, is64Bits> Elf_Dyn;
- typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel;
- typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela;
- typedef Elf_Verdef_Impl<target_endianness, is64Bits> Elf_Verdef;
- typedef Elf_Verdaux_Impl<target_endianness, is64Bits> Elf_Verdaux;
- typedef Elf_Verneed_Impl<target_endianness, is64Bits> Elf_Verneed;
- typedef Elf_Vernaux_Impl<target_endianness, is64Bits> Elf_Vernaux;
- typedef Elf_Versym_Impl<target_endianness, is64Bits> Elf_Versym;
- typedef DynRefImpl<target_endianness, is64Bits> DynRef;
- typedef content_iterator<DynRef> dyn_iterator;
-
-protected:
- // This flag is used for classof, to distinguish ELFObjectFile from
- // its subclass. If more subclasses will be created, this flag will
- // have to become an enum.
- bool isDyldELFObject;
-
-private:
- typedef SmallVector<const Elf_Shdr*, 1> Sections_t;
- typedef DenseMap<unsigned, unsigned> IndexMap_t;
- typedef DenseMap<const Elf_Shdr*, SmallVector<uint32_t, 1> > RelocMap_t;
-
- const Elf_Ehdr *Header;
- const Elf_Shdr *SectionHeaderTable;
- const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
- const Elf_Shdr *dot_strtab_sec; // Symbol header string table.
- const Elf_Shdr *dot_dynstr_sec; // Dynamic symbol string table.
-
- // SymbolTableSections[0] always points to the dynamic string table section
- // header, or NULL if there is no dynamic string table.
- Sections_t SymbolTableSections;
- IndexMap_t SymbolTableSectionsIndexMap;
- DenseMap<const Elf_Sym*, ELF::Elf64_Word> ExtendedSymbolTable;
-
- const Elf_Shdr *dot_dynamic_sec; // .dynamic
- const Elf_Shdr *dot_gnu_version_sec; // .gnu.version
- const Elf_Shdr *dot_gnu_version_r_sec; // .gnu.version_r
- const Elf_Shdr *dot_gnu_version_d_sec; // .gnu.version_d
-
- // Pointer to SONAME entry in dynamic string table
- // This is set the first time getLoadName is called.
- mutable const char *dt_soname;
+ LLVM_ELF_IMPORT_TYPES(ELFT)
public:
- /// \brief Iterate over relocations in a .rel or .rela section.
- template<class RelocT>
- class ELFRelocationIterator {
+ /// \brief Iterate over constant sized entities.
+ template<class EntT>
+ class ELFEntityIterator {
public:
- typedef void difference_type;
- typedef const RelocT value_type;
- typedef std::forward_iterator_tag iterator_category;
+ typedef ptrdiff_t difference_type;
+ typedef EntT value_type;
+ typedef std::random_access_iterator_tag iterator_category;
typedef value_type &reference;
typedef value_type *pointer;
/// \brief Default construct iterator.
- ELFRelocationIterator() : Section(0), Current(0) {}
- ELFRelocationIterator(const Elf_Shdr *Sec, const char *Start)
- : Section(Sec)
+ ELFEntityIterator() : EntitySize(0), Current(0) {}
+ ELFEntityIterator(uint64_t EntSize, const char *Start)
+ : EntitySize(EntSize)
, Current(Start) {}
reference operator *() {
assert(Current && "Attempted to dereference an invalid iterator!");
- return *reinterpret_cast<const RelocT*>(Current);
+ return *reinterpret_cast<pointer>(Current);
}
pointer operator ->() {
assert(Current && "Attempted to dereference an invalid iterator!");
- return reinterpret_cast<const RelocT*>(Current);
+ return reinterpret_cast<pointer>(Current);
}
- bool operator ==(const ELFRelocationIterator &Other) {
- return Section == Other.Section && Current == Other.Current;
+ bool operator ==(const ELFEntityIterator &Other) {
+ return Current == Other.Current;
}
- bool operator !=(const ELFRelocationIterator &Other) {
+ bool operator !=(const ELFEntityIterator &Other) {
return !(*this == Other);
}
- ELFRelocationIterator &operator ++(int) {
+ ELFEntityIterator &operator ++() {
assert(Current && "Attempted to increment an invalid iterator!");
- Current += Section->sh_entsize;
+ Current += EntitySize;
return *this;
}
- ELFRelocationIterator operator ++() {
- ELFRelocationIterator Tmp = *this;
+ ELFEntityIterator operator ++(int) {
+ ELFEntityIterator Tmp = *this;
++*this;
return Tmp;
}
+ ELFEntityIterator &operator =(const ELFEntityIterator &Other) {
+ EntitySize = Other.EntitySize;
+ Current = Other.Current;
+ return *this;
+ }
+
+ difference_type operator -(const ELFEntityIterator &Other) const {
+ assert(EntitySize == Other.EntitySize &&
+ "Subtracting iterators of different EntitiySize!");
+ return (Current - Other.Current) / EntitySize;
+ }
+
+ const char *get() const { return Current; }
+
private:
- const Elf_Shdr *Section;
+ uint64_t EntitySize;
const char *Current;
};
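Because each iterator carries its entity size, a relocation walk no longer needs the section header on every step. A rough sketch, where Obj, Sec, and the section's raw Contents pointer are assumed to come from the surrounding object-file plumbing, and process() is a hypothetical callback:

    typedef llvm::object::ELFObjectFile<ELF64LE> ObjTy; // ELF64LE as sketched above
    ObjTy::Elf_Rela_Iter I(Sec->sh_entsize, Contents);
    ObjTy::Elf_Rela_Iter E(Sec->sh_entsize, Contents + Sec->sh_size);
    for (; I != E; ++I)
      process(I->getSymbol(Obj.isMips64EL()), I->getType(Obj.isMips64EL()));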
+ typedef Elf_Ehdr_Impl<ELFT> Elf_Ehdr;
+ typedef Elf_Shdr_Impl<ELFT> Elf_Shdr;
+ typedef Elf_Sym_Impl<ELFT> Elf_Sym;
+ typedef Elf_Dyn_Impl<ELFT> Elf_Dyn;
+ typedef Elf_Phdr_Impl<ELFT> Elf_Phdr;
+ typedef Elf_Rel_Impl<ELFT, false> Elf_Rel;
+ typedef Elf_Rel_Impl<ELFT, true> Elf_Rela;
+ typedef Elf_Verdef_Impl<ELFT> Elf_Verdef;
+ typedef Elf_Verdaux_Impl<ELFT> Elf_Verdaux;
+ typedef Elf_Verneed_Impl<ELFT> Elf_Verneed;
+ typedef Elf_Vernaux_Impl<ELFT> Elf_Vernaux;
+ typedef Elf_Versym_Impl<ELFT> Elf_Versym;
+ typedef ELFEntityIterator<const Elf_Dyn> Elf_Dyn_iterator;
+ typedef ELFEntityIterator<const Elf_Sym> Elf_Sym_iterator;
+ typedef ELFEntityIterator<const Elf_Rela> Elf_Rela_Iter;
+ typedef ELFEntityIterator<const Elf_Rel> Elf_Rel_Iter;
+
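  // Usage sketch for ELFEntityIterator (illustrative only; `Base`, `Offset`,
  // `Size`, and `EntSize` are hypothetical, pre-validated inputs). Any table
  // of fixed-size records can be walked by seeding the iterator with the
  // entry size and the raw start/end addresses:
  //
  //   typedef ELFEntityIterator<const Elf_Sym> Iter;
  //   Iter I(EntSize, Base + Offset);
  //   Iter E(EntSize, Base + Offset + Size);
  //   for (; I != E; ++I)
  //     visit(*I);            // *I is a const Elf_Sym &
  //
  // Being random access, iterator subtraction (E - I) counts whole entries.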
+protected:
+ // This flag is used for classof, to distinguish ELFObjectFile from
+  // its subclass. If more subclasses are created, this flag will
+ // have to become an enum.
+ bool isDyldELFObject;
+
+private:
+ typedef SmallVector<const Elf_Shdr *, 2> Sections_t;
+ typedef DenseMap<unsigned, unsigned> IndexMap_t;
+ typedef DenseMap<const Elf_Shdr*, SmallVector<uint32_t, 1> > RelocMap_t;
+
+ const Elf_Ehdr *Header;
+ const Elf_Shdr *SectionHeaderTable;
+ const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
+  const Elf_Shdr *dot_strtab_sec;   // Symbol string table.
+ const Elf_Shdr *dot_dynstr_sec; // Dynamic symbol string table.
+
+  // SymbolTableSections[0] always points to the dynamic symbol table section
+  // header, or NULL if there is no dynamic symbol table.
+ Sections_t SymbolTableSections;
+ IndexMap_t SymbolTableSectionsIndexMap;
+ DenseMap<const Elf_Sym*, ELF::Elf64_Word> ExtendedSymbolTable;
+
+ const Elf_Shdr *dot_dynamic_sec; // .dynamic
+ const Elf_Shdr *dot_gnu_version_sec; // .gnu.version
+ const Elf_Shdr *dot_gnu_version_r_sec; // .gnu.version_r
+ const Elf_Shdr *dot_gnu_version_d_sec; // .gnu.version_d
+
+  // Pointer to the SONAME entry in the dynamic string table.
+ // This is set the first time getLoadName is called.
+ mutable const char *dt_soname;
+
private:
// Records for each version index the corresponding Verdef or Vernaux entry.
// This is filled the first time LoadVersionMap() is called.
@@ -579,6 +668,7 @@ private:
return getSection(Rel.w.b);
}
+public:
bool isRelocationHasAddend(DataRefImpl Rel) const;
template<typename T>
const T *getEntry(uint16_t Section, uint32_t Entry) const;
@@ -609,6 +699,7 @@ public:
const Elf_Dyn *getDyn(DataRefImpl DynData) const;
error_code getSymbolVersion(SymbolRef Symb, StringRef &Version,
bool &IsDefault) const;
+ uint64_t getSymbolIndex(const Elf_Sym *sym) const;
protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
@@ -622,9 +713,6 @@ protected:
section_iterator &Res) const;
virtual error_code getSymbolValue(DataRefImpl Symb, uint64_t &Val) const;
- friend class DynRefImpl<target_endianness, is64Bits>;
- virtual error_code getDynNext(DataRefImpl DynData, DynRef &Result) const;
-
virtual error_code getLibraryNext(DataRefImpl Data, LibraryRef &Result) const;
virtual error_code getLibraryPath(DataRefImpl Data, StringRef &Res) const;
@@ -666,6 +754,13 @@ protected:
public:
ELFObjectFile(MemoryBuffer *Object, error_code &ec);
+
+ bool isMips64EL() const {
+ return Header->e_machine == ELF::EM_MIPS &&
+ Header->getFileClass() == ELF::ELFCLASS64 &&
+ Header->getDataEncoding() == ELF::ELFDATA2LSB;
+ }
+
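  // Why this predicate exists (a sketch of the quirk, not patch content):
  // on MIPS64 EL, r_info is not one 64-bit integer but a little-endian
  // 32-bit symbol index followed by a big-endian 32-bit composite type
  // word, so the Elf_Rel/Elf_Rela accessors must byte-swap when this
  // returns true. Hypothetical use against this class's own helpers:
  //
  //   uint32_t SymIdx = getRel(Rel)->getSymbol(isMips64EL());
  //   uint32_t Type   = getRel(Rel)->getType(isMips64EL());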
virtual symbol_iterator begin_symbols() const;
virtual symbol_iterator end_symbols() const;
@@ -678,30 +773,70 @@ public:
virtual library_iterator begin_libraries_needed() const;
virtual library_iterator end_libraries_needed() const;
- virtual dyn_iterator begin_dynamic_table() const;
- virtual dyn_iterator end_dynamic_table() const;
+ const Elf_Shdr *getDynamicSymbolTableSectionHeader() const {
+ return SymbolTableSections[0];
+ }
+
+ const Elf_Shdr *getDynamicStringTableSectionHeader() const {
+ return dot_dynstr_sec;
+ }
+
+ Elf_Dyn_iterator begin_dynamic_table() const;
+ /// \param NULLEnd use one past the first DT_NULL entry as the end instead of
+ /// the section size.
+ Elf_Dyn_iterator end_dynamic_table(bool NULLEnd = false) const;
+
+ Elf_Sym_iterator begin_elf_dynamic_symbols() const {
+ const Elf_Shdr *DynSymtab = SymbolTableSections[0];
+ if (DynSymtab)
+ return Elf_Sym_iterator(DynSymtab->sh_entsize,
+ (const char *)base() + DynSymtab->sh_offset);
+ return Elf_Sym_iterator(0, 0);
+ }
- typedef ELFRelocationIterator<Elf_Rela> Elf_Rela_Iter;
- typedef ELFRelocationIterator<Elf_Rel> Elf_Rel_Iter;
+ Elf_Sym_iterator end_elf_dynamic_symbols() const {
+ const Elf_Shdr *DynSymtab = SymbolTableSections[0];
+ if (DynSymtab)
+ return Elf_Sym_iterator(DynSymtab->sh_entsize, (const char *)base() +
+ DynSymtab->sh_offset + DynSymtab->sh_size);
+ return Elf_Sym_iterator(0, 0);
+ }
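  // A hypothetical walk over the dynamic symbol table with the two
  // accessors above (`Obj` is an assumed instance of this class):
  //
  //   for (Elf_Sym_iterator I = Obj.begin_elf_dynamic_symbols(),
  //                         E = Obj.end_elf_dynamic_symbols();
  //        I != E; ++I)
  //     visit(I->st_value);   // each ++ advances by sh_entsize bytes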
- virtual Elf_Rela_Iter beginELFRela(const Elf_Shdr *sec) const {
- return Elf_Rela_Iter(sec, (const char *)(base() + sec->sh_offset));
+ Elf_Rela_Iter beginELFRela(const Elf_Shdr *sec) const {
+ return Elf_Rela_Iter(sec->sh_entsize,
+ (const char *)(base() + sec->sh_offset));
}
- virtual Elf_Rela_Iter endELFRela(const Elf_Shdr *sec) const {
- return Elf_Rela_Iter(sec, (const char *)
+ Elf_Rela_Iter endELFRela(const Elf_Shdr *sec) const {
+ return Elf_Rela_Iter(sec->sh_entsize, (const char *)
(base() + sec->sh_offset + sec->sh_size));
}
- virtual Elf_Rel_Iter beginELFRel(const Elf_Shdr *sec) const {
- return Elf_Rel_Iter(sec, (const char *)(base() + sec->sh_offset));
+ Elf_Rel_Iter beginELFRel(const Elf_Shdr *sec) const {
+ return Elf_Rel_Iter(sec->sh_entsize,
+ (const char *)(base() + sec->sh_offset));
}
- virtual Elf_Rel_Iter endELFRel(const Elf_Shdr *sec) const {
- return Elf_Rel_Iter(sec, (const char *)
+ Elf_Rel_Iter endELFRel(const Elf_Shdr *sec) const {
+ return Elf_Rel_Iter(sec->sh_entsize, (const char *)
(base() + sec->sh_offset + sec->sh_size));
}
+  /// \brief Iterate over the program header table.
+ typedef ELFEntityIterator<const Elf_Phdr> Elf_Phdr_Iter;
+
+ Elf_Phdr_Iter begin_program_headers() const {
+ return Elf_Phdr_Iter(Header->e_phentsize,
+ (const char*)base() + Header->e_phoff);
+ }
+
+ Elf_Phdr_Iter end_program_headers() const {
+ return Elf_Phdr_Iter(Header->e_phentsize,
+ (const char*)base() +
+ Header->e_phoff +
+ (Header->e_phnum * Header->e_phentsize));
+ }
+
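  // Sketch of scanning for loadable segments with the iterator pair above
  // (`Obj` is an assumed instance; PT_LOAD is the standard ELF constant):
  //
  //   uint64_t Loadable = 0;
  //   for (Elf_Phdr_Iter I = Obj.begin_program_headers(),
  //                      E = Obj.end_program_headers();
  //        I != E; ++I)
  //     if (I->p_type == ELF::PT_LOAD)
  //       Loadable += I->p_memsz;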
virtual uint8_t getBytesInAddress() const;
virtual StringRef getFileFormatName() const;
virtual StringRef getObjectType() const { return "ELF"; }
@@ -713,6 +848,7 @@ public:
uint64_t getNumSections() const;
uint64_t getStringTableIndex() const;
ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const;
+ const Elf_Ehdr *getElfHeader() const;
const Elf_Shdr *getSection(const Elf_Sym *symb) const;
const Elf_Shdr *getElfSection(section_iterator &It) const;
const Elf_Sym *getElfSymbol(symbol_iterator &It) const;
@@ -721,16 +857,15 @@ public:
// Methods for type inquiry through isa, cast, and dyn_cast
bool isDyldType() const { return isDyldELFObject; }
static inline bool classof(const Binary *v) {
- return v->getType() == getELFType(target_endianness == support::little,
- is64Bits);
+ return v->getType() == getELFType(ELFT::TargetEndianness == support::little,
+ ELFT::Is64Bits);
}
};
// Iterate through the version definitions, and place each Elf_Verdef
// in the VersionMap according to its index.
-template<support::endianness target_endianness, bool is64Bits>
-void ELFObjectFile<target_endianness, is64Bits>::
- LoadVersionDefs(const Elf_Shdr *sec) const {
+template<class ELFT>
+void ELFObjectFile<ELFT>::LoadVersionDefs(const Elf_Shdr *sec) const {
unsigned vd_size = sec->sh_size; // Size of section in bytes
unsigned vd_count = sec->sh_info; // Number of Verdef entries
const char *sec_start = (const char*)base() + sec->sh_offset;
@@ -754,9 +889,8 @@ void ELFObjectFile<target_endianness, is64Bits>::
// Iterate through the versions needed section, and place each Elf_Vernaux
// in the VersionMap according to its index.
-template<support::endianness target_endianness, bool is64Bits>
-void ELFObjectFile<target_endianness, is64Bits>::
- LoadVersionNeeds(const Elf_Shdr *sec) const {
+template<class ELFT>
+void ELFObjectFile<ELFT>::LoadVersionNeeds(const Elf_Shdr *sec) const {
unsigned vn_size = sec->sh_size; // Size of section in bytes
unsigned vn_count = sec->sh_info; // Number of Verneed entries
const char *sec_start = (const char*)base() + sec->sh_offset;
@@ -787,8 +921,8 @@ void ELFObjectFile<target_endianness, is64Bits>::
}
}
-template<support::endianness target_endianness, bool is64Bits>
-void ELFObjectFile<target_endianness, is64Bits>::LoadVersionMap() const {
+template<class ELFT>
+void ELFObjectFile<ELFT>::LoadVersionMap() const {
// If there is no dynamic symtab or version table, there is nothing to do.
if (SymbolTableSections[0] == NULL || dot_gnu_version_sec == NULL)
return;
@@ -809,9 +943,9 @@ void ELFObjectFile<target_endianness, is64Bits>::LoadVersionMap() const {
LoadVersionNeeds(dot_gnu_version_r_sec);
}
-template<support::endianness target_endianness, bool is64Bits>
-void ELFObjectFile<target_endianness, is64Bits>
- ::validateSymbol(DataRefImpl Symb) const {
+template<class ELFT>
+void ELFObjectFile<ELFT>::validateSymbol(DataRefImpl Symb) const {
+#ifndef NDEBUG
const Elf_Sym *symb = getSymbol(Symb);
const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
// FIXME: We really need to do proper error handling in the case of an invalid
@@ -826,12 +960,12 @@ void ELFObjectFile<target_endianness, is64Bits>
+ SymbolTableSection->sh_size)))
// FIXME: Proper error handling.
report_fatal_error("Symb must point to a valid symbol!");
+#endif
}
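// The compile-out pattern used above, in a standalone miniature: with an
// #ifndef NDEBUG guard the validation runs only in assert-enabled builds,
// while call sites stay unchanged. Names here are hypothetical.
static inline void validateIndex(uint64_t Index, uint64_t Count) {
#ifndef NDEBUG
  if (Index >= Count)
    report_fatal_error("Index must point to a valid entry!");
#else
  (void)Index; (void)Count; // keep release builds warning-free
#endif
}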
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolNext(DataRefImpl Symb,
- SymbolRef &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolNext(DataRefImpl Symb,
+ SymbolRef &Result) const {
validateSymbol(Symb);
const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
@@ -856,20 +990,18 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolName(DataRefImpl Symb,
- StringRef &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Symb,
+ StringRef &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
return getSymbolName(SymbolTableSections[Symb.d.b], symb, Result);
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolVersion(SymbolRef SymRef,
- StringRef &Version,
- bool &IsDefault) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolVersion(SymbolRef SymRef,
+ StringRef &Version,
+ bool &IsDefault) const {
DataRefImpl Symb = SymRef.getRawDataRefImpl();
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
@@ -877,18 +1009,17 @@ error_code ELFObjectFile<target_endianness, is64Bits>
Version, IsDefault);
}
-template<support::endianness target_endianness, bool is64Bits>
-ELF::Elf64_Word ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolTableIndex(const Elf_Sym *symb) const {
+template<class ELFT>
+ELF::Elf64_Word ELFObjectFile<ELFT>
+ ::getSymbolTableIndex(const Elf_Sym *symb) const {
if (symb->st_shndx == ELF::SHN_XINDEX)
return ExtendedSymbolTable.lookup(symb);
return symb->st_shndx;
}
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
-ELFObjectFile<target_endianness, is64Bits>
- ::getSection(const Elf_Sym *symb) const {
+template<class ELFT>
+const typename ELFObjectFile<ELFT>::Elf_Shdr *
+ELFObjectFile<ELFT>::getSection(const Elf_Sym *symb) const {
if (symb->st_shndx == ELF::SHN_XINDEX)
return getSection(ExtendedSymbolTable.lookup(symb));
if (symb->st_shndx >= ELF::SHN_LORESERVE)
@@ -896,35 +1027,37 @@ ELFObjectFile<target_endianness, is64Bits>
return getSection(symb->st_shndx);
}
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
-ELFObjectFile<target_endianness, is64Bits>
- ::getElfSection(section_iterator &It) const {
+template<class ELFT>
+const typename ELFObjectFile<ELFT>::Elf_Ehdr *
+ELFObjectFile<ELFT>::getElfHeader() const {
+ return Header;
+}
+
+template<class ELFT>
+const typename ELFObjectFile<ELFT>::Elf_Shdr *
+ELFObjectFile<ELFT>::getElfSection(section_iterator &It) const {
llvm::object::DataRefImpl ShdrRef = It->getRawDataRefImpl();
return reinterpret_cast<const Elf_Shdr *>(ShdrRef.p);
}
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
-ELFObjectFile<target_endianness, is64Bits>
- ::getElfSymbol(symbol_iterator &It) const {
+template<class ELFT>
+const typename ELFObjectFile<ELFT>::Elf_Sym *
+ELFObjectFile<ELFT>::getElfSymbol(symbol_iterator &It) const {
return getSymbol(It->getRawDataRefImpl());
}
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
-ELFObjectFile<target_endianness, is64Bits>
- ::getElfSymbol(uint32_t index) const {
+template<class ELFT>
+const typename ELFObjectFile<ELFT>::Elf_Sym *
+ELFObjectFile<ELFT>::getElfSymbol(uint32_t index) const {
DataRefImpl SymbolData;
SymbolData.d.a = index;
SymbolData.d.b = 1;
return getSymbol(SymbolData);
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolFileOffset(DataRefImpl Symb,
- uint64_t &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolFileOffset(DataRefImpl Symb,
+ uint64_t &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
const Elf_Shdr *Section;
@@ -942,7 +1075,7 @@ error_code ELFObjectFile<target_endianness, is64Bits>
switch (symb->getType()) {
case ELF::STT_SECTION:
- Result = Section ? Section->sh_addr : UnknownAddressOrSize;
+ Result = Section ? Section->sh_offset : UnknownAddressOrSize;
return object_error::success;
case ELF::STT_FUNC:
case ELF::STT_OBJECT:
@@ -956,10 +1089,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
}
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolAddress(DataRefImpl Symb,
- uint64_t &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
const Elf_Shdr *Section;
@@ -991,6 +1123,11 @@ error_code ELFObjectFile<target_endianness, is64Bits>
IsRelocatable = true;
}
Result = symb->st_value;
+
+ // Clear the ARM/Thumb indicator flag.
+ if (Header->e_machine == ELF::EM_ARM)
+ Result &= ~1;
+
if (IsRelocatable && Section != 0)
Result += Section->sh_addr;
return object_error::success;
@@ -1000,10 +1137,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
}
}
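// The Thumb interworking convention handled above, in isolation: for EM_ARM
// objects, bit 0 of st_value only marks a Thumb entry point and is not part
// of the address. A minimal standalone helper (hypothetical name):
static inline uint64_t stripARMThumbBit(uint64_t Addr, bool IsARM) {
  return IsARM ? (Addr & ~uint64_t(1)) : Addr;
}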
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolSize(DataRefImpl Symb,
- uint64_t &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolSize(DataRefImpl Symb,
+ uint64_t &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
if (symb->st_size == 0)
@@ -1012,10 +1148,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolNMTypeChar(DataRefImpl Symb,
- char &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolNMTypeChar(DataRefImpl Symb,
+ char &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
const Elf_Shdr *Section = getSection(symb);
@@ -1077,10 +1212,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolType(DataRefImpl Symb,
- SymbolRef::Type &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolType(DataRefImpl Symb,
+ SymbolRef::Type &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
@@ -1109,10 +1243,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolFlags(DataRefImpl Symb,
- uint32_t &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Symb,
+ uint32_t &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
@@ -1144,10 +1277,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolSection(DataRefImpl Symb,
- section_iterator &Res) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolSection(DataRefImpl Symb,
+ section_iterator &Res) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
const Elf_Shdr *sec = getSection(symb);
@@ -1161,19 +1293,18 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolValue(DataRefImpl Symb,
- uint64_t &Val) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolValue(DataRefImpl Symb,
+ uint64_t &Val) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
Val = symb->st_value;
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSectionNext(DataRefImpl Sec,
+ SectionRef &Result) const {
const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p);
sec += Header->e_shentsize;
Sec.p = reinterpret_cast<intptr_t>(sec);
@@ -1181,65 +1312,58 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionName(DataRefImpl Sec,
- StringRef &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSectionName(DataRefImpl Sec,
+ StringRef &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name));
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionAddress(DataRefImpl Sec,
- uint64_t &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSectionAddress(DataRefImpl Sec,
+ uint64_t &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
Result = sec->sh_addr;
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionSize(DataRefImpl Sec,
- uint64_t &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSectionSize(DataRefImpl Sec,
+ uint64_t &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
Result = sec->sh_size;
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionContents(DataRefImpl Sec,
- StringRef &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSectionContents(DataRefImpl Sec,
+ StringRef &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
const char *start = (const char*)base() + sec->sh_offset;
Result = StringRef(start, sec->sh_size);
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionContents(const Elf_Shdr *Sec,
- StringRef &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSectionContents(const Elf_Shdr *Sec,
+ StringRef &Result) const {
const char *start = (const char*)base() + Sec->sh_offset;
Result = StringRef(start, Sec->sh_size);
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionAlignment(DataRefImpl Sec,
- uint64_t &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSectionAlignment(DataRefImpl Sec,
+ uint64_t &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
Result = sec->sh_addralign;
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionText(DataRefImpl Sec,
- bool &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::isSectionText(DataRefImpl Sec,
+ bool &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
if (sec->sh_flags & ELF::SHF_EXECINSTR)
Result = true;
@@ -1248,10 +1372,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionData(DataRefImpl Sec,
- bool &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::isSectionData(DataRefImpl Sec,
+ bool &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)
&& sec->sh_type == ELF::SHT_PROGBITS)
@@ -1261,10 +1384,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionBSS(DataRefImpl Sec,
- bool &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::isSectionBSS(DataRefImpl Sec,
+ bool &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)
&& sec->sh_type == ELF::SHT_NOBITS)
@@ -1274,10 +1396,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionRequiredForExecution(DataRefImpl Sec,
- bool &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::isSectionRequiredForExecution(
+ DataRefImpl Sec, bool &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
if (sec->sh_flags & ELF::SHF_ALLOC)
Result = true;
@@ -1286,10 +1407,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionVirtual(DataRefImpl Sec,
- bool &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::isSectionVirtual(DataRefImpl Sec,
+ bool &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
if (sec->sh_type == ELF::SHT_NOBITS)
Result = true;
@@ -1298,24 +1418,19 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionZeroInit(DataRefImpl Sec,
- bool &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::isSectionZeroInit(DataRefImpl Sec,
+ bool &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
// For ELF, all zero-init sections are virtual (that is, they occupy no space
// in the object image) and vice versa.
- if (sec->sh_flags & ELF::SHT_NOBITS)
- Result = true;
- else
- Result = false;
+ Result = sec->sh_type == ELF::SHT_NOBITS;
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionReadOnlyData(DataRefImpl Sec,
- bool &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::isSectionReadOnlyData(DataRefImpl Sec,
+ bool &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
if (sec->sh_flags & ELF::SHF_WRITE || sec->sh_flags & ELF::SHF_EXECINSTR)
Result = false;
@@ -1324,19 +1439,26 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::sectionContainsSymbol(DataRefImpl Sec,
- DataRefImpl Symb,
- bool &Result) const {
- // FIXME: Unimplemented.
- Result = false;
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ validateSymbol(Symb);
+
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ const Elf_Sym *symb = getSymbol(Symb);
+
+ unsigned shndx = symb->st_shndx;
+ bool Reserved = shndx >= ELF::SHN_LORESERVE
+ && shndx <= ELF::SHN_HIRESERVE;
+
+ Result = !Reserved && (sec == getSection(symb->st_shndx));
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-relocation_iterator ELFObjectFile<target_endianness, is64Bits>
- ::getSectionRelBegin(DataRefImpl Sec) const {
+template<class ELFT>
+relocation_iterator
+ELFObjectFile<ELFT>::getSectionRelBegin(DataRefImpl Sec) const {
DataRefImpl RelData;
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec);
@@ -1348,9 +1470,9 @@ relocation_iterator ELFObjectFile<target_endianness, is64Bits>
return relocation_iterator(RelocationRef(RelData, this));
}
-template<support::endianness target_endianness, bool is64Bits>
-relocation_iterator ELFObjectFile<target_endianness, is64Bits>
- ::getSectionRelEnd(DataRefImpl Sec) const {
+template<class ELFT>
+relocation_iterator
+ELFObjectFile<ELFT>::getSectionRelEnd(DataRefImpl Sec) const {
DataRefImpl RelData;
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec);
@@ -1366,10 +1488,9 @@ relocation_iterator ELFObjectFile<target_endianness, is64Bits>
}
// Relocations
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationNext(DataRefImpl Rel,
- RelocationRef &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getRelocationNext(DataRefImpl Rel,
+ RelocationRef &Result) const {
++Rel.w.c;
const Elf_Shdr *relocsec = getSection(Rel.w.b);
if (Rel.w.c >= (relocsec->sh_size / relocsec->sh_entsize)) {
@@ -1395,21 +1516,20 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationSymbol(DataRefImpl Rel,
- SymbolRef &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getRelocationSymbol(DataRefImpl Rel,
+ SymbolRef &Result) const {
uint32_t symbolIdx;
const Elf_Shdr *sec = getSection(Rel.w.b);
switch (sec->sh_type) {
default :
report_fatal_error("Invalid section type in Rel!");
case ELF::SHT_REL : {
- symbolIdx = getRel(Rel)->getSymbol();
+ symbolIdx = getRel(Rel)->getSymbol(isMips64EL());
break;
}
case ELF::SHT_RELA : {
- symbolIdx = getRela(Rel)->getSymbol();
+ symbolIdx = getRela(Rel)->getSymbol(isMips64EL());
break;
}
}
@@ -1423,10 +1543,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationAddress(DataRefImpl Rel,
- uint64_t &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getRelocationAddress(DataRefImpl Rel,
+ uint64_t &Result) const {
uint64_t offset;
const Elf_Shdr *sec = getSection(Rel.w.b);
switch (sec->sh_type) {
@@ -1446,10 +1565,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationOffset(DataRefImpl Rel,
- uint64_t &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Result) const {
uint64_t offset;
const Elf_Shdr *sec = getSection(Rel.w.b);
switch (sec->sh_type) {
@@ -1469,20 +1587,19 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationType(DataRefImpl Rel,
- uint64_t &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getRelocationType(DataRefImpl Rel,
+ uint64_t &Result) const {
const Elf_Shdr *sec = getSection(Rel.w.b);
switch (sec->sh_type) {
default :
report_fatal_error("Invalid section type in Rel!");
case ELF::SHT_REL : {
- Result = getRel(Rel)->getType();
+ Result = getRel(Rel)->getType(isMips64EL());
break;
}
case ELF::SHT_RELA : {
- Result = getRela(Rel)->getType();
+ Result = getRela(Rel)->getType(isMips64EL());
break;
}
}
@@ -1492,22 +1609,21 @@ error_code ELFObjectFile<target_endianness, is64Bits>
#define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \
case ELF::enum: res = #enum; break;
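// The stringize-and-switch idiom behind the macro above, as a self-contained
// miniature (all names here are hypothetical):
#define MINI_RELOC_CASE(enumval) case enumval: Res = #enumval; break;
enum MiniRelocType { R_MINI_NONE, R_MINI_ABS32 };
static inline const char *miniRelocName(unsigned Type) {
  const char *Res = "Unknown";
  switch (Type) {
    MINI_RELOC_CASE(R_MINI_NONE)  // expands to: case R_MINI_NONE: Res = ...
    MINI_RELOC_CASE(R_MINI_ABS32)
  }
  return Res;
}
#undef MINI_RELOC_CASE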
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationTypeName(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getRelocationTypeName(
+ DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
const Elf_Shdr *sec = getSection(Rel.w.b);
- uint8_t type;
+ uint32_t type;
StringRef res;
switch (sec->sh_type) {
default :
return object_error::parse_failed;
case ELF::SHT_REL : {
- type = getRel(Rel)->getType();
+ type = getRel(Rel)->getType(isMips64EL());
break;
}
case ELF::SHT_RELA : {
- type = getRela(Rel)->getType();
+ type = getRela(Rel)->getType(isMips64EL());
break;
}
}
@@ -1596,6 +1712,143 @@ error_code ELFObjectFile<target_endianness, is64Bits>
res = "Unknown";
}
break;
+ case ELF::EM_MIPS:
+ switch (type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_26);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LITERAL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT5);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT6);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_DISP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_PAGE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_OFST);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SUB);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_A);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_B);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_DELETE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHER);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHEST);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SCN_DISP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_ADD_IMMEDIATE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PJUMP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_RELGOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JALR);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_LDM);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GOTTPREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_LO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JUMP_SLOT);
+ default:
+ res = "Unknown";
+ }
+ break;
+ case ELF::EM_AARCH64:
+ switch (type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G3);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD_PREL_LO19);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_LO21);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_PG_HI21);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADD_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST8_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TSTBR14);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CONDBR19);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_JUMP26);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CALL26);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST16_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST32_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST64_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST128_ABS_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_GOT_PAGE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD64_GOT_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_HI12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_HI12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADR_PAGE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_LD64_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADD_LO12_NC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_CALL);
+
+ default:
+ res = "Unknown";
+ }
+ break;
case ELF::EM_ARM:
switch (type) {
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_NONE);
@@ -1834,10 +2087,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
#undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationAdditionalInfo(DataRefImpl Rel,
- int64_t &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getRelocationAdditionalInfo(
+ DataRefImpl Rel, int64_t &Result) const {
const Elf_Shdr *sec = getSection(Rel.w.b);
switch (sec->sh_type) {
default :
@@ -1853,10 +2105,9 @@ error_code ELFObjectFile<target_endianness, is64Bits>
}
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getRelocationValueString(
+ DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
const Elf_Shdr *sec = getSection(Rel.w.b);
uint8_t type;
StringRef res;
@@ -1866,14 +2117,14 @@ error_code ELFObjectFile<target_endianness, is64Bits>
default:
return object_error::parse_failed;
case ELF::SHT_REL: {
- type = getRel(Rel)->getType();
- symbol_index = getRel(Rel)->getSymbol();
+ type = getRel(Rel)->getType(isMips64EL());
+ symbol_index = getRel(Rel)->getSymbol(isMips64EL());
// TODO: Read implicit addend from section data.
break;
}
case ELF::SHT_RELA: {
- type = getRela(Rel)->getType();
- symbol_index = getRela(Rel)->getSymbol();
+ type = getRela(Rel)->getType(isMips64EL());
+ symbol_index = getRela(Rel)->getSymbol(isMips64EL());
addend = getRela(Rel)->r_addend;
break;
}
@@ -1911,6 +2162,7 @@ error_code ELFObjectFile<target_endianness, is64Bits>
res = "Unknown";
}
break;
+ case ELF::EM_AARCH64:
case ELF::EM_ARM:
case ELF::EM_HEXAGON:
res = symname;
@@ -1924,20 +2176,21 @@ error_code ELFObjectFile<target_endianness, is64Bits>
}
// Verify that the last byte in the string table is a null terminator.
-template<support::endianness target_endianness, bool is64Bits>
-void ELFObjectFile<target_endianness, is64Bits>
- ::VerifyStrTab(const Elf_Shdr *sh) const {
+template<class ELFT>
+void ELFObjectFile<ELFT>::VerifyStrTab(const Elf_Shdr *sh) const {
const char *strtab = (const char*)base() + sh->sh_offset;
if (strtab[sh->sh_size - 1] != 0)
// FIXME: Proper error handling.
report_fatal_error("String table must end with a null terminator!");
}
-template<support::endianness target_endianness, bool is64Bits>
-ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
- , error_code &ec)
- : ObjectFile(getELFType(target_endianness == support::little, is64Bits),
- Object, ec)
+template<class ELFT>
+ELFObjectFile<ELFT>::ELFObjectFile(MemoryBuffer *Object, error_code &ec)
+ : ObjectFile(getELFType(
+ static_cast<endianness>(ELFT::TargetEndianness) == support::little,
+ ELFT::Is64Bits),
+ Object,
+ ec)
, isDyldELFObject(false)
, SectionHeaderTable(0)
, dot_shstrtab_sec(0)
@@ -2094,9 +2347,22 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
}
}
-template<support::endianness target_endianness, bool is64Bits>
-symbol_iterator ELFObjectFile<target_endianness, is64Bits>
- ::begin_symbols() const {
+// Get the index of the given symbol within its symtab section.
+template<class ELFT>
+uint64_t ELFObjectFile<ELFT>::getSymbolIndex(const Elf_Sym *Sym) const {
+ assert(SymbolTableSections.size() == 1 && "Only one symbol table supported!");
+ const Elf_Shdr *SymTab = *SymbolTableSections.begin();
+ uintptr_t SymLoc = uintptr_t(Sym);
+ uintptr_t SymTabLoc = uintptr_t(base() + SymTab->sh_offset);
+ assert(SymLoc > SymTabLoc && "Symbol not in symbol table!");
+ uint64_t SymOffset = SymLoc - SymTabLoc;
+ assert(SymOffset % SymTab->sh_entsize == 0 &&
+         "Symbol offset is not a multiple of the entry size!");
+ return SymOffset / SymTab->sh_entsize;
+}
+
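// The pointer arithmetic above in its general form: for any table of
// fixed-size records, (element - table_base) / entry_size recovers the
// element's index. A standalone sketch (hypothetical name):
static inline uint64_t entryIndexOf(uintptr_t Elem, uintptr_t TableBase,
                                    uint64_t EntSize) {
  assert(Elem >= TableBase && "Element not inside the table!");
  assert((Elem - TableBase) % EntSize == 0 && "Misaligned element pointer!");
  return (Elem - TableBase) / EntSize;
}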
+template<class ELFT>
+symbol_iterator ELFObjectFile<ELFT>::begin_symbols() const {
DataRefImpl SymbolData;
if (SymbolTableSections.size() <= 1) {
SymbolData.d.a = std::numeric_limits<uint32_t>::max();
@@ -2108,18 +2374,16 @@ symbol_iterator ELFObjectFile<target_endianness, is64Bits>
return symbol_iterator(SymbolRef(SymbolData, this));
}
-template<support::endianness target_endianness, bool is64Bits>
-symbol_iterator ELFObjectFile<target_endianness, is64Bits>
- ::end_symbols() const {
+template<class ELFT>
+symbol_iterator ELFObjectFile<ELFT>::end_symbols() const {
DataRefImpl SymbolData;
SymbolData.d.a = std::numeric_limits<uint32_t>::max();
SymbolData.d.b = std::numeric_limits<uint32_t>::max();
return symbol_iterator(SymbolRef(SymbolData, this));
}
-template<support::endianness target_endianness, bool is64Bits>
-symbol_iterator ELFObjectFile<target_endianness, is64Bits>
- ::begin_dynamic_symbols() const {
+template<class ELFT>
+symbol_iterator ELFObjectFile<ELFT>::begin_dynamic_symbols() const {
DataRefImpl SymbolData;
if (SymbolTableSections[0] == NULL) {
SymbolData.d.a = std::numeric_limits<uint32_t>::max();
@@ -2131,26 +2395,23 @@ symbol_iterator ELFObjectFile<target_endianness, is64Bits>
return symbol_iterator(SymbolRef(SymbolData, this));
}
-template<support::endianness target_endianness, bool is64Bits>
-symbol_iterator ELFObjectFile<target_endianness, is64Bits>
- ::end_dynamic_symbols() const {
+template<class ELFT>
+symbol_iterator ELFObjectFile<ELFT>::end_dynamic_symbols() const {
DataRefImpl SymbolData;
SymbolData.d.a = std::numeric_limits<uint32_t>::max();
SymbolData.d.b = std::numeric_limits<uint32_t>::max();
return symbol_iterator(SymbolRef(SymbolData, this));
}
-template<support::endianness target_endianness, bool is64Bits>
-section_iterator ELFObjectFile<target_endianness, is64Bits>
- ::begin_sections() const {
+template<class ELFT>
+section_iterator ELFObjectFile<ELFT>::begin_sections() const {
DataRefImpl ret;
ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff);
return section_iterator(SectionRef(ret, this));
}
-template<support::endianness target_endianness, bool is64Bits>
-section_iterator ELFObjectFile<target_endianness, is64Bits>
- ::end_sections() const {
+template<class ELFT>
+section_iterator ELFObjectFile<ELFT>::end_sections() const {
DataRefImpl ret;
ret.p = reinterpret_cast<intptr_t>(base()
+ Header->e_shoff
@@ -2158,58 +2419,47 @@ section_iterator ELFObjectFile<target_endianness, is64Bits>
return section_iterator(SectionRef(ret, this));
}
-template<support::endianness target_endianness, bool is64Bits>
-typename ELFObjectFile<target_endianness, is64Bits>::dyn_iterator
-ELFObjectFile<target_endianness, is64Bits>::begin_dynamic_table() const {
- DataRefImpl DynData;
- if (dot_dynamic_sec == NULL || dot_dynamic_sec->sh_size == 0) {
- DynData.d.a = std::numeric_limits<uint32_t>::max();
- } else {
- DynData.d.a = 0;
- }
- return dyn_iterator(DynRef(DynData, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-typename ELFObjectFile<target_endianness, is64Bits>::dyn_iterator
-ELFObjectFile<target_endianness, is64Bits>
- ::end_dynamic_table() const {
- DataRefImpl DynData;
- DynData.d.a = std::numeric_limits<uint32_t>::max();
- return dyn_iterator(DynRef(DynData, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getDynNext(DataRefImpl DynData,
- DynRef &Result) const {
- ++DynData.d.a;
-
- // Check to see if we are at the end of .dynamic
- if (DynData.d.a >= dot_dynamic_sec->getEntityCount()) {
- // We are at the end. Return the terminator.
- DynData.d.a = std::numeric_limits<uint32_t>::max();
+template<class ELFT>
+typename ELFObjectFile<ELFT>::Elf_Dyn_iterator
+ELFObjectFile<ELFT>::begin_dynamic_table() const {
+ if (dot_dynamic_sec)
+ return Elf_Dyn_iterator(dot_dynamic_sec->sh_entsize,
+ (const char *)base() + dot_dynamic_sec->sh_offset);
+ return Elf_Dyn_iterator(0, 0);
+}
+
+template<class ELFT>
+typename ELFObjectFile<ELFT>::Elf_Dyn_iterator
+ELFObjectFile<ELFT>::end_dynamic_table(bool NULLEnd) const {
+ if (dot_dynamic_sec) {
+ Elf_Dyn_iterator Ret(dot_dynamic_sec->sh_entsize,
+ (const char *)base() + dot_dynamic_sec->sh_offset +
+ dot_dynamic_sec->sh_size);
+
+ if (NULLEnd) {
+ Elf_Dyn_iterator Start = begin_dynamic_table();
+ while (Start != Ret && Start->getTag() != ELF::DT_NULL)
+ ++Start;
+
+ // Include the DT_NULL.
+ if (Start != Ret)
+ ++Start;
+ Ret = Start;
+ }
+ return Ret;
}
-
- Result = DynRef(DynData, this);
- return object_error::success;
+ return Elf_Dyn_iterator(0, 0);
}
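// Sketch of the DT_NULL-bounded walk that end_dynamic_table(true) enables;
// `Obj` is an assumed instance and the output vector is illustrative only.
template <class ELFT>
static void collectDynTags(const ELFObjectFile<ELFT> &Obj,
                           SmallVectorImpl<int64_t> &Tags) {
  typedef typename ELFObjectFile<ELFT>::Elf_Dyn_iterator DynIt;
  for (DynIt I = Obj.begin_dynamic_table(),
             E = Obj.end_dynamic_table(true); // stops just past DT_NULL
       I != E; ++I)
    Tags.push_back(I->getTag()); // the last tag pushed is DT_NULL itself
}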
-template<support::endianness target_endianness, bool is64Bits>
-StringRef
-ELFObjectFile<target_endianness, is64Bits>::getLoadName() const {
+template<class ELFT>
+StringRef ELFObjectFile<ELFT>::getLoadName() const {
if (!dt_soname) {
// Find the DT_SONAME entry
- dyn_iterator it = begin_dynamic_table();
- dyn_iterator ie = end_dynamic_table();
- error_code ec;
- while (it != ie) {
- if (it->getTag() == ELF::DT_SONAME)
- break;
- it.increment(ec);
- if (ec)
- report_fatal_error("dynamic table iteration failed");
- }
+ Elf_Dyn_iterator it = begin_dynamic_table();
+ Elf_Dyn_iterator ie = end_dynamic_table();
+ while (it != ie && it->getTag() != ELF::DT_SONAME)
+ ++it;
+
if (it != ie) {
if (dot_dynstr_sec == NULL)
report_fatal_error("Dynamic string table is missing");
@@ -2221,57 +2471,43 @@ ELFObjectFile<target_endianness, is64Bits>::getLoadName() const {
return dt_soname;
}
-template<support::endianness target_endianness, bool is64Bits>
-library_iterator ELFObjectFile<target_endianness, is64Bits>
- ::begin_libraries_needed() const {
+template<class ELFT>
+library_iterator ELFObjectFile<ELFT>::begin_libraries_needed() const {
// Find the first DT_NEEDED entry
- dyn_iterator i = begin_dynamic_table();
- dyn_iterator e = end_dynamic_table();
- error_code ec;
- while (i != e) {
- if (i->getTag() == ELF::DT_NEEDED)
- break;
- i.increment(ec);
- if (ec)
- report_fatal_error("dynamic table iteration failed");
- }
- // Use the same DataRefImpl format as DynRef.
- return library_iterator(LibraryRef(i->getRawDataRefImpl(), this));
+ Elf_Dyn_iterator i = begin_dynamic_table();
+ Elf_Dyn_iterator e = end_dynamic_table();
+ while (i != e && i->getTag() != ELF::DT_NEEDED)
+ ++i;
+
+ DataRefImpl DRI;
+ DRI.p = reinterpret_cast<uintptr_t>(i.get());
+ return library_iterator(LibraryRef(DRI, this));
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getLibraryNext(DataRefImpl Data,
- LibraryRef &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getLibraryNext(DataRefImpl Data,
+ LibraryRef &Result) const {
  // The DataRefImpl stores a raw pointer into the dynamic table.
- dyn_iterator i = dyn_iterator(DynRef(Data, this));
- dyn_iterator e = end_dynamic_table();
-
- // Skip the current dynamic table entry.
- error_code ec;
- if (i != e) {
- i.increment(ec);
- // TODO: proper error handling
- if (ec)
- report_fatal_error("dynamic table iteration failed");
- }
-
- // Find the next DT_NEEDED entry.
- while (i != e) {
- if (i->getTag() == ELF::DT_NEEDED)
- break;
- i.increment(ec);
- if (ec)
- report_fatal_error("dynamic table iteration failed");
- }
- Result = LibraryRef(i->getRawDataRefImpl(), this);
+ Elf_Dyn_iterator i = Elf_Dyn_iterator(dot_dynamic_sec->sh_entsize,
+ reinterpret_cast<const char *>(Data.p));
+ Elf_Dyn_iterator e = end_dynamic_table();
+
+ // Skip the current dynamic table entry and find the next DT_NEEDED entry.
+ do
+ ++i;
+ while (i != e && i->getTag() != ELF::DT_NEEDED);
+
+ DataRefImpl DRI;
+ DRI.p = reinterpret_cast<uintptr_t>(i.get());
+ Result = LibraryRef(DRI, this);
return object_error::success;
}
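// Sketch of the consumer loop these two methods back; `Obj` is an assumed
// ELFObjectFile<ELFT> instance, and increment()/getPath() come from the
// generic content_iterator/LibraryRef interface (an assumption here).
template <class ELFT>
static void collectNeededLibraries(const ELFObjectFile<ELFT> &Obj,
                                   SmallVectorImpl<StringRef> &Paths) {
  error_code ec;
  for (library_iterator I = Obj.begin_libraries_needed(),
                        E = Obj.end_libraries_needed();
       I != E; I.increment(ec)) {
    if (ec)
      report_fatal_error("needed-library iteration failed");
    StringRef Path;
    if (!I->getPath(Path)) // an error_code tests false on success
      Paths.push_back(Path);
  }
}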
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getLibraryPath(DataRefImpl Data, StringRef &Res) const {
- dyn_iterator i = dyn_iterator(DynRef(Data, this));
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getLibraryPath(DataRefImpl Data,
+ StringRef &Res) const {
+ Elf_Dyn_iterator i = Elf_Dyn_iterator(dot_dynamic_sec->sh_entsize,
+ reinterpret_cast<const char *>(Data.p));
if (i == end_dynamic_table())
report_fatal_error("getLibraryPath() called on iterator end");
@@ -2289,22 +2525,21 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-library_iterator ELFObjectFile<target_endianness, is64Bits>
- ::end_libraries_needed() const {
- dyn_iterator e = end_dynamic_table();
- // Use the same DataRefImpl format as DynRef.
- return library_iterator(LibraryRef(e->getRawDataRefImpl(), this));
+template<class ELFT>
+library_iterator ELFObjectFile<ELFT>::end_libraries_needed() const {
+ Elf_Dyn_iterator e = end_dynamic_table();
+ DataRefImpl DRI;
+ DRI.p = reinterpret_cast<uintptr_t>(e.get());
+ return library_iterator(LibraryRef(DRI, this));
}
-template<support::endianness target_endianness, bool is64Bits>
-uint8_t ELFObjectFile<target_endianness, is64Bits>::getBytesInAddress() const {
- return is64Bits ? 8 : 4;
+template<class ELFT>
+uint8_t ELFObjectFile<ELFT>::getBytesInAddress() const {
+ return ELFT::Is64Bits ? 8 : 4;
}
-template<support::endianness target_endianness, bool is64Bits>
-StringRef ELFObjectFile<target_endianness, is64Bits>
- ::getFileFormatName() const {
+template<class ELFT>
+StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
switch(Header->e_ident[ELF::EI_CLASS]) {
case ELF::ELFCLASS32:
switch(Header->e_machine) {
@@ -2316,6 +2551,8 @@ StringRef ELFObjectFile<target_endianness, is64Bits>
return "ELF32-arm";
case ELF::EM_HEXAGON:
return "ELF32-hexagon";
+ case ELF::EM_MIPS:
+ return "ELF32-mips";
default:
return "ELF32-unknown";
}
@@ -2325,6 +2562,8 @@ StringRef ELFObjectFile<target_endianness, is64Bits>
return "ELF64-i386";
case ELF::EM_X86_64:
return "ELF64-x86-64";
+ case ELF::EM_AARCH64:
+ return "ELF64-aarch64";
case ELF::EM_PPC64:
return "ELF64-ppc64";
default:
@@ -2336,19 +2575,21 @@ StringRef ELFObjectFile<target_endianness, is64Bits>
}
}
-template<support::endianness target_endianness, bool is64Bits>
-unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const {
+template<class ELFT>
+unsigned ELFObjectFile<ELFT>::getArch() const {
switch(Header->e_machine) {
case ELF::EM_386:
return Triple::x86;
case ELF::EM_X86_64:
return Triple::x86_64;
+ case ELF::EM_AARCH64:
+ return Triple::aarch64;
case ELF::EM_ARM:
return Triple::arm;
case ELF::EM_HEXAGON:
return Triple::hexagon;
case ELF::EM_MIPS:
- return (target_endianness == support::little) ?
+ return (ELFT::TargetEndianness == support::little) ?
Triple::mipsel : Triple::mips;
case ELF::EM_PPC64:
return Triple::ppc64;
@@ -2357,8 +2598,8 @@ unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const {
}
}
-template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>::getNumSections() const {
+template<class ELFT>
+uint64_t ELFObjectFile<ELFT>::getNumSections() const {
assert(Header && "Header not initialized!");
if (Header->e_shnum == ELF::SHN_UNDEF) {
assert(SectionHeaderTable && "SectionHeaderTable not initialized!");
@@ -2367,9 +2608,9 @@ uint64_t ELFObjectFile<target_endianness, is64Bits>::getNumSections() const {
return Header->e_shnum;
}
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
uint64_t
-ELFObjectFile<target_endianness, is64Bits>::getStringTableIndex() const {
+ELFObjectFile<ELFT>::getStringTableIndex() const {
if (Header->e_shnum == ELF::SHN_UNDEF) {
if (Header->e_shstrndx == ELF::SHN_HIRESERVE)
return SectionHeaderTable->sh_link;
@@ -2379,53 +2620,44 @@ ELFObjectFile<target_endianness, is64Bits>::getStringTableIndex() const {
return Header->e_shstrndx;
}
-
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
template<typename T>
inline const T *
-ELFObjectFile<target_endianness, is64Bits>::getEntry(uint16_t Section,
- uint32_t Entry) const {
+ELFObjectFile<ELFT>::getEntry(uint16_t Section, uint32_t Entry) const {
return getEntry<T>(getSection(Section), Entry);
}
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
template<typename T>
inline const T *
-ELFObjectFile<target_endianness, is64Bits>::getEntry(const Elf_Shdr * Section,
- uint32_t Entry) const {
+ELFObjectFile<ELFT>::getEntry(const Elf_Shdr * Section, uint32_t Entry) const {
return reinterpret_cast<const T *>(
base()
+ Section->sh_offset
+ (Entry * Section->sh_entsize));
}
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
-ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const {
+template<class ELFT>
+const typename ELFObjectFile<ELFT>::Elf_Sym *
+ELFObjectFile<ELFT>::getSymbol(DataRefImpl Symb) const {
return getEntry<Elf_Sym>(SymbolTableSections[Symb.d.b], Symb.d.a);
}
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Dyn *
-ELFObjectFile<target_endianness, is64Bits>::getDyn(DataRefImpl DynData) const {
- return getEntry<Elf_Dyn>(dot_dynamic_sec, DynData.d.a);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rel *
-ELFObjectFile<target_endianness, is64Bits>::getRel(DataRefImpl Rel) const {
+template<class ELFT>
+const typename ELFObjectFile<ELFT>::Elf_Rel *
+ELFObjectFile<ELFT>::getRel(DataRefImpl Rel) const {
return getEntry<Elf_Rel>(Rel.w.b, Rel.w.c);
}
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rela *
-ELFObjectFile<target_endianness, is64Bits>::getRela(DataRefImpl Rela) const {
+template<class ELFT>
+const typename ELFObjectFile<ELFT>::Elf_Rela *
+ELFObjectFile<ELFT>::getRela(DataRefImpl Rela) const {
return getEntry<Elf_Rela>(Rela.w.b, Rela.w.c);
}
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
-ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const {
+template<class ELFT>
+const typename ELFObjectFile<ELFT>::Elf_Shdr *
+ELFObjectFile<ELFT>::getSection(DataRefImpl Symb) const {
const Elf_Shdr *sec = getSection(Symb.d.b);
  if (sec->sh_type != ELF::SHT_SYMTAB && sec->sh_type != ELF::SHT_DYNSYM)
// FIXME: Proper error handling.
@@ -2433,9 +2665,9 @@ ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const {
return sec;
}
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
-ELFObjectFile<target_endianness, is64Bits>::getSection(uint32_t index) const {
+template<class ELFT>
+const typename ELFObjectFile<ELFT>::Elf_Shdr *
+ELFObjectFile<ELFT>::getSection(uint32_t index) const {
if (index == 0)
return 0;
if (!SectionHeaderTable || index >= getNumSections())
@@ -2447,17 +2679,15 @@ ELFObjectFile<target_endianness, is64Bits>::getSection(uint32_t index) const {
+ (index * Header->e_shentsize));
}
-template<support::endianness target_endianness, bool is64Bits>
-const char *ELFObjectFile<target_endianness, is64Bits>
- ::getString(uint32_t section,
- ELF::Elf32_Word offset) const {
+template<class ELFT>
+const char *ELFObjectFile<ELFT>::getString(uint32_t section,
+ ELF::Elf32_Word offset) const {
return getString(getSection(section), offset);
}
-template<support::endianness target_endianness, bool is64Bits>
-const char *ELFObjectFile<target_endianness, is64Bits>
- ::getString(const Elf_Shdr *section,
- ELF::Elf32_Word offset) const {
+template<class ELFT>
+const char *ELFObjectFile<ELFT>::getString(const Elf_Shdr *section,
+ ELF::Elf32_Word offset) const {
assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!");
if (offset >= section->sh_size)
// FIXME: Proper error handling.
@@ -2465,11 +2695,10 @@ const char *ELFObjectFile<target_endianness, is64Bits>
return (const char *)base() + section->sh_offset + offset;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolName(const Elf_Shdr *section,
- const Elf_Sym *symb,
- StringRef &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolName(const Elf_Shdr *section,
+ const Elf_Sym *symb,
+ StringRef &Result) const {
if (symb->st_name == 0) {
const Elf_Shdr *section = getSection(symb);
if (!section)
@@ -2489,20 +2718,18 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionName(const Elf_Shdr *section,
- StringRef &Result) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSectionName(const Elf_Shdr *section,
+ StringRef &Result) const {
Result = StringRef(getString(dot_shstrtab_sec, section->sh_name));
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolVersion(const Elf_Shdr *section,
- const Elf_Sym *symb,
- StringRef &Version,
- bool &IsDefault) const {
+template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolVersion(const Elf_Shdr *section,
+ const Elf_Sym *symb,
+ StringRef &Version,
+ bool &IsDefault) const {
// Handle non-dynamic symbols.
if (section != SymbolTableSections[0]) {
// Non-dynamic symbols can have versions in their names
@@ -2580,54 +2807,6 @@ error_code ELFObjectFile<target_endianness, is64Bits>
return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-inline DynRefImpl<target_endianness, is64Bits>
- ::DynRefImpl(DataRefImpl DynP, const OwningType *Owner)
- : DynPimpl(DynP)
- , OwningObject(Owner) {}
-
-template<support::endianness target_endianness, bool is64Bits>
-inline bool DynRefImpl<target_endianness, is64Bits>
- ::operator==(const DynRefImpl &Other) const {
- return DynPimpl == Other.DynPimpl;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-inline bool DynRefImpl<target_endianness, is64Bits>
- ::operator <(const DynRefImpl &Other) const {
- return DynPimpl < Other.DynPimpl;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-inline error_code DynRefImpl<target_endianness, is64Bits>
- ::getNext(DynRefImpl &Result) const {
- return OwningObject->getDynNext(DynPimpl, Result);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-inline int64_t DynRefImpl<target_endianness, is64Bits>
- ::getTag() const {
- return OwningObject->getDyn(DynPimpl)->d_tag;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-inline uint64_t DynRefImpl<target_endianness, is64Bits>
- ::getVal() const {
- return OwningObject->getDyn(DynPimpl)->d_un.d_val;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-inline uint64_t DynRefImpl<target_endianness, is64Bits>
- ::getPtr() const {
- return OwningObject->getDyn(DynPimpl)->d_un.d_ptr;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-inline DataRefImpl DynRefImpl<target_endianness, is64Bits>
- ::getRawDataRefImpl() const {
- return DynPimpl;
-}
-
/// This is a generic interface for retrieving GNU symbol version
/// information from an ELFObjectFile.
static inline error_code GetELFSymbolVersion(const ObjectFile *Obj,
@@ -2635,28 +2814,43 @@ static inline error_code GetELFSymbolVersion(const ObjectFile *Obj,
StringRef &Version,
bool &IsDefault) {
// Little-endian 32-bit
- if (const ELFObjectFile<support::little, false> *ELFObj =
- dyn_cast<ELFObjectFile<support::little, false> >(Obj))
+ if (const ELFObjectFile<ELFType<support::little, 4, false> > *ELFObj =
+ dyn_cast<ELFObjectFile<ELFType<support::little, 4, false> > >(Obj))
return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
// Big-endian 32-bit
- if (const ELFObjectFile<support::big, false> *ELFObj =
- dyn_cast<ELFObjectFile<support::big, false> >(Obj))
+ if (const ELFObjectFile<ELFType<support::big, 4, false> > *ELFObj =
+ dyn_cast<ELFObjectFile<ELFType<support::big, 4, false> > >(Obj))
return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
// Little-endian 64-bit
- if (const ELFObjectFile<support::little, true> *ELFObj =
- dyn_cast<ELFObjectFile<support::little, true> >(Obj))
+ if (const ELFObjectFile<ELFType<support::little, 8, true> > *ELFObj =
+ dyn_cast<ELFObjectFile<ELFType<support::little, 8, true> > >(Obj))
return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
// Big-endian 64-bit
- if (const ELFObjectFile<support::big, true> *ELFObj =
- dyn_cast<ELFObjectFile<support::big, true> >(Obj))
+ if (const ELFObjectFile<ELFType<support::big, 8, true> > *ELFObj =
+ dyn_cast<ELFObjectFile<ELFType<support::big, 8, true> > >(Obj))
return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
llvm_unreachable("Object passed to GetELFSymbolVersion() is not ELF");
}
+/// This function returns the hash value for a symbol in the .dynsym section.
+/// The name of the API is kept consistent with libelf's elf_hash.
+/// REF: http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash
+static inline unsigned elf_hash(StringRef &symbolName) {
+ unsigned h = 0, g;
+ for (unsigned i = 0, j = symbolName.size(); i < j; i++) {
+ h = (h << 4) + symbolName[i];
+ g = h & 0xf0000000L;
+ if (g != 0)
+ h ^= g >> 24;
+ h &= ~g;
+ }
+ return h;
+}
+
}
}
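
The ELFType parameters in the dispatch above encode endianness, the maximum alignment of the target's ELF data structures (4 for ELF32, 8 for ELF64), and whether the format is 64-bit. A minimal sketch of both pieces in use, assuming the surrounding llvm::object namespace; the names below are hypothetical and not part of this import:

    // A typedef can tame the four spelled-out instantiations.
    typedef ELFObjectFile<ELFType<support::little, 8, true> > ELF64LEFile;

    // elf_hash implements the SysV gABI hash; a dynamic linker takes the
    // result modulo the .hash section's bucket count to pick a chain.
    unsigned chainHeadFor(StringRef Name, const unsigned *Buckets,
                          unsigned NBucket) {
      return Buckets[elf_hash(Name) % NBucket];
    }
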
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index 4e03daab16a3..ed7aabd2c868 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -15,11 +15,11 @@
#ifndef LLVM_OBJECT_MACHO_H
#define LLVM_OBJECT_MACHO_H
-#include "llvm/Object/ObjectFile.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/MachO.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallVector.h"
namespace llvm {
namespace object {
@@ -44,7 +44,12 @@ public:
virtual unsigned getArch() const;
virtual StringRef getLoadName() const;
- MachOObject *getObject() { return MachOObj; }
+  // In a MachO file, sections have a segment name. This is used in .o
+  // files: they have a single segment, but this field specifies which
+  // segment a section should be placed in within the final object.
+ error_code getSectionFinalSegmentName(DataRefImpl Sec, StringRef &Res) const;
+
+ MachOObject *getObject() { return MachOObj.get(); }
static inline bool classof(const Binary *v) {
return v->isMachO();
@@ -104,7 +109,7 @@ protected:
virtual error_code getLibraryPath(DataRefImpl LibData, StringRef &Res) const;
private:
- MachOObject *MachOObj;
+ OwningPtr<MachOObject> MachOObj;
mutable uint32_t RegisteredStringTable;
typedef SmallVector<DataRefImpl, 1> SectionList;
SectionList Sections;
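
A hedged sketch of the new accessor in use, assuming this era's section_iterator API (advanced via an error_code out-parameter):

    #include "llvm/Object/MachO.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::object;

    // Print each section's final segment name (e.g. "__TEXT", "__DATA").
    void dumpFinalSegments(const MachOObjectFile &Obj) {
      error_code EC;
      for (section_iterator I = Obj.begin_sections(), E = Obj.end_sections();
           I != E; I.increment(EC)) {
        StringRef Seg;
        Obj.getSectionFinalSegmentName(I->getRawDataRefImpl(), Seg);
        outs() << Seg << "\n";
      }
    }
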
diff --git a/include/llvm/Object/MachOFormat.h b/include/llvm/Object/MachOFormat.h
index c0f700d3c870..ffca391ea228 100644
--- a/include/llvm/Object/MachOFormat.h
+++ b/include/llvm/Object/MachOFormat.h
@@ -64,7 +64,10 @@ namespace mach {
CSARM_V7 = 9,
CSARM_V7F = 10,
CSARM_V7S = 11,
- CSARM_V7K = 12
+ CSARM_V7K = 12,
+ CSARM_V6M = 14,
+ CSARM_V7M = 15,
+ CSARM_V7EM = 16
};
/// \brief PowerPC Machine Subtypes.
@@ -145,7 +148,8 @@ namespace macho {
LCT_CodeSignature = 0x1d,
LCT_SegmentSplitInfo = 0x1e,
LCT_FunctionStarts = 0x26,
- LCT_DataInCode = 0x29
+ LCT_DataInCode = 0x29,
+ LCT_LinkerOptions = 0x2D
};
/// \brief Load command structure.
@@ -233,10 +237,22 @@ namespace macho {
uint32_t DataSize;
};
+ struct LinkerOptionsLoadCommand {
+ uint32_t Type;
+ uint32_t Size;
+ uint32_t Count;
+    // The load command is followed by Count zero-terminated UTF8 strings,
+    // then zero-padded to a 4-byte boundary.
+ };
+
/// @}
/// @name Section Data
/// @{
+ enum SectionFlags {
+ SF_PureInstructions = 0x80000000
+ };
+
struct Section {
char Name[16];
char SegmentName[16];
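
The layout described in the LinkerOptionsLoadCommand comment decodes with plain pointer walking; a sketch under stated assumptions (Buf points just past the fixed fields of a command whose Size has already been bounds-checked):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/DataTypes.h"
    #include <vector>
    using namespace llvm;

    // Collect the Count zero-terminated UTF8 strings that trail the command.
    std::vector<StringRef> readLinkerOptions(const char *Buf, uint32_t Count) {
      std::vector<StringRef> Opts;
      for (uint32_t i = 0; i != Count; ++i) {
        StringRef S(Buf);      // consumes up to the next NUL
        Opts.push_back(S);
        Buf += S.size() + 1;   // step over the string and its terminator
      }
      return Opts;             // any remaining bytes are alignment padding
    }
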
diff --git a/include/llvm/Object/MachOObject.h b/include/llvm/Object/MachOObject.h
index 86f150a2940f..9e4ab199f572 100644
--- a/include/llvm/Object/MachOObject.h
+++ b/include/llvm/Object/MachOObject.h
@@ -10,11 +10,11 @@
#ifndef LLVM_OBJECT_MACHOOBJECT_H
#define LLVM_OBJECT_MACHOOBJECT_H
-#include <string>
#include "llvm/ADT/InMemoryStruct.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/MachOFormat.h"
+#include <string>
namespace llvm {
@@ -153,6 +153,9 @@ public:
void ReadLinkeditDataLoadCommand(
const LoadCommandInfo &LCI,
InMemoryStruct<macho::LinkeditDataLoadCommand> &Res) const;
+ void ReadLinkerOptionsLoadCommand(
+ const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::LinkerOptionsLoadCommand> &Res) const;
void ReadIndirectSymbolTableEntry(
const macho::DysymtabLoadCommand &DLC,
unsigned Index,
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 1a3120ab8ba3..6a66653fe223 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_OBJECT_OBJECT_FILE_H
-#define LLVM_OBJECT_OBJECT_FILE_H
+#ifndef LLVM_OBJECT_OBJECTFILE_H
+#define LLVM_OBJECT_OBJECTFILE_H
-#include "llvm/Object/Binary.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/Binary.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h
index 7668bdedb7bb..2dcbdf905327 100644
--- a/include/llvm/Object/RelocVisitor.h
+++ b/include/llvm/Object/RelocVisitor.h
@@ -13,14 +13,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef _LLVM_OBJECT_RELOCVISITOR
-#define _LLVM_OBJECT_RELOCVISITOR
+#ifndef LLVM_OBJECT_RELOCVISITOR_H
+#define LLVM_OBJECT_RELOCVISITOR_H
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Object/ELF.h"
-#include "llvm/ADT/StringRef.h"
namespace llvm {
namespace object {
@@ -40,7 +40,7 @@ struct RelocToApply {
/// @brief Base class for object file relocation visitors.
class RelocVisitor {
public:
- explicit RelocVisitor(llvm::StringRef FileFormat)
+ explicit RelocVisitor(StringRef FileFormat)
: FileFormat(FileFormat), HasError(false) {}
// TODO: Should handle multiple applied relocations via either passing in the
@@ -64,35 +64,77 @@ public:
HasError = true;
return RelocToApply();
}
+ } else if (FileFormat == "ELF32-i386") {
+ switch (RelocType) {
+ case llvm::ELF::R_386_NONE:
+ return visitELF_386_NONE(R);
+ case llvm::ELF::R_386_32:
+ return visitELF_386_32(R, Value);
+ case llvm::ELF::R_386_PC32:
+ return visitELF_386_PC32(R, Value, SecAddr);
+ default:
+ HasError = true;
+ return RelocToApply();
+ }
+ } else if (FileFormat == "ELF64-ppc64") {
+ switch (RelocType) {
+ case llvm::ELF::R_PPC64_ADDR32:
+ return visitELF_PPC64_ADDR32(R, Value);
+ default:
+ HasError = true;
+ return RelocToApply();
+ }
+ } else if (FileFormat == "ELF32-mips") {
+ switch (RelocType) {
+ case llvm::ELF::R_MIPS_32:
+ return visitELF_MIPS_32(R, Value);
+ default:
+ HasError = true;
+ return RelocToApply();
+ }
+ } else if (FileFormat == "ELF64-aarch64") {
+ switch (RelocType) {
+ case llvm::ELF::R_AARCH64_ABS32:
+ return visitELF_AARCH64_ABS32(R, Value);
+ case llvm::ELF::R_AARCH64_ABS64:
+ return visitELF_AARCH64_ABS64(R, Value);
+ default:
+ HasError = true;
+ return RelocToApply();
+ }
}
+ HasError = true;
return RelocToApply();
}
bool error() { return HasError; }
private:
- llvm::StringRef FileFormat;
+ StringRef FileFormat;
bool HasError;
/// Operations
- // Width is the width in bytes of the extend.
- RelocToApply zeroExtend(RelocToApply r, char Width) {
- if (Width == r.Width)
- return r;
- r.Value &= (1 << ((Width * 8))) - 1;
- return r;
+ /// 386-ELF
+ RelocToApply visitELF_386_NONE(RelocationRef R) {
+ return RelocToApply(0, 0);
}
- RelocToApply signExtend(RelocToApply r, char Width) {
- if (Width == r.Width)
- return r;
- bool SignBit = r.Value & (1 << ((Width * 8) - 1));
- if (SignBit) {
- r.Value |= ~((1 << (Width * 8)) - 1);
- } else {
- r.Value &= (1 << (Width * 8)) - 1;
- }
- return r;
+
+  // Ideally the Addend here would be the addend stored in the data for
+  // the relocation; that is not actually the case for Rel relocations.
+ RelocToApply visitELF_386_32(RelocationRef R, uint64_t Value) {
+ int64_t Addend;
+ R.getAdditionalInfo(Addend);
+ return RelocToApply(Value + Addend, 4);
+ }
+
+ RelocToApply visitELF_386_PC32(RelocationRef R, uint64_t Value,
+ uint64_t SecAddr) {
+ int64_t Addend;
+ R.getAdditionalInfo(Addend);
+ uint64_t Address;
+ R.getAddress(Address);
+ return RelocToApply(Value + Addend - Address, 4);
}
/// X86-64 ELF
@@ -124,6 +166,42 @@ private:
int32_t Res = (Value + Addend) & 0xFFFFFFFF;
return RelocToApply(Res, 4);
}
+
+ /// PPC64 ELF
+ RelocToApply visitELF_PPC64_ADDR32(RelocationRef R, uint64_t Value) {
+ int64_t Addend;
+ R.getAdditionalInfo(Addend);
+ uint32_t Res = (Value + Addend) & 0xFFFFFFFF;
+ return RelocToApply(Res, 4);
+ }
+
+ /// MIPS ELF
+ RelocToApply visitELF_MIPS_32(RelocationRef R, uint64_t Value) {
+ int64_t Addend;
+ R.getAdditionalInfo(Addend);
+ uint32_t Res = (Value + Addend) & 0xFFFFFFFF;
+ return RelocToApply(Res, 4);
+ }
+
+ // AArch64 ELF
+ RelocToApply visitELF_AARCH64_ABS32(RelocationRef R, uint64_t Value) {
+ int64_t Addend;
+ R.getAdditionalInfo(Addend);
+ int64_t Res = Value + Addend;
+
+ // Overflow check allows for both signed and unsigned interpretation.
+ if (Res < INT32_MIN || Res > UINT32_MAX)
+ HasError = true;
+
+ return RelocToApply(static_cast<uint32_t>(Res), 4);
+ }
+
+ RelocToApply visitELF_AARCH64_ABS64(RelocationRef R, uint64_t Value) {
+ int64_t Addend;
+ R.getAdditionalInfo(Addend);
+ return RelocToApply(Value + Addend, 8);
+ }
+
};
}
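
A sketch of the visitor in use, hedged because the visit entry point itself sits outside this hunk; the parameter order shown (type, relocation, section address, symbol value) is an assumption consistent with how the per-target handlers above are invoked:

    #include "llvm/Object/RelocVisitor.h"
    using namespace llvm;
    using namespace llvm::object;

    // Resolve one relocation the way a DWARF reader might; SymVal and
    // SecAddr are assumed to have been computed by the caller.
    uint64_t applyOne(StringRef Format, RelocationRef Rel,
                      uint64_t SymVal, uint64_t SecAddr) {
      RelocVisitor V(Format);                // e.g. "ELF32-i386"
      uint64_t Type;
      Rel.getType(Type);
      RelocToApply R = V.visit(static_cast<uint32_t>(Type), Rel,
                               SecAddr, SymVal);
      return V.error() ? 0 : R.Value;
    }
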
diff --git a/include/llvm/OperandTraits.h b/include/llvm/OperandTraits.h
deleted file mode 100644
index 3d8dc329b39f..000000000000
--- a/include/llvm/OperandTraits.h
+++ /dev/null
@@ -1,160 +0,0 @@
-//===-- llvm/OperandTraits.h - OperandTraits class definition ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the traits classes that are handy for enforcing the correct
-// layout of various User subclasses. It also provides the means for accessing
-// the operands in the most efficient manner.
-//
-
-#ifndef LLVM_OPERAND_TRAITS_H
-#define LLVM_OPERAND_TRAITS_H
-
-#include "llvm/User.h"
-
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-// FixedNumOperand Trait Class
-//===----------------------------------------------------------------------===//
-
-/// FixedNumOperandTraits - determine the allocation regime of the Use array
-/// when it is a prefix to the User object, and the number of Use objects is
-/// known at compile time.
-
-template <typename SubClass, unsigned ARITY>
-struct FixedNumOperandTraits {
- static Use *op_begin(SubClass* U) {
- return reinterpret_cast<Use*>(U) - ARITY;
- }
- static Use *op_end(SubClass* U) {
- return reinterpret_cast<Use*>(U);
- }
- static unsigned operands(const User*) {
- return ARITY;
- }
-};
-
-//===----------------------------------------------------------------------===//
-// OptionalOperand Trait Class
-//===----------------------------------------------------------------------===//
-
-/// OptionalOperandTraits - when the number of operands may change at runtime.
-/// Naturally it may only decrease, because the allocations may not change.
-
-template <typename SubClass, unsigned ARITY = 1>
-struct OptionalOperandTraits : public FixedNumOperandTraits<SubClass, ARITY> {
- static unsigned operands(const User *U) {
- return U->getNumOperands();
- }
-};
-
-//===----------------------------------------------------------------------===//
-// VariadicOperand Trait Class
-//===----------------------------------------------------------------------===//
-
-/// VariadicOperandTraits - determine the allocation regime of the Use array
-/// when it is a prefix to the User object, and the number of Use objects is
-/// only known at allocation time.
-
-template <typename SubClass, unsigned MINARITY = 0>
-struct VariadicOperandTraits {
- static Use *op_begin(SubClass* U) {
- return reinterpret_cast<Use*>(U) - static_cast<User*>(U)->getNumOperands();
- }
- static Use *op_end(SubClass* U) {
- return reinterpret_cast<Use*>(U);
- }
- static unsigned operands(const User *U) {
- return U->getNumOperands();
- }
-};
-
-//===----------------------------------------------------------------------===//
-// HungoffOperand Trait Class
-//===----------------------------------------------------------------------===//
-
-/// HungoffOperandTraits - determine the allocation regime of the Use array
-/// when it is not a prefix to the User object, but allocated at an unrelated
-/// heap address.
-/// Assumes that the User subclass that is determined by this traits class
-/// has an OperandList member of type User::op_iterator. [Note: this is now
-/// trivially satisfied, because User has that member for historic reasons.]
-///
-/// This is the traits class that is needed when the Use array must be
-/// resizable.
-
-template <unsigned MINARITY = 1>
-struct HungoffOperandTraits {
- static Use *op_begin(User* U) {
- return U->OperandList;
- }
- static Use *op_end(User* U) {
- return U->OperandList + U->getNumOperands();
- }
- static unsigned operands(const User *U) {
- return U->getNumOperands();
- }
-};
-
-/// Macro for generating in-class operand accessor declarations.
-/// It should only be called in the public section of the interface.
-///
-#define DECLARE_TRANSPARENT_OPERAND_ACCESSORS(VALUECLASS) \
- public: \
- inline VALUECLASS *getOperand(unsigned) const; \
- inline void setOperand(unsigned, VALUECLASS*); \
- inline op_iterator op_begin(); \
- inline const_op_iterator op_begin() const; \
- inline op_iterator op_end(); \
- inline const_op_iterator op_end() const; \
- protected: \
- template <int> inline Use &Op(); \
- template <int> inline const Use &Op() const; \
- public: \
- inline unsigned getNumOperands() const
-
-/// Macro for generating out-of-class operand accessor definitions
-#define DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CLASS, VALUECLASS) \
-CLASS::op_iterator CLASS::op_begin() { \
- return OperandTraits<CLASS>::op_begin(this); \
-} \
-CLASS::const_op_iterator CLASS::op_begin() const { \
- return OperandTraits<CLASS>::op_begin(const_cast<CLASS*>(this)); \
-} \
-CLASS::op_iterator CLASS::op_end() { \
- return OperandTraits<CLASS>::op_end(this); \
-} \
-CLASS::const_op_iterator CLASS::op_end() const { \
- return OperandTraits<CLASS>::op_end(const_cast<CLASS*>(this)); \
-} \
-VALUECLASS *CLASS::getOperand(unsigned i_nocapture) const { \
- assert(i_nocapture < OperandTraits<CLASS>::operands(this) \
- && "getOperand() out of range!"); \
- return cast_or_null<VALUECLASS>( \
- OperandTraits<CLASS>::op_begin(const_cast<CLASS*>(this))[i_nocapture].get()); \
-} \
-void CLASS::setOperand(unsigned i_nocapture, VALUECLASS *Val_nocapture) { \
- assert(i_nocapture < OperandTraits<CLASS>::operands(this) \
- && "setOperand() out of range!"); \
- OperandTraits<CLASS>::op_begin(this)[i_nocapture] = Val_nocapture; \
-} \
-unsigned CLASS::getNumOperands() const { \
- return OperandTraits<CLASS>::operands(this); \
-} \
-template <int Idx_nocapture> Use &CLASS::Op() { \
- return this->OpFrom<Idx_nocapture>(this); \
-} \
-template <int Idx_nocapture> const Use &CLASS::Op() const { \
- return this->OpFrom<Idx_nocapture>(this); \
-}
-
-
-} // End llvm namespace
-
-#endif
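
This deletion is part of the move of IR-modelling headers under include/llvm/IR; the file lives on as include/llvm/IR/OperandTraits.h. For the pattern it supports, a compressed sketch (constructors and the co-allocating operator new are elided, so this is illustrative rather than buildable as-is):

    #include "llvm/IR/OperandTraits.h"
    #include "llvm/IR/User.h"
    using namespace llvm;

    // A User with exactly two operands whose Use array is co-allocated
    // immediately before the object, as FixedNumOperandTraits expects.
    class BinaryNode : public User {
      // ... constructors and operator new(size_t, unsigned) elided ...
    public:
      DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
    };

    template <>
    struct OperandTraits<BinaryNode>
        : public FixedNumOperandTraits<BinaryNode, 2> {};

    DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryNode, Value)
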
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
deleted file mode 100644
index b326c1135206..000000000000
--- a/include/llvm/Operator.h
+++ /dev/null
@@ -1,314 +0,0 @@
-//===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines various classes for working with Instructions and
-// ConstantExprs.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_OPERATOR_H
-#define LLVM_OPERATOR_H
-
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instruction.h"
-#include "llvm/Type.h"
-
-namespace llvm {
-
-class GetElementPtrInst;
-class BinaryOperator;
-class ConstantExpr;
-
-/// Operator - This is a utility class that provides an abstraction for the
-/// common functionality between Instructions and ConstantExprs.
-///
-class Operator : public User {
-private:
- // Do not implement any of these. The Operator class is intended to be used
- // as a utility, and is never itself instantiated.
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
- void *operator new(size_t s) LLVM_DELETED_FUNCTION;
- Operator() LLVM_DELETED_FUNCTION;
-
-protected:
- // NOTE: Cannot use LLVM_DELETED_FUNCTION because it's not legal to delete
- // an overridden method that's not deleted in the base class. Cannot leave
- // this unimplemented because that leads to an ODR-violation.
- ~Operator();
-
-public:
- /// getOpcode - Return the opcode for this Instruction or ConstantExpr.
- ///
- unsigned getOpcode() const {
- if (const Instruction *I = dyn_cast<Instruction>(this))
- return I->getOpcode();
- return cast<ConstantExpr>(this)->getOpcode();
- }
-
- /// getOpcode - If V is an Instruction or ConstantExpr, return its
- /// opcode. Otherwise return UserOp1.
- ///
- static unsigned getOpcode(const Value *V) {
- if (const Instruction *I = dyn_cast<Instruction>(V))
- return I->getOpcode();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- return CE->getOpcode();
- return Instruction::UserOp1;
- }
-
- static inline bool classof(const Instruction *) { return true; }
- static inline bool classof(const ConstantExpr *) { return true; }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) || isa<ConstantExpr>(V);
- }
-};
-
-/// OverflowingBinaryOperator - Utility class for integer arithmetic operators
-/// which may exhibit overflow - Add, Sub, and Mul. It does not include SDiv,
-/// despite that operator having the potential for overflow.
-///
-class OverflowingBinaryOperator : public Operator {
-public:
- enum {
- NoUnsignedWrap = (1 << 0),
- NoSignedWrap = (1 << 1)
- };
-
-private:
- friend class BinaryOperator;
- friend class ConstantExpr;
- void setHasNoUnsignedWrap(bool B) {
- SubclassOptionalData =
- (SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap);
- }
- void setHasNoSignedWrap(bool B) {
- SubclassOptionalData =
- (SubclassOptionalData & ~NoSignedWrap) | (B * NoSignedWrap);
- }
-
-public:
- /// hasNoUnsignedWrap - Test whether this operation is known to never
- /// undergo unsigned overflow, aka the nuw property.
- bool hasNoUnsignedWrap() const {
- return SubclassOptionalData & NoUnsignedWrap;
- }
-
- /// hasNoSignedWrap - Test whether this operation is known to never
- /// undergo signed overflow, aka the nsw property.
- bool hasNoSignedWrap() const {
- return (SubclassOptionalData & NoSignedWrap) != 0;
- }
-
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Add ||
- I->getOpcode() == Instruction::Sub ||
- I->getOpcode() == Instruction::Mul ||
- I->getOpcode() == Instruction::Shl;
- }
- static inline bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Instruction::Add ||
- CE->getOpcode() == Instruction::Sub ||
- CE->getOpcode() == Instruction::Mul ||
- CE->getOpcode() == Instruction::Shl;
- }
- static inline bool classof(const Value *V) {
- return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
- (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
- }
-};
-
-/// PossiblyExactOperator - A udiv or sdiv instruction, which can be marked as
-/// "exact", indicating that no bits are destroyed.
-class PossiblyExactOperator : public Operator {
-public:
- enum {
- IsExact = (1 << 0)
- };
-
-private:
- friend class BinaryOperator;
- friend class ConstantExpr;
- void setIsExact(bool B) {
- SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact);
- }
-
-public:
- /// isExact - Test whether this division is known to be exact, with
- /// zero remainder.
- bool isExact() const {
- return SubclassOptionalData & IsExact;
- }
-
- static bool isPossiblyExactOpcode(unsigned OpC) {
- return OpC == Instruction::SDiv ||
- OpC == Instruction::UDiv ||
- OpC == Instruction::AShr ||
- OpC == Instruction::LShr;
- }
- static inline bool classof(const ConstantExpr *CE) {
- return isPossiblyExactOpcode(CE->getOpcode());
- }
- static inline bool classof(const Instruction *I) {
- return isPossiblyExactOpcode(I->getOpcode());
- }
- static inline bool classof(const Value *V) {
- return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
- (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
- }
-};
-
-/// FPMathOperator - Utility class for floating point operations which can have
-/// information about relaxed accuracy requirements attached to them.
-class FPMathOperator : public Operator {
-public:
-
- /// \brief Get the maximum error permitted by this operation in ULPs. An
- /// accuracy of 0.0 means that the operation should be performed with the
- /// default precision.
- float getFPAccuracy() const;
-
- static inline bool classof(const Instruction *I) {
- return I->getType()->isFPOrFPVectorTy();
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-};
-
-
-/// ConcreteOperator - A helper template for defining operators for individual
-/// opcodes.
-template<typename SuperClass, unsigned Opc>
-class ConcreteOperator : public SuperClass {
-public:
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Opc;
- }
- static inline bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Opc;
- }
- static inline bool classof(const Value *V) {
- return (isa<Instruction>(V) && classof(cast<Instruction>(V))) ||
- (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V)));
- }
-};
-
-class AddOperator
- : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {
-};
-class SubOperator
- : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {
-};
-class MulOperator
- : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {
-};
-class ShlOperator
- : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {
-};
-
-
-class SDivOperator
- : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {
-};
-class UDivOperator
- : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {
-};
-class AShrOperator
- : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {
-};
-class LShrOperator
- : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {
-};
-
-
-
-class GEPOperator
- : public ConcreteOperator<Operator, Instruction::GetElementPtr> {
- enum {
- IsInBounds = (1 << 0)
- };
-
- friend class GetElementPtrInst;
- friend class ConstantExpr;
- void setIsInBounds(bool B) {
- SubclassOptionalData =
- (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds);
- }
-
-public:
- /// isInBounds - Test whether this is an inbounds GEP, as defined
- /// by LangRef.html.
- bool isInBounds() const {
- return SubclassOptionalData & IsInBounds;
- }
-
- inline op_iterator idx_begin() { return op_begin()+1; }
- inline const_op_iterator idx_begin() const { return op_begin()+1; }
- inline op_iterator idx_end() { return op_end(); }
- inline const_op_iterator idx_end() const { return op_end(); }
-
- Value *getPointerOperand() {
- return getOperand(0);
- }
- const Value *getPointerOperand() const {
- return getOperand(0);
- }
- static unsigned getPointerOperandIndex() {
- return 0U; // get index for modifying correct operand
- }
-
- /// getPointerOperandType - Method to return the pointer operand as a
- /// PointerType.
- Type *getPointerOperandType() const {
- return getPointerOperand()->getType();
- }
-
- /// getPointerAddressSpace - Method to return the address space of the
- /// pointer operand.
- unsigned getPointerAddressSpace() const {
- return cast<PointerType>(getPointerOperandType())->getAddressSpace();
- }
-
- unsigned getNumIndices() const { // Note: always non-negative
- return getNumOperands() - 1;
- }
-
- bool hasIndices() const {
- return getNumOperands() > 1;
- }
-
- /// hasAllZeroIndices - Return true if all of the indices of this GEP are
- /// zeros. If so, the result pointer and the first operand have the same
- /// value, just potentially different types.
- bool hasAllZeroIndices() const {
- for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
- if (ConstantInt *C = dyn_cast<ConstantInt>(I))
- if (C->isZero())
- continue;
- return false;
- }
- return true;
- }
-
- /// hasAllConstantIndices - Return true if all of the indices of this GEP are
- /// constant integers. If so, the result pointer and the first operand have
- /// a constant offset between them.
- bool hasAllConstantIndices() const {
- for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
- if (!isa<ConstantInt>(I))
- return false;
- }
- return true;
- }
-};
-
-} // End llvm namespace
-
-#endif
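
Like OperandTraits.h, this header moved to include/llvm/IR/Operator.h rather than disappearing. A short sketch of the abstraction it provides, which works whether the value is an Instruction or a ConstantExpr:

    #include "llvm/IR/Operator.h"
    using namespace llvm;

    // True if V is an add (instruction or constant expression) with the
    // nsw flag set; dyn_cast works through Operator's classof overloads.
    bool addHasNSW(const Value *V) {
      if (const OverflowingBinaryOperator *OBO =
              dyn_cast<OverflowingBinaryOperator>(V))
        return OBO->getOpcode() == Instruction::Add &&
               OBO->hasNoSignedWrap();
      return false;
    }
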
diff --git a/include/llvm/Option/Arg.h b/include/llvm/Option/Arg.h
new file mode 100644
index 000000000000..6b8ed3f7d2b1
--- /dev/null
+++ b/include/llvm/Option/Arg.h
@@ -0,0 +1,132 @@
+//===--- Arg.h - Parsed Argument Classes ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Defines the llvm::Arg class for parsed arguments.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPTION_ARG_H
+#define LLVM_OPTION_ARG_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Option/Option.h"
+#include <string>
+
+namespace llvm {
+namespace opt {
+class ArgList;
+
+/// \brief A concrete instance of a particular driver option.
+///
+/// The Arg class encodes just enough information to be able to
+/// derive the argument values efficiently. In addition, Arg
+/// instances have an intrusive doubly linked list which is used by
+/// ArgList to provide efficient iteration over all instances of a
+/// particular option.
+class Arg {
+ Arg(const Arg &) LLVM_DELETED_FUNCTION;
+ void operator=(const Arg &) LLVM_DELETED_FUNCTION;
+
+private:
+ /// \brief The option this argument is an instance of.
+ const Option Opt;
+
+ /// \brief The argument this argument was derived from (during tool chain
+ /// argument translation), if any.
+ const Arg *BaseArg;
+
+ /// \brief How this instance of the option was spelled.
+ StringRef Spelling;
+
+ /// \brief The index at which this argument appears in the containing
+ /// ArgList.
+ unsigned Index;
+
+ /// \brief Was this argument used to effect compilation?
+ ///
+ /// This is used for generating "argument unused" diagnostics.
+ mutable unsigned Claimed : 1;
+
+ /// \brief Does this argument own its values?
+ mutable unsigned OwnsValues : 1;
+
+ /// \brief The argument values, as C strings.
+ SmallVector<const char *, 2> Values;
+
+public:
+ Arg(const Option Opt, StringRef Spelling, unsigned Index,
+ const Arg *BaseArg = 0);
+ Arg(const Option Opt, StringRef Spelling, unsigned Index,
+ const char *Value0, const Arg *BaseArg = 0);
+ Arg(const Option Opt, StringRef Spelling, unsigned Index,
+ const char *Value0, const char *Value1, const Arg *BaseArg = 0);
+ ~Arg();
+
+ const Option getOption() const { return Opt; }
+ StringRef getSpelling() const { return Spelling; }
+ unsigned getIndex() const { return Index; }
+
+ /// \brief Return the base argument which generated this arg.
+ ///
+ /// This is either the argument itself or the argument it was
+ /// derived from during tool chain specific argument translation.
+ const Arg &getBaseArg() const {
+ return BaseArg ? *BaseArg : *this;
+ }
+ void setBaseArg(const Arg *_BaseArg) {
+ BaseArg = _BaseArg;
+ }
+
+ bool getOwnsValues() const { return OwnsValues; }
+ void setOwnsValues(bool Value) const { OwnsValues = Value; }
+
+ bool isClaimed() const { return getBaseArg().Claimed; }
+
+ /// \brief Set the Arg claimed bit.
+ void claim() const { getBaseArg().Claimed = true; }
+
+ unsigned getNumValues() const { return Values.size(); }
+ const char *getValue(unsigned N = 0) const {
+ return Values[N];
+ }
+
+ SmallVectorImpl<const char*> &getValues() {
+ return Values;
+ }
+
+ bool containsValue(StringRef Value) const {
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i)
+ if (Values[i] == Value)
+ return true;
+ return false;
+ }
+
+ /// \brief Append the argument onto the given array as strings.
+ void render(const ArgList &Args, ArgStringList &Output) const;
+
+  /// \brief Append the argument, rendered as an input, onto the given
+  /// array as strings.
+  ///
+  /// The distinction is that some options only render their values
+  /// when rendered as an input (e.g., Xlinker).
+ void renderAsInput(const ArgList &Args, ArgStringList &Output) const;
+
+ void dump() const;
+
+ /// \brief Return a formatted version of the argument and
+ /// its values, for debugging and diagnostics.
+ std::string getAsString(const ArgList &Args) const;
+};
+
+} // end namespace opt
+} // end namespace llvm
+
+#endif
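
A hedged sketch of the class in use; Args and A are assumed to come from an option-parsing run such as the one sketched after ArgList.h below:

    #include "llvm/Option/Arg.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::opt;

    // Claim an argument and report how it was spelled and what it carries.
    void describe(const ArgList &Args, const Arg *A) {
      A->claim();  // suppress "argument unused" diagnostics for this arg
      outs() << A->getSpelling() << " carries " << A->getNumValues()
             << " value(s): " << A->getAsString(Args) << "\n";
    }
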
diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h
new file mode 100644
index 000000000000..d3accfe7f1e0
--- /dev/null
+++ b/include/llvm/Option/ArgList.h
@@ -0,0 +1,414 @@
+//===--- ArgList.h - Argument List Management -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPTION_ARGLIST_H
+#define LLVM_OPTION_ARGLIST_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Option/OptSpecifier.h"
+#include "llvm/Option/Option.h"
+#include <list>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace opt {
+class Arg;
+class ArgList;
+class Option;
+
+/// arg_iterator - Iterates through arguments stored inside an ArgList.
+class arg_iterator {
+ /// The current argument.
+ SmallVectorImpl<Arg*>::const_iterator Current;
+
+ /// The argument list we are iterating over.
+ const ArgList &Args;
+
+  /// Optional filters on the arguments which will be matched. Most clients
+ /// should never want to iterate over arguments without filters, so we won't
+ /// bother to factor this into two separate iterator implementations.
+ //
+ // FIXME: Make efficient; the idea is to provide efficient iteration over
+ // all arguments which match a particular id and then just provide an
+ // iterator combinator which takes multiple iterators which can be
+ // efficiently compared and returns them in order.
+ OptSpecifier Id0, Id1, Id2;
+
+ void SkipToNextArg();
+
+public:
+ typedef Arg * const * value_type;
+ typedef Arg * const & reference;
+ typedef Arg * const * pointer;
+ typedef std::forward_iterator_tag iterator_category;
+ typedef std::ptrdiff_t difference_type;
+
+ arg_iterator(SmallVectorImpl<Arg*>::const_iterator it,
+ const ArgList &_Args, OptSpecifier _Id0 = 0U,
+ OptSpecifier _Id1 = 0U, OptSpecifier _Id2 = 0U)
+ : Current(it), Args(_Args), Id0(_Id0), Id1(_Id1), Id2(_Id2) {
+ SkipToNextArg();
+ }
+
+ operator const Arg*() { return *Current; }
+ reference operator*() const { return *Current; }
+ pointer operator->() const { return Current; }
+
+ arg_iterator &operator++() {
+ ++Current;
+ SkipToNextArg();
+ return *this;
+ }
+
+ arg_iterator operator++(int) {
+ arg_iterator tmp(*this);
+ ++(*this);
+ return tmp;
+ }
+
+ friend bool operator==(arg_iterator LHS, arg_iterator RHS) {
+ return LHS.Current == RHS.Current;
+ }
+ friend bool operator!=(arg_iterator LHS, arg_iterator RHS) {
+ return !(LHS == RHS);
+ }
+};
+
+/// ArgList - Ordered collection of driver arguments.
+///
+/// The ArgList class manages a list of Arg instances as well as
+/// auxiliary data and convenience methods to allow Tools to quickly
+/// check for the presence of Arg instances for a particular Option
+/// and to iterate over groups of arguments.
+class ArgList {
+private:
+ ArgList(const ArgList &) LLVM_DELETED_FUNCTION;
+ void operator=(const ArgList &) LLVM_DELETED_FUNCTION;
+
+public:
+ typedef SmallVector<Arg*, 16> arglist_type;
+ typedef arglist_type::iterator iterator;
+ typedef arglist_type::const_iterator const_iterator;
+ typedef arglist_type::reverse_iterator reverse_iterator;
+ typedef arglist_type::const_reverse_iterator const_reverse_iterator;
+
+private:
+ /// The internal list of arguments.
+ arglist_type Args;
+
+protected:
+ ArgList();
+
+public:
+ virtual ~ArgList();
+
+ /// @name Arg Access
+ /// @{
+
+ /// append - Append \p A to the arg list.
+ void append(Arg *A);
+
+ arglist_type &getArgs() { return Args; }
+ const arglist_type &getArgs() const { return Args; }
+
+ unsigned size() const { return Args.size(); }
+
+ /// @}
+ /// @name Arg Iteration
+ /// @{
+
+ iterator begin() { return Args.begin(); }
+ iterator end() { return Args.end(); }
+
+ reverse_iterator rbegin() { return Args.rbegin(); }
+ reverse_iterator rend() { return Args.rend(); }
+
+ const_iterator begin() const { return Args.begin(); }
+ const_iterator end() const { return Args.end(); }
+
+ const_reverse_iterator rbegin() const { return Args.rbegin(); }
+ const_reverse_iterator rend() const { return Args.rend(); }
+
+ arg_iterator filtered_begin(OptSpecifier Id0 = 0U, OptSpecifier Id1 = 0U,
+ OptSpecifier Id2 = 0U) const {
+ return arg_iterator(Args.begin(), *this, Id0, Id1, Id2);
+ }
+ arg_iterator filtered_end() const {
+ return arg_iterator(Args.end(), *this);
+ }
+
+ /// @}
+ /// @name Arg Removal
+ /// @{
+
+ /// eraseArg - Remove any option matching \p Id.
+ void eraseArg(OptSpecifier Id);
+
+ /// @}
+ /// @name Arg Access
+ /// @{
+
+  /// hasArg - Does the arg list contain any option matching \p Id?
+  ///
+  /// The matching argument is claimed; hasArgNoClaim does not claim it.
+ bool hasArgNoClaim(OptSpecifier Id) const {
+ return getLastArgNoClaim(Id) != 0;
+ }
+ bool hasArg(OptSpecifier Id) const {
+ return getLastArg(Id) != 0;
+ }
+ bool hasArg(OptSpecifier Id0, OptSpecifier Id1) const {
+ return getLastArg(Id0, Id1) != 0;
+ }
+ bool hasArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const {
+ return getLastArg(Id0, Id1, Id2) != 0;
+ }
+
+  /// getLastArg - Return the last argument matching \p Id, or null.
+  ///
+  /// The returned argument is claimed; getLastArgNoClaim does not claim it.
+ Arg *getLastArgNoClaim(OptSpecifier Id) const;
+ Arg *getLastArg(OptSpecifier Id) const;
+ Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1) const;
+ Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const;
+ Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2,
+ OptSpecifier Id3) const;
+ Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2,
+ OptSpecifier Id3, OptSpecifier Id4) const;
+ Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2,
+ OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5) const;
+ Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2,
+ OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5,
+ OptSpecifier Id6) const;
+ Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2,
+ OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5,
+ OptSpecifier Id6, OptSpecifier Id7) const;
+
+ /// getArgString - Return the input argument string at \p Index.
+ virtual const char *getArgString(unsigned Index) const = 0;
+
+ /// getNumInputArgStrings - Return the number of original argument strings,
+ /// which are guaranteed to be the first strings in the argument string
+ /// list.
+ virtual unsigned getNumInputArgStrings() const = 0;
+
+ /// @}
+ /// @name Argument Lookup Utilities
+ /// @{
+
+ /// getLastArgValue - Return the value of the last argument, or a default.
+ StringRef getLastArgValue(OptSpecifier Id,
+ StringRef Default = "") const;
+
+ /// getAllArgValues - Get the values of all instances of the given argument
+ /// as strings.
+ std::vector<std::string> getAllArgValues(OptSpecifier Id) const;
+
+ /// @}
+ /// @name Translation Utilities
+ /// @{
+
+ /// hasFlag - Given an option \p Pos and its negative form \p Neg, return
+ /// true if the option is present, false if the negation is present, and
+ /// \p Default if neither option is given. If both the option and its
+ /// negation are present, the last one wins.
+ bool hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default=true) const;
+
+  /// AddLastArg - Render only the last argument matching \p Id0, if present.
+ void AddLastArg(ArgStringList &Output, OptSpecifier Id0) const;
+
+ /// AddAllArgs - Render all arguments matching the given ids.
+ void AddAllArgs(ArgStringList &Output, OptSpecifier Id0,
+ OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const;
+
+ /// AddAllArgValues - Render the argument values of all arguments
+ /// matching the given ids.
+ void AddAllArgValues(ArgStringList &Output, OptSpecifier Id0,
+ OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const;
+
+ /// AddAllArgsTranslated - Render all the arguments matching the
+ /// given ids, but forced to separate args and using the provided
+ /// name instead of the first option value.
+ ///
+ /// \param Joined - If true, render the argument as joined with
+ /// the option specifier.
+ void AddAllArgsTranslated(ArgStringList &Output, OptSpecifier Id0,
+ const char *Translation,
+ bool Joined = false) const;
+
+ /// ClaimAllArgs - Claim all arguments which match the given
+ /// option id.
+ void ClaimAllArgs(OptSpecifier Id0) const;
+
+ /// ClaimAllArgs - Claim all arguments.
+ ///
+ void ClaimAllArgs() const;
+
+ /// @}
+ /// @name Arg Synthesis
+ /// @{
+
+ /// MakeArgString - Construct a constant string pointer whose
+ /// lifetime will match that of the ArgList.
+ virtual const char *MakeArgString(StringRef Str) const = 0;
+ const char *MakeArgString(const char *Str) const {
+ return MakeArgString(StringRef(Str));
+ }
+ const char *MakeArgString(std::string Str) const {
+ return MakeArgString(StringRef(Str));
+ }
+ const char *MakeArgString(const Twine &Str) const;
+
+ /// \brief Create an arg string for (\p LHS + \p RHS), reusing the
+ /// string at \p Index if possible.
+ const char *GetOrMakeJoinedArgString(unsigned Index, StringRef LHS,
+ StringRef RHS) const;
+
+ /// @}
+};
+
+class InputArgList : public ArgList {
+private:
+ /// List of argument strings used by the contained Args.
+ ///
+ /// This is mutable since we treat the ArgList as being the list
+ /// of Args, and allow routines to add new strings (to have a
+ /// convenient place to store the memory) via MakeIndex.
+ mutable ArgStringList ArgStrings;
+
+ /// Strings for synthesized arguments.
+ ///
+ /// This is mutable since we treat the ArgList as being the list
+ /// of Args, and allow routines to add new strings (to have a
+ /// convenient place to store the memory) via MakeIndex.
+ mutable std::list<std::string> SynthesizedStrings;
+
+ /// The number of original input argument strings.
+ unsigned NumInputArgStrings;
+
+public:
+ InputArgList(const char* const *ArgBegin, const char* const *ArgEnd);
+ ~InputArgList();
+
+ virtual const char *getArgString(unsigned Index) const {
+ return ArgStrings[Index];
+ }
+
+ virtual unsigned getNumInputArgStrings() const {
+ return NumInputArgStrings;
+ }
+
+ /// @name Arg Synthesis
+ /// @{
+
+public:
+ /// MakeIndex - Get an index for the given string(s).
+ unsigned MakeIndex(StringRef String0) const;
+ unsigned MakeIndex(StringRef String0, StringRef String1) const;
+
+ virtual const char *MakeArgString(StringRef Str) const;
+
+ /// @}
+};
+
+/// DerivedArgList - An ordered collection of driver arguments,
+/// whose storage may be in another argument list.
+class DerivedArgList : public ArgList {
+ const InputArgList &BaseArgs;
+
+ /// The list of arguments we synthesized.
+ mutable arglist_type SynthesizedArgs;
+
+public:
+ /// Construct a new derived arg list from \p BaseArgs.
+ DerivedArgList(const InputArgList &BaseArgs);
+ ~DerivedArgList();
+
+ virtual const char *getArgString(unsigned Index) const {
+ return BaseArgs.getArgString(Index);
+ }
+
+ virtual unsigned getNumInputArgStrings() const {
+ return BaseArgs.getNumInputArgStrings();
+ }
+
+ const InputArgList &getBaseArgs() const {
+ return BaseArgs;
+ }
+
+ /// @name Arg Synthesis
+ /// @{
+
+  /// AddSynthesizedArg - Add an argument to the list of synthesized arguments
+ /// (to be freed).
+ void AddSynthesizedArg(Arg *A) {
+ SynthesizedArgs.push_back(A);
+ }
+
+ virtual const char *MakeArgString(StringRef Str) const;
+
+ /// AddFlagArg - Construct a new FlagArg for the given option \p Id and
+ /// append it to the argument list.
+ void AddFlagArg(const Arg *BaseArg, const Option Opt) {
+ append(MakeFlagArg(BaseArg, Opt));
+ }
+
+ /// AddPositionalArg - Construct a new Positional arg for the given option
+ /// \p Id, with the provided \p Value and append it to the argument
+ /// list.
+ void AddPositionalArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) {
+ append(MakePositionalArg(BaseArg, Opt, Value));
+ }
+
+
+  /// AddSeparateArg - Construct a new Separate arg for the given option
+ /// \p Id, with the provided \p Value and append it to the argument
+ /// list.
+ void AddSeparateArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) {
+ append(MakeSeparateArg(BaseArg, Opt, Value));
+ }
+
+
+  /// AddJoinedArg - Construct a new Joined arg for the given option
+ /// \p Id, with the provided \p Value and append it to the argument list.
+ void AddJoinedArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) {
+ append(MakeJoinedArg(BaseArg, Opt, Value));
+ }
+
+
+ /// MakeFlagArg - Construct a new FlagArg for the given option \p Id.
+ Arg *MakeFlagArg(const Arg *BaseArg, const Option Opt) const;
+
+ /// MakePositionalArg - Construct a new Positional arg for the
+ /// given option \p Id, with the provided \p Value.
+ Arg *MakePositionalArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) const;
+
+  /// MakeSeparateArg - Construct a new Separate arg for the
+ /// given option \p Id, with the provided \p Value.
+ Arg *MakeSeparateArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) const;
+
+  /// MakeJoinedArg - Construct a new Joined arg for the
+ /// given option \p Id, with the provided \p Value.
+ Arg *MakeJoinedArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) const;
+
+ /// @}
+};
+
+} // end namespace opt
+} // end namespace llvm
+
+#endif
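
A sketch of end-to-end use under loudly-stated assumptions: Opts is some tool's OptTable subclass (see OptTable.h below), and the OPT_* identifiers stand in for IDs that the TableGen backend would normally generate:

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/Option/ArgList.h"
    #include "llvm/Option/OptTable.h"
    using namespace llvm;
    using namespace llvm::opt;

    enum { OPT_o = 1, OPT_g, OPT_no_g };  // hypothetical generated IDs

    void parseAndQuery(const OptTable &Opts, const char *const *ArgBegin,
                       const char *const *ArgEnd) {
      unsigned MissingIndex, MissingCount;
      OwningPtr<InputArgList> Args(
          Opts.ParseArgs(ArgBegin, ArgEnd, MissingIndex, MissingCount));
      if (MissingCount)
        return;  // Args->getArgString(MissingIndex) names the offender

      // Last-one-wins queries; both claim the arguments they touch.
      StringRef Output = Args->getLastArgValue(OPT_o, "a.out");
      bool Debug = Args->hasFlag(OPT_g, OPT_no_g, /*Default=*/false);
      (void)Output; (void)Debug;
    }
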
diff --git a/include/llvm/Option/OptParser.td b/include/llvm/Option/OptParser.td
new file mode 100644
index 000000000000..e781fa02d75b
--- /dev/null
+++ b/include/llvm/Option/OptParser.td
@@ -0,0 +1,127 @@
+//===--- OptParser.td - Common Option Parsing Interfaces ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common interfaces used by the option parsing TableGen
+// backend.
+//
+//===----------------------------------------------------------------------===//
+
+// Define the kinds of options.
+
+class OptionKind<string name, int precedence = 0, bit sentinel = 0> {
+  string Name = name;
+  // The kind precedence; kinds with lower precedence are matched first.
+  int Precedence = precedence;
+ // Indicate a sentinel option.
+ bit Sentinel = sentinel;
+}
+
+// An option group.
+def KIND_GROUP : OptionKind<"Group">;
+// The input option kind.
+def KIND_INPUT : OptionKind<"Input", 1, 1>;
+// The unknown option kind.
+def KIND_UNKNOWN : OptionKind<"Unknown", 2, 1>;
+// A flag with no values.
+def KIND_FLAG : OptionKind<"Flag">;
+// An option which prefixes its (single) value.
+def KIND_JOINED : OptionKind<"Joined", 1>;
+// An option which is followed by its value.
+def KIND_SEPARATE : OptionKind<"Separate">;
+// An option followed by its values, which are separated by commas.
+def KIND_COMMAJOINED : OptionKind<"CommaJoined">;
+// An option which takes multiple (separate) arguments.
+def KIND_MULTIARG : OptionKind<"MultiArg">;
+// An option which is either joined to its (non-empty) value, or followed by its
+// value.
+def KIND_JOINED_OR_SEPARATE : OptionKind<"JoinedOrSeparate">;
+// An option which is both joined to its (first) value, and followed by its
+// (second) value.
+def KIND_JOINED_AND_SEPARATE : OptionKind<"JoinedAndSeparate">;
+
+// Define the option flags.
+
+class OptionFlag {}
+
+// HelpHidden - The option should not be displayed in --help, even if it has
+// help text. Clients *can* use this in conjunction with the OptTable::PrintHelp
+// arguments to implement hidden help groups.
+def HelpHidden : OptionFlag;
+
+// RenderAsInput - The option should not render the name when rendered as an
+// input (i.e., the option is rendered as values).
+def RenderAsInput : OptionFlag;
+
+// RenderJoined - The option should be rendered joined, even if separate (only
+// sensible on single value separate options).
+def RenderJoined : OptionFlag;
+
+// RenderSeparate - The option should be rendered separately, even if joined
+// (only sensible on joined options).
+def RenderSeparate : OptionFlag;
+
+// Define the option group class.
+
+class OptionGroup<string name> {
+ string EnumName = ?; // Uses the def name if undefined.
+ string Name = name;
+ string HelpText = ?;
+ OptionGroup Group = ?;
+}
+
+// Define the option class.
+
+class Option<list<string> prefixes, string name, OptionKind kind> {
+ string EnumName = ?; // Uses the def name if undefined.
+ list<string> Prefixes = prefixes;
+ string Name = name;
+ OptionKind Kind = kind;
+ // Used by MultiArg option kind.
+ int NumArgs = 0;
+ string HelpText = ?;
+ string MetaVarName = ?;
+ list<OptionFlag> Flags = [];
+ OptionGroup Group = ?;
+ Option Alias = ?;
+}
+
+// Helpers for defining options.
+
+class Flag<list<string> prefixes, string name>
+ : Option<prefixes, name, KIND_FLAG>;
+class Joined<list<string> prefixes, string name>
+ : Option<prefixes, name, KIND_JOINED>;
+class Separate<list<string> prefixes, string name>
+ : Option<prefixes, name, KIND_SEPARATE>;
+class CommaJoined<list<string> prefixes, string name>
+ : Option<prefixes, name, KIND_COMMAJOINED>;
+class MultiArg<list<string> prefixes, string name, int numargs>
+ : Option<prefixes, name, KIND_MULTIARG> {
+ int NumArgs = numargs;
+}
+class JoinedOrSeparate<list<string> prefixes, string name>
+ : Option<prefixes, name, KIND_JOINED_OR_SEPARATE>;
+class JoinedAndSeparate<list<string> prefixes, string name>
+ : Option<prefixes, name, KIND_JOINED_AND_SEPARATE>;
+
+// Mix-ins for adding optional attributes.
+
+class Alias<Option alias> { Option Alias = alias; }
+class EnumName<string name> { string EnumName = name; }
+class Flags<list<OptionFlag> flags> { list<OptionFlag> Flags = flags; }
+class Group<OptionGroup group> { OptionGroup Group = group; }
+class HelpText<string text> { string HelpText = text; }
+class MetaVarName<string name> { string MetaVarName = name; }
+
+// Predefined options.
+
+// FIXME: Have generator validate that these appear in correct position (and
+// aren't duplicated).
+def INPUT : Option<[], "<input>", KIND_INPUT>;
+def UNKNOWN : Option<[], "<unknown>", KIND_UNKNOWN>;
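
To ground the classes above, a hedged sketch of the kind of definitions a tool's Options.td might feed this backend; the option names are hypothetical:

    def help : Flag<["-", "--"], "help">,
               HelpText<"Display available options">;
    def o : Separate<["-"], "o">, MetaVarName<"<file>">,
            HelpText<"Write output to <file>">;
    def Xlinker : Separate<["-"], "Xlinker">, Flags<[RenderAsInput]>,
                  MetaVarName<"<arg>">, HelpText<"Pass <arg> to the linker">;
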
diff --git a/include/llvm/Option/OptSpecifier.h b/include/llvm/Option/OptSpecifier.h
new file mode 100644
index 000000000000..02bc6b175edb
--- /dev/null
+++ b/include/llvm/Option/OptSpecifier.h
@@ -0,0 +1,41 @@
+//===--- OptSpecifier.h - Option Specifiers ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPTION_OPTSPECIFIER_H
+#define LLVM_OPTION_OPTSPECIFIER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+namespace opt {
+ class Option;
+
+ /// OptSpecifier - Wrapper class for abstracting references to option IDs.
+ class OptSpecifier {
+ unsigned ID;
+
+ private:
+ explicit OptSpecifier(bool) LLVM_DELETED_FUNCTION;
+
+ public:
+ OptSpecifier() : ID(0) {}
+ /*implicit*/ OptSpecifier(unsigned _ID) : ID(_ID) {}
+ /*implicit*/ OptSpecifier(const Option *Opt);
+
+ bool isValid() const { return ID != 0; }
+
+ unsigned getID() const { return ID; }
+
+ bool operator==(OptSpecifier Opt) const { return ID == Opt.getID(); }
+ bool operator!=(OptSpecifier Opt) const { return !(*this == Opt); }
+ };
+}
+}
+
+#endif
diff --git a/include/llvm/Option/OptTable.h b/include/llvm/Option/OptTable.h
new file mode 100644
index 000000000000..a93acbf11e9b
--- /dev/null
+++ b/include/llvm/Option/OptTable.h
@@ -0,0 +1,161 @@
+//===--- OptTable.h - Option Table ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPTION_OPTTABLE_H
+#define LLVM_OPTION_OPTTABLE_H
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Option/OptSpecifier.h"
+
+namespace llvm {
+class raw_ostream;
+namespace opt {
+class Arg;
+class ArgList;
+class InputArgList;
+class Option;
+
+/// \brief Provide access to the Option info table.
+///
+/// The OptTable class provides a layer of indirection which allows Option
+/// instances to be created lazily. In the common case, only a few options will
+/// be needed at runtime; the OptTable class maintains enough information to
+/// parse command lines without instantiating Options, while letting other
+/// parts of the driver still use Option instances where convenient.
+class OptTable {
+public:
+ /// \brief Entry for a single option instance in the option data table.
+ struct Info {
+ /// A null terminated array of prefix strings to apply to name while
+ /// matching.
+ const char *const *Prefixes;
+ const char *Name;
+ const char *HelpText;
+ const char *MetaVar;
+ unsigned ID;
+ unsigned char Kind;
+ unsigned char Param;
+ unsigned short Flags;
+ unsigned short GroupID;
+ unsigned short AliasID;
+ };
+
+private:
+ /// \brief The static option information table.
+ const Info *OptionInfos;
+ unsigned NumOptionInfos;
+
+ unsigned TheInputOptionID;
+ unsigned TheUnknownOptionID;
+
+ /// The index of the first option which can be parsed (i.e., is not a
+ /// special option like 'input' or 'unknown', and is not an option group).
+ unsigned FirstSearchableIndex;
+
+ /// The union of all option prefixes. If an argument does not begin with
+ /// one of these, it is an input.
+ StringSet<> PrefixesUnion;
+ std::string PrefixChars;
+
+private:
+ const Info &getInfo(OptSpecifier Opt) const {
+ unsigned id = Opt.getID();
+ assert(id > 0 && id - 1 < getNumOptions() && "Invalid Option ID.");
+ return OptionInfos[id - 1];
+ }
+
+protected:
+ OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos);
+public:
+ ~OptTable();
+
+ /// \brief Return the total number of option classes.
+ unsigned getNumOptions() const { return NumOptionInfos; }
+
+ /// \brief Get the given Opt's Option instance, lazily creating it
+ /// if necessary.
+ ///
+ /// \return The option, or null for the INVALID option id.
+ const Option getOption(OptSpecifier Opt) const;
+
+ /// \brief Lookup the name of the given option.
+ const char *getOptionName(OptSpecifier id) const {
+ return getInfo(id).Name;
+ }
+
+ /// \brief Get the kind of the given option.
+ unsigned getOptionKind(OptSpecifier id) const {
+ return getInfo(id).Kind;
+ }
+
+ /// \brief Get the group id for the given option.
+ unsigned getOptionGroupID(OptSpecifier id) const {
+ return getInfo(id).GroupID;
+ }
+
+ /// \brief Should the help for the given option be hidden by default.
+ bool isOptionHelpHidden(OptSpecifier id) const;
+
+ /// \brief Get the help text to use to describe this option.
+ const char *getOptionHelpText(OptSpecifier id) const {
+ return getInfo(id).HelpText;
+ }
+
+ /// \brief Get the meta-variable name to use when describing
+  /// this option's values in the help text.
+ const char *getOptionMetaVar(OptSpecifier id) const {
+ return getInfo(id).MetaVar;
+ }
+
+  /// \brief Parse a single argument, returning the new argument and
+ /// updating Index.
+ ///
+ /// \param [in,out] Index - The current parsing position in the argument
+ /// string list; on return this will be the index of the next argument
+ /// string to parse.
+ ///
+ /// \return The parsed argument, or 0 if the argument is missing values
+ /// (in which case Index still points at the conceptual next argument string
+ /// to parse).
+ Arg *ParseOneArg(const ArgList &Args, unsigned &Index) const;
+
+  /// \brief Parse a list of arguments into an InputArgList.
+ ///
+ /// The resulting InputArgList will reference the strings in [\p ArgBegin,
+ /// \p ArgEnd), and their lifetime should extend past that of the returned
+ /// InputArgList.
+ ///
+ /// The only error that can occur in this routine is if an argument is
+ /// missing values; in this case \p MissingArgCount will be non-zero.
+ ///
+ /// \param ArgBegin - The beginning of the argument vector.
+ /// \param ArgEnd - The end of the argument vector.
+ /// \param MissingArgIndex - On error, the index of the option which could
+ /// not be parsed.
+ /// \param MissingArgCount - On error, the number of missing options.
+ /// \return An InputArgList; on error this will contain all the options
+ /// which could be parsed.
+ InputArgList *ParseArgs(const char* const *ArgBegin,
+ const char* const *ArgEnd,
+ unsigned &MissingArgIndex,
+ unsigned &MissingArgCount) const;
+
+ /// \brief Render the help text for an option table.
+ ///
+ /// \param OS - The stream to write the help text to.
+ /// \param Name - The name to use in the usage line.
+ /// \param Title - The title to use in the usage line.
+ /// \param ShowHidden - Whether help-hidden arguments should be shown.
+ void PrintHelp(raw_ostream &OS, const char *Name,
+ const char *Title, bool ShowHidden = false) const;
+};
+} // end namespace opt
+} // end namespace llvm
+
+#endif
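
A hedged usage sketch (MyOptTable and parseArgv are hypothetical names; real drivers subclass OptTable with a TableGen-generated Info array, and this assumes the ArgList.h header accompanying this import):

#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"

// Hypothetical driver table wrapping a generated Info array.
class MyOptTable : public llvm::opt::OptTable {
public:
  MyOptTable(const Info *Infos, unsigned Count) : OptTable(Infos, Count) {}
};

llvm::opt::InputArgList *parseArgv(const MyOptTable &Table,
                                   const char *const *ArgBegin,
                                   const char *const *ArgEnd) {
  unsigned MissingIndex = 0, MissingCount = 0;
  llvm::opt::InputArgList *Args =
      Table.ParseArgs(ArgBegin, ArgEnd, MissingIndex, MissingCount);
  if (MissingCount != 0) {
    // The option at MissingIndex lacks MissingCount required values; Args
    // still holds everything that parsed successfully.
  }
  return Args; // caller owns the InputArgList
}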
diff --git a/include/llvm/Option/Option.h b/include/llvm/Option/Option.h
new file mode 100644
index 000000000000..541aa8d99185
--- /dev/null
+++ b/include/llvm/Option/Option.h
@@ -0,0 +1,193 @@
+//===--- Option.h - Abstract Driver Options ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPTION_OPTION_H
+#define LLVM_OPTION_OPTION_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Option/OptTable.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+namespace opt {
+class Arg;
+class ArgList;
+/// ArgStringList - Type used for constructing argv lists for subprocesses.
+typedef SmallVector<const char*, 16> ArgStringList;
+
+/// Base flags for all options. Custom flags may be added after.
+enum DriverFlag {
+ HelpHidden = (1 << 0),
+ RenderAsInput = (1 << 1),
+ RenderJoined = (1 << 2),
+ RenderSeparate = (1 << 3)
+};
+
+/// Option - Abstract representation for a single form of driver
+/// argument.
+///
+/// An Option class represents a form of option that the driver
+/// takes, for example how many arguments the option has and how
+/// they can be provided. Individual option instances store
+/// additional information about what group the option is a member
+/// of (if any), if the option is an alias, and a number of
+/// flags. At runtime the driver parses the command line into
+/// concrete Arg instances, each of which corresponds to a
+/// particular Option instance.
+class Option {
+public:
+ enum OptionClass {
+ GroupClass = 0,
+ InputClass,
+ UnknownClass,
+ FlagClass,
+ JoinedClass,
+ SeparateClass,
+ CommaJoinedClass,
+ MultiArgClass,
+ JoinedOrSeparateClass,
+ JoinedAndSeparateClass
+ };
+
+ enum RenderStyleKind {
+ RenderCommaJoinedStyle,
+ RenderJoinedStyle,
+ RenderSeparateStyle,
+ RenderValuesStyle
+ };
+
+protected:
+ const OptTable::Info *Info;
+ const OptTable *Owner;
+
+public:
+ Option(const OptTable::Info *Info, const OptTable *Owner);
+ ~Option();
+
+ bool isValid() const {
+ return Info != 0;
+ }
+
+ unsigned getID() const {
+ assert(Info && "Must have a valid info!");
+ return Info->ID;
+ }
+
+ OptionClass getKind() const {
+ assert(Info && "Must have a valid info!");
+ return OptionClass(Info->Kind);
+ }
+
+ /// \brief Get the name of this option without any prefix.
+ StringRef getName() const {
+ assert(Info && "Must have a valid info!");
+ return Info->Name;
+ }
+
+ const Option getGroup() const {
+ assert(Info && "Must have a valid info!");
+ assert(Owner && "Must have a valid owner!");
+ return Owner->getOption(Info->GroupID);
+ }
+
+ const Option getAlias() const {
+ assert(Info && "Must have a valid info!");
+ assert(Owner && "Must have a valid owner!");
+ return Owner->getOption(Info->AliasID);
+ }
+
+ /// \brief Get the default prefix for this option.
+ StringRef getPrefix() const {
+ const char *Prefix = *Info->Prefixes;
+ return Prefix ? Prefix : StringRef();
+ }
+
+ /// \brief Get the name of this option with the default prefix.
+ std::string getPrefixedName() const {
+ std::string Ret = getPrefix();
+ Ret += getName();
+ return Ret;
+ }
+
+ unsigned getNumArgs() const { return Info->Param; }
+
+ bool hasNoOptAsInput() const { return Info->Flags & RenderAsInput;}
+
+ RenderStyleKind getRenderStyle() const {
+ if (Info->Flags & RenderJoined)
+ return RenderJoinedStyle;
+ if (Info->Flags & RenderSeparate)
+ return RenderSeparateStyle;
+ switch (getKind()) {
+ case GroupClass:
+ case InputClass:
+ case UnknownClass:
+ return RenderValuesStyle;
+ case JoinedClass:
+ case JoinedAndSeparateClass:
+ return RenderJoinedStyle;
+ case CommaJoinedClass:
+ return RenderCommaJoinedStyle;
+ case FlagClass:
+ case SeparateClass:
+ case MultiArgClass:
+ case JoinedOrSeparateClass:
+ return RenderSeparateStyle;
+ }
+ llvm_unreachable("Unexpected kind!");
+ }
+
+ /// Test if this option has the flag \a Val.
+ bool hasFlag(unsigned Val) const {
+ return Info->Flags & Val;
+ }
+
+ /// getUnaliasedOption - Return the final option this option
+ /// aliases (itself, if the option has no alias).
+ const Option getUnaliasedOption() const {
+ const Option Alias = getAlias();
+ if (Alias.isValid()) return Alias.getUnaliasedOption();
+ return *this;
+ }
+
+ /// getRenderName - Return the name to use when rendering this
+ /// option.
+ StringRef getRenderName() const {
+ return getUnaliasedOption().getName();
+ }
+
+ /// matches - Predicate for whether this option is part of the
+ /// given option (which may be a group).
+ ///
+ /// Note that matches against options which are an alias should never be
+ /// done -- aliases do not participate in matching and so such a query will
+ /// always be false.
+ bool matches(OptSpecifier ID) const;
+
+ /// accept - Potentially accept the current argument, returning a
+ /// new Arg instance, or 0 if the option does not accept this
+ /// argument (or the argument is missing values).
+ ///
+ /// If the option accepts the current argument, accept() sets
+ /// Index to the position where argument parsing should resume
+ /// (even if the argument is missing values).
+ ///
+  /// \param ArgSize The number of bytes taken up by the matched Option prefix
+ /// and name. This is used to determine where joined values
+ /// start.
+ Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const;
+
+ void dump() const;
+};
+
+} // end namespace opt
+} // end namespace llvm
+
+#endif
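
A small sketch of the alias rule documented at matches() above (OPT_W_group is a made-up ID): resolve the alias first, because matches() is always false when invoked on an alias.

#include "llvm/Option/Option.h"

// Hypothetical group ID; real IDs come from the generated option table.
enum { OPT_INVALID = 0, OPT_W_group = 42 };

bool isWarningOption(const llvm::opt::Option &O) {
  // Resolve aliases first: matches() never succeeds on an alias itself.
  return O.getUnaliasedOption().matches(OPT_W_group);
}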
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index cd651db1f1c2..35ec022516a5 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -104,6 +104,16 @@ public:
return PassID;
}
+ /// doInitialization - Virtual method overridden by subclasses to do
+ /// any necessary initialization before any pass is run.
+ ///
+ virtual bool doInitialization(Module &) { return false; }
+
+  /// doFinalization - Virtual method overridden by subclasses to do any
+ /// necessary clean up after all passes have run.
+ ///
+ virtual bool doFinalization(Module &) { return false; }
+
/// print - Print out the internal state of the pass. This is called by
/// Analyze to print out the contents of an analysis. Otherwise it is not
/// necessary to implement this method. Beware that the module pointer MAY be
@@ -287,21 +297,11 @@ public:
/// createPrinterPass - Get a function printer pass.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
- /// doInitialization - Virtual method overridden by subclasses to do
- /// any necessary per-module initialization.
- ///
- virtual bool doInitialization(Module &);
-
  /// runOnFunction - Virtual method overridden by subclasses to do the
/// per-function processing of the pass.
///
virtual bool runOnFunction(Function &F) = 0;
- /// doFinalization - Virtual method overriden by subclasses to do any post
- /// processing needed after all passes have run.
- ///
- virtual bool doFinalization(Module &);
-
virtual void assignPassManager(PMStack &PMS,
PassManagerType T);
@@ -328,10 +328,8 @@ public:
/// createPrinterPass - Get a basic block printer pass.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
- /// doInitialization - Virtual method overridden by subclasses to do
- /// any necessary per-module initialization.
- ///
- virtual bool doInitialization(Module &);
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
/// doInitialization - Virtual method overridden by BasicBlockPass subclasses
/// to do any necessary per-function initialization.
@@ -348,11 +346,6 @@ public:
///
virtual bool doFinalization(Function &);
- /// doFinalization - Virtual method overriden by subclasses to do any post
- /// processing needed after all passes have run.
- ///
- virtual bool doFinalization(Module &);
-
virtual void assignPassManager(PMStack &PMS,
PassManagerType T);
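
With doInitialization(Module &) and doFinalization(Module &) hoisted into the Pass base class, every pass kind can override the per-module hooks. A minimal hypothetical FunctionPass using them (registration boilerplate elided):

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"

// Hypothetical pass; INITIALIZE_PASS registration is omitted for brevity.
struct CountFunctions : public llvm::FunctionPass {
  static char ID;
  unsigned Count;
  CountFunctions() : llvm::FunctionPass(ID), Count(0) {}

  // Inherited from Pass itself now, so any pass kind may override it.
  virtual bool doInitialization(llvm::Module &) { Count = 0; return false; }
  virtual bool runOnFunction(llvm::Function &) { ++Count; return false; }
  virtual bool doFinalization(llvm::Module &) { return false; }
};
char CountFunctions::ID = 0;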
diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h
index d14d73b1b14f..a581802c47c0 100644
--- a/include/llvm/PassAnalysisSupport.h
+++ b/include/llvm/PassAnalysisSupport.h
@@ -16,12 +16,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_PASS_ANALYSIS_SUPPORT_H
-#define LLVM_PASS_ANALYSIS_SUPPORT_H
+#ifndef LLVM_PASSANALYSISSUPPORT_H
+#define LLVM_PASSANALYSISSUPPORT_H
-#include "llvm/Pass.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Pass.h"
#include <vector>
namespace llvm {
diff --git a/include/llvm/PassManagers.h b/include/llvm/PassManagers.h
index 0af58533805e..7afb0a0fbf90 100644
--- a/include/llvm/PassManagers.h
+++ b/include/llvm/PassManagers.h
@@ -14,13 +14,13 @@
#ifndef LLVM_PASSMANAGERS_H
#define LLVM_PASSMANAGERS_H
-#include "llvm/Pass.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/DenseMap.h"
-#include <vector>
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Pass.h"
#include <map>
+#include <vector>
//===----------------------------------------------------------------------===//
// Overview:
@@ -168,7 +168,7 @@ class PMTopLevelManager {
protected:
explicit PMTopLevelManager(PMDataManager *PMDM);
- virtual unsigned getNumContainedManagers() const {
+ unsigned getNumContainedManagers() const {
return (unsigned)PassManagers.size();
}
@@ -343,7 +343,7 @@ public:
void dumpRequiredSet(const Pass *P) const;
void dumpPreservedSet(const Pass *P) const;
- virtual unsigned getNumContainedPasses() const {
+ unsigned getNumContainedPasses() const {
return (unsigned)PassVector.size();
}
@@ -352,7 +352,7 @@ public:
return PMT_Unknown;
}
- std::map<AnalysisID, Pass*> *getAvailableAnalysis() {
+ DenseMap<AnalysisID, Pass*> *getAvailableAnalysis() {
return &AvailableAnalysis;
}
@@ -375,8 +375,7 @@ protected:
// Collection of Analysis provided by Parent pass manager and
  // used by the current pass manager. At any time there cannot be more
  // than PMT_Last active pass managers.
- std::map<AnalysisID, Pass *> *InheritedAnalysis[PMT_Last];
-
+ DenseMap<AnalysisID, Pass *> *InheritedAnalysis[PMT_Last];
/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
/// or higher is specified.
@@ -390,7 +389,7 @@ private:
// pass. If a pass requires an analysis which is not available then
// the required analysis pass is scheduled to run before the pass itself is
// scheduled to run.
- std::map<AnalysisID, Pass*> AvailableAnalysis;
+ DenseMap<AnalysisID, Pass*> AvailableAnalysis;
// Collection of higher level analysis used by the pass managed by
// this manager.
@@ -420,10 +419,20 @@ public:
/// cleanup - After running all passes, clean up pass manager cache.
void cleanup();
+ /// doInitialization - Overrides ModulePass doInitialization for global
+ /// initialization tasks
+ ///
+ using ModulePass::doInitialization;
+
/// doInitialization - Run all of the initializers for the function passes.
///
bool doInitialization(Module &M);
+ /// doFinalization - Overrides ModulePass doFinalization for global
+ /// finalization tasks
+ ///
+ using ModulePass::doFinalization;
+
/// doFinalization - Run all of the finalizers for the function passes.
///
bool doFinalization(Module &M);
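
The using-declarations matter because of C++ name hiding: declaring an overload in a derived class hides all base-class functions with the same name. A reduced toy illustration of the rule (not LLVM code):

// Toy types showing why 'using Base::init;' is needed.
struct Base {
  virtual bool init(int)    { return false; }
  virtual bool init(double) { return false; }
};

struct Derived : Base {
  using Base::init;                            // keep init(int) visible
  virtual bool init(double) { return true; }   // overrides without hiding
};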
diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h
index c6ad44f5f4ec..ccc79345e030 100644
--- a/include/llvm/PassSupport.h
+++ b/include/llvm/PassSupport.h
@@ -18,12 +18,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_PASS_SUPPORT_H
-#define LLVM_PASS_SUPPORT_H
+#ifndef LLVM_PASSSUPPORT_H
+#define LLVM_PASSSUPPORT_H
#include "Pass.h"
-#include "llvm/PassRegistry.h"
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/Atomic.h"
#include "llvm/Support/Valgrind.h"
#include <vector>
@@ -305,7 +305,7 @@ struct RegisterAnalysisGroup : public RegisterAGBase {
/// clients that are interested in which passes get registered and unregistered
/// at runtime (which can be because of the RegisterPass constructors being run
/// as the program starts up, or may be because a shared object just got
-/// loaded). Deriving from the PassRegistationListener class automatically
+/// loaded). Deriving from the PassRegistrationListener class automatically
/// registers your object to receive callbacks indicating when passes are loaded
/// and removed.
///
diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h
index d6b0ab8b3750..bba34248569a 100644
--- a/include/llvm/Support/AlignOf.h
+++ b/include/llvm/Support/AlignOf.h
@@ -19,7 +19,6 @@
#include <cstddef>
namespace llvm {
-
template <typename T>
struct AlignmentCalcImpl {
char x;
@@ -49,7 +48,6 @@ struct AlignOf {
enum { Alignment_LessEqual_4Bytes = Alignment <= 4 ? 1 : 0 };
enum { Alignment_LessEqual_8Bytes = Alignment <= 8 ? 1 : 0 };
enum { Alignment_LessEqual_16Bytes = Alignment <= 16 ? 1 : 0 };
-
};
/// alignOf - A templated function that returns the minimum alignment of
@@ -59,112 +57,148 @@ struct AlignOf {
template <typename T>
inline unsigned alignOf() { return AlignOf<T>::Alignment; }
-
+/// \struct AlignedCharArray
/// \brief Helper for building an aligned character array type.
///
/// This template is used to explicitly build up a collection of aligned
-/// character types. We have to build these up using a macro and explicit
+/// character array types. We have to build these up using a macro and explicit
/// specialization to cope with old versions of MSVC and GCC where only an
/// integer literal can be used to specify an alignment constraint. Once built
/// up here, we can then begin to indirect between these using normal C++
/// template parameters.
-template <size_t Alignment> struct AlignedCharArrayImpl;
// MSVC requires special handling here.
#ifndef _MSC_VER
#if __has_feature(cxx_alignas)
-#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \
- template <> struct AlignedCharArrayImpl<x> { \
- char alignas(x) aligned; \
- }
+template<std::size_t Alignment, std::size_t Size>
+struct AlignedCharArray {
+ alignas(Alignment) char buffer[Size];
+};
+
#elif defined(__GNUC__) || defined(__IBM_ATTRIBUTES)
+/// \brief Create a type with an aligned char buffer.
+template<std::size_t Alignment, std::size_t Size>
+struct AlignedCharArray;
+
#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \
- template <> struct AlignedCharArrayImpl<x> { \
- char aligned __attribute__((aligned(x))); \
- }
-#else
-# error No supported align as directive.
-#endif
+ template<std::size_t Size> \
+ struct AlignedCharArray<x, Size> { \
+ __attribute__((aligned(x))) char buffer[Size]; \
+ };
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(1);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(2);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(4);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(16);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(32);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(64);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(128);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(512);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(1024);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(2048);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(4096);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8192);
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(1)
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(2)
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(4)
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8)
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(16)
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(32)
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(64)
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(128)
#undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT
+#else
+# error No supported align as directive.
+#endif
+
#else // _MSC_VER
+/// \brief Create a type with an aligned char buffer.
+template<std::size_t Alignment, std::size_t Size>
+struct AlignedCharArray;
+
// We provide special variations of this template for the most common
// alignments because __declspec(align(...)) doesn't actually work when it is
// a member of a by-value function argument in MSVC, even if the alignment
-// request is something reasonably like 8-byte or 16-byte.
-template <> struct AlignedCharArrayImpl<1> { char aligned; };
-template <> struct AlignedCharArrayImpl<2> { short aligned; };
-template <> struct AlignedCharArrayImpl<4> { int aligned; };
-template <> struct AlignedCharArrayImpl<8> { double aligned; };
+// request is something reasonably like 8-byte or 16-byte. Note that we can't
+// even include the declspec with the union that forces the alignment because
+// MSVC warns on the existence of the declspec despite the union member forcing
+// proper alignment.
+
+template<std::size_t Size>
+struct AlignedCharArray<1, Size> {
+ union {
+ char aligned;
+ char buffer[Size];
+ };
+};
+
+template<std::size_t Size>
+struct AlignedCharArray<2, Size> {
+ union {
+ short aligned;
+ char buffer[Size];
+ };
+};
+
+template<std::size_t Size>
+struct AlignedCharArray<4, Size> {
+ union {
+ int aligned;
+ char buffer[Size];
+ };
+};
+
+template<std::size_t Size>
+struct AlignedCharArray<8, Size> {
+ union {
+ double aligned;
+ char buffer[Size];
+ };
+};
+
+
+// The rest of these are provided with a __declspec(align(...)) and we simply
+// can't pass them by-value as function arguments on MSVC.
#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \
- template <> struct AlignedCharArrayImpl<x> { \
- __declspec(align(x)) char aligned; \
- }
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(16);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(32);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(64);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(128);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(512);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(1024);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(2048);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(4096);
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8192);
-// Any larger and MSVC complains.
+ template<std::size_t Size> \
+ struct AlignedCharArray<x, Size> { \
+ __declspec(align(x)) char buffer[Size]; \
+ };
+
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(16)
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(32)
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(64)
+LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(128)
+
#undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT
#endif // _MSC_VER
+namespace detail {
+template <typename T1,
+ typename T2 = char, typename T3 = char, typename T4 = char,
+ typename T5 = char, typename T6 = char, typename T7 = char>
+class AlignerImpl {
+ T1 t1; T2 t2; T3 t3; T4 t4; T5 t5; T6 t6; T7 t7;
+
+ AlignerImpl(); // Never defined or instantiated.
+};
+
+template <typename T1,
+ typename T2 = char, typename T3 = char, typename T4 = char,
+ typename T5 = char, typename T6 = char, typename T7 = char>
+union SizerImpl {
+ char arr1[sizeof(T1)], arr2[sizeof(T2)], arr3[sizeof(T3)], arr4[sizeof(T4)],
+ arr5[sizeof(T5)], arr6[sizeof(T6)], arr7[sizeof(T7)];
+};
+} // end namespace detail
+
/// \brief This union template exposes a suitably aligned and sized character
 /// array member which can hold elements of any of up to seven types.
///
/// These types may be arrays, structs, or any other types. The goal is to
-/// produce a union type containing a character array which, when used, forms
-/// storage suitable to placement new any of these types over. Support for more
-/// than four types can be added at the cost of more boiler plate.
+/// expose a char array buffer member which can be used as suitable storage for
+/// a placement new of any of these types. Support for more than seven types can
+/// be added at the cost of more boiler plate.
template <typename T1,
- typename T2 = char, typename T3 = char, typename T4 = char>
-union AlignedCharArrayUnion {
-private:
- class AlignerImpl {
- T1 t1; T2 t2; T3 t3; T4 t4;
-
- AlignerImpl(); // Never defined or instantiated.
- };
- union SizerImpl {
- char arr1[sizeof(T1)], arr2[sizeof(T2)], arr3[sizeof(T3)], arr4[sizeof(T4)];
- };
-
-public:
- /// \brief The character array buffer for use by clients.
- ///
- /// No other member of this union should be referenced. The exist purely to
- /// constrain the layout of this character array.
- char buffer[sizeof(SizerImpl)];
-
-private:
- // Tests seem to indicate that both Clang and GCC will properly register the
- // alignment of a struct containing an aligned member, and this alignment
- // should carry over to the character array in the union.
- llvm::AlignedCharArrayImpl<AlignOf<AlignerImpl>::Alignment> nonce_member;
+ typename T2 = char, typename T3 = char, typename T4 = char,
+ typename T5 = char, typename T6 = char, typename T7 = char>
+struct AlignedCharArrayUnion : llvm::AlignedCharArray<
+ AlignOf<detail::AlignerImpl<T1, T2, T3, T4, T5, T6, T7> >::Alignment,
+ sizeof(detail::SizerImpl<T1, T2, T3, T4, T5, T6, T7>)> {
};
-
} // end namespace llvm
#endif
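
The reworked AlignedCharArrayUnion therefore exposes an inherited buffer member directly. A minimal sketch of the intended placement-new pattern (types illustrative):

#include "llvm/Support/AlignOf.h"
#include <new> // placement new

// Storage big and aligned enough for either an int or a double.
llvm::AlignedCharArrayUnion<int, double> Storage;

double *makeDouble() {
  // Placement-new into the inherited 'buffer' member; the caller must run
  // the destructor before reusing the storage for another type.
  return new (Storage.buffer) double(3.14);
}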
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index a644b133660f..3243fd9cea57 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -15,12 +15,12 @@
#define LLVM_SUPPORT_ALLOCATOR_H
#include "llvm/Support/AlignOf.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
-#include <cstdlib>
#include <cstddef>
+#include <cstdlib>
namespace llvm {
template <typename T> struct ReferenceAdder { typedef T& result; };
diff --git a/include/llvm/Support/ArrayRecycler.h b/include/llvm/Support/ArrayRecycler.h
new file mode 100644
index 000000000000..c7e0cba279e6
--- /dev/null
+++ b/include/llvm/Support/ArrayRecycler.h
@@ -0,0 +1,143 @@
+//==- llvm/Support/ArrayRecycler.h - Recycling of Arrays ---------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ArrayRecycler class template which can recycle small
+// arrays allocated from one of the allocators in Allocator.h
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ARRAYRECYCLER_H
+#define LLVM_SUPPORT_ARRAYRECYCLER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MathExtras.h"
+
+namespace llvm {
+
+class BumpPtrAllocator;
+
+/// Recycle small arrays allocated from a BumpPtrAllocator.
+///
+/// Arrays are allocated in a small number of fixed sizes. For each supported
+/// array size, the ArrayRecycler keeps a free list of available arrays.
+///
+template<class T, size_t Align = AlignOf<T>::Alignment>
+class ArrayRecycler {
+ // The free list for a given array size is a simple singly linked list.
+ // We can't use iplist or Recycler here since those classes can't be copied.
+ struct FreeList {
+ FreeList *Next;
+ };
+
+ // Keep a free list for each array size.
+ SmallVector<FreeList*, 8> Bucket;
+
+ // Remove an entry from the free list in Bucket[Idx] and return it.
+ // Return NULL if no entries are available.
+ T *pop(unsigned Idx) {
+ if (Idx >= Bucket.size())
+ return 0;
+ FreeList *Entry = Bucket[Idx];
+ if (!Entry)
+ return 0;
+ Bucket[Idx] = Entry->Next;
+ return reinterpret_cast<T*>(Entry);
+ }
+
+ // Add an entry to the free list at Bucket[Idx].
+ void push(unsigned Idx, T *Ptr) {
+ assert(Ptr && "Cannot recycle NULL pointer");
+ assert(sizeof(T) >= sizeof(FreeList) && "Objects are too small");
+ assert(Align >= AlignOf<FreeList>::Alignment && "Object underaligned");
+ FreeList *Entry = reinterpret_cast<FreeList*>(Ptr);
+ if (Idx >= Bucket.size())
+ Bucket.resize(size_t(Idx) + 1);
+ Entry->Next = Bucket[Idx];
+ Bucket[Idx] = Entry;
+ }
+
+public:
+ /// The size of an allocated array is represented by a Capacity instance.
+ ///
+ /// This class is much smaller than a size_t, and it provides methods to work
+ /// with the set of legal array capacities.
+ class Capacity {
+ uint8_t Index;
+ explicit Capacity(uint8_t idx) : Index(idx) {}
+
+ public:
+ Capacity() : Index(0) {}
+
+ /// Get the capacity of an array that can hold at least N elements.
+ static Capacity get(size_t N) {
+ return Capacity(N ? Log2_64_Ceil(N) : 0);
+ }
+
+ /// Get the number of elements in an array with this capacity.
+ size_t getSize() const { return size_t(1u) << Index; }
+
+ /// Get the bucket number for this capacity.
+ unsigned getBucket() const { return Index; }
+
+ /// Get the next larger capacity. Large capacities grow exponentially, so
+ /// this function can be used to reallocate incrementally growing vectors
+ /// in amortized linear time.
+ Capacity getNext() const { return Capacity(Index + 1); }
+ };
+
+ ~ArrayRecycler() {
+ // The client should always call clear() so recycled arrays can be returned
+ // to the allocator.
+ assert(Bucket.empty() && "Non-empty ArrayRecycler deleted!");
+ }
+
+ /// Release all the tracked allocations to the allocator. The recycler must
+ /// be free of any tracked allocations before being deleted.
+ template<class AllocatorType>
+ void clear(AllocatorType &Allocator) {
+ for (; !Bucket.empty(); Bucket.pop_back())
+ while (T *Ptr = pop(Bucket.size() - 1))
+ Allocator.Deallocate(Ptr);
+ }
+
+ /// Special case for BumpPtrAllocator which has an empty Deallocate()
+ /// function.
+ ///
+ /// There is no need to traverse the free lists, pulling all the objects into
+ /// cache.
+ void clear(BumpPtrAllocator&) {
+ Bucket.clear();
+ }
+
+ /// Allocate an array of at least the requested capacity.
+ ///
+ /// Return an existing recycled array, or allocate one from Allocator if
+ /// none are available for recycling.
+ ///
+ template<class AllocatorType>
+ T *allocate(Capacity Cap, AllocatorType &Allocator) {
+ // Try to recycle an existing array.
+ if (T *Ptr = pop(Cap.getBucket()))
+ return Ptr;
+ // Nope, get more memory.
+ return static_cast<T*>(Allocator.Allocate(sizeof(T)*Cap.getSize(), Align));
+ }
+
+ /// Deallocate an array with the specified Capacity.
+ ///
+ /// Cap must be the same capacity that was given to allocate().
+ ///
+ void deallocate(Capacity Cap, T *Ptr) {
+ push(Cap.getBucket(), Ptr);
+ }
+};
+
+} // end llvm namespace
+
+#endif
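
A usage sketch following the class's own contract (identifiers illustrative): look the capacity up once with Capacity::get, hand the same Capacity back on deallocate, and clear() before the recycler is destroyed.

#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/DataTypes.h" // uint64_t

void demo() {
  llvm::BumpPtrAllocator Alloc;
  llvm::ArrayRecycler<uint64_t> Recycler;

  typedef llvm::ArrayRecycler<uint64_t>::Capacity Capacity;
  Capacity Cap = Capacity::get(5);       // rounds up to 8 elements
  uint64_t *A = Recycler.allocate(Cap, Alloc);
  // ... A[0] through A[Cap.getSize() - 1] are usable ...
  Recycler.deallocate(Cap, A);           // back onto the free list
  Recycler.clear(Alloc);                 // mandatory before destruction
}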
diff --git a/include/llvm/Support/Atomic.h b/include/llvm/Support/Atomic.h
index 1a6c606aa5f6..9ec23e827023 100644
--- a/include/llvm/Support/Atomic.h
+++ b/include/llvm/Support/Atomic.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_ATOMIC_H
-#define LLVM_SYSTEM_ATOMIC_H
+#ifndef LLVM_SUPPORT_ATOMIC_H
+#define LLVM_SUPPORT_ATOMIC_H
#include "llvm/Support/DataTypes.h"
diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h
index f5dc8ea055a3..265b886daff7 100644
--- a/include/llvm/Support/CFG.h
+++ b/include/llvm/Support/CFG.h
@@ -16,8 +16,8 @@
#define LLVM_SUPPORT_CFG_H
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/Function.h"
-#include "llvm/InstrTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
namespace llvm {
@@ -27,8 +27,9 @@ namespace llvm {
template <class Ptr, class USE_iterator> // Predecessor Iterator
class PredIterator : public std::iterator<std::forward_iterator_tag,
- Ptr, ptrdiff_t> {
- typedef std::iterator<std::forward_iterator_tag, Ptr, ptrdiff_t> super;
+ Ptr, ptrdiff_t, Ptr*, Ptr*> {
+ typedef std::iterator<std::forward_iterator_tag, Ptr, ptrdiff_t, Ptr*,
+ Ptr*> super;
typedef PredIterator<Ptr, USE_iterator> Self;
USE_iterator It;
@@ -40,6 +41,7 @@ class PredIterator : public std::iterator<std::forward_iterator_tag,
public:
typedef typename super::pointer pointer;
+ typedef typename super::reference reference;
PredIterator() {}
explicit inline PredIterator(Ptr *bb) : It(bb->use_begin()) {
@@ -50,7 +52,7 @@ public:
inline bool operator==(const Self& x) const { return It == x.It; }
inline bool operator!=(const Self& x) const { return !operator==(x); }
- inline pointer operator*() const {
+ inline reference operator*() const {
assert(!It.atEnd() && "pred_iterator out of range!");
return cast<TerminatorInst>(*It)->getParent();
}
@@ -100,10 +102,11 @@ inline const_pred_iterator pred_end(const BasicBlock *BB) {
template <class Term_, class BB_> // Successor Iterator
class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
- BB_, ptrdiff_t> {
+ BB_, ptrdiff_t, BB_*, BB_*> {
const Term_ Term;
unsigned idx;
- typedef std::iterator<std::bidirectional_iterator_tag, BB_, ptrdiff_t> super;
+ typedef std::iterator<std::bidirectional_iterator_tag, BB_, ptrdiff_t, BB_*,
+ BB_*> super;
typedef SuccIterator<Term_, BB_> Self;
inline bool index_is_valid(int idx) {
@@ -112,6 +115,7 @@ class SuccIterator : public std::iterator<std::bidirectional_iterator_tag,
public:
typedef typename super::pointer pointer;
+ typedef typename super::reference reference;
// TODO: This can be random access iterator, only operator[] missing.
explicit inline SuccIterator(Term_ T) : Term(T), idx(0) {// begin iterator
@@ -142,7 +146,7 @@ public:
inline bool operator==(const Self& x) const { return idx == x.idx; }
inline bool operator!=(const Self& x) const { return !operator==(x); }
- inline pointer operator*() const { return Term->getSuccessor(idx); }
+ inline reference operator*() const { return Term->getSuccessor(idx); }
inline pointer operator->() const { return operator*(); }
inline Self& operator++() { ++idx; return *this; } // Preincrement
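
The traversal idiom is unchanged by the reference-typedef fix; dereferencing still yields a plain BasicBlock*. Sketch:

#include "llvm/Support/CFG.h"

// Count the CFG neighbors of a block; *PI and *SI use the new 'reference'
// typedef, which is still just BasicBlock*.
unsigned countNeighbors(llvm::BasicBlock *BB) {
  unsigned N = 0;
  for (llvm::pred_iterator PI = llvm::pred_begin(BB),
                           PE = llvm::pred_end(BB); PI != PE; ++PI)
    ++N;
  for (llvm::succ_iterator SI = llvm::succ_begin(BB),
                           SE = llvm::succ_end(BB); SI != SE; ++SI)
    ++N;
  return N;
}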
diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h
index ba8adb018173..823b43ad938a 100644
--- a/include/llvm/Support/COFF.h
+++ b/include/llvm/Support/COFF.h
@@ -20,8 +20,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_WIN_COFF_H
-#define LLVM_SUPPORT_WIN_COFF_H
+#ifndef LLVM_SUPPORT_COFF_H
+#define LLVM_SUPPORT_COFF_H
#include "llvm/Support/DataTypes.h"
#include <cassert>
@@ -321,7 +321,8 @@ namespace COFF {
IMAGE_COMDAT_SELECT_SAME_SIZE,
IMAGE_COMDAT_SELECT_EXACT_MATCH,
IMAGE_COMDAT_SELECT_ASSOCIATIVE,
- IMAGE_COMDAT_SELECT_LARGEST
+ IMAGE_COMDAT_SELECT_LARGEST,
+ IMAGE_COMDAT_SELECT_NEWEST
};
// Auxiliary Symbol Formats
diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h
index ad8d6d41fc4a..92107ac02526 100644
--- a/include/llvm/Support/CallSite.h
+++ b/include/llvm/Support/CallSite.h
@@ -26,11 +26,10 @@
#ifndef LLVM_SUPPORT_CALLSITE_H
#define LLVM_SUPPORT_CALLSITE_H
-#include "llvm/Attributes.h"
#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instructions.h"
namespace llvm {
@@ -177,20 +176,20 @@ public:
/// getAttributes/setAttributes - get or set the parameter attributes of
/// the call.
- const AttrListPtr &getAttributes() const {
+ const AttributeSet &getAttributes() const {
CALLSITE_DELEGATE_GETTER(getAttributes());
}
- void setAttributes(const AttrListPtr &PAL) {
+ void setAttributes(const AttributeSet &PAL) {
CALLSITE_DELEGATE_SETTER(setAttributes(PAL));
}
/// \brief Return true if this function has the given attribute.
- bool hasFnAttr(Attributes::AttrVal A) const {
+ bool hasFnAttr(Attribute::AttrKind A) const {
CALLSITE_DELEGATE_GETTER(hasFnAttr(A));
}
/// \brief Return true if the call or the callee has the given attribute.
- bool paramHasAttr(unsigned i, Attributes::AttrVal A) const {
+ bool paramHasAttr(unsigned i, Attribute::AttrKind A) const {
CALLSITE_DELEGATE_GETTER(paramHasAttr(i, A));
}
@@ -244,12 +243,12 @@ public:
/// @brief Determine whether this argument is not captured.
bool doesNotCapture(unsigned ArgNo) const {
- return paramHasAttr(ArgNo + 1, Attributes::NoCapture);
+ return paramHasAttr(ArgNo + 1, Attribute::NoCapture);
}
/// @brief Determine whether this argument is passed by value.
bool isByValArgument(unsigned ArgNo) const {
- return paramHasAttr(ArgNo + 1, Attributes::ByVal);
+ return paramHasAttr(ArgNo + 1, Attribute::ByVal);
}
/// hasArgument - Returns true if this CallSite passes the given Value* as an
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h
index 0c71882a77b1..0d2d6c92fdb0 100644
--- a/include/llvm/Support/Casting.h
+++ b/include/llvm/Support/Casting.h
@@ -36,9 +36,13 @@ template<typename From> struct simplify_type {
};
template<typename From> struct simplify_type<const From> {
- typedef const From SimpleType;
- static SimpleType &getSimplifiedValue(const From &Val) {
- return simplify_type<From>::getSimplifiedValue(static_cast<From&>(Val));
+ typedef typename simplify_type<From>::SimpleType NonConstSimpleType;
+ typedef typename add_const_past_pointer<NonConstSimpleType>::type
+ SimpleType;
+ typedef typename add_lvalue_reference_if_not_pointer<SimpleType>::type
+ RetType;
+ static RetType getSimplifiedValue(const From& Val) {
+ return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val));
}
};
@@ -55,8 +59,8 @@ struct isa_impl {
/// \brief Always allow upcasts, and perform no dynamic check for them.
template <typename To, typename From>
struct isa_impl<To, From,
- typename llvm::enable_if_c<
- llvm::is_base_of<To, From>::value
+ typename enable_if<
+ llvm::is_base_of<To, From>
>::type
> {
static inline bool doit(const From &) { return true; }
@@ -81,6 +85,13 @@ template <typename To, typename From> struct isa_impl_cl<To, From*> {
}
};
+template <typename To, typename From> struct isa_impl_cl<To, From*const> {
+ static inline bool doit(const From *Val) {
+ assert(Val && "isa<> used on a null pointer");
+ return isa_impl<To, From>::doit(*Val);
+ }
+};
+
template <typename To, typename From> struct isa_impl_cl<To, const From*> {
static inline bool doit(const From *Val) {
assert(Val && "isa<> used on a null pointer");
@@ -102,7 +113,7 @@ struct isa_impl_wrap {
static bool doit(const From &Val) {
return isa_impl_wrap<To, SimpleFrom,
typename simplify_type<SimpleFrom>::SimpleType>::doit(
- simplify_type<From>::getSimplifiedValue(Val));
+ simplify_type<const From>::getSimplifiedValue(Val));
}
};
@@ -121,7 +132,8 @@ struct isa_impl_wrap<To, FromTy, FromTy> {
//
template <class X, class Y>
inline bool isa(const Y &Val) {
- return isa_impl_wrap<X, Y, typename simplify_type<Y>::SimpleType>::doit(Val);
+ return isa_impl_wrap<X, const Y,
+ typename simplify_type<const Y>::SimpleType>::doit(Val);
}
//===----------------------------------------------------------------------===//
@@ -178,7 +190,7 @@ struct cast_retty {
//
template<class To, class From, class SimpleFrom> struct cast_convert_val {
// This is not a simple type, use the template to simplify it...
- static typename cast_retty<To, From>::ret_type doit(const From &Val) {
+ static typename cast_retty<To, From>::ret_type doit(From &Val) {
return cast_convert_val<To, SimpleFrom,
typename simplify_type<SimpleFrom>::SimpleType>::doit(
simplify_type<From>::getSimplifiedValue(Val));
@@ -204,12 +216,29 @@ template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> {
// cast<Instruction>(myVal)->getParent()
//
template <class X, class Y>
-inline typename cast_retty<X, Y>::ret_type cast(const Y &Val) {
+inline typename cast_retty<X, const Y>::ret_type cast(const Y &Val) {
+ assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
+ return cast_convert_val<X, const Y,
+ typename simplify_type<const Y>::SimpleType>::doit(Val);
+}
+
+template <class X, class Y>
+inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
return cast_convert_val<X, Y,
typename simplify_type<Y>::SimpleType>::doit(Val);
}
+template <class X, class Y>
+inline typename enable_if<
+ is_same<Y, typename simplify_type<Y>::SimpleType>,
+ typename cast_retty<X, Y*>::ret_type
+>::type cast(Y *Val) {
+ assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
+ return cast_convert_val<X, Y*,
+ typename simplify_type<Y*>::SimpleType>::doit(Val);
+}
+
// cast_or_null<X> - Functionally identical to cast, except that a null value is
// accepted.
//
@@ -230,8 +259,21 @@ inline typename cast_retty<X, Y*>::ret_type cast_or_null(Y *Val) {
//
template <class X, class Y>
-inline typename cast_retty<X, Y>::ret_type dyn_cast(const Y &Val) {
- return isa<X>(Val) ? cast<X, Y>(Val) : 0;
+inline typename cast_retty<X, const Y>::ret_type dyn_cast(const Y &Val) {
+ return isa<X>(Val) ? cast<X>(Val) : 0;
+}
+
+template <class X, class Y>
+inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) {
+ return isa<X>(Val) ? cast<X>(Val) : 0;
+}
+
+template <class X, class Y>
+inline typename enable_if<
+ is_same<Y, typename simplify_type<Y>::SimpleType>,
+ typename cast_retty<X, Y*>::ret_type
+>::type dyn_cast(Y *Val) {
+ return isa<X>(Val) ? cast<X>(Val) : 0;
}
// dyn_cast_or_null<X> - Functionally identical to dyn_cast, except that a null
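
Net effect of the new overload set, sketched (assumes llvm/IR/Instruction.h from this import): constness of the argument now propagates to the result, so a const Value* can be dyn_cast without a const_cast.

#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"

const llvm::Instruction *asInstruction(const llvm::Value *V) {
  // V must be non-null here; use dyn_cast_or_null for possibly-null values.
  // With a non-const Value* the same call would yield Instruction*.
  return llvm::dyn_cast<llvm::Instruction>(V);
}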
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index 872c57998c4e..2e84d7b349d1 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -20,10 +20,10 @@
#ifndef LLVM_SUPPORT_COMMANDLINE_H
#define LLVM_SUPPORT_COMMANDLINE_H
-#include "llvm/Support/type_traits.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/type_traits.h"
#include <cassert>
#include <climits>
#include <cstdarg>
@@ -469,8 +469,7 @@ public:
template<class Opt>
void apply(Opt &O) const {
- for (unsigned i = 0, e = static_cast<unsigned>(Values.size());
- i != e; ++i)
+ for (size_t i = 0, e = Values.size(); i != e; ++i)
O.getParser().addLiteralOption(Values[i].first, Values[i].second.first,
Values[i].second.second);
}
@@ -629,8 +628,7 @@ public:
else
ArgVal = ArgName;
- for (unsigned i = 0, e = static_cast<unsigned>(Values.size());
- i != e; ++i)
+ for (size_t i = 0, e = Values.size(); i != e; ++i)
if (Values[i].Name == ArgVal) {
V = Values[i].V.getValue();
return false;
@@ -1092,7 +1090,7 @@ public:
// Make sure we initialize the value with the default constructor for the
// type.
- opt_storage() : Value(DataType()) {}
+ opt_storage() : Value(DataType()), Default(DataType()) {}
template<class T>
void setValue(const T &V, bool initial = false) {
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 7ceeb3212119..13d057be049f 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -15,29 +15,90 @@
#ifndef LLVM_SUPPORT_COMPILER_H
#define LLVM_SUPPORT_COMPILER_H
+#include "llvm/Config/llvm-config.h"
+
#ifndef __has_feature
# define __has_feature(x) 0
#endif
-/// LLVM_HAS_RVALUE_REFERENCES - Does the compiler provide r-value references?
+/// \brief Does the compiler support r-value references?
/// This implies that <utility> provides the one-argument std::move; it
/// does not imply the existence of any other C++ library features.
#if (__has_feature(cxx_rvalue_references) \
|| defined(__GXX_EXPERIMENTAL_CXX0X__) \
|| (defined(_MSC_VER) && _MSC_VER >= 1600))
-#define LLVM_USE_RVALUE_REFERENCES 1
+#define LLVM_HAS_RVALUE_REFERENCES 1
+#else
+#define LLVM_HAS_RVALUE_REFERENCES 0
+#endif
+
+/// \brief Does the compiler support r-value reference *this?
+///
+/// Sadly, this is separate from just r-value reference support because GCC
+/// implemented everything but this thus far. No release of GCC yet has support
+/// for this feature so it is enabled with Clang only.
+/// FIXME: This should change to a version check when GCC grows support for it.
+#if __has_feature(cxx_rvalue_references)
+#define LLVM_HAS_RVALUE_REFERENCE_THIS 1
+#else
+#define LLVM_HAS_RVALUE_REFERENCE_THIS 0
+#endif
+
+/// \macro LLVM_HAS_CXX11_TYPETRAITS
+/// \brief Does the compiler have the C++11 type traits.
+///
+/// #include <type_traits>
+///
+/// * enable_if
+/// * {true,false}_type
+/// * is_constructible
+/// * etc...
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) \
+ || (defined(_MSC_VER) && _MSC_VER >= 1700)
+#define LLVM_HAS_CXX11_TYPETRAITS 1
+#else
+#define LLVM_HAS_CXX11_TYPETRAITS 0
+#endif
+
+/// \macro LLVM_HAS_CXX11_STDLIB
+/// \brief Does the compiler have the C++11 standard library.
+///
+/// Implies LLVM_HAS_RVALUE_REFERENCES, LLVM_HAS_CXX11_TYPETRAITS
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) \
+ || (defined(_MSC_VER) && _MSC_VER >= 1700)
+#define LLVM_HAS_CXX11_STDLIB 1
#else
-#define LLVM_USE_RVALUE_REFERENCES 0
+#define LLVM_HAS_CXX11_STDLIB 0
+#endif
+
+/// \macro LLVM_HAS_VARIADIC_TEMPLATES
+/// \brief Does this compiler support variadic templates.
+///
+/// Implies LLVM_HAS_RVALUE_REFERENCES and the existence of std::forward.
+#if __has_feature(cxx_variadic_templates)
+# define LLVM_HAS_VARIADIC_TEMPLATES 1
+#else
+# define LLVM_HAS_VARIADIC_TEMPLATES 0
#endif
/// llvm_move - Expands to ::std::move if the compiler supports
/// r-value references; otherwise, expands to the argument.
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
#define llvm_move(value) (::std::move(value))
#else
#define llvm_move(value) (value)
#endif
+/// Expands to '&' if r-value references are supported.
+///
+/// This can be used to provide l-value/r-value overrides of member functions.
+/// The r-value override should be guarded by LLVM_HAS_RVALUE_REFERENCE_THIS
+#if LLVM_HAS_RVALUE_REFERENCE_THIS
+#define LLVM_LVALUE_FUNCTION &
+#else
+#define LLVM_LVALUE_FUNCTION
+#endif
+
/// LLVM_DELETED_FUNCTION - Expands to = delete if the compiler supports it.
/// Use to mark functions as uncallable. Member functions with this should
/// be declared private so that some behavior is kept in C++03 mode.
@@ -59,7 +120,8 @@
/// LLVM_FINAL - Expands to 'final' if the compiler supports it.
/// Use to mark classes or virtual methods as final.
-#if (__has_feature(cxx_override_control))
+#if __has_feature(cxx_override_control) \
+ || (defined(_MSC_VER) && _MSC_VER >= 1700)
#define LLVM_FINAL final
#else
#define LLVM_FINAL
@@ -67,12 +129,19 @@
/// LLVM_OVERRIDE - Expands to 'override' if the compiler supports it.
/// Use to mark virtual methods as overriding a base class method.
-#if (__has_feature(cxx_override_control))
+#if __has_feature(cxx_override_control) \
+ || (defined(_MSC_VER) && _MSC_VER >= 1700)
#define LLVM_OVERRIDE override
#else
#define LLVM_OVERRIDE
#endif
+#if __has_feature(cxx_constexpr) || defined(__GXX_EXPERIMENTAL_CXX0X__)
+# define LLVM_CONSTEXPR constexpr
+#else
+# define LLVM_CONSTEXPR
+#endif
+
/// LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked
/// into a shared library, then the class should be private to the library and
/// not accessible from outside it. Can also be used to mark variables and
@@ -129,7 +198,6 @@
#define LLVM_UNLIKELY(EXPR) (EXPR)
#endif
-
// C++ doesn't support 'extern template' of template specializations. GCC does,
// but requires __extension__ before it. In the header, use this:
// EXTERN_TEMPLATE_INSTANTIATION(class foo<bar>);
@@ -143,8 +211,8 @@
#define TEMPLATE_INSTANTIATION(X)
#endif
-// LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so,
-// mark a method "not for inlining".
+/// LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so,
+/// mark a method "not for inlining".
#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
#define LLVM_ATTRIBUTE_NOINLINE __attribute__((noinline))
#elif defined(_MSC_VER)
@@ -153,10 +221,10 @@
#define LLVM_ATTRIBUTE_NOINLINE
#endif
-// LLVM_ATTRIBUTE_ALWAYS_INLINE - On compilers where we have a directive to do
-// so, mark a method "always inline" because it is performance sensitive. GCC
-// 3.4 supported this but is buggy in various cases and produces unimplemented
-// errors, just use it in GCC 4.0 and later.
+/// LLVM_ATTRIBUTE_ALWAYS_INLINE - On compilers where we have a directive to do
+/// so, mark a method "always inline" because it is performance sensitive. GCC
+/// 3.4 supported this but is buggy in various cases and produces unimplemented
+/// errors, just use it in GCC 4.0 and later.
#if __GNUC__ > 3
#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline __attribute__((always_inline))
#elif defined(_MSC_VER)
@@ -165,7 +233,6 @@
#define LLVM_ATTRIBUTE_ALWAYS_INLINE
#endif
-
#ifdef __GNUC__
#define LLVM_ATTRIBUTE_NORETURN __attribute__((noreturn))
#elif defined(_MSC_VER)
@@ -174,8 +241,8 @@
#define LLVM_ATTRIBUTE_NORETURN
#endif
-// LLVM_EXTENSION - Support compilers where we have a keyword to suppress
-// pedantic diagnostics.
+/// LLVM_EXTENSION - Support compilers where we have a keyword to suppress
+/// pedantic diagnostics.
#ifdef __GNUC__
#define LLVM_EXTENSION __extension__
#else
@@ -197,16 +264,18 @@
decl
#endif
-// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands
-// to an expression which states that it is undefined behavior for the
-// compiler to reach this point. Otherwise is not defined.
+/// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands
+/// to an expression which states that it is undefined behavior for the
+/// compiler to reach this point. Otherwise is not defined.
#if defined(__clang__) || (__GNUC__ > 4) \
|| (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
# define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable()
+#elif defined(_MSC_VER)
+# define LLVM_BUILTIN_UNREACHABLE __assume(false)
#endif
-// LLVM_BUILTIN_TRAP - On compilers which support it, expands to an expression
-// which causes the program to exit abnormally.
+/// LLVM_BUILTIN_TRAP - On compilers which support it, expands to an expression
+/// which causes the program to exit abnormally.
#if defined(__clang__) || (__GNUC__ > 4) \
|| (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
# define LLVM_BUILTIN_TRAP __builtin_trap()
@@ -214,4 +283,82 @@
# define LLVM_BUILTIN_TRAP *(volatile int*)0x11 = 0
#endif
+/// \macro LLVM_ASSUME_ALIGNED
+/// \brief Returns a pointer with an assumed alignment.
+#if !defined(__clang__) && ((__GNUC__ > 4) \
+ || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
+// FIXME: Enable on clang when it supports it.
+# define LLVM_ASSUME_ALIGNED(p, a) __builtin_assume_aligned(p, a)
+#elif defined(LLVM_BUILTIN_UNREACHABLE)
+# define LLVM_ASSUME_ALIGNED(p, a) \
+ (((uintptr_t(p) % (a)) == 0) ? (p) : (LLVM_BUILTIN_UNREACHABLE, (p)))
+#else
+# define LLVM_ASSUME_ALIGNED(p, a) (p)
+#endif
+
+/// \macro LLVM_FUNCTION_NAME
+/// \brief Expands to __func__ on compilers which support it. Otherwise,
+/// expands to a compiler-dependent replacement.
+#if defined(_MSC_VER)
+# define LLVM_FUNCTION_NAME __FUNCTION__
+#else
+# define LLVM_FUNCTION_NAME __func__
+#endif
+
+#if defined(HAVE_SANITIZER_MSAN_INTERFACE_H)
+# include <sanitizer/msan_interface.h>
+#else
+# define __msan_allocated_memory(p, size)
+# define __msan_unpoison(p, size)
+#endif
+
+/// \macro LLVM_MEMORY_SANITIZER_BUILD
+/// \brief Whether LLVM itself is built with MemorySanitizer instrumentation.
+#if __has_feature(memory_sanitizer)
+# define LLVM_MEMORY_SANITIZER_BUILD 1
+#else
+# define LLVM_MEMORY_SANITIZER_BUILD 0
+#endif
+
+/// \macro LLVM_ADDRESS_SANITIZER_BUILD
+/// \brief Whether LLVM itself is built with AddressSanitizer instrumentation.
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+# define LLVM_ADDRESS_SANITIZER_BUILD 1
+#else
+# define LLVM_ADDRESS_SANITIZER_BUILD 0
+#endif
+
+/// \macro LLVM_IS_UNALIGNED_ACCESS_FAST
+/// \brief Is unaligned memory access fast on the host machine.
+///
+/// Don't specialize on alignment for platforms where unaligned memory accesses
+/// generate the same code as aligned memory accesses for common types.
+#if defined(_M_AMD64) || defined(_M_IX86) || defined(__amd64) || \
+ defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || \
+ defined(_X86_) || defined(__i386) || defined(__i386__)
+# define LLVM_IS_UNALIGNED_ACCESS_FAST 1
+#else
+# define LLVM_IS_UNALIGNED_ACCESS_FAST 0
+#endif
+
+/// \macro LLVM_EXPLICIT
+/// \brief Expands to explicit on compilers which support explicit conversion
+/// operators. Otherwise expands to nothing.
+#if (__has_feature(cxx_explicit_conversions) \
+ || defined(__GXX_EXPERIMENTAL_CXX0X__))
+#define LLVM_EXPLICIT explicit
+#else
+#define LLVM_EXPLICIT
+#endif
+
+/// \macro LLVM_STATIC_ASSERT
+/// \brief Expands to C/C++'s static_assert on compilers which support it.
+#if __has_feature(cxx_static_assert)
+# define LLVM_STATIC_ASSERT(expr, msg) static_assert(expr, msg)
+#elif __has_feature(c_static_assert)
+# define LLVM_STATIC_ASSERT(expr, msg) _Static_assert(expr, msg)
+#else
+# define LLVM_STATIC_ASSERT(expr, msg)
+#endif
+
#endif
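
Two of the new macros in a hedged sketch (Small, checks, and assume16 are made-up names):

#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h" // uintptr_t, used on the fallback path

struct Small { char C; };

void checks() {
  // Compiles to static_assert/_Static_assert where available, else nothing.
  LLVM_STATIC_ASSERT(sizeof(Small) == 1, "unexpected padding");
}

int *assume16(void *P) {
  // Hints that P is 16-byte aligned; degrades to a plain pass-through.
  return static_cast<int *>(LLVM_ASSUME_ALIGNED(P, 16));
}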
diff --git a/include/llvm/Support/ConstantFolder.h b/include/llvm/Support/ConstantFolder.h
index 93aa3436d273..4aad952aaca1 100644
--- a/include/llvm/Support/ConstantFolder.h
+++ b/include/llvm/Support/ConstantFolder.h
@@ -17,8 +17,8 @@
#ifndef LLVM_SUPPORT_CONSTANTFOLDER_H
#define LLVM_SUPPORT_CONSTANTFOLDER_H
-#include "llvm/Constants.h"
-#include "llvm/InstrTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
namespace llvm {
diff --git a/include/llvm/Support/ConstantRange.h b/include/llvm/Support/ConstantRange.h
index 90dd69fa478f..0f29256b8045 100644
--- a/include/llvm/Support/ConstantRange.h
+++ b/include/llvm/Support/ConstantRange.h
@@ -29,8 +29,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_CONSTANT_RANGE_H
-#define LLVM_SUPPORT_CONSTANT_RANGE_H
+#ifndef LLVM_SUPPORT_CONSTANTRANGE_H
+#define LLVM_SUPPORT_CONSTANTRANGE_H
#include "llvm/ADT/APInt.h"
#include "llvm/Support/DataTypes.h"
diff --git a/include/llvm/Support/ConvertUTF.h b/include/llvm/Support/ConvertUTF.h
new file mode 100644
index 000000000000..1eae6d662229
--- /dev/null
+++ b/include/llvm/Support/ConvertUTF.h
@@ -0,0 +1,228 @@
+/*===--- ConvertUTF.h - Universal Character Names conversions ---------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *==------------------------------------------------------------------------==*/
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/* ---------------------------------------------------------------------
+
+ Conversions between UTF32, UTF-16, and UTF-8. Header file.
+
+    Several functions are included here, forming a complete set of
+ conversions between the three formats. UTF-7 is not included
+ here, but is handled in a separate source file.
+
+ Each of these routines takes pointers to input buffers and output
+ buffers. The input buffers are const.
+
+ Each routine converts the text between *sourceStart and sourceEnd,
+ putting the result into the buffer between *targetStart and
+ targetEnd. Note: the end pointers are *after* the last item: e.g.
+ *(sourceEnd - 1) is the last item.
+
+ The return result indicates whether the conversion was successful,
+ and if not, whether the problem was in the source or target buffers.
+ (Only the first encountered problem is indicated.)
+
+ After the conversion, *sourceStart and *targetStart are both
+ updated to point to the end of last text successfully converted in
+ the respective buffers.
+
+ Input parameters:
+ sourceStart - pointer to a pointer to the source buffer.
+ The contents of this are modified on return so that
+ it points at the next thing to be converted.
+ targetStart - similarly, pointer to pointer to the target buffer.
+ sourceEnd, targetEnd - respectively pointers to the ends of the
+ two buffers, for overflow checking only.
+
+ These conversion functions take a ConversionFlags argument. When this
+ flag is set to strict, both irregular sequences and isolated surrogates
+ will cause an error. When the flag is set to lenient, both irregular
+ sequences and isolated surrogates are converted.
+
+ Whether the flag is strict or lenient, all illegal sequences will cause
+ an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
+ or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
+ must check for illegal sequences.
+
+ When the flag is set to lenient, characters over 0x10FFFF are converted
+ to the replacement character; otherwise (when the flag is set to strict)
+ they constitute an error.
+
+ Output parameters:
+ The value "sourceIllegal" is returned from some routines if the input
+ sequence is malformed. When "sourceIllegal" is returned, the source
+ value will point to the illegal value that caused the problem. E.g.,
+ in UTF-8 when a sequence is malformed, it points to the start of the
+ malformed sequence.
+
+ Author: Mark E. Davis, 1994.
+ Rev History: Rick McGowan, fixes & updates May 2001.
+ Fixes & updates, Sept 2001.
+
+------------------------------------------------------------------------ */
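+
+/* A minimal usage sketch (illustrative only; the input bytes and buffer
+   sizes are assumptions for this example, not part of the API):
+
+     const UTF8 kIn[] = { 0x63, 0x61, 0x66, 0xC3, 0xA9 };  // "café" in UTF-8
+     const UTF8 *src = kIn;
+     UTF16 out[5];                  // one UTF-16 unit per input byte suffices
+     UTF16 *dst = out;
+     ConversionResult res = ConvertUTF8toUTF16(&src, kIn + sizeof(kIn),
+                                               &dst, out + 5,
+                                               strictConversion);
+     // On success, res == conversionOK and [out, dst) holds the UTF-16 text.
+*/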
+
+#ifndef LLVM_SUPPORT_CONVERTUTF_H
+#define LLVM_SUPPORT_CONVERTUTF_H
+
+/* ---------------------------------------------------------------------
+ The following 4 definitions are compiler-specific.
+ The C standard does not guarantee that wchar_t has at least
+ 16 bits, so wchar_t is no less portable than unsigned short!
+ All should be unsigned values to avoid sign extension during
+ bit mask & shift operations.
+------------------------------------------------------------------------ */
+
+typedef unsigned int UTF32; /* at least 32 bits */
+typedef unsigned short UTF16; /* at least 16 bits */
+typedef unsigned char UTF8; /* typically 8 bits */
+typedef unsigned char Boolean; /* 0 or 1 */
+
+/* Some fundamental constants */
+#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
+#define UNI_MAX_BMP (UTF32)0x0000FFFF
+#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
+#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
+#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
+
+#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4
+
+typedef enum {
+ conversionOK, /* conversion successful */
+ sourceExhausted, /* partial character in source, but hit end */
+ targetExhausted, /* insuff. room in target for conversion */
+ sourceIllegal /* source sequence is illegal/malformed */
+} ConversionResult;
+
+typedef enum {
+ strictConversion = 0,
+ lenientConversion
+} ConversionFlags;
+
+/* This is for C++ and does no harm in C */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ConversionResult ConvertUTF8toUTF16 (
+ const UTF8** sourceStart, const UTF8* sourceEnd,
+ UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF8toUTF32 (
+ const UTF8** sourceStart, const UTF8* sourceEnd,
+ UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF16toUTF8 (
+ const UTF16** sourceStart, const UTF16* sourceEnd,
+ UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF32toUTF8 (
+ const UTF32** sourceStart, const UTF32* sourceEnd,
+ UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF16toUTF32 (
+ const UTF16** sourceStart, const UTF16* sourceEnd,
+ UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
+
+ConversionResult ConvertUTF32toUTF16 (
+ const UTF32** sourceStart, const UTF32* sourceEnd,
+ UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
+
+Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
+
+Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
+
+unsigned getNumBytesForUTF8(UTF8 firstByte);
+
+#ifdef __cplusplus
+}
+
+/*************************************************************************/
+/* Below are LLVM-specific wrappers of the functions above. */
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+/**
+ * Convert a UTF8 StringRef to UTF8, UTF16, or UTF32 depending on
+ * WideCharWidth. The converted data is written to ResultPtr, which needs to
+ * point to at least WideCharWidth * (Source.size() + 1) bytes. On success,
+ * ResultPtr will point one after the end of the copied string. On failure,
+ * ResultPtr will not be changed, and ErrorPtr will be set to the location of
+ * the first character which could not be converted.
+ * \return true on success.
+ */
+bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
+ char *&ResultPtr, const UTF8 *&ErrorPtr);
+
+/**
+ * Convert a Unicode code point to a UTF8 sequence.
+ *
+ * \param Source a Unicode code point.
+ * \param [in,out] ResultPtr pointer to the output buffer, which must have
+ * room for at least \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes. On success
+ * \c ResultPtr is advanced one past the end of the converted sequence.
+ *
+ * \returns true on success.
+ */
+bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
+
+/**
+ * Convert the first UTF8 sequence in the given source buffer to a UTF32
+ * code point.
+ *
+ * \param [in,out] source A pointer to the source buffer. If the conversion
+ * succeeds, this pointer will be updated to point to the byte just past the
+ * end of the converted sequence.
+ * \param sourceEnd A pointer just past the end of the source buffer.
+ * \param [out] target The converted code point.
+ * \param flags Whether the conversion is strict or lenient.
+ *
+ * \returns conversionOK on success
+ *
+ * \sa ConvertUTF8toUTF32
+ */
+static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
+ const UTF8 *sourceEnd,
+ UTF32 *target,
+ ConversionFlags flags) {
+ if (*source == sourceEnd)
+ return sourceExhausted;
+ unsigned size = getNumBytesForUTF8(**source);
+ if ((ptrdiff_t)size > sourceEnd - *source)
+ return sourceExhausted;
+ return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags);
+}
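+
+/* A minimal sketch of decoding a buffer one code point at a time with this
+ * wrapper (illustrative only; `Buf` and `Len` are assumed to be provided by
+ * the caller):
+ *
+ *   const UTF8 *cur = Buf, *end = Buf + Len;
+ *   while (cur != end) {
+ *     UTF32 cp;
+ *     if (convertUTF8Sequence(&cur, end, &cp, strictConversion) !=
+ *         conversionOK)
+ *       break;                  // malformed or truncated sequence
+ *     // ... consume the code point `cp` ...
+ *   }
+ */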
+} /* end namespace llvm */
+
+#endif
+
+/* --------------------------------------------------------------------- */
+
+#endif
diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h
index 483f2674af7b..95e37c01d7d5 100644
--- a/include/llvm/Support/DOTGraphTraits.h
+++ b/include/llvm/Support/DOTGraphTraits.h
@@ -79,6 +79,11 @@ public:
return false;
}
+ template<typename GraphType>
+ static std::string getNodeDescription(const void *, const GraphType &) {
+ return "";
+ }
+
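+  // A minimal sketch of overriding this hook in a specialization
+  // (illustrative only; MyGraph is an assumed graph type):
+  //   template <> struct DOTGraphTraits<MyGraph *> : DefaultDOTGraphTraits {
+  //     template <typename GraphType>
+  //     static std::string getNodeDescription(const void *Node,
+  //                                           const GraphType &) {
+  //       return "extra per-node text";
+  //     }
+  //   };
+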
/// If you want to specify custom node attributes, this is the place to do so
///
template<typename GraphType>
diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h
index a3ae78204074..e8a19cd773b5 100644
--- a/include/llvm/Support/DataExtractor.h
+++ b/include/llvm/Support/DataExtractor.h
@@ -18,22 +18,24 @@ namespace llvm {
class DataExtractor {
StringRef Data;
uint8_t IsLittleEndian;
- uint8_t PointerSize;
+ uint8_t AddressSize;
public:
/// Construct with a buffer that is owned by the caller.
///
/// This constructor allows us to use data that is owned by the
/// caller. The data must stay around as long as this object is
/// valid.
- DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t PointerSize)
- : Data(Data), IsLittleEndian(IsLittleEndian), PointerSize(PointerSize) {}
+ DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
+ : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
- /// getData - Get the data pointed to by this extractor.
+ /// \brief Get the data pointed to by this extractor.
StringRef getData() const { return Data; }
- /// isLittleEndian - Get the endianess for this extractor.
+  /// \brief Get the endianness for this extractor.
bool isLittleEndian() const { return IsLittleEndian; }
- /// getAddressSize - Get the address size for this extractor.
- uint8_t getAddressSize() const { return PointerSize; }
+ /// \brief Get the address size for this extractor.
+ uint8_t getAddressSize() const { return AddressSize; }
+ /// \brief Set the address size for this extractor.
+ void setAddressSize(uint8_t Size) { AddressSize = Size; }
/// Extract a C string from \a *offset_ptr.
///
@@ -113,7 +115,7 @@ public:
///
/// Extract a single pointer from the data and update the offset
/// pointed to by \a offset_ptr. The size of the extracted pointer
- /// comes from the \a m_addr_size member variable and should be
+ /// is \a getAddressSize(), so the address size has to be
/// set correctly prior to extracting any pointer values.
///
/// @param[in,out] offset_ptr
@@ -126,7 +128,7 @@ public:
/// @return
  ///     The extracted pointer value as a 64-bit integer.
uint64_t getAddress(uint32_t *offset_ptr) const {
- return getUnsigned(offset_ptr, PointerSize);
+ return getUnsigned(offset_ptr, AddressSize);
}
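+
+  // A minimal usage sketch (illustrative only; `Bytes` is an assumed
+  // StringRef over the raw data being decoded):
+  //   DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
+  //   uint32_t Offset = 0;
+  //   uint64_t Addr = DE.getAddress(&Offset); // reads 8 bytes, advances Offset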
/// Extract a uint8_t value from \a *offset_ptr.
diff --git a/include/llvm/Support/DataFlow.h b/include/llvm/Support/DataFlow.h
index 355c402f542d..a09ccaac2789 100644
--- a/include/llvm/Support/DataFlow.h
+++ b/include/llvm/Support/DataFlow.h
@@ -14,8 +14,8 @@
#ifndef LLVM_SUPPORT_DATAFLOW_H
#define LLVM_SUPPORT_DATAFLOW_H
-#include "llvm/User.h"
#include "llvm/ADT/GraphTraits.h"
+#include "llvm/IR/User.h"
namespace llvm {
diff --git a/include/llvm/Support/DataStream.h b/include/llvm/Support/DataStream.h
index fedb0c925611..8bc413360307 100644
--- a/include/llvm/Support/DataStream.h
+++ b/include/llvm/Support/DataStream.h
@@ -14,8 +14,8 @@
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_DATASTREAM_H_
-#define LLVM_SUPPORT_DATASTREAM_H_
+#ifndef LLVM_SUPPORT_DATASTREAM_H
+#define LLVM_SUPPORT_DATASTREAM_H
#include <string>
diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h
index 049807570711..f35d40729263 100644
--- a/include/llvm/Support/DebugLoc.h
+++ b/include/llvm/Support/DebugLoc.h
@@ -9,7 +9,7 @@
//
// This file defines a number of light weight data structures used
// to describe and track debug location information.
-//
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_DEBUGLOC_H
@@ -19,7 +19,7 @@ namespace llvm {
template <typename T> struct DenseMapInfo;
class MDNode;
class LLVMContext;
-
+
/// DebugLoc - Debug location id. This is carried by Instruction, SDNode,
/// and MachineInstr to compactly encode file/line/scope information for an
/// operation.
@@ -46,18 +46,18 @@ namespace llvm {
/// location, encoded as 24-bits for line and 8 bits for col. A value of 0
/// for either means unknown.
unsigned LineCol;
-
+
/// ScopeIdx - This is an opaque ID# for Scope/InlinedAt information,
/// decoded by LLVMContext. 0 is unknown.
int ScopeIdx;
public:
DebugLoc() : LineCol(0), ScopeIdx(0) {} // Defaults to unknown.
-
+
/// get - Get a new DebugLoc that corresponds to the specified line/col
/// scope/inline location.
static DebugLoc get(unsigned Line, unsigned Col,
MDNode *Scope, MDNode *InlinedAt = 0);
-
+
/// getFromDILocation - Translate the DILocation quad into a DebugLoc.
static DebugLoc getFromDILocation(MDNode *N);
@@ -66,32 +66,32 @@ namespace llvm {
/// isUnknown - Return true if this is an unknown location.
bool isUnknown() const { return ScopeIdx == 0; }
-
+
unsigned getLine() const {
return (LineCol << 8) >> 8; // Mask out column.
}
-
+
unsigned getCol() const {
return LineCol >> 24;
}
-
+
/// getScope - This returns the scope pointer for this DebugLoc, or null if
/// invalid.
MDNode *getScope(const LLVMContext &Ctx) const;
-
+
/// getInlinedAt - This returns the InlinedAt pointer for this DebugLoc, or
/// null if invalid or not present.
MDNode *getInlinedAt(const LLVMContext &Ctx) const;
-
+
/// getScopeAndInlinedAt - Return both the Scope and the InlinedAt values.
void getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
const LLVMContext &Ctx) const;
-
-
+
+
/// getAsMDNode - This method converts the compressed DebugLoc node into a
/// DILocation compatible MDNode.
MDNode *getAsMDNode(const LLVMContext &Ctx) const;
-
+
bool operator==(const DebugLoc &DL) const {
return LineCol == DL.LineCol && ScopeIdx == DL.ScopeIdx;
}
@@ -109,4 +109,4 @@ namespace llvm {
};
} // end namespace llvm
-#endif /* LLVM_DEBUGLOC_H */
+#endif /* LLVM_SUPPORT_DEBUGLOC_H */
diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h
index 8f18a991a9e1..b52914f93851 100644
--- a/include/llvm/Support/Dwarf.h
+++ b/include/llvm/Support/Dwarf.h
@@ -16,6 +16,9 @@
#ifndef LLVM_SUPPORT_DWARF_H
#define LLVM_SUPPORT_DWARF_H
+#include "llvm/Support/DataTypes.h"
+
+
namespace llvm {
//===----------------------------------------------------------------------===//
@@ -37,7 +40,7 @@ enum {
namespace dwarf {
//===----------------------------------------------------------------------===//
-// Dwarf constants as gleaned from the DWARF Debugging Information Format V.3
+// Dwarf constants as gleaned from the DWARF Debugging Information Format V.4
// reference manual http://dwarf.freestandards.org .
//
@@ -50,15 +53,19 @@ enum llvm_dwarf_constants {
DW_TAG_auto_variable = 0x100, // Tag for local (auto) variables.
DW_TAG_arg_variable = 0x101, // Tag for argument variables.
- DW_TAG_return_variable = 0x102, // Tag for return variables.
- DW_TAG_vector_type = 0x103, // Tag for vector types.
DW_TAG_user_base = 0x1000, // Recommended base for user tags.
- DW_CIE_VERSION = 1, // Common frame information version.
- DW_CIE_ID = 0xffffffff // Common frame information mark.
+ DW_CIE_VERSION = 1 // Common frame information version.
};
+
+// Special ID values that distinguish a CIE from a FDE in DWARF CFI.
+// Not inside an enum because a 64-bit value is needed.
+const uint32_t DW_CIE_ID = UINT32_MAX;
+const uint64_t DW64_CIE_ID = UINT64_MAX;
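+
+// A minimal sketch of the intended use (illustrative only; `ID` is assumed to
+// be the id value read from a CFI entry header, `Is64Bit` the DWARF format):
+//   bool IsCIE = Is64Bit ? (ID == DW64_CIE_ID) : (ID == DW_CIE_ID);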
+
+
enum dwarf_constants {
DWARF_VERSION = 2,
@@ -231,6 +238,10 @@ enum dwarf_constants {
DW_AT_const_expr = 0x6c,
DW_AT_enum_class = 0x6d,
DW_AT_linkage_name = 0x6e,
+
+ DW_AT_lo_user = 0x2000,
+ DW_AT_hi_user = 0x3fff,
+
DW_AT_MIPS_loop_begin = 0x2002,
DW_AT_MIPS_tail_loop_begin = 0x2003,
DW_AT_MIPS_epilog_begin = 0x2004,
@@ -246,6 +257,12 @@ enum dwarf_constants {
DW_AT_MIPS_ptr_dopetype = 0x200e,
DW_AT_MIPS_allocatable_dopetype = 0x200f,
DW_AT_MIPS_assumed_shape_dopetype = 0x2010,
+
+ // This one appears to have only been implemented by Open64 for
+  // Fortran and may conflict with other extensions.
+ DW_AT_MIPS_assumed_size = 0x2011,
+
+ // GNU extensions
DW_AT_sf_names = 0x2101,
DW_AT_src_info = 0x2102,
DW_AT_mac_info = 0x2103,
@@ -254,9 +271,14 @@ enum dwarf_constants {
DW_AT_body_end = 0x2106,
DW_AT_GNU_vector = 0x2107,
DW_AT_GNU_template_name = 0x2110,
- DW_AT_MIPS_assumed_size = 0x2011,
- DW_AT_lo_user = 0x2000,
- DW_AT_hi_user = 0x3fff,
+
+ // Extensions for Fission proposal.
+ DW_AT_GNU_dwo_name = 0x2130,
+ DW_AT_GNU_dwo_id = 0x2131,
+ DW_AT_GNU_ranges_base = 0x2132,
+ DW_AT_GNU_addr_base = 0x2133,
+ DW_AT_GNU_pubnames = 0x2134,
+ DW_AT_GNU_pubtypes = 0x2135,
// Apple extensions.
DW_AT_APPLE_optimized = 0x3fe1,
@@ -300,6 +322,10 @@ enum dwarf_constants {
DW_FORM_flag_present = 0x19,
DW_FORM_ref_sig8 = 0x20,
+ // Extensions for Fission proposal
+ DW_FORM_GNU_addr_index = 0x1f01,
+ DW_FORM_GNU_str_index = 0x1f02,
+
// Operation encodings
DW_OP_addr = 0x03,
DW_OP_deref = 0x06,
@@ -458,6 +484,10 @@ enum dwarf_constants {
DW_OP_lo_user = 0xe0,
DW_OP_hi_user = 0xff,
+ // Extensions for Fission proposal.
+ DW_OP_GNU_addr_index = 0xfb,
+ DW_OP_GNU_const_index = 0xfc,
+
// Encoding attribute values
DW_ATE_address = 0x01,
DW_ATE_boolean = 0x02,
diff --git a/include/llvm/Support/DynamicLibrary.h b/include/llvm/Support/DynamicLibrary.h
index 0f59cbf23947..1e2d16ccbc81 100644
--- a/include/llvm/Support/DynamicLibrary.h
+++ b/include/llvm/Support/DynamicLibrary.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_DYNAMIC_LIBRARY_H
-#define LLVM_SYSTEM_DYNAMIC_LIBRARY_H
+#ifndef LLVM_SYSTEM_DYNAMICLIBRARY_H
+#define LLVM_SYSTEM_DYNAMICLIBRARY_H
#include <string>
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index 2cd267116cab..ea597fc1a251 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -271,6 +271,7 @@ enum {
EM_SLE9X = 179, // Infineon Technologies SLE9X core
EM_L10M = 180, // Intel L10M
EM_K10M = 181, // Intel K10M
+ EM_AARCH64 = 183, // ARM AArch64
EM_AVR32 = 185, // Atmel Corporation 32-bit microprocessor family
  EM_STM8          = 186, // STMicroelectronics STM8 8-bit microcontroller
EM_TILE64 = 187, // Tilera TILE64 multicore architecture family
@@ -366,7 +367,8 @@ enum {
R_X86_64_SIZE64 = 33,
R_X86_64_GOTPC32_TLSDESC = 34,
R_X86_64_TLSDESC_CALL = 35,
- R_X86_64_TLSDESC = 36
+ R_X86_64_TLSDESC = 36,
+ R_X86_64_IRELATIVE = 37
};
// i386 relocations.
@@ -464,20 +466,140 @@ enum {
// ELF Relocation types for PPC64
enum {
+ R_PPC64_ADDR32 = 1,
R_PPC64_ADDR16_LO = 4,
R_PPC64_ADDR16_HI = 5,
R_PPC64_ADDR14 = 7,
R_PPC64_REL24 = 10,
+ R_PPC64_REL32 = 26,
R_PPC64_ADDR64 = 38,
R_PPC64_ADDR16_HIGHER = 39,
R_PPC64_ADDR16_HIGHEST = 41,
+ R_PPC64_REL64 = 44,
R_PPC64_TOC16 = 47,
+ R_PPC64_TOC16_LO = 48,
+ R_PPC64_TOC16_HA = 50,
R_PPC64_TOC = 51,
- R_PPC64_TOC16_DS = 63
+ R_PPC64_ADDR16_DS = 56,
+ R_PPC64_ADDR16_LO_DS = 57,
+ R_PPC64_TOC16_DS = 63,
+ R_PPC64_TOC16_LO_DS = 64,
+ R_PPC64_TLS = 67,
+ R_PPC64_TPREL16_LO = 70,
+ R_PPC64_DTPREL16_LO = 75,
+ R_PPC64_DTPREL16_HA = 77,
+ R_PPC64_GOT_TLSGD16_LO = 80,
+ R_PPC64_GOT_TLSGD16_HA = 82,
+ R_PPC64_GOT_TLSLD16_LO = 84,
+ R_PPC64_GOT_TLSLD16_HA = 86,
+ R_PPC64_GOT_TPREL16_LO_DS = 88,
+ R_PPC64_GOT_TPREL16_HA = 90,
+ R_PPC64_TLSGD = 107,
+ R_PPC64_TLSLD = 108
+};
+
+// ELF Relocation types for AArch64
+
+enum {
+ R_AARCH64_NONE = 0x100,
+
+ R_AARCH64_ABS64 = 0x101,
+ R_AARCH64_ABS32 = 0x102,
+ R_AARCH64_ABS16 = 0x103,
+ R_AARCH64_PREL64 = 0x104,
+ R_AARCH64_PREL32 = 0x105,
+ R_AARCH64_PREL16 = 0x106,
+
+ R_AARCH64_MOVW_UABS_G0 = 0x107,
+ R_AARCH64_MOVW_UABS_G0_NC = 0x108,
+ R_AARCH64_MOVW_UABS_G1 = 0x109,
+ R_AARCH64_MOVW_UABS_G1_NC = 0x10a,
+ R_AARCH64_MOVW_UABS_G2 = 0x10b,
+ R_AARCH64_MOVW_UABS_G2_NC = 0x10c,
+ R_AARCH64_MOVW_UABS_G3 = 0x10d,
+ R_AARCH64_MOVW_SABS_G0 = 0x10e,
+ R_AARCH64_MOVW_SABS_G1 = 0x10f,
+ R_AARCH64_MOVW_SABS_G2 = 0x110,
+
+ R_AARCH64_LD_PREL_LO19 = 0x111,
+ R_AARCH64_ADR_PREL_LO21 = 0x112,
+ R_AARCH64_ADR_PREL_PG_HI21 = 0x113,
+ R_AARCH64_ADD_ABS_LO12_NC = 0x115,
+ R_AARCH64_LDST8_ABS_LO12_NC = 0x116,
+
+ R_AARCH64_TSTBR14 = 0x117,
+ R_AARCH64_CONDBR19 = 0x118,
+ R_AARCH64_JUMP26 = 0x11a,
+ R_AARCH64_CALL26 = 0x11b,
+
+ R_AARCH64_LDST16_ABS_LO12_NC = 0x11c,
+ R_AARCH64_LDST32_ABS_LO12_NC = 0x11d,
+ R_AARCH64_LDST64_ABS_LO12_NC = 0x11e,
+
+ R_AARCH64_LDST128_ABS_LO12_NC = 0x12b,
+
+ R_AARCH64_ADR_GOT_PAGE = 0x137,
+ R_AARCH64_LD64_GOT_LO12_NC = 0x138,
+
+ R_AARCH64_TLSLD_MOVW_DTPREL_G2 = 0x20b,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G1 = 0x20c,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC = 0x20d,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G0 = 0x20e,
+ R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC = 0x20f,
+ R_AARCH64_TLSLD_ADD_DTPREL_HI12 = 0x210,
+ R_AARCH64_TLSLD_ADD_DTPREL_LO12 = 0x211,
+ R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC = 0x212,
+ R_AARCH64_TLSLD_LDST8_DTPREL_LO12 = 0x213,
+ R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC = 0x214,
+ R_AARCH64_TLSLD_LDST16_DTPREL_LO12 = 0x215,
+ R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC = 0x216,
+ R_AARCH64_TLSLD_LDST32_DTPREL_LO12 = 0x217,
+ R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC = 0x218,
+ R_AARCH64_TLSLD_LDST64_DTPREL_LO12 = 0x219,
+ R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC = 0x21a,
+
+ R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 = 0x21b,
+ R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC = 0x21c,
+ R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 = 0x21d,
+ R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC = 0x21e,
+ R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 = 0x21f,
+
+ R_AARCH64_TLSLE_MOVW_TPREL_G2 = 0x220,
+ R_AARCH64_TLSLE_MOVW_TPREL_G1 = 0x221,
+ R_AARCH64_TLSLE_MOVW_TPREL_G1_NC = 0x222,
+ R_AARCH64_TLSLE_MOVW_TPREL_G0 = 0x223,
+ R_AARCH64_TLSLE_MOVW_TPREL_G0_NC = 0x224,
+ R_AARCH64_TLSLE_ADD_TPREL_HI12 = 0x225,
+ R_AARCH64_TLSLE_ADD_TPREL_LO12 = 0x226,
+ R_AARCH64_TLSLE_ADD_TPREL_LO12_NC = 0x227,
+ R_AARCH64_TLSLE_LDST8_TPREL_LO12 = 0x228,
+ R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC = 0x229,
+ R_AARCH64_TLSLE_LDST16_TPREL_LO12 = 0x22a,
+ R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC = 0x22b,
+ R_AARCH64_TLSLE_LDST32_TPREL_LO12 = 0x22c,
+ R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC = 0x22d,
+ R_AARCH64_TLSLE_LDST64_TPREL_LO12 = 0x22e,
+ R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC = 0x22f,
+
+ R_AARCH64_TLSDESC_ADR_PAGE = 0x232,
+ R_AARCH64_TLSDESC_LD64_LO12_NC = 0x233,
+ R_AARCH64_TLSDESC_ADD_LO12_NC = 0x234,
+
+ R_AARCH64_TLSDESC_CALL = 0x239
};
// ARM Specific e_flags
-enum { EF_ARM_EABIMASK = 0xFF000000U };
+enum {
+ EF_ARM_SOFT_FLOAT = 0x00000200U,
+ EF_ARM_VFP_FLOAT = 0x00000400U,
+ EF_ARM_EABI_UNKNOWN = 0x00000000U,
+ EF_ARM_EABI_VER1 = 0x01000000U,
+ EF_ARM_EABI_VER2 = 0x02000000U,
+ EF_ARM_EABI_VER3 = 0x03000000U,
+ EF_ARM_EABI_VER4 = 0x04000000U,
+ EF_ARM_EABI_VER5 = 0x05000000U,
+ EF_ARM_EABIMASK = 0xFF000000U
+};
// ELF Relocation types for ARM
// Meets 2.08 ABI Specs.
@@ -621,6 +743,13 @@ enum {
EF_MIPS_NOREORDER = 0x00000001, // Don't reorder instructions
EF_MIPS_PIC = 0x00000002, // Position independent code
  EF_MIPS_CPIC = 0x00000004,      // Call object with position independent code
+ EF_MIPS_ABI_O32 = 0x00001000, // This file follows the first MIPS 32 bit ABI
+
+  // ARCH_ASE
+  EF_MIPS_MICROMIPS = 0x02000000,    // microMIPS
+  EF_MIPS_ARCH_ASE_M16 = 0x04000000, // Has MIPS-16 ISA extensions
+  // ARCH
EF_MIPS_ARCH_1 = 0x00000000, // MIPS1 instruction set
EF_MIPS_ARCH_2 = 0x10000000, // MIPS2 instruction set
EF_MIPS_ARCH_3 = 0x20000000, // MIPS3 instruction set
@@ -691,6 +820,11 @@ enum {
R_MIPS_NUM = 218
};
+// Special values for the st_other field in the symbol table entry for MIPS.
+enum {
+ STO_MIPS_MICROMIPS = 0x80 // MIPS Specific ISA for MicroMips
+};
+
// Hexagon Specific e_flags
// Release 5 ABI
enum {
@@ -710,14 +844,14 @@ enum {
};
// Hexagon specific Section indexes for common small data
-// Release 5 ABI
+// Release 5 ABI
enum {
SHN_HEXAGON_SCOMMON = 0xff00, // Other access sizes
SHN_HEXAGON_SCOMMON_1 = 0xff01, // Byte-sized access
SHN_HEXAGON_SCOMMON_2 = 0xff02, // Half-word-sized access
SHN_HEXAGON_SCOMMON_4 = 0xff03, // Word-sized access
SHN_HEXAGON_SCOMMON_8 = 0xff04 // Double-word-size access
-};
+};
// ELF Relocation types for Hexagon
// Release 5 ABI
@@ -878,7 +1012,7 @@ enum {
SHT_GNU_verneed = 0x6ffffffe, // GNU version references.
SHT_GNU_versym = 0x6fffffff, // GNU symbol versions table.
SHT_HIOS = 0x6fffffff, // Highest operating system-specific type.
- SHT_LOPROC = 0x70000000, // Lowest processor architecture-specific type.
+ SHT_LOPROC = 0x70000000, // Lowest processor arch-specific type.
// Fixme: All this is duplicated in MCSectionELF. Why??
// Exception Index table
SHT_ARM_EXIDX = 0x70000001U,
@@ -888,10 +1022,14 @@ enum {
SHT_ARM_ATTRIBUTES = 0x70000003U,
SHT_ARM_DEBUGOVERLAY = 0x70000004U,
SHT_ARM_OVERLAYSECTION = 0x70000005U,
-
+ SHT_HEX_ORDERED = 0x70000000, // Link editor is to sort the entries in
+ // this section based on their sizes
SHT_X86_64_UNWIND = 0x70000001, // Unwind information
- SHT_HIPROC = 0x7fffffff, // Highest processor architecture-specific type.
+ SHT_MIPS_REGINFO = 0x70000006, // Register usage information
+ SHT_MIPS_OPTIONS = 0x7000000d, // General options
+
+ SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type.
SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
};
@@ -953,7 +1091,14 @@ enum {
// sets this flag besides being able to refer to data in a section that does
// not set it; likewise, a small code model object can refer only to code in a
// section that does not set this flag.
- SHF_X86_64_LARGE = 0x10000000
+ SHF_X86_64_LARGE = 0x10000000,
+
+ // All sections with the GPREL flag are grouped into a global data area
+ // for faster accesses
+ SHF_HEX_GPREL = 0x10000000,
+
+ // Do not strip this section. FIXME: We need target specific SHF_ enums.
+ SHF_MIPS_NOSTRIP = 0x8000000
};
// Section Group Flags
@@ -988,7 +1133,7 @@ struct Elf64_Sym {
Elf64_Word st_name; // Symbol name (index into string table)
unsigned char st_info; // Symbol's type and binding attributes
unsigned char st_other; // Must be zero; reserved
- Elf64_Half st_shndx; // Which section (header table index) it's defined in
+ Elf64_Half st_shndx; // Which section (header tbl index) it's defined in
Elf64_Addr st_value; // Value or address associated with the symbol
Elf64_Xword st_size; // Size of the symbol
@@ -1043,6 +1188,11 @@ enum {
STV_PROTECTED = 3 // Visible in other components but not preemptable
};
+// Symbol number.
+enum {
+ STN_UNDEF = 0
+};
+
// Relocation entry, without explicit addend.
struct Elf32_Rel {
Elf32_Addr r_offset; // Location (file byte offset, or program virtual addr)
@@ -1083,14 +1233,14 @@ struct Elf64_Rel {
// These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
// and ELF64_R_INFO macros defined in the ELF specification:
- Elf64_Xword getSymbol() const { return (r_info >> 32); }
- unsigned char getType() const {
- return (unsigned char) (r_info & 0xffffffffL);
+ Elf64_Word getSymbol() const { return (r_info >> 32); }
+ Elf64_Word getType() const {
+ return (Elf64_Word) (r_info & 0xffffffffL);
}
- void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); }
- void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
- void setSymbolAndType(Elf64_Xword s, unsigned char t) {
- r_info = (s << 32) + (t&0xffffffffL);
+ void setSymbol(Elf64_Word s) { setSymbolAndType(s, getType()); }
+ void setType(Elf64_Word t) { setSymbolAndType(getSymbol(), t); }
+ void setSymbolAndType(Elf64_Word s, Elf64_Word t) {
+ r_info = ((Elf64_Xword)s << 32) + (t&0xffffffffL);
}
};
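+
+// A minimal sketch of the packing these accessors implement (illustrative
+// only; `SymIndex` is an assumed symbol table index): the symbol lives in the
+// high 32 bits of r_info, the relocation type in the low 32 bits.
+//   Elf64_Rel R;
+//   R.setSymbolAndType(SymIndex, R_X86_64_IRELATIVE);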
@@ -1102,14 +1252,14 @@ struct Elf64_Rela {
// These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
// and ELF64_R_INFO macros defined in the ELF specification:
- Elf64_Xword getSymbol() const { return (r_info >> 32); }
- unsigned char getType() const {
- return (unsigned char) (r_info & 0xffffffffL);
+ Elf64_Word getSymbol() const { return (r_info >> 32); }
+ Elf64_Word getType() const {
+ return (Elf64_Word) (r_info & 0xffffffffL);
}
- void setSymbol(Elf64_Xword s) { setSymbolAndType(s, getType()); }
- void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
- void setSymbolAndType(Elf64_Xword s, unsigned char t) {
- r_info = (s << 32) + (t&0xffffffffL);
+ void setSymbol(Elf64_Word s) { setSymbolAndType(s, getType()); }
+ void setType(Elf64_Word t) { setSymbolAndType(getSymbol(), t); }
+ void setSymbolAndType(Elf64_Word s, Elf64_Word t) {
+ r_info = ((Elf64_Xword)s << 32) + (t&0xffffffffL);
}
};
@@ -1131,7 +1281,7 @@ struct Elf64_Phdr {
Elf64_Word p_flags; // Segment flags
Elf64_Off p_offset; // File offset where segment is located, in bytes
Elf64_Addr p_vaddr; // Virtual address of beginning of segment
- Elf64_Addr p_paddr; // Physical address of beginning of segment (OS-specific)
+ Elf64_Addr p_paddr; // Physical addr of beginning of segment (OS-specific)
Elf64_Xword p_filesz; // Num. of bytes in file image of segment (may be zero)
Elf64_Xword p_memsz; // Num. of bytes in mem image of segment (may be zero)
Elf64_Xword p_align; // Segment alignment constraint
@@ -1162,7 +1312,7 @@ enum {
PT_GNU_RELRO = 0x6474e552, // Read-only after relocation.
// ARM program header types.
- PT_ARM_ARCHEXT = 0x70000000, // Platform architecture compatibility information
+ PT_ARM_ARCHEXT = 0x70000000, // Platform architecture compatibility info
// These all contain stack unwind tables.
PT_ARM_EXIDX = 0x70000001,
PT_ARM_UNWIND = 0x70000001
diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h
index 8d5649dc1f91..d438facfa4e1 100644
--- a/include/llvm/Support/Endian.h
+++ b/include/llvm/Support/Endian.h
@@ -14,136 +14,78 @@
#ifndef LLVM_SUPPORT_ENDIAN_H
#define LLVM_SUPPORT_ENDIAN_H
+#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/type_traits.h"
namespace llvm {
namespace support {
+enum endianness {big, little, native};
-enum endianness {big, little};
-enum alignment {unaligned, aligned};
+// These are named values for common alignments.
+enum {aligned = 0, unaligned = 1};
namespace detail {
-
-template<typename value_type, alignment align>
-struct alignment_access_helper;
-
-template<typename value_type>
-struct alignment_access_helper<value_type, aligned>
-{
- value_type val;
-};
-
-// Provides unaligned loads and stores.
-#pragma pack(push)
-#pragma pack(1)
-template<typename value_type>
-struct alignment_access_helper<value_type, unaligned>
-{
- value_type val;
-};
-#pragma pack(pop)
-
+ /// \brief ::value is either alignment, or alignof(T) if alignment is 0.
+ template<class T, int alignment>
+ struct PickAlignment {
+ enum {value = alignment == 0 ? AlignOf<T>::Alignment : alignment};
+ };
} // end namespace detail
namespace endian {
- template<typename value_type, alignment align>
- inline value_type read_le(const void *memory) {
- value_type t =
- reinterpret_cast<const detail::alignment_access_helper
- <value_type, align> *>(memory)->val;
- if (sys::isBigEndianHost())
- return sys::SwapByteOrder(t);
- return t;
- }
-
- template<typename value_type, alignment align>
- inline void write_le(void *memory, value_type value) {
- if (sys::isBigEndianHost())
- value = sys::SwapByteOrder(value);
- reinterpret_cast<detail::alignment_access_helper<value_type, align> *>
- (memory)->val = value;
- }
+template<typename value_type, endianness endian>
+inline value_type byte_swap(value_type value) {
+ if (endian != native && sys::isBigEndianHost() != (endian == big))
+ return sys::SwapByteOrder(value);
+ return value;
+}
- template<typename value_type, alignment align>
- inline value_type read_be(const void *memory) {
- value_type t =
- reinterpret_cast<const detail::alignment_access_helper
- <value_type, align> *>(memory)->val;
- if (sys::isLittleEndianHost())
- return sys::SwapByteOrder(t);
- return t;
- }
+template<typename value_type,
+ endianness endian,
+ std::size_t alignment>
+inline value_type read(const void *memory) {
+ value_type ret;
+
+ memcpy(&ret,
+ LLVM_ASSUME_ALIGNED(memory,
+ (detail::PickAlignment<value_type, alignment>::value)),
+ sizeof(value_type));
+ return byte_swap<value_type, endian>(ret);
+}
- template<typename value_type, alignment align>
- inline void write_be(void *memory, value_type value) {
- if (sys::isLittleEndianHost())
- value = sys::SwapByteOrder(value);
- reinterpret_cast<detail::alignment_access_helper<value_type, align> *>
- (memory)->val = value;
- }
+template<typename value_type,
+ endianness endian,
+ std::size_t alignment>
+inline void write(void *memory, value_type value) {
+ value = byte_swap<value_type, endian>(value);
+ memcpy(LLVM_ASSUME_ALIGNED(memory,
+ (detail::PickAlignment<value_type, alignment>::value)),
+ &value,
+ sizeof(value_type));
}
+} // end namespace endian
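+
+// A minimal sketch of the new primitives (illustrative only; `Buf` is an
+// assumed pointer to at least 4 readable/writable bytes):
+//   uint32_t V = endian::read<uint32_t, little, unaligned>(Buf);
+//   endian::write<uint32_t, big, aligned>(Buf, V);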
namespace detail {
-
template<typename value_type,
endianness endian,
- alignment align>
-class packed_endian_specific_integral;
-
-template<typename value_type>
-class packed_endian_specific_integral<value_type, little, unaligned> {
-public:
- operator value_type() const {
- return endian::read_le<value_type, unaligned>(Value);
- }
- void operator=(value_type newValue) {
- endian::write_le<value_type, unaligned>((void *)&Value, newValue);
- }
-private:
- uint8_t Value[sizeof(value_type)];
-};
-
-template<typename value_type>
-class packed_endian_specific_integral<value_type, big, unaligned> {
-public:
+ std::size_t alignment>
+struct packed_endian_specific_integral {
operator value_type() const {
- return endian::read_be<value_type, unaligned>(Value);
+ return endian::read<value_type, endian, alignment>(
+ (const void*)Value.buffer);
}
- void operator=(value_type newValue) {
- endian::write_be<value_type, unaligned>((void *)&Value, newValue);
- }
-private:
- uint8_t Value[sizeof(value_type)];
-};
-template<typename value_type>
-class packed_endian_specific_integral<value_type, little, aligned> {
-public:
- operator value_type() const {
- return endian::read_le<value_type, aligned>(&Value);
- }
void operator=(value_type newValue) {
- endian::write_le<value_type, aligned>((void *)&Value, newValue);
+ endian::write<value_type, endian, alignment>(
+ (void*)Value.buffer, newValue);
}
-private:
- value_type Value;
-};
-template<typename value_type>
-class packed_endian_specific_integral<value_type, big, aligned> {
-public:
- operator value_type() const {
- return endian::read_be<value_type, aligned>(&Value);
- }
- void operator=(value_type newValue) {
- endian::write_be<value_type, aligned>((void *)&Value, newValue);
- }
private:
- value_type Value;
+ AlignedCharArray<PickAlignment<value_type, alignment>::value,
+ sizeof(value_type)> Value;
};
-
} // end namespace detail
typedef detail::packed_endian_specific_integral
@@ -218,6 +160,19 @@ typedef detail::packed_endian_specific_integral
typedef detail::packed_endian_specific_integral
<int64_t, big, aligned> aligned_big64_t;
+typedef detail::packed_endian_specific_integral
+ <uint16_t, native, unaligned> unaligned_uint16_t;
+typedef detail::packed_endian_specific_integral
+ <uint32_t, native, unaligned> unaligned_uint32_t;
+typedef detail::packed_endian_specific_integral
+ <uint64_t, native, unaligned> unaligned_uint64_t;
+
+typedef detail::packed_endian_specific_integral
+ <int16_t, native, unaligned> unaligned_int16_t;
+typedef detail::packed_endian_specific_integral
+ <int32_t, native, unaligned> unaligned_int32_t;
+typedef detail::packed_endian_specific_integral
+ <int64_t, native, unaligned> unaligned_int64_t;
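+
+// A minimal sketch using the wrappers (illustrative only; `Buf` is an assumed
+// byte pointer and ulittle32_t one of the typedefs declared earlier):
+//   const ulittle32_t *P = reinterpret_cast<const ulittle32_t *>(Buf);
+//   uint32_t HostValue = *P; // byte-swapped as needed on big-endian hosts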
} // end namespace support
} // end namespace llvm
diff --git a/include/llvm/Support/Errno.h b/include/llvm/Support/Errno.h
index 150bdb701626..8e145c7b0b51 100644
--- a/include/llvm/Support/Errno.h
+++ b/include/llvm/Support/Errno.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_ERRNO_H
-#define LLVM_SYSTEM_ERRNO_H
+#ifndef LLVM_SUPPORT_ERRNO_H
+#define LLVM_SUPPORT_ERRNO_H
#include <string>
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index 95b01095c1b2..b948d97bff92 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -15,8 +15,8 @@
#ifndef LLVM_SUPPORT_ERRORHANDLING_H
#define LLVM_SUPPORT_ERRORHANDLING_H
-#include "llvm/Support/Compiler.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
#include <string>
namespace llvm {
@@ -24,7 +24,8 @@ namespace llvm {
/// An error handler callback.
typedef void (*fatal_error_handler_t)(void *user_data,
- const std::string& reason);
+ const std::string& reason,
+ bool gen_crash_diag);
/// install_fatal_error_handler - Installs a new error handler to be used
/// whenever a serious (non-recoverable) error is encountered by LLVM.
@@ -73,10 +74,14 @@ namespace llvm {
/// standard error, followed by a newline.
/// After the error handler is called this function will call exit(1), it
/// does not return.
- LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason);
- LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason);
- LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason);
- LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason);
+ LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason,
+ bool gen_crash_diag = true);
+ LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason,
+ bool gen_crash_diag = true);
+ LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason,
+ bool gen_crash_diag = true);
+ LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason,
+ bool gen_crash_diag = true);
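+
+  // A minimal sketch (illustrative only): pass gen_crash_diag = false for
+  // errors that reflect bad input rather than an LLVM bug, e.g.
+  //   report_fatal_error("invalid input file", /*gen_crash_diag=*/false);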
/// This function calls abort(), and prints the optional message to stderr.
/// Use the llvm_unreachable macro (that adds location info), instead of
diff --git a/include/llvm/Support/ErrorOr.h b/include/llvm/Support/ErrorOr.h
new file mode 100644
index 000000000000..f3ac305fe775
--- /dev/null
+++ b/include/llvm/Support/ErrorOr.h
@@ -0,0 +1,514 @@
+//===- llvm/Support/ErrorOr.h - Error Smart Pointer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///
+/// Provides ErrorOr<T> smart pointer.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ERROR_OR_H
+#define LLVM_SUPPORT_ERROR_OR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/type_traits.h"
+
+#include <cassert>
+#if LLVM_HAS_CXX11_TYPETRAITS
+#include <type_traits>
+#endif
+
+namespace llvm {
+struct ErrorHolderBase {
+ error_code Error;
+ uint16_t RefCount;
+ bool HasUserData;
+
+ ErrorHolderBase() : RefCount(1) {}
+
+  void acquire() {
+ ++RefCount;
+ }
+
+ void release() {
+ if (--RefCount == 0)
+ delete this;
+ }
+
+protected:
+ virtual ~ErrorHolderBase() {}
+};
+
+template<class T>
+struct ErrorHolder : ErrorHolderBase {
+#if LLVM_HAS_RVALUE_REFERENCES
+ ErrorHolder(T &&UD) : UserData(llvm_move(UD)) {}
+#else
+ ErrorHolder(T &UD) : UserData(UD) {}
+#endif
+ T UserData;
+};
+
+template<class Tp> struct ErrorOrUserDataTraits : llvm::false_type {};
+
+#if LLVM_HAS_CXX11_TYPETRAITS && LLVM_HAS_RVALUE_REFERENCES
+template<class T, class V>
+typename std::enable_if< std::is_constructible<T, V>::value
+ , typename std::remove_reference<V>::type>::type &&
+ moveIfMoveConstructible(V &Val) {
+ return std::move(Val);
+}
+
+template<class T, class V>
+typename std::enable_if< !std::is_constructible<T, V>::value
+ , typename std::remove_reference<V>::type>::type &
+moveIfMoveConstructible(V &Val) {
+ return Val;
+}
+#else
+template<class T, class V>
+V &moveIfMoveConstructible(V &Val) {
+ return Val;
+}
+#endif
+
+/// \brief Stores a reference that can be changed.
+template <typename T>
+class ReferenceStorage {
+ T *Storage;
+
+public:
+ ReferenceStorage(T &Ref) : Storage(&Ref) {}
+
+ operator T &() const { return *Storage; }
+ T &get() const { return *Storage; }
+};
+
+/// \brief Represents either an error or a value T.
+///
+/// ErrorOr<T> is a pointer-like class that represents the result of an
+/// operation. The result is either an error, or a value of type T. This is
+/// designed to emulate the usage of returning a pointer where nullptr indicates
+/// failure. However, instead of just knowing that the operation failed, we
+/// also
+/// have an error_code and optional user data that describes why it failed.
+///
+/// It is used like the following.
+/// \code
+/// ErrorOr<Buffer> getBuffer();
+/// void handleError(error_code ec);
+///
+/// auto buffer = getBuffer();
+/// if (!buffer)
+/// handleError(buffer);
+/// buffer->write("adena");
+/// \endcode
+///
+/// ErrorOr<T> also supports user defined data for specific error_codes. To use
+/// this feature you must first add a template specialization of
+/// ErrorOrUserDataTraits derived from std::true_type for your type in the llvm
+/// namespace. This specialization must have a static error_code error()
+/// function that returns the error_code this data is used with.
+///
+/// getError<UserData>() may be called to get either the stored user data, or
+/// a default constructed UserData if none was stored.
+///
+/// Example:
+/// \code
+/// struct InvalidArgError {
+/// InvalidArgError() {}
+/// InvalidArgError(std::string S) : ArgName(S) {}
+/// std::string ArgName;
+/// };
+///
+/// namespace llvm {
+/// template<>
+/// struct ErrorOrUserDataTraits<InvalidArgError> : std::true_type {
+/// static error_code error() {
+/// return make_error_code(errc::invalid_argument);
+/// }
+/// };
+/// } // end namespace llvm
+///
+/// using namespace llvm;
+///
+/// ErrorOr<int> foo() {
+/// return InvalidArgError("adena");
+/// }
+///
+/// int main() {
+/// auto a = foo();
+/// if (!a && error_code(a) == errc::invalid_argument)
+/// llvm::errs() << a.getError<InvalidArgError>().ArgName << "\n";
+/// }
+/// \endcode
+///
+/// An implicit conversion to bool provides a way to check if there was an
+/// error. The unary * and -> operators provide pointer-like access to the
+/// value. Accessing the value when there is an error has undefined behavior.
+///
+/// When T is a reference type the behavior is slightly different. The
+/// reference is held in a ReferenceStorage<remove_reference<T>::type>, and
+/// there is special handling to make operator -> work as if T was not a
+/// reference.
+///
+/// T cannot be an rvalue reference.
+template<class T>
+class ErrorOr {
+ template <class OtherT> friend class ErrorOr;
+ static const bool isRef = is_reference<T>::value;
+ typedef ReferenceStorage<typename remove_reference<T>::type> wrap;
+
+public:
+ typedef typename
+ conditional< isRef
+ , wrap
+ , T
+ >::type storage_type;
+
+private:
+ typedef typename remove_reference<T>::type &reference;
+ typedef typename remove_reference<T>::type *pointer;
+
+public:
+ ErrorOr() : IsValid(false) {}
+
+ template <class E>
+ ErrorOr(E ErrorCode, typename enable_if_c<is_error_code_enum<E>::value ||
+ is_error_condition_enum<E>::value,
+ void *>::type = 0)
+ : HasError(true), IsValid(true) {
+ Error = new ErrorHolderBase;
+ Error->Error = make_error_code(ErrorCode);
+ Error->HasUserData = false;
+ }
+
+ ErrorOr(llvm::error_code EC) : HasError(true), IsValid(true) {
+ Error = new ErrorHolderBase;
+ Error->Error = EC;
+ Error->HasUserData = false;
+ }
+
+ template<class UserDataT>
+ ErrorOr(UserDataT UD, typename
+ enable_if_c<ErrorOrUserDataTraits<UserDataT>::value>::type* = 0)
+ : HasError(true), IsValid(true) {
+ Error = new ErrorHolder<UserDataT>(llvm_move(UD));
+ Error->Error = ErrorOrUserDataTraits<UserDataT>::error();
+ Error->HasUserData = true;
+ }
+
+ ErrorOr(T Val) : HasError(false), IsValid(true) {
+ new (get()) storage_type(moveIfMoveConstructible<storage_type>(Val));
+ }
+
+ ErrorOr(const ErrorOr &Other) : IsValid(false) {
+ copyConstruct(Other);
+ }
+
+ template <class OtherT>
+ ErrorOr(const ErrorOr<OtherT> &Other) : IsValid(false) {
+ copyConstruct(Other);
+ }
+
+ ErrorOr &operator =(const ErrorOr &Other) {
+ copyAssign(Other);
+ return *this;
+ }
+
+ template <class OtherT>
+ ErrorOr &operator =(const ErrorOr<OtherT> &Other) {
+ copyAssign(Other);
+ return *this;
+ }
+
+#if LLVM_HAS_RVALUE_REFERENCES
+ ErrorOr(ErrorOr &&Other) : IsValid(false) {
+ moveConstruct(std::move(Other));
+ }
+
+ template <class OtherT>
+ ErrorOr(ErrorOr<OtherT> &&Other) : IsValid(false) {
+ moveConstruct(std::move(Other));
+ }
+
+ ErrorOr &operator =(ErrorOr &&Other) {
+ moveAssign(std::move(Other));
+ return *this;
+ }
+
+ template <class OtherT>
+ ErrorOr &operator =(ErrorOr<OtherT> &&Other) {
+ moveAssign(std::move(Other));
+ return *this;
+ }
+#endif
+
+ ~ErrorOr() {
+ if (!IsValid)
+ return;
+ if (HasError)
+ Error->release();
+ else
+ get()->~storage_type();
+ }
+
+ template<class ET>
+ ET getError() const {
+ assert(IsValid && "Cannot get the error of a default constructed ErrorOr!");
+ assert(HasError && "Cannot get an error if none exists!");
+ assert(ErrorOrUserDataTraits<ET>::error() == Error->Error &&
+ "Incorrect user error data type for error!");
+ if (!Error->HasUserData)
+ return ET();
+ return reinterpret_cast<const ErrorHolder<ET>*>(Error)->UserData;
+ }
+
+ typedef void (*unspecified_bool_type)();
+ static void unspecified_bool_true() {}
+
+ /// \brief Return false if there is an error.
+ operator unspecified_bool_type() const {
+ assert(IsValid && "Can't do anything on a default constructed ErrorOr!");
+ return HasError ? 0 : unspecified_bool_true;
+ }
+
+ operator llvm::error_code() const {
+ assert(IsValid && "Can't do anything on a default constructed ErrorOr!");
+ return HasError ? Error->Error : llvm::error_code::success();
+ }
+
+ pointer operator ->() {
+ return toPointer(get());
+ }
+
+ reference operator *() {
+ return *get();
+ }
+
+private:
+ template <class OtherT>
+ void copyConstruct(const ErrorOr<OtherT> &Other) {
+ // Construct an invalid ErrorOr if other is invalid.
+ if (!Other.IsValid)
+ return;
+ IsValid = true;
+ if (!Other.HasError) {
+ // Get the other value.
+ HasError = false;
+ new (get()) storage_type(*Other.get());
+ } else {
+ // Get other's error.
+ Error = Other.Error;
+ HasError = true;
+      Error->acquire();
+ }
+ }
+
+ template <class T1>
+ static bool compareThisIfSameType(const T1 &a, const T1 &b) {
+ return &a == &b;
+ }
+
+ template <class T1, class T2>
+ static bool compareThisIfSameType(const T1 &a, const T2 &b) {
+ return false;
+ }
+
+ template <class OtherT>
+ void copyAssign(const ErrorOr<OtherT> &Other) {
+ if (compareThisIfSameType(*this, Other))
+ return;
+
+ this->~ErrorOr();
+ new (this) ErrorOr(Other);
+ }
+
+#if LLVM_HAS_RVALUE_REFERENCES
+ template <class OtherT>
+ void moveConstruct(ErrorOr<OtherT> &&Other) {
+ // Construct an invalid ErrorOr if other is invalid.
+ if (!Other.IsValid)
+ return;
+ IsValid = true;
+ if (!Other.HasError) {
+ // Get the other value.
+ HasError = false;
+ new (get()) storage_type(std::move(*Other.get()));
+ // Tell other not to do any destruction.
+ Other.IsValid = false;
+ } else {
+ // Get other's error.
+ Error = Other.Error;
+ HasError = true;
+ // Tell other not to do any destruction.
+ Other.IsValid = false;
+ }
+ }
+
+ template <class OtherT>
+ void moveAssign(ErrorOr<OtherT> &&Other) {
+ if (compareThisIfSameType(*this, Other))
+ return;
+
+ this->~ErrorOr();
+ new (this) ErrorOr(std::move(Other));
+ }
+#endif
+
+ pointer toPointer(pointer Val) {
+ return Val;
+ }
+
+ pointer toPointer(wrap *Val) {
+ return &Val->get();
+ }
+
+ storage_type *get() {
+ assert(IsValid && "Can't do anything on a default constructed ErrorOr!");
+ assert(!HasError && "Cannot get value when an error exists!");
+ return reinterpret_cast<storage_type*>(TStorage.buffer);
+ }
+
+ const storage_type *get() const {
+ assert(IsValid && "Can't do anything on a default constructed ErrorOr!");
+ assert(!HasError && "Cannot get value when an error exists!");
+ return reinterpret_cast<const storage_type*>(TStorage.buffer);
+ }
+
+ union {
+ AlignedCharArrayUnion<storage_type> TStorage;
+ ErrorHolderBase *Error;
+ };
+ bool HasError : 1;
+ bool IsValid : 1;
+};
+
+// ErrorOr specialization for void.
+template <>
+class ErrorOr<void> {
+public:
+ ErrorOr() : Error(0, 0) {}
+
+ template <class E>
+ ErrorOr(E ErrorCode, typename enable_if_c<is_error_code_enum<E>::value ||
+ is_error_condition_enum<E>::value,
+ void *> ::type = 0)
+ : Error(0, 0) {
+ error_code EC = make_error_code(ErrorCode);
+ if (EC == errc::success) {
+ Error.setInt(1);
+ return;
+ }
+ ErrorHolderBase *EHB = new ErrorHolderBase;
+ EHB->Error = EC;
+ EHB->HasUserData = false;
+ Error.setPointer(EHB);
+ }
+
+ ErrorOr(llvm::error_code EC) : Error(0, 0) {
+ if (EC == errc::success) {
+ Error.setInt(1);
+ return;
+ }
+ ErrorHolderBase *E = new ErrorHolderBase;
+ E->Error = EC;
+ E->HasUserData = false;
+ Error.setPointer(E);
+ }
+
+ template<class UserDataT>
+ ErrorOr(UserDataT UD, typename
+ enable_if_c<ErrorOrUserDataTraits<UserDataT>::value>::type* = 0)
+ : Error(0, 0) {
+ ErrorHolderBase *E = new ErrorHolder<UserDataT>(llvm_move(UD));
+ E->Error = ErrorOrUserDataTraits<UserDataT>::error();
+ E->HasUserData = true;
+ Error.setPointer(E);
+ }
+
+ ErrorOr(const ErrorOr &Other) : Error(0, 0) {
+ Error = Other.Error;
+ if (Other.Error.getPointer()->Error) {
+      Error.getPointer()->acquire();
+ }
+ }
+
+ ErrorOr &operator =(const ErrorOr &Other) {
+ if (this == &Other)
+ return *this;
+
+ this->~ErrorOr();
+ new (this) ErrorOr(Other);
+
+ return *this;
+ }
+
+#if LLVM_HAS_RVALUE_REFERENCES
+  ErrorOr(ErrorOr &&Other) : Error(0, 0) {
+ // Get other's error.
+ Error = Other.Error;
+ // Tell other not to do any destruction.
+ Other.Error.setPointer(0);
+ }
+
+ ErrorOr &operator =(ErrorOr &&Other) {
+ if (this == &Other)
+ return *this;
+
+ this->~ErrorOr();
+ new (this) ErrorOr(std::move(Other));
+
+ return *this;
+ }
+#endif
+
+ ~ErrorOr() {
+ if (Error.getPointer())
+ Error.getPointer()->release();
+ }
+
+ template<class ET>
+ ET getError() const {
+ assert(ErrorOrUserDataTraits<ET>::error() == *this &&
+ "Incorrect user error data type for error!");
+ if (!Error.getPointer()->HasUserData)
+ return ET();
+ return reinterpret_cast<const ErrorHolder<ET> *>(
+ Error.getPointer())->UserData;
+ }
+
+ typedef void (*unspecified_bool_type)();
+ static void unspecified_bool_true() {}
+
+ /// \brief Return false if there is an error.
+ operator unspecified_bool_type() const {
+ return Error.getInt() ? unspecified_bool_true : 0;
+ }
+
+ operator llvm::error_code() const {
+ return Error.getInt() ? make_error_code(errc::success)
+ : Error.getPointer()->Error;
+ }
+
+private:
+ // If the bit is 1, the error is success.
+ llvm::PointerIntPair<ErrorHolderBase *, 1> Error;
+};
+
+template<class T, class E>
+typename enable_if_c<is_error_code_enum<E>::value ||
+ is_error_condition_enum<E>::value, bool>::type
+operator ==(ErrorOr<T> &Err, E Code) {
+ return error_code(Err) == Code;
+}
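+
+// A minimal usage sketch of this helper (illustrative only; someOperation and
+// handleBadArgument are assumed functions):
+//   ErrorOr<int> R = someOperation();
+//   if (!R && R == errc::invalid_argument)
+//     handleBadArgument();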
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Support/FEnv.h b/include/llvm/Support/FEnv.h
index f6f43337bd29..8560ee0a8afe 100644
--- a/include/llvm/Support/FEnv.h
+++ b/include/llvm/Support/FEnv.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_FENV_H
-#define LLVM_SYSTEM_FENV_H
+#ifndef LLVM_SUPPORT_FENV_H
+#define LLVM_SUPPORT_FENV_H
#include "llvm/Config/config.h"
#include <cerrno>
@@ -32,7 +32,7 @@ namespace sys {
/// llvm_fenv_clearexcept - Clear the floating-point exception state.
static inline void llvm_fenv_clearexcept() {
-#ifdef HAVE_FENV_H
+#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
feclearexcept(FE_ALL_EXCEPT);
#endif
errno = 0;
@@ -43,7 +43,7 @@ static inline bool llvm_fenv_testexcept() {
int errno_val = errno;
if (errno_val == ERANGE || errno_val == EDOM)
return true;
-#ifdef HAVE_FENV_H
+#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
return true;
#endif
diff --git a/include/llvm/Support/FileOutputBuffer.h b/include/llvm/Support/FileOutputBuffer.h
index bcd35e3c1e1b..cbc9c467d23d 100644
--- a/include/llvm/Support/FileOutputBuffer.h
+++ b/include/llvm/Support/FileOutputBuffer.h
@@ -14,85 +14,79 @@
#ifndef LLVM_SUPPORT_FILEOUTPUTBUFFER_H
#define LLVM_SUPPORT_FILEOUTPUTBUFFER_H
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/FileSystem.h"
namespace llvm {
-
class error_code;
-template<class T> class OwningPtr;
/// FileOutputBuffer - This interface provides a simple way to create an in-memory
-/// buffer which will be written to a file. During the lifetime of these
+/// buffer which will be written to a file. During the lifetime of these
/// objects, the content or existence of the specified file is undefined. That
/// is, creating an OutputBuffer for a file may immediately remove the file.
-/// If the FileOutputBuffer is committed, the target file's content will become
-/// the buffer content at the time of the commit. If the FileOutputBuffer is
+/// If the FileOutputBuffer is committed, the target file's content will become
+/// the buffer content at the time of the commit. If the FileOutputBuffer is
/// not committed, the file will be deleted in the FileOutputBuffer destructor.
class FileOutputBuffer {
public:
enum {
F_executable = 1 /// set the 'x' bit on the resulting file
- };
+ };
/// Factory method to create an OutputBuffer object which manages a read/write
/// buffer of the specified size. When committed, the buffer will be written
- /// to the file at the specified path.
- static error_code create(StringRef FilePath, size_t Size,
- OwningPtr<FileOutputBuffer> &Result,
- unsigned Flags=0);
-
+ /// to the file at the specified path.
+ static error_code create(StringRef FilePath, size_t Size,
+ OwningPtr<FileOutputBuffer> &Result,
+ unsigned Flags = 0);
/// Returns a pointer to the start of the buffer.
- uint8_t *getBufferStart() const {
- return BufferStart;
+ uint8_t *getBufferStart() {
+ return (uint8_t*)Region->data();
}
-
+
/// Returns a pointer to the end of the buffer.
- uint8_t *getBufferEnd() const {
- return BufferEnd;
+ uint8_t *getBufferEnd() {
+ return (uint8_t*)Region->data() + Region->size();
}
-
+
/// Returns size of the buffer.
size_t getBufferSize() const {
- return BufferEnd - BufferStart;
+ return Region->size();
}
-
+
/// Returns path where file will show up if buffer is committed.
StringRef getPath() const {
return FinalPath;
}
-
- /// Flushes the content of the buffer to its file and deallocates the
+
+ /// Flushes the content of the buffer to its file and deallocates the
/// buffer. If commit() is not called before this object's destructor
/// is called, the file is deleted in the destructor. The optional parameter
/// is used if it turns out you want the file size to be smaller than
/// initially requested.
error_code commit(int64_t NewSmallerSize = -1);
-
+
/// If this object was previously committed, the destructor just deletes
/// this object. If this object was not committed, the destructor
/// deallocates the buffer and the target file is never written.
~FileOutputBuffer();
-
private:
FileOutputBuffer(const FileOutputBuffer &) LLVM_DELETED_FUNCTION;
FileOutputBuffer &operator=(const FileOutputBuffer &) LLVM_DELETED_FUNCTION;
-protected:
- FileOutputBuffer(uint8_t *Start, uint8_t *End,
- StringRef Path, StringRef TempPath);
-
- uint8_t *BufferStart;
- uint8_t *BufferEnd;
+
+ FileOutputBuffer(llvm::sys::fs::mapped_file_region *R,
+ StringRef Path, StringRef TempPath);
+
+ OwningPtr<llvm::sys::fs::mapped_file_region> Region;
SmallString<128> FinalPath;
SmallString<128> TempPath;
};
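+
+// A minimal usage sketch (illustrative only; `Data` is assumed and error
+// handling is elided):
+//   OwningPtr<FileOutputBuffer> Buf;
+//   if (error_code ec = FileOutputBuffer::create("out.bin", 4096, Buf))
+//     return ec;
+//   memcpy(Buf->getBufferStart(), Data, 4096);
+//   Buf->commit(); // writes the buffer contents to out.bin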
-
-
-
} // end namespace llvm
#endif
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index b455b28b819a..ffa642787b0b 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -24,8 +24,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_FILE_SYSTEM_H
-#define LLVM_SUPPORT_FILE_SYSTEM_H
+#ifndef LLVM_SUPPORT_FILESYSTEM_H
+#define LLVM_SUPPORT_FILESYSTEM_H
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/OwningPtr.h"
@@ -602,12 +602,12 @@ private:
void *FileMappingHandle;
#endif
- error_code init(int FD, uint64_t Offset);
+ error_code init(int FD, bool CloseFD, uint64_t Offset);
public:
typedef char char_type;
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
mapped_file_region(mapped_file_region&&);
mapped_file_region &operator =(mapped_file_region&&);
#endif
@@ -633,8 +633,10 @@ public:
error_code &ec);
/// \param fd An open file descriptor to map. mapped_file_region takes
- /// ownership. It must have been opended in the correct mode.
+  /// ownership if closefd is true. It must have been opened in the correct
+ /// mode.
mapped_file_region(int fd,
+ bool closefd,
mapmode mode,
uint64_t length,
uint64_t offset,
diff --git a/include/llvm/Support/FormattedStream.h b/include/llvm/Support/FormattedStream.h
index 21635dcfb688..2e4bd5aeca2d 100644
--- a/include/llvm/Support/FormattedStream.h
+++ b/include/llvm/Support/FormattedStream.h
@@ -17,125 +17,125 @@
#include "llvm/Support/raw_ostream.h"
-namespace llvm
-{
- /// formatted_raw_ostream - Formatted raw_fd_ostream to handle
- /// asm-specific constructs.
+namespace llvm {
+
+/// formatted_raw_ostream - A raw_ostream that wraps another one and keeps track
+/// of column position, allowing padding out to specific column boundaries.
+///
+class formatted_raw_ostream : public raw_ostream {
+public:
+ /// DELETE_STREAM - Tell the destructor to delete the held stream.
///
- class formatted_raw_ostream : public raw_ostream {
- public:
- /// DELETE_STREAM - Tell the destructor to delete the held stream.
- ///
- static const bool DELETE_STREAM = true;
-
- /// PRESERVE_STREAM - Tell the destructor to not delete the held
- /// stream.
- ///
- static const bool PRESERVE_STREAM = false;
-
- private:
- /// TheStream - The real stream we output to. We set it to be
- /// unbuffered, since we're already doing our own buffering.
- ///
- raw_ostream *TheStream;
-
- /// DeleteStream - Do we need to delete TheStream in the
- /// destructor?
- ///
- bool DeleteStream;
-
- /// ColumnScanned - The current output column of the data that's
- /// been flushed and the portion of the buffer that's been
- /// scanned. The column scheme is zero-based.
- ///
- unsigned ColumnScanned;
-
- /// Scanned - This points to one past the last character in the
- /// buffer we've scanned.
- ///
- const char *Scanned;
-
- virtual void write_impl(const char *Ptr, size_t Size) LLVM_OVERRIDE;
-
- /// current_pos - Return the current position within the stream,
- /// not counting the bytes currently in the buffer.
- virtual uint64_t current_pos() const LLVM_OVERRIDE {
- // Our current position in the stream is all the contents which have been
- // written to the underlying stream (*not* the current position of the
- // underlying stream).
- return TheStream->tell();
- }
-
- /// ComputeColumn - Examine the given output buffer and figure out which
- /// column we end up in after output.
- ///
- void ComputeColumn(const char *Ptr, size_t size);
-
- public:
- /// formatted_raw_ostream - Open the specified file for
- /// writing. If an error occurs, information about the error is
- /// put into ErrorInfo, and the stream should be immediately
- /// destroyed; the string will be empty if no error occurred.
- ///
- /// As a side effect, the given Stream is set to be Unbuffered.
- /// This is because formatted_raw_ostream does its own buffering,
- /// so it doesn't want another layer of buffering to be happening
- /// underneath it.
- ///
- formatted_raw_ostream(raw_ostream &Stream, bool Delete = false)
- : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) {
- setStream(Stream, Delete);
- }
- explicit formatted_raw_ostream()
- : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) {
- Scanned = 0;
- }
-
- ~formatted_raw_ostream() {
- flush();
- releaseStream();
- }
-
- void setStream(raw_ostream &Stream, bool Delete = false) {
- releaseStream();
-
- TheStream = &Stream;
- DeleteStream = Delete;
-
- // This formatted_raw_ostream inherits from raw_ostream, so it'll do its
- // own buffering, and it doesn't need or want TheStream to do another
- // layer of buffering underneath. Resize the buffer to what TheStream
- // had been using, and tell TheStream not to do its own buffering.
- if (size_t BufferSize = TheStream->GetBufferSize())
- SetBufferSize(BufferSize);
- else
- SetUnbuffered();
- TheStream->SetUnbuffered();
+ static const bool DELETE_STREAM = true;
+
+ /// PRESERVE_STREAM - Tell the destructor to not delete the held
+ /// stream.
+ ///
+ static const bool PRESERVE_STREAM = false;
+
+private:
+ /// TheStream - The real stream we output to. We set it to be
+ /// unbuffered, since we're already doing our own buffering.
+ ///
+ raw_ostream *TheStream;
- Scanned = 0;
- }
-
- /// PadToColumn - Align the output to some column number. If the current
- /// column is already equal to or more than NewCol, PadToColumn inserts one
- /// space.
- ///
- /// \param NewCol - The column to move to.
- formatted_raw_ostream &PadToColumn(unsigned NewCol);
-
- private:
- void releaseStream() {
- // Delete the stream if needed. Otherwise, transfer the buffer
- // settings from this raw_ostream back to the underlying stream.
- if (!TheStream)
- return;
- if (DeleteStream)
- delete TheStream;
- else if (size_t BufferSize = GetBufferSize())
- TheStream->SetBufferSize(BufferSize);
- else
- TheStream->SetUnbuffered();
- }
- };
+ /// DeleteStream - Do we need to delete TheStream in the
+ /// destructor?
+ ///
+ bool DeleteStream;
+
+ /// ColumnScanned - The current output column of the data that's
+ /// been flushed and the portion of the buffer that's been
+ /// scanned. The column scheme is zero-based.
+ ///
+ unsigned ColumnScanned;
+
+ /// Scanned - This points to one past the last character in the
+ /// buffer we've scanned.
+ ///
+ const char *Scanned;
+
+ virtual void write_impl(const char *Ptr, size_t Size) LLVM_OVERRIDE;
+
+ /// current_pos - Return the current position within the stream,
+ /// not counting the bytes currently in the buffer.
+ virtual uint64_t current_pos() const LLVM_OVERRIDE {
+ // Our current position in the stream is all the contents which have been
+ // written to the underlying stream (*not* the current position of the
+ // underlying stream).
+ return TheStream->tell();
+ }
+
+ /// ComputeColumn - Examine the given output buffer and figure out which
+ /// column we end up in after output.
+ ///
+ void ComputeColumn(const char *Ptr, size_t size);
+
+public:
+ /// formatted_raw_ostream - Wrap the given stream, forwarding all output
+ /// to it while keeping track of the current column so that output can be
+ /// padded out to column boundaries.
+ ///
+ ///
+ /// As a side effect, the given Stream is set to be Unbuffered.
+ /// This is because formatted_raw_ostream does its own buffering,
+ /// so it doesn't want another layer of buffering to be happening
+ /// underneath it.
+ ///
+ formatted_raw_ostream(raw_ostream &Stream, bool Delete = false)
+ : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) {
+ setStream(Stream, Delete);
+ }
+ explicit formatted_raw_ostream()
+ : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) {
+ Scanned = 0;
+ }
+
+ ~formatted_raw_ostream() {
+ flush();
+ releaseStream();
+ }
+
+ void setStream(raw_ostream &Stream, bool Delete = false) {
+ releaseStream();
+
+ TheStream = &Stream;
+ DeleteStream = Delete;
+
+ // This formatted_raw_ostream inherits from raw_ostream, so it'll do its
+ // own buffering, and it doesn't need or want TheStream to do another
+ // layer of buffering underneath. Resize the buffer to what TheStream
+ // had been using, and tell TheStream not to do its own buffering.
+ if (size_t BufferSize = TheStream->GetBufferSize())
+ SetBufferSize(BufferSize);
+ else
+ SetUnbuffered();
+ TheStream->SetUnbuffered();
+
+ Scanned = 0;
+ }
+
+ /// PadToColumn - Align the output to some column number. If the current
+ /// column is already equal to or more than NewCol, PadToColumn inserts one
+ /// space.
+ ///
+ /// \param NewCol - The column to move to.
+ formatted_raw_ostream &PadToColumn(unsigned NewCol);
+
+private:
+ void releaseStream() {
+ // Delete the stream if needed. Otherwise, transfer the buffer
+ // settings from this raw_ostream back to the underlying stream.
+ if (!TheStream)
+ return;
+ if (DeleteStream)
+ delete TheStream;
+ else if (size_t BufferSize = GetBufferSize())
+ TheStream->SetBufferSize(BufferSize);
+ else
+ TheStream->SetUnbuffered();
+ }
+};
/// fouts() - This returns a reference to a formatted_raw_ostream for
/// standard output. Use it like: fouts() << "foo" << "bar";
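
As a usage sketch (not from the patch), the column tracking is what makes PadToColumn work; a typical consumer looks like:

    #include "llvm/Support/FormattedStream.h"

    // Sketch: emit an opcode and an operand column aligned at (zero-based)
    // column 10. PadToColumn pads with spaces, or inserts a single space if
    // the cursor is already at or past the target column.
    static void printRow(llvm::formatted_raw_ostream &OS) {
      OS << "mov";
      OS.PadToColumn(10);
      OS << "r0, r1\n";
    }
    // e.g. printRow(llvm::fouts());
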
diff --git a/include/llvm/Support/GCOV.h b/include/llvm/Support/GCOV.h
index e552315f4558..f1040f545c93 100644
--- a/include/llvm/Support/GCOV.h
+++ b/include/llvm/Support/GCOV.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_GCOV_H
-#define LLVM_GCOV_H
+#ifndef LLVM_SUPPORT_GCOV_H
+#define LLVM_SUPPORT_GCOV_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
diff --git a/include/llvm/Support/GetElementPtrTypeIterator.h b/include/llvm/Support/GetElementPtrTypeIterator.h
index ef92c95ee7e0..5a90553a0074 100644
--- a/include/llvm/Support/GetElementPtrTypeIterator.h
+++ b/include/llvm/Support/GetElementPtrTypeIterator.h
@@ -12,11 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_GETELEMENTPTRTYPE_H
-#define LLVM_SUPPORT_GETELEMENTPTRTYPE_H
+#ifndef LLVM_SUPPORT_GETELEMENTPTRTYPEITERATOR_H
+#define LLVM_SUPPORT_GETELEMENTPTRTYPEITERATOR_H
-#include "llvm/User.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/User.h"
namespace llvm {
template<typename ItTy = User::const_op_iterator>
@@ -83,15 +83,15 @@ namespace llvm {
typedef generic_gep_type_iterator<> gep_type_iterator;
inline gep_type_iterator gep_type_begin(const User *GEP) {
- return gep_type_iterator::begin(GEP->getOperand(0)->getType(),
- GEP->op_begin()+1);
+ return gep_type_iterator::begin
+ (GEP->getOperand(0)->getType()->getScalarType(), GEP->op_begin()+1);
}
inline gep_type_iterator gep_type_end(const User *GEP) {
return gep_type_iterator::end(GEP->op_end());
}
inline gep_type_iterator gep_type_begin(const User &GEP) {
- return gep_type_iterator::begin(GEP.getOperand(0)->getType(),
- GEP.op_begin()+1);
+ return gep_type_iterator::begin
+ (GEP.getOperand(0)->getType()->getScalarType(), GEP.op_begin()+1);
}
inline gep_type_iterator gep_type_end(const User &GEP) {
return gep_type_iterator::end(GEP.op_end());
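
The switch to getScalarType() lets the iterator cope with GEPs whose pointer operand is a vector of pointers. A minimal sketch of a typical consumer (not from the patch):

    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/GetElementPtrTypeIterator.h"

    // Sketch: count how many GEP indices step into struct types. *GTI
    // yields the type indexed by the current operand.
    static unsigned countStructSteps(const llvm::GetElementPtrInst *GEP) {
      unsigned N = 0;
      for (llvm::gep_type_iterator GTI = llvm::gep_type_begin(GEP),
                                   E = llvm::gep_type_end(GEP);
           GTI != E; ++GTI)
        if ((*GTI)->isStructTy())
          ++N;
      return N;
    }
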
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index f178b0caa8aa..22181d4e1d46 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -23,17 +23,21 @@
#ifndef LLVM_SUPPORT_GRAPHWRITER_H
#define LLVM_SUPPORT_GRAPHWRITER_H
-#include "llvm/Support/DOTGraphTraits.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/GraphTraits.h"
+#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Path.h"
-#include <vector>
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
+#include <vector>
namespace llvm {
namespace DOT { // Private functions...
std::string EscapeString(const std::string &Label);
+
+ /// \brief Get a color string for this node number. Simply round-robin selects
+ /// from a reasonable number of colors.
+ StringRef getColorString(unsigned NodeNumber);
}
namespace GraphProgram {
@@ -173,6 +177,10 @@ public:
// If we should include the address of the node in the label, do so now.
if (DTraits.hasNodeAddressLabel(Node, G))
O << "|" << static_cast<const void*>(Node);
+
+ std::string NodeDesc = DTraits.getNodeDescription(Node, G);
+ if (!NodeDesc.empty())
+ O << "|" << DOT::EscapeString(NodeDesc);
}
std::string edgeSourceLabels;
@@ -193,6 +201,10 @@ public:
// If we should include the address of the node in the label, do so now.
if (DTraits.hasNodeAddressLabel(Node, G))
O << "|" << static_cast<const void*>(Node);
+
+ std::string NodeDesc = DTraits.getNodeDescription(Node, G);
+ if (!NodeDesc.empty())
+ O << "|" << DOT::EscapeString(NodeDesc);
}
if (DTraits.hasEdgeDestLabels()) {
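
A graph client opts into the new description field through its DOTGraphTraits specialization; a minimal sketch (not from the patch; MyGraph, MyNode, and summary() are hypothetical placeholders):

    #include "llvm/Support/DOTGraphTraits.h"

    namespace llvm {
    template <>
    struct DOTGraphTraits<MyGraph *> : public DefaultDOTGraphTraits {
      DOTGraphTraits(bool Simple = false) : DefaultDOTGraphTraits(Simple) {}
      // Returning a non-empty string adds a "|"-separated section to the
      // node label, escaped via DOT::EscapeString as shown above.
      static std::string getNodeDescription(const MyNode *Node, MyGraph *) {
        return Node->summary();  // hypothetical accessor
      }
    };
    } // end namespace llvm
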
diff --git a/include/llvm/Support/Host.h b/include/llvm/Support/Host.h
index b33101632268..3a4440573910 100644
--- a/include/llvm/Support/Host.h
+++ b/include/llvm/Support/Host.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_HOST_H
-#define LLVM_SYSTEM_HOST_H
+#ifndef LLVM_SUPPORT_HOST_H
+#define LLVM_SUPPORT_HOST_H
#include "llvm/ADT/StringMap.h"
#include <string>
@@ -42,6 +42,10 @@ namespace sys {
/// CPU_TYPE-VENDOR-KERNEL-OPERATING_SYSTEM
std::string getDefaultTargetTriple();
+ /// getProcessTriple() - Return an appropriate target triple for generating
+ /// code to be loaded into the current process, e.g. when using the JIT.
+ std::string getProcessTriple();
+
/// getHostCPUName - Get the LLVM name for the host CPU. The particular format
/// of the name is target dependent, and suitable for passing as -mcpu to the
/// target which matches the host.
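
A usage sketch (not from the patch): JIT clients should configure their target from getProcessTriple() rather than getDefaultTargetTriple(), since the two can differ (for instance, a 32-bit process running where the default triple is 64-bit):

    #include "llvm/Support/Host.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      // Triple suitable for code that will be loaded into this process.
      llvm::outs() << llvm::sys::getProcessTriple() << '\n';
      return 0;
    }
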
diff --git a/include/llvm/Support/IRReader.h b/include/llvm/Support/IRReader.h
deleted file mode 100644
index 6d8a9b30ae1f..000000000000
--- a/include/llvm/Support/IRReader.h
+++ /dev/null
@@ -1,112 +0,0 @@
-//===---- llvm/Support/IRReader.h - Reader for LLVM IR files ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines functions for reading LLVM IR. They support both
-// Bitcode and Assembly, automatically detecting the input format.
-//
-// These functions must be defined in a header file in order to avoid
-// library dependencies, since they reference both Bitcode and Assembly
-// functions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_IRREADER_H
-#define LLVM_SUPPORT_IRREADER_H
-
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Assembly/Parser.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/system_error.h"
-
-namespace llvm {
-
- /// If the given MemoryBuffer holds a bitcode image, return a Module for it
- /// which does lazy deserialization of function bodies. Otherwise, attempt to
- /// parse it as LLVM Assembly and return a fully populated Module. This
- /// function *always* takes ownership of the given MemoryBuffer.
- inline Module *getLazyIRModule(MemoryBuffer *Buffer,
- SMDiagnostic &Err,
- LLVMContext &Context) {
- if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
- (const unsigned char *)Buffer->getBufferEnd())) {
- std::string ErrMsg;
- Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg);
- if (M == 0) {
- Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
- ErrMsg);
- // ParseBitcodeFile does not take ownership of the Buffer in the
- // case of an error.
- delete Buffer;
- }
- return M;
- }
-
- return ParseAssembly(Buffer, 0, Err, Context);
- }
-
- /// If the given file holds a bitcode image, return a Module
- /// for it which does lazy deserialization of function bodies. Otherwise,
- /// attempt to parse it as LLVM Assembly and return a fully populated
- /// Module.
- inline Module *getLazyIRFileModule(const std::string &Filename,
- SMDiagnostic &Err,
- LLVMContext &Context) {
- OwningPtr<MemoryBuffer> File;
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
- Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
- "Could not open input file: " + ec.message());
- return 0;
- }
-
- return getLazyIRModule(File.take(), Err, Context);
- }
-
- /// If the given MemoryBuffer holds a bitcode image, return a Module
- /// for it. Otherwise, attempt to parse it as LLVM Assembly and return
- /// a Module for it. This function *always* takes ownership of the given
- /// MemoryBuffer.
- inline Module *ParseIR(MemoryBuffer *Buffer,
- SMDiagnostic &Err,
- LLVMContext &Context) {
- if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
- (const unsigned char *)Buffer->getBufferEnd())) {
- std::string ErrMsg;
- Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg);
- if (M == 0)
- Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
- ErrMsg);
- // ParseBitcodeFile does not take ownership of the Buffer.
- delete Buffer;
- return M;
- }
-
- return ParseAssembly(Buffer, 0, Err, Context);
- }
-
- /// If the given file holds a bitcode image, return a Module for it.
- /// Otherwise, attempt to parse it as LLVM Assembly and return a Module
- /// for it.
- inline Module *ParseIRFile(const std::string &Filename,
- SMDiagnostic &Err,
- LLVMContext &Context) {
- OwningPtr<MemoryBuffer> File;
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
- Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
- "Could not open input file: " + ec.message());
- return 0;
- }
-
- return ParseIR(File.take(), Err, Context);
- }
-
-}
-
-#endif
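
The header is deleted outright; the entry points themselves appear to survive elsewhere in this import as part of the layering cleanup. A sketch of a typical caller of ParseIRFile matching the signature above, where only the include location changes:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>
    // plus the relocated declaration of ParseIRFile

    // Sketch: load bitcode or textual IR from Path, reporting failures.
    static llvm::Module *loadIR(const std::string &Path,
                                llvm::LLVMContext &Ctx) {
      llvm::SMDiagnostic Err;
      llvm::Module *M = llvm::ParseIRFile(Path, Err, Ctx);
      if (!M)
        Err.print("loadIR", llvm::errs());
      return M;
    }
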
diff --git a/include/llvm/Support/IncludeFile.h b/include/llvm/Support/IncludeFile.h
index a9319725d477..2067e34f0d71 100644
--- a/include/llvm/Support/IncludeFile.h
+++ b/include/llvm/Support/IncludeFile.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_INCLUDEFILE_H
-#define LLVM_SYSTEM_INCLUDEFILE_H
+#ifndef LLVM_SUPPORT_INCLUDEFILE_H
+#define LLVM_SUPPORT_INCLUDEFILE_H
/// This macro is the public interface that IncludeFile.h exports. This gives
/// us the option to implement the "link the definition" capability in any
diff --git a/include/llvm/Support/InstIterator.h b/include/llvm/Support/InstIterator.h
index 7d3f8835098e..ac936a11a62d 100644
--- a/include/llvm/Support/InstIterator.h
+++ b/include/llvm/Support/InstIterator.h
@@ -19,8 +19,8 @@
#ifndef LLVM_SUPPORT_INSTITERATOR_H
#define LLVM_SUPPORT_INSTITERATOR_H
-#include "llvm/BasicBlock.h"
-#include "llvm/Function.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
namespace llvm {
diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/Support/InstVisitor.h
deleted file mode 100644
index 6dfb4dec0e23..000000000000
--- a/include/llvm/Support/InstVisitor.h
+++ /dev/null
@@ -1,288 +0,0 @@
-//===- llvm/Support/InstVisitor.h - Define instruction visitors -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef LLVM_SUPPORT_INSTVISITOR_H
-#define LLVM_SUPPORT_INSTVISITOR_H
-
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-
-// We operate on opaque instruction classes, so forward declare all instruction
-// types now...
-//
-#define HANDLE_INST(NUM, OPCODE, CLASS) class CLASS;
-#include "llvm/Instruction.def"
-
-#define DELEGATE(CLASS_TO_VISIT) \
- return static_cast<SubClass*>(this)-> \
- visit##CLASS_TO_VISIT(static_cast<CLASS_TO_VISIT&>(I))
-
-
-/// @brief Base class for instruction visitors
-///
-/// Instruction visitors are used when you want to perform different actions
-/// for different kinds of instructions without having to use lots of casts
-/// and a big switch statement (in your code, that is).
-///
-/// To define your own visitor, inherit from this class, specifying your
-/// new type for the 'SubClass' template parameter, and "override" visitXXX
-/// functions in your class. I say "override" because this class is defined
-/// in terms of statically resolved overloading, not virtual functions.
-///
-/// For example, here is a visitor that counts the number of malloc
-/// instructions processed:
-///
-/// /// Declare the class. Note that we derive from InstVisitor instantiated
-/// /// with _our new subclasses_ type.
-/// ///
-/// struct CountAllocaVisitor : public InstVisitor<CountAllocaVisitor> {
-/// unsigned Count;
-/// CountAllocaVisitor() : Count(0) {}
-///
-/// void visitAllocaInst(AllocaInst &AI) { ++Count; }
-/// };
-///
-/// And this class would be used like this:
-/// CountAllocaVisitor CAV;
-/// CAV.visit(function);
-/// NumAllocas = CAV.Count;
-///
-/// The defined has 'visit' methods for Instruction, and also for BasicBlock,
-/// Function, and Module, which recursively process all contained instructions.
-///
-/// Note that if you don't implement visitXXX for some instruction type,
-/// the visitXXX method for instruction superclass will be invoked. So
-/// if instructions are added in the future, they will be automatically
-/// supported, if you handle one of their superclasses.
-///
-/// The optional second template argument specifies the type that instruction
-/// visitation functions should return. If you specify this, you *MUST* provide
-/// an implementation of visitInstruction though!.
-///
-/// Note that this class is specifically designed as a template to avoid
-/// virtual function call overhead. Defining and using an InstVisitor is just
-/// as efficient as having your own switch statement over the instruction
-/// opcode.
-template<typename SubClass, typename RetTy=void>
-class InstVisitor {
- //===--------------------------------------------------------------------===//
- // Interface code - This is the public interface of the InstVisitor that you
- // use to visit instructions...
- //
-
-public:
- // Generic visit method - Allow visitation to all instructions in a range
- template<class Iterator>
- void visit(Iterator Start, Iterator End) {
- while (Start != End)
- static_cast<SubClass*>(this)->visit(*Start++);
- }
-
- // Define visitors for functions and basic blocks...
- //
- void visit(Module &M) {
- static_cast<SubClass*>(this)->visitModule(M);
- visit(M.begin(), M.end());
- }
- void visit(Function &F) {
- static_cast<SubClass*>(this)->visitFunction(F);
- visit(F.begin(), F.end());
- }
- void visit(BasicBlock &BB) {
- static_cast<SubClass*>(this)->visitBasicBlock(BB);
- visit(BB.begin(), BB.end());
- }
-
- // Forwarding functions so that the user can visit with pointers AND refs.
- void visit(Module *M) { visit(*M); }
- void visit(Function *F) { visit(*F); }
- void visit(BasicBlock *BB) { visit(*BB); }
- RetTy visit(Instruction *I) { return visit(*I); }
-
- // visit - Finally, code to visit an instruction...
- //
- RetTy visit(Instruction &I) {
- switch (I.getOpcode()) {
- default: llvm_unreachable("Unknown instruction type encountered!");
- // Build the switch statement using the Instruction.def file...
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: return \
- static_cast<SubClass*>(this)-> \
- visit##OPCODE(static_cast<CLASS&>(I));
-#include "llvm/Instruction.def"
- }
- }
-
- //===--------------------------------------------------------------------===//
- // Visitation functions... these functions provide default fallbacks in case
- // the user does not specify what to do for a particular instruction type.
- // The default behavior is to generalize the instruction type to its subtype
- // and try visiting the subtype. All of this should be inlined perfectly,
- // because there are no virtual functions to get in the way.
- //
-
- // When visiting a module, function or basic block directly, these methods get
- // called to indicate when transitioning into a new unit.
- //
- void visitModule (Module &M) {}
- void visitFunction (Function &F) {}
- void visitBasicBlock(BasicBlock &BB) {}
-
- // Define instruction specific visitor functions that can be overridden to
- // handle SPECIFIC instructions. These functions automatically define
- // visitMul to proxy to visitBinaryOperator for instance in case the user does
- // not need this generality.
- //
- // These functions can also implement fan-out, when a single opcode and
- // instruction have multiple more specific Instruction subclasses. The Call
- // instruction currently supports this. We implement that by redirecting that
- // instruction to a special delegation helper.
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- RetTy visit##OPCODE(CLASS &I) { \
- if (NUM == Instruction::Call) \
- return delegateCallInst(I); \
- else \
- DELEGATE(CLASS); \
- }
-#include "llvm/Instruction.def"
-
- // Specific Instruction type classes... note that all of the casts are
- // necessary because we use the instruction classes as opaque types...
- //
- RetTy visitReturnInst(ReturnInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitBranchInst(BranchInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitSwitchInst(SwitchInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitResumeInst(ResumeInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);}
- RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);}
- RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);}
- RetTy visitLoadInst(LoadInst &I) { DELEGATE(UnaryInstruction);}
- RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction);}
- RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { DELEGATE(Instruction);}
- RetTy visitAtomicRMWInst(AtomicRMWInst &I) { DELEGATE(Instruction);}
- RetTy visitFenceInst(FenceInst &I) { DELEGATE(Instruction);}
- RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction);}
- RetTy visitPHINode(PHINode &I) { DELEGATE(Instruction);}
- RetTy visitTruncInst(TruncInst &I) { DELEGATE(CastInst);}
- RetTy visitZExtInst(ZExtInst &I) { DELEGATE(CastInst);}
- RetTy visitSExtInst(SExtInst &I) { DELEGATE(CastInst);}
- RetTy visitFPTruncInst(FPTruncInst &I) { DELEGATE(CastInst);}
- RetTy visitFPExtInst(FPExtInst &I) { DELEGATE(CastInst);}
- RetTy visitFPToUIInst(FPToUIInst &I) { DELEGATE(CastInst);}
- RetTy visitFPToSIInst(FPToSIInst &I) { DELEGATE(CastInst);}
- RetTy visitUIToFPInst(UIToFPInst &I) { DELEGATE(CastInst);}
- RetTy visitSIToFPInst(SIToFPInst &I) { DELEGATE(CastInst);}
- RetTy visitPtrToIntInst(PtrToIntInst &I) { DELEGATE(CastInst);}
- RetTy visitIntToPtrInst(IntToPtrInst &I) { DELEGATE(CastInst);}
- RetTy visitBitCastInst(BitCastInst &I) { DELEGATE(CastInst);}
- RetTy visitSelectInst(SelectInst &I) { DELEGATE(Instruction);}
- RetTy visitVAArgInst(VAArgInst &I) { DELEGATE(UnaryInstruction);}
- RetTy visitExtractElementInst(ExtractElementInst &I) { DELEGATE(Instruction);}
- RetTy visitInsertElementInst(InsertElementInst &I) { DELEGATE(Instruction);}
- RetTy visitShuffleVectorInst(ShuffleVectorInst &I) { DELEGATE(Instruction);}
- RetTy visitExtractValueInst(ExtractValueInst &I){ DELEGATE(UnaryInstruction);}
- RetTy visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); }
- RetTy visitLandingPadInst(LandingPadInst &I) { DELEGATE(Instruction); }
-
- // Handle the special instrinsic instruction classes.
- RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgInfoIntrinsic);}
- RetTy visitDbgValueInst(DbgValueInst &I) { DELEGATE(DbgInfoIntrinsic);}
- RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { DELEGATE(IntrinsicInst); }
- RetTy visitMemSetInst(MemSetInst &I) { DELEGATE(MemIntrinsic); }
- RetTy visitMemCpyInst(MemCpyInst &I) { DELEGATE(MemTransferInst); }
- RetTy visitMemMoveInst(MemMoveInst &I) { DELEGATE(MemTransferInst); }
- RetTy visitMemTransferInst(MemTransferInst &I) { DELEGATE(MemIntrinsic); }
- RetTy visitMemIntrinsic(MemIntrinsic &I) { DELEGATE(IntrinsicInst); }
- RetTy visitVAStartInst(VAStartInst &I) { DELEGATE(IntrinsicInst); }
- RetTy visitVAEndInst(VAEndInst &I) { DELEGATE(IntrinsicInst); }
- RetTy visitVACopyInst(VACopyInst &I) { DELEGATE(IntrinsicInst); }
- RetTy visitIntrinsicInst(IntrinsicInst &I) { DELEGATE(CallInst); }
-
- // Call and Invoke are slightly different as they delegate first through
- // a generic CallSite visitor.
- RetTy visitCallInst(CallInst &I) {
- return static_cast<SubClass*>(this)->visitCallSite(&I);
- }
- RetTy visitInvokeInst(InvokeInst &I) {
- return static_cast<SubClass*>(this)->visitCallSite(&I);
- }
-
- // Next level propagators: If the user does not overload a specific
- // instruction type, they can overload one of these to get the whole class
- // of instructions...
- //
- RetTy visitCastInst(CastInst &I) { DELEGATE(UnaryInstruction);}
- RetTy visitBinaryOperator(BinaryOperator &I) { DELEGATE(Instruction);}
- RetTy visitCmpInst(CmpInst &I) { DELEGATE(Instruction);}
- RetTy visitTerminatorInst(TerminatorInst &I) { DELEGATE(Instruction);}
- RetTy visitUnaryInstruction(UnaryInstruction &I){ DELEGATE(Instruction);}
-
- // Provide a special visitor for a 'callsite' that visits both calls and
- // invokes. When unimplemented, properly delegates to either the terminator or
- // regular instruction visitor.
- RetTy visitCallSite(CallSite CS) {
- assert(CS);
- Instruction &I = *CS.getInstruction();
- if (CS.isCall())
- DELEGATE(Instruction);
-
- assert(CS.isInvoke());
- DELEGATE(TerminatorInst);
- }
-
- // If the user wants a 'default' case, they can choose to override this
- // function. If this function is not overloaded in the user's subclass, then
- // this instruction just gets ignored.
- //
- // Note that you MUST override this function if your return type is not void.
- //
- void visitInstruction(Instruction &I) {} // Ignore unhandled instructions
-
-private:
- // Special helper function to delegate to CallInst subclass visitors.
- RetTy delegateCallInst(CallInst &I) {
- if (const Function *F = I.getCalledFunction()) {
- switch ((Intrinsic::ID)F->getIntrinsicID()) {
- default: DELEGATE(IntrinsicInst);
- case Intrinsic::dbg_declare: DELEGATE(DbgDeclareInst);
- case Intrinsic::dbg_value: DELEGATE(DbgValueInst);
- case Intrinsic::memcpy: DELEGATE(MemCpyInst);
- case Intrinsic::memmove: DELEGATE(MemMoveInst);
- case Intrinsic::memset: DELEGATE(MemSetInst);
- case Intrinsic::vastart: DELEGATE(VAStartInst);
- case Intrinsic::vaend: DELEGATE(VAEndInst);
- case Intrinsic::vacopy: DELEGATE(VACopyInst);
- case Intrinsic::not_intrinsic: break;
- }
- }
- DELEGATE(CallInst);
- }
-
- // An overload that will never actually be called, it is used only from dead
- // code in the dispatching from opcodes to instruction subclasses.
- RetTy delegateCallInst(Instruction &I) {
- llvm_unreachable("delegateCallInst called for non-CallInst");
- }
-};
-
-#undef DELEGATE
-
-} // End llvm namespace
-
-#endif
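
As the deleted comment notes, a non-void return type requires overriding visitInstruction; a minimal sketch (not from the patch, and the include path depends on where InstVisitor.h lands after this removal):

    // Sketch: classify a single instruction via static dispatch.
    struct IsCallVisitor : public llvm::InstVisitor<IsCallVisitor, bool> {
      bool visitCallInst(llvm::CallInst &I) { return true; }
      // Mandatory fallback because the return type is not void.
      bool visitInstruction(llvm::Instruction &I) { return false; }
    };
    // Usage: bool B = IsCallVisitor().visit(SomeInstruction);
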
diff --git a/include/llvm/Support/IntegersSubset.h b/include/llvm/Support/IntegersSubset.h
index 03039fd6459f..ce34d785d593 100644
--- a/include/llvm/Support/IntegersSubset.h
+++ b/include/llvm/Support/IntegersSubset.h
@@ -15,15 +15,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CONSTANTRANGESSET_H_
-#define CONSTANTRANGESSET_H_
+#ifndef LLVM_SUPPORT_INTEGERSSUBSET_H
+#define LLVM_SUPPORT_INTEGERSSUBSET_H
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
#include <list>
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-
namespace llvm {
// The IntItem is a wrapper for APInt.
@@ -538,4 +537,4 @@ public:
}
-#endif /* CONSTANTRANGESSET_H_ */
+#endif /* LLVM_SUPPORT_INTEGERSSUBSET_H */
diff --git a/include/llvm/Support/IntegersSubsetMapping.h b/include/llvm/Support/IntegersSubsetMapping.h
index 7635d5e91221..641ce78c5d87 100644
--- a/include/llvm/Support/IntegersSubsetMapping.h
+++ b/include/llvm/Support/IntegersSubsetMapping.h
@@ -17,8 +17,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef CRSBUILDER_H_
-#define CRSBUILDER_H_
+#ifndef LLVM_SUPPORT_INTEGERSSUBSETMAPPING_H
+#define LLVM_SUPPORT_INTEGERSSUBSETMAPPING_H
#include "llvm/Support/IntegersSubset.h"
#include <list>
@@ -585,4 +585,4 @@ typedef IntegersSubsetMapping<BasicBlock> IntegersSubsetToBB;
}
-#endif /* CRSBUILDER_H_ */
+#endif /* LLVM_SUPPORT_INTEGERSSUBSETMAPPING_H */
diff --git a/include/llvm/Support/LEB128.h b/include/llvm/Support/LEB128.h
index b52e5bc9ad33..802b4f354a5a 100644
--- a/include/llvm/Support/LEB128.h
+++ b/include/llvm/Support/LEB128.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_LEB128_H
-#define LLVM_SYSTEM_LEB128_H
+#ifndef LLVM_SUPPORT_LEB128_H
+#define LLVM_SUPPORT_LEB128_H
#include "llvm/Support/raw_ostream.h"
diff --git a/include/llvm/Support/Locale.h b/include/llvm/Support/Locale.h
index b0f12958029f..b384d58baea7 100644
--- a/include/llvm/Support/Locale.h
+++ b/include/llvm/Support/Locale.h
@@ -1,5 +1,5 @@
-#ifndef LLVM_SUPPORT_LOCALE
-#define LLVM_SUPPORT_LOCALE
+#ifndef LLVM_SUPPORT_LOCALE_H
+#define LLVM_SUPPORT_LOCALE_H
#include "llvm/ADT/StringRef.h"
@@ -14,4 +14,4 @@ bool isPrint(int c);
}
}
-#endif // LLVM_SUPPORT_LOCALE
+#endif // LLVM_SUPPORT_LOCALE_H
diff --git a/include/llvm/Support/LockFileManager.h b/include/llvm/Support/LockFileManager.h
index 8c4a760291b8..9df8675ef0af 100644
--- a/include/llvm/Support/LockFileManager.h
+++ b/include/llvm/Support/LockFileManager.h
@@ -41,6 +41,7 @@ public:
};
private:
+ SmallString<128> FileName;
SmallString<128> LockFileName;
SmallString<128> UniqueLockFileName;
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 11f9e63c9bbc..d6ae58dc457c 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -16,6 +16,10 @@
#include "llvm/Support/SwapByteOrder.h"
+#ifdef _MSC_VER
+# include <intrin.h>
+#endif
+
namespace llvm {
// NOTE: The following support functions use the _32/_64 extensions instead of
@@ -61,7 +65,7 @@ inline bool isShiftedInt(int64_t x) {
/// isUInt - Checks if an unsigned integer fits into the given bit width.
template<unsigned N>
inline bool isUInt(uint64_t x) {
- return N >= 64 || x < (UINT64_C(1)<<N);
+ return N >= 64 || x < (UINT64_C(1)<<(N));
}
// Template specializations to get better code for common cases.
template<>
@@ -254,7 +258,10 @@ inline unsigned CountTrailingZeros_32(uint32_t Value) {
4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9,
5, 20, 8, 19, 18
};
- return Mod37BitPosition[(-Value & Value) % 37];
+ // Replace "-Value" by "1+~Value" in the following commented code to avoid
+ // MSVC warning C4146
+ // return Mod37BitPosition[(-Value & Value) % 37];
+ return Mod37BitPosition[((1 + ~Value) & Value) % 37];
#endif
}
@@ -281,7 +288,10 @@ inline unsigned CountTrailingZeros_64(uint64_t Value) {
29, 50, 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56,
7, 48, 35, 6, 34, 33, 0
};
- return Mod67Position[(-Value & Value) % 67];
+ // Replace "-Value" by "1+~Value" in the following commented code to avoid
+ // MSVC warning C4146
+ // return Mod67Position[(-Value & Value) % 67];
+ return Mod67Position[((1 + ~Value) & Value) % 67];
#endif
}
@@ -416,7 +426,11 @@ int IsInf(double d);
/// alignment that may be assumed after adding the two together.
inline uint64_t MinAlign(uint64_t A, uint64_t B) {
// The largest power of 2 that divides both A and B.
- return (A | B) & -(A | B);
+ //
+ // Replace "-Value" by "1+~Value" in the following commented code to avoid
+ // MSVC warning C4146
+ // return (A | B) & -(A | B);
+ return (A | B) & (1 + ~(A | B));
}
/// NextPowerOf2 - Returns the next power of two (in 64-bits)
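
The rewrites rely on the two's-complement identity -V == 1 + ~V; a quick check (not from the patch) that the C4146-free forms still isolate the lowest set bit:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t A = 24, B = 20;              // A|B == 28 == 0b11100
      uint64_t V = A | B;
      assert((1 + ~V) == uint64_t(0) - V);  // the identity being exploited
      assert(((1 + ~V) & V) == 4);          // lowest set bit: MinAlign(24,20)
      return 0;
    }
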
diff --git a/include/llvm/Support/Memory.h b/include/llvm/Support/Memory.h
index 025eee7f9f3e..a08c79649d53 100644
--- a/include/llvm/Support/Memory.h
+++ b/include/llvm/Support/Memory.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_MEMORY_H
-#define LLVM_SYSTEM_MEMORY_H
+#ifndef LLVM_SUPPORT_MEMORY_H
+#define LLVM_SUPPORT_MEMORY_H
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/system_error.h"
diff --git a/include/llvm/Support/MemoryObject.h b/include/llvm/Support/MemoryObject.h
index b778b08de932..732b0f077465 100644
--- a/include/llvm/Support/MemoryObject.h
+++ b/include/llvm/Support/MemoryObject.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MEMORYOBJECT_H
-#define MEMORYOBJECT_H
+#ifndef LLVM_SUPPORT_MEMORYOBJECT_H
+#define LLVM_SUPPORT_MEMORYOBJECT_H
#include "llvm/Support/DataTypes.h"
diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h
index 6abc533d28d6..496a4381f3fc 100644
--- a/include/llvm/Support/Mutex.h
+++ b/include/llvm/Support/Mutex.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_MUTEX_H
-#define LLVM_SYSTEM_MUTEX_H
+#ifndef LLVM_SUPPORT_MUTEX_H
+#define LLVM_SUPPORT_MUTEX_H
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Threading.h"
diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h
index 8e41a64b1770..ecfbbaa78247 100644
--- a/include/llvm/Support/NoFolder.h
+++ b/include/llvm/Support/NoFolder.h
@@ -23,8 +23,8 @@
#define LLVM_SUPPORT_NOFOLDER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
namespace llvm {
diff --git a/include/llvm/Support/PassNameParser.h b/include/llvm/Support/PassNameParser.h
index a24a6f0c5e94..317416c97487 100644
--- a/include/llvm/Support/PassNameParser.h
+++ b/include/llvm/Support/PassNameParser.h
@@ -20,11 +20,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_PASS_NAME_PARSER_H
-#define LLVM_SUPPORT_PASS_NAME_PARSER_H
+#ifndef LLVM_SUPPORT_PASSNAMEPARSER_H
+#define LLVM_SUPPORT_PASSNAMEPARSER_H
-#include "llvm/Pass.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/include/llvm/Support/PathV1.h b/include/llvm/Support/PathV1.h
index 643ee8c6c1d0..86328f06ab14 100644
--- a/include/llvm/Support/PathV1.h
+++ b/include/llvm/Support/PathV1.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_PATH_H
-#define LLVM_SYSTEM_PATH_H
+#ifndef LLVM_SUPPORT_PATHV1_H
+#define LLVM_SUPPORT_PATHV1_H
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index 221fa8b3ebf9..9fbe4349b393 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -29,9 +29,11 @@
#ifndef LLVM_SUPPORT_PATTERNMATCH_H
#define LLVM_SUPPORT_PATTERNMATCH_H
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/Operator.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CallSite.h"
namespace llvm {
namespace PatternMatch {
@@ -41,13 +43,13 @@ bool match(Val *V, const Pattern &P) {
return const_cast<Pattern&>(P).match(V);
}
-
+
template<typename SubPattern_t>
struct OneUse_match {
SubPattern_t SubPattern;
-
+
OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
-
+
template<typename OpTy>
bool match(OpTy *V) {
return V->hasOneUse() && SubPattern.match(V);
@@ -56,8 +58,8 @@ struct OneUse_match {
template<typename T>
inline OneUse_match<T> m_OneUse(const T &SubPattern) { return SubPattern; }
-
-
+
+
template<typename Class>
struct class_match {
template<typename ITy>
@@ -74,7 +76,53 @@ inline class_match<ConstantInt> m_ConstantInt() {
inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); }
inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
-
+
+/// Matching combinators
+template<typename LTy, typename RTy>
+struct match_combine_or {
+ LTy L;
+ RTy R;
+
+ match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) { }
+
+ template<typename ITy>
+ bool match(ITy *V) {
+ if (L.match(V))
+ return true;
+ if (R.match(V))
+ return true;
+ return false;
+ }
+};
+
+template<typename LTy, typename RTy>
+struct match_combine_and {
+ LTy L;
+ RTy R;
+
+ match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) { }
+
+ template<typename ITy>
+ bool match(ITy *V) {
+ if (L.match(V))
+ if (R.match(V))
+ return true;
+ return false;
+ }
+};
+
+/// Combine two pattern matchers matching L || R
+template<typename LTy, typename RTy>
+inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) {
+ return match_combine_or<LTy, RTy>(L, R);
+}
+
+/// Combine two pattern matchers matching L && R
+template<typename LTy, typename RTy>
+inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
+ return match_combine_and<LTy, RTy>(L, R);
+}
+
struct match_zero {
template<typename ITy>
bool match(ITy *V) {
@@ -83,12 +131,33 @@ struct match_zero {
return false;
}
};
-
+
/// m_Zero() - Match an arbitrary zero/null constant. This includes
/// zero_initializer for vectors and ConstantPointerNull for pointers.
inline match_zero m_Zero() { return match_zero(); }
-
-
+
+struct match_neg_zero {
+ template<typename ITy>
+ bool match(ITy *V) {
+ if (const Constant *C = dyn_cast<Constant>(V))
+ return C->isNegativeZeroValue();
+ return false;
+ }
+};
+
+/// m_NegZero() - Match an arbitrary zero/null constant. This includes
+/// zero_initializer for vectors and ConstantPointerNull for pointers. For
+/// floating point constants, this will match negative zero but not positive
+/// zero.
+inline match_neg_zero m_NegZero() { return match_neg_zero(); }
+
+/// m_AnyZero() - Match an arbitrary zero/null constant. This includes
+/// zero_initializer for vectors and ConstantPointerNull for pointers. For
+/// floating point constants, this will match negative zero and positive zero.
+inline match_combine_or<match_zero, match_neg_zero> m_AnyZero() {
+ return m_CombineOr(m_Zero(), m_NegZero());
+}
+
struct apint_match {
const APInt *&Res;
apint_match(const APInt *&R) : Res(R) {}
@@ -98,28 +167,22 @@ struct apint_match {
Res = &CI->getValue();
return true;
}
- // FIXME: Remove this.
- if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
- if (ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(CV->getSplatValue())) {
- Res = &CI->getValue();
- return true;
- }
- if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(V))
- if (ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(CV->getSplatValue())) {
- Res = &CI->getValue();
- return true;
- }
+ if (V->getType()->isVectorTy())
+ if (const Constant *C = dyn_cast<Constant>(V))
+ if (ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
+ Res = &CI->getValue();
+ return true;
+ }
return false;
}
};
-
+
/// m_APInt - Match a ConstantInt or splatted ConstantVector, binding the
/// specified pointer to the contained APInt.
inline apint_match m_APInt(const APInt *&Res) { return Res; }
-
+
template<int64_t Val>
struct constantint_match {
template<typename ITy>
@@ -151,17 +214,15 @@ struct cst_pred_ty : public Predicate {
bool match(ITy *V) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
return this->isValue(CI->getValue());
- // FIXME: Remove this.
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
- if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
- return this->isValue(CI->getValue());
- if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(V))
- if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
- return this->isValue(CI->getValue());
+ if (V->getType()->isVectorTy())
+ if (const Constant *C = dyn_cast<Constant>(V))
+ if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
+ return this->isValue(CI->getValue());
return false;
}
};
-
+
/// api_pred_ty - This helper class is used to match scalar and vector constants
/// that satisfy a specified predicate, and bind them to an APInt.
template<typename Predicate>
@@ -175,27 +236,19 @@ struct api_pred_ty : public Predicate {
Res = &CI->getValue();
return true;
}
-
- // FIXME: remove.
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
- if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
- if (this->isValue(CI->getValue())) {
- Res = &CI->getValue();
- return true;
- }
-
- if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(V))
- if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
- if (this->isValue(CI->getValue())) {
- Res = &CI->getValue();
- return true;
- }
+ if (V->getType()->isVectorTy())
+ if (const Constant *C = dyn_cast<Constant>(V))
+ if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
+ if (this->isValue(CI->getValue())) {
+ Res = &CI->getValue();
+ return true;
+ }
return false;
}
};
-
-
+
+
struct is_one {
bool isValue(const APInt &C) { return C == 1; }
};
@@ -203,11 +256,11 @@ struct is_one {
/// m_One() - Match an integer 1 or a vector with all elements equal to 1.
inline cst_pred_ty<is_one> m_One() { return cst_pred_ty<is_one>(); }
inline api_pred_ty<is_one> m_One(const APInt *&V) { return V; }
-
+
struct is_all_ones {
bool isValue(const APInt &C) { return C.isAllOnesValue(); }
};
-
+
/// m_AllOnes() - Match an integer or vector with all bits set to true.
inline cst_pred_ty<is_all_ones> m_AllOnes() {return cst_pred_ty<is_all_ones>();}
inline api_pred_ty<is_all_ones> m_AllOnes(const APInt *&V) { return V; }
@@ -252,6 +305,9 @@ inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
/// m_Constant - Match a Constant, capturing the value if we match.
inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
+/// m_ConstantFP - Match a ConstantFP, capturing the value if we match.
+inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; }
+
/// specificval_ty - Match a specified Value*.
struct specificval_ty {
const Value *Val;
@@ -266,10 +322,35 @@ struct specificval_ty {
/// m_Specific - Match if we have a specific specified value.
inline specificval_ty m_Specific(const Value *V) { return V; }
+/// Match a specified floating point value or vector of all elements of that
+/// value.
+struct specific_fpval {
+ double Val;
+ specific_fpval(double V) : Val(V) {}
+
+ template<typename ITy>
+ bool match(ITy *V) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ return CFP->isExactlyValue(Val);
+ if (V->getType()->isVectorTy())
+ if (const Constant *C = dyn_cast<Constant>(V))
+ if (ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
+ return CFP->isExactlyValue(Val);
+ return false;
+ }
+};
+
+/// Match a specific floating point value or vector with all elements equal to
+/// the value.
+inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); }
+
+/// Match a float 1.0 or vector with all elements equal to 1.0.
+inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); }
+
struct bind_const_intval_ty {
uint64_t &VR;
bind_const_intval_ty(uint64_t &V) : VR(V) {}
-
+
template<typename ITy>
bool match(ITy *V) {
if (ConstantInt *CV = dyn_cast<ConstantInt>(V))
@@ -284,7 +365,7 @@ struct bind_const_intval_ty {
/// m_ConstantInt - Match a ConstantInt and bind to its value. This does not
/// match ConstantInts wider than 64-bits.
inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
-
+
//===----------------------------------------------------------------------===//
// Matchers for specific binary operators.
//
@@ -583,7 +664,7 @@ inline CastClass_match<OpTy, Instruction::BitCast>
m_BitCast(const OpTy &Op) {
return CastClass_match<OpTy, Instruction::BitCast>(Op);
}
-
+
/// m_PtrToInt
template<typename OpTy>
inline CastClass_match<OpTy, Instruction::PtrToInt>
@@ -611,7 +692,7 @@ inline CastClass_match<OpTy, Instruction::ZExt>
m_ZExt(const OpTy &Op) {
return CastClass_match<OpTy, Instruction::ZExt>(Op);
}
-
+
//===----------------------------------------------------------------------===//
// Matchers for unary operators
@@ -700,6 +781,25 @@ inline fneg_match<LHS> m_FNeg(const LHS &L) { return L; }
// Matchers for control flow.
//
+struct br_match {
+ BasicBlock *&Succ;
+ br_match(BasicBlock *&Succ)
+ : Succ(Succ) {
+ }
+
+ template<typename OpTy>
+ bool match(OpTy *V) {
+ if (BranchInst *BI = dyn_cast<BranchInst>(V))
+ if (BI->isUnconditional()) {
+ Succ = BI->getSuccessor(0);
+ return true;
+ }
+ return false;
+ }
+};
+
+inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); }
+
template<typename Cond_t>
struct brc_match {
Cond_t Cond;
@@ -818,6 +918,102 @@ m_UMin(const LHS &L, const RHS &R) {
return MaxMin_match<LHS, RHS, umin_pred_ty>(L, R);
}
+template<typename Opnd_t>
+struct Argument_match {
+ unsigned OpI;
+ Opnd_t Val;
+ Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) { }
+
+ template<typename OpTy>
+ bool match(OpTy *V) {
+ CallSite CS(V);
+ return CS.isCall() && Val.match(CS.getArgument(OpI));
+ }
+};
+
+/// Match an argument
+template<unsigned OpI, typename Opnd_t>
+inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) {
+ return Argument_match<Opnd_t>(OpI, Op);
+}
+
+/// Intrinsic matchers.
+struct IntrinsicID_match {
+ unsigned ID;
+ IntrinsicID_match(unsigned IntrID) : ID(IntrID) { }
+
+ template<typename OpTy>
+ bool match(OpTy *V) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(V);
+ return II && II->getIntrinsicID() == ID;
+ }
+};
+
+/// Intrinsic matches are combinations of ID matchers, and argument
+/// matchers. Higher arity matchers are defined recursively in terms of and-ing
+/// them with lower arity matchers. Here are some convenient typedefs for up to
+/// several arguments, and more can be added as needed.
+template <typename T0 = void, typename T1 = void, typename T2 = void,
+ typename T3 = void, typename T4 = void, typename T5 = void,
+ typename T6 = void, typename T7 = void, typename T8 = void,
+ typename T9 = void, typename T10 = void> struct m_Intrinsic_Ty;
+template <typename T0>
+struct m_Intrinsic_Ty<T0> {
+ typedef match_combine_and<IntrinsicID_match, Argument_match<T0> > Ty;
+};
+template <typename T0, typename T1>
+struct m_Intrinsic_Ty<T0, T1> {
+ typedef match_combine_and<typename m_Intrinsic_Ty<T0>::Ty,
+ Argument_match<T1> > Ty;
+};
+template <typename T0, typename T1, typename T2>
+struct m_Intrinsic_Ty<T0, T1, T2> {
+ typedef match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty,
+ Argument_match<T2> > Ty;
+};
+template <typename T0, typename T1, typename T2, typename T3>
+struct m_Intrinsic_Ty<T0, T1, T2, T3> {
+ typedef match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty,
+ Argument_match<T3> > Ty;
+};
+
+/// Match intrinsic calls like this:
+/// m_Intrinsic<Intrinsic::fabs>(m_Value(X))
+template <unsigned IntrID>
+inline IntrinsicID_match
+m_Intrinsic() { return IntrinsicID_match(IntrID); }
+
+template<unsigned IntrID, typename T0>
+inline typename m_Intrinsic_Ty<T0>::Ty
+m_Intrinsic(const T0 &Op0) {
+ return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0));
+}
+
+template<unsigned IntrID, typename T0, typename T1>
+inline typename m_Intrinsic_Ty<T0, T1>::Ty
+m_Intrinsic(const T0 &Op0, const T1 &Op1) {
+ return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1));
+}
+
+template<unsigned IntrID, typename T0, typename T1, typename T2>
+inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty
+m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) {
+ return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2));
+}
+
+template<unsigned IntrID, typename T0, typename T1, typename T2, typename T3>
+inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty
+m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
+ return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
+}
+
+// Helper intrinsic matching specializations
+template<typename Opnd0>
+inline typename m_Intrinsic_Ty<Opnd0>::Ty
+m_BSwap(const Opnd0 &Op0) {
+ return m_Intrinsic<Intrinsic::bswap>(Op0);
+}
+
} // end namespace PatternMatch
} // end namespace llvm
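
A sketch (not from the patch) exercising the new combinators and intrinsic matchers together:

    #include "llvm/Support/PatternMatch.h"

    // Sketch: recognize bswap(bswap(X)), binding X, and any FP zero.
    static bool isDoubleBSwap(llvm::Value *V, llvm::Value *&X) {
      using namespace llvm::PatternMatch;
      return match(V, m_BSwap(m_BSwap(m_Value(X))));
    }

    static bool isAnyZeroConst(llvm::Value *V) {
      using namespace llvm::PatternMatch;
      return match(V, m_AnyZero());  // m_CombineOr(m_Zero(), m_NegZero())
    }
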
diff --git a/include/llvm/Support/PredIteratorCache.h b/include/llvm/Support/PredIteratorCache.h
index bb66a8ed58b7..c5fb78050106 100644
--- a/include/llvm/Support/PredIteratorCache.h
+++ b/include/llvm/Support/PredIteratorCache.h
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/CFG.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CFG.h"
#ifndef LLVM_SUPPORT_PREDITERATORCACHE_H
#define LLVM_SUPPORT_PREDITERATORCACHE_H
diff --git a/include/llvm/Support/Process.h b/include/llvm/Support/Process.h
index 088897c903d0..4256d4a03b9d 100644
--- a/include/llvm/Support/Process.h
+++ b/include/llvm/Support/Process.h
@@ -6,152 +6,246 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file declares the llvm::sys::Process class.
-//
+/// \file
+///
+/// Provides a library for accessing information about this process and other
+/// processes on the operating system. Also provides means of spawning
+/// subprocesses for commands. The design of this library is modeled after the
+/// proposed design of the Boost.Process library, and is designed specifically to
+/// follow the style of standard libraries and potentially become a proposal
+/// for a standard library.
+///
+/// This file declares the llvm::sys::Process class which contains a collection
+/// of legacy static interfaces for extracting various information about the
+/// current process. The goal is to migrate users of this API over to the new
+/// interfaces.
+///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_PROCESS_H
-#define LLVM_SYSTEM_PROCESS_H
+#ifndef LLVM_SUPPORT_PROCESS_H
+#define LLVM_SUPPORT_PROCESS_H
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/Support/TimeValue.h"
namespace llvm {
namespace sys {
- /// This class provides an abstraction for getting information about the
- /// currently executing process.
- /// @since 1.4
- /// @brief An abstraction for operating system processes.
- class Process {
- /// @name Accessors
- /// @{
- public:
- /// This static function will return the operating system's virtual memory
- /// page size.
- /// @returns The number of bytes in a virtual memory page.
- /// @brief Get the virtual memory page size
- static unsigned GetPageSize();
-
- /// This static function will return the total amount of memory allocated
- /// by the process. This only counts the memory allocated via the malloc,
- /// calloc and realloc functions and includes any "free" holes in the
- /// allocated space.
- /// @brief Return process memory usage.
- static size_t GetMallocUsage();
-
- /// This static function will return the total memory usage of the
- /// process. This includes code, data, stack and mapped pages usage. Notei
- /// that the value returned here is not necessarily the Running Set Size,
- /// it is the total virtual memory usage, regardless of mapped state of
- /// that memory.
- static size_t GetTotalMemoryUsage();
-
- /// This static function will set \p user_time to the amount of CPU time
- /// spent in user (non-kernel) mode and \p sys_time to the amount of CPU
- /// time spent in system (kernel) mode. If the operating system does not
- /// support collection of these metrics, a zero TimeValue will be for both
- /// values.
- static void GetTimeUsage(
- TimeValue& elapsed,
- ///< Returns the TimeValue::now() giving current time
- TimeValue& user_time,
- ///< Returns the current amount of user time for the process
- TimeValue& sys_time
- ///< Returns the current amount of system time for the process
- );
-
- /// This static function will return the process' current user id number.
- /// Not all operating systems support this feature. Where it is not
- /// supported, the function should return 65536 as the value.
- static int GetCurrentUserId();
-
- /// This static function will return the process' current group id number.
- /// Not all operating systems support this feature. Where it is not
- /// supported, the function should return 65536 as the value.
- static int GetCurrentGroupId();
-
- /// This function makes the necessary calls to the operating system to
- /// prevent core files or any other kind of large memory dumps that can
- /// occur when a program fails.
- /// @brief Prevent core file generation.
- static void PreventCoreFiles();
-
- /// This function determines if the standard input is connected directly
- /// to a user's input (keyboard probably), rather than coming from a file
- /// or pipe.
- static bool StandardInIsUserInput();
-
- /// This function determines if the standard output is connected to a
- /// "tty" or "console" window. That is, the output would be displayed to
- /// the user rather than being put on a pipe or stored in a file.
- static bool StandardOutIsDisplayed();
-
- /// This function determines if the standard error is connected to a
- /// "tty" or "console" window. That is, the output would be displayed to
- /// the user rather than being put on a pipe or stored in a file.
- static bool StandardErrIsDisplayed();
-
- /// This function determines if the given file descriptor is connected to
- /// a "tty" or "console" window. That is, the output would be displayed to
- /// the user rather than being put on a pipe or stored in a file.
- static bool FileDescriptorIsDisplayed(int fd);
-
- /// This function determines if the given file descriptor is displayd and
- /// supports colors.
- static bool FileDescriptorHasColors(int fd);
-
- /// This function determines the number of columns in the window
- /// if standard output is connected to a "tty" or "console"
- /// window. If standard output is not connected to a tty or
- /// console, or if the number of columns cannot be determined,
- /// this routine returns zero.
- static unsigned StandardOutColumns();
-
- /// This function determines the number of columns in the window
- /// if standard error is connected to a "tty" or "console"
- /// window. If standard error is not connected to a tty or
- /// console, or if the number of columns cannot be determined,
- /// this routine returns zero.
- static unsigned StandardErrColumns();
-
- /// This function determines whether the terminal connected to standard
- /// output supports colors. If standard output is not connected to a
- /// terminal, this function returns false.
- static bool StandardOutHasColors();
-
- /// This function determines whether the terminal connected to standard
- /// error supports colors. If standard error is not connected to a
- /// terminal, this function returns false.
- static bool StandardErrHasColors();
-
- /// Whether changing colors requires the output to be flushed.
- /// This is needed on systems that don't support escape sequences for
- /// changing colors.
- static bool ColorNeedsFlush();
-
- /// This function returns the colorcode escape sequences.
- /// If ColorNeedsFlush() is true then this function will change the colors
- /// and return an empty escape sequence. In that case it is the
- /// responsibility of the client to flush the output stream prior to
- /// calling this function.
- static const char *OutputColor(char c, bool bold, bool bg);
-
- /// Same as OutputColor, but only enables the bold attribute.
- static const char *OutputBold(bool bg);
-
- /// This function returns the escape sequence to reverse forground and
- /// background colors.
- static const char *OutputReverse();
-
- /// Resets the terminals colors, or returns an escape sequence to do so.
- static const char *ResetColor();
-
- /// Get the result of a process wide random number generator. The
- /// generator will be automatically seeded in non-deterministic fashion.
- static unsigned GetRandomNumber();
- /// @}
- };
+class self_process;
+
+/// \brief Generic base class which exposes information about an operating
+/// system process.
+///
+/// This base class is the core interface behind any OS process. It exposes
+/// methods to query for generic information about a particular process.
+///
+/// Subclasses implement this interface based on the mechanisms available, and
+/// can optionally expose more interfaces unique to certain process kinds.
+class process {
+protected:
+ /// \brief Only specific subclasses of process objects can be destroyed.
+ virtual ~process();
+
+public:
+ /// \brief Operating system specific type to identify a process.
+ ///
+ /// Note that the Windows one is defined to 'void *' as this is the
+ /// documented type for HANDLE on Windows, and we don't want to pull in the
+ /// Windows headers here.
+#if defined(LLVM_ON_UNIX)
+ typedef pid_t id_type;
+#elif defined(LLVM_ON_WIN32)
+ typedef void *id_type; // Must match the type of HANDLE.
+#else
+#error Unsupported operating system.
+#endif
+
+ /// \brief Get the operating system specific identifier for this process.
+ virtual id_type get_id() = 0;
+
+ /// \brief Get the user time consumed by this process.
+ ///
+ /// Note that this is often an approximation and may be zero on platforms
+ /// where we don't have good support for the functionality.
+ virtual TimeValue get_user_time() const = 0;
+
+ /// \brief Get the system time consumed by this process.
+ ///
+ /// Note that this is often an approximation and may be zero on platforms
+ /// where we don't have good support for the functionality.
+ virtual TimeValue get_system_time() const = 0;
+
+ /// \brief Get the wall time consumed by this process.
+ ///
+ /// Note that this is often an approximation and may be zero on platforms
+ /// where we don't have good support for the functionality.
+ virtual TimeValue get_wall_time() const = 0;
+
+ /// \name Static factory routines for processes.
+ /// @{
+
+ /// \brief Get the process object for the current process.
+ static self_process *get_self();
+
+ /// @}
+
+};
+
+/// \brief The specific class representing the current process.
+///
+/// The current process can both specialize the implementation of the routines
+/// and can expose certain information not available for other OS processes.
+class self_process : public process {
+ friend class process;
+
+ /// \brief Private destructor, as users shouldn't create objects of this
+ /// type.
+ virtual ~self_process();
+
+public:
+ virtual id_type get_id();
+ virtual TimeValue get_user_time() const;
+ virtual TimeValue get_system_time() const;
+ virtual TimeValue get_wall_time() const;
+
+ /// \name Process configuration (sysconf on POSIX)
+ /// @{
+
+ /// \brief Get the virtual memory page size.
+ ///
+ /// Query the operating system for this process's page size.
+ size_t page_size() const { return PageSize; }
+
+ /// @}
+
+private:
+ /// \name Cached process state.
+ /// @{
+
+ /// \brief Cached page size, this cannot vary during the life of the process.
+ size_t PageSize;
+
+ /// @}
+
+ /// \brief Constructor, used by \c process::get_self() only.
+ self_process();
+};
+
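A minimal usage sketch for the new object-based API above. It assumes the header is included as "llvm/Support/Process.h" (the path implied by this change); the function name is illustrative.

    #include "llvm/Support/Process.h"

    void reportSelf() {
      // get_self() returns a singleton; its destructor is private, so
      // clients never delete it.
      llvm::sys::self_process *SP = llvm::sys::process::get_self();
      llvm::sys::process::id_type ID = SP->get_id();   // pid_t on Unix, HANDLE on Windows
      size_t PageSize = SP->page_size();               // cached sysconf-style value
      llvm::sys::TimeValue User = SP->get_user_time(); // may be zero where unsupported
      (void)ID; (void)PageSize; (void)User;
    }
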
+
+/// \brief A collection of legacy interfaces for querying information about the
+/// currently executing process.
+class Process {
+public:
+ /// \brief Return process memory usage.
+ /// This static function will return the total amount of memory allocated
+ /// by the process. This only counts the memory allocated via the malloc,
+ /// calloc and realloc functions and includes any "free" holes in the
+ /// allocated space.
+ static size_t GetMallocUsage();
+
+ /// This static function will set \p user_time to the amount of CPU time
+ /// spent in user (non-kernel) mode and \p sys_time to the amount of CPU
+ /// time spent in system (kernel) mode. If the operating system does not
+ /// support collection of these metrics, a zero TimeValue will be returned
+ /// for both values.
+ /// \param elapsed Returns the current time, as given by TimeValue::now()
+ /// \param user_time Returns the current amount of user time for the process
+ /// \param sys_time Returns the current amount of system time for the process
+ static void GetTimeUsage(TimeValue &elapsed, TimeValue &user_time,
+ TimeValue &sys_time);
+
+ /// This static function will return the process' current user id number.
+ /// Not all operating systems support this feature. Where it is not
+ /// supported, the function should return 65536 as the value.
+ static int GetCurrentUserId();
+
+ /// This static function will return the process' current group id number.
+ /// Not all operating systems support this feature. Where it is not
+ /// supported, the function should return 65536 as the value.
+ static int GetCurrentGroupId();
+
+ /// This function makes the necessary calls to the operating system to
+ /// prevent core files or any other kind of large memory dumps that can
+ /// occur when a program fails.
+ /// @brief Prevent core file generation.
+ static void PreventCoreFiles();
+
+ /// This function determines if the standard input is connected directly
+ /// to a user's input (keyboard probably), rather than coming from a file
+ /// or pipe.
+ static bool StandardInIsUserInput();
+
+ /// This function determines if the standard output is connected to a
+ /// "tty" or "console" window. That is, the output would be displayed to
+ /// the user rather than being put on a pipe or stored in a file.
+ static bool StandardOutIsDisplayed();
+
+ /// This function determines if the standard error is connected to a
+ /// "tty" or "console" window. That is, the output would be displayed to
+ /// the user rather than being put on a pipe or stored in a file.
+ static bool StandardErrIsDisplayed();
+
+ /// This function determines if the given file descriptor is connected to
+ /// a "tty" or "console" window. That is, the output would be displayed to
+ /// the user rather than being put on a pipe or stored in a file.
+ static bool FileDescriptorIsDisplayed(int fd);
+
+ /// This function determines if the given file descriptor is displayed and
+ /// supports colors.
+ static bool FileDescriptorHasColors(int fd);
+
+ /// This function determines the number of columns in the window
+ /// if standard output is connected to a "tty" or "console"
+ /// window. If standard output is not connected to a tty or
+ /// console, or if the number of columns cannot be determined,
+ /// this routine returns zero.
+ static unsigned StandardOutColumns();
+
+ /// This function determines the number of columns in the window
+ /// if standard error is connected to a "tty" or "console"
+ /// window. If standard error is not connected to a tty or
+ /// console, or if the number of columns cannot be determined,
+ /// this routine returns zero.
+ static unsigned StandardErrColumns();
+
+ /// This function determines whether the terminal connected to standard
+ /// output supports colors. If standard output is not connected to a
+ /// terminal, this function returns false.
+ static bool StandardOutHasColors();
+
+ /// This function determines whether the terminal connected to standard
+ /// error supports colors. If standard error is not connected to a
+ /// terminal, this function returns false.
+ static bool StandardErrHasColors();
+
+ /// Whether changing colors requires the output to be flushed.
+ /// This is needed on systems that don't support escape sequences for
+ /// changing colors.
+ static bool ColorNeedsFlush();
+
+ /// This function returns the color code escape sequences.
+ /// If ColorNeedsFlush() is true then this function will change the colors
+ /// and return an empty escape sequence. In that case it is the
+ /// responsibility of the client to flush the output stream prior to
+ /// calling this function.
+ static const char *OutputColor(char c, bool bold, bool bg);
+
+ /// Same as OutputColor, but only enables the bold attribute.
+ static const char *OutputBold(bool bg);
+
+ /// This function returns the escape sequence to reverse foreground and
+ /// background colors.
+ static const char *OutputReverse();
+
+ /// Resets the terminal's colors, or returns an escape sequence to do so.
+ static const char *ResetColor();
+
+ /// Get the result of a process-wide random number generator. The
+ /// generator will be automatically seeded in a non-deterministic fashion.
+ static unsigned GetRandomNumber();
+};
+
}
}
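A short sketch of the legacy static interface, using only members declared above; the function name is illustrative and the include path is the one implied by this change.

    #include "llvm/Support/Process.h"
    using namespace llvm;

    void sampleLegacyProcess() {
      sys::Process::PreventCoreFiles();             // e.g. in a crash-prone tool
      sys::TimeValue Elapsed, UserTime, SysTime;
      sys::Process::GetTimeUsage(Elapsed, UserTime, SysTime); // zero where unsupported
      size_t Heap = sys::Process::GetMallocUsage();           // malloc/calloc/realloc bytes
      bool UseColor = sys::Process::StandardOutHasColors();   // false when not a terminal
      (void)Heap; (void)UseColor;
    }
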
diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h
index 7c9a95103158..bf650112f280 100644
--- a/include/llvm/Support/Program.h
+++ b/include/llvm/Support/Program.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_PROGRAM_H
-#define LLVM_SYSTEM_PROGRAM_H
+#ifndef LLVM_SUPPORT_PROGRAM_H
+#define LLVM_SUPPORT_PROGRAM_H
#include "llvm/Support/Path.h"
@@ -39,14 +39,10 @@ namespace sys {
/// @name Methods
/// @{
- public:
Program();
~Program();
- /// Return process ID of this program.
- unsigned GetPid() const;
-
/// This function executes the program using the \p arguments provided. The
/// invoked program will inherit the stdin, stdout, and stderr file
/// descriptors, the environment and other configuration settings of the
@@ -103,17 +99,7 @@ namespace sys {
///< is non-empty upon return an error occurred while waiting.
);
- /// This function terminates the program.
- /// @returns true if an error occurred.
- /// @see Execute
- /// @brief Terminates the program.
- bool Kill
- ( std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string
- ///< instance in which error messages will be returned. If the string
- ///< is non-empty upon return an error occurred while killing the
- ///< program.
- );
-
+ public:
/// This static constructor (factory) will attempt to locate a program in
/// the operating system's file system using some pre-determined set of
/// locations to search (e.g. the PATH on Unix). Paths with slashes are
@@ -139,7 +125,8 @@ namespace sys {
const sys::Path** redirects = 0,
unsigned secondsToWait = 0,
unsigned memoryLimit = 0,
- std::string* ErrMsg = 0);
+ std::string* ErrMsg = 0,
+ bool *ExecutionFailed = 0);
/// A convenience function equivalent to Program prg; prg.Execute(..);
/// @see Execute
diff --git a/include/llvm/Support/Recycler.h b/include/llvm/Support/Recycler.h
index fa6e189e97bd..bcc561db2d5c 100644
--- a/include/llvm/Support/Recycler.h
+++ b/include/llvm/Support/Recycler.h
@@ -22,6 +22,8 @@
namespace llvm {
+class BumpPtrAllocator;
+
/// PrintRecyclingAllocatorStats - Helper for RecyclingAllocator for
/// printing statistics.
///
@@ -87,6 +89,15 @@ public:
}
}
+ /// Special case for BumpPtrAllocator which has an empty Deallocate()
+ /// function.
+ ///
+ /// There is no need to traverse the free list, pulling all the objects into
+ /// cache.
+ void clear(BumpPtrAllocator&) {
+ FreeList.clearAndLeakNodesUnsafely();
+ }
+
template<class SubClass, class AllocatorType>
SubClass *Allocate(AllocatorType &Allocator) {
assert(sizeof(SubClass) <= Size &&
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
index ffe09b19b68b..82df2c67bd02 100644
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@@ -7,7 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements a POSIX regular expression matcher.
+// This file implements a POSIX regular expression matcher. Both Basic and
+// Extended POSIX regular expressions (ERE) are supported. EREs were extended
+// to support backreferences in matches.
+// This implementation also supports matching strings with embedded NUL chars.
//
//===----------------------------------------------------------------------===//
@@ -33,12 +36,14 @@ namespace llvm {
/// null string after any newline in the string in addition to its normal
/// function, and the $ anchor matches the null string before any
/// newline in the string in addition to its normal function.
- Newline=2
+ Newline=2,
+ /// By default, the POSIX extended regular expression (ERE) syntax is
+ /// assumed. Pass this flag to turn on basic regular expressions (BRE)
+ /// instead.
+ BasicRegex=4
};
- /// Compiles the given POSIX Extended Regular Expression \p Regex.
- /// This implementation supports regexes and matching strings with embedded
- /// NUL characters.
+ /// Compiles the given regular expression \p Regex.
Regex(StringRef Regex, unsigned Flags = NoFlags);
~Regex();
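With the new BasicRegex flag the one class now covers both POSIX syntaxes. A small sketch; it assumes Regex::match(StringRef), which this hunk does not show but which the class provides elsewhere in the header.

    #include "llvm/Support/Regex.h"
    using namespace llvm;

    bool hasDoubleB(StringRef S) {
      Regex ERE("ab{2,}c");                        // extended syntax (the default)
      Regex BRE("ab\\{2,\\}c", Regex::BasicRegex); // the same pattern in basic syntax
      return ERE.match(S) && BRE.match(S);         // both forms should agree
    }
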
diff --git a/include/llvm/Support/RegistryParser.h b/include/llvm/Support/RegistryParser.h
index 2cc578370fef..a6997b6fe774 100644
--- a/include/llvm/Support/RegistryParser.h
+++ b/include/llvm/Support/RegistryParser.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_REGISTRY_PARSER_H
-#define LLVM_SUPPORT_REGISTRY_PARSER_H
+#ifndef LLVM_SUPPORT_REGISTRYPARSER_H
+#define LLVM_SUPPORT_REGISTRYPARSER_H
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Registry.h"
@@ -52,4 +52,4 @@ namespace llvm {
}
-#endif // LLVM_SUPPORT_REGISTRY_PARSER_H
+#endif // LLVM_SUPPORT_REGISTRYPARSER_H
diff --git a/include/llvm/Support/SMLoc.h b/include/llvm/Support/SMLoc.h
index 1bf810b4aaf2..0906471f624e 100644
--- a/include/llvm/Support/SMLoc.h
+++ b/include/llvm/Support/SMLoc.h
@@ -12,14 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SUPPORT_SMLOC_H
-#define SUPPORT_SMLOC_H
+#ifndef LLVM_SUPPORT_SMLOC_H
+#define LLVM_SUPPORT_SMLOC_H
#include <cassert>
namespace llvm {
-/// SMLoc - Represents a location in source code.
+/// Represents a location in source code.
class SMLoc {
const char *Ptr;
public:
@@ -39,9 +39,11 @@ public:
}
};
-/// SMRange - Represents a range in source code. Note that unlike standard STL
-/// ranges, the locations specified are considered to be *inclusive*. For
-/// example, [X,X] *does* include X, it isn't an empty range.
+/// Represents a range in source code.
+///
+/// SMRange is implemented using a half-open range, as is the convention in C++.
+/// In the string "abc", the range (1,3] represents the substring "bc", and the
+/// range (2,2] represents an empty range between the characters "b" and "c".
class SMRange {
public:
SMLoc Start, End;
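The switch from inclusive to half-open ranges changes how callers build an SMRange. A sketch of the new semantics; it assumes SMRange's (Start, End) constructor, which this hunk does not show.

    #include "llvm/Support/SMLoc.h"
    using namespace llvm;

    void rangeSemantics() {
      const char *Buf = "abc";
      SMRange BC(SMLoc::getFromPointer(Buf + 1),  // at 'b': start is inclusive
                 SMLoc::getFromPointer(Buf + 3)); // one past 'c': end is exclusive
      SMRange Empty(SMLoc::getFromPointer(Buf + 2),
                    SMLoc::getFromPointer(Buf + 2)); // empty, between 'b' and 'c'
      (void)BC; (void)Empty;
    }
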
diff --git a/include/llvm/Support/SaveAndRestore.h b/include/llvm/Support/SaveAndRestore.h
index ffa99b968d3c..6330becda9f6 100644
--- a/include/llvm/Support/SaveAndRestore.h
+++ b/include/llvm/Support/SaveAndRestore.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ADT_SAVERESTORE
-#define LLVM_ADT_SAVERESTORE
+#ifndef LLVM_SUPPORT_SAVEANDRESTORE_H
+#define LLVM_SUPPORT_SAVEANDRESTORE_H
namespace llvm {
diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h
index 634f4cf76dc0..465656b94116 100644
--- a/include/llvm/Support/Signals.h
+++ b/include/llvm/Support/Signals.h
@@ -12,10 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_SIGNALS_H
-#define LLVM_SYSTEM_SIGNALS_H
+#ifndef LLVM_SUPPORT_SIGNALS_H
+#define LLVM_SUPPORT_SIGNALS_H
#include "llvm/Support/Path.h"
+#include <cstdio>
namespace llvm {
namespace sys {
@@ -38,6 +39,9 @@ namespace sys {
/// @brief Print a stack trace if a fatal signal occurs.
void PrintStackTraceOnErrorSignal();
+ /// \brief Print the stack trace using the given \c FILE object.
+ void PrintStackTrace(FILE *);
+
/// AddSignalHandler - Add a function to be called when an abort/kill signal
/// is delivered to the process. The handler can have a cookie passed to it
/// to identify what instance of the handler it is.
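The new PrintStackTrace(FILE *) entry point allows dumping a trace on demand rather than only from the fatal-signal handler. A minimal sketch using just the two declarations above:

    #include "llvm/Support/Signals.h"
    #include <cstdio>

    int main() {
      llvm::sys::PrintStackTraceOnErrorSignal(); // install handlers early
      // ... tool logic ...
      llvm::sys::PrintStackTrace(stderr);        // explicit dump to any FILE*
      return 0;
    }
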
diff --git a/include/llvm/Support/Solaris.h b/include/llvm/Support/Solaris.h
index 57eee2cb4973..6228c4b43b52 100644
--- a/include/llvm/Support/Solaris.h
+++ b/include/llvm/Support/Solaris.h
@@ -11,8 +11,8 @@
*
*===----------------------------------------------------------------------===*/
-#ifndef LLVM_SYSTEM_SOLARIS_H
-#define LLVM_SYSTEM_SOLARIS_H
+#ifndef LLVM_SUPPORT_SOLARIS_H
+#define LLVM_SUPPORT_SOLARIS_H
#include <sys/types.h>
#include <sys/regset.h>
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index bcf95f2f6e66..02abf92daa41 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -13,17 +13,20 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SUPPORT_SOURCEMGR_H
-#define SUPPORT_SOURCEMGR_H
+#ifndef LLVM_SUPPORT_SOURCEMGR_H
+#define LLVM_SUPPORT_SOURCEMGR_H
-#include "llvm/Support/SMLoc.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SMLoc.h"
#include <string>
namespace llvm {
class MemoryBuffer;
class SourceMgr;
class SMDiagnostic;
+ class SMFixIt;
class Twine;
class raw_ostream;
@@ -95,6 +98,10 @@ public:
return Buffers[i].Buffer;
}
+ unsigned getNumBuffers() const {
+ return Buffers.size();
+ }
+
SMLoc getParentIncludeLoc(unsigned i) const {
assert(i < Buffers.size() && "Invalid Buffer ID!");
return Buffers[i].IncludeLoc;
@@ -139,6 +146,7 @@ public:
/// the default error handler is used.
void PrintMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
+ ArrayRef<SMFixIt> FixIts = ArrayRef<SMFixIt>(),
bool ShowColors = true) const;
@@ -148,7 +156,8 @@ public:
/// @param Msg If non-null, the kind of message (e.g., "error") which is
/// prefixed to the message.
SMDiagnostic GetMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const;
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
+ ArrayRef<SMFixIt> FixIts = ArrayRef<SMFixIt>()) const;
/// PrintIncludeStack - Prints the names of included files and the line of the
/// file they were included from. A diagnostic handler can use this before
@@ -160,6 +169,38 @@ public:
};
+/// Represents a single fixit, a replacement of one range of text with another.
+class SMFixIt {
+ SMRange Range;
+
+ std::string Text;
+
+public:
+ // FIXME: Twine.str() is not very efficient.
+ SMFixIt(SMLoc Loc, const Twine &Insertion)
+ : Range(Loc, Loc), Text(Insertion.str()) {
+ assert(Loc.isValid());
+ }
+
+ // FIXME: Twine.str() is not very efficient.
+ SMFixIt(SMRange R, const Twine &Replacement)
+ : Range(R), Text(Replacement.str()) {
+ assert(R.isValid());
+ }
+
+ StringRef getText() const { return Text; }
+ SMRange getRange() const { return Range; }
+
+ bool operator<(const SMFixIt &Other) const {
+ if (Range.Start.getPointer() != Other.Range.Start.getPointer())
+ return Range.Start.getPointer() < Other.Range.Start.getPointer();
+ if (Range.End.getPointer() != Other.Range.End.getPointer())
+ return Range.End.getPointer() < Other.Range.End.getPointer();
+ return Text < Other.Text;
+ }
+};
+
+
/// SMDiagnostic - Instances of this class encapsulate one diagnostic report,
/// allowing printing to a raw_ostream as a caret diagnostic.
class SMDiagnostic {
@@ -170,35 +211,46 @@ class SMDiagnostic {
SourceMgr::DiagKind Kind;
std::string Message, LineContents;
std::vector<std::pair<unsigned, unsigned> > Ranges;
+ SmallVector<SMFixIt, 4> FixIts;
public:
// Null diagnostic.
SMDiagnostic()
: SM(0), LineNo(0), ColumnNo(0), Kind(SourceMgr::DK_Error) {}
// Diagnostic with no location (e.g. file not found, command line arg error).
- SMDiagnostic(const std::string &filename, SourceMgr::DiagKind Knd,
- const std::string &Msg)
+ SMDiagnostic(StringRef filename, SourceMgr::DiagKind Knd, StringRef Msg)
: SM(0), Filename(filename), LineNo(-1), ColumnNo(-1), Kind(Knd),
Message(Msg) {}
// Diagnostic with a location.
- SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN,
+ SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN,
int Line, int Col, SourceMgr::DiagKind Kind,
- const std::string &Msg, const std::string &LineStr,
- ArrayRef<std::pair<unsigned,unsigned> > Ranges);
+ StringRef Msg, StringRef LineStr,
+ ArrayRef<std::pair<unsigned,unsigned> > Ranges,
+ ArrayRef<SMFixIt> FixIts = ArrayRef<SMFixIt>());
const SourceMgr *getSourceMgr() const { return SM; }
SMLoc getLoc() const { return Loc; }
- const std::string &getFilename() const { return Filename; }
+ StringRef getFilename() const { return Filename; }
int getLineNo() const { return LineNo; }
int getColumnNo() const { return ColumnNo; }
SourceMgr::DiagKind getKind() const { return Kind; }
- const std::string &getMessage() const { return Message; }
- const std::string &getLineContents() const { return LineContents; }
- const std::vector<std::pair<unsigned, unsigned> > &getRanges() const {
+ StringRef getMessage() const { return Message; }
+ StringRef getLineContents() const { return LineContents; }
+ ArrayRef<std::pair<unsigned, unsigned> > getRanges() const {
return Ranges;
}
- void print(const char *ProgName, raw_ostream &S, bool ShowColors = true) const;
+
+ void addFixIt(const SMFixIt &Hint) {
+ FixIts.push_back(Hint);
+ }
+
+ ArrayRef<SMFixIt> getFixIts() const {
+ return FixIts;
+ }
+
+ void print(const char *ProgName, raw_ostream &S,
+ bool ShowColors = true) const;
};
} // end llvm namespace
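Putting the new pieces together, a caller can attach fix-it hints to a caret diagnostic. A hedged sketch; DK_Warning is assumed from SourceMgr's DiagKind enumeration (not listed in this excerpt), and "expected_name" is a placeholder.

    #include "llvm/Support/SourceMgr.h"
    using namespace llvm;

    void warnWithFixIt(SourceMgr &SM, SMLoc Loc, SMRange Bad) {
      SMFixIt Hint(Bad, "expected_name"); // replace the Bad range with new text
      SM.PrintMessage(Loc, SourceMgr::DK_Warning,
                      "unknown name; did you mean 'expected_name'?",
                      ArrayRef<SMRange>(Bad), ArrayRef<SMFixIt>(Hint));
    }
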
diff --git a/include/llvm/Support/StreamableMemoryObject.h b/include/llvm/Support/StreamableMemoryObject.h
index a2b4bcb9aa08..385548579b1f 100644
--- a/include/llvm/Support/StreamableMemoryObject.h
+++ b/include/llvm/Support/StreamableMemoryObject.h
@@ -8,13 +8,13 @@
//===----------------------------------------------------------------------===//
-#ifndef STREAMABLEMEMORYOBJECT_H_
-#define STREAMABLEMEMORYOBJECT_H_
+#ifndef LLVM_SUPPORT_STREAMABLEMEMORYOBJECT_H
+#define LLVM_SUPPORT_STREAMABLEMEMORYOBJECT_H
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/DataStream.h"
+#include "llvm/Support/MemoryObject.h"
#include <vector>
namespace llvm {
diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h
index de05e0b547a1..71adbc5342e2 100644
--- a/include/llvm/Support/StringPool.h
+++ b/include/llvm/Support/StringPool.h
@@ -30,8 +30,8 @@
#define LLVM_SUPPORT_STRINGPOOL_H
#include "llvm/ADT/StringMap.h"
-#include <new>
#include <cassert>
+#include <new>
namespace llvm {
diff --git a/include/llvm/Support/SwapByteOrder.h b/include/llvm/Support/SwapByteOrder.h
index 6c0592c05ad7..e65f9cc0729d 100644
--- a/include/llvm/Support/SwapByteOrder.h
+++ b/include/llvm/Support/SwapByteOrder.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_SWAP_BYTE_ORDER_H
-#define LLVM_SYSTEM_SWAP_BYTE_ORDER_H
+#ifndef LLVM_SUPPORT_SWAPBYTEORDER_H
+#define LLVM_SUPPORT_SWAPBYTEORDER_H
#include "llvm/Support/DataTypes.h"
#include <cstddef>
diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h
index 45f781633656..5c1978dddb79 100644
--- a/include/llvm/Support/TargetFolder.h
+++ b/include/llvm/Support/TargetFolder.h
@@ -19,10 +19,10 @@
#ifndef LLVM_SUPPORT_TARGETFOLDER_H
#define LLVM_SUPPORT_TARGETFOLDER_H
-#include "llvm/Constants.h"
-#include "llvm/InstrTypes.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
namespace llvm {
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index ca58bfb0d73b..b06676d4d2f5 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -19,10 +19,10 @@
#ifndef LLVM_SUPPORT_TARGETREGISTRY_H
#define LLVM_SUPPORT_TARGETREGISTRY_H
-#include "llvm/Support/CodeGen.h"
#include "llvm/ADT/Triple.h"
-#include <string>
+#include "llvm/Support/CodeGen.h"
#include <cassert>
+#include <string>
namespace llvm {
class AsmPrinter;
@@ -41,7 +41,6 @@ namespace llvm {
class MCRegisterInfo;
class MCStreamer;
class MCSubtargetInfo;
- class MCTargetAsmLexer;
class MCTargetAsmParser;
class TargetMachine;
class TargetOptions;
@@ -96,9 +95,6 @@ namespace llvm {
typedef MCAsmBackend *(*MCAsmBackendCtorTy)(const Target &T,
StringRef TT,
StringRef CPU);
- typedef MCTargetAsmLexer *(*MCAsmLexerCtorTy)(const Target &T,
- const MCRegisterInfo &MRI,
- const MCAsmInfo &MAI);
typedef MCTargetAsmParser *(*MCAsmParserCtorTy)(MCSubtargetInfo &STI,
MCAsmParser &P);
typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T,
@@ -182,10 +178,6 @@ namespace llvm {
/// MCAsmBackend, if registered.
MCAsmBackendCtorTy MCAsmBackendCtorFn;
- /// MCAsmLexerCtorFn - Construction function for this target's
- /// MCTargetAsmLexer, if registered.
- MCAsmLexerCtorTy MCAsmLexerCtorFn;
-
/// MCAsmParserCtorFn - Construction function for this target's
/// MCTargetAsmParser, if registered.
MCAsmParserCtorTy MCAsmParserCtorFn;
@@ -242,9 +234,6 @@ namespace llvm {
/// hasMCAsmBackend - Check if this target supports .o generation.
bool hasMCAsmBackend() const { return MCAsmBackendCtorFn != 0; }
- /// hasMCAsmLexer - Check if this target supports .s lexing.
- bool hasMCAsmLexer() const { return MCAsmLexerCtorFn != 0; }
-
/// hasAsmParser - Check if this target supports .s parsing.
bool hasMCAsmParser() const { return MCAsmParserCtorFn != 0; }
@@ -360,15 +349,6 @@ namespace llvm {
return MCAsmBackendCtorFn(*this, Triple, CPU);
}
- /// createMCAsmLexer - Create a target specific assembly lexer.
- ///
- MCTargetAsmLexer *createMCAsmLexer(const MCRegisterInfo &MRI,
- const MCAsmInfo &MAI) const {
- if (!MCAsmLexerCtorFn)
- return 0;
- return MCAsmLexerCtorFn(*this, MRI, MAI);
- }
-
/// createMCAsmParser - Create a target specific assembly parser.
///
/// \param Parser The target independent parser implementation to use for
@@ -676,20 +656,6 @@ namespace llvm {
T.MCAsmBackendCtorFn = Fn;
}
- /// RegisterMCAsmLexer - Register a MCTargetAsmLexer implementation for the
- /// given target.
- ///
- /// Clients are responsible for ensuring that registration doesn't occur
- /// while another thread is attempting to access the registry. Typically
- /// this is done by initializing all targets at program startup.
- ///
- /// @param T - The target being registered.
- /// @param Fn - A function to construct an MCAsmLexer for the target.
- static void RegisterMCAsmLexer(Target &T, Target::MCAsmLexerCtorTy Fn) {
- if (!T.MCAsmLexerCtorFn)
- T.MCAsmLexerCtorFn = Fn;
- }
-
/// RegisterMCAsmParser - Register a MCTargetAsmParser implementation for
/// the given target.
///
@@ -1070,28 +1036,6 @@ namespace llvm {
}
};
- /// RegisterMCAsmLexer - Helper template for registering a target specific
- /// assembly lexer, for use in the target machine initialization
- /// function. Usage:
- ///
- /// extern "C" void LLVMInitializeFooMCAsmLexer() {
- /// extern Target TheFooTarget;
- /// RegisterMCAsmLexer<FooMCAsmLexer> X(TheFooTarget);
- /// }
- template<class MCAsmLexerImpl>
- struct RegisterMCAsmLexer {
- RegisterMCAsmLexer(Target &T) {
- TargetRegistry::RegisterMCAsmLexer(T, &Allocator);
- }
-
- private:
- static MCTargetAsmLexer *Allocator(const Target &T,
- const MCRegisterInfo &MRI,
- const MCAsmInfo &MAI) {
- return new MCAsmLexerImpl(T, MRI, MAI);
- }
- };
-
/// RegisterMCAsmParser - Helper template for registering a target specific
/// assembly parser, for use in the target machine initialization
/// function. Usage:
diff --git a/include/llvm/Support/ThreadLocal.h b/include/llvm/Support/ThreadLocal.h
index 62ec90ad24f5..7518626901e0 100644
--- a/include/llvm/Support/ThreadLocal.h
+++ b/include/llvm/Support/ThreadLocal.h
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_THREAD_LOCAL_H
-#define LLVM_SYSTEM_THREAD_LOCAL_H
+#ifndef LLVM_SUPPORT_THREADLOCAL_H
+#define LLVM_SUPPORT_THREADLOCAL_H
-#include "llvm/Support/Threading.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Threading.h"
#include <cassert>
namespace llvm {
diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h
index 9017afb89038..a7e8774558d5 100644
--- a/include/llvm/Support/Threading.h
+++ b/include/llvm/Support/Threading.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_THREADING_H
-#define LLVM_SYSTEM_THREADING_H
+#ifndef LLVM_SUPPORT_THREADING_H
+#define LLVM_SUPPORT_THREADING_H
namespace llvm {
/// llvm_start_multithreaded - Allocate and initialize structures needed to
diff --git a/include/llvm/Support/TimeValue.h b/include/llvm/Support/TimeValue.h
index e780b50c6039..4b48b849f20d 100644
--- a/include/llvm/Support/TimeValue.h
+++ b/include/llvm/Support/TimeValue.h
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_SUPPORT_TIMEVALUE_H
+#define LLVM_SUPPORT_TIMEVALUE_H
+
#include "llvm/Support/DataTypes.h"
#include <string>
-#ifndef LLVM_SYSTEM_TIMEVALUE_H
-#define LLVM_SYSTEM_TIMEVALUE_H
-
namespace llvm {
namespace sys {
/// This class is used where a precise fixed point in time is required. The
@@ -82,6 +82,9 @@ namespace sys {
/// @name Constructors
/// @{
public:
+ /// \brief Default construct a time value, initializing to ZeroTime.
+ TimeValue() : seconds_(0), nanos_(0) {}
+
/// Caller provides the exact value in seconds and nanoseconds. The
/// \p nanos argument defaults to zero for convenience.
/// @brief Explicit constructor
@@ -237,7 +240,7 @@ namespace sys {
/// Posix, correcting for the difference in Posix zero time.
/// @brief Convert to unix time (100 nanoseconds since 12:00:00a Jan 1,1970)
uint64_t toPosixTime() const {
- uint64_t result = seconds_ - PosixZeroTime.seconds_;
+ uint64_t result = seconds_ - PosixZeroTimeSeconds;
result += nanos_ / NANOSECONDS_PER_POSIX_TICK;
return result;
}
@@ -245,14 +248,14 @@ namespace sys {
/// Converts the TimeValue into the corresponding number of seconds
/// since the epoch (00:00:00 Jan 1,1970).
uint64_t toEpochTime() const {
- return seconds_ - PosixZeroTime.seconds_;
+ return seconds_ - PosixZeroTimeSeconds;
}
/// Converts the TimeValue into the corresponding number of "ticks" for
/// Win32 platforms, correcting for the difference in Win32 zero time.
/// @brief Convert to windows time (seconds since 12:00:00a Jan 1, 1601)
uint64_t toWin32Time() const {
- uint64_t result = seconds_ - Win32ZeroTime.seconds_;
+ uint64_t result = seconds_ - Win32ZeroTimeSeconds;
result += nanos_ / NANOSECONDS_PER_WIN32_TICK;
return result;
}
@@ -261,7 +264,7 @@ namespace sys {
/// correction for the Posix zero time.
/// @brief Convert to timespec time (ala POSIX.1b)
void getTimespecTime( uint64_t& seconds, uint32_t& nanos ) const {
- seconds = seconds_ - PosixZeroTime.seconds_;
+ seconds = seconds_ - PosixZeroTimeSeconds;
nanos = nanos_;
}
@@ -328,7 +331,7 @@ namespace sys {
/// TimeValue and assigns that value to \p this.
/// @brief Convert seconds form PosixTime to TimeValue
void fromEpochTime( SecondsType seconds ) {
- seconds_ = seconds + PosixZeroTime.seconds_;
+ seconds_ = seconds + PosixZeroTimeSeconds;
nanos_ = 0;
this->normalize();
}
@@ -337,7 +340,7 @@ namespace sys {
/// corresponding TimeValue and assigns that value to \p this.
/// @brief Convert seconds form Windows FILETIME to TimeValue
void fromWin32Time( uint64_t win32Time ) {
- this->seconds_ = win32Time / 10000000 + Win32ZeroTime.seconds_;
+ this->seconds_ = win32Time / 10000000 + Win32ZeroTimeSeconds;
this->nanos_ = NanoSecondsType(win32Time % 10000000) * 100;
}
@@ -357,6 +360,9 @@ namespace sys {
/// Store the values as a <timeval>.
SecondsType seconds_;///< Stores the seconds part of the TimeVal
NanoSecondsType nanos_; ///< Stores the nanoseconds part of the TimeVal
+
+ static const SecondsType PosixZeroTimeSeconds;
+ static const SecondsType Win32ZeroTimeSeconds;
/// @}
};
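With the zero-time constants now stored as plain seconds, the conversions stay simple arithmetic. A round-trip sketch (the epoch value is arbitrary):

    #include "llvm/Support/TimeValue.h"
    using namespace llvm;

    void epochRoundTrip() {
      sys::TimeValue TV;                // new default constructor: ZeroTime
      TV.fromEpochTime(1365443283);     // seconds since 00:00:00 Jan 1, 1970
      uint64_t Back = TV.toEpochTime(); // recovers 1365443283
      (void)Back;
    }
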
diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h
index a7418827ca32..d009d7fae513 100644
--- a/include/llvm/Support/Timer.h
+++ b/include/llvm/Support/Timer.h
@@ -6,22 +6,17 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file defines three classes: Timer, TimeRegion, and TimerGroup,
-// documented below.
-//
-//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_TIMER_H
#define LLVM_SUPPORT_TIMER_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <string>
-#include <vector>
#include <utility>
+#include <vector>
namespace llvm {
@@ -78,7 +73,7 @@ public:
/// invocations of its startTimer()/stopTimer() methods. Given appropriate OS
/// support it can also keep track of the RSS of the program at various points.
/// By default, the Timer will print the amount of time it has captured to
-/// standard error when the laster timer is destroyed, otherwise it is printed
+/// standard error when the last timer is destroyed, otherwise it is printed
/// when its TimerGroup is destroyed. Timers do not print their information
/// if they are never started.
///
@@ -126,7 +121,7 @@ private:
/// The TimeRegion class is used as a helper class to call the startTimer() and
/// stopTimer() methods of the Timer class. When the object is constructed, it
-/// starts the timer specified as it's argument. When it is destroyed, it stops
+/// starts the timer specified as its argument. When it is destroyed, it stops
/// the relevant timer. This makes it easy to time a region of code.
///
class TimeRegion {
diff --git a/include/llvm/Support/ToolOutputFile.h b/include/llvm/Support/ToolOutputFile.h
index 65b182a24535..b3b7c577b722 100644
--- a/include/llvm/Support/ToolOutputFile.h
+++ b/include/llvm/Support/ToolOutputFile.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_TOOL_OUTPUT_FILE_H
-#define LLVM_SUPPORT_TOOL_OUTPUT_FILE_H
+#ifndef LLVM_SUPPORT_TOOLOUTPUTFILE_H
+#define LLVM_SUPPORT_TOOLOUTPUTFILE_H
#include "llvm/Support/raw_ostream.h"
diff --git a/include/llvm/Support/Valgrind.h b/include/llvm/Support/Valgrind.h
index e14764703932..a1397db8eb2c 100644
--- a/include/llvm/Support/Valgrind.h
+++ b/include/llvm/Support/Valgrind.h
@@ -16,8 +16,8 @@
#ifndef LLVM_SYSTEM_VALGRIND_H
#define LLVM_SYSTEM_VALGRIND_H
-#include "llvm/Support/Compiler.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Compiler.h"
#include <stddef.h>
#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h
index dbcf0fd11d19..b49341c3ffb6 100644
--- a/include/llvm/Support/ValueHandle.h
+++ b/include/llvm/Support/ValueHandle.h
@@ -16,10 +16,11 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/Value.h"
+#include "llvm/IR/Value.h"
namespace llvm {
class ValueHandleBase;
+template<typename From> struct simplify_type;
// ValueHandleBase** is only 4-byte aligned.
template<>
@@ -162,14 +163,12 @@ public:
// Specialize simplify_type to allow WeakVH to participate in
// dyn_cast, isa, etc.
-template<typename From> struct simplify_type;
-template<> struct simplify_type<const WeakVH> {
+template<> struct simplify_type<WeakVH> {
typedef Value* SimpleType;
- static SimpleType getSimplifiedValue(const WeakVH &WVH) {
- return static_cast<Value *>(WVH);
+ static SimpleType getSimplifiedValue(WeakVH &WVH) {
+ return WVH;
}
};
-template<> struct simplify_type<WeakVH> : public simplify_type<const WeakVH> {};
/// AssertingVH - This is a Value Handle that points to a value and asserts out
/// if the value is destroyed while the handle is still live. This is very
@@ -236,18 +235,6 @@ public:
ValueTy &operator*() const { return *getValPtr(); }
};
-// Specialize simplify_type to allow AssertingVH to participate in
-// dyn_cast, isa, etc.
-template<typename From> struct simplify_type;
-template<> struct simplify_type<const AssertingVH<Value> > {
- typedef Value* SimpleType;
- static SimpleType getSimplifiedValue(const AssertingVH<Value> &AVH) {
- return static_cast<Value *>(AVH);
- }
-};
-template<> struct simplify_type<AssertingVH<Value> >
- : public simplify_type<const AssertingVH<Value> > {};
-
// Specialize DenseMapInfo to allow AssertingVH to participate in DenseMap.
template<typename T>
struct DenseMapInfo<AssertingVH<T> > {
@@ -345,18 +332,6 @@ public:
ValueTy &operator*() const { return *getValPtr(); }
};
-// Specialize simplify_type to allow TrackingVH to participate in
-// dyn_cast, isa, etc.
-template<typename From> struct simplify_type;
-template<> struct simplify_type<const TrackingVH<Value> > {
- typedef Value* SimpleType;
- static SimpleType getSimplifiedValue(const TrackingVH<Value> &AVH) {
- return static_cast<Value *>(AVH);
- }
-};
-template<> struct simplify_type<TrackingVH<Value> >
- : public simplify_type<const TrackingVH<Value> > {};
-
/// CallbackVH - This is a value handle that allows subclasses to define
/// callbacks that run when the underlying Value has RAUW called on it or is
/// destroyed. This class can be used as the key of a map, as long as the user
@@ -399,18 +374,6 @@ public:
virtual void allUsesReplacedWith(Value *);
};
-// Specialize simplify_type to allow CallbackVH to participate in
-// dyn_cast, isa, etc.
-template<typename From> struct simplify_type;
-template<> struct simplify_type<const CallbackVH> {
- typedef Value* SimpleType;
- static SimpleType getSimplifiedValue(const CallbackVH &CVH) {
- return static_cast<Value *>(CVH);
- }
-};
-template<> struct simplify_type<CallbackVH>
- : public simplify_type<const CallbackVH> {};
-
} // End llvm namespace
#endif
diff --git a/include/llvm/Support/Watchdog.h b/include/llvm/Support/Watchdog.h
new file mode 100644
index 000000000000..b58496b2fb8e
--- /dev/null
+++ b/include/llvm/Support/Watchdog.h
@@ -0,0 +1,38 @@
+//===--- Watchdog.h - Watchdog timer ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_WATCHDOG_H
+#define LLVM_SUPPORT_WATCHDOG_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ namespace sys {
+
+ /// This class provides an abstraction for a timeout around an operation
+ /// that must complete in a given amount of time. Failure to complete before
+ /// the timeout is an unrecoverable situation and no mechanisms to attempt
+ /// to handle it are provided.
+ class Watchdog {
+ public:
+ Watchdog(unsigned int seconds);
+ ~Watchdog();
+ private:
+ // Noncopyable.
+ Watchdog(const Watchdog &other) LLVM_DELETED_FUNCTION;
+ Watchdog &operator=(const Watchdog &other) LLVM_DELETED_FUNCTION;
+ };
+ }
+}
+
+#endif
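Watchdog is a scope-based guard: construction arms the timeout and destruction disarms it. A minimal sketch (the function name is illustrative):

    #include "llvm/Support/Watchdog.h"

    void mustFinishQuickly() {
      llvm::sys::Watchdog Guard(30); // unrecoverable abort if this scope outlives 30s
      // ... the bounded operation ...
    }                                // ~Watchdog disarms the timer on normal exit
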
diff --git a/include/llvm/Support/Win64EH.h b/include/llvm/Support/Win64EH.h
index 8d74e10be003..ecce71368041 100644
--- a/include/llvm/Support/Win64EH.h
+++ b/include/llvm/Support/Win64EH.h
@@ -17,6 +17,7 @@
#define LLVM_SUPPORT_WIN64EH_H
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Endian.h"
namespace llvm {
namespace Win64EH {
@@ -39,11 +40,17 @@ enum UnwindOpcodes {
/// or part thereof.
union UnwindCode {
struct {
- uint8_t codeOffset;
- uint8_t unwindOp:4,
- opInfo:4;
+ support::ulittle8_t CodeOffset;
+ support::ulittle8_t UnwindOpAndOpInfo;
} u;
- uint16_t frameOffset;
+ support::ulittle16_t FrameOffset;
+
+ uint8_t getUnwindOp() const {
+ return u.UnwindOpAndOpInfo & 0x0F;
+ }
+ uint8_t getOpInfo() const {
+ return (u.UnwindOpAndOpInfo >> 4) & 0x0F;
+ }
};
enum {
@@ -60,37 +67,75 @@ enum {
/// RuntimeFunction - An entry in the table of functions with unwind info.
struct RuntimeFunction {
- uint64_t startAddress;
- uint64_t endAddress;
- uint64_t unwindInfoOffset;
+ support::ulittle32_t StartAddress;
+ support::ulittle32_t EndAddress;
+ support::ulittle32_t UnwindInfoOffset;
};
/// UnwindInfo - An entry in the exception table.
struct UnwindInfo {
- uint8_t version:3,
- flags:5;
- uint8_t prologSize;
- uint8_t numCodes;
- uint8_t frameRegister:4,
- frameOffset:4;
- UnwindCode unwindCodes[1];
+ support::ulittle8_t VersionAndFlags;
+ support::ulittle8_t PrologSize;
+ support::ulittle8_t NumCodes;
+ support::ulittle8_t FrameRegisterAndOffset;
+ UnwindCode UnwindCodes[1];
+ uint8_t getVersion() const {
+ return VersionAndFlags & 0x07;
+ }
+ uint8_t getFlags() const {
+ return (VersionAndFlags >> 3) & 0x1f;
+ }
+ uint8_t getFrameRegister() const {
+ return FrameRegisterAndOffset & 0x0f;
+ }
+ uint8_t getFrameOffset() const {
+ return (FrameRegisterAndOffset >> 4) & 0x0f;
+ }
+
+ // The data after unwindCodes depends on flags.
+ // If UNW_ExceptionHandler or UNW_TerminateHandler is set then follows
+ // the address of the language-specific exception handler.
+ // If UNW_ChainInfo is set then follows a RuntimeFunction which defines
+ // the chained unwind info.
+ // For more information please see MSDN at:
+ // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
+
+ /// \brief Return pointer to language specific data part of UnwindInfo.
void *getLanguageSpecificData() {
- return reinterpret_cast<void *>(&unwindCodes[(numCodes+1) & ~1]);
+ return reinterpret_cast<void *>(&UnwindCodes[(NumCodes+1) & ~1]);
}
- uint64_t getLanguageSpecificHandlerOffset() {
- return *reinterpret_cast<uint64_t *>(getLanguageSpecificData());
+
+ /// \brief Return pointer to language specific data part of UnwindInfo.
+ const void *getLanguageSpecificData() const {
+ return reinterpret_cast<const void *>(&UnwindCodes[(NumCodes+1) & ~1]);
}
- void setLanguageSpecificHandlerOffset(uint64_t offset) {
- *reinterpret_cast<uint64_t *>(getLanguageSpecificData()) = offset;
+
+ /// \brief Return image-relative offset of language-specific exception handler.
+ uint32_t getLanguageSpecificHandlerOffset() const {
+ return *reinterpret_cast<const uint32_t *>(getLanguageSpecificData());
}
- RuntimeFunction *getChainedFunctionEntry() {
- return reinterpret_cast<RuntimeFunction *>(getLanguageSpecificData());
+
+ /// \brief Set image-relative offset of language-specific exception handler.
+ void setLanguageSpecificHandlerOffset(uint32_t offset) {
+ *reinterpret_cast<uint32_t *>(getLanguageSpecificData()) = offset;
}
+
+ /// \brief Return pointer to exception-specific data.
void *getExceptionData() {
- return reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(
+ return reinterpret_cast<void *>(reinterpret_cast<uint32_t *>(
getLanguageSpecificData())+1);
}
+
+ /// \brief Return pointer to chained unwind info.
+ RuntimeFunction *getChainedFunctionEntry() {
+ return reinterpret_cast<RuntimeFunction *>(getLanguageSpecificData());
+ }
+
+ /// \brief Return pointer to chained unwind info.
+ const RuntimeFunction *getChainedFunctionEntry() const {
+ return reinterpret_cast<const RuntimeFunction *>(getLanguageSpecificData());
+ }
};
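The packed fields are now read through accessors instead of bitfields, keeping the little-endian on-disk layout explicit. A decoding sketch over the structures above; UnwindCodes is a trailing array in the flexible-array idiom, and some opcodes occupy two slots (ignored here for brevity).

    #include "llvm/Support/Win64EH.h"
    using namespace llvm::Win64EH;

    void dumpUnwindInfo(const UnwindInfo &UI) {
      uint8_t Version = UI.getVersion(); // low 3 bits of VersionAndFlags
      uint8_t Flags = UI.getFlags();     // remaining 5 bits
      for (unsigned i = 0; i != UI.NumCodes; ++i) {
        const UnwindCode &UC = UI.UnwindCodes[i];
        uint8_t Op = UC.getUnwindOp();   // low nibble of UnwindOpAndOpInfo
        uint8_t Info = UC.getOpInfo();   // high nibble
        (void)Op; (void)Info;
      }
      (void)Version; (void)Flags;
    }
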
diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h
index 12958fa173d0..6e4f57f6ab4a 100644
--- a/include/llvm/Support/YAMLParser.h
+++ b/include/llvm/Support/YAMLParser.h
@@ -35,15 +35,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_YAML_PARSER_H
-#define LLVM_SUPPORT_YAML_PARSER_H
+#ifndef LLVM_SUPPORT_YAMLPARSER_H
+#define LLVM_SUPPORT_YAMLPARSER_H
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/SMLoc.h"
-
#include <limits>
#include <utility>
@@ -77,7 +76,11 @@ std::string escape(StringRef Input);
/// documents.
class Stream {
public:
+ /// @brief This keeps a reference to the string referenced by \p Input.
Stream(StringRef Input, SourceMgr &);
+
+ /// @brief This takes ownership of \p InputBuffer.
+ Stream(MemoryBuffer *InputBuffer, SourceMgr &);
~Stream();
document_iterator begin();
@@ -181,7 +184,7 @@ public:
: Node(NK_Scalar, D, Anchor)
, Value(Val) {
SMLoc Start = SMLoc::getFromPointer(Val.begin());
- SMLoc End = SMLoc::getFromPointer(Val.end() - 1);
+ SMLoc End = SMLoc::getFromPointer(Val.end());
SourceRange = SMRange(Start, End);
}
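The new overload changes ownership: the StringRef form only references the input, while the MemoryBuffer form takes ownership. A sketch iterating a stream's documents; it assumes Stream::end() and the document_iterator comparison operators, which this hunk does not show.

    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/YAMLParser.h"
    using namespace llvm;

    unsigned countDocuments(StringRef Input) {
      SourceMgr SM;
      yaml::Stream S(Input, SM); // references Input; the caller keeps it alive
      unsigned N = 0;
      for (yaml::document_iterator I = S.begin(), E = S.end(); I != E; ++I)
        ++N;                     // one iteration per YAML document
      return N;
    }
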
diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h
new file mode 100644
index 000000000000..801868ff1f1f
--- /dev/null
+++ b/include/llvm/Support/YAMLTraits.h
@@ -0,0 +1,1104 @@
+//===- llvm/Support/YAMLTraits.h --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_YAMLTRAITS_H
+#define LLVM_SUPPORT_YAMLTRAITS_H
+
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/type_traits.h"
+
+
+namespace llvm {
+namespace yaml {
+
+
+/// This class should be specialized by any type that needs to be converted
+/// to/from a YAML mapping. For example:
+///
+/// struct MappingTraits<MyStruct> {
+/// static void mapping(IO &io, MyStruct &s) {
+/// io.mapRequired("name", s.name);
+/// io.mapRequired("size", s.size);
+/// io.mapOptional("age", s.age);
+/// }
+/// };
+template<class T>
+struct MappingTraits {
+ // Must provide:
+ // static void mapping(IO &io, T &fields);
+};
+
+
+/// This class should be specialized by any integral type that converts
+/// to/from a YAML scalar where there is a one-to-one mapping between
+/// in-memory values and a string in YAML. For example:
+///
+/// struct ScalarEnumerationTraits<Colors> {
+/// static void enumeration(IO &io, Colors &value) {
+/// io.enumCase(value, "red", cRed);
+/// io.enumCase(value, "blue", cBlue);
+/// io.enumCase(value, "green", cGreen);
+/// }
+/// };
+template<typename T>
+struct ScalarEnumerationTraits {
+ // Must provide:
+ // static void enumeration(IO &io, T &value);
+};
+
+
+/// This class should be specialized by any integer type that is a union
+/// of bit values and the YAML representation is a flow sequence of
+/// strings. For example:
+///
+/// struct ScalarBitSetTraits<MyFlags> {
+/// static void bitset(IO &io, MyFlags &value) {
+/// io.bitSetCase(value, "big", flagBig);
+/// io.bitSetCase(value, "flat", flagFlat);
+/// io.bitSetCase(value, "round", flagRound);
+/// }
+/// };
+template<typename T>
+struct ScalarBitSetTraits {
+ // Must provide:
+ // static void bitset(IO &io, T &value);
+};
+
+
+/// This class should be specialized by any type that requires custom
+/// conversion to/from a YAML scalar. For example:
+///
+/// template<>
+/// struct ScalarTraits<MyType> {
+/// static void output(const MyType &val, void*, llvm::raw_ostream &out) {
+/// // stream out custom formatting
+/// out << llvm::format("%x", val);
+/// }
+/// static StringRef input(StringRef scalar, void*, MyType &value) {
+/// // parse scalar and set `value`
+/// // return empty string on success, or error string
+/// return StringRef();
+/// }
+/// };
+template<typename T>
+struct ScalarTraits {
+ // Must provide:
+ //
+ // Function to write the value as a string:
+ //static void output(const T &value, void *ctxt, llvm::raw_ostream &out);
+ //
+ // Function to convert a string to a value. Returns the empty
+ // StringRef on success or an error string if string is malformed:
+ //static StringRef input(StringRef scalar, void *ctxt, T &value);
+};
+
+
+/// This class should be specialized by any type that needs to be converted
+/// to/from a YAML sequence. For example:
+///
+/// template<>
+/// struct SequenceTraits< std::vector<MyType> > {
+/// static size_t size(IO &io, std::vector<MyType> &seq) {
+/// return seq.size();
+/// }
+/// static MyType& element(IO &, std::vector<MyType> &seq, size_t index) {
+/// if ( index >= seq.size() )
+/// seq.resize(index+1);
+/// return seq[index];
+/// }
+/// };
+template<typename T>
+struct SequenceTraits {
+ // Must provide:
+ // static size_t size(IO &io, T &seq);
+ // static T::value_type& element(IO &io, T &seq, size_t index);
+ //
+ // The following is optional and will cause generated YAML to use
+ // a flow sequence (e.g. [a,b,c]).
+ // static const bool flow = true;
+};
+
+
+/// This class should be specialized by any type that needs to be converted
+/// to/from a list of YAML documents.
+template<typename T>
+struct DocumentListTraits {
+ // Must provide:
+ // static size_t size(IO &io, T &seq);
+ // static T::value_type& element(IO &io, T &seq, size_t index);
+};
+
+
+// Only used by the compiler if both template types are the same
+template <typename T, T>
+struct SameType;
+
+// Only used for better diagnostics of missing traits
+template <typename T>
+struct MissingTrait;
+
+
+
+// Test if ScalarEnumerationTraits<T> is defined on type T.
+template <class T>
+struct has_ScalarEnumerationTraits
+{
+ typedef void (*Signature_enumeration)(class IO&, T&);
+
+ template <typename U>
+ static char test(SameType<Signature_enumeration, &U::enumeration>*);
+
+ template <typename U>
+ static double test(...);
+
+public:
+ static bool const value = (sizeof(test<ScalarEnumerationTraits<T> >(0)) == 1);
+};
+
+
+// Test if ScalarBitSetTraits<T> is defined on type T.
+template <class T>
+struct has_ScalarBitSetTraits
+{
+ typedef void (*Signature_bitset)(class IO&, T&);
+
+ template <typename U>
+ static char test(SameType<Signature_bitset, &U::bitset>*);
+
+ template <typename U>
+ static double test(...);
+
+public:
+ static bool const value = (sizeof(test<ScalarBitSetTraits<T> >(0)) == 1);
+};
+
+
+// Test if ScalarTraits<T> is defined on type T.
+template <class T>
+struct has_ScalarTraits
+{
+ typedef StringRef (*Signature_input)(StringRef, void*, T&);
+ typedef void (*Signature_output)(const T&, void*, llvm::raw_ostream&);
+
+ template <typename U>
+ static char test(SameType<Signature_input, &U::input>*,
+ SameType<Signature_output, &U::output>*);
+
+ template <typename U>
+ static double test(...);
+
+public:
+ static bool const value = (sizeof(test<ScalarTraits<T> >(0,0)) == 1);
+};
+
+
+// Test if MappingTraits<T> is defined on type T.
+template <class T>
+struct has_MappingTraits
+{
+ typedef void (*Signature_mapping)(class IO&, T&);
+
+ template <typename U>
+ static char test(SameType<Signature_mapping, &U::mapping>*);
+
+ template <typename U>
+ static double test(...);
+
+public:
+ static bool const value = (sizeof(test<MappingTraits<T> >(0)) == 1);
+};
+
+
+// Test if SequenceTraits<T> defines the required size() method.
+template <class T>
+struct has_SequenceMethodTraits
+{
+ typedef size_t (*Signature_size)(class IO&, T&);
+
+ template <typename U>
+ static char test(SameType<Signature_size, &U::size>*);
+
+ template <typename U>
+ static double test(...);
+
+public:
+ static bool const value = (sizeof(test<SequenceTraits<T> >(0)) == 1);
+};
+
+
+// has_FlowTraits<int> will cause an error with some compilers because
+// it subclasses int. This wrapper ensures that the real has_FlowTraits
+// is instantiated only if the template type is a class.
+template <typename T, bool Enabled = llvm::is_class<T>::value>
+class has_FlowTraits
+{
+public:
+ static const bool value = false;
+};
+
+// Some older gcc compilers don't support straightforward tests
+// for members, so test for the ambiguity caused by the base and derived
+// classes both defining the member.
+template <class T>
+struct has_FlowTraits<T, true>
+{
+ struct Fallback { bool flow; };
+ struct Derived : T, Fallback { };
+
+ template<typename C>
+ static char (&f(SameType<bool Fallback::*, &C::flow>*))[1];
+
+ template<typename C>
+ static char (&f(...))[2];
+
+public:
+ static bool const value = sizeof(f<Derived>(0)) == 2;
+};
+
+
+
+// Test if SequenceTraits<T> is defined on type T
+template<typename T>
+struct has_SequenceTraits : public llvm::integral_constant<bool,
+ has_SequenceMethodTraits<T>::value > { };
+
+
+// Test if DocumentListTraits<T> is defined on type T
+template <class T>
+struct has_DocumentListTraits
+{
+ typedef size_t (*Signature_size)(class IO&, T&);
+
+ template <typename U>
+ static char test(SameType<Signature_size, &U::size>*);
+
+ template <typename U>
+ static double test(...);
+
+public:
+ static bool const value = (sizeof(test<DocumentListTraits<T> >(0)) == 1);
+};
+
+
+
+
+template<typename T>
+struct missingTraits : public llvm::integral_constant<bool,
+ !has_ScalarEnumerationTraits<T>::value
+ && !has_ScalarBitSetTraits<T>::value
+ && !has_ScalarTraits<T>::value
+ && !has_MappingTraits<T>::value
+ && !has_SequenceTraits<T>::value
+ && !has_DocumentListTraits<T>::value > {};
+
+
+// Base class for Input and Output.
+class IO {
+public:
+
+ IO(void *Ctxt=NULL);
+ virtual ~IO();
+
+ virtual bool outputting() = 0;
+
+ virtual unsigned beginSequence() = 0;
+ virtual bool preflightElement(unsigned, void *&) = 0;
+ virtual void postflightElement(void*) = 0;
+ virtual void endSequence() = 0;
+
+ virtual unsigned beginFlowSequence() = 0;
+ virtual bool preflightFlowElement(unsigned, void *&) = 0;
+ virtual void postflightFlowElement(void*) = 0;
+ virtual void endFlowSequence() = 0;
+
+ virtual void beginMapping() = 0;
+ virtual void endMapping() = 0;
+ virtual bool preflightKey(const char*, bool, bool, bool &, void *&) = 0;
+ virtual void postflightKey(void*) = 0;
+
+ virtual void beginEnumScalar() = 0;
+ virtual bool matchEnumScalar(const char*, bool) = 0;
+ virtual void endEnumScalar() = 0;
+
+ virtual bool beginBitSetScalar(bool &) = 0;
+ virtual bool bitSetMatch(const char*, bool) = 0;
+ virtual void endBitSetScalar() = 0;
+
+ virtual void scalarString(StringRef &) = 0;
+
+ virtual void setError(const Twine &) = 0;
+
+ template <typename T>
+ void enumCase(T &Val, const char* Str, const T ConstVal) {
+ if ( matchEnumScalar(Str, outputting() && Val == ConstVal) ) {
+ Val = ConstVal;
+ }
+ }
+
+ // allow anonymous enum values to be used with LLVM_YAML_STRONG_TYPEDEF
+ template <typename T>
+ void enumCase(T &Val, const char* Str, const uint32_t ConstVal) {
+ if ( matchEnumScalar(Str, outputting() && Val == static_cast<T>(ConstVal)) ) {
+ Val = ConstVal;
+ }
+ }
+
+ template <typename T>
+ void bitSetCase(T &Val, const char* Str, const T ConstVal) {
+ if ( bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal) ) {
+ Val = Val | ConstVal;
+ }
+ }
+
+ // allow anonymous enum values to be used with LLVM_YAML_STRONG_TYPEDEF
+ template <typename T>
+ void bitSetCase(T &Val, const char* Str, const uint32_t ConstVal) {
+ if ( bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal) ) {
+ Val = Val | ConstVal;
+ }
+ }
+
+ void *getContext();
+ void setContext(void *);
+
+ template <typename T>
+ void mapRequired(const char* Key, T& Val) {
+ this->processKey(Key, Val, true);
+ }
+
+ template <typename T>
+ typename llvm::enable_if_c<has_SequenceTraits<T>::value,void>::type
+ mapOptional(const char* Key, T& Val) {
+ // omit key/value instead of outputting empty sequence
+ if ( this->outputting() && !(Val.begin() != Val.end()) )
+ return;
+ this->processKey(Key, Val, false);
+ }
+
+ template <typename T>
+ typename llvm::enable_if_c<!has_SequenceTraits<T>::value,void>::type
+ mapOptional(const char* Key, T& Val) {
+ this->processKey(Key, Val, false);
+ }
+
+ template <typename T>
+ void mapOptional(const char* Key, T& Val, const T& Default) {
+ this->processKeyWithDefault(Key, Val, Default, false);
+ }
+
+
+private:
+ template <typename T>
+ void processKeyWithDefault(const char *Key, T &Val, const T& DefaultValue,
+ bool Required) {
+ void *SaveInfo;
+ bool UseDefault;
+ const bool sameAsDefault = outputting() && Val == DefaultValue;
+ if ( this->preflightKey(Key, Required, sameAsDefault, UseDefault,
+ SaveInfo) ) {
+ yamlize(*this, Val, Required);
+ this->postflightKey(SaveInfo);
+ }
+ else {
+ if ( UseDefault )
+ Val = DefaultValue;
+ }
+ }
+
+ template <typename T>
+ void processKey(const char *Key, T &Val, bool Required) {
+ void *SaveInfo;
+ bool UseDefault;
+ if ( this->preflightKey(Key, Required, false, UseDefault, SaveInfo) ) {
+ yamlize(*this, Val, Required);
+ this->postflightKey(SaveInfo);
+ }
+ }
+
+private:
+ void *Ctxt;
+};
+
+
+
+template<typename T>
+typename llvm::enable_if_c<has_ScalarEnumerationTraits<T>::value,void>::type
+yamlize(IO &io, T &Val, bool) {
+ io.beginEnumScalar();
+ ScalarEnumerationTraits<T>::enumeration(io, Val);
+ io.endEnumScalar();
+}
+
+template<typename T>
+typename llvm::enable_if_c<has_ScalarBitSetTraits<T>::value,void>::type
+yamlize(IO &io, T &Val, bool) {
+ bool DoClear;
+ if ( io.beginBitSetScalar(DoClear) ) {
+ if ( DoClear )
+ Val = static_cast<T>(0);
+ ScalarBitSetTraits<T>::bitset(io, Val);
+ io.endBitSetScalar();
+ }
+}
+
+
+template<typename T>
+typename llvm::enable_if_c<has_ScalarTraits<T>::value,void>::type
+yamlize(IO &io, T &Val, bool) {
+ if ( io.outputting() ) {
+ std::string Storage;
+ llvm::raw_string_ostream Buffer(Storage);
+ ScalarTraits<T>::output(Val, io.getContext(), Buffer);
+ StringRef Str = Buffer.str();
+ io.scalarString(Str);
+ }
+ else {
+ StringRef Str;
+ io.scalarString(Str);
+ StringRef Result = ScalarTraits<T>::input(Str, io.getContext(), Val);
+ if ( !Result.empty() ) {
+ io.setError(llvm::Twine(Result));
+ }
+ }
+}
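+
+// A sketch of a custom scalar conversion (editor's example; 'MyDistance' is
+// a hypothetical type). ScalarTraits<T>::input() returns the empty string on
+// success, or an error message which yamlize() forwards to IO::setError():
+//
+//   template <>
+//   struct ScalarTraits<MyDistance> {
+//     static void output(const MyDistance &value, void *ctxt,
+//                        llvm::raw_ostream &out) {
+//       out << value.meters() << "m";
+//     }
+//     static StringRef input(StringRef scalar, void *ctxt,
+//                            MyDistance &value) {
+//       unsigned meters;
+//       if (scalar.rtrim("m").getAsInteger(10, meters))
+//         return "malformed distance";
+//       value = MyDistance(meters);
+//       return StringRef();   // empty string means success
+//     }
+//   };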
+
+
+template<typename T>
+typename llvm::enable_if_c<has_MappingTraits<T>::value, void>::type
+yamlize(IO &io, T &Val, bool) {
+ io.beginMapping();
+ MappingTraits<T>::mapping(io, Val);
+ io.endMapping();
+}
+
+// Provide a compile-time error for types that have no trait specialization:
+// MissingTrait<T> is declared but never defined, so instantiating sizeof()
+// fails with an error message that names this array.
+template<typename T>
+typename llvm::enable_if_c<missingTraits<T>::value, void>::type
+yamlize(IO &io, T &Val, bool) {
+  char missing_yaml_trait_for_type[sizeof(MissingTrait<T>)];
+}
+
+template<typename T>
+typename llvm::enable_if_c<has_SequenceTraits<T>::value,void>::type
+yamlize(IO &io, T &Seq, bool) {
+ if ( has_FlowTraits< SequenceTraits<T> >::value ) {
+ unsigned incnt = io.beginFlowSequence();
+ unsigned count = io.outputting() ? SequenceTraits<T>::size(io, Seq) : incnt;
+ for(unsigned i=0; i < count; ++i) {
+ void *SaveInfo;
+ if ( io.preflightFlowElement(i, SaveInfo) ) {
+ yamlize(io, SequenceTraits<T>::element(io, Seq, i), true);
+ io.postflightFlowElement(SaveInfo);
+ }
+ }
+ io.endFlowSequence();
+ }
+ else {
+ unsigned incnt = io.beginSequence();
+ unsigned count = io.outputting() ? SequenceTraits<T>::size(io, Seq) : incnt;
+ for(unsigned i=0; i < count; ++i) {
+ void *SaveInfo;
+ if ( io.preflightElement(i, SaveInfo) ) {
+ yamlize(io, SequenceTraits<T>::element(io, Seq, i), true);
+ io.postflightElement(SaveInfo);
+ }
+ }
+ io.endSequence();
+ }
+}
+
+
+template<>
+struct ScalarTraits<bool> {
+ static void output(const bool &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, bool &);
+};
+
+template<>
+struct ScalarTraits<StringRef> {
+ static void output(const StringRef &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, StringRef &);
+};
+
+template<>
+struct ScalarTraits<uint8_t> {
+ static void output(const uint8_t &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, uint8_t &);
+};
+
+template<>
+struct ScalarTraits<uint16_t> {
+ static void output(const uint16_t &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, uint16_t &);
+};
+
+template<>
+struct ScalarTraits<uint32_t> {
+ static void output(const uint32_t &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, uint32_t &);
+};
+
+template<>
+struct ScalarTraits<uint64_t> {
+ static void output(const uint64_t &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, uint64_t &);
+};
+
+template<>
+struct ScalarTraits<int8_t> {
+ static void output(const int8_t &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, int8_t &);
+};
+
+template<>
+struct ScalarTraits<int16_t> {
+ static void output(const int16_t &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, int16_t &);
+};
+
+template<>
+struct ScalarTraits<int32_t> {
+ static void output(const int32_t &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, int32_t &);
+};
+
+template<>
+struct ScalarTraits<int64_t> {
+ static void output(const int64_t &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, int64_t &);
+};
+
+template<>
+struct ScalarTraits<float> {
+ static void output(const float &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, float &);
+};
+
+template<>
+struct ScalarTraits<double> {
+ static void output(const double &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, double &);
+};
+
+
+
+// Utility for use within MappingTraits<>::mapping() method
+// to [de]normalize an object for use with YAML conversion.
+template <typename TNorm, typename TFinal>
+struct MappingNormalization {
+ MappingNormalization(IO &i_o, TFinal &Obj)
+ : io(i_o), BufPtr(NULL), Result(Obj) {
+ if ( io.outputting() ) {
+ BufPtr = new (&Buffer) TNorm(io, Obj);
+ }
+ else {
+ BufPtr = new (&Buffer) TNorm(io);
+ }
+ }
+
+ ~MappingNormalization() {
+ if ( ! io.outputting() ) {
+ Result = BufPtr->denormalize(io);
+ }
+ BufPtr->~TNorm();
+ }
+
+ TNorm* operator->() { return BufPtr; }
+
+private:
+ typedef llvm::AlignedCharArrayUnion<TNorm> Storage;
+
+ Storage Buffer;
+ IO &io;
+ TNorm *BufPtr;
+ TFinal &Result;
+};
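+
+// A sketch of MappingNormalization in use (editor's example; 'MyNormalized'
+// and 'MyCanonical' are hypothetical types). Inside mapping(), the
+// normalized proxy is accessed through operator->, and denormalize() runs
+// automatically at end of scope when reading:
+//
+//   template <>
+//   struct MappingTraits<MyCanonical> {
+//     static void mapping(IO &io, MyCanonical &obj) {
+//       MappingNormalization<MyNormalized, MyCanonical> keys(io, obj);
+//       io.mapRequired("x", keys->x);
+//       io.mapRequired("y", keys->y);
+//     }
+//   };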
+
+
+
+// Utility for use within MappingTraits<>::mapping() method
+// to [de]normalize an object for use with YAML conversion.
+template <typename TNorm, typename TFinal>
+struct MappingNormalizationHeap {
+ MappingNormalizationHeap(IO &i_o, TFinal &Obj)
+ : io(i_o), BufPtr(NULL), Result(Obj) {
+ if ( io.outputting() ) {
+ BufPtr = new (&Buffer) TNorm(io, Obj);
+ }
+ else {
+ BufPtr = new TNorm(io);
+ }
+ }
+
+ ~MappingNormalizationHeap() {
+ if ( io.outputting() ) {
+ BufPtr->~TNorm();
+ }
+ else {
+ Result = BufPtr->denormalize(io);
+ }
+ }
+
+ TNorm* operator->() { return BufPtr; }
+
+private:
+ typedef llvm::AlignedCharArrayUnion<TNorm> Storage;
+
+ Storage Buffer;
+ IO &io;
+ TNorm *BufPtr;
+ TFinal &Result;
+};
+
+
+
+///
+/// The Input class is used to parse a yaml document into in-memory structs
+/// and vectors.
+///
+/// It works by using YAMLParser to do a syntax parse of the entire yaml
+/// document, then the Input class builds a graph of HNodes, each of which
+/// wraps a yaml Node.  The extra layer is buffering: the low-level yaml
+/// parser only lets you look at each node once, but the buffering layer
+/// lets you search and iterate multiple times.  This is necessary because
+/// the mapRequired() method calls may not be in the same order
+/// as the keys in the document.  A usage sketch follows the class definition.
+///
+class Input : public IO {
+public:
+ // Construct a yaml Input object from a StringRef and optional user-data.
+ Input(StringRef InputContent, void *Ctxt=NULL);
+ ~Input();
+
+  // Check if there was a syntax or semantic error during parsing.
+ llvm::error_code error();
+
+ // To set alternate error reporting.
+ void setDiagHandler(llvm::SourceMgr::DiagHandlerTy Handler, void *Ctxt = 0);
+
+private:
+ virtual bool outputting();
+ virtual void beginMapping();
+ virtual void endMapping();
+ virtual bool preflightKey(const char *, bool, bool, bool &, void *&);
+ virtual void postflightKey(void *);
+ virtual unsigned beginSequence();
+ virtual void endSequence();
+ virtual bool preflightElement(unsigned index, void *&);
+ virtual void postflightElement(void *);
+ virtual unsigned beginFlowSequence();
+ virtual bool preflightFlowElement(unsigned , void *&);
+ virtual void postflightFlowElement(void *);
+ virtual void endFlowSequence();
+ virtual void beginEnumScalar();
+ virtual bool matchEnumScalar(const char*, bool);
+ virtual void endEnumScalar();
+ virtual bool beginBitSetScalar(bool &);
+ virtual bool bitSetMatch(const char *, bool );
+ virtual void endBitSetScalar();
+ virtual void scalarString(StringRef &);
+ virtual void setError(const Twine &message);
+
+ class HNode {
+ public:
+ HNode(Node *n) : _node(n) { }
+ virtual ~HNode() { }
+ static inline bool classof(const HNode *) { return true; }
+
+ Node *_node;
+ };
+
+ class EmptyHNode : public HNode {
+ public:
+ EmptyHNode(Node *n) : HNode(n) { }
+ virtual ~EmptyHNode() {}
+ static inline bool classof(const HNode *n) {
+ return NullNode::classof(n->_node);
+ }
+ static inline bool classof(const EmptyHNode *) { return true; }
+ };
+
+ class ScalarHNode : public HNode {
+ public:
+ ScalarHNode(Node *n, StringRef s) : HNode(n), _value(s) { }
+ virtual ~ScalarHNode() { }
+
+ StringRef value() const { return _value; }
+
+ static inline bool classof(const HNode *n) {
+ return ScalarNode::classof(n->_node);
+ }
+ static inline bool classof(const ScalarHNode *) { return true; }
+ protected:
+ StringRef _value;
+ };
+
+ class MapHNode : public HNode {
+ public:
+ MapHNode(Node *n) : HNode(n) { }
+ virtual ~MapHNode();
+
+ static inline bool classof(const HNode *n) {
+ return MappingNode::classof(n->_node);
+ }
+ static inline bool classof(const MapHNode *) { return true; }
+
+ struct StrMappingInfo {
+ static StringRef getEmptyKey() { return StringRef(); }
+ static StringRef getTombstoneKey() { return StringRef(" ", 0); }
+ static unsigned getHashValue(StringRef const val) {
+ return llvm::HashString(val); }
+ static bool isEqual(StringRef const lhs,
+ StringRef const rhs) { return lhs.equals(rhs); }
+ };
+ typedef llvm::DenseMap<StringRef, HNode*, StrMappingInfo> NameToNode;
+
+ bool isValidKey(StringRef key);
+
+ NameToNode Mapping;
+ llvm::SmallVector<const char*, 6> ValidKeys;
+ };
+
+ class SequenceHNode : public HNode {
+ public:
+ SequenceHNode(Node *n) : HNode(n) { }
+ virtual ~SequenceHNode();
+
+ static inline bool classof(const HNode *n) {
+ return SequenceNode::classof(n->_node);
+ }
+ static inline bool classof(const SequenceHNode *) { return true; }
+
+ std::vector<HNode*> Entries;
+ };
+
+ Input::HNode *createHNodes(Node *node);
+ void setError(HNode *hnode, const Twine &message);
+ void setError(Node *node, const Twine &message);
+
+
+public:
+ // These are only used by operator>>. They could be private
+ // if those templated things could be made friends.
+ bool setCurrentDocument();
+ void nextDocument();
+
+private:
+ llvm::SourceMgr SrcMgr; // must be before Strm
+ OwningPtr<llvm::yaml::Stream> Strm;
+ OwningPtr<HNode> TopNode;
+ llvm::error_code EC;
+ llvm::BumpPtrAllocator StringAllocator;
+ llvm::yaml::document_iterator DocIterator;
+ std::vector<bool> BitValuesUsed;
+ HNode *CurrentNode;
+ bool ScalarMatchFound;
+};
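+
+// A sketch of reading a document with Input (editor's example; 'MyDoc' is a
+// hypothetical struct with a MappingTraits specialization, and 'yamlText' a
+// StringRef holding the document). Errors are reported via Input::error():
+//
+//   MyDoc doc;
+//   Input yin(yamlText);
+//   yin >> doc;
+//   if (yin.error()) {
+//     // handle the parse failure
+//   }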
+
+
+
+
+///
+/// The Output class is used to generate a yaml document from in-memory structs
+/// and vectors.  A usage sketch follows the class definition.
+///
+class Output : public IO {
+public:
+ Output(llvm::raw_ostream &, void *Ctxt=NULL);
+ virtual ~Output();
+
+ virtual bool outputting();
+ virtual void beginMapping();
+ virtual void endMapping();
+ virtual bool preflightKey(const char *key, bool, bool, bool &, void *&);
+ virtual void postflightKey(void *);
+ virtual unsigned beginSequence();
+ virtual void endSequence();
+ virtual bool preflightElement(unsigned, void *&);
+ virtual void postflightElement(void *);
+ virtual unsigned beginFlowSequence();
+ virtual bool preflightFlowElement(unsigned, void *&);
+ virtual void postflightFlowElement(void *);
+ virtual void endFlowSequence();
+ virtual void beginEnumScalar();
+ virtual bool matchEnumScalar(const char*, bool);
+ virtual void endEnumScalar();
+ virtual bool beginBitSetScalar(bool &);
+ virtual bool bitSetMatch(const char *, bool );
+ virtual void endBitSetScalar();
+ virtual void scalarString(StringRef &);
+ virtual void setError(const Twine &message);
+
+public:
+ // These are only used by operator<<. They could be private
+ // if that templated operator could be made a friend.
+ void beginDocuments();
+ bool preflightDocument(unsigned);
+ void postflightDocument();
+ void endDocuments();
+
+private:
+ void output(StringRef s);
+ void outputUpToEndOfLine(StringRef s);
+ void newLineCheck();
+ void outputNewLine();
+ void paddedKey(StringRef key);
+
+ enum InState { inSeq, inFlowSeq, inMapFirstKey, inMapOtherKey };
+
+ llvm::raw_ostream &Out;
+ SmallVector<InState, 8> StateStack;
+ int Column;
+ int ColumnAtFlowStart;
+ bool NeedBitValueComma;
+ bool NeedFlowSequenceComma;
+ bool EnumerationMatchFound;
+ bool NeedsNewLine;
+};
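+
+// A sketch of writing a document with Output (editor's example; 'MyDoc' is a
+// hypothetical struct with a MappingTraits specialization):
+//
+//   MyDoc doc = makeDoc();
+//   Output yout(llvm::outs());
+//   yout << doc;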
+
+
+
+
+/// YAML I/O does conversion based on types. But often native data types
+/// are just a typedef of a built-in integral type (e.g. int), and the C++
+/// type matching system sees through the typedef, so all such typedefed
+/// types look like the built-in type. This causes the generic YAML I/O
+/// conversion to be used. To provide better control over the YAML
+/// conversion, you can use this macro instead of a typedef. It creates a
+/// class with one field and automatic conversion operators to and from the
+/// base type. Based on BOOST_STRONG_TYPEDEF.
+#define LLVM_YAML_STRONG_TYPEDEF(_base, _type) \
+ struct _type { \
+ _type() { } \
+ _type(const _base v) : value(v) { } \
+ _type(const _type &v) : value(v.value) {} \
+ _type &operator=(const _type &rhs) { value = rhs.value; return *this; }\
+ _type &operator=(const _base &rhs) { value = rhs; return *this; } \
+ operator const _base & () const { return value; } \
+ bool operator==(const _type &rhs) const { return value == rhs.value; } \
+ bool operator==(const _base &rhs) const { return value == rhs; } \
+ bool operator<(const _type &rhs) const { return value < rhs.value; } \
+ _base value; \
+ };
+
+
+
+///
+/// Use these types instead of uintXX_t in any mapping to have
+/// its yaml output formatted as hexadecimal.
+///
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, Hex8)
+LLVM_YAML_STRONG_TYPEDEF(uint16_t, Hex16)
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, Hex32)
+LLVM_YAML_STRONG_TYPEDEF(uint64_t, Hex64)
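+
+// For example (editor's sketch; 'hdr.address' is a hypothetical Hex32
+// field), a mapping such as
+//
+//   io.mapRequired("address", hdr.address);
+//
+// writes the value as 0x-prefixed hexadecimal rather than decimal.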
+
+
+template<>
+struct ScalarTraits<Hex8> {
+ static void output(const Hex8 &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, Hex8 &);
+};
+
+template<>
+struct ScalarTraits<Hex16> {
+ static void output(const Hex16 &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, Hex16 &);
+};
+
+template<>
+struct ScalarTraits<Hex32> {
+ static void output(const Hex32 &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, Hex32 &);
+};
+
+template<>
+struct ScalarTraits<Hex64> {
+ static void output(const Hex64 &, void*, llvm::raw_ostream &);
+ static StringRef input(StringRef, void*, Hex64 &);
+};
+
+
+// Define non-member operator>> so that Input can stream in a document list.
+template <typename T>
+inline
+typename llvm::enable_if_c<has_DocumentListTraits<T>::value,Input &>::type
+operator>>(Input &yin, T &docList) {
+ int i = 0;
+ while ( yin.setCurrentDocument() ) {
+ yamlize(yin, DocumentListTraits<T>::element(yin, docList, i), true);
+ if ( yin.error() )
+ return yin;
+ yin.nextDocument();
+ ++i;
+ }
+ return yin;
+}
+
+// Define non-member operator>> so that Input can stream in a map as a document.
+template <typename T>
+inline
+typename llvm::enable_if_c<has_MappingTraits<T>::value,Input &>::type
+operator>>(Input &yin, T &docMap) {
+ yin.setCurrentDocument();
+ yamlize(yin, docMap, true);
+ return yin;
+}
+
+// Define non-member operator>> so that Input can stream in a sequence as
+// a document.
+template <typename T>
+inline
+typename llvm::enable_if_c<has_SequenceTraits<T>::value,Input &>::type
+operator>>(Input &yin, T &docSeq) {
+ yin.setCurrentDocument();
+ yamlize(yin, docSeq, true);
+ return yin;
+}
+
+// Provide better error message about types missing a trait specialization
+template <typename T>
+inline
+typename llvm::enable_if_c<missingTraits<T>::value,Input &>::type
+operator>>(Input &yin, T &docSeq) {
+ char missing_yaml_trait_for_type[sizeof(MissingTrait<T>)];
+ return yin;
+}
+
+
+// Define non-member operator<< so that Output can stream out document list.
+template <typename T>
+inline
+typename llvm::enable_if_c<has_DocumentListTraits<T>::value,Output &>::type
+operator<<(Output &yout, T &docList) {
+ yout.beginDocuments();
+ const size_t count = DocumentListTraits<T>::size(yout, docList);
+ for(size_t i=0; i < count; ++i) {
+ if ( yout.preflightDocument(i) ) {
+ yamlize(yout, DocumentListTraits<T>::element(yout, docList, i), true);
+ yout.postflightDocument();
+ }
+ }
+ yout.endDocuments();
+ return yout;
+}
+
+// Define non-member operator<< so that Output can stream out a map.
+template <typename T>
+inline
+typename llvm::enable_if_c<has_MappingTraits<T>::value,Output &>::type
+operator<<(Output &yout, T &map) {
+ yout.beginDocuments();
+ if ( yout.preflightDocument(0) ) {
+ yamlize(yout, map, true);
+ yout.postflightDocument();
+ }
+ yout.endDocuments();
+ return yout;
+}
+
+// Define non-member operator<< so that Output can stream out a sequence.
+template <typename T>
+inline
+typename llvm::enable_if_c<has_SequenceTraits<T>::value,Output &>::type
+operator<<(Output &yout, T &seq) {
+ yout.beginDocuments();
+ if ( yout.preflightDocument(0) ) {
+ yamlize(yout, seq, true);
+ yout.postflightDocument();
+ }
+ yout.endDocuments();
+ return yout;
+}
+
+// Provide better error message about types missing a trait specialization
+template <typename T>
+inline
+typename llvm::enable_if_c<missingTraits<T>::value,Output &>::type
+operator<<(Output &yout, T &seq) {
+ char missing_yaml_trait_for_type[sizeof(MissingTrait<T>)];
+ return yout;
+}
+
+
+} // namespace yaml
+} // namespace llvm
+
+
+/// Utility for declaring that a std::vector of a particular type
+/// should be considered a YAML sequence.
+#define LLVM_YAML_IS_SEQUENCE_VECTOR(_type) \
+ namespace llvm { \
+ namespace yaml { \
+ template<> \
+ struct SequenceTraits< std::vector<_type> > { \
+ static size_t size(IO &io, std::vector<_type> &seq) { \
+ return seq.size(); \
+ } \
+ static _type& element(IO &io, std::vector<_type> &seq, size_t index) {\
+ if ( index >= seq.size() ) \
+ seq.resize(index+1); \
+ return seq[index]; \
+ } \
+ }; \
+ } \
+ }
+
+/// Utility for declaring that a std::vector of a particular type
+/// should be considered a YAML flow sequence.
+#define LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(_type) \
+ namespace llvm { \
+ namespace yaml { \
+ template<> \
+ struct SequenceTraits< std::vector<_type> > { \
+ static size_t size(IO &io, std::vector<_type> &seq) { \
+ return seq.size(); \
+ } \
+ static _type& element(IO &io, std::vector<_type> &seq, size_t index) {\
+ if ( index >= seq.size() ) \
+ seq.resize(index+1); \
+ return seq[index]; \
+ } \
+ static const bool flow = true; \
+ }; \
+ } \
+ }
+
+/// Utility for declaring that a std::vector of a particular type
+/// should be considered a YAML document list.
+#define LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(_type) \
+ namespace llvm { \
+ namespace yaml { \
+ template<> \
+ struct DocumentListTraits< std::vector<_type> > { \
+ static size_t size(IO &io, std::vector<_type> &seq) { \
+ return seq.size(); \
+ } \
+ static _type& element(IO &io, std::vector<_type> &seq, size_t index) {\
+ if ( index >= seq.size() ) \
+ seq.resize(index+1); \
+ return seq[index]; \
+ } \
+ }; \
+ } \
+ }
+
+
+
+#endif // LLVM_SUPPORT_YAMLTRAITS_H
diff --git a/include/llvm/Support/circular_raw_ostream.h b/include/llvm/Support/circular_raw_ostream.h
index 2823af33b746..900030685725 100644
--- a/include/llvm/Support/circular_raw_ostream.h
+++ b/include/llvm/Support/circular_raw_ostream.h
@@ -71,7 +71,7 @@ namespace llvm
/// flushBuffer - Dump the contents of the buffer to Stream.
///
- void flushBuffer(void) {
+ void flushBuffer() {
if (Filled)
// Write the older portion of the buffer.
TheStream->write(Cur, BufferArray + BufferSize - Cur);
@@ -151,7 +151,7 @@ namespace llvm
/// flushBufferWithBanner - Force output of the buffer along with
/// a small header.
///
- void flushBufferWithBanner(void);
+ void flushBufferWithBanner();
private:
/// releaseStream - Delete the held stream if needed. Otherwise,
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index eab0f2d8057e..d2b4a2af278a 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -29,7 +29,6 @@ namespace llvm {
/// a chunk at a time.
class raw_ostream {
private:
- // Do not implement. raw_ostream is noncopyable.
void operator=(const raw_ostream &) LLVM_DELETED_FUNCTION;
raw_ostream(const raw_ostream &) LLVM_DELETED_FUNCTION;
diff --git a/include/llvm/Support/system_error.h b/include/llvm/Support/system_error.h
index 0d164f688d37..43dace6ab816 100644
--- a/include/llvm/Support/system_error.h
+++ b/include/llvm/Support/system_error.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYSTEM_SYSTEM_ERROR_H
-#define LLVM_SYSTEM_SYSTEM_ERROR_H
+#ifndef LLVM_SUPPORT_SYSTEM_ERROR_H
+#define LLVM_SUPPORT_SYSTEM_ERROR_H
#include "llvm/Support/Compiler.h"
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index f9306395fce3..906e97c91fb6 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -145,6 +145,10 @@ template <typename T> struct is_pointer<T* const> : true_type {};
template <typename T> struct is_pointer<T* volatile> : true_type {};
template <typename T> struct is_pointer<T* const volatile> : true_type {};
+/// \brief Metafunction that determines whether the given type is a reference.
+template <typename T> struct is_reference : false_type {};
+template <typename T> struct is_reference<T&> : true_type {};
+
/// \brief Metafunction that determines whether the given type is either an
/// integral type or an enumeration type.
///
@@ -205,6 +209,26 @@ template <typename T> struct remove_pointer<T*volatile> { typedef T type; };
template <typename T> struct remove_pointer<T*const volatile> {
typedef T type; };
+// If T is a pointer, just return it. If it is not, return T&.
+template<typename T, typename Enable = void>
+struct add_lvalue_reference_if_not_pointer { typedef T &type; };
+
+template<typename T>
+struct add_lvalue_reference_if_not_pointer<T,
+ typename enable_if<is_pointer<T> >::type> {
+ typedef T type;
+};
+
+// If T is a pointer to X, return a pointer to const X. If it is not, return
+// const T.
+template<typename T, typename Enable = void>
+struct add_const_past_pointer { typedef const T type; };
+
+template<typename T>
+struct add_const_past_pointer<T, typename enable_if<is_pointer<T> >::type> {
+ typedef const typename remove_pointer<T>::type *type;
+};
+
template <bool, typename T, typename F>
struct conditional { typedef T type; };
diff --git a/include/llvm/SymbolTableListTraits.h b/include/llvm/SymbolTableListTraits.h
deleted file mode 100644
index ec5c88f5c8a7..000000000000
--- a/include/llvm/SymbolTableListTraits.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- llvm/SymbolTableListTraits.h - Traits for iplist --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a generic class that is used to implement the automatic
-// symbol table manipulation that occurs when you put (for example) a named
-// instruction into a basic block.
-//
-// The way that this is implemented is by using a special traits class with the
-// intrusive list that makes up the list of instructions in a basic block. When
-// a new element is added to the list of instructions, the traits class is
-// notified, allowing the symbol table to be updated.
-//
-// This generic class implements the traits class. It must be generic so that
-// it can work for all uses it, which include lists of instructions, basic
-// blocks, arguments, functions, global variables, etc...
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYMBOLTABLELISTTRAITS_H
-#define LLVM_SYMBOLTABLELISTTRAITS_H
-
-#include "llvm/ADT/ilist.h"
-
-namespace llvm {
-class ValueSymbolTable;
-
-template<typename NodeTy> class ilist_iterator;
-template<typename NodeTy, typename Traits> class iplist;
-template<typename Ty> struct ilist_traits;
-
-// ValueSubClass - The type of objects that I hold, e.g. Instruction.
-// ItemParentClass - The type of object that owns the list, e.g. BasicBlock.
-//
-template<typename ValueSubClass, typename ItemParentClass>
-class SymbolTableListTraits : public ilist_default_traits<ValueSubClass> {
- typedef ilist_traits<ValueSubClass> TraitsClass;
-public:
- SymbolTableListTraits() {}
-
- /// getListOwner - Return the object that owns this list. If this is a list
- /// of instructions, it returns the BasicBlock that owns them.
- ItemParentClass *getListOwner() {
- size_t Offset(size_t(&((ItemParentClass*)0->*ItemParentClass::
- getSublistAccess(static_cast<ValueSubClass*>(0)))));
- iplist<ValueSubClass>* Anchor(static_cast<iplist<ValueSubClass>*>(this));
- return reinterpret_cast<ItemParentClass*>(reinterpret_cast<char*>(Anchor)-
- Offset);
- }
-
- static iplist<ValueSubClass> &getList(ItemParentClass *Par) {
- return Par->*(Par->getSublistAccess((ValueSubClass*)0));
- }
-
- static ValueSymbolTable *getSymTab(ItemParentClass *Par) {
- return Par ? toPtr(Par->getValueSymbolTable()) : 0;
- }
-
- void addNodeToList(ValueSubClass *V);
- void removeNodeFromList(ValueSubClass *V);
- void transferNodesFromList(ilist_traits<ValueSubClass> &L2,
- ilist_iterator<ValueSubClass> first,
- ilist_iterator<ValueSubClass> last);
-//private:
- template<typename TPtr>
- void setSymTabObject(TPtr *, TPtr);
- static ValueSymbolTable *toPtr(ValueSymbolTable *P) { return P; }
- static ValueSymbolTable *toPtr(ValueSymbolTable &R) { return &R; }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h
index 2f6b7e625c3d..2d0a2b45a96a 100644
--- a/include/llvm/TableGen/Error.h
+++ b/include/llvm/TableGen/Error.h
@@ -32,6 +32,7 @@ LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc,
const std::string &Msg);
extern SourceMgr SrcMgr;
+extern unsigned ErrorsPrinted;
} // end namespace "llvm"
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index 319298c13253..76ee69dd8dbd 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -19,9 +19,9 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
@@ -128,16 +128,8 @@ public: // These methods should only be called from subclasses of Init
return convertValue((TypedInit*)FI);
}
-public: // These methods should only be called by subclasses of RecTy.
- // baseClassOf - These virtual methods should be overloaded to return true iff
- // all values of type 'RHS' can be converted to the 'this' type.
- virtual bool baseClassOf(const BitRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const BitsRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const IntRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
+public:
+ virtual bool baseClassOf(const RecTy*) const;
};
inline raw_ostream &operator<<(raw_ostream &OS, const RecTy &Ty) {
@@ -179,19 +171,11 @@ public:
virtual bool typeIsConvertibleTo(const RecTy *RHS) const {
return RHS->baseClassOf(this);
}
- virtual bool baseClassOf(const BitRecTy *RHS) const { return true; }
- virtual bool baseClassOf(const BitsRecTy *RHS) const;
- virtual bool baseClassOf(const IntRecTy *RHS) const { return true; }
- virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
-
+ virtual bool baseClassOf(const RecTy*) const;
};
-// BitsRecTy - 'bits<n>' - Represent a fixed number of bits
-/// BitsRecTy - 'bits&lt;n&gt;' - Represent a fixed number of bits
+/// BitsRecTy - 'bits<n>' - Represent a fixed number of bits
///
class BitsRecTy : public RecTy {
unsigned Size;
@@ -226,16 +210,7 @@ public:
virtual bool typeIsConvertibleTo(const RecTy *RHS) const {
return RHS->baseClassOf(this);
}
- virtual bool baseClassOf(const BitRecTy *RHS) const { return Size == 1; }
- virtual bool baseClassOf(const BitsRecTy *RHS) const {
- return RHS->Size == Size;
- }
- virtual bool baseClassOf(const IntRecTy *RHS) const { return true; }
- virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
-
+ virtual bool baseClassOf(const RecTy*) const;
};
@@ -273,14 +248,7 @@ public:
return RHS->baseClassOf(this);
}
- virtual bool baseClassOf(const BitRecTy *RHS) const { return true; }
- virtual bool baseClassOf(const BitsRecTy *RHS) const { return true; }
- virtual bool baseClassOf(const IntRecTy *RHS) const { return true; }
- virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
-
+ virtual bool baseClassOf(const RecTy*) const;
};
/// StringRecTy - 'string' - Represent a string value
@@ -317,20 +285,10 @@ public:
virtual bool typeIsConvertibleTo(const RecTy *RHS) const {
return RHS->baseClassOf(this);
}
-
- virtual bool baseClassOf(const BitRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const BitsRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const IntRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const StringRecTy *RHS) const { return true; }
- virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
};
-// ListRecTy - 'list<Ty>' - Represent a list of values, all of which must be of
-// the specified type.
-/// ListRecTy - 'list&lt;Ty&gt;' - Represent a list of values, all of which must
-/// be of the specified type.
+/// ListRecTy - 'list<Ty>' - Represent a list of values, all of which must be of
+/// the specified type.
///
class ListRecTy : public RecTy {
RecTy *Ty;
@@ -366,15 +324,7 @@ public:
return RHS->baseClassOf(this);
}
- virtual bool baseClassOf(const BitRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const BitsRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const IntRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const ListRecTy *RHS) const {
- return RHS->getElementType()->typeIsConvertibleTo(Ty);
- }
- virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
+ virtual bool baseClassOf(const RecTy*) const;
};
/// DagRecTy - 'dag' - Represent a dag fragment
@@ -410,14 +360,6 @@ public:
virtual bool typeIsConvertibleTo(const RecTy *RHS) const {
return RHS->baseClassOf(this);
}
-
- virtual bool baseClassOf(const BitRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const BitsRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const IntRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const DagRecTy *RHS) const { return true; }
- virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
};
@@ -458,13 +400,7 @@ public:
virtual bool typeIsConvertibleTo(const RecTy *RHS) const {
return RHS->baseClassOf(this);
}
- virtual bool baseClassOf(const BitRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const BitsRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const IntRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const RecordRecTy *RHS) const;
+ virtual bool baseClassOf(const RecTy*) const;
};
/// resolveTypes - Find a common type that T1 and T2 convert to.
@@ -991,7 +927,7 @@ public:
///
class BinOpInit : public OpInit {
public:
- enum BinaryOp { SHL, SRA, SRL, STRCONCAT, CONCAT, EQ };
+ enum BinaryOp { ADD, SHL, SRA, SRL, STRCONCAT, CONCAT, EQ };
private:
BinaryOp Opc;
Init *LHS, *RHS;
@@ -1448,12 +1384,14 @@ class Record {
SmallVector<SMLoc, 4> Locs;
std::vector<Init *> TemplateArgs;
std::vector<RecordVal> Values;
- std::vector<Record*> SuperClasses;
+ std::vector<Record *> SuperClasses;
+ std::vector<SMRange> SuperClassRanges;
// Tracks Record instances. Not owned by Record.
RecordKeeper &TrackedRecords;
DefInit *TheInit;
+ bool IsAnonymous;
void init();
void checkName();
@@ -1462,14 +1400,15 @@ public:
// Constructs a record.
explicit Record(const std::string &N, ArrayRef<SMLoc> locs,
- RecordKeeper &records) :
+ RecordKeeper &records, bool Anonymous = false) :
ID(LastID++), Name(StringInit::get(N)), Locs(locs.begin(), locs.end()),
- TrackedRecords(records), TheInit(0) {
+ TrackedRecords(records), TheInit(0), IsAnonymous(Anonymous) {
init();
}
- explicit Record(Init *N, ArrayRef<SMLoc> locs, RecordKeeper &records) :
+ explicit Record(Init *N, ArrayRef<SMLoc> locs, RecordKeeper &records,
+ bool Anonymous = false) :
ID(LastID++), Name(N), Locs(locs.begin(), locs.end()),
- TrackedRecords(records), TheInit(0) {
+ TrackedRecords(records), TheInit(0), IsAnonymous(Anonymous) {
init();
}
@@ -1478,7 +1417,8 @@ public:
Record(const Record &O) :
ID(LastID++), Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs),
Values(O.Values), SuperClasses(O.SuperClasses),
- TrackedRecords(O.TrackedRecords), TheInit(O.TheInit) { }
+ SuperClassRanges(O.SuperClassRanges), TrackedRecords(O.TrackedRecords),
+ TheInit(O.TheInit), IsAnonymous(O.IsAnonymous) { }
~Record() {}
@@ -1509,6 +1449,7 @@ public:
}
const std::vector<RecordVal> &getValues() const { return Values; }
const std::vector<Record*> &getSuperClasses() const { return SuperClasses; }
+ ArrayRef<SMRange> getSuperClassRanges() const { return SuperClassRanges; }
bool isTemplateArg(Init *Name) const {
for (unsigned i = 0, e = TemplateArgs.size(); i != e; ++i)
@@ -1583,9 +1524,10 @@ public:
return false;
}
- void addSuperClass(Record *R) {
+ void addSuperClass(Record *R, SMRange Range) {
assert(!isSubClassOf(R) && "Already subclassing record!");
SuperClasses.push_back(R);
+ SuperClassRanges.push_back(Range);
}
/// resolveReferences - If there are any field references that refer to fields
@@ -1602,6 +1544,10 @@ public:
return TrackedRecords;
}
+ bool isAnonymous() const {
+ return IsAnonymous;
+ }
+
void dump() const;
//===--------------------------------------------------------------------===//
@@ -1613,6 +1559,11 @@ public:
///
Init *getValueInit(StringRef FieldName) const;
+ /// Return true if the named field is unset.
+ bool isValueUnset(StringRef FieldName) const {
+ return getValueInit(FieldName) == UnsetInit::get();
+ }
+
/// getValueAsString - This method looks up the specified field and returns
/// its value as a string, throwing an exception if the field does not exist
/// or if the value is not a string.
diff --git a/include/llvm/TableGen/StringMatcher.h b/include/llvm/TableGen/StringMatcher.h
index 1dadc76200b0..99cbcadd7693 100644
--- a/include/llvm/TableGen/StringMatcher.h
+++ b/include/llvm/TableGen/StringMatcher.h
@@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef STRINGMATCHER_H
-#define STRINGMATCHER_H
+#ifndef LLVM_TABLEGEN_STRINGMATCHER_H
+#define LLVM_TABLEGEN_STRINGMATCHER_H
-#include <vector>
+#include "llvm/ADT/StringRef.h"
#include <string>
#include <utility>
-#include "llvm/ADT/StringRef.h"
+#include <vector>
namespace llvm {
class raw_ostream;
diff --git a/include/llvm/Target/CostTable.h b/include/llvm/Target/CostTable.h
new file mode 100644
index 000000000000..a974b5609456
--- /dev/null
+++ b/include/llvm/Target/CostTable.h
@@ -0,0 +1,64 @@
+//===-- CostTable.h - Instruction Cost Table handling -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Cost tables and simple lookup functions
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_COSTTABLE_H_
+#define LLVM_TARGET_COSTTABLE_H_
+
+namespace llvm {
+
+/// Cost Table Entry
+template <class TypeTy>
+struct CostTblEntry {
+ int ISD;
+ TypeTy Type;
+ unsigned Cost;
+};
+
+/// Find an entry in the cost table; TypeTy must be comparable by ==.
+template <class TypeTy>
+int CostTableLookup(const CostTblEntry<TypeTy> *Tbl,
+ unsigned len, int ISD, TypeTy Ty) {
+ for (unsigned int i = 0; i < len; ++i)
+ if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
+ return i;
+
+ // Could not find an entry.
+ return -1;
+}
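+
+// A sketch of a lookup (editor's example; the table contents and
+// 'DefaultCost' are hypothetical). A negative return value means no entry
+// matched and the caller should fall back to a default cost:
+//
+//   static const CostTblEntry<MVT> Tbl[] = {
+//     { ISD::FP_TO_SINT, MVT::v4f32, 3 },
+//     { ISD::FP_TO_SINT, MVT::v2f64, 5 }
+//   };
+//   int Idx = CostTableLookup(Tbl, array_lengthof(Tbl),
+//                             ISD::FP_TO_SINT, MVT::v4f32);
+//   unsigned Cost = (Idx >= 0) ? Tbl[Idx].Cost : DefaultCost;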
+
+/// Type Conversion Cost Table
+template <class TypeTy>
+struct TypeConversionCostTblEntry {
+ int ISD;
+ TypeTy Dst;
+ TypeTy Src;
+ unsigned Cost;
+};
+
+/// Find an entry in the type conversion cost table; TypeTy must be
+/// comparable by ==.
+template <class TypeTy>
+int ConvertCostTableLookup(const TypeConversionCostTblEntry<TypeTy> *Tbl,
+ unsigned len, int ISD, TypeTy Dst, TypeTy Src) {
+ for (unsigned int i = 0; i < len; ++i)
+ if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
+ return i;
+
+ // Could not find an entry.
+ return -1;
+}
+
+} // namespace llvm
+
+
+#endif /* LLVM_TARGET_COSTTABLE_H_ */
diff --git a/include/llvm/Target/Mangler.h b/include/llvm/Target/Mangler.h
index a50f54a436e9..9500f1cc8f48 100644
--- a/include/llvm/Target/Mangler.h
+++ b/include/llvm/Target/Mangler.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_MANGLER_H
-#define LLVM_SUPPORT_MANGLER_H
+#ifndef LLVM_TARGET_MANGLER_H
+#define LLVM_TARGET_MANGLER_H
#include "llvm/ADT/DenseMap.h"
@@ -69,4 +69,4 @@ public:
} // End llvm namespace
-#endif // LLVM_SUPPORT_MANGLER_H
+#endif // LLVM_TARGET_MANGLER_H
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 12f5c0eb306a..deee2eb6debf 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
// Include all information about LLVM intrinsics.
-include "llvm/Intrinsics.td"
+include "llvm/IR/Intrinsics.td"
//===----------------------------------------------------------------------===//
// Register file description - These classes are used to fill in the target
@@ -367,8 +367,9 @@ class Instruction {
// hasSideEffects - The instruction has side effects that are not
// captured by any operands of the instruction or other flags.
//
- // neverHasSideEffects - Set on an instruction with no pattern if it has no
- // side effects.
+ // neverHasSideEffects (deprecated) - Set on an instruction with no pattern
+ // if it has no side effects. This is now equivalent to setting
+ // "hasSideEffects = 0".
bit hasSideEffects = ?;
bit neverHasSideEffects = 0;
@@ -396,6 +397,9 @@ class Instruction {
InstrItinClass Itinerary = NoItinerary;// Execution steps used for scheduling.
+ // Scheduling information from TargetSchedule.td.
+ list<SchedReadWrite> SchedRW;
+
string Constraints = ""; // OperandConstraint, e.g. $src = $dst.
/// DisableEncoding - List of operand names (e.g. "$op1,$op2") that should not
diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h
index d56db7b5118e..d5f30f40addb 100644
--- a/include/llvm/Target/TargetFrameLowering.h
+++ b/include/llvm/Target/TargetFrameLowering.h
@@ -15,7 +15,6 @@
#define LLVM_TARGET_TARGETFRAMELOWERING_H
#include "llvm/CodeGen/MachineBasicBlock.h"
-
#include <utility>
#include <vector>
@@ -48,11 +47,12 @@ private:
unsigned StackAlignment;
unsigned TransientStackAlignment;
int LocalAreaOffset;
+ bool StackRealignable;
public:
TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO,
- unsigned TransAl = 1)
+ unsigned TransAl = 1, bool StackReal = true)
: StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
- LocalAreaOffset(LAO) {}
+ LocalAreaOffset(LAO), StackRealignable(StackReal) {}
virtual ~TargetFrameLowering();
@@ -77,6 +77,12 @@ public:
return TransientStackAlignment;
}
+ /// isStackRealignable - This method returns whether the stack can be
+ /// realigned.
+ bool isStackRealignable() const {
+ return StackRealignable;
+ }
+
/// getOffsetOfLocalArea - This method returns the offset of the local area
/// from the stack pointer on entrance to a function.
///
@@ -114,6 +120,10 @@ public:
/// by adding a check even before the "normal" function prologue.
virtual void adjustForSegmentedStacks(MachineFunction &MF) const { }
+  /// Adjust the assembly prologue to add Erlang Run-Time System (ERTS)
+  /// specific code that explicitly handles the stack.
+ virtual void adjustForHiPEPrologue(MachineFunction &MF) const { }
+
/// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
/// saved registers and returns true if it isn't possible / profitable to do
/// so by issuing a series of store instructions via
@@ -184,7 +194,23 @@ public:
/// finalized. Once the frame is finalized, MO_FrameIndex operands are
/// replaced with direct constants. This method is optional.
///
- virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+ virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const {
+ }
+
+ /// eliminateCallFramePseudoInstr - This method is called during prolog/epilog
+ /// code insertion to eliminate call frame setup and destroy pseudo
+ /// instructions (but only if the Target is using them). It is responsible
+ /// for eliminating these instructions, replacing them with concrete
+ /// instructions. This method need only be implemented if using call frame
+ /// setup/destroy pseudo instructions.
+ ///
+ virtual void
+ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ llvm_unreachable("Call Frame Pseudo Instructions do not exist on this "
+ "target!");
}
};
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 4570813ba6c2..0ba75e5d7ca0 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -15,9 +15,9 @@
#define LLVM_TARGET_TARGETINSTRINFO_H
#include "llvm/ADT/SmallSet.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCInstrInfo.h"
namespace llvm {
@@ -143,9 +143,7 @@ public:
/// missed.
virtual bool hasLoadFromStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
- int &FrameIndex) const {
- return 0;
- }
+ int &FrameIndex) const;
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
@@ -173,9 +171,7 @@ public:
/// stack. This is just a hint, as some cases may be missed.
virtual bool hasStoreToStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
- int &FrameIndex) const {
- return 0;
- }
+ int &FrameIndex) const;
/// reMaterialize - Re-issue the specified 'original' instruction at the
/// specific location targeting a new destination register.
@@ -186,7 +182,7 @@ public:
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo &TRI) const = 0;
+ const TargetRegisterInfo &TRI) const;
/// duplicate - Create a duplicate of the Orig instruction in MF. This is like
/// MachineFunction::CloneMachineInstr(), but the target may update operands
@@ -194,7 +190,7 @@ public:
///
/// The instruction must be duplicable as indicated by isNotDuplicable().
virtual MachineInstr *duplicate(MachineInstr *Orig,
- MachineFunction &MF) const = 0;
+ MachineFunction &MF) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@@ -221,13 +217,13 @@ public:
/// method for a non-commutable instruction, but there may be some cases
/// where this method fails and returns null.
virtual MachineInstr *commuteInstruction(MachineInstr *MI,
- bool NewMI = false) const = 0;
+ bool NewMI = false) const;
/// findCommutedOpIndices - If specified MI is commutable, return the two
/// operand indices that would swap value. Return false if the instruction
/// is not in a form which this routine understands.
virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const = 0;
+ unsigned &SrcOpIdx2) const;
/// produceSameValue - Return true if two machine instructions would produce
/// identical values. By default, this is only true when the two instructions
@@ -236,7 +232,7 @@ public:
/// aggressive checks.
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
- const MachineRegisterInfo *MRI = 0) const = 0;
+ const MachineRegisterInfo *MRI = 0) const;
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
@@ -298,7 +294,7 @@ public:
/// after it, replacing it with an unconditional branch to NewDest. This is
/// used by the tail merging pass.
virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
- MachineBasicBlock *NewDest) const = 0;
+ MachineBasicBlock *NewDest) const;
/// isLegalToSplitMBBAt - Return true if it's legal to split the given basic
/// block at the specified instruction (i.e. instruction would be the start
@@ -368,11 +364,10 @@ public:
/// condition code in Cond.
///
/// When successful, also return the latency in cycles from TrueReg,
- /// FalseReg, and Cond to the destination register. The Cond latency should
- /// compensate for a conditional branch being removed. For example, if a
- /// conditional branch has a 3 cycle latency from the condition code read,
- /// and a cmov instruction has a 2 cycle latency from the condition code
- /// read, CondCycles should be returned as -1.
+ /// FalseReg, and Cond to the destination register. In most cases, a select
+ /// instruction will be 1 cycle, so CondCycles = TrueCycles = FalseCycles = 1
+ ///
+ /// Some x86 implementations have 2-cycle cmov instructions.
///
/// @param MBB Block where select instruction would be inserted.
/// @param Cond Condition returned by AnalyzeBranch.
@@ -435,7 +430,7 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
unsigned &TrueOp, unsigned &FalseOp,
bool &Optimizable) const {
- assert(MI && MI->isSelect() && "MI must be a select instruction");
+ assert(MI && MI->getDesc().isSelect() && "MI must be a select instruction");
return true;
}
@@ -569,7 +564,7 @@ public:
/// folding is possible.
virtual
bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const =0;
+ const SmallVectorImpl<unsigned> &Ops) const;
/// unfoldMemoryOperand - Separate a single instruction which folded a load or
/// a store or a load and a store into two or more instruction. If this is
@@ -621,6 +616,26 @@ public:
return false;
}
+ /// \brief Get the base register and byte offset of a load/store instr.
+ virtual bool getLdStBaseRegImmOfs(MachineInstr *LdSt,
+ unsigned &BaseReg, unsigned &Offset,
+ const TargetRegisterInfo *TRI) const {
+ return false;
+ }
+
+ virtual bool shouldClusterLoads(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const {
+ return false;
+ }
+
+ /// \brief Can this target fuse the given instructions if they are scheduled
+ /// adjacent.
+ virtual bool shouldScheduleAdjacent(MachineInstr* First,
+ MachineInstr *Second) const {
+ return false;
+ }
+
/// ReverseBranchCondition - Reverses the branch condition of the specified
/// condition list, returning false on success and true if it cannot be
/// reversed.
@@ -649,13 +664,13 @@ public:
/// isUnpredicatedTerminator - Returns true if the instruction is a
/// terminator instruction that has not been predicated.
- virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const = 0;
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
/// PredicateInstruction - Convert the instruction into a predicated
/// instruction. It returns true if the operation was successful.
virtual
bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const = 0;
+ const SmallVectorImpl<MachineOperand> &Pred) const;
/// SubsumesPredicate - Returns true if the first specified predicate
/// subsumes the second, e.g. GE subsumes GT.
@@ -691,7 +706,7 @@ public:
/// terminators.
virtual bool isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- const MachineFunction &MF) const = 0;
+ const MachineFunction &MF) const;
/// Measure the specified inline asm to determine an approximation of its
/// length.
@@ -703,21 +718,25 @@ public:
/// register allocation.
virtual ScheduleHazardRecognizer*
CreateTargetHazardRecognizer(const TargetMachine *TM,
- const ScheduleDAG *DAG) const = 0;
+ const ScheduleDAG *DAG) const;
/// CreateTargetMIHazardRecognizer - Allocate and return a hazard recognizer
/// to use for this target when scheduling the machine instructions before
/// register allocation.
virtual ScheduleHazardRecognizer*
CreateTargetMIHazardRecognizer(const InstrItineraryData*,
- const ScheduleDAG *DAG) const = 0;
+ const ScheduleDAG *DAG) const;
/// CreateTargetPostRAHazardRecognizer - Allocate and return a hazard
/// recognizer to use for this target when scheduling the machine instructions
/// after register allocation.
virtual ScheduleHazardRecognizer*
CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
- const ScheduleDAG *DAG) const = 0;
+ const ScheduleDAG *DAG) const;
+
+ /// Provide a global flag for disabling the PreRA hazard recognizer that
+ /// targets may choose to honor.
+ bool usePreRAHazardRecognizer() const;
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
@@ -765,7 +784,7 @@ public:
/// IssueWidth is the number of microops that can be dispatched each
/// cycle. An instruction with zero microops takes no dispatch resources.
virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
- const MachineInstr *MI) const = 0;
+ const MachineInstr *MI) const;
/// isZeroCost - Return true for pseudo instructions that don't consume any
/// machine resources in their current form. These are common cases that the
@@ -777,7 +796,7 @@ public:
virtual int getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const = 0;
+ SDNode *UseNode, unsigned UseIdx) const;
/// getOperandLatency - Compute and return the use operand latency of a given
/// pair of def and use.
@@ -790,7 +809,7 @@ public:
virtual int getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI,
- unsigned UseIdx) const = 0;
+ unsigned UseIdx) const;
/// computeOperandLatency - Compute and return the latency of the given data
/// dependent def and use when the operand indices are already known.
@@ -806,10 +825,10 @@ public:
/// PredCost.
virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
- unsigned *PredCost = 0) const = 0;
+ unsigned *PredCost = 0) const;
virtual int getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *Node) const = 0;
+ SDNode *Node) const;
/// Return the default expected latency for a def based on its opcode.
unsigned defaultDefLatency(const MCSchedModel *SchedModel,
@@ -839,7 +858,7 @@ public:
/// if the target considered it 'low'.
virtual
bool hasLowDefLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx) const = 0;
+ const MachineInstr *DefMI, unsigned DefIdx) const;
/// verifyInstruction - Perform target specific instruction verification.
virtual
@@ -956,84 +975,6 @@ private:
int CallFrameSetupOpcode, CallFrameDestroyOpcode;
};
-/// TargetInstrInfoImpl - This is the default implementation of
-/// TargetInstrInfo, which just provides a couple of default implementations
-/// for various methods. This separated out because it is implemented in
-/// libcodegen, not in libtarget.
-class TargetInstrInfoImpl : public TargetInstrInfo {
-protected:
- TargetInstrInfoImpl(int CallFrameSetupOpcode = -1,
- int CallFrameDestroyOpcode = -1)
- : TargetInstrInfo(CallFrameSetupOpcode, CallFrameDestroyOpcode) {}
-public:
- virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
- MachineBasicBlock *NewDest) const;
- virtual MachineInstr *commuteInstruction(MachineInstr *MI,
- bool NewMI = false) const;
- virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const;
- virtual bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
- virtual bool hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
- virtual bool hasStoreToStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const;
- virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
- virtual bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const;
- virtual void reMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SubReg,
- const MachineInstr *Orig,
- const TargetRegisterInfo &TRI) const;
- virtual MachineInstr *duplicate(MachineInstr *Orig,
- MachineFunction &MF) const;
- virtual bool produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1,
- const MachineRegisterInfo *MRI) const;
- virtual bool isSchedulingBoundary(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const;
-
- virtual int getOperandLatency(const InstrItineraryData *ItinData,
- SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const;
-
- virtual int getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *Node) const;
-
- virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
- const MachineInstr *MI) const;
-
- virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost = 0) const;
-
- virtual
- bool hasLowDefLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx) const;
-
- virtual int getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI,
- unsigned UseIdx) const;
-
- bool usePreRAHazardRecognizer() const;
-
- virtual ScheduleHazardRecognizer *
- CreateTargetHazardRecognizer(const TargetMachine*, const ScheduleDAG*) const;
-
- virtual ScheduleHazardRecognizer *
- CreateTargetMIHazardRecognizer(const InstrItineraryData*,
- const ScheduleDAG*) const;
-
- virtual ScheduleHazardRecognizer *
- CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
- const ScheduleDAG*) const;
-};
-
} // End llvm namespace
#endif
diff --git a/include/llvm/Target/TargetJITInfo.h b/include/llvm/Target/TargetJITInfo.h
index 044afd9b7392..f9bd0fb9f981 100644
--- a/include/llvm/Target/TargetJITInfo.h
+++ b/include/llvm/Target/TargetJITInfo.h
@@ -17,8 +17,8 @@
#ifndef LLVM_TARGET_TARGETJITINFO_H
#define LLVM_TARGET_TARGETJITINFO_H
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
namespace llvm {
diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h
index a2c97d782e29..5f01c8d8351a 100644
--- a/include/llvm/Target/TargetLibraryInfo.h
+++ b/include/llvm/Target/TargetLibraryInfo.h
@@ -10,14 +10,18 @@
#ifndef LLVM_TARGET_TARGETLIBRARYINFO_H
#define LLVM_TARGET_TARGETLIBRARYINFO_H
-#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Pass.h"
namespace llvm {
class Triple;
namespace LibFunc {
enum Func {
+ /// int _IO_getc(_IO_FILE * __fp);
+ under_IO_getc,
+ /// int _IO_putc(int __c, _IO_FILE * __fp);
+ under_IO_putc,
/// void operator delete[](void*);
ZdaPv,
/// void operator delete(void*);
@@ -47,8 +51,22 @@ namespace llvm {
cxa_guard_acquire,
/// void __cxa_guard_release(guard_t *guard);
cxa_guard_release,
+ /// int __isoc99_scanf(const char *format, ...);
+ dunder_isoc99_scanf,
+ /// int __isoc99_sscanf(const char *s, const char *format, ...);
+ dunder_isoc99_sscanf,
/// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size);
memcpy_chk,
+ /// char * __strdup(const char *s);
+ dunder_strdup,
+ /// char *__strndup(const char *s, size_t n);
+ dunder_strndup,
+ /// char * __strtok_r(char *s, const char *delim, char **save_ptr);
+ dunder_strtok_r,
+ /// int abs(int j);
+ abs,
+ /// int access(const char *path, int amode);
+ access,
/// double acos(double x);
acos,
/// float acosf(float x);
@@ -91,6 +109,20 @@ namespace llvm {
atanhl,
/// long double atanl(long double x);
atanl,
+ /// double atof(const char *str);
+ atof,
+ /// int atoi(const char *str);
+ atoi,
+ /// long atol(const char *str);
+ atol,
+ /// long long atoll(const char *nptr);
+ atoll,
+ /// int bcmp(const void *s1, const void *s2, size_t n);
+ bcmp,
+ /// void bcopy(const void *s1, void *s2, size_t n);
+ bcopy,
+ /// void bzero(void *s, size_t n);
+ bzero,
/// void *calloc(size_t count, size_t size);
calloc,
/// double cbrt(double x);
@@ -105,6 +137,14 @@ namespace llvm {
ceilf,
/// long double ceill(long double x);
ceill,
+ /// int chmod(const char *path, mode_t mode);
+ chmod,
+ /// int chown(const char *path, uid_t owner, gid_t group);
+ chown,
+ /// void clearerr(FILE *stream);
+ clearerr,
+ /// int closedir(DIR *dirp);
+ closedir,
/// double copysign(double x, double y);
copysign,
/// float copysignf(float x, float y);
@@ -123,6 +163,8 @@ namespace llvm {
coshl,
/// long double cosl(long double x);
cosl,
+ /// char *ctermid(char *s);
+ ctermid,
/// double exp(double x);
exp,
/// double exp10(double x);
@@ -153,8 +195,34 @@ namespace llvm {
fabsf,
/// long double fabsl(long double x);
fabsl,
+ /// int fclose(FILE *stream);
+ fclose,
+ /// FILE *fdopen(int fildes, const char *mode);
+ fdopen,
+ /// int feof(FILE *stream);
+ feof,
+ /// int ferror(FILE *stream);
+ ferror,
+ /// int fflush(FILE *stream);
+ fflush,
+ /// int ffs(int i);
+ ffs,
+ /// int ffsl(long int i);
+ ffsl,
+ /// int ffsll(long long int i);
+ ffsll,
+ /// int fgetc(FILE *stream);
+ fgetc,
+ /// int fgetpos(FILE *stream, fpos_t *pos);
+ fgetpos,
+ /// char *fgets(char *s, int n, FILE *stream);
+ fgets,
+ /// int fileno(FILE *stream);
+ fileno,
/// int fiprintf(FILE *stream, const char *format, ...);
fiprintf,
+ /// void flockfile(FILE *file);
+ flockfile,
/// double floor(double x);
floor,
/// float floorf(float x);
@@ -167,17 +235,89 @@ namespace llvm {
fmodf,
/// long double fmodl(long double x, long double y);
fmodl,
+ /// FILE *fopen(const char *filename, const char *mode);
+ fopen,
+ /// FILE *fopen64(const char *filename, const char *opentype);
+ fopen64,
+ /// int fprintf(FILE *stream, const char *format, ...);
+ fprintf,
/// int fputc(int c, FILE *stream);
fputc,
/// int fputs(const char *s, FILE *stream);
fputs,
+ /// size_t fread(void *ptr, size_t size, size_t nitems, FILE *stream);
+ fread,
/// void free(void *ptr);
free,
+ /// double frexp(double num, int *exp);
+ frexp,
+ /// float frexpf(float num, int *exp);
+ frexpf,
+ /// long double frexpl(long double num, int *exp);
+ frexpl,
+ /// int fscanf(FILE *stream, const char *format, ... );
+ fscanf,
+ /// int fseek(FILE *stream, long offset, int whence);
+ fseek,
+ /// int fseeko(FILE *stream, off_t offset, int whence);
+ fseeko,
+ /// int fseeko64(FILE *stream, off64_t offset, int whence);
+ fseeko64,
+ /// int fsetpos(FILE *stream, const fpos_t *pos);
+ fsetpos,
+ /// int fstat(int fildes, struct stat *buf);
+ fstat,
+ /// int fstat64(int filedes, struct stat64 *buf);
+ fstat64,
+ /// int fstatvfs(int fildes, struct statvfs *buf);
+ fstatvfs,
+ /// int fstatvfs64(int fildes, struct statvfs64 *buf);
+ fstatvfs64,
+ /// long ftell(FILE *stream);
+ ftell,
+ /// off_t ftello(FILE *stream);
+ ftello,
+ /// off64_t ftello64(FILE *stream);
+ ftello64,
+ /// int ftrylockfile(FILE *file);
+ ftrylockfile,
+ /// void funlockfile(FILE *file);
+ funlockfile,
/// size_t fwrite(const void *ptr, size_t size, size_t nitems,
/// FILE *stream);
fwrite,
+ /// int getc(FILE *stream);
+ getc,
+ /// int getc_unlocked(FILE *stream);
+ getc_unlocked,
+ /// int getchar(void);
+ getchar,
+ /// char *getenv(const char *name);
+ getenv,
+ /// int getitimer(int which, struct itimerval *value);
+ getitimer,
+ /// int getlogin_r(char *name, size_t namesize);
+ getlogin_r,
+ /// struct passwd *getpwnam(const char *name);
+ getpwnam,
+ /// char *gets(char *s);
+ gets,
+ /// uint32_t htonl(uint32_t hostlong);
+ htonl,
+ /// uint16_t htons(uint16_t hostshort);
+ htons,
/// int iprintf(const char *format, ...);
iprintf,
+ /// int isascii(int c);
+ isascii,
+ /// int isdigit(int c);
+ isdigit,
+ /// long int labs(long int j);
+ labs,
+ /// int lchown(const char *path, uid_t owner, gid_t group);
+ lchown,
+ /// long long int llabs(long long int j);
+ llabs,
/// double log(double x);
log,
/// double log10(double x);
@@ -208,8 +348,16 @@ namespace llvm {
logf,
/// long double logl(long double x);
logl,
+ /// int lstat(const char *path, struct stat *buf);
+ lstat,
+ /// int lstat64(const char *path, struct stat64 *buf);
+ lstat64,
/// void *malloc(size_t size);
malloc,
+ /// void *memalign(size_t boundary, size_t size);
+ memalign,
+ /// void *memccpy(void *s1, const void *s2, int c, size_t n);
+ memccpy,
/// void *memchr(const void *s, int c, size_t n);
memchr,
/// int memcmp(const void *s1, const void *s2, size_t n);
@@ -218,16 +366,44 @@ namespace llvm {
memcpy,
/// void *memmove(void *s1, const void *s2, size_t n);
memmove,
+ /// void *memrchr(const void *s, int c, size_t n);
+ memrchr,
/// void *memset(void *b, int c, size_t len);
memset,
/// void memset_pattern16(void *b, const void *pattern16, size_t len);
memset_pattern16,
+ /// int mkdir(const char *path, mode_t mode);
+ mkdir,
+ /// time_t mktime(struct tm *timeptr);
+ mktime,
+ /// double modf(double x, double *iptr);
+ modf,
+ /// float modff(float, float *iptr);
+ modff,
+ /// long double modfl(long double value, long double *iptr);
+ modfl,
/// double nearbyint(double x);
nearbyint,
/// float nearbyintf(float x);
nearbyintf,
/// long double nearbyintl(long double x);
nearbyintl,
+ /// uint32_t ntohl(uint32_t netlong);
+ ntohl,
+ /// uint16_t ntohs(uint16_t netshort);
+ ntohs,
+ /// int open(const char *path, int oflag, ... );
+ open,
+ /// int open64(const char *filename, int flags[, mode_t mode]);
+ open64,
+ /// DIR *opendir(const char *dirname);
+ opendir,
+ /// int pclose(FILE *stream);
+ pclose,
+ /// void perror(const char *s);
+ perror,
+ /// FILE *popen(const char *command, const char *mode);
+ popen,
/// int posix_memalign(void **memptr, size_t alignment, size_t size);
posix_memalign,
/// double pow(double x, double y);
@@ -236,26 +412,61 @@ namespace llvm {
powf,
/// long double powl(long double x, long double y);
powl,
+ /// ssize_t pread(int fildes, void *buf, size_t nbyte, off_t offset);
+ pread,
+ /// int printf(const char *format, ...);
+ printf,
+ /// int putc(int c, FILE *stream);
+ putc,
/// int putchar(int c);
putchar,
/// int puts(const char *s);
puts,
+ /// ssize_t pwrite(int fildes, const void *buf, size_t nbyte,
+ /// off_t offset);
+ pwrite,
+ /// void qsort(void *base, size_t nel, size_t width,
+ /// int (*compar)(const void *, const void *));
+ qsort,
+ /// ssize_t read(int fildes, void *buf, size_t nbyte);
+ read,
+ /// ssize_t readlink(const char *path, char *buf, size_t bufsize);
+ readlink,
/// void *realloc(void *ptr, size_t size);
realloc,
/// void *reallocf(void *ptr, size_t size);
reallocf,
+ /// char *realpath(const char *file_name, char *resolved_name);
+ realpath,
+ /// int remove(const char *path);
+ remove,
+ /// int rename(const char *old, const char *new);
+ rename,
+ /// void rewind(FILE *stream);
+ rewind,
/// double rint(double x);
rint,
/// float rintf(float x);
rintf,
/// long double rintl(long double x);
rintl,
+ /// int rmdir(const char *path);
+ rmdir,
/// double round(double x);
round,
/// float roundf(float x);
roundf,
/// long double roundl(long double x);
roundl,
+ /// int scanf(const char *restrict format, ... );
+ scanf,
+ /// void setbuf(FILE *stream, char *buf);
+ setbuf,
+ /// int setitimer(int which, const struct itimerval *value,
+ /// struct itimerval *ovalue);
+ setitimer,
+ /// int setvbuf(FILE *stream, char *buf, int type, size_t size);
+ setvbuf,
/// double sin(double x);
sin,
/// float sinf(float x);
@@ -270,20 +481,40 @@ namespace llvm {
sinl,
/// int siprintf(char *str, const char *format, ...);
siprintf,
+ /// int snprintf(char *s, size_t n, const char *format, ...);
+ snprintf,
+ /// int sprintf(char *str, const char *format, ...);
+ sprintf,
/// double sqrt(double x);
sqrt,
/// float sqrtf(float x);
sqrtf,
/// long double sqrtl(long double x);
sqrtl,
+ /// int sscanf(const char *s, const char *format, ... );
+ sscanf,
+ /// int stat(const char *path, struct stat *buf);
+ stat,
+ /// int stat64(const char *path, struct stat64 *buf);
+ stat64,
+ /// int statvfs(const char *path, struct statvfs *buf);
+ statvfs,
+ /// int statvfs64(const char *path, struct statvfs64 *buf);
+ statvfs64,
/// char *stpcpy(char *s1, const char *s2);
stpcpy,
+ /// char *stpncpy(char *s1, const char *s2, size_t n);
+ stpncpy,
+ /// int strcasecmp(const char *s1, const char *s2);
+ strcasecmp,
/// char *strcat(char *s1, const char *s2);
strcat,
/// char *strchr(const char *s, int c);
strchr,
/// int strcmp(const char *s1, const char *s2);
strcmp,
+ /// int strcoll(const char *s1, const char *s2);
+ strcoll,
/// char *strcpy(char *s1, const char *s2);
strcpy,
/// size_t strcspn(const char *s1, const char *s2);
@@ -292,6 +523,8 @@ namespace llvm {
strdup,
/// size_t strlen(const char *s);
strlen,
+ /// int strncasecmp(const char *s1, const char *s2, size_t n);
+ strncasecmp,
/// char *strncat(char *s1, const char *s2, size_t n);
strncat,
/// int strncmp(const char *s1, const char *s2, size_t n);
@@ -314,6 +547,10 @@ namespace llvm {
strtod,
/// float strtof(const char *nptr, char **endptr);
strtof,
+ /// char *strtok(char *s1, const char *s2);
+ strtok,
+ /// char *strtok_r(char *s, const char *sep, char **lasts);
+ strtok_r,
/// long int strtol(const char *nptr, char **endptr, int base);
strtol,
/// long double strtold(const char *nptr, char **endptr);
@@ -325,6 +562,10 @@ namespace llvm {
/// unsigned long long int strtoull(const char *nptr, char **endptr,
/// int base);
strtoull,
+ /// size_t strxfrm(char *s1, const char *s2, size_t n);
+ strxfrm,
+ /// int system(const char *command);
+ system,
/// double tan(double x);
tan,
/// float tanf(float x);
@@ -337,14 +578,50 @@ namespace llvm {
tanhl,
/// long double tanl(long double x);
tanl,
+ /// clock_t times(struct tms *buffer);
+ times,
+ /// FILE *tmpfile(void);
+ tmpfile,
+ /// FILE *tmpfile64(void);
+ tmpfile64,
+ /// int toascii(int c);
+ toascii,
/// double trunc(double x);
trunc,
/// float truncf(float x);
truncf,
/// long double truncl(long double x);
truncl,
+ /// int uname(struct utsname *name);
+ uname,
+ /// int ungetc(int c, FILE *stream);
+ ungetc,
+ /// int unlink(const char *path);
+ unlink,
+ /// int unsetenv(const char *name);
+ unsetenv,
+ /// int utime(const char *path, const struct utimbuf *times);
+ utime,
+ /// int utimes(const char *path, const struct timeval times[2]);
+ utimes,
/// void *valloc(size_t size);
valloc,
+ /// int vfprintf(FILE *stream, const char *format, va_list ap);
+ vfprintf,
+ /// int vfscanf(FILE *stream, const char *format, va_list arg);
+ vfscanf,
+ /// int vprintf(const char *restrict format, va_list ap);
+ vprintf,
+ /// int vscanf(const char *format, va_list arg);
+ vscanf,
+ /// int vsnprintf(char *s, size_t n, const char *format, va_list ap);
+ vsnprintf,
+ /// int vsprintf(char *s, const char *format, va_list ap);
+ vsprintf,
+ /// int vsscanf(const char *s, const char *format, va_list arg);
+ vsscanf,
+ /// ssize_t write(int fildes, const void *buf, size_t nbyte);
+ write,
NumLibFuncs
};
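
The TargetLibraryInfo hunks above roughly triple the LibFunc table, mostly with stdio, stat/statvfs, and string routines. A hedged sketch of how a libcall simplification would consult the enlarged table; TargetLibraryInfo::has() is the existing query API, while the particular fold is illustrative:

    #include "llvm/Target/TargetLibraryInfo.h"
    using namespace llvm;

    // Rewrite fprintf(F, "%s", S) into fputs(S, F) only when the target
    // environment actually provides both entry points.
    static bool canFoldFPrintFToFPuts(const TargetLibraryInfo &TLI) {
      return TLI.has(LibFunc::fprintf) && TLI.has(LibFunc::fputs);
    }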
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 580a30fcd2d8..1786bd28f392 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -22,14 +22,14 @@
#ifndef LLVM_TARGET_TARGETLOWERING_H
#define LLVM_TARGET_TARGETLOWERING_H
-#include "llvm/AddressingMode.h"
-#include "llvm/CallingConv.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Attributes.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/DebugLoc.h"
#include "llvm/Target/TargetCallingConv.h"
#include "llvm/Target/TargetMachine.h"
@@ -68,18 +68,12 @@ namespace llvm {
};
}
+/// TargetLoweringBase - This base class for TargetLowering contains the
+/// SelectionDAG-independent parts that can be used from the rest of CodeGen.
+class TargetLoweringBase {
+ TargetLoweringBase(const TargetLoweringBase&) LLVM_DELETED_FUNCTION;
+ void operator=(const TargetLoweringBase&) LLVM_DELETED_FUNCTION;
-//===----------------------------------------------------------------------===//
-/// TargetLowering - This class defines information used to lower LLVM code to
-/// legal SelectionDAG operators that the target instruction selector can accept
-/// natively.
-///
-/// This class also defines callbacks that targets must implement to lower
-/// target-specific constructs to SelectionDAG operators.
-///
-class TargetLowering {
- TargetLowering(const TargetLowering&) LLVM_DELETED_FUNCTION;
- void operator=(const TargetLowering&) LLVM_DELETED_FUNCTION;
public:
/// LegalizeAction - This enum indicates whether operations are valid for a
/// target, and if not, what action should be used to make them valid.
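
This is the central change in the header: the SelectionDAG-independent half of TargetLowering is split out into TargetLoweringBase, so clients such as the TargetTransformInfo helpers added further down can reach the legalization tables without the lowering hooks. The resulting layering, sketched with an illustrative backend class (only the two library class names are from the patch):

    class TargetLoweringBase { /* type/action tables, cost helpers */ };
    class TargetLowering : public TargetLoweringBase { /* DAG combines, call lowering */ };
    class MyTargetLowering : public TargetLowering { /* per-target overrides */ };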
@@ -137,9 +131,9 @@ public:
}
/// NOTE: The constructor takes ownership of TLOF.
- explicit TargetLowering(const TargetMachine &TM,
- const TargetLoweringObjectFile *TLOF);
- virtual ~TargetLowering();
+ explicit TargetLoweringBase(const TargetMachine &TM,
+ const TargetLoweringObjectFile *TLOF);
+ virtual ~TargetLoweringBase();
const TargetMachine &getTargetMachine() const { return TM; }
const DataLayout *getDataLayout() const { return TD; }
@@ -151,7 +145,9 @@ public:
// the pointer type from the data layout.
// FIXME: The default needs to be removed once all the code is updated.
virtual MVT getPointerTy(uint32_t AS = 0) const { return PointerTy; }
- virtual MVT getShiftAmountTy(EVT LHSTy) const;
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const;
+
+ EVT getShiftAmountTy(EVT LHSTy) const;
/// isSelectExpensive - Return true if the select operation is expensive for
/// this target.
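
The shift-amount hook is split in two: backends now override only the scalar case, and generic code calls the non-virtual getShiftAmountTy() wrapper, which can derive the vector case from it. A hedged override for an imagined 32-bit target:

    // Member of a hypothetical TargetLowering subclass.
    virtual MVT getScalarShiftAmountTy(EVT /*LHSTy*/) const {
      return MVT::i32;  // every scalar shift amount is i32 on this target
    }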
@@ -159,6 +155,11 @@ public:
virtual bool isSelectSupported(SelectSupportKind kind) const { return true; }
+ /// shouldSplitVectorElementType - Return true if a vector of the given type
+ /// should be split (TypeSplitVector) instead of promoted
+ /// (TypePromoteInteger) during type legalization.
+ virtual bool shouldSplitVectorElementType(EVT VT) const { return false; }
+
/// isIntDivCheap() - Return true if integer divide is usually cheaper than
/// a sequence of several shifts, adds, and multiplies for this target.
bool isIntDivCheap() const { return IntDivIsCheap; }
@@ -184,7 +185,7 @@ public:
/// isPredictableSelectExpensive - Return true if selects are only cheaper
/// than branches if the branch is unlikely to be predicted right.
bool isPredictableSelectExpensive() const {
- return predictableSelectIsExpensive;
+ return PredictableSelectIsExpensive;
}
/// getSetCCResultType - Return the ValueType of the result of SETCC
@@ -227,9 +228,8 @@ public:
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- virtual const TargetRegisterClass *getRegClassFor(EVT VT) const {
- assert(VT.isSimple() && "getRegClassFor called on illegal type!");
- const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
+ virtual const TargetRegisterClass *getRegClassFor(MVT VT) const {
+ const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
assert(RC && "This value type is not natively supported!");
return RC;
}
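
getRegClassFor and the rep-class queries below now take MVT, so the isSimple() check moves from an assert inside the callee to an explicit test at the call site. A hedged call-site pattern, with TLI and the EVT VT assumed to be in scope:

    const llvm::TargetRegisterClass *RC = 0;
    if (VT.isSimple())                            // prove the type is simple...
      RC = TLI.getRegClassFor(VT.getSimpleVT());  // ...then do the MVT lookup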
@@ -239,17 +239,15 @@ public:
/// legal super-reg register class for the register class of the value type.
/// For example, on i386 the rep register class for i8, i16, and i32 are GR32;
/// while the rep register class is GR64 on x86_64.
- virtual const TargetRegisterClass *getRepRegClassFor(EVT VT) const {
- assert(VT.isSimple() && "getRepRegClassFor called on illegal type!");
- const TargetRegisterClass *RC = RepRegClassForVT[VT.getSimpleVT().SimpleTy];
+ virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
+ const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
return RC;
}
/// getRepRegClassCostFor - Return the cost of the 'representative' register
/// class for the specified value type.
- virtual uint8_t getRepRegClassCostFor(EVT VT) const {
- assert(VT.isSimple() && "getRepRegClassCostFor called on illegal type!");
- return RepRegClassCostForVT[VT.getSimpleVT().SimpleTy];
+ virtual uint8_t getRepRegClassCostFor(MVT VT) const {
+ return RepRegClassCostForVT[VT.SimpleTy];
}
/// isTypeLegal - Return true if the target has native support for the
@@ -275,8 +273,8 @@ public:
return (LegalizeTypeAction)ValueTypeActions[VT.SimpleTy];
}
- void setTypeAction(EVT VT, LegalizeTypeAction Action) {
- unsigned I = VT.getSimpleVT().SimpleTy;
+ void setTypeAction(MVT VT, LegalizeTypeAction Action) {
+ unsigned I = VT.SimpleTy;
ValueTypeActions[I] = Action;
}
};
@@ -337,7 +335,7 @@ public:
unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT &IntermediateVT,
unsigned &NumIntermediates,
- EVT &RegisterVT) const;
+ MVT &RegisterVT) const;
/// getTgtMemIntrinsic: Given an intrinsic, checks if on the target the
/// intrinsic will need to map to a MemIntrinsicNode (touches memory). If
@@ -411,6 +409,15 @@ public:
getOperationAction(Op, VT) == Custom);
}
+ /// isOperationLegalOrPromote - Return true if the specified operation is
+ /// legal on this target or can be made legal using promotion. This
+ /// is used to help guide high-level lowering decisions.
+ bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
+ return (VT == MVT::Other || isTypeLegal(VT)) &&
+ (getOperationAction(Op, VT) == Legal ||
+ getOperationAction(Op, VT) == Promote);
+ }
+
/// isOperationExpand - Return true if the specified operation is illegal on
/// this target or unlikely to be made legal with custom lowering. This is
/// used to help guide high-level lowering decisions.
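
isOperationLegalOrPromote mirrors the existing isOperationLegalOrCustom helper, but for the Promote action. An illustrative guard in lowering code (useMulPath is hypothetical):

    static bool useMulPath(const llvm::TargetLowering &TLI, llvm::EVT VT) {
      // Native MUL and promoted MUL both end in a real instruction.
      return TLI.isOperationLegalOrPromote(llvm::ISD::MUL, VT);
    }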
@@ -429,36 +436,35 @@ public:
/// either it is legal, needs to be promoted to a larger size, needs to be
/// expanded to some other code sequence, or the target has a custom expander
/// for it.
- LegalizeAction getLoadExtAction(unsigned ExtType, EVT VT) const {
- assert(ExtType < ISD::LAST_LOADEXT_TYPE &&
- VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ LegalizeAction getLoadExtAction(unsigned ExtType, MVT VT) const {
+ assert(ExtType < ISD::LAST_LOADEXT_TYPE && VT < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
- return (LegalizeAction)LoadExtActions[VT.getSimpleVT().SimpleTy][ExtType];
+ return (LegalizeAction)LoadExtActions[VT.SimpleTy][ExtType];
}
/// isLoadExtLegal - Return true if the specified load with extension is legal
/// on this target.
bool isLoadExtLegal(unsigned ExtType, EVT VT) const {
- return VT.isSimple() && getLoadExtAction(ExtType, VT) == Legal;
+ return VT.isSimple() &&
+ getLoadExtAction(ExtType, VT.getSimpleVT()) == Legal;
}
/// getTruncStoreAction - Return how this store with truncation should be
/// treated: either it is legal, needs to be promoted to a larger size, needs
/// to be expanded to some other code sequence, or the target has a custom
/// expander for it.
- LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
- assert(ValVT.getSimpleVT() < MVT::LAST_VALUETYPE &&
- MemVT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ LegalizeAction getTruncStoreAction(MVT ValVT, MVT MemVT) const {
+ assert(ValVT < MVT::LAST_VALUETYPE && MemVT < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
- return (LegalizeAction)TruncStoreActions[ValVT.getSimpleVT().SimpleTy]
- [MemVT.getSimpleVT().SimpleTy];
+ return (LegalizeAction)TruncStoreActions[ValVT.SimpleTy]
+ [MemVT.SimpleTy];
}
/// isTruncStoreLegal - Return true if the specified store with truncation is
/// legal on this target.
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
return isTypeLegal(ValVT) && MemVT.isSimple() &&
- getTruncStoreAction(ValVT, MemVT) == Legal;
+ getTruncStoreAction(ValVT.getSimpleVT(), MemVT.getSimpleVT()) == Legal;
}
/// getIndexedLoadAction - Return how the indexed load should be treated:
@@ -466,11 +472,10 @@ public:
/// expanded to some other code sequence, or the target has a custom expander
/// for it.
LegalizeAction
- getIndexedLoadAction(unsigned IdxMode, EVT VT) const {
- assert(IdxMode < ISD::LAST_INDEXED_MODE &&
- VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
+ assert(IdxMode < ISD::LAST_INDEXED_MODE && VT < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
- unsigned Ty = (unsigned)VT.getSimpleVT().SimpleTy;
+ unsigned Ty = (unsigned)VT.SimpleTy;
return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4);
}
@@ -478,8 +483,8 @@ public:
/// on this target.
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
return VT.isSimple() &&
- (getIndexedLoadAction(IdxMode, VT) == Legal ||
- getIndexedLoadAction(IdxMode, VT) == Custom);
+ (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
+ getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
}
/// getIndexedStoreAction - Return how the indexed store should be treated:
@@ -487,11 +492,10 @@ public:
/// expanded to some other code sequence, or the target has a custom expander
/// for it.
LegalizeAction
- getIndexedStoreAction(unsigned IdxMode, EVT VT) const {
- assert(IdxMode < ISD::LAST_INDEXED_MODE &&
- VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
+ assert(IdxMode < ISD::LAST_INDEXED_MODE && VT < MVT::LAST_VALUETYPE &&
"Table isn't big enough!");
- unsigned Ty = (unsigned)VT.getSimpleVT().SimpleTy;
+ unsigned Ty = (unsigned)VT.SimpleTy;
return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f);
}
@@ -499,54 +503,54 @@ public:
/// on this target.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
return VT.isSimple() &&
- (getIndexedStoreAction(IdxMode, VT) == Legal ||
- getIndexedStoreAction(IdxMode, VT) == Custom);
+ (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
+ getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
}
/// getCondCodeAction - Return how the condition code should be treated:
/// either it is legal, needs to be expanded to some other code sequence,
/// or the target has a custom expander for it.
LegalizeAction
- getCondCodeAction(ISD::CondCode CC, EVT VT) const {
+ getCondCodeAction(ISD::CondCode CC, MVT VT) const {
assert((unsigned)CC < array_lengthof(CondCodeActions) &&
- (unsigned)VT.getSimpleVT().SimpleTy < sizeof(CondCodeActions[0])*4 &&
+ (unsigned)VT.SimpleTy < sizeof(CondCodeActions[0])*4 &&
"Table isn't big enough!");
/// The lower 5 bits of SimpleTy select the Nth 2-bit chunk of a 64-bit
/// value, and the upper 27 bits index into the second dimension of the
/// array to select which 64-bit value to use.
LegalizeAction Action = (LegalizeAction)
- ((CondCodeActions[CC][VT.getSimpleVT().SimpleTy >> 5]
- >> (2*(VT.getSimpleVT().SimpleTy & 0x1F))) & 3);
+ ((CondCodeActions[CC][VT.SimpleTy >> 5] >> (2*(VT.SimpleTy & 0x1F))) & 3);
assert(Action != Promote && "Can't promote condition code!");
return Action;
}
/// isCondCodeLegal - Return true if the specified condition code is legal
/// on this target.
- bool isCondCodeLegal(ISD::CondCode CC, EVT VT) const {
- return getCondCodeAction(CC, VT) == Legal ||
- getCondCodeAction(CC, VT) == Custom;
+ bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
+ return
+ getCondCodeAction(CC, VT) == Legal ||
+ getCondCodeAction(CC, VT) == Custom;
}
/// getTypeToPromoteTo - If the action for this operation is to promote, this
/// method returns the ValueType to promote to.
- EVT getTypeToPromoteTo(unsigned Op, EVT VT) const {
+ MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
assert(getOperationAction(Op, VT) == Promote &&
"This operation isn't promoted!");
// See if this has an explicit type specified.
std::map<std::pair<unsigned, MVT::SimpleValueType>,
MVT::SimpleValueType>::const_iterator PTTI =
- PromoteToType.find(std::make_pair(Op, VT.getSimpleVT().SimpleTy));
+ PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
if (PTTI != PromoteToType.end()) return PTTI->second;
assert((VT.isInteger() || VT.isFloatingPoint()) &&
"Cannot autopromote this type, add it with AddPromotedToType.");
- EVT NVT = VT;
+ MVT NVT = VT;
do {
- NVT = (MVT::SimpleValueType)(NVT.getSimpleVT().SimpleTy+1);
+ NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
"Didn't find type to promote to!");
} while (!isTypeLegal(NVT) ||
@@ -573,7 +577,11 @@ public:
}
return EVT::getEVT(Ty, AllowUnknown);
}
-
+
+ /// Return the MVT corresponding to this LLVM type. See getValueType.
+ MVT getSimpleValueType(Type *Ty, bool AllowUnknown = false) const {
+ return getValueType(Ty, AllowUnknown).getSimpleVT();
+ }
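
getSimpleValueType is a shortcut for callers that already know the IR type maps to a simple MVT. Hedged usage, where I is an assumed llvm::Instruction:

    // Asserts inside getSimpleVT() if the type turns out to be an extended EVT.
    llvm::MVT VT = TLI.getSimpleValueType(I.getType());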
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
@@ -582,21 +590,22 @@ public:
/// getRegisterType - Return the type of registers that this ValueType will
/// eventually require.
- EVT getRegisterType(MVT VT) const {
+ MVT getRegisterType(MVT VT) const {
assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
return RegisterTypeForVT[VT.SimpleTy];
}
/// getRegisterType - Return the type of registers that this ValueType will
/// eventually require.
- EVT getRegisterType(LLVMContext &Context, EVT VT) const {
+ MVT getRegisterType(LLVMContext &Context, EVT VT) const {
if (VT.isSimple()) {
assert((unsigned)VT.getSimpleVT().SimpleTy <
array_lengthof(RegisterTypeForVT));
return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
}
if (VT.isVector()) {
- EVT VT1, RegisterVT;
+ EVT VT1;
+ MVT RegisterVT;
unsigned NumIntermediates;
(void)getVectorTypeBreakdown(Context, VT, VT1,
NumIntermediates, RegisterVT);
@@ -621,7 +630,8 @@ public:
return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
}
if (VT.isVector()) {
- EVT VT1, VT2;
+ EVT VT1;
+ MVT VT2;
unsigned NumIntermediates;
return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
}
@@ -651,7 +661,7 @@ public:
/// return the limit for functions that have OptSize attribute.
/// @brief Get maximum # of store operations permitted for llvm.memset
unsigned getMaxStoresPerMemset(bool OptSize) const {
- return OptSize ? maxStoresPerMemsetOptSize : maxStoresPerMemset;
+ return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
}
/// This function returns the maximum number of store operations permitted
@@ -660,7 +670,7 @@ public:
/// return the limit for functions that have OptSize attribute.
/// @brief Get maximum # of store operations permitted for llvm.memcpy
unsigned getMaxStoresPerMemcpy(bool OptSize) const {
- return OptSize ? maxStoresPerMemcpyOptSize : maxStoresPerMemcpy;
+ return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
}
/// This function returns the maximum number of store operations permitted
@@ -669,46 +679,51 @@ public:
/// return the limit for functions that have OptSize attribute.
/// @brief Get maximum # of store operations permitted for llvm.memmove
unsigned getMaxStoresPerMemmove(bool OptSize) const {
- return OptSize ? maxStoresPerMemmoveOptSize : maxStoresPerMemmove;
+ return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
}
/// This function returns true if the target allows unaligned memory accesses
- /// of the specified type. This is used, for example, in situations where an
- /// array copy/move/set is converted to a sequence of store operations. It's
- /// use helps to ensure that such replacements don't generate code that causes
- /// an alignment error (trap) on the target machine.
+ /// of the specified type. If true, it also returns whether the unaligned
+ /// memory access is "fast" in the second argument by reference. This is used,
+ /// for example, in situations where an array copy/move/set is converted to a
+ /// sequence of store operations. Its use helps to ensure that such
+ /// replacements don't generate code that causes an alignment error (trap) on
+ /// the target machine.
/// @brief Determine if the target supports unaligned memory accesses.
- virtual bool allowsUnalignedMemoryAccesses(EVT) const {
+ virtual bool allowsUnalignedMemoryAccesses(EVT, bool *Fast = 0) const {
return false;
}
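
The new bool *Fast out-parameter lets memcpy/memset expansion distinguish "unaligned access works" from "unaligned access is fast". A hedged override for an imagined core where unaligned i32 access is legal but microcoded:

    virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const {
      if (VT != MVT::i32)
        return false;      // everything else must stay aligned
      if (Fast)
        *Fast = false;     // supported, but slow on this imagined core
      return true;
    }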
- /// This function returns true if the target would benefit from code placement
- /// optimization.
- /// @brief Determine if the target should perform code placement optimization.
- bool shouldOptimizeCodePlacement() const {
- return benefitFromCodePlacementOpt;
- }
-
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, it is safe to assume the destination
/// alignment can satisfy any constraint. Similarly, if SrcAlign is zero it
/// means there is no need to check it against an alignment requirement,
- /// probably because the source does not need to be loaded. If
- /// 'IsZeroVal' is true, that means it's safe to return a
- /// non-scalar-integer type, e.g. empty string source, constant, or loaded
- /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
- /// constant so it does not need to be loaded.
+ /// probably because the source does not need to be loaded. If 'IsMemset' is
+ /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+ /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
virtual EVT getOptimalMemOpType(uint64_t /*Size*/,
unsigned /*DstAlign*/, unsigned /*SrcAlign*/,
- bool /*IsZeroVal*/,
+ bool /*IsMemset*/,
+ bool /*ZeroMemset*/,
bool /*MemcpyStrSrc*/,
MachineFunction &/*MF*/) const {
return MVT::Other;
}
+ /// isSafeMemOpType - Returns true if it's safe to use load / store of the
+ /// specified type to expand memcpy / memset inline. This is mostly true
+ /// for all types except for some special cases. For example, on X86
+ /// targets without SSE2 f64 load / store are done with fldl / fstpl which
+ /// also does type conversion. Note the specified type doesn't have to be
+ /// legal as the hook is used before type legalization.
+ virtual bool isSafeMemOpType(MVT VT) const {
+ return true;
+ }
+
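
A hedged override matching the X86 caveat in the comment above: without SSE2, f64 copies would bounce through the x87 stack and convert values on the way. Subtarget.hasSSE2() stands in for however a backend tests the feature:

    virtual bool isSafeMemOpType(MVT VT) const {
      if (VT == MVT::f64)
        return Subtarget.hasSSE2();  // fldl/fstpl would type-convert, so forbid
      return true;
    }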
/// usesUnderscoreSetJmp - Determine if we should use _setjmp or setjmp
/// to implement llvm.setjmp.
bool usesUnderscoreSetJmp() const {
@@ -804,55 +819,6 @@ public:
return InsertFencesForAtomic;
}
- /// getPreIndexedAddressParts - returns true by value, base pointer and
- /// offset pointer and addressing mode by reference if the node's address
- /// can be legally represented as pre-indexed load / store address.
- virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/,
- SDValue &/*Offset*/,
- ISD::MemIndexedMode &/*AM*/,
- SelectionDAG &/*DAG*/) const {
- return false;
- }
-
- /// getPostIndexedAddressParts - returns true by value, base pointer and
- /// offset pointer and addressing mode by reference if this node can be
- /// combined with a load / store to form a post-indexed load / store.
- virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/,
- SDValue &/*Base*/, SDValue &/*Offset*/,
- ISD::MemIndexedMode &/*AM*/,
- SelectionDAG &/*DAG*/) const {
- return false;
- }
-
- /// getJumpTableEncoding - Return the entry encoding for a jump table in the
- /// current function. The returned value is a member of the
- /// MachineJumpTableInfo::JTEntryKind enum.
- virtual unsigned getJumpTableEncoding() const;
-
- virtual const MCExpr *
- LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
- const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,
- MCContext &/*Ctx*/) const {
- llvm_unreachable("Need to implement this hook if target has custom JTIs");
- }
-
- /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
- /// jumptable.
- virtual SDValue getPICJumpTableRelocBase(SDValue Table,
- SelectionDAG &DAG) const;
-
- /// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
- /// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
- /// MCExpr.
- virtual const MCExpr *
- getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
- unsigned JTI, MCContext &Ctx) const;
-
- /// isOffsetFoldingLegal - Return true if folding a constant offset
- /// with the given GlobalAddress is legal. It is frequently not legal in
- /// PIC relocation models.
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
-
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
/// space, and populates the address space and offset as
@@ -869,148 +835,16 @@ public:
}
//===--------------------------------------------------------------------===//
- // TargetLowering Optimization Methods
- //
-
- /// TargetLoweringOpt - A convenience struct that encapsulates a DAG, and two
- /// SDValues for returning information from TargetLowering to its clients
- /// that want to combine
- struct TargetLoweringOpt {
- SelectionDAG &DAG;
- bool LegalTys;
- bool LegalOps;
- SDValue Old;
- SDValue New;
-
- explicit TargetLoweringOpt(SelectionDAG &InDAG,
- bool LT, bool LO) :
- DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
-
- bool LegalTypes() const { return LegalTys; }
- bool LegalOperations() const { return LegalOps; }
-
- bool CombineTo(SDValue O, SDValue N) {
- Old = O;
- New = N;
- return true;
- }
-
- /// ShrinkDemandedConstant - Check to see if the specified operand of the
- /// specified instruction is a constant integer. If so, check to see if
- /// there are any bits set in the constant that are not demanded. If so,
- /// shrink the constant and return true.
- bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded);
-
- /// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
- /// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
- /// cast, but it could be generalized for targets with other types of
- /// implicit widening casts.
- bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
- DebugLoc dl);
- };
-
- /// SimplifyDemandedBits - Look at Op. At this point, we know that only the
- /// DemandedMask bits of the result of Op are ever used downstream. If we can
- /// use this information to simplify Op, create a new simplified DAG node and
- /// return true, returning the original and new nodes in Old and New.
- /// Otherwise, analyze the expression and return a mask of KnownOne and
- /// KnownZero bits for the expression (used to simplify the caller).
- /// The KnownZero/One bits may only be accurate for those bits in the
- /// DemandedMask.
- bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
- APInt &KnownZero, APInt &KnownOne,
- TargetLoweringOpt &TLO, unsigned Depth = 0) const;
-
- /// computeMaskedBitsForTargetNode - Determine which of the bits specified in
- /// Mask are known to be either zero or one and return them in the
- /// KnownZero/KnownOne bitsets.
- virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const;
-
- /// ComputeNumSignBitsForTargetNode - This method can be implemented by
- /// targets that want to expose additional information about sign bits to the
- /// DAG Combiner.
- virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
- unsigned Depth = 0) const;
-
- struct DAGCombinerInfo {
- void *DC; // The DAG Combiner object.
- bool BeforeLegalize;
- bool BeforeLegalizeOps;
- bool CalledByLegalizer;
- public:
- SelectionDAG &DAG;
-
- DAGCombinerInfo(SelectionDAG &dag, bool bl, bool blo, bool cl, void *dc)
- : DC(dc), BeforeLegalize(bl), BeforeLegalizeOps(blo),
- CalledByLegalizer(cl), DAG(dag) {}
+ /// \name Helpers for TargetTransformInfo implementations
+ /// @{
- bool isBeforeLegalize() const { return BeforeLegalize; }
- bool isBeforeLegalizeOps() const { return BeforeLegalizeOps; }
- bool isCalledByLegalizer() const { return CalledByLegalizer; }
+ /// Get the ISD node that corresponds to the Instruction class opcode.
+ int InstructionOpcodeToISD(unsigned Opcode) const;
- void AddToWorklist(SDNode *N);
- void RemoveFromWorklist(SDNode *N);
- SDValue CombineTo(SDNode *N, const std::vector<SDValue> &To,
- bool AddTo = true);
- SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
- SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
-
- void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
- };
-
- /// SimplifySetCC - Try to simplify a setcc built with the specified operands
- /// and cc. If it is unable to simplify it, return a null SDValue.
- SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
- ISD::CondCode Cond, bool foldBooleans,
- DAGCombinerInfo &DCI, DebugLoc dl) const;
+ /// Estimate the cost of type-legalization and the legalized type.
+ std::pair<unsigned, MVT> getTypeLegalizationCost(Type *Ty) const;
- /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
- /// node is a GlobalAddress + offset.
- virtual bool
- isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
-
- /// PerformDAGCombine - This method will be invoked for all target nodes and
- /// for any target-independent nodes that the target has registered with
- /// invoke it for.
- ///
- /// The semantics are as follows:
- /// Return Value:
- /// SDValue.Val == 0 - No change was made
- /// SDValue.Val == N - N was replaced, is dead, and is already handled.
- /// otherwise - N should be replaced by the returned Operand.
- ///
- /// In addition, methods provided by DAGCombinerInfo may be used to perform
- /// more complex transformations.
- ///
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
- /// isTypeDesirableForOp - Return true if the target has native support for
- /// the specified value type and it is 'desirable' to use the type for the
- /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
- /// instruction encodings are longer and some i16 instructions are slow.
- virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
- // By default, assume all legal types are desirable.
- return isTypeLegal(VT);
- }
-
- /// isDesirableToPromoteOp - Return true if it is profitable for dag combiner
- /// to transform a floating point op of specified opcode to a equivalent op of
- /// an integer type. e.g. f32 load -> i32 load can be profitable on ARM.
- virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
- EVT /*VT*/) const {
- return false;
- }
-
- /// IsDesirableToPromoteOp - This method query the target whether it is
- /// beneficial for dag combiner to promote the specified node. If true, it
- /// should return the desired promotion type by reference.
- virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
- return false;
- }
+ /// @}
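
These two helpers are the hooks the new TargetTransformInfo implementations build on. A hedged cost-query sketch in the style of the generic cost model, with TLI and the IR type Ty assumed in scope:

    // Map the IR opcode to its ISD node, then ask how many legal pieces the
    // type splits into; a simple model charges one unit per piece.
    int ISDOpc = TLI.InstructionOpcodeToISD(llvm::Instruction::Add);
    std::pair<unsigned, llvm::MVT> LT = TLI.getTypeLegalizationCost(Ty);
    unsigned Cost = (ISDOpc != 0) ? LT.first : 0;  // illustrative model only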
//===--------------------------------------------------------------------===//
// TargetLowering Configuration Methods - These methods should be invoked by
@@ -1111,16 +945,23 @@ protected:
/// addRegisterClass - Add the specified register class as an available
/// regclass for the specified value type. This indicates the selector can
/// handle values of that class natively.
- void addRegisterClass(EVT VT, const TargetRegisterClass *RC) {
- assert((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
+ void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
+ assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
AvailableRegClasses.push_back(std::make_pair(VT, RC));
- RegClassForVT[VT.getSimpleVT().SimpleTy] = RC;
+ RegClassForVT[VT.SimpleTy] = RC;
+ }
+
+ /// clearRegisterClasses - remove all register classes
+ void clearRegisterClasses() {
+ for (unsigned i = 0 ; i<array_lengthof(RegClassForVT); i++)
+ RegClassForVT[i] = 0;
+ AvailableRegClasses.clear();
}
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
virtual std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(EVT VT) const;
+ findRepresentativeClass(MVT VT) const;
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
@@ -1263,387 +1104,6 @@ protected:
public:
//===--------------------------------------------------------------------===//
- // Lowering methods - These methods must be implemented by targets so that
- // the SelectionDAGBuilder code knows how to lower these.
- //
-
- /// LowerFormalArguments - This hook must be implemented to lower the
- /// incoming (formal) arguments, described by the Ins array, into the
- /// specified DAG. The implementation should fill in the InVals array
- /// with legal-type argument values, and return the resulting token
- /// chain value.
- ///
- virtual SDValue
- LowerFormalArguments(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
- bool /*isVarArg*/,
- const SmallVectorImpl<ISD::InputArg> &/*Ins*/,
- DebugLoc /*dl*/, SelectionDAG &/*DAG*/,
- SmallVectorImpl<SDValue> &/*InVals*/) const {
- llvm_unreachable("Not Implemented");
- }
-
- struct ArgListEntry {
- SDValue Node;
- Type* Ty;
- bool isSExt : 1;
- bool isZExt : 1;
- bool isInReg : 1;
- bool isSRet : 1;
- bool isNest : 1;
- bool isByVal : 1;
- uint16_t Alignment;
-
- ArgListEntry() : isSExt(false), isZExt(false), isInReg(false),
- isSRet(false), isNest(false), isByVal(false), Alignment(0) { }
- };
- typedef std::vector<ArgListEntry> ArgListTy;
-
- /// CallLoweringInfo - This structure contains all information that is
- /// necessary for lowering calls. It is passed to TLI::LowerCallTo when the
- /// SelectionDAG builder needs to lower a call, and targets will see this
- /// struct in their LowerCall implementation.
- struct CallLoweringInfo {
- SDValue Chain;
- Type *RetTy;
- bool RetSExt : 1;
- bool RetZExt : 1;
- bool IsVarArg : 1;
- bool IsInReg : 1;
- bool DoesNotReturn : 1;
- bool IsReturnValueUsed : 1;
-
- // IsTailCall should be modified by implementations of
- // TargetLowering::LowerCall that perform tail call conversions.
- bool IsTailCall;
-
- unsigned NumFixedArgs;
- CallingConv::ID CallConv;
- SDValue Callee;
- ArgListTy &Args;
- SelectionDAG &DAG;
- DebugLoc DL;
- ImmutableCallSite *CS;
- SmallVector<ISD::OutputArg, 32> Outs;
- SmallVector<SDValue, 32> OutVals;
- SmallVector<ISD::InputArg, 32> Ins;
-
-
- /// CallLoweringInfo - Constructs a call lowering context based on the
- /// ImmutableCallSite \p cs.
- CallLoweringInfo(SDValue chain, Type *retTy,
- FunctionType *FTy, bool isTailCall, SDValue callee,
- ArgListTy &args, SelectionDAG &dag, DebugLoc dl,
- ImmutableCallSite &cs)
- : Chain(chain), RetTy(retTy), RetSExt(cs.paramHasAttr(0, Attributes::SExt)),
- RetZExt(cs.paramHasAttr(0, Attributes::ZExt)), IsVarArg(FTy->isVarArg()),
- IsInReg(cs.paramHasAttr(0, Attributes::InReg)),
- DoesNotReturn(cs.doesNotReturn()),
- IsReturnValueUsed(!cs.getInstruction()->use_empty()),
- IsTailCall(isTailCall), NumFixedArgs(FTy->getNumParams()),
- CallConv(cs.getCallingConv()), Callee(callee), Args(args), DAG(dag),
- DL(dl), CS(&cs) {}
-
- /// CallLoweringInfo - Constructs a call lowering context based on the
- /// provided call information.
- CallLoweringInfo(SDValue chain, Type *retTy, bool retSExt, bool retZExt,
- bool isVarArg, bool isInReg, unsigned numFixedArgs,
- CallingConv::ID callConv, bool isTailCall,
- bool doesNotReturn, bool isReturnValueUsed, SDValue callee,
- ArgListTy &args, SelectionDAG &dag, DebugLoc dl)
- : Chain(chain), RetTy(retTy), RetSExt(retSExt), RetZExt(retZExt),
- IsVarArg(isVarArg), IsInReg(isInReg), DoesNotReturn(doesNotReturn),
- IsReturnValueUsed(isReturnValueUsed), IsTailCall(isTailCall),
- NumFixedArgs(numFixedArgs), CallConv(callConv), Callee(callee),
- Args(args), DAG(dag), DL(dl), CS(NULL) {}
- };
-
- /// LowerCallTo - This function lowers an abstract call to a function into an
- /// actual call. This returns a pair of operands. The first element is the
- /// return value for the function (if RetTy is not VoidTy). The second
- /// element is the outgoing token chain. It calls LowerCall to do the actual
- /// lowering.
- std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;
-
- /// LowerCall - This hook must be implemented to lower calls into the
- /// the specified DAG. The outgoing arguments to the call are described
- /// by the Outs array, and the values to be returned by the call are
- /// described by the Ins array. The implementation should fill in the
- /// InVals array with legal-type return values from the call, and return
- /// the resulting token chain value.
- virtual SDValue
- LowerCall(CallLoweringInfo &/*CLI*/,
- SmallVectorImpl<SDValue> &/*InVals*/) const {
- llvm_unreachable("Not Implemented");
- }
-
- /// HandleByVal - Target-specific cleanup for formal ByVal parameters.
- virtual void HandleByVal(CCState *, unsigned &, unsigned) const {}
-
- /// CanLowerReturn - This hook should be implemented to check whether the
- /// return values described by the Outs array can fit into the return
- /// registers. If false is returned, an sret-demotion is performed.
- ///
- virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
- MachineFunction &/*MF*/, bool /*isVarArg*/,
- const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
- LLVMContext &/*Context*/) const
- {
- // Return true by default to get preexisting behavior.
- return true;
- }
-
- /// LowerReturn - This hook must be implemented to lower outgoing
- /// return values, described by the Outs array, into the specified
- /// DAG. The implementation should return the resulting token chain
- /// value.
- ///
- virtual SDValue
- LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
- bool /*isVarArg*/,
- const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
- const SmallVectorImpl<SDValue> &/*OutVals*/,
- DebugLoc /*dl*/, SelectionDAG &/*DAG*/) const {
- llvm_unreachable("Not Implemented");
- }
-
- /// isUsedByReturnOnly - Return true if result of the specified node is used
- /// by a return node only. It also compute and return the input chain for the
- /// tail call.
- /// This is used to determine whether it is possible
- /// to codegen a libcall as tail call at legalization time.
- virtual bool isUsedByReturnOnly(SDNode *, SDValue &Chain) const {
- return false;
- }
-
- /// mayBeEmittedAsTailCall - Return true if the target may be able emit the
- /// call instruction as a tail call. This is used by optimization passes to
- /// determine if it's profitable to duplicate return instructions to enable
- /// tailcall optimization.
- virtual bool mayBeEmittedAsTailCall(CallInst *) const {
- return false;
- }
-
- /// getTypeForExtArgOrReturn - Return the type that should be used to zero or
- /// sign extend a zeroext/signext integer argument or return value.
- /// FIXME: Most C calling convention requires the return type to be promoted,
- /// but this is not true all the time, e.g. i1 on x86-64. It is also not
- /// necessary for non-C calling conventions. The frontend should handle this
- /// and include all of the necessary information.
- virtual EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
- ISD::NodeType /*ExtendKind*/) const {
- EVT MinVT = getRegisterType(Context, MVT::i32);
- return VT.bitsLT(MinVT) ? MinVT : VT;
- }
-
- /// LowerOperationWrapper - This callback is invoked by the type legalizer
- /// to legalize nodes with an illegal operand type but legal result types.
- /// It replaces the LowerOperation callback in the type Legalizer.
- /// The reason we can not do away with LowerOperation entirely is that
- /// LegalizeDAG isn't yet ready to use this callback.
- /// TODO: Consider merging with ReplaceNodeResults.
-
- /// The target places new result values for the node in Results (their number
- /// and types must exactly match those of the original return values of
- /// the node), or leaves Results empty, which indicates that the node is not
- /// to be custom lowered after all.
- /// The default implementation calls LowerOperation.
- virtual void LowerOperationWrapper(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
-
- /// LowerOperation - This callback is invoked for operations that are
- /// unsupported by the target, which are registered to use 'custom' lowering,
- /// and whose defined values are all legal.
- /// If the target has no operations that require custom lowering, it need not
- /// implement this. The default implementation of this aborts.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-
- /// ReplaceNodeResults - This callback is invoked when a node result type is
- /// illegal for the target, and the operation was registered to use 'custom'
- /// lowering for that result type. The target places new result values for
- /// the node in Results (their number and types must exactly match those of
- /// the original return values of the node), or leaves Results empty, which
- /// indicates that the node is not to be custom lowered after all.
- ///
- /// If the target has no operations that require custom lowering, it need not
- /// implement this. The default implementation aborts.
- virtual void ReplaceNodeResults(SDNode * /*N*/,
- SmallVectorImpl<SDValue> &/*Results*/,
- SelectionDAG &/*DAG*/) const {
- llvm_unreachable("ReplaceNodeResults not implemented for this target!");
- }
-
- /// getTargetNodeName() - This method returns the name of a target specific
- /// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
-
- /// createFastISel - This method returns a target specific FastISel object,
- /// or null if the target does not support "fast" ISel.
- virtual FastISel *createFastISel(FunctionLoweringInfo &,
- const TargetLibraryInfo *) const {
- return 0;
- }
-
- //===--------------------------------------------------------------------===//
- // Inline Asm Support hooks
- //
-
- /// ExpandInlineAsm - This hook allows the target to expand an inline asm
- /// call to be explicit llvm code if it wants to. This is useful for
- /// turning simple inline asms into LLVM intrinsics, which gives the
- /// compiler more information about the behavior of the code.
- virtual bool ExpandInlineAsm(CallInst *) const {
- return false;
- }
-
- enum ConstraintType {
- C_Register, // Constraint represents specific register(s).
- C_RegisterClass, // Constraint represents any of register(s) in class.
- C_Memory, // Memory constraint.
- C_Other, // Something else.
- C_Unknown // Unsupported constraint.
- };
-
- enum ConstraintWeight {
- // Generic weights.
- CW_Invalid = -1, // No match.
- CW_Okay = 0, // Acceptable.
- CW_Good = 1, // Good weight.
- CW_Better = 2, // Better weight.
- CW_Best = 3, // Best weight.
-
- // Well-known weights.
- CW_SpecificReg = CW_Okay, // Specific register operands.
- CW_Register = CW_Good, // Register operands.
- CW_Memory = CW_Better, // Memory operands.
- CW_Constant = CW_Best, // Constant operand.
- CW_Default = CW_Okay // Default or don't know type.
- };
-
- /// AsmOperandInfo - This contains information for each constraint that we are
- /// lowering.
- struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
- /// ConstraintCode - This contains the actual string for the code, like "m".
- /// TargetLowering picks the 'best' code from ConstraintInfo::Codes that
- /// most closely matches the operand.
- std::string ConstraintCode;
-
- /// ConstraintType - Information about the constraint code, e.g. Register,
- /// RegisterClass, Memory, Other, Unknown.
- TargetLowering::ConstraintType ConstraintType;
-
- /// CallOperandval - If this is the result output operand or a
- /// clobber, this is null, otherwise it is the incoming operand to the
- /// CallInst. This gets modified as the asm is processed.
- Value *CallOperandVal;
-
- /// ConstraintVT - The ValueType for the operand value.
- EVT ConstraintVT;
-
- /// isMatchingInputConstraint - Return true of this is an input operand that
- /// is a matching constraint like "4".
- bool isMatchingInputConstraint() const;
-
- /// getMatchedOperand - If this is an input matching constraint, this method
- /// returns the output operand it matches.
- unsigned getMatchedOperand() const;
-
- /// Copy constructor for copying from an AsmOperandInfo.
- AsmOperandInfo(const AsmOperandInfo &info)
- : InlineAsm::ConstraintInfo(info),
- ConstraintCode(info.ConstraintCode),
- ConstraintType(info.ConstraintType),
- CallOperandVal(info.CallOperandVal),
- ConstraintVT(info.ConstraintVT) {
- }
-
- /// Copy constructor for copying from a ConstraintInfo.
- AsmOperandInfo(const InlineAsm::ConstraintInfo &info)
- : InlineAsm::ConstraintInfo(info),
- ConstraintType(TargetLowering::C_Unknown),
- CallOperandVal(0), ConstraintVT(MVT::Other) {
- }
- };
-
- typedef std::vector<AsmOperandInfo> AsmOperandInfoVector;
-
- /// ParseConstraints - Split up the constraint string from the inline
- /// assembly value into the specific constraints and their prefixes,
- /// and also tie in the associated operand values.
- /// If this returns an empty vector, and if the constraint string itself
- /// isn't empty, there was an error parsing.
- virtual AsmOperandInfoVector ParseConstraints(ImmutableCallSite CS) const;
-
- /// Examine constraint type and operand type and determine a weight value.
- /// The operand object must already have been set up with the operand type.
- virtual ConstraintWeight getMultipleConstraintMatchWeight(
- AsmOperandInfo &info, int maIndex) const;
-
- /// Examine constraint string and operand type and determine a weight value.
- /// The operand object must already have been set up with the operand type.
- virtual ConstraintWeight getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const;
-
- /// ComputeConstraintToUse - Determines the constraint code and constraint
- /// type to use for the specific AsmOperandInfo, setting
- /// OpInfo.ConstraintCode and OpInfo.ConstraintType. If the actual operand
- /// being passed in is available, it can be passed in as Op, otherwise an
- /// empty SDValue can be passed.
- virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
- SDValue Op,
- SelectionDAG *DAG = 0) const;
-
- /// getConstraintType - Given a constraint, return the type of constraint it
- /// is for this target.
- virtual ConstraintType getConstraintType(const std::string &Constraint) const;
-
- /// getRegForInlineAsmConstraint - Given a physical register constraint (e.g.
- /// {edx}), return the register number and the register class for the
- /// register.
- ///
- /// Given a register class constraint, like 'r', if this corresponds directly
- /// to an LLVM register class, return a register of 0 and the register class
- /// pointer.
- ///
- /// This should only be used for C_Register constraints. On error,
- /// this returns a register number of 0 and a null register class pointer..
- virtual std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
-
- /// LowerXConstraint - try to replace an X constraint, which matches anything,
- /// with another that has more specific requirements based on the type of the
- /// corresponding operand. This returns null if there is no replacement to
- /// make.
- virtual const char *LowerXConstraint(EVT ConstraintVT) const;
-
- /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
- /// vector. If it is invalid, don't add anything to Ops.
- virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
-
- //===--------------------------------------------------------------------===//
- // Instruction Emitting Hooks
- //
-
- // EmitInstrWithCustomInserter - This method should be implemented by targets
- // that mark instructions with the 'usesCustomInserter' flag. These
- // instructions are special in various ways, which require special support to
- // insert. The specified MachineInstr is created but not inserted into any
- // basic blocks, and this method is called to expand it into a sequence of
- // instructions, potentially also creating new basic blocks and control flow.
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
-
- /// AdjustInstrPostInstrSelection - This method should be implemented by
- /// targets that mark instructions with the 'hasPostISelHook' flag. These
- /// instructions must be adjusted after instruction selection by target hooks.
- /// e.g. To fill in optional defs for ARM 's' setting instructions.
- virtual void
- AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const;
-
- //===--------------------------------------------------------------------===//
// Addressing mode description hooks (used by LSR etc).
//
@@ -1658,6 +1118,22 @@ public:
return false;
}
+ /// AddrMode - This represents an addressing mode of:
+ /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
+ /// If BaseGV is null, there is no BaseGV.
+ /// If BaseOffs is zero, there is no base offset.
+ /// If HasBaseReg is false, there is no base register.
+ /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
+ /// no scale.
+ ///
+ struct AddrMode {
+ GlobalValue *BaseGV;
+ int64_t BaseOffs;
+ bool HasBaseReg;
+ int64_t Scale;
+ AddrMode() : BaseGV(0), BaseOffs(0), HasBaseReg(false), Scale(0) {}
+ };
+
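// Editorial sketch, not part of the imported patch: how a client pass
// might fill in the AddrMode struct above and query the
// isLegalAddressingMode hook documented below. The global, access type
// and offset are hypothetical, and the enclosing class is assumed to be
// TargetLoweringBase per the split introduced later in this diff.
static bool isGVPlus16PlusRegLegal(const TargetLoweringBase &TLI,
                                   GlobalValue *GV, Type *AccessTy) {
  TargetLoweringBase::AddrMode AM;
  AM.BaseGV = GV;        // GV + ...
  AM.BaseOffs = 16;      // ... + 16 + ...
  AM.HasBaseReg = true;  // ... + BaseReg (Scale stays 0: no scaled index)
  return TLI.isLegalAddressingMode(AM, AccessTy);
}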
/// isLegalAddressingMode - Return true if the addressing mode represented by
/// AM is legal for this target, for a load/store of the specified type.
/// The type may be VoidTy, in which case only return true if the addressing
@@ -1708,6 +1184,13 @@ public:
return false;
}
+ /// isZExtFree - Return true if zero-extending the specified node Val to type
+ /// VT2 is free (either because it's implicitly zero-extended such as ARM
+ /// ldrb / ldrh or because it's folded such as X86 zero-extending loads).
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const {
+ return isZExtFree(Val.getValueType(), VT2);
+ }
+
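// Editorial sketch, not from this patch: a hypothetical backend whose
// i8/i16 loads zero-extend for free might override the new SDValue form
// of isZExtFree above roughly as follows (MyTargetLowering is invented):
bool MyTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  EVT VT1 = Val.getValueType();
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  // Zero-extending the result of a narrow load folds into the load.
  if (ISD::isZEXTLoad(Val.getNode()) || ISD::isEXTLoad(Val.getNode()))
    return VT1.bitsLE(MVT::i16);
  return TargetLoweringBase::isZExtFree(VT1, VT2);
}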
/// isFNegFree - Return true if an fneg operation is free to the point where
/// it is never worthwhile to replace it with a bitwise operation.
virtual bool isFNegFree(EVT) const {
@@ -1736,17 +1219,6 @@ public:
}
//===--------------------------------------------------------------------===//
- // Div utility functions
- //
- SDValue BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
- SelectionDAG &DAG) const;
- SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode*>* Created) const;
- SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode*>* Created) const;
-
-
- //===--------------------------------------------------------------------===//
// Runtime Library hooks
//
@@ -1909,7 +1381,7 @@ private:
/// each ValueType the target supports natively.
const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE];
- EVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
+ MVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
/// RepRegClassForVT - This indicates the "representative" register class to
/// use for each ValueType the target supports natively. This information is
@@ -1929,7 +1401,7 @@ private:
/// contains one step of the expand (e.g. i64 -> i32), even if there are
/// multiple steps required (e.g. i64 -> i16). For types natively supported
/// by the system, this holds the same type (e.g. i32 -> i32).
- EVT TransformToType[MVT::LAST_VALUETYPE];
+ MVT TransformToType[MVT::LAST_VALUETYPE];
/// OpActions - For each operation and each value type, keep a LegalizeAction
/// that indicates how instruction selection should deal with the operation.
@@ -1970,19 +1442,22 @@ public:
getTypeConversion(LLVMContext &Context, EVT VT) const {
// If this is a simple type, use the ComputeRegisterProp mechanism.
if (VT.isSimple()) {
- assert((unsigned)VT.getSimpleVT().SimpleTy <
- array_lengthof(TransformToType));
- EVT NVT = TransformToType[VT.getSimpleVT().SimpleTy];
- LegalizeTypeAction LA = ValueTypeActions.getTypeAction(VT.getSimpleVT());
+ MVT SVT = VT.getSimpleVT();
+ assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType));
+ MVT NVT = TransformToType[SVT.SimpleTy];
+ LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
assert(
- (!(NVT.isSimple() && LA != TypeLegal) ||
- ValueTypeActions.getTypeAction(NVT.getSimpleVT()) != TypePromoteInteger)
+ (LA == TypeLegal ||
+ ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)
&& "Promote may not follow Expand or Promote");
if (LA == TypeSplitVector)
- NVT = EVT::getVectorVT(Context, VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
+ return LegalizeKind(LA, EVT::getVectorVT(Context,
+ SVT.getVectorElementType(),
+ SVT.getVectorNumElements()/2));
+ if (LA == TypeScalarizeVector)
+ return LegalizeKind(LA, SVT.getVectorElementType());
return LegalizeKind(LA, NVT);
}
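// Editorial worked example for the two vector cases above, assuming a
// target where v4i32 is legal but v8i32 is not and was marked
// TypeSplitVector: getTypeConversion(Context, v8i32) now returns
// LegalizeKind(TypeSplitVector, v4i32) -- same element type, half the
// lanes -- while a one-element vector such as v1i64 marked
// TypeScalarizeVector yields LegalizeKind(TypeScalarizeVector, i64).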
@@ -2086,7 +1561,7 @@ public:
}
private:
- std::vector<std::pair<EVT, const TargetRegisterClass*> > AvailableRegClasses;
+ std::vector<std::pair<MVT, const TargetRegisterClass*> > AvailableRegClasses;
/// TargetDAGCombineArray - Targets can specify ISD nodes that they would
/// like PerformDAGCombine callbacks for by calling setTargetDAGCombine(),
@@ -2125,11 +1600,11 @@ protected:
/// with 16-bit alignment would result in four 2-byte stores and one 1-byte
/// store. This only applies to setting a constant array of a constant size.
/// @brief Specify maximum number of store instructions per memset call.
- unsigned maxStoresPerMemset;
+ unsigned MaxStoresPerMemset;
/// Maximum number of stores operations that may be substituted for the call
/// to memset, used for functions with OptSize attribute.
- unsigned maxStoresPerMemsetOptSize;
+ unsigned MaxStoresPerMemsetOptSize;
/// When lowering \@llvm.memcpy this field specifies the maximum number of
/// store operations that may be substituted for a call to memcpy. Targets
@@ -2141,11 +1616,11 @@ protected:
/// and one 1-byte store. This only applies to copying a constant array of
/// constant size.
/// @brief Specify maximum bytes of store instructions per memcpy call.
- unsigned maxStoresPerMemcpy;
+ unsigned MaxStoresPerMemcpy;
/// Maximum number of store operations that may be substituted for a call
/// to memcpy, used for functions with OptSize attribute.
- unsigned maxStoresPerMemcpyOptSize;
+ unsigned MaxStoresPerMemcpyOptSize;
/// When lowering \@llvm.memmove this field specifies the maximum number of
/// store instructions that may be substituted for a call to memmove. Targets
@@ -2156,30 +1631,641 @@ protected:
/// with 8-bit alignment would result in nine 1-byte stores. This only
/// applies to copying a constant array of constant size.
/// @brief Specify maximum bytes of store instructions per memmove call.
- unsigned maxStoresPerMemmove;
+ unsigned MaxStoresPerMemmove;
/// Maximum number of store instructions that may be substituted for a call
/// to memmove, used for functions with OptSize attribute.
- unsigned maxStoresPerMemmoveOptSize;
-
- /// This field specifies whether the target can benefit from code placement
- /// optimization.
- bool benefitFromCodePlacementOpt;
+ unsigned MaxStoresPerMemmoveOptSize;
- /// predictableSelectIsExpensive - Tells the code generator that select is
+ /// PredictableSelectIsExpensive - Tells the code generator that select is
/// more expensive than a branch if the branch is usually predicted right.
- bool predictableSelectIsExpensive;
+ bool PredictableSelectIsExpensive;
-private:
+protected:
/// isLegalRC - Return true if the value types that can be represented by the
/// specified register class are all legal.
bool isLegalRC(const TargetRegisterClass *RC) const;
};
+//===----------------------------------------------------------------------===//
+/// TargetLowering - This class defines information used to lower LLVM code to
+/// legal SelectionDAG operators that the target instruction selector can accept
+/// natively.
+///
+/// This class also defines callbacks that targets must implement to lower
+/// target-specific constructs to SelectionDAG operators.
+///
+class TargetLowering : public TargetLoweringBase {
+ TargetLowering(const TargetLowering&) LLVM_DELETED_FUNCTION;
+ void operator=(const TargetLowering&) LLVM_DELETED_FUNCTION;
+
+public:
+ /// NOTE: The constructor takes ownership of TLOF.
+ explicit TargetLowering(const TargetMachine &TM,
+ const TargetLoweringObjectFile *TLOF);
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as a pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/,
+ SDValue &/*Offset*/,
+ ISD::MemIndexedMode &/*AM*/,
+ SelectionDAG &/*DAG*/) const {
+ return false;
+ }
+
+ /// getPostIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if this node can be
+ /// combined with a load / store to form a post-indexed load / store.
+ virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/,
+ SDValue &/*Base*/, SDValue &/*Offset*/,
+ ISD::MemIndexedMode &/*AM*/,
+ SelectionDAG &/*DAG*/) const {
+ return false;
+ }
+
+ /// getJumpTableEncoding - Return the entry encoding for a jump table in the
+ /// current function. The returned value is a member of the
+ /// MachineJumpTableInfo::JTEntryKind enum.
+ virtual unsigned getJumpTableEncoding() const;
+
+ virtual const MCExpr *
+ LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
+ const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,
+ MCContext &/*Ctx*/) const {
+ llvm_unreachable("Need to implement this hook if target has custom JTIs");
+ }
+
+ /// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
+ /// jumptable.
+ virtual SDValue getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const;
+
+ /// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+ /// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+ /// MCExpr.
+ virtual const MCExpr *
+ getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI, MCContext &Ctx) const;
+
+ /// isOffsetFoldingLegal - Return true if folding a constant offset
+ /// with the given GlobalAddress is legal. It is frequently not legal in
+ /// PIC relocation models.
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ SDValue &Chain) const;
+
+ void softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, DebugLoc DL) const;
+
+ SDValue makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) const;
+
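// Editorial sketch, not from this patch: a typical use of makeLibCall
// (declared just above) to expand an f128 addition into a runtime
// library call. The wrapper function and its arguments are invented.
static SDValue expandF128Add(const TargetLowering &TLI, SelectionDAG &DAG,
                             SDValue LHS, SDValue RHS, DebugLoc dl) {
  SDValue Ops[2] = { LHS, RHS };
  return TLI.makeLibCall(DAG, RTLIB::ADD_F128, MVT::f128,
                         Ops, 2, /*isSigned=*/false, dl);
}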
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Optimization Methods
+ //
+
+ /// TargetLoweringOpt - A convenience struct that encapsulates a DAG and two
+ /// SDValues for returning information from TargetLowering to its clients
+ /// that want to combine.
+ struct TargetLoweringOpt {
+ SelectionDAG &DAG;
+ bool LegalTys;
+ bool LegalOps;
+ SDValue Old;
+ SDValue New;
+
+ explicit TargetLoweringOpt(SelectionDAG &InDAG,
+ bool LT, bool LO) :
+ DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
+
+ bool LegalTypes() const { return LegalTys; }
+ bool LegalOperations() const { return LegalOps; }
+
+ bool CombineTo(SDValue O, SDValue N) {
+ Old = O;
+ New = N;
+ return true;
+ }
+
+ /// ShrinkDemandedConstant - Check to see if the specified operand of the
+ /// specified instruction is a constant integer. If so, check to see if
+ /// there are any bits set in the constant that are not demanded. If so,
+ /// shrink the constant and return true.
+ bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded);
+
+ /// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+ /// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening
+ /// cast, but it could be generalized for targets with other types of
+ /// implicit widening casts.
+ bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
+ DebugLoc dl);
+ };
+
+ /// SimplifyDemandedBits - Look at Op. At this point, we know that only the
+ /// DemandedMask bits of the result of Op are ever used downstream. If we can
+ /// use this information to simplify Op, create a new simplified DAG node and
+ /// return true, returning the original and new nodes in Old and New.
+ /// Otherwise, analyze the expression and return a mask of KnownOne and
+ /// KnownZero bits for the expression (used to simplify the caller).
+ /// The KnownZero/One bits may only be accurate for those bits in the
+ /// DemandedMask.
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ TargetLoweringOpt &TLO, unsigned Depth = 0) const;
+
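// Editorial sketch, not from this patch, of the pattern TargetLoweringOpt
// and SimplifyDemandedBits (both above) are designed for inside a
// hypothetical target's PerformDAGCombine override, where N and Op are
// in scope and DCI is the DAGCombinerInfo defined further below:
//
//   TargetLoweringOpt TLO(DCI.DAG, !DCI.isBeforeLegalize(),
//                         !DCI.isBeforeLegalizeOps());
//   APInt KnownZero, KnownOne;
//   APInt Demanded = APInt::getLowBitsSet(32, 16); // only low 16 bits used
//   if (SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) {
//     DCI.CommitTargetLoweringOpt(TLO);
//     return SDValue(N, 0); // N was updated in place
//   }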
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified in
+ /// Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ /// ComputeNumSignBitsForTargetNode - This method can be implemented by
+ /// targets that want to expose additional information about sign bits to the
+ /// DAG Combiner.
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth = 0) const;
+
+ struct DAGCombinerInfo {
+ void *DC; // The DAG Combiner object.
+ CombineLevel Level;
+ bool CalledByLegalizer;
+ public:
+ SelectionDAG &DAG;
+
+ DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
+ : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}
+
+ bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
+ bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
+ bool isAfterLegalizeVectorOps() const {
+ return Level == AfterLegalizeDAG;
+ }
+ CombineLevel getDAGCombineLevel() { return Level; }
+ bool isCalledByLegalizer() const { return CalledByLegalizer; }
+
+ void AddToWorklist(SDNode *N);
+ void RemoveFromWorklist(SDNode *N);
+ SDValue CombineTo(SDNode *N, const std::vector<SDValue> &To,
+ bool AddTo = true);
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
+
+ void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
+ };
+
+ /// SimplifySetCC - Try to simplify a setcc built with the specified operands
+ /// and cc. If it is unable to simplify it, return a null SDValue.
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI, DebugLoc dl) const;
+
+ /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+ /// node is a GlobalAddress + offset.
+ virtual bool
+ isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
+
+ /// PerformDAGCombine - This method will be invoked for all target nodes and
+ /// for any target-independent node types the target has registered an
+ /// interest in via setTargetDAGCombine.
+ ///
+ /// The semantics are as follows:
+ /// Return Value:
+ /// SDValue.Val == 0 - No change was made
+ /// SDValue.Val == N - N was replaced, is dead, and is already handled.
+ /// otherwise - N should be replaced by the returned Operand.
+ ///
+ /// In addition, methods provided by DAGCombinerInfo may be used to perform
+ /// more complex transformations.
+ ///
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
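// Editorial sketch, not from this patch, of the return-value contract
// documented above (MyTargetLowering and the trivial fold are invented):
SDValue MyTargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default:
    break;
  case ISD::ADD:
    // Fold (add x, 0) -> x as a trivial illustration.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
      if (C->isNullValue())
        return N->getOperand(0); // "N should be replaced by this Operand"
    break;
  }
  return SDValue(); // "No change was made"
}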
+ /// isTypeDesirableForOp - Return true if the target has native support for
+ /// the specified value type and it is 'desirable' to use the type for the
+ /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
+ /// instruction encodings are longer and some i16 instructions are slow.
+ virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
+ // By default, assume all legal types are desirable.
+ return isTypeLegal(VT);
+ }
+
+ /// isDesirableToTransformToIntegerOp - Return true if it is profitable for
+ /// the dag combiner to transform a floating point op of the specified opcode
+ /// to an equivalent op of an integer type. e.g. f32 load -> i32 load can be
+ /// profitable on ARM.
+ virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
+ EVT /*VT*/) const {
+ return false;
+ }
+
+ /// IsDesirableToPromoteOp - This method queries the target whether it is
+ /// beneficial for the dag combiner to promote the specified node. If true,
+ /// it should return the desired promotion type by reference.
+ virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
+ return false;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Lowering methods - These methods must be implemented by targets so that
+ // the SelectionDAGBuilder code knows how to lower these.
+ //
+
+ /// LowerFormalArguments - This hook must be implemented to lower the
+ /// incoming (formal) arguments, described by the Ins array, into the
+ /// specified DAG. The implementation should fill in the InVals array
+ /// with legal-type argument values, and return the resulting token
+ /// chain value.
+ ///
+ virtual SDValue
+ LowerFormalArguments(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
+ bool /*isVarArg*/,
+ const SmallVectorImpl<ISD::InputArg> &/*Ins*/,
+ DebugLoc /*dl*/, SelectionDAG &/*DAG*/,
+ SmallVectorImpl<SDValue> &/*InVals*/) const {
+ llvm_unreachable("Not Implemented");
+ }
+
+ struct ArgListEntry {
+ SDValue Node;
+ Type* Ty;
+ bool isSExt : 1;
+ bool isZExt : 1;
+ bool isInReg : 1;
+ bool isSRet : 1;
+ bool isNest : 1;
+ bool isByVal : 1;
+ uint16_t Alignment;
+
+ ArgListEntry() : isSExt(false), isZExt(false), isInReg(false),
+ isSRet(false), isNest(false), isByVal(false), Alignment(0) { }
+ };
+ typedef std::vector<ArgListEntry> ArgListTy;
+
+ /// CallLoweringInfo - This structure contains all information that is
+ /// necessary for lowering calls. It is passed to TLI::LowerCallTo when the
+ /// SelectionDAG builder needs to lower a call, and targets will see this
+ /// struct in their LowerCall implementation.
+ struct CallLoweringInfo {
+ SDValue Chain;
+ Type *RetTy;
+ bool RetSExt : 1;
+ bool RetZExt : 1;
+ bool IsVarArg : 1;
+ bool IsInReg : 1;
+ bool DoesNotReturn : 1;
+ bool IsReturnValueUsed : 1;
+
+ // IsTailCall should be modified by implementations of
+ // TargetLowering::LowerCall that perform tail call conversions.
+ bool IsTailCall;
+
+ unsigned NumFixedArgs;
+ CallingConv::ID CallConv;
+ SDValue Callee;
+ ArgListTy &Args;
+ SelectionDAG &DAG;
+ DebugLoc DL;
+ ImmutableCallSite *CS;
+ SmallVector<ISD::OutputArg, 32> Outs;
+ SmallVector<SDValue, 32> OutVals;
+ SmallVector<ISD::InputArg, 32> Ins;
+
+ /// CallLoweringInfo - Constructs a call lowering context based on the
+ /// ImmutableCallSite \p cs.
+ CallLoweringInfo(SDValue chain, Type *retTy,
+ FunctionType *FTy, bool isTailCall, SDValue callee,
+ ArgListTy &args, SelectionDAG &dag, DebugLoc dl,
+ ImmutableCallSite &cs)
+ : Chain(chain), RetTy(retTy), RetSExt(cs.paramHasAttr(0, Attribute::SExt)),
+ RetZExt(cs.paramHasAttr(0, Attribute::ZExt)), IsVarArg(FTy->isVarArg()),
+ IsInReg(cs.paramHasAttr(0, Attribute::InReg)),
+ DoesNotReturn(cs.doesNotReturn()),
+ IsReturnValueUsed(!cs.getInstruction()->use_empty()),
+ IsTailCall(isTailCall), NumFixedArgs(FTy->getNumParams()),
+ CallConv(cs.getCallingConv()), Callee(callee), Args(args), DAG(dag),
+ DL(dl), CS(&cs) {}
+
+ /// CallLoweringInfo - Constructs a call lowering context based on the
+ /// provided call information.
+ CallLoweringInfo(SDValue chain, Type *retTy, bool retSExt, bool retZExt,
+ bool isVarArg, bool isInReg, unsigned numFixedArgs,
+ CallingConv::ID callConv, bool isTailCall,
+ bool doesNotReturn, bool isReturnValueUsed, SDValue callee,
+ ArgListTy &args, SelectionDAG &dag, DebugLoc dl)
+ : Chain(chain), RetTy(retTy), RetSExt(retSExt), RetZExt(retZExt),
+ IsVarArg(isVarArg), IsInReg(isInReg), DoesNotReturn(doesNotReturn),
+ IsReturnValueUsed(isReturnValueUsed), IsTailCall(isTailCall),
+ NumFixedArgs(numFixedArgs), CallConv(callConv), Callee(callee),
+ Args(args), DAG(dag), DL(dl), CS(NULL) {}
+ };
+
+ /// LowerCallTo - This function lowers an abstract call to a function into an
+ /// actual call. This returns a pair of operands. The first element is the
+ /// return value for the function (if RetTy is not VoidTy). The second
+ /// element is the outgoing token chain. It calls LowerCall to do the actual
+ /// lowering.
+ std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;
+
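// Editorial sketch, not from this patch: driving LowerCallTo through the
// second CallLoweringInfo constructor above. The wrapper and its
// parameters are invented; Args is assumed to be populated already.
static std::pair<SDValue, SDValue>
emitSimpleCall(const TargetLowering &TLI, SelectionDAG &DAG, SDValue Chain,
               Type *RetTy, SDValue Callee, TargetLowering::ArgListTy &Args,
               DebugLoc dl) {
  TargetLowering::CallLoweringInfo
      CLI(Chain, RetTy, /*retSExt=*/false, /*retZExt=*/false,
          /*isVarArg=*/false, /*isInReg=*/false, Args.size(),
          CallingConv::C, /*isTailCall=*/false, /*doesNotReturn=*/false,
          /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
  // .first is the return value (if RetTy != void), .second the new chain.
  return TLI.LowerCallTo(CLI);
}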
+ /// LowerCall - This hook must be implemented to lower calls into the
+ /// specified DAG. The outgoing arguments to the call are described
+ /// by the Outs array, and the values to be returned by the call are
+ /// described by the Ins array. The implementation should fill in the
+ /// InVals array with legal-type return values from the call, and return
+ /// the resulting token chain value.
+ virtual SDValue
+ LowerCall(CallLoweringInfo &/*CLI*/,
+ SmallVectorImpl<SDValue> &/*InVals*/) const {
+ llvm_unreachable("Not Implemented");
+ }
+
+ /// HandleByVal - Target-specific cleanup for formal ByVal parameters.
+ virtual void HandleByVal(CCState *, unsigned &, unsigned) const {}
+
+ /// CanLowerReturn - This hook should be implemented to check whether the
+ /// return values described by the Outs array can fit into the return
+ /// registers. If false is returned, an sret-demotion is performed.
+ ///
+ virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
+ MachineFunction &/*MF*/, bool /*isVarArg*/,
+ const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
+ LLVMContext &/*Context*/) const
+ {
+ // Return true by default to get preexisting behavior.
+ return true;
+ }
+
+ /// LowerReturn - This hook must be implemented to lower outgoing
+ /// return values, described by the Outs array, into the specified
+ /// DAG. The implementation should return the resulting token chain
+ /// value.
+ ///
+ virtual SDValue
+ LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
+ bool /*isVarArg*/,
+ const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
+ const SmallVectorImpl<SDValue> &/*OutVals*/,
+ DebugLoc /*dl*/, SelectionDAG &/*DAG*/) const {
+ llvm_unreachable("Not Implemented");
+ }
+
+ /// isUsedByReturnOnly - Return true if the result of the specified node is
+ /// used by a return node only. It also computes and returns the input chain
+ /// for the tail call.
+ /// This is used to determine whether it is possible to codegen a libcall as
+ /// a tail call at legalization time.
+ virtual bool isUsedByReturnOnly(SDNode *, SDValue &Chain) const {
+ return false;
+ }
+
+ /// mayBeEmittedAsTailCall - Return true if the target may be able to emit
+ /// the call instruction as a tail call. This is used by optimization passes
+ /// to determine if it's profitable to duplicate return instructions to
+ /// enable tailcall optimization.
+ virtual bool mayBeEmittedAsTailCall(CallInst *) const {
+ return false;
+ }
+
+ /// getTypeForExtArgOrReturn - Return the type that should be used to zero or
+ /// sign extend a zeroext/signext integer argument or return value.
+ /// FIXME: Most C calling conventions require the return type to be promoted,
+ /// but this is not true all the time, e.g. i1 on x86-64. It is also not
+ /// necessary for non-C calling conventions. The frontend should handle this
+ /// and include all of the necessary information.
+ virtual MVT getTypeForExtArgOrReturn(MVT VT,
+ ISD::NodeType /*ExtendKind*/) const {
+ MVT MinVT = getRegisterType(MVT::i32);
+ return VT.bitsLT(MinVT) ? MinVT : VT;
+ }
+
+ /// LowerOperationWrapper - This callback is invoked by the type legalizer
+ /// to legalize nodes with an illegal operand type but legal result types.
+ /// It replaces the LowerOperation callback in the type Legalizer.
+ /// The reason we cannot do away with LowerOperation entirely is that
+ /// LegalizeDAG isn't yet ready to use this callback.
+ /// TODO: Consider merging with ReplaceNodeResults.
+ ///
+ /// The target places new result values for the node in Results (their number
+ /// and types must exactly match those of the original return values of
+ /// the node), or leaves Results empty, which indicates that the node is not
+ /// to be custom lowered after all.
+ /// The default implementation calls LowerOperation.
+ virtual void LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+
+ /// LowerOperation - This callback is invoked for operations that are
+ /// unsupported by the target, which are registered to use 'custom' lowering,
+ /// and whose defined values are all legal.
+ /// If the target has no operations that require custom lowering, it need not
+ /// implement this. The default implementation of this aborts.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// ReplaceNodeResults - This callback is invoked when a node result type is
+ /// illegal for the target, and the operation was registered to use 'custom'
+ /// lowering for that result type. The target places new result values for
+ /// the node in Results (their number and types must exactly match those of
+ /// the original return values of the node), or leaves Results empty, which
+ /// indicates that the node is not to be custom lowered after all.
+ ///
+ /// If the target has no operations that require custom lowering, it need not
+ /// implement this. The default implementation aborts.
+ virtual void ReplaceNodeResults(SDNode * /*N*/,
+ SmallVectorImpl<SDValue> &/*Results*/,
+ SelectionDAG &/*DAG*/) const {
+ llvm_unreachable("ReplaceNodeResults not implemented for this target!");
+ }
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &,
+ const TargetLibraryInfo *) const {
+ return 0;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Inline Asm Support hooks
+ //
+
+ /// ExpandInlineAsm - This hook allows the target to expand an inline asm
+ /// call to be explicit llvm code if it wants to. This is useful for
+ /// turning simple inline asms into LLVM intrinsics, which gives the
+ /// compiler more information about the behavior of the code.
+ virtual bool ExpandInlineAsm(CallInst *) const {
+ return false;
+ }
+
+ enum ConstraintType {
+ C_Register, // Constraint represents specific register(s).
+ C_RegisterClass, // Constraint represents any of register(s) in class.
+ C_Memory, // Memory constraint.
+ C_Other, // Something else.
+ C_Unknown // Unsupported constraint.
+ };
+
+ enum ConstraintWeight {
+ // Generic weights.
+ CW_Invalid = -1, // No match.
+ CW_Okay = 0, // Acceptable.
+ CW_Good = 1, // Good weight.
+ CW_Better = 2, // Better weight.
+ CW_Best = 3, // Best weight.
+
+ // Well-known weights.
+ CW_SpecificReg = CW_Okay, // Specific register operands.
+ CW_Register = CW_Good, // Register operands.
+ CW_Memory = CW_Better, // Memory operands.
+ CW_Constant = CW_Best, // Constant operand.
+ CW_Default = CW_Okay // Default or don't know type.
+ };
+
+ /// AsmOperandInfo - This contains information for each constraint that we are
+ /// lowering.
+ struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
+ /// ConstraintCode - This contains the actual string for the code, like "m".
+ /// TargetLowering picks the 'best' code from ConstraintInfo::Codes that
+ /// most closely matches the operand.
+ std::string ConstraintCode;
+
+ /// ConstraintType - Information about the constraint code, e.g. Register,
+ /// RegisterClass, Memory, Other, Unknown.
+ TargetLowering::ConstraintType ConstraintType;
+
+ /// CallOperandVal - If this is the result output operand or a
+ /// clobber, this is null, otherwise it is the incoming operand to the
+ /// CallInst. This gets modified as the asm is processed.
+ Value *CallOperandVal;
+
+ /// ConstraintVT - The ValueType for the operand value.
+ MVT ConstraintVT;
+
+ /// isMatchingInputConstraint - Return true if this is an input operand that
+ /// is a matching constraint like "4".
+ bool isMatchingInputConstraint() const;
+
+ /// getMatchedOperand - If this is an input matching constraint, this method
+ /// returns the output operand it matches.
+ unsigned getMatchedOperand() const;
+
+ /// Copy constructor for copying from an AsmOperandInfo.
+ AsmOperandInfo(const AsmOperandInfo &info)
+ : InlineAsm::ConstraintInfo(info),
+ ConstraintCode(info.ConstraintCode),
+ ConstraintType(info.ConstraintType),
+ CallOperandVal(info.CallOperandVal),
+ ConstraintVT(info.ConstraintVT) {
+ }
+
+ /// Copy constructor for copying from a ConstraintInfo.
+ AsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+ : InlineAsm::ConstraintInfo(info),
+ ConstraintType(TargetLowering::C_Unknown),
+ CallOperandVal(0), ConstraintVT(MVT::Other) {
+ }
+ };
+
+ typedef std::vector<AsmOperandInfo> AsmOperandInfoVector;
+
+ /// ParseConstraints - Split up the constraint string from the inline
+ /// assembly value into the specific constraints and their prefixes,
+ /// and also tie in the associated operand values.
+ /// If this returns an empty vector, and if the constraint string itself
+ /// isn't empty, there was an error parsing.
+ virtual AsmOperandInfoVector ParseConstraints(ImmutableCallSite CS) const;
+
+ /// Examine constraint type and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ virtual ConstraintWeight getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ virtual ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ /// ComputeConstraintToUse - Determines the constraint code and constraint
+ /// type to use for the specific AsmOperandInfo, setting
+ /// OpInfo.ConstraintCode and OpInfo.ConstraintType. If the actual operand
+ /// being passed in is available, it can be passed in as Op, otherwise an
+ /// empty SDValue can be passed.
+ virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ SelectionDAG *DAG = 0) const;
+
+ /// getConstraintType - Given a constraint, return the type of constraint it
+ /// is for this target.
+ virtual ConstraintType getConstraintType(const std::string &Constraint) const;
+
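// Editorial sketch, not from this patch: a typical target override of
// getConstraintType for one invented single-letter constraint 'y':
TargetLowering::ConstraintType
MyTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1 && Constraint[0] == 'y')
    return C_RegisterClass; // target-specific register-class letter
  return TargetLowering::getConstraintType(Constraint); // generic handling
}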
+ /// getRegForInlineAsmConstraint - Given a physical register constraint (e.g.
+ /// {edx}), return the register number and the register class for the
+ /// register.
+ ///
+ /// Given a register class constraint, like 'r', if this corresponds directly
+ /// to an LLVM register class, return a register of 0 and the register class
+ /// pointer.
+ ///
+ /// This should only be used for C_Register constraints. On error,
+ /// this returns a register number of 0 and a null register class pointer.
+ virtual std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
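// Editorial sketch, not from this patch, pairing with the hook above;
// MyTarget::GPR32RegClass is an invented register class:
std::pair<unsigned, const TargetRegisterClass*>
MyTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                               EVT VT) const {
  if (Constraint == "y" && VT == MVT::i32)
    return std::make_pair(0U, &MyTarget::GPR32RegClass); // class, any reg
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}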
+ /// LowerXConstraint - try to replace an X constraint, which matches anything,
+ /// with another that has more specific requirements based on the type of the
+ /// corresponding operand. This returns null if there is no replacement to
+ /// make.
+ virtual const char *LowerXConstraint(EVT ConstraintVT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops.
+ virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ //===--------------------------------------------------------------------===//
+ // Div utility functions
+ //
+ SDValue BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+ SelectionDAG &DAG) const;
+ SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*> *Created) const;
+ SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*> *Created) const;
+
+ //===--------------------------------------------------------------------===//
+ // Instruction Emitting Hooks
+ //
+
+ // EmitInstrWithCustomInserter - This method should be implemented by targets
+ // that mark instructions with the 'usesCustomInserter' flag. These
+ // instructions are special in various ways, which require special support to
+ // insert. The specified MachineInstr is created but not inserted into any
+ // basic blocks, and this method is called to expand it into a sequence of
+ // instructions, potentially also creating new basic blocks and control flow.
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
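// Editorial sketch, not from this patch: a target marks a pseudo (e.g. a
// select) with 'usesCustomInserter = 1' in its .td file and expands it
// here; SELECT_PSEUDO and emitSelectDiamond are invented names.
MachineBasicBlock *
MyTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                              MachineBasicBlock *MBB) const {
  switch (MI->getOpcode()) {
  case MyTarget::SELECT_PSEUDO:
    return emitSelectDiamond(MI, MBB); // builds the CFG diamond
  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}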
+ /// AdjustInstrPostInstrSelection - This method should be implemented by
+ /// targets that mark instructions with the 'hasPostISelHook' flag. These
+ /// instructions must be adjusted after instruction selection by target hooks.
+ /// e.g. To fill in optional defs for ARM 's' setting instructions.
+ virtual void
+ AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const;
+};
+
/// GetReturnInfo - Given an LLVM IR type and return type attributes,
/// compute the return value EVTs and flags, and optionally also
/// the offsets, if the return value is being lowered to memory.
-void GetReturnInfo(Type* ReturnType, Attributes attr,
+void GetReturnInfo(Type* ReturnType, AttributeSet attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI);
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 13a6fe37d7a9..9958755a6686 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -15,10 +15,10 @@
#ifndef LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
#define LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
-#include "llvm/Module.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/SectionKind.h"
-#include "llvm/ADT/ArrayRef.h"
namespace llvm {
class MachineModuleInfo;
@@ -27,6 +27,7 @@ namespace llvm {
class MCExpr;
class MCSection;
class MCSymbol;
+ class MCSymbolRefExpr;
class MCStreamer;
class GlobalValue;
class TargetMachine;
@@ -108,13 +109,13 @@ public:
return 0;
}
- /// getExprForDwarfGlobalReference - Return an MCExpr to use for a reference
+ /// getTTypeGlobalReference - Return an MCExpr to use for a reference
/// to the specified global variable from exception handling information.
///
virtual const MCExpr *
- getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const;
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
// getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
virtual MCSymbol *
@@ -123,8 +124,8 @@ public:
///
const MCExpr *
- getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
- MCStreamer &Streamer) const;
+ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const;
virtual const MCSection *
getStaticCtorSection(unsigned Priority = 65535) const {
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 50066473b552..66f3a3c71b68 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -14,12 +14,10 @@
#ifndef LLVM_TARGET_TARGETMACHINE_H
#define LLVM_TARGET_TARGETMACHINE_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/TargetTransformInfo.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <string>
@@ -43,6 +41,8 @@ class TargetPassConfig;
class TargetRegisterInfo;
class TargetSelectionDAGInfo;
class TargetSubtargetInfo;
+class ScalarTargetTransformInfo;
+class VectorTargetTransformInfo;
class formatted_raw_ostream;
class raw_ostream;
@@ -59,10 +59,6 @@ protected: // Can only create subclasses.
TargetMachine(const Target &T, StringRef TargetTriple,
StringRef CPU, StringRef FS, const TargetOptions &Options);
- /// getSubtargetImpl - virtual method implemented by subclasses that returns
- /// a reference to that target's TargetSubtargetInfo-derived member variable.
- virtual const TargetSubtargetInfo *getSubtargetImpl() const { return 0; }
-
/// TheTarget - The Target that this machine was created for.
const Target &TheTarget;
@@ -95,7 +91,14 @@ public:
const StringRef getTargetCPU() const { return TargetCPU; }
const StringRef getTargetFeatureString() const { return TargetFS; }
- TargetOptions Options;
+ /// getSubtargetImpl - virtual method implemented by subclasses that returns
+ /// a reference to that target's TargetSubtargetInfo-derived member variable.
+ virtual const TargetSubtargetInfo *getSubtargetImpl() const { return 0; }
+
+ mutable TargetOptions Options;
+
+ /// \brief Reset the target options based on the function's attributes.
+ void resetTargetOptions(const MachineFunction *MF) const;
// Interfaces to the major aspects of target machine information:
// -- Instruction opcode and operand information
@@ -108,10 +111,6 @@ public:
virtual const TargetLowering *getTargetLowering() const { return 0; }
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const{ return 0; }
virtual const DataLayout *getDataLayout() const { return 0; }
- virtual const ScalarTargetTransformInfo*
- getScalarTargetTransformInfo() const { return 0; }
- virtual const VectorTargetTransformInfo*
- getVectorTargetTransformInfo() const { return 0; }
/// getMCAsmInfo - Return target specific asm information.
///
@@ -232,6 +231,9 @@ public:
/// sections.
static void setFunctionSections(bool);
+ /// \brief Register analysis passes for this target with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &) {}
+
/// CodeGenFileType - These enums are meant to be passed into
/// addPassesToEmitFile to indicate what type of file to emit, and returned by
/// it to indicate what type of file could actually be made.
@@ -290,6 +292,11 @@ protected: // Can only create subclasses.
CodeGenOpt::Level OL);
public:
+ /// \brief Register analysis passes for this target with a pass manager.
+ ///
+ /// This registers target independent analysis passes.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+
/// createPassConfig - Create a pass configuration object to be used by
/// addPassToEmitX methods for generating a pipeline of CodeGen passes.
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 68ca5678369a..c31db244900e 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -24,7 +24,7 @@ namespace llvm {
// Possible float ABI settings. Used with FloatABIType in TargetOptions.h.
namespace FloatABI {
enum ABIType {
- Default, // Target-specific (either soft or hard depending on triple, etc).
+ Default, // Target-specific (soft or hard depending on triple, etc).
Soft, // Soft float.
Hard // Hard float.
};
@@ -48,10 +48,10 @@ namespace llvm {
UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false),
JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false),
GuaranteedTailCallOpt(false), DisableTailCalls(false),
- StackAlignmentOverride(0), RealignStack(true), EnableFastISel(false),
- PositionIndependentExecutable(false), EnableSegmentedStacks(false),
- UseInitArray(false), TrapFuncName(""), FloatABIType(FloatABI::Default),
- AllowFPOpFusion(FPOpFusion::Standard)
+ StackAlignmentOverride(0), RealignStack(true), SSPBufferSize(0),
+ EnableFastISel(false), PositionIndependentExecutable(false),
+ EnableSegmentedStacks(false), UseInitArray(false), TrapFuncName(""),
+ FloatABIType(FloatABI::Default), AllowFPOpFusion(FPOpFusion::Standard)
{}
/// PrintMachineCode - This flag is enabled when the -print-machineinstrs
@@ -202,10 +202,10 @@ namespace llvm {
/// Strict mode - allow fusion only if/when it can be proven that the excess
/// precision won't affect the result.
///
- /// Note: This option only controls formation of fused ops by the optimizers.
- /// Fused operations that are explicitly specified (e.g. FMA via the
- /// llvm.fma.* intrinsic) will always be honored, regardless of the value of
- /// this option.
+ /// Note: This option only controls formation of fused ops by the
+ /// optimizers. Fused operations that are explicitly specified (e.g. FMA
+ /// via the llvm.fma.* intrinsic) will always be honored, regardless of
+ /// the value of this option.
FPOpFusion::FPOpFusionMode AllowFPOpFusion;
};
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index afa2ee27443a..6b1e70bba11b 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -16,11 +16,11 @@
#ifndef LLVM_TARGET_TARGETREGISTERINFO_H
#define LLVM_TARGET_TARGETREGISTERINFO_H
-#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/CallingConv.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include <cassert>
#include <functional>
@@ -30,12 +30,13 @@ class BitVector;
class MachineFunction;
class RegScavenger;
template<class T> class SmallVectorImpl;
+class VirtRegMap;
class raw_ostream;
class TargetRegisterClass {
public:
- typedef const uint16_t* iterator;
- typedef const uint16_t* const_iterator;
+ typedef const MCPhysReg* iterator;
+ typedef const MCPhysReg* const_iterator;
typedef const MVT::SimpleValueType* vt_iterator;
typedef const TargetRegisterClass* const * sc_iterator;
@@ -45,7 +46,7 @@ public:
const uint32_t *SubClassMask;
const uint16_t *SuperRegIndices;
const sc_iterator SuperClasses;
- ArrayRef<uint16_t> (*OrderFunc)(const MachineFunction&);
+ ArrayRef<MCPhysReg> (*OrderFunc)(const MachineFunction&);
/// getID() - Return the register class ID number.
///
@@ -190,7 +191,7 @@ public:
///
/// By default, this method returns all registers in the class.
///
- ArrayRef<uint16_t> getRawAllocationOrder(const MachineFunction &MF) const {
+ ArrayRef<MCPhysReg> getRawAllocationOrder(const MachineFunction &MF) const {
return OrderFunc ? OrderFunc(MF) : makeArrayRef(begin(), getNumRegs());
}
};
@@ -387,27 +388,12 @@ public:
return false;
}
- /// isSubRegister - Returns true if regB is a sub-register of regA.
- ///
- bool isSubRegister(unsigned regA, unsigned regB) const {
- return isSuperRegister(regB, regA);
- }
-
- /// isSuperRegister - Returns true if regB is a super-register of regA.
- ///
- bool isSuperRegister(unsigned RegA, unsigned RegB) const {
- for (MCSuperRegIterator I(RegA, this); I.isValid(); ++I)
- if (*I == RegB)
- return true;
- return false;
- }
-
/// getCalleeSavedRegs - Return a null-terminated list of all of the
/// callee saved registers on this target. The register should be in the
/// order of desired callee-save stack frame offset. The first register is
/// closest to the incoming stack pointer if stack grows down, and vice versa.
///
- virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF = 0)
+ virtual const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF = 0)
const = 0;
/// getCallPreservedMask - Return a mask of call-preserved registers for the
@@ -594,10 +580,13 @@ public:
return 0;
}
-// Get the weight in units of pressure for this register class.
+ /// Get the weight in units of pressure for this register class.
virtual const RegClassWeight &getRegClassWeight(
const TargetRegisterClass *RC) const = 0;
+ /// Get the weight in units of pressure for this register unit.
+ virtual unsigned getRegUnitWeight(unsigned RegUnit) const = 0;
+
/// Get the number of dimensions of register pressure.
virtual unsigned getNumRegPressureSets() const = 0;
@@ -613,27 +602,29 @@ public:
virtual const int *getRegClassPressureSets(
const TargetRegisterClass *RC) const = 0;
- /// getRawAllocationOrder - Returns the register allocation order for a
- /// specified register class with a target-dependent hint. The returned list
- /// may contain reserved registers that cannot be allocated.
- ///
- /// Register allocators need only call this function to resolve
- /// target-dependent hints, but it should work without hinting as well.
- virtual ArrayRef<uint16_t>
- getRawAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const {
- return RC->getRawAllocationOrder(MF);
- }
-
- /// ResolveRegAllocHint - Resolves the specified register allocation hint
- /// to a physical register. Returns the physical register if it is successful.
- virtual unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
- const MachineFunction &MF) const {
- if (Type == 0 && Reg && isPhysicalRegister(Reg))
- return Reg;
- return 0;
- }
+ /// Get the dimensions of register pressure impacted by this register unit.
+ /// Returns a -1 terminated array of pressure set IDs.
+ virtual const int *getRegUnitPressureSets(unsigned RegUnit) const = 0;
+
+ /// Get a list of 'hint' registers that the register allocator should try
+ /// first when allocating a physical register for the virtual register
+ /// VirtReg. These registers are effectively moved to the front of the
+ /// allocation order.
+ ///
+ /// The Order argument is the allocation order for VirtReg's register class
+ /// as returned from RegisterClassInfo::getOrder(). The hint registers must
+ /// come from Order, and they must not be reserved.
+ ///
+ /// The default implementation of this function can resolve
+ /// target-independent hints provided to MRI::setRegAllocationHint with
+ /// HintType == 0. Targets that override this function should defer to the
+ /// default implementation if they have no reason to change the allocation
+ /// order for VirtReg. There may be target-independent hints.
+ virtual void getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM = 0) const;
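// Editorial sketch, not from this patch: an override that defers to the
// target-independent hints first and then prefers even-numbered
// registers (an invented heuristic for illustration):
void MyRegisterInfo::getRegAllocationHints(unsigned VirtReg,
                                           ArrayRef<MCPhysReg> Order,
                                           SmallVectorImpl<MCPhysReg> &Hints,
                                           const MachineFunction &MF,
                                           const VirtRegMap *VRM) const {
  // Resolve generic MRI::setRegAllocationHint hints first.
  TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
  for (unsigned i = 0, e = Order.size(); i != e; ++i)
    if (Order[i] % 2 == 0)       // pretend even registers encode smaller
      Hints.push_back(Order[i]); // hints must come from Order
}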
/// avoidWriteAfterWrite - Return true if the register allocator should avoid
/// writing a register from RC in two consecutive instructions.
@@ -742,21 +733,6 @@ public:
llvm_unreachable("isFrameOffsetLegal does not exist on this target");
}
- /// eliminateCallFramePseudoInstr - This method is called during prolog/epilog
- /// code insertion to eliminate call frame setup and destroy pseudo
- /// instructions (but only if the Target is using them). It is responsible
- /// for eliminating these instructions, replacing them with concrete
- /// instructions. This method need only be implemented if using call frame
- /// setup/destroy pseudo instructions.
- ///
- virtual void
- eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- llvm_unreachable("Call Frame Pseudo Instructions do not exist on this "
- "target!");
- }
-
/// saveScavengerRegister - Spill the register so it can be used by the
/// register scavenger. Return true if the register was spilled, false
@@ -776,10 +752,11 @@ public:
/// referenced by the iterator contains an MO_FrameIndex operand which must be
/// eliminated by this method. This method may modify or replace the
/// specified instruction, as long as it keeps the iterator pointing at the
- /// finished product. SPAdj is the SP adjustment due to call frame setup
- /// instruction.
+ /// finished product. SPAdj is the SP adjustment due to the call frame setup
+ /// instruction. FIOperandNum is the FI operand number.
virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, RegScavenger *RS=NULL) const = 0;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const = 0;
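// Editorial sketch, not from this patch, of the new signature above; the
// frame register and computeFrameOffset helper are invented, and the
// operand after the FI is assumed to hold the immediate offset:
void MyRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  int FI = MI->getOperand(FIOperandNum).getIndex();
  int Offset = computeFrameOffset(*MI, FI) + SPAdj;
  // Rewrite the FI operand to the frame register, and the offset operand.
  MI->getOperand(FIOperandNum).ChangeToRegister(MyTarget::FP, /*isDef=*/false);
  MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}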
//===--------------------------------------------------------------------===//
/// Debug information queries.
@@ -876,7 +853,8 @@ class PrintReg {
unsigned Reg;
unsigned SubIdx;
public:
- PrintReg(unsigned reg, const TargetRegisterInfo *tri = 0, unsigned subidx = 0)
+ explicit PrintReg(unsigned reg, const TargetRegisterInfo *tri = 0,
+ unsigned subidx = 0)
: TRI(tri), Reg(reg), SubIdx(subidx) {}
void print(raw_ostream&) const;
};
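// Editorial usage note, not from this patch: with the constructor now
// explicit, debug output spells out the wrapper, e.g.
//   dbgs() << "assigning " << PrintReg(Reg, TRI) << '\n';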
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td
index 0da82fdd8971..660d2c48b6c5 100644
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -76,6 +76,7 @@ class SchedMachineModel {
int IssueWidth = -1; // Max micro-ops that may be scheduled per cycle.
int MinLatency = -1; // Determines which instructions are allowed in a group.
// (-1) inorder (0) ooo, (1): inorder +var latencies.
+ int ILPWindow = -1; // Cycles of latency likely hidden by hardware buffers.
int LoadLatency = -1; // Cycles for loads to access the cache.
int HighLatency = -1; // Approximation of cycles for "high latency" ops.
int MispredictPenalty = -1; // Extra cycles for a mispredicted branch.
@@ -132,6 +133,11 @@ def EponymousProcResourceKind : ProcResourceKind;
class ProcResource<int num> : ProcResourceKind,
ProcResourceUnits<EponymousProcResourceKind, num>;
+class ProcResGroup<list<ProcResource> resources> : ProcResourceKind {
+ list<ProcResource> Resources = resources;
+ SchedMachineModel SchedModel = ?;
+}
+
// A target architecture may define SchedReadWrite types and associate
// them with instruction operands.
class SchedReadWrite;
diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h
index 6db96d980b5e..b2d405de8464 100644
--- a/include/llvm/Target/TargetSubtargetInfo.h
+++ b/include/llvm/Target/TargetSubtargetInfo.h
@@ -19,6 +19,7 @@
namespace llvm {
+class MachineFunction;
class MachineInstr;
class SDep;
class SUnit;
@@ -54,6 +55,13 @@ public:
return 0;
}
+ /// \brief True if the subtarget should run MachineScheduler after aggressive
+ /// coalescing.
+ ///
+ /// This currently replaces the SelectionDAG scheduler with the "source" order
+ /// scheduler. It does not yet disable the postRA scheduler.
+ virtual bool enableMachineScheduler() const;
+
// enablePostRAScheduler - If the target can benefit from post-regalloc
// scheduling and the specified optimization level meets the requirement
// return true to enable post-register-allocation scheduling. In
@@ -66,6 +74,9 @@ public:
// the latency of a schedule dependency.
virtual void adjustSchedDependency(SUnit *def, SUnit *use,
SDep& dep) const { }
+
+ /// \brief Reset the features for the subtarget.
+ virtual void resetSubtargetFeatures(const MachineFunction *MF) { }
};
} // End llvm namespace
diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h
deleted file mode 100644
index 7ea2396076dc..000000000000
--- a/include/llvm/Target/TargetTransformImpl.h
+++ /dev/null
@@ -1,98 +0,0 @@
-//=- llvm/Target/TargetTransformImpl.h - Target Loop Trans Info----*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the target-specific implementations of the
-// TargetTransform interfaces.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
-#define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
-
-#include "llvm/TargetTransformInfo.h"
-#include "llvm/CodeGen/ValueTypes.h"
-
-namespace llvm {
-
-class TargetLowering;
-
-/// ScalarTargetTransformInfo - This is a default implementation for the
-/// ScalarTargetTransformInfo interface. Different targets can implement
-/// this interface differently.
-class ScalarTargetTransformImpl : public ScalarTargetTransformInfo {
-private:
- const TargetLowering *TLI;
-
-public:
- /// Ctor
- explicit ScalarTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {}
-
- virtual bool isLegalAddImmediate(int64_t imm) const;
-
- virtual bool isLegalICmpImmediate(int64_t imm) const;
-
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
-
- virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
-
- virtual bool isTypeLegal(Type *Ty) const;
-
- virtual unsigned getJumpBufAlignment() const;
-
- virtual unsigned getJumpBufSize() const;
-
- virtual bool shouldBuildLookupTables() const;
-};
-
-class VectorTargetTransformImpl : public VectorTargetTransformInfo {
-protected:
- const TargetLowering *TLI;
-
- /// Estimate the cost of type-legalization and the legalized type.
- std::pair<unsigned, MVT> getTypeLegalizationCost(Type *Ty) const;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
- // Get the ISD node that corresponds to the Instruction class opcode.
- int InstructionOpcodeToISD(unsigned Opcode) const;
-
-public:
- explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {}
-
- virtual ~VectorTargetTransformImpl() {}
-
- virtual unsigned getInstrCost(unsigned Opcode, Type *Ty1, Type *Ty2) const;
-
- virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
-
- virtual unsigned getBroadcastCost(Type *Tp) const;
-
- virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const;
-
- virtual unsigned getCFInstrCost(unsigned Opcode) const;
-
- virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const;
-
- virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const;
-
- virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const;
-
- virtual unsigned getNumberOfParts(Type *Tp) const;
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/include/llvm/TargetTransformInfo.h b/include/llvm/TargetTransformInfo.h
deleted file mode 100644
index 94db49044332..000000000000
--- a/include/llvm/TargetTransformInfo.h
+++ /dev/null
@@ -1,204 +0,0 @@
-//===- llvm/Transforms/TargetTransformInfo.h --------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass exposes codegen information to IR-level passes. Every
-// transformation that uses codegen information is broken into three parts:
-// 1. The IR-level analysis pass.
-// 2. The IR-level transformation interface which provides the needed
-// information.
-// 3. Codegen-level implementation which uses target-specific hooks.
-//
-// This file defines #2, which is the interface that IR-level transformations
-// use for querying the codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_TARGET_TRANSFORM_INTERFACE
-#define LLVM_TRANSFORMS_TARGET_TRANSFORM_INTERFACE
-
-#include "llvm/Pass.h"
-#include "llvm/AddressingMode.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Type.h"
-
-namespace llvm {
-
-class ScalarTargetTransformInfo;
-class VectorTargetTransformInfo;
-
-/// TargetTransformInfo - This pass provides access to the codegen
-/// interfaces that are needed for IR-level transformations.
-class TargetTransformInfo : public ImmutablePass {
-private:
- const ScalarTargetTransformInfo *STTI;
- const VectorTargetTransformInfo *VTTI;
-public:
- /// Default ctor.
- ///
- /// @note This has to exist, because this is a pass, but it should never be
- /// used.
- TargetTransformInfo();
-
- TargetTransformInfo(const ScalarTargetTransformInfo* S,
- const VectorTargetTransformInfo *V)
- : ImmutablePass(ID), STTI(S), VTTI(V) {
- initializeTargetTransformInfoPass(*PassRegistry::getPassRegistry());
- }
-
- TargetTransformInfo(const TargetTransformInfo &T) :
- ImmutablePass(ID), STTI(T.STTI), VTTI(T.VTTI) { }
-
- const ScalarTargetTransformInfo* getScalarTargetTransformInfo() const {
- return STTI;
- }
- const VectorTargetTransformInfo* getVectorTargetTransformInfo() const {
- return VTTI;
- }
-
- /// Pass identification, replacement for typeid.
- static char ID;
-};
-
-// ---------------------------------------------------------------------------//
-// The classes below are inherited and implemented by target-specific classes
-// in the codegen.
-// ---------------------------------------------------------------------------//
-
-/// ScalarTargetTransformInfo - This interface is used by IR-level passes
-/// that need target-dependent information for generic scalar transformations.
-/// LSR and LowerInvoke use this interface.
-class ScalarTargetTransformInfo {
-public:
- virtual ~ScalarTargetTransformInfo() {}
-
- /// isLegalAddImmediate - Return true if the specified immediate is legal
- /// add immediate; that is, the target has add instructions which can add
- /// a register with the immediate without having to materialize the
- /// immediate into a register.
- virtual bool isLegalAddImmediate(int64_t) const {
- return false;
- }
- /// isLegalICmpImmediate - Return true if the specified immediate is legal
- /// icmp immediate; that is, the target has icmp instructions which can compare
- /// a register against the immediate without having to materialize the
- /// immediate into a register.
- virtual bool isLegalICmpImmediate(int64_t) const {
- return false;
- }
- /// isLegalAddressingMode - Return true if the addressing mode represented by
- /// AM is legal for this target, for a load/store of the specified type.
- /// The type may be VoidTy, in which case only return true if the addressing
- /// mode is legal for a load/store of any legal type.
- /// TODO: Handle pre/postinc as well.
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const {
- return false;
- }
- /// isTruncateFree - Return true if it's free to truncate a value of
- /// type Ty1 to type Ty2, e.g. on x86 it's free to truncate an i32 value in
- /// register EAX to i16 by referencing its sub-register AX.
- virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const {
- return false;
- }
- /// Return true if this type is legal for the target.
- virtual bool isTypeLegal(Type *Ty) const {
- return false;
- }
- /// getJumpBufAlignment - returns the target's jmp_buf alignment in bytes
- virtual unsigned getJumpBufAlignment() const {
- return 0;
- }
- /// getJumpBufSize - returns the target's jmp_buf size in bytes.
- virtual unsigned getJumpBufSize() const {
- return 0;
- }
- /// shouldBuildLookupTables - Return true if switches should be turned into
- /// lookup tables for the target.
- virtual bool shouldBuildLookupTables() const {
- return true;
- }
-};
-
-/// VectorTargetTransformInfo - This interface is used by the vectorizers
-/// to estimate the profitability of vectorization for different instructions.
-class VectorTargetTransformInfo {
-public:
- virtual ~VectorTargetTransformInfo() {}
-
- /// Returns the expected cost of the instruction opcode. The opcode is one of
- /// the enums like Instruction::Add. The type arguments describe the types the
- /// operation acts on.
- /// Most instructions only use the first type, in which case the second type
- /// is ignored.
- ///
- /// Exceptions:
- /// * Br instructions do not use any of the types.
- /// * Select instructions pass the return type as Ty1 and the selector as Ty2.
- /// * Cast instructions pass the destination as Ty1 and the source as Ty2.
- /// * Insert/Extract element pass only the vector type as Ty1.
- /// * ShuffleVector, Load, Store do not use this call.
- virtual unsigned getInstrCost(unsigned Opcode,
- Type *Ty1 = 0,
- Type *Ty2 = 0) const {
- return 1;
- }
-
- /// Returns the expected cost of arithmetic ops, such as mul, xor, fsub, etc.
- virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
- return 1;
- }
-
- /// Returns the cost of broadcasting the scalar in element zero across a
- /// vector of type 'Tp'.
- virtual unsigned getBroadcastCost(Type *Tp) const {
- return 1;
- }
-
- /// Returns the expected cost of cast instructions, such as bitcast, trunc,
- /// zext, etc.
- virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
- return 1;
- }
-
- /// Returns the expected cost of control-flow related instructions such as
- /// Phi, Ret, Br.
- virtual unsigned getCFInstrCost(unsigned Opcode) const {
- return 1;
- }
-
- /// Returns the expected cost of compare and select instructions.
- virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy = 0) const {
- return 1;
- }
-
- /// Returns the expected cost of vector Insert and Extract.
- /// Use -1 to indicate that there is no information on the index value.
- virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index = -1) const {
- return 1;
- }
-
- /// Returns the cost of Load and Store instructions.
- virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
- return 1;
- }
-
- /// Returns the number of pieces into which the provided type must be
- /// split during legalization. Zero is returned when the answer is unknown.
- virtual unsigned getNumberOfParts(Type *Tp) const {
- return 0;
- }
-};
-
-} // End llvm namespace
-
-#endif
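
Before its removal here, an IR-level pass queried this analysis roughly as follows (a sketch against the interface declared above; it assumes it runs inside a Pass member function and that VecTy is some vector type already in hand):

unsigned Cost = 1; // conservative default when no target info is registered
if (const TargetTransformInfo *TTI =
        getAnalysisIfAvailable<TargetTransformInfo>())
  if (const VectorTargetTransformInfo *VTTI =
          TTI->getVectorTargetTransformInfo())
    Cost = VTTI->getArithmeticInstrCost(Instruction::Add, VecTy);
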
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index fc1cd59e4e10..e6eb8d38bb8c 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -15,7 +15,7 @@
#ifndef LLVM_TRANSFORMS_IPO_H
#define LLVM_TRANSFORMS_IPO_H
-#include <vector>
+#include "llvm/ADT/ArrayRef.h"
namespace llvm {
@@ -109,7 +109,7 @@ Pass *createPruneEHPass();
///
/// Note that the command-line options formerly associated with the above
/// function are now ignored.
-ModulePass *createInternalizePass(const std::vector<const char *> &exportList);
+ModulePass *createInternalizePass(ArrayRef<const char *> exportList);
/// createInternalizePass - Same as above, but with an empty exportList.
ModulePass *createInternalizePass();
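
With the ArrayRef signature, any contiguous sequence of names converts implicitly, so a caller no longer has to build a std::vector. A sketch (the symbol names and the PassManager PM are hypothetical):

const char *Exports[] = { "main", "my_plugin_entry" }; // hypothetical names
PM.add(createInternalizePass(Exports)); // ArrayRef binds directly to the array
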
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index b036040f5121..43a0ac8cc1f7 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -17,7 +17,7 @@
#ifndef LLVM_TRANSFORMS_IPO_INLINERPASS_H
#define LLVM_TRANSFORMS_IPO_INLINERPASS_H
-#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
namespace llvm {
class CallSite;
@@ -42,6 +42,7 @@ struct Inliner : public CallGraphSCCPass {
// Pass class.
virtual bool runOnSCC(CallGraphSCC &SCC);
+ using llvm::Pass::doFinalization;
// doFinalization - Remove now-dead linkonce functions at the end of
// processing to avoid breaking the SCC traversal.
virtual bool doFinalization(CallGraph &CG);
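
The using-declaration works around C++ name hiding: declaring doFinalization(CallGraph&) in Inliner would otherwise hide the inherited doFinalization(Module&) overload. A standalone illustration with stand-in parameter types:

struct Base {
  virtual bool doFinalization(int) { return false; }   // stands in for doFinalization(Module&)
  virtual ~Base() {}
};
struct Derived : Base {
  using Base::doFinalization;                          // re-exposes the hidden overload
  virtual bool doFinalization(float) { return true; }  // stands in for doFinalization(CallGraph&)
};
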
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 3ea0a427200d..209f68db6fd9 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_PASSMANAGERBUILDER_H
-#define LLVM_SUPPORT_PASSMANAGERBUILDER_H
+#ifndef LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H
+#define LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H
#include <vector>
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 8e63aaa4e873..4aae200dd083 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -14,6 +14,8 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_H
+#include "llvm/ADT/StringRef.h"
+
namespace llvm {
class ModulePass;
@@ -29,21 +31,52 @@ ModulePass *createOptimalEdgeProfilerPass();
ModulePass *createPathProfilerPass();
// Insert GCOV profiling instrumentation
-ModulePass *createGCOVProfilerPass(bool EmitNotes = true, bool EmitData = true,
- bool Use402Format = false,
- bool UseExtraChecksum = false);
+struct GCOVOptions {
+ static GCOVOptions getDefault();
+
+ // Specify whether to emit .gcno files.
+ bool EmitNotes;
+
+ // Specify whether to modify the program to emit .gcda files when run.
+ bool EmitData;
+
+ // A four-byte version string. The meaning of a version string is described
+ // in gcc's gcov-io.h.
+ char Version[4];
+
+ // Emit a "cfg checksum" that follows the "line number checksum" of a
+ // function. This affects both .gcno and .gcda files.
+ bool UseCfgChecksum;
+
+ // Add the 'noredzone' attribute to added runtime library calls.
+ bool NoRedZone;
+
+ // Emit the name of the function in the .gcda files. This is redundant, as
+ // the function identifier can be used to find the name from the .gcno file.
+ bool FunctionNamesInData;
+};
+ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
+ GCOVOptions::getDefault());
// Insert AddressSanitizer (address sanity checking) instrumentation
-FunctionPass *createAddressSanitizerPass();
-// Insert ThreadSanitizer (race detection) instrumentation
-FunctionPass *createThreadSanitizerPass();
+FunctionPass *createAddressSanitizerFunctionPass(
+ bool CheckInitOrder = true, bool CheckUseAfterReturn = false,
+ bool CheckLifetime = false, StringRef BlacklistFile = StringRef(),
+ bool ZeroBaseShadow = false);
+ModulePass *createAddressSanitizerModulePass(
+ bool CheckInitOrder = true, StringRef BlacklistFile = StringRef(),
+ bool ZeroBaseShadow = false);
+// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
+FunctionPass *createMemorySanitizerPass(bool TrackOrigins = false,
+ StringRef BlacklistFile = StringRef());
+
+// Insert ThreadSanitizer (race detection) instrumentation
+FunctionPass *createThreadSanitizerPass(StringRef BlacklistFile = StringRef());
// BoundsChecking - This pass instruments the code to perform run-time bounds
// checking on loads, stores, and other memory intrinsics.
-// Penalty is the maximum run-time that is acceptable for the user.
-//
-FunctionPass *createBoundsCheckingPass(unsigned Penalty = 5);
+FunctionPass *createBoundsCheckingPass();
} // End llvm namespace
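
Taken together, a client configuring the new instrumentation entry points might look like this (sketch; PM is an assumed PassManager, and the flags keep the default meanings given in the declarations above):

GCOVOptions Opts = GCOVOptions::getDefault();
Opts.EmitNotes = true;   // write .gcno files
Opts.EmitData  = false;  // don't instrument the program to emit .gcda files
PM.add(createGCOVProfilerPass(Opts));
PM.add(createAddressSanitizerFunctionPass()); // defaults: CheckInitOrder=true, ...
PM.add(createThreadSanitizerPass());          // default: no blacklist file
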
diff --git a/include/llvm/Transforms/ObjCARC.h b/include/llvm/Transforms/ObjCARC.h
new file mode 100644
index 000000000000..b3c19c077eab
--- /dev/null
+++ b/include/llvm/Transforms/ObjCARC.h
@@ -0,0 +1,49 @@
+//===-- ObjCARC.h - ObjCARC Scalar Transformations --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines prototypes for accessor functions that expose passes
+// in the ObjCARC Scalar Transformations library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_H
+#define LLVM_TRANSFORMS_OBJCARC_H
+
+namespace llvm {
+
+class Pass;
+
+//===----------------------------------------------------------------------===//
+//
+// ObjCARCAPElim - ObjC ARC autorelease pool elimination.
+//
+Pass *createObjCARCAPElimPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ObjCARCExpand - ObjC ARC preliminary simplifications.
+//
+Pass *createObjCARCExpandPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ObjCARCContract - Late ObjC ARC cleanups.
+//
+Pass *createObjCARCContractPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ObjCARCOpt - ObjC ARC optimization.
+//
+Pass *createObjCARCOptPass();
+
+} // End llvm namespace
+
+#endif
+
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index a5d8eed74622..e833aaa6d69e 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -115,11 +115,9 @@ Pass *createLICMPass();
//===----------------------------------------------------------------------===//
//
// LoopStrengthReduce - This pass strength-reduces GEP instructions that use
-// a loop's canonical induction variable as one of their indices. It takes an
-// optional parameter used to consult the target machine whether certain
-// transformations are profitable.
+// a loop's canonical induction variable as one of their indices.
//
-Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0);
+Pass *createLoopStrengthReducePass();
Pass *createGlobalMergePass(const TargetLowering *TLI = 0);
@@ -336,30 +334,6 @@ Pass *createCorrelatedValuePropagationPass();
//===----------------------------------------------------------------------===//
//
-// ObjCARCAPElim - ObjC ARC autorelease pool elimination.
-//
-Pass *createObjCARCAPElimPass();
-
-//===----------------------------------------------------------------------===//
-//
-// ObjCARCExpand - ObjC ARC preliminary simplifications.
-//
-Pass *createObjCARCExpandPass();
-
-//===----------------------------------------------------------------------===//
-//
-// ObjCARCContract - Late ObjC ARC cleanups.
-//
-Pass *createObjCARCContractPass();
-
-//===----------------------------------------------------------------------===//
-//
-// ObjCARCOpt - ObjC ARC optimization.
-//
-Pass *createObjCARCOptPass();
-
-//===----------------------------------------------------------------------===//
-//
// InstructionSimplifier - Remove redundant instructions.
//
FunctionPass *createInstructionSimplifierPass();
@@ -368,7 +342,7 @@ extern char &InstructionSimplifierID;
//===----------------------------------------------------------------------===//
//
-// LowerExpectIntriniscs - Removes llvm.expect intrinsics and creates
+// LowerExpectIntrinsics - Removes llvm.expect intrinsics and creates
// "block_weights" metadata.
FunctionPass *createLowerExpectIntrinsicPass();
diff --git a/include/llvm/Transforms/Utils/AddrModeMatcher.h b/include/llvm/Transforms/Utils/AddrModeMatcher.h
deleted file mode 100644
index 7d672839a630..000000000000
--- a/include/llvm/Transforms/Utils/AddrModeMatcher.h
+++ /dev/null
@@ -1,109 +0,0 @@
-//===- AddrModeMatcher.h - Addressing mode matching facility ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// AddressingModeMatcher - This class exposes a single public method, which is
-// used to construct a "maximal munch" of the addressing mode for the target
-// specified by TLI for an access to "V" with an access type of AccessTy. This
-// returns the addressing mode that is actually matched by value, but also
-// returns the list of instructions involved in that addressing computation in
-// AddrModeInsts.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_UTILS_ADDRMODEMATCHER_H
-#define LLVM_TRANSFORMS_UTILS_ADDRMODEMATCHER_H
-
-#include "llvm/AddressingMode.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Target/TargetLowering.h"
-
-namespace llvm {
-
-class GlobalValue;
-class Instruction;
-class Value;
-class Type;
-class User;
-class raw_ostream;
-
-/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
-/// which holds actual Value*'s for register values.
-struct ExtAddrMode : public AddrMode {
- Value *BaseReg;
- Value *ScaledReg;
- ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
- void print(raw_ostream &OS) const;
- void dump() const;
-
- bool operator==(const ExtAddrMode& O) const {
- return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
- (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
- (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
- }
-};
-
-static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
- AM.print(OS);
- return OS;
-}
-
-class AddressingModeMatcher {
- SmallVectorImpl<Instruction*> &AddrModeInsts;
- const TargetLowering &TLI;
-
- /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
- /// the memory instruction that we're computing this address for.
- Type *AccessTy;
- Instruction *MemoryInst;
-
- /// AddrMode - This is the addressing mode that we're building up. This is
- /// part of the return value of this addressing mode matching stuff.
- ExtAddrMode &AddrMode;
-
- /// IgnoreProfitability - This is set to true when we should not do
- /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
- /// always returns true.
- bool IgnoreProfitability;
-
- AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
- const TargetLowering &T, Type *AT,
- Instruction *MI, ExtAddrMode &AM)
- : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
- IgnoreProfitability = false;
- }
-public:
-
- /// Match - Find the maximal addressing mode that a load/store of V can fold,
- /// given an access type of AccessTy. This returns a list of involved
- /// instructions in AddrModeInsts.
- static ExtAddrMode Match(Value *V, Type *AccessTy,
- Instruction *MemoryInst,
- SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetLowering &TLI) {
- ExtAddrMode Result;
-
- bool Success =
- AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
- MemoryInst, Result).MatchAddr(V, 0);
- (void)Success; assert(Success && "Couldn't select *anything*?");
- return Result;
- }
-private:
- bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
- bool MatchAddr(Value *V, unsigned Depth);
- bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
- bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
- ExtAddrMode &AMBefore,
- ExtAddrMode &AMAfter);
- bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
-};
-
-} // End llvm namespace
-
-#endif
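
For reference, callers drove the removed matcher through its single entry point, roughly as follows (sketch; Addr, AccessTy, MemoryInst, and TLI are assumed to be in scope):

SmallVector<Instruction*, 8> AddrModeInsts;
ExtAddrMode AM = AddressingModeMatcher::Match(Addr, AccessTy, MemoryInst,
                                              AddrModeInsts, TLI);
// AM now holds the maximal matched mode; AddrModeInsts lists the
// instructions folded into the computation.
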
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index b810f1a818c6..8f1a6e2b75c3 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -12,14 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_UTILS_BASICBLOCK_H
-#define LLVM_TRANSFORMS_UTILS_BASICBLOCK_H
+#ifndef LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H
+#define LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H
// FIXME: Move to this file: BasicBlock::removePredecessor, BB::splitBasicBlock
-#include "llvm/BasicBlock.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/DebugLoc.h"
namespace llvm {
diff --git a/include/llvm/Transforms/Utils/BlackList.h b/include/llvm/Transforms/Utils/BlackList.h
new file mode 100644
index 000000000000..f19470e19d8a
--- /dev/null
+++ b/include/llvm/Transforms/Utils/BlackList.h
@@ -0,0 +1,58 @@
+//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables based on a user-supplied blacklist.
+//
+// The blacklist disables instrumentation of various functions and global
+// variables. Each line contains a prefix, followed by a wild card expression.
+// Empty lines and lines starting with "#" are ignored.
+// ---
+// # Blacklisted items:
+// fun:*_ZN4base6subtle*
+// global:*global_with_bad_access_or_initialization*
+// global-init:*global_with_initialization_issues*
+// global-init-type:*Namespace::ClassName*
+// src:file_with_tricky_code.cc
+// ---
+// Note that the wild card is in fact an llvm::Regex, but * is automatically
+// replaced with .*
+// This is similar to the "ignore" feature of ThreadSanitizer.
+// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/ADT/StringMap.h"
+
+namespace llvm {
+class Function;
+class GlobalVariable;
+class Module;
+class Regex;
+class StringRef;
+
+class BlackList {
+ public:
+ BlackList(const StringRef Path);
+ // Returns whether this function or its source file is blacklisted.
+ bool isIn(const Function &F) const;
+ // Returns whether this global or its source file is blacklisted.
+ bool isIn(const GlobalVariable &G) const;
+ // Returns whether this module is blacklisted by filename.
+ bool isIn(const Module &M) const;
+ // Returns whether a global should be excluded from initialization checking.
+ bool isInInit(const GlobalVariable &G) const;
+ private:
+ StringMap<Regex*> Entries;
+
+ bool inSection(const StringRef Section, const StringRef Query) const;
+};
+
+} // namespace llvm
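
A standalone sketch of the wildcard handling the header comment describes: '*' is rewritten to the regex ".*" before matching. std::regex stands in for llvm::Regex here, and metacharacters other than '*' are deliberately left untouched for simplicity:

#include <regex>
#include <string>

static bool wildcardMatchSketch(const std::string &Pattern,
                                const std::string &Query) {
  std::string Re;
  for (char C : Pattern)
    Re += (C == '*') ? std::string(".*") : std::string(1, C);
  return std::regex_match(Query, std::regex(Re));
}
// wildcardMatchSketch("*_ZN4base6subtle*", "x_ZN4base6subtleFoo") == true
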
diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h
index ab9fc475faee..181ed071eab1 100644
--- a/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -12,10 +12,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TRANSFORMS_UTILS_BUILDLIBCALLS_H
-#define TRANSFORMS_UTILS_BUILDLIBCALLS_H
+#ifndef LLVM_TRANSFORMS_UTILS_BUILDLIBCALLS_H
+#define LLVM_TRANSFORMS_UTILS_BUILDLIBCALLS_H
-#include "llvm/IRBuilder.h"
+#include "llvm/IR/IRBuilder.h"
namespace llvm {
class Value;
@@ -81,7 +81,7 @@ namespace llvm {
/// 'l' is added as the suffix of name, if 'Op' is a float, we add a 'f'
/// suffix.
Value *EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
- const AttrListPtr &Attrs);
+ const AttributeSet &Attrs);
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h
index ac8af122f038..0d081c0194bf 100644
--- a/include/llvm/Transforms/Utils/BypassSlowDivision.h
+++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h
@@ -15,10 +15,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
-#define TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
+#ifndef LLVM_TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
+#define LLVM_TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
-#include "llvm/Function.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Function.h"
namespace llvm {
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 1780025a2797..14212f622ba5 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -18,9 +18,9 @@
#ifndef LLVM_TRANSFORMS_UTILS_CLONING_H
#define LLVM_TRANSFORMS_UTILS_CLONING_H
-#include "llvm/ADT/ValueMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
diff --git a/include/llvm/Transforms/Utils/CmpInstAnalysis.h b/include/llvm/Transforms/Utils/CmpInstAnalysis.h
index 7ad7bddce503..488d7a59d329 100644
--- a/include/llvm/Transforms/Utils/CmpInstAnalysis.h
+++ b/include/llvm/Transforms/Utils/CmpInstAnalysis.h
@@ -15,7 +15,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_CMPINSTANALYSIS_H
#define LLVM_TRANSFORMS_UTILS_CMPINSTANALYSIS_H
-#include "llvm/InstrTypes.h"
+#include "llvm/IR/InstrTypes.h"
namespace llvm {
class ICmpInst;
diff --git a/include/llvm/Transforms/Utils/IntegerDivision.h b/include/llvm/Transforms/Utils/IntegerDivision.h
index cecc8075de7d..27d3c588b518 100644
--- a/include/llvm/Transforms/Utils/IntegerDivision.h
+++ b/include/llvm/Transforms/Utils/IntegerDivision.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TRANSFORMS_UTILS_INTEGERDIVISION_H
-#define TRANSFORMS_UTILS_INTEGERDIVISION_H
+#ifndef LLVM_TRANSFORMS_UTILS_INTEGERDIVISION_H
+#define LLVM_TRANSFORMS_UTILS_INTEGERDIVISION_H
namespace llvm {
class BinaryOperator;
@@ -43,6 +43,20 @@ namespace llvm {
/// @brief Replace Div with generated code.
bool expandDivision(BinaryOperator* Div);
+ /// Generate code to calculate the remainder of two integers, replacing Rem
+ /// with the generated code. Uses the above 32-bit routine, and is therefore
+ /// adequate for targets with little or no support for arithmetic narrower
+ /// than 32 bits.
+ ///
+ /// @brief Replace Rem with generated code.
+ bool expandRemainderUpTo32Bits(BinaryOperator *Rem);
+
+ /// Generate code to divide two integers, replacing Div with the generated
+ /// code. Uses the above 32-bit routine, and is therefore adequate for
+ /// targets with little or no support for arithmetic narrower than 32 bits.
+ ///
+ /// @brief Replace Div with generated code.
+ bool expandDivisionUpTo32Bits(BinaryOperator *Div);
+
} // End llvm namespace
#endif
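
A sketch of a driver for the new helpers: collect every remainder instruction in a function F first (expansion rewrites the instruction, so expanding while iterating would be fragile), then expand each one. The iteration style is illustrative:

SmallVector<BinaryOperator*, 8> Rems;
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&*I))
    if (BO->getOpcode() == Instruction::URem ||
        BO->getOpcode() == Instruction::SRem)
      Rems.push_back(BO);
for (unsigned i = 0, e = Rems.size(); i != e; ++i)
  expandRemainderUpTo32Bits(Rems[i]); // replaces each Rem with expanded code
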
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index be3029e545de..687c9d517b8d 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -15,10 +15,10 @@
#ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H
#define LLVM_TRANSFORMS_UTILS_LOCAL_H
-#include "llvm/IRBuilder.h"
-#include "llvm/Operator.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/DataLayout.h"
namespace llvm {
@@ -135,8 +135,8 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB);
/// of the CFG. It returns true if a modification was made, possibly deleting
/// the basic block that was pointed to.
///
-bool SimplifyCFG(BasicBlock *BB, const DataLayout *TD = 0,
- const TargetTransformInfo *TTI = 0);
+bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
+ const DataLayout *TD = 0);
/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
/// and if a predecessor branches to us and one of our successors, fold the
@@ -252,6 +252,16 @@ bool LowerDbgDeclare(Function &F);
/// an alloca, if any.
DbgDeclareInst *FindAllocaDbgDeclare(Value *V);
+/// replaceDbgDeclareForAlloca - Replaces the llvm.dbg.declare instruction
+/// when an alloca is replaced with a new value.
+bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+ DIBuilder &Builder);
+
+/// \brief Remove all blocks that cannot be reached from the function's entry.
+///
+/// Returns true if any basic block was removed.
+bool removeUnreachableBlocks(Function &F);
+
} // End llvm namespace
#endif
diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h
index 2c0ec9b118cf..bb7fc06bf530 100644
--- a/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_UTILS_MODULE_UTILS_H
-#define LLVM_TRANSFORMS_UTILS_MODULE_UTILS_H
+#ifndef LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
+#define LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
namespace llvm {
@@ -30,4 +30,4 @@ void appendToGlobalDtors(Module &M, Function *F, int Priority);
} // End llvm namespace
-#endif // LLVM_TRANSFORMS_UTILS_MODULE_UTILS_H
+#endif // LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
diff --git a/include/llvm/Transforms/Utils/PromoteMemToReg.h b/include/llvm/Transforms/Utils/PromoteMemToReg.h
index 0bb6ec69bbd5..52a6157d95ad 100644
--- a/include/llvm/Transforms/Utils/PromoteMemToReg.h
+++ b/include/llvm/Transforms/Utils/PromoteMemToReg.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef TRANSFORMS_UTILS_PROMOTEMEMTOREG_H
-#define TRANSFORMS_UTILS_PROMOTEMEMTOREG_H
+#ifndef LLVM_TRANSFORMS_UTILS_PROMOTEMEMTOREG_H
+#define LLVM_TRANSFORMS_UTILS_PROMOTEMEMTOREG_H
#include <vector>
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index db65a47e972d..cd048936e0bf 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_UTILS_SSAUPDATER_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
namespace llvm {
class BasicBlock;
diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index fde452bca235..6bb81be2fd5f 100644
--- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -31,7 +31,8 @@ namespace llvm {
/// simplifier.
LibCallSimplifierImpl *Impl;
public:
- LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI);
+ LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI,
+ bool UnsafeFPShrink);
virtual ~LibCallSimplifier();
/// optimizeCall - Take the given call instruction and return a more
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index 41e53a83e2f8..d205dbdede2e 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -18,6 +18,7 @@
namespace llvm {
class BasicBlock;
class BasicBlockPass;
+class Pass;
//===----------------------------------------------------------------------===//
/// @brief Vectorize configuration.
@@ -83,6 +84,9 @@ struct VectorizeConfig {
/// @brief The maximum number of pairable instructions per group.
unsigned MaxInsts;
+ /// @brief The maximum number of candidate instruction pairs per group.
+ unsigned MaxPairs;
+
/// @brief The maximum number of pairing iterations.
unsigned MaxIter;
@@ -110,7 +114,7 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
//
// LoopVectorize - Create a loop vectorization pass.
//
-Pass * createLoopVectorizePass();
+Pass *createLoopVectorizePass();
//===----------------------------------------------------------------------===//
/// @brief Vectorize the BasicBlock.
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
deleted file mode 100644
index def45750dd71..000000000000
--- a/include/llvm/Type.h
+++ /dev/null
@@ -1,458 +0,0 @@
-//===-- llvm/Type.h - Classes for handling data types -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the Type class. For more "Type"
-// stuff, look in DerivedTypes.h.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TYPE_H
-#define LLVM_TYPE_H
-
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-class PointerType;
-class IntegerType;
-class raw_ostream;
-class Module;
-class LLVMContext;
-class LLVMContextImpl;
-class StringRef;
-template<class GraphType> struct GraphTraits;
-
-/// The instances of the Type class are immutable: once they are created,
-/// they are never changed. Also note that only one instance of a particular
-/// type is ever created. Thus seeing if two types are equal is a matter of
-/// doing a trivial pointer comparison. To enforce that no two equal instances
-/// are created, Type instances can only be created via static factory methods
-/// in class Type and in derived classes. Once allocated, Types are never
-/// freed.
-///
-class Type {
-public:
- //===--------------------------------------------------------------------===//
- /// Definitions of all of the base types for the Type system. Based on this
- /// value, you can cast to a class defined in DerivedTypes.h.
- /// Note: If you add an element to this, you need to add an element to the
- /// Type::getPrimitiveType function, or else things will break!
- /// Also update LLVMTypeKind and LLVMGetTypeKind() in the C binding.
- ///
- enum TypeID {
- // PrimitiveTypes - make sure LastPrimitiveTyID stays up to date.
- VoidTyID = 0, ///< 0: type with no size
- HalfTyID, ///< 1: 16-bit floating point type
- FloatTyID, ///< 2: 32-bit floating point type
- DoubleTyID, ///< 3: 64-bit floating point type
- X86_FP80TyID, ///< 4: 80-bit floating point type (X87)
- FP128TyID, ///< 5: 128-bit floating point type (112-bit mantissa)
- PPC_FP128TyID, ///< 6: 128-bit floating point type (two 64-bits, PowerPC)
- LabelTyID, ///< 7: Labels
- MetadataTyID, ///< 8: Metadata
- X86_MMXTyID, ///< 9: MMX vectors (64 bits, X86 specific)
-
- // Derived types... see DerivedTypes.h file.
- // Make sure FirstDerivedTyID stays up to date!
- IntegerTyID, ///< 10: Arbitrary bit width integers
- FunctionTyID, ///< 11: Functions
- StructTyID, ///< 12: Structures
- ArrayTyID, ///< 13: Arrays
- PointerTyID, ///< 14: Pointers
- VectorTyID, ///< 15: SIMD 'packed' format, or other vector type
-
- NumTypeIDs, // Must remain as last defined ID
- LastPrimitiveTyID = X86_MMXTyID,
- FirstDerivedTyID = IntegerTyID
- };
-
-private:
- /// Context - This refers to the LLVMContext in which this type was uniqued.
- LLVMContext &Context;
-
- // Due to Ubuntu GCC bug 910363:
- // https://bugs.launchpad.net/ubuntu/+source/gcc-4.5/+bug/910363
- // Bitpack ID and SubclassData manually.
- // Note: TypeID : low 8 bit; SubclassData : high 24 bit.
- uint32_t IDAndSubclassData;
-
-protected:
- friend class LLVMContextImpl;
- explicit Type(LLVMContext &C, TypeID tid)
- : Context(C), IDAndSubclassData(0),
- NumContainedTys(0), ContainedTys(0) {
- setTypeID(tid);
- }
- ~Type() {}
-
- void setTypeID(TypeID ID) {
- IDAndSubclassData = (ID & 0xFF) | (IDAndSubclassData & 0xFFFFFF00);
- assert(getTypeID() == ID && "TypeID data too large for field");
- }
-
- unsigned getSubclassData() const { return IDAndSubclassData >> 8; }
-
- void setSubclassData(unsigned val) {
- IDAndSubclassData = (IDAndSubclassData & 0xFF) | (val << 8);
- // Ensure we don't have any accidental truncation.
- assert(getSubclassData() == val && "Subclass data too large for field");
- }
-
- /// NumContainedTys - Keeps track of how many Type*'s there are in the
- /// ContainedTys list.
- unsigned NumContainedTys;
-
- /// ContainedTys - A pointer to the array of Types contained by this Type.
- /// For example, this includes the arguments of a function type, the elements
- /// of a structure, the pointee of a pointer, the element type of an array,
- /// etc. This pointer may be 0 for types that don't contain other types
- /// (Integer, Double, Float).
- Type * const *ContainedTys;
-
-public:
- void print(raw_ostream &O) const;
- void dump() const;
-
- /// getContext - Return the LLVMContext in which this type was uniqued.
- LLVMContext &getContext() const { return Context; }
-
- //===--------------------------------------------------------------------===//
- // Accessors for working with types.
- //
-
- /// getTypeID - Return the type id for the type. This will return one
- /// of the TypeID enum elements defined above.
- ///
- TypeID getTypeID() const { return (TypeID)(IDAndSubclassData & 0xFF); }
-
- /// isVoidTy - Return true if this is 'void'.
- bool isVoidTy() const { return getTypeID() == VoidTyID; }
-
- /// isHalfTy - Return true if this is 'half', a 16-bit IEEE fp type.
- bool isHalfTy() const { return getTypeID() == HalfTyID; }
-
- /// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type.
- bool isFloatTy() const { return getTypeID() == FloatTyID; }
-
- /// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type.
- bool isDoubleTy() const { return getTypeID() == DoubleTyID; }
-
- /// isX86_FP80Ty - Return true if this is x86 long double.
- bool isX86_FP80Ty() const { return getTypeID() == X86_FP80TyID; }
-
- /// isFP128Ty - Return true if this is 'fp128'.
- bool isFP128Ty() const { return getTypeID() == FP128TyID; }
-
- /// isPPC_FP128Ty - Return true if this is powerpc long double.
- bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; }
-
- /// isFloatingPointTy - Return true if this is one of the six floating point
- /// types
- bool isFloatingPointTy() const {
- return getTypeID() == HalfTyID || getTypeID() == FloatTyID ||
- getTypeID() == DoubleTyID ||
- getTypeID() == X86_FP80TyID || getTypeID() == FP128TyID ||
- getTypeID() == PPC_FP128TyID;
- }
-
- /// isX86_MMXTy - Return true if this is X86 MMX.
- bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; }
-
- /// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP.
- ///
- bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); }
-
- /// isLabelTy - Return true if this is 'label'.
- bool isLabelTy() const { return getTypeID() == LabelTyID; }
-
- /// isMetadataTy - Return true if this is 'metadata'.
- bool isMetadataTy() const { return getTypeID() == MetadataTyID; }
-
- /// isIntegerTy - True if this is an instance of IntegerType.
- ///
- bool isIntegerTy() const { return getTypeID() == IntegerTyID; }
-
- /// isIntegerTy - Return true if this is an IntegerType of the given width.
- bool isIntegerTy(unsigned Bitwidth) const;
-
- /// isIntOrIntVectorTy - Return true if this is an integer type or a vector of
- /// integer types.
- ///
- bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); }
-
- /// isFunctionTy - True if this is an instance of FunctionType.
- ///
- bool isFunctionTy() const { return getTypeID() == FunctionTyID; }
-
- /// isStructTy - True if this is an instance of StructType.
- ///
- bool isStructTy() const { return getTypeID() == StructTyID; }
-
- /// isArrayTy - True if this is an instance of ArrayType.
- ///
- bool isArrayTy() const { return getTypeID() == ArrayTyID; }
-
- /// isPointerTy - True if this is an instance of PointerType.
- ///
- bool isPointerTy() const { return getTypeID() == PointerTyID; }
-
- /// isPtrOrPtrVectorTy - Return true if this is a pointer type or a vector of
- /// pointer types.
- ///
- bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); }
-
- /// isVectorTy - True if this is an instance of VectorType.
- ///
- bool isVectorTy() const { return getTypeID() == VectorTyID; }
-
- /// canLosslesslyBitCastTo - Return true if this type could be converted
- /// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts
- /// are valid for types of the same size only where no re-interpretation of
- /// the bits is done.
- /// @brief Determine if this type could be losslessly bitcast to Ty
- bool canLosslesslyBitCastTo(Type *Ty) const;
-
- /// isEmptyTy - Return true if this type is empty, that is, it has no
- /// elements or all its elements are empty.
- bool isEmptyTy() const;
-
- /// Here are some useful little methods to query what kind of derived type
- /// this is. Note that for all other types you can simply check whether
- /// this == Type::xxxTy.
- ///
- bool isPrimitiveType() const { return getTypeID() <= LastPrimitiveTyID; }
- bool isDerivedType() const { return getTypeID() >= FirstDerivedTyID; }
-
- /// isFirstClassType - Return true if the type is "first class", meaning it
- /// is a valid type for a Value.
- ///
- bool isFirstClassType() const {
- return getTypeID() != FunctionTyID && getTypeID() != VoidTyID;
- }
-
- /// isSingleValueType - Return true if the type is a valid type for a
- /// register in codegen. This includes all first-class types except struct
- /// and array types.
- ///
- bool isSingleValueType() const {
- return (getTypeID() != VoidTyID && isPrimitiveType()) ||
- getTypeID() == IntegerTyID || getTypeID() == PointerTyID ||
- getTypeID() == VectorTyID;
- }
-
- /// isAggregateType - Return true if the type is an aggregate type. This
- /// means it is valid as the first operand of an insertvalue or
- /// extractvalue instruction. This includes struct and array types, but
- /// does not include vector types.
- ///
- bool isAggregateType() const {
- return getTypeID() == StructTyID || getTypeID() == ArrayTyID;
- }
-
- /// isSized - Return true if it makes sense to take the size of this type. To
- /// get the actual size for a particular target, it is reasonable to use the
- /// DataLayout subsystem to do this.
- ///
- bool isSized() const {
- // If it's a primitive, it is always sized.
- if (getTypeID() == IntegerTyID || isFloatingPointTy() ||
- getTypeID() == PointerTyID ||
- getTypeID() == X86_MMXTyID)
- return true;
- // If it is not something that can have a size (e.g. a function or label),
- // it doesn't have a size.
- if (getTypeID() != StructTyID && getTypeID() != ArrayTyID &&
- getTypeID() != VectorTyID)
- return false;
- // Otherwise we have to try harder to decide.
- return isSizedDerivedType();
- }
-
- /// getPrimitiveSizeInBits - Return the basic size of this type if it is a
- /// primitive type. These are fixed by LLVM and are not target dependent.
- /// This will return zero if the type does not have a size or is not a
- /// primitive type.
- ///
- /// Note that this may not reflect the size of memory allocated for an
- /// instance of the type or the number of bytes that are written when an
- /// instance of the type is stored to memory. The DataLayout class provides
- /// additional query functions to provide this information.
- ///
- unsigned getPrimitiveSizeInBits() const;
-
- /// getScalarSizeInBits - If this is a vector type, return the
- /// getPrimitiveSizeInBits value for the element type. Otherwise return the
- /// getPrimitiveSizeInBits value for this type.
- unsigned getScalarSizeInBits();
-
- /// getFPMantissaWidth - Return the width of the mantissa of this type. This
- /// is only valid on floating point types. If the FP type does not
- /// have a stable mantissa (e.g. ppc long double), this method returns -1.
- int getFPMantissaWidth() const;
-
- /// getScalarType - If this is a vector type, return the element type,
- /// otherwise return 'this'.
- const Type *getScalarType() const;
- Type *getScalarType();
-
- //===--------------------------------------------------------------------===//
- // Type Iteration support.
- //
- typedef Type * const *subtype_iterator;
- subtype_iterator subtype_begin() const { return ContainedTys; }
- subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];}
-
- /// getContainedType - This method is used to implement the type iterator
- /// (defined at the end of the file). For derived types, this returns the
- /// types 'contained' in the derived type.
- ///
- Type *getContainedType(unsigned i) const {
- assert(i < NumContainedTys && "Index out of range!");
- return ContainedTys[i];
- }
-
- /// getNumContainedTypes - Return the number of types in the derived type.
- ///
- unsigned getNumContainedTypes() const { return NumContainedTys; }
-
- //===--------------------------------------------------------------------===//
- // Helper methods corresponding to subclass methods. This forces a cast to
- // the specified subclass and calls its accessor. "getVectorNumElements" (for
- // example) is shorthand for cast<VectorType>(Ty)->getNumElements(). This is
- // only intended to cover the core methods that are frequently used, helper
- // methods should not be added here.
-
- unsigned getIntegerBitWidth() const;
-
- Type *getFunctionParamType(unsigned i) const;
- unsigned getFunctionNumParams() const;
- bool isFunctionVarArg() const;
-
- StringRef getStructName() const;
- unsigned getStructNumElements() const;
- Type *getStructElementType(unsigned N) const;
-
- Type *getSequentialElementType() const;
-
- uint64_t getArrayNumElements() const;
- Type *getArrayElementType() const { return getSequentialElementType(); }
-
- unsigned getVectorNumElements() const;
- Type *getVectorElementType() const { return getSequentialElementType(); }
-
- Type *getPointerElementType() const { return getSequentialElementType(); }
-
- /// \brief Get the address space of this pointer or pointer vector type.
- unsigned getPointerAddressSpace() const;
-
- //===--------------------------------------------------------------------===//
- // Static members exported by the Type class itself. Useful for getting
- // instances of Type.
- //
-
- /// getPrimitiveType - Return a type based on an identifier.
- static Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber);
-
- //===--------------------------------------------------------------------===//
- // These are the builtin types that are always available.
- //
- static Type *getVoidTy(LLVMContext &C);
- static Type *getLabelTy(LLVMContext &C);
- static Type *getHalfTy(LLVMContext &C);
- static Type *getFloatTy(LLVMContext &C);
- static Type *getDoubleTy(LLVMContext &C);
- static Type *getMetadataTy(LLVMContext &C);
- static Type *getX86_FP80Ty(LLVMContext &C);
- static Type *getFP128Ty(LLVMContext &C);
- static Type *getPPC_FP128Ty(LLVMContext &C);
- static Type *getX86_MMXTy(LLVMContext &C);
- static IntegerType *getIntNTy(LLVMContext &C, unsigned N);
- static IntegerType *getInt1Ty(LLVMContext &C);
- static IntegerType *getInt8Ty(LLVMContext &C);
- static IntegerType *getInt16Ty(LLVMContext &C);
- static IntegerType *getInt32Ty(LLVMContext &C);
- static IntegerType *getInt64Ty(LLVMContext &C);
-
- //===--------------------------------------------------------------------===//
- // Convenience methods for getting pointer types with one of the above builtin
- // types as pointee.
- //
- static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0);
- static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
- static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
- static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
- static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
- static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
- static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
- static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0);
- static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
- static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
- static PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
- static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
- static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
-
- /// getPointerTo - Return a pointer to the current type. This is equivalent
- /// to PointerType::get(Foo, AddrSpace).
- PointerType *getPointerTo(unsigned AddrSpace = 0);
-
-private:
- /// isSizedDerivedType - Derived types like structures and arrays are sized
- /// iff all of the members of the type are sized as well. Since asking for
- /// their size is relatively uncommon, move this operation out of line.
- bool isSizedDerivedType() const;
-};
-
-// Printing of types.
-static inline raw_ostream &operator<<(raw_ostream &OS, Type &T) {
- T.print(OS);
- return OS;
-}
-
-// allow isa<PointerType>(x) to work without DerivedTypes.h included.
-template <> struct isa_impl<PointerType, Type> {
- static inline bool doit(const Type &Ty) {
- return Ty.getTypeID() == Type::PointerTyID;
- }
-};
-
-
-//===----------------------------------------------------------------------===//
-// Provide specializations of GraphTraits to be able to treat a type as a
-// graph of sub types.
-
-
-template <> struct GraphTraits<Type*> {
- typedef Type NodeType;
- typedef Type::subtype_iterator ChildIteratorType;
-
- static inline NodeType *getEntryNode(Type *T) { return T; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->subtype_begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->subtype_end();
- }
-};
-
-template <> struct GraphTraits<const Type*> {
- typedef const Type NodeType;
- typedef Type::subtype_iterator ChildIteratorType;
-
- static inline NodeType *getEntryNode(NodeType *T) { return T; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->subtype_begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->subtype_end();
- }
-};
-
-} // End llvm namespace
-
-#endif
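
The uniquing contract described at the top of the removed header means structural equality degenerates to pointer equality; a sketch:

LLVMContext Ctx;
Type *A = Type::getInt32Ty(Ctx);
Type *B = Type::getInt32Ty(Ctx);
assert(A == B && "i32 is uniqued per context, so the pointers compare equal");
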
diff --git a/include/llvm/TypeBuilder.h b/include/llvm/TypeBuilder.h
deleted file mode 100644
index 0b5647973184..000000000000
--- a/include/llvm/TypeBuilder.h
+++ /dev/null
@@ -1,399 +0,0 @@
-//===---- llvm/TypeBuilder.h - Builder for LLVM types -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the TypeBuilder class, which is used as a convenient way to
-// create LLVM types with a consistent and simplified interface.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TYPEBUILDER_H
-#define LLVM_TYPEBUILDER_H
-
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include <limits.h>
-
-namespace llvm {
-
-/// TypeBuilder - This provides a uniform API for looking up types
-/// known at compile time. To support cross-compilation, we define a
-/// series of tag types in the llvm::types namespace, like i<N>,
-/// ieee_float, ppc_fp128, etc. TypeBuilder<T, false> allows T to be
-/// any of these, a native C type (whose size may depend on the host
-/// compiler), or a pointer, function, or struct type built out of
-/// these. TypeBuilder<T, true> removes native C types from this set
-/// to guarantee that its result is suitable for cross-compilation.
-/// We define the primitive types, pointer types, and functions of up to
-/// 5 arguments here, but to use this class with your own types,
-/// you'll need to specialize it. For example, say you want to call a
-/// function defined externally as:
-///
-/// struct MyType {
-/// int32 a;
-/// int32 *b;
-/// void *array[1]; // Intended as a flexible array.
-/// };
-/// int8 AFunction(struct MyType *value);
-///
-/// You'll want to use
-/// Function::Create(TypeBuilder<types::i<8>(MyType*), true>::get(), ...)
-/// to declare the function, but when you first try this, your compiler will
-/// complain that TypeBuilder<MyType, true>::get() doesn't exist. To fix this,
-/// write:
-///
-/// namespace llvm {
-/// template<bool xcompile> class TypeBuilder<MyType, xcompile> {
-/// public:
-/// static StructType *get(LLVMContext &Context) {
-/// // If you cache this result, be sure to cache it separately
-/// // for each LLVMContext.
-/// return StructType::get(
-/// TypeBuilder<types::i<32>, xcompile>::get(Context),
-/// TypeBuilder<types::i<32>*, xcompile>::get(Context),
-/// TypeBuilder<types::i<8>*[], xcompile>::get(Context),
-/// NULL);
-/// }
-///
-/// // You may find this a convenient place to put some constants
-/// // to help with getelementptr. They don't have any effect on
-/// // the operation of TypeBuilder.
-/// enum Fields {
-/// FIELD_A,
-/// FIELD_B,
-/// FIELD_ARRAY
-/// };
-/// }
-/// } // namespace llvm
-///
-/// TypeBuilder cannot handle recursive types or types you only know at runtime.
-/// If you try to give it a recursive type, it will deadlock, infinitely
-/// recurse, or do something similarly undesirable.
-template<typename T, bool cross_compilable> class TypeBuilder {};
-
-// Types for use with cross-compilable TypeBuilders. These correspond
-// exactly with an LLVM-native type.
-namespace types {
-/// i<N> corresponds to the LLVM IntegerType with N bits.
-template<uint32_t num_bits> class i {};
-
-// The following classes represent the LLVM floating types.
-class ieee_float {};
-class ieee_double {};
-class x86_fp80 {};
-class fp128 {};
-class ppc_fp128 {};
-// X86 MMX.
-class x86_mmx {};
-} // namespace types
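
A minimal use of the builder just described (sketch): construct the LLVM function type "i8 (i32*)" in the cross-compilation-safe variant, assuming an LLVMContext named Context is in scope.

FunctionType *FT =
    TypeBuilder<types::i<8>(types::i<32>*), true>::get(Context);
// FT is uniqued in Context and can be passed to, e.g., Function::Create.
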
-
-// LLVM doesn't have const or volatile types.
-template<typename T, bool cross> class TypeBuilder<const T, cross>
- : public TypeBuilder<T, cross> {};
-template<typename T, bool cross> class TypeBuilder<volatile T, cross>
- : public TypeBuilder<T, cross> {};
-template<typename T, bool cross> class TypeBuilder<const volatile T, cross>
- : public TypeBuilder<T, cross> {};
-
-// Pointers
-template<typename T, bool cross> class TypeBuilder<T*, cross> {
-public:
- static PointerType *get(LLVMContext &Context) {
- return PointerType::getUnqual(TypeBuilder<T,cross>::get(Context));
- }
-};
-
-/// There is no support for references.
-template<typename T, bool cross> class TypeBuilder<T&, cross> {};
-
-// Arrays
-template<typename T, size_t N, bool cross> class TypeBuilder<T[N], cross> {
-public:
- static ArrayType *get(LLVMContext &Context) {
- return ArrayType::get(TypeBuilder<T, cross>::get(Context), N);
- }
-};
-/// LLVM uses an array of length 0 to represent an unknown-length array.
-template<typename T, bool cross> class TypeBuilder<T[], cross> {
-public:
- static ArrayType *get(LLVMContext &Context) {
- return ArrayType::get(TypeBuilder<T, cross>::get(Context), 0);
- }
-};
-
-// Define the C integral types only for TypeBuilder<T, false>.
-//
-// C integral types do not have a defined size. It would be nice to use the
-// stdint.h-defined typedefs that do have defined sizes, but we'd run into the
-// following problem:
-//
-// On an ILP32 machine, stdint.h might define:
-//
-// typedef int int32_t;
-// typedef long long int64_t;
-// typedef long size_t;
-//
-// If we defined TypeBuilder<int32_t> and TypeBuilder<int64_t>, then any use of
-// TypeBuilder<size_t> would fail. We couldn't define TypeBuilder<size_t> in
-// addition to the defined-size types because we'd get duplicate definitions on
-// platforms where stdint.h instead defines:
-//
-// typedef int int32_t;
-// typedef long long int64_t;
-// typedef int size_t;
-//
-// So we define all the primitive C types and nothing else.
-#define DEFINE_INTEGRAL_TYPEBUILDER(T) \
-template<> class TypeBuilder<T, false> { \
-public: \
- static IntegerType *get(LLVMContext &Context) { \
- return IntegerType::get(Context, sizeof(T) * CHAR_BIT); \
- } \
-}; \
-template<> class TypeBuilder<T, true> { \
- /* We provide a definition here so users don't accidentally */ \
- /* define these types to work. */ \
-}
-DEFINE_INTEGRAL_TYPEBUILDER(char);
-DEFINE_INTEGRAL_TYPEBUILDER(signed char);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned char);
-DEFINE_INTEGRAL_TYPEBUILDER(short);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned short);
-DEFINE_INTEGRAL_TYPEBUILDER(int);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned int);
-DEFINE_INTEGRAL_TYPEBUILDER(long);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned long);
-#ifdef _MSC_VER
-DEFINE_INTEGRAL_TYPEBUILDER(__int64);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned __int64);
-#else /* _MSC_VER */
-DEFINE_INTEGRAL_TYPEBUILDER(long long);
-DEFINE_INTEGRAL_TYPEBUILDER(unsigned long long);
-#endif /* _MSC_VER */
-#undef DEFINE_INTEGRAL_TYPEBUILDER
-
-template<uint32_t num_bits, bool cross>
-class TypeBuilder<types::i<num_bits>, cross> {
-public:
- static IntegerType *get(LLVMContext &C) {
- return IntegerType::get(C, num_bits);
- }
-};
-
-template<> class TypeBuilder<float, false> {
-public:
- static Type *get(LLVMContext& C) {
- return Type::getFloatTy(C);
- }
-};
-template<> class TypeBuilder<float, true> {};
-
-template<> class TypeBuilder<double, false> {
-public:
- static Type *get(LLVMContext& C) {
- return Type::getDoubleTy(C);
- }
-};
-template<> class TypeBuilder<double, true> {};
-
-template<bool cross> class TypeBuilder<types::ieee_float, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getFloatTy(C); }
-};
-template<bool cross> class TypeBuilder<types::ieee_double, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getDoubleTy(C); }
-};
-template<bool cross> class TypeBuilder<types::x86_fp80, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getX86_FP80Ty(C); }
-};
-template<bool cross> class TypeBuilder<types::fp128, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getFP128Ty(C); }
-};
-template<bool cross> class TypeBuilder<types::ppc_fp128, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getPPC_FP128Ty(C); }
-};
-template<bool cross> class TypeBuilder<types::x86_mmx, cross> {
-public:
- static Type *get(LLVMContext& C) { return Type::getX86_MMXTy(C); }
-};
-
-template<bool cross> class TypeBuilder<void, cross> {
-public:
- static Type *get(LLVMContext &C) {
- return Type::getVoidTy(C);
- }
-};
-
-/// void* is disallowed in LLVM types, but it occurs often enough in C code that
-/// we special-case it.
-template<> class TypeBuilder<void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-template<> class TypeBuilder<const void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-template<> class TypeBuilder<volatile void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-template<> class TypeBuilder<const volatile void*, false>
- : public TypeBuilder<types::i<8>*, false> {};
-
-template<typename R, bool cross> class TypeBuilder<R(), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- return FunctionType::get(TypeBuilder<R, cross>::get(Context), false);
- }
-};
-template<typename R, typename A1, bool cross> class TypeBuilder<R(A1), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-template<typename R, typename A1, typename A2, bool cross>
-class TypeBuilder<R(A1, A2), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-template<typename R, typename A1, typename A2, typename A3, bool cross>
-class TypeBuilder<R(A1, A2, A3), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- bool cross>
-class TypeBuilder<R(A1, A2, A3, A4), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- typename A5, bool cross>
-class TypeBuilder<R(A1, A2, A3, A4, A5), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- TypeBuilder<A5, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, false);
- }
-};
-
-template<typename R, bool cross> class TypeBuilder<R(...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- return FunctionType::get(TypeBuilder<R, cross>::get(Context), true);
- }
-};
-template<typename R, typename A1, bool cross>
-class TypeBuilder<R(A1, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context), params, true);
- }
-};
-template<typename R, typename A1, typename A2, bool cross>
-class TypeBuilder<R(A1, A2, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-template<typename R, typename A1, typename A2, typename A3, bool cross>
-class TypeBuilder<R(A1, A2, A3, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- bool cross>
-class TypeBuilder<R(A1, A2, A3, A4, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-
-template<typename R, typename A1, typename A2, typename A3, typename A4,
- typename A5, bool cross>
-class TypeBuilder<R(A1, A2, A3, A4, A5, ...), cross> {
-public:
- static FunctionType *get(LLVMContext &Context) {
- Type *params[] = {
- TypeBuilder<A1, cross>::get(Context),
- TypeBuilder<A2, cross>::get(Context),
- TypeBuilder<A3, cross>::get(Context),
- TypeBuilder<A4, cross>::get(Context),
- TypeBuilder<A5, cross>::get(Context),
- };
- return FunctionType::get(TypeBuilder<R, cross>::get(Context),
- params, true);
- }
-};
-
-} // namespace llvm
-
-#endif
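
For context, the TypeBuilder specializations deleted above compose recursively from the C++ type; a minimal usage sketch, assuming llvm/IR/TypeBuilder.h is the post-move home of this header after the import:

// Minimal sketch: derive the LLVM FunctionType for `int (char *, ...)`
// directly from the C++ type. `false` selects host-sized C integral types;
// `true` (cross-compilation mode) deliberately rejects plain `int`.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/TypeBuilder.h"

llvm::FunctionType *getPrintfLikeType(llvm::LLVMContext &Ctx) {
  return llvm::TypeBuilder<int(char *, ...), false>::get(Ctx);
}
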
diff --git a/include/llvm/TypeFinder.h b/include/llvm/TypeFinder.h
deleted file mode 100644
index 5d807057a32d..000000000000
--- a/include/llvm/TypeFinder.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- llvm/TypeFinder.h - Class for finding used struct types -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the TypeFinder class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TYPEFINDER_H
-#define LLVM_TYPEFINDER_H
-
-#include "llvm/ADT/DenseSet.h"
-#include <vector>
-
-namespace llvm {
-
-class MDNode;
-class Module;
-class StructType;
-class Type;
-class Value;
-
-/// TypeFinder - Walk over a module, identifying all of the types that are
-/// used by the module.
-class TypeFinder {
- // To avoid walking constant expressions multiple times and other IR
- // objects, we keep several helper maps.
- DenseSet<const Value*> VisitedConstants;
- DenseSet<Type*> VisitedTypes;
-
- std::vector<StructType*> StructTypes;
- bool OnlyNamed;
-
-public:
- TypeFinder() : OnlyNamed(false) {}
-
- void run(const Module &M, bool onlyNamed);
- void clear();
-
- typedef std::vector<StructType*>::iterator iterator;
- typedef std::vector<StructType*>::const_iterator const_iterator;
-
- iterator begin() { return StructTypes.begin(); }
- iterator end() { return StructTypes.end(); }
-
- const_iterator begin() const { return StructTypes.begin(); }
- const_iterator end() const { return StructTypes.end(); }
-
- bool empty() const { return StructTypes.empty(); }
- size_t size() const { return StructTypes.size(); }
- iterator erase(iterator I, iterator E) { return StructTypes.erase(I, E); }
-
- StructType *&operator[](unsigned Idx) { return StructTypes[Idx]; }
-
-private:
- /// incorporateType - This method adds the type to the list of used
- /// structures if it's not in there already.
- void incorporateType(Type *Ty);
-
- /// incorporateValue - This method is used to walk operand lists finding types
- /// hiding in constant expressions and other operands that won't be walked in
- /// other ways. GlobalValues, basic blocks, instructions, and inst operands
- /// are all explicitly enumerated.
- void incorporateValue(const Value *V);
-
- /// incorporateMDNode - This method is used to walk the operands of an MDNode
- /// to find types hiding within.
- void incorporateMDNode(const MDNode *V);
-};
-
-} // end llvm namespace
-
-#endif
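
A sketch of how the TypeFinder interface removed above is typically driven (header paths assume the post-import llvm/IR/ layout):

#include "llvm/IR/Module.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/Support/raw_ostream.h"

// Collect every named struct type used anywhere in the module and print
// its name; run(M, true) restricts the walk to named types only.
void printNamedStructs(const llvm::Module &M) {
  llvm::TypeFinder Finder;
  Finder.run(M, /*onlyNamed=*/true);
  for (llvm::TypeFinder::iterator I = Finder.begin(), E = Finder.end();
       I != E; ++I)
    llvm::errs() << (*I)->getName() << "\n";
}
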
diff --git a/include/llvm/Use.h b/include/llvm/Use.h
deleted file mode 100644
index 80804459cc33..000000000000
--- a/include/llvm/Use.h
+++ /dev/null
@@ -1,220 +0,0 @@
-//===-- llvm/Use.h - Definition of the Use class ----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This defines the Use class. The Use class represents the operand of an
-// instruction or some other User instance which refers to a Value. The Use
-// class keeps the "use list" of the referenced value up to date.
-//
-// Pointer tagging is used to efficiently find the User corresponding
-// to a Use without having to store a User pointer in every Use. A
-// User is preceded in memory by all the Uses corresponding to its
-// operands, and the low bits of one of the fields (Prev) of the Use
-// class are used to encode offsets to be able to find that User given
-// a pointer to any Use. For details, see:
-//
-// http://www.llvm.org/docs/ProgrammersManual.html#UserLayout
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_USE_H
-#define LLVM_USE_H
-
-#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/Support/Compiler.h"
-#include <cstddef>
-#include <iterator>
-
-namespace llvm {
-
-class Value;
-class User;
-class Use;
-template<typename>
-struct simplify_type;
-
-// Use** is only 4-byte aligned.
-template<>
-class PointerLikeTypeTraits<Use**> {
-public:
- static inline void *getAsVoidPointer(Use** P) { return P; }
- static inline Use **getFromVoidPointer(void *P) {
- return static_cast<Use**>(P);
- }
- enum { NumLowBitsAvailable = 2 };
-};
-
-//===----------------------------------------------------------------------===//
-// Use Class
-//===----------------------------------------------------------------------===//
-
-/// Use is here to make keeping the "use" list of a Value up-to-date really
-/// easy.
-class Use {
-public:
-  /// swap - provide a fast substitute for std::swap<Use>
-  /// that also works with less standard-compliant compilers
- void swap(Use &RHS);
-
- // A type for the word following an array of hung-off Uses in memory, which is
- // a pointer back to their User with the bottom bit set.
- typedef PointerIntPair<User*, 1, unsigned> UserRef;
-
-private:
- /// Copy ctor - do not implement
- Use(const Use &U) LLVM_DELETED_FUNCTION;
-
- /// Destructor - Only for zap()
- ~Use() {
- if (Val) removeFromList();
- }
-
- enum PrevPtrTag { zeroDigitTag
- , oneDigitTag
- , stopTag
- , fullStopTag };
-
- /// Constructor
- Use(PrevPtrTag tag) : Val(0) {
- Prev.setInt(tag);
- }
-
-public:
-  /// Normally a Use will implicitly convert to the Value* that it holds.
- operator Value*() const { return Val; }
-
- /// If implicit conversion to Value* doesn't work, the get() method returns
- /// the Value*.
- Value *get() const { return Val; }
-
- /// getUser - This returns the User that contains this Use. For an
- /// instruction operand, for example, this will return the instruction.
- User *getUser() const;
-
- inline void set(Value *Val);
-
- Value *operator=(Value *RHS) {
- set(RHS);
- return RHS;
- }
- const Use &operator=(const Use &RHS) {
- set(RHS.Val);
- return *this;
- }
-
- Value *operator->() { return Val; }
- const Value *operator->() const { return Val; }
-
- Use *getNext() const { return Next; }
-
-
- /// initTags - initialize the waymarking tags on an array of Uses, so that
- /// getUser() can find the User from any of those Uses.
- static Use *initTags(Use *Start, Use *Stop);
-
- /// zap - This is used to destroy Use operands when the number of operands of
- /// a User changes.
- static void zap(Use *Start, const Use *Stop, bool del = false);
-
-private:
- const Use* getImpliedUser() const;
-
- Value *Val;
- Use *Next;
- PointerIntPair<Use**, 2, PrevPtrTag> Prev;
-
- void setPrev(Use **NewPrev) {
- Prev.setPointer(NewPrev);
- }
- void addToList(Use **List) {
- Next = *List;
- if (Next) Next->setPrev(&Next);
- setPrev(List);
- *List = this;
- }
- void removeFromList() {
- Use **StrippedPrev = Prev.getPointer();
- *StrippedPrev = Next;
- if (Next) Next->setPrev(StrippedPrev);
- }
-
- friend class Value;
-};
-
-// simplify_type - Allow clients to treat uses just like values when using
-// casting operators.
-template<> struct simplify_type<Use> {
- typedef Value* SimpleType;
- static SimpleType getSimplifiedValue(const Use &Val) {
- return static_cast<SimpleType>(Val.get());
- }
-};
-template<> struct simplify_type<const Use> {
- typedef Value* SimpleType;
- static SimpleType getSimplifiedValue(const Use &Val) {
- return static_cast<SimpleType>(Val.get());
- }
-};
-
-
-
-template<typename UserTy> // UserTy == 'User' or 'const User'
-class value_use_iterator : public std::iterator<std::forward_iterator_tag,
- UserTy*, ptrdiff_t> {
- typedef std::iterator<std::forward_iterator_tag, UserTy*, ptrdiff_t> super;
- typedef value_use_iterator<UserTy> _Self;
-
- Use *U;
- explicit value_use_iterator(Use *u) : U(u) {}
- friend class Value;
-public:
- typedef typename super::reference reference;
- typedef typename super::pointer pointer;
-
- value_use_iterator(const _Self &I) : U(I.U) {}
- value_use_iterator() {}
-
- bool operator==(const _Self &x) const {
- return U == x.U;
- }
- bool operator!=(const _Self &x) const {
- return !operator==(x);
- }
-
- /// atEnd - return true if this iterator is equal to use_end() on the value.
- bool atEnd() const { return U == 0; }
-
- // Iterator traversal: forward iteration only
- _Self &operator++() { // Preincrement
- assert(U && "Cannot increment end iterator!");
- U = U->getNext();
- return *this;
- }
- _Self operator++(int) { // Postincrement
- _Self tmp = *this; ++*this; return tmp;
- }
-
- // Retrieve a pointer to the current User.
- UserTy *operator*() const {
- assert(U && "Cannot dereference end iterator!");
- return U->getUser();
- }
-
- UserTy *operator->() const { return operator*(); }
-
- Use &getUse() const { return *U; }
-
- /// getOperandNo - Return the operand # of this use in its User. Defined in
- /// User.h
- ///
- unsigned getOperandNo() const;
-};
-
-} // End llvm namespace
-
-#endif
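
The waymarking machinery above exists so that use-list iteration stays cheap; a minimal sketch of walking a use list with the value_use_iterator defined here (use_begin()/use_end() are declared on Value, deleted below; paths assume the post-import llvm/IR/ layout):

#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"

// Walk the use list of V, recovering each User and the operand slot the
// Use occupies; getOperandNo() relies on the waymarking tags described in
// the header comment above.
void printUses(llvm::Value *V) {
  for (llvm::Value::use_iterator UI = V->use_begin(), E = V->use_end();
       UI != E; ++UI)
    llvm::errs() << "operand #" << UI.getOperandNo()
                 << " of: " << **UI << "\n";
}
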
diff --git a/include/llvm/User.h b/include/llvm/User.h
deleted file mode 100644
index df303d0dd5f2..000000000000
--- a/include/llvm/User.h
+++ /dev/null
@@ -1,215 +0,0 @@
-//===-- llvm/User.h - User class definition ---------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class defines the interface that one who uses a Value must implement.
-// Each instance of the Value class keeps track of which Users have handles
-// to it.
-//
-// * Instructions are the largest class of Users.
-//  * Constants may be users of other constants (e.g. constant arrays and structs)
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_USER_H
-#define LLVM_USER_H
-
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Value.h"
-
-namespace llvm {
-
-/// OperandTraits - Compile-time customization of
-/// operand-related allocators and accessors
-/// for use of the User class
-template <class>
-struct OperandTraits;
-
-class User : public Value {
- User(const User &) LLVM_DELETED_FUNCTION;
- void *operator new(size_t) LLVM_DELETED_FUNCTION;
- template <unsigned>
- friend struct HungoffOperandTraits;
- virtual void anchor();
-protected:
- /// OperandList - This is a pointer to the array of Uses for this User.
- /// For nodes of fixed arity (e.g. a binary operator) this array will live
- /// prefixed to some derived class instance. For nodes of resizable variable
-  /// arity (e.g. PHINode, SwitchInst, etc.), this memory will be dynamically
-  /// allocated and should be destroyed by the class's virtual dtor.
- Use *OperandList;
-
- /// NumOperands - The number of values used by this User.
- ///
- unsigned NumOperands;
-
- void *operator new(size_t s, unsigned Us);
- User(Type *ty, unsigned vty, Use *OpList, unsigned NumOps)
- : Value(ty, vty), OperandList(OpList), NumOperands(NumOps) {}
- Use *allocHungoffUses(unsigned) const;
- void dropHungoffUses() {
- Use::zap(OperandList, OperandList + NumOperands, true);
- OperandList = 0;
- // Reset NumOperands so User::operator delete() does the right thing.
- NumOperands = 0;
- }
-public:
- ~User() {
- Use::zap(OperandList, OperandList + NumOperands);
- }
- /// operator delete - free memory allocated for User and Use objects
- void operator delete(void *Usr);
- /// placement delete - required by std, but never called.
- void operator delete(void*, unsigned) {
- llvm_unreachable("Constructor throws?");
- }
- /// placement delete - required by std, but never called.
- void operator delete(void*, unsigned, bool) {
- llvm_unreachable("Constructor throws?");
- }
-protected:
- template <int Idx, typename U> static Use &OpFrom(const U *that) {
- return Idx < 0
- ? OperandTraits<U>::op_end(const_cast<U*>(that))[Idx]
- : OperandTraits<U>::op_begin(const_cast<U*>(that))[Idx];
- }
- template <int Idx> Use &Op() {
- return OpFrom<Idx>(this);
- }
- template <int Idx> const Use &Op() const {
- return OpFrom<Idx>(this);
- }
-public:
- Value *getOperand(unsigned i) const {
- assert(i < NumOperands && "getOperand() out of range!");
- return OperandList[i];
- }
- void setOperand(unsigned i, Value *Val) {
- assert(i < NumOperands && "setOperand() out of range!");
- assert((!isa<Constant>((const Value*)this) ||
- isa<GlobalValue>((const Value*)this)) &&
- "Cannot mutate a constant with setOperand!");
- OperandList[i] = Val;
- }
- const Use &getOperandUse(unsigned i) const {
- assert(i < NumOperands && "getOperandUse() out of range!");
- return OperandList[i];
- }
- Use &getOperandUse(unsigned i) {
- assert(i < NumOperands && "getOperandUse() out of range!");
- return OperandList[i];
- }
-
- unsigned getNumOperands() const { return NumOperands; }
-
- // ---------------------------------------------------------------------------
- // Operand Iterator interface...
- //
- typedef Use* op_iterator;
- typedef const Use* const_op_iterator;
-
- inline op_iterator op_begin() { return OperandList; }
- inline const_op_iterator op_begin() const { return OperandList; }
- inline op_iterator op_end() { return OperandList+NumOperands; }
- inline const_op_iterator op_end() const { return OperandList+NumOperands; }
-
- /// Convenience iterator for directly iterating over the Values in the
- /// OperandList
- class value_op_iterator : public std::iterator<std::forward_iterator_tag,
- Value*> {
- op_iterator OI;
- public:
- explicit value_op_iterator(Use *U) : OI(U) {}
-
- bool operator==(const value_op_iterator &x) const {
- return OI == x.OI;
- }
- bool operator!=(const value_op_iterator &x) const {
- return !operator==(x);
- }
-
- /// Iterator traversal: forward iteration only
- value_op_iterator &operator++() { // Preincrement
- ++OI;
- return *this;
- }
- value_op_iterator operator++(int) { // Postincrement
- value_op_iterator tmp = *this; ++*this; return tmp;
- }
-
- /// Retrieve a pointer to the current Value.
- Value *operator*() const {
- return *OI;
- }
-
- Value *operator->() const { return operator*(); }
- };
-
- inline value_op_iterator value_op_begin() {
- return value_op_iterator(op_begin());
- }
- inline value_op_iterator value_op_end() {
- return value_op_iterator(op_end());
- }
-
- // dropAllReferences() - This function is in charge of "letting go" of all
- // objects that this User refers to. This allows one to
-  // 'delete' a whole graph of objects at a time, even though there may be circular
- // references... First all references are dropped, and all use counts go to
- // zero. Then everything is deleted for real. Note that no operations are
- // valid on an object that has "dropped all references", except operator
- // delete.
- //
- void dropAllReferences() {
- for (op_iterator i = op_begin(), e = op_end(); i != e; ++i)
- i->set(0);
- }
-
- /// replaceUsesOfWith - Replaces all references to the "From" definition with
- /// references to the "To" definition.
- ///
- void replaceUsesOfWith(Value *From, Value *To);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) || isa<Constant>(V);
- }
-};
-
-template<> struct simplify_type<User::op_iterator> {
- typedef Value* SimpleType;
-
- static SimpleType getSimplifiedValue(const User::op_iterator &Val) {
- return static_cast<SimpleType>(Val->get());
- }
-};
-
-template<> struct simplify_type<const User::op_iterator>
- : public simplify_type<User::op_iterator> {};
-
-template<> struct simplify_type<User::const_op_iterator> {
- typedef Value* SimpleType;
-
- static SimpleType getSimplifiedValue(const User::const_op_iterator &Val) {
- return static_cast<SimpleType>(Val->get());
- }
-};
-
-template<> struct simplify_type<const User::const_op_iterator>
- : public simplify_type<User::const_op_iterator> {};
-
-
-// value_use_iterator::getOperandNo - Requires the definition of the User class.
-template<typename UserTy>
-unsigned value_use_iterator<UserTy>::getOperandNo() const {
- return U - U->getUser()->op_begin();
-}
-
-} // End llvm namespace
-
-#endif
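
A short operand-side counterpart, assuming the User interface as declared above: value_op_iterator hides the Use indirection when only the operand Values matter.

#include "llvm/IR/User.h"
#include "llvm/Support/raw_ostream.h"

// Print each operand Value of U; op_begin()/op_end() would yield the Use
// objects instead, which is what replaceUsesOfWith() mutates.
void printOperands(llvm::User *U) {
  for (llvm::User::value_op_iterator I = U->value_op_begin(),
                                     E = U->value_op_end();
       I != E; ++I)
    llvm::errs() << **I << "\n";
}
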
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
deleted file mode 100644
index 5b19435ebaf4..000000000000
--- a/include/llvm/Value.h
+++ /dev/null
@@ -1,411 +0,0 @@
-//===-- llvm/Value.h - Definition of the Value class ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Value class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_VALUE_H
-#define LLVM_VALUE_H
-
-#include "llvm/Use.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-class Constant;
-class Argument;
-class Instruction;
-class BasicBlock;
-class GlobalValue;
-class Function;
-class GlobalVariable;
-class GlobalAlias;
-class InlineAsm;
-class ValueSymbolTable;
-template<typename ValueTy> class StringMapEntry;
-typedef StringMapEntry<Value*> ValueName;
-class raw_ostream;
-class AssemblyAnnotationWriter;
-class ValueHandleBase;
-class LLVMContext;
-class Twine;
-class MDNode;
-class Type;
-class StringRef;
-
-//===----------------------------------------------------------------------===//
-// Value Class
-//===----------------------------------------------------------------------===//
-
-/// This is a very important LLVM class. It is the base class of all values
-/// computed by a program that may be used as operands to other values. Value is
-/// the super class of other important classes such as Instruction and Function.
-/// All Values have a Type. Type is not a subclass of Value. Some values can
-/// have a name and they belong to some Module. Setting the name on the Value
-/// automatically updates the module's symbol table.
-///
-/// Every value has a "use list" that keeps track of which other Values are
-/// using this Value. A Value can also have an arbitrary number of ValueHandle
-/// objects that watch it and listen to RAUW and Destroy events. See
-/// llvm/Support/ValueHandle.h for details.
-///
-/// @brief LLVM Value Representation
-class Value {
- const unsigned char SubclassID; // Subclass identifier (for isa/dyn_cast)
- unsigned char HasValueHandle : 1; // Has a ValueHandle pointing to this?
-protected:
- /// SubclassOptionalData - This member is similar to SubclassData, however it
- /// is for holding information which may be used to aid optimization, but
- /// which may be cleared to zero without affecting conservative
- /// interpretation.
- unsigned char SubclassOptionalData : 7;
-
-private:
- /// SubclassData - This member is defined by this class, but is not used for
- /// anything. Subclasses can use it to hold whatever state they find useful.
- /// This field is initialized to zero by the ctor.
- unsigned short SubclassData;
-
- Type *VTy;
- Use *UseList;
-
-  friend class ValueSymbolTable; // Allow ValueSymbolTable to directly modify Name.
- friend class ValueHandleBase;
- ValueName *Name;
-
- void operator=(const Value &) LLVM_DELETED_FUNCTION;
- Value(const Value &) LLVM_DELETED_FUNCTION;
-
-protected:
- /// printCustom - Value subclasses can override this to implement custom
- /// printing behavior.
- virtual void printCustom(raw_ostream &O) const;
-
- Value(Type *Ty, unsigned scid);
-public:
- virtual ~Value();
-
- /// dump - Support for debugging, callable in GDB: V->dump()
- //
- void dump() const;
-
- /// print - Implement operator<< on Value.
- ///
- void print(raw_ostream &O, AssemblyAnnotationWriter *AAW = 0) const;
-
- /// All values are typed, get the type of this value.
- ///
- Type *getType() const { return VTy; }
-
- /// All values hold a context through their type.
- LLVMContext &getContext() const;
-
- // All values can potentially be named.
- bool hasName() const { return Name != 0 && SubclassID != MDStringVal; }
- ValueName *getValueName() const { return Name; }
- void setValueName(ValueName *VN) { Name = VN; }
-
- /// getName() - Return a constant reference to the value's name. This is cheap
- /// and guaranteed to return the same reference as long as the value is not
- /// modified.
- StringRef getName() const;
-
- /// setName() - Change the name of the value, choosing a new unique name if
- /// the provided name is taken.
- ///
- /// \param Name The new name; or "" if the value's name should be removed.
- void setName(const Twine &Name);
-
-
- /// takeName - transfer the name from V to this value, setting V's name to
- /// empty. It is an error to call V->takeName(V).
- void takeName(Value *V);
-
- /// replaceAllUsesWith - Go through the uses list for this definition and make
- /// each use point to "V" instead of "this". After this completes, 'this's
- /// use list is guaranteed to be empty.
- ///
- void replaceAllUsesWith(Value *V);
-
- //----------------------------------------------------------------------
- // Methods for handling the chain of uses of this Value.
- //
- typedef value_use_iterator<User> use_iterator;
- typedef value_use_iterator<const User> const_use_iterator;
-
- bool use_empty() const { return UseList == 0; }
- use_iterator use_begin() { return use_iterator(UseList); }
- const_use_iterator use_begin() const { return const_use_iterator(UseList); }
- use_iterator use_end() { return use_iterator(0); }
- const_use_iterator use_end() const { return const_use_iterator(0); }
- User *use_back() { return *use_begin(); }
- const User *use_back() const { return *use_begin(); }
-
- /// hasOneUse - Return true if there is exactly one user of this value. This
- /// is specialized because it is a common request and does not require
- /// traversing the whole use list.
- ///
- bool hasOneUse() const {
- const_use_iterator I = use_begin(), E = use_end();
- if (I == E) return false;
- return ++I == E;
- }
-
- /// hasNUses - Return true if this Value has exactly N users.
- ///
- bool hasNUses(unsigned N) const;
-
- /// hasNUsesOrMore - Return true if this value has N users or more. This is
- /// logically equivalent to getNumUses() >= N.
- ///
- bool hasNUsesOrMore(unsigned N) const;
-
- bool isUsedInBasicBlock(const BasicBlock *BB) const;
-
- /// getNumUses - This method computes the number of uses of this Value. This
- /// is a linear time operation. Use hasOneUse, hasNUses, or hasNUsesOrMore
- /// to check for specific values.
- unsigned getNumUses() const;
-
- /// addUse - This method should only be used by the Use class.
- ///
- void addUse(Use &U) { U.addToList(&UseList); }
-
- /// An enumeration for keeping track of the concrete subclass of Value that
- /// is actually instantiated. Values of this enumeration are kept in the
- /// Value classes SubclassID field. They are used for concrete type
- /// identification.
- enum ValueTy {
- ArgumentVal, // This is an instance of Argument
- BasicBlockVal, // This is an instance of BasicBlock
- FunctionVal, // This is an instance of Function
- GlobalAliasVal, // This is an instance of GlobalAlias
- GlobalVariableVal, // This is an instance of GlobalVariable
- UndefValueVal, // This is an instance of UndefValue
- BlockAddressVal, // This is an instance of BlockAddress
- ConstantExprVal, // This is an instance of ConstantExpr
- ConstantAggregateZeroVal, // This is an instance of ConstantAggregateZero
- ConstantDataArrayVal, // This is an instance of ConstantDataArray
- ConstantDataVectorVal, // This is an instance of ConstantDataVector
- ConstantIntVal, // This is an instance of ConstantInt
- ConstantFPVal, // This is an instance of ConstantFP
- ConstantArrayVal, // This is an instance of ConstantArray
- ConstantStructVal, // This is an instance of ConstantStruct
- ConstantVectorVal, // This is an instance of ConstantVector
- ConstantPointerNullVal, // This is an instance of ConstantPointerNull
- MDNodeVal, // This is an instance of MDNode
- MDStringVal, // This is an instance of MDString
- InlineAsmVal, // This is an instance of InlineAsm
- PseudoSourceValueVal, // This is an instance of PseudoSourceValue
- FixedStackPseudoSourceValueVal, // This is an instance of
- // FixedStackPseudoSourceValue
- InstructionVal, // This is an instance of Instruction
- // Enum values starting at InstructionVal are used for Instructions;
- // don't add new values here!
-
- // Markers:
- ConstantFirstVal = FunctionVal,
- ConstantLastVal = ConstantPointerNullVal
- };
-
- /// getValueID - Return an ID for the concrete type of this object. This is
- /// used to implement the classof checks. This should not be used for any
- /// other purpose, as the values may change as LLVM evolves. Also, note that
- /// for instructions, the Instruction's opcode is added to InstructionVal. So
- /// this means three things:
- /// # there is no value with code InstructionVal (no opcode==0).
- /// # there are more possible values for the value type than in ValueTy enum.
- /// # the InstructionVal enumerator must be the highest valued enumerator in
- /// the ValueTy enum.
- unsigned getValueID() const {
- return SubclassID;
- }
-
- /// getRawSubclassOptionalData - Return the raw optional flags value
- /// contained in this value. This should only be used when testing two
- /// Values for equivalence.
- unsigned getRawSubclassOptionalData() const {
- return SubclassOptionalData;
- }
-
- /// clearSubclassOptionalData - Clear the optional flags contained in
- /// this value.
- void clearSubclassOptionalData() {
- SubclassOptionalData = 0;
- }
-
- /// hasSameSubclassOptionalData - Test whether the optional flags contained
- /// in this value are equal to the optional flags in the given value.
- bool hasSameSubclassOptionalData(const Value *V) const {
- return SubclassOptionalData == V->SubclassOptionalData;
- }
-
- /// intersectOptionalDataWith - Clear any optional flags in this value
- /// that are not also set in the given value.
- void intersectOptionalDataWith(const Value *V) {
- SubclassOptionalData &= V->SubclassOptionalData;
- }
-
- /// hasValueHandle - Return true if there is a value handle associated with
- /// this value.
- bool hasValueHandle() const { return HasValueHandle; }
-
- /// stripPointerCasts - This method strips off any unneeded pointer casts and
- /// all-zero GEPs from the specified value, returning the original uncasted
- /// value. If this is called on a non-pointer value, it returns 'this'.
- Value *stripPointerCasts();
- const Value *stripPointerCasts() const {
- return const_cast<Value*>(this)->stripPointerCasts();
- }
-
- /// stripInBoundsConstantOffsets - This method strips off unneeded pointer casts and
- /// all-constant GEPs from the specified value, returning the original
- /// pointer value. If this is called on a non-pointer value, it returns
- /// 'this'.
- Value *stripInBoundsConstantOffsets();
- const Value *stripInBoundsConstantOffsets() const {
- return const_cast<Value*>(this)->stripInBoundsConstantOffsets();
- }
-
- /// stripInBoundsOffsets - This method strips off unneeded pointer casts and
- /// any in-bounds Offsets from the specified value, returning the original
- /// pointer value. If this is called on a non-pointer value, it returns
- /// 'this'.
- Value *stripInBoundsOffsets();
- const Value *stripInBoundsOffsets() const {
- return const_cast<Value*>(this)->stripInBoundsOffsets();
- }
-
- /// isDereferenceablePointer - Test if this value is always a pointer to
- /// allocated and suitably aligned memory for a simple load or store.
- bool isDereferenceablePointer() const;
-
- /// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
-  /// return the value in the PHI node corresponding to PredBB. If not, return
-  /// this value itself. This is useful if you want to know the value something
-  /// has in a predecessor block.
- Value *DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB);
-
- const Value *DoPHITranslation(const BasicBlock *CurBB,
- const BasicBlock *PredBB) const{
- return const_cast<Value*>(this)->DoPHITranslation(CurBB, PredBB);
- }
-
- /// MaximumAlignment - This is the greatest alignment value supported by
- /// load, store, and alloca instructions, and global values.
- static const unsigned MaximumAlignment = 1u << 29;
-
- /// mutateType - Mutate the type of this Value to be of the specified type.
- /// Note that this is an extremely dangerous operation which can create
- /// completely invalid IR very easily. It is strongly recommended that you
- /// recreate IR objects with the right types instead of mutating them in
- /// place.
- void mutateType(Type *Ty) {
- VTy = Ty;
- }
-
-protected:
- unsigned short getSubclassDataFromValue() const { return SubclassData; }
- void setValueSubclassData(unsigned short D) { SubclassData = D; }
-};
-
-inline raw_ostream &operator<<(raw_ostream &OS, const Value &V) {
- V.print(OS);
- return OS;
-}
-
-void Use::set(Value *V) {
- if (Val) removeFromList();
- Val = V;
- if (V) V->addUse(*this);
-}
-
-
-// isa - Provide some specializations of isa so that we don't have to include
-// the subtype header files to test whether the value is a subclass...
-//
-template <> struct isa_impl<Constant, Value> {
- static inline bool doit(const Value &Val) {
- return Val.getValueID() >= Value::ConstantFirstVal &&
- Val.getValueID() <= Value::ConstantLastVal;
- }
-};
-
-template <> struct isa_impl<Argument, Value> {
- static inline bool doit (const Value &Val) {
- return Val.getValueID() == Value::ArgumentVal;
- }
-};
-
-template <> struct isa_impl<InlineAsm, Value> {
- static inline bool doit(const Value &Val) {
- return Val.getValueID() == Value::InlineAsmVal;
- }
-};
-
-template <> struct isa_impl<Instruction, Value> {
- static inline bool doit(const Value &Val) {
- return Val.getValueID() >= Value::InstructionVal;
- }
-};
-
-template <> struct isa_impl<BasicBlock, Value> {
- static inline bool doit(const Value &Val) {
- return Val.getValueID() == Value::BasicBlockVal;
- }
-};
-
-template <> struct isa_impl<Function, Value> {
- static inline bool doit(const Value &Val) {
- return Val.getValueID() == Value::FunctionVal;
- }
-};
-
-template <> struct isa_impl<GlobalVariable, Value> {
- static inline bool doit(const Value &Val) {
- return Val.getValueID() == Value::GlobalVariableVal;
- }
-};
-
-template <> struct isa_impl<GlobalAlias, Value> {
- static inline bool doit(const Value &Val) {
- return Val.getValueID() == Value::GlobalAliasVal;
- }
-};
-
-template <> struct isa_impl<GlobalValue, Value> {
- static inline bool doit(const Value &Val) {
- return isa<GlobalVariable>(Val) || isa<Function>(Val) ||
- isa<GlobalAlias>(Val);
- }
-};
-
-template <> struct isa_impl<MDNode, Value> {
- static inline bool doit(const Value &Val) {
- return Val.getValueID() == Value::MDNodeVal;
- }
-};
-
-// Value* is only 4-byte aligned.
-template<>
-class PointerLikeTypeTraits<Value*> {
- typedef Value* PT;
-public:
- static inline void *getAsVoidPointer(PT P) { return P; }
- static inline PT getFromVoidPointer(void *P) {
- return static_cast<PT>(P);
- }
- enum { NumLowBitsAvailable = 2 };
-};
-
-} // End llvm namespace
-
-#endif
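
The use-list bookkeeping above is what makes replaceAllUsesWith safe to call; a minimal sketch:

#include <cassert>
#include "llvm/IR/Value.h"

// Swap NewV in for OldV everywhere, carrying the name across; after RAUW
// the use list of OldV is guaranteed empty, so it can be erased safely.
void replaceEverywhere(llvm::Value *OldV, llvm::Value *NewV) {
  NewV->takeName(OldV);
  OldV->replaceAllUsesWith(NewV);
  assert(OldV->use_empty() && "RAUW left dangling uses");
}
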
diff --git a/include/llvm/ValueSymbolTable.h b/include/llvm/ValueSymbolTable.h
deleted file mode 100644
index 1738cc4a7a79..000000000000
--- a/include/llvm/ValueSymbolTable.h
+++ /dev/null
@@ -1,133 +0,0 @@
-//===-- llvm/ValueSymbolTable.h - Implement a Value Symtab ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the name/Value symbol table for LLVM.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_VALUE_SYMBOL_TABLE_H
-#define LLVM_VALUE_SYMBOL_TABLE_H
-
-#include "llvm/Value.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
- template<typename ValueSubClass, typename ItemParentClass>
- class SymbolTableListTraits;
- class BasicBlock;
- class Function;
- class NamedMDNode;
- class Module;
- class StringRef;
-
-/// This class provides a symbol table of name/value pairs. It is essentially
-/// a std::map<std::string,Value*> but has a controlled interface provided by
-/// LLVM as well as ensuring uniqueness of names.
-///
-class ValueSymbolTable {
- friend class Value;
- friend class SymbolTableListTraits<Argument, Function>;
- friend class SymbolTableListTraits<BasicBlock, Function>;
- friend class SymbolTableListTraits<Instruction, BasicBlock>;
- friend class SymbolTableListTraits<Function, Module>;
- friend class SymbolTableListTraits<GlobalVariable, Module>;
- friend class SymbolTableListTraits<GlobalAlias, Module>;
-/// @name Types
-/// @{
-public:
- /// @brief A mapping of names to values.
- typedef StringMap<Value*> ValueMap;
-
- /// @brief An iterator over a ValueMap.
- typedef ValueMap::iterator iterator;
-
- /// @brief A const_iterator over a ValueMap.
- typedef ValueMap::const_iterator const_iterator;
-
-/// @}
-/// @name Constructors
-/// @{
-public:
-
- ValueSymbolTable() : vmap(0), LastUnique(0) {}
- ~ValueSymbolTable();
-
-/// @}
-/// @name Accessors
-/// @{
-public:
-
-  /// This method finds the value with the given \p Name in the
-  /// symbol table.
- /// @returns the value associated with the \p Name
- /// @brief Lookup a named Value.
- Value *lookup(StringRef Name) const { return vmap.lookup(Name); }
-
- /// @returns true iff the symbol table is empty
- /// @brief Determine if the symbol table is empty
- inline bool empty() const { return vmap.empty(); }
-
-  /// @brief The number of name/value pairs is returned.
- inline unsigned size() const { return unsigned(vmap.size()); }
-
- /// This function can be used from the debugger to display the
- /// content of the symbol table while debugging.
- /// @brief Print out symbol table on stderr
- void dump() const;
-
-/// @}
-/// @name Iteration
-/// @{
-public:
-  /// @brief Get an iterator that starts at the beginning of the symbol table.
- inline iterator begin() { return vmap.begin(); }
-
-  /// @brief Get a const_iterator that starts at the beginning of the symbol table.
- inline const_iterator begin() const { return vmap.begin(); }
-
- /// @brief Get an iterator to the end of the symbol table.
- inline iterator end() { return vmap.end(); }
-
- /// @brief Get a const_iterator to the end of the symbol table.
- inline const_iterator end() const { return vmap.end(); }
-
-/// @}
-/// @name Mutators
-/// @{
-private:
-  /// This method adds the provided value \p V to the symbol table.  The Value
- /// must have a name which is used to place the value in the symbol table.
- /// If the inserted name conflicts, this renames the value.
- /// @brief Add a named value to the symbol table
- void reinsertValue(Value *V);
-
- /// createValueName - This method attempts to create a value name and insert
- /// it into the symbol table with the specified name. If it conflicts, it
- /// auto-renames the name and returns that instead.
- ValueName *createValueName(StringRef Name, Value *V);
-
- /// This method removes a value from the symbol table. It leaves the
- /// ValueName attached to the value, but it is no longer inserted in the
- /// symtab.
- void removeValueName(ValueName *V);
-
-/// @}
-/// @name Internal Data
-/// @{
-private:
- ValueMap vmap; ///< The map that holds the symbol table.
- mutable uint32_t LastUnique; ///< Counter for tracking unique names
-
-/// @}
-};
-
-} // End llvm namespace
-
-#endif
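
A lookup sketch against the interface above, assuming Module::getValueSymbolTable as the access path (Module::getFunction is the usual convenience wrapper):

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"

// Resolve a name through the module-level symbol table; returns null when
// the name is absent or bound to something other than a Function.
llvm::Function *findFunction(llvm::Module &M, llvm::StringRef Name) {
  llvm::Value *V = M.getValueSymbolTable().lookup(Name);
  return llvm::dyn_cast_or_null<llvm::Function>(V);
}
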
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 752edd52b454..210b80ab63ef 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -28,14 +28,14 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
@@ -361,8 +361,28 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
}
namespace {
+  // Conservatively return true. Return false only if there is a single
+  // successor chain starting at "From" that ends, within a small number of
+  // steps, without reaching "To".
+ static bool hasPath(const BasicBlock *From, const BasicBlock *To) {
+ const unsigned MaxCheck = 5;
+ const BasicBlock *Current = From;
+ for (unsigned I = 0; I < MaxCheck; I++) {
+ unsigned NumSuccs = Current->getTerminator()->getNumSuccessors();
+ if (NumSuccs > 1)
+ return true;
+ if (NumSuccs == 0)
+ return false;
+ Current = Current->getTerminator()->getSuccessor(0);
+ if (Current == To)
+ return true;
+ }
+ return true;
+ }
+
/// Only find pointer captures which happen before the given instruction. Uses
/// the dominator tree to determine whether one instruction is before another.
+  /// Only supports the case where the Value is defined in the same basic
+  /// block as the given instruction and the use.
struct CapturesBefore : public CaptureTracker {
CapturesBefore(const Instruction *I, DominatorTree *DT)
: BeforeHere(I), DT(DT), Captured(false) {}
@@ -372,8 +392,15 @@ namespace {
bool shouldExplore(Use *U) {
Instruction *I = cast<Instruction>(U->getUser());
BasicBlock *BB = I->getParent();
- if (BeforeHere != I &&
- (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
+      // Explore this use only if it can reach "BeforeHere".
+      // If the use is not reachable from entry, there is no need to explore.
+ if (BeforeHere != I && !DT->isReachableFromEntry(BB))
+ return false;
+      // If the value is defined in the same basic block as the use and
+      // BeforeHere, there is no need to explore the use if BeforeHere
+      // dominates the use. Check whether there is a path from I to BeforeHere.
+ if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
+ !hasPath(BB, BeforeHere->getParent()))
return false;
return true;
}
@@ -381,8 +408,11 @@ namespace {
bool captured(Use *U) {
Instruction *I = cast<Instruction>(U->getUser());
BasicBlock *BB = I->getParent();
- if (BeforeHere != I &&
- (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
+ // Same logic as in shouldExplore.
+ if (BeforeHere != I && !DT->isReachableFromEntry(BB))
+ return false;
+ if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
+ !hasPath(BB, BeforeHere->getParent()))
return false;
Captured = true;
return true;
@@ -503,7 +533,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
bool llvm::isNoAliasCall(const Value *V) {
if (isa<CallInst>(V) || isa<InvokeInst>(V))
return ImmutableCallSite(cast<Instruction>(V))
- .paramHasAttr(0, Attributes::NoAlias);
+ .paramHasAttr(0, Attribute::NoAlias);
return false;
}
@@ -525,19 +555,3 @@ bool llvm::isIdentifiedObject(const Value *V) {
return A->hasNoAliasAttr() || A->hasByValAttr();
return false;
}
-
-/// isKnownNonNull - Return true if we know that the specified value is never
-/// null.
-bool llvm::isKnownNonNull(const Value *V) {
- // Alloca never returns null, malloc might.
- if (isa<AllocaInst>(V)) return true;
-
- // A byval argument is never null.
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValAttr();
-
- // Global values are not null unless extern weak.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return !GV->hasExternalWeakLinkage();
- return false;
-}
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 9f219f563739..9f4a47c77e03 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -13,9 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Passes.h"
-#include "llvm/Pass.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index ac72983a8d7b..a571463dfe12 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -17,19 +17,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SetVector.h"
using namespace llvm;
static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
@@ -44,6 +44,8 @@ static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
+static cl::opt<bool> EvalTBAA("evaluate-tbaa", cl::ReallyHidden);
+
namespace {
class AAEval : public FunctionPass {
unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
@@ -123,6 +125,15 @@ PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
}
}
+static inline void
+PrintLoadStoreResults(const char *Msg, bool P, const Value *V1,
+ const Value *V2, const Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": " << *V1
+ << " <-> " << *V2 << '\n';
+ }
+}
+
static inline bool isInterestingPointer(Value *V) {
return V->getType()->isPointerTy()
&& !isa<ConstantPointerNull>(V);
@@ -133,6 +144,8 @@ bool AAEval::runOnFunction(Function &F) {
SetVector<Value *> Pointers;
SetVector<CallSite> CallSites;
+ SetVector<Value *> Loads;
+ SetVector<Value *> Stores;
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
if (I->getType()->isPointerTy()) // Add all pointer arguments.
@@ -141,6 +154,10 @@ bool AAEval::runOnFunction(Function &F) {
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
if (I->getType()->isPointerTy()) // Add all pointer instructions.
Pointers.insert(&*I);
+ if (EvalTBAA && isa<LoadInst>(&*I))
+ Loads.insert(&*I);
+ if (EvalTBAA && isa<StoreInst>(&*I))
+ Stores.insert(&*I);
Instruction &Inst = *I;
if (CallSite CS = cast<Value>(&Inst)) {
Value *Callee = CS.getCalledValue();
@@ -197,6 +214,61 @@ bool AAEval::runOnFunction(Function &F) {
}
}
+ if (EvalTBAA) {
+    // Iterate over all (load, store) pairs.
+ for (SetVector<Value *>::iterator I1 = Loads.begin(), E = Loads.end();
+ I1 != E; ++I1) {
+ for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end();
+ I2 != E2; ++I2) {
+ switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)),
+ AA.getLocation(cast<StoreInst>(*I2)))) {
+ case AliasAnalysis::NoAlias:
+ PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
+ F.getParent());
+ ++NoAlias; break;
+ case AliasAnalysis::MayAlias:
+ PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
+ F.getParent());
+ ++MayAlias; break;
+ case AliasAnalysis::PartialAlias:
+ PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAlias; break;
+ case AliasAnalysis::MustAlias:
+ PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
+ F.getParent());
+ ++MustAlias; break;
+ }
+ }
+ }
+
+    // Iterate over all (store, store) pairs.
+ for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end();
+ I1 != E; ++I1) {
+ for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) {
+ switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)),
+ AA.getLocation(cast<StoreInst>(*I2)))) {
+ case AliasAnalysis::NoAlias:
+ PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
+ F.getParent());
+ ++NoAlias; break;
+ case AliasAnalysis::MayAlias:
+ PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
+ F.getParent());
+ ++MayAlias; break;
+ case AliasAnalysis::PartialAlias:
+ PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAlias; break;
+ case AliasAnalysis::MustAlias:
+ PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
+ F.getParent());
+ ++MustAlias; break;
+ }
+ }
+ }
+ }
+
// Mod/ref alias analysis: compare all pairs of calls and values
for (SetVector<CallSite>::iterator C = CallSites.begin(),
Ce = CallSites.end(); C != Ce; ++C) {
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index f15c05153e10..f6178e36f0a9 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -17,12 +17,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Passes.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Instructions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include <set>
using namespace llvm;
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 388c755cbd31..591052671d6e 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -13,13 +13,13 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/DataLayout.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 9dc81a6a630f..66e416cd140c 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -9,8 +9,8 @@
#include "llvm-c/Analysis.h"
#include "llvm-c/Initialization.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Analysis/Verifier.h"
+#include "llvm/InitializePasses.h"
#include <cstring>
using namespace llvm;
@@ -31,7 +31,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGPrinterPass(Registry);
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
- initializePrintDbgInfoPass(Registry);
initializeDependenceAnalysisPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
@@ -70,6 +69,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeRegionOnlyPrinterPass(Registry);
initializeScalarEvolutionPass(Registry);
initializeScalarEvolutionAliasAnalysisPass(Registry);
+ initializeTargetTransformInfoAnalysisGroup(Registry);
initializeTypeBasedAliasAnalysisPass(Registry);
}
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 4bb93ee88a49..ae6da1af0c4f 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -13,28 +13,28 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
@@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
const TargetLibraryInfo &TLI,
bool RoundToAlign = false) {
uint64_t Size;
- if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign))
+ if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign))
return Size;
return AliasAnalysis::UnknownSize;
}
@@ -631,7 +631,7 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
// For intrinsics, we can check the table.
if (unsigned iid = F->getIntrinsicID()) {
#define GET_INTRINSIC_MODREF_BEHAVIOR
-#include "llvm/Intrinsics.gen"
+#include "llvm/IR/Intrinsics.gen"
#undef GET_INTRINSIC_MODREF_BEHAVIOR
}
@@ -851,9 +851,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// pointers, figure out if the indexes to the GEP tell us anything about the
// derived pointer.
if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+ // Do the base pointers alias?
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ UnderlyingV2, UnknownSize, 0);
+
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
- if (V1Size == V2Size) {
+ if ((BaseAlias == MayAlias) && V1Size == V2Size) {
// Do the base pointers alias assuming type and size.
AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
V1TBAAInfo, UnderlyingV2,
@@ -881,10 +885,6 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
GEP1VariableIndices.clear();
}
}
-
- // Do the base pointers alias?
- AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
- UnderlyingV2, UnknownSize, 0);
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
@@ -1064,39 +1064,20 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
Location(V2, V2Size, V2TBAAInfo));
if (PN > V2)
std::swap(Locs.first, Locs.second);
-
- AliasResult Alias =
- aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo,
- PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)),
- V2Size, V2TBAAInfo);
- if (Alias == MayAlias)
- return MayAlias;
-
-  // If the first source of the PHI nodes is NoAlias and the other inputs are
- // the PHI node itself through some amount of recursion this does not add
- // any new information so just return NoAlias.
- // bb:
- // ptr = ptr2 + 1
- // loop:
- // ptr_phi = phi [bb, ptr], [loop, ptr_plus_one]
- // ptr2_phi = phi [bb, ptr2], [loop, ptr2_plus_one]
- // ...
- // ptr_plus_one = gep ptr_phi, 1
- // ptr2_plus_one = gep ptr2_phi, 1
-  // We assume for the recursion that the phis (ptr_phi, ptr2_phi) do
- // not alias each other.
- bool ArePhisAssumedNoAlias = false;
- AliasResult OrigAliasResult = NoAlias;
- if (Alias == NoAlias) {
- // Pretend the phis do not alias.
- assert(AliasCache.count(Locs) &&
- "There must exist an entry for the phi node");
- OrigAliasResult = AliasCache[Locs];
- AliasCache[Locs] = NoAlias;
- ArePhisAssumedNoAlias = true;
- }
-
- for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+  // Analyze the PHIs' inputs under the assumption that the PHIs are
+  // NoAlias.
+  // If the PHIs are May/MustAlias, there must (recursively) be an input
+  // operand from outside the PHIs' cycle that is MayAlias/MustAlias, or
+  // there must be an operation on the PHIs within the PHIs' value cycle
+  // that causes a MayAlias.
+ // Pretend the phis do not alias.
+ AliasResult Alias = NoAlias;
+ assert(AliasCache.count(Locs) &&
+ "There must exist an entry for the phi node");
+ AliasResult OrigAliasResult = AliasCache[Locs];
+ AliasCache[Locs] = NoAlias;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
AliasResult ThisAlias =
aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
@@ -1107,7 +1088,7 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
}
// Reset if speculation failed.
- if (ArePhisAssumedNoAlias && Alias != NoAlias)
+ if (Alias != NoAlias)
AliasCache[Locs] = OrigAliasResult;
return Alias;
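
A minimal standalone sketch (plain C++, not the LLVM API; the cache type, key, and merge rule are simplified stand-ins) of the speculation pattern the aliasPHI hunk above introduces: seed the cache entry with NoAlias, analyze every incoming value under that assumption, and restore the original entry if the speculation fails.

#include <map>
#include <vector>

enum AliasResult { NoAlias, MayAlias, MustAlias };

// Simplified merge rule: identical results stand, anything else
// degrades to MayAlias (the real MergeAliasResults is richer).
static AliasResult mergeAliasResults(AliasResult A, AliasResult B) {
  return A == B ? A : MayAlias;
}

static AliasResult speculatePHINoAlias(std::map<int, AliasResult> &Cache,
                                       int LocsKey,
                                       const std::vector<AliasResult> &Inputs) {
  AliasResult Orig = Cache[LocsKey]; // entry seeded by the caller
  Cache[LocsKey] = NoAlias;          // pretend the PHIs do not alias
  AliasResult Alias = NoAlias;
  for (AliasResult ThisAlias : Inputs) {
    Alias = mergeAliasResults(ThisAlias, Alias);
    if (Alias == MayAlias)
      break;                         // no input can improve this result
  }
  if (Alias != NoAlias)
    Cache[LocsKey] = Orig;           // reset if speculation failed
  return Alias;
}
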
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 8a660f737c9b..100e5c8ae7dd 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/InitializePasses.h"
-#include "llvm/Analysis/BlockFrequencyImpl.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 04a6560262cb..6c5885601fa3 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -11,14 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 76854000bd23..9b6879a42ed4 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -18,7 +18,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CFGPrinter.h"
-
#include "llvm/Pass.h"
using namespace llvm;
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index b3a40bee4211..597c767a8e04 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -13,12 +13,10 @@ add_llvm_library(LLVMAnalysis
CostModel.cpp
CodeMetrics.cpp
ConstantFolding.cpp
- DbgInfoPrinter.cpp
DependenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
IVUsers.cpp
- InlineCost.cpp
InstCount.cpp
InstructionSimplify.cpp
Interval.cpp
@@ -47,6 +45,7 @@ add_llvm_library(LLVMAnalysis
ProfileVerifierPass.cpp
ProfileDataLoader.cpp
ProfileDataLoaderPass.cpp
+ PtrUseVisitor.cpp
RegionInfo.cpp
RegionPass.cpp
RegionPrinter.cpp
@@ -55,6 +54,7 @@ add_llvm_library(LLVMAnalysis
ScalarEvolutionExpander.cpp
ScalarEvolutionNormalization.cpp
SparsePropagation.cpp
+ TargetTransformInfo.cpp
Trace.cpp
TypeBasedAliasAnalysis.cpp
ValueTracking.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index d9c02990a801..a7292706dfa8 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -18,7 +18,12 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CallSite.h"
+
using namespace llvm;
CaptureTracker::~CaptureTracker() {}
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 651a54be1b9e..8cda01a24c0d 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -12,121 +12,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Function.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/DataLayout.h"
using namespace llvm;
-/// callIsSmall - If a call is likely to lower to a single target instruction,
-/// or is otherwise deemed small return true.
-/// TODO: Perhaps calls like memcpy, strcpy, etc?
-bool llvm::callIsSmall(ImmutableCallSite CS) {
- if (isa<IntrinsicInst>(CS.getInstruction()))
- return true;
-
- const Function *F = CS.getCalledFunction();
- if (!F) return false;
-
- if (F->hasLocalLinkage()) return false;
-
- if (!F->hasName()) return false;
-
- StringRef Name = F->getName();
-
- // These will all likely lower to a single selection DAG node.
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
- Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
- Name == "sin" || Name == "sinf" || Name == "sinl" ||
- Name == "cos" || Name == "cosf" || Name == "cosl" ||
- Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
- return true;
-
- // These are all likely to be optimized into something smaller.
- if (Name == "pow" || Name == "powf" || Name == "powl" ||
- Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
- Name == "floor" || Name == "floorf" || Name == "ceil" ||
- Name == "round" || Name == "ffs" || Name == "ffsl" ||
- Name == "abs" || Name == "labs" || Name == "llabs")
- return true;
-
- return false;
-}
-
-bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) {
- if (isa<PHINode>(I))
- return true;
-
- // If a GEP has all constant indices, it will probably be folded with
- // a load/store.
- if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
- return GEP->hasAllConstantIndices();
-
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::objectsize:
- case Intrinsic::ptr_annotation:
- case Intrinsic::var_annotation:
- // These intrinsics don't count as size.
- return true;
- }
- }
-
- if (const CastInst *CI = dyn_cast<CastInst>(I)) {
- // Noop casts, including ptr <-> int, don't count.
- if (CI->isLosslessCast())
- return true;
-
- Value *Op = CI->getOperand(0);
- // An inttoptr cast is free so long as the input is a legal integer type
- // which doesn't contain values outside the range of a pointer.
- if (isa<IntToPtrInst>(CI) && TD &&
- TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
- Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits())
- return true;
-
- // A ptrtoint cast is free so long as the result is large enough to store
- // the pointer, and a legal integer type.
- if (isa<PtrToIntInst>(CI) && TD &&
- TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
- Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits())
- return true;
-
- // trunc to a native type is free (assuming the target has compare and
- // shift-right of the same width).
- if (TD && isa<TruncInst>(CI) &&
- TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
- return true;
- // Result of a cmp instruction is often extended (to be used by other
- // cmp instructions, logical or return instructions). These are usually
- // nop on most sane targets.
- if (isa<CmpInst>(CI->getOperand(0)))
- return true;
- }
-
- return false;
-}
-
/// analyzeBasicBlock - Fill in the current structure with information gleaned
/// from the specified block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
- const DataLayout *TD) {
+ const TargetTransformInfo &TTI) {
++NumBlocks;
unsigned NumInstsBeforeThisBB = NumInsts;
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
II != E; ++II) {
- if (isInstructionFree(II, TD))
- continue;
-
// Special handling for calls.
if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
ImmutableCallSite CS(cast<Instruction>(II));
@@ -144,12 +45,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
// for that case.
if (F == BB->getParent())
isRecursive = true;
- }
-
- if (!callIsSmall(CS)) {
- // Each argument to a call takes on average one instruction to set up.
- NumInsts += CS.arg_size();
+ if (TTI.isLoweredToCall(F))
+ ++NumCalls;
+ } else {
// We don't want inline asm to count as a call - that would prevent loop
// unrolling. The argument setup cost is still real, though.
if (!isa<InlineAsm>(CS.getCalledValue()))
@@ -165,7 +64,15 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
++NumVectorInsts;
- ++NumInsts;
+ if (const CallInst *CI = dyn_cast<CallInst>(II))
+ if (CI->hasFnAttr(Attribute::NoDuplicate))
+ notDuplicatable = true;
+
+ if (const InvokeInst *InvI = dyn_cast<InvokeInst>(II))
+ if (InvI->hasFnAttr(Attribute::NoDuplicate))
+ notDuplicatable = true;
+
+ NumInsts += TTI.getUserCost(&*II);
}
if (isa<ReturnInst>(BB->getTerminator()))
@@ -182,23 +89,8 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
// if someone is using a blockaddress without an indirectbr, and that
// reference somehow ends up in another function or global, we probably
// don't want to inline this function.
- if (isa<IndirectBrInst>(BB->getTerminator()))
- containsIndirectBr = true;
+ notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator());
// Remember NumInsts for this BB.
NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
}
-
-void CodeMetrics::analyzeFunction(Function *F, const DataLayout *TD) {
- // If this function contains a call that "returns twice" (e.g., setjmp or
- // _setjmp) and it isn't marked with "returns twice" itself, never inline it.
- // This is a hack because we depend on the user marking their local variables
- // as volatile if they are live across a setjmp call, and they probably
- // won't do this in callers.
- exposesReturnsTwice = F->callsFunctionThatReturnsTwice() &&
- !F->getFnAttributes().hasAttribute(Attributes::ReturnsTwice);
-
- // Look at the size of the callee.
- for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- analyzeBasicBlock(&*BB, TD);
-}
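
The net effect of the CodeMetrics rewrite above, as a self-contained sketch; Inst and the UserCost callback are illustrative stand-ins for Instruction and TTI.getUserCost, not LLVM types. Per-instruction size now comes from the target, and notDuplicatable latches once a noduplicate call or an indirectbr terminator is seen.

#include <functional>
#include <vector>

struct Inst {
  bool IsNoDuplicateCall; // call/invoke carrying Attribute::NoDuplicate
  bool IsIndirectBr;      // indirectbr terminator
};

struct MetricsSketch {
  unsigned NumInsts = 0;
  bool notDuplicatable = false;

  void analyzeBlock(const std::vector<Inst> &BB,
                    const std::function<unsigned(const Inst &)> &UserCost) {
    for (const Inst &I : BB) {
      notDuplicatable |= I.IsNoDuplicateCall; // block must not be duplicated
      notDuplicatable |= I.IsIndirectBr;      // likewise for indirectbr
      NumInsts += UserCost(I);                // TTI.getUserCost(&I) upstream
    }
  }
};
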
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 91a5b84e8a63..09d7608c51da 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -9,30 +9,30 @@
//
// This file defines routines for folding instructions into constants.
//
-// Also, to supplement the basic VMCore ConstantExpr simplifications,
+// Also, to supplement the basic IR ConstantExpr simplifications,
// this file defines some additional folding routines that can make use of
-// DataLayout information. These functions cannot go in VMCore due to library
+// DataLayout information. These functions cannot go in IR due to library
// dependency issues.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Operator.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FEnv.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/FEnv.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include <cerrno>
#include <cmath>
using namespace llvm;
@@ -54,13 +54,12 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// Handle a vector->integer cast.
if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) {
- ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
- if (CDV == 0)
+ VectorType *VTy = dyn_cast<VectorType>(C->getType());
+ if (VTy == 0)
return ConstantExpr::getBitCast(C, DestTy);
- unsigned NumSrcElts = CDV->getType()->getNumElements();
-
- Type *SrcEltTy = CDV->getType()->getElementType();
+ unsigned NumSrcElts = VTy->getNumElements();
+ Type *SrcEltTy = VTy->getElementType();
// If the vector is a vector of floating point, convert it to vector of int
// to simplify things.
@@ -68,11 +67,14 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
Type *SrcIVTy =
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
- // Ask VMCore to do the conversion now that #elts line up.
+ // Ask IR to do the conversion now that #elts line up.
C = ConstantExpr::getBitCast(C, SrcIVTy);
- CDV = cast<ConstantDataVector>(C);
}
+ ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
+ if (CDV == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
// Now that we know that the input value is a vector of integers, just shift
// and insert them into our result.
unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy);
@@ -104,7 +106,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
return ConstantExpr::getBitCast(C, DestTy);
- // If the element types match, VMCore can fold it.
+ // If the element types match, IR can fold it.
unsigned NumDstElt = DestVTy->getNumElements();
unsigned NumSrcElt = C->getType()->getVectorNumElements();
if (NumDstElt == NumSrcElt)
@@ -131,7 +133,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// Recursively handle this integer conversion, if possible.
C = FoldBitCast(C, DestIVTy, TD);
- // Finally, VMCore can handle this now that #elts line up.
+ // Finally, IR can handle this now that #elts line up.
return ConstantExpr::getBitCast(C, DestTy);
}
@@ -141,9 +143,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
Type *SrcIVTy =
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
- // Ask VMCore to do the conversion now that #elts line up.
+ // Ask IR to do the conversion now that #elts line up.
C = ConstantExpr::getBitCast(C, SrcIVTy);
- // If VMCore wasn't able to fold it, bail out.
+ // If IR wasn't able to fold it, bail out.
if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector.
!isa<ConstantDataVector>(C))
return C;
@@ -218,10 +220,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
/// from a global, return the global and the constant. Because of
/// constantexprs, this function is recursive.
static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
- int64_t &Offset, const DataLayout &TD) {
+ APInt &Offset, const DataLayout &TD) {
// Trivial case, constant is the global.
if ((GV = dyn_cast<GlobalValue>(C))) {
- Offset = 0;
+ Offset.clearAllBits();
return true;
}
@@ -235,34 +237,13 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
- if (CE->getOpcode() == Instruction::GetElementPtr) {
- // Cannot compute this if the element type of the pointer is missing size
- // info.
- if (!cast<PointerType>(CE->getOperand(0)->getType())
- ->getElementType()->isSized())
- return false;
-
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
// If the base isn't a global+constant, we aren't either.
if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
return false;
// Otherwise, add any offset that our operands provide.
- gep_type_iterator GTI = gep_type_begin(CE);
- for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end();
- i != e; ++i, ++GTI) {
- ConstantInt *CI = dyn_cast<ConstantInt>(*i);
- if (!CI) return false; // Index isn't a simple constant?
- if (CI->isZero()) continue; // Not adding anything.
-
- if (StructType *ST = dyn_cast<StructType>(*GTI)) {
- // N = N + Offset
- Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue());
- } else {
- SequentialType *SQT = cast<SequentialType>(*GTI);
- Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue();
- }
- }
- return true;
+ return GEP->accumulateConstantOffset(TD, Offset);
}
return false;
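
A hedged usage sketch of the replacement above: GEPOperator::accumulateConstantOffset, named in the hunk, folds every constant index into a pointer-width APInt and replaces the hand-rolled struct/array walk. The helper name is ours; the calls and headers are the 2013-era ones this import ships.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Operator.h"
using namespace llvm;

// Returns true and fills Offset when every GEP index is constant.
static bool constantGEPOffset(GEPOperator *GEP, const DataLayout &TD,
                              APInt &Offset) {
  // The caller sizes Offset to the pointer width first, as above:
  //   APInt Offset(TD.getPointerSizeInBits(), 0);
  return GEP->accumulateConstantOffset(TD, Offset);
}
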
@@ -310,6 +291,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD);
return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
}
+    if (CFP->getType()->isHalfTy()) {
+ C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ }
return false;
}
@@ -402,7 +387,9 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
// that address spaces don't matter here since we're not going to result in
// an actual new load.
Type *MapTy;
- if (LoadTy->isFloatTy())
+ if (LoadTy->isHalfTy())
+ MapTy = Type::getInt16PtrTy(C->getContext());
+ else if (LoadTy->isFloatTy())
MapTy = Type::getInt32PtrTy(C->getContext());
else if (LoadTy->isDoubleTy())
MapTy = Type::getInt64PtrTy(C->getContext());
@@ -423,7 +410,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
if (BytesLoaded > 32 || BytesLoaded == 0) return 0;
GlobalValue *GVal;
- int64_t Offset;
+ APInt Offset(TD.getPointerSizeInBits(), 0);
if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
return 0;
@@ -434,14 +421,15 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
// If we're loading off the beginning of the global, some bytes may be valid,
// but we don't try to handle this.
- if (Offset < 0) return 0;
+ if (Offset.isNegative()) return 0;
// If we're not accessing anything in this constant, the result is undefined.
- if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType()))
+ if (Offset.getZExtValue() >=
+ TD.getTypeAllocSize(GV->getInitializer()->getType()))
return UndefValue::get(IntType);
unsigned char RawBytes[32] = {0};
- if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes,
+ if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes,
BytesLoaded, TD))
return 0;
@@ -550,10 +538,10 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
/// Attempt to symbolically evaluate the result of a binary operator merging
-/// these together. If target data info is available, it is provided as TD,
-/// otherwise TD is null.
+/// these together. If target data info is available, it is provided as DL,
+/// otherwise DL is null.
static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
- Constant *Op1, const DataLayout *TD){
+ Constant *Op1, const DataLayout *DL){
// SROA
// Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
@@ -561,17 +549,44 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
// bits.
+ if (Opc == Instruction::And && DL) {
+ unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType());
+ APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
+ APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
+ ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL);
+ ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL);
+ if ((KnownOne1 | KnownZero0).isAllOnesValue()) {
+ // All the bits of Op0 that the 'and' could be masking are already zero.
+ return Op0;
+ }
+ if ((KnownOne0 | KnownZero1).isAllOnesValue()) {
+ // All the bits of Op1 that the 'and' could be masking are already zero.
+ return Op1;
+ }
+
+ APInt KnownZero = KnownZero0 | KnownZero1;
+ APInt KnownOne = KnownOne0 & KnownOne1;
+ if ((KnownZero | KnownOne).isAllOnesValue()) {
+ return ConstantInt::get(Op0->getType(), KnownOne);
+ }
+ }
+
// If the constant expr is something like &A[123] - &A[4].f, fold this into a
// constant. This happens frequently when iterating over a global array.
- if (Opc == Instruction::Sub && TD) {
+ if (Opc == Instruction::Sub && DL) {
GlobalValue *GV1, *GV2;
- int64_t Offs1, Offs2;
+ unsigned PtrSize = DL->getPointerSizeInBits();
+ unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
+ APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0);
- if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD))
- if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) &&
+ if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL))
+ if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) &&
GV1 == GV2) {
// (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
- return ConstantInt::get(Op0->getType(), Offs1-Offs2);
+      // PtrToInt may change the bitwidth, so we have to convert to the
+      // right size first.
+ return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
+ Offs2.zextOrTrunc(OpSize));
}
}
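
A worked example of the ComputeMaskedBits-based 'and' fold added above; the bit patterns are illustrative, not taken from the source.

// Suppose 'and i32 %a, %b' where the known bits say:
//   KnownZero0 = 0xFFFF0000   (high half of Op0 certainly zero)
//   KnownOne1  = 0x0000FFFF   (low half of Op1 certainly one)
// Then KnownOne1 | KnownZero0 == 0xFFFFFFFF: every bit the 'and' could
// clear in Op0 is already zero, so the expression folds to Op0. The
// symmetric test folds to Op1, and when KnownZero | KnownOne covers
// every bit the result is the fully known constant KnownOne.
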
@@ -1104,6 +1119,13 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
bool
llvm::canConstantFoldCallTo(const Function *F) {
switch (F->getIntrinsicID()) {
+ case Intrinsic::fabs:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::floor:
case Intrinsic::sqrt:
case Intrinsic::pow:
case Intrinsic::powi:
@@ -1142,8 +1164,7 @@ llvm::canConstantFoldCallTo(const Function *F) {
switch (Name[0]) {
default: return false;
case 'a':
- return Name == "acos" || Name == "asin" ||
- Name == "atan" || Name == "atan2";
+    return Name == "acos" || Name == "asin" || Name == "atan" || Name == "atan2";
case 'c':
return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh";
case 'e':
@@ -1171,11 +1192,17 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
return 0;
}
+ if (Ty->isHalfTy()) {
+ APFloat APF(V);
+ bool unused;
+ APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
+ return ConstantFP::get(Ty->getContext(), APF);
+ }
if (Ty->isFloatTy())
return ConstantFP::get(Ty->getContext(), APFloat((float)V));
if (Ty->isDoubleTy())
return ConstantFP::get(Ty->getContext(), APFloat(V));
- llvm_unreachable("Can only constant fold float/double");
+ llvm_unreachable("Can only constant fold half/float/double");
}
static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
@@ -1187,11 +1214,17 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
return 0;
}
+ if (Ty->isHalfTy()) {
+ APFloat APF(V);
+ bool unused;
+ APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
+ return ConstantFP::get(Ty->getContext(), APF);
+ }
if (Ty->isFloatTy())
return ConstantFP::get(Ty->getContext(), APFloat((float)V));
if (Ty->isDoubleTy())
return ConstantFP::get(Ty->getContext(), APFloat(V));
- llvm_unreachable("Can only constant fold float/double");
+ llvm_unreachable("Can only constant fold half/float/double");
}
/// ConstantFoldConvertToInt - Attempt to fold an SSE floating point to integer
@@ -1243,7 +1276,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
if (!TLI)
return 0;
- if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+ if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
/// We only fold functions with finite arguments. Folding NaN and inf is
@@ -1256,8 +1289,46 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
/// the host native double versions. Float versions are not called
/// directly but for all these it is true (float)(f((double)arg)) ==
/// f(arg). Long double not supported yet.
- double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() :
- Op->getValueAPF().convertToDouble();
+ double V;
+ if (Ty->isFloatTy())
+ V = Op->getValueAPF().convertToFloat();
+ else if (Ty->isDoubleTy())
+ V = Op->getValueAPF().convertToDouble();
+ else {
+ bool unused;
+ APFloat APF = Op->getValueAPF();
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+ V = APF.convertToDouble();
+ }
+
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::fabs:
+ return ConstantFoldFP(fabs, V, Ty);
+#if HAVE_LOG2
+ case Intrinsic::log2:
+ return ConstantFoldFP(log2, V, Ty);
+#endif
+#if HAVE_LOG
+ case Intrinsic::log:
+ return ConstantFoldFP(log, V, Ty);
+#endif
+#if HAVE_LOG10
+ case Intrinsic::log10:
+ return ConstantFoldFP(log10, V, Ty);
+#endif
+#if HAVE_EXP
+ case Intrinsic::exp:
+ return ConstantFoldFP(exp, V, Ty);
+#endif
+#if HAVE_EXP2
+ case Intrinsic::exp2:
+ return ConstantFoldFP(exp2, V, Ty);
+#endif
+ case Intrinsic::floor:
+ return ConstantFoldFP(floor, V, Ty);
+ }
+
switch (Name[0]) {
case 'a':
if (Name == "acos" && TLI->has(LibFunc::acos))
@@ -1299,7 +1370,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10))
return ConstantFoldFP(log10, V, Ty);
else if (F->getIntrinsicID() == Intrinsic::sqrt &&
- (Ty->isFloatTy() || Ty->isDoubleTy())) {
+ (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) {
if (V >= -0.0)
return ConstantFoldFP(sqrt, V, Ty);
else // Undefined
@@ -1337,7 +1408,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
case Intrinsic::ctpop:
return ConstantInt::get(Ty, Op->getValue().countPopulation());
case Intrinsic::convert_from_fp16: {
- APFloat Val(Op->getValue());
+ APFloat Val(APFloat::IEEEhalf, Op->getValue());
bool lost = false;
APFloat::opStatus status =
@@ -1391,18 +1462,35 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
if (Operands.size() == 2) {
if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
- if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+ if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
- double Op1V = Ty->isFloatTy() ?
- (double)Op1->getValueAPF().convertToFloat() :
- Op1->getValueAPF().convertToDouble();
+ double Op1V;
+ if (Ty->isFloatTy())
+ Op1V = Op1->getValueAPF().convertToFloat();
+ else if (Ty->isDoubleTy())
+ Op1V = Op1->getValueAPF().convertToDouble();
+ else {
+ bool unused;
+ APFloat APF = Op1->getValueAPF();
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+ Op1V = APF.convertToDouble();
+ }
+
if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (Op2->getType() != Op1->getType())
return 0;
- double Op2V = Ty->isFloatTy() ?
- (double)Op2->getValueAPF().convertToFloat():
- Op2->getValueAPF().convertToDouble();
+ double Op2V;
+ if (Ty->isFloatTy())
+ Op2V = Op2->getValueAPF().convertToFloat();
+ else if (Ty->isDoubleTy())
+ Op2V = Op2->getValueAPF().convertToDouble();
+ else {
+ bool unused;
+ APFloat APF = Op2->getValueAPF();
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+ Op2V = APF.convertToDouble();
+ }
if (F->getIntrinsicID() == Intrinsic::pow) {
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
@@ -1416,6 +1504,10 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
if (Name == "atan2" && TLI->has(LibFunc::atan2))
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isHalfTy())
+ return ConstantFP::get(F->getContext(),
+ APFloat((float)std::pow((float)Op1V,
+ (int)Op2C->getZExtValue())));
if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
return ConstantFP::get(F->getContext(),
APFloat((float)std::pow((float)Op1V,
@@ -1468,12 +1560,12 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
}
case Intrinsic::cttz:
- // FIXME: This should check for Op2 == 1, and become unreachable if
- // Op1 == 0.
+ if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef.
+ return UndefValue::get(Ty);
return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
case Intrinsic::ctlz:
- // FIXME: This should check for Op2 == 1, and become unreachable if
- // Op1 == 0.
+ if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef.
+ return UndefValue::get(Ty);
return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
}
}
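
The half-precision pattern that recurs through the hunks above, factored into a small sketch. It mirrors the exact APFloat calls visible in the diff and assumes the 2013-era headers, where APFloat::IEEEhalf and APFloat::IEEEdouble are plain members rather than functions.

#include "llvm/ADT/APFloat.h"
using namespace llvm;

// Widen a half (or any) APFloat to a host double for the libm call.
static double widenToDouble(APFloat APF) {
  bool unused;
  APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
  return APF.convertToDouble();
}

// Narrow the folded host result back to IEEE half.
static APFloat narrowToHalf(double V) {
  APFloat APF(V);
  bool unused;
  APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
  return APF;
}
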
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 5adbf458104e..98a7780ad9a6 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -8,20 +8,24 @@
//===----------------------------------------------------------------------===//
//
// This file defines the cost model analysis. It provides a very basic cost
-// estimation for LLVM-IR. The cost result can be thought of as cycles, but it
-// is really unit-less. The estimated cost is ment to be used for comparing
-// alternatives.
+// estimation for LLVM-IR. This analysis uses the services of the codegen
+// to approximate the cost of any IR instruction when lowered to machine
+// instructions. The cost results are unit-less and the cost number represents
+// the throughput of the machine assuming that all loads hit the cache, all
+// branches are predicted, etc. The cost numbers can be added in order to
+// compare two or more transformation alternatives.
//
//===----------------------------------------------------------------------===//
#define CM_NAME "cost-model"
#define DEBUG_TYPE CM_NAME
#include "llvm/Analysis/Passes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
-#include "llvm/TargetTransformInfo.h"
-#include "llvm/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -31,7 +35,7 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
- CostModelAnalysis() : FunctionPass(ID), F(0), VTTI(0) {
+ CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) {
initializeCostModelAnalysisPass(
*PassRegistry::getPassRegistry());
}
@@ -40,7 +44,7 @@ namespace {
/// Returns -1 if the cost is unknown.
/// Note, this method does not cache the cost calculation and it
/// can be expensive in some cases.
- unsigned getInstructionCost(Instruction *I) const;
+ unsigned getInstructionCost(const Instruction *I) const;
private:
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -49,8 +53,8 @@ namespace {
/// The function that we analyze.
Function *F;
- /// Vector target information.
- const VectorTargetTransformInfo *VTTI;
+ /// Target information.
+ const TargetTransformInfo *TTI;
};
} // End of anonymous namespace
@@ -72,25 +76,49 @@ CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool
CostModelAnalysis::runOnFunction(Function &F) {
this->F = &F;
-
- // Target information.
- TargetTransformInfo *TTI;
TTI = getAnalysisIfAvailable<TargetTransformInfo>();
- if (TTI)
- VTTI = TTI->getVectorTargetTransformInfo();
return false;
}
-unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const {
- if (!VTTI)
+static bool isReverseVectorMask(SmallVector<int, 16> &Mask) {
+ for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
+ if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
+ return false;
+ return true;
+}
+
+static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
+ TargetTransformInfo::OperandValueKind OpInfo =
+ TargetTransformInfo::OK_AnyValue;
+
+ // Check for a splat of a constant.
+ ConstantDataVector *CDV = 0;
+ if ((CDV = dyn_cast<ConstantDataVector>(V)))
+ if (CDV->getSplatValue() != NULL)
+ OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+ ConstantVector *CV = 0;
+ if ((CV = dyn_cast<ConstantVector>(V)))
+ if (CV->getSplatValue() != NULL)
+ OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+
+ return OpInfo;
+}
+
+unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
+ if (!TTI)
return -1;
switch (I->getOpcode()) {
+ case Instruction::GetElementPtr:{
+ Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
+ return TTI->getAddressComputationCost(ValTy);
+ }
+
case Instruction::Ret:
case Instruction::PHI:
case Instruction::Br: {
- return VTTI->getCFInstrCost(I->getOpcode());
+ return TTI->getCFInstrCost(I->getOpcode());
}
case Instruction::Add:
case Instruction::FAdd:
@@ -110,28 +138,33 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType());
+ TargetTransformInfo::OperandValueKind Op1VK =
+ getOperandInfo(I->getOperand(0));
+ TargetTransformInfo::OperandValueKind Op2VK =
+ getOperandInfo(I->getOperand(1));
+ return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
+ Op2VK);
}
case Instruction::Select: {
- SelectInst *SI = cast<SelectInst>(I);
+ const SelectInst *SI = cast<SelectInst>(I);
Type *CondTy = SI->getCondition()->getType();
- return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
+ return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
}
case Instruction::ICmp:
case Instruction::FCmp: {
Type *ValTy = I->getOperand(0)->getType();
- return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
+ return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
}
case Instruction::Store: {
- StoreInst *SI = cast<StoreInst>(I);
+ const StoreInst *SI = cast<StoreInst>(I);
Type *ValTy = SI->getValueOperand()->getType();
- return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
+ return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
SI->getAlignment(),
SI->getPointerAddressSpace());
}
case Instruction::Load: {
- LoadInst *LI = cast<LoadInst>(I);
- return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(),
+ const LoadInst *LI = cast<LoadInst>(I);
+ return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
LI->getAlignment(),
LI->getPointerAddressSpace());
}
@@ -148,26 +181,47 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const {
case Instruction::FPTrunc:
case Instruction::BitCast: {
Type *SrcTy = I->getOperand(0)->getType();
- return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
+ return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
}
case Instruction::ExtractElement: {
- ExtractElementInst * EEI = cast<ExtractElementInst>(I);
+ const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
unsigned Idx = -1;
if (CI)
Idx = CI->getZExtValue();
- return VTTI->getVectorInstrCost(I->getOpcode(),
- EEI->getOperand(0)->getType(), Idx);
+ return TTI->getVectorInstrCost(I->getOpcode(),
+ EEI->getOperand(0)->getType(), Idx);
}
case Instruction::InsertElement: {
- InsertElementInst * IE = cast<InsertElementInst>(I);
+ const InsertElementInst * IE = cast<InsertElementInst>(I);
ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
unsigned Idx = -1;
if (CI)
Idx = CI->getZExtValue();
- return VTTI->getVectorInstrCost(I->getOpcode(),
- IE->getType(), Idx);
+ return TTI->getVectorInstrCost(I->getOpcode(),
+ IE->getType(), Idx);
}
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+ Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
+ unsigned NumVecElems = VecTypOp0->getVectorNumElements();
+ SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+
+ if (NumVecElems == Mask.size() && isReverseVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
+ 0);
+ return -1;
+ }
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ SmallVector<Type*, 4> Tys;
+ for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
+ Tys.push_back(II->getArgOperand(J)->getType());
+
+ return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
+ Tys);
+ }
+ return -1;
default:
// We don't have any information on this instruction.
return -1;
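
A standalone restatement of the isReverseVectorMask check added above, with undef lanes encoded as -1. Note the upstream loop tests Mask[i] > 0, which also skips a defined lane value of 0; this sketch spells out what we take to be the intended semantics, so treat it as a reading, not a drop-in replacement.

#include <cstddef>
#include <vector>

// A shuffle mask is a reverse iff every defined element i selects the
// mirrored lane MaskSize - 1 - i; undef lanes (-1) are ignored.
static bool isReverseMaskSketch(const std::vector<int> &Mask) {
  for (std::size_t i = 0, e = Mask.size(); i != e; ++i)
    if (Mask[i] >= 0 && Mask[i] != (int)(e - 1 - i))
      return false;
  return true;
}
// Examples: {3,2,1,0} -> true; {-1,2,1,0} -> true; {0,1,2,3} -> false.
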
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
deleted file mode 100644
index 41cd34c07be0..000000000000
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ /dev/null
@@ -1,224 +0,0 @@
-//===- DbgInfoPrinter.cpp - Print debug info in a human readable form ------==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass that prints instructions, and associated debug
-// info:
-//
-// - source/line/col information
-// - original variable name
-// - original type name
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-static cl::opt<bool>
-PrintDirectory("print-fullpath",
- cl::desc("Print fullpath when printing debug info"),
- cl::Hidden);
-
-namespace {
- class PrintDbgInfo : public FunctionPass {
- raw_ostream &Out;
- void printVariableDeclaration(const Value *V);
- public:
- static char ID; // Pass identification
- PrintDbgInfo() : FunctionPass(ID), Out(errs()) {
- initializePrintDbgInfoPass(*PassRegistry::getPassRegistry());
- }
-
- virtual bool runOnFunction(Function &F);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
- };
- char PrintDbgInfo::ID = 0;
-}
-
-INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
- "Print debug info in human readable form", false, false)
-
-FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
-
-/// Find the debug info descriptor corresponding to this global variable.
-static Value *findDbgGlobalDeclare(GlobalVariable *V) {
- const Module *M = V->getParent();
- NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
- if (!NMD)
- return 0;
-
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
- if (!DIG.isGlobalVariable())
- continue;
- if (DIGlobalVariable(DIG).getGlobal() == V)
- return DIG;
- }
- return 0;
-}
-
-/// Find the debug info descriptor corresponding to this function.
-static Value *findDbgSubprogramDeclare(Function *V) {
- const Module *M = V->getParent();
- NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp");
- if (!NMD)
- return 0;
-
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
- if (!DIG.isSubprogram())
- continue;
- if (DISubprogram(DIG).getFunction() == V)
- return DIG;
- }
- return 0;
-}
-
-/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
-/// It looks through pointer casts too.
-static const DbgDeclareInst *findDbgDeclare(const Value *V) {
- V = V->stripPointerCasts();
-
- if (!isa<Instruction>(V) && !isa<Argument>(V))
- return 0;
-
- const Function *F = NULL;
- if (const Instruction *I = dyn_cast<Instruction>(V))
- F = I->getParent()->getParent();
- else if (const Argument *A = dyn_cast<Argument>(V))
- F = A->getParent();
-
- for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
- for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
- BI != BE; ++BI)
- if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
- if (DDI->getAddress() == V)
- return DDI;
-
- return 0;
-}
-
-static bool getLocationInfo(const Value *V, std::string &DisplayName,
- std::string &Type, unsigned &LineNo,
- std::string &File, std::string &Dir) {
- DICompileUnit Unit;
- DIType TypeD;
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
- Value *DIGV = findDbgGlobalDeclare(GV);
- if (!DIGV) return false;
- DIGlobalVariable Var(cast<MDNode>(DIGV));
-
- StringRef D = Var.getDisplayName();
- if (!D.empty())
- DisplayName = D;
- LineNo = Var.getLineNumber();
- Unit = Var.getCompileUnit();
- TypeD = Var.getType();
- } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))){
- Value *DIF = findDbgSubprogramDeclare(F);
- if (!DIF) return false;
- DISubprogram Var(cast<MDNode>(DIF));
-
- StringRef D = Var.getDisplayName();
- if (!D.empty())
- DisplayName = D;
- LineNo = Var.getLineNumber();
- Unit = Var.getCompileUnit();
- TypeD = Var.getType();
- } else {
- const DbgDeclareInst *DDI = findDbgDeclare(V);
- if (!DDI) return false;
- DIVariable Var(cast<MDNode>(DDI->getVariable()));
-
- StringRef D = Var.getName();
- if (!D.empty())
- DisplayName = D;
- LineNo = Var.getLineNumber();
- Unit = Var.getCompileUnit();
- TypeD = Var.getType();
- }
-
- StringRef T = TypeD.getName();
- if (!T.empty())
- Type = T;
- StringRef F = Unit.getFilename();
- if (!F.empty())
- File = F;
- StringRef D = Unit.getDirectory();
- if (!D.empty())
- Dir = D;
- return true;
-}
-
-void PrintDbgInfo::printVariableDeclaration(const Value *V) {
- std::string DisplayName, File, Directory, Type;
- unsigned LineNo = 0;
-
- if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory))
- return;
-
- Out << "; ";
- WriteAsOperand(Out, V, false, 0);
- if (isa<Function>(V))
- Out << " is function " << DisplayName
- << " of type " << Type << " declared at ";
- else
- Out << " is variable " << DisplayName
- << " of type " << Type << " declared at ";
-
- if (PrintDirectory)
- Out << Directory << "/";
-
- Out << File << ":" << LineNo << "\n";
-}
-
-bool PrintDbgInfo::runOnFunction(Function &F) {
- if (F.isDeclaration())
- return false;
-
- Out << "function " << F.getName() << "\n\n";
-
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *BB = I;
-
- if (I != F.begin() && (pred_begin(BB) == pred_end(BB)))
- // Skip dead blocks.
- continue;
-
- Out << BB->getName();
- Out << ":";
-
- Out << "\n";
-
- for (BasicBlock::const_iterator i = BB->begin(), e = BB->end();
- i != e; ++i) {
-
- printVariableDeclaration(i);
-
- if (const User *U = dyn_cast<User>(i)) {
- for(unsigned i=0;i<U->getNumOperands();i++)
- printVariableDeclaration(U->getOperand(i));
- }
- }
- }
- return false;
-}
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 95ac5ea233b1..cbc71bd6e739 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -55,12 +55,12 @@
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
@@ -145,22 +145,20 @@ void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
// Used to test the dependence analyzer.
-// Looks through the function, noting the first store instruction
-// and the first load instruction
-// (which always follows the first load in our tests).
-// Calls depends() and prints out the result.
+// Looks through the function, noting loads and stores.
+// Calls depends() on every possible pair and prints out the result.
// Ignores all other instructions.
static
void dumpExampleDependence(raw_ostream &OS, Function *F,
DependenceAnalysis *DA) {
for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F);
SrcI != SrcE; ++SrcI) {
- if (const StoreInst *Src = dyn_cast<StoreInst>(&*SrcI)) {
+ if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) {
for (inst_iterator DstI = SrcI, DstE = inst_end(F);
DstI != DstE; ++DstI) {
- if (const LoadInst *Dst = dyn_cast<LoadInst>(&*DstI)) {
+ if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) {
OS << "da analyze - ";
- if (Dependence *D = DA->depends(Src, Dst, true)) {
+ if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) {
D->dump(OS);
for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
if (D->isSplitable(Level)) {
@@ -173,7 +171,6 @@ void dumpExampleDependence(raw_ostream &OS, Function *F,
}
else
OS << "none!\n";
- return;
}
}
}
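
What the dumpExampleDependence change above amounts to, as a self-contained sketch; MemAccess and Test are illustrative stand-ins. Every load or store is now paired with every load or store at or after it, instead of stopping after the first store/load pair.

#include <cstddef>
#include <vector>

struct MemAccess { /* stand-in for a load or store instruction */ };

static void forEachDependencePair(
    const std::vector<MemAccess> &Accesses,
    void (*Test)(const MemAccess &Src, const MemAccess &Dst)) {
  for (std::size_t S = 0; S != Accesses.size(); ++S)
    for (std::size_t D = S; D != Accesses.size(); ++D)
      Test(Accesses[S], Accesses[D]); // DA->depends(&*SrcI, &*DstI, true)
}
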
@@ -224,8 +221,8 @@ bool Dependence::isScalar(unsigned level) const {
//===----------------------------------------------------------------------===//
// FullDependence methods
-FullDependence::FullDependence(const Instruction *Source,
- const Instruction *Destination,
+FullDependence::FullDependence(Instruction *Source,
+ Instruction *Destination,
bool PossiblyLoopIndependent,
unsigned CommonLevels) :
Dependence(Source, Destination),
@@ -586,42 +583,40 @@ void Dependence::dump(raw_ostream &OS) const {
else if (isInput())
OS << "input";
unsigned Levels = getLevels();
- if (Levels) {
- OS << " [";
- for (unsigned II = 1; II <= Levels; ++II) {
- if (isSplitable(II))
- Splitable = true;
- if (isPeelFirst(II))
- OS << 'p';
- const SCEV *Distance = getDistance(II);
- if (Distance)
- OS << *Distance;
- else if (isScalar(II))
- OS << "S";
+ OS << " [";
+ for (unsigned II = 1; II <= Levels; ++II) {
+ if (isSplitable(II))
+ Splitable = true;
+ if (isPeelFirst(II))
+ OS << 'p';
+ const SCEV *Distance = getDistance(II);
+ if (Distance)
+ OS << *Distance;
+ else if (isScalar(II))
+ OS << "S";
+ else {
+ unsigned Direction = getDirection(II);
+ if (Direction == DVEntry::ALL)
+ OS << "*";
else {
- unsigned Direction = getDirection(II);
- if (Direction == DVEntry::ALL)
- OS << "*";
- else {
- if (Direction & DVEntry::LT)
- OS << "<";
- if (Direction & DVEntry::EQ)
- OS << "=";
- if (Direction & DVEntry::GT)
- OS << ">";
- }
+ if (Direction & DVEntry::LT)
+ OS << "<";
+ if (Direction & DVEntry::EQ)
+ OS << "=";
+ if (Direction & DVEntry::GT)
+ OS << ">";
}
- if (isPeelLast(II))
- OS << 'p';
- if (II < Levels)
- OS << " ";
}
- if (isLoopIndependent())
- OS << "|<";
- OS << "]";
- if (Splitable)
- OS << " splitable";
+ if (isPeelLast(II))
+ OS << 'p';
+ if (II < Levels)
+ OS << " ";
}
+ if (isLoopIndependent())
+ OS << "|<";
+ OS << "]";
+ if (Splitable)
+ OS << " splitable";
}
OS << "!\n";
}
@@ -652,10 +647,10 @@ bool isLoadOrStore(const Instruction *I) {
static
-const Value *getPointerOperand(const Instruction *I) {
- if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+Value *getPointerOperand(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
return LI->getPointerOperand();
- if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
llvm_unreachable("Value is not load or store instruction");
return 0;
@@ -2215,13 +2210,13 @@ const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) {
//
// It occurs to me that the presence of loop-invariant variables
// changes the nature of the test from "greatest common divisor"
-// to "a common divisor!"
+// to "a common divisor".
bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
const SCEV *Dst,
FullDependence &Result) const {
DEBUG(dbgs() << "starting gcd\n");
++GCDapplications;
- unsigned BitWidth = Src->getType()->getIntegerBitWidth();
+ unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
APInt RunningGCD = APInt::getNullValue(BitWidth);
// Examine Src coefficients.
@@ -3197,42 +3192,42 @@ static void dumpSmallBitVector(SmallBitVector &BV) {
// Goff, Kennedy, Tseng
// PLDI 1991
//
-// Care is required to keep the code below up to date w.r.t. this routine.
-Dependence *DependenceAnalysis::depends(const Instruction *Src,
- const Instruction *Dst,
+// Care is required to keep the routine below, getSplitIteration(),
+// up to date with respect to this routine.
+Dependence *DependenceAnalysis::depends(Instruction *Src,
+ Instruction *Dst,
bool PossiblyLoopIndependent) {
+ if (Src == Dst)
+ PossiblyLoopIndependent = false;
+
if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
(!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
// if both instructions don't reference memory, there's no dependence
return NULL;
- if (!isLoadOrStore(Src) || !isLoadOrStore(Dst))
+ if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
+ DEBUG(dbgs() << "can only handle simple loads and stores\n");
return new Dependence(Src, Dst);
+ }
- const Value *SrcPtr = getPointerOperand(Src);
- const Value *DstPtr = getPointerOperand(Dst);
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *DstPtr = getPointerOperand(Dst);
switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
case AliasAnalysis::MayAlias:
case AliasAnalysis::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
+ DEBUG(dbgs() << "can't analyze may or partial alias\n");
return new Dependence(Src, Dst);
case AliasAnalysis::NoAlias:
// If the objects noalias, they are distinct, accesses are independent.
+ DEBUG(dbgs() << "no alias\n");
return NULL;
case AliasAnalysis::MustAlias:
break; // The underlying objects alias; test accesses for dependence.
}
- const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
- const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
- if (!SrcGEP || !DstGEP)
- return new Dependence(Src, Dst); // missing GEP, assume dependence
-
- if (SrcGEP->getPointerOperandType() != DstGEP->getPointerOperandType())
- return new Dependence(Src, Dst); // different types, assume dependence
-
// establish loop nesting levels
establishNestingLevels(Src, Dst);
DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
@@ -3241,36 +3236,62 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src,
FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
++TotalArrayPairs;
- // classify subscript pairs
- unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin();
+ // See if there are GEPs we can use.
+ bool UsefulGEP = false;
+ GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ if (SrcGEP && DstGEP &&
+ SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
+ const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
+ const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
+ DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
+ DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n");
+
+ UsefulGEP =
+ isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ }
+ unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
- for (unsigned SI = 0; SI < Pairs; ++SI) {
- Pair[SI].Loops.resize(MaxLevels + 1);
- Pair[SI].GroupLoops.resize(MaxLevels + 1);
- Pair[SI].Group.resize(Pairs);
- }
- Pairs = 0;
- for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
- SrcEnd = SrcGEP->idx_end(),
- DstIdx = DstGEP->idx_begin(),
- DstEnd = DstGEP->idx_end();
- SrcIdx != SrcEnd && DstIdx != DstEnd;
- ++SrcIdx, ++DstIdx, ++Pairs) {
- Pair[Pairs].Src = SE->getSCEV(*SrcIdx);
- Pair[Pairs].Dst = SE->getSCEV(*DstIdx);
- removeMatchingExtensions(&Pair[Pairs]);
- Pair[Pairs].Classification =
- classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()),
- Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()),
- Pair[Pairs].Loops);
- Pair[Pairs].GroupLoops = Pair[Pairs].Loops;
- Pair[Pairs].Group.set(Pairs);
- DEBUG(dbgs() << " subscript " << Pairs << "\n");
- DEBUG(dbgs() << "\tsrc = " << *Pair[Pairs].Src << "\n");
- DEBUG(dbgs() << "\tdst = " << *Pair[Pairs].Dst << "\n");
- DEBUG(dbgs() << "\tclass = " << Pair[Pairs].Classification << "\n");
+ if (UsefulGEP) {
+ DEBUG(dbgs() << " using GEPs\n");
+ unsigned P = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin();
+ SrcIdx != SrcEnd;
+ ++SrcIdx, ++DstIdx, ++P) {
+ Pair[P].Src = SE->getSCEV(*SrcIdx);
+ Pair[P].Dst = SE->getSCEV(*DstIdx);
+ }
+ }
+ else {
+ DEBUG(dbgs() << " ignoring GEPs\n");
+ const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
+ const SCEV *DstSCEV = SE->getSCEV(DstPtr);
+ DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
+ DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
+ Pair[0].Src = SrcSCEV;
+ Pair[0].Dst = DstSCEV;
+ }
+
+ for (unsigned P = 0; P < Pairs; ++P) {
+ Pair[P].Loops.resize(MaxLevels + 1);
+ Pair[P].GroupLoops.resize(MaxLevels + 1);
+ Pair[P].Group.resize(Pairs);
+ removeMatchingExtensions(&Pair[P]);
+ Pair[P].Classification =
+ classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
+ Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[P].Loops);
+ Pair[P].GroupLoops = Pair[P].Loops;
+ Pair[P].Group.set(P);
+ DEBUG(dbgs() << " subscript " << P << "\n");
+ DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
+ DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
+ DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
DEBUG(dbgs() << "\tloops = ");
- DEBUG(dumpSmallBitVector(Pair[Pairs].Loops));
+ DEBUG(dumpSmallBitVector(Pair[P].Loops));
}
SmallBitVector Separable(Pairs);
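
The UsefulGEP decision introduced above, condensed into a standalone sketch with illustrative types; upstream compares GEPOperator pointer-operand types and checks SCEV loop invariance. Subscript pairs come from the GEP indices only when both accesses are comparable GEPs with loop-invariant bases; otherwise a single pair over the raw pointer SCEVs is used.

#include <cstddef>

struct AccessInfo {
  bool IsGEP;             // pointer operand is a GEPOperator
  int PointerOperandType; // stand-in for the GEP pointer operand type
  bool BaseLoopInvariant; // base pointer SCEV is invariant in its loop
  std::size_t NumIndices; // number of GEP indices
};

static std::size_t numSubscriptPairs(const AccessInfo &Src,
                                     const AccessInfo &Dst) {
  bool UsefulGEP = Src.IsGEP && Dst.IsGEP &&
                   Src.PointerOperandType == Dst.PointerOperandType &&
                   Src.BaseLoopInvariant && Dst.BaseLoopInvariant;
  return UsefulGEP ? Src.NumIndices : 1; // else one pair of raw SCEVs
}
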
@@ -3532,7 +3553,7 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src,
}
}
- // make sure Scalar flags are set correctly
+ // Make sure the Scalar flags are set correctly.
SmallBitVector CompleteLoops(MaxLevels + 1);
for (unsigned SI = 0; SI < Pairs; ++SI)
CompleteLoops |= Pair[SI].Loops;
@@ -3540,8 +3561,10 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src,
if (CompleteLoops[II])
Result.DV[II - 1].Scalar = false;
- // make sure loopIndepent flag is set correctly
if (PossiblyLoopIndependent) {
+ // Make sure the LoopIndependent flag is set correctly.
+ // All directions must include equal, otherwise no
+ // loop-independent dependence is possible.
for (unsigned II = 1; II <= CommonLevels; ++II) {
if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
Result.LoopIndependent = false;
@@ -3549,6 +3572,19 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src,
}
}
}
+ else {
+ // On the other hand, if all directions are equal and there's no
+ // loop-independent dependence possible, then no dependence exists.
+ bool AllEqual = true;
+ for (unsigned II = 1; II <= CommonLevels; ++II) {
+ if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
+ AllEqual = false;
+ break;
+ }
+ }
+ if (AllEqual)
+ return NULL;
+ }
FullDependence *Final = new FullDependence(Result);
Result.DV = NULL;
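
A short reading of the AllEqual exit above, tying it to the Src == Dst change at the top of depends():

// If every direction-vector entry is exactly '=', the only dependence
// that could exist is within a single iteration, i.e. loop-independent.
// When the caller passed PossiblyLoopIndependent == false (or Src ==
// Dst forced it false), that case is excluded by construction, so
// depends() can safely report no dependence by returning NULL.
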
@@ -3565,7 +3601,8 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src,
// though simplified since we know that the dependence exists.
// It's tedious, since we must go through all propagations, etc.
//
-// Care is required to keep this code up to date w.r.t. the code above.
+// Care is required to keep this code up to date with respect to the routine
+// above, depends().
//
// Generally, the dependence analyzer will be used to build
// a dependence graph for a function (basically a map from instructions
@@ -3608,50 +3645,65 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
assert(Dep && "expected a pointer to a Dependence");
assert(Dep->isSplitable(SplitLevel) &&
"Dep should be splitable at SplitLevel");
- const Instruction *Src = Dep->getSrc();
- const Instruction *Dst = Dep->getDst();
+ Instruction *Src = Dep->getSrc();
+ Instruction *Dst = Dep->getDst();
assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
assert(isLoadOrStore(Src));
assert(isLoadOrStore(Dst));
- const Value *SrcPtr = getPointerOperand(Src);
- const Value *DstPtr = getPointerOperand(Dst);
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *DstPtr = getPointerOperand(Dst);
assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
AliasAnalysis::MustAlias);
- const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
- const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
- assert(SrcGEP);
- assert(DstGEP);
- assert(SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType());
// establish loop nesting levels
establishNestingLevels(Src, Dst);
FullDependence Result(Src, Dst, false, CommonLevels);
- // classify subscript pairs
- unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin();
+ // See if there are GEPs we can use.
+ bool UsefulGEP = false;
+ GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ if (SrcGEP && DstGEP &&
+ SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
+ const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
+ const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
+ UsefulGEP =
+ isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ }
+ unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
- for (unsigned SI = 0; SI < Pairs; ++SI) {
- Pair[SI].Loops.resize(MaxLevels + 1);
- Pair[SI].GroupLoops.resize(MaxLevels + 1);
- Pair[SI].Group.resize(Pairs);
- }
- Pairs = 0;
- for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
- SrcEnd = SrcGEP->idx_end(),
- DstIdx = DstGEP->idx_begin(),
- DstEnd = DstGEP->idx_end();
- SrcIdx != SrcEnd && DstIdx != DstEnd;
- ++SrcIdx, ++DstIdx, ++Pairs) {
- Pair[Pairs].Src = SE->getSCEV(*SrcIdx);
- Pair[Pairs].Dst = SE->getSCEV(*DstIdx);
- Pair[Pairs].Classification =
- classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()),
- Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()),
- Pair[Pairs].Loops);
- Pair[Pairs].GroupLoops = Pair[Pairs].Loops;
- Pair[Pairs].Group.set(Pairs);
+ if (UsefulGEP) {
+ unsigned P = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin();
+ SrcIdx != SrcEnd;
+ ++SrcIdx, ++DstIdx, ++P) {
+ Pair[P].Src = SE->getSCEV(*SrcIdx);
+ Pair[P].Dst = SE->getSCEV(*DstIdx);
+ }
+ }
+ else {
+ const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
+ const SCEV *DstSCEV = SE->getSCEV(DstPtr);
+ Pair[0].Src = SrcSCEV;
+ Pair[0].Dst = DstSCEV;
+ }
+
+ for (unsigned P = 0; P < Pairs; ++P) {
+ Pair[P].Loops.resize(MaxLevels + 1);
+ Pair[P].GroupLoops.resize(MaxLevels + 1);
+ Pair[P].Group.resize(Pairs);
+ removeMatchingExtensions(&Pair[P]);
+ Pair[P].Classification =
+ classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
+ Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[P].Loops);
+ Pair[P].GroupLoops = Pair[P].Loops;
+ Pair[P].Group.set(P);
}
SmallBitVector Separable(Pairs);
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index 3e537e9f1a36..7e4a89f1bd57 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DominanceFrontier.h"
-#include "llvm/Support/Debug.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 34d6d1bdd421..67b413577980 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -1,9 +1,11 @@
add_llvm_library(LLVMipa
CallGraph.cpp
CallGraphSCCPass.cpp
+ CallPrinter.cpp
FindUsedTypes.cpp
GlobalsModRef.cpp
IPA.cpp
+ InlineCost.cpp
)
add_dependencies(LLVMipa intrinsics_gen)
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index dec0eced2786..7620fd9842cc 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -13,9 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 449b7ee87b1c..a0d788f34a3c 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -16,13 +16,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "cgscc-passmgr"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Function.h"
-#include "llvm/PassManagers.h"
-#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/PassManagers.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
@@ -51,6 +51,9 @@ public:
/// whether any of the passes modifies the module, and if so, return true.
bool runOnModule(Module &M);
+ using ModulePass::doInitialization;
+ using ModulePass::doFinalization;
+
bool doInitialization(CallGraph &CG);
bool doFinalization(CallGraph &CG);
diff --git a/lib/Analysis/IPA/CallPrinter.cpp b/lib/Analysis/IPA/CallPrinter.cpp
new file mode 100644
index 000000000000..306ae7a4dbfb
--- /dev/null
+++ b/lib/Analysis/IPA/CallPrinter.cpp
@@ -0,0 +1,87 @@
+//===- CallPrinter.cpp - DOT printer for call graph -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines '-dot-callgraph', which emits a callgraph.<fnname>.dot
+// containing the call graph of a module.
+//
+// There is also a pass available to directly call dotty ('-view-callgraph').
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallPrinter.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+template<>
+struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits {
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(CallGraph *Graph) {
+ return "Call graph";
+ }
+
+ std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) {
+ if (Function *Func = Node->getFunction())
+ return Func->getName();
+
+ return "external node";
+ }
+};
+
+} // end llvm namespace
+
+namespace {
+
+struct CallGraphViewer
+ : public DOTGraphTraitsModuleViewer<CallGraph, true> {
+ static char ID;
+
+ CallGraphViewer()
+ : DOTGraphTraitsModuleViewer<CallGraph, true>("callgraph", ID) {
+ initializeCallGraphViewerPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct CallGraphPrinter
+ : public DOTGraphTraitsModulePrinter<CallGraph, true> {
+ static char ID;
+
+ CallGraphPrinter()
+ : DOTGraphTraitsModulePrinter<CallGraph, true>("callgraph", ID) {
+ initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+} // end anonymous namespace
+
+char CallGraphViewer::ID = 0;
+INITIALIZE_PASS(CallGraphViewer, "view-callgraph",
+ "View call graph",
+ false, false)
+
+char CallGraphPrinter::ID = 0;
+INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph",
+ "Print call graph to 'dot' file",
+ false, false)
+
+// Create methods available outside of this file, so they can be referenced
+// from "include/llvm/LinkAllPasses.h". Otherwise the passes would be
+// stripped out by link-time optimization.
+
+ModulePass *llvm::createCallGraphViewerPass() {
+ return new CallGraphViewer();
+}
+
+ModulePass *llvm::createCallGraphPrinterPass() {
+ return new CallGraphPrinter();
+}
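+
+// A usage sketch, assuming an opt binary with this file linked in:
+//   opt -dot-callgraph input.bc -o /dev/null    # writes the graph to a .dot file
+//   opt -view-callgraph input.bc -o /dev/null   # opens it in the configured viewer
+// The create* entry points above serve clients that construct the passes
+// directly.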
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index e9df3ca01022..1c4f17d3819a 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -14,10 +14,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 990caa80c8d2..92d0d2318e0d 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -16,20 +16,20 @@
#define DEBUG_TYPE "globalsmodref-aa"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Instructions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SCCIterator.h"
#include <set>
using namespace llvm;
diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp
index 0ba2e04c6302..aa5164e9e79b 100644
--- a/lib/Analysis/IPA/IPA.cpp
+++ b/lib/Analysis/IPA/IPA.cpp
@@ -20,6 +20,8 @@ using namespace llvm;
void llvm::initializeIPA(PassRegistry &Registry) {
initializeBasicCallGraphPass(Registry);
initializeCallGraphAnalysisGroup(Registry);
+ initializeCallGraphPrinterPass(Registry);
+ initializeCallGraphViewerPass(Registry);
initializeFindUsedTypesPass(Registry);
initializeGlobalsModRefPass(Registry);
}
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
new file mode 100644
index 000000000000..35c45e61808b
--- /dev/null
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -0,0 +1,1239 @@
+//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inline cost analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "inline-cost"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
+
+namespace {
+
+class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
+ typedef InstVisitor<CallAnalyzer, bool> Base;
+ friend class InstVisitor<CallAnalyzer, bool>;
+
+ // DataLayout if available, or null.
+ const DataLayout *const TD;
+
+ /// The TargetTransformInfo available for this compilation.
+ const TargetTransformInfo &TTI;
+
+ // The called function.
+ Function &F;
+
+ int Threshold;
+ int Cost;
+
+ bool IsCallerRecursive;
+ bool IsRecursiveCall;
+ bool ExposesReturnsTwice;
+ bool HasDynamicAlloca;
+ bool ContainsNoDuplicateCall;
+
+ /// Number of bytes allocated statically by the callee.
+ uint64_t AllocatedSize;
+ unsigned NumInstructions, NumVectorInstructions;
+ int FiftyPercentVectorBonus, TenPercentVectorBonus;
+ int VectorBonus;
+
+ // While we walk the potentially-inlined instructions, we build up and
+ // maintain a mapping of simplified values specific to this callsite. The
+ // idea is to propagate any special information we have about arguments to
+ // this call through the inlinable section of the function, and account for
+ // likely simplifications post-inlining. The most important aspect we track
+ // is CFG altering simplifications -- when we prove a basic block dead, that
+ // can cause dramatic shifts in the cost of inlining a function.
+ DenseMap<Value *, Constant *> SimplifiedValues;
+
+ // Keep track of the values which map back (through function arguments) to
+ // allocas on the caller stack which could be simplified through SROA.
+ DenseMap<Value *, Value *> SROAArgValues;
+
+ // The mapping of caller Alloca values to their accumulated cost savings. If
+ // we have to disable SROA for one of the allocas, this tells us how much
+ // cost must be added.
+ DenseMap<Value *, int> SROAArgCosts;
+
+ // Keep track of values which map to a pointer base and constant offset.
+ DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
+
+ // Custom simplification helper routines.
+ bool isAllocaDerivedArg(Value *V);
+ bool lookupSROAArgAndCost(Value *V, Value *&Arg,
+ DenseMap<Value *, int>::iterator &CostIt);
+ void disableSROA(DenseMap<Value *, int>::iterator CostIt);
+ void disableSROA(Value *V);
+ void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+ bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
+ bool simplifyCallSite(Function *F, CallSite CS);
+ ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
+
+ // Custom analysis routines.
+ bool analyzeBlock(BasicBlock *BB);
+
+ // Disable several entry points to the visitor so we don't accidentally use
+ // them by declaring but not defining them here.
+ void visit(Module *); void visit(Module &);
+ void visit(Function *); void visit(Function &);
+ void visit(BasicBlock *); void visit(BasicBlock &);
+
+ // Provide base case for our instruction visit.
+ bool visitInstruction(Instruction &I);
+
+ // Our visit overrides.
+ bool visitAlloca(AllocaInst &I);
+ bool visitPHI(PHINode &I);
+ bool visitGetElementPtr(GetElementPtrInst &I);
+ bool visitBitCast(BitCastInst &I);
+ bool visitPtrToInt(PtrToIntInst &I);
+ bool visitIntToPtr(IntToPtrInst &I);
+ bool visitCastInst(CastInst &I);
+ bool visitUnaryInstruction(UnaryInstruction &I);
+ bool visitICmp(ICmpInst &I);
+ bool visitSub(BinaryOperator &I);
+ bool visitBinaryOperator(BinaryOperator &I);
+ bool visitLoad(LoadInst &I);
+ bool visitStore(StoreInst &I);
+ bool visitExtractValue(ExtractValueInst &I);
+ bool visitInsertValue(InsertValueInst &I);
+ bool visitCallSite(CallSite CS);
+
+public:
+ CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
+ Function &Callee, int Threshold)
+ : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
+ IsCallerRecursive(false), IsRecursiveCall(false),
+ ExposesReturnsTwice(false), HasDynamicAlloca(false),
+ ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
+ NumVectorInstructions(0), FiftyPercentVectorBonus(0),
+ TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
+
+ bool analyzeCall(CallSite CS);
+
+ int getThreshold() { return Threshold; }
+ int getCost() { return Cost; }
+
+ // Keep a bunch of stats about the cost savings found so we can print them
+ // out when debugging.
+ unsigned NumConstantArgs;
+ unsigned NumConstantOffsetPtrArgs;
+ unsigned NumAllocaArgs;
+ unsigned NumConstantPtrCmps;
+ unsigned NumConstantPtrDiffs;
+ unsigned NumInstructionsSimplified;
+ unsigned SROACostSavings;
+ unsigned SROACostSavingsLost;
+
+ void dump();
+};
+
+} // namespace
+
+/// \brief Test whether the given value is an Alloca-derived function argument.
+bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
+ return SROAArgValues.count(V);
+}
+
+/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
+/// Returns false if V does not map to a SROA-candidate.
+bool CallAnalyzer::lookupSROAArgAndCost(
+ Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
+ if (SROAArgValues.empty() || SROAArgCosts.empty())
+ return false;
+
+ DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
+ if (ArgIt == SROAArgValues.end())
+ return false;
+
+ Arg = ArgIt->second;
+ CostIt = SROAArgCosts.find(Arg);
+ return CostIt != SROAArgCosts.end();
+}
+
+/// \brief Disable SROA for the candidate marked by this cost iterator.
+///
+/// This marks the candidate as no longer viable for SROA, and adds the cost
+/// savings associated with it back into the inline cost measurement.
+void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
+ // If we're no longer able to perform SROA we need to undo its cost savings
+ // and prevent subsequent analysis.
+ Cost += CostIt->second;
+ SROACostSavings -= CostIt->second;
+ SROACostSavingsLost += CostIt->second;
+ SROAArgCosts.erase(CostIt);
+}
+
+/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
+void CallAnalyzer::disableSROA(Value *V) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(V, SROAArg, CostIt))
+ disableSROA(CostIt);
+}
+
+/// \brief Accumulate the given cost for a particular SROA candidate.
+void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ CostIt->second += InstructionCost;
+ SROACostSavings += InstructionCost;
+}
+
+/// \brief Helper for the common pattern of handling a SROA candidate.
+/// Either accumulates the cost savings if the SROA remains valid, or disables
+/// SROA for the candidate.
+bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ if (IsSROAValid) {
+ accumulateSROACost(CostIt, InstructionCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ return false;
+}
+
+/// \brief Check whether a GEP's indices are all constant.
+///
+/// Respects any simplified values known during the analysis of this callsite.
+bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
+ return false;
+
+ return true;
+}
+
+/// \brief Accumulate a constant GEP offset into an APInt if possible.
+///
+/// Returns false if unable to compute the offset for any reason. Respects any
+/// simplified values known during the analysis of this callsite.
+bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
+ if (!TD)
+ return false;
+
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ assert(IntPtrWidth == Offset.getBitWidth());
+
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
+ OpC = dyn_cast<ConstantInt>(SimpleOp);
+ if (!OpC)
+ return false;
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD->getStructLayout(STy);
+ Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+ continue;
+ }
+
+ APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
+ Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
+ }
+ return true;
+}
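+
+// A worked example of the accumulation above, assuming 64-bit pointers:
+//   getelementptr inbounds {i32, i64}* %p, i32 0, i32 1
+// adds the StructLayout field offset of element 1 (8 bytes under natural
+// alignment), while an array index of 2 over i32 would add 2 * 4 = 8.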
+
+bool CallAnalyzer::visitAlloca(AllocaInst &I) {
+ // FIXME: Check whether inlining will turn a dynamic alloca into a static
+ // alloca, and handle that case.
+
+ // Accumulate the allocated size.
+ if (I.isStaticAlloca()) {
+ Type *Ty = I.getAllocatedType();
+ AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) :
+ Ty->getPrimitiveSizeInBits());
+ }
+
+ // We will happily inline static alloca instructions.
+ if (I.isStaticAlloca())
+ return Base::visitAlloca(I);
+
+ // FIXME: This is overly conservative. Dynamic allocas are inefficient for
+ // a variety of reasons, and so we would like to not inline them into
+ // functions which don't currently have a dynamic alloca. This simply
+ // disables inlining altogether in the presence of a dynamic alloca.
+ HasDynamicAlloca = true;
+ return false;
+}
+
+bool CallAnalyzer::visitPHI(PHINode &I) {
+ // FIXME: We should potentially be tracking values through phi nodes,
+ // especially when they collapse to a single value due to deleted CFG edges
+ // during inlining.
+
+ // FIXME: We need to propagate SROA *disabling* through phi nodes, even
+ // though we don't want to propagate its bonuses. The idea is to disable
+ // SROA if it *might* be used in an inappropriate manner.
+
+ // Phi nodes are always zero-cost.
+ return true;
+}
+
+bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
+ SROAArg, CostIt);
+
+ // Try to fold GEPs of constant-offset call site argument pointers. This
+ // requires target data and inbounds GEPs.
+ if (TD && I.isInBounds()) {
+ // Check if we have a base + offset for the pointer.
+ Value *Ptr = I.getPointerOperand();
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
+ if (BaseAndOffset.first) {
+ // Check if the offset of this GEP is constant, and if so accumulate it
+ // into Offset.
+ if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
+ // Non-constant GEPs aren't folded; they also disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
+ return false;
+ }
+
+ // Add the result as a new mapping to Base + Offset.
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ // Also handle SROA candidates here; we already know that the GEP is
+ // all-constant indexed.
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ return true;
+ }
+ }
+
+ if (isGEPOffsetConstant(I)) {
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ // Constant GEPs are modeled as free.
+ return true;
+ }
+
+ // Variable GEPs will require math and will disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
+ return false;
+}
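+
+// E.g. a GEP whose pointer operand already has a known base-plus-offset and
+// whose indices are all constant folds into ConstantOffsetPtrs and is free;
+// a variable index costs real address math and disables SROA on the argument.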
+
+bool CallAnalyzer::visitBitCast(BitCastInst &I) {
+ // Propagate constants through bitcasts.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offsets through casts
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ // Casts don't change the offset, just wrap it up.
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ // Also look for SROA candidates here.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ // Bitcasts are always zero cost.
+ return true;
+}
+
+bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
+ // Propagate constants through ptrtoint.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offset pairs when converted to a plain integer provided the
+ // integer is large enough to represent the pointer.
+ unsigned IntegerSize = I.getType()->getScalarSizeInBits();
+ if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+ }
+
+ // This is really weird. Technically, ptrtoint will disable SROA. However,
+ // unless that ptrtoint is *used* somewhere in the live basic blocks after
+ // inlining, it will be nuked, and SROA should proceed. All of the uses which
+ // would block SROA would also block SROA if applied directly to a pointer,
+ // and so we can just add the integer in here. The only places where SROA is
+ // preserved either cannot fire on an integer, or won't in and of themselves
+ // disable SROA (ext) without some later use that we would see and disable.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+}
+
+bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
+ // Propagate constants through inttoptr.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offset pairs when round-tripped through an integer without
+ // modification, provided the integer is not too large.
+ Value *Op = I.getOperand(0);
+ unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
+ if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+ }
+
+ // "Propagate" SROA here in the same manner as we do for ptrtoint above.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+}
+
+bool CallAnalyzer::visitCastInst(CastInst &I) {
+ // Propagate constants through casts.
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
+ if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
+ disableSROA(I.getOperand(0));
+
+ return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
+}
+
+bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
+ Value *Operand = I.getOperand(0);
+ Constant *COp = dyn_cast<Constant>(Operand);
+ if (!COp)
+ COp = SimplifiedValues.lookup(Operand);
+ if (COp)
+ if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
+ COp, TD)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Disable any SROA on the argument to arbitrary unary operators.
+ disableSROA(Operand);
+
+ return false;
+}
+
+bool CallAnalyzer::visitICmp(ICmpInst &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ // First try to handle simplified comparisons.
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Otherwise look for a comparison between constant offset pointers with
+ // a common base.
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the icmp to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrCmps;
+ return true;
+ }
+ }
+ }
+
+ // If the comparison is an equality comparison with null, we can simplify it
+ // for any alloca-derived argument.
+ if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
+ if (isAllocaDerivedArg(I.getOperand(0))) {
+ // We can actually predict the result of comparisons between an
+ // alloca-derived value and null. Note that this fires regardless of
+ // SROA firing.
+ bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
+ SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
+ : ConstantInt::getFalse(I.getType());
+ return true;
+ }
+
+ // Finally check for SROA candidates in comparisons.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (isa<ConstantPointerNull>(I.getOperand(1))) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ }
+
+ return false;
+}
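+
+// For instance, if %a and %b derive from one base with accumulated offsets 4
+// and 8, 'icmp ult %a, %b' folds here to true and adds nothing to the cost.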
+
+bool CallAnalyzer::visitSub(BinaryOperator &I) {
+ // Try to handle a special case: we can fold computing the difference of two
+ // constant-related pointers.
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the subtract to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrDiffs;
+ return true;
+ }
+ }
+ }
+
+ // Otherwise, fall back to the generic logic for simplifying and handling
+ // instructions.
+ return Base::visitSub(I);
+}
+
+bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD);
+ if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
+ disableSROA(LHS);
+ disableSROA(RHS);
+
+ return false;
+}
+
+bool CallAnalyzer::visitLoad(LoadInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ }
+
+ return false;
+}
+
+bool CallAnalyzer::visitStore(StoreInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ }
+
+ return false;
+}
+
+bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
+ // Constant folding for extract value is trivial.
+ Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
+ if (!C)
+ C = SimplifiedValues.lookup(I.getAggregateOperand());
+ if (C) {
+ SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
+ return true;
+ }
+
+ // SROA can look through these but give them a cost.
+ return false;
+}
+
+bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
+ // Constant folding for insert value is trivial.
+ Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
+ if (!AggC)
+ AggC = SimplifiedValues.lookup(I.getAggregateOperand());
+ Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
+ if (!InsertedC)
+ InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
+ if (AggC && InsertedC) {
+ SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
+ I.getIndices());
+ return true;
+ }
+
+ // SROA can look through these but give them a cost.
+ return false;
+}
+
+/// \brief Try to simplify a call site.
+///
+/// Takes a concrete function and callsite and tries to actually simplify it by
+/// analyzing the arguments and call itself with instsimplify. Returns true if
+/// it has simplified the callsite to some other entity (a constant), making it
+/// free.
+bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
+ // FIXME: Using the instsimplify logic directly for this is inefficient
+ // because we have to continually rebuild the argument list even when no
+ // simplifications can be performed. Until that is fixed with remapping
+ // inside of instsimplify, directly constant fold calls here.
+ if (!canConstantFoldCallTo(F))
+ return false;
+
+ // Try to re-map the arguments to constants.
+ SmallVector<Constant *, 4> ConstantArgs;
+ ConstantArgs.reserve(CS.arg_size());
+ for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I) {
+ Constant *C = dyn_cast<Constant>(*I);
+ if (!C)
+ C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
+ if (!C)
+ return false; // This argument doesn't map to a constant.
+
+ ConstantArgs.push_back(C);
+ }
+ if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
+ SimplifiedValues[CS.getInstruction()] = C;
+ return true;
+ }
+
+ return false;
+}
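+
+// As an example, a call such as fabs(%x) where %x has simplified to a
+// constant at this call site is folded through ConstantFoldCall and becomes
+// free (fabs being among the libm callees canConstantFoldCallTo accepts).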
+
+bool CallAnalyzer::visitCallSite(CallSite CS) {
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
+ !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ReturnsTwice)) {
+ // This aborts the entire analysis.
+ ExposesReturnsTwice = true;
+ return false;
+ }
+ if (CS.isCall() &&
+ cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate))
+ ContainsNoDuplicateCall = true;
+
+ if (Function *F = CS.getCalledFunction()) {
+ // When we have a concrete function, first try to simplify it directly.
+ if (simplifyCallSite(F, CS))
+ return true;
+
+ // Next check if it is an intrinsic we know about.
+ // FIXME: Lift this into part of the InstVisitor.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return Base::visitCallSite(CS);
+
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ // SROA can usually chew through these intrinsics, but they aren't free.
+ return false;
+ }
+ }
+
+ if (F == CS.getInstruction()->getParent()->getParent()) {
+ // This flag will fully abort the analysis, so don't bother with anything
+ // else.
+ IsRecursiveCall = true;
+ return false;
+ }
+
+ if (TTI.isLoweredToCall(F)) {
+ // We account for the average 1 instruction per call argument setup
+ // here.
+ Cost += CS.arg_size() * InlineConstants::InstrCost;
+
+ // Everything other than inline ASM will also have a significant cost
+ // merely from making the call.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ Cost += InlineConstants::CallPenalty;
+ }
+
+ return Base::visitCallSite(CS);
+ }
+
+ // Otherwise we're in a very special case -- an indirect function call. See
+ // if we can be particularly clever about this.
+ Value *Callee = CS.getCalledValue();
+
+ // First, pay the price of the argument setup. We account for the average
+ // 1 instruction per call argument setup here.
+ Cost += CS.arg_size() * InlineConstants::InstrCost;
+
+ // Next, check if this happens to be an indirect function call to a known
+ // function in this inline context. If not, we've done all we can.
+ Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
+ if (!F)
+ return Base::visitCallSite(CS);
+
+ // If we have a constant that we are calling as a function, we can peer
+ // through it and see the function target. This happens not infrequently
+ // during devirtualization and so we want to give it a hefty bonus for
+ // inlining, but cap that bonus in the event that inlining wouldn't pan
+ // out. Pretend to inline the function, with a custom threshold.
+ CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold);
+ if (CA.analyzeCall(CS)) {
+ // We were able to inline the indirect call! Subtract the cost from the
+ // bonus we want to apply, but don't go below zero.
+ Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
+ }
+
+ return Base::visitCallSite(CS);
+}
+
+bool CallAnalyzer::visitInstruction(Instruction &I) {
+ // Some instructions are free. All of the free intrinsics can also be
+ // handled by SROA, etc.
+ if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
+ return true;
+
+ // We found something we don't understand or can't handle. Mark any SROA-able
+ // values in the operand list as no longer viable.
+ for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
+ disableSROA(*OI);
+
+ return false;
+}
+
+
+/// \brief Analyze a basic block for its contribution to the inline cost.
+///
+/// This method walks the analyzer over every instruction in the given basic
+/// block and accounts for their cost during inlining at this callsite. It
+/// aborts early if the threshold has been exceeded or an impossible to inline
+/// construct has been detected. It returns false if inlining is no longer
+/// viable, and true if inlining remains viable.
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
+ I != E; ++I) {
+ ++NumInstructions;
+ if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
+ ++NumVectorInstructions;
+
+ // If the instruction simplified to a constant, there is no cost to this
+ // instruction. Visit the instructions using our InstVisitor to account for
+ // all of the per-instruction logic. The visit tree returns true if we
+ // consumed the instruction in any way, and false if the instruction's base
+ // cost should count against inlining.
+ if (Base::visit(I))
+ ++NumInstructionsSimplified;
+ else
+ Cost += InlineConstants::InstrCost;
+
+ // If visiting this instruction detected an uninlinable pattern, abort.
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ return false;
+
+ // If the caller is a recursive function then we don't want to inline
+ // functions which allocate a lot of stack space because it would increase
+ // the caller stack usage dramatically.
+ if (IsCallerRecursive &&
+ AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+ return false;
+
+ if (NumVectorInstructions > NumInstructions/2)
+ VectorBonus = FiftyPercentVectorBonus;
+ else if (NumVectorInstructions > NumInstructions/10)
+ VectorBonus = TenPercentVectorBonus;
+ else
+ VectorBonus = 0;
+
+ // Check if we've passed the threshold so we don't spin in huge basic
+ // blocks that will never inline.
+ if (Cost > (Threshold + VectorBonus))
+ return false;
+ }
+
+ return true;
+}
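+
+// For example, once 6 of 10 instructions seen are vector operations, the
+// NumInstructions/2 test grants FiftyPercentVectorBonus; at 2 of 10 only the
+// NumInstructions/10 test passes, granting TenPercentVectorBonus.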
+
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving it the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. It
+/// returns 0 if V is not a pointer, and returns the constant '0' if there are
+/// no constant offsets applied.
+ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
+ if (!TD || !V->getType()->isPointerTy())
+ return 0;
+
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ APInt Offset = APInt::getNullValue(IntPtrWidth);
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
+ return 0;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ break;
+ V = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
+
+ Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+ return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
+}
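+
+// A small example, assuming 64-bit pointers and DataLayout available: for
+//   %p = getelementptr inbounds [4 x i32]* @g, i64 0, i64 2
+// V is rewritten to @g and the returned constant is 8 (two i32 elements);
+// a non-inbounds GEP or a non-pointer V instead yields the null result.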
+
+/// \brief Analyze a call site for potential inlining.
+///
+/// Returns true if inlining this call is viable, and false if it is not
+/// viable. It computes the cost and adjusts the threshold based on numerous
+/// factors and heuristics. If this method returns false but the computed cost
+/// is below the computed threshold, then inlining was forcibly disabled by
+/// some artifact of the routine.
+bool CallAnalyzer::analyzeCall(CallSite CS) {
+ ++NumCallsAnalyzed;
+
+ // Track whether the post-inlining function would have more than one basic
+ // block. A single basic block is often intended for inlining. Balloon the
+ // threshold by 50% until we pass the single-BB phase.
+ bool SingleBB = true;
+ int SingleBBBonus = Threshold / 2;
+ Threshold += SingleBBBonus;
+
+ // Perform some tweaks to the cost and threshold based on the direct
+ // callsite information.
+
+ // We want to more aggressively inline vector-dense kernels, so up the
+ // threshold, and we'll lower it if the % of vector instructions gets too
+ // low.
+ assert(NumInstructions == 0);
+ assert(NumVectorInstructions == 0);
+ FiftyPercentVectorBonus = Threshold;
+ TenPercentVectorBonus = Threshold / 2;
+
+ // Give out bonuses per argument, as the instructions setting them up will
+ // be gone after inlining.
+ for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
+ if (TD && CS.isByValArgument(I)) {
+ // We approximate the number of loads and stores needed by dividing the
+ // size of the byval type by the target's pointer size.
+ PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+ unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
+ unsigned PointerSize = TD->getPointerSizeInBits();
+ // Ceiling division.
+ unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
+
+ // If it generates more than 8 stores it is likely to be expanded as an
+ // inline memcpy so we take that as an upper bound. Otherwise we assume
+ // one load and one store per word copied.
+ // FIXME: The maxStoresPerMemcpy setting from the target should be used
+ // here instead of a magic number of 8, but it's not available via
+ // DataLayout.
+ NumStores = std::min(NumStores, 8U);
+
+ Cost -= 2 * NumStores * InlineConstants::InstrCost;
+ } else {
+ // For non-byval arguments subtract off one instruction per call
+ // argument.
+ Cost -= InlineConstants::InstrCost;
+ }
+ }
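+
+ // Worked numbers for the byval case above: a 192-bit byval type with 64-bit
+ // pointers needs (192 + 63) / 64 = 3 stores, for a cost reduction of
+ // 2 * 3 * InlineConstants::InstrCost, versus one InstrCost for a plain
+ // argument.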
+
+ // If there is only one call of the function, and it has internal linkage,
+ // the cost of inlining it drops dramatically.
+ bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
+ &F == CS.getCalledFunction();
+ if (OnlyOneCallAndLocalLinkage)
+ Cost += InlineConstants::LastCallToStaticBonus;
+
+ // If the instruction following the call, or the normal destination of the
+ // invoke, is an unreachable instruction, the function is noreturn. As such,
+ // there is little point in inlining this unless there is literally zero
+ // cost.
+ Instruction *Instr = CS.getInstruction();
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+ Threshold = 1;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
+ Threshold = 1;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (F.getCallingConv() == CallingConv::Cold)
+ Cost += InlineConstants::ColdccPenalty;
+
+ // Check if we're done. This can happen due to bonuses and penalties.
+ if (Cost > Threshold)
+ return false;
+
+ if (F.empty())
+ return true;
+
+ Function *Caller = CS.getInstruction()->getParent()->getParent();
+ // Check if the caller function is recursive itself.
+ for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
+ U != E; ++U) {
+ CallSite Site(cast<Value>(*U));
+ if (!Site)
+ continue;
+ Instruction *I = Site.getInstruction();
+ if (I->getParent()->getParent() == Caller) {
+ IsCallerRecursive = true;
+ break;
+ }
+ }
+
+ // Track whether we've seen a return instruction. The first return
+ // instruction is free, as at least one will usually disappear in inlining.
+ bool HasReturn = false;
+
+ // Populate our simplified values by mapping from function arguments to call
+ // arguments with known important simplifications.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
+ FAI != FAE; ++FAI, ++CAI) {
+ assert(CAI != CS.arg_end());
+ if (Constant *C = dyn_cast<Constant>(CAI))
+ SimplifiedValues[FAI] = C;
+
+ Value *PtrArg = *CAI;
+ if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
+ ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
+
+ // We can SROA any pointer arguments derived from alloca instructions.
+ if (isa<AllocaInst>(PtrArg)) {
+ SROAArgValues[FAI] = PtrArg;
+ SROAArgCosts[PtrArg] = 0;
+ }
+ }
+ }
+ NumConstantArgs = SimplifiedValues.size();
+ NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
+ NumAllocaArgs = SROAArgValues.size();
+
+ // The worklist of live basic blocks in the callee *after* inlining. We avoid
+ // adding basic blocks of the callee which can be proven to be dead for this
+ // particular call site in order to get more accurate cost estimates. This
+ // requires a somewhat heavyweight iteration pattern: we need to walk the
+ // basic blocks in breadth-first order as we insert live successors. To
+ // accomplish this, and because we typically exit early once the threshold
+ // is crossed, we use a SetVector optimized for small working sets.
+ typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
+ SmallPtrSet<BasicBlock *, 16> > BBSetVector;
+ BBSetVector BBWorklist;
+ BBWorklist.insert(&F.getEntryBlock());
+ // Note that we *must not* cache the size, this loop grows the worklist.
+ for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
+ // Bail out the moment we cross the threshold. This means we'll under-count
+ // the cost, but only when undercounting doesn't matter.
+ if (Cost > (Threshold + VectorBonus))
+ break;
+
+ BasicBlock *BB = BBWorklist[Idx];
+ if (BB->empty())
+ continue;
+
+ // Handle the terminator cost here where we can track returns and other
+ // function-wide constructs.
+ TerminatorInst *TI = BB->getTerminator();
+
+ // We never want to inline functions that contain an indirectbr. This is
+ // incorrect because all the blockaddresses (in static global initializers,
+ // for example) would still refer to the original function, and the indirect
+ // jump would go from the inlined copy of the function into the original
+ // function, which is undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions
+ // with indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably
+ // don't want to inline this function.
+ if (isa<IndirectBrInst>(TI))
+ return false;
+
+ if (!HasReturn && isa<ReturnInst>(TI))
+ HasReturn = true;
+ else
+ Cost += InlineConstants::InstrCost;
+
+ // Analyze the cost of this block. If we blow through the threshold, this
+ // returns false, and we can bail out.
+ if (!analyzeBlock(BB)) {
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ return false;
+
+ // If the caller is a recursive function then we don't want to inline
+ // functions which allocate a lot of stack space because it would increase
+ // the caller stack usage dramatically.
+ if (IsCallerRecursive &&
+ AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+ return false;
+
+ break;
+ }
+
+ // Add in the live successors by first checking whether we have a terminator
+ // that may be simplified based on the values simplified by this call.
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional()) {
+ Value *Cond = BI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
+ continue;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Value *Cond = SI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
+ continue;
+ }
+ }
+
+ // If we're unable to select a particular successor, just count all of
+ // them.
+ for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
+ ++TIdx)
+ BBWorklist.insert(TI->getSuccessor(TIdx));
+
+ // If we had any successors at this point, then post-inlining is likely to
+ // have them as well. Note that we assume any basic blocks which existed
+ // due to branches or switches which folded above will also fold after
+ // inlining.
+ if (SingleBB && TI->getNumSuccessors() > 1) {
+ // Take off the bonus we applied to the threshold.
+ Threshold -= SingleBBBonus;
+ SingleBB = false;
+ }
+ }
+
+ // If this is a noduplicate call, we can still inline as long as
+ // inlining this would cause the removal of the caller (so the instruction
+ // is not actually duplicated, just moved).
+ if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
+ return false;
+
+ Threshold += VectorBonus;
+
+ return Cost < Threshold;
+}
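+
+// Note the contract: the boolean answers viability at this site, while
+// getCost() and getThreshold() expose the numbers. A small callee with
+// constant arguments typically finishes with Cost well under Threshold and
+// returns true, unless an aborting construct (recursion, returns-twice,
+// dynamic alloca, indirectbr) was encountered along the way.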
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// \brief Dump stats about this call's analysis.
+void CallAnalyzer::dump() {
+#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n"
+ DEBUG_PRINT_STAT(NumConstantArgs);
+ DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
+ DEBUG_PRINT_STAT(NumAllocaArgs);
+ DEBUG_PRINT_STAT(NumConstantPtrCmps);
+ DEBUG_PRINT_STAT(NumConstantPtrDiffs);
+ DEBUG_PRINT_STAT(NumInstructionsSimplified);
+ DEBUG_PRINT_STAT(SROACostSavings);
+ DEBUG_PRINT_STAT(SROACostSavingsLost);
+ DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
+#undef DEBUG_PRINT_STAT
+}
+#endif
+
+INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
+ true, true)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
+ true, true)
+
+char InlineCostAnalysis::ID = 0;
+
+InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {}
+
+InlineCostAnalysis::~InlineCostAnalysis() {}
+
+void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetTransformInfo>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
+}
+
+bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
+ TD = getAnalysisIfAvailable<DataLayout>();
+ TTI = &getAnalysis<TargetTransformInfo>();
+ return false;
+}
+
+InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
+ return getInlineCost(CS, CS.getCalledFunction(), Threshold);
+}
+
+InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
+ int Threshold) {
+ // Cannot inline indirect calls.
+ if (!Callee)
+ return llvm::InlineCost::getNever();
+
+ // Calls to functions with always-inline attributes should be inlined
+ // whenever possible.
+ if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::AlwaysInline)) {
+ if (isInlineViable(*Callee))
+ return llvm::InlineCost::getAlways();
+ return llvm::InlineCost::getNever();
+ }
+
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else. Don't inline functions marked noinline or call sites
+ // marked noinline.
+ if (Callee->mayBeOverridden() ||
+ Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoInline) ||
+ CS.isNoInline())
+ return llvm::InlineCost::getNever();
+
+ DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
+ << "...\n");
+
+ CallAnalyzer CA(TD, *TTI, *Callee, Threshold);
+ bool ShouldInline = CA.analyzeCall(CS);
+
+ DEBUG(CA.dump());
+
+ // Check if there was a reason to force inlining or no inlining.
+ if (!ShouldInline && CA.getCost() < CA.getThreshold())
+ return InlineCost::getNever();
+ if (ShouldInline && CA.getCost() >= CA.getThreshold())
+ return InlineCost::getAlways();
+
+ return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
+}
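+
+// A caller-side sketch (hypothetical names; ICA is a run InlineCostAnalysis
+// pass and CS a candidate call site, with 225 as an assumed threshold):
+//   InlineCost IC = ICA.getInlineCost(CS, 225);
+//   if (IC.isAlways())      { /* always inline */ }
+//   else if (IC.isNever())  { /* never inline */ }
+//   else                    { /* weigh IC.getCost() against the threshold */ }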
+
+bool InlineCostAnalysis::isInlineViable(Function &F) {
+ bool ReturnsTwice =
+ F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ReturnsTwice);
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ // Disallow inlining of functions which contain an indirect branch.
+ if (isa<IndirectBrInst>(BI->getTerminator()))
+ return false;
+
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+ ++II) {
+ CallSite CS(II);
+ if (!CS)
+ continue;
+
+ // Disallow recursive calls.
+ if (&F == CS.getCalledFunction())
+ return false;
+
+ // Disallow calls which expose returns-twice to a function not previously
+ // attributed as such.
+ if (!ReturnsTwice && CS.isCall() &&
+ cast<CallInst>(CS.getInstruction())->canReturnTwice())
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index d4221b89e0f6..b33e2cb9999e 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -14,17 +14,17 @@
#define DEBUG_TYPE "iv-users"
#include "llvm/Analysis/IVUsers.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/Type.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DataLayout.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
deleted file mode 100644
index 5f51f775f142..000000000000
--- a/lib/Analysis/InlineCost.cpp
+++ /dev/null
@@ -1,1067 +0,0 @@
-//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements inline cost analysis.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "inline-cost"
-#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/CallingConv.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-
-using namespace llvm;
-
-STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
-
-namespace {
-
-class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
- typedef InstVisitor<CallAnalyzer, bool> Base;
- friend class InstVisitor<CallAnalyzer, bool>;
-
- // DataLayout if available, or null.
- const DataLayout *const TD;
-
- // The called function.
- Function &F;
-
- int Threshold;
- int Cost;
- const bool AlwaysInline;
-
- bool IsCallerRecursive;
- bool IsRecursiveCall;
- bool ExposesReturnsTwice;
- bool HasDynamicAlloca;
- /// Number of bytes allocated statically by the callee.
- uint64_t AllocatedSize;
- unsigned NumInstructions, NumVectorInstructions;
- int FiftyPercentVectorBonus, TenPercentVectorBonus;
- int VectorBonus;
-
- // While we walk the potentially-inlined instructions, we build up and
- // maintain a mapping of simplified values specific to this callsite. The
- // idea is to propagate any special information we have about arguments to
- // this call through the inlinable section of the function, and account for
- // likely simplifications post-inlining. The most important aspect we track
- // is CFG-altering simplifications -- when we prove a basic block dead, that
- // can cause dramatic shifts in the cost of inlining a function.
- DenseMap<Value *, Constant *> SimplifiedValues;
-
- // Keep track of the values which map back (through function arguments) to
- // allocas on the caller stack which could be simplified through SROA.
- DenseMap<Value *, Value *> SROAArgValues;
-
- // The mapping of caller Alloca values to their accumulated cost savings. If
- // we have to disable SROA for one of the allocas, this tells us how much
- // cost must be added.
- DenseMap<Value *, int> SROAArgCosts;
-
- // Keep track of values which map to a pointer base and constant offset.
- DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
-
- // Custom simplification helper routines.
- bool isAllocaDerivedArg(Value *V);
- bool lookupSROAArgAndCost(Value *V, Value *&Arg,
- DenseMap<Value *, int>::iterator &CostIt);
- void disableSROA(DenseMap<Value *, int>::iterator CostIt);
- void disableSROA(Value *V);
- void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
- int InstructionCost);
- bool handleSROACandidate(bool IsSROAValid,
- DenseMap<Value *, int>::iterator CostIt,
- int InstructionCost);
- bool isGEPOffsetConstant(GetElementPtrInst &GEP);
- bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
- ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
-
- // Custom analysis routines.
- bool analyzeBlock(BasicBlock *BB);
-
- // Disable several entry points to the visitor by declaring but not defining
- // them, so we don't accidentally use them.
- void visit(Module *); void visit(Module &);
- void visit(Function *); void visit(Function &);
- void visit(BasicBlock *); void visit(BasicBlock &);
-
- // Provide base case for our instruction visit.
- bool visitInstruction(Instruction &I);
-
- // Our visit overrides.
- bool visitAlloca(AllocaInst &I);
- bool visitPHI(PHINode &I);
- bool visitGetElementPtr(GetElementPtrInst &I);
- bool visitBitCast(BitCastInst &I);
- bool visitPtrToInt(PtrToIntInst &I);
- bool visitIntToPtr(IntToPtrInst &I);
- bool visitCastInst(CastInst &I);
- bool visitUnaryInstruction(UnaryInstruction &I);
- bool visitICmp(ICmpInst &I);
- bool visitSub(BinaryOperator &I);
- bool visitBinaryOperator(BinaryOperator &I);
- bool visitLoad(LoadInst &I);
- bool visitStore(StoreInst &I);
- bool visitCallSite(CallSite CS);
-
-public:
- CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold)
- : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
- AlwaysInline(F.getFnAttributes().hasAttribute(Attributes::AlwaysInline)),
- IsCallerRecursive(false), IsRecursiveCall(false),
- ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
- NumInstructions(0), NumVectorInstructions(0),
- FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) {
- }
-
- bool analyzeCall(CallSite CS);
-
- int getThreshold() { return Threshold; }
- int getCost() { return Cost; }
- bool isAlwaysInline() { return AlwaysInline; }
-
- // Keep a bunch of stats about the cost savings found so we can print them
- // out when debugging.
- unsigned NumConstantArgs;
- unsigned NumConstantOffsetPtrArgs;
- unsigned NumAllocaArgs;
- unsigned NumConstantPtrCmps;
- unsigned NumConstantPtrDiffs;
- unsigned NumInstructionsSimplified;
- unsigned SROACostSavings;
- unsigned SROACostSavingsLost;
-
- void dump();
-};
-
-} // namespace
-
-/// \brief Test whether the given value is an Alloca-derived function argument.
-bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
- return SROAArgValues.count(V);
-}
-
-/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
-/// Returns false if V does not map to a SROA-candidate.
-bool CallAnalyzer::lookupSROAArgAndCost(
- Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
- if (SROAArgValues.empty() || SROAArgCosts.empty())
- return false;
-
- DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
- if (ArgIt == SROAArgValues.end())
- return false;
-
- Arg = ArgIt->second;
- CostIt = SROAArgCosts.find(Arg);
- return CostIt != SROAArgCosts.end();
-}
-
-/// \brief Disable SROA for the candidate marked by this cost iterator.
-///
-/// This marks the candidate as no longer viable for SROA, and adds the cost
-/// savings associated with it back into the inline cost measurement.
-void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
- // If we're no longer able to perform SROA we need to undo its cost savings
- // and prevent subsequent analysis.
- Cost += CostIt->second;
- SROACostSavings -= CostIt->second;
- SROACostSavingsLost += CostIt->second;
- SROAArgCosts.erase(CostIt);
-}
-
-/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
-void CallAnalyzer::disableSROA(Value *V) {
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(V, SROAArg, CostIt))
- disableSROA(CostIt);
-}
-
-/// \brief Accumulate the given cost for a particular SROA candidate.
-void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
- int InstructionCost) {
- CostIt->second += InstructionCost;
- SROACostSavings += InstructionCost;
-}
-
-/// \brief Helper for the common pattern of handling a SROA candidate.
-/// Either accumulates the cost savings if the SROA remains valid, or disables
-/// SROA for the candidate.
-bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
- DenseMap<Value *, int>::iterator CostIt,
- int InstructionCost) {
- if (IsSROAValid) {
- accumulateSROACost(CostIt, InstructionCost);
- return true;
- }
-
- disableSROA(CostIt);
- return false;
-}
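As a toy restatement of the ledger these helpers maintain (illustrative only; plain ints stand in for the DenseMap bookkeeping):

    int Cost = 0, SROACostSavings = 0, SROACostSavingsLost = 0;
    void accumulate(int InstrCost) { SROACostSavings += InstrCost; }
    void disable(int Accumulated) {
      Cost += Accumulated;                 // undo the optimistic discount
      SROACostSavings -= Accumulated;
      SROACostSavingsLost += Accumulated;  // remembered for the debug dump
    }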
-
-/// \brief Check whether a GEP's indices are all constant.
-///
-/// Respects any simplified values known during the analysis of this callsite.
-bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
- for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
- if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
- return false;
-
- return true;
-}
-
-/// \brief Accumulate a constant GEP offset into an APInt if possible.
-///
-/// Returns false if unable to compute the offset for any reason. Respects any
-/// simplified values known during the analysis of this callsite.
-bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
- if (!TD)
- return false;
-
- unsigned IntPtrWidth = TD->getPointerSizeInBits();
- assert(IntPtrWidth == Offset.getBitWidth());
-
- for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
- GTI != GTE; ++GTI) {
- ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
- if (!OpC)
- if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
- OpC = dyn_cast<ConstantInt>(SimpleOp);
- if (!OpC)
- return false;
- if (OpC->isZero()) continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = TD->getStructLayout(STy);
- Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
- continue;
- }
-
- APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
- Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
- }
- return true;
-}
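A standalone sketch of the arithmetic above (the field offsets and struct size are assumed stand-ins for the DataLayout queries):

    #include <cstdint>
    // Offsets for struct S { int32_t a; int64_t b; } on a typical 64-bit
    // ABI: a at 0, b at 8 after alignment padding; sizeof(S) == 16.
    uint64_t gepOffset(int64_t ArrayIdx, unsigned FieldIdx) {
      const uint64_t FieldOffsets[] = {0, 8}; // SL->getElementOffset(...)
      const uint64_t StructSize = 16;         // getTypeAllocSize(S)
      return ArrayIdx * StructSize + FieldOffsets[FieldIdx];
    }
    // gepOffset(2, 1) == 40, matching "getelementptr %S* %p, i64 2, i32 1".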
-
-bool CallAnalyzer::visitAlloca(AllocaInst &I) {
- // FIXME: Check whether inlining will turn a dynamic alloca into a static
- // alloca, and handle that case.
-
- // Accumulate the allocated size.
- if (I.isStaticAlloca()) {
- Type *Ty = I.getAllocatedType();
- AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) :
- Ty->getPrimitiveSizeInBits());
- }
-
- // We will happily inline static alloca instructions or dynamic alloca
- // instructions in always-inline situations.
- if (AlwaysInline || I.isStaticAlloca())
- return Base::visitAlloca(I);
-
- // FIXME: This is overly conservative. Dynamic allocas are inefficient for
- // a variety of reasons, and so we would like to not inline them into
- // functions which don't currently have a dynamic alloca. This simply
- // disables inlining altogether in the presence of a dynamic alloca.
- HasDynamicAlloca = true;
- return false;
-}
-
-bool CallAnalyzer::visitPHI(PHINode &I) {
- // FIXME: We should potentially be tracking values through phi nodes,
- // especially when they collapse to a single value due to deleted CFG edges
- // during inlining.
-
- // FIXME: We need to propagate SROA *disabling* through phi nodes, even
- // though we don't want to propagate its bonuses. The idea is to disable
- // SROA if it *might* be used in an inappropriate manner.
-
- // Phi nodes are always zero-cost.
- return true;
-}
-
-bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
- SROAArg, CostIt);
-
- // Try to fold GEPs of constant-offset call site argument pointers. This
- // requires target data and inbounds GEPs.
- if (TD && I.isInBounds()) {
- // Check if we have a base + offset for the pointer.
- Value *Ptr = I.getPointerOperand();
- std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
- if (BaseAndOffset.first) {
- // Check if the offset of this GEP is constant, and if so accumulate it
- // into Offset.
- if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
- // Non-constant GEPs aren't folded, and disable SROA.
- if (SROACandidate)
- disableSROA(CostIt);
- return false;
- }
-
- // Add the result as a new mapping to Base + Offset.
- ConstantOffsetPtrs[&I] = BaseAndOffset;
-
- // Also handle SROA candidates here; we already know that the GEP is
- // all-constant indexed.
- if (SROACandidate)
- SROAArgValues[&I] = SROAArg;
-
- return true;
- }
- }
-
- if (isGEPOffsetConstant(I)) {
- if (SROACandidate)
- SROAArgValues[&I] = SROAArg;
-
- // Constant GEPs are modeled as free.
- return true;
- }
-
- // Variable GEPs will require math and will disable SROA.
- if (SROACandidate)
- disableSROA(CostIt);
- return false;
-}
-
-bool CallAnalyzer::visitBitCast(BitCastInst &I) {
- // Propagate constants through bitcasts.
- if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
- if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
-
- // Track base/offsets through casts
- std::pair<Value *, APInt> BaseAndOffset
- = ConstantOffsetPtrs.lookup(I.getOperand(0));
- // Casts don't change the offset, just wrap it up.
- if (BaseAndOffset.first)
- ConstantOffsetPtrs[&I] = BaseAndOffset;
-
- // Also look for SROA candidates here.
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
- SROAArgValues[&I] = SROAArg;
-
- // Bitcasts are always zero cost.
- return true;
-}
-
-bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
- // Propagate constants through ptrtoint.
- if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
- if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
-
- // Track base/offset pairs when converted to a plain integer provided the
- // integer is large enough to represent the pointer.
- unsigned IntegerSize = I.getType()->getScalarSizeInBits();
- if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
- std::pair<Value *, APInt> BaseAndOffset
- = ConstantOffsetPtrs.lookup(I.getOperand(0));
- if (BaseAndOffset.first)
- ConstantOffsetPtrs[&I] = BaseAndOffset;
- }
-
- // This is really weird. Technically, ptrtoint will disable SROA. However,
- // unless that ptrtoint is *used* somewhere in the live basic blocks after
- // inlining, it will be nuked, and SROA should proceed. All of the uses which
- // would block SROA would also block SROA if applied directly to a pointer,
- // and so we can just add the integer in here. The only places where SROA is
- // preserved either cannot fire on an integer, or won't in and of themselves
- // disable SROA (ext) without some later use that we would see and disable.
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
- SROAArgValues[&I] = SROAArg;
-
- return isInstructionFree(&I, TD);
-}
-
-bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
- // Propagate constants through inttoptr.
- if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
- if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
-
- // Track base/offset pairs when round-tripped through a pointer without
- // modifications provided the integer is not too large.
- Value *Op = I.getOperand(0);
- unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
- if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
- std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
- if (BaseAndOffset.first)
- ConstantOffsetPtrs[&I] = BaseAndOffset;
- }
-
- // "Propagate" SROA here in the same manner as we do for ptrtoint above.
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
- SROAArgValues[&I] = SROAArg;
-
- return isInstructionFree(&I, TD);
-}
-
-bool CallAnalyzer::visitCastInst(CastInst &I) {
- // Propagate constants through casts.
- if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
- if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
-
- // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
- disableSROA(I.getOperand(0));
-
- return isInstructionFree(&I, TD);
-}
-
-bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
- Value *Operand = I.getOperand(0);
- Constant *Ops[1] = { dyn_cast<Constant>(Operand) };
- if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand)))
- if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
- Ops, TD)) {
- SimplifiedValues[&I] = C;
- return true;
- }
-
- // Disable any SROA on the argument to arbitrary unary operators.
- disableSROA(Operand);
-
- return false;
-}
-
-bool CallAnalyzer::visitICmp(ICmpInst &I) {
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- // First try to handle simplified comparisons.
- if (!isa<Constant>(LHS))
- if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
- LHS = SimpleLHS;
- if (!isa<Constant>(RHS))
- if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
- RHS = SimpleRHS;
- if (Constant *CLHS = dyn_cast<Constant>(LHS))
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
- SimplifiedValues[&I] = C;
- return true;
- }
-
- // Otherwise look for a comparison between constant offset pointers with
- // a common base.
- Value *LHSBase, *RHSBase;
- APInt LHSOffset, RHSOffset;
- llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
- if (LHSBase) {
- llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
- if (RHSBase && LHSBase == RHSBase) {
- // We have common bases, fold the icmp to a constant based on the
- // offsets.
- Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
- Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
- if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
- SimplifiedValues[&I] = C;
- ++NumConstantPtrCmps;
- return true;
- }
- }
- }
-
- // If the comparison is an equality comparison with null, we can simplify it
- // for any alloca-derived argument.
- if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
- if (isAllocaDerivedArg(I.getOperand(0))) {
- // We can actually predict the result of comparisons between an
- // alloca-derived value and null. Note that this fires regardless of
- // SROA firing.
- bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
- SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
- : ConstantInt::getFalse(I.getType());
- return true;
- }
-
- // Finally check for SROA candidates in comparisons.
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
- if (isa<ConstantPointerNull>(I.getOperand(1))) {
- accumulateSROACost(CostIt, InlineConstants::InstrCost);
- return true;
- }
-
- disableSROA(CostIt);
- }
-
- return false;
-}
-
-bool CallAnalyzer::visitSub(BinaryOperator &I) {
- // Try to handle a special case: we can fold computing the difference of two
- // constant-related pointers.
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- Value *LHSBase, *RHSBase;
- APInt LHSOffset, RHSOffset;
- llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
- if (LHSBase) {
- llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
- if (RHSBase && LHSBase == RHSBase) {
- // We have common bases, fold the subtract to a constant based on the
- // offsets.
- Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
- Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
- if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
- SimplifiedValues[&I] = C;
- ++NumConstantPtrDiffs;
- return true;
- }
- }
- }
-
- // Otherwise, fall back to the generic logic for simplifying and handling
- // instructions.
- return Base::visitSub(I);
-}
-
-bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (!isa<Constant>(LHS))
- if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
- LHS = SimpleLHS;
- if (!isa<Constant>(RHS))
- if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
- RHS = SimpleRHS;
- Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD);
- if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
- SimplifiedValues[&I] = C;
- return true;
- }
-
- // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
- disableSROA(LHS);
- disableSROA(RHS);
-
- return false;
-}
-
-bool CallAnalyzer::visitLoad(LoadInst &I) {
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
- if (I.isSimple()) {
- accumulateSROACost(CostIt, InlineConstants::InstrCost);
- return true;
- }
-
- disableSROA(CostIt);
- }
-
- return false;
-}
-
-bool CallAnalyzer::visitStore(StoreInst &I) {
- Value *SROAArg;
- DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
- if (I.isSimple()) {
- accumulateSROACost(CostIt, InlineConstants::InstrCost);
- return true;
- }
-
- disableSROA(CostIt);
- }
-
- return false;
-}
-
-bool CallAnalyzer::visitCallSite(CallSite CS) {
- if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
- !F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice)) {
- // This aborts the entire analysis.
- ExposesReturnsTwice = true;
- return false;
- }
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
- switch (II->getIntrinsicID()) {
- default:
- return Base::visitCallSite(CS);
-
- case Intrinsic::memset:
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- // SROA can usually chew through these intrinsics, but they aren't free.
- return false;
- }
- }
-
- if (Function *F = CS.getCalledFunction()) {
- if (F == CS.getInstruction()->getParent()->getParent()) {
- // This flag will fully abort the analysis, so don't bother with anything
- // else.
- IsRecursiveCall = true;
- return false;
- }
-
- if (!callIsSmall(CS)) {
- // We account for an average of one instruction per call argument setup
- // here.
- Cost += CS.arg_size() * InlineConstants::InstrCost;
-
- // Everything other than inline ASM will also have a significant cost
- // merely from making the call.
- if (!isa<InlineAsm>(CS.getCalledValue()))
- Cost += InlineConstants::CallPenalty;
- }
-
- return Base::visitCallSite(CS);
- }
-
- // Otherwise we're in a very special case -- an indirect function call. See
- // if we can be particularly clever about this.
- Value *Callee = CS.getCalledValue();
-
- // First, pay the price of the argument setup. We account for an average of
- // one instruction per call argument setup here.
- Cost += CS.arg_size() * InlineConstants::InstrCost;
-
- // Next, check if this happens to be an indirect function call to a known
- // function in this inline context. If not, we've done all we can.
- Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
- if (!F)
- return Base::visitCallSite(CS);
-
- // If we have a constant that we are calling as a function, we can peer
- // through it and see the function target. This happens not infrequently
- // during devirtualization and so we want to give it a hefty bonus for
- // inlining, but cap that bonus in the event that inlining wouldn't pan
- // out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold);
- if (CA.analyzeCall(CS)) {
- // We were able to inline the indirect call! Subtract the cost from the
- // bonus we want to apply, but don't go below zero.
- Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
- }
-
- return Base::visitCallSite(CS);
-}
-
-bool CallAnalyzer::visitInstruction(Instruction &I) {
- // Some instructions are free. All of the free intrinsics can also be
- // handled by SROA, etc.
- if (isInstructionFree(&I, TD))
- return true;
-
- // We found something we don't understand or can't handle. Mark any SROA-able
- // values in the operand list as no longer viable.
- for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
- disableSROA(*OI);
-
- return false;
-}
-
-
-/// \brief Analyze a basic block for its contribution to the inline cost.
-///
-/// This method walks the analyzer over every instruction in the given basic
-/// block and accounts for their cost during inlining at this callsite. It
-/// aborts early if the threshold has been exceeded or an impossible to inline
-/// construct has been detected. It returns false if inlining is no longer
-/// viable, and true if inlining remains viable.
-bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
- for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
- I != E; ++I) {
- ++NumInstructions;
- if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
- ++NumVectorInstructions;
-
- // If the instruction simplified to a constant, there is no cost to this
- // instruction. Visit the instructions using our InstVisitor to account for
- // all of the per-instruction logic. The visit tree returns true if we
- // consumed the instruction in any way, and false if the instruction's base
- // cost should count against inlining.
- if (Base::visit(I))
- ++NumInstructionsSimplified;
- else
- Cost += InlineConstants::InstrCost;
-
- // If visiting this instruction detected an uninlinable pattern, abort.
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
- return false;
-
- // If the caller is a recursive function then we don't want to inline
- // functions which allocate a lot of stack space because it would increase
- // the caller stack usage dramatically.
- if (IsCallerRecursive &&
- AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
- return false;
-
- if (NumVectorInstructions > NumInstructions/2)
- VectorBonus = FiftyPercentVectorBonus;
- else if (NumVectorInstructions > NumInstructions/10)
- VectorBonus = TenPercentVectorBonus;
- else
- VectorBonus = 0;
-
- // Check if we've passed the threshold so we don't spin in huge basic
- // blocks that will never inline.
- if (!AlwaysInline && Cost > (Threshold + VectorBonus))
- return false;
- }
-
- return true;
-}
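The bonus-tier selection above can be restated in isolation (thresholds copied from the loop; the bonus magnitudes themselves are set in analyzeCall):

    int selectVectorBonus(unsigned NumInsts, unsigned NumVecInsts,
                          int FiftyPercentBonus, int TenPercentBonus) {
      if (NumVecInsts > NumInsts / 2)   // more than half are vector ops
        return FiftyPercentBonus;
      if (NumVecInsts > NumInsts / 10)  // more than ten percent are
        return TenPercentBonus;
      return 0;
    }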
-
-/// \brief Compute the base pointer and cumulative constant offsets for V.
-///
-/// This strips all constant offsets off of V, leaving it the base pointer, and
-/// accumulates the total constant offset applied in the returned constant. It
-/// returns 0 if V is not a pointer, and returns the constant '0' if there are
-/// no constant offsets applied.
-ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
- if (!TD || !V->getType()->isPointerTy())
- return 0;
-
- unsigned IntPtrWidth = TD->getPointerSizeInBits();
- APInt Offset = APInt::getNullValue(IntPtrWidth);
-
- // Even though we don't look through PHI nodes, we could be called on an
- // instruction in an unreachable block, which may be on a cycle.
- SmallPtrSet<Value *, 4> Visited;
- Visited.insert(V);
- do {
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
- return 0;
- V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast) {
- V = cast<Operator>(V)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->mayBeOverridden())
- break;
- V = GA->getAliasee();
- } else {
- break;
- }
- assert(V->getType()->isPointerTy() && "Unexpected operand type!");
- } while (Visited.insert(V));
-
- Type *IntPtrTy = TD->getIntPtrType(V->getContext());
- return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
-}
-
-/// \brief Analyze a call site for potential inlining.
-///
-/// Returns true if inlining this call is viable, and false if it is not
-/// viable. It computes the cost and adjusts the threshold based on numerous
-/// factors and heuristics. If this method returns false but the computed cost
-/// is below the computed threshold, then inlining was forcibly disabled by
- /// some artifact of the routine.
-bool CallAnalyzer::analyzeCall(CallSite CS) {
- ++NumCallsAnalyzed;
-
- // Track whether the post-inlining function would have more than one basic
- // block. A single basic block is often intended for inlining. Balloon the
- // threshold by 50% until we pass the single-BB phase.
- bool SingleBB = true;
- int SingleBBBonus = Threshold / 2;
- Threshold += SingleBBBonus;
-
- // Unless we are always-inlining, perform some tweaks to the cost and
- // threshold based on the direct callsite information.
- if (!AlwaysInline) {
- // We want to more aggressively inline vector-dense kernels, so up the
- // threshold, and we'll lower it if the percentage of vector instructions
- // gets too low.
- assert(NumInstructions == 0);
- assert(NumVectorInstructions == 0);
- FiftyPercentVectorBonus = Threshold;
- TenPercentVectorBonus = Threshold / 2;
-
- // Give out bonuses per argument, as the instructions setting them up will
- // be gone after inlining.
- for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
- if (TD && CS.isByValArgument(I)) {
- // We approximate the number of loads and stores needed by dividing the
- // size of the byval type by the target's pointer size.
- PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
- unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
- unsigned PointerSize = TD->getPointerSizeInBits();
- // Ceiling division.
- unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
-
- // If it generates more than 8 stores it is likely to be expanded as an
- // inline memcpy so we take that as an upper bound. Otherwise we assume
- // one load and one store per word copied.
- // FIXME: The maxStoresPerMemcpy setting from the target should be used
- // here instead of a magic number of 8, but it's not available via
- // DataLayout.
- NumStores = std::min(NumStores, 8U);
-
- Cost -= 2 * NumStores * InlineConstants::InstrCost;
- } else {
- // For non-byval arguments subtract off one instruction per call
- // argument.
- Cost -= InlineConstants::InstrCost;
- }
- }
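Illustrative numbers for the byval accounting above (the bit widths are assumptions):

    unsigned byvalStoreCount(unsigned TypeSizeBits, unsigned PtrSizeBits) {
      unsigned NumStores = (TypeSizeBits + PtrSizeBits - 1) / PtrSizeBits;
      return NumStores < 8 ? NumStores : 8;  // capped at 8, as above
    }
    // byvalStoreCount(196, 64) == 4, credited as 2 * 4 * InstrCost;
    // byvalStoreCount(1024, 64) == 8 (hits the memcpy-expansion cap).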
-
- // If there is only one call of the function, and it has internal linkage,
- // the cost of inlining it drops dramatically.
- if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
- Cost += InlineConstants::LastCallToStaticBonus;
-
- // If the instruction following the call, or the first instruction in the
- // invoke's normal destination, is unreachable, the function is effectively
- // noreturn. As such,
- // there is little point in inlining this unless there is literally zero
- // cost.
- Instruction *Instr = CS.getInstruction();
- if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
- if (isa<UnreachableInst>(II->getNormalDest()->begin()))
- Threshold = 1;
- } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
- Threshold = 1;
-
- // If this function uses the coldcc calling convention, prefer not to inline
- // it.
- if (F.getCallingConv() == CallingConv::Cold)
- Cost += InlineConstants::ColdccPenalty;
-
- // Check if we're done. This can happen due to bonuses and penalties.
- if (Cost > Threshold)
- return false;
- }
-
- if (F.empty())
- return true;
-
- Function *Caller = CS.getInstruction()->getParent()->getParent();
- // Check if the caller function is recursive itself.
- for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
- U != E; ++U) {
- CallSite Site(cast<Value>(*U));
- if (!Site)
- continue;
- Instruction *I = Site.getInstruction();
- if (I->getParent()->getParent() == Caller) {
- IsCallerRecursive = true;
- break;
- }
- }
-
- // Track whether we've seen a return instruction. The first return
- // instruction is free, as at least one will usually disappear in inlining.
- bool HasReturn = false;
-
- // Populate our simplified values by mapping from function arguments to call
- // arguments with known important simplifications.
- CallSite::arg_iterator CAI = CS.arg_begin();
- for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
- FAI != FAE; ++FAI, ++CAI) {
- assert(CAI != CS.arg_end());
- if (Constant *C = dyn_cast<Constant>(CAI))
- SimplifiedValues[FAI] = C;
-
- Value *PtrArg = *CAI;
- if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
- ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
-
- // We can SROA any pointer arguments derived from alloca instructions.
- if (isa<AllocaInst>(PtrArg)) {
- SROAArgValues[FAI] = PtrArg;
- SROAArgCosts[PtrArg] = 0;
- }
- }
- }
- NumConstantArgs = SimplifiedValues.size();
- NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
- NumAllocaArgs = SROAArgValues.size();
-
- // The worklist of live basic blocks in the callee *after* inlining. We avoid
- // adding basic blocks of the callee which can be proven to be dead for this
- // particular call site in order to get more accurate cost estimates. This
- // requires a somewhat heavyweight iteration pattern: we need to walk the
- // basic blocks in a breadth-first order as we insert live successors. To
- // accomplish this we use a small-size-optimized SetVector, which favors
- // small iterations because we exit early once we cross our threshold.
- typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
- SmallPtrSet<BasicBlock *, 16> > BBSetVector;
- BBSetVector BBWorklist;
- BBWorklist.insert(&F.getEntryBlock());
- // Note that we *must not* cache the size, this loop grows the worklist.
- for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
- // Bail out the moment we cross the threshold. This means we'll undercount
- // the cost, but only when undercounting doesn't matter.
- if (!AlwaysInline && Cost > (Threshold + VectorBonus))
- break;
-
- BasicBlock *BB = BBWorklist[Idx];
- if (BB->empty())
- continue;
-
- // Handle the terminator cost here where we can track returns and other
- // function-wide constructs.
- TerminatorInst *TI = BB->getTerminator();
-
- // We never want to inline functions that contain an indirectbr. Doing so
- // would be incorrect because all the blockaddresses (in static global
- // initializers, for example) would still refer to the original function,
- // so the indirect jump would jump from the inlined copy of the function
- // into the original function, which is extremely undefined behavior.
- // FIXME: This logic isn't really right; we can safely inline functions
- // with indirectbr's as long as no other function or global references the
- // blockaddress of a block within the current function. And as a QOI issue,
- // if someone is using a blockaddress without an indirectbr, and that
- // reference somehow ends up in another function or global, we probably
- // don't want to inline this function.
- if (isa<IndirectBrInst>(TI))
- return false;
-
- if (!HasReturn && isa<ReturnInst>(TI))
- HasReturn = true;
- else
- Cost += InlineConstants::InstrCost;
-
- // Analyze the cost of this block. If we blow through the threshold, this
- // returns false, and we can bail out.
- if (!analyzeBlock(BB)) {
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
- return false;
-
- // If the caller is a recursive function then we don't want to inline
- // functions which allocate a lot of stack space because it would increase
- // the caller stack usage dramatically.
- if (IsCallerRecursive &&
- AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
- return false;
-
- break;
- }
-
- // Add in the live successors by first checking whether we have a terminator
- // that may be simplified based on the values simplified by this call.
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isConditional()) {
- Value *Cond = BI->getCondition();
- if (ConstantInt *SimpleCond
- = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
- BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
- continue;
- }
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- Value *Cond = SI->getCondition();
- if (ConstantInt *SimpleCond
- = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
- BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
- continue;
- }
- }
-
- // If we're unable to select a particular successor, just count all of
- // them.
- for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
- ++TIdx)
- BBWorklist.insert(TI->getSuccessor(TIdx));
-
- // If we had any successors at this point, then post-inlining is likely to
- // have them as well. Note that we assume any basic blocks which existed
- // due to branches or switches which folded above will also fold after
- // inlining.
- if (SingleBB && TI->getNumSuccessors() > 1) {
- // Take off the bonus we applied to the threshold.
- Threshold -= SingleBBBonus;
- SingleBB = false;
- }
- }
-
- Threshold += VectorBonus;
-
- return AlwaysInline || Cost < Threshold;
-}
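A worked example of the single-BB bonus bookkeeping (the 225 base is the usual default threshold, assumed here; the patch itself only shows the arithmetic):

    int effectiveThreshold(int Base, bool StaysSingleBB) {
      int SingleBBBonus = Base / 2;          // 112 for Base == 225
      int Threshold = Base + SingleBBBonus;  // 337 during the single-BB phase
      if (!StaysSingleBB)
        Threshold -= SingleBBBonus;  // revoked at the first terminator with
      return Threshold;              // more than one successor
    }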
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-/// \brief Dump stats about this call's analysis.
-void CallAnalyzer::dump() {
-#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n"
- DEBUG_PRINT_STAT(NumConstantArgs);
- DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
- DEBUG_PRINT_STAT(NumAllocaArgs);
- DEBUG_PRINT_STAT(NumConstantPtrCmps);
- DEBUG_PRINT_STAT(NumConstantPtrDiffs);
- DEBUG_PRINT_STAT(NumInstructionsSimplified);
- DEBUG_PRINT_STAT(SROACostSavings);
- DEBUG_PRINT_STAT(SROACostSavingsLost);
-#undef DEBUG_PRINT_STAT
-}
-#endif
-
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) {
- return getInlineCost(CS, CS.getCalledFunction(), Threshold);
-}
-
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
- int Threshold) {
- // Don't inline functions which can be redefined at link-time to mean
- // something else. Don't inline functions marked noinline or call sites
- // marked noinline.
- if (!Callee || Callee->mayBeOverridden() ||
- Callee->getFnAttributes().hasAttribute(Attributes::NoInline) ||
- CS.isNoInline())
- return llvm::InlineCost::getNever();
-
- DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
- << "...\n");
-
- CallAnalyzer CA(TD, *Callee, Threshold);
- bool ShouldInline = CA.analyzeCall(CS);
-
- DEBUG(CA.dump());
-
- // Check if there was a reason to force or forbid inlining.
- if (!ShouldInline && CA.getCost() < CA.getThreshold())
- return InlineCost::getNever();
- if (ShouldInline && (CA.isAlwaysInline() ||
- CA.getCost() >= CA.getThreshold()))
- return InlineCost::getAlways();
-
- return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
-}
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 3b385d26ba3c..75a49eb90a88 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -13,13 +13,13 @@
#define DEBUG_TYPE "instcount"
#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
-#include "llvm/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(TotalInsts , "Number of instructions (of all types)");
@@ -30,7 +30,7 @@ STATISTIC(TotalMemInst, "Number of memory instructions");
#define HANDLE_INST(N, OPCODE, CLASS) \
STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
-#include "llvm/Instruction.def"
+#include "llvm/IR/Instruction.def"
namespace {
@@ -43,7 +43,7 @@ namespace {
#define HANDLE_INST(N, OPCODE, CLASS) \
void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; }
-#include "llvm/Instruction.def"
+#include "llvm/IR/Instruction.def"
void visitInstruction(Instruction &I) {
errs() << "Instruction Count does not know about " << I;
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index a76e5ad1b8f8..4a3c74e9db35 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -18,20 +18,20 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "instsimplify"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Operator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/ValueHandle.h"
-#include "llvm/DataLayout.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -657,51 +657,26 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
RecursionLimit);
}
-/// \brief Accumulate the constant integer offset a GEP represents.
-///
-/// Given a getelementptr instruction/constantexpr, accumulate the constant
-/// offset from the base pointer into the provided APInt 'Offset'. Returns true
-/// if the GEP has all-constant indices. Returns false if any non-constant
-/// index is encountered leaving the 'Offset' in an undefined state. The
-/// 'Offset' APInt must be the bitwidth of the target's pointer size.
-static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP,
- APInt &Offset) {
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
- assert(IntPtrWidth == Offset.getBitWidth());
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E;
- ++I, ++GTI) {
- ConstantInt *OpC = dyn_cast<ConstantInt>(*I);
- if (!OpC) return false;
- if (OpC->isZero()) continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = TD.getStructLayout(STy);
- Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
- continue;
- }
-
- APInt TypeSize(IntPtrWidth, TD.getTypeAllocSize(GTI.getIndexedType()));
- Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
- }
- return true;
-}
-
/// \brief Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
-static Constant *stripAndComputeConstantOffsets(const DataLayout &TD,
+///
+/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't
+/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
+/// folding.
+static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
Value *&V) {
- if (!V->getType()->isPointerTy())
- return 0;
+ assert(V->getType()->getScalarType()->isPointerTy());
+
+ // Without DataLayout, just be conservative for now. Theoretically, more could
+ // be done in this case.
+ if (!TD)
+ return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0);
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
APInt Offset = APInt::getNullValue(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
@@ -710,7 +685,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD,
Visited.insert(V);
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->isInBounds() || !accumulateGEPOffset(TD, GEP, Offset))
+ if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset))
break;
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -722,23 +697,24 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD,
} else {
break;
}
- assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ assert(V->getType()->getScalarType()->isPointerTy() &&
+ "Unexpected operand type!");
} while (Visited.insert(V));
- Type *IntPtrTy = TD.getIntPtrType(V->getContext());
- return ConstantInt::get(IntPtrTy, Offset);
+ Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+ Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
+ if (V->getType()->isVectorTy())
+ return ConstantVector::getSplat(V->getType()->getVectorNumElements(),
+ OffsetIntPtr);
+ return OffsetIntPtr;
}
/// \brief Compute the constant difference between two pointer values.
/// If the difference is not a constant, returns zero.
-static Constant *computePointerDifference(const DataLayout &TD,
+static Constant *computePointerDifference(const DataLayout *TD,
Value *LHS, Value *RHS) {
Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
- if (!LHSOffset)
- return 0;
Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
- if (!RHSOffset)
- return 0;
// If LHS and RHS are not related via constant offsets to the same base
// value, there is nothing we can do here.
@@ -852,9 +828,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return W;
// Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
- if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) &&
+ if (match(Op0, m_PtrToInt(m_Value(X))) &&
match(Op1, m_PtrToInt(m_Value(Y))))
- if (Constant *Result = computePointerDifference(*Q.TD, X, Y))
+ if (Constant *Result = computePointerDifference(Q.TD, X, Y))
return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
// Mul distributes over Sub. Try some generic simplifications based on this.
@@ -886,6 +862,112 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
RecursionLimit);
}
+/// Given operands for an FAdd, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::FAdd, CLHS->getType(),
+ Ops, Q.TD, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // fadd X, -0 ==> X
+ if (match(Op1, m_NegZero()))
+ return Op0;
+
+ // fadd X, 0 ==> X, when we know X is not -0
+ if (match(Op1, m_Zero()) &&
+ (FMF.noSignedZeros() || CannotBeNegativeZero(Op0)))
+ return Op0;
+
+ // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
+ // where nnan and ninf have to occur at least once somewhere in this
+ // expression.
+ Value *SubOp = 0;
+ if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0))))
+ SubOp = Op1;
+ else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1))))
+ SubOp = Op0;
+ if (SubOp) {
+ Instruction *FSub = cast<Instruction>(SubOp);
+ if ((FMF.noNaNs() || FSub->hasNoNaNs()) &&
+ (FMF.noInfs() || FSub->hasNoInfs()))
+ return Constant::getNullValue(Op0->getType());
+ }
+
+ return 0;
+}
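A quick numeric check of why the zero folds above are gated the way they are (standard C++, illustrative): adding -0.0 is an identity for every float, while adding +0.0 is not, since it maps -0.0 to +0.0.

    #include <cassert>
    #include <cmath>
    int main() {
      float x = -0.0f;
      // fadd X, -0 preserves X even at X == -0 ...
      assert(std::signbit(x + -0.0f));
      // ... but fadd X, +0 flips -0 to +0, hence the nsz /
      // CannotBeNegativeZero guard on the second fold.
      assert(!std::signbit(x + 0.0f));
    }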
+
+/// Given operands for an FSub, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::FSub, CLHS->getType(),
+ Ops, Q.TD, Q.TLI);
+ }
+ }
+
+ // fsub X, 0 ==> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // fsub X, -0 ==> X, when we know X is not -0
+ if (match(Op1, m_NegZero()) &&
+ (FMF.noSignedZeros() || CannotBeNegativeZero(Op0)))
+ return Op0;
+
+ // fsub 0, (fsub -0.0, X) ==> X
+ Value *X;
+ if (match(Op0, m_AnyZero())) {
+ if (match(Op1, m_FSub(m_NegZero(), m_Value(X))))
+ return X;
+ if (FMF.noSignedZeros() && match(Op1, m_FSub(m_AnyZero(), m_Value(X))))
+ return X;
+ }
+
+ // fsub nnan ninf x, x ==> 0.0
+ if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ return 0;
+}
+
+/// Given operands for an FMul, see if we can fold the result. If not, this
+/// returns null.
+static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
+ FastMathFlags FMF,
+ const Query &Q,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::FMul, CLHS->getType(),
+ Ops, Q.TD, Q.TLI);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // fmul X, 1.0 ==> X
+ if (match(Op1, m_FPOne()))
+ return Op0;
+
+ // fmul nnan nsz X, 0 ==> 0
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
+ return Op1;
+
+ return 0;
+}
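Similarly, both flags on the X * 0 fold are necessary (standard C++, illustrative):

    #include <cassert>
    #include <cmath>
    #include <limits>
    int main() {
      double Inf = std::numeric_limits<double>::infinity();
      assert(std::isnan(Inf * 0.0));     // without nnan, X * 0 can be NaN
      assert(std::signbit(-1.0 * 0.0));  // without nsz, X * 0 can be -0.0
    }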
+
/// SimplifyMulInst - Given operands for a Mul, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
@@ -951,6 +1033,26 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
return 0;
}
+Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFAddInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
+Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFSubInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
+Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1,
+ FastMathFlags FMF,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFMulInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *TD,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
@@ -1364,9 +1466,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
- if (isPowerOfTwo(Op0, Q.TD, /*OrZero*/true))
+ if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true))
return Op0;
- if (isPowerOfTwo(Op1, Q.TD, /*OrZero*/true))
+ if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true))
return Op1;
}
@@ -1591,9 +1693,48 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
return 0;
}
-static Constant *computePointerICmp(const DataLayout &TD,
+// A significant optimization not implemented here is assuming that alloca
+// addresses are not equal to incoming argument values. They don't *alias*,
+// as we say, but that doesn't mean they aren't equal, so we take a
+// conservative approach.
+//
+// This is inspired in part by C++11 5.10p1:
+// "Two pointers of the same type compare equal if and only if they are both
+// null, both point to the same function, or both represent the same
+// address."
+//
+// This is pretty permissive.
+//
+// It's also partly due to C11 6.5.9p6:
+// "Two pointers compare equal if and only if both are null pointers, both are
+// pointers to the same object (including a pointer to an object and a
+// subobject at its beginning) or function, both are pointers to one past the
+// last element of the same array object, or one is a pointer to one past the
+// end of one array object and the other is a pointer to the start of a
+// different array object that happens to immediately follow the first array
+ // object in the address space."
+//
+// C11's version is more restrictive, however there's no reason why an argument
+// couldn't be a one-past-the-end value for a stack object in the caller and be
+// equal to the beginning of a stack object in the callee.
+//
+// If the C and C++ standards are ever made sufficiently restrictive in this
+// area, it may be possible to update LLVM's semantics accordingly and reinstate
+// this optimization.
+static Constant *computePointerICmp(const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
CmpInst::Predicate Pred,
Value *LHS, Value *RHS) {
+ // First, skip past any trivial no-ops.
+ LHS = LHS->stripPointerCasts();
+ RHS = RHS->stripPointerCasts();
+
+ // A non-null pointer is not equal to a null pointer.
+ if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) &&
+ (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
+
// We can only fold certain predicates on pointer comparisons.
switch (Pred) {
default:
@@ -1616,19 +1757,83 @@ static Constant *computePointerICmp(const DataLayout &TD,
break;
}
+ // Strip off any constant offsets so that we can reason about them.
+ // It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets
+ // here and compare base addresses like AliasAnalysis does, however there are
+ // numerous hazards. AliasAnalysis and its utilities rely on special rules
+ // governing loads and stores which don't apply to icmps. Also, AliasAnalysis
+ // doesn't need to guarantee pointer inequality when it says NoAlias.
Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
- if (!LHSOffset)
- return 0;
Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
- if (!RHSOffset)
- return 0;
- // If LHS and RHS are not related via constant offsets to the same base
- // value, there is nothing we can do here.
- if (LHS != RHS)
- return 0;
+ // If LHS and RHS are related via constant offsets to the same base
+ // value, we can replace it with an icmp which just compares the offsets.
+ if (LHS == RHS)
+ return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
+
+ // Various optimizations for (in)equality comparisons.
+ if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
+ // Different non-empty allocations that exist at the same time have
+ // different addresses (if the program can tell). Global variables always
+ // exist, so they always exist during the lifetime of each other and all
+ // allocas. Two different allocas usually have different addresses...
+ //
+ // However, if there's an @llvm.stackrestore dynamically in between two
+ // allocas, they may have the same address. It's tempting to reduce the
+ // scope of the problem by only looking at *static* allocas here. That would
+ // cover the majority of allocas while significantly reducing the likelihood
+ // of having an @llvm.stackrestore pop up in the middle. However, it's not
+ // actually impossible for an @llvm.stackrestore to pop up in the middle of
+ // an entry block. Also, if we have a block that's not attached to a
+ // function, we can't tell if it's "static" under the current definition.
+ // Theoretically, this problem could be fixed by creating a new kind of
+ // instruction specifically for static allocas. Such a new instruction
+ // could be required to be at the top of the entry block, thus preventing it
+ // from being subject to a @llvm.stackrestore. Instcombine could even
+ // convert regular allocas into these special allocas. It'd be nifty.
+ // However, until then, this problem remains open.
+ //
+ // So, we'll assume that two non-empty allocas have different addresses
+ // for now.
+ //
+ // With all that, if the offsets are within the bounds of their allocations
+ // (and not one-past-the-end! so we can't use inbounds!), and their
+ // allocations aren't the same, the pointers are not equal.
+ //
+ // Note that it's not necessary to check for LHS being a global variable
+ // address, due to canonicalization and constant folding.
+ if (isa<AllocaInst>(LHS) &&
+ (isa<AllocaInst>(RHS) || isa<GlobalVariable>(RHS))) {
+ ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset);
+ ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset);
+ uint64_t LHSSize, RHSSize;
+ if (LHSOffsetCI && RHSOffsetCI &&
+ getObjectSize(LHS, LHSSize, TD, TLI) &&
+ getObjectSize(RHS, RHSSize, TD, TLI)) {
+ const APInt &LHSOffsetValue = LHSOffsetCI->getValue();
+ const APInt &RHSOffsetValue = RHSOffsetCI->getValue();
+ if (!LHSOffsetValue.isNegative() &&
+ !RHSOffsetValue.isNegative() &&
+ LHSOffsetValue.ult(LHSSize) &&
+ RHSOffsetValue.ult(RHSSize)) {
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
+ }
+ }
- return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
+ // Repeat the above check but this time without depending on DataLayout
+ // or being able to compute a precise size.
+ if (!cast<PointerType>(LHS->getType())->isEmptyTy() &&
+ !cast<PointerType>(RHS->getType())->isEmptyTy() &&
+ LHSOffset->isNullValue() &&
+ RHSOffset->isNullValue())
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
+ }
+ }
+
+ // Otherwise, fail.
+ return 0;
}
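Illustrative C++ for the hazard the comments above describe (hypothetical names, not from the patch): the removed fold assumed a callee-local alloca can never equal an incoming argument, but a caller may pass a one-past-the-end pointer that is bit-identical to the callee-local's address.

    bool callee(int *P) {
      int Local;
      return P == &Local;  // may legitimately be true
    }
    void caller() {
      int A[4];
      callee(A + 4);  // one past the end: legal to form, pass, and compare
    }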
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
@@ -1693,62 +1898,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
- // icmp <object*>, <object*/null> - Different identified objects have
- // different addresses (unless null), and what's more the address of an
- // identified local is never equal to another argument (again, barring null).
- // Note that generalizing to the case where LHS is a global variable address
- // or null is pointless, since if both LHS and RHS are constants then we
- // already constant folded the compare, and if only one of them is then we
- // moved it to RHS already.
- Value *LHSPtr = LHS->stripPointerCasts();
- Value *RHSPtr = RHS->stripPointerCasts();
- if (LHSPtr == RHSPtr)
- return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
-
- // Be more aggressive about stripping pointer adjustments when checking a
- // comparison of an alloca address to another object. We can rip off all
- // inbounds GEP operations, even if they are variable.
- LHSPtr = LHSPtr->stripInBoundsOffsets();
- if (llvm::isIdentifiedObject(LHSPtr)) {
- RHSPtr = RHSPtr->stripInBoundsOffsets();
- if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) {
- // If both sides are different identified objects, they aren't equal
- // unless they're null.
- if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) &&
- Pred == CmpInst::ICMP_EQ)
- return ConstantInt::get(ITy, false);
-
- // A local identified object (alloca or noalias call) can't equal any
- // incoming argument, unless they're both null or they belong to
- // different functions. The latter happens during inlining.
- if (Instruction *LHSInst = dyn_cast<Instruction>(LHSPtr))
- if (Argument *RHSArg = dyn_cast<Argument>(RHSPtr))
- if (LHSInst->getParent()->getParent() == RHSArg->getParent() &&
- Pred == CmpInst::ICMP_EQ)
- return ConstantInt::get(ITy, false);
- }
-
- // Assume that the constant null is on the right.
- if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) {
- if (Pred == CmpInst::ICMP_EQ)
- return ConstantInt::get(ITy, false);
- else if (Pred == CmpInst::ICMP_NE)
- return ConstantInt::get(ITy, true);
- }
- } else if (Argument *LHSArg = dyn_cast<Argument>(LHSPtr)) {
- RHSPtr = RHSPtr->stripInBoundsOffsets();
- // An alloca can't be equal to an argument unless they come from separate
- // functions via inlining.
- if (AllocaInst *RHSInst = dyn_cast<AllocaInst>(RHSPtr)) {
- if (LHSArg->getParent() == RHSInst->getParent()->getParent()) {
- if (Pred == CmpInst::ICMP_EQ)
- return ConstantInt::get(ITy, false);
- else if (Pred == CmpInst::ICMP_NE)
- return ConstantInt::get(ITy, true);
- }
- }
- }
-
// If we are comparing with zero then try hard since this is a common case.
if (match(RHS, m_Zero())) {
bool LHSKnownNonNegative, LHSKnownNegative;
@@ -2375,8 +2524,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available.
- if (Q.TD && LHS->getType()->isPointerTy() && RHS->getType()->isPointerTy())
- if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS))
+ if (LHS->getType()->isPointerTy())
+ if (Constant *C = computePointerICmp(Q.TD, Q.TLI, Pred, LHS, RHS))
return C;
if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
@@ -2697,10 +2846,18 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
case Instruction::Add:
return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
Q, MaxRecurse);
+ case Instruction::FAdd:
+ return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+
case Instruction::Sub:
return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
Q, MaxRecurse);
+ case Instruction::FSub:
+ return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+
case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::FMul:
+ return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse);
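A hedged usage sketch of the floating-point entry points wired up above, via the public SimplifyFAddInst overload; X, TD, TLI, and DT are assumed to be in scope. With default (all-off) FastMathFlags almost nothing folds:

    // Sketch only: X + 0.0 does not fold to X here, because X could be
    // -0.0; a flag such as 'nsz' is needed to justify the fold.
    Value *R = SimplifyFAddInst(X, ConstantFP::get(X->getType(), 0.0),
                                FastMathFlags(), TD, TLI, DT);
    // R is null unless the fold is provably value-preserving.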
@@ -2768,14 +2925,88 @@ Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
RecursionLimit);
}
-static Value *SimplifyCallInst(CallInst *CI, const Query &) {
- // call undef -> undef
- if (isa<UndefValue>(CI->getCalledValue()))
- return UndefValue::get(CI->getType());
+static bool IsIdempotent(Intrinsic::ID ID) {
+ switch (ID) {
+ default: return false;
+
+ // Unary idempotent: f(f(x)) = f(x)
+ case Intrinsic::fabs:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ return true;
+ }
+}
+
+template <typename IterTy>
+static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEnd,
+ const Query &Q, unsigned MaxRecurse) {
+ // Perform idempotent optimizations
+ if (!IsIdempotent(IID))
+ return 0;
+
+ // Unary Ops
+ if (std::distance(ArgBegin, ArgEnd) == 1)
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin))
+ if (II->getIntrinsicID() == IID)
+ return II;
return 0;
}
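The idempotence property this relies on, restated as a standalone C++ check using the libm analogues of the intrinsics listed in IsIdempotent:

    #include <cassert>
    #include <cmath>

    int main() {
      double x = -2.7;
      // f(f(x)) == f(x), so a nested pair of calls can be collapsed to
      // the inner call, which is exactly what SimplifyIntrinsic returns.
      assert(std::floor(std::floor(x)) == std::floor(x));
      assert(std::fabs(std::fabs(x)) == std::fabs(x));
      return 0;
    }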
+template <typename IterTy>
+static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
+ const Query &Q, unsigned MaxRecurse) {
+ Type *Ty = V->getType();
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ Ty = PTy->getElementType();
+ FunctionType *FTy = cast<FunctionType>(Ty);
+
+ // call undef -> undef
+ if (isa<UndefValue>(V))
+ return UndefValue::get(FTy->getReturnType());
+
+ Function *F = dyn_cast<Function>(V);
+ if (!F)
+ return 0;
+
+ if (unsigned IID = F->getIntrinsicID())
+ if (Value *Ret =
+ SimplifyIntrinsic((Intrinsic::ID) IID, ArgBegin, ArgEnd, Q, MaxRecurse))
+ return Ret;
+
+ if (!canConstantFoldCallTo(F))
+ return 0;
+
+ SmallVector<Constant *, 4> ConstantArgs;
+ ConstantArgs.reserve(ArgEnd - ArgBegin);
+ for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) {
+ Constant *C = dyn_cast<Constant>(*I);
+ if (!C)
+ return 0;
+ ConstantArgs.push_back(C);
+ }
+
+ return ConstantFoldCall(F, ConstantArgs, Q.TLI);
+}
+
+Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
+ User::op_iterator ArgEnd, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(TD, TLI, DT),
+ RecursionLimit);
+}
+
+Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
+ const DataLayout *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyCall(V, Args.begin(), Args.end(), Query(TD, TLI, DT),
+ RecursionLimit);
+}
+
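A minimal caller-side sketch of the ArrayRef overload declared above; CI, TD, TLI, and DT are assumed to be in scope. The callee of a CallInst is its last operand, so the arguments are every operand but the last:

    // Sketch only: try to fold an existing call in place.
    SmallVector<Value *, 4> Args(CI->op_begin(), CI->op_end() - 1);
    if (Value *V = SimplifyCall(CI->getCalledValue(), Args, TD, TLI, DT)) {
      CI->replaceAllUsesWith(V); // the call simplified to an existing value
      CI->eraseFromParent();
    }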
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD,
@@ -2787,18 +3018,30 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD,
default:
Result = ConstantFoldInstruction(I, TD, TLI);
break;
+ case Instruction::FAdd:
+ Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), TD, TLI, DT);
+ break;
case Instruction::Add:
Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
TD, TLI, DT);
break;
+ case Instruction::FSub:
+ Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), TD, TLI, DT);
+ break;
case Instruction::Sub:
Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
TD, TLI, DT);
break;
+ case Instruction::FMul:
+ Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), TD, TLI, DT);
+ break;
case Instruction::Mul:
Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
@@ -2872,9 +3115,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD,
case Instruction::PHI:
Result = SimplifyPHINode(cast<PHINode>(I), Query (TD, TLI, DT));
break;
- case Instruction::Call:
- Result = SimplifyCallInst(cast<CallInst>(I), Query (TD, TLI, DT));
+ case Instruction::Call: {
+ CallSite CS(cast<CallInst>(I));
+ Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
+ TD, TLI, DT);
break;
+ }
case Instruction::Trunc:
Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT);
break;
diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp
index ca9cdcaf2464..26a0322407ec 100644
--- a/lib/Analysis/Interval.cpp
+++ b/lib/Analysis/Interval.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Interval.h"
-#include "llvm/BasicBlock.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 2b87d80d3732..66b5e852c02f 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -13,23 +13,22 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "lazy-value-info"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include <map>
#include <stack>
using namespace llvm;
diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp
index efb722bb97c4..fefa51660f92 100644
--- a/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LibCallAliasAnalysis.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/LibCallSemantics.h"
-#include "llvm/Function.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
using namespace llvm;
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
index 81b0f46f3740..0592ccb26c12 100644
--- a/lib/Analysis/LibCallSemantics.cpp
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -15,7 +15,7 @@
#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
/// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 6d6d580ed19a..9393508a9e67 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -34,26 +34,26 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Lint.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/Lint.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Function.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
namespace {
@@ -412,51 +412,49 @@ void Lint::visitMemoryReference(Instruction &I,
}
// Check for buffer overflows and misalignment.
- if (TD) {
- // Only handles memory references that read/write something simple like an
- // alloca instruction or a global variable.
- int64_t Offset = 0;
- if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) {
- // OK, so the access is to a constant offset from Ptr. Check that Ptr is
- // something we can handle and if so extract the size of this base object
- // along with its alignment.
- uint64_t BaseSize = AliasAnalysis::UnknownSize;
- unsigned BaseAlign = 0;
-
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
- Type *ATy = AI->getAllocatedType();
- if (!AI->isArrayAllocation() && ATy->isSized())
- BaseSize = TD->getTypeAllocSize(ATy);
- BaseAlign = AI->getAlignment();
- if (BaseAlign == 0 && ATy->isSized())
- BaseAlign = TD->getABITypeAlignment(ATy);
- } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
- // If the global may be defined differently in another compilation unit
- // then don't warn about funky memory accesses.
- if (GV->hasDefinitiveInitializer()) {
- Type *GTy = GV->getType()->getElementType();
- if (GTy->isSized())
- BaseSize = TD->getTypeAllocSize(GTy);
- BaseAlign = GV->getAlignment();
- if (BaseAlign == 0 && GTy->isSized())
- BaseAlign = TD->getABITypeAlignment(GTy);
- }
+ // Only handles memory references that read/write something simple like an
+ // alloca instruction or a global variable.
+ int64_t Offset = 0;
+ if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) {
+ // OK, so the access is to a constant offset from Ptr. Check that Ptr is
+ // something we can handle and if so extract the size of this base object
+ // along with its alignment.
+ uint64_t BaseSize = AliasAnalysis::UnknownSize;
+ unsigned BaseAlign = 0;
+
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+ Type *ATy = AI->getAllocatedType();
+ if (TD && !AI->isArrayAllocation() && ATy->isSized())
+ BaseSize = TD->getTypeAllocSize(ATy);
+ BaseAlign = AI->getAlignment();
+ if (TD && BaseAlign == 0 && ATy->isSized())
+ BaseAlign = TD->getABITypeAlignment(ATy);
+ } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
+ // If the global may be defined differently in another compilation unit
+ // then don't warn about funky memory accesses.
+ if (GV->hasDefinitiveInitializer()) {
+ Type *GTy = GV->getType()->getElementType();
+ if (TD && GTy->isSized())
+ BaseSize = TD->getTypeAllocSize(GTy);
+ BaseAlign = GV->getAlignment();
+ if (TD && BaseAlign == 0 && GTy->isSized())
+ BaseAlign = TD->getABITypeAlignment(GTy);
}
-
- // Accesses from before the start or after the end of the object are not
- // defined.
- Assert1(Size == AliasAnalysis::UnknownSize ||
- BaseSize == AliasAnalysis::UnknownSize ||
- (Offset >= 0 && Offset + Size <= BaseSize),
- "Undefined behavior: Buffer overflow", &I);
-
- // Accesses that say that the memory is more aligned than it is are not
- // defined.
- if (Align == 0 && Ty && Ty->isSized())
- Align = TD->getABITypeAlignment(Ty);
- Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
- "Undefined behavior: Memory reference address is misaligned", &I);
}
+
+ // Accesses from before the start or after the end of the object are not
+ // defined.
+ Assert1(Size == AliasAnalysis::UnknownSize ||
+ BaseSize == AliasAnalysis::UnknownSize ||
+ (Offset >= 0 && Offset + Size <= BaseSize),
+ "Undefined behavior: Buffer overflow", &I);
+
+ // Accesses that say that the memory is more aligned than it is are not
+ // defined.
+ if (TD && Align == 0 && Ty && Ty->isSized())
+ Align = TD->getABITypeAlignment(Ty);
+ Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
+ "Undefined behavior: Memory reference address is misaligned", &I);
}
}
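For reference, a deliberately out-of-bounds C++ snippet of the kind the relaxed check above reports (object sizes come from DataLayout when it is available):

    int a[4];

    void oops() {
      a[4] = 0; // Offset + Size exceeds BaseSize: "Buffer overflow" report
    }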
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 73aa8b49cda5..0902a39a9f81 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -13,12 +13,13 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/DataLayout.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
using namespace llvm;
/// AreEquivalentAddressValues - Test if A and B will obviously have the same
@@ -48,48 +49,18 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
return false;
}
-/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and
-/// bitcasts to get back to the underlying object being addressed, keeping
-/// track of the offset in bytes from the GEPs relative to the result.
-/// This is closely related to GetUnderlyingObject but is located
-/// here to avoid making VMCore depend on DataLayout.
-static Value *getUnderlyingObjectWithOffset(Value *V, const DataLayout *TD,
- uint64_t &ByteOffset,
- unsigned MaxLookup = 6) {
- if (!V->getType()->isPointerTy())
- return V;
- for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->hasAllConstantIndices())
- return V;
- SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
- ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(),
- Indices);
- V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast) {
- V = cast<Operator>(V)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->mayBeOverridden())
- return V;
- V = GA->getAliasee();
- } else {
- return V;
- }
- assert(V->getType()->isPointerTy() && "Unexpected operand type!");
- }
- return V;
-}
-
/// isSafeToLoadUnconditionally - Return true if we know that executing a load
/// from this value cannot trap. If it is not obviously safe to load from the
/// specified pointer, we do a quick local scan of the basic block containing
/// ScanFrom, to determine if the address is already accessed.
bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
unsigned Align, const DataLayout *TD) {
- uint64_t ByteOffset = 0;
+ int64_t ByteOffset = 0;
Value *Base = V;
- if (TD)
- Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset);
+ Base = GetPointerBaseWithConstantOffset(V, ByteOffset, TD);
+
+ if (ByteOffset < 0) // out of bounds
+ return false;
Type *BaseType = 0;
unsigned BaseAlign = 0;
@@ -97,10 +68,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
// An alloca is safe to load from as long as it is suitably aligned.
BaseType = AI->getAllocatedType();
BaseAlign = AI->getAlignment();
- } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) {
+ } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
// Global variables are safe to load from but their size cannot be
// guaranteed if they are overridden.
- if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) {
+ if (!GV->mayBeOverridden()) {
BaseType = GV->getType()->getElementType();
BaseAlign = GV->getAlignment();
}
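A deliberately invalid C++ illustration of the new ByteOffset < 0 rejection above; the pointer arithmetic is itself undefined and exists only to show what a negative constant offset means:

    int g[8];

    int before() {
      // The constant offset from g is negative (before the object), so
      // isSafeToLoadUnconditionally must answer "false" for this load.
      return *(g - 1);
    }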
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 8341f9d83055..f1ad6506e4ba 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -15,18 +15,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>
using namespace llvm;
@@ -213,14 +214,75 @@ bool Loop::isLoopSimplifyForm() const {
/// isSafeToClone - Return true if the loop body is safe to clone in practice.
/// Routines that reform the loop CFG and split edges often fail on indirectbr.
bool Loop::isSafeToClone() const {
- // Return false if any loop blocks contain indirectbrs.
+ // Return false if any loop blocks contain indirectbrs, or if there are any
+ // calls to noduplicate functions.
for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
- if (isa<IndirectBrInst>((*I)->getTerminator()))
+ if (isa<IndirectBrInst>((*I)->getTerminator())) {
+ return false;
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
+ if (II->hasFnAttr(Attribute::NoDuplicate))
+ return false;
+ }
+
+ for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
+ if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
+ if (CI->hasFnAttr(Attribute::NoDuplicate))
+ return false;
+ }
+ }
+ }
+ return true;
+}
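A short usage sketch for the tightened predicate; L is assumed to be a Loop* inside a transform that clones loop bodies:

    // Sketch only: unrolling or peeling must now bail out on loops that
    // contain indirectbr terminators or calls marked "noduplicate".
    if (!L->isSafeToClone())
      return false;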
+
+bool Loop::isAnnotatedParallel() const {
+
+ BasicBlock *latch = getLoopLatch();
+ if (latch == NULL)
+ return false;
+
+ MDNode *desiredLoopIdMetadata =
+ latch->getTerminator()->getMetadata("llvm.loop.parallel");
+
+ if (!desiredLoopIdMetadata)
return false;
+
+ // The loop branch contains the parallel loop metadata. In order to ensure
+ // that no parallel-loop-unaware optimization pass has added loop-carried
+ // dependencies (thus converting the loop back into a sequential loop), check
+ // that all the memory instructions in the loop carry parallelism metadata
+ // that points to the same unique "loop id metadata" the loop branch does.
+ for (block_iterator BB = block_begin(), BE = block_end(); BB != BE; ++BB) {
+ for (BasicBlock::iterator II = (*BB)->begin(), EE = (*BB)->end();
+ II != EE; II++) {
+
+ if (!II->mayReadOrWriteMemory())
+ continue;
+
+ if (!II->getMetadata("llvm.mem.parallel_loop_access"))
+ return false;
+
+ // The memory instruction can refer to the loop identifier metadata
+ // directly or indirectly through another list metadata (in case of
+ // nested parallel loops). The loop identifier metadata refers to
+ // itself so we can check both cases with the same routine.
+ MDNode *loopIdMD =
+ dyn_cast<MDNode>(II->getMetadata("llvm.mem.parallel_loop_access"));
+ bool loopIdMDFound = false;
+ for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) {
+ if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) {
+ loopIdMDFound = true;
+ break;
+ }
+ }
+
+ if (!loopIdMDFound)
+ return false;
+ }
}
return true;
}
+
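A matching sketch for the new parallel-loop query; L is again an assumed Loop*, and the flag name is hypothetical:

    // Sketch only: when every memory access in L carries
    // llvm.mem.parallel_loop_access metadata naming L's loop id, assumed
    // loop-carried dependences may be discarded, e.g. by a vectorizer.
    bool IgnoreAssumedDeps = L->isAnnotatedParallel(); // hypothetical flag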
/// hasDedicatedExits - Return true if no exit block for the loop
/// has a predecessor that is outside the loop.
bool Loop::hasDedicatedExits() const {
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 8578a63bee1f..d26aaf1b9048 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -10,15 +10,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/InstIterator.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SetVector.h"
using namespace llvm;
namespace {
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 0a539fe75825..d490d5419f75 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -8,24 +8,24 @@
//===----------------------------------------------------------------------===//
//
// This family of functions identifies calls to builtin functions that allocate
-// or free memory.
+// or free memory.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "memory-builtins"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -88,6 +88,10 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false) {
+ // Skip intrinsics
+ if (isa<IntrinsicInst>(V))
+ return 0;
+
Function *Callee = getCalledFunction(V, LookThroughBitCast);
if (!Callee)
return 0;
@@ -132,7 +136,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V);
- return CS && CS.hasFnAttr(Attributes::NoAlias);
+ return CS && CS.hasFnAttr(Attribute::NoAlias);
}
@@ -194,12 +198,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
const TargetLibraryInfo *TLI,
bool LookThroughSExt = false) {
if (!CI)
- return NULL;
+ return 0;
// The size of the malloc's result type must be known to determine array size.
Type *T = getMallocAllocatedType(CI, TLI);
if (!T || !T->isSized() || !TD)
- return NULL;
+ return 0;
unsigned ElementSize = TD->getTypeAllocSize(T);
if (StructType *ST = dyn_cast<StructType>(T))
@@ -208,15 +212,15 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
// If malloc call's arg can be determined to be a multiple of ElementSize,
// return the multiple. Otherwise, return NULL.
Value *MallocArg = CI->getArgOperand(0);
- Value *Multiple = NULL;
+ Value *Multiple = 0;
if (ComputeMultiple(MallocArg, ElementSize, Multiple,
LookThroughSExt))
return Multiple;
- return NULL;
+ return 0;
}
-/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
/// is a call to malloc whose array size can be determined and the array size
/// is not constant 1. Otherwise, return NULL.
const CallInst *llvm::isArrayMalloc(const Value *I,
@@ -225,12 +229,12 @@ const CallInst *llvm::isArrayMalloc(const Value *I,
const CallInst *CI = extractMallocCall(I, TLI);
Value *ArraySize = computeArraySize(CI, TD, TLI);
- if (ArraySize &&
- ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
- return CI;
+ if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize))
+ if (ConstSize->isOne())
+ return CI;
// CI is a non-array malloc or we can't figure out that it is an array malloc.
- return NULL;
+ return 0;
}
/// getMallocType - Returns the PointerType resulting from the malloc call.
@@ -241,8 +245,8 @@ const CallInst *llvm::isArrayMalloc(const Value *I,
PointerType *llvm::getMallocType(const CallInst *CI,
const TargetLibraryInfo *TLI) {
assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call");
-
- PointerType *MallocType = NULL;
+
+ PointerType *MallocType = 0;
unsigned NumOfBitCastUses = 0;
// Determine if CallInst has a bitcast use.
@@ -262,7 +266,7 @@ PointerType *llvm::getMallocType(const CallInst *CI,
return cast<PointerType>(CI->getType());
// Type could not be determined.
- return NULL;
+ return 0;
}
/// getMallocAllocatedType - Returns the Type allocated by malloc call.
@@ -273,10 +277,10 @@ PointerType *llvm::getMallocType(const CallInst *CI,
Type *llvm::getMallocAllocatedType(const CallInst *CI,
const TargetLibraryInfo *TLI) {
PointerType *PT = getMallocType(CI, TLI);
- return PT ? PT->getElementType() : NULL;
+ return PT ? PT->getElementType() : 0;
}
-/// getMallocArraySize - Returns the array size of a malloc call. If the
+/// getMallocArraySize - Returns the array size of a malloc call. If the
/// argument passed to malloc is a multiple of the size of the malloced type,
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
@@ -300,7 +304,7 @@ const CallInst *llvm::extractCallocCall(const Value *I,
/// isFreeCall - Returns non-null if the value is a call to the builtin free()
const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
const CallInst *CI = dyn_cast<CallInst>(I);
- if (!CI)
+ if (!CI || isa<IntrinsicInst>(CI))
return 0;
Function *Callee = CI->getCalledFunction();
if (Callee == 0 || !Callee->isDeclaration())
@@ -317,7 +321,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
return 0;
// Check free prototype.
- // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+ // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
// attribute will exist.
FunctionType *FTy = Callee->getFunctionType();
if (!FTy->getReturnType()->isVoidTy())
@@ -360,6 +364,26 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
return true;
}
+/// \brief Compute the size of the underlying object pointed to by Ptr. Returns
+/// true and the object size in Size if successful, and false otherwise.
+/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
+/// byval arguments, and global variables.
+bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ bool RoundToAlign) {
+ if (!TD)
+ return false;
+
+ ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
+ SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
+ if (!Visitor.knownSize(Data))
+ return false;
+
+ Size = Data.first.getZExtValue();
+ return true;
+}
+
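A caller-side sketch of the helper added above; Ptr, TD, and TLI are assumed to be in scope, and the consumer function is hypothetical:

    uint64_t Size;
    // On success, Size holds the full size of Ptr's underlying object,
    // rounded up to its alignment because RoundToAlign is set.
    if (getUnderlyingObjectSize(Ptr, Size, TD, TLI, /*RoundToAlign=*/true))
      ProcessObjectOfSize(Size); // hypothetical consumer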
STATISTIC(ObjectVisitorArgument,
"Number of arguments with unsolved size and offset");
@@ -385,20 +409,29 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
V = V->stripPointerCasts();
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- // If we have already seen this instruction, bail out. Cycles can happen in
- // unreachable code after constant propagation.
- if (!SeenInsts.insert(I))
- return unknown();
+ if (isa<Instruction>(V) || isa<GEPOperator>(V)) {
+ // Return the cached value, or insert unknown into the cache if the size
+ // of V has not been computed yet, in order to avoid recursion on PHIs.
+ std::pair<CacheMapTy::iterator, bool> CacheVal =
+ CacheMap.insert(std::make_pair(V, unknown()));
+ if (!CacheVal.second)
+ return CacheVal.first->second;
+
+ SizeOffsetType Result;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- return visitGEPOperator(*GEP);
- return visit(*I);
+ Result = visitGEPOperator(*GEP);
+ else
+ Result = visit(cast<Instruction>(*V));
+ return CacheMap[V] = Result;
}
+
if (Argument *A = dyn_cast<Argument>(V))
return visitArgument(*A);
if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V))
return visitConstantPointerNull(*P);
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return visitGlobalAlias(*GA);
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return visitGlobalVariable(*GV);
if (UndefValue *UV = dyn_cast<UndefValue>(V))
@@ -406,8 +439,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (CE->getOpcode() == Instruction::IntToPtr)
return unknown(); // clueless
- if (CE->getOpcode() == Instruction::GetElementPtr)
- return visitGEPOperator(cast<GEPOperator>(*CE));
}
DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V
@@ -510,14 +541,19 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
SizeOffsetType PtrData = compute(GEP.getPointerOperand());
- if (!bothKnown(PtrData) || !GEP.hasAllConstantIndices())
+ APInt Offset(IntTyBits, 0);
+ if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*TD, Offset))
return unknown();
- SmallVector<Value*, 8> Ops(GEP.idx_begin(), GEP.idx_end());
- APInt Offset(IntTyBits,TD->getIndexedOffset(GEP.getPointerOperandType(),Ops));
return std::make_pair(PtrData.first, PtrData.second + Offset);
}
+SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) {
+ if (GA.mayBeOverridden())
+ return unknown();
+ return compute(GA.getAliasee());
+}
+
SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
if (!GV.hasDefinitiveInitializer())
return unknown();
@@ -536,9 +572,21 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
return unknown();
}
-SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) {
- // too complex to analyze statically.
- return unknown();
+SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) {
+ if (PHI.getNumIncomingValues() == 0)
+ return unknown();
+
+ SizeOffsetType Ret = compute(PHI.getIncomingValue(0));
+ if (!bothKnown(Ret))
+ return unknown();
+
+ // Verify that all PHI incoming pointers have the same size and offset.
+ for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) {
+ SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i));
+ if (!bothKnown(EdgeData) || EdgeData != Ret)
+ return unknown();
+ }
+ return Ret;
}
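A C++ analogue of the PHI rule above, showing which merges keep the size known (names are illustrative):

    static char A[16], B[16];

    char *pick(bool Cond) {
      return Cond ? A : B;       // both edges: size 16, offset 0 -> known
      // return Cond ? A : A + 4; // offsets differ -> unknown()
    }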
SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
@@ -619,6 +667,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
} else if (isa<Argument>(V) ||
(isa<ConstantExpr>(V) &&
cast<ConstantExpr>(V)->getOpcode() == Instruction::IntToPtr) ||
+ isa<GlobalAlias>(V) ||
isa<GlobalVariable>(V)) {
// ignore values where we cannot do more than what ObjectSizeVisitor can
Result = unknown();
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 987289049455..2240e9de33eb 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file implements an analysis that determines, for a given memory
-// operation, what preceding memory operations it depends on. It builds on
+// operation, what preceding memory operations it depends on. It builds on
// alias analysis information, and tries to provide a lazy, caching interface to
// a common kind of alias information query.
//
@@ -16,21 +16,21 @@
#define DEBUG_TYPE "memdep"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Support/PredIteratorCache.h"
using namespace llvm;
STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
@@ -52,7 +52,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
static const int BlockScanLimit = 500;
char MemoryDependenceAnalysis::ID = 0;
-
+
// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
@@ -99,7 +99,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) {
/// RemoveFromReverseMap - This is a helper function that removes Val from
/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry.
template <typename KeyTy>
-static void RemoveFromReverseMap(DenseMap<Instruction*,
+static void RemoveFromReverseMap(DenseMap<Instruction*,
SmallPtrSet<KeyTy, 4> > &ReverseMap,
Instruction *Inst, KeyTy Val) {
typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
@@ -123,7 +123,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
if (LI->isUnordered()) {
Loc = AA->getLocation(LI);
return AliasAnalysis::Ref;
- } else if (LI->getOrdering() == Monotonic) {
+ }
+ if (LI->getOrdering() == Monotonic) {
Loc = AA->getLocation(LI);
return AliasAnalysis::ModRef;
}
@@ -135,7 +136,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
if (SI->isUnordered()) {
Loc = AA->getLocation(SI);
return AliasAnalysis::Mod;
- } else if (SI->getOrdering() == Monotonic) {
+ }
+ if (SI->getOrdering() == Monotonic) {
Loc = AA->getLocation(SI);
return AliasAnalysis::ModRef;
}
@@ -196,13 +198,13 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
// Walk backwards through the block, looking for dependencies
while (ScanIt != BB->begin()) {
// Limit the amount of scanning we do so we don't end up with quadratic
- // running time on extreme testcases.
+ // running time on extreme testcases.
--Limit;
if (!Limit)
return MemDepResult::getUnknown();
Instruction *Inst = --ScanIt;
-
+
// If this inst is a memory op, get the pointer it accessed
AliasAnalysis::Location Loc;
AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
@@ -251,7 +253,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
///
/// MemLocBase, MemLocOffset are lazily computed here the first time the
/// base/offs of memloc is needed.
-static bool
+static bool
isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
const Value *&MemLocBase,
int64_t &MemLocOffs,
@@ -262,7 +264,7 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
// If we haven't already computed the base/offset of MemLoc, do so now.
if (MemLocBase == 0)
- MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD);
+ MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, TD);
unsigned Size = MemoryDependenceAnalysis::
getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
@@ -283,25 +285,31 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
const DataLayout &TD) {
// We can only extend simple integer loads.
if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
-
+
+ // Load widening is hostile to ThreadSanitizer: it may cause false positives
+ // or make the reports more cryptic (access sizes are wrong).
+ if (LI->getParent()->getParent()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
+ return 0;
+
// Get the base of this load.
int64_t LIOffs = 0;
- const Value *LIBase =
- GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD);
-
+ const Value *LIBase =
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD);
+
// If the two pointers are not based on the same pointer, we can't tell that
// they are related.
if (LIBase != MemLocBase) return 0;
-
+
// Okay, the two values are based on the same pointer, but returned as
// no-alias. This happens when we have things like two byte loads at "P+1"
// and "P+3". Check to see if increasing the size of the "LI" load up to its
// alignment (or the largest native integer type) will allow us to load all
// the bits required by MemLoc.
-
+
// If MemLoc is before LI, then no widening of LI will help us out.
if (MemLocOffs < LIOffs) return 0;
-
+
// Get the alignment of the load in bytes. We assume that it is safe to load
// any legal integer up to this size without a problem. For example, if we're
// looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
@@ -310,15 +318,15 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
unsigned LoadAlign = LI->getAlignment();
int64_t MemLocEnd = MemLocOffs+MemLocSize;
-
+
// If no amount of rounding up will let MemLoc fit into LI, then bail out.
if (LIOffs+LoadAlign < MemLocEnd) return 0;
-
+
// This is the size of the load to try. Start with the next larger power of
// two.
unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
-
+
while (1) {
// If this load size is bigger than our known alignment or would not fit
// into a native integer register, then we fail.
@@ -327,8 +335,8 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
return 0;
if (LIOffs+NewLoadByteSize > MemLocEnd &&
- LI->getParent()->getParent()->getFnAttributes().
- hasAttribute(Attributes::AddressSafety))
+ LI->getParent()->getParent()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress))
// We will be reading past the location accessed by the original program.
// While this is safe in a regular build, Address Safety analysis tools
// may start reporting false warnings. So, don't do widening.
@@ -337,7 +345,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
// If a load of this width would include all of MemLoc, then we succeed.
if (LIOffs+NewLoadByteSize >= MemLocEnd)
return NewLoadByteSize;
-
+
NewLoadByteSize <<= 1;
}
}
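A C++ sketch of the widening this function licenses; the alignment of P is an assumption of the example:

    #include <cstdint>

    // Given a 4-byte-aligned P, the byte loads at P+1 and P+3 may be
    // covered by one aligned 32-bit load plus shifts; the checks above
    // suppress this under the thread/address sanitizers, where the wider
    // access would be reported with the wrong size or range.
    uint8_t load_two(const uint8_t *P) {
      return static_cast<uint8_t>(P[1] + P[3]);
    }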
@@ -345,15 +353,23 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
/// getPointerDependencyFrom - Return the instruction on which a memory
/// location depends. If isLoad is true, this routine ignores may-aliases with
/// read-only operations. If isLoad is false, this routine ignores may-aliases
-/// with reads from read-only locations.
+/// with reads from read-only locations. If possible, pass the query
+/// instruction as well; this function may take advantage of the metadata
+/// attached to the query instruction to refine the result.
MemDepResult MemoryDependenceAnalysis::
-getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
- BasicBlock::iterator ScanIt, BasicBlock *BB) {
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
+ BasicBlock::iterator ScanIt, BasicBlock *BB,
+ Instruction *QueryInst) {
const Value *MemLocBase = 0;
int64_t MemLocOffset = 0;
-
unsigned Limit = BlockScanLimit;
+ bool isInvariantLoad = false;
+ if (isLoad && QueryInst) {
+ LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
+ if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0)
+ isInvariantLoad = true;
+ }
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
@@ -368,7 +384,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Debug intrinsics don't (and can't) cause dependences.
if (isa<DbgInfoIntrinsic>(II)) continue;
-
+
// If we reach a lifetime begin or end marker, then the query ends here
// because the value is undefined.
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
@@ -392,10 +408,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
return MemDepResult::getClobber(LI);
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
-
+
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
-
+
if (isLoad) {
if (R == AliasAnalysis::NoAlias) {
// If this is an over-aligned integer load (for example,
@@ -409,10 +425,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
MemLocOffset, LI, TD))
return MemDepResult::getClobber(Inst);
-
+
continue;
}
-
+
// Must aliased loads are defs of each other.
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(Inst);
@@ -427,7 +443,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (R == AliasAnalysis::PartialAlias)
return MemDepResult::getClobber(Inst);
#endif
-
+
// Random may-alias loads don't, on their own, impose a dependence
// on each other.
continue;
@@ -444,7 +460,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Stores depend on may/must aliased loads.
return MemDepResult::getDef(Inst);
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Atomic stores have complications involved.
// FIXME: This is overly conservative.
@@ -460,14 +476,16 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Ok, this store might clobber the query pointer. Check to see if it is
// a must alias: in this case, we want to return this as a def.
AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
-
+
// If we found a pointer, check if it could be the same as our pointer.
AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
-
+
if (R == AliasAnalysis::NoAlias)
continue;
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(Inst);
+ if (isInvariantLoad)
+ continue;
return MemDepResult::getClobber(Inst);
}
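A hedged sketch of attaching the metadata consulted above; LI is an assumed LoadInst* whose location is known to be constant for the program's lifetime (say, a vtable pointer load):

    // Sketch only: an !invariant.load annotation lets the walk above skip
    // intervening stores rather than reporting them as clobbers.
    LLVMContext &Ctx = LI->getContext();
    LI->setMetadata(LLVMContext::MD_invariant_load,
                    MDNode::get(Ctx, ArrayRef<Value *>()));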
@@ -482,7 +500,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
-
+
if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
return MemDepResult::getDef(Inst);
// Be conservative if the accessed pointer may alias the allocation.
@@ -516,7 +534,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
return MemDepResult::getClobber(Inst);
}
}
-
+
// No dependence found. If this is the entry block of the function, it is
// unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
@@ -528,25 +546,25 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
/// depends.
MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
Instruction *ScanPos = QueryInst;
-
+
// Check for a cached result
MemDepResult &LocalCache = LocalDeps[QueryInst];
-
+
// If the cached entry is non-dirty, just return it. Note that this depends
// on MemDepResult's default constructing to 'dirty'.
if (!LocalCache.isDirty())
return LocalCache;
-
+
// Otherwise, if we have a dirty entry, we know we can start the scan at that
// instruction, which may save us some work.
if (Instruction *Inst = LocalCache.getInst()) {
ScanPos = Inst;
-
+
RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
}
-
+
BasicBlock *QueryParent = QueryInst->getParent();
-
+
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
// No dependence found. If this is the entry block of the function, it is
@@ -565,7 +583,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
- QueryParent);
+ QueryParent, QueryInst);
} else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
CallSite QueryCS(QueryInst);
bool isReadOnly = AA->onlyReadsMemory(QueryCS);
@@ -575,11 +593,11 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
// Non-memory instruction.
LocalCache = MemDepResult::getUnknown();
}
-
+
// Remember the result!
if (Instruction *I = LocalCache.getInst())
ReverseLocalDeps[I].insert(QueryInst);
-
+
return LocalCache;
}
@@ -620,7 +638,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
/// the uncached case, this starts out as the set of predecessors we care
/// about.
SmallVector<BasicBlock*, 32> DirtyBlocks;
-
+
if (!Cache.empty()) {
// Okay, we have a cache entry. If we know it is not dirty, just return it
// with no computation.
@@ -628,17 +646,17 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
++NumCacheNonLocal;
return Cache;
}
-
+
// If we already have a partially computed set of results, scan them to
// determine what is dirty, seeding our initial DirtyBlocks worklist.
for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
I != E; ++I)
if (I->getResult().isDirty())
DirtyBlocks.push_back(I->getBB());
-
+
// Sort the cache so that we can do fast binary search lookups below.
std::sort(Cache.begin(), Cache.end());
-
+
++NumCacheDirtyNonLocal;
//cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
// << Cache.size() << " cached: " << *QueryInst;
@@ -649,45 +667,45 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
DirtyBlocks.push_back(*PI);
++NumUncacheNonLocal;
}
-
+
// isReadonlyCall - If this is a read-only call, we can be more aggressive.
bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
SmallPtrSet<BasicBlock*, 64> Visited;
-
+
unsigned NumSortedEntries = Cache.size();
DEBUG(AssertSorted(Cache));
-
+
// Iterate while we still have blocks to update.
while (!DirtyBlocks.empty()) {
BasicBlock *DirtyBB = DirtyBlocks.back();
DirtyBlocks.pop_back();
-
+
// Already processed this block?
if (!Visited.insert(DirtyBB))
continue;
-
+
// Do a binary search to see if we already have an entry for this block in
// the cache set. If so, find it.
DEBUG(AssertSorted(Cache, NumSortedEntries));
- NonLocalDepInfo::iterator Entry =
+ NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
NonLocalDepEntry(DirtyBB));
if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
--Entry;
-
+
NonLocalDepEntry *ExistingResult = 0;
- if (Entry != Cache.begin()+NumSortedEntries &&
+ if (Entry != Cache.begin()+NumSortedEntries &&
Entry->getBB() == DirtyBB) {
// If we already have an entry, and if it isn't already dirty, the block
// is done.
if (!Entry->getResult().isDirty())
continue;
-
+
// Otherwise, remember this slot so we can update the value.
ExistingResult = &*Entry;
}
-
+
// If the dirty entry has a pointer, start scanning from it so we don't have
// to rescan the entire block.
BasicBlock::iterator ScanPos = DirtyBB->end();
@@ -699,10 +717,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
QueryCS.getInstruction());
}
}
-
+
// Find out if this block has a local dependency for QueryInst.
MemDepResult Dep;
-
+
if (ScanPos != DirtyBB->begin()) {
Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
} else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
@@ -712,14 +730,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
} else {
Dep = MemDepResult::getNonFuncLocal();
}
-
+
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
if (ExistingResult)
ExistingResult->setResult(Dep);
else
Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
-
+
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the association!
if (!Dep.isNonLocal()) {
@@ -728,14 +746,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
if (Instruction *Inst = Dep.getInst())
ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
} else {
-
+
// If the block *is* completely transparent to the load, we need to check
// the predecessors of this block. Add them to our worklist.
for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
DirtyBlocks.push_back(*PI);
}
}
-
+
return Cache;
}
@@ -753,9 +771,9 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
assert(Loc.Ptr->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
-
+
PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
-
+
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
// a block with multiple different pointers. This can happen during PHI
@@ -778,7 +796,7 @@ MemDepResult MemoryDependenceAnalysis::
GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
bool isLoad, BasicBlock *BB,
NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
-
+
// Do a binary search to see if we already have an entry for this block in
// the cache set. If so, find it.
NonLocalDepInfo::iterator Entry =
@@ -786,18 +804,18 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
NonLocalDepEntry(BB));
if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
--Entry;
-
+
NonLocalDepEntry *ExistingResult = 0;
if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
ExistingResult = &*Entry;
-
+
// If we have a cached entry, and it is non-dirty, use it as the value for
// this dependency.
if (ExistingResult && !ExistingResult->getResult().isDirty()) {
++NumCacheNonLocalPtr;
return ExistingResult->getResult();
- }
-
+ }
+
// Otherwise, we have to scan for the value. If we have a dirty cache
// entry, start scanning from its position, otherwise we scan from the end
// of the block.
@@ -807,30 +825,30 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
"Instruction invalidated?");
++NumCacheDirtyNonLocalPtr;
ScanPos = ExistingResult->getResult().getInst();
-
+
// Eliminating the dirty entry from 'Cache', so update the reverse info.
ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
} else {
++NumUncacheNonLocalPtr;
}
-
+
// Scan the block for the dependency.
MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
-
+
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
if (ExistingResult)
ExistingResult->setResult(Dep);
else
Cache->push_back(NonLocalDepEntry(BB, Dep));
-
+
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the reverse association because we just added it
// to Cache!
if (!Dep.isDef() && !Dep.isClobber())
return Dep;
-
+
// Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
// update MemDep when we remove instructions.
Instruction *Inst = Dep.getInst();
@@ -843,7 +861,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain
/// number of elements in the array that are already properly ordered. This is
/// optimized for the case when only a few entries are added.
-static void
+static void
SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
unsigned NumSortedEntries) {
switch (Cache.size() - NumSortedEntries) {
@@ -895,7 +913,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
SmallVectorImpl<NonLocalDepResult> &Result,
DenseMap<BasicBlock*, Value*> &Visited,
bool SkipFirstBlock) {
-
+
// Look up the cached info for Pointer.
ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
@@ -909,7 +927,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Get the NLPI for CacheKey, inserting one into the map if it doesn't
// already have one.
- std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
+ std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
NonLocalPointerInfo *CacheInfo = &Pair.first->second;
@@ -971,14 +989,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
if (VI == Visited.end() || VI->second == Pointer.getAddr())
continue;
-
+
// We have a pointer mismatch in a block. Just return clobber, saying
// that something was clobbered in this result. We could also do a
// non-fully cached query, but there is little point in doing this.
return true;
}
}
-
+
Value *Addr = Pointer.getAddr();
for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
I != E; ++I) {
@@ -989,7 +1007,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
++NumCacheCompleteNonLocalPtr;
return false;
}
-
+
// Otherwise, either this is a new block, a block with an invalid cache
// pointer or one that we're about to invalidate by putting more info into it
// than its valid cache info. If empty, the result will be valid cache info,
@@ -998,10 +1016,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
else
CacheInfo->Pair = BBSkipFirstBlockPair();
-
+
SmallVector<BasicBlock*, 32> Worklist;
Worklist.push_back(StartBB);
-
+
// PredList used inside loop.
SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;
@@ -1012,10 +1030,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// revisit blocks after we insert info for them.
unsigned NumSortedEntries = Cache->size();
DEBUG(AssertSorted(*Cache));
-
+
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
-
+
// Skip the first block if we have it.
if (!SkipFirstBlock) {
// Analyze the dependency of *Pointer in FromBB. See if we already have
@@ -1027,14 +1045,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
DEBUG(AssertSorted(*Cache, NumSortedEntries));
MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
NumSortedEntries);
-
+
// If we got a Def or Clobber, add this to the list of results.
if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) {
Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
continue;
}
}
-
+
// If 'Pointer' is an instruction defined in this block, then we need to do
// phi translation to change it into a value live in the predecessor block.
// If not, we just add the predecessors to the worklist and scan them with
@@ -1051,7 +1069,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
NewBlocks.push_back(*PI);
continue;
}
-
+
// If we have seen this block before, but it was with a different
// pointer then we have a phi translation failure and we have to treat
// this as a clobber.
@@ -1066,12 +1084,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
Worklist.append(NewBlocks.begin(), NewBlocks.end());
continue;
}
-
+
// We do need to do phi translation, if we know ahead of time we can't phi
// translate this value, don't even try.
if (!Pointer.IsPotentiallyPHITranslatable())
goto PredTranslationFailure;
-
+
// We may have added values to the cache list before this PHI translation.
// If so, we haven't done anything to ensure that the cache remains sorted.
// Sort it now (if needed) so that recursive invocations of
@@ -1094,7 +1112,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
PredPointer.PHITranslateValue(BB, Pred, 0);
Value *PredPtrVal = PredPointer.getAddr();
-
+
// Check to see if we have already visited this pred block with another
// pointer. If so, we can't do this lookup. This failure can occur
// with PHI translation when a critical edge exists and the PHI node in
@@ -1111,14 +1129,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// the analysis and can ignore it.
if (InsertRes.first->second == PredPtrVal)
continue;
-
+
// Otherwise, the block was previously analyzed with a different
// pointer. We can't represent the result of this case, so we just
// treat this as a phi translation failure.
// Make sure to clean up the Visited map before continuing on to
// PredTranslationFailure.
- for (unsigned i = 0; i < PredList.size(); i++)
+ for (unsigned i = 0, n = PredList.size(); i < n; ++i)
Visited.erase(PredList[i].first);
goto PredTranslationFailure;
@@ -1127,10 +1145,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
  // Actually process results here; this needs to be a separate loop to avoid
// calling getNonLocalPointerDepFromBB for blocks we don't want to return
- // any results for. (getNonLocalPointerDepFromBB will modify our
+ // any results for. (getNonLocalPointerDepFromBB will modify our
  // data structures in ways the code after the PredTranslationFailure label
// doesn't expect.)
- for (unsigned i = 0; i < PredList.size(); i++) {
+ for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
BasicBlock *Pred = PredList[i].first;
PHITransAddr &PredPointer = PredList[i].second;
Value *PredPtrVal = PredPointer.getAddr();
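
The comment above carries the key constraint: the recursive getNonLocalPointerDepFromBB call rewrites the very maps this loop walks, so candidate predecessors are buffered in PredList during the scan and only processed in a second loop. A toy sketch of that collect-then-process shape (illustrative names, not MemDep's):

    #include <vector>

    // Phase 1 scans while the container is stable; phase 2 runs the
    // mutating step over the buffered snapshot, so no live iterators or
    // references into State are invalidated mid-walk.
    void collectThenProcess(std::vector<int> &State) {
      std::vector<int> Pending;
      for (int V : State)      // Scan only; no mutation here.
        if (V % 2 == 0)
          Pending.push_back(V);
      for (int V : Pending)    // May grow or reshape State safely.
        State.push_back(V + 1);
    }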
@@ -1170,12 +1188,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
continue;
}
}
-
+
// Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->NonLocalDeps;
NumSortedEntries = Cache->size();
-
+
// Since we did phi translation, the "Cache" set won't contain all of the
// results for the query. This is ok (we can still use it to accelerate
// specific block queries) but we can't do the fastpath "return all
@@ -1188,20 +1206,20 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// The following code is "failure"; we can't produce a sane translation
// for the given block. It assumes that we haven't modified any of
  // our data structures while processing the current block.
-
+
if (Cache == 0) {
// Refresh the CacheInfo/Cache pointer if it got invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->NonLocalDeps;
NumSortedEntries = Cache->size();
}
-
+
// Since we failed phi translation, the "Cache" set won't contain all of the
// results for the query. This is ok (we can still use it to accelerate
// specific block queries) but we can't do the fastpath "return all
// results from the set". Clear out the indicator for this.
CacheInfo->Pair = BBSkipFirstBlockPair();
-
+
// If *nothing* works, mark the pointer as unknown.
//
// If this is the magic first block, return this as a clobber of the whole
@@ -1209,12 +1227,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// we have to bail out.
if (SkipFirstBlock)
return true;
-
+
for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
assert(I != Cache->rend() && "Didn't find current block??");
if (I->getBB() != BB)
continue;
-
+
assert(I->getResult().isNonLocal() &&
"Should only be here with transparent block");
I->setResult(MemDepResult::getUnknown());
@@ -1234,23 +1252,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
/// CachedNonLocalPointerInfo, remove it.
void MemoryDependenceAnalysis::
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
- CachedNonLocalPointerInfo::iterator It =
+ CachedNonLocalPointerInfo::iterator It =
NonLocalPointerDeps.find(P);
if (It == NonLocalPointerDeps.end()) return;
-
+
// Remove all of the entries in the BB->val map. This involves removing
// instructions from the reverse map.
NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
-
+
for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
Instruction *Target = PInfo[i].getResult().getInst();
if (Target == 0) continue; // Ignore non-local dep results.
assert(Target->getParent() == PInfo[i].getBB());
-
+
// Eliminating the dirty entry from 'Cache', so update the reverse info.
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
}
-
+
// Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
NonLocalPointerDeps.erase(It);
}
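
RemoveCachedNonLocalPointerDependencies is an instance of keeping a forward cache and a reverse index consistent: every cached result that names an instruction also appears under that instruction in the reverse map, and removal has to drop both sides. A reduced sketch with stand-in key types (not the MemDep classes):

    #include <map>
    #include <set>
    #include <vector>

    using Key = int;  // Stands in for ValueIsLoadPair.
    using Inst = int; // Stands in for Instruction*.

    std::map<Key, std::vector<Inst>> Forward; // key -> cached results
    std::map<Inst, std::set<Key>> Reverse;    // inst -> keys naming it

    void removeKey(Key K) {
      auto It = Forward.find(K);
      if (It == Forward.end()) return;
      for (Inst I : It->second) {   // Drop each reverse edge first...
        auto RI = Reverse.find(I);
        if (RI != Reverse.end()) {
          RI->second.erase(K);
          if (RI->second.empty()) Reverse.erase(RI);
        }
      }
      Forward.erase(It);            // ...then the forward entry itself.
    }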
@@ -1305,20 +1323,20 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
// Remove this local dependency info.
LocalDeps.erase(LocalDepEntry);
}
-
+
// If we have any cached pointer dependencies on this instruction, remove
// them. If the instruction has non-pointer type, then it can't be a pointer
// base.
-
+
// Remove it from both the load info and the store info. The instruction
// can't be in either of these maps if it is non-pointer.
if (RemInst->getType()->isPointerTy()) {
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
}
-
+
// Loop over all of the things that depend on the instruction we're removing.
- //
+ //
SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
// If we find RemInst as a clobber or Def in any of the maps for other values,
@@ -1330,29 +1348,29 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
MemDepResult NewDirtyVal;
if (!RemInst->isTerminator())
NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
-
+
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
// RemInst can't be the terminator if it has local stuff depending on it.
assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
"Nothing can locally depend on a terminator");
-
+
for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
E = ReverseDeps.end(); I != E; ++I) {
Instruction *InstDependingOnRemInst = *I;
assert(InstDependingOnRemInst != RemInst &&
"Already removed our local dep info");
-
+
LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
-
+
// Make sure to remember that new things depend on NewDepInst.
assert(NewDirtyVal.getInst() && "There is no way something else can have "
"a local dep on this if it is a terminator!");
- ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
+ ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
InstDependingOnRemInst));
}
-
+
ReverseLocalDeps.erase(ReverseDepIt);
// Add new reverse deps after scanning the set, to avoid invalidating the
@@ -1363,25 +1381,25 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepsToAdd.pop_back();
}
}
-
+
ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseNonLocalDeps.end()) {
SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
I != E; ++I) {
assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
-
+
PerInstNLInfo &INLD = NonLocalDeps[*I];
// The information is now dirty!
INLD.second = true;
-
- for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
+
+ for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
DE = INLD.first.end(); DI != DE; ++DI) {
if (DI->getResult().getInst() != RemInst) continue;
-
+
// Convert to a dirty entry for the subsequent instruction.
DI->setResult(NewDirtyVal);
-
+
if (Instruction *NextI = NewDirtyVal.getInst())
ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
}
@@ -1396,7 +1414,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepsToAdd.pop_back();
}
}
-
+
// If the instruction is in ReverseNonLocalPtrDeps then it appears as a
// value in the NonLocalPointerDeps info.
ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
@@ -1404,45 +1422,45 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
-
+
for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
E = Set.end(); I != E; ++I) {
ValueIsLoadPair P = *I;
assert(P.getPointer() != RemInst &&
"Already removed NonLocalPointerDeps info for RemInst");
-
+
NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
-
+
// The cache is not valid for any specific block anymore.
NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
-
+
// Update any entries for RemInst to use the instruction after it.
for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
DI != DE; ++DI) {
if (DI->getResult().getInst() != RemInst) continue;
-
+
// Convert to a dirty entry for the subsequent instruction.
DI->setResult(NewDirtyVal);
-
+
if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
}
-
+
// Re-sort the NonLocalDepInfo. Changing the dirty entry to its
// subsequent value may invalidate the sortedness.
std::sort(NLPDI.begin(), NLPDI.end());
}
-
+
ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
-
+
while (!ReversePtrDepsToAdd.empty()) {
ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
.insert(ReversePtrDepsToAdd.back().second);
ReversePtrDepsToAdd.pop_back();
}
}
-
-
+
+
assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
AA->deleteValue(RemInst);
DEBUG(verifyRemoved(RemInst));
@@ -1456,7 +1474,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
assert(I->second.getInst() != D &&
"Inst occurs in data structures");
}
-
+
for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
E = NonLocalPointerDeps.end(); I != E; ++I) {
assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
@@ -1465,7 +1483,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
II != E; ++II)
assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
}
-
+
for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
E = NonLocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
@@ -1474,7 +1492,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
EE = INLD.first.end(); II != EE; ++II)
assert(II->getResult().getInst() != D && "Inst occurs in data structures");
}
-
+
for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
E = ReverseLocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
@@ -1482,7 +1500,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
EE = I->second.end(); II != EE; ++II)
assert(*II != D && "Inst occurs in data structures");
}
-
+
for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
E = ReverseNonLocalDeps.end();
I != E; ++I) {
@@ -1491,17 +1509,17 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
EE = I->second.end(); II != EE; ++II)
assert(*II != D && "Inst occurs in data structures");
}
-
+
for (ReverseNonLocalPtrDepTy::const_iterator
I = ReverseNonLocalPtrDeps.begin(),
E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in rev NLPD map");
-
+
for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
E = I->second.end(); II != E; ++II)
assert(*II != ValueIsLoadPair(D, false) &&
*II != ValueIsLoadPair(D, true) &&
"Inst occurs in ReverseNonLocalPtrDeps map");
}
-
+
}
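
A recurring idea in removeInstruction above: rather than eagerly recomputing every dependent's result, entries that pointed at the removed instruction are converted to dirty markers (MemDepResult::getDirty on the following instruction) and fixed up lazily on the next query. A toy model of that scheme, with ints standing in for instructions:

    #include <map>

    struct DepResult {
      enum Kind { Def, Clobber, Dirty } K;
      int Inst; // For Dirty entries: where the rescan should start.
    };

    std::map<int, DepResult> LocalDeps; // instruction id -> its dependency

    void removeInstruction(int RemInst, int NextInst) {
      DepResult DirtyVal{DepResult::Dirty, NextInst};
      for (auto &KV : LocalDeps)
        if (KV.second.Inst == RemInst) // Anything depending on RemInst...
          KV.second = DirtyVal;        // ...is marked dirty, not recomputed.
      LocalDeps.erase(RemInst);
    }

The reverse maps exist precisely so the real code can find those dependents directly instead of the full scan this sketch does.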
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index f8c751481976..03415375263a 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -16,13 +16,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
namespace {
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 2eb4137c533a..907e9621baed 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -12,10 +12,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
using namespace llvm;
namespace {
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index c35737e4724c..e6af0663feaa 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -12,11 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/PHITransAddr.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
index d4ad72648166..30d213b77576 100644
--- a/lib/Analysis/PathNumbering.cpp
+++ b/lib/Analysis/PathNumbering.cpp
@@ -25,24 +25,23 @@
#define DEBUG_TYPE "ball-larus-numbering"
#include "llvm/Analysis/PathNumbering.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/InstrTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeBuilder.h"
#include "llvm/Pass.h"
-#include "llvm/TypeBuilder.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
#include <queue>
+#include <sstream>
#include <stack>
#include <string>
#include <utility>
-#include <sstream>
using namespace llvm;
diff --git a/lib/Analysis/PathProfileInfo.cpp b/lib/Analysis/PathProfileInfo.cpp
index b361d3f4fa94..bc53221d3176 100644
--- a/lib/Analysis/PathProfileInfo.cpp
+++ b/lib/Analysis/PathProfileInfo.cpp
@@ -13,15 +13,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "path-profile-info"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/Analysis/PathProfileInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
#include <cstdio>
using namespace llvm;
diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp
index 0fcdfe75aefd..48d7d05d788f 100644
--- a/lib/Analysis/PathProfileVerifier.cpp
+++ b/lib/Analysis/PathProfileVerifier.cpp
@@ -13,15 +13,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "path-profile-verifier"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
#include "llvm/Analysis/PathProfileInfo.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
#include <stdio.h>
using namespace llvm;
@@ -85,7 +84,7 @@ bool PathProfileVerifier::runOnModule (Module &M) {
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
- arrayMap[0][F->begin()][0] = i++;
+ arrayMap[(BasicBlock*)0][F->begin()][0] = i++;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
TerminatorInst *TI = BB->getTerminator();
@@ -126,7 +125,7 @@ bool PathProfileVerifier::runOnModule (Module &M) {
<< currentPath->getCount() << "\n");
// setup the entry edge (normally path profiling doesn't care about this)
if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
- edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]]
+ edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]]
+= currentPath->getCount();
for( ProfilePathEdgeIterator nextEdge = pev->begin(),
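
Both hunks above (and the matching ones in ProfileInfo.cpp further down) replace a bare arrayMap[0] subscript with arrayMap[(BasicBlock*)0]. The map is keyed by pointers, so the cast pins the literal to the null-pointer key explicitly instead of leaning on an implicit conversion; presumably this sidesteps an ambiguity or conversion complaint with the map types involved. A minimal illustration, with std::map standing in for the profiling map:

    #include <map>

    struct BasicBlock;

    int main() {
      std::map<BasicBlock *, int> ArrayMap;
      ArrayMap[(BasicBlock *)0] = 1; // Explicit null-pointer key; a bare 0
                                     // would rely on an implicit conversion
                                     // the key type may not accept cleanly.
      return ArrayMap.count((BasicBlock *)0) == 1 ? 0 : 1;
    }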
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 6ed27297923f..96804a01edc6 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -14,13 +14,13 @@
#define DEBUG_TYPE "postdomtree"
#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Instructions.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp
index a4f634af531e..d7f444b4b6d7 100644
--- a/lib/Analysis/ProfileDataLoader.cpp
+++ b/lib/Analysis/ProfileDataLoader.cpp
@@ -12,12 +12,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ProfileDataLoader.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Module.h"
-#include "llvm/InstrTypes.h"
-#include "llvm/Analysis/ProfileDataLoader.h"
#include "llvm/Analysis/ProfileDataTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <cstdio>
diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp
index c43cff05a4da..2ee0093a8f57 100644
--- a/lib/Analysis/ProfileDataLoaderPass.cpp
+++ b/lib/Analysis/ProfileDataLoaderPass.cpp
@@ -15,22 +15,22 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "profile-metadata-loader"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/InstrTypes.h"
-#include "llvm/Module.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/MDBuilder.h"
-#include "llvm/Metadata.h"
-#include "llvm/Pass.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileDataLoader.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Format.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(NumEdgesRead, "The # of edges read.");
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index 12b59e0a6fd5..b284b995ac78 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -12,14 +12,14 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "profile-estimator"
-#include "llvm/Pass.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static cl::opt<double>
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index b5b7ac1e5011..9626a48b9d0d 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -12,16 +12,16 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "profile-info"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
-#include "llvm/ADT/SmallSet.h"
-#include <set>
-#include <queue>
#include <limits>
+#include <queue>
+#include <set>
using namespace llvm;
namespace llvm {
@@ -249,7 +249,7 @@ const BasicBlock *ProfileInfoT<Function,BasicBlock>::
succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
if (Succ == End) {
- P[0] = BB;
+ P[(const BasicBlock*)0] = BB;
if (Mode & GetPathToExit) {
hasFoundPath = true;
BB = 0;
@@ -752,10 +752,10 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
Succ != End; ++Succ) {
Path P;
GetPath(*Succ, 0, P, GetPathToExit);
- if (Dest && Dest != P[0]) {
+ if (Dest && Dest != P[(const BasicBlock*)0]) {
AllEdgesHaveSameReturn = false;
}
- Dest = P[0];
+ Dest = P[(const BasicBlock*)0];
}
if (AllEdgesHaveSameReturn) {
if(EstimateMissingEdges(BB)) {
@@ -927,7 +927,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
Path P;
const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
- Dest = P[0];
+ Dest = P[(const BasicBlock*)0];
if (!Dest) continue;
if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
index 5c7c97cad1e5..f1f3e940c932 100644
--- a/lib/Analysis/ProfileInfoLoader.cpp
+++ b/lib/Analysis/ProfileInfoLoader.cpp
@@ -14,8 +14,8 @@
#include "llvm/Analysis/ProfileInfoLoader.h"
#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/Module.h"
-#include "llvm/InstrTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
#include <cstdlib>
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 5ecf052a1a24..346f8d6d6258 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -12,20 +12,20 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "profile-loader"
-#include "llvm/BasicBlock.h"
-#include "llvm/InstrTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Format.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/raw_ostream.h"
#include <set>
using namespace llvm;
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index 0cb158865afe..c8896de89301 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -12,17 +12,18 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "profile-verifier"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/Debug.h"
#include <set>
using namespace llvm;
diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp
new file mode 100644
index 000000000000..0a342b2167e4
--- /dev/null
+++ b/lib/Analysis/PtrUseVisitor.cpp
@@ -0,0 +1,36 @@
+//===- PtrUseVisitor.cpp - InstVisitors over a pointer's uses -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Implementation of the pointer use visitors.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PtrUseVisitor.h"
+
+using namespace llvm;
+
+void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) {
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI) {
+ if (VisitedUses.insert(&UI.getUse())) {
+ UseToVisit NewU = {
+ UseToVisit::UseAndIsOffsetKnownPair(&UI.getUse(), IsOffsetKnown),
+ Offset
+ };
+ Worklist.push_back(llvm_move(NewU));
+ }
+ }
+}
+
+bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) {
+ if (!IsOffsetKnown)
+ return false;
+
+ return GEPI.accumulateConstantOffset(DL, Offset);
+}
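
enqueueUsers above is the heart of the new visitor: traverse a pointer's transitive users through a worklist, with VisitedUses guaranteeing each use is queued exactly once even when values share users. The same shape reduced to a toy graph (Node stands in for Instruction):

    #include <set>
    #include <vector>

    struct Node { std::vector<Node *> Users; };

    void visitTransitiveUsers(Node &Root) {
      std::set<Node *> Visited;
      std::vector<Node *> Worklist;
      for (Node *U : Root.Users)
        if (Visited.insert(U).second) // Queue each user once.
          Worklist.push_back(U);
      while (!Worklist.empty()) {
        Node *N = Worklist.back();
        Worklist.pop_back();
        // ... visit N here ...
        for (Node *U : N->Users)      // Enqueue only unseen users.
          if (Visited.insert(U).second)
            Worklist.push_back(U);
      }
    }

The real class additionally carries a per-use (Use, IsOffsetKnown, Offset) record on the worklist, as the UseToVisit initializer shows.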
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 30f0d2f10d86..fad5074086ce 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -10,14 +10,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/Analysis/RegionIterator.h"
-
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#define DEBUG_TYPE "region"
#include "llvm/Support/Debug.h"
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 8b23cc704242..c5f1b925921b 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -9,16 +9,16 @@
// Print out the region tree of a function using dotty/graphviz.
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPrinter.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/DOTGraphTraitsPass.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index e3189ecc8994..6ea915fdb0b7 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -59,22 +59,25 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "scalar-evolution"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
@@ -83,9 +86,7 @@
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
@@ -4229,6 +4230,25 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
return Max ? Max : SE->getCouldNotCompute();
}
+bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
+ ScalarEvolution *SE) const {
+ if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
+ return true;
+
+ if (!ExitNotTaken.ExitingBlock)
+ return false;
+
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != 0; ENT = ENT->getNextExit()) {
+
+ if (ENT->ExactNotTaken != SE->getCouldNotCompute()
+ && SE->hasOperand(ENT->ExactNotTaken, S)) {
+ return true;
+ }
+ }
+ return false;
+}
+
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
@@ -6120,8 +6140,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
getTypeSizeInBits(ICI->getOperand(0)->getType()))
return false;
- // Now that we found a conditional branch that dominates the loop, check to
- // see if it is the comparison we are looking for.
+  // Now that we found a conditional branch that dominates the loop or controls
+  // the loop latch, check to see if it is the comparison we are looking for.
ICmpInst::Predicate FoundPred;
if (Inverse)
FoundPred = ICI->getInversePredicate();
@@ -6939,6 +6959,17 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
BlockDispositions.erase(S);
UnsignedRanges.erase(S);
SignedRanges.erase(S);
+
+ for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
+ BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
+ BackedgeTakenInfo &BEInfo = I->second;
+ if (BEInfo.hasOperand(S, this)) {
+ BEInfo.clear();
+ BackedgeTakenCounts.erase(I++);
+ }
+ else
+ ++I;
+ }
}
typedef DenseMap<const Loop *, std::string> VerifyMap;
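
The new loop in forgetMemoizedResults also shows the standard way to erase from a DenseMap (or std::map) mid-iteration: BackedgeTakenCounts.erase(I++) advances I past the doomed entry before erase invalidates the old position. The same idiom on a plain std::map:

    #include <map>

    void eraseMatching(std::map<int, int> &M, int Needle) {
      for (auto I = M.begin(), E = M.end(); I != E;) {
        if (I->second == Needle)
          M.erase(I++); // I already points at the next element.
        else
          ++I;
      }
    }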
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index e9edb3e083de..79c5f0deb03b 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -19,9 +19,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Pass.h"
using namespace llvm;
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 111bfb4a6a76..fcd7ce272a22 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -14,13 +14,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -1523,9 +1523,8 @@ Value *SCEVExpander::expand(const SCEV *S) {
}
// Check to see if we already expanded this here.
- std::map<std::pair<const SCEV *, Instruction *>,
- AssertingVH<Value> >::iterator I =
- InsertedExpressions.find(std::make_pair(S, InsertPt));
+ std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >::iterator
+ I = InsertedExpressions.find(std::make_pair(S, InsertPt));
if (I != InsertedExpressions.end())
return I->second;
@@ -1600,14 +1599,14 @@ static bool width_descending(Value *lhs, Value *rhs) {
/// the same context that SCEVExpander is used.
unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
SmallVectorImpl<WeakVH> &DeadInsts,
- const TargetLowering *TLI) {
+ const TargetTransformInfo *TTI) {
// Find integer phis in order of increasing width.
SmallVector<PHINode*, 8> Phis;
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
Phis.push_back(Phi);
}
- if (TLI)
+ if (TTI)
std::sort(Phis.begin(), Phis.end(), width_descending);
unsigned NumElim = 0;
@@ -1635,8 +1634,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
if (!OrigPhiRef) {
OrigPhiRef = Phi;
- if (Phi->getType()->isIntegerTy() && TLI
- && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+ if (Phi->getType()->isIntegerTy() && TTI
+ && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
// This phi can be freely truncated to the narrowest phi type. Map the
// truncated expression to it so it will be reused for narrow types.
const SCEV *TruncExpr =
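
The first hunk in this file swaps the InsertedExpressions cache's value type from AssertingVH<Value> to TrackingVH<Value>. An asserting handle aborts if its value is deleted while still referenced; a tracking handle follows replaceAllUsesWith instead, which suits a long-lived expression cache whose entries may be RAUW'd between queries. A toy model of the tracking behavior (not LLVM's ValueHandle machinery):

    #include <set>

    struct Value;
    struct Handle { Value *V = nullptr; };

    struct Value {
      std::set<Handle *> Watchers;
      void replaceAllUsesWith(Value *New) {
        for (Handle *H : Watchers)
          H->V = New; // Tracking semantics: caches stay valid across RAUW.
        Watchers.clear();
      }
    };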
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index c819666ee444..15b78728a73c 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -14,9 +14,9 @@
#define DEBUG_TYPE "sparseprop"
#include "llvm/Analysis/SparsePropagation.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
new file mode 100644
index 000000000000..64f8e96884c7
--- /dev/null
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -0,0 +1,558 @@
+//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tti"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+// Setup the analysis group to manage the TargetTransformInfo passes.
+INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI)
+char TargetTransformInfo::ID = 0;
+
+TargetTransformInfo::~TargetTransformInfo() {
+}
+
+void TargetTransformInfo::pushTTIStack(Pass *P) {
+ TopTTI = this;
+ PrevTTI = &P->getAnalysis<TargetTransformInfo>();
+
+ // Walk up the chain and update the top TTI pointer.
+ for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI)
+ PTTI->TopTTI = this;
+}
+
+void TargetTransformInfo::popTTIStack() {
+ TopTTI = 0;
+
+ // Walk up the chain and update the top TTI pointer.
+ for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI)
+ PTTI->TopTTI = PrevTTI;
+
+ PrevTTI = 0;
+}
+
+void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfo>();
+}
+
+unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
+ Type *OpTy) const {
+ return PrevTTI->getOperationCost(Opcode, Ty, OpTy);
+}
+
+unsigned TargetTransformInfo::getGEPCost(
+ const Value *Ptr, ArrayRef<const Value *> Operands) const {
+ return PrevTTI->getGEPCost(Ptr, Operands);
+}
+
+unsigned TargetTransformInfo::getCallCost(FunctionType *FTy,
+ int NumArgs) const {
+ return PrevTTI->getCallCost(FTy, NumArgs);
+}
+
+unsigned TargetTransformInfo::getCallCost(const Function *F,
+ int NumArgs) const {
+ return PrevTTI->getCallCost(F, NumArgs);
+}
+
+unsigned TargetTransformInfo::getCallCost(
+ const Function *F, ArrayRef<const Value *> Arguments) const {
+ return PrevTTI->getCallCost(F, Arguments);
+}
+
+unsigned TargetTransformInfo::getIntrinsicCost(
+ Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) const {
+ return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys);
+}
+
+unsigned TargetTransformInfo::getIntrinsicCost(
+ Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
+ return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments);
+}
+
+unsigned TargetTransformInfo::getUserCost(const User *U) const {
+ return PrevTTI->getUserCost(U);
+}
+
+bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
+ return PrevTTI->isLoweredToCall(F);
+}
+
+bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
+ return PrevTTI->isLegalAddImmediate(Imm);
+}
+
+bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
+ return PrevTTI->isLegalICmpImmediate(Imm);
+}
+
+bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset,
+ bool HasBaseReg,
+ int64_t Scale) const {
+ return PrevTTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
+ Scale);
+}
+
+bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return PrevTTI->isTruncateFree(Ty1, Ty2);
+}
+
+bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
+ return PrevTTI->isTypeLegal(Ty);
+}
+
+unsigned TargetTransformInfo::getJumpBufAlignment() const {
+ return PrevTTI->getJumpBufAlignment();
+}
+
+unsigned TargetTransformInfo::getJumpBufSize() const {
+ return PrevTTI->getJumpBufSize();
+}
+
+bool TargetTransformInfo::shouldBuildLookupTables() const {
+ return PrevTTI->shouldBuildLookupTables();
+}
+
+TargetTransformInfo::PopcntSupportKind
+TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
+ return PrevTTI->getPopcntSupport(IntTyWidthInBit);
+}
+
+unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ return PrevTTI->getIntImmCost(Imm, Ty);
+}
+
+unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
+ return PrevTTI->getNumberOfRegisters(Vector);
+}
+
+unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
+ return PrevTTI->getRegisterBitWidth(Vector);
+}
+
+unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
+ return PrevTTI->getMaximumUnrollFactor();
+}
+
+unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
+ Type *Ty,
+ OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+ return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+}
+
+unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const {
+ return PrevTTI->getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ return PrevTTI->getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
+ return PrevTTI->getCFInstrCost(Opcode);
+}
+
+unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ return PrevTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ return PrevTTI->getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+}
+
+unsigned
+TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID,
+ Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys);
+}
+
+unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
+ return PrevTTI->getNumberOfParts(Tp);
+}
+
+unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const {
+ return PrevTTI->getAddressComputationCost(Tp);
+}
+
+namespace {
+
+struct NoTTI : ImmutablePass, TargetTransformInfo {
+ const DataLayout *DL;
+
+ NoTTI() : ImmutablePass(ID), DL(0) {
+ initializeNoTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ // Note that this subclass is special, and must *not* call initializeTTI as
+ // it does not chain.
+ TopTTI = this;
+ PrevTTI = 0;
+ DL = getAnalysisIfAvailable<DataLayout>();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ // Note that this subclass is special, and must *not* call
+ // TTI::getAnalysisUsage as it breaks the recursion.
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const {
+ switch (Opcode) {
+ default:
+ // By default, just classify everything as 'basic'.
+ return TCC_Basic;
+
+ case Instruction::GetElementPtr:
+ llvm_unreachable("Use getGEPCost for GEP operations!");
+
+ case Instruction::BitCast:
+ assert(OpTy && "Cast instructions must provide the operand type");
+ if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
+ // Identity and pointer-to-pointer casts are free.
+ return TCC_Free;
+
+ // Otherwise, the default basic cost is used.
+ return TCC_Basic;
+
+ case Instruction::IntToPtr:
+ // An inttoptr cast is free so long as the input is a legal integer type
+ // which doesn't contain values outside the range of a pointer.
+ if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) &&
+ OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits())
+ return TCC_Free;
+
+ // Otherwise it's not a no-op.
+ return TCC_Basic;
+
+ case Instruction::PtrToInt:
+ // A ptrtoint cast is free so long as the result is large enough to store
+      // the pointer and is a legal integer type.
+ if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) &&
+ Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits())
+ return TCC_Free;
+
+ // Otherwise it's not a no-op.
+ return TCC_Basic;
+
+ case Instruction::Trunc:
+ // trunc to a native type is free (assuming the target has compare and
+ // shift-right of the same width).
+ if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty)))
+ return TCC_Free;
+
+ return TCC_Basic;
+ }
+ }
+
+ unsigned getGEPCost(const Value *Ptr,
+ ArrayRef<const Value *> Operands) const {
+ // In the basic model, we just assume that all-constant GEPs will be folded
+ // into their uses via addressing modes.
+ for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
+ if (!isa<Constant>(Operands[Idx]))
+ return TCC_Basic;
+
+ return TCC_Free;
+ }
+
+ unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const {
+ assert(FTy && "FunctionType must be provided to this routine.");
+
+ // The target-independent implementation just measures the size of the
+ // function by approximating that each argument will take on average one
+ // instruction to prepare.
+
+ if (NumArgs < 0)
+ // Set the argument number to the number of explicit arguments in the
+ // function.
+ NumArgs = FTy->getNumParams();
+
+ return TCC_Basic * (NumArgs + 1);
+ }
+
+ unsigned getCallCost(const Function *F, int NumArgs = -1) const {
+ assert(F && "A concrete function must be provided to this routine.");
+
+ if (NumArgs < 0)
+ // Set the argument number to the number of explicit arguments in the
+ // function.
+ NumArgs = F->arg_size();
+
+ if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) {
+ FunctionType *FTy = F->getFunctionType();
+ SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
+ return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
+ }
+
+ if (!TopTTI->isLoweredToCall(F))
+ return TCC_Basic; // Give a basic cost if it will be lowered directly.
+
+ return TopTTI->getCallCost(F->getFunctionType(), NumArgs);
+ }
+
+ unsigned getCallCost(const Function *F,
+ ArrayRef<const Value *> Arguments) const {
+ // Simply delegate to generic handling of the call.
+ // FIXME: We should use instsimplify or something else to catch calls which
+ // will constant fold with these arguments.
+ return TopTTI->getCallCost(F, Arguments.size());
+ }
+
+ unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys) const {
+ switch (IID) {
+ default:
+ // Intrinsics rarely (if ever) have normal argument setup constraints.
+ // Model them as having a basic instruction cost.
+ // FIXME: This is wrong for libc intrinsics.
+ return TCC_Basic;
+
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ // These intrinsics don't actually represent code after lowering.
+ return TCC_Free;
+ }
+ }
+
+ unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments) const {
+ // Delegate to the generic intrinsic handling code. This mostly provides an
+ // opportunity for targets to (for example) special case the cost of
+ // certain intrinsics based on constants used as arguments.
+ SmallVector<Type *, 8> ParamTys;
+ ParamTys.reserve(Arguments.size());
+ for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
+ ParamTys.push_back(Arguments[Idx]->getType());
+ return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys);
+ }
+
+ unsigned getUserCost(const User *U) const {
+ if (isa<PHINode>(U))
+ return TCC_Free; // Model all PHI nodes as free.
+
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U))
+ // In the basic model we just assume that all-constant GEPs will be
+ // folded into their uses via addressing modes.
+ return GEP->hasAllConstantIndices() ? TCC_Free : TCC_Basic;
+
+ if (ImmutableCallSite CS = U) {
+ const Function *F = CS.getCalledFunction();
+ if (!F) {
+ // Just use the called value type.
+ Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
+ return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
+ }
+
+ SmallVector<const Value *, 8> Arguments;
+ for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(),
+ AE = CS.arg_end();
+ AI != AE; ++AI)
+ Arguments.push_back(*AI);
+
+ return TopTTI->getCallCost(F, Arguments);
+ }
+
+ if (const CastInst *CI = dyn_cast<CastInst>(U)) {
+ // Result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical or return instructions). These are usually
+ // nop on most sane targets.
+ if (isa<CmpInst>(CI->getOperand(0)))
+ return TCC_Free;
+ }
+
+ // Otherwise delegate to the fully generic implementations.
+ return getOperationCost(Operator::getOpcode(U), U->getType(),
+ U->getNumOperands() == 1 ?
+ U->getOperand(0)->getType() : 0);
+ }
+
+ bool isLoweredToCall(const Function *F) const {
+ // FIXME: These should almost certainly not be handled here, and instead
+ // handled with the help of TLI or the target itself. This was largely
+ // ported from existing analysis heuristics here so that such refactorings
+ // can take place in the future.
+
+ if (F->isIntrinsic())
+ return false;
+
+ if (F->hasLocalLinkage() || !F->hasName())
+ return true;
+
+ StringRef Name = F->getName();
+
+ // These will all likely lower to a single selection DAG node.
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
+ Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
+ Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
+ Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
+ return false;
+
+ // These are all likely to be optimized into something smaller.
+ if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
+ Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name ==
+ "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" ||
+ Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs")
+ return false;
+
+ return true;
+ }
+
+ bool isLegalAddImmediate(int64_t Imm) const {
+ return false;
+ }
+
+ bool isLegalICmpImmediate(int64_t Imm) const {
+ return false;
+ }
+
+ bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) const {
+ // Guess that reg+reg addressing is allowed. This heuristic is taken from
+ // the implementation of LSR.
+ return !BaseGV && BaseOffset == 0 && Scale <= 1;
+ }
+
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return false;
+ }
+
+ bool isTypeLegal(Type *Ty) const {
+ return false;
+ }
+
+ unsigned getJumpBufAlignment() const {
+ return 0;
+ }
+
+ unsigned getJumpBufSize() const {
+ return 0;
+ }
+
+ bool shouldBuildLookupTables() const {
+ return true;
+ }
+
+ PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
+ return PSK_Software;
+ }
+
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty) const {
+ return 1;
+ }
+
+ unsigned getNumberOfRegisters(bool Vector) const {
+ return 8;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) const {
+ return 32;
+ }
+
+ unsigned getMaximumUnrollFactor() const {
+ return 1;
+ }
+
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
+ OperandValueKind) const {
+ return 1;
+ }
+
+ unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index = 0, Type *SubTp = 0) const {
+ return 1;
+ }
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ return 1;
+ }
+
+ unsigned getCFInstrCost(unsigned Opcode) const {
+ return 1;
+ }
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy = 0) const {
+ return 1;
+ }
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index = -1) const {
+ return 1;
+ }
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ return 1;
+ }
+
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID,
+ Type *RetTy,
+ ArrayRef<Type*> Tys) const {
+ return 1;
+ }
+
+ unsigned getNumberOfParts(Type *Tp) const {
+ return 0;
+ }
+
+ unsigned getAddressComputationCost(Type *Tp) const {
+ return 0;
+ }
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "notti",
+ "No target information", true, true, true)
+char NoTTI::ID = 0;
+
+ImmutablePass *llvm::createNoTargetTransformInfoPass() {
+ return new NoTTI();
+}
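
The new file threads every query through PrevTTI, forming a chain from the most specific target pass down to the NoTTI defaults, while pushTTIStack/popTTIStack keep TopTTI pointed at the head so any layer can restart a query at the most derived implementation. A reduced model of that chain-of-responsibility layout (toy classes; none of the pass plumbing):

    struct TTI {
      TTI *Prev = nullptr;
      TTI *Top = this;
      virtual ~TTI() {}
      virtual unsigned getOperationCost(unsigned Opcode) const {
        return Prev ? Prev->getOperationCost(Opcode) : 1; // NoTTI default.
      }
      void push(TTI *Base) { // Mirrors pushTTIStack: become the new head.
        Prev = Base;
        for (TTI *P = Prev; P; P = P->Prev)
          P->Top = this;
      }
    };

    struct TargetTTI : TTI {
      unsigned getOperationCost(unsigned Opcode) const override {
        if (Opcode == 0)
          return 0;                           // This layer has an answer...
        return TTI::getOperationCost(Opcode); // ...otherwise delegate down.
      }
    };

Queries that re-enter via TopTTI (as getCallCost does with getIntrinsicCost above) always start at the most derived layer, so a target override is honored even when a generic layer originated the call.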
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
index 22da85762034..4c68322b8282 100644
--- a/lib/Analysis/Trace.cpp
+++ b/lib/Analysis/Trace.cpp
@@ -16,8 +16,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Trace.h"
-#include "llvm/Function.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 0faf1398ec76..68e43b2cdb63 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -57,12 +57,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Constants.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Metadata.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 3beb373dc5cc..45dcc5e37ecf 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -13,21 +13,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/Operator.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PatternMatch.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include <cstring>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -58,7 +58,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
-
+
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
// from [0-C].
@@ -84,7 +84,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
// Determine which operand has more trailing zeros, and use that
@@ -266,11 +266,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
Elt = CDS->getElementAsInteger(i);
KnownZero &= ~Elt;
- KnownOne &= Elt;
+ KnownOne &= Elt;
}
return;
}
-
+
// The address of an aligned GlobalValue has trailing zeros.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
@@ -306,7 +306,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
return;
}
-
+
if (Argument *A = dyn_cast<Argument>(V)) {
unsigned Align = 0;
@@ -345,9 +345,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// If either the LHS or the RHS are Zero, the result is zero.
ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
@@ -357,9 +357,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::Or: {
ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// Output known-0 bits are only known if clear in both the LHS & RHS.
KnownZero &= KnownZero2;
// Output known-1 are known to be set if set in either the LHS | RHS.
@@ -369,9 +369,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::Xor: {
ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
@@ -407,8 +407,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD,
Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
@@ -433,7 +433,12 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned SrcBitWidth;
// Note that we handle pointer operands here because of inttoptr/ptrtoint
// which fall through here.
- SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType());
+ if (TD) {
+ SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType());
+ } else {
+ SrcBitWidth = SrcTy->getScalarSizeInBits();
+ if (!SrcBitWidth) return;
+ }
assert(SrcBitWidth && "SrcBitWidth can't be zero");
KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
@@ -460,11 +465,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::SExt: {
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
-
+
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -481,7 +486,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero <<= ShiftAmt;
KnownOne <<= ShiftAmt;
KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
@@ -493,10 +498,10 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
// Compute the new bits that are at the top now.
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
+
// Unsigned shift right.
ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
// high bits known zero.
@@ -509,13 +514,13 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
// Compute the new bits that are at the top now.
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
-
+
// Signed shift right.
ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
-
+
APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
KnownZero |= HighBits;
@@ -559,7 +564,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
KnownOne |= ~LowBits;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
}
@@ -606,7 +611,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned Align = AI->getAlignment();
if (Align == 0 && TD)
Align = TD->getABITypeAlignment(AI->getType()->getElementType());
-
+
if (Align > 0)
KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align));
break;
@@ -643,7 +648,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
LocalKnownZero.countTrailingOnes()));
}
}
-
+
KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ);
break;
}
@@ -799,12 +804,11 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
KnownZero = ZeroBits[BitWidth - 1];
}
-/// isPowerOfTwo - Return true if the given value is known to have exactly one
+/// isKnownToBeAPowerOfTwo - Return true if the given value is known to have exactly one
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
-bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero,
- unsigned Depth) {
+bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return OrZero;
@@ -831,19 +835,19 @@ bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero,
// A shift of a power of two is a power of two or zero.
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
match(V, m_Shr(m_Value(X), m_Value()))))
- return isPowerOfTwo(X, TD, /*OrZero*/true, Depth);
+ return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth);
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
- return isPowerOfTwo(ZI->getOperand(0), TD, OrZero, Depth);
+ return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth);
if (SelectInst *SI = dyn_cast<SelectInst>(V))
- return isPowerOfTwo(SI->getTrueValue(), TD, OrZero, Depth) &&
- isPowerOfTwo(SI->getFalseValue(), TD, OrZero, Depth);
+ return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth) &&
+ isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth);
if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
// A power of two and'd with anything is a power of two or zero.
- if (isPowerOfTwo(X, TD, /*OrZero*/true, Depth) ||
- isPowerOfTwo(Y, TD, /*OrZero*/true, Depth))
+ if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth) ||
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth))
return true;
// X & (-X) is always a power of two or zero.
if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
@@ -856,7 +860,73 @@ bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero,
// copying a sign bit (sdiv int_min, 2).
if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
- return isPowerOfTwo(cast<Operator>(V)->getOperand(0), TD, OrZero, Depth);
+ return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, Depth);
+ }
+
+ return false;
+}
+
+/// \brief Test whether a GEP's result is known to be non-null.
+///
+/// Uses properties inherent in a GEP to try to determine whether it is known
+/// to be non-null.
+///
+/// Currently this routine does not support vector GEPs.
+static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
+ unsigned Depth) {
+ if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
+ return false;
+
+ // FIXME: Support vector-GEPs.
+ assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
+
+ // If the base pointer is non-null, we cannot walk to a null address with an
+ // inbounds GEP in address space zero.
+ if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth))
+ return true;
+
+ // Past this, if we don't have DataLayout, we can't do much.
+ if (!DL)
+ return false;
+
+ // Walk the GEP operands and see if any operand introduces a non-zero offset.
+ // If so, then the GEP cannot produce a null pointer, as doing so would
+ // inherently violate the inbounds contract within address space zero.
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ // Struct types are easy -- they must always be indexed by a constant.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = DL->getStructLayout(STy);
+ uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
+ if (ElementOffset > 0)
+ return true;
+ continue;
+ }
+
+ // If we have a zero-sized type, the index doesn't matter. Keep looping.
+ if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0)
+ continue;
+
+ // Fast path the constant operand case both for efficiency and so we don't
+ // increment Depth when just zipping down an all-constant GEP.
+ if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
+ if (!OpC->isZero())
+ return true;
+ continue;
+ }
+
+ // We post-increment Depth here because while isKnownNonZero increments it
+ // as well, when we pop back up that increment won't persist. We don't want
+ // to recurse 10k times just because we have 10k GEP operands. We don't
+ // bail completely out because we want to handle constant GEPs regardless
+ // of depth.
+ if (Depth++ >= MaxDepth)
+ continue;
+
+ if (isKnownNonZero(GTI.getOperand(), DL, Depth))
+ return true;
}
return false;
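
The struct branch of the loop above reduces to a single DataLayout query. A standalone rendering of that query, with fieldByteOffset as a hypothetical name:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// The byte offset of field Idx within STy; isGEPKnownNonNull treats any
// non-zero result as proof that the inbounds GEP moved off null.
static uint64_t fieldByteOffset(const DataLayout &DL, StructType *STy,
                                unsigned Idx) {
  return DL.getStructLayout(STy)->getElementOffset(Idx);
}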
@@ -881,7 +951,16 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
if (Depth++ >= MaxDepth)
return false;
- unsigned BitWidth = getBitWidth(V->getType(), TD);
+ // Check for pointer simplifications.
+ if (V->getType()->isPointerTy()) {
+ if (isKnownNonNull(V))
+ return true;
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ if (isGEPKnownNonNull(GEP, TD, Depth))
+ return true;
+ }
+
+ unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD);
// X | Y != 0 if X != 0 or Y != 0.
Value *X = 0, *Y = 0;
@@ -955,9 +1034,9 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
}
// The sum of a non-negative number and a power of two is not zero.
- if (XKnownNonNegative && isPowerOfTwo(Y, TD, /*OrZero*/false, Depth))
+ if (XKnownNonNegative && isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth))
return true;
- if (YKnownNonNegative && isPowerOfTwo(X, TD, /*OrZero*/false, Depth))
+ if (YKnownNonNegative && isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth))
return true;
}
// X * Y.
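
The two calls above are typical of how the renamed predicate is consumed. A hedged sketch of another common client, with canMaskInsteadOfURem as an illustrative name:

#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// X urem Y can be lowered to X & (Y - 1) only when Y is a power of two;
// OrZero=false additionally rules out Y == 0.
static bool canMaskInsteadOfURem(Value *Divisor) {
  return isKnownToBeAPowerOfTwo(Divisor, /*OrZero=*/false, /*Depth=*/0);
}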
@@ -996,7 +1075,7 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
const DataLayout *TD, unsigned Depth) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
return (KnownZero & Mask) == Mask;
}
@@ -1026,14 +1105,14 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
if (Depth == 6)
return 1; // Limit search depth.
-
+
Operator *U = dyn_cast<Operator>(V);
switch (Operator::getOpcode(V)) {
default: break;
case Instruction::SExt:
Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
-
+
case Instruction::AShr: {
Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
// ashr X, C -> adds C sign bits. Vectors too.
@@ -1075,38 +1154,38 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
if (Tmp == 1) return 1; // Early out.
Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1);
return std::min(Tmp, Tmp2);
-
+
case Instruction::Add:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
if (Tmp == 1) return 1; // Early out.
-
+
// Special case decrementing a value (ADD X, -1):
if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
-
+
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
return TyBits;
-
+
// If we are subtracting one from a positive number, there is no carry
// out of the result.
if (KnownZero.isNegative())
return Tmp;
}
-
+
Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
if (Tmp2 == 1) return 1;
return std::min(Tmp, Tmp2)-1;
-
+
case Instruction::Sub:
Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
if (Tmp2 == 1) return 1;
-
+
// Handle NEG.
if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
if (CLHS->isNullValue()) {
@@ -1116,26 +1195,26 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
// sign bits set.
if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
return TyBits;
-
+
// If the input is known to be positive (the sign bit is known clear),
// the output of the NEG has the same number of sign bits as the input.
if (KnownZero.isNegative())
return Tmp2;
-
+
// Otherwise, we treat this like a SUB.
}
-
+
// Sub can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
-
+
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(U);
// Don't analyze large in-degree PHIs.
if (PN->getNumIncomingValues() > 4) break;
-
+
// Take the minimum of all incoming values. This can't infinitely loop
// because of our depth threshold.
Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
@@ -1152,13 +1231,13 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
// case for targets like X86.
break;
}
-
+
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
APInt Mask;
ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
-
+
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
} else if (KnownOne.isNegative()) { // sign bit is 1;
@@ -1167,7 +1246,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
// Nothing known.
return FirstAnswer;
}
-
+
// Okay, we know that the sign bit in Mask is set. Use CLZ to determine
// the number of identical bits in the top of the input value.
Mask = ~Mask;
@@ -1195,7 +1274,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
if (Base == 0)
return false;
-
+
if (Base == 1) {
Multiple = V;
return true;
@@ -1211,11 +1290,11 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
if (CI && CI->getZExtValue() % Base == 0) {
Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
- return true;
+ return true;
}
-
+
if (Depth == MaxDepth) return false; // Limit search depth.
-
+
Operator *I = dyn_cast<Operator>(V);
if (!I) return false;
@@ -1247,13 +1326,13 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
if (Constant *Op1C = dyn_cast<Constant>(Op1))
if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
- if (Op1C->getType()->getPrimitiveSizeInBits() <
+ if (Op1C->getType()->getPrimitiveSizeInBits() <
MulC->getType()->getPrimitiveSizeInBits())
Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
- if (Op1C->getType()->getPrimitiveSizeInBits() >
+ if (Op1C->getType()->getPrimitiveSizeInBits() >
MulC->getType()->getPrimitiveSizeInBits())
MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
-
+
// V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
Multiple = ConstantExpr::getMul(MulC, Op1C);
return true;
@@ -1271,13 +1350,13 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
if (Constant *Op0C = dyn_cast<Constant>(Op0))
if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
- if (Op0C->getType()->getPrimitiveSizeInBits() <
+ if (Op0C->getType()->getPrimitiveSizeInBits() <
MulC->getType()->getPrimitiveSizeInBits())
Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
- if (Op0C->getType()->getPrimitiveSizeInBits() >
+ if (Op0C->getType()->getPrimitiveSizeInBits() >
MulC->getType()->getPrimitiveSizeInBits())
MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
-
+
// V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
Multiple = ConstantExpr::getMul(MulC, Op0C);
return true;
@@ -1297,7 +1376,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
return false;
}
-/// CannotBeNegativeZero - Return true if we can prove that the specified FP
+/// CannotBeNegativeZero - Return true if we can prove that the specified FP
/// value is never equal to -0.0.
///
/// NOTE: this function will need to be revisited when we support non-default
@@ -1306,28 +1385,33 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegZero();
-
+
if (Depth == 6)
return 1; // Limit search depth.
const Operator *I = dyn_cast<Operator>(V);
if (I == 0) return false;
-
+
+ // Check if the nsz fast-math flag is set
+ if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I))
+ if (FPO->hasNoSignedZeros())
+ return true;
+
// (add x, 0.0) is guaranteed to return +0.0, not -0.0.
- if (I->getOpcode() == Instruction::FAdd &&
- isa<ConstantFP>(I->getOperand(1)) &&
- cast<ConstantFP>(I->getOperand(1))->isNullValue())
- return true;
-
+ if (I->getOpcode() == Instruction::FAdd)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1)))
+ if (CFP->isNullValue())
+ return true;
+
// sitofp and uitofp turn into +0.0 for zero.
if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
return true;
-
+
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
// sqrt(-0.0) = -0.0, no other negative results are possible.
if (II->getIntrinsicID() == Intrinsic::sqrt)
return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
-
+
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (const Function *F = CI->getCalledFunction()) {
if (F->isDeclaration()) {
@@ -1342,7 +1426,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
}
}
-
+
return false;
}
@@ -1359,9 +1443,9 @@ Value *llvm::isBytewiseValue(Value *V) {
if (Constant *C = dyn_cast<Constant>(V))
if (C->isNullValue())
return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
-
+
// Constant float and double values can be handled as integer values if the
- // corresponding integer value is "byteable". An important case is 0.0.
+ // corresponding integer value is "byteable". An important case is 0.0.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
if (CFP->getType()->isFloatTy())
V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
@@ -1369,8 +1453,8 @@ Value *llvm::isBytewiseValue(Value *V) {
V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
// Don't handle long double formats, which have strange constraints.
}
-
- // We can handle constant integers that are power of two in size and a
+
+ // We can handle constant integers that are power of two in size and a
// multiple of 8 bits.
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
unsigned Width = CI->getBitWidth();
@@ -1384,7 +1468,7 @@ Value *llvm::isBytewiseValue(Value *V) {
Val2 = Val.lshr(NextWidth);
Val2 = Val2.trunc(Val.getBitWidth()/2);
Val = Val.trunc(Val.getBitWidth()/2);
-
+
// If the top/bottom halves aren't the same, reject it.
if (Val != Val2)
return 0;
@@ -1392,7 +1476,7 @@ Value *llvm::isBytewiseValue(Value *V) {
return ConstantInt::get(V->getContext(), Val);
}
}
-
+
// A ConstantDataArray/Vector is splatable if all its members are equal and
// also splatable.
if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
@@ -1400,11 +1484,11 @@ Value *llvm::isBytewiseValue(Value *V) {
Value *Val = isBytewiseValue(Elt);
if (!Val)
return 0;
-
+
for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
if (CA->getElementAsConstant(I) != Elt)
return 0;
-
+
return Val;
}
@@ -1428,7 +1512,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
SmallVector<unsigned, 10> &Idxs,
unsigned IdxSkip,
Instruction *InsertBefore) {
- llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
+ llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType);
if (STy) {
// Save the original To argument so we can modify it
Value *OrigTo = To;
@@ -1459,7 +1543,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
// the struct's elements had a value that was inserted directly. In the latter
// case, perhaps we can't determine each of the subelements individually, but
// we might be able to find the complete struct somewhere.
-
+
// Find the value that is at that particular spot
Value *V = FindInsertedValue(From, Idxs);
@@ -1518,7 +1602,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
if (C == 0) return 0;
return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
}
-
+
if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
// Loop the indices for the insertvalue instruction in parallel with the
// requested indices
@@ -1543,7 +1627,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
InsertBefore);
}
-
+
// This insert value inserts something else than what we are looking for.
// See if the (aggregate) value inserted into has the value we are
// looking for, then.
@@ -1558,26 +1642,26 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
makeArrayRef(req_idx, idx_range.end()),
InsertBefore);
}
-
+
if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
// If we're extracting a value from an aggregate that was extracted from
// something else, we can extract from that something else directly instead.
// However, we will need to chain I's indices with the requested indices.
-
- // Calculate the number of indices required
+
+ // Calculate the number of indices required
unsigned size = I->getNumIndices() + idx_range.size();
// Allocate some space to put the new indices in
SmallVector<unsigned, 5> Idxs;
Idxs.reserve(size);
// Add indices from the extract value instruction
Idxs.append(I->idx_begin(), I->idx_end());
-
+
// Add requested indices
Idxs.append(idx_range.begin(), idx_range.end());
- assert(Idxs.size() == size
+ assert(Idxs.size() == size
&& "Number of indices added not correct?");
-
+
return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
}
// Otherwise, we don't know (such as, extracting from a function return value
@@ -1589,41 +1673,33 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
/// it can be expressed as a base pointer plus a constant offset. Return the
/// base and offset to the caller.
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout &TD) {
- Operator *PtrOp = dyn_cast<Operator>(Ptr);
- if (PtrOp == 0 || Ptr->getType()->isVectorTy())
- return Ptr;
-
- // Just look through bitcasts.
- if (PtrOp->getOpcode() == Instruction::BitCast)
- return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
-
- // If this is a GEP with constant indices, we can look through it.
- GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
- if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
- ++I, ++GTI) {
- ConstantInt *OpC = cast<ConstantInt>(*I);
- if (OpC->isZero()) continue;
-
- // Handle a struct and array indices which add their offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ const DataLayout *TD) {
+ // Without DataLayout, conservatively assume 64-bit offsets, which is
+ // the widest we support.
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ APInt ByteOffset(BitWidth, 0);
+ while (1) {
+ if (Ptr->getType()->isVectorTy())
+ break;
+
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ APInt GEPOffset(BitWidth, 0);
+ if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset))
+ break;
+ ByteOffset += GEPOffset;
+ Ptr = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
+ Ptr = cast<Operator>(Ptr)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
+ if (GA->mayBeOverridden())
+ break;
+ Ptr = GA->getAliasee();
} else {
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
- Offset += OpC->getSExtValue()*Size;
+ break;
}
}
-
- // Re-sign extend from the pointer size if needed to get overflow edge cases
- // right.
- unsigned PtrSize = TD.getPointerSizeInBits();
- if (PtrSize < 64)
- Offset = SignExtend64(Offset, PtrSize);
-
- return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+ Offset = ByteOffset.getSExtValue();
+ return Ptr;
}
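
The rewritten routine now iterates through GEPs, bitcasts, and non-overridable aliases instead of recursing, and tolerates a null DataLayout. A sketch of a typical caller, with sameBaseKnownDistance as a hypothetical name:

#include "llvm/Analysis/ValueTracking.h"
#include <cstdint>
using namespace llvm;

// True when A and B share a base pointer at a compile-time-known distance,
// e.g. as a cheap prelude to memory-dependence queries.
static bool sameBaseKnownDistance(Value *A, Value *B, const DataLayout *TD,
                                  int64_t &Distance) {
  int64_t OffA = 0, OffB = 0;
  Value *BaseA = GetPointerBaseWithConstantOffset(A, OffA, TD);
  Value *BaseB = GetPointerBaseWithConstantOffset(B, OffB, TD);
  if (BaseA != BaseB)
    return false;
  Distance = OffB - OffA;
  return true;
}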
@@ -1636,26 +1712,26 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
// Look through bitcast instructions and geps.
V = V->stripPointerCasts();
-
+
// If the value is a GEP instruction or constant expression, treat it as an
// offset.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// Make sure the GEP has exactly three arguments.
if (GEP->getNumOperands() != 3)
return false;
-
+
// Make sure the index-ee is a pointer to array of i8.
PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
return false;
-
+
// Check to make sure that the first operand of the GEP is an integer and
// has value 0 so that we are sure we're indexing into the initializer.
const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
if (FirstIdx == 0 || !FirstIdx->isZero())
return false;
-
+
// If the second index isn't a ConstantInt, then this is a variable index
// into the array. If this occurs, we can't say anything meaningful about
// the string.
@@ -1681,13 +1757,13 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
Str = "";
return true;
}
-
+
// Must be a Constant Array
const ConstantDataArray *Array =
dyn_cast<ConstantDataArray>(GV->getInitializer());
if (Array == 0 || !Array->isString())
return false;
-
+
// Get the number of elements in the array
uint64_t NumElts = Array->getType()->getArrayNumElements();
@@ -1696,10 +1772,10 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
if (Offset > NumElts)
return false;
-
+
// Skip over 'offset' bytes.
Str = Str.substr(Offset);
-
+
if (TrimAtNul) {
// Trim off the \0 and anything after it. If the array is not nul
// terminated, we just return the whole end of string. The client may know
@@ -1753,7 +1829,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
if (Len1 != Len2) return 0;
return Len1;
}
-
+
// Otherwise, see if we can read the string.
StringRef StrData;
if (!getConstantStringInfo(V, StrData))
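
getConstantStringInfo is the usual entry point for string folds like this one. A minimal sketch of the strlen case, with tryConstantStrlen as an illustrative name:

#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include <cstdint>
using namespace llvm;

// Fold strlen(s) when s provably points into a nul-terminated constant.
static bool tryConstantStrlen(Value *Src, uint64_t &Len) {
  StringRef Str;
  if (!getConstantStringInfo(Src, Str))
    return false;
  Len = Str.size(); // TrimAtNul defaults to true, so no embedded '\0'.
  return true;
}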
@@ -1940,3 +2016,19 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
return false; // Misc instructions which have effects
}
}
+
+/// isKnownNonNull - Return true if we know that the specified value is never
+/// null.
+bool llvm::isKnownNonNull(const Value *V) {
+ // Alloca never returns null, malloc might.
+ if (isa<AllocaInst>(V)) return true;
+
+ // A byval argument is never null.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+
+ // Global values are not null unless extern weak.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return !GV->hasExternalWeakLinkage();
+ return false;
+}
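
A hedged example of the kind of fold the new predicate enables, with icmpEqNullIsFalse as an illustrative name:

#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// 'icmp eq %p, null' is provably false when %p is an alloca, a byval
// argument, or a global without extern_weak linkage.
static bool icmpEqNullIsFalse(const Value *Ptr) {
  return isKnownNonNull(Ptr);
}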
diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp
index 1eab27d3eba3..1f36a00ab086 100644
--- a/lib/Archive/Archive.cpp
+++ b/lib/Archive/Archive.cpp
@@ -12,15 +12,16 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Bitcode/Archive.h"
#include "ArchiveInternals.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/system_error.h"
-#include <memory>
#include <cstring>
+#include <memory>
using namespace llvm;
// getMemberSize - compute the actual physical size of the file member as seen
diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h
index 639f5ac2691b..f6c87e899f25 100644
--- a/lib/Archive/ArchiveInternals.h
+++ b/lib/Archive/ArchiveInternals.h
@@ -14,10 +14,9 @@
#ifndef LIB_ARCHIVE_ARCHIVEINTERNALS_H
#define LIB_ARCHIVE_ARCHIVEINTERNALS_H
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/Archive.h"
#include "llvm/Support/TimeValue.h"
-#include "llvm/ADT/StringExtras.h"
-
#include <cstring>
#define ARFILE_MAGIC "!<arch>\n" ///< magic string
diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp
index 5052495c0d62..14713e692c0f 100644
--- a/lib/Archive/ArchiveReader.cpp
+++ b/lib/Archive/ArchiveReader.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Bitcode/Archive.h"
#include "ArchiveInternals.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Module.h"
#include <cstdio>
#include <cstdlib>
-#include <memory>
using namespace llvm;
/// Read a variable-bit-rate encoded unsigned integer
@@ -176,7 +177,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error)
}
if (p >= endp) {
if (error)
- *error = "missing name termiantor in string table";
+ *error = "missing name terminator in string table";
return 0;
}
} else {
@@ -325,14 +326,14 @@ Archive::loadArchive(std::string* error) {
// Open and completely load the archive file.
Archive*
-Archive::OpenAndLoad(const sys::Path& file, LLVMContext& C,
+Archive::OpenAndLoad(const sys::Path& File, LLVMContext& C,
std::string* ErrorMessage) {
- std::auto_ptr<Archive> result ( new Archive(file, C));
+ OwningPtr<Archive> result ( new Archive(File, C));
if (result->mapToMemory(ErrorMessage))
- return 0;
+ return NULL;
if (!result->loadArchive(ErrorMessage))
- return 0;
- return result.release();
+ return NULL;
+ return result.take();
}
// Get all the bitcode modules from the archive
@@ -439,15 +440,15 @@ Archive::loadSymbolTable(std::string* ErrorMsg) {
}
// Open the archive and load just the symbol tables
-Archive* Archive::OpenAndLoadSymbols(const sys::Path& file,
+Archive* Archive::OpenAndLoadSymbols(const sys::Path& File,
LLVMContext& C,
std::string* ErrorMessage) {
- std::auto_ptr<Archive> result ( new Archive(file, C) );
+ OwningPtr<Archive> result ( new Archive(File, C) );
if (result->mapToMemory(ErrorMessage))
- return 0;
+ return NULL;
if (!result->loadSymbolTable(ErrorMessage))
- return 0;
- return result.release();
+ return NULL;
+ return result.take();
}
// Look up one symbol in the symbol table and return the module that defines
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index ec6b4b87584b..3eba701c9535 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -11,18 +11,19 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Bitcode/Archive.h"
#include "ArchiveInternals.h"
-#include "llvm/Module.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/system_error.h"
#include <fstream>
-#include <ostream>
#include <iomanip>
+#include <ostream>
using namespace llvm;
// Write an integer using variable bit rate encoding. This saves a few bytes
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index a60e4aa41c42..f46383be7e46 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -12,14 +12,15 @@
//===----------------------------------------------------------------------===//
#include "LLLexer.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instruction.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Assembly/Parser.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
@@ -55,22 +56,12 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
return Result;
}
-static char parseHexChar(char C) {
- if (C >= '0' && C <= '9')
- return C-'0';
- if (C >= 'A' && C <= 'F')
- return C-'A'+10;
- if (C >= 'a' && C <= 'f')
- return C-'a'+10;
- return 0;
-}
-
uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
uint64_t Result = 0;
for (; Buffer != End; ++Buffer) {
uint64_t OldRes = Result;
Result *= 16;
- Result += parseHexChar(*Buffer);
+ Result += hexDigitValue(*Buffer);
if (Result < OldRes) { // Uh, oh, overflow detected!!!
Error("constant bigger than 64 bits detected!");
@@ -86,12 +77,12 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End,
for (int i=0; i<16; i++, Buffer++) {
assert(Buffer != End);
Pair[0] *= 16;
- Pair[0] += parseHexChar(*Buffer);
+ Pair[0] += hexDigitValue(*Buffer);
}
Pair[1] = 0;
for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
Pair[1] *= 16;
- Pair[1] += parseHexChar(*Buffer);
+ Pair[1] += hexDigitValue(*Buffer);
}
if (Buffer != End)
Error("constant bigger than 128 bits detected!");
@@ -105,12 +96,12 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
assert(Buffer != End);
Pair[1] *= 16;
- Pair[1] += parseHexChar(*Buffer);
+ Pair[1] += hexDigitValue(*Buffer);
}
Pair[0] = 0;
for (int i=0; i<16; i++, Buffer++) {
Pair[0] *= 16;
- Pair[0] += parseHexChar(*Buffer);
+ Pair[0] += hexDigitValue(*Buffer);
}
if (Buffer != End)
Error("constant bigger than 128 bits detected!");
@@ -128,8 +119,10 @@ static void UnEscapeLexed(std::string &Str) {
if (BIn < EndBuffer-1 && BIn[1] == '\\') {
*BOut++ = '\\'; // Two \ becomes one
BIn += 2;
- } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
- *BOut = parseHexChar(BIn[1]) * 16 + parseHexChar(BIn[2]);
+ } else if (BIn < EndBuffer-2 &&
+ isxdigit(static_cast<unsigned char>(BIn[1])) &&
+ isxdigit(static_cast<unsigned char>(BIn[2]))) {
+ *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
BIn += 3; // Skip over handled chars
++BOut;
} else {
@@ -144,7 +137,8 @@ static void UnEscapeLexed(std::string &Str) {
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
- return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_';
+ return isalnum(static_cast<unsigned char>(C)) || C == '-' || C == '$' ||
+ C == '.' || C == '_';
}
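
The static_cast<unsigned char> added throughout this file is not cosmetic: passing a negative plain char (any byte of 0x80 or above on platforms where char is signed) to the <cctype> functions is undefined behavior. The safe pattern in isolation:

#include <cctype>

// Without the cast, C == '\xe9' on a signed-char platform passes -23 to
// isalnum, which is outside the function's defined domain.
static bool isLabelCharSafe(char C) {
  return std::isalnum(static_cast<unsigned char>(C)) || C == '-' ||
         C == '$' || C == '.' || C == '_';
}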
@@ -197,7 +191,7 @@ lltok::Kind LLLexer::LexToken() {
switch (CurChar) {
default:
// Handle letters: [a-zA-Z_]
- if (isalpha(CurChar) || CurChar == '_')
+ if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
return LexIdentifier();
return lltok::Error;
@@ -235,6 +229,7 @@ lltok::Kind LLLexer::LexToken() {
SkipLineComment();
return LexToken();
case '!': return LexExclaim();
+ case '#': return LexHash();
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case '-':
@@ -290,8 +285,8 @@ lltok::Kind LLLexer::LexAt() {
return lltok::GlobalVar;
// Handle GlobalVarID: @[0-9]+
- if (isdigit(CurPtr[0])) {
- for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
+ for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
/*empty*/;
uint64_t Val = atoull(TokStart+1, CurPtr);
@@ -325,10 +320,12 @@ lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
/// ReadVarName - Read the rest of a token containing a variable name.
bool LLLexer::ReadVarName() {
const char *NameStart = CurPtr;
- if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
+ CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_') {
++CurPtr;
- while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
+ CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_')
++CurPtr;
@@ -354,8 +351,8 @@ lltok::Kind LLLexer::LexPercent() {
return lltok::LocalVar;
// Handle LocalVarID: %[0-9]+
- if (isdigit(CurPtr[0])) {
- for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
+ for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
/*empty*/;
uint64_t Val = atoull(TokStart+1, CurPtr);
@@ -389,10 +386,12 @@ lltok::Kind LLLexer::LexQuote() {
/// !
lltok::Kind LLLexer::LexExclaim() {
// Lex a metadata name as a MetadataVar.
- if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
+ CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
++CurPtr;
- while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
+ CurPtr[0] == '-' || CurPtr[0] == '$' ||
CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
++CurPtr;
@@ -402,7 +401,25 @@ lltok::Kind LLLexer::LexExclaim() {
}
return lltok::exclaim;
}
-
+
+/// LexHash - Lex all tokens that start with a # character:
+/// AttrGrpID ::= #[0-9]+
+lltok::Kind LLLexer::LexHash() {
+ // Handle AttrGrpID: #[0-9]+
+ if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
+ for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
+ /*empty*/;
+
+ uint64_t Val = atoull(TokStart+1, CurPtr);
+ if ((unsigned)Val != Val)
+ Error("invalid value number (too large)!");
+ UIntVal = unsigned(Val);
+ return lltok::AttrGrpID;
+ }
+
+ return lltok::Error;
+}
+
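
LexHash mirrors the GlobalVarID and LocalVarID paths: digits after '#' become an AttrGrpID whose value lands in UIntVal, guarded by the same unsigned round-trip overflow test. That test in isolation, with fitsInUnsigned as an illustrative name:

#include <cstdint>

// True when Val survives a round trip through unsigned, i.e. the 64-bit
// token value fits in the 32-bit UIntVal field.
static bool fitsInUnsigned(uint64_t Val) {
  return (unsigned)Val == Val;
}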
/// LexIdentifier: Handle several related productions:
/// Label [-a-zA-Z$._0-9]+:
/// IntegerType i[0-9]+
@@ -415,8 +432,11 @@ lltok::Kind LLLexer::LexIdentifier() {
for (; isLabelChar(*CurPtr); ++CurPtr) {
// If we decide this is an integer, remember the end of the sequence.
- if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
- if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr;
+ if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
+ IntEnd = CurPtr;
+ if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
+ *CurPtr != '_')
+ KeywordEnd = CurPtr;
}
// If we stopped due to a colon, this really is a label.
@@ -445,9 +465,11 @@ lltok::Kind LLLexer::LexIdentifier() {
CurPtr = KeywordEnd;
--StartChar;
unsigned Len = CurPtr-StartChar;
-#define KEYWORD(STR) \
- if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
- return lltok::kw_##STR;
+#define KEYWORD(STR) \
+ do { \
+ if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
+ return lltok::kw_##STR; \
+ } while (0)
KEYWORD(true); KEYWORD(false);
KEYWORD(declare); KEYWORD(define);
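
Wrapping the KEYWORD body in do { ... } while (0) turns each expansion into a single statement, so the semicolon-terminated uses above stay well-formed even inside an unbraced if/else. A small demonstration of the hazard, with hypothetical macro names:

static bool check(int) { return false; }

#define BAD(X)  { if (check(X)) return; }       // a block, then a stray ';'
#define GOOD(X) do { if (check(X)) return; } while (0)

static void demo(bool cond) {
  // if (cond) BAD(1); else BAD(2);   // does not compile: ';' ends the if
  if (cond) GOOD(1); else GOOD(2);    // compiles: each use is one statement
}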
@@ -472,6 +494,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(hidden);
KEYWORD(protected);
KEYWORD(unnamed_addr);
+ KEYWORD(externally_initialized);
KEYWORD(extern_weak);
KEYWORD(external);
KEYWORD(thread_local);
@@ -486,7 +509,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(target);
KEYWORD(triple);
KEYWORD(unwind);
- KEYWORD(deplibs);
+ KEYWORD(deplibs); // FIXME: Remove in 4.0.
KEYWORD(datalayout);
KEYWORD(volatile);
KEYWORD(atomic);
@@ -498,6 +521,11 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(seq_cst);
KEYWORD(singlethread);
+ KEYWORD(nnan);
+ KEYWORD(ninf);
+ KEYWORD(nsz);
+ KEYWORD(arcp);
+ KEYWORD(fast);
KEYWORD(nuw);
KEYWORD(nsw);
KEYWORD(exact);
@@ -532,33 +560,39 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(cc);
KEYWORD(c);
- KEYWORD(signext);
- KEYWORD(zeroext);
+ KEYWORD(attributes);
+
+ KEYWORD(alwaysinline);
+ KEYWORD(byval);
+ KEYWORD(inlinehint);
KEYWORD(inreg);
- KEYWORD(sret);
- KEYWORD(nounwind);
- KEYWORD(noreturn);
+ KEYWORD(minsize);
+ KEYWORD(naked);
+ KEYWORD(nest);
KEYWORD(noalias);
+ KEYWORD(nobuiltin);
KEYWORD(nocapture);
- KEYWORD(byval);
- KEYWORD(nest);
+ KEYWORD(noduplicate);
+ KEYWORD(noimplicitfloat);
+ KEYWORD(noinline);
+ KEYWORD(nonlazybind);
+ KEYWORD(noredzone);
+ KEYWORD(noreturn);
+ KEYWORD(nounwind);
+ KEYWORD(optsize);
KEYWORD(readnone);
KEYWORD(readonly);
- KEYWORD(uwtable);
KEYWORD(returns_twice);
-
- KEYWORD(inlinehint);
- KEYWORD(noinline);
- KEYWORD(alwaysinline);
- KEYWORD(optsize);
+ KEYWORD(signext);
+ KEYWORD(sret);
KEYWORD(ssp);
KEYWORD(sspreq);
- KEYWORD(noredzone);
- KEYWORD(noimplicitfloat);
- KEYWORD(naked);
- KEYWORD(nonlazybind);
- KEYWORD(address_safety);
- KEYWORD(minsize);
+ KEYWORD(sspstrong);
+ KEYWORD(sanitize_address);
+ KEYWORD(sanitize_thread);
+ KEYWORD(sanitize_memory);
+ KEYWORD(uwtable);
+ KEYWORD(zeroext);
KEYWORD(type);
KEYWORD(opaque);
@@ -653,7 +687,8 @@ lltok::Kind LLLexer::LexIdentifier() {
// Check for [us]0x[0-9A-Fa-f]+ which are hexadecimal constants generated by
// the CFE to avoid forcing it to deal with 64-bit numbers.
if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
- TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
+ TokStart[1] == '0' && TokStart[2] == 'x' &&
+ isxdigit(static_cast<unsigned char>(TokStart[3]))) {
int len = CurPtr-TokStart-3;
uint32_t bits = len * 4;
APInt Tmp(bits, StringRef(TokStart+3, len), 16);
@@ -693,13 +728,13 @@ lltok::Kind LLLexer::Lex0x() {
Kind = 'J';
}
- if (!isxdigit(CurPtr[0])) {
+ if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
// Bad token, return it as an error.
CurPtr = TokStart+1;
return lltok::Error;
}
- while (isxdigit(CurPtr[0]))
+ while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
++CurPtr;
if (Kind == 'J') {
@@ -716,20 +751,21 @@ lltok::Kind LLLexer::Lex0x() {
case 'K':
// F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
FP80HexToIntPair(TokStart+3, CurPtr, Pair);
- APFloatVal = APFloat(APInt(80, Pair));
+ APFloatVal = APFloat(APFloat::x87DoubleExtended, APInt(80, Pair));
return lltok::APFloat;
case 'L':
// F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
HexToIntPair(TokStart+3, CurPtr, Pair);
- APFloatVal = APFloat(APInt(128, Pair), true);
+ APFloatVal = APFloat(APFloat::IEEEquad, APInt(128, Pair));
return lltok::APFloat;
case 'M':
// PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
HexToIntPair(TokStart+3, CurPtr, Pair);
- APFloatVal = APFloat(APInt(128, Pair));
+ APFloatVal = APFloat(APFloat::PPCDoubleDouble, APInt(128, Pair));
return lltok::APFloat;
case 'H':
- APFloatVal = APFloat(APInt(16,HexIntToVal(TokStart+3, CurPtr)));
+ APFloatVal = APFloat(APFloat::IEEEhalf,
+ APInt(16,HexIntToVal(TokStart+3, CurPtr)));
return lltok::APFloat;
}
}
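
The four constructors above now name the floating-point semantics explicitly rather than inferring them from bit width, which was ambiguous: IEEE quad and PPC double-double are both 128 bits. A hedged illustration:

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
using namespace llvm;

// Bits must be 128 bits wide; the semantics tag, not the width, decides
// how the payload is interpreted.
static void interpret128(const APInt &Bits) {
  APFloat Quad(APFloat::IEEEquad, Bits);        // one 113-bit significand
  APFloat PPC(APFloat::PPCDoubleDouble, Bits);  // a pair of doubles
  (void)Quad;
  (void)PPC;
}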
@@ -744,8 +780,9 @@ lltok::Kind LLLexer::Lex0x() {
/// HexFP128Constant 0xL[0-9A-Fa-f]+
/// HexPPC128Constant 0xM[0-9A-Fa-f]+
lltok::Kind LLLexer::LexDigitOrNegative() {
- // If the letter after the negative is a number, this is probably a label.
- if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
+ // If the letter after the negative is not a number, this is probably a label.
+ if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
+ !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
// Okay, this is not a number after the -, it's probably a label.
if (const char *End = isLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
@@ -759,7 +796,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
// At this point, it is either a label, int or fp constant.
// Skip digits, we have at least one.
- for (; isdigit(CurPtr[0]); ++CurPtr)
+ for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
/*empty*/;
// Check to see if this really is a label afterall, e.g. "-1:".
@@ -796,13 +833,14 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
++CurPtr;
// Skip over [0-9]*([eE][-+]?[0-9]+)?
- while (isdigit(CurPtr[0])) ++CurPtr;
+ while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
- if (isdigit(CurPtr[1]) ||
- ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
+ if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
+ ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
+ isdigit(static_cast<unsigned char>(CurPtr[2])))) {
CurPtr += 2;
- while (isdigit(CurPtr[0])) ++CurPtr;
+ while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
}
}
@@ -814,11 +852,11 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
lltok::Kind LLLexer::LexPositive() {
// If the character after the positive sign is not a digit, this is an
// error.
- if (!isdigit(CurPtr[0]))
+ if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
return lltok::Error;
// Skip digits.
- for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
+ for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
/*empty*/;
// At this point, we need a '.'.
@@ -830,13 +868,14 @@ lltok::Kind LLLexer::LexPositive() {
++CurPtr;
// Skip over [0-9]*([eE][-+]?[0-9]+)?
- while (isdigit(CurPtr[0])) ++CurPtr;
+ while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
- if (isdigit(CurPtr[1]) ||
- ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
+ if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
+ ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
+ isdigit(static_cast<unsigned char>(CurPtr[2])))) {
CurPtr += 2;
- while (isdigit(CurPtr[0])) ++CurPtr;
+ while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
}
}
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 09aea5b01825..85703c766b09 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -15,8 +15,8 @@
#define LIB_ASMPARSER_LLLEXER_H
#include "LLToken.h"
-#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/Support/SourceMgr.h"
#include <string>
@@ -81,6 +81,7 @@ namespace llvm {
lltok::Kind LexPercent();
lltok::Kind LexQuote();
lltok::Kind Lex0x();
+ lltok::Kind LexHash();
uint64_t atoull(const char *Buffer, const char *End);
uint64_t HexIntToVal(const char *Buffer, const char *End);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index b24291ffb329..c8da1f8bc661 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#include "LLParser.h"
-#include "llvm/AutoUpgrade.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/ValueSymbolTable.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -52,10 +52,10 @@ bool LLParser::ValidateEndOfModule() {
I != E; ++I) {
Instruction *Inst = I->first;
const std::vector<MDRef> &MDList = I->second;
-
+
for (unsigned i = 0, e = MDList.size(); i != e; ++i) {
unsigned SlotNo = MDList[i].MDSlot;
-
+
if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0)
return Error(MDList[i].Loc, "use of undefined metadata '!" +
Twine(SlotNo) + "'");
@@ -64,8 +64,66 @@ bool LLParser::ValidateEndOfModule() {
}
ForwardRefInstMetadata.clear();
}
-
-
+
+ // Handle any function attribute group forward references.
+ for (std::map<Value*, std::vector<unsigned> >::iterator
+ I = ForwardRefAttrGroups.begin(), E = ForwardRefAttrGroups.end();
+ I != E; ++I) {
+ Value *V = I->first;
+ std::vector<unsigned> &Vec = I->second;
+ AttrBuilder B;
+
+ for (std::vector<unsigned>::iterator VI = Vec.begin(), VE = Vec.end();
+ VI != VE; ++VI)
+ B.merge(NumberedAttrBuilders[*VI]);
+
+ if (Function *Fn = dyn_cast<Function>(V)) {
+ AttributeSet AS = Fn->getAttributes();
+ AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
+ AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex,
+ AS.getFnAttributes());
+
+ FnAttrs.merge(B);
+
+ // If the alignment was parsed as an attribute, move to the alignment
+ // field.
+ if (FnAttrs.hasAlignmentAttr()) {
+ Fn->setAlignment(FnAttrs.getAlignment());
+ FnAttrs.removeAttribute(Attribute::Alignment);
+ }
+
+ AS = AS.addAttributes(Context, AttributeSet::FunctionIndex,
+ AttributeSet::get(Context,
+ AttributeSet::FunctionIndex,
+ FnAttrs));
+ Fn->setAttributes(AS);
+ } else if (CallInst *CI = dyn_cast<CallInst>(V)) {
+ AttributeSet AS = CI->getAttributes();
+ AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
+ AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex,
+ AS.getFnAttributes());
+ FnAttrs.merge(B);
+ AS = AS.addAttributes(Context, AttributeSet::FunctionIndex,
+ AttributeSet::get(Context,
+ AttributeSet::FunctionIndex,
+ FnAttrs));
+ CI->setAttributes(AS);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(V)) {
+ AttributeSet AS = II->getAttributes();
+ AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
+ AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex,
+ AS.getFnAttributes());
+ FnAttrs.merge(B);
+ AS = AS.addAttributes(Context, AttributeSet::FunctionIndex,
+ AttributeSet::get(Context,
+ AttributeSet::FunctionIndex,
+ FnAttrs));
+ II->setAttributes(AS);
+ } else {
+ llvm_unreachable("invalid object with forward attribute group reference");
+ }
+ }
+
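
Each of the three branches above repeats the same read-modify-write because Function, CallInst, and InvokeInst share no attribute-bearing base class here. The shared pattern factored out, as a sketch against the AttributeSet API used above:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Pull the function-index attributes into a builder, merge the group's
// builder, and write the combined set back.
static AttributeSet mergeFnAttrs(LLVMContext &Ctx, AttributeSet AS,
                                 const AttrBuilder &Group) {
  AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
  AS = AS.removeAttributes(Ctx, AttributeSet::FunctionIndex,
                           AS.getFnAttributes());
  FnAttrs.merge(Group);
  return AS.addAttributes(Ctx, AttributeSet::FunctionIndex,
                          AttributeSet::get(Ctx, AttributeSet::FunctionIndex,
                                            FnAttrs));
}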
// If there are entries in ForwardRefBlockAddresses at this point, they are
// references after the function was defined. Resolve those now.
while (!ForwardRefBlockAddresses.empty()) {
@@ -76,19 +134,19 @@ bool LLParser::ValidateEndOfModule() {
TheFn = M->getFunction(Fn.StrVal);
else if (Fn.UIntVal < NumberedVals.size())
TheFn = dyn_cast<Function>(NumberedVals[Fn.UIntVal]);
-
+
if (TheFn == 0)
return Error(Fn.Loc, "unknown function referenced by blockaddress");
-
+
// Resolve all these references.
- if (ResolveForwardRefBlockAddresses(TheFn,
+ if (ResolveForwardRefBlockAddresses(TheFn,
ForwardRefBlockAddresses.begin()->second,
0))
return true;
-
+
ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
}
-
+
for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i)
if (NumberedTypes[i].second.isValid())
return Error(NumberedTypes[i].second,
@@ -123,7 +181,7 @@ bool LLParser::ValidateEndOfModule() {
return false;
}
-bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
+bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
std::vector<std::pair<ValID, GlobalValue*> > &Refs,
PerFunctionState *PFS) {
// Loop over all the references, resolving them.
@@ -141,11 +199,11 @@ bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
Res = dyn_cast_or_null<BasicBlock>(
TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal));
}
-
+
if (Res == 0)
return Error(Refs[i].first.Loc,
"referenced value is not a basic block");
-
+
// Get the BlockAddress for this and update references to use it.
BlockAddress *BA = BlockAddress::get(TheFn, Res);
Refs[i].second->replaceAllUsesWith(BA);
@@ -174,7 +232,7 @@ bool LLParser::ParseTopLevelEntities() {
case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break;
case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break;
case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break;
- case lltok::MetadataVar: if (ParseNamedMetadata()) return true; break;
+ case lltok::MetadataVar:if (ParseNamedMetadata()) return true; break;
// The Global variable production with no name can have many different
// optional leading prefixes, the production is:
@@ -220,6 +278,8 @@ bool LLParser::ParseTopLevelEntities() {
case lltok::kw_global: // GlobalType
if (ParseGlobal("", SMLoc(), 0, false, 0)) return true;
break;
+
+ case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break;
}
}
}
@@ -267,6 +327,7 @@ bool LLParser::ParseTargetDefinition() {
/// toplevelentity
/// ::= 'deplibs' '=' '[' ']'
/// ::= 'deplibs' '=' '[' STRINGCONSTANT (',' STRINGCONSTANT)* ']'
+/// FIXME: Remove in 4.0. Deplibs are currently parsed, but ignored.
bool LLParser::ParseDepLibs() {
assert(Lex.getKind() == lltok::kw_deplibs);
Lex.Lex();
@@ -277,14 +338,10 @@ bool LLParser::ParseDepLibs() {
if (EatIfPresent(lltok::rsquare))
return false;
- std::string Str;
- if (ParseStringConstant(Str)) return true;
- M->addLibrary(Str);
-
- while (EatIfPresent(lltok::comma)) {
+ do {
+ std::string Str;
if (ParseStringConstant(Str)) return true;
- M->addLibrary(Str);
- }
+ } while (EatIfPresent(lltok::comma));
return ParseToken(lltok::rsquare, "expected ']' at end of list");
}
@@ -302,11 +359,11 @@ bool LLParser::ParseUnnamedType() {
if (TypeID >= NumberedTypes.size())
NumberedTypes.resize(TypeID+1);
-
+
Type *Result = 0;
if (ParseStructDefinition(TypeLoc, "",
NumberedTypes[TypeID], Result)) return true;
-
+
if (!isa<StructType>(Result)) {
std::pair<Type*, LocTy> &Entry = NumberedTypes[TypeID];
if (Entry.first)
@@ -329,11 +386,11 @@ bool LLParser::ParseNamedType() {
if (ParseToken(lltok::equal, "expected '=' after name") ||
ParseToken(lltok::kw_type, "expected 'type' after name"))
return true;
-
+
Type *Result = 0;
if (ParseStructDefinition(NameLoc, Name,
NamedTypes[Name], Result)) return true;
-
+
if (!isa<StructType>(Result)) {
std::pair<Type*, LocTy> &Entry = NamedTypes[Name];
if (Entry.first)
@@ -341,7 +398,7 @@ bool LLParser::ParseNamedType() {
Entry.first = Result;
Entry.second = SMLoc();
}
-
+
return false;
}
@@ -473,7 +530,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) {
// Otherwise, create MDNode forward reference.
MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef<Value*>());
ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
-
+
if (NumberedMetadata.size() <= MID)
NumberedMetadata.resize(MID+1);
NumberedMetadata[MID] = FwdNode;
@@ -498,7 +555,7 @@ bool LLParser::ParseNamedMetadata() {
do {
if (ParseToken(lltok::exclaim, "Expected '!' here"))
return true;
-
+
MDNode *N = 0;
if (ParseMDNodeID(N)) return true;
NMD->addOperand(N);
@@ -530,7 +587,7 @@ bool LLParser::ParseStandaloneMetadata() {
return true;
MDNode *Init = MDNode::get(Context, Elts);
-
+
// See if this was forward referenced, if so, handle it.
std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> >::iterator
FI = ForwardRefMDNodes.find(MetadataID);
@@ -539,7 +596,7 @@ bool LLParser::ParseStandaloneMetadata() {
Temp->replaceAllUsesWith(Init);
MDNode::deleteTemporary(Temp);
ForwardRefMDNodes.erase(FI);
-
+
assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work");
} else {
if (MetadataID >= NumberedMetadata.size())
@@ -635,9 +692,11 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
/// ParseGlobal
/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalThreadLocal
-/// OptionalAddrSpace OptionalUnNammedAddr GlobalType Type Const
+/// OptionalAddrSpace OptionalUnnamedAddr
+/// OptionalExternallyInitialized GlobalType Type Const
/// ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
-/// OptionalAddrSpace OptionalUnNammedAddr GlobalType Type Const
+/// OptionalAddrSpace OptionalUnnamedAddr
+/// OptionalExternallyInitialized GlobalType Type Const
///
/// Everything through visibility has been parsed already.
///
@@ -645,9 +704,10 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
unsigned Linkage, bool HasLinkage,
unsigned Visibility) {
unsigned AddrSpace;
- bool IsConstant, UnnamedAddr;
+ bool IsConstant, UnnamedAddr, IsExternallyInitialized;
GlobalVariable::ThreadLocalMode TLM;
LocTy UnnamedAddrLoc;
+ LocTy IsExternallyInitializedLoc;
LocTy TyLoc;
Type *Ty = 0;
@@ -655,6 +715,9 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
ParseOptionalAddrSpace(AddrSpace) ||
ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
&UnnamedAddrLoc) ||
+ ParseOptionalToken(lltok::kw_externally_initialized,
+ IsExternallyInitialized,
+ &IsExternallyInitializedLoc) ||
ParseGlobalType(IsConstant) ||
ParseType(Ty, TyLoc))
return true;
@@ -712,6 +775,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
GV->setConstant(IsConstant);
GV->setLinkage((GlobalValue::LinkageTypes)Linkage);
GV->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+ GV->setExternallyInitialized(IsExternallyInitialized);
GV->setThreadLocalMode(TLM);
GV->setUnnamedAddr(UnnamedAddr);
@@ -736,6 +800,159 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
return false;
}
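The externally_initialized flag that ParseGlobal now accepts is also available programmatically. A short hedged sketch (the helper function is illustrative, not part of this patch) of building the textual form "@g = externally_initialized global i32 0" through the same GlobalVariable API the parser calls above:

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

static void addExternallyInitializedGlobal(llvm::Module &M) {
  llvm::Type *I32 = llvm::Type::getInt32Ty(M.getContext());
  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
      M, I32, /*isConstant=*/false, llvm::GlobalValue::ExternalLinkage,
      llvm::ConstantInt::get(I32, 0), "g");
  // The initializer may be changed before execution (e.g. by a loader), so
  // optimizers must not fold loads of @g to 0.
  GV->setExternallyInitialized(true);
}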
+/// ParseUnnamedAttrGrp
+/// ::= 'attributes' AttrGrpID '=' '{' AttrValPair+ '}'
+bool LLParser::ParseUnnamedAttrGrp() {
+ assert(Lex.getKind() == lltok::kw_attributes);
+ LocTy AttrGrpLoc = Lex.getLoc();
+ Lex.Lex();
+
+ assert(Lex.getKind() == lltok::AttrGrpID);
+ unsigned VarID = Lex.getUIntVal();
+ std::vector<unsigned> unused;
+ LocTy NoBuiltinLoc;
+ Lex.Lex();
+
+ if (ParseToken(lltok::equal, "expected '=' here") ||
+ ParseToken(lltok::lbrace, "expected '{' here") ||
+ ParseFnAttributeValuePairs(NumberedAttrBuilders[VarID], unused, true,
+ NoBuiltinLoc) ||
+ ParseToken(lltok::rbrace, "expected end of attribute group"))
+ return true;
+
+ if (!NumberedAttrBuilders[VarID].hasAttributes())
+ return Error(AttrGrpLoc, "attribute group has no attributes");
+
+ return false;
+}
+
+/// ParseFnAttributeValuePairs
+/// ::= <attr> | <attr> '=' <value>
+bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
+ std::vector<unsigned> &FwdRefAttrGrps,
+ bool inAttrGrp, LocTy &NoBuiltinLoc) {
+ bool HaveError = false;
+
+ B.clear();
+
+ while (true) {
+ lltok::Kind Token = Lex.getKind();
+ if (Token == lltok::kw_nobuiltin)
+ NoBuiltinLoc = Lex.getLoc();
+ switch (Token) {
+ default:
+ if (!inAttrGrp) return HaveError;
+ return Error(Lex.getLoc(), "unterminated attribute group");
+ case lltok::rbrace:
+ // Finished.
+ return false;
+
+ case lltok::AttrGrpID: {
+ // Allow a function to reference an attribute group:
+ //
+ // define void @foo() #1 { ... }
+ if (inAttrGrp)
+ HaveError |=
+ Error(Lex.getLoc(),
+ "cannot have an attribute group reference in an attribute group");
+
+ unsigned AttrGrpNum = Lex.getUIntVal();
+ if (inAttrGrp) break;
+
+ // Save the reference to the attribute group. We'll fill it in later.
+ FwdRefAttrGrps.push_back(AttrGrpNum);
+ break;
+ }
+ // Target-dependent attributes:
+ case lltok::StringConstant: {
+ std::string Attr = Lex.getStrVal();
+ Lex.Lex();
+ std::string Val;
+ if (EatIfPresent(lltok::equal) &&
+ ParseStringConstant(Val))
+ return true;
+
+ B.addAttribute(Attr, Val);
+ continue;
+ }
+
+ // Target-independent attributes:
+ case lltok::kw_align: {
+ // As a hack, we allow "align 2" on functions as a synonym for
+ // "alignstack 2".
+ unsigned Alignment;
+ if (inAttrGrp) {
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' here") ||
+ ParseUInt32(Alignment))
+ return true;
+ } else {
+ if (ParseOptionalAlignment(Alignment))
+ return true;
+ }
+ B.addAlignmentAttr(Alignment);
+ continue;
+ }
+ case lltok::kw_alignstack: {
+ unsigned Alignment;
+ if (inAttrGrp) {
+ Lex.Lex();
+ if (ParseToken(lltok::equal, "expected '=' here") ||
+ ParseUInt32(Alignment))
+ return true;
+ } else {
+ if (ParseOptionalStackAlignment(Alignment))
+ return true;
+ }
+ B.addStackAlignmentAttr(Alignment);
+ continue;
+ }
+ case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break;
+ case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
+ case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
+ case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
+ case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break;
+ case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break;
+ case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break;
+ case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break;
+ case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break;
+ case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break;
+ case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break;
+ case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break;
+ case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break;
+ case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break;
+ case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break;
+ case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break;
+ case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break;
+ case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break;
+ case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break;
+ case lltok::kw_sanitize_address: B.addAttribute(Attribute::SanitizeAddress); break;
+ case lltok::kw_sanitize_thread: B.addAttribute(Attribute::SanitizeThread); break;
+ case lltok::kw_sanitize_memory: B.addAttribute(Attribute::SanitizeMemory); break;
+ case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break;
+
+ // Error handling.
+ case lltok::kw_inreg:
+ case lltok::kw_signext:
+ case lltok::kw_zeroext:
+ HaveError |=
+ Error(Lex.getLoc(),
+ "invalid use of attribute on a function");
+ break;
+ case lltok::kw_byval:
+ case lltok::kw_nest:
+ case lltok::kw_noalias:
+ case lltok::kw_nocapture:
+ case lltok::kw_sret:
+ HaveError |=
+ Error(Lex.getLoc(),
+ "invalid use of parameter-only attribute on a function");
+ break;
+ }
+
+ Lex.Lex();
+ }
+}
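For orientation, the AttrBuilder filled in above is the same building block used throughout the new attribute scheme. A sketch, under the same API assumptions as this patch, of constructing by hand the set for the group { nounwind alignstack=8 "cpu"="cortex-a8" }:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"

static llvm::AttributeSet buildGroupByHand(llvm::LLVMContext &Ctx) {
  llvm::AttrBuilder B;
  B.addAttribute(llvm::Attribute::NoUnwind); // enum attribute
  B.addStackAlignmentAttr(8);                // alignstack=8
  B.addAttribute("cpu", "cortex-a8");        // target-dependent string pair
  return llvm::AttributeSet::get(Ctx, llvm::AttributeSet::FunctionIndex, B);
}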
//===----------------------------------------------------------------------===//
// GlobalValue Reference/Resolution Routines.
@@ -915,11 +1132,8 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
ParseToken(lltok::rparen, "expected ')' in address space");
}
-/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind
-/// indicates what kind of attribute list this is: 0: function arg, 1: result,
-/// 2: function attr.
-bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) {
- LocTy AttrLoc = Lex.getLoc();
+/// ParseOptionalParamAttrs - Parse a potentially empty list of parameter attributes.
+bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
bool HaveError = false;
B.clear();
@@ -929,42 +1143,6 @@ bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) {
switch (Token) {
default: // End of attributes.
return HaveError;
- case lltok::kw_zeroext: B.addAttribute(Attributes::ZExt); break;
- case lltok::kw_signext: B.addAttribute(Attributes::SExt); break;
- case lltok::kw_inreg: B.addAttribute(Attributes::InReg); break;
- case lltok::kw_sret: B.addAttribute(Attributes::StructRet); break;
- case lltok::kw_noalias: B.addAttribute(Attributes::NoAlias); break;
- case lltok::kw_nocapture: B.addAttribute(Attributes::NoCapture); break;
- case lltok::kw_byval: B.addAttribute(Attributes::ByVal); break;
- case lltok::kw_nest: B.addAttribute(Attributes::Nest); break;
-
- case lltok::kw_noreturn: B.addAttribute(Attributes::NoReturn); break;
- case lltok::kw_nounwind: B.addAttribute(Attributes::NoUnwind); break;
- case lltok::kw_uwtable: B.addAttribute(Attributes::UWTable); break;
- case lltok::kw_returns_twice: B.addAttribute(Attributes::ReturnsTwice); break;
- case lltok::kw_noinline: B.addAttribute(Attributes::NoInline); break;
- case lltok::kw_readnone: B.addAttribute(Attributes::ReadNone); break;
- case lltok::kw_readonly: B.addAttribute(Attributes::ReadOnly); break;
- case lltok::kw_inlinehint: B.addAttribute(Attributes::InlineHint); break;
- case lltok::kw_alwaysinline: B.addAttribute(Attributes::AlwaysInline); break;
- case lltok::kw_optsize: B.addAttribute(Attributes::OptimizeForSize); break;
- case lltok::kw_ssp: B.addAttribute(Attributes::StackProtect); break;
- case lltok::kw_sspreq: B.addAttribute(Attributes::StackProtectReq); break;
- case lltok::kw_noredzone: B.addAttribute(Attributes::NoRedZone); break;
- case lltok::kw_noimplicitfloat: B.addAttribute(Attributes::NoImplicitFloat); break;
- case lltok::kw_naked: B.addAttribute(Attributes::Naked); break;
- case lltok::kw_nonlazybind: B.addAttribute(Attributes::NonLazyBind); break;
- case lltok::kw_address_safety: B.addAttribute(Attributes::AddressSafety); break;
- case lltok::kw_minsize: B.addAttribute(Attributes::MinSize); break;
-
- case lltok::kw_alignstack: {
- unsigned Alignment;
- if (ParseOptionalStackAlignment(Alignment))
- return true;
- B.addStackAlignmentAttr(Alignment);
- continue;
- }
-
case lltok::kw_align: {
unsigned Alignment;
if (ParseOptionalAlignment(Alignment))
@@ -972,51 +1150,70 @@ bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) {
B.addAlignmentAttr(Alignment);
continue;
}
-
+ case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break;
+ case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
+ case lltok::kw_nest: B.addAttribute(Attribute::Nest); break;
+ case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
+ case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break;
+ case lltok::kw_signext: B.addAttribute(Attribute::SExt); break;
+ case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break;
+ case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break;
+
+ case lltok::kw_alignstack: case lltok::kw_nounwind:
+ case lltok::kw_alwaysinline: case lltok::kw_optsize:
+ case lltok::kw_inlinehint: case lltok::kw_readnone:
+ case lltok::kw_minsize: case lltok::kw_readonly:
+ case lltok::kw_naked: case lltok::kw_returns_twice:
+ case lltok::kw_nobuiltin: case lltok::kw_sanitize_address:
+ case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_memory:
+ case lltok::kw_noinline: case lltok::kw_sanitize_thread:
+ case lltok::kw_nonlazybind: case lltok::kw_ssp:
+ case lltok::kw_noredzone: case lltok::kw_sspreq:
+ case lltok::kw_noreturn: case lltok::kw_uwtable:
+ HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
+ break;
}
- // Perform some error checking.
- switch (Token) {
- default:
- if (AttrKind == 2)
- HaveError |= Error(AttrLoc, "invalid use of attribute on a function");
- break;
- case lltok::kw_align:
- // As a hack, we allow "align 2" on functions as a synonym for
- // "alignstack 2".
- break;
+ Lex.Lex();
+ }
+}
- // Parameter Only:
- case lltok::kw_sret:
- case lltok::kw_nocapture:
- case lltok::kw_byval:
- case lltok::kw_nest:
- if (AttrKind != 0)
- HaveError |= Error(AttrLoc, "invalid use of parameter-only attribute");
+/// ParseOptionalReturnAttrs - Parse a potentially empty list of return attributes.
+bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
+ bool HaveError = false;
+
+ B.clear();
+
+ while (1) {
+ lltok::Kind Token = Lex.getKind();
+ switch (Token) {
+ default: // End of attributes.
+ return HaveError;
+ case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
+ case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
+ case lltok::kw_signext: B.addAttribute(Attribute::SExt); break;
+ case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break;
+
+ // Error handling.
+ case lltok::kw_sret: case lltok::kw_nocapture:
+ case lltok::kw_byval: case lltok::kw_nest:
+ HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
break;
- // Function Only:
- case lltok::kw_noreturn:
- case lltok::kw_nounwind:
- case lltok::kw_readnone:
- case lltok::kw_readonly:
- case lltok::kw_noinline:
- case lltok::kw_alwaysinline:
- case lltok::kw_optsize:
- case lltok::kw_ssp:
- case lltok::kw_sspreq:
- case lltok::kw_noredzone:
- case lltok::kw_noimplicitfloat:
- case lltok::kw_naked:
- case lltok::kw_inlinehint:
- case lltok::kw_alignstack:
- case lltok::kw_uwtable:
- case lltok::kw_nonlazybind:
- case lltok::kw_returns_twice:
- case lltok::kw_address_safety:
- case lltok::kw_minsize:
- if (AttrKind != 2)
- HaveError |= Error(AttrLoc, "invalid use of function-only attribute");
+ case lltok::kw_align: case lltok::kw_noreturn:
+ case lltok::kw_alignstack: case lltok::kw_nounwind:
+ case lltok::kw_alwaysinline: case lltok::kw_optsize:
+ case lltok::kw_inlinehint: case lltok::kw_readnone:
+ case lltok::kw_minsize: case lltok::kw_readonly:
+ case lltok::kw_naked: case lltok::kw_returns_twice:
+ case lltok::kw_nobuiltin: case lltok::kw_sanitize_address:
+ case lltok::kw_noduplicate: case lltok::kw_sanitize_memory:
+ case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_thread:
+ case lltok::kw_noinline: case lltok::kw_ssp:
+ case lltok::kw_nonlazybind: case lltok::kw_sspreq:
+ case lltok::kw_noredzone: case lltok::kw_sspstrong:
+ case lltok::kw_uwtable:
+ HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
break;
}
@@ -1207,7 +1404,7 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
}
/// ParseOptionalCommaAlign
-/// ::=
+/// ::=
/// ::= ',' align 4
///
/// This returns with AteExtraComma set to true if it ate an excess comma at the
@@ -1221,7 +1418,7 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
AteExtraComma = true;
return false;
}
-
+
if (Lex.getKind() != lltok::kw_align)
return Error(Lex.getLoc(), "expected metadata or 'align'");
@@ -1289,7 +1486,7 @@ bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) {
bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
bool &AteExtraComma) {
AteExtraComma = false;
-
+
if (Lex.getKind() != lltok::comma)
return TokError("expected ',' as start of index list");
@@ -1345,7 +1542,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
case lltok::LocalVar: {
// Type ::= %foo
std::pair<Type*, LocTy> &Entry = NamedTypes[Lex.getStrVal()];
-
+
// If the type hasn't been defined yet, create a forward definition and
// remember where that forward def'n was seen (in case it never is defined).
if (Entry.first == 0) {
@@ -1362,7 +1559,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
if (Lex.getUIntVal() >= NumberedTypes.size())
NumberedTypes.resize(Lex.getUIntVal()+1);
std::pair<Type*, LocTy> &Entry = NumberedTypes[Lex.getUIntVal()];
-
+
// If the type hasn't been defined yet, create a forward definition and
// remember where that forward def'n was seen (in case it never is defined).
if (Entry.first == 0) {
@@ -1432,6 +1629,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
if (ParseToken(lltok::lparen, "expected '(' in call"))
return true;
+ unsigned AttrIndex = 1;
while (Lex.getKind() != lltok::rparen) {
// If this isn't the first argument, we need a comma.
if (!ArgList.empty() &&
@@ -1447,10 +1645,11 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
return true;
// Otherwise, handle normal operands.
- if (ParseOptionalAttrs(ArgAttrs, 0) || ParseValue(ArgTy, V, PFS))
+ if (ParseOptionalParamAttrs(ArgAttrs) || ParseValue(ArgTy, V, PFS))
return true;
- ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(V->getContext(),
- ArgAttrs)));
+ ArgList.push_back(ParamInfo(ArgLoc, V, AttributeSet::get(V->getContext(),
+ AttrIndex++,
+ ArgAttrs)));
}
Lex.Lex(); // Lex the ')'.
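Note that AttrIndex is 1-based: in the AttributeSet world each parameter's attributes are keyed by its position, with index 0 (ReturnIndex) reserved for the return value and ~0U (FunctionIndex) for the function itself. A small illustrative sketch:

#include "llvm/IR/Attributes.h"

static bool firstParamIsByVal(llvm::LLVMContext &Ctx) {
  llvm::AttrBuilder B;
  B.addAttribute(llvm::Attribute::ByVal);
  // Key the attribute to parameter 1; queries against any other index miss.
  llvm::AttributeSet ParamAttrs = llvm::AttributeSet::get(Ctx, 1, B);
  return ParamAttrs.hasAttributes(1); // true; hasAttributes(2) is false
}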
@@ -1486,7 +1685,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
std::string Name;
if (ParseType(ArgTy) ||
- ParseOptionalAttrs(Attrs, 0)) return true;
+ ParseOptionalParamAttrs(Attrs)) return true;
if (ArgTy->isVoidTy())
return Error(TypeLoc, "argument can not have void type");
@@ -1499,9 +1698,10 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
if (!FunctionType::isValidArgumentType(ArgTy))
return Error(TypeLoc, "invalid type for function argument");
+ unsigned AttrIndex = 1;
ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
- Attributes::get(ArgTy->getContext(),
- Attrs), Name));
+ AttributeSet::get(ArgTy->getContext(),
+ AttrIndex++, Attrs), Name));
while (EatIfPresent(lltok::comma)) {
// Handle ... at end of arg list.
@@ -1512,7 +1712,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
// Otherwise must be an argument type.
TypeLoc = Lex.getLoc();
- if (ParseType(ArgTy) || ParseOptionalAttrs(Attrs, 0)) return true;
+ if (ParseType(ArgTy) || ParseOptionalParamAttrs(Attrs)) return true;
if (ArgTy->isVoidTy())
return Error(TypeLoc, "argument can not have void type");
@@ -1528,7 +1728,8 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
return Error(TypeLoc, "invalid type for function argument");
ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
- Attributes::get(ArgTy->getContext(), Attrs),
+ AttributeSet::get(ArgTy->getContext(),
+ AttrIndex++, Attrs),
Name));
}
}
@@ -1553,7 +1754,7 @@ bool LLParser::ParseFunctionType(Type *&Result) {
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
if (!ArgList[i].Name.empty())
return Error(ArgList[i].Loc, "argument name invalid in function type");
- if (ArgList[i].Attrs.hasAttributes())
+ if (ArgList[i].Attrs.hasAttributes(i + 1))
return Error(ArgList[i].Loc,
"argument attributes invalid in function type");
}
@@ -1571,7 +1772,7 @@ bool LLParser::ParseFunctionType(Type *&Result) {
bool LLParser::ParseAnonStructType(Type *&Result, bool Packed) {
SmallVector<Type*, 8> Elts;
if (ParseStructBody(Elts)) return true;
-
+
Result = StructType::get(Context, Elts, Packed);
return false;
}
@@ -1583,20 +1784,20 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
// If the type was already defined, diagnose the redefinition.
if (Entry.first && !Entry.second.isValid())
return Error(TypeLoc, "redefinition of type");
-
+
// If we have opaque, just return without filling in the definition for the
// struct. This counts as a definition as far as the .ll file goes.
if (EatIfPresent(lltok::kw_opaque)) {
// This type is being defined, so clear the location to indicate this.
Entry.second = SMLoc();
-
+
// If this type number has never been uttered, create it.
if (Entry.first == 0)
Entry.first = StructType::create(Context, Name);
ResultTy = Entry.first;
return false;
}
-
+
// If the type starts with '<', then it is either a packed struct or a vector.
bool isPacked = EatIfPresent(lltok::less);
@@ -1606,27 +1807,27 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
if (Lex.getKind() != lltok::lbrace) {
if (Entry.first)
return Error(TypeLoc, "forward references to non-struct type");
-
+
ResultTy = 0;
if (isPacked)
return ParseArrayVectorType(ResultTy, true);
return ParseType(ResultTy);
}
-
+
// This type is being defined, so clear the location to indicate this.
Entry.second = SMLoc();
-
+
// If this type number has never been uttered, create it.
if (Entry.first == 0)
Entry.first = StructType::create(Context, Name);
-
+
StructType *STy = cast<StructType>(Entry.first);
-
+
SmallVector<Type*, 8> Body;
if (ParseStructBody(Body) ||
(isPacked && ParseToken(lltok::greater, "expected '>' in packed struct")))
return true;
-
+
STy->setBody(Body, isPacked);
ResultTy = STy;
return false;
@@ -1699,8 +1900,7 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
if ((unsigned)Size != Size)
return Error(SizeLoc, "size too large for vector");
if (!VectorType::isValidElementType(EltTy))
- return Error(TypeLoc,
- "vector element type must be fp, integer or a pointer to these types");
+ return Error(TypeLoc, "invalid vector element type");
Result = VectorType::get(EltTy, unsigned(Size));
} else {
if (!ArrayType::isValidElementType(EltTy))
@@ -1757,18 +1957,18 @@ bool LLParser::PerFunctionState::FinishFunction() {
FunctionID.Kind = ValID::t_GlobalID;
FunctionID.UIntVal = FunctionNumber;
}
-
+
std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >::iterator
FRBAI = P.ForwardRefBlockAddresses.find(FunctionID);
if (FRBAI != P.ForwardRefBlockAddresses.end()) {
// Resolve all these references.
if (P.ResolveForwardRefBlockAddresses(&F, FRBAI->second, this))
return true;
-
+
P.ForwardRefBlockAddresses.erase(FRBAI);
}
}
-
+
if (!ForwardRefVals.empty())
return P.Error(ForwardRefVals.begin()->second.second,
"use of undefined value '%" + ForwardRefVals.begin()->first +
@@ -2118,7 +2318,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return false;
case lltok::kw_asm: {
- // ValID ::= 'asm' SideEffect? AlignStack? STRINGCONSTANT ',' STRINGCONSTANT
+ // ValID ::= 'asm' SideEffect? AlignStack? IntelDialect? STRINGCONSTANT ','
+ // STRINGCONSTANT
bool HasSideEffect, AlignStack, AsmDialect;
Lex.Lex();
if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) ||
@@ -2141,19 +2342,19 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ValID Fn, Label;
LocTy FnLoc, LabelLoc;
-
+
if (ParseToken(lltok::lparen, "expected '(' in block address expression") ||
ParseValID(Fn) ||
ParseToken(lltok::comma, "expected comma in block address expression")||
ParseValID(Label) ||
ParseToken(lltok::rparen, "expected ')' in block address expression"))
return true;
-
+
if (Fn.Kind != ValID::t_GlobalID && Fn.Kind != ValID::t_GlobalName)
return Error(Fn.Loc, "expected function name in blockaddress");
if (Label.Kind != ValID::t_LocalID && Label.Kind != ValID::t_LocalName)
return Error(Label.Loc, "expected basic block name in blockaddress");
-
+
// Make a global variable as a placeholder for this reference.
GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context),
false, GlobalValue::InternalLinkage,
@@ -2163,7 +2364,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ID.Kind = ValID::t_Constant;
return false;
}
-
+
case lltok::kw_trunc:
case lltok::kw_zext:
case lltok::kw_sext:
@@ -2543,7 +2744,7 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return (V == 0);
case ValID::t_InlineAsm: {
PointerType *PTy = dyn_cast<PointerType>(Ty);
- FunctionType *FTy =
+ FunctionType *FTy =
PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
return Error(ID.Loc, "invalid type for inline asm constraint string");
@@ -2632,13 +2833,13 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
"initializer with struct type has wrong # elements");
if (ST->isPacked() != (ID.Kind == ValID::t_PackedConstantStruct))
return Error(ID.Loc, "packed'ness of initializer and type don't match");
-
+
// Verify that the elements are compatible with the structtype.
for (unsigned i = 0, e = ID.UIntVal; i != e; ++i)
if (ID.ConstantStructElts[i]->getType() != ST->getElementType(i))
return Error(ID.Loc, "element " + Twine(i) +
" of struct initializer doesn't match struct element type");
-
+
V = ConstantStruct::get(ST, makeArrayRef(ID.ConstantStructElts,
ID.UIntVal));
} else
@@ -2690,7 +2891,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (ParseOptionalLinkage(Linkage) ||
ParseOptionalVisibility(Visibility) ||
ParseOptionalCallingConv(CC) ||
- ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseOptionalReturnAttrs(RetAttrs) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/))
return true;
@@ -2748,6 +2949,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
SmallVector<ArgInfo, 8> ArgList;
bool isVarArg;
AttrBuilder FuncAttrs;
+ std::vector<unsigned> FwdRefAttrGrps;
+ LocTy NoBuiltinLoc;
std::string Section;
unsigned Alignment;
std::string GC;
@@ -2757,7 +2960,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (ParseArgumentList(ArgList, isVarArg) ||
ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
&UnnamedAddrLoc) ||
- ParseOptionalAttrs(FuncAttrs, 2) ||
+ ParseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false,
+ NoBuiltinLoc) ||
(EatIfPresent(lltok::kw_section) &&
ParseStringConstant(Section)) ||
ParseOptionalAlignment(Alignment) ||
@@ -2765,39 +2969,41 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
ParseStringConstant(GC)))
return true;
+ if (FuncAttrs.contains(Attribute::NoBuiltin))
+ return Error(NoBuiltinLoc, "'nobuiltin' attribute not valid on function");
+
// If the alignment was parsed as an attribute, move to the alignment field.
if (FuncAttrs.hasAlignmentAttr()) {
Alignment = FuncAttrs.getAlignment();
- FuncAttrs.removeAttribute(Attributes::Alignment);
+ FuncAttrs.removeAttribute(Attribute::Alignment);
}
// Okay, if we got here, the function is syntactically valid. Convert types
// and do semantic checks.
std::vector<Type*> ParamTypeList;
- SmallVector<AttributeWithIndex, 8> Attrs;
+ SmallVector<AttributeSet, 8> Attrs;
if (RetAttrs.hasAttributes())
- Attrs.push_back(
- AttributeWithIndex::get(AttrListPtr::ReturnIndex,
- Attributes::get(RetType->getContext(),
- RetAttrs)));
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::ReturnIndex,
+ RetAttrs));
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
ParamTypeList.push_back(ArgList[i].Ty);
- if (ArgList[i].Attrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ if (ArgList[i].Attrs.hasAttributes(i + 1)) {
+ AttrBuilder B(ArgList[i].Attrs, i + 1);
+ Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B));
+ }
}
if (FuncAttrs.hasAttributes())
- Attrs.push_back(
- AttributeWithIndex::get(AttrListPtr::FunctionIndex,
- Attributes::get(RetType->getContext(),
- FuncAttrs)));
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::FunctionIndex,
+ FuncAttrs));
- AttrListPtr PAL = AttrListPtr::get(Context, Attrs);
+ AttributeSet PAL = AttributeSet::get(Context, Attrs);
- if (PAL.getParamAttributes(1).hasAttribute(Attributes::StructRet) &&
- !RetType->isVoidTy())
+ if (PAL.hasAttribute(1, Attribute::StructRet) && !RetType->isVoidTy())
return Error(RetTypeLoc, "functions with 'sret' argument must return void");
FunctionType *FT =
@@ -2818,7 +3024,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (Fn->getType() != PFT)
return Error(FRVI->second.second, "invalid forward reference to "
"function '" + FunctionName + "' with wrong type!");
-
+
ForwardRefVals.erase(FRVI);
} else if ((Fn = M->getFunction(FunctionName))) {
// Reject redefinitions.
@@ -2858,6 +3064,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Fn->setAlignment(Alignment);
Fn->setSection(Section);
if (!GC.empty()) Fn->setGC(GC.c_str());
+ ForwardRefAttrGroups[Fn] = FwdRefAttrGrps;
// Add all of the arguments we parsed to the function.
Function::arg_iterator ArgIt = Fn->arg_begin();
@@ -2887,13 +3094,13 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
int FunctionNumber = -1;
if (!Fn.hasName()) FunctionNumber = NumberedVals.size()-1;
-
+
PerFunctionState PFS(*this, Fn, FunctionNumber);
// We need at least one basic block.
if (Lex.getKind() == lltok::rbrace)
return TokError("function body requires at least one basic block");
-
+
while (Lex.getKind() != lltok::rbrace)
if (ParseBasicBlock(PFS)) return true;
@@ -2961,7 +3168,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// *must* be followed by metadata.
if (ParseInstructionMetadata(Inst, &PFS))
return true;
- break;
+ break;
}
// Set the name on the instruction.
@@ -3004,16 +3211,26 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
bool NUW = EatIfPresent(lltok::kw_nuw);
bool NSW = EatIfPresent(lltok::kw_nsw);
if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
-
+
if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
-
+
if (NUW) cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
if (NSW) cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
return false;
}
case lltok::kw_fadd:
case lltok::kw_fsub:
- case lltok::kw_fmul: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+ case lltok::kw_fmul:
+ case lltok::kw_fdiv:
+ case lltok::kw_frem: {
+ FastMathFlags FMF = EatFastMathFlagsIfPresent();
+ int Res = ParseArithmetic(Inst, PFS, KeywordVal, 2);
+ if (Res != 0)
+ return Res;
+ if (FMF.any())
+ Inst->setFastMathFlags(FMF);
+ return 0;
+ }
case lltok::kw_sdiv:
case lltok::kw_udiv:
@@ -3028,8 +3245,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_urem:
case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal, 1);
- case lltok::kw_fdiv:
- case lltok::kw_frem: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
case lltok::kw_and:
case lltok::kw_or:
case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
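With this hunk the fast-math keywords (fast, nnan, ninf, nsz, arcp) are accepted on all five floating-point binary operators rather than none. A sketch of the equivalent programmatic path, assuming an already-created floating-point instruction (the helper function is illustrative):

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"

// Equivalent of parsing "%r = fadd nnan arcp float %a, %b": attach the same
// flags to an existing floating-point binary operator I.
static void applyExampleFlags(llvm::Instruction *I) {
  llvm::FastMathFlags FMF;
  FMF.setNoNaNs();          // nnan
  FMF.setAllowReciprocal(); // arcp
  if (llvm::isa<llvm::FPMathOperator>(I) && FMF.any())
    I->setFastMathFlags(FMF);
}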
@@ -3075,7 +3290,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
if (Opc == Instruction::FCmp) {
switch (Lex.getKind()) {
- default: TokError("expected fcmp predicate (e.g. 'oeq')");
+ default: return TokError("expected fcmp predicate (e.g. 'oeq')");
case lltok::kw_oeq: P = CmpInst::FCMP_OEQ; break;
case lltok::kw_one: P = CmpInst::FCMP_ONE; break;
case lltok::kw_olt: P = CmpInst::FCMP_OLT; break;
@@ -3095,7 +3310,7 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
}
} else {
switch (Lex.getKind()) {
- default: TokError("expected icmp predicate (e.g. 'eq')");
+ default: return TokError("expected icmp predicate (e.g. 'eq')");
case lltok::kw_eq: P = CmpInst::ICMP_EQ; break;
case lltok::kw_ne: P = CmpInst::ICMP_NE; break;
case lltok::kw_slt: P = CmpInst::ICMP_SLT; break;
@@ -3126,12 +3341,12 @@ bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
if (ParseType(Ty, true /*void allowed*/)) return true;
Type *ResType = PFS.getFunction().getReturnType();
-
+
if (Ty->isVoidTy()) {
if (!ResType->isVoidTy())
return Error(TypeLoc, "value doesn't match function result type '" +
getTypeString(ResType) + "'");
-
+
Inst = ReturnInst::Create(Context);
return false;
}
@@ -3142,7 +3357,7 @@ bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
if (ResType != RV->getType())
return Error(TypeLoc, "value doesn't match function result type '" +
getTypeString(ResType) + "'");
-
+
Inst = ReturnInst::Create(Context, RV);
return false;
}
@@ -3204,7 +3419,7 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::comma, "expected ',' after case value") ||
ParseTypeAndBasicBlock(DestBB, PFS))
return true;
-
+
if (!SeenCases.insert(Constant))
return Error(CondLoc, "duplicate case value in switch");
if (!isa<ConstantInt>(Constant))
@@ -3232,26 +3447,26 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::comma, "expected ',' after indirectbr address") ||
ParseToken(lltok::lsquare, "expected '[' with indirectbr"))
return true;
-
+
if (!Address->getType()->isPointerTy())
return Error(AddrLoc, "indirectbr address must have pointer type");
-
+
// Parse the destination list.
SmallVector<BasicBlock*, 16> DestList;
-
+
if (Lex.getKind() != lltok::rsquare) {
BasicBlock *DestBB;
if (ParseTypeAndBasicBlock(DestBB, PFS))
return true;
DestList.push_back(DestBB);
-
+
while (EatIfPresent(lltok::comma)) {
if (ParseTypeAndBasicBlock(DestBB, PFS))
return true;
DestList.push_back(DestBB);
}
}
-
+
if (ParseToken(lltok::rsquare, "expected ']' at end of block list"))
return true;
@@ -3269,6 +3484,8 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
LocTy CallLoc = Lex.getLoc();
AttrBuilder RetAttrs, FnAttrs;
+ std::vector<unsigned> FwdRefAttrGrps;
+ LocTy NoBuiltinLoc;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc;
@@ -3277,11 +3494,12 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
BasicBlock *NormalBB, *UnwindBB;
if (ParseOptionalCallingConv(CC) ||
- ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseOptionalReturnAttrs(RetAttrs) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
ParseValID(CalleeID) ||
ParseParameterList(ArgList, PFS) ||
- ParseOptionalAttrs(FnAttrs, 2) ||
+ ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
+ NoBuiltinLoc) ||
ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
ParseTypeAndBasicBlock(NormalBB, PFS) ||
ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
@@ -3311,13 +3529,12 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
Value *Callee;
if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
- // Set up the Attributes for the function.
- SmallVector<AttributeWithIndex, 8> Attrs;
+ // Set up the AttributeSet for the function.
+ SmallVector<AttributeSet, 8> Attrs;
if (RetAttrs.hasAttributes())
- Attrs.push_back(
- AttributeWithIndex::get(AttrListPtr::ReturnIndex,
- Attributes::get(Callee->getContext(),
- RetAttrs)));
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::ReturnIndex,
+ RetAttrs));
SmallVector<Value*, 8> Args;
@@ -3337,25 +3554,27 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
return Error(ArgList[i].Loc, "argument is not of expected type '" +
getTypeString(ExpectedTy) + "'");
Args.push_back(ArgList[i].V);
- if (ArgList[i].Attrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ if (ArgList[i].Attrs.hasAttributes(i + 1)) {
+ AttrBuilder B(ArgList[i].Attrs, i + 1);
+ Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B));
+ }
}
if (I != E)
return Error(CallLoc, "not enough parameters specified for call");
if (FnAttrs.hasAttributes())
- Attrs.push_back(
- AttributeWithIndex::get(AttrListPtr::FunctionIndex,
- Attributes::get(Callee->getContext(),
- FnAttrs)));
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::FunctionIndex,
+ FnAttrs));
- // Finish off the Attributes and check them
- AttrListPtr PAL = AttrListPtr::get(Context, Attrs);
+ // Finish off the AttributeSet and check it
+ AttributeSet PAL = AttributeSet::get(Context, Attrs);
InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args);
II->setCallingConv(CC);
II->setAttributes(PAL);
+ ForwardRefAttrGroups[II] = FwdRefAttrGrps;
Inst = II;
return false;
}
@@ -3674,6 +3893,8 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
bool isTail) {
AttrBuilder RetAttrs, FnAttrs;
+ std::vector<unsigned> FwdRefAttrGrps;
+ LocTy NoBuiltinLoc;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc;
@@ -3683,11 +3904,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) ||
ParseOptionalCallingConv(CC) ||
- ParseOptionalAttrs(RetAttrs, 1) ||
+ ParseOptionalReturnAttrs(RetAttrs) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
ParseValID(CalleeID) ||
ParseParameterList(ArgList, PFS) ||
- ParseOptionalAttrs(FnAttrs, 2))
+ ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
+ NoBuiltinLoc))
return true;
// If RetType is a non-function pointer type, then this is the short syntax
@@ -3713,13 +3935,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
Value *Callee;
if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
- // Set up the Attributes for the function.
- SmallVector<AttributeWithIndex, 8> Attrs;
+ // Set up the AttributeSet for the function.
+ SmallVector<AttributeSet, 8> Attrs;
if (RetAttrs.hasAttributes())
- Attrs.push_back(
- AttributeWithIndex::get(AttrListPtr::ReturnIndex,
- Attributes::get(Callee->getContext(),
- RetAttrs)));
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::ReturnIndex,
+ RetAttrs));
SmallVector<Value*, 8> Args;
@@ -3739,26 +3960,28 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
return Error(ArgList[i].Loc, "argument is not of expected type '" +
getTypeString(ExpectedTy) + "'");
Args.push_back(ArgList[i].V);
- if (ArgList[i].Attrs.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+ if (ArgList[i].Attrs.hasAttributes(i + 1)) {
+ AttrBuilder B(ArgList[i].Attrs, i + 1);
+ Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B));
+ }
}
if (I != E)
return Error(CallLoc, "not enough parameters specified for call");
if (FnAttrs.hasAttributes())
- Attrs.push_back(
- AttributeWithIndex::get(AttrListPtr::FunctionIndex,
- Attributes::get(Callee->getContext(),
- FnAttrs)));
+ Attrs.push_back(AttributeSet::get(RetType->getContext(),
+ AttributeSet::FunctionIndex,
+ FnAttrs));
- // Finish off the Attributes and check them
- AttrListPtr PAL = AttrListPtr::get(Context, Attrs);
+ // Finish off the AttributeSet and check it
+ AttributeSet PAL = AttributeSet::get(Context, Attrs);
CallInst *CI = CallInst::Create(Callee, Args);
CI->setTailCall(isTail);
CI->setCallingConv(CC);
CI->setAttributes(PAL);
+ ForwardRefAttrGroups[CI] = FwdRefAttrGrps;
Inst = CI;
return false;
}
@@ -3798,7 +4021,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
/// ParseLoad
/// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)?
-/// ::= 'load' 'atomic' 'volatile'? TypeAndValue
+/// ::= 'load' 'atomic' 'volatile'? TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val; LocTy Loc;
@@ -4034,9 +4257,6 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
Indices.push_back(Val);
}
- if (Val && Val->getType()->isVectorTy() && Indices.size() != 1)
- return Error(EltLoc, "vector getelementptrs must have a single index");
-
if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices))
return Error(Loc, "invalid getelementptr indices");
Inst = GetElementPtrInst::Create(Ptr, Indices);
@@ -4075,7 +4295,7 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
ParseTypeAndValue(Val1, Loc1, PFS) ||
ParseIndexList(Indices, AteExtraComma))
return true;
-
+
if (!Val0->getType()->isAggregateType())
return Error(Loc0, "insertvalue operand must be aggregate type");
@@ -4105,7 +4325,7 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
Elts.push_back(0);
continue;
}
-
+
Value *V = 0;
if (ParseTypeAndValue(V, PFS)) return true;
Elts.push_back(V);
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index c6bbdb27aeef..1f2879e948d9 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -15,12 +15,13 @@
#define LLVM_ASMPARSER_LLPARSER_H
#include "LLLexer.h"
-#include "llvm/Attributes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ValueHandle.h"
#include <map>
@@ -55,7 +56,7 @@ namespace llvm {
t_ConstantStruct, // Value in ConstantStructElts.
t_PackedConstantStruct // Value in ConstantStructElts.
} Kind;
-
+
LLLexer::LocTy Loc;
unsigned UIntVal;
std::string StrVal, StrVal2;
@@ -65,23 +66,23 @@ namespace llvm {
MDNode *MDNodeVal;
MDString *MDStringVal;
Constant **ConstantStructElts;
-
+
ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
~ValID() {
if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
delete [] ConstantStructElts;
}
-
+
bool operator<(const ValID &RHS) const {
if (Kind == t_LocalID || Kind == t_GlobalID)
return UIntVal < RHS.UIntVal;
assert((Kind == t_LocalName || Kind == t_GlobalName ||
- Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
+ Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
"Ordering not defined for this ValID kind yet");
return StrVal < RHS.StrVal;
}
};
-
+
class LLParser {
public:
typedef LLLexer::LocTy LocTy;
@@ -89,7 +90,7 @@ namespace llvm {
LLVMContext &Context;
LLLexer Lex;
Module *M;
-
+
// Instruction metadata resolution. Each instruction can have a list of
// MDRef info associated with them.
//
@@ -110,7 +111,7 @@ namespace llvm {
// have processed a use of the type but not a definition yet.
StringMap<std::pair<Type*, LocTy> > NamedTypes;
std::vector<std::pair<Type*, LocTy> > NumberedTypes;
-
+
std::vector<TrackingVH<MDNode> > NumberedMetadata;
std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
@@ -118,14 +119,18 @@ namespace llvm {
std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
std::vector<GlobalValue*> NumberedVals;
-
+
// References to blockaddress. The key is the function ValID, the value is
// a list of references to blocks in that function.
std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
ForwardRefBlockAddresses;
-
+
+ // Attribute builder reference information.
+ std::map<Value*, std::vector<unsigned> > ForwardRefAttrGroups;
+ std::map<unsigned, AttrBuilder> NumberedAttrBuilders;
+
public:
- LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
+ LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
M(m) {}
bool Run();
@@ -154,6 +159,21 @@ namespace llvm {
Lex.Lex();
return true;
}
+
+ FastMathFlags EatFastMathFlagsIfPresent() {
+ FastMathFlags FMF;
+ while (true)
+ switch (Lex.getKind()) {
+ case lltok::kw_fast: FMF.setUnsafeAlgebra(); Lex.Lex(); continue;
+ case lltok::kw_nnan: FMF.setNoNaNs(); Lex.Lex(); continue;
+ case lltok::kw_ninf: FMF.setNoInfs(); Lex.Lex(); continue;
+ case lltok::kw_nsz: FMF.setNoSignedZeros(); Lex.Lex(); continue;
+ case lltok::kw_arcp: FMF.setAllowReciprocal(); Lex.Lex(); continue;
+ default: return FMF;
+ }
+ return FMF;
+ }
+
bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
if (Lex.getKind() != T) {
Present = false;
@@ -175,7 +195,8 @@ namespace llvm {
bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalAddrSpace(unsigned &AddrSpace);
- bool ParseOptionalAttrs(AttrBuilder &Attrs, unsigned AttrKind);
+ bool ParseOptionalParamAttrs(AttrBuilder &B);
+ bool ParseOptionalReturnAttrs(AttrBuilder &B);
bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
bool ParseOptionalLinkage(unsigned &Linkage) {
bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
@@ -200,8 +221,8 @@ namespace llvm {
bool ParseTopLevelEntities();
bool ValidateEndOfModule();
bool ParseTargetDefinition();
- bool ParseDepLibs();
bool ParseModuleAsm();
+ bool ParseDepLibs(); // FIXME: Remove in 4.0.
bool ParseUnnamedType();
bool ParseNamedType();
bool ParseDeclare();
@@ -218,6 +239,10 @@ namespace llvm {
bool ParseMDString(MDString *&Result);
bool ParseMDNodeID(MDNode *&Result);
bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
+ bool ParseUnnamedAttrGrp();
+ bool ParseFnAttributeValuePairs(AttrBuilder &B,
+ std::vector<unsigned> &FwdRefAttrGrps,
+ bool inAttrGrp, LocTy &NoBuiltinLoc);
// Type Parsing.
bool ParseType(Type *&Result, bool AllowVoid = false);
@@ -241,7 +266,7 @@ namespace llvm {
std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
std::vector<Value*> NumberedVals;
-
+
/// FunctionNumber - If this is an unnamed function, this is the slot
/// number of it, otherwise it is -1.
int FunctionNumber;
@@ -308,8 +333,8 @@ namespace llvm {
struct ParamInfo {
LocTy Loc;
Value *V;
- Attributes Attrs;
- ParamInfo(LocTy loc, Value *v, Attributes attrs)
+ AttributeSet Attrs;
+ ParamInfo(LocTy loc, Value *v, AttributeSet attrs)
: Loc(loc), V(v), Attrs(attrs) {}
};
bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
@@ -329,9 +354,9 @@ namespace llvm {
struct ArgInfo {
LocTy Loc;
Type *Ty;
- Attributes Attrs;
+ AttributeSet Attrs;
std::string Name;
- ArgInfo(LocTy L, Type *ty, Attributes Attr, const std::string &N)
+ ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N)
: Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
};
bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
@@ -375,8 +400,8 @@ namespace llvm {
int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
-
- bool ResolveForwardRefBlockAddresses(Function *TheFn,
+
+ bool ResolveForwardRefBlockAddresses(Function *TheFn,
std::vector<std::pair<ValID, GlobalValue*> > &Refs,
PerFunctionState *PFS);
};
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 036686d31823..cd25ba30008f 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -30,6 +30,7 @@ namespace lltok {
lparen, rparen, // ( )
backslash, // \ (not /)
exclaim, // !
+ hash, // #
kw_x,
kw_true, kw_false,
@@ -44,6 +45,7 @@ namespace lltok {
kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
kw_default, kw_hidden, kw_protected,
kw_unnamed_addr,
+ kw_externally_initialized,
kw_extern_weak,
kw_external, kw_thread_local,
kw_localdynamic, kw_initialexec, kw_localexec,
@@ -54,12 +56,17 @@ namespace lltok {
kw_target,
kw_triple,
kw_unwind,
- kw_deplibs,
+ kw_deplibs, // FIXME: Remove in 4.0
kw_datalayout,
kw_volatile,
kw_atomic,
kw_unordered, kw_monotonic, kw_acquire, kw_release, kw_acq_rel, kw_seq_cst,
kw_singlethread,
+ kw_nnan,
+ kw_ninf,
+ kw_nsz,
+ kw_arcp,
+ kw_fast,
kw_nuw,
kw_nsw,
kw_exact,
@@ -84,33 +91,39 @@ namespace lltok {
kw_ptx_kernel, kw_ptx_device,
kw_spir_kernel, kw_spir_func,
- kw_signext,
- kw_zeroext,
+ // Attributes:
+ kw_attributes,
+ kw_alwaysinline,
+ kw_sanitize_address,
+ kw_byval,
+ kw_inlinehint,
kw_inreg,
- kw_sret,
- kw_nounwind,
- kw_noreturn,
+ kw_minsize,
+ kw_naked,
+ kw_nest,
kw_noalias,
+ kw_nobuiltin,
kw_nocapture,
- kw_byval,
- kw_nest,
+ kw_noduplicate,
+ kw_noimplicitfloat,
+ kw_noinline,
+ kw_nonlazybind,
+ kw_noredzone,
+ kw_noreturn,
+ kw_nounwind,
+ kw_optsize,
kw_readnone,
kw_readonly,
- kw_uwtable,
kw_returns_twice,
-
- kw_inlinehint,
- kw_noinline,
- kw_alwaysinline,
- kw_optsize,
+ kw_signext,
kw_ssp,
kw_sspreq,
- kw_noredzone,
- kw_noimplicitfloat,
- kw_naked,
- kw_nonlazybind,
- kw_address_safety,
- kw_minsize,
+ kw_sspstrong,
+ kw_sret,
+ kw_sanitize_thread,
+ kw_sanitize_memory,
+ kw_uwtable,
+ kw_zeroext,
kw_type,
kw_opaque,
@@ -147,6 +160,7 @@ namespace lltok {
// Unsigned Valued tokens (UIntVal).
GlobalID, // @42
LocalVarID, // %42
+ AttrGrpID, // #42
// String valued tokens (StrVal).
LabelStr, // foo:
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index 21b7fd411e3d..bb4f03bacc17 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -13,10 +13,10 @@
#include "llvm/Assembly/Parser.h"
#include "LLParser.h"
-#include "llvm/Module.h"
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <cstring>
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 15844c0041c3..5cd6c552bd8a 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -9,10 +9,10 @@
#include "llvm-c/BitReader.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/MemoryBuffer.h"
-#include <string>
#include <cstring>
+#include <string>
using namespace llvm;
@@ -30,7 +30,7 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
LLVMModuleRef *OutModule,
char **OutMessage) {
std::string Message;
-
+
*OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef),
&Message));
if (!*OutModule) {
@@ -38,19 +38,19 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
*OutMessage = strdup(Message.c_str());
return 1;
}
-
+
return 0;
}
/* Reads a module from the specified path, returning via the OutModule parameter
a module provider which performs lazy deserialization. Returns 0 on success.
- Optionally returns a human-readable error message via OutMessage. */
+ Optionally returns a human-readable error message via OutMessage. */
LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutM,
char **OutMessage) {
std::string Message;
-
+
*OutM = wrap(getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef),
&Message));
if (!*OutM) {
@@ -58,7 +58,7 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
*OutMessage = strdup(Message.c_str());
return 1;
}
-
+
return 0;
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 4ec9da12ddcf..f34884391a74 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -6,26 +6,22 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This header defines the BitcodeReader class.
-//
-//===----------------------------------------------------------------------===//
#include "llvm/Bitcode/ReaderWriter.h"
#include "BitcodeReader.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/AutoUpgrade.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/DataStream.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/OperandTraits.h"
using namespace llvm;
enum {
@@ -47,7 +43,7 @@ void BitcodeReader::FreeState() {
ValueList.clear();
MDValueList.clear();
- std::vector<AttrListPtr>().swap(MAttributes);
+ std::vector<AttributeSet>().swap(MAttributes);
std::vector<BasicBlock*>().swap(FunctionBBs);
std::vector<Function*>().swap(FunctionsWithBodies);
DeferredFunctionInfo.clear();
@@ -432,6 +428,26 @@ Type *BitcodeReader::getTypeByID(unsigned ID) {
// Functions for parsing blocks from the bitcode file
//===----------------------------------------------------------------------===//
+
+/// \brief This fills an AttrBuilder object with the LLVM attributes that have
+/// been decoded from the given integer. This function must stay in sync with
+/// 'encodeLLVMAttributesForBitcode'.
+static void decodeLLVMAttributesForBitcode(AttrBuilder &B,
+ uint64_t EncodedAttrs) {
+ // FIXME: Remove in 4.0.
+
+ // The alignment is stored as a 16-bit raw value in bits 16--31. Attribute
+ // bits at 32 and above are shifted down by 11, back to the positions they
+ // occupy in the old in-memory encoding (which kept alignment in bits
+ // 16--20).
+ unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16;
+ assert((!Alignment || isPowerOf2_32(Alignment)) &&
+ "Alignment must be a power of two.");
+
+ if (Alignment)
+ B.addAlignmentAttr(Alignment);
+ B.addRawValue(((EncodedAttrs & (0xfffffULL << 32)) >> 11) |
+ (EncodedAttrs & 0xffff));
+}
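A worked example of the layout this helper undoes; the snippet assumes the file-local helper above, and the encoded value is invented for illustration:

// Encoded layout: [63..32] high attribute bits, [31..16] raw alignment,
// [15..0] low attribute bits.
uint64_t Enc = (1ULL << 32)  // one attribute from the high range
             | (8ULL << 16)  // alignment 8, stored raw (not log2)
             | 1ULL;         // one attribute from bits 0..15
llvm::AttrBuilder B;
decodeLLVMAttributesForBitcode(B, Enc);
// B now carries alignment 8 plus raw bits 1 | (1 << 21): the high bit moved
// from position 32 down to 21, its slot in the old in-memory encoding.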
+
bool BitcodeReader::ParseAttributeBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
return Error("Malformed block record");
@@ -441,54 +457,124 @@ bool BitcodeReader::ParseAttributeBlock() {
SmallVector<uint64_t, 64> Record;
- SmallVector<AttributeWithIndex, 8> Attrs;
+ SmallVector<AttributeSet, 8> Attrs;
// Read all the records.
while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of PARAMATTR block");
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("Error at end of PARAMATTR block");
+ case BitstreamEntry::EndBlock:
return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
}
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
+ // Read a record.
+ Record.clear();
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::PARAMATTR_CODE_ENTRY_OLD: { // ENTRY: [paramidx0, attr0, ...]
+ // FIXME: Remove in 4.0.
+ if (Record.size() & 1)
+ return Error("Invalid ENTRY record");
+
+ for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+ AttrBuilder B;
+ decodeLLVMAttributesForBitcode(B, Record[i+1]);
+ Attrs.push_back(AttributeSet::get(Context, Record[i], B));
+ }
+
+ MAttributes.push_back(AttributeSet::get(Context, Attrs));
+ Attrs.clear();
+ break;
}
+ case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [attrgrp0, attrgrp1, ...]
+ for (unsigned i = 0, e = Record.size(); i != e; ++i)
+ Attrs.push_back(MAttributeGroups[Record[i]]);
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
+ MAttributes.push_back(AttributeSet::get(Context, Attrs));
+ Attrs.clear();
+ break;
+ }
+ }
+ }
+}
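This loop shape repeats in every Parse* routine below: the old three-way if-chain on raw codes becomes a switch over the cursor's BitstreamEntry, with abbrev definitions and subblocks consumed inside advanceSkippingSubblocks(). A generic sketch (error strings vary per block):

    while (1) {
      BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
      switch (Entry.Kind) {
      case BitstreamEntry::SubBlock:    // Already skipped by the cursor.
      case BitstreamEntry::Error:
        return Error("malformed <block> block");
      case BitstreamEntry::EndBlock:
        return false;                   // Success.
      case BitstreamEntry::Record:
        break;                          // Decode it below.
      }
      Record.clear();
      switch (Stream.readRecord(Entry.ID, Record)) {
      // ... block-specific record codes ...
      }
    }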
+
+bool BitcodeReader::ParseAttributeGroupBlock() {
+ if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID))
+ return Error("Malformed block record");
+
+ if (!MAttributeGroups.empty())
+ return Error("Multiple PARAMATTR_GROUP blocks found!");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("Error at end of PARAMATTR_GROUP block");
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
}
// Read a record.
Record.clear();
- switch (Stream.ReadRecord(Code, Record)) {
+ switch (Stream.readRecord(Entry.ID, Record)) {
default: // Default behavior: ignore.
break;
- case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [paramidx0, attr0, ...]
- if (Record.size() & 1)
+ case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...]
+ if (Record.size() < 3)
return Error("Invalid ENTRY record");
- for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
- Attributes ReconstitutedAttr =
- Attributes::decodeLLVMAttributesForBitcode(Context, Record[i+1]);
- Record[i+1] = ReconstitutedAttr.Raw();
- }
+ uint64_t GrpID = Record[0];
+ uint64_t Idx = Record[1]; // Index of the object this attribute refers to.
- for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
- AttrBuilder B(Record[i+1]);
- if (B.hasAttributes())
- Attrs.push_back(AttributeWithIndex::get(Record[i],
- Attributes::get(Context, B)));
+ AttrBuilder B;
+ for (unsigned i = 2, e = Record.size(); i != e; ++i) {
+ if (Record[i] == 0) { // Enum attribute
+ B.addAttribute(Attribute::AttrKind(Record[++i]));
+ } else if (Record[i] == 1) { // Align attribute
+ if (Attribute::AttrKind(Record[++i]) == Attribute::Alignment)
+ B.addAlignmentAttr(Record[++i]);
+ else
+ B.addStackAlignmentAttr(Record[++i]);
+ } else { // String attribute
+ assert((Record[i] == 3 || Record[i] == 4) &&
+ "Invalid attribute group entry");
+ bool HasValue = (Record[i++] == 4);
+ SmallString<64> KindStr;
+ SmallString<64> ValStr;
+
+ while (i != e && Record[i] != 0)
+ KindStr += Record[i++];
+ assert(i != e && Record[i] == 0 && "Kind string not null terminated");
+
+ if (HasValue) {
+ // Has a value associated with it.
+ ++i; // Skip the '0' that terminates the "kind" string.
+ while (i != e && Record[i] != 0)
+ ValStr += Record[i++];
+ assert(i != e && Record[i] == 0 && "Value string not null terminated");
+ }
+
+ B.addAttribute(KindStr.str(), ValStr.str());
+ }
}
- MAttributes.push_back(AttrListPtr::get(Context, Attrs));
- Attrs.clear();
+ MAttributeGroups[GrpID] = AttributeSet::get(Context, Idx, B);
break;
}
}
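For reference, a single PARAMATTR_GRP_CODE_ENTRY record mixing all three operand forms would decode as follows (the specific attributes are illustrative, not taken from any real module):

    // [grpid, idx,
    //  0, Attribute::NoUnwind,             // tag 0: enum attribute
    //  1, Attribute::Alignment, 16,        // tag 1: align 16
    //  4, 'k','e','y', 0, 'v','a','l', 0]  // tag 4: "key"="val" (tag 3 = kind only)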
@@ -513,32 +599,26 @@ bool BitcodeReader::ParseTypeTableBody() {
// Read all the records for this type table.
while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ Error("Error in the type table block");
+ return true;
+ case BitstreamEntry::EndBlock:
if (NumRecords != TypeList.size())
return Error("Invalid type forward reference in TYPE_BLOCK");
- if (Stream.ReadBlockEnd())
- return Error("Error at end of type table block");
return false;
- }
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
}
// Read a record.
Record.clear();
Type *ResultTy = 0;
- switch (Stream.ReadRecord(Code, Record)) {
+ switch (Stream.readRecord(Entry.ID, Record)) {
default: return Error("unknown type in type table");
case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
// TYPE_CODE_NUMENTRY contains a count of the number of types in the
@@ -607,7 +687,7 @@ bool BitcodeReader::ParseTypeTableBody() {
else
break;
}
-
+
ResultTy = getTypeByID(Record[2]);
if (ResultTy == 0 || ArgTys.size() < Record.size()-3)
return Error("invalid type in function type");
@@ -626,7 +706,7 @@ bool BitcodeReader::ParseTypeTableBody() {
else
break;
}
-
+
ResultTy = getTypeByID(Record[1]);
if (ResultTy == 0 || ArgTys.size() < Record.size()-2)
return Error("invalid type in function type");
@@ -657,10 +737,10 @@ bool BitcodeReader::ParseTypeTableBody() {
case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
if (Record.size() < 1)
return Error("Invalid STRUCT type record");
-
+
if (NumRecords >= TypeList.size())
return Error("invalid TYPE table");
-
+
// Check to see if this was forward referenced, if so fill in the temp.
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
if (Res) {
@@ -669,7 +749,7 @@ bool BitcodeReader::ParseTypeTableBody() {
} else // Otherwise, create a new struct.
Res = StructType::create(Context, TypeName);
TypeName.clear();
-
+
SmallVector<Type*, 8> EltTys;
for (unsigned i = 1, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -689,7 +769,7 @@ bool BitcodeReader::ParseTypeTableBody() {
if (NumRecords >= TypeList.size())
return Error("invalid TYPE table");
-
+
// Check to see if this was forward referenced, if so fill in the temp.
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
if (Res) {
@@ -700,7 +780,7 @@ bool BitcodeReader::ParseTypeTableBody() {
TypeName.clear();
ResultTy = Res;
break;
- }
+ }
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid ARRAY type record");
@@ -736,28 +816,22 @@ bool BitcodeReader::ParseValueSymbolTable() {
// Read all the records for this value table.
SmallString<128> ValueName;
while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of value symbol table block");
- return false;
- }
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("malformed value symbol table block");
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
}
// Read a record.
Record.clear();
- switch (Stream.ReadRecord(Code, Record)) {
+ switch (Stream.readRecord(Entry.ID, Record)) {
default: // Default behavior: unknown type.
break;
case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N]
@@ -797,41 +871,35 @@ bool BitcodeReader::ParseMetadata() {
// Read all the records.
while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of PARAMATTR block");
- return false;
- }
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ Error("malformed metadata block");
+ return true;
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
}
bool IsFunctionLocal = false;
// Read a record.
Record.clear();
- Code = Stream.ReadRecord(Code, Record);
+ unsigned Code = Stream.readRecord(Entry.ID, Record);
switch (Code) {
default: // Default behavior: ignore.
break;
case bitc::METADATA_NAME: {
- // Read named of the named metadata.
+ // Read name of the named metadata.
SmallString<8> Name(Record.begin(), Record.end());
Record.clear();
Code = Stream.ReadCode();
// METADATA_NAME is always followed by METADATA_NAMED_NODE.
- unsigned NextBitCode = Stream.ReadRecord(Code, Record);
+ unsigned NextBitCode = Stream.readRecord(Code, Record);
assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode;
// Read named metadata elements.
@@ -958,27 +1026,29 @@ bool BitcodeReader::ParseConstants() {
Type *CurTy = Type::getInt32Ty(Context);
unsigned NextCstNo = ValueList.size();
while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK)
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("malformed block record in AST file");
+ case BitstreamEntry::EndBlock:
+ if (NextCstNo != ValueList.size())
+ return Error("Invalid constant reference!");
+
+ // Once all the constants have been read, go through and resolve forward
+ // references.
+ ValueList.ResolveConstantForwardRefs();
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
break;
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
}
// Read a record.
Record.clear();
Value *V = 0;
- unsigned BitCode = Stream.ReadRecord(Code, Record);
+ unsigned BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) {
default: // Default behavior: unknown constant
case bitc::CST_CODE_UNDEF: // UNDEF
@@ -1006,28 +1076,34 @@ bool BitcodeReader::ParseConstants() {
APInt VInt = ReadWideAPInt(Record,
cast<IntegerType>(CurTy)->getBitWidth());
V = ConstantInt::get(Context, VInt);
-
+
break;
}
case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
if (Record.empty())
return Error("Invalid FLOAT record");
if (CurTy->isHalfTy())
- V = ConstantFP::get(Context, APFloat(APInt(16, (uint16_t)Record[0])));
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf,
+ APInt(16, (uint16_t)Record[0])));
else if (CurTy->isFloatTy())
- V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0])));
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, (uint32_t)Record[0])));
else if (CurTy->isDoubleTy())
- V = ConstantFP::get(Context, APFloat(APInt(64, Record[0])));
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, Record[0])));
else if (CurTy->isX86_FP80Ty()) {
// Bits are not stored the same way as a normal i80 APInt, compensate.
uint64_t Rearrange[2];
Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16);
Rearrange[1] = Record[0] >> 48;
- V = ConstantFP::get(Context, APFloat(APInt(80, Rearrange)));
+ V = ConstantFP::get(Context, APFloat(APFloat::x87DoubleExtended,
+ APInt(80, Rearrange)));
} else if (CurTy->isFP128Ty())
- V = ConstantFP::get(Context, APFloat(APInt(128, Record), true));
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEquad,
+ APInt(128, Record)));
else if (CurTy->isPPC_FP128Ty())
- V = ConstantFP::get(Context, APFloat(APInt(128, Record)));
+ V = ConstantFP::get(Context, APFloat(APFloat::PPCDoubleDouble,
+ APInt(128, Record)));
else
V = UndefValue::get(CurTy);
break;
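The constructor change is the point of this hunk: the old APFloat(APInt) constructor inferred the semantics from the bit width, which is ambiguous at 128 bits (IEEE quad vs. PPC double-double, previously disambiguated by a trailing bool). Passing the fltSemantics explicitly removes the guesswork. A minimal sketch of the new idiom:

    APInt Bits(32, 0x40490fdb);                 // raw single-precision pattern (~pi)
    APFloat F(APFloat::IEEEsingle, Bits);       // semantics stated, never inferred
    Constant *C = ConstantFP::get(Context, F);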
@@ -1073,10 +1149,10 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_DATA: {// DATA: [n x value]
if (Record.empty())
return Error("Invalid CST_DATA record");
-
+
Type *EltTy = cast<SequentialType>(CurTy)->getElementType();
unsigned Size = Record.size();
-
+
if (EltTy->isIntegerTy(8)) {
SmallVector<uint8_t, 16> Elts(Record.begin(), Record.end());
if (isa<VectorType>(CurTy))
@@ -1182,10 +1258,11 @@ bool BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#]
if (Record.size() < 3) return Error("Invalid CE_SELECT record");
- V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0],
- Type::getInt1Ty(Context)),
- ValueList.getConstantFwdRef(Record[1],CurTy),
- ValueList.getConstantFwdRef(Record[2],CurTy));
+ V = ConstantExpr::getSelect(
+ ValueList.getConstantFwdRef(Record[0],
+ Type::getInt1Ty(Context)),
+ ValueList.getConstantFwdRef(Record[1],CurTy),
+ ValueList.getConstantFwdRef(Record[2],CurTy));
break;
case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval]
if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record");
@@ -1193,7 +1270,8 @@ bool BitcodeReader::ParseConstants() {
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
- Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2],
+ Type::getInt32Ty(Context));
V = ConstantExpr::getExtractElement(Op0, Op1);
break;
}
@@ -1204,7 +1282,8 @@ bool BitcodeReader::ParseConstants() {
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
OpTy->getElementType());
- Constant *Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[2],
+ Type::getInt32Ty(Context));
V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
break;
}
@@ -1324,23 +1403,12 @@ bool BitcodeReader::ParseConstants() {
V = FwdRef;
}
break;
- }
+ }
}
ValueList.AssignValue(V, NextCstNo);
++NextCstNo;
}
-
- if (NextCstNo != ValueList.size())
- return Error("Invalid constant reference!");
-
- if (Stream.ReadBlockEnd())
- return Error("Error at end of constants block");
-
- // Once all the constants have been read, go through and resolve forward
- // references.
- ValueList.ResolveConstantForwardRefs();
- return false;
}
bool BitcodeReader::ParseUseLists() {
@@ -1348,32 +1416,25 @@ bool BitcodeReader::ParseUseLists() {
return Error("Malformed block record");
SmallVector<uint64_t, 64> Record;
-
+
// Read all the records.
while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of use-list table block");
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("malformed use list block");
+ case BitstreamEntry::EndBlock:
return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
}
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
- }
-
+
// Read a use list record.
Record.clear();
- switch (Stream.ReadRecord(Code, Record)) {
+ switch (Stream.readRecord(Entry.ID, Record)) {
default: // Default behavior: unknown type.
break;
case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD.
@@ -1445,17 +1506,18 @@ bool BitcodeReader::ParseModule(bool Resume) {
std::vector<std::string> GCTable;
// Read all the records for this module.
- while (!Stream.AtEndOfStream()) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of module block");
+ while (1) {
+ BitstreamEntry Entry = Stream.advance();
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ Error("malformed module block");
+ return true;
+ case BitstreamEntry::EndBlock:
return GlobalCleanup();
- }
- if (Code == bitc::ENTER_SUBBLOCK) {
- switch (Stream.ReadSubBlockID()) {
+ case BitstreamEntry::SubBlock:
+ switch (Entry.ID) {
default: // Skip unknown content.
if (Stream.SkipBlock())
return Error("Malformed block record");
@@ -1468,6 +1530,10 @@ bool BitcodeReader::ParseModule(bool Resume) {
if (ParseAttributeBlock())
return true;
break;
+ case bitc::PARAMATTR_GROUP_BLOCK_ID:
+ if (ParseAttributeGroupBlock())
+ return true;
+ break;
case bitc::TYPE_BLOCK_ID_NEW:
if (ParseTypeTable())
return true;
@@ -1514,15 +1580,15 @@ bool BitcodeReader::ParseModule(bool Resume) {
break;
}
continue;
- }
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
}
+
// Read a record.
- switch (Stream.ReadRecord(Code, Record)) {
+ switch (Stream.readRecord(Entry.ID, Record)) {
default: break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_VERSION: { // VERSION: [version#]
if (Record.size() < 1)
@@ -1562,10 +1628,11 @@ bool BitcodeReader::ParseModule(bool Resume) {
break;
}
case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N]
+ // FIXME: Remove in 4.0.
std::string S;
if (ConvertToString(Record, 0, S))
return Error("Invalid MODULE_CODE_DEPLIB record");
- TheModule->addLibrary(S);
+ // Ignore value.
break;
}
case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
@@ -1616,9 +1683,13 @@ bool BitcodeReader::ParseModule(bool Resume) {
if (Record.size() > 8)
UnnamedAddr = Record[8];
+ bool ExternallyInitialized = false;
+ if (Record.size() > 9)
+ ExternallyInitialized = Record[9];
+
GlobalVariable *NewGV =
new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0,
- TLM, AddressSpace);
+ TLM, AddressSpace, ExternallyInitialized);
NewGV->setAlignment(Alignment);
if (!Section.empty())
NewGV->setSection(Section);
@@ -1709,8 +1780,6 @@ bool BitcodeReader::ParseModule(bool Resume) {
}
Record.clear();
}
-
- return Error("Premature end of bitstream");
}
bool BitcodeReader::ParseBitcodeInto(Module *M) {
@@ -1729,47 +1798,55 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) {
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
- while (!Stream.AtEndOfStream()) {
- unsigned Code = Stream.ReadCode();
+ while (1) {
+ if (Stream.AtEndOfStream())
+ return false;
+
+ BitstreamEntry Entry =
+ Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
- if (Code != bitc::ENTER_SUBBLOCK) {
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ Error("malformed module file");
+ return true;
+ case BitstreamEntry::EndBlock:
+ return false;
+
+ case BitstreamEntry::SubBlock:
+ switch (Entry.ID) {
+ case bitc::BLOCKINFO_BLOCK_ID:
+ if (Stream.ReadBlockInfoBlock())
+ return Error("Malformed BlockInfoBlock");
+ break;
+ case bitc::MODULE_BLOCK_ID:
+ // Reject multiple MODULE_BLOCK's in a single bitstream.
+ if (TheModule)
+ return Error("Multiple MODULE_BLOCKs in same stream");
+ TheModule = M;
+ if (ParseModule(false))
+ return true;
+ if (LazyStreamer) return false;
+ break;
+ default:
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ }
+ continue;
+ case BitstreamEntry::Record:
+ // There should be no records in the top-level of blocks.
- // The ranlib in xcode 4 will align archive members by appending newlines
+ // The ranlib in Xcode 4 will align archive members by appending newlines
// to the end of them. If this file size is a multiple of 4 but not 8, we
// have to read and ignore these final 4 bytes :-(
- if (Stream.GetAbbrevIDWidth() == 2 && Code == 2 &&
+ if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 &&
Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a &&
Stream.AtEndOfStream())
return false;
return Error("Invalid record at top-level");
}
-
- unsigned BlockID = Stream.ReadSubBlockID();
-
- // We only know the MODULE subblock ID.
- switch (BlockID) {
- case bitc::BLOCKINFO_BLOCK_ID:
- if (Stream.ReadBlockInfoBlock())
- return Error("Malformed BlockInfoBlock");
- break;
- case bitc::MODULE_BLOCK_ID:
- // Reject multiple MODULE_BLOCK's in a single bitstream.
- if (TheModule)
- return Error("Multiple MODULE_BLOCKs in same stream");
- TheModule = M;
- if (ParseModule(false))
- return true;
- if (LazyStreamer) return false;
- break;
- default:
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- break;
- }
}
-
- return false;
}
bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
@@ -1779,32 +1856,22 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
SmallVector<uint64_t, 64> Record;
// Read all the records for this module.
- while (!Stream.AtEndOfStream()) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of module block");
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("malformed module block");
+ case BitstreamEntry::EndBlock:
return false;
- }
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- switch (Stream.ReadSubBlockID()) {
- default: // Skip unknown content.
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- break;
- }
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
}
// Read a record.
- switch (Stream.ReadRecord(Code, Record)) {
+ switch (Stream.readRecord(Entry.ID, Record)) {
default: break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
@@ -1816,8 +1883,6 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
}
Record.clear();
}
-
- return Error("Premature end of bitstream");
}
bool BitcodeReader::ParseTriple(std::string &Triple) {
@@ -1834,28 +1899,32 @@ bool BitcodeReader::ParseTriple(std::string &Triple) {
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
- while (!Stream.AtEndOfStream()) {
- unsigned Code = Stream.ReadCode();
+ while (1) {
+ BitstreamEntry Entry = Stream.advance();
- if (Code != bitc::ENTER_SUBBLOCK)
- return Error("Invalid record at top-level");
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ Error("malformed module file");
+ return true;
+ case BitstreamEntry::EndBlock:
+ return false;
- unsigned BlockID = Stream.ReadSubBlockID();
+ case BitstreamEntry::SubBlock:
+ if (Entry.ID == bitc::MODULE_BLOCK_ID)
+ return ParseModuleTriple(Triple);
- // We only know the MODULE subblock ID.
- switch (BlockID) {
- case bitc::MODULE_BLOCK_ID:
- if (ParseModuleTriple(Triple))
+ // Ignore other sub-blocks.
+ if (Stream.SkipBlock()) {
+ Error("malformed block record in AST file");
return true;
- break;
- default:
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- break;
+ }
+ continue;
+
+ case BitstreamEntry::Record:
+ Stream.skipRecord(Entry.ID);
+ continue;
}
}
-
- return false;
}
/// ParseMetadataAttachment - Parse metadata attachments.
@@ -1864,20 +1933,23 @@ bool BitcodeReader::ParseMetadataAttachment() {
return Error("Malformed block record");
SmallVector<uint64_t, 64> Record;
- while(1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of PARAMATTR block");
+ while (1) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return Error("malformed metadata block");
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
break;
}
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
- }
+
// Read a metadata attachment record.
Record.clear();
- switch (Stream.ReadRecord(Code, Record)) {
+ switch (Stream.readRecord(Entry.ID, Record)) {
default: // Default behavior: ignore.
break;
case bitc::METADATA_ATTACHMENT: {
@@ -1898,7 +1970,6 @@ bool BitcodeReader::ParseMetadataAttachment() {
}
}
}
- return false;
}
/// ParseFunctionBody - Lazily parse the specified function body block.
@@ -1919,19 +1990,20 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned CurBBNo = 0;
DebugLoc LastLoc;
-
+
// Read all the records.
SmallVector<uint64_t, 64> Record;
while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of function block");
- break;
- }
+ BitstreamEntry Entry = Stream.advance();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ return Error("Bitcode error in function block");
+ case BitstreamEntry::EndBlock:
+ goto OutOfRecordLoop;
- if (Code == bitc::ENTER_SUBBLOCK) {
- switch (Stream.ReadSubBlockID()) {
+ case BitstreamEntry::SubBlock:
+ switch (Entry.ID) {
default: // Skip unknown content.
if (Stream.SkipBlock())
return Error("Malformed block record");
@@ -1951,17 +2023,16 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
break;
}
continue;
- }
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
}
// Read a record.
Record.clear();
Instruction *I = 0;
- unsigned BitCode = Stream.ReadRecord(Code, Record);
+ unsigned BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) {
default: // Default behavior: reject
return Error("Unknown instruction");
@@ -1974,24 +2045,24 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
FunctionBBs[i] = BasicBlock::Create(Context, "", F);
CurBB = FunctionBBs[0];
continue;
-
+
case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN
// This record indicates that the last instruction is at the same
// location as the previous instruction with a location.
I = 0;
-
+
// Get the last instruction emitted.
if (CurBB && !CurBB->empty())
I = &CurBB->back();
else if (CurBBNo && FunctionBBs[CurBBNo-1] &&
!FunctionBBs[CurBBNo-1]->empty())
I = &FunctionBBs[CurBBNo-1]->back();
-
+
if (I == 0) return Error("Invalid DEBUG_LOC_AGAIN record");
I->setDebugLoc(LastLoc);
I = 0;
continue;
-
+
case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia]
I = 0; // Get the last instruction emitted.
if (CurBB && !CurBB->empty())
@@ -2001,10 +2072,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = &FunctionBBs[CurBBNo-1]->back();
if (I == 0 || Record.size() < 4)
return Error("Invalid FUNC_CODE_DEBUG_LOC record");
-
+
unsigned Line = Record[0], Col = Record[1];
unsigned ScopeID = Record[2], IAID = Record[3];
-
+
MDNode *Scope = 0, *IA = 0;
if (ScopeID) Scope = cast<MDNode>(MDValueList.getValueFwdRef(ScopeID-1));
if (IAID) IA = cast<MDNode>(MDValueList.getValueFwdRef(IAID-1));
@@ -2041,7 +2112,22 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
Opc == Instruction::AShr) {
if (Record[OpNum] & (1 << bitc::PEO_EXACT))
cast<BinaryOperator>(I)->setIsExact(true);
+ } else if (isa<FPMathOperator>(I)) {
+ FastMathFlags FMF;
+ if (0 != (Record[OpNum] & FastMathFlags::UnsafeAlgebra))
+ FMF.setUnsafeAlgebra();
+ if (0 != (Record[OpNum] & FastMathFlags::NoNaNs))
+ FMF.setNoNaNs();
+ if (0 != (Record[OpNum] & FastMathFlags::NoInfs))
+ FMF.setNoInfs();
+ if (0 != (Record[OpNum] & FastMathFlags::NoSignedZeros))
+ FMF.setNoSignedZeros();
+ if (0 != (Record[OpNum] & FastMathFlags::AllowReciprocal))
+ FMF.setAllowReciprocal();
+ if (FMF.any())
+ I->setFastMathFlags(FMF);
}
+
}
break;
}
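Reader and writer agree on using the FastMathFlags enumerators directly as wire-format bit masks (see GetOptimizationFlags in BitcodeWriter.cpp below), so the round trip is a straight mask-and-set in each direction. A sketch, assuming each enumerator is a single bit:

    uint64_t Flags = FastMathFlags::NoNaNs | FastMathFlags::NoInfs; // writer
    FastMathFlags FMF;                                              // reader
    if (Flags & FastMathFlags::NoNaNs) FMF.setNoNaNs();
    if (Flags & FastMathFlags::NoInfs) FMF.setNoInfs();
    if (FMF.any())
      I->setFastMathFlags(FMF);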
@@ -2272,10 +2358,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
break;
}
case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...]
- // Check magic
+ // Check magic
if ((Record[0] >> 16) == SWITCH_INST_MAGIC) {
// New SwitchInst format with case ranges.
-
+
Type *OpTy = getTypeByID(Record[1]);
unsigned ValueBitWidth = cast<IntegerType>(OpTy)->getBitWidth();
@@ -2285,17 +2371,17 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
return Error("Invalid SWITCH record");
unsigned NumCases = Record[4];
-
+
SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
InstructionList.push_back(SI);
-
+
unsigned CurIdx = 5;
for (unsigned i = 0; i != NumCases; ++i) {
IntegersSubsetToBB CaseBuilder;
unsigned NumItems = Record[CurIdx++];
for (unsigned ci = 0; ci != NumItems; ++ci) {
bool isSingleNumber = Record[CurIdx++];
-
+
APInt Low;
unsigned ActiveWords = 1;
if (ValueBitWidth > 64)
@@ -2311,7 +2397,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
APInt High =
ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords),
ValueBitWidth);
-
+
CaseBuilder.add(IntItem::fromType(OpTy, Low),
IntItem::fromType(OpTy, High));
CurIdx += ActiveWords;
@@ -2319,7 +2405,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
CaseBuilder.add(IntItem::fromType(OpTy, Low));
}
BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]);
- IntegersSubset Case = CaseBuilder.getCase();
+ IntegersSubset Case = CaseBuilder.getCase();
SI->addCase(Case, DestBB);
}
uint16_t Hash = SI->hash();
@@ -2328,9 +2414,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = SI;
break;
}
-
+
// Old SwitchInst format without case ranges.
-
+
if (Record.size() < 3 || (Record.size() & 1) == 0)
return Error("Invalid SWITCH record");
Type *OpTy = getTypeByID(Record[0]);
@@ -2375,11 +2461,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = IBI;
break;
}
-
+
case bitc::FUNC_CODE_INST_INVOKE: {
// INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
if (Record.size() < 4) return Error("Invalid INVOKE record");
- AttrListPtr PAL = getAttributes(Record[0]);
+ AttributeSet PAL = getAttributes(Record[0]);
unsigned CCInfo = Record[1];
BasicBlock *NormalBB = getBasicBlock(Record[2]);
BasicBlock *UnwindBB = getBasicBlock(Record[3]);
@@ -2534,7 +2620,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum+4 != Record.size())
return Error("Invalid LOADATOMIC record");
-
+
AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
if (Ordering == NotAtomic || Ordering == Release ||
@@ -2644,7 +2730,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (Record.size() < 3)
return Error("Invalid CALL record");
- AttrListPtr PAL = getAttributes(Record[0]);
+ AttributeSet PAL = getAttributes(Record[0]);
unsigned CCInfo = Record[1];
unsigned OpNum = 2;
@@ -2723,6 +2809,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
ValueList.AssignValue(I, NextValueNo++);
}
+OutOfRecordLoop:
+
// Check the function list for unresolved values.
if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
if (A->getParent() == 0) {
@@ -2750,15 +2838,15 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned BlockIdx = RefList[i].first;
if (BlockIdx >= FunctionBBs.size())
return Error("Invalid blockaddress block #");
-
+
GlobalVariable *FwdRef = RefList[i].second;
FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx]));
FwdRef->eraseFromParent();
}
-
+
BlockAddrFwdRefs.erase(BAFRI);
}
-
+
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
MDValueList.shrinkTo(ModuleMDValueListSize);
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 3d5c0eb4def4..28674eb14ef2 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -14,27 +14,27 @@
#ifndef BITCODE_READER_H
#define BITCODE_READER_H
-#include "llvm/GVMaterializer.h"
-#include "llvm/Attributes.h"
-#include "llvm/Type.h"
-#include "llvm/OperandTraits.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/GVMaterializer.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/DenseMap.h"
#include <vector>
namespace llvm {
class MemoryBuffer;
class LLVMContext;
-
+
//===----------------------------------------------------------------------===//
// BitcodeReaderValueList Class
//===----------------------------------------------------------------------===//
class BitcodeReaderValueList {
std::vector<WeakVH> ValuePtrs;
-
+
/// ResolveConstants - As we resolve forward-referenced constants, we add
/// information about them to this vector. This allows us to resolve them in
/// bulk instead of resolving each reference one at a time. See the code in
@@ -57,17 +57,17 @@ public:
void push_back(Value *V) {
ValuePtrs.push_back(V);
}
-
+
void clear() {
assert(ResolveConstants.empty() && "Constants not resolved?");
ValuePtrs.clear();
}
-
+
Value *operator[](unsigned i) const {
assert(i < ValuePtrs.size());
return ValuePtrs[i];
}
-
+
Value *back() const { return ValuePtrs.back(); }
void pop_back() { ValuePtrs.pop_back(); }
bool empty() const { return ValuePtrs.empty(); }
@@ -75,12 +75,12 @@ public:
assert(N <= size() && "Invalid shrinkTo request!");
ValuePtrs.resize(N);
}
-
+
Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
Value *getValueFwdRef(unsigned Idx, Type *Ty);
-
+
void AssignValue(Value *V, unsigned Idx);
-
+
/// ResolveConstantForwardRefs - Once all constants are read, this method bulk
/// resolves any forward references.
void ResolveConstantForwardRefs();
@@ -93,7 +93,7 @@ public:
class BitcodeReaderMDValueList {
std::vector<WeakVH> MDValuePtrs;
-
+
LLVMContext &Context;
public:
BitcodeReaderMDValueList(LLVMContext& C) : Context(C) {}
@@ -106,12 +106,12 @@ public:
Value *back() const { return MDValuePtrs.back(); }
void pop_back() { MDValuePtrs.pop_back(); }
bool empty() const { return MDValuePtrs.empty(); }
-
+
Value *operator[](unsigned i) const {
assert(i < MDValuePtrs.size());
return MDValuePtrs[i];
}
-
+
void shrinkTo(unsigned N) {
assert(N <= size() && "Invalid shrinkTo request!");
MDValuePtrs.resize(N);
@@ -131,9 +131,9 @@ class BitcodeReader : public GVMaterializer {
DataStreamer *LazyStreamer;
uint64_t NextUnreadBit;
bool SeenValueSymbolTable;
-
+
const char *ErrorString;
-
+
std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
BitcodeReaderMDValueList MDValueList;
@@ -142,38 +142,41 @@ class BitcodeReader : public GVMaterializer {
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
-
+
/// MAttributes - The set of attributes by index. Index zero in the
/// file is for null, and is thus not represented here. As such all indices
/// are off by one.
- std::vector<AttrListPtr> MAttributes;
-
+ std::vector<AttributeSet> MAttributes;
+
+ /// \brief The set of attribute groups.
+ std::map<unsigned, AttributeSet> MAttributeGroups;
+
/// FunctionBBs - While parsing a function body, this is a list of the basic
/// blocks for the function.
std::vector<BasicBlock*> FunctionBBs;
-
+
// When reading the module header, this list is populated with functions that
// have bodies later in the file.
std::vector<Function*> FunctionsWithBodies;
- // When intrinsic functions are encountered which require upgrading they are
+ // When intrinsic functions are encountered which require upgrading they are
// stored here with their replacement function.
typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
UpgradedIntrinsicMap UpgradedIntrinsics;
// Map the bitcode's custom MDKind ID to the Module's MDKind ID.
DenseMap<unsigned, unsigned> MDKindMap;
-
+
// Several operations happen after the module header has been read, but
// before function bodies are processed. This keeps track of whether
// we've done this yet.
bool SeenFirstFunctionBody;
-
+
/// DeferredFunctionInfo - When function bodies are initially scanned, this
/// map contains info about where to find deferred function body in the
/// stream.
DenseMap<Function*, uint64_t> DeferredFunctionInfo;
-
+
/// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These
/// are resolved lazily when functions are loaded.
typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
@@ -208,11 +211,11 @@ public:
void materializeForwardReferencedFunctions();
void FreeState();
-
+
/// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer
/// when the reader is destroyed.
void setBufferOwned(bool Owned) { BufferOwned = Owned; }
-
+
virtual bool isMaterializable(const GlobalValue *GV) const;
virtual bool isDematerializable(const GlobalValue *GV) const;
virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0);
@@ -224,7 +227,7 @@ public:
return true;
}
const char *getErrorString() const { return ErrorString; }
-
+
/// @brief Main interface to parsing a bitcode buffer.
/// @returns true if an error occurred.
bool ParseBitcodeInto(Module *M);
@@ -246,12 +249,12 @@ private:
if (ID >= FunctionBBs.size()) return 0; // Invalid ID
return FunctionBBs[ID];
}
- AttrListPtr getAttributes(unsigned i) const {
+ AttributeSet getAttributes(unsigned i) const {
if (i-1 < MAttributes.size())
return MAttributes[i-1];
- return AttrListPtr();
+ return AttributeSet();
}
-
+
/// getValueTypePair - Read a value/type pair out of the specified record from
/// slot 'Slot'. Increment Slot past the number of slots used in the record.
/// Return true on failure.
@@ -320,6 +323,7 @@ private:
bool ParseModule(bool Resume);
bool ParseAttributeBlock();
+ bool ParseAttributeGroupBlock();
bool ParseTypeTable();
bool ParseTypeTableBody();
@@ -339,7 +343,7 @@ private:
bool FindFunctionInStream(Function *F,
DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator);
};
-
+
} // End llvm namespace
#endif
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
new file mode 100644
index 000000000000..9dafe2a03670
--- /dev/null
+++ b/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -0,0 +1,371 @@
+//===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitstreamReader.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// BitstreamCursor implementation
+//===----------------------------------------------------------------------===//
+
+void BitstreamCursor::operator=(const BitstreamCursor &RHS) {
+ freeState();
+
+ BitStream = RHS.BitStream;
+ NextChar = RHS.NextChar;
+ CurWord = RHS.CurWord;
+ BitsInCurWord = RHS.BitsInCurWord;
+ CurCodeSize = RHS.CurCodeSize;
+
+ // Copy abbreviations, and bump ref counts.
+ CurAbbrevs = RHS.CurAbbrevs;
+ for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i)
+ CurAbbrevs[i]->addRef();
+
+ // Copy block scope and bump ref counts.
+ BlockScope = RHS.BlockScope;
+ for (size_t S = 0, e = BlockScope.size(); S != e; ++S) {
+ std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
+ for (size_t i = 0, e = Abbrevs.size(); i != e; ++i)
+ Abbrevs[i]->addRef();
+ }
+}
+
+void BitstreamCursor::freeState() {
+ // Free all the Abbrevs.
+ for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i)
+ CurAbbrevs[i]->dropRef();
+ CurAbbrevs.clear();
+
+ // Free all the Abbrevs in the block scope.
+ for (size_t S = 0, e = BlockScope.size(); S != e; ++S) {
+ std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
+ for (size_t i = 0, e = Abbrevs.size(); i != e; ++i)
+ Abbrevs[i]->dropRef();
+ }
+ BlockScope.clear();
+}
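BitCodeAbbrev objects are intrusively reference counted and shared between cursors, which is why both operator= above and freeState() walk every scope calling addRef()/dropRef() by hand. The ownership contract, sketched with illustrative variables:

    BitstreamCursor B;
    B = A;          // operator= addRef()s every abbrev that cursor A holds
    B.freeState();  // dropRef()s them again; A's references stay valid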
+
+/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
+/// the block, and return true if the block has an error.
+bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
+ // Save the current block's state on BlockScope.
+ BlockScope.push_back(Block(CurCodeSize));
+ BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
+
+ // Add the abbrevs specific to this block to the CurAbbrevs list.
+ if (const BitstreamReader::BlockInfo *Info =
+ BitStream->getBlockInfo(BlockID)) {
+ for (size_t i = 0, e = Info->Abbrevs.size(); i != e; ++i) {
+ CurAbbrevs.push_back(Info->Abbrevs[i]);
+ CurAbbrevs.back()->addRef();
+ }
+ }
+
+ // Get the codesize of this block.
+ CurCodeSize = ReadVBR(bitc::CodeLenWidth);
+ SkipToFourByteBoundary();
+ unsigned NumWords = Read(bitc::BlockSizeWidth);
+ if (NumWordsP) *NumWordsP = NumWords;
+
+ // Validate that this block is sane.
+ if (CurCodeSize == 0 || AtEndOfStream())
+ return true;
+
+ return false;
+}
+
+void BitstreamCursor::readAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
+ SmallVectorImpl<uint64_t> &Vals) {
+ assert(Op.isLiteral() && "Not a literal");
+ // If the abbrev specifies the literal value to use, use it.
+ Vals.push_back(Op.getLiteralValue());
+}
+
+void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op,
+ SmallVectorImpl<uint64_t> &Vals) {
+ assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
+
+ // Decode the value as we are commanded.
+ switch (Op.getEncoding()) {
+ case BitCodeAbbrevOp::Array:
+ case BitCodeAbbrevOp::Blob:
+ assert(0 && "Should not reach here");
+ case BitCodeAbbrevOp::Fixed:
+ Vals.push_back(Read((unsigned)Op.getEncodingData()));
+ break;
+ case BitCodeAbbrevOp::VBR:
+ Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData()));
+ break;
+ case BitCodeAbbrevOp::Char6:
+ Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6)));
+ break;
+ }
+}
+
+void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) {
+ assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
+
+ // Decode the value as we are commanded.
+ switch (Op.getEncoding()) {
+ case BitCodeAbbrevOp::Array:
+ case BitCodeAbbrevOp::Blob:
+ assert(0 && "Should not reach here");
+ case BitCodeAbbrevOp::Fixed:
+ (void)Read((unsigned)Op.getEncodingData());
+ break;
+ case BitCodeAbbrevOp::VBR:
+ (void)ReadVBR64((unsigned)Op.getEncodingData());
+ break;
+ case BitCodeAbbrevOp::Char6:
+ (void)Read(6);
+ break;
+ }
+}
+
+
+
+/// skipRecord - Read the current record and discard it.
+void BitstreamCursor::skipRecord(unsigned AbbrevID) {
+ // Skip unabbreviated records by reading past their entries.
+ if (AbbrevID == bitc::UNABBREV_RECORD) {
+ unsigned Code = ReadVBR(6);
+ (void)Code;
+ unsigned NumElts = ReadVBR(6);
+ for (unsigned i = 0; i != NumElts; ++i)
+ (void)ReadVBR64(6);
+ return;
+ }
+
+ const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
+
+ for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
+ const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+ if (Op.isLiteral())
+ continue;
+
+ if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
+ Op.getEncoding() != BitCodeAbbrevOp::Blob) {
+ skipAbbreviatedField(Op);
+ continue;
+ }
+
+ if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
+ // Array case. Read the number of elements as a vbr6.
+ unsigned NumElts = ReadVBR(6);
+
+ // Get the element encoding.
+ assert(i+2 == e && "array op not second to last?");
+ const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
+
+ // Read all the elements.
+ for (; NumElts; --NumElts)
+ skipAbbreviatedField(EltEnc);
+ continue;
+ }
+
+ assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
+ // Blob case. Read the number of bytes as a vbr6.
+ unsigned NumElts = ReadVBR(6);
+ SkipToFourByteBoundary(); // 32-bit alignment
+
+ // Figure out where the end of this blob will be including tail padding.
+ size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8;
+
+ // If this would read off the end of the bitcode file, just set the
+ // record to empty and return.
+ if (!canSkipToPos(NewEnd/8)) {
+ NextChar = BitStream->getBitcodeBytes().getExtent();
+ break;
+ }
+
+ // Skip over the blob.
+ JumpToBit(NewEnd);
+ }
+}
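The blob handling in skipRecord (and in readRecord below) rounds the byte count up to a 32-bit boundary before jumping; a quick worked case of the padding arithmetic:

    unsigned NumElts = 5;                        // 5 bytes of blob data
    size_t PaddedBytes = (NumElts + 3) & ~3u;    // == 8, includes tail padding
    size_t BitsToSkip = PaddedBytes * 8;         // == 64, added to the bit position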
+
+unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
+ SmallVectorImpl<uint64_t> &Vals,
+ StringRef *Blob) {
+ if (AbbrevID == bitc::UNABBREV_RECORD) {
+ unsigned Code = ReadVBR(6);
+ unsigned NumElts = ReadVBR(6);
+ for (unsigned i = 0; i != NumElts; ++i)
+ Vals.push_back(ReadVBR64(6));
+ return Code;
+ }
+
+ const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
+
+ for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
+ const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+ if (Op.isLiteral()) {
+ readAbbreviatedLiteral(Op, Vals);
+ continue;
+ }
+
+ if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
+ Op.getEncoding() != BitCodeAbbrevOp::Blob) {
+ readAbbreviatedField(Op, Vals);
+ continue;
+ }
+
+ if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
+ // Array case. Read the number of elements as a vbr6.
+ unsigned NumElts = ReadVBR(6);
+
+ // Get the element encoding.
+ assert(i+2 == e && "array op not second to last?");
+ const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
+
+ // Read all the elements.
+ for (; NumElts; --NumElts)
+ readAbbreviatedField(EltEnc, Vals);
+ continue;
+ }
+
+ assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
+ // Blob case. Read the number of bytes as a vbr6.
+ unsigned NumElts = ReadVBR(6);
+ SkipToFourByteBoundary(); // 32-bit alignment
+
+ // Figure out where the end of this blob will be including tail padding.
+ size_t CurBitPos = GetCurrentBitNo();
+ size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8;
+
+ // If this would read off the end of the bitcode file, just set the
+ // record to empty and return.
+ if (!canSkipToPos(NewEnd/8)) {
+ Vals.append(NumElts, 0);
+ NextChar = BitStream->getBitcodeBytes().getExtent();
+ break;
+ }
+
+ // Otherwise, inform the streamer that we need these bytes in memory.
+ const char *Ptr = (const char*)
+ BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts);
+
+ // If we can return a reference to the data, do so to avoid copying it.
+ if (Blob) {
+ *Blob = StringRef(Ptr, NumElts);
+ } else {
+ // Otherwise, unpack into Vals with zero extension.
+ for (; NumElts; --NumElts)
+ Vals.push_back((unsigned char)*Ptr++);
+ }
+ // Skip over tail padding.
+ JumpToBit(NewEnd);
+ }
+
+ unsigned Code = (unsigned)Vals[0];
+ Vals.erase(Vals.begin());
+ return Code;
+}
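From a caller's point of view the new API is a single call per record. Note that for abbreviated records the code comes back in Vals[0] (usually a literal baked into the abbrev definition) and is popped off before returning, while blobs can be returned as a zero-copy StringRef. A usage sketch with an illustrative cursor variable:

    SmallVector<uint64_t, 64> Vals;
    StringRef Blob;
    unsigned Code = Cursor.readRecord(Entry.ID, Vals, &Blob);
    // Vals holds the operands; Blob points into the bitcode buffer when the
    // abbreviation ended in a Blob operand, so no bytes were copied.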
+
+
+void BitstreamCursor::ReadAbbrevRecord() {
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ unsigned NumOpInfo = ReadVBR(5);
+ for (unsigned i = 0; i != NumOpInfo; ++i) {
+ bool IsLiteral = Read(1) ? true : false;
+ if (IsLiteral) {
+ Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8)));
+ continue;
+ }
+
+ BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3);
+ if (BitCodeAbbrevOp::hasEncodingData(E)) {
+ unsigned Data = ReadVBR64(5);
+
+ // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
+ // and vbr(0) as a literal zero. This is decoded the same way, and avoids
+ // a slow path in Read() that would otherwise have to handle reading zero bits.
+ if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
+ Data == 0) {
+ Abbv->Add(BitCodeAbbrevOp(0));
+ continue;
+ }
+
+ Abbv->Add(BitCodeAbbrevOp(E, Data));
+ } else
+ Abbv->Add(BitCodeAbbrevOp(E));
+ }
+ CurAbbrevs.push_back(Abbv);
+}
+
+bool BitstreamCursor::ReadBlockInfoBlock() {
+ // If this is the second stream to get to the block info block, skip it.
+ if (BitStream->hasBlockInfoRecords())
+ return SkipBlock();
+
+ if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
+
+ SmallVector<uint64_t, 64> Record;
+ BitstreamReader::BlockInfo *CurBlockInfo = 0;
+
+ // Read all the records for this block.
+ while (1) {
+ BitstreamEntry Entry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
+
+ switch (Entry.Kind) {
+ case llvm::BitstreamEntry::SubBlock: // Handled for us already.
+ case llvm::BitstreamEntry::Error:
+ return true;
+ case llvm::BitstreamEntry::EndBlock:
+ return false;
+ case llvm::BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read abbrev records, associate them with CurBID.
+ if (Entry.ID == bitc::DEFINE_ABBREV) {
+ if (!CurBlockInfo) return true;
+ ReadAbbrevRecord();
+
+ // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
+ // appropriate BlockInfo.
+ BitCodeAbbrev *Abbv = CurAbbrevs.back();
+ CurAbbrevs.pop_back();
+ CurBlockInfo->Abbrevs.push_back(Abbv);
+ continue;
+ }
+
+ // Read a record.
+ Record.clear();
+ switch (readRecord(Entry.ID, Record)) {
+ default: break; // Default behavior, ignore unknown content.
+ case bitc::BLOCKINFO_CODE_SETBID:
+ if (Record.size() < 1) return true;
+ CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
+ break;
+ case bitc::BLOCKINFO_CODE_BLOCKNAME: {
+ if (!CurBlockInfo) return true;
+ if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name.
+ std::string Name;
+ for (unsigned i = 0, e = Record.size(); i != e; ++i)
+ Name += (char)Record[i];
+ CurBlockInfo->Name = Name;
+ break;
+ }
+ case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
+ if (!CurBlockInfo) return true;
+ if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name.
+ std::string Name;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i)
+ Name += (char)Record[i];
+ CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
+ Name));
+ break;
+ }
+ }
+ }
+}
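ReadBlockInfoBlock is a small state machine: a SETBID record selects the target block, and every DEFINE_ABBREV or name record that follows applies to it until the next SETBID. Schematically (shown as comments; the order is the on-disk record order):

    // SETBID <PARAMATTR_BLOCK_ID>  -> CurBlockInfo = &getOrCreateBlockInfo(...)
    // DEFINE_ABBREV ...            -> moved into CurBlockInfo->Abbrevs
    // BLOCKNAME "PARAMATTR"        -> CurBlockInfo->Name (unless names ignored)
    // SETBID <TYPE_BLOCK_ID_NEW>   -> subsequent records apply to the new block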
+
diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt
index dfe7e1065c7d..f614c9fd4a03 100644
--- a/lib/Bitcode/Reader/CMakeLists.txt
+++ b/lib/Bitcode/Reader/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMBitReader
BitReader.cpp
BitcodeReader.cpp
+ BitstreamReader.cpp
)
add_dependencies(LLVMBitReader intrinsics_gen)
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
index 428842246331..9f51c35ad92e 100644
--- a/lib/Bitcode/Writer/BitWriter.cpp
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -17,12 +17,11 @@ using namespace llvm;
int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
std::string ErrorInfo;
- raw_fd_ostream OS(Path, ErrorInfo,
- raw_fd_ostream::F_Binary);
-
+ raw_fd_ostream OS(Path, ErrorInfo, raw_fd_ostream::F_Binary);
+
if (!ErrorInfo.empty())
return -1;
-
+
WriteBitcodeToFile(unwrap(M), OS);
return 0;
}
@@ -30,7 +29,7 @@ int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
int Unbuffered) {
raw_fd_ostream OS(FD, ShouldClose, Unbuffered);
-
+
WriteBitcodeToFile(unwrap(M), OS);
return 0;
}
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 60c657ae6dd4..1b73f23e8f60 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -12,22 +12,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Bitcode/BitstreamWriter.h"
-#include "llvm/Bitcode/LLVMBitCodes.h"
#include "ValueEnumerator.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/ValueSymbolTable.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
#include <cctype>
#include <map>
using namespace llvm;
@@ -61,7 +61,7 @@ enum {
FUNCTION_INST_RET_VOID_ABBREV,
FUNCTION_INST_RET_VAL_ABBREV,
FUNCTION_INST_UNREACHABLE_ABBREV,
-
+
// SwitchInst Magic
SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex
};
@@ -161,22 +161,66 @@ static void WriteStringRecord(unsigned Code, StringRef Str,
Stream.EmitRecord(Code, Vals, AbbrevToUse);
}
-// Emit information about parameter attributes.
+static void WriteAttributeGroupTable(const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ const std::vector<AttributeSet> &AttrGrps = VE.getAttributeGroups();
+ if (AttrGrps.empty()) return;
+
+ Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3);
+
+ SmallVector<uint64_t, 64> Record;
+ for (unsigned i = 0, e = AttrGrps.size(); i != e; ++i) {
+ AttributeSet AS = AttrGrps[i];
+ for (unsigned i = 0, e = AS.getNumSlots(); i != e; ++i) {
+ AttributeSet A = AS.getSlotAttributes(i);
+
+ Record.push_back(VE.getAttributeGroupID(A));
+ Record.push_back(AS.getSlotIndex(i));
+
+ for (AttributeSet::iterator I = AS.begin(0), E = AS.end(0);
+ I != E; ++I) {
+ Attribute Attr = *I;
+ if (Attr.isEnumAttribute()) {
+ Record.push_back(0);
+ Record.push_back(Attr.getKindAsEnum());
+ } else if (Attr.isAlignAttribute()) {
+ Record.push_back(1);
+ Record.push_back(Attr.getKindAsEnum());
+ Record.push_back(Attr.getValueAsInt());
+ } else {
+ StringRef Kind = Attr.getKindAsString();
+ StringRef Val = Attr.getValueAsString();
+
+ Record.push_back(Val.empty() ? 3 : 4);
+ Record.append(Kind.begin(), Kind.end());
+ Record.push_back(0);
+ if (!Val.empty()) {
+ Record.append(Val.begin(), Val.end());
+ Record.push_back(0);
+ }
+ }
+ }
+
+ Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record);
+ Record.clear();
+ }
+ }
+
+ Stream.ExitBlock();
+}
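The tags written here (0 enum, 1 align, 3/4 string without/with value) are exactly what ParseAttributeGroupBlock in the reader dispatches on. For instance, align 16 becomes the triple:

    Record.push_back(1);                     // tag: alignment-style attribute
    Record.push_back(Attr.getKindAsEnum());  // Attribute::Alignment
    Record.push_back(Attr.getValueAsInt());  // 16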
+
static void WriteAttributeTable(const ValueEnumerator &VE,
BitstreamWriter &Stream) {
- const std::vector<AttrListPtr> &Attrs = VE.getAttributes();
+ const std::vector<AttributeSet> &Attrs = VE.getAttributes();
if (Attrs.empty()) return;
Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
SmallVector<uint64_t, 64> Record;
for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
- const AttrListPtr &A = Attrs[i];
- for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) {
- const AttributeWithIndex &PAWI = A.getSlot(i);
- Record.push_back(PAWI.Index);
- Record.push_back(Attributes::encodeLLVMAttributesForBitcode(PAWI.Attrs));
- }
+ const AttributeSet &A = Attrs[i];
+ for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i)
+ Record.push_back(VE.getAttributeGroupID(A.getSlotAttributes(i)));
Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
Record.clear();
@@ -234,7 +278,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
-
+
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
@@ -256,16 +300,16 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
switch (T->getTypeID()) {
default: llvm_unreachable("Unknown type!");
- case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
- case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break;
- case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
- case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
- case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
- case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break;
+ case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
+ case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break;
+ case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
+ case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
+ case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
+ case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break;
case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break;
- case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
- case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
- case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
+ case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
+ case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
+ case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
case Type::IntegerTyID:
// INTEGER: [width]
Code = bitc::TYPE_CODE_INTEGER;
@@ -300,7 +344,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
for (StructType::element_iterator I = ST->element_begin(),
E = ST->element_end(); I != E; ++I)
TypeVals.push_back(VE.getTypeID(*I));
-
+
if (ST->isLiteral()) {
Code = bitc::TYPE_CODE_STRUCT_ANON;
AbbrevToUse = StructAnonAbbrev;
@@ -392,10 +436,6 @@ static unsigned getEncodedThreadLocalMode(const GlobalVariable *GV) {
// descriptors for global variables, and function prototype info.
static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
BitstreamWriter &Stream) {
- // Emit the list of dependent libraries for the Module.
- for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
- WriteStringRecord(bitc::MODULE_CODE_DEPLIB, *I, 0/*TODO*/, Stream);
-
// Emit various pieces of data attached to a module.
if (!M->getTargetTriple().empty())
WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(),
@@ -494,10 +534,11 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0);
if (GV->isThreadLocal() ||
GV->getVisibility() != GlobalValue::DefaultVisibility ||
- GV->hasUnnamedAddr()) {
+ GV->hasUnnamedAddr() || GV->isExternallyInitialized()) {
Vals.push_back(getEncodedVisibility(GV));
Vals.push_back(getEncodedThreadLocalMode(GV));
Vals.push_back(GV->hasUnnamedAddr());
+ Vals.push_back(GV->isExternallyInitialized());
} else {
AbbrevToUse = SimpleGVarAbbrev;
}
@@ -553,6 +594,18 @@ static uint64_t GetOptimizationFlags(const Value *V) {
dyn_cast<PossiblyExactOperator>(V)) {
if (PEO->isExact())
Flags |= 1 << bitc::PEO_EXACT;
+ } else if (const FPMathOperator *FPMO =
+ dyn_cast<const FPMathOperator>(V)) {
+ if (FPMO->hasUnsafeAlgebra())
+ Flags |= FastMathFlags::UnsafeAlgebra;
+ if (FPMO->hasNoNaNs())
+ Flags |= FastMathFlags::NoNaNs;
+ if (FPMO->hasNoInfs())
+ Flags |= FastMathFlags::NoInfs;
+ if (FPMO->hasNoSignedZeros())
+ Flags |= FastMathFlags::NoSignedZeros;
+ if (FPMO->hasAllowReciprocal())
+ Flags |= FastMathFlags::AllowReciprocal;
}
return Flags;
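
A minimal standalone sketch of the bit-packing pattern this hunk adds: each fast-math property is folded into a single flags word, one bit per property. The bit positions below are illustrative only; the real assignments live in FastMathFlags (llvm/IR/Operator.h).

    #include <cstdint>

    struct FPProps {
      bool UnsafeAlgebra, NoNaNs, NoInfs, NoSignedZeros, AllowReciprocal;
    };

    // Pack the properties into one word, one bit per flag
    // (bit values here are illustrative, not the bitcode encoding).
    static uint64_t encodeFastMathFlags(const FPProps &P) {
      uint64_t Flags = 0;
      if (P.UnsafeAlgebra)   Flags |= 1 << 0;
      if (P.NoNaNs)          Flags |= 1 << 1;
      if (P.NoInfs)          Flags |= 1 << 2;
      if (P.NoSignedZeros)   Flags |= 1 << 3;
      if (P.AllowReciprocal) Flags |= 1 << 4;
      return Flags;
    }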
@@ -658,7 +711,7 @@ static void WriteFunctionLocalMetadata(const Function &F,
}
WriteMDNode(N, VE, Stream, Record);
}
-
+
if (StartedMetadataBlock)
Stream.ExitBlock();
}
@@ -673,18 +726,18 @@ static void WriteMetadataAttachment(const Function &F,
// Write metadata attachments
// METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]]
SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
-
+
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
MDs.clear();
I->getAllMetadataOtherThanDebugLoc(MDs);
-
+
// If no metadata, ignore instruction.
if (MDs.empty()) continue;
Record.push_back(VE.getInstructionID(I));
-
+
for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
Record.push_back(MDs[i].first);
Record.push_back(VE.getValueID(MDs[i].second));
@@ -701,18 +754,18 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
// Write metadata kinds
// METADATA_KIND - [n x [id, name]]
- SmallVector<StringRef, 4> Names;
+ SmallVector<StringRef, 8> Names;
M->getMDKindNames(Names);
-
+
if (Names.empty()) return;
Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
-
+
for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) {
Record.push_back(MDKindID);
StringRef KName = Names[MDKindID];
Record.append(KName.begin(), KName.end());
-
+
Stream.EmitRecord(bitc::METADATA_KIND, Record, 0);
Record.clear();
}
@@ -743,10 +796,10 @@ static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals,
// format it is likely that the high bits are going to be zero.
// So, we only write the number of active words.
unsigned NWords = Val.getActiveWords();
-
+
if (EmitSizeForWideNumbers)
Vals.push_back(NWords);
-
+
const uint64_t *RawWords = Val.getRawData();
for (unsigned i = 0; i != NWords; ++i) {
emitSignedInt64(Vals, RawWords[i]);
@@ -881,12 +934,12 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
if (isCStrChar6)
isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
}
-
+
if (isCStrChar6)
AbbrevToUse = CString6Abbrev;
else if (isCStr7)
AbbrevToUse = CString7Abbrev;
- } else if (const ConstantDataSequential *CDS =
+ } else if (const ConstantDataSequential *CDS =
dyn_cast<ConstantDataSequential>(C)) {
Code = bitc::CST_CODE_DATA;
Type *EltTy = CDS->getType()->getElementType();
@@ -1166,7 +1219,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::Br:
{
Code = bitc::FUNC_CODE_INST_BR;
- BranchInst &II = cast<BranchInst>(I);
+ const BranchInst &II = cast<BranchInst>(I);
Vals.push_back(VE.getValueID(II.getSuccessor(0)));
if (II.isConditional()) {
Vals.push_back(VE.getValueID(II.getSuccessor(1)));
@@ -1179,36 +1232,36 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
// Redefine Vals, since here we need to use 64 bit values
// explicitly to store large APInt numbers.
SmallVector<uint64_t, 128> Vals64;
-
+
Code = bitc::FUNC_CODE_INST_SWITCH;
- SwitchInst &SI = cast<SwitchInst>(I);
-
- uint32_t SwitchRecordHeader = SI.hash() | (SWITCH_INST_MAGIC << 16);
- Vals64.push_back(SwitchRecordHeader);
-
+ const SwitchInst &SI = cast<SwitchInst>(I);
+
+ uint32_t SwitchRecordHeader = SI.hash() | (SWITCH_INST_MAGIC << 16);
+ Vals64.push_back(SwitchRecordHeader);
+
Vals64.push_back(VE.getTypeID(SI.getCondition()->getType()));
pushValue64(SI.getCondition(), InstID, Vals64, VE);
Vals64.push_back(VE.getValueID(SI.getDefaultDest()));
Vals64.push_back(SI.getNumCases());
- for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+ for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
i != e; ++i) {
- IntegersSubset& CaseRanges = i.getCaseValueEx();
+ const IntegersSubset& CaseRanges = i.getCaseValueEx();
unsigned Code, Abbrev; // will be unused.
-
+
if (CaseRanges.isSingleNumber()) {
Vals64.push_back(1/*NumItems = 1*/);
Vals64.push_back(true/*IsSingleNumber = true*/);
EmitAPInt(Vals64, Code, Abbrev, CaseRanges.getSingleNumber(0), true);
} else {
-
+
Vals64.push_back(CaseRanges.getNumItems());
-
+
if (CaseRanges.isSingleNumbersOnly()) {
for (unsigned ri = 0, rn = CaseRanges.getNumItems();
ri != rn; ++ri) {
-
+
Vals64.push_back(true/*IsSingleNumber = true*/);
-
+
EmitAPInt(Vals64, Code, Abbrev,
CaseRanges.getSingleNumber(ri), true);
}
@@ -1217,9 +1270,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
ri != rn; ++ri) {
IntegersSubset::Range r = CaseRanges.getItem(ri);
bool IsSingleNumber = CaseRanges.isSingleNumber(ri);
-
+
Vals64.push_back(IsSingleNumber);
-
+
EmitAPInt(Vals64, Code, Abbrev, r.getLow(), true);
if (!IsSingleNumber)
EmitAPInt(Vals64, Code, Abbrev, r.getHigh(), true);
@@ -1227,9 +1280,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
}
Vals64.push_back(VE.getValueID(i.getCaseSuccessor()));
}
-
+
Stream.EmitRecord(Code, Vals64, AbbrevToUse);
-
+
// Also do expected action - clear external Vals collection:
Vals.clear();
return;
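
The switch record header above packs a 16-bit magic into the high half of a 32-bit word and the instruction hash into the low half, so the matching reader can detect the new record format before decoding the cases. A sketch with an illustrative stand-in for the magic constant, which is defined beside the writer:

    #include <cassert>
    #include <cstdint>

    // Illustrative stand-in; the real constant is defined next to the
    // writer and validated by the matching bitcode reader.
    static const uint32_t SWITCH_INST_MAGIC_SKETCH = 0x4B5;

    static uint32_t makeSwitchHeader(uint16_t Hash) {
      return uint32_t(Hash) | (SWITCH_INST_MAGIC_SKETCH << 16);
    }

    int main() {
      uint32_t H = makeSwitchHeader(0xBEEF);
      assert((H >> 16) == SWITCH_INST_MAGIC_SKETCH); // reader checks the magic
      assert((H & 0xFFFF) == 0xBEEF);                // then recovers the hash
      return 0;
    }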
@@ -1243,7 +1296,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i)
Vals.push_back(VE.getValueID(I.getOperand(i)));
break;
-
+
case Instruction::Invoke: {
const InvokeInst *II = cast<InvokeInst>(&I);
const Value *Callee(II->getCalledValue());
@@ -1502,21 +1555,21 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
unsigned InstID = CstEnd;
bool NeedsMetadataAttachment = false;
-
+
DebugLoc LastDL;
-
+
// Finally, emit all the instructions, in order.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
WriteInstruction(*I, InstID, VE, Stream, Vals);
-
+
if (!I->getType()->isVoidTy())
++InstID;
-
+
// If the instruction has metadata, write a metadata attachment later.
NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
-
+
// If the instruction has a debug location, emit it.
DebugLoc DL = I->getDebugLoc();
if (DL.isUnknown()) {
@@ -1527,14 +1580,14 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
} else {
MDNode *Scope, *IA;
DL.getScopeAndInlinedAt(Scope, IA, I->getContext());
-
+
Vals.push_back(DL.getLine());
Vals.push_back(DL.getCol());
Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0);
Vals.push_back(IA ? VE.getValueID(IA)+1 : 0);
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
Vals.clear();
-
+
LastDL = DL;
}
}
@@ -1709,7 +1762,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
-// Sort the Users based on the order in which the reader parses the bitcode
+// Sort the Users based on the order in which the reader parses the bitcode
// file.
static bool bitcodereader_order(const User *lhs, const User *rhs) {
// TODO: Implement.
@@ -1778,9 +1831,9 @@ static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I)
I->removeDeadConstantUsers();
-
+
// Write the global variables.
- for (Module::const_global_iterator GI = M->global_begin(),
+ for (Module::const_global_iterator GI = M->global_begin(),
GE = M->global_end(); GI != GE; ++GI) {
WriteUseList(GI, VE, Stream);
@@ -1821,6 +1874,9 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
// Emit blockinfo, which defines the standard abbreviations etc.
WriteBlockInfo(VE, Stream);
+ // Emit information about attribute groups.
+ WriteAttributeGroupTable(VE, Stream);
+
// Emit information about parameter attributes.
WriteAttributeTable(VE, Stream);
@@ -1931,7 +1987,7 @@ static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
- SmallVector<char, 1024> Buffer;
+ SmallVector<char, 0> Buffer;
Buffer.reserve(256*1024);
// If this is darwin or another generic macho target, reserve space for the
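
For context, a minimal driver for the entry point changed above, assuming the 3.3-era raw_fd_ostream constructor taking (path, error string, flags). WriteBitcodeToFile buffers the whole module before writing, which is why the SmallVector above only reserves:

    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    int main() {
      llvm::LLVMContext Ctx;
      llvm::Module M("example", Ctx);    // an empty module to serialize
      std::string Err;
      llvm::raw_fd_ostream Out("example.bc", Err,
                               llvm::raw_fd_ostream::F_Binary);
      if (!Err.empty())
        return 1;
      llvm::WriteBitcodeToFile(&M, Out); // entry point from the hunk above
      return 0;
    }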
diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 91e115cba6cc..e5e76e29bd2d 100644
--- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -22,9 +22,9 @@ namespace {
static char ID; // Pass identification, replacement for typeid
explicit WriteBitcodePass(raw_ostream &o)
: ModulePass(ID), OS(o) {}
-
+
const char *getPassName() const { return "Bitcode Writer"; }
-
+
bool runOnModule(Module &M) {
WriteBitcodeToFile(&M, OS);
return false;
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 1ed9004eb5a1..8bac6da89285 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -12,20 +12,20 @@
//===----------------------------------------------------------------------===//
#include "ValueEnumerator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/ValueSymbolTable.h"
-#include "llvm/Instructions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
-static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) {
- return V.first->getType()->isIntegerTy();
+static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
+ return V.first->getType()->isIntOrIntVectorTy();
}
/// ValueEnumerator - Enumerate module-level information.
@@ -60,7 +60,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
I != E; ++I)
EnumerateValue(I->getAliasee());
- // Insert constants and metadata that are named at module level into the slot
+ // Insert constants and metadata that are named at module level into the slot
// pool so that the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
EnumerateNamedMetadata(M);
@@ -95,7 +95,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
I->getAllMetadataOtherThanDebugLoc(MDs);
for (unsigned i = 0, e = MDs.size(); i != e; ++i)
EnumerateMetadata(MDs[i].second);
-
+
if (!I->getDebugLoc().isUnknown()) {
MDNode *Scope, *IA;
I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext());
@@ -192,10 +192,11 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
CstSortPredicate P(*this);
std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
- // Ensure that integer constants are at the start of the constant pool. This
- // is important so that GEP structure indices come before gep constant exprs.
+ // Ensure that integer and vector of integer constants are at the start of the
+ // constant pool. This is important so that GEP structure indices come before
+ // gep constant exprs.
std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
- isIntegerValue);
+ isIntOrIntVectorValue);
// Rebuild the modified portion of ValueMap.
for (; CstStart != CstEnd; ++CstStart)
@@ -362,16 +363,16 @@ void ValueEnumerator::EnumerateType(Type *Ty) {
if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isLiteral())
*TypeID = ~0U;
-
+
// Enumerate all of the subtypes before we enumerate this type. This ensures
// that the type will be enumerated in an order that can be directly built.
for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
I != E; ++I)
EnumerateType(*I);
-
+
// Refresh the TypeID pointer in case the table rehashed.
TypeID = &TypeMap[Ty];
-
+
// Check to see if we got the pointer another way. This can happen when
// enumerating recursive types that hit the base case deeper than they start.
//
@@ -379,10 +380,10 @@ void ValueEnumerator::EnumerateType(Type *Ty) {
// then emit the definition now that all of its contents are available.
if (*TypeID && *TypeID != ~0U)
return;
-
+
// Add this type now that its contents are all happily enumerated.
Types.push_back(Ty);
-
+
*TypeID = Types.size();
}
@@ -390,7 +391,7 @@ void ValueEnumerator::EnumerateType(Type *Ty) {
// walk through it, enumerating the types of the constant.
void ValueEnumerator::EnumerateOperandType(const Value *V) {
EnumerateType(V->getType());
-
+
if (const Constant *C = dyn_cast<Constant>(V)) {
// If this constant is already enumerated, ignore it, we know its type must
// be enumerated.
@@ -400,11 +401,11 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
// them.
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
const Value *Op = C->getOperand(i);
-
+
// Don't enumerate basic blocks here, this happens as operands to
// blockaddress.
if (isa<BasicBlock>(Op)) continue;
-
+
EnumerateOperandType(Op);
}
@@ -417,14 +418,25 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
EnumerateMetadata(V);
}
-void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
+void ValueEnumerator::EnumerateAttributes(AttributeSet PAL) {
if (PAL.isEmpty()) return; // null is always 0.
+
// Do a lookup.
- unsigned &Entry = AttributeMap[PAL.getRawPointer()];
+ unsigned &Entry = AttributeMap[PAL];
if (Entry == 0) {
// Never saw this before, add it.
- Attributes.push_back(PAL);
- Entry = Attributes.size();
+ Attribute.push_back(PAL);
+ Entry = Attribute.size();
+ }
+
+ // Do lookups for all attribute groups.
+ for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) {
+ AttributeSet AS = PAL.getSlotAttributes(i);
+ unsigned &Entry = AttributeGroupMap[AS];
+ if (Entry == 0) {
+ AttributeGroups.push_back(AS);
+ Entry = AttributeGroups.size();
+ }
}
}
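
EnumerateAttributes above relies on a common 1-based ID-assignment idiom: a map whose default-constructed value 0 means "unseen", paired with a vector whose size supplies the next ID, with 0 reserved for the empty set. A generic sketch of that pattern:

    #include <map>
    #include <string>
    #include <vector>

    struct Enumerator {
      std::map<std::string, unsigned> IDMap;
      std::vector<std::string> Table;

      unsigned enumerate(const std::string &Key) {
        if (Key.empty()) return 0;    // null/empty always maps to 0
        unsigned &Entry = IDMap[Key]; // default-constructs to 0
        if (Entry == 0) {             // never seen before: add it
          Table.push_back(Key);
          Entry = Table.size();       // first real ID is 1
        }
        return Entry;
      }
    };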
@@ -481,7 +493,7 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
if (N->isFunctionLocal() && N->getFunction())
FnLocalMDVector.push_back(N);
}
-
+
if (!I->getType()->isVoidTy())
EnumerateValue(I);
}
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 75468e6c5e2e..0af6164c944f 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Attributes.h"
+#include "llvm/IR/Attributes.h"
#include <vector>
namespace llvm {
@@ -29,7 +29,7 @@ class Function;
class Module;
class MDNode;
class NamedMDNode;
-class AttrListPtr;
+class AttributeSet;
class ValueSymbolTable;
class MDSymbolTable;
class raw_ostream;
@@ -51,15 +51,19 @@ private:
ValueList MDValues;
SmallVector<const MDNode *, 8> FunctionLocalMDs;
ValueMapType MDValueMap;
-
- typedef DenseMap<void*, unsigned> AttributeMapType;
+
+ typedef DenseMap<AttributeSet, unsigned> AttributeGroupMapType;
+ AttributeGroupMapType AttributeGroupMap;
+ std::vector<AttributeSet> AttributeGroups;
+
+ typedef DenseMap<AttributeSet, unsigned> AttributeMapType;
AttributeMapType AttributeMap;
- std::vector<AttrListPtr> Attributes;
-
+ std::vector<AttributeSet> Attribute;
+
/// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by
/// the "getGlobalBasicBlockID" method.
mutable DenseMap<const BasicBlock*, unsigned> GlobalBasicBlockIDs;
-
+
typedef DenseMap<const Instruction*, unsigned> InstructionMapType;
InstructionMapType InstructionMap;
unsigned InstructionCount;
@@ -67,7 +71,7 @@ private:
/// BasicBlocks - This contains all the basic blocks for the currently
/// incorporated function. Their reverse mapping is stored in ValueMap.
std::vector<const BasicBlock*> BasicBlocks;
-
+
/// When a function is incorporated, this is the size of the Values list
/// before incorporation.
unsigned NumModuleValues;
@@ -98,33 +102,43 @@ public:
unsigned getInstructionID(const Instruction *I) const;
void setInstructionID(const Instruction *I);
- unsigned getAttributeID(const AttrListPtr &PAL) const {
+ unsigned getAttributeID(AttributeSet PAL) const {
if (PAL.isEmpty()) return 0; // Null maps to zero.
- AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer());
+ AttributeMapType::const_iterator I = AttributeMap.find(PAL);
assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!");
return I->second;
}
+ unsigned getAttributeGroupID(AttributeSet PAL) const {
+ if (PAL.isEmpty()) return 0; // Null maps to zero.
+ AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(PAL);
+ assert(I != AttributeGroupMap.end() &&
+        "Attribute group not in ValueEnumerator!");
+ return I->second;
+ }
+
/// getFunctionConstantRange - Return the range of values that corresponds to
/// function-local constants.
void getFunctionConstantRange(unsigned &Start, unsigned &End) const {
Start = FirstFuncConstantID;
End = FirstInstID;
}
-
+
const ValueList &getValues() const { return Values; }
const ValueList &getMDValues() const { return MDValues; }
- const SmallVector<const MDNode *, 8> &getFunctionLocalMDValues() const {
+ const SmallVector<const MDNode *, 8> &getFunctionLocalMDValues() const {
return FunctionLocalMDs;
}
const TypeList &getTypes() const { return Types; }
const std::vector<const BasicBlock*> &getBasicBlocks() const {
- return BasicBlocks;
+ return BasicBlocks;
+ }
+ const std::vector<AttributeSet> &getAttributes() const {
+ return Attribute;
}
- const std::vector<AttrListPtr> &getAttributes() const {
- return Attributes;
+ const std::vector<AttributeSet> &getAttributeGroups() const {
+ return AttributeGroups;
}
-
+
/// getGlobalBasicBlockID - This returns the function-specific ID for the
/// specified basic block. This is relatively expensive information, so it
/// should only be used by rare constructs such as address-of-label.
@@ -138,7 +152,7 @@ public:
private:
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
-
+
void EnumerateMDNodeOperands(const MDNode *N);
void EnumerateMetadata(const Value *MD);
void EnumerateFunctionLocalMetadata(const MDNode *N);
@@ -146,8 +160,8 @@ private:
void EnumerateValue(const Value *V);
void EnumerateType(Type *T);
void EnumerateOperandType(const Value *V);
- void EnumerateAttributes(const AttrListPtr &PAL);
-
+ void EnumerateAttributes(AttributeSet PAL);
+
void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
void EnumerateNamedMetadata(const Module *M);
};
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index fb63c63f327c..76ebe9aca9a3 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -1,6 +1,7 @@
# `Support' and `TableGen' libraries are added on the top-level CMakeLists.txt
-add_subdirectory(VMCore)
+add_subdirectory(IR)
+add_subdirectory(IRReader)
add_subdirectory(CodeGen)
add_subdirectory(Bitcode)
add_subdirectory(Transforms)
@@ -8,6 +9,7 @@ add_subdirectory(Linker)
add_subdirectory(Analysis)
add_subdirectory(MC)
add_subdirectory(Object)
+add_subdirectory(Option)
add_subdirectory(DebugInfo)
add_subdirectory(ExecutionEngine)
add_subdirectory(Target)
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 7a1c049d522d..c50f8b5a42ad 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -20,14 +20,13 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
@@ -152,23 +151,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
- // Determine the live-out physregs for this block.
- if (IsReturnBlock) {
- // In a return block, examine the function live-out regs.
- for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
- E = MRI.liveout_end(); I != E; ++I) {
- for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
- unsigned Reg = *AI;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- }
- }
- }
-
- // In a non-return block, examine the live-in regs of all successors.
- // Note a return block can have successors if the return instruction is
- // predicated.
+ // Examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
@@ -616,7 +599,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
const TargetRegisterClass *SuperRC =
TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
- ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC);
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC);
if (Order.empty()) {
DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
return false;
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 706778485429..6683630fba6d 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -18,15 +18,15 @@
#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <map>
namespace llvm {
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 7cde136c5ef3..3fa1f8ff206c 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -14,10 +14,14 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "regalloc"
#include "AllocationOrder.h"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -25,56 +29,24 @@ using namespace llvm;
AllocationOrder::AllocationOrder(unsigned VirtReg,
const VirtRegMap &VRM,
const RegisterClassInfo &RegClassInfo)
- : Begin(0), End(0), Pos(0), RCI(RegClassInfo), OwnedBegin(false) {
- const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
- std::pair<unsigned, unsigned> HintPair =
- VRM.getRegInfo().getRegAllocationHint(VirtReg);
- const MachineRegisterInfo &MRI = VRM.getRegInfo();
-
- // HintPair.second is a register, phys or virt.
- Hint = HintPair.second;
-
- // Translate to physreg, or 0 if not assigned yet.
- if (TargetRegisterInfo::isVirtualRegister(Hint))
- Hint = VRM.getPhys(Hint);
-
- // The first hint pair component indicates a target-specific hint.
- if (HintPair.first) {
- const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
- // The remaining allocation order may depend on the hint.
- ArrayRef<uint16_t> Order =
- TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
- VRM.getMachineFunction());
- if (Order.empty())
- return;
-
- // Copy the allocation order with reserved registers removed.
- OwnedBegin = true;
- unsigned *P = new unsigned[Order.size()];
- Begin = P;
- for (unsigned i = 0; i != Order.size(); ++i)
- if (!MRI.isReserved(Order[i]))
- *P++ = Order[i];
- End = P;
-
- // Target-dependent hints require resolution.
- Hint = TRI.ResolveRegAllocHint(HintPair.first, Hint,
- VRM.getMachineFunction());
- } else {
- // If there is no hint or just a normal hint, use the cached allocation
- // order from RegisterClassInfo.
- ArrayRef<unsigned> O = RCI.getOrder(RC);
- Begin = O.begin();
- End = O.end();
- }
-
- // The hint must be a valid physreg for allocation.
- if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
- !RC->contains(Hint) || MRI.isReserved(Hint)))
- Hint = 0;
-}
-
-AllocationOrder::~AllocationOrder() {
- if (OwnedBegin)
- delete [] Begin;
+ : Pos(0) {
+ const MachineFunction &MF = VRM.getMachineFunction();
+ const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
+ Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
+ TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM);
+ rewind();
+
+ DEBUG({
+ if (!Hints.empty()) {
+ dbgs() << "hints:";
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ dbgs() << ' ' << PrintReg(Hints[I], TRI);
+ dbgs() << '\n';
+ }
+ });
+#ifndef NDEBUG
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() &&
+ "Target hint is outside allocation order.");
+#endif
}
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 0ce7e0c3b5f6..aed461a7ed02 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -17,21 +17,21 @@
#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
#define LLVM_CODEGEN_ALLOCATIONORDER_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
namespace llvm {
class RegisterClassInfo;
class VirtRegMap;
class AllocationOrder {
- const unsigned *Begin;
- const unsigned *End;
- const unsigned *Pos;
- const RegisterClassInfo &RCI;
- unsigned Hint;
- bool OwnedBegin;
-public:
+ SmallVector<MCPhysReg, 16> Hints;
+ ArrayRef<MCPhysReg> Order;
+ int Pos;
- /// AllocationOrder - Create a new AllocationOrder for VirtReg.
+public:
+ /// Create a new AllocationOrder for VirtReg.
/// @param VirtReg Virtual register to allocate for.
/// @param VRM Virtual register map for function.
/// @param RegClassInfo Information about reserved and allocatable registers.
@@ -39,32 +39,45 @@ public:
const VirtRegMap &VRM,
const RegisterClassInfo &RegClassInfo);
- ~AllocationOrder();
+ /// Get the allocation order without reordered hints.
+ ArrayRef<MCPhysReg> getOrder() const { return Order; }
- /// next - Return the next physical register in the allocation order, or 0.
- /// It is safe to call next again after it returned 0.
- /// It will keep returning 0 until rewind() is called.
+ /// Return the next physical register in the allocation order, or 0.
+ /// It is safe to call next() again after it returned 0, it will keep
+ /// returning 0 until rewind() is called.
unsigned next() {
- // First take the hint.
- if (!Pos) {
- Pos = Begin;
- if (Hint)
- return Hint;
- }
- // Then look at the order from TRI.
- while (Pos != End) {
- unsigned Reg = *Pos++;
- if (Reg != Hint)
+ if (Pos < 0)
+ return Hints.end()[Pos++];
+ while (Pos < int(Order.size())) {
+ unsigned Reg = Order[Pos++];
+ if (!isHint(Reg))
return Reg;
}
return 0;
}
- /// rewind - Start over from the beginning.
- void rewind() { Pos = 0; }
+ /// As next(), but allow duplicates to be returned, and stop before the
+ /// Limit'th register in the RegisterClassInfo allocation order.
+ ///
+ /// This can produce more than Limit registers if there are hints.
+ unsigned nextWithDups(unsigned Limit) {
+ if (Pos < 0)
+ return Hints.end()[Pos++];
+ if (Pos < int(Limit))
+ return Order[Pos++];
+ return 0;
+ }
+
+ /// Start over from the beginning.
+ void rewind() { Pos = -int(Hints.size()); }
- /// isHint - Return true if PhysReg is a preferred register.
- bool isHint(unsigned PhysReg) const { return PhysReg == Hint; }
+ /// Return true if the last register returned from next() was a preferred register.
+ bool isHint() const { return Pos <= 0; }
+
+ /// Return true if PhysReg is a preferred register.
+ bool isHint(unsigned PhysReg) const {
+ return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end();
+ }
};
} // end namespace llvm
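
The new next() walks the hint vector first by indexing backwards from Hints.end() with a negative Pos, then scans the class allocation order while skipping registers it already produced as hints. A standalone sketch of that traversal, with plain vectors standing in for the MCPhysReg arrays:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct MiniOrder {
      std::vector<unsigned> Hints;
      std::vector<unsigned> Order;
      int Pos;

      // Pos starts at -Hints.size(); while negative it indexes from the
      // end of Hints, exactly like Hints.end()[Pos++] in the patch.
      void rewind() { Pos = -int(Hints.size()); }

      bool isHint(unsigned Reg) const {
        return std::find(Hints.begin(), Hints.end(), Reg) != Hints.end();
      }

      unsigned next() {
        if (Pos < 0)
          return *(Hints.end() + Pos++);
        while (Pos < int(Order.size())) {
          unsigned Reg = Order[Pos++];
          if (!isHint(Reg))       // don't return a hinted register twice
            return Reg;
        }
        return 0;                 // 0 terminates the walk
      }
    };

    int main() {
      MiniOrder O{{3}, {1, 2, 3, 4}, 0};
      O.rewind();
      for (unsigned R = O.next(); R; R = O.next())
        std::printf("%u ", R);    // prints: 3 1 2 4
      return 0;
    }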
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 5162ad762e73..dd7282c0ad97 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -13,19 +13,17 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLowering.h"
using namespace llvm;
/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
@@ -266,8 +264,7 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
- const TargetLowering &TLI) {
+bool llvm::isInTailCallPosition(ImmutableCallSite CS,
+                                const TargetLowering &TLI) {
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
@@ -313,14 +310,16 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
const Function *F = ExitBB->getParent();
- Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if (AttrBuilder(CalleeRetAttr).removeAttribute(Attributes::NoAlias) !=
- AttrBuilder(CallerRetAttr).removeAttribute(Attributes::NoAlias))
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias))
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerRetAttr.hasAttribute(Attributes::ZExt) ||
- CallerRetAttr.hasAttribute(Attributes::SExt))
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
return false;
// Otherwise, make sure the unmodified return value of I is the return value.
@@ -348,23 +347,3 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
return true;
}
-
-bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
- SDValue &Chain, const TargetLowering &TLI) {
- const Function *F = DAG.getMachineFunction().getFunction();
-
- // Conservatively require the attributes of the call to match those of
- // the return. Ignore noalias because it doesn't affect the call sequence.
- Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if (AttrBuilder(CallerRetAttr)
- .removeAttribute(Attributes::NoAlias).hasAttributes())
- return false;
-
- // It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerRetAttr.hasAttribute(Attributes::ZExt) ||
- CallerRetAttr.hasAttribute(Attributes::SExt))
- return false;
-
- // Check if the only use is a function return node.
- return TLI.isUsedByReturnOnly(Node, Chain);
-}
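
The rewritten check above requires the caller's and callee's return attributes to agree once noalias is stripped, since noalias does not affect the call sequence. A generic sketch of that "equal modulo one ignored attribute" comparison, with a std::set standing in for AttrBuilder:

    #include <set>

    enum class Attr { ZExt, SExt, NoAlias, InReg };

    // Remove the ignored attribute from both sides, then compare;
    // everything else must match exactly for a safe tail call.
    static bool retAttrsCompatible(std::set<Attr> Caller,
                                   std::set<Attr> Callee) {
      Caller.erase(Attr::NoAlias);
      Callee.erase(Attr::NoAlias);
      return Caller == Callee;
    }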
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index b2ebf04e518f..188047d94f48 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -12,32 +12,31 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
-cl::opt<bool>
+static cl::opt<bool>
EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
cl::desc("Generate ARM EHABI tables with unwinding descriptors"),
cl::init(false));
@@ -69,24 +68,69 @@ void ARMException::EndFunction() {
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
Asm->getFunctionNumber()));
- // Emit references to personality.
- if (const Function * Personality =
- MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
- MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
- Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
- Asm->OutStreamer.EmitPersonality(PerSym);
- }
-
if (EnableARMEHABIDescriptors) {
// Map all labels and get rid of any dead landing pads.
MMI->TidyLandingPads();
- Asm->OutStreamer.EmitHandlerData();
+ if (!MMI->getLandingPads().empty()) {
+ // Emit references to personality.
+ if (const Function * Personality =
+ MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+ MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+ Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+ Asm->OutStreamer.EmitPersonality(PerSym);
+ }
+
+ // Emit .handlerdata directive.
+ Asm->OutStreamer.EmitHandlerData();
- // Emit actual exception table
- EmitExceptionTable();
+ // Emit actual exception table
+ EmitExceptionTable();
+ }
}
}
Asm->OutStreamer.EmitFnEnd();
}
+
+void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ int Entry = 0;
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
+ }
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
+ }
+
+ Asm->EmitTTypeReference((TypeID == 0 ? 0 : TypeInfos[TypeID - 1]),
+ TTypeEncoding);
+ }
+}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d74a70362a2a..d4a745d985e8 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -15,8 +15,10 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,7 +26,10 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -32,20 +37,16 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Timer.h"
using namespace llvm;
static const char *DWARFGroupName = "DWARF Emission";
@@ -90,9 +91,6 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
return NumBits;
}
-
-
-
AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
: MachineFunctionPass(ID),
TM(tm), MAI(tm.getMCAsmInfo()),
@@ -130,7 +128,6 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
return TM.getTargetLowering()->getObjFileLowering();
}
-
/// getDataLayout - Return information about data layout.
const DataLayout &AsmPrinter::getDataLayout() const {
return *TM.getDataLayout();
@@ -153,6 +150,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool AsmPrinter::doInitialization(Module &M) {
+ OutStreamer.InitStreamer();
+
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
MMI->AnalyzeModule(M);
@@ -312,8 +311,13 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
return;
}
- if (Align == 1 ||
- MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
+ // Use .lcomm only if it supports user-specified alignment.
+ // Otherwise, while it would still be correct to use .lcomm in some
+ // cases (e.g. when Align == 1), the external assembler might enforce
+ // some -unknown- default alignment behavior, which could cause
+ // spurious differences between external and integrated assembler.
+ // Prefer to simply fall back to .local / .comm in this case.
+ if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
// .lcomm _foo, 42
OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
return;
@@ -387,9 +391,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// - pointer to mangled symbol above with initializer
unsigned PtrSize = TD->getPointerSizeInBits()/8;
OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
- PtrSize, 0);
- OutStreamer.EmitIntValue(0, PtrSize, 0);
- OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
+ PtrSize);
+ OutStreamer.EmitIntValue(0, PtrSize);
+ OutStreamer.EmitSymbolValue(MangSym, PtrSize);
OutStreamer.AddBlankLine();
return;
@@ -943,6 +947,8 @@ bool AsmPrinter::doFinalization(Module &M) {
MMI = 0;
OutStreamer.Finish();
+ OutStreamer.reset();
+
return false;
}
@@ -1034,7 +1040,7 @@ void AsmPrinter::EmitConstantPool() {
// Emit inter-object padding for alignment.
unsigned AlignMask = CPE.getAlignment() - 1;
unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
- OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
+ OutStreamer.EmitZeros(NewOffset - Offset);
Type *Ty = CPE.getType();
Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
@@ -1197,7 +1203,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
assert(Value && "Unknown entry kind!");
unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
- OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
+ OutStreamer.EmitValue(Value, EntrySize);
}
@@ -1320,19 +1326,19 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
/// EmitInt8 - Emit a byte directive and value.
///
void AsmPrinter::EmitInt8(int Value) const {
- OutStreamer.EmitIntValue(Value, 1, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(Value, 1);
}
/// EmitInt16 - Emit a short directive and value.
///
void AsmPrinter::EmitInt16(int Value) const {
- OutStreamer.EmitIntValue(Value, 2, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(Value, 2);
}
/// EmitInt32 - Emit a long directive and value.
///
void AsmPrinter::EmitInt32(int Value) const {
- OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(Value, 4);
}
/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
@@ -1347,14 +1353,14 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
OutContext);
if (!MAI->hasSetDirective()) {
- OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/);
+ OutStreamer.EmitValue(Diff, Size);
return;
}
// Otherwise, emit with .set (aka assignment).
MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
OutStreamer.EmitAssignment(SetLabel, Diff);
- OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
+ OutStreamer.EmitSymbolValue(SetLabel, Size);
}
/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
@@ -1378,12 +1384,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
OutContext);
if (!MAI->hasSetDirective())
- OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/);
+ OutStreamer.EmitValue(Diff, 4);
else {
// Otherwise, emit with .set (aka assignment).
MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
OutStreamer.EmitAssignment(SetLabel, Diff);
- OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/);
+ OutStreamer.EmitSymbolValue(SetLabel, 4);
}
}
@@ -1401,7 +1407,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
MCConstantExpr::Create(Offset, OutContext),
OutContext);
- OutStreamer.EmitValue(Expr, Size, 0/*AddrSpace*/);
+ OutStreamer.EmitValue(Expr, Size);
}
@@ -1472,19 +1478,14 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
case Instruction::GetElementPtr: {
const DataLayout &TD = *AP.TM.getDataLayout();
// Generate a symbolic expression for the byte address
- const Constant *PtrVal = CE->getOperand(0);
- SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
- int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
+ APInt OffsetAI(TD.getPointerSizeInBits(), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
- if (Offset == 0)
+ if (!OffsetAI)
return Base;
- // Truncate/sext the offset to the pointer size.
- unsigned Width = TD.getPointerSizeInBits();
- if (Width < 64)
- Offset = SignExtend64(Offset, Width);
-
+ int64_t Offset = OffsetAI.getSExtValue();
return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
Ctx);
}
@@ -1614,7 +1615,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
}
return Byte;
}
-
+
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V))
return isRepeatedByteSequence(CDS);
@@ -1623,7 +1624,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
unsigned AddrSpace,AsmPrinter &AP){
-
+
// See if we can aggregate this into a .fill, if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, AP.TM);
if (Value != -1) {
@@ -1632,7 +1633,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
if (Bytes > 1)
return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
}
-
+
// If this can be emitted with .ascii/.asciz, emit it as such.
if (CDS->isString())
return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace);
@@ -1656,7 +1657,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
float F;
uint32_t I;
};
-
+
F = CDS->getElementAsFloat(i);
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << "float " << F << '\n';
@@ -1669,7 +1670,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
double F;
uint64_t I;
};
-
+
F = CDS->getElementAsDouble(i);
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << "double " << F << '\n';
@@ -1745,87 +1746,48 @@ static void emitGlobalConstantStruct(const ConstantStruct *CS,
static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
AsmPrinter &AP) {
- if (CFP->getType()->isHalfTy()) {
- if (AP.isVerbose()) {
- SmallString<10> Str;
- CFP->getValueAPF().toString(Str);
- AP.OutStreamer.GetCommentOS() << "half " << Str << '\n';
- }
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.EmitIntValue(Val, 2, AddrSpace);
- return;
- }
-
- if (CFP->getType()->isFloatTy()) {
- if (AP.isVerbose()) {
- float Val = CFP->getValueAPF().convertToFloat();
- uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'
- << " (" << format("0x%x", IntVal) << ")\n";
- }
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
- return;
- }
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
- // FP Constants are printed as integer constants to avoid losing
- // precision.
- if (CFP->getType()->isDoubleTy()) {
- if (AP.isVerbose()) {
- double Val = CFP->getValueAPF().convertToDouble();
- uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'
- << " (" << format("0x%lx", IntVal) << ")\n";
- }
+ // First print a comment with what we think the original floating-point value
+ // should have been.
+ if (AP.isVerbose()) {
+ SmallString<8> StrVal;
+ CFP->getValueAPF().toString(StrVal);
- uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
- return;
+ CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+ AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n';
}
- if (CFP->getType()->isX86_FP80Ty()) {
- // all long double variants are printed as hex
- // API needed to prevent premature destruction
- APInt API = CFP->getValueAPF().bitcastToAPInt();
- const uint64_t *p = API.getRawData();
- if (AP.isVerbose()) {
- // Convert to double so we can print the approximate val as a comment.
- APFloat DoubleVal = CFP->getValueAPF();
- bool ignored;
- DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
- &ignored);
- AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= "
- << DoubleVal.convertToDouble() << '\n';
- }
+ // Now iterate through the APInt chunks, emitting them in endian-correct
+ // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit
+ // floats).
+ unsigned NumBytes = API.getBitWidth() / 8;
+ unsigned TrailingBytes = NumBytes % sizeof(uint64_t);
+ const uint64_t *p = API.getRawData();
- if (AP.TM.getDataLayout()->isBigEndian()) {
- AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
- AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
- } else {
- AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
- AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
- }
+ // PPC's long double has odd notions of endianness compared to how LLVM
+ // handles it: p[0] goes first for *big* endian on PPC.
+ if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) {
+ int Chunk = API.getNumWords() - 1;
- // Emit the tail padding for the long double.
- const DataLayout &TD = *AP.TM.getDataLayout();
- AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
- TD.getTypeStoreSize(CFP->getType()), AddrSpace);
- return;
- }
+ if (TrailingBytes)
+ AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace);
- assert(CFP->getType()->isPPC_FP128Ty() &&
- "Floating point constant type not handled");
- // All long double variants are printed as hex
- // API needed to prevent premature destruction.
- APInt API = CFP->getValueAPF().bitcastToAPInt();
- const uint64_t *p = API.getRawData();
- if (AP.TM.getDataLayout()->isBigEndian()) {
- AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
- AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
+ for (; Chunk >= 0; --Chunk)
+ AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
} else {
- AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
- AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+ unsigned Chunk;
+ for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
+ AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace);
+
+ if (TrailingBytes)
+ AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace);
}
+
+ // Emit the tail padding for the long double.
+ const DataLayout &TD = *AP.TM.getDataLayout();
+ AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
+ TD.getTypeStoreSize(CFP->getType()), AddrSpace);
}
static void emitGlobalConstantLargeInt(const ConstantInt *CI,
@@ -1878,7 +1840,7 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
return emitGlobalConstantDataSequential(CDS, AddrSpace, AP);
-
+
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
return emitGlobalConstantArray(CVA, AddrSpace, AP);
@@ -1900,10 +1862,10 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
return emitGlobalConstantImpl(New, AddrSpace, AP);
}
}
-
+
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
return emitGlobalConstantVector(V, AddrSpace, AP);
-
+
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace);
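
The emitGlobalConstantFP rewrite above walks the APInt payload as full 64-bit words plus one short trailing chunk (two bytes for x87's 80-bit format), reversing word order on big-endian targets. A self-contained sketch of the chunking arithmetic:

    #include <cstdint>
    #include <cstdio>

    static void emitChunk(uint64_t V, unsigned Bytes) {
      std::printf("emit %u byte(s): 0x%llx\n", Bytes, (unsigned long long)V);
    }

    // A BitWidth-bit payload stored as little-endian uint64_t words,
    // written highest-word-first when the target is big-endian.
    static void emitWords(const uint64_t *Words, unsigned NumWords,
                          unsigned BitWidth, bool BigEndian) {
      unsigned NumBytes = BitWidth / 8;                     // 10 for x86_fp80
      unsigned TrailingBytes = NumBytes % sizeof(uint64_t); // 2 for x86_fp80
      if (BigEndian) {
        int Chunk = NumWords - 1;
        if (TrailingBytes)
          emitChunk(Words[Chunk--], TrailingBytes);  // short chunk first
        for (; Chunk >= 0; --Chunk)
          emitChunk(Words[Chunk], 8);
      } else {
        unsigned Chunk = 0;
        for (; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
          emitChunk(Words[Chunk], 8);
        if (TrailingBytes)
          emitChunk(Words[Chunk], TrailingBytes);    // short chunk last
      }
    }

    int main() {
      // 1.0 as an x87 80-bit float: mantissa word, then exponent word.
      const uint64_t FP80[2] = {0x8000000000000000ULL, 0x3fff};
      emitWords(FP80, 2, 80, /*BigEndian=*/false);
      return 0;
    }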
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index d94e1fe61bf7..156acace553d 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -13,19 +13,19 @@
#define DEBUG_TYPE "asm-printer"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/DataLayout.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -46,19 +46,19 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo);
+ OutStreamer.EmitULEB128IntValue(Value, PadTo);
}
/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
void AsmPrinter::EmitCFAByte(unsigned Val) const {
if (isVerbose()) {
if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
- OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+ OutStreamer.AddComment("DW_CFA_offset + Reg (" +
Twine(Val-dwarf::DW_CFA_offset) + ")");
else
OutStreamer.AddComment(dwarf::CallFrameString(Val));
}
- OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(Val, 1);
}
static const char *DecodeDWARFEncoding(unsigned Encoding) {
@@ -83,7 +83,7 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) {
case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
return "indirect pcrel sdata8";
}
-
+
return "<unknown encoding>";
}
@@ -101,15 +101,15 @@ void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
OutStreamer.AddComment(Twine("Encoding = ") +
DecodeDWARFEncoding(Val));
}
-
- OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+
+ OutStreamer.EmitIntValue(Val, 1);
}
/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
if (Encoding == dwarf::DW_EH_PE_omit)
return 0;
-
+
switch (Encoding & 0x07) {
default: llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize();
@@ -119,20 +119,16 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
}
}
-void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
- const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-
- const MCExpr *Exp =
- TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer);
- OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding));
-}
+void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
+ unsigned Encoding) const {
+ if (GV) {
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
- const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-
- const MCExpr *Exp =
- TLOF.getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
- OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+ const MCExpr *Exp =
+ TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
+ OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
+ } else
+ OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
}
/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
@@ -149,22 +145,22 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
OutStreamer.EmitCOFFSecRel32(Label);
return;
}
-
+
// Get the section that we're referring to, based on SectionLabel.
const MCSection &Section = SectionLabel->getSection();
-
+
// If Label has already been emitted, verify that it is in the same section as
// section label for sanity.
assert((!Label->isInSection() || &Label->getSection() == &Section) &&
"Section offset using wrong section base for label");
-
+
// If the section in question will end up with an address of 0 anyway, we can
// just emit an absolute reference to save a relocation.
if (Section.isBaseAddressKnownZero()) {
- OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/);
+ OutStreamer.EmitSymbolValue(Label, 4);
return;
}
-
+
// Otherwise, emit it as a label difference from the start of the section.
EmitLabelDifference(Label, SectionLabel, 4);
}
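
EmitULEB128 above forwards to the streamer; the underlying DWARF unsigned LEB128 format itself is simple: 7 payload bits per byte, with the high bit set on every byte except the last. A self-contained encoder sketch:

    #include <cstdint>
    #include <vector>

    // Encode Value as unsigned LEB128: low 7 bits per byte, MSB set on
    // all bytes except the final one.
    static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80;    // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;          // e.g. 624485 -> {0xE5, 0x8E, 0x26}
    }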
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 50f0fc30a07c..abfa330fa29d 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -13,26 +13,26 @@
#define DEBUG_TYPE "asm-printer"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
namespace {
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 58fe2ed9d357..8d15c069c6f8 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMAsmPrinter
DwarfCompileUnit.cpp
DwarfDebug.cpp
DwarfException.cpp
+ ErlangGCPrinter.cpp
OcamlGCPrinter.cpp
Win64Exception.cpp
)
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 4d73b3c22261..57e0acda890f 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -8,16 +8,16 @@
//===----------------------------------------------------------------------===//
//
// Data structures for DWARF info entries.
-//
+//
//===----------------------------------------------------------------------===//
#include "DIE.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/DataLayout.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -112,6 +112,17 @@ DIE::~DIE() {
delete Children[i];
}
+/// Climb up the parent chain to get the compile unit DIE this DIE belongs to.
+DIE *DIE::getCompileUnit() const {
+ DIE *p = getParent();
+ while (p) {
+ if (p->getTag() == dwarf::DW_TAG_compile_unit)
+ return p;
+ p = p->getParent();
+ }
+ llvm_unreachable("We should not have orphaned DIEs.");
+}
+
#ifndef NDEBUG
void DIE::print(raw_ostream &O, unsigned IncIndent) {
IndentCount += IncIndent;
@@ -133,7 +144,7 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) {
O << "Size: " << Size << "\n";
}
- const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+ const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData();
IndentCount += 2;
for (unsigned i = 0, N = Data.size(); i < N; ++i) {
@@ -193,17 +204,20 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
case dwarf::DW_FORM_data1: Size = 1; break;
case dwarf::DW_FORM_ref2: // Fall thru
case dwarf::DW_FORM_data2: Size = 2; break;
+ case dwarf::DW_FORM_sec_offset: // Fall thru
case dwarf::DW_FORM_ref4: // Fall thru
case dwarf::DW_FORM_data4: Size = 4; break;
case dwarf::DW_FORM_ref8: // Fall thru
case dwarf::DW_FORM_data8: Size = 8; break;
+ case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
case dwarf::DW_FORM_addr:
Size = Asm->getDataLayout().getPointerSize(); break;
default: llvm_unreachable("DIE Value form not supported yet");
}
- Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
+ Asm->OutStreamer.EmitIntValue(Integer, Size);
}
/// SizeOf - Determine size of integer value in bytes.
@@ -216,10 +230,13 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_data1: return sizeof(int8_t);
case dwarf::DW_FORM_ref2: // Fall thru
case dwarf::DW_FORM_data2: return sizeof(int16_t);
+ case dwarf::DW_FORM_sec_offset: // Fall thru
case dwarf::DW_FORM_ref4: // Fall thru
case dwarf::DW_FORM_data4: return sizeof(int32_t);
case dwarf::DW_FORM_ref8: // Fall thru
case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer);
+ case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
@@ -241,13 +258,14 @@ void DIEInteger::print(raw_ostream &O) {
/// EmitValue - Emit label value.
///
void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
- AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form), 0/*AddrSpace*/);
+ AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form));
}
/// SizeOf - Determine size of label value in bytes.
///
unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
return AP->getDataLayout().getPointerSize();
}
@@ -306,7 +324,7 @@ void DIEEntry::print(raw_ostream &O) {
///
unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
if (!Size) {
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
}
@@ -325,7 +343,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
}
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
}
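
The new DW_FORM_GNU_str_index and DW_FORM_GNU_addr_index cases above are emitted as ULEB128 rather than fixed-width data. For reference, a self-contained sketch of the LEB128 scheme that EmitULEB128 and MCAsmInfo::getULEB128Size implement (illustrative, not the LLVM implementation):

#include <cstdint>
#include <vector>

// ULEB128: 7 payload bits per byte, high bit set on all but the last byte.
std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;            // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out;
}

// Size in bytes of the encoding above, as getULEB128Size computes it.
unsigned sizeULEB128(uint64_t Value) {
  unsigned Size = 0;
  do {
    Value >>= 7;
    ++Size;
  } while (Value != 0);
  return Size;
}
// Example: 624485 encodes as 0xE5 0x8E 0x26, so sizeULEB128(624485) == 3.
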
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 28a96f3b2b65..c332aa2a7db6 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// Data structures for DWARF info entries.
-//
+//
//===----------------------------------------------------------------------===//
#ifndef CODEGEN_ASMPRINTER_DIE_H__
@@ -66,7 +66,7 @@ namespace llvm {
/// Data - Raw data bytes for abbreviation.
///
- SmallVector<DIEAbbrevData, 8> Data;
+ SmallVector<DIEAbbrevData, 12> Data;
public:
DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
@@ -75,7 +75,7 @@ namespace llvm {
uint16_t getTag() const { return Tag; }
unsigned getNumber() const { return Number; }
uint16_t getChildrenFlag() const { return ChildrenFlag; }
- const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+ const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
void setTag(uint16_t T) { Tag = T; }
void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
void setNumber(unsigned N) { Number = N; }
@@ -108,7 +108,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIE - A structured debug information entry. Has an abbreviation which
- /// describes it's organization.
+ /// describes its organization.
class DIEValue;
class DIE {
@@ -131,9 +131,9 @@ namespace llvm {
DIE *Parent;
- /// Attributes values.
+ /// Attribute values.
///
- SmallVector<DIEValue*, 32> Values;
+ SmallVector<DIEValue*, 12> Values;
// Private data for print()
mutable unsigned IndentCount;
@@ -150,12 +150,15 @@ namespace llvm {
unsigned getOffset() const { return Offset; }
unsigned getSize() const { return Size; }
const std::vector<DIE *> &getChildren() const { return Children; }
- const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
+ const SmallVectorImpl<DIEValue*> &getValues() const { return Values; }
DIE *getParent() const { return Parent; }
+ /// Climb up the parent chain to get the compile unit DIE this DIE belongs
+ /// to.
+ DIE *getCompileUnit() const;
void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
void setOffset(unsigned O) { Offset = O; }
void setSize(unsigned S) { Size = S; }
-
+
/// addValue - Add a value and attributes to a DIE.
///
void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
@@ -232,9 +235,10 @@ namespace llvm {
///
static unsigned BestForm(bool IsSigned, uint64_t Int) {
if (IsSigned) {
- if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1;
- if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2;
- if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4;
+ const int64_t SignedInt = Int;
+ if ((char)Int == SignedInt) return dwarf::DW_FORM_data1;
+ if ((short)Int == SignedInt) return dwarf::DW_FORM_data2;
+ if ((int)Int == SignedInt) return dwarf::DW_FORM_data4;
} else {
if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1;
if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
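
The BestForm fix in this hunk is easiest to see with a concrete value. A standalone check, assuming the usual two's-complement platforms with 32-bit int that LLVM targets (out-of-range unsigned-to-signed conversion is implementation-defined before C++20):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Int = 0x100000000ULL;   // 2^32: needs DW_FORM_data8
  int64_t SignedInt = Int;         // the fix: widen once to 64 bits

  // Old comparison truncated both sides to 32-bit int, so 2^32 compared
  // as 0 == 0 and was wrongly classified as fitting DW_FORM_data4.
  assert((int)Int == (signed)Int);

  // New comparison keeps the full value on the right-hand side.
  assert((int)Int != SignedInt);   // 0 != 2^32: data4 correctly rejected
  return 0;
}
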
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 05e0f2fb63b3..f58ec9b4bf46 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "DwarfAccelTable.h"
-#include "DwarfDebug.h"
#include "DIE.h"
-#include "llvm/ADT/Twine.h"
+#include "DwarfDebug.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
@@ -32,7 +32,7 @@ const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
case eAtomTypeTag: return "eAtomTypeTag";
case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
- }
+ }
llvm_unreachable("invalid AtomType!");
}
@@ -155,7 +155,7 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
HE = Buckets[i].end(); HI != HE; ++HI) {
Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
Asm->EmitInt32((*HI)->HashValue);
- }
+ }
}
}
@@ -173,7 +173,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
MCSymbolRefExpr::Create(SecBegin, Context),
Context);
- Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0);
+ Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t));
}
}
}
@@ -181,7 +181,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
// Walk through the buckets and emit the full data for each element in
// the bucket. For the string case emit the dies and the various offsets.
// Terminate each HashData bucket with 0.
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
uint64_t PrevHash = UINT64_MAX;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
@@ -190,7 +190,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
Asm->OutStreamer.EmitLabel((*HI)->Sym);
Asm->OutStreamer.AddComment((*HI)->Str);
Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
- D->getStringPool());
+ D->getStringPoolSym());
Asm->OutStreamer.AddComment("Num DIEs");
Asm->EmitInt32((*HI)->Data.size());
for (ArrayRef<HashDataContents*>::const_iterator
@@ -215,7 +215,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
// Emit the entire data structure to the output file.
void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
- DwarfDebug *D) {
+ DwarfUnits *D) {
// Emit the header.
EmitHeader(Asm);
@@ -258,7 +258,7 @@ void DwarfAccelTable::print(raw_ostream &O) {
for (std::vector<HashData*>::const_iterator
DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
(*DI)->print(O);
-
+
}
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 92d1bbe4f7e8..9915bcaa9b69 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -14,18 +14,18 @@
#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
-#include "llvm/ADT/StringMap.h"
+#include "DIE.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
-#include "DIE.h"
-#include <vector>
#include <map>
+#include <vector>
// The dwarf accelerator tables are an indirect hash table optimized
// for null lookup rather than access to known data. They are output into
@@ -51,7 +51,7 @@
// section contains all of the 32-bit hash values in contiguous memory, and
// the offsets contain the offset into the data area for the particular
// hash.
-//
+//
// For a lookup example, we could hash a function name and take it modulo the
// number of buckets giving us our bucket. From there we take the bucket value
// as an index into the hashes table and look at each successive hash as long
@@ -63,8 +63,8 @@ namespace llvm {
class AsmPrinter;
class DIE;
-class DwarfDebug;
-
+class DwarfUnits;
+
class DwarfAccelTable {
enum HashFunctionType {
@@ -81,7 +81,7 @@ class DwarfAccelTable {
// Helper function to compute the number of buckets needed based on
// the number of unique hashes.
void ComputeBucketCount (void);
-
+
struct TableHeader {
uint32_t magic; // 'HASH' magic value to allow endian detection
uint16_t version; // Version number.
@@ -94,7 +94,7 @@ class DwarfAccelTable {
// Also written to disk is the implementation specific header data.
static const uint32_t MagicHash = 0x48415348;
-
+
TableHeader (uint32_t data_len) :
magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
bucket_count (0), hashes_count (0), header_data_len (data_len)
@@ -123,7 +123,7 @@ public:
//
// uint32_t die_offset_base
// uint32_t atom_count
- // atom_count Atoms
+ // atom_count Atoms
enum AtomType {
eAtomTypeNULL = 0u,
eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
@@ -138,12 +138,12 @@ public:
enum TypeFlags {
eTypeFlagClassMask = 0x0000000fu,
-
+
// Always set for C++, only set for ObjC if this is the
// @implementation for a class.
eTypeFlagClassIsImplementation = ( 1u << 1 )
- };
-
+ };
+
// Make these public so that they can be used as a general interface to
// the class.
struct Atom {
@@ -245,7 +245,7 @@ private:
void EmitBuckets(AsmPrinter *);
void EmitHashes(AsmPrinter *);
void EmitOffsets(AsmPrinter *, MCSymbol *);
- void EmitData(AsmPrinter *, DwarfDebug *D);
+ void EmitData(AsmPrinter *, DwarfUnits *D);
// Allocator for HashData and HashDataContents.
BumpPtrAllocator Allocator;
@@ -265,14 +265,14 @@ private:
typedef std::vector<HashList> BucketList;
BucketList Buckets;
HashList Hashes;
-
+
// Public Implementation
public:
DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
~DwarfAccelTable();
void AddName(StringRef, DIE*, char = 0);
void FinalizeTable(AsmPrinter *, const char *);
- void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *);
+ void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *);
#ifndef NDEBUG
void print(raw_ostream &O);
void dump() { print(dbgs()); }
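
The header comment above describes the lookup procedure in prose; a toy standalone model makes the bucket/hash indexing concrete. djbHash matches the eHashFunctionDJB constant named in the header, but the table layout here is simplified (real tables live in object-file sections):

#include <cstdint>
#include <string>
#include <vector>

// Bernstein hash (eHashFunctionDJB above).
uint32_t djbHash(const std::string &S) {
  uint32_t H = 5381;
  for (unsigned char C : S)
    H = H * 33 + C;
  return H;
}

// Buckets[i] holds the index of the first hash belonging to bucket i;
// Hashes is sorted so each bucket's hashes are contiguous. Returns the
// index whose offsets-table entry points at the matching data, or -1.
int lookup(const std::vector<uint32_t> &Buckets,
           const std::vector<uint32_t> &Hashes, const std::string &Name) {
  uint32_t NumBuckets = (uint32_t)Buckets.size();
  uint32_t H = djbHash(Name);
  uint32_t B = H % NumBuckets;
  for (uint32_t I = Buckets[B];
       I < Hashes.size() && Hashes[I] % NumBuckets == B; ++I)
    if (Hashes[I] == H)
      return (int)I;
  return -1; // the optimized-for case: the name is not in the table
}
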
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 4fdd5ca25221..fec5cedc684b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -12,31 +12,31 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
@@ -122,8 +122,9 @@ void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
+ Asm->OutStreamer.EmitDebugLabel(
+ Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
// Provide LSDA information.
if (!shouldEmitLSDA)
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 2b07dda31ffe..f9b6f9472141 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -13,28 +13,29 @@
#define DEBUG_TYPE "dwarfdebug"
-#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
+#include "DwarfAccelTable.h"
#include "DwarfDebug.h"
-#include "llvm/Constants.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/DIBuilder.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
/// CompileUnit - Compile unit constructor.
-CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A,
- DwarfDebug *DW)
- : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
+CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A,
+ DwarfDebug *DW, DwarfUnits *DWU)
+ : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU),
+ IndexTyDie(0), DebugInfoOffset(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
@@ -51,6 +52,50 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
return Value;
}
+/// getDefaultLowerBound - Return the default lower bound for an array. If the
+/// DWARF version doesn't handle the language, return -1.
+int64_t CompileUnit::getDefaultLowerBound() const {
+ switch (Language) {
+ default:
+ break;
+
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_ObjC:
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ return 0;
+
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ case dwarf::DW_LANG_Fortran95:
+ return 1;
+
+ // The languages below have valid values only if the DWARF version >= 4.
+ case dwarf::DW_LANG_Java:
+ case dwarf::DW_LANG_Python:
+ case dwarf::DW_LANG_UPC:
+ case dwarf::DW_LANG_D:
+ if (dwarf::DWARF_VERSION >= 4)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Ada83:
+ case dwarf::DW_LANG_Ada95:
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ case dwarf::DW_LANG_Modula2:
+ case dwarf::DW_LANG_Pascal83:
+ case dwarf::DW_LANG_PLI:
+ if (dwarf::DWARF_VERSION >= 4)
+ return 1;
+ break;
+ }
+
+ return -1;
+}
+
/// addFlag - Add a flag that is true.
void CompileUnit::addFlag(DIE *Die, unsigned Attribute) {
if (!DD->useDarwinGDBCompat())
@@ -81,14 +126,37 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
/// addString - Add a string attribute data and value. We always emit a
/// reference to the string pool instead of immediate strings so that DIEs have
-/// more predictable sizes.
+/// more predictable sizes. In the case of split dwarf we emit an index
+/// into a separate table, which in turn holds the offset into the string
+/// table.
void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) {
- MCSymbol *Symb = DD->getStringPoolEntry(String);
+ if (!DD->useSplitDwarf()) {
+ MCSymbol *Symb = DU->getStringPoolEntry(String);
+ DIEValue *Value;
+ if (Asm->needsRelocationsForDwarfStringPool())
+ Value = new (DIEValueAllocator) DIELabel(Symb);
+ else {
+ MCSymbol *StringPool = DU->getStringPoolSym();
+ Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
+ }
+ Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
+ } else {
+ unsigned idx = DU->getStringPoolIndex(String);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Value);
+ }
+}
+
+/// addLocalString - Add a string attribute data and value. This is guaranteed
+/// to be in the local string pool instead of indirected.
+void CompileUnit::addLocalString(DIE *Die, unsigned Attribute,
+ StringRef String) {
+ MCSymbol *Symb = DU->getStringPoolEntry(String);
DIEValue *Value;
if (Asm->needsRelocationsForDwarfStringPool())
Value = new (DIEValueAllocator) DIELabel(Symb);
else {
- MCSymbol *StringPool = DD->getStringPool();
+ MCSymbol *StringPool = DU->getStringPoolSym();
Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
}
Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
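
The addString change above is the heart of the split-dwarf string handling: a normal CU stores a DW_FORM_strp offset (or label) into .debug_str, while a split-dwarf CU stores only a small DW_FORM_GNU_str_index and leaves the offsets in a side table. A toy model of that indirection (container choices here are illustrative, not DwarfUnits' actual data structures):

#include <string>
#include <unordered_map>
#include <vector>

// Toy split-dwarf string pool: DIEs store only a small index; a side
// table (emitted separately) maps index -> offset in the string table.
struct StringPoolModel {
  std::vector<std::string> Strings;                // .debug_str contents
  std::unordered_map<std::string, unsigned> Index; // string -> pool index

  unsigned getIndex(const std::string &S) {        // DW_FORM_GNU_str_index
    auto It = Index.find(S);
    if (It != Index.end())
      return It->second;
    unsigned Idx = (unsigned)Strings.size();
    Index.emplace(S, Idx);
    Strings.push_back(S);
    return Idx;
  }

  unsigned getOffset(unsigned Idx) const {         // what DW_FORM_strp holds
    unsigned Off = 0;
    for (unsigned I = 0; I < Idx; ++I)
      Off += (unsigned)Strings[I].size() + 1;      // +1 for the NUL
    return Off;
  }
};
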
@@ -102,6 +170,42 @@ void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
Die->addValue(Attribute, Form, Value);
}
+/// addLabelAddress - Add a dwarf label attribute data and value using
+/// DW_FORM_addr or DW_FORM_GNU_addr_index.
+///
+void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute,
+ MCSymbol *Label) {
+ if (!DD->useSplitDwarf()) {
+ if (Label != NULL) {
+ DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+ Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
+ } else {
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(0);
+ Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
+ }
+ } else {
+ unsigned idx = DU->getAddrPoolIndex(Label);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
+ }
+}
+
+/// addOpAddress - Add a dwarf op address data and value using the
+/// form given and an op of either DW_OP_addr or DW_OP_GNU_addr_index.
+///
+void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) {
+
+ if (!DD->useSplitDwarf()) {
+ addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Die, 0, dwarf::DW_FORM_udata, Sym);
+ } else {
+ unsigned idx = DU->getAddrPoolIndex(Sym);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+ addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
+ Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value);
+ }
+}
+
/// addDelta - Add a label delta attribute data and value.
///
void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
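
In byte terms, the two expressions addOpAddress can produce look as follows. This is a hedged sketch that assumes the symbol has already been resolved to a raw address (the real code emits a relocatable label instead); the opcode values come from DWARF and the GNU extensions:

#include <cstdint>
#include <vector>

enum : uint8_t {
  DW_OP_addr = 0x03,           // DWARF standard opcode
  DW_OP_GNU_addr_index = 0xfb  // GNU extension used by split dwarf
};

// Byte-level shape of the two location expressions.
void emitOpAddress(std::vector<uint8_t> &Out, uint64_t AddrOrIndex,
                   bool SplitDwarf, unsigned PtrSize) {
  if (!SplitDwarf) {
    Out.push_back(DW_OP_addr);              // opcode, then a full pointer
    for (unsigned I = 0; I < PtrSize; ++I)
      Out.push_back(uint8_t(AddrOrIndex >> (8 * I)));
  } else {
    Out.push_back(DW_OP_GNU_addr_index);    // opcode, then a ULEB128 index
    do {
      uint8_t Byte = AddrOrIndex & 0x7f;
      AddrOrIndex >>= 7;
      if (AddrOrIndex)
        Byte |= 0x80;
      Out.push_back(Byte);
    } while (AddrOrIndex);
  }
}
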
@@ -132,12 +236,13 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
// Verify variable.
if (!V.Verify())
return;
-
+
unsigned Line = V.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(),
- V.getContext().getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(),
+ V.getContext().getDirectory(),
+ getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -153,7 +258,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
unsigned Line = G.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), G.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(),
+ getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -171,8 +277,8 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(),
- SP.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(),
+ SP.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -188,8 +294,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
unsigned Line = Ty.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(),
- Ty.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(),
+ Ty.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -206,8 +312,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
if (Line == 0)
return;
DIFile File = Ty.getFile();
- unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(),
- File.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(File.getFilename(),
+ File.getDirectory(), getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -225,15 +331,16 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
return;
StringRef FN = NS.getFilename();
- unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(),
+ getUniqueID());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
-/// addVariableAddress - Add DW_AT_location attribute for a
+/// addVariableAddress - Add DW_AT_location attribute for a
/// DbgVariable based on provided MachineLocation.
-void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
+void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
MachineLocation Location) {
if (DV->variableHasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
@@ -492,7 +599,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
case 64: Form = dwarf::DW_FORM_data8; break;
default: break;
}
- SignedConstant ? addSInt(Block, 0, Form, MO.getImm())
+ SignedConstant ? addSInt(Block, 0, Form, MO.getImm())
: addUInt(Block, 0, Form, MO.getImm());
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
@@ -524,10 +631,21 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
return true;
}
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) {
+ return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false);
+}
+
/// addConstantValue - Add constant value entry in variable DIE.
bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
bool Unsigned) {
- unsigned CIBitWidth = CI->getBitWidth();
+ return addConstantValue(Die, CI->getValue(), Unsigned);
+}
+
+// addConstantValue - Add constant value entry in variable DIE.
+bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val,
+ bool Unsigned) {
+ unsigned CIBitWidth = Val.getBitWidth();
if (CIBitWidth <= 64) {
unsigned form = 0;
switch (CIBitWidth) {
@@ -535,20 +653,19 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
case 16: form = dwarf::DW_FORM_data2; break;
case 32: form = dwarf::DW_FORM_data4; break;
case 64: form = dwarf::DW_FORM_data8; break;
- default:
+ default:
form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
}
if (Unsigned)
- addUInt(Die, dwarf::DW_AT_const_value, form, CI->getZExtValue());
+ addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue());
else
- addSInt(Die, dwarf::DW_AT_const_value, form, CI->getSExtValue());
+ addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue());
return true;
}
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
// Get the raw data form of the large APInt.
- const APInt Val = CI->getValue();
const uint64_t *Ptr64 = Val.getRawData();
int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
@@ -582,18 +699,21 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
}
}
+/// getOrCreateContextDIE - Get context owner's DIE.
+DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) {
+ if (Context.isType())
+ return getOrCreateTypeDIE(DIType(Context));
+ else if (Context.isNameSpace())
+ return getOrCreateNameSpace(DINameSpace(Context));
+ else if (Context.isSubprogram())
+ return getOrCreateSubprogramDIE(DISubprogram(Context));
+ else
+ return getDIE(Context);
+}
+
/// addToContextOwner - Add Die into the list of its context owner's children.
void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
- if (Context.isType()) {
- DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context));
- ContextDIE->addChild(Die);
- } else if (Context.isNameSpace()) {
- DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
- ContextDIE->addChild(Die);
- } else if (Context.isSubprogram()) {
- DIE *ContextDIE = getOrCreateSubprogramDIE(DISubprogram(Context));
- ContextDIE->addChild(Die);
- } else if (DIE *ContextDIE = getDIE(Context))
+ if (DIE *ContextDIE = getOrCreateContextDIE(Context))
ContextDIE->addChild(Die);
else
addDie(Die);
@@ -635,7 +755,7 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
DwarfAccelTable::eTypeFlagClassIsImplementation : 0;
addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
}
-
+
addToContextOwner(TyDIE, Ty.getContext());
return TyDIE;
}
@@ -670,8 +790,8 @@ void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) {
///
void CompileUnit::addGlobalType(DIType Ty) {
DIDescriptor Context = Ty.getContext();
- if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
- && (!Context || Context.isCompileUnit() || Context.isFile()
+ if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
+ && (!Context || Context.isCompileUnit() || Context.isFile()
|| Context.isNameSpace()))
if (DIEEntry *Entry = getDIEEntry(Ty))
GlobalTypes[Ty.getName()] = Entry->getEntry();
@@ -739,6 +859,9 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
if (Size && Tag != dwarf::DW_TAG_pointer_type)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+ if (Tag == dwarf::DW_TAG_ptr_to_member_type)
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ getOrCreateTypeDIE(DTy.getClassType()));
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy.isForwardDecl())
addSourceLine(&Buffer, DTy);
@@ -754,7 +877,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
Buffer.setTag(Tag);
switch (Tag) {
- case dwarf::DW_TAG_vector_type:
case dwarf::DW_TAG_array_type:
constructArrayTypeDIE(Buffer, &CTy);
break;
@@ -794,6 +916,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
} else {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
addType(Arg, DIType(Ty));
+ if (DIType(Ty).isArtificial())
+ addFlag(Arg, dwarf::DW_AT_artificial);
Buffer.addChild(Arg);
}
}
@@ -830,27 +954,20 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
else if (SP.isPrivate())
addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
- else
+ else
addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (SP.isExplicit())
addFlag(ElemDie, dwarf::DW_AT_explicit);
- }
- else if (Element.isVariable()) {
- DIVariable DV(Element);
- ElemDie = new DIE(dwarf::DW_TAG_variable);
- addString(ElemDie, dwarf::DW_AT_name, DV.getName());
- addType(ElemDie, DV.getType());
- addFlag(ElemDie, dwarf::DW_AT_declaration);
- addFlag(ElemDie, dwarf::DW_AT_external);
- addSourceLine(ElemDie, DV);
} else if (Element.isDerivedType()) {
DIDerivedType DDTy(Element);
if (DDTy.getTag() == dwarf::DW_TAG_friend) {
ElemDie = new DIE(dwarf::DW_TAG_friend);
addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
- } else
- ElemDie = createMemberDIE(DIDerivedType(Element));
+ } else if (DDTy.isStaticMember())
+ ElemDie = createStaticMemberDIE(DDTy);
+ else
+ ElemDie = createMemberDIE(DDTy);
} else if (Element.isObjCProperty()) {
DIObjCProperty Property(Element);
ElemDie = new DIE(Property.getTag());
@@ -878,7 +995,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (Property.isNonAtomicObjCProperty())
PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
if (PropertyAttributes)
- addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
PropertyAttributes);
DIEEntry *Entry = getDIEEntry(Element);
@@ -951,7 +1068,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
}
}
-/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
/// for the given DITemplateTypeParameter.
DIE *
CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
@@ -965,7 +1082,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
return ParamDIE;
}
-/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
/// for the given DITemplateValueParameter.
DIE *
CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
@@ -977,7 +1094,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
addType(ParamDIE, TPV.getType());
if (!TPV.getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, TPV.getName());
- addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
TPV.getValue());
return ParamDIE;
}
@@ -1095,7 +1212,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (!SP.isDefinition()) {
addFlag(SPDie, dwarf::DW_AT_declaration);
-
+
// Add arguments. Do not add arguments for subprogram definition. They will
// be handled while processing variables.
DICompositeType SPTy = SP.getType();
@@ -1166,39 +1283,56 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
if (!GV.Verify())
return;
- DIE *VariableDIE = new DIE(GV.getTag());
- // Add to map.
- insertDIE(N, VariableDIE);
-
- // Add name.
- addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
- StringRef LinkageName = GV.getLinkageName();
- bool isGlobalVariable = GV.getGlobal() != NULL;
- if (!LinkageName.empty() && isGlobalVariable)
- addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
- getRealLinkageName(LinkageName));
- // Add type.
+ DIDescriptor GVContext = GV.getContext();
DIType GTy = GV.getType();
- addType(VariableDIE, GTy);
- // Add scoping info.
- if (!GV.isLocalToUnit())
- addFlag(VariableDIE, dwarf::DW_AT_external);
+ // If this is a static data member definition, some attributes belong
+ // to the declaration DIE.
+ DIE *VariableDIE = NULL;
+ bool IsStaticMember = false;
+ DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration();
+ if (SDMDecl.Verify()) {
+ assert(SDMDecl.isStaticMember() && "Expected static member decl");
+ // We need the declaration DIE that is in the static member's class.
+ // But that class might not exist in the DWARF yet.
+ // Creating the class will create the static member decl DIE.
+ getOrCreateContextDIE(SDMDecl.getContext());
+ VariableDIE = getDIE(SDMDecl);
+ assert(VariableDIE && "Static member decl has no context?");
+ IsStaticMember = true;
+ }
+
+ // If this is not a static data member definition, create the variable
+ // DIE and add the initial set of attributes to it.
+ if (!VariableDIE) {
+ VariableDIE = new DIE(GV.getTag());
+ // Add to map.
+ insertDIE(N, VariableDIE);
+
+ // Add name and type.
+ addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+ addType(VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV.isLocalToUnit()) {
+ addFlag(VariableDIE, dwarf::DW_AT_external);
+ addGlobalName(GV.getName(), VariableDIE);
+ }
+
+ // Add line number info.
+ addSourceLine(VariableDIE, GV);
+ // Add to context owner.
+ addToContextOwner(VariableDIE, GVContext);
+ }
- // Add line number info.
- addSourceLine(VariableDIE, GV);
- // Add to context owner.
- DIDescriptor GVContext = GV.getContext();
- addToContextOwner(VariableDIE, GVContext);
// Add location.
bool addToAccelTable = false;
DIE *VariableSpecDIE = NULL;
+ bool isGlobalVariable = GV.getGlobal() != NULL;
if (isGlobalVariable) {
addToAccelTable = true;
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(GV.getGlobal()));
+ addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal()));
// Do not create specification DIE if context is either compile unit
// or a subprogram.
if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
@@ -1208,25 +1342,44 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
dwarf::DW_FORM_ref4, VariableDIE);
addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
- addFlag(VariableDIE, dwarf::DW_AT_declaration);
+ // A static member's declaration is already flagged as such.
+ if (!SDMDecl.Verify())
+ addFlag(VariableDIE, dwarf::DW_AT_declaration);
addDie(VariableSpecDIE);
} else {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
- } else if (const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(GV.getConstant()))
- addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
- else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ // Add linkage name.
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty()) {
+ // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
+ // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
+ // TAG_variable.
+ addString(IsStaticMember && VariableSpecDIE ?
+ VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+ // In compatibility mode with older gdbs we put the linkage name on both
+ // the TAG_variable DIE and on the TAG_member DIE.
+ if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat())
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+ // AT_const_value was added when the static member was created. To avoid
+ // emitting AT_const_value multiple times, we only add AT_const_value when
+ // it is not a static member.
+ if (!IsStaticMember)
+ addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
+ } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
addToAccelTable = true;
// GV is a merged global.
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
Value *Ptr = CE->getOperand(0);
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
+ addOpAddress(Block, Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
- addUInt(Block, 0, dwarf::DW_FORM_udata,
+ addUInt(Block, 0, dwarf::DW_FORM_udata,
Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
@@ -1250,22 +1403,25 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
DIE *IndexTy) {
DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
- uint64_t L = SR.getLo();
- uint64_t H = SR.getHi();
- // The L value defines the lower bounds which is typically zero for C/C++. The
- // H value is the upper bounds. Values are 64 bit. H - L + 1 is the size
- // of the array. If L > H then do not emit DW_AT_lower_bound and
- // DW_AT_upper_bound attributes. If L is zero and H is also zero then the
- // array has one element and in such case do not emit lower bound.
+ // The LowerBound value defines the lower bound, which is typically zero for
+ // C/C++. The Count value is the number of elements. Values are 64 bit. If
+ // Count == -1 then the array is unbounded and we do not emit
+ // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0 and
+ // Count == 0, then the array has zero elements in which case we do not emit
+ // an upper bound.
+ int64_t LowerBound = SR.getLo();
+ int64_t DefaultLowerBound = getDefaultLowerBound();
+ int64_t Count = SR.getCount();
+
+ if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
+ addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound);
+
+ if (Count != -1 && Count != 0)
+ // FIXME: An unbounded array should reference the expression that defines
+ // the array.
+ addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1);
- if (L > H) {
- Buffer.addChild(DW_Subrange);
- return;
- }
- if (L)
- addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
- addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
Buffer.addChild(DW_Subrange);
}
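
The subrange arithmetic is worth a worked example: DW_AT_upper_bound is LowerBound + Count - 1, and DW_AT_lower_bound is suppressed when it equals the language default from getDefaultLowerBound():

#include <cassert>
#include <cstdint>

int main() {
  // C: "int a[10];" has default lower bound 0, so DW_AT_lower_bound is
  // omitted and only DW_AT_upper_bound = 9 is emitted.
  int64_t LowerBound = 0, Count = 10;
  assert(LowerBound + Count - 1 == 9);

  // Fortran: "DIMENSION A(10)" has default lower bound 1, upper bound 10.
  LowerBound = 1;
  assert(LowerBound + Count - 1 == 10);

  // Count == -1 marks an unbounded array: no bounds are emitted at all.
  return 0;
}
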
@@ -1273,7 +1429,7 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
DICompositeType *CTy) {
Buffer.setTag(dwarf::DW_TAG_array_type);
- if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+ if (CTy->isVector())
addFlag(&Buffer, dwarf::DW_AT_GNU_vector);
// Emit derived type.
@@ -1281,10 +1437,13 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
DIArray Elements = CTy->getTypeArray();
// Get an anonymous type for index type.
+ // FIXME: This type should be passed down from the front end
+ // as different languages may have different sizes for indexes.
DIE *IdxTy = getIndexTyDie();
if (!IdxTy) {
// Construct an anonymous type for index type.
IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addString(IdxTy, dwarf::DW_AT_name, "int");
addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_signed);
@@ -1327,8 +1486,6 @@ void CompileUnit::constructContainingTypeDIEs() {
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
StringRef Name = DV->getName();
- if (Name.empty())
- return NULL;
// Translate tag to proper Dwarf tag.
unsigned Tag = DV->getTag();
@@ -1376,20 +1533,20 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
unsigned FrameReg = 0;
const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset =
- TFI->getFrameIndexReference(*Asm->MF,
- DVInsn->getOperand(1).getImm(),
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF,
+ DVInsn->getOperand(1).getImm(),
FrameReg);
MachineLocation Location(FrameReg, Offset);
addVariableAddress(DV, VariableDie, Location);
-
+
} else if (RegOp.getReg())
- addVariableAddress(DV, VariableDie,
+ addVariableAddress(DV, VariableDie,
MachineLocation(RegOp.getReg()));
updated = true;
}
else if (DVInsn->getOperand(0).isImm())
- updated =
+ updated =
addConstantValue(VariableDie, DVInsn->getOperand(0),
DV->getType());
else if (DVInsn->getOperand(0).isFPImm())
@@ -1397,11 +1554,11 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
addConstantFPValue(VariableDie, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isCImm())
updated =
- addConstantValue(VariableDie,
+ addConstantValue(VariableDie,
DVInsn->getOperand(0).getCImm(),
DV->getType().isUnsignedDIType());
} else {
- addVariableAddress(DV, VariableDie,
+ addVariableAddress(DV, VariableDie,
Asm->getDebugValueLocation(DVInsn));
updated = true;
}
@@ -1419,7 +1576,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
if (FI != ~0) {
unsigned FrameReg = 0;
const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset =
+ int Offset =
TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
MachineLocation Location(FrameReg, Offset);
addVariableAddress(DV, VariableDie, Location);
@@ -1499,7 +1656,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
- else
+ else
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
@@ -1509,35 +1666,46 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
// Objective-C properties.
if (MDNode *PNode = DT.getObjCProperty())
if (DIEEntry *PropertyDie = getDIEEntry(PNode))
- MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
+ MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
PropertyDie);
- // This is only for backward compatibility.
- StringRef PropertyName = DT.getObjCPropertyName();
- if (!PropertyName.empty()) {
- addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
- StringRef GetterName = DT.getObjCPropertyGetterName();
- if (!GetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
- StringRef SetterName = DT.getObjCPropertySetterName();
- if (!SetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
- unsigned PropertyAttributes = 0;
- if (DT.isReadOnlyObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
- if (DT.isReadWriteObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
- if (DT.isAssignObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
- if (DT.isRetainObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
- if (DT.isCopyObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
- if (DT.isNonAtomicObjCProperty())
- PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
- if (PropertyAttributes)
- addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
- PropertyAttributes);
- }
+ if (DT.isArtificial())
+ addFlag(MemberDie, dwarf::DW_AT_artificial);
+
return MemberDie;
}
+
+/// createStaticMemberDIE - Create new DIE for C++ static member.
+DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) {
+ if (!DT.Verify())
+ return NULL;
+
+ DIE *StaticMemberDIE = new DIE(DT.getTag());
+ DIType Ty = DT.getTypeDerivedFrom();
+
+ addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName());
+ addType(StaticMemberDIE, Ty);
+ addSourceLine(StaticMemberDIE, DT);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_external);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_declaration);
+
+ // FIXME: We could omit private if the parent is a class_type, and
+ // public if the parent is something else.
+ if (DT.isProtected())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ else
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+
+ if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant()))
+ addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType());
+ if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant()))
+ addConstantFPValue(StaticMemberDIE, CFP);
+
+ insertDIE(DT, StaticMemberDIE);
+ return StaticMemberDIE;
+}
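
In source terms, the new static-member path corresponds to the classic C++ split between an in-class declaration and an out-of-class definition. The comments below sketch which DIE each line produces (an illustrative mapping inferred from the code above):

// Illustrative source; comments describe the DIEs the code above creates.
struct S {
  static const int Member = 42; // DW_TAG_member under S's type DIE, with
                                // DW_AT_declaration, DW_AT_external, and
                                // DW_AT_const_value (createStaticMemberDIE)
};
const int S::Member;            // DW_TAG_variable at CU scope, linked back
                                // to the declaration via DW_AT_specification
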
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index fad9b6e06684..2b180c6cc3f4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,26 +15,28 @@
#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#include "DIE.h"
-#include "llvm/DebugInfo.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo.h"
namespace llvm {
class DwarfDebug;
+class DwarfUnits;
class MachineLocation;
class MachineOperand;
class ConstantInt;
+class ConstantFP;
class DbgVariable;
//===----------------------------------------------------------------------===//
/// CompileUnit - This dwarf writer support class manages information associated
/// with a source file.
class CompileUnit {
- /// ID - File identifier for source.
+ /// UniqueID - a numeric ID unique among all CUs in the module
///
- unsigned ID;
+ unsigned UniqueID;
/// Language - The DW_AT_language of the compile unit
///
@@ -47,7 +49,9 @@ class CompileUnit {
/// Asm - Target of Dwarf emission.
AsmPrinter *Asm;
+ // Holders for some common dwarf information.
DwarfDebug *DD;
+ DwarfUnits *DU;
/// IndexTyDie - An anonymous type for index type. Owned by CUDie.
DIE *IndexTyDie;
@@ -60,6 +64,10 @@ class CompileUnit {
/// descriptors to debug information entries using a DIEEntry proxy.
DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+ /// GlobalNames - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> GlobalNames;
+
/// GlobalTypes - A map of globally visible types for this unit.
///
StringMap<DIE*> GlobalTypes;
@@ -79,14 +87,27 @@ class CompileUnit {
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+ /// Offset of the CUDie from beginning of debug info section.
+ unsigned DebugInfoOffset;
+
+ /// getDefaultLowerBound - Return the default lower bound for an array. If the
+ /// DWARF version doesn't handle the language, return -1.
+ int64_t getDefaultLowerBound() const;
+
+ /// getOrCreateContextDIE - Get context owner's DIE.
+ DIE *getOrCreateContextDIE(DIDescriptor Context);
+
public:
- CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+ CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW,
+ DwarfUnits *);
~CompileUnit();
// Accessors.
- unsigned getID() const { return ID; }
+ unsigned getUniqueID() const { return UniqueID; }
unsigned getLanguage() const { return Language; }
DIE* getCUDie() const { return CUDie.get(); }
+ unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
+ const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; }
const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
const StringMap<std::vector<DIE*> > &getAccelNames() const {
@@ -102,11 +123,16 @@ public:
&getAccelTypes() const {
return AccelTypes;
}
-
+
+ void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
/// hasContent - Return true if this compile unit has something to write out.
///
bool hasContent() const { return !CUDie->getChildren().empty(); }
+ /// addGlobalName - Add a new global entity to the compile unit.
+ ///
+ void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; }
+
/// addGlobalType - Add a new global type to the compile unit.
///
void addGlobalType(DIType Ty);
@@ -129,12 +155,12 @@ public:
std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name];
DIEs.push_back(Die);
}
-
+
/// getDIE - Returns the debug information entry map slot for the
/// specified debug variable.
DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
- DIEBlock *getDIEBlock() {
+ DIEBlock *getDIEBlock() {
return new (DIEValueAllocator) DIEBlock();
}
@@ -174,11 +200,10 @@ public:
void setIndexTyDie(DIE *D) {
IndexTyDie = D;
}
-public:
/// addFlag - Add a flag that is true to the DIE.
void addFlag(DIE *Die, unsigned Attribute);
-
+
/// addUInt - Add an unsigned integer attribute data and value.
///
void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
@@ -191,11 +216,25 @@ public:
///
void addString(DIE *Die, unsigned Attribute, const StringRef Str);
+ /// addLocalString - Add a string attribute data and value.
+ ///
+ void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str);
+
/// addLabel - Add a Dwarf label attribute data and value.
///
void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
const MCSymbol *Label);
+ /// addLabelAddress - Add a dwarf label attribute data and value using
+ /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ ///
+ void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label);
+
+ /// addOpAddress - Add a dwarf op address data and value using the
+ /// form given and an op of either DW_OP_addr or DW_OP_GNU_addr_index.
+ ///
+ void addOpAddress(DIE *Die, MCSymbol *Label);
+
/// addDelta - Add a label delta attribute data and value.
///
void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
@@ -204,7 +243,7 @@ public:
/// addDIEEntry - Add a DIE attribute data and value.
///
void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
-
+
/// addBlock - Add block data.
///
void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
@@ -226,9 +265,11 @@ public:
/// addConstantValue - Add constant value entry in variable DIE.
bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
+ bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned);
/// addConstantFPValue - Add constant value entry in variable DIE.
bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
+ bool addConstantFPValue(DIE *Die, const ConstantFP *CFP);
/// addTemplateParams - Add template parameters in buffer.
void addTemplateParams(DIE &Buffer, DIArray TParams);
@@ -257,7 +298,7 @@ public:
void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
const MachineLocation &Location);
- /// addVariableAddress - Add DW_AT_location attribute for a
+ /// addVariableAddress - Add DW_AT_location attribute for a
/// DbgVariable based on provided MachineLocation.
void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location);
@@ -279,7 +320,7 @@ public:
/// given DIType.
DIE *getOrCreateTypeDIE(const MDNode *N);
- /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+ /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
/// for the given DITemplateTypeParameter.
DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
@@ -312,7 +353,7 @@ public:
void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
- void constructArrayTypeDIE(DIE &Buffer,
+ void constructArrayTypeDIE(DIE &Buffer,
DICompositeType *CTy);
/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
@@ -328,6 +369,9 @@ public:
/// createMemberDIE - Create new member DIE.
DIE *createMemberDIE(DIDerivedType DT);
+ /// createStaticMemberDIE - Create new static data member DIE.
+ DIE *createStaticMemberDIE(DIDerivedType DT);
+
private:
// DIEValueAllocator - All DIEValues are allocated through this allocator.
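
The new GlobalNames map (together with the generate-dwarf-pubnames flag added in DwarfDebug.cpp below) feeds a pubnames-style index: conceptually, (DIE offset, name) pairs that a debugger can scan without walking the DIE tree. A toy model, simplified and not the actual emission code:

#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Toy pubnames index: name -> offset of the DIE within its CU.
struct PubNamesModel {
  std::map<std::string, uint32_t> NameToDieOffset;

  void addGlobalName(const std::string &Name, uint32_t DieOffset) {
    NameToDieOffset[Name] = DieOffset; // mirrors CompileUnit::addGlobalName
  }

  // Section layout, roughly: (4-byte DIE offset, NUL-terminated name)
  // pairs, terminated by a zero offset.
  std::vector<std::pair<uint32_t, std::string>> entries() const {
    std::vector<std::pair<uint32_t, std::string>> Out;
    for (const auto &E : NameToDieOffset)
      Out.push_back({E.second, E.first});
    return Out;
  }
};
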
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 367b52307925..d3cb4f9c1c0e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -16,34 +16,34 @@
#include "DIE.h"
#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/Timer.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
@@ -54,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
cl::desc("Make an absence of debug location information explicit."),
cl::init(false));
+static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames",
+ cl::Hidden, cl::init(false),
+ cl::desc("Generate DWARF pubnames section"));
+
namespace {
enum DefaultOnOff {
Default, Enable, Disable
@@ -78,6 +82,15 @@ static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden,
clEnumValEnd),
cl::init(Default));
+static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden,
+ cl::desc("Output prototype dwarf split debug info."),
+ cl::values(
+ clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"),
+ clEnumValEnd),
+ cl::init(Default));
+
namespace {
const char *DWARFGroupName = "DWARF Emission";
const char *DbgTimerName = "DWARF Debug Writer";
@@ -85,8 +98,8 @@ namespace {
//===----------------------------------------------------------------------===//
-/// Configuration values for initial hash set sizes (log2).
-///
+// Configuration values for initial hash set sizes (log2).
+//
static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
namespace llvm {
@@ -102,19 +115,19 @@ DIType DbgVariable::getType() const {
either the struct, or a pointer to the struct, as its type. This
is necessary for various behind-the-scenes things the compiler
needs to do with by-reference variables in blocks.
-
+
However, as far as the original *programmer* is concerned, the
variable should still have type 'SomeType', as originally declared.
-
+
The following function dives into the __Block_byref_x_VarName
struct to find the original type of the variable. This will be
passed back to the code generating the type for the Debug
Information Entry for the variable 'VarName'. 'VarName' will then
have the original type 'SomeType' in its debug information.
-
+
The original type 'SomeType' will be the type of the field named
'VarName' inside the __Block_byref_x_VarName struct.
-
+
NOTE: In order for this to not completely fail on the debugger
side, the Debug Information Entry for the variable VarName needs to
have a DW_AT_location that tells the debugger how to unwind through
@@ -122,15 +135,15 @@ DIType DbgVariable::getType() const {
value of the variable. The function addBlockByrefType does this. */
DIType subType = Ty;
unsigned tag = Ty.getTag();
-
+
if (tag == dwarf::DW_TAG_pointer_type) {
DIDerivedType DTy = DIDerivedType(Ty);
subType = DTy.getTypeDerivedFrom();
}
-
+
DICompositeType blockStruct = DICompositeType(subType);
DIArray Elements = blockStruct.getTypeArray();
-
+
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
DIDerivedType DT = DIDerivedType(Element);
@@ -146,45 +159,55 @@ DIType DbgVariable::getType() const {
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: Asm(A), MMI(Asm->MMI), FirstCU(0),
AbbreviationsSet(InitAbbreviationsSetSize),
- SourceIdMap(DIEValueAllocator), StringPool(DIEValueAllocator),
- PrevLabel(NULL) {
- NextStringPoolNumber = 0;
+ SourceIdMap(DIEValueAllocator),
+ PrevLabel(NULL), GlobalCUIndexCount(0),
+ InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string",
+ DIEValueAllocator),
+ SkeletonAbbrevSet(InitAbbreviationsSetSize),
+ SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string",
+ DIEValueAllocator) {
DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
- DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
+ DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
// Turn on accelerator tables and older gdb compatibility
// for Darwin.
- bool isDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+ bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin();
if (DarwinGDBCompat == Default) {
- if (isDarwin)
- isDarwinGDBCompat = true;
+ if (IsDarwin)
+ IsDarwinGDBCompat = true;
else
- isDarwinGDBCompat = false;
+ IsDarwinGDBCompat = false;
} else
- isDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false;
+ IsDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false;
if (DwarfAccelTables == Default) {
- if (isDarwin)
- hasDwarfAccelTables = true;
+ if (IsDarwin)
+ HasDwarfAccelTables = true;
else
- hasDwarfAccelTables = false;
+ HasDwarfAccelTables = false;
} else
- hasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+ HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+
+ if (SplitDwarf == Default)
+ HasSplitDwarf = false;
+ else
+ HasSplitDwarf = SplitDwarf == Enable ? true : false;
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
- beginModule(M);
+ beginModule();
}
}
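
// A minimal sketch of the Default/Enable/Disable resolution pattern used
// above for DarwinGDBCompat, DwarfAccelTables and SplitDwarf; the helper
// name resolveDefaultOnOff is hypothetical and not part of this patch.
enum DefaultOnOff { Default, Enable, Disable };

static bool resolveDefaultOnOff(DefaultOnOff Opt, bool PlatformDefault) {
  // An explicit Enable/Disable wins; Default falls back to the platform.
  if (Opt == Default)
    return PlatformDefault;
  return Opt == Enable;
}
// Usage: IsDarwinGDBCompat = resolveDefaultOnOff(DarwinGDBCompat, IsDarwin);
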
DwarfDebug::~DwarfDebug() {
}
-/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
-/// temporary label to it if SymbolStem is specified.
-static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+// Switch to the specified MCSection and emit an assembler
+// temporary label to it if SymbolStem is specified.
+static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
const char *SymbolStem = 0) {
Asm->OutStreamer.SwitchSection(Section);
if (!SymbolStem) return 0;
@@ -194,44 +217,64 @@ static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
return TmpSym;
}
-MCSymbol *DwarfDebug::getStringPool() {
- return Asm->GetTempSymbol("section_str");
+MCSymbol *DwarfUnits::getStringPoolSym() {
+ return Asm->GetTempSymbol(StringPref);
}
-MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
- std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
+MCSymbol *DwarfUnits::getStringPoolEntry(StringRef Str) {
+ std::pair<MCSymbol*, unsigned> &Entry =
+ StringPool.GetOrCreateValue(Str).getValue();
if (Entry.first) return Entry.first;
Entry.second = NextStringPoolNumber++;
- return Entry.first = Asm->GetTempSymbol("string", Entry.second);
+ return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
}
-/// assignAbbrevNumber - Define a unique number for the abbreviation.
-///
-void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+unsigned DwarfUnits::getStringPoolIndex(StringRef Str) {
+ std::pair<MCSymbol*, unsigned> &Entry =
+ StringPool.GetOrCreateValue(Str).getValue();
+ if (Entry.first) return Entry.second;
+
+ Entry.second = NextStringPoolNumber++;
+ Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
+ return Entry.second;
+}
+
+unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) {
+ std::pair<MCSymbol*, unsigned> &Entry = AddressPool[Sym];
+ if (Entry.first) return Entry.second;
+
+ Entry.second = NextAddrPoolNumber++;
+ Entry.first = Sym;
+ return Entry.second;
+}
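
// A self-contained sketch (hypothetical names, std::map standing in for
// LLVM's StringMap/DenseMap) of the pool pattern shared by
// getStringPoolEntry, getStringPoolIndex and getAddrPoolIndex: look the key
// up once, and if it is new, hand it the next sequential index.
#include <map>
#include <string>

class IndexPool {
  std::map<std::string, unsigned> Pool;
  unsigned NextIndex = 0;

public:
  // Returns the existing index for Key, or assigns and returns the next one.
  unsigned getOrAssignIndex(const std::string &Key) {
    auto It = Pool.find(Key);
    if (It != Pool.end())
      return It->second;
    return Pool[Key] = NextIndex++;
  }
};
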
+
+// Define a unique number for the abbreviation.
+//
+void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) {
// Profile the node so that we can make it unique.
FoldingSetNodeID ID;
Abbrev.Profile(ID);
// Check the set for priors.
- DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+ DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev);
// If it's newly added.
if (InSet == &Abbrev) {
// Add to abbreviation list.
- Abbreviations.push_back(&Abbrev);
+ Abbreviations->push_back(&Abbrev);
// Assign the vector position + 1 as its number.
- Abbrev.setNumber(Abbreviations.size());
+ Abbrev.setNumber(Abbreviations->size());
} else {
// Assign existing abbreviation number.
Abbrev.setNumber(InSet->getNumber());
}
}
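
// Sketch of the uniquing scheme in assignAbbrevNumber, with a plain map
// standing in for LLVM's FoldingSet: a profile seen for the first time is
// appended to the ordered list and numbered vector position + 1; duplicates
// reuse the existing number. All names here are illustrative.
#include <string>
#include <unordered_map>
#include <vector>

static unsigned assignNumber(const std::string &Profile,
                             std::unordered_map<std::string, unsigned> &Seen,
                             std::vector<std::string> &Ordered) {
  auto It = Seen.find(Profile);
  if (It != Seen.end())
    return It->second;          // Existing abbreviation: reuse its number.
  Ordered.push_back(Profile);
  return Seen[Profile] = Ordered.size(); // 1-based abbreviation number.
}
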
-/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
-/// printer to not emit usual symbol prefix before the symbol name is used then
-/// return linkage name after skipping this special LLVM prefix.
+// If a special LLVM prefix is used to inform the asm printer not to emit
+// the usual symbol prefix before the symbol name, return the linkage name
+// after skipping this special LLVM prefix.
static StringRef getRealLinkageName(StringRef LinkageName) {
char One = '\1';
if (LinkageName.startswith(StringRef(&One, 1)))
@@ -275,7 +318,7 @@ static StringRef getObjCMethodName(StringRef In) {
static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
DIE* Die) {
if (!SP.isDefinition()) return;
-
+
TheCU->addAccelName(SP.getName(), Die);
// If the linkage name is different than the name, go ahead and output
@@ -296,10 +339,9 @@ static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
}
}
-/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
-/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
-/// If there are global variables in this scope then create and insert
-/// DIEs for these variables.
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
const MDNode *SPNode) {
DIE *SPDie = SPCU->getDIE(SPNode);
@@ -310,11 +352,16 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
// If we're updating an abstract DIE, then we will be adding the children and
// object pointer later on. But what we don't want to do is process the
// concrete DIE twice.
- if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode);
+ if (AbsSPDIE) {
+ bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie());
// Pick up abstract subprogram DIE.
SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of
+ // DW_FORM_ref4.
SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
- dwarf::DW_FORM_ref4, AbsSPDIE);
+ InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ AbsSPDIE);
SPCU->addDie(SPDie);
} else {
DISubprogram SPDecl = SP.getFunctionDeclaration();
@@ -347,17 +394,19 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
}
DIE *SPDeclDie = SPDie;
SPDie = new DIE(dwarf::DW_TAG_subprogram);
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
- SPDeclDie);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, SPDeclDie);
SPCU->addDie(SPDie);
}
}
}
- SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
- SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()));
+ SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc,
+ Asm->GetTempSymbol("func_begin",
+ Asm->getFunctionNumber()));
+ SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc,
+ Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber()));
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
MachineLocation Location(RI->getFrameRegister(*Asm->MF));
SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
@@ -365,13 +414,13 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_subprogram nodes.
addSubprogramNames(SPCU, SP, SPDie);
-
+
return SPDie;
}
-/// constructLexicalScope - Construct new DW_TAG_lexical_block
-/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
-DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
+// Construct new DW_TAG_lexical_block for this scope and attach
+// DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
@@ -387,7 +436,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
// .debug_range as a uint, size 4, for now. emitDIE will handle
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
- DebugRangeSymbols.size()
+ DebugRangeSymbols.size()
* Asm->getDataLayout().getPointerSize());
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
@@ -399,23 +448,22 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
return ScopeDIE;
}
- const MCSymbol *Start = getLabelBeforeInsn(RI->first);
- const MCSymbol *End = getLabelAfterInsn(RI->second);
+ MCSymbol *Start = getLabelBeforeInsn(RI->first);
+ MCSymbol *End = getLabelAfterInsn(RI->second);
if (End == 0) return 0;
assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
assert(End->isDefined() && "Invalid end label for an inlined scope!");
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, Start);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, End);
return ScopeDIE;
}
-/// constructInlinedScopeDIE - This scope represents inlined body of
-/// a function. Construct DIE to represent this concrete inlined copy
-/// of the function.
+// This scope represents the inlined body of a function. Construct a DIE to
+// represent this concrete inlined copy of the function.
DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
@@ -433,8 +481,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
}
SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin();
- const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
- const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+ MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+ MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
if (StartLabel == 0 || EndLabel == 0) {
llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
@@ -453,7 +501,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
// .debug_range as a uint, size 4, for now. emitDIE will handle
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
- DebugRangeSymbols.size()
+ DebugRangeSymbols.size()
* Asm->getDataLayout().getPointerSize());
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
@@ -463,10 +511,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
DebugRangeSymbols.push_back(NULL);
DebugRangeSymbols.push_back(NULL);
} else {
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- StartLabel);
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- EndLabel);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel);
+ TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel);
}
InlinedSubprogramDIEs.insert(OriginDIE);
@@ -487,21 +533,28 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
DILocation DL(Scope->getInlinedAt());
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
- GetOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
+ getOrCreateSourceID(DL.getFilename(), DL.getDirectory(),
+ TheCU->getUniqueID()));
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
-
+
return ScopeDIE;
}
-/// constructScopeDIE - Construct a DIE for this scope.
+// Construct a DIE for this scope.
DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
+ DIScope DS(Scope->getScopeNode());
+ // Early return to avoid creating dangling variable|scope DIEs.
+ if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() &&
+ !TheCU->getDIE(DS))
+ return NULL;
+
SmallVector<DIE *, 8> Children;
DIE *ObjectPointer = NULL;
@@ -509,7 +562,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
if (LScopes.isCurrentFunctionScope(Scope))
for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
if (DbgVariable *ArgDV = CurrentFnArguments[i])
- if (DIE *Arg =
+ if (DIE *Arg =
TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) {
Children.push_back(Arg);
if (ArgDV->isObjectPointer()) ObjectPointer = Arg;
@@ -518,7 +571,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
// Collect lexical scope children first.
const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope);
for (unsigned i = 0, N = Variables.size(); i < N; ++i)
- if (DIE *Variable =
+ if (DIE *Variable =
TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) {
Children.push_back(Variable);
if (Variables[i]->isObjectPointer()) ObjectPointer = Variable;
@@ -527,7 +580,6 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
Children.push_back(Nested);
- DIScope DS(Scope->getScopeNode());
DIE *ScopeDIE = NULL;
if (Scope->getInlinedAt())
ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
@@ -548,7 +600,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
return NULL;
ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
}
-
+
if (!ScopeDIE) return NULL;
// Add children
@@ -566,24 +618,33 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
return ScopeDIE;
}
-/// GetOrCreateSourceID - Look up the source id with the given directory and
-/// source file names. If none currently exists, create a new id and insert it
-/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
-/// maps as well.
-unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
- StringRef DirName) {
+// Look up the source id with the given directory and source file names.
+// If none currently exists, create a new id and insert it in the
+// SourceIds map. This can update DirectoryNames and SourceFileNames maps
+// as well.
+unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
+ StringRef DirName, unsigned CUID) {
+ // If we use .loc in assembly, we can't separate .file entries according to
+ // compile units. Thus all files will belong to the default compile unit.
+ if (Asm->TM.hasMCUseLoc() &&
+ Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+ CUID = 0;
+
// If FE did not provide a file name, then assume stdin.
if (FileName.empty())
- return GetOrCreateSourceID("<stdin>", StringRef());
+ return getOrCreateSourceID("<stdin>", StringRef(), CUID);
// TODO: this might not belong here. See if we can factor this better.
if (DirName == CompilationDir)
DirName = "";
- unsigned SrcId = SourceIdMap.size()+1;
+ // FileIDCUMap stores the current ID for the given compile unit.
+ unsigned SrcId = FileIDCUMap[CUID] + 1;
- // We look up the file/dir pair by concatenating them with a zero byte.
+ // We look up the CUID/file/dir by concatenating them with a zero byte.
SmallString<128> NamePair;
+ NamePair += CUID;
+ NamePair += '\0';
NamePair += DirName;
NamePair += '\0'; // Zero bytes are not allowed in paths.
NamePair += FileName;
@@ -592,37 +653,57 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
if (Ent.getValue() != SrcId)
return Ent.getValue();
+ FileIDCUMap[CUID] = SrcId;
// Print out a .file directive to specify files for .loc directives.
- Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName);
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID);
return SrcId;
}
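
// Sketch of the cache key used by getOrCreateSourceID: CU id, directory and
// file name are concatenated with '\0' separators (safe because zero bytes
// cannot occur in paths), so a single flat map serves every compile unit.
// Function and parameter names are illustrative only.
#include <map>
#include <string>

static unsigned getOrCreateFileId(unsigned CUID, const std::string &Dir,
                                  const std::string &File,
                                  std::map<std::string, unsigned> &Ids,
                                  std::map<unsigned, unsigned> &NextIdPerCU) {
  std::string Key = std::to_string(CUID);
  Key += '\0';
  Key += Dir;
  Key += '\0';
  Key += File;
  auto It = Ids.find(Key);
  if (It != Ids.end())
    return It->second;
  return Ids[Key] = ++NextIdPerCU[CUID]; // File ids are 1-based per CU.
}
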
-/// constructCompileUnit - Create new CompileUnit for the given
-/// metadata node with tag DW_TAG_compile_unit.
+// Create new CompileUnit for the given metadata node with tag
+// DW_TAG_compile_unit.
CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
StringRef FN = DIUnit.getFilename();
CompilationDir = DIUnit.getDirectory();
- unsigned ID = GetOrCreateSourceID(FN, CompilationDir);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die,
- Asm, this);
+ CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+ DIUnit.getLanguage(), Die, Asm,
+ this, &InfoHolder);
+
+ FileIDCUMap[NewCU->getUniqueID()] = 0;
+ // Call this to emit a .file directive if it wasn't emitted for the source
+ // file this CU comes from yet.
+ getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID());
+
NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit.getLanguage());
NewCU->addString(Die, dwarf::DW_AT_name, FN);
// 2.17.1 requires that we use DW_AT_low_pc for a single entry point
- // into an entity.
- NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+ // into an entity. We're using 0 (or a NULL label) for this.
+ NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
+
+ // Define start line table label for each Compile Unit.
+ MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
+ NewCU->getUniqueID());
+ Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym,
+ NewCU->getUniqueID());
+
// DW_AT_stmt_list is an offset of line number information for this
// compile unit in debug_line section.
+ // The line table entries are not always emitted in assembly, so it
+ // is not okay to use line_table_start here.
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
- Asm->GetTempSymbol("section_line"));
- else
+ NewCU->getUniqueID() == 0 ?
+ Asm->GetTempSymbol("section_line") : LineTableStartSym);
+ else if (NewCU->getUniqueID() == 0)
NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ else
+ NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ LineTableStartSym, DwarfLineSectionSym);
if (!CompilationDir.empty())
NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
@@ -632,19 +713,22 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
StringRef Flags = DIUnit.getFlags();
if (!Flags.empty())
NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
-
+
if (unsigned RVer = DIUnit.getRunTimeVersion())
NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
if (!FirstCU)
FirstCU = NewCU;
+
+ InfoHolder.addUnit(NewCU);
+
CUMap.insert(std::make_pair(N, NewCU));
return NewCU;
}
-/// construct SubprogramDIE - Construct subprogram DIE.
-void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
+// Construct subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
const MDNode *N) {
CompileUnit *&CURef = SPMap[N];
if (CURef)
@@ -665,133 +749,83 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
// Add to context owner.
TheCU->addToContextOwner(SubprogramDie, SP.getContext());
- return;
-}
-
-/// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
-/// as llvm.dbg.enum and llvm.dbg.ty
-void DwarfDebug::collectInfoFromNamedMDNodes(Module *M) {
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- const MDNode *N = NMD->getOperand(i);
- if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
- constructSubprogramDIE(CU, N);
- }
-
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- const MDNode *N = NMD->getOperand(i);
- if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
- CU->createGlobalVariableDIE(N);
- }
-
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIType Ty(NMD->getOperand(i));
- if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
- CU->getOrCreateTypeDIE(Ty);
- }
-
- if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIType Ty(NMD->getOperand(i));
- if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
- CU->getOrCreateTypeDIE(Ty);
- }
-}
-
-/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
-/// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder.
-bool DwarfDebug::collectLegacyDebugInfo(Module *M) {
- DebugInfoFinder DbgFinder;
- DbgFinder.processModule(*M);
-
- bool HasDebugInfo = false;
- // Scan all the compile-units to see if there are any marked as the main
- // unit. If not, we do not generate debug info.
- for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I) {
- if (DICompileUnit(*I).isMain()) {
- HasDebugInfo = true;
- break;
- }
- }
- if (!HasDebugInfo) return false;
-
- // Create all the compile unit DIEs.
- for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I)
- constructCompileUnit(*I);
-
- // Create DIEs for each global variable.
- for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
- E = DbgFinder.global_variable_end(); I != E; ++I) {
- const MDNode *N = *I;
- if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
- CU->createGlobalVariableDIE(N);
- }
-
- // Create DIEs for each subprogram.
- for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
- E = DbgFinder.subprogram_end(); I != E; ++I) {
- const MDNode *N = *I;
- if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
- constructSubprogramDIE(CU, N);
- }
-
- return HasDebugInfo;
+ // Expose as global, if requested.
+ if (GenerateDwarfPubNamesSection)
+ TheCU->addGlobalName(SP.getName(), SubprogramDie);
}
-/// beginModule - Emit all Dwarf sections that should come prior to the
-/// content. Create global DIEs and emit initial debug info sections.
-/// This is invoked by the target AsmPrinter.
-void DwarfDebug::beginModule(Module *M) {
+// Emit all Dwarf sections that should come prior to the content. Create
+// global DIEs and emit initial debug info sections. This is invoked by
+// the target AsmPrinter.
+void DwarfDebug::beginModule() {
if (DisableDebugInfoPrinting)
return;
+ const Module *M = MMI->getModule();
+
// If module has named metadata anchors then use them, otherwise scan the
// module using debug info finder to collect debug info.
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- if (CU_Nodes) {
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CUNode(CU_Nodes->getOperand(i));
- CompileUnit *CU = constructCompileUnit(CUNode);
- DIArray GVs = CUNode.getGlobalVariables();
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
- CU->createGlobalVariableDIE(GVs.getElement(i));
- DIArray SPs = CUNode.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
- constructSubprogramDIE(CU, SPs.getElement(i));
- DIArray EnumTypes = CUNode.getEnumTypes();
- for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
- CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
- DIArray RetainedTypes = CUNode.getRetainedTypes();
- for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
- CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
- }
- } else if (!collectLegacyDebugInfo(M))
+ if (!CU_Nodes)
return;
- collectInfoFromNamedMDNodes(M);
-
+ // Emit initial sections so we can reference labels later.
+ emitSectionLabels();
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CUNode(CU_Nodes->getOperand(i));
+ CompileUnit *CU = constructCompileUnit(CUNode);
+ DIArray GVs = CUNode.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
+ CU->createGlobalVariableDIE(GVs.getElement(i));
+ DIArray SPs = CUNode.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ constructSubprogramDIE(CU, SPs.getElement(i));
+ DIArray EnumTypes = CUNode.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
+ DIArray RetainedTypes = CUNode.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+ // If we're splitting the dwarf out now that we've got the entire
+ // CU then construct a skeleton CU based upon it.
+ if (useSplitDwarf()) {
+ // This should be a unique identifier when we want to build .dwp files.
+ CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+ // Now construct the skeleton CU associated.
+ constructSkeletonCU(CUNode);
+ }
+ }
+
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
-
- // Emit initial sections.
- EmitSectionLabels();
// Prime section data.
SectionMap.insert(Asm->getObjFileLowering().getTextSection());
}
-/// endModule - Emit all Dwarf sections that should come after the content.
-///
-void DwarfDebug::endModule() {
- if (!FirstCU) return;
+// Attach DW_AT_inline attribute with inlined subprogram DIEs.
+void DwarfDebug::computeInlinedDIEs() {
+ // Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+ for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
+ AE = AbstractSPDies.end(); AI != AE; ++AI) {
+ DIE *ISP = AI->second;
+ if (InlinedSubprogramDIEs.count(ISP))
+ continue;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+}
+
+// Collect info for variables that were optimized out.
+void DwarfDebug::collectDeadVariables() {
const Module *M = MMI->getModule();
DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap;
- // Collect info for variables that were optimized out.
if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit TheCU(CU_Nodes->getOperand(i));
@@ -804,7 +838,7 @@ void DwarfDebug::endModule() {
DIArray Variables = SP.getVariables();
if (Variables.getNumElements() == 0) continue;
- LexicalScope *Scope =
+ LexicalScope *Scope =
new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
DeadFnScopeMap[SP] = Scope;
@@ -817,27 +851,22 @@ void DwarfDebug::endModule() {
DIVariable DV(Variables.getElement(vi));
if (!DV.Verify()) continue;
DbgVariable *NewVar = new DbgVariable(DV, NULL);
- if (DIE *VariableDIE =
+ if (DIE *VariableDIE =
SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope()))
ScopeDIE->addChild(VariableDIE);
}
}
}
}
+ DeleteContainerSeconds(DeadFnScopeMap);
+}
+
+void DwarfDebug::finalizeModuleInfo() {
+ // Collect info for variables that were optimized out.
+ collectDeadVariables();
// Attach DW_AT_inline attribute with inlined subprogram DIEs.
- for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
- AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
- DIE *ISP = *AI;
- FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
- }
- for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
- AE = AbstractSPDies.end(); AI != AE; ++AI) {
- DIE *ISP = AI->second;
- if (InlinedSubprogramDIEs.count(ISP))
- continue;
- FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
- }
+ computeInlinedDIEs();
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
@@ -847,6 +876,13 @@ void DwarfDebug::endModule() {
TheCU->constructContainingTypeDIEs();
}
+ // Compute DIE offsets and sizes.
+ InfoHolder.computeSizeAndOffsets();
+ if (useSplitDwarf())
+ SkeletonHolder.computeSizeAndOffsets();
+}
+
+void DwarfDebug::endSections() {
// Standard sections final addresses.
Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
@@ -858,15 +894,79 @@ void DwarfDebug::endModule() {
Asm->OutStreamer.SwitchSection(SectionMap[I]);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1));
}
+}
+
+// Emit all Dwarf sections that should come after the content.
+void DwarfDebug::endModule() {
- // Compute DIE offsets and sizes.
- computeSizeAndOffsets();
+ if (!FirstCU) return;
+
+ // End any existing sections.
+ // TODO: Does this need to happen?
+ endSections();
+
+ // Finalize the debug info for the module.
+ finalizeModuleInfo();
+
+ if (!useSplitDwarf()) {
+ // Emit all the DIEs into a debug info section.
+ emitDebugInfo();
+
+ // Corresponding abbreviations into an abbrev section.
+ emitAbbreviations();
- // Emit all the DIEs into a debug info section
- emitDebugInfo();
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ emitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
+
+ // Emit inline info.
+ // TODO: When we don't need the option anymore we
+ // can remove all of the code that this section
+ // depends upon.
+ if (useDarwinGDBCompat())
+ emitDebugInlineInfo();
+ } else {
+ // TODO: Fill this in for separated debug sections and separate
+ // out information into new sections.
- // Corresponding abbreviations into a abbrev section.
- emitAbbreviations();
+ // Emit the debug info section and compile units.
+ emitDebugInfo();
+ emitDebugInfoDWO();
+
+ // Corresponding abbreviations into an abbrev section.
+ emitAbbreviations();
+ emitDebugAbbrevDWO();
+
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit info into a debug aranges section.
+ emitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacInfo();
+
+ // Emit DWO addresses.
+ InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection());
+
+ // Emit inline info.
+ // TODO: When we don't need the option anymore we
+ // can remove all of the code that this section
+ // depends upon.
+ if (useDarwinGDBCompat())
+ emitDebugInlineInfo();
+ }
// Emit info into the dwarf accelerator table sections.
if (useDwarfAccelTables()) {
@@ -875,45 +975,37 @@ void DwarfDebug::endModule() {
emitAccelNamespaces();
emitAccelTypes();
}
-
+
+ // Emit info into a debug pubnames section, if requested.
+ if (GenerateDwarfPubNamesSection)
+ emitDebugPubnames();
+
// Emit info into a debug pubtypes section.
// TODO: When we don't need the option anymore we can
// remove all of the code that adds to the table.
if (useDarwinGDBCompat())
emitDebugPubTypes();
- // Emit info into a debug loc section.
- emitDebugLoc();
-
- // Emit info into a debug aranges section.
- EmitDebugARanges();
-
- // Emit info into a debug ranges section.
- emitDebugRanges();
-
- // Emit info into a debug macinfo section.
- emitDebugMacInfo();
-
- // Emit inline info.
- // TODO: When we don't need the option anymore we
- // can remove all of the code that this section
- // depends upon.
- if (useDarwinGDBCompat())
- emitDebugInlineInfo();
-
- // Emit info into a debug str section.
+ // Finally emit string information into a string table.
emitDebugStr();
+ if (useSplitDwarf())
+ emitDebugStrDWO();
// clean up.
- DeleteContainerSeconds(DeadFnScopeMap);
SPMap.clear();
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I)
delete I->second;
- FirstCU = NULL; // Reset for the next Module, if any.
+
+ for (SmallVector<CompileUnit *, 1>::iterator I = SkeletonCUs.begin(),
+ E = SkeletonCUs.end(); I != E; ++I)
+ delete *I;
+
+ // Reset these for the next Module if we have one.
+ FirstCU = NULL;
}
-/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+// Find abstract variable, if any, associated with Var.
DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
DebugLoc ScopeLoc) {
LLVMContext &Ctx = DV->getContext();
@@ -933,8 +1025,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
return AbsDbgVariable;
}
-/// addCurrentFnArgument - If Var is a current function argument then add
-/// it to CurrentFnArguments list.
+// If Var is a current function argument, then add it to CurrentFnArguments list.
bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
DbgVariable *Var, LexicalScope *Scope) {
if (!LScopes.isCurrentFunctionScope(Scope))
@@ -943,7 +1034,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
if (DV.getTag() != dwarf::DW_TAG_arg_variable)
return false;
unsigned ArgNo = DV.getArgNumber();
- if (ArgNo == 0)
+ if (ArgNo == 0)
return false;
size_t Size = CurrentFnArguments.size();
@@ -957,8 +1048,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
return true;
}
-/// collectVariableInfoFromMMITable - Collect variable information from
-/// side table maintained by MMI.
+// Collect variable information from side table maintained by MMI.
void
DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
@@ -987,8 +1077,8 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
}
}
-/// isDbgValueInDefinedReg - Return true if debug value, encoded by
-/// DBG_VALUE instruction, is in a defined reg.
+// Return true if debug value, encoded by DBG_VALUE instruction, is in a
+// defined reg.
static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
return MI->getNumOperands() == 3 &&
@@ -996,10 +1086,9 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
}
-/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting
-/// at MI.
-static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
- const MCSymbol *FLabel,
+// Get .debug_loc entry for the instruction range starting at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+ const MCSymbol *FLabel,
const MCSymbol *SLabel,
const MachineInstr *MI) {
const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
@@ -1023,12 +1112,12 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
}
-/// collectVariableInfo - Find variables for each lexical scope.
+// Find variables for each lexical scope.
void
DwarfDebug::collectVariableInfo(const MachineFunction *MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
- /// collection info from MMI table.
+ // Collect info from the MMI table.
collectVariableInfoFromMMITable(MF, Processed);
for (SmallVectorImpl<const MDNode*>::const_iterator
@@ -1050,16 +1139,10 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
DISubprogram(DV.getContext()).describes(MF->getFunction()))
Scope = LScopes.getCurrentFunctionScope();
- else {
- if (DV.getVersion() <= LLVMDebugVersion9)
- Scope = LScopes.findLexicalScope(MInsn->getDebugLoc());
- else {
- if (MDNode *IA = DV.getInlinedAt())
- Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
- else
- Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
- }
- }
+ else if (MDNode *IA = DV.getInlinedAt())
+ Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
+ else
+ Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
@@ -1080,7 +1163,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
continue;
}
- // handle multiple DBG_VALUE instructions describing one variable.
+ // Handle multiple DBG_VALUE instructions describing one variable.
RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
for (SmallVectorImpl<const MachineInstr*>::const_iterator
@@ -1103,7 +1186,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
SLabel = FunctionEndSym;
else {
const MachineInstr *End = HI[1];
- DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n"
<< "\t" << *Begin << "\t" << *End << "\n");
if (End->isDebugValue())
SLabel = getLabelBeforeInsn(End);
@@ -1134,19 +1217,19 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
}
}
-/// getLabelBeforeInsn - Return Label preceding the instruction.
-const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+// Return Label preceding the instruction.
+MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
assert(Label && "Didn't insert label before instruction");
return Label;
}
-/// getLabelAfterInsn - Return Label immediately following the instruction.
-const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+// Return Label immediately following the instruction.
+MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
return LabelsAfterInsn.lookup(MI);
}
-/// beginInstruction - Process beginning of an instruction.
+// Process beginning of an instruction.
void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// Check if source location changes, but ignore DBG_VALUE locations.
if (!MI->isDebugValue()) {
@@ -1188,7 +1271,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
I->second = PrevLabel;
}
-/// endInstruction - Process end of an instruction.
+// Process end of an instruction.
void DwarfDebug::endInstruction(const MachineInstr *MI) {
// Don't create a new label after DBG_VALUE instructions.
// They don't generate code.
@@ -1214,11 +1297,10 @@ void DwarfDebug::endInstruction(const MachineInstr *MI) {
I->second = PrevLabel;
}
-/// identifyScopeMarkers() -
-/// Each LexicalScope has first instruction and last instruction to mark
-/// beginning and end of a scope respectively. Create an inverse map that list
-/// scopes starts (and ends) with an instruction. One instruction may start (or
-/// end) multiple scopes. Ignore scopes that are not reachable.
+// Each LexicalScope has a first and a last instruction to mark the beginning
+// and end of a scope respectively. Create an inverse map that lists scope
+// starts (and ends) with an instruction. One instruction may start (or end)
+// multiple scopes. Ignore scopes that are not reachable.
void DwarfDebug::identifyScopeMarkers() {
SmallVector<LexicalScope *, 4> WorkList;
WorkList.push_back(LScopes.getCurrentFunctionScope());
@@ -1247,15 +1329,15 @@ void DwarfDebug::identifyScopeMarkers() {
}
}
-/// getScopeNode - Get MDNode for DebugLoc's scope.
+// Get MDNode for DebugLoc's scope.
static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
if (MDNode *InlinedAt = DL.getInlinedAt(Ctx))
return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx);
return DL.getScope(Ctx);
}
-/// getFnDebugLoc - Walk up the scope chain of given debug loc and find
-/// line number info for the function.
+// Walk up the scope chain of given debug loc and find line number info
+// for the function.
static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
const MDNode *Scope = getScopeNode(DL, Ctx);
DISubprogram SP = getDISubprogram(Scope);
@@ -1271,14 +1353,21 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
return DebugLoc();
}
-/// beginFunction - Gather pre-function debug information. Assumes being
-/// emitted immediately after the function entry point.
+// Gather pre-function debug information. Assumes being called immediately
+// after the function entry point has been emitted.
void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo()) return;
LScopes.initialize(*MF);
if (LScopes.empty()) return;
identifyScopeMarkers();
+ // Set DwarfCompileUnitID in MCContext to the Compile Unit this function
+ // belongs to.
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+ assert(TheCU && "Unable to find compile unit!");
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+
FunctionBeginSym = Asm->GetTempSymbol("func_begin",
Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
@@ -1287,7 +1376,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
- /// LiveUserVar - Map physreg numbers to the MDNode they contain.
+ // LiveUserVar - Map physreg numbers to the MDNode they contain.
std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
@@ -1327,7 +1416,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (History.size() >= 2 &&
Prev->isIdenticalTo(History[History.size() - 2])) {
DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
- << "\t" << *Prev
+ << "\t" << *Prev
<< "\t" << *History[History.size() - 2] << "\n");
History.pop_back();
}
@@ -1413,7 +1502,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
const MachineInstr *Prev = History.back();
if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
const MachineBasicBlock *PrevMBB = Prev->getParent();
- MachineBasicBlock::const_iterator LastMI =
+ MachineBasicBlock::const_iterator LastMI =
PrevMBB->getLastNonDebugInstr();
if (LastMI == PrevMBB->end())
// Drop DBG_VALUE for empty range.
@@ -1442,7 +1531,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MF->getFunction()->getContext());
recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
FnStartDL.getScope(MF->getFunction()->getContext()),
- 0);
+ // We'd like to list the prologue as "not statements" but GDB behaves
+ // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+ DWARF2_FLAG_IS_STMT);
}
}
@@ -1452,8 +1543,7 @@ void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
// Vars.push_back(Var);
}
-/// endFunction - Gather and emit post-function debug information.
-///
+// Gather and emit post-function debug information.
void DwarfDebug::endFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo() || LScopes.empty()) return;
@@ -1462,10 +1552,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
Asm->OutStreamer.EmitLabel(FunctionEndSym);
-
+ // Set DwarfCompileUnitID in MCContext to default value.
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
+
SmallPtrSet<const MDNode *, 16> ProcessedVars;
collectVariableInfo(MF, ProcessedVars);
-
+
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
assert(TheCU && "Unable to find compile unit!");
@@ -1495,9 +1587,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0)
constructScopeDIE(TheCU, AScope);
}
-
+
DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
-
+
if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
@@ -1518,9 +1610,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
PrevLabel = NULL;
}
-/// recordSourceLine - Register a source line with debug info. Returns the
-/// unique label that was emitted and which provides correspondence to
-/// the source line list.
+// Register a source line with debug info. Returns the unique label that was
+// emitted and which provides correspondence to the source line list.
void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
unsigned Flags) {
StringRef Fn;
@@ -1552,7 +1643,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
} else
llvm_unreachable("Unexpected scope info");
- Src = GetOrCreateSourceID(Fn, Dir);
+ Src = getOrCreateSourceID(Fn, Dir,
+ Asm->OutStreamer.getContext().getDwarfCompileUnitID());
}
Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
}
@@ -1561,10 +1653,9 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
// Emit Methods
//===----------------------------------------------------------------------===//
-/// computeSizeAndOffset - Compute the size and offset of a DIE.
-///
+// Compute the size and offset of a DIE.
unsigned
-DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
+DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
// Get the children.
const std::vector<DIE *> &Children = Die->getChildren();
@@ -1573,7 +1664,7 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
// Get the abbreviation for this DIE.
unsigned AbbrevNumber = Die->getAbbrevNumber();
- const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+ const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1);
// Set DIE offset
Die->setOffset(Offset);
@@ -1581,8 +1672,8 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
// Start the size with the size of abbreviation code.
Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
- const SmallVector<DIEValue*, 32> &Values = Die->getValues();
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+ const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
// Size the DIE attribute values.
for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -1595,7 +1686,7 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
"Children flag not set");
for (unsigned j = 0, M = Children.size(); j < M; ++j)
- Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M);
+ Offset = computeSizeAndOffset(Children[j], Offset);
// End of children marker.
Offset += sizeof(int8_t);
@@ -1605,57 +1696,68 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
return Offset;
}
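
// A standalone sketch of the recursive layout in computeSizeAndOffset:
// record this node's offset, add its own encoded size, lay out children,
// then account for the end-of-children marker. The Node type and SelfSize
// field are hypothetical stand-ins for DIE and its abbrev/value sizes.
#include <cstdint>
#include <vector>

struct Node {
  unsigned Offset = 0;
  unsigned Size = 0;
  unsigned SelfSize = 0;        // Abbrev code plus attribute values.
  std::vector<Node *> Children;
};

static unsigned layout(Node *N, unsigned Offset) {
  N->Offset = Offset;
  Offset += N->SelfSize;
  if (!N->Children.empty()) {
    for (Node *C : N->Children)
      Offset = layout(C, Offset);
    Offset += sizeof(int8_t);   // End-of-children marker.
  }
  N->Size = Offset - N->Offset;
  return Offset;
}
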
-/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
-///
-void DwarfDebug::computeSizeAndOffsets() {
- for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
- E = CUMap.end(); I != E; ++I) {
- // Compute size of compile unit header.
- unsigned Offset =
+// Compute the size and offset of all the DIEs.
+void DwarfUnits::computeSizeAndOffsets() {
+ // Offset from the beginning of debug info section.
+ unsigned AccuOffset = 0;
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ (*I)->setDebugInfoOffset(AccuOffset);
+ unsigned Offset =
sizeof(int32_t) + // Length of Compilation Unit Info
sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
sizeof(int8_t); // Pointer Size (in bytes)
- computeSizeAndOffset(I->second->getCUDie(), Offset, true);
+
+ unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset);
+ AccuOffset += EndOffset;
}
}
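
// Worked example of the accumulation above, with illustrative sizes: the
// DWARF32 unit header is 4 + 2 + 4 + 1 = 11 bytes, so if CU0's DIEs bring
// its EndOffset to 96, CU0 occupies [0, 96) in .debug_info and CU1's
// setDebugInfoOffset receives AccuOffset = 96; getCUOffset later returns
// these start offsets when emitting DW_FORM_ref_addr references.
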
-/// EmitSectionLabels - Emit initial Dwarf sections with a label at
-/// the start of each one.
-void DwarfDebug::EmitSectionLabels() {
+// Emit initial Dwarf sections with a label at the start of each one.
+void DwarfDebug::emitSectionLabels() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
// Dwarf sections base addresses.
DwarfInfoSectionSym =
- EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
+ emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
DwarfAbbrevSectionSym =
- EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
- EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
+ emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+ if (useSplitDwarf())
+ DwarfAbbrevDWOSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfAbbrevDWOSection(),
+ "section_abbrev_dwo");
+ emitSectionSym(Asm, TLOF.getDwarfARangesSection());
if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
- EmitSectionSym(Asm, MacroInfo);
-
- EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
- EmitSectionSym(Asm, TLOF.getDwarfLocSection());
- EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+ emitSectionSym(Asm, MacroInfo);
+
+ DwarfLineSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+ emitSectionSym(Asm, TLOF.getDwarfLocSection());
+ if (GenerateDwarfPubNamesSection)
+ emitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
+ emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
DwarfStrSectionSym =
- EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
- DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+ emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
+ if (useSplitDwarf())
+ DwarfStrDWOSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
+ DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
"debug_range");
- DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(),
+ DwarfDebugLocSectionSym = emitSectionSym(Asm, TLOF.getDwarfLocSection(),
"section_debug_loc");
- TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
- EmitSectionSym(Asm, TLOF.getDataSection());
+ TextSectionSym = emitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
+ emitSectionSym(Asm, TLOF.getDataSection());
}
-/// emitDIE - Recursively emits a debug information entry.
-///
-void DwarfDebug::emitDIE(DIE *Die) {
+// Recursively emits a debug information entry.
+void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) {
// Get the abbreviation for this DIE.
unsigned AbbrevNumber = Die->getAbbrevNumber();
- const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+ const DIEAbbrev *Abbrev = Abbrevs->at(AbbrevNumber - 1);
// Emit the code (index) for the abbreviation.
if (Asm->isVerbose())
@@ -1665,8 +1767,8 @@ void DwarfDebug::emitDIE(DIE *Die) {
dwarf::TagString(Abbrev->getTag()));
Asm->EmitULEB128(AbbrevNumber);
- const SmallVector<DIEValue*, 32> &Values = Die->getValues();
- const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+ const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
// Emit the DIE attribute values.
for (unsigned i = 0, N = Values.size(); i < N; ++i) {
@@ -1682,6 +1784,13 @@ void DwarfDebug::emitDIE(DIE *Die) {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
unsigned Addr = Origin->getOffset();
+ if (Form == dwarf::DW_FORM_ref_addr) {
+ // For DW_FORM_ref_addr, output the offset from beginning of debug info
+ // section. Origin->getOffset() returns the offset from start of the
+ // compile unit.
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Addr += Holder.getCUOffset(Origin->getCompileUnit());
+ }
Asm->EmitInt32(Addr);
break;
}
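
// Minimal sketch of the fixup above (hypothetical names): DW_FORM_ref4 is
// CU-relative, while DW_FORM_ref_addr must be relative to the start of the
// .debug_info section, so the target CU's start offset is added in.
static unsigned refAddrOffset(unsigned OffsetInCU, unsigned CUStart) {
  return OffsetInCU + CUStart; // e.g. offset 84 in a CU starting at 96 -> 180.
}
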
@@ -1732,7 +1841,7 @@ void DwarfDebug::emitDIE(DIE *Die) {
const std::vector<DIE *> &Children = Die->getChildren();
for (unsigned j = 0, M = Children.size(); j < M; ++j)
- emitDIE(Children[j]);
+ emitDIE(Children[j], Abbrevs);
if (Asm->isVerbose())
Asm->OutStreamer.AddComment("End Of Children Mark");
@@ -1740,20 +1849,22 @@ void DwarfDebug::emitDIE(DIE *Die) {
}
}
-/// emitDebugInfo - Emit the debug info section.
-///
-void DwarfDebug::emitDebugInfo() {
- // Start debug info section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfInfoSection());
- for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
- E = CUMap.end(); I != E; ++I) {
- CompileUnit *TheCU = I->second;
+// Emit the various dwarf units to the unit section USection with
+// the abbreviations going into ASection.
+void DwarfUnits::emitUnits(DwarfDebug *DD,
+ const MCSection *USection,
+ const MCSection *ASection,
+ const MCSymbol *ASectionSym) {
+ Asm->OutStreamer.SwitchSection(USection);
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ CompileUnit *TheCU = *I;
DIE *Die = TheCU->getCUDie();
// Emit the compile units header.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
- TheCU->getID()));
+ Asm->OutStreamer
+ .EmitLabel(Asm->GetTempSymbol(USection->getLabelBeginName(),
+ TheCU->getUniqueID()));
// Emit size of content not including length itself
unsigned ContentSize = Die->getSize() +
@@ -1766,31 +1877,62 @@ void DwarfDebug::emitDebugInfo() {
Asm->OutStreamer.AddComment("DWARF version number");
Asm->EmitInt16(dwarf::DWARF_VERSION);
Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
- DwarfAbbrevSectionSym);
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()),
+ ASectionSym);
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
- emitDIE(Die);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
+ DD->emitDIE(Die, Abbreviations);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(),
+ TheCU->getUniqueID()));
}
}
-/// emitAbbreviations - Emit the abbreviation section.
-///
-void DwarfDebug::emitAbbreviations() const {
+/// For a given compile unit DIE, returns the offset from the beginning of
+/// debug info.
+unsigned DwarfUnits::getCUOffset(DIE *Die) {
+ assert(Die->getTag() == dwarf::DW_TAG_compile_unit &&
+ "Input DIE should be compile unit in getCUOffset.");
+ for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
+ E = CUs.end(); I != E; ++I) {
+ CompileUnit *TheCU = *I;
+ if (TheCU->getCUDie() == Die)
+ return TheCU->getDebugInfoOffset();
+ }
+ llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits.");
+}
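To make the DW_FORM_ref_addr arithmetic used in emitDIE concrete, here is a minimal standalone sketch with hypothetical numbers (plain C++, not the LLVM API): the section-relative value is simply the CU's own offset in .debug_info plus the DIE's CU-relative offset.

  #include <cstdio>

  int main() {
    // Hypothetical layout: the referenced DIE sits 0x54 bytes into its
    // compile unit, and that unit starts 0x9c bytes into .debug_info.
    unsigned CUOffset  = 0x9c; // getCUOffset(Origin->getCompileUnit())
    unsigned DIEOffset = 0x54; // Origin->getOffset(), CU-relative
    printf("DW_FORM_ref_addr value: 0x%x\n", CUOffset + DIEOffset); // 0xf0
    return 0;
  }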
+
+// Emit the debug info section.
+void DwarfDebug::emitDebugInfo() {
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+
+ Holder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoSection(),
+ Asm->getObjFileLowering().getDwarfAbbrevSection(),
+ DwarfAbbrevSectionSym);
+}
+
+// Emit the abbreviation section.
+void DwarfDebug::emitAbbreviations() {
+ if (!useSplitDwarf())
+ emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection(),
+ &Abbreviations);
+ else
+ emitSkeletonAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
+}
+
+void DwarfDebug::emitAbbrevs(const MCSection *Section,
+ std::vector<DIEAbbrev *> *Abbrevs) {
// Check to see if it is worth the effort.
- if (!Abbreviations.empty()) {
+ if (!Abbrevs->empty()) {
// Start the debug abbrev section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfAbbrevSection());
+ Asm->OutStreamer.SwitchSection(Section);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_begin"));
+ MCSymbol *Begin = Asm->GetTempSymbol(Section->getLabelBeginName());
+ Asm->OutStreamer.EmitLabel(Begin);
// For each abbreviation.
- for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+ for (unsigned i = 0, N = Abbrevs->size(); i < N; ++i) {
// Get abbreviation data
- const DIEAbbrev *Abbrev = Abbreviations[i];
+ const DIEAbbrev *Abbrev = Abbrevs->at(i);
// Emit the abbreviation code (base 1 index).
Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
@@ -1802,13 +1944,12 @@ void DwarfDebug::emitAbbreviations() const {
// Mark end of abbreviations.
Asm->EmitULEB128(0, "EOM(3)");
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_end"));
+ MCSymbol *End = Asm->GetTempSymbol(Section->getLabelEndName());
+ Asm->OutStreamer.EmitLabel(End);
}
}
-/// emitEndOfLineMatrix - Emit the last address of the section and the end of
-/// the line matrix.
-///
+// Emit the last address of the section and the end of the line matrix.
void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
// Define last address of section.
Asm->OutStreamer.AddComment("Extended Op");
@@ -1822,8 +1963,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->OutStreamer.AddComment("Section end label");
Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
- Asm->getDataLayout().getPointerSize(),
- 0/*AddrSpace*/);
+ Asm->getDataLayout().getPointerSize());
// Mark end of matrix.
Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
@@ -1832,8 +1972,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(1);
}
-/// emitAccelNames - Emit visible names into a hashed accelerator table
-/// section.
+// Emit visible names into a hashed accelerator table section.
void DwarfDebug::emitAccelNames() {
DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
dwarf::DW_FORM_data4));
@@ -1858,11 +1997,11 @@ void DwarfDebug::emitAccelNames() {
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, this);
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
}
-/// emitAccelObjC - Emit objective C classes and categories into a hashed
-/// accelerator table section.
+// Emit objective C classes and categories into a hashed accelerator table
+// section.
void DwarfDebug::emitAccelObjC() {
DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
dwarf::DW_FORM_data4));
@@ -1887,11 +2026,10 @@ void DwarfDebug::emitAccelObjC() {
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, this);
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
}
-/// emitAccelNamespace - Emit namespace dies into a hashed accelerator
-/// table.
+// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
dwarf::DW_FORM_data4));
@@ -1916,10 +2054,10 @@ void DwarfDebug::emitAccelNamespaces() {
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, this);
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
}
-/// emitAccelTypes() - Emit type dies into a hashed accelerator table.
+// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
std::vector<DwarfAccelTable::Atom> Atoms;
Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
@@ -1951,7 +2089,62 @@ void DwarfDebug::emitAccelTypes() {
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, this);
+ AT.Emit(Asm, SectionBegin, &InfoHolder);
+}
+
+// Emit visible names into a debug pubnames section.
+void DwarfDebug::emitDebugPubnames() {
+ const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+
+ typedef DenseMap<const MDNode*, CompileUnit*> CUMapType;
+ for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ unsigned ID = TheCU->getUniqueID();
+
+ if (TheCU->getGlobalNames().empty())
+ continue;
+
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+ Asm->OutStreamer.AddComment("Length of Public Names Info");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID),
+ Asm->GetTempSymbol("pubnames_begin", ID), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID));
+
+ Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID),
+ Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobalNames();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const DIE *Entity = GI->second;
+
+ Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID));
+ }
}
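The loop above fixes the on-disk shape of each per-CU pubnames set: a 4-byte length, a 2-byte version, the CU's offset and length in .debug_info, then {DIE offset, NUL-terminated name} pairs closed by a zero word. Below is a self-contained sketch of that encoding, assuming DWARF32 and little-endian output (the real emitter computes the length with label differences instead of back-patching):

  #include <cstdint>
  #include <string>
  #include <vector>

  struct PubName { uint32_t DieOffset; std::string Name; };

  std::vector<uint8_t> encodePubnames(uint32_t CUOffset, uint32_t CULength,
                                      const std::vector<PubName> &Names) {
    std::vector<uint8_t> Out;
    auto Emit32 = [&Out](uint32_t V) {
      for (int i = 0; i < 4; ++i) Out.push_back(uint8_t(V >> (8 * i)));
    };
    Emit32(0);                          // unit length, patched below
    const uint16_t Version = 4;         // stand-in for dwarf::DWARF_VERSION
    Out.push_back(uint8_t(Version));
    Out.push_back(uint8_t(Version >> 8));
    Emit32(CUOffset);                   // offset of the CU in .debug_info
    Emit32(CULength);                   // length of that CU
    for (const PubName &P : Names) {
      Emit32(P.DieOffset);              // DIE offset within the CU
      Out.insert(Out.end(), P.Name.begin(), P.Name.end());
      Out.push_back(0);                 // names are NUL-terminated
    }
    Emit32(0);                          // end mark
    uint32_t Len = uint32_t(Out.size()) - 4; // length excludes its own field
    for (int i = 0; i < 4; ++i) Out[i] = uint8_t(Len >> (8 * i));
    return Out;
  }

  int main() { return 0; }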
void DwarfDebug::emitDebugPubTypes() {
@@ -1963,22 +2156,26 @@ void DwarfDebug::emitDebugPubTypes() {
Asm->getObjFileLowering().getDwarfPubTypesSection());
Asm->OutStreamer.AddComment("Length of Public Types Info");
Asm->EmitLabelDifference(
- Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
- Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
+ Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()),
+ Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
- TheCU->getID()));
+ TheCU->getUniqueID()));
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
Asm->EmitInt16(dwarf::DWARF_VERSION);
Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
+ Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(),
+ TheCU->getUniqueID()),
DwarfInfoSectionSym);
Asm->OutStreamer.AddComment("Compilation Unit Length");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
- Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(),
+ TheCU->getUniqueID()),
+ Asm->GetTempSymbol(ISec->getLabelBeginName(),
+ TheCU->getUniqueID()),
4);
const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
@@ -1992,33 +2189,34 @@ void DwarfDebug::emitDebugPubTypes() {
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
// Emit the name with a terminating null byte.
- Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+ Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1));
}
Asm->OutStreamer.AddComment("End Mark");
Asm->EmitInt32(0);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
- TheCU->getID()));
+ TheCU->getUniqueID()));
}
}
-/// emitDebugStr - Emit visible names into a debug str section.
-///
-void DwarfDebug::emitDebugStr() {
- // Check to see if it is worth the effort.
+// Emit strings into a string section.
+void DwarfUnits::emitStrings(const MCSection *StrSection,
+ const MCSection *OffsetSection = NULL,
+ const MCSymbol *StrSecSym = NULL) {
+
if (StringPool.empty()) return;
// Start the dwarf str section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfStrSection());
+ Asm->OutStreamer.SwitchSection(StrSection);
// Get all of the string pool entries and put them in an array by their ID so
// we can sort them.
SmallVector<std::pair<unsigned,
- StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+ StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
- I = StringPool.begin(), E = StringPool.end(); I != E; ++I)
+ I = StringPool.begin(), E = StringPool.end();
+ I != E; ++I)
Entries.push_back(std::make_pair(I->second.second, &*I));
array_pod_sort(Entries.begin(), Entries.end());
@@ -2029,18 +2227,65 @@ void DwarfDebug::emitDebugStr() {
// Emit the string itself with a terminating null byte.
Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(),
- Entries[i].second->getKeyLength()+1),
- 0/*addrspace*/);
+ Entries[i].second->getKeyLength()+1));
+ }
+
+ // If we've got an offset section, go ahead and emit that now as well.
+ if (OffsetSection) {
+ Asm->OutStreamer.SwitchSection(OffsetSection);
+ unsigned offset = 0;
+ unsigned size = 4; // FIXME: DWARF64 is 8.
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ Asm->OutStreamer.EmitIntValue(offset, size);
+ offset += Entries[i].second->getKeyLength() + 1;
+ }
}
}
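Each entry in the offset table emitted above is just the running byte position of the corresponding string, since the strings go out NUL-terminated in index order. A tiny illustration with a made-up pool:

  #include <cstdio>
  #include <cstring>

  int main() {
    // Hypothetical pool, already sorted by index as in the code above.
    const char *Pool[] = {"main", "int", "foo.c"};
    unsigned Offset = 0;
    for (const char *S : Pool) {
      printf("offset %u -> \"%s\"\n", Offset, S); // 4-byte entry (DWARF32)
      Offset += unsigned(strlen(S)) + 1;          // account for the NUL
    }
    return 0; // prints offsets 0, 5, 9
  }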
-/// emitDebugLoc - Emit visible names into a debug loc section.
-///
+// Emit addresses into an address section.
+void DwarfUnits::emitAddresses(const MCSection *AddrSection) {
+
+ if (AddressPool.empty()) return;
+
+ // Start the dwarf addr section.
+ Asm->OutStreamer.SwitchSection(AddrSection);
+
+ // Get all of the address pool entries and put them in an array by their ID
+ // so we can sort them.
+ SmallVector<std::pair<unsigned,
+ std::pair<MCSymbol*, unsigned>* >, 64> Entries;
+
+ for (DenseMap<MCSymbol*, std::pair<MCSymbol*, unsigned> >::iterator
+ I = AddressPool.begin(), E = AddressPool.end();
+ I != E; ++I)
+ Entries.push_back(std::make_pair(I->second.second, &(I->second)));
+
+ array_pod_sort(Entries.begin(), Entries.end());
+
+ for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+ // Emit a label for reference from debug information entries.
+ MCSymbol *Sym = Entries[i].second->first;
+ if (Sym)
+ Asm->EmitLabelReference(Entries[i].second->first,
+ Asm->getDataLayout().getPointerSize());
+ else
+ Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize());
+ }
+
+}
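emitAddresses gives each pooled label one pointer-size slot, in pool-index order; split-dwarf attribute forms such as DW_FORM_GNU_addr_index then carry the slot index instead of a relocated address. A minimal model of the resulting address-table contents (made-up addresses, not the MC layer):

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Hypothetical pool in index order; a null label becomes a zero slot.
    const uint64_t Slots[] = {0x400520, 0x400564, 0};
    for (unsigned Index = 0; Index != 3; ++Index)
      printf("addr slot %u: 0x%llx\n", Index,
             (unsigned long long)Slots[Index]); // one pointer-size value each
    return 0;
  }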
+
+// Emit visible names into a debug str section.
+void DwarfDebug::emitDebugStr() {
+ DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
+}
+
+// Emit visible names into a debug loc section.
void DwarfDebug::emitDebugLoc() {
if (DotDebugLocEntries.empty())
return;
- for (SmallVector<DotDebugLocEntry, 4>::iterator
+ for (SmallVectorImpl<DotDebugLocEntry>::iterator
I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
I != E; ++I) {
DotDebugLocEntry &Entry = *I;
@@ -2054,18 +2299,18 @@ void DwarfDebug::emitDebugLoc() {
unsigned char Size = Asm->getDataLayout().getPointerSize();
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
unsigned index = 1;
- for (SmallVector<DotDebugLocEntry, 4>::iterator
+ for (SmallVectorImpl<DotDebugLocEntry>::iterator
I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
I != E; ++I, ++index) {
DotDebugLocEntry &Entry = *I;
if (Entry.isMerged()) continue;
if (Entry.isEmpty()) {
- Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
- Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitIntValue(0, Size);
+ Asm->OutStreamer.EmitIntValue(0, Size);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
} else {
- Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0);
- Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
+ Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size);
+ Asm->OutStreamer.EmitSymbolValue(Entry.End, Size);
DIVariable DV(Entry.Variable);
Asm->OutStreamer.AddComment("Loc expr size");
MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
@@ -2075,7 +2320,7 @@ void DwarfDebug::emitDebugLoc() {
if (Entry.isInt()) {
DIBasicType BTy(DV.getType());
if (BTy.Verify() &&
- (BTy.getEncoding() == dwarf::DW_ATE_signed
+ (BTy.getEncoding() == dwarf::DW_ATE_signed
|| BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
Asm->OutStreamer.AddComment("DW_OP_consts");
Asm->EmitInt8(dwarf::DW_OP_consts);
@@ -2086,7 +2331,7 @@ void DwarfDebug::emitDebugLoc() {
Asm->EmitULEB128(Entry.getInt());
}
} else if (Entry.isLocation()) {
- if (!DV.hasComplexAddress())
+ if (!DV.hasComplexAddress())
// Regular entry.
Asm->EmitDwarfRegOp(Entry.Loc);
else {
@@ -2112,7 +2357,7 @@ void DwarfDebug::emitDebugLoc() {
} else {
Asm->EmitDwarfRegOp(Entry.Loc);
}
-
+
// Emit remaining complex address elements.
for (; i < N; ++i) {
uint64_t Element = DV.getAddrElement(i);
@@ -2134,33 +2379,30 @@ void DwarfDebug::emitDebugLoc() {
}
}
-/// EmitDebugARanges - Emit visible names into a debug aranges section.
-///
-void DwarfDebug::EmitDebugARanges() {
+// Emit visible names into a debug aranges section.
+void DwarfDebug::emitDebugARanges() {
// Start the dwarf aranges section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
}
-/// emitDebugRanges - Emit visible names into a debug ranges section.
-///
+// Emit visible names into a debug ranges section.
void DwarfDebug::emitDebugRanges() {
// Start the dwarf ranges section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
unsigned char Size = Asm->getDataLayout().getPointerSize();
- for (SmallVector<const MCSymbol *, 8>::iterator
+ for (SmallVectorImpl<const MCSymbol *>::iterator
I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
I != E; ++I) {
if (*I)
- Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0);
+ Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size);
else
- Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitIntValue(0, Size);
}
}
-/// emitDebugMacInfo - Emit visible names into a debug macinfo section.
-///
+// Emit visible names into a debug macinfo section.
void DwarfDebug::emitDebugMacInfo() {
if (const MCSection *LineInfo =
Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
@@ -2169,24 +2411,24 @@ void DwarfDebug::emitDebugMacInfo() {
}
}
-/// emitDebugInlineInfo - Emit inline info using following format.
-/// Section Header:
-/// 1. length of section
-/// 2. Dwarf version number
-/// 3. address size.
-///
-/// Entries (one "entry" for each function that was inlined):
-///
-/// 1. offset into __debug_str section for MIPS linkage name, if exists;
-/// otherwise offset into __debug_str for regular function name.
-/// 2. offset into __debug_str section for regular function name.
-/// 3. an unsigned LEB128 number indicating the number of distinct inlining
-/// instances for the function.
-///
-/// The rest of the entry consists of a {die_offset, low_pc} pair for each
-/// inlined instance; the die_offset points to the inlined_subroutine die in the
-/// __debug_info section, and the low_pc is the starting address for the
-/// inlining instance.
+// Emit inline info using following format.
+// Section Header:
+// 1. length of section
+// 2. Dwarf version number
+// 3. address size.
+//
+// Entries (one "entry" for each function that was inlined):
+//
+// 1. offset into __debug_str section for MIPS linkage name, if exists;
+// otherwise offset into __debug_str for regular function name.
+// 2. offset into __debug_str section for regular function name.
+// 3. an unsigned LEB128 number indicating the number of distinct inlining
+// instances for the function.
+//
+// The rest of the entry consists of a {die_offset, low_pc} pair for each
+// inlined instance; the die_offset points to the inlined_subroutine die in the
+// __debug_info section, and the low_pc is the starting address for the
+// inlining instance.
void DwarfDebug::emitDebugInlineInfo() {
if (!Asm->MAI->doesDwarfUseInlineInfoSection())
return;
@@ -2208,38 +2450,121 @@ void DwarfDebug::emitDebugInlineInfo() {
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
- for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+ for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(),
E = InlinedSPNodes.end(); I != E; ++I) {
const MDNode *Node = *I;
DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
= InlineInfo.find(Node);
- SmallVector<InlineInfoLabels, 4> &Labels = II->second;
+ SmallVectorImpl<InlineInfoLabels> &Labels = II->second;
DISubprogram SP(Node);
StringRef LName = SP.getLinkageName();
StringRef Name = SP.getName();
Asm->OutStreamer.AddComment("MIPS linkage name");
if (LName.empty())
- Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+ Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
+ DwarfStrSectionSym);
else
- Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
+ Asm->EmitSectionOffset(InfoHolder
+ .getStringPoolEntry(getRealLinkageName(LName)),
DwarfStrSectionSym);
Asm->OutStreamer.AddComment("Function name");
- Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+ Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name),
+ DwarfStrSectionSym);
Asm->EmitULEB128(Labels.size(), "Inline count");
- for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
+ for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(),
LE = Labels.end(); LI != LE; ++LI) {
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
Asm->EmitInt32(LI->second->getOffset());
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
Asm->OutStreamer.EmitSymbolValue(LI->first,
- Asm->getDataLayout().getPointerSize(),0);
+ Asm->getDataLayout().getPointerSize());
}
}
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
}
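Spelled out as data, one entry in the debug_inlined format described above looks roughly like this (illustrative C++ only; the emitter streams the fields directly and never defines such a struct, and low_pc is pointer-size in the real output):

  #include <cstdint>

  struct InlineInstance {
    uint32_t DieOffset; // inlined_subroutine DIE in __debug_info
    uint64_t LowPC;     // starting address of this inlining instance
  };

  struct InlinedEntryHeader {
    uint32_t LinkageNameOffset;  // into __debug_str; equals the plain name's
                                 // offset when there is no linkage name
    uint32_t FunctionNameOffset; // into __debug_str
    // ...followed by a ULEB128 instance count, then that many
    // InlineInstance records.
  };

  int main() { return 0; }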
+
+// DWARF5 Experimental Separate Dwarf emitters.
+
+// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
+// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
+// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present,
+// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa.
+CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
+ DICompileUnit DIUnit(N);
+ CompilationDir = DIUnit.getDirectory();
+
+ DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+ CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+ DIUnit.getLanguage(), Die, Asm,
+ this, &SkeletonHolder);
+
+ NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
+ DIUnit.getSplitDebugFilename());
+
+ // This should be a unique identifier when we want to build .dwp files.
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+
+ // FIXME: The addr base should be relative for each compile unit, however,
+ // this one is going to be 0 anyhow.
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
+
+ // Section 2.17.1 of the DWARF standard requires that we use DW_AT_low_pc
+ // for a single entry point into an entity. We're using 0, or a NULL label,
+ // for this.
+ NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+
+ // DW_AT_stmt_list is an offset of line number information for this
+ // compile unit in debug_line section.
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
+ DwarfLineSectionSym);
+ else
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0);
+
+ if (!CompilationDir.empty())
+ NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+
+ SkeletonHolder.addUnit(NewCU);
+ SkeletonCUs.push_back(NewCU);
+
+ return NewCU;
+}
+
+void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) {
+ assert(useSplitDwarf() && "No split dwarf debug info?");
+ emitAbbrevs(Section, &SkeletonAbbrevs);
+}
+
+// Emit the .debug_info.dwo section for separated dwarf. This contains the
+// compile units that would normally be in debug_info.
+void DwarfDebug::emitDebugInfoDWO() {
+ assert(useSplitDwarf() && "No split dwarf debug info?");
+ InfoHolder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoDWOSection(),
+ Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
+ DwarfAbbrevDWOSectionSym);
+}
+
+// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
+// abbreviations for the .debug_info.dwo section.
+void DwarfDebug::emitDebugAbbrevDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
+ &Abbreviations);
+}
+
+// Emit the .debug_str.dwo section for separated dwarf. This contains the
+// string section and is identical in format to traditional .debug_str
+// sections.
+void DwarfDebug::emitDebugStrDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ const MCSection *OffSec = Asm->getObjFileLowering()
+ .getDwarfStrOffDWOSection();
+ const MCSymbol *StrSym = DwarfStrSectionSym;
+ InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
+ OffSec, StrSym);
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 61d9a51a5279..81e345e6281d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -15,15 +15,15 @@
#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
#include "DIE.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/LexicalScopes.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/DebugLoc.h"
@@ -41,10 +41,10 @@ class DIEAbbrev;
class DIE;
class DIEBlock;
class DIEEntry;
+class DwarfDebug;
//===----------------------------------------------------------------------===//
-/// SrcLineInfo - This class is used to record source line correspondence.
-///
+/// \brief This class is used to record source line correspondence.
class SrcLineInfo {
unsigned Line; // Source line number.
unsigned Column; // Source column.
@@ -61,8 +61,8 @@ public:
MCSymbol *getLabel() const { return Label; }
};
-/// DotDebugLocEntry - This struct describes location entries emitted in
-/// .debug_loc section.
+/// \brief This struct describes location entries emitted in the .debug_loc
+/// section.
typedef struct DotDebugLocEntry {
const MCSymbol *Begin;
const MCSymbol *End;
@@ -83,25 +83,25 @@ typedef struct DotDebugLocEntry {
const ConstantFP *CFP;
const ConstantInt *CIP;
} Constants;
- DotDebugLocEntry()
- : Begin(0), End(0), Variable(0), Merged(false),
+ DotDebugLocEntry()
+ : Begin(0), End(0), Variable(0), Merged(false),
Constant(false) { Constants.Int = 0;}
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
- const MDNode *V)
- : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
+ const MDNode *V)
+ : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
- : Begin(B), End(E), Variable(0), Merged(false),
+ : Begin(B), End(E), Variable(0), Merged(false),
Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
- : Begin(B), End(E), Variable(0), Merged(false),
+ : Begin(B), End(E), Variable(0), Merged(false),
Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
const ConstantInt *IPtr)
- : Begin(B), End(E), Variable(0), Merged(false),
+ : Begin(B), End(E), Variable(0), Merged(false),
Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
- /// Empty entries are also used as a trigger to emit temp label. Such
+ /// \brief Empty entries are also used as a trigger to emit temp label. Such
/// labels are referenced to find the debug_loc offset for a given DIE.
bool isEmpty() { return Begin == 0 && End == 0; }
bool isMerged() { return Merged; }
@@ -121,8 +121,7 @@ typedef struct DotDebugLocEntry {
} DotDebugLocEntry;
//===----------------------------------------------------------------------===//
-/// DbgVariable - This class is used to track local variable information.
-///
+/// \brief This class is used to track local variable information.
class DbgVariable {
DIVariable Var; // Variable Descriptor.
DIE *TheDIE; // Variable DIE.
@@ -132,7 +131,7 @@ class DbgVariable {
int FrameIndex;
public:
// AbsVar may be NULL.
- DbgVariable(DIVariable V, DbgVariable *AV)
+ DbgVariable(DIVariable V, DbgVariable *AV)
: Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
FrameIndex(~0) {}
@@ -148,14 +147,14 @@ public:
void setMInsn(const MachineInstr *M) { MInsn = M; }
int getFrameIndex() const { return FrameIndex; }
void setFrameIndex(int FI) { FrameIndex = FI; }
- // Translate tag to proper Dwarf tag.
- unsigned getTag() const {
+ // Translate tag to proper Dwarf tag.
+ unsigned getTag() const {
if (Var.getTag() == dwarf::DW_TAG_arg_variable)
return dwarf::DW_TAG_formal_parameter;
-
+
return dwarf::DW_TAG_variable;
}
- /// isArtificial - Return true if DbgVariable is artificial.
+ /// \brief Return true if DbgVariable is artificial.
bool isArtificial() const {
if (Var.isArtificial())
return true;
@@ -171,7 +170,7 @@ public:
return true;
return false;
}
-
+
bool variableHasComplexAddress() const {
assert(Var.Verify() && "Invalid complex DbgVariable!");
return Var.hasComplexAddress();
@@ -180,7 +179,7 @@ public:
assert(Var.Verify() && "Invalid complex DbgVariable!");
return Var.isBlockByrefVariable();
}
- unsigned getNumAddrElements() const {
+ unsigned getNumAddrElements() const {
assert(Var.Verify() && "Invalid complex DbgVariable!");
return Var.getNumAddrElements();
}
@@ -190,108 +189,192 @@ public:
DIType getType() const;
};
+
+// A String->Symbol mapping of strings used by indirect
+// references.
+typedef StringMap<std::pair<MCSymbol*, unsigned>,
+ BumpPtrAllocator&> StrPool;
+
+// A Symbol->pair<Symbol, unsigned> mapping of addresses used by indirect
+// references.
+typedef DenseMap<MCSymbol *, std::pair<MCSymbol *, unsigned> > AddrPool;
+
+/// \brief Collects and handles information specific to a particular
+/// collection of units.
+class DwarfUnits {
+ // Target of Dwarf emission, used for sizing of abbreviations.
+ AsmPrinter *Asm;
+
+ // Used to uniquely define abbreviations.
+ FoldingSet<DIEAbbrev> *AbbreviationsSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> *Abbreviations;
+
+ // A pointer to all units in the section.
+ SmallVector<CompileUnit *, 1> CUs;
+
+ // Collection of strings for this unit and assorted symbols.
+ StrPool StringPool;
+ unsigned NextStringPoolNumber;
+ std::string StringPref;
+
+ // Collection of addresses for this unit and assorted labels.
+ AddrPool AddressPool;
+ unsigned NextAddrPoolNumber;
+
+public:
+ DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
+ std::vector<DIEAbbrev *> *A, const char *Pref,
+ BumpPtrAllocator &DA) :
+ Asm(AP), AbbreviationsSet(AS), Abbreviations(A),
+ StringPool(DA), NextStringPoolNumber(0), StringPref(Pref),
+ AddressPool(), NextAddrPoolNumber(0) {}
+
+ /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
+
+ /// \brief Compute the size and offset of all the DIEs.
+ void computeSizeAndOffsets();
+
+ /// \brief Define a unique number for the abbreviation.
+ void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+ /// \brief Add a unit to the list of CUs.
+ void addUnit(CompileUnit *CU) { CUs.push_back(CU); }
+
+ /// \brief Emit all of the units to the section listed with the given
+ /// abbreviation section.
+ void emitUnits(DwarfDebug *, const MCSection *, const MCSection *,
+ const MCSymbol *);
+
+ /// \brief Emit all of the strings to the section given.
+ void emitStrings(const MCSection *, const MCSection *, const MCSymbol *);
+
+ /// \brief Emit all of the addresses to the section given.
+ void emitAddresses(const MCSection *);
+
+ /// \brief Returns the entry into the start of the pool.
+ MCSymbol *getStringPoolSym();
+
+ /// \brief Returns an entry into the string pool with the given
+ /// string text.
+ MCSymbol *getStringPoolEntry(StringRef Str);
+
+ /// \brief Returns the index into the string pool with the given
+ /// string text.
+ unsigned getStringPoolIndex(StringRef Str);
+
+ /// \brief Returns the string pool.
+ StrPool *getStringPool() { return &StringPool; }
+
+ /// \brief Returns the index into the address pool with the given
+ /// label/symbol.
+ unsigned getAddrPoolIndex(MCSymbol *);
+
+ /// \brief Returns the address pool.
+ AddrPool *getAddrPool() { return &AddressPool; }
+
+ /// \brief For a given compile unit DIE, returns the offset from the
+ /// beginning of debug info.
+ unsigned getCUOffset(DIE *Die);
+};
+
+/// \brief Collects and handles dwarf debug information.
class DwarfDebug {
- /// Asm - Target of Dwarf emission.
+ // Target of Dwarf emission.
AsmPrinter *Asm;
- /// MMI - Collected machine module information.
+ // Collected machine module information.
MachineModuleInfo *MMI;
- /// DIEValueAllocator - All DIEValues are allocated through this allocator.
+ // All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
//===--------------------------------------------------------------------===//
- // Attributes used to construct specific Dwarf sections.
+ // Attribute used to construct specific Dwarf sections.
//
CompileUnit *FirstCU;
- /// Maps MDNode with its corresponding CompileUnit.
+ // Maps MDNode with its corresponding CompileUnit.
DenseMap <const MDNode *, CompileUnit *> CUMap;
- /// Maps subprogram MDNode with its corresponding CompileUnit.
+ // Maps subprogram MDNode with its corresponding CompileUnit.
DenseMap <const MDNode *, CompileUnit *> SPMap;
- /// AbbreviationsSet - Used to uniquely define abbreviations.
- ///
+ // Used to uniquely define abbreviations.
FoldingSet<DIEAbbrev> AbbreviationsSet;
- /// Abbreviations - A list of all the unique abbreviations in use.
- ///
+ // A list of all the unique abbreviations in use.
std::vector<DIEAbbrev *> Abbreviations;
- /// SourceIdMap - Source id map, i.e. pair of source filename and directory,
- /// separated by a zero byte, mapped to a unique id.
+ // Stores the current file ID for a given compile unit.
+ DenseMap <unsigned, unsigned> FileIDCUMap;
+ // Source id map, i.e. CUID, source filename and directory,
+ // separated by a zero byte, mapped to a unique id.
StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
- /// StringPool - A String->Symbol mapping of strings used by indirect
- /// references.
- StringMap<std::pair<MCSymbol*, unsigned>, BumpPtrAllocator&> StringPool;
- unsigned NextStringPoolNumber;
-
- /// SectionMap - Provides a unique id per text section.
- ///
+ // Provides a unique id per text section.
SetVector<const MCSection*> SectionMap;
- /// CurrentFnArguments - List of Arguments (DbgValues) for current function.
+ // List of Arguments (DbgValues) for current function.
SmallVector<DbgVariable *, 8> CurrentFnArguments;
LexicalScopes LScopes;
- /// AbstractSPDies - Collection of abstract subprogram DIEs.
+ // Collection of abstract subprogram DIEs.
DenseMap<const MDNode *, DIE *> AbstractSPDies;
- /// ScopeVariables - Collection of dbg variables of a scope.
+ // Collection of dbg variables of a scope.
DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables;
- /// AbstractVariables - Collection of abstract variables.
+ // Collection of abstract variables.
DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
- /// DotDebugLocEntries - Collection of DotDebugLocEntry.
+ // Collection of DotDebugLocEntry.
SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
- /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
- /// (at the end of the module) as DW_AT_inline.
+ // Collection of subprogram DIEs that are marked (at the end of the module)
+ // as DW_AT_inline.
SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
- /// InlineInfo - Keep track of inlined functions and their location. This
- /// information is used to populate the debug_inlined section.
+ // Keep track of inlined functions and their location. This
+ // information is used to populate the debug_inlined section.
typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
SmallVector<const MDNode *, 4> InlinedSPNodes;
- // ProcessedSPNodes - This is a collection of subprogram MDNodes that
- // are processed to create DIEs.
+ // This is a collection of subprogram MDNodes that are processed to
+ // create DIEs.
SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
- /// LabelsBeforeInsn - Maps instruction with label emitted before
- /// instruction.
+ // Maps instruction with label emitted before instruction.
DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
- /// LabelsAfterInsn - Maps instruction with label emitted after
- /// instruction.
+ // Maps instruction with label emitted after instruction.
DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
- /// UserVariables - Every user variable mentioned by a DBG_VALUE instruction
- /// in order of appearance.
+ // Every user variable mentioned by a DBG_VALUE instruction in order of
+ // appearance.
SmallVector<const MDNode*, 8> UserVariables;
- /// DbgValues - For each user variable, keep a list of DBG_VALUE
- /// instructions in order. The list can also contain normal instructions that
- /// clobber the previous DBG_VALUE.
+ // For each user variable, keep a list of DBG_VALUE instructions in order.
+ // The list can also contain normal instructions that clobber the previous
+ // DBG_VALUE.
typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> >
DbgValueHistoryMap;
DbgValueHistoryMap DbgValues;
SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
- /// Previous instruction's location information. This is used to determine
- /// label location to indicate scope boundries in dwarf debug info.
+ // Previous instruction's location information. This is used to determine
+ // label location to indicate scope boundaries in dwarf debug info.
DebugLoc PrevInstLoc;
MCSymbol *PrevLabel;
- /// PrologEndLoc - This location indicates end of function prologue and
- /// beginning of function body.
+ // This location indicates end of function prologue and beginning of function
+ // body.
DebugLoc PrologEndLoc;
struct FunctionDebugFrameInfo {
@@ -309,180 +392,208 @@ class DwarfDebug {
// section offsets and are created by EmitSectionLabels.
MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
- MCSymbol *DwarfDebugLocSectionSym;
+ MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
+ MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
// As an optimization, there is no need to emit an entry in the directory
// table for the same directory as DW_at_comp_dir.
StringRef CompilationDir;
- // A holder for the DarwinGDBCompat flag so that the compile unit can use it.
- bool isDarwinGDBCompat;
- bool hasDwarfAccelTables;
-private:
+ // Counter for assigning globally unique IDs for CUs.
+ unsigned GlobalCUIndexCount;
- /// assignAbbrevNumber - Define a unique number for the abbreviation.
- ///
- void assignAbbrevNumber(DIEAbbrev &Abbrev);
+ // Holder for the file specific debug information.
+ DwarfUnits InfoHolder;
+
+ // Holders for the various debug information flags that we might need to
+ // have exposed. See accessor functions below for description.
+
+ // Whether or not we're emitting info for older versions of gdb on darwin.
+ bool IsDarwinGDBCompat;
+
+ // DWARF5 Experimental Options
+ bool HasDwarfAccelTables;
+ bool HasSplitDwarf;
+
+ // Separated Dwarf Variables
+ // In general these will all be for bits that are left in the
+ // original object file, rather than things that are meant
+ // to be in the .dwo sections.
+
+ // The CUs left in the original object file for separated debug info.
+ SmallVector<CompileUnit *, 1> SkeletonCUs;
+
+ // Used to uniquely define abbreviations for the skeleton emission.
+ FoldingSet<DIEAbbrev> SkeletonAbbrevSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> SkeletonAbbrevs;
+
+ // Holder for the skeleton information.
+ DwarfUnits SkeletonHolder;
+
+private:
void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
- /// findAbstractVariable - Find abstract variable associated with Var.
+ /// \brief Find abstract variable associated with Var.
DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
- /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
- /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
- /// If there are global variables in this scope then create and insert
- /// DIEs for these variables.
+ /// \brief Find DIE for the given subprogram and attach appropriate
+ /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
+ /// variables in this scope then create and insert DIEs for these
+ /// variables.
DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode);
- /// constructLexicalScope - Construct new DW_TAG_lexical_block
- /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+ /// \brief Construct new DW_TAG_lexical_block for this scope and
+ /// attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
- /// constructInlinedScopeDIE - This scope represents inlined body of
- /// a function. Construct DIE to represent this concrete inlined copy
- /// of the function.
+ /// \brief This scope represents inlined body of a function. Construct
+ /// DIE to represent this concrete inlined copy of the function.
DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
- /// constructScopeDIE - Construct a DIE for this scope.
+ /// \brief Construct a DIE for this scope.
DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
- /// EmitSectionLabels - Emit initial Dwarf sections with a label at
- /// the start of each one.
- void EmitSectionLabels();
+ /// \brief Emit initial Dwarf sections with a label at the start of each one.
+ void emitSectionLabels();
- /// emitDIE - Recursively Emits a debug information entry.
- ///
- void emitDIE(DIE *Die);
+ /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
- /// computeSizeAndOffset - Compute the size and offset of a DIE.
- ///
- unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last);
-
- /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
- ///
+ /// \brief Compute the size and offset of all the DIEs.
void computeSizeAndOffsets();
- /// EmitDebugInfo - Emit the debug info section.
- ///
+ /// \brief Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ void computeInlinedDIEs();
+
+ /// \brief Collect info for variables that were optimized out.
+ void collectDeadVariables();
+
+ /// \brief Finish off debug information after all functions have been
+ /// processed.
+ void finalizeModuleInfo();
+
+ /// \brief Emit labels to close any remaining sections that have been left
+ /// open.
+ void endSections();
+
+ /// \brief Emit a set of abbreviations to the specific section.
+ void emitAbbrevs(const MCSection *, std::vector<DIEAbbrev*> *);
+
+ /// \brief Emit the debug info section.
void emitDebugInfo();
- /// emitAbbreviations - Emit the abbreviation section.
- ///
- void emitAbbreviations() const;
+ /// \brief Emit the abbreviation section.
+ void emitAbbreviations();
- /// emitEndOfLineMatrix - Emit the last address of the section and the end of
+ /// \brief Emit the last address of the section and the end of
/// the line matrix.
- ///
void emitEndOfLineMatrix(unsigned SectionEnd);
- /// emitAccelNames - Emit visible names into a hashed accelerator table
- /// section.
+ /// \brief Emit visible names into a hashed accelerator table section.
void emitAccelNames();
-
- /// emitAccelObjC - Emit objective C classes and categories into a hashed
+
+ /// \brief Emit objective C classes and categories into a hashed
/// accelerator table section.
void emitAccelObjC();
- /// emitAccelNamespace - Emit namespace dies into a hashed accelerator
- /// table.
+ /// \brief Emit namespace dies into a hashed accelerator table.
void emitAccelNamespaces();
- /// emitAccelTypes() - Emit type dies into a hashed accelerator table.
- ///
+ /// \brief Emit type dies into a hashed accelerator table.
void emitAccelTypes();
-
- /// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
- ///
+
+ /// \brief Emit visible names into a debug pubnames section.
+ void emitDebugPubnames();
+
+ /// \brief Emit visible types into a debug pubtypes section.
void emitDebugPubTypes();
- /// emitDebugStr - Emit visible names into a debug str section.
- ///
+ /// \brief Emit visible names into a debug str section.
void emitDebugStr();
- /// emitDebugLoc - Emit visible names into a debug loc section.
- ///
+ /// \brief Emit visible names into a debug loc section.
void emitDebugLoc();
- /// EmitDebugARanges - Emit visible names into a debug aranges section.
- ///
- void EmitDebugARanges();
+ /// \brief Emit visible names into a debug aranges section.
+ void emitDebugARanges();
- /// emitDebugRanges - Emit visible names into a debug ranges section.
- ///
+ /// \brief Emit visible names into a debug ranges section.
void emitDebugRanges();
- /// emitDebugMacInfo - Emit visible names into a debug macinfo section.
- ///
+ /// \brief Emit visible names into a debug macinfo section.
void emitDebugMacInfo();
- /// emitDebugInlineInfo - Emit inline info using following format.
- /// Section Header:
- /// 1. length of section
- /// 2. Dwarf version number
- /// 3. address size.
- ///
- /// Entries (one "entry" for each function that was inlined):
- ///
- /// 1. offset into __debug_str section for MIPS linkage name, if exists;
- /// otherwise offset into __debug_str for regular function name.
- /// 2. offset into __debug_str section for regular function name.
- /// 3. an unsigned LEB128 number indicating the number of distinct inlining
- /// instances for the function.
- ///
- /// The rest of the entry consists of a {die_offset, low_pc} pair for each
- /// inlined instance; the die_offset points to the inlined_subroutine die in
- /// the __debug_info section, and the low_pc is the starting address for the
- /// inlining instance.
+ /// \brief Emit inline info using custom format.
void emitDebugInlineInfo();
- /// constructCompileUnit - Create new CompileUnit for the given
- /// metadata node with tag DW_TAG_compile_unit.
+ /// DWARF 5 Experimental Split Dwarf Emitters
+
+ /// \brief Construct the split debug info compile unit for the debug info
+ /// section.
+ CompileUnit *constructSkeletonCU(const MDNode *);
+
+ /// \brief Emit the local split abbreviations.
+ void emitSkeletonAbbrevs(const MCSection *);
+
+ /// \brief Emit the debug info dwo section.
+ void emitDebugInfoDWO();
+
+ /// \brief Emit the debug abbrev dwo section.
+ void emitDebugAbbrevDWO();
+
+ /// \brief Emit the debug str dwo section.
+ void emitDebugStrDWO();
+
+ /// \brief Create new CompileUnit for the given metadata node with tag
+ /// DW_TAG_compile_unit.
CompileUnit *constructCompileUnit(const MDNode *N);
- /// construct SubprogramDIE - Construct subprogram DIE.
+ /// \brief Construct subprogram DIE.
void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
- /// recordSourceLine - Register a source line with debug info. Returns the
- /// unique label that was emitted and which provides correspondence to
- /// the source line list.
+ /// \brief Register a source line with debug info. Returns the unique
+ /// label that was emitted and which provides correspondence to the
+ /// source line list.
void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
unsigned Flags);
-
- /// identifyScopeMarkers() - Indentify instructions that are marking the
- /// beginning of or ending of a scope.
+
+ /// \brief Identify instructions that mark the beginning or ending of a
+ /// scope.
void identifyScopeMarkers();
- /// addCurrentFnArgument - If Var is an current function argument that add
- /// it in CurrentFnArguments list.
+ /// \brief If Var is a current function argument, add it to the
+ /// CurrentFnArguments list.
bool addCurrentFnArgument(const MachineFunction *MF,
DbgVariable *Var, LexicalScope *Scope);
- /// collectVariableInfo - Populate LexicalScope entries with variables' info.
+ /// \brief Populate LexicalScope entries with variables' info.
void collectVariableInfo(const MachineFunction *,
SmallPtrSet<const MDNode *, 16> &ProcessedVars);
-
- /// collectVariableInfoFromMMITable - Collect variable information from
- /// side table maintained by MMI.
+
+ /// \brief Collect variable information from the side table maintained
+ /// by MMI.
void collectVariableInfoFromMMITable(const MachineFunction * MF,
SmallPtrSet<const MDNode *, 16> &P);
- /// requestLabelBeforeInsn - Ensure that a label will be emitted before MI.
+ /// \brief Ensure that a label will be emitted before MI.
void requestLabelBeforeInsn(const MachineInstr *MI) {
LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
}
- /// getLabelBeforeInsn - Return Label preceding the instruction.
- const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+ /// \brief Return Label preceding the instruction.
+ MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
- /// requestLabelAfterInsn - Ensure that a label will be emitted after MI.
+ /// \brief Ensure that a label will be emitted after MI.
void requestLabelAfterInsn(const MachineInstr *MI) {
LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
}
- /// getLabelAfterInsn - Return Label immediately following the instruction.
- const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+ /// \brief Return Label immediately following the instruction.
+ MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
public:
//===--------------------------------------------------------------------===//
@@ -491,52 +602,47 @@ public:
DwarfDebug(AsmPrinter *A, Module *M);
~DwarfDebug();
- /// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
- /// as llvm.dbg.enum and llvm.dbg.ty
- void collectInfoFromNamedMDNodes(Module *M);
-
- /// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
- /// FIXME - Remove this when DragonEgg switches to DIBuilder.
- bool collectLegacyDebugInfo(Module *M);
-
- /// beginModule - Emit all Dwarf sections that should come prior to the
+ /// \brief Emit all Dwarf sections that should come prior to the
/// content.
- void beginModule(Module *M);
+ void beginModule();
- /// endModule - Emit all Dwarf sections that should come after the content.
- ///
+ /// \brief Emit all Dwarf sections that should come after the content.
void endModule();
- /// beginFunction - Gather pre-function debug information. Assumes being
- /// emitted immediately after the function entry point.
+ /// \brief Gather pre-function debug information.
void beginFunction(const MachineFunction *MF);
- /// endFunction - Gather and emit post-function debug information.
- ///
+ /// \brief Gather and emit post-function debug information.
void endFunction(const MachineFunction *MF);
- /// beginInstruction - Process beginning of an instruction.
+ /// \brief Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI);
- /// endInstruction - Prcess end of an instruction.
+ /// \brief Process end of an instruction.
void endInstruction(const MachineInstr *MI);
- /// GetOrCreateSourceID - Look up the source id with the given directory and
- /// source file names. If none currently exists, create a new id and insert it
- /// in the SourceIds map.
- unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
-
- /// getStringPool - returns the entry into the start of the pool.
- MCSymbol *getStringPool();
+ /// \brief Look up the source id with the given directory and source file
+ /// names. If none currently exists, create a new id and insert it in the
+ /// SourceIds map.
+ unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName,
+ unsigned CUID);
- /// getStringPoolEntry - returns an entry into the string pool with the given
- /// string text.
- MCSymbol *getStringPoolEntry(StringRef Str);
+ /// \brief Recursively Emits a debug information entry.
+ void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs);
- /// useDarwinGDBCompat - returns whether or not to limit some of our debug
+ /// \brief Returns whether or not to limit some of our debug
/// output to the limitations of darwin gdb.
- bool useDarwinGDBCompat() { return isDarwinGDBCompat; }
- bool useDwarfAccelTables() { return hasDwarfAccelTables; }
+ bool useDarwinGDBCompat() { return IsDarwinGDBCompat; }
+
+ // Experimental DWARF5 features.
+
+ /// \brief Returns whether or not to emit tables that dwarf consumers can
+ /// use to accelerate lookup.
+ bool useDwarfAccelTables() { return HasDwarfAccelTables; }
+
+ /// \brief Returns whether or not to change the current debug info for the
+ /// split dwarf proposal support.
+ bool useSplitDwarf() { return HasSplitDwarf; }
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 08fb6b3f52c5..7133458129cc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -12,30 +12,29 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
DwarfException::DwarfException(AsmPrinter *A)
@@ -608,7 +607,7 @@ void DwarfException::EmitExceptionTable() {
if (!S.PadLabel) {
if (VerboseAsm)
Asm->OutStreamer.AddComment(" has no landing pad");
- Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/);
} else {
if (VerboseAsm)
Asm->OutStreamer.AddComment(Twine(" jumps to ") +
@@ -672,6 +671,18 @@ void DwarfException::EmitExceptionTable() {
Asm->EmitSLEB128(Action.NextAction);
}
+ EmitTypeInfos(TTypeEncoding);
+
+ Asm->EmitAlignment(2);
+}
+
+void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ int Entry = 0;
// Emit the Catch TypeInfos.
if (VerboseAsm && !TypeInfos.empty()) {
Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
@@ -684,11 +695,7 @@ void DwarfException::EmitExceptionTable() {
const GlobalVariable *GV = *I;
if (VerboseAsm)
Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
- if (GV)
- Asm->EmitReference(GV, TTypeEncoding);
- else
- Asm->OutStreamer.EmitIntValue(0,Asm->GetSizeOfEncodedValue(TTypeEncoding),
- 0);
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
}
// Emit the Exception Specifications.
@@ -708,8 +715,6 @@ void DwarfException::EmitExceptionTable() {
Asm->EmitULEB128(TypeID);
}
-
- Asm->EmitAlignment(2);
}
/// EndModule - Emit all exception information that should come after the
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index fe9e49360951..74b1b13367a2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -121,6 +121,8 @@ protected:
/// catches in the function. This table is reverse indexed, base 1.
void EmitExceptionTable();
+ virtual void EmitTypeInfos(unsigned TTypeEncoding);
+
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -175,6 +177,7 @@ public:
};
class ARMException : public DwarfException {
+ void EmitTypeInfos(unsigned TTypeEncoding);
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
new file mode 100644
index 000000000000..a8fb66dcf17b
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -0,0 +1,120 @@
+//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the compiler plugin used to emit garbage collection
+// information in a convenient layout for parsing and loading in the
+// Erlang/OTP runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ErlangGCPrinter : public GCMetadataPrinter {
+ public:
+ void beginAssembly(AsmPrinter &AP);
+ void finishAssembly(AsmPrinter &AP);
+ };
+
+}
+
+static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGCPrinter() { }
+
+void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { }
+
+void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
+ MCStreamer &OS = AP.OutStreamer;
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+
+ // Put this in a custom .note section.
+ AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext()
+ .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0,
+ SectionKind::getDataRel()));
+
+ // For each function...
+ for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+ GCFunctionInfo &MD = **FI;
+
+ /** A compact GC layout. Emit this data structure:
+ *
+ * struct {
+ * int16_t PointCount;
+ * void *SafePointAddress[PointCount];
+ * int16_t StackFrameSize; (in words)
+ * int16_t StackArity;
+ * int16_t LiveCount;
+ * int16_t LiveOffsets[LiveCount];
+ * } __gcmap_<FUNCTIONNAME>;
+ **/
+
+ // Align to address width.
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ // Emit PointCount.
+ OS.AddComment("safe point count");
+ AP.EmitInt16(MD.size());
+
+ // And each safe point...
+ for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE;
+ ++PI) {
+ // Emit the address of the safe point.
+ OS.AddComment("safe point address");
+ MCSymbol *Label = PI->Label;
+ AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/);
+ }
+
+    // Stack information never changes across safe points, so only print info
+    // from the first call site.
+ GCFunctionInfo::iterator PI = MD.begin();
+
+ // Emit the stack frame size.
+ OS.AddComment("stack frame size (in words)");
+ AP.EmitInt16(MD.getFrameSize() / IntPtrSize);
+
+ // Emit stack arity, i.e. the number of stacked arguments.
+ unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
+ unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ?
+ MD.getFunction().arg_size() - RegisteredArgs : 0;
+ OS.AddComment("stack arity");
+ AP.EmitInt16(StackArity);
+
+ // Emit the number of live roots in the function.
+ OS.AddComment("live root count");
+ AP.EmitInt16(MD.live_size(PI));
+
+ // And for each live root...
+ for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+ LE = MD.live_end(PI);
+ LI != LE; ++LI) {
+ // Emit live root's offset within the stack frame.
+ OS.AddComment("stack index (offset / wordsize)");
+ AP.EmitInt16(LI->StackOffset / IntPtrSize);
+ }
+ }
+}
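The layout comment in finishAssembly above fully determines the record format,
and the printer emits every safe-point address as a fixed 4-byte value
(EmitLabelPlusOffset with Size 4) even though the comment declares void*. A
hypothetical reader for one such record, assuming a little-endian host;
nothing below is part of the Erlang/OTP runtime:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    struct GCMapReader {
      const uint8_t *P;
      uint16_t read16() { uint16_t V; std::memcpy(&V, P, 2); P += 2; return V; }
      uint32_t read32() { uint32_t V; std::memcpy(&V, P, 4); P += 4; return V; }
    };

    // Walk one __gcmap_<FUNCTION> record. Real data aligns each record to
    // the address width, so a multi-record walker must re-align in between.
    static void dumpGCMap(const uint8_t *Data) {
      GCMapReader R = { Data };
      uint16_t PointCount = R.read16();
      for (uint16_t i = 0; i != PointCount; ++i)
        std::printf("safe point address: 0x%08x\n", (unsigned)R.read32());
      std::printf("frame size (words): %u\n", (unsigned)R.read16());
      std::printf("stack arity:        %u\n", (unsigned)R.read16());
      uint16_t LiveCount = R.read16();
      for (uint16_t i = 0; i != LiveCount; ++i)
        std::printf("live root at word offset %u\n", (unsigned)R.read16());
    }

    int main() {
      // One synthetic record: a single safe point at 0x1000, a 2-word
      // frame, arity 0, and one live root at word offset 1.
      const uint8_t Rec[] = { 1,0, 0x00,0x10,0,0, 2,0, 0,0, 1,0, 1,0 };
      dumpGCMap(Rec);
      return 0;
    }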
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index f7c011968c23..98177c0ba1cf 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -12,20 +12,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
#include <cctype>
using namespace llvm;
@@ -100,7 +100,7 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
EmitCamlGlobal(getModule(), AP, "data_end");
// FIXME: Why does ocaml emit this??
- AP.OutStreamer.EmitIntValue(0, IntPtrSize, 0);
+ AP.OutStreamer.EmitIntValue(0, IntPtrSize);
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(getModule(), AP, "frametable");
@@ -145,7 +145,7 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
"Live root count "+Twine(LiveCount)+" >= 65536.");
}
- AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0);
+ AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize);
AP.EmitInt16(FrameSize);
AP.EmitInt16(LiveCount);
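This hunk, like the earlier one in DwarfException.cpp, tracks an MCStreamer
cleanup in this import: EmitIntValue and EmitSymbolValue drop their trailing
address-space argument. The call shape changes as follows (OS stands for any
MCStreamer; a sketch, not code from the tree):

    OS.EmitIntValue(0, IntPtrSize, 0);       // before: value, size, addrspace
    OS.EmitIntValue(0, IntPtrSize);          // after:  value, size
    OS.EmitSymbolValue(Label, IntPtrSize);   // after:  symbol, size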
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 70742a8d2e35..156101286b75 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -12,30 +12,29 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
Win64Exception::Win64Exception(AsmPrinter *A)
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
new file mode 100644
index 000000000000..012ff8ad8339
--- /dev/null
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -0,0 +1,466 @@
+//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the implementation of a basic TargetTransformInfo pass
+/// predicated on the target abstractions present in the target independent
+/// code generator. It uses these (primarily TargetLowering) to model as much
+/// of the TTI query interface as possible. It is included by most targets so
+/// that they can specialize only a small subset of the query space.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "basictti"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <utility>
+
+using namespace llvm;
+
+namespace {
+
+class BasicTTI : public ImmutablePass, public TargetTransformInfo {
+ const TargetLoweringBase *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ BasicTTI() : ImmutablePass(ID), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) {
+ initializeBasicTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ virtual bool isLegalAddImmediate(int64_t imm) const;
+ virtual bool isLegalICmpImmediate(int64_t imm) const;
+ virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) const;
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTypeLegal(Type *Ty) const;
+ virtual unsigned getJumpBufAlignment() const;
+ virtual unsigned getJumpBufSize() const;
+ virtual bool shouldBuildLookupTables() const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCFInstrCost(unsigned Opcode) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+ virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+ ArrayRef<Type*> Tys) const;
+ virtual unsigned getNumberOfParts(Type *Tp) const;
+ virtual unsigned getAddressComputationCost(Type *Ty) const;
+
+ /// @}
+};
+
+}
+
+INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti",
+ "Target independent code generator's TTI", true, true, false)
+char BasicTTI::ID = 0;
+
+ImmutablePass *
+llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) {
+ return new BasicTTI(TLI);
+}
+
+
+bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
+ return TLI->isLegalAddImmediate(imm);
+}
+
+bool BasicTTI::isLegalICmpImmediate(int64_t imm) const {
+ return TLI->isLegalICmpImmediate(imm);
+}
+
+bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) const {
+ TargetLoweringBase::AddrMode AM;
+ AM.BaseGV = BaseGV;
+ AM.BaseOffs = BaseOffset;
+ AM.HasBaseReg = HasBaseReg;
+ AM.Scale = Scale;
+ return TLI->isLegalAddressingMode(AM, Ty);
+}
+
+bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return TLI->isTruncateFree(Ty1, Ty2);
+}
+
+bool BasicTTI::isTypeLegal(Type *Ty) const {
+ EVT T = TLI->getValueType(Ty);
+ return TLI->isTypeLegal(T);
+}
+
+unsigned BasicTTI::getJumpBufAlignment() const {
+ return TLI->getJumpBufAlignment();
+}
+
+unsigned BasicTTI::getJumpBufSize() const {
+ return TLI->getJumpBufSize();
+}
+
+bool BasicTTI::shouldBuildLookupTables() const {
+ return TLI->supportJumpTables() &&
+ (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Calls used by the vectorizers.
+//
+//===----------------------------------------------------------------------===//
+
+unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert,
+ bool Extract) const {
+  assert(Ty->isVectorTy() && "Can only scalarize vectors");
+ unsigned Cost = 0;
+
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ if (Insert)
+ Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ if (Extract)
+ Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+
+ return Cost;
+}
+
+unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
+ return 1;
+}
+
+unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
+ return 32;
+}
+
+unsigned BasicTTI::getMaximumUnrollFactor() const {
+ return 1;
+}
+
+unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const {
+ // Check if any of the operands are vector operands.
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1.
+    // If the type is split to multiple registers, assume that there is some
+ // overhead to this.
+ // TODO: Once we have extract/insert subvector cost we need to use them.
+ if (LT.first > 1)
+ return LT.first * 2;
+ return LT.first * 1;
+ }
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered then assume
+    // that the code is twice as expensive.
+ return LT.first * 2;
+ }
+
+ // Else, assume that we need to scalarize this op.
+ if (Ty->isVectorTy()) {
+ unsigned Num = Ty->getVectorNumElements();
+ unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType());
+    // Return the cost of multiple scalar invocations plus the cost of
+    // inserting and extracting the values.
+ return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+ }
+
+ // We don't know anything about this scalar instruction.
+ return 1;
+}
+
+unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ return 1;
+}
+
+unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);
+
+ // Check for NOOP conversions.
+ if (SrcLT.first == DstLT.first &&
+ SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+
+    // Bitcasts and truncs between types legalized to the same type are free.
+ if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
+ return 0;
+ }
+
+ if (Opcode == Instruction::Trunc &&
+ TLI->isTruncateFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ if (Opcode == Instruction::ZExt &&
+ TLI->isZExtFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ // If the cast is marked as legal (or promote) then assume low cost.
+ if (TLI->isOperationLegalOrPromote(ISD, DstLT.second))
+ return 1;
+
+ // Handle scalar conversions.
+ if (!Src->isVectorTy() && !Dst->isVectorTy()) {
+
+ // Scalar bitcasts are usually free.
+ if (Opcode == Instruction::BitCast)
+ return 0;
+
+ // Just check the op cost. If the operation is legal then assume it costs 1.
+ if (!TLI->isOperationExpand(ISD, DstLT.second))
+ return 1;
+
+    // Assume that illegal scalar instructions are expensive.
+ return 4;
+ }
+
+ // Check vector-to-vector casts.
+ if (Dst->isVectorTy() && Src->isVectorTy()) {
+
+ // If the cast is between same-sized registers, then the check is simple.
+ if (SrcLT.first == DstLT.first &&
+ SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+
+ // Assume that Zext is done using AND.
+ if (Opcode == Instruction::ZExt)
+ return 1;
+
+ // Assume that sext is done using SHL and SRA.
+ if (Opcode == Instruction::SExt)
+ return 2;
+
+ // Just check the op cost. If the operation is legal then assume it costs
+ // 1 and multiply by the type-legalization overhead.
+ if (!TLI->isOperationExpand(ISD, DstLT.second))
+ return SrcLT.first * 1;
+ }
+
+ // If we are converting vectors and the operation is illegal, or
+ // if the vectors are legalized to different types, estimate the
+ // scalarization costs.
+ unsigned Num = Dst->getVectorNumElements();
+ unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(),
+ Src->getScalarType());
+
+    // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values.
+ return getScalarizationOverhead(Dst, true, true) + Num * Cost;
+ }
+
+ // We already handled vector-to-vector and scalar-to-scalar conversions. This
+ // is where we handle bitcast between vectors and scalars. We need to assume
+ // that the conversion is scalarized in one way or another.
+ if (Opcode == Instruction::BitCast)
+ // Illegal bitcasts are done by storing and loading from a stack slot.
+ return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
+ (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);
+
+ llvm_unreachable("Unhandled cast");
+}
+
+unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const {
+ // Branches are assumed to be predicted.
+ return 0;
+}
+
+unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Selects on vectors are actually vector selects.
+ if (ISD == ISD::SELECT) {
+ assert(CondTy && "CondTy must exist");
+ if (CondTy->isVectorTy())
+ ISD = ISD::VSELECT;
+ }
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1. Multiply
+ // by the type-legalization overhead.
+ return LT.first * 1;
+ }
+
+ // Otherwise, assume that the cast is scalarized.
+ if (ValTy->isVectorTy()) {
+ unsigned Num = ValTy->getVectorNumElements();
+ if (CondTy)
+ CondTy = CondTy->getScalarType();
+ unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
+ CondTy);
+
+    // Return the cost of multiple scalar invocations plus the cost of inserting
+ // and extracting the values.
+ return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
+ }
+
+ // Unknown scalar opcode.
+ return 1;
+}
+
+unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ return 1;
+}
+
+unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ assert(!Src->isVoidTy() && "Invalid type");
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+
+ // Assume that all loads of legal types cost 1.
+ return LT.first;
+}
+
+unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ unsigned ISD = 0;
+ switch (IID) {
+ default: {
+ // Assume that we need to scalarize this intrinsic.
+ unsigned ScalarizationCost = 0;
+ unsigned ScalarCalls = 1;
+ if (RetTy->isVectorTy()) {
+ ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
+ ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+ }
+ for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+ if (Tys[i]->isVectorTy()) {
+ ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+        ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
+ }
+ }
+
+ return ScalarCalls + ScalarizationCost;
+ }
+ // Look for intrinsics that can be lowered directly or turned into a scalar
+ // intrinsic call.
+ case Intrinsic::sqrt: ISD = ISD::FSQRT; break;
+ case Intrinsic::sin: ISD = ISD::FSIN; break;
+ case Intrinsic::cos: ISD = ISD::FCOS; break;
+ case Intrinsic::exp: ISD = ISD::FEXP; break;
+ case Intrinsic::exp2: ISD = ISD::FEXP2; break;
+ case Intrinsic::log: ISD = ISD::FLOG; break;
+ case Intrinsic::log10: ISD = ISD::FLOG10; break;
+ case Intrinsic::log2: ISD = ISD::FLOG2; break;
+ case Intrinsic::fabs: ISD = ISD::FABS; break;
+ case Intrinsic::floor: ISD = ISD::FFLOOR; break;
+ case Intrinsic::ceil: ISD = ISD::FCEIL; break;
+ case Intrinsic::trunc: ISD = ISD::FTRUNC; break;
+ case Intrinsic::rint: ISD = ISD::FRINT; break;
+ case Intrinsic::pow: ISD = ISD::FPOW; break;
+ case Intrinsic::fma: ISD = ISD::FMA; break;
+ case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
+ }
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);
+
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1.
+    // If the type is split to multiple registers, assume that there is some
+ // overhead to this.
+ // TODO: Once we have extract/insert subvector cost we need to use them.
+ if (LT.first > 1)
+ return LT.first * 2;
+ return LT.first * 1;
+ }
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered then assume
+    // that the code is twice as expensive.
+ return LT.first * 2;
+ }
+
+ // Else, assume that we need to scalarize this intrinsic. For math builtins
+ // this will emit a costly libcall, adding call overhead and spills. Make it
+ // very expensive.
+ if (RetTy->isVectorTy()) {
+ unsigned Num = RetTy->getVectorNumElements();
+ unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
+ Tys);
+ return 10 * Cost * Num;
+ }
+
+ // This is going to be turned into a library call, make it expensive.
+ return 10;
+}
+
+unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ return LT.first;
+}
+
+unsigned BasicTTI::getAddressComputationCost(Type *Ty) const {
+ return 0;
+}
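BasicTTI applies one recurring recipe: legalize the type, charge LT.first for
a legal operation (doubled when the value is split across registers or custom
lowered), and otherwise scalarize, paying a per-element cost plus
insert/extract overhead. A toy model of that recipe with invented unit costs;
the real pass obtains all of these answers from TargetLoweringBase:

    #include <cstdio>

    enum Legality { Legal, Custom, Expand };

    // Cost of inserting results into and/or extracting operands from a
    // vector of NumElts elements, one unit per element per direction.
    static unsigned scalarizationOverhead(unsigned NumElts, bool Insert,
                                          bool Extract) {
      return NumElts * ((Insert ? 1 : 0) + (Extract ? 1 : 0));
    }

    static unsigned arithCost(Legality L, unsigned NumLegalParts,
                              unsigned NumElts, unsigned ScalarCost) {
      if (L == Legal)  // one unit per legalized register, doubled when split
        return NumLegalParts > 1 ? NumLegalParts * 2 : NumLegalParts;
      if (L == Custom) // custom lowering: assume twice as expensive
        return NumLegalParts * 2;
      // Expand: scalarize into NumElts scalar ops plus vector traffic.
      return scalarizationOverhead(NumElts, true, true) + NumElts * ScalarCost;
    }

    int main() {
      std::printf("legal <4 x i32> add:    %u\n", arithCost(Legal, 1, 4, 1));
      std::printf("expanded <4 x i32> div: %u\n", arithCost(Expand, 1, 4, 1));
      return 0;
    }

This prints 1 for the legal add and 12 for the expanded divide: four scalar
ops plus eight units of insert/extract traffic.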
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 6f4c5a2f667b..f8cc3b3999e8 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -18,24 +18,23 @@
#define DEBUG_TYPE "branchfolding"
#include "BranchFolding.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
using namespace llvm;
@@ -571,8 +570,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
if (EffectiveTailLen >= 2 &&
- MF->getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize) &&
+ MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
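The attribute change in this hunk follows the migration from Attributes to
indexed AttributeSets in this import: function-level attributes now live in
the AttributeSet::FunctionIndex slot. Extracted into a helper, the new query
shape looks like this (a sketch against this import's headers, not code from
the tree):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    // True when F carries the optimize-for-size function attribute.
    static bool optForSize(const llvm::Function *F) {
      return F->getAttributes().hasAttribute(
          llvm::AttributeSet::FunctionIndex,
          llvm::Attribute::OptimizeForSize);
    }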
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index fa6d4e16cfe8..56aa3309d3dd 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -2,17 +2,18 @@ add_llvm_library(LLVMCodeGen
AggressiveAntiDepBreaker.cpp
AllocationOrder.cpp
Analysis.cpp
+ BasicTargetTransformInfo.cpp
BranchFolding.cpp
CalcSpillWeights.cpp
CallingConvLower.cpp
CodeGen.cpp
- CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
- DeadMachineInstructionElim.cpp
DFAPacketizer.cpp
+ DeadMachineInstructionElim.cpp
DwarfEHPrepare.cpp
EarlyIfConversion.cpp
EdgeBundles.cpp
+ ErlangGC.cpp
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
@@ -31,21 +32,20 @@ add_llvm_library(LLVMCodeGen
LiveInterval.cpp
LiveIntervalAnalysis.cpp
LiveIntervalUnion.cpp
+ LiveRangeCalc.cpp
+ LiveRangeEdit.cpp
LiveRegMatrix.cpp
LiveStackAnalysis.cpp
LiveVariables.cpp
- LiveRangeCalc.cpp
- LiveRangeEdit.cpp
LocalStackSlotAllocation.cpp
MachineBasicBlock.cpp
MachineBlockFrequencyInfo.cpp
MachineBlockPlacement.cpp
MachineBranchProbabilityInfo.cpp
+ MachineCSE.cpp
MachineCodeEmitter.cpp
MachineCopyPropagation.cpp
- MachineCSE.cpp
MachineDominators.cpp
- MachinePostDominators.cpp
MachineFunction.cpp
MachineFunctionAnalysis.cpp
MachineFunctionPass.cpp
@@ -54,10 +54,10 @@ add_llvm_library(LLVMCodeGen
MachineInstrBundle.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
- MachineLoopRanges.cpp
MachineModuleInfo.cpp
MachineModuleInfoImpls.cpp
MachinePassRegistry.cpp
+ MachinePostDominators.cpp
MachineRegisterInfo.cpp
MachineSSAUpdater.cpp
MachineScheduler.cpp
@@ -91,18 +91,20 @@ add_llvm_library(LLVMCodeGen
ShrinkWrapping.cpp
SjLjEHPrepare.cpp
SlotIndexes.cpp
- Spiller.cpp
SpillPlacement.cpp
+ Spiller.cpp
SplitKit.cpp
+ StackColoring.cpp
StackProtector.cpp
StackSlotColoring.cpp
- StackColoring.cpp
StrongPHIElimination.cpp
TailDuplication.cpp
TargetFrameLoweringImpl.cpp
- TargetInstrInfoImpl.cpp
+ TargetInstrInfo.cpp
+ TargetLoweringBase.cpp
TargetLoweringObjectFileImpl.cpp
TargetOptionsImpl.cpp
+ TargetRegisterInfo.cpp
TargetSchedule.cpp
TwoAddressInstructionPass.cpp
UnreachableBlockElim.cpp
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 22b91409240b..f1d4ace92273 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -14,13 +14,13 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
@@ -74,7 +74,7 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Formal argument #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString();
+ << EVT(ArgVT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -106,7 +106,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Return operand #" << i << " has unhandled type "
- << EVT(VT).getEVTString();
+ << EVT(VT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -124,7 +124,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString();
+ << EVT(ArgVT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -143,7 +143,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString();
+ << EVT(ArgVT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -160,7 +160,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
dbgs() << "Call result #" << i << " has unhandled type "
- << EVT(VT).getEVTString() << "\n";
+ << EVT(VT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -173,7 +173,7 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
#ifndef NDEBUG
dbgs() << "Call result has unhandled type "
- << EVT(VT).getEVTString();
+ << EVT(VT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index a53f6f8d0f1b..35ec68d00cec 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -19,9 +19,9 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeBasicTTIPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeCalculateSpillWeightsPass(Registry);
- initializeCodePlacementOptPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
initializeEarlyIfConverterPass(Registry);
initializeExpandPostRAPass(Registry);
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
deleted file mode 100644
index d8e06c33a68e..000000000000
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ /dev/null
@@ -1,422 +0,0 @@
-//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the pass that optimizes code placement and aligns loop
-// headers to target-specific alignment boundaries.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "code-placement"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumLoopsAligned, "Number of loops aligned");
-STATISTIC(NumIntraElim, "Number of intra loop branches eliminated");
-STATISTIC(NumIntraMoved, "Number of intra loop branches moved");
-
-namespace {
- class CodePlacementOpt : public MachineFunctionPass {
- const MachineLoopInfo *MLI;
- const TargetInstrInfo *TII;
- const TargetLowering *TLI;
-
- public:
- static char ID;
- CodePlacementOpt() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineLoopInfo>();
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- private:
- bool HasFallthrough(MachineBasicBlock *MBB);
- bool HasAnalyzableTerminator(MachineBasicBlock *MBB);
- void Splice(MachineFunction &MF,
- MachineFunction::iterator InsertPt,
- MachineFunction::iterator Begin,
- MachineFunction::iterator End);
- bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
- MachineLoop *L);
- bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
- MachineLoop *L);
- bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L);
- bool OptimizeIntraLoopEdges(MachineFunction &MF);
- bool AlignLoops(MachineFunction &MF);
- bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
- };
-
- char CodePlacementOpt::ID = 0;
-} // end anonymous namespace
-
-char &llvm::CodePlacementOptID = CodePlacementOpt::ID;
-INITIALIZE_PASS(CodePlacementOpt, "code-placement",
- "Code Placement Optimizer", false, false)
-
-/// HasFallthrough - Test whether the given branch has a fallthrough, either as
-/// a plain fallthrough or as a fallthrough case of a conditional branch.
-///
-bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) {
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
- return false;
- // This conditional branch has no fallthrough.
- if (FBB)
- return false;
- // An unconditional branch has no fallthrough.
- if (Cond.empty() && TBB)
- return false;
- // It has a fallthrough.
- return true;
-}
-
-/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB.
-/// This is called before major changes are begun to test whether it will be
-/// possible to complete the changes.
-///
-/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed
-/// whenever possible.
-///
-bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
- // Conservatively ignore EH landing pads.
- if (MBB->isLandingPad()) return false;
-
- // Aggressively handle return blocks and similar constructs.
- if (MBB->succ_empty()) return true;
-
- // Ask the target's AnalyzeBranch if it can handle this block.
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- // Make sure the terminator is understood.
- if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
- return false;
- // Ignore blocks which look like they might have EH-related control flow.
- // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't
- // recognize the possibility of a control transfer through an unwind.
- // Such blocks contain EH_LABEL instructions, however they may be in the
- // middle of the block. Instead of searching for them, just check to see
- // if the CFG disagrees with AnalyzeBranch.
- if (1u + !Cond.empty() != MBB->succ_size())
- return false;
- // Make sure we have the option of reversing the condition.
- if (!Cond.empty() && TII->ReverseBranchCondition(Cond))
- return false;
- return true;
-}
-
-/// Splice - Move the sequence of instructions [Begin,End) to just before
-/// InsertPt. Update branch instructions as needed to account for broken
-/// fallthrough edges and to take advantage of newly exposed fallthrough
-/// opportunities.
-///
-void CodePlacementOpt::Splice(MachineFunction &MF,
- MachineFunction::iterator InsertPt,
- MachineFunction::iterator Begin,
- MachineFunction::iterator End) {
- assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() &&
- "Splice can't change the entry block!");
- MachineFunction::iterator OldBeginPrior = prior(Begin);
- MachineFunction::iterator OldEndPrior = prior(End);
-
- MF.splice(InsertPt, Begin, End);
-
- prior(Begin)->updateTerminator();
- OldBeginPrior->updateTerminator();
- OldEndPrior->updateTerminator();
-}
-
-/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
-/// to the loop top to the top of the loop so that they have a fall through.
-/// This can introduce a branch on entry to the loop, but it can eliminate a
-/// branch within the loop. See the @simple case in
-/// test/CodeGen/X86/loop_blocks.ll for an example of this.
-bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
- MachineLoop *L) {
- bool Changed = false;
- MachineBasicBlock *TopMBB = L->getTopBlock();
-
- bool BotHasFallthrough = HasFallthrough(L->getBottomBlock());
-
- if (TopMBB == MF.begin() ||
- HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) {
- new_top:
- for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(),
- PE = TopMBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
- if (Pred == TopMBB) continue;
- if (HasFallthrough(Pred)) continue;
- if (!L->contains(Pred)) continue;
-
- // Verify that we can analyze all the loop entry edges before beginning
- // any changes which will require us to be able to analyze them.
- if (Pred == MF.begin())
- continue;
- if (!HasAnalyzableTerminator(Pred))
- continue;
- if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred))))
- continue;
-
- // Move the block.
- DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber()
- << " to top of loop.\n");
- Changed = true;
-
- // Move it and all the blocks that can reach it via fallthrough edges
- // exclusively, to keep existing fallthrough edges intact.
- MachineFunction::iterator Begin = Pred;
- MachineFunction::iterator End = llvm::next(Begin);
- while (Begin != MF.begin()) {
- MachineFunction::iterator Prior = prior(Begin);
- if (Prior == MF.begin())
- break;
- // Stop when a non-fallthrough edge is found.
- if (!HasFallthrough(Prior))
- break;
- // Stop if a block which could fall-through out of the loop is found.
- if (Prior->isSuccessor(End))
- break;
- // If we've reached the top, stop scanning.
- if (Prior == MachineFunction::iterator(TopMBB)) {
- // We know top currently has a fall through (because we just checked
- // it) which would be lost if we do the transformation, so it isn't
- // worthwhile to do the transformation unless it would expose a new
- // fallthrough edge.
- if (!Prior->isSuccessor(End))
- goto next_pred;
- // Otherwise we can stop scanning and proceed to move the blocks.
- break;
- }
- // If we hit a switch or something complicated, don't move anything
- // for this predecessor.
- if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
- break;
- // Ok, the block prior to Begin will be moved along with the rest.
- // Extend the range to include it.
- Begin = Prior;
- ++NumIntraMoved;
- }
-
- // Move the blocks.
- Splice(MF, TopMBB, Begin, End);
-
- // Update TopMBB.
- TopMBB = L->getTopBlock();
-
- // We have a new loop top. Iterate on it. We shouldn't have to do this
- // too many times if BranchFolding has done a reasonable job.
- goto new_top;
- next_pred:;
- }
- }
-
- // If the loop previously didn't exit with a fall-through and it now does,
- // we eliminated a branch.
- if (Changed &&
- !BotHasFallthrough &&
- HasFallthrough(L->getBottomBlock())) {
- ++NumIntraElim;
- }
-
- return Changed;
-}
-
-/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
-/// portion of the loop contiguous with the header. This usually makes the loop
-/// contiguous, provided that AnalyzeBranch can handle all the relevant
-/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
-/// for an example of this.
-bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
- MachineLoop *L) {
- bool Changed = false;
- MachineBasicBlock *TopMBB = L->getTopBlock();
- MachineBasicBlock *BotMBB = L->getBottomBlock();
-
- // Determine a position to move orphaned loop blocks to. If TopMBB is not
- // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
- // to the top of the loop to avoid losing that fallthrough. Otherwise append
- // them to the bottom, even if it previously had a fallthrough, on the theory
- // that it's worth an extra branch to keep the loop contiguous.
- MachineFunction::iterator InsertPt =
- llvm::next(MachineFunction::iterator(BotMBB));
- bool InsertAtTop = false;
- if (TopMBB != MF.begin() &&
- !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
- HasFallthrough(BotMBB)) {
- InsertPt = TopMBB;
- InsertAtTop = true;
- }
-
- // Keep a record of which blocks are in the portion of the loop contiguous
- // with the loop header.
- SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
- for (MachineFunction::iterator I = TopMBB,
- E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
- ContiguousBlocks.insert(I);
-
-  // Find non-contiguous blocks and fix them.
- if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
- for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- MachineBasicBlock *BB = *BI;
-
- // Verify that we can analyze all the loop entry edges before beginning
- // any changes which will require us to be able to analyze them.
- if (!HasAnalyzableTerminator(BB))
- continue;
- if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
- continue;
-
- // If the layout predecessor is part of the loop, this block will be
- // processed along with it. This keeps them in their relative order.
- if (BB != MF.begin() &&
- L->contains(prior(MachineFunction::iterator(BB))))
- continue;
-
- // Check to see if this block is already contiguous with the main
- // portion of the loop.
- if (!ContiguousBlocks.insert(BB))
- continue;
-
- // Move the block.
- DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
- << " to be contiguous with loop.\n");
- Changed = true;
-
- // Process this block and all loop blocks contiguous with it, to keep
- // them in their relative order.
- MachineFunction::iterator Begin = BB;
- MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
- for (; End != MF.end(); ++End) {
- if (!L->contains(End)) break;
- if (!HasAnalyzableTerminator(End)) break;
- ContiguousBlocks.insert(End);
- ++NumIntraMoved;
- }
-
- // If we're inserting at the bottom of the loop, and the code we're
-      // moving originally had fall-through successors, bring the successors
- // up with the loop blocks to preserve the fall-through edges.
- if (!InsertAtTop)
- for (; End != MF.end(); ++End) {
- if (L->contains(End)) break;
- if (!HasAnalyzableTerminator(End)) break;
- if (!HasFallthrough(prior(End))) break;
- }
-
- // Move the blocks. This may invalidate TopMBB and/or BotMBB, but
- // we don't need them anymore at this point.
- Splice(MF, InsertPt, Begin, End);
- }
-
- return Changed;
-}
-
-/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize
-/// intra-loop branching and to form contiguous loops.
-///
-/// This code takes the approach of making minor changes to the existing
-/// layout to fix specific loop-oriented problems. Also, it depends on
-/// AnalyzeBranch, which can't understand complex control instructions.
-///
-bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF,
- MachineLoop *L) {
- bool Changed = false;
-
- // Do optimization for nested loops.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
-
- // Do optimization for this loop.
- Changed |= EliminateUnconditionalJumpsToTop(MF, L);
- Changed |= MoveDiscontiguousLoopBlocks(MF, L);
-
- return Changed;
-}
-
-/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize
-/// intra-loop branching and to form contiguous loops.
-///
-bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
- bool Changed = false;
-
- if (!TLI->shouldOptimizeCodePlacement())
- return Changed;
-
- // Do optimization for each loop in the function.
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
- I != E; ++I)
- if (!(*I)->getParentLoop())
- Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I);
-
- return Changed;
-}
-
-/// AlignLoops - Align loop headers to target preferred alignments.
-///
-bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
- const Function *F = MF.getFunction();
- if (F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize))
- return false;
-
- unsigned Align = TLI->getPrefLoopAlignment();
- if (!Align)
- return false; // Don't care about loop alignment.
-
- bool Changed = false;
-
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
- I != E; ++I)
- Changed |= AlignLoop(MF, *I, Align);
-
- return Changed;
-}
-
-/// AlignLoop - Align loop headers to target preferred alignments.
-///
-bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
- unsigned Align) {
- bool Changed = false;
-
- // Do alignment for nested loops.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- Changed |= AlignLoop(MF, *I, Align);
-
- L->getTopBlock()->setAlignment(Align);
- Changed = true;
- ++NumLoopsAligned;
-
- return Changed;
-}
-
-bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
- MLI = &getAnalysis<MachineLoopInfo>();
- if (MLI->empty())
- return false; // No loops.
-
- TLI = MF.getTarget().getTargetLowering();
- TII = MF.getTarget().getInstrInfo();
-
- bool Changed = OptimizeIntraLoopEdges(MF);
-
- Changed |= AlignLoops(MF);
-
- return Changed;
-}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 377b4712beac..0eb74a40d589 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -17,12 +17,12 @@
#include "CriticalAntiDepBreaker.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -57,23 +57,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn());
- // Determine the live-out physregs for this block.
- if (IsReturnBlock) {
- // In a return block, examine the function live-out regs.
- for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
- E = MRI.liveout_end(); I != E; ++I) {
- for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
- unsigned Reg = *AI;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BBSize;
- DefIndices[Reg] = ~0u;
- }
- }
- }
-
- // In a non-return block, examine the live-in regs of all successors.
- // Note a return block can have successors if the return instruction is
- // predicated.
+ // Examine the live-in regs of all successors.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
@@ -371,14 +355,15 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
return false;
}
-unsigned
-CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
- RegRefIter RegRefEnd,
- unsigned AntiDepReg,
- unsigned LastNewReg,
- const TargetRegisterClass *RC)
+unsigned CriticalAntiDepBreaker::
+findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC,
+ SmallVector<unsigned, 2> &Forbid)
{
- ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
for (unsigned i = 0; i != Order.size(); ++i) {
unsigned NewReg = Order[i];
// Don't replace a register with itself.
@@ -401,6 +386,15 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
KillIndices[AntiDepReg] > DefIndices[NewReg])
continue;
+ // If NewReg overlaps any of the forbidden registers, we can't use it.
+ bool Forbidden = false;
+ for (SmallVector<unsigned, 2>::iterator it = Forbid.begin(),
+ ite = Forbid.end(); it != ite; ++it)
+ if (TRI->regsOverlap(NewReg, *it)) {
+ Forbidden = true;
+ break;
+ }
+ if (Forbidden) continue;
return NewReg;
}
@@ -564,6 +558,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
PrescanInstruction(MI);
+ SmallVector<unsigned, 2> ForbidRegs;
+
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
@@ -574,7 +570,9 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
AntiDepReg = 0;
else if (AntiDepReg) {
// If this instruction has a use of AntiDepReg, breaking it
- // is invalid.
+ // is invalid. If the instruction defines other registers,
+ // save a list of them so that we don't pick a new register
+ // that overlaps any of them.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
@@ -584,6 +582,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
AntiDepReg = 0;
break;
}
+ if (MO.isDef() && Reg != AntiDepReg)
+ ForbidRegs.push_back(Reg);
}
}
@@ -606,7 +606,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
AntiDepReg,
LastNewReg[AntiDepReg],
- RC)) {
+ RC, ForbidRegs)) {
DEBUG(dbgs() << "Breaking anti-dependence edge on "
<< TRI->getName(AntiDepReg)
<< " with " << RegRefs.count(AntiDepReg) << " references"
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index ad95c4819119..df13dd31f6b2 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -17,13 +17,13 @@
#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/ADT/BitVector.h"
#include <map>
namespace llvm {
@@ -102,7 +102,8 @@ class TargetRegisterInfo;
RegRefIter RegRefEnd,
unsigned AntiDepReg,
unsigned LastNewReg,
- const TargetRegisterClass *RC);
+ const TargetRegisterClass *RC,
+ SmallVector<unsigned, 2> &Forbid);
};
}
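The new Forbid parameter carries the other registers defined by the
instruction whose anti-dependence is being broken, and
findSuitableFreeRegister now rejects any candidate overlapping one of them.
Stripped of the target machinery, the added filtering step amounts to this
(a toy sketch; regsOverlap stands in for TargetRegisterInfo::regsOverlap):

    #include <cstdio>
    #include <vector>

    // Toy stand-in: two "registers" overlap only when they are equal.
    static bool regsOverlap(unsigned A, unsigned B) { return A == B; }

    // Pick the first candidate in Order that overlaps nothing in Forbid,
    // mirroring the loop added to findSuitableFreeRegister above.
    static unsigned pickFreeReg(const std::vector<unsigned> &Order,
                                const std::vector<unsigned> &Forbid) {
      for (size_t i = 0; i != Order.size(); ++i) {
        bool Forbidden = false;
        for (size_t j = 0; j != Forbid.size(); ++j)
          if (regsOverlap(Order[i], Forbid[j])) {
            Forbidden = true;
            break;
          }
        if (!Forbidden)
          return Order[i];
      }
      return 0; // no suitable register
    }

    int main() {
      std::vector<unsigned> Order, Forbid;
      Order.push_back(1); Order.push_back(2); Order.push_back(3);
      Forbid.push_back(1); Forbid.push_back(2);
      std::printf("picked r%u\n", pickFreeReg(Order, Forbid)); // picked r3
      return 0;
    }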
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index ff2f11353afd..840a10128daf 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -23,12 +23,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 8964269dde5f..a54217f5b2fb 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -13,14 +13,14 @@
#define DEBUG_TYPE "codegen-dce"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumDeletes, "Number of dead instructions deleted");
@@ -99,15 +99,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Start out assuming that reserved registers are live out of this block.
LivePhysRegs = MRI->getReservedRegs();
- // Also add any explicit live-out physregs for this block.
- if (!MBB->empty() && MBB->back().isReturn())
- for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
- LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
- unsigned Reg = *LOI;
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- LivePhysRegs.set(Reg);
- }
-
// Add live-ins from successors to LivePhysRegs. Normally, physregs are not
// live across blocks, but some targets (x86) can have flags live out of a
// block.
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 709562438ce2..f27ec770ebad 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -13,15 +13,15 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "dwarfehprepare"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -33,7 +33,7 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered");
namespace {
class DwarfEHPrepare : public FunctionPass {
const TargetMachine *TM;
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
// RewindFunction - _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index d5d84041b69f..5447df09cbb2 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -17,7 +17,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "early-ifcvt"
-#include "MachineTraceMetrics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -30,13 +29,14 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -459,7 +459,6 @@ void SSAIfConv::replacePHIInstrs() {
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
PHIInfo &PI = PHIs[i];
DEBUG(dbgs() << "If-converting " << *PI.PHI);
- assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands.");
unsigned DstReg = PI.PHI->getOperand(0).getReg();
TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
@@ -593,6 +592,7 @@ public:
EarlyIfConverter() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const;
bool runOnMachineFunction(MachineFunction &MF);
+ const char *getPassName() const { return "Early If-Conversion"; }
private:
bool tryConvertIf(MachineBasicBlock*);
diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp
new file mode 100644
index 000000000000..8a1e2d9c99a8
--- /dev/null
+++ b/lib/CodeGen/ErlangGC.cpp
@@ -0,0 +1,81 @@
+//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Erlang/OTP runtime-compatible garbage collector
+// (e.g. defines safe points, root initialization etc.)
+//
+// The frametable emitter is in ErlangGCPrinter.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+ class ErlangGC : public GCStrategy {
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const;
+ public:
+ ErlangGC();
+ bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF);
+ };
+
+}
+
+static GCRegistry::Add<ErlangGC>
+X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGC() { }
+
+ErlangGC::ErlangGC() {
+ InitRoots = false;
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+ CustomRoots = false;
+ CustomSafePoints = true;
+}
+
+MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
+ const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+ MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
+ ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+ MI != ME; ++MI)
+
+ if (MI->getDesc().isCall()) {
+
+ // Do not treat tail call sites as safe points.
+ if (MI->getDesc().isTerminator())
+ continue;
+
+ /* Code copied from VisitCallPoint(...) */
+ MachineBasicBlock::iterator RAI = MI; ++RAI;
+ MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc());
+ FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc());
+ }
+
+ return false;
+}
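
Editor's note: the new file above follows the stock GCStrategy plugin pattern — the constructor sets the policy flags, and a static GCRegistry::Add registers the strategy under the name that IR functions select with their gc attribute. A minimal sketch of the same pattern, using a hypothetical strategy name and only default hooks (no findCustomSafePoints), assuming the same headers as ErlangGC.cpp:

    #include "llvm/CodeGen/GCStrategy.h"

    using namespace llvm;

    namespace {
      // Hypothetical collector: relies entirely on the default machinery.
      class ExampleGC : public GCStrategy {
      public:
        ExampleGC() {
          InitRoots = true;                     // zero-initialize gcroot slots
          NeededSafePoints = 1 << GC::PostCall; // default safe-point finder
          UsesMetadata = true;                  // request a frame table
          CustomSafePoints = false;             // no findCustomSafePoints hook
        }
      };
    }

    // IR functions declared with gc "example" will pick up this strategy.
    static GCRegistry::Add<ExampleGC> Y("example", "example collector");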
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index ed78f1942150..9b0e76fa20cb 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -21,15 +21,15 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "execution-fix"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index 2c4a93543cc3..b2b68828a226 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -15,12 +15,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "expand-isel-pseudos"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index ffe4b63c1b11..1611db8d91a3 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -18,11 +18,11 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
@@ -49,8 +49,6 @@ private:
bool LowerSubregToReg(MachineInstr *MI);
bool LowerCopy(MachineInstr *MI);
- void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
- const TargetRegisterInfo *TRI);
void TransferImplicitDefs(MachineInstr *MI);
};
} // end anonymous namespace
@@ -61,21 +59,6 @@ char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
"Post-RA pseudo instruction expansion pass", false, false)
-/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
-/// and the lowered replacement instructions immediately precede it.
-/// Mark the replacement instructions with the dead flag.
-void
-ExpandPostRA::TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
- const TargetRegisterInfo *TRI) {
- for (MachineBasicBlock::iterator MII =
- prior(MachineBasicBlock::iterator(MI)); ; --MII) {
- if (MII->addRegisterDead(DstReg, TRI))
- break;
- assert(MII != MI->getParent()->begin() &&
- "copyPhysReg output doesn't reference destination register!");
- }
-}
-
/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
/// replacement instructions immediately precede it. Copy any implicit-def
/// operands from MI to the replacement instruction.
@@ -114,6 +97,12 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+ if (MI->allDefsAreDead()) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+ return true;
+ }
+
if (DstSubReg == InsReg) {
// No need to insert an identity copy instruction.
// Watch out for case like this:
@@ -135,10 +124,6 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
MachineBasicBlock::iterator CopyMI = MI;
--CopyMI;
CopyMI->addRegisterDefined(DstReg);
-
- // Transfer the kill/dead flags, if needed.
- if (MI->getOperand(0).isDead())
- TransferDeadFlag(MI, DstSubReg, TRI);
DEBUG(dbgs() << "subreg: " << *CopyMI);
}
@@ -148,6 +133,14 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
}
bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
+
+ if (MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "dead copy: " << *MI);
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+ }
+
MachineOperand &DstMO = MI->getOperand(0);
MachineOperand &SrcMO = MI->getOperand(1);
@@ -155,7 +148,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
DEBUG(dbgs() << "identity copy: " << *MI);
// No need to insert an identity copy instruction, but replace with a KILL
// if liveness is changed.
- if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
// We must make sure the super-register gets killed. Replace the
// instruction with KILL.
MI->setDesc(TII->get(TargetOpcode::KILL));
@@ -171,8 +164,6 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
- if (DstMO.isDead())
- TransferDeadFlag(MI, DstMO.getReg(), TRI);
if (MI->getNumOperands() > 2)
TransferImplicitDefs(MI);
DEBUG({
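
Editor's note: the two hunks above replace the TransferDeadFlag bookkeeping with a shortcut — if every def on the pseudo is already dead there is nothing to expand, so the pseudo is rewritten in place as a KILL. A hedged sketch of that check, assuming MI and TII as they are used in the pass:

    // Fast path shared by LowerSubregToReg and LowerCopy above: a pseudo
    // whose defs are all dead becomes a KILL (opcode swap, operands kept).
    static bool lowerIfAllDefsDead(llvm::MachineInstr *MI,
                                   const llvm::TargetInstrInfo *TII) {
      if (!MI->allDefsAreDead())
        return false; // caller continues with the normal expansion
      MI->setDesc(TII->get(llvm::TargetOpcode::KILL));
      return true;
    }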
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 1caf8c233976..ef5247c2edff 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -14,10 +14,10 @@
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Pass.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,21 +37,9 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const;
bool runOnFunction(Function &F);
- };
-
- class Deleter : public FunctionPass {
- static char ID;
-
- public:
- Deleter();
-
- const char *getPassName() const;
- void getAnalysisUsage(AnalysisUsage &AU) const;
-
- bool runOnFunction(Function &F);
bool doFinalization(Module &M);
};
-
+
}
INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
@@ -182,32 +170,9 @@ bool Printer::runOnFunction(Function &F) {
return false;
}
-// -----------------------------------------------------------------------------
-
-char Deleter::ID = 0;
-
-FunctionPass *llvm::createGCInfoDeleter() {
- return new Deleter();
-}
-
-Deleter::Deleter() : FunctionPass(ID) {}
-
-const char *Deleter::getPassName() const {
- return "Delete Garbage Collector Information";
-}
-
-void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<GCModuleInfo>();
-}
-
-bool Deleter::runOnFunction(Function &MF) {
- return false;
-}
-
-bool Deleter::doFinalization(Module &M) {
+bool Printer::doFinalization(Module &M) {
GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
- assert(GMI && "Deleter didn't require GCModuleInfo?!");
+ assert(GMI && "Printer didn't require GCModuleInfo?!");
GMI->clear();
return false;
}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index f4755bb1635c..1173d1102125 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -16,22 +16,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 31e36f0168cb..9958d7daada8 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -12,24 +12,25 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ifcvt"
-#include "BranchFolding.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "BranchFolding.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
// Hidden options for help debugging.
@@ -150,7 +151,7 @@ namespace {
/// basic block number.
std::vector<BBInfo> BBAnalysis;
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const InstrItineraryData *InstrItins;
@@ -994,14 +995,13 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
Redefs.erase(*SubRegs);
}
}
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
unsigned Reg = Defs[i];
if (!Redefs.insert(Reg)) {
if (AddImpUse)
// Treat predicated update as read + write.
- MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
- true/*IsImp*/,false/*IsKill*/,
- false/*IsDead*/,true/*IsUndef*/));
+ MIB.addReg(Reg, RegState::Implicit | RegState::Undef);
} else {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
Redefs.insert(*SubRegs);
@@ -1557,7 +1557,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
if (Succ == FallThrough)
continue;
FromBBI.BB->removeSuccessor(Succ);
- if (AddEdges)
+ if (AddEdges && !ToBBI.BB->isSuccessor(Succ))
ToBBI.BB->addSuccessor(Succ);
}
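
Editor's note: two behavioral changes above are easy to miss — predicated redefs now gain their implicit-undef read via MachineInstrBuilder rather than a hand-built MachineOperand, and MergeBlocks no longer duplicates an existing successor edge. A sketch of the builder idiom on an already-inserted instruction, with MI and Reg assumed:

    #include "llvm/CodeGen/MachineInstrBuilder.h"

    // Wrap an existing MachineInstr to append operands to it; this is the
    // replacement for the removed MachineOperand::CreateReg call above.
    static void addImplicitUndefRead(llvm::MachineInstr *MI, unsigned Reg) {
      llvm::MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
      MIB.addReg(Reg, llvm::RegState::Implicit | llvm::RegState::Undef);
    }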
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 37828a70b56f..c6d1a18dbd06 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "regalloc"
#include "Spiller.h"
-#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -22,16 +21,17 @@
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 1541bf0c8512..a8e711e33bdf 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -13,9 +13,9 @@
#define DEBUG_TYPE "regalloc"
#include "InterferenceCache.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 3c928a50864b..c02fb9a1ee24 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -15,7 +15,7 @@
#ifndef LLVM_CODEGEN_INTERFERENCECACHE
#define LLVM_CODEGEN_INTERFERENCECACHE
-#include "LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
namespace llvm {
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 6120ae56b4a7..07f0ccf52f8c 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/IntrinsicLowering.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
using namespace llvm;
template <class ArgIt>
diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt
index fee0347ea659..81ef1aa89dd4 100644
--- a/lib/CodeGen/LLVMBuild.txt
+++ b/lib/CodeGen/LLVMBuild.txt
@@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG
type = Library
name = CodeGen
parent = Libraries
-required_libraries = Analysis Core MC Scalar Support Target TransformUtils
+required_libraries = Analysis Core MC Scalar Support Target TransformUtils ObjCARC
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 24daafaa62e1..1a0983783484 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -11,30 +11,30 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/OwningPtr.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
// Enable or disable FastISel. Both options are needed, because
@@ -79,6 +79,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
"and that InitializeAllTargetMCs() is being invoked!");
}
+void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+}
+
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
PassManagerBase &PM,
@@ -96,6 +100,8 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
PassConfig->addIRPasses();
+ PassConfig->addCodeGenPrepare();
+
PassConfig->addPassesToHandleExceptions();
PassConfig->addISelPrepare();
@@ -191,7 +197,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
STI, *Context);
- MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(),
+ TargetCPU);
if (MCE == 0 || MAB == 0)
return true;
@@ -199,7 +206,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
*Context, *MAB, Out,
MCE, hasMCRelaxAll(),
hasMCNoExecStack()));
- AsmStreamer.get()->InitSections();
+ AsmStreamer.get()->setAutoInitSections(true);
break;
}
case CGFT_Null:
@@ -219,7 +226,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
PM.add(Printer);
- PM.add(createGCInfoDeleter());
return false;
}
@@ -238,7 +244,6 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
return true;
addCodeEmitter(PM, JCE);
- PM.add(createGCInfoDeleter());
return false; // success!
}
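
Editor's note: the new addAnalysisPasses hook gives clients a single call to pull in the target's IR-level analyses (here the basic TargetTransformInfo implementation) before codegen passes are added. A hypothetical driver fragment, not part of this patch, showing where the call sits:

    #include "llvm/IR/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Support/FormattedStream.h"
    #include "llvm/Target/TargetMachine.h"

    static bool emitObject(llvm::LLVMTargetMachine *TM, llvm::Module &M,
                           llvm::formatted_raw_ostream &Out) {
      llvm::PassManager PM;
      TM->addAnalysisPasses(PM); // added by this patch; wires up target TTI
      if (TM->addPassesToEmitFile(PM, Out,
                                  llvm::TargetMachine::CGFT_ObjectFile))
        return true; // target cannot emit this file type
      PM.run(M);
      return false;
    }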
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 6b6b9d084e1f..81721541cd89 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -16,10 +16,10 @@
#define DEBUG_TYPE "lexicalscopes"
#include "llvm/CodeGen/LexicalScopes.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -314,24 +314,22 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
void LexicalScope::anchor() { }
/// dump - Print data structures.
-void LexicalScope::dump() const {
+void LexicalScope::dump(unsigned Indent) const {
#ifndef NDEBUG
raw_ostream &err = dbgs();
- err.indent(IndentLevel);
+ err.indent(Indent);
err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
const MDNode *N = Desc;
+ err.indent(Indent);
N->dump();
if (AbstractScope)
- err << "Abstract Scope\n";
+ err << std::string(Indent, ' ') << "Abstract Scope\n";
- IndentLevel += 2;
if (!Children.empty())
- err << "Children ...\n";
+ err << std::string(Indent + 2, ' ') << "Children ...\n";
for (unsigned i = 0, e = Children.size(); i != e; ++i)
if (Children[i] != this)
- Children[i]->dump();
-
- IndentLevel -= 2;
+ Children[i]->dump(Indent + 2);
#endif
}
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index defc1279ec8c..0b117ac6566b 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -21,11 +21,6 @@
#define DEBUG_TYPE "livedebug"
#include "LiveDebugVariables.h"
-#include "VirtRegMap.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Metadata.h"
-#include "llvm/Value.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LexicalScopes.h"
@@ -35,6 +30,11 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -247,10 +247,6 @@ public:
LiveIntervals &LIS, MachineDominatorTree &MDT,
UserValueScopes &UVS);
- /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
- void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
- const TargetRegisterInfo *TRI);
-
/// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
/// live. Returns true if any changes were made.
bool splitRegister(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs);
@@ -259,7 +255,7 @@ public:
/// provided virtual register map.
void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
- /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures.
+ /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM,
LiveIntervals &LIS, const TargetInstrInfo &TRI);
@@ -286,6 +282,11 @@ class LDVImpl {
MachineDominatorTree *MDT;
const TargetRegisterInfo *TRI;
+ /// Whether emitDebugValues is called.
+ bool EmitDone;
+ /// Whether the machine function is modified during the pass.
+ bool ModifiedMF;
+
/// userValues - All allocated UserValue instances.
SmallVector<UserValue*, 8> userValues;
@@ -320,27 +321,30 @@ class LDVImpl {
void computeIntervals();
public:
- LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false),
+ ModifiedMF(false) {}
bool runOnMachineFunction(MachineFunction &mf);
- /// clear - Relase all memory.
+ /// clear - Release all memory.
void clear() {
DeleteContainerPointers(userValues);
userValues.clear();
virtRegToEqClass.clear();
userVarMap.clear();
+ // Make sure we call emitDebugValues if the machine function was modified.
+ assert((!ModifiedMF || EmitDone) &&
+ "Dbg values are not emitted in LDV");
+ EmitDone = false;
+ ModifiedMF = false;
}
/// mapVirtReg - Map virtual register to an equivalence class.
void mapVirtReg(unsigned VirtReg, UserValue *EC);
- /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
- void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
-
/// splitRegister - Replace all references to OldReg with NewRegs.
void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs);
- /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures.
+ /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM);
void print(raw_ostream&);
@@ -693,6 +697,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
computeIntervals();
DEBUG(print(dbgs()));
LS.releaseMemory();
+ ModifiedMF = Changed;
return Changed;
}
@@ -714,45 +719,6 @@ LiveDebugVariables::~LiveDebugVariables() {
delete static_cast<LDVImpl*>(pImpl);
}
-void UserValue::
-renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
- const TargetRegisterInfo *TRI) {
- for (unsigned i = locations.size(); i; --i) {
- unsigned LocNo = i - 1;
- MachineOperand &Loc = locations[LocNo];
- if (!Loc.isReg() || Loc.getReg() != OldReg)
- continue;
- if (TargetRegisterInfo::isPhysicalRegister(NewReg))
- Loc.substPhysReg(NewReg, *TRI);
- else
- Loc.substVirtReg(NewReg, SubIdx, *TRI);
- coalesceLocation(LocNo);
- }
-}
-
-void LDVImpl::
-renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
- UserValue *UV = lookupVirtReg(OldReg);
- if (!UV)
- return;
-
- if (TargetRegisterInfo::isVirtualRegister(NewReg))
- mapVirtReg(NewReg, UV);
- if (OldReg != NewReg)
- virtRegToEqClass.erase(OldReg);
-
- do {
- UV->renameRegister(OldReg, NewReg, SubIdx, TRI);
- UV = UV->getNext();
- } while (UV);
-}
-
-void LiveDebugVariables::
-renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
- if (pImpl)
- static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx);
-}
-
//===----------------------------------------------------------------------===//
// Live Range Splitting
//===----------------------------------------------------------------------===//
@@ -1011,6 +977,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
userValues[i]->rewriteLocations(*VRM, *TRI);
userValues[i]->emitDebugValues(VRM, *LIS, *TII);
}
+ EmitDone = true;
}
void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
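
Editor's note: the EmitDone/ModifiedMF flags added above encode a small contract — a function the pass modified must have its DBG_VALUEs re-emitted before the pass state is cleared. Condensed into a sketch, with the surrounding class elided:

    #include <cassert>

    // Sketch of the invariant only; the real flags live in LDVImpl above.
    struct LDVStateSketch {
      bool EmitDone;   // set by emitDebugValues()
      bool ModifiedMF; // set by runOnMachineFunction()
      void clear() {
        assert((!ModifiedMF || EmitDone) &&
               "Dbg values are not emitted in LDV");
        EmitDone = ModifiedMF = false;
      }
    };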
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 8585cbb30dee..dccd847d070c 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -19,15 +19,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "RegisterCoalescer.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "RegisterCoalescer.h"
#include <algorithm>
using namespace llvm;
@@ -440,7 +440,7 @@ void LiveInterval::join(LiveInterval &Other,
iterator OutIt = begin();
OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
- for (iterator I = next(OutIt), E = end(); I != E; ++I) {
+ for (iterator I = llvm::next(OutIt), E = end(); I != E; ++I) {
VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
assert(nextValNo != 0 && "Huh?");
@@ -464,10 +464,12 @@ void LiveInterval::join(LiveInterval &Other,
ranges.erase(OutIt, end());
}
- // Remember assignements because val# ids are changing.
- SmallVector<unsigned, 16> OtherAssignments;
+ // Rewrite Other values before changing the VNInfo ids.
+ // This can leave Other in an invalid state because we're not coalescing
+ // touching segments that now have identical values. That's OK since Other is
+ // not supposed to be valid after calling join().
for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
- OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]);
+ I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]];
// Update val# info. Renumber them and make sure they all belong to this
// LiveInterval now. Also remove dead val#'s.
@@ -486,148 +488,9 @@ void LiveInterval::join(LiveInterval &Other,
valnos.resize(NumNewVals); // shrinkify
// Okay, now insert the RHS live ranges into the LHS.
- unsigned RangeNo = 0;
- for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
- // Map the valno in the other live range to the current live range.
- I->valno = NewVNInfo[OtherAssignments[RangeNo]];
- assert(I->valno && "Adding a dead range?");
- }
- mergeIntervalRanges(Other);
-
- verify();
-}
-
-/// \brief Helper function for merging in another LiveInterval's ranges.
-///
-/// This is a helper routine implementing an efficient merge of another
-/// LiveIntervals ranges into the current interval.
-///
-/// \param LHSValNo If non-NULL, set as the new value number for every range
-/// from RHS which is merged into the LHS.
-/// \param RHSValNo If non-NULL, then only ranges in RHS whose original value
-/// number maches this value number will be merged into LHS.
-void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS,
- VNInfo *LHSValNo,
- const VNInfo *RHSValNo) {
- if (RHS.empty())
- return;
-
- // Ensure we're starting with a valid range. Note that we don't verify RHS
- // because it may have had its value numbers adjusted in preparation for
- // merging.
- verify();
-
- // The strategy for merging these efficiently is as follows:
- //
- // 1) Find the beginning of the impacted ranges in the LHS.
- // 2) Create a new, merged sub-squence of ranges merging from the position in
- // #1 until either LHS or RHS is exhausted. Any part of LHS between RHS
- // entries being merged will be copied into this new range.
- // 3) Replace the relevant section in LHS with these newly merged ranges.
- // 4) Append any remaning ranges from RHS if LHS is exhausted in #2.
- //
- // We don't follow the typical in-place merge strategy for sorted ranges of
- // appending the new ranges to the back and then using std::inplace_merge
- // because one step of the merge can both mutate the original elements and
- // remove elements from the original. Essentially, because the merge includes
- // collapsing overlapping ranges, a more complex approach is required.
-
- // We do an initial binary search to optimize for a common pattern: a large
- // LHS, and a very small RHS.
- const_iterator RI = RHS.begin(), RE = RHS.end();
- iterator LE = end(), LI = std::upper_bound(begin(), LE, *RI);
-
- // Merge into NewRanges until one of the ranges is exhausted.
- SmallVector<LiveRange, 4> NewRanges;
-
- // Keep track of where to begin the replacement.
- iterator ReplaceI = LI;
-
- // If there are preceding ranges in the LHS, put the last one into NewRanges
- // so we can optionally extend it. Adjust the replacement point accordingly.
- if (LI != begin()) {
- ReplaceI = llvm::prior(LI);
- NewRanges.push_back(*ReplaceI);
- }
-
- // Now loop over the mergable portions of both LHS and RHS, merging into
- // NewRanges.
- while (LI != LE && RI != RE) {
- // Skip incoming ranges with the wrong value.
- if (RHSValNo && RI->valno != RHSValNo) {
- ++RI;
- continue;
- }
-
- // Select the first range. We pick the earliest start point, and then the
- // largest range.
- LiveRange R = *LI;
- if (*RI < R) {
- R = *RI;
- ++RI;
- if (LHSValNo)
- R.valno = LHSValNo;
- } else {
- ++LI;
- }
-
- if (NewRanges.empty()) {
- NewRanges.push_back(R);
- continue;
- }
-
- LiveRange &LastR = NewRanges.back();
- if (R.valno == LastR.valno) {
- // Try to merge this range into the last one.
- if (R.start <= LastR.end) {
- LastR.end = std::max(LastR.end, R.end);
- continue;
- }
- } else {
- // We can't merge ranges across a value number.
- assert(R.start >= LastR.end &&
- "Cannot overlap two LiveRanges with differing ValID's");
- }
-
- // If all else fails, just append the range.
- NewRanges.push_back(R);
- }
- assert(RI == RE || LI == LE);
-
- // Check for being able to merge into the trailing sequence of ranges on the LHS.
- if (!NewRanges.empty())
- for (; LI != LE && (LI->valno == NewRanges.back().valno &&
- LI->start <= NewRanges.back().end);
- ++LI)
- NewRanges.back().end = std::max(NewRanges.back().end, LI->end);
-
- // Replace the ranges in the LHS with the newly merged ones. It would be
- // really nice if there were a move-supporting 'replace' directly in
- // SmallVector, but as there is not, we pay the price of copies to avoid
- // wasted memory allocations.
- SmallVectorImpl<LiveRange>::iterator NRI = NewRanges.begin(),
- NRE = NewRanges.end();
- for (; ReplaceI != LI && NRI != NRE; ++ReplaceI, ++NRI)
- *ReplaceI = *NRI;
- if (NRI == NRE)
- ranges.erase(ReplaceI, LI);
- else
- ranges.insert(LI, NRI, NRE);
-
- // And finally insert any trailing end of RHS (if we have one).
- for (; RI != RE; ++RI) {
- LiveRange R = *RI;
- if (LHSValNo)
- R.valno = LHSValNo;
- if (!ranges.empty() &&
- ranges.back().valno == R.valno && R.start <= ranges.back().end)
- ranges.back().end = std::max(ranges.back().end, R.end);
- else
- ranges.push_back(R);
- }
-
- // Ensure we finished with a valid new sequence of ranges.
- verify();
+ LiveRangeUpdater Updater(this);
+ for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+ Updater.add(*I);
}
/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
@@ -636,7 +499,9 @@ void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS,
/// the overlapping LiveRanges have the specified value number.
void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
VNInfo *LHSValNo) {
- mergeIntervalRanges(RHS, LHSValNo);
+ LiveRangeUpdater Updater(this);
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+ Updater.add(I->start, I->end, LHSValNo);
}
/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
@@ -647,7 +512,10 @@ void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
const VNInfo *RHSValNo,
VNInfo *LHSValNo) {
- mergeIntervalRanges(RHS, LHSValNo, RHSValNo);
+ LiveRangeUpdater Updater(this);
+ for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
+ if (I->valno == RHSValNo)
+ Updater.add(I->start, I->end, LHSValNo);
}
/// MergeValueNumberInto - This method is called when two value numbers
@@ -785,6 +653,206 @@ void LiveRange::print(raw_ostream &os) const {
os << *this;
}
+//===----------------------------------------------------------------------===//
+// LiveRangeUpdater class
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeUpdater class always maintains these invariants:
+//
+// - When LastStart is invalid, Spills is empty and the iterators are invalid.
+// This is the initial state, and the state created by flush().
+// In this state, isDirty() returns false.
+//
+// Otherwise, segments are kept in three separate areas:
+//
+// 1. [begin; WriteI) at the front of LI.
+// 2. [ReadI; end) at the back of LI.
+// 3. Spills.
+//
+// - LI.begin() <= WriteI <= ReadI <= LI.end().
+// - Segments in all three areas are fully ordered and coalesced.
+// - Segments in area 1 precede and can't coalesce with segments in area 2.
+// - Segments in Spills precede and can't coalesce with segments in area 2.
+// - No coalescing is possible between segments in Spills and segments in area
+// 1, and there are no overlapping segments.
+//
+// The segments in Spills are not ordered with respect to the segments in area
+// 1. They need to be merged.
+//
+// When they exist, Spills.back().start <= LastStart,
+// and WriteI[-1].start <= LastStart.
+
+void LiveRangeUpdater::print(raw_ostream &OS) const {
+ if (!isDirty()) {
+ if (LI)
+ OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n';
+ else
+ OS << "Null updater.\n";
+ return;
+ }
+ assert(LI && "Can't have null LI in dirty updater.");
+ OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI)
+ << ", last start = " << LastStart
+ << ":\n Area 1:";
+ for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I)
+ OS << ' ' << *I;
+ OS << "\n Spills:";
+ for (unsigned I = 0, E = Spills.size(); I != E; ++I)
+ OS << ' ' << Spills[I];
+ OS << "\n Area 2:";
+ for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I)
+ OS << ' ' << *I;
+ OS << '\n';
+}
+
+void LiveRangeUpdater::dump() const
+{
+ print(errs());
+}
+
+// Determine if A and B should be coalesced.
+static inline bool coalescable(const LiveRange &A, const LiveRange &B) {
+ assert(A.start <= B.start && "Unordered live ranges.");
+ if (A.end == B.start)
+ return A.valno == B.valno;
+ if (A.end < B.start)
+ return false;
+ assert(A.valno == B.valno && "Cannot overlap different values");
+ return true;
+}
+
+void LiveRangeUpdater::add(LiveRange Seg) {
+ assert(LI && "Cannot add to a null destination");
+
+ // Flush the state if Start moves backwards.
+ if (!LastStart.isValid() || LastStart > Seg.start) {
+ if (isDirty())
+ flush();
+ // This brings us to an uninitialized state. Reinitialize.
+ assert(Spills.empty() && "Leftover spilled segments");
+ WriteI = ReadI = LI->begin();
+ }
+
+ // Remember start for next time.
+ LastStart = Seg.start;
+
+ // Advance ReadI until it ends after Seg.start.
+ LiveInterval::iterator E = LI->end();
+ if (ReadI != E && ReadI->end <= Seg.start) {
+ // First try to close the gap between WriteI and ReadI with spills.
+ if (ReadI != WriteI)
+ mergeSpills();
+ // Then advance ReadI.
+ if (ReadI == WriteI)
+ ReadI = WriteI = LI->find(Seg.start);
+ else
+ while (ReadI != E && ReadI->end <= Seg.start)
+ *WriteI++ = *ReadI++;
+ }
+
+ assert(ReadI == E || ReadI->end > Seg.start);
+
+ // Check if the ReadI segment begins early.
+ if (ReadI != E && ReadI->start <= Seg.start) {
+ assert(ReadI->valno == Seg.valno && "Cannot overlap different values");
+ // Bail if Seg is completely contained in ReadI.
+ if (ReadI->end >= Seg.end)
+ return;
+ // Coalesce into Seg.
+ Seg.start = ReadI->start;
+ ++ReadI;
+ }
+
+ // Coalesce as much as possible from ReadI into Seg.
+ while (ReadI != E && coalescable(Seg, *ReadI)) {
+ Seg.end = std::max(Seg.end, ReadI->end);
+ ++ReadI;
+ }
+
+ // Try coalescing Spills.back() into Seg.
+ if (!Spills.empty() && coalescable(Spills.back(), Seg)) {
+ Seg.start = Spills.back().start;
+ Seg.end = std::max(Spills.back().end, Seg.end);
+ Spills.pop_back();
+ }
+
+ // Try coalescing Seg into WriteI[-1].
+ if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) {
+ WriteI[-1].end = std::max(WriteI[-1].end, Seg.end);
+ return;
+ }
+
+ // Seg doesn't coalesce with anything, and needs to be inserted somewhere.
+ if (WriteI != ReadI) {
+ *WriteI++ = Seg;
+ return;
+ }
+
+ // Finally, append to LI or Spills.
+ if (WriteI == E) {
+ LI->ranges.push_back(Seg);
+ WriteI = ReadI = LI->ranges.end();
+ } else
+ Spills.push_back(Seg);
+}
+
+// Merge as many spilled segments as possible into the gap between WriteI
+// and ReadI. Advance WriteI to reflect the inserted segments.
+void LiveRangeUpdater::mergeSpills() {
+ // Perform a backwards merge of Spills and [SpillI;WriteI).
+ size_t GapSize = ReadI - WriteI;
+ size_t NumMoved = std::min(Spills.size(), GapSize);
+ LiveInterval::iterator Src = WriteI;
+ LiveInterval::iterator Dst = Src + NumMoved;
+ LiveInterval::iterator SpillSrc = Spills.end();
+ LiveInterval::iterator B = LI->begin();
+
+ // This is the new WriteI position after merging spills.
+ WriteI = Dst;
+
+ // Now merge Src and Spills backwards.
+ while (Src != Dst) {
+ if (Src != B && Src[-1].start > SpillSrc[-1].start)
+ *--Dst = *--Src;
+ else
+ *--Dst = *--SpillSrc;
+ }
+ assert(NumMoved == size_t(Spills.end() - SpillSrc));
+ Spills.erase(SpillSrc, Spills.end());
+}
+
+void LiveRangeUpdater::flush() {
+ if (!isDirty())
+ return;
+ // Clear the dirty state.
+ LastStart = SlotIndex();
+
+ assert(LI && "Cannot add to a null destination");
+
+ // Nothing to merge?
+ if (Spills.empty()) {
+ LI->ranges.erase(WriteI, ReadI);
+ LI->verify();
+ return;
+ }
+
+ // Resize the WriteI - ReadI gap to match Spills.
+ size_t GapSize = ReadI - WriteI;
+ if (GapSize < Spills.size()) {
+ // The gap is too small. Make some room.
+ size_t WritePos = WriteI - LI->begin();
+ LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange());
+ // This also invalidated ReadI, but it is recomputed below.
+ WriteI = LI->ranges.begin() + WritePos;
+ } else {
+ // Shrink the gap if necessary.
+ LI->ranges.erase(WriteI + Spills.size(), ReadI);
+ }
+ ReadI = WriteI + Spills.size();
+ mergeSpills();
+ LI->verify();
+}
+
unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
// Create initial equivalence classes.
EqClass.clear();
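
Editor's note: taken together, the invariants comment and the add()/flush() implementation above describe a streaming builder — segments arrive roughly in ascending start order, are coalesced on the fly, and overflow into Spills when they cannot be placed without shifting the array; flush() merges the spill buffer back and re-verifies. A usage sketch grounded in the calls this patch itself makes, with LI, VNI, and the SlotIndexes assumed to exist:

    // Rebuild part of LI by streaming segments through the updater.
    void addSegments(llvm::LiveInterval *LI, llvm::VNInfo *VNI,
                     llvm::SlotIndex S1, llvm::SlotIndex E1,
                     llvm::SlotIndex S2, llvm::SlotIndex E2) {
      llvm::LiveRangeUpdater Updater(LI);
      Updater.add(S1, E1, VNI); // touching segments with equal VNI coalesce
      Updater.add(S2, E2, VNI); // a start moving backwards forces a flush
      Updater.flush();          // merge leftover spills, then LI->verify()
    }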
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 4e75d892e523..f1b839481131 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -17,34 +17,29 @@
#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/Value.h"
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "LiveRangeCalc.h"
-#include "VirtRegMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
-#include <limits>
#include <cmath>
+#include <limits>
using namespace llvm;
-// Switch to the new experimental algorithm for computing live intervals.
-static cl::opt<bool>
-NewLiveIntervals("new-live-intervals", cl::Hidden,
- cl::desc("Use new algorithm forcomputing live intervals"));
-
char LiveIntervals::ID = 0;
char &llvm::LiveIntervalsID = LiveIntervals::ID;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
@@ -60,6 +55,9 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
+ // LiveVariables isn't really required by this analysis, it is only required
+ // here to make sure it is live during TwoAddressInstructionPass and
+ // PHIElimination. This is temporary.
AU.addRequired<LiveVariables>();
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
@@ -105,7 +103,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
AA = &getAnalysis<AliasAnalysis>();
- LV = &getAnalysis<LiveVariables>();
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
if (!LRCalc)
@@ -114,16 +111,8 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
// Allocate space for all virtual registers.
VirtRegIntervals.resize(MRI->getNumVirtRegs());
- if (NewLiveIntervals) {
- // This is the new way of computing live intervals.
- // It is independent of LiveVariables, and it can run at any time.
- computeVirtRegs();
- computeRegMasks();
- } else {
- // This is the old way of computing live intervals.
- // It depends on LiveVariables.
- computeIntervals();
- }
+ computeVirtRegs();
+ computeRegMasks();
computeLiveInRegUnits();
DEBUG(dump());
@@ -165,298 +154,6 @@ void LiveIntervals::dumpInstrs() const {
}
#endif
-static
-bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
- unsigned Reg = MI.getOperand(MOIdx).getReg();
- for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg())
- continue;
- if (MO.getReg() == Reg && MO.isDef()) {
- assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() &&
- MI.getOperand(MOIdx).getSubReg() &&
- (MO.getSubReg() || MO.isImplicit()));
- return true;
- }
- }
- return false;
-}
-
-/// isPartialRedef - Return true if the specified def at the specific index is
-/// partially re-defining the specified live interval. A common case of this is
-/// a definition of the sub-register.
-bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
- LiveInterval &interval) {
- if (!MO.getSubReg() || MO.isEarlyClobber())
- return false;
-
- SlotIndex RedefIndex = MIIdx.getRegSlot();
- const LiveRange *OldLR =
- interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
- MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
- if (DefMI != 0) {
- return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
- }
- return false;
-}
-
-void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
- MachineBasicBlock::iterator mi,
- SlotIndex MIIdx,
- MachineOperand& MO,
- unsigned MOIdx,
- LiveInterval &interval) {
- DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, TRI));
-
- // Virtual registers may be defined multiple times (due to phi
- // elimination and 2-addr elimination). Much of what we do only has to be
- // done once for the vreg. We use an empty interval to detect the first
- // time we see a vreg.
- LiveVariables::VarInfo& vi = LV->getVarInfo(interval.reg);
- if (interval.empty()) {
- // Get the Idx of the defining instructions.
- SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
-
- // Make sure the first definition is not a partial redefinition.
- assert(!MO.readsReg() && "First def cannot also read virtual register "
- "missing <undef> flag?");
-
- VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator);
- assert(ValNo->id == 0 && "First value in interval is not 0?");
-
- // Loop over all of the blocks that the vreg is defined in. There are
- // two cases we have to handle here. The most common case is a vreg
- // whose lifetime is contained within a basic block. In this case there
- // will be a single kill, in MBB, which comes after the definition.
- if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
- // FIXME: what about dead vars?
- SlotIndex killIdx;
- if (vi.Kills[0] != mi)
- killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot();
- else
- killIdx = defIndex.getDeadSlot();
-
- // If the kill happens after the definition, we have an intra-block
- // live range.
- if (killIdx > defIndex) {
- assert(vi.AliveBlocks.empty() &&
- "Shouldn't be alive across any blocks!");
- LiveRange LR(defIndex, killIdx, ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR << "\n");
- return;
- }
- }
-
- // The other case we handle is when a virtual register lives to the end
- // of the defining block, potentially live across some blocks, then is
- // live into some number of blocks, but gets killed. Start by adding a
- // range that goes from this definition to the end of the defining block.
- LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
- DEBUG(dbgs() << " +" << NewLR);
- interval.addRange(NewLR);
-
- bool PHIJoin = LV->isPHIJoin(interval.reg);
-
- if (PHIJoin) {
- // A phi join register is killed at the end of the MBB and revived as a
- // new valno in the killing blocks.
- assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
- DEBUG(dbgs() << " phi-join");
- } else {
- // Iterate over all of the blocks that the variable is completely
- // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the
- // live interval.
- for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
- E = vi.AliveBlocks.end(); I != E; ++I) {
- MachineBasicBlock *aliveBlock = MF->getBlockNumbered(*I);
- LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock),
- ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR);
- }
- }
-
- // Finally, this virtual register is live from the start of any killing
- // block to the 'use' slot of the killing instruction.
- for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
- MachineInstr *Kill = vi.Kills[i];
- SlotIndex Start = getMBBStartIdx(Kill->getParent());
- SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot();
-
- // Create interval with one of a NEW value number. Note that this value
- // number isn't actually defined by an instruction, weird huh? :)
- if (PHIJoin) {
- assert(getInstructionFromIndex(Start) == 0 &&
- "PHI def index points at actual instruction.");
- ValNo = interval.getNextValue(Start, VNInfoAllocator);
- }
- LiveRange LR(Start, killIdx, ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR);
- }
-
- } else {
- if (MultipleDefsBySameMI(*mi, MOIdx))
- // Multiple defs of the same virtual register by the same instruction.
- // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
- // This is likely due to elimination of REG_SEQUENCE instructions. Return
- // here since there is nothing to do.
- return;
-
- // If this is the second time we see a virtual register definition, it
- // must be due to phi elimination or two addr elimination. If this is
- // the result of two address elimination, then the vreg is one of the
- // def-and-use register operand.
-
- // It may also be partial redef like this:
- // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
- // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
- bool PartReDef = isPartialRedef(MIIdx, MO, interval);
- if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
- // If this is a two-address definition, then we have already processed
- // the live range. The only problem is that we didn't realize there
- // are actually two values in the live interval. Because of this we
- // need to take the LiveRegion that defines this register and split it
- // into two values.
- SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
-
- const LiveRange *OldLR =
- interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
- VNInfo *OldValNo = OldLR->valno;
- SlotIndex DefIndex = OldValNo->def.getRegSlot();
-
- // Delete the previous value, which should be short and continuous,
- // because the 2-addr copy must be in the same MBB as the redef.
- interval.removeRange(DefIndex, RedefIndex);
-
- // The new value number (#1) is defined by the instruction we claimed
- // defined value #0.
- VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
-
- // Value#0 is now defined by the 2-addr instruction.
- OldValNo->def = RedefIndex;
-
- // Add the new live interval which replaces the range for the input copy.
- LiveRange LR(DefIndex, RedefIndex, ValNo);
- DEBUG(dbgs() << " replace range with " << LR);
- interval.addRange(LR);
-
- // If this redefinition is dead, we need to add a dummy unit live
- // range covering the def slot.
- if (MO.isDead())
- interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(),
- OldValNo));
-
- DEBUG(dbgs() << " RESULT: " << interval);
- } else if (LV->isPHIJoin(interval.reg)) {
- // In the case of PHI elimination, each variable definition is only
- // live until the end of the block. We've already taken care of the
- // rest of the live range.
-
- SlotIndex defIndex = MIIdx.getRegSlot();
- if (MO.isEarlyClobber())
- defIndex = MIIdx.getRegSlot(true);
-
- VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator);
-
- SlotIndex killIndex = getMBBEndIdx(mbb);
- LiveRange LR(defIndex, killIndex, ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " phi-join +" << LR);
- } else {
- llvm_unreachable("Multiply defined register");
- }
- }
-
- DEBUG(dbgs() << '\n');
-}
-
-void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator MI,
- SlotIndex MIIdx,
- MachineOperand& MO,
- unsigned MOIdx) {
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
- getOrCreateInterval(MO.getReg()));
-}
-
-/// computeIntervals - computes the live intervals for virtual
-/// registers. for some ordering of the machine instructions [1,N] a
-/// live interval is an interval [i, j) where 1 <= i <= j < N for
-/// which a variable is live
-void LiveIntervals::computeIntervals() {
- DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
- << "********** Function: " << MF->getName() << '\n');
-
- RegMaskBlocks.resize(MF->getNumBlockIDs());
-
- SmallVector<unsigned, 8> UndefUses;
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
- RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size();
-
- if (MBB->empty())
- continue;
-
- // Track the index of the current machine instr.
- SlotIndex MIIndex = getMBBStartIdx(MBB);
- DEBUG(dbgs() << "BB#" << MBB->getNumber()
- << ":\t\t# derived from " << MBB->getName() << "\n");
-
- // Skip over empty initial indices.
- if (getInstructionFromIndex(MIIndex) == 0)
- MIIndex = Indexes->getNextNonNullIndex(MIIndex);
-
- for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
- MI != miEnd; ++MI) {
- DEBUG(dbgs() << MIIndex << "\t" << *MI);
- if (MI->isDebugValue())
- continue;
- assert(Indexes->getInstructionFromIndex(MIIndex) == MI &&
- "Lost SlotIndex synchronization");
-
- // Handle defs.
- for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
- MachineOperand &MO = MI->getOperand(i);
-
- // Collect register masks.
- if (MO.isRegMask()) {
- RegMaskSlots.push_back(MIIndex.getRegSlot());
- RegMaskBits.push_back(MO.getRegMask());
- continue;
- }
-
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
-
- // handle register defs - build intervals
- if (MO.isDef())
- handleRegisterDef(MBB, MI, MIIndex, MO, i);
- else if (MO.isUndef())
- UndefUses.push_back(MO.getReg());
- }
-
- // Move to the next instr slot.
- MIIndex = Indexes->getNextNonNullIndex(MIIndex);
- }
-
- // Compute the number of register mask instructions in this block.
- std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
- RMB.second = RegMaskSlots.size() - RMB.first;
- }
-
- // Create empty intervals for registers defined by implicit_def's (except
- // for those implicit_def that define values which are liveout of their
- // blocks.
- for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) {
- unsigned UndefReg = UndefUses[i];
- (void)getOrCreateInterval(UndefReg);
- }
-}
-
LiveInterval* LiveIntervals::createInterval(unsigned reg) {
float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
return new LiveInterval(reg, Weight);
@@ -1275,9 +972,9 @@ private:
// Return the last use of reg between NewIdx and OldIdx.
SlotIndex findLastUseBefore(unsigned Reg) {
- SlotIndex LastUse = NewIdx;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ SlotIndex LastUse = NewIdx;
for (MachineRegisterInfo::use_nodbg_iterator
UI = MRI.use_nodbg_begin(Reg),
UE = MRI.use_nodbg_end();
@@ -1287,26 +984,42 @@ private:
if (InstSlot > LastUse && InstSlot < OldIdx)
LastUse = InstSlot;
}
- } else {
- MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx);
- MachineBasicBlock::iterator MII(MI);
- ++MII;
- MachineBasicBlock* MBB = MI->getParent();
- for (; MII != MBB->end() && LIS.getInstructionIndex(MII) < OldIdx; ++MII){
- for (MachineInstr::mop_iterator MOI = MII->operands_begin(),
- MOE = MII->operands_end();
- MOI != MOE; ++MOI) {
- const MachineOperand& mop = *MOI;
- if (!mop.isReg() || mop.getReg() == 0 ||
- TargetRegisterInfo::isVirtualRegister(mop.getReg()))
- continue;
-
- if (TRI.hasRegUnit(mop.getReg(), Reg))
- LastUse = LIS.getInstructionIndex(MII);
- }
- }
+ return LastUse;
}
- return LastUse;
+
+ // This is a regunit interval, so scanning the use list could be very
+ // expensive. Scan upwards from OldIdx instead.
+ assert(NewIdx < OldIdx && "Expected upwards move");
+ SlotIndexes *Indexes = LIS.getSlotIndexes();
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx);
+
+ // OldIdx may not correspond to an instruction any longer, so set MII to
+ // point to the next instruction after OldIdx, or MBB->end().
+ MachineBasicBlock::iterator MII = MBB->end();
+ if (MachineInstr *MI = Indexes->getInstructionFromIndex(
+ Indexes->getNextNonNullIndex(OldIdx)))
+ if (MI->getParent() == MBB)
+ MII = MI;
+
+ MachineBasicBlock::iterator Begin = MBB->begin();
+ while (MII != Begin) {
+ if ((--MII)->isDebugValue())
+ continue;
+ SlotIndex Idx = Indexes->getInstructionIndex(MII);
+
+ // Stop searching when NewIdx is reached.
+ if (!SlotIndex::isEarlierInstr(NewIdx, Idx))
+ return NewIdx;
+
+ // Check if MII uses Reg.
+ for (MIBundleOperands MO(MII); MO.isValid(); ++MO)
+ if (MO->isReg() &&
+ TargetRegisterInfo::isPhysicalRegister(MO->getReg()) &&
+ TRI.hasRegUnit(MO->getReg(), Reg))
+ return Idx;
+ }
+ // Didn't reach NewIdx. It must be the first instruction in the block.
+ return NewIdx;
}
};
@@ -1331,3 +1044,129 @@ void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
HME.updateAllRanges(MI);
}
+
+void
+LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ ArrayRef<unsigned> OrigRegs) {
+ // Find anchor points, which are at the beginning/end of blocks or at
+ // instructions that already have indexes.
+ while (Begin != MBB->begin() && !Indexes->hasIndex(Begin))
+ --Begin;
+ while (End != MBB->end() && !Indexes->hasIndex(End))
+ ++End;
+
+ SlotIndex endIdx;
+ if (End == MBB->end())
+ endIdx = getMBBEndIdx(MBB).getPrevSlot();
+ else
+ endIdx = getInstructionIndex(End);
+
+ Indexes->repairIndexesInRange(MBB, Begin, End);
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MOI->getReg()) &&
+ !hasInterval(MOI->getReg())) {
+ LiveInterval &LI = getOrCreateInterval(MOI->getReg());
+ computeVirtRegInterval(&LI);
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) {
+ unsigned Reg = OrigRegs[i];
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ LiveInterval &LI = getInterval(Reg);
+ // FIXME: Should we support undefs that gain defs?
+ if (!LI.hasAtLeastOneValue())
+ continue;
+
+ LiveInterval::iterator LII = LI.find(endIdx);
+ SlotIndex lastUseIdx;
+ if (LII != LI.end() && LII->start < endIdx)
+ lastUseIdx = LII->end;
+ else
+ --LII;
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+
+ SlotIndex instrIdx = getInstructionIndex(MI);
+ bool isStartValid = getInstructionFromIndex(LII->start);
+ bool isEndValid = getInstructionFromIndex(LII->end);
+
+ // FIXME: This doesn't currently handle early-clobber or multiple removed
+ // defs inside of the region to repair.
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ const MachineOperand &MO = *OI;
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ if (MO.isDef()) {
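+          // If the instruction that used to define the range start has been
+          // removed, either shrink away a now-dead range or move the range
+          // start down to this def.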
+ if (!isStartValid) {
+ if (LII->end.isDead()) {
+ SlotIndex prevStart;
+ if (LII != LI.begin())
+ prevStart = llvm::prior(LII)->start;
+
+ // FIXME: This could be more efficient if there was a removeRange
+ // method that returned an iterator.
+ LI.removeRange(*LII, true);
+ if (prevStart.isValid())
+ LII = LI.find(prevStart);
+ else
+ LII = LI.begin();
+ } else {
+ LII->start = instrIdx.getRegSlot();
+ LII->valno->def = instrIdx.getRegSlot();
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ continue;
+ }
+ }
+
+ if (!lastUseIdx.isValid()) {
+ VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
+ VNInfoAllocator);
+ LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI);
+ LII = LI.addRange(LR);
+ } else if (LII->start != instrIdx.getRegSlot()) {
+ VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(),
+ VNInfoAllocator);
+ LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI);
+ LII = LI.addRange(LR);
+ }
+
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ } else if (MO.isUse()) {
+ // FIXME: This should probably be handled outside of this branch,
+ // either as part of the def case (for defs inside of the region) or
+ // after the loop over the region.
+ if (!isEndValid && !LII->end.isBlock())
+ LII->end = instrIdx.getRegSlot();
+ if (!lastUseIdx.isValid())
+ lastUseIdx = instrIdx.getRegSlot();
+ }
+ }
+ }
+ }
+}
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index dadd02bfc654..d5a81a311c64 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -14,13 +14,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/CodeGen/MachineLoopRanges.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
#include <algorithm>
using namespace llvm;
@@ -182,33 +180,6 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
return InterferingVRegs.size();
}
-bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
- // VirtReg is likely live throughout the loop, so start by checking LIU-Loop
- // overlaps.
- IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map>
- Overlaps(LiveUnion->getMap(), Loop->getMap());
- if (!Overlaps.valid())
- return false;
-
- // The loop is overlapping an LIU assignment. Check VirtReg as well.
- LiveInterval::iterator VRI = VirtReg->find(Overlaps.start());
-
- for (;;) {
- if (VRI == VirtReg->end())
- return false;
- if (VRI->start < Overlaps.stop())
- return true;
-
- Overlaps.advanceTo(VRI->start);
- if (!Overlaps.valid())
- return false;
- if (Overlaps.start() < VRI->end)
- return true;
-
- VRI = VirtReg->advanceTo(VRI, Overlaps.start());
- }
-}
-
void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
unsigned NSize) {
// Reuse existing allocation.
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
deleted file mode 100644
index 4d41fca85ad3..000000000000
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ /dev/null
@@ -1,210 +0,0 @@
-//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// LiveIntervalUnion is a union of live segments across multiple live virtual
-// registers. This may be used during coalescing to represent a congruence
-// class, or during register allocation to model liveness of a physical
-// register.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_LIVEINTERVALUNION
-#define LLVM_CODEGEN_LIVEINTERVALUNION
-
-#include "llvm/ADT/IntervalMap.h"
-#include "llvm/CodeGen/LiveInterval.h"
-
-namespace llvm {
-
-class MachineLoopRange;
-class TargetRegisterInfo;
-
-#ifndef NDEBUG
-// forward declaration
-template <unsigned Element> class SparseBitVector;
-typedef SparseBitVector<128> LiveVirtRegBitSet;
-#endif
-
-/// Compare a live virtual register segment to a LiveIntervalUnion segment.
-inline bool
-overlap(const LiveRange &VRSeg,
- const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) {
- return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end;
-}
-
-/// Union of live intervals that are strong candidates for coalescing into a
-/// single register (either physical or virtual depending on the context). We
-/// expect the constituent live intervals to be disjoint, although we may
-/// eventually make exceptions to handle value-based interference.
-class LiveIntervalUnion {
- // A set of live virtual register segments that supports fast insertion,
- // intersection, and removal.
- // Mapping SlotIndex intervals to virtual register numbers.
- typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments;
-
-public:
- // SegmentIter can advance to the next segment ordered by starting position
- // which may belong to a different live virtual register. We also must be able
- // to reach the current segment's containing virtual register.
- typedef LiveSegments::iterator SegmentIter;
-
- // LiveIntervalUnions share an external allocator.
- typedef LiveSegments::Allocator Allocator;
-
- class Query;
-
-private:
- unsigned Tag; // unique tag for current contents.
- LiveSegments Segments; // union of virtual reg segments
-
-public:
- explicit LiveIntervalUnion(Allocator &a) : Tag(0), Segments(a) {}
-
- // Iterate over all segments in the union of live virtual registers ordered
- // by their starting position.
- SegmentIter begin() { return Segments.begin(); }
- SegmentIter end() { return Segments.end(); }
- SegmentIter find(SlotIndex x) { return Segments.find(x); }
- bool empty() const { return Segments.empty(); }
- SlotIndex startIndex() const { return Segments.start(); }
-
- // Provide public access to the underlying map to allow overlap iteration.
- typedef LiveSegments Map;
- const Map &getMap() { return Segments; }
-
- /// getTag - Return an opaque tag representing the current state of the union.
- unsigned getTag() const { return Tag; }
-
- /// changedSince - Return true if the union has changed since getTag returned tag.
- bool changedSince(unsigned tag) const { return tag != Tag; }
-
- // Add a live virtual register to this union and merge its segments.
- void unify(LiveInterval &VirtReg);
-
- // Remove a live virtual register's segments from this union.
- void extract(LiveInterval &VirtReg);
-
- // Remove all inserted virtual registers.
- void clear() { Segments.clear(); ++Tag; }
-
- // Print union, using TRI to translate register names
- void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
-
-#ifndef NDEBUG
- // Verify the live intervals in this union and add them to the visited set.
- void verify(LiveVirtRegBitSet& VisitedVRegs);
-#endif
-
- /// Query interferences between a single live virtual register and a live
- /// interval union.
- class Query {
- LiveIntervalUnion *LiveUnion;
- LiveInterval *VirtReg;
- LiveInterval::iterator VirtRegI; // current position in VirtReg
- SegmentIter LiveUnionI; // current position in LiveUnion
- SmallVector<LiveInterval*,4> InterferingVRegs;
- bool CheckedFirstInterference;
- bool SeenAllInterferences;
- bool SeenUnspillableVReg;
- unsigned Tag, UserTag;
-
- public:
- Query(): LiveUnion(), VirtReg(), Tag(0), UserTag(0) {}
-
- Query(LiveInterval *VReg, LiveIntervalUnion *LIU):
- LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false),
- SeenAllInterferences(false), SeenUnspillableVReg(false)
- {}
-
- void clear() {
- LiveUnion = NULL;
- VirtReg = NULL;
- InterferingVRegs.clear();
- CheckedFirstInterference = false;
- SeenAllInterferences = false;
- SeenUnspillableVReg = false;
- Tag = 0;
- UserTag = 0;
- }
-
- void init(unsigned UTag, LiveInterval *VReg, LiveIntervalUnion *LIU) {
- assert(VReg && LIU && "Invalid arguments");
- if (UserTag == UTag && VirtReg == VReg &&
- LiveUnion == LIU && !LIU->changedSince(Tag)) {
- // Retain cached results, e.g. firstInterference.
- return;
- }
- clear();
- LiveUnion = LIU;
- VirtReg = VReg;
- Tag = LIU->getTag();
- UserTag = UTag;
- }
-
- LiveInterval &virtReg() const {
- assert(VirtReg && "uninitialized");
- return *VirtReg;
- }
-
- // Does this live virtual register interfere with the union?
- bool checkInterference() { return collectInterferingVRegs(1); }
-
- // Count the virtual registers in this union that interfere with this
- // query's live virtual register, up to maxInterferingRegs.
- unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
-
- // Was this virtual register visited during collectInterferingVRegs?
- bool isSeenInterference(LiveInterval *VReg) const;
-
- // Did collectInterferingVRegs collect all interferences?
- bool seenAllInterferences() const { return SeenAllInterferences; }
-
- // Did collectInterferingVRegs encounter an unspillable vreg?
- bool seenUnspillableVReg() const { return SeenUnspillableVReg; }
-
- // Vector generated by collectInterferingVRegs.
- const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
- return InterferingVRegs;
- }
-
- /// checkLoopInterference - Return true if there is interference overlapping
- /// Loop.
- bool checkLoopInterference(MachineLoopRange*);
-
- private:
- Query(const Query&) LLVM_DELETED_FUNCTION;
- void operator=(const Query&) LLVM_DELETED_FUNCTION;
- };
-
- // Array of LiveIntervalUnions.
- class Array {
- unsigned Size;
- LiveIntervalUnion *LIUs;
- public:
- Array() : Size(0), LIUs(0) {}
- ~Array() { clear(); }
-
- // Initialize the array to have Size entries.
- // Reuse an existing allocation if the size matches.
- void init(LiveIntervalUnion::Allocator&, unsigned Size);
-
- unsigned size() const { return Size; }
-
- void clear();
-
- LiveIntervalUnion& operator[](unsigned idx) {
- assert(idx < Size && "idx out of bounds");
- return LIUs[idx];
- }
- };
-};
-
-} // end namespace llvm
-
-#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION)
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index c3ff4f1b6d2e..dede490d91ba 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -18,10 +18,11 @@
using namespace llvm;
-void LiveRangeCalc::reset(const MachineFunction *MF,
+void LiveRangeCalc::reset(const MachineFunction *mf,
SlotIndexes *SI,
MachineDominatorTree *MDT,
VNInfo::Allocator *VNIA) {
+ MF = mf;
MRI = &MF->getRegInfo();
Indexes = SI;
DomTree = MDT;
@@ -104,28 +105,28 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) {
// Transfer information from the LiveIn vector to the live ranges.
-void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI) {
+void LiveRangeCalc::updateLiveIns() {
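+ // LiveRangeUpdater makes adding many ranges to a live interval cheaper
+ // than calling LiveInterval::addRange() repeatedly.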
+ LiveRangeUpdater Updater;
for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
E = LiveIn.end(); I != E; ++I) {
if (!I->DomNode)
continue;
MachineBasicBlock *MBB = I->DomNode->getBlock();
-
- VNInfo *VNI = OverrideVNI ? OverrideVNI : I->Value;
- assert(VNI && "No live-in value found");
-
+ assert(I->Value && "No live-in value found");
SlotIndex Start, End;
tie(Start, End) = Indexes->getMBBRange(MBB);
if (I->Kill.isValid())
- I->LI->addRange(LiveRange(Start, I->Kill, VNI));
+ // Value is killed inside this block.
+ End = I->Kill;
else {
- I->LI->addRange(LiveRange(Start, End, VNI));
- // The value is live-through, update LiveOut as well. Defer the Domtree
- // lookup until it is needed.
+ // The value is live-through, update LiveOut as well.
+ // Defer the Domtree lookup until it is needed.
assert(Seen.test(MBB->getNumber()));
- LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0);
+ LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0);
}
+ Updater.setDest(I->LI);
+ Updater.add(Start, End, I->Value);
}
LiveIn.clear();
}
@@ -150,13 +151,11 @@ void LiveRangeCalc::extend(LiveInterval *LI,
// multiple values, and we may need to create even more phi-defs to preserve
// VNInfo SSA form. Perform a search for all predecessor blocks where we
// know the dominating VNInfo.
- VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, PhysReg);
+ if (findReachingDefs(LI, KillMBB, Kill, PhysReg))
+ return;
// When there were multiple different values, we may need new PHIs.
- if (!VNI)
- updateSSA();
-
- updateLiveIns(VNI);
+ calculateValues();
}
@@ -167,16 +166,18 @@ void LiveRangeCalc::calculateValues() {
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
updateSSA();
- updateLiveIns(0);
+ updateLiveIns();
}
-VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
- MachineBasicBlock *KillMBB,
- SlotIndex Kill,
- unsigned PhysReg) {
- // Blocks where LI should be live-in.
- SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB);
+bool LiveRangeCalc::findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ unsigned PhysReg) {
+ unsigned KillMBBNum = KillMBB->getNumber();
+
+ // Block numbers where LI should be live-in.
+ SmallVector<unsigned, 16> WorkList(1, KillMBBNum);
// Remember if we have seen more than one value.
bool UniqueVNI = true;
@@ -184,7 +185,7 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
// Using Seen as a visited set, perform a BFS for all reaching defs.
for (unsigned i = 0; i != WorkList.size(); ++i) {
- MachineBasicBlock *MBB = WorkList[i];
+ MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]);
#ifndef NDEBUG
if (MBB->pred_empty()) {
@@ -231,25 +232,50 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
// No, we need a live-in value for Pred as well
if (Pred != KillMBB)
- WorkList.push_back(Pred);
+ WorkList.push_back(Pred->getNumber());
else
// Loopback to KillMBB, so value is really live through.
Kill = SlotIndex();
}
}
- // Transfer WorkList to LiveInBlocks in reverse order.
- // This ordering works best with updateSSA().
LiveIn.clear();
- LiveIn.reserve(WorkList.size());
- while(!WorkList.empty())
- addLiveInBlock(LI, DomTree->getNode(WorkList.pop_back_val()));
- // The kill block may not be live-through.
- assert(LiveIn.back().DomNode->getBlock() == KillMBB);
- LiveIn.back().Kill = Kill;
+ // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
+ // neither requires it. Skip the sorting overhead for small updates.
+ if (WorkList.size() > 4)
+ array_pod_sort(WorkList.begin(), WorkList.end());
+
+ // If a unique reaching def was found, blit in the live ranges immediately.
+ if (UniqueVNI) {
+ LiveRangeUpdater Updater(LI);
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+ SlotIndex Start, End;
+ tie(Start, End) = Indexes->getMBBRange(*I);
+ // Trim the live range in KillMBB.
+ if (*I == KillMBBNum && Kill.isValid())
+ End = Kill;
+ else
+ LiveOut[MF->getBlockNumbered(*I)] =
+ LiveOutPair(TheVNI, (MachineDomTreeNode *)0);
+ Updater.add(Start, End, TheVNI);
+ }
+ return true;
+ }
+
+ // Multiple values were found, so transfer the work list to the LiveIn array
+ // where updateSSA() will use it as a work list.
+ LiveIn.reserve(WorkList.size());
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(*I);
+ addLiveInBlock(LI, DomTree->getNode(MBB));
+ if (MBB == KillMBB)
+ LiveIn.back().Kill = Kill;
+ }
- return UniqueVNI ? TheVNI : 0;
+ return false;
}
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 909829b22851..57cab7b34220 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -34,6 +34,7 @@ template <class NodeT> class DomTreeNodeBase;
typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
class LiveRangeCalc {
+ const MachineFunction *MF;
const MachineRegisterInfo *MRI;
SlotIndexes *Indexes;
MachineDominatorTree *DomTree;
@@ -100,17 +101,20 @@ class LiveRangeCalc {
/// used to add entries directly.
SmallVector<LiveInBlock, 16> LiveIn;
- /// findReachingDefs - Assuming that LI is live-in to KillMBB and killed at
- /// Kill, search for values that can reach KillMBB. All blocks that need LI
- /// to be live-in are added to LiveIn. If a unique reaching def is found,
- /// its value is returned, if Kill is jointly dominated by multiple values,
- /// NULL is returned.
+ /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set
+ /// of defs that can reach it.
+ ///
+ /// If only one def can reach Kill, all paths from the def to kill are added
+ /// to LI, and the function returns true.
+ ///
+ /// If multiple values can reach Kill, the blocks that need LI to be live in
+ /// are added to the LiveIn array, and the function returns false.
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
- VNInfo *findReachingDefs(LiveInterval *LI,
- MachineBasicBlock *KillMBB,
- SlotIndex Kill,
- unsigned PhysReg);
+ bool findReachingDefs(LiveInterval *LI,
+ MachineBasicBlock *KillMBB,
+ SlotIndex Kill,
+ unsigned PhysReg);
/// updateSSA - Compute the values that will be live in to all requested
/// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
@@ -119,12 +123,11 @@ class LiveRangeCalc {
/// blocks. No values are read from the live ranges.
void updateSSA();
- /// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI
- /// as a wildcard value for LiveIn entries without a value.
- void updateLiveIns(VNInfo *VNI);
+ /// Add liveness as specified in the LiveIn vector.
+ void updateLiveIns();
public:
- LiveRangeCalc() : MRI(0), Indexes(0), DomTree(0), Alloc(0) {}
+ LiveRangeCalc() : MF(0), MRI(0), Indexes(0), DomTree(0), Alloc(0) {}
//===--------------------------------------------------------------------===//
// High-level interface.
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index f8fbc7ddf0c1..7793e96c3540 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -77,7 +77,7 @@ bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
/// OrigIdx are also available with the same value at UseIdx.
bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex OrigIdx,
- SlotIndex UseIdx) {
+ SlotIndex UseIdx) const {
OrigIdx = OrigIdx.getRegSlot(true);
UseIdx = UseIdx.getRegSlot(true);
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 7f22478d01cd..0ef069f47827 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "RegisterCoalescer.h"
-#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/LiveRegMatrix.h b/lib/CodeGen/LiveRegMatrix.h
deleted file mode 100644
index 8f22c24478f4..000000000000
--- a/lib/CodeGen/LiveRegMatrix.h
+++ /dev/null
@@ -1,148 +0,0 @@
-//===-- LiveRegMatrix.h - Track register interference ---------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The LiveRegMatrix analysis pass keeps track of virtual register interference
-// along two dimensions: Slot indexes and register units. The matrix is used by
-// register allocators to ensure that no interfering virtual registers get
-// assigned to overlapping physical registers.
-//
-// Register units are defined in MCRegisterInfo.h, they represent the smallest
-// unit of interference when dealing with overlapping physical registers. The
-// LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When
-// a virtual register is assigned to a physical register, the live range for
-// the virtual register is inserted into the LiveIntervalUnion for each regunit
-// in the physreg.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_LIVEREGMATRIX_H
-#define LLVM_CODEGEN_LIVEREGMATRIX_H
-
-#include "LiveIntervalUnion.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-
-namespace llvm {
-
-class LiveInterval;
-class LiveIntervalAnalysis;
-class MachineRegisterInfo;
-class TargetRegisterInfo;
-class VirtRegMap;
-
-class LiveRegMatrix : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
- LiveIntervals *LIS;
- VirtRegMap *VRM;
-
- // UserTag changes whenever virtual registers have been modified.
- unsigned UserTag;
-
- // The matrix is represented as a LiveIntervalUnion per register unit.
- LiveIntervalUnion::Allocator LIUAlloc;
- LiveIntervalUnion::Array Matrix;
-
- // Cached queries per register unit.
- OwningArrayPtr<LiveIntervalUnion::Query> Queries;
-
- // Cached register mask interference info.
- unsigned RegMaskTag;
- unsigned RegMaskVirtReg;
- BitVector RegMaskUsable;
-
- // MachineFunctionPass boilerplate.
- virtual void getAnalysisUsage(AnalysisUsage&) const;
- virtual bool runOnMachineFunction(MachineFunction&);
- virtual void releaseMemory();
-public:
- static char ID;
- LiveRegMatrix();
-
- //===--------------------------------------------------------------------===//
- // High-level interface.
- //===--------------------------------------------------------------------===//
- //
- // Check for interference before assigning virtual registers to physical
- // registers.
- //
-
- /// Invalidate cached interference queries after modifying virtual register
- /// live ranges. Interference checks may return stale information unless
- /// caches are invalidated.
- void invalidateVirtRegs() { ++UserTag; }
-
- enum InterferenceKind {
- /// No interference, go ahead and assign.
- IK_Free = 0,
-
- /// Virtual register interference. There are interfering virtual registers
- /// assigned to PhysReg or its aliases. This interference could be resolved
- /// by unassigning those other virtual registers.
- IK_VirtReg,
-
- /// Register unit interference. A fixed live range is in the way, typically
- /// argument registers for a call. This can't be resolved by unassigning
- /// other virtual registers.
- IK_RegUnit,
-
- /// RegMask interference. The live range is crossing an instruction with a
- /// regmask operand that doesn't preserve PhysReg. This typically means
- /// VirtReg is live across a call, and PhysReg isn't call-preserved.
- IK_RegMask
- };
-
- /// Check for interference before assigning VirtReg to PhysReg.
- /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg).
- /// When there is more than one kind of interference, the InterferenceKind
- /// with the highest enum value is returned.
- InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg);
-
- /// Assign VirtReg to PhysReg.
- /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
- /// update VirtRegMap. The live range is expected to be available in PhysReg.
- void assign(LiveInterval &VirtReg, unsigned PhysReg);
-
- /// Unassign VirtReg from its PhysReg.
- /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes
- /// the assignment and updates VirtRegMap accordingly.
- void unassign(LiveInterval &VirtReg);
-
- //===--------------------------------------------------------------------===//
- // Low-level interface.
- //===--------------------------------------------------------------------===//
- //
- // Provide access to the underlying LiveIntervalUnions.
- //
-
- /// Check for regmask interference only.
- /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg.
- /// If PhysReg is null, check if VirtReg crosses any regmask operands.
- bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0);
-
- /// Check for regunit interference only.
- /// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's
- /// register units.
- bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg);
-
- /// Query a line of the assigned virtual register matrix directly.
- /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
- /// This returns a reference to an internal Query data structure that is only
- /// valid until the next query() call.
- LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit);
-
- /// Directly access the live interval unions per regunit.
- /// This returns an array indexed by the regunit number.
- LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; }
-};
-
-} // end namespace llvm
-
-#endif // LLVM_CODEGEN_LIVEREGMATRIX_H
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index f0b522bd7d36..be11a8fa86ef 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -15,12 +15,12 @@
#define DEBUG_TYPE "livestacks"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <limits>
using namespace llvm;
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 6ea933d4304b..789eddc42774 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -27,17 +27,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/STLExtras.h"
#include <algorithm>
using namespace llvm;
@@ -619,29 +619,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MBB);
}
- // Finally, if the last instruction in the block is a return, make sure to
- // mark it as using all of the live-out values in the function.
- // Things marked both call and return are tail calls; do not do this for
- // them. The tail callee need not take the same registers as input
- // that it produces as output, and there are dependencies for its input
- // registers elsewhere.
- if (!MBB->empty() && MBB->back().isReturn()
- && !MBB->back().isCall()) {
- MachineInstr *Ret = &MBB->back();
-
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
- "Cannot have a live-out virtual register!");
- HandlePhysRegUse(*I, Ret);
-
- // Add live-out registers as implicit uses.
- if (!Ret->readsRegister(*I))
- Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
- }
- }
-
// MachineCSE may CSE instructions which write to non-allocatable physical
// registers across MBBs. Remember if any reserved register is liveout.
SmallSet<unsigned, 4> LiveOuts;
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index fbc9e20517c2..352ef942591f 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -15,26 +15,26 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "localstackalloc"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 18d021d521d6..898e165feeab 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -12,24 +12,26 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/BasicBlock.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
using namespace llvm;
@@ -663,6 +665,13 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
<< " -- BB#" << NMBB->getNumber()
<< " -- BB#" << Succ->getNumber() << '\n');
+ LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
+ SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
+ if (LIS)
+ LIS->insertMBBInMaps(NMBB);
+ else if (Indexes)
+ Indexes->insertMBBInMaps(NMBB);
+
// On some targets like Mips, branches may kill virtual registers. Make sure
// that LiveVariables is properly updated after updateTerminator replaces the
// terminators.
@@ -689,14 +698,67 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
}
}
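+ // Collect every register referenced by the terminators so that their live
+ // intervals can be repaired with repairIntervalsInRange() after
+ // updateTerminator() rewrites the branches.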
+ SmallVector<unsigned, 4> UsedRegs;
+ if (LIS) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
+ MachineInstr *MI = I;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ if (!OI->isReg() || OI->getReg() == 0)
+ continue;
+
+ unsigned Reg = OI->getReg();
+ if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
ReplaceUsesOfBlockWith(Succ, NMBB);
+
+ // If updateTerminator() removes instructions, we need to remove them from
+ // SlotIndexes.
+ SmallVector<MachineInstr*, 4> Terminators;
+ if (Indexes) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ Terminators.push_back(I);
+ }
+
updateTerminator();
+ if (Indexes) {
+ SmallVector<MachineInstr*, 4> NewTerminators;
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ NewTerminators.push_back(I);
+
+ for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
+ E = Terminators.end(); I != E; ++I) {
+ if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
+ NewTerminators.end())
+ Indexes->removeMachineInstrFromMaps(*I);
+ }
+ }
+
// Insert unconditional "jump Succ" instruction in NMBB if necessary.
NMBB->addSuccessor(Succ);
if (!NMBB->isLayoutSuccessor(Succ)) {
Cond.clear();
MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+
+ if (Indexes) {
+ for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
+ I != E; ++I) {
+ // Some instructions may have been moved to NMBB by updateTerminator(),
+ // so we first remove any instruction that already has an index.
+ if (Indexes->hasIndex(I))
+ Indexes->removeMachineInstrFromMaps(I);
+ Indexes->insertMachineInstrInMaps(I);
+ }
+ }
}
// Fix PHI nodes in Succ so they refer to NMBB instead of this
@@ -731,6 +793,67 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
LV->addNewBlock(NMBB, this, Succ);
}
+ if (LIS) {
+ // After splitting the edge and updating SlotIndexes, live intervals may be
+ // in one of two situations, depending on whether this block was the last in
+ // the function. If the original block was the last in the function, all live
+ // intervals will end prior to the beginning of the new split block. If the
+ // original block was not at the end of the function, all live intervals will
+ // extend to the end of the new split block.
+
+ bool isLastMBB =
+ llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end();
+
+ SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
+ SlotIndex PrevIndex = StartIndex.getPrevSlot();
+ SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);
+
+ // Find the registers used from NMBB in PHIs in Succ.
+ SmallSet<unsigned, 8> PHISrcRegs;
+ for (MachineBasicBlock::instr_iterator
+ I = Succ->instr_begin(), E = Succ->instr_end();
+ I != E && I->isPHI(); ++I) {
+ for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
+ if (I->getOperand(ni+1).getMBB() == NMBB) {
+ MachineOperand &MO = I->getOperand(ni);
+ unsigned Reg = MO.getReg();
+ PHISrcRegs.insert(Reg);
+ if (MO.isUndef())
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "PHI sources should be live out of their predecessors.");
+ LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ }
+ }
+ }
+
+ MachineRegisterInfo *MRI = &getParent()->getRegInfo();
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (!LI.liveAt(PrevIndex))
+ continue;
+
+ bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
+ if (isLiveOut && isLastMBB) {
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "LiveInterval should have VNInfo where it is live.");
+ LI.addRange(LiveRange(StartIndex, EndIndex, VNI));
+ } else if (!isLiveOut && !isLastMBB) {
+ LI.removeRange(StartIndex, EndIndex);
+ }
+ }
+
+ // Update all intervals for registers whose uses may have been modified by
+ // updateTerminator().
+ LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
+ }
+
if (MachineDominatorTree *MDT =
P->getAnalysisIfAvailable<MachineDominatorTree>()) {
// Update dominator information.
@@ -788,40 +911,42 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
return NMBB;
}
-MachineBasicBlock::iterator
-MachineBasicBlock::erase(MachineBasicBlock::iterator I) {
- if (I->isBundle()) {
- MachineBasicBlock::iterator E = llvm::next(I);
- return Insts.erase(I.getInstrIterator(), E.getInstrIterator());
- }
-
- return Insts.erase(I.getInstrIterator());
+/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's
+/// neighboring instructions so the bundle won't be broken by removing MI.
+static void unbundleSingleMI(MachineInstr *MI) {
+ // Removing the first instruction in a bundle.
+ if (MI->isBundledWithSucc() && !MI->isBundledWithPred())
+ MI->unbundleFromSucc();
+ // Removing the last instruction in a bundle.
+ if (MI->isBundledWithPred() && !MI->isBundledWithSucc())
+ MI->unbundleFromPred();
+ // If MI is not bundled, or if it is internal to a bundle, the neighbor flags
+ // are already fine.
}
-MachineInstr *MachineBasicBlock::remove(MachineInstr *I) {
- if (I->isBundle()) {
- instr_iterator MII = llvm::next(I);
- iterator E = end();
- while (MII != E && MII->isInsideBundle()) {
- MachineInstr *MI = &*MII++;
- Insts.remove(MI);
- }
- }
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) {
+ unbundleSingleMI(I);
+ return Insts.erase(I);
+}
- return Insts.remove(I);
+MachineInstr *MachineBasicBlock::remove_instr(MachineInstr *MI) {
+ unbundleSingleMI(MI);
+ MI->clearFlag(MachineInstr::BundledPred);
+ MI->clearFlag(MachineInstr::BundledSucc);
+ return Insts.remove(MI);
}
-void MachineBasicBlock::splice(MachineBasicBlock::iterator where,
- MachineBasicBlock *Other,
- MachineBasicBlock::iterator From) {
- if (From->isBundle()) {
- MachineBasicBlock::iterator To = llvm::next(From);
- Insts.splice(where.getInstrIterator(), Other->Insts,
- From.getInstrIterator(), To.getInstrIterator());
- return;
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) {
+ assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
+ "Cannot insert instruction with bundle flags");
+ // Set the bundle flags when inserting inside a bundle.
+ if (I != instr_end() && I->isBundledWithPred()) {
+ MI->setFlag(MachineInstr::BundledPred);
+ MI->setFlag(MachineInstr::BundledSucc);
}
-
- Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator());
+ return Insts.insert(I, MI);
}
/// removeFromParent - This method unlinks 'this' from the containing function,
@@ -982,7 +1107,6 @@ MachineBasicBlock::LivenessQueryResult
MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
unsigned Reg, MachineInstr *MI,
unsigned Neighborhood) {
-
unsigned N = Neighborhood;
MachineBasicBlock *MBB = MI->getParent();
@@ -997,14 +1121,18 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
MachineOperandIteratorBase::PhysRegInfo Analysis =
MIOperands(I).analyzePhysReg(Reg, TRI);
- if (Analysis.Kills)
+ if (Analysis.Defines)
+ // Outputs happen after inputs so they take precedence if both are
+ // present.
+ return Analysis.DefinesDead ? LQR_Dead : LQR_Live;
+
+ if (Analysis.Kills || Analysis.Clobbers)
// Register killed, so isn't live.
return LQR_Dead;
- else if (Analysis.DefinesOverlap || Analysis.ReadsOverlap)
+ else if (Analysis.ReadsOverlap)
// Defined or read without a previous kill - live.
- return (Analysis.Defines || Analysis.Reads) ?
- LQR_Live : LQR_OverlappingLive;
+ return Analysis.Reads ? LQR_Live : LQR_OverlappingLive;
} while (I != MBB->begin() && --N > 0);
}
@@ -1036,7 +1164,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
return (Analysis.Reads) ?
LQR_Live : LQR_OverlappingLive;
- else if (Analysis.DefinesOverlap)
+ else if (Analysis.Clobbers || Analysis.Defines)
// Defined (but not read) therefore cannot have been live.
return LQR_Dead;
}
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index a079d6e59139..070daf2e2ba2 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/InitializePasses.h"
-#include "llvm/Analysis/BlockFrequencyImpl.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index cd3f19944e46..cd948e24a6b2 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -26,6 +26,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "block-placement2"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -33,13 +38,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
@@ -171,7 +171,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
const TargetInstrInfo *TII;
/// \brief A handle to the target's lowering info.
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
/// \brief Allocator and owner of BlockChain structures.
///
@@ -1013,8 +1013,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize))
+ if (F.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
return;
unsigned Align = TLI->getPrefLoopAlignment();
if (!Align)
@@ -1061,7 +1061,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
// Align this block if the layout predecessor's edge into this block is
- // cold relative to the block. When this is true, othe predecessors make up
+ // cold relative to the block. When this is true, other predecessors make up
// all of the hot entries into the block and thus alignment is likely to be
// important.
BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 447921147f03..ae70912b6c69 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index dbc41defeb5a..61d8d384cd38 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -15,17 +15,17 @@
#define DEBUG_TYPE "machine-cse"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
@@ -126,8 +126,6 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
// deleted.
continue;
MachineInstr *DefMI = MRI->getVRegDef(Reg);
- if (DefMI->getParent() != MBB)
- continue;
if (!DefMI->isCopy())
continue;
unsigned SrcReg = DefMI->getOperand(1).getReg();
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 4a793281b2cd..dc8a2241c7e1 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -13,19 +13,19 @@
#define DEBUG_TYPE "codegen-cp"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
STATISTIC(NumDeletes, "Number of dead copies deleted");
@@ -33,6 +33,7 @@ STATISTIC(NumDeletes, "Number of dead copies deleted");
namespace {
class MachineCopyPropagation : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
public:
@@ -51,6 +52,7 @@ namespace {
SourceMap &SrcMap,
DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
bool CopyPropagateBlock(MachineBasicBlock &MBB);
+ void removeCopy(MachineInstr *MI);
};
}
char MachineCopyPropagation::ID = 0;
@@ -124,6 +126,16 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
return false;
}
+// Remove MI from the function because it has been determined it is dead.
+// Turn it into a noop KILL instruction if it has super-register liveness
+// adjustments.
+void MachineCopyPropagation::removeCopy(MachineInstr *MI) {
+ if (MI->getNumOperands() == 2)
+ MI->eraseFromParent();
+ else
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+}
+
bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
@@ -169,7 +181,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
I->clearRegisterKills(Def, TRI);
- MI->eraseFromParent();
+ removeCopy(MI);
Changed = true;
++NumDeletes;
continue;
@@ -262,7 +274,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
unsigned Reg = (*DI)->getOperand(0).getReg();
if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
continue;
- (*DI)->eraseFromParent();
+ removeCopy(*DI);
Changed = true;
++NumDeletes;
}
@@ -298,7 +310,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
DI != DE; ++DI) {
if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
- (*DI)->eraseFromParent();
+ removeCopy(*DI);
Changed = true;
++NumDeletes;
}
@@ -312,6 +324,7 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
TRI = MF.getTarget().getRegisterInfo();
+ TII = MF.getTarget().getInstrInfo();
MRI = &MF.getRegInfo();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 91d521185767..04321f329282 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -14,28 +14,28 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Support/Debug.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -58,14 +58,17 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
else
RegInfo = 0;
MFInfo = 0;
- FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
- if (Fn->getFnAttributes().hasAttribute(Attributes::StackAlignment))
+ FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(),
+ TM.Options.RealignStack);
+ if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment))
FrameInfo->ensureMaxAlignment(Fn->getAttributes().
- getFnAttributes().getStackAlignment());
+ getStackAlignment(AttributeSet::FunctionIndex));
ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout());
Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
- if (!Fn->getFnAttributes().hasAttribute(Attributes::OptimizeForSize))
+ if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
Alignment = std::max(Alignment,
TM.getTargetLowering()->getPrefFunctionAlignment());
FunctionNumber = FunctionNum;
@@ -73,8 +76,15 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
}
MachineFunction::~MachineFunction() {
- BasicBlocks.clear();
+ // Don't call destructors on MachineInstr and MachineOperand. All of their
+ // memory comes from the BumpPtrAllocator which is about to be purged.
+ //
+ // Do call MachineBasicBlock destructors; they contain std::vectors.
+ for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I))
+ I->Insts.clearAndLeakNodesUnsafely();
+
InstructionRecycler.clear(Allocator);
+ OperandRecycler.clear(Allocator);
BasicBlockRecycler.clear(Allocator);
if (RegInfo) {
RegInfo->~MachineRegisterInfo();
@@ -157,7 +167,7 @@ MachineInstr *
MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
DebugLoc DL, bool NoImp) {
return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
- MachineInstr(MCID, DL, NoImp);
+ MachineInstr(*this, MCID, DL, NoImp);
}
/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
@@ -172,9 +182,17 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
/// DeleteMachineInstr - Delete the given MachineInstr.
///
+/// This function also serves as the MachineInstr destructor - the real
+/// ~MachineInstr() destructor must be empty.
void
MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
- MI->~MachineInstr();
+ // Strip it for parts. The operand array and the MI object itself are
+ // independently recyclable.
+ if (MI->Operands)
+ deallocateOperandArray(MI->CapOperands, MI->Operands);
+ // Don't call ~MachineInstr() which must be trivial anyway because
+ // ~MachineFunction drops whole lists of MachineInstrs without calling their
+ // destructors.
InstructionRecycler.Deallocate(Allocator, MI);
}
@@ -328,13 +346,6 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
OS << '\n';
}
- if (RegInfo && !RegInfo->liveout_empty()) {
- OS << "Function Live Outs:";
- for (MachineRegisterInfo::liveout_iterator
- I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I)
- OS << ' ' << PrintReg(*I, TRI);
- OS << '\n';
- }
for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
OS << '\n';
@@ -445,6 +456,70 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const {
// MachineFrameInfo implementation
//===----------------------------------------------------------------------===//
+/// ensureMaxAlignment - Make sure the stack frame is at least Align bytes
+/// aligned.
+void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
+ if (!TFI.isStackRealignable() || !RealignOption)
+ assert(Align <= TFI.getStackAlignment() &&
+ "For targets without stack realignment, Align is out of limit!");
+ if (MaxAlignment < Align) MaxAlignment = Align;
+}
+
+/// clampStackAlignment - Clamp the alignment if requested and emit a warning.
+static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
+ unsigned StackAlign) {
+ if (!ShouldClamp || Align <= StackAlign)
+ return Align;
+ DEBUG(dbgs() << "Warning: requested alignment " << Align
+ << " exceeds the stack alignment " << StackAlign
+ << " when stack realignment is off" << '\n');
+ return StackAlign;
+}
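// [Editor's sketch, not part of the patch] Exercising the helper above with
// realignment disabled; the expected values follow directly from the code.
static void clampDemo() {
  assert(clampStackAlignment(true, 32, 16) == 16 && "clamped to StackAlign");
  assert(clampStackAlignment(true, 8, 16) == 8 && "within limit, unchanged");
  assert(clampStackAlignment(false, 32, 16) == 32 && "clamping disabled");
}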
+
+/// CreateStackObject - Create a new statically sized stack object, returning
+/// a nonnegative identifier to represent it.
+///
+int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
+ bool isSS, bool MayNeedSP, const AllocaInst *Alloca) {
+ assert(Size != 0 && "Cannot allocate zero size stack objects!");
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
+ Alloca));
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ assert(Index >= 0 && "Bad frame index!");
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
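// [Editor's sketch, hypothetical helper] Allocating an ordinary 8-byte,
// 8-byte-aligned stack object. Non-fixed objects get indices 0, 1, 2, ...;
// fixed objects created with CreateFixedObject() get negative indices.
static int createObjectDemo(MachineFrameInfo &MFI) {
  return MFI.CreateStackObject(/*Size=*/8, /*Alignment=*/8, /*isSS=*/false,
                               /*MayNeedSP=*/false, /*Alloca=*/0);
}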
+
+/// CreateSpillStackObject - Create a new statically sized stack object that
+/// represents a spill slot, returning a nonnegative identifier to represent
+/// it.
+///
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
+ unsigned Alignment) {
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ CreateStackObject(Size, Alignment, true, false);
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+/// CreateVariableSizedObject - Notify the MachineFrameInfo object that a
+/// variable sized object has been created. This must be called whenever a
+/// variable sized object is created, whether or not the index returned is
+/// actually used.
+///
+int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) {
+ HasVarSizedObjects = true;
+ Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Alignment, TFI.getStackAlignment());
+ Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0));
+ ensureMaxAlignment(Alignment);
+ return (int)Objects.size()-NumFixedObjects-1;
+}
+
/// CreateFixedObject - Create a new object at a fixed location on the stack.
/// All fixed objects should be created before other objects are created for
/// efficiency. By default, fixed objects are immutable. This returns an
@@ -459,6 +534,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
// object is 16-byte aligned.
unsigned StackAlign = TFI.getStackAlignment();
unsigned Align = MinAlign(SPOffset, StackAlign);
+ Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+ Align, TFI.getStackAlignment());
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
/*isSS*/ false,
/*NeedSP*/ false,
@@ -497,6 +574,54 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
return BV;
}
+unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
+ if (isDeadObjectIndex(i))
+ continue;
+ Offset += getObjectSize(i);
+ unsigned Align = getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (adjustsStack() || hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
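// [Editor's worked example, not part of the patch] The final step above rounds
// Offset up to the next multiple of StackAlign (a power of two). For
// Offset == 40 and StackAlign == 16: AlignMask == 15 and (40 + 15) & ~15 == 48.
static unsigned roundUpDemo(unsigned Offset, unsigned StackAlign) {
  unsigned AlignMask = StackAlign - 1;
  return (Offset + AlignMask) & ~AlignMask;
}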
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const {
if (Objects.empty()) return;
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index e5a491270a8c..674cc80a006c 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index ed94efb93551..fa9c821b2af7 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -12,11 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index ce8d52000b47..32d066894b5b 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -12,15 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Value.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -28,19 +22,24 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/Hashing.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -144,7 +143,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
// Change this to a register and set the reg#.
OpKind = MO_Register;
SmallContents.RegNo = Reg;
- SubReg = 0;
+ SubReg_TargetFlags = 0;
IsDef = isDef;
IsImp = isImp;
IsKill = isKill;
@@ -518,89 +517,50 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
// MachineInstr Implementation
//===----------------------------------------------------------------------===//
-/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
-/// MCID NULL and no operands.
-MachineInstr::MachineInstr()
- : MCID(0), Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(0),
- Parent(0) {
- // Make sure that we get added to a machine basicblock
- LeakDetector::addGarbageObject(this);
-}
-
-void MachineInstr::addImplicitDefUseOperands() {
+void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
if (MCID->ImplicitDefs)
for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
- addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
+ addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true));
if (MCID->ImplicitUses)
for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
- addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
+ addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true));
}
/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
/// implicit operands. It reserves space for the number of operands specified by
/// the MCInstrDesc.
-MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
- bool NoImp)
- : MCID(&tid), Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
- unsigned NumImplicitOps = 0;
- if (!NoImp)
- NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + MCID->getNumOperands());
+MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
+ const DebugLoc dl, bool NoImp)
+ : MCID(&tid), Parent(0), Operands(0), NumOperands(0),
+ Flags(0), AsmPrinterFlags(0),
+ NumMemRefs(0), MemRefs(0), debugLoc(dl) {
+ // Reserve space for the expected number of operands.
+ if (unsigned NumOps = MCID->getNumOperands() +
+ MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) {
+ CapOperands = OperandCapacity::get(NumOps);
+ Operands = MF.allocateOperandArray(CapOperands);
+ }
+
if (!NoImp)
- addImplicitDefUseOperands();
- // Make sure that we get added to a machine basicblock
- LeakDetector::addGarbageObject(this);
-}
-
-/// MachineInstr ctor - Work exactly the same as the ctor two above, except
-/// that the MachineInstr is created and added to the end of the specified
-/// basic block.
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
- const MCInstrDesc &tid)
- : MCID(&tid), Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
- assert(MBB && "Cannot use inserting ctor with null basic block!");
- unsigned NumImplicitOps =
- MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + MCID->getNumOperands());
- addImplicitDefUseOperands();
- // Make sure that we get added to a machine basicblock
- LeakDetector::addGarbageObject(this);
- MBB->push_back(this); // Add instruction to end of basic block!
+ addImplicitDefUseOperands(MF);
}
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0),
+ : MCID(&MI.getDesc()), Parent(0), Operands(0), NumOperands(0),
+ Flags(0), AsmPrinterFlags(0),
NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
- Parent(0), debugLoc(MI.getDebugLoc()) {
- Operands.reserve(MI.getNumOperands());
+ debugLoc(MI.getDebugLoc()) {
+ CapOperands = OperandCapacity::get(MI.getNumOperands());
+ Operands = MF.allocateOperandArray(CapOperands);
- // Add operands
+ // Copy operands.
for (unsigned i = 0; i != MI.getNumOperands(); ++i)
- addOperand(MI.getOperand(i));
+ addOperand(MF, MI.getOperand(i));
- // Copy all the flags.
- Flags = MI.Flags;
-
- // Set parent to null.
- Parent = 0;
-
- LeakDetector::addGarbageObject(this);
-}
-
-MachineInstr::~MachineInstr() {
- LeakDetector::removeGarbageObject(this);
-#ifndef NDEBUG
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
- assert(Operands[i].ParentMI == this && "ParentMI mismatch!");
- assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) &&
- "Reg operand def/use list corrupted");
- }
-#endif
+ // Copy all the sensible flags.
+ setFlags(MI.Flags);
}
/// getRegInfo - If this instruction is embedded into a MachineFunction,
@@ -616,7 +576,7 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
if (Operands[i].isReg())
MRI.removeRegOperandFromUseList(&Operands[i]);
}
@@ -625,40 +585,65 @@ void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
/// this instruction from their respective use lists. This requires that the
/// operands not be on their use lists yet.
void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
if (Operands[i].isReg())
MRI.addRegOperandToUseList(&Operands[i]);
}
+void MachineInstr::addOperand(const MachineOperand &Op) {
+ MachineBasicBlock *MBB = getParent();
+ assert(MBB && "Use MachineInstrBuilder to add operands to dangling instrs");
+ MachineFunction *MF = MBB->getParent();
+ assert(MF && "Use MachineInstrBuilder to add operands to dangling instrs");
+ addOperand(*MF, Op);
+}
+
+/// Move NumOps MachineOperands from Src to Dst, with support for overlapping
+/// ranges. If MRI is non-null also update use-def chains.
+static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
+ unsigned NumOps, MachineRegisterInfo *MRI) {
+ if (MRI)
+ return MRI->moveOperands(Dst, Src, NumOps);
+
+ // Here it would be convenient to call memmove, but that isn't allowed
+ // because MachineOperand has a constructor and so isn't a POD type.
+ if (Dst < Src)
+ for (unsigned i = 0; i != NumOps; ++i)
+ new (Dst + i) MachineOperand(Src[i]);
+ else
+ for (unsigned i = NumOps; i ; --i)
+ new (Dst + i - 1) MachineOperand(Src[i - 1]);
+}
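// [Editor's note, illustrative] Why the copy direction matters: shifting
// operands one slot right within the same array must run back-to-front, or
// earlier copies would overwrite the sources of later ones.
//   moveOperands(Ops + 1, Ops, N, 0); // Dst > Src: copies slots N-1, ..., 0
//   moveOperands(Ops, Ops + 1, N, 0); // Dst < Src: copies slots 0, ..., N-1
// This is the same contract memmove provides for raw memory.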
+
/// addOperand - Add the specified operand to the instruction. If it is an
/// implicit operand, it is added to the end of the operand list. If it is
/// an explicit operand it is added at the end of the explicit operand list
/// (before the first implicit operand).
-void MachineInstr::addOperand(const MachineOperand &Op) {
+void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
assert(MCID && "Cannot add operands before providing an instr descriptor");
- bool isImpReg = Op.isReg() && Op.isImplicit();
- MachineRegisterInfo *RegInfo = getRegInfo();
- // If the Operands backing store is reallocated, all register operands must
- // be removed and re-added to RegInfo. It is storing pointers to operands.
- bool Reallocate = RegInfo &&
- !Operands.empty() && Operands.size() == Operands.capacity();
+ // Check if we're adding one of our existing operands.
+ if (&Op >= Operands && &Op < Operands + NumOperands) {
+ // This is unusual: MI->addOperand(MI->getOperand(i)).
+ // If adding Op requires reallocating or moving existing operands around,
+ // the Op reference could go stale. Support it by copying Op.
+ MachineOperand CopyOp(Op);
+ return addOperand(MF, CopyOp);
+ }
// Find the insert location for the new operand. Implicit registers go at
- // the end, everything goes before the implicit regs.
- unsigned OpNo = Operands.size();
-
- // Remove all the implicit operands from RegInfo if they need to be shifted.
+ // the end, everything else goes before the implicit regs.
+ //
// FIXME: Allow mixed explicit and implicit operands on inline asm.
// InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as
// implicit-defs, but they must not be moved around. See the FIXME in
// InstrEmitter.cpp.
+ unsigned OpNo = getNumOperands();
+ bool isImpReg = Op.isReg() && Op.isImplicit();
if (!isImpReg && !isInlineAsm()) {
while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
--OpNo;
assert(!Operands[OpNo].isTied() && "Cannot move tied operands");
- if (RegInfo)
- RegInfo->removeRegOperandFromUseList(&Operands[OpNo]);
}
}
@@ -669,55 +654,56 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
OpNo < MCID->getNumOperands()) &&
"Trying to add an operand to a machine instr that is already done!");
- // All operands from OpNo have been removed from RegInfo. If the Operands
- // backing store needs to be reallocated, we also need to remove any other
- // register operands.
- if (Reallocate)
- for (unsigned i = 0; i != OpNo; ++i)
- if (Operands[i].isReg())
- RegInfo->removeRegOperandFromUseList(&Operands[i]);
-
- // Insert the new operand at OpNo.
- Operands.insert(Operands.begin() + OpNo, Op);
- Operands[OpNo].ParentMI = this;
-
- // The Operands backing store has now been reallocated, so we can re-add the
- // operands before OpNo.
- if (Reallocate)
- for (unsigned i = 0; i != OpNo; ++i)
- if (Operands[i].isReg())
- RegInfo->addRegOperandToUseList(&Operands[i]);
-
- // When adding a register operand, tell RegInfo about it.
- if (Operands[OpNo].isReg()) {
+ MachineRegisterInfo *MRI = getRegInfo();
+
+ // Determine if the Operands array needs to be reallocated.
+ // Save the old capacity and operand array.
+ OperandCapacity OldCap = CapOperands;
+ MachineOperand *OldOperands = Operands;
+ if (!OldOperands || OldCap.getSize() == getNumOperands()) {
+ CapOperands = OldOperands ? OldCap.getNext() : OldCap.get(1);
+ Operands = MF.allocateOperandArray(CapOperands);
+ // Move the operands before the insertion point.
+ if (OpNo)
+ moveOperands(Operands, OldOperands, OpNo, MRI);
+ }
+
+ // Move the operands following the insertion point.
+ if (OpNo != NumOperands)
+ moveOperands(Operands + OpNo + 1, OldOperands + OpNo, NumOperands - OpNo,
+ MRI);
+ ++NumOperands;
+
+ // Deallocate the old operand array.
+ if (OldOperands != Operands && OldOperands)
+ MF.deallocateOperandArray(OldCap, OldOperands);
+
+ // Copy Op into place. It still needs to be inserted into the MRI use lists.
+ MachineOperand *NewMO = new (Operands + OpNo) MachineOperand(Op);
+ NewMO->ParentMI = this;
+
+ // When adding a register operand, tell MRI about it.
+ if (NewMO->isReg()) {
// Ensure isOnRegUseList() returns false, regardless of Op's status.
- Operands[OpNo].Contents.Reg.Prev = 0;
+ NewMO->Contents.Reg.Prev = 0;
// Ignore existing ties. This is not a property that can be copied.
- Operands[OpNo].TiedTo = 0;
- // Add the new operand to RegInfo.
- if (RegInfo)
- RegInfo->addRegOperandToUseList(&Operands[OpNo]);
+ NewMO->TiedTo = 0;
+ // Add the new operand to MRI, but only for instructions in an MBB.
+ if (MRI)
+ MRI->addRegOperandToUseList(NewMO);
// The MCID operand information isn't accurate until we start adding
// explicit operands. The implicit operands are added first, then the
// explicits are inserted before them.
if (!isImpReg) {
// Tie uses to defs as indicated in MCInstrDesc.
- if (Operands[OpNo].isUse()) {
+ if (NewMO->isUse()) {
int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO);
if (DefIdx != -1)
tieOperands(DefIdx, OpNo);
}
// If the register operand is flagged as early, mark the operand as such.
if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
- Operands[OpNo].setIsEarlyClobber(true);
- }
- }
-
- // Re-add all the implicit ops.
- if (RegInfo) {
- for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) {
- assert(Operands[i].isReg() && "Should only be an implicit reg!");
- RegInfo->addRegOperandToUseList(&Operands[i]);
+ NewMO->setIsEarlyClobber(true);
}
}
}
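// [Editor's sketch, hypothetical helper] The aliasing case handled at the top
// of addOperand(): duplicating an instruction's own operand is safe even
// though the insertion may reallocate or shift the operand array under the
// reference.
static void duplicateOperandDemo(MachineFunction &MF, MachineInstr *MI) {
  MI->addOperand(MF, MI->getOperand(0)); // Op is copied before any move
}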
@@ -726,45 +712,27 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
/// fewer operand than it started with.
///
void MachineInstr::RemoveOperand(unsigned OpNo) {
- assert(OpNo < Operands.size() && "Invalid operand number");
+ assert(OpNo < getNumOperands() && "Invalid operand number");
untieRegOperand(OpNo);
- MachineRegisterInfo *RegInfo = getRegInfo();
-
- // Special case removing the last one.
- if (OpNo == Operands.size()-1) {
- // If needed, remove from the reg def/use list.
- if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList())
- RegInfo->removeRegOperandFromUseList(&Operands.back());
-
- Operands.pop_back();
- return;
- }
-
- // Otherwise, we are removing an interior operand. If we have reginfo to
- // update, remove all operands that will be shifted down from their reg lists,
- // move everything down, then re-add them.
- if (RegInfo) {
- for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
- if (Operands[i].isReg())
- RegInfo->removeRegOperandFromUseList(&Operands[i]);
- }
- }
#ifndef NDEBUG
// Moving tied operands would break the ties.
- for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i)
+ for (unsigned i = OpNo + 1, e = getNumOperands(); i != e; ++i)
if (Operands[i].isReg())
assert(!Operands[i].isTied() && "Cannot move tied operands");
#endif
- Operands.erase(Operands.begin()+OpNo);
+ MachineRegisterInfo *MRI = getRegInfo();
+ if (MRI && Operands[OpNo].isReg())
+ MRI->removeRegOperandFromUseList(Operands + OpNo);
- if (RegInfo) {
- for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
- if (Operands[i].isReg())
- RegInfo->addRegOperandToUseList(&Operands[i]);
- }
- }
+ // Don't call the MachineOperand destructor. A lot of this code depends on
+ // MachineOperand having a trivial destructor anyway, and adding a call here
+ // wouldn't make it 'destructor-correct'.
+
+ if (unsigned N = NumOperands - 1 - OpNo)
+ moveOperands(Operands + OpNo, Operands + OpNo + 1, N, MRI);
+ --NumOperands;
}
/// addMemOperand - Add a MachineMemOperand to the machine instruction.
@@ -773,33 +741,30 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
void MachineInstr::addMemOperand(MachineFunction &MF,
MachineMemOperand *MO) {
mmo_iterator OldMemRefs = MemRefs;
- uint16_t OldNumMemRefs = NumMemRefs;
+ unsigned OldNumMemRefs = NumMemRefs;
- uint16_t NewNum = NumMemRefs + 1;
+ unsigned NewNum = NumMemRefs + 1;
mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
NewMemRefs[NewNum - 1] = MO;
-
- MemRefs = NewMemRefs;
- NumMemRefs = NewNum;
+ setMemRefs(NewMemRefs, NewMemRefs + NewNum);
}
bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
- const MachineBasicBlock *MBB = getParent();
- MachineBasicBlock::const_instr_iterator MII = *this; ++MII;
- while (MII != MBB->end() && MII->isInsideBundle()) {
+ assert(!isBundledWithPred() && "Must be called on bundle header");
+ for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) {
if (MII->getDesc().getFlags() & Mask) {
if (Type == AnyInBundle)
return true;
} else {
- if (Type == AllInBundle)
+ if (Type == AllInBundle && !MII->isBundle())
return false;
}
- ++MII;
+ // This was the last instruction in the bundle.
+ if (!MII->isBundledWithSucc())
+ return Type == AllInBundle;
}
-
- return Type == AllInBundle;
}
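// [Editor's note, illustrative] A bundle is now a maximal chain of
// instructions linked by BundledPred/BundledSucc flags, e.g.:
//   BUNDLE header   isBundledWithSucc()
//   first MI        isBundledWithPred() && isBundledWithSucc()
//   last MI         isBundledWithPred()
// hasPropertyInBundle() walks the chain from the header until it reaches the
// first instruction without a successor link.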
bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
@@ -865,46 +830,25 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
return true;
}
-/// removeFromParent - This method unlinks 'this' from the containing basic
-/// block, and returns it, but does not delete it.
MachineInstr *MachineInstr::removeFromParent() {
assert(getParent() && "Not embedded in a basic block!");
-
- // If it's a bundle then remove the MIs inside the bundle as well.
- if (isBundle()) {
- MachineBasicBlock *MBB = getParent();
- MachineBasicBlock::instr_iterator MII = *this; ++MII;
- MachineBasicBlock::instr_iterator E = MBB->instr_end();
- while (MII != E && MII->isInsideBundle()) {
- MachineInstr *MI = &*MII;
- ++MII;
- MBB->remove(MI);
- }
- }
- getParent()->remove(this);
- return this;
+ return getParent()->remove(this);
}
+MachineInstr *MachineInstr::removeFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ return getParent()->remove_instr(this);
+}
-/// eraseFromParent - This method unlinks 'this' from the containing basic
-/// block, and deletes it.
void MachineInstr::eraseFromParent() {
assert(getParent() && "Not embedded in a basic block!");
- // If it's a bundle then remove the MIs inside the bundle as well.
- if (isBundle()) {
- MachineBasicBlock *MBB = getParent();
- MachineBasicBlock::instr_iterator MII = *this; ++MII;
- MachineBasicBlock::instr_iterator E = MBB->instr_end();
- while (MII != E && MII->isInsideBundle()) {
- MachineInstr *MI = &*MII;
- ++MII;
- MBB->erase(MI);
- }
- }
- // Erase the individual instruction, which may itself be inside a bundle.
- getParent()->erase_instr(this);
+ getParent()->erase(this);
}
+void MachineInstr::eraseFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase_instr(this);
+}
/// getNumExplicitOperands - Returns the number of non-implicit operands.
///
@@ -921,14 +865,40 @@ unsigned MachineInstr::getNumExplicitOperands() const {
return NumOperands;
}
-/// isBundled - Return true if this instruction part of a bundle. This is true
-/// if either itself or its following instruction is marked "InsideBundle".
-bool MachineInstr::isBundled() const {
- if (isInsideBundle())
- return true;
- MachineBasicBlock::const_instr_iterator nextMI = this;
- ++nextMI;
- return nextMI != Parent->instr_end() && nextMI->isInsideBundle();
+void MachineInstr::bundleWithPred() {
+ assert(!isBundledWithPred() && "MI is already bundled with its predecessor");
+ setFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = this;
+ --Pred;
+ assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->setFlag(BundledSucc);
+}
+
+void MachineInstr::bundleWithSucc() {
+ assert(!isBundledWithSucc() && "MI is already bundled with its successor");
+ setFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = this;
+ ++Succ;
+ assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->setFlag(BundledPred);
+}
+
+void MachineInstr::unbundleFromPred() {
+ assert(isBundledWithPred() && "MI isn't bundled with its predecessor");
+ clearFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = this;
+ --Pred;
+ assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->clearFlag(BundledSucc);
+}
+
+void MachineInstr::unbundleFromSucc() {
+ assert(isBundledWithSucc() && "MI isn't bundled with its successor");
+ clearFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = this;
+ ++Succ;
+ assert(Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->clearFlag(BundledPred);
}
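// [Editor's sketch, hypothetical helper] Pairing an instruction with its
// predecessor and splitting them again using the new symmetric flag API.
// Second is assumed to have a predecessor in its block.
static void bundlePairDemo(MachineInstr *Second) {
  Second->bundleWithPred();   // sets BundledPred here, BundledSucc on Pred
  Second->unbundleFromPred(); // clears both flags again
}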
bool MachineInstr::isStackAligningInlineAsm() const {
@@ -1011,18 +981,13 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
return NULL;
}
-/// getBundleSize - Return the number of instructions inside the MI bundle.
+/// Return the number of instructions inside the MI bundle, not counting the
+/// header instruction.
unsigned MachineInstr::getBundleSize() const {
- assert(isBundle() && "Expecting a bundle");
-
- const MachineBasicBlock *MBB = getParent();
- MachineBasicBlock::const_instr_iterator I = *this, E = MBB->instr_end();
+ MachineBasicBlock::const_instr_iterator I = this;
unsigned Size = 0;
- while ((++I != E) && I->isInsideBundle()) {
- ++Size;
- }
- assert(Size > 1 && "Malformed bundle");
-
+ while (I->isBundledWithSucc())
+ ++Size, ++I;
return Size;
}
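// [Editor's note] For a bundle "BUNDLE { A, B, C }", calling getBundleSize()
// on the header now returns 3, and the old requirement that the header be a
// BUNDLE pseudo-instruction (the isBundle() assertion) is gone.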
@@ -1231,41 +1196,6 @@ void MachineInstr::clearKillInfo() {
}
}
-/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
-///
-void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || (!MO.isKill() && !MO.isDead()))
- continue;
- for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) {
- MachineOperand &MOp = getOperand(j);
- if (!MOp.isIdenticalTo(MO))
- continue;
- if (MO.isKill())
- MOp.setIsKill();
- else
- MOp.setIsDead();
- break;
- }
- }
-}
-
-/// copyPredicates - Copies predicate operand(s) from MI.
-void MachineInstr::copyPredicates(const MachineInstr *MI) {
- assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles");
-
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
- return;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- if (MCID.OpInfo[i].isPredicate()) {
- // Predicated operands must be last operands.
- addOperand(MI->getOperand(i));
- }
- }
-}
-
void MachineInstr::substituteRegister(unsigned FromReg,
unsigned ToReg,
unsigned SubIdx,
@@ -1460,12 +1390,13 @@ bool MachineInstr::allDefsAreDead() const {
/// copyImplicitOps - Copy implicit register operands from specified
/// instruction to this instruction.
-void MachineInstr::copyImplicitOps(const MachineInstr *MI) {
+void MachineInstr::copyImplicitOps(MachineFunction &MF,
+ const MachineInstr *MI) {
for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isImplicit())
- addOperand(MO);
+ addOperand(MF, MO);
}
}
@@ -1497,7 +1428,8 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
}
}
-void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
+void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
+ bool SkipOpers) const {
// We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
const MachineFunction *MF = 0;
const MachineRegisterInfo *MRI = 0;
@@ -1534,6 +1466,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
else
OS << "UNKNOWN";
+ if (SkipOpers)
+ return;
+
// Print the rest of the operands.
bool OmittedAnyCallClobbers = false;
bool FirstOp = true;
@@ -1545,10 +1480,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << " ";
getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
- // Print HasSideEffects, IsAlignStack
+ // Print HasSideEffects, MayLoad, MayStore, IsAlignStack
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
OS << " [sideeffect]";
+ if (ExtraInfo & InlineAsm::Extra_MayLoad)
+ OS << " [mayload]";
+ if (ExtraInfo & InlineAsm::Extra_MayStore)
+ OS << " [maystore]";
if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
OS << " [alignstack]";
if (getInlineAsmDialect() == InlineAsm::AD_ATT)
@@ -1576,12 +1515,12 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
- if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
+ if (MRI.use_empty(Reg)) {
bool HasAliasLive = false;
for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true);
AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
- if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
+ if (!MRI.use_empty(AliasReg)) {
HasAliasLive = true;
break;
}
@@ -1653,7 +1592,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
}
bool HaveSemi = false;
- if (Flags) {
+ const unsigned PrintableFlags = FrameSetup;
+ if (Flags & PrintableFlags) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
OS << " flags: ";
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index 1f7fbfc719b0..77bcd1d7c8e3 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -8,14 +8,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
namespace {
@@ -47,8 +47,8 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
// Remove BUNDLE instruction and the InsideBundle flags from bundled
// instructions.
if (MI->isBundle()) {
- while (++MII != MIE && MII->isInsideBundle()) {
- MII->setIsInsideBundle(false);
+ while (++MII != MIE && MII->isBundledWithPred()) {
+ MII->unbundleFromPred();
for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MII->getOperand(i);
if (MO.isReg() && MO.isInternalRead())
@@ -101,13 +101,15 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
MachineBasicBlock::instr_iterator FirstMI,
MachineBasicBlock::instr_iterator LastMI) {
assert(FirstMI != LastMI && "Empty bundle?");
+ MIBundleBuilder Bundle(MBB, FirstMI, LastMI);
const TargetMachine &TM = MBB.getParent()->getTarget();
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(),
+ MachineInstrBuilder MIB = BuildMI(*MBB.getParent(), FirstMI->getDebugLoc(),
TII->get(TargetOpcode::BUNDLE));
+ Bundle.prepend(MIB);
SmallVector<unsigned, 32> LocalDefs;
SmallSet<unsigned, 32> LocalDefSet;
@@ -177,7 +179,6 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
}
- FirstMI->setIsInsideBundle();
Defs.clear();
}
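// [Editor's sketch, not part of the patch] The MIBundleBuilder pattern now
// used by finalizeBundle(): wrap an existing half-open range
// [FirstMI, LastMI) and prepend the BUNDLE header.
static void buildBundleDemo(MachineBasicBlock &MBB,
                            MachineBasicBlock::instr_iterator FirstMI,
                            MachineBasicBlock::instr_iterator LastMI,
                            const TargetInstrInfo *TII) {
  MIBundleBuilder Bundle(MBB, FirstMI, LastMI);
  MachineInstrBuilder MIB = BuildMI(*MBB.getParent(), FirstMI->getDebugLoc(),
                                    TII->get(TargetOpcode::BUNDLE));
  Bundle.prepend(MIB);
}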
@@ -223,14 +224,13 @@ bool llvm::finalizeBundles(MachineFunction &MF) {
bool Changed = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock &MBB = *I;
-
MachineBasicBlock::instr_iterator MII = MBB.instr_begin();
- assert(!MII->isInsideBundle() &&
- "First instr cannot be inside bundle before finalization!");
-
MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
if (MII == MIE)
continue;
+ assert(!MII->isInsideBundle() &&
+ "First instr cannot be inside bundle before finalization!");
+
for (++MII; MII != MIE; ) {
if (!MII->isInsideBundle())
++MII;
@@ -281,7 +281,7 @@ MachineOperandIteratorBase::PhysRegInfo
MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
const TargetRegisterInfo *TRI) {
bool AllDefsDead = true;
- PhysRegInfo PRI = {false, false, false, false, false, false, false};
+ PhysRegInfo PRI = {false, false, false, false, false, false};
assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
"analyzePhysReg not given a physical register!");
@@ -305,7 +305,9 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
// Reg or a super-reg is read, and perhaps killed also.
PRI.Reads = true;
PRI.Kills = MO.isKill();
- } if (IsRegOrOverlapping && MO.readsReg()) {
+ }
+
+ if (IsRegOrOverlapping && MO.readsReg()) {
PRI.ReadsOverlap = true;// Reg or an overlapping register is read.
}
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 169443e03d77..ed3ed4d4d916 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -22,6 +22,10 @@
#define DEBUG_TYPE "machine-licm"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -29,17 +33,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
@@ -62,7 +62,7 @@ namespace {
class MachineLICM : public MachineFunctionPass {
const TargetMachine *TM;
const TargetInstrInfo *TII;
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
const TargetRegisterInfo *TRI;
const MachineFrameInfo *MFI;
MachineRegisterInfo *MRI;
@@ -780,7 +780,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
unsigned Reg, unsigned OpIdx,
unsigned &RCId, unsigned &RCCost) const {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- EVT VT = *RC->vt_begin();
+ MVT VT = *RC->vt_begin();
if (VT == MVT::Untyped) {
RCId = RC->getID();
RCCost = 1;
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 27afeec1d973..4e2cfdc4e568 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -15,9 +15,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/lib/CodeGen/MachineLoopRanges.cpp b/lib/CodeGen/MachineLoopRanges.cpp
deleted file mode 100644
index 17fe67f65045..000000000000
--- a/lib/CodeGen/MachineLoopRanges.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the implementation of the MachineLoopRanges analysis.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachineLoopRanges.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
-
-using namespace llvm;
-
-char MachineLoopRanges::ID = 0;
-INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges",
- "Machine Loop Ranges", true, true)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges",
- "Machine Loop Ranges", true, true)
-
-char &llvm::MachineLoopRangesID = MachineLoopRanges::ID;
-
-void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequiredTransitive<SlotIndexes>();
- AU.addRequiredTransitive<MachineLoopInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-/// runOnMachineFunction - Don't do much, loop ranges are computed on demand.
-bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) {
- releaseMemory();
- Indexes = &getAnalysis<SlotIndexes>();
- return false;
-}
-
-void MachineLoopRanges::releaseMemory() {
- DeleteContainerSeconds(Cache);
- Cache.clear();
-}
-
-MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) {
- MachineLoopRange *&Range = Cache[Loop];
- if (!Range)
- Range = new MachineLoopRange(Loop, Allocator, *Indexes);
- return Range;
-}
-
-/// Create a MachineLoopRange, only accessible to MachineLoopRanges.
-MachineLoopRange::MachineLoopRange(const MachineLoop *loop,
- MachineLoopRange::Allocator &alloc,
- SlotIndexes &Indexes)
- : Loop(loop), Intervals(alloc), Area(0) {
- // Compute loop coverage.
- for (MachineLoop::block_iterator I = Loop->block_begin(),
- E = Loop->block_end(); I != E; ++I) {
- const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I);
- Intervals.insert(Range.first, Range.second, 1u);
- Area += Range.first.distance(Range.second);
- }
-}
-
-/// overlaps - Return true if this loop overlaps the given range of machine
-/// instructions.
-bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) {
- Map::const_iterator I = Intervals.find(Start);
- return I.valid() && Stop > I.start();
-}
-
-unsigned MachineLoopRange::getNumber() const {
- return Loop->getHeader()->getNumber();
-}
-
-/// byNumber - Comparator for array_pod_sort that sorts a list of
-/// MachineLoopRange pointers by number.
-int MachineLoopRange::byNumber(const void *pa, const void *pb) {
- const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
- const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
- unsigned na = a->getNumber();
- unsigned nb = b->getNumber();
- if (na < nb)
- return -1;
- if (na > nb)
- return 1;
- return 0;
-}
-
-/// byAreaDesc - Comparator for array_pod_sort that sorts a list of
-/// MachineLoopRange pointers by:
-/// 1. Descending area.
-/// 2. Ascending number.
-int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) {
- const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
- const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
- if (a->getArea() != b->getArea())
- return a->getArea() > b->getArea() ? -1 : 1;
- return byNumber(pa, pb);
-}
-
-void MachineLoopRange::print(raw_ostream &OS) const {
- OS << "Loop#" << getNumber() << " =";
- for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I)
- OS << " [" << I.start() << ';' << I.stop() << ')';
-}
-
-raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) {
- MLR.print(OS);
- return OS;
-}
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 005bf783e3da..0ea9ae0fcc89 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -8,18 +8,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineModuleInfo.h"
-
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -254,15 +253,8 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
const MCRegisterInfo &MRI,
const MCObjectFileInfo *MOFI)
- : ImmutablePass(ID), Context(MAI, MRI, MOFI),
- ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0),
- CallsUnwindInit(0), DbgInfoAvailable(false),
- UsesVAFloatArgument(false) {
+ : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
- // Always emit some info, by default "no personality" info.
- Personalities.push_back(NULL);
- AddrLabelSymbols = 0;
- TheModule = 0;
}
MachineModuleInfo::MachineModuleInfo()
@@ -274,26 +266,36 @@ MachineModuleInfo::MachineModuleInfo()
}
MachineModuleInfo::~MachineModuleInfo() {
- delete ObjFileMMI;
+}
- // FIXME: Why isn't doFinalization being called??
- //assert(AddrLabelSymbols == 0 && "doFinalization not called");
- delete AddrLabelSymbols;
+bool MachineModuleInfo::doInitialization(Module &M) {
+
+ ObjFileMMI = 0;
+ CompactUnwindEncoding = 0;
+ CurCallSite = 0;
+ CallsEHReturn = 0;
+ CallsUnwindInit = 0;
+ DbgInfoAvailable = UsesVAFloatArgument = false;
+ // Always emit some info, by default "no personality" info.
+ Personalities.push_back(NULL);
AddrLabelSymbols = 0;
-}
+ TheModule = 0;
-/// doInitialization - Initialize the state for a new module.
-///
-bool MachineModuleInfo::doInitialization() {
- assert(AddrLabelSymbols == 0 && "Improperly initialized");
return false;
}
-/// doFinalization - Tear down the state after completion of a module.
-///
-bool MachineModuleInfo::doFinalization() {
+bool MachineModuleInfo::doFinalization(Module &M) {
+
+ Personalities.clear();
+
delete AddrLabelSymbols;
AddrLabelSymbols = 0;
+
+ Context.reset();
+
+ delete ObjFileMMI;
+ ObjFileMMI = 0;
+
return false;
}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 95d7a7dd6897..1af00e84a6ed 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -30,12 +30,6 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
}
MachineRegisterInfo::~MachineRegisterInfo() {
-#ifndef NDEBUG
- clearVirtRegs();
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
- assert(!PhysRegUseDefLists[i] &&
- "PhysRegUseDefLists has entries after all instructions are deleted");
-#endif
delete [] PhysRegUseDefLists;
}
@@ -43,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() {
///
void
MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ assert(RC && RC->isAllocatable() && "Invalid RC for virtual register");
VRegInfo[Reg].first = RC;
}
@@ -180,6 +175,55 @@ void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) {
MO->Contents.Reg.Next = 0;
}
+/// Move NumOps operands from Src to Dst, updating use-def lists as needed.
+///
+/// The Dst range is assumed to be uninitialized memory. (Or it may contain
+/// operands that won't be destroyed, which is OK because the MO destructor is
+/// trivial anyway).
+///
+/// The Src and Dst ranges may overlap.
+void MachineRegisterInfo::moveOperands(MachineOperand *Dst,
+ MachineOperand *Src,
+ unsigned NumOps) {
+ assert(Src != Dst && NumOps && "Noop moveOperands");
+
+ // Copy backwards if Dst is within the Src range.
+ int Stride = 1;
+ if (Dst >= Src && Dst < Src + NumOps) {
+ Stride = -1;
+ Dst += NumOps - 1;
+ Src += NumOps - 1;
+ }
+
+ // Copy one operand at a time.
+ do {
+ new (Dst) MachineOperand(*Src);
+
+ // Dst takes Src's place in the use-def chain.
+ if (Src->isReg()) {
+ MachineOperand *&Head = getRegUseDefListHead(Src->getReg());
+ MachineOperand *Prev = Src->Contents.Reg.Prev;
+ MachineOperand *Next = Src->Contents.Reg.Next;
+ assert(Head && "List empty, but operand is chained");
+ assert(Prev && "Operand was not on use-def list");
+
+ // Prev links are circular, next link is NULL instead of looping back to
+ // Head.
+ if (Src == Head)
+ Head = Dst;
+ else
+ Prev->Contents.Reg.Next = Dst;
+
+ // Update Prev pointer. This also works when Src was pointing to itself
+ // in a 1-element list. In that case Head == Dst.
+ (Next ? Next : Head)->Contents.Reg.Prev = Dst;
+ }
+
+ Dst += Stride;
+ Src += Stride;
+ } while (--NumOps);
+}
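// [Editor's note, illustrative] The list invariant relied on above: Prev links
// are circular (the head's Prev points at the tail) while Next links end in
// NULL, so "(Next ? Next : Head)->Contents.Reg.Prev = Dst" fixes up both the
// interior case and the tail/one-element cases uniformly.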
+
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
@@ -240,13 +284,6 @@ bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
return false;
}
-bool MachineRegisterInfo::isLiveOut(unsigned Reg) const {
- for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I)
- if (*I == Reg)
- return true;
- return false;
-}
-
/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
/// corresponding live-in physical register.
unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 076547a5ed87..bb6aad7f948e 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -13,19 +13,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
using namespace llvm;
@@ -109,7 +109,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
/// a value of the given register class at the start of the specified basic
/// block. It returns the virtual register defined by the instruction.
static
-MachineInstr *InsertNewDef(unsigned Opcode,
+MachineInstrBuilder InsertNewDef(unsigned Opcode,
MachineBasicBlock *BB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC,
MachineRegisterInfo *MRI,
@@ -183,13 +183,12 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
// Otherwise, we do need a PHI: insert one now.
MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
- MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
- Loc, VRC, MRI, TII);
+ MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
+ Loc, VRC, MRI, TII);
// Fill in all the predecessors of the PHI.
- MachineInstrBuilder MIB(InsertedPHI);
for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
- MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+ InsertedPHI.addReg(PredValues[i].second).addMBB(PredValues[i].first);
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
@@ -316,8 +315,7 @@ public:
/// the specified predecessor block.
static void AddPHIOperand(MachineInstr *PHI, unsigned Val,
MachineBasicBlock *Pred) {
- PHI->addOperand(MachineOperand::CreateReg(Val, false));
- PHI->addOperand(MachineOperand::CreateMBB(Pred));
+ MachineInstrBuilder(*Pred->getParent(), PHI).addReg(Val).addMBB(Pred);
}
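// [Editor's sketch, hypothetical helper] The MachineInstrBuilder(MF, MI) form
// wraps an existing instruction so operands can be appended fluently, which is
// what AddPHIOperand() above now does.
static void appendPhiInputDemo(MachineFunction &MF, MachineInstr *PHI,
                               unsigned Val, MachineBasicBlock *Pred) {
  MachineInstrBuilder(MF, PHI).addReg(Val).addMBB(Pred);
}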
/// InstrIsPHI - Check if an instruction is a PHI.
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index a4817d09c0d3..5bd2349b50f6 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -14,20 +14,22 @@
#define DEBUG_TYPE "misched"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/ScheduleDAGILP.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/PriorityQueue.h"
-
#include <queue>
using namespace llvm;
@@ -49,14 +51,19 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
-// Threshold to very roughly model an out-of-order processor's instruction
-// buffers. If the actual value of this threshold matters much in practice, then
-// it can be specified by the machine model. For now, it's an experimental
-// tuning knob to determine when and if it matters.
-static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
- cl::desc("Allow expected latency to exceed the critical path by N cycles "
- "before attempting to balance ILP"),
- cl::init(10U));
+// Experimental heuristics
+static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
+ cl::desc("Enable load clustering."), cl::init(true));
+
+// Experimental heuristics
+static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
+ cl::desc("Verify machine instrs before and after machine scheduling"));
+
+// DAG subtrees must have at least this many nodes.
+static const unsigned MinSubtreeSize = 8;
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
@@ -195,6 +202,10 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
LIS = &getAnalysis<LiveIntervals>();
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ if (VerifyScheduling) {
+ DEBUG(LIS->print(dbgs()));
+ MF->verify(this, "Before machine scheduling.");
+ }
RegClassInfo->runOnMachineFunction(*MF);
// Select the scheduler, or set the default.
@@ -261,7 +272,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
}
DEBUG(dbgs() << "********** MI Scheduling **********\n");
DEBUG(dbgs() << MF->getName()
- << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
+ << ":BB#" << MBB->getNumber() << " " << MBB->getName()
+ << "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
dbgs() << " Remaining: " << RemainingInstrs << "\n");
@@ -282,6 +294,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
}
Scheduler->finalizeSchedule();
DEBUG(LIS->print(dbgs()));
+ if (VerifyScheduling)
+ MF->verify(this, "After machine scheduling.");
return true;
}
@@ -291,7 +305,7 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
- dbgs() << Name << ": ";
+ dbgs() << " " << Name << ": ";
for (unsigned i = 0, e = Queue.size(); i < e; ++i)
dbgs() << Queue[i]->NodeNum << " ";
dbgs() << "\n";
@@ -303,6 +317,25 @@ void ReadyQueue::dump() {
// preservation.
//===----------------------------------------------------------------------===//
+ScheduleDAGMI::~ScheduleDAGMI() {
+ delete DFSResult;
+ DeleteContainerPointers(Mutations);
+ delete SchedImpl;
+}
+
+bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+ if (SuccSU != &ExitSU) {
+ // Do not use WillCreateCycle, it assumes SD scheduling.
+ // If Pred is reachable from Succ, then the edge creates a cycle.
+ if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+ return false;
+ Topo.AddPred(SuccSU, PredDep.getSUnit());
+ }
+ SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+ // Return true regardless of whether a new edge needed to be inserted.
+ return true;
+}
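// [Editor's sketch, not part of the patch] How a DAG mutation (e.g. the new
// load clustering) would use addEdge() to request a weak cluster edge; the
// call returns false rather than create a cycle.
static void clusterDemo(ScheduleDAGMI *DAG, SUnit *SUa, SUnit *SUb) {
  // On success, SUb weakly prefers to be scheduled right after SUa.
  bool Added = DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
  (void)Added; // false means the edge would have created a cycle
}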
+
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
@@ -310,6 +343,12 @@ void ReadyQueue::dump() {
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ if (SuccEdge->isCluster())
+ NextClusterSucc = SuccSU;
+ return;
+ }
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@@ -338,6 +377,12 @@ void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
+ if (PredEdge->isWeak()) {
+ --PredSU->WeakSuccsLeft;
+ if (PredEdge->isCluster())
+ NextClusterPred = PredSU;
+ return;
+ }
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@@ -433,7 +478,8 @@ void ScheduleDAGMI::initRegPressure() {
// Cache the list of excess pressure sets in this region. This will also track
// the max pressure in the scheduled code for these sets.
RegionCriticalPSets.clear();
- std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
+ const std::vector<unsigned> &RegionPressure =
+ RPTracker.getPressure().MaxSetPressure;
for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
unsigned Limit = TRI->getRegPressureSetLimit(i);
DEBUG(dbgs() << TRI->getRegPressureSetName(i)
@@ -452,7 +498,7 @@ void ScheduleDAGMI::initRegPressure() {
// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
-updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
+updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
unsigned ID = RegionCriticalPSets[i].PSetID;
int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
@@ -474,14 +520,23 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
void ScheduleDAGMI::schedule() {
buildDAGWithRegPressure();
+ Topo.InitDAGTopologicalSorting();
+
postprocessDAG();
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ // This may initialize a DFSResult to be used for queue priority.
+ SchedImpl->initialize(this);
+
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
-
if (ViewMISchedDAGs) viewGraph();
- initQueues();
+ // Initialize ready queues now that the DAG and priority data are finalized.
+ initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
@@ -498,7 +553,7 @@ void ScheduleDAGMI::schedule() {
placeDebugValues();
DEBUG({
- unsigned BBNum = top()->getParent()->getNumber();
+ unsigned BBNum = begin()->getParent()->getNumber();
dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
dumpSchedule();
dbgs() << '\n';
@@ -516,7 +571,6 @@ void ScheduleDAGMI::buildDAGWithRegPressure() {
// Build the DAG, and compute current register pressure.
buildSchedGraph(AA, &RPTracker);
- if (ViewMISchedDAGs) viewGraph();
// Initialize top/bottom trackers after computing region pressure.
initRegPressure();
@@ -529,42 +583,67 @@ void ScheduleDAGMI::postprocessDAG() {
}
}
-// Release all DAG roots for scheduling.
-void ScheduleDAGMI::releaseRoots() {
- SmallVector<SUnit*, 16> BotRoots;
+void ScheduleDAGMI::computeDFSResult() {
+ if (!DFSResult)
+ DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
+ DFSResult->clear();
+ ScheduledTrees.clear();
+ DFSResult->resize(SUnits.size());
+ DFSResult->compute(SUnits);
+ ScheduledTrees.resize(DFSResult->getNumSubtrees());
+}
+void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
+ SmallVectorImpl<SUnit*> &BotRoots) {
for (std::vector<SUnit>::iterator
I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+ SUnit *SU = &(*I);
+ assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
+
+ // Order predecessors so DFSResult follows the critical path.
+ SU->biasCriticalPath();
+
// A SUnit is ready to top schedule if it has no predecessors.
- if (I->Preds.empty())
- SchedImpl->releaseTopNode(&(*I));
+ if (!I->NumPredsLeft)
+ TopRoots.push_back(SU);
// A SUnit is ready to bottom schedule if it has no successors.
- if (I->Succs.empty())
- BotRoots.push_back(&(*I));
+ if (!I->NumSuccsLeft)
+ BotRoots.push_back(SU);
}
- // Release bottom roots in reverse order so the higher priority nodes appear
- // first. This is more natural and slightly more efficient.
- for (SmallVectorImpl<SUnit*>::const_reverse_iterator
- I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
- SchedImpl->releaseBottomNode(*I);
+ ExitSU.biasCriticalPath();
}
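// Worked case (hypothetical SUnit): weak edges are counted in WeakPredsLeft
// rather than NumPredsLeft, so a node whose only incoming edge is an
// SDep::Cluster edge still satisfies !NumPredsLeft and is collected as a top
// root above, even though its Preds list is non-empty:
//
//   assert(SU->NumPredsLeft == 0 && SU->WeakPredsLeft == 1);
//   // -> SU lands in TopRoots; its weak count drains later in releaseSucc().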
/// Identify DAG roots and setup scheduler queues.
-void ScheduleDAGMI::initQueues() {
+void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
+ ArrayRef<SUnit*> BotRoots) {
+ NextClusterSucc = NULL;
+ NextClusterPred = NULL;
- // Initialize the strategy before modifying the DAG.
- SchedImpl->initialize(this);
+ // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
+ //
+ // Nodes with unreleased weak edges can still be roots.
+ // Release top roots in forward order.
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
+ SchedImpl->releaseTopNode(*I);
+ }
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
+ SchedImpl->releaseBottomNode(*I);
+ }
- // Release edges from the special Entry node or to the special Exit node.
releaseSuccessors(&EntrySU);
releasePredecessors(&ExitSU);
- // Release all DAG roots for scheduling.
- releaseRoots();
-
SchedImpl->registerRoots();
+ // Advance past initial DebugValues.
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
+ TopRPTracker.setPos(CurrentTop);
+
CurrentBottom = RegionEnd;
}
@@ -618,6 +697,15 @@ void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
SU->isScheduled = true;
+ if (DFSResult) {
+ unsigned SubtreeID = DFSResult->getSubtreeID(SU);
+ if (!ScheduledTrees.test(SubtreeID)) {
+ ScheduledTrees.set(SubtreeID);
+ DFSResult->scheduleTree(SubtreeID);
+ SchedImpl->scheduleTree(SubtreeID);
+ }
+ }
+
// Notify the scheduling strategy after updating the DAG.
SchedImpl->schedNode(SU, IsTopNode);
}
@@ -635,6 +723,8 @@ void ScheduleDAGMI::placeDebugValues() {
std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
MachineInstr *DbgValue = P.first;
MachineBasicBlock::iterator OrigPrevMI = P.second;
+ if (&*RegionBegin == DbgValue)
+ ++RegionBegin;
BB->splice(++OrigPrevMI, BB, DbgValue);
if (OrigPrevMI == llvm::prior(RegionEnd))
RegionEnd = DbgValue;
@@ -655,6 +745,166 @@ void ScheduleDAGMI::dumpSchedule() const {
#endif
//===----------------------------------------------------------------------===//
+// LoadClusterMutation - DAG post-processing to cluster loads.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create cluster edges between neighboring
+/// loads.
+class LoadClusterMutation : public ScheduleDAGMutation {
+ struct LoadInfo {
+ SUnit *SU;
+ unsigned BaseReg;
+ unsigned Offset;
+ LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
+ : SU(su), BaseReg(reg), Offset(ofs) {}
+ };
+ static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
+ const LoadClusterMutation::LoadInfo &RHS);
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+public:
+ LoadClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : TII(tii), TRI(tri) {}
+
+ virtual void apply(ScheduleDAGMI *DAG);
+protected:
+ void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+bool LoadClusterMutation::LoadInfoLess(
+ const LoadClusterMutation::LoadInfo &LHS,
+ const LoadClusterMutation::LoadInfo &RHS) {
+ if (LHS.BaseReg != RHS.BaseReg)
+ return LHS.BaseReg < RHS.BaseReg;
+ return LHS.Offset < RHS.Offset;
+}
+
+void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
+ ScheduleDAGMI *DAG) {
+ SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
+ for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
+ SUnit *SU = Loads[Idx];
+ unsigned BaseReg;
+ unsigned Offset;
+ if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
+ LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
+ }
+ if (LoadRecords.size() < 2)
+ return;
+ std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
+ unsigned ClusterLength = 1;
+ for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
+ if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
+ ClusterLength = 1;
+ continue;
+ }
+
+ SUnit *SUa = LoadRecords[Idx].SU;
+ SUnit *SUb = LoadRecords[Idx+1].SU;
+ if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
+ && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+
+ DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
+ << SUb->NodeNum << ")\n");
+ // Copy successor edges from SUa to SUb. Interleaving computation
+ // dependent on SUa can prevent load combining due to register reuse.
+ // Predecessor edges do not need to be copied from SUb to SUa since nearby
+ // loads should have effectively the same inputs.
+ for (SUnit::const_succ_iterator
+ SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
+ if (SI->getSUnit() == SUb)
+ continue;
+ DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
+ DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
+ }
+ ++ClusterLength;
+ }
+ else
+ ClusterLength = 1;
+ }
+}
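// Worked example (hypothetical registers and offsets): records
// {(r1, 8), (r2, 0), (r1, 0)} sort to (r1, 0), (r1, 8), (r2, 0), so the scan
// above only asks shouldClusterLoads() about the adjacent (r1, 0)/(r1, 8)
// pair and resets ClusterLength to 1 at the r1 -> r2 base-register boundary.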
+
+/// \brief Callback from DAG postProcessing to create cluster edges for loads.
+void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
+ // Map DAG NodeNum to store chain ID.
+ DenseMap<unsigned, unsigned> StoreChainIDs;
+ // Map each store chain to a set of dependent loads.
+ SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
+ for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+ SUnit *SU = &DAG->SUnits[Idx];
+ if (!SU->getInstr()->mayLoad())
+ continue;
+ unsigned ChainPredID = DAG->SUnits.size();
+ for (SUnit::const_pred_iterator
+ PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+ if (PI->isCtrl()) {
+ ChainPredID = PI->getSUnit()->NodeNum;
+ break;
+ }
+ }
+ // Check if this chain-like pred has been seen
+ // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
+ unsigned NumChains = StoreChainDependents.size();
+ std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
+ StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
+ if (Result.second)
+ StoreChainDependents.resize(NumChains + 1);
+ StoreChainDependents[Result.first->second].push_back(SU);
+ }
+ // Iterate over the store chains.
+ for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
+ clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
+}
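// Note on the sentinel value above: a load with no control (chain)
// predecessor keeps ChainPredID == DAG->SUnits.size(), which is not a real
// NodeNum, so every chain-less load at the top of the region falls into one
// shared bucket and can still be clustered with its neighbors there.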
+
+//===----------------------------------------------------------------------===//
+// MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create cluster edges between instructions
+/// that may be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+ const TargetInstrInfo *TII;
+public:
+ MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
+
+ virtual void apply(ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+/// \brief Callback from DAG postProcessing to create cluster edges to encourage
+/// fused operations.
+void MacroFusion::apply(ScheduleDAGMI *DAG) {
+ // For now, assume targets can only fuse with the branch.
+ MachineInstr *Branch = DAG->ExitSU.getInstr();
+ if (!Branch)
+ return;
+
+ for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
+ SUnit *SU = &DAG->SUnits[--Idx];
+ if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
+ continue;
+
+ // Create a single weak edge from SU to ExitSU. The only effect is to cause
+ // bottom-up scheduling to heavily prioritize the clustered SU. There is no
+ // need to copy predecessor edges from ExitSU to SU, since top-down
+ // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
+ // of SU, we could create an artificial edge from the deepest root, but it
+ // hasn't been needed yet.
+ bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
+ (void)Success;
+ assert(Success && "No DAG nodes should be reachable from ExitSU");
+
+ DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
+ break;
+ }
+}
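// Concrete instance (target-dependent assumption, not taken from this patch):
// on an x86-style core a flag-setting compare feeding the conditional branch
// is a typical fusion pair, so shouldScheduleAdjacent(CmpMI, Branch) would
// return true and the weak edge keeps the pair adjacent:
//
//   cmpl %ebx, %eax   // SU matched against the ExitSU branch instruction
//   jne  .LBB0_2      // the pair is a candidate for hardware macro-fusion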
+
+//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//
@@ -666,9 +916,10 @@ public:
/// Represent the type of SchedCandidate found within a single queue.
/// pickNodeBidirectional depends on these listed by decreasing priority.
enum CandReason {
- NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand,
- BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce,
- SingleMax, MultiPressure, NextDefUse, NodeOrder};
+ NoCand, SingleExcess, SingleCritical, Cluster,
+ ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
+ TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
+ NodeOrder};
#ifndef NDEBUG
static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
@@ -748,23 +999,26 @@ public:
unsigned CritResIdx;
// Number of micro-ops left to schedule.
unsigned RemainingMicroOps;
- // Is the unscheduled zone resource limited.
- bool IsResourceLimited;
-
- unsigned MaxRemainingCount;
void reset() {
CriticalPath = 0;
RemainingCounts.clear();
CritResIdx = 0;
RemainingMicroOps = 0;
- IsResourceLimited = false;
- MaxRemainingCount = 0;
}
SchedRemainder() { reset(); }
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
+
+ unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
+ if (!SchedModel->hasInstrSchedModel())
+ return 0;
+
+ return std::max(
+ RemainingMicroOps * SchedModel->getMicroOpFactor(),
+ RemainingCounts[CritResIdx]);
+ }
};
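// Worked arithmetic (hypothetical numbers): with a micro-op factor of 2,
// RemainingMicroOps == 10 and RemainingCounts[CritResIdx] == 15, the call
// returns max(10 * 2, 15) == 20 scaled units, i.e. the remainder is micro-op
// limited rather than limited by its critical resource.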
/// Each Scheduling boundary is associated with ready queues. It tracks the
@@ -805,15 +1059,15 @@ public:
unsigned ExpectedCount;
- // Policy flag: attempt to find ILP until expected latency is covered.
- bool ShouldIncreaseILP;
-
#ifndef NDEBUG
// Remember the greatest min operand latency.
unsigned MaxMinLatency;
#endif
void reset() {
+ // A new HazardRec is created for each DAG and owned by SchedBoundary.
+ delete HazardRec;
+
Available.clear();
Pending.clear();
CheckPending = false;
@@ -828,7 +1082,6 @@ public:
CritResIdx = 0;
IsResourceLimited = false;
ExpectedCount = 0;
- ShouldIncreaseILP = false;
#ifndef NDEBUG
MaxMinLatency = 0;
#endif
@@ -840,7 +1093,8 @@ public:
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
- Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
+ Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
+ HazardRec(0) {
reset();
}
@@ -856,7 +1110,7 @@ public:
unsigned getUnscheduledLatency(SUnit *SU) const {
if (isTop())
return SU->getHeight();
- return SU->getDepth();
+ return SU->getDepth() + SU->Latency;
}
unsigned getCriticalCount() const {
@@ -865,7 +1119,7 @@ public:
bool checkHazard(SUnit *SU);
- void checkILPPolicy();
+ void setLatencyPolicy(CandPolicy &Policy);
void releaseNode(SUnit *SU, unsigned ReadyCycle);
@@ -938,7 +1192,7 @@ protected:
SchedCandidate &Candidate);
#ifndef NDEBUG
- void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
+ void traceCandidate(const SchedCandidate &Cand);
#endif
};
} // namespace
@@ -961,6 +1215,13 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
RemainingCounts[PIdx] += (Factor * PI->Cycles);
}
}
+ for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
+ PIdx != PEnd; ++PIdx) {
+ if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
+ >= (int)SchedModel->getLatencyFactor()) {
+ CritResIdx = PIdx;
+ }
+ }
}
void ConvergingScheduler::SchedBoundary::
@@ -977,6 +1238,7 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
DAG = dag;
SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
+
Rem.init(DAG, SchedModel);
Top.init(DAG, SchedModel, &Rem);
Bot.init(DAG, SchedModel, &Rem);
@@ -998,7 +1260,7 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) {
if (SU->isScheduled)
return;
- for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
unsigned MinLatency = I->getMinLatency();
@@ -1019,6 +1281,8 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
+ if (I->isWeak())
+ continue;
unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
@@ -1067,12 +1331,28 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
return false;
}
-/// If expected latency is covered, disable ILP policy.
-void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
- if (ShouldIncreaseILP
- && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
- ShouldIncreaseILP = false;
- DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
+/// Compute the remaining latency to determine whether ILP should be increased.
+void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
+  // FIXME: compile time. In all, we visit four queues here; we should only
+  // need to visit the one that was last popped if we cache the result.
+ unsigned RemLatency = 0;
+ for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
+ I != E; ++I) {
+ unsigned L = getUnscheduledLatency(*I);
+ if (L > RemLatency)
+ RemLatency = L;
+ }
+ for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
+ I != E; ++I) {
+ unsigned L = getUnscheduledLatency(*I);
+ if (L > RemLatency)
+ RemLatency = L;
+ }
+ unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+ if (RemLatency + ExpectedLatency >= CriticalPathLimit
+ && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
+ Policy.ReduceLatency = true;
+ DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
}
}
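// Worked example (hypothetical numbers): with Rem->CriticalPath == 20, an
// ILP window of 5, ExpectedLatency == 12, and a deepest unscheduled node
// giving RemLatency == 15, the test 15 + 12 >= 20 + 5 holds; ReduceLatency
// is then set provided 15 also exceeds getMaxRemainingCount(), so the zone
// starts favoring latency reduction over resource balance.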
@@ -1091,15 +1371,6 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
// Record this node as an immediate dependent of the scheduled node.
NextSUs.insert(SU);
-
- // If CriticalPath has been computed, then check if the unscheduled nodes
- // exceed the ILP window. Before registerRoots, CriticalPath==0.
- if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
- > Rem->CriticalPath + ILPWindow)) {
- ShouldIncreaseILP = true;
- DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
- << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
- }
}
/// Move the boundary of scheduled code by one cycle.
@@ -1130,8 +1401,8 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
CheckPending = true;
IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
- DEBUG(dbgs() << " *** " << Available.getName() << " cycle "
- << CurrCycle << '\n');
+ DEBUG(dbgs() << " " << Available.getName()
+ << " Cycle: " << CurrCycle << '\n');
}
/// Add the given processor resource to this scheduled zone.
@@ -1147,9 +1418,6 @@ void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
Rem->RemainingCounts[PIdx] -= Count;
- // Reset MaxRemainingCount for sanity.
- Rem->MaxRemainingCount = 0;
-
// Check if this resource exceeds the current critical resource by a full
// cycle. If so, it becomes the critical resource.
if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
@@ -1281,9 +1549,7 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
/// resources.
///
/// If the CriticalZone is latency limited, don't force a policy for the
-/// candidates here. Instead, When releasing each candidate, releaseNode
-/// compares the region's critical path to the candidate's height or depth and
-/// the scheduled zone's expected latency then sets ShouldIncreaseILP.
+/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
void ConvergingScheduler::balanceZones(
ConvergingScheduler::SchedBoundary &CriticalZone,
ConvergingScheduler::SchedCandidate &CriticalCand,
@@ -1292,6 +1558,7 @@ void ConvergingScheduler::balanceZones(
if (!CriticalZone.IsResourceLimited)
return;
+ assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
SchedRemainder *Rem = CriticalZone.Rem;
@@ -1299,7 +1566,7 @@ void ConvergingScheduler::balanceZones(
// remainder, try to reduce it.
unsigned RemainingCritCount =
Rem->RemainingCounts[CriticalZone.CritResIdx];
- if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
+ if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
> (int)SchedModel->getLatencyFactor()) {
CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
@@ -1325,12 +1592,9 @@ void ConvergingScheduler::checkResourceLimits(
ConvergingScheduler::SchedCandidate &TopCand,
ConvergingScheduler::SchedCandidate &BotCand) {
- Bot.checkILPPolicy();
- Top.checkILPPolicy();
- if (Bot.ShouldIncreaseILP)
- BotCand.Policy.ReduceLatency = true;
- if (Top.ShouldIncreaseILP)
- TopCand.Policy.ReduceLatency = true;
+ // Set ReduceLatency to true if needed.
+ Bot.setLatencyPolicy(BotCand.Policy);
+ Top.setLatencyPolicy(TopCand.Policy);
// Handle resource-limited regions.
if (Top.IsResourceLimited && Bot.IsResourceLimited
@@ -1365,9 +1629,6 @@ void ConvergingScheduler::checkResourceLimits(
// The critical resource is different in each zone, so request balancing.
// Compute the cost of each zone.
- Rem.MaxRemainingCount = std::max(
- Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
- Rem.RemainingCounts[Rem.CritResIdx]);
Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
Top.ExpectedCount = std::max(
Top.getCriticalCount(),
@@ -1399,7 +1660,7 @@ initResourceDelta(const ScheduleDAGMI *DAG,
}
/// Return true if this heuristic determines order.
-static bool tryLess(unsigned TryVal, unsigned CandVal,
+static bool tryLess(int TryVal, int CandVal,
ConvergingScheduler::SchedCandidate &TryCand,
ConvergingScheduler::SchedCandidate &Cand,
ConvergingScheduler::CandReason Reason) {
@@ -1414,7 +1675,8 @@ static bool tryLess(unsigned TryVal, unsigned CandVal,
}
return false;
}
-static bool tryGreater(unsigned TryVal, unsigned CandVal,
+
+static bool tryGreater(int TryVal, int CandVal,
ConvergingScheduler::SchedCandidate &TryCand,
ConvergingScheduler::SchedCandidate &Cand,
ConvergingScheduler::CandReason Reason) {
@@ -1430,6 +1692,10 @@ static bool tryGreater(unsigned TryVal, unsigned CandVal,
return false;
}
+static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+ return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
+}
+
/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
@@ -1472,6 +1738,26 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
if (Cand.Reason == SingleCritical)
Cand.Reason = MultiPressure;
+ // Keep clustered nodes together to encourage downstream peephole
+ // optimizations which may reduce resource requirements.
+ //
+ // This is a best effort to set things up for a post-RA pass. Optimizations
+ // like generating loads of multiple registers should ideally be done within
+ // the scheduler pass by combining the loads during DAG postprocessing.
+ const SUnit *NextClusterSU =
+ Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
+ TryCand, Cand, Cluster))
+ return;
+ // Currently, weak edges are for clustering, so we hard-code that reason.
+ // However, deferring the current TryCand will not change Cand's reason.
+ CandReason OrigReason = Cand.Reason;
+ if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
+ getWeakLeft(Cand.SU, Zone.isTop()),
+ TryCand, Cand, Cluster)) {
+ Cand.Reason = OrigReason;
+ return;
+ }
// Avoid critical resource consumption and balance the schedule.
TryCand.initResourceDelta(DAG, SchedModel);
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
@@ -1518,15 +1804,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
// Prefer immediate defs/users of the last scheduled instruction. This is a
// nice pressure avoidance strategy that also conserves the processor's
// register renaming resources and keeps the machine code readable.
- if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) {
- TryCand.Reason = NextDefUse;
- return;
- }
- if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) {
- if (Cand.Reason > NextDefUse)
- Cand.Reason = NextDefUse;
+ if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
+ TryCand, Cand, NextDefUse))
return;
- }
+
// Fall through to original instruction order.
if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
|| (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
@@ -1572,6 +1853,7 @@ const char *ConvergingScheduler::getReasonStr(
case NoCand: return "NOCAND ";
case SingleExcess: return "REG-EXCESS";
case SingleCritical: return "REG-CRIT ";
+ case Cluster: return "CLUSTER ";
case SingleMax: return "REG-MAX ";
case MultiPressure: return "REG-MULTI ";
case ResourceReduce: return "RES-REDUCE";
@@ -1586,9 +1868,7 @@ const char *ConvergingScheduler::getReasonStr(
llvm_unreachable("Unknown reason!");
}
-void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
- const SchedBoundary &Zone) {
- const char *Label = getReasonStr(Cand.Reason);
+void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
PressureElement P;
unsigned ResIdx = 0;
unsigned Latency = 0;
@@ -1623,21 +1903,21 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
Latency = Cand.SU->getDepth();
break;
}
- dbgs() << Label << " " << Zone.Available.getName() << " ";
+ dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
if (P.isValid())
- dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
- << " ";
+ dbgs() << " " << TRI->getRegPressureSetName(P.PSetID)
+ << ":" << P.UnitIncrease << " ";
else
- dbgs() << " ";
+ dbgs() << " ";
if (ResIdx)
- dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
+ dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
else
- dbgs() << " ";
+ dbgs() << " ";
if (Latency)
- dbgs() << Latency << " cycles ";
+ dbgs() << " " << Latency << " cycles ";
else
- dbgs() << " ";
- Cand.SU->dump(DAG);
+ dbgs() << " ";
+ dbgs() << '\n';
}
#endif
@@ -1666,15 +1946,14 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
if (TryCand.ResDelta == SchedResourceDelta())
TryCand.initResourceDelta(DAG, SchedModel);
Cand.setBest(TryCand);
- DEBUG(traceCandidate(Cand, Zone));
+ DEBUG(traceCandidate(Cand));
}
- TryCand.SU = *I;
}
}
static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
bool IsTop) {
- DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
+ DEBUG(dbgs() << "Pick " << (IsTop ? "Top" : "Bot")
<< " SU(" << Cand.SU->NodeNum << ") "
<< ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}
@@ -1786,10 +2065,7 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
if (SU->isBottomReady())
Bot.removeReady(SU);
- DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
- << " Scheduling Instruction in cycle "
- << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
- SU->dump(DAG));
+ DEBUG(dbgs() << "Scheduling " << *SU->getInstr());
return SU;
}
@@ -1812,7 +2088,13 @@ void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
assert((!ForceTopDown || !ForceBottomUp) &&
"-misched-topdown incompatible with -misched-bottomup");
- return new ScheduleDAGMI(C, new ConvergingScheduler());
+ ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
+ // Register DAG post-processors.
+ if (EnableLoadCluster)
+ DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
+ if (EnableMacroFusion)
+ DAG->addMutation(new MacroFusion(DAG->TII));
+ return DAG;
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
@@ -1825,58 +2107,97 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.",
namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
- ScheduleDAGILP *ILP;
+ const SchedDFSResult *DFSResult;
+ const BitVector *ScheduledTrees;
bool MaximizeILP;
- ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
+ ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
/// \brief Apply a less-than relation on node priority.
+ ///
+ /// (Return true if A comes after B in the Q.)
bool operator()(const SUnit *A, const SUnit *B) const {
- // Return true if A comes after B in the Q.
+ unsigned SchedTreeA = DFSResult->getSubtreeID(A);
+ unsigned SchedTreeB = DFSResult->getSubtreeID(B);
+ if (SchedTreeA != SchedTreeB) {
+ // Unscheduled trees have lower priority.
+ if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
+ return ScheduledTrees->test(SchedTreeB);
+
+      // Trees with shallower connections have lower priority.
+ if (DFSResult->getSubtreeLevel(SchedTreeA)
+ != DFSResult->getSubtreeLevel(SchedTreeB)) {
+ return DFSResult->getSubtreeLevel(SchedTreeA)
+ < DFSResult->getSubtreeLevel(SchedTreeB);
+ }
+ }
if (MaximizeILP)
- return ILP->getILP(A) < ILP->getILP(B);
+ return DFSResult->getILP(A) < DFSResult->getILP(B);
else
- return ILP->getILP(A) > ILP->getILP(B);
+ return DFSResult->getILP(A) > DFSResult->getILP(B);
}
};
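// Priority recap for a hypothetical node pair: a node in an already scheduled
// subtree beats one in an unscheduled subtree; when both subtrees share that
// status, the deeper connection level wins; only within a single subtree does
// the raw ILP metric decide, in the direction selected by MaximizeILP.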
/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
- ScheduleDAGILP ILP;
+ /// In case all subtrees are eventually connected to a common root through
+ /// data dependence (e.g. reduction), place an upper limit on their size.
+ ///
+ /// FIXME: A subtree limit is generally good, but in the situation commented
+ /// above, where multiple similar subtrees feed a common root, we should
+ /// only split at a point where the resulting subtrees will be balanced.
+ /// (a motivating test case must be found).
+ static const unsigned SubtreeLimit = 16;
+
+ ScheduleDAGMI *DAG;
ILPOrder Cmp;
std::vector<SUnit*> ReadyQ;
public:
- ILPScheduler(bool MaximizeILP)
- : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
+ ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
- virtual void initialize(ScheduleDAGMI *DAG) {
+ virtual void initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+ DAG->computeDFSResult();
+ Cmp.DFSResult = DAG->getDFSResult();
+ Cmp.ScheduledTrees = &DAG->getScheduledTrees();
ReadyQ.clear();
- ILP.resize(DAG->SUnits.size());
}
virtual void registerRoots() {
- for (std::vector<SUnit*>::const_iterator
- I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
- ILP.computeILP(*I);
- }
+ // Restore the heap in ReadyQ with the updated DFS results.
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
}
/// Implement MachineSchedStrategy interface.
/// -----------------------------------------
+ /// Callback to select the highest priority node from the ready Q.
virtual SUnit *pickNode(bool &IsTopNode) {
if (ReadyQ.empty()) return NULL;
- pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
SUnit *SU = ReadyQ.back();
ReadyQ.pop_back();
IsTopNode = false;
- DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
- << " ILP: " << ILP.getILP(SU) << '\n');
+ DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
+ << *SU->getInstr()
+ << " ILP: " << DAG->getDFSResult()->getILP(SU)
+ << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
+ << DAG->getDFSResult()->getSubtreeLevel(
+ DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
return SU;
}
- virtual void schedNode(SUnit *, bool) {}
+ /// \brief Scheduler callback to notify that a new subtree is scheduled.
+ virtual void scheduleTree(unsigned SubtreeID) {
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+
+ /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
+ /// DFSResults, and resort the priority Q.
+ virtual void schedNode(SUnit *SU, bool IsTopNode) {
+ assert(!IsTopNode && "SchedDFSResult needs bottom-up");
+ }
virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
@@ -1986,3 +2307,90 @@ static MachineSchedRegistry ShufflerRegistry(
"shuffle", "Shuffle machine instructions alternating directions",
createInstructionShuffler);
#endif // !NDEBUG
+
+//===----------------------------------------------------------------------===//
+// GraphWriter support for ScheduleDAGMI.
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace llvm {
+
+template<> struct GraphTraits<
+ ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
+
+template<>
+struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
+
+  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool isNodeHidden(const SUnit *Node) {
+ return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ }
+
+ static bool hasNodeAddressLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ return false;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+ static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
+ std::string Str;
+ raw_string_ostream SS(Str);
+ SS << "SU(" << SU->NodeNum << ')';
+ return SS.str();
+ }
+ static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+ }
+
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ std::string Str("shape=Mrecord");
+ const SchedDFSResult *DFS =
+ static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
+ if (DFS) {
+ Str += ",style=filled,fillcolor=\"#";
+ Str += DOT::getColorString(DFS->getSubtreeID(N));
+ Str += '"';
+ }
+ return Str;
+ }
+};
+} // namespace llvm
+#endif // NDEBUG
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAGMI::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
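// Typical debugger use (assumption about the workflow, not from this patch):
//
//   (gdb) call DAG->viewGraph()
//
// pops up the region's DAG without having to construct Twine arguments.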
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index b117f8c3a206..4dafbe5a3e3a 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -18,18 +18,18 @@
#define DEBUG_TYPE "machine-sink"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 9686b0413293..49d8c4e9470d 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -8,20 +8,21 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "machine-trace-metrics"
-#include "MachineTraceMetrics.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SparseSet.h"
using namespace llvm;
@@ -57,6 +58,8 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
MF->getTarget().getSubtarget<TargetSubtargetInfo>();
SchedModel.init(*ST.getSchedModel(), &ST, TII);
BlockInfo.resize(MF->getNumBlockIDs());
+ ProcResourceCycles.resize(MF->getNumBlockIDs() *
+ SchedModel.getNumProcResourceKinds());
return false;
}
@@ -85,9 +88,13 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
return FBI;
// Compute resource usage in the block.
- // FIXME: Compute per-functional unit counts.
FBI->HasCalls = false;
unsigned InstrCount = 0;
+
+ // Add up per-processor resource cycles as well.
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ SmallVector<unsigned, 32> PRCycles(PRKinds);
+
for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
const MachineInstr *MI = I;
@@ -96,11 +103,43 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
++InstrCount;
if (MI->isCall())
FBI->HasCalls = true;
+
+ // Count processor resources used.
+ if (!SchedModel.hasInstrSchedModel())
+ continue;
+ const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI);
+ if (!SC->isValid())
+ continue;
+
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+ assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
+ PRCycles[PI->ProcResourceIdx] += PI->Cycles;
+ }
}
FBI->InstrCount = InstrCount;
+
+ // Scale the resource cycles so they are comparable.
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceCycles[PROffset + K] =
+ PRCycles[K] * SchedModel.getResourceFactor(K);
+
return FBI;
}
+ArrayRef<unsigned>
+MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
+ assert(BlockInfo[MBBNum].hasResources() &&
+ "getResources() must be called before getProcResourceCycles()");
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
+ return ArrayRef<unsigned>(ProcResourceCycles.data() + MBBNum * PRKinds,
+ PRKinds);
+}
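
// Indexing sketch (hypothetical helper restating the layout above):
// ProcResourceCycles is a flattened [NumBlocks x PRKinds] matrix, so block
// MBBNum owns the slice [MBBNum * PRKinds, (MBBNum + 1) * PRKinds):
//
//   unsigned getKindCycles(ArrayRef<unsigned> Flat, unsigned MBBNum,
//                          unsigned PRKinds, unsigned K) { // K < PRKinds
//     return Flat[MBBNum * PRKinds + K];
//   }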
+
//===----------------------------------------------------------------------===//
// Ensemble utility functions
//===----------------------------------------------------------------------===//
@@ -108,6 +147,9 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
: MTM(*ct) {
BlockInfo.resize(MTM.BlockInfo.size());
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
+ ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
}
// Virtual destructor serves as an anchor.
@@ -123,21 +165,32 @@ MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
// Compute resources from trace above. The top block is simple.
if (!TBI->Pred) {
TBI->InstrDepth = 0;
TBI->Head = MBB->getNumber();
+ std::fill(ProcResourceDepths.begin() + PROffset,
+ ProcResourceDepths.begin() + PROffset + PRKinds, 0);
return;
}
// Compute from the block above. A post-order traversal ensures the
// predecessor is always computed first.
- TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
+ unsigned PredNum = TBI->Pred->getNumber();
+ TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
TBI->Head = PredTBI->Head;
+
+ // Compute per-resource depths.
+ ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
+ ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}
// Update resource-related information in the TraceBlockInfo for MBB.
@@ -145,22 +198,33 @@ computeDepthResources(const MachineBasicBlock *MBB) {
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
// Compute resources for the current block.
TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+ ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
// The trace tail is done.
if (!TBI->Succ) {
TBI->Tail = MBB->getNumber();
+ std::copy(PRCycles.begin(), PRCycles.end(),
+ ProcResourceHeights.begin() + PROffset);
return;
}
// Compute from the block below. A post-order traversal ensures the
// successor is always computed first.
- TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
+ unsigned SuccNum = TBI->Succ->getNumber();
+ TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
TBI->InstrHeight += SuccTBI->InstrHeight;
TBI->Tail = SuccTBI->Tail;
+
+ // Compute per-resource heights.
+ ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}
// Check if depth resources for MBB are valid and return the TBI.
@@ -181,6 +245,35 @@ getHeightResources(const MachineBasicBlock *MBB) const {
return TBI->hasValidHeight() ? TBI : 0;
}
+/// Get an array of processor resource depths for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by all blocks preceding MBB in its trace. It does not include instructions
+/// in MBB.
+///
+/// Compare TraceBlockInfo::InstrDepth.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceDepths(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
+ return ArrayRef<unsigned>(ProcResourceDepths.data() + MBBNum * PRKinds,
+ PRKinds);
+}
+
+/// Get an array of processor resource heights for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by this block and all blocks following it in its trace.
+///
+/// Compare TraceBlockInfo::InstrHeight.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceHeights(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
+ return ArrayRef<unsigned>(ProcResourceHeights.data() + MBBNum * PRKinds,
+ PRKinds);
+}
+
//===----------------------------------------------------------------------===//
// Trace Selection Strategies
//===----------------------------------------------------------------------===//
@@ -677,7 +770,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
// Ignore dependencies outside the current trace.
const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
- if (!DefTBI.isEarlierInSameTrace(TBI))
+ if (!DefTBI.isUsefulDominator(TBI))
continue;
unsigned Len = LIR.Height + Cycles[DefMI].Depth;
MaxLen = std::max(MaxLen, Len);
@@ -713,11 +806,24 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
SmallVector<DataDep, 8> Deps;
while (!Stack.empty()) {
MBB = Stack.pop_back_val();
- DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
+ DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
TBI.HasValidInstrDepths = true;
TBI.CriticalPath = 0;
+ // Print out resource depths here as well.
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
+ ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ if (PRDepths[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
// Also compute the critical path length through MBB when possible.
if (TBI.HasValidInstrHeights)
TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
@@ -740,7 +846,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
const TraceBlockInfo&DepTBI =
BlockInfo[Dep.DefMI->getParent()->getNumber()];
// Ignore dependencies from outside the current trace.
- if (!DepTBI.isEarlierInSameTrace(TBI))
+ if (!DepTBI.isUsefulDominator(TBI))
continue;
assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
@@ -928,6 +1034,18 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
TBI.HasValidInstrHeights = true;
TBI.CriticalPath = 0;
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
+ ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
+ for (unsigned K = 0; K != PRHeights.size(); ++K)
+ if (PRHeights[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
// Get dependencies from PHIs in the trace successor.
const MachineBasicBlock *Succ = TBI.Succ;
// If MBB is the last block in the trace, and it has a back-edge to the
@@ -1058,27 +1176,52 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
}
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
- // For now, we compute the resource depth from instruction count / issue
- // width. Eventually, we should compute resource depth per functional unit
- // and return the max.
+ // Find the limiting processor resource.
+ // Numbers have been pre-scaled to be comparable.
+ unsigned PRMax = 0;
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ if (Bottom) {
+ ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
+ } else {
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K]);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
unsigned Instrs = TBI.InstrDepth;
if (Bottom)
Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
Instrs /= IW;
// Assume issue width 1 without a schedule model.
- return Instrs;
+ return std::max(Instrs, PRMax);
}
unsigned MachineTraceMetrics::Trace::
getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+ // Add up resources above and below the center block.
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
+ unsigned PRMax = 0;
+ for (unsigned K = 0; K != PRDepths.size(); ++K) {
+ unsigned PRCycles = PRDepths[K] + PRHeights[K];
+ for (unsigned I = 0; I != Extrablocks.size(); ++I)
+ PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
+ PRMax = std::max(PRMax, PRCycles);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
Instrs /= IW;
// Assume issue width 1 without a schedule model.
- return Instrs;
+ return std::max(Instrs, PRMax);
}
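// Worked example (hypothetical numbers, latency factor 2): if the limiting
// resource accumulates PRMax == 18 scaled units, getCycles() converts that to
// 9 cycles; with 24 instructions over an issue width of 4 the instruction
// bound is 24 / 4 == 6 cycles, so the resource bound of 9 cycles is returned.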
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h
deleted file mode 100644
index 460730b04059..000000000000
--- a/lib/CodeGen/MachineTraceMetrics.h
+++ /dev/null
@@ -1,350 +0,0 @@
-//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface for the MachineTraceMetrics analysis pass
-// that estimates CPU resource usage and critical data dependency paths through
-// preferred traces. This is useful for super-scalar CPUs where execution speed
-// can be limited both by data dependencies and by limited execution resources.
-//
-// Out-of-order CPUs will often be executing instructions from multiple basic
-// blocks at the same time. This makes it difficult to estimate the resource
-// usage accurately in a single basic block. Resources can be estimated better
-// by looking at a trace through the current basic block.
-//
-// For every block, the MachineTraceMetrics pass will pick a preferred trace
-// that passes through the block. The trace is chosen based on loop structure,
-// branch probabilities, and resource usage. The intention is to pick likely
-// traces that would be the most affected by code transformations.
-//
-// It is expensive to compute a full arbitrary trace for every block, so to
-// save some computations, traces are chosen to be convergent. This means that
-// if the traces through basic blocks A and B ever cross when moving away from
-// A and B, they never diverge again. This applies in both directions - If the
-// traces meet above A and B, they won't diverge when going further back.
-//
-// Traces tend to align with loops. The trace through a block in an inner loop
-// will begin at the loop entry block and end at a back edge. If there are
-// nested loops, the trace may begin and end at those instead.
-//
-// For each trace, we compute the critical path length, which is the number of
-// cycles required to execute the trace when execution is limited by data
-// dependencies only. We also compute the resource height, which is the number
-// of cycles required to execute all instructions in the trace when ignoring
-// data dependencies.
-//
-// Every instruction in the current block has a slack - the number of cycles
-// execution of the instruction can be delayed without extending the critical
-// path.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
-#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-
-namespace llvm {
-
-class InstrItineraryData;
-class MachineBasicBlock;
-class MachineInstr;
-class MachineLoop;
-class MachineLoopInfo;
-class MachineRegisterInfo;
-class TargetInstrInfo;
-class TargetRegisterInfo;
-class raw_ostream;
-
-class MachineTraceMetrics : public MachineFunctionPass {
- const MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const MachineRegisterInfo *MRI;
- const MachineLoopInfo *Loops;
- TargetSchedModel SchedModel;
-
-public:
- class Ensemble;
- class Trace;
- static char ID;
- MachineTraceMetrics();
- void getAnalysisUsage(AnalysisUsage&) const;
- bool runOnMachineFunction(MachineFunction&);
- void releaseMemory();
- void verifyAnalysis() const;
-
- friend class Ensemble;
- friend class Trace;
-
- /// Per-basic block information that doesn't depend on the trace through the
- /// block.
- struct FixedBlockInfo {
- /// The number of non-trivial instructions in the block.
- /// Doesn't count PHI and COPY instructions that are likely to be removed.
- unsigned InstrCount;
-
- /// True when the block contains calls.
- bool HasCalls;
-
- FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
-
- /// Returns true when resource information for this block has been computed.
- bool hasResources() const { return InstrCount != ~0u; }
-
- /// Invalidate resource information.
- void invalidate() { InstrCount = ~0u; }
- };
-
- /// Get the fixed resource information about MBB. Compute it on demand.
- const FixedBlockInfo *getResources(const MachineBasicBlock*);
-
- /// A virtual register or regunit required by a basic block or its trace
- /// successors.
- struct LiveInReg {
- /// The virtual register required, or a register unit.
- unsigned Reg;
-
- /// For virtual registers: Minimum height of the defining instruction.
- /// For regunits: Height of the highest user in the trace.
- unsigned Height;
-
- LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
- };
-
- /// Per-basic block information that relates to a specific trace through the
- /// block. Convergent traces mean that only one of these is required per
- /// block in a trace ensemble.
- struct TraceBlockInfo {
- /// Trace predecessor, or NULL for the first block in the trace.
- /// Valid when hasValidDepth().
- const MachineBasicBlock *Pred;
-
- /// Trace successor, or NULL for the last block in the trace.
- /// Valid when hasValidHeight().
- const MachineBasicBlock *Succ;
-
- /// The block number of the head of the trace. (When hasValidDepth()).
- unsigned Head;
-
- /// The block number of the tail of the trace. (When hasValidHeight()).
- unsigned Tail;
-
- /// Accumulated number of instructions in the trace above this block.
- /// Does not include instructions in this block.
- unsigned InstrDepth;
-
- /// Accumulated number of instructions in the trace below this block.
- /// Includes instructions in this block.
- unsigned InstrHeight;
-
- TraceBlockInfo() :
- Pred(0), Succ(0),
- InstrDepth(~0u), InstrHeight(~0u),
- HasValidInstrDepths(false), HasValidInstrHeights(false) {}
-
- /// Returns true if the depth resources have been computed from the trace
- /// above this block.
- bool hasValidDepth() const { return InstrDepth != ~0u; }
-
- /// Returns true if the height resources have been computed from the trace
- /// below this block.
- bool hasValidHeight() const { return InstrHeight != ~0u; }
-
- /// Invalidate depth resources when some block above this one has changed.
- void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
-
- /// Invalidate height resources when a block below this one has changed.
- void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
-
- /// Determine if this block belongs to the same trace as TBI and comes
- /// before it in the trace.
- /// Also returns true when TBI == this.
- bool isEarlierInSameTrace(const TraceBlockInfo &TBI) const {
- return hasValidDepth() && TBI.hasValidDepth() &&
- Head == TBI.Head && InstrDepth <= TBI.InstrDepth;
- }
-
- // Data-dependency-related information. Per-instruction depth and height
- // are computed from data dependencies in the current trace, using
- // itinerary data.
-
- /// Instruction depths have been computed. This implies hasValidDepth().
- bool HasValidInstrDepths;
-
- /// Instruction heights have been computed. This implies hasValidHeight().
- bool HasValidInstrHeights;
-
- /// Critical path length. This is the number of cycles in the longest data
- /// dependency chain through the trace. This is only valid when both
- /// HasValidInstrDepths and HasValidInstrHeights are set.
- unsigned CriticalPath;
-
- /// Live-in registers. These registers are defined above the current block
- /// and used by this block or a block below it.
- /// This does not include PHI uses in the current block, but it does
- /// include PHI uses in deeper blocks.
- SmallVector<LiveInReg, 4> LiveIns;
-
- void print(raw_ostream&) const;
- };
-
- /// InstrCycles represents the cycle height and depth of an instruction in a
- /// trace.
- struct InstrCycles {
- /// Earliest issue cycle as determined by data dependencies and instruction
- /// latencies from the beginning of the trace. Data dependencies from
- /// before the trace are not included.
- unsigned Depth;
-
- /// Minimum number of cycles from when this instruction is issued to the
- /// end of the trace, as determined by data dependencies and instruction
- /// latencies.
- unsigned Height;
- };
-
- /// A trace represents a plausible sequence of executed basic blocks that
- /// passes through the current basic block once. The Trace class serves as a
- /// handle to internal cached data structures.
- class Trace {
- Ensemble &TE;
- TraceBlockInfo &TBI;
-
- unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
-
- public:
- explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
- void print(raw_ostream&) const;
-
- /// Compute the total number of instructions in the trace.
- unsigned getInstrCount() const {
- return TBI.InstrDepth + TBI.InstrHeight;
- }
-
- /// Return the resource depth of the top/bottom of the trace center block.
- /// This is the number of cycles required to execute all instructions from
- /// the trace head to the trace center block. The resource depth only
- /// considers execution resources; it ignores data dependencies.
- /// When Bottom is set, instructions in the trace center block are included.
- unsigned getResourceDepth(bool Bottom) const;
-
- /// Return the resource length of the trace. This is the number of cycles
- /// required to execute the instructions in the trace if they were all
- /// independent, exposing the maximum instruction-level parallelism.
- ///
- /// Any blocks in Extrablocks are included as if they were part of the
- /// trace.
- unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks =
- ArrayRef<const MachineBasicBlock*>()) const;
-
- /// Return the length of the (data dependency) critical path through the
- /// trace.
- unsigned getCriticalPath() const { return TBI.CriticalPath; }
-
- /// Return the depth and height of MI. The depth is only valid for
- /// instructions in or above the trace center block. The height is only
- /// valid for instructions in or below the trace center block.
- InstrCycles getInstrCycles(const MachineInstr *MI) const {
- return TE.Cycles.lookup(MI);
- }
-
- /// Return the slack of MI. This is the number of cycles MI can be delayed
- /// before the critical path becomes longer.
- /// MI must be an instruction in the trace center block.
- unsigned getInstrSlack(const MachineInstr *MI) const;
-
- /// Return the Depth of a PHI instruction in a trace center block successor.
- /// The PHI does not have to be part of the trace.
- unsigned getPHIDepth(const MachineInstr *PHI) const;
- };
-
- /// A trace ensemble is a collection of traces selected using the same
- /// strategy, for example 'minimum resource height'. There is one trace for
- /// every block in the function.
- class Ensemble {
- SmallVector<TraceBlockInfo, 4> BlockInfo;
- DenseMap<const MachineInstr*, InstrCycles> Cycles;
- friend class Trace;
-
- void computeTrace(const MachineBasicBlock*);
- void computeDepthResources(const MachineBasicBlock*);
- void computeHeightResources(const MachineBasicBlock*);
- unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
- void computeInstrDepths(const MachineBasicBlock*);
- void computeInstrHeights(const MachineBasicBlock*);
- void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
- ArrayRef<const MachineBasicBlock*> Trace);
-
- protected:
- MachineTraceMetrics &MTM;
- virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
- virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
- explicit Ensemble(MachineTraceMetrics*);
- const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
- const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
- const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
-
- public:
- virtual ~Ensemble();
- virtual const char *getName() const =0;
- void print(raw_ostream&) const;
- void invalidate(const MachineBasicBlock *MBB);
- void verify() const;
-
- /// Get the trace that passes through MBB.
- /// The trace is computed on demand.
- Trace getTrace(const MachineBasicBlock *MBB);
- };
-
- /// Strategies for selecting traces.
- enum Strategy {
- /// Select the trace through a block that has the fewest instructions.
- TS_MinInstrCount,
-
- TS_NumStrategies
- };
-
- /// Get the trace ensemble representing the given trace selection strategy.
- /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
- /// and valid for the lifetime of the analysis pass.
- Ensemble *getEnsemble(Strategy);
-
- /// Invalidate cached information about MBB. This must be called *before* MBB
- /// is erased, or the CFG is otherwise changed.
- ///
- /// This invalidates per-block information about resource usage for MBB only,
- /// and it invalidates per-trace information for any trace that passes
- /// through MBB.
- ///
- /// Call Ensemble::getTrace() again to update any trace handles.
- void invalidate(const MachineBasicBlock *MBB);
-
-private:
- // One entry per basic block, indexed by block number.
- SmallVector<FixedBlockInfo, 4> BlockInfo;
-
- // One ensemble per strategy.
- Ensemble* Ensembles[TS_NumStrategies];
-};
-
-inline raw_ostream &operator<<(raw_ostream &OS,
- const MachineTraceMetrics::Trace &Tr) {
- Tr.print(OS);
- return OS;
-}
-
-inline raw_ostream &operator<<(raw_ostream &OS,
- const MachineTraceMetrics::Ensemble &En) {
- En.print(OS);
- return OS;
-}
-} // end namespace llvm
-
-#endif
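For orientation, here is a minimal sketch of how a client consumes the interface deleted above (the header itself is presumably relocated under include/llvm/CodeGen elsewhere in this import). The helper below and its name are hypothetical, not part of the patch; it follows the Ensemble/Trace API exactly as declared:

#include "llvm/CodeGen/MachineTraceMetrics.h"  // assumed new location
using namespace llvm;

// Hypothetical caller: decide whether MI, which must sit in the trace
// center block MBB, has slack, i.e. can be delayed without lengthening
// the data-dependency critical path.
static bool hasSlack(MachineTraceMetrics &MTM, const MachineBasicBlock *MBB,
                     const MachineInstr *MI) {
  MachineTraceMetrics::Ensemble *E =
      MTM.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
  MachineTraceMetrics::Trace T = E->getTrace(MBB); // computed on demand
  return T.getInstrSlack(MI) > 0;
}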
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 69a3ae84ec99..4b1230029a74 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -23,28 +23,28 @@
// the verifier errors.
//===----------------------------------------------------------------------===//
-#include "llvm/BasicBlock.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
@@ -307,6 +307,9 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
visitMachineBasicBlockBefore(MFI);
// Keep track of the current bundle header.
const MachineInstr *CurBundle = 0;
+ // Do we expect the next instruction to be part of the same bundle?
+ bool InBundle = false;
+
for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != MFI) {
@@ -314,6 +317,15 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
*OS << "Instruction: " << *MBBI;
continue;
}
+
+ // Check for consistent bundle flags.
+ if (InBundle && !MBBI->isBundledWithPred())
+ report("Missing BundledPred flag, "
+ "BundledSucc was set on predecessor", MBBI);
+ if (!InBundle && MBBI->isBundledWithPred())
+ report("BundledPred flag is set, "
+ "but BundledSucc not set on predecessor", MBBI);
+
// Is this a bundle header?
if (!MBBI->isInsideBundle()) {
if (CurBundle)
@@ -326,9 +338,14 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
visitMachineOperand(&MBBI->getOperand(I), I);
visitMachineInstrAfter(MBBI);
+
+ // Was this the last bundled instruction?
+ InBundle = MBBI->isBundledWithSucc();
}
if (CurBundle)
visitMachineBundleAfter(CurBundle);
+ if (InBundle)
+ report("BundledSucc flag set on last instruction in block", &MFI->back());
visitMachineBasicBlockAfter(MFI);
}
visitMachineFunctionAfter();
@@ -580,7 +597,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
++MBBI;
if (MBBI == MF->end()) {
report("MBB conditionally falls through out of function!", MBB);
- } if (MBB->succ_size() == 1) {
+ } else if (MBB->succ_size() == 1) {
// A conditional branch with only one successor is weird, but allowed.
if (&*MBBI != TBB)
report("MBB exits via conditional branch/fall-through but only has "
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 6da313e632af..3982612e8c11 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -14,13 +14,13 @@
#define DEBUG_TYPE "phi-opt"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Function.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index e6e23da27c1d..5584708eae36 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -14,23 +14,24 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "phielim"
+#include "llvm/CodeGen/Passes.h"
#include "PHIEliminationUtils.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;
@@ -39,9 +40,16 @@ DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
cl::Hidden, cl::desc("Disable critical edge splitting "
"during PHI elimination"));
+static cl::opt<bool>
+SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false),
+ cl::Hidden, cl::desc("Split all critical edges during "
+ "PHI elimination"));
+
namespace {
class PHIElimination : public MachineFunctionPass {
MachineRegisterInfo *MRI; // Machine register information
+ LiveVariables *LV;
+ LiveIntervals *LIS;
public:
static char ID; // Pass identification, replacement for typeid
@@ -57,8 +65,8 @@ namespace {
/// in predecessor basic blocks.
///
bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
- void LowerAtomicPHINode(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator AfterPHIsIt);
+ void LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
/// analyzePHINodes - Gather information about the PHI nodes in
/// here. In particular, we want to map the number of uses of a virtual
@@ -70,7 +78,12 @@ namespace {
/// Split critical edges where necessary for good coalescer performance.
bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
- LiveVariables &LV, MachineLoopInfo *MLI);
+ MachineLoopInfo *MLI);
+
+ // These functions are temporary abstractions around LiveVariables and
+ // LiveIntervals, so they can go away when LiveVariables does.
+ bool isLiveIn(unsigned Reg, MachineBasicBlock *MBB);
+ bool isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB);
typedef std::pair<unsigned, unsigned> BBVRegPair;
typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
@@ -87,7 +100,7 @@ namespace {
};
}
-STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumLowered, "Number of phis lowered");
STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
STATISTIC(NumReused, "Number of reused lowered phis");
@@ -103,6 +116,8 @@ INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination",
void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -110,19 +125,20 @@ void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
bool Changed = false;
// This pass takes the function out of SSA form.
MRI->leaveSSA();
- // Split critical edges to help the coalescer
- if (!DisableEdgeSplitting) {
- if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) {
- MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Changed |= SplitPHIEdges(MF, *I, *LV, MLI);
- }
+ // Split critical edges to help the coalescer. This does not yet support
+ // updating LiveIntervals, so we disable it.
+ if (!DisableEdgeSplitting && (LV || LIS)) {
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= SplitPHIEdges(MF, *I, MLI);
}
// Populate VRegPHIUseCount
@@ -137,14 +153,20 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
E = ImpDefs.end(); I != E; ++I) {
MachineInstr *DefMI = *I;
unsigned DefReg = DefMI->getOperand(0).getReg();
- if (MRI->use_nodbg_empty(DefReg))
+ if (MRI->use_nodbg_empty(DefReg)) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(DefMI);
DefMI->eraseFromParent();
+ }
}
// Clean up the lowered PHI instructions.
for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end();
- I != E; ++I)
+ I != E; ++I) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(I->first);
MF.DeleteMachineInstr(I->first);
+ }
LoweredPHIs.clear();
ImpDefs.clear();
@@ -166,7 +188,7 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
while (MBB.front().isPHI())
- LowerAtomicPHINode(MBB, AfterPHIsIt);
+ LowerPHINode(MBB, AfterPHIsIt);
return true;
}
@@ -193,15 +215,11 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
}
-/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
-/// under the assumption that it needs to be lowered in a way that supports
-/// atomic execution of PHIs. This lowering method is always correct all of the
-/// time.
+/// LowerPHINode - Lower the PHI node at the top of the specified block.

///
-void PHIElimination::LowerAtomicPHINode(
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator AfterPHIsIt) {
- ++NumAtomic;
+void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt) {
+ ++NumLowered;
// Unlink the PHI node from the basic block, but don't delete the PHI yet.
MachineInstr *MPhi = MBB.remove(MBB.begin());
@@ -244,7 +262,6 @@ void PHIElimination::LowerAtomicPHINode(
}
// Update live variable information if there is any.
- LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
if (LV) {
MachineInstr *PHICopy = prior(AfterPHIsIt);
@@ -283,6 +300,48 @@ void PHIElimination::LowerAtomicPHINode(
}
}
+ // Update LiveIntervals for the new copy or implicit def.
+ if (LIS) {
+ MachineInstr *NewInstr = prior(AfterPHIsIt);
+ SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr);
+
+ SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
+ if (IncomingReg) {
+ // Add the region from the beginning of MBB to the copy instruction to
+ // IncomingReg's live interval.
+ LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg);
+ VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
+ if (!IncomingVNI)
+ IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
+ LIS->getVNInfoAllocator());
+ IncomingLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getRegSlot(),
+ IncomingVNI));
+ }
+
+ LiveInterval &DestLI = LIS->getInterval(DestReg);
+ assert(DestLI.begin() != DestLI.end() &&
+ "PHIs should have nonempty LiveIntervals.");
+ if (DestLI.endIndex().isDead()) {
+ // A dead PHI's live range begins and ends at the start of the MBB, but
+ // the lowered copy, which will still be dead, needs to begin and end at
+ // the copy instruction.
+ VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex);
+ assert(OrigDestVNI && "PHI destination should be live at block entry.");
+ DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot());
+ DestLI.createDeadDef(DestCopyIndex.getRegSlot(),
+ LIS->getVNInfoAllocator());
+ DestLI.removeValNo(OrigDestVNI);
+ } else {
+ // Otherwise, remove the region from the beginning of MBB to the copy
+ // instruction from DestReg's live interval.
+ DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
+ assert(DestVNI && "PHI destination should be live at its definition.");
+ DestVNI->def = DestCopyIndex.getRegSlot();
+ }
+ }
+
// Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
--VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
@@ -315,45 +374,44 @@ void PHIElimination::LowerAtomicPHINode(
findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
// Insert the copy.
+ MachineInstr *NewSrcInstr = 0;
if (!reusedIncoming && IncomingReg) {
if (SrcUndef) {
// The source register is undefined, so there is no need for a real
// COPY, but we still need to ensure joint dominance by defs.
// Insert an IMPLICIT_DEF instruction.
- BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg);
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF),
+ IncomingReg);
// Clean up the old implicit-def, if there even was one.
if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
if (DefMI->isImplicitDef())
ImpDefs.insert(DefMI);
} else {
- BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::COPY), IncomingReg)
- .addReg(SrcReg, 0, SrcSubReg);
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg)
+ .addReg(SrcReg, 0, SrcSubReg);
}
}
- // Now update live variable information if we have it. Otherwise we're done
- if (SrcUndef || !LV) continue;
-
- // We want to be able to insert a kill of the register if this PHI (aka, the
- // copy we just inserted) is the last use of the source value. Live
- // variable analysis conservatively handles this by saying that the value is
- // live until the end of the block the PHI entry lives in. If the value
- // really is dead at the PHI copy, there will be no successor blocks which
- // have the value live-in.
-
- // Also check to see if this register is in use by another PHI node which
- // has not yet been eliminated. If so, it will be killed at an appropriate
- // point later.
-
- // Is it used by any PHI instructions in this block?
- bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)];
+ // We only need to update the LiveVariables kill of SrcReg if this was the
+ // last PHI use of SrcReg to be lowered on this CFG edge and it is not live
+ // out of the predecessor. We can also ignore undef sources.
+ if (LV && !SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] &&
+ !LV->isLiveOut(SrcReg, opBlock)) {
+ // We want to be able to insert a kill of the register if this PHI (aka,
+ // the copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value
+ // is live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+
+ // Okay, if we now know that the value is not live out of the block, we
+ // can add a kill marker in this block saying that it kills the incoming
+ // value!
- // Okay, if we now know that the value is not live out of the block, we can
- // add a kill marker in this block saying that it kills the incoming value!
- if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
// In our final twist, we have to decide which instruction kills the
// register. In most cases this is the copy, however, terminator
// instructions at the end of the block may also use the value. In this
@@ -394,11 +452,74 @@ void PHIElimination::LowerAtomicPHINode(
unsigned opBlockNum = opBlock.getNumber();
LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
}
+
+ if (LIS) {
+ if (NewSrcInstr) {
+ LIS->InsertMachineInstrInMaps(NewSrcInstr);
+ LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr);
+ }
+
+ if (!SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) {
+ LiveInterval &SrcLI = LIS->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
+ SE = opBlock.succ_end(); SI != SE; ++SI) {
+ SlotIndex startIdx = LIS->getMBBStartIdx(*SI);
+ VNInfo *VNI = SrcLI.getVNInfoAt(startIdx);
+
+ // Definitions by other PHIs are not truly live-in for our purposes.
+ if (VNI && VNI->def != startIdx) {
+ isLiveOut = true;
+ break;
+ }
+ }
+
+ if (!isLiveOut) {
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't just insert a copy.
+ KillInst = FirstTerm;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
+ }
+ }
+ assert(KillInst->readsRegister(SrcReg) &&
+ "Cannot find kill instruction");
+
+ SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst);
+ SrcLI.removeRange(LastUseIndex.getRegSlot(),
+ LIS->getMBBEndIdx(&opBlock));
+ }
+ }
+ }
}
// Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
- if (reusedIncoming || !IncomingReg)
+ if (reusedIncoming || !IncomingReg) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MPhi);
MF.DeleteMachineInstr(MPhi);
+ }
}
/// analyzePHINodes - Gather information about the PHI nodes in here. In
@@ -418,7 +539,6 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
MachineBasicBlock &MBB,
- LiveVariables &LV,
MachineLoopInfo *MLI) {
if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
return false; // Quick exit for basic blocks without PHIs.
@@ -438,10 +558,10 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// Avoid splitting backedges of loops. It would introduce small
// out-of-line blocks into the loop which is very bad for code placement.
- if (PreMBB == &MBB)
+ if (PreMBB == &MBB && !SplitAllCriticalEdges)
continue;
const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0;
- if (IsLoopHeader && PreLoop == CurLoop)
+ if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges)
continue;
// LV doesn't consider a phi use live-out, so isLiveOut only returns true
@@ -450,7 +570,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// there is a risk it may not be coalesced away.
//
// If the copy would be a kill, there is no need to split the edge.
- if (!LV.isLiveOut(Reg, *PreMBB))
+ if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges)
continue;
DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
@@ -465,7 +585,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// is likely to be left after coalescing. If we are looking at a loop
// exiting edge, split it so we won't insert code in the loop, otherwise
// don't bother.
- bool ShouldSplit = !LV.isLiveIn(Reg, MBB);
+ bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges;
// Check for a loop exiting edge.
if (!ShouldSplit && CurLoop != PreLoop) {
@@ -492,3 +612,33 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
}
return Changed;
}
+
+bool PHIElimination::isLiveIn(unsigned Reg, MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveIn() requires either LiveVariables or LiveIntervals");
+ if (LIS)
+ return LIS->isLiveInToMBB(LIS->getInterval(Reg), MBB);
+ else
+ return LV->isLiveIn(Reg, *MBB);
+}
+
+bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals");
+ // LiveVariables considers uses in PHIs to be in the predecessor basic block,
+ // so that a register used only in a PHI is not live out of the block. In
+ // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than
+ // in the predecessor basic block, so that a register used only in a PHI is live
+ // out of the block.
+ if (LIS) {
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (LI.liveAt(LIS->getMBBStartIdx(*SI)))
+ return true;
+ }
+ return false;
+ } else {
+ return LV->isLiveOut(Reg, *MBB);
+ }
+}
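PHIElimination now consults whichever liveness analysis happens to be available and declares both preserved, along with SlotIndexes. A minimal sketch of that pattern for an arbitrary machine pass (ExamplePass is hypothetical):

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
struct ExamplePass : public MachineFunctionPass {
  static char ID;
  ExamplePass() : MachineFunctionPass(ID) {}
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addPreserved<LiveVariables>();
    AU.addPreserved<SlotIndexes>();     // LiveIntervals builds on these
    AU.addPreserved<LiveIntervals>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
  virtual bool runOnMachineFunction(MachineFunction &MF) {
    // Prefer LiveIntervals when present; fall back to LiveVariables.
    LiveIntervals *LIS = getAnalysisIfAvailable<LiveIntervals>();
    LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
    (void)LIS; (void)LV;
    return false;
  }
};
char ExamplePass::ID = 0;
}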
diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp
index 10bfdcce6769..e1b56e962fa9 100644
--- a/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/lib/CodeGen/PHIEliminationUtils.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "PHIEliminationUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 4ea21d4ff7bd..1af65c88abeb 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -12,21 +12,21 @@
//
//===---------------------------------------------------------------------===//
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/PassManager.h"
+#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -39,12 +39,9 @@ static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
cl::desc("Disable pre-register allocation tail duplication"));
static cl::opt<bool> DisableBlockPlacement("disable-block-placement",
- cl::Hidden, cl::desc("Disable the probability-driven block placement, and "
- "re-enable the old code placement pass"));
+ cl::Hidden, cl::desc("Disable probability-driven block placement"));
static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
-static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
- cl::desc("Disable code placement"));
static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
@@ -88,7 +85,7 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"));
-// Experimental option to run live inteerval analysis early.
+// Experimental option to run live interval analysis early.
static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
cl::desc("Run live interval analysis earlier in the pipeline"));
@@ -149,10 +146,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
return applyDisable(TargetID, DisableEarlyTailDup);
if (StandardID == &MachineBlockPlacementID)
- return applyDisable(TargetID, DisableCodePlace);
-
- if (StandardID == &CodePlacementOptID)
- return applyDisable(TargetID, DisableCodePlace);
+ return applyDisable(TargetID, DisableBlockPlacement);
if (StandardID == &StackSlotColoringID)
return applyDisable(TargetID, DisableSSC);
@@ -237,11 +231,10 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
- // Disable early if-conversion. Targets that are ready can enable it.
- disablePass(&EarlyIfConverterID);
-
// Temporarily disable experimental passes.
- substitutePass(&MachineSchedulerID, 0);
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (!ST.enableMachineScheduler())
+ disablePass(&MachineSchedulerID);
}
/// Insert InsertedPassID pass after TargetPassID.
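Rather than substituting the machine scheduler away unconditionally, the pass is now gated on TargetSubtargetInfo::enableMachineScheduler(). A sketch of a target opting in (ExampleSubtarget is hypothetical; in-tree targets override the hook in their own subtarget classes):

#include "llvm/Target/TargetSubtargetInfo.h"

namespace {
// Hypothetical subtarget: returning true here keeps MachineSchedulerID
// enabled in the TargetPassConfig constructor above.
class ExampleSubtarget : public llvm::TargetSubtargetInfo {
public:
  virtual bool enableMachineScheduler() const { return true; }
};
}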
@@ -359,7 +352,7 @@ void TargetPassConfig::addIRPasses() {
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
- addPass(createLoopStrengthReducePass(getTargetLowering()));
+ addPass(createLoopStrengthReducePass());
if (PrintLSR)
addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
@@ -397,12 +390,16 @@ void TargetPassConfig::addPassesToHandleExceptions() {
}
}
-/// Add common passes that perform LLVM IR to IR transforms in preparation for
-/// instruction selection.
-void TargetPassConfig::addISelPrepare() {
+/// Add pass to prepare the LLVM IR for code generation. This should be done
+/// before exception handling preparation passes.
+void TargetPassConfig::addCodeGenPrepare() {
if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
addPass(createCodeGenPreparePass(getTargetLowering()));
+}
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+void TargetPassConfig::addISelPrepare() {
addPass(createStackProtectorPass(getTargetLowering()));
addPreISel();
@@ -462,8 +459,7 @@ void TargetPassConfig::addMachinePasses() {
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
addMachineSSAOptimization();
- }
- else {
+ } else {
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
addPass(&LocalStackSlotAllocationID);
@@ -507,9 +503,10 @@ void TargetPassConfig::addMachinePasses() {
}
// GC
- addPass(&GCMachineCodeAnalysisID);
- if (PrintGCInfo)
- addPass(createGCInfoPrinter(dbgs()));
+ if (addGCPasses()) {
+ if (PrintGCInfo)
+ addPass(createGCInfoPrinter(dbgs()));
+ }
// Basic block placement.
if (getOptLevel() != CodeGenOpt::None)
@@ -544,7 +541,12 @@ void TargetPassConfig::addMachineSSAOptimization() {
addPass(&DeadMachineInstructionElimID);
printAndVerify("After codegen DCE pass");
- addPass(&EarlyIfConverterID);
+ // Allow targets to insert passes that improve instruction level parallelism,
+ // like if-conversion. Such passes will typically need dominator trees and
+ // loop info, just like LICM and CSE below.
+ if (addILPOpts())
+ printAndVerify("After ILP optimizations");
+
addPass(&MachineLICMID);
addPass(&MachineCSEID);
addPass(&MachineSinkingID);
@@ -726,18 +728,15 @@ void TargetPassConfig::addMachineLateOptimization() {
printAndVerify("After copy propagation pass");
}
+/// Add standard GC passes.
+bool TargetPassConfig::addGCPasses() {
+ addPass(&GCMachineCodeAnalysisID);
+ return true;
+}
+
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
- AnalysisID PassID = 0;
- if (!DisableBlockPlacement) {
- // MachineBlockPlacement is a new pass which subsumes the functionality of
- // CodPlacementOpt. The old code placement pass can be restored by
- // disabling block placement, but eventually it will be removed.
- PassID = addPass(&MachineBlockPlacementID);
- } else {
- PassID = addPass(&CodePlacementOptID);
- }
- if (PassID) {
+ if (addPass(&MachineBlockPlacementID)) {
// Run a separate pass to collect block placement statistics.
if (EnableBlockPlacementStats)
addPass(&MachineBlockPlacementStatsID);
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index a795ac8448f5..a7439b5129b5 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -49,20 +49,26 @@
// v1 = bitcast v0
// = v0
//
+// - Optimize Loads:
+//
+// Loads that can be folded into a later instruction. A load is foldable
+// if it loads to a virtual register and that virtual register has a
+// single use.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "peephole-opt"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
// Optimize Extensions
@@ -473,6 +479,9 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
}
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
+ DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
+
if (DisablePeephole)
return false;
@@ -547,6 +556,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
FoldAsLoadDefReg, DefMI);
if (FoldMI) {
// Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+ DEBUG(dbgs() << "Replacing: " << *MI);
+ DEBUG(dbgs() << " With: " << *FoldMI);
LocalMIs.erase(MI);
LocalMIs.erase(DefMI);
LocalMIs.insert(FoldMI);
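A standalone restatement of the foldability condition from the new header comment (looksFoldable is a hypothetical helper; the pass's actual in-tree check may differ in detail):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// A load is a folding candidate when it is foldable per its description,
// defines exactly one register, and that register is virtual with a single
// non-debug use.
static bool looksFoldable(const MachineInstr *MI,
                          const MachineRegisterInfo *MRI) {
  if (!MI->canFoldAsLoad() || MI->getDesc().getNumDefs() != 1)
    return false;
  unsigned Reg = MI->getOperand(0).getReg();
  return TargetRegisterInfo::isVirtualRegister(Reg) &&
         MRI->hasOneNonDBGUse(Reg);
}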
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index d57bc7362de9..53fe273a1032 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -19,32 +19,33 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "post-RA-sched"
-#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/Passes.h"
#include "AggressiveAntiDepBreaker.h"
+#include "AntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
STATISTIC(NumNoops, "Number of noops inserted");
@@ -111,9 +112,6 @@ namespace {
/// added to the AvailableQueue.
std::vector<SUnit*> PendingQueue;
- /// Topo - A topological ordering for SUnits.
- ScheduleDAGTopologicalSort Topo;
-
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
@@ -198,7 +196,7 @@ SchedulePostRATDList::SchedulePostRATDList(
AliasAnalysis *AA, const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA),
+ : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA),
LiveRegs(TRI->getNumRegs())
{
const TargetMachine &TM = MF.getTarget();
@@ -420,11 +418,11 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
// Start with no live registers.
LiveRegs.reset();
- // Determine the live-out physregs for this block.
- if (!BB->empty() && BB->back().isReturn()) {
- // In a return block, examine the function live-out regs.
- for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
- E = MRI.liveout_end(); I != E; ++I) {
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
LiveRegs.set(Reg);
// Repeat, for all subregs.
@@ -432,20 +430,6 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
LiveRegs.set(*SubRegs);
}
}
- else {
- // In a non-return block, examine the live-in regs of all successors.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI) {
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
- E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- LiveRegs.set(Reg);
- // Repeat, for all subregs.
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- LiveRegs.set(*SubRegs);
- }
- }
- }
}
bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
@@ -467,13 +451,10 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
MO.setIsKill(false);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
+ MachineInstrBuilder MIB(MF, MI);
for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
if (LiveRegs.test(*SubRegs)) {
- MI->addOperand(MachineOperand::CreateReg(*SubRegs,
- true /*IsDef*/,
- true /*IsImp*/,
- false /*IsKill*/,
- false /*IsDead*/));
+ MIB.addReg(*SubRegs, RegState::ImplicitDefine);
AllDead = false;
}
}
@@ -580,10 +561,14 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
//===----------------------------------------------------------------------===//
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
-/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+/// the PendingQueue if the count reaches zero.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ return;
+ }
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@@ -653,8 +638,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
// Add all leaves to Available queue.
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
// It is available if it has no predecessors.
- bool available = SUnits[i].Preds.empty();
- if (available) {
+ if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
AvailableQueue.push(&SUnits[i]);
SUnits[i].isAvailable = true;
}
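The StartBlockForKills change drops the return-block special case: the live-out set is now always seeded from successor live-in lists. A self-contained sketch of that computation (computeLiveOut is a hypothetical helper mirroring the code above):

#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

static void computeLiveOut(const MachineBasicBlock *BB,
                           const TargetRegisterInfo *TRI,
                           BitVector &LiveRegs) {
  LiveRegs.reset();
  // Every register live into a successor, plus its subregisters, is live
  // out of BB; return blocks have no successors, so their set is empty here.
  for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
    for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
      LiveRegs.set(*I);
      for (MCSubRegIterator SubRegs(*I, TRI); SubRegs.isValid(); ++SubRegs)
        LiveRegs.set(*SubRegs);
    }
}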
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 77554d691c26..e5872df731a0 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -21,25 +21,24 @@
#define DEBUG_TYPE "pei"
#include "PrologEpilogInserter.h"
-#include "llvm/InlineAsm.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
using namespace llvm;
@@ -56,7 +55,6 @@ INITIALIZE_PASS_END(PEI, "prologepilog",
"Prologue/Epilogue Insertion & Frame Finalization",
false, false)
-STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
STATISTIC(NumBytesStackSpace,
"Number of bytes used for stack in all functions");
@@ -96,12 +94,13 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
placeCSRSpillsAndRestores(Fn);
// Add the code to save and restore the callee saved registers
- if (!F->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
insertCSRSpillsAndRestores(Fn);
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
- TFI->processFunctionBeforeFrameFinalized(Fn);
+ TFI->processFunctionBeforeFrameFinalized(Fn, RS);
// Calculate actual frame offsets for all abstract stack objects...
calculateFrameObjectOffsets(Fn);
@@ -111,7 +110,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// called functions. Because of this, calculateCalleeSavedRegisters()
// must be called before this function in order to set the AdjustsStack
// and MaxCallFrameSize variables.
- if (!F->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
insertPrologEpilogCode(Fn);
// Replace all MO_FrameIndex operands with physical register references
@@ -133,24 +133,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
return true;
}
-#if 0
-void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- if (ShrinkWrapping || ShrinkWrapFunc != "") {
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<MachineDominatorTree>();
- }
- AU.addPreserved<MachineLoopInfo>();
- AU.addPreserved<MachineDominatorTree>();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-#endif
-
/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
- const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -197,20 +183,20 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
// here. The sub/add sp instruction pairs are still inserted, but we don't
// need to track the SP adjustment for frame index elimination.
if (TFI->canSimplifyCallFramePseudos(Fn))
- RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
}
}
/// calculateCalleeSavedRegisters - Scan the function for modified callee saved
/// registers.
-void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
- const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
- const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
- MachineFrameInfo *MFI = Fn.getFrameInfo();
+void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
+ const TargetRegisterInfo *RegInfo = F.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = F.getTarget().getFrameLowering();
+ MachineFrameInfo *MFI = F.getFrameInfo();
// Get the callee saved register list...
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&F);
// These are used to keep track the callee-save area. Initialize them.
MinCSFrameIndex = INT_MAX;
@@ -221,13 +207,14 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
return;
// In Naked functions we aren't going to save any registers.
- if (Fn.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (F.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
return;
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
+ if (F.getRegInfo().isPhysRegUsed(Reg)) {
// If the reg is modified, save it!
CSI.push_back(CalleeSavedInfo(Reg));
}
@@ -248,7 +235,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
int FrameIdx;
- if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) {
+ if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
I->setFrameIdx(FrameIdx);
continue;
}
@@ -560,9 +547,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) &&
!RegInfo->needsStackRealignment(Fn)) {
- int SFI = RS->getScavengingFrameIndex();
- if (SFI >= 0)
- AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
}
// FIXME: Once this is working, then enable flag will change to a target
@@ -605,7 +594,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
- if (RS && (int)i == RS->getScavengingFrameIndex())
+ if (RS && RS->isScavengingFrameIndex((int)i))
continue;
if (MFI->isDeadObjectIndex(i))
continue;
@@ -627,7 +616,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
- if (RS && (int)i == RS->getScavengingFrameIndex())
+ if (RS && RS->isScavengingFrameIndex((int)i))
continue;
if (MFI->isDeadObjectIndex(i))
continue;
@@ -643,9 +632,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// stack pointer.
if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) ||
!RegInfo->useFPForScavengingIndex(Fn))) {
- int SFI = RS->getScavengingFrameIndex();
- if (SFI >= 0)
- AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVector<int, 2>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign);
}
if (!TFI.targetHandlesStackFrameRounding()) {
@@ -703,6 +694,14 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// space in small chunks instead of one large contiguous block.
if (Fn.getTarget().Options.EnableSegmentedStacks)
TFI.adjustForSegmentedStacks(Fn);
+
+ // Emit additional code that is required to explicitly handle the stack in
+ // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
+ // approach is rather similar to that of Segmented Stacks, but it uses a
+ // different conditional check and another BIF for allocating more stack
+ // space.
+ if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+ TFI.adjustForHiPEPrologue(Fn);
}
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
@@ -749,7 +748,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
MachineBasicBlock::iterator PrevI = BB->end();
if (I != BB->begin()) PrevI = prior(I);
- TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
+ TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
// Visit the instructions created by eliminateCallFramePseudoInstr().
if (PrevI == BB->end())
@@ -761,34 +760,36 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
MachineInstr *MI = I;
bool DoIncr = true;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
- if (MI->getOperand(i).isFI()) {
- // Some instructions (e.g. inline asm instructions) can have
- // multiple frame indices and/or cause eliminateFrameIndex
- // to insert more than one instruction. We need the register
- // scavenger to go through all of these instructions so that
- // it can update its register information. We keep the
- // iterator at the point before insertion so that we can
- // revisit them in full.
- bool AtBeginning = (I == BB->begin());
- if (!AtBeginning) --I;
-
- // If this instruction has a FrameIndex operand, we need to
- // use that target machine register info object to eliminate
- // it.
- TRI.eliminateFrameIndex(MI, SPAdj,
- FrameIndexVirtualScavenging ? NULL : RS);
-
- // Reset the iterator if we were at the beginning of the BB.
- if (AtBeginning) {
- I = BB->begin();
- DoIncr = false;
- }
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (!MI->getOperand(i).isFI())
+ continue;
- MI = 0;
- break;
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+ TRI.eliminateFrameIndex(MI, SPAdj, i,
+ FrameIndexVirtualScavenging ? NULL : RS);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
}
+ MI = 0;
+ break;
+ }
+
if (DoIncr && I != BB->end()) ++I;
// Update register states.
@@ -818,14 +819,22 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
E = Fn.end(); BB != E; ++BB) {
RS->enterBasicBlock(BB);
- unsigned VirtReg = 0;
- unsigned ScratchReg = 0;
int SPAdj = 0;
// The instruction stream may change in the loop, so check BB->end()
// directly.
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
MachineInstr *MI = I;
+ MachineBasicBlock::iterator J = llvm::next(I);
+ MachineBasicBlock::iterator P = I == BB->begin() ?
+ MachineBasicBlock::iterator(NULL) : llvm::prior(I);
+
+ // RS should process this instruction before we might scavenge at this
+ // location. This is because we might be replacing a virtual register
+ // defined by this instruction, and if so, registers killed by this
+ // instruction are available, and defined registers are not.
+ RS->forward(I);
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (MI->getOperand(i).isReg()) {
MachineOperand &MO = MI->getOperand(i);
@@ -835,29 +844,49 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- ++NumVirtualFrameRegs;
-
- // Have we already allocated a scratch register for this virtual?
- if (Reg != VirtReg) {
- // When we first encounter a new virtual register, it
- // must be a definition.
- assert(MI->getOperand(i).isDef() &&
- "frame index virtual missing def!");
- // Scavenge a new scratch register
- VirtReg = Reg;
- const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
- ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
- ++NumScavengedRegs;
- }
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MI->getOperand(i).isDef() &&
+ "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+ unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+ ++NumScavengedRegs;
+
// Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
- MI->getOperand(i).setReg(ScratchReg);
+ Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+ // Because this instruction was processed by the RS before this
+ // register was allocated, make sure that the RS now records the
+ // register as being used.
+ RS->setUsed(ScratchReg);
}
}
- RS->forward(I);
- ++I;
+
+ // If the scavenger needed to use one of its spill slots, the
+ // spill code will have been inserted in between I and J. This is a
+ // problem because we need the spill code before I: Move I to just
+ // prior to J.
+ if (I != llvm::prior(J)) {
+ BB->splice(J, BB, I);
+
+ // Before we move I, we need to prepare the RS to visit I again.
+ // Specifically, RS will assert if it sees uses of registers that
+ // it believes are undefined. Because we have already processed
+ // register kills in I, when it visits I again, it will believe that
+ // those registers are undefined. To avoid this situation, unprocess
+ // the instruction I.
+ assert(RS->getCurrentPosition() == I &&
+ "The register scavenger has an unexpected position");
+ I = P;
+ RS->unprocess(P);
+
+ // RS->skipTo(I == BB->begin() ? NULL : llvm::prior(I));
+ } else
+ ++I;
}
}
}
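The key ordering change in the scavenging loop is that RegScavenger::forward() now runs before a scratch register is chosen, so registers killed by the instruction become candidates while its defs do not. A condensed sketch (rewriteFrameVirtRegs is hypothetical; the spill-code splice/unprocess handling above is omitted for brevity):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

static void rewriteFrameVirtRegs(MachineBasicBlock *BB, RegScavenger *RS,
                                 MachineRegisterInfo &MRI) {
  RS->enterBasicBlock(BB);
  for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
    MachineBasicBlock::iterator J = llvm::next(I);
    // Process I first: its kills are now available, its defs are not.
    RS->forward(I);
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = I->getOperand(i);
      if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
        continue;
      const TargetRegisterClass *RC = MRI.getRegClass(MO.getReg());
      unsigned Scratch = RS->scavengeRegister(RC, J, /*SPAdj=*/0);
      MRI.replaceRegWith(MO.getReg(), Scratch);
      RS->setUsed(Scratch); // RS ran before Scratch was chosen; record it now.
    }
  }
}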
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index 0d140a9bb481..87fff9afb309 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -22,11 +22,11 @@
#ifndef LLVM_CODEGEN_PEI_H
#define LLVM_CODEGEN_PEI_H
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 49599b3ab980..85649111d7f1 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -11,14 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
#include <map>
using namespace llvm;
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 993dbc71ded3..c0355903574f 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -14,14 +14,14 @@
#define DEBUG_TYPE "regalloc"
#include "RegAllocBase.h"
-#include "LiveRegMatrix.h"
#include "Spiller.h"
-#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#ifndef NDEBUG
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index db0c8e13d30a..064e40f06b7b 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -37,9 +37,9 @@
#ifndef LLVM_CODEGEN_REGALLOCBASE
#define LLVM_CODEGEN_REGALLOCBASE
-#include "LiveIntervalUnion.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
namespace llvm {
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 8a49609552ad..0b6dc68cdf09 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -13,30 +13,28 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
-#include "RegAllocBase.h"
#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
#include "Spiller.h"
-#include "VirtRegMap.h"
-#include "LiveRegMatrix.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/PassAnalysisSupport.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <cstdlib>
#include <queue>
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 88922169b306..bb9c05c5f42d 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -13,28 +13,28 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;
@@ -113,12 +113,27 @@ namespace {
// PhysRegState - One of the RegState enums, or a virtreg.
std::vector<unsigned> PhysRegState;
+ // Set of register units.
typedef SparseSet<unsigned> UsedInInstrSet;
- // UsedInInstr - Set of physregs that are used in the current instruction,
- // and so cannot be allocated.
+ // Set of register units that are used in the current instruction, and so
+ // cannot be allocated.
UsedInInstrSet UsedInInstr;
+ // Mark a physreg as used in this instruction.
+ void markRegUsedInInstr(unsigned PhysReg) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ UsedInInstr.insert(*Units);
+ }
+
+ // Check if a physreg or any of its aliases are used in this instruction.
+ bool isRegUsedInInstr(unsigned PhysReg) const {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (UsedInInstr.count(*Units))
+ return true;
+ return false;
+ }
+
// SkippedInstrs - Descriptors of instructions whose clobber list was
// ignored because all registers were spilled. It is still necessary to
// mark all the clobbered registers as used by the function.
@@ -177,7 +192,6 @@ namespace {
unsigned VirtReg, unsigned Hint);
void spillAll(MachineBasicBlock::iterator MI);
bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
- void addRetOperands(MachineBasicBlock *MBB);
};
char RAFast::ID = 0;
}
@@ -334,7 +348,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
unsigned PhysReg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
"Bad usePhysReg operand");
-
+ markRegUsedInInstr(PhysReg);
switch (PhysRegState[PhysReg]) {
case regDisabled:
break;
@@ -342,7 +356,6 @@ void RAFast::usePhysReg(MachineOperand &MO) {
PhysRegState[PhysReg] = regFree;
// Fall through
case regFree:
- UsedInInstr.insert(PhysReg);
MO.setIsKill();
return;
default:
@@ -362,13 +375,11 @@ void RAFast::usePhysReg(MachineOperand &MO) {
"Instruction is not using a subregister of a reserved register");
// Leave the superregister in the working set.
PhysRegState[Alias] = regFree;
- UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
- UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
}
@@ -382,7 +393,6 @@ void RAFast::usePhysReg(MachineOperand &MO) {
// All aliases are disabled, bring register into working set.
PhysRegState[PhysReg] = regFree;
- UsedInInstr.insert(PhysReg);
MO.setIsKill();
}
@@ -391,7 +401,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
/// reserved instead of allocated.
void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
RegState NewState) {
- UsedInInstr.insert(PhysReg);
+ markRegUsedInInstr(PhysReg);
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
break;
@@ -431,7 +441,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// can be allocated directly.
// Returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
- if (UsedInInstr.count(PhysReg)) {
+ if (isRegUsedInInstr(PhysReg)) {
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
return spillImpossible;
}
@@ -456,8 +466,6 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
unsigned Cost = 0;
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
unsigned Alias = *AI;
- if (UsedInInstr.count(Alias))
- return spillImpossible;
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
break;
@@ -527,12 +535,12 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
}
}
- ArrayRef<unsigned> AO = RegClassInfo.getOrder(RC);
+ ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(RC);
// First try to find a completely free register.
- for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
+ for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
unsigned PhysReg = *I;
- if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) {
+ if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
assignVirtToPhysReg(*LRI, PhysReg);
return LRI;
}
@@ -542,7 +550,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
<< RC->getName() << "\n");
unsigned BestReg = 0, BestCost = spillImpossible;
- for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
+ for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
unsigned Cost = calcSpillCost(*I);
DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
DEBUG(dbgs() << "\tCost: " << Cost << "\n");
@@ -598,7 +606,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
LRI->Dirty = true;
- UsedInInstr.insert(LRI->PhysReg);
+ markRegUsedInInstr(LRI->PhysReg);
return LRI;
}
@@ -648,7 +656,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
assert(LRI->PhysReg && "Register not assigned");
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
- UsedInInstr.insert(LRI->PhysReg);
+ markRegUsedInInstr(LRI->PhysReg);
return LRI;
}
@@ -709,8 +717,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ markRegUsedInInstr(Reg);
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- UsedInInstr.insert(*AI);
if (ThroughRegs.count(PhysRegState[*AI]))
definePhysReg(MI, *AI, regFree);
}
@@ -766,67 +774,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
<< " as used in instr\n");
- UsedInInstr.insert(Reg);
+ markRegUsedInInstr(Reg);
}
// Also mark PartialDefs as used to avoid reallocation.
for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
- UsedInInstr.insert(PartialDefs[i]);
-}
-
-/// addRetOperand - ensure that a return instruction has an operand for each
-/// value live out of the function.
-///
-/// Things marked both call and return are tail calls; do not do this for them.
-/// The tail callee need not take the same registers as input that it produces
-/// as output, and there are dependencies for its input registers elsewhere.
-///
-/// FIXME: This should be done as part of instruction selection, and this helper
-/// should be deleted. Until then, we use custom logic here to create the proper
-/// operand under all circumstances. We can't use addRegisterKilled because that
-/// doesn't make sense for undefined values. We can't simply avoid calling it
-/// for undefined values, because we must ensure that the operand always exists.
-void RAFast::addRetOperands(MachineBasicBlock *MBB) {
- if (MBB->empty() || !MBB->back().isReturn() || MBB->back().isCall())
- return;
-
- MachineInstr *MI = &MBB->back();
-
- for (MachineRegisterInfo::liveout_iterator
- I = MBB->getParent()->getRegInfo().liveout_begin(),
- E = MBB->getParent()->getRegInfo().liveout_end(); I != E; ++I) {
- unsigned Reg = *I;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
- "Cannot have a live-out virtual register.");
-
- bool hasDef = PhysRegState[Reg] == regReserved;
-
- // Check if this register already has an operand.
- bool Found = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
-
- unsigned OperReg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(OperReg))
- continue;
-
- if (OperReg == Reg || TRI->isSuperRegister(OperReg, Reg)) {
- // If the ret already has an operand for this physreg or a superset,
- // don't duplicate it. Set the kill flag if the value is defined.
- if (hasDef && !MO.isKill())
- MO.setIsKill();
- Found = true;
- break;
- }
- }
- if (!Found)
- MI->addOperand(MachineOperand::CreateReg(Reg,
- false /*IsDef*/,
- true /*IsImp*/,
- hasDef/*IsKill*/));
- }
+ markRegUsedInInstr(PartialDefs[i]);
}
void RAFast::AllocateBasicBlock() {
@@ -1025,7 +978,7 @@ void RAFast::AllocateBasicBlock() {
for (UsedInInstrSet::iterator
I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
- MRI->setPhysRegUsed(*I);
+ MRI->setRegUnitUsed(*I);
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
@@ -1038,8 +991,7 @@ void RAFast::AllocateBasicBlock() {
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- UsedInInstr.insert(*AI);
+ markRegUsedInInstr(Reg);
}
}
@@ -1091,7 +1043,7 @@ void RAFast::AllocateBasicBlock() {
for (UsedInInstrSet::iterator
I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
- MRI->setPhysRegUsed(*I);
+ MRI->setRegUnitUsed(*I);
if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
DEBUG(dbgs() << "-- coalescing: " << *MI);
@@ -1111,9 +1063,6 @@ void RAFast::AllocateBasicBlock() {
MBB->erase(Coalesced[i]);
NumCopies += Coalesced.size();
- // addRetOperands must run after we've seen all defs in this block.
- addRetOperands(MBB);
-
DEBUG(MBB->dump());
}
@@ -1130,7 +1079,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
UsedInInstr.clear();
- UsedInInstr.setUniverse(TRI->getNumRegs());
+ UsedInInstr.setUniverse(TRI->getNumRegUnits());
assert(!MRI->isSSA() && "regalloc requires leaving SSA");
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 06f69c1e0d16..6d84176af261 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -13,36 +13,34 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
-#include "LiveRegMatrix.h"
#include "RegAllocBase.h"
-#include "Spiller.h"
#include "SpillPlacement.h"
+#include "Spiller.h"
#include "SplitKit.h"
-#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/PassAnalysisSupport.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Timer.h"
-
+#include "llvm/Support/raw_ostream.h"
#include <queue>
using namespace llvm;
@@ -414,7 +412,7 @@ void RAGreedy::enqueue(LiveInterval *LI) {
Prio = (1u << 31) + Size;
// Boost ranges that have a physical register hint.
- if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg)))
+ if (VRM->hasKnownPreference(Reg))
Prio |= (1u << 30);
}
@@ -443,7 +441,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
while ((PhysReg = Order.next()))
if (!Matrix->checkInterference(VirtReg, PhysReg))
break;
- if (!PhysReg || Order.isHint(PhysReg))
+ if (!PhysReg || Order.isHint())
return PhysReg;
// PhysReg is available, but there may be a better choice.
@@ -633,16 +631,33 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
// Keep track of the cheapest interference seen so far.
EvictionCost BestCost(~0u);
unsigned BestPhys = 0;
+ unsigned OrderLimit = Order.getOrder().size();
// When we are just looking for a reduced cost per use, don't break any
// hints, and only evict smaller spill weights.
if (CostPerUseLimit < ~0u) {
BestCost.BrokenHints = 0;
BestCost.MaxWeight = VirtReg.weight;
+
+ // Check if any registers in RC are below CostPerUseLimit.
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
+ unsigned MinCost = RegClassInfo.getMinCost(RC);
+ if (MinCost >= CostPerUseLimit) {
+ DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost
+ << ", no cheaper registers to be found.\n");
+ return 0;
+ }
+
+ // It is normal for register classes to have a long tail of registers with
+ // the same cost. We don't need to look at them if they're too expensive.
+ if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
+ OrderLimit = RegClassInfo.getLastCostChange(RC);
+ DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
+ }
}
Order.rewind();
- while (unsigned PhysReg = Order.next()) {
+ while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) {
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
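
The two early exits added to tryEvict above are worth seeing in isolation.
A standalone sketch with invented costs, assuming (as getLastCostChange
implies) that the allocation order ends in a run of registers sharing the
maximum cost:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<unsigned> CostOf = {0, 0, 1, 5, 5, 5, 5}; // per-reg cost,
                                                            // in alloc order
      unsigned CostPerUseLimit = 5;
      unsigned MinCost = *std::min_element(CostOf.begin(), CostOf.end());
      if (MinCost >= CostPerUseLimit)
        return 0;                    // no cheap-enough register exists at all
      unsigned OrderLimit = CostOf.size();
      if (CostOf.back() >= CostPerUseLimit)
        OrderLimit = 3;              // stands in for getLastCostChange(RC):
                                     // where the max-cost tail begins
      for (unsigned i = 0; i != OrderLimit; ++i)
        assert(CostOf[i] < CostPerUseLimit); // only candidates worth probing
    }
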
@@ -662,7 +677,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
BestPhys = PhysReg;
// Stop if the hint can be used.
- if (Order.isHint(PhysReg))
+ if (Order.isHint())
break;
}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 02ebce7a11a0..607edac24bd2 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -31,24 +31,24 @@
#define DEBUG_TYPE "regalloc"
-#include "Spiller.h"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
#include "RegisterCoalescer.h"
-#include "llvm/Module.h"
+#include "Spiller.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/RegAllocPBQP.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -526,7 +526,7 @@ void RegAllocPBQP::finalizeAlloc() const {
itr != end; ++itr) {
LiveInterval *li = &lis->getInterval(*itr);
- unsigned physReg = vrm->getRegAllocPref(li->reg);
+ unsigned physReg = mri->getSimpleHint(li->reg);
if (physReg == 0) {
const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 805d23567307..87382d8f7c42 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -18,10 +18,10 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -44,7 +44,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
}
// Does this MF have different CSRs?
- const uint16_t *CSR = TRI->getCalleeSavedRegs(MF);
+ const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
if (Update || CSR != CalleeSaved) {
// Build a CSRNum map. Every CSR alias gets an entry pointing to the last
// overlapping CSR.
@@ -79,30 +79,47 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned NumRegs = RC->getNumRegs();
if (!RCI.Order)
- RCI.Order.reset(new unsigned[NumRegs]);
+ RCI.Order.reset(new MCPhysReg[NumRegs]);
unsigned N = 0;
- SmallVector<unsigned, 16> CSRAlias;
+ SmallVector<MCPhysReg, 16> CSRAlias;
+ unsigned MinCost = 0xff;
+ unsigned LastCost = ~0u;
+ unsigned LastCostChange = 0;
// FIXME: Once targets reserve registers instead of removing them from the
// allocation order, we can simply use begin/end here.
- ArrayRef<uint16_t> RawOrder = RC->getRawAllocationOrder(*MF);
+ ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF);
for (unsigned i = 0; i != RawOrder.size(); ++i) {
unsigned PhysReg = RawOrder[i];
// Remove reserved registers from the allocation order.
if (Reserved.test(PhysReg))
continue;
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+ MinCost = std::min(MinCost, Cost);
+
if (CSRNum[PhysReg])
// PhysReg aliases a CSR, save it for later.
CSRAlias.push_back(PhysReg);
- else
+ else {
+ if (Cost != LastCost)
+ LastCostChange = N;
RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
}
RCI.NumRegs = N + CSRAlias.size();
assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
// CSR aliases go after the volatile registers, preserve the target's order.
- std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]);
+ for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) {
+ unsigned PhysReg = CSRAlias[i];
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+ if (Cost != LastCost)
+ LastCostChange = N;
+ RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
// Register allocator stress test. Clip register class to N registers.
if (StressRA && RCI.NumRegs > StressRA)
@@ -113,6 +130,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
RCI.ProperSubClass = true;
+ RCI.MinCost = uint8_t(MinCost);
+ RCI.LastCostChange = LastCostChange;
+
DEBUG({
dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
for (unsigned I = 0; I != RCI.NumRegs; ++I)
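
For the producer side of MinCost/LastCostChange, here is a toy version of
the bookkeeping compute() now performs while building the allocation order
(costs invented; the CSR-alias split is omitted for brevity):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<unsigned> Costs = {1, 1, 0, 2, 2}; // raw order, per-reg
      std::vector<unsigned> Order;
      unsigned MinCost = ~0u, LastCost = ~0u, LastCostChange = 0;
      for (unsigned Cost : Costs) {
        MinCost = std::min(MinCost, Cost);
        if (Cost != LastCost)
          LastCostChange = Order.size(); // index where cost last changed
        Order.push_back(Cost);
        LastCost = Cost;
      }
      assert(MinCost == 0 && LastCostChange == 3);
    }

The consumer (tryEvict) can then skip everything at or past LastCostChange
when the tail is too expensive.
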
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 2538f10ede59..d85646dd3c58 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -15,36 +15,30 @@
#define DEBUG_TYPE "regalloc"
#include "RegisterCoalescer.h"
-#include "LiveDebugVariables.h"
-#include "VirtRegMap.h"
-
-#include "llvm/Pass.h"
-#include "llvm/Value.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@@ -63,6 +57,17 @@ EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
cl::init(true));
+// Temporary flag to test critical edge unsplitting.
+static cl::opt<bool>
+EnableJoinSplits("join-splitedges",
+ cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
+
+// Temporary flag to test global copy optimization.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalCopies("join-globalcopies",
+ cl::desc("Coalesce copies that span blocks (default=subtarget)"),
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
static cl::opt<bool>
VerifyCoalescing("verify-coalescing",
cl::desc("Verify machine instrs before and after register coalescing"),
@@ -77,13 +82,21 @@ namespace {
const TargetRegisterInfo* TRI;
const TargetInstrInfo* TII;
LiveIntervals *LIS;
- LiveDebugVariables *LDV;
const MachineLoopInfo* Loops;
AliasAnalysis *AA;
RegisterClassInfo RegClassInfo;
+ /// \brief True if the coalescer should aggressively coalesce global copies
+ /// in favor of keeping local copies.
+ bool JoinGlobalCopies;
+
+ /// \brief True if the coalescer should aggressively coalesce fall-thru
+ /// blocks exclusively containing copies.
+ bool JoinSplitEdges;
+
/// WorkList - Copy instructions yet to be coalesced.
SmallVector<MachineInstr*, 8> WorkList;
+ SmallVector<MachineInstr*, 8> LocalWorkList;
/// ErasedInstrs - Set of instruction pointers that have been erased, and
/// that may be present in WorkList.
@@ -101,6 +114,9 @@ namespace {
/// LiveRangeEdit callback.
void LRE_WillEraseInstruction(MachineInstr *MI);
+ /// coalesceLocals - coalesce the LocalWorkList.
+ void coalesceLocals();
+
/// joinAllIntervals - join compatible live intervals
void joinAllIntervals();
@@ -108,9 +124,9 @@ namespace {
/// copies that cannot yet be coalesced into WorkList.
void copyCoalesceInMBB(MachineBasicBlock *MBB);
- /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after
- /// position From. Return true if any progress was made.
- bool copyCoalesceWorkList(unsigned From = 0);
+ /// copyCoalesceWorkList - Try to coalesce all copies in CurrList. Return
+ /// true if any progress was made.
+ bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns
@@ -150,11 +166,10 @@ namespace {
/// reMaterializeTrivialDef - If the source of a copy is defined by a
/// trivial computation, replace the copy by rematerialize the definition.
- bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
- MachineInstr *CopyMI);
+ bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI);
/// canJoinPhys - Return true if a physreg copy should be joined.
- bool canJoinPhys(CoalescerPair &CP);
+ bool canJoinPhys(const CoalescerPair &CP);
/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
@@ -189,7 +204,6 @@ char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -217,6 +231,23 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
return true;
}
+// Return true if this block should be vacated by the coalescer to eliminate
+// branches. The important cases to handle in the coalescer are critical edges
+// split during phi elimination which contain only copies. Simple blocks that
+// contain non-branches should also be vacated, but this can be handled by an
+// earlier pass similar to early if-conversion.
+static bool isSplitEdge(const MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ for (MachineBasicBlock::const_iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII) {
+ if (!MII->isCopyLike() && !MII->isUnconditionalBranch())
+ return false;
+ }
+ return true;
+}
+
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
SrcReg = DstReg = 0;
SrcIdx = DstIdx = 0;
@@ -358,8 +389,6 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
- AU.addRequired<LiveDebugVariables>();
- AU.addPreserved<LiveDebugVariables>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
@@ -701,9 +730,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerialize the definition.
-bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt,
- unsigned DstReg,
+bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
MachineInstr *CopyMI) {
+ unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+ unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ LiveInterval &SrcInt = LIS->getInterval(SrcReg);
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
assert(SrcLR != SrcInt.end() && "Live range not found!");
@@ -724,13 +758,17 @@ bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt,
const MCInstrDesc &MCID = DefMI->getDesc();
if (MCID.getNumDefs() != 1)
return false;
+ // Only support subregister destinations when the def is read-undef.
+ MachineOperand &DstOperand = CopyMI->getOperand(0);
+ if (DstOperand.getSubReg() && !DstOperand.isUndef())
+ return false;
if (!DefMI->isImplicitDef()) {
// Make sure the copy destination register class fits the instruction
// definition register class. The mismatch can happen as a result of earlier
// extract_subreg, insert_subreg, subreg_to_reg coalescing.
const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF);
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
- if (MRI->getRegClass(DstReg) != RC)
+ if (!MRI->constrainRegClass(DstReg, RC))
return false;
} else if (!RC->contains(DstReg))
return false;
@@ -742,6 +780,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt,
TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
MachineInstr *NewMI = prior(MII);
+ // The original DefMI may have been a subregister def, but the full register
+ // class of its destination matches the destination of CopyMI, and CopyMI is
+ // either a full register def or is read-undef. Therefore we can clear the
+ // subregister index on the rematerialized instruction.
+ NewMI->getOperand(0).setSubReg(0);
+
// NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
// We need to remember these so we can add intervals once we insert
// NewMI into SlotIndexes.
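
The new entry logic resolves the copy's true source and destination from the
CoalescerPair before rematerializing. A tiny model of the un-flip (the struct
is invented; the real CoalescerPair carries more state):

    #include <cassert>

    struct CoalescerPair { unsigned Src, Dst; bool Flipped; };

    int main() {
      // Flipped means Src/Dst were swapped when the pair was set up,
      // so undo the swap before asking which side to rematerialize.
      CoalescerPair CP = {/*Src=*/5, /*Dst=*/7, /*Flipped=*/true};
      unsigned SrcReg = CP.Flipped ? CP.Dst : CP.Src;
      unsigned DstReg = CP.Flipped ? CP.Src : CP.Dst;
      assert(SrcReg == 7 && DstReg == 5);
    }
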
@@ -847,9 +891,6 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);
- // Update LiveDebugVariables.
- LDV->renameRegister(SrcReg, DstReg, SubIdx);
-
SmallPtrSet<MachineInstr*, 8> Visited;
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
MachineInstr *UseMI = I.skipInstruction();) {
@@ -896,7 +937,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
}
/// canJoinPhys - Return true if a copy involving a physreg should be joined.
-bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
+bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
/// always beneficial.
@@ -974,9 +1015,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (!canJoinPhys(CP)) {
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
- if (!CP.isFlipped() &&
- reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
- CP.getDstReg(), CopyMI))
+ if (reMaterializeTrivialDef(CP, CopyMI))
return true;
return false;
}
@@ -1009,9 +1048,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// If definition of source is defined by trivial computation, try
// rematerializing it.
- if (!CP.isFlipped() &&
- reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
- CP.getDstReg(), CopyMI))
+ if (reMaterializeTrivialDef(CP, CopyMI))
return true;
// If we can eliminate the copy without merging the live ranges, do so now.
@@ -1246,8 +1283,18 @@ class JoinVals {
// Value in the other live range that overlaps this def, if any.
VNInfo *OtherVNI;
- // Is this value an IMPLICIT_DEF?
- bool IsImplicitDef;
+ // Is this value an IMPLICIT_DEF that can be erased?
+ //
+ // IMPLICIT_DEF values should only exist at the end of a basic block that
+ // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
+ // safely erased if they are overlapping a live value in the other live
+ // interval.
+ //
+ // Weird control flow graphs and incomplete PHI handling in
+ // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
+ // longer live ranges. Such IMPLICIT_DEF values should be treated like
+ // normal values.
+ bool ErasableImplicitDef;
// True when the live range of this value will be pruned because of an
// overlapping CR_Replace value in the other live range.
@@ -1257,8 +1304,8 @@ class JoinVals {
bool PrunedComputed;
Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
- RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false),
- PrunedComputed(false) {}
+ RedefVNI(0), OtherVNI(0), ErasableImplicitDef(false),
+ Pruned(false), PrunedComputed(false) {}
bool isAnalyzed() const { return WriteLanes != 0; }
};
@@ -1396,7 +1443,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// An IMPLICIT_DEF writes undef values.
if (DefMI->isImplicitDef()) {
- V.IsImplicitDef = true;
+ // We normally expect IMPLICIT_DEF values to be live only until the end
+ // of their block. If the value is really live longer and gets pruned in
+ // another block, this flag is cleared again.
+ V.ErasableImplicitDef = true;
V.ValidLanes &= ~V.WriteLanes;
}
}
@@ -1449,7 +1499,22 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// We have overlapping values, or possibly a kill of Other.
// Recursively compute assignments up the dominator tree.
Other.computeAssignment(V.OtherVNI->id, *this);
- const Val &OtherV = Other.Vals[V.OtherVNI->id];
+ Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+ // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // to erase the IMPLICIT_DEF instruction.
+ if (OtherV.ErasableImplicitDef && DefMI &&
+ DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into BB#" << DefMI->getParent()->getNumber()
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ }
// Allow overlapping PHI values. Any real interference would show up in a
// predecessor, the PHI itself can't introduce any conflicts.
@@ -1758,7 +1823,8 @@ void JoinVals::pruneValues(JoinVals &Other,
// predecessors, so the instruction should simply go away once its value
// has been replaced.
Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
- bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep;
+ bool EraseImpDef = OtherV.ErasableImplicitDef &&
+ OtherV.Resolution == CR_Keep;
if (!Def.isBlock()) {
// Remove <def,read-undef> flags. This def is now a partial redef.
// Also remove <def,dead> flags since the joined live range will
@@ -1807,7 +1873,7 @@ void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
// If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
// longer. The IMPLICIT_DEF instructions are only inserted by
// PHIElimination to guarantee that all PHI predecessors have a value.
- if (!Vals[i].IsImplicitDef || !Vals[i].Pruned)
+ if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
break;
// Remove value number i from LI. Note that this VNInfo is still present
// in NewVNInfo, so it will appear as an unused value number in the final
@@ -1904,47 +1970,77 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
}
namespace {
- // DepthMBBCompare - Comparison predicate that sort first based on the loop
- // depth of the basic block (the unsigned), and then on the MBB number.
- struct DepthMBBCompare {
- typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
- bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
- // Deeper loops first
- if (LHS.first != RHS.first)
- return LHS.first > RHS.first;
-
- // Prefer blocks that are more connected in the CFG. This takes care of
- // the most difficult copies first while intervals are short.
- unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
- unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
- if (cl != cr)
- return cl > cr;
-
- // As a last resort, sort by block number.
- return LHS.second->getNumber() < RHS.second->getNumber();
- }
- };
+// Information concerning MBB coalescing priority.
+struct MBBPriorityInfo {
+ MachineBasicBlock *MBB;
+ unsigned Depth;
+ bool IsSplit;
+
+ MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
+ : MBB(mbb), Depth(depth), IsSplit(issplit) {}
+};
+}
+
+// C-style comparator that sorts first based on the loop depth of the basic
+// block (the unsigned), and then on the MBB number.
+//
+// EnableGlobalCopies assumes that the primary sort key is loop depth.
+static int compareMBBPriority(const void *L, const void *R) {
+ const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
+ const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
+ // Deeper loops first
+ if (LHS->Depth != RHS->Depth)
+ return LHS->Depth > RHS->Depth ? -1 : 1;
+
+ // Try to unsplit critical edges next.
+ if (LHS->IsSplit != RHS->IsSplit)
+ return LHS->IsSplit ? -1 : 1;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
+ unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
+ if (cl != cr)
+ return cl > cr ? -1 : 1;
+
+ // As a last resort, sort by block number.
+ return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
+}
+
+/// \returns true if the given copy uses or defines a local live range.
+static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
+ if (!Copy->isCopy())
+ return false;
+
+ unsigned SrcReg = Copy->getOperand(1).getReg();
+ unsigned DstReg = Copy->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
+ || TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return false;
+
+ return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
+ || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
}
// Try joining WorkList copies starting from index From.
// Null out any successful joins.
-bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) {
- assert(From <= WorkList.size() && "Out of range");
+bool RegisterCoalescer::
+copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
bool Progress = false;
- for (unsigned i = From, e = WorkList.size(); i != e; ++i) {
- if (!WorkList[i])
+ for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
+ if (!CurrList[i])
continue;
// Skip instruction pointers that have already been erased, for example by
// dead code elimination.
- if (ErasedInstrs.erase(WorkList[i])) {
- WorkList[i] = 0;
+ if (ErasedInstrs.erase(CurrList[i])) {
+ CurrList[i] = 0;
continue;
}
bool Again = false;
- bool Success = joinCopy(WorkList[i], Again);
+ bool Success = joinCopy(CurrList[i], Again);
Progress |= Success;
if (Success || !Again)
- WorkList[i] = 0;
+ CurrList[i] = 0;
}
return Progress;
}
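
compareMBBPriority above is a C-style comparator (array_pod_sort ultimately
hands it to qsort), and its key order — depth, then split edges, then CFG
connectedness, then block number — can be checked standalone. A toy with
invented fields and values (the real struct computes connectedness from the
MBB rather than storing it):

    #include <cassert>
    #include <cstdlib>

    struct MBBPriorityInfo { int Num; unsigned Depth; bool IsSplit; unsigned Conn; };

    static int compareMBBPriority(const void *L, const void *R) {
      const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
      const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
      if (LHS->Depth != RHS->Depth)
        return LHS->Depth > RHS->Depth ? -1 : 1;   // deeper loops first
      if (LHS->IsSplit != RHS->IsSplit)
        return LHS->IsSplit ? -1 : 1;              // then split edges
      if (LHS->Conn != RHS->Conn)
        return LHS->Conn > RHS->Conn ? -1 : 1;     // then connectedness
      return LHS->Num < RHS->Num ? -1 : 1;         // then block number
    }

    int main() {
      MBBPriorityInfo MBBs[3] = {
        {0, 0, false, 2}, {1, 2, false, 2}, {2, 2, true, 1}};
      qsort(MBBs, 3, sizeof(MBBPriorityInfo), compareMBBPriority);
      assert(MBBs[0].Num == 2 && MBBs[1].Num == 1 && MBBs[2].Num == 0);
    }
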
@@ -1956,52 +2052,74 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
// Collect all copy-like instructions in MBB. Don't start coalescing anything
// yet, it might invalidate the iterator.
const unsigned PrevSize = WorkList.size();
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E; ++MII)
- if (MII->isCopyLike())
- WorkList.push_back(MII);
-
+ if (JoinGlobalCopies) {
+ // Coalesce copies bottom-up to coalesce local defs before local uses. They
+ // are not inherently easier to resolve, but slightly preferable until we
+ // have local live range splitting. In particular this is required by
+ // cmp+jmp macro fusion.
+ for (MachineBasicBlock::reverse_iterator
+ MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) {
+ if (!MII->isCopyLike())
+ continue;
+ if (isLocalCopy(&(*MII), LIS))
+ LocalWorkList.push_back(&(*MII));
+ else
+ WorkList.push_back(&(*MII));
+ }
+ }
+ else {
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII)
+ if (MII->isCopyLike())
+ WorkList.push_back(MII);
+ }
// Try coalescing the collected copies immediately, and remove the nulls.
// This prevents the WorkList from getting too large since most copies are
// joinable on the first attempt.
- if (copyCoalesceWorkList(PrevSize))
+ MutableArrayRef<MachineInstr*>
+ CurrList(WorkList.begin() + PrevSize, WorkList.end());
+ if (copyCoalesceWorkList(CurrList))
WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
(MachineInstr*)0), WorkList.end());
}
+void RegisterCoalescer::coalesceLocals() {
+ copyCoalesceWorkList(LocalWorkList);
+ for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
+ if (LocalWorkList[j])
+ WorkList.push_back(LocalWorkList[j]);
+ }
+ LocalWorkList.clear();
+}
+
void RegisterCoalescer::joinAllIntervals() {
DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
- assert(WorkList.empty() && "Old data still around.");
-
- if (Loops->empty()) {
- // If there are no loops in the function, join intervals in function order.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end();
- I != E; ++I)
- copyCoalesceInMBB(I);
- } else {
- // Otherwise, join intervals in inner loops before other intervals.
- // Unfortunately we can't just iterate over loop hierarchy here because
- // there may be more MBB's than BB's. Collect MBB's for sorting.
-
- // Join intervals in the function prolog first. We want to join physical
- // registers with virtual registers before the intervals got too long.
- std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
- for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
- MachineBasicBlock *MBB = I;
- MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I));
+ assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
+
+ std::vector<MBBPriorityInfo> MBBs;
+ MBBs.reserve(MF->size());
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
+ JoinSplitEdges && isSplitEdge(MBB)));
+ }
+ array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
+
+ // Coalesce intervals in MBB priority order.
+ unsigned CurrDepth = UINT_MAX;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ // Try coalescing the collected local copies for deeper loops.
+ if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
+ coalesceLocals();
+ CurrDepth = MBBs[i].Depth;
}
-
- // Sort by loop depth.
- std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
-
- // Finally, join intervals in loop nest order.
- for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
- copyCoalesceInMBB(MBBs[i].second);
+ copyCoalesceInMBB(MBBs[i].MBB);
}
+ coalesceLocals();
// Joining intervals can allow other intervals to be joined. Iteratively join
// until we make no progress.
- while (copyCoalesceWorkList())
+ while (copyCoalesceWorkList(WorkList))
/* empty */ ;
}
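
The interplay between the priority walk and coalesceLocals above reduces to
one rule: flush the local work list whenever the walk steps out of a deeper
loop into a shallower one. A standalone sketch with invented depths:

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<unsigned> Depths = {3, 3, 2, 2, 0}; // MBBs, priority order
      std::vector<unsigned> FlushPoints;
      unsigned CurrDepth = ~0u;
      for (unsigned i = 0; i != Depths.size(); ++i) {
        if (Depths[i] < CurrDepth) {
          FlushPoints.push_back(i);   // coalesceLocals() would run here
          CurrDepth = Depths[i];
        }
      }
      assert((FlushPoints == std::vector<unsigned>{0, 2, 4}));
    }

Local copies collected inside a loop are therefore coalesced before any
block outside that loop is visited, which is what keeps the local intervals
short.
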
@@ -2019,10 +2137,20 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
LIS = &getAnalysis<LiveIntervals>();
- LDV = &getAnalysis<LiveDebugVariables>();
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (EnableGlobalCopies == cl::BOU_UNSET)
+ JoinGlobalCopies = ST.enableMachineScheduler();
+ else
+ JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
+
+ // The MachineScheduler does not currently require JoinSplitEdges. This will
+ // either be enabled unconditionally or replaced by a more general live range
+ // splitting optimization.
+ JoinSplitEdges = EnableJoinSplits;
+
DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
<< "********** Function: " << MF->getName() << '\n');
@@ -2054,7 +2182,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
}
DEBUG(dump());
- DEBUG(LDV->dump());
if (VerifyCoalescing)
MF->verify(this, "After register coalescing");
return true;
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 543c426458d7..97f22e1049f6 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -12,25 +12,22 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-/// Increase register pressure for each set impacted by this register class.
+/// Increase pressure for each pressure set provided by TargetRegisterInfo.
static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
std::vector<unsigned> &MaxSetPressure,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) {
- unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
- for (const int *PSet = TRI->getRegClassPressureSets(RC);
- *PSet != -1; ++PSet) {
+ const int *PSet, unsigned Weight) {
+ for (; *PSet != -1; ++PSet) {
CurrSetPressure[*PSet] += Weight;
if (&CurrSetPressure != &MaxSetPressure
&& CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) {
@@ -39,32 +36,57 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
}
}
-/// Decrease register pressure for each set impacted by this register class.
+/// Decrease pressure for each pressure set provided by TargetRegisterInfo.
static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) {
- unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
- for (const int *PSet = TRI->getRegClassPressureSets(RC);
- *PSet != -1; ++PSet) {
+ const int *PSet, unsigned Weight) {
+ for (; *PSet != -1; ++PSet) {
assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow");
CurrSetPressure[*PSet] -= Weight;
}
}
/// Directly increase pressure only within this RegisterPressure result.
-void RegisterPressure::increase(const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) {
- increaseSetPressure(MaxSetPressure, MaxSetPressure, RC, TRI);
+void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ increaseSetPressure(MaxSetPressure, MaxSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ increaseSetPressure(MaxSetPressure, MaxSetPressure,
+ TRI->getRegUnitPressureSets(Reg),
+ TRI->getRegUnitWeight(Reg));
+ }
}
/// Directly decrease pressure only within this RegisterPressure result.
-void RegisterPressure::decrease(const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) {
- decreaseSetPressure(MaxSetPressure, RC, TRI);
+void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg),
+ TRI->getRegUnitWeight(Reg));
+ }
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static void dumpSetPressure(const std::vector<unsigned> &SetPressure,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) {
+ if (SetPressure[i] != 0)
+ dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n';
+ }
+}
+
void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
+ dbgs() << "Max Pressure: ";
+ dumpSetPressure(MaxSetPressure, TRI);
dbgs() << "Live In: ";
for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
@@ -73,42 +95,47 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
dbgs() << PrintReg(LiveOutRegs[i], TRI) << " ";
dbgs() << '\n';
- for (unsigned i = 0, e = MaxSetPressure.size(); i < e; ++i) {
- if (MaxSetPressure[i] != 0)
- dbgs() << TRI->getRegPressureSetName(i) << "=" << MaxSetPressure[i]
- << '\n';
- }
-}
-#endif
-
-/// Increase the current pressure as impacted by these physical registers and
-/// bump the high water mark if needed.
-void RegPressureTracker::increasePhysRegPressure(ArrayRef<unsigned> Regs) {
- for (unsigned I = 0, E = Regs.size(); I != E; ++I)
- increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
- TRI->getMinimalPhysRegClass(Regs[I]), TRI);
}
-/// Simply decrease the current pressure as impacted by these physcial
-/// registers.
-void RegPressureTracker::decreasePhysRegPressure(ArrayRef<unsigned> Regs) {
- for (unsigned I = 0, E = Regs.size(); I != E; ++I)
- decreaseSetPressure(CurrSetPressure, TRI->getMinimalPhysRegClass(Regs[I]),
- TRI);
+void RegPressureTracker::dump() const {
+ dbgs() << "Curr Pressure: ";
+ dumpSetPressure(CurrSetPressure, TRI);
+ P.dump(TRI);
}
+#endif
-/// Increase the current pressure as impacted by these virtual registers and
-/// bump the high water mark if needed.
-void RegPressureTracker::increaseVirtRegPressure(ArrayRef<unsigned> Regs) {
- for (unsigned I = 0, E = Regs.size(); I != E; ++I)
- increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
- MRI->getRegClass(Regs[I]), TRI);
+/// Increase the current pressure as impacted by these registers and bump
+/// the high water mark if needed.
+void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
+ if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ TRI->getRegUnitPressureSets(Regs[I]),
+ TRI->getRegUnitWeight(Regs[I]));
+ }
+ }
}
-/// Simply decrease the current pressure as impacted by these virtual registers.
-void RegPressureTracker::decreaseVirtRegPressure(ArrayRef<unsigned> Regs) {
- for (unsigned I = 0, E = Regs.size(); I != E; ++I)
- decreaseSetPressure(CurrSetPressure, MRI->getRegClass(Regs[I]), TRI);
+/// Simply decrease the current pressure as impacted by these registers.
+void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
+ if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
+ decreaseSetPressure(CurrSetPressure,
+ TRI->getRegClassPressureSets(RC),
+ TRI->getRegClassWeight(RC).RegWeight);
+ }
+ else {
+ decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]),
+ TRI->getRegUnitWeight(Regs[I]));
+ }
+ }
}
/// Clear the result so it can be used for another round of pressure tracking.
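
The pressure-set update that both helpers above funnel into is simple enough
to exercise standalone. A sketch of the sentinel-terminated walk (set indices
and weight invented; the Curr-vs-Max aliasing guard from the real helper is
dropped for brevity):

    #include <cassert>
    #include <vector>

    // A register contributes its weight to every pressure set it belongs
    // to; the set list is terminated by -1.
    static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
                                    std::vector<unsigned> &MaxSetPressure,
                                    const int *PSet, unsigned Weight) {
      for (; *PSet != -1; ++PSet) {
        CurrSetPressure[*PSet] += Weight;
        if (CurrSetPressure[*PSet] > MaxSetPressure[*PSet])
          MaxSetPressure[*PSet] = CurrSetPressure[*PSet];
      }
    }

    int main() {
      std::vector<unsigned> Curr(3, 0), Max(3, 0);
      const int GPRSets[] = {0, 2, -1}; // this "register" feeds sets 0 and 2
      increaseSetPressure(Curr, Max, GPRSets, 4);
      assert(Curr[0] == 4 && Curr[1] == 0 && Curr[2] == 4 && Max[0] == 4);
    }
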
@@ -160,6 +187,12 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
LiveInRegs.clear();
}
+const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return &LIS->getInterval(Reg);
+ return LIS->getCachedRegUnit(Reg);
+}
+
/// Setup the RegPressureTracker.
///
/// TODO: Add support for pressure without LiveIntervals.
@@ -181,9 +214,6 @@ void RegPressureTracker::init(const MachineFunction *mf,
}
CurrPos = pos;
- while (CurrPos != MBB->end() && CurrPos->isDebugValue())
- ++CurrPos;
-
CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0);
if (RequireIntervals)
@@ -192,10 +222,10 @@ void RegPressureTracker::init(const MachineFunction *mf,
static_cast<RegionPressure&>(P).reset();
P.MaxSetPressure = CurrSetPressure;
- LivePhysRegs.clear();
- LivePhysRegs.setUniverse(TRI->getNumRegs());
- LiveVirtRegs.clear();
- LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
+ LiveRegs.PhysRegs.clear();
+ LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs());
+ LiveRegs.VirtRegs.clear();
+ LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs());
}
/// Does this pressure result have a valid top position and live ins.
@@ -214,19 +244,28 @@ bool RegPressureTracker::isBottomClosed() const {
MachineBasicBlock::const_iterator());
}
+
+SlotIndex RegPressureTracker::getCurrSlot() const {
+ MachineBasicBlock::const_iterator IdxPos = CurrPos;
+ while (IdxPos != MBB->end() && IdxPos->isDebugValue())
+ ++IdxPos;
+ if (IdxPos == MBB->end())
+ return LIS->getMBBEndIdx(MBB);
+ return LIS->getInstructionIndex(IdxPos).getRegSlot();
+}
+
/// Set the boundary for the top of the region and summarize live ins.
void RegPressureTracker::closeTop() {
if (RequireIntervals)
- static_cast<IntervalPressure&>(P).TopIdx =
- LIS->getInstructionIndex(CurrPos).getRegSlot();
+ static_cast<IntervalPressure&>(P).TopIdx = getCurrSlot();
else
static_cast<RegionPressure&>(P).TopPos = CurrPos;
assert(P.LiveInRegs.empty() && "inconsistent max pressure result");
- P.LiveInRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size());
- P.LiveInRegs.append(LivePhysRegs.begin(), LivePhysRegs.end());
+ P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
+ P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
for (SparseSet<unsigned>::const_iterator I =
- LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I)
+ LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
P.LiveInRegs.push_back(*I);
std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end());
P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()),
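
getCurrSlot above replaces the old up-front debug-value skip in init() with
an on-demand lookahead: debug values carry no slot index, so the scan walks
past them before asking for one. A toy of that scan (instruction kinds
invented):

    #include <cassert>
    #include <vector>

    enum Kind { Normal, DebugValue };

    int main() {
      std::vector<Kind> MBB = {DebugValue, DebugValue, Normal};
      unsigned IdxPos = 0;
      while (IdxPos != MBB.size() && MBB[IdxPos] == DebugValue)
        ++IdxPos;
      assert(IdxPos == 2); // first real instruction; reaching the end
                           // would mean "use the block-end index"
    }
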
@@ -236,19 +275,15 @@ void RegPressureTracker::closeTop() {
/// Set the boundary for the bottom of the region and summarize live outs.
void RegPressureTracker::closeBottom() {
if (RequireIntervals)
- if (CurrPos == MBB->end())
- static_cast<IntervalPressure&>(P).BottomIdx = LIS->getMBBEndIdx(MBB);
- else
- static_cast<IntervalPressure&>(P).BottomIdx =
- LIS->getInstructionIndex(CurrPos).getRegSlot();
+ static_cast<IntervalPressure&>(P).BottomIdx = getCurrSlot();
else
static_cast<RegionPressure&>(P).BottomPos = CurrPos;
assert(P.LiveOutRegs.empty() && "inconsistent max pressure result");
- P.LiveOutRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size());
- P.LiveOutRegs.append(LivePhysRegs.begin(), LivePhysRegs.end());
+ P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
+ P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
for (SparseSet<unsigned>::const_iterator I =
- LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I)
+ LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
P.LiveOutRegs.push_back(*I);
std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end());
P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()),
@@ -258,7 +293,7 @@ void RegPressureTracker::closeBottom() {
/// Finalize the region boundaries and record live ins and live outs.
void RegPressureTracker::closeRegion() {
if (!isTopClosed() && !isBottomClosed()) {
- assert(LivePhysRegs.empty() && LiveVirtRegs.empty() &&
+ assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() &&
"no region boundary");
return;
}
@@ -269,151 +304,97 @@ void RegPressureTracker::closeRegion() {
// If both top and bottom are closed, do nothing.
}
-/// Return true if Reg aliases a register in Regs SparseSet.
-static bool hasRegAlias(unsigned Reg, SparseSet<unsigned> &Regs,
- const TargetRegisterInfo *TRI) {
- assert(!TargetRegisterInfo::isVirtualRegister(Reg) && "only for physregs");
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- if (Regs.count(*AI))
- return true;
- return false;
-}
-
-/// Return true if Reg aliases a register in unsorted Regs SmallVector.
-/// This is only valid for physical registers.
-static SmallVectorImpl<unsigned>::iterator
-findRegAlias(unsigned Reg, SmallVectorImpl<unsigned> &Regs,
- const TargetRegisterInfo *TRI) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- SmallVectorImpl<unsigned>::iterator I =
- std::find(Regs.begin(), Regs.end(), *AI);
- if (I != Regs.end())
- return I;
- }
- return Regs.end();
-}
-
-/// Return true if Reg can be inserted into Regs SmallVector. For virtual
-/// register, do a linear search. For physical registers check for aliases.
-static SmallVectorImpl<unsigned>::iterator
-findReg(unsigned Reg, bool isVReg, SmallVectorImpl<unsigned> &Regs,
- const TargetRegisterInfo *TRI) {
- if(isVReg)
- return std::find(Regs.begin(), Regs.end(), Reg);
- return findRegAlias(Reg, Regs, TRI);
+/// \brief Convenient wrapper for checking membership in RegisterOperands.
+static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) {
+ return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end();
}
/// Collect this instruction's unique uses and defs into SmallVectors for
/// processing defs and uses in order.
-template<bool isVReg>
-struct RegisterOperands {
+class RegisterOperands {
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+public:
SmallVector<unsigned, 8> Uses;
SmallVector<unsigned, 8> Defs;
SmallVector<unsigned, 8> DeadDefs;
+ RegisterOperands(const TargetRegisterInfo *tri,
+ const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {}
+
/// Push this operand's register onto the correct vector.
- void collect(const MachineOperand &MO, const TargetRegisterInfo *TRI) {
- if (MO.readsReg()) {
- if (findReg(MO.getReg(), isVReg, Uses, TRI) == Uses.end())
- Uses.push_back(MO.getReg());
- }
+ void collect(const MachineOperand &MO) {
+ if (!MO.isReg() || !MO.getReg())
+ return;
+ if (MO.readsReg())
+ pushRegUnits(MO.getReg(), Uses);
if (MO.isDef()) {
- if (MO.isDead()) {
- if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end())
- DeadDefs.push_back(MO.getReg());
+ if (MO.isDead())
+ pushRegUnits(MO.getReg(), DeadDefs);
+ else
+ pushRegUnits(MO.getReg(), Defs);
+ }
+ }
+
+protected:
+ void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &Regs) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (containsReg(Regs, Reg))
+ return;
+ Regs.push_back(Reg);
+ }
+ else if (MRI->isAllocatable(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (containsReg(Regs, *Units))
+ continue;
+ Regs.push_back(*Units);
}
- else if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end())
- Defs.push_back(MO.getReg());
}
}
};
-typedef RegisterOperands<false> PhysRegOperands;
-typedef RegisterOperands<true> VirtRegOperands;
/// Collect physical and virtual register operands.
static void collectOperands(const MachineInstr *MI,
- PhysRegOperands &PhysRegOpers,
- VirtRegOperands &VirtRegOpers,
- const TargetRegisterInfo *TRI,
- const MachineRegisterInfo *MRI) {
- for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) {
- const MachineOperand &MO = *OperI;
- if (!MO.isReg() || !MO.getReg())
- continue;
+ RegisterOperands &RegOpers) {
+ for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
+ RegOpers.collect(*OperI);
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- VirtRegOpers.collect(MO, TRI);
- else if (MRI->isAllocatable(MO.getReg()))
- PhysRegOpers.collect(MO, TRI);
- }
// Remove redundant physreg dead defs.
- for (unsigned i = PhysRegOpers.DeadDefs.size(); i > 0; --i) {
- unsigned Reg = PhysRegOpers.DeadDefs[i-1];
- if (findRegAlias(Reg, PhysRegOpers.Defs, TRI) != PhysRegOpers.Defs.end())
- PhysRegOpers.DeadDefs.erase(&PhysRegOpers.DeadDefs[i-1]);
- }
+ SmallVectorImpl<unsigned>::iterator I =
+ std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
+ std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
+ RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
}
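
The dead-def pruning above leans on the C++98 binder idioms std::bind1st and
std::ptr_fun, which C++11 deprecates. A self-contained sketch of the same
erase-remove filter written with a lambda, using plain vectors in place of the
RegisterOperands fields:

#include <algorithm>
#include <cassert>
#include <vector>

static bool containsReg(const std::vector<unsigned> &Regs, unsigned Reg) {
  return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end();
}

int main() {
  std::vector<unsigned> Defs = {1, 2};
  std::vector<unsigned> DeadDefs = {2, 3};
  // Drop dead defs that also appear as live defs of the same register.
  DeadDefs.erase(std::remove_if(DeadDefs.begin(), DeadDefs.end(),
                                [&](unsigned R) { return containsReg(Defs, R); }),
                 DeadDefs.end());
  assert(DeadDefs.size() == 1 && DeadDefs[0] == 3);
}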
/// Force liveness of registers.
void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- if (TargetRegisterInfo::isVirtualRegister(Regs[i])) {
- if (LiveVirtRegs.insert(Regs[i]).second)
- increaseVirtRegPressure(Regs[i]);
- }
- else {
- if (!hasRegAlias(Regs[i], LivePhysRegs, TRI)) {
- LivePhysRegs.insert(Regs[i]);
- increasePhysRegPressure(Regs[i]);
- }
- }
+ if (LiveRegs.insert(Regs[i]))
+ increaseRegPressure(Regs[i]);
}
}
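
addLiveRegs now goes through a single LiveRegs aggregate whose definition
lives in the header and is not part of this hunk. Judging only from its uses
in this patch (PhysRegs/VirtRegs members, insert reporting whether the
register was new), it behaves roughly like the following standalone model;
the names and the virtual-register test here are illustrative:

#include <cassert>
#include <set>

// Toy stand-ins: LLVM uses SparseSet and TargetRegisterInfo's vreg bit test.
static bool isVirtual(unsigned Reg) { return Reg >= 1u << 31; }

struct LiveRegSet {
  std::set<unsigned> PhysRegs; // physical register units
  std::set<unsigned> VirtRegs; // virtual registers

  bool contains(unsigned Reg) const {
    return isVirtual(Reg) ? VirtRegs.count(Reg) : PhysRegs.count(Reg);
  }
  // Returns true if Reg was newly inserted, mirroring SparseSet::insert.
  bool insert(unsigned Reg) {
    return (isVirtual(Reg) ? VirtRegs : PhysRegs).insert(Reg).second;
  }
  bool erase(unsigned Reg) {
    return (isVirtual(Reg) ? VirtRegs : PhysRegs).erase(Reg) != 0;
  }
};

int main() {
  LiveRegSet L;
  assert(L.insert(5) && !L.insert(5) && L.contains(5));
  assert(L.erase(5) && !L.contains(5));
}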
-/// Add PhysReg to the live in set and increase max pressure.
-void RegPressureTracker::discoverPhysLiveIn(unsigned Reg) {
- assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice");
- if (findRegAlias(Reg, P.LiveInRegs, TRI) != P.LiveInRegs.end())
+/// Add Reg to the live in set and increase max pressure.
+void RegPressureTracker::discoverLiveIn(unsigned Reg) {
+ assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
+ if (containsReg(P.LiveInRegs, Reg))
return;
// At live in discovery, unconditionally increase the high water mark.
P.LiveInRegs.push_back(Reg);
- P.increase(TRI->getMinimalPhysRegClass(Reg), TRI);
+ P.increase(Reg, TRI, MRI);
}
-/// Add PhysReg to the live out set and increase max pressure.
-void RegPressureTracker::discoverPhysLiveOut(unsigned Reg) {
- assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice");
- if (findRegAlias(Reg, P.LiveOutRegs, TRI) != P.LiveOutRegs.end())
+/// Add Reg to the live out set and increase max pressure.
+void RegPressureTracker::discoverLiveOut(unsigned Reg) {
+ assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
+ if (containsReg(P.LiveOutRegs, Reg))
return;
// At live out discovery, unconditionally increase the high water mark.
P.LiveOutRegs.push_back(Reg);
- P.increase(TRI->getMinimalPhysRegClass(Reg), TRI);
-}
-
-/// Add VirtReg to the live in set and increase max pressure.
-void RegPressureTracker::discoverVirtLiveIn(unsigned Reg) {
- assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice");
- if (std::find(P.LiveInRegs.begin(), P.LiveInRegs.end(), Reg) !=
- P.LiveInRegs.end())
- return;
-
- // At live in discovery, unconditionally increase the high water mark.
- P.LiveInRegs.push_back(Reg);
- P.increase(MRI->getRegClass(Reg), TRI);
-}
-
-/// Add VirtReg to the live out set and increase max pressure.
-void RegPressureTracker::discoverVirtLiveOut(unsigned Reg) {
- assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice");
- if (std::find(P.LiveOutRegs.begin(), P.LiveOutRegs.end(), Reg) !=
- P.LiveOutRegs.end())
- return;
-
- // At live out discovery, unconditionally increase the high water mark.
- P.LiveOutRegs.push_back(Reg);
- P.increase(MRI->getRegClass(Reg), TRI);
+ P.increase(Reg, TRI, MRI);
}
/// Recede across the previous instruction.
@@ -447,52 +428,35 @@ bool RegPressureTracker::recede() {
if (RequireIntervals && isTopClosed())
static_cast<IntervalPressure&>(P).openTop(SlotIdx);
- PhysRegOperands PhysRegOpers;
- VirtRegOperands VirtRegOpers;
- collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(CurrPos, RegOpers);
// Boost pressure for all dead defs together.
- increasePhysRegPressure(PhysRegOpers.DeadDefs);
- increaseVirtRegPressure(VirtRegOpers.DeadDefs);
- decreasePhysRegPressure(PhysRegOpers.DeadDefs);
- decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
// Kill liveness at live defs.
// TODO: consider earlyclobbers?
- for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = PhysRegOpers.Defs[i];
- if (LivePhysRegs.erase(Reg))
- decreasePhysRegPressure(Reg);
- else
- discoverPhysLiveOut(Reg);
- }
- for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Defs[i];
- if (LiveVirtRegs.erase(Reg))
- decreaseVirtRegPressure(Reg);
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (LiveRegs.erase(Reg))
+ decreaseRegPressure(Reg);
else
- discoverVirtLiveOut(Reg);
+ discoverLiveOut(Reg);
}
// Generate liveness for uses.
- for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = PhysRegOpers.Uses[i];
- if (!hasRegAlias(Reg, LivePhysRegs, TRI)) {
- increasePhysRegPressure(Reg);
- LivePhysRegs.insert(Reg);
- }
- }
- for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Uses[i];
- if (!LiveVirtRegs.count(Reg)) {
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (!LiveRegs.contains(Reg)) {
// Adjust liveouts if LiveIntervals are available.
if (RequireIntervals) {
- const LiveInterval *LI = &LIS->getInterval(Reg);
- if (!LI->killedAt(SlotIdx))
- discoverVirtLiveOut(Reg);
+ const LiveInterval *LI = getInterval(Reg);
+ if (LI && !LI->killedAt(SlotIdx))
+ discoverLiveOut(Reg);
}
- increaseVirtRegPressure(Reg);
- LiveVirtRegs.insert(Reg);
+ increaseRegPressure(Reg);
+ LiveRegs.insert(Reg);
}
}
return true;
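
The "boost pressure for all dead defs together" pairing works because
increasing pressure also folds the new value into the running maximum, while
decreasing only lowers the current value; the net effect is a bump of the
high-water mark with no change to current pressure. A toy model of that
invariant:

#include <algorithm>
#include <cassert>

struct Pressure {
  int Curr = 0, Max = 0;
  void increase(int N) { Curr += N; Max = std::max(Max, Curr); }
  void decrease(int N) { Curr -= N; }
};

int main() {
  Pressure P;
  P.increase(3);        // three live registers
  P.increase(2);        // two dead defs, momentarily live...
  P.decrease(2);        // ...and immediately dead again
  assert(P.Curr == 3);  // current pressure unchanged by the pair
  assert(P.Max == 5);   // but the high-water mark saw them
}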
@@ -510,7 +474,7 @@ bool RegPressureTracker::advance() {
SlotIndex SlotIdx;
if (RequireIntervals)
- SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+ SlotIdx = getCurrSlot();
// Open the bottom of the region using slot indexes.
if (isBottomClosed()) {
@@ -520,57 +484,43 @@ bool RegPressureTracker::advance() {
static_cast<RegionPressure&>(P).openBottom(CurrPos);
}
- PhysRegOperands PhysRegOpers;
- VirtRegOperands VirtRegOpers;
- collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
-
- // Kill liveness at last uses.
- for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = PhysRegOpers.Uses[i];
- if (!hasRegAlias(Reg, LivePhysRegs, TRI))
- discoverPhysLiveIn(Reg);
- else {
- // Allocatable physregs are always single-use before regalloc.
- decreasePhysRegPressure(Reg);
- LivePhysRegs.erase(Reg);
- }
- }
- for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Uses[i];
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(CurrPos, RegOpers);
+
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ // Discover live-ins.
+ bool isLive = LiveRegs.contains(Reg);
+ if (!isLive)
+ discoverLiveIn(Reg);
+ // Kill liveness at last uses.
+ bool lastUse = false;
if (RequireIntervals) {
- const LiveInterval *LI = &LIS->getInterval(Reg);
- if (LI->killedAt(SlotIdx)) {
- if (LiveVirtRegs.erase(Reg))
- decreaseVirtRegPressure(Reg);
- else
- discoverVirtLiveIn(Reg);
- }
+ const LiveInterval *LI = getInterval(Reg);
+ lastUse = LI && LI->killedAt(SlotIdx);
}
- else if (!LiveVirtRegs.count(Reg)) {
- discoverVirtLiveIn(Reg);
- increaseVirtRegPressure(Reg);
+ else {
+ // Allocatable physregs are always single-use before register rewriting.
+ lastUse = !TargetRegisterInfo::isVirtualRegister(Reg);
}
+ if (lastUse && isLive) {
+ LiveRegs.erase(Reg);
+ decreaseRegPressure(Reg);
+ }
+ else if (!lastUse && !isLive)
+ increaseRegPressure(Reg);
}
// Generate liveness for defs.
- for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = PhysRegOpers.Defs[i];
- if (!hasRegAlias(Reg, LivePhysRegs, TRI)) {
- increasePhysRegPressure(Reg);
- LivePhysRegs.insert(Reg);
- }
- }
- for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Defs[i];
- if (LiveVirtRegs.insert(Reg).second)
- increaseVirtRegPressure(Reg);
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (LiveRegs.insert(Reg))
+ increaseRegPressure(Reg);
}
// Boost pressure for all dead defs together.
- increasePhysRegPressure(PhysRegOpers.DeadDefs);
- increaseVirtRegPressure(VirtRegOpers.DeadDefs);
- decreasePhysRegPressure(PhysRegOpers.DeadDefs);
- decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
// Find the next instruction.
do
@@ -661,39 +611,28 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
/// This is intended for speculative queries. It leaves pressure inconsistent
/// with the current position, so must be restored by the caller.
void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
// Account for register pressure similar to RegPressureTracker::recede().
- PhysRegOperands PhysRegOpers;
- VirtRegOperands VirtRegOpers;
- collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(MI, RegOpers);
// Boost max pressure for all dead defs together.
// Since CurrSetPressure and MaxSetPressure are updated together, increasing
// then immediately decreasing pressure for the dead defs records them in the
// max without changing the current pressure.
- increasePhysRegPressure(PhysRegOpers.DeadDefs);
- increaseVirtRegPressure(VirtRegOpers.DeadDefs);
- decreasePhysRegPressure(PhysRegOpers.DeadDefs);
- decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
// Kill liveness at live defs.
- for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = PhysRegOpers.Defs[i];
- if (!findReg(Reg, false, PhysRegOpers.Uses, TRI))
- decreasePhysRegPressure(PhysRegOpers.Defs);
- }
- for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Defs[i];
- if (!findReg(Reg, true, VirtRegOpers.Uses, TRI))
- decreaseVirtRegPressure(VirtRegOpers.Defs);
+ for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Defs[i];
+ if (!containsReg(RegOpers.Uses, Reg))
+ decreaseRegPressure(Reg);
}
// Generate liveness for uses.
- for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = PhysRegOpers.Uses[i];
- if (!hasRegAlias(Reg, LivePhysRegs, TRI))
- increasePhysRegPressure(Reg);
- }
- for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Uses[i];
- if (!LiveVirtRegs.count(Reg))
- increaseVirtRegPressure(Reg);
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (!LiveRegs.contains(Reg))
+ increaseRegPressure(Reg);
}
}
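
As the comment above says, the bump queries deliberately leave CurrSetPressure
inconsistent with the tracker position, so callers snapshot the pressure, run
the what-if bump, read the result, and restore. A minimal sketch of that
protocol with a toy tracker (the real callers, the max-pressure-delta queries,
are simplified away here):

#include <cassert>
#include <vector>

struct Tracker {
  std::vector<int> CurrSetPressure{2, 0};
  // Speculative: mutates pressure; the caller must restore it.
  void bumpUpwardPressure() { ++CurrSetPressure[0]; }
};

int main() {
  Tracker T;
  std::vector<int> Saved = T.CurrSetPressure; // snapshot
  T.bumpUpwardPressure();                     // what-if query
  int WhatIf = T.CurrSetPressure[0];
  T.CurrSetPressure = Saved;                  // restore the invariant
  assert(WhatIf == 3 && T.CurrSetPressure[0] == 2);
}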
@@ -740,6 +679,8 @@ static bool findUseBetween(unsigned Reg,
UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end();
UI != UE; UI.skipInstruction()) {
const MachineInstr* MI = &*UI;
+ if (MI->isDebugValue())
+ continue;
SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
return true;
@@ -754,38 +695,42 @@ static bool findUseBetween(unsigned Reg,
/// This is intended for speculative queries. It leaves pressure inconsistent
/// with the current position, so must be restored by the caller.
void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
// Account for register pressure similar to RegPressureTracker::recede().
- PhysRegOperands PhysRegOpers;
- VirtRegOperands VirtRegOpers;
- collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
+ RegisterOperands RegOpers(TRI, MRI);
+ collectOperands(MI, RegOpers);
// Kill liveness at last uses. Assume allocatable physregs are single-use
// rather than checking LiveIntervals.
- decreasePhysRegPressure(PhysRegOpers.Uses);
- if (RequireIntervals) {
- SlotIndex SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
- for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
- unsigned Reg = VirtRegOpers.Uses[i];
- const LiveInterval *LI = &LIS->getInterval(Reg);
- // FIXME: allow the caller to pass in the list of vreg uses that remain to
- // be bottom-scheduled to avoid searching uses at each query.
- SlotIndex CurrIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
- if (LI->killedAt(SlotIdx)
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
+
+ for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = RegOpers.Uses[i];
+ if (RequireIntervals) {
+ // FIXME: allow the caller to pass in the list of vreg uses that remain
+ // to be bottom-scheduled to avoid searching uses at each query.
+ SlotIndex CurrIdx = getCurrSlot();
+ const LiveInterval *LI = getInterval(Reg);
+ if (LI && LI->killedAt(SlotIdx)
&& !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
- decreaseVirtRegPressure(Reg);
+ decreaseRegPressure(Reg);
}
}
+ else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Allocatable physregs are always single-use before register rewriting.
+ decreaseRegPressure(Reg);
+ }
}
// Generate liveness for defs.
- increasePhysRegPressure(PhysRegOpers.Defs);
- increaseVirtRegPressure(VirtRegOpers.Defs);
+ increaseRegPressure(RegOpers.Defs);
// Boost pressure for all dead defs together.
- increasePhysRegPressure(PhysRegOpers.DeadDefs);
- increaseVirtRegPressure(VirtRegOpers.DeadDefs);
- decreasePhysRegPressure(PhysRegOpers.DeadDefs);
- decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ increaseRegPressure(RegOpers.DeadDefs);
+ decreaseRegPressure(RegOpers.DeadDefs);
}
/// Consider the pressure increase caused by traversing this instruction
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 5ec6564ce398..07ace7a436c7 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -16,21 +16,17 @@
#define DEBUG_TYPE "reg-scavenging"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
/// setUsed - Set the register and its sub-registers as being used.
@@ -43,15 +39,17 @@ void RegScavenger::setUsed(unsigned Reg) {
bool RegScavenger::isAliasUsed(unsigned Reg) const {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- if (isUsed(*AI))
+ if (isUsed(*AI, *AI == Reg))
return true;
return false;
}
void RegScavenger::initRegState() {
- ScavengedReg = 0;
- ScavengedRC = NULL;
- ScavengeRestore = NULL;
+ for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ I->Reg = 0;
+ I->Restore = NULL;
+ }
// All registers started out unused.
RegsAvailable.set();
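
initRegState now resets a whole list of scavenging slots rather than the old
single ScavengedReg/ScavengedRC/ScavengeRestore triple. The ScavengedInfo
record itself is declared in RegisterScavenging.h and does not appear in this
patch; from the fields these hunks touch, its shape is roughly the following
(a sketch, not the header's actual text):

class MachineInstr;  // from llvm/CodeGen/MachineInstr.h

struct ScavengedInfo {
  ScavengedInfo(int FI = -1) : FrameIndex(FI), Reg(0), Restore(0) {}
  int FrameIndex;        // emergency spill slot, or -1 if none assigned
  unsigned Reg;          // register currently scavenged; 0 means slot is free
  MachineInstr *Restore; // instruction at which Reg is reloaded and freed
};

Keeping several records lets more than one register be in flight at a time,
which the old single triple (asserted against in scavengeRegister) could not
express.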
@@ -112,27 +110,11 @@ void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
BV.set(*SubRegs);
}
-void RegScavenger::forward() {
- // Move ptr forward.
- if (!Tracking) {
- MBBI = MBB->begin();
- Tracking = true;
- } else {
- assert(MBBI != MBB->end() && "Already past the end of the basic block!");
- MBBI = llvm::next(MBBI);
- }
- assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+void RegScavenger::determineKillsAndDefs() {
+ assert(Tracking && "Must be tracking to determine kills and defs");
MachineInstr *MI = MBBI;
-
- if (MI == ScavengeRestore) {
- ScavengedReg = 0;
- ScavengedRC = NULL;
- ScavengeRestore = NULL;
- }
-
- if (MI->isDebugValue())
- return;
+ assert(!MI->isDebugValue() && "Debug values have no kills or defs");
// Find out which registers are early clobbered, killed, defined, and marked
// def-dead in this instruction.
@@ -149,7 +131,7 @@ void RegScavenger::forward() {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || isReserved(Reg))
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
@@ -166,6 +148,54 @@ void RegScavenger::forward() {
addRegWithSubRegs(DefRegs, Reg);
}
}
+}
+
+void RegScavenger::unprocess() {
+ assert(Tracking && "Cannot unprocess because we're not tracking");
+
+ MachineInstr *MI = MBBI;
+ if (MI->isDebugValue())
+ return;
+
+ determineKillsAndDefs();
+
+ // Commit the changes.
+ setUsed(KillRegs);
+ setUnused(DefRegs);
+
+ if (MBBI == MBB->begin()) {
+ MBBI = MachineBasicBlock::iterator(NULL);
+ Tracking = false;
+ } else
+ --MBBI;
+}
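
unprocess is the mirror image of forward's commit step: forward marks KillRegs
unused and DefRegs used, so stepping backwards re-marks kills as used and defs
as unused before moving the iterator. A tiny reversible-step model of that
bookkeeping, with toy sets standing in for the scavenger's BitVectors:

#include <cassert>
#include <set>

struct Scav {
  std::set<unsigned> Used;
  void forward(const std::set<unsigned> &Kills, const std::set<unsigned> &Defs) {
    for (unsigned R : Kills) Used.erase(R);   // kills become unused
    for (unsigned R : Defs)  Used.insert(R);  // defs become used
  }
  void unprocess(const std::set<unsigned> &Kills, const std::set<unsigned> &Defs) {
    for (unsigned R : Kills) Used.insert(R);  // undo the commit in reverse
    for (unsigned R : Defs)  Used.erase(R);
  }
};

int main() {
  Scav S;
  S.Used = {1, 2};
  std::set<unsigned> Kills = {1}, Defs = {3};
  S.forward(Kills, Defs);
  assert(S.Used == std::set<unsigned>({2, 3}));
  S.unprocess(Kills, Defs);
  assert(S.Used == std::set<unsigned>({1, 2}));  // state fully restored
}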
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already past the end of the basic block!");
+ MBBI = llvm::next(MBBI);
+ }
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+
+ MachineInstr *MI = MBBI;
+
+ for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ if (I->Restore != MI)
+ continue;
+
+ I->Reg = 0;
+ I->Restore = NULL;
+ }
+
+ if (MI->isDebugValue())
+ return;
+
+ determineKillsAndDefs();
// Verify uses and defs.
#ifndef NDEBUG
@@ -174,7 +204,7 @@ void RegScavenger::forward() {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || isReserved(Reg))
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
if (MO.isUndef())
@@ -320,6 +350,16 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
return Survivor;
}
+static unsigned getFrameIndexOperandNum(MachineInstr *MI) {
+ unsigned i = 0;
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ return i;
+}
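
This helper exists because eliminateFrameIndex, as changed elsewhere in this
import, now takes the frame-index operand number explicitly instead of
rediscovering it internally. The scan itself is simple; a standalone model
over a toy operand list:

#include <cassert>
#include <vector>

struct Operand { bool IsFI; int Val; };  // stand-in for MachineOperand

static unsigned getFrameIndexOperandNum(const std::vector<Operand> &Ops) {
  unsigned i = 0;
  while (!Ops[i].IsFI) {
    ++i;
    assert(i < Ops.size() && "Instr doesn't have FrameIndex operand!");
  }
  return i;
}

int main() {
  std::vector<Operand> Ops = {{false, 7}, {false, 8}, {true, 2}};
  assert(getFrameIndexOperandNum(Ops) == 2);
}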
+
unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
int SPAdj) {
@@ -354,33 +394,47 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
- assert(ScavengedReg == 0 &&
- "Scavenger slot is live, unable to scavenge another register!");
+ // Find an available scavenging slot.
+ unsigned SI;
+ for (SI = 0; SI < Scavenged.size(); ++SI)
+ if (Scavenged[SI].Reg == 0)
+ break;
+
+ if (SI == Scavenged.size()) {
+ // We need to scavenge a register but have no spill slot; the target
+ // must know how to do it (if not, we'll assert below).
+ Scavenged.push_back(ScavengedInfo());
+ }
// Avoid infinite regress
- ScavengedReg = SReg;
+ Scavenged[SI].Reg = SReg;
// If the target knows how to save/restore the register, let it do so;
// otherwise, use the emergency stack spill slot.
if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
// Spill the scavenged register before I.
- assert(ScavengingFrameIndex >= 0 &&
+ assert(Scavenged[SI].FrameIndex >= 0 &&
"Cannot scavenge register without an emergency spill slot!");
- TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
+ RC, TRI);
MachineBasicBlock::iterator II = prior(I);
- TRI->eliminateFrameIndex(II, SPAdj, this);
+
+ unsigned FIOperandNum = getFrameIndexOperandNum(II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
- TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
+ RC, TRI);
II = prior(UseMI);
- TRI->eliminateFrameIndex(II, SPAdj, this);
+
+ FIOperandNum = getFrameIndexOperandNum(II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
}
- ScavengeRestore = prior(UseMI);
+ Scavenged[SI].Restore = prior(UseMI);
// Doing this here leads to infinite regress.
- // ScavengedReg = SReg;
- ScavengedRC = RC;
+ // Scavenged[SI].Reg = SReg;
DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
"\n");
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 9a6507100170..07e5b470fb1e 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -16,12 +16,12 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
using namespace llvm;
@@ -62,10 +62,14 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
-bool SUnit::addPred(const SDep &D) {
+bool SUnit::addPred(const SDep &D, bool Required) {
// If this node already has this dependence, don't add a redundant one.
for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
I != E; ++I) {
+ // Zero-latency weak edges may be added purely for heuristic ordering. Don't
+ // add them if another kind of edge already exists.
+ if (!Required && I->getSUnit() == D.getSUnit())
+ return false;
if (I->overlaps(D)) {
// Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
if (I->getLatency() < D.getLatency()) {
@@ -97,12 +101,22 @@ bool SUnit::addPred(const SDep &D) {
++N->NumSuccs;
}
if (!N->isScheduled) {
- assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
- ++NumPredsLeft;
+ if (D.isWeak()) {
+ ++WeakPredsLeft;
+ }
+ else {
+ assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ ++NumPredsLeft;
+ }
}
if (!isScheduled) {
- assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
- ++N->NumSuccsLeft;
+ if (D.isWeak()) {
+ ++N->WeakSuccsLeft;
+ }
+ else {
+ assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++N->NumSuccsLeft;
+ }
}
Preds.push_back(D);
N->Succs.push_back(P);
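
Weak edges get their own WeakPredsLeft/WeakSuccsLeft counters because they
order heuristically but must not gate readiness: a node with outstanding weak
predecessors can still be picked. A toy readiness check showing the split:

#include <cassert>

struct SUnit {
  unsigned NumPredsLeft = 0;   // strong edges: must be scheduled first
  unsigned WeakPredsLeft = 0;  // heuristic edges: preferred, not required
  bool isReady() const { return NumPredsLeft == 0; }
};

int main() {
  SUnit SU;
  SU.NumPredsLeft = 1;
  SU.WeakPredsLeft = 2;
  --SU.NumPredsLeft;     // last strong predecessor scheduled
  assert(SU.isReady());  // weak preds remain, but SU may be scheduled anyway
}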
@@ -121,20 +135,14 @@ void SUnit::removePred(const SDep &D) {
for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
I != E; ++I)
if (*I == D) {
- bool FoundSucc = false;
// Find the corresponding successor in N.
SDep P = D;
P.setSUnit(this);
SUnit *N = D.getSUnit();
- for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(),
- EE = N->Succs.end(); II != EE; ++II)
- if (*II == P) {
- FoundSucc = true;
- N->Succs.erase(II);
- break;
- }
- assert(FoundSucc && "Mismatching preds / succs lists!");
- (void)FoundSucc;
+ SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(),
+ N->Succs.end(), P);
+ assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
+ N->Succs.erase(Succ);
Preds.erase(I);
// Update the bookkeeping.
if (P.getKind() == SDep::Data) {
@@ -144,12 +152,20 @@ void SUnit::removePred(const SDep &D) {
--N->NumSuccs;
}
if (!N->isScheduled) {
- assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
- --NumPredsLeft;
+ if (D.isWeak())
+ --WeakPredsLeft;
+ else {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
+ }
}
if (!isScheduled) {
- assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
- --N->NumSuccsLeft;
+ if (D.isWeak())
+ --N->WeakSuccsLeft;
+ else {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
}
if (P.getLatency() != 0) {
this->setDepthDirty();
@@ -279,6 +295,21 @@ void SUnit::ComputeHeight() {
} while (!WorkList.empty());
}
+void SUnit::biasCriticalPath() {
+ if (NumPreds < 2)
+ return;
+
+ SUnit::pred_iterator BestI = Preds.begin();
+ unsigned MaxDepth = BestI->getSUnit()->getDepth();
+ for (SUnit::pred_iterator
+ I = llvm::next(BestI), E = Preds.end(); I != E; ++I) {
+ if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
+ BestI = I;
+ }
+ if (BestI != Preds.begin())
+ std::swap(*Preds.begin(), *BestI);
+}
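
One subtlety in biasCriticalPath as committed: MaxDepth is never refreshed
inside the loop, so BestI lands on the last data predecessor deeper than the
first predecessor, not necessarily the deepest one overall. The standalone
variant below tracks the running maximum; it is an illustration of the
presumable intent, not the committed code:

#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

// Toy model: each pred is (isData, depth). Move the deepest data pred first.
int main() {
  std::vector<std::pair<bool, unsigned>> Preds = {
      {true, 3}, {true, 7}, {true, 5}};
  auto BestI = Preds.begin();
  unsigned MaxDepth = BestI->second;
  for (auto I = std::next(BestI), E = Preds.end(); I != E; ++I) {
    if (I->first && I->second > MaxDepth) {
      MaxDepth = I->second;  // keep the running maximum current
      BestI = I;
    }
  }
  if (BestI != Preds.begin())
    std::swap(*Preds.begin(), *BestI);
  assert(Preds.front().second == 7);
}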
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
/// a group of nodes flagged together.
@@ -292,10 +323,14 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ if (WeakPredsLeft)
+ dbgs() << " # weak preds left : " << WeakPredsLeft << "\n";
+ if (WeakSuccsLeft)
+ dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n";
dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
dbgs() << " Latency : " << Latency << "\n";
- dbgs() << " Depth : " << Depth << "\n";
- dbgs() << " Height : " << Height << "\n";
+ dbgs() << " Depth : " << getDepth() << "\n";
+ dbgs() << " Height : " << getHeight() << "\n";
if (Preds.size() != 0) {
dbgs() << " Predecessors:\n";
@@ -332,6 +367,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
dbgs() << "\n";
}
}
@@ -429,6 +466,8 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
Node2Index.resize(DAGSize);
// Initialize the data structures.
+ if (ExitSU)
+ WorkList.push_back(ExitSU);
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
SUnit *SU = &SUnits[i];
int NodeNum = SU->NodeNum;
@@ -448,11 +487,12 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
while (!WorkList.empty()) {
SUnit *SU = WorkList.back();
WorkList.pop_back();
- Allocate(SU->NodeNum, --Id);
+ if (SU->NodeNum < DAGSize)
+ Allocate(SU->NodeNum, --Id);
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
SUnit *SU = I->getSUnit();
- if (!--Node2Index[SU->NodeNum])
+ if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
// If all dependencies of the node are processed already,
// then the node can be computed now.
WorkList.push_back(SU);
@@ -513,7 +553,10 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
WorkList.pop_back();
Visited.set(SU->NodeNum);
for (int I = SU->Succs.size()-1; I >= 0; --I) {
- int s = SU->Succs[I].getSUnit()->NodeNum;
+ unsigned s = SU->Succs[I].getSUnit()->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (s >= Node2Index.size())
+ continue;
if (Node2Index[s] == UpperBound) {
HasLoop = true;
return;
@@ -554,15 +597,16 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
}
-/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
-/// create a cycle.
-bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
- if (IsReachable(TargetSU, SU))
+/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will
+/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU).
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
+ // Is SU reachable from TargetSU via successor edges?
+ if (IsReachable(SU, TargetSU))
return true;
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I)
+ for (SUnit::pred_iterator
+ I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I)
if (I->isAssignedRegDep() &&
- IsReachable(TargetSU, I->getSUnit()))
+ IsReachable(SU, I->getSUnit()))
return true;
return false;
}
@@ -592,6 +636,7 @@ void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
}
ScheduleDAGTopologicalSort::
-ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits) : SUnits(sunits) {}
+ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
+ : SUnits(sunits), ExitSU(exitsu) {}
ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index a4d4a93e6dd5..71e7a21ef2bc 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -12,8 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sched-instrs"
-#include "llvm/Operator.h"
+#define DEBUG_TYPE "misched"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -22,19 +25,17 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/CodeGen/ScheduleDAGILP.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/IR/Operator.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
@@ -66,7 +67,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
// regular getUnderlyingObjectFromInt.
if (U->getOpcode() == Instruction::PtrToInt)
return U->getOperand(0);
- // If we find an add of a constant or a multiplied value, it's
+ // If we find an add of a constant, a multiplied value, or a phi, it's
// likely that the other operand will lead us to the base
// object. We don't have to worry about the case where the
// object address is somehow being computed by the multiply,
@@ -74,7 +75,8 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
// identifiable object.
if (U->getOpcode() != Instruction::Add ||
(!isa<ConstantInt>(U->getOperand(1)) &&
- Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
+ Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
+ !isa<PHINode>(U->getOperand(1))))
return V;
V = U->getOperand(0);
} else {
@@ -84,56 +86,77 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
} while (1);
}
-/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
+/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
-static const Value *getUnderlyingObject(const Value *V) {
- // First just call Value::getUnderlyingObject to let it do what it does.
+static void getUnderlyingObjects(const Value *V,
+ SmallVectorImpl<Value *> &Objects) {
+ SmallPtrSet<const Value*, 16> Visited;
+ SmallVector<const Value *, 4> Working(1, V);
do {
- V = GetUnderlyingObject(V);
- // If it found an inttoptr, use special code to continue climing.
- if (Operator::getOpcode(V) != Instruction::IntToPtr)
- break;
- const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
- // If that succeeded in finding a pointer, continue the search.
- if (!O->getType()->isPointerTy())
- break;
- V = O;
- } while (1);
- return V;
+ V = Working.pop_back_val();
+
+ SmallVector<Value *, 4> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+ I != IE; ++I) {
+ V = *I;
+ if (!Visited.insert(V))
+ continue;
+ if (Operator::getOpcode(V) == Instruction::IntToPtr) {
+ const Value *O =
+ getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ if (O->getType()->isPointerTy()) {
+ Working.push_back(O);
+ continue;
+ }
+ }
+ Objects.push_back(const_cast<Value *>(V));
+ }
+ } while (!Working.empty());
}
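
Where the old getUnderlyingObject chased a single chain, the new routine runs
a worklist with a visited set so that values with several possible bases
(selects, phis, ptrtoint round-trips) contribute every base. The traversal
pattern, modeled standalone over a toy points-to graph:

#include <cassert>
#include <map>
#include <set>
#include <vector>

// Toy graph: each value maps to its (possibly several) underlying values.
using Graph = std::map<int, std::vector<int>>;

static void getUnderlyingObjects(const Graph &G, int V, std::set<int> &Objects) {
  std::set<int> Visited;
  std::vector<int> Working(1, V);
  do {
    V = Working.back();
    Working.pop_back();
    auto It = G.find(V);
    if (It == G.end()) {  // no further bases: V is itself an object
      Objects.insert(V);
      continue;
    }
    for (int O : It->second)
      if (Visited.insert(O).second)  // guard against cycles (phis)
        Working.push_back(O);
  } while (!Working.empty());
}

int main() {
  // 1 is a phi of 2 and 3; 3 forwards to 4.
  Graph G = {{1, {2, 3}}, {3, {4}}};
  std::set<int> Objs;
  getUnderlyingObjects(G, 1, Objs);
  assert(Objs == std::set<int>({2, 4}));
}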
-/// getUnderlyingObjectForInstr - If this machine instr has memory reference
+/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
-/// object, return the Value for that object. Otherwise return null.
-static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
- const MachineFrameInfo *MFI,
- bool &MayAlias) {
- MayAlias = true;
+/// object, return the Value for that object.
+static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
+ const MachineFrameInfo *MFI,
+ SmallVectorImpl<std::pair<const Value *, bool> > &Objects) {
if (!MI->hasOneMemOperand() ||
!(*MI->memoperands_begin())->getValue() ||
(*MI->memoperands_begin())->isVolatile())
- return 0;
+ return;
const Value *V = (*MI->memoperands_begin())->getValue();
if (!V)
- return 0;
-
- V = getUnderlyingObject(V);
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
- // For now, ignore PseudoSourceValues which may alias LLVM IR values
- // because the code that uses this function has no way to cope with
- // such aliases.
- if (PSV->isAliased(MFI))
- return 0;
-
- MayAlias = PSV->mayAlias(MFI);
- return V;
- }
+ return;
+
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs);
- if (isIdentifiedObject(V))
- return V;
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+ I != IE; ++I) {
+ bool MayAlias = true;
+ V = *I;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
- return 0;
+ if (PSV->isAliased(MFI)) {
+ Objects.clear();
+ return;
+ }
+
+ MayAlias = PSV->mayAlias(MFI);
+ } else if (!isIdentifiedObject(V)) {
+ Objects.clear();
+ return;
+ }
+
+ Objects.push_back(std::make_pair(V, MayAlias));
+ }
}
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
@@ -145,20 +168,6 @@ void ScheduleDAGInstrs::finishBlock() {
BB = 0;
}
-/// Initialize the map with the number of registers.
-void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
- PhysRegSet.setUniverse(Limit);
- SUnits.resize(Limit);
-}
-
-/// Clear the map without deallocating storage.
-void Reg2SUnitsMap::clear() {
- for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
- SUnits[*I].clear();
- }
- PhysRegSet.clear();
-}
-
/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
@@ -205,10 +214,11 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
if (Reg == 0) continue;
if (TRI->isPhysicalRegister(Reg))
- Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
else {
assert(!IsPostRA && "Virtual register encountered after regalloc.");
- addVRegUseDeps(&ExitSU, i);
+ if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(&ExitSU, i);
}
}
} else {
@@ -221,7 +231,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
if (!Uses.contains(Reg))
- Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
}
}
}
@@ -239,27 +249,31 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
Alias.isValid(); ++Alias) {
if (!Uses.contains(*Alias))
continue;
- std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
- for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
- SUnit *UseSU = UseList[i].SU;
+ for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
+ SUnit *UseSU = I->SU;
if (UseSU == SU)
continue;
- SDep dep(SU, SDep::Data, *Alias);
-
// Adjust the dependence latency using operand def/use information,
// then allow the target to perform its own adjustments.
- int UseOp = UseList[i].OpIdx;
- MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr();
- dep.setLatency(
+ int UseOp = I->OpIdx;
+ MachineInstr *RegUse = 0;
+ SDep Dep;
+ if (UseOp < 0)
+ Dep = SDep(SU, SDep::Artificial);
+ else {
+ Dep = SDep(SU, SDep::Data, *Alias);
+ RegUse = UseSU->getInstr();
+ Dep.setMinLatency(
+ SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp, /*FindMin=*/true));
+ }
+ Dep.setLatency(
SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp, /*FindMin=*/false));
- dep.setMinLatency(
- SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
- RegUse, UseOp, /*FindMin=*/true));
- ST.adjustSchedDependency(SU, UseSU, dep);
- UseSU->addPred(dep);
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ UseSU->addPred(Dep);
}
}
}
@@ -282,9 +296,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
Alias.isValid(); ++Alias) {
if (!Defs.contains(*Alias))
continue;
- std::vector<PhysRegSUOper> &DefList = Defs[*Alias];
- for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
- SUnit *DefSU = DefList[i].SU;
+ for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
+ SUnit *DefSU = I->SU;
if (DefSU == &ExitSU)
continue;
if (DefSU != SU &&
@@ -308,33 +321,37 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
// retrieve the existing SUnits list for this register's uses.
// Push this SUnit on the use list.
- Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx));
+ Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
}
else {
addPhysRegDataDeps(SU, OperIdx);
-
- // Either insert a new Reg2SUnits entry with an empty SUnits list, or
- // retrieve the existing SUnits list for this register's defs.
- std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];
+ unsigned Reg = MO.getReg();
// clear this register's use list
- if (Uses.contains(MO.getReg()))
- Uses[MO.getReg()].clear();
-
- if (!MO.isDead())
- DefList.clear();
-
- // Calls will not be reordered because of chain dependencies (see
- // below). Since call operands are dead, calls may continue to be added
- // to the DefList making dependence checking quadratic in the size of
- // the block. Instead, we leave only one call at the back of the
- // DefList.
- if (SU->isCall) {
- while (!DefList.empty() && DefList.back().SU->isCall)
- DefList.pop_back();
+ if (Uses.contains(Reg))
+ Uses.eraseAll(Reg);
+
+ if (!MO.isDead()) {
+ Defs.eraseAll(Reg);
+ } else if (SU->isCall) {
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+ // to the DefList making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
+ Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
+ Reg2SUnitsMap::iterator B = P.first;
+ Reg2SUnitsMap::iterator I = P.second;
+ for (bool isBegin = I == B; !isBegin; /* empty */) {
+ isBegin = (--I) == B;
+ if (!I->SU->isCall)
+ break;
+ I = Defs.erase(I);
+ }
}
+
// Defs are pushed in the order they are visited and never reordered.
- DefList.push_back(PhysRegSUOper(SU, OperIdx));
+ Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
}
}
@@ -445,23 +462,29 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
if ((*MI->memoperands_begin())->isVolatile() ||
MI->hasUnmodeledSideEffects())
return true;
-
const Value *V = (*MI->memoperands_begin())->getValue();
if (!V)
return true;
- V = getUnderlyingObject(V);
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
- // Similarly to getUnderlyingObjectForInstr:
- // For now, ignore PseudoSourceValues which may alias LLVM IR values
- // because the code that uses this function has no way to cope with
- // such aliases.
- if (PSV->isAliased(MFI))
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs);
+ for (SmallVector<Value *, 4>::iterator I = Objs.begin(),
+ IE = Objs.end(); I != IE; ++I) {
+ V = *I;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // Similarly to getUnderlyingObjectForInstr:
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return true;
+ }
+
+ // Does this pointer refer to a distinct and identifiable object?
+ if (!isIdentifiedObject(V))
return true;
}
- // Does this pointer refer to a distinct and identifiable object?
- if (!isIdentifiedObject(V))
- return true;
return false;
}
@@ -680,8 +703,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// so that they can be given more precise dependencies. We track
// separately the known memory locations that may alias and those
// that are known not to alias
- std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
- std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+ MapVector<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+ MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
std::set<SUnit*> RejectMemNodes;
// Remove any stale debug info; sometimes BuildSchedGraph is called again
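
Switching these tables from std::map to MapVector is about determinism: the
keys are Value pointers, and a pointer-keyed std::map iterates in address
order, which varies from run to run and so emitted dependence edges in an
unstable order. MapVector iterates in insertion order while keeping map-style
lookup. A minimal model of the container, assuming only the operations used
here (operator[], iteration):

#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

// Minimal MapVector-alike: deterministic, insertion-ordered iteration.
template <typename K, typename V> struct MapVector {
  std::vector<std::pair<K, V>> Vec;     // iteration order
  std::unordered_map<K, size_t> Index;  // fast lookup

  V &operator[](const K &Key) {
    auto It = Index.find(Key);
    if (It != Index.end())
      return Vec[It->second].second;
    Index[Key] = Vec.size();
    Vec.push_back(std::make_pair(Key, V()));
    return Vec.back().second;
  }
  typename std::vector<std::pair<K, V>>::iterator begin() { return Vec.begin(); }
  typename std::vector<std::pair<K, V>>::iterator end() { return Vec.end(); }
};

int main() {
  MapVector<std::string, int> M;
  M["store1"] = 1;
  M["store0"] = 2;  // std::map would iterate this key first
  assert(M.begin()->first == "store1");  // insertion order preserved
}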
@@ -691,8 +714,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
assert(Defs.empty() && Uses.empty() &&
"Only BuildGraph should update Defs/Uses");
- Defs.setRegLimit(TRI->getNumRegs());
- Uses.setRegLimit(TRI->getNumRegs());
+ Defs.setUniverse(TRI->getNumRegs());
+ Uses.setUniverse(TRI->getNumRegs());
assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
// FIXME: Allow SparseSet to reserve space for the creation of virtual
@@ -705,17 +728,17 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addSchedBarrierDeps();
// Walk the list of instructions, from bottom moving up.
- MachineInstr *PrevMI = NULL;
+ MachineInstr *DbgMI = NULL;
for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
MII != MIE; --MII) {
MachineInstr *MI = prior(MII);
- if (MI && PrevMI) {
- DbgValues.push_back(std::make_pair(PrevMI, MI));
- PrevMI = NULL;
+ if (MI && DbgMI) {
+ DbgValues.push_back(std::make_pair(DbgMI, MI));
+ DbgMI = NULL;
}
if (MI->isDebugValue()) {
- PrevMI = MI;
+ DbgMI = MI;
continue;
}
if (RPTracker) {
@@ -723,13 +746,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
}
- assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
+ assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) &&
"Cannot schedule terminators or labels!");
SUnit *SU = MISUnitMap[MI];
assert(SU && "No SUnit mapped to this MI");
// Add register-based dependencies (data, anti, and output).
+ bool HasVRegDef = false;
for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
const MachineOperand &MO = MI->getOperand(j);
if (!MO.isReg()) continue;
@@ -740,12 +764,26 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addPhysRegDeps(SU, j);
else {
assert(!IsPostRA && "Virtual register encountered!");
- if (MO.isDef())
+ if (MO.isDef()) {
+ HasVRegDef = true;
addVRegDefDeps(SU, j);
+ }
else if (MO.readsReg()) // ignore undef operands
addVRegUseDeps(SU, j);
}
}
+ // If we haven't seen any uses in this scheduling region, create a
+ // dependence edge to ExitSU to model the live-out latency. This is required
+ // for vreg defs with no in-region use, and prefetches with no vreg def.
+ //
+ // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
+ // check currently relies on being called before adding chain deps.
+ if (SU->NumSuccs == 0 && SU->Latency > 1
+ && (HasVRegDef || MI->mayLoad())) {
+ SDep Dep(SU, SDep::Artificial);
+ Dep.setLatency(SU->Latency - 1);
+ ExitSU.addPred(Dep);
+ }
// Add chain dependencies.
// Chain dependencies used to enforce memory order should have
@@ -760,11 +798,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (isGlobalMemoryObject(AA, MI)) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
- for (std::map<const Value *, SUnit *>::iterator I =
+ for (MapVector<const Value *, SUnit *>::iterator I =
NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
I->second->addPred(SDep(SU, SDep::Barrier));
}
- for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
SDep Dep(SU, SDep::Barrier);
@@ -798,10 +836,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
- for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+ for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
E = AliasMemDefs.end(); I != E; ++I)
addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
- for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
@@ -813,60 +851,70 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
AliasMemDefs.clear();
AliasMemUses.clear();
} else if (MI->mayStore()) {
- bool MayAlias = true;
- if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+ SmallVector<std::pair<const Value *, bool>, 4> Objs;
+ getUnderlyingObjectsForInstr(MI, MFI, Objs);
+
+ if (Objs.empty()) {
+ // Treat all other stores conservatively.
+ goto new_alias_chain;
+ }
+
+ bool MayAlias = false;
+ for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
+ K = Objs.begin(), KE = Objs.end(); K != KE; ++K) {
+ const Value *V = K->first;
+ bool ThisMayAlias = K->second;
+ if (ThisMayAlias)
+ MayAlias = true;
+
// A store to a specific PseudoSourceValue. Add precise dependencies.
// Record the def in MemDefs, first adding a dep if there is
// an existing def.
- std::map<const Value *, SUnit *>::iterator I =
- ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- std::map<const Value *, SUnit *>::iterator IE =
- ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ MapVector<const Value *, SUnit *>::iterator I =
+ ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ MapVector<const Value *, SUnit *>::iterator IE =
+ ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes,
- 0, true);
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
I->second = SU;
} else {
- if (MayAlias)
+ if (ThisMayAlias)
AliasMemDefs[V] = SU;
else
NonAliasMemDefs[V] = SU;
}
// Handle the uses in MemUses, if there are any.
- std::map<const Value *, std::vector<SUnit *> >::iterator J =
- ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
- std::map<const Value *, std::vector<SUnit *> >::iterator JE =
- ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+ MapVector<const Value *, std::vector<SUnit *> >::iterator J =
+ ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+ MapVector<const Value *, std::vector<SUnit *> >::iterator JE =
+ ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
TrueMemOrderLatency, true);
J->second.clear();
}
- if (MayAlias) {
- // Add dependencies from all the PendingLoads, i.e. loads
- // with no underlying object.
- for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
- TrueMemOrderLatency);
- // Add dependence on alias chain, if needed.
- if (AliasChain)
- addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
- // But we also should check dependent instructions for the
- // SU in question.
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
- TrueMemOrderLatency);
- }
- // Add dependence on barrier chain, if needed.
- // There is no point to check aliasing on barrier event. Even if
- // SU and barrier _could_ be reordered, they should not. In addition,
- // we have lost all RejectMemNodes below barrier.
- if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Barrier));
- } else {
- // Treat all other stores conservatively.
- goto new_alias_chain;
}
+ if (MayAlias) {
+ // Add dependencies from all the PendingLoads, i.e. loads
+ // with no underlying object.
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
+ // Add dependence on alias chain, if needed.
+ if (AliasChain)
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ // But we also should check dependent instructions for the
+ // SU in question.
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ }
+ // Add dependence on barrier chain, if needed.
+ // There is no point to check aliasing on barrier event. Even if
+ // SU and barrier _could_ be reordered, they should not. In addition,
+ // we have lost all RejectMemNodes below barrier.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
if (!ExitSU.isPred(SU))
// Push stores up a bit to avoid them getting in between cmp
@@ -877,28 +925,41 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
} else {
- if (const Value *V =
- getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
- // A load from a specific PseudoSourceValue. Add precise dependencies.
- std::map<const Value *, SUnit *>::iterator I =
- ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- std::map<const Value *, SUnit *>::iterator IE =
- ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
- if (I != IE)
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
- if (MayAlias)
- AliasMemUses[V].push_back(SU);
- else
- NonAliasMemUses[V].push_back(SU);
- } else {
+ SmallVector<std::pair<const Value *, bool>, 4> Objs;
+ getUnderlyingObjectsForInstr(MI, MFI, Objs);
+
+ if (Objs.empty()) {
// A load with no underlying object. Depend on all
// potentially aliasing stores.
- for (std::map<const Value *, SUnit *>::iterator I =
+ for (MapVector<const Value *, SUnit *>::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
PendingLoads.push_back(SU);
MayAlias = true;
+ } else {
+ MayAlias = false;
+ }
+
+ for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
+ J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
+ const Value *V = J->first;
+ bool ThisMayAlias = J->second;
+
+ if (ThisMayAlias)
+ MayAlias = true;
+
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ MapVector<const Value *, SUnit *>::iterator I =
+ ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ MapVector<const Value *, SUnit *>::iterator IE =
+ ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE)
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+ if (ThisMayAlias)
+ AliasMemUses[V].push_back(SU);
+ else
+ NonAliasMemUses[V].push_back(SU);
}
if (MayAlias)
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
@@ -910,8 +971,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
}
}
}
- if (PrevMI)
- FirstDbgValue = PrevMI;
+ if (DbgMI)
+ FirstDbgValue = DbgMI;
Defs.clear();
Uses.clear();
@@ -933,7 +994,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
else if (SU == &ExitSU)
oss << "<exit>";
else
- SU->getInstr()->print(oss);
+ SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true);
return oss.str();
}
@@ -943,6 +1004,203 @@ std::string ScheduleDAGInstrs::getDAGName() const {
return "dag." + BB->getFullName();
}
+//===----------------------------------------------------------------------===//
+// SchedDFSResult Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+/// \brief Internal state used to compute SchedDFSResult.
+class SchedDFSImpl {
+ SchedDFSResult &R;
+
+ /// Join DAG nodes into equivalence classes by their subtree.
+ IntEqClasses SubtreeClasses;
+ /// List PredSU, SuccSU pairs that represent data edges between subtrees.
+ std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
+
+ struct RootData {
+ unsigned NodeID;
+ unsigned ParentNodeID; // Parent node (member of the parent subtree).
+ unsigned SubInstrCount; // Instr count in this tree only, not children.
+
+ RootData(unsigned id): NodeID(id),
+ ParentNodeID(SchedDFSResult::InvalidSubtreeID),
+ SubInstrCount(0) {}
+
+ unsigned getSparseSetIndex() const { return NodeID; }
+ };
+
+ SparseSet<RootData> RootSet;
+
+public:
+ SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
+ RootSet.setUniverse(R.DFSNodeData.size());
+ }
+
+ /// Return true if this node has been visited by the DFS traversal.
+ ///
+ /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
+ /// ID. Later, SubtreeID is updated but remains valid.
+ bool isVisited(const SUnit *SU) const {
+ return R.DFSNodeData[SU->NodeNum].SubtreeID
+ != SchedDFSResult::InvalidSubtreeID;
+ }
+
+ /// Initialize this node's instruction count. We don't need to flag the node
+ /// visited until visitPostorder because the DAG cannot have cycles.
+ void visitPreorder(const SUnit *SU) {
+ R.DFSNodeData[SU->NodeNum].InstrCount =
+ SU->getInstr()->isTransient() ? 0 : 1;
+ }
+
+ /// Called once for each node after all predecessors are visited. Revisit this
+ /// node's predecessors and potentially join them now that we know the ILP of
+ /// the other predecessors.
+ void visitPostorderNode(const SUnit *SU) {
+ // Mark this node as the root of a subtree. It may be joined with its
+ // successors later.
+ R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
+ RootData RData(SU->NodeNum);
+ RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
+
+ // If any predecessors are still in their own subtree, they either cannot be
+ // joined or are large enough to remain separate. If this parent node's
+ // total instruction count is not greater than a child subtree by at least
+ // the subtree limit, then try to join it now since splitting subtrees is
+ // only useful if multiple high-pressure paths are possible.
+ unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
+ for (SUnit::const_pred_iterator
+ PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+ if (PI->getKind() != SDep::Data)
+ continue;
+ unsigned PredNum = PI->getSUnit()->NodeNum;
+ if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
+ joinPredSubtree(*PI, SU, /*CheckLimit=*/false);
+
+ // Either link or merge the TreeData entry from the child to the parent.
+ if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
+ // If the predecessor's parent is invalid, this is a tree edge and the
+ // current node is the parent.
+ if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
+ RootSet[PredNum].ParentNodeID = SU->NodeNum;
+ }
+ else if (RootSet.count(PredNum)) {
+ // The predecessor is not a root, but is still in the root set. This
+ // must be the new parent that it was just joined to. Note that
+ // RootSet[PredNum].ParentNodeID may either be invalid or may still be
+ // set to the original parent.
+ RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
+ RootSet.erase(PredNum);
+ }
+ }
+ RootSet[SU->NodeNum] = RData;
+ }
+
+ /// Called once for each tree edge after calling visitPostorderNode on the
+ /// predecessor. Increment the parent node's instruction count and
+ /// preemptively join this subtree to its parent's if it is small enough.
+ void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
+ R.DFSNodeData[Succ->NodeNum].InstrCount
+ += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
+ joinPredSubtree(PredDep, Succ);
+ }
+
+ /// Add a connection for cross edges.
+ void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
+ ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
+ }
+
+ /// Set each node's subtree ID to the representative ID and record connections
+ /// between trees.
+ void finalize() {
+ SubtreeClasses.compress();
+ R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
+ assert(SubtreeClasses.getNumClasses() == RootSet.size()
+ && "number of roots should match trees");
+ for (SparseSet<RootData>::const_iterator
+ RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) {
+ unsigned TreeID = SubtreeClasses[RI->NodeID];
+ if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
+ R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
+ R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
+ // Note that SubInstrCount may be greater than InstrCount if we joined
+ // subtrees across a cross edge. InstrCount will be attributed to the
+ // original parent, while SubInstrCount will be attributed to the joined
+ // parent.
+ }
+ R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
+ R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
+ DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
+ for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
+ R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
+ DEBUG(dbgs() << " SU(" << Idx << ") in tree "
+ << R.DFSNodeData[Idx].SubtreeID << '\n');
+ }
+ for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
+ I = ConnectionPairs.begin(), E = ConnectionPairs.end();
+ I != E; ++I) {
+ unsigned PredTree = SubtreeClasses[I->first->NodeNum];
+ unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
+ if (PredTree == SuccTree)
+ continue;
+ unsigned Depth = I->first->getDepth();
+ addConnection(PredTree, SuccTree, Depth);
+ addConnection(SuccTree, PredTree, Depth);
+ }
+ }
+
+protected:
+ /// Join the predecessor subtree with the successor that is its DFS
+ /// parent. Apply some heuristics before joining.
+ bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
+ bool CheckLimit = true) {
+ assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
+
+ // Check if the predecessor is already joined.
+ const SUnit *PredSU = PredDep.getSUnit();
+ unsigned PredNum = PredSU->NodeNum;
+ if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
+ return false;
+
+ // Four is the magic number of successors before a node is considered a
+ // pinch point.
+ unsigned NumDataSucs = 0;
+ for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(),
+ SE = PredSU->Succs.end(); SI != SE; ++SI) {
+ if (SI->getKind() == SDep::Data) {
+ if (++NumDataSucs >= 4)
+ return false;
+ }
+ }
+ if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
+ return false;
+ R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
+ SubtreeClasses.join(Succ->NodeNum, PredNum);
+ return true;
+ }
+
+ /// Called by finalize() to record a connection between trees.
+ void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
+ if (!Depth)
+ return;
+
+ do {
+ SmallVectorImpl<SchedDFSResult::Connection> &Connections =
+ R.SubtreeConnections[FromTree];
+ for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
+ I = Connections.begin(), E = Connections.end(); I != E; ++I) {
+ if (I->TreeID == ToTree) {
+ I->Level = std::max(I->Level, Depth);
+ return;
+ }
+ }
+ Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
+ FromTree = R.DFSTreeData[FromTree].ParentTreeID;
+ } while (FromTree != SchedDFSResult::InvalidSubtreeID);
+ }
+};
+} // namespace llvm
+
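// A standalone sketch (plain C++, not the LLVM API) of the subtree-joining
// idea implemented by SchedDFSImpl above: each node starts as its own
// subtree, and a predecessor is merged into its DFS parent's class with
// union-find unless the parent already outgrows it by the subtree limit.
// All names here are illustrative.
#include <numeric>
#include <vector>

struct SubtreeJoinSketch {
  std::vector<unsigned> Parent;     // union-find parent links
  std::vector<unsigned> InstrCount; // accumulated instruction counts
  unsigned Limit;                   // plays the role of SubtreeLimit

  SubtreeJoinSketch(unsigned N, unsigned Lim)
      : Parent(N), InstrCount(N, 1), Limit(Lim) {
    std::iota(Parent.begin(), Parent.end(), 0u);
  }

  unsigned find(unsigned X) {
    while (Parent[X] != X)
      X = Parent[X] = Parent[Parent[X]]; // path halving
    return X;
  }

  // Visit a tree edge Pred -> Succ in postorder: accumulate the count,
  // then join unless the successor exceeds the predecessor by the limit.
  void visitEdge(unsigned Pred, unsigned Succ) {
    InstrCount[Succ] += InstrCount[Pred];
    if (InstrCount[Succ] - InstrCount[Pred] < Limit)
      Parent[find(Pred)] = find(Succ); // small subtree: merge it
    // otherwise Pred remains the root of a separate subtree
  }
};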
namespace {
/// \brief Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
@@ -955,7 +1213,10 @@ public:
}
void advance() { ++DFSStack.back().second; }
- void backtrack() { DFSStack.pop_back(); }
+ const SDep *backtrack() {
+ DFSStack.pop_back();
+ return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second);
+ }
const SUnit *getCurr() const { return DFSStack.back().first; }
@@ -967,57 +1228,83 @@ public:
};
} // anonymous
-void ScheduleDAGILP::resize(unsigned NumSUnits) {
- ILPValues.resize(NumSUnits);
-}
-
-ILPValue ScheduleDAGILP::getILP(const SUnit *SU) {
- return ILPValues[SU->NodeNum];
-}
-
-// A leaf node has an ILP of 1/1.
-static ILPValue initILP(const SUnit *SU) {
- unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1;
- return ILPValue(Cnt, 1 + SU->getDepth());
+static bool hasDataSucc(const SUnit *SU) {
+ for (SUnit::const_succ_iterator
+ SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
+ if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
+ return true;
+ }
+ return false;
}
/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from each root.
-void ScheduleDAGILP::computeILP(const SUnit *Root) {
+void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
if (!IsBottomUp)
llvm_unreachable("Top-down ILP metric is unimplemnted");
- SchedDAGReverseDFS DFS;
- // Mark a node visited by validating it.
- ILPValues[Root->NodeNum] = initILP(Root);
- DFS.follow(Root);
- for (;;) {
- // Traverse the leftmost path as far as possible.
- while (DFS.getPred() != DFS.getPredEnd()) {
- const SUnit *PredSU = DFS.getPred()->getSUnit();
- DFS.advance();
- // If the pred is already valid, skip it.
- if (ILPValues[PredSU->NodeNum].isValid())
- continue;
- ILPValues[PredSU->NodeNum] = initILP(PredSU);
- DFS.follow(PredSU);
+ SchedDFSImpl Impl(*this);
+ for (ArrayRef<SUnit>::const_iterator
+ SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
+ const SUnit *SU = &*SI;
+ if (Impl.isVisited(SU) || hasDataSucc(SU))
+ continue;
+
+ SchedDAGReverseDFS DFS;
+ Impl.visitPreorder(SU);
+ DFS.follow(SU);
+ for (;;) {
+ // Traverse the leftmost path as far as possible.
+ while (DFS.getPred() != DFS.getPredEnd()) {
+ const SDep &PredDep = *DFS.getPred();
+ DFS.advance();
+ // Ignore non-data edges.
+ if (PredDep.getKind() != SDep::Data
+ || PredDep.getSUnit()->isBoundaryNode()) {
+ continue;
+ }
+ // An already visited edge is a cross edge, assuming an acyclic DAG.
+ if (Impl.isVisited(PredDep.getSUnit())) {
+ Impl.visitCrossEdge(PredDep, DFS.getCurr());
+ continue;
+ }
+ Impl.visitPreorder(PredDep.getSUnit());
+ DFS.follow(PredDep.getSUnit());
+ }
+ // Visit the top of the stack in postorder and backtrack.
+ const SUnit *Child = DFS.getCurr();
+ const SDep *PredDep = DFS.backtrack();
+ Impl.visitPostorderNode(Child);
+ if (PredDep)
+ Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
+ if (DFS.isComplete())
+ break;
}
- // Visit the top of the stack in postorder and backtrack.
- unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
- DFS.backtrack();
- if (DFS.isComplete())
- break;
- // Add the recently finished predecessor's bottom-up descendent count.
- ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
+ }
+ Impl.finalize();
+}
+
+/// The root of the given SubtreeID was just scheduled. For all subtrees
+/// connected to this tree, record the depth of the connection so that the
+/// nearest connected subtrees can be prioritized.
+void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
+ for (SmallVectorImpl<Connection>::const_iterator
+ I = SubtreeConnections[SubtreeID].begin(),
+ E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
+ SubtreeConnectLevels[I->TreeID] =
+ std::max(SubtreeConnectLevels[I->TreeID], I->Level);
+ DEBUG(dbgs() << " Tree: " << I->TreeID
+ << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
}
}
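// Illustrative only: the ILP value printed below is the subtree's
// instruction count divided by its critical-path length, with a zero
// length standing in for "not yet computed". A minimal standalone sketch
// of that ratio:
#include <cstdio>

struct ILPSketch {
  unsigned InstrCount;
  unsigned Length; // critical-path length; 0 marks an invalid value

  void print() const {
    if (!Length)
      std::printf("%u / %u = BADILP\n", InstrCount, Length);
    else
      std::printf("%u / %u = %g\n", InstrCount, Length,
                  (double)InstrCount / Length);
  }
};

int main() {
  ILPSketch{12, 4}.print(); // 12 / 4 = 3: three instructions per cycle
  ILPSketch{5, 0}.print();  // 5 / 0 = BADILP: no length computed yet
}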
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ILPValue::print(raw_ostream &OS) const {
- if (!isValid())
+ OS << InstrCount << " / " << Length << " = ";
+ if (!Length)
OS << "BADILP";
- OS << InstrCount << " / " << Cycles << " = "
- << format("%g", ((double)InstrCount / Cycles));
+ else
+ OS << format("%g", ((double)InstrCount / Length));
}
void ILPValue::dump() const {
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 6e781b199a5f..8ddb3e892f25 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -11,19 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Constants.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <fstream>
using namespace llvm;
@@ -41,6 +41,10 @@ namespace llvm {
return true;
}
+ static bool isNodeHidden(const SUnit *Node) {
+ return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ }
+
static bool hasNodeAddressLabel(const SUnit *Node,
const ScheduleDAG *Graph) {
return true;
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 37d7731aa158..eb1609575016 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18,22 +18,23 @@
#define DEBUG_TYPE "dagcombine"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include <algorithm>
using namespace llvm;
@@ -291,6 +292,10 @@ namespace {
unsigned SrcValueAlign2,
const MDNode *TBAAInfo2) const;
+ /// isAlias - Return true if there is any possibility that the two addresses
+ /// overlap.
+ bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
+
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
@@ -1178,7 +1183,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
// Expose the DAG combiner to the target combiner impls.
TargetLowering::DAGCombinerInfo
- DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+ DagCombineInfo(DAG, Level, false, this);
RV = TLI.PerformDAGCombine(N, DagCombineInfo);
}
@@ -1377,6 +1382,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (add x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
}
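// The vector folds above hinge on recognizing a build_vector whose
// elements are all zero. A standalone sketch of the same identity on
// plain arrays (not the SelectionDAG API; names are illustrative):
#include <algorithm>
#include <array>
#include <cstddef>

// If one operand of an elementwise add is the zero vector, the result is
// simply the other operand: add(x, 0) == x.
template <typename T, std::size_t N>
std::array<T, N> foldVectorAdd(const std::array<T, N> &A,
                               const std::array<T, N> &B) {
  auto IsAllZeros = [](const std::array<T, N> &V) {
    return std::all_of(V.begin(), V.end(), [](T E) { return E == T(0); });
  };
  if (IsAllZeros(B))
    return A; // fold (add x, 0) -> x
  if (IsAllZeros(A))
    return B; // fold (add 0, x) -> x
  std::array<T, N> R;
  for (std::size_t I = 0; I != N; ++I)
    R[I] = A[I] + B[I];
  return R;
}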
// fold (add x, undef) -> undef
@@ -1620,6 +1631,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (sub x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
}
// fold (sub x, x) -> 0
@@ -2423,6 +2438,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (and x, 0) -> 0, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+
+ // fold (and x, -1) -> x, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N0;
}
// fold (and x, undef) -> 0
@@ -2606,7 +2633,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
bool isInteger = LL.getValueType().isInteger();
ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ TLI.getSetCCResultType(N0.getSimpleValueType())))))
return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
LL, LR, Result);
}
@@ -2766,7 +2796,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
}
-
return SDValue();
}
@@ -2959,7 +2988,8 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
- if (N1.getOpcode() == ISD::OR) {
+ if (N1.getOpcode() == ISD::OR &&
+ N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
// (or (or (and), (and)), (or (and), (and)))
SDValue N000 = N00.getOperand(0);
if (!isBSwapHWordElement(N000, Parts))
@@ -3021,6 +3051,18 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (or x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+
+ // fold (or x, -1) -> -1, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N1;
}
// fold (or x, undef) -> -1
@@ -3103,7 +3145,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
bool isInteger = LL.getValueType().isInteger();
ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ TLI.getSetCCResultType(N0.getValueType())))))
return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
LL, LR, Result);
}
@@ -3330,6 +3375,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (xor x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
@@ -3360,7 +3411,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
isInt);
- if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
+ if (!LegalOperations ||
+ TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
switch (N0.getOpcode()) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
@@ -4444,8 +4496,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
NegOne, DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+ if (!VT.isVector() && (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))))
return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
DAG.getSetCC(N->getDebugLoc(),
TLI.getSetCCResultType(VT),
@@ -5025,11 +5077,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// At this point, we must have a load or else we can't do the transform.
if (!isa<LoadSDNode>(N0)) return SDValue();
+ // Because an SRL must be assumed to *need* to zero-extend the high bits
+ // (as opposed to anyext the high bits), we can't combine the zextload
+ // lowering of SRL and an sextload.
+ if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+ return SDValue();
+
// If the shift amount is larger than the input type then we're not
// accessing any of the loaded bytes. If the load was a zextload/extload
// then the result of the shift+trunc is zero/undef (handled elsewhere).
- // If the load was a sextload then the result is a splat of the sign bit
- // of the extended byte. This is not worth optimizing for.
if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
return SDValue();
}
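// Why the sextload guard above matters, as a worked example on concrete
// bits (a standalone sketch, not the DAG transform): logically shifting
// a sign-extended load exposes copies of the sign bit, which any rewrite
// built on a zero-extending narrow load would lose.
#include <cassert>
#include <cstdint>

int main() {
  int8_t Mem = -1;                                  // the byte in memory: 0xFF
  uint32_t SExt = (uint32_t)(int32_t)Mem;           // sextload:   0xFFFFFFFF
  uint32_t Srl = SExt >> 8;                         // srl by 8:   0x00FFFFFF
  uint32_t ZExtBased = (uint32_t)(uint8_t)Mem >> 8; // zextload path: 0
  assert(Srl != ZExtBased); // combining the two would change the result
}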
@@ -5048,16 +5104,26 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// If we haven't found a load, we can't narrow it. Don't transform one with
// multiple uses, this would require adding a new load.
- if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
- // Don't change the width of a volatile load.
- cast<LoadSDNode>(N0)->isVolatile())
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
+ return SDValue();
+
+ // Don't change the width of a volatile load.
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (LN0->isVolatile())
return SDValue();
// Verify that we are actually reducing a load width here.
- if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
+ if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ // For the transform to be legal, the load must produce only two values
+ // (the value loaded and the chain). Don't transform a pre-increment
+ // load, for example, which produces an extra value. Otherwise the
+ // transformation is not equivalent, and the downstream logic to replace
+ // uses gets things wrong.
+ if (LN0->getNumValues() > 2)
return SDValue();
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT PtrType = N0.getOperand(1).getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended())
@@ -5101,8 +5167,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
EVT ShImmTy = getShiftAmountTy(Result.getValueType());
if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
ShImmTy = VT;
- Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
- Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ // If the shift amount is as large as the result size (but, presumably,
+ // no larger than the source) then the useful bits of the result are
+ // zero; we can't simply return the shortened shift, because the result
+ // of that operation is undefined.
+ if (ShLeftAmt >= VT.getSizeInBits())
+ Result = DAG.getConstant(0, VT);
+ else
+ Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+ Result, DAG.getConstant(ShLeftAmt, ShImmTy));
}
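// The special case above in miniature (standalone sketch): shifting by
// the full result width is undefined, in C++ just as for the ISD::SHL
// node, so the known-zero result has to be materialized explicitly.
#include <cstdint>

uint16_t shlOrZero(uint16_t V, unsigned Amt) {
  if (Amt >= 16)
    return 0;                  // all useful bits are shifted out
  return (uint16_t)(V << Amt); // otherwise the shift is well defined
}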
// Return the new loaded value.
@@ -5187,6 +5260,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
LN0->getAlignment());
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ AddToWorkList(ExtLoad.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
@@ -5287,6 +5361,38 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
+ // Fold a series of buildvector, bitcast, and truncate if possible.
+ // For example, fold
+ // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
+ // (2xi32 (buildvector x, y)).
+ if (Level == AfterLegalizeVectorOps && VT.isVector() &&
+ N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getOperand(0).hasOneUse()) {
+
+ SDValue BuildVect = N0.getOperand(0);
+ EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
+ EVT TruncVecEltTy = VT.getVectorElementType();
+
+ // Check that the element types match.
+ if (BuildVectEltTy == TruncVecEltTy) {
+ // Now we only need to compute the offset of the truncated elements.
+ unsigned BuildVecNumElts = BuildVect.getNumOperands();
+ unsigned TruncVecNumElts = VT.getVectorNumElements();
+ unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+ assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+ "Invalid number of elements");
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ Opnds.push_back(BuildVect.getOperand(i));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0],
+ Opnds.size());
+ }
+ }
+
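// The offset arithmetic above on the example from the comment, as a
// standalone sketch (plain vectors, not SelectionDAG): a build_vector
// viewed as fewer, wider lanes and truncated back keeps every
// (BuildVecNumElts / TruncVecNumElts)-th operand, so {x, x, y, y} -> {x, y}.
#include <cassert>
#include <cstddef>
#include <vector>

std::vector<int> truncBuildVector(const std::vector<int> &BuildVect,
                                  std::size_t TruncNumElts) {
  assert(BuildVect.size() % TruncNumElts == 0 && "Invalid number of elements");
  std::size_t Stride = BuildVect.size() / TruncNumElts;
  std::vector<int> Out;
  for (std::size_t I = 0; I != BuildVect.size(); I += Stride)
    Out.push_back(BuildVect[I]); // pick the first operand of each group
  return Out;
}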
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
@@ -5729,14 +5835,25 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
+ // No FP constant should be created after legalization, as the
+ // instruction selection pass has a hard time dealing with FP constants.
+ //
+ // We don't need to test this condition for transformations like the
+ // following, as the DAG being transformed implies it is legal to take
+ // an FP constant as an operand.
+ //
+ // (fadd (fmul c, x), x) -> (fmul c+1, x)
+ //
+ bool AllowNewFpConst = (Level < AfterLegalizeDAG);
+
// If allowed, fold (fadd (fneg x), x) -> 0.0
- if (DAG.getTarget().Options.UnsafeFPMath &&
+ if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
return DAG.getConstantFP(0.0, VT);
}
// If allowed, fold (fadd x, (fneg x)) -> 0.0
- if (DAG.getTarget().Options.UnsafeFPMath &&
+ if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
return DAG.getConstantFP(0.0, VT);
}
@@ -5769,13 +5886,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N1, NewCFP);
}
- // (fadd (fadd x, x), x) -> (fmul 3.0, x)
- if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) &&
- N0.getOperand(0) == N1) {
- return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
- N1, DAG.getConstantFP(3.0, VT));
- }
-
// (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x)
if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
@@ -5821,12 +5931,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N0, NewCFP);
}
- // (fadd x, (fadd x, x)) -> (fmul 3.0, x)
- if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) &&
- N1.getOperand(0) == N0) {
- return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
- N0, DAG.getConstantFP(3.0, VT));
- }
// (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x)
if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD &&
@@ -5851,8 +5955,29 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
}
+ if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ // (fadd (fadd x, x), x) -> (fmul 3.0, x)
+ if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+ (N0.getOperand(0) == N1)) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, DAG.getConstantFP(3.0, VT));
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ // (fadd x, (fadd x, x)) -> (fmul 3.0, x)
+ if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, DAG.getConstantFP(3.0, VT));
+ }
+ }
+
// (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x)
- if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ if (AllowNewFpConst &&
+ N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
@@ -6596,7 +6721,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// on the target.
if (N1.getOpcode() == ISD::SETCC &&
- TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ N1.getOperand(0).getValueType())) {
return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
Chain, N1.getOperand(2),
N1.getOperand(0), N1.getOperand(1), N2);
@@ -6682,18 +6808,24 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
if (Op0.getOpcode() == Op1.getOpcode()) {
// Avoid missing important xor optimizations.
SDValue Tmp = visitXOR(TheXor);
- if (Tmp.getNode() && Tmp.getNode() != TheXor) {
- DEBUG(dbgs() << "\nReplacing.8 ";
- TheXor->dump(&DAG);
- dbgs() << "\nWith: ";
- Tmp.getNode()->dump(&DAG);
- dbgs() << '\n');
- WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
- removeFromWorkList(TheXor);
- DAG.DeleteNode(TheXor);
- return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
- MVT::Other, Chain, Tmp, N2);
+ if (Tmp.getNode()) {
+ if (Tmp.getNode() != TheXor) {
+ DEBUG(dbgs() << "\nReplacing.8 ";
+ TheXor->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Tmp.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
+ removeFromWorkList(TheXor);
+ DAG.DeleteNode(TheXor);
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, Tmp, N2);
+ }
+
+ // visitXOR has changed the XOR's operands or replaced the XOR
+ // completely; bail out.
+ return SDValue(N, 0);
}
}
@@ -6772,7 +6904,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
} else
return false;
- AddrMode AM;
+ TargetLowering::AddrMode AM;
if (N->getOpcode() == ISD::ADD) {
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
@@ -6841,6 +6973,16 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
return false;
+
+ // Backends without true r+i pre-indexed forms may need to pass a
+ // constant base with a variable offset so that constant coercion
+ // will work with the patterns in canonical form.
+ bool Swapped = false;
+ if (isa<ConstantSDNode>(BasePtr)) {
+ std::swap(BasePtr, Offset);
+ Swapped = true;
+ }
+
// Don't create an indexed load / store with zero offset.
if (isa<ConstantSDNode>(Offset) &&
cast<ConstantSDNode>(Offset)->isNullValue())
@@ -6866,6 +7008,48 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return false;
}
+ // If the offset is a constant, there may be other adds of constants that
+ // can be folded with this one. We should do this to avoid having to keep
+ // a copy of the original base pointer.
+ SmallVector<SDNode *, 16> OtherUses;
+ if (isa<ConstantSDNode>(Offset))
+ for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
+ E = BasePtr.getNode()->use_end(); I != E; ++I) {
+ SDNode *Use = *I;
+ if (Use == Ptr.getNode())
+ continue;
+
+ if (Use->isPredecessorOf(N))
+ continue;
+
+ if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
+ OtherUses.clear();
+ break;
+ }
+
+ SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
+ if (Op1.getNode() == BasePtr.getNode())
+ std::swap(Op0, Op1);
+ assert(Op0.getNode() == BasePtr.getNode() &&
+ "Use of ADD/SUB but not an operand");
+
+ if (!isa<ConstantSDNode>(Op1)) {
+ OtherUses.clear();
+ break;
+ }
+
+ // FIXME: In some cases, we can be smarter about this.
+ if (Op1.getValueType() != Offset.getValueType()) {
+ OtherUses.clear();
+ break;
+ }
+
+ OtherUses.push_back(Use);
+ }
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
// Now check for #3 and #4.
bool RealUse = false;
@@ -6915,6 +7099,43 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Replace other uses of BasePtr that can be updated to use Ptr
+ for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
+ unsigned OffsetIdx = 1;
+ if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
+ OffsetIdx = 0;
+ assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
+ BasePtr.getNode() && "Expected BasePtr operand");
+
+ APInt OV =
+ cast<ConstantSDNode>(Offset)->getAPIntValue();
+ if (AM == ISD::PRE_DEC)
+ OV = -OV;
+
+ ConstantSDNode *CN =
+ cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
+ APInt CNV = CN->getAPIntValue();
+ if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1)
+ CNV += OV;
+ else
+ CNV -= OV;
+
+ SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0);
+ SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0));
+ if (OffsetIdx == 0)
+ std::swap(NewOp1, NewOp2);
+
+ SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(),
+ OtherUses[i]->getDebugLoc(),
+ OtherUses[i]->getValueType(0), NewOp1, NewOp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
+ removeFromWorkList(OtherUses[i]);
+ DAG.DeleteNode(OtherUses[i]);
+ }
+
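// The constant rewrite above with concrete numbers (illustration only,
// assuming a pre-increment with constant offset OV = 4): a sibling
// "base + 10" is re-expressed against the updated pointer "base + 4"
// as "(base + 4) + 6", so the original base register can be retired.
#include <cassert>

int main() {
  int OV = 4;          // offset folded into the pre-indexed access
  int CNV = 10;        // constant in the other use of the base
  int NewC = CNV - OV; // constant relative to the updated base
  assert((0 + OV) + NewC == 0 + CNV); // same address for any base (here 0)
}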
// Replace the uses of Ptr with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
removeFromWorkList(Ptr.getNode());
@@ -7123,12 +7344,15 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > LD->getAlignment())
- return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ if (Align > LD->getMemOperand()->getBaseAlignment()) {
+ SDValue NewLoad =
+ DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
LD->getValueType(0),
Chain, Ptr, LD->getPointerInfo(),
LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), Align);
+ return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
+ }
}
}
@@ -7386,7 +7610,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
// start at the previous one.
if (ShAmt % NewBW)
ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
- APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
+ std::min(BitWidth, ShAmt + NewBW));
if ((Imm & Mask) == Imm) {
APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
if (Opc == ISD::AND)
@@ -7486,16 +7711,82 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
-/// Returns the base pointer and an integer offset from that object.
-static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
- if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
- int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
- SDValue Base = Ptr->getOperand(0);
- return std::make_pair(Base, Offset);
+/// Helper struct to parse and store a memory address as base + index + offset.
+/// We ignore sign extensions when it is safe to do so.
+/// The following two expressions are not equivalent. To differentiate, we need
+/// to store whether there was a sign extension involved in the index
+/// computation.
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (add (i8 load %index)
+/// (i8 1))))
+/// vs
+///
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
+/// (i32 1)))))
+struct BaseIndexOffset {
+ SDValue Base;
+ SDValue Index;
+ int64_t Offset;
+ bool IsIndexSignExt;
+
+ BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
+
+ BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
+ bool IsIndexSignExt) :
+ Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
+
+ bool equalBaseIndex(const BaseIndexOffset &Other) {
+ return Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt;
}
- return std::make_pair(Ptr, 0);
-}
+ /// Parses the tree rooted at Ptr for base, index, and offset addresses.
+ static BaseIndexOffset match(SDValue Ptr) {
+ bool IsIndexSignExt = false;
+
+ // Just Base or possibly anything else.
+ if (Ptr->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Base + offset.
+ if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
+ int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+ return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
+ IsIndexSignExt);
+ }
+
+ // Look at Base + Index + Offset cases.
+ SDValue Base = Ptr->getOperand(0);
+ SDValue IndexOffset = Ptr->getOperand(1);
+
+ // Skip signextends.
+ if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
+ IndexOffset = IndexOffset->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+ // Either the case of Base + Index (no offset) or something else.
+ if (IndexOffset->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
+
+ // Now we have the case of Base + Index + offset.
+ SDValue Index = IndexOffset->getOperand(0);
+ SDValue Offset = IndexOffset->getOperand(1);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Ignore signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else IsIndexSignExt = false;
+
+ int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
+ return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
+ }
+};
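// A standalone sketch of how the decomposition is consumed below (toy
// string-keyed type, not SDValue; names are illustrative): two addresses
// compare equal on base, index, and sign-extension kind, and then differ
// only in their constant offsets.
#include <cassert>
#include <cstdint>
#include <string>

struct ToyAddr {
  std::string Base;  // stands in for the base SDValue
  std::string Index; // stands in for the (possibly sign-extended) index
  int64_t Offset;
  bool IsIndexSignExt;

  bool equalBaseIndex(const ToyAddr &O) const {
    return O.Base == Base && O.Index == Index &&
           O.IsIndexSignExt == IsIndexSignExt;
  }
};

int main() {
  ToyAddr A{"%c", "%i", 0, true}; // (add %c, (sext (add %i, 0)))
  ToyAddr B{"%c", "%i", 8, true}; // (add %c, (sext (add %i, 8)))
  assert(A.equalBaseIndex(B) && B.Offset - A.Offset == 8);
}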
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
@@ -7522,6 +7813,8 @@ struct ConsecutiveMemoryChainSorter {
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
+ bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
// Don't merge vectors into wider inputs.
if (MemVT.isVector() || !MemVT.isSimple())
@@ -7540,19 +7833,26 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
return false;
- // This holds the base pointer and the offset in bytes from the base pointer.
- std::pair<SDValue, int64_t> BasePtr =
- GetPointerBaseAndOffset(St->getBasePtr());
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
// We must have a base and an offset.
- if (!BasePtr.first.getNode())
+ if (!BasePtr.Base.getNode())
return false;
// Do not handle stores to undef base pointers.
- if (BasePtr.first.getOpcode() == ISD::UNDEF)
+ if (BasePtr.Base.getOpcode() == ISD::UNDEF)
return false;
+ // Save the LoadSDNodes that we find in the chain.
+ // We need to make sure that these nodes do not interfere with
+ // any of the store nodes.
+ SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
+
+ // Save the StoreSDNodes that we find in the chain.
SmallVector<MemOpLink, 8> StoreNodes;
+
// Walk up the chain and look for nodes with offsets from the same
// base pointer. Stop when reaching an instruction with a different kind
// or instruction which has a different base pointer.
@@ -7564,11 +7864,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
break;
// Find the base pointer and offset for this memory node.
- std::pair<SDValue, int64_t> Ptr =
- GetPointerBaseAndOffset(Index->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
// Check that the base pointer is the same as the original one.
- if (Ptr.first.getNode() != BasePtr.first.getNode())
+ if (!Ptr.equalBaseIndex(BasePtr))
break;
// Check that the alignment is the same.
@@ -7594,10 +7893,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
break;
// We found a potential memory operand to merge.
- StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++));
-
- // Move up the chain to the next memory operation.
- Index = dyn_cast<StoreSDNode>(Index->getChain().getNode());
+ StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
+
+ // Find the next memory operand in the chain. If the next operand in the
+ // chain is a store, move up and continue the scan with the next
+ // memory operand. If the next operand is a load, save it and use alias
+ // information to check whether it interferes with anything.
+ SDNode *NextInChain = Index->getChain().getNode();
+ while (1) {
+ if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
+ // We found a store node. Use it for the next iteration.
+ Index = STn;
+ break;
+ } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+ // Save the load node for later. Continue the scan.
+ AliasLoadNodes.push_back(Ldn);
+ NextInChain = Ldn->getChain().getNode();
+ continue;
+ } else {
+ Index = NULL;
+ break;
+ }
+ }
}
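// The chain walk above restated as a standalone sketch (hypothetical
// node type, not SDNode): scan past loads, remembering them for the
// later alias check, and stop at the first node that is neither a load
// nor a store.
#include <vector>

struct MemNodeSketch {
  enum Kind { Store, Load, Other } K;
  MemNodeSketch *Chain;
};

MemNodeSketch *nextStoreInChain(MemNodeSketch *N,
                                std::vector<MemNodeSketch *> &AliasLoads) {
  while (N) {
    if (N->K == MemNodeSketch::Store)
      return N;              // continue the merge scan from this store
    if (N->K != MemNodeSketch::Load)
      return nullptr;        // an unknown node ends the scan
    AliasLoads.push_back(N); // remember the load for alias checking
    N = N->Chain;
  }
  return nullptr;
}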
// Check if there is anything to merge.
@@ -7612,9 +7929,25 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// store memory address.
unsigned LastConsecutiveStore = 0;
int64_t StartAddress = StoreNodes[0].OffsetFromBase;
- for (unsigned i=1; i<StoreNodes.size(); ++i) {
- int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
+
+ // Check that the addresses are consecutive starting from the second
+ // element in the list of stores.
+ if (i > 0) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ }
+
+ bool Alias = false;
+ // Check if this store interferes with any of the loads that we found.
+ for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
+ if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
+ Alias = true;
+ break;
+ }
+ // We found a load that aliases with this store. Stop the sequence.
+ if (Alias)
break;
// Mark this node as useful.
@@ -7647,6 +7980,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
if (TLI.isTypeLegal(StoreTy))
LastLegalType = i+1;
+ // Or check whether a truncstore is legal.
+ else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
+ LastLegalType = i+1;
+ }
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
@@ -7654,15 +7995,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
LastLegalVectorType = i + 1;
}
- // We only use vectors if the constant is known to be zero.
- if (NonZero)
+ // We only use vectors if the constant is known to be zero and the
+ // function is not marked with the noimplicitfloat attribute.
+ if (NonZero || NoVectors)
LastLegalVectorType = 0;
// Check if we found a legal integer type to store.
if (LastLegalType == 0 && LastLegalVectorType == 0)
return false;
- bool UseVector = LastLegalVectorType > LastLegalType;
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
// Make sure we have something to merge.
@@ -7756,7 +8098,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Find acceptable loads. Loads need to have the same chain (token factor),
// must not be zext, volatile, or indexed, and they must be consecutive.
- SDValue LdBasePtr;
+ BaseIndexOffset LdBasePtr;
for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
@@ -7782,21 +8124,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (Ld->getMemoryVT() != MemVT)
break;
- std::pair<SDValue, int64_t> LdPtr =
- GetPointerBaseAndOffset(Ld->getBasePtr());
-
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
// If this is not the first ptr that we check.
- if (LdBasePtr.getNode()) {
+ if (LdBasePtr.Base.getNode()) {
// The base ptr must be the same.
- if (LdPtr.first != LdBasePtr)
+ if (!LdPtr.equalBaseIndex(LdBasePtr))
break;
} else {
// Check that all other base pointers are the same as this one.
- LdBasePtr = LdPtr.first;
+ LdBasePtr = LdPtr;
}
// We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0));
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
}
if (LoadNodes.size() < 2)
@@ -7815,7 +8155,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
break;
-
+
int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
@@ -7831,11 +8171,22 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
if (TLI.isTypeLegal(StoreTy))
LastLegalIntegerType = i + 1;
+ // Or check whether a truncstore and extload is legal.
+ else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy))
+ LastLegalIntegerType = i+1;
+ }
}
// Only use vector types if the vector type is larger than the integer type.
// If they are the same, use integers.
- bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType;
+ bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
// We add +1 here because the LastXXX variables refer to location while
@@ -8116,8 +8467,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Only perform this optimization before the types are legal, because we
// don't want to perform this optimization on every DAGCombine invocation.
- if (!LegalTypes && MergeConsecutiveStores(ST))
- return SDValue(N, 0);
+ if (!LegalTypes) {
+ bool EverChanged = false;
+
+ do {
+ // There can be multiple store sequences on the same chain.
+ // Keep trying to merge store sequences until we are unable to do so
+ // or until we merge the last store on the chain.
+ bool Changed = MergeConsecutiveStores(ST);
+ EverChanged |= Changed;
+ if (!Changed) break;
+ } while (ST->getOpcode() != ISD::DELETED_NODE);
+
+ if (EverChanged)
+ return SDValue(N, 0);
+ }
return ReduceLoadOpStoreWidth(N);
}
@@ -8514,11 +8878,8 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
if (Opcode == ISD::DELETED_NODE &&
(Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
Opcode = Opc;
- // If not supported by target, bail out.
- if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
- TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
- return SDValue();
}
+
if (Opc != Opcode)
return SDValue();
@@ -8543,6 +8904,10 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
assert(SrcVT != MVT::Other && "Cannot determine source type!");
EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
+
+ if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
+ return SDValue();
+
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
@@ -8707,12 +9072,32 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
+ if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked as 'concat' assumes all inputs are of the same type.
+ if (V->getOperand(0).getValueType() != NVT)
+ return SDValue();
+ unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned NumElems = NVT.getVectorNumElements();
+ assert((Idx % NumElems) == 0 &&
+ "IDX in concat is not a multiple of the result vector length.");
+ return V->getOperand(Idx / NumElems);
+ }
+
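// The index arithmetic above with concrete numbers (illustration only):
// extracting a 4-element subvector at element index 8 from a concat of
// 4-element inputs returns whole input number 8 / 4 == 2.
#include <cassert>

int main() {
  unsigned Idx = 8;      // constant extract index, in elements
  unsigned NumElems = 4; // elements per concat input and per result
  assert(Idx % NumElems == 0 && "not a multiple of the result length");
  assert(Idx / NumElems == 2); // the concat operand to return
}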
+ // Skip bitcasting
+ if (V->getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ DebugLoc dl = N->getDebugLoc();
// Handle only simple case where vector being inserted and vector
// being extracted are of same type, and are half size of larger vectors.
EVT BigVT = V->getOperand(0).getValueType();
EVT SmallVT = V->getOperand(1).getValueType();
- if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue();
// Only handle cases where both indexes are constants with the same type.
@@ -8725,30 +9110,18 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
- // indices are equal => V1
+ // indices are equal or bit offsets are equal => V1
// otherwise => (extract_subvec V1, ExtIdx)
- if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
- return V->getOperand(1);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
- V->getOperand(0), N->getOperand(1));
+ if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
+ ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
+ DAG.getNode(ISD::BITCAST, dl,
+ N->getOperand(0).getValueType(),
+ V->getOperand(0)), N->getOperand(1));
}
}
- if (V->getOpcode() == ISD::CONCAT_VECTORS) {
- // Combine:
- // (extract_subvec (concat V1, V2, ...), i)
- // Into:
- // Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
- if (V->getOperand(0).getValueType() != NVT)
- return SDValue();
- unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- unsigned NumElems = NVT.getVectorNumElements();
- assert((Idx % NumElems) == 0 &&
- "IDX in concat is not a multiple of the result vector length.");
- return V->getOperand(Idx / NumElems);
- }
-
return SDValue();
}
@@ -8992,11 +9365,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
- // After legalize, the target may be depending on adds and other
- // binary ops to provide legal ways to construct constants or other
- // things. Simplifying them may result in a loss of legality.
- if (LegalOperations) return SDValue();
-
assert(N->getValueType(0).isVector() &&
"SimplifyVBinOp only works on vectors!");
@@ -9066,11 +9434,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG.
SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
- // After legalize, the target may be depending on adds and other
- // binary ops to provide legal ways to construct constants or other
- // things. Simplifying them may result in a loss of legality.
- if (LegalOperations) return SDValue();
-
assert(N->getValueType(0).isVector() &&
"SimplifyVUnaryOp only works on vectors!");
@@ -9173,7 +9536,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// src value info, don't do the transformation if the memory
// locations are not in the default address space.
LLD->getPointerInfo().getAddrSpace() != 0 ||
- RLD->getPointerInfo().getAddrSpace() != 0)
+ RLD->getPointerInfo().getAddrSpace() != 0 ||
+ !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
+ LLD->getBasePtr().getValueType()))
return false;
// Check that the select condition doesn't reach either load. If so,
@@ -9537,7 +9902,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
SDValue N1, ISD::CondCode Cond,
DebugLoc DL, bool foldBooleans) {
TargetLowering::DAGCombinerInfo
- DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+ DagCombineInfo(DAG, Level, false, this);
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
@@ -9680,6 +10045,23 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
return true;
}
+bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
+ SDValue Ptr0, Ptr1;
+ int64_t Size0, Size1;
+ const Value *SrcValue0, *SrcValue1;
+ int SrcValueOffset0, SrcValueOffset1;
+ unsigned SrcValueAlign0, SrcValueAlign1;
+ const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
+ FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ SrcValueAlign0, SrcTBAAInfo0);
+ FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ SrcValueAlign1, SrcTBAAInfo1);
+ return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0,
+ SrcValueAlign0, SrcTBAAInfo0,
+ Ptr1, Size1, SrcValue1, SrcValueOffset1,
+ SrcValueAlign1, SrcTBAAInfo1);
+}
+
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4854cf7b261f..9ac738e50726 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -40,27 +40,27 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "isel"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/DataLayout.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
@@ -87,6 +87,27 @@ void FastISel::startNewBlock() {
LastLocalValue = EmitStartPt;
}
+bool FastISel::LowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ // Fall back to the SDISel argument lowering code to deal with the sret
+ // pointer parameter.
+ return false;
+
+ if (!FastLowerArguments())
+ return false;
+
+ // Enter non-dead arguments into ValueMap for uses in non-entry BBs.
+ for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
+ E = FuncInfo.Fn->arg_end(); I != E; ++I) {
+ if (!I->use_empty()) {
+ DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I);
+ assert(VI != LocalValueMap.end() && "Missed an argument?");
+ FuncInfo.ValueMap[I] = VI->second;
+ }
+ }
+ return true;
+}
+
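// A standalone sketch of the bookkeeping above (hypothetical map types,
// not FunctionLoweringInfo): arguments the fast path has already
// materialized are published to the function-wide map so non-entry
// blocks can find their virtual registers.
#include <string>
#include <unordered_map>

void publishArgs(
    const std::unordered_map<std::string, unsigned> &LocalValueMap,
    std::unordered_map<std::string, unsigned> &ValueMap) {
  for (const auto &Arg : LocalValueMap)
    ValueMap[Arg.first] = Arg.second; // reuse the vreg chosen at lowering
}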
void FastISel::flushLocalValueMap() {
LocalValueMap.clear();
LastLocalValue = EmitStartPt;
@@ -675,6 +696,13 @@ bool FastISel::SelectCall(const User *I) {
UpdateValueMap(Call, ResultReg);
return true;
}
+ case Intrinsic::expect: {
+ unsigned ResultReg = getRegForValue(Call->getArgOperand(0));
+ if (ResultReg == 0)
+ return false;
+ UpdateValueMap(Call, ResultReg);
+ return true;
+ }
}
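// The value semantics implemented above, as a standalone sketch:
// llvm.expect returns its first argument unchanged, and the second
// operand only carries branch-probability hints, so fast instruction
// selection can simply forward the value operand's register.
long expectSketch(long Value, long /*Expected*/) {
  return Value;
}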
// Usually, it does not make sense to initialize a value,
@@ -684,7 +712,7 @@ bool FastISel::SelectCall(const User *I) {
// all the values which have already been materialized,
// appear after the call. It also makes sense to skip intrinsics
// since they tend to be inlined.
- if (!isa<IntrinsicInst>(F))
+ if (!isa<IntrinsicInst>(Call))
flushLocalValueMap();
// An arbitrary call. Bail.
@@ -737,15 +765,15 @@ bool FastISel::SelectBitCast(const User *I) {
}
// Bitcasts of other values become reg-reg copies or BITCAST operators.
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(I->getType());
-
- if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
- DstVT == MVT::Other || !DstVT.isSimple() ||
- !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
+ EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType());
+ EVT DstEVT = TLI.getValueType(I->getType());
+ if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
+ !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT))
// Unhandled type. Halt "fast" selection and bail.
return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DstVT = DstEVT.getSimpleVT();
unsigned Op0 = getRegForValue(I->getOperand(0));
if (Op0 == 0)
// Unhandled operand. Halt "fast" selection and bail.
@@ -755,7 +783,7 @@ bool FastISel::SelectBitCast(const User *I) {
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
- if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
+ if (SrcVT == DstVT) {
const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
// Don't attempt a cross-class copy. It will likely fail.
@@ -768,8 +796,7 @@ bool FastISel::SelectBitCast(const User *I) {
// If the reg-reg copy failed, select a BITCAST opcode.
if (!ResultReg)
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
- ISD::BITCAST, Op0, Op0IsKill);
+ ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
if (!ResultReg)
return false;
@@ -837,7 +864,8 @@ FastISel::SelectInstruction(const Instruction *I) {
void
FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
- if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
+ FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
// If this is the only instruction in the block, emit it for more
// accurate line information; otherwise we have the unconditional
// fall-through case, which needs no instructions.
@@ -1068,6 +1096,10 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo,
FastISel::~FastISel() {}
+bool FastISel::FastLowerArguments() {
+ return false;
+}
+
unsigned FastISel::FastEmit_(MVT, MVT,
unsigned) {
return 0;
@@ -1151,6 +1183,8 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(),
VT.getSizeInBits());
MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ assert(MaterialReg != 0 && "Unable to materialize imm.");
+ if (MaterialReg == 0) return 0;
}
return FastEmit_rr(VT, VT, Opcode,
Op0, Op0IsKill,
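The paired assert and early return above are intentional: the assert makes a failed immediate materialization loud in +Asserts builds, while returning 0 lets release builds report failure to the caller and fall back to SelectionDAG rather than emit an instruction with an invalid register operand.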
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index a4182906cbf4..b46edad7a3d4 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,29 +13,29 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "function-lowering-info"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
using namespace llvm;
@@ -66,8 +66,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(Fn->getReturnType(),
- Fn->getAttributes().getRetAttributes(), Outs, TLI);
+ GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, TLI);
CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
Fn->isVarArg(),
Outs, Fn->getContext());
@@ -208,7 +207,7 @@ void FunctionLoweringInfo::clear() {
}
/// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(EVT VT) {
+unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
}
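This EVT-to-MVT tightening recurs throughout the patch: EVT can describe any IR type, including ones with no machine representation, while MVT covers only the fixed set of simple machine types, which is all a virtual register may carry. A minimal standalone sketch of the distinction (not part of this patch):

    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include <cassert>
    using namespace llvm;

    void evtSketch(LLVMContext &Ctx) {
      EVT Wide   = EVT::getIntegerVT(Ctx, 42); // extended type, not simple
      EVT Narrow = MVT::i32;                   // wraps a simple machine type
      assert(!Wide.isSimple() && Narrow.isSimple());
      MVT M = Narrow.getSimpleVT();            // only valid when isSimple()
      (void)M;
    }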
@@ -226,7 +225,7 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
unsigned FirstReg = 0;
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
EVT ValueVT = ValueVTs[Value];
- EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
+ MVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT);
for (unsigned i = 0; i != NumRegs; ++i) {
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index a8381b25ba12..3b1abd7c836e 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -16,18 +16,18 @@
#define DEBUG_TYPE "instr-emitter"
#include "InstrEmitter.h"
#include "SDNodeDbgValue.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
/// MinRCSize - Smallest register class we allow when constraining virtual
@@ -99,7 +99,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
// the CopyToReg'd destination register instead of creating a new vreg.
bool MatchReg = true;
const TargetRegisterClass *UseRC = NULL;
- EVT VT = Node->getValueType(ResNo);
+ MVT VT = Node->getSimpleValueType(ResNo);
// Stick to the preferred register classes for legal types.
if (TLI->isTypeLegal(VT))
@@ -124,7 +124,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
SDValue Op = User->getOperand(i);
if (Op.getNode() != Node || Op.getResNo() != ResNo)
continue;
- EVT VT = Node->getValueType(Op.getResNo());
+ MVT VT = Node->getSimpleValueType(Op.getResNo());
if (VT == MVT::Other || VT == MVT::Glue)
continue;
Match = false;
@@ -203,7 +203,8 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
return 0;
}
-void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
const MCInstrDesc &II,
bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap) {
@@ -222,7 +223,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
unsigned NumResults = CountResults(Node);
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
- MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ MIB.addReg(VRBase, RegState::Define);
}
if (!VRBase && !IsClone && !IsCloned)
@@ -237,7 +238,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
if (RegRC == RC) {
VRBase = Reg;
- MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ MIB.addReg(VRBase, RegState::Define);
break;
}
}
@@ -249,7 +250,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
if (VRBase == 0) {
assert(RC && "Isn't a register operand!");
VRBase = MRI->createVirtualRegister(RC);
- MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ MIB.addReg(VRBase, RegState::Define);
}
SDValue Op(Node, i);
@@ -272,7 +273,8 @@ unsigned InstrEmitter::getVR(SDValue Op,
// IMPLICIT_DEF can produce any type of result so its MCInstrDesc
// does not include operand register class info.
if (!VReg) {
- const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
+ const TargetRegisterClass *RC =
+ TLI->getRegClassFor(Op.getSimpleValueType());
VReg = MRI->createVirtualRegister(RC);
}
BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
@@ -290,7 +292,8 @@ unsigned InstrEmitter::getVR(SDValue Op,
/// specified machine instr. Insert register copies if the register is
/// not in the required register class.
void
-InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
+InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
@@ -302,7 +305,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
unsigned VReg = getVR(Op, VRBaseMap);
assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
- const MCInstrDesc &MCID = MI->getDesc();
+ const MCInstrDesc &MCID = MIB->getDesc();
bool isOptDef = IIOpNum < MCID.getNumOperands() &&
MCID.OpInfo[IIOpNum].isOptionalDef();
@@ -334,56 +337,53 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
!IsDebug &&
!(IsClone || IsCloned);
if (isKill) {
- unsigned Idx = MI->getNumOperands();
+ unsigned Idx = MIB->getNumOperands();
while (Idx > 0 &&
- MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
+ MIB->getOperand(Idx-1).isReg() &&
+ MIB->getOperand(Idx-1).isImplicit())
--Idx;
- bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
+ bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
if (isTied)
isKill = false;
}
- MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
- false/*isImp*/, isKill,
- false/*isDead*/, false/*isUndef*/,
- false/*isEarlyClobber*/,
- 0/*SubReg*/, IsDebug));
+ MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) |
+ getDebugRegState(IsDebug));
}
/// AddOperand - Add the specified operand to the specified machine instr. II
/// specifies the instruction information for the node, and IIOpNum is the
/// operand number (in the II) that we are adding.
-void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
+void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
if (Op.isMachineOpcode()) {
- AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
IsDebug, IsClone, IsCloned);
} else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
+ MIB.addImm(C->getSExtValue());
} else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
- const ConstantFP *CFP = F->getConstantFPValue();
- MI->addOperand(MachineOperand::CreateFPImm(CFP));
+ MIB.addFPImm(F->getConstantFPValue());
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
// Turn additional physreg operands into implicit uses on non-variadic
// instructions. This is used by call and return instructions passing
// arguments in registers.
bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
- MI->addOperand(MachineOperand::CreateReg(R->getReg(), false, Imp));
+ MIB.addReg(R->getReg(), getImplRegState(Imp));
} else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask()));
+ MIB.addRegMask(RM->getRegMask());
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
- TGA->getTargetFlags()));
+ MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(),
+ TGA->getTargetFlags());
} else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+ MIB.addMBB(BBNode->getBasicBlock());
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+ MIB.addFrameIndex(FI->getIndex());
} else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(),
- JT->getTargetFlags()));
+ MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags());
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
int Offset = CP->getOffset();
unsigned Align = CP->getAlignment();
@@ -403,30 +403,26 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
else
Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
- MI->addOperand(MachineOperand::CreateCPI(Idx, Offset,
- CP->getTargetFlags()));
+ MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags());
} else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
- ES->getTargetFlags()));
+ MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags());
} else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
- BA->getOffset(),
- BA->getTargetFlags()));
+ MIB.addBlockAddress(BA->getBlockAddress(),
+ BA->getOffset(),
+ BA->getTargetFlags());
} else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(),
- TI->getOffset(),
- TI->getTargetFlags()));
+ MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags());
} else {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Glue &&
"Chain and glue operands should occur at end of operand list!");
- AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
IsDebug, IsClone, IsCloned);
}
}
unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
- EVT VT, DebugLoc DL) {
+ MVT VT, DebugLoc DL) {
const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
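The bulk of the InstrEmitter changes are this mechanical substitution: hand-built MachineOperands give way to MachineInstrBuilder's chaining helpers. A side-by-side sketch (Reg and the immediate are placeholders):

    // Before: each operand constructed through a MachineOperand factory.
    MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/true));
    MI->addOperand(MachineOperand::CreateImm(0));
    // After: the builder states the same operands more compactly.
    MIB.addReg(Reg, RegState::Define).addImm(0);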
@@ -477,7 +473,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// constraints on the %dst register, COPY can target all legal register
// classes.
unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getValueType(0));
+ const TargetRegisterClass *TRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0));
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
MachineInstr *DefMI = MRI->getVRegDef(VReg);
@@ -500,7 +497,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// constrain its register class or issue a COPY to a compatible register
// class.
VReg = ConstrainForSubReg(VReg, SubIdx,
- Node->getOperand(0).getValueType(),
+ Node->getOperand(0).getSimpleValueType(),
Node->getDebugLoc());
// Create the destreg if it is missing.
@@ -532,7 +529,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
//
// There is no constraint on the %src register class.
//
- const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getValueType(0));
+ const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
@@ -540,22 +537,22 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
VRBase = MRI->createVirtualRegister(SRC);
// Create the insert_subreg or subreg_to_reg machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc));
- MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase);
// If creating a subreg_to_reg, then the first input operand
// is an implicit value immediate, otherwise it's a register
if (Opc == TargetOpcode::SUBREG_TO_REG) {
const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
- MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+ MIB.addImm(SD->getZExtValue());
} else
- AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ AddOperand(MIB, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
// Add the subregister being inserted.
- AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ AddOperand(MIB, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
- MI->addOperand(MachineOperand::CreateImm(SubIdx));
- MBB->insert(InsertPos, MI);
+ MIB.addImm(SubIdx);
+ MBB->insert(InsertPos, MIB);
} else
llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
@@ -596,12 +593,11 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
+ const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
unsigned NumOps = Node->getNumOperands();
assert((NumOps & 1) == 1 &&
"REG_SEQUENCE must have an odd number of operands!");
- const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
for (unsigned i = 1; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
if ((i & 1) == 0) {
@@ -620,11 +616,11 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
}
}
}
- AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+ AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
}
- MBB->insert(InsertPos, MI);
+ MBB->insert(InsertPos, MIB);
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
(void)isNew; // Silence compiler warning.
@@ -661,7 +657,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
if (I==VRBaseMap.end())
MIB.addReg(0U); // undef
else
- AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
+ AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
/*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
} else if (SD->getKind() == SDDbgValue::CONST) {
const Value *V = SD->getConst();
@@ -737,12 +733,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
#endif
// Create the new machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II);
// Add result register values for things that are defined by this
// instruction.
if (NumResults)
- CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+ CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);
// Emit all of the actual operands of this instruction, adding them to the
// instruction as appropriate.
@@ -751,17 +747,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
"Unable to cope with optional defs and phys regs defs!");
unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
for (unsigned i = NumSkip; i != NodeOperands; ++i)
- AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+ AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
// Transfer all of the memory reference descriptions of this instruction.
- MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+ MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
cast<MachineSDNode>(Node)->memoperands_end());
// Insert the instruction into position in the block. This needs to
// happen before any custom inserter hook is called so that the
// hook knows where in the block to insert the replacement code.
- MBB->insert(InsertPos, MI);
+ MBB->insert(InsertPos, MIB);
// The MachineInstr may also define physregs instead of virtregs. These
// physreg values can reach other instructions in different ways:
@@ -819,13 +815,13 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Finally mark unused registers as dead.
if (!UsedRegs.empty() || II.getImplicitDefs())
- MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
+ MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
#ifdef NDEBUG
if (II.hasPostISelHook())
#endif
- TLI->AdjustInstrPostInstrSelection(MI, Node);
+ TLI->AdjustInstrPostInstrSelection(MIB, Node);
}
/// EmitSpecialNode - Generate machine code for a target-independent node and
@@ -889,20 +885,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
--NumOps; // Ignore the glue operand.
// Create the inline asm machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::INLINEASM));
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::INLINEASM));
// Add the asm string as an external symbol operand.
SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
- MI->addOperand(MachineOperand::CreateES(AsmStr));
+ MIB.addExternalSymbol(AsmStr);
// Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
// bits.
int64_t ExtraInfo =
cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
getZExtValue();
- MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
+ MIB.addImm(ExtraInfo);
// Remember the operand index of the group flags.
SmallVector<unsigned, 8> GroupIdx;
@@ -913,8 +909,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
- GroupIdx.push_back(MI->getNumOperands());
- MI->addOperand(MachineOperand::CreateImm(Flags));
+ GroupIdx.push_back(MIB->getNumOperands());
+ MIB.addImm(Flags);
++i; // Skip the ID value.
switch (InlineAsm::getKind(Flags)) {
@@ -925,20 +921,16 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// FIXME: Add dead flags for physical and virtual registers defined.
// For now, mark physical register defs as implicit to help fast
// regalloc. This makes inline asm look a lot like calls.
- MI->addOperand(MachineOperand::CreateReg(Reg, true,
- /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg)));
+ MIB.addReg(Reg, RegState::Define |
+ getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
}
break;
case InlineAsm::Kind_RegDefEarlyClobber:
case InlineAsm::Kind_Clobber:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
- /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg),
- /*isKill=*/ false,
- /*isDead=*/ false,
- /*isUndef=*/false,
- /*isEarlyClobber=*/ true));
+ MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
+ getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
}
break;
case InlineAsm::Kind_RegUse: // Use of register.
@@ -947,7 +939,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
for (unsigned j = 0; j != NumVals; ++j, ++i)
- AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
+ AddOperand(MIB, Node->getOperand(i), 0, 0, VRBaseMap,
/*IsDebug=*/false, IsClone, IsCloned);
// Manually set isTied bits.
@@ -957,7 +949,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned DefIdx = GroupIdx[DefGroup] + 1;
unsigned UseIdx = GroupIdx.back() + 1;
for (unsigned j = 0; j != NumVals; ++j)
- MI->tieOperands(DefIdx + j, UseIdx + j);
+ MIB->tieOperands(DefIdx + j, UseIdx + j);
}
}
break;
@@ -968,9 +960,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
if (MD)
- MI->addOperand(MachineOperand::CreateMetadata(MD));
+ MIB.addMetadata(MD);
- MBB->insert(InsertPos, MI);
+ MBB->insert(InsertPos, MIB);
break;
}
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 9eddee9e33d3..a9c2203e8400 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -16,12 +16,13 @@
#ifndef INSTREMITTER_H
#define INSTREMITTER_H
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SelectionDAG.h"
namespace llvm {
+class MachineInstrBuilder;
class MCInstrDesc;
class SDDbgValue;
@@ -48,7 +49,8 @@ class InstrEmitter {
unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
unsigned ResNo) const;
- void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+ void CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
const MCInstrDesc &II,
bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap);
@@ -61,7 +63,8 @@ class InstrEmitter {
/// AddRegisterOperand - Add the specified register as an operand to the
/// specified machine instr. Insert register copies if the register is
/// not in the required register class.
- void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+ void AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
@@ -71,7 +74,8 @@ class InstrEmitter {
/// specifies the instruction information for the node, and IIOpNum is the
/// operand number (in the II) that we are adding. IIOpNum and II are used for
/// assertions only.
- void AddOperand(MachineInstr *MI, SDValue Op,
+ void AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
@@ -81,7 +85,7 @@ class InstrEmitter {
/// supports SubIdx sub-registers. Emit a copy if that isn't possible.
/// Return the virtual register to use.
unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
- EVT VT, DebugLoc DL);
+ MVT VT, DebugLoc DL);
/// EmitSubregNode - Generate machine code for subreg nodes.
///
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index abf40b77a18f..51cc254b2c82 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,26 +11,27 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -101,6 +102,7 @@ private:
SDNode *Node, bool isSigned);
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128);
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
RTLIB::Libcall Call_I8,
@@ -109,6 +111,7 @@ private:
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
@@ -321,7 +324,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Do an (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
EVT StoredVT = ST->getMemoryVT();
- EVT RegVT =
+ MVT RegVT =
TLI.getRegisterType(*DAG.getContext(),
EVT::getIntegerVT(*DAG.getContext(),
StoredVT.getSizeInBits()));
@@ -447,7 +450,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Copy the value to an (aligned) stack slot using (unaligned) integer
// loads and stores, then do an (aligned) load from the stack slot.
- EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+ MVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
@@ -710,7 +713,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
{
SDValue Value = ST->getValue();
- EVT VT = Value.getValueType();
+ MVT VT = Value.getSimpleValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
@@ -731,9 +734,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
return;
}
case TargetLowering::Promote: {
- assert(VT.isVector() && "Unknown legal promote case!");
- Value = DAG.getNode(ISD::BITCAST, dl,
- TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
+ MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote stores to same size type");
+ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
SDValue Result =
DAG.getStore(Chain, dl, Value, Ptr,
ST->getPointerInfo(), isVolatile,
@@ -817,7 +821,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
ReplaceNode(SDValue(Node, 0), Result);
} else {
- switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(),
+ StVT.getSimpleVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
@@ -862,7 +867,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
- EVT VT = Node->getValueType(0);
+ MVT VT = Node->getSimpleValueType(0);
SDValue RVal = SDValue(Node, 0);
SDValue RChain = SDValue(Node, 1);
@@ -889,10 +894,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
break;
}
case TargetLowering::Promote: {
- // Only promote a load of vector type to another.
- assert(VT.isVector() && "Cannot promote this load!");
- // Change base type to a different vector type.
- EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote loads to same size type");
SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
@@ -1037,7 +1041,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Ch;
} else {
bool isCustom = false;
- switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
@@ -1184,7 +1188,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
Node->getOpcode() == ISD::SETCC ? 2 : 1;
unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
- EVT OpVT = Node->getOperand(CompareOperand).getValueType();
+ MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
Action = TLI.getCondCodeAction(CCCode, OpVT);
@@ -1591,7 +1595,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
SDValue &LHS, SDValue &RHS,
SDValue &CC,
DebugLoc dl) {
- EVT OpVT = LHS.getValueType();
+ MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default: llvm_unreachable("Unknown condition code action!");
@@ -1869,7 +1873,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
// isTailCall may be true since the callee does not reference caller stack
// frame. Check if it's in the right position.
SDValue TCChain = InChain;
- bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI);
+ bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain);
if (isTailCall)
InChain = TCChain;
@@ -1956,6 +1960,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
@@ -1963,6 +1968,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
case MVT::f80: LC = Call_F80; break;
+ case MVT::f128: LC = Call_F128; break;
case MVT::ppcf128: LC = Call_PPCF128; break;
}
return ExpandLibCall(LC, Node, false);
@@ -2091,6 +2097,120 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Results.push_back(Rem);
}
+/// isSinCosLibcallAvailable - Return true if sincos libcall is available.
+static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+ return TLI.getLibcallName(LC) != 0;
+}
+
+/// canCombineSinCosLibcall - Return true if sincos libcall is available and
+/// can be used to combine sin and cos.
+static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
+ const TargetMachine &TM) {
+ if (!isSinCosLibcallAvailable(Node, TLI))
+ return false;
+ // GNU sin/cos functions set errno while sincos does not. Therefore
+ // combining sin and cos is only safe if unsafe-fpmath is enabled.
+ bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU;
+ if (isGNU && !TM.Options.UnsafeFPMath)
+ return false;
+ return true;
+}
+
+/// useSinCos - Only issue sincos libcall if both sin and cos are
+/// needed.
+static bool useSinCos(SDNode *Node) {
+ unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
+ ? ISD::FCOS : ISD::FSIN;
+
+ SDValue Op0 = Node->getOperand(0);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ // The other user might have been turned into sincos already.
+ if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS)
+ return true;
+ }
+ return false;
+}
+
+/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos
+/// pairs.
+void
+SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ RTLIB::Libcall LC;
+ switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call as a
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // Pass the argument.
+ Entry.Node = Node->getOperand(0);
+ Entry.Ty = RetTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Pass the address where the sin result will be stored.
+ SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = SinPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Also pass the address where the cos result will be stored.
+ SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = CosPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ DebugLoc dl = Node->getDebugLoc();
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr,
+ MachinePointerInfo(), false, false, false, 0));
+ Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr,
+ MachinePointerInfo(), false, false, false, 0));
+}
+
/// ExpandLegalINT_TO_FP - This function is responsible for legalizing an
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
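The expansion just added assumes the sincos family returns both values through out-parameters; under the usual C library prototypes (an assumption about the target's libm, matching the RTLIB entries) these look like:

    void sincosf(float x, float *sinp, float *cosp);   // RTLIB::SINCOS_F32
    void sincos(double x, double *sinp, double *cosp); // RTLIB::SINCOS_F64

The two stack temporaries stand in for sinp and cosp, and both result loads are chained on CallInfo.second, the call's output chain, so neither load can be scheduled ahead of the call.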
@@ -2419,18 +2539,6 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
}
}
-/// SplatByte - Distribute ByteVal over NumBits bits.
-// FIXME: Move this helper to a common place.
-static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
- APInt Val = APInt(NumBits, ByteVal);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- }
- return Val;
-}
-
/// ExpandBitCount - Expand the specified bitcount instruction into operations.
///
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
@@ -2448,10 +2556,10 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
- SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT);
- SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT);
- SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT);
- SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT);
+ SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT);
+ SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT);
+ SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT);
+ SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT);
// v = v - ((v >> 1) & 0x55555555...)
Op = DAG.getNode(ISD::SUB, dl, VT, Op,
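Each mask constant is now produced by splatting a single byte across the operand width with APInt::getSplat, so the expansion no longer depends on the local SplatByte helper and works uniformly for any legal width. A 32-bit model of the sequence those masks drive, the standard parallel bit count, runnable on its own:

    #include <cstdint>

    unsigned popcount32(uint32_t v) {
      v = v - ((v >> 1) & 0x55555555u);                 // 2-bit partial sums
      v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); // 4-bit partial sums
      v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
      return (v * 0x01010101u) >> 24;                   // add the four bytes
    }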
@@ -2801,7 +2909,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue True, False;
EVT VT = Node->getOperand(0).getValueType();
EVT NVT = Node->getValueType(0);
- APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
+ APFloat apf(DAG.EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
APInt x = APInt::getSignBit(NVT.getSizeInBits());
(void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
Tmp1 = DAG.getConstantFP(apf, VT);
@@ -3032,77 +3141,114 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::FSQRT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128));
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
break;
case ISD::FSIN:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_PPCF128));
+ case ISD::FCOS: {
+ EVT VT = Node->getValueType(0);
+ bool isSIN = Node->getOpcode() == ISD::FSIN;
+ // Turn fsin / fcos into an ISD::FSINCOS node if there is a pair of fsin /
+ // fcos which share the same operand and both are used.
+ if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
+ canCombineSinCosLibcall(Node, TLI, TM))
+ && useSinCos(Node)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
+ if (!isSIN)
+ Tmp1 = Tmp1.getValue(1);
+ Results.push_back(Tmp1);
+ } else if (isSIN) {
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
+ } else {
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
+ }
break;
- case ISD::FCOS:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_PPCF128));
+ }
+ case ISD::FSINCOS:
+ // Expand into sincos libcall.
+ ExpandSinCosLibCall(Node, Results);
break;
case ISD::FLOG:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_PPCF128));
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
break;
case ISD::FLOG2:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128));
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
break;
case ISD::FLOG10:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128));
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
break;
case ISD::FEXP:
Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_PPCF128));
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
break;
case ISD::FEXP2:
Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128));
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
break;
case ISD::FTRUNC:
Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128));
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
break;
case ISD::FFLOOR:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128));
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
break;
case ISD::FCEIL:
Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128));
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
break;
case ISD::FRINT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_PPCF128));
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
break;
case ISD::FNEARBYINT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128));
break;
case ISD::FPOWI:
Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_PPCF128));
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128));
break;
case ISD::FPOW:
Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_PPCF128));
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
break;
case ISD::FDIV:
Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
- RTLIB::DIV_F80, RTLIB::DIV_PPCF128));
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
break;
case ISD::FREM:
Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
- RTLIB::REM_F80, RTLIB::REM_PPCF128));
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
break;
case ISD::FMA:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
- RTLIB::FMA_F80, RTLIB::FMA_PPCF128));
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128));
break;
case ISD::FP16_TO_FP32:
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
@@ -3158,7 +3304,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::UREM:
case ISD::SREM: {
EVT VT = Node->getValueType(0);
- SDVTList VTs = DAG.getVTList(VT, VT);
bool isSigned = Node->getOpcode() == ISD::SREM;
unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
@@ -3169,6 +3314,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If div is legal, it's better to do the normal expansion
!TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
useDivRem(Node, isSigned, false))) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
@@ -3486,8 +3632,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
+ int TrueValue;
+ switch (TLI.getBooleanContents(VT.isVector())) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ TrueValue = 1;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ TrueValue = -1;
+ break;
+ }
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
- DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3);
+ DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT),
+ Tmp3);
Results.push_back(Tmp1);
break;
}
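The hard-coded constant 1 that this hunk replaces was only correct for targets whose boolean content is zero-or-one. Targets reporting ZeroOrNegativeOneBooleanContent, typical of vector compares whose true lanes are all-ones, need -1, and UndefinedBooleanContent guarantees only bit 0, for which 1 is still a safe choice. Expanding SETCC through SELECT_CC therefore has to consult getBooleanContents before picking the true value.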
@@ -3575,13 +3732,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
SmallVector<SDValue, 8> Results;
- EVT OVT = Node->getValueType(0);
+ MVT OVT = Node->getSimpleValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
Node->getOpcode() == ISD::SINT_TO_FP ||
Node->getOpcode() == ISD::SETCC) {
- OVT = Node->getOperand(0).getValueType();
+ OVT = Node->getOperand(0).getSimpleValueType();
}
- EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
DebugLoc dl = Node->getDebugLoc();
SDValue Tmp1, Tmp2, Tmp3;
switch (Node->getOpcode()) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 92dc5a9831b6..de217d8571ff 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -29,11 +29,13 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
return
VT == MVT::f32 ? Call_F32 :
VT == MVT::f64 ? Call_F64 :
VT == MVT::f80 ? Call_F80 :
+ VT == MVT::f128 ? Call_F128 :
VT == MVT::ppcf128 ? Call_PPCF128 :
RTLIB::UNKNOWN_LIBCALL;
}
@@ -152,23 +154,25 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::ADD_F32,
- RTLIB::ADD_F64,
- RTLIB::ADD_F80,
- RTLIB::ADD_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::CEIL_F32,
- RTLIB::CEIL_F64,
- RTLIB::CEIL_F80,
- RTLIB::CEIL_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
@@ -216,90 +220,98 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::COS_F32,
- RTLIB::COS_F64,
- RTLIB::COS_F80,
- RTLIB::COS_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::DIV_F32,
- RTLIB::DIV_F64,
- RTLIB::DIV_F80,
- RTLIB::DIV_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::EXP_F32,
- RTLIB::EXP_F64,
- RTLIB::EXP_F80,
- RTLIB::EXP_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::EXP2_F32,
- RTLIB::EXP2_F64,
- RTLIB::EXP2_F80,
- RTLIB::EXP2_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::FLOOR_F32,
- RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80,
- RTLIB::FLOOR_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG_F32,
- RTLIB::LOG_F64,
- RTLIB::LOG_F80,
- RTLIB::LOG_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG2_F32,
- RTLIB::LOG2_F64,
- RTLIB::LOG2_F80,
- RTLIB::LOG2_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG10_F32,
- RTLIB::LOG10_F64,
- RTLIB::LOG10_F80,
- RTLIB::LOG10_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
@@ -307,35 +319,38 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)),
GetSoftenedFloat(N->getOperand(2)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::FMA_F32,
- RTLIB::FMA_F64,
- RTLIB::FMA_F80,
- RTLIB::FMA_PPCF128),
- NVT, Ops, 3, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, 3, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::MUL_F32,
- RTLIB::MUL_F64,
- RTLIB::MUL_F80,
- RTLIB::MUL_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
@@ -343,12 +358,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)),
GetSoftenedFloat(N->getOperand(0)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
@@ -356,7 +372,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc());
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -364,8 +380,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
- return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
- N->getDebugLoc());
+ return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
+ N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -373,19 +389,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
- return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::POW_F32,
- RTLIB::POW_F64,
- RTLIB::POW_F80,
- RTLIB::POW_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
@@ -393,80 +410,87 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
"Unsupported power type!");
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::POWI_F32,
- RTLIB::POWI_F64,
- RTLIB::POWI_F80,
- RTLIB::POWI_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::REM_F32,
- RTLIB::REM_F64,
- RTLIB::REM_F80,
- RTLIB::REM_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_F128,
+ RTLIB::REM_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::RINT_F32,
- RTLIB::RINT_F64,
- RTLIB::RINT_F80,
- RTLIB::RINT_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::SIN_F32,
- RTLIB::SIN_F64,
- RTLIB::SIN_F80,
- RTLIB::SIN_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::SQRT_F32,
- RTLIB::SQRT_F64,
- RTLIB::SQRT_F80,
- RTLIB::SQRT_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_PPCF128),
- NVT, Ops, 2, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, 2, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::TRUNC_F32,
- RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80,
- RTLIB::TRUNC_PPCF128),
- NVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
+ NVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
@@ -559,8 +583,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
// Sign/zero extend the argument if the libcall takes a larger type.
SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
NVT, N->getOperand(0));
- return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- &Op, 1, false, dl);
+ return TLI.makeLibCall(DAG, LC,
+ TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ &Op, 1, false, dl);
}
@@ -607,92 +632,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
return false;
}
-/// SoftenSetCCOperands - Soften the operands of a comparison. This code is
-/// shared among BR_CC, SELECT_CC, and SETCC handlers.
-void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
- ISD::CondCode &CCCode, DebugLoc dl) {
- SDValue LHSInt = GetSoftenedFloat(NewLHS);
- SDValue RHSInt = GetSoftenedFloat(NewRHS);
- EVT VT = NewLHS.getValueType();
-
- assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!");
-
- // Expand into one or more soft-fp libcall(s).
- RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
- switch (CCCode) {
- case ISD::SETEQ:
- case ISD::SETOEQ:
- LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
- break;
- case ISD::SETNE:
- case ISD::SETUNE:
- LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64;
- break;
- case ISD::SETGE:
- case ISD::SETOGE:
- LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
- break;
- case ISD::SETLT:
- case ISD::SETOLT:
- LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
- break;
- case ISD::SETLE:
- case ISD::SETOLE:
- LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
- break;
- case ISD::SETGT:
- case ISD::SETOGT:
- LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
- break;
- case ISD::SETUO:
- LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
- break;
- case ISD::SETO:
- LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64;
- break;
- default:
- LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64;
- switch (CCCode) {
- case ISD::SETONE:
- // SETONE = SETOLT | SETOGT
- LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
- // Fallthrough
- case ISD::SETUGT:
- LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64;
- break;
- case ISD::SETUGE:
- LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64;
- break;
- case ISD::SETULT:
- LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64;
- break;
- case ISD::SETULE:
- LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64;
- break;
- case ISD::SETUEQ:
- LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
- break;
- default: llvm_unreachable("Do not know how to soften this setcc!");
- }
- }
-
- // Use the target specific return value for comparions lib calls.
- EVT RetVT = TLI.getCmpLibcallReturnType();
- SDValue Ops[2] = { LHSInt, RHSInt };
- NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
- NewRHS = DAG.getConstant(0, RetVT);
- CCCode = TLI.getCmpLibcallCC(LC1);
- if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
- SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT),
- NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
- NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS,
- NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
- NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
- NewRHS = SDValue();
- }
-}
-
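[editor's note] The removed helper (and its TargetLowering replacement) expands predicates with no single soft-float libcall into two calls OR'd together. A rough scalar model for SETUGT on f32, assuming the usual libgcc comparison helpers (the actual names come from the target's RTLIB tables):

  extern "C" int __unordsf2(float, float); // nonzero if either arg is NaN
  extern "C" int __gtsf2(float, float);    // > 0 iff ordered and X > Y

  bool SoftUGT(float X, float Y) {
    bool Unordered = __unordsf2(X, Y) != 0; // LC1 = UO_F32,  CC = SETNE
    bool OrderedGT = __gtsf2(X, Y) > 0;     // LC2 = OGT_F32, CC = SETGT
    return Unordered || OrderedGT;          // the ISD::OR of the two setccs
  }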
SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
GetSoftenedFloat(N->getOperand(0)));
@@ -706,15 +645,19 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
- SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
- // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (NewRHS.getNode() == 0) {
NewRHS = DAG.getConstant(0, NewLHS.getValueType());
@@ -733,7 +676,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
@@ -741,22 +684,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
EVT RVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
- SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
- // If SoftenSetCCOperands returned a scalar, we need to compare the result
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (NewRHS.getNode() == 0) {
NewRHS = DAG.getConstant(0, NewLHS.getValueType());
@@ -773,9 +720,13 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
- SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
- // If SoftenSetCCOperands returned a scalar, use it.
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+ // If softenSetCCOperands returned a scalar, use it.
if (NewRHS.getNode() == 0) {
assert(NewLHS.getValueType() == N->getValueType(0) &&
"Unexpected setcc expansion!");
@@ -873,6 +824,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -886,9 +838,11 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
assert(NVT.getSizeInBits() == integerPartWidth &&
"Do not know how to expand this float constant!");
APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
- Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[1])),
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(integerPartWidth, C.getRawData()[1])),
NVT);
- Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[0])),
+ Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(integerPartWidth, C.getRawData()[0])),
NVT);
}
@@ -910,7 +864,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_PPCF128),
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -919,7 +874,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128),
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -930,6 +886,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
RTLIB::COPYSIGN_F32,
RTLIB::COPYSIGN_F64,
RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_F128,
RTLIB::COPYSIGN_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
@@ -939,7 +896,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_PPCF128),
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -947,13 +905,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::DIV_F32,
- RTLIB::DIV_F64,
- RTLIB::DIV_F80,
- RTLIB::DIV_PPCF128),
- N->getValueType(0), Ops, 2, false,
- N->getDebugLoc());
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
GetPairElements(Call, Lo, Hi);
}
@@ -961,7 +920,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_PPCF128),
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -970,7 +930,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128),
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -978,8 +939,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::FLOOR_F32,RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -988,7 +950,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_PPCF128),
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -997,7 +960,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128),
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1005,8 +969,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG10_F32,RTLIB::LOG10_F64,
- RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1014,26 +979,28 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
- SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::FMA_F32,
- RTLIB::FMA_F64,
- RTLIB::FMA_F80,
- RTLIB::FMA_PPCF128),
- N->getValueType(0), Ops, 3, false,
- N->getDebugLoc());
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ N->getValueType(0), Ops, 3, false,
+ N->getDebugLoc());
GetPairElements(Call, Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::MUL_F32,
- RTLIB::MUL_F64,
- RTLIB::MUL_F80,
- RTLIB::MUL_PPCF128),
- N->getValueType(0), Ops, 2, false,
- N->getDebugLoc());
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
GetPairElements(Call, Lo, Hi);
}
@@ -1043,6 +1010,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
@@ -1060,14 +1028,16 @@ void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
- Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
}
void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_PPCF128),
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1076,7 +1046,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_PPCF128),
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1085,7 +1066,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_PPCF128),
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1094,7 +1076,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_PPCF128),
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1103,7 +1086,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128),
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1111,13 +1095,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_PPCF128),
- N->getValueType(0), Ops, 2, false,
- N->getDebugLoc());
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ N->getValueType(0), Ops, 2, false,
+ N->getDebugLoc());
GetPairElements(Call, Lo, Hi);
}
@@ -1125,7 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128),
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
N, false);
GetPairElements(Call, Lo, Hi);
}
@@ -1155,7 +1141,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
Chain = Hi.getValue(1);
// The low part is zero.
- Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
// Modified the chain - switch anything that used the old chain to use the
// new one.
@@ -1179,7 +1166,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
// The integer can be represented exactly in an f64.
Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
MVT::i32, Src);
- Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), NVT);
Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
} else {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
@@ -1193,7 +1181,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
- Hi = MakeLibCall(LC, VT, &Src, 1, true, dl);
+ Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl);
GetPairElements(Hi, Lo, Hi);
}
@@ -1225,7 +1213,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
- DAG.getConstantFP(APFloat(APInt(128, Parts)),
+ DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
+ APInt(128, Parts)),
MVT::ppcf128));
Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT),
Lo, Hi, DAG.getCondCode(ISD::SETLT));
@@ -1364,7 +1353,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl);
+ return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl);
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
@@ -1377,7 +1366,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
"Logic only correct for ppcf128!");
const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
- APFloat APF = APFloat(APInt(128, TwoE31));
+ APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31));
SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
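[editor's note] The TwoE31 initializer is easiest to sanity-check by decoding the IEEE-754 double bit pattern (a worked decode, not new behavior):

  // 0x41e0000000000000: sign 0, biased exponent 0x41e = 1054, mantissa 0,
  // so the value is 1.0 * 2^(1054 - 1023) = 2^31. The second ppcf128 word
  // is 0, giving exactly 2^31 as the overflow threshold.
  static_assert(1054 - 1023 == 31, "exponent check for TwoE31");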
@@ -1396,7 +1385,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
- return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1,
+ false, dl);
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a370faeb2399..d19c13b8ff13 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -19,7 +19,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -515,7 +515,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
// Only use the result of getSetCCResultType if it is legal,
// otherwise just use the promoted result type (NVT).
if (!TLI.isTypeLegal(SVT))
- SVT = NVT;
+ SVT = NVT;
DebugLoc dl = N->getDebugLoc();
assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
@@ -531,9 +531,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
- return DAG.getNode(ISD::SHL, N->getDebugLoc(),
- TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
- GetPromotedInteger(N->getOperand(0)), N->getOperand(1));
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
@@ -549,22 +550,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = GetPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
- LHS.getValueType(), LHS, RHS);
+ LHS.getValueType(), LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
// The input value must be properly sign extended.
SDValue Res = SExtPromotedInteger(N->getOperand(0));
- return DAG.getNode(ISD::SRA, N->getDebugLoc(),
- Res.getValueType(), Res, N->getOperand(1));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
// The input value must be properly zero extended.
- EVT VT = N->getValueType(0);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Res = ZExtPromotedInteger(N->getOperand(0));
- return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1));
+ SDValue Amt = N->getOperand(1);
+ Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt;
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt);
}
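[editor's note] All three shift promotions now run a vector shift amount through ZExtPromotedInteger instead of leaving it untouched. A sketch of why, with assumed types:

  // Promoting shl <4 x i8> %a, %b to <4 x i16> lanes must not let garbage
  // high bits reach the amount: a lane amount of 3 with junk upper bits
  // would shift by some value >= 16 instead. Shift counts are unsigned,
  // hence zext rather than sext.
  SDValue Res = GetPromotedInteger(N->getOperand(0));  // <4 x i16>
  SDValue Amt = ZExtPromotedInteger(N->getOperand(1)); // <4 x i16>, high bits zeroed
  SDValue Shl = DAG.getNode(ISD::SHL, N->getDebugLoc(),
                            Res.getValueType(), Res, Amt);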
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
@@ -703,7 +705,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
- EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
// The argument is passed as NumRegs registers of type RegVT.
@@ -1767,7 +1769,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl),
+ Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
@@ -1777,7 +1780,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
SDValue Op = N->getOperand(0);
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
- SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl),
+ Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
@@ -1992,7 +1996,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl),
+ Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
@@ -2054,7 +2059,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
@@ -2092,9 +2097,20 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// Expand the subcomponents.
SDValue LHSL, LHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
-
- SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) };
EVT VT = LHSL.getValueType();
+
+ // If the shift amount operand comes from a vector legalization it may have
+ // an illegal type. Fix that first by casting the operand; otherwise the new
+ // SHL_PARTS operation would need further legalization.
+ SDValue ShiftOp = N->getOperand(1);
+ EVT ShiftTy = TLI.getShiftAmountTy(VT);
+ assert(ShiftTy.getScalarType().getSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarType().getSizeInBits()) &&
+ "ShiftAmountTy is too small to cover the range of this type!");
+ if (ShiftOp.getValueType() != ShiftTy)
+ ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
+
+ SDValue Ops[] = { LHSL, LHSH, ShiftOp };
Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
Hi = Lo.getValue(1);
return;
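[editor's note] A small illustration of the new guard, with assumed types (an i8 amount produced by vector legalization on a hypothetical target whose shift-amount type is i32):

  SDValue ShiftOp = N->getOperand(1);                   // i8, illegal here
  EVT ShiftTy = TLI.getShiftAmountTy(VT);               // i32 on this target
  if (ShiftOp.getValueType() != ShiftTy)
    ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); // now a legal amount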
@@ -2138,7 +2154,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
return;
}
@@ -2221,7 +2237,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
@@ -2361,7 +2377,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
@@ -2381,7 +2397,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
@@ -2549,7 +2565,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
// NOTE: on targets without efficient SELECT of bools, we can always use
// this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
- TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL);
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL);
SDValue Tmp1, Tmp2;
Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
@@ -2668,7 +2684,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
- return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc());
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, N->getDebugLoc());
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2764,17 +2780,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
}
-static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unknown FP format");
- case MVT::f32: return &APFloat::IEEEsingle;
- case MVT::f64: return &APFloat::IEEEdouble;
- case MVT::f80: return &APFloat::x87DoubleExtended;
- case MVT::f128: return &APFloat::IEEEquad;
- case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
- }
-}
-
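[editor's note] The local helper moves onto SelectionDAG, and the APFloat(APInt) constructor calls throughout this diff now name their semantics explicitly. A minimal sketch of the new idiom for an f64 zero (type assumed for the example; the EVT-to-fltSemantics mapping is exactly the switch deleted above):

  EVT NVT = MVT::f64;                          // assumed for the example
  SDValue Zero = DAG.getConstantFP(
      APFloat(DAG.EVTToAPFloatSemantics(NVT),  // APFloat::IEEEdouble here
              APInt(NVT.getSizeInBits(), 0)),  // 64 zero bits
      NVT);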
SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
SDValue Op = N->getOperand(0);
EVT SrcVT = Op.getValueType();
@@ -2784,8 +2789,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
// The following optimization is valid only if every value in SrcVT (when
// treated as signed) is representable in DstVT. Check that the mantissa
// size of DstVT is >= than the number of bits in SrcVT -1.
- const fltSemantics *sem = EVTToAPFloatSemantics(DstVT);
- if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 &&
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT);
+ if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 &&
TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
// Do a signed conversion then adjust the result.
SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
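[editor's note] The precision check reads naturally with concrete numbers; a worked instance, assuming SrcVT = i64 and DstVT = f128:

  // semanticsPrecision(IEEEquad) = 113 >= 64 - 1, so every i64 bit pattern,
  // read as signed, converts exactly; the unsigned value is then recovered
  // by conditionally adding 2^64 back when the sign bit was set.
  const llvm::fltSemantics &Sem = llvm::APFloat::IEEEquad;
  bool ExactAsSigned = llvm::APFloat::semanticsPrecision(Sem) >= 64 - 1; // true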
@@ -2846,7 +2851,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
- return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
+ return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl);
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 644e36e35e21..b6436bf42741 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -14,9 +14,9 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DataLayout.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -735,6 +735,9 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
SDValue &OpEntry = PromotedIntegers[Op];
assert(OpEntry.getNode() == 0 && "Node is already promoted!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
@@ -746,6 +749,9 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
SDValue &OpEntry = SoftenedFloats[Op];
assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
@@ -760,6 +766,9 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
SDValue &OpEntry = ScalarizedVectors[Op];
assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
@@ -787,6 +796,10 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
@@ -814,6 +827,10 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
@@ -843,6 +860,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
assert(Entry.first.getNode() == 0 && "Node already split");
Entry.first = Lo;
Entry.second = Hi;
+
+ // Propagate ordering
+ DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode()));
+ DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode()));
}
void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
@@ -854,6 +875,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
SDValue &OpEntry = WidenedVectors[Op];
assert(OpEntry.getNode() == 0 && "Node already widened!");
OpEntry = Result;
+
+ // Propagate node ordering
+ DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode()));
}
@@ -919,8 +943,11 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
// Make everything that once used N's values now use those in Results instead.
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
- for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
ReplaceValueWith(SDValue(N, i), Results[i]);
+ // Propagate node ordering
+ DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N));
+ }
return true;
}
@@ -1020,50 +1047,20 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
unsigned NumOps = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
if (NumOps == 0) {
- return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl);
} else if (NumOps == 1) {
SDValue Op = N->getOperand(0);
- return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl);
} else if (NumOps == 2) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl);
}
SmallVector<SDValue, 8> Ops(NumOps);
for (unsigned i = 0; i < NumOps; ++i)
Ops[i] = N->getOperand(i);
- return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl);
-}
-
-/// MakeLibCall - Generate a libcall taking the given operands as arguments and
-/// returning a result of type RetVT.
-SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
- const SDValue *Ops, unsigned NumOps,
- bool isSigned, DebugLoc dl) {
- TargetLowering::ArgListTy Args;
- Args.reserve(NumOps);
-
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0; i != NumOps; ++i) {
- Entry.Node = Ops[i];
- Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
-
- Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
- TargetLowering::
- CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl);
- std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo.first;
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0),
+ &Ops[0], NumOps, isSigned, dl);
}
// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 20b7ce6b15ba..54ea926241cf 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -17,12 +17,12 @@
#define SELECTIONDAG_LEGALIZETYPES_H
#define DEBUG_TYPE "legalize-types"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -80,35 +80,35 @@ private:
/// PromotedIntegers - For integer nodes that are below legal width, this map
/// indicates what promoted value to use.
- DenseMap<SDValue, SDValue> PromotedIntegers;
+ SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers;
/// ExpandedIntegers - For integer nodes that need to be expanded this map
/// indicates which operands are the expanded version of the input.
- DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers;
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers;
/// SoftenedFloats - For floating point nodes converted to integers of
/// the same size, this map indicates the converted value to use.
- DenseMap<SDValue, SDValue> SoftenedFloats;
+ SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
/// ExpandedFloats - For float nodes that need to be expanded this map
/// indicates which operands are the expanded version of the input.
- DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats;
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
/// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
/// scalar value of type 'ty' to use.
- DenseMap<SDValue, SDValue> ScalarizedVectors;
+ SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors;
/// SplitVectors - For nodes that need to be split this map indicates
/// which operands are the expanded version of the input.
- DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors;
/// WidenedVectors - For vector nodes that need to be widened, indicates
/// the widened value to use.
- DenseMap<SDValue, SDValue> WidenedVectors;
+ SmallDenseMap<SDValue, SDValue, 8> WidenedVectors;
/// ReplacedValues - For values that have been replaced with another,
/// indicates the replacement value to use.
- DenseMap<SDValue, SDValue> ReplacedValues;
+ SmallDenseMap<SDValue, SDValue, 8> ReplacedValues;
/// Worklist - This defines a worklist of nodes to process. In order to be
/// pushed onto this worklist, all operands of a node must have already been
@@ -159,9 +159,6 @@ private:
SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
- SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
- const SDValue *Ops, unsigned NumOps, bool isSigned,
- DebugLoc dl);
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
@@ -433,9 +430,6 @@ private:
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
- void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
- ISD::CondCode &CCCode, DebugLoc dl);
-
//===--------------------------------------------------------------------===//
// Float Expansion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
@@ -471,6 +465,7 @@ private:
void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -536,6 +531,7 @@ private:
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
+ SDValue ScalarizeVecOp_EXTEND(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -578,6 +574,7 @@ private:
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
SDValue SplitVecOp_BITCAST(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 6bcb3b25e98e..222d1c043a63 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -20,7 +20,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 22f8d51ab2a9..c6e066e2709b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -40,7 +40,7 @@ class VectorLegalizer {
/// LegalizedNodes - For nodes that are of legal width, and that have more
/// than one use, this map indicates what regularized operand to use. This
/// allows us to avoid legalizing the same thing more than once.
- DenseMap<SDValue, SDValue> LegalizedNodes;
+ SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
// Adds a node to the translation cache
void AddLegalizedOperand(SDValue From, SDValue To) {
@@ -61,6 +61,8 @@ class VectorLegalizer {
// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
// SINT_TO_FLOAT and SHR on vectors isn't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+ // Implement expansion for SIGN_EXTEND_INREG using SHL and SRA.
+ SDValue ExpandSEXTINREG(SDValue Op);
// Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target.
SDValue ExpandVSELECT(SDValue Op);
@@ -83,6 +85,25 @@ class VectorLegalizer {
};
bool VectorLegalizer::Run() {
+ // Before we start legalizing vector nodes, check if there are any vectors.
+ bool HasVectors = false;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) {
+ // Check if the values of the nodes contain vectors. We don't need to check
+ // the operands because we are going to check their values at some point.
+ for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
+ J != E; ++J)
+ HasVectors |= J->isVector();
+
+ // If we found a vector node we can start the legalization.
+ if (HasVectors)
+ break;
+ }
+
+ // If this basic block has no vector values, there is nothing to legalize.
+ if (!HasVectors)
+ return false;
+
// The legalize process is inherently a bottom-up recursive process (users
// legalize their uses before themselves). Given infinite stack space, we
// could just start legalizing on the root and traverse the whole graph. In
@@ -142,9 +163,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
} else if (Op.getOpcode() == ISD::STORE) {
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
EVT StVT = ST->getMemoryVT();
- EVT ValVT = ST->getValue().getValueType();
+ MVT ValVT = ST->getValue().getSimpleValueType();
if (StVT.isVector() && ST->isTruncatingStore())
- switch (TLI.getTruncStoreAction(ValVT, StVT)) {
+ switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
return TranslateLegalizeResults(Op, Result);
@@ -221,6 +242,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
case ISD::FMA:
case ISD::SIGN_EXTEND_INREG:
QueryType = Node->getValueType(0);
@@ -260,7 +283,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
// FALL THROUGH
}
case TargetLowering::Expand:
- if (Node->getOpcode() == ISD::VSELECT)
+ if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
+ Result = ExpandSEXTINREG(Op);
+ else if (Node->getOpcode() == ISD::VSELECT)
Result = ExpandVSELECT(Op);
else if (Node->getOpcode() == ISD::SELECT)
Result = ExpandSELECT(Op);
@@ -291,10 +316,10 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
// Vector "promotion" is basically just bitcasting and doing the operation
// in a different type. For example, x86 promotes ISD::AND on v2i32 to
// v1i64.
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
- EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
DebugLoc dl = Op.getDebugLoc();
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
@@ -357,30 +382,135 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
EVT SrcVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = LD->getExtensionType();
- SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
unsigned NumElem = SrcVT.getVectorNumElements();
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
- Op.getNode()->getValueType(0).getScalarType(),
- Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ EVT SrcEltVT = SrcVT.getScalarType();
+ EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
+
+ if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
+ // When the elements of a vector are not byte-addressable, we cannot load
+ // each element directly by advancing a pointer, which can only address
+ // bytes. Instead, we load all the significant words, mask off the relevant
+ // bits, and concatenate them to form each element. Finally, each element is
+ // extended to the destination scalar type to build the destination vector.
+ EVT WideVT = TLI.getPointerTy();
+
+ assert(WideVT.isRound() &&
+ "Cannot handle the case where the widest integer is"
+ " not a power of 2.");
+ assert(WideVT.bitsGE(SrcEltVT) &&
+ "Type is not legalized?");
+
+ unsigned WideBytes = WideVT.getStoreSize();
+ unsigned Offset = 0;
+ unsigned RemainingBytes = SrcVT.getStoreSize();
+ SmallVector<SDValue, 8> LoadVals;
+
+ while (RemainingBytes > 0) {
+ SDValue ScalarLoad;
+ unsigned LoadBytes = WideBytes;
+
+ if (RemainingBytes >= LoadBytes) {
+ ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ } else {
+ EVT LoadVT = WideVT;
+ while (RemainingBytes < LoadBytes) {
+ LoadBytes >>= 1; // Reduce the load size by half.
+ LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
+ }
+ ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LoadVT, LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ }
- BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getIntPtrConstant(Stride));
+ RemainingBytes -= LoadBytes;
+ Offset += LoadBytes;
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(LoadBytes));
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ // Extract bits, pack and extend/trunc them into destination type.
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+ SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT);
+
+ unsigned BitOffset = 0;
+ unsigned WideIdx = 0;
+ unsigned WideBits = WideVT.getSizeInBits();
+
+ for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
+ SDValue Lo, Hi, ShAmt;
+
+ if (BitOffset < WideBits) {
+ ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT));
+ Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
+ }
- LoadVals.push_back(ScalarLoad.getValue(0));
- LoadChains.push_back(ScalarLoad.getValue(1));
+ BitOffset += SrcEltBits;
+ if (BitOffset >= WideBits) {
+ WideIdx++;
+ BitOffset -= WideBits;
+ if (BitOffset > 0) {
+ ShAmt = DAG.getConstant(SrcEltBits - BitOffset,
+ TLI.getShiftAmountTy(WideVT));
+ Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
+ }
+ }
+
+ if (Hi.getNode())
+ Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
+
+ switch (ExtType) {
+ default: llvm_unreachable("Unknown extended-load op!");
+ case ISD::EXTLOAD:
+ Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::ZEXTLOAD:
+ Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::SEXTLOAD:
+ ShAmt = DAG.getConstant(WideBits - SrcEltBits,
+ TLI.getShiftAmountTy(WideVT));
+ Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ }
+ Vals.push_back(Lo);
+ }
+ } else {
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
+ Op.getNode()->getValueType(0).getScalarType(),
+ Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ Vals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&LoadChains[0], LoadChains.size());
SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size());
+ Op.getNode()->getValueType(0), &Vals[0], Vals.size());
AddLegalizedOperand(Op.getValue(0), Value);
AddLegalizedOperand(Op.getValue(1), NewChain);
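// A scalar model of the bit-extraction loop above, assuming 32-bit wide
// loads and EltBits < 32 (an illustrative sketch, not part of the patch):
//
//   uint32_t ExtractField(const uint32_t *Words, unsigned Idx,
//                         unsigned EltBits) {
//     unsigned Bit   = Idx * EltBits;
//     uint32_t Mask  = (1u << EltBits) - 1;
//     uint32_t Lo    = (Words[Bit / 32] >> (Bit % 32)) & Mask;
//     unsigned Avail = 32 - Bit % 32;   // bits taken from the low word
//     if (Avail < EltBits)              // element straddles two words
//       Lo |= (Words[Bit / 32 + 1] << Avail) & Mask;
//     return Lo;
//   }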
@@ -499,6 +629,26 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
}
+SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Make sure that the SRA and SHL instructions are available.
+ if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
+
+ unsigned BW = VT.getScalarType().getSizeInBits();
+ unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
+ SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT);
+
+ Op = Op.getOperand(0);
+ Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
+ return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
+}
+
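+ // The same shift-pair idiom on a plain integer, for reference (a minimal
+ // sketch assuming a 32-bit lane, not part of this patch):
+ //
+ //   int32_t SextInReg(uint32_t X, unsigned OrigBW) {
+ //     unsigned Sh = 32 - OrigBW;          // BW - OrigBW above
+ //     return (int32_t)(X << Sh) >> Sh;    // SHL, then arithmetic SRA
+ //   }
+ //
+ // e.g. SextInReg(0x000000FF, 8) == -1, since bit 7 is the sign bit.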
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d51a6eb192ee..5ec853563888 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -21,7 +21,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -365,6 +365,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST:
Res = ScalarizeVecOp_BITCAST(N);
break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ Res = ScalarizeVecOp_EXTEND(N);
+ break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
break;
@@ -400,6 +405,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
N->getValueType(0), Elt);
}
+/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Extend the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SmallVector<SDValue, 1> Ops(1);
+ Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ N->getValueType(0).getScalarType(), Elt);
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+ &Ops[0], 1);
+}
+
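+ // For example, a (sext <1 x i8> to <1 x i32>) whose operand was
+ // scalarized becomes: extract the i8 element, SIGN_EXTEND it to i32,
+ // then rebuild a one-element BUILD_VECTOR of <1 x i32> so existing uses
+ // still see a vector value.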
/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
/// use a BUILD_VECTOR instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
@@ -1030,7 +1050,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
-
+ case ISD::VSELECT:
+ Res = SplitVecOp_VSELECT(N, OpNo);
+ break;
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -1064,6 +1086,58 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
return false;
}
+SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
+ // The only possibility for an illegal operand is the mask, since result type
+ // legalization would have handled this node already otherwise.
+ assert(OpNo == 0 && "Illegal operand must be mask");
+
+ SDValue Mask = N->getOperand(0);
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ DebugLoc DL = N->getDebugLoc();
+ EVT MaskVT = Mask.getValueType();
+ assert(MaskVT.isVector() && "VSELECT without a vector mask?");
+
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ assert(Lo.getValueType() == Hi.getValueType() &&
+ "Lo and Hi have differing types");;
+
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ unsigned HiNumElts = Hi.getValueType().getVectorNumElements();
+ assert(LoNumElts == HiNumElts && "Asymmetric vector split?");
+
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue LoElts = DAG.getIntPtrConstant(LoNumElts);
+ EVT Src0VT = Src0.getValueType();
+ EVT Src0EltTy = Src0VT.getVectorElementType();
+ EVT MaskEltTy = MaskVT.getVectorElementType();
+
+ EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts);
+ EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts);
+ EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts);
+ EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts);
+
+ SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero);
+ SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero);
+
+ SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts);
+ SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts);
+
+ SDValue LoMask =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero);
+ SDValue HiMask =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts);
+
+ SDValue LoSelect =
+ DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
+ SDValue HiSelect =
+ DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
+}
+
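+ // Illustrative shape of the split (types hypothetical): a VSELECT of two
+ // v8i32 sources under an illegal v8i1 mask becomes
+ //
+ //   lo = VSELECT v4i1 mask[0..3], v4i32 src0[0..3], v4i32 src1[0..3]
+ //   hi = VSELECT v4i1 mask[4..7], v4i32 src0[4..7], v4i32 src1[4..7]
+ //   CONCAT_VECTORS v8i32 lo, hi
+ //
+ // with each half carved out by EXTRACT_SUBVECTOR at indices 0 and 4.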
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index c3794d5f7863..473e1384e399 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -21,13 +21,13 @@
#define DEBUG_TYPE "scheduler"
#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -94,9 +94,9 @@ ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
continue;
for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
- EVT VT = ScegN->getValueType(i);
+ MVT VT = ScegN->getSimpleValueType(i);
if (TLI->isTypeLegal(VT)
- && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
NumberDeps++;
break;
}
@@ -132,9 +132,9 @@ unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
const SDValue &Op = ScegN->getOperand(i);
- EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (TLI->isTypeLegal(VT)
- && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
NumberDeps++;
break;
}
@@ -332,7 +332,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
// Gen estimate.
for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
- EVT VT = SU->getNode()->getValueType(i);
+ MVT VT = SU->getNode()->getSimpleValueType(i);
if (TLI->isTypeLegal(VT)
&& TLI->getRegClassFor(VT)
&& TLI->getRegClassFor(VT)->getID() == RCId)
@@ -341,7 +341,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
// Kill estimate.
for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
const SDValue &Op = SU->getNode()->getOperand(i);
- EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (isa<ConstantSDNode>(Op.getNode()))
continue;
@@ -485,7 +485,7 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
if (ScegN->isMachineOpcode()) {
// Estimate generated regs.
for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
- EVT VT = ScegN->getValueType(i);
+ MVT VT = ScegN->getSimpleValueType(i);
if (TLI->isTypeLegal(VT)) {
const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
@@ -496,7 +496,7 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
// Estimate killed regs.
for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
const SDValue &Op = ScegN->getOperand(i);
- EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
if (TLI->isTypeLegal(VT)) {
const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
@@ -604,10 +604,8 @@ SUnit *ResourcePriorityQueue::pop() {
std::vector<SUnit *>::iterator Best = Queue.begin();
if (!DisableDFASched) {
signed BestCost = SUSchedulingCost(*Best);
- for (std::vector<SUnit *>::iterator I = Queue.begin(),
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
E = Queue.end(); I != E; ++I) {
- if (*I == *Best)
- continue;
if (SUSchedulingCost(*I) > BestCost) {
BestCost = SUSchedulingCost(*I);
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 2dcb22957325..4af7172847d7 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -15,8 +15,8 @@
#define LLVM_CODEGEN_SDNODEDBGVALUE_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
namespace llvm {
diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
index d2269f8accf1..7e7b8974be48 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -33,8 +33,10 @@ class SDNodeOrdering {
public:
SDNodeOrdering() {}
- void add(const SDNode *Node, unsigned O) {
- OrderMap[Node] = O;
+ void add(const SDNode *Node, unsigned NewOrder) {
+ unsigned &OldOrder = OrderMap[Node];
+ if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder))
+ OldOrder = NewOrder;
}
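  // Keeping the minimum means a node CSE'd across several IR operations is
  // attributed to the earliest of them; e.g. (orders hypothetical):
  //   add(N, 5); add(N, 3); add(N, 7);   // OrderMap[N] ends up as 3.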
void remove(const SDNode *Node) {
DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 2ecdd8941551..d1f36cb647dc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -12,20 +12,20 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pre-RA-sched"
-#include "ScheduleDAGSDNodes.h"
-#include "InstrEmitter.h"
-#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
+#include "InstrEmitter.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
STATISTIC(NumUnfolds, "Number of nodes unfolded");
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index c55456902c87..c009cfcc516d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -16,22 +16,23 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pre-RA-sched"
-#include "ScheduleDAGSDNodes.h"
-#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
using namespace llvm;
@@ -142,6 +143,12 @@ private:
std::vector<SUnit*> LiveRegDefs;
std::vector<SUnit*> LiveRegGens;
+ // Collect interferences between physical register use/defs.
+ // Each interference is an SUnit and set of physical registers.
+ SmallVector<SUnit*, 4> Interferences;
+ typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
+ LRegsMapT LRegsMap;
+
/// Topo - A topological ordering for SUnits which permits fast IsReachable
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
@@ -156,7 +163,7 @@ public:
CodeGenOpt::Level OptLevel)
: ScheduleDAGSDNodes(mf),
NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
- Topo(SUnits) {
+ Topo(SUnits, NULL) {
const TargetMachine &tm = mf.getTarget();
if (DisableSchedCycles || !NeedLatency)
@@ -225,6 +232,8 @@ private:
SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+ void releaseInterferences(unsigned Reg = 0);
+
SUnit *PickNodeToScheduleBottomUp();
void ListScheduleBottomUp();
@@ -268,14 +277,23 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
const TargetRegisterInfo *TRI,
unsigned &RegClass, unsigned &Cost,
const MachineFunction &MF) {
- EVT VT = RegDefPos.GetValue();
+ MVT VT = RegDefPos.GetValue();
// Special handling for untyped values. These values can only come from
// the expansion of custom DAG-to-DAG patterns.
if (VT == MVT::Untyped) {
const SDNode *Node = RegDefPos.GetNode();
- unsigned Opcode = Node->getMachineOpcode();
+ // Special handling for CopyFromReg of untyped values.
+ if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Opcode = Node->getMachineOpcode();
if (Opcode == TargetOpcode::REG_SEQUENCE) {
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
@@ -312,6 +330,7 @@ void ScheduleDAGRRList::Schedule() {
LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
CallSeqEndForStart.clear();
+ assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -725,6 +744,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
--NumLiveRegs;
LiveRegDefs[I->getReg()] = NULL;
LiveRegGens[I->getReg()] = NULL;
+ releaseInterferences(I->getReg());
}
}
// Release the special call resource dependence, if this is the beginning
@@ -739,6 +759,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
--NumLiveRegs;
LiveRegDefs[CallResource] = NULL;
LiveRegGens[CallResource] = NULL;
+ releaseInterferences(CallResource);
}
}
@@ -794,6 +815,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
--NumLiveRegs;
LiveRegDefs[I->getReg()] = NULL;
LiveRegGens[I->getReg()] = NULL;
+ releaseInterferences(I->getReg());
}
}
@@ -821,6 +843,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
--NumLiveRegs;
LiveRegDefs[CallResource] = NULL;
LiveRegGens[CallResource] = NULL;
+ releaseInterferences(CallResource);
}
}
@@ -881,9 +904,6 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
SUnit *OldSU = Sequence.back();
while (true) {
Sequence.pop_back();
- if (SU->isSucc(OldSU))
- // Don't try to remove SU from AvailableQueue.
- SU->isAvailable = false;
// FIXME: use ready cycle instead of height
CurCycle = OldSU->getHeight();
UnscheduleNodeBottomUp(OldSU);
@@ -1305,34 +1325,60 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
return !LRegs.empty();
}
+void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = Interferences.size(); i > 0; --i) {
+ SUnit *SU = Interferences[i-1];
+ LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
+ if (Reg) {
+ SmallVector<unsigned, 4> &LRegs = LRegsPos->second;
+ if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
+ continue;
+ }
+ SU->isPending = false;
+ // The interfering node may no longer be available due to backtracking.
+ // Furthermore, it may have been made available again, in which case it is
+ // now already in the AvailableQueue.
+ if (SU->isAvailable && !SU->NodeQueueId) {
+ DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
+ AvailableQueue->push(SU);
+ }
+ if (i < Interferences.size())
+ Interferences[i-1] = Interferences.back();
+ Interferences.pop_back();
+ LRegsMap.erase(LRegsPos);
+ }
+}
+
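+ // The backward walk plus swap-with-back is the usual O(1) unordered
+ // erase; as a generic sketch (not part of the patch):
+ //
+ //   for (unsigned i = V.size(); i > 0; --i)
+ //     if (shouldErase(V[i-1])) {
+ //       if (i < V.size())
+ //         V[i-1] = V.back();  // back() was already visited, so it's safe
+ //       V.pop_back();
+ //     }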
/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
- SmallVector<SUnit*, 4> Interferences;
- DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
-
- SUnit *CurSU = AvailableQueue->pop();
+ SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop();
while (CurSU) {
SmallVector<unsigned, 4> LRegs;
if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
break;
- LRegsMap.insert(std::make_pair(CurSU, LRegs));
-
- CurSU->isPending = true; // This SU is not in AvailableQueue right now.
- Interferences.push_back(CurSU);
+ DEBUG(dbgs() << " Interfering reg " <<
+ (LRegs[0] == TRI->getNumRegs() ? "CallResource"
+ : TRI->getName(LRegs[0]))
+ << " SU #" << CurSU->NodeNum << '\n');
+ std::pair<LRegsMapT::iterator, bool> LRegsPair =
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ if (LRegsPair.second) {
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ }
+ else {
+ assert(CurSU->isPending && "Interferences are pending");
+ // Update the interference with current live regs.
+ LRegsPair.first->second = LRegs;
+ }
CurSU = AvailableQueue->pop();
}
- if (CurSU) {
- // Add the nodes that aren't ready back onto the available list.
- for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
- Interferences[i]->isPending = false;
- assert(Interferences[i]->isAvailable && "must still be available");
- AvailableQueue->push(Interferences[i]);
- }
+ if (CurSU)
return CurSU;
- }
// All candidates are delayed due to live physical reg dependencies.
// Try backtracking, code duplication, or inserting cross class copies
@@ -1353,6 +1399,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
}
}
if (!WillCreateCycle(TrySU, BtSU)) {
+ // BacktrackBottomUp mutates Interferences!
BacktrackBottomUp(TrySU, BtSU);
// Force the current node to be scheduled before the node that
@@ -1362,19 +1409,19 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
if (!BtSU->isPending)
AvailableQueue->remove(BtSU);
}
+ DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
+ << TrySU->NodeNum << ")\n");
AddPred(TrySU, SDep(BtSU, SDep::Artificial));
// If one or more successors has been unscheduled, then the current
- // node is no longer avaialable. Schedule a successor that's now
- // available instead.
- if (!TrySU->isAvailable) {
+ // node is no longer available.
+ if (!TrySU->isAvailable)
CurSU = AvailableQueue->pop();
- }
else {
+ AvailableQueue->remove(TrySU);
CurSU = TrySU;
- TrySU->isPending = false;
- Interferences.erase(Interferences.begin()+i);
}
+ // Interferences has been mutated. We must break.
break;
}
}
@@ -1425,17 +1472,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
TrySU->isAvailable = false;
CurSU = NewDef;
}
-
assert(CurSU && "Unable to resolve live physical register dependencies!");
-
- // Add the nodes that aren't ready back onto the available list.
- for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
- Interferences[i]->isPending = false;
- // May no longer be available due to backtracking.
- if (Interferences[i]->isAvailable) {
- AvailableQueue->push(Interferences[i]);
- }
- }
return CurSU;
}
@@ -1456,7 +1493,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
- while (!AvailableQueue->empty()) {
+ while (!AvailableQueue->empty() || !Interferences.empty()) {
DEBUG(dbgs() << "\nExamining Available:\n";
AvailableQueue->dump(this));
@@ -1939,7 +1976,7 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (!N->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
@@ -1973,7 +2010,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
}
for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
RegDefPos.IsValid(); RegDefPos.Advance()) {
- EVT VT = RegDefPos.GetValue();
+ MVT VT = RegDefPos.GetValue();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
if (RegPressure[RCId] >= RegLimit[RCId])
++PDiff;
@@ -1986,7 +2023,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (!N->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
@@ -2097,7 +2134,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
const SDNode *PN = PredSU->getNode();
if (!PN->isMachineOpcode()) {
if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
+ MVT VT = PN->getSimpleValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
}
@@ -2109,14 +2146,14 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
if (POpc == TargetOpcode::EXTRACT_SUBREG ||
POpc == TargetOpcode::INSERT_SUBREG ||
POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
+ MVT VT = PN->getSimpleValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
continue;
}
unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
+ MVT VT = PN->getSimpleValueType(i);
if (!PN->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
@@ -2133,7 +2170,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
if (SU->NumSuccs && N->isMachineOpcode()) {
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
+ MVT VT = N->getSimpleValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index a197fcbfa593..b22440daf16d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -13,26 +13,26 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pre-RA-sched"
-#include "SDNodeDbgValue.h"
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
STATISTIC(LoadsClustered, "Number of loads clustered together");
@@ -562,7 +562,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() {
for (;DefIdx < NodeNumDefs; ++DefIdx) {
if (!Node->hasAnyUseOfValue(DefIdx))
continue;
- ValueType = Node->getValueType(DefIdx);
+ ValueType = Node->getSimpleValueType(DefIdx);
++DefIdx;
return; // Found a normal regdef.
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 907356fd212c..2ff37e0a15e1 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -15,8 +15,8 @@
#ifndef SCHEDULEDAGSDNODES_H
#define SCHEDULEDAGSDNODES_H
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/SelectionDAG.h"
namespace llvm {
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
@@ -135,13 +135,13 @@ namespace llvm {
const SDNode *Node;
unsigned DefIdx;
unsigned NodeNumDefs;
- EVT ValueType;
+ MVT ValueType;
public:
RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
bool IsValid() const { return Node != NULL; }
- EVT GetValue() const {
+ MVT GetValue() const {
assert(IsValid() && "bad iterator");
return ValueType;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 30f03ac737b9..58aa1fe0ebbe 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -19,19 +19,19 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/SchedulerRegistry.h"
#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
using namespace llvm;
@@ -123,6 +123,8 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
llvm_unreachable(0);
}
#endif
+ assert(!D.isWeak() && "unexpected artificial DAG edge");
+
--SuccSU->NumPredsLeft;
SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f000ce38d367..64244313a326 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12,42 +12,43 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
-#include "SDNodeOrdering.h"
#include "SDNodeDbgValue.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
+#include "SDNodeOrdering.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@@ -59,18 +60,6 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
return Res;
}
-static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unknown FP format");
- case MVT::f16: return &APFloat::IEEEhalf;
- case MVT::f32: return &APFloat::IEEEsingle;
- case MVT::f64: return &APFloat::IEEEdouble;
- case MVT::f80: return &APFloat::x87DoubleExtended;
- case MVT::f128: return &APFloat::IEEEquad;
- case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
- }
-}
-
// Default null implementations of the callbacks.
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
@@ -94,7 +83,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
// convert modifies in place, so make a copy.
APFloat Val2 = APFloat(Val);
bool losesInfo;
- (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven,
&losesInfo);
return !losesInfo;
}
@@ -884,15 +874,17 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
- OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+ TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(),
+ getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0), UpdateListeners(0) {
AllNodes.push_back(&EntryNode);
Ordering = new SDNodeOrdering();
DbgInfo = new SDDbgInfo();
}
-void SelectionDAG::init(MachineFunction &mf) {
+void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) {
MF = &mf;
+ TTI = tti;
Context = &mf.getFunction()->getContext();
}
@@ -1074,10 +1066,11 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
return getConstantFP(APFloat((float)Val), VT, isTarget);
else if (EltVT==MVT::f64)
return getConstantFP(APFloat(Val), VT, isTarget);
- else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) {
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 ||
+ EltVT==MVT::f16) {
bool ignored;
APFloat apf = APFloat(Val);
- apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&ignored);
return getConstantFP(apf, VT, isTarget);
} else
@@ -1525,7 +1518,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
/// the target's desired shift amount type.
SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
EVT OpTy = Op.getValueType();
- MVT ShTy = TLI.getShiftAmountTy(LHSTy);
+ EVT ShTy = TLI.getShiftAmountTy(LHSTy);
if (OpTy == ShTy || OpTy.isVector()) return Op;
ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -1924,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
- if (ISD::isZEXTLoad(Op.getNode())) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
@@ -2294,17 +2288,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
break;
}
- // Handle LOADX separately here. EXTLOAD case will fallthrough.
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
- unsigned ExtType = LD->getExtensionType();
- switch (ExtType) {
- default: break;
- case ISD::SEXTLOAD: // '17' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
- return VTBits-Tmp+1;
- case ISD::ZEXTLOAD: // '16' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
- return VTBits-Tmp;
+ // If we are looking at the loaded value of the SDNode.
+ if (Op.getResNo() == 0) {
+ // Handle LOADX separately here. EXTLOAD case will fallthrough.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // '17' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // '16' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp;
+ }
}
}
@@ -2438,7 +2435,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
- APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
@@ -2446,9 +2444,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
}
case ISD::BITCAST:
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(APFloat(Val), VT);
+ return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(APFloat(Val), VT);
+ return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT);
break;
case ISD::BSWAP:
return getConstant(Val.byteSwap(), VT);
@@ -2495,7 +2493,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
- (void)V.convert(*EVTToAPFloatSemantics(VT),
+ (void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, VT);
}
@@ -2686,44 +2684,117 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return SDValue(N, 0);
}
-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
- EVT VT,
- ConstantSDNode *Cst1,
- ConstantSDNode *Cst2) {
- const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
+ SDNode *Cst1, SDNode *Cst2) {
+ SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;
+ SmallVector<SDValue, 4> Outputs;
+ EVT SVT = VT.getScalarType();
- switch (Opcode) {
- case ISD::ADD: return getConstant(C1 + C2, VT);
- case ISD::SUB: return getConstant(C1 - C2, VT);
- case ISD::MUL: return getConstant(C1 * C2, VT);
- case ISD::UDIV:
- if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
- break;
- case ISD::UREM:
- if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
- break;
- case ISD::SDIV:
- if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
- break;
- case ISD::SREM:
- if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
- break;
- case ISD::AND: return getConstant(C1 & C2, VT);
- case ISD::OR: return getConstant(C1 | C2, VT);
- case ISD::XOR: return getConstant(C1 ^ C2, VT);
- case ISD::SHL: return getConstant(C1 << C2, VT);
- case ISD::SRL: return getConstant(C1.lshr(C2), VT);
- case ISD::SRA: return getConstant(C1.ashr(C2), VT);
- case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
- case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
- default: break;
+ ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
+ ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
+ if (Scalar1 && Scalar2) {
+ // Scalar instruction.
+ Inputs.push_back(std::make_pair(Scalar1, Scalar2));
+ } else {
+ // For vectors extract each constant element into Inputs so we can constant
+ // fold them individually.
+ BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV1 || !BV2)
+ return SDValue();
+
+ assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+
+ for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
+ ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
+ ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
+ if (!V1 || !V2) // Not a constant, bail.
+ return SDValue();
+
+ // Avoid BUILD_VECTOR nodes that perform implicit truncation.
+ // FIXME: This is valid and could be handled by truncating the APInts.
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+ return SDValue();
+
+ Inputs.push_back(std::make_pair(V1, V2));
+ }
}
- return SDValue();
+ // We have a number of constant values; constant fold them element by element.
+ for (unsigned I = 0, E = Inputs.size(); I != E; ++I) {
+ const APInt &C1 = Inputs[I].first->getAPIntValue();
+ const APInt &C2 = Inputs[I].second->getAPIntValue();
+
+ switch (Opcode) {
+ case ISD::ADD:
+ Outputs.push_back(getConstant(C1 + C2, SVT));
+ break;
+ case ISD::SUB:
+ Outputs.push_back(getConstant(C1 - C2, SVT));
+ break;
+ case ISD::MUL:
+ Outputs.push_back(getConstant(C1 * C2, SVT));
+ break;
+ case ISD::UDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.udiv(C2), SVT));
+ break;
+ case ISD::UREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.urem(C2), SVT));
+ break;
+ case ISD::SDIV:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.sdiv(C2), SVT));
+ break;
+ case ISD::SREM:
+ if (!C2.getBoolValue())
+ return SDValue();
+ Outputs.push_back(getConstant(C1.srem(C2), SVT));
+ break;
+ case ISD::AND:
+ Outputs.push_back(getConstant(C1 & C2, SVT));
+ break;
+ case ISD::OR:
+ Outputs.push_back(getConstant(C1 | C2, SVT));
+ break;
+ case ISD::XOR:
+ Outputs.push_back(getConstant(C1 ^ C2, SVT));
+ break;
+ case ISD::SHL:
+ Outputs.push_back(getConstant(C1 << C2, SVT));
+ break;
+ case ISD::SRL:
+ Outputs.push_back(getConstant(C1.lshr(C2), SVT));
+ break;
+ case ISD::SRA:
+ Outputs.push_back(getConstant(C1.ashr(C2), SVT));
+ break;
+ case ISD::ROTL:
+ Outputs.push_back(getConstant(C1.rotl(C2), SVT));
+ break;
+ case ISD::ROTR:
+ Outputs.push_back(getConstant(C1.rotr(C2), SVT));
+ break;
+ default:
+ return SDValue();
+ }
+ }
+
+ // Handle the scalar case first.
+ if (Outputs.size() == 1)
+ return Outputs.back();
+
+ // Otherwise build a big vector out of the scalar elements we generated.
+ return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(),
+ Outputs.size());
}
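// For example (nodes hypothetical): adding two v2i32 BUILD_VECTORs
// (1, 2) + (3, 4) now folds element by element into (4, 6), while
// (1, 2) udiv (3, 0) bails out with an empty SDValue at the zero element.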
-SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
- SDValue N1, SDValue N2) {
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1,
+ SDValue N2) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
switch (Opcode) {
@@ -2845,6 +2916,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
"Shift operators return type must be the same as their first arg");
assert(VT.isInteger() && N2.getValueType().isInteger() &&
"Shifts only work on integers");
+ assert((!VT.isVector() || VT == N2.getValueType()) &&
+ "Vector shift amounts must be in the same as their first arg");
// Verify that the shift amount VT is big enough to hold valid shift
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
@@ -3019,16 +3092,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
}
- if (N1C) {
- if (N2C) {
- SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
- if (SV.getNode()) return SV;
- } else { // Cannonicalize constant to RHS if commutative
- if (isCommutativeBinOp(Opcode)) {
- std::swap(N1C, N2C);
- std::swap(N1, N2);
- }
- }
+ // Perform trivial constant folding.
+ SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode());
+ if (SV.getNode()) return SV;
+
+ // Canonicalize constant to RHS if commutative.
+ if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
}
// Constant fold FP operations.
@@ -3036,7 +3107,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
if (N1CFP) {
if (!N2CFP && isCommutativeBinOp(Opcode)) {
- // Cannonicalize constant to RHS if commutative
+ // Canonicalize constant to RHS if commutative.
std::swap(N1CFP, N2CFP);
std::swap(N1, N2);
} else if (N2CFP) {
@@ -3080,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
- (void)V.convert(*EVTToAPFloatSemantics(VT),
+ (void)V.convert(EVTToAPFloatSemantics(VT),
APFloat::rmNearestTiesToEven, &ignored);
return getConstantFP(V, VT);
}
@@ -3312,17 +3383,6 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
&ArgChains[0], ArgChains.size());
}
-/// SplatByte - Distribute ByteVal over NumBits bits.
-static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
- APInt Val = APInt(NumBits, ByteVal);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- }
- return Val;
-}
-
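// The removed doubling loop distributed a byte by repeated widening, e.g.
// SplatByte(32, 0xAB): 0xAB -> 0xABAB -> 0xABABABAB. APInt::getSplat with
// an 8-bit pattern computes the same value, so the local helper can go.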
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3331,17 +3391,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
+ assert(C->getAPIntValue().getBitWidth() == 8);
+ APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
if (VT.isInteger())
return DAG.getConstant(Val, VT);
- return DAG.getConstantFP(APFloat(Val), VT);
+ return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT);
}
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
if (NumBits > 8) {
// Use a multiplication with 0x010101... to extend the input to the
// required length.
- APInt Magic = SplatByte(NumBits, 0x01);
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
}
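// The multiply splats the zero-extended byte across the value, e.g. for
// NumBits == 32: 0xAB * 0x01010101 == 0xABABABAB, one MUL instead of a
// chain of shifts and ors.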
@@ -3370,10 +3431,11 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
}
assert(!VT.isVector() && "Can't handle vector type here!");
- unsigned NumVTBytes = VT.getSizeInBits() / 8;
+ unsigned NumVTBits = VT.getSizeInBits();
+ unsigned NumVTBytes = NumVTBits / 8;
unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
- uint64_t Val = 0;
+ APInt Val(NumVTBits, 0);
if (TLI.isLittleEndian()) {
for (unsigned i = 0; i != NumBytes; ++i)
Val |= (uint64_t)(unsigned char)Str[i] << i*8;
@@ -3382,7 +3444,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
}
- return DAG.getConstant(Val, VT);
+ // If the "cost" of materializing the integer immediate is 1 or free, then
+ // it is cost effective to turn the load into the immediate.
+ const TargetTransformInfo *TTI = DAG.getTargetTransformInfo();
+ if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2)
+ return DAG.getConstant(Val, VT);
+ return SDValue(0, 0);
}
/// getMemBasePlusOffset - Returns base and offset node for the
@@ -3420,8 +3487,10 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset,
+ bool ZeroMemset,
bool MemcpyStrSrc,
+ bool AllowOverlap,
SelectionDAG &DAG,
const TargetLowering &TLI) {
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
@@ -3434,7 +3503,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
- IsZeroVal, MemcpyStrSrc,
+ IsMemset, ZeroMemset, MemcpyStrSrc,
DAG.getMachineFunction());
if (VT == MVT::Other) {
@@ -3464,21 +3533,51 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
// For now, only use non-vector load / store's for the left-over pieces.
+ EVT NewVT = VT;
+ unsigned NewVTSize;
+
+ bool Found = false;
if (VT.isVector() || VT.isFloatingPoint()) {
- VT = MVT::i64;
- while (!TLI.isTypeLegal(VT))
- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
- VTSize = VT.getSizeInBits() / 8;
- } else {
- // This can result in a type that is not legal on the target, e.g.
- // 1 or 2 bytes on PPC.
- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
- VTSize >>= 1;
+ NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+ if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+ TLI.isSafeMemOpType(NewVT.getSimpleVT()))
+ Found = true;
+ else if (NewVT == MVT::i64 &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+ TLI.isSafeMemOpType(MVT::f64)) {
+ // i64 is usually not legal on 32-bit targets, but f64 may be.
+ NewVT = MVT::f64;
+ Found = true;
+ }
+ }
+
+ if (!Found) {
+ do {
+ NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+ if (NewVT == MVT::i8)
+ break;
+ } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
+ }
+ NewVTSize = NewVT.getSizeInBits() / 8;
+
+ // If the new VT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ // FIXME: Only does this for 64-bit or more since we don't have proper
+ // cost model for unaligned load / store.
+ bool Fast;
+ if (NumMemOps && AllowOverlap &&
+ VTSize >= 8 && NewVTSize < Size &&
+ TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast)
+ VTSize = Size;
+ else {
+ VT = NewVT;
+ VTSize = NewVTSize;
}
}
if (++NumMemOps > Limit)
return false;
+
MemOps.push_back(VT);
Size -= VTSize;
}
@@ -3507,8 +3606,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
bool OptSize =
- MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3523,12 +3622,21 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroStr ? 0 : SrcAlign),
- true, CopyFromStr, DAG, TLI))
+ false, false, CopyFromStr, true, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Align &&
+ TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign))
+ NewAlign /= 2;
+
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -3545,6 +3653,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value, Store;
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ SrcOff -= VTSize - Size;
+ DstOff -= VTSize - Size;
+ }
+
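+ // E.g. (sizes hypothetical): a 15-byte memcpy lowered with i64 ops emits
+ // the first load/store pair at offset 0 and the second at offset 7, so
+ // the pairs overlap by one byte instead of falling back to narrow ops.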
if (CopyFromStr &&
(isZeroStr || (VT.isInteger() && !VT.isVector()))) {
// It's unlikely a store of a vector immediate can be done in a single
@@ -3553,11 +3669,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// FIXME: Handle other cases where store of vector immediate is done in
// a single instruction.
Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
- Store = DAG.getStore(Chain, dl, Value,
- getMemBasePlusOffset(Dst, DstOff, DAG),
- DstPtrInfo.getWithOffset(DstOff), isVol,
- false, Align);
- } else {
+ if (Value.getNode())
+ Store = DAG.getStore(Chain, dl, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstPtrInfo.getWithOffset(DstOff), isVol,
+ false, Align);
+ }
+
+ if (!Store.getNode()) {
// The type might not be legal for the target. This should only happen
// if the type is smaller than a legal type, as on PPC, so the right
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
@@ -3577,6 +3696,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
OutChains.push_back(Store);
SrcOff += VTSize;
DstOff += VTSize;
+ Size -= VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -3601,8 +3721,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3612,8 +3732,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
- (DstAlignCanChange ? 0 : Align),
- SrcAlign, true, false, DAG, TLI))
+ (DstAlignCanChange ? 0 : Align), SrcAlign,
+ false, false, false, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3680,8 +3800,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3689,7 +3809,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
- IsZeroVal, false, DAG, TLI))
+ true, IsZeroVal, false, true, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3716,6 +3836,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ DstOff -= VTSize - Size;
+ }
// If this store is smaller than the largest store see whether we can get
// the smaller value for free with a truncate.
@@ -3734,6 +3861,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
isVol, false, Align);
OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;
+ Size -= VTSize;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -3745,6 +3873,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
unsigned Align, bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3812,6 +3941,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3866,6 +3996,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -4577,7 +4708,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
- const std::vector<EVT> &ResultTys,
+ ArrayRef<EVT> ResultTys,
const SDValue *Ops, unsigned NumOps) {
return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
Ops, NumOps);
@@ -5229,7 +5360,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
- const std::vector<EVT> &ResultTys,
+ ArrayRef<EVT> ResultTys,
const SDValue *Ops, unsigned NumOps) {
SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3fbf7c2fe66b..ce40cd6a0c9c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12,51 +12,51 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "isel"
-#include "SDNodeDbgValue.h"
#include "SelectionDAGBuilder.h"
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Constants.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DataLayout.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IntegersSubsetMapping.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/IntegersSubsetMapping.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -89,7 +89,7 @@ static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
- EVT PartVT, EVT ValueVT, const Value *V);
+ MVT PartVT, EVT ValueVT, const Value *V);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
@@ -98,7 +98,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
/// (ISD::AssertSext).
static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts,
- unsigned NumParts, EVT PartVT, EVT ValueVT,
+ unsigned NumParts, MVT PartVT, EVT ValueVT,
const Value *V,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
if (ValueVT.isVector())
@@ -161,7 +161,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
}
} else if (PartVT.isFloatingPoint()) {
// FP split into multiple FP parts (for ppcf128)
- assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
+ assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
"Unexpected split");
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
@@ -179,25 +179,25 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
}
// There is now one part, held in Val. Correct it to match ValueVT.
- PartVT = Val.getValueType();
+ EVT PartEVT = Val.getValueType();
- if (PartVT == ValueVT)
+ if (PartEVT == ValueVT)
return Val;
- if (PartVT.isInteger() && ValueVT.isInteger()) {
- if (ValueVT.bitsLT(PartVT)) {
+ if (PartEVT.isInteger() && ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartEVT)) {
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extension.
if (AssertOp != ISD::DELETED_NODE)
- Val = DAG.getNode(AssertOp, DL, PartVT, Val,
+ Val = DAG.getNode(AssertOp, DL, PartEVT, Val,
DAG.getValueType(ValueVT));
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
}
- if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
@@ -206,7 +206,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
- if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
@@ -219,7 +219,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
- EVT PartVT, EVT ValueVT, const Value *V) {
+ MVT PartVT, EVT ValueVT, const Value *V) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -227,7 +227,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
// Handle a multi-element vector.
if (NumParts > 1) {
- EVT IntermediateVT, RegisterVT;
+ EVT IntermediateVT;
+ MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
@@ -235,7 +236,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
- assert(RegisterVT == Parts[0].getValueType() &&
+ assert(RegisterVT == Parts[0].getSimpleValueType() &&
"Part type doesn't match part!");
// Assemble the parts into intermediate operands.
@@ -265,31 +266,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
}
// There is now one part, held in Val. Correct it to match ValueVT.
- PartVT = Val.getValueType();
+ EVT PartEVT = Val.getValueType();
- if (PartVT == ValueVT)
+ if (PartEVT == ValueVT)
return Val;
- if (PartVT.isVector()) {
+ if (PartEVT.isVector()) {
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
- if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
- assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
"Cannot narrow, it would be a lossy transformation");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
DAG.getIntPtrConstant(0));
}
// Vector/Vector bitcast.
- if (ValueVT.getSizeInBits() == PartVT.getSizeInBits())
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
"Cannot handle this kind of promotion");
// Promoted vector extract
- bool Smaller = ValueVT.bitsLE(PartVT);
+ bool Smaller = ValueVT.bitsLE(PartEVT);
return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
DL, ValueVT, Val);
@@ -297,7 +298,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
// Trivial bitcast if the types are the same size and the destination
// vector type is legal.
- if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
TLI.isTypeLegal(ValueVT))
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
@@ -317,8 +318,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
}
if (ValueVT.getVectorNumElements() == 1 &&
- ValueVT.getVectorElementType() != PartVT) {
- bool Smaller = ValueVT.bitsLE(PartVT);
+ ValueVT.getVectorElementType() != PartEVT) {
+ bool Smaller = ValueVT.bitsLE(PartEVT);
Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
DL, ValueVT.getScalarType(), Val);
}
@@ -328,14 +329,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT, const Value *V);
+ MVT PartVT, const Value *V);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT, const Value *V,
+ MVT PartVT, const Value *V,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
EVT ValueVT = Val.getValueType();
@@ -352,7 +353,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
return;
assert(!ValueVT.isVector() && "Vector case handled elsewhere");
- if (PartVT == ValueVT) {
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
assert(NumParts == 1 && "No-op copy with multiple parts!");
Parts[0] = Val;
return;
@@ -374,7 +376,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
- assert(NumParts == 1 && PartVT != ValueVT);
+ assert(NumParts == 1 && PartEVT != ValueVT);
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
@@ -393,7 +395,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
- if (PartVT != ValueVT) {
+ if (PartEVT != ValueVT) {
LLVMContext &Ctx = *DAG.getContext();
Twine ErrMsg("scalar-to-vector conversion failed");
if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
@@ -466,20 +468,21 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT, const Value *V) {
+ MVT PartVT, const Value *V) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (NumParts == 1) {
- if (PartVT == ValueVT) {
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
// Nothing to do.
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (PartVT.isVector() &&
- PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
- PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+ PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
+ PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
// undef elements.
@@ -499,12 +502,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
//SDValue UndefElts = DAG.getUNDEF(VectorTy);
//Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
} else if (PartVT.isVector() &&
- PartVT.getVectorElementType().bitsGE(
+ PartEVT.getVectorElementType().bitsGE(
ValueVT.getVectorElementType()) &&
- PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+ PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
// Promoted vector extract
- bool Smaller = PartVT.bitsLE(ValueVT);
+ bool Smaller = PartEVT.bitsLE(ValueVT);
Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
DL, PartVT, Val);
} else {
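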
@@ -524,7 +527,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
}
// Handle a multi-element vector.
- EVT IntermediateVT, RegisterVT;
+ EVT IntermediateVT;
+ MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
IntermediateVT,
@@ -589,7 +593,7 @@ namespace {
/// getRegisterType member function, however when with physical registers
/// it is necessary to have a separate record of the types.
///
- SmallVector<EVT, 4> RegVTs;
+ SmallVector<MVT, 4> RegVTs;
/// Regs - This list holds the registers assigned to the values.
/// Each legal or promoted value requires one register, and each
@@ -600,7 +604,7 @@ namespace {
RegsForValue() {}
RegsForValue(const SmallVector<unsigned, 4> &regs,
- EVT regvt, EVT valuevt)
+ MVT regvt, EVT valuevt)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
RegsForValue(LLVMContext &Context, const TargetLowering &tli,
@@ -610,7 +614,7 @@ namespace {
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
- EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+ MVT RegisterVT = tli.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
@@ -621,7 +625,7 @@ namespace {
/// areValueTypesLegal - Return true if types of all the values are legal.
bool areValueTypesLegal(const TargetLowering &TLI) {
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT RegisterVT = RegVTs[Value];
+ MVT RegisterVT = RegVTs[Value];
if (!TLI.isTypeLegal(RegisterVT))
return false;
}
@@ -683,7 +687,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
- EVT RegisterVT = RegVTs[Value];
+ MVT RegisterVT = RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -768,10 +772,12 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
EVT ValueVT = ValueVTs[Value];
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
- EVT RegisterVT = RegVTs[Value];
+ MVT RegisterVT = RegVTs[Value];
+ ISD::NodeType ExtendKind =
+ TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND;
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
- &Parts[Part], NumParts, RegisterVT, V);
+ &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
Part += NumParts;
}
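// Illustrative effect of the ExtendKind change above: when the target
// reports that zero-extending Val to RegisterVT is free (for example,
// 32-bit operations on x86-64 already clear bits 63..32 of the result),
// the parts are emitted as ZERO_EXTEND instead of ANY_EXTEND. The copy
// costs nothing extra, and the known-zero upper bits let later combines
// fold away explicit zext users of the register.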
@@ -834,7 +840,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
- EVT RegisterVT = RegVTs[Value];
+ MVT RegisterVT = RegVTs[Value];
for (unsigned i = 0; i != NumRegs; ++i) {
assert(Reg < Regs.size() && "Mismatch in # registers expected");
Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
@@ -967,7 +973,7 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
// Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
-#include "llvm/Instruction.def"
+#include "llvm/IR/Instruction.def"
}
// Assign the ordering to the freshly created DAG nodes.
@@ -1227,16 +1233,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
const Function *F = I.getParent()->getParent();
- if (F->getRetAttributes().hasAttribute(Attributes::SExt))
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (F->getRetAttributes().hasAttribute(Attributes::ZExt))
+ else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
- VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
+ VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind);
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
- EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
@@ -1244,7 +1252,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (F->getRetAttributes().hasAttribute(Attributes::InReg))
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::InReg))
Flags.setInReg();
// Propagate extension type if any
@@ -1758,8 +1767,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
}
- B.RegVT = VT;
- B.Reg = FuncInfo.CreateReg(VT);
+ B.RegVT = VT.getSimpleVT();
+ B.Reg = FuncInfo.CreateReg(B.RegVT);
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
B.Reg, Sub);
@@ -1793,7 +1802,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
- EVT VT = BB.RegVT;
+ MVT VT = BB.RegVT;
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
Reg, VT);
SDValue Cmp;
@@ -2645,7 +2654,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+ EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
@@ -3137,12 +3146,12 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
OI != E; ++OI) {
const Value *Idx = *OI;
if (StructType *StTy = dyn_cast<StructType>(Ty)) {
- unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
- DAG.getIntPtrConstant(Offset));
+ DAG.getConstant(Offset, N.getValueType()));
}
Ty = StTy->getElementType(Field);
@@ -3187,7 +3196,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
N.getValueType(), IdxN,
DAG.getConstant(Amt, IdxN.getValueType()));
} else {
- SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
+ SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType());
IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
N.getValueType(), IdxN, Scale);
}
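// Why cast<Constant> + getUniqueInteger() instead of cast<ConstantInt>
// (illustrative assumption about the motivation): a vector GEP can index a
// struct field with a splat vector constant rather than a scalar, e.g.
//   getelementptr <2 x %pair*> %p, <2 x i64> zeroinitializer,
//                 <2 x i32> <i32 1, i32 1>
// getUniqueInteger() returns the common value for both forms, where the
// old cast<ConstantInt> would have asserted on the vector case.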
@@ -3510,7 +3519,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
EVT VT = TLI.getValueType(I.getType());
- if (I.getAlignment() * 8 < VT.getSizeInBits())
+ if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
SDValue L =
@@ -3540,7 +3549,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
EVT VT = TLI.getValueType(I.getValueOperand()->getType());
- if (I.getAlignment() * 8 < VT.getSizeInBits())
+ if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
if (TLI.getInsertFencesForAtomic())
@@ -3654,7 +3663,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
///
/// Op = (Op & 0x007fffff) | 0x3f800000;
///
-/// where Op is the hexidecimal representation of floating point value.
+/// where Op is the hexadecimal representation of the floating point value.
static SDValue
GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
@@ -3668,7 +3677,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
///
/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
-/// where Op is the hexidecimal representation of floating point value.
+/// where Op is the hexadecimal representation of the floating point value.
static SDValue
GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
DebugLoc dl) {
@@ -3684,19 +3693,16 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
/// getF32Constant - Get 32-bit floating point constant.
static SDValue
getF32Constant(SelectionDAG &DAG, unsigned Flt) {
- return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+ return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)),
+ MVT::f32);
}
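// The hex argument is the raw IEEE-754 bit pattern of the constant; for
// reference (illustrative decodings of two constants used below):
//   getF32Constant(DAG, 0x3f800000);  // 1.0f (sign 0, exponent 127)
//   getF32Constant(DAG, 0x40549a78);  // 3.3219281f, i.e. log2(10)
// Passing APFloat::IEEEsingle explicitly pins the semantics, so the bit
// pattern can no longer be misread under a different float format.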
-/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitExp(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
// Put the exponent in the right bit position for later addition to the
// final result:
@@ -3715,6 +3721,7 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue TwoToFracPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -3728,16 +3735,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
-
- // Add the exponent into the result in integer domain.
- SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
- TwoToFracPartOfX, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
@@ -3754,16 +3754,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
-
- // Add the exponent into the result in integer domain.
- SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
- TwoToFracPartOfX, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
@@ -3792,37 +3785,27 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
- MVT::i32, t13);
-
- // Add the exponent into the result in integer domain.
- SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
- TwoToFracPartOfX, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FEXP, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
}
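// Sketch of the bit trick shared by these expansions (illustrative,
// assuming IEEE-754 f32): e^x = 2^(x*log2(e)) = 2^n * 2^f with integer n
// and 0 <= f < 1. The polynomial approximates 2^f, which lies in [1,2), so
// its f32 encoding has a fixed exponent; adding (n << 23) to the bit
// pattern bumps the exponent field (bits 30..23) by n, multiplying by 2^n
// exactly:
//   float tf = poly(f);                   // ~2^f, in [1,2)
//   int32_t bits; memcpy(&bits, &tf, 4);  // BITCAST to i32
//   bits += n << 23;                      // scale by 2^n
//   memcpy(&tf, &bits, 4);                // BITCAST back: 2^n * 2^f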
-/// visitLog - Lower a log intrinsic. Handles the special sequences for
+/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitLog(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log(2) [0.69314718f].
@@ -3834,6 +3817,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
+ SDValue LogOfMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -3847,12 +3831,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3fb3a2b1));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
- SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f949a29));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, LogOfMantissa);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// LogOfMantissa =
@@ -3873,12 +3854,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40348e95));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3fdef31a));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, LogOfMantissa);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// LogOfMantissa =
@@ -3907,32 +3885,23 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x408797cb));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x4006dcab));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, LogOfMantissa);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FLOG, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
}
-/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitLog2(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Get the exponent.
@@ -3944,6 +3913,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
// Different possible minimax approximations of significand in
// floating-point for various degrees of accuracy over [1,2].
+ SDValue Log2ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -3955,12 +3925,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x40019463));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
- SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3fd6633d));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log2ofMantissa);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log2ofMantissa =
@@ -3981,12 +3948,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40823e2f));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x4020d29c));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log2ofMantissa);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log2ofMantissa =
@@ -4016,32 +3980,23 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x40c39dad));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x4042902c));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log2ofMantissa);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FLOG2, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
}
-/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitLog10(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
@@ -4053,6 +4008,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
+ SDValue Log10ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -4066,12 +4022,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3f1c0789));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
- SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f011300));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log10ofMantissa);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log10ofMantissa =
@@ -4088,12 +4041,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f6ae232));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f25f7c3));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log10ofMantissa);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log10ofMantissa =
@@ -4118,33 +4068,23 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3fc4316c));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3f57ce70));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log10ofMantissa);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FLOG10, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
}
-/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitExp2(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
-
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
// FractionalPartOfX = x - (float)IntegerPartOfX;
@@ -4155,6 +4095,7 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -4168,15 +4109,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
@@ -4193,15 +4128,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
@@ -4229,54 +4158,42 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FEXP2, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
}
/// visitPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode with x == 10.0f.
-void
-SelectionDAGBuilder::visitPow(const CallInst &I) {
- SDValue result;
- const Value *Val = I.getArgOperand(0);
- DebugLoc dl = getCurDebugLoc();
+static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
bool IsExp10 = false;
-
- if (getValue(Val).getValueType() == MVT::f32 &&
- getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
+ if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
- APFloat Ten(10.0f);
- IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
- }
+ if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
+ APFloat Ten(10.0f);
+ IsExp10 = LHSC->isExactlyValue(Ten);
}
}
- if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(1));
-
+ if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
//
// #define LOG2OF10 3.3219281f
// IntegerPartOfX = (int32_t)(x * LOG2OF10);
- SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
getF32Constant(DAG, 0x40549a78));
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
@@ -4288,6 +4205,7 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -4301,15 +4219,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
@@ -4326,15 +4238,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
@@ -4362,24 +4268,18 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FPOW, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)));
+
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
}
@@ -4400,7 +4300,8 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
return DAG.getConstantFP(1.0, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize) ||
+ if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
// If optimizing for size, don't insert too many multiplies. This
// inserts up to 5 multiplies.
CountPopulation_32(Val)+Log2_32(Val) < 7) {
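// Worked instance of the bound above (illustrative): expansion by repeated
// squaring needs Log2_32(Val) squarings plus one multiply per set bit
// after the first. For Val = 16, CountPopulation_32 + Log2_32 = 1 + 4 = 5
// < 7, so powi(x, 16) is expanded as four squarings even at -Os; for
// Val = 21 (0b10101) the sum is 3 + 4 = 7, and the expansion is skipped
// when optimizing for size.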
@@ -4566,6 +4467,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
MachinePointerInfo(I.getArgOperand(0)),
@@ -4582,6 +4485,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
MachinePointerInfo(I.getArgOperand(0))));
@@ -4599,6 +4504,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
MachinePointerInfo(I.getArgOperand(0)),
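// Illustrative IR this normalization covers (3.3-era intrinsic signature):
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n,
//                                        i32 0, i1 false)
// The language reference treats align 0 and align 1 identically, so the
// builder maps 0 to 1 here; the SDAG entry points above then assert that
// an explicit nonzero alignment always reaches them.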
@@ -4873,7 +4780,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
// to be zero.
// We must do this early because v2i32 is not a legal type.
- DebugLoc dl = getCurDebugLoc();
SDValue ShOps[2];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
@@ -4890,7 +4796,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::x86_avx_vinsertf128_ps_256:
case Intrinsic::x86_avx_vinsertf128_si_256:
case Intrinsic::x86_avx2_vinserti128: {
- DebugLoc dl = getCurDebugLoc();
EVT DestVT = TLI.getValueType(I.getType());
EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
@@ -4906,7 +4811,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::x86_avx_vextractf128_ps_256:
case Intrinsic::x86_avx_vextractf128_si_256:
case Intrinsic::x86_avx2_vextracti128: {
- DebugLoc dl = getCurDebugLoc();
EVT DestVT = TLI.getValueType(I.getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
DestVT.getVectorNumElements();
@@ -4940,7 +4844,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
EVT DestVT = TLI.getValueType(I.getType());
const Value *Op1 = I.getArgOperand(0);
- Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
+ Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1),
DAG.getValueType(DestVT),
DAG.getValueType(getValue(Op1).getValueType()),
getValue(I.getArgOperand(1)),
@@ -4949,53 +4853,57 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return 0;
}
- case Intrinsic::sqrt:
- setValue(&I, DAG.getNode(ISD::FSQRT, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
case Intrinsic::powi:
setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
return 0;
- case Intrinsic::sin:
- setValue(&I, DAG.getNode(ISD::FSIN, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
- case Intrinsic::cos:
- setValue(&I, DAG.getNode(ISD::FCOS, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
case Intrinsic::log:
- visitLog(I);
+ setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::log2:
- visitLog2(I);
+ setValue(&I, expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::log10:
- visitLog10(I);
+ setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::exp:
- visitExp(I);
+ setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::exp2:
- visitExp2(I);
+ setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::pow:
- visitPow(I);
+ setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG, TLI));
return 0;
+ case Intrinsic::sqrt:
case Intrinsic::fabs:
- setValue(&I, DAG.getNode(ISD::FABS, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
+ case Intrinsic::sin:
+ case Intrinsic::cos:
case Intrinsic::floor:
- setValue(&I, DAG.getNode(ISD::FFLOOR, dl,
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::fabs: Opcode = ISD::FABS; break;
+ case Intrinsic::sin: Opcode = ISD::FSIN; break;
+ case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ }
+
+ setValue(&I, DAG.getNode(Opcode, dl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return 0;
+ }
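// Net effect (illustrative): each of these unary FP intrinsics now lowers
// through the single node construction above, e.g. @llvm.ceil.f64 becomes
// DAG.getNode(ISD::FCEIL, dl, MVT::f64, Op), and targets legalize the
// opcode as usual instead of each intrinsic carrying its own copy of this
// boilerplate.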
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5006,7 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI.isOperationLegal(ISD::FMA, VT) &&
TLI.isFMAFasterThanMulAndAdd(VT)){
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5103,7 +5010,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
- Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
+ Res = DAG.getStore(getRoot(), dl, Src, FIN,
MachinePointerInfo::getFixedStack(FI),
true, false, 0);
setValue(&I, Res);
@@ -5191,7 +5098,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
/*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
- Args, DAG, getCurDebugLoc());
+ Args, DAG, dl);
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return 0;
@@ -5217,7 +5124,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op2 = getValue(I.getArgOperand(1));
SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
- setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+ setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2));
return 0;
}
case Intrinsic::prefetch: {
@@ -5267,6 +5174,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2);
DAG.setRoot(Res);
}
+ return 0;
}
case Intrinsic::invariant_start:
// Discard region information.
@@ -5296,8 +5204,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
DAG.getMachineFunction(),
@@ -5342,12 +5249,12 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Entry.Node = ArgNode; Entry.Ty = V->getType();
unsigned attrInd = i - CS.arg_begin() + 1;
- Entry.isSExt = CS.paramHasAttr(attrInd, Attributes::SExt);
- Entry.isZExt = CS.paramHasAttr(attrInd, Attributes::ZExt);
- Entry.isInReg = CS.paramHasAttr(attrInd, Attributes::InReg);
- Entry.isSRet = CS.paramHasAttr(attrInd, Attributes::StructRet);
- Entry.isNest = CS.paramHasAttr(attrInd, Attributes::Nest);
- Entry.isByVal = CS.paramHasAttr(attrInd, Attributes::ByVal);
+ Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
+ Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
+ Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+ Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
+ Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
+ Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
Entry.Alignment = CS.getParamAlignment(attrInd);
Args.push_back(Entry);
}
@@ -5376,13 +5283,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI.LowerCallTo.
- if (isTailCall &&
- !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
- isTailCall = false;
-
- // If there's a possibility that fast-isel has already selected some amount
- // of the current basic block, don't emit a tail call.
- if (isTailCall && TM.Options.EnableFastISel)
+ if (isTailCall && !isInTailCallPosition(CS, TLI))
isTailCall = false;
TargetLowering::
@@ -5856,7 +5757,7 @@ static void GetRegistersForValue(SelectionDAG &DAG,
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types).
- EVT RegVT = *PhysReg.second->vt_begin();
+ MVT RegVT = *PhysReg.second->vt_begin();
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
@@ -5866,8 +5767,7 @@ static void GetRegistersForValue(SelectionDAG &DAG,
// bitcast to the corresponding integer type. This turns an f64 value
// into i64, which can be passed with two i32 values on a 32-bit
// machine.
- RegVT = EVT::getIntegerVT(Context,
- OpInfo.ConstraintVT.getSizeInBits());
+ RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
@@ -5877,7 +5777,7 @@ static void GetRegistersForValue(SelectionDAG &DAG,
NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
}
- EVT RegVT;
+ MVT RegVT;
EVT ValueVT = OpInfo.ConstraintVT;
// If this is a constraint for a specific physical register, like {r17},
@@ -5951,7 +5851,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
- EVT OpVT = MVT::Other;
+ MVT OpVT = MVT::Other;
// Compute the value type for each operand.
switch (OpInfo.Type) {
@@ -5966,10 +5866,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// corresponding argument.
assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
- OpVT = TLI.getValueType(STy->getElementType(ResNo));
+ OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpVT = TLI.getValueType(CS.getType());
+ OpVT = TLI.getSimpleValueType(CS.getType());
}
++ResNo;
break;
@@ -5990,7 +5890,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD).
+ getSimpleVT();
}
OpInfo.ConstraintVT = OpVT;
@@ -6052,6 +5953,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
// If this is a memory input, and if the operand is not indirect, do what we
// need to in order to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
@@ -6155,6 +6060,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
ExtraInfo |= InlineAsm::Extra_MayLoad;
else if (OpInfo.Type == InlineAsm::isOutput)
ExtraInfo |= InlineAsm::Extra_MayStore;
+ else if (OpInfo.Type == InlineAsm::isClobber)
+ ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
}
}
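
A memory operand named in the clobber list may be both read and written, so
the new isClobber case conservatively sets both flags. The flag arithmetic in
isolation (the bit values are placeholders, not InlineAsm's real encoding):

#include <cstdint>

enum : uint32_t { MayLoad = 1u << 0, MayStore = 1u << 1 }; // placeholder bits

static uint32_t extraInfoFor(bool IsInput, bool IsOutput, bool IsClobber) {
  uint32_t Flags = 0;
  if (IsInput)   Flags |= MayLoad;
  if (IsOutput)  Flags |= MayStore;
  if (IsClobber) Flags |= MayLoad | MayStore; // clobbers count as both
  return Flags;
}
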
@@ -6253,11 +6160,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:"
" don't know how to handle tied "
"indirect register inputs");
+ report_fatal_error("Cannot handle indirect register inputs!");
}
RegsForValue MatchedRegs;
MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
- EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+ MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
MatchedRegs.RegVTs.push_back(RegVT);
MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
@@ -6524,7 +6432,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setNest();
Flags.setOrigAlign(OriginalAlignment);
- EVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
@@ -6559,11 +6467,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ComputeValueVTs(*this, CLI.RetTy, RetTys);
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.VT = RegisterVT;
MyFlags.Used = CLI.IsReturnValueUsed;
if (CLI.RetSExt)
MyFlags.Flags.setSExt();
@@ -6613,7 +6521,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
@@ -6681,19 +6589,12 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
return true;
}
-void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
- // If this is the entry block, emit arguments.
- const Function &F = *LLVMBB->getParent();
+void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
DebugLoc dl = SDB->getCurDebugLoc();
const DataLayout *TD = TLI.getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
- // Check whether the function can return without sret-demotion.
- SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- Outs, TLI);
-
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
@@ -6703,7 +6604,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// or one register.
ISD::ArgFlagsTy Flags;
Flags.setSRet();
- EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
+ MVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
Ins.push_back(RetArg);
}
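
When CanLowerReturn is false, the return value is demoted to the hidden sret
pointer inserted above: the callee writes through a caller-provided slot
instead of returning in registers. In C-like terms the lowered shape is
roughly:

struct Big { long A[8]; };

// Hypothetical lowered form: the aggregate return becomes a hidden first
// parameter pointing at caller-allocated storage (the sret slot).
static void produceLowered(Big *Sret) {
  Sret->A[0] = 42;
}
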
@@ -6723,15 +6624,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
unsigned OriginalAlignment =
TD->getABITypeAlignment(ArgTy);
- if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
Flags.setZExt();
- if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
Flags.setSExt();
- if (F.getParamAttributes(Idx).hasAttribute(Attributes::InReg))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
Flags.setInReg();
- if (F.getParamAttributes(Idx).hasAttribute(Attributes::StructRet))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
- if (F.getParamAttributes(Idx).hasAttribute(Attributes::ByVal)) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
Flags.setByVal();
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
@@ -6745,11 +6646,11 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
FrameAlign = TLI.getByValTypeAlignment(ElementTy);
Flags.setByValAlign(FrameAlign);
}
- if (F.getParamAttributes(Idx).hasAttribute(Attributes::Nest))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
Flags.setOrigAlign(OriginalAlignment);
- EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
@@ -6795,8 +6696,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
- EVT VT = ValueVTs[0];
- EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ MVT VT = ValueVTs[0].getSimpleVT();
+ MVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
ISD::NodeType AssertOp = ISD::DELETED_NODE;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
RegVT, VT, NULL, AssertOp);
@@ -6828,14 +6729,14 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
for (unsigned Val = 0; Val != NumValues; ++Val) {
EVT VT = ValueVTs[Val];
- EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
if (!I->use_empty()) {
ISD::NodeType AssertOp = ISD::DELETED_NODE;
- if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
AssertOp = ISD::AssertSext;
- else if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt))
+ else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
AssertOp = ISD::AssertZext;
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
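
The attribute hunks above all make the same move: instead of materialising a
per-parameter attribute object (F.getParamAttributes(Idx).hasAttribute(...)),
the query is keyed by parameter index on the function's whole attribute set
(F.getAttributes().hasAttribute(Idx, ...)). A toy index-keyed set with the
same query shape (not the real AttributeSet):

#include <set>
#include <utility>

enum class Attr { ZExt, SExt, InReg, StructRet, ByVal, Nest };

struct ToyAttributeSet {
  std::set<std::pair<unsigned, Attr>> Pairs; // (parameter index, attribute)
  bool hasAttribute(unsigned Idx, Attr A) const {
    return Pairs.count({Idx, A}) != 0;
  }
};
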
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 9e46d9664f96..9188945bd906 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -14,12 +14,12 @@
#ifndef SELECTIONDAGBUILDER_H
#define SELECTIONDAGBUILDER_H
-#include "llvm/Constants.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include <vector>
@@ -262,7 +262,7 @@ private:
struct BitTestBlock {
BitTestBlock(APInt F, APInt R, const Value* SV,
- unsigned Rg, EVT RgVT, bool E,
+ unsigned Rg, MVT RgVT, bool E,
MachineBasicBlock* P, MachineBasicBlock* D,
const BitTestInfo& C):
First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
@@ -271,7 +271,7 @@ private:
APInt Range;
const Value *SValue;
unsigned Reg;
- EVT RegVT;
+ MVT RegVT;
bool Emitted;
MachineBasicBlock *Parent;
MachineBasicBlock *Default;
@@ -533,13 +533,6 @@ private:
const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
- void visitPow(const CallInst &I);
- void visitExp2(const CallInst &I);
- void visitExp(const CallInst &I);
- void visitLog(const CallInst &I);
- void visitLog2(const CallInst &I);
- void visitLog10(const CallInst &I);
-
void visitVAStart(const CallInst &I);
void visitVAArg(const VAArgInst &I);
void visitVAEnd(const CallInst &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 6f3ce7a44bc4..3b5823bfb277 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -11,23 +11,23 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectionDAG.h"
#include "ScheduleDAGSDNodes.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringExtras.h"
using namespace llvm;
std::string SDNode::getOperationName(const SelectionDAG *G) const {
@@ -140,6 +140,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FSQRT: return "fsqrt";
case ISD::FSIN: return "fsin";
case ISD::FCOS: return "fcos";
+ case ISD::FSINCOS: return "fsincos";
case ISD::FTRUNC: return "ftrunc";
case ISD::FFLOOR: return "ffloor";
case ISD::FCEIL: return "fceil";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c314fa5b5118..eeea9e4cfcff 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -12,23 +12,18 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "isel"
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -37,22 +32,29 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Timer.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/Statistic.h"
#include <algorithm>
using namespace llvm;
@@ -142,7 +144,12 @@ EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
"instruction selector"));
static cl::opt<bool>
EnableFastISelAbort("fast-isel-abort", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction fails"));
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower an instruction"));
+static cl::opt<bool>
+EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower a formal argument"));
static cl::opt<bool>
UseMBPI("use-mbpi",
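
The new -fast-isel-abort-args option follows the same cl::opt pattern as its
neighbours: a hidden static boolean with a description. For reference, a
self-contained flag of the same shape (the flag name and program are a demo,
not part of LLVM):

#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<bool>
DemoAbortArgs("demo-abort-args", cl::Hidden, cl::init(false),
              cl::desc("Abort when argument lowering fails (demo only)"));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return DemoAbortArgs ? 1 : 0;
}
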
@@ -216,8 +223,9 @@ namespace llvm {
ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
const TargetLowering &TLI = IS->getTargetLowering();
+ const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>();
- if (OptLevel == CodeGenOpt::None ||
+ if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() ||
TLI.getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::RegPressure)
@@ -348,13 +356,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
LibInfo = &getAnalysis<TargetLibraryInfo>();
+ TTI = getAnalysisIfAvailable<TargetTransformInfo>();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
+ TargetSubtargetInfo &ST =
+ const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
+ ST.resetSubtargetFeatures(MF);
+ TM.resetTargetOptions(MF);
+
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
- CurDAG->init(*MF);
+ CurDAG->init(*MF, TTI);
FuncInfo->set(Fn, *MF);
if (UseMBPI && OptLevel != CodeGenOpt::None)
@@ -364,6 +378,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
SDB->init(GFI, *AA, LibInfo);
+ MF->setHasMSInlineAsm(false);
SelectAllBasicBlocks(Fn);
// If the first basic block in the function has live ins that need to be
@@ -434,24 +449,26 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Determine if there are any calls in this machine function.
MachineFrameInfo *MFI = MF->getFrameInfo();
- if (!MFI->hasCalls()) {
- for (MachineFunction::const_iterator
- I = MF->begin(), E = MF->end(); I != E; ++I) {
- const MachineBasicBlock *MBB = I;
- for (MachineBasicBlock::const_iterator
- II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
-
- if ((MCID.isCall() && !MCID.isReturn()) ||
- II->isStackAligningInlineAsm()) {
- MFI->setHasCalls(true);
- goto done;
- }
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
+
+ if (MFI->hasCalls() && MF->hasMSInlineAsm())
+ break;
+
+ const MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end();
+ II != IE; ++II) {
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
+ if ((MCID.isCall() && !MCID.isReturn()) ||
+ II->isStackAligningInlineAsm()) {
+ MFI->setHasCalls(true);
+ }
+ if (II->isMSInlineAsm()) {
+ MF->setHasMSInlineAsm(true);
}
}
}
- done:
// Determine if there is a call to setjmp in the machine function.
MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
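
The loop rewrite above trades the old goto-out-of-nested-loops for an early
exit checked at the top of the outer loop once both facts (has calls, has MS
inline asm) are established. The same control flow reduced to a stand-alone
sketch:

#include <vector>

struct ToyInstr { bool IsCall, IsMSInlineAsm; };

static void scanBlocks(const std::vector<std::vector<ToyInstr>> &Blocks,
                       bool &HasCalls, bool &HasMSInlineAsm) {
  for (const auto &MBB : Blocks) {
    if (HasCalls && HasMSInlineAsm)
      break; // both properties known; stop scanning
    for (const ToyInstr &I : MBB) {
      if (I.IsCall)        HasCalls = true;
      if (I.IsMSInlineAsm) HasMSInlineAsm = true;
    }
  }
}
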
@@ -768,8 +785,12 @@ void SelectionDAGISel::DoInstructionSelection() {
if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
continue;
// Replace node.
- if (ResNode)
+ if (ResNode) {
+ // Propagate ordering
+ CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node));
+
ReplaceUses(Node, ResNode);
+ }
// If after the replacement this node is not used any more,
// remove this dead node.
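
Copying the ordering before ReplaceUses matters because the replacement node
would otherwise have no recorded position when later scheduling queries look
it up. In miniature (the node type and map are stand-ins, not SelectionDAG):

#include <map>

struct ToyNode {};

static void replacePreservingOrder(std::map<const ToyNode *, unsigned> &Order,
                                   const ToyNode *Old, const ToyNode *New) {
  Order[New] = Order[Old]; // propagate before uses are rewired
  // ... rewire uses of Old to New here ...
}
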
@@ -1004,34 +1025,28 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (AllPredsVisited) {
for (BasicBlock::const_iterator I = LLVMBB->begin();
- isa<PHINode>(I); ++I)
- FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I));
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ FuncInfo->ComputePHILiveOutRegInfo(PN);
} else {
for (BasicBlock::const_iterator I = LLVMBB->begin();
- isa<PHINode>(I); ++I)
- FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I));
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ FuncInfo->InvalidatePHILiveOutRegInfo(PN);
}
FuncInfo->VisitedBBs.insert(LLVMBB);
}
- FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
- FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
-
BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
BasicBlock::const_iterator const End = LLVMBB->end();
BasicBlock::const_iterator BI = End;
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
// Setup an EH landing-pad block.
if (FuncInfo->MBB->isLandingPad())
PrepareEHLandingPad();
- // Lower any arguments needed in this block if this is the entry block.
- if (LLVMBB == &Fn.getEntryBlock())
- LowerArguments(LLVMBB);
-
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
FastIS->startNewBlock();
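
The PHI walks rewritten above fold the cast into the loop condition, so the
bound check and the cast are one and the same: iteration stops at the first
non-PHI, and the body receives an already-cast pointer. The same idiom with
standard RTTI standing in for LLVM's dyn_cast:

#include <cstddef>
#include <vector>

struct ToyInstr { virtual ~ToyInstr() = default; };
struct ToyPhi : ToyInstr {};

static void visitLeadingPhis(const std::vector<const ToyInstr *> &Block) {
  // The condition declares PN; the loop ends when the cast yields null.
  for (std::size_t I = 0;
       const ToyPhi *PN = (I != Block.size()
                               ? dynamic_cast<const ToyPhi *>(Block[I])
                               : nullptr);
       ++I)
    (void)PN; // compute or invalidate live-out info for PN here
}
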
@@ -1039,9 +1054,18 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Emit code for any incoming arguments. This must happen before
// beginning FastISel on the entry block.
if (LLVMBB == &Fn.getEntryBlock()) {
- CurDAG->setRoot(SDB->getControlRoot());
- SDB->clear();
- CodeGenAndEmitDAG();
+ // Lower any arguments needed in this block if this is the entry block.
+ if (!FastIS->LowerArguments()) {
+ // Fast isel failed to lower these arguments
+ if (EnableFastISelAbortArgs)
+ llvm_unreachable("FastISel didn't lower all arguments");
+
+ // Use SelectionDAG argument lowering
+ LowerArguments(Fn);
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
// If we inserted any instructions at the beginning, make a note of
// where they are, so we can be sure to emit subsequent instructions
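
Entry-block argument lowering now tries FastISel first and falls back to the
SelectionDAG path only when it declines. The control flow in isolation (the
two hooks are parameters here, not LLVM API):

#include <functional>

static void lowerEntryArguments(const std::function<bool()> &FastPath,
                                const std::function<void()> &GeneralPath) {
  if (!FastPath())
    GeneralPath(); // slower SelectionDAG-style fallback
}
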
@@ -1111,19 +1135,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
bool HadTailCall = false;
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
SelectBasicBlock(Inst, BI, HadTailCall);
- // Recompute NumFastIselRemaining as Selection DAG instruction
- // selection may have handled the call, input args, etc.
- unsigned RemainingNow = std::distance(Begin, BI);
- NumFastIselFailures += NumFastIselRemaining - RemainingNow;
-
// If the call was emitted as a tail call, we're done with the block.
+ // We also need to delete any previously emitted instructions.
if (HadTailCall) {
+ FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end());
--BI;
break;
}
+ // Recompute NumFastIselRemaining as Selection DAG instruction
+ // selection may have handled the call, input args, etc.
+ unsigned RemainingNow = std::distance(Begin, BI);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
NumFastIselRemaining = RemainingNow;
continue;
}
@@ -1150,6 +1176,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
FastIS->recomputeInsertPt();
+ } else {
+ // Lower any arguments needed in this block if this is the entry block.
+ if (LLVMBB == &Fn.getEntryBlock())
+ LowerArguments(Fn);
}
if (Begin != BI)
@@ -1189,14 +1219,12 @@ SelectionDAGISel::FinishBasicBlock() {
SDB->JTCases.empty() &&
SDB->BitTestCases.empty()) {
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
- MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
continue;
- PHI->addOperand(
- MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
- PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
}
return;
}
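
The PHI updates throughout FinishBasicBlock switch from paired addOperand
calls to MachineInstrBuilder's fluent interface: each add returns the builder,
so a register/block pair appends as one chained expression. A toy builder with
the same shape (names are illustrative):

#include <vector>

struct ToyOperand { bool IsReg; unsigned Value; };
struct ToyInstr { std::vector<ToyOperand> Ops; };

class ToyInstrBuilder {
  ToyInstr &MI;
public:
  explicit ToyInstrBuilder(ToyInstr &I) : MI(I) {}
  ToyInstrBuilder &addReg(unsigned R) { MI.Ops.push_back({true, R}); return *this; }
  ToyInstrBuilder &addMBB(unsigned B) { MI.Ops.push_back({false, B}); return *this; }
};

// Usage mirroring the hunks above: ToyInstrBuilder(PHI).addReg(R).addMBB(BB);
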
@@ -1248,33 +1276,23 @@ SelectionDAGISel::FinishBasicBlock() {
// Update PHI Nodes
for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
pi != pe; ++pi) {
- MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// This is "default" BB. We have two jumps to it. From "header" BB and
// from last "case" BB.
- if (PHIBB == SDB->BitTestCases[i].Default) {
- PHI->addOperand(MachineOperand::
- CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
- false));
- PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
- PHI->addOperand(MachineOperand::
- CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
- false));
- PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
- back().ThisBB));
- }
+ if (PHIBB == SDB->BitTestCases[i].Default)
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->BitTestCases[i].Parent)
+ .addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->BitTestCases[i].Cases.back().ThisBB);
// One of "cases" BB.
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
j != ej; ++j) {
MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
- if (cBB->isSuccessor(PHIBB)) {
- PHI->addOperand(MachineOperand::
- CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
- false));
- PHI->addOperand(MachineOperand::CreateMBB(cBB));
- }
+ if (cBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB);
}
}
}
@@ -1309,25 +1327,17 @@ SelectionDAGISel::FinishBasicBlock() {
// Update PHI Nodes
for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
pi != pe; ++pi) {
- MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// "default" BB. We can go there only from header BB.
- if (PHIBB == SDB->JTCases[i].second.Default) {
- PHI->addOperand
- (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
- false));
- PHI->addOperand
- (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
- }
+ if (PHIBB == SDB->JTCases[i].second.Default)
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->JTCases[i].first.HeaderBB);
// JT BB. Just iterate over successors here
- if (FuncInfo->MBB->isSuccessor(PHIBB)) {
- PHI->addOperand
- (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
- false));
- PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
- }
+ if (FuncInfo->MBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB);
}
}
SDB->JTCases.clear();
@@ -1335,14 +1345,11 @@ SelectionDAGISel::FinishBasicBlock() {
// If the switch block involved a branch to one of the actual successors, we
// need to update PHI nodes in that block.
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
- MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
- if (FuncInfo->MBB->isSuccessor(PHI->getParent())) {
- PHI->addOperand(
- MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
- PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
- }
+ if (FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
}
// If we generated any switch lowering information, build and codegen any
@@ -1378,18 +1385,16 @@ SelectionDAGISel::FinishBasicBlock() {
// FuncInfo->MBB may have been removed from the CFG if a branch was
// constant folded.
if (ThisBB->isSuccessor(FuncInfo->MBB)) {
- for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin();
- Phi != FuncInfo->MBB->end() && Phi->isPHI();
- ++Phi) {
+ for (MachineBasicBlock::iterator
+ MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end();
+ MBBI != MBBE && MBBI->isPHI(); ++MBBI) {
+ MachineInstrBuilder PHI(*MF, MBBI);
// This value for this PHI node is recorded in PHINodesToUpdate.
for (unsigned pn = 0; ; ++pn) {
assert(pn != FuncInfo->PHINodesToUpdate.size() &&
"Didn't find PHI entry!");
- if (FuncInfo->PHINodesToUpdate[pn].first == Phi) {
- Phi->addOperand(MachineOperand::
- CreateReg(FuncInfo->PHINodesToUpdate[pn].second,
- false));
- Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
+ if (FuncInfo->PHINodesToUpdate[pn].first == PHI) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB);
break;
}
}
@@ -1669,9 +1674,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops);
- std::vector<EVT> VTs;
- VTs.push_back(MVT::Other);
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { MVT::Other, MVT::Glue };
SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
VTs, &Ops[0], Ops.size());
New->setNodeId(-1);
@@ -2605,11 +2608,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
SDValue Imm = RecordedNodes[RecNo].first;
if (Imm->getOpcode() == ISD::Constant) {
- int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
- Imm = CurDAG->getTargetConstant(Val, Imm.getValueType());
+ const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue();
+ Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true);
} else if (Imm->getOpcode() == ISD::ConstantFP) {
const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
- Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
+ Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true);
}
RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 39216356522f..b752b482e3a1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -11,21 +11,21 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectionDAG.h"
#include "ScheduleDAGSDNodes.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace llvm {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 49f55e2fc608..f5fc66c4d3da 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12,1037 +12,191 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetLowering.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <cctype>
using namespace llvm;
-/// InitLibcallNames - Set default libcall names.
-///
-static void InitLibcallNames(const char **Names) {
- Names[RTLIB::SHL_I16] = "__ashlhi3";
- Names[RTLIB::SHL_I32] = "__ashlsi3";
- Names[RTLIB::SHL_I64] = "__ashldi3";
- Names[RTLIB::SHL_I128] = "__ashlti3";
- Names[RTLIB::SRL_I16] = "__lshrhi3";
- Names[RTLIB::SRL_I32] = "__lshrsi3";
- Names[RTLIB::SRL_I64] = "__lshrdi3";
- Names[RTLIB::SRL_I128] = "__lshrti3";
- Names[RTLIB::SRA_I16] = "__ashrhi3";
- Names[RTLIB::SRA_I32] = "__ashrsi3";
- Names[RTLIB::SRA_I64] = "__ashrdi3";
- Names[RTLIB::SRA_I128] = "__ashrti3";
- Names[RTLIB::MUL_I8] = "__mulqi3";
- Names[RTLIB::MUL_I16] = "__mulhi3";
- Names[RTLIB::MUL_I32] = "__mulsi3";
- Names[RTLIB::MUL_I64] = "__muldi3";
- Names[RTLIB::MUL_I128] = "__multi3";
- Names[RTLIB::MULO_I32] = "__mulosi4";
- Names[RTLIB::MULO_I64] = "__mulodi4";
- Names[RTLIB::MULO_I128] = "__muloti4";
- Names[RTLIB::SDIV_I8] = "__divqi3";
- Names[RTLIB::SDIV_I16] = "__divhi3";
- Names[RTLIB::SDIV_I32] = "__divsi3";
- Names[RTLIB::SDIV_I64] = "__divdi3";
- Names[RTLIB::SDIV_I128] = "__divti3";
- Names[RTLIB::UDIV_I8] = "__udivqi3";
- Names[RTLIB::UDIV_I16] = "__udivhi3";
- Names[RTLIB::UDIV_I32] = "__udivsi3";
- Names[RTLIB::UDIV_I64] = "__udivdi3";
- Names[RTLIB::UDIV_I128] = "__udivti3";
- Names[RTLIB::SREM_I8] = "__modqi3";
- Names[RTLIB::SREM_I16] = "__modhi3";
- Names[RTLIB::SREM_I32] = "__modsi3";
- Names[RTLIB::SREM_I64] = "__moddi3";
- Names[RTLIB::SREM_I128] = "__modti3";
- Names[RTLIB::UREM_I8] = "__umodqi3";
- Names[RTLIB::UREM_I16] = "__umodhi3";
- Names[RTLIB::UREM_I32] = "__umodsi3";
- Names[RTLIB::UREM_I64] = "__umoddi3";
- Names[RTLIB::UREM_I128] = "__umodti3";
-
- // These are generally not available.
- Names[RTLIB::SDIVREM_I8] = 0;
- Names[RTLIB::SDIVREM_I16] = 0;
- Names[RTLIB::SDIVREM_I32] = 0;
- Names[RTLIB::SDIVREM_I64] = 0;
- Names[RTLIB::SDIVREM_I128] = 0;
- Names[RTLIB::UDIVREM_I8] = 0;
- Names[RTLIB::UDIVREM_I16] = 0;
- Names[RTLIB::UDIVREM_I32] = 0;
- Names[RTLIB::UDIVREM_I64] = 0;
- Names[RTLIB::UDIVREM_I128] = 0;
-
- Names[RTLIB::NEG_I32] = "__negsi2";
- Names[RTLIB::NEG_I64] = "__negdi2";
- Names[RTLIB::ADD_F32] = "__addsf3";
- Names[RTLIB::ADD_F64] = "__adddf3";
- Names[RTLIB::ADD_F80] = "__addxf3";
- Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
- Names[RTLIB::SUB_F32] = "__subsf3";
- Names[RTLIB::SUB_F64] = "__subdf3";
- Names[RTLIB::SUB_F80] = "__subxf3";
- Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
- Names[RTLIB::MUL_F32] = "__mulsf3";
- Names[RTLIB::MUL_F64] = "__muldf3";
- Names[RTLIB::MUL_F80] = "__mulxf3";
- Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
- Names[RTLIB::DIV_F32] = "__divsf3";
- Names[RTLIB::DIV_F64] = "__divdf3";
- Names[RTLIB::DIV_F80] = "__divxf3";
- Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
- Names[RTLIB::REM_F32] = "fmodf";
- Names[RTLIB::REM_F64] = "fmod";
- Names[RTLIB::REM_F80] = "fmodl";
- Names[RTLIB::REM_PPCF128] = "fmodl";
- Names[RTLIB::FMA_F32] = "fmaf";
- Names[RTLIB::FMA_F64] = "fma";
- Names[RTLIB::FMA_F80] = "fmal";
- Names[RTLIB::FMA_PPCF128] = "fmal";
- Names[RTLIB::POWI_F32] = "__powisf2";
- Names[RTLIB::POWI_F64] = "__powidf2";
- Names[RTLIB::POWI_F80] = "__powixf2";
- Names[RTLIB::POWI_PPCF128] = "__powitf2";
- Names[RTLIB::SQRT_F32] = "sqrtf";
- Names[RTLIB::SQRT_F64] = "sqrt";
- Names[RTLIB::SQRT_F80] = "sqrtl";
- Names[RTLIB::SQRT_PPCF128] = "sqrtl";
- Names[RTLIB::LOG_F32] = "logf";
- Names[RTLIB::LOG_F64] = "log";
- Names[RTLIB::LOG_F80] = "logl";
- Names[RTLIB::LOG_PPCF128] = "logl";
- Names[RTLIB::LOG2_F32] = "log2f";
- Names[RTLIB::LOG2_F64] = "log2";
- Names[RTLIB::LOG2_F80] = "log2l";
- Names[RTLIB::LOG2_PPCF128] = "log2l";
- Names[RTLIB::LOG10_F32] = "log10f";
- Names[RTLIB::LOG10_F64] = "log10";
- Names[RTLIB::LOG10_F80] = "log10l";
- Names[RTLIB::LOG10_PPCF128] = "log10l";
- Names[RTLIB::EXP_F32] = "expf";
- Names[RTLIB::EXP_F64] = "exp";
- Names[RTLIB::EXP_F80] = "expl";
- Names[RTLIB::EXP_PPCF128] = "expl";
- Names[RTLIB::EXP2_F32] = "exp2f";
- Names[RTLIB::EXP2_F64] = "exp2";
- Names[RTLIB::EXP2_F80] = "exp2l";
- Names[RTLIB::EXP2_PPCF128] = "exp2l";
- Names[RTLIB::SIN_F32] = "sinf";
- Names[RTLIB::SIN_F64] = "sin";
- Names[RTLIB::SIN_F80] = "sinl";
- Names[RTLIB::SIN_PPCF128] = "sinl";
- Names[RTLIB::COS_F32] = "cosf";
- Names[RTLIB::COS_F64] = "cos";
- Names[RTLIB::COS_F80] = "cosl";
- Names[RTLIB::COS_PPCF128] = "cosl";
- Names[RTLIB::POW_F32] = "powf";
- Names[RTLIB::POW_F64] = "pow";
- Names[RTLIB::POW_F80] = "powl";
- Names[RTLIB::POW_PPCF128] = "powl";
- Names[RTLIB::CEIL_F32] = "ceilf";
- Names[RTLIB::CEIL_F64] = "ceil";
- Names[RTLIB::CEIL_F80] = "ceill";
- Names[RTLIB::CEIL_PPCF128] = "ceill";
- Names[RTLIB::TRUNC_F32] = "truncf";
- Names[RTLIB::TRUNC_F64] = "trunc";
- Names[RTLIB::TRUNC_F80] = "truncl";
- Names[RTLIB::TRUNC_PPCF128] = "truncl";
- Names[RTLIB::RINT_F32] = "rintf";
- Names[RTLIB::RINT_F64] = "rint";
- Names[RTLIB::RINT_F80] = "rintl";
- Names[RTLIB::RINT_PPCF128] = "rintl";
- Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
- Names[RTLIB::NEARBYINT_F64] = "nearbyint";
- Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
- Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
- Names[RTLIB::FLOOR_F32] = "floorf";
- Names[RTLIB::FLOOR_F64] = "floor";
- Names[RTLIB::FLOOR_F80] = "floorl";
- Names[RTLIB::FLOOR_PPCF128] = "floorl";
- Names[RTLIB::COPYSIGN_F32] = "copysignf";
- Names[RTLIB::COPYSIGN_F64] = "copysign";
- Names[RTLIB::COPYSIGN_F80] = "copysignl";
- Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
- Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
- Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
- Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
- Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
- Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
- Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
- Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
- Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
- Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
- Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
- Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
- Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
- Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
- Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
- Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
- Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
- Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
- Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
- Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
- Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
- Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
- Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
- Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
- Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
- Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
- Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
- Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
- Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
- Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
- Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
- Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
- Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
- Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
- Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
- Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
- Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
- Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
- Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
- Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
- Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
- Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
- Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
- Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
- Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
- Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
- Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
- Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
- Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
- Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
- Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
- Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
- Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
- Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
- Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
- Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
- Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
- Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
- Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
- Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
- Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
- Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
- Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
- Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
- Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
- Names[RTLIB::OEQ_F32] = "__eqsf2";
- Names[RTLIB::OEQ_F64] = "__eqdf2";
- Names[RTLIB::UNE_F32] = "__nesf2";
- Names[RTLIB::UNE_F64] = "__nedf2";
- Names[RTLIB::OGE_F32] = "__gesf2";
- Names[RTLIB::OGE_F64] = "__gedf2";
- Names[RTLIB::OLT_F32] = "__ltsf2";
- Names[RTLIB::OLT_F64] = "__ltdf2";
- Names[RTLIB::OLE_F32] = "__lesf2";
- Names[RTLIB::OLE_F64] = "__ledf2";
- Names[RTLIB::OGT_F32] = "__gtsf2";
- Names[RTLIB::OGT_F64] = "__gtdf2";
- Names[RTLIB::UO_F32] = "__unordsf2";
- Names[RTLIB::UO_F64] = "__unorddf2";
- Names[RTLIB::O_F32] = "__unordsf2";
- Names[RTLIB::O_F64] = "__unorddf2";
- Names[RTLIB::MEMCPY] = "memcpy";
- Names[RTLIB::MEMMOVE] = "memmove";
- Names[RTLIB::MEMSET] = "memset";
- Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
- Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
- Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
- Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
- Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
- Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
- Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
- Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
- Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
- Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
- Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
- Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
- Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
- Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
- Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
- Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
- Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
- Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
- Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
- Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
- Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
- Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
- Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
- Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
- Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
- Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
- Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
-}
-
-/// InitLibcallCallingConvs - Set default libcall CallingConvs.
-///
-static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
- for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
- CCs[i] = CallingConv::C;
- }
-}
-
-/// getFPEXT - Return the FPEXT_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
- if (RetVT == MVT::f64)
- return FPEXT_F32_F64;
- }
-
- return UNKNOWN_LIBCALL;
-}
-
-/// getFPROUND - Return the FPROUND_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
- if (RetVT == MVT::f32) {
- if (OpVT == MVT::f64)
- return FPROUND_F64_F32;
- if (OpVT == MVT::f80)
- return FPROUND_F80_F32;
- if (OpVT == MVT::ppcf128)
- return FPROUND_PPCF128_F32;
- } else if (RetVT == MVT::f64) {
- if (OpVT == MVT::f80)
- return FPROUND_F80_F64;
- if (OpVT == MVT::ppcf128)
- return FPROUND_PPCF128_F64;
- }
-
- return UNKNOWN_LIBCALL;
-}
-
-/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
- if (RetVT == MVT::i8)
- return FPTOSINT_F32_I8;
- if (RetVT == MVT::i16)
- return FPTOSINT_F32_I16;
- if (RetVT == MVT::i32)
- return FPTOSINT_F32_I32;
- if (RetVT == MVT::i64)
- return FPTOSINT_F32_I64;
- if (RetVT == MVT::i128)
- return FPTOSINT_F32_I128;
- } else if (OpVT == MVT::f64) {
- if (RetVT == MVT::i8)
- return FPTOSINT_F64_I8;
- if (RetVT == MVT::i16)
- return FPTOSINT_F64_I16;
- if (RetVT == MVT::i32)
- return FPTOSINT_F64_I32;
- if (RetVT == MVT::i64)
- return FPTOSINT_F64_I64;
- if (RetVT == MVT::i128)
- return FPTOSINT_F64_I128;
- } else if (OpVT == MVT::f80) {
- if (RetVT == MVT::i32)
- return FPTOSINT_F80_I32;
- if (RetVT == MVT::i64)
- return FPTOSINT_F80_I64;
- if (RetVT == MVT::i128)
- return FPTOSINT_F80_I128;
- } else if (OpVT == MVT::ppcf128) {
- if (RetVT == MVT::i32)
- return FPTOSINT_PPCF128_I32;
- if (RetVT == MVT::i64)
- return FPTOSINT_PPCF128_I64;
- if (RetVT == MVT::i128)
- return FPTOSINT_PPCF128_I128;
- }
- return UNKNOWN_LIBCALL;
-}
-
-/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
- if (RetVT == MVT::i8)
- return FPTOUINT_F32_I8;
- if (RetVT == MVT::i16)
- return FPTOUINT_F32_I16;
- if (RetVT == MVT::i32)
- return FPTOUINT_F32_I32;
- if (RetVT == MVT::i64)
- return FPTOUINT_F32_I64;
- if (RetVT == MVT::i128)
- return FPTOUINT_F32_I128;
- } else if (OpVT == MVT::f64) {
- if (RetVT == MVT::i8)
- return FPTOUINT_F64_I8;
- if (RetVT == MVT::i16)
- return FPTOUINT_F64_I16;
- if (RetVT == MVT::i32)
- return FPTOUINT_F64_I32;
- if (RetVT == MVT::i64)
- return FPTOUINT_F64_I64;
- if (RetVT == MVT::i128)
- return FPTOUINT_F64_I128;
- } else if (OpVT == MVT::f80) {
- if (RetVT == MVT::i32)
- return FPTOUINT_F80_I32;
- if (RetVT == MVT::i64)
- return FPTOUINT_F80_I64;
- if (RetVT == MVT::i128)
- return FPTOUINT_F80_I128;
- } else if (OpVT == MVT::ppcf128) {
- if (RetVT == MVT::i32)
- return FPTOUINT_PPCF128_I32;
- if (RetVT == MVT::i64)
- return FPTOUINT_PPCF128_I64;
- if (RetVT == MVT::i128)
- return FPTOUINT_PPCF128_I128;
- }
- return UNKNOWN_LIBCALL;
-}
-
-/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::i32) {
- if (RetVT == MVT::f32)
- return SINTTOFP_I32_F32;
- else if (RetVT == MVT::f64)
- return SINTTOFP_I32_F64;
- else if (RetVT == MVT::f80)
- return SINTTOFP_I32_F80;
- else if (RetVT == MVT::ppcf128)
- return SINTTOFP_I32_PPCF128;
- } else if (OpVT == MVT::i64) {
- if (RetVT == MVT::f32)
- return SINTTOFP_I64_F32;
- else if (RetVT == MVT::f64)
- return SINTTOFP_I64_F64;
- else if (RetVT == MVT::f80)
- return SINTTOFP_I64_F80;
- else if (RetVT == MVT::ppcf128)
- return SINTTOFP_I64_PPCF128;
- } else if (OpVT == MVT::i128) {
- if (RetVT == MVT::f32)
- return SINTTOFP_I128_F32;
- else if (RetVT == MVT::f64)
- return SINTTOFP_I128_F64;
- else if (RetVT == MVT::f80)
- return SINTTOFP_I128_F80;
- else if (RetVT == MVT::ppcf128)
- return SINTTOFP_I128_PPCF128;
- }
- return UNKNOWN_LIBCALL;
-}
-
-/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
-/// UNKNOWN_LIBCALL if there is none.
-RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::i32) {
- if (RetVT == MVT::f32)
- return UINTTOFP_I32_F32;
- else if (RetVT == MVT::f64)
- return UINTTOFP_I32_F64;
- else if (RetVT == MVT::f80)
- return UINTTOFP_I32_F80;
- else if (RetVT == MVT::ppcf128)
- return UINTTOFP_I32_PPCF128;
- } else if (OpVT == MVT::i64) {
- if (RetVT == MVT::f32)
- return UINTTOFP_I64_F32;
- else if (RetVT == MVT::f64)
- return UINTTOFP_I64_F64;
- else if (RetVT == MVT::f80)
- return UINTTOFP_I64_F80;
- else if (RetVT == MVT::ppcf128)
- return UINTTOFP_I64_PPCF128;
- } else if (OpVT == MVT::i128) {
- if (RetVT == MVT::f32)
- return UINTTOFP_I128_F32;
- else if (RetVT == MVT::f64)
- return UINTTOFP_I128_F64;
- else if (RetVT == MVT::f80)
- return UINTTOFP_I128_F80;
- else if (RetVT == MVT::ppcf128)
- return UINTTOFP_I128_PPCF128;
- }
- return UNKNOWN_LIBCALL;
-}
-
-/// InitCmpLibcallCCs - Set default comparison libcall CC.
-///
-static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
- memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
- CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
- CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
- CCs[RTLIB::UNE_F32] = ISD::SETNE;
- CCs[RTLIB::UNE_F64] = ISD::SETNE;
- CCs[RTLIB::OGE_F32] = ISD::SETGE;
- CCs[RTLIB::OGE_F64] = ISD::SETGE;
- CCs[RTLIB::OLT_F32] = ISD::SETLT;
- CCs[RTLIB::OLT_F64] = ISD::SETLT;
- CCs[RTLIB::OLE_F32] = ISD::SETLE;
- CCs[RTLIB::OLE_F64] = ISD::SETLE;
- CCs[RTLIB::OGT_F32] = ISD::SETGT;
- CCs[RTLIB::OGT_F64] = ISD::SETGT;
- CCs[RTLIB::UO_F32] = ISD::SETNE;
- CCs[RTLIB::UO_F64] = ISD::SETNE;
- CCs[RTLIB::O_F32] = ISD::SETEQ;
- CCs[RTLIB::O_F64] = ISD::SETEQ;
-}
-
/// NOTE: The constructor takes ownership of TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm,
const TargetLoweringObjectFile *tlof)
- : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
- // All operations default to being supported.
- memset(OpActions, 0, sizeof(OpActions));
- memset(LoadExtActions, 0, sizeof(LoadExtActions));
- memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
- memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
- memset(CondCodeActions, 0, sizeof(CondCodeActions));
-
- // Set default actions for various operations.
- for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
- // Default all indexed load / store to expand.
- for (unsigned IM = (unsigned)ISD::PRE_INC;
- IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
- setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
- setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
- }
-
- // These operations default to expand.
- setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
- }
-
- // Most targets ignore the @llvm.prefetch intrinsic.
- setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
-
- // ConstantFP nodes default to expand. Targets can either change this to
- // Legal, in which case all fp constants are legal, or use isFPImmLegal()
- // to optimize expansions for certain constants.
- setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
-
- // These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::f16, Expand);
- setOperationAction(ISD::FEXP , MVT::f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::f16, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
- setOperationAction(ISD::FCEIL, MVT::f16, Expand);
- setOperationAction(ISD::FRINT, MVT::f16, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
- setOperationAction(ISD::FLOG , MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10, MVT::f32, Expand);
- setOperationAction(ISD::FEXP , MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
- setOperationAction(ISD::FCEIL, MVT::f32, Expand);
- setOperationAction(ISD::FRINT, MVT::f32, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10, MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
- setOperationAction(ISD::FCEIL, MVT::f64, Expand);
- setOperationAction(ISD::FRINT, MVT::f64, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
-
- // Default ISD::TRAP to expand (which turns it into abort).
- setOperationAction(ISD::TRAP, MVT::Other, Expand);
-
- // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
- // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
- //
- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
-
- IsLittleEndian = TD->isLittleEndian();
- PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
- memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
- memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
- maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
- maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
- = maxStoresPerMemmoveOptSize = 4;
- benefitFromCodePlacementOpt = false;
- UseUnderscoreSetJmp = false;
- UseUnderscoreLongJmp = false;
- SelectIsExpensive = false;
- IntDivIsCheap = false;
- Pow2DivIsCheap = false;
- JumpIsExpensive = false;
- predictableSelectIsExpensive = false;
- StackPointerRegisterToSaveRestore = 0;
- ExceptionPointerRegister = 0;
- ExceptionSelectorRegister = 0;
- BooleanContents = UndefinedBooleanContent;
- BooleanVectorContents = UndefinedBooleanContent;
- SchedPreferenceInfo = Sched::ILP;
- JumpBufSize = 0;
- JumpBufAlignment = 0;
- MinFunctionAlignment = 0;
- PrefFunctionAlignment = 0;
- PrefLoopAlignment = 0;
- MinStackArgumentAlignment = 1;
- ShouldFoldAtomicFences = false;
- InsertFencesForAtomic = false;
- SupportJumpTables = true;
- MinimumJumpTableEntries = 4;
-
- InitLibcallNames(LibcallRoutineNames);
- InitCmpLibcallCCs(CmpLibcallCCs);
- InitLibcallCallingConvs(LibcallCallingConvs);
-}
+ : TargetLoweringBase(tm, tlof) {}
-TargetLowering::~TargetLowering() {
- delete &TLOF;
-}
-
-MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const {
- return MVT::getIntegerVT(8*TD->getPointerSize(0));
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return NULL;
}
-/// canOpTrap - Returns true if the operation can trap for the value type.
-/// VT must be a legal type.
-bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
- assert(isTypeLegal(VT));
- switch (Op) {
- default:
+/// Check whether a given call node is in tail position within its function. If
+/// so, it sets Chain to the input chain of the tail call.
+bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ SDValue &Chain) const {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias).hasAttributes())
return false;
- case ISD::FDIV:
- case ISD::FREM:
- case ISD::SDIV:
- case ISD::UDIV:
- case ISD::SREM:
- case ISD::UREM:
- return true;
- }
-}
-
-
-static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
- unsigned &NumIntermediates,
- EVT &RegisterVT,
- TargetLowering *TLI) {
- // Figure out the right, legal destination reg to copy into.
- unsigned NumElts = VT.getVectorNumElements();
- MVT EltTy = VT.getVectorElementType();
-
- unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
- // could break down into LHS/RHS like LegalizeDAG does.
- if (!isPowerOf2_32(NumElts)) {
- NumVectorRegs = NumElts;
- NumElts = 1;
- }
-
- // Divide the input until we get to a supported size. This will always
- // end with a scalar if the target doesn't support vectors.
- while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
- NumElts >>= 1;
- NumVectorRegs <<= 1;
- }
-
- NumIntermediates = NumVectorRegs;
- MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
- if (!TLI->isTypeLegal(NewVT))
- NewVT = EltTy;
- IntermediateVT = NewVT;
-
- unsigned NewVTSize = NewVT.getSizeInBits();
-
- // Convert sizes such as i33 to i64.
- if (!isPowerOf2_32(NewVTSize))
- NewVTSize = NextPowerOf2(NewVTSize);
-
- EVT DestVT = TLI->getRegisterType(NewVT);
- RegisterVT = DestVT;
- if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ return false;
- // Otherwise, promotion or legal types use the same number of registers as
- // the vector decimated to the appropriate level.
- return NumVectorRegs;
+ // Check if the only use is a function return node.
+ return isUsedByReturnOnly(Node, Chain);
}
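
The guard above allows the fold only when the caller's return attributes,
ignoring noalias, are empty, no sign/zero extension applies to the return
value, and the node's sole use is the return. The attribute part as a
stand-alone predicate over a toy attribute list (strings stand in for LLVM's
Attribute enum):

#include <set>
#include <string>

static bool returnAttrsPermitTailCall(std::set<std::string> RetAttrs) {
  RetAttrs.erase("noalias"); // noalias doesn't affect the call sequence
  return RetAttrs.empty();   // anything else (zeroext, signext, ...) blocks it
}
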
-/// isLegalRC - Return true if the value types that can be represented by the
-/// specified register class are all legal.
-bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
- for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
- I != E; ++I) {
- if (isTypeLegal(*I))
- return true;
- }
- return false;
-}
-/// findRepresentativeClass - Return the largest legal super-reg register class
-/// of the register class for the specified type and its associated "cost".
-std::pair<const TargetRegisterClass*, uint8_t>
-TargetLowering::findRepresentativeClass(EVT VT) const {
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
- if (!RC)
- return std::make_pair(RC, 0);
-
- // Compute the set of all super-register classes.
- BitVector SuperRegRC(TRI->getNumRegClasses());
- for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
- SuperRegRC.setBitsInMask(RCI.getMask());
-
- // Find the first legal register class with the largest spill size.
- const TargetRegisterClass *BestRC = RC;
- for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
- const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
- // We want the largest possible spill size.
- if (SuperRC->getSize() <= BestRC->getSize())
- continue;
- if (!isLegalRC(SuperRC))
- continue;
- BestRC = SuperRC;
- }
- return std::make_pair(BestRC, 1);
+/// Generate a libcall taking the given operands as arguments and returning a
+/// result of type RetVT.
+SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
+ RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, DebugLoc dl) const {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallInfo = LowerCallTo(CLI);
+
+ return CallInfo.first;
}
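makeLibCall above marshals each operand into an ArgListEntry whose sign- and zero-extension flags both derive from the single isSigned parameter, then lowers a call to the named runtime routine. A standalone C++ analogue of that marshalling pattern, with invented names ("__example_add", callRuntime) standing in for the DAG machinery:

    #include <cstdio>
    #include <vector>

    struct ArgEntry { long value; bool isSExt; bool isZExt; };

    // Stand-in for the actual call lowering; a real target would emit code here.
    long callRuntime(const char *name, const std::vector<ArgEntry> &args) {
      std::printf("calling %s with %zu args\n", name, args.size());
      return 0;
    }

    long makeLibCallLike(const char *name, const long *ops, unsigned numOps,
                         bool isSigned) {
      std::vector<ArgEntry> args;
      args.reserve(numOps);
      for (unsigned i = 0; i != numOps; ++i)
        args.push_back({ops[i], /*isSExt=*/isSigned, /*isZExt=*/!isSigned});
      return callRuntime(name, args);
    }

    int main() {
      long ops[2] = {1, 2};
      makeLibCallLike("__example_add", ops, 2, /*isSigned=*/false);
      return 0;
    }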
-/// computeRegisterProperties - Once all of the register classes are added,
-/// this allows us to compute derived properties we expose.
-void TargetLowering::computeRegisterProperties() {
- assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
- "Too many value types for ValueTypeActions to hold!");
- // Everything defaults to needing one register.
- for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
- NumRegistersForVT[i] = 1;
- RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
- }
- // ...except isVoid, which doesn't need any registers.
- NumRegistersForVT[MVT::isVoid] = 0;
-
- // Find the largest integer register class.
- unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
- for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
- assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
-
- // Every integer value type larger than this largest register takes twice as
- // many registers to represent as the previous ValueType.
- for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
- EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg;
- if (!ExpandedVT.isInteger())
+/// softenSetCCOperands - Soften the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ DebugLoc dl) const {
+ assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
+ && "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
+ (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
+ (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128;
+ break;
+ default:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ switch (CCCode) {
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ // Fallthrough
+ case ISD::SETUGT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
break;
- NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
- RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
- TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
- ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger);
- }
-
- // Inspect all of the ValueType's smaller than the largest integer
- // register to see which ones need promotion.
- unsigned LegalIntReg = LargestIntReg;
- for (unsigned IntReg = LargestIntReg - 1;
- IntReg >= (unsigned)MVT::i1; --IntReg) {
- EVT IVT = (MVT::SimpleValueType)IntReg;
- if (isTypeLegal(IVT)) {
- LegalIntReg = IntReg;
- } else {
- RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
- (const MVT::SimpleValueType)LegalIntReg;
- ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
- }
- }
-
- // ppcf128 type is really two f64's.
- if (!isTypeLegal(MVT::ppcf128)) {
- NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
- RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
- TransformToType[MVT::ppcf128] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
- }
-
- // Decide how to handle f64. If the target does not have native f64 support,
- // expand it to i64 and we will be generating soft float library calls.
- if (!isTypeLegal(MVT::f64)) {
- NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
- RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
- TransformToType[MVT::f64] = MVT::i64;
- ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
- }
-
- // Decide how to handle f32. If the target does not have native support for
- // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
- if (!isTypeLegal(MVT::f32)) {
- if (isTypeLegal(MVT::f64)) {
- NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
- RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
- TransformToType[MVT::f32] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
- } else {
- NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
- RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
- TransformToType[MVT::f32] = MVT::i32;
- ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
- }
- }
-
- // Loop over all of the vector value types to see which need transformations.
- for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
- if (isTypeLegal(VT)) continue;
-
- // Determine if there is a legal wider type. If so, we should promote to
- // that wider vector type.
- EVT EltVT = VT.getVectorElementType();
- unsigned NElts = VT.getVectorNumElements();
- if (NElts != 1) {
- bool IsLegalWiderType = false;
- // First try to promote the elements of integer vectors. If no legal
- // promotion was found, fallback to the widen-vector method.
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- EVT SVT = (MVT::SimpleValueType)nVT;
- // Promote vectors of integers to vectors with the same number
- // of elements, with a wider element type.
- if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
- && SVT.getVectorNumElements() == NElts &&
- isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
- TransformToType[i] = SVT;
- RegisterTypeForVT[i] = SVT;
- NumRegistersForVT[i] = 1;
- ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
- IsLegalWiderType = true;
- break;
- }
- }
-
- if (IsLegalWiderType) continue;
-
- // Try to widen the vector.
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- EVT SVT = (MVT::SimpleValueType)nVT;
- if (SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts &&
- isTypeLegal(SVT)) {
- TransformToType[i] = SVT;
- RegisterTypeForVT[i] = SVT;
- NumRegistersForVT[i] = 1;
- ValueTypeActions.setTypeAction(VT, TypeWidenVector);
- IsLegalWiderType = true;
- break;
- }
- }
- if (IsLegalWiderType) continue;
- }
-
- MVT IntermediateVT;
- EVT RegisterVT;
- unsigned NumIntermediates;
- NumRegistersForVT[i] =
- getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
- RegisterVT, this);
- RegisterTypeForVT[i] = RegisterVT;
-
- EVT NVT = VT.getPow2VectorType();
- if (NVT == VT) {
- // Type is already a power of 2. The default action is to split.
- TransformToType[i] = MVT::Other;
- unsigned NumElts = VT.getVectorNumElements();
- ValueTypeActions.setTypeAction(VT,
- NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
- } else {
- TransformToType[i] = NVT;
- ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ case ISD::SETUGE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
+ break;
+ case ISD::SETULT:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ break;
+ case ISD::SETULE:
+ LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+ break;
+ case ISD::SETUEQ:
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ break;
+ default: llvm_unreachable("Do not know how to soften this setcc!");
}
}
- // Determine the 'representative' register class for each value type.
-// A representative register class is the largest (meaning one which is
- // not a sub-register class / subreg register class) legal register class for
- // a group of value types. For example, on i386, i8, i16, and i32
- // representative would be GR32; while on x86_64 it's GR64.
- for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
- const TargetRegisterClass* RRC;
- uint8_t Cost;
- tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
- RepRegClassForVT[i] = RRC;
- RepRegClassCostForVT[i] = Cost;
+  // Use the target-specific return value for comparison lib calls.
+ EVT RetVT = getCmpLibcallReturnType();
+ SDValue Ops[2] = { NewLHS, NewRHS };
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewRHS = DAG.getConstant(0, RetVT);
+ CCCode = getCmpLibcallCC(LC1);
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+ SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+ NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), NewLHS,
+ NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
}
}
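For predicates with no single soft-float routine, the code above issues two libcalls (LC1, LC2) and ORs their truth values. A standalone C++ illustration of one such expansion, SETUEQ = unordered OR ordered-equal, with invented helper names in place of the compiler-rt routines:

    #include <cassert>
    #include <cmath>

    bool unord(double a, double b) { return std::isnan(a) || std::isnan(b); }
    bool oeq(double a, double b)   { return !unord(a, b) && a == b; }

    // SETUEQ(a, b) == unord(a, b) | oeq(a, b), mirroring the ISD::OR above.
    bool setueq(double a, double b) { return unord(a, b) || oeq(a, b); }

    int main() {
      assert(setueq(1.0, 1.0));            // ordered and equal
      assert(!setueq(1.0, 2.0));           // ordered, unequal
      assert(setueq(std::nan(""), 2.0));   // unordered
      return 0;
    }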
-const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
- return NULL;
-}
-
-EVT TargetLowering::getSetCCResultType(EVT VT) const {
- assert(!VT.isVector() && "No default SetCC type for vectors!");
- return getPointerTy(0).SimpleTy;
-}
-
-MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
- return MVT::i32; // return the default value
-}
-
-/// getVectorTypeBreakdown - Vector types are broken down into some number of
-/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
-/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
-/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
-///
-/// This method returns the number of registers needed, and the VT for each
-/// register. It also returns the VT and quantity of the intermediate values
-/// before they are promoted/expanded.
-///
-unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
- EVT &IntermediateVT,
- unsigned &NumIntermediates,
- EVT &RegisterVT) const {
- unsigned NumElts = VT.getVectorNumElements();
-
- // If there is a wider vector type with the same element type as this one,
- // or a promoted vector type that has the same number of elements which
- // are wider, then we should convert to that legal vector type.
- // This handles things like <2 x float> -> <4 x float> and
- // <4 x i1> -> <4 x i32>.
- LegalizeTypeAction TA = getTypeAction(Context, VT);
- if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
- RegisterVT = getTypeToTransformTo(Context, VT);
- if (isTypeLegal(RegisterVT)) {
- IntermediateVT = RegisterVT;
- NumIntermediates = 1;
- return 1;
- }
- }
-
- // Figure out the right, legal destination reg to copy into.
- EVT EltTy = VT.getVectorElementType();
-
- unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
- // could break down into LHS/RHS like LegalizeDAG does.
- if (!isPowerOf2_32(NumElts)) {
- NumVectorRegs = NumElts;
- NumElts = 1;
- }
-
- // Divide the input until we get to a supported size. This will always
- // end with a scalar if the target doesn't support vectors.
- while (NumElts > 1 && !isTypeLegal(
- EVT::getVectorVT(Context, EltTy, NumElts))) {
- NumElts >>= 1;
- NumVectorRegs <<= 1;
- }
-
- NumIntermediates = NumVectorRegs;
-
- EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
- if (!isTypeLegal(NewVT))
- NewVT = EltTy;
- IntermediateVT = NewVT;
-
- EVT DestVT = getRegisterType(Context, NewVT);
- RegisterVT = DestVT;
- unsigned NewVTSize = NewVT.getSizeInBits();
-
- // Convert sizes such as i33 to i64.
- if (!isPowerOf2_32(NewVTSize))
- NewVTSize = NextPowerOf2(NewVTSize);
-
- if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
-
- // Otherwise, promotion or legal types use the same number of registers as
- // the vector decimated to the appropriate level.
- return NumVectorRegs;
-}
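The halving loop above is just arithmetic on the element count: scalarize non-power-of-two vectors outright, otherwise split in half until the target accepts the width. A standalone C++ sketch of that arithmetic, where isLegalWidth is an invented stand-in for the target's isTypeLegal query:

    #include <cassert>

    static bool isPowerOf2(unsigned x) { return x && !(x & (x - 1)); }
    static bool isLegalWidth(unsigned numElts) { return numElts == 4 || numElts == 1; }

    unsigned breakdown(unsigned numElts, unsigned &intermediateElts) {
      unsigned numRegs = 1;
      if (!isPowerOf2(numElts)) {        // non-power-of-2: scalarize
        numRegs = numElts;
        numElts = 1;
      }
      while (numElts > 1 && !isLegalWidth(numElts)) {
        numElts >>= 1;                   // halve the vector...
        numRegs <<= 1;                   // ...and double the register count
      }
      intermediateElts = numElts;
      return numRegs;
    }

    int main() {
      unsigned elts;
      assert(breakdown(8, elts) == 2 && elts == 4); // e.g. v8f32 -> 2 x v4f32
      assert(breakdown(3, elts) == 3 && elts == 1); // odd count scalarizes
      return 0;
    }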
-
-/// Get the EVTs and ArgFlags collections that represent the legalized return
-/// type of the given function. This does not require a DAG or a return value,
-/// and is suitable for use before any DAGs for the function are constructed.
-/// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
- SmallVectorImpl<ISD::OutputArg> &Outs,
- const TargetLowering &TLI) {
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, ReturnType, ValueVTs);
- unsigned NumValues = ValueVTs.size();
- if (NumValues == 0) return;
-
- for (unsigned j = 0, f = NumValues; j != f; ++j) {
- EVT VT = ValueVTs[j];
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
- if (attr.hasAttribute(Attributes::SExt))
- ExtendKind = ISD::SIGN_EXTEND;
- else if (attr.hasAttribute(Attributes::ZExt))
- ExtendKind = ISD::ZERO_EXTEND;
-
- // FIXME: C calling convention requires the return type to be promoted to
- // at least 32-bit. But this is not necessary for non-C calling
- // conventions. The frontend should mark functions whose return values
- // require promoting with signext or zeroext attributes.
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
- if (VT.bitsLT(MinVT))
- VT = MinVT;
- }
-
- unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
- EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
-
- // 'inreg' on function refers to return value
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr.hasAttribute(Attributes::InReg))
- Flags.setInReg();
-
- // Propagate extension type if any
- if (attr.hasAttribute(Attributes::SExt))
- Flags.setSExt();
- else if (attr.hasAttribute(Attributes::ZExt))
- Flags.setZExt();
-
- for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
- }
-}
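The promotion rule above only fires when an explicit signext/zeroext attribute is present: an integer return narrower than the 32-bit minimum register type is widened before the register parts are counted. A small standalone C++ check of that rule (the 32-bit minimum is the C-convention assumption noted in the FIXME):

    #include <algorithm>
    #include <cassert>

    unsigned promotedReturnBits(unsigned vtBits, bool hasExtAttr,
                                unsigned minRegBits = 32) {
      if (hasExtAttr)                       // ExtendKind != ANY_EXTEND
        return std::max(vtBits, minRegBits);
      return vtBits;                        // no attribute: leave as-is
    }

    int main() {
      assert(promotedReturnBits(8, true) == 32);  // signext i8 -> 32-bit parts
      assert(promotedReturnBits(8, false) == 8);  // unattributed i8 unchanged
      return 0;
    }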
-
-/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
-/// function arguments in the caller parameter area. This is the actual
-/// alignment, not its logarithm.
-unsigned TargetLowering::getByValTypeAlignment(Type *Ty) const {
- return TD->getCallFrameTypeAlignment(Ty);
-}
-
/// getJumpTableEncoding - Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
@@ -1162,7 +316,8 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
// Search for the smallest integer type with free casts to and from
// Op's type. For expedience, just check power-of-2 integer types.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros();
+ unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros();
+ unsigned SmallVTBits = DemandedSize;
if (!isPowerOf2_32(SmallVTBits))
SmallVTBits = NextPowerOf2(SmallVTBits);
for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
@@ -1175,7 +330,9 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
Op.getNode()->getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
Op.getNode()->getOperand(1)));
- SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X);
+ bool NeedZext = DemandedSize > SmallVTBits;
+ SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
+ dl, Op.getValueType(), X);
return CombineTo(Op, Z);
}
}
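The NeedZext change above picks ANY_EXTEND whenever every demanded bit already fits in the narrow type, since the widened upper bits are then don't-cares. A standalone C++ demonstration, modeling any-extend as garbage in the high half:

    #include <cassert>
    #include <cstdint>

    // zext fills the high half with zeros; any-extend may leave anything there.
    uint32_t widen(uint16_t narrow, bool zext) {
      return zext ? (uint32_t)narrow : (0xDEAD0000u | narrow);
    }

    int main() {
      unsigned demandedSize = 12, smallBits = 16;
      bool needZext = demandedSize > smallBits;   // false: all demanded bits fit
      uint32_t mask = (1u << demandedSize) - 1;   // the bits anyone looks at
      uint16_t x = 0x0ABC;
      assert((widen(x, needZext) & mask) == (widen(x, true) & mask));
      return 0;
    }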
@@ -2039,7 +1196,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
if ((newMask & Mask) == Mask) {
- if (!TD->isLittleEndian())
+ if (!getDataLayout()->isLittleEndian())
bestOffset = (origWidth/width - offset - 1) * (width/8);
else
bestOffset = (uint64_t)offset * (width/8);
@@ -2111,7 +1268,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
EVT newVT = N0.getOperand(0).getValueType();
if (DCI.isBeforeLegalizeOps() ||
(isOperationLegal(ISD::SETCC, newVT) &&
- getCondCodeAction(Cond, newVT)==Legal))
+ getCondCodeAction(Cond, newVT.getSimpleVT())==Legal))
return DAG.getSetCC(dl, VT, N0.getOperand(0),
DAG.getConstant(C1.trunc(InSize), newVT),
Cond);
@@ -2207,9 +1364,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
Cond);
- } else if (Op0.getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(Op0.getOperand(1)) &&
- cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ }
+ if (Op0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
if (Op0.getValueType().bitsGT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
@@ -2224,6 +1382,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getConstant(0, Op0.getValueType()),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
+ if (Op0.getOpcode() == ISD::AssertZext &&
+ cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
}
@@ -2276,7 +1439,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getConstant(MinVal, N0.getValueType()),
ISD::SETEQ);
// If we have setugt X, Max-1, turn it into seteq X, Max
- else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(MaxVal, N0.getValueType()),
ISD::SETEQ);
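The rewrite above relies on an unsigned range fact: X > Max-1 can only hold when X == Max. A standalone exhaustive C++ check over i8:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t Max = 255;
      for (unsigned x = 0; x <= Max; ++x)
        assert(((uint8_t)x > (uint8_t)(Max - 1)) == ((uint8_t)x == Max));
      return 0;
    }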
@@ -2406,36 +1569,36 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the condition is not legal, see if we can find an equivalent one
// which is legal.
- if (!isCondCodeLegal(Cond, N0.getValueType())) {
+ if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
// If the comparison was an awkward floating-point == or != and one of
// the comparison operands is infinity or negative infinity, convert the
// condition to a less-awkward <= or >=.
if (CFP->getValueAPF().isInfinity()) {
if (CFP->getValueAPF().isNegative()) {
if (Cond == ISD::SETOEQ &&
- isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
if (Cond == ISD::SETUEQ &&
- isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
if (Cond == ISD::SETUNE &&
- isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
if (Cond == ISD::SETONE &&
- isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
} else {
if (Cond == ISD::SETOEQ &&
- isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
if (Cond == ISD::SETUEQ &&
- isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
if (Cond == ISD::SETUNE &&
- isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
if (Cond == ISD::SETONE &&
- isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
}
}
@@ -2469,7 +1632,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
- getCondCodeAction(NewCond, N0.getValueType()) == Legal))
+ getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
@@ -2550,7 +1713,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (DAG.isCommutativeBinOp(N0.getOpcode()))
return DAG.getSetCC(dl, VT, N0.getOperand(0),
DAG.getConstant(0, N0.getValueType()), Cond);
- else if (N0.getNode()->hasOneUse()) {
+ if (N0.getNode()->hasOneUse()) {
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
// (Z-X) == X --> Z == X<<1
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
@@ -2566,14 +1729,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
N1.getOpcode() == ISD::XOR) {
// Simplify X == (X+Z) --> Z == 0
- if (N1.getOperand(0) == N0) {
+ if (N1.getOperand(0) == N0)
return DAG.getSetCC(dl, VT, N1.getOperand(1),
DAG.getConstant(0, N1.getValueType()), Cond);
- } else if (N1.getOperand(1) == N0) {
- if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+ if (N1.getOperand(1) == N0) {
+ if (DAG.isCommutativeBinOp(N1.getOpcode()))
return DAG.getSetCC(dl, VT, N1.getOperand(0),
DAG.getConstant(0, N1.getValueType()), Cond);
- } else if (N1.getNode()->hasOneUse()) {
+ if (N1.getNode()->hasOneUse()) {
assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
// X == (Z-X) --> X<<1 == Z
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
@@ -2707,7 +1870,9 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
TargetLowering::ConstraintType
TargetLowering::getConstraintType(const std::string &Constraint) const {
- if (Constraint.size() == 1) {
+ unsigned S = Constraint.size();
+
+ if (S == 1) {
switch (Constraint[0]) {
default: break;
case 'r': return C_RegisterClass;
@@ -2736,9 +1901,11 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
}
}
- if (Constraint.size() > 1 && Constraint[0] == '{' &&
- Constraint[Constraint.size()-1] == '}')
+ if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
+ if (S == 8 && !Constraint.compare(1, 6, "memory", 6)) // "{memory}"
+ return C_Memory;
return C_Register;
+ }
return C_Unknown;
}
@@ -2830,8 +1997,11 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
// Remove the braces from around the name.
StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+
// Figure out which register class contains this reg.
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo();
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
E = RI->regclass_end(); RCI != E; ++RCI) {
const TargetRegisterClass *RC = *RCI;
@@ -2843,12 +2013,22 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
- if (RegName.equals_lower(RI->getName(*I)))
- return std::make_pair(*I, RC);
+ if (RegName.equals_lower(RI->getName(*I))) {
+ std::pair<unsigned, const TargetRegisterClass*> S =
+ std::make_pair(*I, RC);
+
+ // If this register class has the requested value type, return it,
+ // otherwise keep searching and return the first class found
+ // if no other is found which explicitly has the requested type.
+ if (RC->hasType(VT))
+ return S;
+ else if (!R.second)
+ R = S;
+ }
}
}
- return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+ return R;
}
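The search above now remembers the first register class that merely contains the named register but keeps scanning for one that also has the requested value type. A standalone C++ sketch of that preference order, over invented register-class data:

    #include <cassert>
    #include <cstring>
    #include <utility>

    struct RegClass { const char *reg; bool hasRequestedVT; };

    std::pair<int, const RegClass*> findClass(const RegClass *classes, int n,
                                              const char *name) {
      std::pair<int, const RegClass*> fallback(-1, nullptr);
      for (int i = 0; i < n; ++i) {
        if (std::strcmp(classes[i].reg, name) != 0)
          continue;
        if (classes[i].hasRequestedVT)
          return std::make_pair(i, &classes[i]);     // typed match wins outright
        if (!fallback.second)
          fallback = std::make_pair(i, &classes[i]); // else first hit is kept
      }
      return fallback;
    }

    int main() {
      RegClass rcs[] = { {"r0", false}, {"r0", true} };
      assert(findClass(rcs, 2, "r0").first == 1); // class with the VT preferred
      assert(findClass(rcs, 1, "r0").first == 0); // otherwise first match
      return 0;
    }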
//===----------------------------------------------------------------------===//
@@ -2858,7 +2038,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
/// a matching constraint like "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
assert(!ConstraintCode.empty() && "No known constraint!");
- return isdigit(ConstraintCode[0]);
+ return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
/// getMatchedOperand - If this is an input matching constraint, this method
@@ -2913,10 +2093,10 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
assert(!CS.getType()->isVoidTy() &&
"Bad inline asm!");
if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
- OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
+ OpInfo.ConstraintVT = getSimpleValueType(STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = getValueType(CS.getType());
+ OpInfo.ConstraintVT = getSimpleValueType(CS.getType());
}
++ResNo;
break;
@@ -2945,7 +2125,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
// If OpTy is not a single value, it may be a struct/union that we
// can tile with integers.
if (!OpTy->isSingleValueType() && OpTy->isSized()) {
- unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ unsigned BitSize = getDataLayout()->getTypeSizeInBits(OpTy);
switch (BitSize) {
default: break;
case 1:
@@ -2955,14 +2135,14 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
case 64:
case 128:
OpInfo.ConstraintVT =
- EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
break;
}
} else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
OpInfo.ConstraintVT = MVT::getIntegerVT(
- 8*TD->getPointerSize(PT->getAddressSpace()));
+ 8*getDataLayout()->getPointerSize(PT->getAddressSpace()));
} else {
- OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
+ OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
}
}
}
@@ -3255,44 +2435,6 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
}
}
-//===----------------------------------------------------------------------===//
-// Loop Strength Reduction hooks
-//===----------------------------------------------------------------------===//
-
-/// isLegalAddressingMode - Return true if the addressing mode represented
-/// by AM is legal for this target, for a load/store of the specified type.
-bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
- // The default implementation of this implements a conservative RISCy, r+r and
- // r+i addr mode.
-
- // Allows a sign-extended 16-bit immediate field.
- if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
- return false;
-
- // No global is ever allowed as a base.
- if (AM.BaseGV)
- return false;
-
- // Only support r+r,
- switch (AM.Scale) {
- case 0: // "r+i" or just "i", depending on HasBaseReg.
- break;
- case 1:
- if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
- return false;
- // Otherwise we have r+r or r+i.
- break;
- case 2:
- if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
- return false;
- // Allow 2*r as r+r.
- break;
- }
-
- return true;
-}
-
/// BuildExactDiv - Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
@@ -3325,7 +2467,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue TargetLowering::
BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode*>* Created) const {
+ std::vector<SDNode*> *Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl= N->getDebugLoc();
@@ -3385,7 +2527,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue TargetLowering::
BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode*>* Created) const {
+ std::vector<SDNode*> *Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
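BuildExactSDIV above turns an exact divide by a constant into a multiply by the constant's multiplicative inverse modulo 2^N. A standalone worked example for odd 32-bit divisors, computing the inverse with Newton-Raphson (an odd d is its own inverse to three bits since d*d == 1 mod 8, and each step doubles the correct bits; power-of-two factors would be shifted out first, as the pass does):

    #include <cassert>
    #include <cstdint>

    uint32_t inverseMod2_32(uint32_t d) {  // d must be odd
      uint32_t x = d;                      // correct to 3 bits
      for (int i = 0; i < 4; ++i)
        x *= 2 - d * x;                    // 3 -> 6 -> 12 -> 24 -> 48 bits
      return x;
    }

    int main() {
      uint32_t d = 7, n = 7 * 123456u;     // exact multiple, as 'exact' requires
      uint32_t inv = inverseMod2_32(d);
      assert(d * inv == 1u);               // inverse property mod 2^32
      assert(n * inv == 123456u);          // divide replaced by multiply
      return 0;
    }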
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 8a6b120f97e6..10f64c709c7a 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -26,12 +26,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "shadowstackgc"
-#include "llvm/IRBuilder.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
+#include "llvm/CodeGen/GCs.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCs.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CallSite.h"
using namespace llvm;
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 4fbe1b360577..9ab491808fe5 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -35,22 +35,21 @@
#define DEBUG_TYPE "shrink-wrap"
#include "PrologEpilogInserter.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <sstream>
using namespace llvm;
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 4b566fcba931..3903743878b4 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -13,24 +13,24 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sjljehprepare"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -43,7 +43,7 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
class SjLjEHPrepare : public FunctionPass {
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
Type *FunctionContextTy;
Constant *RegisterFn;
Constant *UnregisterFn;
@@ -58,7 +58,7 @@ namespace {
AllocaInst *FuncCtx;
public:
static char ID; // Pass identification, replacement for typeid
- explicit SjLjEHPrepare(const TargetLowering *tli = NULL)
+ explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL)
: FunctionPass(ID), TLI(tli) { }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
@@ -82,7 +82,7 @@ namespace {
char SjLjEHPrepare::ID = 0;
// Public Interface To the SjLjEHPrepare pass.
-FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) {
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) {
return new SjLjEHPrepare(TLI);
}
// doInitialization - Set up declarations and types needed to process
@@ -379,13 +379,22 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
- SmallVector<ReturnInst*, 16> Returns;
- SmallVector<InvokeInst*, 16> Invokes;
+ SmallVector<ReturnInst*, 16> Returns;
+ SmallVector<InvokeInst*, 16> Invokes;
SmallSetVector<LandingPadInst*, 16> LPads;
// Look through the terminators of the basic blocks to find invokes.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ if (Function *Callee = II->getCalledFunction())
+ if (Callee->isIntrinsic() &&
+ Callee->getIntrinsicID() == Intrinsic::donothing) {
+ // Remove the NOP invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->eraseFromParent();
+ continue;
+ }
+
Invokes.push_back(II);
LPads.insert(II->getUnwindDest()->getLandingPadInst());
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 95faafab45a9..20049a89d15d 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -142,6 +142,76 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
++NumLocalRenum;
}
+// Repair indexes after adding and removing instructions.
+void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End) {
+ // FIXME: Is this really necessary? The only caller repairIntervalsForRange()
+ // does the same thing.
+ // Find anchor points, which are at the beginning/end of blocks or at
+ // instructions that already have indexes.
+ while (Begin != MBB->begin() && !hasIndex(Begin))
+ --Begin;
+ while (End != MBB->end() && !hasIndex(End))
+ ++End;
+
+ bool includeStart = (Begin == MBB->begin());
+ SlotIndex startIdx;
+ if (includeStart)
+ startIdx = getMBBStartIdx(MBB);
+ else
+ startIdx = getInstructionIndex(Begin);
+
+ SlotIndex endIdx;
+ if (End == MBB->end())
+ endIdx = getMBBEndIdx(MBB);
+ else
+ endIdx = getInstructionIndex(End);
+
+ // FIXME: Conceptually, this code is implementing an iterator on MBB that
+ // optionally includes an additional position prior to MBB->begin(), indicated
+ // by the includeStart flag. This is done so that we can iterate MIs in a MBB
+ // in parallel with SlotIndexes, but there should be a better way to do this.
+ IndexList::iterator ListB = startIdx.listEntry();
+ IndexList::iterator ListI = endIdx.listEntry();
+ MachineBasicBlock::iterator MBBI = End;
+ bool pastStart = false;
+ while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
+ assert(ListI->getIndex() >= startIdx.getIndex() &&
+ (includeStart || !pastStart) &&
+ "Decremented past the beginning of region to repair.");
+
+ MachineInstr *SlotMI = ListI->getInstr();
+ MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : 0;
+ bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
+
+ if (SlotMI == MI && !MBBIAtBegin) {
+ --ListI;
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) {
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else {
+ --ListI;
+ if (SlotMI)
+ removeMachineInstrFromMaps(SlotMI);
+ }
+ }
+
+ // In theory this could be combined with the previous loop, but it is tricky
+ // to update the IndexList while we are iterating it.
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr *MI = I;
+ if (!MI->isDebugValue() && mi2iMap.find(MI) == mi2iMap.end())
+ insertMachineInstrInMaps(MI);
+ }
+}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void SlotIndexes::dump() const {
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 320128a999ea..c5bbba3ffccc 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -29,6 +29,7 @@
#define DEBUG_TYPE "spillplacement"
#include "SpillPlacement.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 4cd22eb60f55..209792fd407b 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -10,7 +10,6 @@
#define DEBUG_TYPE "spiller"
#include "Spiller.h"
-#include "VirtRegMap.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -19,12 +18,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index dca15ee7580f..0a3818e43ff9 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "regalloc"
#include "SplitKit.h"
-#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
@@ -22,6 +21,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 1cbee843a125..a789a2596dbf 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -22,39 +22,37 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "stackcoloring"
-#include "MachineTraceMetrics.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/DebugInfo.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -69,14 +67,14 @@ DisableColoring("no-stack-coloring",
/// code. If this flag is enabled, we try to save the user.
static cl::opt<bool>
ProtectFromEscapedAllocas("protect-from-escaped-allocas",
- cl::init(false), cl::Hidden,
- cl::desc("Do not optimize lifetime zones that are broken"));
+ cl::init(false), cl::Hidden,
+ cl::desc("Do not optimize lifetime zones that "
+ "are broken"));
STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
STATISTIC(StackSlotMerged, "Number of stack slot merged.");
-STATISTIC(EscapedAllocas,
- "Number of allocas that escaped the lifetime region");
+STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
//===----------------------------------------------------------------------===//
// StackColoring Pass
@@ -104,12 +102,13 @@ class StackColoring : public MachineFunctionPass {
};
/// Maps active slots (per bit) for each basic block.
- DenseMap<MachineBasicBlock*, BlockLifetimeInfo> BlockLiveness;
+ typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap;
+ LivenessMap BlockLiveness;
/// Maps serial numbers to basic blocks.
- DenseMap<MachineBasicBlock*, int> BasicBlocks;
+ DenseMap<const MachineBasicBlock*, int> BasicBlocks;
/// Maps basic blocks to a serial number.
- SmallVector<MachineBasicBlock*, 8> BasicBlockNumbering;
+ SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
/// Maps liveness intervals for each slot.
SmallVector<LiveInterval*, 16> Intervals;
@@ -146,7 +145,7 @@ public:
private:
/// Debug.
- void dump();
+ void dump() const;
/// Removes all of the lifetime marker instructions from the function.
/// \returns true if any markers were removed.
@@ -201,31 +200,35 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-void StackColoring::dump() {
+void StackColoring::dump() const {
for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
FI != FE; ++FI) {
- unsigned Num = BasicBlocks[*FI];
- DEBUG(dbgs()<<"Inspecting block #"<<Num<<" ["<<FI->getName()<<"]\n");
- Num = 0;
+ DEBUG(dbgs()<<"Inspecting block #"<<BasicBlocks.lookup(*FI)<<
+ " ["<<FI->getName()<<"]\n");
+
+ LivenessMap::const_iterator BI = BlockLiveness.find(*FI);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ const BlockLifetimeInfo &BlockInfo = BI->second;
+
DEBUG(dbgs()<<"BEGIN : {");
- for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i)
- DEBUG(dbgs()<<BlockLiveness[*FI].Begin.test(i)<<" ");
+ for (unsigned i=0; i < BlockInfo.Begin.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.Begin.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
DEBUG(dbgs()<<"END : {");
- for (unsigned i=0; i < BlockLiveness[*FI].End.size(); ++i)
- DEBUG(dbgs()<<BlockLiveness[*FI].End.test(i)<<" ");
+ for (unsigned i=0; i < BlockInfo.End.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.End.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
DEBUG(dbgs()<<"LIVE_IN: {");
- for (unsigned i=0; i < BlockLiveness[*FI].LiveIn.size(); ++i)
- DEBUG(dbgs()<<BlockLiveness[*FI].LiveIn.test(i)<<" ");
+ for (unsigned i=0; i < BlockInfo.LiveIn.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.LiveIn.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
DEBUG(dbgs()<<"LIVEOUT: {");
- for (unsigned i=0; i < BlockLiveness[*FI].LiveOut.size(); ++i)
- DEBUG(dbgs()<<BlockLiveness[*FI].LiveOut.test(i)<<" ");
+ for (unsigned i=0; i < BlockInfo.LiveOut.size(); ++i)
+ DEBUG(dbgs()<<BlockInfo.LiveOut.test(i)<<" ");
DEBUG(dbgs()<<"}\n");
}
}
@@ -243,8 +246,11 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
BasicBlocks[*FI] = BasicBlockNumbering.size();
BasicBlockNumbering.push_back(*FI);
- BlockLiveness[*FI].Begin.resize(NumSlot);
- BlockLiveness[*FI].End.resize(NumSlot);
+ // Keep a reference to avoid repeated lookups.
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[*FI];
+
+ BlockInfo.Begin.resize(NumSlot);
+ BlockInfo.End.resize(NumSlot);
for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end();
BI != BE; ++BI) {
@@ -256,7 +262,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
Markers.push_back(BI);
bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START;
- MachineOperand &MI = BI->getOperand(0);
+ const MachineOperand &MI = BI->getOperand(0);
unsigned Slot = MI.getIndex();
MarkersFound++;
@@ -268,15 +274,15 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
}
if (IsStart) {
- BlockLiveness[*FI].Begin.set(Slot);
+ BlockInfo.Begin.set(Slot);
} else {
- if (BlockLiveness[*FI].Begin.test(Slot)) {
+ if (BlockInfo.Begin.test(Slot)) {
// Allocas that start and end within a single block are handled
// specially when computing the LiveIntervals to avoid pessimizing
// the liveness propagation.
- BlockLiveness[*FI].Begin.reset(Slot);
+ BlockInfo.Begin.reset(Slot);
} else {
- BlockLiveness[*FI].End.set(Slot);
+ BlockInfo.End.set(Slot);
}
}
}
@@ -293,47 +299,58 @@ void StackColoring::calculateLocalLiveness() {
// formulation, and END is equivalent to GEN. The result of this computation
// is a map from blocks to bitvectors where the bitvectors represent which
// allocas are live in/out of that block.
- SmallPtrSet<MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
- BasicBlockNumbering.end());
+ SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
+ BasicBlockNumbering.end());
unsigned NumSSMIters = 0;
bool changed = true;
while (changed) {
changed = false;
++NumSSMIters;
- SmallPtrSet<MachineBasicBlock*, 8> NextBBSet;
+ SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet;
- for (SmallVector<MachineBasicBlock*, 8>::iterator
+ for (SmallVector<const MachineBasicBlock*, 8>::iterator
PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
PI != PE; ++PI) {
- MachineBasicBlock *BB = *PI;
+ const MachineBasicBlock *BB = *PI;
if (!BBSet.count(BB)) continue;
+ // Use an iterator to avoid repeated lookups.
+ LivenessMap::iterator BI = BlockLiveness.find(BB);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ BlockLifetimeInfo &BlockInfo = BI->second;
+
BitVector LocalLiveIn;
BitVector LocalLiveOut;
// Forward propagation from begins to ends.
- for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
- PE = BB->pred_end(); PI != PE; ++PI)
- LocalLiveIn |= BlockLiveness[*PI].LiveOut;
- LocalLiveIn |= BlockLiveness[BB].End;
- LocalLiveIn.reset(BlockLiveness[BB].Begin);
+ for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
+ PE = BB->pred_end(); PI != PE; ++PI) {
+ LivenessMap::const_iterator I = BlockLiveness.find(*PI);
+ assert(I != BlockLiveness.end() && "Predecessor not found");
+ LocalLiveIn |= I->second.LiveOut;
+ }
+ LocalLiveIn |= BlockInfo.End;
+ LocalLiveIn.reset(BlockInfo.Begin);
// Reverse propagation from ends to begins.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI)
- LocalLiveOut |= BlockLiveness[*SI].LiveIn;
- LocalLiveOut |= BlockLiveness[BB].Begin;
- LocalLiveOut.reset(BlockLiveness[BB].End);
+ for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ LivenessMap::const_iterator I = BlockLiveness.find(*SI);
+ assert(I != BlockLiveness.end() && "Successor not found");
+ LocalLiveOut |= I->second.LiveIn;
+ }
+ LocalLiveOut |= BlockInfo.Begin;
+ LocalLiveOut.reset(BlockInfo.End);
LocalLiveIn |= LocalLiveOut;
LocalLiveOut |= LocalLiveIn;
// After adopting the live bits, we need to turn-off the bits which
// are de-activated in this block.
- LocalLiveOut.reset(BlockLiveness[BB].End);
- LocalLiveIn.reset(BlockLiveness[BB].Begin);
+ LocalLiveOut.reset(BlockInfo.End);
+ LocalLiveIn.reset(BlockInfo.Begin);
// If we have both BEGIN and END markers in the same basic block then
// we know that the BEGIN marker comes after the END, because we already
@@ -342,25 +359,25 @@ void StackColoring::calculateLocalLiveness() {
// Want to enable the LIVE_IN and LIVE_OUT of slots that have both
// BEGIN and END because it means that the value lives before and after
// this basic block.
- BitVector LocalEndBegin = BlockLiveness[BB].End;
- LocalEndBegin &= BlockLiveness[BB].Begin;
+ BitVector LocalEndBegin = BlockInfo.End;
+ LocalEndBegin &= BlockInfo.Begin;
LocalLiveIn |= LocalEndBegin;
LocalLiveOut |= LocalEndBegin;
- if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) {
+ if (LocalLiveIn.test(BlockInfo.LiveIn)) {
changed = true;
- BlockLiveness[BB].LiveIn |= LocalLiveIn;
+ BlockInfo.LiveIn |= LocalLiveIn;
- for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
PE = BB->pred_end(); PI != PE; ++PI)
NextBBSet.insert(*PI);
}
- if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) {
+ if (LocalLiveOut.test(BlockInfo.LiveOut)) {
changed = true;
- BlockLiveness[BB].LiveOut |= LocalLiveOut;
+ BlockInfo.LiveOut |= LocalLiveOut;
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
NextBBSet.insert(*SI);
}
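The propagation step above is a standard bit-vector dataflow where END plays GEN and BEGIN plays KILL. A standalone C++ rendering of one block's forward and reverse transfer, on invented example data (simplified: the pass additionally merges the two sets and re-applies the resets, as shown in the hunk):

    #include <bitset>
    #include <cassert>

    int main() {
      // Four slots; made-up liveness facts for one block's neighbors.
      std::bitset<4> predLiveOut("0011"), succLiveIn("0100");
      std::bitset<4> Begin("0001"), End("1000");

      // LiveIn  = (union of predecessor LiveOut | End)   minus Begin
      // LiveOut = (union of successor LiveIn    | Begin) minus End
      std::bitset<4> LiveIn  = (predLiveOut | End)   & ~Begin;
      std::bitset<4> LiveOut = (succLiveIn  | Begin) & ~End;

      assert(LiveIn  == std::bitset<4>("1010"));
      assert(LiveOut == std::bitset<4>("0101"));
      return 0;
    }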
@@ -384,9 +401,9 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
Finishes.resize(NumSlots);
// Create the interval for the basic blocks with lifetime markers in them.
- for (SmallVector<MachineInstr*, 8>::iterator it = Markers.begin(),
+ for (SmallVectorImpl<MachineInstr*>::const_iterator it = Markers.begin(),
e = Markers.end(); it != e; ++it) {
- MachineInstr *MI = *it;
+ const MachineInstr *MI = *it;
if (MI->getParent() != MBB)
continue;
@@ -395,7 +412,7 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
"Invalid Lifetime marker");
bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START;
- MachineOperand &Mo = MI->getOperand(0);
+ const MachineOperand &Mo = MI->getOperand(0);
int Slot = Mo.getIndex();
assert(Slot >= 0 && "Invalid slot");
@@ -482,7 +499,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// Keep a list of *allocas* which need to be remapped.
DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
- for (DenseMap<int, int>::iterator it = SlotRemap.begin(),
+ for (DenseMap<int, int>::const_iterator it = SlotRemap.begin(),
e = SlotRemap.end(); it != e; ++it) {
const AllocaInst *From = MFI->getObjectAllocation(it->first);
const AllocaInst *To = MFI->getObjectAllocation(it->second);
@@ -560,7 +577,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
SlotIndex Index = Indexes->getInstructionIndex(I);
LiveInterval *Interval = Intervals[FromSlot];
assert(Interval->find(Index) != Interval->end() &&
- "Found instruction usage outside of live range.");
+ "Found instruction usage outside of live range.");
}
#endif
@@ -577,8 +594,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
void StackColoring::removeInvalidSlotRanges() {
- MachineFunction::iterator BB, BBE;
- MachineBasicBlock::iterator I, IE;
+ MachineFunction::const_iterator BB, BBE;
+ MachineBasicBlock::const_iterator I, IE;
for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
@@ -597,7 +614,7 @@ void StackColoring::removeInvalidSlotRanges() {
// Check all of the machine operands.
for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
- MachineOperand &MO = I->getOperand(i);
+ const MachineOperand &MO = I->getOperand(i);
if (!MO.isFI())
continue;
@@ -720,11 +737,13 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// and continue.
// Sort the slots according to their size. Place unused slots at the end.
- std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI));
+ // Use stable sort to guarantee deterministic code generation.
+ std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
+ SlotSizeSorter(MFI));
- bool Chanded = true;
- while (Chanded) {
- Chanded = false;
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
for (unsigned I = 0; I < NumSlots; ++I) {
if (SortedSlots[I] == -1)
continue;
@@ -741,7 +760,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Merge disjoint slots.
if (!First->overlaps(*Second)) {
- Chanded = true;
+ Changed = true;
First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
SlotRemap[SecondSlot] = FirstSlot;
SortedSlots[J] = -1;
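The switch to std::stable_sort above matters because slots of equal size must keep a fixed relative order; otherwise the merge loop's visit order, and hence the generated code, could vary between STL implementations. A standalone C++ check of the tie-keeping behavior:

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    struct BySizeDesc {
      bool operator()(const std::pair<int,int> &a,
                      const std::pair<int,int> &b) const {
        return a.second > b.second;    // larger slots first
      }
    };

    int main() {
      // {slot id, size}; slots 0 and 1 tie at size 8.
      std::vector<std::pair<int,int> > slots;
      slots.push_back(std::make_pair(0, 8));
      slots.push_back(std::make_pair(1, 8));
      slots.push_back(std::make_pair(2, 16));

      std::stable_sort(slots.begin(), slots.end(), BySizeDesc());

      assert(slots[0].first == 2);                        // largest first
      assert(slots[1].first == 0 && slots[2].first == 1); // tie keeps order
      return 0;
    }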
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 31e9ec0ac0b9..fbef34772b08 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -16,33 +16,44 @@
#define DEBUG_TYPE "stack-protector"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Attributes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/Triple.h"
using namespace llvm;
+STATISTIC(NumFunProtected, "Number of functions protected");
+STATISTIC(NumAddrTaken, "Number of local variables that have their address"
+ " taken.");
+
namespace {
class StackProtector : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// target type sizes.
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
Function *F;
Module *M;
DominatorTree *DT;
+ /// VisitedPHIs - The set of PHI nodes visited when determining
+ /// if a variable's reference has been taken. This set
+ /// is maintained to ensure we don't visit the same PHI node multiple
+ /// times.
+ SmallPtrSet<const PHINode*, 16> VisitedPHIs;
+
/// InsertStackProtectors - Insert code into the prologue and epilogue of
/// the function.
///
@@ -58,17 +69,21 @@ namespace {
/// ContainsProtectableArray - Check whether the type either is an array or
/// contains an array of sufficient size so that we need stack protectors
/// for it.
- bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const;
+ bool ContainsProtectableArray(Type *Ty, bool Strong = false,
+ bool InStruct = false) const;
+
+ /// \brief Check whether a stack allocation has its address taken.
+ bool HasAddressTaken(const Instruction *AI);
/// RequiresStackProtector - Check whether or not this function needs a
/// stack protector based upon the stack protector level.
- bool RequiresStackProtector() const;
+ bool RequiresStackProtector();
public:
static char ID; // Pass identification, replacement for typeid.
StackProtector() : FunctionPass(ID), TLI(0) {
initializeStackProtectorPass(*PassRegistry::getPassRegistry());
}
- StackProtector(const TargetLowering *tli)
+ StackProtector(const TargetLoweringBase *tli)
: FunctionPass(ID), TLI(tli) {
initializeStackProtectorPass(*PassRegistry::getPassRegistry());
}
@@ -85,7 +100,7 @@ char StackProtector::ID = 0;
INITIALIZE_PASS(StackProtector, "stack-protector",
"Insert stack protectors", false, false)
-FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
+FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) {
return new StackProtector(tli);
}
@@ -96,15 +111,21 @@ bool StackProtector::runOnFunction(Function &Fn) {
if (!RequiresStackProtector()) return false;
+ ++NumFunProtected;
return InsertStackProtectors();
}
/// ContainsProtectableArray - Check whether the type either is an array or
/// contains a char array of sufficient size so that we need stack protectors
/// for it.
-bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong,
+ bool InStruct) const {
if (!Ty) return false;
if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ // In strong mode any array, regardless of type and size, triggers a
+ // protector
+ if (Strong)
+ return true;
const TargetMachine &TM = TLI->getTargetMachine();
if (!AT->getElementType()->isIntegerTy(8)) {
Triple Trip(TM.getTargetTriple());
@@ -126,37 +147,103 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
for (StructType::element_iterator I = ST->element_begin(),
E = ST->element_end(); I != E; ++I)
- if (ContainsProtectableArray(*I, true))
+ if (ContainsProtectableArray(*I, Strong, true))
return true;
return false;
}
-/// RequiresStackProtector - Check whether or not this function needs a stack
-/// protector based upon the stack protector level. The heuristic we use is to
-/// add a guard variable to functions that call alloca, and functions with
-/// buffers larger than SSPBufferSize bytes.
-bool StackProtector::RequiresStackProtector() const {
- if (F->getFnAttributes().hasAttribute(Attributes::StackProtectReq))
- return true;
+bool StackProtector::HasAddressTaken(const Instruction *AI) {
+ for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (AI == SI->getValueOperand())
+ return true;
+ } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) {
+ if (AI == SI->getOperand(0))
+ return true;
+ } else if (isa<CallInst>(U)) {
+ return true;
+ } else if (isa<InvokeInst>(U)) {
+ return true;
+ } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) {
+ if (HasAddressTaken(SI))
+ return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
+ // Keep track of what PHI nodes we have already visited to ensure
+ // they are only visited once.
+ if (VisitedPHIs.insert(PN))
+ if (HasAddressTaken(PN))
+ return true;
+ } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (HasAddressTaken(GEP))
+ return true;
+ } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
+ if (HasAddressTaken(BI))
+ return true;
+ }
+ }
+ return false;
+}
- if (!F->getFnAttributes().hasAttribute(Attributes::StackProtect))
+/// \brief Check whether or not this function needs a stack protector based
+/// upon the stack protector level.
+///
+/// We use two heuristics: a standard (ssp) and strong (sspstrong).
+/// The standard heuristic adds a guard variable to functions that
+/// call alloca with either a variable size or a size >= SSPBufferSize,
+/// functions with character buffers larger than SSPBufferSize, and functions
+/// with aggregates containing character buffers larger than SSPBufferSize. The
+/// strong heuristic adds a guard variable to functions that call alloca
+/// regardless of size, functions with any buffer regardless of type and size,
+/// functions with aggregates that contain any buffer regardless of type and
+/// size, and functions that contain stack-based variables that have had their
+/// address taken.
+bool StackProtector::RequiresStackProtector() {
+ bool Strong = false;
+ if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq))
+ return true;
+ else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong))
+ Strong = true;
+ else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtect))
return false;
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
BasicBlock *BB = I;
for (BasicBlock::iterator
- II = BB->begin(), IE = BB->end(); II != IE; ++II)
+ II = BB->begin(), IE = BB->end(); II != IE; ++II) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
- if (AI->isArrayAllocation())
- // This is a call to alloca with a variable size. Emit stack
- // protectors.
+ if (AI->isArrayAllocation()) {
+ // SSP-Strong: Enable protectors for any call to alloca, regardless
+ // of size.
+ if (Strong)
+ return true;
+
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(AI->getArraySize())) {
+ unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize;
+ if (CI->getLimitedValue(BufferSize) >= BufferSize)
+ // A call to alloca with size >= SSPBufferSize requires
+ // stack protectors.
+ return true;
+ } else // A call to alloca with a variable size requires protectors.
+ return true;
+ }
+
+ if (ContainsProtectableArray(AI->getAllocatedType(), Strong))
return true;
- if (ContainsProtectableArray(AI->getAllocatedType()))
+ if (Strong && HasAddressTaken(AI)) {
+ ++NumAddrTaken;
return true;
+ }
}
+ }
}
return false;
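
A worked illustration of the two heuristics above (a sketch, not part of
this patch; "use" and "useptr" are hypothetical external functions, and
SSPBufferSize is assumed to be at its usual default of 8):

    void use(char *);
    void useptr(int *);
    void f() { char buf[16]; use(buf); }  // ssp and sspstrong: char buffer >= 8 bytes
    void g() { char buf[4];  use(buf); }  // sspstrong only: any array triggers it
    void h() { int x; useptr(&x); }       // sspstrong only: a local has its address taken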
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index d349abc35774..f9515610d7e9 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -12,8 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "stackslotcoloring"
-#include "llvm/Module.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -22,14 +25,11 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include <vector>
using namespace llvm;
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 39fd600d4abf..b337c5393343 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -39,17 +39,17 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "strongphielim"
-#include "PHIEliminationUtils.h"
#include "llvm/CodeGen/Passes.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
namespace {
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 1497d1ba6287..1ec88172a0b0 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -13,25 +13,25 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "tailduplication"
-#include "llvm/Function.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
STATISTIC(NumTails , "Number of tails duplicated");
@@ -461,6 +461,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
II != EE; ++II) {
if (!II->isPHI())
break;
+ MachineInstrBuilder MIB(*FromBB->getParent(), II);
unsigned Idx = 0;
for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
MachineOperand &MO = II->getOperand(i+1);
@@ -508,8 +509,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
II->getOperand(Idx+1).setMBB(SrcBB);
Idx = 0;
} else {
- II->addOperand(MachineOperand::CreateReg(SrcReg, false));
- II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ MIB.addReg(SrcReg).addMBB(SrcBB);
}
}
} else {
@@ -521,8 +521,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
II->getOperand(Idx+1).setMBB(SrcBB);
Idx = 0;
} else {
- II->addOperand(MachineOperand::CreateReg(Reg, false));
- II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ MIB.addReg(Reg).addMBB(SrcBB);
}
}
}
@@ -552,8 +551,8 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// compensate for the duplication.
unsigned MaxDuplicateCount;
if (TailDuplicateSize.getNumOccurrences() == 0 &&
- MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize))
+ MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
MaxDuplicateCount = 1;
else
MaxDuplicateCount = TailDuplicateSize;
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index cadb87815dbe..883e9d1846d9 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -11,12 +11,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
#include <cstdlib>
using namespace llvm;
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
new file mode 100644
index 000000000000..20eb91879317
--- /dev/null
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -0,0 +1,739 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+using namespace llvm;
+
+static cl::opt<bool> DisableHazardRecognizer(
+ "disable-sched-hazard", cl::Hidden, cl::init(false),
+ cl::desc("Disable hazard detection during preRA scheduling"));
+
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const {
+ if (OpNum >= MCID.getNumOperands())
+ return 0;
+
+ short RegClass = MCID.OpInfo[OpNum].RegClass;
+ if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
+ return TRI->getPointerRegClass(MF, RegClass);
+
+ // Instructions like INSERT_SUBREG do not have fixed register classes.
+ if (RegClass < 0)
+ return 0;
+
+ // Otherwise just look it up normally.
+ return TRI->getRegClass(RegClass);
+}
+
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ llvm_unreachable("Target didn't implement insertNoop!");
+}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
+unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
+ atInsnStart = true;
+ if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false;
+ }
+
+ return Length;
+}
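// Worked illustration of the counting rule above (a sketch, not part of
// this patch; assumes MAI.getMaxInstLength() == 4 and a ";" separator
// string, both of which are target-dependent):
//   "mov r0, r1\nmov r2, r3"    -> two instruction starts -> 2 * 4 = 8
//   "mov r0, r1 ; mov r2, r3"   -> two instruction starts -> 2 * 4 = 8
//   "   \n  "                   -> whitespace only        -> 0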
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+
+ // Remove all the old successors of MBB from the CFG.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_begin());
+
+ // Remove all the dead instructions from the end of MBB.
+ MBB->erase(Tail, MBB->end());
+
+ // If MBB isn't immediately before NewDest, insert a branch to it.
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+ InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+ Tail->getDebugLoc());
+ MBB->addSuccessor(NewDest);
+}
+
+// commuteInstruction - The default implementation of this method just exchanges
+// the two operands returned by findCommutedOpIndices.
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool HasDef = MCID.getNumDefs();
+ if (HasDef && !MI->getOperand(0).isReg())
+ // No idea how to commute this instruction. Target should implement its own.
+ return 0;
+ unsigned Idx1, Idx2;
+ if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Don't know how to commute: " << *MI;
+ report_fatal_error(Msg.str());
+ }
+
+ assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
+ "This only knows how to commute register operands so far");
+ unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
+ unsigned Reg1 = MI->getOperand(Idx1).getReg();
+ unsigned Reg2 = MI->getOperand(Idx2).getReg();
+ unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0;
+ unsigned SubReg1 = MI->getOperand(Idx1).getSubReg();
+ unsigned SubReg2 = MI->getOperand(Idx2).getSubReg();
+ bool Reg1IsKill = MI->getOperand(Idx1).isKill();
+ bool Reg2IsKill = MI->getOperand(Idx2).isKill();
+ // If the destination is tied to either of the commuted source registers,
+ // then it must be updated.
+ if (HasDef && Reg0 == Reg1 &&
+ MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
+ Reg2IsKill = false;
+ Reg0 = Reg2;
+ SubReg0 = SubReg2;
+ } else if (HasDef && Reg0 == Reg2 &&
+ MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
+ Reg1IsKill = false;
+ Reg0 = Reg1;
+ SubReg0 = SubReg1;
+ }
+
+ if (NewMI) {
+ // Create a new instruction.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ }
+
+ if (HasDef) {
+ MI->getOperand(0).setReg(Reg0);
+ MI->getOperand(0).setSubReg(SubReg0);
+ }
+ MI->getOperand(Idx2).setReg(Reg1);
+ MI->getOperand(Idx1).setReg(Reg2);
+ MI->getOperand(Idx2).setSubReg(SubReg1);
+ MI->getOperand(Idx1).setSubReg(SubReg2);
+ MI->getOperand(Idx2).setIsKill(Reg1IsKill);
+ MI->getOperand(Idx1).setIsKill(Reg2IsKill);
+ return MI;
+}
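// Illustration of the tied-def update above (not part of this patch): for a
// two-address instruction whose result is tied to the first source,
//   before commuting:  %v0 = op %v0, %v2   (Reg0 == Reg1, tied at Idx1)
//   after commuting:   %v2 = op %v2, %v0   (Reg0 rewritten to Reg2, and
//                                           Reg2's kill flag cleared)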
+
+/// findCommutedOpIndices - If specified MI is commutable, return the two
+/// operand indices that would swap value. Return false if the instruction
+/// is not in a form which this routine understands.
+bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ assert(!MI->isBundle() &&
+ "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+ // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+ // is not true, then the target must implement this.
+ SrcOpIdx1 = MCID.getNumDefs();
+ SrcOpIdx2 = SrcOpIdx1 + 1;
+ if (!MI->getOperand(SrcOpIdx1).isReg() ||
+ !MI->getOperand(SrcOpIdx2).isReg())
+ // No idea.
+ return false;
+ return true;
+}
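// A hypothetical usage sketch (illustration only, not part of this patch):
// commute the sources of a simple three-address instruction such as
//   %v0 = ADD %v1, %v2
// where the default implementation reports source indices 1 and 2.
static MachineInstr *commuteSources(const TargetInstrInfo &TII,
                                    MachineInstr *MI) {
  unsigned Idx1, Idx2;
  if (!TII.findCommutedOpIndices(MI, Idx1, Idx2))
    return 0; // not a form the default routine understands
  // For the ADD above, Idx1 == 1 and Idx2 == 2.
  return TII.commuteInstruction(MI, /*NewMI=*/false);
}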
+
+
+bool
+TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+ if (!MI->isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+
+bool TargetInstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ bool MadeChange = false;
+
+ assert(!MI->isBundle() &&
+ "TargetInstrInfo::PredicateInstruction() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MI->isPredicable())
+ return false;
+
+ for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate()) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ MO.setReg(Pred[j].getReg());
+ MadeChange = true;
+ } else if (MO.isImm()) {
+ MO.setImm(Pred[j].getImm());
+ MadeChange = true;
+ } else if (MO.isMBB()) {
+ MO.setMBB(Pred[j].getMBB());
+ MadeChange = true;
+ }
+ ++j;
+ }
+ }
+ return MadeChange;
+}
+
+bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isLoad() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isStore() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo &TRI) const {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+}
+
+bool
+TargetInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+MachineInstr *TargetInstrInfo::duplicate(MachineInstr *Orig,
+ MachineFunction &MF) const {
+ assert(!Orig->isNotDuplicable() &&
+ "Instruction cannot be duplicated");
+ return MF.CloneMachineInstr(Orig);
+}
+
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+ unsigned FoldIdx) {
+ assert(MI->isCopy() && "MI must be a COPY instruction");
+ if (MI->getNumOperands() != 2)
+ return 0;
+ assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
+
+ const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
+ const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+
+ if (FoldOp.getSubReg() || LiveOp.getSubReg())
+ return 0;
+
+ unsigned FoldReg = FoldOp.getReg();
+ unsigned LiveReg = LiveOp.getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+ "Cannot fold physregs");
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+ if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+ return RC->contains(LiveOp.getReg()) ? RC : 0;
+
+ if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
+ return RC;
+
+ // FIXME: Allow folding when register classes are memory compatible.
+ return 0;
+}
+
+bool TargetInstrInfo::
+canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
+}
+
+/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+/// slot into the specified machine instruction for the specified operand(s).
+/// If this is possible, a new instruction is returned with the specified
+/// operand folded, otherwise NULL is returned. The client is responsible for
+/// removing the old instruction and adding the new one in the instruction
+/// stream.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ unsigned Flags = 0;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (MI->getOperand(Ops[i]).isDef())
+ Flags |= MachineMemOperand::MOStore;
+ else
+ Flags |= MachineMemOperand::MOLoad;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(MBB && "foldMemoryOperand needs an inserted instruction");
+ MachineFunction &MF = *MBB->getParent();
+
+ // Ask the target to do the actual folding.
+ if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+ // Add a memory operand, foldMemoryOperandImpl doesn't do that.
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->mayLoad()) &&
+ "Folded a use to a non-load!");
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ assert(MFI.getObjectOffset(FI) != -1);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ NewMI->addMemOperand(MF, MMO);
+
+ // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI.
+ return MBB->insert(MI, NewMI);
+ }
+
+ // Straight COPY may fold as load/store.
+ if (!MI->isCopy() || Ops.size() != 1)
+ return 0;
+
+ const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ if (!RC)
+ return 0;
+
+ const MachineOperand &MO = MI->getOperand(1-Ops[0]);
+ MachineBasicBlock::iterator Pos = MI;
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ if (Flags == MachineMemOperand::MOStore)
+ storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
+ else
+ loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
+ return --Pos;
+}
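// A hedged usage sketch (illustration only, not part of this patch): fold a
// reload of stack slot FI into use operand OpIdx of MI; per the contract
// above, the caller erases the original instruction on success.
static bool foldReload(const TargetInstrInfo &TII,
                       MachineBasicBlock::iterator MI,
                       unsigned OpIdx, int FI) {
  SmallVector<unsigned, 1> Ops;
  Ops.push_back(OpIdx);
  if (TII.foldMemoryOperand(MI, Ops, FI)) {
    MI->eraseFromParent(); // the folded instruction is already inserted
    return true;
  }
  return false;
}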
+
+/// foldMemoryOperand - Same as the previous version except it allows folding
+/// of any load and store from / to any address, not just from a specific
+/// stack slot.
+MachineInstr*
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
+#endif
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+ if (!NewMI) return 0;
+
+ NewMI = MBB.insert(MI, NewMI);
+
+ // Copy the memoperands from the load to the folded instruction.
+ NewMI->setMemRefs(LoadMI->memoperands_begin(),
+ LoadMI->memoperands_end());
+
+ return NewMI;
+}
+
+bool TargetInstrInfo::
+isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+
+ // Remat clients assume operand 0 is the defined register.
+ if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
+ return false;
+ unsigned DefReg = MI->getOperand(0).getReg();
+
+ // A sub-register definition can only be rematerialized if the instruction
+ // doesn't read the other parts of the register. Otherwise it is really a
+ // read-modify-write operation on the full virtual register which cannot be
+ // moved safely.
+ if (TargetRegisterInfo::isVirtualRegister(DefReg) &&
+ MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg))
+ return false;
+
+ // A load from a fixed stack slot can be rematerialized. This may be
+ // redundant with subsequent checks, but it's target-independent,
+ // simple, and a common case.
+ int FrameIdx = 0;
+ if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
+ MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+ return true;
+
+ // Avoid instructions obviously unsafe for remat.
+ if (MI->isNotDuplicable() || MI->mayStore() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ // Don't remat inline asm. We have no idea how expensive it is
+ // even if it's side effect free.
+ if (MI->isInlineAsm())
+ return false;
+
+ // Avoid instructions which load from potentially varying memory.
+ if (MI->mayLoad() && !MI->isInvariantLoad(AA))
+ return false;
+
+ // If any of the registers accessed are non-constant, conservatively assume
+ // the instruction is not rematerializable.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Check for a well-behaved physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI.isConstantPhysReg(Reg, MF))
+ return false;
+ } else {
+ // A physreg def. We can't remat it.
+ return false;
+ }
+ continue;
+ }
+
+ // Only allow one virtual-register def. There may be multiple defs of the
+ // same virtual register, though.
+ if (MO.isDef() && Reg != DefReg)
+ return false;
+
+ // Don't allow any virtual-register uses. Rematting an instruction with
+ // virtual register uses would lengthen the live ranges of the uses, which
+ // is not necessarily a good idea, certainly not "trivial".
+ if (MO.isUse())
+ return false;
+ }
+
+ // Everything checked out.
+ return true;
+}
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Terminators and labels can't be scheduled around.
+ if (MI->isTerminator() || MI->isLabel())
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI))
+ return true;
+
+ return false;
+}
+
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfo::usePreRAHazardRecognizer() const {
+ return !DisableHazardRecognizer;
+}
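// The DisableHazardRecognizer flag above is a cl::opt, so it can be flipped
// from any tool that parses LLVM command-line options; an illustrative
// invocation (file name hypothetical):
//   llc -disable-sched-hazard test.ll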
+
+// Default implementation of CreateTargetRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Dummy hazard recognizer allows all instructions to issue.
+ return new ScheduleHazardRecognizer();
+}
+
+// Default implementation of CreateTargetMIHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "misched");
+}
+
+// Default implementation of CreateTargetPostRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG latency interface.
+//===----------------------------------------------------------------------===//
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr latency interface.
+//===----------------------------------------------------------------------===//
+
+unsigned
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Class = MI->getDesc().getSchedClass();
+ int UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps >= 0)
+ return UOps;
+
+ // The # of u-ops is dynamically determined. The specific target should
+ // override this function to return the right number.
+ return 1;
+}
+
+/// Return the default expected latency for a def based on its opcode.
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
+ const MachineInstr *DefMI) const {
+ if (DefMI->isTransient())
+ return 0;
+ if (DefMI->mayLoad())
+ return SchedModel->LoadLatency;
+ if (isHighLatencyDef(DefMI->getOpcode()))
+ return SchedModel->HighLatency;
+ return 1;
+}
+
+unsigned TargetInstrInfo::
+getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ // Default to one cycle for no itinerary. However, an "empty" itinerary may
+ // still have a MinLatency property, which getStageLatency checks.
+ if (!ItinData)
+ return MI->mayLoad() ? 2 : 1;
+
+ return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 1);
+}
+
+/// Both DefMI and UseMI must be valid. By default, call directly to the
+/// itinerary. This may be overridden by the target.
+int TargetInstrInfo::
+getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned UseClass = UseMI->getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+/// If we can determine the operand latency from the def only, without itinerary
+/// lookup, do so. Otherwise return -1.
+int TargetInstrInfo::computeDefOperandLatency(
+ const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, bool FindMin) const {
+
+ // Let the target hook getInstrLatency handle missing itineraries.
+ if (!ItinData)
+ return getInstrLatency(ItinData, DefMI);
+
+ // Return a latency based on the itinerary properties and defining instruction
+ // if possible. Some common subtargets don't require per-operand latency,
+ // especially for minimum latencies.
+ if (FindMin) {
+ // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
+ // it exists before defaulting to MinLatency.
+ if (ItinData->SchedModel->MinLatency >= 0)
+ return getInstrLatency(ItinData, DefMI);
+
+ // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
+ // For empty itineraries, short-circuit the check and default to one cycle.
+ if (ItinData->isEmpty())
+ return 1;
+ } else if (ItinData->isEmpty())
+ return defaultDefLatency(ItinData->SchedModel, DefMI);
+
+ // ...operand lookup required
+ return -1;
+}
+
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use when the operand indices are already known. UseMI may
+/// be NULL for an unknown use.
+///
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
+/// latency is used for scheduling groups, while expected latency is for
+/// instruction cost and critical path.
+///
+/// Depending on the subtarget's itinerary properties, this may or may not need
+/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or
+/// UseIdx to compute min latency.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx,
+ bool FindMin) const {
+
+ int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin);
+ if (DefLatency >= 0)
+ return DefLatency;
+
+ assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
+
+ int OperLatency = 0;
+ if (UseMI)
+ OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ else {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+ }
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
+ return InstrLatency;
+}
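// A worked trace of the fallback order above (illustrative, not part of this
// patch), for an itinerary with no per-operand cycle data, a stage latency
// of 3, and FindMin == false:
//   computeDefOperandLatency(...)  -> -1  (operand lookup required)
//   getOperandLatency(...)         -> -1  (no cycle data for DefIdx)
//   getInstrLatency(...)           ->  3  (stage latency)
//   result: max(3, defaultDefLatency(SchedModel, DefMI))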
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
deleted file mode 100644
index 4439192fe2f4..000000000000
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ /dev/null
@@ -1,681 +0,0 @@
-//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TargetInstrInfoImpl class, it just provides default
-// implementations of various methods.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-static cl::opt<bool> DisableHazardRecognizer(
- "disable-sched-hazard", cl::Hidden, cl::init(false),
- cl::desc("Disable hazard detection during preRA scheduling"));
-
-/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
-/// after it, replacing it with an unconditional branch to NewDest.
-void
-TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
- MachineBasicBlock *NewDest) const {
- MachineBasicBlock *MBB = Tail->getParent();
-
- // Remove all the old successors of MBB from the CFG.
- while (!MBB->succ_empty())
- MBB->removeSuccessor(MBB->succ_begin());
-
- // Remove all the dead instructions from the end of MBB.
- MBB->erase(Tail, MBB->end());
-
- // If MBB isn't immediately before MBB, insert a branch to it.
- if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
- InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
- Tail->getDebugLoc());
- MBB->addSuccessor(NewDest);
-}
-
-// commuteInstruction - The default implementation of this method just exchanges
-// the two operands returned by findCommutedOpIndices.
-MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
- bool NewMI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- bool HasDef = MCID.getNumDefs();
- if (HasDef && !MI->getOperand(0).isReg())
- // No idea how to commute this instruction. Target should implement its own.
- return 0;
- unsigned Idx1, Idx2;
- if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Don't know how to commute: " << *MI;
- report_fatal_error(Msg.str());
- }
-
- assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
- "This only knows how to commute register operands so far");
- unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
- unsigned Reg1 = MI->getOperand(Idx1).getReg();
- unsigned Reg2 = MI->getOperand(Idx2).getReg();
- unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0;
- unsigned SubReg1 = MI->getOperand(Idx1).getSubReg();
- unsigned SubReg2 = MI->getOperand(Idx2).getSubReg();
- bool Reg1IsKill = MI->getOperand(Idx1).isKill();
- bool Reg2IsKill = MI->getOperand(Idx2).isKill();
- // If destination is tied to either of the commuted source register, then
- // it must be updated.
- if (HasDef && Reg0 == Reg1 &&
- MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
- Reg2IsKill = false;
- Reg0 = Reg2;
- SubReg0 = SubReg2;
- } else if (HasDef && Reg0 == Reg2 &&
- MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
- Reg1IsKill = false;
- Reg0 = Reg1;
- SubReg0 = SubReg1;
- }
-
- if (NewMI) {
- // Create a new instruction.
- MachineFunction &MF = *MI->getParent()->getParent();
- MI = MF.CloneMachineInstr(MI);
- }
-
- if (HasDef) {
- MI->getOperand(0).setReg(Reg0);
- MI->getOperand(0).setSubReg(SubReg0);
- }
- MI->getOperand(Idx2).setReg(Reg1);
- MI->getOperand(Idx1).setReg(Reg2);
- MI->getOperand(Idx2).setSubReg(SubReg1);
- MI->getOperand(Idx1).setSubReg(SubReg2);
- MI->getOperand(Idx2).setIsKill(Reg1IsKill);
- MI->getOperand(Idx1).setIsKill(Reg2IsKill);
- return MI;
-}
-
-/// findCommutedOpIndices - If specified MI is commutable, return the two
-/// operand indices that would swap value. Return true if the instruction
-/// is not in a form which this routine understands.
-bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
- unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const {
- assert(!MI->isBundle() &&
- "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles");
-
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isCommutable())
- return false;
- // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
- // is not true, then the target must implement this.
- SrcOpIdx1 = MCID.getNumDefs();
- SrcOpIdx2 = SrcOpIdx1 + 1;
- if (!MI->getOperand(SrcOpIdx1).isReg() ||
- !MI->getOperand(SrcOpIdx2).isReg())
- // No idea.
- return false;
- return true;
-}
-
-
-bool
-TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const {
- if (!MI->isTerminator()) return false;
-
- // Conditional branch is a special case.
- if (MI->isBranch() && !MI->isBarrier())
- return true;
- if (!MI->isPredicable())
- return true;
- return !isPredicated(MI);
-}
-
-
-bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const {
- bool MadeChange = false;
-
- assert(!MI->isBundle() &&
- "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles");
-
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MI->isPredicable())
- return false;
-
- for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
- if (MCID.OpInfo[i].isPredicate()) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg()) {
- MO.setReg(Pred[j].getReg());
- MadeChange = true;
- } else if (MO.isImm()) {
- MO.setImm(Pred[j].getImm());
- MadeChange = true;
- } else if (MO.isMBB()) {
- MO.setMBB(Pred[j].getMBB());
- MadeChange = true;
- }
- ++j;
- }
- }
- return MadeChange;
-}
-
-bool TargetInstrInfoImpl::hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
- for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
- oe = MI->memoperands_end();
- o != oe;
- ++o) {
- if ((*o)->isLoad() && (*o)->getValue())
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
- return false;
-}
-
-bool TargetInstrInfoImpl::hasStoreToStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const {
- for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
- oe = MI->memoperands_end();
- o != oe;
- ++o) {
- if ((*o)->isStore() && (*o)->getValue())
- if (const FixedStackPseudoSourceValue *Value =
- dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
- FrameIndex = Value->getFrameIndex();
- MMO = *o;
- return true;
- }
- }
- return false;
-}
-
-void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg,
- unsigned SubIdx,
- const MachineInstr *Orig,
- const TargetRegisterInfo &TRI) const {
- MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
- MBB.insert(I, MI);
-}
-
-bool
-TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1,
- const MachineRegisterInfo *MRI) const {
- return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
-}
-
-MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
- MachineFunction &MF) const {
- assert(!Orig->isNotDuplicable() &&
- "Instruction cannot be duplicated");
- return MF.CloneMachineInstr(Orig);
-}
-
-// If the COPY instruction in MI can be folded to a stack operation, return
-// the register class to use.
-static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
- unsigned FoldIdx) {
- assert(MI->isCopy() && "MI must be a COPY instruction");
- if (MI->getNumOperands() != 2)
- return 0;
- assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand");
-
- const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
- const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
-
- if (FoldOp.getSubReg() || LiveOp.getSubReg())
- return 0;
-
- unsigned FoldReg = FoldOp.getReg();
- unsigned LiveReg = LiveOp.getReg();
-
- assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
- "Cannot fold physregs");
-
- const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
-
- if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
- return RC->contains(LiveOp.getReg()) ? RC : 0;
-
- if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
- return RC;
-
- // FIXME: Allow folding when register classes are memory compatible.
- return 0;
-}
-
-bool TargetInstrInfoImpl::
-canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
-}
-
-/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
-/// slot into the specified machine instruction for the specified operand(s).
-/// If this is possible, a new instruction is returned with the specified
-/// operand folded, otherwise NULL is returned. The client is responsible for
-/// removing the old instruction and adding the new one in the instruction
-/// stream.
-MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FI) const {
- unsigned Flags = 0;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (MI->getOperand(Ops[i]).isDef())
- Flags |= MachineMemOperand::MOStore;
- else
- Flags |= MachineMemOperand::MOLoad;
-
- MachineBasicBlock *MBB = MI->getParent();
- assert(MBB && "foldMemoryOperand needs an inserted instruction");
- MachineFunction &MF = *MBB->getParent();
-
- // Ask the target to do the actual folding.
- if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
- // Add a memory operand, foldMemoryOperandImpl doesn't do that.
- assert((!(Flags & MachineMemOperand::MOStore) ||
- NewMI->mayStore()) &&
- "Folded a def to a non-store!");
- assert((!(Flags & MachineMemOperand::MOLoad) ||
- NewMI->mayLoad()) &&
- "Folded a use to a non-load!");
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
- assert(MFI.getObjectOffset(FI) != -1);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- Flags, MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
- NewMI->addMemOperand(MF, MMO);
-
- // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI.
- return MBB->insert(MI, NewMI);
- }
-
- // Straight COPY may fold as load/store.
- if (!MI->isCopy() || Ops.size() != 1)
- return 0;
-
- const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
- if (!RC)
- return 0;
-
- const MachineOperand &MO = MI->getOperand(1-Ops[0]);
- MachineBasicBlock::iterator Pos = MI;
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
-
- if (Flags == MachineMemOperand::MOStore)
- storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
- else
- loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
- return --Pos;
-}
-
-/// foldMemoryOperand - Same as the previous version except it allows folding
-/// of any load and store from / to any address, not just from a specific
-/// stack slot.
-MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
-#ifndef NDEBUG
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
-#endif
- MachineBasicBlock &MBB = *MI->getParent();
- MachineFunction &MF = *MBB.getParent();
-
- // Ask the target to do the actual folding.
- MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
- if (!NewMI) return 0;
-
- NewMI = MBB.insert(MI, NewMI);
-
- // Copy the memoperands from the load to the folded instruction.
- NewMI->setMemRefs(LoadMI->memoperands_begin(),
- LoadMI->memoperands_end());
-
- return NewMI;
-}
-
-bool TargetInstrInfo::
-isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
- AliasAnalysis *AA) const {
- const MachineFunction &MF = *MI->getParent()->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetMachine &TM = MF.getTarget();
- const TargetInstrInfo &TII = *TM.getInstrInfo();
-
- // Remat clients assume operand 0 is the defined register.
- if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
- return false;
- unsigned DefReg = MI->getOperand(0).getReg();
-
- // A sub-register definition can only be rematerialized if the instruction
- // doesn't read the other parts of the register. Otherwise it is really a
- // read-modify-write operation on the full virtual register which cannot be
- // moved safely.
- if (TargetRegisterInfo::isVirtualRegister(DefReg) &&
- MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg))
- return false;
-
- // A load from a fixed stack slot can be rematerialized. This may be
- // redundant with subsequent checks, but it's target-independent,
- // simple, and a common case.
- int FrameIdx = 0;
- if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
- MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
- return true;
-
- // Avoid instructions obviously unsafe for remat.
- if (MI->isNotDuplicable() || MI->mayStore() ||
- MI->hasUnmodeledSideEffects())
- return false;
-
- // Don't remat inline asm. We have no idea how expensive it is
- // even if it's side effect free.
- if (MI->isInlineAsm())
- return false;
-
- // Avoid instructions which load from potentially varying memory.
- if (MI->mayLoad() && !MI->isInvariantLoad(AA))
- return false;
-
- // If any of the registers accessed are non-constant, conservatively assume
- // the instruction is not rematerializable.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
-
- // Check for a well-behaved physical register.
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (MO.isUse()) {
- // If the physreg has no defs anywhere, it's just an ambient register
- // and we can freely move its uses. Alternatively, if it's allocatable,
- // it could get allocated to something with a def during allocation.
- if (!MRI.isConstantPhysReg(Reg, MF))
- return false;
- } else {
- // A physreg def. We can't remat it.
- return false;
- }
- continue;
- }
-
- // Only allow one virtual-register def. There may be multiple defs of the
- // same virtual register, though.
- if (MO.isDef() && Reg != DefReg)
- return false;
-
- // Don't allow any virtual-register uses. Rematting an instruction with
- // virtual register uses would length the live ranges of the uses, which
- // is not necessarily a good idea, certainly not "trivial".
- if (MO.isUse())
- return false;
- }
-
- // Everything checked out.
- return true;
-}
-
-/// isSchedulingBoundary - Test if the given instruction should be
-/// considered a scheduling boundary. This primarily includes labels
-/// and terminators.
-bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const{
- // Terminators and labels can't be scheduled around.
- if (MI->isTerminator() || MI->isLabel())
- return true;
-
- // Don't attempt to schedule around any instruction that defines
- // a stack-oriented pointer, as it's unlikely to be profitable. This
- // saves compile time, because it doesn't require every single
- // stack slot reference to depend on the instruction that does the
- // modification.
- const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
- if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
- return true;
-
- return false;
-}
-
-// Provide a global flag for disabling the PreRA hazard recognizer that targets
-// may choose to honor.
-bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
- return !DisableHazardRecognizer;
-}
-
-// Default implementation of CreateTargetRAHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfoImpl::
-CreateTargetHazardRecognizer(const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- // Dummy hazard recognizer allows all instructions to issue.
- return new ScheduleHazardRecognizer();
-}
-
-// Default implementation of CreateTargetMIHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfoImpl::
-CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
- const ScheduleDAG *DAG) const {
- return (ScheduleHazardRecognizer *)
- new ScoreboardHazardRecognizer(II, DAG, "misched");
-}
-
-// Default implementation of CreateTargetPostRAHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfoImpl::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
- const ScheduleDAG *DAG) const {
- return (ScheduleHazardRecognizer *)
- new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
-}
-
-//===----------------------------------------------------------------------===//
-// SelectionDAG latency interface.
-//===----------------------------------------------------------------------===//
-
-int
-TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData,
- SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const {
- if (!ItinData || ItinData->isEmpty())
- return -1;
-
- if (!DefNode->isMachineOpcode())
- return -1;
-
- unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
- if (!UseNode->isMachineOpcode())
- return ItinData->getOperandCycle(DefClass, DefIdx);
- unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
- return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
-}
-
-int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *N) const {
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
- if (!N->isMachineOpcode())
- return 1;
-
- return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
-}
-
-//===----------------------------------------------------------------------===//
-// MachineInstr latency interface.
-//===----------------------------------------------------------------------===//
-
-unsigned
-TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
- const MachineInstr *MI) const {
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
- unsigned Class = MI->getDesc().getSchedClass();
- int UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (UOps >= 0)
- return UOps;
-
- // The # of u-ops is dynamically determined. The specific target should
- // override this function to return the right number.
- return 1;
-}
-
-/// Return the default expected latency for a def based on it's opcode.
-unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
- const MachineInstr *DefMI) const {
- if (DefMI->isTransient())
- return 0;
- if (DefMI->mayLoad())
- return SchedModel->LoadLatency;
- if (isHighLatencyDef(DefMI->getOpcode()))
- return SchedModel->HighLatency;
- return 1;
-}
-
-unsigned TargetInstrInfoImpl::
-getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost) const {
- // Default to one cycle for no itinerary. However, an "empty" itinerary may
- // still have a MinLatency property, which getStageLatency checks.
- if (!ItinData)
- return MI->mayLoad() ? 2 : 1;
-
- return ItinData->getStageLatency(MI->getDesc().getSchedClass());
-}
-
-bool TargetInstrInfoImpl::hasLowDefLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI,
- unsigned DefIdx) const {
- if (!ItinData || ItinData->isEmpty())
- return false;
-
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
- return (DefCycle != -1 && DefCycle <= 1);
-}
-
-/// Both DefMI and UseMI must be valid. By default, call directly to the
-/// itinerary. This may be overridden by the target.
-int TargetInstrInfoImpl::
-getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- unsigned UseClass = UseMI->getDesc().getSchedClass();
- return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
-}
-
-/// If we can determine the operand latency from the def only, without itinerary
-/// lookup, do so. Otherwise return -1.
-int TargetInstrInfo::computeDefOperandLatency(
- const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, bool FindMin) const {
-
- // Let the target hook getInstrLatency handle missing itineraries.
- if (!ItinData)
- return getInstrLatency(ItinData, DefMI);
-
- // Return a latency based on the itinerary properties and defining instruction
- // if possible. Some common subtargets don't require per-operand latency,
- // especially for minimum latencies.
- if (FindMin) {
- // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
- // it exists before defaulting to MinLatency.
- if (ItinData->SchedModel->MinLatency >= 0)
- return getInstrLatency(ItinData, DefMI);
-
- // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
- // For empty itineraries, short-circuit the check and default to one cycle.
- if (ItinData->isEmpty())
- return 1;
- }
- else if (ItinData->isEmpty())
- return defaultDefLatency(ItinData->SchedModel, DefMI);
-
- // ...operand lookup required
- return -1;
-}
-
-/// computeOperandLatency - Compute and return the latency of the given data
-/// dependent def and use when the operand indices are already known. UseMI may
-/// be NULL for an unknown use.
-///
-/// FindMin may be set to get the minimum vs. expected latency. Minimum
-/// latency is used for scheduling groups, while expected latency is for
-/// instruction cost and critical path.
-///
-/// Depending on the subtarget's itinerary properties, this may or may not need
-/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or
-/// UseIdx to compute min latency.
-unsigned TargetInstrInfo::
-computeOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx,
- bool FindMin) const {
-
- int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin);
- if (DefLatency >= 0)
- return DefLatency;
-
- assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
-
- int OperLatency = 0;
- if (UseMI)
- OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
- else {
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
- }
- if (OperLatency >= 0)
- return OperLatency;
-
- // No operand latency was found.
- unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
-
- // Expected latency is the max of the stage latency and itinerary props.
- if (!FindMin)
- InstrLatency = std::max(InstrLatency,
- defaultDefLatency(ItinData->SchedModel, DefMI));
- return InstrLatency;
-}
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
new file mode 100644
index 000000000000..f42bdbd27643
--- /dev/null
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -0,0 +1,1305 @@
+//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLoweringBase class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+using namespace llvm;
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
+ Names[RTLIB::SHL_I16] = "__ashlhi3";
+ Names[RTLIB::SHL_I32] = "__ashlsi3";
+ Names[RTLIB::SHL_I64] = "__ashldi3";
+ Names[RTLIB::SHL_I128] = "__ashlti3";
+ Names[RTLIB::SRL_I16] = "__lshrhi3";
+ Names[RTLIB::SRL_I32] = "__lshrsi3";
+ Names[RTLIB::SRL_I64] = "__lshrdi3";
+ Names[RTLIB::SRL_I128] = "__lshrti3";
+ Names[RTLIB::SRA_I16] = "__ashrhi3";
+ Names[RTLIB::SRA_I32] = "__ashrsi3";
+ Names[RTLIB::SRA_I64] = "__ashrdi3";
+ Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I8] = "__mulqi3";
+ Names[RTLIB::MUL_I16] = "__mulhi3";
+ Names[RTLIB::MUL_I32] = "__mulsi3";
+ Names[RTLIB::MUL_I64] = "__muldi3";
+ Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::MULO_I32] = "__mulosi4";
+ Names[RTLIB::MULO_I64] = "__mulodi4";
+ Names[RTLIB::MULO_I128] = "__muloti4";
+ Names[RTLIB::SDIV_I8] = "__divqi3";
+ Names[RTLIB::SDIV_I16] = "__divhi3";
+ Names[RTLIB::SDIV_I32] = "__divsi3";
+ Names[RTLIB::SDIV_I64] = "__divdi3";
+ Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I8] = "__udivqi3";
+ Names[RTLIB::UDIV_I16] = "__udivhi3";
+ Names[RTLIB::UDIV_I32] = "__udivsi3";
+ Names[RTLIB::UDIV_I64] = "__udivdi3";
+ Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I8] = "__modqi3";
+ Names[RTLIB::SREM_I16] = "__modhi3";
+ Names[RTLIB::SREM_I32] = "__modsi3";
+ Names[RTLIB::SREM_I64] = "__moddi3";
+ Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I8] = "__umodqi3";
+ Names[RTLIB::UREM_I16] = "__umodhi3";
+ Names[RTLIB::UREM_I32] = "__umodsi3";
+ Names[RTLIB::UREM_I64] = "__umoddi3";
+ Names[RTLIB::UREM_I128] = "__umodti3";
+
+ // These are generally not available.
+ Names[RTLIB::SDIVREM_I8] = 0;
+ Names[RTLIB::SDIVREM_I16] = 0;
+ Names[RTLIB::SDIVREM_I32] = 0;
+ Names[RTLIB::SDIVREM_I64] = 0;
+ Names[RTLIB::SDIVREM_I128] = 0;
+ Names[RTLIB::UDIVREM_I8] = 0;
+ Names[RTLIB::UDIVREM_I16] = 0;
+ Names[RTLIB::UDIVREM_I32] = 0;
+ Names[RTLIB::UDIVREM_I64] = 0;
+ Names[RTLIB::UDIVREM_I128] = 0;
+
+ Names[RTLIB::NEG_I32] = "__negsi2";
+ Names[RTLIB::NEG_I64] = "__negdi2";
+ Names[RTLIB::ADD_F32] = "__addsf3";
+ Names[RTLIB::ADD_F64] = "__adddf3";
+ Names[RTLIB::ADD_F80] = "__addxf3";
+ Names[RTLIB::ADD_F128] = "__addtf3";
+ Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+ Names[RTLIB::SUB_F32] = "__subsf3";
+ Names[RTLIB::SUB_F64] = "__subdf3";
+ Names[RTLIB::SUB_F80] = "__subxf3";
+ Names[RTLIB::SUB_F128] = "__subtf3";
+ Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+ Names[RTLIB::MUL_F32] = "__mulsf3";
+ Names[RTLIB::MUL_F64] = "__muldf3";
+ Names[RTLIB::MUL_F80] = "__mulxf3";
+ Names[RTLIB::MUL_F128] = "__multf3";
+ Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+ Names[RTLIB::DIV_F32] = "__divsf3";
+ Names[RTLIB::DIV_F64] = "__divdf3";
+ Names[RTLIB::DIV_F80] = "__divxf3";
+ Names[RTLIB::DIV_F128] = "__divtf3";
+ Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
+ Names[RTLIB::REM_F32] = "fmodf";
+ Names[RTLIB::REM_F64] = "fmod";
+ Names[RTLIB::REM_F80] = "fmodl";
+ Names[RTLIB::REM_F128] = "fmodl";
+ Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::FMA_F32] = "fmaf";
+ Names[RTLIB::FMA_F64] = "fma";
+ Names[RTLIB::FMA_F80] = "fmal";
+ Names[RTLIB::FMA_F128] = "fmal";
+ Names[RTLIB::FMA_PPCF128] = "fmal";
+ Names[RTLIB::POWI_F32] = "__powisf2";
+ Names[RTLIB::POWI_F64] = "__powidf2";
+ Names[RTLIB::POWI_F80] = "__powixf2";
+ Names[RTLIB::POWI_F128] = "__powitf2";
+ Names[RTLIB::POWI_PPCF128] = "__powitf2";
+ Names[RTLIB::SQRT_F32] = "sqrtf";
+ Names[RTLIB::SQRT_F64] = "sqrt";
+ Names[RTLIB::SQRT_F80] = "sqrtl";
+ Names[RTLIB::SQRT_F128] = "sqrtl";
+ Names[RTLIB::SQRT_PPCF128] = "sqrtl";
+ Names[RTLIB::LOG_F32] = "logf";
+ Names[RTLIB::LOG_F64] = "log";
+ Names[RTLIB::LOG_F80] = "logl";
+ Names[RTLIB::LOG_F128] = "logl";
+ Names[RTLIB::LOG_PPCF128] = "logl";
+ Names[RTLIB::LOG2_F32] = "log2f";
+ Names[RTLIB::LOG2_F64] = "log2";
+ Names[RTLIB::LOG2_F80] = "log2l";
+ Names[RTLIB::LOG2_F128] = "log2l";
+ Names[RTLIB::LOG2_PPCF128] = "log2l";
+ Names[RTLIB::LOG10_F32] = "log10f";
+ Names[RTLIB::LOG10_F64] = "log10";
+ Names[RTLIB::LOG10_F80] = "log10l";
+ Names[RTLIB::LOG10_F128] = "log10l";
+ Names[RTLIB::LOG10_PPCF128] = "log10l";
+ Names[RTLIB::EXP_F32] = "expf";
+ Names[RTLIB::EXP_F64] = "exp";
+ Names[RTLIB::EXP_F80] = "expl";
+ Names[RTLIB::EXP_F128] = "expl";
+ Names[RTLIB::EXP_PPCF128] = "expl";
+ Names[RTLIB::EXP2_F32] = "exp2f";
+ Names[RTLIB::EXP2_F64] = "exp2";
+ Names[RTLIB::EXP2_F80] = "exp2l";
+ Names[RTLIB::EXP2_F128] = "exp2l";
+ Names[RTLIB::EXP2_PPCF128] = "exp2l";
+ Names[RTLIB::SIN_F32] = "sinf";
+ Names[RTLIB::SIN_F64] = "sin";
+ Names[RTLIB::SIN_F80] = "sinl";
+ Names[RTLIB::SIN_F128] = "sinl";
+ Names[RTLIB::SIN_PPCF128] = "sinl";
+ Names[RTLIB::COS_F32] = "cosf";
+ Names[RTLIB::COS_F64] = "cos";
+ Names[RTLIB::COS_F80] = "cosl";
+ Names[RTLIB::COS_F128] = "cosl";
+ Names[RTLIB::COS_PPCF128] = "cosl";
+ Names[RTLIB::POW_F32] = "powf";
+ Names[RTLIB::POW_F64] = "pow";
+ Names[RTLIB::POW_F80] = "powl";
+ Names[RTLIB::POW_F128] = "powl";
+ Names[RTLIB::POW_PPCF128] = "powl";
+ Names[RTLIB::CEIL_F32] = "ceilf";
+ Names[RTLIB::CEIL_F64] = "ceil";
+ Names[RTLIB::CEIL_F80] = "ceill";
+ Names[RTLIB::CEIL_F128] = "ceill";
+ Names[RTLIB::CEIL_PPCF128] = "ceill";
+ Names[RTLIB::TRUNC_F32] = "truncf";
+ Names[RTLIB::TRUNC_F64] = "trunc";
+ Names[RTLIB::TRUNC_F80] = "truncl";
+ Names[RTLIB::TRUNC_F128] = "truncl";
+ Names[RTLIB::TRUNC_PPCF128] = "truncl";
+ Names[RTLIB::RINT_F32] = "rintf";
+ Names[RTLIB::RINT_F64] = "rint";
+ Names[RTLIB::RINT_F80] = "rintl";
+ Names[RTLIB::RINT_F128] = "rintl";
+ Names[RTLIB::RINT_PPCF128] = "rintl";
+ Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
+ Names[RTLIB::NEARBYINT_F64] = "nearbyint";
+ Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+ Names[RTLIB::FLOOR_F32] = "floorf";
+ Names[RTLIB::FLOOR_F64] = "floor";
+ Names[RTLIB::FLOOR_F80] = "floorl";
+ Names[RTLIB::FLOOR_F128] = "floorl";
+ Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::COPYSIGN_F32] = "copysignf";
+ Names[RTLIB::COPYSIGN_F64] = "copysign";
+ Names[RTLIB::COPYSIGN_F80] = "copysignl";
+ Names[RTLIB::COPYSIGN_F128] = "copysignl";
+ Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
+ Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2";
+ Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2";
+ Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+ Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
+ Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+ Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+ Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+ Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
+ Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
+ Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+ Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+ Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+ Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
+ Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
+ Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+ Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+ Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
+ Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
+ Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+ Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+ Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
+ Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
+ Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+ Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+ Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+ Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
+ Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
+ Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+ Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+ Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+ Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+ Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+ Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+ Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti";
+ Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
+ Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+ Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+ Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+ Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+ Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+ Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+ Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
+ Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
+ Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+ Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf";
+ Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
+ Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+ Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+ Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+ Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+ Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+ Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+ Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
+ Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
+ Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+ Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf";
+ Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
+ Names[RTLIB::OEQ_F32] = "__eqsf2";
+ Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::OEQ_F128] = "__eqtf2";
+ Names[RTLIB::UNE_F32] = "__nesf2";
+ Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::UNE_F128] = "__netf2";
+ Names[RTLIB::OGE_F32] = "__gesf2";
+ Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OGE_F128] = "__getf2";
+ Names[RTLIB::OLT_F32] = "__ltsf2";
+ Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLT_F128] = "__lttf2";
+ Names[RTLIB::OLE_F32] = "__lesf2";
+ Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OLE_F128] = "__letf2";
+ Names[RTLIB::OGT_F32] = "__gtsf2";
+ Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::OGT_F128] = "__gttf2";
+ Names[RTLIB::UO_F32] = "__unordsf2";
+ Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::UO_F128] = "__unordtf2";
+ Names[RTLIB::O_F32] = "__unordsf2";
+ Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::O_F128] = "__unordtf2";
+ Names[RTLIB::MEMCPY] = "memcpy";
+ Names[RTLIB::MEMMOVE] = "memmove";
+ Names[RTLIB::MEMSET] = "memset";
+ Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+ Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+ Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+ Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+ Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+ Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+ Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+ Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+ Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
+
+ if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) {
+ Names[RTLIB::SINCOS_F32] = "sincosf";
+ Names[RTLIB::SINCOS_F64] = "sincos";
+ Names[RTLIB::SINCOS_F80] = "sincosl";
+ Names[RTLIB::SINCOS_F128] = "sincosl";
+ Names[RTLIB::SINCOS_PPCF128] = "sincosl";
+ } else {
+ // These are generally not available.
+ Names[RTLIB::SINCOS_F32] = 0;
+ Names[RTLIB::SINCOS_F64] = 0;
+ Names[RTLIB::SINCOS_F80] = 0;
+ Names[RTLIB::SINCOS_F128] = 0;
+ Names[RTLIB::SINCOS_PPCF128] = 0;
+ }
+}
+
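+// For example, Names[RTLIB::SHL_I32] starts out as "__ashlsi3". A target
+// whose runtime uses a different symbol can override an entry after
+// construction via setLibcallName, e.g.
+//   setLibcallName(RTLIB::SHL_I32, "__my_ashlsi3");
+// (the replacement name here is purely illustrative).
+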
+/// InitLibcallCallingConvs - Set default libcall CallingConvs.
+///
+static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+ CCs[i] = CallingConv::C;
+ }
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::f64)
+ return FPEXT_F32_F64;
+ if (RetVT == MVT::f128)
+ return FPEXT_F32_F128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::f128)
+ return FPEXT_F64_F128;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
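+// For example, getFPEXT(MVT::f32, MVT::f64) yields FPEXT_F32_F64, which
+// InitLibcallNames above maps to "__extendsfdf2".
+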
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
+ if (RetVT == MVT::f32) {
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F32;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F32;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F32;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F32;
+ } else if (RetVT == MVT::f64) {
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F64;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F64;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F64;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
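+// For example, getFPTOSINT(MVT::f32, MVT::i32) yields FPTOSINT_F32_I32,
+// i.e. the "__fixsfsi" libcall from InitLibcallNames above.
+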
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F32_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F64_I16;
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ // Note: memset would splat only the low byte of the multi-byte enum value,
+ // so initialize each entry explicitly.
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
+ CCs[i] = ISD::SETCC_INVALID;
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::UNE_F128] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OGE_F128] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLT_F128] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OLE_F128] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::OGT_F128] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::UO_F128] = ISD::SETNE;
+ CCs[RTLIB::O_F32] = ISD::SETEQ;
+ CCs[RTLIB::O_F64] = ISD::SETEQ;
+ CCs[RTLIB::O_F128] = ISD::SETEQ;
+}
+
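+// The libcall's integer result is compared against zero using the stored
+// condition code; e.g. an OEQ_F32 comparison lowers to a call to __eqsf2
+// followed by a SETEQ test of the return value against 0.
+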
+/// NOTE: The constructor takes ownership of TLOF.
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
+ const TargetLoweringObjectFile *tlof)
+ : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadExtActions, 0, sizeof(LoadExtActions));
+ memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+ memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+ memset(CondCodeActions, 0, sizeof(CondCodeActions));
+
+ // Set default actions for various operations.
+ for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
+ // Default all indexed load / store to expand.
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
+ setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // These operations default to expand.
+ setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // Most targets ignore the @llvm.prefetch intrinsic.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+
+ // ConstantFP nodes default to expand. Targets can either change this to
+ // Legal, in which case all fp constants are legal, or use isFPImmLegal()
+ // to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(ISD::FLOG , MVT::f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f16, Expand);
+ setOperationAction(ISD::FEXP , MVT::f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f128, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f128, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f128, Expand);
+ setOperationAction(ISD::FEXP , MVT::f128, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f128, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f128, Expand);
+ setOperationAction(ISD::FRINT, MVT::f128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+
+ // Default ISD::TRAP to expand (which turns it into abort).
+ setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+ // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
+ // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
+ IsLittleEndian = TD->isLittleEndian();
+ PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
+ memset(RegClassForVT, 0, MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
+ MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
+ = MaxStoresPerMemmoveOptSize = 4;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
+ PredictableSelectIsExpensive = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ BooleanContents = UndefinedBooleanContent;
+ BooleanVectorContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = Sched::ILP;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ MinFunctionAlignment = 0;
+ PrefFunctionAlignment = 0;
+ PrefLoopAlignment = 0;
+ MinStackArgumentAlignment = 1;
+ ShouldFoldAtomicFences = false;
+ InsertFencesForAtomic = false;
+ SupportJumpTables = true;
+ MinimumJumpTableEntries = 4;
+
+ InitLibcallNames(LibcallRoutineNames, TM);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+ InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+TargetLoweringBase::~TargetLoweringBase() {
+ delete &TLOF;
+}
+
+MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
+ return MVT::getIntegerVT(8*TD->getPointerSize(0));
+}
+
+EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
+ assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
+ if (LHSTy.isVector())
+ return LHSTy;
+ return getScalarShiftAmountTy(LHSTy);
+}
+
+/// canOpTrap - Returns true if the operation can trap for the value type.
+/// VT must be a legal type.
+bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
+ assert(isTypeLegal(VT));
+ switch (Op) {
+ default:
+ return false;
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return true;
+ }
+}
+
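+// For example, canOpTrap(ISD::SDIV, MVT::i32) is true (division can trap on
+// a zero divisor), while arithmetic such as ISD::ADD returns false.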
+
+static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT,
+ TargetLoweringBase *TLI) {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ if (!TLI->isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ MVT DestVT = TLI->getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
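+// For example, on a target where v4f32 is the widest legal vector type,
+// breaking down v8f32 yields NumIntermediates == 2 and
+// IntermediateVT == RegisterVT == v4f32, with a return value of 2.
+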
+/// isLegalRC - Return true if the value types that can be represented by the
+/// specified register class are all legal.
+bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I))
+ return true;
+ }
+ return false;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+std::pair<const TargetRegisterClass*, uint8_t>
+TargetLoweringBase::findRepresentativeClass(MVT VT) const {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
+ if (!RC)
+ return std::make_pair(RC, 0);
+
+ // Compute the set of all super-register classes.
+ BitVector SuperRegRC(TRI->getNumRegClasses());
+ for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
+ SuperRegRC.setBitsInMask(RCI.getMask());
+
+ // Find the first legal register class with the largest spill size.
+ const TargetRegisterClass *BestRC = RC;
+ for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+ const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
+ // We want the largest possible spill size.
+ if (SuperRC->getSize() <= BestRC->getSize())
+ continue;
+ if (!isLegalRC(SuperRC))
+ continue;
+ BestRC = SuperRC;
+ }
+ return std::make_pair(BestRC, 1);
+}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLoweringBase::computeRegisterProperties() {
+ assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+ for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
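+ // For example, if i32 is the largest legal integer type, i64 takes two
+ // registers and i128 takes four.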
+ for (unsigned ExpandedReg = LargestIntReg + 1;
+ ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
+ TypeExpandInteger);
+ }
+
+ // Inspect all of the ValueTypes smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ MVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (const MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
+ }
+ }
+
+ // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ }
+
+ // Decide how to handle f128. If the target does not have native f128 support,
+ // expand it to i128 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f128)) {
+ NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::f128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f64. If the target does not have native f64 support,
+ // expand it to i64 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f32. If the target does not have native support for
+ // f32, promote it to f64 if it is legal. Otherwise, expand it to i32.
+ if (!isTypeLegal(MVT::f32)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64];
+ TransformToType[MVT::f32] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger);
+ } else {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
+ }
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (isTypeLegal(VT)) continue;
+
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
+ bool IsLegalWiderType = false;
+ // First try to promote the elements of integer vectors. If no legal
+ // promotion was found, fall back to the widen-vector method.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ // Promote vectors of integers to vectors with the same number
+ // of elements, with a wider element type.
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
+ && SVT.getVectorNumElements() == NElts &&
+ isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+
+ if (IsLegalWiderType) continue;
+
+ // Try to widen the vector.
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
+
+ MVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ MVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ unsigned NumElts = VT.getVectorNumElements();
+ ValueTypeActions.setTypeAction(VT,
+ NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ }
+
+ // Determine the 'representative' register class for each value type.
+ // A representative register class is the largest (meaning one which is
+ // not a sub-register class / subreg register class) legal register class
+ // for a group of value types. For example, on i386 the representative
+ // class for i8, i16, and i32 would be GR32; on x86_64 it would be GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
+}
+
+EVT TargetLoweringBase::getSetCCResultType(EVT VT) const {
+ assert(!VT.isVector() && "No default SetCC type for vectors!");
+ return getPointerTy(0).SimpleTy;
+}
+
+MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
+ return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // or a promoted vector type that has the same number of elements which
+ // are wider, then we should convert to that legal vector type.
+ // This handles things like <2 x float> -> <4 x float> and
+ // <4 x i1> -> <4 x i32>.
+ LegalizeTypeAction TA = getTypeAction(Context, VT);
+ if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+ EVT RegisterEVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterEVT)) {
+ IntermediateVT = RegisterEVT;
+ RegisterVT = RegisterEVT.getSimpleVT();
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
+ EVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(
+ EVT::getVectorVT(Context, EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ MVT DestVT = getRegisterType(Context, NewVT);
+ RegisterVT = DestVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
+/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// type of the given function. This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+/// TODO: Move this out of TargetLowering.cpp.
+void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const TargetLowering &TLI) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ Flags.setSExt();
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
+ }
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
+ return TD->getCallFrameTypeAlignment(Ty);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetTransformInfo Helpers
+//===----------------------------------------------------------------------===//
+
+int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
+ enum InstructionOpcodes {
+#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
+#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
+#include "llvm/IR/Instruction.def"
+ };
+ switch (static_cast<InstructionOpcodes>(Opcode)) {
+ case Ret: return 0;
+ case Br: return 0;
+ case Switch: return 0;
+ case IndirectBr: return 0;
+ case Invoke: return 0;
+ case Resume: return 0;
+ case Unreachable: return 0;
+ case Add: return ISD::ADD;
+ case FAdd: return ISD::FADD;
+ case Sub: return ISD::SUB;
+ case FSub: return ISD::FSUB;
+ case Mul: return ISD::MUL;
+ case FMul: return ISD::FMUL;
+ case UDiv: return ISD::UDIV;
+ case SDiv: return ISD::SDIV;
+ case FDiv: return ISD::FDIV;
+ case URem: return ISD::UREM;
+ case SRem: return ISD::SREM;
+ case FRem: return ISD::FREM;
+ case Shl: return ISD::SHL;
+ case LShr: return ISD::SRL;
+ case AShr: return ISD::SRA;
+ case And: return ISD::AND;
+ case Or: return ISD::OR;
+ case Xor: return ISD::XOR;
+ case Alloca: return 0;
+ case Load: return ISD::LOAD;
+ case Store: return ISD::STORE;
+ case GetElementPtr: return 0;
+ case Fence: return 0;
+ case AtomicCmpXchg: return 0;
+ case AtomicRMW: return 0;
+ case Trunc: return ISD::TRUNCATE;
+ case ZExt: return ISD::ZERO_EXTEND;
+ case SExt: return ISD::SIGN_EXTEND;
+ case FPToUI: return ISD::FP_TO_UINT;
+ case FPToSI: return ISD::FP_TO_SINT;
+ case UIToFP: return ISD::UINT_TO_FP;
+ case SIToFP: return ISD::SINT_TO_FP;
+ case FPTrunc: return ISD::FP_ROUND;
+ case FPExt: return ISD::FP_EXTEND;
+ case PtrToInt: return ISD::BITCAST;
+ case IntToPtr: return ISD::BITCAST;
+ case BitCast: return ISD::BITCAST;
+ case ICmp: return ISD::SETCC;
+ case FCmp: return ISD::SETCC;
+ case PHI: return 0;
+ case Call: return 0;
+ case Select: return ISD::SELECT;
+ case UserOp1: return 0;
+ case UserOp2: return 0;
+ case VAArg: return 0;
+ case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
+ case InsertElement: return ISD::INSERT_VECTOR_ELT;
+ case ShuffleVector: return ISD::VECTOR_SHUFFLE;
+ case ExtractValue: return ISD::MERGE_VALUES;
+ case InsertValue: return ISD::MERGE_VALUES;
+ case LandingPad: return 0;
+ }
+
+ llvm_unreachable("Unknown instruction type encountered!");
+}
+
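+// For example, InstructionOpcodeToISD(Instruction::Mul) returns ISD::MUL,
+// while opcodes with no ISD equivalent (Ret, Br, PHI, Call, ...) return 0.
+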
+std::pair<unsigned, MVT>
+TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const {
+ LLVMContext &C = Ty->getContext();
+ EVT MTy = getValueType(Ty);
+
+ unsigned Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
+ while (true) {
+ LegalizeKind LK = getTypeConversion(C, MTy);
+
+ if (LK.first == TypeLegal)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
+ if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
+ Cost *= 2;
+
+ // Keep legalizing the type.
+ MTy = LK.second;
+ }
+}
+
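+// For example, a vector type that must be split twice before reaching a
+// legal type is returned with a cost of 4 alongside the final legal MVT.
+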
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // The default implementation supports a conservative RISCy r+r and r+i
+ // addressing mode.
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+i, r+r, and 2*r addressing modes:
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ }
+
+ return true;
+}
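+
+// For example, under this default: {BaseReg + 4} (Scale == 0) and
+// {BaseReg + IndexReg} (Scale == 1) are accepted, while
+// {BaseReg + IndexReg + 4} and {2*IndexReg + 4} are rejected.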
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 8f5d770f6651..3bdca4c64078 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -13,30 +13,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
using namespace dwarf;
@@ -88,6 +87,36 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Streamer.EmitSymbolValue(Sym, Size);
}
+const MCExpr *TargetLoweringObjectFileELF::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+
+ if (Encoding & dwarf::DW_EH_PE_indirect) {
+ MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += ".DW.stub";
+
+ // Add information about the stub reference to ELFMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
static SectionKind
getELFKindForNamedSection(StringRef Name, SectionKind K) {
// N.B.: The defaults used here are not the same ones used in MC.
@@ -314,35 +343,6 @@ getSectionForConstant(SectionKind Kind) const {
return DataRelROSection;
}
-const MCExpr *TargetLoweringObjectFileELF::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI,
- unsigned Encoding, MCStreamer &Streamer) const {
-
- if (Encoding & dwarf::DW_EH_PE_indirect) {
- MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- SmallString<128> Name;
- Mang->getNameWithPrefix(Name, GV, true);
- Name += ".DW.stub";
-
- // Add information about the stub reference to ELFMMI so that the stub
- // gets emitted by the asmprinter.
- MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
- MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
- if (StubSym.getPointer() == 0) {
- MCSymbol *Sym = Mang->getSymbol(GV);
- StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
- }
-
- return TargetLoweringObjectFile::
- getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
- }
-
- return TargetLoweringObjectFile::
- getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
-}
-
const MCSection *
TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
// The default scheme is .ctor / .dtor, so we have to invert the priority
@@ -405,14 +405,14 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
// MachO
//===----------------------------------------------------------------------===//
-/// emitModuleFlags - Emit the module flags that specify the garbage collection
-/// information.
+/// emitModuleFlags - Perform code emission for module flags.
void TargetLoweringObjectFileMachO::
emitModuleFlags(MCStreamer &Streamer,
ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
Mangler *Mang, const TargetMachine &TM) const {
unsigned VersionVal = 0;
unsigned ImageInfoFlags = 0;
+ MDNode *LinkerOptions = 0;
StringRef SectionVal;
for (ArrayRef<Module::ModuleFlagEntry>::iterator
@@ -426,14 +426,33 @@ emitModuleFlags(MCStreamer &Streamer,
StringRef Key = MFE.Key->getString();
Value *Val = MFE.Val;
- if (Key == "Objective-C Image Info Version")
+ if (Key == "Objective-C Image Info Version") {
VersionVal = cast<ConstantInt>(Val)->getZExtValue();
- else if (Key == "Objective-C Garbage Collection" ||
- Key == "Objective-C GC Only" ||
- Key == "Objective-C Is Simulated")
+ } else if (Key == "Objective-C Garbage Collection" ||
+ Key == "Objective-C GC Only" ||
+ Key == "Objective-C Is Simulated") {
ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue();
- else if (Key == "Objective-C Image Info Section")
+ } else if (Key == "Objective-C Image Info Section") {
SectionVal = cast<MDString>(Val)->getString();
+ } else if (Key == "Linker Options") {
+ LinkerOptions = cast<MDNode>(Val);
+ }
+ }
+
+ // Emit the linker options if present.
+ if (LinkerOptions) {
+ for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
+ MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
+ SmallVector<std::string, 4> StrOptions;
+
+ // Convert to strings.
+ for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
+ MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
+ StrOptions.push_back(MDOption->getString());
+ }
+
+ Streamer.EmitLinkerOptions(StrOptions);
+ }
}
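The "Linker Options" module flag consumed above is a list of option groups, each group itself a list of strings handed to the streamer as one unit. A standalone sketch of the same nested walk over plain containers; the .linker_option directive name is an assumption about what the MachO streamer ultimately prints:

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      // Hypothetical stand-in for the metadata: each inner vector is one
      // option group, emitted as a single directive.
      std::vector<std::vector<std::string>> LinkerOptions = {
        {"-framework", "Foundation"},
        {"-lz"},
      };
      for (const auto &Group : LinkerOptions) {
        std::printf(".linker_option");
        for (const auto &Opt : Group)
          std::printf(" \"%s\"", Opt.c_str());
        std::printf("\n");
      }
      return 0;
    }
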
// The section is mandatory. If we don't have it, then we don't have GC info.
@@ -604,9 +623,9 @@ shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
}
const MCExpr *TargetLoweringObjectFileMachO::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const {
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
// The mach-o version of this method defaults to returning a stub reference.
if (Encoding & DW_EH_PE_indirect) {
@@ -629,11 +648,12 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
}
return TargetLoweringObjectFile::
- getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
}
return TargetLoweringObjectFile::
- getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
MCSymbol *TargetLoweringObjectFileMachO::
@@ -701,8 +721,19 @@ getCOFFSectionFlags(SectionKind K) {
const MCSection *TargetLoweringObjectFileCOFF::
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
- return getContext().getCOFFSection(GV->getSection(),
- getCOFFSectionFlags(Kind),
+ int Selection = 0;
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+ SmallString<128> Name(GV->getSection().c_str());
+ if (GV->isWeakForLinker()) {
+ Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append("$");
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+ }
+ return getContext().getCOFFSection(Name,
+ Characteristics,
+ Selection,
Kind);
}
@@ -711,8 +742,11 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
return ".text$";
if (Kind.isBSS ())
return ".bss$";
- if (Kind.isThreadLocal())
- return ".tls$";
+ if (Kind.isThreadLocal()) {
+ // 'LLVM' is just an arbitrary string to ensure that the section name gets
+ // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker.
+ return ".tls$LLVM";
+ }
if (Kind.isWriteable())
return ".data$";
return ".rdata$";
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
new file mode 100644
index 000000000000..84b4bfc33221
--- /dev/null
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -0,0 +1,285 @@
+//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
+ regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *SRINames,
+ const unsigned *SRILaneMasks)
+ : InfoDesc(ID), SubRegIndexNames(SRINames),
+ SubRegIndexLaneMasks(SRILaneMasks),
+ RegClassBegin(RCB), RegClassEnd(RCE) {
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() {}
+
+void PrintReg::print(raw_ostream &OS) const {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
+ else
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+}
+
+void PrintRegUnit::print(raw_ostream &OS) const {
+ // Generic printout when TRI is missing.
+ if (!TRI) {
+ OS << "Unit~" << Unit;
+ return;
+ }
+
+ // Check for invalid register units.
+ if (Unit >= TRI->getNumRegUnits()) {
+ OS << "BadUnit~" << Unit;
+ return;
+ }
+
+ // Normal units have at least one root.
+ MCRegUnitRootIterator Roots(Unit, TRI);
+ assert(Roots.isValid() && "Unit has no roots.");
+ OS << TRI->getName(*Roots);
+ for (++Roots; Roots.isValid(); ++Roots)
+ OS << '~' << TRI->getName(*Roots);
+}
+
+/// getAllocatableClass - Return the maximal subclass of the given register
+/// class that is allocatable, or NULL.
+const TargetRegisterClass *
+TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
+ if (!RC || RC->isAllocatable())
+ return RC;
+
+ const unsigned *SubClass = RC->getSubClassMask();
+ for (unsigned Base = 0, BaseE = getNumRegClasses();
+ Base < BaseE; Base += 32) {
+ unsigned Idx = Base;
+ for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) {
+ unsigned Offset = CountTrailingZeros_32(Mask);
+ const TargetRegisterClass *SubRC = getRegClass(Idx + Offset);
+ if (SubRC->isAllocatable())
+ return SubRC;
+ Mask >>= Offset;
+ Idx += Offset + 1;
+ }
+ }
+ return NULL;
+}
+
+/// getMinimalPhysRegClass - Returns the Register Class of a physical
+/// register of the given type, picking the most specific sub register
+/// class of the right type that contains this physreg.
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+ // Pick the most specific sub register class of the right type that
+ // contains this physreg.
+ const TargetRegisterClass* BestRC = 0;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Set the bits that represent allocatable
+/// registers for the given register class.
+static void getAllocatableSetForRC(const MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ assert(RC->isAllocatable() && "invalid for nonallocatable sets");
+ ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
+ for (unsigned i = 0; i != Order.size(); ++i)
+ R.set(Order[i]);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(getNumRegs());
+ if (RC) {
+ // A register class with no allocatable subclass returns an empty set.
+ const TargetRegisterClass *SubClass = getAllocatableClass(RC);
+ if (SubClass)
+ getAllocatableSetForRC(MF, SubClass, Allocatable);
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ if ((*I)->isAllocatable())
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ }
+
+ // Mask out the reserved registers
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable &= Reserved.flip();
+
+ return Allocatable;
+}
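The final step relies on masking one bit set with the complement of another: invert the reserved set and AND it in, leaving only allocatable, non-reserved registers. A trivial model with std::bitset:

    #include <bitset>
    #include <cstdio>
    #include <string>

    int main() {
      // Model of the final step of getAllocatableSet(): start from the
      // union of the allocation orders, then clear reserved registers.
      std::bitset<8> Allocatable(std::string("00111100"));
      std::bitset<8> Reserved(std::string("00000100"));  // e.g. frame pointer
      Allocatable &= ~Reserved;
      std::printf("%s\n", Allocatable.to_string().c_str());  // 00111000
      return 0;
    }
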
+
+static inline
+const TargetRegisterClass *firstCommonClass(const uint32_t *A,
+ const uint32_t *B,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
+ if (unsigned Common = *A++ & *B++)
+ return TRI->getRegClass(I + CountTrailingZeros_32(Common));
+ return 0;
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) const {
+ // First take care of the trivial cases.
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return 0;
+
+ // Register classes are ordered topologically, so the largest common
+ // sub-class is the common sub-class with the smallest ID.
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
+}
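The class masks scanned by firstCommonClass are bit vectors packed into 32-bit words, and because classes are numbered so that smaller IDs correspond to larger classes, the first common set bit is the largest common sub-class. A standalone sketch of the same scan, using the GCC/Clang ctz builtin where LLVM uses CountTrailingZeros_32:

    #include <cstdint>
    #include <cstdio>

    // Return the index of the first bit set in both packed sets, or -1 if
    // they are disjoint.
    static int firstCommonBit(const uint32_t *A, const uint32_t *B,
                              unsigned NumBits) {
      for (unsigned I = 0; I < NumBits; I += 32)
        if (uint32_t Common = *A++ & *B++)
          return int(I + __builtin_ctz(Common));
      return -1;
    }

    int main() {
      uint32_t A[2] = {0xF0, 0x1};  // bits 4-7, 32
      uint32_t B[2] = {0xC0, 0x3};  // bits 6-7, 32-33
      std::printf("%d\n", firstCommonBit(A, B, 64));  // prints 6
      return 0;
    }
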
+
+const TargetRegisterClass *
+TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned Idx) const {
+ assert(A && B && "Missing register class");
+ assert(Idx && "Bad sub-register index");
+
+ // Find Idx in the list of super-register indices.
+ for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI)
+ if (RCI.getSubReg() == Idx)
+ // The bit mask contains all register classes that are projected into B
+ // by Idx. Find a class that is also a sub-class of A.
+ return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
+ return 0;
+}
+
+const TargetRegisterClass *TargetRegisterInfo::
+getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
+ const TargetRegisterClass *RCB, unsigned SubB,
+ unsigned &PreA, unsigned &PreB) const {
+ assert(RCA && SubA && RCB && SubB && "Invalid arguments");
+
+ // Search all pairs of sub-register indices that project into RCA and RCB
+ // respectively. This is quadratic, but usually the sets are very small. On
+ // most targets like X86, there will only be a single sub-register index
+ // (e.g., sub_16bit projecting into GR16).
+ //
+ // The worst case is a register class like DPR on ARM.
+ // We have indices dsub_0..dsub_7 projecting into that class.
+ //
+ // It is very common that one register class is a sub-class of the other.
+ // Arrange for RCA to be the larger register class so the answer will be
+ // found in the first iteration. This makes the search linear for the most
+ // common case.
+ const TargetRegisterClass *BestRC = 0;
+ unsigned *BestPreA = &PreA;
+ unsigned *BestPreB = &PreB;
+ if (RCA->getSize() < RCB->getSize()) {
+ std::swap(RCA, RCB);
+ std::swap(SubA, SubB);
+ std::swap(BestPreA, BestPreB);
+ }
+
+ // Also terminate the search once we have found a register class as small as
+ // RCA.
+ unsigned MinSize = RCA->getSize();
+
+ for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
+ unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
+ for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) {
+ // Check if a common super-register class exists for this index pair.
+ const TargetRegisterClass *RC =
+ firstCommonClass(IA.getMask(), IB.getMask(), this);
+ if (!RC || RC->getSize() < MinSize)
+ continue;
+
+ // The indexes must compose identically: PreA+SubA == PreB+SubB.
+ unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB);
+ if (FinalA != FinalB)
+ continue;
+
+ // Is RC a better candidate than BestRC?
+ if (BestRC && RC->getSize() >= BestRC->getSize())
+ continue;
+
+ // Yes, RC is the smallest super-register seen so far.
+ BestRC = RC;
+ *BestPreA = IA.getSubReg();
+ *BestPreB = IB.getSubReg();
+
+ // Bail early if we reached MinSize. We won't find a better candidate.
+ if (BestRC->getSize() == MinSize)
+ return BestRC;
+ }
+ }
+ return BestRC;
+}
+
+// Compute target-independent register allocator hints to help eliminate copies.
+void
+TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+ // Hints with HintType != 0 were set by target-dependent code.
+ // Such targets must provide their own implementation of
+ // TRI::getRegAllocationHints to interpret those hint types.
+ assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
+
+ // Target-independent hints are either a physical or a virtual register.
+ unsigned Phys = Hint.second;
+ if (VRM && isVirtualRegister(Phys))
+ Phys = VRM->getPhys(Phys);
+
+ // Check that Phys is a valid hint in VirtReg's register class.
+ if (!isPhysicalRegister(Phys))
+ return;
+ if (MRI.isReserved(Phys))
+ return;
+ // Check that Phys is in the allocation order. We shouldn't heed hints
+ // from VirtReg's register class if they aren't in the allocation order. The
+ // target probably has a reason for removing the register.
+ if (std::find(Order.begin(), Order.end(), Phys) == Order.end())
+ return;
+
+ // All clear, tell the register allocator to prefer this register.
+ Hints.push_back(Phys);
+}
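This hook is the generic half of a two-sided contract: targets that set hint types other than zero must override it, while plain hints are only forwarded after being checked against reserved registers and the allocation order. A standalone model of that filter, with plain ints standing in for registers:

    #include <algorithm>
    #include <cstdio>
    #include <utility>
    #include <vector>

    using Reg = unsigned;

    // Forward a (type, register) hint only if it is a plain hint (type 0)
    // naming a register that is neither reserved nor absent from the
    // allocation order.
    static std::vector<Reg> filterHint(std::pair<unsigned, Reg> Hint,
                                       const std::vector<Reg> &Order,
                                       const std::vector<Reg> &Reserved) {
      std::vector<Reg> Hints;
      if (Hint.first != 0)  // target-specific hint: handled by an override
        return Hints;
      Reg Phys = Hint.second;
      if (std::find(Reserved.begin(), Reserved.end(), Phys) != Reserved.end())
        return Hints;
      if (std::find(Order.begin(), Order.end(), Phys) == Order.end())
        return Hints;  // the target removed it from the order; don't heed it
      Hints.push_back(Phys);
      return Hints;
    }

    int main() {
      std::vector<Reg> Order = {1, 2, 3, 4};
      std::vector<Reg> Reserved = {4};
      std::printf("%zu\n", filterHint({0, 3}, Order, Reserved).size());  // 1
      std::printf("%zu\n", filterHint({0, 4}, Order, Reserved).size());  // 0
      return 0;
    }
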
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index ca3b0e0b1173..783bfa1c1a18 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -13,12 +13,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -240,7 +240,10 @@ unsigned TargetSchedModel::computeOperandLatency(
report_fatal_error(ss.str());
}
#endif
- return DefMI->isTransient() ? 0 : 1;
+ // FIXME: Automatically giving all implicit defs defaultDefLatency is
+ // undesirable. We should only do it for defs that are known to the MC
+ // desc like flags. Truly implicit defs should get 1 cycle latency.
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
}
unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index a9058bc7f6d9..e6dfe104c82f 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -29,26 +29,25 @@
#define DEBUG_TYPE "twoaddrinstr"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
@@ -67,7 +66,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
const InstrItineraryData *InstrItins;
MachineRegisterInfo *MRI;
LiveVariables *LV;
- SlotIndexes *Indexes;
LiveIntervals *LIS;
AliasAnalysis *AA;
CodeGenOpt::Level OptLevel;
@@ -92,10 +90,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
// virtual registers. e.g. r1 = move v1024.
DenseMap<unsigned, unsigned> DstRegMap;
- /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen
- /// during the initial walk of the machine function.
- SmallVector<MachineInstr*, 16> RegSequences;
-
bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
MachineBasicBlock::iterator OldPos);
@@ -125,7 +119,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist);
+ unsigned Dist, bool shouldOnlyCommute);
void scanUses(unsigned DstReg);
@@ -135,11 +129,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
-
- /// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part of
- /// the de-ssa process. This replaces sources of REG_SEQUENCE as sub-register
- /// references of the register defined by REG_SEQUENCE.
- bool eliminateRegSequences();
+ void eliminateRegSequence(MachineBasicBlock::iterator&);
public:
static char ID; // Pass identification, replacement for typeid
@@ -172,6 +162,8 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS);
+
/// sink3AddrInstruction - A two-address instruction has been converted to a
/// three-address instruction to avoid clobbering a register. Try to sink it
/// past the instruction that would kill the above mentioned register to reduce
@@ -213,14 +205,29 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
// Find the instruction that kills SavedReg.
MachineInstr *KillMI = NULL;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(SavedReg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineOperand &UseMO = UI.getOperand();
- if (!UseMO.isKill())
- continue;
- KillMI = UseMO.getParent();
- break;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(SavedReg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ }
+ if (!KillMI) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SavedReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ if (!UseMO.isKill())
+ continue;
+ KillMI = UseMO.getParent();
+ break;
+ }
}
// If we find the instruction that kills SavedReg, and it is in an
@@ -259,7 +266,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
if (DefReg == MOReg)
return false;
- if (MO.isKill()) {
+ if (MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))) {
if (OtherMI == KillMI && MOReg == SavedReg)
// Save the operand that kills the register. We want to unset the kill
// marker if we can sink MI past it.
@@ -272,13 +279,15 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
}
assert(KillMO && "Didn't find kill");
- // Update kill and LV information.
- KillMO->setIsKill(false);
- KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
- KillMO->setIsKill(true);
+ if (!LIS) {
+ // Update kill and LV information.
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
- if (LV)
- LV->replaceKillInstruction(SavedReg, KillMI, MI);
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, KillMI, MI);
+ }
// Move instruction to its destination.
MBB->remove(MI);
@@ -339,6 +348,33 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
return true;
}
+/// isPlainlyKilled - Test if the given register value, which is used by the
+/// given instruction, is killed by the given instruction.
+static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
+ LiveIntervals *LIS) {
+ if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LIS->isNotInMIMap(MI)) {
+ // FIXME: Sometimes tryInstructionTransform() will add instructions and
+ // test whether they can be folded before keeping them. In this case it
+ // sets a kill before recursively calling tryInstructionTransform() again.
+ // If there is no interval available, we assume that this instruction is
+ // one of those. A kill flag is manually inserted on the operand so the
+ // check below will handle it.
+ LiveInterval &LI = LIS->getInterval(Reg);
+ // This is to match the kill flag version where undefs don't have kill
+ // flags.
+ if (!LI.hasAtLeastOneValue())
+ return false;
+
+ SlotIndex useIdx = LIS->getInstructionIndex(MI);
+ LiveInterval::const_iterator I = LI.find(useIdx);
+ assert(I != LI.end() && "Reg must be live-in to use.");
+ return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
+ }
+
+ return MI->killsRegister(Reg);
+}
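With LiveIntervals available, "killed" is no longer a flag but a property of the live range: a use kills the value exactly when the covering segment ends at that instruction, and a segment ending at a block boundary means the value is live-out rather than killed. A simplified standalone model (the real check compares SlotIndexes via SlotIndex::isSameInstr):

    #include <cstdio>
    #include <vector>

    struct Segment { int Start, End; bool EndsAtBlockBoundary; };

    // A use at UseIdx kills the register iff the live segment covering it
    // ends there; block-boundary ends mean live-out, hence not killed.
    static bool isPlainlyKilled(const std::vector<Segment> &LI, int UseIdx) {
      for (const Segment &S : LI)
        if (S.Start <= UseIdx && UseIdx <= S.End)
          return !S.EndsAtBlockBoundary && S.End == UseIdx;
      return false;
    }

    int main() {
      std::vector<Segment> LI = {{0, 10, false}, {20, 30, true}};
      std::printf("%d\n", isPlainlyKilled(LI, 10));  // 1: segment ends here
      std::printf("%d\n", isPlainlyKilled(LI, 25));  // 0: live past the use
      return 0;
    }
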
+
/// isKilled - Test if the given register value, which is used by the given
/// instruction, is killed by the given instruction. This looks through
/// coalescable copies to see if the original value is potentially not killed.
@@ -354,12 +390,20 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
/// normal heuristics commute the (two-address) add, which lets
/// coalescing eliminate the extra copy.
///
+/// If allowFalsePositives is true then likely kills are treated as kills even
+/// if it can't be proven that they are kills.
static bool isKilled(MachineInstr &MI, unsigned Reg,
const MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII) {
+ const TargetInstrInfo *TII,
+ LiveIntervals *LIS,
+ bool allowFalsePositives) {
MachineInstr *DefMI = &MI;
for (;;) {
- if (!DefMI->killsRegister(Reg))
+ // All uses of physical registers are likely to be kills.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ (allowFalsePositives || MRI->hasOneUse(Reg)))
+ return true;
+ if (!isPlainlyKilled(DefMI, Reg, LIS))
return false;
if (TargetRegisterInfo::isPhysicalRegister(Reg))
return true;
@@ -480,7 +524,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// insert => %reg1030<def> = MOV8rr %reg1029
// %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
- if (!MI->killsRegister(regC))
+ if (!isPlainlyKilled(MI, regC, LIS))
return false;
// Ok, we have something like:
@@ -536,19 +580,9 @@ commuteInstruction(MachineBasicBlock::iterator &mi,
}
DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
- // If the instruction changed to commute it, update livevar.
- if (NewMI != MI) {
- if (LV)
- // Update live variables
- LV->replaceKillInstruction(RegC, MI, NewMI);
- if (Indexes)
- Indexes->replaceMachineInstrInMaps(MI, NewMI);
-
- MBB->insert(mi, NewMI); // Insert the new inst
- MBB->erase(mi); // Nuke the old inst.
- mi = NewMI;
- DistanceMap.insert(std::make_pair(NewMI, Dist));
- }
+ assert(NewMI == MI &&
+ "TargetInstrInfo::commuteInstruction() should not return a new "
+ "instruction unless it was requested.");
// Update source register map.
unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
@@ -595,8 +629,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
bool Sunk = false;
- if (Indexes)
- Indexes->replaceMachineInstrInMaps(mi, NewMI);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(mi, NewMI);
if (NewMI->findRegisterUseOperand(RegB, false, TRI))
// FIXME: Temporary workaround. If the new instruction doesn't
@@ -708,9 +742,9 @@ bool TwoAddressInstructionPass::
rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
- // Bail immediately if we don't have LV available. We use it to find kills
- // efficiently.
- if (!LV)
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
return false;
MachineInstr *MI = &*mi;
@@ -719,7 +753,22 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ MachineInstr *KillMI = 0;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies, they may be coalesced later.
return false;
@@ -755,24 +804,27 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
Defs.insert(MOReg);
else {
Uses.insert(MOReg);
- if (MO.isKill() && MOReg != Reg)
+ if (MOReg != Reg && (MO.isKill() ||
+ (LIS && isPlainlyKilled(MI, MOReg, LIS))))
Kills.insert(MOReg);
}
}
// Move the copies connected to MI down as well.
- MachineBasicBlock::iterator From = MI;
- MachineBasicBlock::iterator To = llvm::next(From);
- while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) {
- Defs.insert(To->getOperand(0).getReg());
- ++To;
+ MachineBasicBlock::iterator Begin = MI;
+ MachineBasicBlock::iterator AfterMI = llvm::next(Begin);
+
+ MachineBasicBlock::iterator End = AfterMI;
+ while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) {
+ Defs.insert(End->getOperand(0).getReg());
+ ++End;
}
// Check that the reschedule will not break dependencies.
unsigned NumVisited = 0;
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
- for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) {
+ for (MachineBasicBlock::iterator I = End; I != KillPos; ++I) {
MachineInstr *OtherMI = I;
// DBG_VALUE cannot be counted against the limit.
if (OtherMI->isDebugValue())
@@ -803,11 +855,13 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
} else {
if (Defs.count(MOReg))
return false;
+ bool isKill = MO.isKill() ||
+ (LIS && isPlainlyKilled(OtherMI, MOReg, LIS));
if (MOReg != Reg &&
- ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg)))
+ ((isKill && Uses.count(MOReg)) || Kills.count(MOReg)))
// Don't want to extend other live ranges and update kills.
return false;
- if (MOReg == Reg && !MO.isKill())
+ if (MOReg == Reg && !isKill)
// We can't schedule across a use of the register in question.
return false;
// Ensure that if this is the register in question, it's the kill we expect.
@@ -818,19 +872,35 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
}
// Move debug info as well.
- while (From != MBB->begin() && llvm::prior(From)->isDebugValue())
- --From;
+ while (Begin != MBB->begin() && llvm::prior(Begin)->isDebugValue())
+ --Begin;
+
+ nmi = End;
+ MachineBasicBlock::iterator InsertPos = KillPos;
+ if (LIS) {
+ // We have to move the copies first so that the MBB is still well-formed
+ // when calling handleMove().
+ for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {
+ MachineInstr *CopyMI = MBBI;
+ ++MBBI;
+ MBB->splice(InsertPos, MBB, CopyMI);
+ LIS->handleMove(CopyMI);
+ InsertPos = CopyMI;
+ }
+ End = llvm::next(MachineBasicBlock::iterator(MI));
+ }
// Copies following MI may have been moved as well.
- nmi = To;
- MBB->splice(KillPos, MBB, From, To);
+ MBB->splice(InsertPos, MBB, Begin, End);
DistanceMap.erase(DI);
// Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- if (LIS)
+ if (LIS) {
LIS->handleMove(MI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ }
DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
return true;
@@ -866,9 +936,9 @@ bool TwoAddressInstructionPass::
rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
- // Bail immediately if we don't have LV available. We use it to find kills
- // efficiently.
- if (!LV)
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
return false;
MachineInstr *MI = &*mi;
@@ -877,7 +947,22 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ MachineInstr *KillMI = 0;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies, they may be coalesced later.
return false;
@@ -904,10 +989,11 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
continue;
if (isDefTooClose(MOReg, DI->second, MI))
return false;
- if (MOReg == Reg && !MO.isKill())
+ bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS));
+ if (MOReg == Reg && !isKill)
return false;
Uses.insert(MOReg);
- if (MO.isKill() && MOReg != Reg)
+ if (isKill && MOReg != Reg)
Kills.insert(MOReg);
} else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
Defs.insert(MOReg);
@@ -947,7 +1033,8 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
if (Kills.count(MOReg))
// Don't want to extend other live ranges and update kills.
return false;
- if (OtherMI != MI && MOReg == Reg && !MO.isKill())
+ if (OtherMI != MI && MOReg == Reg &&
+ !(MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))))
// We can't schedule across a use of the register in question.
return false;
} else {
@@ -981,10 +1068,12 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
DistanceMap.erase(DI);
// Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- if (LIS)
+ if (LIS) {
LIS->handleMove(KillMI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ }
DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
return true;
@@ -995,11 +1084,13 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
/// either eliminate the tied operands or improve the opportunities for
/// coalescing away the register copy. Returns true if no copy needs to be
/// inserted to untie mi's operands (either because they were untied, or
-/// because mi was rescheduled, and will be visited again later).
+/// because mi was rescheduled, and will be visited again later). If the
+/// shouldOnlyCommute flag is true, only instruction commutation is attempted.
bool TwoAddressInstructionPass::
tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
- unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist, bool shouldOnlyCommute) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -1009,7 +1100,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
assert(TargetRegisterInfo::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
- bool regBKilled = isKilled(MI, regB, MRI, TII);
+ bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
if (TargetRegisterInfo::isVirtualRegister(regA))
scanUses(regA);
@@ -1029,7 +1120,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (regCIdx != ~0U) {
regC = MI.getOperand(regCIdx).getReg();
- if (!regBKilled && isKilled(MI, regC, MRI, TII))
+ if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false))
// If C dies but B does not, swap the B and C operands.
// This makes the live ranges of A and C joinable.
TryCommute = true;
@@ -1048,6 +1139,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
}
+ if (shouldOnlyCommute)
+ return false;
+
// If there is one more use of regB later in the same MBB, consider
// re-schedule this MI below it.
if (rescheduleMIBelowKill(mi, nmi, regB)) {
@@ -1123,10 +1217,12 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
MachineBasicBlock::iterator NewMI = NewMIs[1];
- bool TransformSuccess =
- tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist);
- if (TransformSuccess ||
- NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+ bool TransformResult =
+ tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true);
+ (void)TransformResult;
+ assert(!TransformResult &&
+ "tryInstructionTransform() should return false.");
+ if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
// Success, or at least we made an improvement. Keep the unfolded
// instructions and discard the original.
if (LV) {
@@ -1157,10 +1253,26 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
}
LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
}
+
+ SmallVector<unsigned, 4> OrigRegs;
+ if (LIS) {
+ for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
+ MOE = MI.operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg())
+ OrigRegs.push_back(MOI->getReg());
+ }
+ }
+
MI.eraseFromParent();
+
+ // Update LiveIntervals.
+ if (LIS) {
+ MachineBasicBlock::iterator Begin(NewMIs[0]);
+ MachineBasicBlock::iterator End(NewMIs[1]);
+ LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs);
+ }
+
mi = NewMIs[1];
- if (TransformSuccess)
- return true;
} else {
// Transforming didn't eliminate the tie and didn't lead to an
// improvement. Clean up the unfolded instructions and keep the
@@ -1223,9 +1335,15 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
TiedPairList &TiedPairs,
unsigned &Dist) {
bool IsEarlyClobber = false;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second);
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+ }
+
bool RemovedKillFlag = false;
bool AllUsesCopied = true;
unsigned LastCopiedReg = 0;
+ SlotIndex LastCopyIdx;
unsigned RegB = 0;
for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
unsigned SrcIdx = TiedPairs[tpi].first;
@@ -1233,7 +1351,6 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
const MachineOperand &DstMO = MI->getOperand(DstIdx);
unsigned RegA = DstMO.getReg();
- IsEarlyClobber |= DstMO.isEarlyClobber();
// Grab RegB from the instruction because it may have changed if the
// instruction was commuted.
@@ -1271,9 +1388,17 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
DistanceMap.insert(std::make_pair(PrevMI, Dist));
DistanceMap[MI] = ++Dist;
- SlotIndex CopyIdx;
- if (Indexes)
- CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot();
+ if (LIS) {
+ LastCopyIdx = LIS->InsertMachineInstrInMaps(PrevMI).getRegSlot();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ LiveInterval &LI = LIS->getInterval(RegA);
+ VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ SlotIndex endIdx =
+ LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber);
+ LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI));
+ }
+ }
DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
@@ -1319,6 +1444,18 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
LV->addVirtualRegisterKilled(RegB, PrevMI);
}
+ // Update LiveIntervals.
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(RegB);
+ SlotIndex MIIdx = LIS->getInstructionIndex(MI);
+ LiveInterval::const_iterator I = LI.find(MIIdx);
+ assert(I != LI.end() && "RegB must be live-in to use.");
+
+ SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
+ if (I->end == UseIdx)
+ LI.removeRange(LastCopyIdx, UseIdx);
+ }
+
} else if (RemovedKillFlag) {
// Some tied uses of regB matched their destination registers, so
// regB is still used in this instruction, but a kill flag was
@@ -1343,7 +1480,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
InstrItins = TM.getInstrItineraryData();
- Indexes = getAnalysisIfAvailable<SlotIndexes>();
LV = getAnalysisIfAvailable<LiveVariables>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
@@ -1375,9 +1511,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
continue;
}
- // Remember REG_SEQUENCE instructions, we'll deal with them later.
+ // Expand REG_SEQUENCE instructions. This will position mi at the first
+ // expanded instruction.
if (mi->isRegSequence())
- RegSequences.push_back(&*mi);
+ eliminateRegSequence(mi);
DistanceMap.insert(std::make_pair(mi, ++Dist));
@@ -1406,7 +1543,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
unsigned DstReg = mi->getOperand(DstIdx).getReg();
if (SrcReg != DstReg &&
- tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist)) {
+ tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) {
// The tied operands have been eliminated or shifted further down the
// block to ease elimination. Continue processing with 'nmi'.
TiedOperands.clear();
@@ -1444,192 +1581,98 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
}
}
- // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve
- // SSA form. It's now safe to de-SSA.
- MadeChange |= eliminateRegSequences();
+ if (LIS)
+ MF->verify(this, "After two-address instruction pass");
return MadeChange;
}
-static void UpdateRegSequenceSrcs(unsigned SrcReg,
- unsigned DstReg, unsigned SubIdx,
- MachineRegisterInfo *MRI,
- const TargetRegisterInfo &TRI) {
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- RE = MRI->reg_end(); RI != RE; ) {
- MachineOperand &MO = RI.getOperand();
- ++RI;
- MO.substVirtReg(DstReg, SubIdx, TRI);
+/// Eliminate a REG_SEQUENCE instruction as part of the de-ssa process.
+///
+/// The instruction is turned into a sequence of sub-register copies:
+///
+/// %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1
+///
+/// Becomes:
+///
+/// %dst:ssub0<def,undef> = COPY %v1
+/// %dst:ssub1<def> = COPY %v2
+///
+void TwoAddressInstructionPass::
+eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = MBBI;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (MI->getOperand(0).getSubReg() ||
+ TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ !(MI->getNumOperands() & 1)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+ llvm_unreachable(0);
}
-}
-
-// Find the first def of Reg, assuming they are all in the same basic block.
-static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) {
- SmallPtrSet<MachineInstr*, 8> Defs;
- MachineInstr *First = 0;
- for (MachineRegisterInfo::def_iterator RI = MRI->def_begin(Reg);
- MachineInstr *MI = RI.skipInstruction(); Defs.insert(MI))
- First = MI;
- if (!First)
- return 0;
-
- MachineBasicBlock *MBB = First->getParent();
- MachineBasicBlock::iterator A = First, B = First;
- bool Moving;
- do {
- Moving = false;
- if (A != MBB->begin()) {
- Moving = true;
- --A;
- if (Defs.erase(A)) First = A;
- }
- if (B != MBB->end()) {
- Defs.erase(B);
- ++B;
- Moving = true;
- }
- } while (Moving && !Defs.empty());
- assert(Defs.empty() && "Instructions outside basic block!");
- return First;
-}
-static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
- MachineRegisterInfo *MRI) {
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
- UE = MRI->use_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (UseMI != RegSeq && UseMI->isRegSequence())
- return true;
+ SmallVector<unsigned, 4> OrigRegs;
+ if (LIS) {
+ OrigRegs.push_back(MI->getOperand(0).getReg());
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2)
+ OrigRegs.push_back(MI->getOperand(i).getReg());
}
- return false;
-}
-/// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
-/// of the de-ssa process. This replaces sources of REG_SEQUENCE as
-/// sub-register references of the register defined by REG_SEQUENCE. e.g.
-///
-/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ...
-/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6
-/// =>
-/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
-bool TwoAddressInstructionPass::eliminateRegSequences() {
- if (RegSequences.empty())
- return false;
-
- for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) {
- MachineInstr *MI = RegSequences[i];
- unsigned DstReg = MI->getOperand(0).getReg();
- if (MI->getOperand(0).getSubReg() ||
- TargetRegisterInfo::isPhysicalRegister(DstReg) ||
- !(MI->getNumOperands() & 1)) {
- DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
- llvm_unreachable(0);
- }
-
- bool IsImpDef = true;
- SmallVector<unsigned, 4> RealSrcs;
- SmallSet<unsigned, 4> Seen;
- for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
- // Nothing needs to be inserted for <undef> operands.
- if (MI->getOperand(i).isUndef()) {
- MI->getOperand(i).setReg(0);
- continue;
- }
- unsigned SrcReg = MI->getOperand(i).getReg();
- unsigned SrcSubIdx = MI->getOperand(i).getSubReg();
- unsigned SubIdx = MI->getOperand(i+1).getImm();
- // DefMI of NULL means the value does not have a vreg in this block
- // i.e., its a physical register or a subreg.
- // In either case we force a copy to be generated.
- MachineInstr *DefMI = NULL;
- if (!MI->getOperand(i).getSubReg() &&
- !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
- DefMI = MRI->getUniqueVRegDef(SrcReg);
- }
+ bool DefEmitted = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ MachineOperand &UseMO = MI->getOperand(i);
+ unsigned SrcReg = UseMO.getReg();
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
+ // Nothing needs to be inserted for <undef> operands.
+ if (UseMO.isUndef())
+ continue;
- if (DefMI && DefMI->isImplicitDef()) {
- DefMI->eraseFromParent();
- continue;
- }
- IsImpDef = false;
-
- // Remember COPY sources. These might be candidate for coalescing.
- if (DefMI && DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
- RealSrcs.push_back(DefMI->getOperand(1).getReg());
-
- bool isKill = MI->getOperand(i).isKill();
- if (!DefMI || !Seen.insert(SrcReg) ||
- MI->getParent() != DefMI->getParent() ||
- !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
- !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
- MRI->getRegClass(SrcReg), SubIdx)) {
- // REG_SEQUENCE cannot have duplicated operands, add a copy.
- // Also add an copy if the source is live-in the block. We don't want
- // to end up with a partial-redef of a livein, e.g.
- // BB0:
- // reg1051:10<def> =
- // ...
- // BB1:
- // ... = reg1051:10
- // BB2:
- // reg1051:9<def> =
- // LiveIntervalAnalysis won't like it.
- //
- // If the REG_SEQUENCE doesn't kill its source, keeping live variables
- // correctly up to date becomes very difficult. Insert a copy.
-
- // Defer any kill flag to the last operand using SrcReg. Otherwise, we
- // might insert a COPY that uses SrcReg after is was killed.
- if (isKill)
- for (unsigned j = i + 2; j < e; j += 2)
- if (MI->getOperand(j).getReg() == SrcReg) {
- MI->getOperand(j).setIsKill();
- isKill = false;
- break;
- }
+ // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+ // might insert a COPY that uses SrcReg after it was killed.
+ bool isKill = UseMO.isKill();
+ if (isKill)
+ for (unsigned j = i + 2; j < e; j += 2)
+ if (MI->getOperand(j).getReg() == SrcReg) {
+ MI->getOperand(j).setIsKill();
+ UseMO.setIsKill(false);
+ isKill = false;
+ break;
+ }
- MachineBasicBlock::iterator InsertLoc = MI;
- MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
- MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
- .addReg(DstReg, RegState::Define, SubIdx)
- .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx);
- MI->getOperand(i).setReg(0);
- if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
- LV->replaceKillInstruction(SrcReg, MI, CopyMI);
- DEBUG(dbgs() << "Inserted: " << *CopyMI);
- }
+ // Insert the sub-register copy.
+ MachineInstr *CopyMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, SubIdx)
+ .addOperand(UseMO);
+
+ // The first def needs an <undef> flag because there is no live register
+ // before it.
+ if (!DefEmitted) {
+ CopyMI->getOperand(0).setIsUndef(true);
+ // Return an iterator pointing to the first inserted instr.
+ MBBI = CopyMI;
}
+ DefEmitted = true;
- for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
- unsigned SrcReg = MI->getOperand(i).getReg();
- if (!SrcReg) continue;
- unsigned SubIdx = MI->getOperand(i+1).getImm();
- UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI);
- }
+ // Update LiveVariables' kill info.
+ if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ LV->replaceKillInstruction(SrcReg, MI, CopyMI);
- // Set <def,undef> flags on the first DstReg def in the basic block.
- // It marks the beginning of the live range. All the other defs are
- // read-modify-write.
- if (MachineInstr *Def = findFirstDef(DstReg, MRI)) {
- for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = Def->getOperand(i);
- if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
- MO.setIsUndef();
- }
- DEBUG(dbgs() << "First def: " << *Def);
- }
+ DEBUG(dbgs() << "Inserted: " << *CopyMI);
+ }
- if (IsImpDef) {
- DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
- MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
- for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
- MI->RemoveOperand(j);
- } else {
- DEBUG(dbgs() << "Eliminated: " << *MI);
- MI->eraseFromParent();
- }
+ MachineBasicBlock::iterator EndMBBI =
+ llvm::next(MachineBasicBlock::iterator(MI));
+
+ if (!DefEmitted) {
+ DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI->RemoveOperand(j);
+ } else {
+ DEBUG(dbgs() << "Eliminated: " << *MI);
+ MI->eraseFromParent();
}
- RegSequences.clear();
- return true;
+ // Update LiveIntervals.
+ if (LIS)
+ LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs);
}
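The rewritten pass expands each REG_SEQUENCE in place: one sub-register COPY per live operand, the first emitted def flagged <undef> because no part of the destination is live yet, and a degenerate all-<undef> sequence collapsing into an IMPLICIT_DEF. A standalone model of that expansion:

    #include <cstdio>
    #include <vector>

    struct SeqOp { unsigned Reg, SubIdx; bool IsUndef; };

    int main() {
      // %vreg100 = REG_SEQUENCE %vreg7, sub1, %vreg8, sub2, undef, sub3
      unsigned Dst = 100;
      std::vector<SeqOp> Ops = {{7, 1, false}, {8, 2, false}, {9, 3, true}};
      bool DefEmitted = false;
      for (const SeqOp &Op : Ops) {
        if (Op.IsUndef)  // nothing to insert for <undef> operands
          continue;
        std::printf("%%vreg%u:sub%u<def%s> = COPY %%vreg%u\n", Dst, Op.SubIdx,
                    DefEmitted ? "" : ",undef", Op.Reg);
        DefEmitted = true;
      }
      if (!DefEmitted)  // every operand was <undef>
        std::printf("%%vreg%u<def> = IMPLICIT_DEF\n", Dst);
      return 0;
    }
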
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 52693f03e828..a95ebcd16da8 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -21,22 +21,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Constant.h"
-#include "llvm/Instructions.h"
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
namespace {
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index bb93bdc0bc25..cd012d297489 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -17,8 +17,10 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "LiveDebugVariables.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -26,15 +28,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
using namespace llvm;
@@ -77,15 +77,22 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
return SS;
}
-unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
- unsigned physReg = Hint.second;
- if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
- physReg = getPhys(physReg);
- if (Hint.first == 0)
- return (TargetRegisterInfo::isPhysicalRegister(physReg))
- ? physReg : 0;
- return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
+bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) {
+ unsigned Hint = MRI->getSimpleHint(VirtReg);
+ if (!Hint)
+ return false;
+ if (TargetRegisterInfo::isVirtualRegister(Hint))
+ Hint = getPhys(Hint);
+ return getPhys(VirtReg) == Hint;
+}
+
+bool VirtRegMap::hasKnownPreference(unsigned VirtReg) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
+ if (TargetRegisterInfo::isPhysicalRegister(Hint.second))
+ return true;
+ if (TargetRegisterInfo::isVirtualRegister(Hint.second))
+ return hasPhys(Hint.second);
+ return false;
}
int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
deleted file mode 100644
index 7974dda66a5f..000000000000
--- a/lib/CodeGen/VirtRegMap.h
+++ /dev/null
@@ -1,190 +0,0 @@
-//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a virtual register map. This maps virtual registers to
-// physical registers and virtual registers to stack slots. It is created and
-// updated by a register allocator and then used by a machine code rewriter that
-// adds spill code and rewrites virtual into physical register references.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_VIRTREGMAP_H
-#define LLVM_CODEGEN_VIRTREGMAP_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/IndexedMap.h"
-
-namespace llvm {
- class MachineInstr;
- class MachineFunction;
- class MachineRegisterInfo;
- class TargetInstrInfo;
- class raw_ostream;
- class SlotIndexes;
-
- class VirtRegMap : public MachineFunctionPass {
- public:
- enum {
- NO_PHYS_REG = 0,
- NO_STACK_SLOT = (1L << 30)-1,
- MAX_STACK_SLOT = (1L << 18)-1
- };
-
- private:
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- MachineFunction *MF;
-
- /// Virt2PhysMap - This is a virtual to physical register
- /// mapping. Each virtual register is required to have an entry in
- /// it; even spilled virtual registers (the register mapped to a
- /// spilled register is the temporary used to load it from the
- /// stack).
- IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysMap;
-
- /// Virt2StackSlotMap - This is virtual register to stack slot
- /// mapping. Each spilled virtual register has an entry in it
- /// which corresponds to the stack slot this register is spilled
- /// at.
- IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
-
- /// Virt2SplitMap - This is virtual register to splitted virtual register
- /// mapping.
- IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
-
- /// createSpillSlot - Allocate a spill slot for RC from MFI.
- unsigned createSpillSlot(const TargetRegisterClass *RC);
-
- VirtRegMap(const VirtRegMap&) LLVM_DELETED_FUNCTION;
- void operator=(const VirtRegMap&) LLVM_DELETED_FUNCTION;
-
- public:
- static char ID;
- VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
- Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { }
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- MachineFunction &getMachineFunction() const {
- assert(MF && "getMachineFunction called before runOnMachineFunction");
- return *MF;
- }
-
- MachineRegisterInfo &getRegInfo() const { return *MRI; }
- const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; }
-
- void grow();
-
- /// @brief returns true if the specified virtual register is
- /// mapped to a physical register
- bool hasPhys(unsigned virtReg) const {
- return getPhys(virtReg) != NO_PHYS_REG;
- }
-
- /// @brief returns the physical register mapped to the specified
- /// virtual register
- unsigned getPhys(unsigned virtReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- return Virt2PhysMap[virtReg];
- }
-
- /// @brief creates a mapping for the specified virtual register to
- /// the specified physical register
- void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
- TargetRegisterInfo::isPhysicalRegister(physReg));
- assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
- "attempt to assign physical register to already mapped "
- "virtual register");
- Virt2PhysMap[virtReg] = physReg;
- }
-
- /// @brief clears the specified virtual register's, physical
- /// register mapping
- void clearVirt(unsigned virtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2PhysMap[virtReg] != NO_PHYS_REG &&
- "attempt to clear a not assigned virtual register");
- Virt2PhysMap[virtReg] = NO_PHYS_REG;
- }
-
- /// @brief clears all virtual to physical register mappings
- void clearAllVirt() {
- Virt2PhysMap.clear();
- grow();
- }
-
- /// @brief returns the register allocation preference.
- unsigned getRegAllocPref(unsigned virtReg);
-
- /// @brief returns true if VirtReg is assigned to its preferred physreg.
- bool hasPreferredPhys(unsigned VirtReg) {
- return getPhys(VirtReg) == getRegAllocPref(VirtReg);
- }
-
- /// @brief records virtReg is a split live interval from SReg.
- void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
- Virt2SplitMap[virtReg] = SReg;
- }
-
- /// @brief returns the live interval virtReg is split from.
- unsigned getPreSplitReg(unsigned virtReg) const {
- return Virt2SplitMap[virtReg];
- }
-
- /// getOriginal - Return the original virtual register that VirtReg descends
- /// from through splitting.
- /// A register that was not created by splitting is its own original.
- /// This operation is idempotent.
- unsigned getOriginal(unsigned VirtReg) const {
- unsigned Orig = getPreSplitReg(VirtReg);
- return Orig ? Orig : VirtReg;
- }
-
- /// @brief returns true if the specified virtual register is not
- /// mapped to a stack slot or rematerialized.
- bool isAssignedReg(unsigned virtReg) const {
- if (getStackSlot(virtReg) == NO_STACK_SLOT)
- return true;
- // Split register can be assigned a physical register as well as a
- // stack slot or remat id.
- return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG);
- }
-
- /// @brief returns the stack slot mapped to the specified virtual
- /// register
- int getStackSlot(unsigned virtReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- return Virt2StackSlotMap[virtReg];
- }
-
- /// @brief create a mapping for the specified virtual register to
- /// the next available stack slot
- int assignVirt2StackSlot(unsigned virtReg);
- /// @brief create a mapping for the specified virtual register to
- /// the specified stack slot
- void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
-
- void print(raw_ostream &OS, const Module* M = 0) const;
- void dump() const;
- };
-
- inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) {
- VRM.print(OS);
- return OS;
- }
-} // End llvm namespace
-
-#endif
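
The deleted header above maps each virtual register to at most one physical register or stack slot, with asserts guarding against double assignment and against clearing an unassigned register. A minimal sketch of those invariants, using a plain std::vector in place of IndexedMap (VirtRegMapSketch and its members are hypothetical illustrations, not LLVM's API):

    #include <cassert>
    #include <vector>

    class VirtRegMapSketch {
      static const unsigned NO_PHYS_REG = 0;
      std::vector<unsigned> Virt2Phys; // indexed by virtual register number

    public:
      explicit VirtRegMapSketch(unsigned NumVirtRegs)
          : Virt2Phys(NumVirtRegs, NO_PHYS_REG) {}

      bool hasPhys(unsigned VirtReg) const {
        return Virt2Phys[VirtReg] != NO_PHYS_REG;
      }
      void assignVirt2Phys(unsigned VirtReg, unsigned PhysReg) {
        assert(!hasPhys(VirtReg) && "already mapped");
        Virt2Phys[VirtReg] = PhysReg;
      }
      void clearVirt(unsigned VirtReg) {
        assert(hasPhys(VirtReg) && "clearing an unassigned register");
        Virt2Phys[VirtReg] = NO_PHYS_REG;
      }
    };
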
diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt
index 1e9e509fd2a1..e97455abace2 100644
--- a/lib/DebugInfo/CMakeLists.txt
+++ b/lib/DebugInfo/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMDebugInfo
DWARFDebugAbbrev.cpp
DWARFDebugArangeSet.cpp
DWARFDebugAranges.cpp
+ DWARFDebugFrame.cpp
DWARFDebugInfoEntry.cpp
DWARFDebugLine.cpp
DWARFDebugRangeList.cpp
diff --git a/lib/DebugInfo/DIContext.cpp b/lib/DebugInfo/DIContext.cpp
index 691a92c392c2..49a44097d3e2 100644
--- a/lib/DebugInfo/DIContext.cpp
+++ b/lib/DebugInfo/DIContext.cpp
@@ -13,15 +13,6 @@ using namespace llvm;
DIContext::~DIContext() {}
-DIContext *DIContext::getDWARFContext(bool isLittleEndian,
- StringRef infoSection,
- StringRef abbrevSection,
- StringRef aRangeSection,
- StringRef lineSection,
- StringRef stringSection,
- StringRef rangeSection,
- const RelocAddrMap &Map) {
- return new DWARFContextInMemory(isLittleEndian, infoSection, abbrevSection,
- aRangeSection, lineSection, stringSection,
- rangeSection, Map);
+DIContext *DIContext::getDWARFContext(object::ObjectFile *Obj) {
+ return new DWARFContextInMemory(Obj);
}
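
The factory now takes the ObjectFile directly instead of seven individual section blobs. A rough usage sketch, assuming the createObjectFile entry point of the same era (error handling elided; treat the exact calls as an assumption, not a documented recipe):

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/DebugInfo/DIContext.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpAllDebugSections(const char *Path) {
      // createObjectFile is assumed here; it returned a raw pointer then.
      llvm::object::ObjectFile *Obj =
          llvm::object::ObjectFile::createObjectFile(Path);
      if (!Obj)
        return; // not a recognized object file
      llvm::OwningPtr<llvm::DIContext> Ctx(
          llvm::DIContext::getDWARFContext(Obj));
      Ctx->dump(llvm::outs()); // DIDT_All by default
    }
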
diff --git a/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp
index 0df692c3a3b7..2de62ab9380d 100644
--- a/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp
+++ b/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp
@@ -23,7 +23,7 @@ bool
DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr,
uint32_t code) {
Code = code;
- Attributes.clear();
+ Attribute.clear();
if (Code) {
Tag = data.getULEB128(offset_ptr);
HasChildren = data.getU8(offset_ptr);
@@ -33,7 +33,7 @@ DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr,
uint16_t form = data.getULEB128(offset_ptr);
if (attr && form)
- Attributes.push_back(DWARFAttribute(attr, form));
+ Attribute.push_back(DWARFAttribute(attr, form));
else
break;
}
@@ -55,19 +55,19 @@ void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const {
else
OS << format("DW_TAG_Unknown_%x", getTag());
OS << "\tDW_CHILDREN_" << (hasChildren() ? "yes" : "no") << '\n';
- for (unsigned i = 0, e = Attributes.size(); i != e; ++i) {
+ for (unsigned i = 0, e = Attribute.size(); i != e; ++i) {
OS << '\t';
- const char *attrString = AttributeString(Attributes[i].getAttribute());
+ const char *attrString = AttributeString(Attribute[i].getAttribute());
if (attrString)
OS << attrString;
else
- OS << format("DW_AT_Unknown_%x", Attributes[i].getAttribute());
+ OS << format("DW_AT_Unknown_%x", Attribute[i].getAttribute());
OS << '\t';
- const char *formString = FormEncodingString(Attributes[i].getForm());
+ const char *formString = FormEncodingString(Attribute[i].getForm());
if (formString)
OS << formString;
else
- OS << format("DW_FORM_Unknown_%x", Attributes[i].getForm());
+ OS << format("DW_FORM_Unknown_%x", Attribute[i].getForm());
OS << '\n';
}
OS << '\n';
@@ -75,8 +75,8 @@ void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const {
uint32_t
DWARFAbbreviationDeclaration::findAttributeIndex(uint16_t attr) const {
- for (uint32_t i = 0, e = Attributes.size(); i != e; ++i) {
- if (Attributes[i].getAttribute() == attr)
+ for (uint32_t i = 0, e = Attribute.size(); i != e; ++i) {
+ if (Attribute[i].getAttribute() == attr)
return i;
}
return -1U;
diff --git a/lib/DebugInfo/DWARFAbbreviationDeclaration.h b/lib/DebugInfo/DWARFAbbreviationDeclaration.h
index 2463a3cc0494..9a3fcd8a783c 100644
--- a/lib/DebugInfo/DWARFAbbreviationDeclaration.h
+++ b/lib/DebugInfo/DWARFAbbreviationDeclaration.h
@@ -22,7 +22,7 @@ class DWARFAbbreviationDeclaration {
uint32_t Code;
uint32_t Tag;
bool HasChildren;
- SmallVector<DWARFAttribute, 8> Attributes;
+ SmallVector<DWARFAttribute, 8> Attribute;
public:
enum { InvalidCode = 0 };
DWARFAbbreviationDeclaration()
@@ -31,12 +31,12 @@ public:
uint32_t getCode() const { return Code; }
uint32_t getTag() const { return Tag; }
bool hasChildren() const { return HasChildren; }
- uint32_t getNumAttributes() const { return Attributes.size(); }
+ uint32_t getNumAttributes() const { return Attribute.size(); }
uint16_t getAttrByIndex(uint32_t idx) const {
- return Attributes.size() > idx ? Attributes[idx].getAttribute() : 0;
+ return Attribute.size() > idx ? Attribute[idx].getAttribute() : 0;
}
uint16_t getFormByIndex(uint32_t idx) const {
- return Attributes.size() > idx ? Attributes[idx].getForm() : 0;
+ return Attribute.size() > idx ? Attribute[idx].getForm() : 0;
}
uint32_t findAttributeIndex(uint16_t attr) const;
@@ -45,7 +45,7 @@ public:
bool isValid() const { return Code != 0 && Tag != 0; }
void dump(raw_ostream &OS) const;
const SmallVectorImpl<DWARFAttribute> &getAttributes() const {
- return Attributes;
+ return Attribute;
}
};
diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp
index bdd65b77e4b6..e3e4ccd7d9e1 100644
--- a/lib/DebugInfo/DWARFCompileUnit.cpp
+++ b/lib/DebugInfo/DWARFCompileUnit.cpp
@@ -17,8 +17,7 @@ using namespace llvm;
using namespace dwarf;
DataExtractor DWARFCompileUnit::getDebugInfoExtractor() const {
- return DataExtractor(Context.getInfoSection(),
- Context.isLittleEndian(), getAddressByteSize());
+ return DataExtractor(InfoSection, isLittleEndian, AddrSize);
}
bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
@@ -28,7 +27,6 @@ bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
if (debug_info.isValidOffset(*offset_ptr)) {
uint64_t abbrOffset;
- const DWARFDebugAbbrev *abbr = Context.getDebugAbbrev();
Length = debug_info.getU32(offset_ptr);
Version = debug_info.getU16(offset_ptr);
abbrOffset = debug_info.getU32(offset_ptr);
@@ -36,11 +34,11 @@ bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
bool lengthOK = debug_info.isValidOffset(getNextCompileUnitOffset()-1);
bool versionOK = DWARFContext::isSupportedVersion(Version);
- bool abbrOffsetOK = Context.getAbbrevSection().size() > abbrOffset;
+ bool abbrOffsetOK = AbbrevSection.size() > abbrOffset;
bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
- if (lengthOK && versionOK && addrSizeOK && abbrOffsetOK && abbr != NULL) {
- Abbrevs = abbr->getAbbreviationDeclarationSet(abbrOffset);
+ if (lengthOK && versionOK && addrSizeOK && abbrOffsetOK && Abbrev != NULL) {
+ Abbrevs = Abbrev->getAbbreviationDeclarationSet(abbrOffset);
return true;
}
@@ -79,8 +77,7 @@ bool DWARFCompileUnit::extractRangeList(uint32_t RangeListOffset,
DWARFDebugRangeList &RangeList) const {
// Require that compile unit is extracted.
assert(DieArray.size() > 0);
- DataExtractor RangesData(Context.getRangeSection(),
- Context.isLittleEndian(), AddrSize);
+ DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize);
return RangeList.extract(RangesData, &RangeListOffset);
}
@@ -211,7 +208,7 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) {
// should always terminate at or before the start of the next compilation
// unit header).
if (offset > next_cu_offset)
- fprintf(stderr, "warning: DWARF compile unit extends beyond its"
+ fprintf(stderr, "warning: DWARF compile unit extends beyond its "
"bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), offset);
setDIERelations();
diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h
index 03e28620d4b3..2a74605fcb2d 100644
--- a/lib/DebugInfo/DWARFCompileUnit.h
+++ b/lib/DebugInfo/DWARFCompileUnit.h
@@ -13,15 +13,25 @@
#include "DWARFDebugAbbrev.h"
#include "DWARFDebugInfoEntry.h"
#include "DWARFDebugRangeList.h"
+#include "DWARFRelocMap.h"
#include <vector>
namespace llvm {
-class DWARFContext;
+class DWARFDebugAbbrev;
+class StringRef;
class raw_ostream;
class DWARFCompileUnit {
- DWARFContext &Context;
+ const DWARFDebugAbbrev *Abbrev;
+ StringRef InfoSection;
+ StringRef AbbrevSection;
+ StringRef RangeSection;
+ StringRef StringSection;
+ StringRef StringOffsetSection;
+ StringRef AddrOffsetSection;
+ const RelocAddrMap *RelocMap;
+ bool isLittleEndian;
uint32_t Offset;
uint32_t Length;
@@ -32,11 +42,20 @@ class DWARFCompileUnit {
// The compile unit debug information entry item.
std::vector<DWARFDebugInfoEntryMinimal> DieArray;
public:
- DWARFCompileUnit(DWARFContext &context) : Context(context) {
+
+ DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS,
+ StringRef RS, StringRef SS, StringRef SOS, StringRef AOS,
+ const RelocAddrMap *M, bool LE) :
+ Abbrev(DA), InfoSection(IS), AbbrevSection(AS),
+ RangeSection(RS), StringSection(SS), StringOffsetSection(SOS),
+ AddrOffsetSection(AOS), RelocMap(M), isLittleEndian(LE) {
clear();
}
- DWARFContext &getContext() const { return Context; }
+ StringRef getStringSection() const { return StringSection; }
+ StringRef getStringOffsetSection() const { return StringOffsetSection; }
+ StringRef getAddrOffsetSection() const { return AddrOffsetSection; }
+ const RelocAddrMap *getRelocMap() const { return RelocMap; }
DataExtractor getDebugInfoExtractor() const;
bool extract(DataExtractor debug_info, uint32_t* offset_ptr);
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index afd614cc356e..9e19310a99c0 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -19,57 +19,124 @@ using namespace dwarf;
typedef DWARFDebugLine::LineTable DWARFLineTable;
-void DWARFContext::dump(raw_ostream &OS) {
- OS << ".debug_abbrev contents:\n";
- getDebugAbbrev()->dump(OS);
+void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
+ if (DumpType == DIDT_All || DumpType == DIDT_Abbrev) {
+ OS << ".debug_abbrev contents:\n";
+ getDebugAbbrev()->dump(OS);
+ }
- OS << "\n.debug_info contents:\n";
- for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i)
- getCompileUnitAtIndex(i)->dump(OS);
+ if (DumpType == DIDT_All || DumpType == DIDT_Info) {
+ OS << "\n.debug_info contents:\n";
+ for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i)
+ getCompileUnitAtIndex(i)->dump(OS);
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_Frames) {
+ OS << "\n.debug_frame contents:\n";
+ getDebugFrame()->dump(OS);
+ }
- OS << "\n.debug_aranges contents:\n";
- DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0);
uint32_t offset = 0;
- DWARFDebugArangeSet set;
- while (set.extract(arangesData, &offset))
- set.dump(OS);
+ if (DumpType == DIDT_All || DumpType == DIDT_Aranges) {
+ OS << "\n.debug_aranges contents:\n";
+ DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0);
+ DWARFDebugArangeSet set;
+ while (set.extract(arangesData, &offset))
+ set.dump(OS);
+ }
uint8_t savedAddressByteSize = 0;
- OS << "\n.debug_lines contents:\n";
- for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) {
- DWARFCompileUnit *cu = getCompileUnitAtIndex(i);
- savedAddressByteSize = cu->getAddressByteSize();
- unsigned stmtOffset =
- cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list,
- -1U);
- if (stmtOffset != -1U) {
- DataExtractor lineData(getLineSection(), isLittleEndian(),
+ if (DumpType == DIDT_All || DumpType == DIDT_Line) {
+ OS << "\n.debug_line contents:\n";
+ for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) {
+ DWARFCompileUnit *cu = getCompileUnitAtIndex(i);
+ savedAddressByteSize = cu->getAddressByteSize();
+ unsigned stmtOffset =
+ cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list,
+ -1U);
+ if (stmtOffset != -1U) {
+ DataExtractor lineData(getLineSection(), isLittleEndian(),
+ savedAddressByteSize);
+ DWARFDebugLine::DumpingState state(OS);
+ DWARFDebugLine::parseStatementTable(lineData, &lineRelocMap(), &stmtOffset, state);
+ }
+ }
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_Str) {
+ OS << "\n.debug_str contents:\n";
+ DataExtractor strData(getStringSection(), isLittleEndian(), 0);
+ offset = 0;
+ uint32_t strOffset = 0;
+ while (const char *s = strData.getCStr(&offset)) {
+ OS << format("0x%8.8x: \"%s\"\n", strOffset, s);
+ strOffset = offset;
+ }
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_Ranges) {
+ OS << "\n.debug_ranges contents:\n";
+ // In fact, different compile units may have different address byte
+ // sizes, but for simplicity we just use the address byte size of the last
+ // compile unit (there is no easy and fast way to associate address range
+ // list and the compile unit it describes).
+ DataExtractor rangesData(getRangeSection(), isLittleEndian(),
savedAddressByteSize);
- DWARFDebugLine::DumpingState state(OS);
- DWARFDebugLine::parseStatementTable(lineData, &stmtOffset, state);
+ offset = 0;
+ DWARFDebugRangeList rangeList;
+ while (rangeList.extract(rangesData, &offset))
+ rangeList.dump(OS);
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_Pubnames) {
+ OS << "\n.debug_pubnames contents:\n";
+ DataExtractor pubNames(getPubNamesSection(), isLittleEndian(), 0);
+ offset = 0;
+ OS << "Length: " << pubNames.getU32(&offset) << "\n";
+ OS << "Version: " << pubNames.getU16(&offset) << "\n";
+ OS << "Offset in .debug_info: " << pubNames.getU32(&offset) << "\n";
+ OS << "Size: " << pubNames.getU32(&offset) << "\n";
+ OS << "\n Offset Name\n";
+ while (offset < getPubNamesSection().size()) {
+ uint32_t n = pubNames.getU32(&offset);
+ if (n == 0)
+ break;
+ OS << format("%8x ", n);
+ OS << pubNames.getCStr(&offset) << "\n";
}
}
- OS << "\n.debug_str contents:\n";
- DataExtractor strData(getStringSection(), isLittleEndian(), 0);
- offset = 0;
- uint32_t lastOffset = 0;
- while (const char *s = strData.getCStr(&offset)) {
- OS << format("0x%8.8x: \"%s\"\n", lastOffset, s);
- lastOffset = offset;
+ if (DumpType == DIDT_All || DumpType == DIDT_AbbrevDwo) {
+ OS << "\n.debug_abbrev.dwo contents:\n";
+ getDebugAbbrevDWO()->dump(OS);
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo) {
+ OS << "\n.debug_info.dwo contents:\n";
+ for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i)
+ getDWOCompileUnitAtIndex(i)->dump(OS);
+ }
+
+ if (DumpType == DIDT_All || DumpType == DIDT_StrDwo) {
+ OS << "\n.debug_str.dwo contents:\n";
+ DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0);
+ offset = 0;
+ uint32_t strDWOOffset = 0;
+ while (const char *s = strDWOData.getCStr(&offset)) {
+ OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s);
+ strDWOOffset = offset;
+ }
}
- OS << "\n.debug_ranges contents:\n";
- // In fact, different compile units may have different address byte
- // sizes, but for simplicity we just use the address byte size of the last
- // compile unit (there is no easy and fast way to associate address range
- // list and the compile unit it describes).
- DataExtractor rangesData(getRangeSection(), isLittleEndian(),
- savedAddressByteSize);
- offset = 0;
- DWARFDebugRangeList rangeList;
- while (rangeList.extract(rangesData, &offset))
- rangeList.dump(OS);
+ if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) {
+ OS << "\n.debug_str_offsets.dwo contents:\n";
+ DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0);
+ offset = 0;
+ while (offset < getStringOffsetDWOSection().size()) {
+ OS << format("0x%8.8x: ", offset);
+ OS << format("%8.8x\n", strOffsetExt.getU32(&offset));
+ }
+ }
}
const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
@@ -83,6 +150,16 @@ const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
return Abbrev.get();
}
+const DWARFDebugAbbrev *DWARFContext::getDebugAbbrevDWO() {
+ if (AbbrevDWO)
+ return AbbrevDWO.get();
+
+ DataExtractor abbrData(getAbbrevDWOSection(), isLittleEndian(), 0);
+ AbbrevDWO.reset(new DWARFDebugAbbrev());
+ AbbrevDWO->parse(abbrData);
+ return AbbrevDWO.get();
+}
+
const DWARFDebugAranges *DWARFContext::getDebugAranges() {
if (Aranges)
return Aranges.get();
@@ -91,15 +168,37 @@ const DWARFDebugAranges *DWARFContext::getDebugAranges() {
Aranges.reset(new DWARFDebugAranges());
Aranges->extract(arangesData);
- if (Aranges->isEmpty()) // No aranges in file, generate them from the DIEs.
- Aranges->generate(this);
+ // Generate aranges from DIEs: even if .debug_aranges section is present,
+ // it may describe only a small subset of compilation units, so we need to
+ // manually build aranges for the rest of them.
+ Aranges->generate(this);
return Aranges.get();
}
+const DWARFDebugFrame *DWARFContext::getDebugFrame() {
+ if (DebugFrame)
+ return DebugFrame.get();
+
+ // There's a "bug" in the DWARFv3 standard with respect to the target address
+ // size within debug frame sections. While DWARF is supposed to be independent
+ // of its container, FDEs have fields with size being "target address size",
+ // which isn't specified in DWARF in general. It's only specified for CUs, but
+ // .eh_frame can appear without a .debug_info section. Follow the example of
+ // other tools (libdwarf) and extract this from the container (ObjectFile
+ // provides this information). This problem is fixed in DWARFv4
+ // See this dwarf-discuss discussion for more details:
+ // http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html
+ DataExtractor debugFrameData(getDebugFrameSection(), isLittleEndian(),
+ getAddressSize());
+ DebugFrame.reset(new DWARFDebugFrame());
+ DebugFrame->parse(debugFrameData);
+ return DebugFrame.get();
+}
+
const DWARFLineTable *
DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
if (!Line)
- Line.reset(new DWARFDebugLine());
+ Line.reset(new DWARFDebugLine(&lineRelocMap()));
unsigned stmtOffset =
cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list,
@@ -122,7 +221,12 @@ void DWARFContext::parseCompileUnits() {
const DataExtractor &DIData = DataExtractor(getInfoSection(),
isLittleEndian(), 0);
while (DIData.isValidOffset(offset)) {
- CUs.push_back(DWARFCompileUnit(*this));
+ CUs.push_back(DWARFCompileUnit(getDebugAbbrev(), getInfoSection(),
+ getAbbrevSection(), getRangeSection(),
+ getStringSection(), StringRef(),
+ getAddrSection(),
+ &infoRelocMap(),
+ isLittleEndian()));
if (!CUs.back().extract(DIData, &offset)) {
CUs.pop_back();
break;
@@ -132,6 +236,28 @@ void DWARFContext::parseCompileUnits() {
}
}
+void DWARFContext::parseDWOCompileUnits() {
+ uint32_t offset = 0;
+ const DataExtractor &DIData = DataExtractor(getInfoDWOSection(),
+ isLittleEndian(), 0);
+ while (DIData.isValidOffset(offset)) {
+ DWOCUs.push_back(DWARFCompileUnit(getDebugAbbrevDWO(), getInfoDWOSection(),
+ getAbbrevDWOSection(),
+ getRangeDWOSection(),
+ getStringDWOSection(),
+ getStringOffsetDWOSection(),
+ getAddrSection(),
+ &infoDWORelocMap(),
+ isLittleEndian()));
+ if (!DWOCUs.back().extract(DIData, &offset)) {
+ DWOCUs.pop_back();
+ break;
+ }
+
+ offset = DWOCUs.back().getNextCompileUnitOffset();
+ }
+}
+
namespace {
struct OffsetComparator {
bool operator()(const DWARFCompileUnit &LHS,
@@ -242,6 +368,64 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
Line, Column);
}
+DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address,
+ uint64_t Size,
+ DILineInfoSpecifier Specifier) {
+ DILineInfoTable Lines;
+ DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+ if (!CU)
+ return Lines;
+
+ std::string FunctionName = "<invalid>";
+ if (Specifier.needs(DILineInfoSpecifier::FunctionName)) {
+ // The address may correspond to instruction in some inlined function,
+ // so we have to build the chain of inlined functions and take the
+ // name of the topmost function in it.
+ const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain =
+ CU->getInlinedChainForAddress(Address);
+ if (InlinedChain.size() > 0) {
+ const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain[0];
+ if (const char *Name = TopFunctionDIE.getSubroutineName(CU))
+ FunctionName = Name;
+ }
+ }
+
+ StringRef FuncNameRef = StringRef(FunctionName);
+
+ // If the Specifier says we don't need FileLineInfo, just
+ // return the top-most function at the starting address.
+ if (!Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
+ Lines.push_back(std::make_pair(Address,
+ DILineInfo(StringRef("<invalid>"),
+ FuncNameRef, 0, 0)));
+ return Lines;
+ }
+
+ const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU);
+ const bool NeedsAbsoluteFilePath =
+ Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath);
+
+ // Get the index of row we're looking for in the line table.
+ std::vector<uint32_t> RowVector;
+ if (!LineTable->lookupAddressRange(Address, Size, RowVector))
+ return Lines;
+
+ uint32_t NumRows = RowVector.size();
+ for (uint32_t i = 0; i < NumRows; ++i) {
+ uint32_t RowIndex = RowVector[i];
+ // Take file number and line/column from the row.
+ const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex];
+ std::string FileName = "<invalid>";
+ getFileNameForCompileUnit(CU, LineTable, Row.File,
+ NeedsAbsoluteFilePath, FileName);
+ Lines.push_back(std::make_pair(Row.Address,
+ DILineInfo(StringRef(FileName),
+ FuncNameRef, Row.Line, Row.Column)));
+ }
+
+ return Lines;
+}
+
DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
DILineInfoSpecifier Specifier) {
DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
@@ -298,4 +482,115 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
return InliningInfo;
}
+DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
+ IsLittleEndian(Obj->isLittleEndian()),
+ AddressSize(Obj->getBytesInAddress()) {
+ error_code ec;
+ for (object::section_iterator i = Obj->begin_sections(),
+ e = Obj->end_sections();
+ i != e; i.increment(ec)) {
+ StringRef name;
+ i->getName(name);
+ StringRef data;
+ i->getContents(data);
+
+ name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
+ if (name == "debug_info")
+ InfoSection = data;
+ else if (name == "debug_abbrev")
+ AbbrevSection = data;
+ else if (name == "debug_line")
+ LineSection = data;
+ else if (name == "debug_aranges")
+ ARangeSection = data;
+ else if (name == "debug_frame")
+ DebugFrameSection = data;
+ else if (name == "debug_str")
+ StringSection = data;
+ else if (name == "debug_ranges") {
+ // FIXME: Use the other dwo range section when we emit it.
+ RangeDWOSection = data;
+ RangeSection = data;
+ }
+ else if (name == "debug_pubnames")
+ PubNamesSection = data;
+ else if (name == "debug_info.dwo")
+ InfoDWOSection = data;
+ else if (name == "debug_abbrev.dwo")
+ AbbrevDWOSection = data;
+ else if (name == "debug_str.dwo")
+ StringDWOSection = data;
+ else if (name == "debug_str_offsets.dwo")
+ StringOffsetDWOSection = data;
+ else if (name == "debug_addr")
+ AddrSection = data;
+ // Any more debug info sections go here.
+ else
+ continue;
+
+ // TODO: Add support for relocations in other sections as needed.
+ // Record relocations for the debug_info and debug_line sections.
+ RelocAddrMap *Map;
+ if (name == "debug_info")
+ Map = &InfoRelocMap;
+ else if (name == "debug_info.dwo")
+ Map = &InfoDWORelocMap;
+ else if (name == "debug_line")
+ Map = &LineRelocMap;
+ else
+ continue;
+
+ if (i->begin_relocations() != i->end_relocations()) {
+ uint64_t SectionSize;
+ i->getSize(SectionSize);
+ for (object::relocation_iterator reloc_i = i->begin_relocations(),
+ reloc_e = i->end_relocations();
+ reloc_i != reloc_e; reloc_i.increment(ec)) {
+ uint64_t Address;
+ reloc_i->getAddress(Address);
+ uint64_t Type;
+ reloc_i->getType(Type);
+ uint64_t SymAddr = 0;
+ // ELF relocations may need the symbol address
+ if (Obj->isELF()) {
+ object::SymbolRef Sym;
+ reloc_i->getSymbol(Sym);
+ Sym.getAddress(SymAddr);
+ }
+
+ object::RelocVisitor V(Obj->getFileFormatName());
+ // The section address is always 0 for debug sections.
+ object::RelocToApply R(V.visit(Type, *reloc_i, 0, SymAddr));
+ if (V.error()) {
+ SmallString<32> Name;
+ error_code ec(reloc_i->getTypeName(Name));
+ if (ec) {
+ errs() << "Aaaaaa! Nameless relocation! Aaaaaa!\n";
+ }
+ errs() << "error: failed to compute relocation: "
+ << Name << "\n";
+ continue;
+ }
+
+ if (Address + R.Width > SectionSize) {
+ errs() << "error: " << R.Width << "-byte relocation starting "
+ << Address << " bytes into section " << name << " which is "
+ << SectionSize << " bytes long.\n";
+ continue;
+ }
+ if (R.Width > 8) {
+ errs() << "error: can't handle a relocation of more than 8 bytes at "
+ "a time.\n";
+ continue;
+ }
+ DEBUG(dbgs() << "Writing " << format("%p", R.Value)
+ << " at " << format("%p", Address)
+ << " with width " << format("%d", R.Width)
+ << "\n");
+ Map->insert(std::make_pair(Address, std::make_pair(R.Width, R.Value)));
+ }
+ }
+ }
+}
+
void DWARFContextInMemory::anchor() { }
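
Each map entry recorded by the constructor above keys a relocation's section offset to a (width, value) pair. On the consumer side the pattern is: before reading an address at some offset, look the offset up and add the relocated value, exactly as the line-table parser below does for DW_LNE_set_address. A simplified sketch (readPossiblyRelocatedAddress is a hypothetical helper):

    #include "DWARFRelocMap.h" // RelocAddrMap typedef (assumed location)
    #include "llvm/Support/DataExtractor.h"

    static uint64_t readPossiblyRelocatedAddress(const llvm::RelocAddrMap &Map,
                                                 llvm::DataExtractor Data,
                                                 uint32_t *Offset) {
      llvm::RelocAddrMap::const_iterator AI = Map.find(*Offset);
      uint64_t Addr = Data.getAddress(Offset);
      if (AI != Map.end())
        Addr += AI->second.second; // .second.first holds the reloc width
      return Addr;
    }
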
diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h
index 4001792b3d5f..37b272993f37 100644
--- a/lib/DebugInfo/DWARFContext.h
+++ b/lib/DebugInfo/DWARFContext.h
@@ -12,11 +12,12 @@
#include "DWARFCompileUnit.h"
#include "DWARFDebugAranges.h"
+#include "DWARFDebugFrame.h"
#include "DWARFDebugLine.h"
#include "DWARFDebugRangeList.h"
-#include "llvm/DebugInfo/DIContext.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/DIContext.h"
namespace llvm {
@@ -25,24 +26,28 @@ namespace llvm {
/// information parsing. The actual data is supplied through pure virtual
/// methods that a concrete implementation provides.
class DWARFContext : public DIContext {
- bool IsLittleEndian;
- const RelocAddrMap &RelocMap;
-
SmallVector<DWARFCompileUnit, 1> CUs;
OwningPtr<DWARFDebugAbbrev> Abbrev;
OwningPtr<DWARFDebugAranges> Aranges;
OwningPtr<DWARFDebugLine> Line;
+ OwningPtr<DWARFDebugFrame> DebugFrame;
+
+ SmallVector<DWARFCompileUnit, 1> DWOCUs;
+ OwningPtr<DWARFDebugAbbrev> AbbrevDWO;
DWARFContext(DWARFContext &) LLVM_DELETED_FUNCTION;
DWARFContext &operator=(DWARFContext &) LLVM_DELETED_FUNCTION;
/// Read compile units from the debug_info section and store them in CUs.
void parseCompileUnits();
-protected:
- DWARFContext(bool isLittleEndian, const RelocAddrMap &Map) :
- IsLittleEndian(isLittleEndian), RelocMap(Map) {}
+
+ /// Read compile units from the debug_info.dwo section and store them in
+ /// DWOCUs.
+ void parseDWOCompileUnits();
+
public:
- virtual void dump(raw_ostream &OS);
+ DWARFContext() {}
+ virtual void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All);
/// Get the number of compile units in this context.
unsigned getNumCompileUnits() {
@@ -50,6 +55,14 @@ public:
parseCompileUnits();
return CUs.size();
}
+
+ /// Get the number of compile units in the DWO context.
+ unsigned getNumDWOCompileUnits() {
+ if (DWOCUs.empty())
+ parseDWOCompileUnits();
+ return DWOCUs.size();
+ }
+
/// Get the compile unit at the specified index for this compile unit.
DWARFCompileUnit *getCompileUnitAtIndex(unsigned index) {
if (CUs.empty())
@@ -57,30 +70,57 @@ public:
return &CUs[index];
}
+ /// Get the compile unit at the specified index for the DWO compile units.
+ DWARFCompileUnit *getDWOCompileUnitAtIndex(unsigned index) {
+ if (DWOCUs.empty())
+ parseDWOCompileUnits();
+ return &DWOCUs[index];
+ }
+
/// Get a pointer to the parsed DebugAbbrev object.
const DWARFDebugAbbrev *getDebugAbbrev();
+ /// Get a pointer to the parsed dwo abbreviations object.
+ const DWARFDebugAbbrev *getDebugAbbrevDWO();
+
/// Get a pointer to the parsed DebugAranges object.
const DWARFDebugAranges *getDebugAranges();
+ /// Get a pointer to the parsed frame information object.
+ const DWARFDebugFrame *getDebugFrame();
+
/// Get a pointer to a parsed line table corresponding to a compile unit.
const DWARFDebugLine::LineTable *
getLineTableForCompileUnit(DWARFCompileUnit *cu);
virtual DILineInfo getLineInfoForAddress(uint64_t Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier());
+ virtual DILineInfoTable getLineInfoForAddressRange(uint64_t Address,
+ uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier());
virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier());
- bool isLittleEndian() const { return IsLittleEndian; }
- const RelocAddrMap &relocMap() const { return RelocMap; }
-
+ virtual bool isLittleEndian() const = 0;
+ virtual uint8_t getAddressSize() const = 0;
+ virtual const RelocAddrMap &infoRelocMap() const = 0;
+ virtual const RelocAddrMap &lineRelocMap() const = 0;
virtual StringRef getInfoSection() = 0;
virtual StringRef getAbbrevSection() = 0;
virtual StringRef getARangeSection() = 0;
+ virtual StringRef getDebugFrameSection() = 0;
virtual StringRef getLineSection() = 0;
virtual StringRef getStringSection() = 0;
virtual StringRef getRangeSection() = 0;
+ virtual StringRef getPubNamesSection() = 0;
+
+ // Sections for DWARF5 split dwarf proposal.
+ virtual StringRef getInfoDWOSection() = 0;
+ virtual StringRef getAbbrevDWOSection() = 0;
+ virtual StringRef getStringDWOSection() = 0;
+ virtual StringRef getStringOffsetDWOSection() = 0;
+ virtual StringRef getRangeDWOSection() = 0;
+ virtual StringRef getAddrSection() = 0;
+ virtual const RelocAddrMap &infoDWORelocMap() const = 0;
static bool isSupportedVersion(unsigned version) {
return version == 2 || version == 3;
@@ -99,36 +139,57 @@ private:
/// pointers to it.
class DWARFContextInMemory : public DWARFContext {
virtual void anchor();
+ bool IsLittleEndian;
+ uint8_t AddressSize;
+ RelocAddrMap InfoRelocMap;
+ RelocAddrMap LineRelocMap;
StringRef InfoSection;
StringRef AbbrevSection;
StringRef ARangeSection;
+ StringRef DebugFrameSection;
StringRef LineSection;
StringRef StringSection;
StringRef RangeSection;
-public:
- DWARFContextInMemory(bool isLittleEndian,
- StringRef infoSection,
- StringRef abbrevSection,
- StringRef aRangeSection,
- StringRef lineSection,
- StringRef stringSection,
- StringRef rangeSection,
- const RelocAddrMap &Map = RelocAddrMap())
- : DWARFContext(isLittleEndian, Map),
- InfoSection(infoSection),
- AbbrevSection(abbrevSection),
- ARangeSection(aRangeSection),
- LineSection(lineSection),
- StringSection(stringSection),
- RangeSection(rangeSection)
- {}
+ StringRef PubNamesSection;
+ // Sections for DWARF5 split dwarf proposal.
+ RelocAddrMap InfoDWORelocMap;
+ StringRef InfoDWOSection;
+ StringRef AbbrevDWOSection;
+ StringRef StringDWOSection;
+ StringRef StringOffsetDWOSection;
+ StringRef RangeDWOSection;
+ StringRef AddrSection;
+
+public:
+ DWARFContextInMemory(object::ObjectFile *);
+ virtual bool isLittleEndian() const { return IsLittleEndian; }
+ virtual uint8_t getAddressSize() const { return AddressSize; }
+ virtual const RelocAddrMap &infoRelocMap() const { return InfoRelocMap; }
+ virtual const RelocAddrMap &lineRelocMap() const { return LineRelocMap; }
virtual StringRef getInfoSection() { return InfoSection; }
virtual StringRef getAbbrevSection() { return AbbrevSection; }
virtual StringRef getARangeSection() { return ARangeSection; }
+ virtual StringRef getDebugFrameSection() { return DebugFrameSection; }
virtual StringRef getLineSection() { return LineSection; }
virtual StringRef getStringSection() { return StringSection; }
virtual StringRef getRangeSection() { return RangeSection; }
+ virtual StringRef getPubNamesSection() { return PubNamesSection; }
+
+ // Sections for DWARF5 split dwarf proposal.
+ virtual StringRef getInfoDWOSection() { return InfoDWOSection; }
+ virtual StringRef getAbbrevDWOSection() { return AbbrevDWOSection; }
+ virtual StringRef getStringDWOSection() { return StringDWOSection; }
+ virtual StringRef getStringOffsetDWOSection() {
+ return StringOffsetDWOSection;
+ }
+ virtual StringRef getRangeDWOSection() { return RangeDWOSection; }
+ virtual StringRef getAddrSection() {
+ return AddrSection;
+ }
+ virtual const RelocAddrMap &infoDWORelocMap() const {
+ return InfoDWORelocMap;
+ }
};
}
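
With dump() now taking a DIDumpType, callers can restrict output to a single section instead of always printing everything. A tiny sketch (assumes DIDT_Frames is declared with the other DIDT_* enumerators in DIContext.h):

    #include "DWARFContext.h" // private header in lib/DebugInfo
    #include "llvm/Support/raw_ostream.h"

    void dumpFrames(llvm::object::ObjectFile *Obj) {
      llvm::DWARFContextInMemory Ctx(Obj);
      Ctx.dump(llvm::outs(), llvm::DIDT_Frames); // only .debug_frame
    }
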
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp
index 2efbfd1f92fb..7dff9ff49a62 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.cpp
+++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp
@@ -16,7 +16,7 @@ using namespace llvm;
void DWARFDebugArangeSet::clear() {
Offset = -1U;
- std::memset(&Header, 0, sizeof(Header));
+ std::memset(&HeaderData, 0, sizeof(Header));
ArangeDescriptors.clear();
}
@@ -66,15 +66,15 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
// descriptor on the target system. This header is followed by a series
// of tuples. Each tuple consists of an address and a length, each in
// the size appropriate for an address on the target architecture.
- Header.Length = data.getU32(offset_ptr);
- Header.Version = data.getU16(offset_ptr);
- Header.CuOffset = data.getU32(offset_ptr);
- Header.AddrSize = data.getU8(offset_ptr);
- Header.SegSize = data.getU8(offset_ptr);
+ HeaderData.Length = data.getU32(offset_ptr);
+ HeaderData.Version = data.getU16(offset_ptr);
+ HeaderData.CuOffset = data.getU32(offset_ptr);
+ HeaderData.AddrSize = data.getU8(offset_ptr);
+ HeaderData.SegSize = data.getU8(offset_ptr);
// Perform basic validation of the header fields.
- if (!data.isValidOffsetForDataOfSize(Offset, Header.Length) ||
- (Header.AddrSize != 4 && Header.AddrSize != 8)) {
+ if (!data.isValidOffsetForDataOfSize(Offset, HeaderData.Length) ||
+ (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)) {
clear();
return false;
}
@@ -84,7 +84,7 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
// size of an address). The header is padded, if necessary, to the
// appropriate boundary.
const uint32_t header_size = *offset_ptr - Offset;
- const uint32_t tuple_size = Header.AddrSize * 2;
+ const uint32_t tuple_size = HeaderData.AddrSize * 2;
uint32_t first_tuple_offset = 0;
while (first_tuple_offset < header_size)
first_tuple_offset += tuple_size;
@@ -94,11 +94,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
Descriptor arangeDescriptor;
assert(sizeof(arangeDescriptor.Address) == sizeof(arangeDescriptor.Length));
- assert(sizeof(arangeDescriptor.Address) >= Header.AddrSize);
+ assert(sizeof(arangeDescriptor.Address) >= HeaderData.AddrSize);
while (data.isValidOffset(*offset_ptr)) {
- arangeDescriptor.Address = data.getUnsigned(offset_ptr, Header.AddrSize);
- arangeDescriptor.Length = data.getUnsigned(offset_ptr, Header.AddrSize);
+ arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
+ arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize);
// Each set of tuples is terminated by a 0 for the address and 0
// for the length.
@@ -115,11 +115,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) {
void DWARFDebugArangeSet::dump(raw_ostream &OS) const {
OS << format("Address Range Header: length = 0x%8.8x, version = 0x%4.4x, ",
- Header.Length, Header.Version)
+ HeaderData.Length, HeaderData.Version)
<< format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n",
- Header.CuOffset, Header.AddrSize, Header.SegSize);
+ HeaderData.CuOffset, HeaderData.AddrSize, HeaderData.SegSize);
- const uint32_t hex_width = Header.AddrSize * 2;
+ const uint32_t hex_width = HeaderData.AddrSize * 2;
for (DescriptorConstIter pos = ArangeDescriptors.begin(),
end = ArangeDescriptors.end(); pos != end; ++pos)
OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address)
@@ -145,7 +145,7 @@ uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const {
std::find_if(ArangeDescriptors.begin(), end, // Range
DescriptorContainsAddress(address)); // Predicate
if (pos != end)
- return Header.CuOffset;
+ return HeaderData.CuOffset;
return -1U;
}
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h
index 9a2a6d0f0037..d76867615aa1 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.h
+++ b/lib/DebugInfo/DWARFDebugArangeSet.h
@@ -48,7 +48,7 @@ private:
typedef DescriptorColl::const_iterator DescriptorConstIter;
uint32_t Offset;
- Header Header;
+ Header HeaderData;
DescriptorColl ArangeDescriptors;
public:
@@ -58,11 +58,11 @@ public:
bool extract(DataExtractor data, uint32_t *offset_ptr);
void dump(raw_ostream &OS) const;
- uint32_t getCompileUnitDIEOffset() const { return Header.CuOffset; }
- uint32_t getOffsetOfNextEntry() const { return Offset + Header.Length + 4; }
+ uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; }
+ uint32_t getOffsetOfNextEntry() const { return Offset + HeaderData.Length + 4; }
uint32_t findAddress(uint64_t address) const;
uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); }
- const struct Header &getHeader() const { return Header; }
+ const struct Header &getHeader() const { return HeaderData; }
const Descriptor *getDescriptor(uint32_t i) const {
if (i < ArangeDescriptors.size())
return &ArangeDescriptors[i];
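
One detail worth calling out from extract() above: descriptors must start on a multiple of the tuple size (twice the address size), so the header is padded up to that boundary. The while-loop there computes, equivalently (firstTupleOffset is a hypothetical helper; for example, a 12-byte header with 8-byte addresses puts the first tuple at offset 16):

    #include <stdint.h>

    uint32_t firstTupleOffset(uint32_t HeaderSize, uint8_t AddrSize) {
      const uint32_t TupleSize = AddrSize * 2; // one address + one length
      return ((HeaderSize + TupleSize - 1) / TupleSize) * TupleSize;
    }
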
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
index f9a34c908f1d..f79862d606f5 100644
--- a/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -26,34 +26,40 @@ namespace {
class CountArangeDescriptors {
public:
CountArangeDescriptors(uint32_t &count_ref) : Count(count_ref) {}
- void operator()(const DWARFDebugArangeSet &set) {
- Count += set.getNumDescriptors();
+ void operator()(const DWARFDebugArangeSet &Set) {
+ Count += Set.getNumDescriptors();
}
uint32_t &Count;
};
class AddArangeDescriptors {
public:
- AddArangeDescriptors(DWARFDebugAranges::RangeColl &ranges)
- : RangeCollection(ranges) {}
- void operator()(const DWARFDebugArangeSet& set) {
- const DWARFDebugArangeSet::Descriptor* arange_desc_ptr;
- DWARFDebugAranges::Range range;
- range.Offset = set.getCompileUnitDIEOffset();
-
- for (uint32_t i=0; (arange_desc_ptr = set.getDescriptor(i)) != NULL; ++i){
- range.LoPC = arange_desc_ptr->Address;
- range.Length = arange_desc_ptr->Length;
+ AddArangeDescriptors(DWARFDebugAranges::RangeColl &Ranges,
+ DWARFDebugAranges::ParsedCUOffsetColl &CUOffsets)
+ : RangeCollection(Ranges),
+ CUOffsetCollection(CUOffsets) {}
+ void operator()(const DWARFDebugArangeSet &Set) {
+ DWARFDebugAranges::Range Range;
+ Range.Offset = Set.getCompileUnitDIEOffset();
+ CUOffsetCollection.insert(Range.Offset);
+
+ for (uint32_t i = 0, n = Set.getNumDescriptors(); i < n; ++i) {
+ const DWARFDebugArangeSet::Descriptor *ArangeDescPtr =
+ Set.getDescriptor(i);
+ Range.LoPC = ArangeDescPtr->Address;
+ Range.Length = ArangeDescPtr->Length;
// Insert each item in increasing address order so binary searching
// can later be done!
- DWARFDebugAranges::RangeColl::iterator insert_pos =
+ DWARFDebugAranges::RangeColl::iterator InsertPos =
std::lower_bound(RangeCollection.begin(), RangeCollection.end(),
- range, RangeLessThan);
- RangeCollection.insert(insert_pos, range);
+ Range, RangeLessThan);
+ RangeCollection.insert(InsertPos, Range);
}
+
}
- DWARFDebugAranges::RangeColl& RangeCollection;
+ DWARFDebugAranges::RangeColl &RangeCollection;
+ DWARFDebugAranges::ParsedCUOffsetColl &CUOffsetCollection;
};
}
@@ -75,7 +81,7 @@ bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) {
if (count > 0) {
Aranges.reserve(count);
- AddArangeDescriptors range_adder(Aranges);
+ AddArangeDescriptors range_adder(Aranges, ParsedCUOffsets);
std::for_each(sets.begin(), sets.end(), range_adder);
}
}
@@ -83,13 +89,14 @@ bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) {
}
bool DWARFDebugAranges::generate(DWARFContext *ctx) {
- clear();
if (ctx) {
const uint32_t num_compile_units = ctx->getNumCompileUnits();
for (uint32_t cu_idx = 0; cu_idx < num_compile_units; ++cu_idx) {
- DWARFCompileUnit *cu = ctx->getCompileUnitAtIndex(cu_idx);
- if (cu)
- cu->buildAddressRangeTable(this, true);
+ if (DWARFCompileUnit *cu = ctx->getCompileUnitAtIndex(cu_idx)) {
+ uint32_t CUOffset = cu->getOffset();
+ if (ParsedCUOffsets.insert(CUOffset).second)
+ cu->buildAddressRangeTable(this, true);
+ }
}
}
sort(true, /* overlap size */ 0);
@@ -179,7 +186,7 @@ uint32_t DWARFDebugAranges::findAddress(uint64_t address) const {
Range range(address);
RangeCollIterator begin = Aranges.begin();
RangeCollIterator end = Aranges.end();
- RangeCollIterator pos = lower_bound(begin, end, range, RangeLessThan);
+ RangeCollIterator pos = std::lower_bound(begin, end, range, RangeLessThan);
if (pos != end && pos->LoPC <= address && address < pos->HiPC()) {
return pos->Offset;
diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h
index 12afb60beb40..1509ffad41f1 100644
--- a/lib/DebugInfo/DWARFDebugAranges.h
+++ b/lib/DebugInfo/DWARFDebugAranges.h
@@ -11,6 +11,7 @@
#define LLVM_DEBUGINFO_DWARFDEBUGARANGES_H
#include "DWARFDebugArangeSet.h"
+#include "llvm/ADT/DenseSet.h"
#include <list>
namespace llvm {
@@ -60,7 +61,10 @@ public:
uint32_t Offset; // Offset of the compile unit or die
};
- void clear() { Aranges.clear(); }
+ void clear() {
+ Aranges.clear();
+ ParsedCUOffsets.clear();
+ }
bool allRangesAreContiguous(uint64_t& LoPC, uint64_t& HiPC) const;
bool getMaxRange(uint64_t& LoPC, uint64_t& HiPC) const;
bool extract(DataExtractor debug_aranges_data);
@@ -88,9 +92,11 @@ public:
typedef std::vector<Range> RangeColl;
typedef RangeColl::const_iterator RangeCollIterator;
+ typedef DenseSet<uint32_t> ParsedCUOffsetColl;
private:
RangeColl Aranges;
+ ParsedCUOffsetColl ParsedCUOffsets;
};
}
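
The RangeColl above stays sorted because AddArangeDescriptors inserts at the std::lower_bound position, which is what lets findAddress() binary-search later. The idiom in isolation (insertSorted is a hypothetical stand-in, with a plain vector in place of RangeColl):

    #include <algorithm>
    #include <vector>

    // Insert X while keeping V sorted ascending; O(log n) search, O(n) shift.
    void insertSorted(std::vector<uint64_t> &V, uint64_t X) {
      V.insert(std::lower_bound(V.begin(), V.end(), X), X);
    }
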
diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp
new file mode 100644
index 000000000000..3efe6a1ebd30
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugFrame.cpp
@@ -0,0 +1,391 @@
+//===-- DWARFDebugFrame.cpp - Parsing of .debug_frame -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugFrame.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <vector>
+
+using namespace llvm;
+using namespace dwarf;
+
+
+/// \brief Abstract frame entry defining the common interface concrete
+/// entries implement.
+class llvm::FrameEntry {
+public:
+ enum FrameKind {FK_CIE, FK_FDE};
+ FrameEntry(FrameKind K, DataExtractor D, uint64_t Offset, uint64_t Length)
+ : Kind(K), Data(D), Offset(Offset), Length(Length) {}
+
+ virtual ~FrameEntry() {
+ }
+
+ FrameKind getKind() const { return Kind; }
+ virtual uint64_t getOffset() const { return Offset; }
+
+ /// \brief Parse and store a sequence of CFI instructions from our data
+ /// stream, starting at *Offset and ending at EndOffset. If everything
+ /// goes well, *Offset should be equal to EndOffset when this method
+ /// returns. Otherwise, an error occurred.
+ virtual void parseInstructions(uint32_t *Offset, uint32_t EndOffset);
+
+ /// \brief Dump the entry header to the given output stream.
+ virtual void dumpHeader(raw_ostream &OS) const = 0;
+
+ /// \brief Dump the entry's instructions to the given output stream.
+ virtual void dumpInstructions(raw_ostream &OS) const;
+
+protected:
+ const FrameKind Kind;
+
+ /// \brief The data stream holding the section from which the entry was
+ /// parsed.
+ DataExtractor Data;
+
+ /// \brief Offset of this entry in the section.
+ uint64_t Offset;
+
+ /// \brief Entry length as specified in DWARF.
+ uint64_t Length;
+
+ /// An entry may contain CFI instructions. An instruction consists of an
+ /// opcode and an optional sequence of operands.
+ typedef std::vector<uint64_t> Operands;
+ struct Instruction {
+ Instruction(uint8_t Opcode)
+ : Opcode(Opcode)
+ {}
+
+ uint8_t Opcode;
+ Operands Ops;
+ };
+
+ std::vector<Instruction> Instructions;
+
+ /// Convenience methods to add a new instruction with the given opcode and
+ /// operands to the Instructions vector.
+ void addInstruction(uint8_t Opcode) {
+ Instructions.push_back(Instruction(Opcode));
+ }
+
+ void addInstruction(uint8_t Opcode, uint64_t Operand1) {
+ Instructions.push_back(Instruction(Opcode));
+ Instructions.back().Ops.push_back(Operand1);
+ }
+
+ void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2) {
+ Instructions.push_back(Instruction(Opcode));
+ Instructions.back().Ops.push_back(Operand1);
+ Instructions.back().Ops.push_back(Operand2);
+ }
+};
+
+
+// See DWARF standard v3, section 7.23
+const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
+const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f;
+
+
+void FrameEntry::parseInstructions(uint32_t *Offset, uint32_t EndOffset) {
+ while (*Offset < EndOffset) {
+ uint8_t Opcode = Data.getU8(Offset);
+ // Some instructions have a primary opcode encoded in the top bits.
+ uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK;
+
+ if (Primary) {
+ // If it's a primary opcode, the first operand is encoded in the bottom
+ // bits of the opcode itself.
+ uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK;
+ switch (Primary) {
+ default: llvm_unreachable("Impossible primary CFI opcode");
+ case DW_CFA_advance_loc:
+ case DW_CFA_restore:
+ addInstruction(Primary, Op1);
+ break;
+ case DW_CFA_offset:
+ addInstruction(Primary, Op1, Data.getULEB128(Offset));
+ break;
+ }
+ } else {
+ // Extended opcode - its value is Opcode itself.
+ switch (Opcode) {
+ default: llvm_unreachable("Invalid extended CFI opcode");
+ case DW_CFA_nop:
+ case DW_CFA_remember_state:
+ case DW_CFA_restore_state:
+ // No operands
+ addInstruction(Opcode);
+ break;
+ case DW_CFA_set_loc:
+ // Operands: Address
+ addInstruction(Opcode, Data.getAddress(Offset));
+ break;
+ case DW_CFA_advance_loc1:
+ // Operands: 1-byte delta
+ addInstruction(Opcode, Data.getU8(Offset));
+ break;
+ case DW_CFA_advance_loc2:
+ // Operands: 2-byte delta
+ addInstruction(Opcode, Data.getU16(Offset));
+ break;
+ case DW_CFA_advance_loc4:
+ // Operands: 4-byte delta
+ addInstruction(Opcode, Data.getU32(Offset));
+ break;
+ case DW_CFA_restore_extended:
+ case DW_CFA_undefined:
+ case DW_CFA_same_value:
+ case DW_CFA_def_cfa_register:
+ case DW_CFA_def_cfa_offset:
+ // Operands: ULEB128
+ addInstruction(Opcode, Data.getULEB128(Offset));
+ break;
+ case DW_CFA_def_cfa_offset_sf:
+ // Operands: SLEB128
+ addInstruction(Opcode, Data.getSLEB128(Offset));
+ break;
+ case DW_CFA_offset_extended:
+ case DW_CFA_register:
+ case DW_CFA_def_cfa:
+ case DW_CFA_val_offset:
+ // Operands: ULEB128, ULEB128
+ addInstruction(Opcode, Data.getULEB128(Offset),
+ Data.getULEB128(Offset));
+ break;
+ case DW_CFA_offset_extended_sf:
+ case DW_CFA_def_cfa_sf:
+ case DW_CFA_val_offset_sf:
+ // Operands: ULEB128, SLEB128
+ addInstruction(Opcode, Data.getULEB128(Offset),
+ Data.getSLEB128(Offset));
+ break;
+ case DW_CFA_def_cfa_expression:
+ case DW_CFA_expression:
+ case DW_CFA_val_expression:
+ // TODO: implement this
+ report_fatal_error("Values with expressions not implemented yet!");
+ }
+ }
+ }
+}
+
+
+void FrameEntry::dumpInstructions(raw_ostream &OS) const {
+ // TODO: at the moment only instruction names are dumped. Expand this to
+ // dump operands as well.
+ for (std::vector<Instruction>::const_iterator I = Instructions.begin(),
+ E = Instructions.end();
+ I != E; ++I) {
+ uint8_t Opcode = I->Opcode;
+ if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
+ Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
+ OS << " " << CallFrameString(Opcode) << ":\n";
+ }
+}
+
+
+namespace {
+/// \brief DWARF Common Information Entry (CIE)
+class CIE : public FrameEntry {
+public:
+ // CIEs (and FDEs) are simply container classes, so the only sensible way to
+ // create them is by providing the full parsed contents in the constructor.
+ CIE(DataExtractor D, uint64_t Offset, uint64_t Length, uint8_t Version,
+ SmallString<8> Augmentation, uint64_t CodeAlignmentFactor,
+ int64_t DataAlignmentFactor, uint64_t ReturnAddressRegister)
+ : FrameEntry(FK_CIE, D, Offset, Length), Version(Version),
+ Augmentation(Augmentation), CodeAlignmentFactor(CodeAlignmentFactor),
+ DataAlignmentFactor(DataAlignmentFactor),
+ ReturnAddressRegister(ReturnAddressRegister) {}
+
+ ~CIE() {
+ }
+
+ void dumpHeader(raw_ostream &OS) const {
+ OS << format("%08x %08x %08x CIE",
+ (uint32_t)Offset, (uint32_t)Length, DW_CIE_ID)
+ << "\n";
+ OS << format(" Version: %d\n", Version);
+ OS << " Augmentation: \"" << Augmentation << "\"\n";
+ OS << format(" Code alignment factor: %u\n",
+ (uint32_t)CodeAlignmentFactor);
+ OS << format(" Data alignment factor: %d\n",
+ (int32_t)DataAlignmentFactor);
+ OS << format(" Return address column: %d\n",
+ (int32_t)ReturnAddressRegister);
+ OS << "\n";
+ }
+
+ static bool classof(const FrameEntry *FE) {
+ return FE->getKind() == FK_CIE;
+ }
+
+private:
+ /// The following fields are defined in section 6.4.1 of the DWARF standard v3
+ uint8_t Version;
+ SmallString<8> Augmentation;
+ uint64_t CodeAlignmentFactor;
+ int64_t DataAlignmentFactor;
+ uint64_t ReturnAddressRegister;
+};
+
+
+/// \brief DWARF Frame Description Entry (FDE)
+class FDE : public FrameEntry {
+public:
+ // Each FDE has a CIE it's "linked to". Our FDE is constructed with an
+ // offset to the CIE (provided by parsing the FDE header). The CIE itself
+ // is obtained lazily once it's actually required.
+ FDE(DataExtractor D, uint64_t Offset, uint64_t Length,
+ int64_t LinkedCIEOffset, uint64_t InitialLocation, uint64_t AddressRange)
+ : FrameEntry(FK_FDE, D, Offset, Length), LinkedCIEOffset(LinkedCIEOffset),
+ InitialLocation(InitialLocation), AddressRange(AddressRange),
+ LinkedCIE(NULL) {}
+
+ ~FDE() {
+ }
+
+ void dumpHeader(raw_ostream &OS) const {
+ OS << format("%08x %08x %08x FDE ",
+ (uint32_t)Offset, (uint32_t)Length, (int32_t)LinkedCIEOffset);
+ OS << format("cie=%08x pc=%08x...%08x\n",
+ (int32_t)LinkedCIEOffset,
+ (uint32_t)InitialLocation,
+ (uint32_t)InitialLocation + (uint32_t)AddressRange);
+ if (LinkedCIE) {
+ OS << format("%p\n", LinkedCIE);
+ }
+ }
+
+ static bool classof(const FrameEntry *FE) {
+ return FE->getKind() == FK_FDE;
+ }
+private:
+
+ /// The following fields are defined in section 6.4.1 of the DWARF standard v3
+ uint64_t LinkedCIEOffset;
+ uint64_t InitialLocation;
+ uint64_t AddressRange;
+ CIE *LinkedCIE;
+};
+} // end anonymous namespace
+
+
+DWARFDebugFrame::DWARFDebugFrame() {
+}
+
+
+DWARFDebugFrame::~DWARFDebugFrame() {
+ for (EntryVector::iterator I = Entries.begin(), E = Entries.end();
+ I != E; ++I) {
+ delete *I;
+ }
+}
+
+
+static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data,
+ uint32_t Offset, int Length) {
+ errs() << "DUMP: ";
+ for (int i = 0; i < Length; ++i) {
+ uint8_t c = Data.getU8(&Offset);
+ errs().write_hex(c); errs() << " ";
+ }
+ errs() << "\n";
+}
+
+
+void DWARFDebugFrame::parse(DataExtractor Data) {
+ uint32_t Offset = 0;
+
+ while (Data.isValidOffset(Offset)) {
+ uint32_t StartOffset = Offset;
+
+ bool IsDWARF64 = false;
+ uint64_t Length = Data.getU32(&Offset);
+ uint64_t Id;
+
+ if (Length == UINT32_MAX) {
+ // DWARF-64 is distinguished by the first 32 bits of the initial length
+ // field being 0xffffffff. Then, the next 64 bits are the actual entry
+ // length.
+ IsDWARF64 = true;
+ Length = Data.getU64(&Offset);
+ }
+
+ // At this point, Offset points to the next field after Length.
+ // Length is the structure size excluding itself. Compute an offset one
+ // past the end of the structure (needed to know how many instructions to
+ // read).
+ // TODO: For honest DWARF64 support, DataExtractor will have to treat
+ // offset_ptr as uint64_t*
+ uint32_t EndStructureOffset = Offset + static_cast<uint32_t>(Length);
+
+ // The Id field's size depends on the DWARF format
+ Id = Data.getUnsigned(&Offset, IsDWARF64 ? 8 : 4);
+ bool IsCIE = ((IsDWARF64 && Id == DW64_CIE_ID) || Id == DW_CIE_ID);
+
+ FrameEntry *Entry = 0;
+ if (IsCIE) {
+ // Note: this is specifically DWARFv3 CIE header structure. It was
+ // changed in DWARFv4. We currently don't support reading DWARFv4
+ // here because LLVM itself does not emit it (and LLDB doesn't
+ // support it either).
+ uint8_t Version = Data.getU8(&Offset);
+ const char *Augmentation = Data.getCStr(&Offset);
+ uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset);
+ int64_t DataAlignmentFactor = Data.getSLEB128(&Offset);
+ uint64_t ReturnAddressRegister = Data.getULEB128(&Offset);
+
+ Entry = new CIE(Data, StartOffset, Length, Version,
+ StringRef(Augmentation), CodeAlignmentFactor,
+ DataAlignmentFactor, ReturnAddressRegister);
+ } else {
+ // FDE
+ uint64_t CIEPointer = Id;
+ uint64_t InitialLocation = Data.getAddress(&Offset);
+ uint64_t AddressRange = Data.getAddress(&Offset);
+
+ Entry = new FDE(Data, StartOffset, Length, CIEPointer,
+ InitialLocation, AddressRange);
+ }
+
+ assert(Entry && "Expected Entry to be populated with CIE or FDE");
+ Entry->parseInstructions(&Offset, EndStructureOffset);
+
+ if (Offset == EndStructureOffset) {
+ // Entry instructions parsed successfully.
+ Entries.push_back(Entry);
+ } else {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << format("Parsing entry instructions at %lx failed",
+ Entry->getOffset());
+ report_fatal_error(Str);
+ }
+ }
+}
+
+
+void DWARFDebugFrame::dump(raw_ostream &OS) const {
+ OS << "\n";
+ for (EntryVector::const_iterator I = Entries.begin(), E = Entries.end();
+ I != E; ++I) {
+ FrameEntry *Entry = *I;
+ Entry->dumpHeader(OS);
+ Entry->dumpInstructions(OS);
+ OS << "\n";
+ }
+}
+
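
The opcode split that parseInstructions() relies on, in isolation: the top two bits of each CFI byte select a primary operation and, when they are non-zero, the low six bits carry the first operand inline (DWARF v3, section 7.23, as cited above). A decoding sketch (decodeCFIByte is hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    void decodeCFIByte(uint8_t Byte) {
      const uint8_t Primary = Byte & 0xc0; // DWARF_CFI_PRIMARY_OPCODE_MASK
      const uint8_t Operand = Byte & 0x3f; // DWARF_CFI_PRIMARY_OPERAND_MASK
      if (Primary)
        // e.g. 0x9f -> DW_CFA_offset (0x80) on register 0x1f; a ULEB128
        // offset operand then follows in the instruction stream.
        printf("primary op 0x%02x, inline operand %u\n", Primary, Operand);
      else
        // Extended opcode: the whole byte is the opcode, operands follow.
        printf("extended op 0x%02x\n", Byte);
    }
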
diff --git a/lib/DebugInfo/DWARFDebugFrame.h b/lib/DebugInfo/DWARFDebugFrame.h
new file mode 100644
index 000000000000..48b8d63a5a64
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugFrame.h
@@ -0,0 +1,46 @@
+//===-- DWARFDebugFrame.h - Parsing of .debug_frame -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGFRAME_H
+#define LLVM_DEBUGINFO_DWARFDEBUGFRAME_H
+
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+
+namespace llvm {
+
+class FrameEntry;
+
+
+/// \brief A parsed .debug_frame section
+///
+class DWARFDebugFrame {
+public:
+ DWARFDebugFrame();
+ ~DWARFDebugFrame();
+
+ /// \brief Dump the section data into the given stream.
+ void dump(raw_ostream &OS) const;
+
+ /// \brief Parse the section from raw data.
+ /// Data is assumed to point to the beginning of the section.
+ void parse(DataExtractor Data);
+
+private:
+ typedef std::vector<FrameEntry *> EntryVector;
+ EntryVector Entries;
+};
+
+
+} // namespace llvm
+
+#endif
+
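
For reference, the initial-length escape that parse() above handles: a 32-bit length of 0xffffffff signals DWARF64, and the real 64-bit length follows. A standalone sketch, ignoring endianness and using raw memory instead of DataExtractor (readInitialLength is hypothetical):

    #include <stdint.h>
    #include <string.h>

    uint64_t readInitialLength(const uint8_t *P, bool *IsDWARF64) {
      uint32_t L32;
      memcpy(&L32, P, sizeof(L32)); // host-endian for this sketch
      *IsDWARF64 = (L32 == 0xffffffffU);
      if (!*IsDWARF64)
        return L32;
      uint64_t L64;
      memcpy(&L64, P + 4, sizeof(L64));
      return L64;
    }
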
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
index ab6746445388..02b15d69043f 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -12,6 +12,7 @@
#include "DWARFContext.h"
#include "DWARFDebugAbbrev.h"
#include "DWARFFormValue.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -39,7 +40,7 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS,
OS << format(" [%u] %c\n", abbrCode,
AbbrevDecl->hasChildren() ? '*' : ' ');
- // Dump all data in the .debug_info for the attributes
+ // Dump all data in the DIE for the attributes.
const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
for (uint32_t i = 0; i != numAttributes; ++i) {
uint16_t attr = AbbrevDecl->getAttrByIndex(i);
@@ -113,9 +114,14 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
uint32_t i;
uint16_t form;
for (i=0; i<numAttributes; ++i) {
+
form = AbbrevDecl->getFormByIndex(i);
- const uint8_t fixed_skip_size = fixed_form_sizes[form];
+ // FIXME: Currently we're checking if this is less than the last
+ // entry in the fixed_form_sizes table, but this should be changed
+ // to use dynamic dispatch.
+ const uint8_t fixed_skip_size = (form < DW_FORM_ref_sig8) ?
+ fixed_form_sizes[form] : 0;
if (fixed_skip_size)
offset += fixed_skip_size;
else {
@@ -187,6 +193,8 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
case DW_FORM_sdata:
case DW_FORM_udata:
case DW_FORM_ref_udata:
+ case DW_FORM_GNU_str_index:
+ case DW_FORM_GNU_addr_index:
debug_info_data.getULEB128(&offset);
break;
@@ -195,11 +203,9 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
form = debug_info_data.getULEB128(&offset);
break;
+ // FIXME: 64-bit for DWARF64
case DW_FORM_sec_offset:
- if (cu->getAddressByteSize() == 4)
- debug_info_data.getU32(offset_ptr);
- else
- debug_info_data.getU64(offset_ptr);
+ debug_info_data.getU32(offset_ptr);
break;
default:
@@ -207,7 +213,6 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
return false;
}
offset += form_size;
-
} while (form_is_indirect);
}
}
@@ -327,6 +332,8 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu,
case DW_FORM_sdata:
case DW_FORM_udata:
case DW_FORM_ref_udata:
+ case DW_FORM_GNU_str_index:
+ case DW_FORM_GNU_addr_index:
debug_info_data.getULEB128(&offset);
break;
@@ -335,11 +342,9 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu,
form_is_indirect = true;
break;
+ // FIXME: 64-bit for DWARF64.
case DW_FORM_sec_offset:
- if (cu->getAddressByteSize() == 4)
- debug_info_data.getU32(offset_ptr);
- else
- debug_info_data.getU64(offset_ptr);
+ debug_info_data.getU32(offset_ptr);
break;
default:
@@ -417,8 +422,7 @@ DWARFDebugInfoEntryMinimal::getAttributeValueAsString(
const {
DWARFFormValue form_value;
if (getAttributeValue(cu, attr, form_value)) {
- DataExtractor stringExtractor(cu->getContext().getStringSection(),
- false, 0);
+ DataExtractor stringExtractor(cu->getStringSection(), false, 0);
return form_value.getAsCString(&stringExtractor);
}
return fail_value;
diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp
index 267364adfaca..192381c6f7c6 100644
--- a/lib/DebugInfo/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARFDebugLine.cpp
@@ -155,7 +155,7 @@ DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data,
if (pos.second) {
// Parse and cache the line table at this offset.
State state;
- if (!parseStatementTable(debug_line_data, &offset, state))
+ if (!parseStatementTable(debug_line_data, RelocMap, &offset, state))
return 0;
pos.first->second = state;
}
@@ -219,7 +219,8 @@ DWARFDebugLine::parsePrologue(DataExtractor debug_line_data,
}
bool
-DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
+DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
+ const RelocAddrMap *RMap,
uint32_t *offset_ptr, State &state) {
const uint32_t debug_line_offset = *offset_ptr;
@@ -268,7 +269,15 @@ DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
// relocatable address. All of the other statement program opcodes
// that affect the address register add a delta to it. This instruction
// stores a relocatable value into it instead.
- state.Address = debug_line_data.getAddress(offset_ptr);
+ {
+ // If this address is in our relocation map, apply the relocation.
+ RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr);
+ if (AI != RMap->end()) {
+ const std::pair<uint8_t, int64_t> &R = AI->second;
+ state.Address = debug_line_data.getAddress(offset_ptr) + R.second;
+ } else
+ state.Address = debug_line_data.getAddress(offset_ptr);
+ }
break;
case DW_LNE_define_file:
@@ -516,6 +525,83 @@ DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
}
bool
+DWARFDebugLine::LineTable::lookupAddressRange(uint64_t address,
+ uint64_t size,
+ std::vector<uint32_t>& result) const {
+ if (Sequences.empty())
+ return false;
+ uint64_t end_addr = address + size;
+ // First, find an instruction sequence containing the given address.
+ DWARFDebugLine::Sequence sequence;
+ sequence.LowPC = address;
+ SequenceIter first_seq = Sequences.begin();
+ SequenceIter last_seq = Sequences.end();
+ SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence,
+ DWARFDebugLine::Sequence::orderByLowPC);
+ if (seq_pos == last_seq || seq_pos->LowPC != address) {
+ if (seq_pos == first_seq)
+ return false;
+ seq_pos--;
+ }
+ if (!seq_pos->containsPC(address))
+ return false;
+
+ SequenceIter start_pos = seq_pos;
+
+ // Add the rows from the first sequence to the vector, starting with
+ // the first row index computed below.
+
+ while (seq_pos != last_seq && seq_pos->LowPC < end_addr) {
+ DWARFDebugLine::Sequence cur_seq = *seq_pos;
+ uint32_t first_row_index;
+ uint32_t last_row_index;
+ if (seq_pos == start_pos) {
+ // For the first sequence, we need to find which row in the sequence
+ // is the first in our range. Rows are stored in a vector, so we can
+ // use iterator arithmetic.
+ DWARFDebugLine::Row row;
+ row.Address = address;
+ RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex;
+ RowIter last_row = Rows.begin() + cur_seq.LastRowIndex;
+ RowIter row_pos = std::upper_bound(first_row, last_row, row,
+ DWARFDebugLine::Row::orderByAddress);
+ // The 'row_pos' iterator references the first row that is greater than
+ // our start address. Unless that's the first row, we want to start at
+ // the row before that.
+ first_row_index = cur_seq.FirstRowIndex + (row_pos - first_row);
+ if (row_pos != first_row)
+ --first_row_index;
+ } else
+ first_row_index = cur_seq.FirstRowIndex;
+
+ // For the last sequence in our range, we need to figure out the last row in
+ // range. For all other sequences we can go to the end of the sequence.
+ if (cur_seq.HighPC > end_addr) {
+ DWARFDebugLine::Row row;
+ row.Address = end_addr;
+ RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex;
+ RowIter last_row = Rows.begin() + cur_seq.LastRowIndex;
+ RowIter row_pos = std::upper_bound(first_row, last_row, row,
+ DWARFDebugLine::Row::orderByAddress);
+ // The 'row_pos' iterator references the first row that is greater than
+ // our end address. The row before that is the last row we want.
+ last_row_index = cur_seq.FirstRowIndex + (row_pos - first_row) - 1;
+ } else
+ // Contrary to what you might expect, DWARFDebugLine::Sequence::LastRowIndex
+ // isn't a valid index within the current sequence. It's that plus one.
+ last_row_index = cur_seq.LastRowIndex - 1;
+
+ for (uint32_t i = first_row_index; i <= last_row_index; ++i) {
+ result.push_back(i);
+ }
+
+ ++seq_pos;
+ }
+
+ return true;
+}
+
+bool
DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
bool NeedsAbsoluteFilePath,
std::string &Result) const {
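
Both row searches in lookupAddressRange above use the same idiom: upper_bound finds the first row strictly past the query address, and the entry just before it (if any) is the row that covers it. The idiom in isolation, as a self-contained sketch over plain sorted addresses:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Return the index of the row whose address covers Addr, or -1 if
    // Addr precedes every row. RowAddrs must be sorted ascending.
    int findCoveringRow(const std::vector<uint64_t> &RowAddrs, uint64_t Addr) {
      std::vector<uint64_t>::const_iterator Pos =
          std::upper_bound(RowAddrs.begin(), RowAddrs.end(), Addr);
      if (Pos == RowAddrs.begin())
        return -1;                       // before the first row
      return int(Pos - RowAddrs.begin()) - 1;
    }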
diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h
index 586dd7e8784f..2990756bd7c9 100644
--- a/lib/DebugInfo/DWARFDebugLine.h
+++ b/lib/DebugInfo/DWARFDebugLine.h
@@ -10,6 +10,7 @@
#ifndef LLVM_DEBUGINFO_DWARFDEBUGLINE_H
#define LLVM_DEBUGINFO_DWARFDEBUGLINE_H
+#include "DWARFRelocMap.h"
#include "llvm/Support/DataExtractor.h"
#include <map>
#include <string>
@@ -21,6 +22,7 @@ class raw_ostream;
class DWARFDebugLine {
public:
+ DWARFDebugLine(const RelocAddrMap* LineInfoRelocMap) : RelocMap(LineInfoRelocMap) {}
struct FileNameEntry {
FileNameEntry() : Name(0), DirIdx(0), ModTime(0), Length(0) {}
@@ -176,6 +178,10 @@ public:
// or -1 if there is no such row.
uint32_t lookupAddress(uint64_t address) const;
+ bool lookupAddressRange(uint64_t address,
+ uint64_t size,
+ std::vector<uint32_t>& result) const;
+
// Extracts filename by its index in filename table in prologue.
// Returns true on success.
bool getFileNameByIndex(uint64_t FileIndex,
@@ -227,6 +233,7 @@ public:
Prologue *prologue);
/// Parse a single line table (prologue and all rows).
static bool parseStatementTable(DataExtractor debug_line_data,
+ const RelocAddrMap *RMap,
uint32_t *offset_ptr, State &state);
const LineTable *getLineTable(uint32_t offset) const;
@@ -238,6 +245,7 @@ private:
typedef LineTableMapTy::iterator LineTableIter;
typedef LineTableMapTy::const_iterator LineTableConstIter;
+ const RelocAddrMap *RelocMap;
LineTableMapTy LineTableMap;
};
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
index fea9fd7f7d34..9f807aac5fd4 100644
--- a/lib/DebugInfo/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -72,7 +72,7 @@ static const uint8_t form_sizes_addr8[] = {
8, // 0x14 DW_FORM_ref8
0, // 0x15 DW_FORM_ref_udata
0, // 0x16 DW_FORM_indirect
- 8, // 0x17 DW_FORM_sec_offset
+ 4, // 0x17 DW_FORM_sec_offset
0, // 0x18 DW_FORM_exprloc
0, // 0x19 DW_FORM_flag_present
8, // 0x20 DW_FORM_ref_sig8
@@ -101,15 +101,15 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
case DW_FORM_addr:
case DW_FORM_ref_addr: {
RelocAddrMap::const_iterator AI
- = cu->getContext().relocMap().find(*offset_ptr);
- if (AI != cu->getContext().relocMap().end()) {
+ = cu->getRelocMap()->find(*offset_ptr);
+ if (AI != cu->getRelocMap()->end()) {
const std::pair<uint8_t, int64_t> &R = AI->second;
- Value.uval = R.second;
- *offset_ptr += R.first;
+ Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()) +
+ R.second;
} else
Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize());
- }
break;
+ }
case DW_FORM_exprloc:
case DW_FORM_block:
Value.uval = data.getULEB128(offset_ptr);
@@ -149,11 +149,10 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
break;
case DW_FORM_strp: {
RelocAddrMap::const_iterator AI
- = cu->getContext().relocMap().find(*offset_ptr);
- if (AI != cu->getContext().relocMap().end()) {
+ = cu->getRelocMap()->find(*offset_ptr);
+ if (AI != cu->getRelocMap()->end()) {
const std::pair<uint8_t, int64_t> &R = AI->second;
- Value.uval = R.second;
- *offset_ptr += R.first;
+ Value.uval = data.getU32(offset_ptr) + R.second;
} else
Value.uval = data.getU32(offset_ptr);
break;
@@ -174,10 +173,8 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
indirect = true;
break;
case DW_FORM_sec_offset:
- if (cu->getAddressByteSize() == 4)
- Value.uval = data.getU32(offset_ptr);
- else
- Value.uval = data.getU64(offset_ptr);
+ // FIXME: This is 64-bit for DWARF64.
+ Value.uval = data.getU32(offset_ptr);
break;
case DW_FORM_flag_present:
Value.uval = 1;
@@ -185,6 +182,12 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
case DW_FORM_ref_sig8:
Value.uval = data.getU64(offset_ptr);
break;
+ case DW_FORM_GNU_addr_index:
+ Value.uval = data.getULEB128(offset_ptr);
+ break;
+ case DW_FORM_GNU_str_index:
+ Value.uval = data.getULEB128(offset_ptr);
+ break;
default:
return false;
}
@@ -253,7 +256,7 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
// 0 byte values - implied from the form.
case DW_FORM_flag_present:
return true;
-
+
// 1 byte values
case DW_FORM_data1:
case DW_FORM_flag:
@@ -286,6 +289,8 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
case DW_FORM_sdata:
case DW_FORM_udata:
case DW_FORM_ref_udata:
+ case DW_FORM_GNU_str_index:
+ case DW_FORM_GNU_addr_index:
debug_info_data.getULEB128(offset_ptr);
return true;
@@ -294,14 +299,11 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
form = debug_info_data.getULEB128(offset_ptr);
break;
- // 4 for DWARF32, 8 for DWARF64.
+ // FIXME: 4 for DWARF32, 8 for DWARF64.
case DW_FORM_sec_offset:
- if (cu->getAddressByteSize() == 4)
- *offset_ptr += 4;
- else
- *offset_ptr += 8;
+ *offset_ptr += 4;
return true;
-
+
default:
return false;
}
@@ -311,12 +313,23 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
void
DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
- DataExtractor debug_str_data(cu->getContext().getStringSection(), true, 0);
+ DataExtractor debug_str_data(cu->getStringSection(), true, 0);
+ DataExtractor debug_str_offset_data(cu->getStringOffsetSection(), true, 0);
uint64_t uvalue = getUnsigned();
bool cu_relative_offset = false;
switch (Form) {
case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break;
+ case DW_FORM_GNU_addr_index: {
+ StringRef AddrOffsetSec = cu->getAddrOffsetSection();
+ OS << format(" indexed (%8.8x) address = ", (uint32_t)uvalue);
+ if (AddrOffsetSec.size() != 0) {
+ DataExtractor DA(AddrOffsetSec, true, cu->getAddressByteSize());
+ OS << format("0x%016" PRIx64, getIndirectAddress(&DA, cu));
+ } else
+ OS << "<no .debug_addr section>";
+ break;
+ }
case DW_FORM_flag_present: OS << "true"; break;
case DW_FORM_flag:
case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break;
@@ -370,6 +383,17 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
}
break;
}
+ case DW_FORM_GNU_str_index: {
+ OS << format(" indexed (%8.8x) string = ", (uint32_t)uvalue);
+ const char *dbg_str = getIndirectCString(&debug_str_data,
+ &debug_str_offset_data);
+ if (dbg_str) {
+ OS << '"';
+ OS.write_escaped(dbg_str);
+ OS << '"';
+ }
+ break;
+ }
case DW_FORM_ref_addr:
OS << format("0x%016" PRIx64, uvalue);
break;
@@ -400,13 +424,11 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
OS << "DW_FORM_indirect";
break;
+ // Should be formatted to 64-bit for DWARF64.
case DW_FORM_sec_offset:
- if (cu->getAddressByteSize() == 4)
- OS << format("0x%08x", (uint32_t)uvalue);
- else
- OS << format("0x%016" PRIx64, uvalue);
+ OS << format("0x%08x", (uint32_t)uvalue);
break;
-
+
default:
OS << format("DW_FORM(0x%4.4x)", Form);
break;
@@ -427,6 +449,25 @@ DWARFFormValue::getAsCString(const DataExtractor *debug_str_data_ptr) const {
return NULL;
}
+const char*
+DWARFFormValue::getIndirectCString(const DataExtractor *DS,
+ const DataExtractor *DSO) const {
+ if (!DS || !DSO) return NULL;
+
+ uint32_t offset = Value.uval * 4;
+ uint32_t soffset = DSO->getU32(&offset);
+ return DS->getCStr(&soffset);
+}
+
+uint64_t
+DWARFFormValue::getIndirectAddress(const DataExtractor *DA,
+ const DWARFCompileUnit *cu) const {
+ if (!DA) return 0;
+
+ uint32_t offset = Value.uval * cu->getAddressByteSize();
+ return DA->getAddress(&offset);
+}
+
uint64_t DWARFFormValue::getReference(const DWARFCompileUnit *cu) const {
uint64_t die_offset = Value.uval;
switch (Form) {
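
The two new indirect accessors resolve GNU index forms in two hops: getIndirectCString scales the index by 4 to pick a 32-bit slot in .debug_str_offsets, whose value is then an offset into .debug_str, while getIndirectAddress does the same with the CU's address byte size against .debug_addr. The string case as a standalone sketch (the raw pointers and missing bounds checks are illustrative simplifications):

    #include <cstdint>
    #include <cstring>

    // StrOffsets: raw .debug_str_offsets bytes; Str: raw .debug_str bytes.
    const char *indirectCString(const uint8_t *StrOffsets, const char *Str,
                                uint64_t Index) {
      uint32_t Offset;
      std::memcpy(&Offset, StrOffsets + Index * 4, sizeof(Offset)); // slot
      return Str + Offset;                                          // string
    }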
diff --git a/lib/DebugInfo/DWARFFormValue.h b/lib/DebugInfo/DWARFFormValue.h
index c5b590db95f5..b863001e4af8 100644
--- a/lib/DebugInfo/DWARFFormValue.h
+++ b/lib/DebugInfo/DWARFFormValue.h
@@ -64,6 +64,10 @@ public:
uint64_t getUnsigned() const { return Value.uval; }
int64_t getSigned() const { return Value.sval; }
const char *getAsCString(const DataExtractor *debug_str_data_ptr) const;
+ const char *getIndirectCString(const DataExtractor *,
+ const DataExtractor *) const;
+ uint64_t getIndirectAddress(const DataExtractor *,
+ const DWARFCompileUnit *) const;
bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr,
const DWARFCompileUnit *cu) const;
static bool skipValue(uint16_t form, DataExtractor debug_info_data,
diff --git a/lib/DebugInfo/DWARFRelocMap.h b/lib/DebugInfo/DWARFRelocMap.h
new file mode 100644
index 000000000000..6929e367b84c
--- /dev/null
+++ b/lib/DebugInfo/DWARFRelocMap.h
@@ -0,0 +1,22 @@
+//===-- DWARFRelocMap.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFRELOCMAP_H
+#define LLVM_DEBUGINFO_DWARFRELOCMAP_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+typedef DenseMap<uint64_t, std::pair<uint8_t, int64_t> > RelocAddrMap;
+
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_DWARFRELOCMAP_H
+
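
Each map value is a (relocation width, addend) pair keyed by section offset; with this patch the readers add the addend to the value extracted from the section instead of substituting it. A minimal sketch of that lookup, assuming a map already populated by the object reader:

    #include "llvm/ADT/DenseMap.h"
    #include <cstdint>
    #include <utility>

    typedef llvm::DenseMap<uint64_t, std::pair<uint8_t, int64_t> > RelocAddrMap;

    // Return RawValue with the addend applied when SectionOffset is
    // covered by a relocation, otherwise RawValue unchanged.
    uint64_t applyReloc(const RelocAddrMap &Map, uint64_t SectionOffset,
                        uint64_t RawValue) {
      RelocAddrMap::const_iterator AI = Map.find(SectionOffset);
      if (AI != Map.end())
        return RawValue + AI->second.second; // second.second is the addend
      return RawValue;
    }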
diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h
index 911d1d68b23a..314db8bd84c2 100644
--- a/lib/ExecutionEngine/EventListenerCommon.h
+++ b/lib/ExecutionEngine/EventListenerCommon.h
@@ -14,11 +14,11 @@
#ifndef EVENT_LISTENER_COMMON_H
#define EVENT_LISTENER_COMMON_H
-#include "llvm/DebugInfo.h"
-#include "llvm/Metadata.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/ValueHandle.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/ValueHandle.h"
namespace llvm {
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 05987f2b74e7..906a3a3fda7f 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -14,22 +14,22 @@
#define DEBUG_TYPE "jit"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
-
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
#include <cstring>
@@ -535,6 +535,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
if (isa<UndefValue>(C)) {
GenericValue Result;
switch (C->getType()->getTypeID()) {
+ default:
+ break;
case Type::IntegerTyID:
case Type::X86_FP80TyID:
case Type::FP128TyID:
@@ -543,7 +545,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
// with the correct bit width.
Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0);
break;
- default:
+ case Type::VectorTyID:
+ // If the whole vector is 'undef', just reserve memory for the value.
+ const VectorType* VTy = dyn_cast<VectorType>(C->getType());
+ const Type *ElemTy = VTy->getElementType();
+ unsigned int elemNum = VTy->getNumElements();
+ Result.AggregateVal.resize(elemNum);
+ if (ElemTy->isIntegerTy())
+ for (unsigned int i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].IntVal =
+ APInt(ElemTy->getPrimitiveSizeInBits(), 0);
break;
}
return Result;
@@ -556,11 +567,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
case Instruction::GetElementPtr: {
// Compute the index
GenericValue Result = getConstantValue(Op0);
- SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
- uint64_t Offset = TD->getIndexedOffset(Op0->getType(), Indices);
+ APInt Offset(TD->getPointerSizeInBits(), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(*TD, Offset);
char* tmp = (char*) Result.PointerVal;
- Result = PTOGV(tmp + Offset);
+ Result = PTOGV(tmp + Offset.getSExtValue());
return Result;
}
case Instruction::Trunc: {
@@ -632,7 +643,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
else if (Op0->getType()->isDoubleTy())
GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth);
else if (Op0->getType()->isX86_FP80Ty()) {
- APFloat apf = APFloat(GV.IntVal);
+ APFloat apf = APFloat(APFloat::x87DoubleExtended, GV.IntVal);
uint64_t v;
bool ignored;
(void)apf.convertToInteger(&v, BitWidth,
@@ -751,27 +762,32 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
case Type::X86_FP80TyID:
case Type::PPC_FP128TyID:
case Type::FP128TyID: {
- APFloat apfLHS = APFloat(LHS.IntVal);
+ const fltSemantics &Sem = CE->getOperand(0)->getType()->getFltSemantics();
+ APFloat apfLHS = APFloat(Sem, LHS.IntVal);
switch (CE->getOpcode()) {
default: llvm_unreachable("Invalid long double opcode");
case Instruction::FAdd:
- apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ apfLHS.add(APFloat(Sem, RHS.IntVal), APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
break;
case Instruction::FSub:
- apfLHS.subtract(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ apfLHS.subtract(APFloat(Sem, RHS.IntVal),
+ APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
break;
case Instruction::FMul:
- apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ apfLHS.multiply(APFloat(Sem, RHS.IntVal),
+ APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
break;
case Instruction::FDiv:
- apfLHS.divide(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ apfLHS.divide(APFloat(Sem, RHS.IntVal),
+ APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
break;
case Instruction::FRem:
- apfLHS.mod(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
+ apfLHS.mod(APFloat(Sem, RHS.IntVal),
+ APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
break;
}
@@ -820,6 +836,101 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
else
llvm_unreachable("Unknown constant pointer type!");
break;
+ case Type::VectorTyID: {
+ unsigned elemNum;
+ Type* ElemTy;
+ const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
+ const ConstantVector *CV = dyn_cast<ConstantVector>(C);
+ const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(C);
+
+ if (CDV) {
+ elemNum = CDV->getNumElements();
+ ElemTy = CDV->getElementType();
+ } else if (CV || CAZ) {
+ VectorType* VTy = dyn_cast<VectorType>(C->getType());
+ elemNum = VTy->getNumElements();
+ ElemTy = VTy->getElementType();
+ } else {
+ llvm_unreachable("Unknown constant vector type!");
+ }
+
+ Result.AggregateVal.resize(elemNum);
+ // Check if vector holds floats.
+ if(ElemTy->isFloatTy()) {
+ if (CAZ) {
+ GenericValue floatZero;
+ floatZero.FloatVal = 0.f;
+ std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+ floatZero);
+ break;
+ }
+ if(CV) {
+ for (unsigned i = 0; i < elemNum; ++i)
+ if (!isa<UndefValue>(CV->getOperand(i)))
+ Result.AggregateVal[i].FloatVal = cast<ConstantFP>(
+ CV->getOperand(i))->getValueAPF().convertToFloat();
+ break;
+ }
+ if(CDV)
+ for (unsigned i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].FloatVal = CDV->getElementAsFloat(i);
+
+ break;
+ }
+ // Check if vector holds doubles.
+ if (ElemTy->isDoubleTy()) {
+ if (CAZ) {
+ GenericValue doubleZero;
+ doubleZero.DoubleVal = 0.0;
+ std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+ doubleZero);
+ break;
+ }
+ if(CV) {
+ for (unsigned i = 0; i < elemNum; ++i)
+ if (!isa<UndefValue>(CV->getOperand(i)))
+ Result.AggregateVal[i].DoubleVal = cast<ConstantFP>(
+ CV->getOperand(i))->getValueAPF().convertToDouble();
+ break;
+ }
+ if(CDV)
+ for (unsigned i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].DoubleVal = CDV->getElementAsDouble(i);
+
+ break;
+ }
+ // Check if vector holds integers.
+ if (ElemTy->isIntegerTy()) {
+ if (CAZ) {
+ GenericValue intZero;
+ intZero.IntVal = APInt(ElemTy->getScalarSizeInBits(), 0ull);
+ std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
+ intZero);
+ break;
+ }
+ if(CV) {
+ for (unsigned i = 0; i < elemNum; ++i)
+ if (!isa<UndefValue>(CV->getOperand(i)))
+ Result.AggregateVal[i].IntVal = cast<ConstantInt>(
+ CV->getOperand(i))->getValue();
+ else {
+ Result.AggregateVal[i].IntVal =
+ APInt(CV->getOperand(i)->getType()->getPrimitiveSizeInBits(), 0);
+ }
+ break;
+ }
+ if(CDV)
+ for (unsigned i = 0; i < elemNum; ++i)
+ Result.AggregateVal[i].IntVal = APInt(
+ CDV->getElementType()->getPrimitiveSizeInBits(),
+ CDV->getElementAsInteger(i));
+
+ break;
+ }
+ llvm_unreachable("Unknown constant pointer type!");
+ }
+ break;
+
default:
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
@@ -861,6 +972,9 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty);
switch (Ty->getTypeID()) {
+ default:
+ dbgs() << "Cannot store value of type " << *Ty << "!\n";
+ break;
case Type::IntegerTyID:
StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes);
break;
@@ -880,8 +994,19 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
*((PointerTy*)Ptr) = Val.PointerVal;
break;
- default:
- dbgs() << "Cannot store value of type " << *Ty << "!\n";
+ case Type::VectorTyID:
+ for (unsigned i = 0; i < Val.AggregateVal.size(); ++i) {
+ if (cast<VectorType>(Ty)->getElementType()->isDoubleTy())
+ *(((double*)Ptr)+i) = Val.AggregateVal[i].DoubleVal;
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy())
+ *(((float*)Ptr)+i) = Val.AggregateVal[i].FloatVal;
+ if (cast<VectorType>(Ty)->getElementType()->isIntegerTy()) {
+ unsigned numOfBytes =(Val.AggregateVal[i].IntVal.getBitWidth()+7)/8;
+ StoreIntToMemory(Val.AggregateVal[i].IntVal,
+ (uint8_t*)Ptr + numOfBytes*i, numOfBytes);
+ }
+ }
+ break;
}
if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian())
@@ -893,7 +1018,8 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
- uint8_t *Dst = (uint8_t *)IntVal.getRawData();
+ uint8_t *Dst = reinterpret_cast<uint8_t *>(
+ const_cast<uint64_t *>(IntVal.getRawData()));
if (sys::isLittleEndianHost())
// Little-endian host - the destination must be ordered from LSB to MSB.
@@ -945,6 +1071,31 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
Result.IntVal = APInt(80, y);
break;
}
+ case Type::VectorTyID: {
+ const VectorType *VT = cast<VectorType>(Ty);
+ const Type *ElemT = VT->getElementType();
+ const unsigned numElems = VT->getNumElements();
+ if (ElemT->isFloatTy()) {
+ Result.AggregateVal.resize(numElems);
+ for (unsigned i = 0; i < numElems; ++i)
+ Result.AggregateVal[i].FloatVal = *((float*)Ptr+i);
+ }
+ if (ElemT->isDoubleTy()) {
+ Result.AggregateVal.resize(numElems);
+ for (unsigned i = 0; i < numElems; ++i)
+ Result.AggregateVal[i].DoubleVal = *((double*)Ptr+i);
+ }
+ if (ElemT->isIntegerTy()) {
+ GenericValue intZero;
+ const unsigned elemBitWidth = cast<IntegerType>(ElemT)->getBitWidth();
+ intZero.IntVal = APInt(elemBitWidth, 0);
+ Result.AggregateVal.resize(numElems, intZero);
+ for (unsigned i = 0; i < numElems; ++i)
+ LoadIntFromMemory(Result.AggregateVal[i].IntVal,
+ (uint8_t*)Ptr+((elemBitWidth+7)/8)*i, (elemBitWidth+7)/8);
+ }
+ break;
+ }
default:
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
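
The new VectorTyID paths above materialize one GenericValue per element and later copy elements out with plain pointer arithmetic. The store layout in isolation, assuming tightly packed elements as in the VectorTyID case of StoreValueToMemory:

    #include <cstring>
    #include <vector>

    // Write Vals element by element into Ptr, the way a <N x float>
    // value is laid out by the vector store path.
    void storeFloatVector(const std::vector<float> &Vals, void *Ptr) {
      for (size_t i = 0; i < Vals.size(); ++i)
        std::memcpy(static_cast<char *>(Ptr) + i * sizeof(float),
                    &Vals[i], sizeof(float));
    }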
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 1e790e781da0..f4e8246476a5 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -13,8 +13,8 @@
#define DEBUG_TYPE "jit"
#include "llvm-c/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstring>
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index 4cb0270d576d..7dc295fcbf73 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -17,11 +17,14 @@
#define DEBUG_TYPE "amplifier-jit-event-listener"
#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Metadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Errno.h"
@@ -41,6 +44,11 @@ class IntelJITEventListener : public JITEventListener {
MethodIDMap MethodIDs;
FilenameCache Filenames;
+ typedef SmallVector<const void *, 64> MethodAddressVector;
+ typedef DenseMap<const void *, MethodAddressVector> ObjectMap;
+
+ ObjectMap LoadedObjectMap;
+
public:
IntelJITEventListener(IntelJITEventsWrapper* libraryWrapper) {
Wrapper.reset(libraryWrapper);
@@ -72,6 +80,17 @@ static LineNumberInfo LineStartToIntelJITFormat(
return Result;
}
+static LineNumberInfo DILineInfoToIntelJITFormat(uintptr_t StartAddress,
+ uintptr_t Address,
+ DILineInfo Line) {
+ LineNumberInfo Result;
+
+ Result.Offset = Address - StartAddress;
+ Result.LineNumber = Line.getLine();
+
+ return Result;
+}
+
static iJIT_Method_Load FunctionDescToIntelJITFormat(
IntelJITEventsWrapper& Wrapper,
const char* FnName,
@@ -169,9 +188,101 @@ void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
}
void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) {
+ // Get the address of the object image for use as a unique identifier
+ const void* ObjData = Obj.getData().data();
+ DIContext* Context = DIContext::getDWARFContext(Obj.getObjectFile());
+ MethodAddressVector Functions;
+
+ // Use symbol info to iterate functions in the object.
+ error_code ec;
+ for (object::symbol_iterator I = Obj.begin_symbols(),
+ E = Obj.end_symbols();
+ I != E && !ec;
+ I.increment(ec)) {
+ std::vector<LineNumberInfo> LineInfo;
+ std::string SourceFileName;
+
+ object::SymbolRef::Type SymType;
+ if (I->getType(SymType)) continue;
+ if (SymType == object::SymbolRef::ST_Function) {
+ StringRef Name;
+ uint64_t Addr;
+ uint64_t Size;
+ if (I->getName(Name)) continue;
+ if (I->getAddress(Addr)) continue;
+ if (I->getSize(Size)) continue;
+
+ // Record this address in a local vector
+ Functions.push_back((void*)Addr);
+
+ // Build the function loaded notification message
+ iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper,
+ Name.data(),
+ Addr,
+ Size);
+ if (Context) {
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
+ DILineInfoTable::iterator Begin = Lines.begin();
+ DILineInfoTable::iterator End = Lines.end();
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+ LineInfo.push_back(DILineInfoToIntelJITFormat((uintptr_t)Addr,
+ It->first,
+ It->second));
+ }
+ if (LineInfo.size() == 0) {
+ FunctionMessage.source_file_name = 0;
+ FunctionMessage.line_number_size = 0;
+ FunctionMessage.line_number_table = 0;
+ } else {
+ SourceFileName = Lines.front().second.getFileName();
+ FunctionMessage.source_file_name = (char *)SourceFileName.c_str();
+ FunctionMessage.line_number_size = LineInfo.size();
+ FunctionMessage.line_number_table = &*LineInfo.begin();
+ }
+ } else {
+ FunctionMessage.source_file_name = 0;
+ FunctionMessage.line_number_size = 0;
+ FunctionMessage.line_number_table = 0;
+ }
+
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ &FunctionMessage);
+ MethodIDs[(void*)Addr] = FunctionMessage.method_id;
+ }
+ }
+
+ // To support object unload notification, we need to keep a list of
+ // registered function addresses for each loaded object. We will
+ // use the MethodIDs map to get the registered ID for each function.
+ LoadedObjectMap[ObjData] = Functions;
}
void IntelJITEventListener::NotifyFreeingObject(const ObjectImage &Obj) {
+ // Get the address of the object image for use as a unique identifier
+ const void* ObjData = Obj.getData().data();
+
+ // Get the object's function list from LoadedObjectMap
+ ObjectMap::iterator OI = LoadedObjectMap.find(ObjData);
+ if (OI == LoadedObjectMap.end())
+ return;
+ MethodAddressVector& Functions = OI->second;
+
+ // Walk the function list, unregistering each function
+ for (MethodAddressVector::iterator FI = Functions.begin(),
+ FE = Functions.end();
+ FI != FE;
+ ++FI) {
+ void* FnStart = const_cast<void*>(*FI);
+ MethodIDMap::iterator MI = MethodIDs.find(FnStart);
+ if (MI != MethodIDs.end()) {
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
+ &MI->second);
+ MethodIDs.erase(MI);
+ }
+ }
+
+ // Erase the object from LoadedObjectMap
+ LoadedObjectMap.erase(OI);
}
} // anonymous namespace.
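
The unload support added here is a two-level registry: object data pointer to function addresses (LoadedObjectMap), and function address to profiler method ID (MethodIDs). The bookkeeping in isolation, with standard containers standing in for the LLVM types:

    #include <map>
    #include <vector>

    struct JITRegistry {
      std::map<const void*, std::vector<const void*> > LoadedObjects;
      std::map<const void*, unsigned> MethodIDs;

      // Mirror of NotifyFreeingObject: drop every function registered
      // under ObjData, then forget the object itself.
      void unregisterObject(const void *ObjData) {
        std::map<const void*, std::vector<const void*> >::iterator OI =
            LoadedObjects.find(ObjData);
        if (OI == LoadedObjects.end())
          return;
        for (size_t i = 0, e = OI->second.size(); i != e; ++i)
          MethodIDs.erase(OI->second[i]);
        LoadedObjects.erase(OI);
      }
    };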
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
index 7ab08e15a8b3..3d9ff5351610 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
@@ -35,8 +35,6 @@ class IntelJITEventsWrapper {
NotifyEventPtr NotifyEventFunc;
RegisterCallbackExPtr RegisterCallbackExFunc;
IsProfilingActivePtr IsProfilingActiveFunc;
- FinalizeThreadPtr FinalizeThreadFunc;
- FinalizeProcessPtr FinalizeProcessFunc;
GetNewMethodIDPtr GetNewMethodIDFunc;
public:
@@ -48,8 +46,6 @@ public:
: NotifyEventFunc(::iJIT_NotifyEvent),
RegisterCallbackExFunc(::iJIT_RegisterCallbackEx),
IsProfilingActiveFunc(::iJIT_IsProfilingActive),
- FinalizeThreadFunc(::FinalizeThread),
- FinalizeProcessFunc(::FinalizeProcess),
GetNewMethodIDFunc(::iJIT_GetNewMethodID) {
}
@@ -62,8 +58,6 @@ public:
: NotifyEventFunc(NotifyEventImpl),
RegisterCallbackExFunc(RegisterCallbackExImpl),
IsProfilingActiveFunc(IsProfilingActiveImpl),
- FinalizeThreadFunc(FinalizeThreadImpl),
- FinalizeProcessFunc(FinalizeProcessImpl),
GetNewMethodIDFunc(GetNewMethodIDImpl) {
}
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 5202b091654e..526c04e082d2 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -13,16 +13,16 @@
#define DEBUG_TYPE "interpreter"
#include "Interpreter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cmath>
@@ -1169,10 +1169,12 @@ void Interpreter::visitVAArgInst(VAArgInst &I) {
.VarArgs[VAList.UIntPairVal.second];
Type *Ty = I.getType();
switch (Ty->getTypeID()) {
- case Type::IntegerTyID: Dest.IntVal = Src.IntVal;
- IMPLEMENT_VAARG(Pointer);
- IMPLEMENT_VAARG(Float);
- IMPLEMENT_VAARG(Double);
+ case Type::IntegerTyID:
+ Dest.IntVal = Src.IntVal;
+ break;
+ IMPLEMENT_VAARG(Pointer);
+ IMPLEMENT_VAARG(Float);
+ IMPLEMENT_VAARG(Double);
default:
dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n";
llvm_unreachable(0);
@@ -1185,6 +1187,39 @@ void Interpreter::visitVAArgInst(VAArgInst &I) {
++VAList.UIntPairVal.second;
}
+void Interpreter::visitExtractElementInst(ExtractElementInst &I) {
+ ExecutionContext &SF = ECStack.back();
+ GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
+ GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
+ GenericValue Dest;
+
+ Type *Ty = I.getType();
+ const unsigned indx = unsigned(Src2.IntVal.getZExtValue());
+
+ if(Src1.AggregateVal.size() > indx) {
+ switch (Ty->getTypeID()) {
+ default:
+ dbgs() << "Unhandled destination type for extractelement instruction: "
+ << *Ty << "\n";
+ llvm_unreachable(0);
+ break;
+ case Type::IntegerTyID:
+ Dest.IntVal = Src1.AggregateVal[indx].IntVal;
+ break;
+ case Type::FloatTyID:
+ Dest.FloatVal = Src1.AggregateVal[indx].FloatVal;
+ break;
+ case Type::DoubleTyID:
+ Dest.DoubleVal = Src1.AggregateVal[indx].DoubleVal;
+ break;
+ }
+ } else {
+ dbgs() << "Invalid index in extractelement instruction\n";
+ }
+
+ SetValue(&I, Dest, SF);
+}
+
GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
ExecutionContext &SF) {
switch (CE->getOpcode()) {
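
visitExtractElementInst reduces to a bounds-checked index into the operand's AggregateVal, switching on the scalar result type. The selection in isolation, with a plain vector standing in for AggregateVal:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Return Src[Idx] when Idx is in range; otherwise report the bad
    // index and return 0.0, as the interpreter leaves Dest untouched.
    double extractElement(const std::vector<double> &Src, uint64_t Idx) {
      if (Idx < Src.size())
        return Src[Idx];
      std::fprintf(stderr, "Invalid index in extractelement instruction\n");
      return 0.0;
    }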
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index e16e2d112a99..bef4bbf66023 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -20,19 +20,19 @@
//===----------------------------------------------------------------------===//
#include "Interpreter.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
#include "llvm/Config/config.h" // Detect libffi
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
+#include <cmath>
#include <csignal>
#include <cstdio>
-#include <map>
-#include <cmath>
#include <cstring>
+#include <map>
#ifdef HAVE_FFI_CALL
#ifdef HAVE_FFI_H
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index 55152dbbea11..9ee9d9456d1d 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -15,8 +15,8 @@
#include "Interpreter.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include <cstring>
using namespace llvm;
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 72c42c15db30..2952d7eabe2b 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -14,14 +14,14 @@
#ifndef LLI_INTERPRETER_H
#define LLI_INTERPRETER_H
-#include "llvm/Function.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InstVisitor.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -178,6 +178,7 @@ public:
void visitAShr(BinaryOperator &I);
void visitVAArgInst(VAArgInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
void visitInstruction(Instruction &I) {
errs() << I << "\n";
llvm_unreachable("Instruction not interpretable yet!");
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 1ad338203a2b..53ea0a260087 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -13,26 +13,26 @@
//===----------------------------------------------------------------------===//
#include "JIT.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/Config/config.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MutexGuard.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -522,7 +522,8 @@ GenericValue JIT::runFunction(Function *F,
case Type::PPC_FP128TyID:
case Type::X86_FP80TyID:
case Type::FP128TyID:
- C = ConstantFP::get(F->getContext(), APFloat(AV.IntVal));
+ C = ConstantFP::get(F->getContext(), APFloat(ArgTy->getFltSemantics(),
+ AV.IntVal));
break;
case Type::PointerTyID:
void *ArgPtr = GVTOP(AV);
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 19c197903a63..35d2b8b1e9f2 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -12,21 +12,21 @@
//
//===----------------------------------------------------------------------===//
-#include "JIT.h"
#include "JITDwarfEmitter.h"
-#include "llvm/Function.h"
+#include "JIT.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
index 9cdbeac86ace..98ac34049176 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -15,9 +15,13 @@
#ifndef LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
#define LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
+#include "llvm/Support/DataTypes.h"
+#include <vector>
+
namespace llvm {
class Function;
+class JIT;
class JITCodeEmitter;
class MachineFunction;
class MachineModuleInfo;
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index ecafda7286f6..c27387699ab6 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -15,39 +15,39 @@
#define DEBUG_TYPE "jit"
#include "JIT.h"
#include "JITDwarfEmitter.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineCodeInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/DebugInfo.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetJITInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Disassembler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Memory.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Disassembler.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/ValueMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#ifndef NDEBUG
#include <iomanip>
@@ -969,14 +969,24 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
SavedBufferBegin = BufferBegin;
SavedBufferEnd = BufferEnd;
SavedCurBufferPtr = CurBufferPtr;
-
- BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
- ActualSize);
- BufferEnd = BufferBegin+ActualSize;
- EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin;
- uint8_t *EhStart;
- uint8_t *FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd,
- EhStart);
+ uint8_t *FrameRegister;
+
+ while (true) {
+ BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
+ ActualSize);
+ BufferEnd = BufferBegin+ActualSize;
+ EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin;
+ uint8_t *EhStart;
+ FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd, EhStart);
+
+ // If the buffer was large enough to hold the table then we are done.
+ if (CurBufferPtr != BufferEnd)
+ break;
+
+ // Try again with twice as much space.
+ ActualSize = (CurBufferPtr - BufferBegin) * 2;
+ MemMgr->deallocateExceptionTable(BufferBegin);
+ }
MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr,
FrameRegister);
BufferBegin = SavedBufferBegin;
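
The rewritten exception-table emission above retries with a doubled buffer whenever the writer runs to the very end of the allocation, since an exactly full buffer may mean truncation. The pattern in isolation, with a hypothetical tryEmit standing in for DE->EmitDwarfTable:

    #include <cstdlib>

    // Attempt the write into Cap bytes; on a (possibly truncated) full
    // buffer, free it, double Cap, and retry. tryEmit returns bytes used.
    char *emitWithRetry(size_t (*tryEmit)(char *Buf, size_t Cap), size_t Cap) {
      for (;;) {
        char *Buf = static_cast<char *>(std::malloc(Cap));
        size_t Used = tryEmit(Buf, Cap);
        if (Used < Cap)
          return Buf;        // fit with room to spare: done
        std::free(Buf);      // mirrors deallocateExceptionTable
        Cap *= 2;            // try again with twice as much space
      }
    }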
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 61bc119d305b..66aeb772ddc3 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -16,20 +16,19 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/Config/config.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Memory.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
-#include <vector>
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <climits>
#include <cstring>
+#include <vector>
#if defined(__linux__)
#if defined(HAVE_SYS_STAT_H)
@@ -73,15 +72,20 @@ namespace {
/// getBlockAfter - Return the memory block immediately after this one.
///
MemoryRangeHeader &getBlockAfter() const {
- return *(MemoryRangeHeader*)((char*)this+BlockSize);
+ return *reinterpret_cast<MemoryRangeHeader *>(
+ reinterpret_cast<char*>(
+ const_cast<MemoryRangeHeader *>(this))+BlockSize);
}
/// getFreeBlockBefore - If the block before this one is free, return it,
/// otherwise return null.
FreeRangeHeader *getFreeBlockBefore() const {
if (PrevAllocated) return 0;
- intptr_t PrevSize = ((intptr_t *)this)[-1];
- return (FreeRangeHeader*)((char*)this-PrevSize);
+ intptr_t PrevSize = reinterpret_cast<intptr_t *>(
+ const_cast<MemoryRangeHeader *>(this))[-1];
+ return reinterpret_cast<FreeRangeHeader *>(
+ reinterpret_cast<char*>(
+ const_cast<MemoryRangeHeader *>(this))-PrevSize);
}
/// FreeBlock - Turn an allocated block into a free block, adjusting
@@ -501,10 +505,14 @@ namespace {
/// allocateDataSection - Allocate memory for a data section.
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID) {
+ unsigned SectionID, bool IsReadOnly) {
return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
}
+ bool applyPermissions(std::string *ErrMsg) {
+ return false;
+ }
+
/// startExceptionTable - Use startFunctionBody to allocate memory for the
/// function's exception table.
uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
index 2911a5077220..088635a0e999 100644
--- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -1,3 +1,4 @@
add_llvm_library(LLVMMCJIT
MCJIT.cpp
+ SectionMemoryManager.cpp
)
diff --git a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
index 90f4d2f75e24..900460bf1cb4 100644
--- a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
+++ b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = MCJIT
parent = ExecutionEngine
-required_libraries = Core ExecutionEngine RuntimeDyld Support Target
+required_libraries = Core ExecutionEngine RuntimeDyld Support Target JIT
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 752c5b73ea32..fee10e194355 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -8,20 +8,20 @@
//===----------------------------------------------------------------------===//
#include "MCJIT.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/ExecutionEngine/ObjectBuffer.h"
#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MutexGuard.h"
-#include "llvm/DataLayout.h"
using namespace llvm;
@@ -118,17 +118,26 @@ void MCJIT::emitObject(Module *m) {
// FIXME: Add a parameter to identify which object is being finalized when
// MCJIT supports multiple modules.
+// FIXME: Provide a way to separate code emission, relocations and page
+// protection in the interface.
void MCJIT::finalizeObject() {
// If the module hasn't been compiled, just do that.
if (!isCompiled) {
// If the call to Dyld.resolveRelocations() is removed from emitObject()
// we'll need to do that here.
emitObject(M);
+
+ // Set page permissions.
+ MemMgr->applyPermissions();
+
return;
}
// Resolve any relocations.
Dyld.resolveRelocations();
+
+ // Set page permissions.
+ MemMgr->applyPermissions();
}
void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
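
With this change MCJIT emits everything read-write and only flips page protections in finalizeObject(), so clients must finalize before jumping to generated code. A POSIX-level sketch of the flip itself, with mprotect standing in for the sys::Memory wrappers (assumes a page-aligned buffer):

    #include <cstddef>
    #include <sys/mman.h>

    // Turn a RW emission buffer into RX executable memory.
    // Returns true on failure, matching applyPermissions' convention.
    bool makeCodeExecutable(void *Base, size_t Len) {
      return mprotect(Base, Len, PROT_READ | PROT_EXEC) != 0;
    }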
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index 571080d2bd22..283a8e528118 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -10,10 +10,10 @@
#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
#define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
-#include "llvm/PassManager.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/PassManager.h"
namespace llvm {
diff --git a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
new file mode 100644
index 000000000000..fa35acd389ae
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
@@ -0,0 +1,226 @@
+//===- SectionMemoryManager.cpp - Memory manager for MCJIT/RtDyld *- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the section-based memory manager used by the MCJIT
+// execution engine and RuntimeDyld.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/MathExtras.h"
+
+#ifdef __linux__
+ // These includes used by SectionMemoryManager::getPointerToNamedFunction()
+ // for Glibc trickery. See comments in this function for more information.
+ #ifdef HAVE_SYS_STAT_H
+ #include <sys/stat.h>
+ #endif
+ #include <fcntl.h>
+ #include <unistd.h>
+#endif
+
+namespace llvm {
+
+uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID,
+ bool IsReadOnly) {
+ if (IsReadOnly)
+ return allocateSection(RODataMem, Size, Alignment);
+ return allocateSection(RWDataMem, Size, Alignment);
+}
+
+uint8_t *SectionMemoryManager::allocateCodeSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID) {
+ return allocateSection(CodeMem, Size, Alignment);
+}
+
+uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup,
+ uintptr_t Size,
+ unsigned Alignment) {
+ if (!Alignment)
+ Alignment = 16;
+
+ assert(!(Alignment & (Alignment - 1)) && "Alignment must be a power of two.");
+
+ uintptr_t RequiredSize = Alignment * ((Size + Alignment - 1)/Alignment + 1);
+ uintptr_t Addr = 0;
+
+ // Look in the list of free memory regions and use a block there if one
+ // is available.
+ for (int i = 0, e = MemGroup.FreeMem.size(); i != e; ++i) {
+ sys::MemoryBlock &MB = MemGroup.FreeMem[i];
+ if (MB.size() >= RequiredSize) {
+ Addr = (uintptr_t)MB.base();
+ uintptr_t EndOfBlock = Addr + MB.size();
+ // Align the address.
+ Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
+ // Store the remaining cut-down free memory block.
+ MemGroup.FreeMem[i] = sys::MemoryBlock((void*)(Addr + Size),
+ EndOfBlock - Addr - Size);
+ return (uint8_t*)Addr;
+ }
+ }
+
+ // No pre-allocated free block was large enough. Allocate a new memory region.
+ // Note that all sections get allocated as read-write. The permissions will
+ // be updated later based on memory group.
+ //
+ // FIXME: It would be useful to define a default allocation size (or add
+ // it as a constructor parameter) to minimize the number of allocations.
+ //
+ // FIXME: Initialize the Near member for each memory group to avoid
+ // interleaving.
+ error_code ec;
+ sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(RequiredSize,
+ &MemGroup.Near,
+ sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE,
+ ec);
+ if (ec) {
+ // FIXME: Add error propagation to the interface.
+ return NULL;
+ }
+
+ // Save this address as the basis for our next request
+ MemGroup.Near = MB;
+
+ MemGroup.AllocatedMem.push_back(MB);
+ Addr = (uintptr_t)MB.base();
+ uintptr_t EndOfBlock = Addr + MB.size();
+
+ // Align the address.
+ Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
+
+ // The allocateMappedMemory may allocate much more memory than we need. In
+ // this case, we store the unused memory as a free memory block.
+ unsigned FreeSize = EndOfBlock-Addr-Size;
+ if (FreeSize > 16)
+ MemGroup.FreeMem.push_back(sys::MemoryBlock((void*)(Addr + Size), FreeSize));
+
+ // Return aligned address
+ return (uint8_t*)Addr;
+}
+
+bool SectionMemoryManager::applyPermissions(std::string *ErrMsg)
+{
+ // FIXME: Should in-progress permissions be reverted if an error occurs?
+ error_code ec;
+
+ // Make code memory executable.
+ ec = applyMemoryGroupPermissions(CodeMem,
+ sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+ if (ec) {
+ if (ErrMsg) {
+ *ErrMsg = ec.message();
+ }
+ return true;
+ }
+
+ // Make read-only data memory read-only.
+ ec = applyMemoryGroupPermissions(RODataMem,
+ sys::Memory::MF_READ);
+ if (ec) {
+ if (ErrMsg) {
+ *ErrMsg = ec.message();
+ }
+ return true;
+ }
+
+ // Read-write data memory already has the correct permissions
+
+ return false;
+}
+
+error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
+ unsigned Permissions) {
+
+ for (int i = 0, e = MemGroup.AllocatedMem.size(); i != e; ++i) {
+ error_code ec;
+ ec = sys::Memory::protectMappedMemory(MemGroup.AllocatedMem[i],
+ Permissions);
+ if (ec) {
+ return ec;
+ }
+ }
+
+ return error_code::success();
+}
+
+void SectionMemoryManager::invalidateInstructionCache() {
+ for (int i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i)
+ sys::Memory::InvalidateInstructionCache(CodeMem.AllocatedMem[i].base(),
+ CodeMem.AllocatedMem[i].size());
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+void *SectionMemoryManager::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+#if defined(__linux__)
+ //===--------------------------------------------------------------------===//
+ // Function stubs that are invoked instead of certain library calls
+ //
+ // Force the following functions to be linked in to anything that uses the
+ // JIT. This is a hack designed to work around the all-too-clever Glibc
+ // strategy of making these functions work differently when inlined vs. when
+ // not inlined, and hiding their real definitions in a separate archive file
+ // that the dynamic linker can't see. For more info, search for
+ // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+ if (Name == "stat") return (void*)(intptr_t)&stat;
+ if (Name == "fstat") return (void*)(intptr_t)&fstat;
+ if (Name == "lstat") return (void*)(intptr_t)&lstat;
+ if (Name == "stat64") return (void*)(intptr_t)&stat64;
+ if (Name == "fstat64") return (void*)(intptr_t)&fstat64;
+ if (Name == "lstat64") return (void*)(intptr_t)&lstat64;
+ if (Name == "atexit") return (void*)(intptr_t)&atexit;
+ if (Name == "mknod") return (void*)(intptr_t)&mknod;
+#endif // __linux__
+
+  // We should not invoke the parent's ctors/dtors from generated main()!
+  // On MinGW and Cygwin, the symbol __main would otherwise resolve to the
+  // callee's copy (e.g. the one in tools/lli), invoking the wrong,
+  // duplicated ctors (and registering the wrong callee's dtors with
+  // atexit(3)). We expect ExecutionEngine::runStaticConstructorsDestructors()
+  // to be called before ExecutionEngine::runFunctionAsMain().
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // try again without the underscore.
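+  // (E.g. a failed lookup of "_open" is retried as "open".)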
+ if (NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ if (AbortOnFailure)
+ report_fatal_error("Program used external function '" + Name +
+ "' which could not be resolved!");
+ return 0;
+}
+
+SectionMemoryManager::~SectionMemoryManager() {
+ for (unsigned i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i)
+ sys::Memory::releaseMappedMemory(CodeMem.AllocatedMem[i]);
+ for (unsigned i = 0, e = RWDataMem.AllocatedMem.size(); i != e; ++i)
+ sys::Memory::releaseMappedMemory(RWDataMem.AllocatedMem[i]);
+ for (unsigned i = 0, e = RODataMem.AllocatedMem.size(); i != e; ++i)
+ sys::Memory::releaseMappedMemory(RODataMem.AllocatedMem[i]);
+}
+
+} // namespace llvm
+
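A minimal usage sketch of the hooks above, assuming the allocateCodeSection /
allocateDataSection entry points declared in the corresponding header (and
"using namespace llvm"):

  SectionMemoryManager MemMgr;
  // ... RuntimeDyld loads objects, calling allocateCodeSection /
  // allocateDataSection on MemMgr as it goes ...
  std::string Err;
  if (MemMgr.applyPermissions(&Err))    // returns true on failure
    report_fatal_error("applyPermissions failed: " + Err);
  MemMgr.invalidateInstructionCache();  // flush the icache for the new code
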
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 6b8e9d1954b0..38867ecca591 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -17,7 +17,7 @@
#define DEBUG_TYPE "oprofile-jit-event-listener"
#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/ExecutionEngine/OProfileWrapper.h"
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
index d67f5370b862..7c0d39518595 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
@@ -29,6 +29,7 @@
#include <dirent.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <unistd.h>
namespace {
diff --git a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
index 50cd0724ea4f..603c526d06e3 100644
--- a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
@@ -9,10 +9,10 @@
#include "JITRegistrar.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/MutexGuard.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
using namespace llvm;
@@ -44,7 +44,7 @@ extern "C" {
// We put information about the JITed function in this global, which the
// debugger reads. Make sure to specify the version statically, because the
// debugger checks the version before we can set it during runtime.
- static struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+ struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
   // Debuggers put a breakpoint in this function.
LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { }
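The { 1, 0, 0, 0 } initializer above corresponds to version 1, action
JIT_NOACTION, and two null entry pointers. For reference, a sketch of the
layout the debugger expects, following GDB's documented JIT interface:

  struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const char *symfile_addr;           // in-memory object image
    uint64_t symfile_size;
  };

  struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;               // JIT_NOACTION / JIT_REGISTER_FN / JIT_UNREGISTER_FN
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
  };
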
diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
index 17f3a2146492..89350cc5b621 100644
--- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
+++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
@@ -1,76 +1,78 @@
-//===-- ObjectImageCommon.h - Format independent executuable object image -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares a file format independent ObjectImage class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
-#define LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
-
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/ExecutionEngine/ObjectImage.h"
-#include "llvm/ExecutionEngine/ObjectBuffer.h"
-
-namespace llvm {
-
-class ObjectImageCommon : public ObjectImage {
- ObjectImageCommon(); // = delete
- ObjectImageCommon(const ObjectImageCommon &other); // = delete
-
-protected:
- object::ObjectFile *ObjFile;
-
- // This form of the constructor allows subclasses to use
- // format-specific subclasses of ObjectFile directly
- ObjectImageCommon(ObjectBuffer *Input, object::ObjectFile *Obj)
- : ObjectImage(Input), // saves Input as Buffer and takes ownership
- ObjFile(Obj)
- {
- }
-
-public:
- ObjectImageCommon(ObjectBuffer* Input)
- : ObjectImage(Input) // saves Input as Buffer and takes ownership
- {
- ObjFile = object::ObjectFile::createObjectFile(Buffer->getMemBuffer());
- }
- virtual ~ObjectImageCommon() { delete ObjFile; }
-
- virtual object::symbol_iterator begin_symbols() const
- { return ObjFile->begin_symbols(); }
- virtual object::symbol_iterator end_symbols() const
- { return ObjFile->end_symbols(); }
-
- virtual object::section_iterator begin_sections() const
- { return ObjFile->begin_sections(); }
- virtual object::section_iterator end_sections() const
- { return ObjFile->end_sections(); }
-
- virtual /* Triple::ArchType */ unsigned getArch() const
- { return ObjFile->getArch(); }
-
- virtual StringRef getData() const { return ObjFile->getData(); }
-
- // Subclasses can override these methods to update the image with loaded
- // addresses for sections and common symbols
- virtual void updateSectionAddress(const object::SectionRef &Sec,
- uint64_t Addr) {}
- virtual void updateSymbolAddress(const object::SymbolRef &Sym, uint64_t Addr)
- {}
-
- // Subclasses can override these methods to provide JIT debugging support
- virtual void registerWithDebugger() {}
- virtual void deregisterWithDebugger() {}
-};
-
-} // end namespace llvm
-
-#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H
-
+//===-- ObjectImageCommon.h - Format independent executable object image --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a file format independent ObjectImage class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+#define LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/Object/ObjectFile.h"
+
+namespace llvm {
+
+class ObjectImageCommon : public ObjectImage {
+ ObjectImageCommon(); // = delete
+ ObjectImageCommon(const ObjectImageCommon &other); // = delete
+
+protected:
+ object::ObjectFile *ObjFile;
+
+ // This form of the constructor allows subclasses to use
+ // format-specific subclasses of ObjectFile directly
+ ObjectImageCommon(ObjectBuffer *Input, object::ObjectFile *Obj)
+ : ObjectImage(Input), // saves Input as Buffer and takes ownership
+ ObjFile(Obj)
+ {
+ }
+
+public:
+ ObjectImageCommon(ObjectBuffer* Input)
+ : ObjectImage(Input) // saves Input as Buffer and takes ownership
+ {
+ ObjFile = object::ObjectFile::createObjectFile(Buffer->getMemBuffer());
+ }
+ virtual ~ObjectImageCommon() { delete ObjFile; }
+
+ virtual object::symbol_iterator begin_symbols() const
+ { return ObjFile->begin_symbols(); }
+ virtual object::symbol_iterator end_symbols() const
+ { return ObjFile->end_symbols(); }
+
+ virtual object::section_iterator begin_sections() const
+ { return ObjFile->begin_sections(); }
+ virtual object::section_iterator end_sections() const
+ { return ObjFile->end_sections(); }
+
+ virtual /* Triple::ArchType */ unsigned getArch() const
+ { return ObjFile->getArch(); }
+
+ virtual StringRef getData() const { return ObjFile->getData(); }
+
+ virtual object::ObjectFile* getObjectFile() const { return ObjFile; }
+
+ // Subclasses can override these methods to update the image with loaded
+ // addresses for sections and common symbols
+ virtual void updateSectionAddress(const object::SectionRef &Sec,
+ uint64_t Addr) {}
+ virtual void updateSymbolAddress(const object::SymbolRef &Sym, uint64_t Addr)
+ {}
+
+ // Subclasses can override these methods to provide JIT debugging support
+ virtual void registerWithDebugger() {}
+ virtual void deregisterWithDebugger() {}
+};
+
+} // end namespace llvm
+
+#endif // LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+
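A side note on the '// = delete' markers above: this header still builds as
C++03, so copying is disabled through private, unimplemented declarations.
Under C++11 the same intent would be spelled directly; a minimal sketch:

  class ObjectImageCommon : public ObjectImage {
    ObjectImageCommon() = delete;
    ObjectImageCommon(const ObjectImageCommon &other) = delete;
    // ...
  };
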
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index a180e36e83f8..409b25fef3af 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -12,12 +12,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "dyld"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "ObjectImageCommon.h"
-#include "RuntimeDyldImpl.h"
#include "RuntimeDyldELF.h"
+#include "RuntimeDyldImpl.h"
#include "RuntimeDyldMachO.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
using namespace llvm;
using namespace llvm::object;
@@ -106,28 +107,24 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
SymType == object::SymbolRef::ST_Unknown) {
uint64_t FileOffset;
StringRef SectionData;
+ bool IsCode;
section_iterator si = obj->end_sections();
Check(i->getFileOffset(FileOffset));
Check(i->getSection(si));
if (si == obj->end_sections()) continue;
Check(si->getContents(SectionData));
+ Check(si->isText(IsCode));
const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() +
(uintptr_t)FileOffset;
uintptr_t SectOffset = (uintptr_t)(SymPtr -
(const uint8_t*)SectionData.begin());
- unsigned SectionID =
- findOrEmitSection(*obj,
- *si,
- SymType == object::SymbolRef::ST_Function,
- LocalSections);
+ unsigned SectionID = findOrEmitSection(*obj, *si, IsCode, LocalSections);
LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset);
DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset)
<< " flags: " << flags
<< " SID: " << SectionID
<< " Offset: " << format("%p", SectOffset));
- bool isGlobal = flags & SymbolRef::SF_Global;
- if (isGlobal)
- GlobalSymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
+ GlobalSymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
}
}
DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n");
@@ -182,7 +179,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
// Allocate memory for the section
unsigned SectionID = Sections.size();
uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*),
- SectionID);
+ SectionID, false);
if (!Addr)
report_fatal_error("Unable to allocate memory for common symbols!");
uint64_t Offset = 0;
@@ -237,11 +234,13 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
bool IsRequired;
bool IsVirtual;
bool IsZeroInit;
+ bool IsReadOnly;
uint64_t DataSize;
StringRef Name;
Check(Section.isRequiredForExecution(IsRequired));
Check(Section.isVirtual(IsVirtual));
Check(Section.isZeroInit(IsZeroInit));
+ Check(Section.isReadOnlyData(IsReadOnly));
Check(Section.getSize(DataSize));
Check(Section.getName(Name));
@@ -256,7 +255,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
Allocate = DataSize + StubBufSize;
Addr = IsCode
? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID)
- : MemMgr->allocateDataSection(Allocate, Alignment, SectionID);
+ : MemMgr->allocateDataSection(Allocate, Alignment, SectionID, IsReadOnly);
if (!Addr)
report_fatal_error("Unable to allocate section memory!");
@@ -433,14 +432,20 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
RelocationList &Relocs = i->second;
SymbolTableMap::const_iterator Loc = GlobalSymbolTable.find(Name);
if (Loc == GlobalSymbolTable.end()) {
- // This is an external symbol, try to get it address from
- // MemoryManager.
- uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(),
+ if (Name.size() == 0) {
+ // This is an absolute symbol, use an address of zero.
+ DEBUG(dbgs() << "Resolving absolute relocations." << "\n");
+ resolveRelocationList(Relocs, 0);
+ } else {
+ // This is an external symbol, try to get its address from
+ // MemoryManager.
+ uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(),
true);
- DEBUG(dbgs() << "Resolving relocations Name: " << Name
- << "\t" << format("%p", Addr)
- << "\n");
- resolveRelocationList(Relocs, (uintptr_t)Addr);
+ DEBUG(dbgs() << "Resolving relocations Name: " << Name
+ << "\t" << format("%p", Addr)
+ << "\n");
+ resolveRelocationList(Relocs, (uintptr_t)Addr);
+ }
} else {
report_fatal_error("Expected external symbol");
}
@@ -451,6 +456,12 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
//===----------------------------------------------------------------------===//
// RuntimeDyld class implementation
RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
+ // FIXME: There's a potential issue lurking here if a single instance of
+ // RuntimeDyld is used to load multiple objects. The current implementation
+ // associates a single memory manager with a RuntimeDyld instance. Even
+ // though the public class spawns a new 'impl' instance for each load,
+ // they share a single memory manager. This can become a problem when page
+ // permissions are applied.
Dyld = 0;
MM = mm;
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index f7015cdf6b5e..b8537b1f2f9c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -15,16 +15,16 @@
#include "RuntimeDyldELF.h"
#include "JITRegistrar.h"
#include "ObjectImageCommon.h"
+#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/IntervalMap.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/ExecutionEngine/ObjectImage.h"
-#include "llvm/ExecutionEngine/ObjectBuffer.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
#include "llvm/Object/ELF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
using namespace llvm::object;
@@ -38,19 +38,22 @@ error_code check(error_code Err) {
return Err;
}
-template<support::endianness target_endianness, bool is64Bits>
-class DyldELFObject : public ELFObjectFile<target_endianness, is64Bits> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+template<class ELFT>
+class DyldELFObject : public ELFObjectFile<ELFT> {
+ LLVM_ELF_IMPORT_TYPES(ELFT)
- typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
- typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
- typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel;
- typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela;
+ typedef Elf_Shdr_Impl<ELFT> Elf_Shdr;
+ typedef Elf_Sym_Impl<ELFT> Elf_Sym;
+  typedef Elf_Rel_Impl<ELFT, false> Elf_Rel;
+  typedef Elf_Rel_Impl<ELFT, true> Elf_Rela;
- typedef Elf_Ehdr_Impl<target_endianness, is64Bits> Elf_Ehdr;
+ typedef Elf_Ehdr_Impl<ELFT> Elf_Ehdr;
typedef typename ELFDataTypeTypedefHelper<
- target_endianness, is64Bits>::value_type addr_type;
+ ELFT>::value_type addr_type;
public:
DyldELFObject(MemoryBuffer *Wrapper, error_code &ec);
@@ -60,24 +63,25 @@ public:
// Methods for type inquiry through isa, cast and dyn_cast
static inline bool classof(const Binary *v) {
- return (isa<ELFObjectFile<target_endianness, is64Bits> >(v)
- && classof(cast<ELFObjectFile<target_endianness, is64Bits> >(v)));
+    return (isa<ELFObjectFile<ELFT> >(v)
+            && classof(cast<ELFObjectFile<ELFT> >(v)));
}
static inline bool classof(
- const ELFObjectFile<target_endianness, is64Bits> *v) {
+ const ELFObjectFile<ELFT> *v) {
return v->isDyldType();
}
};
-template<support::endianness target_endianness, bool is64Bits>
+template<class ELFT>
class ELFObjectImage : public ObjectImageCommon {
protected:
- DyldELFObject<target_endianness, is64Bits> *DyldObj;
+ DyldELFObject<ELFT> *DyldObj;
bool Registered;
public:
ELFObjectImage(ObjectBuffer *Input,
- DyldELFObject<target_endianness, is64Bits> *Obj)
+ DyldELFObject<ELFT> *Obj)
: ObjectImageCommon(Input, Obj),
DyldObj(Obj),
Registered(false) {}
@@ -113,17 +117,15 @@ class ELFObjectImage : public ObjectImageCommon {
// The MemoryBuffer passed into this constructor is just a wrapper around the
// actual memory. Ultimately, the Binary parent class will take ownership of
// this MemoryBuffer object but not the underlying memory.
-template<support::endianness target_endianness, bool is64Bits>
-DyldELFObject<target_endianness, is64Bits>::DyldELFObject(MemoryBuffer *Wrapper,
- error_code &ec)
- : ELFObjectFile<target_endianness, is64Bits>(Wrapper, ec) {
+template<class ELFT>
+DyldELFObject<ELFT>::DyldELFObject(MemoryBuffer *Wrapper, error_code &ec)
+ : ELFObjectFile<ELFT>(Wrapper, ec) {
this->isDyldELFObject = true;
}
-template<support::endianness target_endianness, bool is64Bits>
-void DyldELFObject<target_endianness, is64Bits>::updateSectionAddress(
- const SectionRef &Sec,
- uint64_t Addr) {
+template<class ELFT>
+void DyldELFObject<ELFT>::updateSectionAddress(const SectionRef &Sec,
+ uint64_t Addr) {
DataRefImpl ShdrRef = Sec.getRawDataRefImpl();
Elf_Shdr *shdr = const_cast<Elf_Shdr*>(
reinterpret_cast<const Elf_Shdr *>(ShdrRef.p));
@@ -133,14 +135,12 @@ void DyldELFObject<target_endianness, is64Bits>::updateSectionAddress(
shdr->sh_addr = static_cast<addr_type>(Addr);
}
-template<support::endianness target_endianness, bool is64Bits>
-void DyldELFObject<target_endianness, is64Bits>::updateSymbolAddress(
- const SymbolRef &SymRef,
- uint64_t Addr) {
+template<class ELFT>
+void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef,
+ uint64_t Addr) {
Elf_Sym *sym = const_cast<Elf_Sym*>(
- ELFObjectFile<target_endianness, is64Bits>::
- getSymbol(SymRef.getRawDataRefImpl()));
+ ELFObjectFile<ELFT>::getSymbol(SymRef.getRawDataRefImpl()));
// This assumes the address passed in matches the target address bitness
// The template-based type cast handles everything else.
@@ -149,7 +149,6 @@ void DyldELFObject<target_endianness, is64Bits>::updateSymbolAddress(
} // namespace
-
namespace llvm {
ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) {
@@ -161,24 +160,28 @@ ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) {
error_code ec;
if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) {
- DyldELFObject<support::little, false> *Obj =
- new DyldELFObject<support::little, false>(Buffer->getMemBuffer(), ec);
- return new ELFObjectImage<support::little, false>(Buffer, Obj);
+ DyldELFObject<ELFType<support::little, 4, false> > *Obj =
+ new DyldELFObject<ELFType<support::little, 4, false> >(
+ Buffer->getMemBuffer(), ec);
+ return new ELFObjectImage<ELFType<support::little, 4, false> >(Buffer, Obj);
}
else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) {
- DyldELFObject<support::big, false> *Obj =
- new DyldELFObject<support::big, false>(Buffer->getMemBuffer(), ec);
- return new ELFObjectImage<support::big, false>(Buffer, Obj);
+ DyldELFObject<ELFType<support::big, 4, false> > *Obj =
+ new DyldELFObject<ELFType<support::big, 4, false> >(
+ Buffer->getMemBuffer(), ec);
+ return new ELFObjectImage<ELFType<support::big, 4, false> >(Buffer, Obj);
}
else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) {
- DyldELFObject<support::big, true> *Obj =
- new DyldELFObject<support::big, true>(Buffer->getMemBuffer(), ec);
- return new ELFObjectImage<support::big, true>(Buffer, Obj);
+ DyldELFObject<ELFType<support::big, 8, true> > *Obj =
+ new DyldELFObject<ELFType<support::big, 8, true> >(
+ Buffer->getMemBuffer(), ec);
+ return new ELFObjectImage<ELFType<support::big, 8, true> >(Buffer, Obj);
}
else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) {
- DyldELFObject<support::little, true> *Obj =
- new DyldELFObject<support::little, true>(Buffer->getMemBuffer(), ec);
- return new ELFObjectImage<support::little, true>(Buffer, Obj);
+ DyldELFObject<ELFType<support::little, 8, true> > *Obj =
+ new DyldELFObject<ELFType<support::little, 8, true> >(
+ Buffer->getMemBuffer(), ec);
+ return new ELFObjectImage<ELFType<support::little, 8, true> >(Buffer, Obj);
}
else
llvm_unreachable("Unexpected ELF format");
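Each branch above swaps the old <endianness, is64Bits> template pair for the
single ELFType<endianness, max-alignment, is64Bits> tag; the little-endian
64-bit case (x86-64 style objects), for instance, amounts to the following
(the typedef name is illustrative, not from the patch):

  typedef ELFType<support::little, 8, true> ELF64LE;
  DyldELFObject<ELF64LE> *Obj =
      new DyldELFObject<ELF64LE>(Buffer->getMemBuffer(), ec);
  return new ELFObjectImage<ELF64LE>(Buffer, Obj);
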
@@ -207,7 +210,7 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
case ELF::R_X86_64_32S: {
Value += Addend;
assert((Type == ELF::R_X86_64_32 && (Value <= UINT32_MAX)) ||
- (Type == ELF::R_X86_64_32S &&
+ (Type == ELF::R_X86_64_32S &&
((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN)));
uint32_t TruncatedAddr = (Value & 0xFFFFFFFF);
uint32_t *Target = reinterpret_cast<uint32_t*>(Section.Address + Offset);
@@ -288,8 +291,9 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
default:
llvm_unreachable("Not implemented relocation type!");
- // Write a 32bit value to relocation address, taking into account the
+ // Write a 32bit value to relocation address, taking into account the
// implicit addend encoded in the target.
+ case ELF::R_ARM_TARGET1 :
case ELF::R_ARM_ABS32 :
*TargetPtr += Value;
break;
@@ -298,7 +302,7 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
// Last 4 bit should be shifted.
case ELF::R_ARM_MOVW_ABS_NC :
// We are not expecting any other addend in the relocation address.
- // Using 0x000F0FFF because MOVW has its 16 bit immediate split into 2
+ // Using 0x000F0FFF because MOVW has its 16 bit immediate split into 2
// non-contiguous fields.
assert((*TargetPtr & 0x000F0FFF) == 0);
Value = Value & 0xFFFF;
@@ -516,6 +520,12 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
uint8_t aalk = *(LocalAddress+3);
writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc));
} break;
+ case ELF::R_PPC64_ADDR32 : {
+    int64_t Result = static_cast<int64_t>(Value + Addend);
+    if (SignExtend64<32>(Result) != Result)
+      llvm_unreachable("Relocation R_PPC64_ADDR32 overflow");
+    writeInt32BE(LocalAddress, static_cast<int32_t>(Result));
+ } break;
case ELF::R_PPC64_REL24 : {
uint64_t FinalAddress = (Section.LoadAddress + Offset);
int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend);
@@ -524,6 +534,13 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
// Generates a 'bl <address>' instruction
writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC));
} break;
+ case ELF::R_PPC64_REL32 : {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+    int64_t delta = static_cast<int64_t>(Value - FinalAddress + Addend);
+    if (SignExtend64<32>(delta) != delta)
+      llvm_unreachable("Relocation R_PPC64_REL32 overflow");
+    writeInt32BE(LocalAddress, static_cast<int32_t>(delta));
+ } break;
case ELF::R_PPC64_ADDR64 :
writeInt64BE(LocalAddress, Value + Addend);
break;
@@ -543,7 +560,6 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
}
}
-
void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
uint64_t Offset,
uint64_t Value,
@@ -623,9 +639,9 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
// Default to 'true' in case isText fails (though it never does).
bool isCode = true;
si->isText(isCode);
- Value.SectionID = findOrEmitSection(Obj,
- (*si),
- isCode,
+ Value.SectionID = findOrEmitSection(Obj,
+ (*si),
+ isCode,
ObjSectionToID);
Value.Addend = Addend;
break;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index a292ee1a8479..f1009945775c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -14,12 +14,12 @@
#ifndef LLVM_RUNTIME_DYLD_IMPL_H
#define LLVM_RUNTIME_DYLD_IMPL_H
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/ExecutionEngine/ObjectImage.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 987c0c3afc26..bcc3df1b4e7c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "dyld"
+#include "RuntimeDyldMachO.h"
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "RuntimeDyldMachO.h"
+#include "llvm/ADT/StringRef.h"
using namespace llvm;
using namespace llvm::object;
@@ -96,6 +96,7 @@ bool RuntimeDyldMachO::resolveI386Relocation(uint8_t *LocalAddress,
*p++ = (uint8_t)(ValueToWrite & 0xff);
ValueToWrite >>= 8;
}
+ return false;
}
case macho::RIT_Difference:
case macho::RIT_Generic_LocalDifference:
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index fe3539dff6f5..62d84870780c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -14,10 +14,10 @@
#ifndef LLVM_RUNTIME_DYLD_MACHO_H
#define LLVM_RUNTIME_DYLD_MACHO_H
+#include "RuntimeDyldImpl.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/Object/MachOObject.h"
#include "llvm/Support/Format.h"
-#include "RuntimeDyldImpl.h"
using namespace llvm;
using namespace llvm::object;
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index 8b6104fdca9c..ca4330fa22b0 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -15,13 +15,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/Module.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -32,8 +32,7 @@ TargetMachine *EngineBuilder::selectTarget() {
// must use the host architecture.
if (UseMCJIT && WhichEngine != EngineKind::Interpreter && M)
TT.setTriple(M->getTargetTriple());
- else
- TT.setTriple(LLVM_HOSTTRIPLE);
+
return selectTarget(TT, MArch, MCPU, MAttrs);
}
@@ -45,7 +44,7 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
const SmallVectorImpl<std::string>& MAttrs) {
Triple TheTriple(TargetTriple);
if (TheTriple.getTriple().empty())
- TheTriple.setTriple(sys::getDefaultTargetTriple());
+ TheTriple.setTriple(sys::getProcessTriple());
// Adjust the triple to match what the user requested.
const Target *TheTarget = 0;
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
new file mode 100644
index 000000000000..fb591a891dae
--- /dev/null
+++ b/lib/IR/AsmWriter.cpp
@@ -0,0 +1,2236 @@
+//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Writer.h
+//
+// Note that these routines must be extremely tolerant of various errors in the
+// LLVM code, because they can be used for debugging transformations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+// Make virtual table appear in this compilation unit.
+AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+static const Module *getModuleFromVal(const Value *V) {
+ if (const Argument *MA = dyn_cast<Argument>(V))
+ return MA->getParent() ? MA->getParent()->getParent() : 0;
+
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return BB->getParent() ? BB->getParent()->getParent() : 0;
+
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ const Function *M = I->getParent() ? I->getParent()->getParent() : 0;
+ return M ? M->getParent() : 0;
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return GV->getParent();
+ return 0;
+}
+
+static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
+ switch (cc) {
+ default: Out << "cc" << cc; break;
+ case CallingConv::Fast: Out << "fastcc"; break;
+ case CallingConv::Cold: Out << "coldcc"; break;
+ case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
+ case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
+ case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
+ case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
+ case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
+ case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
+ case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break;
+ case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break;
+ case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << "ptx_device"; break;
+ }
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
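+// E.g. the string he"llo\n is printed as he\22llo\0A: unsafe characters
+// become a backslash followed by two uppercase hex digits.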
+static void PrintEscapedString(StringRef Name, raw_ostream &Out) {
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ unsigned char C = Name[i];
+ if (isprint(C) && C != '\\' && C != '"')
+ Out << C;
+ else
+ Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+}
+
+enum PrefixType {
+ GlobalPrefix,
+ LabelPrefix,
+ LocalPrefix,
+ NoPrefix
+};
+
+/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
+/// prefixed with % (if the string only contains simple characters) or is
+/// surrounded with ""'s (if it has special chars in it). Print it out.
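+/// E.g. a local named tmp prints as %tmp, while names like "has space" or
+/// "0start" need quoting and print as %"has space" and %"0start".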
+static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
+ assert(!Name.empty() && "Cannot get empty name!");
+ switch (Prefix) {
+ case NoPrefix: break;
+ case GlobalPrefix: OS << '@'; break;
+ case LabelPrefix: break;
+ case LocalPrefix: OS << '%'; break;
+ }
+
+ // Scan the name to see if it needs quotes first.
+ bool NeedsQuotes = isdigit(static_cast<unsigned char>(Name[0]));
+ if (!NeedsQuotes) {
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ // By making this unsigned, the value passed in to isalnum will always be
+ // in the range 0-255. This is important when building with MSVC because
+ // its implementation will assert. This situation can arise when dealing
+ // with UTF-8 multibyte characters.
+ unsigned char C = Name[i];
+ if (!isalnum(static_cast<unsigned char>(C)) && C != '-' && C != '.' &&
+ C != '_') {
+ NeedsQuotes = true;
+ break;
+ }
+ }
+ }
+
+ // If we didn't need any quotes, just write out the name in one blast.
+ if (!NeedsQuotes) {
+ OS << Name;
+ return;
+ }
+
+ // Okay, we need quotes. Output the quotes and escape any scary characters as
+ // needed.
+ OS << '"';
+ PrintEscapedString(Name, OS);
+ OS << '"';
+}
+
+/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
+/// prefixed with % (if the string only contains simple characters) or is
+/// surrounded with ""'s (if it has special chars in it). Print it out.
+static void PrintLLVMName(raw_ostream &OS, const Value *V) {
+ PrintLLVMName(OS, V->getName(),
+ isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
+}
+
+//===----------------------------------------------------------------------===//
+// TypePrinting Class: Type printing machinery
+//===----------------------------------------------------------------------===//
+
+/// TypePrinting - Type printing machinery.
+namespace {
+class TypePrinting {
+ TypePrinting(const TypePrinting &) LLVM_DELETED_FUNCTION;
+ void operator=(const TypePrinting&) LLVM_DELETED_FUNCTION;
+public:
+
+ /// NamedTypes - The named types that are used by the current module.
+ TypeFinder NamedTypes;
+
+ /// NumberedTypes - The numbered types, along with their value.
+ DenseMap<StructType*, unsigned> NumberedTypes;
+
+
+ TypePrinting() {}
+ ~TypePrinting() {}
+
+ void incorporateTypes(const Module &M);
+
+ void print(Type *Ty, raw_ostream &OS);
+
+ void printStructBody(StructType *Ty, raw_ostream &OS);
+};
+} // end anonymous namespace.
+
+
+void TypePrinting::incorporateTypes(const Module &M) {
+ NamedTypes.run(M, false);
+
+ // The list of struct types we got back includes all the struct types, split
+ // the unnamed ones out to a numbering and remove the anonymous structs.
+ unsigned NextNumber = 0;
+
+ std::vector<StructType*>::iterator NextToUse = NamedTypes.begin(), I, E;
+ for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) {
+ StructType *STy = *I;
+
+ // Ignore anonymous types.
+ if (STy->isLiteral())
+ continue;
+
+ if (STy->getName().empty())
+ NumberedTypes[STy] = NextNumber++;
+ else
+ *NextToUse++ = STy;
+ }
+
+ NamedTypes.erase(NextToUse, NamedTypes.end());
+}
+
+
+/// print - Write the specified type to the specified raw_ostream, making
+/// use of type names or up references to shorten the type name where possible.
+void TypePrinting::print(Type *Ty, raw_ostream &OS) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: OS << "void"; break;
+ case Type::HalfTyID: OS << "half"; break;
+ case Type::FloatTyID: OS << "float"; break;
+ case Type::DoubleTyID: OS << "double"; break;
+ case Type::X86_FP80TyID: OS << "x86_fp80"; break;
+ case Type::FP128TyID: OS << "fp128"; break;
+ case Type::PPC_FP128TyID: OS << "ppc_fp128"; break;
+ case Type::LabelTyID: OS << "label"; break;
+ case Type::MetadataTyID: OS << "metadata"; break;
+ case Type::X86_MMXTyID: OS << "x86_mmx"; break;
+ case Type::IntegerTyID:
+ OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
+ return;
+
+ case Type::FunctionTyID: {
+ FunctionType *FTy = cast<FunctionType>(Ty);
+ print(FTy->getReturnType(), OS);
+ OS << " (";
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ if (I != FTy->param_begin())
+ OS << ", ";
+ print(*I, OS);
+ }
+ if (FTy->isVarArg()) {
+ if (FTy->getNumParams()) OS << ", ";
+ OS << "...";
+ }
+ OS << ')';
+ return;
+ }
+ case Type::StructTyID: {
+ StructType *STy = cast<StructType>(Ty);
+
+ if (STy->isLiteral())
+ return printStructBody(STy, OS);
+
+ if (!STy->getName().empty())
+ return PrintLLVMName(OS, STy->getName(), LocalPrefix);
+
+ DenseMap<StructType*, unsigned>::iterator I = NumberedTypes.find(STy);
+ if (I != NumberedTypes.end())
+ OS << '%' << I->second;
+ else // Not enumerated, print the hex address.
+ OS << "%\"type " << STy << '\"';
+ return;
+ }
+ case Type::PointerTyID: {
+ PointerType *PTy = cast<PointerType>(Ty);
+ print(PTy->getElementType(), OS);
+ if (unsigned AddressSpace = PTy->getAddressSpace())
+ OS << " addrspace(" << AddressSpace << ')';
+ OS << '*';
+ return;
+ }
+ case Type::ArrayTyID: {
+ ArrayType *ATy = cast<ArrayType>(Ty);
+ OS << '[' << ATy->getNumElements() << " x ";
+ print(ATy->getElementType(), OS);
+ OS << ']';
+ return;
+ }
+ case Type::VectorTyID: {
+ VectorType *PTy = cast<VectorType>(Ty);
+ OS << "<" << PTy->getNumElements() << " x ";
+ print(PTy->getElementType(), OS);
+ OS << '>';
+ return;
+ }
+ default:
+ OS << "<unrecognized-type>";
+ return;
+ }
+}
+
+void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
+ if (STy->isOpaque()) {
+ OS << "opaque";
+ return;
+ }
+
+ if (STy->isPacked())
+ OS << '<';
+
+ if (STy->getNumElements() == 0) {
+ OS << "{}";
+ } else {
+ StructType::element_iterator I = STy->element_begin();
+ OS << "{ ";
+ print(*I++, OS);
+ for (StructType::element_iterator E = STy->element_end(); I != E; ++I) {
+ OS << ", ";
+ print(*I, OS);
+ }
+
+ OS << " }";
+ }
+ if (STy->isPacked())
+ OS << '>';
+}
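+
+// For example, the printer above renders i32 as "i32", a pointer to an array
+// of four floats as "[4 x float]*", and a literal struct of i32 and i8 as
+// "{ i32, i8 }".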
+
+
+
+//===----------------------------------------------------------------------===//
+// SlotTracker Class: Enumerate slot numbers for unnamed values
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// This class provides computation of slot numbers for LLVM Assembly writing.
+///
+class SlotTracker {
+public:
+ /// ValueMap - A mapping of Values to slot numbers.
+ typedef DenseMap<const Value*, unsigned> ValueMap;
+
+private:
+ /// TheModule - The module for which we are holding slot numbers.
+ const Module* TheModule;
+
+ /// TheFunction - The function for which we are holding slot numbers.
+ const Function* TheFunction;
+ bool FunctionProcessed;
+
+ /// mMap - The slot map for the module level data.
+ ValueMap mMap;
+ unsigned mNext;
+
+ /// fMap - The slot map for the function level data.
+ ValueMap fMap;
+ unsigned fNext;
+
+ /// mdnMap - Map for MDNodes.
+ DenseMap<const MDNode*, unsigned> mdnMap;
+ unsigned mdnNext;
+
+ /// asMap - The slot map for attribute sets.
+ DenseMap<AttributeSet, unsigned> asMap;
+ unsigned asNext;
+public:
+ /// Construct from a module
+ explicit SlotTracker(const Module *M);
+  /// Construct from a function, starting out in incorporated state.
+ explicit SlotTracker(const Function *F);
+
+  /// Return the slot number of the specified value in its type
+ /// plane. If something is not in the SlotTracker, return -1.
+ int getLocalSlot(const Value *V);
+ int getGlobalSlot(const GlobalValue *V);
+ int getMetadataSlot(const MDNode *N);
+ int getAttributeGroupSlot(AttributeSet AS);
+
+ /// If you'd like to deal with a function instead of just a module, use
+ /// this method to get its data into the SlotTracker.
+ void incorporateFunction(const Function *F) {
+ TheFunction = F;
+ FunctionProcessed = false;
+ }
+
+ /// After calling incorporateFunction, use this method to remove the
+ /// most recently incorporated function from the SlotTracker. This
+ /// will reset the state of the machine back to just the module contents.
+ void purgeFunction();
+
+ /// MDNode map iterators.
+ typedef DenseMap<const MDNode*, unsigned>::iterator mdn_iterator;
+ mdn_iterator mdn_begin() { return mdnMap.begin(); }
+ mdn_iterator mdn_end() { return mdnMap.end(); }
+ unsigned mdn_size() const { return mdnMap.size(); }
+ bool mdn_empty() const { return mdnMap.empty(); }
+
+ /// AttributeSet map iterators.
+ typedef DenseMap<AttributeSet, unsigned>::iterator as_iterator;
+ as_iterator as_begin() { return asMap.begin(); }
+ as_iterator as_end() { return asMap.end(); }
+ unsigned as_size() const { return asMap.size(); }
+ bool as_empty() const { return asMap.empty(); }
+
+ /// This function does the actual initialization.
+ inline void initialize();
+
+ // Implementation Details
+private:
+ /// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
+ void CreateModuleSlot(const GlobalValue *V);
+
+ /// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
+ void CreateMetadataSlot(const MDNode *N);
+
+ /// CreateFunctionSlot - Insert the specified Value* into the slot table.
+ void CreateFunctionSlot(const Value *V);
+
+ /// \brief Insert the specified AttributeSet into the slot table.
+ void CreateAttributeSetSlot(AttributeSet AS);
+
+ /// Add all of the module level global variables (and their initializers)
+ /// and function declarations, but not the contents of those functions.
+ void processModule();
+
+ /// Add all of the functions arguments, basic blocks, and instructions.
+ void processFunction();
+
+ SlotTracker(const SlotTracker &) LLVM_DELETED_FUNCTION;
+ void operator=(const SlotTracker &) LLVM_DELETED_FUNCTION;
+};
+
+} // end anonymous namespace
+
+
+static SlotTracker *createSlotTracker(const Value *V) {
+ if (const Argument *FA = dyn_cast<Argument>(V))
+ return new SlotTracker(FA->getParent());
+
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getParent())
+ return new SlotTracker(I->getParent()->getParent());
+
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return new SlotTracker(BB->getParent());
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return new SlotTracker(GV->getParent());
+
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+ return new SlotTracker(GA->getParent());
+
+ if (const Function *Func = dyn_cast<Function>(V))
+ return new SlotTracker(Func);
+
+ if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+ if (!MD->isFunctionLocal())
+ return new SlotTracker(MD->getFunction());
+
+ return new SlotTracker((Function *)0);
+ }
+
+ return 0;
+}
+
+#if 0
+#define ST_DEBUG(X) dbgs() << X
+#else
+#define ST_DEBUG(X)
+#endif
+
+// Module level constructor. Causes the contents of the Module (sans functions)
+// to be added to the slot table.
+SlotTracker::SlotTracker(const Module *M)
+ : TheModule(M), TheFunction(0), FunctionProcessed(false),
+ mNext(0), fNext(0), mdnNext(0), asNext(0) {
+}
+
+// Function level constructor. Causes the contents of the Module and the one
+// function provided to be added to the slot table.
+SlotTracker::SlotTracker(const Function *F)
+ : TheModule(F ? F->getParent() : 0), TheFunction(F), FunctionProcessed(false),
+ mNext(0), fNext(0), mdnNext(0), asNext(0) {
+}
+
+inline void SlotTracker::initialize() {
+ if (TheModule) {
+ processModule();
+ TheModule = 0; ///< Prevent re-processing next time we're called.
+ }
+
+ if (TheFunction && !FunctionProcessed)
+ processFunction();
+}
+
+// Iterate through all the global variables, functions, and global
+// variable initializers and create slots for them.
+void SlotTracker::processModule() {
+ ST_DEBUG("begin processModule!\n");
+
+ // Add all of the unnamed global variables to the value table.
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (!I->hasName())
+ CreateModuleSlot(I);
+ }
+
+ // Add metadata used by named metadata.
+ for (Module::const_named_metadata_iterator
+ I = TheModule->named_metadata_begin(),
+ E = TheModule->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ CreateMetadataSlot(NMD->getOperand(i));
+ }
+
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I) {
+ if (!I->hasName())
+ // Add all the unnamed functions to the table.
+ CreateModuleSlot(I);
+
+ // Add all the function attributes to the table.
+ // FIXME: Add attributes of other objects?
+ AttributeSet FnAttrs = I->getAttributes().getFnAttributes();
+ if (FnAttrs.hasAttributes(AttributeSet::FunctionIndex))
+ CreateAttributeSetSlot(FnAttrs);
+ }
+
+ ST_DEBUG("end processModule!\n");
+}
+
+// Process the arguments, basic blocks, and instructions of a function.
+void SlotTracker::processFunction() {
+ ST_DEBUG("begin processFunction!\n");
+ fNext = 0;
+
+ // Add all the function arguments with no names.
+ for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
+ AE = TheFunction->arg_end(); AI != AE; ++AI)
+ if (!AI->hasName())
+ CreateFunctionSlot(AI);
+
+ ST_DEBUG("Inserting Instructions:\n");
+
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+
+ // Add all of the basic blocks and instructions with no names.
+ for (Function::const_iterator BB = TheFunction->begin(),
+ E = TheFunction->end(); BB != E; ++BB) {
+ if (!BB->hasName())
+ CreateFunctionSlot(BB);
+
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ if (!I->getType()->isVoidTy() && !I->hasName())
+ CreateFunctionSlot(I);
+
+ // Intrinsics can directly use metadata. We allow direct calls to any
+ // llvm.foo function here, because the target may not be linked into the
+ // optimizer.
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (Function *F = CI->getCalledFunction())
+ if (F->getName().startswith("llvm."))
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i)))
+ CreateMetadataSlot(N);
+
+ // Add all the call attributes to the table.
+ AttributeSet Attrs = CI->getAttributes().getFnAttributes();
+ if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ CreateAttributeSetSlot(Attrs);
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ // Add all the call attributes to the table.
+ AttributeSet Attrs = II->getAttributes().getFnAttributes();
+ if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ CreateAttributeSetSlot(Attrs);
+ }
+
+ // Process metadata attached with this instruction.
+ I->getAllMetadata(MDForInst);
+ for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+ CreateMetadataSlot(MDForInst[i].second);
+ MDForInst.clear();
+ }
+ }
+
+ FunctionProcessed = true;
+
+ ST_DEBUG("end processFunction!\n");
+}
+
+/// Clean up after incorporating a function. This is the only way to get out of
+/// the function incorporation state that affects get*Slot/Create*Slot. Function
+/// incorporation state is indicated by TheFunction != 0.
+void SlotTracker::purgeFunction() {
+ ST_DEBUG("begin purgeFunction!\n");
+ fMap.clear(); // Simply discard the function level map
+ TheFunction = 0;
+ FunctionProcessed = false;
+ ST_DEBUG("end purgeFunction!\n");
+}
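+
+// Typical use: construct a SlotTracker over a Module (or Function), call
+// incorporateFunction(F), query getLocalSlot / getGlobalSlot /
+// getMetadataSlot, and purgeFunction() before incorporating the next one.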
+
+/// getGlobalSlot - Get the slot number of a global value.
+int SlotTracker::getGlobalSlot(const GlobalValue *V) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ // Find the value in the module map
+ ValueMap::iterator MI = mMap.find(V);
+ return MI == mMap.end() ? -1 : (int)MI->second;
+}
+
+/// getMetadataSlot - Get the slot number of a MDNode.
+int SlotTracker::getMetadataSlot(const MDNode *N) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ // Find the MDNode in the module map
+ mdn_iterator MI = mdnMap.find(N);
+ return MI == mdnMap.end() ? -1 : (int)MI->second;
+}
+
+
+/// getLocalSlot - Get the slot number for a value that is local to a function.
+int SlotTracker::getLocalSlot(const Value *V) {
+ assert(!isa<Constant>(V) && "Can't get a constant or global slot with this!");
+
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ ValueMap::iterator FI = fMap.find(V);
+ return FI == fMap.end() ? -1 : (int)FI->second;
+}
+
+int SlotTracker::getAttributeGroupSlot(AttributeSet AS) {
+ // Check for uninitialized state and do lazy initialization.
+ initialize();
+
+ // Find the AttributeSet in the module map.
+ as_iterator AI = asMap.find(AS);
+ return AI == asMap.end() ? -1 : (int)AI->second;
+}
+
+/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
+void SlotTracker::CreateModuleSlot(const GlobalValue *V) {
+ assert(V && "Can't insert a null Value into SlotTracker!");
+ assert(!V->getType()->isVoidTy() && "Doesn't need a slot!");
+ assert(!V->hasName() && "Doesn't need a slot!");
+
+ unsigned DestSlot = mNext++;
+ mMap[V] = DestSlot;
+
+ ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
+ DestSlot << " [");
+ // G = Global, F = Function, A = Alias, o = other
+ ST_DEBUG((isa<GlobalVariable>(V) ? 'G' :
+ (isa<Function>(V) ? 'F' :
+ (isa<GlobalAlias>(V) ? 'A' : 'o'))) << "]\n");
+}
+
+/// CreateFunctionSlot - Create a new slot for the specified value if it has
+/// no name.
+void SlotTracker::CreateFunctionSlot(const Value *V) {
+ assert(!V->getType()->isVoidTy() && !V->hasName() && "Doesn't need a slot!");
+
+ unsigned DestSlot = fNext++;
+ fMap[V] = DestSlot;
+
+ // G = Global, F = Function, o = other
+ ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
+ DestSlot << " [o]\n");
+}
+
+/// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
+void SlotTracker::CreateMetadataSlot(const MDNode *N) {
+ assert(N && "Can't insert a null Value into SlotTracker!");
+
+  // Don't insert if N is function-local metadata; such nodes are always
+  // printed inline.
+ if (!N->isFunctionLocal()) {
+ mdn_iterator I = mdnMap.find(N);
+ if (I != mdnMap.end())
+ return;
+
+ unsigned DestSlot = mdnNext++;
+ mdnMap[N] = DestSlot;
+ }
+
+ // Recursively add any MDNodes referenced by operands.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (const MDNode *Op = dyn_cast_or_null<MDNode>(N->getOperand(i)))
+ CreateMetadataSlot(Op);
+}
+
+void SlotTracker::CreateAttributeSetSlot(AttributeSet AS) {
+ assert(AS.hasAttributes(AttributeSet::FunctionIndex) &&
+ "Doesn't need a slot!");
+
+ as_iterator I = asMap.find(AS);
+ if (I != asMap.end())
+ return;
+
+ unsigned DestSlot = asNext++;
+ asMap[AS] = DestSlot;
+}
+
+//===----------------------------------------------------------------------===//
+// AsmWriter Implementation
+//===----------------------------------------------------------------------===//
+
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context);
+
+
+
+static const char *getPredicateText(unsigned predicate) {
+ const char * pred = "unknown";
+ switch (predicate) {
+ case FCmpInst::FCMP_FALSE: pred = "false"; break;
+ case FCmpInst::FCMP_OEQ: pred = "oeq"; break;
+ case FCmpInst::FCMP_OGT: pred = "ogt"; break;
+ case FCmpInst::FCMP_OGE: pred = "oge"; break;
+ case FCmpInst::FCMP_OLT: pred = "olt"; break;
+ case FCmpInst::FCMP_OLE: pred = "ole"; break;
+ case FCmpInst::FCMP_ONE: pred = "one"; break;
+ case FCmpInst::FCMP_ORD: pred = "ord"; break;
+ case FCmpInst::FCMP_UNO: pred = "uno"; break;
+ case FCmpInst::FCMP_UEQ: pred = "ueq"; break;
+ case FCmpInst::FCMP_UGT: pred = "ugt"; break;
+ case FCmpInst::FCMP_UGE: pred = "uge"; break;
+ case FCmpInst::FCMP_ULT: pred = "ult"; break;
+ case FCmpInst::FCMP_ULE: pred = "ule"; break;
+ case FCmpInst::FCMP_UNE: pred = "une"; break;
+ case FCmpInst::FCMP_TRUE: pred = "true"; break;
+ case ICmpInst::ICMP_EQ: pred = "eq"; break;
+ case ICmpInst::ICMP_NE: pred = "ne"; break;
+ case ICmpInst::ICMP_SGT: pred = "sgt"; break;
+ case ICmpInst::ICMP_SGE: pred = "sge"; break;
+ case ICmpInst::ICMP_SLT: pred = "slt"; break;
+ case ICmpInst::ICMP_SLE: pred = "sle"; break;
+ case ICmpInst::ICMP_UGT: pred = "ugt"; break;
+ case ICmpInst::ICMP_UGE: pred = "uge"; break;
+ case ICmpInst::ICMP_ULT: pred = "ult"; break;
+ case ICmpInst::ICMP_ULE: pred = "ule"; break;
+ }
+ return pred;
+}
+
+static void writeAtomicRMWOperation(raw_ostream &Out,
+ AtomicRMWInst::BinOp Op) {
+ switch (Op) {
+ default: Out << " <unknown operation " << Op << ">"; break;
+ case AtomicRMWInst::Xchg: Out << " xchg"; break;
+ case AtomicRMWInst::Add: Out << " add"; break;
+ case AtomicRMWInst::Sub: Out << " sub"; break;
+ case AtomicRMWInst::And: Out << " and"; break;
+ case AtomicRMWInst::Nand: Out << " nand"; break;
+ case AtomicRMWInst::Or: Out << " or"; break;
+ case AtomicRMWInst::Xor: Out << " xor"; break;
+ case AtomicRMWInst::Max: Out << " max"; break;
+ case AtomicRMWInst::Min: Out << " min"; break;
+ case AtomicRMWInst::UMax: Out << " umax"; break;
+ case AtomicRMWInst::UMin: Out << " umin"; break;
+ }
+}
+
+static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
+ if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U)) {
+ // Unsafe algebra implies all the others, no need to write them all out
+ if (FPO->hasUnsafeAlgebra())
+ Out << " fast";
+ else {
+ if (FPO->hasNoNaNs())
+ Out << " nnan";
+ if (FPO->hasNoInfs())
+ Out << " ninf";
+ if (FPO->hasNoSignedZeros())
+ Out << " nsz";
+ if (FPO->hasAllowReciprocal())
+ Out << " arcp";
+ }
+ }
+
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(U)) {
+ if (OBO->hasNoUnsignedWrap())
+ Out << " nuw";
+ if (OBO->hasNoSignedWrap())
+ Out << " nsw";
+ } else if (const PossiblyExactOperator *Div =
+ dyn_cast<PossiblyExactOperator>(U)) {
+ if (Div->isExact())
+ Out << " exact";
+ } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+ if (GEP->isInBounds())
+ Out << " inbounds";
+ }
+}
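+
+// E.g. an add with both wrap flags prints as "add nuw nsw", while a
+// fast-math fadd prints the single "fast" keyword in place of the
+// individual nnan/ninf/nsz/arcp flags.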
+
+static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
+ TypePrinting &TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ if (CI->getType()->isIntegerTy(1)) {
+ Out << (CI->getZExtValue() ? "true" : "false");
+ return;
+ }
+ Out << CI->getValue();
+ return;
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle ||
+ &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble) {
+ // We would like to output the FP constant value in exponential notation,
+ // but we cannot do this if doing so will lose precision. Check here to
+ // make sure that we only output it in exponential format if we can parse
+ // the value back and get the same value.
+ //
+ bool ignored;
+ bool isHalf = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEhalf;
+ bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
+ bool isInf = CFP->getValueAPF().isInfinity();
+ bool isNaN = CFP->getValueAPF().isNaN();
+ if (!isHalf && !isInf && !isNaN) {
+ double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
+ CFP->getValueAPF().convertToFloat();
+ SmallString<128> StrVal;
+ raw_svector_ostream(StrVal) << Val;
+
+          // Check to make sure that the stringized number is not some string
+          // like "Inf" or "NaN" that atof will accept but the lexer will not;
+          // check that the string matches the "[-+]?[0-9]" regex.
+ //
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) {
+ // Reparse stringized version!
+ if (APFloat(APFloat::IEEEdouble, StrVal).convertToDouble() == Val) {
+ Out << StrVal.str();
+ return;
+ }
+ }
+ }
+ // Otherwise we could not reparse it to exactly the same value, so we must
+ // output the string in hexadecimal format! Note that loading and storing
+ // floating point types changes the bits of NaNs on some hosts, notably
+ // x86, so we must not use these types.
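+      // (For example, 1.5 reparses exactly and stays in decimal form as
+      // 1.500000e+00, while a float 0.1f widened to double does not and is
+      // printed as a hex literal such as 0x3FB99999A0000000.)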
+ assert(sizeof(double) == sizeof(uint64_t) &&
+ "assuming that double is 64 bits!");
+ char Buffer[40];
+ APFloat apf = CFP->getValueAPF();
+ // Halves and floats are represented in ASCII IR as double, convert.
+ if (!isDouble)
+ apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ Out << "0x" <<
+ utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()),
+ Buffer+40);
+ return;
+ }
+
+ // Either half, or some form of long double.
+ // These appear as a magic letter identifying the type, then a
+ // fixed number of hex digits.
+ Out << "0x";
+ // Bit position, in the current word, of the next nibble to print.
+ int shiftcount;
+
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) {
+ Out << 'K';
+ // api needed to prevent premature destruction
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t* p = api.getRawData();
+ uint64_t word = p[1];
+ shiftcount = 12;
+ int width = api.getBitWidth();
+ for (int j=0; j<width; j+=4, shiftcount-=4) {
+ unsigned int nibble = (word>>shiftcount) & 15;
+ if (nibble < 10)
+ Out << (unsigned char)(nibble + '0');
+ else
+ Out << (unsigned char)(nibble - 10 + 'A');
+ if (shiftcount == 0 && j+4 < width) {
+ word = *p;
+ shiftcount = 64;
+ if (width-j-4 < 64)
+ shiftcount = width-j-4;
+ }
+ }
+ return;
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) {
+ shiftcount = 60;
+ Out << 'L';
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) {
+ shiftcount = 60;
+ Out << 'M';
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf) {
+ shiftcount = 12;
+ Out << 'H';
+ } else
+ llvm_unreachable("Unsupported floating point type");
+  // Keep the APInt in a local ("api") so its raw data is not destroyed
+  // while we are still reading from it.
+ APInt api = CFP->getValueAPF().bitcastToAPInt();
+ const uint64_t* p = api.getRawData();
+ uint64_t word = *p;
+ int width = api.getBitWidth();
+ for (int j=0; j<width; j+=4, shiftcount-=4) {
+ unsigned int nibble = (word>>shiftcount) & 15;
+ if (nibble < 10)
+ Out << (unsigned char)(nibble + '0');
+ else
+ Out << (unsigned char)(nibble - 10 + 'A');
+ if (shiftcount == 0 && j+4 < width) {
+ word = *(++p);
+ shiftcount = 64;
+ if (width-j-4 < 64)
+ shiftcount = width-j-4;
+ }
+ }
+ return;
+ }
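+  // Illustrative examples of the encodings above (assumed values): half 1.0
+  // prints as 0xH3C00, x86_fp80 1.0 prints as 0xK3FFF8000000000000000, and
+  // fp128 and ppc_fp128 values use the 'L' and 'M' prefixes respectively.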
+
+ if (isa<ConstantAggregateZero>(CV)) {
+ Out << "zeroinitializer";
+ return;
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ Out << "blockaddress(";
+ WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
+ Context);
+ Out << ", ";
+ WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine,
+ Context);
+ Out << ")";
+ return;
+ }
+
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ Type *ETy = CA->getType()->getElementType();
+ Out << '[';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(0),
+ &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ Out << ']';
+ return;
+ }
+
+ if (const ConstantDataArray *CA = dyn_cast<ConstantDataArray>(CV)) {
+    // As a special case, print the array as a string if it is an array of
+    // i8 elements.
+ if (CA->isString()) {
+ Out << "c\"";
+ PrintEscapedString(CA->getAsString(), Out);
+ Out << '"';
+ return;
+ }
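+    // e.g. (illustrative) a 3-element i8 array holding "hi\0" prints as:
+    //   c"hi\00"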
+
+ Type *ETy = CA->getType()->getElementType();
+ Out << '[';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(0),
+ &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CA->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(i), &TypePrinter,
+ Machine, Context);
+ }
+ Out << ']';
+ return;
+ }
+
+ if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ if (CS->getType()->isPacked())
+ Out << '<';
+ Out << '{';
+ unsigned N = CS->getNumOperands();
+ if (N) {
+ Out << ' ';
+ TypePrinter.print(CS->getOperand(0)->getType(), Out);
+ Out << ' ';
+
+ WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine,
+ Context);
+
+ for (unsigned i = 1; i < N; i++) {
+ Out << ", ";
+ TypePrinter.print(CS->getOperand(i)->getType(), Out);
+ Out << ' ';
+
+ WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ Out << ' ';
+ }
+
+ Out << '}';
+ if (CS->getType()->isPacked())
+ Out << '>';
+ return;
+ }
+
+ if (isa<ConstantVector>(CV) || isa<ConstantDataVector>(CV)) {
+ Type *ETy = CV->getType()->getVectorElementType();
+ Out << '<';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CV->getAggregateElement(0U), &TypePrinter,
+ Machine, Context);
+ for (unsigned i = 1, e = CV->getType()->getVectorNumElements(); i != e;++i){
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CV->getAggregateElement(i), &TypePrinter,
+ Machine, Context);
+ }
+ Out << '>';
+ return;
+ }
+
+ if (isa<ConstantPointerNull>(CV)) {
+ Out << "null";
+ return;
+ }
+
+ if (isa<UndefValue>(CV)) {
+ Out << "undef";
+ return;
+ }
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ Out << CE->getOpcodeName();
+ WriteOptimizationInfo(Out, CE);
+ if (CE->isCompare())
+ Out << ' ' << getPredicateText(CE->getPredicate());
+ Out << " (";
+
+ for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
+ TypePrinter.print((*OI)->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
+ if (OI+1 != CE->op_end())
+ Out << ", ";
+ }
+
+ if (CE->hasIndices()) {
+ ArrayRef<unsigned> Indices = CE->getIndices();
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+ Out << ", " << Indices[i];
+ }
+
+ if (CE->isCast()) {
+ Out << " to ";
+ TypePrinter.print(CE->getType(), Out);
+ }
+
+ Out << ')';
+ return;
+ }
+
+ Out << "<placeholder or erroneous Constant>";
+}
+
+static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!{";
+ for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
+ const Value *V = Node->getOperand(mi);
+ if (V == 0)
+ Out << "null";
+ else {
+ TypePrinter->print(V->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, Node->getOperand(mi),
+ TypePrinter, Machine, Context);
+ }
+ if (mi + 1 != me)
+ Out << ", ";
+ }
+
+ Out << "}";
+}
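+// Illustrative output (assumed operands) for a three-operand node:
+//
+//   !{i32 42, metadata !"kind", null}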
+
+
+/// WriteAsOperand - Write the name of the specified value out to the specified
+/// ostream. This can be useful when you just want to print 'int %reg126',
+/// not the whole instruction that generated it.
+///
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ if (V->hasName()) {
+ PrintLLVMName(Out, V);
+ return;
+ }
+
+ const Constant *CV = dyn_cast<Constant>(V);
+ if (CV && !isa<GlobalValue>(CV)) {
+ assert(TypePrinter && "Constants require TypePrinting!");
+ WriteConstantInternal(Out, CV, *TypePrinter, Machine, Context);
+ return;
+ }
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ Out << "asm ";
+ if (IA->hasSideEffects())
+ Out << "sideeffect ";
+ if (IA->isAlignStack())
+ Out << "alignstack ";
+ // We don't emit the AD_ATT dialect as it's the assumed default.
+ if (IA->getDialect() == InlineAsm::AD_Intel)
+ Out << "inteldialect ";
+ Out << '"';
+ PrintEscapedString(IA->getAsmString(), Out);
+ Out << "\", \"";
+ PrintEscapedString(IA->getConstraintString(), Out);
+ Out << '"';
+ return;
+ }
+
+ if (const MDNode *N = dyn_cast<MDNode>(V)) {
+ if (N->isFunctionLocal()) {
+ // Print metadata inline, not via slot reference number.
+ WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine, Context);
+ return;
+ }
+
+ if (!Machine) {
+ if (N->isFunctionLocal())
+ Machine = new SlotTracker(N->getFunction());
+ else
+ Machine = new SlotTracker(Context);
+ }
+ int Slot = Machine->getMetadataSlot(N);
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
+ return;
+ }
+
+ if (const MDString *MDS = dyn_cast<MDString>(V)) {
+ Out << "!\"";
+ PrintEscapedString(MDS->getString(), Out);
+ Out << '"';
+ return;
+ }
+
+ if (V->getValueID() == Value::PseudoSourceValueVal ||
+ V->getValueID() == Value::FixedStackPseudoSourceValueVal) {
+ V->print(Out);
+ return;
+ }
+
+ char Prefix = '%';
+ int Slot;
+ // If we have a SlotTracker, use it.
+ if (Machine) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Slot = Machine->getGlobalSlot(GV);
+ Prefix = '@';
+ } else {
+ Slot = Machine->getLocalSlot(V);
+
+      // If looking up a local slot failed, we may be referring to a value from
+      // a different function. Translate it, as this can happen when taking the
+      // address of a block in another function.
+ if (Slot == -1)
+ if ((Machine = createSlotTracker(V))) {
+ Slot = Machine->getLocalSlot(V);
+ delete Machine;
+ }
+ }
+ } else if ((Machine = createSlotTracker(V))) {
+ // Otherwise, create one to get the # and then destroy it.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Slot = Machine->getGlobalSlot(GV);
+ Prefix = '@';
+ } else {
+ Slot = Machine->getLocalSlot(V);
+ }
+ delete Machine;
+ Machine = 0;
+ } else {
+ Slot = -1;
+ }
+
+ if (Slot != -1)
+ Out << Prefix << Slot;
+ else
+ Out << "<badref>";
+}
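+// For example (illustrative): an unnamed local value prints as "%3", an
+// unnamed global as "@0", and a value with no slot at all as "<badref>".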
+
+void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
+ bool PrintType, const Module *Context) {
+
+ // Fast path: Don't construct and populate a TypePrinting object if we
+ // won't be needing any types printed.
+ if (!PrintType &&
+ ((!isa<Constant>(V) && !isa<MDNode>(V)) ||
+ V->hasName() || isa<GlobalValue>(V))) {
+ WriteAsOperandInternal(Out, V, 0, 0, Context);
+ return;
+ }
+
+ if (Context == 0) Context = getModuleFromVal(V);
+
+ TypePrinting TypePrinter;
+ if (Context)
+ TypePrinter.incorporateTypes(*Context);
+ if (PrintType) {
+ TypePrinter.print(V->getType(), Out);
+ Out << ' ';
+ }
+
+ WriteAsOperandInternal(Out, V, &TypePrinter, 0, Context);
+}
+
+namespace {
+
+class AssemblyWriter {
+ formatted_raw_ostream &Out;
+ SlotTracker &Machine;
+ const Module *TheModule;
+ TypePrinting TypePrinter;
+ AssemblyAnnotationWriter *AnnotationWriter;
+
+public:
+ inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
+ const Module *M,
+ AssemblyAnnotationWriter *AAW)
+ : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
+ if (M)
+ TypePrinter.incorporateTypes(*M);
+ }
+
+ void printMDNodeBody(const MDNode *MD);
+ void printNamedMDNode(const NamedMDNode *NMD);
+
+ void printModule(const Module *M);
+
+ void writeOperand(const Value *Op, bool PrintType);
+ void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx);
+ void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
+
+ void writeAllMDNodes();
+ void writeAllAttributeGroups();
+
+ void printTypeIdentities();
+ void printGlobal(const GlobalVariable *GV);
+ void printAlias(const GlobalAlias *GV);
+ void printFunction(const Function *F);
+ void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx);
+ void printBasicBlock(const BasicBlock *BB);
+ void printInstruction(const Instruction &I);
+
+private:
+ // printInfoComment - Print a little comment after the instruction indicating
+ // which slot it occupies.
+ void printInfoComment(const Value &V);
+};
+} // end of anonymous namespace
+
+void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
+ if (Operand == 0) {
+ Out << "<null operand!>";
+ return;
+ }
+ if (PrintType) {
+ TypePrinter.print(Operand->getType(), Out);
+ Out << ' ';
+ }
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+}
+
+void AssemblyWriter::writeAtomic(AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Ordering == NotAtomic)
+ return;
+
+ switch (SynchScope) {
+ case SingleThread: Out << " singlethread"; break;
+ case CrossThread: break;
+ }
+
+ switch (Ordering) {
+ default: Out << " <bad ordering " << int(Ordering) << ">"; break;
+ case Unordered: Out << " unordered"; break;
+ case Monotonic: Out << " monotonic"; break;
+ case Acquire: Out << " acquire"; break;
+ case Release: Out << " release"; break;
+ case AcquireRelease: Out << " acq_rel"; break;
+ case SequentiallyConsistent: Out << " seq_cst"; break;
+ }
+}
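+// e.g. (illustrative) an atomic load with single-thread scope and seq_cst
+// ordering would carry the suffix printed above:
+//
+//   %v = load atomic i32* %p singlethread seq_cst, align 4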
+
+void AssemblyWriter::writeParamOperand(const Value *Operand,
+ AttributeSet Attrs, unsigned Idx) {
+ if (Operand == 0) {
+ Out << "<null operand!>";
+ return;
+ }
+
+ // Print the type
+ TypePrinter.print(Operand->getType(), Out);
+ // Print parameter attributes list
+ if (Attrs.hasAttributes(Idx))
+ Out << ' ' << Attrs.getAsString(Idx);
+ Out << ' ';
+ // Print the operand
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+}
+
+void AssemblyWriter::printModule(const Module *M) {
+ Machine.initialize();
+
+ if (!M->getModuleIdentifier().empty() &&
+ // Don't print the ID if it will start a new line (which would
+ // require a comment char before it).
+ M->getModuleIdentifier().find('\n') == std::string::npos)
+ Out << "; ModuleID = '" << M->getModuleIdentifier() << "'\n";
+
+ if (!M->getDataLayout().empty())
+ Out << "target datalayout = \"" << M->getDataLayout() << "\"\n";
+ if (!M->getTargetTriple().empty())
+ Out << "target triple = \"" << M->getTargetTriple() << "\"\n";
+
+ if (!M->getModuleInlineAsm().empty()) {
+ // Split the string into lines, to make it easier to read the .ll file.
+ std::string Asm = M->getModuleInlineAsm();
+ size_t CurPos = 0;
+ size_t NewLine = Asm.find_first_of('\n', CurPos);
+ Out << '\n';
+ while (NewLine != std::string::npos) {
+ // We found a newline, print the portion of the asm string from the
+ // last newline up to this newline.
+ Out << "module asm \"";
+ PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
+ Out);
+ Out << "\"\n";
+ CurPos = NewLine+1;
+ NewLine = Asm.find_first_of('\n', CurPos);
+ }
+ std::string rest(Asm.begin()+CurPos, Asm.end());
+ if (!rest.empty()) {
+ Out << "module asm \"";
+ PrintEscapedString(rest, Out);
+ Out << "\"\n";
+ }
+ }
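+  // Illustrative result of the splitting above (assumed input): the module
+  // asm string ".globl foo\nfoo:" is emitted as two lines:
+  //   module asm ".globl foo"
+  //   module asm "foo:"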
+
+ printTypeIdentities();
+
+ // Output all globals.
+ if (!M->global_empty()) Out << '\n';
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ printGlobal(I); Out << '\n';
+ }
+
+ // Output all aliases.
+ if (!M->alias_empty()) Out << "\n";
+ for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
+ printAlias(I);
+
+ // Output all of the functions.
+ for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
+ printFunction(I);
+
+ // Output all attribute groups.
+ if (!Machine.as_empty()) {
+ Out << '\n';
+ writeAllAttributeGroups();
+ }
+
+ // Output named metadata.
+ if (!M->named_metadata_empty()) Out << '\n';
+
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I)
+ printNamedMDNode(I);
+
+ // Output metadata.
+ if (!Machine.mdn_empty()) {
+ Out << '\n';
+ writeAllMDNodes();
+ }
+}
+
+void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
+ Out << '!';
+ StringRef Name = NMD->getName();
+ if (Name.empty()) {
+ Out << "<empty name> ";
+ } else {
+ if (isalpha(static_cast<unsigned char>(Name[0])) ||
+ Name[0] == '-' || Name[0] == '$' ||
+ Name[0] == '.' || Name[0] == '_')
+ Out << Name[0];
+ else
+ Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F);
+ for (unsigned i = 1, e = Name.size(); i != e; ++i) {
+ unsigned char C = Name[i];
+ if (isalnum(static_cast<unsigned char>(C)) || C == '-' || C == '$' ||
+ C == '.' || C == '_')
+ Out << C;
+ else
+ Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+ }
+ Out << " = !{";
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ if (i) Out << ", ";
+ int Slot = Machine.getMetadataSlot(NMD->getOperand(i));
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
+ }
+ Out << "}\n";
+}
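+// Illustrative output (assumed): a named node with two operands prints as
+//
+//   !llvm.module.flags = !{!0, !1}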
+
+
+static void PrintLinkage(GlobalValue::LinkageTypes LT,
+ formatted_raw_ostream &Out) {
+ switch (LT) {
+ case GlobalValue::ExternalLinkage: break;
+ case GlobalValue::PrivateLinkage: Out << "private "; break;
+ case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "linker_private_weak ";
+ break;
+ case GlobalValue::InternalLinkage: Out << "internal "; break;
+ case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
+ case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ Out << "linkonce_odr_auto_hide ";
+ break;
+ case GlobalValue::WeakAnyLinkage: Out << "weak "; break;
+ case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break;
+ case GlobalValue::CommonLinkage: Out << "common "; break;
+ case GlobalValue::AppendingLinkage: Out << "appending "; break;
+ case GlobalValue::DLLImportLinkage: Out << "dllimport "; break;
+ case GlobalValue::DLLExportLinkage: Out << "dllexport "; break;
+ case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "available_externally ";
+ break;
+ }
+}
+
+
+static void PrintVisibility(GlobalValue::VisibilityTypes Vis,
+ formatted_raw_ostream &Out) {
+ switch (Vis) {
+ case GlobalValue::DefaultVisibility: break;
+ case GlobalValue::HiddenVisibility: Out << "hidden "; break;
+ case GlobalValue::ProtectedVisibility: Out << "protected "; break;
+ }
+}
+
+static void PrintThreadLocalModel(GlobalVariable::ThreadLocalMode TLM,
+ formatted_raw_ostream &Out) {
+ switch (TLM) {
+ case GlobalVariable::NotThreadLocal:
+ break;
+ case GlobalVariable::GeneralDynamicTLSModel:
+ Out << "thread_local ";
+ break;
+ case GlobalVariable::LocalDynamicTLSModel:
+ Out << "thread_local(localdynamic) ";
+ break;
+ case GlobalVariable::InitialExecTLSModel:
+ Out << "thread_local(initialexec) ";
+ break;
+ case GlobalVariable::LocalExecTLSModel:
+ Out << "thread_local(localexec) ";
+ break;
+ }
+}
+
+void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
+ if (GV->isMaterializable())
+ Out << "; Materializable\n";
+
+ WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine, GV->getParent());
+ Out << " = ";
+
+ if (!GV->hasInitializer() && GV->hasExternalLinkage())
+ Out << "external ";
+
+ PrintLinkage(GV->getLinkage(), Out);
+ PrintVisibility(GV->getVisibility(), Out);
+ PrintThreadLocalModel(GV->getThreadLocalMode(), Out);
+
+ if (unsigned AddressSpace = GV->getType()->getAddressSpace())
+ Out << "addrspace(" << AddressSpace << ") ";
+ if (GV->hasUnnamedAddr()) Out << "unnamed_addr ";
+ if (GV->isExternallyInitialized()) Out << "externally_initialized ";
+ Out << (GV->isConstant() ? "constant " : "global ");
+ TypePrinter.print(GV->getType()->getElementType(), Out);
+
+ if (GV->hasInitializer()) {
+ Out << ' ';
+ writeOperand(GV->getInitializer(), false);
+ }
+
+ if (GV->hasSection()) {
+ Out << ", section \"";
+ PrintEscapedString(GV->getSection(), Out);
+ Out << '"';
+ }
+ if (GV->getAlignment())
+ Out << ", align " << GV->getAlignment();
+
+ printInfoComment(*GV);
+}
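+// Illustrative output (assumed values):
+//
+//   @g = private thread_local(initialexec) global i32 0, align 4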
+
+void AssemblyWriter::printAlias(const GlobalAlias *GA) {
+ if (GA->isMaterializable())
+ Out << "; Materializable\n";
+
+ // Don't crash when dumping partially built GA
+ if (!GA->hasName())
+ Out << "<<nameless>> = ";
+ else {
+ PrintLLVMName(Out, GA);
+ Out << " = ";
+ }
+ PrintVisibility(GA->getVisibility(), Out);
+
+ Out << "alias ";
+
+ PrintLinkage(GA->getLinkage(), Out);
+
+ const Constant *Aliasee = GA->getAliasee();
+
+ if (Aliasee == 0) {
+ TypePrinter.print(GA->getType(), Out);
+ Out << " <<NULL ALIASEE>>";
+ } else {
+ writeOperand(Aliasee, !isa<ConstantExpr>(Aliasee));
+ }
+
+ printInfoComment(*GA);
+ Out << '\n';
+}
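+// Illustrative output (assumed values):
+//
+//   @a = hidden alias weak i32* @g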
+
+void AssemblyWriter::printTypeIdentities() {
+ if (TypePrinter.NumberedTypes.empty() &&
+ TypePrinter.NamedTypes.empty())
+ return;
+
+ Out << '\n';
+
+  // We know the number assigned to each type and that the numbering is a
+  // dense assignment. Convert the map to an index table.
+ std::vector<StructType*> NumberedTypes(TypePrinter.NumberedTypes.size());
+ for (DenseMap<StructType*, unsigned>::iterator I =
+ TypePrinter.NumberedTypes.begin(), E = TypePrinter.NumberedTypes.end();
+ I != E; ++I) {
+ assert(I->second < NumberedTypes.size() && "Didn't get a dense numbering?");
+ NumberedTypes[I->second] = I->first;
+ }
+
+ // Emit all numbered types.
+ for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) {
+ Out << '%' << i << " = type ";
+
+ // Make sure we print out at least one level of the type structure, so
+ // that we do not get %2 = type %2
+ TypePrinter.printStructBody(NumberedTypes[i], Out);
+ Out << '\n';
+ }
+
+ for (unsigned i = 0, e = TypePrinter.NamedTypes.size(); i != e; ++i) {
+ PrintLLVMName(Out, TypePrinter.NamedTypes[i]->getName(), LocalPrefix);
+ Out << " = type ";
+
+ // Make sure we print out at least one level of the type structure, so
+ // that we do not get %FILE = type %FILE
+ TypePrinter.printStructBody(TypePrinter.NamedTypes[i], Out);
+ Out << '\n';
+ }
+}
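+// Illustrative output (assumed types):
+//
+//   %0 = type { i32, %struct.pair* }
+//   %struct.pair = type { i32, i32 }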
+
+/// printFunction - Print all aspects of a function.
+///
+void AssemblyWriter::printFunction(const Function *F) {
+ // Print out the return type and name.
+ Out << '\n';
+
+ if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out);
+
+ if (F->isMaterializable())
+ Out << "; Materializable\n";
+
+ if (F->isDeclaration())
+ Out << "declare ";
+ else
+ Out << "define ";
+
+ PrintLinkage(F->getLinkage(), Out);
+ PrintVisibility(F->getVisibility(), Out);
+
+ // Print the calling convention.
+ if (F->getCallingConv() != CallingConv::C) {
+ PrintCallingConv(F->getCallingConv(), Out);
+ Out << " ";
+ }
+
+ FunctionType *FT = F->getFunctionType();
+ const AttributeSet &Attrs = F->getAttributes();
+ if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
+ Out << Attrs.getAsString(AttributeSet::ReturnIndex) << ' ';
+ TypePrinter.print(F->getReturnType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
+ Out << '(';
+ Machine.incorporateFunction(F);
+
+ // Loop over the arguments, printing them...
+
+ unsigned Idx = 1;
+ if (!F->isDeclaration()) {
+ // If this isn't a declaration, print the argument names as well.
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I) {
+ // Insert commas as we go... the first arg doesn't get a comma
+ if (I != F->arg_begin()) Out << ", ";
+ printArgument(I, Attrs, Idx);
+ Idx++;
+ }
+ } else {
+ // Otherwise, print the types from the function type.
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ // Insert commas as we go... the first arg doesn't get a comma
+ if (i) Out << ", ";
+
+ // Output type...
+ TypePrinter.print(FT->getParamType(i), Out);
+
+ if (Attrs.hasAttributes(i+1))
+ Out << ' ' << Attrs.getAsString(i+1);
+ }
+ }
+
+ // Finish printing arguments...
+ if (FT->isVarArg()) {
+ if (FT->getNumParams()) Out << ", ";
+ Out << "..."; // Output varargs portion of signature!
+ }
+ Out << ')';
+ if (F->hasUnnamedAddr())
+ Out << " unnamed_addr";
+ if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttributes());
+ if (F->hasSection()) {
+ Out << " section \"";
+ PrintEscapedString(F->getSection(), Out);
+ Out << '"';
+ }
+ if (F->getAlignment())
+ Out << " align " << F->getAlignment();
+ if (F->hasGC())
+ Out << " gc \"" << F->getGC() << '"';
+ if (F->isDeclaration()) {
+ Out << '\n';
+ } else {
+ Out << " {";
+ // Output all of the function's basic blocks.
+ for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
+ printBasicBlock(I);
+
+ Out << "}\n";
+ }
+
+ Machine.purgeFunction();
+}
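+// Illustrative outputs (assumed signatures): a declaration prints as
+//
+//   declare i32 @printf(i8*, ...)
+//
+// and a definition as
+//
+//   define internal fastcc i32 @helper(i32 %x) #0 { ... }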
+
+/// printArgument - This member is called for every argument that is passed into
+/// the function. Simply print it out.
+///
+void AssemblyWriter::printArgument(const Argument *Arg,
+ AttributeSet Attrs, unsigned Idx) {
+ // Output type...
+ TypePrinter.print(Arg->getType(), Out);
+
+ // Output parameter attributes list
+ if (Attrs.hasAttributes(Idx))
+ Out << ' ' << Attrs.getAsString(Idx);
+
+ // Output name, if available...
+ if (Arg->hasName()) {
+ Out << ' ';
+ PrintLLVMName(Out, Arg);
+ }
+}
+
+/// printBasicBlock - This member is called for each basic block in a function.
+///
+void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
+ if (BB->hasName()) { // Print out the label if it exists...
+ Out << "\n";
+ PrintLLVMName(Out, BB->getName(), LabelPrefix);
+ Out << ':';
+  } else if (!BB->use_empty()) {      // Don't print a block # if it has no uses.
+ Out << "\n; <label>:";
+ int Slot = Machine.getLocalSlot(BB);
+ if (Slot != -1)
+ Out << Slot;
+ else
+ Out << "<badref>";
+ }
+
+ if (BB->getParent() == 0) {
+ Out.PadToColumn(50);
+ Out << "; Error: Block without parent!";
+ } else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
+ // Output predecessors for the block.
+ Out.PadToColumn(50);
+ Out << ";";
+ const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+ if (PI == PE) {
+ Out << " No predecessors!";
+ } else {
+ Out << " preds = ";
+ writeOperand(*PI, false);
+ for (++PI; PI != PE; ++PI) {
+ Out << ", ";
+ writeOperand(*PI, false);
+ }
+ }
+ }
+
+ Out << "\n";
+
+ if (AnnotationWriter) AnnotationWriter->emitBasicBlockStartAnnot(BB, Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ printInstruction(*I);
+ Out << '\n';
+ }
+
+ if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
+}
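+// Illustrative output (assumed): an unnamed block with uses prints a label
+// comment followed by its predecessor list, e.g.
+//
+//   ; <label>:5                                       ; preds = %entry, %loop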
+
+/// printInfoComment - Print a little comment after the instruction indicating
+/// which slot it occupies.
+///
+void AssemblyWriter::printInfoComment(const Value &V) {
+ if (AnnotationWriter) {
+ AnnotationWriter->printInfoComment(V, Out);
+ return;
+ }
+}
+
+// This member is called for each Instruction in a function.
+void AssemblyWriter::printInstruction(const Instruction &I) {
+ if (AnnotationWriter) AnnotationWriter->emitInstructionAnnot(&I, Out);
+
+ // Print out indentation for an instruction.
+ Out << " ";
+
+ // Print out name if it exists...
+ if (I.hasName()) {
+ PrintLLVMName(Out, &I);
+ Out << " = ";
+ } else if (!I.getType()->isVoidTy()) {
+ // Print out the def slot taken.
+ int SlotNum = Machine.getLocalSlot(&I);
+ if (SlotNum == -1)
+ Out << "<badref> = ";
+ else
+ Out << '%' << SlotNum << " = ";
+ }
+
+ if (isa<CallInst>(I) && cast<CallInst>(I).isTailCall())
+ Out << "tail ";
+
+ // Print out the opcode...
+ Out << I.getOpcodeName();
+
+ // If this is an atomic load or store, print out the atomic marker.
+ if ((isa<LoadInst>(I) && cast<LoadInst>(I).isAtomic()) ||
+ (isa<StoreInst>(I) && cast<StoreInst>(I).isAtomic()))
+ Out << " atomic";
+
+ // If this is a volatile operation, print out the volatile marker.
+ if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) ||
+ (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) ||
+ (isa<AtomicCmpXchgInst>(I) && cast<AtomicCmpXchgInst>(I).isVolatile()) ||
+ (isa<AtomicRMWInst>(I) && cast<AtomicRMWInst>(I).isVolatile()))
+ Out << " volatile";
+
+ // Print out optimization information.
+ WriteOptimizationInfo(Out, &I);
+
+ // Print out the compare instruction predicates
+ if (const CmpInst *CI = dyn_cast<CmpInst>(&I))
+ Out << ' ' << getPredicateText(CI->getPredicate());
+
+ // Print out the atomicrmw operation
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I))
+ writeAtomicRMWOperation(Out, RMWI->getOperation());
+
+ // Print out the type of the operands...
+ const Value *Operand = I.getNumOperands() ? I.getOperand(0) : 0;
+
+ // Special case conditional branches to swizzle the condition out to the front
+ if (isa<BranchInst>(I) && cast<BranchInst>(I).isConditional()) {
+ const BranchInst &BI(cast<BranchInst>(I));
+ Out << ' ';
+ writeOperand(BI.getCondition(), true);
+ Out << ", ";
+ writeOperand(BI.getSuccessor(0), true);
+ Out << ", ";
+ writeOperand(BI.getSuccessor(1), true);
+
+ } else if (isa<SwitchInst>(I)) {
+ const SwitchInst& SI(cast<SwitchInst>(I));
+ // Special case switch instruction to get formatting nice and correct.
+ Out << ' ';
+ writeOperand(SI.getCondition(), true);
+ Out << ", ";
+ writeOperand(SI.getDefaultDest(), true);
+ Out << " [";
+ for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ Out << "\n ";
+ writeOperand(i.getCaseValue(), true);
+ Out << ", ";
+ writeOperand(i.getCaseSuccessor(), true);
+ }
+ Out << "\n ]";
+ } else if (isa<IndirectBrInst>(I)) {
+ // Special case indirectbr instruction to get formatting nice and correct.
+ Out << ' ';
+ writeOperand(Operand, true);
+ Out << ", [";
+
+ for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+ if (i != 1)
+ Out << ", ";
+ writeOperand(I.getOperand(i), true);
+ }
+ Out << ']';
+ } else if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
+ Out << ' ';
+ TypePrinter.print(I.getType(), Out);
+ Out << ' ';
+
+ for (unsigned op = 0, Eop = PN->getNumIncomingValues(); op < Eop; ++op) {
+ if (op) Out << ", ";
+ Out << "[ ";
+ writeOperand(PN->getIncomingValue(op), false); Out << ", ";
+ writeOperand(PN->getIncomingBlock(op), false); Out << " ]";
+ }
+ } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&I)) {
+ Out << ' ';
+ writeOperand(I.getOperand(0), true);
+ for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
+ Out << ", " << *i;
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&I)) {
+ Out << ' ';
+ writeOperand(I.getOperand(0), true); Out << ", ";
+ writeOperand(I.getOperand(1), true);
+ for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
+ Out << ", " << *i;
+ } else if (const LandingPadInst *LPI = dyn_cast<LandingPadInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(I.getType(), Out);
+ Out << " personality ";
+ writeOperand(I.getOperand(0), true); Out << '\n';
+
+ if (LPI->isCleanup())
+ Out << " cleanup";
+
+ for (unsigned i = 0, e = LPI->getNumClauses(); i != e; ++i) {
+ if (i != 0 || LPI->isCleanup()) Out << "\n";
+ if (LPI->isCatch(i))
+ Out << " catch ";
+ else
+ Out << " filter ";
+
+ writeOperand(LPI->getClause(i), true);
+ }
+ } else if (isa<ReturnInst>(I) && !Operand) {
+ Out << " void";
+ } else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
+ // Print the calling convention being used.
+ if (CI->getCallingConv() != CallingConv::C) {
+ Out << " ";
+ PrintCallingConv(CI->getCallingConv(), Out);
+ }
+
+ Operand = CI->getCalledValue();
+ PointerType *PTy = cast<PointerType>(Operand->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Type *RetTy = FTy->getReturnType();
+ const AttributeSet &PAL = CI->getAttributes();
+
+ if (PAL.hasAttributes(AttributeSet::ReturnIndex))
+ Out << ' ' << PAL.getAsString(AttributeSet::ReturnIndex);
+
+    // If possible, print out the short form of the call instruction. We can
+    // only do this if the called value is a pointer to a non-vararg function,
+    // and if the return type is not a pointer to a function.
+ //
+ Out << ' ';
+ if (!FTy->isVarArg() &&
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
+ TypePrinter.print(RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
+ } else {
+ writeOperand(Operand, true);
+ }
+ Out << '(';
+ for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
+ if (op > 0)
+ Out << ", ";
+ writeParamOperand(CI->getArgOperand(op), PAL, op + 1);
+ }
+ Out << ')';
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+ Operand = II->getCalledValue();
+ PointerType *PTy = cast<PointerType>(Operand->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Type *RetTy = FTy->getReturnType();
+ const AttributeSet &PAL = II->getAttributes();
+
+ // Print the calling convention being used.
+ if (II->getCallingConv() != CallingConv::C) {
+ Out << " ";
+ PrintCallingConv(II->getCallingConv(), Out);
+ }
+
+ if (PAL.hasAttributes(AttributeSet::ReturnIndex))
+ Out << ' ' << PAL.getAsString(AttributeSet::ReturnIndex);
+
+    // If possible, print out the short form of the invoke instruction. We can
+    // only do this if the called value is a pointer to a non-vararg function,
+    // and if the return type is not a pointer to a function.
+ //
+ Out << ' ';
+ if (!FTy->isVarArg() &&
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
+ TypePrinter.print(RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
+ } else {
+ writeOperand(Operand, true);
+ }
+ Out << '(';
+ for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
+ if (op)
+ Out << ", ";
+ writeParamOperand(II->getArgOperand(op), PAL, op + 1);
+ }
+
+ Out << ')';
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+
+ Out << "\n to ";
+ writeOperand(II->getNormalDest(), true);
+ Out << " unwind ";
+ writeOperand(II->getUnwindDest(), true);
+
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(AI->getAllocatedType(), Out);
+ if (!AI->getArraySize() || AI->isArrayAllocation()) {
+ Out << ", ";
+ writeOperand(AI->getArraySize(), true);
+ }
+ if (AI->getAlignment()) {
+ Out << ", align " << AI->getAlignment();
+ }
+ } else if (isa<CastInst>(I)) {
+ if (Operand) {
+ Out << ' ';
+ writeOperand(Operand, true); // Work with broken code
+ }
+ Out << " to ";
+ TypePrinter.print(I.getType(), Out);
+ } else if (isa<VAArgInst>(I)) {
+ if (Operand) {
+ Out << ' ';
+ writeOperand(Operand, true); // Work with broken code
+ }
+ Out << ", ";
+ TypePrinter.print(I.getType(), Out);
+ } else if (Operand) { // Print the normal way.
+
+    // PrintAllTypes - Instructions whose operands all have the same type
+    // omit the type from all but the first operand. If the instruction has
+    // operands of different types (for example, br), then all the types are
+    // printed.
+ bool PrintAllTypes = false;
+ Type *TheType = Operand->getType();
+
+ // Select, Store and ShuffleVector always print all types.
+ if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I)
+ || isa<ReturnInst>(I)) {
+ PrintAllTypes = true;
+ } else {
+ for (unsigned i = 1, E = I.getNumOperands(); i != E; ++i) {
+ Operand = I.getOperand(i);
+        // Note that Operand shouldn't be null, but the check helps make dump()
+        // more tolerant of malformed IR.
+ if (Operand && Operand->getType() != TheType) {
+ PrintAllTypes = true; // We have differing types! Print them all!
+ break;
+ }
+ }
+ }
+
+ if (!PrintAllTypes) {
+ Out << ' ';
+ TypePrinter.print(TheType, Out);
+ }
+
+ Out << ' ';
+ for (unsigned i = 0, E = I.getNumOperands(); i != E; ++i) {
+ if (i) Out << ", ";
+ writeOperand(I.getOperand(i), PrintAllTypes);
+ }
+ }
+
+ // Print atomic ordering/alignment for memory operations
+ if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->isAtomic())
+ writeAtomic(LI->getOrdering(), LI->getSynchScope());
+ if (LI->getAlignment())
+ Out << ", align " << LI->getAlignment();
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ if (SI->isAtomic())
+ writeAtomic(SI->getOrdering(), SI->getSynchScope());
+ if (SI->getAlignment())
+ Out << ", align " << SI->getAlignment();
+ } else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+ writeAtomic(CXI->getOrdering(), CXI->getSynchScope());
+ } else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I)) {
+ writeAtomic(RMWI->getOrdering(), RMWI->getSynchScope());
+ } else if (const FenceInst *FI = dyn_cast<FenceInst>(&I)) {
+ writeAtomic(FI->getOrdering(), FI->getSynchScope());
+ }
+
+ // Print Metadata info.
+ SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD;
+ I.getAllMetadata(InstMD);
+ if (!InstMD.empty()) {
+ SmallVector<StringRef, 8> MDNames;
+ I.getType()->getContext().getMDKindNames(MDNames);
+ for (unsigned i = 0, e = InstMD.size(); i != e; ++i) {
+ unsigned Kind = InstMD[i].first;
+ if (Kind < MDNames.size()) {
+ Out << ", !" << MDNames[Kind];
+ } else {
+ Out << ", !<unknown kind #" << Kind << ">";
+ }
+ Out << ' ';
+ WriteAsOperandInternal(Out, InstMD[i].second, &TypePrinter, &Machine,
+ TheModule);
+ }
+ }
+ printInfoComment(I);
+}
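+// Illustrative end-to-end result (assumed operands): a load with metadata
+// attached prints as
+//
+//   %v = load i32* %p, align 4, !tbaa !2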
+
+static void WriteMDNodeComment(const MDNode *Node,
+ formatted_raw_ostream &Out) {
+ if (Node->getNumOperands() < 1)
+ return;
+
+ Value *Op = Node->getOperand(0);
+ if (!Op || !isa<ConstantInt>(Op) || cast<ConstantInt>(Op)->getBitWidth() < 32)
+ return;
+
+ DIDescriptor Desc(Node);
+ if (!Desc.Verify())
+ return;
+
+ unsigned Tag = Desc.getTag();
+ Out.PadToColumn(50);
+ if (dwarf::TagString(Tag)) {
+ Out << "; ";
+ Desc.print(Out);
+ } else if (Tag == dwarf::DW_TAG_user_base) {
+ Out << "; [ DW_TAG_user_base ]";
+ }
+}
+
+void AssemblyWriter::writeAllMDNodes() {
+ SmallVector<const MDNode *, 16> Nodes;
+ Nodes.resize(Machine.mdn_size());
+ for (SlotTracker::mdn_iterator I = Machine.mdn_begin(), E = Machine.mdn_end();
+ I != E; ++I)
+ Nodes[I->second] = cast<MDNode>(I->first);
+
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
+ Out << '!' << i << " = metadata ";
+ printMDNodeBody(Nodes[i]);
+ }
+}
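+// Illustrative output (assumed): each tracked node gets its own line, e.g.
+//
+//   !0 = metadata !{i32 1, metadata !"example"}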
+
+void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
+ WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
+ WriteMDNodeComment(Node, Out);
+ Out << "\n";
+}
+
+void AssemblyWriter::writeAllAttributeGroups() {
+ std::vector<std::pair<AttributeSet, unsigned> > asVec;
+ asVec.resize(Machine.as_size());
+
+ for (SlotTracker::as_iterator I = Machine.as_begin(), E = Machine.as_end();
+ I != E; ++I)
+ asVec[I->second] = *I;
+
+ for (std::vector<std::pair<AttributeSet, unsigned> >::iterator
+ I = asVec.begin(), E = asVec.end(); I != E; ++I)
+ Out << "attributes #" << I->second << " = { "
+ << I->first.getAsString(AttributeSet::FunctionIndex, true) << " }\n";
+}
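+// Illustrative output (assumed attributes):
+//
+//   attributes #0 = { nounwind readonly "no-frame-pointer-elim"="true" }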
+
+//===----------------------------------------------------------------------===//
+// External Interface declarations
+//===----------------------------------------------------------------------===//
+
+void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(this);
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, this, AAW);
+ W.printModule(this);
+}
+
+void NamedMDNode::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(getParent());
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, getParent(), AAW);
+ W.printNamedMDNode(this);
+}
+
+void Type::print(raw_ostream &OS) const {
+ if (this == 0) {
+ OS << "<null Type>";
+ return;
+ }
+ TypePrinting TP;
+ TP.print(const_cast<Type*>(this), OS);
+
+ // If the type is a named struct type, print the body as well.
+ if (StructType *STy = dyn_cast<StructType>(const_cast<Type*>(this)))
+ if (!STy->isLiteral()) {
+ OS << " = type ";
+ TP.printStructBody(STy, OS);
+ }
+}
+
+void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ if (this == 0) {
+ ROS << "printing a <null> value\n";
+ return;
+ }
+ formatted_raw_ostream OS(ROS);
+ if (const Instruction *I = dyn_cast<Instruction>(this)) {
+ const Function *F = I->getParent() ? I->getParent()->getParent() : 0;
+ SlotTracker SlotTable(F);
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), AAW);
+ W.printInstruction(*I);
+ } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
+ SlotTracker SlotTable(BB->getParent());
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), AAW);
+ W.printBasicBlock(BB);
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
+ SlotTracker SlotTable(GV->getParent());
+ AssemblyWriter W(OS, SlotTable, GV->getParent(), AAW);
+ if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
+ W.printGlobal(V);
+ else if (const Function *F = dyn_cast<Function>(GV))
+ W.printFunction(F);
+ else
+ W.printAlias(cast<GlobalAlias>(GV));
+ } else if (const MDNode *N = dyn_cast<MDNode>(this)) {
+ const Function *F = N->getFunction();
+ SlotTracker SlotTable(F);
+ AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
+ W.printMDNodeBody(N);
+ } else if (const Constant *C = dyn_cast<Constant>(this)) {
+ TypePrinting TypePrinter;
+ TypePrinter.print(C->getType(), OS);
+ OS << ' ';
+ WriteConstantInternal(OS, C, TypePrinter, 0, 0);
+ } else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
+ isa<Argument>(this)) {
+ WriteAsOperand(OS, this, true, 0);
+ } else {
+ // Otherwise we don't know what it is. Call the virtual function to
+ // allow a subclass to print itself.
+ printCustom(OS);
+ }
+}
+
+// Value::printCustom - subclasses should override this to implement printing.
+void Value::printCustom(raw_ostream &OS) const {
+ llvm_unreachable("Unknown value to print out!");
+}
+
+// Value::dump - allow easy printing of Values from the debugger.
+void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
+
+// Type::dump - allow easy printing of Types from the debugger.
+void Type::dump() const { print(dbgs()); }
+
+// Module::dump() - Allow printing of Modules from the debugger.
+void Module::dump() const { print(dbgs(), 0); }
+
+// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
+void NamedMDNode::dump() const { print(dbgs(), 0); }
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
new file mode 100644
index 000000000000..ad2670dade12
--- /dev/null
+++ b/lib/IR/AttributeImpl.h
@@ -0,0 +1,278 @@
+//===-- AttributeImpl.h - Attribute Internals -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines various helper methods and classes used by
+/// LLVMContextImpl for creating and managing attributes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ATTRIBUTESIMPL_H
+#define LLVM_ATTRIBUTESIMPL_H
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/IR/Attributes.h"
+#include <string>
+
+namespace llvm {
+
+class Constant;
+class LLVMContext;
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief A set of classes that contain the kind and (optional) value of the
+/// attribute object. There are three main categories: enum attribute entries,
+/// represented by Attribute::AttrKind; alignment attribute entries; and string
+/// attribute entries, which are for target-dependent attributes.
+class AttributeEntry {
+ unsigned char KindID;
+protected:
+ enum AttrEntryKind {
+ EnumAttrEntry,
+ AlignAttrEntry,
+ StringAttrEntry
+ };
+public:
+ AttributeEntry(AttrEntryKind Kind)
+ : KindID(Kind) {}
+ virtual ~AttributeEntry() {}
+
+ unsigned getKindID() const { return KindID; }
+
+ static inline bool classof(const AttributeEntry *) { return true; }
+};
+
+class EnumAttributeEntry : public AttributeEntry {
+ Attribute::AttrKind Kind;
+public:
+ EnumAttributeEntry(Attribute::AttrKind Kind)
+ : AttributeEntry(EnumAttrEntry), Kind(Kind) {}
+
+ Attribute::AttrKind getEnumKind() const { return Kind; }
+
+ static inline bool classof(const AttributeEntry *AE) {
+ return AE->getKindID() == EnumAttrEntry;
+ }
+ static inline bool classof(const EnumAttributeEntry *) { return true; }
+};
+
+class AlignAttributeEntry : public AttributeEntry {
+ Attribute::AttrKind Kind;
+ unsigned Align;
+public:
+ AlignAttributeEntry(Attribute::AttrKind Kind, unsigned Align)
+ : AttributeEntry(AlignAttrEntry), Kind(Kind), Align(Align) {}
+
+ Attribute::AttrKind getEnumKind() const { return Kind; }
+ unsigned getAlignment() const { return Align; }
+
+ static inline bool classof(const AttributeEntry *AE) {
+ return AE->getKindID() == AlignAttrEntry;
+ }
+ static inline bool classof(const AlignAttributeEntry *) { return true; }
+};
+
+class StringAttributeEntry : public AttributeEntry {
+ std::string Kind;
+ std::string Val;
+public:
+ StringAttributeEntry(StringRef Kind, StringRef Val = StringRef())
+ : AttributeEntry(StringAttrEntry), Kind(Kind), Val(Val) {}
+
+ StringRef getStringKind() const { return Kind; }
+ StringRef getStringValue() const { return Val; }
+
+ static inline bool classof(const AttributeEntry *AE) {
+ return AE->getKindID() == StringAttrEntry;
+ }
+ static inline bool classof(const StringAttributeEntry *) { return true; }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief This class represents a single, uniqued attribute. That attribute
+/// could be a single enum, a tuple, or a string.
+class AttributeImpl : public FoldingSetNode {
+ LLVMContext &Context; ///< Global context for uniquing objects
+
+ AttributeEntry *Entry; ///< Holds the kind and value of the attribute
+
+  // AttributeImpl is uniqued; these should not be publicly available.
+ void operator=(const AttributeImpl &) LLVM_DELETED_FUNCTION;
+ AttributeImpl(const AttributeImpl &) LLVM_DELETED_FUNCTION;
+public:
+ AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind);
+ AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind, unsigned Align);
+ AttributeImpl(LLVMContext &C, StringRef Kind, StringRef Val = StringRef());
+ ~AttributeImpl();
+
+ LLVMContext &getContext() { return Context; }
+
+ bool isEnumAttribute() const;
+ bool isAlignAttribute() const;
+ bool isStringAttribute() const;
+
+ bool hasAttribute(Attribute::AttrKind A) const;
+ bool hasAttribute(StringRef Kind) const;
+
+ Attribute::AttrKind getKindAsEnum() const;
+ uint64_t getValueAsInt() const;
+
+ StringRef getKindAsString() const;
+ StringRef getValueAsString() const;
+
+ /// \brief Used when sorting the attributes.
+ bool operator<(const AttributeImpl &AI) const;
+
+ void Profile(FoldingSetNodeID &ID) const {
+ if (isEnumAttribute())
+ Profile(ID, getKindAsEnum(), 0);
+ else if (isAlignAttribute())
+ Profile(ID, getKindAsEnum(), getValueAsInt());
+ else
+ Profile(ID, getKindAsString(), getValueAsString());
+ }
+ static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
+ uint64_t Val) {
+ ID.AddInteger(Kind);
+ if (Val) ID.AddInteger(Val);
+ }
+ static void Profile(FoldingSetNodeID &ID, StringRef Kind, StringRef Values) {
+ ID.AddString(Kind);
+ if (!Values.empty()) ID.AddString(Values);
+ }
+
+ // FIXME: Remove this!
+ static uint64_t getAttrMask(Attribute::AttrKind Val);
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief This class represents a group of attributes that apply to one
+/// element: function, return type, or parameter.
+class AttributeSetNode : public FoldingSetNode {
+ SmallVector<Attribute, 4> AttrList;
+
+ AttributeSetNode(ArrayRef<Attribute> Attrs)
+ : AttrList(Attrs.begin(), Attrs.end()) {}
+
+  // AttributeSetNode is uniqued; these should not be publicly available.
+ void operator=(const AttributeSetNode &) LLVM_DELETED_FUNCTION;
+ AttributeSetNode(const AttributeSetNode &) LLVM_DELETED_FUNCTION;
+public:
+ static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
+
+ bool hasAttribute(Attribute::AttrKind Kind) const;
+ bool hasAttribute(StringRef Kind) const;
+ bool hasAttributes() const { return !AttrList.empty(); }
+
+ Attribute getAttribute(Attribute::AttrKind Kind) const;
+ Attribute getAttribute(StringRef Kind) const;
+
+ unsigned getAlignment() const;
+ unsigned getStackAlignment() const;
+ std::string getAsString(bool InAttrGrp) const;
+
+ typedef SmallVectorImpl<Attribute>::iterator iterator;
+ typedef SmallVectorImpl<Attribute>::const_iterator const_iterator;
+
+ iterator begin() { return AttrList.begin(); }
+ iterator end() { return AttrList.end(); }
+
+ const_iterator begin() const { return AttrList.begin(); }
+ const_iterator end() const { return AttrList.end(); }
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, AttrList);
+ }
+ static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
+ for (unsigned I = 0, E = AttrList.size(); I != E; ++I)
+ AttrList[I].Profile(ID);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief This class represents a set of attributes that apply to the function,
+/// return type, and parameters.
+class AttributeSetImpl : public FoldingSetNode {
+ friend class AttributeSet;
+
+ LLVMContext &Context;
+
+ typedef std::pair<unsigned, AttributeSetNode*> IndexAttrPair;
+ SmallVector<IndexAttrPair, 4> AttrNodes;
+
+  // AttributeSetImpl is uniqued; these should not be publicly available.
+ void operator=(const AttributeSetImpl &) LLVM_DELETED_FUNCTION;
+ AttributeSetImpl(const AttributeSetImpl &) LLVM_DELETED_FUNCTION;
+public:
+ AttributeSetImpl(LLVMContext &C,
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> > attrs)
+ : Context(C), AttrNodes(attrs.begin(), attrs.end()) {}
+
+ /// \brief Get the context that created this AttributeSetImpl.
+ LLVMContext &getContext() { return Context; }
+
+ /// \brief Return the number of attributes this AttributeSet contains.
+ unsigned getNumAttributes() const { return AttrNodes.size(); }
+
+  /// \brief Get the index attached to the given "slot" in the AttrNodes list.
+  /// This index is the index of the return, parameter, or function object that
+  /// the attributes are applied to, not the index into the AttrNodes list
+  /// where the attributes reside.
+ uint64_t getSlotIndex(unsigned Slot) const {
+ return AttrNodes[Slot].first;
+ }
+
+ /// \brief Retrieve the attributes for the given "slot" in the AttrNode list.
+  /// \p Slot is an index into the AttrNodes list, not the index of the
+  /// return/parameter/function which the attributes apply to.
+ AttributeSet getSlotAttributes(unsigned Slot) const {
+ return AttributeSet::get(Context, AttrNodes[Slot]);
+ }
+
+ /// \brief Retrieve the attribute set node for the given "slot" in the
+ /// AttrNode list.
+ AttributeSetNode *getSlotNode(unsigned Slot) const {
+ return AttrNodes[Slot].second;
+ }
+
+ typedef AttributeSetNode::iterator iterator;
+ typedef AttributeSetNode::const_iterator const_iterator;
+
+ iterator begin(unsigned Idx)
+ { return AttrNodes[Idx].second->begin(); }
+ iterator end(unsigned Idx)
+ { return AttrNodes[Idx].second->end(); }
+
+ const_iterator begin(unsigned Idx) const
+ { return AttrNodes[Idx].second->begin(); }
+ const_iterator end(unsigned Idx) const
+ { return AttrNodes[Idx].second->end(); }
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, AttrNodes);
+ }
+ static void Profile(FoldingSetNodeID &ID,
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> > Nodes) {
+ for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
+ ID.AddInteger(Nodes[i].first);
+ ID.AddPointer(Nodes[i].second);
+ }
+ }
+
+ // FIXME: This atrocity is temporary.
+ uint64_t Raw(uint64_t Index) const;
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
new file mode 100644
index 000000000000..2d828914cdca
--- /dev/null
+++ b/lib/IR/Attributes.cpp
@@ -0,0 +1,1180 @@
+//===-- Attributes.cpp - Implement Attribute classes ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the Attribute, AttributeImpl, AttrBuilder,
+/// AttributeSetImpl, and AttributeSet classes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Attributes.h"
+#include "AttributeImpl.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Attribute Construction Methods
+//===----------------------------------------------------------------------===//
+
+Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
+ uint64_t Val) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ FoldingSetNodeID ID;
+ ID.AddInteger(Kind);
+ if (Val) ID.AddInteger(Val);
+
+ void *InsertPoint;
+ AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+ if (!PA) {
+ // If we didn't find any existing attributes of the same shape then create a
+ // new one and insert it.
+ PA = !Val ?
+ new AttributeImpl(Context, Kind) :
+ new AttributeImpl(Context, Kind, Val);
+ pImpl->AttrsSet.InsertNode(PA, InsertPoint);
+ }
+
+ // Return the Attribute that we found or created.
+ return Attribute(PA);
+}
+
+Attribute Attribute::get(LLVMContext &Context, StringRef Kind, StringRef Val) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ FoldingSetNodeID ID;
+ ID.AddString(Kind);
+ if (!Val.empty()) ID.AddString(Val);
+
+ void *InsertPoint;
+ AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+ if (!PA) {
+ // If we didn't find any existing attributes of the same shape then create a
+ // new one and insert it.
+ PA = new AttributeImpl(Context, Kind, Val);
+ pImpl->AttrsSet.InsertNode(PA, InsertPoint);
+ }
+
+ // Return the Attribute that we found or created.
+ return Attribute(PA);
+}
+
+Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) {
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x40000000 && "Alignment too large.");
+ return get(Context, Alignment, Align);
+}
+
+Attribute Attribute::getWithStackAlignment(LLVMContext &Context,
+ uint64_t Align) {
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x100 && "Alignment too large.");
+ return get(Context, StackAlignment, Align);
+}
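+// Illustrative usage (hypothetical context 'Ctx'):
+//
+//   Attribute A = Attribute::get(Ctx, Attribute::NoUnwind); // "nounwind"
+//   Attribute B = Attribute::getWithAlignment(Ctx, 16);     // "align 16"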
+
+//===----------------------------------------------------------------------===//
+// Attribute Accessor Methods
+//===----------------------------------------------------------------------===//
+
+bool Attribute::isEnumAttribute() const {
+ return pImpl && pImpl->isEnumAttribute();
+}
+
+bool Attribute::isAlignAttribute() const {
+ return pImpl && pImpl->isAlignAttribute();
+}
+
+bool Attribute::isStringAttribute() const {
+ return pImpl && pImpl->isStringAttribute();
+}
+
+Attribute::AttrKind Attribute::getKindAsEnum() const {
+ assert((isEnumAttribute() || isAlignAttribute()) &&
+ "Invalid attribute type to get the kind as an enum!");
+ return pImpl ? pImpl->getKindAsEnum() : None;
+}
+
+uint64_t Attribute::getValueAsInt() const {
+ assert(isAlignAttribute() &&
+ "Expected the attribute to be an alignment attribute!");
+ return pImpl ? pImpl->getValueAsInt() : 0;
+}
+
+StringRef Attribute::getKindAsString() const {
+ assert(isStringAttribute() &&
+ "Invalid attribute type to get the kind as a string!");
+ return pImpl ? pImpl->getKindAsString() : StringRef();
+}
+
+StringRef Attribute::getValueAsString() const {
+ assert(isStringAttribute() &&
+ "Invalid attribute type to get the value as a string!");
+ return pImpl ? pImpl->getValueAsString() : StringRef();
+}
+
+bool Attribute::hasAttribute(AttrKind Kind) const {
+ return (pImpl && pImpl->hasAttribute(Kind)) || (!pImpl && Kind == None);
+}
+
+bool Attribute::hasAttribute(StringRef Kind) const {
+ if (!isStringAttribute()) return false;
+ return pImpl && pImpl->hasAttribute(Kind);
+}
+
+/// This returns the alignment field of an attribute as a byte alignment value.
+unsigned Attribute::getAlignment() const {
+ assert(hasAttribute(Attribute::Alignment) &&
+ "Trying to get alignment from non-alignment attribute!");
+ return pImpl->getValueAsInt();
+}
+
+/// This returns the stack alignment field of an attribute as a byte alignment
+/// value.
+unsigned Attribute::getStackAlignment() const {
+ assert(hasAttribute(Attribute::StackAlignment) &&
+ "Trying to get alignment from non-alignment attribute!");
+ return pImpl->getValueAsInt();
+}
+
+std::string Attribute::getAsString(bool InAttrGrp) const {
+ if (!pImpl) return "";
+
+ if (hasAttribute(Attribute::SanitizeAddress))
+ return "sanitize_address";
+ if (hasAttribute(Attribute::AlwaysInline))
+ return "alwaysinline";
+ if (hasAttribute(Attribute::ByVal))
+ return "byval";
+ if (hasAttribute(Attribute::InlineHint))
+ return "inlinehint";
+ if (hasAttribute(Attribute::InReg))
+ return "inreg";
+ if (hasAttribute(Attribute::MinSize))
+ return "minsize";
+ if (hasAttribute(Attribute::Naked))
+ return "naked";
+ if (hasAttribute(Attribute::Nest))
+ return "nest";
+ if (hasAttribute(Attribute::NoAlias))
+ return "noalias";
+ if (hasAttribute(Attribute::NoBuiltin))
+ return "nobuiltin";
+ if (hasAttribute(Attribute::NoCapture))
+ return "nocapture";
+ if (hasAttribute(Attribute::NoDuplicate))
+ return "noduplicate";
+ if (hasAttribute(Attribute::NoImplicitFloat))
+ return "noimplicitfloat";
+ if (hasAttribute(Attribute::NoInline))
+ return "noinline";
+ if (hasAttribute(Attribute::NonLazyBind))
+ return "nonlazybind";
+ if (hasAttribute(Attribute::NoRedZone))
+ return "noredzone";
+ if (hasAttribute(Attribute::NoReturn))
+ return "noreturn";
+ if (hasAttribute(Attribute::NoUnwind))
+ return "nounwind";
+ if (hasAttribute(Attribute::OptimizeForSize))
+ return "optsize";
+ if (hasAttribute(Attribute::ReadNone))
+ return "readnone";
+ if (hasAttribute(Attribute::ReadOnly))
+ return "readonly";
+ if (hasAttribute(Attribute::ReturnsTwice))
+ return "returns_twice";
+ if (hasAttribute(Attribute::SExt))
+ return "signext";
+ if (hasAttribute(Attribute::StackProtect))
+ return "ssp";
+ if (hasAttribute(Attribute::StackProtectReq))
+ return "sspreq";
+ if (hasAttribute(Attribute::StackProtectStrong))
+ return "sspstrong";
+ if (hasAttribute(Attribute::StructRet))
+ return "sret";
+ if (hasAttribute(Attribute::SanitizeThread))
+ return "sanitize_thread";
+ if (hasAttribute(Attribute::SanitizeMemory))
+ return "sanitize_memory";
+ if (hasAttribute(Attribute::UWTable))
+ return "uwtable";
+ if (hasAttribute(Attribute::ZExt))
+ return "zeroext";
+
+ // FIXME: These should be output like this:
+ //
+ // align=4
+ // alignstack=8
+ //
+ if (hasAttribute(Attribute::Alignment)) {
+ std::string Result;
+ Result += "align";
+ Result += (InAttrGrp) ? "=" : " ";
+ Result += utostr(getValueAsInt());
+ return Result;
+ }
+
+ if (hasAttribute(Attribute::StackAlignment)) {
+ std::string Result;
+ Result += "alignstack";
+ if (InAttrGrp) {
+ Result += "=";
+ Result += utostr(getValueAsInt());
+ } else {
+ Result += "(";
+ Result += utostr(getValueAsInt());
+ Result += ")";
+ }
+ return Result;
+ }
+
+ // Convert target-dependent attributes to strings of the form:
+ //
+ // "kind"
+ // "kind" = "value"
+ //
+ if (isStringAttribute()) {
+ std::string Result;
+ Result += '\"' + getKindAsString().str() + '"';
+
+ StringRef Val = pImpl->getValueAsString();
+ if (Val.empty()) return Result;
+
+ Result += "=\"" + Val.str() + '"';
+ return Result;
+ }
+
+ llvm_unreachable("Unknown attribute");
+}
+
+bool Attribute::operator<(Attribute A) const {
+ if (!pImpl && !A.pImpl) return false;
+ if (!pImpl) return true;
+ if (!A.pImpl) return false;
+ return *pImpl < *A.pImpl;
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeImpl Definition
+//===----------------------------------------------------------------------===//
+
+AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind)
+ : Context(C), Entry(new EnumAttributeEntry(Kind)) {}
+
+AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind,
+ unsigned Align)
+ : Context(C) {
+ assert((Kind == Attribute::Alignment || Kind == Attribute::StackAlignment) &&
+ "Wrong kind for alignment attribute!");
+ Entry = new AlignAttributeEntry(Kind, Align);
+}
+
+AttributeImpl::AttributeImpl(LLVMContext &C, StringRef Kind, StringRef Val)
+ : Context(C), Entry(new StringAttributeEntry(Kind, Val)) {}
+
+AttributeImpl::~AttributeImpl() {
+ delete Entry;
+}
+
+bool AttributeImpl::isEnumAttribute() const {
+ return isa<EnumAttributeEntry>(Entry);
+}
+
+bool AttributeImpl::isAlignAttribute() const {
+ return isa<AlignAttributeEntry>(Entry);
+}
+
+bool AttributeImpl::isStringAttribute() const {
+ return isa<StringAttributeEntry>(Entry);
+}
+
+bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const {
+ if (isStringAttribute()) return false;
+ return getKindAsEnum() == A;
+}
+
+bool AttributeImpl::hasAttribute(StringRef Kind) const {
+ if (!isStringAttribute()) return false;
+ return getKindAsString() == Kind;
+}
+
+Attribute::AttrKind AttributeImpl::getKindAsEnum() const {
+ if (EnumAttributeEntry *E = dyn_cast<EnumAttributeEntry>(Entry))
+ return E->getEnumKind();
+ return cast<AlignAttributeEntry>(Entry)->getEnumKind();
+}
+
+uint64_t AttributeImpl::getValueAsInt() const {
+ return cast<AlignAttributeEntry>(Entry)->getAlignment();
+}
+
+StringRef AttributeImpl::getKindAsString() const {
+ return cast<StringAttributeEntry>(Entry)->getStringKind();
+}
+
+StringRef AttributeImpl::getValueAsString() const {
+ return cast<StringAttributeEntry>(Entry)->getStringValue();
+}
+
+bool AttributeImpl::operator<(const AttributeImpl &AI) const {
+ // This sorts the attributes with enum attributes coming first (ordered by
+ // their AttrKind value), then alignment attributes (ordered by alignment
+ // value), and string attributes last (ordered by kind, then value).
+ if (isEnumAttribute()) {
+ if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum();
+ if (AI.isAlignAttribute()) return true;
+ if (AI.isStringAttribute()) return true;
+ }
+
+ if (isAlignAttribute()) {
+ if (AI.isEnumAttribute()) return false;
+ if (AI.isAlignAttribute()) return getValueAsInt() < AI.getValueAsInt();
+ if (AI.isStringAttribute()) return true;
+ }
+
+ if (AI.isEnumAttribute()) return false;
+ if (AI.isAlignAttribute()) return false;
+ if (getKindAsString() == AI.getKindAsString())
+ return getValueAsString() < AI.getValueAsString();
+ return getKindAsString() < AI.getKindAsString();
+}
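+
+// Concretely, a sketch of the resulting order: any enum attribute sorts
+// before any alignment attribute, which sorts before any string attribute,
+// e.g. noalias < align 4 < align 16 < "foo" < "foo"="bar".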
+
+uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
+ // FIXME: Remove this.
+ switch (Val) {
+ case Attribute::EndAttrKinds:
+ llvm_unreachable("Synthetic enumerators which should never get here");
+
+ case Attribute::None: return 0;
+ case Attribute::ZExt: return 1 << 0;
+ case Attribute::SExt: return 1 << 1;
+ case Attribute::NoReturn: return 1 << 2;
+ case Attribute::InReg: return 1 << 3;
+ case Attribute::StructRet: return 1 << 4;
+ case Attribute::NoUnwind: return 1 << 5;
+ case Attribute::NoAlias: return 1 << 6;
+ case Attribute::ByVal: return 1 << 7;
+ case Attribute::Nest: return 1 << 8;
+ case Attribute::ReadNone: return 1 << 9;
+ case Attribute::ReadOnly: return 1 << 10;
+ case Attribute::NoInline: return 1 << 11;
+ case Attribute::AlwaysInline: return 1 << 12;
+ case Attribute::OptimizeForSize: return 1 << 13;
+ case Attribute::StackProtect: return 1 << 14;
+ case Attribute::StackProtectReq: return 1 << 15;
+ case Attribute::Alignment: return 31 << 16;
+ case Attribute::NoCapture: return 1 << 21;
+ case Attribute::NoRedZone: return 1 << 22;
+ case Attribute::NoImplicitFloat: return 1 << 23;
+ case Attribute::Naked: return 1 << 24;
+ case Attribute::InlineHint: return 1 << 25;
+ case Attribute::StackAlignment: return 7 << 26;
+ case Attribute::ReturnsTwice: return 1 << 29;
+ case Attribute::UWTable: return 1 << 30;
+ case Attribute::NonLazyBind: return 1U << 31;
+ case Attribute::SanitizeAddress: return 1ULL << 32;
+ case Attribute::MinSize: return 1ULL << 33;
+ case Attribute::NoDuplicate: return 1ULL << 34;
+ case Attribute::StackProtectStrong: return 1ULL << 35;
+ case Attribute::SanitizeThread: return 1ULL << 36;
+ case Attribute::SanitizeMemory: return 1ULL << 37;
+ case Attribute::NoBuiltin: return 1ULL << 38;
+ }
+ llvm_unreachable("Unsupported attribute type");
+}
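+
+// For illustration: with this encoding, simple attributes occupy one bit
+// each, while Alignment reserves the five bits 16-20 (mask 31 << 16) and
+// StackAlignment the three bits 26-28 (mask 7 << 26). Raw() below fills
+// those fields with Log2_32(Align) + 1 rather than setting a single flag.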
+
+//===----------------------------------------------------------------------===//
+// AttributeSetNode Definition
+//===----------------------------------------------------------------------===//
+
+AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
+ ArrayRef<Attribute> Attrs) {
+ if (Attrs.empty())
+ return 0;
+
+ // Otherwise, build a key to look up the existing attributes.
+ LLVMContextImpl *pImpl = C.pImpl;
+ FoldingSetNodeID ID;
+
+ SmallVector<Attribute, 8> SortedAttrs(Attrs.begin(), Attrs.end());
+ array_pod_sort(SortedAttrs.begin(), SortedAttrs.end());
+
+ for (SmallVectorImpl<Attribute>::iterator I = SortedAttrs.begin(),
+ E = SortedAttrs.end(); I != E; ++I)
+ I->Profile(ID);
+
+ void *InsertPoint;
+ AttributeSetNode *PA =
+ pImpl->AttrsSetNodes.FindNodeOrInsertPos(ID, InsertPoint);
+
+ // If we didn't find any existing attributes of the same shape then create a
+ // new one and insert it.
+ if (!PA) {
+ PA = new AttributeSetNode(SortedAttrs);
+ pImpl->AttrsSetNodes.InsertNode(PA, InsertPoint);
+ }
+
+ // Return the AttributeSetNode that we found or created.
+ return PA;
+}
+
+bool AttributeSetNode::hasAttribute(Attribute::AttrKind Kind) const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Kind))
+ return true;
+ return false;
+}
+
+bool AttributeSetNode::hasAttribute(StringRef Kind) const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Kind))
+ return true;
+ return false;
+}
+
+Attribute AttributeSetNode::getAttribute(Attribute::AttrKind Kind) const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Kind))
+ return *I;
+ return Attribute();
+}
+
+Attribute AttributeSetNode::getAttribute(StringRef Kind) const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Kind))
+ return *I;
+ return Attribute();
+}
+
+unsigned AttributeSetNode::getAlignment() const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Attribute::Alignment))
+ return I->getAlignment();
+ return 0;
+}
+
+unsigned AttributeSetNode::getStackAlignment() const {
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ++I)
+ if (I->hasAttribute(Attribute::StackAlignment))
+ return I->getStackAlignment();
+ return 0;
+}
+
+std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
+ std::string Str = "";
+ for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
+ E = AttrList.end(); I != E; ) {
+ Str += I->getAsString(InAttrGrp);
+ if (++I != E) Str += " ";
+ }
+ return Str;
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeSetImpl Definition
+//===----------------------------------------------------------------------===//
+
+uint64_t AttributeSetImpl::Raw(uint64_t Index) const {
+ for (unsigned I = 0, E = getNumAttributes(); I != E; ++I) {
+ if (getSlotIndex(I) != Index) continue;
+ const AttributeSetNode *ASN = AttrNodes[I].second;
+ uint64_t Mask = 0;
+
+ for (AttributeSetNode::const_iterator II = ASN->begin(),
+ IE = ASN->end(); II != IE; ++II) {
+ Attribute Attr = *II;
+
+ // This cannot handle string attributes.
+ if (Attr.isStringAttribute()) continue;
+
+ Attribute::AttrKind Kind = Attr.getKindAsEnum();
+
+ if (Kind == Attribute::Alignment)
+ Mask |= (Log2_32(ASN->getAlignment()) + 1) << 16;
+ else if (Kind == Attribute::StackAlignment)
+ Mask |= (Log2_32(ASN->getStackAlignment()) + 1) << 26;
+ else
+ Mask |= AttributeImpl::getAttrMask(Kind);
+ }
+
+ return Mask;
+ }
+
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeSet Construction and Mutation Methods
+//===----------------------------------------------------------------------===//
+
+AttributeSet
+AttributeSet::getImpl(LLVMContext &C,
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> > Attrs) {
+ LLVMContextImpl *pImpl = C.pImpl;
+ FoldingSetNodeID ID;
+ AttributeSetImpl::Profile(ID, Attrs);
+
+ void *InsertPoint;
+ AttributeSetImpl *PA = pImpl->AttrsLists.FindNodeOrInsertPos(ID, InsertPoint);
+
+ // If we didn't find any existing attributes of the same shape then
+ // create a new one and insert it.
+ if (!PA) {
+ PA = new AttributeSetImpl(C, Attrs);
+ pImpl->AttrsLists.InsertNode(PA, InsertPoint);
+ }
+
+ // Return the AttributeSet that we found or created.
+ return AttributeSet(PA);
+}
+
+AttributeSet AttributeSet::get(LLVMContext &C,
+ ArrayRef<std::pair<unsigned, Attribute> > Attrs){
+ // If there are no attributes then return a null AttributeSet.
+ if (Attrs.empty())
+ return AttributeSet();
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
+ assert((!i || Attrs[i-1].first <= Attrs[i].first) &&
+ "Misordered Attributes list!");
+ assert(!Attrs[i].second.hasAttribute(Attribute::None) &&
+ "Pointless attribute!");
+ }
+#endif
+
+ // Create a vector of (unsigned, AttributeSetNode*) pairs from the attributes
+ // list.
+ SmallVector<std::pair<unsigned, AttributeSetNode*>, 8> AttrPairVec;
+ for (ArrayRef<std::pair<unsigned, Attribute> >::iterator I = Attrs.begin(),
+ E = Attrs.end(); I != E; ) {
+ unsigned Index = I->first;
+ SmallVector<Attribute, 4> AttrVec;
+ while (I != E && I->first == Index) {
+ AttrVec.push_back(I->second);
+ ++I;
+ }
+
+ AttrPairVec.push_back(std::make_pair(Index,
+ AttributeSetNode::get(C, AttrVec)));
+ }
+
+ return getImpl(C, AttrPairVec);
+}
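+
+// For example (an illustrative input), the sorted pair list
+//
+//   { (1, zeroext), (1, noalias), (2, nocapture) }
+//
+// collapses into two slots, index 1 -> {zeroext, noalias} and
+// index 2 -> {nocapture}, before being uniqued through getImpl().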
+
+AttributeSet AttributeSet::get(LLVMContext &C,
+ ArrayRef<std::pair<unsigned,
+ AttributeSetNode*> > Attrs) {
+ // If there are no attributes then return a null AttributeSet.
+ if (Attrs.empty())
+ return AttributeSet();
+
+ return getImpl(C, Attrs);
+}
+
+AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) {
+ if (!B.hasAttributes())
+ return AttributeSet();
+
+ // Add target-independent attributes.
+ SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
+ for (Attribute::AttrKind Kind = Attribute::None;
+ Kind != Attribute::EndAttrKinds; Kind = Attribute::AttrKind(Kind + 1)) {
+ if (!B.contains(Kind))
+ continue;
+
+ if (Kind == Attribute::Alignment)
+ Attrs.push_back(std::make_pair(Idx, Attribute::
+ getWithAlignment(C, B.getAlignment())));
+ else if (Kind == Attribute::StackAlignment)
+ Attrs.push_back(std::make_pair(Idx, Attribute::
+ getWithStackAlignment(C, B.getStackAlignment())));
+ else
+ Attrs.push_back(std::make_pair(Idx, Attribute::get(C, Kind)));
+ }
+
+ // Add target-dependent (string) attributes.
+ for (AttrBuilder::td_iterator I = B.td_begin(), E = B.td_end();
+ I != E; ++I)
+ Attrs.push_back(std::make_pair(Idx, Attribute::get(C, I->first,I->second)));
+
+ return get(C, Attrs);
+}
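+
+// A minimal usage sketch (the context and index are illustrative):
+//
+//   AttrBuilder B;
+//   B.addAttribute(Attribute::NoUnwind).addAlignmentAttr(16);
+//   AttributeSet AS = AttributeSet::get(Ctx, 1, B); // attributes for param 1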
+
+AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx,
+ ArrayRef<Attribute::AttrKind> Kind) {
+ SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
+ for (ArrayRef<Attribute::AttrKind>::iterator I = Kind.begin(),
+ E = Kind.end(); I != E; ++I)
+ Attrs.push_back(std::make_pair(Idx, Attribute::get(C, *I)));
+ return get(C, Attrs);
+}
+
+AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) {
+ if (Attrs.empty()) return AttributeSet();
+
+ SmallVector<std::pair<unsigned, AttributeSetNode*>, 8> AttrNodeVec;
+ for (unsigned I = 0, E = Attrs.size(); I != E; ++I) {
+ AttributeSet AS = Attrs[I];
+ if (!AS.pImpl) continue;
+ AttrNodeVec.append(AS.pImpl->AttrNodes.begin(), AS.pImpl->AttrNodes.end());
+ }
+
+ return getImpl(C, AttrNodeVec);
+}
+
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+ Attribute::AttrKind Attr) const {
+ if (hasAttribute(Idx, Attr)) return *this;
+ return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
+}
+
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+ StringRef Kind) const {
+ llvm::AttrBuilder B;
+ B.addAttribute(Kind);
+ return addAttributes(C, Idx, AttributeSet::get(C, Idx, B));
+}
+
+AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
+ AttributeSet Attrs) const {
+ if (!pImpl) return Attrs;
+ if (!Attrs.pImpl) return *this;
+
+#ifndef NDEBUG
+ // FIXME it is not obvious how this should work for alignment. For now, say
+ // we can't change a known alignment.
+ unsigned OldAlign = getParamAlignment(Idx);
+ unsigned NewAlign = Attrs.getParamAlignment(Idx);
+ assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
+ "Attempt to change alignment!");
+#endif
+
+ // Add the attribute slots before the one we're trying to add.
+ SmallVector<AttributeSet, 4> AttrSet;
+ uint64_t NumAttrs = pImpl->getNumAttributes();
+ AttributeSet AS;
+ uint64_t LastIndex = 0;
+ for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
+ if (getSlotIndex(I) >= Idx) {
+ if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++);
+ break;
+ }
+ LastIndex = I + 1;
+ AttrSet.push_back(getSlotAttributes(I));
+ }
+
+ // Now add the attribute into the correct slot. There may already be an
+ // AttributeSet there.
+ AttrBuilder B(AS, Idx);
+
+ for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I)
+ if (Attrs.getSlotIndex(I) == Idx) {
+ for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I),
+ IE = Attrs.pImpl->end(I); II != IE; ++II)
+ B.addAttribute(*II);
+ break;
+ }
+
+ AttrSet.push_back(AttributeSet::get(C, Idx, B));
+
+ // Add the remaining attribute slots.
+ for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
+ AttrSet.push_back(getSlotAttributes(I));
+
+ return get(C, AttrSet);
+}
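+
+// Sketch of the merge above: adding {align 8} at index 2 to a set whose
+// slots are {1: zeroext, 3: noalias} yields {1: zeroext, 2: align 8,
+// 3: noalias}; adding at an index that already has a slot unions the new
+// attributes into that slot through the AttrBuilder instead.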
+
+AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx,
+ Attribute::AttrKind Attr) const {
+ if (!hasAttribute(Idx, Attr)) return *this;
+ return removeAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
+}
+
+AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
+ AttributeSet Attrs) const {
+ if (!pImpl) return AttributeSet();
+ if (!Attrs.pImpl) return *this;
+
+#ifndef NDEBUG
+ // FIXME it is not obvious how this should work for alignment.
+ // For now, say we can't pass in alignment, which no current use does.
+ assert(!Attrs.hasAttribute(Idx, Attribute::Alignment) &&
+ "Attempt to change alignment!");
+#endif
+
+ // Add the attribute slots before the one we're trying to add.
+ SmallVector<AttributeSet, 4> AttrSet;
+ uint64_t NumAttrs = pImpl->getNumAttributes();
+ AttributeSet AS;
+ uint64_t LastIndex = 0;
+ for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
+ if (getSlotIndex(I) >= Idx) {
+ if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++);
+ break;
+ }
+ LastIndex = I + 1;
+ AttrSet.push_back(getSlotAttributes(I));
+ }
+
+ // Now remove the attribute from the correct slot. There may already be an
+ // AttributeSet there.
+ AttrBuilder B(AS, Idx);
+
+ for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I)
+ if (Attrs.getSlotIndex(I) == Idx) {
+ B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Idx);
+ break;
+ }
+
+ AttrSet.push_back(AttributeSet::get(C, Idx, B));
+
+ // Add the remaining attribute slots.
+ for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
+ AttrSet.push_back(getSlotAttributes(I));
+
+ return get(C, AttrSet);
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeSet Accessor Methods
+//===----------------------------------------------------------------------===//
+
+LLVMContext &AttributeSet::getContext() const {
+ return pImpl->getContext();
+}
+
+AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const {
+ return pImpl && hasAttributes(Idx) ?
+ AttributeSet::get(pImpl->getContext(),
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
+ std::make_pair(Idx, getAttributes(Idx)))) :
+ AttributeSet();
+}
+
+AttributeSet AttributeSet::getRetAttributes() const {
+ return pImpl && hasAttributes(ReturnIndex) ?
+ AttributeSet::get(pImpl->getContext(),
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
+ std::make_pair(ReturnIndex,
+ getAttributes(ReturnIndex)))) :
+ AttributeSet();
+}
+
+AttributeSet AttributeSet::getFnAttributes() const {
+ return pImpl && hasAttributes(FunctionIndex) ?
+ AttributeSet::get(pImpl->getContext(),
+ ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
+ std::make_pair(FunctionIndex,
+ getAttributes(FunctionIndex)))) :
+ AttributeSet();
+}
+
+bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->hasAttribute(Kind) : false;
+}
+
+bool AttributeSet::hasAttribute(unsigned Index, StringRef Kind) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->hasAttribute(Kind) : false;
+}
+
+bool AttributeSet::hasAttributes(unsigned Index) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->hasAttributes() : false;
+}
+
+/// \brief Return true if the specified attribute is set for at least one
+/// parameter or for the return value.
+bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const {
+ if (pImpl == 0) return false;
+
+ for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I)
+ for (AttributeSetImpl::const_iterator II = pImpl->begin(I),
+ IE = pImpl->end(I); II != IE; ++II)
+ if (II->hasAttribute(Attr))
+ return true;
+
+ return false;
+}
+
+Attribute AttributeSet::getAttribute(unsigned Index,
+ Attribute::AttrKind Kind) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getAttribute(Kind) : Attribute();
+}
+
+Attribute AttributeSet::getAttribute(unsigned Index,
+ StringRef Kind) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getAttribute(Kind) : Attribute();
+}
+
+unsigned AttributeSet::getParamAlignment(unsigned Index) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getAlignment() : 0;
+}
+
+unsigned AttributeSet::getStackAlignment(unsigned Index) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getStackAlignment() : 0;
+}
+
+std::string AttributeSet::getAsString(unsigned Index,
+ bool InAttrGrp) const {
+ AttributeSetNode *ASN = getAttributes(Index);
+ return ASN ? ASN->getAsString(InAttrGrp) : std::string("");
+}
+
+/// \brief The attributes for the specified index are returned.
+AttributeSetNode *AttributeSet::getAttributes(unsigned Idx) const {
+ if (!pImpl) return 0;
+
+ // Loop through to find the attribute node we want.
+ for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I)
+ if (pImpl->getSlotIndex(I) == Idx)
+ return pImpl->getSlotNode(I);
+
+ return 0;
+}
+
+AttributeSet::iterator AttributeSet::begin(unsigned Idx) const {
+ if (!pImpl)
+ return ArrayRef<Attribute>().begin();
+ return pImpl->begin(Idx);
+}
+
+AttributeSet::iterator AttributeSet::end(unsigned Idx) const {
+ if (!pImpl)
+ return ArrayRef<Attribute>().end();
+ return pImpl->end(Idx);
+}
+
+//===----------------------------------------------------------------------===//
+// AttributeSet Introspection Methods
+//===----------------------------------------------------------------------===//
+
+/// \brief Return the number of slots used in this attribute list. This is the
+/// number of arguments that have an attribute set on them (including the
+/// function itself).
+unsigned AttributeSet::getNumSlots() const {
+ return pImpl ? pImpl->getNumAttributes() : 0;
+}
+
+uint64_t AttributeSet::getSlotIndex(unsigned Slot) const {
+ assert(pImpl && Slot < pImpl->getNumAttributes() &&
+ "Slot # out of range!");
+ return pImpl->getSlotIndex(Slot);
+}
+
+AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const {
+ assert(pImpl && Slot < pImpl->getNumAttributes() &&
+ "Slot # out of range!");
+ return pImpl->getSlotAttributes(Slot);
+}
+
+uint64_t AttributeSet::Raw(unsigned Index) const {
+ // FIXME: Remove this.
+ return pImpl ? pImpl->Raw(Index) : 0;
+}
+
+void AttributeSet::dump() const {
+ dbgs() << "PAL[\n";
+
+ for (unsigned i = 0, e = getNumSlots(); i < e; ++i) {
+ uint64_t Index = getSlotIndex(i);
+ dbgs() << " { ";
+ if (Index == ~0U)
+ dbgs() << "~0U";
+ else
+ dbgs() << Index;
+ dbgs() << " => " << getAsString(Index) << " }\n";
+ }
+
+ dbgs() << "]\n";
+}
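+
+// For reference, the output has roughly this shape (a sketch; the attribute
+// strings come from getAsString above):
+//
+//   PAL[
+//     { 0 => zeroext }
+//     { ~0U => nounwind uwtable }
+//   ]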
+
+//===----------------------------------------------------------------------===//
+// AttrBuilder Method Implementations
+//===----------------------------------------------------------------------===//
+
+AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx)
+ : Attrs(0), Alignment(0), StackAlignment(0) {
+ AttributeSetImpl *pImpl = AS.pImpl;
+ if (!pImpl) return;
+
+ for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) {
+ if (pImpl->getSlotIndex(I) != Idx) continue;
+
+ for (AttributeSetImpl::const_iterator II = pImpl->begin(I),
+ IE = pImpl->end(I); II != IE; ++II)
+ addAttribute(*II);
+
+ break;
+ }
+}
+
+void AttrBuilder::clear() {
+ Attrs.reset();
+ Alignment = StackAlignment = 0;
+}
+
+AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
+ assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!");
+ assert(Val != Attribute::Alignment && Val != Attribute::StackAlignment &&
+ "Adding alignment attribute without adding alignment value!");
+ Attrs[Val] = true;
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
+ if (Attr.isStringAttribute()) {
+ addAttribute(Attr.getKindAsString(), Attr.getValueAsString());
+ return *this;
+ }
+
+ Attribute::AttrKind Kind = Attr.getKindAsEnum();
+ Attrs[Kind] = true;
+
+ if (Kind == Attribute::Alignment)
+ Alignment = Attr.getAlignment();
+ else if (Kind == Attribute::StackAlignment)
+ StackAlignment = Attr.getStackAlignment();
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::addAttribute(StringRef A, StringRef V) {
+ TargetDepAttrs[A] = V;
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
+ assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!");
+ Attrs[Val] = false;
+
+ if (Val == Attribute::Alignment)
+ Alignment = 0;
+ else if (Val == Attribute::StackAlignment)
+ StackAlignment = 0;
+
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
+ unsigned Idx = ~0U;
+ for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
+ if (A.getSlotIndex(I) == Index) {
+ Idx = I;
+ break;
+ }
+
+ assert(Idx != ~0U && "Couldn't find index in AttributeSet!");
+
+ for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); I != E; ++I) {
+ Attribute Attr = *I;
+ if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
+ Attribute::AttrKind Kind = I->getKindAsEnum();
+ Attrs[Kind] = false;
+
+ if (Kind == Attribute::Alignment)
+ Alignment = 0;
+ else if (Kind == Attribute::StackAlignment)
+ StackAlignment = 0;
+ } else {
+ assert(Attr.isStringAttribute() && "Invalid attribute type!");
+ std::map<std::string, std::string>::iterator
+ Iter = TargetDepAttrs.find(Attr.getKindAsString());
+ if (Iter != TargetDepAttrs.end())
+ TargetDepAttrs.erase(Iter);
+ }
+ }
+
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::removeAttribute(StringRef A) {
+ std::map<std::string, std::string>::iterator I = TargetDepAttrs.find(A);
+ if (I != TargetDepAttrs.end())
+ TargetDepAttrs.erase(I);
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) {
+ if (Align == 0) return *this;
+
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x40000000 && "Alignment too large.");
+
+ Attrs[Attribute::Alignment] = true;
+ Alignment = Align;
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) {
+ // Default alignment, allow the target to define how to align it.
+ if (Align == 0) return *this;
+
+ assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
+ assert(Align <= 0x100 && "Alignment too large.");
+
+ Attrs[Attribute::StackAlignment] = true;
+ StackAlignment = Align;
+ return *this;
+}
+
+AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
+ // FIXME: What if both have alignments, but they don't match?!
+ if (!Alignment)
+ Alignment = B.Alignment;
+
+ if (!StackAlignment)
+ StackAlignment = B.StackAlignment;
+
+ Attrs |= B.Attrs;
+
+ for (td_const_iterator I = B.TargetDepAttrs.begin(),
+ E = B.TargetDepAttrs.end(); I != E; ++I)
+ TargetDepAttrs[I->first] = I->second;
+
+ return *this;
+}
+
+bool AttrBuilder::contains(StringRef A) const {
+ return TargetDepAttrs.find(A) != TargetDepAttrs.end();
+}
+
+bool AttrBuilder::hasAttributes() const {
+ return !Attrs.none() || !TargetDepAttrs.empty();
+}
+
+bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const {
+ unsigned Idx = ~0U;
+ for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
+ if (A.getSlotIndex(I) == Index) {
+ Idx = I;
+ break;
+ }
+
+ assert(Idx != ~0U && "Couldn't find the index!");
+
+ for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx);
+ I != E; ++I) {
+ Attribute Attr = *I;
+ if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
+ if (Attrs[I->getKindAsEnum()])
+ return true;
+ } else {
+ assert(Attr.isStringAttribute() && "Invalid attribute kind!");
+ return TargetDepAttrs.find(Attr.getKindAsString())!=TargetDepAttrs.end();
+ }
+ }
+
+ return false;
+}
+
+bool AttrBuilder::hasAlignmentAttr() const {
+ return Alignment != 0;
+}
+
+bool AttrBuilder::operator==(const AttrBuilder &B) {
+ if (Attrs != B.Attrs)
+ return false;
+
+ for (td_const_iterator I = TargetDepAttrs.begin(),
+ E = TargetDepAttrs.end(); I != E; ++I)
+ if (B.TargetDepAttrs.find(I->first) == B.TargetDepAttrs.end())
+ return false;
+
+ return Alignment == B.Alignment && StackAlignment == B.StackAlignment;
+}
+
+void AttrBuilder::removeFunctionOnlyAttrs() {
+ removeAttribute(Attribute::NoReturn)
+ .removeAttribute(Attribute::NoUnwind)
+ .removeAttribute(Attribute::ReadNone)
+ .removeAttribute(Attribute::ReadOnly)
+ .removeAttribute(Attribute::NoInline)
+ .removeAttribute(Attribute::AlwaysInline)
+ .removeAttribute(Attribute::OptimizeForSize)
+ .removeAttribute(Attribute::StackProtect)
+ .removeAttribute(Attribute::StackProtectReq)
+ .removeAttribute(Attribute::StackProtectStrong)
+ .removeAttribute(Attribute::NoRedZone)
+ .removeAttribute(Attribute::NoImplicitFloat)
+ .removeAttribute(Attribute::Naked)
+ .removeAttribute(Attribute::InlineHint)
+ .removeAttribute(Attribute::StackAlignment)
+ .removeAttribute(Attribute::UWTable)
+ .removeAttribute(Attribute::NonLazyBind)
+ .removeAttribute(Attribute::ReturnsTwice)
+ .removeAttribute(Attribute::SanitizeAddress)
+ .removeAttribute(Attribute::SanitizeThread)
+ .removeAttribute(Attribute::SanitizeMemory)
+ .removeAttribute(Attribute::MinSize)
+ .removeAttribute(Attribute::NoDuplicate)
+ .removeAttribute(Attribute::NoBuiltin);
+}
+
+AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
+ // FIXME: Remove this in 4.0.
+ if (!Val) return *this;
+
+ for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
+ I = Attribute::AttrKind(I + 1)) {
+ if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) {
+ Attrs[I] = true;
+
+ if (I == Attribute::Alignment)
+ Alignment = 1ULL << ((A >> 16) - 1);
+ else if (I == Attribute::StackAlignment)
+ StackAlignment = 1ULL << ((A >> 26)-1);
+ }
+ }
+
+ return *this;
+}
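+
+// Round-trip sketch for the packed encoding: an alignment of 16 is stored by
+// Raw() as (Log2_32(16) + 1) << 16 == 5 << 16, and the decode above recovers
+// it as 1ULL << ((A >> 16) - 1) == 1 << 4 == 16.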
+
+//===----------------------------------------------------------------------===//
+// AttributeFuncs Function Definitions
+//===----------------------------------------------------------------------===//
+
+/// \brief Which attributes cannot be applied to a type.
+AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) {
+ AttrBuilder Incompatible;
+
+ if (!Ty->isIntegerTy())
+ // Attributes that only apply to integers.
+ Incompatible.addAttribute(Attribute::SExt)
+ .addAttribute(Attribute::ZExt);
+
+ if (!Ty->isPointerTy())
+ // Attributes that only apply to pointers.
+ Incompatible.addAttribute(Attribute::ByVal)
+ .addAttribute(Attribute::Nest)
+ .addAttribute(Attribute::NoAlias)
+ .addAttribute(Attribute::NoCapture)
+ .addAttribute(Attribute::StructRet);
+
+ return AttributeSet::get(Ty->getContext(), Index, Incompatible);
+}
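+
+// A usage sketch (the surrounding variables are hypothetical): a
+// verifier-style check that no attribute on a parameter is illegal for the
+// parameter's type.
+//
+//   AttributeSet Bad = AttributeFuncs::typeIncompatible(ParamTy, Idx);
+//   assert(!(Attrs.Raw(Idx) & Bad.Raw(Idx)) && "Illegal attribute for type");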
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
new file mode 100644
index 000000000000..f2375374e356
--- /dev/null
+++ b/lib/IR/AutoUpgrade.cpp
@@ -0,0 +1,393 @@
+//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the auto-upgrade helper functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/AutoUpgrade.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstring>
+using namespace llvm;
+
+// Upgrade the declarations of the SSE4.1 functions whose arguments have
+// changed their type from v4f32 to v2i64.
+static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
+ Function *&NewFn) {
+ // Check whether this is an old version of the function, which received
+ // v4f32 arguments.
+ Type *Arg0Type = F->getFunctionType()->getParamType(0);
+ if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
+ return false;
+
+ // Yes, it's old, replace it with new version.
+ F->setName(F->getName() + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+ return true;
+}
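+
+// For example, an old declaration such as
+//
+//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
+//
+// is renamed to "llvm.x86.sse41.ptestc.old" here, and NewFn receives a fresh
+// declaration taking <2 x i64> operands; UpgradeIntrinsicCall later bitcasts
+// the old call's arguments to match.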
+
+static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
+ assert(F && "Illegal to upgrade a non-existent Function.");
+
+ // Quickly eliminate it if it's not a candidate.
+ StringRef Name = F->getName();
+ if (Name.size() <= 8 || !Name.startswith("llvm."))
+ return false;
+ Name = Name.substr(5); // Strip off "llvm."
+
+ switch (Name[0]) {
+ default: break;
+ case 'a': {
+ if (Name.startswith("arm.neon.vclz")) {
+ Type* args[2] = {
+ F->arg_begin()->getType(),
+ Type::getInt1Ty(F->getContext())
+ };
+ // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
+ // the end of the name. Change name from llvm.arm.neon.vclz.* to
+ // llvm.ctlz.*
+ FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
+ NewFn = Function::Create(fType, F->getLinkage(),
+ "llvm.ctlz." + Name.substr(14), F->getParent());
+ return true;
+ }
+ if (Name.startswith("arm.neon.vcnt")) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
+ F->arg_begin()->getType());
+ return true;
+ }
+ break;
+ }
+ case 'c': {
+ if (Name.startswith("ctlz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ F->arg_begin()->getType());
+ return true;
+ }
+ if (Name.startswith("cttz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
+ F->arg_begin()->getType());
+ return true;
+ }
+ break;
+ }
+ case 'x': {
+ if (Name.startswith("x86.sse2.pcmpeq.") ||
+ Name.startswith("x86.sse2.pcmpgt.") ||
+ Name.startswith("x86.avx2.pcmpeq.") ||
+ Name.startswith("x86.avx2.pcmpgt.") ||
+ Name.startswith("x86.avx.vpermil.") ||
+ Name == "x86.avx.movnt.dq.256" ||
+ Name == "x86.avx.movnt.pd.256" ||
+ Name == "x86.avx.movnt.ps.256" ||
+ (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
+ NewFn = 0;
+ return true;
+ }
+ // SSE4.1 ptest functions may have an old signature.
+ if (Name.startswith("x86.sse41.ptest")) {
+ if (Name == "x86.sse41.ptestc")
+ return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
+ if (Name == "x86.sse41.ptestz")
+ return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
+ if (Name == "x86.sse41.ptestnzc")
+ return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
+ }
+ // frcz.ss/sd may need to have an argument dropped
+ if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::x86_xop_vfrcz_ss);
+ return true;
+ }
+ if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::x86_xop_vfrcz_sd);
+ return true;
+ }
+ // Fix the FMA4 intrinsics to remove the 4
+ if (Name.startswith("x86.fma4.")) {
+ F->setName("llvm.x86.fma" + Name.substr(8));
+ NewFn = F;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // This may not belong here. This function is effectively being overloaded
+ // to both detect an intrinsic which needs upgrading, and to provide the
+ // upgraded form of the intrinsic. We should perhaps have two separate
+ // functions for this.
+ return false;
+}
+
+bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
+ NewFn = 0;
+ bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
+
+ // Upgrade intrinsic attributes. This does not change the function.
+ if (NewFn)
+ F = NewFn;
+ if (unsigned id = F->getIntrinsicID())
+ F->setAttributes(Intrinsic::getAttributes(F->getContext(),
+ (Intrinsic::ID)id));
+ return Upgraded;
+}
+
+bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
+ // Nothing to do yet.
+ return false;
+}
+
+// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to
+// the upgraded intrinsic. All argument and return casting must be provided
+// in order to integrate seamlessly with the existing context.
+void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
+ Function *F = CI->getCalledFunction();
+ LLVMContext &C = CI->getContext();
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ assert(F && "Intrinsic call is not direct?");
+
+ if (!NewFn) {
+ // Get the Function's name.
+ StringRef Name = F->getName();
+
+ Value *Rep;
+ // Upgrade packed integer vector compare intrinsics to compare instructions.
+ if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
+ Name.startswith("llvm.x86.avx2.pcmpeq.")) {
+ Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpeq");
+ // need to sign extend since icmp returns vector of i1
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
+ Name.startswith("llvm.x86.avx2.pcmpgt.")) {
+ Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpgt");
+ // need to sign extend since icmp returns vector of i1
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
+ Name == "llvm.x86.avx.movnt.ps.256" ||
+ Name == "llvm.x86.avx.movnt.pd.256") {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ Module *M = F->getParent();
+ SmallVector<Value *, 1> Elts;
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
+ MDNode *Node = MDNode::get(C, Elts);
+
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Value *BC = Builder.CreateBitCast(Arg0,
+ PointerType::getUnqual(Arg1->getType()),
+ "cast");
+ StoreInst *SI = Builder.CreateStore(Arg1, BC);
+ SI->setMetadata(M->getMDKindID("nontemporal"), Node);
+ SI->setAlignment(16);
+
+ // Remove intrinsic.
+ CI->eraseFromParent();
+ return;
+ } else if (Name.startswith("llvm.x86.xop.vpcom")) {
+ Intrinsic::ID intID;
+ if (Name.endswith("ub"))
+ intID = Intrinsic::x86_xop_vpcomub;
+ else if (Name.endswith("uw"))
+ intID = Intrinsic::x86_xop_vpcomuw;
+ else if (Name.endswith("ud"))
+ intID = Intrinsic::x86_xop_vpcomud;
+ else if (Name.endswith("uq"))
+ intID = Intrinsic::x86_xop_vpcomuq;
+ else if (Name.endswith("b"))
+ intID = Intrinsic::x86_xop_vpcomb;
+ else if (Name.endswith("w"))
+ intID = Intrinsic::x86_xop_vpcomw;
+ else if (Name.endswith("d"))
+ intID = Intrinsic::x86_xop_vpcomd;
+ else if (Name.endswith("q"))
+ intID = Intrinsic::x86_xop_vpcomq;
+ else
+ llvm_unreachable("Unknown suffix");
+
+ Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
+ unsigned Imm;
+ if (Name.startswith("lt"))
+ Imm = 0;
+ else if (Name.startswith("le"))
+ Imm = 1;
+ else if (Name.startswith("gt"))
+ Imm = 2;
+ else if (Name.startswith("ge"))
+ Imm = 3;
+ else if (Name.startswith("eq"))
+ Imm = 4;
+ else if (Name.startswith("ne"))
+ Imm = 5;
+ else if (Name.startswith("true"))
+ Imm = 6;
+ else if (Name.startswith("false"))
+ Imm = 7;
+ else
+ llvm_unreachable("Unknown condition");
+
+ Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
+ Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
+ CI->getArgOperand(1), Builder.getInt8(Imm));
+ } else {
+ bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
+ if (Name == "llvm.x86.avx.vpermil.pd.256")
+ PD256 = true;
+ else if (Name == "llvm.x86.avx.vpermil.pd")
+ PD128 = true;
+ else if (Name == "llvm.x86.avx.vpermil.ps.256")
+ PS256 = true;
+ else if (Name == "llvm.x86.avx.vpermil.ps")
+ PS128 = true;
+
+ if (PD256 || PD128 || PS256 || PS128) {
+ Value *Op0 = CI->getArgOperand(0);
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ SmallVector<Constant*, 8> Idxs;
+
+ if (PD128)
+ for (unsigned i = 0; i != 2; ++i)
+ Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
+ else if (PD256)
+ for (unsigned l = 0; l != 4; l+=2)
+ for (unsigned i = 0; i != 2; ++i)
+ Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
+ else if (PS128)
+ for (unsigned i = 0; i != 4; ++i)
+ Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
+ else if (PS256)
+ for (unsigned l = 0; l != 8; l+=4)
+ for (unsigned i = 0; i != 4; ++i)
+ Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
+ else
+ llvm_unreachable("Unexpected function");
+
+ Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
+ } else {
+ llvm_unreachable("Unknown function for CallInst upgrade.");
+ }
+ }
+
+ CI->replaceAllUsesWith(Rep);
+ CI->eraseFromParent();
+ return;
+ }
+
+ std::string Name = CI->getName().str();
+ CI->setName(Name + ".old");
+
+ switch (NewFn->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unknown function for CallInst upgrade.");
+
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ assert(CI->getNumArgOperands() == 1 &&
+ "Mismatch between function args and call args");
+ CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
+ Builder.getFalse(), Name));
+ CI->eraseFromParent();
+ return;
+
+ case Intrinsic::arm_neon_vclz: {
+ // Change name from llvm.arm.neon.vclz.* to llvm.ctlz.*
+ CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
+ Builder.getFalse(),
+ "llvm.ctlz." + Name.substr(14)));
+ CI->eraseFromParent();
+ return;
+ }
+ case Intrinsic::ctpop: {
+ CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
+ CI->eraseFromParent();
+ return;
+ }
+
+ case Intrinsic::x86_xop_vfrcz_ss:
+ case Intrinsic::x86_xop_vfrcz_sd:
+ CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
+ Name));
+ CI->eraseFromParent();
+ return;
+
+ case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_sse41_ptestnzc: {
+ // The arguments for these intrinsics used to be v4f32, and changed
+ // to v2i64. This is purely a nop, since those are bitwise intrinsics.
+ // So, the only thing required is a bitcast for both arguments.
+ // First, check the arguments have the old type.
+ Value *Arg0 = CI->getArgOperand(0);
+ if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
+ return;
+
+ // Old intrinsic, add bitcasts
+ Value *Arg1 = CI->getArgOperand(1);
+
+ Value *BC0 =
+ Builder.CreateBitCast(Arg0,
+ VectorType::get(Type::getInt64Ty(C), 2),
+ "cast");
+ Value *BC1 =
+ Builder.CreateBitCast(Arg1,
+ VectorType::get(Type::getInt64Ty(C), 2),
+ "cast");
+
+ CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
+ return;
+ }
+ }
+}
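+
+// Name-decoding sketch for the XOP compare upgrade above: an old call such as
+//
+//   @llvm.x86.xop.vpcomltub(<16 x i8> %a, <16 x i8> %b)
+//
+// carries the suffix "ub" (unsigned byte, Intrinsic::x86_xop_vpcomub) and the
+// condition "lt" (Imm = 0), so it is rewritten to
+//
+//   @llvm.x86.xop.vpcomub(<16 x i8> %a, <16 x i8> %b, i8 0)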
+
+// This tests each Function to determine if it needs upgrading. When we find
+// one we are interested in, we then upgrade all calls to reflect the new
+// function.
+void llvm::UpgradeCallsToIntrinsic(Function* F) {
+ assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
+
+ // Upgrade the function and check if it is a totally new function.
+ Function *NewFn;
+ if (UpgradeIntrinsicFunction(F, NewFn)) {
+ if (NewFn != F) {
+ // Replace all uses to the old function with the new one if necessary.
+ for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
+ UI != UE; ) {
+ if (CallInst *CI = dyn_cast<CallInst>(*UI++))
+ UpgradeIntrinsicCall(CI, NewFn);
+ }
+ // Remove old function, no longer used, from the module.
+ F->eraseFromParent();
+ }
+ }
+}
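+
+// A minimal driver sketch (the caller is hypothetical): upgrade every
+// function in a freshly loaded module. Advance the iterator before the call,
+// since an upgraded function may be erased from the module.
+//
+//   for (Module::iterator I = M.begin(), E = M.end(); I != E; )
+//     UpgradeCallsToIntrinsic(&*I++);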
+
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
new file mode 100644
index 000000000000..41e58ec5da2d
--- /dev/null
+++ b/lib/IR/BasicBlock.cpp
@@ -0,0 +1,371 @@
+//===-- BasicBlock.cpp - Implement BasicBlock related methods -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BasicBlock class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/BasicBlock.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/LeakDetector.h"
+#include <algorithm>
+using namespace llvm;
+
+ValueSymbolTable *BasicBlock::getValueSymbolTable() {
+ if (Function *F = getParent())
+ return &F->getValueSymbolTable();
+ return 0;
+}
+
+LLVMContext &BasicBlock::getContext() const {
+ return getType()->getContext();
+}
+
+// Explicit instantiation of SymbolTableListTraits since some of the methods
+// are not in the public header file...
+template class llvm::SymbolTableListTraits<Instruction, BasicBlock>;
+
+
+BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
+ BasicBlock *InsertBefore)
+ : Value(Type::getLabelTy(C), Value::BasicBlockVal), Parent(0) {
+
+ // Make sure that we get added to a function
+ LeakDetector::addGarbageObject(this);
+
+ if (InsertBefore) {
+ assert(NewParent &&
+ "Cannot insert block before another block with no function!");
+ NewParent->getBasicBlockList().insert(InsertBefore, this);
+ } else if (NewParent) {
+ NewParent->getBasicBlockList().push_back(this);
+ }
+
+ setName(Name);
+}
+
+
+BasicBlock::~BasicBlock() {
+ // If the address of the block is taken and it is being deleted (e.g. because
+ // it is dead), this means that there is either a dangling constant expr
+ // hanging off the block, or an undefined use of the block (source code
+ // expecting the address of a label to keep the block alive even though there
+ // is no indirect branch). Handle these cases by zapping the BlockAddress
+ // nodes. There are no other possible uses at this point.
+ if (hasAddressTaken()) {
+ assert(!use_empty() && "There should be at least one blockaddress!");
+ Constant *Replacement =
+ ConstantInt::get(llvm::Type::getInt32Ty(getContext()), 1);
+ while (!use_empty()) {
+ BlockAddress *BA = cast<BlockAddress>(use_back());
+ BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
+ BA->getType()));
+ BA->destroyConstant();
+ }
+ }
+
+ assert(getParent() == 0 && "BasicBlock still linked into the program!");
+ dropAllReferences();
+ InstList.clear();
+}
+
+void BasicBlock::setParent(Function *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+
+ // Set Parent=parent, updating instruction symtab entries as appropriate.
+ InstList.setSymTabObject(&Parent, parent);
+
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void BasicBlock::removeFromParent() {
+ getParent()->getBasicBlockList().remove(this);
+}
+
+void BasicBlock::eraseFromParent() {
+ getParent()->getBasicBlockList().erase(this);
+}
+
+/// moveBefore - Unlink this basic block from its current function and
+/// insert it into the function that MovePos lives in, right before MovePos.
+void BasicBlock::moveBefore(BasicBlock *MovePos) {
+ MovePos->getParent()->getBasicBlockList().splice(MovePos,
+ getParent()->getBasicBlockList(), this);
+}
+
+/// moveAfter - Unlink this basic block from its current function and
+/// insert it into the function that MovePos lives in, right after MovePos.
+void BasicBlock::moveAfter(BasicBlock *MovePos) {
+ Function::iterator I = MovePos;
+ MovePos->getParent()->getBasicBlockList().splice(++I,
+ getParent()->getBasicBlockList(), this);
+}
+
+
+TerminatorInst *BasicBlock::getTerminator() {
+ if (InstList.empty()) return 0;
+ return dyn_cast<TerminatorInst>(&InstList.back());
+}
+
+const TerminatorInst *BasicBlock::getTerminator() const {
+ if (InstList.empty()) return 0;
+ return dyn_cast<TerminatorInst>(&InstList.back());
+}
+
+Instruction* BasicBlock::getFirstNonPHI() {
+ BasicBlock::iterator i = begin();
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ while (isa<PHINode>(i)) ++i;
+ return &*i;
+}
+
+Instruction* BasicBlock::getFirstNonPHIOrDbg() {
+ BasicBlock::iterator i = begin();
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ while (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) ++i;
+ return &*i;
+}
+
+Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() {
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ BasicBlock::iterator i = begin();
+ for (;; ++i) {
+ if (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i))
+ continue;
+
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(i);
+ if (!II)
+ break;
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ break;
+ }
+ return &*i;
+}
+
+BasicBlock::iterator BasicBlock::getFirstInsertionPt() {
+ iterator InsertPt = getFirstNonPHI();
+ if (isa<LandingPadInst>(InsertPt)) ++InsertPt;
+ return InsertPt;
+}
+
+void BasicBlock::dropAllReferences() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+}
+
+/// getSinglePredecessor - If this basic block has a single predecessor block,
+/// return the block, otherwise return a null pointer.
+BasicBlock *BasicBlock::getSinglePredecessor() {
+ pred_iterator PI = pred_begin(this), E = pred_end(this);
+ if (PI == E) return 0; // No preds.
+ BasicBlock *ThePred = *PI;
+ ++PI;
+ return (PI == E) ? ThePred : 0 /*multiple preds*/;
+}
+
+/// getUniquePredecessor - If this basic block has a unique predecessor block,
+/// return the block, otherwise return a null pointer.
+/// Note that unique predecessor doesn't mean single edge, there can be
+/// multiple edges from the unique predecessor to this block (for example
+/// a switch statement with multiple cases having the same destination).
+BasicBlock *BasicBlock::getUniquePredecessor() {
+ pred_iterator PI = pred_begin(this), E = pred_end(this);
+ if (PI == E) return 0; // No preds.
+ BasicBlock *PredBB = *PI;
+ ++PI;
+ for (;PI != E; ++PI) {
+ if (*PI != PredBB)
+ return 0;
+ // The same predecessor appears multiple times in the predecessor list.
+ // This is OK.
+ }
+ return PredBB;
+}
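+
+// The single/unique distinction, concretely: given
+//
+//   switch i32 %x, label %dest [ i32 0, label %dest ]
+//
+// %dest has a unique predecessor but not a single one, because two edges
+// arrive from the same block: getSinglePredecessor() returns null, while
+// getUniquePredecessor() returns the block containing the switch.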
+
+/// removePredecessor - This method is used to notify a BasicBlock that the
+/// specified Predecessor of the block is no longer able to reach it. This is
+/// actually not used to update the Predecessor list, but is actually used to
+/// update the PHI nodes that reside in the block. Note that this should be
+/// called while the predecessor still refers to this block.
+///
+void BasicBlock::removePredecessor(BasicBlock *Pred,
+ bool DontDeleteUselessPHIs) {
+ // Use hasNUsesOrMore to reduce the cost of this assertion for complex CFGs.
+ assert((hasNUsesOrMore(16) ||
+         find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) &&
+        "removePredecessor: BB is not a predecessor!");
+
+ if (InstList.empty()) return;
+ PHINode *APN = dyn_cast<PHINode>(&front());
+ if (!APN) return; // Quick exit.
+
+ // If there are exactly two predecessors, then we want to nuke the PHI nodes
+ // altogether. However, we cannot do this in the following case:
+ //
+ // Loop:
+ // %x = phi [X, Loop]
+ // %x2 = add %x, 1 ;; This would become %x2 = add %x2, 1
+ // br Loop ;; %x2 does not dominate all uses
+ //
+ // This is because the PHI node input is actually taken from the predecessor
+ // basic block. The only case this can happen is with a self loop, so we
+ // check for this case explicitly now.
+ //
+ unsigned max_idx = APN->getNumIncomingValues();
+ assert(max_idx != 0 && "PHI Node in block with 0 predecessors!?!?!");
+ if (max_idx == 2) {
+ BasicBlock *Other = APN->getIncomingBlock(APN->getIncomingBlock(0) == Pred);
+
+ // Disable PHI elimination!
+ if (this == Other) max_idx = 3;
+ }
+
+ // <= Two predecessors BEFORE I remove one?
+ if (max_idx <= 2 && !DontDeleteUselessPHIs) {
+ // Yup, loop through and nuke the PHI nodes
+ while (PHINode *PN = dyn_cast<PHINode>(&front())) {
+ // Remove the predecessor first.
+ PN->removeIncomingValue(Pred, !DontDeleteUselessPHIs);
+
+ // If the PHI _HAD_ two uses, replace PHI node with its now *single* value
+ if (max_idx == 2) {
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ // We are left with an infinite loop with no entries: kill the PHI.
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ getInstList().pop_front(); // Remove the PHI node
+ }
+
+ // If the PHI node already only had one entry, it got deleted by
+ // removeIncomingValue.
+ }
+ } else {
+ // Okay, now we know that we need to remove the predecessor Pred from all
+ // PHI nodes. Iterate over each PHI node, fixing them up.
+ PHINode *PN;
+ for (iterator II = begin(); (PN = dyn_cast<PHINode>(II)); ) {
+ ++II;
+ PN->removeIncomingValue(Pred, false);
+ // If all incoming values to the Phi are the same, we can replace the Phi
+ // with that value.
+ Value* PNV = 0;
+ if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue()))
+ if (PNV != PN) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
+ }
+ }
+}
+
+
+/// splitBasicBlock - This splits a basic block into two at the specified
+/// instruction. Note that all instructions BEFORE the specified iterator stay
+/// as part of the original basic block, an unconditional branch is added to
+/// the new BB, and the rest of the instructions in the BB are moved to the new
+/// BB, including the old terminator. This invalidates the iterator.
+///
+/// Note that this only works on well formed basic blocks (must have a
+/// terminator), and 'I' must not be the end of instruction list (which would
+/// cause a degenerate basic block to be formed, having a terminator inside of
+/// the basic block).
+///
+BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
+ assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!");
+ assert(I != InstList.end() &&
+ "Trying to get me to create degenerate basic block!");
+
+ BasicBlock *InsertBefore = llvm::next(Function::iterator(this))
+ .getNodePtrUnchecked();
+ BasicBlock *New = BasicBlock::Create(getContext(), BBName,
+ getParent(), InsertBefore);
+
+ // Move all of the specified instructions from the original basic block into
+ // the new basic block.
+ New->getInstList().splice(New->end(), this->getInstList(), I, end());
+
+ // Add a branch instruction to the newly formed basic block.
+ BranchInst::Create(New, this);
+
+ // Now we must loop through all of the successors of the New block (which
+ // _were_ the successors of the 'this' block), and update any PHI nodes in
+ // successors. If there were PHI nodes in the successors, then they need to
+ // know that incoming branches will be from New, not from Old.
+ //
+ for (succ_iterator I = succ_begin(New), E = succ_end(New); I != E; ++I) {
+ // Loop over any phi nodes in the basic block, updating the BB field of
+ // incoming values...
+ BasicBlock *Successor = *I;
+ PHINode *PN;
+ for (BasicBlock::iterator II = Successor->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ int IDX = PN->getBasicBlockIndex(this);
+ while (IDX != -1) {
+ PN->setIncomingBlock((unsigned)IDX, New);
+ IDX = PN->getBasicBlockIndex(this);
+ }
+ }
+ }
+ return New;
+}
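+
+// A usage sketch (names are illustrative): split a block immediately before
+// an instruction so that the instruction starts the new block.
+//
+//   BasicBlock::iterator SplitPt = SomeInst; // must not be end()
+//   BasicBlock *Tail = BB->splitBasicBlock(SplitPt, "split");
+//   // BB now ends with "br label %split"; Tail contains SomeInst onward.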
+
+void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
+ TerminatorInst *TI = getTerminator();
+ if (!TI)
+ // Cope with being called on a BasicBlock that doesn't have a terminator
+ // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
+ return;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ // N.B. Succ might not be a complete BasicBlock, so don't assume
+ // that it ends with a non-phi instruction.
+ for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) {
+ PHINode *PN = dyn_cast<PHINode>(II);
+ if (!PN)
+ break;
+ int i;
+ while ((i = PN->getBasicBlockIndex(this)) >= 0)
+ PN->setIncomingBlock(i, New);
+ }
+ }
+}
+
+/// isLandingPad - Return true if this basic block is a landing pad. I.e., it's
+/// the destination of the 'unwind' edge of an invoke instruction.
+bool BasicBlock::isLandingPad() const {
+ return isa<LandingPadInst>(getFirstNonPHI());
+}
+
+/// getLandingPadInst() - Return the landingpad instruction associated with
+/// the landing pad.
+LandingPadInst *BasicBlock::getLandingPadInst() {
+ return dyn_cast<LandingPadInst>(getFirstNonPHI());
+}
+const LandingPadInst *BasicBlock::getLandingPadInst() const {
+ return dyn_cast<LandingPadInst>(getFirstNonPHI());
+}
diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt
new file mode 100644
index 000000000000..c2a4ee3aae14
--- /dev/null
+++ b/lib/IR/CMakeLists.txt
@@ -0,0 +1,51 @@
+add_llvm_library(LLVMCore
+ AsmWriter.cpp
+ Attributes.cpp
+ AutoUpgrade.cpp
+ BasicBlock.cpp
+ ConstantFold.cpp
+ Constants.cpp
+ Core.cpp
+ DataLayout.cpp
+ DebugInfo.cpp
+ DebugLoc.cpp
+ DIBuilder.cpp
+ Dominators.cpp
+ Function.cpp
+ GCOV.cpp
+ GVMaterializer.cpp
+ Globals.cpp
+ IRBuilder.cpp
+ InlineAsm.cpp
+ Instruction.cpp
+ Instructions.cpp
+ IntrinsicInst.cpp
+ LLVMContext.cpp
+ LLVMContextImpl.cpp
+ LeakDetector.cpp
+ Metadata.cpp
+ Module.cpp
+ Pass.cpp
+ PassManager.cpp
+ PassRegistry.cpp
+ PrintModulePass.cpp
+ Type.cpp
+ TypeFinder.cpp
+ Use.cpp
+ User.cpp
+ Value.cpp
+ ValueSymbolTable.cpp
+ ValueTypes.cpp
+ Verifier.cpp
+ )
+
+# Workaround: Function.cpp takes over 20 minutes to compile with MSVC 10.
+# FIXME: Suppressing optimizations in core libraries would not be a good thing.
+if( MSVC_VERSION LESS 1700 )
+  set_property(
+    SOURCE Function.cpp
+    PROPERTY COMPILE_FLAGS "/Og-"
+    )
+endif()
+
+add_dependencies(LLVMCore intrinsics_gen)
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
new file mode 100644
index 000000000000..bf93d4f95663
--- /dev/null
+++ b/lib/IR/ConstantFold.cpp
@@ -0,0 +1,2074 @@
+//===- ConstantFold.cpp - LLVM constant folder ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements folding of constants for LLVM. This implements the
+// (internal) ConstantFold.h interface, which is used by the
+// ConstantExpr::get* methods to automatically fold constants when possible.
+//
+// The current constant folding implementation is implemented in two pieces: the
+// pieces that don't need DataLayout, and the pieces that do. This is to avoid
+// a dependence in IR on Target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ConstantFold.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include <limits>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// ConstantFold*Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+/// BitCastConstantVector - Convert the specified vector Constant node to the
+/// specified vector type. At this point, we know that the elements of the
+/// input vector constant are all simple integer or FP values.
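+///
+/// For example, bitcasting <2 x i32> <i32 1, i32 2> to <2 x float> folds
+/// elementwise, bitcasting each i32 element to float.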
+static Constant *BitCastConstantVector(Constant *CV, VectorType *DstTy) {
+
+ if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy);
+ if (CV->isNullValue()) return Constant::getNullValue(DstTy);
+
+ // If this cast changes element count then we can't handle it here:
+ // doing so requires endianness information. This should be handled by
+ // Analysis/ConstantFolding.cpp
+ unsigned NumElts = DstTy->getNumElements();
+ if (NumElts != CV->getType()->getVectorNumElements())
+ return 0;
+
+ Type *DstEltTy = DstTy->getElementType();
+
+ SmallVector<Constant*, 16> Result;
+ Type *Ty = IntegerType::get(CV->getContext(), 32);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *C =
+ ConstantExpr::getExtractElement(CV, ConstantInt::get(Ty, i));
+ C = ConstantExpr::getBitCast(C, DstEltTy);
+ Result.push_back(C);
+ }
+
+ return ConstantVector::get(Result);
+}
+
+/// This function determines which opcode to use to fold two constant cast
+/// expressions together. It uses CastInst::isEliminableCastPair to determine
+/// the opcode. Consequently, it's just a wrapper around that function.
+/// @brief Determine if it is valid to fold a cast of a cast
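+///
+/// For example, (zext (zext i8 X to i16) to i32) folds to a single
+/// (zext i8 X to i32), and a bitcast of a bitcast folds to one bitcast; a
+/// return value of 0 means the pair cannot be folded.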
+static unsigned
+foldConstantCastPair(
+ unsigned opc, ///< opcode of the second cast constant expression
+ ConstantExpr *Op, ///< the first cast constant expression
+ Type *DstTy ///< destination type of the second cast
+) {
+ assert(Op && Op->isCast() && "Can't fold cast of cast without a cast!");
+ assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type");
+ assert(CastInst::isCast(opc) && "Invalid cast opcode");
+
+ // Get the types and opcodes of the two cast constant expressions.
+ Type *SrcTy = Op->getOperand(0)->getType();
+ Type *MidTy = Op->getType();
+ Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(opc);
+
+ // Assume that pointers are never more than 64 bits wide.
+ IntegerType *FakeIntPtrTy = Type::getInt64Ty(DstTy->getContext());
+
+ // Let CastInst::isEliminableCastPair do the heavy lifting.
+ return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy,
+ FakeIntPtrTy, FakeIntPtrTy,
+ FakeIntPtrTy);
+}
+
+static Constant *FoldBitCast(Constant *V, Type *DestTy) {
+ Type *SrcTy = V->getType();
+ if (SrcTy == DestTy)
+ return V; // no-op cast
+
+ // Check to see if we are casting a pointer to an aggregate to a pointer to
+ // the first element. If so, return the appropriate GEP instruction.
+ if (PointerType *PTy = dyn_cast<PointerType>(V->getType()))
+ if (PointerType *DPTy = dyn_cast<PointerType>(DestTy))
+ if (PTy->getAddressSpace() == DPTy->getAddressSpace()
+ && DPTy->getElementType()->isSized()) {
+ SmallVector<Value*, 8> IdxList;
+ Value *Zero =
+ Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
+ IdxList.push_back(Zero);
+ Type *ElTy = PTy->getElementType();
+ while (ElTy != DPTy->getElementType()) {
+ if (StructType *STy = dyn_cast<StructType>(ElTy)) {
+ if (STy->getNumElements() == 0) break;
+ ElTy = STy->getElementType(0);
+ IdxList.push_back(Zero);
+ } else if (SequentialType *STy =
+ dyn_cast<SequentialType>(ElTy)) {
+ if (ElTy->isPointerTy()) break; // Can't index into pointers!
+ ElTy = STy->getElementType();
+ IdxList.push_back(Zero);
+ } else {
+ break;
+ }
+ }
+
+ if (ElTy == DPTy->getElementType())
+ // This GEP is inbounds because all indices are zero.
+ return ConstantExpr::getInBoundsGetElementPtr(V, IdxList);
+ }
+
+ // Handle casts from one vector constant to another. We know that the src
+ // and dest type have the same size (otherwise it's an illegal cast).
+ if (VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
+ if (VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
+ assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() &&
+ "Not cast between same sized vectors!");
+ SrcTy = NULL;
+ // First, check for null. Undef is already handled.
+ if (isa<ConstantAggregateZero>(V))
+ return Constant::getNullValue(DestTy);
+
+ // Handle ConstantVector and ConstantDataVector.
+ return BitCastConstantVector(V, DestPTy);
+ }
+
+ // Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts
+ // This allows for other simplifications (although some of them
+ // can only be handled by Analysis/ConstantFolding.cpp).
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V))
+ return ConstantExpr::getBitCast(ConstantVector::get(V), DestPTy);
+ }
+
+ // Finally, implement bitcast folding now. Note that the code below does
+ // not yet handle every bitcast correctly.
+ if (isa<ConstantPointerNull>(V)) // ptr->ptr cast.
+ return ConstantPointerNull::get(cast<PointerType>(DestTy));
+
+ // Handle integral constant input.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (DestTy->isIntegerTy())
+ // Integral -> Integral. This is a no-op because the bit widths must
+ // be the same. Consequently, we just fold to V.
+ return V;
+
+ if (DestTy->isFloatingPointTy())
+ return ConstantFP::get(DestTy->getContext(),
+ APFloat(DestTy->getFltSemantics(),
+ CI->getValue()));
+
+ // Otherwise, can't fold this (vector?)
+ return 0;
+ }
+
+ // Handle ConstantFP input: FP -> Integral.
+ if (ConstantFP *FP = dyn_cast<ConstantFP>(V))
+ return ConstantInt::get(FP->getContext(),
+ FP->getValueAPF().bitcastToAPInt());
+
+ return 0;
+}
+
+
+/// ExtractConstantBytes - C is an integer constant which only has a subset of
+/// its bytes used. The bytes used are indicated by ByteStart (which is the
+/// first byte used, counting from the least significant byte) and ByteSize,
+/// which is the number of bytes used.
+///
+/// This function analyzes the specified constant to see if the specified byte
+/// range can be returned as a simplified constant. If so, the constant is
+/// returned, otherwise null is returned.
+///
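+/// For example, extracting ByteSize=2 bytes at ByteStart=1 from the i32
+/// constant 0x12345678 yields the i16 constant 0x3456 (logically an lshr by
+/// 8 followed by a trunc to 16 bits).
+///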
+static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
+ unsigned ByteSize) {
+ assert(C->getType()->isIntegerTy() &&
+ (cast<IntegerType>(C->getType())->getBitWidth() & 7) == 0 &&
+ "Non-byte sized integer input");
+ unsigned CSize = cast<IntegerType>(C->getType())->getBitWidth()/8;
+ assert(ByteSize && "Must be accessing some piece");
+ assert(ByteStart+ByteSize <= CSize && "Extracting invalid piece from input");
+ assert(ByteSize != CSize && "Should not extract everything");
+
+ // Constant Integers are simple.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ APInt V = CI->getValue();
+ if (ByteStart)
+ V = V.lshr(ByteStart*8);
+ V = V.trunc(ByteSize*8);
+ return ConstantInt::get(CI->getContext(), V);
+ }
+
+ // If the input is a constant expr, we might be able to recursively simplify.
+ // If not, we definitely can't do anything.
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (CE == 0) return 0;
+
+ switch (CE->getOpcode()) {
+ default: return 0;
+ case Instruction::Or: {
+ Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
+ if (RHS == 0)
+ return 0;
+
+ // X | -1 -> -1.
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS))
+ if (RHSC->isAllOnesValue())
+ return RHSC;
+
+ Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
+ if (LHS == 0)
+ return 0;
+ return ConstantExpr::getOr(LHS, RHS);
+ }
+ case Instruction::And: {
+ Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
+ if (RHS == 0)
+ return 0;
+
+ // X & 0 -> 0.
+ if (RHS->isNullValue())
+ return RHS;
+
+ Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
+ if (LHS == 0)
+ return 0;
+ return ConstantExpr::getAnd(LHS, RHS);
+ }
+ case Instruction::LShr: {
+ ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
+ if (Amt == 0)
+ return 0;
+ unsigned ShAmt = Amt->getZExtValue();
+ // Cannot analyze non-byte shifts.
+ if ((ShAmt & 7) != 0)
+ return 0;
+ ShAmt >>= 3;
+
+ // If the extract is known to be all zeros, return zero.
+ if (ByteStart >= CSize-ShAmt)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+ // If the extract is known to be fully in the input, extract it.
+ if (ByteStart+ByteSize+ShAmt <= CSize)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart+ShAmt, ByteSize);
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+
+ case Instruction::Shl: {
+ ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
+ if (Amt == 0)
+ return 0;
+ unsigned ShAmt = Amt->getZExtValue();
+ // Cannot analyze non-byte shifts.
+ if ((ShAmt & 7) != 0)
+ return 0;
+ ShAmt >>= 3;
+
+ // If the extract is known to be all zeros, return zero.
+ if (ByteStart+ByteSize <= ShAmt)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+ // If the extract is known to be fully in the input, extract it.
+ if (ByteStart >= ShAmt)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart-ShAmt, ByteSize);
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+
+ case Instruction::ZExt: {
+ unsigned SrcBitSize =
+ cast<IntegerType>(CE->getOperand(0)->getType())->getBitWidth();
+
+ // If extracting something that is completely zero, return 0.
+ if (ByteStart*8 >= SrcBitSize)
+ return Constant::getNullValue(IntegerType::get(CE->getContext(),
+ ByteSize*8));
+
+ // If exactly extracting the input, return it.
+ if (ByteStart == 0 && ByteSize*8 == SrcBitSize)
+ return CE->getOperand(0);
+
+ // If extracting something completely inside the input, and the input is a
+ // multiple of 8 bits, recurse.
+ if ((SrcBitSize&7) == 0 && (ByteStart+ByteSize)*8 <= SrcBitSize)
+ return ExtractConstantBytes(CE->getOperand(0), ByteStart, ByteSize);
+
+ // Otherwise, if extracting a subset of the input, which is not a multiple of
+ // 8 bits, do a shift and trunc to get the bits.
+ if ((ByteStart+ByteSize)*8 < SrcBitSize) {
+ assert((SrcBitSize&7) && "Shouldn't get byte sized case here");
+ Constant *Res = CE->getOperand(0);
+ if (ByteStart)
+ Res = ConstantExpr::getLShr(Res,
+ ConstantInt::get(Res->getType(), ByteStart*8));
+ return ConstantExpr::getTrunc(Res, IntegerType::get(C->getContext(),
+ ByteSize*8));
+ }
+
+ // TODO: Handle the 'partially zero' case.
+ return 0;
+ }
+ }
+}
+
+/// getFoldedSizeOf - Return a ConstantExpr with type DestTy for sizeof
+/// on Ty, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
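+/// For example, sizeof([10 x i32]) is folded to (mul nuw sizeof(i32), 10),
+/// exposing the known factor of 10 to any surrounding folding.
+///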
+static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy,
+ bool Folded) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
+ Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
+ return ConstantExpr::getNUWMul(E, N);
+ }
+
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has size zero.
+ if (NumElems == 0)
+ return ConstantExpr::getNullValue(DestTy);
+ // Check for a struct with all members having the same size.
+ Constant *MemberSize =
+ getFoldedSizeOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberSize !=
+ getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame) {
+ Constant *N = ConstantInt::get(DestTy, NumElems);
+ return ConstantExpr::getNUWMul(MemberSize, N);
+ }
+ }
+
+ // Pointer size doesn't depend on the pointee type, so canonicalize them
+ // to an arbitrary pointee.
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (!PTy->getElementType()->isIntegerTy(1))
+ return
+ getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1),
+ PTy->getAddressSpace()),
+ DestTy, true);
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular sizeof expression.
+ Constant *C = ConstantExpr::getSizeOf(Ty);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+/// getFoldedAlignOf - Return a ConstantExpr with type DestTy for alignof
+/// on Ty, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
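+/// For example, alignof([8 x double]) folds to alignof(double), and the
+/// alignof of any packed struct folds to the constant 1.
+///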
+static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy,
+ bool Folded) {
+ // The alignment of an array is equal to the alignment of the
+ // array element. Note that this is not always true for vectors.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *C = ConstantExpr::getAlignOf(ATy->getElementType());
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy,
+ false),
+ C, DestTy);
+ return C;
+ }
+
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ // Packed structs always have an alignment of 1.
+ if (STy->isPacked())
+ return ConstantInt::get(DestTy, 1);
+
+ // Otherwise, struct alignment is the maximum alignment of any member.
+ // Without target data, we can't compare much, but we can check to see
+ // if all the members have the same alignment.
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has minimal alignment.
+ if (NumElems == 0)
+ return ConstantInt::get(DestTy, 1);
+ // Check for a struct with all members having the same alignment.
+ Constant *MemberAlign =
+ getFoldedAlignOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame)
+ return MemberAlign;
+ }
+
+ // Pointer alignment doesn't depend on the pointee type, so canonicalize them
+ // to an arbitrary pointee.
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (!PTy->getElementType()->isIntegerTy(1))
+ return
+ getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(),
+ 1),
+ PTy->getAddressSpace()),
+ DestTy, true);
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular alignof expression.
+ Constant *C = ConstantExpr::getAlignOf(Ty);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+/// getFoldedOffsetOf - Return a ConstantExpr with type DestTy for offsetof
+/// on Ty and FieldNo, with any known factors factored out. If Folded is false,
+/// return null if no factoring was possible, to avoid endlessly
+/// bouncing an unfoldable expression back into the top-level folder.
+///
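+/// For example, the offset of element FieldNo of [N x i32] folds to
+/// (mul nuw sizeof(i32), FieldNo).
+///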
+static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo,
+ Type *DestTy,
+ bool Folded) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
+ DestTy, false),
+ FieldNo, DestTy);
+ Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
+ return ConstantExpr::getNUWMul(E, N);
+ }
+
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ unsigned NumElems = STy->getNumElements();
+ // An empty struct has no members.
+ if (NumElems == 0)
+ return 0;
+ // Check for a struct with all members having the same size.
+ Constant *MemberSize =
+ getFoldedSizeOf(STy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberSize !=
+ getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame) {
+ Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo,
+ false,
+ DestTy,
+ false),
+ FieldNo, DestTy);
+ return ConstantExpr::getNUWMul(MemberSize, N);
+ }
+ }
+
+ // If there's no interesting folding happening, bail so that we don't create
+ // a constant that looks like it needs folding but really doesn't.
+ if (!Folded)
+ return 0;
+
+ // Base case: Get a regular offsetof expression.
+ Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo);
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ DestTy, false),
+ C, DestTy);
+ return C;
+}
+
+Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
+ Type *DestTy) {
+ if (isa<UndefValue>(V)) {
+ // zext(undef) = 0, because the top bits will be zero.
+ // sext(undef) = 0, because the top bits will all be the same.
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (opc == Instruction::ZExt || opc == Instruction::SExt ||
+ opc == Instruction::UIToFP || opc == Instruction::SIToFP)
+ return Constant::getNullValue(DestTy);
+ return UndefValue::get(DestTy);
+ }
+
+ if (V->isNullValue() && !DestTy->isX86_MMXTy())
+ return Constant::getNullValue(DestTy);
+
+ // If the cast operand is a constant expression, there's a few things we can
+ // do to try to simplify it.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (CE->isCast()) {
+ // Try hard to fold cast of cast because they are often eliminable.
+ if (unsigned newOpc = foldConstantCastPair(opc, CE, DestTy))
+ return ConstantExpr::getCast(newOpc, CE->getOperand(0), DestTy);
+ } else if (CE->getOpcode() == Instruction::GetElementPtr) {
+ // If all of the indexes in the GEP are null values, there is no pointer
+ // adjustment going on. We might as well cast the source pointer.
+ bool isAllNull = true;
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!CE->getOperand(i)->isNullValue()) {
+ isAllNull = false;
+ break;
+ }
+ if (isAllNull)
+ // This is casting one pointer type to another, always BitCast
+ return ConstantExpr::getPointerCast(CE->getOperand(0), DestTy);
+ }
+ }
+
+ // If the cast operand is a constant vector, perform the cast by
+ // operating on each element. In the case of bitcasts, the element
+ // count may be mismatched; don't attempt to handle that here.
+ if ((isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) &&
+ DestTy->isVectorTy() &&
+ DestTy->getVectorNumElements() == V->getType()->getVectorNumElements()) {
+ SmallVector<Constant*, 16> res;
+ VectorType *DestVecTy = cast<VectorType>(DestTy);
+ Type *DstEltTy = DestVecTy->getElementType();
+ Type *Ty = IntegerType::get(V->getContext(), 32);
+ for (unsigned i = 0, e = V->getType()->getVectorNumElements(); i != e; ++i) {
+ Constant *C =
+ ConstantExpr::getExtractElement(V, ConstantInt::get(Ty, i));
+ res.push_back(ConstantExpr::getCast(opc, C, DstEltTy));
+ }
+ return ConstantVector::get(res);
+ }
+
+ // We actually have to do a cast now. Perform the cast according to the
+ // opcode specified.
+ switch (opc) {
+ default:
+ llvm_unreachable("Failed to cast constant expression");
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ bool ignored;
+ APFloat Val = FPC->getValueAPF();
+ Val.convert(DestTy->isHalfTy() ? APFloat::IEEEhalf :
+ DestTy->isFloatTy() ? APFloat::IEEEsingle :
+ DestTy->isDoubleTy() ? APFloat::IEEEdouble :
+ DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended :
+ DestTy->isFP128Ty() ? APFloat::IEEEquad :
+ DestTy->isPPC_FP128Ty() ? APFloat::PPCDoubleDouble :
+ APFloat::Bogus,
+ APFloat::rmNearestTiesToEven, &ignored);
+ return ConstantFP::get(V->getContext(), Val);
+ }
+ return 0; // Can't fold.
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
+ const APFloat &V = FPC->getValueAPF();
+ bool ignored;
+ uint64_t x[2];
+ uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ (void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI,
+ APFloat::rmTowardZero, &ignored);
+ APInt Val(DestBitWidth, x);
+ return ConstantInt::get(FPC->getContext(), Val);
+ }
+ return 0; // Can't fold.
+ case Instruction::IntToPtr: // always treated as unsigned
+ if (V->isNullValue()) // Is it an integral null value?
+ return ConstantPointerNull::get(cast<PointerType>(DestTy));
+ return 0; // Other pointer types cannot be cast
+ case Instruction::PtrToInt: // always treated as unsigned
+ // Is it a null pointer value?
+ if (V->isNullValue())
+ return ConstantInt::get(DestTy, 0);
+ // If this is a sizeof-like expression, pull out multiplications by
+ // known factors to expose them to subsequent folding. If it's an
+ // alignof-like expression, factor out known factors.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::GetElementPtr &&
+ CE->getOperand(0)->isNullValue()) {
+ Type *Ty =
+ cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+ if (CE->getNumOperands() == 2) {
+ // Handle a sizeof-like expression.
+ Constant *Idx = CE->getOperand(1);
+ bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne();
+ if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) {
+ Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true,
+ DestTy, false),
+ Idx, DestTy);
+ return ConstantExpr::getMul(C, Idx);
+ }
+ } else if (CE->getNumOperands() == 3 &&
+ CE->getOperand(1)->isNullValue()) {
+ // Handle an alignof-like expression.
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isPacked()) {
+ ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2));
+ if (CI->isOne() &&
+ STy->getNumElements() == 2 &&
+ STy->getElementType(0)->isIntegerTy(1)) {
+ return getFoldedAlignOf(STy->getElementType(1), DestTy, false);
+ }
+ }
+ // Handle an offsetof-like expression.
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
+ if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
+ DestTy, false))
+ return C;
+ }
+ }
+ }
+ // Other pointer types cannot be cast
+ return 0;
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt api = CI->getValue();
+ APFloat apf(DestTy->getFltSemantics(),
+ APInt::getNullValue(DestTy->getPrimitiveSizeInBits()));
+ (void)apf.convertFromAPInt(api,
+ opc==Instruction::SIToFP,
+ APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(V->getContext(), apf);
+ }
+ return 0;
+ case Instruction::ZExt:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().zext(BitWidth));
+ }
+ return 0;
+ case Instruction::SExt:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().sext(BitWidth));
+ }
+ return 0;
+ case Instruction::Trunc: {
+ uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().trunc(DestBitWidth));
+ }
+
+ // The input must be a constantexpr. See if we can simplify this based on
+ // the bytes we are demanding. Only do this if the source and dest are an
+ // even multiple of a byte.
+ if ((DestBitWidth & 7) == 0 &&
+ (cast<IntegerType>(V->getType())->getBitWidth() & 7) == 0)
+ if (Constant *Res = ExtractConstantBytes(V, 0, DestBitWidth / 8))
+ return Res;
+
+ return 0;
+ }
+ case Instruction::BitCast:
+ return FoldBitCast(V, DestTy);
+ }
+}
+
+Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
+ Constant *V1, Constant *V2) {
+ // Check for i1 and vector true/false conditions.
+ if (Cond->isNullValue()) return V2;
+ if (Cond->isAllOnesValue()) return V1;
+
+ // If the condition is a vector constant, fold the result elementwise.
+ if (ConstantVector *CondV = dyn_cast<ConstantVector>(Cond)) {
+ SmallVector<Constant*, 16> Result;
+ Type *Ty = IntegerType::get(CondV->getContext(), 32);
+ for (unsigned i = 0, e = V1->getType()->getVectorNumElements(); i != e;++i){
+ ConstantInt *Cond = dyn_cast<ConstantInt>(CondV->getOperand(i));
+ if (Cond == 0) break;
+
+ Constant *V = Cond->isNullValue() ? V2 : V1;
+ Constant *Res = ConstantExpr::getExtractElement(V, ConstantInt::get(Ty, i));
+ Result.push_back(Res);
+ }
+
+ // If we were able to build the vector, return it.
+ if (Result.size() == V1->getType()->getVectorNumElements())
+ return ConstantVector::get(Result);
+ }
+
+ if (isa<UndefValue>(Cond)) {
+ if (isa<UndefValue>(V1)) return V1;
+ return V2;
+ }
+ if (isa<UndefValue>(V1)) return V2;
+ if (isa<UndefValue>(V2)) return V1;
+ if (V1 == V2) return V1;
+
+ if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) {
+ if (TrueVal->getOpcode() == Instruction::Select)
+ if (TrueVal->getOperand(0) == Cond)
+ return ConstantExpr::getSelect(Cond, TrueVal->getOperand(1), V2);
+ }
+ if (ConstantExpr *FalseVal = dyn_cast<ConstantExpr>(V2)) {
+ if (FalseVal->getOpcode() == Instruction::Select)
+ if (FalseVal->getOperand(0) == Cond)
+ return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2));
+ }
+
+ return 0;
+}
+
+Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
+ Constant *Idx) {
+ if (isa<UndefValue>(Val)) // ee(undef, x) -> undef
+ return UndefValue::get(Val->getType()->getVectorElementType());
+ if (Val->isNullValue()) // ee(zero, x) -> zero
+ return Constant::getNullValue(Val->getType()->getVectorElementType());
+ // ee({w,x,y,z}, undef) -> undef
+ if (isa<UndefValue>(Idx))
+ return UndefValue::get(Val->getType()->getVectorElementType());
+
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
+ uint64_t Index = CIdx->getZExtValue();
+ // ee({w,x,y,z}, wrong_value) -> undef
+ if (Index >= Val->getType()->getVectorNumElements())
+ return UndefValue::get(Val->getType()->getVectorElementType());
+ return Val->getAggregateElement(Index);
+ }
+ return 0;
+}
+
+Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
+ Constant *Elt,
+ Constant *Idx) {
+ ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
+ if (!CIdx) return 0;
+ const APInt &IdxVal = CIdx->getValue();
+
+ SmallVector<Constant*, 16> Result;
+ Type *Ty = IntegerType::get(Val->getContext(), 32);
+ for (unsigned i = 0, e = Val->getType()->getVectorNumElements(); i != e; ++i){
+ if (i == IdxVal) {
+ Result.push_back(Elt);
+ continue;
+ }
+
+ Constant *C =
+ ConstantExpr::getExtractElement(Val, ConstantInt::get(Ty, i));
+ Result.push_back(C);
+ }
+
+ return ConstantVector::get(Result);
+}
+
+Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
+ Constant *V2,
+ Constant *Mask) {
+ unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
+ Type *EltTy = V1->getType()->getVectorElementType();
+
+ // Undefined shuffle mask -> undefined value.
+ if (isa<UndefValue>(Mask))
+ return UndefValue::get(VectorType::get(EltTy, MaskNumElts));
+
+ // Don't break the bitcode reader hack.
+ if (isa<ConstantExpr>(Mask)) return 0;
+
+ unsigned SrcNumElts = V1->getType()->getVectorNumElements();
+
+ // Loop over the shuffle mask, evaluating each element.
+ SmallVector<Constant*, 32> Result;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Elt = ShuffleVectorInst::getMaskValue(Mask, i);
+ if (Elt == -1) {
+ Result.push_back(UndefValue::get(EltTy));
+ continue;
+ }
+ Constant *InElt;
+ if (unsigned(Elt) >= SrcNumElts*2)
+ InElt = UndefValue::get(EltTy);
+ else if (unsigned(Elt) >= SrcNumElts) {
+ Type *Ty = IntegerType::get(V2->getContext(), 32);
+ InElt =
+ ConstantExpr::getExtractElement(V2,
+ ConstantInt::get(Ty, Elt - SrcNumElts));
+ } else {
+ Type *Ty = IntegerType::get(V1->getContext(), 32);
+ InElt = ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, Elt));
+ }
+ Result.push_back(InElt);
+ }
+
+ return ConstantVector::get(Result);
+}
+
+Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
+ ArrayRef<unsigned> Idxs) {
+ // Base case: no indices, so return the entire value.
+ if (Idxs.empty())
+ return Agg;
+
+ if (Constant *C = Agg->getAggregateElement(Idxs[0]))
+ return ConstantFoldExtractValueInstruction(C, Idxs.slice(1));
+
+ return 0;
+}
+
+Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
+ Constant *Val,
+ ArrayRef<unsigned> Idxs) {
+ // Base case: no indices, so replace the entire value.
+ if (Idxs.empty())
+ return Val;
+
+ unsigned NumElts;
+ if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
+ NumElts = ST->getNumElements();
+ else if (ArrayType *AT = dyn_cast<ArrayType>(Agg->getType()))
+ NumElts = AT->getNumElements();
+ else
+ NumElts = Agg->getType()->getVectorNumElements();
+
+ SmallVector<Constant*, 32> Result;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *C = Agg->getAggregateElement(i);
+ if (C == 0) return 0;
+
+ if (Idxs[0] == i)
+ C = ConstantFoldInsertValueInstruction(C, Val, Idxs.slice(1));
+
+ Result.push_back(C);
+ }
+
+ if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
+ return ConstantStruct::get(ST, Result);
+ if (ArrayType *AT = dyn_cast<ArrayType>(Agg->getType()))
+ return ConstantArray::get(AT, Result);
+ return ConstantVector::get(Result);
+}
+
+
+Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
+ Constant *C1, Constant *C2) {
+ // Handle UndefValue up front.
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ switch (Opcode) {
+ case Instruction::Xor:
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2))
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return Constant::getNullValue(C1->getType());
+ // Fallthrough
+ case Instruction::Add:
+ case Instruction::Sub:
+ return UndefValue::get(C1->getType());
+ case Instruction::And:
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef & undef -> undef
+ return C1;
+ return Constant::getNullValue(C1->getType()); // undef & X -> 0
+ case Instruction::Mul: {
+ ConstantInt *CI;
+ // X * undef -> undef if X is odd or undef
+ if (((CI = dyn_cast<ConstantInt>(C1)) && CI->getValue()[0]) ||
+ ((CI = dyn_cast<ConstantInt>(C2)) && CI->getValue()[0]) ||
+ (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+ return UndefValue::get(C1->getType());
+
+ // X * undef -> 0 otherwise
+ return Constant::getNullValue(C1->getType());
+ }
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ // undef / 1 -> undef
+ if (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv)
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2))
+ if (CI2->isOne())
+ return C1;
+ // FALL THROUGH
+ case Instruction::URem:
+ case Instruction::SRem:
+ if (!isa<UndefValue>(C2)) // undef / X -> 0
+ return Constant::getNullValue(C1->getType());
+ return C2; // X / undef -> undef
+ case Instruction::Or: // X | undef -> -1
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef | undef -> undef
+ return C1;
+ return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0
+ case Instruction::LShr:
+ if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
+ return C1; // undef lshr undef -> undef
+ return Constant::getNullValue(C1->getType()); // X lshr undef -> 0
+ // undef lshr X -> 0
+ case Instruction::AShr:
+ if (!isa<UndefValue>(C2)) // undef ashr X --> all ones
+ return Constant::getAllOnesValue(C1->getType());
+ else if (isa<UndefValue>(C1))
+ return C1; // undef ashr undef -> undef
+ else
+ return C1; // X ashr undef --> X
+ case Instruction::Shl:
+ if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
+ return C1; // undef shl undef -> undef
+ // undef << X -> 0 or X << undef -> 0
+ return Constant::getNullValue(C1->getType());
+ }
+ }
+
+ // Handle simplifications when the RHS is a constant int.
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ switch (Opcode) {
+ case Instruction::Add:
+ if (CI2->equalsInt(0)) return C1; // X + 0 == X
+ break;
+ case Instruction::Sub:
+ if (CI2->equalsInt(0)) return C1; // X - 0 == X
+ break;
+ case Instruction::Mul:
+ if (CI2->equalsInt(0)) return C2; // X * 0 == 0
+ if (CI2->equalsInt(1))
+ return C1; // X * 1 == X
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ if (CI2->equalsInt(1))
+ return C1; // X / 1 == X
+ if (CI2->equalsInt(0))
+ return UndefValue::get(CI2->getType()); // X / 0 == undef
+ break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ if (CI2->equalsInt(1))
+ return Constant::getNullValue(CI2->getType()); // X % 1 == 0
+ if (CI2->equalsInt(0))
+ return UndefValue::get(CI2->getType()); // X % 0 == undef
+ break;
+ case Instruction::And:
+ if (CI2->isZero()) return C2; // X & 0 == 0
+ if (CI2->isAllOnesValue())
+ return C1; // X & -1 == X
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ // (zext i32 to i64) & 4294967295 -> (zext i32 to i64)
+ if (CE1->getOpcode() == Instruction::ZExt) {
+ unsigned DstWidth = CI2->getType()->getBitWidth();
+ unsigned SrcWidth =
+ CE1->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ APInt PossiblySetBits(APInt::getLowBitsSet(DstWidth, SrcWidth));
+ if ((PossiblySetBits & CI2->getValue()) == PossiblySetBits)
+ return C1;
+ }
+
+ // If and'ing the address of a global with a constant, fold it.
+ if (CE1->getOpcode() == Instruction::PtrToInt &&
+ isa<GlobalValue>(CE1->getOperand(0))) {
+ GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
+
+ // Functions are at least 4-byte aligned.
+ unsigned GVAlign = GV->getAlignment();
+ if (isa<Function>(GV))
+ GVAlign = std::max(GVAlign, 4U);
+
+ if (GVAlign > 1) {
+ unsigned DstWidth = CI2->getType()->getBitWidth();
+ unsigned SrcWidth = std::min(DstWidth, Log2_32(GVAlign));
+ APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth));
+
+ // If checking bits we know are clear, return zero.
+ if ((CI2->getValue() & BitsNotSet) == CI2->getValue())
+ return Constant::getNullValue(CI2->getType());
+ }
+ }
+ }
+ break;
+ case Instruction::Or:
+ if (CI2->equalsInt(0)) return C1; // X | 0 == X
+ if (CI2->isAllOnesValue())
+ return C2; // X | -1 == -1
+ break;
+ case Instruction::Xor:
+ if (CI2->equalsInt(0)) return C1; // X ^ 0 == X
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ switch (CE1->getOpcode()) {
+ default: break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ // cmp pred ^ true -> cmp !pred
+ assert(CI2->equalsInt(1));
+ CmpInst::Predicate pred = (CmpInst::Predicate)CE1->getPredicate();
+ pred = CmpInst::getInversePredicate(pred);
+ return ConstantExpr::getCompare(pred, CE1->getOperand(0),
+ CE1->getOperand(1));
+ }
+ }
+ break;
+ case Instruction::AShr:
+ // ashr (zext C to Ty), C2 -> lshr (zext C, CSA), C2
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1))
+ if (CE1->getOpcode() == Instruction::ZExt) // Top bits known zero.
+ return ConstantExpr::getLShr(C1, C2);
+ break;
+ }
+ } else if (isa<ConstantInt>(C1)) {
+ // If C1 is a ConstantInt and C2 is not, swap the operands.
+ if (Instruction::isCommutative(Opcode))
+ return ConstantExpr::get(Opcode, C2, C1);
+ }
+
+ // At this point we know neither constant is an UndefValue.
+ if (ConstantInt *CI1 = dyn_cast<ConstantInt>(C1)) {
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
+ const APInt &C1V = CI1->getValue();
+ const APInt &C2V = CI2->getValue();
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::Add:
+ return ConstantInt::get(CI1->getContext(), C1V + C2V);
+ case Instruction::Sub:
+ return ConstantInt::get(CI1->getContext(), C1V - C2V);
+ case Instruction::Mul:
+ return ConstantInt::get(CI1->getContext(), C1V * C2V);
+ case Instruction::UDiv:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V));
+ case Instruction::SDiv:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef
+ return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V));
+ case Instruction::URem:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ return ConstantInt::get(CI1->getContext(), C1V.urem(C2V));
+ case Instruction::SRem:
+ assert(!CI2->isNullValue() && "Div by zero handled above");
+ if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef
+ return ConstantInt::get(CI1->getContext(), C1V.srem(C2V));
+ case Instruction::And:
+ return ConstantInt::get(CI1->getContext(), C1V & C2V);
+ case Instruction::Or:
+ return ConstantInt::get(CI1->getContext(), C1V | C2V);
+ case Instruction::Xor:
+ return ConstantInt::get(CI1->getContext(), C1V ^ C2V);
+ case Instruction::Shl: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ case Instruction::LShr: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ case Instruction::AShr: {
+ uint32_t shiftAmt = C2V.getZExtValue();
+ if (shiftAmt < C1V.getBitWidth())
+ return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt));
+ else
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ }
+ }
+ }
+
+ switch (Opcode) {
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ if (CI1->equalsInt(0)) return C1;
+ break;
+ default:
+ break;
+ }
+ } else if (ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) {
+ if (ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) {
+ APFloat C1V = CFP1->getValueAPF();
+ APFloat C2V = CFP2->getValueAPF();
+ APFloat C3V = C1V; // copy for modification
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::FAdd:
+ (void)C3V.add(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FSub:
+ (void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FMul:
+ (void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FDiv:
+ (void)C3V.divide(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ case Instruction::FRem:
+ (void)C3V.mod(C2V, APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(C1->getContext(), C3V);
+ }
+ }
+ } else if (VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
+ // Perform elementwise folding.
+ SmallVector<Constant*, 16> Result;
+ Type *Ty = IntegerType::get(VTy->getContext(), 32);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *LHS =
+ ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i));
+ Constant *RHS =
+ ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i));
+
+ Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
+ }
+
+ return ConstantVector::get(Result);
+ }
+
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ // There are many possible foldings we could do here. We should probably
+ // at least fold add of a pointer with an integer into the appropriate
+ // getelementptr. This will improve alias analysis a bit.
+
+ // Given ((a + b) + c), if (b + c) folds to something interesting, return
+ // (a + (b + c)).
+ if (Instruction::isAssociative(Opcode) && CE1->getOpcode() == Opcode) {
+ Constant *T = ConstantExpr::get(Opcode, CE1->getOperand(1), C2);
+ if (!isa<ConstantExpr>(T) || cast<ConstantExpr>(T)->getOpcode() != Opcode)
+ return ConstantExpr::get(Opcode, CE1->getOperand(0), T);
+ }
+ } else if (isa<ConstantExpr>(C2)) {
+ // If C2 is a constant expr and C1 isn't, flop them around and fold the
+ // other way if possible.
+ if (Instruction::isCommutative(Opcode))
+ return ConstantFoldBinaryInstruction(Opcode, C2, C1);
+ }
+
+ // i1 can be simplified in many cases.
+ if (C1->getType()->isIntegerTy(1)) {
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ return ConstantExpr::getXor(C1, C2);
+ case Instruction::Mul:
+ return ConstantExpr::getAnd(C1, C2);
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // We can assume that C2 == 0. If it were one the result would be
+ // undefined because the shift value is as large as the bitwidth.
+ return C1;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ // We can assume that C2 == 1. If it were zero the result would be
+ // undefined through division by zero.
+ return C1;
+ case Instruction::URem:
+ case Instruction::SRem:
+ // We can assume that C2 == 1. If it were zero the result would be
+ // undefined through division by zero.
+ return ConstantInt::getFalse(C1->getContext());
+ default:
+ break;
+ }
+ }
+
+ // We don't know how to fold this.
+ return 0;
+}
+
+/// isMaybeZeroSizedType - This type may be zero sized if it's an array or
+/// structure of zero sized types. The only leaf zero sized type is an empty
+/// structure.
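+/// For example, {} and [4 x {}] are zero sized, while an opaque struct only
+/// might be zero sized (hence the "maybe").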
+static bool isMaybeZeroSizedType(Type *Ty) {
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (STy->isOpaque()) return true; // Can't say.
+
+ // If all of the elements have zero size, this does too.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ if (!isMaybeZeroSizedType(STy->getElementType(i))) return false;
+ return true;
+
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ return isMaybeZeroSizedType(ATy->getElementType());
+ }
+ return false;
+}
+
+/// IdxCompare - Compare the two constants as though they were getelementptr
+/// indices. This allows the types to be coerced to a common type.
+///
+/// If the two constants are the "same" (after coercion), return 0. If the
+/// first is less than the second, return -1, if the second is less than the
+/// first, return 1. If the constants are not integral, return -2.
+///
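+/// For example, comparing the indices (i32 7) and (i64 9) sign extends them
+/// to a common i64 and returns -1, assuming ElTy is not possibly zero sized.
+///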
+static int IdxCompare(Constant *C1, Constant *C2, Type *ElTy) {
+ if (C1 == C2) return 0;
+
+ // Ok, we found a different index. If they are not ConstantInt, we can't do
+ // anything with them.
+ if (!isa<ConstantInt>(C1) || !isa<ConstantInt>(C2))
+ return -2; // don't know!
+
+ // Ok, we have two differing integer indices. Sign extend them to be the same
+ // type. A 64-bit integer is always big enough, so we use i64.
+ if (!C1->getType()->isIntegerTy(64))
+ C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext()));
+
+ if (!C2->getType()->isIntegerTy(64))
+ C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext()));
+
+ if (C1 == C2) return 0; // They are equal
+
+ // If the type being indexed over is really just a zero sized type, there is
+ // no pointer difference being made here.
+ if (isMaybeZeroSizedType(ElTy))
+ return -2; // dunno.
+
+ // If they are really different, now that they are the same type, then we
+ // found a difference!
+ if (cast<ConstantInt>(C1)->getSExtValue() <
+ cast<ConstantInt>(C2)->getSExtValue())
+ return -1;
+ else
+ return 1;
+}
+
+/// evaluateFCmpRelation - This function determines if there is anything we can
+/// decide about the two constants provided. This doesn't need to handle simple
+/// things like ConstantFP comparisons, but should instead handle ConstantExprs.
+/// If we can determine that the two constants have a particular relation to
+/// each other, we should return the corresponding FCmpInst predicate,
+/// otherwise return FCmpInst::BAD_FCMP_PREDICATE. This is used below in
+/// ConstantFoldCompareInstruction.
+///
+/// To simplify this code we canonicalize the relation so that the first
+/// operand is always the most "complex" of the two. We consider ConstantFP
+/// to be the simplest, and ConstantExprs to be the most complex.
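+///
+/// For example, given the plain ConstantFP operands 1.0 and 2.0, the OLT
+/// probe below succeeds and FCMP_OLT is returned.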
+static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
+ assert(V1->getType() == V2->getType() &&
+ "Cannot compare values of different types!");
+
+ // Handle degenerate case quickly
+ if (V1 == V2) return FCmpInst::FCMP_OEQ;
+
+ if (!isa<ConstantExpr>(V1)) {
+ if (!isa<ConstantExpr>(V2)) {
+ // We distilled this down to a simple case; use the standard constant folder.
+ ConstantInt *R = 0;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OEQ;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OLT, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OLT;
+ R = dyn_cast<ConstantInt>(
+ ConstantExpr::getFCmp(FCmpInst::FCMP_OGT, V1, V2));
+ if (R && !R->isZero())
+ return FCmpInst::FCMP_OGT;
+
+ // Nothing more we can do
+ return FCmpInst::BAD_FCMP_PREDICATE;
+ }
+
+ // If the first operand is simple and the second is a ConstantExpr, swap operands.
+ FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(V2, V1);
+ if (SwappedRelation != FCmpInst::BAD_FCMP_PREDICATE)
+ return FCmpInst::getSwappedPredicate(SwappedRelation);
+ } else {
+ // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
+ // constantexpr or a simple constant.
+ ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ switch (CE1->getOpcode()) {
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ // We might be able to do something with these but we don't right now.
+ break;
+ default:
+ break;
+ }
+ }
+ // There are MANY other foldings that we could perform here. They will
+ // probably be added on demand, as they seem needed.
+ return FCmpInst::BAD_FCMP_PREDICATE;
+}
+
+/// evaluateICmpRelation - This function determines if there is anything we can
+/// decide about the two constants provided. This doesn't need to handle simple
+/// things like integer comparisons, but should instead handle ConstantExprs
+/// and GlobalValues. If we can determine that the two constants have a
+/// particular relation to each other, we should return the corresponding ICmp
+/// predicate, otherwise return ICmpInst::BAD_ICMP_PREDICATE.
+///
+/// To simplify this code we canonicalize the relation so that the first
+/// operand is always the most "complex" of the two. We consider simple
+/// constants (like ConstantInt) to be the simplest, followed by
+/// GlobalValues, followed by ConstantExpr's (the most complex).
+///
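+/// For example, comparing the address of a non-weak, non-alias global to a
+/// ConstantPointerNull folds to ICMP_NE below, since such a global can never
+/// be null.
+///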
+static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
+ bool isSigned) {
+ assert(V1->getType() == V2->getType() &&
+ "Cannot compare different types of values!");
+ if (V1 == V2) return ICmpInst::ICMP_EQ;
+
+ if (!isa<ConstantExpr>(V1) && !isa<GlobalValue>(V1) &&
+ !isa<BlockAddress>(V1)) {
+ if (!isa<GlobalValue>(V2) && !isa<ConstantExpr>(V2) &&
+ !isa<BlockAddress>(V2)) {
+ // We distilled this down to a simple case, use the standard constant
+ // folder.
+ ConstantInt *R = 0;
+ ICmpInst::Predicate pred = ICmpInst::ICMP_EQ;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+ pred = isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+ pred = isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
+ if (R && !R->isZero())
+ return pred;
+
+ // If we couldn't figure it out, bail.
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // If the first operand is simple, swap operands.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V1)) {
+ if (isa<ConstantExpr>(V2)) { // Swap as necessary.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Now we know that the RHS is a GlobalValue, BlockAddress or simple
+ // constant (which, since the types must match, means that it's a
+ // ConstantPointerNull).
+ if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
+ // Don't try to decide equality of aliases.
+ if (!isa<GlobalAlias>(GV) && !isa<GlobalAlias>(GV2))
+ if (!GV->hasExternalWeakLinkage() || !GV2->hasExternalWeakLinkage())
+ return ICmpInst::ICMP_NE;
+ } else if (isa<BlockAddress>(V2)) {
+ return ICmpInst::ICMP_NE; // Globals never equal labels.
+ } else {
+ assert(isa<ConstantPointerNull>(V2) && "Canonicalization guarantee!");
+ // GlobalVals can never be null unless they have external weak linkage.
+ // We don't try to evaluate aliases here.
+ if (!GV->hasExternalWeakLinkage() && !isa<GlobalAlias>(GV))
+ return ICmpInst::ICMP_NE;
+ }
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(V1)) {
+ if (isa<ConstantExpr>(V2)) { // Swap as necessary.
+ ICmpInst::Predicate SwappedRelation =
+ evaluateICmpRelation(V2, V1, isSigned);
+ if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
+ return ICmpInst::getSwappedPredicate(SwappedRelation);
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Now we know that the RHS is a GlobalValue, BlockAddress or simple
+ // constant (which, since the types must match, means that it is a
+ // ConstantPointerNull).
+ if (const BlockAddress *BA2 = dyn_cast<BlockAddress>(V2)) {
+ // A block address in another function can't equal this one, but block
+ // addresses in the current function might be the same if blocks are
+ // empty.
+ if (BA2->getFunction() != BA->getFunction())
+ return ICmpInst::ICMP_NE;
+ } else {
+ // Block addresses aren't null and don't equal the address of globals.
+ assert((isa<ConstantPointerNull>(V2) || isa<GlobalValue>(V2)) &&
+ "Canonicalization guarantee!");
+ return ICmpInst::ICMP_NE;
+ }
+ } else {
+ // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
+ // constantexpr, a global, block address, or a simple constant.
+ ConstantExpr *CE1 = cast<ConstantExpr>(V1);
+ Constant *CE1Op0 = CE1->getOperand(0);
+
+ switch (CE1->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ break; // We can't evaluate floating point casts or truncations.
+
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::BitCast:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // If the cast is not actually changing bits, and the second operand is a
+ // null pointer, do the comparison with the pre-casted value.
+ if (V2->isNullValue() &&
+ (CE1->getType()->isPointerTy() || CE1->getType()->isIntegerTy())) {
+ if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
+ if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
+ return evaluateICmpRelation(CE1Op0,
+ Constant::getNullValue(CE1Op0->getType()),
+ isSigned);
+ }
+ break;
+
+ case Instruction::GetElementPtr:
+ // Ok, since this is a getelementptr, we know that the constant has a
+ // pointer type. Check the various cases.
+ if (isa<ConstantPointerNull>(V2)) {
+ // If we are comparing a GEP to a null pointer, check to see if the base
+ // of the GEP equals the null pointer.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) {
+ if (GV->hasExternalWeakLinkage())
+ // Weak linkage GVals could be zero or not. We're comparing that
+ // to a null pointer, so it's greater-or-equal
+ return isSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ else
+ // If it's not weak linkage, the GVal must have a non-zero address,
+ // so the result is greater-than
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ } else if (isa<ConstantPointerNull>(CE1Op0)) {
+ // If we are indexing from a null pointer, check to see if we have any
+ // non-zero indices.
+ for (unsigned i = 1, e = CE1->getNumOperands(); i != e; ++i)
+ if (!CE1->getOperand(i)->isNullValue())
+ // Offsetting from null, must not be equal.
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ // Only zero indices from null, so the result is still null.
+ return ICmpInst::ICMP_EQ;
+ }
+ // Otherwise, we can't really say if the first operand is null or not.
+ } else if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
+ if (isa<ConstantPointerNull>(CE1Op0)) {
+ if (GV2->hasExternalWeakLinkage())
+ // Weak linkage GVals could be zero or not. We're comparing it to
+ // a null pointer, so it's less-or-equal
+ return isSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ else
+ // If it's not weak linkage, the GVal must have a non-zero address,
+ // so the result is less-than
+ return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) {
+ if (GV == GV2) {
+ // If this is a getelementptr of the same global, then it must be
+ // different. Because the types must match, the getelementptr can have
+ // at most one index, and because we fold getelementptrs with a single
+ // zero index, that index must be nonzero.
+ assert(CE1->getNumOperands() == 2 &&
+ !CE1->getOperand(1)->isNullValue() &&
+ "Surprising getelementptr!");
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ } else {
+ // If they are different globals, we don't know what the value is.
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+ }
+ } else {
+ ConstantExpr *CE2 = cast<ConstantExpr>(V2);
+ Constant *CE2Op0 = CE2->getOperand(0);
+
+ // There are MANY other foldings that we could perform here. They will
+ // probably be added on demand, as they seem needed.
+ switch (CE2->getOpcode()) {
+ default: break;
+ case Instruction::GetElementPtr:
+ // By far the most common case to handle is when the base pointers are
+ // obviously to the same global.
+ if (isa<GlobalValue>(CE1Op0) && isa<GlobalValue>(CE2Op0)) {
+ if (CE1Op0 != CE2Op0) // Don't know relative ordering.
+ return ICmpInst::BAD_ICMP_PREDICATE;
+ // Ok, we know that both getelementptr instructions are based on the
+ // same global. From this, we can precisely determine the relative
+ // ordering of the resultant pointers.
+ unsigned i = 1;
+
+ // The logic below assumes that the result of the comparison
+ // can be determined by finding the first index that differs.
+ // This doesn't work if there is over-indexing in any
+ // subsequent indices, so check for that case first.
+ if (!CE1->isGEPWithNoNotionalOverIndexing() ||
+ !CE2->isGEPWithNoNotionalOverIndexing())
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+
+        // Compare all of the operands the GEPs have in common.
+ gep_type_iterator GTI = gep_type_begin(CE1);
+ for (;i != CE1->getNumOperands() && i != CE2->getNumOperands();
+ ++i, ++GTI)
+ switch (IdxCompare(CE1->getOperand(i),
+ CE2->getOperand(i), GTI.getIndexedType())) {
+ case -1: return isSigned ? ICmpInst::ICMP_SLT:ICmpInst::ICMP_ULT;
+ case 1: return isSigned ? ICmpInst::ICMP_SGT:ICmpInst::ICMP_UGT;
+ case -2: return ICmpInst::BAD_ICMP_PREDICATE;
+ }
+
+ // Ok, we ran out of things they have in common. If any leftovers
+ // are non-zero then we have a difference, otherwise we are equal.
+ for (; i < CE1->getNumOperands(); ++i)
+ if (!CE1->getOperand(i)->isNullValue()) {
+ if (isa<ConstantInt>(CE1->getOperand(i)))
+ return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ else
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+ }
+
+ for (; i < CE2->getNumOperands(); ++i)
+ if (!CE2->getOperand(i)->isNullValue()) {
+ if (isa<ConstantInt>(CE2->getOperand(i)))
+ return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ else
+ return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
+ }
+ return ICmpInst::ICMP_EQ;
+ }
+ }
+ }
+ default:
+ break;
+ }
+ }
+
+ return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
+ Constant *C1, Constant *C2) {
+ Type *ResultTy;
+ if (VectorType *VT = dyn_cast<VectorType>(C1->getType()))
+ ResultTy = VectorType::get(Type::getInt1Ty(C1->getContext()),
+ VT->getNumElements());
+ else
+ ResultTy = Type::getInt1Ty(C1->getContext());
+
+ // Fold FCMP_FALSE/FCMP_TRUE unconditionally.
+ if (pred == FCmpInst::FCMP_FALSE)
+ return Constant::getNullValue(ResultTy);
+
+ if (pred == FCmpInst::FCMP_TRUE)
+ return Constant::getAllOnesValue(ResultTy);
+
+ // Handle some degenerate cases first
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Also, if both operands are undef, we can return undef.
+ if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) ||
+ (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+ return UndefValue::get(ResultTy);
+ // Otherwise, pick the same value as the non-undef operand, and fold
+ // it to true or false.
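+    // e.g. for ult the undef operand can be chosen equal to the other one,
+    // so the compare folds to false; for ule it folds to true.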
+ return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+ }
+
+ // icmp eq/ne(null,GV) -> false/true
+ if (C1->isNullValue()) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C2))
+      // Don't try to evaluate aliases. External weak GVs can be null.
+ if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
+ if (pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(C1->getContext());
+ else if (pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(C1->getContext());
+ }
+ // icmp eq/ne(GV,null) -> false/true
+ } else if (C2->isNullValue()) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C1))
+      // Don't try to evaluate aliases. External weak GVs can be null.
+ if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
+ if (pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(C1->getContext());
+ else if (pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(C1->getContext());
+ }
+ }
+
+ // If the comparison is a comparison between two i1's, simplify it.
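+  // For illustration: eq with C2 == true becomes C1 ^ false == C1, and
+  // ne is exactly xor on i1 values.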
+ if (C1->getType()->isIntegerTy(1)) {
+ switch(pred) {
+ case ICmpInst::ICMP_EQ:
+ if (isa<ConstantInt>(C2))
+ return ConstantExpr::getXor(C1, ConstantExpr::getNot(C2));
+ return ConstantExpr::getXor(ConstantExpr::getNot(C1), C2);
+ case ICmpInst::ICMP_NE:
+ return ConstantExpr::getXor(C1, C2);
+ default:
+ break;
+ }
+ }
+
+ if (isa<ConstantInt>(C1) && isa<ConstantInt>(C2)) {
+ APInt V1 = cast<ConstantInt>(C1)->getValue();
+ APInt V2 = cast<ConstantInt>(C2)->getValue();
+ switch (pred) {
+ default: llvm_unreachable("Invalid ICmp Predicate");
+ case ICmpInst::ICMP_EQ: return ConstantInt::get(ResultTy, V1 == V2);
+ case ICmpInst::ICMP_NE: return ConstantInt::get(ResultTy, V1 != V2);
+ case ICmpInst::ICMP_SLT: return ConstantInt::get(ResultTy, V1.slt(V2));
+ case ICmpInst::ICMP_SGT: return ConstantInt::get(ResultTy, V1.sgt(V2));
+ case ICmpInst::ICMP_SLE: return ConstantInt::get(ResultTy, V1.sle(V2));
+ case ICmpInst::ICMP_SGE: return ConstantInt::get(ResultTy, V1.sge(V2));
+ case ICmpInst::ICMP_ULT: return ConstantInt::get(ResultTy, V1.ult(V2));
+ case ICmpInst::ICMP_UGT: return ConstantInt::get(ResultTy, V1.ugt(V2));
+ case ICmpInst::ICMP_ULE: return ConstantInt::get(ResultTy, V1.ule(V2));
+ case ICmpInst::ICMP_UGE: return ConstantInt::get(ResultTy, V1.uge(V2));
+ }
+ } else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) {
+ APFloat C1V = cast<ConstantFP>(C1)->getValueAPF();
+ APFloat C2V = cast<ConstantFP>(C2)->getValueAPF();
+ APFloat::cmpResult R = C1V.compare(C2V);
+ switch (pred) {
+ default: llvm_unreachable("Invalid FCmp Predicate");
+ case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy);
+ case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy);
+ case FCmpInst::FCMP_UNO:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered);
+ case FCmpInst::FCMP_ORD:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered);
+ case FCmpInst::FCMP_UEQ:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_OEQ:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_UNE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpEqual);
+ case FCmpInst::FCMP_ONE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
+ R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_ULT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan);
+ case FCmpInst::FCMP_OLT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan);
+ case FCmpInst::FCMP_UGT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
+ R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_OGT:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_ULE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan);
+ case FCmpInst::FCMP_OLE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual);
+ case FCmpInst::FCMP_UGE:
+ return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan);
+ case FCmpInst::FCMP_OGE:
+ return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual);
+ }
+ } else if (C1->getType()->isVectorTy()) {
+ // If we can constant fold the comparison of each element, constant fold
+ // the whole vector comparison.
+ SmallVector<Constant*, 4> ResElts;
+ Type *Ty = IntegerType::get(C1->getContext(), 32);
+ // Compare the elements, producing an i1 result or constant expr.
+ for (unsigned i = 0, e = C1->getType()->getVectorNumElements(); i != e;++i){
+ Constant *C1E =
+ ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i));
+ Constant *C2E =
+ ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i));
+
+ ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E));
+ }
+
+ return ConstantVector::get(ResElts);
+ }
+
+ if (C1->getType()->isFloatingPointTy()) {
+ int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
+ switch (evaluateFCmpRelation(C1, C2)) {
+ default: llvm_unreachable("Unknown relation!");
+ case FCmpInst::FCMP_UNO:
+ case FCmpInst::FCMP_ORD:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_TRUE:
+ case FCmpInst::FCMP_FALSE:
+ case FCmpInst::BAD_FCMP_PREDICATE:
+ break; // Couldn't determine anything about these constants.
+ case FCmpInst::FCMP_OEQ: // We know that C1 == C2
+ Result = (pred == FCmpInst::FCMP_UEQ || pred == FCmpInst::FCMP_OEQ ||
+ pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE ||
+ pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
+ break;
+ case FCmpInst::FCMP_OLT: // We know that C1 < C2
+ Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
+ pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT ||
+ pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE);
+ break;
+ case FCmpInst::FCMP_OGT: // We know that C1 > C2
+ Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
+ pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT ||
+ pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
+ break;
+ case FCmpInst::FCMP_OLE: // We know that C1 <= C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
+ Result = 1;
+ break;
+    case FCmpInst::FCMP_OGE: // We know that C1 >= C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
+ Result = 1;
+ break;
+ case FCmpInst::FCMP_ONE: // We know that C1 != C2
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_OEQ || pred == FCmpInst::FCMP_UEQ)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_ONE || pred == FCmpInst::FCMP_UNE)
+ Result = 1;
+ break;
+ }
+
+ // If we evaluated the result, return it now.
+ if (Result != -1)
+ return ConstantInt::get(ResultTy, Result);
+
+ } else {
+ // Evaluate the relation between the two constants, per the predicate.
+ int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
+ switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) {
+ default: llvm_unreachable("Unknown relational!");
+ case ICmpInst::BAD_ICMP_PREDICATE:
+ break; // Couldn't determine anything about these constants.
+ case ICmpInst::ICMP_EQ: // We know the constants are equal!
+ // If we know the constants are equal, we can decide the result of this
+ // computation precisely.
+ Result = ICmpInst::isTrueWhenEqual((ICmpInst::Predicate)pred);
+ break;
+ case ICmpInst::ICMP_ULT:
+ switch (pred) {
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULE:
+ Result = 1; break;
+ case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_SLT:
+ switch (pred) {
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SLE:
+ Result = 1; break;
+ case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SGE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_UGT:
+ switch (pred) {
+ case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGE:
+ Result = 1; break;
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ switch (pred) {
+ case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SGE:
+ Result = 1; break;
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SLE:
+ Result = 0; break;
+ }
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (pred == ICmpInst::ICMP_UGT) Result = 0;
+ if (pred == ICmpInst::ICMP_ULT || pred == ICmpInst::ICMP_ULE) Result = 1;
+ break;
+ case ICmpInst::ICMP_SLE:
+ if (pred == ICmpInst::ICMP_SGT) Result = 0;
+ if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE) Result = 1;
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (pred == ICmpInst::ICMP_ULT) Result = 0;
+ if (pred == ICmpInst::ICMP_UGT || pred == ICmpInst::ICMP_UGE) Result = 1;
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (pred == ICmpInst::ICMP_SLT) Result = 0;
+ if (pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE) Result = 1;
+ break;
+ case ICmpInst::ICMP_NE:
+ if (pred == ICmpInst::ICMP_EQ) Result = 0;
+ if (pred == ICmpInst::ICMP_NE) Result = 1;
+ break;
+ }
+
+ // If we evaluated the result, return it now.
+ if (Result != -1)
+ return ConstantInt::get(ResultTy, Result);
+
+ // If the right hand side is a bitcast, try using its inverse to simplify
+ // it by moving it to the left hand side. We can't do this if it would turn
+    // a vector compare into a scalar compare or vice versa.
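+    // For illustration: icmp(pred, C1, bitcast X) becomes
+    // icmp(pred, bitcast C1, X), exposing X to further folding.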
+ if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
+ Constant *CE2Op0 = CE2->getOperand(0);
+ if (CE2->getOpcode() == Instruction::BitCast &&
+ CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) {
+ Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
+ return ConstantExpr::getICmp(pred, Inverse, CE2Op0);
+ }
+ }
+
+ // If the left hand side is an extension, try eliminating it.
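+    // For illustration: icmp ult (zext i8 X to i32), 200 can become
+    // icmp ult i8 X, 200, since 200 survives the round-trip truncation.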
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
+ if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) ||
+ (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){
+ Constant *CE1Op0 = CE1->getOperand(0);
+ Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType());
+ if (CE1Inverse == CE1Op0) {
+ // Check whether we can safely truncate the right hand side.
+ Constant *C2Inverse = ConstantExpr::getTrunc(C2, CE1Op0->getType());
+ if (ConstantExpr::getZExt(C2Inverse, C2->getType()) == C2) {
+ return ConstantExpr::getICmp(pred, CE1Inverse, C2Inverse);
+ }
+ }
+ }
+ }
+
+ if ((!isa<ConstantExpr>(C1) && isa<ConstantExpr>(C2)) ||
+ (C1->isNullValue() && !C2->isNullValue())) {
+ // If C2 is a constant expr and C1 isn't, flip them around and fold the
+ // other way if possible.
+ // Also, if C1 is null and C2 isn't, flip them around.
+ pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred);
+ return ConstantExpr::getICmp(pred, C2, C1);
+ }
+ }
+ return 0;
+}
+
+/// isInBoundsIndices - Test whether the given sequence of *normalized* indices
+/// is "inbounds".
+template<typename IndexTy>
+static bool isInBoundsIndices(ArrayRef<IndexTy> Idxs) {
+ // No indices means nothing that could be out of bounds.
+ if (Idxs.empty()) return true;
+
+ // If the first index is zero, it's in bounds.
+ if (cast<Constant>(Idxs[0])->isNullValue()) return true;
+
+ // If the first index is one and all the rest are zero, it's in bounds,
+ // by the one-past-the-end rule.
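+  // e.g. indices (1, 0, 0) address one past the end of the outermost
+  // object, which this rule still treats as in bounds.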
+ if (!cast<ConstantInt>(Idxs[0])->isOne())
+ return false;
+ for (unsigned i = 1, e = Idxs.size(); i != e; ++i)
+ if (!cast<Constant>(Idxs[i])->isNullValue())
+ return false;
+ return true;
+}
+
+template<typename IndexTy>
+static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
+ bool inBounds,
+ ArrayRef<IndexTy> Idxs) {
+ if (Idxs.empty()) return C;
+ Constant *Idx0 = cast<Constant>(Idxs[0]);
+ if ((Idxs.size() == 1 && Idx0->isNullValue()))
+ return C;
+
+ if (isa<UndefValue>(C)) {
+ PointerType *Ptr = cast<PointerType>(C->getType());
+ Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
+ assert(Ty != 0 && "Invalid indices for GEP!");
+ return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
+ }
+
+ if (C->isNullValue()) {
+ bool isNull = true;
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
+ if (!cast<Constant>(Idxs[i])->isNullValue()) {
+ isNull = false;
+ break;
+ }
+ if (isNull) {
+ PointerType *Ptr = cast<PointerType>(C->getType());
+ Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
+ assert(Ty != 0 && "Invalid indices for GEP!");
+ return ConstantPointerNull::get(PointerType::get(Ty,
+ Ptr->getAddressSpace()));
+ }
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+    // Combine Indices - If the source pointer to this getelementptr
+    // expression is itself a getelementptr, combine the indices of the two
+    // getelementptrs into a single expression.
+ //
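+    // For illustration: gep(gep(P, 0, 1), 0, 2) can combine to
+    // gep(P, 0, 1, 2) when the conditions checked below hold.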
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Type *LastTy = 0;
+ for (gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
+ I != E; ++I)
+ LastTy = *I;
+
+ if ((LastTy && isa<SequentialType>(LastTy)) || Idx0->isNullValue()) {
+ SmallVector<Value*, 16> NewIndices;
+ NewIndices.reserve(Idxs.size() + CE->getNumOperands());
+ for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
+ NewIndices.push_back(CE->getOperand(i));
+
+ // Add the last index of the source with the first index of the new GEP.
+ // Make sure to handle the case when they are actually different types.
+ Constant *Combined = CE->getOperand(CE->getNumOperands()-1);
+ // Otherwise it must be an array.
+ if (!Idx0->isNullValue()) {
+ Type *IdxTy = Combined->getType();
+ if (IdxTy != Idx0->getType()) {
+ Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext());
+ Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Int64Ty);
+ Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, Int64Ty);
+ Combined = ConstantExpr::get(Instruction::Add, C1, C2);
+ } else {
+ Combined =
+ ConstantExpr::get(Instruction::Add, Idx0, Combined);
+ }
+ }
+
+ NewIndices.push_back(Combined);
+ NewIndices.append(Idxs.begin() + 1, Idxs.end());
+ return
+ ConstantExpr::getGetElementPtr(CE->getOperand(0), NewIndices,
+ inBounds &&
+ cast<GEPOperator>(CE)->isInBounds());
+ }
+ }
+
+ // Attempt to fold casts to the same type away. For example, folding:
+ //
+ // i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*),
+ // i64 0, i64 0)
+ // into:
+ //
+ // i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
+ //
+ // Don't fold if the cast is changing address spaces.
+ if (CE->isCast() && Idxs.size() > 1 && Idx0->isNullValue()) {
+ PointerType *SrcPtrTy =
+ dyn_cast<PointerType>(CE->getOperand(0)->getType());
+ PointerType *DstPtrTy = dyn_cast<PointerType>(CE->getType());
+ if (SrcPtrTy && DstPtrTy) {
+ ArrayType *SrcArrayTy =
+ dyn_cast<ArrayType>(SrcPtrTy->getElementType());
+ ArrayType *DstArrayTy =
+ dyn_cast<ArrayType>(DstPtrTy->getElementType());
+ if (SrcArrayTy && DstArrayTy
+ && SrcArrayTy->getElementType() == DstArrayTy->getElementType()
+ && SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace())
+ return ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0),
+ Idxs, inBounds);
+ }
+ }
+ }
+
+ // Check to see if any array indices are not within the corresponding
+ // notional array bounds. If so, try to determine if they can be factored
+ // out into preceding dimensions.
+ bool Unknown = false;
+ SmallVector<Constant *, 8> NewIdxs;
+ Type *Ty = C->getType();
+ Type *Prev = 0;
+ for (unsigned i = 0, e = Idxs.size(); i != e;
+ Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ if (ATy->getNumElements() <= INT64_MAX &&
+ ATy->getNumElements() != 0 &&
+ CI->getSExtValue() >= (int64_t)ATy->getNumElements()) {
+ if (isa<SequentialType>(Prev)) {
+ // It's out of range, but we can factor it into the prior
+ // dimension.
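+            // e.g. index 7 into [4 x i32] becomes 7 srem 4 == 3 here, with
+            // 7 sdiv 4 == 1 folded into the preceding index below.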
+ NewIdxs.resize(Idxs.size());
+ ConstantInt *Factor = ConstantInt::get(CI->getType(),
+ ATy->getNumElements());
+ NewIdxs[i] = ConstantExpr::getSRem(CI, Factor);
+
+ Constant *PrevIdx = cast<Constant>(Idxs[i-1]);
+ Constant *Div = ConstantExpr::getSDiv(CI, Factor);
+
+ // Before adding, extend both operands to i64 to avoid
+ // overflow trouble.
+ if (!PrevIdx->getType()->isIntegerTy(64))
+ PrevIdx = ConstantExpr::getSExt(PrevIdx,
+ Type::getInt64Ty(Div->getContext()));
+ if (!Div->getType()->isIntegerTy(64))
+ Div = ConstantExpr::getSExt(Div,
+ Type::getInt64Ty(Div->getContext()));
+
+ NewIdxs[i-1] = ConstantExpr::getAdd(PrevIdx, Div);
+ } else {
+ // It's out of range, but the prior dimension is a struct
+ // so we can't do anything about it.
+ Unknown = true;
+ }
+ }
+ } else {
+ // We don't know if it's in range or not.
+ Unknown = true;
+ }
+ }
+
+ // If we did any factoring, start over with the adjusted indices.
+ if (!NewIdxs.empty()) {
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
+ if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]);
+ return ConstantExpr::getGetElementPtr(C, NewIdxs, inBounds);
+ }
+
+ // If all indices are known integers and normalized, we can do a simple
+ // check for the "inbounds" property.
+ if (!Unknown && !inBounds &&
+ isa<GlobalVariable>(C) && isInBoundsIndices(Idxs))
+ return ConstantExpr::getInBoundsGetElementPtr(C, Idxs);
+
+ return 0;
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+ bool inBounds,
+ ArrayRef<Constant *> Idxs) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs);
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+ bool inBounds,
+ ArrayRef<Value *> Idxs) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs);
+}
diff --git a/lib/VMCore/ConstantFold.h b/lib/IR/ConstantFold.h
index e12f27a7cb1e..e12f27a7cb1e 100644
--- a/lib/VMCore/ConstantFold.h
+++ b/lib/IR/ConstantFold.h
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
new file mode 100644
index 000000000000..1abb65643559
--- /dev/null
+++ b/lib/IR/Constants.cpp
@@ -0,0 +1,2779 @@
+//===-- Constants.cpp - Implement Constant nodes --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Constant* classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Constants.h"
+#include "ConstantFold.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Constant Class
+//===----------------------------------------------------------------------===//
+
+void Constant::anchor() { }
+
+bool Constant::isNegativeZeroValue() const {
+ // Floating point values have an explicit -0.0 value.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero() && CFP->isNegative();
+
+ // Equivalent for a vector of -0.0's.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+  if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+    if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue()))
+      if (SplatCFP->isZero() && SplatCFP->isNegative())
+ return true;
+
+  // We've already handled the true FP cases; any other FP vector can't
+  // represent -0.0.
+ if (getType()->isFPOrFPVectorTy())
+ return false;
+
+ // Otherwise, just use +0.0.
+ return isNullValue();
+}
+
+// Return true iff this constant is positive zero (floating point), negative
+// zero (floating point), or a null value.
+bool Constant::isZeroValue() const {
+ // Floating point values have an explicit -0.0 value.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero();
+
+ // Otherwise, just use +0.0.
+ return isNullValue();
+}
+
+bool Constant::isNullValue() const {
+ // 0 is null.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->isZero();
+
+ // +0.0 is null.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero() && !CFP->isNegative();
+
+  // A zero aggregate (ConstantAggregateZero) is null for aggregates, and
+  // ConstantPointerNull is null for pointers.
+ return isa<ConstantAggregateZero>(this) || isa<ConstantPointerNull>(this);
+}
+
+bool Constant::isAllOnesValue() const {
+ // Check for -1 integers
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->isMinusOne();
+
+ // Check for FP which are bitcasted from -1 integers
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->getValueAPF().bitcastToAPInt().isAllOnesValue();
+
+ // Check for constant vectors which are splats of -1 values.
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isAllOnesValue();
+
+ // Check for constant vectors which are splats of -1 values.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isAllOnesValue();
+
+ return false;
+}
+
+// Factory method to create a '0' constant of arbitrary type.
+Constant *Constant::getNullValue(Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID:
+ return ConstantInt::get(Ty, 0);
+ case Type::HalfTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEhalf));
+ case Type::FloatTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEsingle));
+ case Type::DoubleTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEdouble));
+ case Type::X86_FP80TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::x87DoubleExtended));
+ case Type::FP128TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEquad));
+ case Type::PPC_FP128TyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat(APFloat::PPCDoubleDouble,
+ APInt::getNullValue(128)));
+ case Type::PointerTyID:
+ return ConstantPointerNull::get(cast<PointerType>(Ty));
+ case Type::StructTyID:
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ return ConstantAggregateZero::get(Ty);
+ default:
+ // Function, Label, or Opaque type?
+ llvm_unreachable("Cannot create a null constant of that type!");
+ }
+}
+
+Constant *Constant::getIntegerValue(Type *Ty, const APInt &V) {
+ Type *ScalarTy = Ty->getScalarType();
+
+ // Create the base integer constant.
+ Constant *C = ConstantInt::get(Ty->getContext(), V);
+
+ // Convert an integer to a pointer, if necessary.
+ if (PointerType *PTy = dyn_cast<PointerType>(ScalarTy))
+ C = ConstantExpr::getIntToPtr(C, PTy);
+
+ // Broadcast a scalar to a vector, if necessary.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ C = ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+Constant *Constant::getAllOnesValue(Type *Ty) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ConstantInt::get(Ty->getContext(),
+ APInt::getAllOnesValue(ITy->getBitWidth()));
+
+ if (Ty->isFloatingPointTy()) {
+ APFloat FL = APFloat::getAllOnesValue(Ty->getPrimitiveSizeInBits(),
+ !Ty->isPPC_FP128Ty());
+ return ConstantFP::get(Ty->getContext(), FL);
+ }
+
+ VectorType *VTy = cast<VectorType>(Ty);
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ getAllOnesValue(VTy->getElementType()));
+}
+
+/// getAggregateElement - For aggregates (struct/array/vector) return the
+/// constant that corresponds to the specified element if possible, or null if
+/// not. This can return null if the element index is a ConstantExpr, or if
+/// 'this' is a constant expr.
+Constant *Constant::getAggregateElement(unsigned Elt) const {
+ if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(this))
+ return Elt < CS->getNumOperands() ? CS->getOperand(Elt) : 0;
+
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(this))
+ return Elt < CA->getNumOperands() ? CA->getOperand(Elt) : 0;
+
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : 0;
+
+ if (const ConstantAggregateZero *CAZ =dyn_cast<ConstantAggregateZero>(this))
+ return CAZ->getElementValue(Elt);
+
+ if (const UndefValue *UV = dyn_cast<UndefValue>(this))
+ return UV->getElementValue(Elt);
+
+ if (const ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(this))
+ return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt) : 0;
+ return 0;
+}
+
+Constant *Constant::getAggregateElement(Constant *Elt) const {
+ assert(isa<IntegerType>(Elt->getType()) && "Index must be an integer");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Elt))
+ return getAggregateElement(CI->getZExtValue());
+ return 0;
+}
+
+
+void Constant::destroyConstantImpl() {
+ // When a Constant is destroyed, there may be lingering
+ // references to the constant by other constants in the constant pool. These
+ // constants are implicitly dependent on the module that is being deleted,
+  // but they don't know that. Because we only find out when the constant
+  // is deleted, we must now notify all of our users (which should only be
+ // Constants) that they are, in fact, invalid now and should be deleted.
+ //
+ while (!use_empty()) {
+ Value *V = use_back();
+#ifndef NDEBUG // Only in builds with assertions enabled.
+ if (!isa<Constant>(V)) {
+ dbgs() << "While deleting: " << *this
+ << "\n\nUse still stuck around after Def is destroyed: "
+ << *V << "\n\n";
+ }
+#endif
+ assert(isa<Constant>(V) && "References remain to Constant being destroyed");
+ cast<Constant>(V)->destroyConstant();
+
+ // The constant should remove itself from our use list...
+ assert((use_empty() || use_back() != V) && "Constant not removed!");
+ }
+
+  // The value has no outstanding references; it is safe to delete it now.
+ delete this;
+}
+
+/// canTrap - Return true if evaluation of this constant could trap. This is
+/// true for things like constant expressions that could divide by zero.
+bool Constant::canTrap() const {
+ assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
+ // The only thing that could possibly trap are constant exprs.
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(this);
+ if (!CE) return false;
+
+ // ConstantExpr traps if any operands can trap.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (CE->getOperand(i)->canTrap())
+ return true;
+
+ // Otherwise, only specific operations can trap.
+ switch (CE->getOpcode()) {
+ default:
+ return false;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ // Div and rem can trap if the RHS is not known to be non-zero.
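+    // e.g. a constant sdiv whose divisor is zero, or is not a ConstantInt
+    // at all, is conservatively reported as potentially trapping.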
+ if (!isa<ConstantInt>(CE->getOperand(1)) ||CE->getOperand(1)->isNullValue())
+ return true;
+ return false;
+ }
+}
+
+/// isThreadDependent - Return true if the value can vary between threads.
+bool Constant::isThreadDependent() const {
+ SmallPtrSet<const Constant*, 64> Visited;
+ SmallVector<const Constant*, 64> WorkList;
+ WorkList.push_back(this);
+ Visited.insert(this);
+
+ while (!WorkList.empty()) {
+ const Constant *C = WorkList.pop_back_val();
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->isThreadLocal())
+ return true;
+ }
+
+ for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) {
+ const Constant *D = dyn_cast<Constant>(C->getOperand(I));
+ if (!D)
+ continue;
+ if (Visited.insert(D))
+ WorkList.push_back(D);
+ }
+ }
+
+ return false;
+}
+
+/// isConstantUsed - Return true if the constant has users other than constant
+/// exprs and other dangling things.
+bool Constant::isConstantUsed() const {
+ for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ const Constant *UC = dyn_cast<Constant>(*UI);
+ if (UC == 0 || isa<GlobalValue>(UC))
+ return true;
+
+ if (UC->isConstantUsed())
+ return true;
+ }
+ return false;
+}
+
+
+
+/// getRelocationInfo - This method classifies the entry according to
+/// whether or not it may generate a relocation entry. This must be
+/// conservative, so if it might codegen to a relocatable entry, it should say
+/// so. The return values are:
+///
+/// NoRelocation: This constant pool entry is guaranteed to never have a
+/// relocation applied to it (because it holds a simple constant like
+/// '4').
+/// LocalRelocation: This entry has relocations, but the entries are
+/// guaranteed to be resolvable by the static linker, so the dynamic
+/// linker will never see them.
+/// GlobalRelocations: This entry may have arbitrary relocations.
+///
+/// FIXME: This really should not be in IR.
+Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return LocalRelocation; // Local to this file/library.
+ return GlobalRelocations; // Global reference.
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(this))
+ return BA->getFunction()->getRelocationInfo();
+
+ // While raw uses of blockaddress need to be relocated, differences between
+ // two of them don't when they are for labels in the same function. This is a
+ // common idiom when creating a table for the indirect goto extension, so we
+ // handle it efficiently here.
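+  // e.g. (illustrative labels) sub(ptrtoint blockaddress(@f, %a),
+  // ptrtoint blockaddress(@f, %b)) needs no relocation, since both
+  // labels live in the same function @f.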
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(this))
+ if (CE->getOpcode() == Instruction::Sub) {
+ ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0));
+ ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1));
+ if (LHS && RHS &&
+ LHS->getOpcode() == Instruction::PtrToInt &&
+ RHS->getOpcode() == Instruction::PtrToInt &&
+ isa<BlockAddress>(LHS->getOperand(0)) &&
+ isa<BlockAddress>(RHS->getOperand(0)) &&
+ cast<BlockAddress>(LHS->getOperand(0))->getFunction() ==
+ cast<BlockAddress>(RHS->getOperand(0))->getFunction())
+ return NoRelocation;
+ }
+
+ PossibleRelocationsTy Result = NoRelocation;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ Result = std::max(Result,
+ cast<Constant>(getOperand(i))->getRelocationInfo());
+
+ return Result;
+}
+
+/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
+/// it. This involves recursively eliminating any dead users of the
+/// constantexpr.
+static bool removeDeadUsersOfConstant(const Constant *C) {
+ if (isa<GlobalValue>(C)) return false; // Cannot remove this
+
+ while (!C->use_empty()) {
+ const Constant *User = dyn_cast<Constant>(C->use_back());
+    if (!User) return false; // Non-constant usage.
+ if (!removeDeadUsersOfConstant(User))
+ return false; // Constant wasn't dead
+ }
+
+ const_cast<Constant*>(C)->destroyConstant();
+ return true;
+}
+
+
+/// removeDeadConstantUsers - If there are any dead constant users dangling
+/// off of this constant, remove them. This method is useful for clients
+/// that want to check to see if a global is unused, but don't want to deal
+/// with potentially dead constants hanging off of the globals.
+void Constant::removeDeadConstantUsers() const {
+ Value::const_use_iterator I = use_begin(), E = use_end();
+ Value::const_use_iterator LastNonDeadUser = E;
+ while (I != E) {
+ const Constant *User = dyn_cast<Constant>(*I);
+ if (User == 0) {
+ LastNonDeadUser = I;
+ ++I;
+ continue;
+ }
+
+ if (!removeDeadUsersOfConstant(User)) {
+ // If the constant wasn't dead, remember that this was the last live use
+ // and move on to the next constant.
+ LastNonDeadUser = I;
+ ++I;
+ continue;
+ }
+
+ // If the constant was dead, then the iterator is invalidated.
+ if (LastNonDeadUser == E) {
+ I = use_begin();
+ if (I == E) break;
+ } else {
+ I = LastNonDeadUser;
+ ++I;
+ }
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// ConstantInt
+//===----------------------------------------------------------------------===//
+
+void ConstantInt::anchor() { }
+
+ConstantInt::ConstantInt(IntegerType *Ty, const APInt& V)
+ : Constant(Ty, ConstantIntVal, 0, 0), Val(V) {
+ assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
+}
+
+ConstantInt *ConstantInt::getTrue(LLVMContext &Context) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ if (!pImpl->TheTrueVal)
+ pImpl->TheTrueVal = ConstantInt::get(Type::getInt1Ty(Context), 1);
+ return pImpl->TheTrueVal;
+}
+
+ConstantInt *ConstantInt::getFalse(LLVMContext &Context) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ if (!pImpl->TheFalseVal)
+ pImpl->TheFalseVal = ConstantInt::get(Type::getInt1Ty(Context), 0);
+ return pImpl->TheFalseVal;
+}
+
+Constant *ConstantInt::getTrue(Type *Ty) {
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy) {
+ assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1.");
+ return ConstantInt::getTrue(Ty->getContext());
+ }
+ assert(VTy->getElementType()->isIntegerTy(1) &&
+ "True must be vector of i1 or i1.");
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ ConstantInt::getTrue(Ty->getContext()));
+}
+
+Constant *ConstantInt::getFalse(Type *Ty) {
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy) {
+ assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1.");
+ return ConstantInt::getFalse(Ty->getContext());
+ }
+ assert(VTy->getElementType()->isIntegerTy(1) &&
+ "False must be vector of i1 or i1.");
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ ConstantInt::getFalse(Ty->getContext()));
+}
+
+
+// Get a ConstantInt from an APInt. Note that the value stored in the DenseMap
+// as the key is a DenseMapAPIntKeyInfo::KeyTy, which provides operator== and
+// operator!= to ensure that the DenseMap doesn't attempt to compare APInts of
+// different widths, which would violate an APInt class invariant and trigger
+// an assertion.
+ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
+ // Get the corresponding integer type for the bit width of the value.
+ IntegerType *ITy = IntegerType::get(Context, V.getBitWidth());
+ // get an existing value or the insertion position
+ DenseMapAPIntKeyInfo::KeyTy Key(V, ITy);
+ ConstantInt *&Slot = Context.pImpl->IntConstants[Key];
+ if (!Slot) Slot = new ConstantInt(ITy, V);
+ return Slot;
+}
+
+Constant *ConstantInt::get(Type *Ty, uint64_t V, bool isSigned) {
+ Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned);
+
+ // For vectors, broadcast the value.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+ConstantInt *ConstantInt::get(IntegerType *Ty, uint64_t V,
+ bool isSigned) {
+ return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
+}
+
+ConstantInt *ConstantInt::getSigned(IntegerType *Ty, int64_t V) {
+ return get(Ty, V, true);
+}
+
+Constant *ConstantInt::getSigned(Type *Ty, int64_t V) {
+ return get(Ty, V, true);
+}
+
+Constant *ConstantInt::get(Type *Ty, const APInt& V) {
+ ConstantInt *C = get(Ty->getContext(), V);
+ assert(C->getType() == Ty->getScalarType() &&
+ "ConstantInt type doesn't match the type implied by its value!");
+
+ // For vectors, broadcast the value.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+ConstantInt *ConstantInt::get(IntegerType* Ty, StringRef Str,
+ uint8_t radix) {
+ return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantFP
+//===----------------------------------------------------------------------===//
+
+static const fltSemantics *TypeToFloatSemantics(Type *Ty) {
+ if (Ty->isHalfTy())
+ return &APFloat::IEEEhalf;
+ if (Ty->isFloatTy())
+ return &APFloat::IEEEsingle;
+ if (Ty->isDoubleTy())
+ return &APFloat::IEEEdouble;
+ if (Ty->isX86_FP80Ty())
+ return &APFloat::x87DoubleExtended;
+  if (Ty->isFP128Ty())
+ return &APFloat::IEEEquad;
+
+ assert(Ty->isPPC_FP128Ty() && "Unknown FP format");
+ return &APFloat::PPCDoubleDouble;
+}
+
+void ConstantFP::anchor() { }
+
+/// get() - This returns a ConstantFP for the specified value in the specified
+/// type. This should only be used for simple constant values like 2.0/1.0
+/// etc., that are known-valid both as double and as the target format.
+Constant *ConstantFP::get(Type *Ty, double V) {
+ LLVMContext &Context = Ty->getContext();
+
+ APFloat FV(V);
+ bool ignored;
+ FV.convert(*TypeToFloatSemantics(Ty->getScalarType()),
+ APFloat::rmNearestTiesToEven, &ignored);
+ Constant *C = get(Context, FV);
+
+ // For vectors, broadcast the value.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+
+Constant *ConstantFP::get(Type *Ty, StringRef Str) {
+ LLVMContext &Context = Ty->getContext();
+
+ APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
+ Constant *C = get(Context, FV);
+
+ // For vectors, broadcast the value.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+
+ return C;
+}
+
+
+ConstantFP *ConstantFP::getNegativeZero(Type *Ty) {
+ LLVMContext &Context = Ty->getContext();
+ APFloat apf = cast<ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
+ apf.changeSign();
+ return get(Context, apf);
+}
+
+
+Constant *ConstantFP::getZeroValueForNegation(Type *Ty) {
+ Type *ScalarTy = Ty->getScalarType();
+ if (ScalarTy->isFloatingPointTy()) {
+ Constant *C = getNegativeZero(ScalarTy);
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+ return C;
+ }
+
+ return Constant::getNullValue(Ty);
+}
+
+
+// ConstantFP accessors.
+ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
+ DenseMapAPFloatKeyInfo::KeyTy Key(V);
+
+ LLVMContextImpl* pImpl = Context.pImpl;
+
+ ConstantFP *&Slot = pImpl->FPConstants[Key];
+
+ if (!Slot) {
+ Type *Ty;
+ if (&V.getSemantics() == &APFloat::IEEEhalf)
+ Ty = Type::getHalfTy(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEsingle)
+ Ty = Type::getFloatTy(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEdouble)
+ Ty = Type::getDoubleTy(Context);
+ else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
+ Ty = Type::getX86_FP80Ty(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEquad)
+ Ty = Type::getFP128Ty(Context);
+ else {
+ assert(&V.getSemantics() == &APFloat::PPCDoubleDouble &&
+ "Unknown FP format");
+ Ty = Type::getPPC_FP128Ty(Context);
+ }
+ Slot = new ConstantFP(Ty, V);
+ }
+
+ return Slot;
+}
+
+ConstantFP *ConstantFP::getInfinity(Type *Ty, bool Negative) {
+ const fltSemantics &Semantics = *TypeToFloatSemantics(Ty);
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getInf(Semantics, Negative));
+}
+
+ConstantFP::ConstantFP(Type *Ty, const APFloat& V)
+ : Constant(Ty, ConstantFPVal, 0, 0), Val(V) {
+ assert(&V.getSemantics() == TypeToFloatSemantics(Ty) &&
+ "FP type Mismatch");
+}
+
+bool ConstantFP::isExactlyValue(const APFloat &V) const {
+ return Val.bitwiseIsEqual(V);
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantAggregateZero Implementation
+//===----------------------------------------------------------------------===//
+
+/// getSequentialElement - If this CAZ has array or vector type, return a zero
+/// with the right element type.
+Constant *ConstantAggregateZero::getSequentialElement() const {
+ return Constant::getNullValue(getType()->getSequentialElementType());
+}
+
+/// getStructElement - If this CAZ has struct type, return a zero with the
+/// right element type for the specified element.
+Constant *ConstantAggregateZero::getStructElement(unsigned Elt) const {
+ return Constant::getNullValue(getType()->getStructElementType(Elt));
+}
+
+/// getElementValue - Return a zero of the right value for the specified GEP
+/// index if we can, otherwise return null (e.g. if C is a ConstantExpr).
+Constant *ConstantAggregateZero::getElementValue(Constant *C) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(cast<ConstantInt>(C)->getZExtValue());
+}
+
+/// getElementValue - Return a zero of the right value for the specified GEP
+/// index.
+Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(Idx);
+}
+
+
+//===----------------------------------------------------------------------===//
+// UndefValue Implementation
+//===----------------------------------------------------------------------===//
+
+/// getSequentialElement - If this undef has array or vector type, return an
+/// undef with the right element type.
+UndefValue *UndefValue::getSequentialElement() const {
+ return UndefValue::get(getType()->getSequentialElementType());
+}
+
+/// getStructElement - If this undef has struct type, return an undef with the
+/// right element type for the specified element.
+UndefValue *UndefValue::getStructElement(unsigned Elt) const {
+ return UndefValue::get(getType()->getStructElementType(Elt));
+}
+
+/// getElementValue - Return an undef of the right value for the specified GEP
+/// index if we can, otherwise return null (e.g. if C is a ConstantExpr).
+UndefValue *UndefValue::getElementValue(Constant *C) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(cast<ConstantInt>(C)->getZExtValue());
+}
+
+/// getElementValue - Return an undef of the right value for the specified GEP
+/// index.
+UndefValue *UndefValue::getElementValue(unsigned Idx) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(Idx);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// ConstantXXX Classes
+//===----------------------------------------------------------------------===//
+
+template <typename ItTy, typename EltTy>
+static bool rangeOnlyContains(ItTy Start, ItTy End, EltTy Elt) {
+ for (; Start != End; ++Start)
+ if (*Start != Elt)
+ return false;
+ return true;
+}
+
+ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
+ : Constant(T, ConstantArrayVal,
+ OperandTraits<ConstantArray>::op_end(this) - V.size(),
+ V.size()) {
+ assert(V.size() == T->getNumElements() &&
+ "Invalid initializer vector for constant array");
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ assert(V[i]->getType() == T->getElementType() &&
+ "Initializer for array element doesn't match array element type!");
+ std::copy(V.begin(), V.end(), op_begin());
+}
+
+Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
+ // Empty arrays are canonicalized to ConstantAggregateZero.
+ if (V.empty())
+ return ConstantAggregateZero::get(Ty);
+
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ assert(V[i]->getType() == Ty->getElementType() &&
+ "Wrong type in array element initializer");
+ }
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+
+  // If this is an all-zero array, return a ConstantAggregateZero object. If
+  // all undef, return an UndefValue; if "all simple", return a
+  // ConstantDataArray.
+ Constant *C = V[0];
+ if (isa<UndefValue>(C) && rangeOnlyContains(V.begin(), V.end(), C))
+ return UndefValue::get(Ty);
+
+ if (C->isNullValue() && rangeOnlyContains(V.begin(), V.end(), C))
+ return ConstantAggregateZero::get(Ty);
+
+ // Check to see if all of the elements are ConstantFP or ConstantInt and if
+ // the element type is compatible with ConstantDataVector. If so, use it.
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
+ // We speculatively build the elements here even if it turns out that there
+ // is a constantexpr or something else weird in the array, since it is so
+ // uncommon for that to happen.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(64)) {
+ SmallVector<uint64_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ }
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToFloat());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToDouble());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ }
+ }
+ }
+
+ // Otherwise, we really do want to create a ConstantArray.
+ return pImpl->ArrayConstants.getOrCreate(Ty, V);
+}
+
+/// getTypeForElements - Return an anonymous struct type to use for a constant
+/// with the specified set of elements. The list must not be empty.
+StructType *ConstantStruct::getTypeForElements(LLVMContext &Context,
+ ArrayRef<Constant*> V,
+ bool Packed) {
+ unsigned VecSize = V.size();
+ SmallVector<Type*, 16> EltTypes(VecSize);
+ for (unsigned i = 0; i != VecSize; ++i)
+ EltTypes[i] = V[i]->getType();
+
+ return StructType::get(Context, EltTypes, Packed);
+}
+
+
+StructType *ConstantStruct::getTypeForElements(ArrayRef<Constant*> V,
+ bool Packed) {
+ assert(!V.empty() &&
+ "ConstantStruct::getTypeForElements cannot be called on empty list");
+ return getTypeForElements(V[0]->getContext(), V, Packed);
+}
+
+
+ConstantStruct::ConstantStruct(StructType *T, ArrayRef<Constant *> V)
+ : Constant(T, ConstantStructVal,
+ OperandTraits<ConstantStruct>::op_end(this) - V.size(),
+ V.size()) {
+ assert(V.size() == T->getNumElements() &&
+ "Invalid initializer vector for constant structure");
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ assert((T->isOpaque() || V[i]->getType() == T->getElementType(i)) &&
+ "Initializer for struct element doesn't match struct element type!");
+ std::copy(V.begin(), V.end(), op_begin());
+}
+
+// ConstantStruct accessors.
+Constant *ConstantStruct::get(StructType *ST, ArrayRef<Constant*> V) {
+ assert((ST->isOpaque() || ST->getNumElements() == V.size()) &&
+ "Incorrect # elements specified to ConstantStruct::get");
+
+ // Create a ConstantAggregateZero value if all elements are zeros.
+ bool isZero = true;
+ bool isUndef = false;
+
+ if (!V.empty()) {
+ isUndef = isa<UndefValue>(V[0]);
+ isZero = V[0]->isNullValue();
+ if (isUndef || isZero) {
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ if (!V[i]->isNullValue())
+ isZero = false;
+ if (!isa<UndefValue>(V[i]))
+ isUndef = false;
+ }
+ }
+ }
+ if (isZero)
+ return ConstantAggregateZero::get(ST);
+ if (isUndef)
+ return UndefValue::get(ST);
+
+ return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
+}
+
+Constant *ConstantStruct::get(StructType *T, ...) {
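+  // Note: the variadic argument list must be null-terminated; the loop
+  // below stops at the first null Constant*.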
+ va_list ap;
+ SmallVector<Constant*, 8> Values;
+ va_start(ap, T);
+ while (Constant *Val = va_arg(ap, llvm::Constant*))
+ Values.push_back(Val);
+ va_end(ap);
+ return get(T, Values);
+}
+
+ConstantVector::ConstantVector(VectorType *T, ArrayRef<Constant *> V)
+ : Constant(T, ConstantVectorVal,
+ OperandTraits<ConstantVector>::op_end(this) - V.size(),
+ V.size()) {
+ for (size_t i = 0, e = V.size(); i != e; i++)
+ assert(V[i]->getType() == T->getElementType() &&
+ "Initializer for vector element doesn't match vector element type!");
+ std::copy(V.begin(), V.end(), op_begin());
+}
+
+// ConstantVector accessors.
+Constant *ConstantVector::get(ArrayRef<Constant*> V) {
+ assert(!V.empty() && "Vectors can't be empty");
+ VectorType *T = VectorType::get(V.front()->getType(), V.size());
+ LLVMContextImpl *pImpl = T->getContext().pImpl;
+
+  // If this is an all-zero or all-undef vector, return a
+  // ConstantAggregateZero or UndefValue, respectively.
+ Constant *C = V[0];
+ bool isZero = C->isNullValue();
+ bool isUndef = isa<UndefValue>(C);
+
+ if (isZero || isUndef) {
+ for (unsigned i = 1, e = V.size(); i != e; ++i)
+ if (V[i] != C) {
+ isZero = isUndef = false;
+ break;
+ }
+ }
+
+ if (isZero)
+ return ConstantAggregateZero::get(T);
+ if (isUndef)
+ return UndefValue::get(T);
+
+ // Check to see if all of the elements are ConstantFP or ConstantInt and if
+ // the element type is compatible with ConstantDataVector. If so, use it.
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
+ // We speculatively build the elements here even if it turns out that there
+ // is a constantexpr or something else weird in the array, since it is so
+ // uncommon for that to happen.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(64)) {
+ SmallVector<uint64_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ }
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToFloat());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToDouble());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ }
+ }
+ }
+
+ // Otherwise, the element type isn't compatible with ConstantDataVector, or
+  // the operand list contains a ConstantExpr or something else strange.
+ return pImpl->VectorConstants.getOrCreate(T, V);
+}
+
+Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) {
+ // If this splat is compatible with ConstantDataVector, use it instead of
+ // ConstantVector.
+ if ((isa<ConstantFP>(V) || isa<ConstantInt>(V)) &&
+ ConstantDataSequential::isElementTypeCompatible(V->getType()))
+ return ConstantDataVector::getSplat(NumElts, V);
+
+ SmallVector<Constant*, 32> Elts(NumElts, V);
+ return get(Elts);
+}
+
+
+// Utility function for determining if a ConstantExpr is a CastOp or not. This
+// can't be inline because we don't want to #include Instruction.h into
+// Constant.h
+bool ConstantExpr::isCast() const {
+ return Instruction::isCast(getOpcode());
+}
+
+bool ConstantExpr::isCompare() const {
+ return getOpcode() == Instruction::ICmp || getOpcode() == Instruction::FCmp;
+}
+
+bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const {
+ if (getOpcode() != Instruction::GetElementPtr) return false;
+
+ gep_type_iterator GEPI = gep_type_begin(this), E = gep_type_end(this);
+ User::const_op_iterator OI = llvm::next(this->op_begin());
+
+ // Skip the first index, as it has no static limit.
+ ++GEPI;
+ ++OI;
+
+ // The remaining indices must be compile-time known integers within the
+ // bounds of the corresponding notional static array types.
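+  // e.g. an index of 3 into a [4 x i32] dimension is fine, while an index
+  // of 4 or more, or any non-ConstantInt index, makes us return false.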
+ for (; GEPI != E; ++GEPI, ++OI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(*OI);
+ if (!CI) return false;
+ if (ArrayType *ATy = dyn_cast<ArrayType>(*GEPI))
+ if (CI->getValue().getActiveBits() > 64 ||
+ CI->getZExtValue() >= ATy->getNumElements())
+ return false;
+ }
+
+ // All the indices checked out.
+ return true;
+}
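+
+ // Worked example (a sketch; @A is a hypothetical global):
+ //   getelementptr ([4 x i32]* @A, i64 0, i64 2)  --> true  (2 < 4)
+ //   getelementptr ([4 x i32]* @A, i64 0, i64 7)  --> false (7 >= 4)
+ // The leading index is always accepted: it offsets the base pointer itself
+ // and has no static bound, which is why the loop above skips it.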
+
+bool ConstantExpr::hasIndices() const {
+ return getOpcode() == Instruction::ExtractValue ||
+ getOpcode() == Instruction::InsertValue;
+}
+
+ArrayRef<unsigned> ConstantExpr::getIndices() const {
+ if (const ExtractValueConstantExpr *EVCE =
+ dyn_cast<ExtractValueConstantExpr>(this))
+ return EVCE->Indices;
+
+ return cast<InsertValueConstantExpr>(this)->Indices;
+}
+
+unsigned ConstantExpr::getPredicate() const {
+ assert(isCompare());
+ return ((const CompareConstantExpr*)this)->predicate;
+}
+
+/// getWithOperandReplaced - Return a constant expression identical to this
+/// one, but with the specified operand set to the specified value.
+Constant *
+ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
+ assert(Op->getType() == getOperand(OpNo)->getType() &&
+ "Replacing operand with value of different type!");
+ if (getOperand(OpNo) == Op)
+ return const_cast<ConstantExpr*>(this);
+
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ NewOps.push_back(i == OpNo ? Op : getOperand(i));
+
+ return getWithOperands(NewOps);
+}
+
+/// getWithOperands - This returns the current constant expression with the
+/// operands replaced with the specified values. The specified array must
+/// have the same number of operands as our current one.
+Constant *ConstantExpr::
+getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
+ assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
+ bool AnyChange = Ty != getType();
+ for (unsigned i = 0; i != Ops.size(); ++i)
+ AnyChange |= Ops[i] != getOperand(i);
+
+ if (!AnyChange) // No operands changed, return self.
+ return const_cast<ConstantExpr*>(this);
+
+ switch (getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return ConstantExpr::getCast(getOpcode(), Ops[0], Ty);
+ case Instruction::Select:
+ return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ case Instruction::InsertElement:
+ return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::InsertValue:
+ return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices());
+ case Instruction::ExtractValue:
+ return ConstantExpr::getExtractValue(Ops[0], getIndices());
+ case Instruction::ShuffleVector:
+ return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
+ case Instruction::GetElementPtr:
+ return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1),
+ cast<GEPOperator>(this)->isInBounds());
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
+ default:
+ assert(getNumOperands() == 2 && "Must be binary operator?");
+ return ConstantExpr::get(getOpcode(), Ops[0], Ops[1], SubclassOptionalData);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// isValueValidForType implementations
+
+bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) {
+ unsigned NumBits = Ty->getIntegerBitWidth(); // asserts if Ty is non-integer
+ if (Ty->isIntegerTy(1))
+ return Val == 0 || Val == 1;
+ if (NumBits >= 64)
+ return true; // always true, has to fit in largest type
+ uint64_t Max = (1ull << NumBits) - 1; // Unsigned shift avoids UB at width 63.
+ return Val <= Max;
+}
+
+bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) {
+ unsigned NumBits = Ty->getIntegerBitWidth();
+ if (Ty->isIntegerTy(1))
+ return Val == 0 || Val == 1 || Val == -1;
+ if (NumBits >= 64)
+ return true; // always true, has to fit in largest type
+ int64_t Min = -(1ll << (NumBits-1));
+ int64_t Max = (1ll << (NumBits-1)) - 1;
+ return (Val >= Min && Val <= Max);
+}
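+
+ // Range sketch for the two overloads above (shown for i8):
+ //   unsigned: 0 .. 255 accepted, 256 rejected.
+ //   signed:   -128 .. 127 accepted, 128 rejected.
+ //   i1 is special-cased: 0 and 1 (and -1 for the signed overload) fit.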
+
+bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) {
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ switch (Ty->getTypeID()) {
+ default:
+ return false; // These can't be represented as floating point!
+
+ // FIXME rounding mode needs to be more flexible
+ case Type::HalfTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEhalf)
+ return true;
+ Val2.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::FloatTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEsingle)
+ return true;
+ Val2.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::DoubleTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble)
+ return true;
+ Val2.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
+ case Type::X86_FP80TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::x87DoubleExtended;
+ case Type::FP128TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::IEEEquad;
+ case Type::PPC_FP128TyID:
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble ||
+ &Val2.getSemantics() == &APFloat::PPCDoubleDouble;
+ }
+}
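+
+ // Example (a sketch): widening is always lossless, narrowing may not be.
+ // An IEEEsingle APFloat is valid for FloatTy and DoubleTy alike, but an
+ // IEEEdouble value such as 1e300 is invalid for FloatTy because converting
+ // it to IEEEsingle loses information (it overflows to infinity).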
+
+
+//===----------------------------------------------------------------------===//
+// Factory Function Implementation
+
+ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) {
+ assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
+ "Cannot create an aggregate zero of non-aggregate type!");
+
+ ConstantAggregateZero *&Entry = Ty->getContext().pImpl->CAZConstants[Ty];
+ if (Entry == 0)
+ Entry = new ConstantAggregateZero(Ty);
+
+ return Entry;
+}
+
+/// destroyConstant - Remove the constant from the constant table.
+///
+void ConstantAggregateZero::destroyConstant() {
+ getContext().pImpl->CAZConstants.erase(getType());
+ destroyConstantImpl();
+}
+
+/// destroyConstant - Remove the constant from the constant table...
+///
+void ConstantArray::destroyConstant() {
+ getType()->getContext().pImpl->ArrayConstants.remove(this);
+ destroyConstantImpl();
+}
+
+
+//---- ConstantStruct::get() implementation...
+//
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantStruct::destroyConstant() {
+ getType()->getContext().pImpl->StructConstants.remove(this);
+ destroyConstantImpl();
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantVector::destroyConstant() {
+ getType()->getContext().pImpl->VectorConstants.remove(this);
+ destroyConstantImpl();
+}
+
+/// getSplatValue - If this is a splat vector constant, meaning that all of
+/// the elements have the same value, return that value. Otherwise return 0.
+Constant *Constant::getSplatValue() const {
+ assert(this->getType()->isVectorTy() && "Only valid for vectors!");
+ if (isa<ConstantAggregateZero>(this))
+ return getNullValue(this->getType()->getVectorElementType());
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ return CV->getSplatValue();
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ return CV->getSplatValue();
+ return 0;
+}
+
+/// getSplatValue - If this is a splat constant, where all of the
+/// elements have the same value, return that value. Otherwise return null.
+Constant *ConstantVector::getSplatValue() const {
+ // Check out first element.
+ Constant *Elt = getOperand(0);
+ // Then make sure all remaining elements point to the same value.
+ for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
+ if (getOperand(I) != Elt)
+ return 0;
+ return Elt;
+}
+
+/// If C is a constant integer then return its value, otherwise C must be a
+/// vector of constant integers, all equal, and the common value is returned.
+const APInt &Constant::getUniqueInteger() const {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->getValue();
+ assert(this->getSplatValue() && "Doesn't contain a unique integer!");
+ const Constant *C = this->getAggregateElement(0U);
+ assert(C && isa<ConstantInt>(C) && "Not a vector of numbers!");
+ return cast<ConstantInt>(C)->getValue();
+}
+
+
+//---- ConstantPointerNull::get() implementation.
+//
+
+ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) {
+ ConstantPointerNull *&Entry = Ty->getContext().pImpl->CPNConstants[Ty];
+ if (Entry == 0)
+ Entry = new ConstantPointerNull(Ty);
+
+ return Entry;
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantPointerNull::destroyConstant() {
+ getContext().pImpl->CPNConstants.erase(getType());
+ // Free the constant and any dangling references to it.
+ destroyConstantImpl();
+}
+
+
+//---- UndefValue::get() implementation.
+//
+
+UndefValue *UndefValue::get(Type *Ty) {
+ UndefValue *&Entry = Ty->getContext().pImpl->UVConstants[Ty];
+ if (Entry == 0)
+ Entry = new UndefValue(Ty);
+
+ return Entry;
+}
+
+// destroyConstant - Remove the constant from the constant table.
+//
+void UndefValue::destroyConstant() {
+ // Free the constant and any dangling references to it.
+ getContext().pImpl->UVConstants.erase(getType());
+ destroyConstantImpl();
+}
+
+//---- BlockAddress::get() implementation.
+//
+
+BlockAddress *BlockAddress::get(BasicBlock *BB) {
+ assert(BB->getParent() != 0 && "Block must have a parent");
+ return get(BB->getParent(), BB);
+}
+
+BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) {
+ BlockAddress *&BA =
+ F->getContext().pImpl->BlockAddresses[std::make_pair(F, BB)];
+ if (BA == 0)
+ BA = new BlockAddress(F, BB);
+
+ assert(BA->getFunction() == F && "Basic block moved between functions");
+ return BA;
+}
+
+BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
+: Constant(Type::getInt8PtrTy(F->getContext()), Value::BlockAddressVal,
+ &Op<0>(), 2) {
+ setOperand(0, F);
+ setOperand(1, BB);
+ BB->AdjustBlockAddressRefCount(1);
+}
+
+
+// destroyConstant - Remove the constant from the constant table.
+//
+void BlockAddress::destroyConstant() {
+ getFunction()->getType()->getContext().pImpl
+ ->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock()));
+ getBasicBlock()->AdjustBlockAddressRefCount(-1);
+ destroyConstantImpl();
+}
+
+void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+ // This could be replacing either the Basic Block or the Function. In either
+ // case, we have to remove the map entry.
+ Function *NewF = getFunction();
+ BasicBlock *NewBB = getBasicBlock();
+
+ if (U == &Op<0>())
+ NewF = cast<Function>(To);
+ else
+ NewBB = cast<BasicBlock>(To);
+
+ // See if the 'new' entry already exists; if not, just update this in place
+ // and return early.
+ BlockAddress *&NewBA =
+ getContext().pImpl->BlockAddresses[std::make_pair(NewF, NewBB)];
+ if (NewBA == 0) {
+ getBasicBlock()->AdjustBlockAddressRefCount(-1);
+
+ // Remove the old entry; this can't cause the map to rehash (just a
+ // tombstone will get added).
+ getContext().pImpl->BlockAddresses.erase(std::make_pair(getFunction(),
+ getBasicBlock()));
+ NewBA = this;
+ setOperand(0, NewF);
+ setOperand(1, NewBB);
+ getBasicBlock()->AdjustBlockAddressRefCount(1);
+ return;
+ }
+
+ // Otherwise, I do need to replace this with an existing value.
+ assert(NewBA != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(NewBA);
+
+ destroyConstant();
+}
+
+//---- ConstantExpr::get() implementations.
+//
+
+/// This is a utility function to handle folding of casts and lookup of the
+/// cast in the ExprConstants map. It is used by the various get* methods below.
+static inline Constant *getFoldedCast(
+ Instruction::CastOps opc, Constant *C, Type *Ty) {
+ assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!");
+ // Fold a few common cases
+ if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty))
+ return FC;
+
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+
+ // Look up the constant in the table first to ensure uniqueness.
+ ExprMapKeyType Key(opc, C);
+
+ return pImpl->ExprConstants.getOrCreate(Ty, Key);
+}
+
+Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) {
+ Instruction::CastOps opc = Instruction::CastOps(oc);
+ assert(Instruction::isCast(opc) && "opcode out of range");
+ assert(C && Ty && "Null arguments to getCast");
+ assert(CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!");
+
+ switch (opc) {
+ default:
+ llvm_unreachable("Invalid cast opcode");
+ case Instruction::Trunc: return getTrunc(C, Ty);
+ case Instruction::ZExt: return getZExt(C, Ty);
+ case Instruction::SExt: return getSExt(C, Ty);
+ case Instruction::FPTrunc: return getFPTrunc(C, Ty);
+ case Instruction::FPExt: return getFPExtend(C, Ty);
+ case Instruction::UIToFP: return getUIToFP(C, Ty);
+ case Instruction::SIToFP: return getSIToFP(C, Ty);
+ case Instruction::FPToUI: return getFPToUI(C, Ty);
+ case Instruction::FPToSI: return getFPToSI(C, Ty);
+ case Instruction::PtrToInt: return getPtrToInt(C, Ty);
+ case Instruction::IntToPtr: return getIntToPtr(C, Ty);
+ case Instruction::BitCast: return getBitCast(C, Ty);
+ }
+}
+
+Constant *ConstantExpr::getZExtOrBitCast(Constant *C, Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getZExt(C, Ty);
+}
+
+Constant *ConstantExpr::getSExtOrBitCast(Constant *C, Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getSExt(C, Ty);
+}
+
+Constant *ConstantExpr::getTruncOrBitCast(Constant *C, Type *Ty) {
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return getBitCast(C, Ty);
+ return getTrunc(C, Ty);
+}
+
+Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) {
+ assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast");
+ assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) &&
+ "Invalid cast");
+
+ if (Ty->isIntOrIntVectorTy())
+ return getPtrToInt(S, Ty);
+ return getBitCast(S, Ty);
+}
+
+Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty,
+ bool isSigned) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ Ty->isIntOrIntVectorTy() && "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return getCast(opcode, C, Ty);
+}
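+
+ // Opcode selection sketch for getIntegerCast on scalar types:
+ //   i64 -> i16 : Trunc
+ //   i16 -> i64 : SExt when isSigned, ZExt otherwise
+ //   i32 -> i32 : BitCast, which getBitCast folds to the operand itself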
+
+Constant *ConstantExpr::getFPCast(Constant *C, Type *Ty) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ if (SrcBits == DstBits)
+ return C; // Avoid a useless cast
+ Instruction::CastOps opcode =
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt);
+ return getCast(opcode, C, Ty);
+}
+
+Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer");
+ assert(Ty->isIntOrIntVectorTy() && "Trunc produces only integral");
+ assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits() &&
+ "SrcTy must be larger than DestTy for Trunc!");
+
+ return getFoldedCast(Instruction::Trunc, C, Ty);
+}
+
+Constant *ConstantExpr::getSExt(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "SExt operand must be integral");
+ assert(Ty->isIntOrIntVectorTy() && "SExt produces only integer");
+ assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
+ "SrcTy must be smaller than DestTy for SExt!");
+
+ return getFoldedCast(Instruction::SExt, C, Ty);
+}
+
+Constant *ConstantExpr::getZExt(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && "ZEXt operand must be integral");
+ assert(Ty->isIntOrIntVectorTy() && "ZExt produces only integer");
+ assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
+ "SrcTy must be smaller than DestTy for ZExt!");
+
+ return getFoldedCast(Instruction::ZExt, C, Ty);
+}
+
+Constant *ConstantExpr::getFPTrunc(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits() &&
+ "This is an illegal floating point truncation!");
+ return getFoldedCast(Instruction::FPTrunc, C, Ty);
+}
+
+Constant *ConstantExpr::getFPExtend(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
+ "This is an illegal floating point extension!");
+ return getFoldedCast(Instruction::FPExt, C, Ty);
+}
+
+Constant *ConstantExpr::getUIToFP(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "This is an illegal uint to floating point cast!");
+ return getFoldedCast(Instruction::UIToFP, C, Ty);
+}
+
+Constant *ConstantExpr::getSIToFP(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "This is an illegal sint to floating point cast!");
+ return getFoldedCast(Instruction::SIToFP, C, Ty);
+}
+
+Constant *ConstantExpr::getFPToUI(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "This is an illegal floating point to uint cast!");
+ return getFoldedCast(Instruction::FPToUI, C, Ty);
+}
+
+Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) {
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "This is an illegal floating point to sint cast!");
+ return getFoldedCast(Instruction::FPToSI, C, Ty);
+}
+
+Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) {
+ assert(C->getType()->getScalarType()->isPointerTy() &&
+ "PtrToInt source must be pointer or pointer vector");
+ assert(DstTy->getScalarType()->isIntegerTy() &&
+ "PtrToInt destination must be integer or integer vector");
+ assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
+ if (isa<VectorType>(C->getType()))
+ assert(C->getType()->getVectorNumElements() ==
+ DstTy->getVectorNumElements() &&
+ "Invalid cast between a different number of vector elements");
+ return getFoldedCast(Instruction::PtrToInt, C, DstTy);
+}
+
+Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) {
+ assert(C->getType()->getScalarType()->isIntegerTy() &&
+ "IntToPtr source must be integer or integer vector");
+ assert(DstTy->getScalarType()->isPointerTy() &&
+ "IntToPtr destination must be a pointer or pointer vector");
+ assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
+ if (isa<VectorType>(C->getType()))
+ assert(C->getType()->getVectorNumElements() ==
+ DstTy->getVectorNumElements() &&
+ "Invalid cast between a different number of vector elements");
+ return getFoldedCast(Instruction::IntToPtr, C, DstTy);
+}
+
+Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy) {
+ assert(CastInst::castIsValid(Instruction::BitCast, C, DstTy) &&
+ "Invalid constantexpr bitcast!");
+
+ // It is common to ask for a bitcast of a value to its own type; handle this
+ // speedily.
+ if (C->getType() == DstTy) return C;
+
+ return getFoldedCast(Instruction::BitCast, C, DstTy);
+}
+
+Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags) {
+ // Check the operands for consistency first.
+ assert(Opcode >= Instruction::BinaryOpsBegin &&
+ Opcode < Instruction::BinaryOpsEnd &&
+ "Invalid opcode in binary constant expression");
+ assert(C1->getType() == C2->getType() &&
+ "Operand types in binary constant expression should match");
+
+#ifndef NDEBUG
+ switch (Opcode) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an integer operation on a non-integer type!");
+ break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::FDiv:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::FRem:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isFPOrFPVectorTy() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
+ break;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create a logical operation on a non-integral type!");
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ assert(C1->getType()->isIntOrIntVectorTy() &&
+ "Tried to create a shift operation on a non-integer type!");
+ break;
+ default:
+ break;
+ }
+#endif
+
+ if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
+ return FC; // Fold a few common cases.
+
+ Constant *ArgVec[] = { C1, C2 };
+ ExprMapKeyType Key(Opcode, ArgVec, 0, Flags);
+
+ LLVMContextImpl *pImpl = C1->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
+}
+
+Constant *ConstantExpr::getSizeOf(Type* Ty) {
+ // sizeof is implemented as: (i64) gep (Ty*)null, 1
+ // Note that a non-inbounds gep is used, as null isn't within any object.
+ Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
+ Constant *GEP = getGetElementPtr(
+ Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getAlignOf(Type* Ty) {
+ // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
+ // Note that a non-inbounds gep is used, as null isn't within any object.
+ Type *AligningTy =
+ StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL);
+ Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo());
+ Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
+ Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
+ Constant *Indices[2] = { Zero, One };
+ Constant *GEP = getGetElementPtr(NullPtr, Indices);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getOffsetOf(StructType* STy, unsigned FieldNo) {
+ return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()),
+ FieldNo));
+}
+
+Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) {
+ // offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo
+ // Note that a non-inbounds gep is used, as null isn't within any object.
+ Constant *GEPIdx[] = {
+ ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0),
+ FieldNo
+ };
+ Constant *GEP = getGetElementPtr(
+ Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
+ return getPtrToInt(GEP,
+ Type::getInt64Ty(Ty->getContext()));
+}
+
+Constant *ConstantExpr::getCompare(unsigned short Predicate,
+ Constant *C1, Constant *C2) {
+ assert(C1->getType() == C2->getType() && "Op types should be identical!");
+
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid CmpInst predicate");
+ case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ONE: case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO:
+ case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ return getFCmp(Predicate, C1, C2);
+
+ case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ return getICmp(Predicate, C1, C2);
+ }
+}
+
+Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) {
+ assert(!SelectInst::areInvalidOperands(C, V1, V2) &&
+ "Invalid select operands");
+
+ if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
+ return SC; // Fold common cases
+
+ Constant *ArgVec[] = { C, V1, V2 };
+ ExprMapKeyType Key(Instruction::Select, ArgVec);
+
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
+}
+
+Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
+ bool InBounds) {
+ assert(C->getType()->isPtrOrPtrVectorTy() &&
+ "Non-pointer type for constant GetElementPtr expression");
+
+ if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs))
+ return FC; // Fold a few common cases.
+
+ // Get the result type of the getelementptr!
+ Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs);
+ assert(Ty && "GEP indices invalid!");
+ unsigned AS = C->getType()->getPointerAddressSpace();
+ Type *ReqTy = Ty->getPointerTo(AS);
+ if (VectorType *VecTy = dyn_cast<VectorType>(C->getType()))
+ ReqTy = VectorType::get(ReqTy, VecTy->getNumElements());
+
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec;
+ ArgVec.reserve(1 + Idxs.size());
+ ArgVec.push_back(C);
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
+ assert(Idxs[i]->getType()->isVectorTy() == ReqTy->isVectorTy() &&
+ "getelementptr index type missmatch");
+ assert((!Idxs[i]->getType()->isVectorTy() ||
+ ReqTy->getVectorNumElements() ==
+ Idxs[i]->getType()->getVectorNumElements()) &&
+ "getelementptr index type missmatch");
+ ArgVec.push_back(cast<Constant>(Idxs[i]));
+ }
+ const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
+ InBounds ? GEPOperator::IsInBounds : 0);
+
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+}
+
+Constant *
+ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
+ assert(LHS->getType() == RHS->getType());
+ assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE &&
+ pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate");
+
+ if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ return FC; // Fold a few common cases...
+
+ // Look up the constant in the table first to ensure uniqueness
+ Constant *ArgVec[] = { LHS, RHS };
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
+
+ Type *ResultTy = Type::getInt1Ty(LHS->getContext());
+ if (VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
+ ResultTy = VectorType::get(ResultTy, VT->getNumElements());
+
+ LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
+}
+
+Constant *
+ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
+ assert(LHS->getType() == RHS->getType());
+ assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate");
+
+ if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
+ return FC; // Fold a few common cases...
+
+ // Look up the constant in the table first to ensure uniqueness
+ Constant *ArgVec[] = { LHS, RHS };
+ // Get the key type with both the opcode and predicate
+ const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
+
+ Type *ResultTy = Type::getInt1Ty(LHS->getContext());
+ if (VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
+ ResultTy = VectorType::get(ResultTy, VT->getNumElements());
+
+ LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
+}
+
+Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
+ assert(Val->getType()->isVectorTy() &&
+ "Tried to create extractelement operation on non-vector type!");
+ assert(Idx->getType()->isIntegerTy(32) &&
+ "Extractelement index must be i32 type!");
+
+ if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
+ return FC; // Fold a few common cases.
+
+ // Look up the constant in the table first to ensure uniqueness
+ Constant *ArgVec[] = { Val, Idx };
+ const ExprMapKeyType Key(Instruction::ExtractElement, ArgVec);
+
+ LLVMContextImpl *pImpl = Val->getContext().pImpl;
+ Type *ReqTy = Val->getType()->getVectorElementType();
+ return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+}
+
+Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
+ Constant *Idx) {
+ assert(Val->getType()->isVectorTy() &&
+ "Tried to create insertelement operation on non-vector type!");
+ assert(Elt->getType() == Val->getType()->getVectorElementType() &&
+ "Insertelement types must match!");
+ assert(Idx->getType()->isIntegerTy(32) &&
+ "Insertelement index must be i32 type!");
+
+ if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
+ return FC; // Fold a few common cases.
+ // Look up the constant in the table first to ensure uniqueness
+ Constant *ArgVec[] = { Val, Elt, Idx };
+ const ExprMapKeyType Key(Instruction::InsertElement, ArgVec);
+
+ LLVMContextImpl *pImpl = Val->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(Val->getType(), Key);
+}
+
+Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
+ Constant *Mask) {
+ assert(ShuffleVectorInst::isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector constant expr operands!");
+
+ if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
+ return FC; // Fold a few common cases.
+
+ unsigned NElts = Mask->getType()->getVectorNumElements();
+ Type *EltTy = V1->getType()->getVectorElementType();
+ Type *ShufTy = VectorType::get(EltTy, NElts);
+
+ // Look up the constant in the table first to ensure uniqueness
+ Constant *ArgVec[] = { V1, V2, Mask };
+ const ExprMapKeyType Key(Instruction::ShuffleVector, ArgVec);
+
+ LLVMContextImpl *pImpl = ShufTy->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ShufTy, Key);
+}
+
+Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
+ ArrayRef<unsigned> Idxs) {
+ assert(ExtractValueInst::getIndexedType(Agg->getType(),
+ Idxs) == Val->getType() &&
+ "insertvalue indices invalid!");
+ assert(Agg->getType()->isFirstClassType() &&
+ "Non-first-class type for constant insertvalue expression");
+ Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs);
+ assert(FC && "insertvalue constant expr couldn't be folded!");
+ return FC;
+}
+
+Constant *ConstantExpr::getExtractValue(Constant *Agg,
+ ArrayRef<unsigned> Idxs) {
+ assert(Agg->getType()->isFirstClassType() &&
+ "Tried to create extractelement operation on non-first-class type!");
+
+ Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs);
+ (void)ReqTy;
+ assert(ReqTy && "extractvalue indices invalid!");
+
+ assert(Agg->getType()->isFirstClassType() &&
+ "Non-first-class type for constant extractvalue expression");
+ Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs);
+ assert(FC && "ExtractValue constant expr couldn't be folded!");
+ return FC;
+}
+
+Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ "Cannot NEG a nonintegral value!");
+ return getSub(ConstantFP::getZeroValueForNegation(C->getType()),
+ C, HasNUW, HasNSW);
+}
+
+Constant *ConstantExpr::getFNeg(Constant *C) {
+ assert(C->getType()->isFPOrFPVectorTy() &&
+ "Cannot FNEG a non-floating-point value!");
+ return getFSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
+}
+
+Constant *ConstantExpr::getNot(Constant *C) {
+ assert(C->getType()->isIntOrIntVectorTy() &&
+ "Cannot NOT a nonintegral value!");
+ return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType()));
+}
+
+Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Add, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
+ return get(Instruction::FAdd, C1, C2);
+}
+
+Constant *ConstantExpr::getSub(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Sub, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
+ return get(Instruction::FSub, C1, C2);
+}
+
+Constant *ConstantExpr::getMul(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Mul, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
+ return get(Instruction::FMul, C1, C2);
+}
+
+Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::UDiv, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::SDiv, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) {
+ return get(Instruction::FDiv, C1, C2);
+}
+
+Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) {
+ return get(Instruction::URem, C1, C2);
+}
+
+Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) {
+ return get(Instruction::SRem, C1, C2);
+}
+
+Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) {
+ return get(Instruction::FRem, C1, C2);
+}
+
+Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
+ return get(Instruction::And, C1, C2);
+}
+
+Constant *ConstantExpr::getOr(Constant *C1, Constant *C2) {
+ return get(Instruction::Or, C1, C2);
+}
+
+Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
+ return get(Instruction::Xor, C1, C2);
+}
+
+Constant *ConstantExpr::getShl(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Shl, C1, C2, Flags);
+}
+
+Constant *ConstantExpr::getLShr(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::LShr, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::AShr, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
+}
+
+/// getBinOpIdentity - Return the identity for the given binary operation,
+/// i.e. a constant C such that X op C = X and C op X = X for every X. It
+/// returns null if the operator doesn't have an identity.
+Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty) {
+ switch (Opcode) {
+ default:
+ // Doesn't have an identity.
+ return 0;
+
+ case Instruction::Add:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return Constant::getNullValue(Ty);
+
+ case Instruction::Mul:
+ return ConstantInt::get(Ty, 1);
+
+ case Instruction::And:
+ return Constant::getAllOnesValue(Ty);
+ }
+}
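+
+ // Quick reference (a sketch, with Ty = i32):
+ //   getBinOpIdentity(Instruction::Add, Ty) --> i32 0
+ //   getBinOpIdentity(Instruction::Mul, Ty) --> i32 1
+ //   getBinOpIdentity(Instruction::And, Ty) --> i32 -1 (all ones)
+ //   getBinOpIdentity(Instruction::Sub, Ty) --> null: 0 is only a right
+ //   identity (X - 0 == X but 0 - X != X), and a two-sided identity is
+ //   required here.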
+
+/// getBinOpAbsorber - Return the absorbing element for the given binary
+/// operation, i.e. a constant C such that X op C = C and C op X = C for
+/// every X. For example, this returns zero for integer multiplication.
+/// It returns null if the operator doesn't have an absorbing element.
+Constant *ConstantExpr::getBinOpAbsorber(unsigned Opcode, Type *Ty) {
+ switch (Opcode) {
+ default:
+ // Doesn't have an absorber.
+ return 0;
+
+ case Instruction::Or:
+ return Constant::getAllOnesValue(Ty);
+
+ case Instruction::And:
+ case Instruction::Mul:
+ return Constant::getNullValue(Ty);
+ }
+}
+
+// destroyConstant - Remove the constant from the constant table...
+//
+void ConstantExpr::destroyConstant() {
+ getType()->getContext().pImpl->ExprConstants.remove(this);
+ destroyConstantImpl();
+}
+
+const char *ConstantExpr::getOpcodeName() const {
+ return Instruction::getOpcodeName(getOpcode());
+}
+
+
+
+GetElementPtrConstantExpr::
+GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
+ Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::GetElementPtr,
+ OperandTraits<GetElementPtrConstantExpr>::op_end(this)
+ - (IdxList.size()+1), IdxList.size()+1) {
+ OperandList[0] = C;
+ for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
+ OperandList[i+1] = IdxList[i];
+}
+
+//===----------------------------------------------------------------------===//
+// ConstantData* implementations
+
+void ConstantDataArray::anchor() {}
+void ConstantDataVector::anchor() {}
+
+/// getElementType - Return the element type of the array/vector.
+Type *ConstantDataSequential::getElementType() const {
+ return getType()->getElementType();
+}
+
+StringRef ConstantDataSequential::getRawDataValues() const {
+ return StringRef(DataElements, getNumElements()*getElementByteSize());
+}
+
+/// isElementTypeCompatible - Return true if a ConstantDataSequential can be
+/// formed with a vector or array of the specified element type.
+/// ConstantDataArray only works with normal float and int types that are
+/// stored densely in memory, not with things like i42 or x86_fp80.
+bool ConstantDataSequential::isElementTypeCompatible(const Type *Ty) {
+ if (Ty->isFloatTy() || Ty->isDoubleTy()) return true;
+ if (const IntegerType *IT = dyn_cast<IntegerType>(Ty)) {
+ switch (IT->getBitWidth()) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return true;
+ default: break;
+ }
+ }
+ return false;
+}
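+
+ // Compatibility at a glance (a sketch):
+ //   float, double, i8, i16, i32, i64 --> true (stored densely)
+ //   half, x86_fp80, i1, i42          --> false (padded or oddly sized)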
+
+/// getNumElements - Return the number of elements in the array or vector.
+unsigned ConstantDataSequential::getNumElements() const {
+ if (ArrayType *AT = dyn_cast<ArrayType>(getType()))
+ return AT->getNumElements();
+ return getType()->getVectorNumElements();
+}
+
+
+/// getElementByteSize - Return the size in bytes of the elements in the data.
+uint64_t ConstantDataSequential::getElementByteSize() const {
+ return getElementType()->getPrimitiveSizeInBits()/8;
+}
+
+/// getElementPointer - Return the start of the specified element.
+const char *ConstantDataSequential::getElementPointer(unsigned Elt) const {
+ assert(Elt < getNumElements() && "Invalid Elt");
+ return DataElements+Elt*getElementByteSize();
+}
+
+
+/// isAllZeros - return true if the array is empty or all zeros.
+static bool isAllZeros(StringRef Arr) {
+ for (StringRef::iterator I = Arr.begin(), E = Arr.end(); I != E; ++I)
+ if (*I != 0)
+ return false;
+ return true;
+}
+
+/// getImpl - This is the underlying implementation of all of the
+/// ConstantDataSequential::get methods. They all thunk down to here, providing
+/// the correct element type. We take the bytes in as a StringRef because
+/// we *want* an underlying "char*" to avoid TBAA type punning violations.
+Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) {
+ assert(isElementTypeCompatible(Ty->getSequentialElementType()));
+ // If the elements are all zero or there are no elements, return a CAZ, which
+ // is more dense and canonical.
+ if (isAllZeros(Elements))
+ return ConstantAggregateZero::get(Ty);
+
+ // Do a lookup to see if we have already formed one of these.
+ StringMap<ConstantDataSequential*>::MapEntryTy &Slot =
+ Ty->getContext().pImpl->CDSConstants.GetOrCreateValue(Elements);
+
+ // The bucket can point to a linked list of different CDS's that have the same
+ // body but different types. For example, 0,0,0,1 could be a 4 element array
+ // of i8, or a 1-element array of i32. They'll both end up in the same
+ // StringMap bucket, linked up by their Next pointers. Walk the list.
+ ConstantDataSequential **Entry = &Slot.getValue();
+ for (ConstantDataSequential *Node = *Entry; Node != 0;
+ Entry = &Node->Next, Node = *Entry)
+ if (Node->getType() == Ty)
+ return Node;
+
+ // Okay, we didn't get a hit. Create a node of the right class, link it in,
+ // and return it.
+ if (isa<ArrayType>(Ty))
+ return *Entry = new ConstantDataArray(Ty, Slot.getKeyData());
+
+ assert(isa<VectorType>(Ty));
+ return *Entry = new ConstantDataVector(Ty, Slot.getKeyData());
+}
+
+void ConstantDataSequential::destroyConstant() {
+ // Remove the constant from the StringMap.
+ StringMap<ConstantDataSequential*> &CDSConstants =
+ getType()->getContext().pImpl->CDSConstants;
+
+ StringMap<ConstantDataSequential*>::iterator Slot =
+ CDSConstants.find(getRawDataValues());
+
+ assert(Slot != CDSConstants.end() && "CDS not found in uniquing table");
+
+ ConstantDataSequential **Entry = &Slot->getValue();
+
+ // Remove the entry from the hash table.
+ if ((*Entry)->Next == 0) {
+ // If there is only one value in the bucket (common case) it must be this
+ // entry, and removing the entry should remove the bucket completely.
+ assert((*Entry) == this && "Hash mismatch in ConstantDataSequential");
+ getContext().pImpl->CDSConstants.erase(Slot);
+ } else {
+ // Otherwise, there are multiple entries linked off the bucket, unlink the
+ // node we care about but keep the bucket around.
+ for (ConstantDataSequential *Node = *Entry; ;
+ Entry = &Node->Next, Node = *Entry) {
+ assert(Node && "Didn't find entry in its uniquing hash table!");
+ // If we found our entry, unlink it from the list and we're done.
+ if (Node == this) {
+ *Entry = Node->Next;
+ break;
+ }
+ }
+ }
+
+ // If we were part of a list, make sure that we don't delete the list that is
+ // still owned by the uniquing map.
+ Next = 0;
+
+ // Finally, actually delete it.
+ destroyConstantImpl();
+}
+
+/// get() constructors - Return a constant with array type with an element
+/// count and element type matching the ArrayRef passed in. Note that this
+/// can return a ConstantAggregateZero object.
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint8_t> Elts) {
+ Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<float> Elts) {
+ Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<double> Elts) {
+ Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+}
+
+/// getString - This method constructs a CDS and initializes it with a text
+/// string. The default behavior (AddNull==true) causes a null terminator to
+/// be placed at the end of the array (increasing the length of the string by
+/// one more than the StringRef would normally indicate). Pass AddNull=false
+/// to disable this behavior.
+Constant *ConstantDataArray::getString(LLVMContext &Context,
+ StringRef Str, bool AddNull) {
+ if (!AddNull) {
+ const uint8_t *Data = reinterpret_cast<const uint8_t *>(Str.data());
+ return get(Context, ArrayRef<uint8_t>(const_cast<uint8_t *>(Data),
+ Str.size()));
+ }
+
+ SmallVector<uint8_t, 64> ElementVals;
+ ElementVals.append(Str.begin(), Str.end());
+ ElementVals.push_back(0);
+ return get(Context, ElementVals);
+}
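+
+ // Example (a sketch; Ctx names some live LLVMContext):
+ //   getString(Ctx, "hi")        --> [3 x i8] c"hi\00"  (AddNull defaults on)
+ //   getString(Ctx, "hi", false) --> [2 x i8] c"hi"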
+
+/// get() constructors - Return a constant with vector type with an element
+/// count and element type matching the ArrayRef passed in. Note that this
+/// can return a ConstantAggregateZero object.
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint8_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<float> Elts) {
+ Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<double> Elts) {
+ Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size());
+ const char *Data = reinterpret_cast<const char *>(Elts.data());
+ return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
+}
+
+Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) {
+ assert(isElementTypeCompatible(V->getType()) &&
+ "Element type not compatible with ConstantData");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ assert(CI->getType()->isIntegerTy(64) && "Unsupported ConstantData type");
+ SmallVector<uint64_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts(NumElts, CFP->getValueAPF().convertToFloat());
+ return get(V->getContext(), Elts);
+ }
+ if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts(NumElts,
+ CFP->getValueAPF().convertToDouble());
+ return get(V->getContext(), Elts);
+ }
+ }
+ return ConstantVector::getSplat(NumElts, V);
+}
+
+
+/// getElementAsInteger - If this is a sequential container of integers (of
+/// any size), return the specified element in the low bits of a uint64_t.
+uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const {
+ assert(isa<IntegerType>(getElementType()) &&
+ "Accessor can only be used when element is an integer");
+ const char *EltPtr = getElementPointer(Elt);
+
+ // The data is stored in host byte order; make sure to cast back to the right
+ // type to load with the right endianness.
+ switch (getElementType()->getIntegerBitWidth()) {
+ default: llvm_unreachable("Invalid bitwidth for CDS");
+ case 8:
+ return *const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(EltPtr));
+ case 16:
+ return *const_cast<uint16_t *>(reinterpret_cast<const uint16_t *>(EltPtr));
+ case 32:
+ return *const_cast<uint32_t *>(reinterpret_cast<const uint32_t *>(EltPtr));
+ case 64:
+ return *const_cast<uint64_t *>(reinterpret_cast<const uint64_t *>(EltPtr));
+ }
+}
+
+/// getElementAsAPFloat - If this is a sequential container of floating point
+/// type, return the specified element as an APFloat.
+APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const {
+ const char *EltPtr = getElementPointer(Elt);
+
+ switch (getElementType()->getTypeID()) {
+ default:
+ llvm_unreachable("Accessor can only be used when element is float/double!");
+ case Type::FloatTyID: {
+ const float *FloatPtr = reinterpret_cast<const float *>(EltPtr);
+ return APFloat(*const_cast<float *>(FloatPtr));
+ }
+ case Type::DoubleTyID: {
+ const double *DoublePtr = reinterpret_cast<const double *>(EltPtr);
+ return APFloat(*const_cast<double *>(DoublePtr));
+ }
+ }
+}
+
+/// getElementAsFloat - If this is a sequential container of floats, return
+/// the specified element as a float.
+float ConstantDataSequential::getElementAsFloat(unsigned Elt) const {
+ assert(getElementType()->isFloatTy() &&
+ "Accessor can only be used when element is a 'float'");
+ const float *EltPtr = reinterpret_cast<const float *>(getElementPointer(Elt));
+ return *const_cast<float *>(EltPtr);
+}
+
+/// getElementAsDouble - If this is a sequential container of doubles, return
+/// the specified element as a double.
+double ConstantDataSequential::getElementAsDouble(unsigned Elt) const {
+ assert(getElementType()->isDoubleTy() &&
+ "Accessor can only be used when element is a 'float'");
+ const double *EltPtr =
+ reinterpret_cast<const double *>(getElementPointer(Elt));
+ return *const_cast<double *>(EltPtr);
+}
+
+/// getElementAsConstant - Return a Constant for a specified index's element.
+/// Note that this has to compute a new constant to return, so it isn't as
+/// efficient as getElementAsInteger/Float/Double.
+Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const {
+ if (getElementType()->isFloatTy() || getElementType()->isDoubleTy())
+ return ConstantFP::get(getContext(), getElementAsAPFloat(Elt));
+
+ return ConstantInt::get(getElementType(), getElementAsInteger(Elt));
+}
+
+/// isString - This method returns true if this is an array of i8.
+bool ConstantDataSequential::isString() const {
+ return isa<ArrayType>(getType()) && getElementType()->isIntegerTy(8);
+}
+
+/// isCString - This method returns true if the array "isString", ends with a
+/// nul byte, and does not contain any other nul bytes.
+bool ConstantDataSequential::isCString() const {
+ if (!isString())
+ return false;
+
+ StringRef Str = getAsString();
+
+ // The last value must be nul.
+ if (Str.back() != 0) return false;
+
+ // Other elements must be non-nul.
+ return Str.drop_back().find(0) == StringRef::npos;
+}
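+
+ // Example (a sketch, using the IR spelling of the byte contents):
+ //   c"foo\00"   --> true  (nul-terminated, no interior nul)
+ //   c"foo"      --> false (missing terminator)
+ //   c"f\00o\00" --> false (interior nul byte)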
+
+/// getSplatValue - If this is a splat constant, meaning that all of the
+/// elements have the same value, return that value. Otherwise return NULL.
+Constant *ConstantDataVector::getSplatValue() const {
+ const char *Base = getRawDataValues().data();
+
+ // Compare elements 1+ to the 0'th element.
+ unsigned EltSize = getElementByteSize();
+ for (unsigned i = 1, e = getNumElements(); i != e; ++i)
+ if (memcmp(Base, Base+i*EltSize, EltSize))
+ return 0;
+
+ // If they're all the same, return the 0th one as a representative.
+ return getElementAsConstant(0);
+}
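+
+ // Example (a sketch over the raw byte view used above, with i8 elements):
+ //   bytes 01 01 01 01 --> all memcmp calls match; returns i8 1
+ //   bytes 01 02 01 01 --> memcmp flags the second element; returns 0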
+
+//===----------------------------------------------------------------------===//
+// replaceUsesOfWithOnConstant implementations
+
+/// replaceUsesOfWithOnConstant - Update this constant array to change uses of
+/// 'From' to be uses of 'To'. This must update the uniquing data structures
+/// etc.
+///
+/// Note that we intentionally replace all uses of From with To here. Consider
+/// a large array that uses 'From' 1000 times. By handling this case all here,
+/// ConstantArray::replaceUsesOfWithOnConstant is only invoked once, and that
+/// single invocation handles all 1000 uses. Handling them one at a time would
+/// work, but would be really slow because it would have to unique each updated
+/// array instance.
+///
+void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+
+ SmallVector<Constant*, 8> Values;
+ LLVMContextImpl::ArrayConstantsTy::LookupKey Lookup;
+ Lookup.first = cast<ArrayType>(getType());
+ Values.reserve(getNumOperands()); // Build replacement array.
+
+ // Fill values with the modified operands of the constant array. Also,
+ // compute whether this turns into an all-zeros array.
+ unsigned NumUpdated = 0;
+
+ // Keep track of whether all the values in the array are "ToC".
+ bool AllSame = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ if (Val == From) {
+ Val = ToC;
+ ++NumUpdated;
+ }
+ Values.push_back(Val);
+ AllSame &= Val == ToC;
+ }
+
+ Constant *Replacement = 0;
+ if (AllSame && ToC->isNullValue()) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else if (AllSame && isa<UndefValue>(ToC)) {
+ Replacement = UndefValue::get(getType());
+ } else {
+ // Check to see if we have this array type already.
+ Lookup.second = makeArrayRef(Values);
+ LLVMContextImpl::ArrayConstantsTy::MapTy::iterator I =
+ pImpl->ArrayConstants.find(Lookup);
+
+ if (I != pImpl->ArrayConstants.map_end()) {
+ Replacement = I->first;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant array, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ pImpl->ArrayConstants.remove(this);
+
+ // Update to the new value. Optimize for the case when we have a single
+ // operand that we're changing, but handle bulk updates efficiently.
+ if (NumUpdated == 1) {
+ unsigned OperandToUpdate = U - OperandList;
+ assert(getOperand(OperandToUpdate) == From &&
+ "ReplaceAllUsesWith broken!");
+ setOperand(OperandToUpdate, ToC);
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i) == From)
+ setOperand(i, ToC);
+ }
+ pImpl->ArrayConstants.insert(this);
+ return;
+ }
+ }
+
+ // Otherwise, I do need to replace this with an existing value.
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ unsigned OperandToUpdate = U-OperandList;
+ assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
+
+ SmallVector<Constant*, 8> Values;
+ LLVMContextImpl::StructConstantsTy::LookupKey Lookup;
+ Lookup.first = cast<StructType>(getType());
+ Values.reserve(getNumOperands()); // Build replacement struct.
+
+  // Fill Values with the modified operands of the constant struct. Also,
+  // compute whether the result collapses to an all-zeros or all-undef struct.
+ bool isAllZeros = false;
+ bool isAllUndef = false;
+ if (ToC->isNullValue()) {
+ isAllZeros = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ Values.push_back(Val);
+ if (isAllZeros) isAllZeros = Val->isNullValue();
+ }
+ } else if (isa<UndefValue>(ToC)) {
+ isAllUndef = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ Values.push_back(Val);
+ if (isAllUndef) isAllUndef = isa<UndefValue>(Val);
+ }
+ } else {
+ for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O)
+ Values.push_back(cast<Constant>(O->get()));
+ }
+ Values[OperandToUpdate] = ToC;
+
+ LLVMContextImpl *pImpl = getContext().pImpl;
+
+ Constant *Replacement = 0;
+ if (isAllZeros) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else if (isAllUndef) {
+ Replacement = UndefValue::get(getType());
+ } else {
+ // Check to see if we have this struct type already.
+ Lookup.second = makeArrayRef(Values);
+ LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
+ pImpl->StructConstants.find(Lookup);
+
+ if (I != pImpl->StructConstants.map_end()) {
+ Replacement = I->first;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant struct, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ pImpl->StructConstants.remove(this);
+
+ // Update to the new value.
+ setOperand(OperandToUpdate, ToC);
+ pImpl->StructConstants.insert(this);
+ return;
+ }
+ }
+
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+
+ SmallVector<Constant*, 8> Values;
+ Values.reserve(getNumOperands()); // Build replacement array...
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Constant *Val = getOperand(i);
+ if (Val == From) Val = cast<Constant>(To);
+ Values.push_back(Val);
+ }
+
+ Constant *Replacement = get(Values);
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
+ Use *U) {
+ assert(isa<Constant>(ToV) && "Cannot make Constant refer to non-constant!");
+ Constant *To = cast<Constant>(ToV);
+
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Constant *Op = getOperand(i);
+ NewOps.push_back(Op == From ? To : Op);
+ }
+
+ Constant *Replacement = getWithOperands(NewOps);
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ replaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
+}
+
+Instruction *ConstantExpr::getAsInstruction() {
+ SmallVector<Value*,4> ValueOperands;
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ ValueOperands.push_back(cast<Value>(I));
+
+ ArrayRef<Value*> Ops(ValueOperands);
+
+ switch (getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return CastInst::Create((Instruction::CastOps)getOpcode(),
+ Ops[0], getType());
+ case Instruction::Select:
+ return SelectInst::Create(Ops[0], Ops[1], Ops[2]);
+ case Instruction::InsertElement:
+ return InsertElementInst::Create(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ExtractElementInst::Create(Ops[0], Ops[1]);
+ case Instruction::InsertValue:
+ return InsertValueInst::Create(Ops[0], Ops[1], getIndices());
+ case Instruction::ExtractValue:
+ return ExtractValueInst::Create(Ops[0], getIndices());
+ case Instruction::ShuffleVector:
+ return new ShuffleVectorInst(Ops[0], Ops[1], Ops[2]);
+
+ case Instruction::GetElementPtr:
+ if (cast<GEPOperator>(this)->isInBounds())
+ return GetElementPtrInst::CreateInBounds(Ops[0], Ops.slice(1));
+ else
+ return GetElementPtrInst::Create(Ops[0], Ops.slice(1));
+
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return CmpInst::Create((Instruction::OtherOps)getOpcode(),
+ getPredicate(), Ops[0], Ops[1]);
+
+ default:
+ assert(getNumOperands() == 2 && "Must be binary operator?");
+ BinaryOperator *BO =
+ BinaryOperator::Create((Instruction::BinaryOps)getOpcode(),
+ Ops[0], Ops[1]);
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ BO->setHasNoUnsignedWrap(SubclassOptionalData &
+ OverflowingBinaryOperator::NoUnsignedWrap);
+ BO->setHasNoSignedWrap(SubclassOptionalData &
+ OverflowingBinaryOperator::NoSignedWrap);
+ }
+ if (isa<PossiblyExactOperator>(BO))
+ BO->setIsExact(SubclassOptionalData & PossiblyExactOperator::IsExact);
+ return BO;
+ }
+}
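+
+// Illustrative use (a sketch; CE, InsertPt and U are hypothetical names): a
+// pass that cannot reason about constant expressions can expand one into a
+// real instruction and rewrite a particular use U to point at it:
+//
+//   Instruction *I = CE->getAsInstruction();
+//   I->insertBefore(InsertPt);
+//   U.set(I);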
diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h
new file mode 100644
index 000000000000..e9958589f53c
--- /dev/null
+++ b/lib/IR/ConstantsContext.h
@@ -0,0 +1,774 @@
+//===-- ConstantsContext.h - Constants-related Context Internals ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by
+// LLVMContextImpl for creating and managing constants.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CONSTANTSCONTEXT_H
+#define LLVM_CONSTANTSCONTEXT_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+namespace llvm {
+template<class ValType>
+struct ConstantTraits;
+
+/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement unary constant exprs.
+class UnaryConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ UnaryConstantExpr(unsigned Opcode, Constant *C, Type *Ty)
+ : ConstantExpr(Ty, Opcode, &Op<0>(), 1) {
+ Op<0>() = C;
+ }
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// BinaryConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement binary constant exprs.
+class BinaryConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags)
+ : ConstantExpr(C1->getType(), Opcode, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ SubclassOptionalData = Flags;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// SelectConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement select constant exprs.
+class SelectConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractelement constant exprs.
+class ExtractElementConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ ExtractElementConstantExpr(Constant *C1, Constant *C2)
+ : ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
+ Instruction::ExtractElement, &Op<0>(), 2) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertElementConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertelement constant exprs.
+class InsertElementConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(C1->getType(), Instruction::InsertElement,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ShuffleVectorConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// shufflevector constant exprs.
+class ShuffleVectorConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+ ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
+ : ConstantExpr(VectorType::get(
+ cast<VectorType>(C1->getType())->getElementType(),
+ cast<VectorType>(C3->getType())->getNumElements()),
+ Instruction::ShuffleVector,
+ &Op<0>(), 3) {
+ Op<0>() = C1;
+ Op<1>() = C2;
+ Op<2>() = C3;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// ExtractValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// extractvalue constant exprs.
+class ExtractValueConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+ ExtractValueConstantExpr(Constant *Agg,
+ const SmallVector<unsigned, 4> &IdxList,
+ Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ }
+
+ /// Indices - These identify which value to extract.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// InsertValueConstantExpr - This class is private to
+/// Constants.cpp, and is used behind the scenes to implement
+/// insertvalue constant exprs.
+class InsertValueConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+  // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ InsertValueConstantExpr(Constant *Agg, Constant *Val,
+ const SmallVector<unsigned, 4> &IdxList,
+ Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
+ Indices(IdxList) {
+ Op<0>() = Agg;
+ Op<1>() = Val;
+ }
+
+ /// Indices - These identify the position for the insertion.
+ const SmallVector<unsigned, 4> Indices;
+
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+
+/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
+/// used behind the scenes to implement getelementptr constant exprs.
+class GetElementPtrConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
+ Type *DestTy);
+public:
+ static GetElementPtrConstantExpr *Create(Constant *C,
+ ArrayRef<Constant*> IdxList,
+ Type *DestTy,
+ unsigned Flags) {
+ GetElementPtrConstantExpr *Result =
+ new(IdxList.size() + 1) GetElementPtrConstantExpr(C, IdxList, DestTy);
+ Result->SubclassOptionalData = Flags;
+ return Result;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+/// CompareConstantExpr - This class is private to Constants.cpp, and is used
+/// behind the scenes to implement ICmp and FCmp constant expressions. This is
+/// needed in order to store the predicate value for these instructions.
+class CompareConstantExpr : public ConstantExpr {
+ virtual void anchor();
+ void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
+public:
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+ unsigned short predicate;
+ CompareConstantExpr(Type *ty, Instruction::OtherOps opc,
+ unsigned short pred, Constant* LHS, Constant* RHS)
+ : ConstantExpr(ty, opc, &Op<0>(), 2), predicate(pred) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ }
+ /// Transparently provide more efficient getOperand methods.
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+template <>
+struct OperandTraits<UnaryConstantExpr> :
+ public FixedNumOperandTraits<UnaryConstantExpr, 1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<BinaryConstantExpr> :
+ public FixedNumOperandTraits<BinaryConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
+
+template <>
+struct OperandTraits<SelectConstantExpr> :
+ public FixedNumOperandTraits<SelectConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractElementConstantExpr> :
+ public FixedNumOperandTraits<ExtractElementConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertElementConstantExpr> :
+ public FixedNumOperandTraits<InsertElementConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementConstantExpr, Value)
+
+template <>
+struct OperandTraits<ShuffleVectorConstantExpr> :
+ public FixedNumOperandTraits<ShuffleVectorConstantExpr, 3> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
+
+template <>
+struct OperandTraits<ExtractValueConstantExpr> :
+ public FixedNumOperandTraits<ExtractValueConstantExpr, 1> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<InsertValueConstantExpr> :
+ public FixedNumOperandTraits<InsertValueConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
+
+template <>
+struct OperandTraits<GetElementPtrConstantExpr> :
+ public VariadicOperandTraits<GetElementPtrConstantExpr, 1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrConstantExpr, Value)
+
+
+template <>
+struct OperandTraits<CompareConstantExpr> :
+ public FixedNumOperandTraits<CompareConstantExpr, 2> {
+};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
+
+struct ExprMapKeyType {
+ ExprMapKeyType(unsigned opc,
+ ArrayRef<Constant*> ops,
+ unsigned short flags = 0,
+ unsigned short optionalflags = 0,
+ ArrayRef<unsigned> inds = ArrayRef<unsigned>())
+ : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags),
+ operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {}
+ uint8_t opcode;
+ uint8_t subclassoptionaldata;
+ uint16_t subclassdata;
+ std::vector<Constant*> operands;
+ SmallVector<unsigned, 4> indices;
+ bool operator==(const ExprMapKeyType& that) const {
+ return this->opcode == that.opcode &&
+ this->subclassdata == that.subclassdata &&
+ this->subclassoptionaldata == that.subclassoptionaldata &&
+ this->operands == that.operands &&
+ this->indices == that.indices;
+ }
+ bool operator<(const ExprMapKeyType & that) const {
+ if (this->opcode != that.opcode) return this->opcode < that.opcode;
+ if (this->operands != that.operands) return this->operands < that.operands;
+ if (this->subclassdata != that.subclassdata)
+ return this->subclassdata < that.subclassdata;
+ if (this->subclassoptionaldata != that.subclassoptionaldata)
+ return this->subclassoptionaldata < that.subclassoptionaldata;
+ if (this->indices != that.indices) return this->indices < that.indices;
+ return false;
+ }
+
+ bool operator!=(const ExprMapKeyType& that) const {
+ return !(*this == that);
+ }
+};
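+
+// For illustration, the key for a hypothetical 'add nsw' constant expression
+// over constants C1 and C2 would be built roughly as:
+//
+//   Constant *Ops[] = { C1, C2 };
+//   ExprMapKeyType Key(Instruction::Add, Ops, 0,
+//                      OverflowingBinaryOperator::NoSignedWrap);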
+
+struct InlineAsmKeyType {
+ InlineAsmKeyType(StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
+ bool isAlignStack, InlineAsm::AsmDialect asmDialect)
+ : asm_string(AsmString), constraints(Constraints),
+ has_side_effects(hasSideEffects), is_align_stack(isAlignStack),
+ asm_dialect(asmDialect) {}
+ std::string asm_string;
+ std::string constraints;
+ bool has_side_effects;
+ bool is_align_stack;
+ InlineAsm::AsmDialect asm_dialect;
+ bool operator==(const InlineAsmKeyType& that) const {
+ return this->asm_string == that.asm_string &&
+ this->constraints == that.constraints &&
+ this->has_side_effects == that.has_side_effects &&
+ this->is_align_stack == that.is_align_stack &&
+ this->asm_dialect == that.asm_dialect;
+ }
+ bool operator<(const InlineAsmKeyType& that) const {
+ if (this->asm_string != that.asm_string)
+ return this->asm_string < that.asm_string;
+ if (this->constraints != that.constraints)
+ return this->constraints < that.constraints;
+ if (this->has_side_effects != that.has_side_effects)
+ return this->has_side_effects < that.has_side_effects;
+ if (this->is_align_stack != that.is_align_stack)
+ return this->is_align_stack < that.is_align_stack;
+ if (this->asm_dialect != that.asm_dialect)
+ return this->asm_dialect < that.asm_dialect;
+ return false;
+ }
+
+ bool operator!=(const InlineAsmKeyType& that) const {
+ return !(*this == that);
+ }
+};
+
+// The number of operands for each ConstantCreator::create method is
+// determined by the ConstantTraits template.
+// ConstantCreator - A class that is used to create constants by
+// ConstantUniqueMap*. This class should be partially specialized if there is
+// something strange that needs to be done to interface to the ctor for the
+// constant.
+//
+template<typename T, typename Alloc>
+struct ConstantTraits< std::vector<T, Alloc> > {
+ static unsigned uses(const std::vector<T, Alloc>& v) {
+ return v.size();
+ }
+};
+
+template<>
+struct ConstantTraits<Constant *> {
+ static unsigned uses(Constant * const & v) {
+ return 1;
+ }
+};
+
+template<class ConstantClass, class TypeClass, class ValType>
+struct ConstantCreator {
+ static ConstantClass *create(TypeClass *Ty, const ValType &V) {
+ return new(ConstantTraits<ValType>::uses(V)) ConstantClass(Ty, V);
+ }
+};
+
+template<class ConstantClass, class TypeClass>
+struct ConstantArrayCreator {
+ static ConstantClass *create(TypeClass *Ty, ArrayRef<Constant*> V) {
+ return new(V.size()) ConstantClass(Ty, V);
+ }
+};
+
+template<class ConstantClass>
+struct ConstantKeyData {
+ typedef void ValType;
+ static ValType getValType(ConstantClass *C) {
+ llvm_unreachable("Unknown Constant type!");
+ }
+};
+
+template<>
+struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
+ static ConstantExpr *create(Type *Ty, const ExprMapKeyType &V,
+ unsigned short pred = 0) {
+ if (Instruction::isCast(V.opcode))
+ return new UnaryConstantExpr(V.opcode, V.operands[0], Ty);
+    if (V.opcode >= Instruction::BinaryOpsBegin &&
+        V.opcode < Instruction::BinaryOpsEnd)
+      return new BinaryConstantExpr(V.opcode, V.operands[0], V.operands[1],
+                                    V.subclassoptionaldata);
+ if (V.opcode == Instruction::Select)
+ return new SelectConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ExtractElement)
+ return new ExtractElementConstantExpr(V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::InsertElement)
+ return new InsertElementConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::ShuffleVector)
+ return new ShuffleVectorConstantExpr(V.operands[0], V.operands[1],
+ V.operands[2]);
+ if (V.opcode == Instruction::InsertValue)
+ return new InsertValueConstantExpr(V.operands[0], V.operands[1],
+ V.indices, Ty);
+ if (V.opcode == Instruction::ExtractValue)
+ return new ExtractValueConstantExpr(V.operands[0], V.indices, Ty);
+ if (V.opcode == Instruction::GetElementPtr) {
+ std::vector<Constant*> IdxList(V.operands.begin()+1, V.operands.end());
+ return GetElementPtrConstantExpr::Create(V.operands[0], IdxList, Ty,
+ V.subclassoptionaldata);
+ }
+
+    // The compare instructions are weird: the predicate is not part of the
+    // opcode. It travels in the key's subclassdata field, so it has to be
+    // passed through to the CompareConstantExpr constructor explicitly.
+ if (V.opcode == Instruction::ICmp)
+ return new CompareConstantExpr(Ty, Instruction::ICmp, V.subclassdata,
+ V.operands[0], V.operands[1]);
+ if (V.opcode == Instruction::FCmp)
+ return new CompareConstantExpr(Ty, Instruction::FCmp, V.subclassdata,
+ V.operands[0], V.operands[1]);
+ llvm_unreachable("Invalid ConstantExpr!");
+ }
+};
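+
+// For example (illustrative): a key with opcode Instruction::ICmp and
+// subclassdata CmpInst::ICMP_EQ is created above as a CompareConstantExpr
+// whose getPredicate() returns ICMP_EQ.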
+
+template<>
+struct ConstantKeyData<ConstantExpr> {
+ typedef ExprMapKeyType ValType;
+ static ValType getValType(ConstantExpr *CE) {
+ std::vector<Constant*> Operands;
+ Operands.reserve(CE->getNumOperands());
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ Operands.push_back(cast<Constant>(CE->getOperand(i)));
+ return ExprMapKeyType(CE->getOpcode(), Operands,
+ CE->isCompare() ? CE->getPredicate() : 0,
+ CE->getRawSubclassOptionalData(),
+ CE->hasIndices() ?
+ CE->getIndices() : ArrayRef<unsigned>());
+ }
+};
+
+template<>
+struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> {
+ static InlineAsm *create(PointerType *Ty, const InlineAsmKeyType &Key) {
+ return new InlineAsm(Ty, Key.asm_string, Key.constraints,
+ Key.has_side_effects, Key.is_align_stack,
+ Key.asm_dialect);
+ }
+};
+
+template<>
+struct ConstantKeyData<InlineAsm> {
+ typedef InlineAsmKeyType ValType;
+ static ValType getValType(InlineAsm *Asm) {
+ return InlineAsmKeyType(Asm->getAsmString(), Asm->getConstraintString(),
+ Asm->hasSideEffects(), Asm->isAlignStack(),
+ Asm->getDialect());
+ }
+};
+
+template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
+ bool HasLargeKey = false /*true for arrays and structs*/ >
+class ConstantUniqueMap {
+public:
+ typedef std::pair<TypeClass*, ValType> MapKey;
+ typedef std::map<MapKey, ConstantClass *> MapTy;
+ typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy;
+private:
+ /// Map - This is the main map from the element descriptor to the Constants.
+ /// This is the primary way we avoid creating two of the same shape
+ /// constant.
+ MapTy Map;
+
+ /// InverseMap - If "HasLargeKey" is true, this contains an inverse mapping
+ /// from the constants to their element in Map. This is important for
+ /// removal of constants from the array, which would otherwise have to scan
+ /// through the map with very large keys.
+ InverseMapTy InverseMap;
+
+public:
+ typename MapTy::iterator map_begin() { return Map.begin(); }
+ typename MapTy::iterator map_end() { return Map.end(); }
+
+ void freeConstants() {
+ for (typename MapTy::iterator I=Map.begin(), E=Map.end();
+ I != E; ++I) {
+ // Asserts that use_empty().
+ delete I->second;
+ }
+ }
+
+  /// InsertOrGetItem - Return an iterator for the specified element.
+  /// If the element exists in the map, the returned iterator points to the
+  /// entry and Exists is set to true. If not, the iterator points to the
+  /// newly inserted entry and Exists is set to false. Newly inserted entries
+  /// have I->second == 0, and should be filled in.
+ typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, ConstantClass *>
+ &InsertVal,
+ bool &Exists) {
+ std::pair<typename MapTy::iterator, bool> IP = Map.insert(InsertVal);
+ Exists = !IP.second;
+ return IP.first;
+ }
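+
+  // Illustrative call pattern (a sketch; Key and NewC are hypothetical):
+  //
+  //   std::pair<MapKey, ConstantClass*> InsertVal(Key, 0);
+  //   bool Exists;
+  //   typename MapTy::iterator I = InsertOrGetItem(InsertVal, Exists);
+  //   if (!Exists)
+  //     I->second = NewC; // fill in the freshly constructed constant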
+
+private:
+ typename MapTy::iterator FindExistingElement(ConstantClass *CP) {
+ if (HasLargeKey) {
+ typename InverseMapTy::iterator IMI = InverseMap.find(CP);
+ assert(IMI != InverseMap.end() && IMI->second != Map.end() &&
+ IMI->second->second == CP &&
+ "InverseMap corrupt!");
+ return IMI->second;
+ }
+
+ typename MapTy::iterator I =
+ Map.find(MapKey(static_cast<TypeClass*>(CP->getType()),
+ ConstantKeyData<ConstantClass>::getValType(CP)));
+ if (I == Map.end() || I->second != CP) {
+ // FIXME: This should not use a linear scan. If this gets to be a
+ // performance problem, someone should look at this.
+ for (I = Map.begin(); I != Map.end() && I->second != CP; ++I)
+ /* empty */;
+ }
+ return I;
+ }
+
+ ConstantClass *Create(TypeClass *Ty, ValRefType V,
+ typename MapTy::iterator I) {
+ ConstantClass* Result =
+ ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
+
+ assert(Result->getType() == Ty && "Type specified is not correct!");
+ I = Map.insert(I, std::make_pair(MapKey(Ty, V), Result));
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.insert(std::make_pair(Result, I));
+
+ return Result;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
+ ConstantClass *getOrCreate(TypeClass *Ty, ValRefType V) {
+ MapKey Lookup(Ty, V);
+ ConstantClass* Result = 0;
+
+ typename MapTy::iterator I = Map.find(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ Result = I->second;
+
+ if (!Result) {
+ // If no preexisting value, create one now...
+ Result = Create(Ty, V, I);
+ }
+
+ return Result;
+ }
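+
+  // Note the uniquing guarantee this provides: two getOrCreate calls with the
+  // same (Ty, V) key yield the identical pointer, i.e.
+  //
+  //   assert(getOrCreate(Ty, V) == getOrCreate(Ty, V));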
+
+ void remove(ConstantClass *CP) {
+ typename MapTy::iterator I = FindExistingElement(CP);
+ assert(I != Map.end() && "Constant not found in constant table!");
+ assert(I->second == CP && "Didn't find correct element?");
+
+ if (HasLargeKey) // Remember the reverse mapping if needed.
+ InverseMap.erase(CP);
+
+ Map.erase(I);
+ }
+
+ /// MoveConstantToNewSlot - If we are about to change C to be the element
+ /// specified by I, update our internal data structures to reflect this
+ /// fact.
+ void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
+ // First, remove the old location of the specified constant in the map.
+ typename MapTy::iterator OldI = FindExistingElement(C);
+ assert(OldI != Map.end() && "Constant not found in constant table!");
+ assert(OldI->second == C && "Didn't find correct element?");
+
+ // Remove the old entry from the map.
+ Map.erase(OldI);
+
+ // Update the inverse map so that we know that this constant is now
+ // located at descriptor I.
+ if (HasLargeKey) {
+ assert(I->second == C && "Bad inversemap entry!");
+ InverseMap[C] = I;
+ }
+ }
+
+ void dump() const {
+    DEBUG(dbgs() << "Constants.cpp: ConstantUniqueMap\n");
+ }
+};
+
+// Unique map for aggregate constants
+template<class TypeClass, class ConstantClass>
+class ConstantAggrUniqueMap {
+public:
+ typedef ArrayRef<Constant*> Operands;
+ typedef std::pair<TypeClass*, Operands> LookupKey;
+private:
+ struct MapInfo {
+ typedef DenseMapInfo<ConstantClass*> ConstantClassInfo;
+ typedef DenseMapInfo<Constant*> ConstantInfo;
+ typedef DenseMapInfo<TypeClass*> TypeClassInfo;
+ static inline ConstantClass* getEmptyKey() {
+ return ConstantClassInfo::getEmptyKey();
+ }
+ static inline ConstantClass* getTombstoneKey() {
+ return ConstantClassInfo::getTombstoneKey();
+ }
+ static unsigned getHashValue(const ConstantClass *CP) {
+ SmallVector<Constant*, 8> CPOperands;
+ CPOperands.reserve(CP->getNumOperands());
+ for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I)
+ CPOperands.push_back(CP->getOperand(I));
+ return getHashValue(LookupKey(CP->getType(), CPOperands));
+ }
+ static bool isEqual(const ConstantClass *LHS, const ConstantClass *RHS) {
+ return LHS == RHS;
+ }
+ static unsigned getHashValue(const LookupKey &Val) {
+ return hash_combine(Val.first, hash_combine_range(Val.second.begin(),
+ Val.second.end()));
+ }
+ static bool isEqual(const LookupKey &LHS, const ConstantClass *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+      if (LHS.first != RHS->getType() ||
+          LHS.second.size() != RHS->getNumOperands())
+        return false;
+ for (unsigned I = 0, E = RHS->getNumOperands(); I < E; ++I) {
+ if (LHS.second[I] != RHS->getOperand(I))
+ return false;
+ }
+ return true;
+ }
+ };
+public:
+ typedef DenseMap<ConstantClass *, char, MapInfo> MapTy;
+
+private:
+ /// Map - This is the main map from the element descriptor to the Constants.
+ /// This is the primary way we avoid creating two of the same shape
+ /// constant.
+ MapTy Map;
+
+public:
+ typename MapTy::iterator map_begin() { return Map.begin(); }
+ typename MapTy::iterator map_end() { return Map.end(); }
+
+ void freeConstants() {
+ for (typename MapTy::iterator I=Map.begin(), E=Map.end();
+ I != E; ++I) {
+ // Asserts that use_empty().
+ delete I->first;
+ }
+ }
+
+private:
+ typename MapTy::iterator findExistingElement(ConstantClass *CP) {
+ return Map.find(CP);
+ }
+
+ ConstantClass *Create(TypeClass *Ty, Operands V, typename MapTy::iterator I) {
+ ConstantClass* Result =
+ ConstantArrayCreator<ConstantClass,TypeClass>::create(Ty, V);
+
+ assert(Result->getType() == Ty && "Type specified is not correct!");
+ Map[Result] = '\0';
+
+ return Result;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
+ ConstantClass *getOrCreate(TypeClass *Ty, Operands V) {
+ LookupKey Lookup(Ty, V);
+ ConstantClass* Result = 0;
+
+ typename MapTy::iterator I = Map.find_as(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ Result = I->first;
+
+ if (!Result) {
+ // If no preexisting value, create one now...
+ Result = Create(Ty, V, I);
+ }
+
+ return Result;
+ }
+
+ /// Find the constant by lookup key.
+ typename MapTy::iterator find(LookupKey Lookup) {
+ return Map.find_as(Lookup);
+ }
+
+ /// Insert the constant into its proper slot.
+ void insert(ConstantClass *CP) {
+ Map[CP] = '\0';
+ }
+
+ /// Remove this constant from the map
+ void remove(ConstantClass *CP) {
+ typename MapTy::iterator I = findExistingElement(CP);
+ assert(I != Map.end() && "Constant not found in constant table!");
+ assert(I->first == CP && "Didn't find correct element?");
+ Map.erase(I);
+ }
+
+ void dump() const {
+    DEBUG(dbgs() << "Constants.cpp: ConstantUniqueMap\n");
+ }
+};
+
+}
+
+#endif
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
new file mode 100644
index 000000000000..983b49c628b4
--- /dev/null
+++ b/lib/IR/Core.cpp
@@ -0,0 +1,2458 @@
+//===-- Core.cpp ----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common infrastructure (including the C bindings)
+// for libLLVMCore.a, which implements the LLVM intermediate representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Core.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Threading.h"
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+using namespace llvm;
+
+void llvm::initializeCore(PassRegistry &Registry) {
+ initializeDominatorTreePass(Registry);
+ initializePrintModulePassPass(Registry);
+ initializePrintFunctionPassPass(Registry);
+ initializePrintBasicBlockPassPass(Registry);
+ initializeVerifierPass(Registry);
+ initializePreVerifierPass(Registry);
+}
+
+void LLVMInitializeCore(LLVMPassRegistryRef R) {
+ initializeCore(*unwrap(R));
+}
+
+void LLVMShutdown() {
+ llvm_shutdown();
+}
+
+/*===-- Error handling ----------------------------------------------------===*/
+
+void LLVMDisposeMessage(char *Message) {
+ free(Message);
+}
+
+
+/*===-- Operations on contexts --------------------------------------------===*/
+
+LLVMContextRef LLVMContextCreate() {
+ return wrap(new LLVMContext());
+}
+
+LLVMContextRef LLVMGetGlobalContext() {
+ return wrap(&getGlobalContext());
+}
+
+void LLVMContextDispose(LLVMContextRef C) {
+ delete unwrap(C);
+}
+
+unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name,
+ unsigned SLen) {
+ return unwrap(C)->getMDKindID(StringRef(Name, SLen));
+}
+
+unsigned LLVMGetMDKindID(const char* Name, unsigned SLen) {
+ return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen);
+}
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) {
+ return wrap(new Module(ModuleID, getGlobalContext()));
+}
+
+LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID,
+ LLVMContextRef C) {
+ return wrap(new Module(ModuleID, *unwrap(C)));
+}
+
+void LLVMDisposeModule(LLVMModuleRef M) {
+ delete unwrap(M);
+}
+
+/*--.. Data layout .........................................................--*/
+const char *LLVMGetDataLayout(LLVMModuleRef M) {
+ return unwrap(M)->getDataLayout().c_str();
+}
+
+void LLVMSetDataLayout(LLVMModuleRef M, const char *DataLayoutStr) {
+  unwrap(M)->setDataLayout(DataLayoutStr);
+}
+
+/*--.. Target triple .......................................................--*/
+const char *LLVMGetTarget(LLVMModuleRef M) {
+ return unwrap(M)->getTargetTriple().c_str();
+}
+
+void LLVMSetTarget(LLVMModuleRef M, const char *Triple) {
+ unwrap(M)->setTargetTriple(Triple);
+}
+
+void LLVMDumpModule(LLVMModuleRef M) {
+ unwrap(M)->dump();
+}
+
+LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename,
+ char **ErrorMessage) {
+ std::string error;
+ raw_fd_ostream dest(Filename, error);
+ if (!error.empty()) {
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+
+ unwrap(M)->print(dest, NULL);
+
+ if (!error.empty()) {
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+ dest.flush();
+ return false;
+}
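+
+/* Typical call pattern (a sketch; Msg is a hypothetical local):
+
+     char *Msg = NULL;
+     if (LLVMPrintModuleToFile(M, "out.ll", &Msg)) {
+       fprintf(stderr, "%s\n", Msg);
+       LLVMDisposeMessage(Msg);
+     }
+*/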
+
+/*--.. Operations on inline assembler ......................................--*/
+void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) {
+ unwrap(M)->setModuleInlineAsm(StringRef(Asm));
+}
+
+
+/*--.. Operations on module contexts ......................................--*/
+LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) {
+ return wrap(&unwrap(M)->getContext());
+}
+
+
+/*===-- Operations on types -----------------------------------------------===*/
+
+/*--.. Operations on all types (mostly) ....................................--*/
+
+LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
+ switch (unwrap(Ty)->getTypeID()) {
+ default: llvm_unreachable("Unhandled TypeID.");
+ case Type::VoidTyID:
+ return LLVMVoidTypeKind;
+ case Type::HalfTyID:
+ return LLVMHalfTypeKind;
+ case Type::FloatTyID:
+ return LLVMFloatTypeKind;
+ case Type::DoubleTyID:
+ return LLVMDoubleTypeKind;
+ case Type::X86_FP80TyID:
+ return LLVMX86_FP80TypeKind;
+ case Type::FP128TyID:
+ return LLVMFP128TypeKind;
+ case Type::PPC_FP128TyID:
+ return LLVMPPC_FP128TypeKind;
+ case Type::LabelTyID:
+ return LLVMLabelTypeKind;
+ case Type::MetadataTyID:
+ return LLVMMetadataTypeKind;
+ case Type::IntegerTyID:
+ return LLVMIntegerTypeKind;
+ case Type::FunctionTyID:
+ return LLVMFunctionTypeKind;
+ case Type::StructTyID:
+ return LLVMStructTypeKind;
+ case Type::ArrayTyID:
+ return LLVMArrayTypeKind;
+ case Type::PointerTyID:
+ return LLVMPointerTypeKind;
+ case Type::VectorTyID:
+ return LLVMVectorTypeKind;
+ case Type::X86_MMXTyID:
+ return LLVMX86_MMXTypeKind;
+ }
+}
+
+LLVMBool LLVMTypeIsSized(LLVMTypeRef Ty) {
+  return unwrap(Ty)->isSized();
+}
+
+LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) {
+ return wrap(&unwrap(Ty)->getContext());
+}
+
+/*--.. Operations on integer types .........................................--*/
+
+LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt1Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt8Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt16Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt32Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getInt64Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits) {
+ return wrap(IntegerType::get(*unwrap(C), NumBits));
+}
+
+LLVMTypeRef LLVMInt1Type(void) {
+ return LLVMInt1TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt8Type(void) {
+ return LLVMInt8TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt16Type(void) {
+ return LLVMInt16TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt32Type(void) {
+ return LLVMInt32TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMInt64Type(void) {
+ return LLVMInt64TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMIntType(unsigned NumBits) {
+ return LLVMIntTypeInContext(LLVMGetGlobalContext(), NumBits);
+}
+
+unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy) {
+ return unwrap<IntegerType>(IntegerTy)->getBitWidth();
+}
+
+/*--.. Operations on real types ............................................--*/
+
+LLVMTypeRef LLVMHalfTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getHalfTy(*unwrap(C));
+}
+LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getFloatTy(*unwrap(C));
+}
+LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getDoubleTy(*unwrap(C));
+}
+LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getX86_FP80Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getFP128Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getPPC_FP128Ty(*unwrap(C));
+}
+LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C));
+}
+
+LLVMTypeRef LLVMHalfType(void) {
+ return LLVMHalfTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMFloatType(void) {
+ return LLVMFloatTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMDoubleType(void) {
+ return LLVMDoubleTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMX86FP80Type(void) {
+ return LLVMX86FP80TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMFP128Type(void) {
+ return LLVMFP128TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMPPCFP128Type(void) {
+ return LLVMPPCFP128TypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMX86MMXType(void) {
+ return LLVMX86MMXTypeInContext(LLVMGetGlobalContext());
+}
+
+/*--.. Operations on function types ........................................--*/
+
+LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
+ LLVMTypeRef *ParamTypes, unsigned ParamCount,
+ LLVMBool IsVarArg) {
+ ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
+ return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0));
+}
+
+LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) {
+ return unwrap<FunctionType>(FunctionTy)->isVarArg();
+}
+
+LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy) {
+ return wrap(unwrap<FunctionType>(FunctionTy)->getReturnType());
+}
+
+unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy) {
+ return unwrap<FunctionType>(FunctionTy)->getNumParams();
+}
+
+void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) {
+ FunctionType *Ty = unwrap<FunctionType>(FunctionTy);
+ for (FunctionType::param_iterator I = Ty->param_begin(),
+ E = Ty->param_end(); I != E; ++I)
+ *Dest++ = wrap(*I);
+}
+
+/*--.. Operations on struct types ..........................................--*/
+
+LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
+ return wrap(StructType::get(*unwrap(C), Tys, Packed != 0));
+}
+
+LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ return LLVMStructTypeInContext(LLVMGetGlobalContext(), ElementTypes,
+ ElementCount, Packed);
+}
+
+LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name) {
+  return wrap(StructType::create(*unwrap(C), Name));
+}
+
+const char *LLVMGetStructName(LLVMTypeRef Ty) {
+  StructType *Type = unwrap<StructType>(Ty);
+  if (!Type->hasName())
+    return 0;
+  return Type->getName().data();
+}
+
+void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
+ unwrap<StructType>(StructTy)->setBody(Tys, Packed != 0);
+}
+
+unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->getNumElements();
+}
+
+void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) {
+ StructType *Ty = unwrap<StructType>(StructTy);
+ for (StructType::element_iterator I = Ty->element_begin(),
+ E = Ty->element_end(); I != E; ++I)
+ *Dest++ = wrap(*I);
+}
+
+LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isPacked();
+}
+
+LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isOpaque();
+}
+
+LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getTypeByName(Name));
+}
+
+/*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
+
+LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
+ return wrap(ArrayType::get(unwrap(ElementType), ElementCount));
+}
+
+LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace) {
+ return wrap(PointerType::get(unwrap(ElementType), AddressSpace));
+}
+
+LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) {
+ return wrap(VectorType::get(unwrap(ElementType), ElementCount));
+}
+
+LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty) {
+ return wrap(unwrap<SequentialType>(Ty)->getElementType());
+}
+
+unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) {
+ return unwrap<ArrayType>(ArrayTy)->getNumElements();
+}
+
+unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy) {
+ return unwrap<PointerType>(PointerTy)->getAddressSpace();
+}
+
+unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) {
+ return unwrap<VectorType>(VectorTy)->getNumElements();
+}
+
+/*--.. Operations on other types ...........................................--*/
+
+LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C) {
+ return wrap(Type::getVoidTy(*unwrap(C)));
+}
+LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C) {
+ return wrap(Type::getLabelTy(*unwrap(C)));
+}
+
+LLVMTypeRef LLVMVoidType(void) {
+ return LLVMVoidTypeInContext(LLVMGetGlobalContext());
+}
+LLVMTypeRef LLVMLabelType(void) {
+ return LLVMLabelTypeInContext(LLVMGetGlobalContext());
+}
+
+/*===-- Operations on values ----------------------------------------------===*/
+
+/*--.. Operations on all values ............................................--*/
+
+LLVMTypeRef LLVMTypeOf(LLVMValueRef Val) {
+ return wrap(unwrap(Val)->getType());
+}
+
+const char *LLVMGetValueName(LLVMValueRef Val) {
+ return unwrap(Val)->getName().data();
+}
+
+void LLVMSetValueName(LLVMValueRef Val, const char *Name) {
+ unwrap(Val)->setName(Name);
+}
+
+void LLVMDumpValue(LLVMValueRef Val) {
+ unwrap(Val)->dump();
+}
+
+void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) {
+ unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal));
+}
+
+int LLVMHasMetadata(LLVMValueRef Inst) {
+ return unwrap<Instruction>(Inst)->hasMetadata();
+}
+
+LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
+ return wrap(unwrap<Instruction>(Inst)->getMetadata(KindID));
+}
+
+void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
+ unwrap<Instruction>(Inst)->setMetadata(KindID, MD? unwrap<MDNode>(MD) : NULL);
+}
+
+/*--.. Conversion functions ................................................--*/
+
+#define LLVM_DEFINE_VALUE_CAST(name) \
+ LLVMValueRef LLVMIsA##name(LLVMValueRef Val) { \
+ return wrap(static_cast<Value*>(dyn_cast_or_null<name>(unwrap(Val)))); \
+ }
+
+LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST)
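+
+/* The macro above is instantiated once per Value subclass listed in
+   llvm-c/Core.h; for a hypothetical subclass Foo it expands to:
+
+     LLVMValueRef LLVMIsAFoo(LLVMValueRef Val) {
+       return wrap(static_cast<Value*>(dyn_cast_or_null<Foo>(unwrap(Val))));
+     }
+*/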
+
+/*--.. Operations on Uses ..................................................--*/
+LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) {
+ Value *V = unwrap(Val);
+ Value::use_iterator I = V->use_begin();
+ if (I == V->use_end())
+ return 0;
+ return wrap(&(I.getUse()));
+}
+
+LLVMUseRef LLVMGetNextUse(LLVMUseRef U) {
+ Use *Next = unwrap(U)->getNext();
+ if (Next)
+ return wrap(Next);
+ return 0;
+}
+
+LLVMValueRef LLVMGetUser(LLVMUseRef U) {
+ return wrap(unwrap(U)->getUser());
+}
+
+LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) {
+ return wrap(unwrap(U)->get());
+}
+
+/*--.. Operations on Users .................................................--*/
+LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) {
+ Value *V = unwrap(Val);
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ return wrap(MD->getOperand(Index));
+ return wrap(cast<User>(V)->getOperand(Index));
+}
+
+void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) {
+ unwrap<User>(Val)->setOperand(Index, unwrap(Op));
+}
+
+int LLVMGetNumOperands(LLVMValueRef Val) {
+ Value *V = unwrap(Val);
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ return MD->getNumOperands();
+ return cast<User>(V)->getNumOperands();
+}
+
+/*--.. Operations on constants of any type .................................--*/
+
+LLVMValueRef LLVMConstNull(LLVMTypeRef Ty) {
+ return wrap(Constant::getNullValue(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty) {
+ return wrap(Constant::getAllOnesValue(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty) {
+ return wrap(UndefValue::get(unwrap(Ty)));
+}
+
+LLVMBool LLVMIsConstant(LLVMValueRef Val) {
+  return isa<Constant>(unwrap(Val));
+}
+
+LLVMBool LLVMIsNull(LLVMValueRef Val) {
+ if (Constant *C = dyn_cast<Constant>(unwrap(Val)))
+ return C->isNullValue();
+ return false;
+}
+
+LLVMBool LLVMIsUndef(LLVMValueRef Val) {
+ return isa<UndefValue>(unwrap(Val));
+}
+
+LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
+  return wrap(ConstantPointerNull::get(unwrap<PointerType>(Ty)));
+}
+
+/*--.. Operations on metadata nodes ........................................--*/
+
+LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
+ unsigned SLen) {
+ return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen)));
+}
+
+LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) {
+ return LLVMMDStringInContext(LLVMGetGlobalContext(), Str, SLen);
+}
+
+LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
+ unsigned Count) {
+ return wrap(MDNode::get(*unwrap(C),
+ makeArrayRef(unwrap<Value>(Vals, Count), Count)));
+}
+
+LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) {
+ return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count);
+}
+
+const char *LLVMGetMDString(LLVMValueRef V, unsigned* Len) {
+ if (const MDString *S = dyn_cast<MDString>(unwrap(V))) {
+ *Len = S->getString().size();
+ return S->getString().data();
+ }
+ *Len = 0;
+ return 0;
+}
+
+unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V) {
+  return cast<MDNode>(unwrap(V))->getNumOperands();
+}
+
+void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest) {
+  const MDNode *N = cast<MDNode>(unwrap(V));
+  const unsigned numOperands = N->getNumOperands();
+  for (unsigned i = 0; i < numOperands; i++)
+    Dest[i] = wrap(N->getOperand(i));
+}
+
+unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char* name) {
+  if (NamedMDNode *N = unwrap(M)->getNamedMetadata(name))
+    return N->getNumOperands();
+  return 0;
+}
+
+void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name,
+                                  LLVMValueRef *Dest) {
+  NamedMDNode *N = unwrap(M)->getNamedMetadata(name);
+  if (!N)
+    return;
+  for (unsigned i = 0; i < N->getNumOperands(); i++)
+    Dest[i] = wrap(N->getOperand(i));
+}
+
+void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name,
+ LLVMValueRef Val)
+{
+ NamedMDNode *N = unwrap(M)->getOrInsertNamedMetadata(name);
+ if (!N)
+ return;
+ MDNode *Op = Val ? unwrap<MDNode>(Val) : NULL;
+ if (Op)
+ N->addOperand(Op);
+}
+
+/*--.. Operations on scalar constants ......................................--*/
+
+LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
+ LLVMBool SignExtend) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), N, SignExtend != 0));
+}
+
+LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy,
+ unsigned NumWords,
+ const uint64_t Words[]) {
+ IntegerType *Ty = unwrap<IntegerType>(IntTy);
+ return wrap(ConstantInt::get(Ty->getContext(),
+ APInt(Ty->getBitWidth(),
+ makeArrayRef(Words, NumWords))));
+}
+
+LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char Str[],
+ uint8_t Radix) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str),
+ Radix));
+}
+
+LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char Str[],
+ unsigned SLen, uint8_t Radix) {
+ return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str, SLen),
+ Radix));
+}
+
+LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N) {
+ return wrap(ConstantFP::get(unwrap(RealTy), N));
+}
+
+LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text) {
+ return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Text)));
+}
+
+LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char Str[],
+ unsigned SLen) {
+ return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Str, SLen)));
+}
+
+unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal) {
+ return unwrap<ConstantInt>(ConstantVal)->getZExtValue();
+}
+
+long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal) {
+ return unwrap<ConstantInt>(ConstantVal)->getSExtValue();
+}
+
+/*--.. Operations on composite constants ...................................--*/
+
+LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
+ unsigned Length,
+ LLVMBool DontNullTerminate) {
+ /* Inverted the sense of AddNull because ', 0)' is a
+ better mnemonic for null termination than ', 1)'. */
+ return wrap(ConstantDataArray::getString(*unwrap(C), StringRef(Str, Length),
+ DontNullTerminate == 0));
+}
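+
+/* For example (illustrative): both calls below yield the six-byte constant
+   c"hello\00"; the first asks for the terminator to be appended, the second
+   supplies it explicitly:
+
+     LLVMConstStringInContext(C, "hello", 5, 0);
+     LLVMConstStringInContext(C, "hello\0", 6, 1);
+*/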
+LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
+ LLVMValueRef *ConstantVals,
+ unsigned Count, LLVMBool Packed) {
+ Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+ return wrap(ConstantStruct::getAnon(*unwrap(C), makeArrayRef(Elements, Count),
+ Packed != 0));
+}
+
+LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
+ LLVMBool DontNullTerminate) {
+ return LLVMConstStringInContext(LLVMGetGlobalContext(), Str, Length,
+ DontNullTerminate);
+}
+LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
+ LLVMValueRef *ConstantVals, unsigned Length) {
+ ArrayRef<Constant*> V(unwrap<Constant>(ConstantVals, Length), Length);
+ return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V));
+}
+LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
+ LLVMBool Packed) {
+ return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
+ Packed);
+}
+
+LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
+ LLVMValueRef *ConstantVals,
+ unsigned Count) {
+ Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+ StructType *Ty = cast<StructType>(unwrap(StructTy));
+
+ return wrap(ConstantStruct::get(Ty, makeArrayRef(Elements, Count)));
+}
+
+LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
+ return wrap(ConstantVector::get(makeArrayRef(
+ unwrap<Constant>(ScalarConstantVals, Size), Size)));
+}
+
+/*--.. Opcode mapping ......................................................--*/
+
+static LLVMOpcode map_to_llvmopcode(int opcode) {
+ switch (opcode) {
+ default: llvm_unreachable("Unhandled Opcode.");
+#define HANDLE_INST(num, opc, clas) case num: return LLVM##opc;
+#include "llvm/IR/Instruction.def"
+#undef HANDLE_INST
+ }
+}
+
+static int map_from_llvmopcode(LLVMOpcode code) {
+ switch (code) {
+#define HANDLE_INST(num, opc, clas) case LLVM##opc: return num;
+#include "llvm/IR/Instruction.def"
+#undef HANDLE_INST
+ }
+ llvm_unreachable("Unhandled Opcode.");
+}
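+
+/* Each HANDLE_INST expansion contributes one case per opcode; illustratively,
+   the Instruction.def entry for Ret becomes 'case 1: return LLVMRet;' in
+   map_to_llvmopcode and 'case LLVMRet: return 1;' in map_from_llvmopcode. */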
+
+/*--.. Constant expressions ................................................--*/
+
+LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) {
+ return map_to_llvmopcode(unwrap<ConstantExpr>(ConstantVal)->getOpcode());
+}
+
+LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty) {
+ return wrap(ConstantExpr::getAlignOf(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty) {
+ return wrap(ConstantExpr::getSizeOf(unwrap(Ty)));
+}
+
+LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNSWNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNUWNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getFNeg(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNot(unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFAdd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFMul(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getUDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getExactSDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getURem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getSRem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFRem(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getOr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getXor(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstICmp(LLVMIntPredicate Predicate,
+ LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getICmp(Predicate,
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate,
+ LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getFCmp(Predicate,
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
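+/* A sketch of the constant-expression folders above; results fold eagerly
+   where possible. `Ctx` is an assumed LLVMContextRef:
+
+     LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
+     LLVMValueRef C2 = LLVMConstInt(I32, 2, 0), C3 = LLVMConstInt(I32, 3, 0);
+     LLVMValueRef Sum = LLVMConstAdd(C2, C3);              // i32 5
+     LLVMValueRef Lt = LLVMConstICmp(LLVMIntULT, C2, C3);  // i1 true
+*/
+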
+LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getShl(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getLShr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getAShr(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices, unsigned NumIndices) {
+ ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
+ NumIndices);
+ return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
+ IdxList));
+}
+
+LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
+ LLVMValueRef *ConstantIndices,
+ unsigned NumIndices) {
+ Constant* Val = unwrap<Constant>(ConstantVal);
+ ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
+ NumIndices);
+ return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, IdxList));
+}
+
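+/* Sketch: a constant GEP needs constant indices; `G` is an assumed global of
+   type [4 x i32]* and `I32` an i32 type as above:
+
+     LLVMValueRef Idx[] = { LLVMConstInt(I32, 0, 0), LLVMConstInt(I32, 2, 0) };
+     LLVMValueRef ElemPtr = LLVMConstInBoundsGEP(G, Idx, 2);
+*/
+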
+LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getTrunc(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSExt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getZExt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPTrunc(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPExtend(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getUIToFP(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSIToFP(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPToUI(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPToSI(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getPtrToInt(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getIntToPtr(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getZExtOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getSExtOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getTruncOrBitCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
+ LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getPointerCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
+ LLVMBool isSigned) {
+ return wrap(ConstantExpr::getIntegerCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType), isSigned));
+}
+
+LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
+ return wrap(ConstantExpr::getFPCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType)));
+}
+
+LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
+ LLVMValueRef ConstantIfTrue,
+ LLVMValueRef ConstantIfFalse) {
+ return wrap(ConstantExpr::getSelect(unwrap<Constant>(ConstantCondition),
+ unwrap<Constant>(ConstantIfTrue),
+ unwrap<Constant>(ConstantIfFalse)));
+}
+
+LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
+ LLVMValueRef IndexConstant) {
+ return wrap(ConstantExpr::getExtractElement(unwrap<Constant>(VectorConstant),
+ unwrap<Constant>(IndexConstant)));
+}
+
+LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
+ LLVMValueRef ElementValueConstant,
+ LLVMValueRef IndexConstant) {
+ return wrap(ConstantExpr::getInsertElement(unwrap<Constant>(VectorConstant),
+ unwrap<Constant>(ElementValueConstant),
+ unwrap<Constant>(IndexConstant)));
+}
+
+LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
+ LLVMValueRef VectorBConstant,
+ LLVMValueRef MaskConstant) {
+ return wrap(ConstantExpr::getShuffleVector(unwrap<Constant>(VectorAConstant),
+ unwrap<Constant>(VectorBConstant),
+ unwrap<Constant>(MaskConstant)));
+}
+
+LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
+ unsigned NumIdx) {
+ return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
+ makeArrayRef(IdxList, NumIdx)));
+}
+
+LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
+ LLVMValueRef ElementValueConstant,
+ unsigned *IdxList, unsigned NumIdx) {
+ return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
+ unwrap<Constant>(ElementValueConstant),
+ makeArrayRef(IdxList, NumIdx)));
+}
+
+LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
+ const char *Constraints,
+ LLVMBool HasSideEffects,
+ LLVMBool IsAlignStack) {
+ return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString,
+ Constraints, HasSideEffects, IsAlignStack));
+}
+
+LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) {
+ return wrap(BlockAddress::get(unwrap<Function>(F), unwrap(BB)));
+}
+
+/*--.. Operations on global variables, functions, and aliases (globals) ....--*/
+
+LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) {
+ return wrap(unwrap<GlobalValue>(Global)->getParent());
+}
+
+LLVMBool LLVMIsDeclaration(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->isDeclaration();
+}
+
+LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
+ switch (unwrap<GlobalValue>(Global)->getLinkage()) {
+ case GlobalValue::ExternalLinkage:
+ return LLVMExternalLinkage;
+ case GlobalValue::AvailableExternallyLinkage:
+ return LLVMAvailableExternallyLinkage;
+ case GlobalValue::LinkOnceAnyLinkage:
+ return LLVMLinkOnceAnyLinkage;
+ case GlobalValue::LinkOnceODRLinkage:
+ return LLVMLinkOnceODRLinkage;
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
+ return LLVMLinkOnceODRAutoHideLinkage;
+ case GlobalValue::WeakAnyLinkage:
+ return LLVMWeakAnyLinkage;
+ case GlobalValue::WeakODRLinkage:
+ return LLVMWeakODRLinkage;
+ case GlobalValue::AppendingLinkage:
+ return LLVMAppendingLinkage;
+ case GlobalValue::InternalLinkage:
+ return LLVMInternalLinkage;
+ case GlobalValue::PrivateLinkage:
+ return LLVMPrivateLinkage;
+ case GlobalValue::LinkerPrivateLinkage:
+ return LLVMLinkerPrivateLinkage;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ return LLVMLinkerPrivateWeakLinkage;
+ case GlobalValue::DLLImportLinkage:
+ return LLVMDLLImportLinkage;
+ case GlobalValue::DLLExportLinkage:
+ return LLVMDLLExportLinkage;
+ case GlobalValue::ExternalWeakLinkage:
+ return LLVMExternalWeakLinkage;
+ case GlobalValue::CommonLinkage:
+ return LLVMCommonLinkage;
+ }
+
+ llvm_unreachable("Invalid GlobalValue linkage!");
+}
+
+void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
+ GlobalValue *GV = unwrap<GlobalValue>(Global);
+
+ switch (Linkage) {
+ case LLVMExternalLinkage:
+ GV->setLinkage(GlobalValue::ExternalLinkage);
+ break;
+ case LLVMAvailableExternallyLinkage:
+ GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ break;
+ case LLVMLinkOnceAnyLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ break;
+ case LLVMLinkOnceODRLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ break;
+ case LLVMLinkOnceODRAutoHideLinkage:
+ GV->setLinkage(GlobalValue::LinkOnceODRAutoHideLinkage);
+ break;
+ case LLVMWeakAnyLinkage:
+ GV->setLinkage(GlobalValue::WeakAnyLinkage);
+ break;
+ case LLVMWeakODRLinkage:
+ GV->setLinkage(GlobalValue::WeakODRLinkage);
+ break;
+ case LLVMAppendingLinkage:
+ GV->setLinkage(GlobalValue::AppendingLinkage);
+ break;
+ case LLVMInternalLinkage:
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ break;
+ case LLVMPrivateLinkage:
+ GV->setLinkage(GlobalValue::PrivateLinkage);
+ break;
+ case LLVMLinkerPrivateLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateLinkage);
+ break;
+ case LLVMLinkerPrivateWeakLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
+ break;
+ case LLVMDLLImportLinkage:
+ GV->setLinkage(GlobalValue::DLLImportLinkage);
+ break;
+ case LLVMDLLExportLinkage:
+ GV->setLinkage(GlobalValue::DLLExportLinkage);
+ break;
+ case LLVMExternalWeakLinkage:
+ GV->setLinkage(GlobalValue::ExternalWeakLinkage);
+ break;
+ case LLVMGhostLinkage:
+ DEBUG(errs()
+ << "LLVMSetLinkage(): LLVMGhostLinkage is no longer supported.");
+ break;
+ case LLVMCommonLinkage:
+ GV->setLinkage(GlobalValue::CommonLinkage);
+ break;
+ }
+}
+
+const char *LLVMGetSection(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->getSection().c_str();
+}
+
+void LLVMSetSection(LLVMValueRef Global, const char *Section) {
+ unwrap<GlobalValue>(Global)->setSection(Section);
+}
+
+LLVMVisibility LLVMGetVisibility(LLVMValueRef Global) {
+ return static_cast<LLVMVisibility>(
+ unwrap<GlobalValue>(Global)->getVisibility());
+}
+
+void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz) {
+ unwrap<GlobalValue>(Global)
+ ->setVisibility(static_cast<GlobalValue::VisibilityTypes>(Viz));
+}
+
+unsigned LLVMGetAlignment(LLVMValueRef Global) {
+ return unwrap<GlobalValue>(Global)->getAlignment();
+}
+
+void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes) {
+ unwrap<GlobalValue>(Global)->setAlignment(Bytes);
+}
+
+/*--.. Operations on global variables ......................................--*/
+
+LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
+ return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name));
+}
+
+LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty,
+ const char *Name,
+ unsigned AddressSpace) {
+ return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name, 0,
+ GlobalVariable::NotThreadLocal, AddressSpace));
+}
+
+LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getNamedGlobal(Name));
+}
+
+LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::global_iterator I = Mod->global_begin();
+ if (I == Mod->global_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::global_iterator I = Mod->global_end();
+ if (I == Mod->global_begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) {
+ GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+ Module::global_iterator I = GV;
+ if (++I == GV->getParent()->global_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) {
+ GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+ Module::global_iterator I = GV;
+ if (I == GV->getParent()->global_begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMDeleteGlobal(LLVMValueRef GlobalVar) {
+ unwrap<GlobalVariable>(GlobalVar)->eraseFromParent();
+}
+
+LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar) {
+  GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+  if (!GV->hasInitializer())
+ return 0;
+ return wrap(GV->getInitializer());
+}
+
+void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal) {
+ unwrap<GlobalVariable>(GlobalVar)
+ ->setInitializer(unwrap<Constant>(ConstantVal));
+}
+
+LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar) {
+ return unwrap<GlobalVariable>(GlobalVar)->isThreadLocal();
+}
+
+void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal) {
+ unwrap<GlobalVariable>(GlobalVar)->setThreadLocal(IsThreadLocal != 0);
+}
+
+LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar) {
+ return unwrap<GlobalVariable>(GlobalVar)->isConstant();
+}
+
+void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) {
+ unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0);
+}
+
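+/* Sketch: defining an internal constant global with the accessors above; `M`
+   and `Ctx` are an assumed module and context:
+
+     LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
+     LLVMValueRef G = LLVMAddGlobal(M, I32, "answer");
+     LLVMSetInitializer(G, LLVMConstInt(I32, 42, 0));
+     LLVMSetGlobalConstant(G, 1);
+     LLVMSetLinkage(G, LLVMInternalLinkage);
+*/
+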
+/*--.. Operations on aliases ...............................................--*/
+
+LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
+ const char *Name) {
+ return wrap(new GlobalAlias(unwrap(Ty), GlobalValue::ExternalLinkage, Name,
+                              unwrap<Constant>(Aliasee), unwrap(M)));
+}
+
+/*--.. Operations on functions .............................................--*/
+
+LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
+ LLVMTypeRef FunctionTy) {
+ return wrap(Function::Create(unwrap<FunctionType>(FunctionTy),
+ GlobalValue::ExternalLinkage, Name, unwrap(M)));
+}
+
+LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getFunction(Name));
+}
+
+LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::iterator I = Mod->begin();
+ if (I == Mod->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::iterator I = Mod->end();
+ if (I == Mod->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Module::iterator I = Func;
+ if (++I == Func->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Module::iterator I = Func;
+ if (I == Func->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
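+/* Sketch of the iteration idiom the First/Last/Next/Previous accessors
+   support; a NULL result marks the end of the list. `M` is an assumed module
+   and `visit` a hypothetical callback:
+
+     for (LLVMValueRef F = LLVMGetFirstFunction(M); F;
+          F = LLVMGetNextFunction(F))
+       visit(F);
+*/
+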
+void LLVMDeleteFunction(LLVMValueRef Fn) {
+ unwrap<Function>(Fn)->eraseFromParent();
+}
+
+unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) {
+ if (Function *F = dyn_cast<Function>(unwrap(Fn)))
+ return F->getIntrinsicID();
+ return 0;
+}
+
+unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn) {
+ return unwrap<Function>(Fn)->getCallingConv();
+}
+
+void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) {
+ return unwrap<Function>(Fn)->setCallingConv(
+ static_cast<CallingConv::ID>(CC));
+}
+
+const char *LLVMGetGC(LLVMValueRef Fn) {
+ Function *F = unwrap<Function>(Fn);
+  return F->hasGC() ? F->getGC() : 0;
+}
+
+void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
+ Function *F = unwrap<Function>(Fn);
+ if (GC)
+ F->setGC(GC);
+ else
+ F->clearGC();
+}
+
+void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttributeSet PAL = Func->getAttributes();
+ AttrBuilder B(PA);
+ const AttributeSet PALnew =
+ PAL.addAttributes(Func->getContext(), AttributeSet::FunctionIndex,
+ AttributeSet::get(Func->getContext(),
+ AttributeSet::FunctionIndex, B));
+ Func->setAttributes(PALnew);
+}
+
+void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttributeSet PAL = Func->getAttributes();
+ AttrBuilder B(PA);
+ const AttributeSet PALnew =
+ PAL.removeAttributes(Func->getContext(), AttributeSet::FunctionIndex,
+ AttributeSet::get(Func->getContext(),
+ AttributeSet::FunctionIndex, B));
+ Func->setAttributes(PALnew);
+}
+
+LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ const AttributeSet PAL = Func->getAttributes();
+ return (LLVMAttribute)PAL.Raw(AttributeSet::FunctionIndex);
+}
+
+/*--.. Operations on parameters ............................................--*/
+
+unsigned LLVMCountParams(LLVMValueRef FnRef) {
+  // This function is strictly redundant with
+  // LLVMCountParamTypes(LLVMGetElementType(LLVMTypeOf(FnRef))).
+ return unwrap<Function>(FnRef)->arg_size();
+}
+
+void LLVMGetParams(LLVMValueRef FnRef, LLVMValueRef *ParamRefs) {
+ Function *Fn = unwrap<Function>(FnRef);
+ for (Function::arg_iterator I = Fn->arg_begin(),
+ E = Fn->arg_end(); I != E; I++)
+ *ParamRefs++ = wrap(I);
+}
+
+LLVMValueRef LLVMGetParam(LLVMValueRef FnRef, unsigned index) {
+ Function::arg_iterator AI = unwrap<Function>(FnRef)->arg_begin();
+  while (index-- > 0)
+    ++AI;
+ return wrap(AI);
+}
+
+LLVMValueRef LLVMGetParamParent(LLVMValueRef V) {
+ return wrap(unwrap<Argument>(V)->getParent());
+}
+
+LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::arg_iterator I = Func->arg_begin();
+ if (I == Func->arg_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::arg_iterator I = Func->arg_end();
+ if (I == Func->arg_begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Function::arg_iterator I = A;
+ if (++I == A->getParent()->arg_end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ Function::arg_iterator I = A;
+ if (I == A->getParent()->arg_begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
+ Argument *A = unwrap<Argument>(Arg);
+ AttrBuilder B(PA);
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+}
+
+void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
+ Argument *A = unwrap<Argument>(Arg);
+ AttrBuilder B(PA);
+ A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+}
+
+LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
+ Argument *A = unwrap<Argument>(Arg);
+ return (LLVMAttribute)A->getParent()->getAttributes().
+ Raw(A->getArgNo()+1);
+}
+
+void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
+ Argument *A = unwrap<Argument>(Arg);
+ AttrBuilder B;
+ B.addAlignmentAttr(align);
+  A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+}
+
+/*--.. Operations on basic blocks ..........................................--*/
+
+LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) {
+ return wrap(static_cast<Value*>(unwrap(BB)));
+}
+
+LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val) {
+ return isa<BasicBlock>(unwrap(Val));
+}
+
+LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val) {
+ return wrap(unwrap<BasicBlock>(Val));
+}
+
+LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB) {
+ return wrap(unwrap(BB)->getParent());
+}
+
+LLVMValueRef LLVMGetBasicBlockTerminator(LLVMBasicBlockRef BB) {
+ return wrap(unwrap(BB)->getTerminator());
+}
+
+unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) {
+ return unwrap<Function>(FnRef)->size();
+}
+
+void LLVMGetBasicBlocks(LLVMValueRef FnRef, LLVMBasicBlockRef *BasicBlocksRefs){
+ Function *Fn = unwrap<Function>(FnRef);
+ for (Function::iterator I = Fn->begin(), E = Fn->end(); I != E; I++)
+ *BasicBlocksRefs++ = wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn) {
+ return wrap(&unwrap<Function>(Fn)->getEntryBlock());
+}
+
+LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::iterator I = Func->begin();
+ if (I == Func->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) {
+ Function *Func = unwrap<Function>(Fn);
+ Function::iterator I = Func->end();
+ if (I == Func->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ Function::iterator I = Block;
+ if (++I == Block->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ Function::iterator I = Block;
+ if (I == Block->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
+ LLVMValueRef FnRef,
+ const char *Name) {
+ return wrap(BasicBlock::Create(*unwrap(C), Name, unwrap<Function>(FnRef)));
+}
+
+LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef FnRef, const char *Name) {
+ return LLVMAppendBasicBlockInContext(LLVMGetGlobalContext(), FnRef, Name);
+}
+
+LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C,
+ LLVMBasicBlockRef BBRef,
+ const char *Name) {
+ BasicBlock *BB = unwrap(BBRef);
+ return wrap(BasicBlock::Create(*unwrap(C), Name, BB->getParent(), BB));
+}
+
+LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef BBRef,
+ const char *Name) {
+ return LLVMInsertBasicBlockInContext(LLVMGetGlobalContext(), BBRef, Name);
+}
+
+void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) {
+ unwrap(BBRef)->eraseFromParent();
+}
+
+void LLVMRemoveBasicBlockFromParent(LLVMBasicBlockRef BBRef) {
+ unwrap(BBRef)->removeFromParent();
+}
+
+void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveBefore(unwrap(MovePos));
+}
+
+void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveAfter(unwrap(MovePos));
+}
+
+/*--.. Operations on instructions ..........................................--*/
+
+LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst) {
+ return wrap(unwrap<Instruction>(Inst)->getParent());
+}
+
+LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ BasicBlock::iterator I = Block->begin();
+ if (I == Block->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) {
+ BasicBlock *Block = unwrap(BB);
+ BasicBlock::iterator I = Block->end();
+ if (I == Block->begin())
+ return 0;
+ return wrap(--I);
+}
+
+LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) {
+ Instruction *Instr = unwrap<Instruction>(Inst);
+ BasicBlock::iterator I = Instr;
+ if (++I == Instr->getParent()->end())
+ return 0;
+ return wrap(I);
+}
+
+LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) {
+ Instruction *Instr = unwrap<Instruction>(Inst);
+ BasicBlock::iterator I = Instr;
+ if (I == Instr->getParent()->begin())
+ return 0;
+ return wrap(--I);
+}
+
+void LLVMInstructionEraseFromParent(LLVMValueRef Inst) {
+ unwrap<Instruction>(Inst)->eraseFromParent();
+}
+
+LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst) {
+ if (ICmpInst *I = dyn_cast<ICmpInst>(unwrap(Inst)))
+ return (LLVMIntPredicate)I->getPredicate();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(unwrap(Inst)))
+ if (CE->getOpcode() == Instruction::ICmp)
+ return (LLVMIntPredicate)CE->getPredicate();
+ return (LLVMIntPredicate)0;
+}
+
+LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst) {
+ if (Instruction *C = dyn_cast<Instruction>(unwrap(Inst)))
+ return map_to_llvmopcode(C->getOpcode());
+ return (LLVMOpcode)0;
+}
+
+/*--.. Call and invoke instructions ........................................--*/
+
+unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
+ Value *V = unwrap(Instr);
+ if (CallInst *CI = dyn_cast<CallInst>(V))
+ return CI->getCallingConv();
+ if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ return II->getCallingConv();
+  llvm_unreachable("LLVMGetInstructionCallConv applies only to call "
+                   "and invoke!");
+}
+
+void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
+ Value *V = unwrap(Instr);
+ if (CallInst *CI = dyn_cast<CallInst>(V))
+ return CI->setCallingConv(static_cast<CallingConv::ID>(CC));
+ else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ return II->setCallingConv(static_cast<CallingConv::ID>(CC));
+  llvm_unreachable("LLVMSetInstructionCallConv applies only to call "
+                   "and invoke!");
+}
+
+void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
+ LLVMAttribute PA) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ AttrBuilder B(PA);
+ Call.setAttributes(
+ Call.getAttributes().addAttributes(Call->getContext(), index,
+ AttributeSet::get(Call->getContext(),
+ index, B)));
+}
+
+void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
+ LLVMAttribute PA) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ AttrBuilder B(PA);
+ Call.setAttributes(Call.getAttributes()
+ .removeAttributes(Call->getContext(), index,
+ AttributeSet::get(Call->getContext(),
+ index, B)));
+}
+
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
+ unsigned align) {
+ CallSite Call = CallSite(unwrap<Instruction>(Instr));
+ AttrBuilder B;
+ B.addAlignmentAttr(align);
+ Call.setAttributes(Call.getAttributes()
+ .addAttributes(Call->getContext(), index,
+ AttributeSet::get(Call->getContext(),
+ index, B)));
+}
+
+/*--.. Operations on call instructions (only) ..............................--*/
+
+LLVMBool LLVMIsTailCall(LLVMValueRef Call) {
+ return unwrap<CallInst>(Call)->isTailCall();
+}
+
+void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) {
+ unwrap<CallInst>(Call)->setTailCall(isTailCall);
+}
+
+/*--.. Operations on switch instructions (only) ............................--*/
+
+LLVMBasicBlockRef LLVMGetSwitchDefaultDest(LLVMValueRef Switch) {
+ return wrap(unwrap<SwitchInst>(Switch)->getDefaultDest());
+}
+
+/*--.. Operations on phi nodes .............................................--*/
+
+void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues,
+ LLVMBasicBlockRef *IncomingBlocks, unsigned Count) {
+ PHINode *PhiVal = unwrap<PHINode>(PhiNode);
+ for (unsigned I = 0; I != Count; ++I)
+ PhiVal->addIncoming(unwrap(IncomingValues[I]), unwrap(IncomingBlocks[I]));
+}
+
+unsigned LLVMCountIncoming(LLVMValueRef PhiNode) {
+ return unwrap<PHINode>(PhiNode)->getNumIncomingValues();
+}
+
+LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index) {
+ return wrap(unwrap<PHINode>(PhiNode)->getIncomingValue(Index));
+}
+
+LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index) {
+ return wrap(unwrap<PHINode>(PhiNode)->getIncomingBlock(Index));
+}
+
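+/* Sketch: a phi is created empty by LLVMBuildPhi and populated afterwards;
+   `B`, `I32`, and the incoming values/blocks are assumed to exist:
+
+     LLVMValueRef Phi = LLVMBuildPhi(B, I32, "merge");
+     LLVMValueRef Vals[] = { ThenVal, ElseVal };
+     LLVMBasicBlockRef Preds[] = { ThenBB, ElseBB };
+     LLVMAddIncoming(Phi, Vals, Preds, 2);
+*/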
+
+/*===-- Instruction builders ----------------------------------------------===*/
+
+LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C) {
+ return wrap(new IRBuilder<>(*unwrap(C)));
+}
+
+LLVMBuilderRef LLVMCreateBuilder(void) {
+ return LLVMCreateBuilderInContext(LLVMGetGlobalContext());
+}
+
+void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block,
+ LLVMValueRef Instr) {
+  BasicBlock *BB = unwrap(Block);
+  // A null Instr means "position at the end of the block"; the end() sentinel
+  // converts to the Instruction* that SetInsertPoint expects for that case.
+  Instruction *I = Instr ? unwrap<Instruction>(Instr)
+                         : (Instruction *)BB->end();
+  unwrap(Builder)->SetInsertPoint(BB, I);
+}
+
+void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr) {
+ Instruction *I = unwrap<Instruction>(Instr);
+ unwrap(Builder)->SetInsertPoint(I->getParent(), I);
+}
+
+void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block) {
+ BasicBlock *BB = unwrap(Block);
+ unwrap(Builder)->SetInsertPoint(BB);
+}
+
+LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->GetInsertBlock());
+}
+
+void LLVMClearInsertionPosition(LLVMBuilderRef Builder) {
+ unwrap(Builder)->ClearInsertionPoint();
+}
+
+void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr) {
+ unwrap(Builder)->Insert(unwrap<Instruction>(Instr));
+}
+
+void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr,
+ const char *Name) {
+ unwrap(Builder)->Insert(unwrap<Instruction>(Instr), Name);
+}
+
+void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
+ delete unwrap(Builder);
+}
+
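+/* Sketch of the usual builder workflow, from function creation to the block
+   terminator; `M` and `Ctx` are an assumed module and context:
+
+     LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
+     LLVMTypeRef Params[] = { I32, I32 };
+     LLVMValueRef Fn = LLVMAddFunction(M, "sum",
+                                       LLVMFunctionType(I32, Params, 2, 0));
+     LLVMBasicBlockRef Entry = LLVMAppendBasicBlockInContext(Ctx, Fn, "entry");
+     LLVMBuilderRef B = LLVMCreateBuilderInContext(Ctx);
+     LLVMPositionBuilderAtEnd(B, Entry);
+     LLVMValueRef S = LLVMBuildAdd(B, LLVMGetParam(Fn, 0),
+                                   LLVMGetParam(Fn, 1), "s");
+     LLVMBuildRet(B, S);
+     LLVMDisposeBuilder(B);
+*/
+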
+/*--.. Metadata builders ...................................................--*/
+
+void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
+ MDNode *Loc = L ? unwrap<MDNode>(L) : NULL;
+ unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc));
+}
+
+LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->getCurrentDebugLocation()
+ .getAsMDNode(unwrap(Builder)->getContext()));
+}
+
+void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
+ unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst));
+}
+
+/*--.. Instruction builders ................................................--*/
+
+LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateRetVoid());
+}
+
+LLVMValueRef LLVMBuildRet(LLVMBuilderRef B, LLVMValueRef V) {
+ return wrap(unwrap(B)->CreateRet(unwrap(V)));
+}
+
+LLVMValueRef LLVMBuildAggregateRet(LLVMBuilderRef B, LLVMValueRef *RetVals,
+ unsigned N) {
+ return wrap(unwrap(B)->CreateAggregateRet(unwrap(RetVals), N));
+}
+
+LLVMValueRef LLVMBuildBr(LLVMBuilderRef B, LLVMBasicBlockRef Dest) {
+ return wrap(unwrap(B)->CreateBr(unwrap(Dest)));
+}
+
+LLVMValueRef LLVMBuildCondBr(LLVMBuilderRef B, LLVMValueRef If,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Else) {
+ return wrap(unwrap(B)->CreateCondBr(unwrap(If), unwrap(Then), unwrap(Else)));
+}
+
+LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef B, LLVMValueRef V,
+ LLVMBasicBlockRef Else, unsigned NumCases) {
+ return wrap(unwrap(B)->CreateSwitch(unwrap(V), unwrap(Else), NumCases));
+}
+
+LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr,
+ unsigned NumDests) {
+ return wrap(unwrap(B)->CreateIndirectBr(unwrap(Addr), NumDests));
+}
+
+LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch),
+ makeArrayRef(unwrap(Args), NumArgs),
+ Name));
+}
+
+LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef PersFn, unsigned NumClauses,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty),
+ cast<Function>(unwrap(PersFn)),
+ NumClauses, Name));
+}
+
+LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn) {
+ return wrap(unwrap(B)->CreateResume(unwrap(Exn)));
+}
+
+LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef B) {
+ return wrap(unwrap(B)->CreateUnreachable());
+}
+
+void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal,
+ LLVMBasicBlockRef Dest) {
+ unwrap<SwitchInst>(Switch)->addCase(unwrap<ConstantInt>(OnVal), unwrap(Dest));
+}
+
+void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) {
+ unwrap<IndirectBrInst>(IndirectBr)->addDestination(unwrap(Dest));
+}
+
+void LLVMAddClause(LLVMValueRef LandingPad, LLVMValueRef ClauseVal) {
+ unwrap<LandingPadInst>(LandingPad)->
+ addClause(cast<Constant>(unwrap(ClauseVal)));
+}
+
+void LLVMSetCleanup(LLVMValueRef LandingPad, LLVMBool Val) {
+ unwrap<LandingPadInst>(LandingPad)->setCleanup(Val);
+}
+
+/*--.. Arithmetic ..........................................................--*/
+
+LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateUDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef B, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name) {
+ return wrap(unwrap(B)->CreateExactSDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildURem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateURem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildSRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSRem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFRem(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildShl(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateShl(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildLShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLShr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildAShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAShr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildAnd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAnd(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildOr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateOr(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateXor(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+  return wrap(unwrap(B)->CreateBinOp(
+      Instruction::BinaryOps(map_from_llvmopcode(Op)),
+      unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateFNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
+ return wrap(unwrap(B)->CreateNot(unwrap(V), Name));
+}
+
+/*--.. Memory ..............................................................--*/
+
+LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
+ const char *Name) {
+ Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+ Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
+ AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
+ Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
+ ITy, unwrap(Ty), AllocSize,
+ 0, 0, "");
+ return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+}
+
+LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Val, const char *Name) {
+ Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+ Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
+ AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
+ Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
+ ITy, unwrap(Ty), AllocSize,
+ unwrap(Val), 0, "");
+ return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+}
+
+LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), 0, Name));
+}
+
+LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
+ LLVMValueRef Val, const char *Name) {
+ return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) {
+ return wrap(unwrap(B)->Insert(
+ CallInst::CreateFree(unwrap(PointerVal), unwrap(B)->GetInsertBlock())));
+}
+
+LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name));
+}
+
+LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMValueRef PointerVal) {
+ return wrap(unwrap(B)->CreateStore(unwrap(Val), unwrap(PointerVal)));
+}
+
+LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ LLVMValueRef *Indices, unsigned NumIndices,
+ const char *Name) {
+ ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
+ return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), IdxList, Name));
+}
+
+LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ LLVMValueRef *Indices, unsigned NumIndices,
+ const char *Name) {
+ ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
+ return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), IdxList, Name));
+}
+
+LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
+ unsigned Idx, const char *Name) {
+ return wrap(unwrap(B)->CreateStructGEP(unwrap(Pointer), Idx, Name));
+}
+
+LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGlobalString(Str, Name));
+}
+
+LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateGlobalStringPtr(Str, Name));
+}
+
+LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) {
+ Value *P = unwrap<Value>(MemAccessInst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->isVolatile();
+ return cast<StoreInst>(P)->isVolatile();
+}
+
+void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) {
+ Value *P = unwrap<Value>(MemAccessInst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->setVolatile(isVolatile);
+ return cast<StoreInst>(P)->setVolatile(isVolatile);
+}
+
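+/* Sketch: stack slot, store, and a load marked volatile; `B` and `I32` are
+   assumed as above:
+
+     LLVMValueRef Slot = LLVMBuildAlloca(B, I32, "slot");
+     LLVMBuildStore(B, LLVMConstInt(I32, 7, 0), Slot);
+     LLVMValueRef V = LLVMBuildLoad(B, Slot, "v");
+     LLVMSetVolatile(V, 1);
+*/
+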
+/*--.. Casts ...............................................................--*/
+
+LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateTrunc(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildZExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateZExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildSExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPToUI(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPToUI(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPToSI(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPToSI(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildUIToFP(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateUIToFP(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildSIToFP(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSIToFP(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPTrunc(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPTrunc(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildFPExt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPExt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildPtrToInt(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreatePtrToInt(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildIntToPtr(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateIntToPtr(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateBitCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateZExtOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildSExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateSExtOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateTruncOrBitCast(unwrap(Val), unwrap(DestTy),
+ Name));
+}
+
+LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+  return wrap(unwrap(B)->CreateCast(
+      Instruction::CastOps(map_from_llvmopcode(Op)),
+      unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
+LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy),
+ /*isSigned*/true, Name));
+}
+
+LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateFPCast(unwrap(Val), unwrap(DestTy), Name));
+}
+
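+/* Sketch: LLVMBuildCast names the opcode explicitly, while the *OrBitCast
+   variants above choose between the conversion and a plain bitcast based on
+   the operand and destination types. `B`, `Val`, and `I64` are assumed:
+
+     LLVMValueRef Wide = LLVMBuildCast(B, LLVMZExt, Val, I64, "wide");
+*/
+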
+/*--.. Comparisons .........................................................--*/
+
+LLVMValueRef LLVMBuildICmp(LLVMBuilderRef B, LLVMIntPredicate Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateICmp(static_cast<ICmpInst::Predicate>(Op),
+ unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef B, LLVMRealPredicate Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateFCmp(static_cast<FCmpInst::Predicate>(Op),
+ unwrap(LHS), unwrap(RHS), Name));
+}
+
+/*--.. Miscellaneous instructions ..........................................--*/
+
+LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) {
+ return wrap(unwrap(B)->CreatePHI(unwrap(Ty), 0, Name));
+}
+
+LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
+ LLVMValueRef *Args, unsigned NumArgs,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateCall(unwrap(Fn),
+ makeArrayRef(unwrap(Args), NumArgs),
+ Name));
+}
+
+LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If,
+ LLVMValueRef Then, LLVMValueRef Else,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateSelect(unwrap(If), unwrap(Then), unwrap(Else),
+ Name));
+}
+
+LLVMValueRef LLVMBuildVAArg(LLVMBuilderRef B, LLVMValueRef List,
+ LLVMTypeRef Ty, const char *Name) {
+ return wrap(unwrap(B)->CreateVAArg(unwrap(List), unwrap(Ty), Name));
+}
+
+LLVMValueRef LLVMBuildExtractElement(LLVMBuilderRef B, LLVMValueRef VecVal,
+ LLVMValueRef Index, const char *Name) {
+ return wrap(unwrap(B)->CreateExtractElement(unwrap(VecVal), unwrap(Index),
+ Name));
+}
+
+LLVMValueRef LLVMBuildInsertElement(LLVMBuilderRef B, LLVMValueRef VecVal,
+ LLVMValueRef EltVal, LLVMValueRef Index,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInsertElement(unwrap(VecVal), unwrap(EltVal),
+ unwrap(Index), Name));
+}
+
+LLVMValueRef LLVMBuildShuffleVector(LLVMBuilderRef B, LLVMValueRef V1,
+ LLVMValueRef V2, LLVMValueRef Mask,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateShuffleVector(unwrap(V1), unwrap(V2),
+ unwrap(Mask), Name));
+}
+
+LLVMValueRef LLVMBuildExtractValue(LLVMBuilderRef B, LLVMValueRef AggVal,
+ unsigned Index, const char *Name) {
+ return wrap(unwrap(B)->CreateExtractValue(unwrap(AggVal), Index, Name));
+}
+
+LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef B, LLVMValueRef AggVal,
+ LLVMValueRef EltVal, unsigned Index,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateInsertValue(unwrap(AggVal), unwrap(EltVal),
+ Index, Name));
+}
+
+LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef B, LLVMValueRef Val,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIsNull(unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef B, LLVMValueRef Val,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIsNotNull(unwrap(Val), Name));
+}
+
+LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name) {
+ return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name));
+}
+
+/*===-- Module providers --------------------------------------------------===*/
+
+LLVMModuleProviderRef
+LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M) {
+ return reinterpret_cast<LLVMModuleProviderRef>(M);
+}
+
+void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) {
+ delete unwrap(MP);
+}
+
+/*===-- Memory buffers ----------------------------------------------------===*/
+
+LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
+ const char *Path,
+ LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
+
+ OwningPtr<MemoryBuffer> MB;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getFile(Path, MB))) {
+ *OutMemBuf = wrap(MB.take());
+ return 0;
+ }
+
+ *OutMessage = strdup(ec.message().c_str());
+ return 1;
+}
+
+LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
+ char **OutMessage) {
+ OwningPtr<MemoryBuffer> MB;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getSTDIN(MB))) {
+ *OutMemBuf = wrap(MB.take());
+ return 0;
+ }
+
+ *OutMessage = strdup(ec.message().c_str());
+ return 1;
+}
+
+LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange(
+ const char *InputData,
+ size_t InputDataLength,
+ const char *BufferName,
+ LLVMBool RequiresNullTerminator) {
+
+ return wrap(MemoryBuffer::getMemBuffer(
+ StringRef(InputData, InputDataLength),
+ StringRef(BufferName),
+ RequiresNullTerminator));
+}
+
+LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(
+ const char *InputData,
+ size_t InputDataLength,
+ const char *BufferName) {
+
+ return wrap(MemoryBuffer::getMemBufferCopy(
+ StringRef(InputData, InputDataLength),
+ StringRef(BufferName)));
+}
+
+void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
+ delete unwrap(MemBuf);
+}
+
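+/* Sketch of the out-parameter convention used above: a zero return means
+   success and *OutMemBuf is valid; nonzero means *OutMessage holds an error
+   string the caller must free (LLVMDisposeMessage):
+
+     LLVMMemoryBufferRef Buf;
+     char *Err = NULL;
+     if (LLVMCreateMemoryBufferWithContentsOfFile("input.bc", &Buf, &Err)) {
+       fprintf(stderr, "%s\n", Err);
+       LLVMDisposeMessage(Err);
+     }
+*/
+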
+/*===-- Pass Registry -----------------------------------------------------===*/
+
+LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void) {
+ return wrap(PassRegistry::getPassRegistry());
+}
+
+/*===-- Pass Manager ------------------------------------------------------===*/
+
+LLVMPassManagerRef LLVMCreatePassManager() {
+ return wrap(new PassManager());
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
+ return wrap(new FunctionPassManager(unwrap(M)));
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
+ return LLVMCreateFunctionPassManagerForModule(
+ reinterpret_cast<LLVMModuleRef>(P));
+}
+
+LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
+ return unwrap<PassManager>(PM)->run(*unwrap(M));
+}
+
+LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doInitialization();
+}
+
+LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
+ return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
+}
+
+LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doFinalization();
+}
+
+void LLVMDisposePassManager(LLVMPassManagerRef PM) {
+ delete unwrap(PM);
+}
+
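+/* Sketch: running a module-level pipeline; pass-creation entry points such as
+   LLVMAddInstructionCombiningPass live in the llvm-c/Transforms headers. `M`
+   is an assumed module:
+
+     LLVMPassManagerRef PM = LLVMCreatePassManager();
+     LLVMAddInstructionCombiningPass(PM);
+     LLVMBool Changed = LLVMRunPassManager(PM, M);
+     LLVMDisposePassManager(PM);
+*/
+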
+/*===-- Threading ---------------------------------------------------------===*/
+
+LLVMBool LLVMStartMultithreaded() {
+ return llvm_start_multithreaded();
+}
+
+void LLVMStopMultithreaded() {
+ llvm_stop_multithreaded();
+}
+
+LLVMBool LLVMIsMultithreaded() {
+ return llvm_is_multithreaded();
+}
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
new file mode 100644
index 000000000000..9d6e84072912
--- /dev/null
+++ b/lib/IR/DIBuilder.cpp
@@ -0,0 +1,1101 @@
+//===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DIBuilder.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DIBuilder.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
+ assert((Tag & LLVMDebugVersionMask) == 0 &&
+ "Tag too large for debug encoding!");
+ return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion);
+}
+
+DIBuilder::DIBuilder(Module &m)
+ : M(m), VMContext(M.getContext()), TheCU(0), TempEnumTypes(0),
+ TempRetainTypes(0), TempSubprograms(0), TempGVs(0), DeclareFn(0),
+    ValueFn(0) {}
+
+/// finalize - Construct any deferred debug info descriptors.
+void DIBuilder::finalize() {
+ DIArray Enums = getOrCreateArray(AllEnumTypes);
+ DIType(TempEnumTypes).replaceAllUsesWith(Enums);
+
+ DIArray RetainTypes = getOrCreateArray(AllRetainTypes);
+ DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes);
+
+ DIArray SPs = getOrCreateArray(AllSubprograms);
+ DIType(TempSubprograms).replaceAllUsesWith(SPs);
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ SmallVector<Value *, 4> Variables;
+ if (NamedMDNode *NMD = getFnSpecificMDNode(M, SP)) {
+ for (unsigned ii = 0, ee = NMD->getNumOperands(); ii != ee; ++ii)
+ Variables.push_back(NMD->getOperand(ii));
+ NMD->eraseFromParent();
+ }
+ if (MDNode *Temp = SP.getVariablesNodes()) {
+ DIArray AV = getOrCreateArray(Variables);
+ DIType(Temp).replaceAllUsesWith(AV);
+ }
+ }
+
+ DIArray GVs = getOrCreateArray(AllGVs);
+ DIType(TempGVs).replaceAllUsesWith(GVs);
+}
+
+/// getNonCompileUnitScope - If N is a compile unit, return NULL; otherwise
+/// return N.
+static MDNode *getNonCompileUnitScope(MDNode *N) {
+ if (DIDescriptor(N).isCompileUnit())
+ return NULL;
+ return N;
+}
+
+static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename,
+ StringRef Directory) {
+ assert(!Filename.empty() && "Unable to create file without name");
+ Value *Pair[] = {
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ };
+ return MDNode::get(VMContext, Pair);
+}
+
+/// createCompileUnit - A CompileUnit provides an anchor for all debugging
+/// information generated during this instance of compilation.
+void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
+ StringRef Directory, StringRef Producer,
+ bool isOptimized, StringRef Flags,
+ unsigned RunTimeVer, StringRef SplitName) {
+ assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) ||
+ (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
+ "Invalid Language tag");
+ assert(!Filename.empty() &&
+ "Unable to create compile unit without filename");
+ Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ TempEnumTypes = MDNode::getTemporary(VMContext, TElts);
+
+ TempRetainTypes = MDNode::getTemporary(VMContext, TElts);
+
+ TempSubprograms = MDNode::getTemporary(VMContext, TElts);
+
+ TempGVs = MDNode::getTemporary(VMContext, TElts);
+
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
+ createFilePathPair(VMContext, Filename, Directory),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
+ MDString::get(VMContext, Producer),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ MDString::get(VMContext, Flags),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer),
+ TempEnumTypes,
+ TempRetainTypes,
+ TempSubprograms,
+ TempGVs,
+ MDString::get(VMContext, SplitName)
+ };
+ TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
+
+  // Create a named metadata so that it is easier to find the CU in a module.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
+ NMD->addOperand(TheCU);
+}
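+
+// Illustrative sketch (not part of this change; `M` is a hypothetical
+// Module). A DIBuilder session brackets all descriptor creation between
+// createCompileUnit() and finalize(), which resolves the temporary anchor
+// nodes created above.
+//
+//   DIBuilder DIB(M);
+//   DIB.createCompileUnit(dwarf::DW_LANG_C99, "a.c", "/tmp", "producer",
+//                         /*isOptimized=*/false, /*Flags=*/"",
+//                         /*RunTimeVer=*/0, /*SplitName=*/"");
+//   ... create types, functions and variables ...
+//   DIB.finalize();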
+
+/// createFile - Create a file descriptor to hold debugging information
+/// for a file.
+DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
+ createFilePathPair(VMContext, Filename, Directory)
+ };
+ return DIFile(MDNode::get(VMContext, Elts));
+}
+
+/// createEnumerator - Create a single enumerator value.
+DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
+ assert(!Name.empty() && "Unable to create enumerator without name");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val)
+ };
+ return DIEnumerator(MDNode::get(VMContext, Elts));
+}
+
+/// createNullPtrType - Create C++0x nullptr type.
+DIType DIBuilder::createNullPtrType(StringRef Name) {
+ assert(!Name.empty() && "Unable to create type without name");
+  // nullptr is encoded in DIBasicType format. Line number, filename,
+  // size, alignment, offset and flags are always empty here.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type),
+ NULL, // Filename
+ NULL, //TheCU,
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0) // Encoding
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createBasicType - Create debugging information entry for a basic
+/// type, e.g. 'char'.
+DIBasicType
+DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Encoding) {
+ assert(!Name.empty() && "Unable to create type without name");
+ // Basic types are encoded in DIBasicType format. Line number, filename,
+ // offset and flags are always empty here.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
+ NULL, // File/directory name
+ NULL, //TheCU,
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
+ };
+ return DIBasicType(MDNode::get(VMContext, Elts));
+}
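+
+// Illustrative sketch (not part of this change; `DIB` is a hypothetical
+// DIBuilder). A 32-bit signed integer is described by name, size and
+// alignment in bits, and a DWARF encoding:
+//
+//   DIBasicType IntTy =
+//       DIB.createBasicType("int", 32, 32, dwarf::DW_ATE_signed);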
+
+/// createQualifiedType - Create debugging information entry for a qualified
+/// type, e.g. 'const int'.
+DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
+ // Qualified types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ NULL, // Filename
+ NULL, //TheCU,
+ MDString::get(VMContext, StringRef()), // Empty name.
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FromTy
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createPointerType - Create debugging information entry for a pointer.
+DIDerivedType
+DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
+ uint64_t AlignInBits, StringRef Name) {
+ // Pointer types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
+ NULL, // Filename
+ NULL, //TheCU,
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ PointeeTy
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
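+
+// Illustrative sketch (not part of this change; `DIB` and `IntTy` are a
+// hypothetical DIBuilder and base type). Derived types wrap an existing
+// DIType, e.g. 'const int' and 'int *' on a 64-bit target:
+//
+//   DIDerivedType ConstInt =
+//       DIB.createQualifiedType(dwarf::DW_TAG_const_type, IntTy);
+//   DIDerivedType IntPtr = DIB.createPointerType(IntTy, 64, 64, "");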
+
+/// createMemberPointerType - Create debugging information entry for a
+/// pointer to member type.
+DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy,
+                                                 DIType Base) {
+  // Member pointers are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type),
+ NULL, // Filename
+ NULL, //TheCU,
+ NULL,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ PointeeTy,
+ Base
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createReferenceType - Create debugging information entry for a reference
+/// type.
+DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
+ assert(RTy.Verify() && "Unable to create reference type");
+ // References are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ NULL, // Filename
+ NULL, // TheCU,
+ NULL, // Name
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ RTy
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createTypedef - Create debugging information entry for a typedef.
+DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
+ unsigned LineNo, DIDescriptor Context) {
+ // typedefs are encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid typedef type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
+ File.getFileNode(),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ Ty
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createFriend - Create debugging information entry for a 'friend'.
+DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
+  // Friends are encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid type!");
+ assert(FriendTy.Verify() && "Invalid friend type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_friend),
+ NULL,
+ Ty,
+ NULL, // Name
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FriendTy
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createInheritance - Create debugging information entry to establish
+/// inheritance relationship between two types.
+DIDerivedType DIBuilder::createInheritance(
+ DIType Ty, DIType BaseTy, uint64_t BaseOffset, unsigned Flags) {
+ assert(Ty.Verify() && "Unable to create inheritance");
+ // TAG_inheritance is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
+ NULL,
+ Ty,
+ NULL, // Name
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ BaseTy
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createMemberType - Create debugging information entry for a member.
+DIDerivedType DIBuilder::createMemberType(
+ DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
+ unsigned Flags, DIType Ty) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty
+ };
+ return DIDerivedType(MDNode::get(VMContext, Elts));
+}
+
+/// createStaticMemberType - Create debugging information entry for a
+/// C++ static data member.
+DIType DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ DIType Ty, unsigned Flags,
+ llvm::Value *Val) {
+ // TAG_member is encoded in DIDerivedType format.
+ Flags |= DIDescriptor::FlagStaticMember;
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0/*SizeInBits*/),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0/*AlignInBits*/),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0/*OffsetInBits*/),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty,
+ Val
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCIVar - Create debugging information entry for Objective-C
+/// instance variable.
+DIType DIBuilder::createObjCIVar(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty, StringRef PropertyName,
+ StringRef GetterName, StringRef SetterName,
+ unsigned PropertyAttributes) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
+ getNonCompileUnitScope(File),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty,
+ MDString::get(VMContext, PropertyName),
+ MDString::get(VMContext, GetterName),
+ MDString::get(VMContext, SetterName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes)
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCIVar - Create debugging information entry for Objective-C
+/// instance variable, using an existing property node.
+DIType DIBuilder::createObjCIVar(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty, MDNode *PropertyNode) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File.getFileNode(),
+ getNonCompileUnitScope(File),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty,
+ PropertyNode
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCProperty - Create debugging information entry for Objective-C
+/// property.
+DIObjCProperty DIBuilder::createObjCProperty(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ StringRef GetterName,
+ StringRef SetterName,
+ unsigned PropertyAttributes,
+ DIType Ty) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property),
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ MDString::get(VMContext, GetterName),
+ MDString::get(VMContext, SetterName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes),
+ Ty
+ };
+ return DIObjCProperty(MDNode::get(VMContext, Elts));
+}
+
+/// createTemplateTypeParameter - Create debugging information for template
+/// type parameter.
+DITemplateTypeParameter
+DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ Ty,
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateTypeParameter(MDNode::get(VMContext, Elts));
+}
+
+/// createTemplateValueParameter - Create debugging information for template
+/// value parameter.
+DITemplateValueParameter
+DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, uint64_t Val,
+ MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ Ty,
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateValueParameter(MDNode::get(VMContext, Elts));
+}
+
+/// createClassType - Create debugging information entry for a class.
+DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ uint64_t OffsetInBits,
+ unsigned Flags, DIType DerivedFrom,
+ DIArray Elements,
+ MDNode *VTableHolder,
+ MDNode *TemplateParams) {
+ assert((!Context || Context.Verify()) &&
+ "createClassType should be called with a valid Context");
+ // TAG_class_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
+ File.getFileNode(),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ VTableHolder,
+ TemplateParams
+ };
+ DICompositeType R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() && "createClassType should return a verifiable DIType");
+ return R;
+}
+
+/// createStructType - Create debugging information entry for a struct.
+DICompositeType DIBuilder::createStructType(DIDescriptor Context,
+ StringRef Name, DIFile File,
+ unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ unsigned Flags, DIType DerivedFrom,
+ DIArray Elements,
+ unsigned RunTimeLang,
+ MDNode *VTableHolder) {
+ // TAG_structure_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
+ File.getFileNode(),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ VTableHolder,
+ NULL,
+ };
+ DICompositeType R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() && "createStructType should return a verifiable DIType");
+ return R;
+}
+
+/// createUnionType - Create debugging information entry for a union.
+DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Flags,
+ DIArray Elements,
+ unsigned RunTimeLang) {
+ // TAG_union_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ NULL,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL
+ };
+ return DICompositeType(MDNode::get(VMContext, Elts));
+}
+
+/// createSubroutineType - Create subroutine type.
+DICompositeType
+DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
+ // TAG_subroutine_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ MDString::get(VMContext, ""),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ NULL,
+ ParameterTypes,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Constant::getNullValue(Type::getInt32Ty(VMContext))
+ };
+ return DICompositeType(MDNode::get(VMContext, Elts));
+}
+
+/// createEnumerationType - Create debugging information entry for an
+/// enumeration.
+DICompositeType DIBuilder::createEnumerationType(
+ DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
+ DIType ClassType) {
+ // TAG_enumeration_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ClassType,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Constant::getNullValue(Type::getInt32Ty(VMContext))
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ AllEnumTypes.push_back(Node);
+ return DICompositeType(Node);
+}
+
+/// createArrayType - Create debugging information entry for an array.
+DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+ // TAG_array_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ NULL, // Filename/Directory,
+ NULL, //TheCU,
+ MDString::get(VMContext, ""),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Constant::getNullValue(Type::getInt32Ty(VMContext))
+ };
+ return DICompositeType(MDNode::get(VMContext, Elts));
+}
+
+/// createVectorType - Create debugging information entry for a vector.
+DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+
+ // A vector is an array type with the FlagVector flag applied.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ NULL, // Filename/Directory,
+ NULL, //TheCU,
+ MDString::get(VMContext, ""),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), DIType::FlagVector),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Constant::getNullValue(Type::getInt32Ty(VMContext))
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createArtificialType - Create a new DIType with "artificial" flag set.
+DIType DIBuilder::createArtificialType(DIType Ty) {
+ if (Ty.isArtificial())
+ return Ty;
+
+ SmallVector<Value *, 9> Elts;
+ MDNode *N = Ty;
+  assert(N && "Unexpected input DIType!");
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i))
+ Elts.push_back(V);
+ else
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ }
+
+ unsigned CurFlags = Ty.getFlags();
+ CurFlags = CurFlags | DIType::FlagArtificial;
+
+ // Flags are stored at this slot.
+ Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjectPointerType - Create a new type with both the object pointer
+/// and artificial flags set.
+DIType DIBuilder::createObjectPointerType(DIType Ty) {
+ if (Ty.isObjectPointer())
+ return Ty;
+
+ SmallVector<Value *, 9> Elts;
+ MDNode *N = Ty;
+  assert(N && "Unexpected input DIType!");
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i))
+ Elts.push_back(V);
+ else
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ }
+
+ unsigned CurFlags = Ty.getFlags();
+ CurFlags = CurFlags | (DIType::FlagObjectPointer | DIType::FlagArtificial);
+
+ // Flags are stored at this slot.
+ Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// retainType - Retain DIType in a module even if it is not referenced
+/// through debug info anchors.
+void DIBuilder::retainType(DIType T) {
+ AllRetainTypes.push_back(T);
+}
+
+/// createUnspecifiedParameter - Create unspecified type descriptor
+/// for the subroutine type.
+DIDescriptor DIBuilder::createUnspecifiedParameter() {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
+ };
+ return DIDescriptor(MDNode::get(VMContext, Elts));
+}
+
+/// createForwardDecl - Create a temporary forward-declared type that
+/// can be RAUW'd if the full type is seen.
+DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
+ DIDescriptor Scope, DIFile F,
+ unsigned Line, unsigned RuntimeLang,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits) {
+ // Create a temporary MDNode.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ F.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext),
+ DIDescriptor::FlagFwdDecl),
+ NULL,
+ DIArray(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
+ };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+ assert(DIType(Node).Verify() &&
+ "createForwardDecl result should be verifiable");
+ return DIType(Node);
+}
+
+/// getOrCreateArray - Get a DIArray, create one if required.
+DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
+ if (Elements.empty()) {
+ Value *Null = Constant::getNullValue(Type::getInt32Ty(VMContext));
+ return DIArray(MDNode::get(VMContext, Null));
+ }
+ return DIArray(MDNode::get(VMContext, Elements));
+}
+
+/// getOrCreateSubrange - Create a descriptor for a value range. This
+/// implicitly uniques the values returned.
+DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Count)
+ };
+
+ return DISubrange(MDNode::get(VMContext, Elts));
+}
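+
+// Illustrative sketch (not part of this change; `DIB` and `IntTy` are a
+// hypothetical DIBuilder and element type). Describing 'int [10]' combines
+// a subrange, a subscript array, and the total size/alignment in bits:
+//
+//   Value *Subs[] = { DIB.getOrCreateSubrange(0, 10) };
+//   DICompositeType ArrTy =
+//       DIB.createArrayType(320, 32, IntTy, DIB.getOrCreateArray(Subs));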
+
+/// \brief Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile F,
+ unsigned LineNumber, DIType Ty, bool isLocalToUnit,
+ Value *Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL, // TheCU,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition */
+ Val,
+ DIDescriptor()
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ AllGVs.push_back(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// \brief Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, Value *Val) {
+ return createGlobalVariable(Name, Name, F, LineNumber, Ty, isLocalToUnit,
+ Val);
+}
+
+/// createStaticVariable - Create a new descriptor for the specified static
+/// variable.
+DIGlobalVariable DIBuilder::
+createStaticVariable(DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, Value *Val, MDNode *Decl) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition */
+ Val,
+ DIDescriptor(Decl)
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ AllGVs.push_back(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// createLocalVariable - Create a new descriptor for the specified variable.
+DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile File,
+ unsigned LineNo, DIType Ty,
+ bool AlwaysPreserve, unsigned Flags,
+ unsigned ArgNo) {
+ DIDescriptor Context(getNonCompileUnitScope(Scope));
+ assert((!Context || Context.Verify()) &&
+ "createLocalVariable should be called with a valid Context");
+ assert(Ty.Verify() &&
+ "createLocalVariable should be called with a valid type");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Constant::getNullValue(Type::getInt32Ty(VMContext))
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ if (AlwaysPreserve) {
+    // The optimizer may remove local variables. If there is an interest
+    // in preserving variable info in such a situation then stash it in a
+    // named mdnode.
+ DISubprogram Fn(getDISubprogram(Scope));
+ NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn);
+ FnLocals->addOperand(Node);
+ }
+ assert(DIVariable(Node).Verify() &&
+ "createLocalVariable should return a verifiable DIVariable");
+ return DIVariable(Node);
+}
+
+/// createComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile F,
+ unsigned LineNo,
+ DIType Ty, ArrayRef<Value *> Addr,
+ unsigned ArgNo) {
+ SmallVector<Value *, 15> Elts;
+ Elts.push_back(GetTagConstant(VMContext, Tag));
+  Elts.push_back(getNonCompileUnitScope(Scope));
+ Elts.push_back(MDString::get(VMContext, Name));
+ Elts.push_back(F);
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext),
+ (LineNo | (ArgNo << 24))));
+ Elts.push_back(Ty);
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ Elts.append(Addr.begin(), Addr.end());
+
+ return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// createFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::createFunction(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile File, unsigned LineNo,
+ DIType Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned ScopeLine,
+ unsigned Flags, bool isOptimized,
+ Function *Fn,
+ MDNode *TParams,
+ MDNode *Decl) {
+ Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ File.getFileNode(),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ NULL,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn,
+ TParams,
+ Decl,
+ MDNode::getTemporary(VMContext, TElts),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine)
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ if (isDefinition)
+ AllSubprograms.push_back(Node);
+ DISubprogram S(Node);
+ assert(S.Verify() && "createFunction should return a valid DISubprogram");
+ return S;
+}
+
+/// createMethod - Create a new descriptor for the specified C++ method.
+DISubprogram DIBuilder::createMethod(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile F,
+ unsigned LineNo, DIType Ty,
+ bool isLocalToUnit,
+ bool isDefinition,
+ unsigned VK, unsigned VIndex,
+ MDNode *VTableHolder,
+ unsigned Flags,
+ bool isOptimized,
+ Function *Fn,
+ MDNode *TParam) {
+ Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ F.getFileNode(),
+ getNonCompileUnitScope(Context),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+ ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+ VTableHolder,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn,
+ TParam,
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ MDNode::getTemporary(VMContext, TElts),
+ // FIXME: Do we want to use different scope/lines?
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ if (isDefinition)
+ AllSubprograms.push_back(Node);
+ DISubprogram S(Node);
+ assert(S.Verify() && "createMethod should return a valid DISubprogram");
+ return S;
+}
+
+/// createNameSpace - This creates a new descriptor for a namespace
+/// with the specified parent scope.
+DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+ };
+ DINameSpace R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() &&
+ "createNameSpace should return a verifiable DINameSpace");
+ return R;
+}
+
+/// createLexicalBlockFile - This creates a new MDNode that encapsulates
+/// an existing scope with a new filename.
+DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
+ DIFile File) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+ File.getFileNode(),
+ Scope
+ };
+ DILexicalBlockFile R(MDNode::get(VMContext, Elts));
+ assert(
+ R.Verify() &&
+ "createLexicalBlockFile should return a verifiable DILexicalBlockFile");
+ return R;
+}
+
+DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
+ unsigned Line, unsigned Col) {
+  // Defeat MDNode uniquing for lexical blocks by using a unique id.
+ static unsigned int unique_id = 0;
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+ File.getFileNode(),
+ getNonCompileUnitScope(Scope),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Col),
+ ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
+ };
+ DILexicalBlock R(MDNode::get(VMContext, Elts));
+ assert(R.Verify() &&
+ "createLexicalBlock should return a verifiable DILexicalBlock");
+ return R;
+}
+
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+ return CallInst::Create(DeclareFn, Args, "", InsertBefore);
+}
+
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
+
+ // If this block already has a terminator then insert this intrinsic
+ // before the terminator.
+ if (TerminatorInst *T = InsertAtEnd->getTerminator())
+ return CallInst::Create(DeclareFn, Args, "", T);
+ else
+ return CallInst::Create(DeclareFn, Args, "", InsertAtEnd);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, "", InsertBefore);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), V),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
+}
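+
+// Illustrative sketch (not part of this change; `DIB`, `AI`, `V`, `DV` and
+// `IP` are a hypothetical DIBuilder, alloca, SSA value, DIVariable and
+// insertion point). dbg.declare attaches a variable descriptor to its
+// storage; dbg.value attaches it to an SSA value at a given offset:
+//
+//   DIB.insertDeclare(AI, DV, IP);
+//   DIB.insertDbgValueIntrinsic(V, /*Offset=*/0, DV, IP);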
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
new file mode 100644
index 000000000000..ecd5216f20ac
--- /dev/null
+++ b/lib/IR/DataLayout.cpp
@@ -0,0 +1,697 @@
+//===-- DataLayout.cpp - Data size & alignment routines ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines layout properties related to datatype size/offset/alignment
+// information.
+//
+// This structure should be created once, filled in if the defaults are not
+// correct and then passed around by const&. None of the member functions
+// require modification to the object.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/DataLayout.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdlib>
+using namespace llvm;
+
+// Handle the pass registration stuff necessary to use DataLayout as a pass.
+
+// Register the DataLayout pass with the pass registry.
+INITIALIZE_PASS(DataLayout, "datalayout", "Data Layout", false, true)
+char DataLayout::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Support for StructLayout
+//===----------------------------------------------------------------------===//
+
+StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
+ assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
+ StructAlignment = 0;
+ StructSize = 0;
+ NumElements = ST->getNumElements();
+
+ // Loop over each of the elements, placing them in memory.
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ Type *Ty = ST->getElementType(i);
+ unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
+
+ // Add padding if necessary to align the data element properly.
+ if ((StructSize & (TyAlign-1)) != 0)
+ StructSize = DataLayout::RoundUpAlignment(StructSize, TyAlign);
+
+ // Keep track of maximum alignment constraint.
+ StructAlignment = std::max(TyAlign, StructAlignment);
+
+ MemberOffsets[i] = StructSize;
+ StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+ }
+
+ // Empty structures have alignment of 1 byte.
+ if (StructAlignment == 0) StructAlignment = 1;
+
+ // Add padding to the end of the struct so that it could be put in an array
+ // and all array elements would be aligned correctly.
+ if ((StructSize & (StructAlignment-1)) != 0)
+ StructSize = DataLayout::RoundUpAlignment(StructSize, StructAlignment);
+}
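+
+// Worked example (illustrative, assuming the default alignments set up in
+// DataLayout::init below): for { i8, i32, i16 } the loop places i8 at
+// offset 0, pads to offset 4 for i32 (ABI align 4), places i16 at offset 8
+// (raw size 10), and the final padding rounds the size up to 12 so array
+// elements of this struct stay 4-byte aligned.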
+
+
+/// getElementContainingOffset - Given a valid offset into the structure,
+/// return the structure index that contains it.
+unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
+ const uint64_t *SI =
+ std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset);
+ assert(SI != &MemberOffsets[0] && "Offset not in structure type!");
+ --SI;
+ assert(*SI <= Offset && "upper_bound didn't work");
+ assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
+ (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
+ "Upper bound didn't work!");
+
+ // Multiple fields can have the same offset if any of them are zero sized.
+ // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
+ // at the i32 element, because it is the last element at that offset. This is
+ // the right one to return, because anything after it will have a higher
+ // offset, implying that this element is non-empty.
+ return SI-&MemberOffsets[0];
+}
+
+//===----------------------------------------------------------------------===//
+// LayoutAlignElem, LayoutAlign support
+//===----------------------------------------------------------------------===//
+
+LayoutAlignElem
+LayoutAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ LayoutAlignElem retval;
+ retval.AlignType = align_type;
+ retval.ABIAlign = abi_align;
+ retval.PrefAlign = pref_align;
+ retval.TypeBitWidth = bit_width;
+ return retval;
+}
+
+bool
+LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const {
+ return (AlignType == rhs.AlignType
+ && ABIAlign == rhs.ABIAlign
+ && PrefAlign == rhs.PrefAlign
+ && TypeBitWidth == rhs.TypeBitWidth);
+}
+
+const LayoutAlignElem
+DataLayout::InvalidAlignmentElem = LayoutAlignElem::get(INVALID_ALIGN, 0, 0, 0);
+
+//===----------------------------------------------------------------------===//
+// PointerAlignElem, PointerAlign support
+//===----------------------------------------------------------------------===//
+
+PointerAlignElem
+PointerAlignElem::get(uint32_t addr_space, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ PointerAlignElem retval;
+ retval.AddressSpace = addr_space;
+ retval.ABIAlign = abi_align;
+ retval.PrefAlign = pref_align;
+ retval.TypeBitWidth = bit_width;
+ return retval;
+}
+
+bool
+PointerAlignElem::operator==(const PointerAlignElem &rhs) const {
+ return (ABIAlign == rhs.ABIAlign
+ && AddressSpace == rhs.AddressSpace
+ && PrefAlign == rhs.PrefAlign
+ && TypeBitWidth == rhs.TypeBitWidth);
+}
+
+const PointerAlignElem
+DataLayout::InvalidPointerElem = PointerAlignElem::get(~0U, 0U, 0U, 0U);
+
+//===----------------------------------------------------------------------===//
+// DataLayout Class Implementation
+//===----------------------------------------------------------------------===//
+
+void DataLayout::init(StringRef Desc) {
+ initializeDataLayoutPass(*PassRegistry::getPassRegistry());
+
+ LayoutMap = 0;
+ LittleEndian = false;
+ StackNaturalAlign = 0;
+
+ // Default alignments
+ setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1
+ setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8
+ setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16
+ setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32
+ setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
+ setAlignment(FLOAT_ALIGN, 2, 2, 16); // half
+ setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
+ setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
+ setAlignment(FLOAT_ALIGN, 16, 16, 128); // ppcf128, quad, ...
+ setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ...
+ setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
+ setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
+ setPointerAlignment(0, 8, 8, 8);
+
+ parseSpecifier(Desc);
+}
+
+/// Checked version of split, to ensure mandatory subparts.
+static std::pair<StringRef, StringRef> split(StringRef Str, char Separator) {
+ assert(!Str.empty() && "parse error, string can't be empty here");
+ std::pair<StringRef, StringRef> Split = Str.split(Separator);
+ assert((!Split.second.empty() || Split.first == Str) &&
+ "a trailing separator is not allowed");
+ return Split;
+}
+
+/// Get an unsigned integer, including error checks.
+static unsigned getInt(StringRef R) {
+ unsigned Result;
+ bool error = R.getAsInteger(10, Result); (void)error;
+ assert(!error && "not a number, or does not fit in an unsigned int");
+ return Result;
+}
+
+/// Convert bits into bytes. Assert if not a byte width multiple.
+static unsigned inBytes(unsigned Bits) {
+ assert(Bits % 8 == 0 && "number of bits must be a byte width multiple");
+ return Bits / 8;
+}
+
+void DataLayout::parseSpecifier(StringRef Desc) {
+
+ while (!Desc.empty()) {
+
+ // Split at '-'.
+ std::pair<StringRef, StringRef> Split = split(Desc, '-');
+ Desc = Split.second;
+
+ // Split at ':'.
+ Split = split(Split.first, ':');
+
+ // Aliases used below.
+ StringRef &Tok = Split.first; // Current token.
+ StringRef &Rest = Split.second; // The rest of the string.
+
+ char Specifier = Tok.front();
+ Tok = Tok.substr(1);
+
+ switch (Specifier) {
+ case 'E':
+ LittleEndian = false;
+ break;
+ case 'e':
+ LittleEndian = true;
+ break;
+ case 'p': {
+ // Address space.
+ unsigned AddrSpace = Tok.empty() ? 0 : getInt(Tok);
+      assert(AddrSpace < 1 << 24 &&
+             "Invalid address space, must be a 24-bit integer");
+
+ // Size.
+ Split = split(Rest, ':');
+ unsigned PointerMemSize = inBytes(getInt(Tok));
+
+ // ABI alignment.
+ Split = split(Rest, ':');
+ unsigned PointerABIAlign = inBytes(getInt(Tok));
+
+ // Preferred alignment.
+ unsigned PointerPrefAlign = PointerABIAlign;
+ if (!Rest.empty()) {
+ Split = split(Rest, ':');
+ PointerPrefAlign = inBytes(getInt(Tok));
+ }
+
+ setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign,
+ PointerMemSize);
+ break;
+ }
+ case 'i':
+ case 'v':
+ case 'f':
+ case 'a':
+ case 's': {
+ AlignTypeEnum AlignType;
+ switch (Specifier) {
+ default:
+ case 'i': AlignType = INTEGER_ALIGN; break;
+ case 'v': AlignType = VECTOR_ALIGN; break;
+ case 'f': AlignType = FLOAT_ALIGN; break;
+ case 'a': AlignType = AGGREGATE_ALIGN; break;
+ case 's': AlignType = STACK_ALIGN; break;
+ }
+
+ // Bit size.
+ unsigned Size = Tok.empty() ? 0 : getInt(Tok);
+
+ // ABI alignment.
+ Split = split(Rest, ':');
+ unsigned ABIAlign = inBytes(getInt(Tok));
+
+ // Preferred alignment.
+ unsigned PrefAlign = ABIAlign;
+ if (!Rest.empty()) {
+ Split = split(Rest, ':');
+ PrefAlign = inBytes(getInt(Tok));
+ }
+
+ setAlignment(AlignType, ABIAlign, PrefAlign, Size);
+
+ break;
+ }
+ case 'n': // Native integer types.
+ for (;;) {
+ unsigned Width = getInt(Tok);
+ assert(Width != 0 && "width must be non-zero");
+ LegalIntWidths.push_back(Width);
+ if (Rest.empty())
+ break;
+ Split = split(Rest, ':');
+ }
+ break;
+ case 'S': { // Stack natural alignment.
+ StackNaturalAlign = inBytes(getInt(Tok));
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown specifier in datalayout string");
+ break;
+ }
+ }
+}
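+
+// Illustrative example (not part of this change): the string
+// "e-p:64:64:64-i64:64:64-n8:16:32:64-S128" parses as little-endian ('e'),
+// 64-bit pointers with 64-bit ABI and preferred alignment ('p'), i64
+// aligned to 64 bits ('i'), native integer widths of 8/16/32/64 ('n'), and
+// a natural stack alignment of 128 bits ('S'). Sizes and alignments are
+// given in bits and converted to bytes via inBytes() before being stored.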
+
+/// Default ctor.
+///
+/// @note This has to exist, because this is a pass, but it should never be
+/// used.
+DataLayout::DataLayout() : ImmutablePass(ID) {
+ report_fatal_error("Bad DataLayout ctor used. "
+ "Tool did not specify a DataLayout to use?");
+}
+
+DataLayout::DataLayout(const Module *M)
+ : ImmutablePass(ID) {
+ init(M->getDataLayout());
+}
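+
+// Illustrative sketch (not part of this change; `M` and `Ty` are a
+// hypothetical Module* and Type*). Per the file header: construct once from
+// the module's layout string, then query through a const reference:
+//
+//   DataLayout DL(M);
+//   unsigned Align = DL.getABITypeAlignment(Ty);
+//   uint64_t Size = DL.getTypeAllocSize(Ty);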
+
+void
+DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ assert(pref_align < (1 << 16) && "Alignment doesn't fit in bitfield");
+ assert(bit_width < (1 << 24) && "Bit width doesn't fit in bitfield");
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == (unsigned)align_type &&
+ Alignments[i].TypeBitWidth == bit_width) {
+ // Update the abi, preferred alignments.
+ Alignments[i].ABIAlign = abi_align;
+ Alignments[i].PrefAlign = pref_align;
+ return;
+ }
+ }
+
+ Alignments.push_back(LayoutAlignElem::get(align_type, abi_align,
+ pref_align, bit_width));
+}
+
+void
+DataLayout::setPointerAlignment(uint32_t addr_space, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ DenseMap<unsigned,PointerAlignElem>::iterator val = Pointers.find(addr_space);
+ if (val == Pointers.end()) {
+ Pointers[addr_space] = PointerAlignElem::get(addr_space,
+ abi_align, pref_align, bit_width);
+ } else {
+ val->second.ABIAlign = abi_align;
+ val->second.PrefAlign = pref_align;
+ val->second.TypeBitWidth = bit_width;
+ }
+}
+
+/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
+/// preferred if ABIInfo = false) the layout wants for the specified datatype.
+unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
+ uint32_t BitWidth, bool ABIInfo,
+ Type *Ty) const {
+ // Check to see if we have an exact match and remember the best match we see.
+ int BestMatchIdx = -1;
+ int LargestInt = -1;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == (unsigned)AlignType &&
+ Alignments[i].TypeBitWidth == BitWidth)
+ return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
+
+ // The best match so far depends on what we're looking for.
+ if (AlignType == INTEGER_ALIGN &&
+ Alignments[i].AlignType == INTEGER_ALIGN) {
+ // The "best match" for integers is the smallest size that is larger than
+ // the BitWidth requested.
+ if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
+ Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
+ BestMatchIdx = i;
+ // However, if there isn't one that's larger, then we must use the
+      // largest one we have (see below).
+ if (LargestInt == -1 ||
+ Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
+ LargestInt = i;
+ }
+ }
+
+ // Okay, we didn't find an exact solution. Fall back here depending on what
+ // is being looked for.
+ if (BestMatchIdx == -1) {
+    // If we didn't find an integer alignment, fall back to the most
+    // conservative one.
+ if (AlignType == INTEGER_ALIGN) {
+ BestMatchIdx = LargestInt;
+ } else {
+ assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
+
+ // By default, use natural alignment for vector types. This is consistent
+ // with what clang and llvm-gcc do.
+ unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
+ Align *= cast<VectorType>(Ty)->getNumElements();
+ // If the alignment is not a power of 2, round up to the next power of 2.
+ // This happens for non-power-of-2 length vectors.
+ if (Align & (Align-1))
+ Align = NextPowerOf2(Align);
+ return Align;
+ }
+ }
+
+ // Since we got a "best match" index, just return it.
+ return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
+ : Alignments[BestMatchIdx].PrefAlign;
+}
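+
+// Illustrative example (not part of this change): with integer entries for
+// only i32 and i64, a query for i48 returns the i64 entry (the smallest
+// wider integer), while a query for i128 falls back to the i64 entry (the
+// largest one available).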
+
+namespace {
+
+class StructLayoutMap {
+ typedef DenseMap<StructType*, StructLayout*> LayoutInfoTy;
+ LayoutInfoTy LayoutInfo;
+
+public:
+ virtual ~StructLayoutMap() {
+ // Remove any layouts.
+ for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end();
+ I != E; ++I) {
+ StructLayout *Value = I->second;
+ Value->~StructLayout();
+ free(Value);
+ }
+ }
+
+ StructLayout *&operator[](StructType *STy) {
+ return LayoutInfo[STy];
+ }
+
+ // for debugging...
+ virtual void dump() const {}
+};
+
+} // end anonymous namespace
+
+DataLayout::~DataLayout() {
+ delete static_cast<StructLayoutMap*>(LayoutMap);
+}
+
+bool DataLayout::doFinalization(Module &M) {
+ delete static_cast<StructLayoutMap*>(LayoutMap);
+ LayoutMap = 0;
+ return false;
+}
+
+const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
+ if (!LayoutMap)
+ LayoutMap = new StructLayoutMap();
+
+ StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
+ StructLayout *&SL = (*STM)[Ty];
+ if (SL) return SL;
+
+ // Otherwise, create the struct layout. Because it is variable length, we
+ // malloc it, then use placement new.
+ int NumElts = Ty->getNumElements();
+ StructLayout *L =
+ (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
+
+ // Set SL before calling StructLayout's ctor. The ctor could cause other
+  // entries to be added to the map, invalidating our reference.
+ SL = L;
+
+ new (L) StructLayout(Ty, *this);
+
+ return L;
+}
+
+std::string DataLayout::getStringRepresentation() const {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << (LittleEndian ? "e" : "E");
+ SmallVector<unsigned, 8> addrSpaces;
+  // Let's get all of the known address spaces and sort them
+ // into increasing order so that we can emit the string
+ // in a cleaner format.
+ for (DenseMap<unsigned, PointerAlignElem>::const_iterator
+ pib = Pointers.begin(), pie = Pointers.end();
+ pib != pie; ++pib) {
+ addrSpaces.push_back(pib->first);
+ }
+ std::sort(addrSpaces.begin(), addrSpaces.end());
+ for (SmallVector<unsigned, 8>::iterator asb = addrSpaces.begin(),
+ ase = addrSpaces.end(); asb != ase; ++asb) {
+ const PointerAlignElem &PI = Pointers.find(*asb)->second;
+ OS << "-p";
+ if (PI.AddressSpace) {
+ OS << PI.AddressSpace;
+ }
+ OS << ":" << PI.TypeBitWidth*8 << ':' << PI.ABIAlign*8
+ << ':' << PI.PrefAlign*8;
+ }
+ OS << "-S" << StackNaturalAlign*8;
+
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ const LayoutAlignElem &AI = Alignments[i];
+ OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
+ << AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
+ }
+
+ if (!LegalIntWidths.empty()) {
+ OS << "-n" << (unsigned)LegalIntWidths[0];
+
+ for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
+ OS << ':' << (unsigned)LegalIntWidths[i];
+ }
+ return OS.str();
+}
+
+
+/*!
+ \param abi_or_pref Flag that determines which alignment is returned. true
+ returns the ABI alignment, false returns the preferred alignment.
+ \param Ty The underlying type for which alignment is determined.
+
+ Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
+ == false) for the requested type \a Ty.
+ */
+unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
+ int AlignType = -1;
+
+  assert(Ty->isSized() && "Cannot get alignment of an unsized type!");
+ switch (Ty->getTypeID()) {
+ // Early escape for the non-numeric types.
+ case Type::LabelTyID:
+ return (abi_or_pref
+ ? getPointerABIAlignment(0)
+ : getPointerPrefAlignment(0));
+ case Type::PointerTyID: {
+ unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
+ return (abi_or_pref
+ ? getPointerABIAlignment(AS)
+ : getPointerPrefAlignment(AS));
+ }
+ case Type::ArrayTyID:
+ return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref);
+
+ case Type::StructTyID: {
+ // Packed structure types always have an ABI alignment of one.
+ if (cast<StructType>(Ty)->isPacked() && abi_or_pref)
+ return 1;
+
+ // Get the layout annotation... which is lazily created on demand.
+ const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
+ unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
+ return std::max(Align, Layout->getAlignment());
+ }
+ case Type::IntegerTyID:
+ AlignType = INTEGER_ALIGN;
+ break;
+ case Type::HalfTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ // PPC_FP128TyID and FP128TyID have different data contents, but the
+ // same size and alignment, so they look the same here.
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ case Type::X86_FP80TyID:
+ AlignType = FLOAT_ALIGN;
+ break;
+ case Type::X86_MMXTyID:
+ case Type::VectorTyID:
+ AlignType = VECTOR_ALIGN;
+ break;
+ default:
+ llvm_unreachable("Bad type for getAlignment!!!");
+ }
+
+ return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
+ abi_or_pref, Ty);
+}
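+
+// For example, given an (illustrative) "f64:32:64" entry in the layout
+// string, getAlignment(DoubleTy, true) returns 4 bytes (the ABI alignment)
+// while getAlignment(DoubleTy, false) returns 8 bytes (the preferred one).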
+
+unsigned DataLayout::getABITypeAlignment(Type *Ty) const {
+ return getAlignment(Ty, true);
+}
+
+/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
+/// an integer type of the specified bitwidth.
+unsigned DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const {
+ return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
+}
+
+
+unsigned DataLayout::getCallFrameTypeAlignment(Type *Ty) const {
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
+ if (Alignments[i].AlignType == STACK_ALIGN)
+ return Alignments[i].ABIAlign;
+
+ return getABITypeAlignment(Ty);
+}
+
+unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const {
+ return getAlignment(Ty, false);
+}
+
+unsigned DataLayout::getPreferredTypeAlignmentShift(Type *Ty) const {
+ unsigned Align = getPrefTypeAlignment(Ty);
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+ return Log2_32(Align);
+}
+
+/// getIntPtrType - Return an integer type with size at least as big as that
+/// of a pointer in the given address space.
+IntegerType *DataLayout::getIntPtrType(LLVMContext &C,
+ unsigned AddressSpace) const {
+ return IntegerType::get(C, getPointerSizeInBits(AddressSpace));
+}
+
+/// getIntPtrType - Return an integer (vector of integer) type with size at
+/// least as big as that of a pointer of the given pointer (vector of pointer)
+/// type.
+Type *DataLayout::getIntPtrType(Type *Ty) const {
+ assert(Ty->isPtrOrPtrVectorTy() &&
+ "Expected a pointer or pointer vector type.");
+ unsigned NumBits = getTypeSizeInBits(Ty->getScalarType());
+ IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits);
+ if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
+ return VectorType::get(IntTy, VecTy->getNumElements());
+ return IntTy;
+}
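+
+// Usage sketch (DL and PtrTy are assumed to exist in the caller):
+//
+//   Type *IntTy = DL.getIntPtrType(PtrTy);  // e.g. i64 for a 64-bit target
+//
+// A <4 x i8*> argument would instead yield <4 x i64>, preserving the vector
+// width while swapping each lane for a pointer-sized integer.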
+
+Type *DataLayout::getSmallestLegalIntType(LLVMContext &C, unsigned Width) const {
+ for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
+ if (Width <= LegalIntWidths[i])
+ return Type::getIntNTy(C, LegalIntWidths[i]);
+ return 0;
+}
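+
+// For example, with LegalIntWidths = {8, 16, 32, 64} (an "n8:16:32:64" spec,
+// assuming the widths are listed in increasing order as is conventional),
+// getSmallestLegalIntType(C, 17) returns i32 and a request for 65 bits
+// returns null.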
+
+uint64_t DataLayout::getIndexedOffset(Type *ptrTy,
+ ArrayRef<Value *> Indices) const {
+ Type *Ty = ptrTy;
+ assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()");
+ uint64_t Result = 0;
+
+ generic_gep_type_iterator<Value* const*>
+ TI = gep_type_begin(ptrTy, Indices);
+ for (unsigned CurIDX = 0, EndIDX = Indices.size(); CurIDX != EndIDX;
+ ++CurIDX, ++TI) {
+ if (StructType *STy = dyn_cast<StructType>(*TI)) {
+ assert(Indices[CurIDX]->getType() ==
+ Type::getInt32Ty(ptrTy->getContext()) &&
+ "Illegal struct idx");
+ unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
+
+ // Get structure layout information...
+ const StructLayout *Layout = getStructLayout(STy);
+
+ // Add in the offset, as calculated by the structure layout info...
+ Result += Layout->getElementOffset(FieldNo);
+
+ // Update Ty to refer to current element
+ Ty = STy->getElementType(FieldNo);
+ } else {
+ // Update Ty to refer to current element
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // Get the array index and the size of each array element.
+ if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue())
+ Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty);
+ }
+ }
+
+ return Result;
+}
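+
+// Worked example (illustrative): for %T = type { i32, [10 x double] } and
+// indices {i32 0, i32 1, i32 3}, the loop above accumulates:
+//   0 (pointer index 0)
+//   + 8 (field 1 of %T; the i32 is padded to the double's 8-byte alignment)
+//   + 3 * 8 (array index 3 times the element's alloc size)
+// for a total offset of 32 bytes.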
+
+/// getPreferredAlignment - Return the preferred alignment of the specified
+/// global. This includes an explicitly requested alignment (if the global
+/// has one).
+unsigned DataLayout::getPreferredAlignment(const GlobalVariable *GV) const {
+ Type *ElemType = GV->getType()->getElementType();
+ unsigned Alignment = getPrefTypeAlignment(ElemType);
+ unsigned GVAlignment = GV->getAlignment();
+ if (GVAlignment >= Alignment) {
+ Alignment = GVAlignment;
+ } else if (GVAlignment != 0) {
+ Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
+ }
+
+ if (GV->hasInitializer() && GVAlignment == 0) {
+ if (Alignment < 16) {
+ // If the global is not external, see if it is large. If so, give it a
+ // larger alignment.
+ if (getTypeSizeInBits(ElemType) > 128)
+ Alignment = 16; // 16-byte alignment.
+ }
+ }
+ return Alignment;
+}
+
+/// getPreferredAlignmentLog - Return the preferred alignment of the
+/// specified global, returned in log form. This includes an explicitly
+/// requested alignment (if the global has one).
+unsigned DataLayout::getPreferredAlignmentLog(const GlobalVariable *GV) const {
+ return Log2_32(getPreferredAlignment(GV));
+}
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
new file mode 100644
index 000000000000..0ffe99d70454
--- /dev/null
+++ b/lib/IR/DebugInfo.cpp
@@ -0,0 +1,1209 @@
+//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the helper classes used to build and interpret debug
+// information in LLVM IR form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor
+//===----------------------------------------------------------------------===//
+
+DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlockFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) {
+}
+
+bool DIDescriptor::Verify() const {
+ return DbgNode &&
+ (DIDerivedType(DbgNode).Verify() ||
+ DICompositeType(DbgNode).Verify() || DIBasicType(DbgNode).Verify() ||
+ DIVariable(DbgNode).Verify() || DISubprogram(DbgNode).Verify() ||
+ DIGlobalVariable(DbgNode).Verify() || DIFile(DbgNode).Verify() ||
+ DICompileUnit(DbgNode).Verify() || DINameSpace(DbgNode).Verify() ||
+ DILexicalBlock(DbgNode).Verify() ||
+ DILexicalBlockFile(DbgNode).Verify() ||
+ DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() ||
+ DIObjCProperty(DbgNode).Verify() ||
+ DITemplateTypeParameter(DbgNode).Verify() ||
+ DITemplateValueParameter(DbgNode).Verify());
+}
+
+static Value *getField(const MDNode *DbgNode, unsigned Elt) {
+ if (DbgNode == 0 || Elt >= DbgNode->getNumOperands())
+ return 0;
+ return DbgNode->getOperand(Elt);
+}
+
+static const MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
+ if (const MDNode *R = dyn_cast_or_null<MDNode>(getField(DbgNode, Elt)))
+ return R;
+ return 0;
+}
+
+static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) {
+ if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt)))
+ return MDS->getString();
+ return StringRef();
+}
+
+StringRef DIDescriptor::getStringField(unsigned Elt) const {
+ return ::getStringField(DbgNode, Elt);
+}
+
+uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ if (ConstantInt *CI
+ = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
+ return CI->getZExtValue();
+
+ return 0;
+}
+
+int64_t DIDescriptor::getInt64Field(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ if (ConstantInt *CI
+ = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
+ return CI->getSExtValue();
+
+ return 0;
+}
+
+DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return DIDescriptor();
+
+ if (Elt < DbgNode->getNumOperands())
+ return
+ DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
+ return DIDescriptor();
+}
+
+GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+Constant *DIDescriptor::getConstantField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+Function *DIDescriptor::getFunctionField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
+void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) {
+ if (DbgNode == 0)
+ return;
+
+ if (Elt < DbgNode->getNumOperands()) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ Node->replaceOperandWith(Elt, F);
+ }
+}
+
+unsigned DIVariable::getNumAddrElements() const {
+ return DbgNode->getNumOperands()-8;
+}
+
+/// getInlinedAt - If this variable is inlined then return the inlined
+/// location.
+MDNode *DIVariable::getInlinedAt() const {
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(7));
+}
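+
+// Note: the two accessors above rely on this release's DIVariable operand
+// layout, in which operands 0-7 hold the fixed fields (with the inlined-at
+// location at operand 7) and any operands from index 8 onward are complex
+// address elements -- hence the getNumOperands()-8 computation.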
+
+//===----------------------------------------------------------------------===//
+// Predicates
+//===----------------------------------------------------------------------===//
+
+/// isBasicType - Return true if the specified tag is legal for
+/// DIBasicType.
+bool DIDescriptor::isBasicType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_base_type:
+ case dwarf::DW_TAG_unspecified_type:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
+bool DIDescriptor::isDerivedType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_ptr_to_member_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_rvalue_reference_type:
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ case dwarf::DW_TAG_restrict_type:
+ case dwarf::DW_TAG_member:
+ case dwarf::DW_TAG_inheritance:
+ case dwarf::DW_TAG_friend:
+ return true;
+ default:
+ // CompositeTypes are currently modelled as DerivedTypes.
+ return isCompositeType();
+ }
+}
+
+/// isCompositeType - Return true if the specified tag is legal for
+/// DICompositeType.
+bool DIDescriptor::isCompositeType() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_array_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_subroutine_type:
+ case dwarf::DW_TAG_class_type:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isVariable - Return true if the specified tag is legal for DIVariable.
+bool DIDescriptor::isVariable() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_auto_variable:
+ case dwarf::DW_TAG_arg_variable:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isType - Return true if the specified tag is legal for DIType.
+bool DIDescriptor::isType() const {
+ return isBasicType() || isCompositeType() || isDerivedType();
+}
+
+/// isSubprogram - Return true if the specified tag is legal for
+/// DISubprogram.
+bool DIDescriptor::isSubprogram() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_subprogram;
+}
+
+/// isGlobalVariable - Return true if the specified tag is legal for
+/// DIGlobalVariable.
+bool DIDescriptor::isGlobalVariable() const {
+ return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
+ getTag() == dwarf::DW_TAG_constant);
+}
+
+/// isGlobal - Return true if the specified tag is legal for DIGlobal.
+bool DIDescriptor::isGlobal() const {
+ return isGlobalVariable();
+}
+
+/// isUnspecifiedParameter - Return true if the specified tag is
+/// DW_TAG_unspecified_parameters.
+bool DIDescriptor::isUnspecifiedParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters;
+}
+
+/// isScope - Return true if the specified tag is one of the scope
+/// related tags.
+bool DIDescriptor::isScope() const {
+ if (!DbgNode) return false;
+ switch (getTag()) {
+ case dwarf::DW_TAG_compile_unit:
+ case dwarf::DW_TAG_lexical_block:
+ case dwarf::DW_TAG_subprogram:
+ case dwarf::DW_TAG_namespace:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+/// isTemplateTypeParameter - Return true if the specified tag is
+/// DW_TAG_template_type_parameter.
+bool DIDescriptor::isTemplateTypeParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
+}
+
+/// isTemplateValueParameter - Return true if the specified tag is
+/// DW_TAG_template_value_parameter.
+bool DIDescriptor::isTemplateValueParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter;
+}
+
+/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
+bool DIDescriptor::isCompileUnit() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
+}
+
+/// isFile - Return true if the specified tag is DW_TAG_file_type.
+bool DIDescriptor::isFile() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_file_type;
+}
+
+/// isNameSpace - Return true if the specified tag is DW_TAG_namespace.
+bool DIDescriptor::isNameSpace() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_namespace;
+}
+
+/// isLexicalBlockFile - Return true if the specified descriptor is a
+/// lexical block with an extra file.
+bool DIDescriptor::isLexicalBlockFile() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
+ (DbgNode->getNumOperands() == 3);
+}
+
+/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
+bool DIDescriptor::isLexicalBlock() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
+ (DbgNode->getNumOperands() > 3);
+}
+
+/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
+bool DIDescriptor::isSubrange() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
+}
+
+/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
+bool DIDescriptor::isEnumerator() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
+}
+
+/// isObjCProperty - Return true if the specified tag is DW_TAG_APPLE_property.
+bool DIDescriptor::isObjCProperty() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
+}
+
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+DIType::DIType(const MDNode *N) : DIScope(N) {
+ if (!N) return;
+ if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
+ DbgNode = 0;
+ }
+}
+
+unsigned DIArray::getNumElements() const {
+ if (!DbgNode)
+ return 0;
+ return DbgNode->getNumOperands();
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(DIDescriptor &D) {
+ if (!DbgNode)
+ return;
+
+ // Since we use a TrackingVH for the node, it's easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
+ MDNode::deleteTemporary(Node);
+ }
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(MDNode *D) {
+ if (!DbgNode)
+ return;
+
+ // Since we use a TrackingVH for the node, it's easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
+ MDNode::deleteTemporary(Node);
+ }
+}
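+
+// Usage sketch (hypothetical names): a front end can emit a temporary node
+// for a forward-declared type and splice in the real definition later:
+//
+//   DIType FwdDecl = ...;     // placeholder for "struct S;"
+//   DIType Definition = ...;  // built once the body has been parsed
+//   FwdDecl.replaceAllUsesWith(Definition);
+//
+// Afterwards every debug-info reference to the placeholder points at the
+// definition and the temporary node has been deleted.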
+
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+bool DIType::isUnsignedDIType() {
+ DIDerivedType DTy(DbgNode);
+ if (DTy.Verify())
+ return DTy.getTypeDerivedFrom().isUnsignedDIType();
+
+ DIBasicType BTy(DbgNode);
+ if (BTy.Verify()) {
+ unsigned Encoding = BTy.getEncoding();
+ if (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_boolean)
+ return true;
+ }
+ return false;
+}
+
+/// Verify - Verify that a compile unit is well formed.
+bool DICompileUnit::Verify() const {
+ if (!isCompileUnit())
+ return false;
+ StringRef N = getFilename();
+ if (N.empty())
+ return false;
+ // It is possible that the directory and producer strings are empty.
+ return DbgNode->getNumOperands() == 12;
+}
+
+/// Verify - Verify that an ObjC property is well formed.
+bool DIObjCProperty::Verify() const {
+ if (!isObjCProperty())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify()) return false;
+
+ // Don't worry about the rest of the strings for now.
+ return DbgNode->getNumOperands() == 8;
+}
+
+/// Verify - Verify that a type descriptor is well formed.
+bool DIType::Verify() const {
+ if (!isType())
+ return false;
+ if (getContext() && !getContext().Verify())
+ return false;
+ unsigned Tag = getTag();
+ if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
+ Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
+ Tag != dwarf::DW_TAG_ptr_to_member_type &&
+ Tag != dwarf::DW_TAG_reference_type &&
+ Tag != dwarf::DW_TAG_rvalue_reference_type &&
+ Tag != dwarf::DW_TAG_restrict_type &&
+ Tag != dwarf::DW_TAG_array_type &&
+ Tag != dwarf::DW_TAG_enumeration_type &&
+ Tag != dwarf::DW_TAG_subroutine_type &&
+ getFilename().empty())
+ return false;
+ return true;
+}
+
+/// Verify - Verify that a basic type descriptor is well formed.
+bool DIBasicType::Verify() const {
+ return isBasicType() && DbgNode->getNumOperands() == 10;
+}
+
+/// Verify - Verify that a derived type descriptor is well formed.
+bool DIDerivedType::Verify() const {
+ return isDerivedType() && DbgNode->getNumOperands() >= 10 &&
+ DbgNode->getNumOperands() <= 14;
+}
+
+/// Verify - Verify that a composite type descriptor is well formed.
+bool DICompositeType::Verify() const {
+ if (!isCompositeType())
+ return false;
+ if (getContext() && !getContext().Verify())
+ return false;
+
+ return DbgNode->getNumOperands() >= 10 && DbgNode->getNumOperands() <= 14;
+}
+
+/// Verify - Verify that a subprogram descriptor is well formed.
+bool DISubprogram::Verify() const {
+ if (!isSubprogram())
+ return false;
+
+ if (getContext() && !getContext().Verify())
+ return false;
+
+ DICompositeType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+ return DbgNode->getNumOperands() == 20;
+}
+
+/// Verify - Verify that a global variable descriptor is well formed.
+bool DIGlobalVariable::Verify() const {
+ if (!isGlobalVariable())
+ return false;
+
+ if (getDisplayName().empty())
+ return false;
+
+ if (getContext() && !getContext().Verify())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ if (!getGlobal() && !getConstant())
+ return false;
+
+ return DbgNode->getNumOperands() == 13;
+}
+
+/// Verify - Verify that a variable descriptor is well formed.
+bool DIVariable::Verify() const {
+ if (!isVariable())
+ return false;
+
+ if (getContext() && !getContext().Verify())
+ return false;
+
+ DIType Ty = getType();
+ if (!Ty.Verify())
+ return false;
+
+ return DbgNode->getNumOperands() >= 8;
+}
+
+/// Verify - Verify that a location descriptor is well formed.
+bool DILocation::Verify() const {
+ if (!DbgNode)
+ return false;
+
+ return DbgNode->getNumOperands() == 4;
+}
+
+/// Verify - Verify that a namespace descriptor is well formed.
+bool DINameSpace::Verify() const {
+ if (!isNameSpace())
+ return false;
+ return DbgNode->getNumOperands() == 5;
+}
+
+/// \brief Retrieve the MDNode for the directory/file pair.
+MDNode *DIFile::getFileNode() const {
+ return const_cast<MDNode*>(getNodeField(DbgNode, 1));
+}
+
+/// \brief Verify that the file descriptor is well formed.
+bool DIFile::Verify() const {
+ return isFile() && DbgNode->getNumOperands() == 2;
+}
+
+/// \brief Verify that the enumerator descriptor is well formed.
+bool DIEnumerator::Verify() const {
+ return isEnumerator() && DbgNode->getNumOperands() == 3;
+}
+
+/// \brief Verify that the subrange descriptor is well formed.
+bool DISubrange::Verify() const {
+ return isSubrange() && DbgNode->getNumOperands() == 3;
+}
+
+/// \brief Verify that the lexical block descriptor is well formed.
+bool DILexicalBlock::Verify() const {
+ return isLexicalBlock() && DbgNode->getNumOperands() == 6;
+}
+
+/// \brief Verify that the file-scoped lexical block descriptor is well formed.
+bool DILexicalBlockFile::Verify() const {
+ return isLexicalBlockFile() && DbgNode->getNumOperands() == 3;
+}
+
+/// \brief Verify that the template type parameter descriptor is well formed.
+bool DITemplateTypeParameter::Verify() const {
+ return isTemplateTypeParameter() && DbgNode->getNumOperands() == 7;
+}
+
+/// \brief Verify that the template value parameter descriptor is well formed.
+bool DITemplateValueParameter::Verify() const {
+ return isTemplateValueParameter() && DbgNode->getNumOperands() == 8;
+}
+
+/// getOriginalTypeSize - If this type is derived from a base type then
+/// return base type size.
+uint64_t DIDerivedType::getOriginalTypeSize() const {
+ unsigned Tag = getTag();
+
+ if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
+ Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
+ Tag != dwarf::DW_TAG_restrict_type)
+ return getSizeInBits();
+
+ DIType BaseType = getTypeDerivedFrom();
+
+ // If this type is not derived from any type then take conservative approach.
+ if (!BaseType.isValid())
+ return getSizeInBits();
+
+ // If this is a derived type, go ahead and get the base type, unless it's a
+ // reference then it's just the size of the field. Pointer types have no need
+ // of this since they're a different type of qualification on the type.
+ if (BaseType.getTag() == dwarf::DW_TAG_reference_type ||
+ BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type)
+ return getSizeInBits();
+
+ if (BaseType.isDerivedType())
+ return DIDerivedType(BaseType).getOriginalTypeSize();
+
+ return BaseType.getSizeInBits();
+}
+
+/// getObjCProperty - Return property node, if this ivar is associated with one.
+MDNode *DIDerivedType::getObjCProperty() const {
+ if (DbgNode->getNumOperands() <= 10)
+ return NULL;
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10));
+}
+
+/// \brief Set the array of member DITypes.
+void DICompositeType::setTypeArray(DIArray Elements, DIArray TParams) {
+ assert((!TParams || DbgNode->getNumOperands() == 14) &&
+ "If you're setting the template parameters this should include a slot "
+ "for that!");
+ TrackingVH<MDNode> N(*this);
+ N->replaceOperandWith(10, Elements);
+ if (TParams)
+ N->replaceOperandWith(13, TParams);
+ DbgNode = N;
+}
+
+/// \brief Set the containing type.
+void DICompositeType::setContainingType(DICompositeType ContainingType) {
+ TrackingVH<MDNode> N(*this);
+ N->replaceOperandWith(12, ContainingType);
+ DbgNode = N;
+}
+
+/// isInlinedFnArgument - Return true if this variable provides debugging
+/// information for an inlined function argument.
+bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
+ assert(CurFn && "Invalid function");
+ if (!getContext().isSubprogram())
+ return false;
+ // This variable is not an inlined function argument if its scope
+ // does not describe the current function.
+ return !DISubprogram(getContext()).describes(CurFn);
+}
+
+/// describes - Return true if this subprogram provides debugging
+/// information for the function F.
+bool DISubprogram::describes(const Function *F) {
+ assert(F && "Invalid function");
+ if (F == getFunction())
+ return true;
+ StringRef Name = getLinkageName();
+ if (Name.empty())
+ Name = getName();
+ if (F->getName() == Name)
+ return true;
+ return false;
+}
+
+unsigned DISubprogram::isOptimized() const {
+ assert(DbgNode && "Invalid subprogram descriptor!");
+ if (DbgNode->getNumOperands() == 15)
+ return getUnsignedField(14);
+ return 0;
+}
+
+MDNode *DISubprogram::getVariablesNodes() const {
+ if (!DbgNode || DbgNode->getNumOperands() <= 18)
+ return NULL;
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(18));
+}
+
+DIArray DISubprogram::getVariables() const {
+ if (!DbgNode || DbgNode->getNumOperands() <= 18)
+ return DIArray();
+ if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(18)))
+ return DIArray(T);
+ return DIArray();
+}
+
+StringRef DIScope::getFilename() const {
+ if (!DbgNode)
+ return StringRef();
+ return ::getStringField(getNodeField(DbgNode, 1), 0);
+}
+
+StringRef DIScope::getDirectory() const {
+ if (!DbgNode)
+ return StringRef();
+ return ::getStringField(getNodeField(DbgNode, 1), 1);
+}
+
+DIArray DICompileUnit::getEnumTypes() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7)))
+ return DIArray(N);
+ return DIArray();
+}
+
+DIArray DICompileUnit::getRetainedTypes() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8)))
+ return DIArray(N);
+ return DIArray();
+}
+
+DIArray DICompileUnit::getSubprograms() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9)))
+ return DIArray(N);
+ return DIArray();
+}
+
+
+DIArray DICompileUnit::getGlobalVariables() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 12)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
+ return DIArray(N);
+ return DIArray();
+}
+
+/// fixupObjcLikeName - Replace the special characters used in typical
+/// Objective-C names with '.' in the given string.
+static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
+ bool isObjCLike = false;
+ for (size_t i = 0, e = Str.size(); i < e; ++i) {
+ char C = Str[i];
+ if (C == '[')
+ isObjCLike = true;
+
+ if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' ||
+ C == '+' || C == '(' || C == ')'))
+ Out.push_back('.');
+ else
+ Out.push_back(C);
+ }
+}
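+
+// For example, "-[NSString stringWithFormat:]" becomes
+// "-.NSString.stringWithFormat..": the leading '-' is copied verbatim since
+// rewriting only starts at the first '[', after which each of '[', ' ', ':'
+// and ']' is replaced with '.'.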
+
+/// getFnSpecificMDNode - Return a NameMDNode, if available, that is
+/// suitable to hold function specific information.
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) {
+ SmallString<32> Name = StringRef("llvm.dbg.lv.");
+ StringRef FName = "fn";
+ if (Fn.getFunction())
+ FName = Fn.getFunction()->getName();
+ else
+ FName = Fn.getName();
+ char One = '\1';
+ if (FName.startswith(StringRef(&One, 1)))
+ FName = FName.substr(1);
+ fixupObjcLikeName(FName, Name);
+ return M.getNamedMetadata(Name.str());
+}
+
+/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
+/// to hold function specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) {
+ SmallString<32> Name = StringRef("llvm.dbg.lv.");
+ StringRef FName = "fn";
+ if (Fn.getFunction())
+ FName = Fn.getFunction()->getName();
+ else
+ FName = Fn.getName();
+ char One = '\1';
+ if (FName.startswith(StringRef(&One, 1)))
+ FName = FName.substr(1);
+ fixupObjcLikeName(FName, Name);
+
+ return M.getOrInsertNamedMetadata(Name.str());
+}
+
+/// createInlinedVariable - Create a new inlined variable based on current
+/// variable.
+/// @param DV Current Variable.
+/// @param InlinedScope Location at which the current variable is inlined.
+DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
+ LLVMContext &VMContext) {
+ SmallVector<Value *, 16> Elts;
+ // Insert inlined scope as 7th element.
+ for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
+ i == 7 ? Elts.push_back(InlinedScope) :
+ Elts.push_back(DV->getOperand(i));
+ return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// cleanseInlinedVariable - Remove inlined scope from the variable.
+DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
+ SmallVector<Value *, 16> Elts;
+ // Replace the inlined-at scope (operand 7) with a null value.
+ for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
+ i == 7 ?
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))):
+ Elts.push_back(DV->getOperand(i));
+ return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// getDISubprogram - Find subprogram that is enclosing this scope.
+DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
+ DIDescriptor D(Scope);
+ if (D.isSubprogram())
+ return DISubprogram(Scope);
+
+ if (D.isLexicalBlockFile())
+ return getDISubprogram(DILexicalBlockFile(Scope).getContext());
+
+ if (D.isLexicalBlock())
+ return getDISubprogram(DILexicalBlock(Scope).getContext());
+
+ return DISubprogram();
+}
+
+/// getDICompositeType - Find underlying composite type.
+DICompositeType llvm::getDICompositeType(DIType T) {
+ if (T.isCompositeType())
+ return DICompositeType(T);
+
+ if (T.isDerivedType())
+ return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
+
+ return DICompositeType();
+}
+
+/// isSubprogramContext - Return true if Context is either a subprogram
+/// or another context nested inside a subprogram.
+bool llvm::isSubprogramContext(const MDNode *Context) {
+ if (!Context)
+ return false;
+ DIDescriptor D(Context);
+ if (D.isSubprogram())
+ return true;
+ if (D.isType())
+ return isSubprogramContext(DIType(Context).getContext());
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// DebugInfoFinder implementations.
+//===----------------------------------------------------------------------===//
+
+/// processModule - Process entire module and collect debug info.
+void DebugInfoFinder::processModule(const Module &M) {
+ if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ addCompileUnit(CU);
+ DIArray GVs = CU.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(GVs.getElement(i));
+ if (addGlobalVariable(DIG))
+ processType(DIG.getType());
+ }
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ processSubprogram(DISubprogram(SPs.getElement(i)));
+ DIArray EnumTypes = CU.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ processType(DIType(EnumTypes.getElement(i)));
+ DIArray RetainedTypes = CU.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ processType(DIType(RetainedTypes.getElement(i)));
+ // FIXME: We really shouldn't be bailing out after visiting just one CU
+ return;
+ }
+ }
+}
+
+/// processLocation - Process DILocation.
+void DebugInfoFinder::processLocation(DILocation Loc) {
+ if (!Loc.Verify()) return;
+ DIDescriptor S(Loc.getScope());
+ if (S.isCompileUnit())
+ addCompileUnit(DICompileUnit(S));
+ else if (S.isSubprogram())
+ processSubprogram(DISubprogram(S));
+ else if (S.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(S));
+ else if (S.isLexicalBlockFile()) {
+ DILexicalBlockFile DBF = DILexicalBlockFile(S);
+ processLexicalBlock(DILexicalBlock(DBF.getScope()));
+ }
+ processLocation(Loc.getOrigLocation());
+}
+
+/// processType - Process DIType.
+void DebugInfoFinder::processType(DIType DT) {
+ if (!addType(DT))
+ return;
+ if (DT.isCompositeType()) {
+ DICompositeType DCT(DT);
+ processType(DCT.getTypeDerivedFrom());
+ DIArray DA = DCT.getTypeArray();
+ for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
+ DIDescriptor D = DA.getElement(i);
+ if (D.isType())
+ processType(DIType(D));
+ else if (D.isSubprogram())
+ processSubprogram(DISubprogram(D));
+ }
+ } else if (DT.isDerivedType()) {
+ DIDerivedType DDT(DT);
+ processType(DDT.getTypeDerivedFrom());
+ }
+}
+
+/// processLexicalBlock - Process DILexicalBlock.
+void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
+ DIScope Context = LB.getContext();
+ if (Context.isLexicalBlock())
+ return processLexicalBlock(DILexicalBlock(Context));
+ else if (Context.isLexicalBlockFile()) {
+ DILexicalBlockFile DBF = DILexicalBlockFile(Context);
+ return processLexicalBlock(DILexicalBlock(DBF.getScope()));
+ }
+ else
+ return processSubprogram(DISubprogram(Context));
+}
+
+/// processSubprogram - Process DISubprogram.
+void DebugInfoFinder::processSubprogram(DISubprogram SP) {
+ if (!addSubprogram(SP))
+ return;
+ processType(SP.getType());
+}
+
+/// processDeclare - Process DbgDeclareInst.
+void DebugInfoFinder::processDeclare(const DbgDeclareInst *DDI) {
+ MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
+ if (!N) return;
+
+ DIDescriptor DV(N);
+ if (!DV.isVariable())
+ return;
+
+ if (!NodesSeen.insert(DV))
+ return;
+ processType(DIVariable(N).getType());
+}
+
+/// addType - Add type into Tys.
+bool DebugInfoFinder::addType(DIType DT) {
+ if (!DT.isValid())
+ return false;
+
+ if (!NodesSeen.insert(DT))
+ return false;
+
+ TYs.push_back(DT);
+ return true;
+}
+
+/// addCompileUnit - Add compile unit into CUs.
+bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+ if (!CU.Verify())
+ return false;
+
+ if (!NodesSeen.insert(CU))
+ return false;
+
+ CUs.push_back(CU);
+ return true;
+}
+
+/// addGlobalVariable - Add global variable into GVs.
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+ if (!DIDescriptor(DIG).isGlobalVariable())
+ return false;
+
+ if (!NodesSeen.insert(DIG))
+ return false;
+
+ GVs.push_back(DIG);
+ return true;
+}
+
+/// addSubprogram - Add subprogram into SPs.
+bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+ if (!DIDescriptor(SP).isSubprogram())
+ return false;
+
+ if (!NodesSeen.insert(SP))
+ return false;
+
+ SPs.push_back(SP);
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor: dump routines for all descriptors.
+//===----------------------------------------------------------------------===//
+
+/// dump - Print descriptor to dbgs() with a newline.
+void DIDescriptor::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// print - Print descriptor.
+void DIDescriptor::print(raw_ostream &OS) const {
+ if (!DbgNode) return;
+
+ if (const char *Tag = dwarf::TagString(getTag()))
+ OS << "[ " << Tag << " ]";
+
+ if (this->isSubrange()) {
+ DISubrange(DbgNode).printInternal(OS);
+ } else if (this->isCompileUnit()) {
+ DICompileUnit(DbgNode).printInternal(OS);
+ } else if (this->isFile()) {
+ DIFile(DbgNode).printInternal(OS);
+ } else if (this->isEnumerator()) {
+ DIEnumerator(DbgNode).printInternal(OS);
+ } else if (this->isBasicType()) {
+ DIType(DbgNode).printInternal(OS);
+ } else if (this->isDerivedType()) {
+ DIDerivedType(DbgNode).printInternal(OS);
+ } else if (this->isCompositeType()) {
+ DICompositeType(DbgNode).printInternal(OS);
+ } else if (this->isSubprogram()) {
+ DISubprogram(DbgNode).printInternal(OS);
+ } else if (this->isGlobalVariable()) {
+ DIGlobalVariable(DbgNode).printInternal(OS);
+ } else if (this->isVariable()) {
+ DIVariable(DbgNode).printInternal(OS);
+ } else if (this->isObjCProperty()) {
+ DIObjCProperty(DbgNode).printInternal(OS);
+ } else if (this->isNameSpace()) {
+ DINameSpace(DbgNode).printInternal(OS);
+ } else if (this->isScope()) {
+ DIScope(DbgNode).printInternal(OS);
+ }
+}
+
+void DISubrange::printInternal(raw_ostream &OS) const {
+ int64_t Count = getCount();
+ if (Count != -1)
+ OS << " [" << getLo() << ", " << Count - 1 << ']';
+ else
+ OS << " [unbounded]";
+}
+
+void DIScope::printInternal(raw_ostream &OS) const {
+ OS << " [" << getDirectory() << "/" << getFilename() << ']';
+}
+
+void DICompileUnit::printInternal(raw_ostream &OS) const {
+ DIScope::printInternal(OS);
+ if (const char *Lang = dwarf::LanguageString(getLanguage()))
+ OS << " [" << Lang << ']';
+}
+
+void DIEnumerator::printInternal(raw_ostream &OS) const {
+ OS << " [" << getName() << " :: " << getEnumValue() << ']';
+}
+
+void DIType::printInternal(raw_ostream &OS) const {
+ if (!DbgNode) return;
+
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "]";
+
+ // TODO: Print context?
+
+ OS << " [line " << getLineNumber()
+ << ", size " << getSizeInBits()
+ << ", align " << getAlignInBits()
+ << ", offset " << getOffsetInBits();
+ if (isBasicType())
+ if (const char *Enc =
+ dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding()))
+ OS << ", enc " << Enc;
+ OS << "]";
+
+ if (isPrivate())
+ OS << " [private]";
+ else if (isProtected())
+ OS << " [protected]";
+
+ if (isArtificial())
+ OS << " [artificial]";
+
+ if (isForwardDecl())
+ OS << " [fwd]";
+ if (isVector())
+ OS << " [vector]";
+ if (isStaticMember())
+ OS << " [static]";
+}
+
+void DIDerivedType::printInternal(raw_ostream &OS) const {
+ DIType::printInternal(OS);
+ OS << " [from " << getTypeDerivedFrom().getName() << ']';
+}
+
+void DICompositeType::printInternal(raw_ostream &OS) const {
+ DIType::printInternal(OS);
+ DIArray A = getTypeArray();
+ OS << " [" << A.getNumElements() << " elements]";
+}
+
+void DINameSpace::printInternal(raw_ostream &OS) const {
+ StringRef Name = getName();
+ if (!Name.empty())
+ OS << " [" << Name << ']';
+
+ OS << " [line " << getLineNumber() << ']';
+}
+
+void DISubprogram::printInternal(raw_ostream &OS) const {
+ // TODO : Print context
+ OS << " [line " << getLineNumber() << ']';
+
+ if (isLocalToUnit())
+ OS << " [local]";
+
+ if (isDefinition())
+ OS << " [def]";
+
+ if (getScopeLineNumber() != getLineNumber())
+ OS << " [scope " << getScopeLineNumber() << "]";
+
+ if (isPrivate())
+ OS << " [private]";
+ else if (isProtected())
+ OS << " [protected]";
+
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << ']';
+}
+
+void DIGlobalVariable::printInternal(raw_ostream &OS) const {
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << ']';
+
+ OS << " [line " << getLineNumber() << ']';
+
+ // TODO : Print context
+
+ if (isLocalToUnit())
+ OS << " [local]";
+
+ if (isDefinition())
+ OS << " [def]";
+}
+
+void DIVariable::printInternal(raw_ostream &OS) const {
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << ']';
+
+ OS << " [line " << getLineNumber() << ']';
+}
+
+void DIObjCProperty::printInternal(raw_ostream &OS) const {
+ StringRef Name = getObjCPropertyName();
+ if (!Name.empty())
+ OS << " [" << Name << ']';
+
+ OS << " [line " << getLineNumber()
+ << ", properties " << getUnsignedField(6) << ']';
+}
+
+static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
+ const LLVMContext &Ctx) {
+ if (!DL.isUnknown()) { // Print source line info.
+ DIScope Scope(DL.getScope(Ctx));
+ // Omit the directory, because it's likely to be long and uninteresting.
+ if (Scope.Verify())
+ CommentOS << Scope.getFilename();
+ else
+ CommentOS << "<unknown>";
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, CommentOS, Ctx);
+ CommentOS << " ]";
+ }
+ }
+}
+
+void DIVariable::printExtendedName(raw_ostream &OS) const {
+ const LLVMContext &Ctx = DbgNode->getContext();
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << Res << "," << getLineNumber();
+ if (MDNode *InlinedAt = getInlinedAt()) {
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
+ if (!InlinedAtDL.isUnknown()) {
+ OS << " @[";
+ printDebugLoc(InlinedAtDL, OS, Ctx);
+ OS << "]";
+ }
+ }
+}
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
new file mode 100644
index 000000000000..c57b5a305303
--- /dev/null
+++ b/lib/IR/DebugLoc.cpp
@@ -0,0 +1,315 @@
+//===-- DebugLoc.cpp - Implement DebugLoc class ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DebugLoc.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/DebugInfo.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DebugLoc Implementation
+//===----------------------------------------------------------------------===//
+
+MDNode *DebugLoc::getScope(const LLVMContext &Ctx) const {
+ if (ScopeIdx == 0) return 0;
+
+ if (ScopeIdx > 0) {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified.
+ assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
+ "Invalid ScopeIdx!");
+ return Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
+ }
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
+}
+
+MDNode *DebugLoc::getInlinedAt(const LLVMContext &Ctx) const {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified. Zero is invalid.
+ if (ScopeIdx >= 0) return 0;
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
+}
+
+/// Return both the Scope and the InlinedAt values.
+void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
+ const LLVMContext &Ctx) const {
+ if (ScopeIdx == 0) {
+ Scope = IA = 0;
+ return;
+ }
+
+ if (ScopeIdx > 0) {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified.
+ assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
+ "Invalid ScopeIdx!");
+ Scope = Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
+ IA = 0;
+ return;
+ }
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ Scope = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
+ IA = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
+}
+
+
+DebugLoc DebugLoc::get(unsigned Line, unsigned Col,
+ MDNode *Scope, MDNode *InlinedAt) {
+ DebugLoc Result;
+
+ // If no scope is available, this is an unknown location.
+ if (Scope == 0) return Result;
+
+ // Saturate line and col to "unknown".
+ if (Col > 255) Col = 0;
+ if (Line >= (1 << 24)) Line = 0;
+ Result.LineCol = Line | (Col << 24);
+
+ LLVMContext &Ctx = Scope->getContext();
+
+ // If there is no inlined-at location, use the ScopeRecords array.
+ if (InlinedAt == 0)
+ Result.ScopeIdx = Ctx.pImpl->getOrAddScopeRecordIdxEntry(Scope, 0);
+ else
+ Result.ScopeIdx = Ctx.pImpl->getOrAddScopeInlinedAtIdxEntry(Scope,
+ InlinedAt, 0);
+
+ return Result;
+}
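+
+// For illustration: the line lives in the low 24 bits of LineCol and the
+// column in the high 8, so Line=7, Col=3 encodes as 7 | (3 << 24) =
+// 0x03000007. A column over 255 or a line of 2^24 or more saturates to 0
+// ("unknown") instead of being stored inexactly.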
+
+/// getAsMDNode - This method converts the compressed DebugLoc node into a
+/// DILocation compatible MDNode.
+MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const {
+ if (isUnknown()) return 0;
+
+ MDNode *Scope, *IA;
+ getScopeAndInlinedAt(Scope, IA, Ctx);
+ assert(Scope && "If scope is null, this should be isUnknown()");
+
+ LLVMContext &Ctx2 = Scope->getContext();
+ Type *Int32 = Type::getInt32Ty(Ctx2);
+ Value *Elts[] = {
+ ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()),
+ Scope, IA
+ };
+ return MDNode::get(Ctx2, Elts);
+}
+
+/// getFromDILocation - Translate the DILocation quad into a DebugLoc.
+DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
+ DILocation Loc(N);
+ MDNode *Scope = Loc.getScope();
+ if (Scope == 0) return DebugLoc();
+ return get(Loc.getLineNumber(), Loc.getColumnNumber(), Scope,
+ Loc.getOrigLocation());
+}
+
+/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc.
+DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) {
+ DILexicalBlock LexBlock(N);
+ MDNode *Scope = LexBlock.getContext();
+ if (Scope == 0) return DebugLoc();
+ return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope, NULL);
+}
+
+void DebugLoc::dump(const LLVMContext &Ctx) const {
+#ifndef NDEBUG
+ if (!isUnknown()) {
+ dbgs() << getLine();
+ if (getCol() != 0)
+ dbgs() << ',' << getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ dbgs() << " @ ";
+ InlinedAtDL.dump(Ctx);
+ } else
+ dbgs() << "\n";
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// DenseMap specialization
+//===----------------------------------------------------------------------===//
+
+unsigned DenseMapInfo<DebugLoc>::getHashValue(const DebugLoc &Key) {
+ return static_cast<unsigned>(hash_combine(Key.LineCol, Key.ScopeIdx));
+}
+
+//===----------------------------------------------------------------------===//
+// LLVMContextImpl Implementation
+//===----------------------------------------------------------------------===//
+
+int LLVMContextImpl::getOrAddScopeRecordIdxEntry(MDNode *Scope,
+ int ExistingIdx) {
+ // If we already have an entry for this scope, return it.
+ int &Idx = ScopeRecordIdx[Scope];
+ if (Idx) return Idx;
+
+ // If we don't have an entry, but ExistingIdx is specified, use it.
+ if (ExistingIdx)
+ return Idx = ExistingIdx;
+
+ // Otherwise add a new entry.
+
+ // Start out ScopeRecords with a minimal reasonable size to avoid
+ // excessive reallocation starting out.
+ if (ScopeRecords.empty())
+ ScopeRecords.reserve(128);
+
+ // The index is biased by 1 so that 0 can mean "no entry".
+ Idx = ScopeRecords.size()+1;
+ ScopeRecords.push_back(DebugRecVH(Scope, this, Idx));
+ return Idx;
+}
+
+int LLVMContextImpl::getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,
+ int ExistingIdx) {
+ // If we already have an entry, return it.
+ int &Idx = ScopeInlinedAtIdx[std::make_pair(Scope, IA)];
+ if (Idx) return Idx;
+
+ // If we don't have an entry, but ExistingIdx is specified, use it.
+ if (ExistingIdx)
+ return Idx = ExistingIdx;
+
+ // Start out ScopeInlinedAtRecords with a minimal reasonable size to avoid
+ // excessive reallocation starting out.
+ if (ScopeInlinedAtRecords.empty())
+ ScopeInlinedAtRecords.reserve(128);
+
+ // Index is biased by 1 and negated.
+ Idx = -ScopeInlinedAtRecords.size()-1;
+ ScopeInlinedAtRecords.push_back(std::make_pair(DebugRecVH(Scope, this, Idx),
+ DebugRecVH(IA, this, Idx)));
+ return Idx;
+}
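+
+// To summarize the convention shared by both caches: ScopeIdx == 0 means an
+// unknown location, a positive index I refers to ScopeRecords[I-1] (scope
+// only), and a negative index -I refers to ScopeInlinedAtRecords[I-1]
+// (a scope/inlined-at pair).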
+
+
+//===----------------------------------------------------------------------===//
+// DebugRecVH Implementation
+//===----------------------------------------------------------------------===//
+
+/// deleted - The MDNode this is pointing to got deleted, so this pointer needs
+/// to drop to null and we need to remove our entry from the DenseMap.
+void DebugRecVH::deleted() {
+ // If this is a non-canonical reference, just drop the value to null, we know
+ // it doesn't have a map entry.
+ if (Idx == 0) {
+ setValPtr(0);
+ return;
+ }
+
+ MDNode *Cur = get();
+
+ // If the index is positive, it is an entry in ScopeRecords.
+ if (Idx > 0) {
+ assert(Ctx->ScopeRecordIdx[Cur] == Idx && "Mapping out of date!");
+ Ctx->ScopeRecordIdx.erase(Cur);
+ // Reset this VH to null and we're done.
+ setValPtr(0);
+ Idx = 0;
+ return;
+ }
+
+ // Otherwise, it is an entry in ScopeInlinedAtRecords, we don't know if it
+ // is the scope or the inlined-at record entry.
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
+ std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
+ assert((this == &Entry.first || this == &Entry.second) &&
+ "Mapping out of date!");
+
+ MDNode *OldScope = Entry.first.get();
+ MDNode *OldInlinedAt = Entry.second.get();
+ assert(OldScope != 0 && OldInlinedAt != 0 &&
+ "Entry should be non-canonical if either val dropped to null");
+
+ // Otherwise, we do have an entry in it, nuke it and we're done.
+ assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&&
+ "Mapping out of date");
+ Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
+
+ // Reset this VH to null. Drop both 'Idx' values to null to indicate that
+ // we're in non-canonical form now.
+ setValPtr(0);
+ Entry.first.Idx = Entry.second.Idx = 0;
+}
+
+void DebugRecVH::allUsesReplacedWith(Value *NewVa) {
+ // If being replaced with a non-mdnode value (e.g. undef) handle this as if
+ // the mdnode got deleted.
+ MDNode *NewVal = dyn_cast<MDNode>(NewVa);
+ if (NewVal == 0) return deleted();
+
+ // If this is a non-canonical reference, just change it, we know it already
+ // doesn't have a map entry.
+ if (Idx == 0) {
+ setValPtr(NewVa);
+ return;
+ }
+
+ MDNode *OldVal = get();
+ assert(OldVal != NewVa && "Node replaced with self?");
+
+ // If the index is positive, it is an entry in ScopeRecords.
+ if (Idx > 0) {
+ assert(Ctx->ScopeRecordIdx[OldVal] == Idx && "Mapping out of date!");
+ Ctx->ScopeRecordIdx.erase(OldVal);
+ setValPtr(NewVal);
+
+ int NewEntry = Ctx->getOrAddScopeRecordIdxEntry(NewVal, Idx);
+
+ // If NewVal already has an entry, this becomes a non-canonical reference,
+ // just drop Idx to 0 to signify this.
+ if (NewEntry != Idx)
+ Idx = 0;
+ return;
+ }
+
+ // Otherwise, it is an entry in ScopeInlinedAtRecords, we don't know if it
+ // is the scope or the inlined-at record entry.
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
+ std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
+ assert((this == &Entry.first || this == &Entry.second) &&
+ "Mapping out of date!");
+
+ MDNode *OldScope = Entry.first.get();
+ MDNode *OldInlinedAt = Entry.second.get();
+ assert(OldScope != 0 && OldInlinedAt != 0 &&
+ "Entry should be non-canonical if either val dropped to null");
+
+ // Otherwise, we do have an entry in it, nuke it and we're done.
+ assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&&
+ "Mapping out of date");
+ Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
+
+ // Reset this VH to the new value.
+ setValPtr(NewVal);
+
+ int NewIdx = Ctx->getOrAddScopeInlinedAtIdxEntry(Entry.first.get(),
+ Entry.second.get(), Idx);
+ // If NewVal already has an entry, this becomes a non-canonical reference,
+ // just drop Idx to 0 to signify this.
+ if (NewIdx != Idx) {
+ std::pair<DebugRecVH, DebugRecVH> &Entry=Ctx->ScopeInlinedAtRecords[-Idx-1];
+ Entry.first.Idx = Entry.second.Idx = 0;
+ }
+}
diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp
new file mode 100644
index 000000000000..a1160cdc83b1
--- /dev/null
+++ b/lib/IR/Dominators.cpp
@@ -0,0 +1,302 @@
+//===- Dominators.cpp - Dominator Calculation -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators. Postdominators are available in libanalysis, but are not
+// included in libvmcore, because they're not needed. Forward dominators are
+// needed to support the Verifier pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+// Always verify dominfo if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyDomInfo = true;
+#else
+static bool VerifyDomInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
+ cl::desc("Verify dominator info (time consuming)"));
+
+bool BasicBlockEdge::isSingleEdge() const {
+ const TerminatorInst *TI = Start->getTerminator();
+ unsigned NumEdgesToEnd = 0;
+ for (unsigned int i = 0, n = TI->getNumSuccessors(); i < n; ++i) {
+ if (TI->getSuccessor(i) == End)
+ ++NumEdgesToEnd;
+ if (NumEdgesToEnd >= 2)
+ return false;
+ }
+ assert(NumEdgesToEnd == 1);
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// DominatorTree Implementation
+//===----------------------------------------------------------------------===//
+//
+// Provide public access to DominatorTree information. Implementation details
+// can be found in DominatorInternals.h.
+//
+//===----------------------------------------------------------------------===//
+
+TEMPLATE_INSTANTIATION(class llvm::DomTreeNodeBase<BasicBlock>);
+TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>);
+
+char DominatorTree::ID = 0;
+INITIALIZE_PASS(DominatorTree, "domtree",
+ "Dominator Tree Construction", true, true)
+
+bool DominatorTree::runOnFunction(Function &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+void DominatorTree::verifyAnalysis() const {
+ if (!VerifyDomInfo) return;
+
+ Function &F = *getRoot()->getParent();
+
+ DominatorTree OtherDT;
+ OtherDT.getBase().recalculate(F);
+ if (compare(OtherDT)) {
+ errs() << "DominatorTree is not up to date!\nComputed:\n";
+ print(errs());
+ errs() << "\nActual:\n";
+ OtherDT.print(errs());
+ abort();
+ }
+}
+
+void DominatorTree::print(raw_ostream &OS, const Module *) const {
+ DT->print(OS);
+}
+
+// dominates - Return true if Def dominates a use in User. This performs
+// the special checks necessary if Def and User are in the same basic block.
+// Note that Def doesn't dominate a use in Def itself!
+bool DominatorTree::dominates(const Instruction *Def,
+ const Instruction *User) const {
+ const BasicBlock *UseBB = User->getParent();
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Any unreachable use is dominated, even if Def == User.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ // An instruction doesn't dominate a use in itself.
+ if (Def == User)
+ return false;
+
+ // The value defined by an invoke dominates an instruction only if
+ // it dominates every instruction in UseBB.
+ // A PHI is dominated only if the instruction dominates every possible use
+ // in the UseBB.
+ if (isa<InvokeInst>(Def) || isa<PHINode>(User))
+ return dominates(Def, UseBB);
+
+ if (DefBB != UseBB)
+ return dominates(DefBB, UseBB);
+
+ // Loop through the basic block until we find Def or User.
+ BasicBlock::const_iterator I = DefBB->begin();
+ for (; &*I != Def && &*I != User; ++I)
+ /*empty*/;
+
+ return &*I == Def;
+}
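+
+// For example, with %a = add i32 %x, %y followed by %b = mul i32 %a, 2 in
+// the same reachable block, dominates(%a, %b) is true, while
+// dominates(%a, %a) is false: an instruction never dominates a use in
+// itself.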
+
+// Return true if Def would dominate a use in any instruction in UseBB.
+// Note that dominates(Def, Def->getParent()) is false.
+bool DominatorTree::dominates(const Instruction *Def,
+ const BasicBlock *UseBB) const {
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Any unreachable use is dominated, even if DefBB == UseBB.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ if (DefBB == UseBB)
+ return false;
+
+ const InvokeInst *II = dyn_cast<InvokeInst>(Def);
+ if (!II)
+ return dominates(DefBB, UseBB);
+
+ // Invoke results are only usable in the normal destination, not in the
+ // exceptional destination.
+ BasicBlock *NormalDest = II->getNormalDest();
+ BasicBlockEdge E(DefBB, NormalDest);
+ return dominates(E, UseBB);
+}
+
+bool DominatorTree::dominates(const BasicBlockEdge &BBE,
+ const BasicBlock *UseBB) const {
+  // Assert that we have a single edge. We could handle multi-edges by
+  // simply returning false, but since isSingleEdge is linear in the number
+  // of edges, the callers can normally handle them more efficiently.
+ assert(BBE.isSingleEdge());
+
+ // If the BB the edge ends in doesn't dominate the use BB, then the
+ // edge also doesn't.
+ const BasicBlock *Start = BBE.getStart();
+ const BasicBlock *End = BBE.getEnd();
+ if (!dominates(End, UseBB))
+ return false;
+
+ // Simple case: if the end BB has a single predecessor, the fact that it
+ // dominates the use block implies that the edge also does.
+ if (End->getSinglePredecessor())
+ return true;
+
+ // The normal edge from the invoke is critical. Conceptually, what we would
+ // like to do is split it and check if the new block dominates the use.
+ // With X being the new block, the graph would look like:
+ //
+ // DefBB
+ // /\ . .
+ // / \ . .
+ // / \ . .
+ // / \ | |
+ // A X B C
+ // | \ | /
+ // . \|/
+ // . NormalDest
+ // .
+ //
+ // Given the definition of dominance, NormalDest is dominated by X iff X
+ // dominates all of NormalDest's predecessors (X, B, C in the example). X
+ // trivially dominates itself, so we only have to find if it dominates the
+ // other predecessors. Since the only way out of X is via NormalDest, X can
+ // only properly dominate a node if NormalDest dominates that node too.
+ for (const_pred_iterator PI = pred_begin(End), E = pred_end(End);
+ PI != E; ++PI) {
+ const BasicBlock *BB = *PI;
+ if (BB == Start)
+ continue;
+
+ if (!dominates(End, BB))
+ return false;
+ }
+ return true;
+}
+
+bool DominatorTree::dominates(const BasicBlockEdge &BBE,
+ const Use &U) const {
+  // Assert that we have a single edge. We could handle multi-edges by
+  // simply returning false, but since isSingleEdge is linear in the number
+  // of edges, the callers can normally handle them more efficiently.
+ assert(BBE.isSingleEdge());
+
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+  // A PHI in the end block of the edge is dominated by the edge.
+ PHINode *PN = dyn_cast<PHINode>(UserInst);
+ if (PN && PN->getParent() == BBE.getEnd() &&
+ PN->getIncomingBlock(U) == BBE.getStart())
+ return true;
+
+ // Otherwise use the edge-dominates-block query, which
+ // handles the crazy critical edge cases properly.
+ const BasicBlock *UseBB;
+ if (PN)
+ UseBB = PN->getIncomingBlock(U);
+ else
+ UseBB = UserInst->getParent();
+ return dominates(BBE, UseBB);
+}
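+
+// For example, a PHI "%p = phi i32 [ %v, %Start ], ..." in the edge's end
+// block is dominated by the edge (Start, End) for that incoming use of %v,
+// even when the edge does not dominate the whole end block.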
+
+bool DominatorTree::dominates(const Instruction *Def,
+ const Use &U) const {
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Determine the block in which the use happens. PHI nodes use
+ // their operands on edges; simulate this by thinking of the use
+ // happening at the end of the predecessor block.
+ const BasicBlock *UseBB;
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UseBB = PN->getIncomingBlock(U);
+ else
+ UseBB = UserInst->getParent();
+
+ // Any unreachable use is dominated, even if Def == User.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ // Invoke instructions define their return values on the edges
+ // to their normal successors, so we have to handle them specially.
+ // Among other things, this means they don't dominate anything in
+ // their own block, except possibly a phi, so we don't need to
+ // walk the block in any case.
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(Def)) {
+ BasicBlock *NormalDest = II->getNormalDest();
+ BasicBlockEdge E(DefBB, NormalDest);
+ return dominates(E, U);
+ }
+
+ // If the def and use are in different blocks, do a simple CFG dominator
+ // tree query.
+ if (DefBB != UseBB)
+ return dominates(DefBB, UseBB);
+
+  // Ok, def and use are in the same block. The invoke case was handled
+  // above, so the def is an ordinary instruction. If the user is a PHI,
+  // the use happens on an incoming edge at the end of this block, so the
+  // def dominates it.
+ if (isa<PHINode>(UserInst))
+ return true;
+
+ // Otherwise, just loop through the basic block until we find Def or User.
+ BasicBlock::const_iterator I = DefBB->begin();
+ for (; &*I != Def && &*I != UserInst; ++I)
+ /*empty*/;
+
+ return &*I != UserInst;
+}
+
+bool DominatorTree::isReachableFromEntry(const Use &U) const {
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+
+ // ConstantExprs aren't really reachable from the entry block, but they
+ // don't need to be treated like unreachable code either.
+ if (!I) return true;
+
+ // PHI nodes use their operands on their incoming edges.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ return isReachableFromEntry(PN->getIncomingBlock(U));
+
+ // Everything else uses their operands in their own block.
+ return isReachableFromEntry(I->getParent());
+}
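+
+// A typical client pass requires this analysis and then queries it, e.g.
+// (illustrative sketch):
+//   DominatorTree &DT = getAnalysis<DominatorTree>();
+//   if (DT.dominates(DefInst, U))
+//     ...; // The definition is available at the use U.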
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
new file mode 100644
index 000000000000..1e72b90a13ce
--- /dev/null
+++ b/lib/IR/Function.cpp
@@ -0,0 +1,707 @@
+//===-- Function.cpp - Implement the Function class -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Function class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Function.h"
+#include "LLVMContextImpl.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/RWMutex.h"
+#include "llvm/Support/StringPool.h"
+#include "llvm/Support/Threading.h"
+using namespace llvm;
+
+// Explicit instantiations of SymbolTableListTraits since some of the methods
+// are not in the public header file...
+template class llvm::SymbolTableListTraits<Argument, Function>;
+template class llvm::SymbolTableListTraits<BasicBlock, Function>;
+
+//===----------------------------------------------------------------------===//
+// Argument Implementation
+//===----------------------------------------------------------------------===//
+
+void Argument::anchor() { }
+
+Argument::Argument(Type *Ty, const Twine &Name, Function *Par)
+ : Value(Ty, Value::ArgumentVal) {
+ Parent = 0;
+
+ // Make sure that we get added to a function
+ LeakDetector::addGarbageObject(this);
+
+ if (Par)
+ Par->getArgumentList().push_back(this);
+ setName(Name);
+}
+
+void Argument::setParent(Function *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// getArgNo - Return the index of this formal argument in its containing
+/// function. For example in "void foo(int a, float b)" a is 0 and b is 1.
+unsigned Argument::getArgNo() const {
+ const Function *F = getParent();
+ assert(F && "Argument is not in a function");
+
+ Function::const_arg_iterator AI = F->arg_begin();
+ unsigned ArgIdx = 0;
+ for (; &*AI != this; ++AI)
+ ++ArgIdx;
+
+ return ArgIdx;
+}
+
+/// hasByValAttr - Return true if this argument has the byval attribute on it
+/// in its containing function.
+bool Argument::hasByValAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::ByVal);
+}
+
+unsigned Argument::getParamAlignment() const {
+ assert(getType()->isPointerTy() && "Only pointers have alignments");
+  return getParent()->getParamAlignment(getArgNo()+1);
+}
+
+/// hasNestAttr - Return true if this argument has the nest attribute on
+/// it in its containing function.
+bool Argument::hasNestAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::Nest);
+}
+
+/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
+/// it in its containing function.
+bool Argument::hasNoAliasAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::NoAlias);
+}
+
+/// hasNoCaptureAttr - Return true if this argument has the nocapture attribute
+/// on it in its containing function.
+bool Argument::hasNoCaptureAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::NoCapture);
+}
+
+/// hasStructRetAttr - Return true if this argument has the sret attribute on
+/// it in its containing function.
+bool Argument::hasStructRetAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ if (this != getParent()->arg_begin())
+ return false; // StructRet param must be first param
+ return getParent()->getAttributes().
+ hasAttribute(1, Attribute::StructRet);
+}
+
+/// addAttr - Add attributes to an argument.
+void Argument::addAttr(AttributeSet AS) {
+ assert(AS.getNumSlots() <= 1 &&
+ "Trying to add more than one attribute set to an argument!");
+ AttrBuilder B(AS, AS.getSlotIndex(0));
+ getParent()->addAttributes(getArgNo() + 1,
+ AttributeSet::get(Parent->getContext(),
+ getArgNo() + 1, B));
+}
+
+/// removeAttr - Remove attributes from an argument.
+void Argument::removeAttr(AttributeSet AS) {
+ assert(AS.getNumSlots() <= 1 &&
+ "Trying to remove more than one attribute set from an argument!");
+ AttrBuilder B(AS, AS.getSlotIndex(0));
+ getParent()->removeAttributes(getArgNo() + 1,
+ AttributeSet::get(Parent->getContext(),
+ getArgNo() + 1, B));
+}
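+
+// Note the index convention used by addAttr and removeAttr above: in a
+// function's AttributeSet, index 0 refers to the return value and parameter
+// attributes start at index 1, hence the getArgNo() + 1 adjustments.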
+
+//===----------------------------------------------------------------------===//
+// Helper Methods in Function
+//===----------------------------------------------------------------------===//
+
+LLVMContext &Function::getContext() const {
+ return getType()->getContext();
+}
+
+FunctionType *Function::getFunctionType() const {
+ return cast<FunctionType>(getType()->getElementType());
+}
+
+bool Function::isVarArg() const {
+ return getFunctionType()->isVarArg();
+}
+
+Type *Function::getReturnType() const {
+ return getFunctionType()->getReturnType();
+}
+
+void Function::removeFromParent() {
+ getParent()->getFunctionList().remove(this);
+}
+
+void Function::eraseFromParent() {
+ getParent()->getFunctionList().erase(this);
+}
+
+//===----------------------------------------------------------------------===//
+// Function Implementation
+//===----------------------------------------------------------------------===//
+
+Function::Function(FunctionType *Ty, LinkageTypes Linkage,
+ const Twine &name, Module *ParentModule)
+ : GlobalValue(PointerType::getUnqual(Ty),
+ Value::FunctionVal, 0, 0, Linkage, name) {
+ assert(FunctionType::isValidReturnType(getReturnType()) &&
+ "invalid return type");
+ SymTab = new ValueSymbolTable();
+
+ // If the function has arguments, mark them as lazily built.
+ if (Ty->getNumParams())
+ setValueSubclassData(1); // Set the "has lazy arguments" bit.
+
+  // Make sure that we get added to a module
+ LeakDetector::addGarbageObject(this);
+
+ if (ParentModule)
+ ParentModule->getFunctionList().push_back(this);
+
+ // Ensure intrinsics have the right parameter attributes.
+ if (unsigned IID = getIntrinsicID())
+ setAttributes(Intrinsic::getAttributes(getContext(), Intrinsic::ID(IID)));
+}
+
+Function::~Function() {
+ dropAllReferences(); // After this it is safe to delete instructions.
+
+ // Delete all of the method arguments and unlink from symbol table...
+ ArgumentList.clear();
+ delete SymTab;
+
+ // Remove the function from the on-the-side GC table.
+ clearGC();
+
+ // Remove the intrinsicID from the Cache.
+ if (getValueName() && isIntrinsic())
+ getContext().pImpl->IntrinsicIDCache.erase(this);
+}
+
+void Function::BuildLazyArguments() const {
+ // Create the arguments vector, all arguments start out unnamed.
+ FunctionType *FT = getFunctionType();
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ assert(!FT->getParamType(i)->isVoidTy() &&
+ "Cannot have void typed arguments!");
+ ArgumentList.push_back(new Argument(FT->getParamType(i)));
+ }
+
+ // Clear the lazy arguments bit.
+ unsigned SDC = getSubclassDataFromValue();
+ const_cast<Function*>(this)->setValueSubclassData(SDC &= ~1);
+}
+
+size_t Function::arg_size() const {
+ return getFunctionType()->getNumParams();
+}
+bool Function::arg_empty() const {
+ return getFunctionType()->getNumParams() == 0;
+}
+
+void Function::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+// dropAllReferences() - This function causes all the subinstructions to "let
+// go" of all references that they are maintaining. This allows one to
+// 'delete' a whole module at a time, even though there may be circular
+// references... first all references are dropped, and all use counts go to
+// zero. Then everything is deleted for real. Note that no operations are
+// valid on an object that has "dropped all references", except operator
+// delete.
+//
+void Function::dropAllReferences() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+
+ // Delete all basic blocks. They are now unused, except possibly by
+ // blockaddresses, but BasicBlock's destructor takes care of those.
+ while (!BasicBlocks.empty())
+ BasicBlocks.begin()->eraseFromParent();
+}
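+
+// For example, two blocks that branch to each other keep each other's
+// terminators alive; dropping all references first breaks such cycles so
+// the loop above can erase the blocks one at a time.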
+
+void Function::addAttribute(unsigned i, Attribute::AttrKind attr) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addAttribute(getContext(), i, attr);
+ setAttributes(PAL);
+}
+
+void Function::addAttributes(unsigned i, AttributeSet attrs) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addAttributes(getContext(), i, attrs);
+ setAttributes(PAL);
+}
+
+void Function::removeAttributes(unsigned i, AttributeSet attrs) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.removeAttributes(getContext(), i, attrs);
+ setAttributes(PAL);
+}
+
+// Maintain the GC name for each function in an on-the-side table. This saves
+// allocating an additional word in Function for programs which do not use GC
+// (i.e., most programs) at the cost of increased overhead for clients which do
+// use GC.
+static DenseMap<const Function*,PooledStringPtr> *GCNames;
+static StringPool *GCNamePool;
+static ManagedStatic<sys::SmartRWMutex<true> > GCLock;
+
+bool Function::hasGC() const {
+ sys::SmartScopedReader<true> Reader(*GCLock);
+ return GCNames && GCNames->count(this);
+}
+
+const char *Function::getGC() const {
+ assert(hasGC() && "Function has no collector");
+ sys::SmartScopedReader<true> Reader(*GCLock);
+ return *(*GCNames)[this];
+}
+
+void Function::setGC(const char *Str) {
+ sys::SmartScopedWriter<true> Writer(*GCLock);
+ if (!GCNamePool)
+ GCNamePool = new StringPool();
+ if (!GCNames)
+ GCNames = new DenseMap<const Function*,PooledStringPtr>();
+ (*GCNames)[this] = GCNamePool->intern(Str);
+}
+
+void Function::clearGC() {
+ sys::SmartScopedWriter<true> Writer(*GCLock);
+ if (GCNames) {
+ GCNames->erase(this);
+ if (GCNames->empty()) {
+ delete GCNames;
+ GCNames = 0;
+ if (GCNamePool->empty()) {
+ delete GCNamePool;
+ GCNamePool = 0;
+ }
+ }
+ }
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a Function) from the Function Src to this one.
+void Function::copyAttributesFrom(const GlobalValue *Src) {
+ assert(isa<Function>(Src) && "Expected a Function!");
+ GlobalValue::copyAttributesFrom(Src);
+ const Function *SrcF = cast<Function>(Src);
+ setCallingConv(SrcF->getCallingConv());
+ setAttributes(SrcF->getAttributes());
+ if (SrcF->hasGC())
+ setGC(SrcF->getGC());
+ else
+ clearGC();
+}
+
+/// getIntrinsicID - This method returns the ID number of the specified
+/// function, or Intrinsic::not_intrinsic if the function is not an
+/// intrinsic, or if the pointer is null. This value is always defined to be
+/// zero to allow easy checking for whether a function is intrinsic or not. The
+/// particular intrinsic functions which correspond to this value are defined in
+/// llvm/IR/Intrinsics.h. Results are cached in the LLVM context, so
+/// subsequent requests for the same ID are answered much faster from the
+/// cache.
+///
+unsigned Function::getIntrinsicID() const {
+ const ValueName *ValName = this->getValueName();
+ if (!ValName || !isIntrinsic())
+ return 0;
+
+ LLVMContextImpl::IntrinsicIDCacheTy &IntrinsicIDCache =
+ getContext().pImpl->IntrinsicIDCache;
+ if (!IntrinsicIDCache.count(this)) {
+ unsigned Id = lookupIntrinsicID();
+    IntrinsicIDCache[this] = Id;
+ return Id;
+ }
+ return IntrinsicIDCache[this];
+}
+
+/// This private method does the actual lookup of an intrinsic ID when the query
+/// could not be answered from the cache.
+unsigned Function::lookupIntrinsicID() const {
+ const ValueName *ValName = this->getValueName();
+ unsigned Len = ValName->getKeyLength();
+ const char *Name = ValName->getKeyData();
+
+#define GET_FUNCTION_RECOGNIZER
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_FUNCTION_RECOGNIZER
+
+ return 0;
+}
+
+std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
+ assert(id < num_intrinsics && "Invalid intrinsic ID!");
+ static const char * const Table[] = {
+ "not_intrinsic",
+#define GET_INTRINSIC_NAME_TABLE
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+ if (Tys.empty())
+ return Table[id];
+ std::string Result(Table[id]);
+  for (unsigned i = 0, e = Tys.size(); i != e; ++i) {
+    if (PointerType *PTyp = dyn_cast<PointerType>(Tys[i])) {
+      Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) +
+                EVT::getEVT(PTyp->getElementType()).getEVTString();
+    } else if (Tys[i])
+      Result += "." + EVT::getEVT(Tys[i]).getEVTString();
+ }
+ return Result;
+}
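+
+// For example, the overloaded intrinsic llvm.memcpy specialized for i8
+// pointers in address space 0 and an i64 length is named
+// "llvm.memcpy.p0i8.p0i8.i64": ".p<addrspace><type>" for pointer overloads
+// and ".<type>" otherwise, exactly as built above.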
+
+
+/// IIT_Info - These are enumerators that describe the entries returned by the
+/// getIntrinsicInfoTableEntries function.
+///
+/// NOTE: This must be kept in sync with the copy in TblGen/IntrinsicEmitter!
+enum IIT_Info {
+ // Common values should be encoded with 0-15.
+ IIT_Done = 0,
+ IIT_I1 = 1,
+ IIT_I8 = 2,
+ IIT_I16 = 3,
+ IIT_I32 = 4,
+ IIT_I64 = 5,
+ IIT_F16 = 6,
+ IIT_F32 = 7,
+ IIT_F64 = 8,
+ IIT_V2 = 9,
+ IIT_V4 = 10,
+ IIT_V8 = 11,
+ IIT_V16 = 12,
+ IIT_V32 = 13,
+ IIT_PTR = 14,
+ IIT_ARG = 15,
+
+ // Values from 16+ are only encodable with the inefficient encoding.
+ IIT_MMX = 16,
+ IIT_METADATA = 17,
+ IIT_EMPTYSTRUCT = 18,
+ IIT_STRUCT2 = 19,
+ IIT_STRUCT3 = 20,
+ IIT_STRUCT4 = 21,
+ IIT_STRUCT5 = 22,
+ IIT_EXTEND_VEC_ARG = 23,
+ IIT_TRUNC_VEC_ARG = 24,
+ IIT_ANYPTR = 25
+};
+
+
+static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
+ SmallVectorImpl<Intrinsic::IITDescriptor> &OutputTable) {
+ IIT_Info Info = IIT_Info(Infos[NextElt++]);
+ unsigned StructElts = 2;
+ using namespace Intrinsic;
+
+ switch (Info) {
+ case IIT_Done:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Void, 0));
+ return;
+ case IIT_MMX:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0));
+ return;
+ case IIT_METADATA:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Metadata, 0));
+ return;
+ case IIT_F16:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Half, 0));
+ return;
+ case IIT_F32:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Float, 0));
+ return;
+ case IIT_F64:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Double, 0));
+ return;
+ case IIT_I1:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 1));
+ return;
+ case IIT_I8:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8));
+ return;
+ case IIT_I16:
+    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 16));
+ return;
+ case IIT_I32:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 32));
+ return;
+ case IIT_I64:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64));
+ return;
+ case IIT_V2:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_V4:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 4));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_V8:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 8));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_V16:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 16));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_V32:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 32));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_PTR:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ case IIT_ANYPTR: { // [ANYPTR addrspace, subtype]
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer,
+ Infos[NextElt++]));
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ }
+ case IIT_ARG: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Argument, ArgInfo));
+ return;
+ }
+ case IIT_EXTEND_VEC_ARG: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::ExtendVecArgument,
+ ArgInfo));
+ return;
+ }
+ case IIT_TRUNC_VEC_ARG: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::TruncVecArgument,
+ ArgInfo));
+ return;
+ }
+ case IIT_EMPTYSTRUCT:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
+ return;
+ case IIT_STRUCT5: ++StructElts; // FALL THROUGH.
+ case IIT_STRUCT4: ++StructElts; // FALL THROUGH.
+ case IIT_STRUCT3: ++StructElts; // FALL THROUGH.
+ case IIT_STRUCT2: {
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct,StructElts));
+
+ for (unsigned i = 0; i != StructElts; ++i)
+ DecodeIITType(NextElt, Infos, OutputTable);
+ return;
+ }
+ }
+ llvm_unreachable("unhandled");
+}
+
+
+#define GET_INTRINSIC_GENERATOR_GLOBAL
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_INTRINSIC_GENERATOR_GLOBAL
+
+void Intrinsic::getIntrinsicInfoTableEntries(ID id,
+ SmallVectorImpl<IITDescriptor> &T){
+ // Check to see if the intrinsic's type was expressible by the table.
+ unsigned TableVal = IIT_Table[id-1];
+
+ // Decode the TableVal into an array of IITValues.
+ SmallVector<unsigned char, 8> IITValues;
+ ArrayRef<unsigned char> IITEntries;
+ unsigned NextElt = 0;
+ if ((TableVal >> 31) != 0) {
+ // This is an offset into the IIT_LongEncodingTable.
+ IITEntries = IIT_LongEncodingTable;
+
+ // Strip sentinel bit.
+ NextElt = (TableVal << 1) >> 1;
+ } else {
+ // Decode the TableVal into an array of IITValues. If the entry was encoded
+ // into a single word in the table itself, decode it now.
+ do {
+ IITValues.push_back(TableVal & 0xF);
+ TableVal >>= 4;
+ } while (TableVal);
+
+ IITEntries = IITValues;
+ NextElt = 0;
+ }
+
+ // Okay, decode the table into the output vector of IITDescriptors.
+ DecodeIITType(NextElt, IITEntries, T);
+ while (NextElt != IITEntries.size() && IITEntries[NextElt] != 0)
+ DecodeIITType(NextElt, IITEntries, T);
+}
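+
+// For example, an intrinsic of type "float (float)" fits the compact
+// encoding: the nibbles [IIT_F32, IIT_F32] are packed into a single
+// IIT_Table word (low nibble first) and unpacked by the loop above; types
+// too large to fit set the high sentinel bit and spill into
+// IIT_LongEncodingTable.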
+
+
+static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
+ ArrayRef<Type*> Tys, LLVMContext &Context) {
+ using namespace Intrinsic;
+ IITDescriptor D = Infos.front();
+ Infos = Infos.slice(1);
+
+ switch (D.Kind) {
+ case IITDescriptor::Void: return Type::getVoidTy(Context);
+ case IITDescriptor::MMX: return Type::getX86_MMXTy(Context);
+ case IITDescriptor::Metadata: return Type::getMetadataTy(Context);
+ case IITDescriptor::Half: return Type::getHalfTy(Context);
+ case IITDescriptor::Float: return Type::getFloatTy(Context);
+ case IITDescriptor::Double: return Type::getDoubleTy(Context);
+
+ case IITDescriptor::Integer:
+ return IntegerType::get(Context, D.Integer_Width);
+ case IITDescriptor::Vector:
+ return VectorType::get(DecodeFixedType(Infos, Tys, Context),D.Vector_Width);
+ case IITDescriptor::Pointer:
+ return PointerType::get(DecodeFixedType(Infos, Tys, Context),
+ D.Pointer_AddressSpace);
+ case IITDescriptor::Struct: {
+ Type *Elts[5];
+ assert(D.Struct_NumElements <= 5 && "Can't handle this yet");
+ for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
+ Elts[i] = DecodeFixedType(Infos, Tys, Context);
+ return StructType::get(Context, ArrayRef<Type*>(Elts,D.Struct_NumElements));
+ }
+
+ case IITDescriptor::Argument:
+ return Tys[D.getArgumentNumber()];
+ case IITDescriptor::ExtendVecArgument:
+ return VectorType::getExtendedElementVectorType(cast<VectorType>(
+ Tys[D.getArgumentNumber()]));
+
+ case IITDescriptor::TruncVecArgument:
+ return VectorType::getTruncatedElementVectorType(cast<VectorType>(
+ Tys[D.getArgumentNumber()]));
+ }
+ llvm_unreachable("unhandled");
+}
+
+
+
+FunctionType *Intrinsic::getType(LLVMContext &Context,
+ ID id, ArrayRef<Type*> Tys) {
+ SmallVector<IITDescriptor, 8> Table;
+ getIntrinsicInfoTableEntries(id, Table);
+
+ ArrayRef<IITDescriptor> TableRef = Table;
+ Type *ResultTy = DecodeFixedType(TableRef, Tys, Context);
+
+ SmallVector<Type*, 8> ArgTys;
+ while (!TableRef.empty())
+ ArgTys.push_back(DecodeFixedType(TableRef, Tys, Context));
+
+ return FunctionType::get(ResultTy, ArgTys, false);
+}
+
+bool Intrinsic::isOverloaded(ID id) {
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+/// This defines the "Intrinsic::getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
+  // There can never be multiple globals with the same name but different
+  // types, because intrinsics must have a specific type.
+ return
+ cast<Function>(M->getOrInsertFunction(getName(id, Tys),
+ getType(M->getContext(), id, Tys)));
+}
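+
+// For example, to get (or insert) a declaration of llvm.memcpy specialized
+// for two i8* operands and an i64 length (illustrative):
+//   Type *Tys[] = { Int8PtrTy, Int8PtrTy, Int64Ty };
+//   Function *F = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);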
+
+// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+/// hasAddressTaken - returns true if there are any uses of this function
+/// other than direct calls or invokes to it.
+bool Function::hasAddressTaken(const User **PutOffender) const {
+ for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+ const User *U = *I;
+ if (isa<BlockAddress>(U))
+ continue;
+ if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ return PutOffender ? (*PutOffender = U, true) : true;
+ ImmutableCallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(I))
+ return PutOffender ? (*PutOffender = U, true) : true;
+ }
+ return false;
+}
+
+bool Function::isDefTriviallyDead() const {
+ // Check the linkage
+ if (!hasLinkOnceLinkage() && !hasLocalLinkage() &&
+ !hasAvailableExternallyLinkage())
+ return false;
+
+ // Check if the function is used by anything other than a blockaddress.
+ for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I)
+ if (!isa<BlockAddress>(*I))
+ return false;
+
+ return true;
+}
+
+/// callsFunctionThatReturnsTwice - Return true if the function has a call to
+/// setjmp or other function that gcc recognizes as "returning twice".
+bool Function::callsFunctionThatReturnsTwice() const {
+ for (const_inst_iterator
+ I = inst_begin(this), E = inst_end(this); I != E; ++I) {
+ const CallInst* callInst = dyn_cast<CallInst>(&*I);
+ if (!callInst)
+ continue;
+ if (callInst->canReturnTwice())
+ return true;
+ }
+
+ return false;
+}
+
diff --git a/lib/VMCore/GCOV.cpp b/lib/IR/GCOV.cpp
index ea2f0a6d556f..ea2f0a6d556f 100644
--- a/lib/VMCore/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
diff --git a/lib/VMCore/GVMaterializer.cpp b/lib/IR/GVMaterializer.cpp
index f77a9c908d54..f77a9c908d54 100644
--- a/lib/VMCore/GVMaterializer.cpp
+++ b/lib/IR/GVMaterializer.cpp
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
new file mode 100644
index 000000000000..6d547f3edf3f
--- /dev/null
+++ b/lib/IR/Globals.cpp
@@ -0,0 +1,269 @@
+//===-- Globals.cpp - Implement the GlobalValue & GlobalVariable classes --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GlobalValue & GlobalVariable classes for the IR
+// library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LeakDetector.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// GlobalValue Class
+//===----------------------------------------------------------------------===//
+
+bool GlobalValue::isMaterializable() const {
+ return getParent() && getParent()->isMaterializable(this);
+}
+bool GlobalValue::isDematerializable() const {
+ return getParent() && getParent()->isDematerializable(this);
+}
+bool GlobalValue::Materialize(std::string *ErrInfo) {
+ return getParent()->Materialize(this, ErrInfo);
+}
+void GlobalValue::Dematerialize() {
+ getParent()->Dematerialize(this);
+}
+
+/// Override destroyConstant to make sure it doesn't get called on
+/// GlobalValues, because they shouldn't be treated like other constants.
+void GlobalValue::destroyConstant() {
+ llvm_unreachable("You can't GV->destroyConstant()!");
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a GlobalValue) from the GlobalValue Src to this one.
+void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
+ setAlignment(Src->getAlignment());
+ setSection(Src->getSection());
+ setVisibility(Src->getVisibility());
+ setUnnamedAddr(Src->hasUnnamedAddr());
+}
+
+void GlobalValue::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ Alignment = Log2_32(Align) + 1;
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+bool GlobalValue::isDeclaration() const {
+ // Globals are definitions if they have an initializer.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+ return GV->getNumOperands() == 0;
+
+ // Functions are definitions if they have a body.
+ if (const Function *F = dyn_cast<Function>(this))
+ return F->empty();
+
+ // Aliases are always definitions.
+ assert(isa<GlobalAlias>(this));
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// GlobalVariable Implementation
+//===----------------------------------------------------------------------===//
+
+GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link,
+ Constant *InitVal,
+ const Twine &Name, ThreadLocalMode TLMode,
+ unsigned AddressSpace,
+ bool isExternallyInitialized)
+ : GlobalValue(PointerType::get(Ty, AddressSpace),
+ Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != 0, Link, Name),
+ isConstantGlobal(constant), threadLocalMode(TLMode),
+ isExternallyInitializedConstant(isExternallyInitialized) {
+ if (InitVal) {
+ assert(InitVal->getType() == Ty &&
+ "Initializer should be the same type as the GlobalVariable!");
+ Op<0>() = InitVal;
+ }
+
+ LeakDetector::addGarbageObject(this);
+}
+
+GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
+ LinkageTypes Link, Constant *InitVal,
+ const Twine &Name,
+ GlobalVariable *Before, ThreadLocalMode TLMode,
+ unsigned AddressSpace,
+ bool isExternallyInitialized)
+ : GlobalValue(PointerType::get(Ty, AddressSpace),
+ Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != 0, Link, Name),
+ isConstantGlobal(constant), threadLocalMode(TLMode),
+ isExternallyInitializedConstant(isExternallyInitialized) {
+ if (InitVal) {
+ assert(InitVal->getType() == Ty &&
+ "Initializer should be the same type as the GlobalVariable!");
+ Op<0>() = InitVal;
+ }
+
+ LeakDetector::addGarbageObject(this);
+
+ if (Before)
+ Before->getParent()->getGlobalList().insert(Before, this);
+ else
+ M.getGlobalList().push_back(this);
+}
+
+void GlobalVariable::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void GlobalVariable::removeFromParent() {
+ getParent()->getGlobalList().remove(this);
+}
+
+void GlobalVariable::eraseFromParent() {
+ getParent()->getGlobalList().erase(this);
+}
+
+void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To,
+ Use *U) {
+ // If you call this, then you better know this GVar has a constant
+ // initializer worth replacing. Enforce that here.
+ assert(getNumOperands() == 1 &&
+ "Attempt to replace uses of Constants on a GVar with no initializer");
+
+ // And, since you know it has an initializer, the From value better be
+ // the initializer :)
+ assert(getOperand(0) == From &&
+ "Attempt to replace wrong constant initializer in GVar");
+
+ // And, you better have a constant for the replacement value
+ assert(isa<Constant>(To) &&
+ "Attempt to replace GVar initializer with non-constant");
+
+ // Okay, preconditions out of the way, replace the constant initializer.
+ this->setOperand(0, cast<Constant>(To));
+}
+
+void GlobalVariable::setInitializer(Constant *InitVal) {
+ if (InitVal == 0) {
+ if (hasInitializer()) {
+ Op<0>().set(0);
+ NumOperands = 0;
+ }
+ } else {
+ assert(InitVal->getType() == getType()->getElementType() &&
+ "Initializer type must match GlobalVariable type");
+ if (!hasInitializer())
+ NumOperands = 1;
+ Op<0>().set(InitVal);
+ }
+}
+
+/// copyAttributesFrom - copy all additional attributes (those not needed to
+/// create a GlobalVariable) from the GlobalVariable Src to this one.
+void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
+ assert(isa<GlobalVariable>(Src) && "Expected a GlobalVariable!");
+ GlobalValue::copyAttributesFrom(Src);
+ const GlobalVariable *SrcVar = cast<GlobalVariable>(Src);
+ setThreadLocal(SrcVar->isThreadLocal());
+}
+
+
+//===----------------------------------------------------------------------===//
+// GlobalAlias Implementation
+//===----------------------------------------------------------------------===//
+
+GlobalAlias::GlobalAlias(Type *Ty, LinkageTypes Link,
+ const Twine &Name, Constant* aliasee,
+ Module *ParentModule)
+ : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) {
+ LeakDetector::addGarbageObject(this);
+
+ if (aliasee)
+ assert(aliasee->getType() == Ty && "Alias and aliasee types should match!");
+ Op<0>() = aliasee;
+
+ if (ParentModule)
+ ParentModule->getAliasList().push_back(this);
+}
+
+void GlobalAlias::setParent(Module *parent) {
+ if (getParent())
+ LeakDetector::addGarbageObject(this);
+ Parent = parent;
+ if (getParent())
+ LeakDetector::removeGarbageObject(this);
+}
+
+void GlobalAlias::removeFromParent() {
+ getParent()->getAliasList().remove(this);
+}
+
+void GlobalAlias::eraseFromParent() {
+ getParent()->getAliasList().erase(this);
+}
+
+void GlobalAlias::setAliasee(Constant *Aliasee) {
+ assert((!Aliasee || Aliasee->getType() == getType()) &&
+ "Alias and aliasee types should match!");
+
+ setOperand(0, Aliasee);
+}
+
+const GlobalValue *GlobalAlias::getAliasedGlobal() const {
+ const Constant *C = getAliasee();
+ if (C == 0) return 0;
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return GV;
+
+ const ConstantExpr *CE = cast<ConstantExpr>(C);
+ assert((CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ "Unsupported aliasee");
+
+ return cast<GlobalValue>(CE->getOperand(0));
+}
+
+const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
+ SmallPtrSet<const GlobalValue*, 3> Visited;
+
+ // Check if we need to stop early.
+ if (stopOnWeak && mayBeOverridden())
+ return this;
+
+ const GlobalValue *GV = getAliasedGlobal();
+ Visited.insert(GV);
+
+ // Iterate over aliasing chain, stopping on weak alias if necessary.
+ while (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) {
+ if (stopOnWeak && GA->mayBeOverridden())
+ break;
+
+ GV = GA->getAliasedGlobal();
+
+ if (!Visited.insert(GV))
+ return 0;
+ }
+
+ return GV;
+}
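+
+// For example, with @a = alias of @b and @b = alias of @f, where @f is a
+// function, resolveAliasedGlobal on @a returns @f; if the chain were cyclic
+// (@a -> @b -> @a), the Visited set catches the repeat and 0 is returned.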
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
new file mode 100644
index 000000000000..435e54f0ea2a
--- /dev/null
+++ b/lib/IR/IRBuilder.cpp
@@ -0,0 +1,153 @@
+//===---- IRBuilder.cpp - Builder for LLVM Instrs -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IRBuilder class, which is used as a convenient way
+// to create LLVM instructions with a consistent and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+using namespace llvm;
+
+/// CreateGlobalString - Make a new global variable with an initializer that
+/// has an array-of-i8 type filled in with the nul-terminated string value
+/// specified. If Name is specified, it is the name of the global variable
+/// created.
+Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
+ Constant *StrConstant = ConstantDataArray::getString(Context, Str);
+ Module &M = *BB->getParent()->getParent();
+ GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(),
+ true, GlobalValue::PrivateLinkage,
+ StrConstant);
+ GV->setName(Name);
+ GV->setUnnamedAddr(true);
+ return GV;
+}
+
+Type *IRBuilderBase::getCurrentFunctionReturnType() const {
+ assert(BB && BB->getParent() && "No current function!");
+ return BB->getParent()->getReturnType();
+}
+
+Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
+ PointerType *PT = cast<PointerType>(Ptr->getType());
+ if (PT->getElementType()->isIntegerTy(8))
+ return Ptr;
+
+ // Otherwise, we need to insert a bitcast.
+ PT = getInt8PtrTy(PT->getAddressSpace());
+ BitCastInst *BCI = new BitCastInst(Ptr, PT, "");
+ BB->getInstList().insert(InsertPt, BCI);
+ SetInstDebugLocation(BCI);
+ return BCI;
+}
+
+static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
+ IRBuilderBase *Builder) {
+ CallInst *CI = CallInst::Create(Callee, Ops, "");
+  Builder->GetInsertBlock()->getInstList().insert(Builder->GetInsertPoint(),
+                                                  CI);
+ Builder->SetInstDebugLocation(CI);
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Ptr = getCastedInt8PtrValue(Ptr);
+ Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Ptr->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Src = getCastedInt8PtrValue(Src);
+
+ Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ // Set the TBAA Struct info if present.
+ if (TBAAStructTag)
+ CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Src = getCastedInt8PtrValue(Src);
+
+ Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+ Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
+ assert(isa<PointerType>(Ptr->getType()) &&
+ "lifetime.start only applies to pointers.");
+ Ptr = getCastedInt8PtrValue(Ptr);
+ if (!Size)
+ Size = getInt64(-1);
+ else
+ assert(Size->getType() == getInt64Ty() &&
+ "lifetime.start requires the size to be an i64");
+ Value *Ops[] = { Size, Ptr };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start);
+ return createCallHelper(TheFn, Ops, this);
+}
+
+CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
+ assert(isa<PointerType>(Ptr->getType()) &&
+ "lifetime.end only applies to pointers.");
+ Ptr = getCastedInt8PtrValue(Ptr);
+ if (!Size)
+ Size = getInt64(-1);
+ else
+ assert(Size->getType() == getInt64Ty() &&
+ "lifetime.end requires the size to be an i64");
+ Value *Ops[] = { Size, Ptr };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end);
+ return createCallHelper(TheFn, Ops, this);
+}
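+
+// Illustrative use of the helpers above (builder position and operands
+// assumed):
+//   IRBuilder<> B(InsertPt);
+//   B.CreateMemCpy(Dst, Src, B.getInt64(N), /*Align=*/1);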
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
new file mode 100644
index 000000000000..9f2a9fea4b93
--- /dev/null
+++ b/lib/IR/InlineAsm.cpp
@@ -0,0 +1,295 @@
+//===-- InlineAsm.cpp - Implement the InlineAsm class ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the InlineAsm class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/InlineAsm.h"
+#include "ConstantsContext.h"
+#include "LLVMContextImpl.h"
+#include "llvm/IR/DerivedTypes.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+// Implement the first virtual method in this class in this file so the
+// InlineAsm vtable is emitted here.
+InlineAsm::~InlineAsm() {
+}
+
+
+InlineAsm *InlineAsm::get(FunctionType *Ty, StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
+ bool isAlignStack, AsmDialect asmDialect) {
+ InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack,
+ asmDialect);
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+ return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key);
+}
+
+InlineAsm::InlineAsm(PointerType *Ty, const std::string &asmString,
+ const std::string &constraints, bool hasSideEffects,
+ bool isAlignStack, AsmDialect asmDialect)
+ : Value(Ty, Value::InlineAsmVal),
+ AsmString(asmString), Constraints(constraints),
+ HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack),
+ Dialect(asmDialect) {
+
+ // Do various checks on the constraint string and type.
+ assert(Verify(getFunctionType(), constraints) &&
+ "Function type not legal for constraints!");
+}
+
+void InlineAsm::destroyConstant() {
+ getType()->getContext().pImpl->InlineAsms.remove(this);
+ delete this;
+}
+
+FunctionType *InlineAsm::getFunctionType() const {
+ return cast<FunctionType>(getType()->getElementType());
+}
+
+/// Default constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo() :
+ Type(isInput), isEarlyClobber(false),
+ MatchingInput(-1), isCommutative(false),
+ isIndirect(false), isMultipleAlternative(false),
+ currentAlternativeIndex(0) {
+}
+
+/// Copy constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo(const ConstraintInfo &other) :
+ Type(other.Type), isEarlyClobber(other.isEarlyClobber),
+ MatchingInput(other.MatchingInput), isCommutative(other.isCommutative),
+ isIndirect(other.isIndirect), Codes(other.Codes),
+ isMultipleAlternative(other.isMultipleAlternative),
+ multipleAlternatives(other.multipleAlternatives),
+ currentAlternativeIndex(other.currentAlternativeIndex) {
+}
+
+/// Parse - Analyze the specified string (e.g. "=&{eax}") and fill in the
+/// fields in this structure. If the constraint string is not understood,
+/// return true, otherwise return false.
+bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
+ InlineAsm::ConstraintInfoVector &ConstraintsSoFar) {
+ StringRef::iterator I = Str.begin(), E = Str.end();
+ unsigned multipleAlternativeCount = Str.count('|') + 1;
+ unsigned multipleAlternativeIndex = 0;
+ ConstraintCodeVector *pCodes = &Codes;
+
+ // Initialize
+  isMultipleAlternative = multipleAlternativeCount > 1;
+ if (isMultipleAlternative) {
+ multipleAlternatives.resize(multipleAlternativeCount);
+ pCodes = &multipleAlternatives[0].Codes;
+ }
+ Type = isInput;
+ isEarlyClobber = false;
+ MatchingInput = -1;
+ isCommutative = false;
+ isIndirect = false;
+ currentAlternativeIndex = 0;
+
+ // Parse prefixes.
+ if (*I == '~') {
+ Type = isClobber;
+ ++I;
+ } else if (*I == '=') {
+ ++I;
+ Type = isOutput;
+ }
+
+ if (*I == '*') {
+ isIndirect = true;
+ ++I;
+ }
+
+ if (I == E) return true; // Just a prefix, like "==" or "~".
+
+ // Parse the modifiers.
+ bool DoneWithModifiers = false;
+ while (!DoneWithModifiers) {
+ switch (*I) {
+ default:
+ DoneWithModifiers = true;
+ break;
+ case '&': // Early clobber.
+ if (Type != isOutput || // Cannot early clobber anything but output.
+ isEarlyClobber) // Reject &&&&&&
+ return true;
+ isEarlyClobber = true;
+ break;
+ case '%': // Commutative.
+ if (Type == isClobber || // Cannot commute clobbers.
+ isCommutative) // Reject %%%%%
+ return true;
+ isCommutative = true;
+ break;
+ case '#': // Comment.
+ case '*': // Register preferencing.
+ return true; // Not supported.
+ }
+
+ if (!DoneWithModifiers) {
+ ++I;
+ if (I == E) return true; // Just prefixes and modifiers!
+ }
+ }
+
+ // Parse the various constraints.
+ while (I != E) {
+ if (*I == '{') { // Physical register reference.
+ // Find the end of the register name.
+ StringRef::iterator ConstraintEnd = std::find(I+1, E, '}');
+ if (ConstraintEnd == E) return true; // "{foo"
+ pCodes->push_back(std::string(I, ConstraintEnd+1));
+ I = ConstraintEnd+1;
+ } else if (isdigit(static_cast<unsigned char>(*I))) { // Matching Constraint
+ // Maximal munch numbers.
+ StringRef::iterator NumStart = I;
+ while (I != E && isdigit(static_cast<unsigned char>(*I)))
+ ++I;
+ pCodes->push_back(std::string(NumStart, I));
+ unsigned N = atoi(pCodes->back().c_str());
+ // Check that this is a valid matching constraint!
+ if (N >= ConstraintsSoFar.size() || ConstraintsSoFar[N].Type != isOutput||
+ Type != isInput)
+ return true; // Invalid constraint number.
+
+ // If Operand N already has a matching input, reject this. An output
+ // can't be constrained to the same value as multiple inputs.
+ if (isMultipleAlternative) {
+ InlineAsm::SubConstraintInfo &scInfo =
+ ConstraintsSoFar[N].multipleAlternatives[multipleAlternativeIndex];
+ if (scInfo.MatchingInput != -1)
+ return true;
+ // Note that operand #n has a matching input.
+ scInfo.MatchingInput = ConstraintsSoFar.size();
+ } else {
+ if (ConstraintsSoFar[N].hasMatchingInput())
+ return true;
+ // Note that operand #n has a matching input.
+ ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
+ }
+ } else if (*I == '|') {
+ multipleAlternativeIndex++;
+ pCodes = &multipleAlternatives[multipleAlternativeIndex].Codes;
+ ++I;
+ } else if (*I == '^') {
+ // Multi-letter constraint
+ // FIXME: For now assuming these are 2-character constraints.
+ pCodes->push_back(std::string(I+1, I+3));
+ I += 3;
+ } else {
+ // Single letter constraint.
+ pCodes->push_back(std::string(I, I+1));
+ ++I;
+ }
+ }
+
+ return false;
+}
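+
+// For example, "=&{eax}" parses as an early-clobber output pinned to the
+// physical register code "{eax}", and "0" parses as an input tied to
+// output operand 0 via MatchingInput.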
+
+/// selectAlternative - Point this constraint to the alternative constraint
+/// indicated by the index.
+void InlineAsm::ConstraintInfo::selectAlternative(unsigned index) {
+ if (index < multipleAlternatives.size()) {
+ currentAlternativeIndex = index;
+ InlineAsm::SubConstraintInfo &scInfo =
+ multipleAlternatives[currentAlternativeIndex];
+ MatchingInput = scInfo.MatchingInput;
+ Codes = scInfo.Codes;
+ }
+}
+
+InlineAsm::ConstraintInfoVector
+InlineAsm::ParseConstraints(StringRef Constraints) {
+ ConstraintInfoVector Result;
+
+ // Scan the constraints string.
+ for (StringRef::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ) {
+ ConstraintInfo Info;
+
+ // Find the end of this constraint.
+ StringRef::iterator ConstraintEnd = std::find(I, E, ',');
+
+ if (ConstraintEnd == I || // Empty constraint like ",,"
+ Info.Parse(StringRef(I, ConstraintEnd-I), Result)) {
+ Result.clear(); // Erroneous constraint?
+ break;
+ }
+
+ Result.push_back(Info);
+
+ // ConstraintEnd may be either the next comma or the end of the string. In
+ // the former case, we skip the comma.
+ I = ConstraintEnd;
+ if (I != E) {
+ ++I;
+ if (I == E) { Result.clear(); break; } // don't allow "xyz,"
+ }
+ }
+
+ return Result;
+}
+
+/// Verify - Verify that the specified constraint string is reasonable for the
+/// specified function type, i.e., that the constraints are consistent with
+/// the function's signature.
+bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
+ if (Ty->isVarArg()) return false;
+
+ ConstraintInfoVector Constraints = ParseConstraints(ConstStr);
+
+ // Error parsing constraints.
+ if (Constraints.empty() && !ConstStr.empty()) return false;
+
+ unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0;
+ unsigned NumIndirect = 0;
+
+ for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
+ switch (Constraints[i].Type) {
+ case InlineAsm::isOutput:
+ if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0)
+ return false; // outputs before inputs and clobbers.
+ if (!Constraints[i].isIndirect) {
+ ++NumOutputs;
+ break;
+ }
+ ++NumIndirect;
+ // FALLTHROUGH for Indirect Outputs.
+ case InlineAsm::isInput:
+ if (NumClobbers) return false; // inputs before clobbers.
+ ++NumInputs;
+ break;
+ case InlineAsm::isClobber:
+ ++NumClobbers;
+ break;
+ }
+ }
+
+ switch (NumOutputs) {
+ case 0:
+ if (!Ty->getReturnType()->isVoidTy()) return false;
+ break;
+ case 1:
+ if (Ty->getReturnType()->isStructTy()) return false;
+ break;
+ default:
+ StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
+ if (STy == 0 || STy->getNumElements() != NumOutputs)
+ return false;
+ break;
+ }
+
+ if (Ty->getNumParams() != NumInputs) return false;
+ return true;
+}
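+
+// For example, the constraint string "=r,r" verifies against the function
+// type "i32 (i32)": one non-indirect output for the scalar return value and
+// one input for the single parameter, with no clobbers out of order.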
+
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
new file mode 100644
index 000000000000..2b5a0b39c316
--- /dev/null
+++ b/lib/IR/Instruction.cpp
@@ -0,0 +1,555 @@
+//===-- Instruction.cpp - Implement the Instruction class -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Instruction class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/LeakDetector.h"
+using namespace llvm;
+
+Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+ Instruction *InsertBefore)
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+  // Make sure that we get added to a basic block
+ LeakDetector::addGarbageObject(this);
+
+ // If requested, insert this instruction into a basic block...
+ if (InsertBefore) {
+ assert(InsertBefore->getParent() &&
+ "Instruction to insert before is not in a basic block!");
+ InsertBefore->getParent()->getInstList().insert(InsertBefore, this);
+ }
+}
+
+Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
+ BasicBlock *InsertAtEnd)
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+  // Make sure that we get added to a basic block
+ LeakDetector::addGarbageObject(this);
+
+  // Append this instruction to the end of the basic block.
+ assert(InsertAtEnd && "Basic block to append to may not be NULL!");
+ InsertAtEnd->getInstList().push_back(this);
+}
+
+
+// Out-of-line virtual method, so the vtable, etc. has a home.
+Instruction::~Instruction() {
+ assert(Parent == 0 && "Instruction still linked in the program!");
+ if (hasMetadataHashEntry())
+ clearMetadataHashEntries();
+}
+
+
+void Instruction::setParent(BasicBlock *P) {
+ if (getParent()) {
+ if (!P) LeakDetector::addGarbageObject(this);
+ } else {
+ if (P) LeakDetector::removeGarbageObject(this);
+ }
+
+ Parent = P;
+}
+
+void Instruction::removeFromParent() {
+ getParent()->getInstList().remove(this);
+}
+
+void Instruction::eraseFromParent() {
+ getParent()->getInstList().erase(this);
+}
+
+/// insertBefore - Insert an unlinked instruction into a basic block
+/// immediately before the specified instruction.
+void Instruction::insertBefore(Instruction *InsertPos) {
+ InsertPos->getParent()->getInstList().insert(InsertPos, this);
+}
+
+/// insertAfter - Insert an unlinked instruction into a basic block
+/// immediately after the specified instruction.
+void Instruction::insertAfter(Instruction *InsertPos) {
+ InsertPos->getParent()->getInstList().insertAfter(InsertPos, this);
+}
+
+/// moveBefore - Unlink this instruction from its current basic block and
+/// insert it into the basic block that MovePos lives in, right before
+/// MovePos.
+void Instruction::moveBefore(Instruction *MovePos) {
+ MovePos->getParent()->getInstList().splice(MovePos,getParent()->getInstList(),
+ this);
+}
+
+/// Set or clear the unsafe-algebra flag on this instruction, which must be an
+/// operator which supports this flag. See LangRef.html for the meaning of this
+/// flag.
+void Instruction::setHasUnsafeAlgebra(bool B) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setHasUnsafeAlgebra(B);
+}
+
+/// Set or clear the NoNaNs flag on this instruction, which must be an operator
+/// which supports this flag. See LangRef.html for the meaning of this flag.
+void Instruction::setHasNoNaNs(bool B) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setHasNoNaNs(B);
+}
+
+/// Set or clear the no-infs flag on this instruction, which must be an operator
+/// which supports this flag. See LangRef.html for the meaning of this flag.
+void Instruction::setHasNoInfs(bool B) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setHasNoInfs(B);
+}
+
+/// Set or clear the no-signed-zeros flag on this instruction, which must be an
+/// operator which supports this flag. See LangRef.html for the meaning of this
+/// flag.
+void Instruction::setHasNoSignedZeros(bool B) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setHasNoSignedZeros(B);
+}
+
+/// Set or clear the allow-reciprocal flag on this instruction, which must be an
+/// operator which supports this flag. See LangRef.html for the meaning of this
+/// flag.
+void Instruction::setHasAllowReciprocal(bool B) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setHasAllowReciprocal(B);
+}
+
+/// Convenience function for setting all the fast-math flags on this
+/// instruction, which must be an operator which supports these flags. See
+/// LangRef.html for the meaning of these flags.
+void Instruction::setFastMathFlags(FastMathFlags FMF) {
+ assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ cast<FPMathOperator>(this)->setFastMathFlags(FMF);
+}
+
+/// Determine whether the unsafe-algebra flag is set.
+bool Instruction::hasUnsafeAlgebra() const {
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->hasUnsafeAlgebra();
+}
+
+/// Determine whether the no-NaNs flag is set.
+bool Instruction::hasNoNaNs() const {
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->hasNoNaNs();
+}
+
+/// Determine whether the no-infs flag is set.
+bool Instruction::hasNoInfs() const {
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->hasNoInfs();
+}
+
+/// Determine whether the no-signed-zeros flag is set.
+bool Instruction::hasNoSignedZeros() const {
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->hasNoSignedZeros();
+}
+
+/// Determine whether the allow-reciprocal flag is set.
+bool Instruction::hasAllowReciprocal() const {
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->hasAllowReciprocal();
+}
+
+/// Convenience function for getting all the fast-math flags on this
+/// instruction, which must be an operator which supports these flags. See
+/// LangRef.html for the meaning of these flags.
+FastMathFlags Instruction::getFastMathFlags() const {
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->getFastMathFlags();
+}
+
+/// Copy I's fast-math flags
+void Instruction::copyFastMathFlags(const Instruction *I) {
+ setFastMathFlags(I->getFastMathFlags());
+}
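+
+// Illustrative sketch (hypothetical values): stamping fast-math flags onto a
+// newly created FP operation, or copying them from the instruction it
+// replaces.
+//
+//   FastMathFlags FMF;
+//   FMF.setNoNaNs();
+//   FMF.setNoSignedZeros();
+//   NewFAdd->setFastMathFlags(FMF);  // or: NewFAdd->copyFastMathFlags(OldFAdd);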
+
+
+const char *Instruction::getOpcodeName(unsigned OpCode) {
+ switch (OpCode) {
+ // Terminators
+ case Ret: return "ret";
+ case Br: return "br";
+ case Switch: return "switch";
+ case IndirectBr: return "indirectbr";
+ case Invoke: return "invoke";
+ case Resume: return "resume";
+ case Unreachable: return "unreachable";
+
+ // Standard binary operators...
+ case Add: return "add";
+ case FAdd: return "fadd";
+ case Sub: return "sub";
+ case FSub: return "fsub";
+ case Mul: return "mul";
+ case FMul: return "fmul";
+ case UDiv: return "udiv";
+ case SDiv: return "sdiv";
+ case FDiv: return "fdiv";
+ case URem: return "urem";
+ case SRem: return "srem";
+ case FRem: return "frem";
+
+ // Logical operators...
+ case And: return "and";
+ case Or : return "or";
+ case Xor: return "xor";
+
+ // Memory instructions...
+ case Alloca: return "alloca";
+ case Load: return "load";
+ case Store: return "store";
+ case AtomicCmpXchg: return "cmpxchg";
+ case AtomicRMW: return "atomicrmw";
+ case Fence: return "fence";
+ case GetElementPtr: return "getelementptr";
+
+ // Convert instructions...
+ case Trunc: return "trunc";
+ case ZExt: return "zext";
+ case SExt: return "sext";
+ case FPTrunc: return "fptrunc";
+ case FPExt: return "fpext";
+ case FPToUI: return "fptoui";
+ case FPToSI: return "fptosi";
+ case UIToFP: return "uitofp";
+ case SIToFP: return "sitofp";
+ case IntToPtr: return "inttoptr";
+ case PtrToInt: return "ptrtoint";
+ case BitCast: return "bitcast";
+
+ // Other instructions...
+ case ICmp: return "icmp";
+ case FCmp: return "fcmp";
+ case PHI: return "phi";
+ case Select: return "select";
+ case Call: return "call";
+ case Shl: return "shl";
+ case LShr: return "lshr";
+ case AShr: return "ashr";
+ case VAArg: return "va_arg";
+ case ExtractElement: return "extractelement";
+ case InsertElement: return "insertelement";
+ case ShuffleVector: return "shufflevector";
+ case ExtractValue: return "extractvalue";
+ case InsertValue: return "insertvalue";
+ case LandingPad: return "landingpad";
+
+ default: return "<Invalid operator> ";
+ }
+}
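+
+// Illustrative sketch (hypothetical `I`): the table above backs diagnostics
+// and textual IR dumps, e.g.
+//
+//   errs() << "rejecting '" << I->getOpcodeName() << "' instruction\n";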
+
+/// isIdenticalTo - Return true if the specified instruction is exactly
+/// identical to the current one. This means that all operands match and any
+/// extra information (e.g. load is volatile) agrees.
+bool Instruction::isIdenticalTo(const Instruction *I) const {
+ return isIdenticalToWhenDefined(I) &&
+ SubclassOptionalData == I->SubclassOptionalData;
+}
+
+/// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
+/// ignores the SubclassOptionalData flags, which specify conditions
+/// under which the instruction's result is undefined.
+bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
+ if (getOpcode() != I->getOpcode() ||
+ getNumOperands() != I->getNumOperands() ||
+ getType() != I->getType())
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (getOperand(i) != I->getOperand(i))
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(this))
+ return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
+ LI->getAlignment() == cast<LoadInst>(I)->getAlignment() &&
+ LI->getOrdering() == cast<LoadInst>(I)->getOrdering() &&
+ LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(this))
+ return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
+ SI->getAlignment() == cast<StoreInst>(I)->getAlignment() &&
+ SI->getOrdering() == cast<StoreInst>(I)->getOrdering() &&
+ SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(this))
+ return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
+ return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<InvokeInst>(I)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+ return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+ return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
+ if (const FenceInst *FI = dyn_cast<FenceInst>(this))
+ return FI->getOrdering() == cast<FenceInst>(I)->getOrdering() &&
+ FI->getSynchScope() == cast<FenceInst>(I)->getSynchScope();
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
+ return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
+ CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() &&
+ CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
+ return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
+ RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() &&
+ RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() &&
+ RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope();
+ if (const PHINode *thisPHI = dyn_cast<PHINode>(this)) {
+ const PHINode *otherPHI = cast<PHINode>(I);
+ for (unsigned i = 0, e = thisPHI->getNumOperands(); i != e; ++i) {
+ if (thisPHI->getIncomingBlock(i) != otherPHI->getIncomingBlock(i))
+ return false;
+ }
+ return true;
+ }
+ return true;
+}
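+
+// Illustrative sketch (hypothetical `A`, `B`, and dominator tree `DT`): two
+// adds with equal operands but different nsw/nuw bits satisfy
+// isIdenticalToWhenDefined but not isIdenticalTo, since those bits live in
+// SubclassOptionalData.
+//
+//   if (A->isIdenticalToWhenDefined(B) && DT.dominates(A, B))
+//     B->replaceAllUsesWith(A);   // a minimal CSE-style rewrite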
+
+// isSameOperationAs
+// This should be kept in sync with isEquivalentOperation in
+// lib/Transforms/IPO/MergeFunctions.cpp.
+bool Instruction::isSameOperationAs(const Instruction *I,
+ unsigned flags) const {
+ bool IgnoreAlignment = flags & CompareIgnoringAlignment;
+ bool UseScalarTypes = flags & CompareUsingScalarTypes;
+
+ if (getOpcode() != I->getOpcode() ||
+ getNumOperands() != I->getNumOperands() ||
+ (UseScalarTypes ?
+ getType()->getScalarType() != I->getType()->getScalarType() :
+ getType() != I->getType()))
+ return false;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (UseScalarTypes ?
+ getOperand(i)->getType()->getScalarType() !=
+ I->getOperand(i)->getType()->getScalarType() :
+ getOperand(i)->getType() != I->getOperand(i)->getType())
+ return false;
+
+ // Check special state that is a part of some instructions.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(this))
+ return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
+ (LI->getAlignment() == cast<LoadInst>(I)->getAlignment() ||
+ IgnoreAlignment) &&
+ LI->getOrdering() == cast<LoadInst>(I)->getOrdering() &&
+ LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(this))
+ return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
+ (SI->getAlignment() == cast<StoreInst>(I)->getAlignment() ||
+ IgnoreAlignment) &&
+ SI->getOrdering() == cast<StoreInst>(I)->getOrdering() &&
+ SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(this))
+ return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
+ return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
+ CI->getAttributes() ==
+ cast<InvokeInst>(I)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+ return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+ return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
+ if (const FenceInst *FI = dyn_cast<FenceInst>(this))
+ return FI->getOrdering() == cast<FenceInst>(I)->getOrdering() &&
+ FI->getSynchScope() == cast<FenceInst>(I)->getSynchScope();
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
+ return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
+ CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() &&
+ CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
+ return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
+ RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() &&
+ RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() &&
+ RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope();
+
+ return true;
+}
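+
+// Illustrative sketch (hypothetical `A`, `B`): callers that only care about
+// the shape of the operation can relax the comparison with the flags above.
+//
+//   bool SameShape =
+//       A->isSameOperationAs(B, Instruction::CompareIgnoringAlignment |
+//                               Instruction::CompareUsingScalarTypes);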
+
+/// isUsedOutsideOfBlock - Return true if there are any uses of this
+/// instruction outside of the specified block. Note that PHI nodes are
+/// considered to evaluate their operands in the corresponding predecessor
+/// block.
+bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
+ for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ // PHI nodes use values in the corresponding predecessor block. For other
+ // instructions, just check to see whether the parent of the use matches up.
+ const User *U = *UI;
+ const PHINode *PN = dyn_cast<PHINode>(U);
+ if (PN == 0) {
+ if (cast<Instruction>(U)->getParent() != BB)
+ return true;
+ continue;
+ }
+
+ if (PN->getIncomingBlock(UI) != BB)
+ return true;
+ }
+ return false;
+}
+
+/// mayReadFromMemory - Return true if this instruction may read memory.
+///
+bool Instruction::mayReadFromMemory() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::VAArg:
+ case Instruction::Load:
+ case Instruction::Fence: // FIXME: refine definition of mayReadFromMemory
+ case Instruction::AtomicCmpXchg:
+ case Instruction::AtomicRMW:
+ return true;
+ case Instruction::Call:
+ return !cast<CallInst>(this)->doesNotAccessMemory();
+ case Instruction::Invoke:
+ return !cast<InvokeInst>(this)->doesNotAccessMemory();
+ case Instruction::Store:
+ return !cast<StoreInst>(this)->isUnordered();
+ }
+}
+
+/// mayWriteToMemory - Return true if this instruction may modify memory.
+///
+bool Instruction::mayWriteToMemory() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::Fence: // FIXME: refine definition of mayWriteToMemory
+ case Instruction::Store:
+ case Instruction::VAArg:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::AtomicRMW:
+ return true;
+ case Instruction::Call:
+ return !cast<CallInst>(this)->onlyReadsMemory();
+ case Instruction::Invoke:
+ return !cast<InvokeInst>(this)->onlyReadsMemory();
+ case Instruction::Load:
+ return !cast<LoadInst>(this)->isUnordered();
+ }
+}
+
+bool Instruction::mayThrow() const {
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return !CI->doesNotThrow();
+ return isa<ResumeInst>(this);
+}
+
+bool Instruction::mayReturn() const {
+ if (const CallInst *CI = dyn_cast<CallInst>(this))
+ return !CI->doesNotReturn();
+ return true;
+}
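+
+// Illustrative sketch (hypothetical `I`): code motion passes combine these
+// predicates to decide whether an instruction is safe to hoist or sink.
+//
+//   bool SafeToMove = !I->mayReadFromMemory() && !I->mayWriteToMemory() &&
+//                     !I->mayThrow() && I->mayReturn() &&
+//                     !isa<TerminatorInst>(I);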
+
+/// isAssociative - Return true if the instruction is associative:
+///
+/// Associative operators satisfy: x op (y op z) === (x op y) op z
+///
+/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
+///
+bool Instruction::isAssociative(unsigned Opcode) {
+ return Opcode == And || Opcode == Or || Opcode == Xor ||
+ Opcode == Add || Opcode == Mul;
+}
+
+bool Instruction::isAssociative() const {
+ unsigned Opcode = getOpcode();
+ if (isAssociative(Opcode))
+ return true;
+
+ switch (Opcode) {
+ case FMul:
+ case FAdd:
+ return cast<FPMathOperator>(this)->hasUnsafeAlgebra();
+ default:
+ return false;
+ }
+}
+
+/// isCommutative - Return true if the instruction is commutative:
+///
+/// Commutative operators satisfy: (x op y) === (y op x)
+///
+/// In LLVM, these are the associative operators, plus SetEQ and SetNE, when
+/// applied to any type.
+///
+bool Instruction::isCommutative(unsigned op) {
+ switch (op) {
+ case Add:
+ case FAdd:
+ case Mul:
+ case FMul:
+ case And:
+ case Or:
+ case Xor:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// isIdempotent - Return true if the instruction is idempotent:
+///
+/// Idempotent operators satisfy: x op x === x
+///
+/// In LLVM, the And and Or operators are idempotent.
+///
+bool Instruction::isIdempotent(unsigned Opcode) {
+ return Opcode == And || Opcode == Or;
+}
+
+/// isNilpotent - Return true if the instruction is nilpotent:
+///
+/// Nilpotent operators satisfy: x op x === Id,
+///
+/// where Id is the identity for the operator, i.e. a constant such that
+/// x op Id === x and Id op x === x for all x.
+///
+/// In LLVM, the Xor operator is nilpotent.
+///
+bool Instruction::isNilpotent(unsigned Opcode) {
+ return Opcode == Xor;
+}
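+
+// Illustrative sketch (hypothetical `BO`): instruction simplification uses
+// these algebraic predicates on a binary operator with equal operands.
+//
+//   if (BO->getOperand(0) == BO->getOperand(1)) {
+//     if (Instruction::isIdempotent(BO->getOpcode()))
+//       return BO->getOperand(0);                       // x & x ==> x
+//     if (Instruction::isNilpotent(BO->getOpcode()))
+//       return Constant::getNullValue(BO->getType());   // x ^ x ==> 0
+//   }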
+
+Instruction *Instruction::clone() const {
+ Instruction *New = clone_impl();
+ New->SubclassOptionalData = SubclassOptionalData;
+ if (!hasMetadata())
+ return New;
+
+ // Otherwise, enumerate and copy over metadata from the old instruction to the
+ // new one.
+ SmallVector<std::pair<unsigned, MDNode*>, 4> TheMDs;
+ getAllMetadataOtherThanDebugLoc(TheMDs);
+ for (unsigned i = 0, e = TheMDs.size(); i != e; ++i)
+ New->setMetadata(TheMDs[i].first, TheMDs[i].second);
+
+ New->setDebugLoc(getDebugLoc());
+ return New;
+}
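+
+// Illustrative sketch (hypothetical `I`): clone() returns an unlinked copy
+// with no name, so the caller inserts it and typically renames it.
+//
+//   Instruction *Copy = I->clone();
+//   Copy->setName(I->getName() + ".dup");
+//   Copy->insertAfter(I);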
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
new file mode 100644
index 000000000000..d58877ef773a
--- /dev/null
+++ b/lib/IR/Instructions.cpp
@@ -0,0 +1,3553 @@
+//===-- Instructions.cpp - Implement the LLVM instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements all of the non-inline methods for the LLVM instruction
+// classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Instructions.h"
+#include "LLVMContextImpl.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// CallSite Class
+//===----------------------------------------------------------------------===//
+
+User::op_iterator CallSite::getCallee() const {
+ Instruction *II(getInstruction());
+ return isCall()
+ ? cast<CallInst>(II)->op_end() - 1 // Skip Callee
+ : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Callee
+}
+
+//===----------------------------------------------------------------------===//
+// TerminatorInst Class
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method, so the vtable, etc. has a home.
+TerminatorInst::~TerminatorInst() {
+}
+
+//===----------------------------------------------------------------------===//
+// UnaryInstruction Class
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method, so the vtable, etc. has a home.
+UnaryInstruction::~UnaryInstruction() {
+}
+
+//===----------------------------------------------------------------------===//
+// SelectInst Class
+//===----------------------------------------------------------------------===//
+
+/// areInvalidOperands - Return a string if the specified operands are invalid
+/// for a select operation, otherwise return null.
+const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
+ if (Op1->getType() != Op2->getType())
+ return "both values to select must have same type";
+
+ if (VectorType *VT = dyn_cast<VectorType>(Op0->getType())) {
+ // Vector select.
+ if (VT->getElementType() != Type::getInt1Ty(Op0->getContext()))
+ return "vector select condition element type must be i1";
+ VectorType *ET = dyn_cast<VectorType>(Op1->getType());
+ if (ET == 0)
+ return "selected values for vector select must be vectors";
+ if (ET->getNumElements() != VT->getNumElements())
+ return "vector select requires selected vectors to have "
+ "the same vector length as select condition";
+ } else if (Op0->getType() != Type::getInt1Ty(Op0->getContext())) {
+ return "select condition must be i1 or <n x i1>";
+ }
+ return 0;
+}
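+
+// Illustrative sketch (hypothetical operands `Cond`, `TVal`, `FVal`, and
+// insertion point `InsertPt`): front ends can validate the operands up front
+// instead of tripping the IR verifier later.
+//
+//   if (const char *Err = SelectInst::areInvalidOperands(Cond, TVal, FVal))
+//     report_fatal_error(Err);
+//   SelectInst *Sel = SelectInst::Create(Cond, TVal, FVal, "sel", InsertPt);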
+
+
+//===----------------------------------------------------------------------===//
+// PHINode Class
+//===----------------------------------------------------------------------===//
+
+PHINode::PHINode(const PHINode &PN)
+ : Instruction(PN.getType(), Instruction::PHI,
+ allocHungoffUses(PN.getNumOperands()), PN.getNumOperands()),
+ ReservedSpace(PN.getNumOperands()) {
+ std::copy(PN.op_begin(), PN.op_end(), op_begin());
+ std::copy(PN.block_begin(), PN.block_end(), block_begin());
+ SubclassOptionalData = PN.SubclassOptionalData;
+}
+
+PHINode::~PHINode() {
+ dropHungoffUses();
+}
+
+Use *PHINode::allocHungoffUses(unsigned N) const {
+ // Allocate the array of Uses of the incoming values, followed by a pointer
+ // (with bottom bit set) to the User, followed by the array of pointers to
+ // the incoming basic blocks.
+ size_t size = N * sizeof(Use) + sizeof(Use::UserRef)
+ + N * sizeof(BasicBlock*);
+ Use *Begin = static_cast<Use*>(::operator new(size));
+ Use *End = Begin + N;
+ (void) new(End) Use::UserRef(const_cast<PHINode*>(this), 1);
+ return Use::initTags(Begin, End);
+}
+
+// removeIncomingValue - Remove an incoming value. This is useful if a
+// predecessor basic block is deleted.
+Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
+ Value *Removed = getIncomingValue(Idx);
+
+ // Move everything after this operand down.
+ //
+ // FIXME: we could just swap with the end of the list, then erase. However,
+ // clients might not expect this to happen. The code as it is thrashes the
+ // use/def lists, which is kinda lame.
+ std::copy(op_begin() + Idx + 1, op_end(), op_begin() + Idx);
+ std::copy(block_begin() + Idx + 1, block_end(), block_begin() + Idx);
+
+ // Nuke the last value.
+ Op<-1>().set(0);
+ --NumOperands;
+
+ // If the PHI node is dead, because it has zero entries, nuke it now.
+ if (getNumOperands() == 0 && DeletePHIIfEmpty) {
+ // If anyone is using this PHI, make them use a dummy value instead...
+ replaceAllUsesWith(UndefValue::get(getType()));
+ eraseFromParent();
+ }
+ return Removed;
+}
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation. This grows the number of ops by 1.5 times.
+/// times.
+///
+void PHINode::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e + e / 2;
+ if (NumOps < 2) NumOps = 2; // 2 op PHI nodes are VERY common.
+
+ Use *OldOps = op_begin();
+ BasicBlock **OldBlocks = block_begin();
+
+ ReservedSpace = NumOps;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ std::copy(OldOps, OldOps + e, op_begin());
+ std::copy(OldBlocks, OldBlocks + e, block_begin());
+
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+/// hasConstantValue - If the specified PHI node always merges together the same
+/// value, return the value, otherwise return null.
+Value *PHINode::hasConstantValue() const {
+ // Exploit the fact that phi nodes always have at least one entry.
+ Value *ConstantValue = getIncomingValue(0);
+ for (unsigned i = 1, e = getNumIncomingValues(); i != e; ++i)
+ if (getIncomingValue(i) != ConstantValue && getIncomingValue(i) != this) {
+ if (ConstantValue != this)
+ return 0; // Incoming values not all the same.
+ // The case where the first value is this PHI.
+ ConstantValue = getIncomingValue(i);
+ }
+ if (ConstantValue == this)
+ return UndefValue::get(getType());
+ return ConstantValue;
+}
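+
+// Illustrative sketch (hypothetical `PN`): the classic client is trivial-PHI
+// elimination.
+//
+//   if (Value *V = PN->hasConstantValue()) {
+//     PN->replaceAllUsesWith(V);
+//     PN->eraseFromParent();
+//   }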
+
+//===----------------------------------------------------------------------===//
+// LandingPadInst Implementation
+//===----------------------------------------------------------------------===//
+
+LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedValues, const Twine &NameStr,
+ Instruction *InsertBefore)
+ : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertBefore) {
+ init(PersonalityFn, 1 + NumReservedValues, NameStr);
+}
+
+LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedValues, const Twine &NameStr,
+ BasicBlock *InsertAtEnd)
+ : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertAtEnd) {
+ init(PersonalityFn, 1 + NumReservedValues, NameStr);
+}
+
+LandingPadInst::LandingPadInst(const LandingPadInst &LP)
+ : Instruction(LP.getType(), Instruction::LandingPad,
+ allocHungoffUses(LP.getNumOperands()), LP.getNumOperands()),
+ ReservedSpace(LP.getNumOperands()) {
+ Use *OL = OperandList, *InOL = LP.OperandList;
+ for (unsigned I = 0, E = ReservedSpace; I != E; ++I)
+ OL[I] = InOL[I];
+
+ setCleanup(LP.isCleanup());
+}
+
+LandingPadInst::~LandingPadInst() {
+ dropHungoffUses();
+}
+
+LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedClauses,
+ const Twine &NameStr,
+ Instruction *InsertBefore) {
+ return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr,
+ InsertBefore);
+}
+
+LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn,
+ unsigned NumReservedClauses,
+ const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr,
+ InsertAtEnd);
+}
+
+void LandingPadInst::init(Value *PersFn, unsigned NumReservedValues,
+ const Twine &NameStr) {
+ ReservedSpace = NumReservedValues;
+ NumOperands = 1;
+ OperandList = allocHungoffUses(ReservedSpace);
+ OperandList[0] = PersFn;
+ setName(NameStr);
+ setCleanup(false);
+}
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation. This grows the number of ops by 2 times.
+void LandingPadInst::growOperands(unsigned Size) {
+ unsigned e = getNumOperands();
+ if (ReservedSpace >= e + Size) return;
+ ReservedSpace = (e + Size / 2) * 2;
+
+ Use *NewOps = allocHungoffUses(ReservedSpace);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i)
+ NewOps[i] = OldOps[i];
+
+ OperandList = NewOps;
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+void LandingPadInst::addClause(Value *Val) {
+ unsigned OpNo = getNumOperands();
+ growOperands(1);
+ assert(OpNo < ReservedSpace && "Growing didn't work!");
+ ++NumOperands;
+ OperandList[OpNo] = Val;
+}
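+
+// Illustrative sketch (hypothetical `ExnTy`, `PersFn`, `TypeInfo`, and
+// `InsertPt`): building a landing pad that catches one type and also runs
+// cleanups.
+//
+//   LandingPadInst *LP = LandingPadInst::Create(ExnTy, PersFn,
+//                                               /*NumReservedClauses=*/1,
+//                                               "lpad", InsertPt);
+//   LP->addClause(TypeInfo);
+//   LP->setCleanup(true);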
+
+//===----------------------------------------------------------------------===//
+// CallInst Implementation
+//===----------------------------------------------------------------------===//
+
+CallInst::~CallInst() {
+}
+
+void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) {
+ assert(NumOperands == Args.size() + 1 && "NumOperands not set up?");
+ Op<-1>() = Func;
+
+#ifndef NDEBUG
+ FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+
+ assert((Args.size() == FTy->getNumParams() ||
+ (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
+ "Calling a function with bad signature!");
+
+ for (unsigned i = 0; i != Args.size(); ++i)
+ assert((i >= FTy->getNumParams() ||
+ FTy->getParamType(i) == Args[i]->getType()) &&
+ "Calling a function with a bad signature!");
+#endif
+
+ std::copy(Args.begin(), Args.end(), op_begin());
+ setName(NameStr);
+}
+
+void CallInst::init(Value *Func, const Twine &NameStr) {
+ assert(NumOperands == 1 && "NumOperands not set up?");
+ Op<-1>() = Func;
+
+#ifndef NDEBUG
+ FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
+
+ assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
+#endif
+
+ setName(NameStr);
+}
+
+CallInst::CallInst(Value *Func, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 1,
+ 1, InsertBefore) {
+ init(Func, Name);
+}
+
+CallInst::CallInst(Value *Func, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
+ ->getElementType())->getReturnType(),
+ Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - 1,
+ 1, InsertAtEnd) {
+ init(Func, Name);
+}
+
+CallInst::CallInst(const CallInst &CI)
+ : Instruction(CI.getType(), Instruction::Call,
+ OperandTraits<CallInst>::op_end(this) - CI.getNumOperands(),
+ CI.getNumOperands()) {
+ setAttributes(CI.getAttributes());
+ setTailCall(CI.isTailCall());
+ setCallingConv(CI.getCallingConv());
+
+ std::copy(CI.op_begin(), CI.op_end(), op_begin());
+ SubclassOptionalData = CI.SubclassOptionalData;
+}
+
+void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addAttribute(getContext(), i, attr);
+ setAttributes(PAL);
+}
+
+void CallInst::removeAttribute(unsigned i, Attribute attr) {
+ AttributeSet PAL = getAttributes();
+ AttrBuilder B(attr);
+ LLVMContext &Context = getContext();
+ PAL = PAL.removeAttributes(Context, i,
+ AttributeSet::get(Context, i, B));
+ setAttributes(PAL);
+}
+
+bool CallInst::hasFnAttr(Attribute::AttrKind A) const {
+ if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
+ return false;
+}
+
+bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
+ if (AttributeList.hasAttribute(i, A))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(i, A);
+ return false;
+}
+
+/// IsConstantOne - Return true only if val is constant int 1
+static bool IsConstantOne(Value *val) {
+ assert(val && "IsConstantOne does not work with NULL val");
+ return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
+}
+
+static Instruction *createMalloc(Instruction *InsertBefore,
+ BasicBlock *InsertAtEnd, Type *IntPtrTy,
+ Type *AllocTy, Value *AllocSize,
+ Value *ArraySize, Function *MallocF,
+ const Twine &Name) {
+ assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
+ "createMalloc needs either InsertBefore or InsertAtEnd");
+
+ // malloc(type) becomes:
+ // bitcast (i8* malloc(typeSize)) to type*
+ // malloc(type, arraySize) becomes:
+ // bitcast (i8* malloc(typeSize*arraySize)) to type*
+ if (!ArraySize)
+ ArraySize = ConstantInt::get(IntPtrTy, 1);
+ else if (ArraySize->getType() != IntPtrTy) {
+ if (InsertBefore)
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertBefore);
+ else
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertAtEnd);
+ }
+
+ if (!IsConstantOne(ArraySize)) {
+ if (IsConstantOne(AllocSize)) {
+ AllocSize = ArraySize; // Operand * 1 = Operand
+ } else if (Constant *CO = dyn_cast<Constant>(ArraySize)) {
+ Constant *Scale = ConstantExpr::getIntegerCast(CO, IntPtrTy,
+ false /*ZExt*/);
+ // Malloc arg is constant product of type size and array size
+ AllocSize = ConstantExpr::getMul(Scale, cast<Constant>(AllocSize));
+ } else {
+ // Multiply type size by the array size...
+ if (InsertBefore)
+ AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
+ "mallocsize", InsertBefore);
+ else
+ AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
+ "mallocsize", InsertAtEnd);
+ }
+ }
+
+ assert(AllocSize->getType() == IntPtrTy && "malloc arg is wrong size");
+ // Create the call to Malloc.
+ BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
+ Module* M = BB->getParent()->getParent();
+ Type *BPTy = Type::getInt8PtrTy(BB->getContext());
+ Value *MallocFunc = MallocF;
+ if (!MallocFunc)
+ // prototype malloc as "void *malloc(size_t)"
+ MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
+ PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
+ CallInst *MCall = NULL;
+ Instruction *Result = NULL;
+ if (InsertBefore) {
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall", InsertBefore);
+ Result = MCall;
+ if (Result->getType() != AllocPtrType)
+ // Create a cast instruction to convert to the right type...
+ Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore);
+ } else {
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall");
+ Result = MCall;
+ if (Result->getType() != AllocPtrType) {
+ InsertAtEnd->getInstList().push_back(MCall);
+ // Create a cast instruction to convert to the right type...
+ Result = new BitCastInst(MCall, AllocPtrType, Name);
+ }
+ }
+ MCall->setTailCall();
+ if (Function *F = dyn_cast<Function>(MallocFunc)) {
+ MCall->setCallingConv(F->getCallingConv());
+ if (!F->doesNotAlias(0)) F->setDoesNotAlias(0);
+ }
+ assert(!MCall->getType()->isVoidTy() && "Malloc has void return type");
+
+ return Result;
+}
+
+/// CreateMalloc - Generate the IR for a call to malloc:
+/// 1. Compute the malloc call's argument as the specified type's size,
+/// possibly multiplied by the array size if the array size is not
+/// constant 1.
+/// 2. Call malloc with that argument.
+/// 3. Bitcast the result of the malloc call to the specified type.
+Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
+ Type *IntPtrTy, Type *AllocTy,
+ Value *AllocSize, Value *ArraySize,
+ Function * MallocF,
+ const Twine &Name) {
+ return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
+ ArraySize, MallocF, Name);
+}
+
+/// CreateMalloc - Generate the IR for a call to malloc:
+/// 1. Compute the malloc call's argument as the specified type's size,
+/// possibly multiplied by the array size if the array size is not
+/// constant 1.
+/// 2. Call malloc with that argument.
+/// 3. Bitcast the result of the malloc call to the specified type.
+/// Note: This function does not add the bitcast to the basic block; that is
+/// the responsibility of the caller.
+Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
+ Type *IntPtrTy, Type *AllocTy,
+ Value *AllocSize, Value *ArraySize,
+ Function *MallocF, const Twine &Name) {
+ return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
+ ArraySize, MallocF, Name);
+}
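+
+// Illustrative sketch (hypothetical `Ctx`, `N`, `BB`; assumes 64-bit
+// pointers): emit malloc(sizeof(i32) * %n) bitcast to i32* at the end of BB.
+// With this overload the caller must still append the bitcast, per the note
+// above.
+//
+//   Type *IntPtrTy = Type::getInt64Ty(Ctx);
+//   Type *I32Ty = Type::getInt32Ty(Ctx);
+//   Value *Size = ConstantExpr::getSizeOf(I32Ty);
+//   Instruction *Arr = CallInst::CreateMalloc(BB, IntPtrTy, I32Ty, Size, N,
+//                                             /*MallocF=*/0, "arr");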
+
+static Instruction* createFree(Value* Source, Instruction *InsertBefore,
+ BasicBlock *InsertAtEnd) {
+ assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
+ "createFree needs either InsertBefore or InsertAtEnd");
+ assert(Source->getType()->isPointerTy() &&
+ "Can not free something of nonpointer type!");
+
+ BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
+ Module* M = BB->getParent()->getParent();
+
+ Type *VoidTy = Type::getVoidTy(M->getContext());
+ Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
+ // prototype free as "void free(void*)"
+ Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
+ CallInst* Result = NULL;
+ Value *PtrCast = Source;
+ if (InsertBefore) {
+ if (Source->getType() != IntPtrTy)
+ PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertBefore);
+ Result = CallInst::Create(FreeFunc, PtrCast, "", InsertBefore);
+ } else {
+ if (Source->getType() != IntPtrTy)
+ PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertAtEnd);
+ Result = CallInst::Create(FreeFunc, PtrCast, "");
+ }
+ Result->setTailCall();
+ if (Function *F = dyn_cast<Function>(FreeFunc))
+ Result->setCallingConv(F->getCallingConv());
+
+ return Result;
+}
+
+/// CreateFree - Generate the IR for a call to the builtin free function.
+Instruction * CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
+ return createFree(Source, InsertBefore, NULL);
+}
+
+/// CreateFree - Generate the IR for a call to the builtin free function.
+/// Note: This function does not add the call to the basic block; that is the
+/// responsibility of the caller.
+Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
+ Instruction* FreeCall = createFree(Source, NULL, InsertAtEnd);
+ assert(FreeCall && "CreateFree did not create a CallInst");
+ return FreeCall;
+}
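+
+// Illustrative sketch (hypothetical `Ptr`, `InsertPt`): freeing the result of
+// a CreateMalloc; the helper bitcasts Ptr to i8* first when needed.
+//
+//   CallInst::CreateFree(Ptr, InsertPt);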
+
+//===----------------------------------------------------------------------===//
+// InvokeInst Implementation
+//===----------------------------------------------------------------------===//
+
+void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, const Twine &NameStr) {
+ assert(NumOperands == 3 + Args.size() && "NumOperands not set up?");
+ Op<-3>() = Fn;
+ Op<-2>() = IfNormal;
+ Op<-1>() = IfException;
+
+#ifndef NDEBUG
+ FunctionType *FTy =
+ cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
+
+ assert(((Args.size() == FTy->getNumParams()) ||
+ (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
+ "Invoking a function with bad signature");
+
+ for (unsigned i = 0, e = Args.size(); i != e; i++)
+ assert((i >= FTy->getNumParams() ||
+ FTy->getParamType(i) == Args[i]->getType()) &&
+ "Invoking a function with a bad signature!");
+#endif
+
+ std::copy(Args.begin(), Args.end(), op_begin());
+ setName(NameStr);
+}
+
+InvokeInst::InvokeInst(const InvokeInst &II)
+ : TerminatorInst(II.getType(), Instruction::Invoke,
+ OperandTraits<InvokeInst>::op_end(this)
+ - II.getNumOperands(),
+ II.getNumOperands()) {
+ setAttributes(II.getAttributes());
+ setCallingConv(II.getCallingConv());
+ std::copy(II.op_begin(), II.op_end(), op_begin());
+ SubclassOptionalData = II.SubclassOptionalData;
+}
+
+BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned InvokeInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ return setSuccessor(idx, B);
+}
+
+bool InvokeInst::hasFnAttr(Attribute::AttrKind A) const {
+ if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
+ return false;
+}
+
+bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
+ if (AttributeList.hasAttribute(i, A))
+ return true;
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(i, A);
+ return false;
+}
+
+void InvokeInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
+ AttributeSet PAL = getAttributes();
+ PAL = PAL.addAttribute(getContext(), i, attr);
+ setAttributes(PAL);
+}
+
+void InvokeInst::removeAttribute(unsigned i, Attribute attr) {
+ AttributeSet PAL = getAttributes();
+ AttrBuilder B(attr);
+ PAL = PAL.removeAttributes(getContext(), i,
+ AttributeSet::get(getContext(), i, B));
+ setAttributes(PAL);
+}
+
+LandingPadInst *InvokeInst::getLandingPadInst() const {
+ return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI());
+}
+
+//===----------------------------------------------------------------------===//
+// ReturnInst Implementation
+//===----------------------------------------------------------------------===//
+
+ReturnInst::ReturnInst(const ReturnInst &RI)
+ : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) -
+ RI.getNumOperands(),
+ RI.getNumOperands()) {
+ if (RI.getNumOperands())
+ Op<0>() = RI.Op<0>();
+ SubclassOptionalData = RI.SubclassOptionalData;
+}
+
+ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertBefore) {
+ if (retVal)
+ Op<0>() = retVal;
+}
+ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
+ InsertAtEnd) {
+ if (retVal)
+ Op<0>() = retVal;
+}
+ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Ret,
+ OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {
+}
+
+unsigned ReturnInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+/// Out-of-line ReturnInst method, put here so the C++ compiler can choose to
+/// emit the vtable for the class in this translation unit.
+void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("ReturnInst has no successors!");
+}
+
+BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("ReturnInst has no successors!");
+}
+
+ReturnInst::~ReturnInst() {
+}
+
+//===----------------------------------------------------------------------===//
+// ResumeInst Implementation
+//===----------------------------------------------------------------------===//
+
+ResumeInst::ResumeInst(const ResumeInst &RI)
+ : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1) {
+ Op<0>() = RI.Op<0>();
+}
+
+ResumeInst::ResumeInst(Value *Exn, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1, InsertBefore) {
+ Op<0>() = Exn;
+}
+
+ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
+ OperandTraits<ResumeInst>::op_begin(this), 1, InsertAtEnd) {
+ Op<0>() = Exn;
+}
+
+unsigned ResumeInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+void ResumeInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("ResumeInst has no successors!");
+}
+
+BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("ResumeInst has no successors!");
+}
+
+//===----------------------------------------------------------------------===//
+// UnreachableInst Implementation
+//===----------------------------------------------------------------------===//
+
+UnreachableInst::UnreachableInst(LLVMContext &Context,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
+ 0, 0, InsertBefore) {
+}
+UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
+ 0, 0, InsertAtEnd) {
+}
+
+unsigned UnreachableInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+
+void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
+ llvm_unreachable("UnreachableInst has no successors!");
+}
+
+BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
+ llvm_unreachable("UnreachableInst has no successors!");
+}
+
+//===----------------------------------------------------------------------===//
+// BranchInst Implementation
+//===----------------------------------------------------------------------===//
+
+void BranchInst::AssertOK() {
+ if (isConditional())
+ assert(getCondition()->getType()->isIntegerTy(1) &&
+ "May only branch on boolean predicates!");
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1,
+ 1, InsertBefore) {
+ assert(IfTrue != 0 && "Branch destination may not be null!");
+ Op<-1>() = IfTrue;
+}
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3,
+ 3, InsertBefore) {
+ Op<-1>() = IfTrue;
+ Op<-2>() = IfFalse;
+ Op<-3>() = Cond;
+#ifndef NDEBUG
+ AssertOK();
+#endif
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 1,
+ 1, InsertAtEnd) {
+ assert(IfTrue != 0 && "Branch destination may not be null!");
+ Op<-1>() = IfTrue;
+}
+
+BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
+ BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - 3,
+ 3, InsertAtEnd) {
+ Op<-1>() = IfTrue;
+ Op<-2>() = IfFalse;
+ Op<-3>() = Cond;
+#ifndef NDEBUG
+ AssertOK();
+#endif
+}
+
+
+BranchInst::BranchInst(const BranchInst &BI) :
+ TerminatorInst(Type::getVoidTy(BI.getContext()), Instruction::Br,
+ OperandTraits<BranchInst>::op_end(this) - BI.getNumOperands(),
+ BI.getNumOperands()) {
+ Op<-1>() = BI.Op<-1>();
+ if (BI.getNumOperands() != 1) {
+ assert(BI.getNumOperands() == 3 && "BR can have 1 or 3 operands!");
+ Op<-3>() = BI.Op<-3>();
+ Op<-2>() = BI.Op<-2>();
+ }
+ SubclassOptionalData = BI.SubclassOptionalData;
+}
+
+void BranchInst::swapSuccessors() {
+ assert(isConditional() &&
+ "Cannot swap successors of an unconditional branch");
+ Op<-1>().swap(Op<-2>());
+
+ // Update profile metadata if present and it matches our structural
+ // expectations.
+ MDNode *ProfileData = getMetadata(LLVMContext::MD_prof);
+ if (!ProfileData || ProfileData->getNumOperands() != 3)
+ return;
+
+ // The first operand is the name; the second and third are the branch
+ // weights. Rebuild the node with the weights swapped to match the
+ // swapped successors.
+ Value *Ops[] = {
+ ProfileData->getOperand(0),
+ ProfileData->getOperand(2),
+ ProfileData->getOperand(1)
+ };
+ setMetadata(LLVMContext::MD_prof,
+ MDNode::get(ProfileData->getContext(), Ops));
+}
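+
+// Illustrative sketch (hypothetical `BI`): inverting a conditional branch
+// while keeping any branch-weight metadata consistent.
+//
+//   Value *NotCond = BinaryOperator::CreateNot(BI->getCondition(), "", BI);
+//   BI->setCondition(NotCond);
+//   BI->swapSuccessors();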
+
+BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned BranchInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+
+//===----------------------------------------------------------------------===//
+// AllocaInst Implementation
+//===----------------------------------------------------------------------===//
+
+static Value *getAISize(LLVMContext &Context, Value *Amt) {
+ if (!Amt)
+ Amt = ConstantInt::get(Type::getInt32Ty(Context), 1);
+ else {
+ assert(!isa<BasicBlock>(Amt) &&
+ "Passed basic block into allocation size parameter! Use other ctor");
+ assert(Amt->getType()->isIntegerTy() &&
+ "Allocation array size is not an integer!");
+ }
+ return Amt;
+}
+
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
+ const Twine &Name, Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
+ Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), 0), InsertBefore) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), 0), InsertAtEnd) {
+ setAlignment(0);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore) {
+ setAlignment(Align);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
+ setAlignment(Align);
+ assert(!Ty->isVoidTy() && "Cannot allocate void!");
+ setName(Name);
+}
+
+// Out of line virtual method, so the vtable, etc. has a home.
+AllocaInst::~AllocaInst() {
+}
+
+void AllocaInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData(Log2_32(Align) + 1);
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+bool AllocaInst::isArrayAllocation() const {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(0)))
+ return !CI->isOne();
+ return true;
+}
+
+Type *AllocaInst::getAllocatedType() const {
+ return getType()->getElementType();
+}
+
+/// isStaticAlloca - Return true if this alloca is in the entry block of the
+/// function and is a constant size. If so, the code generator will fold it
+/// into the prolog/epilog code, so it is basically free.
+bool AllocaInst::isStaticAlloca() const {
+ // Must be constant size.
+ if (!isa<ConstantInt>(getArraySize())) return false;
+
+ // Must be in the entry block.
+ const BasicBlock *Parent = getParent();
+ return Parent == &Parent->getParent()->front();
+}
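+
+// Illustrative sketch (hypothetical `AI` and worklist): promotion passes such
+// as mem2reg restrict themselves to static allocas, which map to fixed stack
+// slots.
+//
+//   if (AI->isStaticAlloca() && !AI->isArrayAllocation())
+//     Worklist.push_back(AI);   // `Worklist` is a hypothetical container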
+
+//===----------------------------------------------------------------------===//
+// LoadInst Implementation
+//===----------------------------------------------------------------------===//
+
+void LoadInst::AssertOK() {
+ assert(getOperand(0)->getType()->isPointerTy() &&
+ "Ptr must have pointer type.");
+ assert(!(isAtomic() && getAlignment() == 0) &&
+ "Alignment required for atomic load");
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(NotAtomic);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
+ AssertOK();
+ setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, Instruction *InsertBef)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
+ Instruction *InsertBef)
+: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertBef) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
+ BasicBlock *InsertAE)
+ : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
+ Load, Ptr, InsertAE) {
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+ if (Name && Name[0]) setName(Name);
+}
+
+void LoadInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
+ ((Log2_32(Align)+1)<<1));
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+//===----------------------------------------------------------------------===//
+// StoreInst Implementation
+//===----------------------------------------------------------------------===//
+
+void StoreInst::AssertOK() {
+ assert(getOperand(0) && getOperand(1) && "Both operands must be non-null!");
+ assert(getOperand(1)->getType()->isPointerTy() &&
+ "Ptr must have pointer type!");
+ assert(getOperand(0)->getType() ==
+ cast<PointerType>(getOperand(1)->getType())->getElementType()
+ && "Ptr must be a pointer to Val type!");
+ assert(!(isAtomic() && getAlignment() == 0) &&
+ "Alignment required for atomic load");
+}
+
+
+StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(false);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(0);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(NotAtomic);
+ AssertOK();
+}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
+ unsigned Align, AtomicOrdering Order,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(val->getContext()), Store,
+ OperandTraits<StoreInst>::op_begin(this),
+ OperandTraits<StoreInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = val;
+ Op<1>() = addr;
+ setVolatile(isVolatile);
+ setAlignment(Align);
+ setAtomic(Order, SynchScope);
+ AssertOK();
+}
+
+void StoreInst::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
+ ((Log2_32(Align)+1) << 1));
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
+//===----------------------------------------------------------------------===//
+// AtomicCmpXchgInst Implementation
+//===----------------------------------------------------------------------===//
+
+void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ Op<0>() = Ptr;
+ Op<1>() = Cmp;
+ Op<2>() = NewVal;
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+
+ assert(getOperand(0) && getOperand(1) && getOperand(2) &&
+ "All operands must be non-null!");
+ assert(getOperand(0)->getType()->isPointerTy() &&
+ "Ptr must have pointer type!");
+ assert(getOperand(1)->getType() ==
+ cast<PointerType>(getOperand(0)->getType())->getElementType()
+ && "Ptr must be a pointer to Cmp type!");
+ assert(getOperand(2)->getType() ==
+ cast<PointerType>(getOperand(0)->getType())->getElementType()
+ && "Ptr must be a pointer to NewVal type!");
+ assert(Ordering != NotAtomic &&
+ "AtomicCmpXchg instructions must be atomic!");
+}
+
+AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Cmp->getType(), AtomicCmpXchg,
+ OperandTraits<AtomicCmpXchgInst>::op_begin(this),
+ OperandTraits<AtomicCmpXchgInst>::operands(this),
+ InsertBefore) {
+ Init(Ptr, Cmp, NewVal, Ordering, SynchScope);
+}
+
+AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Cmp->getType(), AtomicCmpXchg,
+ OperandTraits<AtomicCmpXchgInst>::op_begin(this),
+ OperandTraits<AtomicCmpXchgInst>::operands(this),
+ InsertAtEnd) {
+ Init(Ptr, Cmp, NewVal, Ordering, SynchScope);
+}
+
+//===----------------------------------------------------------------------===//
+// AtomicRMWInst Implementation
+//===----------------------------------------------------------------------===//
+
+void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ Op<0>() = Ptr;
+ Op<1>() = Val;
+ setOperation(Operation);
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+
+ assert(getOperand(0) && getOperand(1) &&
+ "All operands must be non-null!");
+ assert(getOperand(0)->getType()->isPointerTy() &&
+ "Ptr must have pointer type!");
+ assert(getOperand(1)->getType() ==
+ cast<PointerType>(getOperand(0)->getType())->getElementType()
+ && "Ptr must be a pointer to Val type!");
+ assert(Ordering != NotAtomic &&
+ "AtomicRMW instructions must be atomic!");
+}
+
+AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Val->getType(), AtomicRMW,
+ OperandTraits<AtomicRMWInst>::op_begin(this),
+ OperandTraits<AtomicRMWInst>::operands(this),
+ InsertBefore) {
+ Init(Operation, Ptr, Val, Ordering, SynchScope);
+}
+
+AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Val->getType(), AtomicRMW,
+ OperandTraits<AtomicRMWInst>::op_begin(this),
+ OperandTraits<AtomicRMWInst>::operands(this),
+ InsertAtEnd) {
+ Init(Operation, Ptr, Val, Ordering, SynchScope);
+}
+
+//===----------------------------------------------------------------------===//
+// FenceInst Implementation
+//===----------------------------------------------------------------------===//
+
+FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ Instruction *InsertBefore)
+ : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertBefore) {
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+}
+
+FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
+ SynchronizationScope SynchScope,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertAtEnd) {
+ setOrdering(Ordering);
+ setSynchScope(SynchScope);
+}
+
+//===----------------------------------------------------------------------===//
+// GetElementPtrInst Implementation
+//===----------------------------------------------------------------------===//
+
+void GetElementPtrInst::init(Value *Ptr, ArrayRef<Value *> IdxList,
+ const Twine &Name) {
+ assert(NumOperands == 1 + IdxList.size() && "NumOperands not initialized?");
+ OperandList[0] = Ptr;
+ std::copy(IdxList.begin(), IdxList.end(), op_begin() + 1);
+ setName(Name);
+}
+
+GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
+ : Instruction(GEPI.getType(), GetElementPtr,
+ OperandTraits<GetElementPtrInst>::op_end(this)
+ - GEPI.getNumOperands(),
+ GEPI.getNumOperands()) {
+ std::copy(GEPI.op_begin(), GEPI.op_end(), op_begin());
+ SubclassOptionalData = GEPI.SubclassOptionalData;
+}
+
+/// getIndexedType - Returns the type of the element that would be accessed with
+/// a gep instruction with the specified parameters.
+///
+/// The Idxs pointer should point to a contiguous piece of memory containing the
+/// indices, either as Value* or uint64_t.
+///
+/// A null type is returned if the indices are invalid for the specified
+/// pointer type.
+///
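+/// For example, given %T = type { i32, [10 x i8] } and a pointer of type
+/// %T*, the index list (0, 1, 5) walks %T* -> %T -> [10 x i8] -> i8, so the
+/// indexed type is i8.
+///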
+template <typename IndexTy>
+static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
+ PointerType *PTy = dyn_cast<PointerType>(Ptr->getScalarType());
+ if (!PTy) return 0; // Type isn't a pointer type!
+ Type *Agg = PTy->getElementType();
+
+  // Handle the special case of the empty index set, which is always valid.
+ if (IdxList.empty())
+ return Agg;
+
+ // If there is at least one index, the top level type must be sized, otherwise
+ // it cannot be 'stepped over'.
+ if (!Agg->isSized())
+ return 0;
+
+ unsigned CurIdx = 1;
+ for (; CurIdx != IdxList.size(); ++CurIdx) {
+ CompositeType *CT = dyn_cast<CompositeType>(Agg);
+ if (!CT || CT->isPointerTy()) return 0;
+ IndexTy Index = IdxList[CurIdx];
+ if (!CT->indexValid(Index)) return 0;
+ Agg = CT->getTypeAtIndex(Index);
+ }
+ return CurIdx == IdxList.size() ? Agg : 0;
+}
+
+Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList) {
+ return getIndexedTypeInternal(Ptr, IdxList);
+}
+
+Type *GetElementPtrInst::getIndexedType(Type *Ptr,
+ ArrayRef<Constant *> IdxList) {
+ return getIndexedTypeInternal(Ptr, IdxList);
+}
+
+Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) {
+ return getIndexedTypeInternal(Ptr, IdxList);
+}
+
+/// hasAllZeroIndices - Return true if all of the indices of this GEP are
+/// zeros. If so, the result pointer and the first operand have the same
+/// value, just potentially different types.
+bool GetElementPtrInst::hasAllZeroIndices() const {
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(i))) {
+ if (!CI->isZero()) return false;
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+/// hasAllConstantIndices - Return true if all of the indices of this GEP are
+/// constant integers. If so, the result pointer and the first operand have
+/// a constant offset between them.
+bool GetElementPtrInst::hasAllConstantIndices() const {
+ for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ if (!isa<ConstantInt>(getOperand(i)))
+ return false;
+ }
+ return true;
+}
+
+void GetElementPtrInst::setIsInBounds(bool B) {
+ cast<GEPOperator>(this)->setIsInBounds(B);
+}
+
+bool GetElementPtrInst::isInBounds() const {
+ return cast<GEPOperator>(this)->isInBounds();
+}
+
+bool GetElementPtrInst::accumulateConstantOffset(const DataLayout &DL,
+ APInt &Offset) const {
+ // Delegate to the generic GEPOperator implementation.
+ return cast<GEPOperator>(this)->accumulateConstantOffset(DL, Offset);
+}
+
+//===----------------------------------------------------------------------===//
+// ExtractElementInst Implementation
+//===----------------------------------------------------------------------===//
+
+ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
+ const Twine &Name,
+ Instruction *InsertBef)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertBef) {
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
+ const Twine &Name,
+ BasicBlock *InsertAE)
+ : Instruction(cast<VectorType>(Val->getType())->getElementType(),
+ ExtractElement,
+ OperandTraits<ExtractElementInst>::op_begin(this),
+ 2, InsertAE) {
+ assert(isValidOperands(Val, Index) &&
+ "Invalid extractelement instruction operands!");
+
+ Op<0>() = Val;
+ Op<1>() = Index;
+ setName(Name);
+}
+
+
+bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
+ if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32))
+ return false;
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// InsertElementInst Implementation
+//===----------------------------------------------------------------------===//
+
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
+ const Twine &Name,
+ Instruction *InsertBef)
+ : Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertBef) {
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
+ const Twine &Name,
+ BasicBlock *InsertAE)
+ : Instruction(Vec->getType(), InsertElement,
+ OperandTraits<InsertElementInst>::op_begin(this),
+ 3, InsertAE) {
+ assert(isValidOperands(Vec, Elt, Index) &&
+ "Invalid insertelement instruction operands!");
+
+ Op<0>() = Vec;
+ Op<1>() = Elt;
+ Op<2>() = Index;
+ setName(Name);
+}
+
+bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
+ const Value *Index) {
+ if (!Vec->getType()->isVectorTy())
+ return false; // First operand of insertelement must be vector type.
+
+ if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
+    return false; // Second operand of insertelement must be vector element type.
+
+ if (!Index->getType()->isIntegerTy(32))
+ return false; // Third operand of insertelement must be i32.
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// ShuffleVectorInst Implementation
+//===----------------------------------------------------------------------===//
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const Twine &Name,
+ Instruction *InsertBefore)
+: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
+ cast<VectorType>(Mask->getType())->getNumElements()),
+ ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this),
+ InsertBefore) {
+ assert(isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector instruction operands!");
+ Op<0>() = V1;
+ Op<1>() = V2;
+ Op<2>() = Mask;
+ setName(Name);
+}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd)
+: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
+ cast<VectorType>(Mask->getType())->getNumElements()),
+ ShuffleVector,
+ OperandTraits<ShuffleVectorInst>::op_begin(this),
+ OperandTraits<ShuffleVectorInst>::operands(this),
+ InsertAtEnd) {
+ assert(isValidOperands(V1, V2, Mask) &&
+ "Invalid shuffle vector instruction operands!");
+
+ Op<0>() = V1;
+ Op<1>() = V2;
+ Op<2>() = Mask;
+ setName(Name);
+}
+
+bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
+ const Value *Mask) {
+ // V1 and V2 must be vectors of the same type.
+ if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
+ return false;
+
+ // Mask must be vector of i32.
+ VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
+ if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32))
+ return false;
+
+ // Check to see if Mask is valid.
+ if (isa<UndefValue>(Mask) || isa<ConstantAggregateZero>(Mask))
+ return true;
+
+ if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) {
+ unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
+ for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
+ if (CI->uge(V1Size*2))
+ return false;
+ } else if (!isa<UndefValue>(MV->getOperand(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(Mask)) {
+ unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
+ for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i)
+ if (CDS->getElementAsInteger(i) >= V1Size*2)
+ return false;
+ return true;
+ }
+
+  // The bitcode reader can create a placeholder for a forward reference
+ // used as the shuffle mask. When this occurs, the shuffle mask will
+ // fall into this case and fail. To avoid this error, do this bit of
+  // ugliness to allow such a mask to pass.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Mask))
+ if (CE->getOpcode() == Instruction::UserOp1)
+ return true;
+
+ return false;
+}
+
+/// getMaskValue - Return the index from the shuffle mask for the specified
+/// output result. This is either -1 if the element is undef or a number less
+/// than 2*numelements.
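+/// For example, with mask <i32 0, i32 5, i32 undef, i32 3> on two <4 x i32>
+/// inputs, getMaskValue returns 0, 5, -1 and 3 for i = 0..3; values of 4 or
+/// more select from the second input vector.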
+int ShuffleVectorInst::getMaskValue(Constant *Mask, unsigned i) {
+ assert(i < Mask->getType()->getVectorNumElements() && "Index out of range");
+  if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(Mask))
+ return CDS->getElementAsInteger(i);
+ Constant *C = Mask->getAggregateElement(i);
+ if (isa<UndefValue>(C))
+ return -1;
+ return cast<ConstantInt>(C)->getZExtValue();
+}
+
+/// getShuffleMask - Return the full mask for this instruction, where each
+/// element is the element number and undefs are returned as -1.
+void ShuffleVectorInst::getShuffleMask(Constant *Mask,
+ SmallVectorImpl<int> &Result) {
+ unsigned NumElts = Mask->getType()->getVectorNumElements();
+
+  if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Result.push_back(CDS->getElementAsInteger(i));
+ return;
+ }
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *C = Mask->getAggregateElement(i);
+ Result.push_back(isa<UndefValue>(C) ? -1 :
+ cast<ConstantInt>(C)->getZExtValue());
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// InsertValueInst Class
+//===----------------------------------------------------------------------===//
+
+void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
+ const Twine &Name) {
+ assert(NumOperands == 2 && "NumOperands not initialized?");
+
+ // There's no fundamental reason why we require at least one index
+ // (other than weirdness with &*IdxBegin being invalid; see
+ // getelementptr's init routine for example). But there's no
+ // present need to support it.
+ assert(Idxs.size() > 0 && "InsertValueInst must have at least one index");
+
+ assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs) ==
+ Val->getType() && "Inserted value must match indexed type!");
+ Op<0>() = Agg;
+ Op<1>() = Val;
+
+ Indices.append(Idxs.begin(), Idxs.end());
+ setName(Name);
+}
+
+InsertValueInst::InsertValueInst(const InsertValueInst &IVI)
+ : Instruction(IVI.getType(), InsertValue,
+ OperandTraits<InsertValueInst>::op_begin(this), 2),
+ Indices(IVI.Indices) {
+ Op<0>() = IVI.getOperand(0);
+ Op<1>() = IVI.getOperand(1);
+ SubclassOptionalData = IVI.SubclassOptionalData;
+}
+
+//===----------------------------------------------------------------------===//
+// ExtractValueInst Class
+//===----------------------------------------------------------------------===//
+
+void ExtractValueInst::init(ArrayRef<unsigned> Idxs, const Twine &Name) {
+ assert(NumOperands == 1 && "NumOperands not initialized?");
+
+ // There's no fundamental reason why we require at least one index.
+ // But there's no present need to support it.
+ assert(Idxs.size() > 0 && "ExtractValueInst must have at least one index");
+
+ Indices.append(Idxs.begin(), Idxs.end());
+ setName(Name);
+}
+
+ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
+ : UnaryInstruction(EVI.getType(), ExtractValue, EVI.getOperand(0)),
+ Indices(EVI.Indices) {
+ SubclassOptionalData = EVI.SubclassOptionalData;
+}
+
+// getIndexedType - Returns the type of the element that would be extracted
+// with an extractvalue instruction with the specified parameters.
+//
+// A null type is returned if the indices are invalid for the specified
+// aggregate type.
+//
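+// For example, for the aggregate type {i32, {float, [4 x i8]}}, the index
+// list (1, 1, 2) yields i8, while (1, 1, 4) returns null because index 4 is
+// out of bounds for [4 x i8].
+//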
+Type *ExtractValueInst::getIndexedType(Type *Agg,
+ ArrayRef<unsigned> Idxs) {
+ for (unsigned CurIdx = 0; CurIdx != Idxs.size(); ++CurIdx) {
+ unsigned Index = Idxs[CurIdx];
+ // We can't use CompositeType::indexValid(Index) here.
+ // indexValid() always returns true for arrays because getelementptr allows
+ // out-of-bounds indices. Since we don't allow those for extractvalue and
+ // insertvalue we need to check array indexing manually.
+ // Since the only other types we can index into are struct types it's just
+ // as easy to check those manually as well.
+ if (ArrayType *AT = dyn_cast<ArrayType>(Agg)) {
+ if (Index >= AT->getNumElements())
+ return 0;
+ } else if (StructType *ST = dyn_cast<StructType>(Agg)) {
+ if (Index >= ST->getNumElements())
+ return 0;
+ } else {
+ // Not a valid type to index into.
+ return 0;
+ }
+
+ Agg = cast<CompositeType>(Agg)->getTypeAtIndex(Index);
+ }
+  return Agg;
+}
+
+//===----------------------------------------------------------------------===//
+// BinaryOperator Class
+//===----------------------------------------------------------------------===//
+
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+ Type *Ty, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(Ty, iType,
+ OperandTraits<BinaryOperator>::op_begin(this),
+ OperandTraits<BinaryOperator>::operands(this),
+ InsertBefore) {
+ Op<0>() = S1;
+ Op<1>() = S2;
+ init(iType);
+ setName(Name);
+}
+
+BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
+ Type *Ty, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(Ty, iType,
+ OperandTraits<BinaryOperator>::op_begin(this),
+ OperandTraits<BinaryOperator>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = S1;
+ Op<1>() = S2;
+ init(iType);
+ setName(Name);
+}
+
+
+void BinaryOperator::init(BinaryOps iType) {
+ Value *LHS = getOperand(0), *RHS = getOperand(1);
+ (void)LHS; (void)RHS; // Silence warnings.
+ assert(LHS->getType() == RHS->getType() &&
+ "Binary operator operand types must match!");
+#ifndef NDEBUG
+ switch (iType) {
+ case Add: case Sub:
+ case Mul:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isIntOrIntVectorTy() &&
+ "Tried to create an integer operation on a non-integer type!");
+ break;
+ case FAdd: case FSub:
+ case FMul:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
+ break;
+ case UDiv:
+ case SDiv:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Incorrect operand type (not integer) for S/UDIV");
+ break;
+ case FDiv:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Incorrect operand type (not floating point) for FDIV");
+ break;
+ case URem:
+ case SRem:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Incorrect operand type (not integer) for S/UREM");
+ break;
+ case FRem:
+ assert(getType() == LHS->getType() &&
+ "Arithmetic operation should return same type as operands!");
+ assert(getType()->isFPOrFPVectorTy() &&
+ "Incorrect operand type (not floating point) for FREM");
+ break;
+ case Shl:
+ case LShr:
+ case AShr:
+ assert(getType() == LHS->getType() &&
+ "Shift operation should return same type as operands!");
+ assert((getType()->isIntegerTy() ||
+ (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Tried to create a shift operation on a non-integral type!");
+ break;
+ case And: case Or:
+ case Xor:
+ assert(getType() == LHS->getType() &&
+ "Logical operation should return same type as operands!");
+ assert((getType()->isIntegerTy() ||
+ (getType()->isVectorTy() &&
+ cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
+ "Tried to create a logical operation on a non-integral type!");
+ break;
+ default:
+ break;
+ }
+#endif
+}
+
+BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(S1->getType() == S2->getType() &&
+ "Cannot create binary operator with two operands of differing type!");
+ return new BinaryOperator(Op, S1, S2, S1->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ BinaryOperator *Res = Create(Op, S1, S2, Name);
+ InsertAtEnd->getInstList().push_back(Res);
+ return Res;
+}
+
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::Sub,
+ zero, Op,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::Sub,
+ zero, Op,
+ Op->getType(), Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::FSub, zero, Op,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ return new BinaryOperator(Instruction::FSub, zero, Op,
+ Op->getType(), Name, InsertAtEnd);
+}
+
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
+ Instruction *InsertBefore) {
+ Constant *C = Constant::getAllOnesValue(Op->getType());
+ return new BinaryOperator(Instruction::Xor, Op, C,
+ Op->getType(), Name, InsertBefore);
+}
+
+BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ Constant *AllOnes = Constant::getAllOnesValue(Op->getType());
+ return new BinaryOperator(Instruction::Xor, Op, AllOnes,
+ Op->getType(), Name, InsertAtEnd);
+}
+
+
+// isConstantAllOnes - Helper function for several functions below
+static inline bool isConstantAllOnes(const Value *V) {
+ if (const Constant *C = dyn_cast<Constant>(V))
+ return C->isAllOnesValue();
+ return false;
+}
+
+bool BinaryOperator::isNeg(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ if (Bop->getOpcode() == Instruction::Sub)
+ if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0)))
+ return C->isNegativeZeroValue();
+ return false;
+}
+
+bool BinaryOperator::isFNeg(const Value *V, bool IgnoreZeroSign) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ if (Bop->getOpcode() == Instruction::FSub)
+ if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0))) {
+ if (!IgnoreZeroSign)
+ IgnoreZeroSign = cast<Instruction>(V)->hasNoSignedZeros();
+ return !IgnoreZeroSign ? C->isNegativeZeroValue() : C->isZeroValue();
+ }
+ return false;
+}
+
+bool BinaryOperator::isNot(const Value *V) {
+ if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
+ return (Bop->getOpcode() == Instruction::Xor &&
+ (isConstantAllOnes(Bop->getOperand(1)) ||
+ isConstantAllOnes(Bop->getOperand(0))));
+ return false;
+}
+
+Value *BinaryOperator::getNegArgument(Value *BinOp) {
+ return cast<BinaryOperator>(BinOp)->getOperand(1);
+}
+
+const Value *BinaryOperator::getNegArgument(const Value *BinOp) {
+ return getNegArgument(const_cast<Value*>(BinOp));
+}
+
+Value *BinaryOperator::getFNegArgument(Value *BinOp) {
+ return cast<BinaryOperator>(BinOp)->getOperand(1);
+}
+
+const Value *BinaryOperator::getFNegArgument(const Value *BinOp) {
+ return getFNegArgument(const_cast<Value*>(BinOp));
+}
+
+Value *BinaryOperator::getNotArgument(Value *BinOp) {
+ assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!");
+ BinaryOperator *BO = cast<BinaryOperator>(BinOp);
+ Value *Op0 = BO->getOperand(0);
+ Value *Op1 = BO->getOperand(1);
+ if (isConstantAllOnes(Op0)) return Op1;
+
+ assert(isConstantAllOnes(Op1));
+ return Op0;
+}
+
+const Value *BinaryOperator::getNotArgument(const Value *BinOp) {
+ return getNotArgument(const_cast<Value*>(BinOp));
+}
+
+
+// swapOperands - Exchange the two operands to this instruction. This
+// instruction is safe to use on any binary instruction and does not
+// modify the semantics of the instruction. If the instruction is
+// order dependent (e.g. SetLT), the opcode is changed.
+//
+bool BinaryOperator::swapOperands() {
+ if (!isCommutative())
+ return true; // Can't commute operands
+ Op<0>().swap(Op<1>());
+ return false;
+}
+
+void BinaryOperator::setHasNoUnsignedWrap(bool b) {
+ cast<OverflowingBinaryOperator>(this)->setHasNoUnsignedWrap(b);
+}
+
+void BinaryOperator::setHasNoSignedWrap(bool b) {
+ cast<OverflowingBinaryOperator>(this)->setHasNoSignedWrap(b);
+}
+
+void BinaryOperator::setIsExact(bool b) {
+ cast<PossiblyExactOperator>(this)->setIsExact(b);
+}
+
+bool BinaryOperator::hasNoUnsignedWrap() const {
+ return cast<OverflowingBinaryOperator>(this)->hasNoUnsignedWrap();
+}
+
+bool BinaryOperator::hasNoSignedWrap() const {
+ return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
+}
+
+bool BinaryOperator::isExact() const {
+ return cast<PossiblyExactOperator>(this)->isExact();
+}
+
+//===----------------------------------------------------------------------===//
+// FPMathOperator Class
+//===----------------------------------------------------------------------===//
+
+/// getFPAccuracy - Get the maximum error permitted by this operation in ULPs.
+/// An accuracy of 0.0 means that the operation should be performed with the
+/// default precision.
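+/// For example, an instruction whose !fpmath metadata holds the float 2.5
+/// may be computed with up to 2.5 ULPs of error.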
+float FPMathOperator::getFPAccuracy() const {
+ const MDNode *MD =
+ cast<Instruction>(this)->getMetadata(LLVMContext::MD_fpmath);
+ if (!MD)
+ return 0.0;
+ ConstantFP *Accuracy = cast<ConstantFP>(MD->getOperand(0));
+ return Accuracy->getValueAPF().convertToFloat();
+}
+
+
+//===----------------------------------------------------------------------===//
+// CastInst Class
+//===----------------------------------------------------------------------===//
+
+void CastInst::anchor() {}
+
+// Just determine if this cast only deals with integral->integral conversion.
+bool CastInst::isIntegerCast() const {
+ switch (getOpcode()) {
+ default: return false;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::Trunc:
+ return true;
+ case Instruction::BitCast:
+ return getOperand(0)->getType()->isIntegerTy() &&
+ getType()->isIntegerTy();
+ }
+}
+
+bool CastInst::isLosslessCast() const {
+ // Only BitCast can be lossless, exit fast if we're not BitCast
+ if (getOpcode() != Instruction::BitCast)
+ return false;
+
+ // Identity cast is always lossless
+ Type* SrcTy = getOperand(0)->getType();
+ Type* DstTy = getType();
+ if (SrcTy == DstTy)
+ return true;
+
+ // Pointer to pointer is always lossless.
+ if (SrcTy->isPointerTy())
+ return DstTy->isPointerTy();
+ return false; // Other types have no identity values
+}
+
+/// This function determines if the CastInst does not require any bits to be
+/// changed in order to effect the cast. Essentially, it identifies cases where
+/// no code gen is necessary for the cast, hence the name no-op cast. For
+/// example, the following are all no-op casts:
+/// # bitcast i32* %x to i8*
+/// # bitcast <2 x i32> %x to <4 x i16>
+///   # ptrtoint i32* %x to i32     ; on 32-bit platforms only
+/// @brief Determine if the described cast is a no-op.
+bool CastInst::isNoopCast(Instruction::CastOps Opcode,
+ Type *SrcTy,
+ Type *DestTy,
+ Type *IntPtrTy) {
+ switch (Opcode) {
+ default: llvm_unreachable("Invalid CastOp");
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return false; // These always modify bits
+ case Instruction::BitCast:
+ return true; // BitCast never modifies bits.
+ case Instruction::PtrToInt:
+ return IntPtrTy->getScalarSizeInBits() ==
+ DestTy->getScalarSizeInBits();
+ case Instruction::IntToPtr:
+ return IntPtrTy->getScalarSizeInBits() ==
+ SrcTy->getScalarSizeInBits();
+ }
+}
+
+/// @brief Determine if a cast is a no-op.
+bool CastInst::isNoopCast(Type *IntPtrTy) const {
+ return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
+}
+
+/// This function determines if a pair of casts can be eliminated and what
+/// opcode should be used in the elimination. This assumes that there are two
+/// instructions like this:
+/// * %F = firstOpcode SrcTy %x to MidTy
+/// * %S = secondOpcode MidTy %F to DstTy
+/// The function returns a resultOpcode so these two casts can be replaced with:
+/// * %Replacement = resultOpcode %SrcTy %x to DstTy
+/// If no such cast is permitted, the function returns 0.
+unsigned CastInst::isEliminableCastPair(
+ Instruction::CastOps firstOp, Instruction::CastOps secondOp,
+ Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy,
+ Type *DstIntPtrTy) {
+ // Define the 144 possibilities for these two cast instructions. The values
+ // in this matrix determine what to do in a given situation and select the
+ // case in the switch below. The rows correspond to firstOp, the columns
+ // correspond to secondOp. In looking at the table below, keep in mind
+ // the following cast properties:
+ //
+ // Size Compare Source Destination
+ // Operator Src ? Size Type Sign Type Sign
+ // -------- ------------ ------------------- ---------------------
+ // TRUNC > Integer Any Integral Any
+ // ZEXT < Integral Unsigned Integer Any
+ // SEXT < Integral Signed Integer Any
+ // FPTOUI n/a FloatPt n/a Integral Unsigned
+ // FPTOSI n/a FloatPt n/a Integral Signed
+ // UITOFP n/a Integral Unsigned FloatPt n/a
+ // SITOFP n/a Integral Signed FloatPt n/a
+ // FPTRUNC > FloatPt n/a FloatPt n/a
+ // FPEXT < FloatPt n/a FloatPt n/a
+ // PTRTOINT n/a Pointer n/a Integral Unsigned
+ // INTTOPTR n/a Integral Unsigned Pointer n/a
+ // BITCAST = FirstClass n/a FirstClass n/a
+ //
+  // NOTE: some transforms are safe, but we consider them not profitable.
+ // For example, we could merge "fptoui double to i32" + "zext i32 to i64",
+ // into "fptoui double to i64", but this loses information about the range
+ // of the produced value (we no longer know the top-part is all zeros).
+ // Further this conversion is often much more expensive for typical hardware,
+ // and causes issues when building libgcc. We disallow fptosi+sext for the
+ // same reason.
+ const unsigned numCastOps =
+ Instruction::CastOpsEnd - Instruction::CastOpsBegin;
+ static const uint8_t CastResults[numCastOps][numCastOps] = {
+ // T F F U S F F P I B -+
+ // R Z S P P I I T P 2 N T |
+ // U E E 2 2 2 2 R E I T C +- secondOp
+ // N X X U S F F N X N 2 V |
+ // C T T I I P P C T T P T -+
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // Trunc -+
+ { 8, 1, 9,99,99, 2, 0,99,99,99, 2, 3 }, // ZExt |
+ { 8, 0, 1,99,99, 0, 2,99,99,99, 0, 3 }, // SExt |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToUI |
+ { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToSI |
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // UIToFP +- firstOp
+ { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // SIToFP |
+ { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4 }, // FPTrunc |
+ { 99,99,99, 2, 2,99,99,10, 2,99,99, 4 }, // FPExt |
+ { 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3 }, // PtrToInt |
+ { 99,99,99,99,99,99,99,99,99,13,99,12 }, // IntToPtr |
+ { 5, 5, 5, 6, 6, 5, 5, 6, 6,11, 5, 1 }, // BitCast -+
+ };
+
+ // If either of the casts are a bitcast from scalar to vector, disallow the
+ // merging. However, bitcast of A->B->A are allowed.
+ bool isFirstBitcast = (firstOp == Instruction::BitCast);
+ bool isSecondBitcast = (secondOp == Instruction::BitCast);
+ bool chainedBitcast = (SrcTy == DstTy && isFirstBitcast && isSecondBitcast);
+
+ // Check if any of the bitcasts convert scalars<->vectors.
+ if ((isFirstBitcast && isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
+ (isSecondBitcast && isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
+    // Unless we are bitcasting to the original type, disallow optimizations.
+ if (!chainedBitcast) return 0;
+
+ int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
+ [secondOp-Instruction::CastOpsBegin];
+ switch (ElimCase) {
+ case 0:
+ // categorically disallowed
+ return 0;
+ case 1:
+ // allowed, use first cast's opcode
+ return firstOp;
+ case 2:
+ // allowed, use second cast's opcode
+ return secondOp;
+ case 3:
+ // no-op cast in second op implies firstOp as long as the DestTy
+ // is integer and we are not converting between a vector and a
+      // non-vector type.
+ if (!SrcTy->isVectorTy() && DstTy->isIntegerTy())
+ return firstOp;
+ return 0;
+ case 4:
+ // no-op cast in second op implies firstOp as long as the DestTy
+ // is floating point.
+ if (DstTy->isFloatingPointTy())
+ return firstOp;
+ return 0;
+ case 5:
+ // no-op cast in first op implies secondOp as long as the SrcTy
+ // is an integer.
+ if (SrcTy->isIntegerTy())
+ return secondOp;
+ return 0;
+ case 6:
+ // no-op cast in first op implies secondOp as long as the SrcTy
+      // is a floating-point type.
+ if (SrcTy->isFloatingPointTy())
+ return secondOp;
+ return 0;
+ case 7: {
+ // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size
+ if (!SrcIntPtrTy || DstIntPtrTy != SrcIntPtrTy)
+ return 0;
+ unsigned PtrSize = SrcIntPtrTy->getScalarSizeInBits();
+ unsigned MidSize = MidTy->getScalarSizeInBits();
+ if (MidSize >= PtrSize)
+ return Instruction::BitCast;
+ return 0;
+ }
+ case 8: {
+      // ext, trunc -> bitcast,    if the SrcTy and DstTy are the same size
+ // ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy)
+ // ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy)
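+      // For example, "zext i8 %x to i32" followed by a trunc to i16 has
+      // SrcSize (8) < DstSize (16) and folds to "zext i8 %x to i16"; a trunc
+      // back to i8 has SrcSize == DstSize and folds to a no-op bitcast.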
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
+ if (SrcSize == DstSize)
+ return Instruction::BitCast;
+ else if (SrcSize < DstSize)
+ return firstOp;
+ return secondOp;
+ }
+ case 9: // zext, sext -> zext, because sext can't sign extend after zext
+ return Instruction::ZExt;
+ case 10:
+      // fpext followed by fptrunc is allowed if the bit size returned to is
+      // the same as the original, in which case it's just a bitcast
+ if (SrcTy == DstTy)
+ return Instruction::BitCast;
+ return 0; // If the types are not the same we can't eliminate it.
+ case 11:
+ // bitcast followed by ptrtoint is allowed as long as the bitcast
+ // is a pointer to pointer cast.
+ if (SrcTy->isPointerTy() && MidTy->isPointerTy())
+ return secondOp;
+ return 0;
+ case 12:
+      // inttoptr, bitcast -> inttoptr if bitcast is a ptr to ptr cast
+ if (MidTy->isPointerTy() && DstTy->isPointerTy())
+ return firstOp;
+ return 0;
+ case 13: {
+ // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
+ if (!MidIntPtrTy)
+ return 0;
+ unsigned PtrSize = MidIntPtrTy->getScalarSizeInBits();
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
+ if (SrcSize <= PtrSize && SrcSize == DstSize)
+ return Instruction::BitCast;
+ return 0;
+ }
+ case 99:
+ // cast combination can't happen (error in input). This is for all cases
+ // where the MidTy is not the same for the two cast instructions.
+ llvm_unreachable("Invalid Cast Combination");
+ default:
+ llvm_unreachable("Error in CastResults table!!!");
+ }
+}
+
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
+ const Twine &Name, Instruction *InsertBefore) {
+ assert(castIsValid(op, S, Ty) && "Invalid cast!");
+ // Construct and return the appropriate CastInst subclass
+ switch (op) {
+ case Trunc: return new TruncInst (S, Ty, Name, InsertBefore);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertBefore);
+ case SExt: return new SExtInst (S, Ty, Name, InsertBefore);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertBefore);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertBefore);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertBefore);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore);
+ default: llvm_unreachable("Invalid opcode provided");
+ }
+}
+
+CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
+ const Twine &Name, BasicBlock *InsertAtEnd) {
+ assert(castIsValid(op, S, Ty) && "Invalid cast!");
+ // Construct and return the appropriate CastInst subclass
+ switch (op) {
+ case Trunc: return new TruncInst (S, Ty, Name, InsertAtEnd);
+ case ZExt: return new ZExtInst (S, Ty, Name, InsertAtEnd);
+ case SExt: return new SExtInst (S, Ty, Name, InsertAtEnd);
+ case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertAtEnd);
+ case FPExt: return new FPExtInst (S, Ty, Name, InsertAtEnd);
+ case UIToFP: return new UIToFPInst (S, Ty, Name, InsertAtEnd);
+ case SIToFP: return new SIToFPInst (S, Ty, Name, InsertAtEnd);
+ case FPToUI: return new FPToUIInst (S, Ty, Name, InsertAtEnd);
+ case FPToSI: return new FPToSIInst (S, Ty, Name, InsertAtEnd);
+ case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd);
+ case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd);
+ case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd);
+ default: llvm_unreachable("Invalid opcode provided");
+ }
+}
+
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::ZExt, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::SExt, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+ return Create(Instruction::Trunc, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
+ "Invalid cast");
+
+ if (Ty->isIntegerTy())
+ return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd);
+ return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
+}
+
+/// @brief Create a BitCast or a PtrToInt cast instruction
+CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast");
+ assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) &&
+ "Invalid cast");
+
+ if (Ty->isIntOrIntVectorTy())
+ return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore);
+ return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
+ bool isSigned, const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "Invalid integer cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return Create(opcode, C, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
+ bool isSigned, const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::Trunc :
+ (isSigned ? Instruction::SExt : Instruction::ZExt)));
+ return Create(opcode, C, Ty, Name, InsertAtEnd);
+}
+
+CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
+ const Twine &Name,
+ Instruction *InsertBefore) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
+ return Create(opcode, C, Ty, Name, InsertBefore);
+}
+
+CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd) {
+ assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
+ Instruction::CastOps opcode =
+ (SrcBits == DstBits ? Instruction::BitCast :
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
+ return Create(opcode, C, Ty, Name, InsertAtEnd);
+}
+
+// Check whether it is valid to call getCastOpcode for these types.
+// This routine must be kept in sync with getCastOpcode.
+bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
+ if (!SrcTy->isFirstClassType() || !DestTy->isFirstClassType())
+ return false;
+
+ if (SrcTy == DestTy)
+ return true;
+
+ if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
+ if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
+ if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
+ // An element by element cast. Valid if casting the elements is valid.
+ SrcTy = SrcVecTy->getElementType();
+ DestTy = DestVecTy->getElementType();
+ }
+
+ // Get the bit sizes, we'll need these
+ unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
+
+ // Run through the possibilities ...
+ if (DestTy->isIntegerTy()) { // Casting to integral
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ return true;
+ } else if (SrcTy->isVectorTy()) { // Casting from vector
+ return DestBits == SrcBits;
+ } else { // Casting from something else
+ return SrcTy->isPointerTy();
+ }
+ } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ return true;
+ } else if (SrcTy->isVectorTy()) { // Casting from vector
+ return DestBits == SrcBits;
+ } else { // Casting from something else
+ return false;
+ }
+ } else if (DestTy->isVectorTy()) { // Casting to vector
+ return DestBits == SrcBits;
+ } else if (DestTy->isPointerTy()) { // Casting to pointer
+ if (SrcTy->isPointerTy()) { // Casting from pointer
+ return true;
+ } else if (SrcTy->isIntegerTy()) { // Casting from integral
+ return true;
+ } else { // Casting from something else
+ return false;
+ }
+ } else if (DestTy->isX86_MMXTy()) {
+ if (SrcTy->isVectorTy()) {
+ return DestBits == SrcBits; // 64-bit vector to MMX
+ } else {
+ return false;
+ }
+ } else { // Casting to something else
+ return false;
+ }
+}
+
+// Provide a way to get a "cast" where the cast opcode is inferred from the
+// types and size of the operand. This, basically, parallels the logic in
+// the castIsValid function below. The following axiom should hold:
+//   castIsValid(getCastOpcode(Val, Ty), Val, Ty) == true
+// In other words, this produces a "correct"
+// casting opcode for the arguments passed to it.
+// This routine must be kept in sync with isCastable.
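+// For example, an i8 source with SrcIsSigned == true and an i32 destination
+// yields SExt, an unsigned i8 source yields ZExt, and an i32 source with a
+// float destination and SrcIsSigned == true yields SIToFP.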
+Instruction::CastOps
+CastInst::getCastOpcode(
+ const Value *Src, bool SrcIsSigned, Type *DestTy, bool DestIsSigned) {
+ Type *SrcTy = Src->getType();
+
+ assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() &&
+ "Only first class types are castable!");
+
+ if (SrcTy == DestTy)
+ return BitCast;
+
+ if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
+ if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
+ if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
+ // An element by element cast. Find the appropriate opcode based on the
+ // element types.
+ SrcTy = SrcVecTy->getElementType();
+ DestTy = DestVecTy->getElementType();
+ }
+
+ // Get the bit sizes, we'll need these
+ unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
+
+ // Run through the possibilities ...
+ if (DestTy->isIntegerTy()) { // Casting to integral
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ if (DestBits < SrcBits)
+ return Trunc; // int -> smaller int
+      else if (DestBits > SrcBits) {      // it's an extension
+ if (SrcIsSigned)
+ return SExt; // signed -> SEXT
+ else
+ return ZExt; // unsigned -> ZEXT
+ } else {
+ return BitCast; // Same size, No-op cast
+ }
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (DestIsSigned)
+ return FPToSI; // FP -> sint
+ else
+ return FPToUI; // FP -> uint
+ } else if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Casting vector to integer of different width");
+ return BitCast; // Same size, no-op cast
+ } else {
+ assert(SrcTy->isPointerTy() &&
+ "Casting from a value that is not first-class type");
+ return PtrToInt; // ptr -> int
+ }
+ } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) { // Casting from integral
+ if (SrcIsSigned)
+ return SIToFP; // sint -> FP
+ else
+ return UIToFP; // uint -> FP
+ } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (DestBits < SrcBits) {
+ return FPTrunc; // FP -> smaller FP
+ } else if (DestBits > SrcBits) {
+ return FPExt; // FP -> larger FP
+ } else {
+ return BitCast; // same size, no-op cast
+ }
+ } else if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Casting vector to floating point of different width");
+ return BitCast; // same size, no-op cast
+ }
+ llvm_unreachable("Casting pointer or non-first class to float");
+ } else if (DestTy->isVectorTy()) {
+ assert(DestBits == SrcBits &&
+ "Illegal cast to vector (wrong type or size)");
+ return BitCast;
+ } else if (DestTy->isPointerTy()) {
+ if (SrcTy->isPointerTy()) {
+ return BitCast; // ptr -> ptr
+ } else if (SrcTy->isIntegerTy()) {
+ return IntToPtr; // int -> ptr
+ }
+ llvm_unreachable("Casting pointer to other than pointer or int");
+ } else if (DestTy->isX86_MMXTy()) {
+ if (SrcTy->isVectorTy()) {
+ assert(DestBits == SrcBits && "Casting vector of wrong width to X86_MMX");
+ return BitCast; // 64-bit vector to MMX
+ }
+ llvm_unreachable("Illegal cast to X86_MMX");
+ }
+ llvm_unreachable("Casting to type that is not first-class");
+}
+
+//===----------------------------------------------------------------------===//
+// CastInst SubClass Constructors
+//===----------------------------------------------------------------------===//
+
+/// Check that the construction parameters for a CastInst are correct. This
+/// could be broken out into the separate constructors but it is useful to have
+/// it in one place and to eliminate the redundant code for getting the sizes
+/// of the types involved.
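+/// For example, Trunc from <4 x i32> to <4 x i16> is valid (equal vector
+/// lengths, 32 > 16), but Trunc from <4 x i32> to <2 x i16> is not, because
+/// the vector lengths differ.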
+bool
+CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
+
+ // Check for type sanity on the arguments
+ Type *SrcTy = S->getType();
+
+ // If this is a cast to the same type then it's trivially true.
+ if (SrcTy == DstTy)
+ return true;
+
+ if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() ||
+ SrcTy->isAggregateType() || DstTy->isAggregateType())
+ return false;
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DstBitSize = DstTy->getScalarSizeInBits();
+
+ // If these are vector types, get the lengths of the vectors (using zero for
+ // scalar types means that checking that vector lengths match also checks that
+ // scalars are not being converted to vectors or vectors to scalars).
+ unsigned SrcLength = SrcTy->isVectorTy() ?
+ cast<VectorType>(SrcTy)->getNumElements() : 0;
+ unsigned DstLength = DstTy->isVectorTy() ?
+ cast<VectorType>(DstTy)->getNumElements() : 0;
+
+ // Switch on the opcode provided
+ switch (op) {
+ default: return false; // This is an input error
+ case Instruction::Trunc:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize > DstBitSize;
+ case Instruction::ZExt:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::SExt:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::FPTrunc:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength && SrcBitSize > DstBitSize;
+ case Instruction::FPExt:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength && SrcBitSize < DstBitSize;
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ return SrcTy->isIntOrIntVectorTy() && DstTy->isFPOrFPVectorTy() &&
+ SrcLength == DstLength;
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() &&
+ SrcLength == DstLength;
+ case Instruction::PtrToInt:
+ if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy))
+ return false;
+ if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
+ if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isPointerTy() &&
+ DstTy->getScalarType()->isIntegerTy();
+ case Instruction::IntToPtr:
+ if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy))
+ return false;
+ if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
+ if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isIntegerTy() &&
+ DstTy->getScalarType()->isPointerTy();
+ case Instruction::BitCast:
+ // BitCast implies a no-op cast of type only. No bits change.
+ // However, you can't cast pointers to anything but pointers.
+ if (SrcTy->isPointerTy() != DstTy->isPointerTy())
+ return false;
+
+ // Now we know we're not dealing with a pointer/non-pointer mismatch. In all
+ // these cases, the cast is okay if the source and destination bit widths
+ // are identical.
+ return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits();
+ }
+}
+
+TruncInst::TruncInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, Trunc, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
+}
+
+TruncInst::TruncInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
+}
+
+ZExtInst::ZExtInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
+}
+
+ZExtInst::ZExtInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
+}
+SExtInst::SExtInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, SExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
+}
+
+SExtInst::SExtInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
+}
+
+FPTruncInst::FPTruncInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
+}
+
+FPTruncInst::FPTruncInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
+}
+
+FPExtInst::FPExtInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
+}
+
+FPExtInst::FPExtInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
+}
+
+UIToFPInst::UIToFPInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
+}
+
+UIToFPInst::UIToFPInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
+}
+
+SIToFPInst::SIToFPInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
+}
+
+SIToFPInst::SIToFPInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
+}
+
+FPToUIInst::FPToUIInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
+}
+
+FPToUIInst::FPToUIInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
+}
+
+FPToSIInst::FPToSIInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
+}
+
+FPToSIInst::FPToSIInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
+}
+
+PtrToIntInst::PtrToIntInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
+}
+
+PtrToIntInst::PtrToIntInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
+}
+
+IntToPtrInst::IntToPtrInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
+}
+
+IntToPtrInst::IntToPtrInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
+}
+
+BitCastInst::BitCastInst(
+ Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
+) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
+}
+
+BitCastInst::BitCastInst(
+ Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
+) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
+ assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
+}
+
+//===----------------------------------------------------------------------===//
+// CmpInst Classes
+//===----------------------------------------------------------------------===//
+
+void CmpInst::anchor() {}
+
+CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
+ Value *LHS, Value *RHS, const Twine &Name,
+ Instruction *InsertBefore)
+ : Instruction(ty, op,
+ OperandTraits<CmpInst>::op_begin(this),
+ OperandTraits<CmpInst>::operands(this),
+ InsertBefore) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ setPredicate((Predicate)predicate);
+ setName(Name);
+}
+
+CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
+ Value *LHS, Value *RHS, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : Instruction(ty, op,
+ OperandTraits<CmpInst>::op_begin(this),
+ OperandTraits<CmpInst>::operands(this),
+ InsertAtEnd) {
+ Op<0>() = LHS;
+ Op<1>() = RHS;
+ setPredicate((Predicate)predicate);
+ setName(Name);
+}
+
+CmpInst *
+CmpInst::Create(OtherOps Op, unsigned short predicate,
+ Value *S1, Value *S2,
+ const Twine &Name, Instruction *InsertBefore) {
+ if (Op == Instruction::ICmp) {
+ if (InsertBefore)
+ return new ICmpInst(InsertBefore, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ else
+ return new ICmpInst(CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ }
+
+ if (InsertBefore)
+ return new FCmpInst(InsertBefore, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ else
+ return new FCmpInst(CmpInst::Predicate(predicate),
+ S1, S2, Name);
+}
+
+CmpInst *
+CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2,
+ const Twine &Name, BasicBlock *InsertAtEnd) {
+ if (Op == Instruction::ICmp) {
+ return new ICmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+ }
+ return new FCmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
+ S1, S2, Name);
+}
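+
+// Usage sketch (L and R are assumed to be i32 Values): the factory dispatches
+// on the opcode, so an integer equality test is created as
+//
+//   CmpInst *EQ = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
+//                                 L, R, "eq");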
+
+void CmpInst::swapOperands() {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ IC->swapOperands();
+ else
+ cast<FCmpInst>(this)->swapOperands();
+}
+
+bool CmpInst::isCommutative() const {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ return IC->isCommutative();
+ return cast<FCmpInst>(this)->isCommutative();
+}
+
+bool CmpInst::isEquality() const {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
+ return IC->isEquality();
+ return cast<FCmpInst>(this)->isEquality();
+}
+
+
+CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
+ switch (pred) {
+ default: llvm_unreachable("Unknown cmp predicate!");
+ case ICMP_EQ: return ICMP_NE;
+ case ICMP_NE: return ICMP_EQ;
+ case ICMP_UGT: return ICMP_ULE;
+ case ICMP_ULT: return ICMP_UGE;
+ case ICMP_UGE: return ICMP_ULT;
+ case ICMP_ULE: return ICMP_UGT;
+ case ICMP_SGT: return ICMP_SLE;
+ case ICMP_SLT: return ICMP_SGE;
+ case ICMP_SGE: return ICMP_SLT;
+ case ICMP_SLE: return ICMP_SGT;
+
+ case FCMP_OEQ: return FCMP_UNE;
+ case FCMP_ONE: return FCMP_UEQ;
+ case FCMP_OGT: return FCMP_ULE;
+ case FCMP_OLT: return FCMP_UGE;
+ case FCMP_OGE: return FCMP_ULT;
+ case FCMP_OLE: return FCMP_UGT;
+ case FCMP_UEQ: return FCMP_ONE;
+ case FCMP_UNE: return FCMP_OEQ;
+ case FCMP_UGT: return FCMP_OLE;
+ case FCMP_ULT: return FCMP_OGE;
+ case FCMP_UGE: return FCMP_OLT;
+ case FCMP_ULE: return FCMP_OGT;
+ case FCMP_ORD: return FCMP_UNO;
+ case FCMP_UNO: return FCMP_ORD;
+ case FCMP_TRUE: return FCMP_FALSE;
+ case FCMP_FALSE: return FCMP_TRUE;
+ }
+}
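+
+// For example, the inverse of "signed less than" is "signed greater than or
+// equal", and the inverse of "ordered equal" is "unordered not equal":
+//
+//   assert(CmpInst::getInversePredicate(CmpInst::ICMP_SLT) == CmpInst::ICMP_SGE);
+//   assert(CmpInst::getInversePredicate(CmpInst::FCMP_OEQ) == CmpInst::FCMP_UNE);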
+
+ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
+ switch (pred) {
+ default: llvm_unreachable("Unknown icmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
+ return pred;
+ case ICMP_UGT: return ICMP_SGT;
+ case ICMP_ULT: return ICMP_SLT;
+ case ICMP_UGE: return ICMP_SGE;
+ case ICMP_ULE: return ICMP_SLE;
+ }
+}
+
+ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
+ switch (pred) {
+ default: llvm_unreachable("Unknown icmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
+ return pred;
+ case ICMP_SGT: return ICMP_UGT;
+ case ICMP_SLT: return ICMP_ULT;
+ case ICMP_SGE: return ICMP_UGE;
+ case ICMP_SLE: return ICMP_ULE;
+ }
+}
+
+/// Initialize a ConstantRange containing exactly the values X for which
+/// (X pred C) is true.
+///
+ConstantRange
+ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
+ APInt Lower(C);
+ APInt Upper(C);
+ uint32_t BitWidth = C.getBitWidth();
+ switch (pred) {
+ default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!");
+ case ICmpInst::ICMP_EQ: ++Upper; break;
+ case ICmpInst::ICMP_NE: ++Lower; break;
+ case ICmpInst::ICMP_ULT:
+ Lower = APInt::getMinValue(BitWidth);
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_SLT:
+ Lower = APInt::getSignedMinValue(BitWidth);
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_UGT:
+ ++Lower; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_SGT:
+ ++Lower; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ // Check for an empty-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/false);
+ break;
+ case ICmpInst::ICMP_ULE:
+ Lower = APInt::getMinValue(BitWidth); ++Upper;
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_SLE:
+ Lower = APInt::getSignedMinValue(BitWidth); ++Upper;
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_UGE:
+ Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ case ICmpInst::ICMP_SGE:
+ Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
+ // Check for a full-set condition.
+ if (Lower == Upper)
+ return ConstantRange(BitWidth, /*isFullSet=*/true);
+ break;
+ }
+ return ConstantRange(Lower, Upper);
+}
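+
+// For example, an 8-bit "x ult 10" produces the half-open range [0, 10);
+// "x ult 0" collapses to the empty set and "x uge 0" to the full set:
+//
+//   ConstantRange R = ICmpInst::makeConstantRange(ICmpInst::ICMP_ULT,
+//                                                 APInt(8, 10));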
+
+CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
+ switch (pred) {
+ default: llvm_unreachable("Unknown cmp predicate!");
+ case ICMP_EQ: case ICMP_NE:
+ return pred;
+ case ICMP_SGT: return ICMP_SLT;
+ case ICMP_SLT: return ICMP_SGT;
+ case ICMP_SGE: return ICMP_SLE;
+ case ICMP_SLE: return ICMP_SGE;
+ case ICMP_UGT: return ICMP_ULT;
+ case ICMP_ULT: return ICMP_UGT;
+ case ICMP_UGE: return ICMP_ULE;
+ case ICMP_ULE: return ICMP_UGE;
+
+ case FCMP_FALSE: case FCMP_TRUE:
+ case FCMP_OEQ: case FCMP_ONE:
+ case FCMP_UEQ: case FCMP_UNE:
+ case FCMP_ORD: case FCMP_UNO:
+ return pred;
+ case FCMP_OGT: return FCMP_OLT;
+ case FCMP_OLT: return FCMP_OGT;
+ case FCMP_OGE: return FCMP_OLE;
+ case FCMP_OLE: return FCMP_OGE;
+ case FCMP_UGT: return FCMP_ULT;
+ case FCMP_ULT: return FCMP_UGT;
+ case FCMP_UGE: return FCMP_ULE;
+ case FCMP_ULE: return FCMP_UGE;
+ }
+}
+
+bool CmpInst::isUnsigned(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return true;
+ }
+}
+
+bool CmpInst::isSigned(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return true;
+ }
+}
+
+bool CmpInst::isOrdered(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE:
+ case FCmpInst::FCMP_ORD: return true;
+ }
+}
+
+bool CmpInst::isUnordered(unsigned short predicate) {
+ switch (predicate) {
+ default: return false;
+ case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_UNO: return true;
+ }
+}
+
+bool CmpInst::isTrueWhenEqual(unsigned short predicate) {
+ switch(predicate) {
+ default: return false;
+ case ICMP_EQ: case ICMP_UGE: case ICMP_ULE: case ICMP_SGE: case ICMP_SLE:
+ case FCMP_TRUE: case FCMP_UEQ: case FCMP_UGE: case FCMP_ULE: return true;
+ }
+}
+
+bool CmpInst::isFalseWhenEqual(unsigned short predicate) {
+ switch(predicate) {
+ case ICMP_NE: case ICMP_UGT: case ICMP_ULT: case ICMP_SGT: case ICMP_SLT:
+ case FCMP_FALSE: case FCMP_ONE: case FCMP_OGT: case FCMP_OLT: return true;
+ default: return false;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// SwitchInst Implementation
+//===----------------------------------------------------------------------===//
+
+void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
+ assert(Value && Default && NumReserved);
+ ReservedSpace = NumReserved;
+ NumOperands = 2;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ OperandList[0] = Value;
+ OperandList[1] = Default;
+}
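+
+// Operand layout note: slot 0 holds the condition value and slot 1 the
+// default destination; each case then occupies the next pair of slots, so
+// the value and successor of case i live at operands 2+2*i and 3+2*i.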
+
+/// SwitchInst ctor - Create a new switch instruction, specifying a value to
+/// switch on and a default destination. The number of additional cases can
+/// be specified here to make memory allocation more efficient. This
+/// constructor can also autoinsert before another instruction.
+SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ Instruction *InsertBefore)
+ : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ 0, 0, InsertBefore) {
+ init(Value, Default, 2+NumCases*2);
+}
+
+/// SwitchInst ctor - Create a new switch instruction, specifying a value to
+/// switch on and a default destination. The number of additional cases can
+/// be specified here to make memory allocation more efficient. This
+/// constructor also autoinserts at the end of the specified BasicBlock.
+SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
+ BasicBlock *InsertAtEnd)
+ : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
+ 0, 0, InsertAtEnd) {
+ init(Value, Default, 2+NumCases*2);
+}
+
+SwitchInst::SwitchInst(const SwitchInst &SI)
+ : TerminatorInst(SI.getType(), Instruction::Switch, 0, 0) {
+ init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands());
+ NumOperands = SI.getNumOperands();
+ Use *OL = OperandList, *InOL = SI.OperandList;
+ for (unsigned i = 2, E = SI.getNumOperands(); i != E; i += 2) {
+ OL[i] = InOL[i];
+ OL[i+1] = InOL[i+1];
+ }
+ TheSubsets = SI.TheSubsets;
+ SubclassOptionalData = SI.SubclassOptionalData;
+}
+
+SwitchInst::~SwitchInst() {
+ dropHungoffUses();
+}
+
+
+/// addCase - Add an entry to the switch instruction...
+///
+void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
+ IntegersSubsetToBB Mapping;
+
+ // FIXME: Currently we work with ConstantInt based cases.
+  // So initialize the IntItem container directly from the ConstantInt.
+ Mapping.add(IntItem::fromConstantInt(OnVal));
+ IntegersSubset CaseRanges = Mapping.getCase();
+ addCase(CaseRanges, Dest);
+}
+
+void SwitchInst::addCase(IntegersSubset& OnVal, BasicBlock *Dest) {
+ unsigned NewCaseIdx = getNumCases();
+ unsigned OpNo = NumOperands;
+ if (OpNo+2 > ReservedSpace)
+ growOperands(); // Get more space!
+ // Initialize some new operands.
+ assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
+ NumOperands = OpNo+2;
+
+ SubsetsIt TheSubsetsIt = TheSubsets.insert(TheSubsets.end(), OnVal);
+
+ CaseIt Case(this, NewCaseIdx, TheSubsetsIt);
+ Case.updateCaseValueOperand(OnVal);
+ Case.setSuccessor(Dest);
+}
+
+/// removeCase - This method removes the specified case and its successor
+/// from the switch instruction.
+void SwitchInst::removeCase(CaseIt& i) {
+ unsigned idx = i.getCaseIndex();
+
+ assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
+
+ unsigned NumOps = getNumOperands();
+ Use *OL = OperandList;
+
+ // Overwrite this case with the end of the list.
+ if (2 + (idx + 1) * 2 != NumOps) {
+ OL[2 + idx * 2] = OL[NumOps - 2];
+ OL[2 + idx * 2 + 1] = OL[NumOps - 1];
+ }
+
+ // Nuke the last value.
+ OL[NumOps-2].set(0);
+ OL[NumOps-2+1].set(0);
+
+  // Do the same with the TheSubsets collection:
+ if (i.SubsetIt != --TheSubsets.end()) {
+ *i.SubsetIt = TheSubsets.back();
+ TheSubsets.pop_back();
+ } else {
+ TheSubsets.pop_back();
+ i.SubsetIt = TheSubsets.end();
+ }
+
+ NumOperands = NumOps-2;
+}
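+
+// Note that removal is O(1): the last case is swapped into the vacated slot,
+// so the relative order of the remaining cases is not preserved.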
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation, tripling the number of reserved operands.
+///
+void SwitchInst::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e*3;
+
+ ReservedSpace = NumOps;
+ Use *NewOps = allocHungoffUses(NumOps);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i) {
+ NewOps[i] = OldOps[i];
+ }
+ OperandList = NewOps;
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+
+BasicBlock *SwitchInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned SwitchInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+//===----------------------------------------------------------------------===//
+// IndirectBrInst Implementation
+//===----------------------------------------------------------------------===//
+
+void IndirectBrInst::init(Value *Address, unsigned NumDests) {
+ assert(Address && Address->getType()->isPointerTy() &&
+ "Address of indirectbr must be a pointer");
+ ReservedSpace = 1+NumDests;
+ NumOperands = 1;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ OperandList[0] = Address;
+}
+
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation, doubling the number of reserved operands.
+///
+void IndirectBrInst::growOperands() {
+ unsigned e = getNumOperands();
+ unsigned NumOps = e*2;
+
+ ReservedSpace = NumOps;
+ Use *NewOps = allocHungoffUses(NumOps);
+ Use *OldOps = OperandList;
+ for (unsigned i = 0; i != e; ++i)
+ NewOps[i] = OldOps[i];
+ OperandList = NewOps;
+ Use::zap(OldOps, OldOps + e, true);
+}
+
+IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
+ Instruction *InsertBefore)
+: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
+ 0, 0, InsertBefore) {
+ init(Address, NumCases);
+}
+
+IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
+ BasicBlock *InsertAtEnd)
+: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
+ 0, 0, InsertAtEnd) {
+ init(Address, NumCases);
+}
+
+IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI)
+ : TerminatorInst(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
+ allocHungoffUses(IBI.getNumOperands()),
+ IBI.getNumOperands()) {
+ Use *OL = OperandList, *InOL = IBI.OperandList;
+ for (unsigned i = 0, E = IBI.getNumOperands(); i != E; ++i)
+ OL[i] = InOL[i];
+ SubclassOptionalData = IBI.SubclassOptionalData;
+}
+
+IndirectBrInst::~IndirectBrInst() {
+ dropHungoffUses();
+}
+
+/// addDestination - Add a destination.
+///
+void IndirectBrInst::addDestination(BasicBlock *DestBB) {
+ unsigned OpNo = NumOperands;
+ if (OpNo+1 > ReservedSpace)
+ growOperands(); // Get more space!
+ // Initialize some new operands.
+ assert(OpNo < ReservedSpace && "Growing didn't work!");
+ NumOperands = OpNo+1;
+ OperandList[OpNo] = DestBB;
+}
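+
+// Usage sketch (BB1, BB2, and the insertion block End are assumed to exist):
+//
+//   Value *Addr = BlockAddress::get(BB1);
+//   IndirectBrInst *IBI = IndirectBrInst::Create(Addr, 2, End);
+//   IBI->addDestination(BB1);
+//   IBI->addDestination(BB2);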
+
+/// removeDestination - This method removes the specified successor from the
+/// indirectbr instruction.
+void IndirectBrInst::removeDestination(unsigned idx) {
+ assert(idx < getNumOperands()-1 && "Successor index out of range!");
+
+ unsigned NumOps = getNumOperands();
+ Use *OL = OperandList;
+
+ // Replace this value with the last one.
+ OL[idx+1] = OL[NumOps-1];
+
+ // Nuke the last value.
+ OL[NumOps-1].set(0);
+ NumOperands = NumOps-1;
+}
+
+BasicBlock *IndirectBrInst::getSuccessorV(unsigned idx) const {
+ return getSuccessor(idx);
+}
+unsigned IndirectBrInst::getNumSuccessorsV() const {
+ return getNumSuccessors();
+}
+void IndirectBrInst::setSuccessorV(unsigned idx, BasicBlock *B) {
+ setSuccessor(idx, B);
+}
+
+//===----------------------------------------------------------------------===//
+// clone_impl() implementations
+//===----------------------------------------------------------------------===//
+
+// Define these methods here so vtables don't get emitted into every translation
+// unit that uses these classes.
+
+GetElementPtrInst *GetElementPtrInst::clone_impl() const {
+ return new (getNumOperands()) GetElementPtrInst(*this);
+}
+
+BinaryOperator *BinaryOperator::clone_impl() const {
+ return Create(getOpcode(), Op<0>(), Op<1>());
+}
+
+FCmpInst* FCmpInst::clone_impl() const {
+ return new FCmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+
+ICmpInst* ICmpInst::clone_impl() const {
+ return new ICmpInst(getPredicate(), Op<0>(), Op<1>());
+}
+
+ExtractValueInst *ExtractValueInst::clone_impl() const {
+ return new ExtractValueInst(*this);
+}
+
+InsertValueInst *InsertValueInst::clone_impl() const {
+ return new InsertValueInst(*this);
+}
+
+AllocaInst *AllocaInst::clone_impl() const {
+ return new AllocaInst(getAllocatedType(),
+ (Value*)getOperand(0),
+ getAlignment());
+}
+
+LoadInst *LoadInst::clone_impl() const {
+ return new LoadInst(getOperand(0), Twine(), isVolatile(),
+ getAlignment(), getOrdering(), getSynchScope());
+}
+
+StoreInst *StoreInst::clone_impl() const {
+ return new StoreInst(getOperand(0), getOperand(1), isVolatile(),
+ getAlignment(), getOrdering(), getSynchScope());
+
+}
+
+AtomicCmpXchgInst *AtomicCmpXchgInst::clone_impl() const {
+ AtomicCmpXchgInst *Result =
+ new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2),
+ getOrdering(), getSynchScope());
+ Result->setVolatile(isVolatile());
+ return Result;
+}
+
+AtomicRMWInst *AtomicRMWInst::clone_impl() const {
+ AtomicRMWInst *Result =
+ new AtomicRMWInst(getOperation(),getOperand(0), getOperand(1),
+ getOrdering(), getSynchScope());
+ Result->setVolatile(isVolatile());
+ return Result;
+}
+
+FenceInst *FenceInst::clone_impl() const {
+ return new FenceInst(getContext(), getOrdering(), getSynchScope());
+}
+
+TruncInst *TruncInst::clone_impl() const {
+ return new TruncInst(getOperand(0), getType());
+}
+
+ZExtInst *ZExtInst::clone_impl() const {
+ return new ZExtInst(getOperand(0), getType());
+}
+
+SExtInst *SExtInst::clone_impl() const {
+ return new SExtInst(getOperand(0), getType());
+}
+
+FPTruncInst *FPTruncInst::clone_impl() const {
+ return new FPTruncInst(getOperand(0), getType());
+}
+
+FPExtInst *FPExtInst::clone_impl() const {
+ return new FPExtInst(getOperand(0), getType());
+}
+
+UIToFPInst *UIToFPInst::clone_impl() const {
+ return new UIToFPInst(getOperand(0), getType());
+}
+
+SIToFPInst *SIToFPInst::clone_impl() const {
+ return new SIToFPInst(getOperand(0), getType());
+}
+
+FPToUIInst *FPToUIInst::clone_impl() const {
+ return new FPToUIInst(getOperand(0), getType());
+}
+
+FPToSIInst *FPToSIInst::clone_impl() const {
+ return new FPToSIInst(getOperand(0), getType());
+}
+
+PtrToIntInst *PtrToIntInst::clone_impl() const {
+ return new PtrToIntInst(getOperand(0), getType());
+}
+
+IntToPtrInst *IntToPtrInst::clone_impl() const {
+ return new IntToPtrInst(getOperand(0), getType());
+}
+
+BitCastInst *BitCastInst::clone_impl() const {
+ return new BitCastInst(getOperand(0), getType());
+}
+
+CallInst *CallInst::clone_impl() const {
+ return new(getNumOperands()) CallInst(*this);
+}
+
+SelectInst *SelectInst::clone_impl() const {
+ return SelectInst::Create(getOperand(0), getOperand(1), getOperand(2));
+}
+
+VAArgInst *VAArgInst::clone_impl() const {
+ return new VAArgInst(getOperand(0), getType());
+}
+
+ExtractElementInst *ExtractElementInst::clone_impl() const {
+ return ExtractElementInst::Create(getOperand(0), getOperand(1));
+}
+
+InsertElementInst *InsertElementInst::clone_impl() const {
+ return InsertElementInst::Create(getOperand(0), getOperand(1), getOperand(2));
+}
+
+ShuffleVectorInst *ShuffleVectorInst::clone_impl() const {
+ return new ShuffleVectorInst(getOperand(0), getOperand(1), getOperand(2));
+}
+
+PHINode *PHINode::clone_impl() const {
+ return new PHINode(*this);
+}
+
+LandingPadInst *LandingPadInst::clone_impl() const {
+ return new LandingPadInst(*this);
+}
+
+ReturnInst *ReturnInst::clone_impl() const {
+ return new(getNumOperands()) ReturnInst(*this);
+}
+
+BranchInst *BranchInst::clone_impl() const {
+ return new(getNumOperands()) BranchInst(*this);
+}
+
+SwitchInst *SwitchInst::clone_impl() const {
+ return new SwitchInst(*this);
+}
+
+IndirectBrInst *IndirectBrInst::clone_impl() const {
+ return new IndirectBrInst(*this);
+}
+
+
+InvokeInst *InvokeInst::clone_impl() const {
+ return new(getNumOperands()) InvokeInst(*this);
+}
+
+ResumeInst *ResumeInst::clone_impl() const {
+ return new(1) ResumeInst(*this);
+}
+
+UnreachableInst *UnreachableInst::clone_impl() const {
+ LLVMContext &Context = getContext();
+ return new UnreachableInst(Context);
+}
diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp
new file mode 100644
index 000000000000..51f88d2e6fbd
--- /dev/null
+++ b/lib/IR/IntrinsicInst.cpp
@@ -0,0 +1,73 @@
+//===-- IntrinsicInst.cpp - Intrinsic Instruction Wrappers ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements methods that make it really easy to deal with intrinsic
+// functions.
+//
+// All intrinsic function calls are instances of the call instruction, so these
+// are all subclasses of the CallInst class. Note that none of these classes
+// has state or virtual methods, which is an important part of what makes this
+// gross/neat hack work.
+//
+// In some cases, arguments to intrinsics need to be generic and are defined
+// as pointer-to-empty-struct, i.e. { }*. To access the real item of interest,
+// the cast instruction needs to be stripped away.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Metadata.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
+///
+
+static Value *CastOperand(Value *C) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (CE->isCast())
+ return CE->getOperand(0);
+ return NULL;
+}
+
+Value *DbgInfoIntrinsic::StripCast(Value *C) {
+ if (Value *CO = CastOperand(C)) {
+ C = StripCast(CO);
+ } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->hasInitializer())
+ if (Value *CO = CastOperand(GV->getInitializer()))
+ C = StripCast(CO);
+ }
+ return dyn_cast<GlobalVariable>(C);
+}
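+
+// For example, given the constant i8* bitcast (i32* @g to i8*), the recursion
+// peels off the cast and reaches @g; the final dyn_cast then returns @g, or
+// null whenever the fully stripped value is not a GlobalVariable.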
+
+//===----------------------------------------------------------------------===//
+/// DbgDeclareInst - This represents the llvm.dbg.declare instruction.
+///
+
+Value *DbgDeclareInst::getAddress() const {
+ if (MDNode* MD = cast_or_null<MDNode>(getArgOperand(0)))
+ return MD->getOperand(0);
+ else
+ return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+/// DbgValueInst - This represents the llvm.dbg.value instruction.
+///
+
+const Value *DbgValueInst::getValue() const {
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
+}
+
+Value *DbgValueInst::getValue() {
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
+}
diff --git a/lib/IR/LLVMBuild.txt b/lib/IR/LLVMBuild.txt
new file mode 100644
index 000000000000..cd90ef5b16b6
--- /dev/null
+++ b/lib/IR/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/IR/LLVMBuild.txt -----------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Core
+parent = Libraries
+required_libraries = Support
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
new file mode 100644
index 000000000000..883bb9878fa5
--- /dev/null
+++ b/lib/IR/LLVMContext.cpp
@@ -0,0 +1,168 @@
+//===-- LLVMContext.cpp - Implement LLVMContext ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LLVMContext, as a wrapper around the opaque
+// class LLVMContextImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/LLVMContext.h"
+#include "LLVMContextImpl.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/SourceMgr.h"
+#include <cctype>
+using namespace llvm;
+
+static ManagedStatic<LLVMContext> GlobalContext;
+
+LLVMContext& llvm::getGlobalContext() {
+ return *GlobalContext;
+}
+
+LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
+ // Create the fixed metadata kinds. This is done in the same order as the
+ // MD_* enum values so that they correspond.
+
+ // Create the 'dbg' metadata kind.
+ unsigned DbgID = getMDKindID("dbg");
+ assert(DbgID == MD_dbg && "dbg kind id drifted"); (void)DbgID;
+
+ // Create the 'tbaa' metadata kind.
+ unsigned TBAAID = getMDKindID("tbaa");
+ assert(TBAAID == MD_tbaa && "tbaa kind id drifted"); (void)TBAAID;
+
+ // Create the 'prof' metadata kind.
+ unsigned ProfID = getMDKindID("prof");
+ assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID;
+
+ // Create the 'fpmath' metadata kind.
+ unsigned FPAccuracyID = getMDKindID("fpmath");
+ assert(FPAccuracyID == MD_fpmath && "fpmath kind id drifted");
+ (void)FPAccuracyID;
+
+ // Create the 'range' metadata kind.
+ unsigned RangeID = getMDKindID("range");
+ assert(RangeID == MD_range && "range kind id drifted");
+ (void)RangeID;
+
+ // Create the 'tbaa.struct' metadata kind.
+ unsigned TBAAStructID = getMDKindID("tbaa.struct");
+ assert(TBAAStructID == MD_tbaa_struct && "tbaa.struct kind id drifted");
+ (void)TBAAStructID;
+
+ // Create the 'invariant.load' metadata kind.
+ unsigned InvariantLdId = getMDKindID("invariant.load");
+ assert(InvariantLdId == MD_invariant_load && "invariant.load kind id drifted");
+ (void)InvariantLdId;
+}
+LLVMContext::~LLVMContext() { delete pImpl; }
+
+void LLVMContext::addModule(Module *M) {
+ pImpl->OwnedModules.insert(M);
+}
+
+void LLVMContext::removeModule(Module *M) {
+ pImpl->OwnedModules.erase(M);
+}
+
+//===----------------------------------------------------------------------===//
+// Recoverable Backend Errors
+//===----------------------------------------------------------------------===//
+
+void LLVMContext::
+setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler,
+ void *DiagContext) {
+ pImpl->InlineAsmDiagHandler = DiagHandler;
+ pImpl->InlineAsmDiagContext = DiagContext;
+}
+
+/// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by
+/// setInlineAsmDiagnosticHandler.
+LLVMContext::InlineAsmDiagHandlerTy
+LLVMContext::getInlineAsmDiagnosticHandler() const {
+ return pImpl->InlineAsmDiagHandler;
+}
+
+/// getInlineAsmDiagnosticContext - Return the diagnostic context set by
+/// setInlineAsmDiagnosticHandler.
+void *LLVMContext::getInlineAsmDiagnosticContext() const {
+ return pImpl->InlineAsmDiagContext;
+}
+
+void LLVMContext::emitError(const Twine &ErrorStr) {
+ emitError(0U, ErrorStr);
+}
+
+void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) {
+ unsigned LocCookie = 0;
+ if (const MDNode *SrcLoc = I->getMetadata("srcloc")) {
+ if (SrcLoc->getNumOperands() != 0)
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(SrcLoc->getOperand(0)))
+ LocCookie = CI->getZExtValue();
+ }
+ return emitError(LocCookie, ErrorStr);
+}
+
+void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) {
+ // If there is no error handler installed, just print the error and exit.
+ if (pImpl->InlineAsmDiagHandler == 0) {
+ errs() << "error: " << ErrorStr << "\n";
+ exit(1);
+ }
+
+ // If we do have an error handler, we can report the error and keep going.
+ SMDiagnostic Diag("", SourceMgr::DK_Error, ErrorStr.str());
+
+ pImpl->InlineAsmDiagHandler(Diag, pImpl->InlineAsmDiagContext, LocCookie);
+}
+
+//===----------------------------------------------------------------------===//
+// Metadata Kind Uniquing
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+/// isValidName - Return true if Name is a valid custom metadata handler name.
+static bool isValidName(StringRef MDName) {
+ if (MDName.empty())
+ return false;
+
+ if (!std::isalpha(static_cast<unsigned char>(MDName[0])))
+ return false;
+
+ for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E;
+ ++I) {
+ if (!std::isalnum(static_cast<unsigned char>(*I)) && *I != '_' &&
+ *I != '-' && *I != '.')
+ return false;
+ }
+ return true;
+}
+#endif
+
+/// getMDKindID - Return a unique ID for the specified metadata kind.
+unsigned LLVMContext::getMDKindID(StringRef Name) const {
+ assert(isValidName(Name) && "Invalid MDNode name");
+
+ // If this is new, assign it its ID.
+ return
+ pImpl->CustomMDKindNames.GetOrCreateValue(
+ Name, pImpl->CustomMDKindNames.size()).second;
+}
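+
+// Usage sketch (Ctx, an Instruction *I, and an MDNode *N are assumed): the ID
+// is stable for the lifetime of the context, so clients typically look it up
+// once and reuse it.
+//
+//   unsigned KindID = Ctx.getMDKindID("my.annotation");
+//   I->setMetadata(KindID, N);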
+
+/// getMDKindNames - Populate the client-supplied SmallVector with the names
+/// of the custom metadata kinds, indexed by their IDs.
+void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
+ Names.resize(pImpl->CustomMDKindNames.size());
+ for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(),
+ E = pImpl->CustomMDKindNames.end(); I != E; ++I)
+ Names[I->second] = I->first();
+}
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
new file mode 100644
index 000000000000..6a6a4d6801f0
--- /dev/null
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -0,0 +1,156 @@
+//===-- LLVMContextImpl.cpp - Implement LLVMContextImpl -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the opaque LLVMContextImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Module.h"
+#include <algorithm>
+using namespace llvm;
+
+LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
+ : TheTrueVal(0), TheFalseVal(0),
+ VoidTy(C, Type::VoidTyID),
+ LabelTy(C, Type::LabelTyID),
+ HalfTy(C, Type::HalfTyID),
+ FloatTy(C, Type::FloatTyID),
+ DoubleTy(C, Type::DoubleTyID),
+ MetadataTy(C, Type::MetadataTyID),
+ X86_FP80Ty(C, Type::X86_FP80TyID),
+ FP128Ty(C, Type::FP128TyID),
+ PPC_FP128Ty(C, Type::PPC_FP128TyID),
+ X86_MMXTy(C, Type::X86_MMXTyID),
+ Int1Ty(C, 1),
+ Int8Ty(C, 8),
+ Int16Ty(C, 16),
+ Int32Ty(C, 32),
+ Int64Ty(C, 64) {
+ InlineAsmDiagHandler = 0;
+ InlineAsmDiagContext = 0;
+ NamedStructTypesUniqueID = 0;
+}
+
+namespace {
+struct DropReferences {
+ // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second'
+ // is a Constant*.
+ template<typename PairT>
+ void operator()(const PairT &P) {
+ P.second->dropAllReferences();
+ }
+};
+
+// Temporary - drops pair.first instead of second.
+struct DropFirst {
+  // Takes the value_type of a ConstantUniqueMap's internal map, whose 'first'
+  // is a Constant*.
+ template<typename PairT>
+ void operator()(const PairT &P) {
+ P.first->dropAllReferences();
+ }
+};
+}
+
+LLVMContextImpl::~LLVMContextImpl() {
+ // NOTE: We need to delete the contents of OwnedModules, but we have to
+ // duplicate it into a temporary vector, because the destructor of Module
+ // will try to remove itself from OwnedModules set. This would cause
+ // iterator invalidation if we iterated on the set directly.
+ std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
+ DeleteContainerPointers(Modules);
+
+ // Free the constants. This is important to do here to ensure that they are
+ // freed before the LeakDetector is torn down.
+ std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
+ DropReferences());
+ std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
+ DropFirst());
+ std::for_each(StructConstants.map_begin(), StructConstants.map_end(),
+ DropFirst());
+ std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(),
+ DropFirst());
+ ExprConstants.freeConstants();
+ ArrayConstants.freeConstants();
+ StructConstants.freeConstants();
+ VectorConstants.freeConstants();
+ DeleteContainerSeconds(CAZConstants);
+ DeleteContainerSeconds(CPNConstants);
+ DeleteContainerSeconds(UVConstants);
+ InlineAsms.freeConstants();
+ DeleteContainerSeconds(IntConstants);
+ DeleteContainerSeconds(FPConstants);
+
+ for (StringMap<ConstantDataSequential*>::iterator I = CDSConstants.begin(),
+ E = CDSConstants.end(); I != E; ++I)
+ delete I->second;
+ CDSConstants.clear();
+
+ // Destroy attributes.
+ for (FoldingSetIterator<AttributeImpl> I = AttrsSet.begin(),
+ E = AttrsSet.end(); I != E; ) {
+ FoldingSetIterator<AttributeImpl> Elem = I++;
+ delete &*Elem;
+ }
+
+ // Destroy attribute lists.
+ for (FoldingSetIterator<AttributeSetImpl> I = AttrsLists.begin(),
+ E = AttrsLists.end(); I != E; ) {
+ FoldingSetIterator<AttributeSetImpl> Elem = I++;
+ delete &*Elem;
+ }
+
+ // Destroy attribute node lists.
+ for (FoldingSetIterator<AttributeSetNode> I = AttrsSetNodes.begin(),
+ E = AttrsSetNodes.end(); I != E; ) {
+ FoldingSetIterator<AttributeSetNode> Elem = I++;
+ delete &*Elem;
+ }
+
+ // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
+ // and the NonUniquedMDNodes sets, so copy the values out first.
+ SmallVector<MDNode*, 8> MDNodes;
+ MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size());
+ for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
+ I != E; ++I)
+ MDNodes.push_back(&*I);
+ MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
+ for (SmallVectorImpl<MDNode *>::iterator I = MDNodes.begin(),
+ E = MDNodes.end(); I != E; ++I)
+ (*I)->destroy();
+ assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
+ "Destroying all MDNodes didn't empty the Context's sets.");
+
+ // Destroy MDStrings.
+ DeleteContainerSeconds(MDStringCache);
+}
+
+// ConstantsContext anchors
+void UnaryConstantExpr::anchor() { }
+
+void BinaryConstantExpr::anchor() { }
+
+void SelectConstantExpr::anchor() { }
+
+void ExtractElementConstantExpr::anchor() { }
+
+void InsertElementConstantExpr::anchor() { }
+
+void ShuffleVectorConstantExpr::anchor() { }
+
+void ExtractValueConstantExpr::anchor() { }
+
+void InsertValueConstantExpr::anchor() { }
+
+void GetElementPtrConstantExpr::anchor() { }
+
+void CompareConstantExpr::anchor() { }
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
new file mode 100644
index 000000000000..0c659b81b706
--- /dev/null
+++ b/lib/IR/LLVMContextImpl.h
@@ -0,0 +1,367 @@
+//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares LLVMContextImpl, the opaque implementation
+// of LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LLVMCONTEXT_IMPL_H
+#define LLVM_LLVMCONTEXT_IMPL_H
+
+#include "AttributeImpl.h"
+#include "ConstantsContext.h"
+#include "LeaksContext.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/ValueHandle.h"
+#include <vector>
+
+namespace llvm {
+
+class ConstantInt;
+class ConstantFP;
+class LLVMContext;
+class Type;
+class Value;
+
+struct DenseMapAPIntKeyInfo {
+ struct KeyTy {
+ APInt val;
+ Type* type;
+ KeyTy(const APInt& V, Type* Ty) : val(V), type(Ty) {}
+ bool operator==(const KeyTy& that) const {
+ return type == that.type && this->val == that.val;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ friend hash_code hash_value(const KeyTy &Key) {
+ return hash_combine(Key.type, Key.val);
+ }
+ };
+ static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
+ static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return static_cast<unsigned>(hash_value(Key));
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+};
+
+struct DenseMapAPFloatKeyInfo {
+ struct KeyTy {
+ APFloat val;
+ KeyTy(const APFloat& V) : val(V){}
+ bool operator==(const KeyTy& that) const {
+ return this->val.bitwiseIsEqual(that.val);
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ friend hash_code hash_value(const KeyTy &Key) {
+ return hash_combine(Key.val);
+ }
+ };
+ static inline KeyTy getEmptyKey() {
+ return KeyTy(APFloat(APFloat::Bogus,1));
+ }
+ static inline KeyTy getTombstoneKey() {
+ return KeyTy(APFloat(APFloat::Bogus,2));
+ }
+ static unsigned getHashValue(const KeyTy &Key) {
+ return static_cast<unsigned>(hash_value(Key));
+ }
+ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+};
+
+struct AnonStructTypeKeyInfo {
+ struct KeyTy {
+ ArrayRef<Type*> ETypes;
+ bool isPacked;
+ KeyTy(const ArrayRef<Type*>& E, bool P) :
+ ETypes(E), isPacked(P) {}
+ KeyTy(const StructType* ST) :
+ ETypes(ArrayRef<Type*>(ST->element_begin(), ST->element_end())),
+ isPacked(ST->isPacked()) {}
+ bool operator==(const KeyTy& that) const {
+ if (isPacked != that.isPacked)
+ return false;
+ if (ETypes != that.ETypes)
+ return false;
+ return true;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline StructType* getEmptyKey() {
+ return DenseMapInfo<StructType*>::getEmptyKey();
+ }
+ static inline StructType* getTombstoneKey() {
+ return DenseMapInfo<StructType*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const KeyTy& Key) {
+ return hash_combine(hash_combine_range(Key.ETypes.begin(),
+ Key.ETypes.end()),
+ Key.isPacked);
+ }
+ static unsigned getHashValue(const StructType *ST) {
+ return getHashValue(KeyTy(ST));
+ }
+ static bool isEqual(const KeyTy& LHS, const StructType *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ return LHS == KeyTy(RHS);
+ }
+ static bool isEqual(const StructType *LHS, const StructType *RHS) {
+ return LHS == RHS;
+ }
+};
+
+struct FunctionTypeKeyInfo {
+ struct KeyTy {
+ const Type *ReturnType;
+ ArrayRef<Type*> Params;
+ bool isVarArg;
+ KeyTy(const Type* R, const ArrayRef<Type*>& P, bool V) :
+ ReturnType(R), Params(P), isVarArg(V) {}
+ KeyTy(const FunctionType* FT) :
+ ReturnType(FT->getReturnType()),
+ Params(ArrayRef<Type*>(FT->param_begin(), FT->param_end())),
+ isVarArg(FT->isVarArg()) {}
+ bool operator==(const KeyTy& that) const {
+ if (ReturnType != that.ReturnType)
+ return false;
+ if (isVarArg != that.isVarArg)
+ return false;
+ if (Params != that.Params)
+ return false;
+ return true;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline FunctionType* getEmptyKey() {
+ return DenseMapInfo<FunctionType*>::getEmptyKey();
+ }
+ static inline FunctionType* getTombstoneKey() {
+ return DenseMapInfo<FunctionType*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const KeyTy& Key) {
+ return hash_combine(Key.ReturnType,
+ hash_combine_range(Key.Params.begin(),
+ Key.Params.end()),
+ Key.isVarArg);
+ }
+ static unsigned getHashValue(const FunctionType *FT) {
+ return getHashValue(KeyTy(FT));
+ }
+ static bool isEqual(const KeyTy& LHS, const FunctionType *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ return LHS == KeyTy(RHS);
+ }
+ static bool isEqual(const FunctionType *LHS, const FunctionType *RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide a FoldingSetTrait::Equals specialization for MDNode that can use a
+// shortcut to avoid comparing all operands.
+template<> struct FoldingSetTrait<MDNode> : DefaultFoldingSetTrait<MDNode> {
+ static bool Equals(const MDNode &X, const FoldingSetNodeID &ID,
+ unsigned IDHash, FoldingSetNodeID &TempID) {
+ assert(!X.isNotUniqued() && "Non-uniqued MDNode in FoldingSet?");
+ // First, check if the cached hashes match. If they don't we can skip the
+ // expensive operand walk.
+ if (X.Hash != IDHash)
+ return false;
+
+ // If they match we have to compare the operands.
+ X.Profile(TempID);
+ return TempID == ID;
+ }
+ static unsigned ComputeHash(const MDNode &X, FoldingSetNodeID &) {
+ return X.Hash; // Return cached hash.
+ }
+};
+
+/// DebugRecVH - This is a CallbackVH used to keep the Scope -> index maps
+/// up to date as MDNodes mutate. This class is implemented in DebugLoc.cpp.
+class DebugRecVH : public CallbackVH {
+ /// Ctx - This is the LLVM Context being referenced.
+ LLVMContextImpl *Ctx;
+
+ /// Idx - The index into either ScopeRecordIdx or ScopeInlinedAtRecords that
+ /// this reference lives in. If this is zero, then it represents a
+ /// non-canonical entry that has no DenseMap value. This can happen due to
+ /// RAUW.
+ int Idx;
+public:
+ DebugRecVH(MDNode *n, LLVMContextImpl *ctx, int idx)
+ : CallbackVH(n), Ctx(ctx), Idx(idx) {}
+
+ MDNode *get() const {
+ return cast_or_null<MDNode>(getValPtr());
+ }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *VNew);
+};
+
+class LLVMContextImpl {
+public:
+ /// OwnedModules - The set of modules instantiated in this context, and which
+ /// will be automatically deleted if this context is deleted.
+ SmallPtrSet<Module*, 4> OwnedModules;
+
+ LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler;
+ void *InlineAsmDiagContext;
+
+ typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
+ DenseMapAPIntKeyInfo> IntMapTy;
+ IntMapTy IntConstants;
+
+ typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
+ DenseMapAPFloatKeyInfo> FPMapTy;
+ FPMapTy FPConstants;
+
+ FoldingSet<AttributeImpl> AttrsSet;
+ FoldingSet<AttributeSetImpl> AttrsLists;
+ FoldingSet<AttributeSetNode> AttrsSetNodes;
+
+ StringMap<Value*> MDStringCache;
+
+ FoldingSet<MDNode> MDNodeSet;
+
+ // MDNodes may be uniqued or not uniqued. When they're not uniqued, they
+ // aren't in the MDNodeSet, but they're still shared between objects, so no
+ // one object can destroy them. This set allows us to at least destroy them
+ // on Context destruction.
+ SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
+
+ DenseMap<Type*, ConstantAggregateZero*> CAZConstants;
+
+ typedef ConstantAggrUniqueMap<ArrayType, ConstantArray> ArrayConstantsTy;
+ ArrayConstantsTy ArrayConstants;
+
+ typedef ConstantAggrUniqueMap<StructType, ConstantStruct> StructConstantsTy;
+ StructConstantsTy StructConstants;
+
+ typedef ConstantAggrUniqueMap<VectorType, ConstantVector> VectorConstantsTy;
+ VectorConstantsTy VectorConstants;
+
+ DenseMap<PointerType*, ConstantPointerNull*> CPNConstants;
+
+ DenseMap<Type*, UndefValue*> UVConstants;
+
+ StringMap<ConstantDataSequential*> CDSConstants;
+
+
+ DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
+ ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr>
+ ExprConstants;
+
+ ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&, PointerType,
+ InlineAsm> InlineAsms;
+
+ ConstantInt *TheTrueVal;
+ ConstantInt *TheFalseVal;
+
+ LeakDetectorImpl<Value> LLVMObjects;
+
+ // Basic type instances.
+ Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy;
+ Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
+ IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty;
+
+
+ /// TypeAllocator - All dynamically allocated types are allocated from this.
+  /// They live until the context is torn down.
+ BumpPtrAllocator TypeAllocator;
+
+ DenseMap<unsigned, IntegerType*> IntegerTypes;
+
+ typedef DenseMap<FunctionType*, bool, FunctionTypeKeyInfo> FunctionTypeMap;
+ FunctionTypeMap FunctionTypes;
+ typedef DenseMap<StructType*, bool, AnonStructTypeKeyInfo> StructTypeMap;
+ StructTypeMap AnonStructTypes;
+ StringMap<StructType*> NamedStructTypes;
+ unsigned NamedStructTypesUniqueID;
+
+ DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
+ DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
+ DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
+ DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
+
+
+ /// ValueHandles - This map keeps track of all of the value handles that are
+ /// watching a Value*. The Value::HasValueHandle bit is used to know
+ /// whether or not a value has an entry in this map.
+ typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
+ ValueHandlesTy ValueHandles;
+
+ /// CustomMDKindNames - Map to hold the metadata string to ID mapping.
+ StringMap<unsigned> CustomMDKindNames;
+
+ typedef std::pair<unsigned, TrackingVH<MDNode> > MDPairTy;
+ typedef SmallVector<MDPairTy, 2> MDMapTy;
+
+ /// MetadataStore - Collection of per-instruction metadata used in this
+ /// context.
+ DenseMap<const Instruction *, MDMapTy> MetadataStore;
+
+ /// ScopeRecordIdx - This is the index in ScopeRecords for an MDNode scope
+ /// entry with no "inlined at" element.
+ DenseMap<MDNode*, int> ScopeRecordIdx;
+
+ /// ScopeRecords - These are the actual mdnodes (in a value handle) for an
+ /// index. The ValueHandle ensures that ScopeRecordIdx stays up to date if
+ /// the MDNode is RAUW'd.
+ std::vector<DebugRecVH> ScopeRecords;
+
+  /// ScopeInlinedAtIdx - This is the index in ScopeInlinedAtRecords for a
+ /// scope/inlined-at pair.
+ DenseMap<std::pair<MDNode*, MDNode*>, int> ScopeInlinedAtIdx;
+
+ /// ScopeInlinedAtRecords - These are the actual mdnodes (in value handles)
+  /// for an index. The ValueHandle ensures that ScopeInlinedAtIdx stays up
+ /// to date.
+ std::vector<std::pair<DebugRecVH, DebugRecVH> > ScopeInlinedAtRecords;
+
+ /// IntrinsicIDCache - Cache of intrinsic name (string) to numeric ID mappings
+  /// requested in this context.
+ typedef DenseMap<const Function*, unsigned> IntrinsicIDCacheTy;
+ IntrinsicIDCacheTy IntrinsicIDCache;
+
+ int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx);
+ int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx);
+
+ LLVMContextImpl(LLVMContext &C);
+ ~LLVMContextImpl();
+};
+
+}
+
+#endif
diff --git a/lib/IR/LeakDetector.cpp b/lib/IR/LeakDetector.cpp
new file mode 100644
index 000000000000..835e5e61cdf9
--- /dev/null
+++ b/lib/IR/LeakDetector.cpp
@@ -0,0 +1,69 @@
+//===-- LeakDetector.cpp - Implement LeakDetector interface ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LeakDetector class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/LeakDetector.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Threading.h"
+using namespace llvm;
+
+static ManagedStatic<sys::SmartMutex<true> > ObjectsLock;
+static ManagedStatic<LeakDetectorImpl<void> > Objects;
+
+static void clearGarbage(LLVMContext &Context) {
+ Objects->clear();
+ Context.pImpl->LLVMObjects.clear();
+}
+
+void LeakDetector::addGarbageObjectImpl(void *Object) {
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+ Objects->addGarbage(Object);
+}
+
+void LeakDetector::addGarbageObjectImpl(const Value *Object) {
+ LLVMContextImpl *pImpl = Object->getContext().pImpl;
+ pImpl->LLVMObjects.addGarbage(Object);
+}
+
+void LeakDetector::removeGarbageObjectImpl(void *Object) {
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+ Objects->removeGarbage(Object);
+}
+
+void LeakDetector::removeGarbageObjectImpl(const Value *Object) {
+ LLVMContextImpl *pImpl = Object->getContext().pImpl;
+ pImpl->LLVMObjects.removeGarbage(Object);
+}
+
+void LeakDetector::checkForGarbageImpl(LLVMContext &Context,
+ const std::string &Message) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ sys::SmartScopedLock<true> Lock(*ObjectsLock);
+
+ Objects->setName("GENERIC");
+ pImpl->LLVMObjects.setName("LLVM");
+
+ // use non-short-circuit version so that both checks are performed
+ if (Objects->hasGarbage(Message) |
+ pImpl->LLVMObjects.hasGarbage(Message))
+ errs() << "\nThis is probably because you removed an object, but didn't "
+ << "delete it. Please check your code for memory leaks.\n";
+
+ // Clear out results so we don't get duplicate warnings on
+ // next call...
+ clearGarbage(Context);
+}
diff --git a/lib/IR/LeaksContext.h b/lib/IR/LeaksContext.h
new file mode 100644
index 000000000000..5038dc9d6d6d
--- /dev/null
+++ b/lib/IR/LeaksContext.h
@@ -0,0 +1,92 @@
+//===- LeaksContext.h - LeakDetector Implementation ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various helper methods and classes used by
+// LLVMContextImpl for leak detection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Value.h"
+
+namespace llvm {
+
+template <class T>
+struct PrinterTrait {
+ static void print(const T* P) { errs() << P; }
+};
+
+template<>
+struct PrinterTrait<Value> {
+ static void print(const Value* P) { errs() << *P; }
+};
+
+template <typename T>
+struct LeakDetectorImpl {
+ explicit LeakDetectorImpl(const char* const name = "") :
+ Cache(0), Name(name) { }
+
+ void clear() {
+ Cache = 0;
+ Ts.clear();
+ }
+
+ void setName(const char* n) {
+ Name = n;
+ }
+
+  // Because the most common usage pattern, by far, is to add a
+  // garbage object and then remove it immediately, we optimize this
+  // case. When an object is added, it is not inserted into the set
+  // immediately; it is held in the Cache member instead. If it is
+  // immediately removed, no set search need be performed.
+ void addGarbage(const T* o) {
+ assert(Ts.count(o) == 0 && "Object already in set!");
+ if (Cache) {
+ assert(Cache != o && "Object already in set!");
+ Ts.insert(Cache);
+ }
+ Cache = o;
+ }
+
+ void removeGarbage(const T* o) {
+ if (o == Cache)
+ Cache = 0; // Cache hit
+ else
+ Ts.erase(o);
+ }
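+
+  // Note the fast path the cache enables: addGarbage(o) followed immediately
+  // by removeGarbage(o) only writes and then clears Cache; the set Ts is
+  // never searched or modified.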
+
+ bool hasGarbage(const std::string& Message) {
+ addGarbage(0); // Flush the Cache
+
+ assert(Cache == 0 && "No value should be cached anymore!");
+
+ if (!Ts.empty()) {
+ errs() << "Leaked " << Name << " objects found: " << Message << ":\n";
+ for (typename SmallPtrSet<const T*, 8>::iterator I = Ts.begin(),
+ E = Ts.end(); I != E; ++I) {
+ errs() << '\t';
+ PrinterTrait<T>::print(*I);
+ errs() << '\n';
+ }
+ errs() << '\n';
+
+ return true;
+ }
+
+ return false;
+ }
+
+private:
+ SmallPtrSet<const T*, 8> Ts;
+ const T* Cache;
+ const char* Name;
+};
+
+}
diff --git a/lib/IR/Makefile b/lib/IR/Makefile
new file mode 100644
index 000000000000..cc403f38dd8e
--- /dev/null
+++ b/lib/IR/Makefile
@@ -0,0 +1,33 @@
+##===- lib/IR/Makefile -------------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../..
+LIBRARYNAME = LLVMCore
+BUILD_ARCHIVE = 1
+
+BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/IR/Intrinsics.gen
+
+include $(LEVEL)/Makefile.common
+
+GENFILE:=$(PROJ_OBJ_ROOT)/include/llvm/IR/Intrinsics.gen
+
+INTRINSICTD := $(PROJ_SRC_ROOT)/include/llvm/IR/Intrinsics.td
+INTRINSICTDS := $(wildcard $(PROJ_SRC_ROOT)/include/llvm/IR/Intrinsics*.td)
+
+$(ObjDir)/Intrinsics.gen.tmp: $(ObjDir)/.dir $(INTRINSICTDS) $(LLVM_TBLGEN)
+ $(Echo) Building Intrinsics.gen.tmp from Intrinsics.td
+ $(Verb) $(LLVMTableGen) $(call SYSPATH, $(INTRINSICTD)) -o $(call SYSPATH, $@) -gen-intrinsic
+
+$(GENFILE): $(ObjDir)/Intrinsics.gen.tmp $(PROJ_OBJ_ROOT)/include/llvm/IR/.dir
+ $(Verb) $(CMP) -s $@ $< || ( $(CP) $< $@ && \
+ $(EchoCmd) Updated Intrinsics.gen because Intrinsics.gen.tmp \
+ changed significantly. )
+
+install-local:: $(GENFILE)
+ $(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/IR/Intrinsics.gen
+ $(Verb) $(DataInstall) $(GENFILE) $(DESTDIR)$(PROJ_includedir)/llvm/IR/Intrinsics.gen
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
new file mode 100644
index 000000000000..0228aeb31f5d
--- /dev/null
+++ b/lib/IR/Metadata.cpp
@@ -0,0 +1,745 @@
+//===-- Metadata.cpp - Implement Metadata classes -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Metadata classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Metadata.h"
+#include "LLVMContextImpl.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ValueHandle.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MDString implementation.
+//
+
+void MDString::anchor() { }
+
+MDString::MDString(LLVMContext &C)
+ : Value(Type::getMetadataTy(C), Value::MDStringVal) {}
+
+MDString *MDString::get(LLVMContext &Context, StringRef Str) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ StringMapEntry<Value*> &Entry =
+ pImpl->MDStringCache.GetOrCreateValue(Str);
+ Value *&S = Entry.getValue();
+ if (!S) S = new MDString(Context);
+ S->setValueName(&Entry);
+ return cast<MDString>(S);
+}
+
+//===----------------------------------------------------------------------===//
+// MDNodeOperand implementation.
+//
+
+// Use CallbackVH to hold MDNode operands.
+namespace llvm {
+class MDNodeOperand : public CallbackVH {
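+ // Layout note: operands are co-allocated immediately after their owning
+ // MDNode (see getOperandPtr below), and the first operand in the list is
+ // tagged with a ValPtrInt of 1. Walking backwards until that tag is found
+ // therefore locates the start of the operand list; the MDNode itself sits
+ // directly before it.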
+ MDNode *getParent() {
+ MDNodeOperand *Cur = this;
+
+ while (Cur->getValPtrInt() != 1)
+ --Cur;
+
+ assert(Cur->getValPtrInt() == 1 &&
+ "Couldn't find the beginning of the operand list!");
+ return reinterpret_cast<MDNode*>(Cur) - 1;
+ }
+
+public:
+ MDNodeOperand(Value *V) : CallbackVH(V) {}
+ ~MDNodeOperand() {}
+
+ void set(Value *V) {
+ unsigned IsFirst = this->getValPtrInt();
+ this->setValPtr(V);
+ this->setAsFirstOperand(IsFirst);
+ }
+
+ /// setAsFirstOperand - Accessor method to mark the operand as the first in
+ /// the list.
+ void setAsFirstOperand(unsigned V) { this->setValPtrInt(V); }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *NV);
+};
+} // end namespace llvm.
+
+
+void MDNodeOperand::deleted() {
+ getParent()->replaceOperand(this, 0);
+}
+
+void MDNodeOperand::allUsesReplacedWith(Value *NV) {
+ getParent()->replaceOperand(this, NV);
+}
+
+//===----------------------------------------------------------------------===//
+// MDNode implementation.
+//
+
+/// getOperandPtr - Helper function to get the MDNodeOperands co-allocated at
+/// the end of the MDNode.
+static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
+ // Use <= instead of < to permit a one-past-the-end address.
+ assert(Op <= N->getNumOperands() && "Invalid operand number");
+ return reinterpret_cast<MDNodeOperand*>(N + 1) + Op;
+}
+
+void MDNode::replaceOperandWith(unsigned i, Value *Val) {
+ MDNodeOperand *Op = getOperandPtr(this, i);
+ replaceOperand(Op, Val);
+}
+
+MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal)
+: Value(Type::getMetadataTy(C), Value::MDNodeVal) {
+ NumOperands = Vals.size();
+
+ if (isFunctionLocal)
+ setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit);
+
+ // Initialize the operand list, which is co-allocated on the end of the node.
+ unsigned i = 0;
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
+ Op != E; ++Op, ++i) {
+ new (Op) MDNodeOperand(Vals[i]);
+
+ // Mark the first MDNodeOperand as being the first in the list of operands.
+ if (i == 0)
+ Op->setAsFirstOperand(1);
+ }
+}
+
+/// ~MDNode - Destroy MDNode.
+MDNode::~MDNode() {
+ assert((getSubclassDataFromValue() & DestroyFlag) != 0 &&
+ "Not being destroyed through destroy()?");
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ if (isNotUniqued()) {
+ pImpl->NonUniquedMDNodes.erase(this);
+ } else {
+ pImpl->MDNodeSet.RemoveNode(this);
+ }
+
+ // Destroy the operands.
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
+ Op != E; ++Op)
+ Op->~MDNodeOperand();
+}
+
+static const Function *getFunctionForValue(Value *V) {
+ if (!V) return NULL;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ BasicBlock *BB = I->getParent();
+ return BB ? BB->getParent() : 0;
+ }
+ if (Argument *A = dyn_cast<Argument>(V))
+ return A->getParent();
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(V))
+ return BB->getParent();
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ return MD->getFunction();
+ return NULL;
+}
+
+#ifndef NDEBUG
+static const Function *assertLocalFunction(const MDNode *N) {
+ if (!N->isFunctionLocal()) return 0;
+
+ // FIXME: This does not handle cyclic function local metadata.
+ const Function *F = 0, *NewF = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i)) {
+ if (MDNode *MD = dyn_cast<MDNode>(V))
+ NewF = assertLocalFunction(MD);
+ else
+ NewF = getFunctionForValue(V);
+ }
+ if (F == 0)
+ F = NewF;
+ else
+ assert((NewF == 0 || F == NewF) && "inconsistent function-local metadata");
+ }
+ return F;
+}
+#endif
+
+// getFunction - If this metadata is function-local and recursively has a
+// function-local operand, return the first such operand's parent function.
+// Otherwise, return null. getFunction() should not be used for performance-
+// critical code because it recursively visits all the MDNode's operands.
+const Function *MDNode::getFunction() const {
+#ifndef NDEBUG
+ return assertLocalFunction(this);
+#else
+ if (!isFunctionLocal()) return NULL;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (const Function *F = getFunctionForValue(getOperand(i)))
+ return F;
+ return NULL;
+#endif
+}
+
+// destroy - Delete this node. Only when there are no uses.
+void MDNode::destroy() {
+ setValueSubclassData(getSubclassDataFromValue() | DestroyFlag);
+ // Placement delete, then free the memory.
+ this->~MDNode();
+ free(this);
+}
+
+/// isFunctionLocalValue - Return true if this is a value that would require a
+/// function-local MDNode.
+static bool isFunctionLocalValue(Value *V) {
+ return isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) ||
+ (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal());
+}
+
+MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
+ FunctionLocalness FL, bool Insert) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
+ FoldingSetNodeID ID;
+ for (unsigned i = 0; i != Vals.size(); ++i)
+ ID.AddPointer(Vals[i]);
+
+ void *InsertPoint;
+ MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+ if (N || !Insert)
+ return N;
+
+ bool isFunctionLocal = false;
+ switch (FL) {
+ case FL_Unknown:
+ for (unsigned i = 0; i != Vals.size(); ++i) {
+ Value *V = Vals[i];
+ if (!V) continue;
+ if (isFunctionLocalValue(V)) {
+ isFunctionLocal = true;
+ break;
+ }
+ }
+ break;
+ case FL_No:
+ isFunctionLocal = false;
+ break;
+ case FL_Yes:
+ isFunctionLocal = true;
+ break;
+ }
+
+ // Co-allocate space for the node and Operands together, then placement new.
+ void *Ptr = malloc(sizeof(MDNode) + Vals.size() * sizeof(MDNodeOperand));
+ N = new (Ptr) MDNode(Context, Vals, isFunctionLocal);
+
+ // Cache the operand hash.
+ N->Hash = ID.ComputeHash();
+
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ pImpl->MDNodeSet.InsertNode(N, InsertPoint);
+
+ return N;
+}
+
+MDNode *MDNode::get(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ return getMDNode(Context, Vals, FL_Unknown);
+}
+
+MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context,
+ ArrayRef<Value*> Vals,
+ bool isFunctionLocal) {
+ return getMDNode(Context, Vals, isFunctionLocal ? FL_Yes : FL_No);
+}
+
+MDNode *MDNode::getIfExists(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ return getMDNode(Context, Vals, FL_Unknown, false);
+}
+
+MDNode *MDNode::getTemporary(LLVMContext &Context, ArrayRef<Value*> Vals) {
+ MDNode *N =
+ (MDNode *)malloc(sizeof(MDNode) + Vals.size() * sizeof(MDNodeOperand));
+ N = new (N) MDNode(Context, Vals, FL_No);
+ N->setValueSubclassData(N->getSubclassDataFromValue() |
+ NotUniquedBit);
+ LeakDetector::addGarbageObject(N);
+ return N;
+}
+
+void MDNode::deleteTemporary(MDNode *N) {
+ assert(N->use_empty() && "Temporary MDNode has uses!");
+ assert(!N->getContext().pImpl->MDNodeSet.RemoveNode(N) &&
+ "Deleting a non-temporary uniqued node!");
+ assert(!N->getContext().pImpl->NonUniquedMDNodes.erase(N) &&
+ "Deleting a non-temporary non-uniqued node!");
+ assert((N->getSubclassDataFromValue() & NotUniquedBit) &&
+ "Temporary MDNode does not have NotUniquedBit set!");
+ assert((N->getSubclassDataFromValue() & DestroyFlag) == 0 &&
+ "Temporary MDNode has DestroyFlag set!");
+ LeakDetector::removeGarbageObject(N);
+ N->destroy();
+}
+
+/// getOperand - Return specified operand.
+Value *MDNode::getOperand(unsigned i) const {
+ assert(i < getNumOperands() && "Invalid operand number");
+ return *getOperandPtr(const_cast<MDNode*>(this), i);
+}
+
+void MDNode::Profile(FoldingSetNodeID &ID) const {
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ ID.AddPointer(getOperand(i));
+}
+
+void MDNode::setIsNotUniqued() {
+ setValueSubclassData(getSubclassDataFromValue() | NotUniquedBit);
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+ pImpl->NonUniquedMDNodes.insert(this);
+}
+
+// Replace value from this node's operand list.
+void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
+ Value *From = *Op;
+
+ // It is possible that someone did GV->RAUW(inst), replacing a global variable
+ // with an instruction or some other function-local object. If this is a
+ // non-function-local MDNode, it can't point to a function-local object.
+ // Handle this case by implicitly dropping the MDNode reference to null.
+ // Likewise if the MDNode is function-local but for a different function.
+ if (To && isFunctionLocalValue(To)) {
+ if (!isFunctionLocal())
+ To = 0;
+ else {
+ const Function *F = getFunction();
+ const Function *FV = getFunctionForValue(To);
+ // Metadata can be function-local without having an associated function.
+ // So only consider functions to have changed if non-null.
+ if (F && FV && F != FV)
+ To = 0;
+ }
+ }
+
+ if (From == To)
+ return;
+
+ // Update the operand.
+ Op->set(To);
+
+ // If this node is already not being uniqued (because one of the operands
+ // already went to null), then there is nothing else to do here.
+ if (isNotUniqued()) return;
+
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+
+ // Remove "this" from the context map. FoldingSet doesn't have to reprofile
+ // this node to remove it, so we don't care what state the operands are in.
+ pImpl->MDNodeSet.RemoveNode(this);
+
+ // If we are dropping an argument to null, we choose to not unique the MDNode
+ // anymore. This commonly occurs during destruction, and uniquing these
+ // brings little reuse. Also, this means we don't need to include
+ // isFunctionLocal bits in FoldingSetNodeIDs for MDNodes.
+ if (To == 0) {
+ setIsNotUniqued();
+ return;
+ }
+
+ // Now that the node is out of the folding set, get ready to reinsert it.
+ // First, check to see if another node with the same operands already exists
+ // in the set. If so, then this node is redundant.
+ FoldingSetNodeID ID;
+ Profile(ID);
+ void *InsertPoint;
+ if (MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)) {
+ replaceAllUsesWith(N);
+ destroy();
+ return;
+ }
+
+ // Cache the operand hash.
+ Hash = ID.ComputeHash();
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ pImpl->MDNodeSet.InsertNode(this, InsertPoint);
+
+ // If this MDValue was previously function-local but no longer is, clear
+ // its function-local flag.
+ if (isFunctionLocal() && !isFunctionLocalValue(To)) {
+ bool isStillFunctionLocal = false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Value *V = getOperand(i);
+ if (!V) continue;
+ if (isFunctionLocalValue(V)) {
+ isStillFunctionLocal = true;
+ break;
+ }
+ }
+ if (!isStillFunctionLocal)
+ setValueSubclassData(getSubclassDataFromValue() & ~FunctionLocalBit);
+ }
+}
+
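+// getMostGenericTBAA - Compute the least specific TBAA node that is
+// compatible with both A and B by walking each node's parent chain
+// (operand 1) up to the root and returning the deepest node the two chains
+// share, or null if they share none.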
+MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
+ if (!A || !B)
+ return NULL;
+
+ if (A == B)
+ return A;
+
+ SmallVector<MDNode *, 4> PathA;
+ MDNode *T = A;
+ while (T) {
+ PathA.push_back(T);
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+ }
+
+ SmallVector<MDNode *, 4> PathB;
+ T = B;
+ while (T) {
+ PathB.push_back(T);
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+ }
+
+ int IA = PathA.size() - 1;
+ int IB = PathB.size() - 1;
+
+ MDNode *Ret = 0;
+ while (IA >= 0 && IB >= 0) {
+ if (PathA[IA] == PathB[IB])
+ Ret = PathA[IA];
+ else
+ break;
+ --IA;
+ --IB;
+ }
+ return Ret;
+}
+
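+// getMostGenericFPMath - The fpmath operand is a maximum allowed error;
+// merged code must satisfy both bounds, so keep the stricter (smaller) one.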
+MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
+ if (!A || !B)
+ return NULL;
+
+ APFloat AVal = cast<ConstantFP>(A->getOperand(0))->getValueAPF();
+ APFloat BVal = cast<ConstantFP>(B->getOperand(0))->getValueAPF();
+ if (AVal.compare(BVal) == APFloat::cmpLessThan)
+ return A;
+ return B;
+}
+
+static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
+ return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
+}
+
+static bool canBeMerged(const ConstantRange &A, const ConstantRange &B) {
+ return !A.intersectWith(B).isEmptySet() || isContiguous(A, B);
+}
+
+static bool tryMergeRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low,
+ ConstantInt *High) {
+ ConstantRange NewRange(Low->getValue(), High->getValue());
+ unsigned Size = EndPoints.size();
+ APInt LB = cast<ConstantInt>(EndPoints[Size - 2])->getValue();
+ APInt LE = cast<ConstantInt>(EndPoints[Size - 1])->getValue();
+ ConstantRange LastRange(LB, LE);
+ if (canBeMerged(NewRange, LastRange)) {
+ ConstantRange Union = LastRange.unionWith(NewRange);
+ Type *Ty = High->getType();
+ EndPoints[Size - 2] = ConstantInt::get(Ty, Union.getLower());
+ EndPoints[Size - 1] = ConstantInt::get(Ty, Union.getUpper());
+ return true;
+ }
+ return false;
+}
+
+static void addRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low,
+ ConstantInt *High) {
+ if (!EndPoints.empty())
+ if (tryMergeRange(EndPoints, Low, High))
+ return;
+
+ EndPoints.push_back(Low);
+ EndPoints.push_back(High);
+}
+
+MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
+ // Given two ranges, we want to compute the union of the ranges. This
+ // is slightly complicated by having to combine the intervals and merge
+ // the ones that overlap.
+
+ if (!A || !B)
+ return NULL;
+
+ if (A == B)
+ return A;
+
+ // First, walk both lists in order of the lower boundary of each interval.
+ // At each step, try to merge the new interval into the last one we added.
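+ //
+ // For example (ConstantRanges are half-open, [Lower, Upper)):
+ //   A = { [0,4), [10,14) },  B = { [2,6) }
+ // [0,4) is added first, [2,6) overlaps it and merges to [0,6), and
+ // [10,14) is disjoint, so the result is { [0,6), [10,14) }.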
+ SmallVector<Value*, 4> EndPoints;
+ int AI = 0;
+ int BI = 0;
+ int AN = A->getNumOperands() / 2;
+ int BN = B->getNumOperands() / 2;
+ while (AI < AN && BI < BN) {
+ ConstantInt *ALow = cast<ConstantInt>(A->getOperand(2 * AI));
+ ConstantInt *BLow = cast<ConstantInt>(B->getOperand(2 * BI));
+
+ if (ALow->getValue().slt(BLow->getValue())) {
+ addRange(EndPoints, ALow, cast<ConstantInt>(A->getOperand(2 * AI + 1)));
+ ++AI;
+ } else {
+ addRange(EndPoints, BLow, cast<ConstantInt>(B->getOperand(2 * BI + 1)));
+ ++BI;
+ }
+ }
+ while (AI < AN) {
+ addRange(EndPoints, cast<ConstantInt>(A->getOperand(2 * AI)),
+ cast<ConstantInt>(A->getOperand(2 * AI + 1)));
+ ++AI;
+ }
+ while (BI < BN) {
+ addRange(EndPoints, cast<ConstantInt>(B->getOperand(2 * BI)),
+ cast<ConstantInt>(B->getOperand(2 * BI + 1)));
+ ++BI;
+ }
+
+ // If we have more than 2 ranges (4 endpoints) we have to try to merge
+ // the last and first ones.
+ unsigned Size = EndPoints.size();
+ if (Size > 4) {
+ ConstantInt *FB = cast<ConstantInt>(EndPoints[0]);
+ ConstantInt *FE = cast<ConstantInt>(EndPoints[1]);
+ if (tryMergeRange(EndPoints, FB, FE)) {
+ for (unsigned i = 0; i < Size - 2; ++i) {
+ EndPoints[i] = EndPoints[i + 2];
+ }
+ EndPoints.resize(Size - 2);
+ }
+ }
+
+ // If in the end we have a single range, it is possible that it is now the
+ // full range. Just drop the metadata in that case.
+ if (EndPoints.size() == 2) {
+ ConstantRange Range(cast<ConstantInt>(EndPoints[0])->getValue(),
+ cast<ConstantInt>(EndPoints[1])->getValue());
+ if (Range.isFullSet())
+ return NULL;
+ }
+
+ return MDNode::get(A->getContext(), EndPoints);
+}
+
+//===----------------------------------------------------------------------===//
+// NamedMDNode implementation.
+//
+
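+// NamedMDNode keeps its operand list behind an opaque void* so its header
+// need not pull in SmallVector; this helper recovers the concrete type.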
+static SmallVector<TrackingVH<MDNode>, 4> &getNMDOps(void *Operands) {
+ return *(SmallVector<TrackingVH<MDNode>, 4>*)Operands;
+}
+
+NamedMDNode::NamedMDNode(const Twine &N)
+ : Name(N.str()), Parent(0),
+ Operands(new SmallVector<TrackingVH<MDNode>, 4>()) {
+}
+
+NamedMDNode::~NamedMDNode() {
+ dropAllReferences();
+ delete &getNMDOps(Operands);
+}
+
+/// getNumOperands - Return number of NamedMDNode operands.
+unsigned NamedMDNode::getNumOperands() const {
+ return (unsigned)getNMDOps(Operands).size();
+}
+
+/// getOperand - Return specified operand.
+MDNode *NamedMDNode::getOperand(unsigned i) const {
+ assert(i < getNumOperands() && "Invalid Operand number!");
+ return dyn_cast<MDNode>(&*getNMDOps(Operands)[i]);
+}
+
+/// addOperand - Add metadata Operand.
+void NamedMDNode::addOperand(MDNode *M) {
+ assert(!M->isFunctionLocal() &&
+ "NamedMDNode operands must not be function-local!");
+ getNMDOps(Operands).push_back(TrackingVH<MDNode>(M));
+}
+
+/// eraseFromParent - Drop all references and remove the node from parent
+/// module.
+void NamedMDNode::eraseFromParent() {
+ getParent()->eraseNamedMetadata(this);
+}
+
+/// dropAllReferences - Remove all uses and clear node vector.
+void NamedMDNode::dropAllReferences() {
+ getNMDOps(Operands).clear();
+}
+
+/// getName - Return a constant reference to this named metadata's name.
+StringRef NamedMDNode::getName() const {
+ return StringRef(Name);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Metadata method implementations.
+//
+
+void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
+ if (Node == 0 && !hasMetadata()) return;
+ setMetadata(getContext().getMDKindID(Kind), Node);
+}
+
+MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
+ return getMetadataImpl(getContext().getMDKindID(Kind));
+}
+
+/// setMetadata - Set the metadata of the specified kind to the specified
+/// node. This updates/replaces metadata if already present, or removes it if
+/// Node is null.
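+///
+/// For example (a sketch; TBAATag stands for any MDNode*):
+///   I->setMetadata(LLVMContext::MD_tbaa, TBAATag); // attach or replace
+///   I->setMetadata(LLVMContext::MD_tbaa, 0);       // remove that kind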
+void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
+ if (Node == 0 && !hasMetadata()) return;
+
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (KindID == LLVMContext::MD_dbg) {
+ DbgLoc = DebugLoc::getFromDILocation(Node);
+ return;
+ }
+
+ // Handle the case when we're adding/updating metadata on an instruction.
+ if (Node) {
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+ assert(!Info.empty() == hasMetadataHashEntry() &&
+ "HasMetadata bit is wonked");
+ if (Info.empty()) {
+ setHasMetadataHashEntry(true);
+ } else {
+ // Handle replacement of an existing value.
+ for (unsigned i = 0, e = Info.size(); i != e; ++i)
+ if (Info[i].first == KindID) {
+ Info[i].second = Node;
+ return;
+ }
+ }
+
+ // No replacement, just add it to the list.
+ Info.push_back(std::make_pair(KindID, Node));
+ return;
+ }
+
+ // Otherwise, we're removing metadata from an instruction.
+ assert((hasMetadataHashEntry() ==
+ getContext().pImpl->MetadataStore.count(this)) &&
+ "HasMetadata bit out of date!");
+ if (!hasMetadataHashEntry())
+ return; // Nothing to remove!
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+
+ // Common case is removing the only entry.
+ if (Info.size() == 1 && Info[0].first == KindID) {
+ getContext().pImpl->MetadataStore.erase(this);
+ setHasMetadataHashEntry(false);
+ return;
+ }
+
+ // Handle removal of an existing value.
+ for (unsigned i = 0, e = Info.size(); i != e; ++i)
+ if (Info[i].first == KindID) {
+ Info[i] = Info.back();
+ Info.pop_back();
+ assert(!Info.empty() && "Removing last entry should be handled above");
+ return;
+ }
+ // Otherwise, removing an entry that doesn't exist on the instruction.
+}
+
+MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (KindID == LLVMContext::MD_dbg)
+ return DbgLoc.getAsMDNode(getContext());
+
+ if (!hasMetadataHashEntry()) return 0;
+
+ LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
+ assert(!Info.empty() && "bit out of sync with hash table");
+
+ for (LLVMContextImpl::MDMapTy::iterator I = Info.begin(), E = Info.end();
+ I != E; ++I)
+ if (I->first == KindID)
+ return I->second;
+ return 0;
+}
+
+void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,
+ MDNode*> > &Result) const {
+ Result.clear();
+
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (!DbgLoc.isUnknown()) {
+ Result.push_back(std::make_pair((unsigned)LLVMContext::MD_dbg,
+ DbgLoc.getAsMDNode(getContext())));
+ if (!hasMetadataHashEntry()) return;
+ }
+
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
+ "Shouldn't have called this");
+ const LLVMContextImpl::MDMapTy &Info =
+ getContext().pImpl->MetadataStore.find(this)->second;
+ assert(!Info.empty() && "Shouldn't have called this");
+
+ Result.append(Info.begin(), Info.end());
+
+ // Sort the resulting array so it is stable.
+ if (Result.size() > 1)
+ array_pod_sort(Result.begin(), Result.end());
+}
+
+void Instruction::
+getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
+ MDNode*> > &Result) const {
+ Result.clear();
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
+ "Shouldn't have called this");
+ const LLVMContextImpl::MDMapTy &Info =
+ getContext().pImpl->MetadataStore.find(this)->second;
+ assert(!Info.empty() && "Shouldn't have called this");
+ Result.append(Info.begin(), Info.end());
+
+ // Sort the resulting array so it is stable.
+ if (Result.size() > 1)
+ array_pod_sort(Result.begin(), Result.end());
+}
+
+/// clearMetadataHashEntries - Clear all hashtable-based metadata from
+/// this instruction.
+void Instruction::clearMetadataHashEntries() {
+ assert(hasMetadataHashEntry() && "Caller should check");
+ getContext().pImpl->MetadataStore.erase(this);
+ setHasMetadataHashEntry(false);
+}
+
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
new file mode 100644
index 000000000000..8affcc946960
--- /dev/null
+++ b/lib/IR/Module.cpp
@@ -0,0 +1,451 @@
+//===-- Module.cpp - Implement the Module class ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Module class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Module.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/GVMaterializer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/LeakDetector.h"
+#include <algorithm>
+#include <cstdarg>
+#include <cstdlib>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Methods to implement the globals and functions lists.
+//
+
+// Explicit instantiations of SymbolTableListTraits since some of the methods
+// are not in the public header file.
+template class llvm::SymbolTableListTraits<Function, Module>;
+template class llvm::SymbolTableListTraits<GlobalVariable, Module>;
+template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
+
+//===----------------------------------------------------------------------===//
+// Primitive Module methods.
+//
+
+Module::Module(StringRef MID, LLVMContext& C)
+ : Context(C), Materializer(NULL), ModuleID(MID) {
+ ValSymTab = new ValueSymbolTable();
+ NamedMDSymTab = new StringMap<NamedMDNode *>();
+ Context.addModule(this);
+}
+
+Module::~Module() {
+ Context.removeModule(this);
+ dropAllReferences();
+ GlobalList.clear();
+ FunctionList.clear();
+ AliasList.clear();
+ NamedMDList.clear();
+ delete ValSymTab;
+ delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
+}
+
+/// Target endian information.
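+/// Scans the '-'-separated datalayout string for an 'e' (little-endian) or
+/// 'E' (big-endian) token; e.g. "e-p:64:64:64-i64:64" yields LittleEndian.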
+Module::Endianness Module::getEndianness() const {
+ StringRef temp = DataLayout;
+ Module::Endianness ret = AnyEndianness;
+
+ while (!temp.empty()) {
+ std::pair<StringRef, StringRef> P = getToken(temp, "-");
+
+ StringRef token = P.first;
+ temp = P.second;
+
+ if (token[0] == 'e') {
+ ret = LittleEndian;
+ } else if (token[0] == 'E') {
+ ret = BigEndian;
+ }
+ }
+
+ return ret;
+}
+
+/// Target Pointer Size information.
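+/// Scans the datalayout string for a "p:<size>:..." token; e.g. a layout
+/// containing "p:32:32:32" yields Pointer32 and "p:64:64:64" yields
+/// Pointer64; any other size leaves the result as AnyPointerSize.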
+Module::PointerSize Module::getPointerSize() const {
+ StringRef temp = DataLayout;
+ Module::PointerSize ret = AnyPointerSize;
+
+ while (!temp.empty()) {
+ std::pair<StringRef, StringRef> TmpP = getToken(temp, "-");
+ temp = TmpP.second;
+ TmpP = getToken(TmpP.first, ":");
+ StringRef token = TmpP.second, signalToken = TmpP.first;
+
+ if (signalToken[0] == 'p') {
+ int size = 0;
+ getToken(token, ":").first.getAsInteger(10, size);
+ if (size == 32)
+ ret = Pointer32;
+ else if (size == 64)
+ ret = Pointer64;
+ }
+ }
+
+ return ret;
+}
+
+/// getNamedValue - Return the first global value in the module with
+/// the specified name, of arbitrary type. This method returns null
+/// if a global with the specified name is not found.
+GlobalValue *Module::getNamedValue(StringRef Name) const {
+ return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
+}
+
+/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
+/// This ID is uniqued across modules in the current LLVMContext.
+unsigned Module::getMDKindID(StringRef Name) const {
+ return Context.getMDKindID(Name);
+}
+
+/// getMDKindNames - Populate client supplied SmallVector with the name for
+/// custom metadata IDs registered in this LLVMContext. ID #0 is not used,
+/// so it is filled in as an empty string.
+void Module::getMDKindNames(SmallVectorImpl<StringRef> &Result) const {
+ return Context.getMDKindNames(Result);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the functions in the module.
+//
+
+// getOrInsertFunction - Look up the specified function in the module symbol
+// table. If it does not exist, add a prototype for the function and return
+// it. This is nice because it allows most passes to get away with not handling
+// the symbol table directly for this common task.
+//
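+// For example, a client declaring an i32(i8*) function might write (a
+// sketch; "puts" stands in for any symbol name, Ctx for the LLVMContext):
+//   Constant *C = M->getOrInsertFunction("puts",
+//       FunctionType::get(Type::getInt32Ty(Ctx),
+//                         Type::getInt8PtrTy(Ctx), false));
+//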
+Constant *Module::getOrInsertFunction(StringRef Name,
+ FunctionType *Ty,
+ AttributeSet AttributeList) {
+ // See if we have a definition for the specified function already.
+ GlobalValue *F = getNamedValue(Name);
+ if (F == 0) {
+ // Nope, add it
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ if (!New->isIntrinsic()) // Intrinsics get attrs set on construction
+ New->setAttributes(AttributeList);
+ FunctionList.push_back(New);
+ return New; // Return the new prototype.
+ }
+
+ // Okay, the function exists. Does it have externally visible linkage?
+ if (F->hasLocalLinkage()) {
+ // Clear the function's name.
+ F->setName("");
+ // Retry, now there won't be a conflict.
+ Constant *NewF = getOrInsertFunction(Name, Ty);
+ F->setName(Name);
+ return NewF;
+ }
+
+ // If the function exists but has the wrong type, return a bitcast to the
+ // right type.
+ if (F->getType() != PointerType::getUnqual(Ty))
+ return ConstantExpr::getBitCast(F, PointerType::getUnqual(Ty));
+
+ // Otherwise, we just found the existing function or a prototype.
+ return F;
+}
+
+Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
+ FunctionType *Ty,
+ AttributeSet AttributeList) {
+ // See if we have a definition for the specified function already.
+ GlobalValue *F = getNamedValue(Name);
+ if (F == 0) {
+ // Nope, add it
+ Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
+ New->setAttributes(AttributeList);
+ FunctionList.push_back(New);
+ return New; // Return the new prototype.
+ }
+
+ // Otherwise, we just found the existing function or a prototype.
+ return F;
+}
+
+Constant *Module::getOrInsertFunction(StringRef Name,
+ FunctionType *Ty) {
+ return getOrInsertFunction(Name, Ty, AttributeSet());
+}
+
+// getOrInsertFunction - Look up the specified function in the module symbol
+// table. If it does not exist, add a prototype for the function and return it.
+// This version of the method takes a null-terminated list of argument types,
+// which makes it easier for clients to use.
+//
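+// For example (a sketch; "myhook" is a hypothetical symbol):
+//   M->getOrInsertFunction("myhook", Type::getVoidTy(Ctx),
+//                          Type::getInt32Ty(Ctx), (Type *)0);
+// The null pointer terminates the argument-type list.
+//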
+Constant *Module::getOrInsertFunction(StringRef Name,
+ AttributeSet AttributeList,
+ Type *RetTy, ...) {
+ va_list Args;
+ va_start(Args, RetTy);
+
+ // Build the list of argument types...
+ std::vector<Type*> ArgTys;
+ while (Type *ArgTy = va_arg(Args, Type*))
+ ArgTys.push_back(ArgTy);
+
+ va_end(Args);
+
+ // Build the function type and chain to the other getOrInsertFunction...
+ return getOrInsertFunction(Name,
+ FunctionType::get(RetTy, ArgTys, false),
+ AttributeList);
+}
+
+Constant *Module::getOrInsertFunction(StringRef Name,
+ Type *RetTy, ...) {
+ va_list Args;
+ va_start(Args, RetTy);
+
+ // Build the list of argument types...
+ std::vector<Type*> ArgTys;
+ while (Type *ArgTy = va_arg(Args, Type*))
+ ArgTys.push_back(ArgTy);
+
+ va_end(Args);
+
+ // Build the function type and chain to the other getOrInsertFunction...
+ return getOrInsertFunction(Name,
+ FunctionType::get(RetTy, ArgTys, false),
+ AttributeSet());
+}
+
+// getFunction - Look up the specified function in the module symbol table.
+// If it does not exist, return null.
+//
+Function *Module::getFunction(StringRef Name) const {
+ return dyn_cast_or_null<Function>(getNamedValue(Name));
+}
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the global variables in the module.
+//
+
+/// getGlobalVariable - Look up the specified global variable in the module
+/// symbol table. If it does not exist, return null. The type argument
+/// should be the underlying type of the global, i.e., it should not have
+/// the top-level PointerType, which represents the address of the global.
+/// If AllowLocal is set to true, this function will also return globals
+/// that have local linkage. By default, such globals are not returned.
+///
+GlobalVariable *Module::getGlobalVariable(StringRef Name,
+ bool AllowLocal) const {
+ if (GlobalVariable *Result =
+ dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
+ if (AllowLocal || !Result->hasLocalLinkage())
+ return Result;
+ return 0;
+}
+
+/// getOrInsertGlobal - Look up the specified global in the module symbol table.
+/// 1. If it does not exist, add a declaration of the global and return it.
+/// 2. Else, the global exists but has the wrong type: return the global
+/// with a constantexpr cast to the right type.
+/// 3. Finally, if the existing global is the correct declaration, return the
+/// existing global.
+Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
+ // See if we have a definition for the specified global already.
+ GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
+ if (GV == 0) {
+ // Nope, add it
+ GlobalVariable *New =
+ new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage,
+ 0, Name);
+ return New; // Return the new declaration.
+ }
+
+ // If the variable exists but has the wrong type, return a bitcast to the
+ // right type.
+ if (GV->getType() != PointerType::getUnqual(Ty))
+ return ConstantExpr::getBitCast(GV, PointerType::getUnqual(Ty));
+
+ // Otherwise, we just found the existing function or a prototype.
+ return GV;
+}
+
+//===----------------------------------------------------------------------===//
+// Methods for easy access to the global variables in the module.
+//
+
+// getNamedAlias - Look up the specified global in the module symbol table.
+// If it does not exist, return null.
+//
+GlobalAlias *Module::getNamedAlias(StringRef Name) const {
+ return dyn_cast_or_null<GlobalAlias>(getNamedValue(Name));
+}
+
+/// getNamedMetadata - Return the first NamedMDNode in the module with the
+/// specified name. This method returns null if a NamedMDNode with the
+/// specified name is not found.
+NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
+ SmallString<256> NameData;
+ StringRef NameRef = Name.toStringRef(NameData);
+ return static_cast<StringMap<NamedMDNode*> *>(NamedMDSymTab)->lookup(NameRef);
+}
+
+/// getOrInsertNamedMetadata - Return the first named MDNode in the module
+/// with the specified name. This method returns a new NamedMDNode if a
+/// NamedMDNode with the specified name is not found.
+NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
+ NamedMDNode *&NMD =
+ (*static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab))[Name];
+ if (!NMD) {
+ NMD = new NamedMDNode(Name);
+ NMD->setParent(this);
+ NamedMDList.push_back(NMD);
+ }
+ return NMD;
+}
+
+/// eraseNamedMetadata - Remove the given NamedMDNode from this module and
+/// delete it.
+void Module::eraseNamedMetadata(NamedMDNode *NMD) {
+ static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
+ NamedMDList.erase(NMD);
+}
+
+/// getModuleFlagsMetadata - Returns the module flags in the provided vector.
+void Module::
+getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const {
+ const NamedMDNode *ModFlags = getModuleFlagsMetadata();
+ if (!ModFlags) return;
+
+ for (unsigned i = 0, e = ModFlags->getNumOperands(); i != e; ++i) {
+ MDNode *Flag = ModFlags->getOperand(i);
+ ConstantInt *Behavior = cast<ConstantInt>(Flag->getOperand(0));
+ MDString *Key = cast<MDString>(Flag->getOperand(1));
+ Value *Val = Flag->getOperand(2);
+ Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()),
+ Key, Val));
+ }
+}
+
+/// getModuleFlagsMetadata - Returns the NamedMDNode in the module that
+/// represents module-level flags. This method returns null if there are no
+/// module-level flags.
+NamedMDNode *Module::getModuleFlagsMetadata() const {
+ return getNamedMetadata("llvm.module.flags");
+}
+
+/// getOrInsertModuleFlagsMetadata - Returns the NamedMDNode in the module that
+/// represents module-level flags. If module-level flags aren't found, it
+/// creates the named metadata that contains them.
+NamedMDNode *Module::getOrInsertModuleFlagsMetadata() {
+ return getOrInsertNamedMetadata("llvm.module.flags");
+}
+
+/// addModuleFlag - Add a module-level flag to the module-level flags
+/// metadata. It will create the module-level flags named metadata if it doesn't
+/// already exist.
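+///
+/// For example (a sketch; the key is hypothetical):
+///   M.addModuleFlag(Module::Error, "my-abi-version", 1);
+/// adds a flag whose value must agree across modules when they are linked.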
+void Module::addModuleFlag(ModFlagBehavior Behavior, StringRef Key,
+ Value *Val) {
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Value *Ops[3] = {
+ ConstantInt::get(Int32Ty, Behavior), MDString::get(Context, Key), Val
+ };
+ getOrInsertModuleFlagsMetadata()->addOperand(MDNode::get(Context, Ops));
+}
+void Module::addModuleFlag(ModFlagBehavior Behavior, StringRef Key,
+ uint32_t Val) {
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ addModuleFlag(Behavior, Key, ConstantInt::get(Int32Ty, Val));
+}
+void Module::addModuleFlag(MDNode *Node) {
+ assert(Node->getNumOperands() == 3 &&
+ "Invalid number of operands for module flag!");
+ assert(isa<ConstantInt>(Node->getOperand(0)) &&
+ isa<MDString>(Node->getOperand(1)) &&
+ "Invalid operand types for module flag!");
+ getOrInsertModuleFlagsMetadata()->addOperand(Node);
+}
+
+//===----------------------------------------------------------------------===//
+// Methods to control the materialization of GlobalValues in the Module.
+//
+void Module::setMaterializer(GVMaterializer *GVM) {
+ assert(!Materializer &&
+ "Module already has a GVMaterializer. Call MaterializeAllPermanently"
+ " to clear it out before setting another one.");
+ Materializer.reset(GVM);
+}
+
+bool Module::isMaterializable(const GlobalValue *GV) const {
+ if (Materializer)
+ return Materializer->isMaterializable(GV);
+ return false;
+}
+
+bool Module::isDematerializable(const GlobalValue *GV) const {
+ if (Materializer)
+ return Materializer->isDematerializable(GV);
+ return false;
+}
+
+bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) {
+ if (Materializer)
+ return Materializer->Materialize(GV, ErrInfo);
+ return false;
+}
+
+void Module::Dematerialize(GlobalValue *GV) {
+ if (Materializer)
+ return Materializer->Dematerialize(GV);
+}
+
+bool Module::MaterializeAll(std::string *ErrInfo) {
+ if (!Materializer)
+ return false;
+ return Materializer->MaterializeModule(this, ErrInfo);
+}
+
+bool Module::MaterializeAllPermanently(std::string *ErrInfo) {
+ if (MaterializeAll(ErrInfo))
+ return true;
+ Materializer.reset();
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Other module related stuff.
+//
+
+
+// dropAllReferences() - This function causes all the subelements to "let go"
+// of all references that they are maintaining. This allows one to 'delete' a
+// whole module at a time, even though there may be circular references... first
+// all references are dropped, and all use counts go to zero. Then everything
+// is deleted for real. Note that no operations are valid on an object that
+// has "dropped all references", except operator delete.
+//
+void Module::dropAllReferences() {
+ for (Module::iterator I = begin(), E = end(); I != E; ++I)
+ I->dropAllReferences();
+
+ for (Module::global_iterator I = global_begin(), E = global_end(); I != E; ++I)
+ I->dropAllReferences();
+
+ for (Module::alias_iterator I = alias_begin(), E = alias_end(); I != E; ++I)
+ I->dropAllReferences();
+}
diff --git a/lib/IR/Pass.cpp b/lib/IR/Pass.cpp
new file mode 100644
index 000000000000..7fc48282380b
--- /dev/null
+++ b/lib/IR/Pass.cpp
@@ -0,0 +1,276 @@
+//===- Pass.cpp - LLVM Pass Infrastructure Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass infrastructure. It is primarily
+// responsible for ensuring that passes are executed and batched together
+// optimally.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/PassNameParser.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Pass Implementation
+//
+
+// Force out-of-line virtual method.
+Pass::~Pass() {
+ delete Resolver;
+}
+
+// Force out-of-line virtual method.
+ModulePass::~ModulePass() { }
+
+Pass *ModulePass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+}
+
+PassManagerType ModulePass::getPotentialPassManagerType() const {
+ return PMT_ModulePassManager;
+}
+
+bool Pass::mustPreserveAnalysisID(char &AID) const {
+ return Resolver->getAnalysisIfAvailable(&AID, true) != 0;
+}
+
+// dumpPassStructure - Implement the -debug-pass=Structure option
+void Pass::dumpPassStructure(unsigned Offset) {
+ dbgs().indent(Offset*2) << getPassName() << "\n";
+}
+
+/// getPassName - Return a nice clean name for a pass. This is usually
+/// implemented in terms of the name that is registered by one of the
+/// Registration templates, but can be overridden directly.
+///
+const char *Pass::getPassName() const {
+ AnalysisID AID = getPassID();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(AID);
+ if (PI)
+ return PI->getPassName();
+ return "Unnamed pass: implement Pass::getPassName()";
+}
+
+void Pass::preparePassManager(PMStack &) {
+ // By default, don't do anything.
+}
+
+PassManagerType Pass::getPotentialPassManagerType() const {
+ // Default implementation.
+ return PMT_Unknown;
+}
+
+void Pass::getAnalysisUsage(AnalysisUsage &) const {
+ // By default, no analysis results are used, all are invalidated.
+}
+
+void Pass::releaseMemory() {
+ // By default, don't do anything.
+}
+
+void Pass::verifyAnalysis() const {
+ // By default, don't do anything.
+}
+
+void *Pass::getAdjustedAnalysisPointer(AnalysisID AID) {
+ return this;
+}
+
+ImmutablePass *Pass::getAsImmutablePass() {
+ return 0;
+}
+
+PMDataManager *Pass::getAsPMDataManager() {
+ return 0;
+}
+
+void Pass::setResolver(AnalysisResolver *AR) {
+ assert(!Resolver && "Resolver is already set");
+ Resolver = AR;
+}
+
+// print - Print out the internal state of the pass. This is called by Analyze
+// to print out the contents of an analysis. Otherwise it is not necessary to
+// implement this method.
+//
+void Pass::print(raw_ostream &O,const Module*) const {
+ O << "Pass::print not implemented for pass: '" << getPassName() << "'!\n";
+}
+
+// dump - call print(dbgs());
+void Pass::dump() const {
+ print(dbgs(), 0);
+}
+
+//===----------------------------------------------------------------------===//
+// ImmutablePass Implementation
+//
+// Force out-of-line virtual method.
+ImmutablePass::~ImmutablePass() { }
+
+void ImmutablePass::initializePass() {
+ // By default, don't do anything.
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionPass Implementation
+//
+
+Pass *FunctionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createPrintFunctionPass(Banner, &O);
+}
+
+PassManagerType FunctionPass::getPotentialPassManagerType() const {
+ return PMT_FunctionPassManager;
+}
+
+//===----------------------------------------------------------------------===//
+// BasicBlockPass Implementation
+//
+
+Pass *BasicBlockPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createPrintBasicBlockPass(&O, false, Banner);
+}
+
+bool BasicBlockPass::doInitialization(Function &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool BasicBlockPass::doFinalization(Function &) {
+ // By default, don't do anything.
+ return false;
+}
+
+PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
+ return PMT_BasicBlockPassManager;
+}
+
+const PassInfo *Pass::lookupPassInfo(const void *TI) {
+ return PassRegistry::getPassRegistry()->getPassInfo(TI);
+}
+
+const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
+ return PassRegistry::getPassRegistry()->getPassInfo(Arg);
+}
+
+Pass *Pass::createPass(AnalysisID ID) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(ID);
+ if (!PI)
+ return NULL;
+ return PI->createPass();
+}
+
+Pass *PassInfo::createPass() const {
+ assert((!isAnalysisGroup() || NormalCtor) &&
+ "No default implementation found for analysis group!");
+ assert(NormalCtor &&
+ "Cannot call createPass on PassInfo without default ctor!");
+ return NormalCtor();
+}
+
+//===----------------------------------------------------------------------===//
+// Analysis Group Implementation Code
+//===----------------------------------------------------------------------===//
+
+// RegisterAGBase implementation
+//
+RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID,
+ const void *PassID, bool isDefault)
+ : PassInfo(Name, InterfaceID) {
+ PassRegistry::getPassRegistry()->registerAnalysisGroup(InterfaceID, PassID,
+ *this, isDefault);
+}
+
+//===----------------------------------------------------------------------===//
+// PassRegistrationListener implementation
+//
+
+// PassRegistrationListener ctor - Add the current object to the list of
+// PassRegistrationListeners...
+PassRegistrationListener::PassRegistrationListener() {
+ PassRegistry::getPassRegistry()->addRegistrationListener(this);
+}
+
+// dtor - Remove object from list of listeners...
+PassRegistrationListener::~PassRegistrationListener() {
+ PassRegistry::getPassRegistry()->removeRegistrationListener(this);
+}
+
+// enumeratePasses - Iterate over the registered passes, calling the
+// passEnumerate callback on each PassInfo object.
+//
+void PassRegistrationListener::enumeratePasses() {
+ PassRegistry::getPassRegistry()->enumerateWith(this);
+}
+
+PassNameParser::~PassNameParser() {}
+
+//===----------------------------------------------------------------------===//
+// AnalysisUsage Class Implementation
+//
+
+namespace {
+ struct GetCFGOnlyPasses : public PassRegistrationListener {
+ typedef AnalysisUsage::VectorType VectorType;
+ VectorType &CFGOnlyList;
+ GetCFGOnlyPasses(VectorType &L) : CFGOnlyList(L) {}
+
+ void passEnumerate(const PassInfo *P) {
+ if (P->isCFGOnlyPass())
+ CFGOnlyList.push_back(P->getTypeInfo());
+ }
+ };
+}
+
+// setPreservesCFG - This function should be called by the pass iff it does
+// not:
+//
+// 1. Add or remove basic blocks from the function
+// 2. Modify terminator instructions in any way.
+//
+// This function annotates the AnalysisUsage info object to say that analyses
+// that only depend on the CFG are preserved by this pass.
+//
+void AnalysisUsage::setPreservesCFG() {
+ // Since this transformation doesn't modify the CFG, it preserves all analyses
+ // that only depend on the CFG (like dominators, loop info, etc...)
+ GetCFGOnlyPasses(Preserved).enumeratePasses();
+}
+
+AnalysisUsage &AnalysisUsage::addPreserved(StringRef Arg) {
+ const PassInfo *PI = Pass::lookupPassInfo(Arg);
+ // If the pass exists, preserve it. Otherwise silently do nothing.
+ if (PI) Preserved.push_back(PI->getTypeInfo());
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredID(const void *ID) {
+ Required.push_back(ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredID(char &ID) {
+ Required.push_back(&ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(char &ID) {
+ Required.push_back(&ID);
+ RequiredTransitive.push_back(&ID);
+ return *this;
+}
diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp
new file mode 100644
index 000000000000..3c968aac164f
--- /dev/null
+++ b/lib/IR/PassManager.cpp
@@ -0,0 +1,1912 @@
+//===- PassManager.cpp - LLVM Pass Infrastructure Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM Pass Manager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/PassManagers.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/PassNameParser.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+// See PassManagers.h for Pass Manager infrastructure overview.
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Pass debugging information. Often it is useful to find out what pass is
+// running when a crash occurs in a utility. When this library is compiled with
+// debugging on, a command line option (--debug-pass) is enabled that causes the
+// pass name to be printed before it executes.
+//
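+// For example, `opt -debug-pass=Executions ...` prints each pass name (and
+// the IR unit it runs on) just before the pass executes.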
+
+// Different debug levels that can be enabled...
+enum PassDebugLevel {
+ None, Arguments, Structure, Executions, Details
+};
+
+static cl::opt<enum PassDebugLevel>
+PassDebugging("debug-pass", cl::Hidden,
+ cl::desc("Print PassManager debugging information"),
+ cl::values(
+ clEnumVal(None , "disable debug output"),
+ clEnumVal(Arguments , "print pass arguments to pass to 'opt'"),
+ clEnumVal(Structure , "print pass structure before run()"),
+ clEnumVal(Executions, "print pass name before it is executed"),
+ clEnumVal(Details , "print pass details when it is executed"),
+ clEnumValEnd));
+
+typedef llvm::cl::list<const llvm::PassInfo *, bool, PassNameParser>
+PassOptionList;
+
+// Print IR out before/after specified passes.
+static PassOptionList
+PrintBefore("print-before",
+ llvm::cl::desc("Print IR before specified passes"),
+ cl::Hidden);
+
+static PassOptionList
+PrintAfter("print-after",
+ llvm::cl::desc("Print IR after specified passes"),
+ cl::Hidden);
+
+static cl::opt<bool>
+PrintBeforeAll("print-before-all",
+ llvm::cl::desc("Print IR before each pass"),
+ cl::init(false));
+static cl::opt<bool>
+PrintAfterAll("print-after-all",
+ llvm::cl::desc("Print IR after each pass"),
+ cl::init(false));
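+
+// For example, `opt -print-before=licm -print-after=licm ...` dumps the IR
+// around just that one pass, while -print-before-all / -print-after-all
+// dump it around every pass.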
+
+/// This is a helper to determine whether to print IR before or
+/// after a pass.
+static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI,
+ PassOptionList &PassesToPrint) {
+ for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
+ const llvm::PassInfo *PassInf = PassesToPrint[i];
+ if (PassInf && PassInf->getPassArgument() == PI->getPassArgument())
+ return true;
+ }
+ return false;
+}
+
+/// This is a utility to check whether a pass should have IR dumped
+/// before it.
+static bool ShouldPrintBeforePass(const PassInfo *PI) {
+ return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PI, PrintBefore);
+}
+
+/// This is a utility to check whether a pass should have IR dumped
+/// after it.
+static bool ShouldPrintAfterPass(const PassInfo *PI) {
+ return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter);
+}
+
+} // End of llvm namespace
+
+/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
+/// or higher is specified.
+bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
+ return PassDebugging >= Executions;
+}
+
+void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
+ if (V == 0 && M == 0)
+ OS << "Releasing pass '";
+ else
+ OS << "Running pass '";
+
+ OS << P->getPassName() << "'";
+
+ if (M) {
+ OS << " on module '" << M->getModuleIdentifier() << "'.\n";
+ return;
+ }
+ if (V == 0) {
+ OS << '\n';
+ return;
+ }
+
+ OS << " on ";
+ if (isa<Function>(V))
+ OS << "function";
+ else if (isa<BasicBlock>(V))
+ OS << "basic block";
+ else
+ OS << "value";
+
+ OS << " '";
+ WriteAsOperand(OS, V, /*PrintTy=*/false, M);
+ OS << "'\n";
+}
+
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// BBPassManager
+//
+/// BBPassManager manages BasicBlockPasses. It batches all the
+/// passes together and sequences them to process one basic block before
+/// processing the next basic block.
+class BBPassManager : public PMDataManager, public FunctionPass {
+
+public:
+ static char ID;
+ explicit BBPassManager()
+ : PMDataManager(), FunctionPass(ID) {}
+
+ /// Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the function, and if so, return true.
+ bool runOnFunction(Function &F);
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ bool doInitialization(Module &M);
+ bool doInitialization(Function &F);
+ bool doFinalization(Module &M);
+ bool doFinalization(Function &F);
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ virtual const char *getPassName() const {
+ return "BasicBlock Pass Manager";
+ }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::dbgs().indent(Offset*2) << "BasicBlockPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ BP->dumpPassStructure(Offset + 1);
+ dumpLastUses(BP, Offset+1);
+ }
+ }
+
+ BasicBlockPass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ BasicBlockPass *BP = static_cast<BasicBlockPass *>(PassVector[N]);
+ return BP;
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_BasicBlockPassManager;
+ }
+};
+
+char BBPassManager::ID = 0;
+}
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl
+//
+/// FunctionPassManagerImpl manages FPPassManagers
+class FunctionPassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+ virtual void anchor();
+private:
+ bool wasRun;
+public:
+ static char ID;
+ explicit FunctionPassManagerImpl() :
+ Pass(PT_PassManager, ID), PMDataManager(),
+ PMTopLevelManager(new FPPassManager()), wasRun(false) {}
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// createPrinterPass - Get a function printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintFunctionPass(Banner, &O);
+ }
+
+ // Prepare for running an on-the-fly pass, freeing memory if needed
+ // from a previous run.
+ void releaseMemoryOnTheFly();
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the function, and if so, return true.
+ bool run(Function &F);
+
+ /// doInitialization - Run all of the initializers for the function passes.
+ ///
+ bool doInitialization(Module &M);
+
+ /// doFinalization - Run all of the finalizers for the function passes.
+ ///
+ bool doFinalization(Module &M);
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+ virtual PassManagerType getTopLevelPassManagerType() {
+ return PMT_FunctionPassManager;
+ }
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ FPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]);
+ return FP;
+ }
+};
+
+void FunctionPassManagerImpl::anchor() {}
+
+char FunctionPassManagerImpl::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// MPPassManager
+//
+/// MPPassManager manages ModulePasses and function pass managers.
+/// It batches all Module passes and function pass managers together and
+/// sequences them to process one module.
+class MPPassManager : public Pass, public PMDataManager {
+public:
+ static char ID;
+ explicit MPPassManager() :
+ Pass(PT_PassManager, ID), PMDataManager() { }
+
+  // Delete the on-the-fly managers.
+ virtual ~MPPassManager() {
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ delete FPP;
+ }
+ }
+
+ /// createPrinterPass - Get a module printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool runOnModule(Module &M);
+
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
+
+ /// doInitialization - Run all of the initializers for the module passes.
+ ///
+ bool doInitialization();
+
+ /// doFinalization - Run all of the finalizers for the module passes.
+ ///
+ bool doFinalization();
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ /// Add RequiredPass into list of lower level passes required by pass P.
+ /// RequiredPass is run on the fly by Pass Manager when P requests it
+ /// through getAnalysis interface.
+ virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
+
+  /// Return the function pass corresponding to PassInfo PI that is
+  /// required by module pass MP. Instantiate the analysis pass by running
+  /// its runOnFunction() on function F.
+ virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F);
+
+ virtual const char *getPassName() const {
+ return "Module Pass Manager";
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+
+ // Print passes managed by this manager
+ void dumpPassStructure(unsigned Offset) {
+ llvm::dbgs().indent(Offset*2) << "ModulePass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+ MP->dumpPassStructure(Offset + 1);
+ std::map<Pass *, FunctionPassManagerImpl *>::const_iterator I =
+ OnTheFlyManagers.find(MP);
+ if (I != OnTheFlyManagers.end())
+ I->second->dumpPassStructure(Offset + 2);
+ dumpLastUses(MP, Offset+1);
+ }
+ }
+
+ ModulePass *getContainedPass(unsigned N) {
+ assert(N < PassVector.size() && "Pass number out of range!");
+ return static_cast<ModulePass *>(PassVector[N]);
+ }
+
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_ModulePassManager;
+ }
+
+ private:
+  /// Collection of on-the-fly FPPassManagers. These managers manage
+ /// function passes that are required by module passes.
+ std::map<Pass *, FunctionPassManagerImpl *> OnTheFlyManagers;
+};
+
+char MPPassManager::ID = 0;
+//===----------------------------------------------------------------------===//
+// PassManagerImpl
+//
+
+/// PassManagerImpl manages MPPassManagers
+class PassManagerImpl : public Pass,
+ public PMDataManager,
+ public PMTopLevelManager {
+ virtual void anchor();
+
+public:
+ static char ID;
+ explicit PassManagerImpl() :
+ Pass(PT_PassManager, ID), PMDataManager(),
+ PMTopLevelManager(new MPPassManager()) {}
+
+ /// add - Add a pass to the queue of passes to run. This passes ownership of
+ /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+ /// will be destroyed as well, so there is no need to delete the pass. This
+ /// implies that all passes MUST be allocated with 'new'.
+ void add(Pass *P) {
+ schedulePass(P);
+ }
+
+ /// createPrinterPass - Get a module printer pass.
+ Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
+ return createPrintModulePass(&O, false, Banner);
+ }
+
+ /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// whether any of the passes modifies the module, and if so, return true.
+ bool run(Module &M);
+
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
+
+ /// doInitialization - Run all of the initializers for the module passes.
+ ///
+ bool doInitialization();
+
+ /// doFinalization - Run all of the finalizers for the module passes.
+ ///
+ bool doFinalization();
+
+ /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.setPreservesAll();
+ }
+
+ virtual PMDataManager *getAsPMDataManager() { return this; }
+ virtual Pass *getAsPass() { return this; }
+ virtual PassManagerType getTopLevelPassManagerType() {
+ return PMT_ModulePassManager;
+ }
+
+ MPPassManager *getContainedManager(unsigned N) {
+ assert(N < PassManagers.size() && "Pass number out of range!");
+ MPPassManager *MP = static_cast<MPPassManager *>(PassManagers[N]);
+ return MP;
+ }
+};
+
+void PassManagerImpl::anchor() {}
+
+char PassManagerImpl::ID = 0;
+} // End of llvm namespace
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+/// TimingInfo Class - This class is used to calculate information about the
+/// amount of time each pass takes to execute. This only happens when
+/// -time-passes is enabled on the command line.
+///
+
+static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
+
+class TimingInfo {
+ DenseMap<Pass*, Timer*> TimingData;
+ TimerGroup TG;
+public:
+  // Use the 'createTheTimeInfo' member to get this.
+ TimingInfo() : TG("... Pass execution timing report ...") {}
+
+  // ~TimingInfo - Print out timing information on destruction.
+ ~TimingInfo() {
+ // Delete all of the timers, which accumulate their info into the
+ // TimerGroup.
+ for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(),
+ E = TimingData.end(); I != E; ++I)
+ delete I->second;
+ // TimerGroup is deleted next, printing the report.
+ }
+
+ // createTheTimeInfo - This method either initializes the TheTimeInfo pointer
+ // to a non null value (if the -time-passes option is enabled) or it leaves it
+ // null. It may be called multiple times.
+ static void createTheTimeInfo();
+
+ /// getPassTimer - Return the timer for the specified pass if it exists.
+ Timer *getPassTimer(Pass *P) {
+ if (P->getAsPMDataManager())
+ return 0;
+
+ sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
+ Timer *&T = TimingData[P];
+ if (T == 0)
+ T = new Timer(P->getPassName(), TG);
+ return T;
+ }
+};
+
+} // End of anon namespace
+
+static TimingInfo *TheTimeInfo;
+
+//===----------------------------------------------------------------------===//
+// PMTopLevelManager implementation
+
+/// Initialize top level manager. Create first pass manager.
+PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
+ PMDM->setTopLevelManager(this);
+ addPassManager(PMDM);
+ activeStack.push(PMDM);
+}
+
+/// Set pass P as the last user of the given analysis passes.
+void
+PMTopLevelManager::setLastUser(ArrayRef<Pass*> AnalysisPasses, Pass *P) {
+ unsigned PDepth = 0;
+ if (P->getResolver())
+ PDepth = P->getResolver()->getPMDataManager().getDepth();
+
+ for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(),
+ E = AnalysisPasses.end(); I != E; ++I) {
+ Pass *AP = *I;
+ LastUser[AP] = P;
+
+ if (P == AP)
+ continue;
+
+    // Update the last users of passes that are transitively required by AP.
+ AnalysisUsage *AnUsage = findAnalysisUsage(AP);
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 12> LastPMUses;
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ assert(AnalysisPass && "Expected analysis pass to exist.");
+ AnalysisResolver *AR = AnalysisPass->getResolver();
+ assert(AR && "Expected analysis resolver to exist.");
+ unsigned APDepth = AR->getPMDataManager().getDepth();
+
+ if (PDepth == APDepth)
+ LastUses.push_back(AnalysisPass);
+ else if (PDepth > APDepth)
+ LastPMUses.push_back(AnalysisPass);
+ }
+
+ setLastUser(LastUses, P);
+
+ // If this pass has a corresponding pass manager, push higher level
+ // analysis to this pass manager.
+ if (P->getResolver())
+ setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass());
+
+
+ // If AP is the last user of other passes then make P last user of
+ // such passes.
+ for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(),
+ LUE = LastUser.end(); LUI != LUE; ++LUI) {
+ if (LUI->second == AP)
+ // DenseMap iterator is not invalidated here because
+ // this is just updating existing entries.
+ LastUser[LUI->first] = P;
+ }
+ }
+}
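+
+// For example, if pass P requires analyses A and B and no later pass uses
+// them, P is recorded as the last user of both, so A and B are freed right
+// after P runs (see PMDataManager::removeDeadPasses below).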
+
+/// Collect passes whose last user is P
+void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
+ Pass *P) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
+ InversedLastUser.find(P);
+ if (DMI == InversedLastUser.end())
+ return;
+
+ SmallPtrSet<Pass *, 8> &LU = DMI->second;
+ for (SmallPtrSet<Pass *, 8>::iterator I = LU.begin(),
+ E = LU.end(); I != E; ++I) {
+ LastUses.push_back(*I);
+ }
+
+}
+
+AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
+ AnalysisUsage *AnUsage = NULL;
+ DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
+ if (DMI != AnUsageMap.end())
+ AnUsage = DMI->second;
+ else {
+ AnUsage = new AnalysisUsage();
+ P->getAnalysisUsage(*AnUsage);
+ AnUsageMap[P] = AnUsage;
+ }
+ return AnUsage;
+}
+
+/// Schedule pass P for execution. Make sure that passes required by
+/// P are run before P is run. Update analysis info maintained by
+/// the manager. Remove dead passes. This is a recursive function.
+void PMTopLevelManager::schedulePass(Pass *P) {
+
+  // TODO: Allocate a function manager for this pass; otherwise the required
+  // set may be inserted into the previous function manager.
+
+ // Give pass a chance to prepare the stage.
+ P->preparePassManager(activeStack);
+
+ // If P is an analysis pass and it is available then do not
+ // generate the analysis again. Stale analysis info should not be
+ // available at this point.
+ const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
+ if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
+ delete P;
+ return;
+ }
+
+ AnalysisUsage *AnUsage = findAnalysisUsage(P);
+
+ bool checkAnalysis = true;
+ while (checkAnalysis) {
+ checkAnalysis = false;
+
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
+ E = RequiredSet.end(); I != E; ++I) {
+
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ if (!AnalysisPass) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+
+ if (PI == NULL) {
+ // Pass P is not in the global PassRegistry
+ dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n";
+ dbgs() << "Verify if there is a pass dependency cycle." << "\n";
+ dbgs() << "Required Passes:" << "\n";
+ for (AnalysisUsage::VectorType::const_iterator I2 = RequiredSet.begin(),
+ E = RequiredSet.end(); I2 != E && I2 != I; ++I2) {
+ Pass *AnalysisPass2 = findAnalysisPass(*I2);
+ if (AnalysisPass2) {
+ dbgs() << "\t" << AnalysisPass2->getPassName() << "\n";
+ } else {
+ dbgs() << "\t" << "Error: Required pass not found! Possible causes:" << "\n";
+ dbgs() << "\t\t" << "- Pass misconfiguration (e.g.: missing macros)" << "\n";
+ dbgs() << "\t\t" << "- Corruption of the global PassRegistry" << "\n";
+ }
+ }
+ }
+
+ assert(PI && "Expected required passes to be initialized");
+ AnalysisPass = PI->createPass();
+ if (P->getPotentialPassManagerType () ==
+ AnalysisPass->getPotentialPassManagerType())
+ // Schedule analysis pass that is managed by the same pass manager.
+ schedulePass(AnalysisPass);
+ else if (P->getPotentialPassManagerType () >
+ AnalysisPass->getPotentialPassManagerType()) {
+ // Schedule analysis pass that is managed by a new manager.
+ schedulePass(AnalysisPass);
+ // Recheck analysis passes to ensure that required analyses that
+ // are already checked are still available.
+ checkAnalysis = true;
+ } else
+        // Do not schedule this analysis. Lower level analysis
+        // passes are run on the fly.
+ delete AnalysisPass;
+ }
+ }
+ }
+
+ // Now all required passes are available.
+ if (ImmutablePass *IP = P->getAsImmutablePass()) {
+    // P is an immutable pass and it will be managed by this
+    // top level manager. Set up an analysis resolver to connect them.
+ PMDataManager *DM = getAsPMDataManager();
+ AnalysisResolver *AR = new AnalysisResolver(*DM);
+ P->setResolver(AR);
+ DM->initializeAnalysisImpl(P);
+ addImmutablePass(IP);
+ DM->recordAvailableAnalysis(IP);
+ return;
+ }
+
+ if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) {
+ Pass *PP = P->createPrinterPass(
+ dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***");
+ PP->assignPassManager(activeStack, getTopLevelPassManagerType());
+ }
+
+ // Add the requested pass to the best available pass manager.
+ P->assignPassManager(activeStack, getTopLevelPassManagerType());
+
+ if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) {
+ Pass *PP = P->createPrinterPass(
+ dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***");
+ PP->assignPassManager(activeStack, getTopLevelPassManagerType());
+ }
+}
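+
+// For example, a transform pass whose getAnalysisUsage() declares
+// AU.addRequired<DominatorTree>() has DominatorTree located (or created and
+// scheduled) by the loop above before the transform itself is assigned to a
+// pass manager, so a required analysis always runs before its user.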
+
+/// Find the pass that implements Analysis AID. Search immutable
+/// passes and all pass managers. If the desired pass is not found
+/// then return NULL.
+Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
+
+ // Check pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
+
+  // Check indirect pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator
+ I = IndirectPassManagers.begin(),
+ E = IndirectPassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
+
+ // Check the immutable passes. Iterate in reverse order so that we find
+ // the most recently registered passes first.
+ for (SmallVector<ImmutablePass *, 8>::reverse_iterator I =
+ ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) {
+ AnalysisID PI = (*I)->getPassID();
+ if (PI == AID)
+ return *I;
+
+    // If the pass was not found, check the interfaces implemented by the
+    // immutable pass.
+ const PassInfo *PassInf =
+ PassRegistry::getPassRegistry()->getPassInfo(PI);
+ assert(PassInf && "Expected all immutable passes to be initialized");
+ const std::vector<const PassInfo*> &ImmPI =
+ PassInf->getInterfacesImplemented();
+ for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
+ EE = ImmPI.end(); II != EE; ++II) {
+ if ((*II)->getTypeInfo() == AID)
+ return *I;
+ }
+ }
+
+ return 0;
+}
+
+// Print passes managed by this top level manager.
+void PMTopLevelManager::dumpPasses() const {
+
+ if (PassDebugging < Structure)
+ return;
+
+ // Print out the immutable passes
+ for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) {
+ ImmutablePasses[i]->dumpPassStructure(0);
+ }
+
+ // Every class that derives from PMDataManager also derives from Pass
+ // (sometimes indirectly), but there's no inheritance relationship
+ // between PMDataManager and Pass, so we have to getAsPass to get
+ // from a PMDataManager* to a Pass*.
+ for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->getAsPass()->dumpPassStructure(1);
+}
+
+void PMTopLevelManager::dumpArguments() const {
+
+ if (PassDebugging < Arguments)
+ return;
+
+ dbgs() << "Pass Arguments: ";
+ for (SmallVector<ImmutablePass *, 8>::const_iterator I =
+ ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) {
+ assert(PI && "Expected all immutable passes to be initialized");
+ if (!PI->isAnalysisGroup())
+ dbgs() << " -" << PI->getPassArgument();
+ }
+ for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->dumpPassArguments();
+ dbgs() << "\n";
+}
+
+void PMTopLevelManager::initializeAllAnalysisInfo() {
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
+  // Initialize indirect pass managers
+ for (SmallVectorImpl<PMDataManager *>::iterator
+ I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
+ I != E; ++I)
+ (*I)->initializeAnalysisInfo();
+
+ for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
+ DME = LastUser.end(); DMI != DME; ++DMI) {
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
+ InversedLastUser.find(DMI->second);
+ if (InvDMI != InversedLastUser.end()) {
+ SmallPtrSet<Pass *, 8> &L = InvDMI->second;
+ L.insert(DMI->first);
+ } else {
+ SmallPtrSet<Pass *, 8> L; L.insert(DMI->first);
+ InversedLastUser[DMI->second] = L;
+ }
+ }
+}
+
+/// Destructor
+PMTopLevelManager::~PMTopLevelManager() {
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ delete *I;
+
+ for (SmallVectorImpl<ImmutablePass *>::iterator
+ I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ delete *I;
+
+ for (DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.begin(),
+ DME = AnUsageMap.end(); DMI != DME; ++DMI)
+ delete DMI->second;
+}
+
+//===----------------------------------------------------------------------===//
+// PMDataManager implementation
+
+/// Augment AvailableAnalysis by adding analysis made available by pass P.
+void PMDataManager::recordAvailableAnalysis(Pass *P) {
+ AnalysisID PI = P->getPassID();
+
+ AvailableAnalysis[PI] = P;
+
+ assert(!AvailableAnalysis.empty());
+
+ // This pass is the current implementation of all of the interfaces it
+ // implements as well.
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
+ if (PInf == 0) return;
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i)
+ AvailableAnalysis[II[i]->getTypeInfo()] = P;
+}
+
+// Return true if P preserves high level analysis used by other
+// passes managed by this manager
+bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return true;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(),
+ E = HigherLevelAnalysis.end(); I != E; ++I) {
+ Pass *P1 = *I;
+ if (P1->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(),
+ P1->getPassID()) ==
+ PreservedSet.end())
+ return false;
+ }
+
+ return true;
+}
+
+/// verifyPreservedAnalysis -- Verify analysis preserved by pass P.
+void PMDataManager::verifyPreservedAnalysis(Pass *P) {
+ // Don't do this unless assertions are enabled.
+#ifdef NDEBUG
+ return;
+#endif
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+
+ // Verify preserved analysis
+ for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(),
+ E = PreservedSet.end(); I != E; ++I) {
+ AnalysisID AID = *I;
+ if (Pass *AP = findAnalysisPass(AID, true)) {
+ TimeRegion PassTimer(getPassTimer(AP));
+ AP->verifyAnalysis();
+ }
+ }
+}
+
+/// Remove Analysis not preserved by Pass P
+void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ if (AnUsage->getPreservesAll())
+ return;
+
+ const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
+ for (DenseMap<AnalysisID, Pass*>::iterator I = AvailableAnalysis.begin(),
+ E = AvailableAnalysis.end(); I != E; ) {
+ DenseMap<AnalysisID, Pass*>::iterator Info = I++;
+ if (Info->second->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end()) {
+ // Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
+ }
+ AvailableAnalysis.erase(Info);
+ }
+ }
+
+ // Check inherited analysis also. If P is not preserving analysis
+ // provided by parent manager then remove it here.
+ for (unsigned Index = 0; Index < PMT_Last; ++Index) {
+
+ if (!InheritedAnalysis[Index])
+ continue;
+
+ for (DenseMap<AnalysisID, Pass*>::iterator
+ I = InheritedAnalysis[Index]->begin(),
+ E = InheritedAnalysis[Index]->end(); I != E; ) {
+ DenseMap<AnalysisID, Pass *>::iterator Info = I++;
+ if (Info->second->getAsImmutablePass() == 0 &&
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ PreservedSet.end()) {
+ // Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
+ dbgs() << S->getPassName() << "'\n";
+ }
+ InheritedAnalysis[Index]->erase(Info);
+ }
+ }
+ }
+}
+
+/// Remove analysis passes that are not used any longer
+void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
+ enum PassDebuggingString DBG_STR) {
+
+ SmallVector<Pass *, 12> DeadPasses;
+
+  // If this is an on-the-fly manager then it does not have a TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(DeadPasses, P);
+
+ if (PassDebugging >= Details && !DeadPasses.empty()) {
+ dbgs() << " -*- '" << P->getPassName();
+ dbgs() << "' is the last user of following pass instances.";
+ dbgs() << " Free these instances\n";
+ }
+
+ for (SmallVectorImpl<Pass *>::iterator I = DeadPasses.begin(),
+ E = DeadPasses.end(); I != E; ++I)
+ freePass(*I, Msg, DBG_STR);
+}
+
+void PMDataManager::freePass(Pass *P, StringRef Msg,
+ enum PassDebuggingString DBG_STR) {
+ dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg);
+
+ {
+ // If the pass crashes releasing memory, remember this.
+ PassManagerPrettyStackEntry X(P);
+ TimeRegion PassTimer(getPassTimer(P));
+
+ P->releaseMemory();
+ }
+
+ AnalysisID PI = P->getPassID();
+ if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
+ // Remove the pass itself (if it is not already removed).
+ AvailableAnalysis.erase(PI);
+
+ // Remove all interfaces this pass implements, for which it is also
+ // listed as the available implementation.
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
+ for (unsigned i = 0, e = II.size(); i != e; ++i) {
+ DenseMap<AnalysisID, Pass*>::iterator Pos =
+ AvailableAnalysis.find(II[i]->getTypeInfo());
+ if (Pos != AvailableAnalysis.end() && Pos->second == P)
+ AvailableAnalysis.erase(Pos);
+ }
+ }
+}
+
+/// Add pass P into the PassVector. Update
+/// AvailableAnalysis appropriately if ProcessAnalysis is true.
+void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
+ // This manager is going to manage pass P. Set up analysis resolver
+ // to connect them.
+ AnalysisResolver *AR = new AnalysisResolver(*this);
+ P->setResolver(AR);
+
+ // If a FunctionPass F is the last user of ModulePass info M
+ // then the F's manager, not F, records itself as a last user of M.
+ SmallVector<Pass *, 12> TransferLastUses;
+
+ if (!ProcessAnalysis) {
+ // Add pass
+ PassVector.push_back(P);
+ return;
+ }
+
+ // At the moment, this pass is the last user of all required passes.
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 8> RequiredPasses;
+ SmallVector<AnalysisID, 8> ReqAnalysisNotAvailable;
+
+ unsigned PDepth = this->getDepth();
+
+ collectRequiredAnalysis(RequiredPasses,
+ ReqAnalysisNotAvailable, P);
+ for (SmallVectorImpl<Pass *>::iterator I = RequiredPasses.begin(),
+ E = RequiredPasses.end(); I != E; ++I) {
+ Pass *PRequired = *I;
+ unsigned RDepth = 0;
+
+ assert(PRequired->getResolver() && "Analysis Resolver is not set");
+ PMDataManager &DM = PRequired->getResolver()->getPMDataManager();
+ RDepth = DM.getDepth();
+
+ if (PDepth == RDepth)
+ LastUses.push_back(PRequired);
+ else if (PDepth > RDepth) {
+ // Let the parent claim responsibility of last use
+ TransferLastUses.push_back(PRequired);
+ // Keep track of higher level analysis used by this manager.
+ HigherLevelAnalysis.push_back(PRequired);
+ } else
+ llvm_unreachable("Unable to accommodate Required Pass");
+ }
+
+ // Set P as P's last user until someone starts using P.
+ // However, if P is a Pass Manager then it does not need
+ // to record its last user.
+ if (P->getAsPMDataManager() == 0)
+ LastUses.push_back(P);
+ TPM->setLastUser(LastUses, P);
+
+ if (!TransferLastUses.empty()) {
+ Pass *My_PM = getAsPass();
+ TPM->setLastUser(TransferLastUses, My_PM);
+ TransferLastUses.clear();
+ }
+
+ // Now, take care of required analyses that are not available.
+ for (SmallVectorImpl<AnalysisID>::iterator
+ I = ReqAnalysisNotAvailable.begin(),
+ E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+ Pass *AnalysisPass = PI->createPass();
+ this->addLowerLevelRequiredPass(P, AnalysisPass);
+ }
+
+ // Take a note of analysis required and made available by this pass.
+ // Remove the analysis not preserved by this pass
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+
+ // Add pass
+ PassVector.push_back(P);
+}
+
+
+/// Populate RP with analysis passes that are required by
+/// pass P and are available. Populate RP_NotAvail with analysis
+/// passes that are required by pass P but are not available.
+void PMDataManager::collectRequiredAnalysis(SmallVectorImpl<Pass *> &RP,
+ SmallVectorImpl<AnalysisID> &RP_NotAvail,
+ Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+ const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
+ for (AnalysisUsage::VectorType::const_iterator
+ I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ if (Pass *AnalysisPass = findAnalysisPass(*I, true))
+ RP.push_back(AnalysisPass);
+ else
+ RP_NotAvail.push_back(*I);
+ }
+}
+
+// All Required analyses should be available to the pass as it runs! Here
+// we fill in the AnalysisImpls member of the pass so that it can
+// successfully use the getAnalysis() method to retrieve the
+// implementations it needs.
+//
+void PMDataManager::initializeAnalysisImpl(Pass *P) {
+ AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
+
+ for (AnalysisUsage::VectorType::const_iterator
+ I = AnUsage->getRequiredSet().begin(),
+ E = AnUsage->getRequiredSet().end(); I != E; ++I) {
+ Pass *Impl = findAnalysisPass(*I, true);
+ if (Impl == 0)
+      // This may be an analysis pass that is initialized on the fly.
+      // If that is not the case, an assert will be raised when it is used.
+ continue;
+ AnalysisResolver *AR = P->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->addAnalysisImplsPair(*I, Impl);
+ }
+}
+
+/// Find the pass that implements Analysis AID. If the desired pass is not
+/// found then return NULL.
+Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
+
+  // Check if the AvailableAnalysis map has an entry for AID.
+ DenseMap<AnalysisID, Pass*>::const_iterator I = AvailableAnalysis.find(AID);
+
+ if (I != AvailableAnalysis.end())
+ return I->second;
+
+ // Search Parents through TopLevelManager
+ if (SearchParent)
+ return TPM->findAnalysisPass(AID);
+
+ return NULL;
+}
+
+// Print list of passes that are last used by P.
+void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
+
+ SmallVector<Pass *, 12> LUses;
+
+  // If this is an on-the-fly manager then it does not have a TPM.
+ if (!TPM)
+ return;
+
+ TPM->collectLastUses(LUses, P);
+
+ for (SmallVectorImpl<Pass *>::iterator I = LUses.begin(),
+ E = LUses.end(); I != E; ++I) {
+ llvm::dbgs() << "--" << std::string(Offset*2, ' ');
+ (*I)->dumpPassStructure(0);
+ }
+}
+
+void PMDataManager::dumpPassArguments() const {
+ for (SmallVectorImpl<Pass *>::const_iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I) {
+ if (PMDataManager *PMD = (*I)->getAsPMDataManager())
+ PMD->dumpPassArguments();
+ else
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
+ if (!PI->isAnalysisGroup())
+ dbgs() << " -" << PI->getPassArgument();
+ }
+}
+
+void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
+ enum PassDebuggingString S2,
+ StringRef Msg) {
+ if (PassDebugging < Executions)
+ return;
+ dbgs() << (void*)this << std::string(getDepth()*2+1, ' ');
+ switch (S1) {
+ case EXECUTION_MSG:
+ dbgs() << "Executing Pass '" << P->getPassName();
+ break;
+ case MODIFICATION_MSG:
+ dbgs() << "Made Modification '" << P->getPassName();
+ break;
+ case FREEING_MSG:
+ dbgs() << " Freeing Pass '" << P->getPassName();
+ break;
+ default:
+ break;
+ }
+ switch (S2) {
+ case ON_BASICBLOCK_MSG:
+ dbgs() << "' on BasicBlock '" << Msg << "'...\n";
+ break;
+ case ON_FUNCTION_MSG:
+ dbgs() << "' on Function '" << Msg << "'...\n";
+ break;
+ case ON_MODULE_MSG:
+ dbgs() << "' on Module '" << Msg << "'...\n";
+ break;
+ case ON_REGION_MSG:
+ dbgs() << "' on Region '" << Msg << "'...\n";
+ break;
+ case ON_LOOP_MSG:
+ dbgs() << "' on Loop '" << Msg << "'...\n";
+ break;
+ case ON_CG_MSG:
+ dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n";
+ break;
+ default:
+ break;
+ }
+}
+
+void PMDataManager::dumpRequiredSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet());
+}
+
+void PMDataManager::dumpPreservedSet(const Pass *P) const {
+ if (PassDebugging < Details)
+ return;
+
+ AnalysisUsage analysisUsage;
+ P->getAnalysisUsage(analysisUsage);
+ dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
+}
+
+void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
+ const AnalysisUsage::VectorType &Set) const {
+ assert(PassDebugging >= Details);
+ if (Set.empty())
+ return;
+ dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
+ for (unsigned i = 0; i != Set.size(); ++i) {
+ if (i) dbgs() << ',';
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
+ if (!PInf) {
+ // Some preserved passes, such as AliasAnalysis, may not be initialized by
+ // all drivers.
+ dbgs() << " Uninitialized Pass";
+ continue;
+ }
+ dbgs() << ' ' << PInf->getPassName();
+ }
+ dbgs() << '\n';
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+/// This should be handled by specific pass manager.
+void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ if (TPM) {
+ TPM->dumpArguments();
+ TPM->dumpPasses();
+ }
+
+  // A module-level pass may require function-level analysis info
+  // (e.g. dominator info). The pass manager uses an on-the-fly function pass
+  // manager to provide this on demand. In that case, in pass manager
+  // terminology, the module-level pass requires lower-level analysis info
+  // managed by a lower-level pass manager.
+
+  // When the pass manager is not able to order the required analysis info,
+  // it checks whether any lower-level manager will be able to provide this
+  // analysis info on demand.
+#ifndef NDEBUG
+ dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
+ dbgs() << "' required by '" << P->getPassName() << "'\n";
+#endif
+ llvm_unreachable("Unable to schedule pass");
+}
+
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
+ llvm_unreachable("Unable to find on the fly pass");
+}
+
+// Destructor
+PMDataManager::~PMDataManager() {
+ for (SmallVectorImpl<Pass *>::iterator I = PassVector.begin(),
+ E = PassVector.end(); I != E; ++I)
+ delete *I;
+}
+
+//===----------------------------------------------------------------------===//
+// NOTE: Is this the right place to define this method?
+// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist.
+Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
+ return PM.findAnalysisPass(ID, dir);
+}
+
+Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
+ Function &F) {
+ return PM.getOnTheFlyPass(P, AnalysisPI, F);
+}
+
+//===----------------------------------------------------------------------===//
+// BBPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnBasicBlock method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool BBPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = doInitialization(F);
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
+ dumpRequiredSet(BP);
+
+ initializeAnalysisImpl(BP);
+
+ {
+ // If the pass crashes, remember this.
+ PassManagerPrettyStackEntry X(BP, *I);
+ TimeRegion PassTimer(getPassTimer(BP));
+
+ LocalChanged |= BP->runOnBasicBlock(*I);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
+ I->getName());
+ dumpPreservedSet(BP);
+
+ verifyPreservedAnalysis(BP);
+ removeNotPreservedAnalysis(BP);
+ recordAvailableAnalysis(BP);
+ removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG);
+ }
+
+ return doFinalization(F) || Changed;
+}
+
+// Implement doInitialization and doFinalization
+bool BBPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+bool BBPassManager::doInitialization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doInitialization(F);
+ }
+
+ return Changed;
+}
+
+bool BBPassManager::doFinalization(Function &F) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ BasicBlockPass *BP = getContainedPass(Index);
+ Changed |= BP->doFinalization(F);
+ }
+
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManager implementation
+
+/// Create new Function pass manager
+FunctionPassManager::FunctionPassManager(Module *m) : M(m) {
+ FPM = new FunctionPassManagerImpl();
+ // FPM is the top level manager.
+ FPM->setTopLevelManager(FPM);
+
+ AnalysisResolver *AR = new AnalysisResolver(*FPM);
+ FPM->setResolver(AR);
+}
+
+FunctionPassManager::~FunctionPassManager() {
+ delete FPM;
+}
+
+/// add - Add a pass to the queue of passes to run. This passes
+/// ownership of the Pass to the PassManager. When the
+/// PassManager is destroyed, the pass will be destroyed as well, so
+/// there is no need to delete the pass. (TODO delete passes.)
+/// This implies that all passes MUST be allocated with 'new'.
+void FunctionPassManager::add(Pass *P) {
+ FPM->add(P);
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep
+/// track of whether any of the passes modifies the function, and if
+/// so, return true.
+///
+bool FunctionPassManager::run(Function &F) {
+ if (F.isMaterializable()) {
+ std::string errstr;
+ if (F.Materialize(&errstr))
+ report_fatal_error("Error reading bitcode file: " + Twine(errstr));
+ }
+ return FPM->run(F);
+}
+
+
+/// doInitialization - Run all of the initializers for the function passes.
+///
+bool FunctionPassManager::doInitialization() {
+ return FPM->doInitialization(*M);
+}
+
+/// doFinalization - Run all of the finalizers for the function passes.
+///
+bool FunctionPassManager::doFinalization() {
+ return FPM->doFinalization(*M);
+}
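+
+// A typical driver uses this interface as follows. This is only an
+// illustrative sketch; the mem2reg pass is an arbitrary example:
+//
+//   FunctionPassManager FPM(M);
+//   FPM.add(createPromoteMemoryToRegisterPass());
+//   FPM.doInitialization();
+//   for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
+//     FPM.run(*I);
+//   FPM.doFinalization();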
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl implementation
+//
+bool FunctionPassManagerImpl::doInitialization(Module &M) {
+ bool Changed = false;
+
+ dumpArguments();
+ dumpPasses();
+
+ SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doInitialization(M);
+ }
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FunctionPassManagerImpl::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (int Index = getNumContainedManagers() - 1; Index >= 0; --Index)
+ Changed |= getContainedManager(Index)->doFinalization(M);
+
+ SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doFinalization(M);
+ }
+
+ return Changed;
+}
+
+/// cleanup - After running all passes, clean up pass manager cache.
+void FPPassManager::cleanup() {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ AnalysisResolver *AR = FP->getResolver();
+ assert(AR && "Analysis Resolver is not set");
+ AR->clearAnalysisImpls();
+ }
+}
+
+void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
+ if (!wasRun)
+ return;
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
+ FPPassManager *FPPM = getContainedManager(Index);
+ for (unsigned Index = 0; Index < FPPM->getNumContainedPasses(); ++Index) {
+ FPPM->getContainedPass(Index)->releaseMemory();
+ }
+ }
+ wasRun = false;
+}
+
+// Execute all the passes managed by this top level manager.
+// Return true if any function is modified by a pass.
+bool FunctionPassManagerImpl::run(Function &F) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnFunction(F);
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ getContainedManager(Index)->cleanup();
+
+ wasRun = true;
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// FPPassManager implementation
+
+char FPPassManager::ID = 0;
+/// Print passes managed by this manager
+void FPPassManager::dumpPassStructure(unsigned Offset) {
+ dbgs().indent(Offset*2) << "FunctionPass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ FP->dumpPassStructure(Offset + 1);
+ dumpLastUses(FP, Offset+1);
+ }
+}
+
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnFunction method. Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool FPPassManager::runOnFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ FunctionPass *FP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpRequiredSet(FP);
+
+ initializeAnalysisImpl(FP);
+
+ {
+ PassManagerPrettyStackEntry X(FP, F);
+ TimeRegion PassTimer(getPassTimer(FP));
+
+ LocalChanged |= FP->runOnFunction(F);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpPreservedSet(FP);
+
+ verifyPreservedAnalysis(FP);
+ removeNotPreservedAnalysis(FP);
+ recordAvailableAnalysis(FP);
+ removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
+ }
+ return Changed;
+}
+
+bool FPPassManager::runOnModule(Module &M) {
+ bool Changed = false;
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ Changed |= runOnFunction(*I);
+
+ return Changed;
+}
+
+bool FPPassManager::doInitialization(Module &M) {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ return Changed;
+}
+
+bool FPPassManager::doFinalization(Module &M) {
+ bool Changed = false;
+
+ for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MPPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnModule method. Keep track of whether any of the passes modifies
+/// the module, and if so, return true.
+bool
+MPPassManager::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Initialize on-the-fly passes
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+ Changed |= FPP->doInitialization(M);
+ }
+
+ // Initialize module passes
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization(M);
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ ModulePass *MP = getContainedPass(Index);
+ bool LocalChanged = false;
+
+ dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
+ dumpRequiredSet(MP);
+
+ initializeAnalysisImpl(MP);
+
+ {
+ PassManagerPrettyStackEntry X(MP, M);
+ TimeRegion PassTimer(getPassTimer(MP));
+
+ LocalChanged |= MP->runOnModule(M);
+ }
+
+ Changed |= LocalChanged;
+ if (LocalChanged)
+ dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
+ M.getModuleIdentifier());
+ dumpPreservedSet(MP);
+
+ verifyPreservedAnalysis(MP);
+ removeNotPreservedAnalysis(MP);
+ recordAvailableAnalysis(MP);
+ removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
+ }
+
+ // Finalize module passes
+ for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+ Changed |= getContainedPass(Index)->doFinalization(M);
+
+ // Finalize on-the-fly passes
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+ I != E; ++I) {
+ FunctionPassManagerImpl *FPP = I->second;
+    // We don't know when an on-the-fly pass was last run, so we need to
+    // releaseMemory / finalize here.
+ FPP->releaseMemoryOnTheFly();
+ Changed |= FPP->doFinalization(M);
+ }
+
+ return Changed;
+}
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+ assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+ assert((P->getPotentialPassManagerType() <
+ RequiredPass->getPotentialPassManagerType()) &&
+ "Unable to handle Pass that requires lower level Analysis pass");
+
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
+ if (!FPP) {
+ FPP = new FunctionPassManagerImpl();
+ // FPP is the top level manager.
+ FPP->setTopLevelManager(FPP);
+
+ OnTheFlyManagers[P] = FPP;
+ }
+ FPP->add(RequiredPass);
+
+ // Register P as the last user of RequiredPass.
+ if (RequiredPass) {
+ SmallVector<Pass *, 1> LU;
+ LU.push_back(RequiredPass);
+ FPP->setLastUser(LU, P);
+ }
+}
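+
+// For example, a ModulePass that declares AU.addRequired<DominatorTree>()
+// ends up here, since dominator info is a function-level analysis: an
+// on-the-fly FunctionPassManagerImpl is created to run it per function and
+// is later queried through getOnTheFlyPass() below.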
+
+/// Return the function pass corresponding to PassInfo PI that is
+/// required by module pass MP. Instantiate the analysis pass by running
+/// its runOnFunction() on function F.
+Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
+ FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
+ assert(FPP && "Unable to find on the fly pass");
+
+ FPP->releaseMemoryOnTheFly();
+ FPP->run(F);
+ return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// PassManagerImpl implementation
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManagerImpl::run(Module &M) {
+ bool Changed = false;
+ TimingInfo::createTheTimeInfo();
+
+ dumpArguments();
+ dumpPasses();
+
+ SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doInitialization(M);
+ }
+
+ initializeAllAnalysisInfo();
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->runOnModule(M);
+
+ for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+ E = IPV.end(); I != E; ++I) {
+ Changed |= (*I)->doFinalization(M);
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// PassManager implementation
+
+/// Create new pass manager
+PassManager::PassManager() {
+ PM = new PassManagerImpl();
+ // PM is the top level manager
+ PM->setTopLevelManager(PM);
+}
+
+PassManager::~PassManager() {
+ delete PM;
+}
+
+/// add - Add a pass to the queue of passes to run. This passes ownership of
+/// the Pass to the PassManager. When the PassManager is destroyed, the pass
+/// will be destroyed as well, so there is no need to delete the pass. This
+/// implies that all passes MUST be allocated with 'new'.
+void PassManager::add(Pass *P) {
+ PM->add(P);
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManager::run(Module &M) {
+ return PM->run(M);
+}
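+
+// A typical driver uses this interface as follows. This is only an
+// illustrative sketch; the verifier pass is an arbitrary example:
+//
+//   PassManager PM;
+//   PM.add(createVerifierPass());
+//   bool Changed = PM.run(M);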
+
+//===----------------------------------------------------------------------===//
+// TimingInfo implementation
+
+bool llvm::TimePassesIsEnabled = false;
+static cl::opt<bool,true>
+EnableTiming("time-passes", cl::location(TimePassesIsEnabled),
+ cl::desc("Time each pass, printing elapsed time for each on exit"));
+
+// createTheTimeInfo - This method either initializes the TheTimeInfo pointer to
+// a non null value (if the -time-passes option is enabled) or it leaves it
+// null. It may be called multiple times.
+void TimingInfo::createTheTimeInfo() {
+ if (!TimePassesIsEnabled || TheTimeInfo) return;
+
+ // Constructed the first time this is called, iff -time-passes is enabled.
+ // This guarantees that the object will be constructed before static globals,
+ // thus it will be destroyed before them.
+ static ManagedStatic<TimingInfo> TTI;
+ TheTimeInfo = &*TTI;
+}
+
+/// If TimingInfo is enabled then start pass timer.
+Timer *llvm::getPassTimer(Pass *P) {
+ if (TheTimeInfo)
+ return TheTimeInfo->getPassTimer(P);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// PMStack implementation
+//
+
+// Pop Pass Manager from the stack and clear its analysis info.
+void PMStack::pop() {
+
+ PMDataManager *Top = this->top();
+ Top->initializeAnalysisInfo();
+
+ S.pop_back();
+}
+
+// Push PM on the stack and set its top level manager.
+void PMStack::push(PMDataManager *PM) {
+ assert(PM && "Unable to push. Pass Manager expected");
+ assert(PM->getDepth()==0 && "Pass Manager depth set too early");
+
+ if (!this->empty()) {
+ assert(PM->getPassManagerType() > this->top()->getPassManagerType()
+ && "pushing bad pass manager to PMStack");
+ PMTopLevelManager *TPM = this->top()->getTopLevelManager();
+
+ assert(TPM && "Unable to find top level manager");
+ TPM->addIndirectPassManager(PM);
+ PM->setTopLevelManager(TPM);
+ PM->setDepth(this->top()->getDepth()+1);
+ } else {
+ assert((PM->getPassManagerType() == PMT_ModulePassManager
+ || PM->getPassManagerType() == PMT_FunctionPassManager)
+ && "pushing bad pass manager to PMStack");
+ PM->setDepth(1);
+ }
+
+ S.push_back(PM);
+}
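+
+// For example, pushing a module pass manager, then a function pass manager,
+// then a basic block pass manager yields depths 1, 2 and 3; pushing them in
+// any other relative order trips the assertions above.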
+
+// Dump content of the pass manager stack.
+void PMStack::dump() const {
+ for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
+ E = S.end(); I != E; ++I)
+ dbgs() << (*I)->getAsPass()->getPassName() << ' ';
+
+ if (!S.empty())
+ dbgs() << '\n';
+}
+
+/// Find appropriate Module Pass Manager in the PM Stack and
+/// add self into that manager.
+void ModulePass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find Module Pass Manager
+ while (!PMS.empty()) {
+ PassManagerType TopPMType = PMS.top()->getPassManagerType();
+ if (TopPMType == PreferredType)
+ break; // We found desired pass manager
+ else if (TopPMType > PMT_ModulePassManager)
+ PMS.pop(); // Pop children pass managers
+ else
+ break;
+ }
+ assert(!PMS.empty() && "Unable to find appropriate Pass Manager");
+ PMS.top()->add(this);
+}
+
+/// Find appropriate Function Pass Manager or Call Graph Pass Manager
+/// in the PM Stack and add self into that manager.
+void FunctionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+
+ // Find Function Pass Manager
+ while (!PMS.empty()) {
+ if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
+ PMS.pop();
+ else
+ break;
+ }
+
+ // Create new Function Pass Manager if needed.
+ FPPassManager *FPP;
+ if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) {
+ FPP = (FPPassManager *)PMS.top();
+ } else {
+ assert(!PMS.empty() && "Unable to create Function Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Function Pass Manager
+ FPP = new FPPassManager();
+ FPP->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(FPP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ FPP->assignPassManager(PMS, PMD->getPassManagerType());
+
+ // [4] Push new manager into PMS
+ PMS.push(FPP);
+ }
+
+ // Assign FPP as the manager of this pass.
+ FPP->add(this);
+}
+
+/// Find an appropriate BasicBlock Pass Manager in the PM Stack
+/// and add self into that manager.
+void BasicBlockPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ BBPassManager *BBP;
+
+  // BasicBlock Pass Manager is a leaf pass manager. It does not handle
+  // any other pass manager.
+ if (!PMS.empty() &&
+ PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
+ BBP = (BBPassManager *)PMS.top();
+ } else {
+    // If the leaf manager is not a BasicBlock pass manager then create a new
+    // BasicBlock pass manager.
+ assert(!PMS.empty() && "Unable to create BasicBlock Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+ // [1] Create new Basic Block Manager
+ BBP = new BBPassManager();
+
+ // [2] Set up new manager's top level manager
+ // Basic Block Pass Manager does not live by itself
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(BBP);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ BBP->assignPassManager(PMS, PreferredType);
+
+ // [4] Push new manager into PMS
+ PMS.push(BBP);
+ }
+
+ // Assign BBP as the manager of this pass.
+ BBP->add(this);
+}
+
+PassManagerBase::~PassManagerBase() {}
diff --git a/lib/IR/PassRegistry.cpp b/lib/IR/PassRegistry.cpp
new file mode 100644
index 000000000000..a0b64ed78f5f
--- /dev/null
+++ b/lib/IR/PassRegistry.cpp
@@ -0,0 +1,209 @@
+//===- PassRegistry.cpp - Pass Registration Implementation ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PassRegistry, with which passes are registered on
+// initialization, and supports the PassManager in dependency resolution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/PassRegistry.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Function.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include <vector>
+
+using namespace llvm;
+
+// FIXME: We use ManagedStatic to erase the pass registrar on shutdown.
+// Unfortunately, passes are registered with static ctors, and having
+// llvm_shutdown clear this map prevents successful resurrection after
+// llvm_shutdown is run. Ideally we should find a solution so that we don't
+// leak the map, AND can still resurrect after shutdown.
+static ManagedStatic<PassRegistry> PassRegistryObj;
+PassRegistry *PassRegistry::getPassRegistry() {
+ return &*PassRegistryObj;
+}
+
+static ManagedStatic<sys::SmartMutex<true> > Lock;
+
+//===----------------------------------------------------------------------===//
+// PassRegistryImpl
+//
+
+namespace {
+struct PassRegistryImpl {
+ /// PassInfoMap - Keep track of the PassInfo object for each registered pass.
+ typedef DenseMap<const void*, const PassInfo*> MapType;
+ MapType PassInfoMap;
+
+ typedef StringMap<const PassInfo*> StringMapType;
+ StringMapType PassInfoStringMap;
+
+ /// AnalysisGroupInfo - Keep track of information for each analysis group.
+ struct AnalysisGroupInfo {
+ SmallPtrSet<const PassInfo *, 8> Implementations;
+ };
+ DenseMap<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap;
+
+ std::vector<const PassInfo*> ToFree;
+ std::vector<PassRegistrationListener*> Listeners;
+};
+} // end anonymous namespace
+
+void *PassRegistry::getImpl() const {
+ if (!pImpl)
+ pImpl = new PassRegistryImpl();
+ return pImpl;
+}
+
+//===----------------------------------------------------------------------===//
+// Accessors
+//
+
+PassRegistry::~PassRegistry() {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(pImpl);
+
+ for (std::vector<const PassInfo*>::iterator I = Impl->ToFree.begin(),
+ E = Impl->ToFree.end(); I != E; ++I)
+ delete *I;
+
+ delete Impl;
+ pImpl = 0;
+}
+
+const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI);
+ return I != Impl->PassInfoMap.end() ? I->second : 0;
+}
+
+const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::StringMapType::const_iterator
+ I = Impl->PassInfoStringMap.find(Arg);
+ return I != Impl->PassInfoStringMap.end() ? I->second : 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Registration mechanism
+//
+
+void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ bool Inserted =
+ Impl->PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+ assert(Inserted && "Pass registered multiple times!");
+ (void)Inserted;
+ Impl->PassInfoStringMap[PI.getPassArgument()] = &PI;
+
+ // Notify any listeners.
+ for (std::vector<PassRegistrationListener*>::iterator
+ I = Impl->Listeners.begin(), E = Impl->Listeners.end(); I != E; ++I)
+ (*I)->passRegistered(&PI);
+
+ if (ShouldFree) Impl->ToFree.push_back(&PI);
+}
+
+void PassRegistry::unregisterPass(const PassInfo &PI) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::MapType::iterator I =
+ Impl->PassInfoMap.find(PI.getTypeInfo());
+ assert(I != Impl->PassInfoMap.end() && "Pass registered but not in map!");
+
+ // Remove pass from the map.
+ Impl->PassInfoMap.erase(I);
+ Impl->PassInfoStringMap.erase(PI.getPassArgument());
+}
+
+void PassRegistry::enumerateWith(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ for (PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.begin(),
+ E = Impl->PassInfoMap.end(); I != E; ++I)
+ L->passEnumerate(I->second);
+}
+
+
+/// Analysis Group Mechanisms.
+void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
+ const void *PassID,
+ PassInfo& Registeree,
+ bool isDefault,
+ bool ShouldFree) {
+ PassInfo *InterfaceInfo = const_cast<PassInfo*>(getPassInfo(InterfaceID));
+ if (InterfaceInfo == 0) {
+ // First reference to Interface, register it now.
+ registerPass(Registeree);
+ InterfaceInfo = &Registeree;
+ }
+ assert(Registeree.isAnalysisGroup() &&
+ "Trying to join an analysis group that is a normal pass!");
+
+ if (PassID) {
+ PassInfo *ImplementationInfo = const_cast<PassInfo*>(getPassInfo(PassID));
+ assert(ImplementationInfo &&
+ "Must register pass before adding to AnalysisGroup!");
+
+ sys::SmartScopedLock<true> Guard(*Lock);
+
+ // Make sure we keep track of the fact that the implementation implements
+ // the interface.
+ ImplementationInfo->addInterfaceImplemented(InterfaceInfo);
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::AnalysisGroupInfo &AGI =
+ Impl->AnalysisGroupInfoMap[InterfaceInfo];
+ assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
+ "Cannot add a pass to the same analysis group more than once!");
+ AGI.Implementations.insert(ImplementationInfo);
+ if (isDefault) {
+ assert(InterfaceInfo->getNormalCtor() == 0 &&
+ "Default implementation for analysis group already specified!");
+ assert(ImplementationInfo->getNormalCtor() &&
+ "Cannot specify pass as default if it does not have a default ctor");
+ InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
+ }
+ }
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ if (ShouldFree) Impl->ToFree.push_back(&Registeree);
+}
+
+void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ Impl->Listeners.push_back(L);
+}
+
+void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+
+ // NOTE: This is necessary because removeRegistrationListener() can be called
+ // as part of the llvm_shutdown sequence. Since we have no control over the
+ // order of that sequence, we need to gracefully handle the case where the
+ // PassRegistry is destructed before the object that triggers this call.
+ if (!pImpl) return;
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ std::vector<PassRegistrationListener*>::iterator I =
+ std::find(Impl->Listeners.begin(), Impl->Listeners.end(), L);
+ assert(I != Impl->Listeners.end() &&
+ "PassRegistrationListener not registered!");
+ Impl->Listeners.erase(I);
+}
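
For orientation, a minimal usage sketch (illustrative only, not part of the imported file) of how a client drives the registry defined above. `createPassByArg` is a hypothetical helper; the lookup keys are whatever argument strings passes registered through the INITIALIZE_PASS machinery (for example "print-module", registered below in PrintModulePass.cpp):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Pass.h"
    #include "llvm/PassRegistry.h"
    #include "llvm/PassSupport.h"

    // Hypothetical helper: resolve a pass by its command-line argument and
    // instantiate it through its registered default constructor.
    static llvm::Pass *createPassByArg(llvm::StringRef Arg) {
      const llvm::PassInfo *PI =
          llvm::PassRegistry::getPassRegistry()->getPassInfo(Arg);
      if (!PI || !PI->getNormalCtor())
        return 0; // Unknown pass, or no default constructor registered.
      return PI->createPass();
    }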
diff --git a/lib/IR/PrintModulePass.cpp b/lib/IR/PrintModulePass.cpp
new file mode 100644
index 000000000000..5026bc2d9840
--- /dev/null
+++ b/lib/IR/PrintModulePass.cpp
@@ -0,0 +1,136 @@
+//===--- IR/PrintModulePass.cpp - Module/Function Printer -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PrintModulePass and PrintFunctionPass implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+ class PrintModulePass : public ModulePass {
+ std::string Banner;
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintModulePass() : ModulePass(ID), Out(&dbgs()),
+ DeleteStream(false) {}
+ PrintModulePass(const std::string &B, raw_ostream *o, bool DS)
+ : ModulePass(ID), Banner(B), Out(o), DeleteStream(DS) {}
+
+ ~PrintModulePass() {
+ if (DeleteStream) delete Out;
+ }
+
+ bool runOnModule(Module &M) {
+ (*Out) << Banner << M;
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class PrintFunctionPass : public FunctionPass {
+ std::string Banner; // String to print before each function
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintFunctionPass() : FunctionPass(ID), Banner(""), Out(&dbgs()),
+ DeleteStream(false) {}
+ PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS)
+ : FunctionPass(ID), Banner(B), Out(o), DeleteStream(DS) {}
+
+ ~PrintFunctionPass() {
+ if (DeleteStream) delete Out;
+ }
+
+ // runOnFunction - This pass just prints a banner followed by the
+ // function as it's processed.
+ //
+ bool runOnFunction(Function &F) {
+ (*Out) << Banner << static_cast<Value&>(F);
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+
+ class PrintBasicBlockPass : public BasicBlockPass {
+ std::string Banner;
+ raw_ostream *Out; // raw_ostream to print on
+ bool DeleteStream; // Delete the ostream in our dtor?
+ public:
+ static char ID;
+ PrintBasicBlockPass() : BasicBlockPass(ID), Out(&dbgs()),
+ DeleteStream(false) {}
+ PrintBasicBlockPass(const std::string &B, raw_ostream *o, bool DS)
+ : BasicBlockPass(ID), Banner(B), Out(o), DeleteStream(DS) {}
+
+ ~PrintBasicBlockPass() {
+ if (DeleteStream) delete Out;
+ }
+
+ bool runOnBasicBlock(BasicBlock &BB) {
+ (*Out) << Banner << BB;
+ return false;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+}
+
+char PrintModulePass::ID = 0;
+INITIALIZE_PASS(PrintModulePass, "print-module",
+ "Print module to stderr", false, false)
+char PrintFunctionPass::ID = 0;
+INITIALIZE_PASS(PrintFunctionPass, "print-function",
+ "Print function to stderr", false, false)
+char PrintBasicBlockPass::ID = 0;
+INITIALIZE_PASS(PrintBasicBlockPass, "print-bb",
+ "Print BB to stderr", false, false)
+
+/// createPrintModulePass - Create and return a pass that writes the
+/// module to the specified raw_ostream.
+ModulePass *llvm::createPrintModulePass(llvm::raw_ostream *OS,
+ bool DeleteStream,
+ const std::string &Banner) {
+ return new PrintModulePass(Banner, OS, DeleteStream);
+}
+
+/// createPrintFunctionPass - Create and return a pass that prints
+/// functions to the specified raw_ostream as they are processed.
+FunctionPass *llvm::createPrintFunctionPass(const std::string &Banner,
+ llvm::raw_ostream *OS,
+ bool DeleteStream) {
+ return new PrintFunctionPass(Banner, OS, DeleteStream);
+}
+
+/// createPrintBasicBlockPass - Create and return a pass that writes the
+/// BB to the specified raw_ostream.
+BasicBlockPass *llvm::createPrintBasicBlockPass(llvm::raw_ostream *OS,
+ bool DeleteStream,
+ const std::string &Banner) {
+ return new PrintBasicBlockPass(Banner, OS, DeleteStream);
+}
+
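A short usage sketch against the 3.3-era top-level PassManager (illustrative, not part of the import). The factory functions above hand stream ownership to the pass only when DeleteStream is true, so borrowing a process-wide stream looks like this:

    #include "llvm/Assembly/PrintModulePass.h"
    #include "llvm/IR/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Support/raw_ostream.h"

    // Hypothetical helper: print M to stderr with a banner. DeleteStream is
    // false because errs() must not be deleted by the pass's destructor.
    static void dumpModule(llvm::Module &M) {
      llvm::PassManager PM;
      PM.add(llvm::createPrintModulePass(&llvm::errs(), false,
                                         "; module dump\n"));
      PM.run(M);
    }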
diff --git a/lib/IR/SymbolTableListTraitsImpl.h b/lib/IR/SymbolTableListTraitsImpl.h
new file mode 100644
index 000000000000..5a383eee56c5
--- /dev/null
+++ b/lib/IR/SymbolTableListTraitsImpl.h
@@ -0,0 +1,118 @@
+//===-- llvm/SymbolTableListTraitsImpl.h - Implementation ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stickier parts of the SymbolTableListTraits class,
+// and is explicitly instantiated where needed to avoid defining all this code
+// in a widely used header.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
+#define LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
+
+#include "llvm/IR/SymbolTableListTraits.h"
+#include "llvm/IR/ValueSymbolTable.h"
+
+namespace llvm {
+
+/// setSymTabObject - This is called when (e.g.) the parent of a basic block
+/// changes. This requires us to remove all the instruction symtab entries from
+/// the current function and reinsert them into the new function.
+template<typename ValueSubClass, typename ItemParentClass>
+template<typename TPtr>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::setSymTabObject(TPtr *Dest, TPtr Src) {
+ // Get the old symtab and value list before doing the assignment.
+ ValueSymbolTable *OldST = TraitsClass::getSymTab(getListOwner());
+
+ // Do it.
+ *Dest = Src;
+
+ // Get the new SymTab object.
+ ValueSymbolTable *NewST = TraitsClass::getSymTab(getListOwner());
+
+ // If there is nothing to do, quick exit.
+ if (OldST == NewST) return;
+
+ // Move all the elements from the old symtab to the new one.
+ iplist<ValueSubClass> &ItemList = TraitsClass::getList(getListOwner());
+ if (ItemList.empty()) return;
+
+ if (OldST) {
+ // Remove all entries from the previous symtab.
+ for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
+ I != ItemList.end(); ++I)
+ if (I->hasName())
+ OldST->removeValueName(I->getValueName());
+ }
+
+ if (NewST) {
+ // Add all of the items to the new symtab.
+ for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
+ I != ItemList.end(); ++I)
+ if (I->hasName())
+ NewST->reinsertValue(I);
+ }
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::addNodeToList(ValueSubClass *V) {
+ assert(V->getParent() == 0 && "Value already in a container!!");
+ ItemParentClass *Owner = getListOwner();
+ V->setParent(Owner);
+ if (V->hasName())
+ if (ValueSymbolTable *ST = TraitsClass::getSymTab(Owner))
+ ST->reinsertValue(V);
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::removeNodeFromList(ValueSubClass *V) {
+ V->setParent(0);
+ if (V->hasName())
+ if (ValueSymbolTable *ST = TraitsClass::getSymTab(getListOwner()))
+ ST->removeValueName(V->getValueName());
+}
+
+template<typename ValueSubClass, typename ItemParentClass>
+void SymbolTableListTraits<ValueSubClass,ItemParentClass>
+::transferNodesFromList(ilist_traits<ValueSubClass> &L2,
+ ilist_iterator<ValueSubClass> first,
+ ilist_iterator<ValueSubClass> last) {
+ // We only have to do work here when transferring instructions between BBs.
+ ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
+ if (NewIP == OldIP) return; // No work to do at all...
+
+ // We only have to update symbol table entries if we are transferring the
+ // instructions to a different symtab object...
+ ValueSymbolTable *NewST = TraitsClass::getSymTab(NewIP);
+ ValueSymbolTable *OldST = TraitsClass::getSymTab(OldIP);
+ if (NewST != OldST) {
+ for (; first != last; ++first) {
+ ValueSubClass &V = *first;
+ bool HasName = V.hasName();
+ if (OldST && HasName)
+ OldST->removeValueName(V.getValueName());
+ V.setParent(NewIP);
+ if (NewST && HasName)
+ NewST->reinsertValue(&V);
+ }
+ } else {
+ // Just transferring between blocks in the same function, simply update the
+ // parent fields in the instructions...
+ for (; first != last; ++first)
+ first->setParent(NewIP);
+ }
+}
+
+} // End llvm namespace
+
+#endif
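
These traits fire implicitly through the intrusive list machinery; they are never called by hand. As a sketch of the kind of operation that exercises transferNodesFromList and setSymTabObject (illustrative, not from the import, and assuming `moveEntryBlock` is a hypothetical helper):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Function.h"

    // Splicing a block between functions triggers the traits above: named
    // values are removed from Src's value symbol table and reinserted
    // (renamed on collision) into Dst's.
    static void moveEntryBlock(llvm::Function &Src, llvm::Function &Dst) {
      if (Src.empty())
        return;
      Dst.getBasicBlockList().splice(Dst.end(), Src.getBasicBlockList(),
                                     Src.begin());
    }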
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
new file mode 100644
index 000000000000..1e6a51ab108c
--- /dev/null
+++ b/lib/IR/Type.cpp
@@ -0,0 +1,767 @@
+//===-- Type.cpp - Implement the Type class -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Type class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Type.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Module.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Type Class Implementation
+//===----------------------------------------------------------------------===//
+
+Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
+ switch (IDNumber) {
+ case VoidTyID : return getVoidTy(C);
+ case HalfTyID : return getHalfTy(C);
+ case FloatTyID : return getFloatTy(C);
+ case DoubleTyID : return getDoubleTy(C);
+ case X86_FP80TyID : return getX86_FP80Ty(C);
+ case FP128TyID : return getFP128Ty(C);
+ case PPC_FP128TyID : return getPPC_FP128Ty(C);
+ case LabelTyID : return getLabelTy(C);
+ case MetadataTyID : return getMetadataTy(C);
+ case X86_MMXTyID : return getX86_MMXTy(C);
+ default:
+ return 0;
+ }
+}
+
+/// getScalarType - If this is a vector type, return the element type,
+/// otherwise return this.
+Type *Type::getScalarType() {
+ if (VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType();
+ return this;
+}
+
+const Type *Type::getScalarType() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType();
+ return this;
+}
+
+/// isIntegerTy - Return true if this is an IntegerType of the specified width.
+bool Type::isIntegerTy(unsigned Bitwidth) const {
+ return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
+}
+
+// canLosslesslyBitCastTo - Return true if this type can be converted to
+// 'Ty' without any reinterpretation of bits. For example, i8* to i32*.
+//
+bool Type::canLosslesslyBitCastTo(Type *Ty) const {
+ // Identity cast means no change so return true
+ if (this == Ty)
+ return true;
+
+ // They are not convertible unless they are at least first class types
+ if (!this->isFirstClassType() || !Ty->isFirstClassType())
+ return false;
+
+ // Vector -> Vector conversions are always lossless if the two vector types
+ // have the same size, otherwise not. Also, 64-bit vector types can be
+ // converted to x86mmx.
+ if (const VectorType *thisPTy = dyn_cast<VectorType>(this)) {
+ if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ return thisPTy->getBitWidth() == thatPTy->getBitWidth();
+ if (Ty->getTypeID() == Type::X86_MMXTyID &&
+ thisPTy->getBitWidth() == 64)
+ return true;
+ }
+
+ if (this->getTypeID() == Type::X86_MMXTyID)
+ if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ if (thatPTy->getBitWidth() == 64)
+ return true;
+
+ // At this point we have only various mismatches of the first class types
+ // remaining and ptr->ptr. Just select the lossless conversions. Everything
+ // else is not lossless.
+ if (this->isPointerTy())
+ return Ty->isPointerTy();
+ return false; // Other types have no identity values
+}
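
A few concrete answers this predicate gives, as a sketch using only the factories defined in this file (pointer-to-pointer and same-width vector casts are lossless; everything else is not):

    #include <cassert>
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    static void bitcastExamples(llvm::LLVMContext &C) {
      llvm::Type *I8Ptr = llvm::Type::getInt8PtrTy(C);
      llvm::Type *I32Ptr = llvm::Type::getInt32PtrTy(C);
      llvm::Type *V2I32 = llvm::VectorType::get(llvm::Type::getInt32Ty(C), 2);

      assert(I8Ptr->canLosslesslyBitCastTo(I32Ptr));   // ptr -> ptr: lossless
      assert(V2I32->canLosslesslyBitCastTo(            // 64-bit vector -> mmx
          llvm::Type::getX86_MMXTy(C)));
      assert(!llvm::Type::getFloatTy(C)->canLosslesslyBitCastTo(I32Ptr));
    }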
+
+bool Type::isEmptyTy() const {
+ const ArrayType *ATy = dyn_cast<ArrayType>(this);
+ if (ATy) {
+ unsigned NumElements = ATy->getNumElements();
+ return NumElements == 0 || ATy->getElementType()->isEmptyTy();
+ }
+
+ const StructType *STy = dyn_cast<StructType>(this);
+ if (STy) {
+ unsigned NumElements = STy->getNumElements();
+ for (unsigned i = 0; i < NumElements; ++i)
+ if (!STy->getElementType(i)->isEmptyTy())
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
+unsigned Type::getPrimitiveSizeInBits() const {
+ switch (getTypeID()) {
+ case Type::HalfTyID: return 16;
+ case Type::FloatTyID: return 32;
+ case Type::DoubleTyID: return 64;
+ case Type::X86_FP80TyID: return 80;
+ case Type::FP128TyID: return 128;
+ case Type::PPC_FP128TyID: return 128;
+ case Type::X86_MMXTyID: return 64;
+ case Type::IntegerTyID: return cast<IntegerType>(this)->getBitWidth();
+ case Type::VectorTyID: return cast<VectorType>(this)->getBitWidth();
+ default: return 0;
+ }
+}
+
+/// getScalarSizeInBits - If this is a vector type, return the
+/// getPrimitiveSizeInBits value for the element type. Otherwise return the
+/// getPrimitiveSizeInBits value for this type.
+unsigned Type::getScalarSizeInBits() {
+ return getScalarType()->getPrimitiveSizeInBits();
+}
+
+/// getFPMantissaWidth - Return the width of the mantissa of this type. This
+/// is only valid on floating point types. If the FP type does not
+/// have a stable mantissa (e.g. ppc long double), this method returns -1.
+int Type::getFPMantissaWidth() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType()->getFPMantissaWidth();
+ assert(isFloatingPointTy() && "Not a floating point type!");
+ if (getTypeID() == HalfTyID) return 11;
+ if (getTypeID() == FloatTyID) return 24;
+ if (getTypeID() == DoubleTyID) return 53;
+ if (getTypeID() == X86_FP80TyID) return 64;
+ if (getTypeID() == FP128TyID) return 113;
+ assert(getTypeID() == PPC_FP128TyID && "unknown fp type");
+ return -1;
+}
+
+/// isSizedDerivedType - Derived types like structures and arrays are sized
+/// iff all of the members of the type are sized as well. Since asking for
+/// their size is relatively uncommon, move this operation out of line.
+bool Type::isSizedDerivedType() const {
+ if (this->isIntegerTy())
+ return true;
+
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
+ return ATy->getElementType()->isSized();
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType()->isSized();
+
+ if (!this->isStructTy())
+ return false;
+
+ return cast<StructType>(this)->isSized();
+}
+
+//===----------------------------------------------------------------------===//
+// Subclass Helper Methods
+//===----------------------------------------------------------------------===//
+
+unsigned Type::getIntegerBitWidth() const {
+ return cast<IntegerType>(this)->getBitWidth();
+}
+
+bool Type::isFunctionVarArg() const {
+ return cast<FunctionType>(this)->isVarArg();
+}
+
+Type *Type::getFunctionParamType(unsigned i) const {
+ return cast<FunctionType>(this)->getParamType(i);
+}
+
+unsigned Type::getFunctionNumParams() const {
+ return cast<FunctionType>(this)->getNumParams();
+}
+
+StringRef Type::getStructName() const {
+ return cast<StructType>(this)->getName();
+}
+
+unsigned Type::getStructNumElements() const {
+ return cast<StructType>(this)->getNumElements();
+}
+
+Type *Type::getStructElementType(unsigned N) const {
+ return cast<StructType>(this)->getElementType(N);
+}
+
+Type *Type::getSequentialElementType() const {
+ return cast<SequentialType>(this)->getElementType();
+}
+
+uint64_t Type::getArrayNumElements() const {
+ return cast<ArrayType>(this)->getNumElements();
+}
+
+unsigned Type::getVectorNumElements() const {
+ return cast<VectorType>(this)->getNumElements();
+}
+
+unsigned Type::getPointerAddressSpace() const {
+ return cast<PointerType>(getScalarType())->getAddressSpace();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Primitive 'Type' data
+//===----------------------------------------------------------------------===//
+
+Type *Type::getVoidTy(LLVMContext &C) { return &C.pImpl->VoidTy; }
+Type *Type::getLabelTy(LLVMContext &C) { return &C.pImpl->LabelTy; }
+Type *Type::getHalfTy(LLVMContext &C) { return &C.pImpl->HalfTy; }
+Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; }
+Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; }
+Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; }
+Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; }
+Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; }
+Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; }
+Type *Type::getX86_MMXTy(LLVMContext &C) { return &C.pImpl->X86_MMXTy; }
+
+IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; }
+IntegerType *Type::getInt8Ty(LLVMContext &C) { return &C.pImpl->Int8Ty; }
+IntegerType *Type::getInt16Ty(LLVMContext &C) { return &C.pImpl->Int16Ty; }
+IntegerType *Type::getInt32Ty(LLVMContext &C) { return &C.pImpl->Int32Ty; }
+IntegerType *Type::getInt64Ty(LLVMContext &C) { return &C.pImpl->Int64Ty; }
+
+IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
+ return IntegerType::get(C, N);
+}
+
+PointerType *Type::getHalfPtrTy(LLVMContext &C, unsigned AS) {
+ return getHalfTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
+ return getFloatTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) {
+ return getDoubleTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) {
+ return getX86_FP80Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) {
+ return getFP128Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
+ return getPPC_FP128Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
+ return getX86_MMXTy(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
+ return getIntNTy(C, N)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt1Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt8Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt16Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt32Ty(C)->getPointerTo(AS);
+}
+
+PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
+ return getInt64Ty(C)->getPointerTo(AS);
+}
+
+
+//===----------------------------------------------------------------------===//
+// IntegerType Implementation
+//===----------------------------------------------------------------------===//
+
+IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
+ assert(NumBits >= MIN_INT_BITS && "bitwidth too small");
+ assert(NumBits <= MAX_INT_BITS && "bitwidth too large");
+
+ // Check for the built-in integer types
+ switch (NumBits) {
+ case 1: return cast<IntegerType>(Type::getInt1Ty(C));
+ case 8: return cast<IntegerType>(Type::getInt8Ty(C));
+ case 16: return cast<IntegerType>(Type::getInt16Ty(C));
+ case 32: return cast<IntegerType>(Type::getInt32Ty(C));
+ case 64: return cast<IntegerType>(Type::getInt64Ty(C));
+ default:
+ break;
+ }
+
+ IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits];
+
+ if (Entry == 0)
+ Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits);
+
+ return Entry;
+}
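
Because of the uniquing above, pointer equality is type equality within one context; a small sketch:

    #include <cassert>
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    static void integerUniquing(llvm::LLVMContext &C) {
      // 32 hits the context's pre-built singleton; 17 is created once in the
      // IntegerTypes map and returned for every later request.
      llvm::IntegerType *I32 = llvm::IntegerType::get(C, 32);
      llvm::IntegerType *I17 = llvm::IntegerType::get(C, 17);
      assert(I32 == llvm::Type::getInt32Ty(C));
      assert(I17 == llvm::IntegerType::get(C, 17));
    }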
+
+bool IntegerType::isPowerOf2ByteWidth() const {
+ unsigned BitWidth = getBitWidth();
+ return (BitWidth > 7) && isPowerOf2_32(BitWidth);
+}
+
+APInt IntegerType::getMask() const {
+ return APInt::getAllOnesValue(getBitWidth());
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionType Implementation
+//===----------------------------------------------------------------------===//
+
+FunctionType::FunctionType(Type *Result, ArrayRef<Type*> Params,
+ bool IsVarArgs)
+ : Type(Result->getContext(), FunctionTyID) {
+ Type **SubTys = reinterpret_cast<Type**>(this+1);
+ assert(isValidReturnType(Result) && "invalid return type for function");
+ setSubclassData(IsVarArgs);
+
+ SubTys[0] = const_cast<Type*>(Result);
+
+ for (unsigned i = 0, e = Params.size(); i != e; ++i) {
+ assert(isValidArgumentType(Params[i]) &&
+ "Not a valid type for function argument!");
+ SubTys[i+1] = Params[i];
+ }
+
+ ContainedTys = SubTys;
+ NumContainedTys = Params.size() + 1; // + 1 for result type
+}
+
+// FunctionType::get - The factory function for the FunctionType class.
+FunctionType *FunctionType::get(Type *ReturnType,
+ ArrayRef<Type*> Params, bool isVarArg) {
+ LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
+ FunctionTypeKeyInfo::KeyTy Key(ReturnType, Params, isVarArg);
+ LLVMContextImpl::FunctionTypeMap::iterator I =
+ pImpl->FunctionTypes.find_as(Key);
+ FunctionType *FT;
+
+ if (I == pImpl->FunctionTypes.end()) {
+ FT = (FunctionType*) pImpl->TypeAllocator.
+ Allocate(sizeof(FunctionType) + sizeof(Type*) * (Params.size() + 1),
+ AlignOf<FunctionType>::Alignment);
+ new (FT) FunctionType(ReturnType, Params, isVarArg);
+ pImpl->FunctionTypes[FT] = true;
+ } else {
+ FT = I->first;
+ }
+
+ return FT;
+}
+
+FunctionType *FunctionType::get(Type *Result, bool isVarArg) {
+ return get(Result, ArrayRef<Type *>(), isVarArg);
+}
+
+/// isValidReturnType - Return true if the specified type is valid as a return
+/// type.
+bool FunctionType::isValidReturnType(Type *RetTy) {
+ return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
+ !RetTy->isMetadataTy();
+}
+
+/// isValidArgumentType - Return true if the specified type is valid as an
+/// argument type.
+bool FunctionType::isValidArgumentType(Type *ArgTy) {
+ return ArgTy->isFirstClassType();
+}
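
As a sketch of the factory in use, here is the type of a printf-like function, i32 (i8*, ...). FunctionType objects are uniqued in the FunctionTypes map, so repeated calls return the same pointer:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    static llvm::FunctionType *printfLikeTy(llvm::LLVMContext &C) {
      llvm::Type *Params[] = { llvm::Type::getInt8PtrTy(C) };
      return llvm::FunctionType::get(llvm::Type::getInt32Ty(C), Params,
                                     /*isVarArg=*/true);
    }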
+
+//===----------------------------------------------------------------------===//
+// StructType Implementation
+//===----------------------------------------------------------------------===//
+
+// Primitive Constructors.
+
+StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
+ bool isPacked) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ AnonStructTypeKeyInfo::KeyTy Key(ETypes, isPacked);
+ LLVMContextImpl::StructTypeMap::iterator I =
+ pImpl->AnonStructTypes.find_as(Key);
+ StructType *ST;
+
+ if (I == pImpl->AnonStructTypes.end()) {
+ // Value not found. Create a new type!
+ ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ ST->setSubclassData(SCDB_IsLiteral); // Literal struct.
+ ST->setBody(ETypes, isPacked);
+ Context.pImpl->AnonStructTypes[ST] = true;
+ } else {
+ ST = I->first;
+ }
+
+ return ST;
+}
+
+void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
+ assert(isOpaque() && "Struct body already set!");
+
+ setSubclassData(getSubclassData() | SCDB_HasBody);
+ if (isPacked)
+ setSubclassData(getSubclassData() | SCDB_Packed);
+
+ unsigned NumElements = Elements.size();
+ Type **Elts = getContext().pImpl->TypeAllocator.Allocate<Type*>(NumElements);
+ memcpy(Elts, Elements.data(), sizeof(Elements[0]) * NumElements);
+
+ ContainedTys = Elts;
+ NumContainedTys = NumElements;
+}
+
+void StructType::setName(StringRef Name) {
+ if (Name == getName()) return;
+
+ StringMap<StructType *> &SymbolTable = getContext().pImpl->NamedStructTypes;
+ typedef StringMap<StructType *>::MapEntryTy EntryTy;
+
+ // If this struct already had a name, remove its symbol table entry. Don't
+ // delete the data yet because it may be part of the new name.
+ if (SymbolTableEntry)
+ SymbolTable.remove((EntryTy *)SymbolTableEntry);
+
+ // If this is just removing the name, we're done.
+ if (Name.empty()) {
+ if (SymbolTableEntry) {
+ // Delete the old string data.
+ ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator());
+ SymbolTableEntry = 0;
+ }
+ return;
+ }
+
+ // Look up the entry for the name.
+ EntryTy *Entry = &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name);
+
+ // While we have a name collision, try a random rename.
+ if (Entry->getValue()) {
+ SmallString<64> TempStr(Name);
+ TempStr.push_back('.');
+ raw_svector_ostream TmpStream(TempStr);
+ unsigned NameSize = Name.size();
+
+ do {
+ TempStr.resize(NameSize + 1);
+ TmpStream.resync();
+ TmpStream << getContext().pImpl->NamedStructTypesUniqueID++;
+
+ Entry = &getContext().pImpl->
+ NamedStructTypes.GetOrCreateValue(TmpStream.str());
+ } while (Entry->getValue());
+ }
+
+ // Okay, we found an entry that isn't used. It's us!
+ Entry->setValue(this);
+
+ // Delete the old string data.
+ if (SymbolTableEntry)
+ ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator());
+ SymbolTableEntry = Entry;
+}
+
+//===----------------------------------------------------------------------===//
+// StructType Helper functions.
+
+StructType *StructType::create(LLVMContext &Context, StringRef Name) {
+ StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ if (!Name.empty())
+ ST->setName(Name);
+ return ST;
+}
+
+StructType *StructType::get(LLVMContext &Context, bool isPacked) {
+ return get(Context, llvm::ArrayRef<Type*>(), isPacked);
+}
+
+StructType *StructType::get(Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ LLVMContext &Ctx = type->getContext();
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ return llvm::StructType::get(Ctx, StructFields);
+}
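
One caveat worth a sketch: the C-style variadic overload above walks its arguments until it reads a null Type*, so the terminator is mandatory (illustrative example, building a literal { i32, double }):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    static llvm::StructType *literalPair(llvm::LLVMContext &C) {
      // The trailing (llvm::Type *)0 terminates the va_arg loop.
      return llvm::StructType::get(llvm::Type::getInt32Ty(C),
                                   llvm::Type::getDoubleTy(C),
                                   (llvm::Type *)0);
    }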
+
+StructType *StructType::create(LLVMContext &Context, ArrayRef<Type*> Elements,
+ StringRef Name, bool isPacked) {
+ StructType *ST = create(Context, Name);
+ ST->setBody(Elements, isPacked);
+ return ST;
+}
+
+StructType *StructType::create(LLVMContext &Context, ArrayRef<Type*> Elements) {
+ return create(Context, Elements, StringRef());
+}
+
+StructType *StructType::create(LLVMContext &Context) {
+ return create(Context, StringRef());
+}
+
+StructType *StructType::create(ArrayRef<Type*> Elements, StringRef Name,
+ bool isPacked) {
+ assert(!Elements.empty() &&
+ "This method may not be invoked with an empty list");
+ return create(Elements[0]->getContext(), Elements, Name, isPacked);
+}
+
+StructType *StructType::create(ArrayRef<Type*> Elements) {
+ assert(!Elements.empty() &&
+ "This method may not be invoked with an empty list");
+ return create(Elements[0]->getContext(), Elements, StringRef());
+}
+
+StructType *StructType::create(StringRef Name, Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ LLVMContext &Ctx = type->getContext();
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ return llvm::StructType::create(Ctx, StructFields, Name);
+}
+
+bool StructType::isSized() const {
+ if ((getSubclassData() & SCDB_IsSized) != 0)
+ return true;
+ if (isOpaque())
+ return false;
+
+ // Okay, our struct is sized if all of the elements are, but if one of the
+ // elements is opaque, the struct isn't sized *yet*, but may become sized in
+ // the future, so just bail out without caching.
+ for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+ if (!(*I)->isSized())
+ return false;
+
+ // Here we cheat a bit and cast away const-ness. The goal is to memoize when
+ // we find a sized type, as types can only move from opaque to sized, not the
+ // other way.
+ const_cast<StructType*>(this)->setSubclassData(
+ getSubclassData() | SCDB_IsSized);
+ return true;
+}
+
+StringRef StructType::getName() const {
+ assert(!isLiteral() && "Literal structs never have names");
+ if (SymbolTableEntry == 0) return StringRef();
+
+ return ((StringMapEntry<StructType*> *)SymbolTableEntry)->getKey();
+}
+
+void StructType::setBody(Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
+ }
+ setBody(StructFields);
+}
+
+bool StructType::isValidElementType(Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+}
+
+/// isLayoutIdentical - Return true if this is layout identical to the
+/// specified struct.
+bool StructType::isLayoutIdentical(StructType *Other) const {
+ if (this == Other) return true;
+
+ if (isPacked() != Other->isPacked() ||
+ getNumElements() != Other->getNumElements())
+ return false;
+
+ return std::equal(element_begin(), element_end(), Other->element_begin());
+}
+
+/// getTypeByName - Return the type with the specified name, or null if there
+/// is none by that name.
+StructType *Module::getTypeByName(StringRef Name) const {
+ StringMap<StructType*>::iterator I =
+ getContext().pImpl->NamedStructTypes.find(Name);
+ if (I != getContext().pImpl->NamedStructTypes.end())
+ return I->second;
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// CompositeType Implementation
+//===----------------------------------------------------------------------===//
+
+Type *CompositeType::getTypeAtIndex(const Value *V) {
+ if (StructType *STy = dyn_cast<StructType>(this)) {
+ unsigned Idx =
+ (unsigned)cast<Constant>(V)->getUniqueInteger().getZExtValue();
+ assert(indexValid(Idx) && "Invalid structure index!");
+ return STy->getElementType(Idx);
+ }
+
+ return cast<SequentialType>(this)->getElementType();
+}
+Type *CompositeType::getTypeAtIndex(unsigned Idx) {
+ if (StructType *STy = dyn_cast<StructType>(this)) {
+ assert(indexValid(Idx) && "Invalid structure index!");
+ return STy->getElementType(Idx);
+ }
+
+ return cast<SequentialType>(this)->getElementType();
+}
+bool CompositeType::indexValid(const Value *V) const {
+ if (const StructType *STy = dyn_cast<StructType>(this)) {
+ // Structure indexes require (vectors of) 32-bit integer constants. In the
+ // vector case all of the indices must be equal.
+ if (!V->getType()->getScalarType()->isIntegerTy(32))
+ return false;
+ const Constant *C = dyn_cast<Constant>(V);
+ if (C && V->getType()->isVectorTy())
+ C = C->getSplatValue();
+ const ConstantInt *CU = dyn_cast_or_null<ConstantInt>(C);
+ return CU && CU->getZExtValue() < STy->getNumElements();
+ }
+
+ // Sequential types can be indexed by any integer.
+ return V->getType()->isIntOrIntVectorTy();
+}
+
+bool CompositeType::indexValid(unsigned Idx) const {
+ if (const StructType *STy = dyn_cast<StructType>(this))
+ return Idx < STy->getNumElements();
+ // Sequential types can be indexed by any integer.
+ return true;
+}
+
+
+//===----------------------------------------------------------------------===//
+// ArrayType Implementation
+//===----------------------------------------------------------------------===//
+
+ArrayType::ArrayType(Type *ElType, uint64_t NumEl)
+ : SequentialType(ArrayTyID, ElType) {
+ NumElements = NumEl;
+}
+
+ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) {
+ Type *ElementType = const_cast<Type*>(elementType);
+ assert(isValidElementType(ElementType) && "Invalid type for array element!");
+
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+ ArrayType *&Entry =
+ pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
+
+ if (Entry == 0)
+ Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements);
+ return Entry;
+}
+
+bool ArrayType::isValidElementType(Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+}
+
+//===----------------------------------------------------------------------===//
+// VectorType Implementation
+//===----------------------------------------------------------------------===//
+
+VectorType::VectorType(Type *ElType, unsigned NumEl)
+ : SequentialType(VectorTyID, ElType) {
+ NumElements = NumEl;
+}
+
+VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
+ Type *ElementType = const_cast<Type*>(elementType);
+ assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
+ assert(isValidElementType(ElementType) &&
+ "Elements of a VectorType must be a primitive type");
+
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+ VectorType *&Entry = ElementType->getContext().pImpl
+ ->VectorTypes[std::make_pair(ElementType, NumElements)];
+
+ if (Entry == 0)
+ Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements);
+ return Entry;
+}
+
+bool VectorType::isValidElementType(Type *ElemTy) {
+ return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() ||
+ ElemTy->isPointerTy();
+}
+
+//===----------------------------------------------------------------------===//
+// PointerType Implementation
+//===----------------------------------------------------------------------===//
+
+PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
+ assert(EltTy && "Can't get a pointer to <null> type!");
+ assert(isValidElementType(EltTy) && "Invalid type for pointer element!");
+
+ LLVMContextImpl *CImpl = EltTy->getContext().pImpl;
+
+ // Since AddressSpace #0 is the common case, we special case it.
+ PointerType *&Entry = AddressSpace == 0 ? CImpl->PointerTypes[EltTy]
+ : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
+
+ if (Entry == 0)
+ Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace);
+ return Entry;
+}
+
+
+PointerType::PointerType(Type *E, unsigned AddrSpace)
+ : SequentialType(PointerTyID, E) {
+#ifndef NDEBUG
+ const unsigned oldNCT = NumContainedTys;
+#endif
+ setSubclassData(AddrSpace);
+ // Check for miscompile. PR11652.
+ assert(oldNCT == NumContainedTys && "bitfield written out of bounds?");
+}
+
+PointerType *Type::getPointerTo(unsigned addrs) {
+ return PointerType::get(this, addrs);
+}
+
+bool PointerType::isValidElementType(Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy();
+}
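
The factories in this file compose; a sketch building [8 x <4 x float>]* in address space 0 (the special-cased PointerTypes map), with every intermediate type uniqued in the context:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    static llvm::PointerType *arrayOfVecPtr(llvm::LLVMContext &C) {
      llvm::VectorType *V4F =
          llvm::VectorType::get(llvm::Type::getFloatTy(C), 4);
      llvm::ArrayType *A8 = llvm::ArrayType::get(V4F, 8);
      return llvm::PointerType::get(A8, /*AddressSpace=*/0);
    }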
diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp
new file mode 100644
index 000000000000..d5e620350705
--- /dev/null
+++ b/lib/IR/TypeFinder.cpp
@@ -0,0 +1,148 @@
+//===-- TypeFinder.cpp - Implement the TypeFinder class -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TypeFinder class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+using namespace llvm;
+
+void TypeFinder::run(const Module &M, bool onlyNamed) {
+ OnlyNamed = onlyNamed;
+
+ // Get types from global variables.
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (I->hasInitializer())
+ incorporateValue(I->getInitializer());
+ }
+
+ // Get types from aliases.
+ for (Module::const_alias_iterator I = M.alias_begin(),
+ E = M.alias_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (const Value *Aliasee = I->getAliasee())
+ incorporateValue(Aliasee);
+ }
+
+ // Get types from functions.
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+ for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
+ incorporateType(FI->getType());
+
+ // First incorporate the arguments.
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI)
+ incorporateValue(AI);
+
+ for (Function::const_iterator BB = FI->begin(), E = FI->end();
+ BB != E;++BB)
+ for (BasicBlock::const_iterator II = BB->begin(),
+ E = BB->end(); II != E; ++II) {
+ const Instruction &I = *II;
+
+ // Incorporate the type of the instruction.
+ incorporateType(I.getType());
+
+ // Incorporate non-instruction operand types. (We are incorporating all
+ // instructions with this loop.)
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ if (!isa<Instruction>(OI))
+ incorporateValue(*OI);
+
+ // Incorporate types hiding in metadata.
+ I.getAllMetadataOtherThanDebugLoc(MDForInst);
+ for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+ incorporateMDNode(MDForInst[i].second);
+
+ MDForInst.clear();
+ }
+ }
+
+ for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ incorporateMDNode(NMD->getOperand(i));
+ }
+}
+
+void TypeFinder::clear() {
+ VisitedConstants.clear();
+ VisitedTypes.clear();
+ StructTypes.clear();
+}
+
+/// incorporateType - This method adds the type to the list of used structures
+/// if it's not in there already.
+void TypeFinder::incorporateType(Type *Ty) {
+ // Check to see if we've already visited this type.
+ if (!VisitedTypes.insert(Ty).second)
+ return;
+
+ // If this is a structure or opaque type, add a name for the type.
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ if (!OnlyNamed || STy->hasName())
+ StructTypes.push_back(STy);
+
+ // Recursively walk all contained types.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ incorporateType(*I);
+}
+
+/// incorporateValue - This method is used to walk operand lists finding types
+/// hiding in constant expressions and other operands that won't be walked in
+/// other ways. GlobalValues, basic blocks, instructions, and inst operands are
+/// all explicitly enumerated.
+void TypeFinder::incorporateValue(const Value *V) {
+ if (const MDNode *M = dyn_cast<MDNode>(V))
+ return incorporateMDNode(M);
+
+ if (!isa<Constant>(V) || isa<GlobalValue>(V)) return;
+
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Check this type.
+ incorporateType(V->getType());
+
+ // If this is an instruction, we incorporate it separately.
+ if (isa<Instruction>(V))
+ return;
+
+ // Look in operands for types.
+ const User *U = cast<User>(V);
+ for (Constant::const_op_iterator I = U->op_begin(),
+ E = U->op_end(); I != E;++I)
+ incorporateValue(*I);
+}
+
+/// incorporateMDNode - This method is used to walk the operands of an MDNode to
+/// find types hiding within.
+void TypeFinder::incorporateMDNode(const MDNode *V) {
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Look in operands for types.
+ for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
+ if (Value *Op = V->getOperand(i))
+ incorporateValue(Op);
+}
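
A usage sketch (illustrative, not part of the import): listing every named struct reachable from a module, which is essentially the walk the AsmWriter performs before emitting %name = type { ... } definitions:

    #include "llvm/IR/Module.h"
    #include "llvm/IR/TypeFinder.h"
    #include "llvm/Support/raw_ostream.h"

    static void listNamedStructs(const llvm::Module &M) {
      llvm::TypeFinder Finder;
      Finder.run(M, /*onlyNamed=*/true);  // skip literal (unnamed) structs
      for (llvm::TypeFinder::iterator I = Finder.begin(), E = Finder.end();
           I != E; ++I)
        llvm::errs() << (*I)->getName() << "\n";
    }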
diff --git a/lib/IR/Use.cpp b/lib/IR/Use.cpp
new file mode 100644
index 000000000000..1d343e803094
--- /dev/null
+++ b/lib/IR/Use.cpp
@@ -0,0 +1,145 @@
+//===-- Use.cpp - Implement the Use class ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the algorithm for finding the User of a Use.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Value.h"
+#include <new>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Use swap Implementation
+//===----------------------------------------------------------------------===//
+
+void Use::swap(Use &RHS) {
+ Value *V1(Val);
+ Value *V2(RHS.Val);
+ if (V1 != V2) {
+ if (V1) {
+ removeFromList();
+ }
+
+ if (V2) {
+ RHS.removeFromList();
+ Val = V2;
+ V2->addUse(*this);
+ } else {
+ Val = 0;
+ }
+
+ if (V1) {
+ RHS.Val = V1;
+ V1->addUse(RHS);
+ } else {
+ RHS.Val = 0;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Use getImpliedUser Implementation
+//===----------------------------------------------------------------------===//
+
+const Use *Use::getImpliedUser() const {
+ const Use *Current = this;
+
+ while (true) {
+ unsigned Tag = (Current++)->Prev.getInt();
+ switch (Tag) {
+ case zeroDigitTag:
+ case oneDigitTag:
+ continue;
+
+ case stopTag: {
+ ++Current;
+ ptrdiff_t Offset = 1;
+ while (true) {
+ unsigned Tag = Current->Prev.getInt();
+ switch (Tag) {
+ case zeroDigitTag:
+ case oneDigitTag:
+ ++Current;
+ Offset = (Offset << 1) + Tag;
+ continue;
+ default:
+ return Current + Offset;
+ }
+ }
+ }
+
+ case fullStopTag:
+ return Current;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Use initTags Implementation
+//===----------------------------------------------------------------------===//
+
+Use *Use::initTags(Use * const Start, Use *Stop) {
+ ptrdiff_t Done = 0;
+ while (Done < 20) {
+ if (Start == Stop--)
+ return Start;
+ static const PrevPtrTag tags[20] = { fullStopTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, stopTag,
+ zeroDigitTag, oneDigitTag, oneDigitTag,
+ stopTag, zeroDigitTag, oneDigitTag,
+ zeroDigitTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, oneDigitTag,
+ oneDigitTag, stopTag
+ };
+ new(Stop) Use(tags[Done++]);
+ }
+
+ ptrdiff_t Count = Done;
+ while (Start != Stop) {
+ --Stop;
+ if (!Count) {
+ new(Stop) Use(stopTag);
+ ++Done;
+ Count = Done;
+ } else {
+ new(Stop) Use(PrevPtrTag(Count & 1));
+ Count >>= 1;
+ ++Done;
+ }
+ }
+
+ return Start;
+}
+
+//===----------------------------------------------------------------------===//
+// Use zap Implementation
+//===----------------------------------------------------------------------===//
+
+void Use::zap(Use *Start, const Use *Stop, bool del) {
+ while (Start != Stop)
+ (--Stop)->~Use();
+ if (del)
+ ::operator delete(Start);
+}
+
+//===----------------------------------------------------------------------===//
+// Use getUser Implementation
+//===----------------------------------------------------------------------===//
+
+User *Use::getUser() const {
+ const Use *End = getImpliedUser();
+ const UserRef *ref = reinterpret_cast<const UserRef*>(End);
+ return ref->getInt()
+ ? ref->getPointer()
+ : reinterpret_cast<User*>(const_cast<Use*>(End));
+}
+
+} // End llvm namespace
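
The payoff of the waymarking scheme above is that a Use needs no back-pointer to its User. A sketch of the invariant from the client's side (illustrative; `countUsers` is a hypothetical helper):

    #include <cassert>
    #include "llvm/IR/User.h"
    #include "llvm/IR/Value.h"

    // Every Use sits inside its User's operand array, and getUser() walks
    // the waymarking tags to recover that User.
    static unsigned countUsers(llvm::Value *V) {
      unsigned N = 0;
      for (llvm::Value::use_iterator I = V->use_begin(), E = V->use_end();
           I != E; ++I, ++N)
        assert(I.getUse().getUser() == *I);  // same object, two routes
      return N;
    }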
diff --git a/lib/IR/User.cpp b/lib/IR/User.cpp
new file mode 100644
index 000000000000..940682826acc
--- /dev/null
+++ b/lib/IR/User.cpp
@@ -0,0 +1,90 @@
+//===-- User.cpp - Implement the User class -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/User.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Operator.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// User Class
+//===----------------------------------------------------------------------===//
+
+void User::anchor() {}
+
+// replaceUsesOfWith - Replaces all references to the "From" definition with
+// references to the "To" definition.
+//
+void User::replaceUsesOfWith(Value *From, Value *To) {
+ if (From == To) return; // Duh what?
+
+ assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
+ "Cannot call User::replaceUsesOfWith on a constant!");
+
+ for (unsigned i = 0, E = getNumOperands(); i != E; ++i)
+ if (getOperand(i) == From) { // Is this operand pointing to oldval?
+ // The side effects of this setOperand call include linking to
+ // "To", adding "this" to the uses list of To, and
+ // most importantly, removing "this" from the use list of "From".
+ setOperand(i, To); // Fix it now...
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// User allocHungoffUses Implementation
+//===----------------------------------------------------------------------===//
+
+Use *User::allocHungoffUses(unsigned N) const {
+ // Allocate the array of Uses, followed by a pointer (with bottom bit set) to
+ // the User.
+ size_t size = N * sizeof(Use) + sizeof(Use::UserRef);
+ Use *Begin = static_cast<Use*>(::operator new(size));
+ Use *End = Begin + N;
+ (void) new(End) Use::UserRef(const_cast<User*>(this), 1);
+ return Use::initTags(Begin, End);
+}
+
+//===----------------------------------------------------------------------===//
+// User operator new Implementations
+//===----------------------------------------------------------------------===//
+
+void *User::operator new(size_t s, unsigned Us) {
+ void *Storage = ::operator new(s + sizeof(Use) * Us);
+ Use *Start = static_cast<Use*>(Storage);
+ Use *End = Start + Us;
+ User *Obj = reinterpret_cast<User*>(End);
+ Obj->OperandList = Start;
+ Obj->NumOperands = Us;
+ Use::initTags(Start, End);
+ return Obj;
+}
+
+//===----------------------------------------------------------------------===//
+// User operator delete Implementation
+//===----------------------------------------------------------------------===//
+
+void User::operator delete(void *Usr) {
+ User *Start = static_cast<User*>(Usr);
+ Use *Storage = static_cast<Use*>(Usr) - Start->NumOperands;
+ // If there were hung-off uses, they will have been freed already and
+ // NumOperands reset to 0, so here we just free the User itself.
+ ::operator delete(Storage);
+}
+
+//===----------------------------------------------------------------------===//
+// Operator Class
+//===----------------------------------------------------------------------===//
+
+Operator::~Operator() {
+ llvm_unreachable("should never destroy an Operator");
+}
+
+} // End llvm namespace
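
A short sketch of the distinction this method draws (illustrative helper, not from the import): unlike Value::replaceAllUsesWith, which rewrites every use of a value module-wide, replaceUsesOfWith touches only one User's operand list:

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Value.h"

    static void retargetOperands(llvm::Instruction *I, llvm::Value *From,
                                 llvm::Value *To) {
      // Each matching operand is rewritten via setOperand, which unlinks the
      // Use from From's use list and links it into To's.
      I->replaceUsesOfWith(From, To);
    }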
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
new file mode 100644
index 000000000000..adc702e05e68
--- /dev/null
+++ b/lib/IR/Value.cpp
@@ -0,0 +1,701 @@
+//===-- Value.cpp - Implement the Value class -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Value, ValueHandle, and User classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Value.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/ValueHandle.h"
+#include <algorithm>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Value Class
+//===----------------------------------------------------------------------===//
+
+static inline Type *checkType(Type *Ty) {
+ assert(Ty && "Value defined with a null type: Error!");
+ return const_cast<Type*>(Ty);
+}
+
+Value::Value(Type *ty, unsigned scid)
+ : SubclassID(scid), HasValueHandle(0),
+ SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)),
+ UseList(0), Name(0) {
+ // FIXME: Why isn't this in the subclass gunk??
+ // Note, we cannot call isa<CallInst> before the CallInst has been
+ // constructed.
+ if (SubclassID == Instruction::Call || SubclassID == Instruction::Invoke)
+ assert((VTy->isFirstClassType() || VTy->isVoidTy() || VTy->isStructTy()) &&
+ "invalid CallInst type!");
+ else if (SubclassID != BasicBlockVal &&
+ (SubclassID < ConstantFirstVal || SubclassID > ConstantLastVal))
+ assert((VTy->isFirstClassType() || VTy->isVoidTy()) &&
+ "Cannot create non-first-class values except for constants!");
+}
+
+Value::~Value() {
+ // Notify all ValueHandles (if present) that this value is going away.
+ if (HasValueHandle)
+ ValueHandleBase::ValueIsDeleted(this);
+
+#ifndef NDEBUG // Only in -g mode...
+ // Check to make sure that there are no uses of this value that are still
+ // around when the value is destroyed. If there are, then we have a dangling
+ // reference and something is wrong. This code is here to print out what is
+ // still being referenced. The value in question should be printed as
+ // a <badref>
+ //
+ if (!use_empty()) {
+ dbgs() << "While deleting: " << *VTy << " %" << getName() << "\n";
+ for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
+ dbgs() << "Use still stuck around after Def is destroyed:"
+ << **I << "\n";
+ }
+#endif
+ assert(use_empty() && "Uses remain when a value is destroyed!");
+
+ // If this value is named, destroy the name. This should not be in a symtab
+ // at this point.
+ if (Name && SubclassID != MDStringVal)
+ Name->Destroy();
+
+ // There should be no uses of this object anymore, remove it.
+ LeakDetector::removeGarbageObject(this);
+}
+
+/// hasNUses - Return true if this Value has exactly N users.
+///
+bool Value::hasNUses(unsigned N) const {
+ const_use_iterator UI = use_begin(), E = use_end();
+
+ for (; N; --N, ++UI)
+ if (UI == E) return false; // Too few.
+ return UI == E;
+}
+
+/// hasNUsesOrMore - Return true if this value has N users or more. This is
+/// logically equivalent to getNumUses() >= N.
+///
+bool Value::hasNUsesOrMore(unsigned N) const {
+ const_use_iterator UI = use_begin(), E = use_end();
+
+ for (; N; --N, ++UI)
+ if (UI == E) return false; // Too few.
+
+ return true;
+}
+
+/// isUsedInBasicBlock - Return true if this value is used in the specified
+/// basic block.
+bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
+ // Start by scanning over the instructions looking for a use before we start
+ // the expensive use iteration.
+ unsigned MaxBlockSize = 3;
+ bool ScannedEntireBlock = true;
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (std::find(I->op_begin(), I->op_end(), this) != I->op_end())
+ return true;
+ if (MaxBlockSize-- == 0) { // If the block is larger, fall back to use_iterator.
+ ScannedEntireBlock = false;
+ break;
+ }
+ }
+
+ // Note: testing MaxBlockSize here would be wrong; the post-decrement above
+ // wraps it to UINT_MAX when we bail out early, so track the outcome with a
+ // flag instead.
+ if (ScannedEntireBlock) // We scanned the entire block and found no use.
+ return false;
+
+ for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+ const Instruction *User = dyn_cast<Instruction>(*I);
+ if (User && User->getParent() == BB)
+ return true;
+ }
+ return false;
+}
+
+
+/// getNumUses - This method computes the number of uses of this Value. This
+/// is a linear time operation. Use hasOneUse or hasNUses to check for specific
+/// values.
+unsigned Value::getNumUses() const {
+ return (unsigned)std::distance(use_begin(), use_end());
+}
+
+static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
+ ST = 0;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (BasicBlock *P = I->getParent())
+ if (Function *PP = P->getParent())
+ ST = &PP->getValueSymbolTable();
+ } else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+ if (Function *P = BB->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (Module *P = GV->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (Argument *A = dyn_cast<Argument>(V)) {
+ if (Function *P = A->getParent())
+ ST = &P->getValueSymbolTable();
+ } else if (isa<MDString>(V))
+ return true;
+ else {
+ assert(isa<Constant>(V) && "Unknown value type!");
+ return true; // No name is settable for this.
+ }
+ return false;
+}
+
+StringRef Value::getName() const {
+ // Make sure the empty string is still a C string. For historical reasons,
+ // some clients want to call .data() on the result and expect it to be null
+ // terminated.
+ if (!Name) return StringRef("", 0);
+ return Name->getKey();
+}
+
+void Value::setName(const Twine &NewName) {
+ assert(SubclassID != MDStringVal &&
+ "Cannot set the name of MDString with this method!");
+
+ // Fast path for common IRBuilder case of setName("") when there is no name.
+ if (NewName.isTriviallyEmpty() && !hasName())
+ return;
+
+ SmallString<256> NameData;
+ StringRef NameRef = NewName.toStringRef(NameData);
+
+ // Name isn't changing?
+ if (getName() == NameRef)
+ return;
+
+ assert(!getType()->isVoidTy() && "Cannot assign a name to void values!");
+
+ // Get the symbol table to update for this object.
+ ValueSymbolTable *ST;
+ if (getSymTab(this, ST))
+ return; // Cannot set a name on this value (e.g. constant).
+
+ if (Function *F = dyn_cast<Function>(this))
+ getContext().pImpl->IntrinsicIDCache.erase(F);
+
+ if (!ST) { // No symbol table to update? Just do the change.
+ if (NameRef.empty()) {
+ // Free the name for this value.
+ Name->Destroy();
+ Name = 0;
+ return;
+ }
+
+ if (Name)
+ Name->Destroy();
+
+ // NOTE: Could optimize for the case where the name is shrinking, to avoid
+ // deallocating and then reallocating.
+
+ // Create the new name.
+ Name = ValueName::Create(NameRef.begin(), NameRef.end());
+ Name->setValue(this);
+ return;
+ }
+
+ // NOTE: Could optimize for the case where the name is shrinking, to avoid
+ // deallocating and then reallocating.
+ if (hasName()) {
+ // Remove old name.
+ ST->removeValueName(Name);
+ Name->Destroy();
+ Name = 0;
+
+ if (NameRef.empty())
+ return;
+ }
+
+ // Name is changing to something new.
+ Name = ST->createValueName(NameRef, this);
+}
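+
+// A small example of the uniquing behavior above (a sketch, assuming A and B
+// are Instructions in the same function, sharing one symbol table):
+//   A->setName("x"); // A is named "x"
+//   B->setName("x"); // conflict: B gets an auto-renamed name such as "x1"
+// The renaming itself is done by ValueSymbolTable::createValueName.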
+
+
+/// takeName - transfer the name from V to this value, setting V's name to
+/// empty. It is an error to call V->takeName(V).
+void Value::takeName(Value *V) {
+ assert(SubclassID != MDStringVal && "Cannot take the name of an MDString!");
+
+ ValueSymbolTable *ST = 0;
+ // If this value has a name, drop it.
+ if (hasName()) {
+ // Get the symtab this is in.
+ if (getSymTab(this, ST)) {
+ // We can't set a name on this value, but we need to clear V's name if
+ // it has one.
+ if (V->hasName()) V->setName("");
+ return; // Cannot set a name on this value (e.g. constant).
+ }
+
+ // Remove old name.
+ if (ST)
+ ST->removeValueName(Name);
+ Name->Destroy();
+ Name = 0;
+ }
+
+ // Now we know that this has no name.
+
+ // If V has no name either, we're done.
+ if (!V->hasName()) return;
+
+ // Get this's symtab if we didn't before.
+ if (!ST) {
+ if (getSymTab(this, ST)) {
+ // Clear V's name.
+ V->setName("");
+ return; // Cannot set a name on this value (e.g. constant).
+ }
+ }
+
+ // Get V's ST; this should always succeed, because V has a name.
+ ValueSymbolTable *VST;
+ bool Failure = getSymTab(V, VST);
+ assert(!Failure && "V has a name, so it should have a ST!"); (void)Failure;
+
+ // If these values are both in the same symtab, we can do this very fast.
+ // This works even if both values have no symtab yet.
+ if (ST == VST) {
+ // Take the name!
+ Name = V->Name;
+ V->Name = 0;
+ Name->setValue(this);
+ return;
+ }
+
+ // Otherwise, things are slightly more complex. Remove V's name from VST and
+ // then reinsert it into ST.
+
+ if (VST)
+ VST->removeValueName(V->Name);
+ Name = V->Name;
+ V->Name = 0;
+ Name->setValue(this);
+
+ if (ST)
+ ST->reinsertValue(this);
+}
+
+
+void Value::replaceAllUsesWith(Value *New) {
+ assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
+ assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
+ assert(New->getType() == getType() &&
+ "replaceAllUses of value with new value of different type!");
+
+ // Notify all ValueHandles (if present) that this value is going away.
+ if (HasValueHandle)
+ ValueHandleBase::ValueIsRAUWd(this, New);
+
+ while (!use_empty()) {
+ Use &U = *UseList;
+ // Must handle Constants specially, we cannot call replaceUsesOfWith on a
+ // constant because they are uniqued.
+ if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ if (!isa<GlobalValue>(C)) {
+ C->replaceUsesOfWithOnConstant(this, New, &U);
+ continue;
+ }
+ }
+
+ U.set(New);
+ }
+
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(this))
+ BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
+}
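+
+// Typical caller pattern (a sketch; SimplifyInstruction stands in for any
+// analysis that produced an equivalent value):
+//   if (Value *Simplified = SimplifyInstruction(I, TD)) {
+//     I->replaceAllUsesWith(Simplified);
+//     I->eraseFromParent();
+//   }
+// Afterwards every Use that pointed at I points at Simplified, and any weak
+// or tracking value handles watching I are retargeted via ValueIsRAUWd below.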
+
+namespace {
+// Various metrics for how much to strip off of pointers.
+enum PointerStripKind {
+ PSK_ZeroIndices,
+ PSK_InBoundsConstantIndices,
+ PSK_InBounds
+};
+
+template <PointerStripKind StripKind>
+static Value *stripPointerCastsAndOffsets(Value *V) {
+ if (!V->getType()->isPointerTy())
+ return V;
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ switch (StripKind) {
+ case PSK_ZeroIndices:
+ if (!GEP->hasAllZeroIndices())
+ return V;
+ break;
+ case PSK_InBoundsConstantIndices:
+ if (!GEP->hasAllConstantIndices())
+ return V;
+ // fallthrough
+ case PSK_InBounds:
+ if (!GEP->isInBounds())
+ return V;
+ break;
+ }
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
+
+ return V;
+}
+} // namespace
+
+Value *Value::stripPointerCasts() {
+ return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
+}
+
+Value *Value::stripInBoundsConstantOffsets() {
+ return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
+}
+
+Value *Value::stripInBoundsOffsets() {
+ return stripPointerCastsAndOffsets<PSK_InBounds>(this);
+}
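+
+// Example of the differences (a sketch, in typed-pointer IR syntax):
+//   %a = alloca [4 x i32]
+//   %p = getelementptr inbounds [4 x i32]* %a, i32 0, i32 1
+//   %q = bitcast i32* %p to i8*
+// stripPointerCasts() on %q strips the bitcast but stops at %p (its GEP
+// indices are not all zero), while stripInBoundsConstantOffsets() and
+// stripInBoundsOffsets() also look through the inbounds GEP and return %a.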
+
+/// isDereferenceablePointer - Test if this value is always a pointer to
+/// allocated and suitably aligned memory for a simple load or store.
+static bool isDereferenceablePointer(const Value *V,
+ SmallPtrSet<const Value *, 32> &Visited) {
+ // Note that it is not safe to speculate into a malloc'd region because
+ // malloc may return null.
+ // It's also not always safe to follow a bitcast, for example:
+ // bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca. Some cases could
+ // be handled using DataLayout to check sizes and alignments though.
+
+ // These are obviously ok.
+ if (isa<AllocaInst>(V)) return true;
+
+ // Global variables which can't collapse to null are ok.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return !GV->hasExternalWeakLinkage();
+
+ // byval arguments are ok.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+
+ // For GEPs, determine if the indexing lands within the allocated object.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ // Conservatively require that the base pointer be fully dereferenceable.
+ if (!Visited.insert(GEP->getOperand(0)))
+ return false;
+ if (!isDereferenceablePointer(GEP->getOperand(0), Visited))
+ return false;
+ // Check the indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::const_op_iterator I = GEP->op_begin()+1,
+ E = GEP->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ Type *Ty = *GTI++;
+ // Struct indices can't be out of bounds.
+ if (isa<StructType>(Ty))
+ continue;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Index);
+ if (!CI)
+ return false;
+ // Zero is always ok.
+ if (CI->isZero())
+ continue;
+ // Check to see that it's within the bounds of an array.
+ ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ if (!ATy)
+ return false;
+ if (CI->getValue().getActiveBits() > 64)
+ return false;
+ if (CI->getZExtValue() >= ATy->getNumElements())
+ return false;
+ }
+ // Indices check out; this is dereferenceable.
+ return true;
+ }
+
+ // If we don't know, assume the worst.
+ return false;
+}
+
+/// isDereferenceablePointer - Test if this value is always a pointer to
+/// allocated and suitably aligned memory for a simple load or store.
+bool Value::isDereferenceablePointer() const {
+ SmallPtrSet<const Value *, 32> Visited;
+ return ::isDereferenceablePointer(this, Visited);
+}
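+
+// For example (a sketch): with @g defined as "global [10 x i32]", the
+// constant expression "getelementptr inbounds [10 x i32]* @g, i32 0, i32 5"
+// is considered dereferenceable: the base is a non-weak global and every
+// index is a constant within bounds. An index of 10, or any non-constant
+// index, makes the check above return false.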
+
+/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
+/// return the value in the PHI node corresponding to PredBB. If not, return
+/// ourself. This is useful if you want to know the value something has in a
+/// predecessor block.
+Value *Value::DoPHITranslation(const BasicBlock *CurBB,
+ const BasicBlock *PredBB) {
+ PHINode *PN = dyn_cast<PHINode>(this);
+ if (PN && PN->getParent() == CurBB)
+ return PN->getIncomingValueForBlock(PredBB);
+ return this;
+}
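+
+// Example (a sketch): given "%phi = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]" in
+// block %merge, DoPHITranslation(%merge, %bb1) on %phi returns %a, while
+// calling it on any non-PHI value (or with a different CurBB) just returns
+// the value itself.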
+
+LLVMContext &Value::getContext() const { return VTy->getContext(); }
+
+//===----------------------------------------------------------------------===//
+// ValueHandleBase Class
+//===----------------------------------------------------------------------===//
+
+/// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
+/// List is known to point into the existing use list.
+void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
+ assert(List && "Handle list is null?");
+
+ // Splice ourselves into the list.
+ Next = *List;
+ *List = this;
+ setPrevPtr(List);
+ if (Next) {
+ Next->setPrevPtr(&Next);
+ assert(VP.getPointer() == Next->VP.getPointer() && "Added to wrong list?");
+ }
+}
+
+void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) {
+ assert(List && "Must insert after existing node");
+
+ Next = List->Next;
+ setPrevPtr(&List->Next);
+ List->Next = this;
+ if (Next)
+ Next->setPrevPtr(&Next);
+}
+
+/// AddToUseList - Add this ValueHandle to the use list for VP.
+void ValueHandleBase::AddToUseList() {
+ assert(VP.getPointer() && "Null pointer doesn't have a use list!");
+
+ LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl;
+
+ if (VP.getPointer()->HasValueHandle) {
+ // If this value already has a ValueHandle, then it must be in the
+ // ValueHandles map already.
+ ValueHandleBase *&Entry = pImpl->ValueHandles[VP.getPointer()];
+ assert(Entry != 0 && "Value doesn't have any handles?");
+ AddToExistingUseList(&Entry);
+ return;
+ }
+
+ // Ok, it doesn't have any handles yet, so we must insert it into the
+ // DenseMap. However, doing this insertion could cause the DenseMap to
+ // reallocate itself, which would invalidate all of the PrevP pointers that
+ // point into the old table. Handle this by checking for reallocation and
+ // updating the stale pointers only if needed.
+ DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
+ const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
+
+ ValueHandleBase *&Entry = Handles[VP.getPointer()];
+ assert(Entry == 0 && "Value really did already have handles?");
+ AddToExistingUseList(&Entry);
+ VP.getPointer()->HasValueHandle = true;
+
+ // If reallocation didn't happen or if this was the first insertion, don't
+ // walk the table.
+ if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
+ Handles.size() == 1) {
+ return;
+ }
+
+ // Okay, reallocation did happen. Fix the Prev Pointers.
+ for (DenseMap<Value*, ValueHandleBase*>::iterator I = Handles.begin(),
+ E = Handles.end(); I != E; ++I) {
+ assert(I->second && I->first == I->second->VP.getPointer() &&
+ "List invariant broken!");
+ I->second->setPrevPtr(&I->second);
+ }
+}
+
+/// RemoveFromUseList - Remove this ValueHandle from its current use list.
+void ValueHandleBase::RemoveFromUseList() {
+ assert(VP.getPointer() && VP.getPointer()->HasValueHandle &&
+ "Pointer doesn't have a use list!");
+
+ // Unlink this from its use list.
+ ValueHandleBase **PrevPtr = getPrevPtr();
+ assert(*PrevPtr == this && "List invariant broken");
+
+ *PrevPtr = Next;
+ if (Next) {
+ assert(Next->getPrevPtr() == &Next && "List invariant broken");
+ Next->setPrevPtr(PrevPtr);
+ return;
+ }
+
+ // If the Next pointer was null, then it is possible that this was the last
+ // ValueHandle watching VP. If so, delete its entry from the ValueHandles
+ // map.
+ LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl;
+ DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
+ if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
+ Handles.erase(VP.getPointer());
+ VP.getPointer()->HasValueHandle = false;
+ }
+}
+
+
+void ValueHandleBase::ValueIsDeleted(Value *V) {
+ assert(V->HasValueHandle && "Should only be called if ValueHandles present");
+
+ // Get the linked list base, which is guaranteed to exist since the
+ // HasValueHandle flag is set.
+ LLVMContextImpl *pImpl = V->getContext().pImpl;
+ ValueHandleBase *Entry = pImpl->ValueHandles[V];
+ assert(Entry && "Value bit set but no entries exist");
+
+ // We use a local ValueHandleBase as an iterator so that ValueHandles can add
+ // and remove themselves from the list without breaking our iteration. This
+ // is not really an AssertingVH; we just have to give ValueHandleBase a kind.
+ // Note that we deliberately do not support the case when dropping a value
+ // handle results in a new value handle being permanently added to the list
+ // (as might occur in theory for CallbackVH's): the new value handle will not
+ // be processed and the checking code will mete out righteous punishment if
+ // the handle is still present once we have finished processing all the other
+ // value handles (it is fine to momentarily add then remove a value handle).
+ for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
+ Iterator.RemoveFromUseList();
+ Iterator.AddToExistingUseListAfter(Entry);
+ assert(Entry->Next == &Iterator && "Loop invariant broken.");
+
+ switch (Entry->getKind()) {
+ case Assert:
+ break;
+ case Tracking:
+ // Mark that this value has been deleted by setting it to an invalid Value
+ // pointer.
+ Entry->operator=(DenseMapInfo<Value *>::getTombstoneKey());
+ break;
+ case Weak:
+ // Weak just goes to null, which will unlink it from the list.
+ Entry->operator=(0);
+ break;
+ case Callback:
+ // Forward to the subclass's implementation.
+ static_cast<CallbackVH*>(Entry)->deleted();
+ break;
+ }
+ }
+
+ // All callbacks, weak references, and assertingVHs should be dropped by now.
+ if (V->HasValueHandle) {
+#ifndef NDEBUG // Only in +Asserts mode...
+ dbgs() << "While deleting: " << *V->getType() << " %" << V->getName()
+ << "\n";
+ if (pImpl->ValueHandles[V]->getKind() == Assert)
+ llvm_unreachable("An asserting value handle still pointed to this"
+ " value!");
+
+#endif
+ llvm_unreachable("All references to V were not removed?");
+ }
+}
+
+
+void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
+ assert(Old->HasValueHandle &&"Should only be called if ValueHandles present");
+ assert(Old != New && "Changing value into itself!");
+
+ // Get the linked list base, which is guaranteed to exist since the
+ // HasValueHandle flag is set.
+ LLVMContextImpl *pImpl = Old->getContext().pImpl;
+ ValueHandleBase *Entry = pImpl->ValueHandles[Old];
+
+ assert(Entry && "Value bit set but no entries exist");
+
+ // We use a local ValueHandleBase as an iterator so that
+ // ValueHandles can add and remove themselves from the list without
+ // breaking our iteration. This is not really an AssertingVH; we
+ // just have to give ValueHandleBase some kind.
+ for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
+ Iterator.RemoveFromUseList();
+ Iterator.AddToExistingUseListAfter(Entry);
+ assert(Entry->Next == &Iterator && "Loop invariant broken.");
+
+ switch (Entry->getKind()) {
+ case Assert:
+ // Asserting handle does not follow RAUW implicitly.
+ break;
+ case Tracking:
+ // Tracking goes to new value like a WeakVH. Note that this may make it
+ // something incompatible with its templated type. We don't want to have a
+ // virtual (or inline) interface to handle this though, so instead we make
+ // the TrackingVH accessors guarantee that a client never sees this value.
+
+ // FALLTHROUGH
+ case Weak:
+ // Weak goes to the new value, which will unlink it from Old's list.
+ Entry->operator=(New);
+ break;
+ case Callback:
+ // Forward to the subclass's implementation.
+ static_cast<CallbackVH*>(Entry)->allUsesReplacedWith(New);
+ break;
+ }
+ }
+
+#ifndef NDEBUG
+ // If any new tracking or weak value handles were added while processing the
+ // list, then complain about it now.
+ if (Old->HasValueHandle)
+ for (Entry = pImpl->ValueHandles[Old]; Entry; Entry = Entry->Next)
+ switch (Entry->getKind()) {
+ case Tracking:
+ case Weak:
+ dbgs() << "After RAUW from " << *Old->getType() << " %"
+ << Old->getName() << " to " << *New->getType() << " %"
+ << New->getName() << "\n";
+ llvm_unreachable("A tracking or weak value handle still pointed to the"
+ " old value!\n");
+ default:
+ break;
+ }
+#endif
+}
+
+// Default implementation for CallbackVH.
+void CallbackVH::allUsesReplacedWith(Value *) {}
+
+void CallbackVH::deleted() {
+ setValPtr(NULL);
+}
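+
+// A minimal CallbackVH subclass sketch (hypothetical, for illustration only):
+//
+//   struct ClearingVH : public CallbackVH {
+//     ClearingVH(Value *V) : CallbackVH(V) {}
+//     virtual void deleted() { setValPtr(0); } // must drop the dying Value
+//     virtual void allUsesReplacedWith(Value *New) { setValPtr(New); }
+//   };
+//
+// deleted() must stop referencing the Value being destroyed; otherwise
+// ValueIsDeleted above reaches its "references to V were not removed" check.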
diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp
new file mode 100644
index 000000000000..fffacb377770
--- /dev/null
+++ b/lib/IR/ValueSymbolTable.cpp
@@ -0,0 +1,117 @@
+//===-- ValueSymbolTable.cpp - Implement the ValueSymbolTable class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueSymbolTable class for the IR library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "valuesymtab"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Class destructor
+ValueSymbolTable::~ValueSymbolTable() {
+#ifndef NDEBUG // Only do this in +Asserts mode...
+ for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI)
+ dbgs() << "Value still in symbol table! Type = '"
+ << *VI->getValue()->getType() << "' Name = '"
+ << VI->getKeyData() << "'\n";
+ assert(vmap.empty() && "Values remain in symbol table!");
+#endif
+}
+
+// Insert a value into the symbol table with the specified name...
+//
+void ValueSymbolTable::reinsertValue(Value* V) {
+ assert(V->hasName() && "Can't insert nameless Value into symbol table");
+
+ // Try inserting the name, assuming it won't conflict.
+ if (vmap.insert(V->Name)) {
+ //DEBUG(dbgs() << " Inserted value: " << V->Name << ": " << *V << "\n");
+ return;
+ }
+
+ // Otherwise, there is a naming conflict. Rename this value.
+ SmallString<256> UniqueName(V->getName().begin(), V->getName().end());
+
+ // The name is already used; just free it so we can allocate a new name.
+ V->Name->Destroy();
+
+ unsigned BaseSize = UniqueName.size();
+ while (1) {
+ // Trim any suffix off and append the next number.
+ UniqueName.resize(BaseSize);
+ raw_svector_ostream(UniqueName) << ++LastUnique;
+
+ // Try to insert the vmap entry with this suffix.
+ ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
+ if (NewName.getValue() == 0) {
+ // Newly inserted name. Success!
+ NewName.setValue(V);
+ V->Name = &NewName;
+ //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ return;
+ }
+ }
+}
+
+void ValueSymbolTable::removeValueName(ValueName *V) {
+ //DEBUG(dbgs() << " Removing Value: " << V->getKeyData() << "\n");
+ // Remove the value from the symbol table.
+ vmap.remove(V);
+}
+
+/// createValueName - This method attempts to create a value name and insert
+/// it into the symbol table with the specified name. If it conflicts, it
+/// auto-renames the name and returns that instead.
+ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
+ // In the common case, the name is not already in the symbol table.
+ ValueName &Entry = vmap.GetOrCreateValue(Name);
+ if (Entry.getValue() == 0) {
+ Entry.setValue(V);
+ //DEBUG(dbgs() << " Inserted value: " << Entry.getKeyData() << ": "
+ // << *V << "\n");
+ return &Entry;
+ }
+
+ // Otherwise, there is a naming conflict. Rename this value.
+ SmallString<256> UniqueName(Name.begin(), Name.end());
+
+ while (1) {
+ // Trim any suffix off and append the next number.
+ UniqueName.resize(Name.size());
+ raw_svector_ostream(UniqueName) << ++LastUnique;
+
+ // Try to insert the vmap entry with this suffix.
+ ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
+ if (NewName.getValue() == 0) {
+ // Newly inserted name. Success!
+ NewName.setValue(V);
+ //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
+ return &NewName;
+ }
+ }
+}
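+
+// Uniquing example (a sketch, assuming an empty table and LastUnique == 0):
+//   createValueName("tmp", V1) yields "tmp"
+//   createValueName("tmp", V2) yields "tmp1"
+//   createValueName("tmp", V3) yields "tmp2"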
+
+
+// dump - print out the symbol table
+//
+void ValueSymbolTable::dump() const {
+ //DEBUG(dbgs() << "ValueSymbolTable:\n");
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ //DEBUG(dbgs() << " '" << I->getKeyData() << "' = ");
+ I->getValue()->dump();
+ //DEBUG(dbgs() << "\n");
+ }
+}
diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp
new file mode 100644
index 000000000000..ba04d60c24a1
--- /dev/null
+++ b/lib/IR/ValueTypes.cpp
@@ -0,0 +1,277 @@
+//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements methods in the CodeGen/ValueTypes.h header.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+EVT EVT::changeExtendedVectorElementTypeToInteger() const {
+ LLVMContext &Context = LLVMTy->getContext();
+ EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits());
+ return getVectorVT(Context, IntTy, getVectorNumElements());
+}
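+
+// Example (a sketch): for an extended type such as <5 x f32> (a vector with
+// no MVT enum value) this produces <5 x i32>: the 32-bit element size and the
+// element count are preserved, and only the element class becomes integer.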
+
+EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) {
+ EVT VT;
+ VT.LLVMTy = IntegerType::get(Context, BitWidth);
+ assert(VT.isExtended() && "Type is not extended!");
+ return VT;
+}
+
+EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT,
+ unsigned NumElements) {
+ EVT ResultVT;
+ ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), NumElements);
+ assert(ResultVT.isExtended() && "Type is not extended!");
+ return ResultVT;
+}
+
+bool EVT::isExtendedFloatingPoint() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isFPOrFPVectorTy();
+}
+
+bool EVT::isExtendedInteger() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isIntOrIntVectorTy();
+}
+
+bool EVT::isExtendedVector() const {
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy->isVectorTy();
+}
+
+bool EVT::isExtended16BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 16;
+}
+
+bool EVT::isExtended32BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 32;
+}
+
+bool EVT::isExtended64BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 64;
+}
+
+bool EVT::isExtended128BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 128;
+}
+
+bool EVT::isExtended256BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 256;
+}
+
+bool EVT::isExtended512BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 512;
+}
+
+bool EVT::isExtended1024BitVector() const {
+ return isExtendedVector() && getExtendedSizeInBits() == 1024;
+}
+
+EVT EVT::getExtendedVectorElementType() const {
+ assert(isExtended() && "Type is not extended!");
+ return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
+}
+
+unsigned EVT::getExtendedVectorNumElements() const {
+ assert(isExtended() && "Type is not extended!");
+ return cast<VectorType>(LLVMTy)->getNumElements();
+}
+
+unsigned EVT::getExtendedSizeInBits() const {
+ assert(isExtended() && "Type is not extended!");
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
+ return ITy->getBitWidth();
+ if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
+ return VTy->getBitWidth();
+ llvm_unreachable("Unrecognized extended type!");
+}
+
+/// getEVTString - This function returns the value type as a string, e.g. "i32".
+std::string EVT::getEVTString() const {
+ switch (V.SimpleTy) {
+ default:
+ if (isVector())
+ return "v" + utostr(getVectorNumElements()) +
+ getVectorElementType().getEVTString();
+ if (isInteger())
+ return "i" + utostr(getSizeInBits());
+ llvm_unreachable("Invalid EVT!");
+ case MVT::i1: return "i1";
+ case MVT::i8: return "i8";
+ case MVT::i16: return "i16";
+ case MVT::i32: return "i32";
+ case MVT::i64: return "i64";
+ case MVT::i128: return "i128";
+ case MVT::f16: return "f16";
+ case MVT::f32: return "f32";
+ case MVT::f64: return "f64";
+ case MVT::f80: return "f80";
+ case MVT::f128: return "f128";
+ case MVT::ppcf128: return "ppcf128";
+ case MVT::isVoid: return "isVoid";
+ case MVT::Other: return "ch";
+ case MVT::Glue: return "glue";
+ case MVT::x86mmx: return "x86mmx";
+ case MVT::v2i1: return "v2i1";
+ case MVT::v4i1: return "v4i1";
+ case MVT::v8i1: return "v8i1";
+ case MVT::v16i1: return "v16i1";
+ case MVT::v32i1: return "v32i1";
+ case MVT::v64i1: return "v64i1";
+ case MVT::v2i8: return "v2i8";
+ case MVT::v4i8: return "v4i8";
+ case MVT::v8i8: return "v8i8";
+ case MVT::v16i8: return "v16i8";
+ case MVT::v32i8: return "v32i8";
+ case MVT::v64i8: return "v64i8";
+ case MVT::v1i16: return "v1i16";
+ case MVT::v2i16: return "v2i16";
+ case MVT::v4i16: return "v4i16";
+ case MVT::v8i16: return "v8i16";
+ case MVT::v16i16: return "v16i16";
+ case MVT::v32i16: return "v32i16";
+ case MVT::v1i32: return "v1i32";
+ case MVT::v2i32: return "v2i32";
+ case MVT::v4i32: return "v4i32";
+ case MVT::v8i32: return "v8i32";
+ case MVT::v16i32: return "v16i32";
+ case MVT::v1i64: return "v1i64";
+ case MVT::v2i64: return "v2i64";
+ case MVT::v4i64: return "v4i64";
+ case MVT::v8i64: return "v8i64";
+ case MVT::v16i64: return "v16i64";
+ case MVT::v2f32: return "v2f32";
+ case MVT::v2f16: return "v2f16";
+ case MVT::v4f32: return "v4f32";
+ case MVT::v8f32: return "v8f32";
+ case MVT::v16f32: return "v16f32";
+ case MVT::v2f64: return "v2f64";
+ case MVT::v4f64: return "v4f64";
+ case MVT::v8f64: return "v8f64";
+ case MVT::Metadata:return "Metadata";
+ case MVT::Untyped: return "Untyped";
+ }
+}
+
+/// getTypeForEVT - This method returns an LLVM type corresponding to the
+/// specified EVT. For integer types, this returns an unsigned type. Note
+/// that this will abort for types that cannot be represented.
+Type *EVT::getTypeForEVT(LLVMContext &Context) const {
+ switch (V.SimpleTy) {
+ default:
+ assert(isExtended() && "Type is not extended!");
+ return LLVMTy;
+ case MVT::isVoid: return Type::getVoidTy(Context);
+ case MVT::i1: return Type::getInt1Ty(Context);
+ case MVT::i8: return Type::getInt8Ty(Context);
+ case MVT::i16: return Type::getInt16Ty(Context);
+ case MVT::i32: return Type::getInt32Ty(Context);
+ case MVT::i64: return Type::getInt64Ty(Context);
+ case MVT::i128: return IntegerType::get(Context, 128);
+ case MVT::f16: return Type::getHalfTy(Context);
+ case MVT::f32: return Type::getFloatTy(Context);
+ case MVT::f64: return Type::getDoubleTy(Context);
+ case MVT::f80: return Type::getX86_FP80Ty(Context);
+ case MVT::f128: return Type::getFP128Ty(Context);
+ case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
+ case MVT::x86mmx: return Type::getX86_MMXTy(Context);
+ case MVT::v2i1: return VectorType::get(Type::getInt1Ty(Context), 2);
+ case MVT::v4i1: return VectorType::get(Type::getInt1Ty(Context), 4);
+ case MVT::v8i1: return VectorType::get(Type::getInt1Ty(Context), 8);
+ case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16);
+ case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32);
+ case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64);
+ case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
+ case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
+ case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
+ case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16);
+ case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32);
+ case MVT::v64i8: return VectorType::get(Type::getInt8Ty(Context), 64);
+ case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1);
+ case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
+ case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
+ case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
+ case MVT::v32i16: return VectorType::get(Type::getInt16Ty(Context), 32);
+ case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1);
+ case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
+ case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16);
+ case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
+ case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
+ case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
+ case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
+ case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
+ case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
+ case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
+ case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
+ case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
+ case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8);
+ case MVT::Metadata: return Type::getMetadataTy(Context);
+ }
+}
+
+/// Return the value type corresponding to the specified type. This returns all
+/// pointers as MVT::iPTR. If HandleUnknown is true, unknown types are returned
+/// as Other, otherwise they are invalid.
+MVT MVT::getVT(Type *Ty, bool HandleUnknown){
+ switch (Ty->getTypeID()) {
+ default:
+ if (HandleUnknown) return MVT(MVT::Other);
+ llvm_unreachable("Unknown type!");
+ case Type::VoidTyID:
+ return MVT::isVoid;
+ case Type::IntegerTyID:
+ return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth());
+ case Type::HalfTyID: return MVT(MVT::f16);
+ case Type::FloatTyID: return MVT(MVT::f32);
+ case Type::DoubleTyID: return MVT(MVT::f64);
+ case Type::X86_FP80TyID: return MVT(MVT::f80);
+ case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
+ case Type::FP128TyID: return MVT(MVT::f128);
+ case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
+ case Type::PointerTyID: return MVT(MVT::iPTR);
+ case Type::VectorTyID: {
+ VectorType *VTy = cast<VectorType>(Ty);
+ return getVectorVT(
+ getVT(VTy->getElementType(), false), VTy->getNumElements());
+ }
+ }
+}
+
+/// getEVT - Return the value type corresponding to the specified type. This
+/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types
+/// are returned as Other, otherwise they are invalid.
+EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
+ switch (Ty->getTypeID()) {
+ default:
+ return MVT::getVT(Ty, HandleUnknown);
+ case Type::IntegerTyID:
+ return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
+ case Type::VectorTyID: {
+ VectorType *VTy = cast<VectorType>(Ty);
+ return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
+ VTy->getNumElements());
+ }
+ }
+}
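+
+// Examples (a sketch): getEVT(Type::getInt64Ty(Ctx)) is the simple type
+// MVT::i64; getEVT(IntegerType::get(Ctx, 37)) is an extended EVT because
+// there is no MVT::i37; and any pointer type maps to MVT::iPTR regardless of
+// its pointee type.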
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
new file mode 100644
index 000000000000..8bfbb322cf4c
--- /dev/null
+++ b/lib/IR/Verifier.cpp
@@ -0,0 +1,2144 @@
+//===-- Verifier.cpp - Implement the Module Verifier -----------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function verifier interface, that can be used for some
+// sanity checking of input to the system.
+//
+// Note that this does not provide full `Java style' security and verification;
+// instead it just tries to ensure that code is well-formed.
+//
+// * Both of a binary operator's parameters are of the same type
+// * Verify that the indices of mem access instructions match other operands
+// * Verify that arithmetic and other things are only performed on first-class
+// types, and that shifts and logical operations only happen on integral types.
+// * All of the constants in a switch statement are of the correct type
+// * The code is in valid SSA form
+// * It should be illegal to put a label into any other type (like a structure)
+// or to return one. [except constant arrays!]
+// * Only phi nodes can be self referential: 'add i32 %0, %0 ; <int>:0' is bad
+// * PHI nodes must have an entry for each predecessor, with no extras.
+// * PHI nodes must be the first thing in a basic block, all grouped together
+// * PHI nodes must have at least one entry
+// * All basic blocks should only end with terminator insts, not contain them
+// * The entry node to a function must not have predecessors
+// * All Instructions must be embedded into a basic block
+// * Functions cannot take a void-typed parameter
+// * Verify that a function's argument list agrees with its declared type.
+// * It is illegal to specify a name for a void value.
+// * It is illegal to have an internal global value with no initializer
+// * It is illegal to have a ret instruction that returns a value that does not
+// agree with the function return value type.
+// * Function call argument types match the function prototype
+// * A landing pad is defined by a landingpad instruction, and can be jumped to
+// only by the unwind edge of an invoke instruction.
+// * A landingpad instruction must be the first non-PHI instruction in the
+// block.
+// * All landingpad instructions within the same function must use the same
+// personality function.
+// * All other things that are tested by asserts spread about the code...
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+namespace { // Anonymous namespace for class
+ struct PreVerifier : public FunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+
+ PreVerifier() : FunctionPass(ID) {
+ initializePreVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ // Check that the prerequisites for successful DominatorTree construction
+ // are satisfied.
+ bool runOnFunction(Function &F) {
+ bool Broken = false;
+
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ if (I->empty() || !I->back().isTerminator()) {
+ dbgs() << "Basic Block in function '" << F.getName()
+ << "' does not have terminator!\n";
+ WriteAsOperand(dbgs(), I, true);
+ dbgs() << "\n";
+ Broken = true;
+ }
+ }
+
+ if (Broken)
+ report_fatal_error("Broken module, no Basic Block terminator!");
+
+ return false;
+ }
+ };
+}
+
+char PreVerifier::ID = 0;
+INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification",
+ false, false)
+static char &PreVerifyID = PreVerifier::ID;
+
+namespace {
+ struct Verifier : public FunctionPass, public InstVisitor<Verifier> {
+ static char ID; // Pass ID, replacement for typeid
+ bool Broken; // Is this module found to be broken?
+ VerifierFailureAction action; // What to do if verification fails.
+ Module *Mod; // Module we are verifying right now
+ LLVMContext *Context; // Context within which we are verifying
+ DominatorTree *DT; // Dominator Tree, caution can be null!
+
+ std::string Messages;
+ raw_string_ostream MessagesStr;
+
+ /// InstInThisBlock - when verifying a basic block, keep track of all of the
+ /// instructions we have seen so far. This allows us to do efficient
+ /// dominance checks for the case when an instruction has an operand that is
+ /// an instruction in the same block.
+ SmallPtrSet<Instruction*, 16> InstsInThisBlock;
+
+ /// MDNodes - keep track of the metadata nodes that have been checked
+ /// already.
+ SmallPtrSet<MDNode *, 32> MDNodes;
+
+ /// PersonalityFn - The personality function referenced by the
+ /// LandingPadInsts. All LandingPadInsts within the same function must use
+ /// the same personality function.
+ const Value *PersonalityFn;
+
+ Verifier()
+ : FunctionPass(ID), Broken(false),
+ action(AbortProcessAction), Mod(0), Context(0), DT(0),
+ MessagesStr(Messages), PersonalityFn(0) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
+ explicit Verifier(VerifierFailureAction ctn)
+ : FunctionPass(ID), Broken(false), action(ctn), Mod(0),
+ Context(0), DT(0), MessagesStr(Messages), PersonalityFn(0) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) {
+ Mod = &M;
+ Context = &M.getContext();
+
+ // We must abort before returning back to the pass manager, or else the
+ // pass manager may try to run other passes on the broken module.
+ return abortIfBroken();
+ }
+
+ bool runOnFunction(Function &F) {
+ // Get dominator information if we are being run by PassManager
+ DT = &getAnalysis<DominatorTree>();
+
+ Mod = F.getParent();
+ if (!Context) Context = &F.getContext();
+
+ visit(F);
+ InstsInThisBlock.clear();
+ PersonalityFn = 0;
+
+ // We must abort before returning back to the pass manager, or else the
+ // pass manager may try to run other passes on the broken module.
+ return abortIfBroken();
+ }
+
+ bool doFinalization(Module &M) {
+ // Scan through, checking all of the external functions' linkage now...
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ visitGlobalValue(*I);
+
+ // Check to make sure function prototypes are okay.
+ if (I->isDeclaration()) visitFunction(*I);
+ }
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ visitGlobalVariable(*I);
+
+ for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+ I != E; ++I)
+ visitGlobalAlias(*I);
+
+ for (Module::named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I)
+ visitNamedMDNode(*I);
+
+ visitModuleFlags(M);
+
+ // If the module is broken, abort at this time.
+ return abortIfBroken();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredID(PreVerifyID);
+ AU.addRequired<DominatorTree>();
+ }
+
+ /// abortIfBroken - If the module is broken and we are supposed to abort on
+ /// this condition, do so.
+ ///
+ bool abortIfBroken() {
+ if (!Broken) return false;
+ MessagesStr << "Broken module found, ";
+ switch (action) {
+ case AbortProcessAction:
+ MessagesStr << "compilation aborted!\n";
+ dbgs() << MessagesStr.str();
+ // Client should choose different reaction if abort is not desired
+ abort();
+ case PrintMessageAction:
+ MessagesStr << "verification continues.\n";
+ dbgs() << MessagesStr.str();
+ return false;
+ case ReturnStatusAction:
+ MessagesStr << "compilation terminated.\n";
+ return true;
+ }
+ llvm_unreachable("Invalid action");
+ }
+
+
+ // Verification methods...
+ void visitGlobalValue(GlobalValue &GV);
+ void visitGlobalVariable(GlobalVariable &GV);
+ void visitGlobalAlias(GlobalAlias &GA);
+ void visitNamedMDNode(NamedMDNode &NMD);
+ void visitMDNode(MDNode &MD, Function *F);
+ void visitModuleFlags(Module &M);
+ void visitModuleFlag(MDNode *Op, DenseMap<MDString*, MDNode*> &SeenIDs,
+ SmallVectorImpl<MDNode*> &Requirements);
+ void visitFunction(Function &F);
+ void visitBasicBlock(BasicBlock &BB);
+ using InstVisitor<Verifier>::visit;
+
+ void visit(Instruction &I);
+
+ void visitTruncInst(TruncInst &I);
+ void visitZExtInst(ZExtInst &I);
+ void visitSExtInst(SExtInst &I);
+ void visitFPTruncInst(FPTruncInst &I);
+ void visitFPExtInst(FPExtInst &I);
+ void visitFPToUIInst(FPToUIInst &I);
+ void visitFPToSIInst(FPToSIInst &I);
+ void visitUIToFPInst(UIToFPInst &I);
+ void visitSIToFPInst(SIToFPInst &I);
+ void visitIntToPtrInst(IntToPtrInst &I);
+ void visitPtrToIntInst(PtrToIntInst &I);
+ void visitBitCastInst(BitCastInst &I);
+ void visitPHINode(PHINode &PN);
+ void visitBinaryOperator(BinaryOperator &B);
+ void visitICmpInst(ICmpInst &IC);
+ void visitFCmpInst(FCmpInst &FC);
+ void visitExtractElementInst(ExtractElementInst &EI);
+ void visitInsertElementInst(InsertElementInst &EI);
+ void visitShuffleVectorInst(ShuffleVectorInst &EI);
+ void visitVAArgInst(VAArgInst &VAA) { visitInstruction(VAA); }
+ void visitCallInst(CallInst &CI);
+ void visitInvokeInst(InvokeInst &II);
+ void visitGetElementPtrInst(GetElementPtrInst &GEP);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+ void verifyDominatesUse(Instruction &I, unsigned i);
+ void visitInstruction(Instruction &I);
+ void visitTerminatorInst(TerminatorInst &I);
+ void visitBranchInst(BranchInst &BI);
+ void visitReturnInst(ReturnInst &RI);
+ void visitSwitchInst(SwitchInst &SI);
+ void visitIndirectBrInst(IndirectBrInst &BI);
+ void visitSelectInst(SelectInst &SI);
+ void visitUserOp1(Instruction &I);
+ void visitUserOp2(Instruction &I) { visitUserOp1(I); }
+ void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
+ void visitAtomicRMWInst(AtomicRMWInst &RMWI);
+ void visitFenceInst(FenceInst &FI);
+ void visitAllocaInst(AllocaInst &AI);
+ void visitExtractValueInst(ExtractValueInst &EVI);
+ void visitInsertValueInst(InsertValueInst &IVI);
+ void visitLandingPadInst(LandingPadInst &LPI);
+
+ void VerifyCallSite(CallSite CS);
+ bool PerformTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty,
+ int VT, unsigned ArgNo, std::string &Suffix);
+ bool VerifyIntrinsicType(Type *Ty,
+ ArrayRef<Intrinsic::IITDescriptor> &Infos,
+ SmallVectorImpl<Type*> &ArgTys);
+ void VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
+ bool isReturnValue, const Value *V);
+ void VerifyFunctionAttrs(FunctionType *FT, const AttributeSet &Attrs,
+ const Value *V);
+
+ void WriteValue(const Value *V) {
+ if (!V) return;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ WriteAsOperand(MessagesStr, V, true, Mod);
+ MessagesStr << '\n';
+ }
+ }
+
+ void WriteType(Type *T) {
+ if (!T) return;
+ MessagesStr << ' ' << *T;
+ }
+
+
+ // CheckFailed - A check failed, so print out the condition and the message
+ // that failed. This provides a nice place to put a breakpoint if you want
+ // to see why something is not correct.
+ void CheckFailed(const Twine &Message,
+ const Value *V1 = 0, const Value *V2 = 0,
+ const Value *V3 = 0, const Value *V4 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteValue(V2);
+ WriteValue(V3);
+ WriteValue(V4);
+ Broken = true;
+ }
+
+ void CheckFailed(const Twine &Message, const Value *V1,
+ Type *T2, const Value *V3 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteValue(V1);
+ WriteType(T2);
+ WriteValue(V3);
+ Broken = true;
+ }
+
+ void CheckFailed(const Twine &Message, Type *T1,
+ Type *T2 = 0, Type *T3 = 0) {
+ MessagesStr << Message.str() << "\n";
+ WriteType(T1);
+ WriteType(T2);
+ WriteType(T3);
+ Broken = true;
+ }
+ };
+} // End anonymous namespace
+
+char Verifier::ID = 0;
+INITIALIZE_PASS_BEGIN(Verifier, "verify", "Module Verifier", false, false)
+INITIALIZE_PASS_DEPENDENCY(PreVerifier)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(Verifier, "verify", "Module Verifier", false, false)
+
+// Assert - We know that cond should be true, if not print an error message.
+#define Assert(C, M) \
+ do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+ do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+ do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+ do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
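+
+// For example, a two-operand check in a visit method reads (a sketch, with an
+// illustrative message):
+//   Assert2(LHS->getType() == RHS->getType(),
+//           "Both operands to a binary operator must have the same type!",
+//           LHS, RHS);
+// The early 'return' baked into each macro makes a visit method stop at the
+// first failed check for the current instruction.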
+
+void Verifier::visit(Instruction &I) {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Assert1(I.getOperand(i) != 0, "Operand is null", &I);
+ InstVisitor<Verifier>::visit(I);
+}
+
+
+void Verifier::visitGlobalValue(GlobalValue &GV) {
+ Assert1(!GV.isDeclaration() ||
+ GV.isMaterializable() ||
+ GV.hasExternalLinkage() ||
+ GV.hasDLLImportLinkage() ||
+ GV.hasExternalWeakLinkage() ||
+ (isa<GlobalAlias>(GV) &&
+ (GV.hasLocalLinkage() || GV.hasWeakLinkage())),
+ "Global is external, but doesn't have external or dllimport or weak linkage!",
+ &GV);
+
+ Assert1(!GV.hasDLLImportLinkage() || GV.isDeclaration(),
+ "Global is marked as dllimport, but not external", &GV);
+
+ Assert1(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
+ "Only global variables can have appending linkage!", &GV);
+
+ if (GV.hasAppendingLinkage()) {
+ GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV);
+ Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
+ "Only global arrays can have appending linkage!", GVar);
+ }
+
+ Assert1(!GV.hasLinkOnceODRAutoHideLinkage() || GV.hasDefaultVisibility(),
+ "linkonce_odr_auto_hide can only have default visibility!",
+ &GV);
+}
+
+void Verifier::visitGlobalVariable(GlobalVariable &GV) {
+ if (GV.hasInitializer()) {
+ Assert1(GV.getInitializer()->getType() == GV.getType()->getElementType(),
+ "Global variable initializer type does not match global "
+ "variable type!", &GV);
+
+ // If the global has common linkage, it must have a zero initializer and
+ // cannot be constant.
+ if (GV.hasCommonLinkage()) {
+ Assert1(GV.getInitializer()->isNullValue(),
+ "'common' global must have a zero initializer!", &GV);
+ Assert1(!GV.isConstant(), "'common' global may not be marked constant!",
+ &GV);
+ }
+ } else {
+ Assert1(GV.hasExternalLinkage() || GV.hasDLLImportLinkage() ||
+ GV.hasExternalWeakLinkage(),
+ "invalid linkage type for global declaration", &GV);
+ }
+
+ if (GV.hasName() && (GV.getName() == "llvm.global_ctors" ||
+ GV.getName() == "llvm.global_dtors")) {
+ Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
+ // Don't worry about emitting an error for it not being an array;
+ // visitGlobalValue will complain on appending non-array.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getType()->getElementType())) {
+ StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+ PointerType *FuncPtrTy =
+ FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
+ Assert1(STy && STy->getNumElements() == 2 &&
+ STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
+ STy->getTypeAtIndex(1) == FuncPtrTy,
+ "wrong type for intrinsic global variable", &GV);
+ }
+ }
+
+ visitGlobalValue(GV);
+}
+
+void Verifier::visitGlobalAlias(GlobalAlias &GA) {
+ Assert1(!GA.getName().empty(),
+ "Alias name cannot be empty!", &GA);
+ Assert1(GA.hasExternalLinkage() || GA.hasLocalLinkage() ||
+ GA.hasWeakLinkage(),
+ "Alias should have external or external weak linkage!", &GA);
+ Assert1(GA.getAliasee(),
+ "Aliasee cannot be NULL!", &GA);
+ Assert1(GA.getType() == GA.getAliasee()->getType(),
+ "Alias and aliasee types should match!", &GA);
+ Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA);
+
+ if (!isa<GlobalValue>(GA.getAliasee())) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(GA.getAliasee());
+ Assert1(CE &&
+ (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ isa<GlobalValue>(CE->getOperand(0)),
+ "Aliasee should be either GlobalValue or bitcast of GlobalValue",
+ &GA);
+ }
+
+ const GlobalValue* Aliasee = GA.resolveAliasedGlobal(/*stopOnWeak*/ false);
+ Assert1(Aliasee,
+ "Aliasing chain should end with function or global variable", &GA);
+
+ visitGlobalValue(GA);
+}
+
+void Verifier::visitNamedMDNode(NamedMDNode &NMD) {
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) {
+ MDNode *MD = NMD.getOperand(i);
+ if (!MD)
+ continue;
+
+ Assert1(!MD->isFunctionLocal(),
+ "Named metadata operand cannot be function local!", MD);
+ visitMDNode(*MD, 0);
+ }
+}
+
+void Verifier::visitMDNode(MDNode &MD, Function *F) {
+ // Only visit each node once. Metadata can be mutually recursive, so this
+ // avoids infinite recursion here, as well as being an optimization.
+ if (!MDNodes.insert(&MD))
+ return;
+
+ for (unsigned i = 0, e = MD.getNumOperands(); i != e; ++i) {
+ Value *Op = MD.getOperand(i);
+ if (!Op)
+ continue;
+ if (isa<Constant>(Op) || isa<MDString>(Op))
+ continue;
+ if (MDNode *N = dyn_cast<MDNode>(Op)) {
+ Assert2(MD.isFunctionLocal() || !N->isFunctionLocal(),
+ "Global metadata operand cannot be function local!", &MD, N);
+ visitMDNode(*N, F);
+ continue;
+ }
+ Assert2(MD.isFunctionLocal(), "Invalid operand for global metadata!", &MD, Op);
+
+ // If this was an instruction, bb, or argument, verify that it is in the
+ // function that we expect.
+ Function *ActualF = 0;
+ if (Instruction *I = dyn_cast<Instruction>(Op))
+ ActualF = I->getParent()->getParent();
+ else if (BasicBlock *BB = dyn_cast<BasicBlock>(Op))
+ ActualF = BB->getParent();
+ else if (Argument *A = dyn_cast<Argument>(Op))
+ ActualF = A->getParent();
+ assert(ActualF && "Unimplemented function local metadata case!");
+
+ Assert2(ActualF == F, "function-local metadata used in wrong function",
+ &MD, Op);
+ }
+}
+
+void Verifier::visitModuleFlags(Module &M) {
+ const NamedMDNode *Flags = M.getModuleFlagsMetadata();
+ if (!Flags) return;
+
+ // Scan each flag, and track the flags and requirements.
+ DenseMap<MDString*, MDNode*> SeenIDs;
+ SmallVector<MDNode*, 16> Requirements;
+ for (unsigned I = 0, E = Flags->getNumOperands(); I != E; ++I) {
+ visitModuleFlag(Flags->getOperand(I), SeenIDs, Requirements);
+ }
+
+ // Validate that the requirements in the module are valid.
+ for (unsigned I = 0, E = Requirements.size(); I != E; ++I) {
+ MDNode *Requirement = Requirements[I];
+ MDString *Flag = cast<MDString>(Requirement->getOperand(0));
+ Value *ReqValue = Requirement->getOperand(1);
+
+ MDNode *Op = SeenIDs.lookup(Flag);
+ if (!Op) {
+ CheckFailed("invalid requirement on flag, flag is not present in module",
+ Flag);
+ continue;
+ }
+
+ if (Op->getOperand(2) != ReqValue) {
+ CheckFailed(("invalid requirement on flag, "
+ "flag does not have the required value"),
+ Flag);
+ continue;
+ }
+ }
+}
+
+void Verifier::visitModuleFlag(MDNode *Op, DenseMap<MDString*, MDNode*>&SeenIDs,
+ SmallVectorImpl<MDNode*> &Requirements) {
+ // Each module flag should have three arguments, the merge behavior (a
+ // constant int), the flag ID (an MDString), and the value.
+ Assert1(Op->getNumOperands() == 3,
+ "incorrect number of operands in module flag", Op);
+ ConstantInt *Behavior = dyn_cast<ConstantInt>(Op->getOperand(0));
+ MDString *ID = dyn_cast<MDString>(Op->getOperand(1));
+ Assert1(Behavior,
+ "invalid behavior operand in module flag (expected constant integer)",
+ Op->getOperand(0));
+ unsigned BehaviorValue = Behavior->getZExtValue();
+ Assert1(ID,
+ "invalid ID operand in module flag (expected metadata string)",
+ Op->getOperand(1));
+
+ // Sanity check the values for behaviors with additional requirements.
+ switch (BehaviorValue) {
+ default:
+ Assert1(false,
+ "invalid behavior operand in module flag (unexpected constant)",
+ Op->getOperand(0));
+ break;
+
+ case Module::Error:
+ case Module::Warning:
+ case Module::Override:
+ // These behavior types accept any value.
+ break;
+
+ case Module::Require: {
+ // The value should itself be an MDNode with two operands, a flag ID (an
+ // MDString), and a value.
+ MDNode *Value = dyn_cast<MDNode>(Op->getOperand(2));
+ Assert1(Value && Value->getNumOperands() == 2,
+ "invalid value for 'require' module flag (expected metadata pair)",
+ Op->getOperand(2));
+ Assert1(isa<MDString>(Value->getOperand(0)),
+ ("invalid value for 'require' module flag "
+ "(first value operand should be a string)"),
+ Value->getOperand(0));
+
+ // Append it to the list of requirements, to check once all module flags are
+ // scanned.
+ Requirements.push_back(Value);
+ break;
+ }
+
+ case Module::Append:
+ case Module::AppendUnique: {
+ // These behavior types require the operand be an MDNode.
+ Assert1(isa<MDNode>(Op->getOperand(2)),
+ "invalid value for 'append'-type module flag "
+ "(expected a metadata node)", Op->getOperand(2));
+ break;
+ }
+ }
+
+ // Unless this is a "requires" flag, check the ID is unique.
+ if (BehaviorValue != Module::Require) {
+ bool Inserted = SeenIDs.insert(std::make_pair(ID, Op)).second;
+ Assert1(Inserted,
+ "module flag identifiers must be unique (or of 'require' type)",
+ ID);
+ }
+}
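+
+// Module flags this accepts look like the following (a sketch in the metadata
+// syntax of this era; behavior 1 is Error, 2 is Warning):
+//   !llvm.module.flags = !{!0, !1}
+//   !0 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+//   !1 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}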
+
+// VerifyParameterAttrs - Check the given attributes for an argument or return
+// value of the specified type. The value V is printed in error messages.
+void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
+ bool isReturnValue, const Value *V) {
+ if (!Attrs.hasAttributes(Idx))
+ return;
+
+ Assert1(!Attrs.hasAttribute(Idx, Attribute::NoReturn) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoUnwind) &&
+ !Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
+ !Attrs.hasAttribute(Idx, Attribute::ReadOnly) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoInline) &&
+ !Attrs.hasAttribute(Idx, Attribute::AlwaysInline) &&
+ !Attrs.hasAttribute(Idx, Attribute::OptimizeForSize) &&
+ !Attrs.hasAttribute(Idx, Attribute::StackProtect) &&
+ !Attrs.hasAttribute(Idx, Attribute::StackProtectReq) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoRedZone) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoImplicitFloat) &&
+ !Attrs.hasAttribute(Idx, Attribute::Naked) &&
+ !Attrs.hasAttribute(Idx, Attribute::InlineHint) &&
+ !Attrs.hasAttribute(Idx, Attribute::StackAlignment) &&
+ !Attrs.hasAttribute(Idx, Attribute::UWTable) &&
+ !Attrs.hasAttribute(Idx, Attribute::NonLazyBind) &&
+ !Attrs.hasAttribute(Idx, Attribute::ReturnsTwice) &&
+ !Attrs.hasAttribute(Idx, Attribute::SanitizeAddress) &&
+ !Attrs.hasAttribute(Idx, Attribute::SanitizeThread) &&
+ !Attrs.hasAttribute(Idx, Attribute::SanitizeMemory) &&
+ !Attrs.hasAttribute(Idx, Attribute::MinSize) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoBuiltin),
+ "Some attributes in '" + Attrs.getAsString(Idx) +
+ "' only apply to functions!", V);
+
+ if (isReturnValue)
+ Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ !Attrs.hasAttribute(Idx, Attribute::Nest) &&
+ !Attrs.hasAttribute(Idx, Attribute::StructRet) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoCapture),
+ "Attribute 'byval', 'nest', 'sret', and 'nocapture' "
+ "do not apply to return values!", V);
+
+ // Check for mutually incompatible attributes.
+ Assert1(!((Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ Attrs.hasAttribute(Idx, Attribute::Nest)) ||
+ (Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ Attrs.hasAttribute(Idx, Attribute::StructRet)) ||
+ (Attrs.hasAttribute(Idx, Attribute::Nest) &&
+ Attrs.hasAttribute(Idx, Attribute::StructRet))), "Attributes "
+ "'byval, nest, and sret' are incompatible!", V);
+
+ Assert1(!((Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ Attrs.hasAttribute(Idx, Attribute::Nest)) ||
+ (Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ Attrs.hasAttribute(Idx, Attribute::InReg)) ||
+ (Attrs.hasAttribute(Idx, Attribute::Nest) &&
+ Attrs.hasAttribute(Idx, Attribute::InReg))), "Attributes "
+ "'byval, nest, and inreg' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(Idx, Attribute::ZExt) &&
+ Attrs.hasAttribute(Idx, Attribute::SExt)), "Attributes "
+ "'zeroext and signext' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
+ Attrs.hasAttribute(Idx, Attribute::ReadOnly)), "Attributes "
+ "'readnone and readonly' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(Idx, Attribute::NoInline) &&
+ Attrs.hasAttribute(Idx, Attribute::AlwaysInline)), "Attributes "
+ "'noinline and alwaysinline' are incompatible!", V);
+
+ Assert1(!AttrBuilder(Attrs, Idx).
+ hasAttributes(AttributeFuncs::typeIncompatible(Ty, Idx), Idx),
+ "Wrong types for attribute: " +
+ AttributeFuncs::typeIncompatible(Ty, Idx).getAsString(Idx), V);
+
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) ||
+ PTy->getElementType()->isSized(),
+ "Attribute 'byval' does not support unsized types!", V);
+ else
+ Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal),
+ "Attribute 'byval' only applies to parameters with pointer type!",
+ V);
+}
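+
+// Illustrative example (not from the sources above): a hypothetical parameter
+// such as
+//   void @f(i32* byval sret %p)
+// would fail the incompatibility checks above, since 'byval' and 'sret' may
+// not be combined on a single argument.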
+
+// VerifyFunctionAttrs - Check parameter attributes against a function type.
+// The value V is printed in error messages.
+void Verifier::VerifyFunctionAttrs(FunctionType *FT,
+ const AttributeSet &Attrs,
+ const Value *V) {
+ if (Attrs.isEmpty())
+ return;
+
+ bool SawNest = false;
+
+ for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
+ unsigned Index = Attrs.getSlotIndex(i);
+
+ Type *Ty;
+ if (Index == 0)
+ Ty = FT->getReturnType();
+ else if (Index-1 < FT->getNumParams())
+ Ty = FT->getParamType(Index-1);
+ else
+ break; // VarArgs attributes, verified elsewhere.
+
+ VerifyParameterAttrs(Attrs, Index, Ty, Index == 0, V);
+
+ if (Attrs.hasAttribute(Index, Attribute::Nest)) {
+ Assert1(!SawNest, "More than one parameter has attribute nest!", V);
+ SawNest = true;
+ }
+
+ if (Attrs.hasAttribute(Index, Attribute::StructRet))
+ Assert1(Index == 1, "Attribute sret is not on first parameter!", V);
+ }
+
+ if (!Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ return;
+
+ AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex);
+ NotFn.removeFunctionOnlyAttrs();
+ Assert1(NotFn.empty(), "Attributes '" +
+ AttributeSet::get(V->getContext(),
+ AttributeSet::FunctionIndex,
+ NotFn).getAsString(AttributeSet::FunctionIndex) +
+ "' do not apply to the function!", V);
+
+ // Check for mutually incompatible attributes.
+ Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ByVal) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Nest)) ||
+ (Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ByVal) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StructRet)) ||
+ (Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Nest) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StructRet))),
+ "Attributes 'byval, nest, and sret' are incompatible!", V);
+
+ Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ByVal) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Nest)) ||
+ (Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ByVal) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::InReg)) ||
+ (Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Nest) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::InReg))),
+ "Attributes 'byval, nest, and inreg' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ZExt) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::SExt)),
+ "Attributes 'zeroext and signext' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ReadNone) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::ReadOnly)),
+ "Attributes 'readnone and readonly' are incompatible!", V);
+
+ Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoInline) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::AlwaysInline)),
+ "Attributes 'noinline and alwaysinline' are incompatible!", V);
+}
+
+static bool VerifyAttributeCount(const AttributeSet &Attrs, unsigned Params) {
+ if (Attrs.getNumSlots() == 0)
+ return true;
+
+ unsigned LastSlot = Attrs.getNumSlots() - 1;
+ unsigned LastIndex = Attrs.getSlotIndex(LastSlot);
+ if (LastIndex <= Params
+ || (LastIndex == AttributeSet::FunctionIndex
+ && (LastSlot == 0 || Attrs.getSlotIndex(LastSlot - 1) <= Params)))
+ return true;
+
+ return false;
+}
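+
+// For reference: attribute indices are 0 for the return value, 1..N for the
+// parameters, and AttributeSet::FunctionIndex (~0U) for function attributes,
+// so e.g. a two-parameter function may legally use the index set {0, 1, 2, ~0U}.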
+
+// visitFunction - Verify that a function is ok.
+//
+void Verifier::visitFunction(Function &F) {
+ // Check function arguments.
+ FunctionType *FT = F.getFunctionType();
+ unsigned NumArgs = F.arg_size();
+
+ Assert1(Context == &F.getContext(),
+ "Function context does not match Module context!", &F);
+
+ Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
+ Assert2(FT->getNumParams() == NumArgs,
+ "# formal arguments must match # of arguments for function type!",
+ &F, FT);
+ Assert1(F.getReturnType()->isFirstClassType() ||
+ F.getReturnType()->isVoidTy() ||
+ F.getReturnType()->isStructTy(),
+ "Functions cannot return aggregate values!", &F);
+
+ Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
+ "Invalid struct return type!", &F);
+
+ const AttributeSet &Attrs = F.getAttributes();
+
+ Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
+ "Attribute after last parameter!", &F);
+
+ // Check function attributes.
+ VerifyFunctionAttrs(FT, Attrs, &F);
+
+ // Check that this function meets the restrictions on this calling convention.
+ switch (F.getCallingConv()) {
+ default:
+ break;
+ case CallingConv::C:
+ break;
+ case CallingConv::Fast:
+ case CallingConv::Cold:
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::Intel_OCL_BI:
+ case CallingConv::PTX_Kernel:
+ case CallingConv::PTX_Device:
+ Assert1(!F.isVarArg(),
+ "Varargs functions must have C calling conventions!", &F);
+ break;
+ }
+
+ bool isLLVMdotName = F.getName().size() >= 5 &&
+ F.getName().substr(0, 5) == "llvm.";
+
+ // Check that the argument values match the function type for this function...
+ unsigned i = 0;
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++i) {
+ Assert2(I->getType() == FT->getParamType(i),
+ "Argument value does not match function argument type!",
+ I, FT->getParamType(i));
+ Assert1(I->getType()->isFirstClassType(),
+ "Function arguments must have first-class types!", I);
+ if (!isLLVMdotName)
+ Assert2(!I->getType()->isMetadataTy(),
+ "Function takes metadata but isn't an intrinsic", I, &F);
+ }
+
+ if (F.isMaterializable()) {
+ // Function has a body somewhere we can't see.
+ } else if (F.isDeclaration()) {
+ Assert1(F.hasExternalLinkage() || F.hasDLLImportLinkage() ||
+ F.hasExternalWeakLinkage(),
+ "invalid linkage type for function declaration", &F);
+ } else {
+ // Verify that this function (which has a body) is not named "llvm.*". It
+ // is not legal to define intrinsics.
+ Assert1(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F);
+
+ // Check the entry node
+ BasicBlock *Entry = &F.getEntryBlock();
+ Assert1(pred_begin(Entry) == pred_end(Entry),
+ "Entry block to function must not have predecessors!", Entry);
+
+ // The address of the entry block cannot be taken, unless it is dead.
+ if (Entry->hasAddressTaken()) {
+ Assert1(!BlockAddress::get(Entry)->isConstantUsed(),
+ "blockaddress may not be used with the entry block!", Entry);
+ }
+ }
+
+ // If this function is actually an intrinsic, verify that it is only used in
+ // direct call/invokes, never having its "address taken".
+ if (F.getIntrinsicID()) {
+ const User *U;
+ if (F.hasAddressTaken(&U))
+ Assert1(0, "Invalid user of intrinsic instruction!", U);
+ }
+}
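+
+// Illustrative example: storing an intrinsic to memory, e.g. a hypothetical
+//   @fp = global void()* @llvm.donothing
+// takes the intrinsic's address and would trip the check above; intrinsics
+// may only appear as direct callees.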
+
+// visitBasicBlock - Verify that a basic block is well formed...
+//
+void Verifier::visitBasicBlock(BasicBlock &BB) {
+ InstsInThisBlock.clear();
+
+ // Ensure that basic blocks have terminators!
+ Assert1(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
+
+ // Check constraints that this basic block imposes on all of the PHI nodes in
+ // it.
+ if (isa<PHINode>(BB.front())) {
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
+ std::sort(Preds.begin(), Preds.end());
+ PHINode *PN;
+ for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
+ // Ensure that PHI nodes have at least one entry!
+ Assert1(PN->getNumIncomingValues() != 0,
+ "PHI nodes must have at least one entry. If the block is dead, "
+ "the PHI should be removed!", PN);
+ Assert1(PN->getNumIncomingValues() == Preds.size(),
+ "PHINode should have one entry for each predecessor of its "
+ "parent basic block!", PN);
+
+ // Get and sort all incoming values in the PHI node...
+ Values.clear();
+ Values.reserve(PN->getNumIncomingValues());
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Values.push_back(std::make_pair(PN->getIncomingBlock(i),
+ PN->getIncomingValue(i)));
+ std::sort(Values.begin(), Values.end());
+
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ // Check to make sure that if there is more than one entry for a
+ // particular basic block in this PHI node, that the incoming values are
+ // all identical.
+ //
+ Assert4(i == 0 || Values[i].first != Values[i-1].first ||
+ Values[i].second == Values[i-1].second,
+ "PHI node has multiple entries for the same basic block with "
+ "different incoming values!", PN, Values[i].first,
+ Values[i].second, Values[i-1].second);
+
+ // Check to make sure that the predecessors and PHI node entries are
+ // matched up.
+ Assert3(Values[i].first == Preds[i],
+ "PHI node entries do not match predecessors!", PN,
+ Values[i].first, Preds[i]);
+ }
+ }
+ }
+}
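+
+// Illustrative example: a block with predecessors %a and %b must list both in
+// every PHI, e.g. "%x = phi i32 [ 0, %a ], [ 1, %b ]"; a PHI naming only %a
+// would fail the predecessor-matching check above.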
+
+void Verifier::visitTerminatorInst(TerminatorInst &I) {
+ // Ensure that terminators only exist at the end of the basic block.
+ Assert1(&I == I.getParent()->getTerminator(),
+ "Terminator found in the middle of a basic block!", I.getParent());
+ visitInstruction(I);
+}
+
+void Verifier::visitBranchInst(BranchInst &BI) {
+ if (BI.isConditional()) {
+ Assert2(BI.getCondition()->getType()->isIntegerTy(1),
+ "Branch condition is not 'i1' type!", &BI, BI.getCondition());
+ }
+ visitTerminatorInst(BI);
+}
+
+void Verifier::visitReturnInst(ReturnInst &RI) {
+ Function *F = RI.getParent()->getParent();
+ unsigned N = RI.getNumOperands();
+ if (F->getReturnType()->isVoidTy())
+ Assert2(N == 0,
+ "Found return instr that returns non-void in Function of void "
+ "return type!", &RI, F->getReturnType());
+ else
+ Assert2(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
+ "Function return type does not match operand "
+ "type of return inst!", &RI, F->getReturnType());
+
+ // Check to make sure that the return value has necessary properties for
+ // terminators...
+ visitTerminatorInst(RI);
+}
+
+void Verifier::visitSwitchInst(SwitchInst &SI) {
+ // Check to make sure that all of the constants in the switch instruction
+ // have the same type as the switched-on value.
+ Type *SwitchTy = SI.getCondition()->getType();
+ IntegerType *IntTy = cast<IntegerType>(SwitchTy);
+ IntegersSubsetToBB Mapping;
+ std::map<IntegersSubset::Range, unsigned> RangeSetMap;
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
+ IntegersSubset CaseRanges = i.getCaseValueEx();
+ for (unsigned ri = 0, rie = CaseRanges.getNumItems(); ri < rie; ++ri) {
+ IntegersSubset::Range r = CaseRanges.getItem(ri);
+ Assert1(((const APInt&)r.getLow()).getBitWidth() == IntTy->getBitWidth(),
+ "Switch constants must all be same type as switch value!", &SI);
+ Assert1(((const APInt&)r.getHigh()).getBitWidth() == IntTy->getBitWidth(),
+ "Switch constants must all be same type as switch value!", &SI);
+ Mapping.add(r);
+ RangeSetMap[r] = i.getCaseIndex();
+ }
+ }
+
+ IntegersSubsetToBB::RangeIterator errItem;
+ if (!Mapping.verify(errItem)) {
+ unsigned CaseIndex = RangeSetMap[errItem->first];
+ SwitchInst::CaseIt i(&SI, CaseIndex);
+ Assert2(false, "Duplicate integer as switch case", &SI, i.getCaseValueEx());
+ }
+
+ visitTerminatorInst(SI);
+}
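+
+// Illustrative example: a switch listing both "i32 1, label %a" and
+// "i32 1, label %b" is rejected by the duplicate-case check above.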
+
+void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
+ Assert1(BI.getAddress()->getType()->isPointerTy(),
+ "Indirectbr operand must have pointer type!", &BI);
+ for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
+ Assert1(BI.getDestination(i)->getType()->isLabelTy(),
+ "Indirectbr destinations must all have pointer type!", &BI);
+
+ visitTerminatorInst(BI);
+}
+
+void Verifier::visitSelectInst(SelectInst &SI) {
+ Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
+ SI.getOperand(2)),
+ "Invalid operands for select instruction!", &SI);
+
+ Assert1(SI.getTrueValue()->getType() == SI.getType(),
+ "Select values must have same type as select instruction!", &SI);
+ visitInstruction(SI);
+}
+
+/// visitUserOp1 - User-defined operators shouldn't live beyond the lifetime
+/// of a pass; if any exist, it's an error.
+///
+void Verifier::visitUserOp1(Instruction &I) {
+ Assert1(0, "User-defined operators should not live outside of a pass!", &I);
+}
+
+void Verifier::visitTruncInst(TruncInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "trunc source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I);
+
+ visitInstruction(I);
+}
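+
+// Illustrative example: "trunc i32 %x to i8" is well formed, while
+// "trunc i32 %x to i64" fails the strictly-narrower size check above.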
+
+void Verifier::visitZExtInst(ZExtInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "zext source and destination must both be a vector or neither", &I);
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitSExtInst(SExtInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "sext source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPTruncInst(FPTruncInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fptrunc source and destination must both be a vector or neither",&I);
+ Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPExtInst(FPExtInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+
+ Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fpext source and destination must both be a vector or neither", &I);
+ Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitUIToFPInst(UIToFPInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "UIToFP source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isIntOrIntVectorTy(),
+ "UIToFP source must be integer or integer vector", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),
+ "UIToFP result must be FP or FP vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "UIToFP source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitSIToFPInst(SIToFPInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "SIToFP source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isIntOrIntVectorTy(),
+ "SIToFP source must be integer or integer vector", &I);
+ Assert1(DestTy->isFPOrFPVectorTy(),
+ "SIToFP result must be FP or FP vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "SIToFP source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPToUIInst(FPToUIInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "FPToUI source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector",
+ &I);
+ Assert1(DestTy->isIntOrIntVectorTy(),
+ "FPToUI result must be integer or integer vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToUI source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitFPToSIInst(FPToSIInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
+
+ Assert1(SrcVec == DstVec,
+ "FPToSI source and dest must both be vector or scalar", &I);
+ Assert1(SrcTy->isFPOrFPVectorTy(),
+ "FPToSI source must be FP or FP vector", &I);
+ Assert1(DestTy->isIntOrIntVectorTy(),
+ "FPToSI result must be integer or integer vector", &I);
+
+ if (SrcVec && DstVec)
+ Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToSI source and dest vector length mismatch", &I);
+
+ visitInstruction(I);
+}
+
+void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ Assert1(SrcTy->getScalarType()->isPointerTy(),
+ "PtrToInt source must be pointer", &I);
+ Assert1(DestTy->getScalarType()->isIntegerTy(),
+ "PtrToInt result must be integral", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "PtrToInt type mismatch", &I);
+
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "PtrToInt Vector width mismatch", &I);
+ }
+
+ visitInstruction(I);
+}
+
+void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ Assert1(SrcTy->getScalarType()->isIntegerTy(),
+ "IntToPtr source must be an integral", &I);
+ Assert1(DestTy->getScalarType()->isPointerTy(),
+ "IntToPtr result must be a pointer",&I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "IntToPtr type mismatch", &I);
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "IntToPtr Vector width mismatch", &I);
+ }
+ visitInstruction(I);
+}
+
+void Verifier::visitBitCastInst(BitCastInst &I) {
+ // Get the source and destination types
+ Type *SrcTy = I.getOperand(0)->getType();
+ Type *DestTy = I.getType();
+
+ // Get the size of the types in bits, we'll need this later
+ unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+ unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+
+ // BitCast implies a no-op cast of type only. No bits change.
+ // However, you can't cast pointers to anything but pointers.
+ Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(),
+ "Bitcast requires both operands to be pointer or neither", &I);
+ Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I);
+
+ // Disallow aggregates.
+ Assert1(!SrcTy->isAggregateType(),
+ "Bitcast operand must not be aggregate", &I);
+ Assert1(!DestTy->isAggregateType(),
+ "Bitcast type must not be aggregate", &I);
+
+ visitInstruction(I);
+}
+
+/// visitPHINode - Ensure that a PHI node is well formed.
+///
+void Verifier::visitPHINode(PHINode &PN) {
+ // Ensure that the PHI nodes are all grouped together at the top of the block.
+ // This can be tested by checking whether the instruction before this is
+ // either nonexistent (because this is begin()) or is a PHI node. If not,
+ // then there is some other instruction before a PHI.
+ Assert2(&PN == &PN.getParent()->front() ||
+ isa<PHINode>(--BasicBlock::iterator(&PN)),
+ "PHI nodes not grouped at top of basic block!",
+ &PN, PN.getParent());
+
+ // Check that all of the values of the PHI node have the same type as the
+ // result, and that the incoming blocks are really basic blocks.
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
+ "PHI node operands are not the same type as the result!", &PN);
+ }
+
+ // All other PHI node constraints are checked in the visitBasicBlock method.
+
+ visitInstruction(PN);
+}
+
+void Verifier::VerifyCallSite(CallSite CS) {
+ Instruction *I = CS.getInstruction();
+
+ Assert1(CS.getCalledValue()->getType()->isPointerTy(),
+ "Called function must be a pointer!", I);
+ PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
+
+ Assert1(FPTy->getElementType()->isFunctionTy(),
+ "Called function is not pointer to function type!", I);
+ FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
+
+ // Verify that the correct number of arguments are being passed
+ if (FTy->isVarArg())
+ Assert1(CS.arg_size() >= FTy->getNumParams(),
+ "Called function requires more parameters than were provided!",I);
+ else
+ Assert1(CS.arg_size() == FTy->getNumParams(),
+ "Incorrect number of arguments passed to called function!", I);
+
+ // Verify that all arguments to the call match the function type.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i),
+ "Call parameter type does not match function signature!",
+ CS.getArgument(i), FTy->getParamType(i), I);
+
+ const AttributeSet &Attrs = CS.getAttributes();
+
+ Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
+ "Attribute after last parameter!", I);
+
+ // Verify call attributes.
+ VerifyFunctionAttrs(FTy, Attrs, I);
+
+ if (FTy->isVarArg())
+ // Check attributes on the varargs part.
+ for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) {
+ VerifyParameterAttrs(Attrs, Idx, CS.getArgument(Idx-1)->getType(),
+ false, I);
+
+ Assert1(!Attrs.hasAttribute(Idx, Attribute::StructRet),
+ "Attribute 'sret' cannot be used for vararg call arguments!", I);
+ }
+
+ // Verify that there's no metadata unless it's a direct call to an intrinsic.
+ if (CS.getCalledFunction() == 0 ||
+ !CS.getCalledFunction()->getName().startswith("llvm.")) {
+ for (FunctionType::param_iterator PI = FTy->param_begin(),
+ PE = FTy->param_end(); PI != PE; ++PI)
+ Assert1(!(*PI)->isMetadataTy(),
+ "Function has metadata parameter but isn't an intrinsic", I);
+ }
+
+ visitInstruction(*I);
+}
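+
+// Illustrative example: calling a "void (i32)" callee as
+//   call void %f(i32 0, i32 1)
+// fails the argument-count check above; only varargs callees may be passed
+// extra arguments.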
+
+void Verifier::visitCallInst(CallInst &CI) {
+ VerifyCallSite(&CI);
+
+ if (Function *F = CI.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ visitIntrinsicFunctionCall(ID, CI);
+}
+
+void Verifier::visitInvokeInst(InvokeInst &II) {
+ VerifyCallSite(&II);
+
+ // Verify that there is a landingpad instruction as the first non-PHI
+ // instruction of the 'unwind' destination.
+ Assert1(II.getUnwindDest()->isLandingPad(),
+ "The unwind destination does not have a landingpad instruction!",&II);
+
+ visitTerminatorInst(II);
+}
+
+/// visitBinaryOperator - Check that both arguments to the binary operator are
+/// of the same type!
+///
+void Verifier::visitBinaryOperator(BinaryOperator &B) {
+ Assert1(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
+ "Both operands to a binary operator are not of the same type!", &B);
+
+ switch (B.getOpcode()) {
+ // Check that integer arithmetic operators are only used with
+ // integral operands.
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Integer arithmetic operators only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Integer arithmetic operators must have same type "
+ "for operands and result!", &B);
+ break;
+ // Check that floating-point arithmetic operators are only used with
+ // floating-point operands.
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ Assert1(B.getType()->isFPOrFPVectorTy(),
+ "Floating-point arithmetic operators only work with "
+ "floating-point types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Floating-point arithmetic operators must have same type "
+ "for operands and result!", &B);
+ break;
+ // Check that logical operators are only used with integral operands.
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Logical operators only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Logical operators must have same type for operands and result!",
+ &B);
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ Assert1(B.getType()->isIntOrIntVectorTy(),
+ "Shifts only work with integral types!", &B);
+ Assert1(B.getType() == B.getOperand(0)->getType(),
+ "Shift return type must be same as operands!", &B);
+ break;
+ default:
+ llvm_unreachable("Unknown BinaryOperator opcode!");
+ }
+
+ visitInstruction(B);
+}
+
+void Verifier::visitICmpInst(ICmpInst &IC) {
+ // Check that the operands are the same type
+ Type *Op0Ty = IC.getOperand(0)->getType();
+ Type *Op1Ty = IC.getOperand(1)->getType();
+ Assert1(Op0Ty == Op1Ty,
+ "Both operands to ICmp instruction are not of the same type!", &IC);
+ // Check that the operands are the right type
+ Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
+ "Invalid operand types for ICmp instruction", &IC);
+ // Check that the predicate is valid.
+ Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
+ IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
+ "Invalid predicate in ICmp instruction!", &IC);
+
+ visitInstruction(IC);
+}
+
+void Verifier::visitFCmpInst(FCmpInst &FC) {
+ // Check that the operands are the same type
+ Type *Op0Ty = FC.getOperand(0)->getType();
+ Type *Op1Ty = FC.getOperand(1)->getType();
+ Assert1(Op0Ty == Op1Ty,
+ "Both operands to FCmp instruction are not of the same type!", &FC);
+ // Check that the operands are the right type
+ Assert1(Op0Ty->isFPOrFPVectorTy(),
+ "Invalid operand types for FCmp instruction", &FC);
+ // Check that the predicate is valid.
+ Assert1(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
+ FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
+ "Invalid predicate in FCmp instruction!", &FC);
+
+ visitInstruction(FC);
+}
+
+void Verifier::visitExtractElementInst(ExtractElementInst &EI) {
+ Assert1(ExtractElementInst::isValidOperands(EI.getOperand(0),
+ EI.getOperand(1)),
+ "Invalid extractelement operands!", &EI);
+ visitInstruction(EI);
+}
+
+void Verifier::visitInsertElementInst(InsertElementInst &IE) {
+ Assert1(InsertElementInst::isValidOperands(IE.getOperand(0),
+ IE.getOperand(1),
+ IE.getOperand(2)),
+ "Invalid insertelement operands!", &IE);
+ visitInstruction(IE);
+}
+
+void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
+ Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
+ SV.getOperand(2)),
+ "Invalid shufflevector operands!", &SV);
+ visitInstruction(SV);
+}
+
+void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+ Type *TargetTy = GEP.getPointerOperandType()->getScalarType();
+
+ Assert1(isa<PointerType>(TargetTy),
+ "GEP base pointer is not a vector or a vector of pointers", &GEP);
+ Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(),
+ "GEP into unsized type!", &GEP);
+ Assert1(GEP.getPointerOperandType()->isVectorTy() ==
+ GEP.getType()->isVectorTy(), "Vector GEP must return a vector value",
+ &GEP);
+
+ SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
+ Type *ElTy =
+ GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
+ Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
+
+ Assert2(GEP.getType()->getScalarType()->isPointerTy() &&
+ cast<PointerType>(GEP.getType()->getScalarType())->getElementType()
+ == ElTy, "GEP is not of right type for indices!", &GEP, ElTy);
+
+ if (GEP.getPointerOperandType()->isVectorTy()) {
+ // Additional checks for vector GEPs.
+ unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements();
+ Assert1(GepWidth == GEP.getType()->getVectorNumElements(),
+ "Vector GEP result width doesn't match operand's", &GEP);
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
+ Type *IndexTy = Idxs[i]->getType();
+ Assert1(IndexTy->isVectorTy(),
+ "Vector GEP must have vector indices!", &GEP);
+ unsigned IndexWidth = IndexTy->getVectorNumElements();
+ Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+ }
+ }
+ visitInstruction(GEP);
+}
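+
+// Illustrative example: a vector GEP such as
+//   getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets
+// must produce a 4-element result, and each index operand must itself be a
+// 4-element vector, per the width checks above.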
+
+static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
+ return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
+}
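+
+// For illustration: the half-open ranges [0, 5) and [5, 10) are contiguous
+// (the upper bound of one equals the lower bound of the other), while
+// [0, 5) and [6, 10) are not.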
+
+void Verifier::visitLoadInst(LoadInst &LI) {
+ PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
+ Assert1(PTy, "Load operand must be a pointer.", &LI);
+ Type *ElTy = PTy->getElementType();
+ Assert2(ElTy == LI.getType(),
+ "Load result type does not match pointer operand type!", &LI, ElTy);
+ if (LI.isAtomic()) {
+ Assert1(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease,
+ "Load cannot have Release ordering", &LI);
+ Assert1(LI.getAlignment() != 0,
+ "Atomic load must specify explicit alignment", &LI);
+ if (!ElTy->isPointerTy()) {
+ Assert2(ElTy->isIntegerTy(),
+ "atomic store operand must have integer type!",
+ &LI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "atomic store operand must be power-of-two byte-sized integer",
+ &LI, ElTy);
+ }
+ } else {
+ Assert1(LI.getSynchScope() == CrossThread,
+ "Non-atomic load cannot have SynchronizationScope specified", &LI);
+ }
+
+ if (MDNode *Range = LI.getMetadata(LLVMContext::MD_range)) {
+ unsigned NumOperands = Range->getNumOperands();
+ Assert1(NumOperands % 2 == 0, "Unfinished range!", Range);
+ unsigned NumRanges = NumOperands / 2;
+ Assert1(NumRanges >= 1, "It should have at least one range!", Range);
+
+ ConstantRange LastRange(1); // Dummy initial value
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Low = dyn_cast<ConstantInt>(Range->getOperand(2*i));
+ Assert1(Low, "The lower limit must be an integer!", Low);
+ ConstantInt *High = dyn_cast<ConstantInt>(Range->getOperand(2*i + 1));
+ Assert1(High, "The upper limit must be an integer!", High);
+ Assert1(High->getType() == Low->getType() &&
+ High->getType() == ElTy, "Range types must match load type!",
+ &LI);
+
+ APInt HighV = High->getValue();
+ APInt LowV = Low->getValue();
+ ConstantRange CurRange(LowV, HighV);
+ Assert1(!CurRange.isEmptySet() && !CurRange.isFullSet(),
+ "Range must not be empty!", Range);
+ if (i != 0) {
+ Assert1(CurRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Assert1(LowV.sgt(LastRange.getLower()), "Intervals are not in order",
+ Range);
+ Assert1(!isContiguous(CurRange, LastRange), "Intervals are contiguous",
+ Range);
+ }
+ LastRange = ConstantRange(LowV, HighV);
+ }
+ if (NumRanges > 2) {
+ APInt FirstLow =
+ cast<ConstantInt>(Range->getOperand(0))->getValue();
+ APInt FirstHigh =
+ cast<ConstantInt>(Range->getOperand(1))->getValue();
+ ConstantRange FirstRange(FirstLow, FirstHigh);
+ Assert1(FirstRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Assert1(!isContiguous(FirstRange, LastRange), "Intervals are contiguous",
+ Range);
+ }
+
+ }
+
+ visitInstruction(LI);
+}
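+
+// Illustrative example of well-formed range metadata on an i8 load:
+//   %v = load i8* %p, !range !0
+//   !0 = metadata !{i8 0, i8 2, i8 4, i8 6}
+// The pairs [0,2) and [4,6) are in order, non-overlapping, and
+// non-contiguous, as required above.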
+
+void Verifier::visitStoreInst(StoreInst &SI) {
+ PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
+ Assert1(PTy, "Store operand must be a pointer.", &SI);
+ Type *ElTy = PTy->getElementType();
+ Assert2(ElTy == SI.getOperand(0)->getType(),
+ "Stored value type does not match pointer operand type!",
+ &SI, ElTy);
+ if (SI.isAtomic()) {
+ Assert1(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease,
+ "Store cannot have Acquire ordering", &SI);
+ Assert1(SI.getAlignment() != 0,
+ "Atomic store must specify explicit alignment", &SI);
+ if (!ElTy->isPointerTy()) {
+ Assert2(ElTy->isIntegerTy(),
+ "atomic store operand must have integer type!",
+ &SI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "atomic store operand must be power-of-two byte-sized integer",
+ &SI, ElTy);
+ }
+ } else {
+ Assert1(SI.getSynchScope() == CrossThread,
+ "Non-atomic store cannot have SynchronizationScope specified", &SI);
+ }
+ visitInstruction(SI);
+}
+
+void Verifier::visitAllocaInst(AllocaInst &AI) {
+ PointerType *PTy = AI.getType();
+ Assert1(PTy->getAddressSpace() == 0,
+ "Allocation instruction pointer not in the generic address space!",
+ &AI);
+ Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type",
+ &AI);
+ Assert1(AI.getArraySize()->getType()->isIntegerTy(),
+ "Alloca array size must have integer type", &AI);
+ visitInstruction(AI);
+}
+
+void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
+ Assert1(CXI.getOrdering() != NotAtomic,
+ "cmpxchg instructions must be atomic.", &CXI);
+ Assert1(CXI.getOrdering() != Unordered,
+ "cmpxchg instructions cannot be unordered.", &CXI);
+ PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType());
+ Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI);
+ Type *ElTy = PTy->getElementType();
+ Assert2(ElTy->isIntegerTy(),
+ "cmpxchg operand must have integer type!",
+ &CXI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "cmpxchg operand must be power-of-two byte-sized integer",
+ &CXI, ElTy);
+ Assert2(ElTy == CXI.getOperand(1)->getType(),
+ "Expected value type does not match pointer operand type!",
+ &CXI, ElTy);
+ Assert2(ElTy == CXI.getOperand(2)->getType(),
+ "Stored value type does not match pointer operand type!",
+ &CXI, ElTy);
+ visitInstruction(CXI);
+}
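+
+// Illustrative example: "cmpxchg i32* %p, i32 %old, i32 %new seq_cst" is
+// well formed; a float or i17 operand would fail the integer and
+// power-of-two-size checks above.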
+
+void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
+ Assert1(RMWI.getOrdering() != NotAtomic,
+ "atomicrmw instructions must be atomic.", &RMWI);
+ Assert1(RMWI.getOrdering() != Unordered,
+ "atomicrmw instructions cannot be unordered.", &RMWI);
+ PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
+ Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
+ Type *ElTy = PTy->getElementType();
+ Assert2(ElTy->isIntegerTy(),
+ "atomicrmw operand must have integer type!",
+ &RMWI, ElTy);
+ unsigned Size = ElTy->getPrimitiveSizeInBits();
+ Assert2(Size >= 8 && !(Size & (Size - 1)),
+ "atomicrmw operand must be power-of-two byte-sized integer",
+ &RMWI, ElTy);
+ Assert2(ElTy == RMWI.getOperand(1)->getType(),
+ "Argument value type does not match pointer operand type!",
+ &RMWI, ElTy);
+ Assert1(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() &&
+ RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP,
+ "Invalid binary operation!", &RMWI);
+ visitInstruction(RMWI);
+}
+
+void Verifier::visitFenceInst(FenceInst &FI) {
+ const AtomicOrdering Ordering = FI.getOrdering();
+ Assert1(Ordering == Acquire || Ordering == Release ||
+ Ordering == AcquireRelease || Ordering == SequentiallyConsistent,
+ "fence instructions may only have "
+ "acquire, release, acq_rel, or seq_cst ordering.", &FI);
+ visitInstruction(FI);
+}
+
+void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
+ Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
+ EVI.getIndices()) ==
+ EVI.getType(),
+ "Invalid ExtractValueInst operands!", &EVI);
+
+ visitInstruction(EVI);
+}
+
+void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
+ Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
+ IVI.getIndices()) ==
+ IVI.getOperand(1)->getType(),
+ "Invalid InsertValueInst operands!", &IVI);
+
+ visitInstruction(IVI);
+}
+
+void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
+ BasicBlock *BB = LPI.getParent();
+
+ // The landingpad instruction is ill-formed if it doesn't have any clauses and
+ // isn't a cleanup.
+ Assert1(LPI.getNumClauses() > 0 || LPI.isCleanup(),
+ "LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
+
+ // The landingpad instruction defines its parent as a landing pad block. The
+ // landing pad block may be branched to only by the unwind edge of an invoke.
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator());
+ Assert1(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
+ "Block containing LandingPadInst must be jumped to "
+ "only by the unwind edge of an invoke.", &LPI);
+ }
+
+ // The landingpad instruction must be the first non-PHI instruction in the
+ // block.
+ Assert1(LPI.getParent()->getLandingPadInst() == &LPI,
+ "LandingPadInst not the first non-PHI instruction in the block.",
+ &LPI);
+
+ // The personality functions for all landingpad instructions within the same
+ // function should match.
+ if (PersonalityFn)
+ Assert1(LPI.getPersonalityFn() == PersonalityFn,
+ "Personality function doesn't match others in function", &LPI);
+ PersonalityFn = LPI.getPersonalityFn();
+
+ // All operands must be constants.
+ Assert1(isa<Constant>(PersonalityFn), "Personality function is not constant!",
+ &LPI);
+ for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) {
+ Value *Clause = LPI.getClause(i);
+ Assert1(isa<Constant>(Clause), "Clause is not constant!", &LPI);
+ if (LPI.isCatch(i)) {
+ Assert1(isa<PointerType>(Clause->getType()),
+ "Catch operand does not have pointer type!", &LPI);
+ } else {
+ Assert1(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI);
+ Assert1(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause),
+ "Filter operand is not an array of constants!", &LPI);
+ }
+ }
+
+ visitInstruction(LPI);
+}
+
+void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
+ Instruction *Op = cast<Instruction>(I.getOperand(i));
+ // If we have an invalid invoke, don't try to compute the dominance.
+ // We already reject it in the invoke specific checks and the dominance
+ // computation doesn't handle multiple edges.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
+ if (II->getNormalDest() == II->getUnwindDest())
+ return;
+ }
+
+ const Use &U = I.getOperandUse(i);
+ Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, U),
+ "Instruction does not dominate all uses!", Op, &I);
+}
+
+/// visitInstruction - Verify that an instruction is well formed.
+///
+void Verifier::visitInstruction(Instruction &I) {
+ BasicBlock *BB = I.getParent();
+ Assert1(BB, "Instruction not embedded in basic block!", &I);
+
+ if (!isa<PHINode>(I)) { // Check that non-phi nodes are not self referential
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI)
+ Assert1(*UI != (User*)&I || !DT->isReachableFromEntry(BB),
+ "Only PHI nodes may reference their own value!", &I);
+ }
+
+ // Check that void typed values don't have names
+ Assert1(!I.getType()->isVoidTy() || !I.hasName(),
+ "Instruction has a name, but provides a void value!", &I);
+
+ // Check that the return value of the instruction is either void or a legal
+ // value type.
+ Assert1(I.getType()->isVoidTy() ||
+ I.getType()->isFirstClassType(),
+ "Instruction returns a non-scalar type!", &I);
+
+ // Check that the instruction doesn't produce metadata. Calls are already
+ // checked against the callee type.
+ Assert1(!I.getType()->isMetadataTy() ||
+ isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Invalid use of metadata!", &I);
+
+ // Check that all uses of the instruction, if they are instructions
+ // themselves, actually have parent basic blocks. If the use is not an
+ // instruction, it is an error!
+ for (User::use_iterator UI = I.use_begin(), UE = I.use_end();
+ UI != UE; ++UI) {
+ if (Instruction *Used = dyn_cast<Instruction>(*UI))
+ Assert2(Used->getParent() != 0, "Instruction referencing instruction not"
+ " embedded in a basic block!", &I, Used);
+ else {
+ CheckFailed("Use of instruction is not an instruction!", *UI);
+ return;
+ }
+ }
+
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ Assert1(I.getOperand(i) != 0, "Instruction has null operand!", &I);
+
+ // Check to make sure that only first-class-values are operands to
+ // instructions.
+ if (!I.getOperand(i)->getType()->isFirstClassType()) {
+ Assert1(0, "Instruction operands must be first-class values!", &I);
+ }
+
+ if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
+ // Check to make sure that the "address of" an intrinsic function is never
+ // taken.
+ Assert1(!F->isIntrinsic() || i == (isa<CallInst>(I) ? e-1 : 0),
+ "Cannot take the address of an intrinsic!", &I);
+ Assert1(!F->isIntrinsic() || isa<CallInst>(I) ||
+ F->getIntrinsicID() == Intrinsic::donothing,
+ "Cannot invoke an intrinsinc other than donothing", &I);
+ Assert1(F->getParent() == Mod, "Referencing function in another module!",
+ &I);
+ } else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
+ Assert1(OpBB->getParent() == BB->getParent(),
+ "Referring to a basic block in another function!", &I);
+ } else if (Argument *OpArg = dyn_cast<Argument>(I.getOperand(i))) {
+ Assert1(OpArg->getParent() == BB->getParent(),
+ "Referring to an argument in another function!", &I);
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
+ Assert1(GV->getParent() == Mod, "Referencing global in another module!",
+ &I);
+ } else if (isa<Instruction>(I.getOperand(i))) {
+ verifyDominatesUse(I, i);
+ } else if (isa<InlineAsm>(I.getOperand(i))) {
+ Assert1((i + 1 == e && isa<CallInst>(I)) ||
+ (i + 3 == e && isa<InvokeInst>(I)),
+ "Cannot take the address of an inline asm!", &I);
+ }
+ }
+
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpmath)) {
+ Assert1(I.getType()->isFPOrFPVectorTy(),
+ "fpmath requires a floating point result!", &I);
+ Assert1(MD->getNumOperands() == 1, "fpmath takes one operand!", &I);
+ Value *Op0 = MD->getOperand(0);
+ if (ConstantFP *CFP0 = dyn_cast_or_null<ConstantFP>(Op0)) {
+ APFloat Accuracy = CFP0->getValueAPF();
+ Assert1(Accuracy.isNormal() && !Accuracy.isNegative(),
+ "fpmath accuracy not a positive number!", &I);
+ } else {
+ Assert1(false, "invalid fpmath accuracy!", &I);
+ }
+ }
+
+ MDNode *MD = I.getMetadata(LLVMContext::MD_range);
+ Assert1(!MD || isa<LoadInst>(I), "Ranges are only for loads!", &I);
+
+ InstsInThisBlock.insert(&I);
+}
+
+/// VerifyIntrinsicType - Verify that the specified type (which comes from an
+/// intrinsic argument or return value) matches the type constraints specified
+/// by the .td file (e.g. an "any integer" argument really is an integer).
+///
+/// This returns true on error but does not print a message.
+bool Verifier::VerifyIntrinsicType(Type *Ty,
+ ArrayRef<Intrinsic::IITDescriptor> &Infos,
+ SmallVectorImpl<Type*> &ArgTys) {
+ using namespace Intrinsic;
+
+ // If we ran out of descriptors, there are too many arguments.
+ if (Infos.empty()) return true;
+ IITDescriptor D = Infos.front();
+ Infos = Infos.slice(1);
+
+ switch (D.Kind) {
+ case IITDescriptor::Void: return !Ty->isVoidTy();
+ case IITDescriptor::MMX: return !Ty->isX86_MMXTy();
+ case IITDescriptor::Metadata: return !Ty->isMetadataTy();
+ case IITDescriptor::Half: return !Ty->isHalfTy();
+ case IITDescriptor::Float: return !Ty->isFloatTy();
+ case IITDescriptor::Double: return !Ty->isDoubleTy();
+ case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width);
+ case IITDescriptor::Vector: {
+ VectorType *VT = dyn_cast<VectorType>(Ty);
+ return VT == 0 || VT->getNumElements() != D.Vector_Width ||
+ VerifyIntrinsicType(VT->getElementType(), Infos, ArgTys);
+ }
+ case IITDescriptor::Pointer: {
+ PointerType *PT = dyn_cast<PointerType>(Ty);
+ return PT == 0 || PT->getAddressSpace() != D.Pointer_AddressSpace ||
+ VerifyIntrinsicType(PT->getElementType(), Infos, ArgTys);
+ }
+
+ case IITDescriptor::Struct: {
+ StructType *ST = dyn_cast<StructType>(Ty);
+ if (ST == 0 || ST->getNumElements() != D.Struct_NumElements)
+ return true;
+
+ for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
+ if (VerifyIntrinsicType(ST->getElementType(i), Infos, ArgTys))
+ return true;
+ return false;
+ }
+
+ case IITDescriptor::Argument:
+ // Two cases here - If this is the second occurrence of an argument, verify
+ // that the later instance matches the previous instance.
+ if (D.getArgumentNumber() < ArgTys.size())
+ return Ty != ArgTys[D.getArgumentNumber()];
+
+ // Otherwise, if this is the first instance of an argument, record it and
+ // verify the "Any" kind.
+ assert(D.getArgumentNumber() == ArgTys.size() && "Table consistency error");
+ ArgTys.push_back(Ty);
+
+ switch (D.getArgumentKind()) {
+ case IITDescriptor::AK_AnyInteger: return !Ty->isIntOrIntVectorTy();
+ case IITDescriptor::AK_AnyFloat: return !Ty->isFPOrFPVectorTy();
+ case IITDescriptor::AK_AnyVector: return !isa<VectorType>(Ty);
+ case IITDescriptor::AK_AnyPointer: return !isa<PointerType>(Ty);
+ }
+ llvm_unreachable("all argument kinds not covered");
+
+ case IITDescriptor::ExtendVecArgument:
+ // This may only be used when referring to a previous vector argument.
+ return D.getArgumentNumber() >= ArgTys.size() ||
+ !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
+ VectorType::getExtendedElementVectorType(
+ cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
+
+ case IITDescriptor::TruncVecArgument:
+ // This may only be used when referring to a previous vector argument.
+ return D.getArgumentNumber() >= ArgTys.size() ||
+ !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
+ VectorType::getTruncatedElementVectorType(
+ cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
+ }
+ llvm_unreachable("unhandled");
+}
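+
+// Illustrative example: for a descriptor declared as "any integer", the first
+// use records the concrete type (say i32) in ArgTys; a later descriptor
+// referring to the same argument number must then match i32 exactly.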
+
+/// visitIntrinsicFunctionCall - Allow intrinsics to be verified in different
+/// ways.
+///
+void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
+ Function *IF = CI.getCalledFunction();
+ Assert1(IF->isDeclaration(), "Intrinsic functions should never be defined!",
+ IF);
+
+ // Verify that the intrinsic prototype lines up with what the .td files
+ // describe.
+ FunctionType *IFTy = IF->getFunctionType();
+ Assert1(!IFTy->isVarArg(), "Intrinsic prototypes are not varargs", IF);
+
+ SmallVector<Intrinsic::IITDescriptor, 8> Table;
+ getIntrinsicInfoTableEntries(ID, Table);
+ ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
+
+ SmallVector<Type *, 4> ArgTys;
+ Assert1(!VerifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys),
+ "Intrinsic has incorrect return type!", IF);
+ for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i)
+ Assert1(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys),
+ "Intrinsic has incorrect argument type!", IF);
+ Assert1(TableRef.empty(), "Intrinsic has too few arguments!", IF);
+
+ // Now that we have the intrinsic ID and the actual argument types (and we
+ // know they are legal for the intrinsic!) get the intrinsic name through the
+ // usual means. This allows us to verify the mangling of argument types into
+ // the name.
+ Assert1(Intrinsic::getName(ID, ArgTys) == IF->getName(),
+ "Intrinsic name not mangled correctly for type arguments!", IF);
+
+ // If the intrinsic takes MDNode arguments, verify that they are either global
+ // or are local to *this* function.
+ for (unsigned i = 0, e = CI.getNumArgOperands(); i != e; ++i)
+ if (MDNode *MD = dyn_cast<MDNode>(CI.getArgOperand(i)))
+ visitMDNode(*MD, CI.getParent()->getParent());
+
+ switch (ID) {
+ default:
+ break;
+ case Intrinsic::ctlz: // llvm.ctlz
+ case Intrinsic::cttz: // llvm.cttz
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+ "is_zero_undef argument of bit counting intrinsics must be a "
+ "constant int", &CI);
+ break;
+ case Intrinsic::dbg_declare: { // llvm.dbg.declare
+ Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
+ "invalid llvm.dbg.declare intrinsic call 1", &CI);
+ MDNode *MD = cast<MDNode>(CI.getArgOperand(0));
+ Assert1(MD->getNumOperands() == 1,
+ "invalid llvm.dbg.declare intrinsic call 2", &CI);
+ } break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
+ "alignment argument of memory intrinsics must be a constant int",
+ &CI);
+ Assert1(isa<ConstantInt>(CI.getArgOperand(4)),
+ "isvolatile argument of memory intrinsics must be a constant int",
+ &CI);
+ break;
+ case Intrinsic::gcroot:
+ case Intrinsic::gcwrite:
+ case Intrinsic::gcread:
+ if (ID == Intrinsic::gcroot) {
+ AllocaInst *AI =
+ dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
+ Assert1(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
+ Assert1(isa<Constant>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #2 must be a constant.", &CI);
+ if (!AI->getType()->getElementType()->isPointerTy()) {
+ Assert1(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #1 must either be a pointer alloca, "
+ "or argument #2 must be a non-null constant.", &CI);
+ }
+ }
+
+ Assert1(CI.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", &CI);
+ break;
+ case Intrinsic::init_trampoline:
+ Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.init_trampoline parameter #2 must resolve to a function.",
+ &CI);
+ break;
+ case Intrinsic::prefetch:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
+ isa<ConstantInt>(CI.getArgOperand(2)) &&
+ cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
+ "invalid arguments to llvm.prefetch",
+ &CI);
+ break;
+ case Intrinsic::stackprotector:
+ Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.stackprotector parameter #2 must resolve to an alloca.",
+ &CI);
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
+ "size argument of memory use markers must be a constant integer",
+ &CI);
+ break;
+ case Intrinsic::invariant_end:
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+ "llvm.invariant.end parameter #2 must be a constant integer", &CI);
+ break;
+ }
+}
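+
+// Illustrative example: a call such as
+//   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 %n,
+//                                        i32 4, i1 false)
+// passes the checks above only because the alignment and isvolatile
+// operands are constant.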
+
+//===----------------------------------------------------------------------===//
+// Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createVerifierPass(VerifierFailureAction action) {
+ return new Verifier(action);
+}
+
+/// verifyFunction - Check a function for errors, printing messages on stderr.
+/// Return true if the function is corrupt.
+///
+bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) {
+ Function &F = const_cast<Function&>(f);
+ assert(!F.isDeclaration() && "Cannot verify external functions");
+
+ FunctionPassManager FPM(F.getParent());
+ Verifier *V = new Verifier(action);
+ FPM.add(V);
+ FPM.run(F);
+ return V->Broken;
+}
+
+/// verifyModule - Check a module for errors, printing messages on stderr.
+/// Return true if the module is corrupt.
+///
+bool llvm::verifyModule(const Module &M, VerifierFailureAction action,
+ std::string *ErrorInfo) {
+ PassManager PM;
+ Verifier *V = new Verifier(action);
+ PM.add(V);
+ PM.run(const_cast<Module&>(M));
+
+ if (ErrorInfo && V->Broken)
+ *ErrorInfo = V->MessagesStr.str();
+ return V->Broken;
+}
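+
+// Illustrative usage (outside this file): given a Module M, a tool might run
+//   std::string Err;
+//   if (verifyModule(M, ReturnStatusAction, &Err))
+//     errs() << Err;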
diff --git a/lib/IRReader/CMakeLists.txt b/lib/IRReader/CMakeLists.txt
new file mode 100644
index 000000000000..cf10d8b7dba9
--- /dev/null
+++ b/lib/IRReader/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMIRReader
+ IRReader.cpp
+ )
diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp
new file mode 100644
index 000000000000..eeec14e834c1
--- /dev/null
+++ b/lib/IRReader/IRReader.cpp
@@ -0,0 +1,89 @@
+//===---- IRReader.cpp - Reader for LLVM IR files -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+namespace llvm {
+ extern bool TimePassesIsEnabled;
+}
+
+static const char *TimeIRParsingGroupName = "LLVM IR Parsing";
+static const char *TimeIRParsingName = "Parse IR";
+
+
+Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+ (const unsigned char *)Buffer->getBufferEnd())) {
+ std::string ErrMsg;
+ Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg);
+ if (M == 0) {
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+ ErrMsg);
+ // getLazyBitcodeModule does not take ownership of the Buffer in the
+ // case of an error.
+ delete Buffer;
+ }
+ return M;
+ }
+
+ return ParseAssembly(Buffer, 0, Err, Context);
+}
+
+Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+ Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
+ "Could not open input file: " + ec.message());
+ return 0;
+ }
+
+ return getLazyIRModule(File.take(), Err, Context);
+}
+
+Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ NamedRegionTimer T(TimeIRParsingName, TimeIRParsingGroupName,
+ TimePassesIsEnabled);
+ if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+ (const unsigned char *)Buffer->getBufferEnd())) {
+ std::string ErrMsg;
+ Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg);
+ if (M == 0)
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+ ErrMsg);
+ // ParseBitcodeFile does not take ownership of the Buffer.
+ delete Buffer;
+ return M;
+ }
+
+ return ParseAssembly(Buffer, 0, Err, Context);
+}
+
+Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
+ LLVMContext &Context) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+ Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
+ "Could not open input file: " + ec.message());
+ return 0;
+ }
+
+ return ParseIR(File.take(), Err, Context);
+}
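
A short sketch of driving the new reader, assuming this revision's llvm/IR header layout; the input file and program names are placeholders.

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IRReader/IRReader.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/raw_ostream.h"

    llvm::LLVMContext Context;
    llvm::SMDiagnostic Err;
    // ParseIRFile accepts either bitcode or textual IR; as above, the
    // dispatch happens via isBitcode() on the buffer contents.
    llvm::Module *M = llvm::ParseIRFile("input.ll", Err, Context);
    if (!M)
      Err.print("irreader-demo", llvm::errs());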
diff --git a/lib/IRReader/LLVMBuild.txt b/lib/IRReader/LLVMBuild.txt
new file mode 100644
index 000000000000..b7bc74d61649
--- /dev/null
+++ b/lib/IRReader/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/IRReader/LLVMBuild.txt -----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = IRReader
+parent = Libraries
+required_libraries = AsmParser BitReader Core Support
diff --git a/lib/IRReader/Makefile b/lib/IRReader/Makefile
new file mode 100644
index 000000000000..cf6bc1135427
--- /dev/null
+++ b/lib/IRReader/Makefile
@@ -0,0 +1,14 @@
+##===- lib/IRReader/Makefile -------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME := LLVMIRReader
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index e22b8cd406b2..056544380698 100644
--- a/lib/LLVMBuild.txt
+++ b/lib/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Support TableGen Target Transforms VMCore
+subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker IR IRReader MC Object Option Support TableGen Target Transforms
[component_0]
type = Group
diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt
index 0b6d2f4218e3..28f1262a4398 100644
--- a/lib/Linker/CMakeLists.txt
+++ b/lib/Linker/CMakeLists.txt
@@ -1,6 +1,4 @@
add_llvm_library(LLVMLinker
- LinkArchives.cpp
- LinkItems.cpp
LinkModules.cpp
Linker.cpp
)
diff --git a/lib/Linker/LLVMBuild.txt b/lib/Linker/LLVMBuild.txt
index 2b4c232b8067..0bb26d0c2aea 100644
--- a/lib/Linker/LLVMBuild.txt
+++ b/lib/Linker/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = Linker
parent = Libraries
-required_libraries = Archive BitReader Core Support TransformUtils
+required_libraries = Core Support TransformUtils
diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp
deleted file mode 100644
index c16d1958cdfb..000000000000
--- a/lib/Linker/LinkArchives.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-//===- lib/Linker/LinkArchives.cpp - Link LLVM objects and libraries ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains routines to handle linking together LLVM bitcode files,
-// and to handle annoying things like static libraries.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Linker.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/Bitcode/Archive.h"
-#include <memory>
-#include <set>
-using namespace llvm;
-
-/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still
-/// exist in an LLVM module. This is a bit tricky because there may be two
-/// symbols with the same name but different LLVM types that will be resolved to
-/// each other but aren't currently (thus we need to treat it as resolved).
-///
-/// Inputs:
-/// M - The module in which to find undefined symbols.
-///
-/// Outputs:
-/// UndefinedSymbols - A set of C++ strings containing the name of all
-/// undefined symbols.
-///
-static void
-GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) {
- std::set<std::string> DefinedSymbols;
- UndefinedSymbols.clear();
-
- // If the program doesn't define a main, try pulling one in from a .a file.
- // This is needed for programs where the main function is defined in an
- // archive, such f2c'd programs.
- Function *Main = M->getFunction("main");
- if (Main == 0 || Main->isDeclaration())
- UndefinedSymbols.insert("main");
-
- for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
- if (I->hasName()) {
- if (I->isDeclaration())
- UndefinedSymbols.insert(I->getName());
- else if (!I->hasLocalLinkage()) {
- assert(!I->hasDLLImportLinkage()
- && "Found dllimported non-external symbol!");
- DefinedSymbols.insert(I->getName());
- }
- }
-
- for (Module::global_iterator I = M->global_begin(), E = M->global_end();
- I != E; ++I)
- if (I->hasName()) {
- if (I->isDeclaration())
- UndefinedSymbols.insert(I->getName());
- else if (!I->hasLocalLinkage()) {
- assert(!I->hasDLLImportLinkage()
- && "Found dllimported non-external symbol!");
- DefinedSymbols.insert(I->getName());
- }
- }
-
- for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
- I != E; ++I)
- if (I->hasName())
- DefinedSymbols.insert(I->getName());
-
- // Prune out any defined symbols from the undefined symbols set...
- for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
- I != UndefinedSymbols.end(); )
- if (DefinedSymbols.count(*I))
- UndefinedSymbols.erase(I++); // This symbol really is defined!
- else
- ++I; // Keep this symbol in the undefined symbols list
-}
-
-/// LinkInArchive - opens an archive library and link in all objects which
-/// provide symbols that are currently undefined.
-///
-/// Inputs:
-/// Filename - The pathname of the archive.
-///
-/// Return Value:
-/// TRUE - An error occurred.
-/// FALSE - No errors.
-bool
-Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
- // Make sure this is an archive file we're dealing with
- if (!Filename.isArchive())
- return error("File '" + Filename.str() + "' is not an archive.");
-
- // Open the archive file
- verbose("Linking archive file '" + Filename.str() + "'");
-
- // Find all of the symbols currently undefined in the bitcode program.
- // If all the symbols are defined, the program is complete, and there is
- // no reason to link in any archive files.
- std::set<std::string> UndefinedSymbols;
- GetAllUndefinedSymbols(Composite, UndefinedSymbols);
-
- if (UndefinedSymbols.empty()) {
- verbose("No symbols undefined, skipping library '" + Filename.str() + "'");
- return false; // No need to link anything in!
- }
-
- std::string ErrMsg;
- std::auto_ptr<Archive> AutoArch (
- Archive::OpenAndLoadSymbols(Filename, Context, &ErrMsg));
-
- Archive* arch = AutoArch.get();
-
- if (!arch)
- return error("Cannot read archive '" + Filename.str() +
- "': " + ErrMsg);
- if (!arch->isBitcodeArchive()) {
- is_native = true;
- return false;
- }
- is_native = false;
-
- // Save a set of symbols that are not defined by the archive. Since we're
- // entering a loop, there's no point searching for these multiple times. This
- // variable is used to "set_subtract" from the set of undefined symbols.
- std::set<std::string> NotDefinedByArchive;
-
- // Save the current set of undefined symbols, because we may have to make
- // multiple passes over the archive:
- std::set<std::string> CurrentlyUndefinedSymbols;
-
- do {
- CurrentlyUndefinedSymbols = UndefinedSymbols;
-
- // Find the modules we need to link into the target module. Note that arch
- // keeps ownership of these modules and may return the same Module* from a
- // subsequent call.
- SmallVector<Module*, 16> Modules;
- if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg))
- return error("Cannot find symbols in '" + Filename.str() +
- "': " + ErrMsg);
-
- // If we didn't find any more modules to link this time, we are done
- // searching this archive.
- if (Modules.empty())
- break;
-
- // Any symbols remaining in UndefinedSymbols after
- // findModulesDefiningSymbols are ones that the archive does not define. So
- // we add them to the NotDefinedByArchive variable now.
- NotDefinedByArchive.insert(UndefinedSymbols.begin(),
- UndefinedSymbols.end());
-
- // Loop over all the Modules that we got back from the archive
- for (SmallVectorImpl<Module*>::iterator I=Modules.begin(), E=Modules.end();
- I != E; ++I) {
-
- // Get the module we must link in.
- std::string moduleErrorMsg;
- Module* aModule = *I;
- if (aModule != NULL) {
- if (aModule->MaterializeAll(&moduleErrorMsg))
- return error("Could not load a module: " + moduleErrorMsg);
-
- verbose(" Linking in module: " + aModule->getModuleIdentifier());
-
- // Link it in
- if (LinkInModule(aModule, &moduleErrorMsg))
- return error("Cannot link in module '" +
- aModule->getModuleIdentifier() + "': " + moduleErrorMsg);
- }
- }
-
- // Get the undefined symbols from the aggregate module. This recomputes the
- // symbols we still need after the new modules have been linked in.
- GetAllUndefinedSymbols(Composite, UndefinedSymbols);
-
- // At this point we have two sets of undefined symbols: UndefinedSymbols
- // which holds the undefined symbols from all the modules, and
- // NotDefinedByArchive which holds symbols we know the archive doesn't
- // define. There's no point searching for symbols that we won't find in the
- // archive so we subtract these sets.
- set_subtract(UndefinedSymbols, NotDefinedByArchive);
-
- // If there's no symbols left, no point in continuing to search the
- // archive.
- if (UndefinedSymbols.empty())
- break;
- } while (CurrentlyUndefinedSymbols != UndefinedSymbols);
-
- return false;
-}
diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp
deleted file mode 100644
index 52a0d175a5cd..000000000000
--- a/lib/Linker/LinkItems.cpp
+++ /dev/null
@@ -1,241 +0,0 @@
-//===- lib/Linker/LinkItems.cpp - Link LLVM objects and libraries ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains routines to handle linking together LLVM bitcode files,
-// and to handle annoying things like static libraries.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Linker.h"
-#include "llvm/Module.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/system_error.h"
-using namespace llvm;
-
-// LinkItems - This function is the main entry point into linking. It takes a
-// list of LinkItem which indicates the order the files should be linked and
-// how each file should be treated (plain file or with library search). The
-// function only links bitcode and produces a result list of items that are
-// native objects.
-bool
-Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) {
- // Clear the NativeItems just in case
- NativeItems.clear();
-
- // For each linkage item ...
- for (ItemList::const_iterator I = Items.begin(), E = Items.end();
- I != E; ++I) {
- if (I->second) {
- // Link in the library suggested.
- bool is_native = false;
- if (LinkInLibrary(I->first, is_native))
- return true;
- if (is_native)
- NativeItems.push_back(*I);
- } else {
- // Link in the file suggested
- bool is_native = false;
- if (LinkInFile(sys::Path(I->first), is_native))
- return true;
- if (is_native)
- NativeItems.push_back(*I);
- }
- }
-
- // At this point we have processed all the link items provided to us. Since
- // we have an aggregated module at this point, the dependent libraries in
- // that module should also be aggregated with duplicates eliminated. This is
- // now the time to process the dependent libraries to resolve any remaining
- // symbols.
- bool is_native;
- for (Module::lib_iterator I = Composite->lib_begin(),
- E = Composite->lib_end(); I != E; ++I) {
- if(LinkInLibrary(*I, is_native))
- return true;
- if (is_native)
- NativeItems.push_back(std::make_pair(*I, true));
- }
-
- return false;
-}
-
-
-/// LinkInLibrary - links one library into the HeadModule.
-///
-bool Linker::LinkInLibrary(StringRef Lib, bool& is_native) {
- is_native = false;
- // Determine where this library lives.
- sys::Path Pathname = FindLib(Lib);
- if (Pathname.isEmpty())
- return error("Cannot find library '" + Lib.str() + "'");
-
- // If its an archive, try to link it in
- std::string Magic;
- Pathname.getMagicNumber(Magic, 64);
- switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
- default: llvm_unreachable("Bad file type identification");
- case sys::Unknown_FileType:
- return warning("Supposed library '" + Lib.str() + "' isn't a library.");
-
- case sys::Bitcode_FileType:
- // LLVM ".so" file.
- if (LinkInFile(Pathname, is_native))
- return true;
- break;
-
- case sys::Archive_FileType:
- if (LinkInArchive(Pathname, is_native))
- return error("Cannot link archive '" + Pathname.str() + "'");
- break;
-
- case sys::ELF_Relocatable_FileType:
- case sys::ELF_SharedObject_FileType:
- case sys::Mach_O_Object_FileType:
- case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
- case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
- case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
- case sys::COFF_FileType:
- is_native = true;
- break;
- }
- return false;
-}
-
-/// LinkLibraries - takes the specified library files and links them into the
-/// main bitcode object file.
-///
-/// Inputs:
-/// Libraries - The list of libraries to link into the module.
-///
-/// Return value:
-/// FALSE - No error.
-/// TRUE - Error.
-///
-bool Linker::LinkInLibraries(const std::vector<std::string> &Libraries) {
-
- // Process the set of libraries we've been provided.
- bool is_native = false;
- for (unsigned i = 0; i < Libraries.size(); ++i)
- if (LinkInLibrary(Libraries[i], is_native))
- return true;
-
- // At this point we have processed all the libraries provided to us. Since
- // we have an aggregated module at this point, the dependent libraries in
- // that module should also be aggregated with duplicates eliminated. This is
- // now the time to process the dependent libraries to resolve any remaining
- // symbols.
- const Module::LibraryListType& DepLibs = Composite->getLibraries();
- for (Module::LibraryListType::const_iterator I = DepLibs.begin(),
- E = DepLibs.end(); I != E; ++I)
- if (LinkInLibrary(*I, is_native))
- return true;
-
- return false;
-}
-
-/// LinkInFile - opens a bitcode file and links in all objects which
-/// provide symbols that are currently undefined.
-///
-/// Inputs:
-/// File - The pathname of the bitcode file.
-///
-/// Outputs:
-/// ErrorMessage - A C++ string detailing what error occurred, if any.
-///
-/// Return Value:
-/// TRUE - An error occurred.
-/// FALSE - No errors.
-///
-bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
- is_native = false;
-
- // Check for a file of name "-", which means "read standard input"
- if (File.str() == "-") {
- std::auto_ptr<Module> M;
- OwningPtr<MemoryBuffer> Buffer;
- error_code ec;
- if (!(ec = MemoryBuffer::getSTDIN(Buffer))) {
- if (!Buffer->getBufferSize()) {
- Error = "standard input is empty";
- } else {
- M.reset(ParseBitcodeFile(Buffer.get(), Context, &Error));
- if (M.get())
- if (!LinkInModule(M.get(), &Error))
- return false;
- }
- }
- return error("Cannot link stdin: " + ec.message());
- }
-
- // Determine what variety of file it is.
- std::string Magic;
- if (!File.getMagicNumber(Magic, 64))
- return error("Cannot find linker input '" + File.str() + "'");
-
- switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
- default: llvm_unreachable("Bad file type identification");
- case sys::Unknown_FileType:
- return warning("Ignoring file '" + File.str() +
- "' because does not contain bitcode.");
-
- case sys::Archive_FileType:
- // A user may specify an ar archive without -l, perhaps because it
- // is not installed as a library. Detect that and link the archive.
- if (LinkInArchive(File, is_native))
- return true;
- break;
-
- case sys::Bitcode_FileType: {
- verbose("Linking bitcode file '" + File.str() + "'");
- std::auto_ptr<Module> M(LoadObject(File));
- if (M.get() == 0)
- return error("Cannot load file '" + File.str() + "': " + Error);
- if (LinkInModule(M.get(), &Error))
- return error("Cannot link file '" + File.str() + "': " + Error);
-
- verbose("Linked in file '" + File.str() + "'");
- break;
- }
-
- case sys::ELF_Relocatable_FileType:
- case sys::ELF_SharedObject_FileType:
- case sys::Mach_O_Object_FileType:
- case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
- case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
- case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
- case sys::COFF_FileType:
- is_native = true;
- break;
- }
- return false;
-}
-
-/// LinkFiles - takes a module and a list of files and links them all together.
-/// It locates the file either in the current directory, as its absolute
-/// or relative pathname, or as a file somewhere in LLVM_LIB_SEARCH_PATH.
-///
-/// Inputs:
-/// Files - A vector of sys::Path indicating the LLVM bitcode filenames
-/// to be linked. The names can refer to a mixture of pure LLVM
-/// bitcode files and archive (ar) formatted files.
-///
-/// Return value:
-/// FALSE - No errors.
-/// TRUE - Some error occurred.
-///
-bool Linker::LinkInFiles(const std::vector<sys::Path> &Files) {
- bool is_native;
- for (unsigned i = 0; i < Files.size(); ++i)
- if (LinkInFile(Files[i], is_native))
- return true;
- return false;
-}
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index a6599bfe4fb2..74cbdadd61eb 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -12,21 +12,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/Linker.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/TypeFinder.h"
+#include "llvm-c/Linker.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeFinder.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm-c/Linker.h"
#include <cctype>
using namespace llvm;
@@ -180,7 +180,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
return false;
} else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
- if (DVTy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
+ if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
return false;
}
@@ -421,13 +421,6 @@ namespace {
}
void computeTypeMapping();
- bool categorizeModuleFlagNodes(const NamedMDNode *ModFlags,
- DenseMap<MDString*, MDNode*> &ErrorNode,
- DenseMap<MDString*, MDNode*> &WarningNode,
- DenseMap<MDString*, MDNode*> &OverrideNode,
- DenseMap<MDString*,
- SmallSetVector<MDNode*, 8> > &RequireNodes,
- SmallSetVector<MDString*, 16> &SeenIDs);
bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV);
bool linkGlobalProto(GlobalVariable *SrcGV);
@@ -613,7 +606,8 @@ void ModuleLinker::computeTypeMapping() {
// Check to see if there is a dot in the name followed by a digit.
size_t DotPos = ST->getName().rfind('.');
if (DotPos == 0 || DotPos == StringRef::npos ||
- ST->getName().back() == '.' || !isdigit(ST->getName()[DotPos+1]))
+ ST->getName().back() == '.' ||
+ !isdigit(static_cast<unsigned char>(ST->getName()[DotPos+1])))
continue;
// Check to see if the destination module has a struct with the prefix name.
@@ -987,76 +981,16 @@ void ModuleLinker::linkNamedMDNodes() {
}
}
-/// categorizeModuleFlagNodes - Categorize the module flags according to their
-/// type: Error, Warning, Override, and Require.
-bool ModuleLinker::
-categorizeModuleFlagNodes(const NamedMDNode *ModFlags,
- DenseMap<MDString*, MDNode*> &ErrorNode,
- DenseMap<MDString*, MDNode*> &WarningNode,
- DenseMap<MDString*, MDNode*> &OverrideNode,
- DenseMap<MDString*,
- SmallSetVector<MDNode*, 8> > &RequireNodes,
- SmallSetVector<MDString*, 16> &SeenIDs) {
- bool HasErr = false;
-
- for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
- MDNode *Op = ModFlags->getOperand(I);
- assert(Op->getNumOperands() == 3 && "Invalid module flag metadata!");
- assert(isa<ConstantInt>(Op->getOperand(0)) &&
- "Module flag's first operand must be an integer!");
- assert(isa<MDString>(Op->getOperand(1)) &&
- "Module flag's second operand must be an MDString!");
-
- ConstantInt *Behavior = cast<ConstantInt>(Op->getOperand(0));
- MDString *ID = cast<MDString>(Op->getOperand(1));
- Value *Val = Op->getOperand(2);
- switch (Behavior->getZExtValue()) {
- default:
- assert(false && "Invalid behavior in module flag metadata!");
- break;
- case Module::Error: {
- MDNode *&ErrNode = ErrorNode[ID];
- if (!ErrNode) ErrNode = Op;
- if (ErrNode->getOperand(2) != Val)
- HasErr = emitError("linking module flags '" + ID->getString() +
- "': IDs have conflicting values");
- break;
- }
- case Module::Warning: {
- MDNode *&WarnNode = WarningNode[ID];
- if (!WarnNode) WarnNode = Op;
- if (WarnNode->getOperand(2) != Val)
- errs() << "WARNING: linking module flags '" << ID->getString()
- << "': IDs have conflicting values";
- break;
- }
- case Module::Require: RequireNodes[ID].insert(Op); break;
- case Module::Override: {
- MDNode *&OvrNode = OverrideNode[ID];
- if (!OvrNode) OvrNode = Op;
- if (OvrNode->getOperand(2) != Val)
- HasErr = emitError("linking module flags '" + ID->getString() +
- "': IDs have conflicting override values");
- break;
- }
- }
-
- SeenIDs.insert(ID);
- }
-
- return HasErr;
-}
-
/// linkModuleFlagsMetadata - Merge the linker flags in Src into the Dest
/// module.
bool ModuleLinker::linkModuleFlagsMetadata() {
+ // If the source module has no module flags, we are done.
const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
if (!SrcModFlags) return false;
- NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata();
-
// If the destination module doesn't have module flags yet, then just copy
// over the source module's flags.
+ NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata();
if (DstModFlags->getNumOperands() == 0) {
for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
DstModFlags->addOperand(SrcModFlags->getOperand(I));
@@ -1064,89 +998,137 @@ bool ModuleLinker::linkModuleFlagsMetadata() {
return false;
}
- bool HasErr = false;
+ // First build a map of the existing module flags and requirements.
+ DenseMap<MDString*, MDNode*> Flags;
+ SmallSetVector<MDNode*, 16> Requirements;
+ for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) {
+ MDNode *Op = DstModFlags->getOperand(I);
+ ConstantInt *Behavior = cast<ConstantInt>(Op->getOperand(0));
+ MDString *ID = cast<MDString>(Op->getOperand(1));
- // Otherwise, we have to merge them based on their behaviors. First,
- // categorize all of the nodes in the modules' module flags. If an error or
- // warning occurs, then emit the appropriate message(s).
- DenseMap<MDString*, MDNode*> ErrorNode;
- DenseMap<MDString*, MDNode*> WarningNode;
- DenseMap<MDString*, MDNode*> OverrideNode;
- DenseMap<MDString*, SmallSetVector<MDNode*, 8> > RequireNodes;
- SmallSetVector<MDString*, 16> SeenIDs;
-
- HasErr |= categorizeModuleFlagNodes(SrcModFlags, ErrorNode, WarningNode,
- OverrideNode, RequireNodes, SeenIDs);
- HasErr |= categorizeModuleFlagNodes(DstModFlags, ErrorNode, WarningNode,
- OverrideNode, RequireNodes, SeenIDs);
-
- // Check that there isn't both an error and warning node for a flag.
- for (SmallSetVector<MDString*, 16>::iterator
- I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
- MDString *ID = *I;
- if (ErrorNode[ID] && WarningNode[ID])
- HasErr = emitError("linking module flags '" + ID->getString() +
- "': IDs have conflicting behaviors");
+ if (Behavior->getZExtValue() == Module::Require) {
+ Requirements.insert(cast<MDNode>(Op->getOperand(2)));
+ } else {
+ Flags[ID] = Op;
+ }
}
- // Early exit if we had an error.
- if (HasErr) return true;
-
- // Get the destination's module flags ready for new operands.
- DstModFlags->dropAllReferences();
-
- // Add all of the module flags to the destination module.
- DenseMap<MDString*, SmallVector<MDNode*, 4> > AddedNodes;
- for (SmallSetVector<MDString*, 16>::iterator
- I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
- MDString *ID = *I;
- if (OverrideNode[ID]) {
- DstModFlags->addOperand(OverrideNode[ID]);
- AddedNodes[ID].push_back(OverrideNode[ID]);
- } else if (ErrorNode[ID]) {
- DstModFlags->addOperand(ErrorNode[ID]);
- AddedNodes[ID].push_back(ErrorNode[ID]);
- } else if (WarningNode[ID]) {
- DstModFlags->addOperand(WarningNode[ID]);
- AddedNodes[ID].push_back(WarningNode[ID]);
+ // Merge in the flags from the source module, and also collect its set of
+ // requirements.
+ bool HasErr = false;
+ for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) {
+ MDNode *SrcOp = SrcModFlags->getOperand(I);
+ ConstantInt *SrcBehavior = cast<ConstantInt>(SrcOp->getOperand(0));
+ MDString *ID = cast<MDString>(SrcOp->getOperand(1));
+ MDNode *DstOp = Flags.lookup(ID);
+ unsigned SrcBehaviorValue = SrcBehavior->getZExtValue();
+
+ // If this is a requirement, add it and continue.
+ if (SrcBehaviorValue == Module::Require) {
+ // If the destination module does not already have this requirement, add
+ // it.
+ if (Requirements.insert(cast<MDNode>(SrcOp->getOperand(2)))) {
+ DstModFlags->addOperand(SrcOp);
+ }
+ continue;
+ }
+
+ // If there is no existing flag with this ID, just add it.
+ if (!DstOp) {
+ Flags[ID] = SrcOp;
+ DstModFlags->addOperand(SrcOp);
+ continue;
}
- for (SmallSetVector<MDNode*, 8>::iterator
- II = RequireNodes[ID].begin(), IE = RequireNodes[ID].end();
- II != IE; ++II)
- DstModFlags->addOperand(*II);
- }
+ // Otherwise, perform a merge.
+ ConstantInt *DstBehavior = cast<ConstantInt>(DstOp->getOperand(0));
+ unsigned DstBehaviorValue = DstBehavior->getZExtValue();
+
+ // If either flag has override behavior, handle it first.
+ if (DstBehaviorValue == Module::Override) {
+ // Diagnose inconsistent flags which both have override behavior.
+ if (SrcBehaviorValue == Module::Override &&
+ SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+ HasErr |= emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting override values");
+ }
+ continue;
+ } else if (SrcBehaviorValue == Module::Override) {
+ // Update the destination flag to that of the source.
+ DstOp->replaceOperandWith(0, SrcBehavior);
+ DstOp->replaceOperandWith(2, SrcOp->getOperand(2));
+ continue;
+ }
- // Now check that all of the requirements have been satisfied.
- for (SmallSetVector<MDString*, 16>::iterator
- I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
- MDString *ID = *I;
- SmallSetVector<MDNode*, 8> &Set = RequireNodes[ID];
-
- for (SmallSetVector<MDNode*, 8>::iterator
- II = Set.begin(), IE = Set.end(); II != IE; ++II) {
- MDNode *Node = *II;
- assert(isa<MDNode>(Node->getOperand(2)) &&
- "Module flag's third operand must be an MDNode!");
- MDNode *Val = cast<MDNode>(Node->getOperand(2));
-
- MDString *ReqID = cast<MDString>(Val->getOperand(0));
- Value *ReqVal = Val->getOperand(1);
-
- bool HasValue = false;
- for (SmallVectorImpl<MDNode*>::iterator
- RI = AddedNodes[ReqID].begin(), RE = AddedNodes[ReqID].end();
- RI != RE; ++RI) {
- MDNode *ReqNode = *RI;
- if (ReqNode->getOperand(2) == ReqVal) {
- HasValue = true;
- break;
- }
+ // Diagnose inconsistent merge behavior types.
+ if (SrcBehaviorValue != DstBehaviorValue) {
+ HasErr |= emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting behaviors");
+ continue;
+ }
+
+ // Perform the merge for standard behavior types.
+ switch (SrcBehaviorValue) {
+ case Module::Require:
+ case Module::Override: assert(0 && "not possible"); break;
+ case Module::Error: {
+ // Emit an error if the values differ.
+ if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+ HasErr |= emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting values");
+ }
+ continue;
+ }
+ case Module::Warning: {
+ // Emit a warning if the values differ.
+ if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+ errs() << "WARNING: linking module flags '" << ID->getString()
+ << "': IDs have conflicting values";
}
+ continue;
+ }
+ case Module::Append: {
+ MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+ MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
+ unsigned NumOps = DstValue->getNumOperands() + SrcValue->getNumOperands();
+ Value **VP, **Values = VP = new Value*[NumOps];
+ for (unsigned i = 0, e = DstValue->getNumOperands(); i != e; ++i, ++VP)
+ *VP = DstValue->getOperand(i);
+ for (unsigned i = 0, e = SrcValue->getNumOperands(); i != e; ++i, ++VP)
+ *VP = SrcValue->getOperand(i);
+ DstOp->replaceOperandWith(2, MDNode::get(DstM->getContext(),
+ ArrayRef<Value*>(Values,
+ NumOps)));
+ delete[] Values;
+ break;
+ }
+ case Module::AppendUnique: {
+ SmallSetVector<Value*, 16> Elts;
+ MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+ MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
+ for (unsigned i = 0, e = DstValue->getNumOperands(); i != e; ++i)
+ Elts.insert(DstValue->getOperand(i));
+ for (unsigned i = 0, e = SrcValue->getNumOperands(); i != e; ++i)
+ Elts.insert(SrcValue->getOperand(i));
+ DstOp->replaceOperandWith(2, MDNode::get(DstM->getContext(),
+ ArrayRef<Value*>(Elts.begin(),
+ Elts.end())));
+ break;
+ }
+ }
+ }
- if (!HasValue)
- HasErr = emitError("linking module flags '" + ReqID->getString() +
- "': does not have the required value");
+ // Check all of the requirements.
+ for (unsigned I = 0, E = Requirements.size(); I != E; ++I) {
+ MDNode *Requirement = Requirements[I];
+ MDString *Flag = cast<MDString>(Requirement->getOperand(0));
+ Value *ReqValue = Requirement->getOperand(1);
+
+ MDNode *Op = Flags[Flag];
+ if (!Op || Op->getOperand(2) != ReqValue) {
+ HasErr |= emitError("linking module flags '" + Flag->getString() +
+ "': does not have the required value");
+ continue;
}
}
@@ -1187,19 +1169,6 @@ bool ModuleLinker::run() {
SrcM->getModuleInlineAsm());
}
- // Update the destination module's dependent libraries list with the libraries
- // from the source module. There's no opportunity for duplicates here as the
- // Module ensures that duplicate insertions are discarded.
- for (Module::lib_iterator SI = SrcM->lib_begin(), SE = SrcM->lib_end();
- SI != SE; ++SI)
- DstM->addLibrary(*SI);
-
- // If the source library's module id is in the dependent library list of the
- // destination library, remove it since that module is now linked in.
- StringRef ModuleId = SrcM->getModuleIdentifier();
- if (!ModuleId.empty())
- DstM->removeLibrary(sys::path::stem(ModuleId));
-
// Loop over all of the linked values to compute type mappings.
computeTypeMapping();
@@ -1323,7 +1292,7 @@ bool ModuleLinker::run() {
//===----------------------------------------------------------------------===//
/// LinkModules - This function links two modules together, with the resulting
-/// left module modified to be the composite of the two input modules. If an
+/// Dest module modified to be the composite of the two input modules. If an
/// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
/// the problem. Upon failure, the Dest module could be in a modified state,
/// and shouldn't be relied on to be consistent.
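
For orientation, the merge behaviors handled above can be exercised through Module::addModuleFlag; a sketch with made-up flag names and values, assuming modules Src and Dst built elsewhere.

    #include "llvm/IR/Module.h"

    // Error behavior with equal values: links cleanly.
    Src->addModuleFlag(llvm::Module::Error, "flag-a", 1);
    Dst->addModuleFlag(llvm::Module::Error, "flag-a", 1);

    // Warning behavior with differing values: Dst keeps its value and the
    // linker prints "IDs have conflicting values" as a warning, not an error.
    Src->addModuleFlag(llvm::Module::Warning, "flag-b", 2);
    Dst->addModuleFlag(llvm::Module::Warning, "flag-b", 1);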
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index 7c6cf4f3dd78..74d24f278b77 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -12,9 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Linker.h"
-#include "llvm/Module.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Support/Path.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
@@ -24,7 +23,6 @@ Linker::Linker(StringRef progname, StringRef modname,
LLVMContext& C, unsigned flags):
Context(C),
Composite(new Module(modname, C)),
- LibPaths(),
Flags(flags),
Error(),
ProgramName(progname) { }
@@ -32,7 +30,6 @@ Linker::Linker(StringRef progname, StringRef modname,
Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
Context(aModule->getContext()),
Composite(aModule),
- LibPaths(),
Flags(flags),
Error(),
ProgramName(progname) { }
@@ -63,119 +60,11 @@ Linker::verbose(StringRef message) {
errs() << " " << message << "\n";
}
-void
-Linker::addPath(const sys::Path& path) {
- LibPaths.push_back(path);
-}
-
-void
-Linker::addPaths(const std::vector<std::string>& paths) {
- for (unsigned i = 0, e = paths.size(); i != e; ++i)
- LibPaths.push_back(sys::Path(paths[i]));
-}
-
-void
-Linker::addSystemPaths() {
- sys::Path::GetBitcodeLibraryPaths(LibPaths);
- LibPaths.insert(LibPaths.begin(),sys::Path("./"));
-}
-
Module*
Linker::releaseModule() {
Module* result = Composite;
- LibPaths.clear();
Error.clear();
Composite = 0;
Flags = 0;
return result;
}
-
-// LoadObject - Read in and parse the bitcode file named by FN and return the
-// module it contains (wrapped in an auto_ptr), or auto_ptr<Module>() and set
-// Error if an error occurs.
-std::auto_ptr<Module>
-Linker::LoadObject(const sys::Path &FN) {
- std::string ParseErrorMessage;
- Module *Result = 0;
-
- OwningPtr<MemoryBuffer> Buffer;
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(FN.c_str(), Buffer))
- ParseErrorMessage = "Error reading file '" + FN.str() + "'" + ": "
- + ec.message();
- else
- Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage);
-
- if (Result)
- return std::auto_ptr<Module>(Result);
- Error = "Bitcode file '" + FN.str() + "' could not be loaded";
- if (ParseErrorMessage.size())
- Error += ": " + ParseErrorMessage;
- return std::auto_ptr<Module>();
-}
-
-// IsLibrary - Determine if "Name" is a library in "Directory". Return
-// a non-empty sys::Path if its found, an empty one otherwise.
-static inline sys::Path IsLibrary(StringRef Name,
- const sys::Path &Directory) {
-
- sys::Path FullPath(Directory);
-
- // Try the libX.a form
- FullPath.appendComponent(("lib" + Name).str());
- FullPath.appendSuffix("a");
- if (FullPath.isArchive())
- return FullPath;
-
- // Try the libX.bca form
- FullPath.eraseSuffix();
- FullPath.appendSuffix("bca");
- if (FullPath.isArchive())
- return FullPath;
-
- // Try the libX.so (or .dylib) form
- FullPath.eraseSuffix();
- FullPath.appendSuffix(sys::Path::GetDLLSuffix());
- if (FullPath.isDynamicLibrary()) // Native shared library?
- return FullPath;
- if (FullPath.isBitcodeFile()) // .so file containing bitcode?
- return FullPath;
-
- // Try libX form, to make it possible to add dependency on the
- // specific version of .so, like liblzma.so.1.0.0
- FullPath.eraseSuffix();
- if (FullPath.isDynamicLibrary()) // Native shared library?
- return FullPath;
- if (FullPath.isBitcodeFile()) // .so file containing bitcode?
- return FullPath;
-
- // Not found .. fall through
-
- // Indicate that the library was not found in the directory.
- FullPath.clear();
- return FullPath;
-}
-
-/// FindLib - Try to convert Filename into the name of a file that we can open,
-/// if it does not already name a file we can open, by first trying to open
-/// Filename, then libFilename.[suffix] for each of a set of several common
-/// library suffixes, in each of the directories in LibPaths. Returns an empty
-/// Path if no matching file can be found.
-///
-sys::Path
-Linker::FindLib(StringRef Filename) {
- // Determine if the pathname can be found as it stands.
- sys::Path FilePath(Filename);
- if (FilePath.canRead() &&
- (FilePath.isArchive() || FilePath.isDynamicLibrary()))
- return FilePath;
-
- // Iterate over the directories in Paths to see if we can find the library
- // there.
- for (unsigned Index = 0; Index != LibPaths.size(); ++Index) {
- sys::Path Directory(LibPaths[Index]);
- sys::Path FullPath = IsLibrary(Filename, Directory);
- if (!FullPath.isEmpty())
- return FullPath;
- }
- return sys::Path();
-}
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 99bff96bb971..db882c020b76 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -36,7 +36,6 @@ add_llvm_library(LLVMMC
MCStreamer.cpp
MCSubtargetInfo.cpp
MCSymbol.cpp
- MCTargetAsmLexer.cpp
MCValue.cpp
MCWin64EH.cpp
MachObjectWriter.cpp
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index eda062376edc..3d995484e7c7 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -1,4 +1,4 @@
-//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -------------------===//
+//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,17 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "MCELF.h"
+#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
@@ -29,9 +29,8 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ELF.h"
-
+#include "llvm/Support/ErrorHandling.h"
#include <vector>
using namespace llvm;
@@ -136,16 +135,14 @@ class ELFObjectWriter : public MCObjectWriter {
const MCSymbol *undefinedExplicitRelSym(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- return TargetObjectWriter->undefinedExplicitRelSym(Target, Fixup, IsPCRel);
+ return TargetObjectWriter->undefinedExplicitRelSym(Target, Fixup,
+ IsPCRel);
}
bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
bool hasRelocationAddend() const {
return TargetObjectWriter->hasRelocationAddend();
}
- unsigned getEFlags() const {
- return TargetObjectWriter->getEFlags();
- }
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend) const {
@@ -153,13 +150,12 @@ class ELFObjectWriter : public MCObjectWriter {
IsRelocWithSymbol, Addend);
}
-
public:
ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
raw_ostream &_OS, bool IsLittleEndian)
: MCObjectWriter(_OS, IsLittleEndian),
TargetObjectWriter(MOTW),
- NeedsGOT(false), NeedsSymtabShndx(false){
+ NeedsGOT(false), NeedsSymtabShndx(false) {
}
virtual ~ELFObjectWriter();
@@ -204,7 +200,7 @@ class ELFObjectWriter : public MCObjectWriter {
void String8(MCDataFragment &F, uint8_t Value) {
char buf[1];
buf[0] = Value;
- F.getContents() += StringRef(buf, 1);
+ F.getContents().append(&buf[0], &buf[1]);
}
void String16(MCDataFragment &F, uint16_t Value) {
@@ -213,7 +209,7 @@ class ELFObjectWriter : public MCObjectWriter {
StringLE16(buf, Value);
else
StringBE16(buf, Value);
- F.getContents() += StringRef(buf, 2);
+ F.getContents().append(&buf[0], &buf[2]);
}
void String32(MCDataFragment &F, uint32_t Value) {
@@ -222,7 +218,7 @@ class ELFObjectWriter : public MCObjectWriter {
StringLE32(buf, Value);
else
StringBE32(buf, Value);
- F.getContents() += StringRef(buf, 4);
+ F.getContents().append(&buf[0], &buf[4]);
}
void String64(MCDataFragment &F, uint64_t Value) {
@@ -231,10 +227,11 @@ class ELFObjectWriter : public MCObjectWriter {
StringLE64(buf, Value);
else
StringBE64(buf, Value);
- F.getContents() += StringRef(buf, 8);
+ F.getContents().append(&buf[0], &buf[8]);
}
- void WriteHeader(uint64_t SectionDataSize,
+ void WriteHeader(const MCAssembler &Asm,
+ uint64_t SectionDataSize,
unsigned NumberOfSections);
void WriteSymbolEntry(MCDataFragment *SymtabF,
@@ -374,7 +371,8 @@ ELFObjectWriter::~ELFObjectWriter()
{}
// Emit the ELF header.
-void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize,
+void ELFObjectWriter::WriteHeader(const MCAssembler &Asm,
+ uint64_t SectionDataSize,
unsigned NumberOfSections) {
// ELF Header
// ----------
@@ -412,7 +410,7 @@ void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize,
sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
// e_flags = whatever the target wants
- Write32(getEFlags());
+ Write32(Asm.getELFHeaderEFlags());
// e_ehsize = ELF header size
Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
@@ -548,12 +546,17 @@ void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF,
bool IsReserved = Data.isCommon() || Data.getSymbol().isAbsolute() ||
Data.getSymbol().isVariable();
+ // Binding and Type share the same byte as upper and lower nibbles
uint8_t Binding = MCELF::GetBinding(OrigData);
- uint8_t Visibility = MCELF::GetVisibility(OrigData);
uint8_t Type = MCELF::GetType(Data);
-
uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift);
- uint8_t Other = Visibility;
+
+ // Other and Visibility share the same byte with Visibility using the lower
+ // 2 bits
+ uint8_t Visibility = MCELF::GetVisibility(OrigData);
+ uint8_t Other = MCELF::getOther(OrigData) <<
+ (ELF_Other_Shift - ELF_STV_Shift);
+ Other |= Visibility;
uint64_t Value = SymbolValue(Data, Layout);
uint64_t Size = 0;
@@ -866,7 +869,7 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
// FIXME: Is this the correct place to do this?
// FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed?
if (NeedsGOT) {
- llvm::StringRef Name = "_GLOBAL_OFFSET_TABLE_";
+ StringRef Name = "_GLOBAL_OFFSET_TABLE_";
MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name);
MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym);
Data.setExternal(true);
@@ -975,7 +978,7 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
UndefinedSymbolData[i].SymbolData->setIndex(Index++);
- if (NumRegularSections > ELF::SHN_LORESERVE)
+ if (Index >= ELF::SHN_LORESERVE)
NeedsSymtabShndx = true;
}
@@ -1187,7 +1190,7 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
// The first entry of a string table holds a null character so skip
// section 0.
uint64_t Index = 1;
- F->getContents() += '\x00';
+ F->getContents().push_back('\x00');
for (unsigned int I = 0, E = Sections.size(); I != E; ++I) {
const MCSectionELF &Section = *Sections[I];
@@ -1205,8 +1208,8 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
SectionStringTableIndex[&Section] = Index;
Index += Name.size() + 1;
- F->getContents() += Name;
- F->getContents() += '\x00';
+ F->getContents().append(Name.begin(), Name.end());
+ F->getContents().push_back('\x00');
}
}
@@ -1320,6 +1323,8 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
case ELF::SHT_FINI_ARRAY:
case ELF::SHT_PREINIT_ARRAY:
case ELF::SHT_X86_64_UNWIND:
+ case ELF::SHT_MIPS_REGINFO:
+ case ELF::SHT_MIPS_OPTIONS:
// Nothing to do.
break;
@@ -1333,6 +1338,24 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
break;
}
+ if (TargetObjectWriter->getEMachine() == ELF::EM_ARM &&
+ Section.getType() == ELF::SHT_ARM_EXIDX) {
+ StringRef SecName(Section.getSectionName());
+ if (SecName == ".ARM.exidx") {
+ sh_link = SectionIndexMap.lookup(
+ Asm.getContext().getELFSection(".text",
+ ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
+ SectionKind::getText()));
+ } else if (SecName.startswith(".ARM.exidx")) {
+ sh_link = SectionIndexMap.lookup(
+ Asm.getContext().getELFSection(SecName.substr(sizeof(".ARM.exidx") - 1),
+ ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
+ SectionKind::getText()));
+ }
+ }
+
WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(),
Section.getFlags(), 0, Offset, Size, sh_link, sh_info,
Alignment, Section.getEntrySize());
@@ -1381,7 +1404,7 @@ void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
++i) {
const MCFragment &F = *i;
assert(F.getKind() == MCFragment::FT_Data);
- WriteBytes(cast<MCDataFragment>(F).getContents().str());
+ WriteBytes(cast<MCDataFragment>(F).getContents());
}
} else {
Asm.writeSectionData(&SD, Layout);
@@ -1533,7 +1556,7 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
}
// Write out the ELF header ...
- WriteHeader(SectionHeaderOffset, NumSections + 1);
+ WriteHeader(Asm, SectionHeaderOffset, NumSections + 1);
// ... then the regular sections ...
// + because of .shstrtab
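
The st_info/st_other packing in WriteSymbol above follows the standard ELF layout; a worked example using the generic ELF constants. The shift amounts here are the conventional spec values and are assumed to match this file's local ELF_*_Shift enums.

    #include "llvm/Support/ELF.h"

    uint8_t Binding = llvm::ELF::STB_GLOBAL;    // 1, upper nibble of st_info
    uint8_t Type    = llvm::ELF::STT_FUNC;      // 2, lower nibble of st_info
    uint8_t Info    = (Binding << 4) | Type;    // 0x12

    uint8_t Visibility = llvm::ELF::STV_HIDDEN; // 2, low two bits of st_other
    uint8_t TargetBits = 0;                     // payload from MCELF::getOther()
    uint8_t Other      = (TargetBits << 2) | Visibility;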
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 7ea0f3b85a53..51bb4357102e 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -24,6 +24,8 @@ using namespace llvm;
MCAsmInfo::MCAsmInfo() {
PointerSize = 4;
+ CalleeSaveStackSlotSize = 4;
+
IsLittleEndian = true;
StackGrowsUp = false;
HasSubsectionsViaSymbols = false;
@@ -37,6 +39,7 @@ MCAsmInfo::MCAsmInfo() {
CommentColumn = 40;
CommentString = "#";
LabelSuffix = ":";
+ DebugLabelSuffix = ":";
GlobalPrefix = "";
PrivateGlobalPrefix = ".";
LinkerPrivateGlobalPrefix = "";
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 17a6323d0e76..35613b411c24 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -8,6 +8,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -20,15 +25,10 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PathV2.h"
#include <cctype>
using namespace llvm;
@@ -71,7 +71,7 @@ public:
MCInstPrinter *printer, MCCodeEmitter *emitter,
MCAsmBackend *asmbackend,
bool showInst)
- : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
+ : MCStreamer(SK_AsmStreamer, Context), OS(os), MAI(Context.getAsmInfo()),
InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI),
@@ -127,17 +127,25 @@ public:
virtual void ChangeSection(const MCSection *Section);
virtual void InitSections() {
+ InitToTextSection();
+ }
+
+ virtual void InitToTextSection() {
// FIXME, this is MachO specific, but the testsuite
// expects this.
- SwitchSection(getContext().getMachOSection("__TEXT", "__text",
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 0, SectionKind::getText()));
+ SwitchSection(getContext().getMachOSection(
+ "__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
}
virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitDebugLabel(MCSymbol *Symbol);
+
virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol);
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitLinkerOptions(ArrayRef<std::string> Options);
virtual void EmitDataRegion(MCDataRegionType Kind);
virtual void EmitThumbFunc(MCSymbol *Func);
@@ -207,7 +215,7 @@ public:
virtual void EmitFileDirective(StringRef Filename);
virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
- StringRef Filename);
+ StringRef Filename, unsigned CUID = 0);
virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
unsigned Column, unsigned Flags,
unsigned Isa, unsigned Discriminator,
@@ -226,6 +234,8 @@ public:
virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
virtual void EmitCFISignalFrame();
+ virtual void EmitCFIUndefined(int64_t Register);
+ virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
virtual void EmitWin64EHEndProc();
@@ -255,6 +265,10 @@ public:
virtual void EmitInstruction(const MCInst &Inst);
+ virtual void EmitBundleAlignMode(unsigned AlignPow2);
+ virtual void EmitBundleLock(bool AlignToEnd);
+ virtual void EmitBundleUnlock();
+
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
@@ -263,6 +277,10 @@ public:
virtual void FinishImpl();
/// @}
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_AsmStreamer;
+ }
};
} // end anonymous namespace.
@@ -343,6 +361,14 @@ void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
EmitEOL();
}
+void MCAsmStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ MCStreamer::EmitDebugLabel(Symbol);
+
+ OS << *Symbol << MAI.getDebugLabelSuffix();
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break;
@@ -354,6 +380,16 @@ void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
EmitEOL();
}
+void MCAsmStreamer::EmitLinkerOptions(ArrayRef<std::string> Options) {
+ assert(!Options.empty() && "At least one option is required!");
+ OS << "\t.linker_option \"" << Options[0] << '"';
+ for (ArrayRef<std::string>::iterator it = Options.begin() + 1,
+ ie = Options.end(); it != ie; ++it) {
+ OS << ", " << '"' << *it << '"';
+ }
+ OS << "\n";
+}
+
void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) {
MCContext &Ctx = getContext();
const MCAsmInfo &MAI = Ctx.getAsmInfo();
@@ -792,14 +828,14 @@ void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
}
bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
- StringRef Filename) {
+ StringRef Filename, unsigned CUID) {
if (!UseDwarfDirectory && !Directory.empty()) {
if (sys::path::is_absolute(Filename))
- return EmitDwarfFileDirective(FileNo, "", Filename);
+ return EmitDwarfFileDirective(FileNo, "", Filename, CUID);
SmallString<128> FullPathName = Directory;
sys::path::append(FullPathName, Filename);
- return EmitDwarfFileDirective(FileNo, "", FullPathName);
+ return EmitDwarfFileDirective(FileNo, "", FullPathName, CUID);
}
if (UseLoc) {
@@ -810,8 +846,11 @@ bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
}
PrintQuotedString(Filename, OS);
EmitEOL();
+ // All .file directives will belong to a single CUID.
+ CUID = 0;
}
- return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename);
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename,
+ CUID);
}
void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -1036,6 +1075,26 @@ void MCAsmStreamer::EmitCFISignalFrame() {
EmitEOL();
}
+void MCAsmStreamer::EmitCFIUndefined(int64_t Register) {
+ MCStreamer::EmitCFIUndefined(Register);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_undefined " << Register;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) {
+ MCStreamer::EmitCFIRegister(Register1, Register2);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_register " << Register1 << ", " << Register2;
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
MCStreamer::EmitWin64EHStartProc(Symbol);
@@ -1329,6 +1388,23 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
EmitEOL();
}
+void MCAsmStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
+ OS << "\t.bundle_align_mode " << AlignPow2;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitBundleLock(bool AlignToEnd) {
+ OS << "\t.bundle_lock";
+ if (AlignToEnd)
+ OS << " align_to_end";
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitBundleUnlock() {
+ OS << "\t.bundle_unlock";
+ EmitEOL();
+}
+
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
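
The .bundle_align_mode / .bundle_lock / .bundle_unlock directives printed above map directly onto the new MCStreamer hooks; a hedged sketch of how a frontend for a bundling target (for example, NaCl-style 16-byte bundles) might drive them. Streamer is assumed to be an already-configured MCStreamer.

    Streamer.EmitBundleAlignMode(4);  // bundle size = 2^4 = 16 bytes
    Streamer.EmitBundleLock(false);   // following instructions form one group
    // ... emit a sequence that must not straddle a bundle boundary ...
    Streamer.EmitBundleUnlock();      // close the bundle-locked group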
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 726ec5aba512..1829266f96cb 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -9,31 +9,43 @@
#define DEBUG_TYPE "assembler"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
namespace stats {
-STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
+STATISTIC(EmittedFragments, "Number of emitted assembler fragments - total");
+STATISTIC(EmittedRelaxableFragments,
+ "Number of emitted assembler fragments - relaxable");
+STATISTIC(EmittedDataFragments,
+ "Number of emitted assembler fragments - data");
+STATISTIC(EmittedCompactEncodedInstFragments,
+ "Number of emitted assembler fragments - compact encoded inst");
+STATISTIC(EmittedAlignFragments,
+ "Number of emitted assembler fragments - align");
+STATISTIC(EmittedFillFragments,
+ "Number of emitted assembler fragments - fill");
+STATISTIC(EmittedOrgFragments,
+ "Number of emitted assembler fragments - org");
STATISTIC(evaluateFixup, "Number of evaluated fixups");
STATISTIC(FragmentLayouts, "Number of fragment layouts");
STATISTIC(ObjectBytes, "Number of emitted object file bytes");
@@ -61,7 +73,7 @@ MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
SectionOrder.push_back(&*it);
}
-bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const {
+bool MCAsmLayout::isFragmentValid(const MCFragment *F) const {
const MCSectionData &SD = *F->getParent();
const MCFragment *LastValid = LastValidFragment.lookup(&SD);
if (!LastValid)
@@ -70,17 +82,18 @@ bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const {
return F->getLayoutOrder() <= LastValid->getLayoutOrder();
}
-void MCAsmLayout::Invalidate(MCFragment *F) {
- // If this fragment wasn't already up-to-date, we don't need to do anything.
- if (!isFragmentUpToDate(F))
+void MCAsmLayout::invalidateFragmentsFrom(MCFragment *F) {
+ // If this fragment wasn't already valid, we don't need to do anything.
+ if (!isFragmentValid(F))
return;
- // Otherwise, reset the last valid fragment to this fragment.
+ // Otherwise, reset the last valid fragment to the previous fragment
+ // (if this is the first fragment, it will be NULL).
const MCSectionData &SD = *F->getParent();
- LastValidFragment[&SD] = F;
+ LastValidFragment[&SD] = F->getPrevNode();
}
-void MCAsmLayout::EnsureValid(const MCFragment *F) const {
+void MCAsmLayout::ensureValid(const MCFragment *F) const {
MCSectionData &SD = *F->getParent();
MCFragment *Cur = LastValidFragment[&SD];
@@ -89,15 +102,16 @@ void MCAsmLayout::EnsureValid(const MCFragment *F) const {
else
Cur = Cur->getNextNode();
- // Advance the layout position until the fragment is up-to-date.
- while (!isFragmentUpToDate(F)) {
- const_cast<MCAsmLayout*>(this)->LayoutFragment(Cur);
+ // Advance the layout position until the fragment is valid.
+ while (!isFragmentValid(F)) {
+ assert(Cur && "Layout bookkeeping error");
+ const_cast<MCAsmLayout*>(this)->layoutFragment(Cur);
Cur = Cur->getNextNode();
}
}
uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
- EnsureValid(F);
+ ensureValid(F);
assert(F->Offset != ~UINT64_C(0) && "Address not set!");
return F->Offset;
}
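The renames above also document the contract: layout is lazy, and any offset query first brings the fragment's predecessors up to date. A small sketch of what that guarantees to callers (illustrative, not part of the patch):

#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"

// Safe to call at any time: getFragmentOffset() runs ensureValid(), which
// walks forward from the last valid fragment and lays out stale predecessors.
uint64_t endOffsetOf(const llvm::MCAsmLayout &Layout,
                     const llvm::MCAssembler &Asm,
                     const llvm::MCFragment *F) {
  return Layout.getFragmentOffset(F) + Asm.computeFragmentSize(Layout, *F);
}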
@@ -149,6 +163,46 @@ uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
return getSectionAddressSize(SD);
}
+uint64_t MCAsmLayout::computeBundlePadding(const MCFragment *F,
+ uint64_t FOffset, uint64_t FSize) {
+ uint64_t BundleSize = Assembler.getBundleAlignSize();
+ assert(BundleSize > 0 &&
+ "computeBundlePadding should only be called if bundling is enabled");
+ uint64_t BundleMask = BundleSize - 1;
+ uint64_t OffsetInBundle = FOffset & BundleMask;
+ uint64_t EndOfFragment = OffsetInBundle + FSize;
+
+ // There are two kinds of bundling restrictions:
+ //
+ // 1) For alignToBundleEnd(), add padding to ensure that the fragment will
+ // *end* on a bundle boundary.
+ // 2) Otherwise, check if the fragment would cross a bundle boundary. If it
+ // would, add padding until the end of the bundle so that the fragment
+ // will start in a new one.
+ if (F->alignToBundleEnd()) {
+ // Three possibilities here:
+ //
+ // A) The fragment just happens to end at a bundle boundary, so we're good.
+ // B) The fragment ends before the current bundle boundary: pad it just
+ // enough to reach the boundary.
+ // C) The fragment ends after the current bundle boundary: pad it until it
+ // reaches the end of the next bundle boundary.
+ //
+ // Note: this code could be made shorter with some modulo trickery, but it's
+ // intentionally kept in its more explicit form for simplicity.
+ if (EndOfFragment == BundleSize)
+ return 0;
+ else if (EndOfFragment < BundleSize)
+ return BundleSize - EndOfFragment;
+ else { // EndOfFragment > BundleSize
+ return 2 * BundleSize - EndOfFragment;
+ }
+ } else if (EndOfFragment > BundleSize)
+ return BundleSize - OffsetInBundle;
+ else
+ return 0;
+}
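A worked instance of the arithmetic above (illustrative numbers only): take BundleSize = 16 and a 6-byte fragment whose unpadded start would be offset 12.

#include <cassert>
#include <cstdint>

void bundlePaddingExample() {
  const uint64_t BundleSize = 16, FOffset = 12, FSize = 6;
  uint64_t OffsetInBundle = FOffset & (BundleSize - 1); // 12
  uint64_t EndOfFragment = OffsetInBundle + FSize;      // 18, crosses a boundary
  // Plain fragment: pad to the next bundle start.
  assert(BundleSize - OffsetInBundle == 4);
  // alignToBundleEnd(), case C: pad so the fragment ends exactly at offset 32.
  assert(2 * BundleSize - EndOfFragment == 14);
}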
+
/* *** */
MCFragment::MCFragment() : Kind(FragmentType(~0)) {
@@ -166,12 +220,23 @@ MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
/* *** */
+MCEncodedFragment::~MCEncodedFragment() {
+}
+
+/* *** */
+
+MCEncodedFragmentWithFixups::~MCEncodedFragmentWithFixups() {
+}
+
+/* *** */
+
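The new destructors and separators above belong to a reworked fragment hierarchy. Reconstructed from this patch (the authoritative definitions are in include/llvm/MC/MCAssembler.h), it looks roughly like:

// MCFragment
//   MCEncodedFragment                (carries encoded contents)
//     MCCompactEncodedInstFragment   (instructions that never need fixups)
//     MCEncodedFragmentWithFixups    (contents plus fixups)
//       MCDataFragment
//       MCRelaxableFragment          (replaces the old MCInstFragment)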
MCSectionData::MCSectionData() : Section(0) {}
MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
: Section(&_Section),
Ordinal(~UINT32_C(0)),
Alignment(1),
+ BundleLockState(NotBundleLocked), BundleGroupBeforeFirstInst(false),
HasInstructions(false)
{
if (A)
@@ -199,12 +264,32 @@ MCAssembler::MCAssembler(MCContext &Context_, MCAsmBackend &Backend_,
MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
raw_ostream &OS_)
: Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_),
- OS(OS_), RelaxAll(false), NoExecStack(false), SubsectionsViaSymbols(false) {
+ OS(OS_), BundleAlignSize(0), RelaxAll(false), NoExecStack(false),
+ SubsectionsViaSymbols(false), ELFHeaderEFlags(0) {
}
MCAssembler::~MCAssembler() {
}
+void MCAssembler::reset() {
+ Sections.clear();
+ Symbols.clear();
+ SectionMap.clear();
+ SymbolMap.clear();
+ IndirectSymbols.clear();
+ DataRegions.clear();
+ ThumbFuncs.clear();
+ RelaxAll = false;
+ NoExecStack = false;
+ SubsectionsViaSymbols = false;
+ ELFHeaderEFlags = 0;
+
+ // reset objects owned by us
+ getBackend().reset();
+ getEmitter().reset();
+ getWriter().reset();
+}
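reset() lets a client reuse one MCAssembler for several outputs instead of rebuilding the whole MC stack. A hypothetical reuse pattern (runAssembly stands in for the caller's own driver code):

void assembleTwice(llvm::MCAssembler &Asm) {
  runAssembly(Asm); // hypothetical helper: populate sections, call Finish()
  Asm.reset();      // drop sections and symbols; reset backend/emitter/writer
  runAssembly(Asm); // the same objects can now produce a second output
}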
+
bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
// Non-temporary labels should always be visible to the linker.
if (!Symbol.isTemporary())
@@ -311,11 +396,11 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
const MCFragment &F) const {
switch (F.getKind()) {
case MCFragment::FT_Data:
- return cast<MCDataFragment>(F).getContents().size();
+ case MCFragment::FT_Relaxable:
+ case MCFragment::FT_CompactEncodedInst:
+ return cast<MCEncodedFragment>(F).getContents().size();
case MCFragment::FT_Fill:
return cast<MCFillFragment>(F).getSize();
- case MCFragment::FT_Inst:
- return cast<MCInstFragment>(F).getInstSize();
case MCFragment::FT_LEB:
return cast<MCLEBFragment>(F).getContents().size();
@@ -336,7 +421,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
}
case MCFragment::FT_Org: {
- MCOrgFragment &OF = cast<MCOrgFragment>(F);
+ const MCOrgFragment &OF = cast<MCOrgFragment>(F);
int64_t TargetLocation;
if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, Layout))
report_fatal_error("expected assembly-time absolute expression");
@@ -359,41 +444,112 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
llvm_unreachable("invalid fragment kind");
}
-void MCAsmLayout::LayoutFragment(MCFragment *F) {
+void MCAsmLayout::layoutFragment(MCFragment *F) {
MCFragment *Prev = F->getPrevNode();
- // We should never try to recompute something which is up-to-date.
- assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!");
- // We should never try to compute the fragment layout if it's predecessor
- // isn't up-to-date.
- assert((!Prev || isFragmentUpToDate(Prev)) &&
- "Attempt to compute fragment before it's predecessor!");
+ // We should never try to recompute something which is valid.
+ assert(!isFragmentValid(F) && "Attempt to recompute a valid fragment!");
+ // We should never try to compute the fragment layout if its predecessor
+ // isn't valid.
+ assert((!Prev || isFragmentValid(Prev)) &&
+ "Attempt to compute fragment before its predecessor!");
++stats::FragmentLayouts;
// Compute fragment offset and size.
- uint64_t Offset = 0;
if (Prev)
- Offset += Prev->Offset + getAssembler().computeFragmentSize(*this, *Prev);
-
- F->Offset = Offset;
+ F->Offset = Prev->Offset + getAssembler().computeFragmentSize(*this, *Prev);
+ else
+ F->Offset = 0;
LastValidFragment[F->getParent()] = F;
+
+ // If bundling is enabled and this fragment has instructions in it, it has to
+ // obey the bundling restrictions. With padding, we'll have:
+ //
+ //
+ //         BundlePadding
+ //              |||
+ // -------------------------------------
+ //   Prev  |##########|       F        |
+ // -------------------------------------
+ //                      ^
+ //                      |
+ //                    F->Offset
+ //
+ // The fragment's offset will point to after the padding, and its computed
+ // size won't include the padding.
+ //
+ if (Assembler.isBundlingEnabled() && F->hasInstructions()) {
+ assert(isa<MCEncodedFragment>(F) &&
+ "Only MCEncodedFragment implementations have instructions");
+ uint64_t FSize = Assembler.computeFragmentSize(*this, *F);
+
+ if (FSize > Assembler.getBundleAlignSize())
+ report_fatal_error("Fragment can't be larger than a bundle size");
+
+ uint64_t RequiredBundlePadding = computeBundlePadding(F, F->Offset, FSize);
+ if (RequiredBundlePadding > UINT8_MAX)
+ report_fatal_error("Padding cannot exceed 255 bytes");
+ F->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
+ F->Offset += RequiredBundlePadding;
+ }
}
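A small helper sketch of the invariant the diagram states (assumes only the accessors visible elsewhere in this patch): the padded bytes sit between the previous fragment and F->Offset and are excluded from F's own size.

// Where the padded region begins; computeFragmentSize() never counts it.
uint64_t paddingStartOf(const llvm::MCAsmLayout &Layout,
                        const llvm::MCFragment *F) {
  return Layout.getFragmentOffset(F) - F->getBundlePadding();
}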
-/// WriteFragmentData - Write the \p F data to the output file.
-static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment &F) {
+/// \brief Write the contents of a fragment to the given object writer. Expects
+/// a MCEncodedFragment.
+static void writeFragmentContents(const MCFragment &F, MCObjectWriter *OW) {
+ const MCEncodedFragment &EF = cast<MCEncodedFragment>(F);
+ OW->WriteBytes(EF.getContents());
+}
+
+/// \brief Write the fragment \p F to the output file.
+static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment &F) {
MCObjectWriter *OW = &Asm.getWriter();
+
+ // FIXME: Embed in fragments instead?
+ uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
+
+ // Should NOP padding be written out before this fragment?
+ unsigned BundlePadding = F.getBundlePadding();
+ if (BundlePadding > 0) {
+ assert(Asm.isBundlingEnabled() &&
+ "Writing bundle padding with disabled bundling");
+ assert(F.hasInstructions() &&
+ "Writing bundle padding for a fragment without instructions");
+
+ unsigned TotalLength = BundlePadding + static_cast<unsigned>(FragmentSize);
+ if (F.alignToBundleEnd() && TotalLength > Asm.getBundleAlignSize()) {
+ // If the padding itself crosses a bundle boundary, it must be emitted
+ // in 2 pieces, since even nop instructions must not cross boundaries.
+ //             v--------------v   <- BundleAlignSize
+ //         v-------v              <- BundlePadding
+ // ----------------------------
+ // | Prev |####|####|    F    |
+ // ----------------------------
+ //         ^------------------^   <- TotalLength
+ unsigned DistanceToBoundary = TotalLength - Asm.getBundleAlignSize();
+ if (!Asm.getBackend().writeNopData(DistanceToBoundary, OW))
+ report_fatal_error("unable to write NOP sequence of " +
+ Twine(DistanceToBoundary) + " bytes");
+ BundlePadding -= DistanceToBoundary;
+ }
+ if (!Asm.getBackend().writeNopData(BundlePadding, OW))
+ report_fatal_error("unable to write NOP sequence of " +
+ Twine(BundlePadding) + " bytes");
+ }
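A worked instance of the split (illustrative numbers only): BundleAlignSize = 16, FragmentSize = 12, BundlePadding = 10, with alignToBundleEnd() set.

#include <cassert>

void splitPaddingExample() {
  const unsigned BundleAlignSize = 16, FragmentSize = 12;
  unsigned BundlePadding = 10;
  unsigned TotalLength = BundlePadding + FragmentSize;         // 22, crosses
  unsigned DistanceToBoundary = TotalLength - BundleAlignSize; // 6-byte NOP run
  BundlePadding -= DistanceToBoundary;                         // then 4 more
  assert(BundlePadding + FragmentSize == BundleAlignSize);     // F ends aligned
}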
+
+ // This variable (and its dummy usage) exists only to feed the assert at
+ // the end of the function.
uint64_t Start = OW->getStream().tell();
(void) Start;
++stats::EmittedFragments;
- // FIXME: Embed in fragments instead?
- uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
switch (F.getKind()) {
case MCFragment::FT_Align: {
- MCAlignFragment &AF = cast<MCAlignFragment>(F);
+ ++stats::EmittedAlignFragments;
+ const MCAlignFragment &AF = cast<MCAlignFragment>(F);
uint64_t Count = FragmentSize / AF.getValueSize();
assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!");
@@ -431,15 +587,24 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
break;
}
- case MCFragment::FT_Data: {
- MCDataFragment &DF = cast<MCDataFragment>(F);
- assert(FragmentSize == DF.getContents().size() && "Invalid size!");
- OW->WriteBytes(DF.getContents().str());
+ case MCFragment::FT_Data:
+ ++stats::EmittedDataFragments;
+ writeFragmentContents(F, OW);
+ break;
+
+ case MCFragment::FT_Relaxable:
+ ++stats::EmittedRelaxableFragments;
+ writeFragmentContents(F, OW);
+ break;
+
+ case MCFragment::FT_CompactEncodedInst:
+ ++stats::EmittedCompactEncodedInstFragments;
+ writeFragmentContents(F, OW);
break;
- }
case MCFragment::FT_Fill: {
- MCFillFragment &FF = cast<MCFillFragment>(F);
+ ++stats::EmittedFillFragments;
+ const MCFillFragment &FF = cast<MCFillFragment>(F);
assert(FF.getValueSize() && "Invalid virtual align in concrete fragment!");
@@ -455,20 +620,15 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
break;
}
- case MCFragment::FT_Inst: {
- MCInstFragment &IF = cast<MCInstFragment>(F);
- OW->WriteBytes(StringRef(IF.getCode().begin(), IF.getCode().size()));
- break;
- }
-
case MCFragment::FT_LEB: {
- MCLEBFragment &LF = cast<MCLEBFragment>(F);
+ const MCLEBFragment &LF = cast<MCLEBFragment>(F);
OW->WriteBytes(LF.getContents().str());
break;
}
case MCFragment::FT_Org: {
- MCOrgFragment &OF = cast<MCOrgFragment>(F);
+ ++stats::EmittedOrgFragments;
+ const MCOrgFragment &OF = cast<MCOrgFragment>(F);
for (uint64_t i = 0, e = FragmentSize; i != e; ++i)
OW->Write8(uint8_t(OF.getValue()));
@@ -488,7 +648,8 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
}
}
- assert(OW->getStream().tell() - Start == FragmentSize);
+ assert(OW->getStream().tell() - Start == FragmentSize &&
+ "The stream should advance by fragment size");
}
void MCAssembler::writeSectionData(const MCSectionData *SD,
@@ -506,7 +667,7 @@ void MCAssembler::writeSectionData(const MCSectionData *SD,
// Check that we aren't trying to write a non-zero contents (or fixups)
// into a virtual section. This is to support clients which use standard
// directives to fill the contents of virtual sections.
- MCDataFragment &DF = cast<MCDataFragment>(*it);
+ const MCDataFragment &DF = cast<MCDataFragment>(*it);
assert(DF.fixup_begin() == DF.fixup_end() &&
"Cannot have fixups in virtual section!");
for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
@@ -534,9 +695,9 @@ void MCAssembler::writeSectionData(const MCSectionData *SD,
uint64_t Start = getWriter().getStream().tell();
(void)Start;
- for (MCSectionData::const_iterator it = SD->begin(),
- ie = SD->end(); it != ie; ++it)
- WriteFragmentData(*this, Layout, *it);
+ for (MCSectionData::const_iterator it = SD->begin(), ie = SD->end();
+ it != ie; ++it)
+ writeFragment(*this, Layout, *it);
assert(getWriter().getStream().tell() - Start ==
Layout.getSectionAddressSize(SD));
@@ -583,9 +744,9 @@ void MCAssembler::Finish() {
SD->setLayoutOrder(i);
unsigned FragmentIndex = 0;
- for (MCSectionData::iterator it2 = SD->begin(),
- ie2 = SD->end(); it2 != ie2; ++it2)
- it2->setLayoutOrder(FragmentIndex++);
+ for (MCSectionData::iterator iFrag = SD->begin(), iFragEnd = SD->end();
+ iFrag != iFragEnd; ++iFrag)
+ iFrag->setLayoutOrder(FragmentIndex++);
}
// Layout until everything fits.
@@ -613,24 +774,15 @@ void MCAssembler::Finish() {
for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
for (MCSectionData::iterator it2 = it->begin(),
ie2 = it->end(); it2 != ie2; ++it2) {
- MCDataFragment *DF = dyn_cast<MCDataFragment>(it2);
- if (DF) {
- for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
- ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
+ MCEncodedFragmentWithFixups *F =
+ dyn_cast<MCEncodedFragmentWithFixups>(it2);
+ if (F) {
+ for (MCEncodedFragmentWithFixups::fixup_iterator it3 = F->fixup_begin(),
+ ie3 = F->fixup_end(); it3 != ie3; ++it3) {
MCFixup &Fixup = *it3;
- uint64_t FixedValue = handleFixup(Layout, *DF, Fixup);
- getBackend().applyFixup(Fixup, DF->getContents().data(),
- DF->getContents().size(), FixedValue);
- }
- }
- MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
- if (IF) {
- for (MCInstFragment::fixup_iterator it3 = IF->fixup_begin(),
- ie3 = IF->fixup_end(); it3 != ie3; ++it3) {
- MCFixup &Fixup = *it3;
- uint64_t FixedValue = handleFixup(Layout, *IF, Fixup);
- getBackend().applyFixup(Fixup, IF->getCode().data(),
- IF->getCode().size(), FixedValue);
+ uint64_t FixedValue = handleFixup(Layout, *F, Fixup);
+ getBackend().applyFixup(Fixup, F->getContents().data(),
+ F->getContents().size(), FixedValue);
}
}
}
@@ -643,11 +795,8 @@ void MCAssembler::Finish() {
}
bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
- if (getRelaxAll())
- return true;
-
// If we cannot resolve the fixup value, it requires relaxation.
MCValue Target;
uint64_t Value;
@@ -657,25 +806,25 @@ bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup,
return getBackend().fixupNeedsRelaxation(Fixup, Value, DF, Layout);
}
-bool MCAssembler::fragmentNeedsRelaxation(const MCInstFragment *IF,
+bool MCAssembler::fragmentNeedsRelaxation(const MCRelaxableFragment *F,
const MCAsmLayout &Layout) const {
// If this inst doesn't ever need relaxation, ignore it. This occurs when we
// are intentionally pushing out inst fragments, or because we relaxed a
// previous instruction to one that doesn't need relaxation.
- if (!getBackend().mayNeedRelaxation(IF->getInst()))
+ if (!getBackend().mayNeedRelaxation(F->getInst()))
return false;
- for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(),
- ie = IF->fixup_end(); it != ie; ++it)
- if (fixupNeedsRelaxation(*it, IF, Layout))
+ for (MCRelaxableFragment::const_fixup_iterator it = F->fixup_begin(),
+ ie = F->fixup_end(); it != ie; ++it)
+ if (fixupNeedsRelaxation(*it, F, Layout))
return true;
return false;
}
bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
- MCInstFragment &IF) {
- if (!fragmentNeedsRelaxation(&IF, Layout))
+ MCRelaxableFragment &F) {
+ if (!fragmentNeedsRelaxation(&F, Layout))
return false;
++stats::RelaxedInstructions;
@@ -686,7 +835,7 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
// Relax the fragment.
MCInst Relaxed;
- getBackend().relaxInstruction(IF.getInst(), Relaxed);
+ getBackend().relaxInstruction(F.getInst(), Relaxed);
// Encode the new instruction.
//
@@ -698,13 +847,10 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups);
VecOS.flush();
- // Update the instruction fragment.
- IF.setInst(Relaxed);
- IF.getCode() = Code;
- IF.getFixups().clear();
- // FIXME: Eliminate copy.
- for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
- IF.getFixups().push_back(Fixups[i]);
+ // Update the fragment.
+ F.setInst(Relaxed);
+ F.getContents() = Code;
+ F.getFixups() = Fixups;
return true;
}
@@ -758,39 +904,43 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
return OldSize != Data.size();
}
-bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout,
- MCSectionData &SD) {
- MCFragment *FirstInvalidFragment = NULL;
- // Scan for fragments that need relaxation.
- for (MCSectionData::iterator it2 = SD.begin(),
- ie2 = SD.end(); it2 != ie2; ++it2) {
- // Check if this is an fragment that needs relaxation.
- bool relaxedFrag = false;
- switch(it2->getKind()) {
+bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD) {
+ // Holds the first fragment which needed relaxing during this layout. It will
+ // remain NULL if none were relaxed.
+ // When a fragment is relaxed, all the fragments following it should get
+ // invalidated because their offset is going to change.
+ MCFragment *FirstRelaxedFragment = NULL;
+
+ // Attempt to relax all the fragments in the section.
+ for (MCSectionData::iterator I = SD.begin(), IE = SD.end(); I != IE; ++I) {
+ // Check if this is a fragment that needs relaxation.
+ bool RelaxedFrag = false;
+ switch(I->getKind()) {
default:
- break;
- case MCFragment::FT_Inst:
- relaxedFrag = relaxInstruction(Layout, *cast<MCInstFragment>(it2));
+ break;
+ case MCFragment::FT_Relaxable:
+ assert(!getRelaxAll() &&
+ "Did not expect a MCRelaxableFragment in RelaxAll mode");
+ RelaxedFrag = relaxInstruction(Layout, *cast<MCRelaxableFragment>(I));
break;
case MCFragment::FT_Dwarf:
- relaxedFrag = relaxDwarfLineAddr(Layout,
- *cast<MCDwarfLineAddrFragment>(it2));
+ RelaxedFrag = relaxDwarfLineAddr(Layout,
+ *cast<MCDwarfLineAddrFragment>(I));
break;
case MCFragment::FT_DwarfFrame:
- relaxedFrag =
+ RelaxedFrag =
relaxDwarfCallFrameFragment(Layout,
- *cast<MCDwarfCallFrameFragment>(it2));
+ *cast<MCDwarfCallFrameFragment>(I));
break;
case MCFragment::FT_LEB:
- relaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(it2));
+ RelaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(I));
break;
}
- // Update the layout, and remember that we relaxed.
- if (relaxedFrag && !FirstInvalidFragment)
- FirstInvalidFragment = it2;
+ if (RelaxedFrag && !FirstRelaxedFragment)
+ FirstRelaxedFragment = I;
}
- if (FirstInvalidFragment) {
- Layout.Invalidate(FirstInvalidFragment);
+ if (FirstRelaxedFragment) {
+ Layout.invalidateFragmentsFrom(FirstRelaxedFragment);
return true;
}
return false;
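Why invalidation must start at the first relaxed fragment: offsets are prefix sums of fragment sizes, so growing fragment i stales every later offset even when the later fragments themselves are unchanged. A self-contained illustration:

#include <cstdint>
#include <vector>

std::vector<uint64_t> offsetsOf(const std::vector<uint64_t> &Sizes) {
  std::vector<uint64_t> Off(Sizes.size(), 0);
  for (size_t i = 1; i < Sizes.size(); ++i)
    Off[i] = Off[i - 1] + Sizes[i - 1]; // every offset depends on all priors
  return Off;
}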
@@ -802,7 +952,7 @@ bool MCAssembler::layoutOnce(MCAsmLayout &Layout) {
bool WasRelaxed = false;
for (iterator it = begin(), ie = end(); it != ie; ++it) {
MCSectionData &SD = *it;
- while(layoutSectionOnce(Layout, SD))
+ while (layoutSectionOnce(Layout, SD))
WasRelaxed = true;
}
@@ -837,8 +987,10 @@ void MCFragment::dump() {
switch (getKind()) {
case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
case MCFragment::FT_Data: OS << "MCDataFragment"; break;
+ case MCFragment::FT_CompactEncodedInst:
+ OS << "MCCompactEncodedInstFragment"; break;
case MCFragment::FT_Fill: OS << "MCFillFragment"; break;
- case MCFragment::FT_Inst: OS << "MCInstFragment"; break;
+ case MCFragment::FT_Relaxable: OS << "MCRelaxableFragment"; break;
case MCFragment::FT_Org: OS << "MCOrgFragment"; break;
case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
@@ -846,7 +998,9 @@ void MCFragment::dump() {
}
OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
- << " Offset:" << Offset << ">";
+ << " Offset:" << Offset
+ << " HasInstructions:" << hasInstructions()
+ << " BundlePadding:" << static_cast<unsigned>(getBundlePadding()) << ">";
switch (getKind()) {
case MCFragment::FT_Align: {
@@ -870,7 +1024,7 @@ void MCFragment::dump() {
}
OS << "] (" << Contents.size() << " bytes)";
- if (!DF->getFixups().empty()) {
+ if (DF->fixup_begin() != DF->fixup_end()) {
OS << ",\n ";
OS << " Fixups:[";
for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(),
@@ -882,17 +1036,30 @@ void MCFragment::dump() {
}
break;
}
+ case MCFragment::FT_CompactEncodedInst: {
+ const MCCompactEncodedInstFragment *CEIF =
+ cast<MCCompactEncodedInstFragment>(this);
+ OS << "\n ";
+ OS << " Contents:[";
+ const SmallVectorImpl<char> &Contents = CEIF->getContents();
+ for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
+ }
+ OS << "] (" << Contents.size() << " bytes)";
+ break;
+ }
case MCFragment::FT_Fill: {
const MCFillFragment *FF = cast<MCFillFragment>(this);
OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize()
<< " Size:" << FF->getSize();
break;
}
- case MCFragment::FT_Inst: {
- const MCInstFragment *IF = cast<MCInstFragment>(this);
+ case MCFragment::FT_Relaxable: {
+ const MCRelaxableFragment *F = cast<MCRelaxableFragment>(this);
OS << "\n ";
OS << " Inst:";
- IF->getInst().dump_pretty(OS);
+ F->getInst().dump_pretty(OS);
break;
}
case MCFragment::FT_Org: {
@@ -928,7 +1095,8 @@ void MCSectionData::dump() {
raw_ostream &OS = llvm::errs();
OS << "<MCSectionData";
- OS << " Alignment:" << getAlignment() << " Fragments:[\n ";
+ OS << " Alignment:" << getAlignment()
+ << " Fragments:[\n ";
for (iterator it = begin(), ie = end(); it != ie; ++it) {
if (it != begin()) OS << ",\n ";
it->dump();
@@ -973,8 +1141,11 @@ void MCAssembler::dump() {
#endif
// anchors for MC*Fragment vtables
+void MCEncodedFragment::anchor() { }
+void MCEncodedFragmentWithFixups::anchor() { }
void MCDataFragment::anchor() { }
-void MCInstFragment::anchor() { }
+void MCCompactEncodedInstFragment::anchor() { }
+void MCRelaxableFragment::anchor() { }
void MCAlignFragment::anchor() { }
void MCFillFragment::anchor() { }
void MCOrgFragment::anchor() { }
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 477bd17c0d57..9adcc02b71a4 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -8,21 +8,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCLabel.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCLabel.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
using namespace llvm;
typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
@@ -31,12 +32,16 @@ typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri,
- const MCObjectFileInfo *mofi, const SourceMgr *mgr) :
+ const MCObjectFileInfo *mofi, const SourceMgr *mgr,
+ bool DoAutoReset) :
SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi),
Allocator(), Symbols(Allocator), UsedNames(Allocator),
NextUniqueID(0),
- CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
- AllowTemporaryLabels(true) {
+ CompilationDir(llvm::sys::Path::GetCurrentDirectory().str()),
+ CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
+ DwarfLocSeen(false), GenDwarfForAssembly(false), GenDwarfFileNumber(0),
+ AllowTemporaryLabels(true), DwarfCompileUnitID(0), AutoReset(DoAutoReset) {
+
MachOUniquingMap = 0;
ELFUniquingMap = 0;
COFFUniquingMap = 0;
@@ -45,22 +50,56 @@ MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri,
SecureLog = 0;
SecureLogUsed = false;
- DwarfLocSeen = false;
- GenDwarfForAssembly = false;
- GenDwarfFileNumber = 0;
+ if (SrcMgr && SrcMgr->getNumBuffers() > 0)
+ MainFileName = SrcMgr->getMemoryBuffer(0)->getBufferIdentifier();
+ else
+ MainFileName = "";
}
MCContext::~MCContext() {
+
+ if (AutoReset)
+ reset();
+
// NOTE: The symbols are all allocated out of a bump pointer allocator,
// we don't need to free them here.
+
+ // If the stream for the .secure_log_unique directive was created free it.
+ delete (raw_ostream*)SecureLog;
+}
+
+//===----------------------------------------------------------------------===//
+// Module Lifetime Management
+//===----------------------------------------------------------------------===//
+
+void MCContext::reset() {
+ UsedNames.clear();
+ Symbols.clear();
+ Allocator.Reset();
+ Instances.clear();
+ MCDwarfFilesCUMap.clear();
+ MCDwarfDirsCUMap.clear();
+ MCGenDwarfLabelEntries.clear();
+ DwarfDebugFlags = StringRef();
+ MCLineSections.clear();
+ MCLineSectionOrder.clear();
+ DwarfCompileUnitID = 0;
+ MCLineTableSymbols.clear();
+ CurrentDwarfLoc = MCDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0);
// If we have the MachO uniquing map, free it.
delete (MachOUniqueMapTy*)MachOUniquingMap;
delete (ELFUniqueMapTy*)ELFUniquingMap;
delete (COFFUniqueMapTy*)COFFUniquingMap;
+ MachOUniquingMap = 0;
+ ELFUniquingMap = 0;
+ COFFUniquingMap = 0;
- // If the stream for the .secure_log_unique directive was created free it.
- delete (raw_ostream*)SecureLog;
+ NextUniqueID = 0;
+ AllowTemporaryLabels = true;
+ DwarfLocSeen = false;
+ GenDwarfForAssembly = false;
+ GenDwarfFileNumber = 0;
}
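Together with the new DoAutoReset flag, reset() allows one MCContext to be recycled across modules. A hypothetical sketch of that pattern:

#include "llvm/MC/MCContext.h"

void reuseContext(const llvm::MCAsmInfo &MAI, const llvm::MCRegisterInfo &MRI,
                  const llvm::MCObjectFileInfo *MOFI) {
  llvm::MCContext Ctx(MAI, MRI, MOFI, /*SrcMgr=*/0, /*DoAutoReset=*/false);
  // ... assemble the first module ...
  Ctx.reset(); // frees symbols, DWARF tables and the section uniquing maps
  // ... assemble the next module with the same context ...
}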
//===----------------------------------------------------------------------===//
@@ -260,11 +299,13 @@ const MCSection *MCContext::getCOFFSection(StringRef Section,
/// error and zero is returned and the client reports the error, else the
/// allocated file number is returned. The file numbers may be in any order.
unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
- unsigned FileNumber) {
+ unsigned FileNumber, unsigned CUID) {
// TODO: a FileNumber of zero says to use the next available file number.
// Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
// to not be less than one. This needs to be change to be not less than zero.
+ SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
+ SmallVectorImpl<StringRef>& MCDwarfDirs = MCDwarfDirsCUMap[CUID];
// Make space for this FileNumber in the MCDwarfFiles vector if needed.
if (FileNumber >= MCDwarfFiles.size()) {
MCDwarfFiles.resize(FileNumber + 1);
@@ -324,7 +365,8 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
/// currently is assigned and false otherwise.
-bool MCContext::isValidDwarfFileNumber(unsigned FileNumber) {
+bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
+ SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID];
if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
return false;
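With the new CUID parameter each compile unit keeps an independent file table, so the same file number can be registered per unit. An illustrative use:

#include <cassert>

void perCUFiles(llvm::MCContext &Ctx) {
  Ctx.GetDwarfFile("src", "a.c", /*FileNumber=*/1, /*CUID=*/0);
  Ctx.GetDwarfFile("src", "b.c", /*FileNumber=*/1, /*CUID=*/1);
  assert(Ctx.isValidDwarfFileNumber(1, /*CUID=*/0) &&
         Ctx.isValidDwarfFileNumber(1, /*CUID=*/1));
}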
diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt
index 5e2cd8387db1..5195b9e23d69 100644
--- a/lib/MC/MCDisassembler/CMakeLists.txt
+++ b/lib/MC/MCDisassembler/CMakeLists.txt
@@ -1,8 +1,3 @@
add_llvm_library(LLVMMCDisassembler
Disassembler.cpp
- EDDisassembler.cpp
- EDInst.cpp
- EDMain.cpp
- EDOperand.cpp
- EDToken.cpp
)
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index 5189c9daeed6..4766b3747635 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -9,7 +9,6 @@
#include "Disassembler.h"
#include "llvm-c/Disassembler.h"
-
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
@@ -18,10 +17,9 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/ErrorHandling.h"
namespace llvm {
class Target;
@@ -35,68 +33,71 @@ using namespace llvm;
// functions can all be passed as NULL. If successful, this returns a
// disassembler context. If not, it returns NULL.
//
-LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
- int TagType, LLVMOpInfoCallback GetOpInfo,
- LLVMSymbolLookupCallback SymbolLookUp) {
- // Initialize targets and assembly printers/parsers.
- // FIXME: Clients are responsible for initializing the targets. And this
- // would be done by calling routines in "llvm-c/Target.h" which are static
- // line functions. But the current use of LLVMCreateDisasm() is to dynamically
- // load libLTO with dlopen() and then lookup the symbols using dlsym().
- // And since these initialize routines are static that does not work which
- // is why the call to them in this 'C' library API was added back.
- llvm::InitializeAllTargetInfos();
- llvm::InitializeAllTargetMCs();
- llvm::InitializeAllAsmParsers();
- llvm::InitializeAllDisassemblers();
-
+LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
+ void *DisInfo, int TagType,
+ LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp){
// Get the target.
std::string Error;
- const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+ const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
assert(TheTarget && "Unable to create target!");
// Get the assembler info needed to setup the MCContext.
- const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName);
- assert(MAI && "Unable to create target asm info!");
+ const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple);
+ if (!MAI)
+ return 0;
const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
- assert(MII && "Unable to create target instruction info!");
+ if (!MII)
+ return 0;
- const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TripleName);
- assert(MRI && "Unable to create target register info!");
+ const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple);
+ if (!MRI)
+ return 0;
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
- std::string CPU;
- const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(TripleName, CPU,
+ const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU,
FeaturesStr);
- assert(STI && "Unable to create subtarget info!");
+ if (!STI)
+ return 0;
// Set up the MCContext for creating symbols and MCExpr's.
MCContext *Ctx = new MCContext(*MAI, *MRI, 0);
- assert(Ctx && "Unable to create MCContext!");
+ if (!Ctx)
+ return 0;
// Set up disassembler.
MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI);
- assert(DisAsm && "Unable to create disassembler!");
+ if (!DisAsm)
+ return 0;
DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, Ctx);
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
*MAI, *MII, *MRI, *STI);
- assert(IP && "Unable to create instruction printer!");
+ if (!IP)
+ return 0;
- LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType,
+ LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType,
GetOpInfo, SymbolLookUp,
TheTarget, MAI, MRI,
STI, MII, Ctx, DisAsm, IP);
- assert(DC && "Allocation failure!");
+ if (!DC)
+ return 0;
return DC;
}
+LLVMDisasmContextRef LLVMCreateDisasm(const char *Triple, void *DisInfo,
+ int TagType, LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp) {
+ return LLVMCreateDisasmCPU(Triple, "", DisInfo, TagType, GetOpInfo,
+ SymbolLookUp);
+}
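A minimal client of the new CPU-aware entry point (a sketch: it assumes all targets are linked in and skips error handling; note that the library no longer initializes targets on the caller's behalf):

#include "llvm-c/Disassembler.h"
#include "llvm-c/Target.h"
#include <stdint.h>
#include <stdio.h>

void disasmOneInst(void) {
  LLVMInitializeAllTargetInfos();
  LLVMInitializeAllTargetMCs();
  LLVMInitializeAllDisassemblers();
  LLVMDisasmContextRef DC =
      LLVMCreateDisasmCPU("x86_64-unknown-linux-gnu", "core2",
                          /*DisInfo=*/0, /*TagType=*/0,
                          /*GetOpInfo=*/0, /*SymbolLookUp=*/0);
  uint8_t Bytes[] = { 0x55 }; /* pushq %rbp */
  char Text[64];
  if (DC && LLVMDisasmInstruction(DC, Bytes, sizeof(Bytes), /*PC=*/0,
                                  Text, sizeof(Text)))
    printf("%s\n", Text);
  if (DC)
    LLVMDisasmDispose(DC);
}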
+
//
// LLVMDisasmDispose() disposes of the disassembler specified by the context.
//
@@ -196,5 +197,27 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){
IP->setUseMarkup(1);
Options &= ~LLVMDisassembler_Option_UseMarkup;
}
+ if (Options & LLVMDisassembler_Option_PrintImmHex){
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ MCInstPrinter *IP = DC->getIP();
+ IP->setPrintImmHex(1);
+ Options &= ~LLVMDisassembler_Option_PrintImmHex;
+ }
+ if (Options & LLVMDisassembler_Option_AsmPrinterVariant){
+ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ // Try to set up the new instruction printer.
+ const MCAsmInfo *MAI = DC->getAsmInfo();
+ const MCInstrInfo *MII = DC->getInstrInfo();
+ const MCRegisterInfo *MRI = DC->getRegisterInfo();
+ const MCSubtargetInfo *STI = DC->getSubtargetInfo();
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+ AsmPrinterVariant = AsmPrinterVariant == 0 ? 1 : 0;
+ MCInstPrinter *IP = DC->getTarget()->createMCInstPrinter(
+ AsmPrinterVariant, *MAI, *MII, *MRI, *STI);
+ if (IP) {
+ DC->setIP(IP);
+ Options &= ~LLVMDisassembler_Option_AsmPrinterVariant;
+ }
+ }
return (Options == 0);
}
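The two new options can be requested in one call; per the code above, LLVMSetDisasmOptions() returns nonzero only when every requested bit was honored. An illustrative helper:

void enableHexAndVariant(LLVMDisasmContextRef DC) {
  uint64_t Opts = LLVMDisassembler_Option_PrintImmHex |
                  LLVMDisassembler_Option_AsmPrinterVariant;
  if (!LLVMSetDisasmOptions(DC, Opts)) {
    /* at least one option was rejected, e.g. no alternate printer variant */
  }
}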
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
index 322abd5d637a..6eb59d0c57be 100644
--- a/lib/MC/MCDisassembler/Disassembler.h
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -18,10 +18,10 @@
#define LLVM_MC_DISASSEMBLER_H
#include "llvm-c/Disassembler.h"
-#include <string>
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
+#include <string>
namespace llvm {
class MCContext;
@@ -109,7 +109,11 @@ public:
const Target *getTarget() const { return TheTarget; }
const MCDisassembler *getDisAsm() const { return DisAsm.get(); }
const MCAsmInfo *getAsmInfo() const { return MAI.get(); }
+ const MCInstrInfo *getInstrInfo() const { return MII.get(); }
+ const MCRegisterInfo *getRegisterInfo() const { return MRI.get(); }
+ const MCSubtargetInfo *getSubtargetInfo() const { return MSI.get(); }
MCInstPrinter *getIP() { return IP.get(); }
+ void setIP(MCInstPrinter *NewIP) { IP.reset(NewIP); }
};
} // namespace llvm
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
deleted file mode 100644
index eed7a771b97e..000000000000
--- a/lib/MC/MCDisassembler/EDDisassembler.cpp
+++ /dev/null
@@ -1,400 +0,0 @@
-//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Enhanced Disassembly library's disassembler class.
-// The disassembler is responsible for vending individual instructions according
-// to a given architecture and disassembly syntax.
-//
-//===----------------------------------------------------------------------===//
-
-#include "EDDisassembler.h"
-#include "EDInst.h"
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDisassembler.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCParser/AsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCTargetAsmLexer.h"
-#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/MemoryObject.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
-
-struct TripleMap {
- Triple::ArchType Arch;
- const char *String;
-};
-
-static const struct TripleMap triplemap[] = {
- { Triple::x86, "i386-unknown-unknown" },
- { Triple::x86_64, "x86_64-unknown-unknown" },
- { Triple::arm, "arm-unknown-unknown" },
- { Triple::thumb, "thumb-unknown-unknown" }
-};
-
-/// infoFromArch - Returns the TripleMap corresponding to a given architecture,
-/// or NULL if there is an error
-///
-/// @arg arch - The Triple::ArchType for the desired architecture
-static const char *tripleFromArch(Triple::ArchType arch) {
- unsigned int infoIndex;
-
- for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
- if (arch == triplemap[infoIndex].Arch)
- return triplemap[infoIndex].String;
- }
-
- return NULL;
-}
-
-/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
-/// for the desired assembly syntax, suitable for passing to
-/// Target::createMCInstPrinter()
-///
-/// @arg arch - The target architecture
-/// @arg syntax - The assembly syntax in sd form
-static int getLLVMSyntaxVariant(Triple::ArchType arch,
- EDDisassembler::AssemblySyntax syntax) {
- switch (syntax) {
- // Mappings below from X86AsmPrinter.cpp
- case EDDisassembler::kEDAssemblySyntaxX86ATT:
- if (arch == Triple::x86 || arch == Triple::x86_64)
- return 0;
- break;
- case EDDisassembler::kEDAssemblySyntaxX86Intel:
- if (arch == Triple::x86 || arch == Triple::x86_64)
- return 1;
- break;
- case EDDisassembler::kEDAssemblySyntaxARMUAL:
- if (arch == Triple::arm || arch == Triple::thumb)
- return 0;
- break;
- }
-
- return -1;
-}
-
-EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
- AssemblySyntax syntax) {
- const char *triple = tripleFromArch(arch);
- return getDisassembler(StringRef(triple), syntax);
-}
-
-EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
- AssemblySyntax syntax) {
- CPUKey key;
- key.Triple = str.str();
- key.Syntax = syntax;
-
- EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
-
- if (i != sDisassemblers.end()) {
- return i->second;
- }
-
- EDDisassembler *sdd = new EDDisassembler(key);
- if (!sdd->valid()) {
- delete sdd;
- return NULL;
- }
-
- sDisassemblers[key] = sdd;
-
- return sdd;
-}
-
-EDDisassembler::EDDisassembler(CPUKey &key) :
- Valid(false),
- HasSemantics(false),
- ErrorStream(nulls()),
- Key(key),
- TgtTriple(key.Triple.c_str()) {
-
- LLVMSyntaxVariant = getLLVMSyntaxVariant(TgtTriple.getArch(), key.Syntax);
-
- if (LLVMSyntaxVariant < 0)
- return;
-
- std::string tripleString(key.Triple);
- std::string errorString;
-
- Tgt = TargetRegistry::lookupTarget(key.Triple,
- errorString);
-
- if (!Tgt)
- return;
-
- MRI.reset(Tgt->createMCRegInfo(tripleString));
-
- if (!MRI)
- return;
-
- initMaps(*MRI);
-
- AsmInfo.reset(Tgt->createMCAsmInfo(tripleString));
-
- if (!AsmInfo)
- return;
-
- STI.reset(Tgt->createMCSubtargetInfo(tripleString, "", ""));
-
- if (!STI)
- return;
-
- Disassembler.reset(Tgt->createMCDisassembler(*STI));
-
- if (!Disassembler)
- return;
-
- InstInfos = Disassembler->getEDInfo();
-
- MII.reset(Tgt->createMCInstrInfo());
-
- if (!MII)
- return;
-
- InstString.reset(new std::string);
- InstStream.reset(new raw_string_ostream(*InstString));
- InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo,
- *MII, *MRI, *STI));
-
- if (!InstPrinter)
- return;
-
- GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
- SpecificAsmLexer.reset(Tgt->createMCAsmLexer(*MRI, *AsmInfo));
- SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
-
- initMaps(*MRI);
-
- Valid = true;
-}
-
-EDDisassembler::~EDDisassembler() {
- if (!valid())
- return;
-}
-
-namespace {
- /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
- /// as provided by the sd interface. See MemoryObject.
- class EDMemoryObject : public llvm::MemoryObject {
- private:
- EDByteReaderCallback Callback;
- void *Arg;
- public:
- EDMemoryObject(EDByteReaderCallback callback,
- void *arg) : Callback(callback), Arg(arg) { }
- ~EDMemoryObject() { }
- uint64_t getBase() const { return 0x0; }
- uint64_t getExtent() const { return (uint64_t)-1; }
- int readByte(uint64_t address, uint8_t *ptr) const {
- if (!Callback)
- return -1;
-
- if (Callback(ptr, address, Arg))
- return -1;
-
- return 0;
- }
- };
-}
-
-EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
- uint64_t address,
- void *arg) {
- EDMemoryObject memoryObject(byteReader, arg);
-
- MCInst* inst = new MCInst;
- uint64_t byteSize;
-
- MCDisassembler::DecodeStatus S;
- S = Disassembler->getInstruction(*inst, byteSize, memoryObject, address,
- ErrorStream, nulls());
- switch (S) {
- case MCDisassembler::Fail:
- case MCDisassembler::SoftFail:
- // FIXME: Do something different on soft failure mode?
- delete inst;
- return NULL;
-
- case MCDisassembler::Success: {
- const llvm::EDInstInfo *thisInstInfo = NULL;
-
- if (InstInfos) {
- thisInstInfo = &InstInfos[inst->getOpcode()];
- }
-
- EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
- return sdInst;
- }
- }
- return NULL;
-}
-
-void EDDisassembler::initMaps(const MCRegisterInfo &registerInfo) {
- unsigned numRegisters = registerInfo.getNumRegs();
- unsigned registerIndex;
-
- for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
- const char* registerName = registerInfo.getName(registerIndex);
-
- RegVec.push_back(registerName);
- RegRMap[registerName] = registerIndex;
- }
-
- switch (TgtTriple.getArch()) {
- default:
- break;
- case Triple::x86:
- case Triple::x86_64:
- stackPointers.insert(registerIDWithName("SP"));
- stackPointers.insert(registerIDWithName("ESP"));
- stackPointers.insert(registerIDWithName("RSP"));
-
- programCounters.insert(registerIDWithName("IP"));
- programCounters.insert(registerIDWithName("EIP"));
- programCounters.insert(registerIDWithName("RIP"));
- break;
- case Triple::arm:
- case Triple::thumb:
- stackPointers.insert(registerIDWithName("SP"));
-
- programCounters.insert(registerIDWithName("PC"));
- break;
- }
-}
-
-const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
- if (registerID >= RegVec.size())
- return NULL;
- else
- return RegVec[registerID].c_str();
-}
-
-unsigned EDDisassembler::registerIDWithName(const char *name) const {
- regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
- if (iter == RegRMap.end())
- return 0;
- else
- return (*iter).second;
-}
-
-bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
- return (stackPointers.find(registerID) != stackPointers.end());
-}
-
-bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
- return (programCounters.find(registerID) != programCounters.end());
-}
-
-int EDDisassembler::printInst(std::string &str, MCInst &inst) {
- PrinterMutex.acquire();
-
- InstPrinter->printInst(&inst, *InstStream, "");
- InstStream->flush();
- str = *InstString;
- InstString->clear();
-
- PrinterMutex.release();
-
- return 0;
-}
-
-static void diag_handler(const SMDiagnostic &diag, void *context) {
- if (context)
- diag.print("", static_cast<EDDisassembler*>(context)->ErrorStream);
-}
-
-int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
- SmallVectorImpl<AsmToken> &tokens,
- const std::string &str) {
- int ret = 0;
-
- switch (TgtTriple.getArch()) {
- default:
- return -1;
- case Triple::x86:
- case Triple::x86_64:
- case Triple::arm:
- case Triple::thumb:
- break;
- }
-
- const char *cStr = str.c_str();
- MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
-
- StringRef instName;
- SMLoc instLoc;
-
- SourceMgr sourceMgr;
- sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this));
- sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
- MCContext context(*AsmInfo, *MRI, NULL);
- OwningPtr<MCStreamer> streamer(createNullStreamer(context));
- OwningPtr<MCAsmParser> genericParser(createMCAsmParser(sourceMgr,
- context, *streamer,
- *AsmInfo));
-
- OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(Key.Triple.c_str(), "", ""));
- OwningPtr<MCTargetAsmParser>
- TargetParser(Tgt->createMCAsmParser(*STI, *genericParser));
-
- AsmToken OpcodeToken = genericParser->Lex();
- AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
-
- if (OpcodeToken.is(AsmToken::Identifier)) {
- instName = OpcodeToken.getString();
- instLoc = OpcodeToken.getLoc();
-
- ParseInstructionInfo Info;
- if (NextToken.isNot(AsmToken::Eof) &&
- TargetParser->ParseInstruction(Info, instName, instLoc, operands))
- ret = -1;
- } else {
- ret = -1;
- }
-
- ParserMutex.acquire();
-
- if (!ret) {
- GenericAsmLexer->setBuffer(buf);
-
- while (SpecificAsmLexer->Lex(),
- SpecificAsmLexer->isNot(AsmToken::Eof) &&
- SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
- if (SpecificAsmLexer->is(AsmToken::Error)) {
- ret = -1;
- break;
- }
- tokens.push_back(SpecificAsmLexer->getTok());
- }
- }
-
- ParserMutex.release();
-
- return ret;
-}
-
-int EDDisassembler::llvmSyntaxVariant() const {
- return LLVMSyntaxVariant;
-}
diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
deleted file mode 100644
index 6f71908d2bcf..000000000000
--- a/lib/MC/MCDisassembler/EDDisassembler.h
+++ /dev/null
@@ -1,271 +0,0 @@
-//===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface for the Enhanced Disassembly library's
-// disassembler class. The disassembler is responsible for vending individual
-// instructions according to a given architecture and disassembly syntax.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EDDISASSEMBLER_H
-#define LLVM_EDDISASSEMBLER_H
-
-#include "EDInfo.h"
-
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Mutex.h"
-
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-
-namespace llvm {
-class AsmLexer;
-class AsmParser;
-class AsmToken;
-class MCContext;
-class MCAsmInfo;
-class MCAsmLexer;
-class MCDisassembler;
-class MCInst;
-class MCInstPrinter;
-class MCInstrInfo;
-class MCParsedAsmOperand;
-class MCRegisterInfo;
-class MCStreamer;
-class MCSubtargetInfo;
-class MCTargetAsmLexer;
-class MCTargetAsmParser;
-template <typename T> class SmallVectorImpl;
-class SourceMgr;
-class Target;
-
-struct EDInstInfo;
-struct EDInst;
-struct EDOperand;
-struct EDToken;
-
-typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
-
-/// EDDisassembler - Encapsulates a disassembler for a single architecture and
-/// disassembly syntax. Also manages the static disassembler registry.
-struct EDDisassembler {
- typedef enum {
- /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
- kEDAssemblySyntaxX86Intel = 0,
- /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
- kEDAssemblySyntaxX86ATT = 1,
- kEDAssemblySyntaxARMUAL = 2
- } AssemblySyntax;
-
-
- ////////////////////
- // Static members //
- ////////////////////
-
- /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
- /// pair
- struct CPUKey {
- /// The architecture type
- std::string Triple;
-
- /// The assembly syntax
- AssemblySyntax Syntax;
-
- /// operator== - Equality operator
- bool operator==(const CPUKey &key) const {
- return (Triple == key.Triple &&
- Syntax == key.Syntax);
- }
-
- /// operator< - Less-than operator
- bool operator<(const CPUKey &key) const {
- return ((Triple < key.Triple) ||
- ((Triple == key.Triple) && Syntax < (key.Syntax)));
- }
- };
-
- typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
-
- /// A map from disassembler specifications to disassemblers. Populated
- /// lazily.
- static DisassemblerMap_t sDisassemblers;
-
- /// getDisassembler - Returns the specified disassemble, or NULL on failure
- ///
- /// @arg arch - The desired architecture
- /// @arg syntax - The desired disassembly syntax
- static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
- AssemblySyntax syntax);
-
- /// getDisassembler - Returns the disassembler for a given combination of
- /// CPU type, CPU subtype, and assembly syntax, or NULL on failure
- ///
- /// @arg str - The string representation of the architecture triple, e.g.,
- /// "x86_64-apple-darwin"
- /// @arg syntax - The disassembly syntax for the required disassembler
- static EDDisassembler *getDisassembler(llvm::StringRef str,
- AssemblySyntax syntax);
-
- ////////////////////////
- // Per-object members //
- ////////////////////////
-
- /// True only if the object has been successfully initialized
- bool Valid;
- /// True if the disassembler can provide semantic information
- bool HasSemantics;
-
- /// The stream to write errors to
- llvm::raw_ostream &ErrorStream;
-
- /// The triple/syntax pair for the current architecture
- CPUKey Key;
- /// The Triple fur the current architecture
- Triple TgtTriple;
- /// The LLVM target corresponding to the disassembler
- const llvm::Target *Tgt;
- /// The assembly information for the target architecture
- llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
- /// The subtarget information for the target architecture
- llvm::OwningPtr<const llvm::MCSubtargetInfo> STI;
- // The instruction information for the target architecture.
- llvm::OwningPtr<const llvm::MCInstrInfo> MII;
- // The register information for the target architecture.
- llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
- /// The disassembler for the target architecture
- llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
- /// The output string for the instruction printer; must be guarded with
- /// PrinterMutex
- llvm::OwningPtr<std::string> InstString;
- /// The output stream for the disassembler; must be guarded with
- /// PrinterMutex
- llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
- /// The instruction printer for the target architecture; must be guarded with
- /// PrinterMutex when printing
- llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
- /// The mutex that guards the instruction printer's printing functions, which
- /// use a shared stream
- llvm::sys::Mutex PrinterMutex;
- /// The array of instruction information provided by the TableGen backend for
- /// the target architecture
- const llvm::EDInstInfo *InstInfos;
- /// The target-specific lexer for use in tokenizing strings, in
- /// target-independent and target-specific portions
- llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
- llvm::OwningPtr<llvm::MCTargetAsmLexer> SpecificAsmLexer;
- /// The guard for the above
- llvm::sys::Mutex ParserMutex;
- /// The LLVM number used for the target disassembly syntax variant
- int LLVMSyntaxVariant;
-
- typedef std::vector<std::string> regvec_t;
- typedef std::map<std::string, unsigned> regrmap_t;
-
- /// A vector of registers for quick mapping from LLVM register IDs to names
- regvec_t RegVec;
- /// A map of registers for quick mapping from register names to LLVM IDs
- regrmap_t RegRMap;
-
- /// A set of register IDs for aliases of the stack pointer for the current
- /// architecture
- std::set<unsigned> stackPointers;
- /// A set of register IDs for aliases of the program counter for the current
- /// architecture
- std::set<unsigned> programCounters;
-
- /// Constructor - initializes a disassembler with all the necessary objects,
- /// which come pre-allocated from the registry accessor function
- ///
- /// @arg key - the architecture and disassembly syntax for the
- /// disassembler
- EDDisassembler(CPUKey& key);
-
- /// valid - reports whether there was a failure in the constructor.
- bool valid() {
- return Valid;
- }
-
- /// hasSemantics - reports whether the disassembler can provide operands and
- /// tokens.
- bool hasSemantics() {
- return HasSemantics;
- }
-
- ~EDDisassembler();
-
- /// createInst - creates and returns an instruction given a callback and
- /// memory address, or NULL on failure
- ///
- /// @arg byteReader - A callback function that provides machine code bytes
- /// @arg address - The address of the first byte of the instruction,
- /// suitable for passing to byteReader
- /// @arg arg - An opaque argument for byteReader
- EDInst *createInst(EDByteReaderCallback byteReader,
- uint64_t address,
- void *arg);
-
- /// initMaps - initializes regVec and regRMap using the provided register
- /// info
- ///
- /// @arg registerInfo - the register information to use as a source
- void initMaps(const llvm::MCRegisterInfo &registerInfo);
- /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
- /// register for a given register ID, or NULL on failure
- ///
- /// @arg registerID - the ID of the register to be queried
- const char *nameWithRegisterID(unsigned registerID) const;
- /// registerIDWithName - Returns the ID of a register for a given register
- /// name, or (unsigned)-1 on failure
- ///
- /// @arg name - The name of the register
- unsigned registerIDWithName(const char *name) const;
-
- /// registerIsStackPointer - reports whether a register ID is an alias for the
- /// stack pointer register
- ///
- /// @arg registerID - The LLVM register ID
- bool registerIsStackPointer(unsigned registerID);
- /// registerIsStackPointer - reports whether a register ID is an alias for the
- /// stack pointer register
- ///
- /// @arg registerID - The LLVM register ID
- bool registerIsProgramCounter(unsigned registerID);
-
- /// printInst - prints an MCInst to a string, returning 0 on success, or -1
- /// otherwise
- ///
- /// @arg str - A reference to a string which is filled in with the string
- /// representation of the instruction
- /// @arg inst - A reference to the MCInst to be printed
- int printInst(std::string& str,
- llvm::MCInst& inst);
-
- /// parseInst - extracts operands and tokens from a string for use in
- /// tokenizing the string. Returns 0 on success, or -1 otherwise.
- ///
- /// @arg operands - A reference to a vector that will be filled in with the
- /// parsed operands
- /// @arg tokens - A reference to a vector that will be filled in with the
- /// tokens
- /// @arg str - The string representation of the instruction
- int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
- llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
- const std::string &str);
-
- /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
- int llvmSyntaxVariant() const;
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/MC/MCDisassembler/EDInfo.h b/lib/MC/MCDisassembler/EDInfo.h
deleted file mode 100644
index e43ad1635246..000000000000
--- a/lib/MC/MCDisassembler/EDInfo.h
+++ /dev/null
@@ -1,84 +0,0 @@
-//===-- EDInfo.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EDINFO_H
-#define LLVM_EDINFO_H
-
-enum {
- EDIS_MAX_OPERANDS = 13,
- EDIS_MAX_SYNTAXES = 2
-};
-
-enum OperandTypes {
- kOperandTypeNone,
- kOperandTypeImmediate,
- kOperandTypeRegister,
- kOperandTypeX86Memory,
- kOperandTypeX86EffectiveAddress,
- kOperandTypeX86PCRelative,
- kOperandTypeARMBranchTarget,
- kOperandTypeARMSoReg,
- kOperandTypeARMSoImm,
- kOperandTypeARMRotImm,
- kOperandTypeARMSoImm2Part,
- kOperandTypeARMPredicate,
- kOperandTypeAddrModeImm12,
- kOperandTypeLdStSOReg,
- kOperandTypeARMAddrMode2,
- kOperandTypeARMAddrMode2Offset,
- kOperandTypeARMAddrMode3,
- kOperandTypeARMAddrMode3Offset,
- kOperandTypeARMAddrMode4,
- kOperandTypeARMAddrMode5,
- kOperandTypeARMAddrMode6,
- kOperandTypeARMAddrMode6Offset,
- kOperandTypeARMAddrMode7,
- kOperandTypeARMAddrModePC,
- kOperandTypeARMRegisterList,
- kOperandTypeARMDPRRegisterList,
- kOperandTypeARMSPRRegisterList,
- kOperandTypeARMTBAddrMode,
- kOperandTypeThumbITMask,
- kOperandTypeThumbAddrModeRegS1,
- kOperandTypeThumbAddrModeRegS2,
- kOperandTypeThumbAddrModeRegS4,
- kOperandTypeThumbAddrModeImmS1,
- kOperandTypeThumbAddrModeImmS2,
- kOperandTypeThumbAddrModeImmS4,
- kOperandTypeThumbAddrModeRR,
- kOperandTypeThumbAddrModeSP,
- kOperandTypeThumbAddrModePC,
- kOperandTypeThumb2AddrModeReg,
- kOperandTypeThumb2SoReg,
- kOperandTypeThumb2SoImm,
- kOperandTypeThumb2AddrModeImm8,
- kOperandTypeThumb2AddrModeImm8Offset,
- kOperandTypeThumb2AddrModeImm12,
- kOperandTypeThumb2AddrModeSoReg,
- kOperandTypeThumb2AddrModeImm8s4,
- kOperandTypeThumb2AddrModeImm8s4Offset
-};
-
-enum OperandFlags {
- kOperandFlagSource = 0x1,
- kOperandFlagTarget = 0x2
-};
-
-enum InstructionTypes {
- kInstructionTypeNone,
- kInstructionTypeMove,
- kInstructionTypeBranch,
- kInstructionTypePush,
- kInstructionTypePop,
- kInstructionTypeCall,
- kInstructionTypeReturn
-};
-
-
-#endif
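
These enums were consumed through per-instruction tables emitted by TableGen (the operandTypes and operandFlags arrays referenced from EDInst.cpp below). A sketch of the lookup pattern follows; InfoRecord is a hypothetical stand-in for the generated EDInstInfo, and the enum names are assumed in scope from the header above.

    #include <stdint.h>

    // Hypothetical stand-in for the TableGen-emitted EDInstInfo record.
    struct InfoRecord {
      unsigned numOperands;
      uint8_t operandTypes[EDIS_MAX_OPERANDS];
      uint8_t operandFlags[EDIS_MAX_OPERANDS];
    };

    // True if operand OpIdx carries the target flag (branch or move target).
    bool isTargetOperand(const InfoRecord &Info, unsigned OpIdx) {
      return OpIdx < Info.numOperands &&
             (Info.operandFlags[OpIdx] & kOperandFlagTarget) != 0;
    }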
diff --git a/lib/MC/MCDisassembler/EDInst.cpp b/lib/MC/MCDisassembler/EDInst.cpp
deleted file mode 100644
index 6057e169e347..000000000000
--- a/lib/MC/MCDisassembler/EDInst.cpp
+++ /dev/null
@@ -1,212 +0,0 @@
-//===-- EDInst.cpp - LLVM Enhanced Disassembler ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Enhanced Disassembly library's instruction class.
-// The instruction is responsible for vending the string representation,
-// individual tokens, and operands for a single instruction.
-//
-//===----------------------------------------------------------------------===//
-
-#include "EDInst.h"
-#include "EDDisassembler.h"
-#include "EDOperand.h"
-#include "EDToken.h"
-
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCInst.h"
-
-using namespace llvm;
-
-EDInst::EDInst(llvm::MCInst *inst,
- uint64_t byteSize,
- EDDisassembler &disassembler,
- const llvm::EDInstInfo *info) :
- Disassembler(disassembler),
- Inst(inst),
- ThisInstInfo(info),
- ByteSize(byteSize),
- BranchTarget(-1),
- MoveSource(-1),
- MoveTarget(-1) {
- OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()];
-}
-
-EDInst::~EDInst() {
- unsigned int index;
- unsigned int numOperands = Operands.size();
-
- for (index = 0; index < numOperands; ++index)
- delete Operands[index];
-
- unsigned int numTokens = Tokens.size();
-
- for (index = 0; index < numTokens; ++index)
- delete Tokens[index];
-
- delete Inst;
-}
-
-uint64_t EDInst::byteSize() {
- return ByteSize;
-}
-
-int EDInst::stringify() {
- if (StringifyResult.valid())
- return StringifyResult.result();
-
- if (Disassembler.printInst(String, *Inst))
- return StringifyResult.setResult(-1);
-
- String.push_back('\n');
-
- return StringifyResult.setResult(0);
-}
-
-int EDInst::getString(const char*& str) {
- if (stringify())
- return -1;
-
- str = String.c_str();
-
- return 0;
-}
-
-unsigned EDInst::instID() {
- return Inst->getOpcode();
-}
-
-bool EDInst::isBranch() {
- if (ThisInstInfo)
- return
- ThisInstInfo->instructionType == kInstructionTypeBranch ||
- ThisInstInfo->instructionType == kInstructionTypeCall;
- else
- return false;
-}
-
-bool EDInst::isMove() {
- if (ThisInstInfo)
- return ThisInstInfo->instructionType == kInstructionTypeMove;
- else
- return false;
-}
-
-int EDInst::parseOperands() {
- if (ParseResult.valid())
- return ParseResult.result();
-
- if (!ThisInstInfo)
- return ParseResult.setResult(-1);
-
- unsigned int opIndex;
- unsigned int mcOpIndex = 0;
-
- for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) {
- if (isBranch() &&
- (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) {
- BranchTarget = opIndex;
- }
- else if (isMove()) {
- if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource)
- MoveSource = opIndex;
- else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)
- MoveTarget = opIndex;
- }
-
- EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex);
-
- Operands.push_back(operand);
- }
-
- return ParseResult.setResult(0);
-}
-
-int EDInst::branchTargetID() {
- if (parseOperands())
- return -1;
- return BranchTarget;
-}
-
-int EDInst::moveSourceID() {
- if (parseOperands())
- return -1;
- return MoveSource;
-}
-
-int EDInst::moveTargetID() {
- if (parseOperands())
- return -1;
- return MoveTarget;
-}
-
-int EDInst::numOperands() {
- if (parseOperands())
- return -1;
- return Operands.size();
-}
-
-int EDInst::getOperand(EDOperand *&operand, unsigned int index) {
- if (parseOperands())
- return -1;
-
- if (index >= Operands.size())
- return -1;
-
- operand = Operands[index];
- return 0;
-}
-
-int EDInst::tokenize() {
- if (TokenizeResult.valid())
- return TokenizeResult.result();
-
- if (ThisInstInfo == NULL)
- return TokenizeResult.setResult(-1);
-
- if (stringify())
- return TokenizeResult.setResult(-1);
-
- return TokenizeResult.setResult(EDToken::tokenize(Tokens,
- String,
- OperandOrder,
- Disassembler));
-
-}
-
-int EDInst::numTokens() {
- if (tokenize())
- return -1;
- return Tokens.size();
-}
-
-int EDInst::getToken(EDToken *&token, unsigned int index) {
- if (tokenize())
- return -1;
- token = Tokens[index];
- return 0;
-}
-
-#ifdef __BLOCKS__
-int EDInst::visitTokens(EDTokenVisitor_t visitor) {
- if (tokenize())
- return -1;
-
- tokvec_t::iterator iter;
-
- for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) {
- int ret = visitor(*iter);
- if (ret == 1)
- return 0;
- if (ret != 0)
- return -1;
- }
-
- return 0;
-}
-#endif
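
stringify(), parseOperands(), and tokenize() above all share the memoization idiom provided by CachedResult. A standalone distillation of that idiom, with hypothetical names:

    // Standalone distillation of the CachedResult idiom used above.
    struct Memo {
      bool Valid;
      int Result;
      Memo() : Valid(false), Result(0) {}
      int set(int R) { Result = R; Valid = true; return R; }
    };

    struct SlowQuery {
      Memo Done;
      int run() {
        if (Done.Valid)       // Every later call returns the cached code.
          return Done.Result;
        int rc = 0;           // ... perform the slow work exactly once ...
        return Done.set(rc);  // 0 for success, -1 for failure, as in EDInst.
      }
    };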
diff --git a/lib/MC/MCDisassembler/EDInst.h b/lib/MC/MCDisassembler/EDInst.h
deleted file mode 100644
index 6b78dc826c92..000000000000
--- a/lib/MC/MCDisassembler/EDInst.h
+++ /dev/null
@@ -1,182 +0,0 @@
-//===-- EDInst.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface for the Enhanced Disassembly library's
-// instruction class. The instruction is responsible for vending the string
-// representation, individual tokens and operands for a single instruction.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EDINST_H
-#define LLVM_EDINST_H
-
-#include "llvm/Support/DataTypes.h"
-#include "llvm/ADT/SmallVector.h"
-#include <string>
-#include <vector>
-
-namespace llvm {
- class MCInst;
- struct EDInstInfo;
- struct EDToken;
- struct EDDisassembler;
- struct EDOperand;
-
-#ifdef __BLOCKS__
- typedef int (^EDTokenVisitor_t)(EDToken *token);
-#endif
-
-/// CachedResult - Encapsulates the result of a function along with the validity
-/// of that result, so that slow functions don't need to run twice
-struct CachedResult {
- /// True if the result has been obtained by executing the function
- bool Valid;
- /// The result last obtained from the function
- int Result;
-
- /// Constructor - Initializes an invalid result
- CachedResult() : Valid(false) { }
- /// valid - Returns true if the result has been obtained by executing the
- /// function and false otherwise
- bool valid() { return Valid; }
- /// result - Returns the result of the function or an undefined value if
- /// valid() is false
- int result() { return Result; }
- /// setResult - Sets the result of the function and declares it valid
- /// returning the result (so that setResult() can be called from inside a
- /// return statement)
- /// @arg result - The result of the function
- int setResult(int result) { Result = result; Valid = true; return result; }
-};
-
-/// EDInst - Encapsulates a single instruction, which can be queried for its
-/// string representation, as well as its operands and tokens
-struct EDInst {
- /// The parent disassembler
- EDDisassembler &Disassembler;
- /// The containing MCInst
- llvm::MCInst *Inst;
- /// The instruction information provided by TableGen for this instruction
- const llvm::EDInstInfo *ThisInstInfo;
- /// The number of bytes for the machine code representation of the instruction
- uint64_t ByteSize;
-
- /// The result of the stringify() function
- CachedResult StringifyResult;
- /// The string representation of the instruction
- std::string String;
- /// The order in which operands from the InstInfo's operand information appear
- /// in String
- const signed char* OperandOrder;
-
- /// The result of the parseOperands() function
- CachedResult ParseResult;
- typedef llvm::SmallVector<EDOperand*, 5> opvec_t;
- /// The instruction's operands
- opvec_t Operands;
- /// The operand corresponding to the target, if the instruction is a branch
- int BranchTarget;
- /// The operand corresponding to the source, if the instruction is a move
- int MoveSource;
- /// The operand corresponding to the target, if the instruction is a move
- int MoveTarget;
-
- /// The result of the tokenize() function
- CachedResult TokenizeResult;
- typedef std::vector<EDToken*> tokvec_t;
- /// The instruction's tokens
- tokvec_t Tokens;
-
- /// Constructor - initializes an instruction given the output of the LLVM
- /// C++ disassembler
- ///
- /// @arg inst - The MCInst, which will now be owned by this object
- /// @arg byteSize - The size of the consumed instruction, in bytes
- /// @arg disassembler - The parent disassembler
- /// @arg instInfo - The instruction information produced by the table
- /// generator for this instruction
- EDInst(llvm::MCInst *inst,
- uint64_t byteSize,
- EDDisassembler &disassembler,
- const llvm::EDInstInfo *instInfo);
- ~EDInst();
-
- /// byteSize - returns the number of bytes consumed by the machine code
- /// representation of the instruction
- uint64_t byteSize();
- /// instID - returns the LLVM instruction ID of the instruction
- unsigned instID();
-
- /// stringify - populates the String and AsmString members of the instruction,
- /// returning 0 on success or -1 otherwise
- int stringify();
- /// getString - retrieves a pointer to the string representation of the
- /// instruction, returning 0 on success or -1 otherwise
- ///
- /// @arg str - A reference to a pointer that, on success, is set to point to
- /// the string representation of the instruction; this string is still owned
- /// by the instruction and will be deleted when the instruction is destroyed
- int getString(const char *&str);
-
- /// isBranch - Returns true if the instruction is a branch
- bool isBranch();
- /// isMove - Returns true if the instruction is a move
- bool isMove();
-
- /// parseOperands - populates the Operands member of the instruction,
- /// returning 0 on success or -1 otherwise
- int parseOperands();
- /// branchTargetID - returns the ID (suitable for use with getOperand()) of
- /// the target operand if the instruction is a branch, or -1 otherwise
- int branchTargetID();
- /// moveSourceID - returns the ID of the source operand if the instruction
- /// is a move, or -1 otherwise
- int moveSourceID();
- /// moveTargetID - returns the ID of the target operand if the instruction
- /// is a move, or -1 otherwise
- int moveTargetID();
-
- /// numOperands - returns the number of operands available to retrieve, or -1
- /// on error
- int numOperands();
- /// getOperand - retrieves an operand from the instruction's operand list by
- /// index, returning 0 on success or -1 on error
- ///
- /// @arg operand - A reference whose target is pointed at the operand on
- /// success, although the operand is still owned by the EDInst
- /// @arg index - The index of the operand in the instruction
- int getOperand(EDOperand *&operand, unsigned int index);
-
- /// tokenize - populates the Tokens member of the instruction, returning 0 on
- /// success or -1 otherwise
- int tokenize();
- /// numTokens - returns the number of tokens in the instruction, or -1 on
- /// error
- int numTokens();
- /// getToken - retrieves a token from the instruction's token list by index,
- /// returning 0 on success or -1 on error
- ///
- /// @arg token - A reference whose target is pointed at the token on success,
- /// although the token is still owned by the EDInst
- /// @arg index - The index of the token in the instruction
- int getToken(EDToken *&token, unsigned int index);
-
-#ifdef __BLOCKS__
- /// visitTokens - Visits each token in turn and applies a block to it,
- /// returning 0 if all tokens are visited or the block signals
- /// termination by returning 1; returns -1 on error
- ///
- /// @arg visitor - The visitor block to apply to all tokens.
- int visitTokens(EDTokenVisitor_t visitor);
-#endif
-};
-
-} // end namespace llvm
-
-#endif
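
A sketch of how the accessors declared above were driven from client code, assuming an already-created EDInst; the helper is hypothetical, and operand ownership follows the header comments (operands remain owned by the EDInst):

    // Hypothetical walk over an EDInst's operands via the API above.
    int countRegisterOperands(llvm::EDInst &Inst) {
      int N = Inst.numOperands(); // -1 means operand parsing failed.
      if (N < 0)
        return -1;
      int Regs = 0;
      for (int I = 0; I < N; ++I) {
        llvm::EDOperand *Op = 0;
        if (Inst.getOperand(Op, I) == 0 && Op->isRegister())
          ++Regs;               // Op remains owned by Inst; do not delete.
      }
      return Regs;
    }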
diff --git a/lib/MC/MCDisassembler/EDMain.cpp b/lib/MC/MCDisassembler/EDMain.cpp
deleted file mode 100644
index 5c065dbf0ca6..000000000000
--- a/lib/MC/MCDisassembler/EDMain.cpp
+++ /dev/null
@@ -1,276 +0,0 @@
-//===-- EDMain.cpp - LLVM Enhanced Disassembly C API ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the enhanced disassembler's public C API.
-//
-//===----------------------------------------------------------------------===//
-
-#include "EDDisassembler.h"
-#include "EDInst.h"
-#include "EDOperand.h"
-#include "EDToken.h"
-#include "llvm-c/EnhancedDisassembly.h"
-using namespace llvm;
-
-int EDGetDisassembler(EDDisassemblerRef *disassembler,
- const char *triple,
- EDAssemblySyntax_t syntax) {
- EDDisassembler::AssemblySyntax Syntax;
- switch (syntax) {
- default: llvm_unreachable("Unknown assembly syntax!");
- case kEDAssemblySyntaxX86Intel:
- Syntax = EDDisassembler::kEDAssemblySyntaxX86Intel;
- break;
- case kEDAssemblySyntaxX86ATT:
- Syntax = EDDisassembler::kEDAssemblySyntaxX86ATT;
- break;
- case kEDAssemblySyntaxARMUAL:
- Syntax = EDDisassembler::kEDAssemblySyntaxARMUAL;
- break;
- }
-
- EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, Syntax);
-
- if (!ret)
- return -1;
- *disassembler = ret;
- return 0;
-}
-
-int EDGetRegisterName(const char** regName,
- EDDisassemblerRef disassembler,
- unsigned regID) {
- const char *name = ((EDDisassembler*)disassembler)->nameWithRegisterID(regID);
- if (!name)
- return -1;
- *regName = name;
- return 0;
-}
-
-int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
- unsigned regID) {
- return ((EDDisassembler*)disassembler)->registerIsStackPointer(regID) ? 1 : 0;
-}
-
-int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
- unsigned regID) {
- return ((EDDisassembler*)disassembler)->registerIsProgramCounter(regID) ? 1:0;
-}
-
-unsigned int EDCreateInsts(EDInstRef *insts,
- unsigned int count,
- EDDisassemblerRef disassembler,
- ::EDByteReaderCallback byteReader,
- uint64_t address,
- void *arg) {
- unsigned int index;
-
- for (index = 0; index < count; ++index) {
- EDInst *inst = ((EDDisassembler*)disassembler)->createInst(byteReader,
- address, arg);
-
- if (!inst)
- return index;
-
- insts[index] = inst;
- address += inst->byteSize();
- }
-
- return count;
-}
-
-void EDReleaseInst(EDInstRef inst) {
- delete ((EDInst*)inst);
-}
-
-int EDInstByteSize(EDInstRef inst) {
- return ((EDInst*)inst)->byteSize();
-}
-
-int EDGetInstString(const char **buf,
- EDInstRef inst) {
- return ((EDInst*)inst)->getString(*buf);
-}
-
-int EDInstID(unsigned *instID, EDInstRef inst) {
- *instID = ((EDInst*)inst)->instID();
- return 0;
-}
-
-int EDInstIsBranch(EDInstRef inst) {
- return ((EDInst*)inst)->isBranch();
-}
-
-int EDInstIsMove(EDInstRef inst) {
- return ((EDInst*)inst)->isMove();
-}
-
-int EDBranchTargetID(EDInstRef inst) {
- return ((EDInst*)inst)->branchTargetID();
-}
-
-int EDMoveSourceID(EDInstRef inst) {
- return ((EDInst*)inst)->moveSourceID();
-}
-
-int EDMoveTargetID(EDInstRef inst) {
- return ((EDInst*)inst)->moveTargetID();
-}
-
-int EDNumTokens(EDInstRef inst) {
- return ((EDInst*)inst)->numTokens();
-}
-
-int EDGetToken(EDTokenRef *token,
- EDInstRef inst,
- int index) {
- return ((EDInst*)inst)->getToken(*(EDToken**)token, index);
-}
-
-int EDGetTokenString(const char **buf,
- EDTokenRef token) {
- return ((EDToken*)token)->getString(*buf);
-}
-
-int EDOperandIndexForToken(EDTokenRef token) {
- return ((EDToken*)token)->operandID();
-}
-
-int EDTokenIsWhitespace(EDTokenRef token) {
- return ((EDToken*)token)->type() == EDToken::kTokenWhitespace;
-}
-
-int EDTokenIsPunctuation(EDTokenRef token) {
- return ((EDToken*)token)->type() == EDToken::kTokenPunctuation;
-}
-
-int EDTokenIsOpcode(EDTokenRef token) {
- return ((EDToken*)token)->type() == EDToken::kTokenOpcode;
-}
-
-int EDTokenIsLiteral(EDTokenRef token) {
- return ((EDToken*)token)->type() == EDToken::kTokenLiteral;
-}
-
-int EDTokenIsRegister(EDTokenRef token) {
- return ((EDToken*)token)->type() == EDToken::kTokenRegister;
-}
-
-int EDTokenIsNegativeLiteral(EDTokenRef token) {
- if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
- return -1;
-
- return ((EDToken*)token)->literalSign();
-}
-
-int EDLiteralTokenAbsoluteValue(uint64_t *value, EDTokenRef token) {
- if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
- return -1;
-
- return ((EDToken*)token)->literalAbsoluteValue(*value);
-}
-
-int EDRegisterTokenValue(unsigned *registerID,
- EDTokenRef token) {
- if (((EDToken*)token)->type() != EDToken::kTokenRegister)
- return -1;
-
- return ((EDToken*)token)->registerID(*registerID);
-}
-
-int EDNumOperands(EDInstRef inst) {
- return ((EDInst*)inst)->numOperands();
-}
-
-int EDGetOperand(EDOperandRef *operand,
- EDInstRef inst,
- int index) {
- return ((EDInst*)inst)->getOperand(*(EDOperand**)operand, index);
-}
-
-int EDOperandIsRegister(EDOperandRef operand) {
- return ((EDOperand*)operand)->isRegister();
-}
-
-int EDOperandIsImmediate(EDOperandRef operand) {
- return ((EDOperand*)operand)->isImmediate();
-}
-
-int EDOperandIsMemory(EDOperandRef operand) {
- return ((EDOperand*)operand)->isMemory();
-}
-
-int EDRegisterOperandValue(unsigned *value, EDOperandRef operand) {
- if (!((EDOperand*)operand)->isRegister())
- return -1;
- *value = ((EDOperand*)operand)->regVal();
- return 0;
-}
-
-int EDImmediateOperandValue(uint64_t *value, EDOperandRef operand) {
- if (!((EDOperand*)operand)->isImmediate())
- return -1;
- *value = ((EDOperand*)operand)->immediateVal();
- return 0;
-}
-
-int EDEvaluateOperand(uint64_t *result, EDOperandRef operand,
- ::EDRegisterReaderCallback regReader, void *arg) {
- return ((EDOperand*)operand)->evaluate(*result, regReader, arg);
-}
-
-#ifdef __BLOCKS__
-
-struct ByteReaderWrapper {
- EDByteBlock_t byteBlock;
-};
-
-static int readerWrapperCallback(uint8_t *byte,
- uint64_t address,
- void *arg) {
- struct ByteReaderWrapper *wrapper = (struct ByteReaderWrapper *)arg;
- return wrapper->byteBlock(byte, address);
-}
-
-unsigned int EDBlockCreateInsts(EDInstRef *insts,
- int count,
- EDDisassemblerRef disassembler,
- EDByteBlock_t byteBlock,
- uint64_t address) {
- struct ByteReaderWrapper wrapper;
- wrapper.byteBlock = byteBlock;
-
- return EDCreateInsts(insts, count, disassembler, readerWrapperCallback,
- address, (void*)&wrapper);
-}
-
-int EDBlockEvaluateOperand(uint64_t *result, EDOperandRef operand,
- EDRegisterBlock_t regBlock) {
- return ((EDOperand*)operand)->evaluate(*result, regBlock);
-}
-
-int EDBlockVisitTokens(EDInstRef inst, ::EDTokenVisitor_t visitor) {
- return ((EDInst*)inst)->visitTokens((llvm::EDTokenVisitor_t)visitor);
-}
-
-#else
-
-extern "C" unsigned int EDBlockCreateInsts() {
- return 0;
-}
-
-extern "C" int EDBlockEvaluateOperand() {
- return -1;
-}
-
-extern "C" int EDBlockVisitTokens() {
- return -1;
-}
-
-#endif
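
For reference, the C API implemented above was typically driven as below; the byte reader, buffer, and target triple are illustrative rather than taken from the original sources.

    #include <cstdio>   // std::fputs
    #include <stdint.h> // uint8_t, uint64_t
    // Plus the removed header: llvm-c/EnhancedDisassembly.h

    // Reads one byte of the instruction stream; sketch only, no bounds check.
    static int readByte(uint8_t *Byte, uint64_t Addr, void *Arg) {
      *Byte = static_cast<uint8_t *>(Arg)[Addr];
      return 0; // Nonzero would make the disassembler stop early.
    }

    void printFirstInsts(uint8_t *Buf) {
      EDDisassemblerRef DC;
      if (EDGetDisassembler(&DC, "x86_64-unknown-unknown",
                            kEDAssemblySyntaxX86ATT))
        return; // -1: no disassembler for this triple/syntax.
      EDInstRef Insts[4];
      unsigned N = EDCreateInsts(Insts, 4, DC, readByte, 0, Buf);
      for (unsigned I = 0; I != N; ++I) {
        const char *Str;
        if (!EDGetInstString(&Str, Insts[I]))
          std::fputs(Str, stdout); // Str stays owned by the instruction.
        EDReleaseInst(Insts[I]);   // The caller owns each EDInstRef.
      }
    }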
diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp
deleted file mode 100644
index 48b374659d5e..000000000000
--- a/lib/MC/MCDisassembler/EDOperand.cpp
+++ /dev/null
@@ -1,315 +0,0 @@
-//===-- EDOperand.cpp - LLVM Enhanced Disassembler ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Enhanced Disassembly library's operand class. The
-// operand is responsible for allowing evaluation given a particular register
-// context.
-//
-//===----------------------------------------------------------------------===//
-
-#include "EDOperand.h"
-#include "EDDisassembler.h"
-#include "EDInst.h"
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCInst.h"
-using namespace llvm;
-
-EDOperand::EDOperand(const EDDisassembler &disassembler,
- const EDInst &inst,
- unsigned int opIndex,
- unsigned int &mcOpIndex) :
- Disassembler(disassembler),
- Inst(inst),
- OpIndex(opIndex),
- MCOpIndex(mcOpIndex) {
- unsigned int numMCOperands = 0;
-
- Triple::ArchType arch = Disassembler.TgtTriple.getArch();
-
- if (arch == Triple::x86 ||
- arch == Triple::x86_64) {
- uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
-
- switch (operandType) {
- default:
- break;
- case kOperandTypeImmediate:
- numMCOperands = 1;
- break;
- case kOperandTypeRegister:
- numMCOperands = 1;
- break;
- case kOperandTypeX86Memory:
- numMCOperands = 5;
- break;
- case kOperandTypeX86EffectiveAddress:
- numMCOperands = 4;
- break;
- case kOperandTypeX86PCRelative:
- numMCOperands = 1;
- break;
- }
- }
- else if (arch == Triple::arm ||
- arch == Triple::thumb) {
- uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
-
- switch (operandType) {
- default:
- case kOperandTypeARMRegisterList:
- case kOperandTypeARMDPRRegisterList:
- case kOperandTypeARMSPRRegisterList:
- break;
- case kOperandTypeImmediate:
- case kOperandTypeRegister:
- case kOperandTypeARMBranchTarget:
- case kOperandTypeARMSoImm:
- case kOperandTypeARMRotImm:
- case kOperandTypeThumb2SoImm:
- case kOperandTypeARMSoImm2Part:
- case kOperandTypeARMPredicate:
- case kOperandTypeThumbITMask:
- case kOperandTypeThumb2AddrModeImm8Offset:
- case kOperandTypeARMTBAddrMode:
- case kOperandTypeThumb2AddrModeImm8s4Offset:
- case kOperandTypeARMAddrMode7:
- case kOperandTypeThumb2AddrModeReg:
- numMCOperands = 1;
- break;
- case kOperandTypeThumb2SoReg:
- case kOperandTypeAddrModeImm12:
- case kOperandTypeARMAddrMode2Offset:
- case kOperandTypeARMAddrMode3Offset:
- case kOperandTypeARMAddrMode4:
- case kOperandTypeARMAddrMode5:
- case kOperandTypeARMAddrModePC:
- case kOperandTypeThumb2AddrModeImm8:
- case kOperandTypeThumb2AddrModeImm12:
- case kOperandTypeThumb2AddrModeImm8s4:
- case kOperandTypeThumbAddrModeImmS1:
- case kOperandTypeThumbAddrModeImmS2:
- case kOperandTypeThumbAddrModeImmS4:
- case kOperandTypeThumbAddrModeRR:
- case kOperandTypeThumbAddrModeSP:
- case kOperandTypeThumbAddrModePC:
- numMCOperands = 2;
- break;
- case kOperandTypeARMSoReg:
- case kOperandTypeLdStSOReg:
- case kOperandTypeARMAddrMode2:
- case kOperandTypeARMAddrMode3:
- case kOperandTypeThumb2AddrModeSoReg:
- case kOperandTypeThumbAddrModeRegS1:
- case kOperandTypeThumbAddrModeRegS2:
- case kOperandTypeThumbAddrModeRegS4:
- case kOperandTypeARMAddrMode6Offset:
- numMCOperands = 3;
- break;
- case kOperandTypeARMAddrMode6:
- numMCOperands = 4;
- break;
- }
- }
-
- mcOpIndex += numMCOperands;
-}
-
-EDOperand::~EDOperand() {
-}
-
-int EDOperand::evaluate(uint64_t &result,
- EDRegisterReaderCallback callback,
- void *arg) {
- uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
-
- Triple::ArchType arch = Disassembler.TgtTriple.getArch();
-
- switch (arch) {
- default:
- return -1;
- case Triple::x86:
- case Triple::x86_64:
- switch (operandType) {
- default:
- return -1;
- case kOperandTypeImmediate:
- result = Inst.Inst->getOperand(MCOpIndex).getImm();
- return 0;
- case kOperandTypeRegister:
- {
- unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
- return callback(&result, reg, arg);
- }
- case kOperandTypeX86PCRelative:
- {
- int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
-
- uint64_t ripVal;
-
- // TODO fix how we do this
-
- if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg))
- return -1;
-
- result = ripVal + displacement;
- return 0;
- }
- case kOperandTypeX86Memory:
- case kOperandTypeX86EffectiveAddress:
- {
- unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg();
- uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm();
- unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg();
- int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm();
-
- uint64_t addr = 0;
-
- unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
-
- if (segmentReg != 0 && arch == Triple::x86_64) {
- unsigned fsID = Disassembler.registerIDWithName("FS");
- unsigned gsID = Disassembler.registerIDWithName("GS");
-
- if (segmentReg == fsID ||
- segmentReg == gsID) {
- uint64_t segmentBase;
- if (!callback(&segmentBase, segmentReg, arg))
- addr += segmentBase;
- }
- }
-
- if (baseReg) {
- uint64_t baseVal;
- if (callback(&baseVal, baseReg, arg))
- return -1;
- addr += baseVal;
- }
-
- if (indexReg) {
- uint64_t indexVal;
- if (callback(&indexVal, indexReg, arg))
- return -1;
- addr += (scaleAmount * indexVal);
- }
-
- addr += displacement;
-
- result = addr;
- return 0;
- }
- } // switch (operandType)
- case Triple::arm:
- case Triple::thumb:
- switch (operandType) {
- default:
- return -1;
- case kOperandTypeImmediate:
- if (!Inst.Inst->getOperand(MCOpIndex).isImm())
- return -1;
-
- result = Inst.Inst->getOperand(MCOpIndex).getImm();
- return 0;
- case kOperandTypeRegister:
- {
- if (!Inst.Inst->getOperand(MCOpIndex).isReg())
- return -1;
-
- unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
- return callback(&result, reg, arg);
- }
- case kOperandTypeARMBranchTarget:
- {
- if (!Inst.Inst->getOperand(MCOpIndex).isImm())
- return -1;
-
- int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
-
- uint64_t pcVal;
-
- if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg))
- return -1;
-
- result = pcVal + displacement;
- return 0;
- }
- }
- }
-}
-
-int EDOperand::isRegister() {
- return (Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeRegister);
-}
-
-unsigned EDOperand::regVal() {
- return Inst.Inst->getOperand(MCOpIndex).getReg();
-}
-
-int EDOperand::isImmediate() {
- return (Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeImmediate);
-}
-
-uint64_t EDOperand::immediateVal() {
- return Inst.Inst->getOperand(MCOpIndex).getImm();
-}
-
-int EDOperand::isMemory() {
- uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
-
- switch (operandType) {
- default:
- return 0;
- case kOperandTypeX86Memory:
- case kOperandTypeX86PCRelative:
- case kOperandTypeX86EffectiveAddress:
- case kOperandTypeARMSoReg:
- case kOperandTypeARMSoImm:
- case kOperandTypeARMAddrMode2:
- case kOperandTypeARMAddrMode2Offset:
- case kOperandTypeARMAddrMode3:
- case kOperandTypeARMAddrMode3Offset:
- case kOperandTypeARMAddrMode4:
- case kOperandTypeARMAddrMode5:
- case kOperandTypeARMAddrMode6:
- case kOperandTypeARMAddrMode7:
- case kOperandTypeARMAddrModePC:
- case kOperandTypeARMBranchTarget:
- case kOperandTypeThumbAddrModeRegS1:
- case kOperandTypeThumbAddrModeRegS2:
- case kOperandTypeThumbAddrModeRegS4:
- case kOperandTypeThumbAddrModeRR:
- case kOperandTypeThumbAddrModeSP:
- case kOperandTypeThumb2SoImm:
- case kOperandTypeThumb2AddrModeImm8:
- case kOperandTypeThumb2AddrModeImm8Offset:
- case kOperandTypeThumb2AddrModeImm12:
- case kOperandTypeThumb2AddrModeSoReg:
- case kOperandTypeThumb2AddrModeImm8s4:
- case kOperandTypeThumb2AddrModeReg:
- return 1;
- }
-}
-
-#ifdef __BLOCKS__
-namespace {
- struct RegisterReaderWrapper {
- EDOperand::EDRegisterBlock_t regBlock;
- };
-}
-
-static int readerWrapperCallback(uint64_t *value, unsigned regID, void *arg) {
- RegisterReaderWrapper *wrapper = (RegisterReaderWrapper *)arg;
- return wrapper->regBlock(value, regID);
-}
-
-int EDOperand::evaluate(uint64_t &result, EDRegisterBlock_t regBlock) {
- RegisterReaderWrapper wrapper;
- wrapper.regBlock = regBlock;
- return evaluate(result, readerWrapperCallback, (void*)&wrapper);
-}
-#endif
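
evaluate() above resolves register references by calling back into the client; a sketch of that callback protocol, with a flat register array standing in for real machine state and both helper names hypothetical:

    // Hypothetical register reader for EDOperand::evaluate(): return 0 and
    // fill *Value on success, nonzero if the register cannot be read.
    static int readReg(uint64_t *Value, unsigned RegID, void *Arg) {
      const uint64_t *Regs = static_cast<const uint64_t *>(Arg);
      *Value = Regs[RegID];   // Assumes RegID indexes the caller's table.
      return 0;
    }

    // Resolve a memory operand to the address it references.
    int memOperandAddress(llvm::EDOperand &Op, uint64_t *Regs,
                          uint64_t &Addr) {
      if (!Op.isMemory())
        return -1;
      return Op.evaluate(Addr, readReg, Regs); // 0 on success, -1 otherwise.
    }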
diff --git a/lib/MC/MCDisassembler/EDOperand.h b/lib/MC/MCDisassembler/EDOperand.h
deleted file mode 100644
index 50260ec965a6..000000000000
--- a/lib/MC/MCDisassembler/EDOperand.h
+++ /dev/null
@@ -1,91 +0,0 @@
-//===-- EDOperand.h - LLVM Enhanced Disassembler ----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface for the Enhanced Disassembly library's
-// operand class. The operand is responsible for allowing evaluation given a
-// particular register context.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EDOPERAND_H
-#define LLVM_EDOPERAND_H
-
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-struct EDDisassembler;
-struct EDInst;
-
-typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
- void* arg);
-
-
-/// EDOperand - Encapsulates a single operand, which can be evaluated by the
-/// client
-struct EDOperand {
- /// The parent disassembler
- const EDDisassembler &Disassembler;
- /// The parent instruction
- const EDInst &Inst;
-
- /// The index of the operand in the EDInst
- unsigned int OpIndex;
- /// The index of the first component of the operand in the MCInst
- unsigned int MCOpIndex;
-
- /// Constructor - Initializes an EDOperand
- ///
- /// @arg disassembler - The disassembler responsible for the operand
- /// @arg inst - The instruction containing this operand
- /// @arg opIndex - The index of the operand in inst
- /// @arg mcOpIndex - The index of the operand in the original MCInst
- EDOperand(const EDDisassembler &disassembler,
- const EDInst &inst,
- unsigned int opIndex,
- unsigned int &mcOpIndex);
- ~EDOperand();
-
- /// evaluate - Returns the numeric value of an operand to the extent possible,
- /// returning 0 on success or -1 if there was some problem (such as a
- /// register not being readable)
- ///
- /// @arg result - A reference whose target is filled in with the value of
- /// the operand (the address if it is a memory operand)
- /// @arg callback - A function to call to obtain register values
- /// @arg arg - An opaque argument to pass to callback
- int evaluate(uint64_t &result,
- EDRegisterReaderCallback callback,
- void *arg);
-
- /// isRegister - Returns 1 if the operand is a register or 0 otherwise
- int isRegister();
- /// regVal - Returns the register value.
- unsigned regVal();
-
- /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise
- int isImmediate();
- /// immediateVal - Returns the immediate value.
- uint64_t immediateVal();
-
- /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise
- int isMemory();
-
-#ifdef __BLOCKS__
- typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
-
- /// evaluate - Like evaluate for a callback, but uses a block instead
- int evaluate(uint64_t &result,
- EDRegisterBlock_t regBlock);
-#endif
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/MC/MCDisassembler/EDToken.cpp b/lib/MC/MCDisassembler/EDToken.cpp
deleted file mode 100644
index 5f6c9df4812a..000000000000
--- a/lib/MC/MCDisassembler/EDToken.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Enhanced Disassembler library's token class. The
-// token is responsible for vending information about the token, such as its
-// type and logical value.
-//
-//===----------------------------------------------------------------------===//
-
-#include "EDToken.h"
-#include "EDDisassembler.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/ADT/SmallVector.h"
-using namespace llvm;
-
-EDToken::EDToken(StringRef str,
- enum tokenType type,
- uint64_t localType,
- EDDisassembler &disassembler) :
- Disassembler(disassembler),
- Str(str),
- Type(type),
- LocalType(localType),
- OperandID(-1) {
-}
-
-EDToken::~EDToken() {
-}
-
-void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) {
- Type = kTokenLiteral;
- LiteralSign = sign;
- LiteralAbsoluteValue = absoluteValue;
-}
-
-void EDToken::makeRegister(unsigned registerID) {
- Type = kTokenRegister;
- RegisterID = registerID;
-}
-
-void EDToken::setOperandID(int operandID) {
- OperandID = operandID;
-}
-
-enum EDToken::tokenType EDToken::type() const {
- return Type;
-}
-
-uint64_t EDToken::localType() const {
- return LocalType;
-}
-
-StringRef EDToken::string() const {
- return Str;
-}
-
-int EDToken::operandID() const {
- return OperandID;
-}
-
-int EDToken::literalSign() const {
- if (Type != kTokenLiteral)
- return -1;
- return (LiteralSign ? 1 : 0);
-}
-
-int EDToken::literalAbsoluteValue(uint64_t &value) const {
- if (Type != kTokenLiteral)
- return -1;
- value = LiteralAbsoluteValue;
- return 0;
-}
-
-int EDToken::registerID(unsigned &registerID) const {
- if (Type != kTokenRegister)
- return -1;
- registerID = RegisterID;
- return 0;
-}
-
-int EDToken::tokenize(std::vector<EDToken*> &tokens,
- std::string &str,
- const signed char *operandOrder,
- EDDisassembler &disassembler) {
- SmallVector<MCParsedAsmOperand*, 5> parsedOperands;
- SmallVector<AsmToken, 10> asmTokens;
-
- if (disassembler.parseInst(parsedOperands, asmTokens, str))
- {
- for (unsigned i = 0, e = parsedOperands.size(); i != e; ++i)
- delete parsedOperands[i];
- return -1;
- }
-
- SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator;
- unsigned int operandIndex;
- SmallVectorImpl<AsmToken>::iterator tokenIterator;
-
- operandIterator = parsedOperands.begin();
- operandIndex = 0;
-
- bool readOpcode = false;
-
- const char *wsPointer = asmTokens.begin()->getLoc().getPointer();
-
- for (tokenIterator = asmTokens.begin();
- tokenIterator != asmTokens.end();
- ++tokenIterator) {
- SMLoc tokenLoc = tokenIterator->getLoc();
-
- const char *tokenPointer = tokenLoc.getPointer();
-
- if (tokenPointer > wsPointer) {
- unsigned long wsLength = tokenPointer - wsPointer;
-
- EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength),
- EDToken::kTokenWhitespace,
- 0,
- disassembler);
-
- tokens.push_back(whitespaceToken);
- }
-
- wsPointer = tokenPointer + tokenIterator->getString().size();
-
- while (operandIterator != parsedOperands.end() &&
- tokenLoc.getPointer() >
- (*operandIterator)->getEndLoc().getPointer()) {
- ++operandIterator;
- ++operandIndex;
- }
-
- EDToken *token;
-
- switch (tokenIterator->getKind()) {
- case AsmToken::Identifier:
- if (!readOpcode) {
- token = new EDToken(tokenIterator->getString(),
- EDToken::kTokenOpcode,
- (uint64_t)tokenIterator->getKind(),
- disassembler);
- readOpcode = true;
- break;
- }
- // Any identifier that isn't an opcode is mere punctuation, so we fall
- // through.
- default:
- token = new EDToken(tokenIterator->getString(),
- EDToken::kTokenPunctuation,
- (uint64_t)tokenIterator->getKind(),
- disassembler);
- break;
- case AsmToken::Integer:
- {
- token = new EDToken(tokenIterator->getString(),
- EDToken::kTokenLiteral,
- (uint64_t)tokenIterator->getKind(),
- disassembler);
-
- int64_t intVal = tokenIterator->getIntVal();
-
- if (intVal < 0)
- token->makeLiteral(true, -intVal);
- else
- token->makeLiteral(false, intVal);
- break;
- }
- case AsmToken::Register:
- {
- token = new EDToken(tokenIterator->getString(),
- EDToken::kTokenLiteral,
- (uint64_t)tokenIterator->getKind(),
- disassembler);
-
- token->makeRegister((unsigned)tokenIterator->getRegVal());
- break;
- }
- }
-
- if (operandIterator != parsedOperands.end() &&
- tokenLoc.getPointer() >=
- (*operandIterator)->getStartLoc().getPointer()) {
- // operandIndex == 0 means the operand is the instruction (which the
- // AsmParser treats as an operand but edis does not). We therefore skip
- // operandIndex == 0 and subtract 1 from all other operand indices.
-
- if (operandIndex > 0)
- token->setOperandID(operandOrder[operandIndex - 1]);
- }
-
- tokens.push_back(token);
- }
-
- // Free any parsed operands.
- for (unsigned i = 0, e = parsedOperands.size(); i != e; ++i)
- delete parsedOperands[i];
-
- return 0;
-}
-
-int EDToken::getString(const char*& buf) {
- if (PermStr.length() == 0) {
- PermStr = Str.str();
- }
- buf = PermStr.c_str();
- return 0;
-}
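
tokenize() above fills a vector of heap-allocated tokens that the caller (normally the owning EDInst) must eventually delete; a sketch of inspecting the resulting tokens, with a hypothetical helper:

    #include <vector>

    // Hypothetical pass over tokens produced by EDToken::tokenize().
    unsigned countRegisterTokens(const std::vector<llvm::EDToken *> &Tokens) {
      unsigned Count = 0;
      for (unsigned I = 0, E = Tokens.size(); I != E; ++I) {
        unsigned RegID;
        if (Tokens[I]->type() == llvm::EDToken::kTokenRegister &&
            Tokens[I]->registerID(RegID) == 0)
          ++Count;              // RegID now holds the LLVM register number.
      }
      return Count;
    }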
diff --git a/lib/MC/MCDisassembler/EDToken.h b/lib/MC/MCDisassembler/EDToken.h
deleted file mode 100644
index 384079b72eec..000000000000
--- a/lib/MC/MCDisassembler/EDToken.h
+++ /dev/null
@@ -1,139 +0,0 @@
-//===-- EDToken.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface for the Enhanced Disassembly library's token
-// class. The token is responsible for vending information about the token,
-// such as its type and logical value.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EDTOKEN_H
-#define LLVM_EDTOKEN_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
-#include <string>
-#include <vector>
-
-namespace llvm {
-
-struct EDDisassembler;
-
-/// EDToken - Encapsulates a single token, which can provide a string
-/// representation of itself or interpret itself in various ways, depending
-/// on the token type.
-struct EDToken {
- enum tokenType {
- kTokenWhitespace,
- kTokenOpcode,
- kTokenLiteral,
- kTokenRegister,
- kTokenPunctuation
- };
-
- /// The parent disassembler
- EDDisassembler &Disassembler;
-
- /// The token's string representation
- llvm::StringRef Str;
- /// The token's string representation, but in a form suitable for export
- std::string PermStr;
- /// The type of the token, as exposed through the external API
- enum tokenType Type;
- /// The type of the token, as recorded by the syntax-specific tokenizer
- uint64_t LocalType;
- /// The ID of the operand corresponding to the token, or -1 if the token
- /// is not part of an operand.
- int OperandID;
-
- /// The sign if the token is a literal (1 if negative, 0 otherwise)
- bool LiteralSign;
- /// The absolute value if the token is a literal
- uint64_t LiteralAbsoluteValue;
- /// The LLVM register ID if the token is a register name
- unsigned RegisterID;
-
- /// Constructor - Initializes an EDToken with the information common to all
- /// tokens
- ///
- /// @arg str - The string corresponding to the token
- /// @arg type - The token's type as exposed through the public API
- /// @arg localType - The token's type as recorded by the tokenizer
- /// @arg disassembler - The disassembler responsible for the token
- EDToken(llvm::StringRef str,
- enum tokenType type,
- uint64_t localType,
- EDDisassembler &disassembler);
-
- /// makeLiteral - Adds the information specific to a literal
- /// @arg sign - The sign of the literal (1 if negative, 0
- /// otherwise)
- ///
- /// @arg absoluteValue - The absolute value of the literal
- void makeLiteral(bool sign, uint64_t absoluteValue);
- /// makeRegister - Adds the information specific to a register
- ///
- /// @arg registerID - The LLVM register ID
- void makeRegister(unsigned registerID);
-
- /// setOperandID - Links the token to a numbered operand
- ///
- /// @arg operandID - The operand ID to link to
- void setOperandID(int operandID);
-
- ~EDToken();
-
- /// type - Returns the public type of the token
- enum tokenType type() const;
- /// localType - Returns the tokenizer-specific type of the token
- uint64_t localType() const;
- /// string - Returns the string representation of the token
- llvm::StringRef string() const;
- /// operandID - Returns the operand ID of the token
- int operandID() const;
-
- /// literalSign - Returns the sign of the token
- /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal)
- int literalSign() const;
- /// literalAbsoluteValue - Retrieves the absolute value of the token, and
- /// returns -1 if the token is not a literal
- /// @arg value - A reference to a value that is filled in with the absolute
- /// value, if it is valid
- int literalAbsoluteValue(uint64_t &value) const;
- /// registerID - Retrieves the register ID of the token, and returns -1 if the
- /// token is not a register
- ///
- /// @arg registerID - A reference to a value that is filled in with the
- /// register ID, if it is valid
- int registerID(unsigned &registerID) const;
-
- /// tokenize - Tokenizes a string using the platform- and syntax-specific
- /// tokenizer, and returns 0 on success (-1 on failure)
- ///
- /// @arg tokens - A vector that will be filled in with pointers to
- /// allocated tokens
- /// @arg str - The string, as output by the AsmPrinter
- /// @arg operandOrder - The order of the operands from the operandFlags array
- /// as they appear in str
- /// @arg disassembler - The disassembler for the desired target and
- /// assembly syntax
- static int tokenize(std::vector<EDToken*> &tokens,
- std::string &str,
- const signed char *operandOrder,
- EDDisassembler &disassembler);
-
- /// getString - Directs a character pointer to the string, returning 0 on
- /// success (-1 on failure)
- /// @arg buf - A reference to a pointer that is set to point to the string.
- /// The string is still owned by the token.
- int getString(const char*& buf);
-};
-
-} // end namespace llvm
-#endif
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index f71b266ad632..0f8f0741bd7c 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -8,24 +8,24 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCDwarf.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Config/config.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Given a special op, return the address skip amount (in units of
@@ -101,7 +101,8 @@ void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) {
}
// Add the line entry to this section's entries.
- LineSection->addLineEntry(LineEntry);
+ LineSection->addLineEntry(LineEntry,
+ MCOS->getContext().getDwarfCompileUnitID());
}
//
@@ -131,7 +132,12 @@ static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS,
//
static inline void EmitDwarfLineTable(MCStreamer *MCOS,
const MCSection *Section,
- const MCLineSection *LineSection) {
+ const MCLineSection *LineSection,
+ unsigned CUID) {
+ // This LineSection does not contain any LineEntry for the given Compile Unit.
+ if (!LineSection->containEntriesForID(CUID))
+ return;
+
unsigned FileNum = 1;
unsigned LastLine = 1;
unsigned Column = 0;
@@ -141,8 +147,8 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
// Loop through each MCLineEntry and encode the dwarf line number table.
for (MCLineSection::const_iterator
- it = LineSection->getMCLineEntries()->begin(),
- ie = LineSection->getMCLineEntries()->end(); it != ie; ++it) {
+ it = LineSection->getMCLineEntries(CUID).begin(),
+ ie = LineSection->getMCLineEntries(CUID).end(); it != ie; ++it) {
if (FileNum != it->getFileNum()) {
FileNum = it->getFileNum();
@@ -215,9 +221,36 @@ const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) {
// Switch to the section where the table will be emitted into.
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
- // Create a symbol at the beginning of this section.
- MCSymbol *LineStartSym = context.CreateTempSymbol();
- // Set the value of the symbol, as we are at the start of the section.
+ const DenseMap<unsigned, MCSymbol *> &MCLineTableSymbols =
+ MCOS->getContext().getMCLineTableSymbols();
+ // CUID and MCLineTableSymbols are set in DwarfDebug; when DwarfDebug does
+ // not exist, CUID will be 0 and MCLineTableSymbols will be empty.
+ // Handle Compile Unit 0, whose line table start symbol is the section symbol.
+ const MCSymbol *LineStartSym = EmitCU(MCOS, 0);
+ // Handle the rest of the Compile Units.
+ for (unsigned Is = 1, Ie = MCLineTableSymbols.size(); Is < Ie; Is++)
+ EmitCU(MCOS, Is);
+
+ // Now delete the MCLineSections that were created in MCLineEntry::Make()
+ // and used to emit the line table.
+ const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ MCOS->getContext().getMCLineSections();
+ for (DenseMap<const MCSection *, MCLineSection *>::const_iterator it =
+ MCLineSections.begin(), ie = MCLineSections.end(); it != ie;
+ ++it)
+ delete it->second;
+
+ return LineStartSym;
+}
+
+const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) {
+ MCContext &context = MCOS->getContext();
+
+ // Create a symbol at the beginning of the line table.
+ MCSymbol *LineStartSym = MCOS->getContext().getMCLineTableSymbol(CUID);
+ if (!LineStartSym)
+ LineStartSym = context.CreateTempSymbol();
+ // Set the value of the symbol, as we are at the start of the line table.
MCOS->EmitLabel(LineStartSym);
// Create a symbol for the end of the section (to be set when we get there).
@@ -239,8 +272,7 @@ const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) {
// total length, the 2 bytes for the version, and these 4 bytes for the
// length of the prologue.
MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym,
- (4 + 2 + 4)),
- 4, 0);
+ (4 + 2 + 4)), 4, 0);
// Parameters of the state machine are next.
MCOS->EmitIntValue(DWARF2_LINE_MIN_INSN_LENGTH, 1);
@@ -266,20 +298,20 @@ const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) {
// Put out the directory and file tables.
// First the directory table.
- const std::vector<StringRef> &MCDwarfDirs =
- context.getMCDwarfDirs();
+ const SmallVectorImpl<StringRef> &MCDwarfDirs =
+ context.getMCDwarfDirs(CUID);
for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
- MCOS->EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName
- MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string
+ MCOS->EmitBytes(MCDwarfDirs[i]); // the DirectoryName
+ MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string
}
MCOS->EmitIntValue(0, 1); // Terminate the directory list
// Second the file table.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
- MCOS->getContext().getMCDwarfFiles();
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
+ MCOS->getContext().getMCDwarfFiles(CUID);
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
- MCOS->EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName
- MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string
+ MCOS->EmitBytes(MCDwarfFiles[i]->getName()); // FileName
+ MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string
// the Directory num
MCOS->EmitULEB128IntValue(MCDwarfFiles[i]->getDirIndex());
MCOS->EmitIntValue(0, 1); // last modification timestamp (always 0)
@@ -301,11 +333,7 @@ const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) {
++it) {
const MCSection *Sec = *it;
const MCLineSection *Line = MCLineSections.lookup(Sec);
- EmitDwarfLineTable(MCOS, Sec, Line);
-
- // Now delete the MCLineSections that were created in MCLineEntry::Make()
- // and used to emit the line table.
- delete Line;
+ EmitDwarfLineTable(MCOS, Sec, Line, CUID);
}
if (MCOS->getContext().getAsmInfo().getLinkerRequiresNonEmptyDwarfLines()
@@ -342,7 +370,7 @@ void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta,
SmallString<256> Tmp;
raw_svector_ostream OS(Tmp);
MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS);
- MCOS->EmitBytes(OS.str(), /*AddrSpace=*/0);
+ MCOS->EmitBytes(OS.str());
}
/// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas.
@@ -484,7 +512,8 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
// .debug_aranges section. Which contains a header and a table of pairs of
// PointerSize'ed values for the address and size of section(s) with line table
// entries (just the default .text in our case) and a terminating pair of zeros.
-static void EmitGenDwarfAranges(MCStreamer *MCOS) {
+static void EmitGenDwarfAranges(MCStreamer *MCOS,
+ const MCSymbol *InfoSectionSymbol) {
MCContext &context = MCOS->getContext();
// Create a symbol at the end of the section that we are creating the dwarf
@@ -523,8 +552,11 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS) {
// The 2 byte version, which is 2.
MCOS->EmitIntValue(2, 2);
// The 4 byte offset to the compile unit in the .debug_info from the start
- // of the .debug_info, it is at the start of that section so this is zero.
- MCOS->EmitIntValue(0, 4);
+ // of the .debug_info.
+ if (InfoSectionSymbol)
+ MCOS->EmitSymbolValue(InfoSectionSymbol, 4);
+ else
+ MCOS->EmitIntValue(0, 4);
// The 1 byte size of an address.
MCOS->EmitIntValue(AddrSize, 1);
// The 1 byte size of a segment descriptor, we use a value of zero.
@@ -611,33 +643,38 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// AT_name, the name of the source file. Reconstruct from the first directory
// and file table entries.
- const std::vector<StringRef> &MCDwarfDirs =
+ const SmallVectorImpl<StringRef> &MCDwarfDirs =
context.getMCDwarfDirs();
if (MCDwarfDirs.size() > 0) {
- MCOS->EmitBytes(MCDwarfDirs[0], 0);
- MCOS->EmitBytes("/", 0);
+ MCOS->EmitBytes(MCDwarfDirs[0]);
+ MCOS->EmitBytes("/");
}
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
MCOS->getContext().getMCDwarfFiles();
- MCOS->EmitBytes(MCDwarfFiles[1]->getName(), 0);
+ MCOS->EmitBytes(MCDwarfFiles[1]->getName());
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
// AT_comp_dir, the working directory the assembly was done in.
- llvm::sys::Path CWD = llvm::sys::Path::GetCurrentDirectory();
- MCOS->EmitBytes(StringRef(CWD.c_str()), 0);
+ MCOS->EmitBytes(context.getCompilationDir());
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
// AT_APPLE_flags, the command line arguments of the assembler tool.
StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
if (!DwarfDebugFlags.empty()){
- MCOS->EmitBytes(DwarfDebugFlags, 0);
+ MCOS->EmitBytes(DwarfDebugFlags);
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
}
// AT_producer, the version of the assembler tool.
- MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM "), 0);
- MCOS->EmitBytes(StringRef(PACKAGE_VERSION), 0);
- MCOS->EmitBytes(StringRef(")"), 0);
+ StringRef DwarfDebugProducer = context.getDwarfDebugProducer();
+ if (!DwarfDebugProducer.empty()) {
+ MCOS->EmitBytes(DwarfDebugProducer);
+ }
+ else {
+ MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM "));
+ MCOS->EmitBytes(StringRef(PACKAGE_VERSION));
+ MCOS->EmitBytes(StringRef(")"));
+ }
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
// AT_language, a 4 byte value. We use DW_LANG_Mips_Assembler as the dwarf2
@@ -658,7 +695,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
MCOS->EmitULEB128IntValue(2);
// AT_name, of the label without any leading underbar.
- MCOS->EmitBytes(Entry->getName(), 0);
+ MCOS->EmitBytes(Entry->getName());
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
// AT_decl_file, index into the file table.
@@ -705,15 +742,21 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) {
// Create the dwarf sections in this order (.debug_line already created).
MCContext &context = MCOS->getContext();
const MCAsmInfo &AsmInfo = context.getAsmInfo();
+ bool CreateDwarfSectionSymbols =
+ AsmInfo.doesDwarfUseRelocationsAcrossSections();
+ if (!CreateDwarfSectionSymbols)
+ LineSectionSymbol = NULL;
+ MCSymbol *AbbrevSectionSymbol = NULL;
+ MCSymbol *InfoSectionSymbol = NULL;
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+ if (CreateDwarfSectionSymbols) {
+ InfoSectionSymbol = context.CreateTempSymbol();
+ MCOS->EmitLabel(InfoSectionSymbol);
+ }
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
- MCSymbol *AbbrevSectionSymbol;
- if (AsmInfo.doesDwarfUseRelocationsAcrossSections()) {
+ if (CreateDwarfSectionSymbols) {
AbbrevSectionSymbol = context.CreateTempSymbol();
MCOS->EmitLabel(AbbrevSectionSymbol);
- } else {
- AbbrevSectionSymbol = NULL;
- LineSectionSymbol = NULL;
}
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
@@ -722,7 +765,7 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) {
return;
// Output the data for .debug_aranges section.
- EmitGenDwarfAranges(MCOS);
+ EmitGenDwarfAranges(MCOS, InfoSectionSymbol);
// Output the data for .debug_abbrev section.
EmitGenDwarfAbbrev(MCOS);
@@ -777,7 +820,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
static int getDataAlignmentFactor(MCStreamer &streamer) {
MCContext &context = streamer.getContext();
const MCAsmInfo &asmInfo = context.getAsmInfo();
- int size = asmInfo.getPointerSize();
+ int size = asmInfo.getCalleeSaveStackSlotSize();
if (asmInfo.isStackGrowthDirectionUp())
return size;
else
@@ -928,46 +971,86 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
bool VerboseAsm = Streamer.isVerboseAsm();
switch (Instr.getOperation()) {
- case MCCFIInstruction::Move:
- case MCCFIInstruction::RelMove: {
- const MachineLocation &Dst = Instr.getDestination();
- const MachineLocation &Src = Instr.getSource();
- const bool IsRelative = Instr.getOperation() == MCCFIInstruction::RelMove;
-
- // If advancing cfa.
- if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
- if (Src.getReg() == MachineLocation::VirtualFP) {
- if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_offset");
- Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
- } else {
- if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa");
- Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
- if (VerboseAsm) Streamer.AddComment(Twine("Reg ") +
- Twine(Src.getReg()));
- Streamer.EmitULEB128IntValue(Src.getReg());
- }
+ case MCCFIInstruction::OpRegister: {
+ unsigned Reg1 = Instr.getRegister();
+ unsigned Reg2 = Instr.getRegister2();
+ if (VerboseAsm) {
+ Streamer.AddComment("DW_CFA_register");
+ Streamer.AddComment(Twine("Reg1 ") + Twine(Reg1));
+ Streamer.AddComment(Twine("Reg2 ") + Twine(Reg2));
+ }
+ Streamer.EmitIntValue(dwarf::DW_CFA_register, 1);
+ Streamer.EmitULEB128IntValue(Reg1);
+ Streamer.EmitULEB128IntValue(Reg2);
+ return;
+ }
+ case MCCFIInstruction::OpUndefined: {
+ unsigned Reg = Instr.getRegister();
+ if (VerboseAsm) {
+ Streamer.AddComment("DW_CFA_undefined");
+ Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ }
+ Streamer.EmitIntValue(dwarf::DW_CFA_undefined, 1);
+ Streamer.EmitULEB128IntValue(Reg);
+ return;
+ }
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ case MCCFIInstruction::OpDefCfaOffset: {
+ const bool IsRelative =
+ Instr.getOperation() == MCCFIInstruction::OpAdjustCfaOffset;
- if (IsRelative)
- CFAOffset += Src.getOffset();
- else
- CFAOffset = -Src.getOffset();
+ if (VerboseAsm)
+ Streamer.AddComment("DW_CFA_def_cfa_offset");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
- if (VerboseAsm) Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
- Streamer.EmitULEB128IntValue(CFAOffset);
- return;
- }
+ if (IsRelative)
+ CFAOffset += Instr.getOffset();
+ else
+ CFAOffset = -Instr.getOffset();
- if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
- assert(Dst.isReg() && "Machine move not supported yet.");
- if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_register");
- Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
- if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Dst.getReg()));
- Streamer.EmitULEB128IntValue(Dst.getReg());
- return;
- }
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
+ Streamer.EmitULEB128IntValue(CFAOffset);
- unsigned Reg = Src.getReg();
- int Offset = Dst.getOffset();
+ return;
+ }
+ case MCCFIInstruction::OpDefCfa: {
+ if (VerboseAsm)
+ Streamer.AddComment("DW_CFA_def_cfa");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
+
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Reg ") + Twine(Instr.getRegister()));
+ Streamer.EmitULEB128IntValue(Instr.getRegister());
+
+ CFAOffset = -Instr.getOffset();
+
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
+ Streamer.EmitULEB128IntValue(CFAOffset);
+
+ return;
+ }
+
+ case MCCFIInstruction::OpDefCfaRegister: {
+ if (VerboseAsm)
+ Streamer.AddComment("DW_CFA_def_cfa_register");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
+
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Reg ") + Twine(Instr.getRegister()));
+ Streamer.EmitULEB128IntValue(Instr.getRegister());
+
+ return;
+ }
+
+ case MCCFIInstruction::OpOffset:
+ case MCCFIInstruction::OpRelOffset: {
+ const bool IsRelative =
+ Instr.getOperation() == MCCFIInstruction::OpRelOffset;
+
+ unsigned Reg = Instr.getRegister();
+ int Offset = Instr.getOffset();
if (IsRelative)
Offset -= CFAOffset;
Offset = Offset / dataAlignmentFactor;
@@ -995,24 +1078,24 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
}
return;
}
- case MCCFIInstruction::RememberState:
+ case MCCFIInstruction::OpRememberState:
if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state");
Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
return;
- case MCCFIInstruction::RestoreState:
+ case MCCFIInstruction::OpRestoreState:
if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state");
Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
return;
- case MCCFIInstruction::SameValue: {
- unsigned Reg = Instr.getDestination().getReg();
+ case MCCFIInstruction::OpSameValue: {
+ unsigned Reg = Instr.getRegister();
if (VerboseAsm) Streamer.AddComment("DW_CFA_same_value");
Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1);
if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
Streamer.EmitULEB128IntValue(Reg);
return;
}
- case MCCFIInstruction::Restore: {
- unsigned Reg = Instr.getDestination().getReg();
+ case MCCFIInstruction::OpRestore: {
+ unsigned Reg = Instr.getRegister();
if (VerboseAsm) {
Streamer.AddComment("DW_CFA_restore");
Streamer.AddComment(Twine("Reg ") + Twine(Reg));
@@ -1020,9 +1103,9 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
return;
}
- case MCCFIInstruction::Escape:
+ case MCCFIInstruction::OpEscape:
if (VerboseAsm) Streamer.AddComment("Escape bytes");
- Streamer.EmitBytes(Instr.getValues(), 0);
+ Streamer.EmitBytes(Instr.getValues());
return;
}
llvm_unreachable("Unhandled case in switch");
@@ -1180,7 +1263,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
Augmentation += "R";
if (IsSignalFrame)
Augmentation += "S";
- streamer.EmitBytes(Augmentation.str(), 0);
+ streamer.EmitBytes(Augmentation.str());
}
streamer.EmitIntValue(0, 1);
@@ -1244,8 +1327,21 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
TranslateMachineLocation(MRI, Moves[i].getDestination());
const MachineLocation &Src =
TranslateMachineLocation(MRI, Moves[i].getSource());
- MCCFIInstruction Inst(Label, Dst, Src);
- Instructions.push_back(Inst);
+
+ if (Dst.isReg()) {
+ assert(Dst.getReg() == MachineLocation::VirtualFP);
+ assert(!Src.isReg());
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createDefCfa(Label, Src.getReg(), -Src.getOffset());
+ Instructions.push_back(Inst);
+ } else {
+ assert(Src.isReg());
+ unsigned Reg = Src.getReg();
+ int Offset = Dst.getOffset();
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createOffset(Label, Reg, Offset);
+ Instructions.push_back(Inst);
+ }
}
EmitCFIInstructions(streamer, Instructions, NULL);
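The loop above re-expresses each legacy two-MachineLocation move as one of the new factory-built CFI instructions, and the mapping has exactly two cases. A schematic sketch with the types reduced to the fields actually consulted (names are illustrative):

struct Loc { bool IsReg; unsigned Reg; int Offset; };
struct CFI { enum Tag { DefCfa, RegOffset } Kind; unsigned Reg; int Off; };

// Mirror of the Dst/Src translation above: a move into the virtual
// frame pointer redefines the CFA; anything else records a register
// save at an offset from the CFA.
static CFI translateMove(const Loc &Dst, const Loc &Src) {
  if (Dst.IsReg) {                       // => createDefCfa(Label, Reg, -Off)
    CFI I = { CFI::DefCfa, Src.Reg, -Src.Offset };
    return I;
  }
  CFI I = { CFI::RegOffset, Src.Reg, Dst.Offset }; // => createOffset(...)
  return I;
}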
@@ -1431,7 +1527,7 @@ void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer,
SmallString<256> Tmp;
raw_svector_ostream OS(Tmp);
MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS);
- Streamer.EmitBytes(OS.str(), /*AddrSpace=*/0);
+ Streamer.EmitBytes(OS.str());
}
void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta,
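EncodeAdvanceLoc, whose body falls outside this hunk, picks the smallest DW_CFA_advance_loc form for the delta being buffered above. A hedged standalone sketch of that size ladder (opcodes are the standard DWARF constants; the real emitter honors target endianness, little-endian is assumed here):

#include <cstdint>
#include <vector>

static void encodeAdvanceLoc(std::vector<uint8_t> &Out, uint64_t Delta) {
  if (Delta == 0)
    return;                               // nothing to advance
  if (Delta < 0x40) {
    Out.push_back(0x40 | (uint8_t)Delta); // DW_CFA_advance_loc, inline delta
  } else if (Delta <= 0xff) {
    Out.push_back(0x02);                  // DW_CFA_advance_loc1
    Out.push_back((uint8_t)Delta);
  } else if (Delta <= 0xffff) {
    Out.push_back(0x03);                  // DW_CFA_advance_loc2
    Out.push_back((uint8_t)Delta);
    Out.push_back((uint8_t)(Delta >> 8));
  } else {
    Out.push_back(0x04);                  // DW_CFA_advance_loc4
    for (int i = 0; i < 4; ++i)
      Out.push_back((uint8_t)(Delta >> (8 * i)));
  }
}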
diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp
index f9f98e0f730e..560cdbc6abae 100644
--- a/lib/MC/MCELF.cpp
+++ b/lib/MC/MCELF.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "MCELF.h"
+#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCFixupKindInfo.h"
@@ -52,6 +52,8 @@ unsigned MCELF::GetType(const MCSymbolData &SD) {
return Type;
}
+// Visibility is stored in the first two bits of st_other;
+// the st_other byte itself lives in the second byte of get/setFlags.
void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) {
assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
@@ -68,4 +70,17 @@ unsigned MCELF::GetVisibility(MCSymbolData &SD) {
return Visibility;
}
+// Other is stored in the last six bits of st_other;
+// the st_other byte itself lives in the second byte of get/setFlags.
+void MCELF::setOther(MCSymbolData &SD, unsigned Other) {
+ uint32_t OtherFlags = SD.getFlags() & ~(0x3f << ELF_Other_Shift);
+ SD.setFlags(OtherFlags | (Other << ELF_Other_Shift));
+}
+
+unsigned MCELF::getOther(MCSymbolData &SD) {
+ unsigned Other =
+ (SD.getFlags() & (0x3f << ELF_Other_Shift)) >> ELF_Other_Shift;
+ return Other;
+}
+
}
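Both accessors above treat the flags word as a bitfield: two visibility bits and six "other" bits share the second byte. A self-contained sketch of the same packing, with an illustrative stand-in for ELF_Other_Shift (the real constant comes from MCELFSymbolFlags.h):

#include <cassert>
#include <stdint.h>

static const unsigned kOtherShift = 10; // illustrative, not the real value

static void setOther(uint32_t &Flags, unsigned Other) {
  assert(Other < 64 && "st_other payload is six bits");
  Flags = (Flags & ~(0x3fu << kOtherShift)) | (Other << kOtherShift);
}

static unsigned getOther(uint32_t Flags) {
  return (Flags >> kOtherShift) & 0x3f;
}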
diff --git a/lib/MC/MCELF.h b/lib/MC/MCELF.h
deleted file mode 100644
index e08f1e65429a..000000000000
--- a/lib/MC/MCELF.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- lib/MC/MCELF.h - ELF MC --------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains some support functions used by the ELF Streamer and
-// ObjectWriter.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_MCELF_H
-#define LLVM_MC_MCELF_H
-
-#include "llvm/MC/MCExpr.h"
-
-namespace llvm {
-class MCSymbolData;
-
-class MCELF {
- public:
- static void SetBinding(MCSymbolData &SD, unsigned Binding);
- static unsigned GetBinding(const MCSymbolData &SD);
- static void SetType(MCSymbolData &SD, unsigned Type);
- static unsigned GetType(const MCSymbolData &SD);
- static void SetVisibility(MCSymbolData &SD, unsigned Visibility);
- static unsigned GetVisibility(MCSymbolData &SD);
-};
-
-}
-
-#endif
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index 74cd042a0f8c..4cac84d66609 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -24,11 +24,6 @@ MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_,
IsN64(IsN64_){
}
-/// Default e_flags = 0
-unsigned MCELFObjectTargetWriter::getEFlags() const {
- return 0;
-}
-
const MCSymbol *MCELFObjectTargetWriter::ExplicitRelSym(const MCAssembler &Asm,
const MCValue &Target,
const MCFragment &F,
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 14fbc1ec8391..7f5f1b63e5fe 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -1,4 +1,4 @@
-//===- lib/MC/MCELFStreamer.cpp - ELF Object Output ------------===//
+//===- lib/MC/MCELFStreamer.cpp - ELF Object Output -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,23 +11,20 @@
//
//===----------------------------------------------------------------------===//
-#include "MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCAsmBackend.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -35,117 +32,41 @@
using namespace llvm;
-namespace {
-class MCELFStreamer : public MCObjectStreamer {
-public:
- MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter)
- : MCObjectStreamer(Context, TAB, OS, Emitter) {}
-
- MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- MCAssembler *Assembler)
- : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {}
-
-
- ~MCELFStreamer() {}
-
- /// @name MCStreamer Interface
- /// @{
-
- virtual void InitSections();
- virtual void ChangeSection(const MCSection *Section);
- virtual void EmitLabel(MCSymbol *Symbol);
- virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
- virtual void EmitThumbFunc(MCSymbol *Func);
- virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
- virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
- virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
- llvm_unreachable("ELF doesn't support this directive");
- }
- virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment);
- virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- llvm_unreachable("ELF doesn't support this directive");
- }
-
- virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
- llvm_unreachable("ELF doesn't support this directive");
- }
- virtual void EmitCOFFSymbolType(int Type) {
- llvm_unreachable("ELF doesn't support this directive");
- }
-
- virtual void EndCOFFSymbolDef() {
- llvm_unreachable("ELF doesn't support this directive");
- }
-
- virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- SD.setSize(Value);
- }
-
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment);
-
- virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
- uint64_t Size = 0, unsigned ByteAlignment = 0) {
- llvm_unreachable("ELF doesn't support this directive");
- }
- virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment = 0) {
- llvm_unreachable("ELF doesn't support this directive");
- }
- virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace);
-
- virtual void EmitFileDirective(StringRef Filename);
-
- virtual void EmitTCEntry(const MCSymbol &S);
-
- virtual void FinishImpl();
+inline void MCELFStreamer::SetSection(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind) {
+ SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
+}
-private:
- virtual void EmitInstToFragment(const MCInst &Inst);
- virtual void EmitInstToData(const MCInst &Inst);
+inline void MCELFStreamer::SetSectionData() {
+ SetSection(".data",
+ ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ EmitCodeAlignment(4, 0);
+}
- void fixSymbolsInTLSFixups(const MCExpr *expr);
+inline void MCELFStreamer::SetSectionText() {
+ SetSection(".text",
+ ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
+ SectionKind::getText());
+ EmitCodeAlignment(4, 0);
+}
- struct LocalCommon {
- MCSymbolData *SD;
- uint64_t Size;
- unsigned ByteAlignment;
- };
- std::vector<LocalCommon> LocalCommons;
+inline void MCELFStreamer::SetSectionBss() {
+ SetSection(".bss",
+ ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+ EmitCodeAlignment(4, 0);
+}
- SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
- /// @}
- void SetSection(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind Kind) {
- SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
- }
+MCELFStreamer::~MCELFStreamer() {
+}
- void SetSectionData() {
- SetSection(".data", ELF::SHT_PROGBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
- SectionKind::getDataRel());
- EmitCodeAlignment(4, 0);
- }
- void SetSectionText() {
- SetSection(".text", ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR |
- ELF::SHF_ALLOC, SectionKind::getText());
- EmitCodeAlignment(4, 0);
- }
- void SetSectionBss() {
- SetSection(".bss", ELF::SHT_NOBITS,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC, SectionKind::getBSS());
- EmitCodeAlignment(4, 0);
- }
-};
+void MCELFStreamer::InitToTextSection() {
+ SetSectionText();
}
void MCELFStreamer::InitSections() {
@@ -169,6 +90,10 @@ void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
MCELF::SetType(SD, ELF::STT_TLS);
}
+void MCELFStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+}
+
void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
case MCAF_SyntaxUnified: return; // no-op here.
@@ -183,24 +108,10 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
llvm_unreachable("invalid assembler flag!");
}
-void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
- // FIXME: Anything needed here to flag the function as thumb?
-
- getAssembler().setIsThumbFunc(Func);
-
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func);
- SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
-}
-
-void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
- // MCObjectStreamer.
- // FIXME: Lift context changes into super class.
- getAssembler().getOrCreateSymbolData(*Symbol);
- Symbol->setVariableValue(AddValueSymbols(Value));
-}
-
void MCELFStreamer::ChangeSection(const MCSection *Section) {
+ MCSectionData *CurSection = getCurrentSectionData();
+ if (CurSection && CurSection->isBundleLocked())
+ report_fatal_error("Unterminated .bundle_lock when changing a section");
const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup();
if (Grp)
getAssembler().getOrCreateSymbolData(*Grp);
@@ -341,6 +252,11 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
SD.setSize(MCConstantExpr::Create(Size, getContext()));
}
+void MCELFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setSize(Value);
+}
+
void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
// FIXME: Should this be caught and done earlier?
@@ -353,10 +269,22 @@ void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
unsigned AddrSpace) {
+ if (getCurrentSectionData()->isBundleLocked())
+ report_fatal_error("Emitting values inside a locked bundle is forbidden");
fixSymbolsInTLSFixups(Value);
MCObjectStreamer::EmitValueImpl(Value, Size, AddrSpace);
}
+void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ if (getCurrentSectionData()->isBundleLocked())
+ report_fatal_error("Emitting values inside a locked bundle is forbidden");
+ MCObjectStreamer::EmitValueToAlignment(ByteAlignment, Value,
+ ValueSize, MaxBytesToEmit);
+}
+
// Add a symbol for the file name of this module. This is the second
// entry in the module's symbol table (the first being the null symbol).
@@ -372,7 +300,9 @@ void MCELFStreamer::EmitFileDirective(StringRef Filename) {
void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
switch (expr->getKind()) {
- case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
+ case MCExpr::Target:
+ cast<MCTargetExpr>(expr)->fixELFSymbolsInTLSFixups(getAssembler());
+ break;
case MCExpr::Constant:
break;
@@ -404,6 +334,19 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
case MCSymbolRefExpr::VK_Mips_GOTTPREL:
case MCSymbolRefExpr::VK_Mips_TPREL_HI:
case MCSymbolRefExpr::VK_Mips_TPREL_LO:
+ case MCSymbolRefExpr::VK_PPC_TPREL16_HA:
+ case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA:
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO:
+ case MCSymbolRefExpr::VK_PPC_TLS:
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA:
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO:
+ case MCSymbolRefExpr::VK_PPC_TLSGD:
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA:
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
+ case MCSymbolRefExpr::VK_PPC_TLSLD:
break;
}
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol());
@@ -419,32 +362,116 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) {
this->MCObjectStreamer::EmitInstToFragment(Inst);
- MCInstFragment &F = *cast<MCInstFragment>(getCurrentFragment());
+ MCRelaxableFragment &F = *cast<MCRelaxableFragment>(getCurrentFragment());
for (unsigned i = 0, e = F.getFixups().size(); i != e; ++i)
fixSymbolsInTLSFixups(F.getFixups()[i].getValue());
}
void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
- MCDataFragment *DF = getOrCreateDataFragment();
-
+ MCAssembler &Assembler = getAssembler();
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
- getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
VecOS.flush();
for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
fixSymbolsInTLSFixups(Fixups[i].getValue());
+ // There are several possibilities here:
+ //
+ // If bundling is disabled, append the encoded instruction to the current data
+ // fragment (or create a new such fragment if the current fragment is not a
+ // data fragment).
+ //
+ // If bundling is enabled:
+ // - If we're not in a bundle-locked group, emit the instruction into a
+ // fragment of its own. If there are no fixups registered for the
+ // instruction, emit a MCCompactEncodedInstFragment. Otherwise, emit a
+ // MCDataFragment.
+ // - If we're in a bundle-locked group, append the instruction to the current
+ // data fragment because we want all the instructions in a group to get into
+ // the same fragment. Be careful not to do that for the first instruction in
+ // the group, though.
+ MCDataFragment *DF;
+
+ if (Assembler.isBundlingEnabled()) {
+ MCSectionData *SD = getCurrentSectionData();
+ if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst())
+ // If we are bundle-locked, we re-use the current fragment; the
+ // .bundle_lock directive has already started a fresh data fragment.
+ DF = cast<MCDataFragment>(getCurrentFragment());
+ else if (!SD->isBundleLocked() && Fixups.size() == 0) {
+ // Optimize memory usage by emitting the instruction to a
+ // MCCompactEncodedInstFragment when not in a bundle-locked group and
+ // there are no fixups registered.
+ MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(SD);
+ CEIF->getContents().append(Code.begin(), Code.end());
+ return;
+ } else {
+ DF = new MCDataFragment(SD);
+ if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) {
+ // If this is a new fragment created for a bundle-locked group, and the
+ // group was marked as "align_to_end", set a flag in the fragment.
+ DF->setAlignToBundleEnd(true);
+ }
+ }
+
+ // We're now emitting an instruction in a bundle group, so this flag has
+ // to be turned off.
+ SD->setBundleGroupBeforeFirstInst(false);
+ } else {
+ DF = getOrCreateDataFragment();
+ }
+
// Add the fixups and data.
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
- DF->addFixup(Fixups[i]);
+ DF->getFixups().push_back(Fixups[i]);
}
+ DF->setHasInstructions(true);
DF->getContents().append(Code.begin(), Code.end());
}
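The comment block above enumerates the possible destinations for an encoded instruction; the choice is easier to see in isolation. A schematic sketch where the enum and the boolean parameters are illustrative stand-ins for the MCSectionData queries used above:

enum FragmentChoice {
  DefaultData,        // bundling off: getOrCreateDataFragment()
  ReuseCurrentData,   // locked group: keep the group in one fragment
  CompactEncodedInst, // unlocked, no fixups: cheapest representation
  NewData             // otherwise: a fresh MCDataFragment
};

static FragmentChoice pickFragment(bool BundlingEnabled, bool BundleLocked,
                                   bool GroupBeforeFirstInst, bool HasFixups) {
  if (!BundlingEnabled)
    return DefaultData;
  if (BundleLocked && !GroupBeforeFirstInst)
    return ReuseCurrentData;
  if (!BundleLocked && !HasFixups)
    return CompactEncodedInst;
  return NewData;
}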
+void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
+ assert(AlignPow2 <= 30 && "Invalid bundle alignment");
+ MCAssembler &Assembler = getAssembler();
+ if (Assembler.getBundleAlignSize() == 0 && AlignPow2 > 0)
+ Assembler.setBundleAlignSize(1 << AlignPow2);
+ else
+ report_fatal_error(".bundle_align_mode should be only set once per file");
+}
+
+void MCELFStreamer::EmitBundleLock(bool AlignToEnd) {
+ MCSectionData *SD = getCurrentSectionData();
+
+ // Sanity checks
+ //
+ if (!getAssembler().isBundlingEnabled())
+ report_fatal_error(".bundle_lock forbidden when bundling is disabled");
+ else if (SD->isBundleLocked())
+ report_fatal_error("Nesting of .bundle_lock is forbidden");
+
+ SD->setBundleLockState(AlignToEnd ? MCSectionData::BundleLockedAlignToEnd :
+ MCSectionData::BundleLocked);
+ SD->setBundleGroupBeforeFirstInst(true);
+}
+
+void MCELFStreamer::EmitBundleUnlock() {
+ MCSectionData *SD = getCurrentSectionData();
+
+ // Sanity checks
+ if (!getAssembler().isBundlingEnabled())
+ report_fatal_error(".bundle_unlock forbidden when bundling is disabled");
+ else if (!SD->isBundleLocked())
+ report_fatal_error(".bundle_unlock without matching lock");
+ else if (SD->isBundleGroupBeforeFirstInst())
+ report_fatal_error("Empty bundle-locked group is forbidden");
+
+ SD->setBundleLockState(MCSectionData::NotBundleLocked);
+}
+
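Taken together, EmitBundleLock and EmitBundleUnlock drive a small lock-state machine on the current section. A minimal sketch of the transitions those report_fatal_error checks enforce (the enum mirrors MCSectionData's bundle-lock states; the boolean-return framing is illustrative):

enum BundleLockState { NotBundleLocked, BundleLocked, BundleLockedAlignToEnd };

// Returns false exactly where the streamer above calls report_fatal_error.
static bool applyBundleDirective(BundleLockState &State, bool BundlingEnabled,
                                 bool IsLock, bool AlignToEnd) {
  if (!BundlingEnabled)
    return false;                    // .bundle_lock/unlock need bundling on
  if (IsLock) {
    if (State != NotBundleLocked)
      return false;                  // nesting is forbidden
    State = AlignToEnd ? BundleLockedAlignToEnd : BundleLocked;
  } else {
    if (State == NotBundleLocked)
      return false;                  // unlock without a matching lock
    State = NotBundleLocked;
  }
  return true;
}

The empty-group check (isBundleGroupBeforeFirstInst at unlock time) is omitted here for brevity.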
void MCELFStreamer::FinishImpl() {
EmitFrames(true);
@@ -470,11 +497,9 @@ void MCELFStreamer::FinishImpl() {
this->MCObjectStreamer::FinishImpl();
}
-
-void MCELFStreamer::EmitTCEntry(const MCSymbol &S)
-{
+void MCELFStreamer::EmitTCEntry(const MCSymbol &S) {
// Creates a R_PPC64_TOC relocation
- MCObjectStreamer::EmitSymbolValue(&S, 8, 0);
+ MCObjectStreamer::EmitSymbolValue(&S, 8);
}
MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
@@ -487,3 +512,41 @@ MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
S->getAssembler().setNoExecStack(true);
return S;
}
+
+void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
+ llvm_unreachable("Generic ELF doesn't support this directive");
+}
+
+MCSymbolData &MCELFStreamer::getOrCreateSymbolData(MCSymbol *Symbol) {
+ return getAssembler().getOrCreateSymbolData(*Symbol);
+}
+
+void MCELFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitCOFFSymbolType(int Type) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EndCOFFSymbolDef() {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ llvm_unreachable("ELF doesn't support this directive");
+}
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index de2f375aab91..cd4d144575b1 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -54,14 +54,16 @@ void MCExpr::print(raw_ostream &OS) const {
else
OS << Sym;
- if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_PLT ||
+ if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_NONE ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_PLT ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_TLSGD ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOT ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1 ||
- SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET2)
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET2 ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_PREL31)
OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
else if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 &&
@@ -192,7 +194,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_TPOFF: return "TPOFF";
case VK_DTPOFF: return "DTPOFF";
case VK_TLVP: return "TLVP";
- case VK_SECREL: return "SECREL";
+ case VK_SECREL: return "SECREL32";
+ case VK_ARM_NONE: return "(NONE)";
case VK_ARM_PLT: return "(PLT)";
case VK_ARM_GOT: return "(GOT)";
case VK_ARM_GOTOFF: return "(GOTOFF)";
@@ -201,6 +204,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_ARM_TLSGD: return "(tlsgd)";
case VK_ARM_TARGET1: return "(target1)";
case VK_ARM_TARGET2: return "(target2)";
+ case VK_ARM_PREL31: return "(prel31)";
case VK_PPC_TOC: return "tocbase";
case VK_PPC_TOC_ENTRY: return "toc";
case VK_PPC_DARWIN_HA16: return "ha16";
@@ -209,6 +213,19 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_PPC_GAS_LO16: return "l";
case VK_PPC_TPREL16_HA: return "tprel@ha";
case VK_PPC_TPREL16_LO: return "tprel@l";
+ case VK_PPC_DTPREL16_HA: return "dtprel@ha";
+ case VK_PPC_DTPREL16_LO: return "dtprel@l";
+ case VK_PPC_TOC16_HA: return "toc@ha";
+ case VK_PPC_TOC16_LO: return "toc@l";
+ case VK_PPC_GOT_TPREL16_HA: return "got@tprel@ha";
+ case VK_PPC_GOT_TPREL16_LO: return "got@tprel@l";
+ case VK_PPC_TLS: return "tls";
+ case VK_PPC_GOT_TLSGD16_HA: return "got@tlsgd@ha";
+ case VK_PPC_GOT_TLSGD16_LO: return "got@tlsgd@l";
+ case VK_PPC_GOT_TLSLD16_HA: return "got@tlsld@ha";
+ case VK_PPC_GOT_TLSLD16_LO: return "got@tlsld@l";
+ case VK_PPC_TLSGD: return "tlsgd";
+ case VK_PPC_TLSLD: return "tlsld";
case VK_Mips_GPREL: return "GPREL";
case VK_Mips_GOT_CALL: return "GOT_CALL";
case VK_Mips_GOT16: return "GOT16";
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 41d90abeeb63..73f30ffb52a0 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -8,10 +8,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -50,3 +51,11 @@ StringRef MCInstPrinter::markup(StringRef a, StringRef b) const {
else
return b;
}
+
+/// Utility function to print immediates in decimal or hex.
+format_object1<int64_t> MCInstPrinter::formatImm(const int64_t Value) const {
+ if (getPrintImmHex())
+ return format("0x%" PRIx64, Value);
+ else
+ return format("%" PRId64, Value);
+}
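formatImm just routes an immediate through one of two printf-style formats depending on the printer's hex flag. A standalone equivalent of the two branches (using printf in place of LLVM's format_object machinery):

#include <inttypes.h>
#include <stdio.h>

static void printImm(int64_t Value, bool PrintImmHex) {
  if (PrintImmHex)
    printf("0x%" PRIx64 "\n", (uint64_t)Value); // e.g. 42 -> "0x2a"
  else
    printf("%" PRId64 "\n", Value);             // e.g. 42 -> "42"
}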
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 04b0e86aed61..7d08d0ecd5e0 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -7,19 +7,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCStreamer.h"
-
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -35,21 +34,23 @@ private:
void EmitDataRegion(DataRegionData::KindTy Kind);
void EmitDataRegionEnd();
public:
- MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter)
- : MCObjectStreamer(Context, MAB, OS, Emitter) {}
+ MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter)
+ : MCObjectStreamer(SK_MachOStreamer, Context, MAB, OS, Emitter) {}
/// @name MCStreamer Interface
/// @{
virtual void InitSections();
+ virtual void InitToTextSection();
virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitDebugLabel(MCSymbol *Symbol);
virtual void EmitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol);
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitLinkerOptions(ArrayRef<std::string> Options);
virtual void EmitDataRegion(MCDataRegionType Kind);
virtual void EmitThumbFunc(MCSymbol *Func);
- virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -86,15 +87,23 @@ public:
virtual void FinishImpl();
/// @}
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_MachOStreamer;
+ }
};
} // end anonymous namespace.
void MCMachOStreamer::InitSections() {
- SwitchSection(getContext().getMachOSection("__TEXT", "__text",
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 0, SectionKind::getText()));
+ InitToTextSection();
+}
+void MCMachOStreamer::InitToTextSection() {
+ SwitchSection(getContext().getMachOSection(
+ "__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0,
+ SectionKind::getText()));
}
void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
@@ -132,6 +141,9 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask);
}
+void MCMachOStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+}
void MCMachOStreamer::EmitDataRegion(DataRegionData::KindTy Kind) {
if (!getAssembler().getBackend().hasDataInCodeSupport())
return;
@@ -171,6 +183,10 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
}
}
+void MCMachOStreamer::EmitLinkerOptions(ArrayRef<std::string> Options) {
+ getAssembler().getLinkerOptions().push_back(Options);
+}
+
void MCMachOStreamer::EmitDataRegion(MCDataRegionType Kind) {
switch (Kind) {
case MCDR_DataRegion:
@@ -201,14 +217,6 @@ void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
SD.setFlags(SD.getFlags() | SF_ThumbFunc);
}
-void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
- // MCObjectStreamer.
- // FIXME: Lift context changes into super class.
- getAssembler().getOrCreateSymbolData(*Symbol);
- Symbol->setVariableValue(AddValueSymbols(Value));
-}
-
void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
// Indirect symbols are handled differently, to match how 'as' handles
@@ -378,7 +386,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
// Add the fixups and data.
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
- DF->addFixup(Fixups[i]);
+ DF->getFixups().push_back(Fixups[i]);
}
DF->getContents().append(Code.begin(), Code.end());
}
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 4c17d9155105..c872b2203f87 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -8,7 +8,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCStreamer.h"
-
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -20,11 +19,14 @@ namespace {
class MCNullStreamer : public MCStreamer {
public:
- MCNullStreamer(MCContext &Context) : MCStreamer(Context) {}
+ MCNullStreamer(MCContext &Context) : MCStreamer(SK_NullStreamer, Context) {}
/// @name MCStreamer Interface
/// @{
+ virtual void InitToTextSection() {
+ }
+
virtual void InitSections() {
}
@@ -36,7 +38,9 @@ namespace {
assert(getCurrentSection() && "Cannot emit before setting section!");
Symbol->setSection(*getCurrentSection());
}
-
+ virtual void EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+ }
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
virtual void EmitThumbFunc(MCSymbol *Func) {}
@@ -85,7 +89,7 @@ namespace {
virtual void EmitFileDirective(StringRef Filename) {}
virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
- StringRef Filename) {
+ StringRef Filename, unsigned CUID = 0) {
return false;
}
virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -94,6 +98,10 @@ namespace {
StringRef FileName) {}
virtual void EmitInstruction(const MCInst &Inst) {}
+ virtual void EmitBundleAlignMode(unsigned AlignPow2) {}
+ virtual void EmitBundleLock(bool AlignToEnd) {}
+ virtual void EmitBundleUnlock() {}
+
virtual void FinishImpl() {}
virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
@@ -101,6 +109,11 @@ namespace {
}
/// @}
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_NullStreamer;
+ }
+
};
}
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 2e1604d6b506..d19e79ac64f9 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/ADT/Triple.h"
using namespace llvm;
void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
@@ -186,6 +186,10 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
Ctx->getMachOSection("__DWARF", "__debug_frame",
MCSectionMachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ Ctx->getMachOSection("__DWARF", "__debug_pubnames",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfPubTypesSection =
Ctx->getMachOSection("__DWARF", "__debug_pubtypes",
MCSectionMachO::S_ATTR_DEBUG,
@@ -219,6 +223,15 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
}
void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
+ if (T.getArch() == Triple::mips ||
+ T.getArch() == Triple::mipsel)
+ FDECFIEncoding = dwarf::DW_EH_PE_sdata4;
+ else if (T.getArch() == Triple::mips64 ||
+ T.getArch() == Triple::mips64el)
+ FDECFIEncoding = dwarf::DW_EH_PE_sdata8;
+ else
+ FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+
if (T.getArch() == Triple::x86) {
PersonalityEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
@@ -226,15 +239,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
LSDAEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
- FDEEncoding = FDECFIEncoding = (RelocM == Reloc::PIC_)
+ FDEEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
TTypeEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
} else if (T.getArch() == Triple::x86_64) {
- FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
-
if (RelocM == Reloc::PIC_) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
((CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
@@ -256,6 +267,30 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
TTypeEncoding = (CMModel == CodeModel::Small)
? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
}
+ } else if (T.getArch() == Triple::aarch64) {
+ // The small model guarantees static code/data size < 4GB, but not where it
+ // will be in memory. Most of these could end up >2GB away so even a signed
+ // pc-relative 32-bit address is insufficient, theoretically.
+ if (RelocM == Reloc::PIC_) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
+ FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ FDEEncoding = dwarf::DW_EH_PE_udata4;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
+ } else if (T.getArch() == Triple::ppc64) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
+ FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8;
}
// Solaris requires different flags for .eh_frame to seemingly every other
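Every encoding chosen above is the OR of a 4-bit value format and application modifier bits. A short reference sketch with the standard DW_EH_PE constants, showing how the x86 PIC personality encoding used earlier in this hunk is assembled:

#include <stdint.h>

// Low nibble: value format. High bits: application modifiers.
static const uint8_t DW_EH_PE_absptr   = 0x00;
static const uint8_t DW_EH_PE_udata4   = 0x03;
static const uint8_t DW_EH_PE_sdata4   = 0x0b;
static const uint8_t DW_EH_PE_sdata8   = 0x0c;
static const uint8_t DW_EH_PE_pcrel    = 0x10;
static const uint8_t DW_EH_PE_indirect = 0x80;

static const uint8_t PersonalityPIC =
    DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; // == 0x9b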
@@ -373,6 +408,9 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
DwarfFrameSection =
Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ Ctx->getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
DwarfPubTypesSection =
Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
@@ -392,6 +430,10 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
DwarfMacroInfoSection =
Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
+
+ // DWARF5 Experimental Debug Info
+
+ // Accelerator Tables
DwarfAccelNamesSection =
Ctx->getELFSection(".apple_names", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
@@ -404,6 +446,30 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
DwarfAccelTypesSection =
Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
+
+ // Fission Sections
+ DwarfInfoDWOSection =
+ Ctx->getELFSection(".debug_info.dwo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfAbbrevDWOSection =
+ Ctx->getELFSection(".debug_abbrev.dwo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfStrDWOSection =
+ Ctx->getELFSection(".debug_str.dwo", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS,
+ SectionKind::getMergeable1ByteCString());
+ DwarfLineDWOSection =
+ Ctx->getELFSection(".debug_line.dwo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfLocDWOSection =
+ Ctx->getELFSection(".debug_loc.dwo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfStrOffDWOSection =
+ Ctx->getELFSection(".debug_str_offsets.dwo", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
+ DwarfAddrSection =
+ Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0,
+ SectionKind::getMetadata());
}
@@ -488,6 +554,11 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_DISCARDABLE |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
+ DwarfPubNamesSection =
+ Ctx->getCOFFSection(".debug_pubnames",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfPubTypesSection =
Ctx->getCOFFSection(".debug_pubtypes",
COFF::IMAGE_SCN_MEM_DISCARDABLE |
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 774632306d94..0d2ce83a8a10 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -20,22 +20,19 @@
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter_)
- : MCStreamer(Context),
- Assembler(new MCAssembler(Context, TAB,
- *Emitter_, *TAB.createObjectWriter(OS),
- OS)),
- CurSectionData(0)
-{
-}
-
-MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter_,
+MCObjectStreamer::MCObjectStreamer(StreamerKind Kind, MCContext &Context,
+ MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter_)
+ : MCStreamer(Kind, Context),
+ Assembler(new MCAssembler(Context, TAB, *Emitter_,
+ *TAB.createObjectWriter(OS), OS)),
+ CurSectionData(0) {}
+
+MCObjectStreamer::MCObjectStreamer(StreamerKind Kind, MCContext &Context,
+ MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter_,
MCAssembler *_Assembler)
- : MCStreamer(Context), Assembler(_Assembler), CurSectionData(0)
-{
-}
+ : MCStreamer(Kind, Context), Assembler(_Assembler), CurSectionData(0) {}
MCObjectStreamer::~MCObjectStreamer() {
delete &Assembler->getBackend();
@@ -44,6 +41,13 @@ MCObjectStreamer::~MCObjectStreamer() {
delete Assembler;
}
+void MCObjectStreamer::reset() {
+ if (Assembler)
+ Assembler->reset();
+ CurSectionData = 0;
+ MCStreamer::reset();
+}
+
MCFragment *MCObjectStreamer::getCurrentFragment() const {
assert(getCurrentSectionData() && "No current section!");
@@ -55,7 +59,9 @@ MCFragment *MCObjectStreamer::getCurrentFragment() const {
MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
- if (!F)
+ // When bundling is enabled, we don't want to add data to a fragment that
+ // already has instructions (see MCELFStreamer::EmitInstToData for details)
+ if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions()))
F = new MCDataFragment(getCurrentSectionData());
return F;
}
@@ -99,9 +105,9 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
EmitIntValue(AbsValue, Size, AddrSpace);
return;
}
- DF->addFixup(MCFixup::Create(DF->getContents().size(),
- Value,
- MCFixup::getKindForSize(Size, false)));
+ DF->getFixups().push_back(
+ MCFixup::Create(DF->getContents().size(), Value,
+ MCFixup::getKindForSize(Size, false)));
DF->getContents().resize(DF->getContents().size() + Size, 0);
}
@@ -128,6 +134,10 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
SD.setOffset(F->getContents().size());
}
+void MCObjectStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+}
+
void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
int64_t IntValue;
if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
@@ -159,27 +169,38 @@ void MCObjectStreamer::ChangeSection(const MCSection *Section) {
CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
}
+void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
// Scan for values.
for (unsigned i = Inst.getNumOperands(); i--; )
if (Inst.getOperand(i).isExpr())
AddValueSymbols(Inst.getOperand(i).getExpr());
- getCurrentSectionData()->setHasInstructions(true);
+ MCSectionData *SD = getCurrentSectionData();
+ SD->setHasInstructions(true);
// Now that a machine instruction has been assembled into this section, make
// a line entry for any .loc directive that has been seen.
MCLineEntry::Make(this, getCurrentSection());
// If this instruction doesn't need relaxation, just emit it as data.
- if (!getAssembler().getBackend().mayNeedRelaxation(Inst)) {
+ MCAssembler &Assembler = getAssembler();
+ if (!Assembler.getBackend().mayNeedRelaxation(Inst)) {
EmitInstToData(Inst);
return;
}
- // Otherwise, if we are relaxing everything, relax the instruction as much as
- // possible and emit it as data.
- if (getAssembler().getRelaxAll()) {
+ // Otherwise, relax and emit it as data if either:
+ // - The RelaxAll flag was passed
+ // - Bundling is enabled and this instruction is inside a bundle-locked
+ // group. We want to emit all such instructions into the same data
+ // fragment.
+ if (Assembler.getRelaxAll() ||
+ (Assembler.isBundlingEnabled() && SD->isBundleLocked())) {
MCInst Relaxed;
getAssembler().getBackend().relaxInstruction(Inst, Relaxed);
while (getAssembler().getBackend().mayNeedRelaxation(Relaxed))
@@ -193,13 +214,33 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
}
void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
- MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+ // Always create a new, separate fragment here, because its size can change
+ // during relaxation.
+ MCRelaxableFragment *IF =
+ new MCRelaxableFragment(Inst, getCurrentSectionData());
SmallString<128> Code;
raw_svector_ostream VecOS(Code);
getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, IF->getFixups());
VecOS.flush();
- IF->getCode().append(Code.begin(), Code.end());
+ IF->getContents().append(Code.begin(), Code.end());
+}
+
+#ifndef NDEBUG
+static const char *BundlingNotImplementedMsg =
+ "Aligned bundling is not implemented for this object format";
+#endif
+
+void MCObjectStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
+ llvm_unreachable(BundlingNotImplementedMsg);
+}
+
+void MCObjectStreamer::EmitBundleLock(bool AlignToEnd) {
+ llvm_unreachable(BundlingNotImplementedMsg);
+}
+
+void MCObjectStreamer::EmitBundleUnlock() {
+ llvm_unreachable(BundlingNotImplementedMsg);
}
void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
@@ -275,7 +316,7 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
if (!Delta->EvaluateAsAbsolute(Res, getAssembler()))
return true;
- EmitFill(Res, Value, 0);
+ EmitFill(Res, Value);
return false;
}
@@ -283,7 +324,8 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) {
MCDataFragment *DF = getOrCreateDataFragment();
- DF->addFixup(MCFixup::Create(DF->getContents().size(), Value, FK_GPRel_4));
+ DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
+ Value, FK_GPRel_4));
DF->getContents().resize(DF->getContents().size() + 4, 0);
}
@@ -291,7 +333,8 @@ void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) {
void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) {
MCDataFragment *DF = getOrCreateDataFragment();
- DF->addFixup(MCFixup::Create(DF->getContents().size(), Value, FK_GPRel_4));
+ DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
+ Value, FK_GPRel_4));
DF->getContents().resize(DF->getContents().size() + 8, 0);
}
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index f93f685bf502..c1c594a74697 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/AsmLexer.h"
-#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
#include <cctype>
#include <cerrno>
#include <cstdio>
@@ -156,10 +156,36 @@ AsmToken AsmLexer::LexLineComment() {
}
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
- if (CurPtr[0] == 'L' && CurPtr[1] == 'L')
- CurPtr += 2;
- if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L')
- CurPtr += 3;
+ // Skip ULL, UL, U, L and LL suffixes.
+ if (CurPtr[0] == 'U')
+ ++CurPtr;
+ if (CurPtr[0] == 'L')
+ ++CurPtr;
+ if (CurPtr[0] == 'L')
+ ++CurPtr;
+}
+
+// Look ahead for the first non-hex digit; if it is [hH], treat the
+// integer as hexadecimal, possibly with leading zeroes.
+static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
+ const char *FirstHex = 0;
+ const char *LookAhead = CurPtr;
+ while (1) {
+ if (isdigit(*LookAhead)) {
+ ++LookAhead;
+ } else if (isxdigit(*LookAhead)) {
+ if (!FirstHex)
+ FirstHex = LookAhead;
+ ++LookAhead;
+ } else {
+ break;
+ }
+ }
+ bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
+ CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
+ if (isHex)
+ return 16;
+ return DefaultRadix;
}
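The net effect of doLookAhead is that a trailing [hH] retroactively turns a digit run into a hex literal: 0FFh lexes as 255, while a plain 100 keeps the default radix. A compact sketch of just the radix decision (the CurPtr repositioning is elided):

#include <ctype.h>

static unsigned pickRadix(const char *Tok, unsigned DefaultRadix) {
  const char *P = Tok;
  while (isxdigit((unsigned char)*P)) // digits and a-f/A-F alike
    ++P;
  return (*P == 'h' || *P == 'H') ? 16 : DefaultRadix;
}

// pickRadix("100", 10)  -> 10 (value 100)
// pickRadix("0FFh", 8)  -> 16 (value 255)
// pickRadix("100h", 10) -> 16 (value 256)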
/// LexDigit: First character is [0-9].
@@ -167,16 +193,15 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
/// Forward/Backward Label: [0-9][fb]
/// Binary integer: 0b[01]+
/// Octal integer: 0[0-7]+
-/// Hex integer: 0x[0-9a-fA-F]+
+/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
/// Decimal integer: [1-9][0-9]*
AsmToken AsmLexer::LexDigit() {
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
- while (isdigit(*CurPtr))
- ++CurPtr;
-
+ unsigned Radix = doLookAhead(CurPtr, 10);
+ bool isHex = Radix == 16;
// Check for floating point literals.
- if (*CurPtr == '.' || *CurPtr == 'e') {
+ if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
++CurPtr;
return LexFloatLiteral();
}
@@ -184,17 +209,22 @@ AsmToken AsmLexer::LexDigit() {
StringRef Result(TokStart, CurPtr - TokStart);
long long Value;
- if (Result.getAsInteger(10, Value)) {
+ if (Result.getAsInteger(Radix, Value)) {
// Allow positive values that are too large to fit into a signed 64-bit
// integer, but that do fit in an unsigned one, we just convert them over.
unsigned long long UValue;
- if (Result.getAsInteger(10, UValue))
- return ReturnError(TokStart, "invalid decimal number");
+ if (Result.getAsInteger(Radix, UValue))
+ return ReturnError(TokStart, !isHex ? "invalid decimal number" :
+ "invalid hexdecimal number");
Value = (long long)UValue;
}
- // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
- // suffixes on integer literals.
+ // Consume the trailing [bB] or [hH] radix suffix.
+ if (Radix == 2 || Radix == 16)
+ ++CurPtr;
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores type
+ // suffixes on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);
return AsmToken(AsmToken::Integer, Result, Value);
@@ -243,6 +273,10 @@ AsmToken AsmLexer::LexDigit() {
if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
return ReturnError(TokStart, "invalid hexadecimal number");
+ // Consume the optional [hH].
+ if (*CurPtr == 'h' || *CurPtr == 'H')
+ ++CurPtr;
+
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
// suffixes on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);
@@ -251,14 +285,18 @@ AsmToken AsmLexer::LexDigit() {
(int64_t)Result);
}
- // Must be an octal number, it starts with 0.
- while (*CurPtr >= '0' && *CurPtr <= '9')
- ++CurPtr;
-
- StringRef Result(TokStart, CurPtr - TokStart);
+ // Either octal or hexadecimal.
long long Value;
- if (Result.getAsInteger(8, Value))
- return ReturnError(TokStart, "invalid octal number");
+ unsigned Radix = doLookAhead(CurPtr, 8);
+ bool isHex = Radix == 16;
+ StringRef Result(TokStart, CurPtr - TokStart);
+ if (Result.getAsInteger(Radix, Value))
+ return ReturnError(TokStart, !isHex ? "invalid octal number" :
+ "invalid hexdecimal number");
+
+ // Consume the [hH].
+ if (Radix == 16)
+ ++CurPtr;
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
// suffixes on integer literals.
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 6f2e85e55335..804734cea939 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -46,31 +47,34 @@ static cl::opt<bool>
FatalAssemblerWarnings("fatal-assembler-warnings",
cl::desc("Consider warnings as error"));
-MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {}
+MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {}
namespace {
-/// \brief Helper class for tracking macro definitions.
-typedef std::vector<AsmToken> MacroArgument;
-typedef std::vector<MacroArgument> MacroArguments;
-typedef std::pair<StringRef, MacroArgument> MacroParameter;
-typedef std::vector<MacroParameter> MacroParameters;
+/// \brief Helper types for tracking macro definitions.
+typedef std::vector<AsmToken> MCAsmMacroArgument;
+typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
+typedef std::pair<StringRef, MCAsmMacroArgument> MCAsmMacroParameter;
+typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters;
-struct Macro {
+struct MCAsmMacro {
StringRef Name;
StringRef Body;
- MacroParameters Parameters;
+ MCAsmMacroParameters Parameters;
public:
- Macro(StringRef N, StringRef B, const MacroParameters &P) :
+ MCAsmMacro(StringRef N, StringRef B, const MCAsmMacroParameters &P) :
Name(N), Body(B), Parameters(P) {}
+
+ MCAsmMacro(const MCAsmMacro& Other)
+ : Name(Other.Name), Body(Other.Body), Parameters(Other.Parameters) {}
};
/// \brief Helper class for storing information about an active macro
/// instantiation.
struct MacroInstantiation {
/// The macro being instantiated.
- const Macro *TheMacro;
+ const MCAsmMacro *TheMacro;
/// The macro instantiation with substitutions.
MemoryBuffer *Instantiation;
@@ -78,15 +82,17 @@ struct MacroInstantiation {
/// The location of the instantiation.
SMLoc InstantiationLoc;
+ /// The buffer where parsing should resume upon instantiation completion.
+ int ExitBuffer;
+
/// The location where parsing should resume upon instantiation completion.
SMLoc ExitLoc;
public:
- MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+ MacroInstantiation(const MCAsmMacro *M, SMLoc IL, int EB, SMLoc EL,
MemoryBuffer *I);
};
-//struct AsmRewrite;
struct ParseStatementInfo {
/// ParsedOperands - The parsed operands from the last parsed statement.
SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
@@ -94,11 +100,14 @@ struct ParseStatementInfo {
/// Opcode - The opcode from the last parsed instruction.
unsigned Opcode;
+ /// Error - Was there an error parsing the inline assembly?
+ bool ParseError;
+
SmallVectorImpl<AsmRewrite> *AsmRewrites;
- ParseStatementInfo() : Opcode(~0U), AsmRewrites(0) {}
+ ParseStatementInfo() : Opcode(~0U), ParseError(false), AsmRewrites(0) {}
ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
- : Opcode(~0), AsmRewrites(rewrites) {}
+ : Opcode(~0), ParseError(false), AsmRewrites(rewrites) {}
~ParseStatementInfo() {
// Free any parsed operands.
@@ -110,8 +119,6 @@ struct ParseStatementInfo {
/// \brief The concrete assembly parser instance.
class AsmParser : public MCAsmParser {
- friend class GenericAsmParser;
-
AsmParser(const AsmParser &) LLVM_DELETED_FUNCTION;
void operator=(const AsmParser &) LLVM_DELETED_FUNCTION;
private:
@@ -122,7 +129,6 @@ private:
SourceMgr &SrcMgr;
SourceMgr::DiagHandlerTy SavedDiagHandler;
void *SavedDiagContext;
- MCAsmParserExtension *GenericParser;
MCAsmParserExtension *PlatformParser;
/// This is the current buffer index we're lexing from as managed by the
@@ -132,20 +138,19 @@ private:
AsmCond TheCondState;
std::vector<AsmCond> TheCondStack;
- /// DirectiveMap - This is a table handlers for directives. Each handler is
- /// invoked after the directive identifier is read and is responsible for
- /// parsing and validating the rest of the directive. The handler is passed
- /// in the directive name and the location of the directive keyword.
- StringMap<std::pair<MCAsmParserExtension*, DirectiveHandler> > DirectiveMap;
+ /// ExtensionDirectiveMap - maps directive names to handler methods in parser
+ /// extensions. Extensions register themselves in this map by calling
+ /// addDirectiveHandler.
+ StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
/// MacroMap - Map of currently defined macros.
- StringMap<Macro*> MacroMap;
+ StringMap<MCAsmMacro*> MacroMap;
/// ActiveMacros - Stack of active macro instantiations.
std::vector<MacroInstantiation*> ActiveMacros;
/// Boolean tracking whether macro substitution is enabled.
- unsigned MacrosEnabled : 1;
+ unsigned MacrosEnabledFlag : 1;
/// Flag tracking whether any errors have been encountered.
unsigned HadError : 1;
@@ -172,10 +177,9 @@ public:
virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false);
- virtual void AddDirectiveHandler(MCAsmParserExtension *Object,
- StringRef Directive,
- DirectiveHandler Handler) {
- DirectiveMap[Directive] = std::make_pair(Object, Handler);
+ virtual void addDirectiveHandler(StringRef Directive,
+ ExtensionDirectiveHandler Handler) {
+ ExtensionDirectiveMap[Directive] = Handler;
}
public:
@@ -186,9 +190,9 @@ public:
virtual MCAsmLexer &getLexer() { return Lexer; }
virtual MCContext &getContext() { return Ctx; }
virtual MCStreamer &getStreamer() { return Out; }
- virtual unsigned getAssemblerDialect() {
+ virtual unsigned getAssemblerDialect() {
if (AssemblerDialect == ~0U)
- return MAI.getAssemblerDialect();
+ return MAI.getAssemblerDialect();
else
return AssemblerDialect;
}
@@ -206,7 +210,7 @@ public:
void setParsingInlineAsm(bool V) { ParsingInlineAsm = V; }
bool isParsingInlineAsm() { return ParsingInlineAsm; }
- bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
+ bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned &NumOutputs, unsigned &NumInputs,
SmallVectorImpl<std::pair<void *,bool> > &OpDecls,
SmallVectorImpl<std::string> &Constraints,
@@ -215,27 +219,70 @@ public:
const MCInstPrinter *IP,
MCAsmParserSemaCallback &SI);
- bool ParseExpression(const MCExpr *&Res);
- virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
- virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
- virtual bool ParseAbsoluteExpression(int64_t &Res);
+ bool parseExpression(const MCExpr *&Res);
+ virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool parseAbsoluteExpression(int64_t &Res);
+ /// parseIdentifier - Parse an identifier or string (as a quoted identifier)
+ /// and set \p Res to the identifier contents.
+ virtual bool parseIdentifier(StringRef &Res);
+ virtual void eatToEndOfStatement();
+
+ virtual void checkForValidSection();
/// }
private:
- void CheckForValidSection();
bool ParseStatement(ParseStatementInfo &Info);
void EatToEndOfLine();
bool ParseCppHashLineFilenameComment(const SMLoc &L);
- bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M);
+ void CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body,
+ MCAsmMacroParameters Parameters);
bool expandMacro(raw_svector_ostream &OS, StringRef Body,
- const MacroParameters &Parameters,
- const MacroArguments &A,
+ const MCAsmMacroParameters &Parameters,
+ const MCAsmMacroArguments &A,
const SMLoc &L);
+
+ /// \brief Are macros enabled in the parser?
+ bool MacrosEnabled() { return MacrosEnabledFlag; }
+
+ /// \brief Control a flag in the parser that enables or disables macros.
+ void SetMacrosEnabled(bool Flag) { MacrosEnabledFlag = Flag; }
+
+ /// \brief Lookup a previously defined macro.
+ /// \param Name Macro name.
+ /// \returns Pointer to macro. NULL if no such macro was defined.
+ const MCAsmMacro* LookupMacro(StringRef Name);
+
+ /// \brief Define a new macro with the given name and information.
+ void DefineMacro(StringRef Name, const MCAsmMacro& Macro);
+
+ /// \brief Undefine a macro. If no such macro was defined, it's a no-op.
+ void UndefineMacro(StringRef Name);
+
+ /// \brief Are we inside a macro instantiation?
+ bool InsideMacroInstantiation() { return !ActiveMacros.empty(); }
+
+ /// \brief Handle entry to macro instantiation.
+ ///
+ /// \param M The macro.
+ /// \param NameLoc Instantiation location.
+ bool HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc);
+
+ /// \brief Handle exit from macro instantiation.
void HandleMacroExit();
+ /// \brief Extract AsmTokens for a macro argument. If the argument delimiter
+ /// is initially unknown, set it to AsmToken::Eof. It will be set to the
+ /// correct delimiter by the method.
+ bool ParseMacroArgument(MCAsmMacroArgument &MA,
+ AsmToken::TokenKind &ArgumentDelimiter);
+
+ /// \brief Parse all macro arguments for a given macro.
+ bool ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A);
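  // For orientation, a sketch of the assembly-level feature these methods
  // implement (hypothetical input, not taken from this patch):
  //
  //   .macro inc2 reg          // DefineMacro("inc2", ...)
  //       add $2, \reg
  //   .endm
  //   inc2 %eax                // LookupMacro("inc2") -> HandleMacroEntry();
  //                            // ParseMacroArguments() collects "%eax"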
+
void PrintMacroInstantiations();
void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const {
@@ -252,18 +299,15 @@ private:
/// \brief Reset the current lexer position to that given by \p Loc. The
/// current token is not set; clients should ensure Lex() is called
/// subsequently.
- void JumpToLoc(SMLoc Loc);
-
- virtual void EatToEndOfStatement();
-
- bool ParseMacroArgument(MacroArgument &MA,
- AsmToken::TokenKind &ArgumentDelimiter);
- bool ParseMacroArguments(const Macro *M, MacroArguments &A);
+ ///
+ /// \param InBuffer If not -1, should be the known buffer id that contains the
+ /// location.
+ void JumpToLoc(SMLoc Loc, int InBuffer=-1);
/// \brief Parse up to the end of statement and return the contents from the
/// current token until the end of the statement; the current token on exit
/// will be either the EndOfStatement or EOF.
- virtual StringRef ParseStringToEndOfStatement();
+ virtual StringRef parseStringToEndOfStatement();
/// \brief Parse until the end of a statement or a comma is encountered,
/// return the contents from the current token up to the end or comma.
@@ -277,24 +321,95 @@ private:
bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
bool ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
- /// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
- /// and set \p Res to the identifier contents.
- virtual bool ParseIdentifier(StringRef &Res);
-
- // Directive Parsing.
+ bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
- // ".ascii", ".asciiz", ".string"
+ // Generic (target and platform independent) directive parsing.
+ enum DirectiveKind {
+ DK_NO_DIRECTIVE, // Placeholder
+ DK_SET, DK_EQU, DK_EQUIV, DK_ASCII, DK_ASCIZ, DK_STRING, DK_BYTE, DK_SHORT,
+ DK_VALUE, DK_2BYTE, DK_LONG, DK_INT, DK_4BYTE, DK_QUAD, DK_8BYTE, DK_SINGLE,
+ DK_FLOAT, DK_DOUBLE, DK_ALIGN, DK_ALIGN32, DK_BALIGN, DK_BALIGNW,
+ DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, DK_ENDR,
+ DK_BUNDLE_ALIGN_MODE, DK_BUNDLE_LOCK, DK_BUNDLE_UNLOCK,
+ DK_ZERO, DK_EXTERN, DK_GLOBL, DK_GLOBAL, DK_INDIRECT_SYMBOL,
+ DK_LAZY_REFERENCE, DK_NO_DEAD_STRIP, DK_SYMBOL_RESOLVER, DK_PRIVATE_EXTERN,
+ DK_REFERENCE, DK_WEAK_DEFINITION, DK_WEAK_REFERENCE,
+ DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT,
+ DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC,
+ DK_IF, DK_IFB, DK_IFNB, DK_IFC, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF,
+ DK_ELSEIF, DK_ELSE, DK_ENDIF,
+ DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS,
+ DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA,
+ DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER,
+ DK_CFI_OFFSET, DK_CFI_REL_OFFSET, DK_CFI_PERSONALITY, DK_CFI_LSDA,
+ DK_CFI_REMEMBER_STATE, DK_CFI_RESTORE_STATE, DK_CFI_SAME_VALUE,
+ DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_SIGNAL_FRAME, DK_CFI_UNDEFINED,
+ DK_CFI_REGISTER,
+ DK_MACROS_ON, DK_MACROS_OFF, DK_MACRO, DK_ENDM, DK_ENDMACRO, DK_PURGEM,
+ DK_SLEB128, DK_ULEB128
+ };
+
+ /// DirectiveKindMap - Maps directive name --> DirectiveKind enum, for
+ /// directives parsed by this class.
+ StringMap<DirectiveKind> DirectiveKindMap;
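  // For illustration -- initializeDirectiveKindMap(), defined further down
  // in this file, presumably fills the table with entries of the form:
  //
  //   DirectiveKindMap[".set"]  = DK_SET;
  //   DirectiveKindMap[".byte"] = DK_BYTE;
  //   DirectiveKindMap[".if"]   = DK_IF;
  //   // ...one entry per directive named in DirectiveKind above.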
+
+ // ".ascii", ".asciz", ".string"
bool ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
bool ParseDirectiveRealValue(const fltSemantics &); // ".single", ...
bool ParseDirectiveFill(); // ".fill"
- bool ParseDirectiveSpace(); // ".space"
bool ParseDirectiveZero(); // ".zero"
- bool ParseDirectiveSet(StringRef IDVal, bool allow_redef); // ".set", ".equ", ".equiv"
+ // ".set", ".equ", ".equiv"
+ bool ParseDirectiveSet(StringRef IDVal, bool allow_redef);
bool ParseDirectiveOrg(); // ".org"
// ".align{,32}", ".p2align{,w,l}"
bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
+ // ".file", ".line", ".loc", ".stabs"
+ bool ParseDirectiveFile(SMLoc DirectiveLoc);
+ bool ParseDirectiveLine();
+ bool ParseDirectiveLoc();
+ bool ParseDirectiveStabs();
+
+ // .cfi directives
+ bool ParseDirectiveCFIRegister(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFISections();
+ bool ParseDirectiveCFIStartProc();
+ bool ParseDirectiveCFIEndProc();
+ bool ParseDirectiveCFIDefCfaOffset();
+ bool ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIAdjustCfaOffset();
+ bool ParseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIOffset(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
+ bool ParseDirectiveCFIRememberState();
+ bool ParseDirectiveCFIRestoreState();
+ bool ParseDirectiveCFISameValue(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRestore(SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIEscape();
+ bool ParseDirectiveCFISignalFrame();
+ bool ParseDirectiveCFIUndefined(SMLoc DirectiveLoc);
+
+ // macro directives
+ bool ParseDirectivePurgeMacro(SMLoc DirectiveLoc);
+ bool ParseDirectiveEndMacro(StringRef Directive);
+ bool ParseDirectiveMacro(SMLoc DirectiveLoc);
+ bool ParseDirectiveMacrosOnOff(StringRef Directive);
+
+ // ".bundle_align_mode"
+ bool ParseDirectiveBundleAlignMode();
+ // ".bundle_lock"
+ bool ParseDirectiveBundleLock();
+ // ".bundle_unlock"
+ bool ParseDirectiveBundleUnlock();
+
+ // ".space", ".skip"
+ bool ParseDirectiveSpace(StringRef IDVal);
+
+ // .sleb128 (Signed=true) and .uleb128 (Signed=false)
+ bool ParseDirectiveLEB128(bool Signed);
+
/// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
/// accepts a single symbol (which should be a label or an external).
bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
@@ -315,133 +430,29 @@ private:
bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
-
- /// ParseEscapedString - Parse the current token as a string which may include
- /// escaped characters and return the string contents.
- bool ParseEscapedString(std::string &Data);
+ virtual bool parseEscapedString(std::string &Data);
const MCExpr *ApplyModifierToExpr(const MCExpr *E,
MCSymbolRefExpr::VariantKind Variant);
// Macro-like directives
- Macro *ParseMacroLikeBody(SMLoc DirectiveLoc);
- void InstantiateMacroLikeBody(Macro *M, SMLoc DirectiveLoc,
+ MCAsmMacro *ParseMacroLikeBody(SMLoc DirectiveLoc);
+ void InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
raw_svector_ostream &OS);
bool ParseDirectiveRept(SMLoc DirectiveLoc); // ".rept"
bool ParseDirectiveIrp(SMLoc DirectiveLoc); // ".irp"
bool ParseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc"
bool ParseDirectiveEndr(SMLoc DirectiveLoc); // ".endr"
- // "_emit"
- bool ParseDirectiveEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info);
-};
+ // "_emit" or "__emit"
+ bool ParseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
+ size_t Len);
-/// \brief Generic implementations of directive handling, etc. which is shared
-/// (or the default, at least) for all assembler parser.
-class GenericAsmParser : public MCAsmParserExtension {
- template<bool (GenericAsmParser::*Handler)(StringRef, SMLoc)>
- void AddDirectiveHandler(StringRef Directive) {
- getParser().AddDirectiveHandler(this, Directive,
- HandleDirective<GenericAsmParser, Handler>);
- }
-public:
- GenericAsmParser() {}
-
- AsmParser &getParser() {
- return (AsmParser&) this->MCAsmParserExtension::getParser();
- }
-
- virtual void Initialize(MCAsmParser &Parser) {
- // Call the base implementation.
- this->MCAsmParserExtension::Initialize(Parser);
-
- // Debugging directives.
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveFile>(".file");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLine>(".line");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveStabs>(".stabs");
-
- // CFI directives.
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFISections>(
- ".cfi_sections");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIStartProc>(
- ".cfi_startproc");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIEndProc>(
- ".cfi_endproc");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfa>(
- ".cfi_def_cfa");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaOffset>(
- ".cfi_def_cfa_offset");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset>(
- ".cfi_adjust_cfa_offset");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaRegister>(
- ".cfi_def_cfa_register");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIOffset>(
- ".cfi_offset");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIRelOffset>(
- ".cfi_rel_offset");
- AddDirectiveHandler<
- &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_personality");
- AddDirectiveHandler<
- &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_lsda");
- AddDirectiveHandler<
- &GenericAsmParser::ParseDirectiveCFIRememberState>(".cfi_remember_state");
- AddDirectiveHandler<
- &GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state");
- AddDirectiveHandler<
- &GenericAsmParser::ParseDirectiveCFISameValue>(".cfi_same_value");
- AddDirectiveHandler<
- &GenericAsmParser::ParseDirectiveCFIRestore>(".cfi_restore");
- AddDirectiveHandler<
- &GenericAsmParser::ParseDirectiveCFIEscape>(".cfi_escape");
- AddDirectiveHandler<
- &GenericAsmParser::ParseDirectiveCFISignalFrame>(".cfi_signal_frame");
-
- // Macro directives.
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
- ".macros_on");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
- ".macros_off");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectivePurgeMacro>(".purgem");
-
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".sleb128");
- AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".uleb128");
- }
-
- bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
+ // "align"
+ bool ParseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
- bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveStabs(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFISections(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIStartProc(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIDefCfa(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIDefCfaOffset(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIAdjustCfaOffset(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIDefCfaRegister(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIRelOffset(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIPersonalityOrLsda(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFISameValue(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIRestore(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFIEscape(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveCFISignalFrame(StringRef, SMLoc DirectiveLoc);
-
- bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc);
- bool ParseDirectivePurgeMacro(StringRef, SMLoc DirectiveLoc);
-
- bool ParseDirectiveLEB128(StringRef, SMLoc);
+ void initializeDirectiveKindMap();
};
-
}
namespace llvm {
@@ -457,8 +468,8 @@ enum { DEFAULT_ADDRSPACE = 0 };
AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
MCStreamer &_Out, const MCAsmInfo &_MAI)
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
- GenericParser(new GenericAsmParser), PlatformParser(0),
- CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0),
+ PlatformParser(0),
+ CurBuffer(0), MacrosEnabledFlag(true), CppHashLineNumber(0),
AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) {
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
@@ -467,9 +478,6 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
SrcMgr.setDiagHandler(DiagHandler, this);
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
- // Initialize the generic parser.
- GenericParser->Initialize(*this);
-
// Initialize the platform / file format parser.
//
// FIXME: This is a hack, we need to (majorly) cleanup how these objects are
@@ -485,18 +493,19 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
PlatformParser = createELFAsmParser();
PlatformParser->Initialize(*this);
}
+
+ initializeDirectiveKindMap();
}
AsmParser::~AsmParser() {
assert(ActiveMacros.empty() && "Unexpected active macro instantiation!");
// Destroy any macros.
- for (StringMap<Macro*>::iterator it = MacroMap.begin(),
+ for (StringMap<MCAsmMacro*>::iterator it = MacroMap.begin(),
ie = MacroMap.end(); it != ie; ++it)
delete it->getValue();
delete PlatformParser;
- delete GenericParser;
}
void AsmParser::PrintMacroInstantiations() {
@@ -550,8 +559,12 @@ bool AsmParser::ProcessIncbinFile(const std::string &Filename) {
return false;
}
-void AsmParser::JumpToLoc(SMLoc Loc) {
- CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+void AsmParser::JumpToLoc(SMLoc Loc, int InBuffer) {
+ if (InBuffer != -1) {
+ CurBuffer = InBuffer;
+ } else {
+ CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ }
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
}
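// When the caller already knows the buffer id (as HandleMacroExit() does via
// MacroInstantiation::ExitBuffer), passing it here skips the linear
// FindBufferContainingLoc() scan over all of the source manager's buffers.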
@@ -593,7 +606,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
getStreamer().EmitLabel(SectionStartSym);
getContext().setGenDwarfSectionStartSym(SectionStartSym);
getStreamer().EmitDwarfFileDirective(getContext().nextGenDwarfFileNumber(),
- StringRef(), SrcMgr.getMemoryBuffer(CurBuffer)->getBufferIdentifier());
+ StringRef(),
+ getContext().getMainFileName());
}
// While we have input, parse each statement.
@@ -604,7 +618,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// We had an error, validate that one was emitted and recover by skipping to
// the next line.
assert(HadError && "Parse statement returned an error, but none emitted!");
- EatToEndOfStatement();
+ eatToEndOfStatement();
}
if (TheCondState.TheCond != StartingCondState.TheCond ||
@@ -612,7 +626,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
return TokError("unmatched .ifs or .elses");
// Check to see there are no empty DwarfFile slots.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
getContext().getMCDwarfFiles();
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
if (!MCDwarfFiles[i])
@@ -651,18 +665,15 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
return HadError;
}
-void AsmParser::CheckForValidSection() {
+void AsmParser::checkForValidSection() {
if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) {
TokError("expected section directive before assembly directive");
- Out.SwitchSection(Ctx.getMachOSection(
- "__TEXT", "__text",
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 0, SectionKind::getText()));
+ Out.InitToTextSection();
}
}
-/// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
-void AsmParser::EatToEndOfStatement() {
+/// eatToEndOfStatement - Throw away the rest of the line for testing purposes.
+void AsmParser::eatToEndOfStatement() {
while (Lexer.isNot(AsmToken::EndOfStatement) &&
Lexer.isNot(AsmToken::Eof))
Lex();
@@ -672,7 +683,7 @@ void AsmParser::EatToEndOfStatement() {
Lex();
}
-StringRef AsmParser::ParseStringToEndOfStatement() {
+StringRef AsmParser::parseStringToEndOfStatement() {
const char *Start = getTok().getLoc().getPointer();
while (Lexer.isNot(AsmToken::EndOfStatement) &&
@@ -701,10 +712,10 @@ StringRef AsmParser::ParseStringToComma() {
/// parenexpr ::= expr)
///
bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
- if (ParseExpression(Res)) return true;
+ if (parseExpression(Res)) return true;
if (Lexer.isNot(AsmToken::RParen))
return TokError("expected ')' in parentheses expression");
- EndLoc = Lexer.getLoc();
+ EndLoc = Lexer.getTok().getEndLoc();
Lex();
return false;
}
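// Note the EndLoc pattern change here and throughout ParsePrimaryExpr()
// below: getEndLoc() points one past the last character of the token just
// consumed, whereas the old Lexer.getLoc() pointed at the start of the
// *next* token, so expression end locations no longer swallow trailing
// whitespace.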
@@ -715,10 +726,10 @@ bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
/// bracketexpr ::= expr]
///
bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
- if (ParseExpression(Res)) return true;
+ if (parseExpression(Res)) return true;
if (Lexer.isNot(AsmToken::RBrac))
return TokError("expected ']' in brackets expression");
- EndLoc = Lexer.getLoc();
+ EndLoc = Lexer.getTok().getEndLoc();
Lex();
return false;
}
@@ -730,7 +741,9 @@ bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
/// primaryexpr ::= '.'
/// primaryexpr ::= ~,+,- primaryexpr
bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
- switch (Lexer.getKind()) {
+ SMLoc FirstTokenLoc = getLexer().getLoc();
+ AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
+ switch (FirstTokenKind) {
default:
return TokError("unknown token in expression");
// If we have an error assume that we've already handled it.
@@ -745,11 +758,14 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
case AsmToken::Dollar:
case AsmToken::String:
case AsmToken::Identifier: {
- EndLoc = Lexer.getLoc();
-
StringRef Identifier;
- if (ParseIdentifier(Identifier))
+ if (parseIdentifier(Identifier)) {
+ if (FirstTokenKind == AsmToken::Dollar)
+ return Error(FirstTokenLoc, "invalid token in expression");
return true;
+ }
+
+ EndLoc = SMLoc::getFromPointer(Identifier.end());
// This is a symbol reference.
std::pair<StringRef, StringRef> Split = Identifier.split('@');
@@ -783,7 +799,7 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
SMLoc Loc = getTok().getLoc();
int64_t IntVal = getTok().getIntVal();
Res = MCConstantExpr::Create(IntVal, getContext());
- EndLoc = Lexer.getLoc();
+ EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat token.
// Look for 'b' or 'f' following an Integer as a directional label
if (Lexer.getKind() == AsmToken::Identifier) {
@@ -795,7 +811,7 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
- EndLoc = Lexer.getLoc();
+ EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat identifier.
}
}
@@ -805,6 +821,7 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
APFloat RealVal(APFloat::IEEEdouble, getTok().getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
Res = MCConstantExpr::Create(IntVal, getContext());
+ EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat token.
return false;
}
@@ -814,7 +831,7 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
MCSymbol *Sym = Ctx.CreateTempSymbol();
Out.EmitLabel(Sym);
Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
- EndLoc = Lexer.getLoc();
+ EndLoc = Lexer.getTok().getEndLoc();
Lex(); // Eat identifier.
return false;
}
@@ -847,9 +864,9 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
}
-bool AsmParser::ParseExpression(const MCExpr *&Res) {
+bool AsmParser::parseExpression(const MCExpr *&Res) {
SMLoc EndLoc;
- return ParseExpression(Res, EndLoc);
+ return parseExpression(Res, EndLoc);
}
const MCExpr *
@@ -900,7 +917,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E,
llvm_unreachable("Invalid expression kind!");
}
-/// ParseExpression - Parse an expression and return it.
+/// parseExpression - Parse an expression and return it.
///
/// expr ::= expr &&,|| expr -> lowest.
/// expr ::= expr |,^,&,! expr
@@ -910,7 +927,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E,
/// expr ::= expr *,/,% expr -> highest.
/// expr ::= primaryexpr
///
-bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
// Parse the expression.
Res = 0;
if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc))
@@ -948,17 +965,17 @@ bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
return false;
}
-bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+bool AsmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
Res = 0;
return ParseParenExpr(Res, EndLoc) ||
ParseBinOpRHS(1, Res, EndLoc);
}
-bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
+bool AsmParser::parseAbsoluteExpression(int64_t &Res) {
const MCExpr *Expr;
SMLoc StartLoc = Lexer.getLoc();
- if (ParseExpression(Expr))
+ if (parseExpression(Expr))
return true;
if (!Expr->EvaluateAsAbsolute(Res))
@@ -1105,8 +1122,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
if (!TheCondState.Ignore)
return TokError("unexpected token at start of statement");
IDVal = "";
- }
- else {
+ } else {
IDVal = getTok().getString();
Lex(); // Consume the integer token to be used as an identifier token.
if (Lexer.getKind() != AsmToken::Colon) {
@@ -1114,46 +1130,54 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
return TokError("unexpected token at start of statement");
}
}
-
} else if (Lexer.is(AsmToken::Dot)) {
// Treat '.' as a valid identifier in this context.
Lex();
IDVal = ".";
-
- } else if (ParseIdentifier(IDVal)) {
+ } else if (parseIdentifier(IDVal)) {
if (!TheCondState.Ignore)
return TokError("unexpected token at start of statement");
IDVal = "";
}
-
// Handle conditional assembly here before checking for skipping. We
// have to do this so that .endif isn't skipped in a ".if 0" block for
// example.
- if (IDVal == ".if")
- return ParseDirectiveIf(IDLoc);
- if (IDVal == ".ifb")
- return ParseDirectiveIfb(IDLoc, true);
- if (IDVal == ".ifnb")
- return ParseDirectiveIfb(IDLoc, false);
- if (IDVal == ".ifc")
- return ParseDirectiveIfc(IDLoc, true);
- if (IDVal == ".ifnc")
- return ParseDirectiveIfc(IDLoc, false);
- if (IDVal == ".ifdef")
- return ParseDirectiveIfdef(IDLoc, true);
- if (IDVal == ".ifndef" || IDVal == ".ifnotdef")
- return ParseDirectiveIfdef(IDLoc, false);
- if (IDVal == ".elseif")
- return ParseDirectiveElseIf(IDLoc);
- if (IDVal == ".else")
- return ParseDirectiveElse(IDLoc);
- if (IDVal == ".endif")
- return ParseDirectiveEndIf(IDLoc);
-
- // If we are in a ".if 0" block, ignore this statement.
+ StringMap<DirectiveKind>::const_iterator DirKindIt =
+ DirectiveKindMap.find(IDVal);
+ DirectiveKind DirKind =
+ (DirKindIt == DirectiveKindMap.end()) ? DK_NO_DIRECTIVE :
+ DirKindIt->getValue();
+ switch (DirKind) {
+ default:
+ break;
+ case DK_IF:
+ return ParseDirectiveIf(IDLoc);
+ case DK_IFB:
+ return ParseDirectiveIfb(IDLoc, true);
+ case DK_IFNB:
+ return ParseDirectiveIfb(IDLoc, false);
+ case DK_IFC:
+ return ParseDirectiveIfc(IDLoc, true);
+ case DK_IFNC:
+ return ParseDirectiveIfc(IDLoc, false);
+ case DK_IFDEF:
+ return ParseDirectiveIfdef(IDLoc, true);
+ case DK_IFNDEF:
+ case DK_IFNOTDEF:
+ return ParseDirectiveIfdef(IDLoc, false);
+ case DK_ELSEIF:
+ return ParseDirectiveElseIf(IDLoc);
+ case DK_ELSE:
+ return ParseDirectiveElse(IDLoc);
+ case DK_ENDIF:
+ return ParseDirectiveEndIf(IDLoc);
+ }
+
+ // Ignore the statement if we are in the middle of an inactive conditional
+ // block (e.g. ".if 0").
if (TheCondState.Ignore) {
- EatToEndOfStatement();
+ eatToEndOfStatement();
return false;
}
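  // For illustration -- why the conditional directives are dispatched before
  // the Ignore check (hypothetical input):
  //
  //   .if 0
  //   .byte 1      // skipped here via eatToEndOfStatement()
  //   .endif       // must still be parsed, or the block would never end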
@@ -1162,7 +1186,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
// See what kind of statement we have.
switch (Lexer.getKind()) {
case AsmToken::Colon: {
- CheckForValidSection();
+ checkForValidSection();
// identifier ':' -> Label.
Lex();
@@ -1185,7 +1209,8 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
return Error(IDLoc, "invalid symbol redefinition");
// Emit the label.
- Out.EmitLabel(Sym);
+ if (!ParsingInlineAsm)
+ Out.EmitLabel(Sym);
// If we are generating dwarf for assembly source files then gather the
// info to make a dwarf label entry for this label if needed.
@@ -1215,160 +1240,234 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
}
// If macros are enabled, check to see if this is a macro instantiation.
- if (MacrosEnabled)
- if (const Macro *M = MacroMap.lookup(IDVal))
- return HandleMacroEntry(IDVal, IDLoc, M);
+ if (MacrosEnabled())
+ if (const MCAsmMacro *M = LookupMacro(IDVal)) {
+ return HandleMacroEntry(M, IDLoc);
+ }
// Otherwise, we have a normal instruction or directive.
+
+ // Directives start with "."
if (IDVal[0] == '.' && IDVal != ".") {
-
- // Target hook for parsing target specific directives.
+ // There are several entities interested in parsing directives:
+ //
+ // 1. The target-specific assembly parser. Some directives are target
+ // specific or may potentially behave differently on certain targets.
+ // 2. Asm parser extensions. For example, platform-specific parsers
+ // (like the ELF parser) register themselves as extensions.
+ // 3. The generic directive parser implemented by this class. These are
+ // all the directives that behave in a target and platform independent
+ // manner, or at least have a default behavior that's shared between
+ // all targets and platforms.
+
+ // First query the target-specific parser. It will return 'true' if it
+ // isn't interested in this directive.
if (!getTargetParser().ParseDirective(ID))
return false;
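    // Typical examples of each category (illustrative only; the exact sets
    // are target- and platform-dependent):
    //   .thumb          -> 1. target-specific parser (e.g. ARM)
    //   .section .text  -> 2. extension (e.g. the ELF platform parser)
    //   .byte 0x90      -> 3. generic, dispatched via DirectiveKindMap below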
- // Assembler features
- if (IDVal == ".set" || IDVal == ".equ")
- return ParseDirectiveSet(IDVal, true);
- if (IDVal == ".equiv")
- return ParseDirectiveSet(IDVal, false);
-
- // Data directives
-
- if (IDVal == ".ascii")
- return ParseDirectiveAscii(IDVal, false);
- if (IDVal == ".asciz" || IDVal == ".string")
- return ParseDirectiveAscii(IDVal, true);
-
- if (IDVal == ".byte")
- return ParseDirectiveValue(1);
- if (IDVal == ".short")
- return ParseDirectiveValue(2);
- if (IDVal == ".value")
- return ParseDirectiveValue(2);
- if (IDVal == ".2byte")
- return ParseDirectiveValue(2);
- if (IDVal == ".long")
- return ParseDirectiveValue(4);
- if (IDVal == ".int")
- return ParseDirectiveValue(4);
- if (IDVal == ".4byte")
- return ParseDirectiveValue(4);
- if (IDVal == ".quad")
- return ParseDirectiveValue(8);
- if (IDVal == ".8byte")
- return ParseDirectiveValue(8);
- if (IDVal == ".single" || IDVal == ".float")
- return ParseDirectiveRealValue(APFloat::IEEEsingle);
- if (IDVal == ".double")
- return ParseDirectiveRealValue(APFloat::IEEEdouble);
-
- if (IDVal == ".align") {
- bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
- return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1);
- }
- if (IDVal == ".align32") {
- bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
- return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4);
- }
- if (IDVal == ".balign")
- return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
- if (IDVal == ".balignw")
- return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2);
- if (IDVal == ".balignl")
- return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4);
- if (IDVal == ".p2align")
- return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
- if (IDVal == ".p2alignw")
- return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2);
- if (IDVal == ".p2alignl")
- return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
-
- if (IDVal == ".org")
- return ParseDirectiveOrg();
-
- if (IDVal == ".fill")
- return ParseDirectiveFill();
- if (IDVal == ".space" || IDVal == ".skip")
- return ParseDirectiveSpace();
- if (IDVal == ".zero")
- return ParseDirectiveZero();
-
- // Symbol attribute directives
-
- if (IDVal == ".extern") {
- EatToEndOfStatement(); // .extern is the default, ignore it.
- return false;
- }
- if (IDVal == ".globl" || IDVal == ".global")
- return ParseDirectiveSymbolAttribute(MCSA_Global);
- if (IDVal == ".indirect_symbol")
- return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol);
- if (IDVal == ".lazy_reference")
- return ParseDirectiveSymbolAttribute(MCSA_LazyReference);
- if (IDVal == ".no_dead_strip")
- return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
- if (IDVal == ".symbol_resolver")
- return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver);
- if (IDVal == ".private_extern")
- return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern);
- if (IDVal == ".reference")
- return ParseDirectiveSymbolAttribute(MCSA_Reference);
- if (IDVal == ".weak_definition")
- return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition);
- if (IDVal == ".weak_reference")
- return ParseDirectiveSymbolAttribute(MCSA_WeakReference);
- if (IDVal == ".weak_def_can_be_hidden")
- return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
-
- if (IDVal == ".comm" || IDVal == ".common")
- return ParseDirectiveComm(/*IsLocal=*/false);
- if (IDVal == ".lcomm")
- return ParseDirectiveComm(/*IsLocal=*/true);
-
- if (IDVal == ".abort")
- return ParseDirectiveAbort();
- if (IDVal == ".include")
- return ParseDirectiveInclude();
- if (IDVal == ".incbin")
- return ParseDirectiveIncbin();
-
- if (IDVal == ".code16" || IDVal == ".code16gcc")
- return TokError(Twine(IDVal) + " not supported yet");
-
- // Macro-like directives
- if (IDVal == ".rept")
- return ParseDirectiveRept(IDLoc);
- if (IDVal == ".irp")
- return ParseDirectiveIrp(IDLoc);
- if (IDVal == ".irpc")
- return ParseDirectiveIrpc(IDLoc);
- if (IDVal == ".endr")
- return ParseDirectiveEndr(IDLoc);
-
- // Look up the handler in the handler table.
+ // Next, check the extension directive map to see if any extension has
+ // registered itself to parse this directive.
std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
- DirectiveMap.lookup(IDVal);
+ ExtensionDirectiveMap.lookup(IDVal);
if (Handler.first)
return (*Handler.second)(Handler.first, IDVal, IDLoc);
+ // Finally, if no one else is interested in this directive, it must be
+ // generic and familiar to this class.
+ switch (DirKind) {
+ default:
+ break;
+ case DK_SET:
+ case DK_EQU:
+ return ParseDirectiveSet(IDVal, true);
+ case DK_EQUIV:
+ return ParseDirectiveSet(IDVal, false);
+ case DK_ASCII:
+ return ParseDirectiveAscii(IDVal, false);
+ case DK_ASCIZ:
+ case DK_STRING:
+ return ParseDirectiveAscii(IDVal, true);
+ case DK_BYTE:
+ return ParseDirectiveValue(1);
+ case DK_SHORT:
+ case DK_VALUE:
+ case DK_2BYTE:
+ return ParseDirectiveValue(2);
+ case DK_LONG:
+ case DK_INT:
+ case DK_4BYTE:
+ return ParseDirectiveValue(4);
+ case DK_QUAD:
+ case DK_8BYTE:
+ return ParseDirectiveValue(8);
+ case DK_SINGLE:
+ case DK_FLOAT:
+ return ParseDirectiveRealValue(APFloat::IEEEsingle);
+ case DK_DOUBLE:
+ return ParseDirectiveRealValue(APFloat::IEEEdouble);
+ case DK_ALIGN: {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1);
+ }
+ case DK_ALIGN32: {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4);
+ }
+ case DK_BALIGN:
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
+ case DK_BALIGNW:
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2);
+ case DK_BALIGNL:
+ return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4);
+ case DK_P2ALIGN:
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
+ case DK_P2ALIGNW:
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2);
+ case DK_P2ALIGNL:
+ return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
+ case DK_ORG:
+ return ParseDirectiveOrg();
+ case DK_FILL:
+ return ParseDirectiveFill();
+ case DK_ZERO:
+ return ParseDirectiveZero();
+ case DK_EXTERN:
+ eatToEndOfStatement(); // .extern is the default, ignore it.
+ return false;
+ case DK_GLOBL:
+ case DK_GLOBAL:
+ return ParseDirectiveSymbolAttribute(MCSA_Global);
+ case DK_INDIRECT_SYMBOL:
+ return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol);
+ case DK_LAZY_REFERENCE:
+ return ParseDirectiveSymbolAttribute(MCSA_LazyReference);
+ case DK_NO_DEAD_STRIP:
+ return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
+ case DK_SYMBOL_RESOLVER:
+ return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver);
+ case DK_PRIVATE_EXTERN:
+ return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern);
+ case DK_REFERENCE:
+ return ParseDirectiveSymbolAttribute(MCSA_Reference);
+ case DK_WEAK_DEFINITION:
+ return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition);
+ case DK_WEAK_REFERENCE:
+ return ParseDirectiveSymbolAttribute(MCSA_WeakReference);
+ case DK_WEAK_DEF_CAN_BE_HIDDEN:
+ return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
+ case DK_COMM:
+ case DK_COMMON:
+ return ParseDirectiveComm(/*IsLocal=*/false);
+ case DK_LCOMM:
+ return ParseDirectiveComm(/*IsLocal=*/true);
+ case DK_ABORT:
+ return ParseDirectiveAbort();
+ case DK_INCLUDE:
+ return ParseDirectiveInclude();
+ case DK_INCBIN:
+ return ParseDirectiveIncbin();
+ case DK_CODE16:
+ case DK_CODE16GCC:
+ return TokError(Twine(IDVal) + " not supported yet");
+ case DK_REPT:
+ return ParseDirectiveRept(IDLoc);
+ case DK_IRP:
+ return ParseDirectiveIrp(IDLoc);
+ case DK_IRPC:
+ return ParseDirectiveIrpc(IDLoc);
+ case DK_ENDR:
+ return ParseDirectiveEndr(IDLoc);
+ case DK_BUNDLE_ALIGN_MODE:
+ return ParseDirectiveBundleAlignMode();
+ case DK_BUNDLE_LOCK:
+ return ParseDirectiveBundleLock();
+ case DK_BUNDLE_UNLOCK:
+ return ParseDirectiveBundleUnlock();
+ case DK_SLEB128:
+ return ParseDirectiveLEB128(true);
+ case DK_ULEB128:
+ return ParseDirectiveLEB128(false);
+ case DK_SPACE:
+ case DK_SKIP:
+ return ParseDirectiveSpace(IDVal);
+ case DK_FILE:
+ return ParseDirectiveFile(IDLoc);
+ case DK_LINE:
+ return ParseDirectiveLine();
+ case DK_LOC:
+ return ParseDirectiveLoc();
+ case DK_STABS:
+ return ParseDirectiveStabs();
+ case DK_CFI_SECTIONS:
+ return ParseDirectiveCFISections();
+ case DK_CFI_STARTPROC:
+ return ParseDirectiveCFIStartProc();
+ case DK_CFI_ENDPROC:
+ return ParseDirectiveCFIEndProc();
+ case DK_CFI_DEF_CFA:
+ return ParseDirectiveCFIDefCfa(IDLoc);
+ case DK_CFI_DEF_CFA_OFFSET:
+ return ParseDirectiveCFIDefCfaOffset();
+ case DK_CFI_ADJUST_CFA_OFFSET:
+ return ParseDirectiveCFIAdjustCfaOffset();
+ case DK_CFI_DEF_CFA_REGISTER:
+ return ParseDirectiveCFIDefCfaRegister(IDLoc);
+ case DK_CFI_OFFSET:
+ return ParseDirectiveCFIOffset(IDLoc);
+ case DK_CFI_REL_OFFSET:
+ return ParseDirectiveCFIRelOffset(IDLoc);
+ case DK_CFI_PERSONALITY:
+ return ParseDirectiveCFIPersonalityOrLsda(true);
+ case DK_CFI_LSDA:
+ return ParseDirectiveCFIPersonalityOrLsda(false);
+ case DK_CFI_REMEMBER_STATE:
+ return ParseDirectiveCFIRememberState();
+ case DK_CFI_RESTORE_STATE:
+ return ParseDirectiveCFIRestoreState();
+ case DK_CFI_SAME_VALUE:
+ return ParseDirectiveCFISameValue(IDLoc);
+ case DK_CFI_RESTORE:
+ return ParseDirectiveCFIRestore(IDLoc);
+ case DK_CFI_ESCAPE:
+ return ParseDirectiveCFIEscape();
+ case DK_CFI_SIGNAL_FRAME:
+ return ParseDirectiveCFISignalFrame();
+ case DK_CFI_UNDEFINED:
+ return ParseDirectiveCFIUndefined(IDLoc);
+ case DK_CFI_REGISTER:
+ return ParseDirectiveCFIRegister(IDLoc);
+ case DK_MACROS_ON:
+ case DK_MACROS_OFF:
+ return ParseDirectiveMacrosOnOff(IDVal);
+ case DK_MACRO:
+ return ParseDirectiveMacro(IDLoc);
+ case DK_ENDM:
+ case DK_ENDMACRO:
+ return ParseDirectiveEndMacro(IDVal);
+ case DK_PURGEM:
+ return ParseDirectivePurgeMacro(IDLoc);
+ }
return Error(IDLoc, "unknown directive");
}
- // _emit
- if (ParsingInlineAsm && IDVal == "_emit")
- return ParseDirectiveEmit(IDLoc, Info);
+ // __asm _emit or __asm __emit
+ if (ParsingInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
+ IDVal == "_EMIT" || IDVal == "__EMIT"))
+ return ParseDirectiveMSEmit(IDLoc, Info, IDVal.size());
- CheckForValidSection();
+ // __asm align
+ if (ParsingInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
+ return ParseDirectiveMSAlign(IDLoc, Info);
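  // The MS-style forms accepted by these two hooks, sketched:
  //
  //   __asm _emit 0x90      // ParseDirectiveMSEmit, Len == 5
  //   __asm __emit 0x90     // ParseDirectiveMSEmit, Len == 6
  //   __asm align 8         // ParseDirectiveMSAlign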
- // Canonicalize the opcode to lower case.
- SmallString<128> OpcodeStr;
- for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
- OpcodeStr.push_back(tolower(IDVal[i]));
+ checkForValidSection();
+ // Canonicalize the opcode to lower case.
+ std::string OpcodeStr = IDVal.lower();
ParseInstructionInfo IInfo(Info.AsmRewrites);
- bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr.str(),
- IDLoc,Info.ParsedOperands);
+ bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr,
+ IDLoc, Info.ParsedOperands);
+ Info.ParseError = HadError;
// Dump the parsed representation, if requested.
if (getShowParsedOperands()) {
@@ -1389,24 +1488,24 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
// section is the initial text section then generate a .loc directive for
// the instruction.
if (!HadError && getContext().getGenDwarfForAssembly() &&
- getContext().getGenDwarfSection() == getStreamer().getCurrentSection() ) {
+ getContext().getGenDwarfSection() == getStreamer().getCurrentSection()) {
- unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
+ unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
- // If we previously parsed a cpp hash file line comment then make sure the
- // current Dwarf File is for the CppHashFilename if not then emit the
- // Dwarf File table for it and adjust the line number for the .loc.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
- getContext().getMCDwarfFiles();
- if (CppHashFilename.size() != 0) {
- if(MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() !=
+ // If we previously parsed a cpp hash file line comment, make sure the
+ // current Dwarf File is for the CppHashFilename; if not, emit the
+ // Dwarf File table for it and adjust the line number for the .loc.
+ const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles =
+ getContext().getMCDwarfFiles();
+ if (CppHashFilename.size() != 0) {
+ if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() !=
CppHashFilename)
- getStreamer().EmitDwarfFileDirective(
- getContext().nextGenDwarfFileNumber(), StringRef(), CppHashFilename);
+ getStreamer().EmitDwarfFileDirective(
+ getContext().nextGenDwarfFileNumber(), StringRef(), CppHashFilename);
unsigned CppHashLocLineNo = SrcMgr.FindLineNumber(CppHashLoc,CppHashBuf);
Line = CppHashLineNumber - 1 + (Line - CppHashLocLineNo);
- }
+ }
getStreamer().EmitDwarfLocDirective(getContext().getGenDwarfFileNumber(),
Line, 0, DWARF2_LINE_DEFAULT_IS_STMT ?
@@ -1492,7 +1591,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
}
- // If we have not parsed a cpp hash line filename comment or the source
+ // If we have not parsed a cpp hash line filename comment or the source
// manager changed or buffer changed (like in a nested include) then just
// print the normal diagnostic using its Filename and LineNo.
if (!Parser->CppHashLineNumber ||
@@ -1505,7 +1604,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
return;
}
- // Use the CppHashFilename and calculate a line number based on the
+ // Use the CppHashFilename and calculate a line number based on the
// CppHashLoc and CppHashLineNumber relative to this Diag's SMLoc for
// the diagnostic.
const std::string Filename = Parser->CppHashFilename;
@@ -1532,12 +1631,13 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
// we can't do that. AsmLexer.cpp should probably be changed to handle
// '@' as a special case when needed.
static bool isIdentifierChar(char c) {
- return isalnum(c) || c == '_' || c == '$' || c == '.';
+ return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
+ c == '.';
}
bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
- const MacroParameters &Parameters,
- const MacroArguments &A,
+ const MCAsmMacroParameters &Parameters,
+ const MCAsmMacroArguments &A,
const SMLoc &L) {
unsigned NParameters = Parameters.size();
if (NParameters != 0 && NParameters != A.size())
@@ -1556,7 +1656,8 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
continue;
char Next = Body[Pos + 1];
- if (Next == '$' || Next == 'n' || isdigit(Next))
+ if (Next == '$' || Next == 'n' ||
+ isdigit(static_cast<unsigned char>(Next)))
break;
} else {
// This macro has parameters, look for \foo, \bar, etc.
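  // Substitution sketch for the parameterized case (hypothetical input):
  //
  //   .macro double reg
  //       add \reg, \reg     // each "\reg" is replaced by the tokens of
  //   .endm                  // the corresponding argument
  //   double %ebx            // body expands to: add %ebx, %ebx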
@@ -1592,7 +1693,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
break;
// Otherwise substitute with the token values, with spaces eliminated.
- for (MacroArgument::const_iterator it = A[Index].begin(),
+ for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
ie = A[Index].end(); it != ie; ++it)
OS << it->getString();
break;
@@ -1619,7 +1720,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
Pos = I;
}
} else {
- for (MacroArgument::const_iterator it = A[Index].begin(),
+ for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
ie = A[Index].end(); it != ie; ++it)
if (it->getKind() == AsmToken::String)
OS << it->getStringContents();
@@ -1636,9 +1737,11 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
return false;
}
-MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+MacroInstantiation::MacroInstantiation(const MCAsmMacro *M, SMLoc IL,
+ int EB, SMLoc EL,
MemoryBuffer *I)
- : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitLoc(EL)
+ : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitBuffer(EB),
+ ExitLoc(EL)
{
}
@@ -1675,10 +1778,7 @@ static bool IsOperator(AsmToken::TokenKind kind)
}
}
-/// ParseMacroArgument - Extract AsmTokens for a macro argument.
-/// This is used for both default macro parameter values and the
-/// arguments in macro invocations
-bool AsmParser::ParseMacroArgument(MacroArgument &MA,
+bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA,
AsmToken::TokenKind &ArgumentDelimiter) {
unsigned ParenLevel = 0;
unsigned AddTokens = 0;
@@ -1715,7 +1815,7 @@ bool AsmParser::ParseMacroArgument(MacroArgument &MA,
if (IsOperator(Lexer.getKind())) {
// Check to see whether the token is used as an operator,
// or part of an identifier
- const char *NextChar = getTok().getEndLoc().getPointer() + 1;
+ const char *NextChar = getTok().getEndLoc().getPointer();
if (*NextChar == ' ')
AddTokens = 2;
}
@@ -1754,7 +1854,7 @@ bool AsmParser::ParseMacroArgument(MacroArgument &MA,
}
// Parse the macro instantiation arguments.
-bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) {
+bool AsmParser::ParseMacroArguments(const MCAsmMacro *M,
+                                    MCAsmMacroArguments &A) {
const unsigned NParameters = M ? M->Parameters.size() : 0;
// Argument delimiter is initially unknown. It will be set by
// ParseMacroArgument()
@@ -1765,7 +1865,7 @@ bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) {
// - macros defined with parameters accept at most that many of them
for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
++Parameter) {
- MacroArgument MA;
+ MCAsmMacroArgument MA;
if (ParseMacroArgument(MA, ArgumentDelimiter))
return true;
@@ -1798,14 +1898,30 @@ bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) {
return TokError("Too many arguments");
}
-bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
- const Macro *M) {
+const MCAsmMacro* AsmParser::LookupMacro(StringRef Name) {
+ StringMap<MCAsmMacro*>::iterator I = MacroMap.find(Name);
+ return (I == MacroMap.end()) ? NULL : I->getValue();
+}
+
+void AsmParser::DefineMacro(StringRef Name, const MCAsmMacro& Macro) {
+ MacroMap[Name] = new MCAsmMacro(Macro);
+}
+
+void AsmParser::UndefineMacro(StringRef Name) {
+ StringMap<MCAsmMacro*>::iterator I = MacroMap.find(Name);
+ if (I != MacroMap.end()) {
+ delete I->getValue();
+ MacroMap.erase(I);
+ }
+}
+
+bool AsmParser::HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
// Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate
// this, although we should protect against infinite loops.
if (ActiveMacros.size() == 20)
return TokError("macros cannot be nested more than 20 levels deep");
- MacroArguments A;
+ MCAsmMacroArguments A;
if (ParseMacroArguments(M, A))
return true;
@@ -1824,7 +1940,7 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
if (expandMacro(OS, Body, M->Parameters, A, getTok().getLoc()))
return true;
- // We include the .endmacro in the buffer as our queue to exit the macro
+ // We include the .endmacro in the buffer as our cue to exit the macro
// instantiation.
OS << ".endmacro\n";
@@ -1834,6 +1950,7 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
// Create the macro instantiation object and add to the current macro
// instantiation stack.
MacroInstantiation *MI = new MacroInstantiation(M, NameLoc,
+ CurBuffer,
getTok().getLoc(),
Instantiation);
ActiveMacros.push_back(MI);
@@ -1848,7 +1965,7 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
void AsmParser::HandleMacroExit() {
// Jump to the EndOfStatement we should return to, and consume it.
- JumpToLoc(ActiveMacros.back()->ExitLoc);
+ JumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer);
Lex();
// Pop the instantiation entry.
@@ -1885,7 +2002,7 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef,
SMLoc EqualLoc = Lexer.getLoc();
const MCExpr *Value;
- if (ParseExpression(Value))
+ if (parseExpression(Value))
return true;
// Note: we don't count b as used in "a = b". This is to allow
@@ -1942,10 +2059,10 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef,
return false;
}
-/// ParseIdentifier:
+/// parseIdentifier:
/// ::= identifier
/// ::= string
-bool AsmParser::ParseIdentifier(StringRef &Res) {
+bool AsmParser::parseIdentifier(StringRef &Res) {
// The assembler has relaxed rules for accepting identifiers, in particular we
// allow things like '.globl $foo', which would normally be separate
// tokens. At this level, we have already lexed so we cannot (currently)
@@ -1988,7 +2105,7 @@ bool AsmParser::ParseIdentifier(StringRef &Res) {
bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) {
StringRef Name;
- if (ParseIdentifier(Name))
+ if (parseIdentifier(Name))
return TokError("expected identifier after '" + Twine(IDVal) + "'");
if (getLexer().isNot(AsmToken::Comma))
@@ -1998,7 +2115,7 @@ bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) {
return ParseAssignment(Name, allow_redef, true);
}
-bool AsmParser::ParseEscapedString(std::string &Data) {
+bool AsmParser::parseEscapedString(std::string &Data) {
assert(getLexer().is(AsmToken::String) && "Unexpected current token!");
Data = "";
@@ -2060,14 +2177,14 @@ bool AsmParser::ParseEscapedString(std::string &Data) {
/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- CheckForValidSection();
+ checkForValidSection();
for (;;) {
if (getLexer().isNot(AsmToken::String))
return TokError("expected string in '" + Twine(IDVal) + "' directive");
std::string Data;
- if (ParseEscapedString(Data))
+ if (parseEscapedString(Data))
return true;
getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE);
@@ -2093,12 +2210,12 @@ bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
bool AsmParser::ParseDirectiveValue(unsigned Size) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- CheckForValidSection();
+ checkForValidSection();
for (;;) {
const MCExpr *Value;
SMLoc ExprLoc = getLexer().getLoc();
- if (ParseExpression(Value))
+ if (parseExpression(Value))
return true;
// Special case constant expressions to match code generator.
@@ -2129,7 +2246,7 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) {
/// ::= (.single | .double) [ expression (, expression)* ]
bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- CheckForValidSection();
+ checkForValidSection();
for (;;) {
// We don't truly support arithmetic on floating point expressions, so we
@@ -2183,52 +2300,19 @@ bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
return false;
}
-/// ParseDirectiveSpace
-/// ::= .space expression [ , expression ]
-bool AsmParser::ParseDirectiveSpace() {
- CheckForValidSection();
-
- int64_t NumBytes;
- if (ParseAbsoluteExpression(NumBytes))
- return true;
-
- int64_t FillExpr = 0;
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in '.space' directive");
- Lex();
-
- if (ParseAbsoluteExpression(FillExpr))
- return true;
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.space' directive");
- }
-
- Lex();
-
- if (NumBytes <= 0)
- return TokError("invalid number of bytes in '.space' directive");
-
- // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
- getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE);
-
- return false;
-}
-
/// ParseDirectiveZero
/// ::= .zero expression
bool AsmParser::ParseDirectiveZero() {
- CheckForValidSection();
+ checkForValidSection();
int64_t NumBytes;
- if (ParseAbsoluteExpression(NumBytes))
+ if (parseAbsoluteExpression(NumBytes))
return true;
int64_t Val = 0;
if (getLexer().is(AsmToken::Comma)) {
Lex();
- if (ParseAbsoluteExpression(Val))
+ if (parseAbsoluteExpression(Val))
return true;
}
@@ -2245,10 +2329,10 @@ bool AsmParser::ParseDirectiveZero() {
/// ParseDirectiveFill
/// ::= .fill expression , expression , expression
bool AsmParser::ParseDirectiveFill() {
- CheckForValidSection();
+ checkForValidSection();
int64_t NumValues;
- if (ParseAbsoluteExpression(NumValues))
+ if (parseAbsoluteExpression(NumValues))
return true;
if (getLexer().isNot(AsmToken::Comma))
@@ -2256,7 +2340,7 @@ bool AsmParser::ParseDirectiveFill() {
Lex();
int64_t FillSize;
- if (ParseAbsoluteExpression(FillSize))
+ if (parseAbsoluteExpression(FillSize))
return true;
if (getLexer().isNot(AsmToken::Comma))
@@ -2264,7 +2348,7 @@ bool AsmParser::ParseDirectiveFill() {
Lex();
int64_t FillExpr;
- if (ParseAbsoluteExpression(FillExpr))
+ if (parseAbsoluteExpression(FillExpr))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -2284,11 +2368,11 @@ bool AsmParser::ParseDirectiveFill() {
/// ParseDirectiveOrg
/// ::= .org expression [ , expression ]
bool AsmParser::ParseDirectiveOrg() {
- CheckForValidSection();
+ checkForValidSection();
const MCExpr *Offset;
SMLoc Loc = getTok().getLoc();
- if (ParseExpression(Offset))
+ if (parseExpression(Offset))
return true;
// Parse optional fill expression.
@@ -2298,7 +2382,7 @@ bool AsmParser::ParseDirectiveOrg() {
return TokError("unexpected token in '.org' directive");
Lex();
- if (ParseAbsoluteExpression(FillExpr))
+ if (parseAbsoluteExpression(FillExpr))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -2319,11 +2403,11 @@ bool AsmParser::ParseDirectiveOrg() {
/// ParseDirectiveAlign
/// ::= {.align, ...} expression [ , expression [ , expression ]]
bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
- CheckForValidSection();
+ checkForValidSection();
SMLoc AlignmentLoc = getLexer().getLoc();
int64_t Alignment;
- if (ParseAbsoluteExpression(Alignment))
+ if (parseAbsoluteExpression(Alignment))
return true;
SMLoc MaxBytesLoc;
@@ -2340,7 +2424,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
// .align 3,,4
if (getLexer().isNot(AsmToken::Comma)) {
HasFillExpr = true;
- if (ParseAbsoluteExpression(FillExpr))
+ if (parseAbsoluteExpression(FillExpr))
return true;
}
@@ -2350,7 +2434,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
Lex();
MaxBytesLoc = getLexer().getLoc();
- if (ParseAbsoluteExpression(MaxBytesToFill))
+ if (parseAbsoluteExpression(MaxBytesToFill))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -2372,6 +2456,10 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
}
Alignment = 1ULL << Alignment;
+ } else {
+ // Reject alignments that aren't a power of two, for gas compatibility.
+ if (!isPowerOf2_64(Alignment))
+ Error(AlignmentLoc, "alignment must be a power of 2");
}
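  // Effect of the new check on byte-alignment directives (illustrative):
  //
  //   .balign 8    // accepted: a power of two
  //   .balign 6    // now rejected: "alignment must be a power of 2"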
// Diagnose nonsensical max bytes to align.
@@ -2404,368 +2492,10 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
return false;
}
-/// ParseDirectiveSymbolAttribute
-/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
-bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
- StringRef Name;
- SMLoc Loc = getTok().getLoc();
-
- if (ParseIdentifier(Name))
- return Error(Loc, "expected identifier in directive");
-
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
-
- // Assembler local symbols don't make any sense here. Complain loudly.
- if (Sym->isTemporary())
- return Error(Loc, "non-local symbol required in directive");
-
- getStreamer().EmitSymbolAttribute(Sym, Attr);
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in directive");
- Lex();
- }
- }
-
- Lex();
- return false;
-}
-
-/// ParseDirectiveComm
-/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
-bool AsmParser::ParseDirectiveComm(bool IsLocal) {
- CheckForValidSection();
-
- SMLoc IDLoc = getLexer().getLoc();
- StringRef Name;
- if (ParseIdentifier(Name))
- return TokError("expected identifier in directive");
-
- // Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
-
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in directive");
- Lex();
-
- int64_t Size;
- SMLoc SizeLoc = getLexer().getLoc();
- if (ParseAbsoluteExpression(Size))
- return true;
-
- int64_t Pow2Alignment = 0;
- SMLoc Pow2AlignmentLoc;
- if (getLexer().is(AsmToken::Comma)) {
- Lex();
- Pow2AlignmentLoc = getLexer().getLoc();
- if (ParseAbsoluteExpression(Pow2Alignment))
- return true;
-
- LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
- if (IsLocal && LCOMM == LCOMM::NoAlignment)
- return Error(Pow2AlignmentLoc, "alignment not supported on this target");
-
- // If this target takes alignments in bytes (not log) validate and convert.
- if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
- (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
- if (!isPowerOf2_64(Pow2Alignment))
- return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
- Pow2Alignment = Log2_64(Pow2Alignment);
- }
- }
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.comm' or '.lcomm' directive");
-
- Lex();
-
- // NOTE: a size of zero for a .comm should create a undefined symbol
- // but a size of .lcomm creates a bss symbol of size zero.
- if (Size < 0)
- return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
- "be less than zero");
-
- // NOTE: The alignment in the directive is a power of 2 value, the assembler
- // may internally end up wanting an alignment in bytes.
- // FIXME: Diagnose overflow.
- if (Pow2Alignment < 0)
- return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
- "alignment, can't be less than zero");
-
- if (!Sym->isUndefined())
- return Error(IDLoc, "invalid symbol redefinition");
-
- // Create the Symbol as a common or local common with Size and Pow2Alignment
- if (IsLocal) {
- getStreamer().EmitLocalCommonSymbol(Sym, Size, 1 << Pow2Alignment);
- return false;
- }
-
- getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
- return false;
-}
-
-/// ParseDirectiveAbort
-/// ::= .abort [... message ...]
-bool AsmParser::ParseDirectiveAbort() {
- // FIXME: Use loc from directive.
- SMLoc Loc = getLexer().getLoc();
-
- StringRef Str = ParseStringToEndOfStatement();
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.abort' directive");
-
- Lex();
-
- if (Str.empty())
- Error(Loc, ".abort detected. Assembly stopping.");
- else
- Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
- // FIXME: Actually abort assembly here.
-
- return false;
-}
-
-/// ParseDirectiveInclude
-/// ::= .include "filename"
-bool AsmParser::ParseDirectiveInclude() {
- if (getLexer().isNot(AsmToken::String))
- return TokError("expected string in '.include' directive");
-
- std::string Filename = getTok().getString();
- SMLoc IncludeLoc = getLexer().getLoc();
- Lex();
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.include' directive");
-
- // Strip the quotes.
- Filename = Filename.substr(1, Filename.size()-2);
-
- // Attempt to switch the lexer to the included file before consuming the end
- // of statement to avoid losing it when we switch.
- if (EnterIncludeFile(Filename)) {
- Error(IncludeLoc, "Could not find include file '" + Filename + "'");
- return true;
- }
-
- return false;
-}
-
-/// ParseDirectiveIncbin
-/// ::= .incbin "filename"
-bool AsmParser::ParseDirectiveIncbin() {
- if (getLexer().isNot(AsmToken::String))
- return TokError("expected string in '.incbin' directive");
-
- std::string Filename = getTok().getString();
- SMLoc IncbinLoc = getLexer().getLoc();
- Lex();
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.incbin' directive");
-
- // Strip the quotes.
- Filename = Filename.substr(1, Filename.size()-2);
-
- // Attempt to process the included file.
- if (ProcessIncbinFile(Filename)) {
- Error(IncbinLoc, "Could not find incbin file '" + Filename + "'");
- return true;
- }
-
- return false;
-}
-
-/// ParseDirectiveIf
-/// ::= .if expression
-bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
- TheCondStack.push_back(TheCondState);
- TheCondState.TheCond = AsmCond::IfCond;
- if (TheCondState.Ignore) {
- EatToEndOfStatement();
- } else {
- int64_t ExprValue;
- if (ParseAbsoluteExpression(ExprValue))
- return true;
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.if' directive");
-
- Lex();
-
- TheCondState.CondMet = ExprValue;
- TheCondState.Ignore = !TheCondState.CondMet;
- }
-
- return false;
-}
-
-/// ParseDirectiveIfb
-/// ::= .ifb string
-bool AsmParser::ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
- TheCondStack.push_back(TheCondState);
- TheCondState.TheCond = AsmCond::IfCond;
-
- if (TheCondState.Ignore) {
- EatToEndOfStatement();
- } else {
- StringRef Str = ParseStringToEndOfStatement();
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.ifb' directive");
-
- Lex();
-
- TheCondState.CondMet = ExpectBlank == Str.empty();
- TheCondState.Ignore = !TheCondState.CondMet;
- }
-
- return false;
-}
-
-/// ParseDirectiveIfc
-/// ::= .ifc string1, string2
-bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) {
- TheCondStack.push_back(TheCondState);
- TheCondState.TheCond = AsmCond::IfCond;
-
- if (TheCondState.Ignore) {
- EatToEndOfStatement();
- } else {
- StringRef Str1 = ParseStringToComma();
-
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in '.ifc' directive");
-
- Lex();
-
- StringRef Str2 = ParseStringToEndOfStatement();
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.ifc' directive");
-
- Lex();
-
- TheCondState.CondMet = ExpectEqual == (Str1 == Str2);
- TheCondState.Ignore = !TheCondState.CondMet;
- }
-
- return false;
-}
-
-/// ParseDirectiveIfdef
-/// ::= .ifdef symbol
-bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
- StringRef Name;
- TheCondStack.push_back(TheCondState);
- TheCondState.TheCond = AsmCond::IfCond;
-
- if (TheCondState.Ignore) {
- EatToEndOfStatement();
- } else {
- if (ParseIdentifier(Name))
- return TokError("expected identifier after '.ifdef'");
-
- Lex();
-
- MCSymbol *Sym = getContext().LookupSymbol(Name);
-
- if (expect_defined)
- TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined());
- else
- TheCondState.CondMet = (Sym == NULL || Sym->isUndefined());
- TheCondState.Ignore = !TheCondState.CondMet;
- }
-
- return false;
-}
-
-/// ParseDirectiveElseIf
-/// ::= .elseif expression
-bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
- if (TheCondState.TheCond != AsmCond::IfCond &&
- TheCondState.TheCond != AsmCond::ElseIfCond)
- Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
- " an .elseif");
- TheCondState.TheCond = AsmCond::ElseIfCond;
-
- bool LastIgnoreState = false;
- if (!TheCondStack.empty())
- LastIgnoreState = TheCondStack.back().Ignore;
- if (LastIgnoreState || TheCondState.CondMet) {
- TheCondState.Ignore = true;
- EatToEndOfStatement();
- }
- else {
- int64_t ExprValue;
- if (ParseAbsoluteExpression(ExprValue))
- return true;
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.elseif' directive");
-
- Lex();
- TheCondState.CondMet = ExprValue;
- TheCondState.Ignore = !TheCondState.CondMet;
- }
-
- return false;
-}
-
-/// ParseDirectiveElse
-/// ::= .else
-bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.else' directive");
-
- Lex();
-
- if (TheCondState.TheCond != AsmCond::IfCond &&
- TheCondState.TheCond != AsmCond::ElseIfCond)
- Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
- ".elseif");
- TheCondState.TheCond = AsmCond::ElseCond;
- bool LastIgnoreState = false;
- if (!TheCondStack.empty())
- LastIgnoreState = TheCondStack.back().Ignore;
- if (LastIgnoreState || TheCondState.CondMet)
- TheCondState.Ignore = true;
- else
- TheCondState.Ignore = false;
-
- return false;
-}
-
-/// ParseDirectiveEndIf
-/// ::= .endif
-bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.endif' directive");
-
- Lex();
-
- if ((TheCondState.TheCond == AsmCond::NoCond) ||
- TheCondStack.empty())
- Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
- ".else");
- if (!TheCondStack.empty()) {
- TheCondState = TheCondStack.back();
- TheCondStack.pop_back();
- }
-
- return false;
-}
-
/// ParseDirectiveFile
/// ::= .file [number] filename
/// ::= .file number directory filename
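/// For example (illustrative): .file 2 "src" "foo.c" assigns file number 2,
/// which later .loc directives reference.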
-bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveFile(SMLoc DirectiveLoc) {
// FIXME: I'm not sure what this is.
int64_t FileNumber = -1;
SMLoc FileNumberLoc = getLexer().getLoc();
@@ -2817,7 +2547,7 @@ bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
/// ParseDirectiveLine
/// ::= .line [number]
-bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveLine() {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.line' directive");
@@ -2835,7 +2565,6 @@ bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
return false;
}
-
/// ParseDirectiveLoc
/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
/// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
@@ -2843,8 +2572,7 @@ bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
/// a .file directive, the second number is the line number and optionally the
/// third number is a column position (zero if not specified). The remaining
/// optional items are .loc sub-directives.
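/// For example (illustrative): ".loc 1 7 2 prologue_end" marks file 1,
/// line 7, column 2, and flags the end of the function prologue.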
-bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
-
+bool AsmParser::ParseDirectiveLoc() {
if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.loc' directive");
int64_t FileNumber = getTok().getIntVal();
@@ -2880,7 +2608,7 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
StringRef Name;
SMLoc Loc = getTok().getLoc();
- if (getParser().ParseIdentifier(Name))
+ if (parseIdentifier(Name))
return TokError("unexpected token in '.loc' directive");
if (Name == "basic_block")
@@ -2890,9 +2618,9 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
else if (Name == "epilogue_begin")
Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
else if (Name == "is_stmt") {
- SMLoc Loc = getTok().getLoc();
+ Loc = getTok().getLoc();
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (parseExpression(Value))
return true;
// The expression must be the constant 0 or 1.
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
@@ -2909,9 +2637,9 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
}
}
else if (Name == "isa") {
- SMLoc Loc = getTok().getLoc();
+ Loc = getTok().getLoc();
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (parseExpression(Value))
return true;
// The expression must be a constant greater or equal to 0.
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
@@ -2925,7 +2653,7 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
}
}
else if (Name == "discriminator") {
- if (getParser().ParseAbsoluteExpression(Discriminator))
+ if (parseAbsoluteExpression(Discriminator))
return true;
}
else {
@@ -2945,20 +2673,18 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
/// ParseDirectiveStabs
/// ::= .stabs string, number, number, number
-bool GenericAsmParser::ParseDirectiveStabs(StringRef Directive,
- SMLoc DirectiveLoc) {
- return TokError("unsupported directive '" + Directive + "'");
+bool AsmParser::ParseDirectiveStabs() {
+ return TokError("unsupported directive '.stabs'");
}
/// ParseDirectiveCFISections
/// ::= .cfi_sections section [, section]
-bool GenericAsmParser::ParseDirectiveCFISections(StringRef,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFISections() {
StringRef Name;
bool EH = false;
bool Debug = false;
- if (getParser().ParseIdentifier(Name))
+ if (parseIdentifier(Name))
return TokError("Expected an identifier");
if (Name == ".eh_frame")
@@ -2969,7 +2695,7 @@ bool GenericAsmParser::ParseDirectiveCFISections(StringRef,
if (getLexer().is(AsmToken::Comma)) {
Lex();
- if (getParser().ParseIdentifier(Name))
+ if (parseIdentifier(Name))
return TokError("Expected an identifier");
if (Name == ".eh_frame")
@@ -2979,45 +2705,41 @@ bool GenericAsmParser::ParseDirectiveCFISections(StringRef,
}
getStreamer().EmitCFISections(EH, Debug);
-
return false;
}
/// ParseDirectiveCFIStartProc
/// ::= .cfi_startproc
-bool GenericAsmParser::ParseDirectiveCFIStartProc(StringRef,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIStartProc() {
getStreamer().EmitCFIStartProc();
return false;
}
/// ParseDirectiveCFIEndProc
/// ::= .cfi_endproc
-bool GenericAsmParser::ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIEndProc() {
getStreamer().EmitCFIEndProc();
return false;
}
/// ParseRegisterOrRegisterNumber - parse register name or number.
-bool GenericAsmParser::ParseRegisterOrRegisterNumber(int64_t &Register,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseRegisterOrRegisterNumber(int64_t &Register,
+ SMLoc DirectiveLoc) {
unsigned RegNo;
if (getLexer().isNot(AsmToken::Integer)) {
- if (getParser().getTargetParser().ParseRegister(RegNo, DirectiveLoc,
- DirectiveLoc))
+ if (getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc))
return true;
Register = getContext().getRegisterInfo().getDwarfRegNum(RegNo, true);
} else
- return getParser().ParseAbsoluteExpression(Register);
+ return parseAbsoluteExpression(Register);
return false;
}
/// ParseDirectiveCFIDefCfa
/// ::= .cfi_def_cfa register, offset
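/// For example (illustrative): ".cfi_def_cfa %rsp, 16" defines the CFA as
/// rsp + 16.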
-bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
int64_t Register = 0;
if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
@@ -3027,7 +2749,7 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef,
Lex();
int64_t Offset = 0;
- if (getParser().ParseAbsoluteExpression(Offset))
+ if (parseAbsoluteExpression(Offset))
return true;
getStreamer().EmitCFIDefCfa(Register, Offset);
@@ -3036,22 +2758,39 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef,
/// ParseDirectiveCFIDefCfaOffset
/// ::= .cfi_def_cfa_offset offset
-bool GenericAsmParser::ParseDirectiveCFIDefCfaOffset(StringRef,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIDefCfaOffset() {
int64_t Offset = 0;
- if (getParser().ParseAbsoluteExpression(Offset))
+ if (parseAbsoluteExpression(Offset))
return true;
getStreamer().EmitCFIDefCfaOffset(Offset);
return false;
}
+/// ParseDirectiveCFIRegister
+/// ::= .cfi_register register, register
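+/// For example (illustrative): ".cfi_register %rbp, %r12" records that the
+/// previous value of rbp now lives in r12.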
+bool AsmParser::ParseDirectiveCFIRegister(SMLoc DirectiveLoc) {
+ int64_t Register1 = 0;
+ if (ParseRegisterOrRegisterNumber(Register1, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Register2 = 0;
+ if (ParseRegisterOrRegisterNumber(Register2, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFIRegister(Register1, Register2);
+ return false;
+}
+
/// ParseDirectiveCFIAdjustCfaOffset
/// ::= .cfi_adjust_cfa_offset adjustment
-bool GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset(StringRef,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIAdjustCfaOffset() {
int64_t Adjustment = 0;
- if (getParser().ParseAbsoluteExpression(Adjustment))
+ if (parseAbsoluteExpression(Adjustment))
return true;
getStreamer().EmitCFIAdjustCfaOffset(Adjustment);
@@ -3060,8 +2799,7 @@ bool GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset(StringRef,
/// ParseDirectiveCFIDefCfaRegister
/// ::= .cfi_def_cfa_register register
-bool GenericAsmParser::ParseDirectiveCFIDefCfaRegister(StringRef,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
int64_t Register = 0;
if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
@@ -3072,7 +2810,7 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfaRegister(StringRef,
/// ParseDirectiveCFIOffset
/// ::= .cfi_offset register, offset
-bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIOffset(SMLoc DirectiveLoc) {
int64_t Register = 0;
int64_t Offset = 0;
@@ -3083,7 +2821,7 @@ bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) {
return TokError("unexpected token in directive");
Lex();
- if (getParser().ParseAbsoluteExpression(Offset))
+ if (parseAbsoluteExpression(Offset))
return true;
getStreamer().EmitCFIOffset(Register, Offset);
@@ -3092,8 +2830,7 @@ bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) {
/// ParseDirectiveCFIRelOffset
/// ::= .cfi_rel_offset register, offset
-bool GenericAsmParser::ParseDirectiveCFIRelOffset(StringRef,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
int64_t Register = 0;
if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
@@ -3104,7 +2841,7 @@ bool GenericAsmParser::ParseDirectiveCFIRelOffset(StringRef,
Lex();
int64_t Offset = 0;
- if (getParser().ParseAbsoluteExpression(Offset))
+ if (parseAbsoluteExpression(Offset))
return true;
getStreamer().EmitCFIRelOffset(Register, Offset);
@@ -3134,12 +2871,12 @@ static bool isValidEncoding(int64_t Encoding) {
}
/// ParseDirectiveCFIPersonalityOrLsda
+/// IsPersonality: true for .cfi_personality, false for .cfi_lsda.
/// ::= .cfi_personality encoding, [symbol_name]
/// ::= .cfi_lsda encoding, [symbol_name]
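/// For example (illustrative): ".cfi_personality 0x9b, __gxx_personality_v0"
/// or ".cfi_lsda 0x1b, .Lexception0".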
-bool GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda(StringRef IDVal,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
int64_t Encoding = 0;
- if (getParser().ParseAbsoluteExpression(Encoding))
+ if (parseAbsoluteExpression(Encoding))
return true;
if (Encoding == dwarf::DW_EH_PE_omit)
return false;
@@ -3152,70 +2889,61 @@ bool GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda(StringRef IDVal,
Lex();
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (parseIdentifier(Name))
return TokError("expected identifier in directive");
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
- if (IDVal == ".cfi_personality")
+ if (IsPersonality)
getStreamer().EmitCFIPersonality(Sym, Encoding);
- else {
- assert(IDVal == ".cfi_lsda");
+ else
getStreamer().EmitCFILsda(Sym, Encoding);
- }
return false;
}
/// ParseDirectiveCFIRememberState
/// ::= .cfi_remember_state
-bool GenericAsmParser::ParseDirectiveCFIRememberState(StringRef IDVal,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIRememberState() {
getStreamer().EmitCFIRememberState();
return false;
}
/// ParseDirectiveCFIRestoreState
/// ::= .cfi_restore_state
-bool GenericAsmParser::ParseDirectiveCFIRestoreState(StringRef IDVal,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIRestoreState() {
getStreamer().EmitCFIRestoreState();
return false;
}
/// ParseDirectiveCFISameValue
/// ::= .cfi_same_value register
-bool GenericAsmParser::ParseDirectiveCFISameValue(StringRef IDVal,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFISameValue(SMLoc DirectiveLoc) {
int64_t Register = 0;
if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
getStreamer().EmitCFISameValue(Register);
-
return false;
}
/// ParseDirectiveCFIRestore
/// ::= .cfi_restore register
-bool GenericAsmParser::ParseDirectiveCFIRestore(StringRef IDVal,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIRestore(SMLoc DirectiveLoc) {
int64_t Register = 0;
if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
getStreamer().EmitCFIRestore(Register);
-
return false;
}
/// ParseDirectiveCFIEscape
/// ::= .cfi_escape expression[,...]
-bool GenericAsmParser::ParseDirectiveCFIEscape(StringRef IDVal,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFIEscape() {
std::string Values;
int64_t CurrValue;
- if (getParser().ParseAbsoluteExpression(CurrValue))
+ if (parseAbsoluteExpression(CurrValue))
return true;
Values.push_back((uint8_t)CurrValue);
@@ -3223,7 +2951,7 @@ bool GenericAsmParser::ParseDirectiveCFIEscape(StringRef IDVal,
while (getLexer().is(AsmToken::Comma)) {
Lex();
- if (getParser().ParseAbsoluteExpression(CurrValue))
+ if (parseAbsoluteExpression(CurrValue))
return true;
Values.push_back((uint8_t)CurrValue);
@@ -3235,52 +2963,59 @@ bool GenericAsmParser::ParseDirectiveCFIEscape(StringRef IDVal,
/// ParseDirectiveCFISignalFrame
/// ::= .cfi_signal_frame
-bool GenericAsmParser::ParseDirectiveCFISignalFrame(StringRef Directive,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveCFISignalFrame() {
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
- "unexpected token in '" + Directive + "' directive");
+ "unexpected token in '.cfi_signal_frame'");
getStreamer().EmitCFISignalFrame();
+ return false;
+}
+
+/// ParseDirectiveCFIUndefined
+/// ::= .cfi_undefined register
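+/// For example (illustrative): ".cfi_undefined %rax" states that rax cannot
+/// be restored from this frame.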
+bool AsmParser::ParseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+ getStreamer().EmitCFIUndefined(Register);
return false;
}
/// ParseDirectiveMacrosOnOff
/// ::= .macros_on
/// ::= .macros_off
-bool GenericAsmParser::ParseDirectiveMacrosOnOff(StringRef Directive,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveMacrosOnOff(StringRef Directive) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(getLexer().getLoc(),
"unexpected token in '" + Directive + "' directive");
- getParser().MacrosEnabled = Directive == ".macros_on";
-
+ SetMacrosEnabled(Directive == ".macros_on");
return false;
}
/// ParseDirectiveMacro
/// ::= .macro name [parameters]
-bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) {
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (parseIdentifier(Name))
return TokError("expected identifier in '.macro' directive");
- MacroParameters Parameters;
+ MCAsmMacroParameters Parameters;
// Argument delimiter is initially unknown. It will be set by
// ParseMacroArgument()
AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
- MacroParameter Parameter;
- if (getParser().ParseIdentifier(Parameter.first))
+ MCAsmMacroParameter Parameter;
+ if (parseIdentifier(Parameter.first))
return TokError("expected identifier in '.macro' directive");
if (getLexer().is(AsmToken::Equal)) {
Lex();
- if (getParser().ParseMacroArgument(Parameter.second, ArgumentDelimiter))
+ if (ParseMacroArgument(Parameter.second, ArgumentDelimiter))
return true;
}
@@ -3317,32 +3052,134 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
}
// Otherwise, scan til the end of the statement.
- getParser().EatToEndOfStatement();
+ eatToEndOfStatement();
}
- if (getParser().MacroMap.lookup(Name)) {
+ if (LookupMacro(Name)) {
return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
}
const char *BodyStart = StartToken.getLoc().getPointer();
const char *BodyEnd = EndToken.getLoc().getPointer();
StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
- getParser().MacroMap[Name] = new Macro(Name, Body, Parameters);
+ CheckForBadMacro(DirectiveLoc, Name, Body, Parameters);
+ DefineMacro(Name, MCAsmMacro(Name, Body, Parameters));
return false;
}
+/// CheckForBadMacro
+///
+/// With the support added for named parameters there may be code out there
+/// that is transitioning from positional parameters. In versions of gas that
+/// did not support named parameters they would be ignored on the macro
+/// definition. Supporting both styles at once is not possible, so if a macro
+/// definition has named parameters but does not use them, and its body
+/// contains what appear to be positional parameters (strings like $1, $2, ...
+/// and $n), issue a warning that the positional parameters found in the body
+/// will have no effect. The hope is that the developer will either remove the
+/// named parameters from the macro definition, so the positional parameters
+/// get used if that was the intent, or change the macro to use the named
+/// parameters. It is possible this warning will trigger when none of the
+/// named parameters are used and strings like $1 are in fact meant to be
+/// passed through unchanged.
+void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name,
+ StringRef Body,
+ MCAsmMacroParameters Parameters) {
+ // If this macro is not defined with named parameters the warning we are
+ // checking for here doesn't apply.
+ unsigned NParameters = Parameters.size();
+ if (NParameters == 0)
+ return;
+
+ bool NamedParametersFound = false;
+ bool PositionalParametersFound = false;
+
+ // Look at the body of the macro for use of both the named parameters and what
+ // are likely to be positional parameters. This is what expandMacro() is
+ // doing when it finds the parameters in the body.
+ while (!Body.empty()) {
+ // Scan for the next possible parameter.
+ std::size_t End = Body.size(), Pos = 0;
+ for (; Pos != End; ++Pos) {
+ // Check for a substitution or escape.
+ // This macro is defined with parameters, look for \foo, \bar, etc.
+ if (Body[Pos] == '\\' && Pos + 1 != End)
+ break;
+
+ // This macro should have parameters, but look for $0, $1, ..., $n too.
+ if (Body[Pos] != '$' || Pos + 1 == End)
+ continue;
+ char Next = Body[Pos + 1];
+ if (Next == '$' || Next == 'n' ||
+ isdigit(static_cast<unsigned char>(Next)))
+ break;
+ }
+
+ // Check if we reached the end.
+ if (Pos == End)
+ break;
+
+ if (Body[Pos] == '$') {
+ switch (Body[Pos+1]) {
+ // $$ => $
+ case '$':
+ break;
+
+ // $n => number of arguments
+ case 'n':
+ PositionalParametersFound = true;
+ break;
+
+ // $[0-9] => argument
+ default: {
+ PositionalParametersFound = true;
+ break;
+ }
+ }
+ Pos += 2;
+ } else {
+ unsigned I = Pos + 1;
+ while (isIdentifierChar(Body[I]) && I + 1 != End)
+ ++I;
+
+ const char *Begin = Body.data() + Pos +1;
+ StringRef Argument(Begin, I - (Pos +1));
+ unsigned Index = 0;
+ for (; Index < NParameters; ++Index)
+ if (Parameters[Index].first == Argument)
+ break;
+
+ if (Index == NParameters) {
+ if (Body[Pos+1] == '(' && Body[Pos+2] == ')')
+ Pos += 3;
+ else {
+ Pos = I;
+ }
+ } else {
+ NamedParametersFound = true;
+ Pos += 1 + Argument.size();
+ }
+ }
+ // Update the scan point.
+ Body = Body.substr(Pos);
+ }
+
+ if (!NamedParametersFound && PositionalParametersFound)
+ Warning(DirectiveLoc, "macro defined with named parameters which are not "
+ "used in macro body, possible positional parameter "
+ "found in body which will have no effect");
+}
+
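+// For example (illustrative), this definition would warn: the named
+// parameter "count" is never referenced, while "$0" looks positional:
+//   .macro pad count
+//   .fill $0
+//   .endm
+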
/// ParseDirectiveEndMacro
/// ::= .endm
/// ::= .endmacro
-bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectiveEndMacro(StringRef Directive) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '" + Directive + "' directive");
// If we are inside a macro instantiation, terminate the current
// instantiation.
- if (!getParser().ActiveMacros.empty()) {
- getParser().HandleMacroExit();
+ if (InsideMacroInstantiation()) {
+ HandleMacroExit();
return false;
}
@@ -3354,37 +3191,136 @@ bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive,
/// ParseDirectivePurgeMacro
/// ::= .purgem
-bool GenericAsmParser::ParseDirectivePurgeMacro(StringRef Directive,
- SMLoc DirectiveLoc) {
+bool AsmParser::ParseDirectivePurgeMacro(SMLoc DirectiveLoc) {
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (parseIdentifier(Name))
return TokError("expected identifier in '.purgem' directive");
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.purgem' directive");
- StringMap<Macro*>::iterator I = getParser().MacroMap.find(Name);
- if (I == getParser().MacroMap.end())
+ if (!LookupMacro(Name))
return Error(DirectiveLoc, "macro '" + Name + "' is not defined");
- // Undefine the macro.
- delete I->getValue();
- getParser().MacroMap.erase(I);
+ UndefineMacro(Name);
return false;
}
-bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) {
- getParser().CheckForValidSection();
+/// ParseDirectiveBundleAlignMode
+/// ::= {.bundle_align_mode} expression
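+/// For example (illustrative): ".bundle_align_mode 4" selects 16-byte
+/// bundles (the operand is a power-of-2 exponent in the range 0-30).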
+bool AsmParser::ParseDirectiveBundleAlignMode() {
+ checkForValidSection();
+
+ // Expect a single argument: an expression that evaluates to a constant
+ // in the inclusive range 0-30.
+ SMLoc ExprLoc = getLexer().getLoc();
+ int64_t AlignSizePow2;
+ if (parseAbsoluteExpression(AlignSizePow2))
+ return true;
+ else if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token after expression in"
+ " '.bundle_align_mode' directive");
+ else if (AlignSizePow2 < 0 || AlignSizePow2 > 30)
+ return Error(ExprLoc,
+ "invalid bundle alignment size (expected between 0 and 30)");
+
+ Lex();
+
+ // Because of AlignSizePow2's verified range we can safely truncate it to
+ // unsigned.
+ getStreamer().EmitBundleAlignMode(static_cast<unsigned>(AlignSizePow2));
+ return false;
+}
+
+/// ParseDirectiveBundleLock
+/// ::= {.bundle_lock} [align_to_end]
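+/// For example (illustrative), a locked group that must end on a bundle
+/// boundary:
+///   .bundle_lock align_to_end
+///   ...instructions...
+///   .bundle_unlock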
+bool AsmParser::ParseDirectiveBundleLock() {
+ checkForValidSection();
+ bool AlignToEnd = false;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ StringRef Option;
+ SMLoc Loc = getTok().getLoc();
+ const char *kInvalidOptionError =
+ "invalid option for '.bundle_lock' directive";
+
+ if (parseIdentifier(Option))
+ return Error(Loc, kInvalidOptionError);
+
+ if (Option != "align_to_end")
+ return Error(Loc, kInvalidOptionError);
+ else if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(Loc,
+ "unexpected token after '.bundle_lock' directive option");
+ AlignToEnd = true;
+ }
+ Lex();
+
+ getStreamer().EmitBundleLock(AlignToEnd);
+ return false;
+}
+
+/// ParseDirectiveBundleUnlock
+/// ::= {.bundle_unlock}
+bool AsmParser::ParseDirectiveBundleUnlock() {
+ checkForValidSection();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.bundle_unlock' directive");
+ Lex();
+
+ getStreamer().EmitBundleUnlock();
+ return false;
+}
+
+/// ParseDirectiveSpace
+/// ::= (.skip | .space) expression [ , expression ]
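+/// For example (illustrative): ".space 16, 0x90" emits sixteen 0x90 bytes.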
+bool AsmParser::ParseDirectiveSpace(StringRef IDVal) {
+ checkForValidSection();
+
+ int64_t NumBytes;
+ if (parseAbsoluteExpression(NumBytes))
+ return true;
+
+ int64_t FillExpr = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ Lex();
+
+ if (parseAbsoluteExpression(FillExpr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ }
+
+ Lex();
+
+ if (NumBytes <= 0)
+ return TokError("invalid number of bytes in '" +
+ Twine(IDVal) + "' directive");
+
+ // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
+ getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE);
+
+ return false;
+}
+
+/// ParseDirectiveLEB128
+/// ::= (.sleb128 | .uleb128) expression
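+/// For example (illustrative): ".uleb128 130" encodes as the bytes
+/// 0x82 0x01, and ".sleb128 -2" as the single byte 0x7e.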
+bool AsmParser::ParseDirectiveLEB128(bool Signed) {
+ checkForValidSection();
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (parseExpression(Value))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- if (DirName[1] == 's')
+ if (Signed)
getStreamer().EmitSLEB128Value(Value);
else
getStreamer().EmitULEB128Value(Value);
@@ -3392,7 +3328,469 @@ bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) {
return false;
}
-Macro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) {
+/// ParseDirectiveSymbolAttribute
+/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
+bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ StringRef Name;
+ SMLoc Loc = getTok().getLoc();
+
+ if (parseIdentifier(Name))
+ return Error(Loc, "expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ // Assembler local symbols don't make any sense here. Complain loudly.
+ if (Sym->isTemporary())
+ return Error(Loc, "non-local symbol required in directive");
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+/// ParseDirectiveComm
+/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
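+/// For example (illustrative): ".comm buf, 64, 3" requests a 64-byte common
+/// symbol; whether the third operand is a byte count or a log2 value is
+/// target-dependent (see getCOMMDirectiveAlignmentIsInBytes below).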
+bool AsmParser::ParseDirectiveComm(bool IsLocal) {
+ checkForValidSection();
+
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef Name;
+ if (parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (parseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (parseAbsoluteExpression(Pow2Alignment))
+ return true;
+
+ LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
+ if (IsLocal && LCOMM == LCOMM::NoAlignment)
+ return Error(Pow2AlignmentLoc, "alignment not supported on this target");
+
+ // If this target takes alignments in bytes (not log) validate and convert.
+ if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
+ (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
+ if (!isPowerOf2_64(Pow2Alignment))
+ return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
+ Pow2Alignment = Log2_64(Pow2Alignment);
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.comm' or '.lcomm' directive");
+
+ Lex();
+
+  // NOTE: a size of zero for a .comm should create an undefined symbol
+  // but a size of zero for a .lcomm creates a bss symbol of size zero.
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
+ "be less than zero");
+
+ // NOTE: The alignment in the directive is a power of 2 value, the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
+ "alignment, can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // Create the Symbol as a common or local common with Size and Pow2Alignment
+ if (IsLocal) {
+ getStreamer().EmitLocalCommonSymbol(Sym, Size, 1 << Pow2Alignment);
+ return false;
+ }
+
+ getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
+ return false;
+}
+
+/// ParseDirectiveAbort
+/// ::= .abort [... message ...]
+bool AsmParser::ParseDirectiveAbort() {
+ // FIXME: Use loc from directive.
+ SMLoc Loc = getLexer().getLoc();
+
+ StringRef Str = parseStringToEndOfStatement();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.abort' directive");
+
+ Lex();
+
+ if (Str.empty())
+ Error(Loc, ".abort detected. Assembly stopping.");
+ else
+ Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
+ // FIXME: Actually abort assembly here.
+
+ return false;
+}
+
+/// ParseDirectiveInclude
+/// ::= .include "filename"
+bool AsmParser::ParseDirectiveInclude() {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.include' directive");
+
+ std::string Filename = getTok().getString();
+ SMLoc IncludeLoc = getLexer().getLoc();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.include' directive");
+
+ // Strip the quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Attempt to switch the lexer to the included file before consuming the end
+ // of statement to avoid losing it when we switch.
+ if (EnterIncludeFile(Filename)) {
+ Error(IncludeLoc, "Could not find include file '" + Filename + "'");
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIncbin
+/// ::= .incbin "filename"
+bool AsmParser::ParseDirectiveIncbin() {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.incbin' directive");
+
+ std::string Filename = getTok().getString();
+ SMLoc IncbinLoc = getLexer().getLoc();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.incbin' directive");
+
+ // Strip the quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Attempt to process the included file.
+ if (ProcessIncbinFile(Filename)) {
+ Error(IncbinLoc, "Could not find incbin file '" + Filename + "'");
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIf
+/// ::= .if expression
+bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+ if (TheCondState.Ignore) {
+ eatToEndOfStatement();
+ } else {
+ int64_t ExprValue;
+ if (parseAbsoluteExpression(ExprValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.if' directive");
+
+ Lex();
+
+ TheCondState.CondMet = ExprValue;
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIfb
+/// ::= .ifb string
+bool AsmParser::ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+
+ if (TheCondState.Ignore) {
+ eatToEndOfStatement();
+ } else {
+ StringRef Str = parseStringToEndOfStatement();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.ifb' directive");
+
+ Lex();
+
+ TheCondState.CondMet = ExpectBlank == Str.empty();
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIfc
+/// ::= .ifc string1, string2
+bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) {
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+
+ if (TheCondState.Ignore) {
+ eatToEndOfStatement();
+ } else {
+ StringRef Str1 = ParseStringToComma();
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.ifc' directive");
+
+ Lex();
+
+ StringRef Str2 = parseStringToEndOfStatement();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.ifc' directive");
+
+ Lex();
+
+ TheCondState.CondMet = ExpectEqual == (Str1 == Str2);
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveIfdef
+/// ::= .ifdef symbol
+bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
+ StringRef Name;
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+
+ if (TheCondState.Ignore) {
+ eatToEndOfStatement();
+ } else {
+ if (parseIdentifier(Name))
+ return TokError("expected identifier after '.ifdef'");
+
+ Lex();
+
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+
+ if (expect_defined)
+ TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined());
+ else
+ TheCondState.CondMet = (Sym == NULL || Sym->isUndefined());
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveElseIf
+/// ::= .elseif expression
+bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
+ if (TheCondState.TheCond != AsmCond::IfCond &&
+ TheCondState.TheCond != AsmCond::ElseIfCond)
+    Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
+                        "an .elseif");
+ TheCondState.TheCond = AsmCond::ElseIfCond;
+
+ bool LastIgnoreState = false;
+ if (!TheCondStack.empty())
+ LastIgnoreState = TheCondStack.back().Ignore;
+ if (LastIgnoreState || TheCondState.CondMet) {
+ TheCondState.Ignore = true;
+ eatToEndOfStatement();
+ }
+ else {
+ int64_t ExprValue;
+ if (parseAbsoluteExpression(ExprValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.elseif' directive");
+
+ Lex();
+ TheCondState.CondMet = ExprValue;
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveElse
+/// ::= .else
+bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.else' directive");
+
+ Lex();
+
+ if (TheCondState.TheCond != AsmCond::IfCond &&
+ TheCondState.TheCond != AsmCond::ElseIfCond)
+ Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
+ ".elseif");
+ TheCondState.TheCond = AsmCond::ElseCond;
+ bool LastIgnoreState = false;
+ if (!TheCondStack.empty())
+ LastIgnoreState = TheCondStack.back().Ignore;
+ if (LastIgnoreState || TheCondState.CondMet)
+ TheCondState.Ignore = true;
+ else
+ TheCondState.Ignore = false;
+
+ return false;
+}
+
+/// ParseDirectiveEndIf
+/// ::= .endif
+bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.endif' directive");
+
+ Lex();
+
+ if ((TheCondState.TheCond == AsmCond::NoCond) ||
+ TheCondStack.empty())
+ Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
+ ".else");
+ if (!TheCondStack.empty()) {
+ TheCondState = TheCondStack.back();
+ TheCondStack.pop_back();
+ }
+
+ return false;
+}
+
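+// For example (illustrative), the conditional directives above parse blocks
+// such as:
+//   .ifdef DEBUG
+//   .long 1
+//   .else
+//   .long 0
+//   .endif
+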
+void AsmParser::initializeDirectiveKindMap() {
+ DirectiveKindMap[".set"] = DK_SET;
+ DirectiveKindMap[".equ"] = DK_EQU;
+ DirectiveKindMap[".equiv"] = DK_EQUIV;
+ DirectiveKindMap[".ascii"] = DK_ASCII;
+ DirectiveKindMap[".asciz"] = DK_ASCIZ;
+ DirectiveKindMap[".string"] = DK_STRING;
+ DirectiveKindMap[".byte"] = DK_BYTE;
+ DirectiveKindMap[".short"] = DK_SHORT;
+ DirectiveKindMap[".value"] = DK_VALUE;
+ DirectiveKindMap[".2byte"] = DK_2BYTE;
+ DirectiveKindMap[".long"] = DK_LONG;
+ DirectiveKindMap[".int"] = DK_INT;
+ DirectiveKindMap[".4byte"] = DK_4BYTE;
+ DirectiveKindMap[".quad"] = DK_QUAD;
+ DirectiveKindMap[".8byte"] = DK_8BYTE;
+ DirectiveKindMap[".single"] = DK_SINGLE;
+ DirectiveKindMap[".float"] = DK_FLOAT;
+ DirectiveKindMap[".double"] = DK_DOUBLE;
+ DirectiveKindMap[".align"] = DK_ALIGN;
+ DirectiveKindMap[".align32"] = DK_ALIGN32;
+ DirectiveKindMap[".balign"] = DK_BALIGN;
+ DirectiveKindMap[".balignw"] = DK_BALIGNW;
+ DirectiveKindMap[".balignl"] = DK_BALIGNL;
+ DirectiveKindMap[".p2align"] = DK_P2ALIGN;
+ DirectiveKindMap[".p2alignw"] = DK_P2ALIGNW;
+ DirectiveKindMap[".p2alignl"] = DK_P2ALIGNL;
+ DirectiveKindMap[".org"] = DK_ORG;
+ DirectiveKindMap[".fill"] = DK_FILL;
+ DirectiveKindMap[".zero"] = DK_ZERO;
+ DirectiveKindMap[".extern"] = DK_EXTERN;
+ DirectiveKindMap[".globl"] = DK_GLOBL;
+ DirectiveKindMap[".global"] = DK_GLOBAL;
+ DirectiveKindMap[".indirect_symbol"] = DK_INDIRECT_SYMBOL;
+ DirectiveKindMap[".lazy_reference"] = DK_LAZY_REFERENCE;
+ DirectiveKindMap[".no_dead_strip"] = DK_NO_DEAD_STRIP;
+ DirectiveKindMap[".symbol_resolver"] = DK_SYMBOL_RESOLVER;
+ DirectiveKindMap[".private_extern"] = DK_PRIVATE_EXTERN;
+ DirectiveKindMap[".reference"] = DK_REFERENCE;
+ DirectiveKindMap[".weak_definition"] = DK_WEAK_DEFINITION;
+ DirectiveKindMap[".weak_reference"] = DK_WEAK_REFERENCE;
+ DirectiveKindMap[".weak_def_can_be_hidden"] = DK_WEAK_DEF_CAN_BE_HIDDEN;
+ DirectiveKindMap[".comm"] = DK_COMM;
+ DirectiveKindMap[".common"] = DK_COMMON;
+ DirectiveKindMap[".lcomm"] = DK_LCOMM;
+ DirectiveKindMap[".abort"] = DK_ABORT;
+ DirectiveKindMap[".include"] = DK_INCLUDE;
+ DirectiveKindMap[".incbin"] = DK_INCBIN;
+ DirectiveKindMap[".code16"] = DK_CODE16;
+ DirectiveKindMap[".code16gcc"] = DK_CODE16GCC;
+ DirectiveKindMap[".rept"] = DK_REPT;
+ DirectiveKindMap[".irp"] = DK_IRP;
+ DirectiveKindMap[".irpc"] = DK_IRPC;
+ DirectiveKindMap[".endr"] = DK_ENDR;
+ DirectiveKindMap[".bundle_align_mode"] = DK_BUNDLE_ALIGN_MODE;
+ DirectiveKindMap[".bundle_lock"] = DK_BUNDLE_LOCK;
+ DirectiveKindMap[".bundle_unlock"] = DK_BUNDLE_UNLOCK;
+ DirectiveKindMap[".if"] = DK_IF;
+ DirectiveKindMap[".ifb"] = DK_IFB;
+ DirectiveKindMap[".ifnb"] = DK_IFNB;
+ DirectiveKindMap[".ifc"] = DK_IFC;
+ DirectiveKindMap[".ifnc"] = DK_IFNC;
+ DirectiveKindMap[".ifdef"] = DK_IFDEF;
+ DirectiveKindMap[".ifndef"] = DK_IFNDEF;
+ DirectiveKindMap[".ifnotdef"] = DK_IFNOTDEF;
+ DirectiveKindMap[".elseif"] = DK_ELSEIF;
+ DirectiveKindMap[".else"] = DK_ELSE;
+ DirectiveKindMap[".endif"] = DK_ENDIF;
+ DirectiveKindMap[".skip"] = DK_SKIP;
+ DirectiveKindMap[".space"] = DK_SPACE;
+ DirectiveKindMap[".file"] = DK_FILE;
+ DirectiveKindMap[".line"] = DK_LINE;
+ DirectiveKindMap[".loc"] = DK_LOC;
+ DirectiveKindMap[".stabs"] = DK_STABS;
+ DirectiveKindMap[".sleb128"] = DK_SLEB128;
+ DirectiveKindMap[".uleb128"] = DK_ULEB128;
+ DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
+ DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
+ DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
+ DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
+ DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
+ DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
+ DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
+ DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
+ DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
+ DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
+ DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
+ DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
+ DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
+ DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
+ DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
+ DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
+ DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
+ DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
+ DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
+ DirectiveKindMap[".macros_on"] = DK_MACROS_ON;
+ DirectiveKindMap[".macros_off"] = DK_MACROS_OFF;
+ DirectiveKindMap[".macro"] = DK_MACRO;
+ DirectiveKindMap[".endm"] = DK_ENDM;
+ DirectiveKindMap[".endmacro"] = DK_ENDMACRO;
+ DirectiveKindMap[".purgem"] = DK_PURGEM;
+}
+
+
+MCAsmMacro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) {
AsmToken EndToken, StartToken = getTok();
unsigned NestLevel = 0;
@@ -3424,7 +3822,7 @@ Macro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) {
}
// Otherwise, scan till the end of the statement.
- EatToEndOfStatement();
+ eatToEndOfStatement();
}
const char *BodyStart = StartToken.getLoc().getPointer();
@@ -3433,11 +3831,11 @@ Macro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) {
// We Are Anonymous.
StringRef Name;
- MacroParameters Parameters;
- return new Macro(Name, Body, Parameters);
+ MCAsmMacroParameters Parameters;
+ return new MCAsmMacro(Name, Body, Parameters);
}
-void AsmParser::InstantiateMacroLikeBody(Macro *M, SMLoc DirectiveLoc,
+void AsmParser::InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
raw_svector_ostream &OS) {
OS << ".endr\n";
@@ -3447,6 +3845,7 @@ void AsmParser::InstantiateMacroLikeBody(Macro *M, SMLoc DirectiveLoc,
// Create the macro instantiation object and add to the current macro
// instantiation stack.
MacroInstantiation *MI = new MacroInstantiation(M, DirectiveLoc,
+ CurBuffer,
getTok().getLoc(),
Instantiation);
ActiveMacros.push_back(MI);
@@ -3459,7 +3858,7 @@ void AsmParser::InstantiateMacroLikeBody(Macro *M, SMLoc DirectiveLoc,
bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) {
int64_t Count;
- if (ParseAbsoluteExpression(Count))
+ if (parseAbsoluteExpression(Count))
return TokError("unexpected token in '.rept' directive");
if (Count < 0)
@@ -3472,15 +3871,15 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) {
Lex();
// Lex the rept definition.
- Macro *M = ParseMacroLikeBody(DirectiveLoc);
+ MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc);
if (!M)
return true;
// Macro instantiation is lexical, unfortunately. We construct a new buffer
// to hold the macro body with substitutions.
SmallString<256> Buf;
- MacroParameters Parameters;
- MacroArguments A;
+ MCAsmMacroParameters Parameters;
+ MCAsmMacroArguments A;
raw_svector_ostream OS(Buf);
while (Count--) {
if (expandMacro(OS, M->Body, Parameters, A, getTok().getLoc()))
@@ -3494,10 +3893,10 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) {
/// ParseDirectiveIrp
/// ::= .irp symbol,values
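/// For example (illustrative):
///   .irp reg, 12, 13
///   pushq %r\reg
///   .endr
/// expands the body once per value, substituting \reg.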
bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
- MacroParameters Parameters;
- MacroParameter Parameter;
+ MCAsmMacroParameters Parameters;
+ MCAsmMacroParameter Parameter;
- if (ParseIdentifier(Parameter.first))
+ if (parseIdentifier(Parameter.first))
return TokError("expected identifier in '.irp' directive");
Parameters.push_back(Parameter);
@@ -3507,7 +3906,7 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
Lex();
- MacroArguments A;
+ MCAsmMacroArguments A;
if (ParseMacroArguments(0, A))
return true;
@@ -3515,7 +3914,7 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
Lex();
// Lex the irp definition.
- Macro *M = ParseMacroLikeBody(DirectiveLoc);
+ MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc);
if (!M)
return true;
@@ -3524,8 +3923,8 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
SmallString<256> Buf;
raw_svector_ostream OS(Buf);
- for (MacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) {
- MacroArguments Args;
+ for (MCAsmMacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) {
+ MCAsmMacroArguments Args;
Args.push_back(*i);
if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc()))
@@ -3540,10 +3939,10 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) {
/// ParseDirectiveIrpc
/// ::= .irpc symbol,values
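/// For example (illustrative): ".irpc c, 123" expands the body once per
/// character, binding \c to '1', '2', and '3' in turn.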
bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
- MacroParameters Parameters;
- MacroParameter Parameter;
+ MCAsmMacroParameters Parameters;
+ MCAsmMacroParameter Parameter;
- if (ParseIdentifier(Parameter.first))
+ if (parseIdentifier(Parameter.first))
return TokError("expected identifier in '.irpc' directive");
Parameters.push_back(Parameter);
@@ -3553,7 +3952,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
Lex();
- MacroArguments A;
+ MCAsmMacroArguments A;
if (ParseMacroArguments(0, A))
return true;
@@ -3564,7 +3963,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
Lex();
// Lex the irpc definition.
- Macro *M = ParseMacroLikeBody(DirectiveLoc);
+ MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc);
if (!M)
return true;
@@ -3576,10 +3975,10 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) {
StringRef Values = A.front().front().getString();
std::size_t I, End = Values.size();
for (I = 0; I < End; ++I) {
- MacroArgument Arg;
+ MCAsmMacroArgument Arg;
Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I+1)));
- MacroArguments Args;
+ MCAsmMacroArguments Args;
Args.push_back(Arg);
if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc()))
@@ -3603,10 +4002,11 @@ bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) {
return false;
}
-bool AsmParser::ParseDirectiveEmit(SMLoc IDLoc, ParseStatementInfo &Info) {
+bool AsmParser::ParseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
+ size_t Len) {
const MCExpr *Value;
SMLoc ExprLoc = getLexer().getLoc();
- if (ParseExpression(Value))
+ if (parseExpression(Value))
return true;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
if (!MCE)
@@ -3615,27 +4015,71 @@ bool AsmParser::ParseDirectiveEmit(SMLoc IDLoc, ParseStatementInfo &Info) {
if (!isUIntN(8, IntValue) && !isIntN(8, IntValue))
return Error(ExprLoc, "literal value out of range for directive");
- Info.AsmRewrites->push_back(AsmRewrite(AOK_Emit, IDLoc, 5));
+ Info.AsmRewrites->push_back(AsmRewrite(AOK_Emit, IDLoc, Len));
+ return false;
+}
+
+bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (parseExpression(Value))
+ return true;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+ if (!MCE)
+ return Error(ExprLoc, "unexpected expression in align");
+ uint64_t IntValue = MCE->getValue();
+ if (!isPowerOf2_64(IntValue))
+    return Error(ExprLoc, "literal value not a power of two greater than zero");
+
+ Info.AsmRewrites->push_back(AsmRewrite(AOK_Align, IDLoc, 5,
+ Log2_64(IntValue)));
return false;
}
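// Example (illustrative): MS-style "align 8" in the inline asm is recorded
// above as an AOK_Align rewrite carrying Log2_64(8) == 3.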
-bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
- unsigned &NumOutputs, unsigned &NumInputs,
- SmallVectorImpl<std::pair<void *, bool> > &OpDecls,
- SmallVectorImpl<std::string> &Constraints,
- SmallVectorImpl<std::string> &Clobbers,
- const MCInstrInfo *MII,
- const MCInstPrinter *IP,
- MCAsmParserSemaCallback &SI) {
+// We are comparing pointers, but the pointers are relative to a single string.
+// Thus, this should always be deterministic.
+static int RewritesSort(const void *A, const void *B) {
+ const AsmRewrite *AsmRewriteA = static_cast<const AsmRewrite *>(A);
+ const AsmRewrite *AsmRewriteB = static_cast<const AsmRewrite *>(B);
+ if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
+ return -1;
+ if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
+ return 1;
+
+ // It's possible to have a SizeDirective rewrite and an Input/Output rewrite
+ // to the same location. Make sure the SizeDirective rewrite is performed
+  // first. This also ensures the sort algorithm is stable.
+ if (AsmRewriteA->Kind == AOK_SizeDirective) {
+ assert ((AsmRewriteB->Kind == AOK_Input || AsmRewriteB->Kind == AOK_Output) &&
+ "Expected an Input/Output rewrite!");
+ return -1;
+ }
+ if (AsmRewriteB->Kind == AOK_SizeDirective) {
+ assert ((AsmRewriteA->Kind == AOK_Input || AsmRewriteA->Kind == AOK_Output) &&
+ "Expected an Input/Output rewrite!");
+ return 1;
+ }
+ llvm_unreachable ("Unstable rewrite sort.");
+}
+
+bool
+AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
+ unsigned &NumOutputs, unsigned &NumInputs,
+ SmallVectorImpl<std::pair<void *, bool> > &OpDecls,
+ SmallVectorImpl<std::string> &Constraints,
+ SmallVectorImpl<std::string> &Clobbers,
+ const MCInstrInfo *MII,
+ const MCInstPrinter *IP,
+ MCAsmParserSemaCallback &SI) {
SmallVector<void *, 4> InputDecls;
SmallVector<void *, 4> OutputDecls;
- SmallVector<bool, 4> InputDeclsOffsetOf;
- SmallVector<bool, 4> OutputDeclsOffsetOf;
+ SmallVector<bool, 4> InputDeclsAddressOf;
+ SmallVector<bool, 4> OutputDeclsAddressOf;
SmallVector<std::string, 4> InputConstraints;
SmallVector<std::string, 4> OutputConstraints;
- std::set<std::string> ClobberRegs;
+ SmallVector<unsigned, 4> ClobberRegs;
- SmallVector<struct AsmRewrite, 4> AsmStrRewrites;
+ SmallVector<AsmRewrite, 4> AsmStrRewrites;
// Prime the lexer.
Lex();
@@ -3648,64 +4092,54 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
if (ParseStatement(Info))
return true;
- if (Info.Opcode != ~0U) {
- const MCInstrDesc &Desc = MII->get(Info.Opcode);
+ if (Info.ParseError)
+ return true;
- // Build the list of clobbers, outputs and inputs.
- for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
- MCParsedAsmOperand *Operand = Info.ParsedOperands[i];
+ if (Info.Opcode == ~0U)
+ continue;
- // Immediate.
- if (Operand->isImm()) {
- if (Operand->needAsmRewrite())
- AsmStrRewrites.push_back(AsmRewrite(AOK_ImmPrefix,
- Operand->getStartLoc()));
- continue;
- }
+ const MCInstrDesc &Desc = MII->get(Info.Opcode);
- // Register operand.
- if (Operand->isReg() && !Operand->isOffsetOf()) {
- unsigned NumDefs = Desc.getNumDefs();
- // Clobber.
- if (NumDefs && Operand->getMCOperandNum() < NumDefs) {
- std::string Reg;
- raw_string_ostream OS(Reg);
- IP->printRegName(OS, Operand->getReg());
- ClobberRegs.insert(StringRef(OS.str()));
- }
- continue;
- }
+ // Build the list of clobbers, outputs and inputs.
+ for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
+ MCParsedAsmOperand *Operand = Info.ParsedOperands[i];
- // Expr/Input or Output.
- unsigned Size;
- void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc,
- Size);
- if (OpDecl) {
- bool isOutput = (i == 1) && Desc.mayStore();
- if (!Operand->isOffsetOf() && Operand->needSizeDirective())
- AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective,
- Operand->getStartLoc(),
- /*Len*/0,
- Operand->getMemSize()));
- if (isOutput) {
- std::string Constraint = "=";
- ++InputIdx;
- OutputDecls.push_back(OpDecl);
- OutputDeclsOffsetOf.push_back(Operand->isOffsetOf());
- Constraint += Operand->getConstraint().str();
- OutputConstraints.push_back(Constraint);
- AsmStrRewrites.push_back(AsmRewrite(AOK_Output,
- Operand->getStartLoc(),
- Operand->getNameLen()));
- } else {
- InputDecls.push_back(OpDecl);
- InputDeclsOffsetOf.push_back(Operand->isOffsetOf());
- InputConstraints.push_back(Operand->getConstraint().str());
- AsmStrRewrites.push_back(AsmRewrite(AOK_Input,
- Operand->getStartLoc(),
- Operand->getNameLen()));
- }
- }
+ // Immediate.
+ if (Operand->isImm())
+ continue;
+
+ // Register operand.
+ if (Operand->isReg() && !Operand->needAddressOf()) {
+ unsigned NumDefs = Desc.getNumDefs();
+ // Clobber.
+ if (NumDefs && Operand->getMCOperandNum() < NumDefs)
+ ClobberRegs.push_back(Operand->getReg());
+ continue;
+ }
+
+ // Expr/Input or Output.
+ bool IsVarDecl;
+ unsigned Length, Size, Type;
+ void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc,
+ Length, Size, Type,
+ IsVarDecl);
+ if (!OpDecl)
+ continue;
+
+ bool isOutput = (i == 1) && Desc.mayStore();
+ if (isOutput) {
+ ++InputIdx;
+ OutputDecls.push_back(OpDecl);
+ OutputDeclsAddressOf.push_back(Operand->needAddressOf());
+ OutputConstraints.push_back('=' + Operand->getConstraint().str());
+ AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Operand->getStartLoc(),
+ Operand->getNameLen()));
+ } else {
+ InputDecls.push_back(OpDecl);
+ InputDeclsAddressOf.push_back(Operand->needAddressOf());
+ InputConstraints.push_back(Operand->getConstraint().str());
+ AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Operand->getStartLoc(),
+ Operand->getNameLen()));
}
}
}
@@ -3715,47 +4149,57 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
NumInputs = InputDecls.size();
// Set the unique clobbers.
- for (std::set<std::string>::iterator I = ClobberRegs.begin(),
- E = ClobberRegs.end(); I != E; ++I)
- Clobbers.push_back(*I);
+ array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
+ ClobberRegs.erase(std::unique(ClobberRegs.begin(), ClobberRegs.end()),
+ ClobberRegs.end());
+ Clobbers.assign(ClobberRegs.size(), std::string());
+ for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
+ raw_string_ostream OS(Clobbers[I]);
+ IP->printRegName(OS, ClobberRegs[I]);
+ }
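
The clobber handling just above swaps a std::set of printed register names for the cheaper sort/unique/erase idiom over raw register numbers, so each name is printed at most once. A self-contained sketch of the idiom, with std::sort standing in for array_pod_sort:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      unsigned Raw[] = { 3, 1, 3, 2, 1 };           // registers as parsed
      std::vector<unsigned> Regs(Raw, Raw + 5);
      std::sort(Regs.begin(), Regs.end());          // group duplicates
      Regs.erase(std::unique(Regs.begin(), Regs.end()), Regs.end());
      for (size_t I = 0; I != Regs.size(); ++I)
        printf("%u ", Regs[I]);                     // 1 2 3
    }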
 // Merge the various outputs and inputs. Outputs are expected first.
if (NumOutputs || NumInputs) {
unsigned NumExprs = NumOutputs + NumInputs;
OpDecls.resize(NumExprs);
Constraints.resize(NumExprs);
- // FIXME: Constraints are hard coded to 'm', but we need an 'r'
- // constraint for offsetof. This needs to be cleaned up!
for (unsigned i = 0; i < NumOutputs; ++i) {
- OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsOffsetOf[i]);
- Constraints[i] = OutputDeclsOffsetOf[i] ? "=r" : OutputConstraints[i];
+ OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
+ Constraints[i] = OutputConstraints[i];
}
for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
- OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsOffsetOf[i]);
- Constraints[j] = InputDeclsOffsetOf[i] ? "r" : InputConstraints[i];
+ OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
+ Constraints[j] = InputConstraints[i];
}
}
// Build the IR assembly string.
std::string AsmStringIR;
- AsmRewriteKind PrevKind = AOK_Imm;
raw_string_ostream OS(AsmStringIR);
- const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart();
- for (SmallVectorImpl<struct AsmRewrite>::iterator
- I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) {
+ const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+ const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
+ array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort);
+ for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(),
+ E = AsmStrRewrites.end();
+ I != E; ++I) {
const char *Loc = (*I).Loc.getPointer();
+ assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
+ unsigned AdditionalSkip = 0;
AsmRewriteKind Kind = (*I).Kind;
- // Emit everything up to the immediate/expression. If the previous rewrite
- // was a size directive, then this has already been done.
- if (PrevKind != AOK_SizeDirective)
- OS << StringRef(Start, Loc - Start);
- PrevKind = Kind;
+ // Emit everything up to the immediate/expression.
+ unsigned Len = Loc - AsmStart;
+ if (Len) {
+ // For Input/Output operands we need to remove the brackets, if present.
+ if ((Kind == AOK_Input || Kind == AOK_Output) && Loc[-1] == '[')
+ --Len;
+ OS << StringRef(AsmStart, Len);
+ }
// Skip the original expression.
if (Kind == AOK_Skip) {
- Start = Loc + (*I).Len;
+ AsmStart = Loc + (*I).Len;
continue;
}
@@ -3763,22 +4207,19 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
switch (Kind) {
default: break;
case AOK_Imm:
- OS << Twine("$$");
- OS << (*I).Val;
+ OS << "$$" << (*I).Val;
break;
case AOK_ImmPrefix:
- OS << Twine("$$");
+ OS << "$$";
break;
case AOK_Input:
- OS << '$';
- OS << InputIdx++;
+ OS << '$' << InputIdx++;
break;
case AOK_Output:
- OS << '$';
- OS << OutputIdx++;
+ OS << '$' << OutputIdx++;
break;
case AOK_SizeDirective:
- switch((*I).Val) {
+ switch ((*I).Val) {
default: break;
case 8: OS << "byte ptr "; break;
case 16: OS << "word ptr "; break;
@@ -3792,20 +4233,32 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
case AOK_Emit:
OS << ".byte";
break;
+ case AOK_Align: {
+ unsigned Val = (*I).Val;
+ OS << ".align " << Val;
+
+ // Skip the original immediate.
+ assert(Val < 10 && "Expected alignment less than 2^10.");
+ AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
+ break;
+ }
case AOK_DotOperator:
OS << (*I).Val;
break;
}
// Skip the original expression.
- if (Kind != AOK_SizeDirective)
- Start = Loc + (*I).Len;
+ AsmStart = Loc + (*I).Len + AdditionalSkip;
+
+ // For Input/Output operands we need to remove the brackets, if present.
+ if ((Kind == AOK_Input || Kind == AOK_Output) && AsmStart != AsmEnd &&
+ *AsmStart == ']')
+ ++AsmStart;
}
// Emit the remainder of the asm string.
- const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
- if (Start != AsmEnd)
- OS << StringRef(Start, AsmEnd - Start);
+ if (AsmStart != AsmEnd)
+ OS << StringRef(AsmStart, AsmEnd - AsmStart);
AsmString = OS.str();
return false;
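
The AdditionalSkip arithmetic in the AOK_Align case is easiest to see with concrete values. Assuming (*I).Val carries the log2 of the original byte alignment (so the source text read `align <2^Val>`), the skip is one space plus the decimal digits of that original operand — a hypothetical restatement:

    #include <cassert>
    #include <cstdio>

    // Sketch of the AOK_Align skip: 2^0..2^3 print 1 digit, 2^4..2^6
    // print 2 digits, 2^7..2^9 print 3 digits; add 1 for the space.
    static unsigned additionalSkip(unsigned Val) {
      assert(Val < 10 && "Expected alignment less than 2^10.");
      return (Val < 4) ? 2 : Val < 7 ? 3 : 4;
    }

    int main() {
      printf("%u %u %u\n", additionalSkip(3),   // "8"   -> 2
                           additionalSkip(5),   // "32"  -> 3
                           additionalSkip(9));  // "512" -> 4
    }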
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index c4cdc3c9f96f..a50eab217d21 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -12,11 +12,11 @@
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/COFF.h"
using namespace llvm;
@@ -24,10 +24,11 @@ using namespace llvm;
namespace {
class COFFAsmParser : public MCAsmParserExtension {
- template<bool (COFFAsmParser::*Handler)(StringRef, SMLoc)>
- void AddDirectiveHandler(StringRef Directive) {
- getParser().AddDirectiveHandler(this, Directive,
- HandleDirective<COFFAsmParser, Handler>);
+ template<bool (COFFAsmParser::*HandlerMethod)(StringRef, SMLoc)>
+ void addDirectiveHandler(StringRef Directive) {
+ MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
+ this, HandleDirective<COFFAsmParser, HandlerMethod>);
+ getParser().addDirectiveHandler(Directive, Handler);
}
bool ParseSectionSwitch(StringRef Section,
@@ -38,43 +39,43 @@ class COFFAsmParser : public MCAsmParserExtension {
// Call the base implementation.
MCAsmParserExtension::Initialize(Parser);
- AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveText>(".text");
- AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data");
- AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss");
- AddDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def");
- AddDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl");
- AddDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type");
- AddDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef");
- AddDirectiveHandler<&COFFAsmParser::ParseDirectiveSecRel32>(".secrel32");
+ addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveText>(".text");
+ addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data");
+ addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecRel32>(".secrel32");
// Win64 EH directives.
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>(
".seh_proc");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProc>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProc>(
".seh_endproc");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartChained>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartChained>(
".seh_startchained");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndChained>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndChained>(
".seh_endchained");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandler>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandler>(
".seh_handler");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandlerData>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveHandlerData>(
".seh_handlerdata");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushReg>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushReg>(
".seh_pushreg");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSetFrame>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSetFrame>(
".seh_setframe");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveAllocStack>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveAllocStack>(
".seh_stackalloc");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveReg>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveReg>(
".seh_savereg");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveXMM>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveSaveXMM>(
".seh_savexmm");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushFrame>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectivePushFrame>(
".seh_pushframe");
- AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>(
+ addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveEndProlog>(
".seh_endprologue");
- AddDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak");
}
bool ParseSectionDirectiveText(StringRef, SMLoc) {
@@ -140,7 +141,7 @@ bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
for (;;) {
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
@@ -176,7 +177,7 @@ bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
StringRef SymbolName;
- if (getParser().ParseIdentifier(SymbolName))
+ if (getParser().parseIdentifier(SymbolName))
return TokError("expected identifier in directive");
MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
@@ -189,7 +190,7 @@ bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) {
int64_t SymbolStorageClass;
- if (getParser().ParseAbsoluteExpression(SymbolStorageClass))
+ if (getParser().parseAbsoluteExpression(SymbolStorageClass))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -202,7 +203,7 @@ bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) {
bool COFFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
int64_t Type;
- if (getParser().ParseAbsoluteExpression(Type))
+ if (getParser().parseAbsoluteExpression(Type))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -221,7 +222,7 @@ bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) {
bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
StringRef SymbolID;
- if (getParser().ParseIdentifier(SymbolID))
+ if (getParser().parseIdentifier(SymbolID))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -236,7 +237,7 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) {
StringRef SymbolID;
- if (getParser().ParseIdentifier(SymbolID))
+ if (getParser().parseIdentifier(SymbolID))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -269,7 +270,7 @@ bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc) {
bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc) {
StringRef SymbolID;
- if (getParser().ParseIdentifier(SymbolID))
+ if (getParser().parseIdentifier(SymbolID))
return true;
if (getLexer().isNot(AsmToken::Comma))
@@ -322,7 +323,7 @@ bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc L) {
Lex();
SMLoc startLoc = getLexer().getLoc();
- if (getParser().ParseAbsoluteExpression(Off))
+ if (getParser().parseAbsoluteExpression(Off))
return true;
if (Off & 0x0F)
@@ -339,7 +340,7 @@ bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc L) {
bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc) {
int64_t Size;
SMLoc startLoc = getLexer().getLoc();
- if (getParser().ParseAbsoluteExpression(Size))
+ if (getParser().parseAbsoluteExpression(Size))
return true;
if (Size & 7)
@@ -363,7 +364,7 @@ bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc L) {
Lex();
SMLoc startLoc = getLexer().getLoc();
- if (getParser().ParseAbsoluteExpression(Off))
+ if (getParser().parseAbsoluteExpression(Off))
return true;
if (Off & 7)
@@ -390,7 +391,7 @@ bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc L) {
Lex();
SMLoc startLoc = getLexer().getLoc();
- if (getParser().ParseAbsoluteExpression(Off))
+ if (getParser().parseAbsoluteExpression(Off))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -411,7 +412,7 @@ bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc) {
if (getLexer().is(AsmToken::At)) {
SMLoc startLoc = getLexer().getLoc();
Lex();
- if (!getParser().ParseIdentifier(CodeID)) {
+ if (!getParser().parseIdentifier(CodeID)) {
if (CodeID != "code")
return Error(startLoc, "expected @code");
Code = true;
@@ -438,7 +439,7 @@ bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) {
return TokError("a handler attribute must begin with '@'");
SMLoc startLoc = getLexer().getLoc();
Lex();
- if (getParser().ParseIdentifier(identifier))
+ if (getParser().parseIdentifier(identifier))
return Error(startLoc, "expected @unwind or @except");
if (identifier == "unwind")
unwind = true;
@@ -479,7 +480,7 @@ bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) {
}
else {
int64_t n;
- if (getParser().ParseAbsoluteExpression(n))
+ if (getParser().parseAbsoluteExpression(n))
return true;
if (n > 15)
return Error(startLoc, "register number is too high");
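
The COFF changes are dominated by a mechanical rename, but the registration template also changes shape: a directive now maps to an ExtensionDirectiveHandler pair of (extension object, thunk) instead of passing the two separately. A toy sketch of that dispatch pattern — the names and types here are illustrative only:

    #include <cstdio>
    #include <map>
    #include <string>
    #include <utility>

    struct Parser;
    typedef bool (*DirectiveThunk)(Parser *Obj, const char *Directive);
    typedef std::pair<Parser *, DirectiveThunk> Handler;

    struct Parser {
      std::map<std::string, Handler> Handlers;
      void addDirectiveHandler(const std::string &D, Handler H) {
        Handlers[D] = H;
      }
      bool dispatch(const std::string &D) {
        std::map<std::string, Handler>::iterator I = Handlers.find(D);
        return I != Handlers.end() && I->second.second(I->second.first, D.c_str());
      }
    };

    static bool handleText(Parser *, const char *D) {
      printf("switching section via %s\n", D);
      return true;
    }

    int main() {
      Parser P;
      P.addDirectiveHandler(".text", std::make_pair(&P, &handleText));
      return P.dispatch(".text") ? 0 : 1;
    }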
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 18033d05eb76..6d6409fb69e2 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -8,15 +8,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
using namespace llvm;
@@ -26,10 +26,11 @@ namespace {
/// \brief Implementation of directive handling which is shared across all
/// Darwin targets.
class DarwinAsmParser : public MCAsmParserExtension {
- template<bool (DarwinAsmParser::*Handler)(StringRef, SMLoc)>
- void AddDirectiveHandler(StringRef Directive) {
- getParser().AddDirectiveHandler(this, Directive,
- HandleDirective<DarwinAsmParser, Handler>);
+ template<bool (DarwinAsmParser::*HandlerMethod)(StringRef, SMLoc)>
+ void addDirectiveHandler(StringRef Directive) {
+ MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
+ this, HandleDirective<DarwinAsmParser, HandlerMethod>);
+ getParser().addDirectiveHandler(Directive, Handler);
}
bool ParseSectionSwitch(const char *Segment, const char *Section,
@@ -43,77 +44,128 @@ public:
// Call the base implementation.
this->MCAsmParserExtension::Initialize(Parser);
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>(
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>(
".subsections_via_symbols");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePushSection>(".pushsection");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePopSection>(".popsection");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePrevious>(".previous");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>(
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectivePushSection>(
+ ".pushsection");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectivePopSection>(
+ ".popsection");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectivePrevious>(".previous");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>(
".secure_log_unique");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>(
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>(
".secure_log_reset");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegion>(".data_region");
- AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegionEnd>(".end_data_region");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegion>(
+ ".data_region");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegionEnd>(
+ ".end_data_region");
// Special section directives.
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(".const_data");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>(".constructor");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>(".cstring");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>(".destructor");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>(".fvmlib_init0");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>(".fvmlib_init1");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(".lazy_symbol_pointer");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(".literal16");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(".literal4");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>(".literal8");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>(".mod_init_func");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>(".mod_term_func");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>(".non_lazy_symbol_pointer");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>(".objc_cat_cls_meth");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>(".objc_cat_inst_meth");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>(".objc_category");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>(".objc_class");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>(".objc_class_names");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>(".objc_class_vars");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>(".objc_cls_meth");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>(".objc_cls_refs");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>(".objc_inst_meth");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>(".objc_instance_vars");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>(".objc_message_refs");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>(".objc_meta_class");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>(".objc_meth_var_names");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>(".objc_meth_var_types");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>(".objc_module_info");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>(".objc_protocol");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>(".objc_selector_strs");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCStringObject>(".objc_string_object");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>(".objc_symbols");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>(".picsymbol_stub");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>(".static_const");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>(".static_data");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>(".symbol_stub");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func");
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv");
-
- AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveIdent>(".ident");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(
+ ".const_data");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>(
+ ".constructor");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>(
+ ".cstring");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>(
+ ".destructor");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>(
+ ".fvmlib_init0");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>(
+ ".fvmlib_init1");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(
+ ".lazy_symbol_pointer");
+ addDirectiveHandler<&DarwinAsmParser::ParseDirectiveLinkerOption>(
+ ".linker_option");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(
+ ".literal16");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(
+ ".literal4");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>(
+ ".literal8");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>(
+ ".mod_init_func");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>(
+ ".mod_term_func");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>(
+ ".non_lazy_symbol_pointer");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>(
+ ".objc_cat_cls_meth");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>(
+ ".objc_cat_inst_meth");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>(
+ ".objc_category");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>(
+ ".objc_class");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>(
+ ".objc_class_names");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>(
+ ".objc_class_vars");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>(
+ ".objc_cls_meth");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>(
+ ".objc_cls_refs");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>(
+ ".objc_inst_meth");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>(
+ ".objc_instance_vars");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>(
+ ".objc_message_refs");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>(
+ ".objc_meta_class");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>(
+ ".objc_meth_var_names");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>(
+ ".objc_meth_var_types");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>(
+ ".objc_module_info");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>(
+ ".objc_protocol");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>(
+ ".objc_selector_strs");
+ addDirectiveHandler<
+ &DarwinAsmParser::ParseSectionDirectiveObjCStringObject>(
+ ".objc_string_object");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>(
+ ".objc_symbols");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>(
+ ".picsymbol_stub");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>(
+ ".static_const");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>(
+ ".static_data");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>(
+ ".symbol_stub");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(
+ ".thread_init_func");
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv");
+
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveIdent>(".ident");
}
bool ParseDirectiveDesc(StringRef, SMLoc);
bool ParseDirectiveDumpOrLoad(StringRef, SMLoc);
bool ParseDirectiveLsym(StringRef, SMLoc);
+ bool ParseDirectiveLinkerOption(StringRef, SMLoc);
bool ParseDirectiveSection(StringRef, SMLoc);
bool ParseDirectivePushSection(StringRef, SMLoc);
bool ParseDirectivePopSection(StringRef, SMLoc);
@@ -293,7 +345,7 @@ public:
}
bool ParseSectionDirectiveIdent(StringRef, SMLoc) {
// Darwin silently ignores the .ident directive.
- getParser().EatToEndOfStatement();
+ getParser().eatToEndOfStatement();
return false;
}
bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) {
@@ -314,7 +366,7 @@ bool DarwinAsmParser::ParseSectionSwitch(const char *Segment,
Lex();
// FIXME: Arch specific.
- bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack.
+ bool isText = TAA & MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS;
getStreamer().SwitchSection(getContext().getMachOSection(
Segment, Section, TAA, StubSize,
isText ? SectionKind::getText()
@@ -338,7 +390,7 @@ bool DarwinAsmParser::ParseSectionSwitch(const char *Segment,
/// ::= .desc identifier , expression
bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) {
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
@@ -349,7 +401,7 @@ bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) {
Lex();
int64_t DescValue;
- if (getParser().ParseAbsoluteExpression(DescValue))
+ if (getParser().parseAbsoluteExpression(DescValue))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -386,11 +438,38 @@ bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive,
return Warning(IDLoc, "ignoring directive .load for now");
}
+/// ParseDirectiveLinkerOption
+/// ::= .linker_option "string" ( , "string" )*
+bool DarwinAsmParser::ParseDirectiveLinkerOption(StringRef IDVal, SMLoc) {
+ SmallVector<std::string, 4> Args;
+ for (;;) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '" + Twine(IDVal) + "' directive");
+
+ std::string Data;
+ if (getParser().parseEscapedString(Data))
+ return true;
+
+ Args.push_back(Data);
+
+ Lex();
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ Lex();
+ }
+
+ getStreamer().EmitLinkerOptions(Args);
+ return false;
+}
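
For reference, input accepted by the new handler looks like the following (operands illustrative); each quoted string goes through parseEscapedString, and the whole list is handed to the streamer in a single EmitLinkerOptions call:

    .linker_option "-lz"
    .linker_option "-framework", "Cocoa"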
+
/// ParseDirectiveLsym
/// ::= .lsym identifier , expression
bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) {
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
@@ -401,7 +480,7 @@ bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) {
Lex();
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (getParser().parseExpression(Value))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -422,7 +501,7 @@ bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
SMLoc Loc = getLexer().getLoc();
StringRef SectionName;
- if (getParser().ParseIdentifier(SectionName))
+ if (getParser().parseIdentifier(SectionName))
return Error(Loc, "expected identifier after '.section' directive");
// Verify there is a following comma.
@@ -497,7 +576,7 @@ bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
/// ParseDirectiveSecureLogUnique
/// ::= .secure_log_unique ... message ...
bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
- StringRef LogMessage = getParser().ParseStringToEndOfStatement();
+ StringRef LogMessage = getParser().parseStringToEndOfStatement();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.secure_log_unique' directive");
@@ -565,7 +644,7 @@ bool DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) {
bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) {
SMLoc IDLoc = getLexer().getLoc();
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
@@ -577,7 +656,7 @@ bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) {
int64_t Size;
SMLoc SizeLoc = getLexer().getLoc();
- if (getParser().ParseAbsoluteExpression(Size))
+ if (getParser().parseAbsoluteExpression(Size))
return true;
int64_t Pow2Alignment = 0;
@@ -585,7 +664,7 @@ bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) {
if (getLexer().is(AsmToken::Comma)) {
Lex();
Pow2AlignmentLoc = getLexer().getLoc();
- if (getParser().ParseAbsoluteExpression(Pow2Alignment))
+ if (getParser().parseAbsoluteExpression(Pow2Alignment))
return true;
}
@@ -620,7 +699,7 @@ bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) {
/// , align_expression ]]
bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) {
StringRef Segment;
- if (getParser().ParseIdentifier(Segment))
+ if (getParser().parseIdentifier(Segment))
return TokError("expected segment name after '.zerofill' directive");
if (getLexer().isNot(AsmToken::Comma))
@@ -628,7 +707,7 @@ bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) {
Lex();
StringRef Section;
- if (getParser().ParseIdentifier(Section))
+ if (getParser().parseIdentifier(Section))
return TokError("expected section name after comma in '.zerofill' "
"directive");
@@ -648,7 +727,7 @@ bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) {
SMLoc IDLoc = getLexer().getLoc();
StringRef IDStr;
- if (getParser().ParseIdentifier(IDStr))
+ if (getParser().parseIdentifier(IDStr))
return TokError("expected identifier in directive");
 // Handle the identifier as the key symbol.
@@ -660,7 +739,7 @@ bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) {
int64_t Size;
SMLoc SizeLoc = getLexer().getLoc();
- if (getParser().ParseAbsoluteExpression(Size))
+ if (getParser().parseAbsoluteExpression(Size))
return true;
int64_t Pow2Alignment = 0;
@@ -668,7 +747,7 @@ bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) {
if (getLexer().is(AsmToken::Comma)) {
Lex();
Pow2AlignmentLoc = getLexer().getLoc();
- if (getParser().ParseAbsoluteExpression(Pow2Alignment))
+ if (getParser().parseAbsoluteExpression(Pow2Alignment))
return true;
}
@@ -712,7 +791,7 @@ bool DarwinAsmParser::ParseDirectiveDataRegion(StringRef, SMLoc) {
}
StringRef RegionType;
SMLoc Loc = getParser().getTok().getLoc();
- if (getParser().ParseIdentifier(RegionType))
+ if (getParser().parseIdentifier(RegionType))
return TokError("expected region type after '.data_region' directive");
int Kind = StringSwitch<int>(RegionType)
.Case("jt8", MCDR_DataRegionJT8)
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index d55de1f3fbe8..4c45e087445d 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -22,10 +22,12 @@ using namespace llvm;
namespace {
class ELFAsmParser : public MCAsmParserExtension {
- template<bool (ELFAsmParser::*Handler)(StringRef, SMLoc)>
- void AddDirectiveHandler(StringRef Directive) {
- getParser().AddDirectiveHandler(this, Directive,
- HandleDirective<ELFAsmParser, Handler>);
+ template<bool (ELFAsmParser::*HandlerMethod)(StringRef, SMLoc)>
+ void addDirectiveHandler(StringRef Directive) {
+ MCAsmParser::ExtensionDirectiveHandler Handler = std::make_pair(
+ this, HandleDirective<ELFAsmParser, HandlerMethod>);
+
+ getParser().addDirectiveHandler(Directive, Handler);
}
bool ParseSectionSwitch(StringRef Section, unsigned Type,
@@ -41,38 +43,38 @@ public:
// Call the base implementation.
this->MCAsmParserExtension::Initialize(Parser);
- AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data");
- AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text");
- AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss");
- AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata");
- AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata");
- AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss");
- AddDirectiveHandler<
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data");
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text");
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss");
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata");
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata");
+ addDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss");
+ addDirectiveHandler<
&ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel");
- AddDirectiveHandler<
+ addDirectiveHandler<
&ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro");
- AddDirectiveHandler<
+ addDirectiveHandler<
&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
- AddDirectiveHandler<
+ addDirectiveHandler<
&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
- AddDirectiveHandler<
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
+ addDirectiveHandler<
&ELFAsmParser::ParseDirectivePushSection>(".pushsection");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectiveVersion>(".version");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".weak");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".local");
- AddDirectiveHandler<
+ addDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveVersion>(".version");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".weak");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".local");
+ addDirectiveHandler<
&ELFAsmParser::ParseDirectiveSymbolAttribute>(".protected");
- AddDirectiveHandler<
+ addDirectiveHandler<
&ELFAsmParser::ParseDirectiveSymbolAttribute>(".internal");
- AddDirectiveHandler<
+ addDirectiveHandler<
&ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
}
@@ -167,7 +169,7 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
for (;;) {
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
@@ -201,7 +203,7 @@ bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
@@ -210,7 +212,7 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
Lex();
const MCExpr *Expr;
- if (getParser().ParseExpression(Expr))
+ if (getParser().parseExpression(Expr))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -222,7 +224,7 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
// A section name can contain -, so we cannot just use
- // ParseIdentifier.
+ // parseIdentifier.
SMLoc FirstLoc = getLexer().getLoc();
unsigned Size = 0;
@@ -375,14 +377,14 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
return TokError("expected '@' or '%' before type");
Lex();
- if (getParser().ParseIdentifier(TypeName))
+ if (getParser().parseIdentifier(TypeName))
return TokError("expected identifier in directive");
if (Mergeable) {
if (getLexer().isNot(AsmToken::Comma))
return TokError("expected the entry size");
Lex();
- if (getParser().ParseAbsoluteExpression(Size))
+ if (getParser().parseAbsoluteExpression(Size))
return true;
if (Size <= 0)
return TokError("entry size must be positive");
@@ -392,12 +394,12 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
if (getLexer().isNot(AsmToken::Comma))
return TokError("expected group name");
Lex();
- if (getParser().ParseIdentifier(GroupName))
+ if (getParser().parseIdentifier(GroupName))
return true;
if (getLexer().is(AsmToken::Comma)) {
Lex();
StringRef Linkage;
- if (getParser().ParseIdentifier(Linkage))
+ if (getParser().parseIdentifier(Linkage))
return true;
if (Linkage != "comdat")
return TokError("Linkage must be 'comdat'");
@@ -411,7 +413,16 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
unsigned Type = ELF::SHT_PROGBITS;
- if (!TypeName.empty()) {
+ if (TypeName.empty()) {
+ if (SectionName.startswith(".note"))
+ Type = ELF::SHT_NOTE;
+ else if (SectionName == ".init_array")
+ Type = ELF::SHT_INIT_ARRAY;
+ else if (SectionName == ".fini_array")
+ Type = ELF::SHT_FINI_ARRAY;
+ else if (SectionName == ".preinit_array")
+ Type = ELF::SHT_PREINIT_ARRAY;
+ } else {
if (TypeName == "init_array")
Type = ELF::SHT_INIT_ARRAY;
else if (TypeName == "fini_array")
@@ -450,7 +461,7 @@ bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
/// ::= .type identifier , @attribute
bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
@@ -468,7 +479,7 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
SMLoc TypeLoc;
TypeLoc = getLexer().getLoc();
- if (getParser().ParseIdentifier(Type))
+ if (getParser().parseIdentifier(Type))
return TokError("expected symbol type in directive");
MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
@@ -517,7 +528,7 @@ bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) {
getStreamer().EmitIntValue(0, 1);
SeenIdent = true;
}
- getStreamer().EmitBytes(Data, 0);
+ getStreamer().EmitBytes(Data);
getStreamer().EmitIntValue(0, 1);
getStreamer().PopSection();
return false;
@@ -527,7 +538,7 @@ bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) {
/// ::= .symver foo, bar2@zed
bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
if (getLexer().isNot(AsmToken::Comma))
@@ -536,7 +547,7 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
Lex();
StringRef AliasName;
- if (getParser().ParseIdentifier(AliasName))
+ if (getParser().parseIdentifier(AliasName))
return TokError("expected identifier in directive");
if (AliasName.find('@') == StringRef::npos)
@@ -569,7 +580,7 @@ bool ELFAsmParser::ParseDirectiveVersion(StringRef, SMLoc) {
getStreamer().EmitIntValue(Data.size()+1, 4); // namesz.
getStreamer().EmitIntValue(0, 4); // descsz = 0 (no description).
getStreamer().EmitIntValue(1, 4); // type = NT_VERSION.
- getStreamer().EmitBytes(Data, 0); // name.
+ getStreamer().EmitBytes(Data); // name.
getStreamer().EmitIntValue(0, 1); // terminate the string.
getStreamer().EmitValueToAlignment(4); // ensure 4 byte alignment.
getStreamer().PopSection();
@@ -582,7 +593,7 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
// FIXME: Share code with the other alias building directives.
StringRef AliasName;
- if (getParser().ParseIdentifier(AliasName))
+ if (getParser().parseIdentifier(AliasName))
return TokError("expected identifier in directive");
if (getLexer().isNot(AsmToken::Comma))
@@ -591,7 +602,7 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
Lex();
StringRef Name;
- if (getParser().ParseIdentifier(Name))
+ if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
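
Buried in the ELF rename noise is one behavioral change in ParseDirectiveSection: when no @type is spelled, the section type is now inferred from well-known names rather than always defaulting to SHT_PROGBITS. A sketch of that inference — constant values per the ELF spec, helper name hypothetical:

    #include <cstdio>
    #include <cstring>

    enum { SHT_PROGBITS = 1, SHT_NOTE = 7, SHT_INIT_ARRAY = 14,
           SHT_FINI_ARRAY = 15, SHT_PREINIT_ARRAY = 16 };

    static unsigned defaultSectionType(const char *Name) {
      if (std::strncmp(Name, ".note", 5) == 0)      return SHT_NOTE;
      if (std::strcmp(Name, ".init_array") == 0)    return SHT_INIT_ARRAY;
      if (std::strcmp(Name, ".fini_array") == 0)    return SHT_FINI_ARRAY;
      if (std::strcmp(Name, ".preinit_array") == 0) return SHT_PREINIT_ARRAY;
      return SHT_PROGBITS;
    }

    int main() {
      printf("%u %u\n", defaultSectionType(".note.gnu.build-id"),
                        defaultSectionType(".data")); // 7 1
    }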
diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp
index 384b341bc730..3867691107fb 100644
--- a/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/lib/MC/MCParser/MCAsmLexer.cpp
@@ -28,5 +28,5 @@ SMLoc AsmToken::getLoc() const {
}
SMLoc AsmToken::getEndLoc() const {
- return SMLoc::getFromPointer(Str.data() + Str.size() - 1);
+ return SMLoc::getFromPointer(Str.data() + Str.size());
}
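
This one-character fix makes token ranges half-open: getEndLoc() now points one past the last character instead of at it, so the end of one token equals the start of the next. A tiny illustration with made-up pointers:

    #include <cassert>

    int main() {
      const char *Buf = "foobar";     // token "foo" then token "bar"
      const char *FooEnd = Buf + 3;   // new getEndLoc(): one past 'o'
      const char *BarStart = Buf + 3; // getLoc() of the next token
      assert(FooEnd == BarStart);     // adjacent tokens now meet exactly
      return 0;
    }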
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index 6967feef2440..6e1ebad36c0d 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -8,13 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) {
@@ -38,9 +38,9 @@ bool MCAsmParser::TokError(const Twine &Msg, ArrayRef<SMRange> Ranges) {
return true;
}
-bool MCAsmParser::ParseExpression(const MCExpr *&Res) {
+bool MCAsmParser::parseExpression(const MCExpr *&Res) {
SMLoc L;
- return ParseExpression(Res, L);
+ return parseExpression(Res, L);
}
void MCParsedAsmOperand::dump() const {
diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp
index 9ccab930673e..0e04c5537acb 100644
--- a/lib/MC/MCPureStreamer.cpp
+++ b/lib/MC/MCPureStreamer.cpp
@@ -28,16 +28,17 @@ private:
virtual void EmitInstToData(const MCInst &Inst);
public:
- MCPureStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter)
- : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+ MCPureStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter)
+ : MCObjectStreamer(SK_PureStreamer, Context, TAB, OS, Emitter) {}
/// @name MCStreamer Interface
/// @{
virtual void InitSections();
+ virtual void InitToTextSection();
virtual void EmitLabel(MCSymbol *Symbol);
- virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitDebugLabel(MCSymbol *Symbol);
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
uint64_t Size = 0, unsigned ByteAlignment = 0);
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
@@ -94,21 +95,28 @@ public:
report_fatal_error("unsupported directive in pure streamer");
}
virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
- StringRef Filename) {
+ StringRef Filename, unsigned CUID = 0) {
report_fatal_error("unsupported directive in pure streamer");
}
/// @}
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_PureStreamer;
+ }
};
} // end anonymous namespace.
void MCPureStreamer::InitSections() {
+ InitToTextSection();
+}
+
+void MCPureStreamer::InitToTextSection() {
 // FIXME: To what!?
SwitchSection(getContext().getMachOSection("__TEXT", "__text",
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
0, SectionKind::getText()));
-
}
void MCPureStreamer::EmitLabel(MCSymbol *Symbol) {
@@ -135,12 +143,9 @@ void MCPureStreamer::EmitLabel(MCSymbol *Symbol) {
SD.setOffset(F->getContents().size());
}
-void MCPureStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
- // MCObjectStreamer.
- // FIXME: Lift context changes into super class.
- getAssembler().getOrCreateSymbolData(*Symbol);
- Symbol->setVariableValue(AddValueSymbols(Value));
+
+void MCPureStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
}
void MCPureStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
@@ -191,7 +196,8 @@ bool MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
}
void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
- MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+ MCRelaxableFragment *IF =
+ new MCRelaxableFragment(Inst, getCurrentSectionData());
// Add the fixups and data.
//
@@ -203,7 +209,7 @@ void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
VecOS.flush();
- IF->getCode() = Code;
+ IF->getContents() = Code;
IF->getFixups() = Fixups;
}
@@ -219,7 +225,7 @@ void MCPureStreamer::EmitInstToData(const MCInst &Inst) {
// Add the fixups and data.
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
- DF->addFixup(Fixups[i]);
+ DF->getFixups().push_back(Fixups[i]);
}
DF->getContents().append(Code.begin(), Code.end());
}
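
The EmitInstToData hunk also shows the usual rebasing step when appending encoded bytes into a shared data fragment: fixup offsets are relative to the instruction's own buffer, so they must be shifted by the bytes already in the fragment before the append. A minimal sketch with stand-in types:

    #include <cstdio>
    #include <vector>

    struct Fixup { unsigned Offset; };

    int main() {
      std::vector<char> Fragment(10, 0);  // 10 bytes already emitted
      std::vector<char> Code(4, '\x90');  // freshly encoded instruction
      std::vector<Fixup> Fixups;
      Fixup F = { 1 };                    // offset 1 within Code
      Fixups.push_back(F);

      // Rebase before appending, as EmitInstToData does above.
      for (size_t I = 0; I != Fixups.size(); ++I)
        Fixups[I].Offset += Fragment.size();
      Fragment.insert(Fragment.end(), Code.begin(), Code.end());

      printf("%u\n", Fixups[0].Offset);   // 11
    }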
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index a792d5631790..ccf4a7dddf73 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index e771556262a8..fc323155befa 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -165,9 +165,9 @@ bool MCSectionMachO::isVirtualSection() const {
/// StripSpaces - This removes leading and trailing spaces from the StringRef.
static void StripSpaces(StringRef &Str) {
- while (!Str.empty() && isspace(Str[0]))
+ while (!Str.empty() && isspace(static_cast<unsigned char>(Str[0])))
Str = Str.substr(1);
- while (!Str.empty() && isspace(Str.back()))
+ while (!Str.empty() && isspace(static_cast<unsigned char>(Str.back())))
Str = Str.substr(0, Str.size()-1);
}
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index afece0ba5519..d02e5535bde5 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -7,24 +7,24 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
-MCStreamer::MCStreamer(MCContext &Ctx)
- : Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
- CurrentW64UnwindInfo(0), LastSymbol(0) {
- const MCSection *section = NULL;
+MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx)
+ : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
+ CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) {
+ const MCSection *section = 0;
SectionStack.push_back(std::make_pair(section, section));
}
@@ -33,6 +33,18 @@ MCStreamer::~MCStreamer() {
delete W64UnwindInfos[i];
}
+void MCStreamer::reset() {
+ for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i)
+ delete W64UnwindInfos[i];
+ EmitEHFrame = true;
+ EmitDebugFrame = false;
+ CurrentW64UnwindInfo = 0;
+ LastSymbol = 0;
+ const MCSection *section = 0;
+ SectionStack.clear();
+ SectionStack.push_back(std::make_pair(section, section));
+}
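
reset() restores the streamer to its just-constructed state — note it re-seeds SectionStack with the same null pair the constructor uses — so one streamer object can be reused across compilations. A generic sketch of the pattern, with a made-up class:

    #include <cassert>
    #include <vector>

    struct Streamer {
      bool EmitEHFrame;
      std::vector<int> SectionStack;
      Streamer() { init(); }
      void reset() { SectionStack.clear(); init(); }
    private:
      void init() { EmitEHFrame = true; SectionStack.push_back(0); }
    };

    int main() {
      Streamer S;
      S.SectionStack.push_back(42);       // state from a previous run
      S.reset();
      assert(S.SectionStack.size() == 1 && S.EmitEHFrame);
      return 0;
    }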
+
const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context,
const MCSymbol *A,
const MCSymbol *B) {
@@ -91,8 +103,8 @@ void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size,
/// EmitULEB128Value - Special case of EmitULEB128Value that avoids the
/// client having to pass in a MCExpr for constant integers.
-void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace,
- unsigned Padding) {
+void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned Padding,
+ unsigned AddrSpace) {
SmallString<128> Tmp;
raw_svector_ostream OSE(Tmp);
encodeULEB128(Value, OSE, Padding);
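
For context, ULEB128 is the little-endian base-128 form used throughout DWARF: seven payload bits per byte, with the high bit set on every byte but the last (Padding, when nonzero, appends extra continuation bytes). A minimal encoder sketch, checked against the DWARF spec's classic example:

    #include <cstdio>

    // Sketch of what encodeULEB128 does for Padding == 0.
    static void uleb128(unsigned long long V) {
      do {
        unsigned char B = V & 0x7f;
        V >>= 7;
        if (V)
          B |= 0x80;        // continuation bit on all but the last byte
        printf("%02X ", B);
      } while (V);
    }

    int main() { uleb128(624485); } // prints E5 8E 26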
@@ -145,8 +157,8 @@ void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo,
StringRef Directory,
- StringRef Filename) {
- return getContext().GetDwarfFile(Directory, Filename, FileNo) == 0;
+ StringRef Filename, unsigned CUID) {
+ return getContext().GetDwarfFile(Directory, Filename, FileNo, CUID) == 0;
}
void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -160,7 +172,7 @@ void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
if (FrameInfos.empty())
- return NULL;
+ return 0;
return &FrameInfos.back();
}
@@ -181,6 +193,13 @@ void MCStreamer::EmitLabel(MCSymbol *Symbol) {
LastSymbol = Symbol;
}
+void MCStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection());
+ LastSymbol = Symbol;
+}
+
void MCStreamer::EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding) {
EnsureValidFrame();
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
@@ -234,69 +253,58 @@ void MCStreamer::RecordProcEnd(MCDwarfFrameInfo &Frame) {
EmitLabel(Frame.End);
}
-void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+MCSymbol *MCStreamer::EmitCFICommon() {
EnsureValidFrame();
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
MCSymbol *Label = getContext().CreateTempSymbol();
EmitLabel(Label);
- MachineLocation Dest(MachineLocation::VirtualFP);
- MachineLocation Source(Register, -Offset);
- MCCFIInstruction Instruction(Label, Dest, Source);
+ return Label;
+}
+
+void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createDefCfa(Label, Register, Offset);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createDefCfaOffset(Label, Offset);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MachineLocation Dest(MachineLocation::VirtualFP);
- MachineLocation Source(MachineLocation::VirtualFP, -Offset);
- MCCFIInstruction Instruction(Label, Dest, Source);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createAdjustCfaOffset(Label, Adjustment);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MachineLocation Dest(MachineLocation::VirtualFP);
- MachineLocation Source(MachineLocation::VirtualFP, Adjustment);
- MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIDefCfaRegister(int64_t Register) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createDefCfaRegister(Label, Register);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MachineLocation Dest(Register);
- MachineLocation Source(MachineLocation::VirtualFP);
- MCCFIInstruction Instruction(Label, Dest, Source);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createOffset(Label, Register, Offset);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MachineLocation Dest(Register, Offset);
- MachineLocation Source(Register, Offset);
- MCCFIInstruction Instruction(Label, Dest, Source);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createRelOffset(Label, Register, Offset);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MachineLocation Dest(Register, Offset);
- MachineLocation Source(Register, Offset);
- MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source);
CurFrame->Instructions.push_back(Instruction);
}
@@ -316,48 +324,40 @@ void MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
}
void MCStreamer::EmitCFIRememberState() {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction = MCCFIInstruction::createRememberState(Label);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::RememberState, Label);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIRestoreState() {
// FIXME: Error if there is no matching cfi_remember_state.
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction = MCCFIInstruction::createRestoreState(Label);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::RestoreState, Label);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFISameValue(int64_t Register) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createSameValue(Label, Register);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::SameValue, Label, Register);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIRestore(int64_t Register) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createRestore(Label, Register);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label, Register);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIEscape(StringRef Values) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction = MCCFIInstruction::createEscape(Label, Values);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::Escape, Label, Values);
CurFrame->Instructions.push_back(Instruction);
}
@@ -367,6 +367,22 @@ void MCStreamer::EmitCFISignalFrame() {
CurFrame->IsSignalFrame = true;
}
+void MCStreamer::EmitCFIUndefined(int64_t Register) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createUndefined(Label, Register);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createRegister(Label, Register1, Register2);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
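The common prologue each of these EmitCFI* overrides shares — create a temporary label at the current offset, emit it, then append one MCCFIInstruction built by a static factory — is exactly what EmitCFICommon() factors out above. A minimal, self-contained analog of that refactor, with stand-in types rather than LLVM's:

// Analog of the EmitCFICommon() refactor: the shared "make and emit a
// temp label" prologue lives in one helper; each directive only builds
// its own instruction. All types here are illustrative stand-ins.
#include <string>
#include <vector>

struct CFIInstruction { std::string Label; std::string Kind; };

struct Streamer {
  std::vector<CFIInstruction> Instructions;
  int TempCounter = 0;

  std::string EmitCFICommon() {              // shared prologue
    return "Ltmp" + std::to_string(TempCounter++);
  }
  void EmitCFIDefCfaOffset() {
    Instructions.push_back({EmitCFICommon(), "def_cfa_offset"});
  }
  void EmitCFIRememberState() {
    Instructions.push_back({EmitCFICommon(), "remember_state"});
  }
};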
void MCStreamer::setCurrentW64UnwindInfo(MCWin64EHUnwindInfo *Frame) {
W64UnwindInfos.push_back(Frame);
CurrentW64UnwindInfo = W64UnwindInfos.back();
@@ -457,7 +473,7 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
report_fatal_error("Frame register and offset already specified!");
if (Offset & 0x0F)
report_fatal_error("Misaligned frame pointer offset!");
- MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, NULL, Register, Offset);
+ MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, 0, Register, Offset);
CurFrame->LastFrameInst = CurFrame->Instructions.size();
CurFrame->Instructions.push_back(Inst);
}
@@ -604,3 +620,8 @@ void MCStreamer::Finish() {
FinishImpl();
}
+
+MCSymbolData &MCStreamer::getOrCreateSymbolData(MCSymbol *Symbol) {
+ report_fatal_error("Not supported!");
+ return *(static_cast<MCSymbolData*>(0));
+}
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index 80a1f02ce653..f18828dd41ef 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/lib/MC/MCTargetAsmLexer.cpp b/lib/MC/MCTargetAsmLexer.cpp
deleted file mode 100644
index c01c914cecd2..000000000000
--- a/lib/MC/MCTargetAsmLexer.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-//===-- llvm/MC/MCTargetAsmLexer.cpp - Target Assembly Lexer --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCTargetAsmLexer.h"
-using namespace llvm;
-
-MCTargetAsmLexer::MCTargetAsmLexer(const Target &T)
- : TheTarget(T), Lexer(NULL) {
-}
-MCTargetAsmLexer::~MCTargetAsmLexer() {}
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index c05b4b17fc3e..c5b637c92443 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -8,13 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCWin64EH.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSectionCOFF.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
namespace llvm {
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index a94b2140227f..a5ba3c36532a 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -10,24 +10,33 @@
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-
#include <vector>
using namespace llvm;
using namespace llvm::object;
+void MachObjectWriter::reset() {
+ Relocations.clear();
+ IndirectSymBase.clear();
+ StringTable.clear();
+ LocalSymbolData.clear();
+ ExternalSymbolData.clear();
+ UndefinedSymbolData.clear();
+ MCObjectWriter::reset();
+}
+
bool MachObjectWriter::
doesSymbolRequireExternRelocation(const MCSymbolData *SD) {
// Undefined symbols are always extern.
@@ -367,6 +376,39 @@ void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type,
assert(OS.tell() - Start == macho::LinkeditLoadCommandSize);
}
+static unsigned ComputeLinkerOptionsLoadCommandSize(
+ const std::vector<std::string> &Options, bool is64Bit)
+{
+ unsigned Size = sizeof(macho::LinkerOptionsLoadCommand);
+ for (unsigned i = 0, e = Options.size(); i != e; ++i)
+ Size += Options[i].size() + 1;
+ return RoundUpToAlignment(Size, is64Bit ? 8 : 4);
+}
+
+void MachObjectWriter::WriteLinkerOptionsLoadCommand(
+ const std::vector<std::string> &Options)
+{
+ unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(macho::LCT_LinkerOptions);
+ Write32(Size);
+ Write32(Options.size());
+ uint64_t BytesWritten = sizeof(macho::LinkerOptionsLoadCommand);
+ for (unsigned i = 0, e = Options.size(); i != e; ++i) {
+ // Write each string, including the null byte.
+ const std::string &Option = Options[i];
+ WriteBytes(Option.c_str(), Option.size() + 1);
+ BytesWritten += Option.size() + 1;
+ }
+
+ // Pad to a multiple of the pointer size.
+ WriteBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 8 : 4));
+
+ assert(OS.tell() - Start == Size);
+}
+
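ComputeLinkerOptionsLoadCommandSize adds one NUL-terminated string per option on top of the fixed header, then rounds the total up to the pointer size. A sketch of the same arithmetic, with the header size passed in as an assumption (macho::LinkerOptionsLoadCommand is three 32-bit fields, i.e. 12 bytes, per the SwapStruct specialization later in this patch):

#include <cstdint>
#include <string>
#include <vector>

uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

// Mirrors ComputeLinkerOptionsLoadCommandSize: header + each option
// string with its NUL byte, padded to the pointer size.
uint64_t linkerOptionsSize(const std::vector<std::string> &Options,
                           bool Is64Bit, uint64_t HeaderSize) {
  uint64_t Size = HeaderSize;
  for (unsigned i = 0, e = Options.size(); i != e; ++i)
    Size += Options[i].size() + 1;
  return roundUpToAlignment(Size, Is64Bit ? 8 : 4);
}
// e.g. HeaderSize = 12 and Options = {"-lz"}: 12 + 4 = 16, already a
// multiple of 8, so no padding bytes are written.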
void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
@@ -684,6 +726,13 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
+ // Add the data-in-code load command size, if used.
+ unsigned NumDataRegions = Asm.getDataRegions().size();
+ if (NumDataRegions) {
+ ++NumLoadCommands;
+ LoadCommandsSize += macho::LinkeditLoadCommandSize;
+ }
+
// Add the symbol table load command sizes, if used.
unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
UndefinedSymbolData.size();
@@ -693,13 +742,15 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
macho::DysymtabLoadCommandSize);
}
- // Add the data-in-code load command size, if used.
- unsigned NumDataRegions = Asm.getDataRegions().size();
- if (NumDataRegions) {
+ // Add the linker option load commands sizes.
+ const std::vector<std::vector<std::string> > &LinkerOptions =
+ Asm.getLinkerOptions();
+ for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) {
++NumLoadCommands;
- LoadCommandsSize += macho::LinkeditLoadCommandSize;
+ LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(LinkerOptions[i],
+ is64Bit());
}
-
+
// Compute the total size of the section data, as well as its file size and vm
// size.
uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
@@ -790,6 +841,11 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
IndirectSymbolOffset, NumIndirectSymbols);
}
+ // Write the linker options load commands.
+ for (unsigned i = 0, e = LinkerOptions.size(); i != e; ++i) {
+ WriteLinkerOptionsLoadCommand(LinkerOptions[i]);
+ }
+
// Write the actual section data.
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index f706cac8d36c..6dffed73dfb3 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -13,34 +13,30 @@
#define DEBUG_TYPE "WinCOFFObjectWriter"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
-
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-
#include "llvm/Support/TimeValue.h"
-
#include <cstdio>
using namespace llvm;
namespace {
-typedef llvm::SmallString<COFF::NameSize> name;
+typedef SmallString<COFF::NameSize> name;
enum AuxiliaryType {
ATFunctionDefinition,
@@ -62,7 +58,7 @@ class COFFSymbol {
public:
COFF::symbol Data;
- typedef llvm::SmallVector<AuxSymbol, 1> AuxiliarySymbols;
+ typedef SmallVector<AuxSymbol, 1> AuxiliarySymbols;
name Name;
int Index;
@@ -73,7 +69,7 @@ public:
MCSymbolData const *MCData;
- COFFSymbol(llvm::StringRef name);
+ COFFSymbol(StringRef name);
size_t size() const;
void set_name_offset(uint32_t Offset);
@@ -101,13 +97,13 @@ public:
COFFSymbol *Symbol;
relocations Relocations;
- COFFSection(llvm::StringRef name);
+ COFFSection(StringRef name);
static size_t size();
};
// This class holds the COFF string table.
class StringTable {
- typedef llvm::StringMap<size_t> map;
+ typedef StringMap<size_t> map;
map Map;
void update_length();
@@ -116,7 +112,7 @@ public:
StringTable();
size_t size() const;
- size_t insert(llvm::StringRef String);
+ size_t insert(StringRef String);
};
class WinCOFFObjectWriter : public MCObjectWriter {
@@ -148,10 +144,12 @@ public:
COFFSection *createSection(StringRef Name);
template <typename object_t, typename list_t>
- object_t *createCOFFEntity(llvm::StringRef Name, list_t &List);
+ object_t *createCOFFEntity(StringRef Name, list_t &List);
void DefineSection(MCSectionData const &SectionData);
- void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler);
+ void DefineSymbol(MCSymbol const &Symbol,
+ MCSymbolData const &SymbolData,
+ MCAssembler &Assembler);
void MakeSymbolReal(COFFSymbol &S, size_t Index);
void MakeSectionReal(COFFSection &S, size_t Number);
@@ -206,7 +204,7 @@ static inline void write_uint8_le(void *Data, uint8_t const &Value) {
//------------------------------------------------------------------------------
// Symbol class implementation
-COFFSymbol::COFFSymbol(llvm::StringRef name)
+COFFSymbol::COFFSymbol(StringRef name)
: Name(name.begin(), name.end())
, Other(NULL)
, Section(NULL)
@@ -258,7 +256,7 @@ bool COFFSymbol::should_keep() const {
//------------------------------------------------------------------------------
// Section class implementation
-COFFSection::COFFSection(llvm::StringRef name)
+COFFSection::COFFSection(StringRef name)
: Name(name)
, MCData(NULL)
, Symbol(NULL) {
@@ -291,7 +289,7 @@ size_t StringTable::size() const {
/// Add String to the table iff it is not already there.
/// @returns the index into the string table where the string is now located.
-size_t StringTable::insert(llvm::StringRef String) {
+size_t StringTable::insert(StringRef String) {
map::iterator i = Map.find(String);
if (i != Map.end())
@@ -345,14 +343,14 @@ COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol * Symbol){
return RetSymbol;
}
-COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) {
+COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) {
return createCOFFEntity<COFFSection>(Name, Sections);
}
/// A template used to lookup or create a symbol/section, and initialize it if
/// needed.
template <typename object_t, typename list_t>
-object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name,
+object_t *WinCOFFObjectWriter::createCOFFEntity(StringRef Name,
list_t &List) {
object_t *Object = new object_t(Name);
@@ -412,9 +410,10 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
/// This function takes a section data object from the assembler
/// and creates the associated COFF symbol staging object.
-void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
+void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol,
+ MCSymbolData const &SymbolData,
MCAssembler &Assembler) {
- COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&SymbolData.getSymbol());
+ COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol);
coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0;
coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
@@ -422,20 +421,17 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
if (SymbolData.getFlags() & COFF::SF_WeakExternal) {
coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
- if (SymbolData.getSymbol().isVariable()) {
+ if (Symbol.isVariable()) {
coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
- const MCExpr *Value = SymbolData.getSymbol().getVariableValue();
// FIXME: This assert message isn't very good.
- assert(Value->getKind() == MCExpr::SymbolRef &&
+ assert(Symbol.getVariableValue()->getKind() == MCExpr::SymbolRef &&
"Value must be a SymbolRef!");
- const MCSymbolRefExpr *SymbolRef =
- static_cast<const MCSymbolRefExpr *>(Value);
- coff_symbol->Other = GetOrCreateCOFFSymbol(&SymbolRef->getSymbol());
+ coff_symbol->Other = GetOrCreateCOFFSymbol(&Symbol);
} else {
std::string WeakName = std::string(".weak.")
- + SymbolData.getSymbol().getName().str()
+ + Symbol.getName().str()
+ ".default";
COFFSymbol *WeakDefault = createSymbol(WeakName);
WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
@@ -468,7 +464,7 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
// Bind internal COFF symbol to MC symbol.
coff_symbol->MCData = &SymbolData;
- SymbolMap[&SymbolData.getSymbol()] = coff_symbol;
+ SymbolMap[&Symbol] = coff_symbol;
}
/// Making a section real involves assigning it a number and putting
@@ -623,8 +619,11 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(),
e = Asm.symbol_end(); i != e; i++) {
- if (ExportSymbol(*i, Asm))
- DefineSymbol(*i, Asm);
+ if (ExportSymbol(*i, Asm)) {
+ const MCSymbol &Alias = i->getSymbol();
+ const MCSymbol &Symbol = Alias.AliasedSymbol();
+ DefineSymbol(Alias, Asm.getSymbolData(Symbol), Asm);
+ }
}
}
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 702eec04ef1b..75f343c421bb 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -13,19 +13,19 @@
#define DEBUG_TYPE "WinCOFFStreamer"
-#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCWin64EH.h"
-#include "llvm/MC/MCAsmBackend.h"
-
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -50,10 +50,11 @@ public:
// MCStreamer interface
virtual void InitSections();
+ virtual void InitToTextSection();
virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitDebugLabel(MCSymbol *Symbol);
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
virtual void EmitThumbFunc(MCSymbol *Func);
- virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol);
@@ -71,16 +72,29 @@ public:
virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment);
virtual void EmitFileDirective(StringRef Filename);
- virtual void EmitInstruction(const MCInst &Instruction);
virtual void EmitWin64EHHandlerData();
virtual void FinishImpl();
-private:
- virtual void EmitInstToFragment(const MCInst &Inst) {
- llvm_unreachable("Not used by WinCOFF.");
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_WinCOFFStreamer;
}
+
+private:
virtual void EmitInstToData(const MCInst &Inst) {
- llvm_unreachable("Not used by WinCOFF.");
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->getFixups().push_back(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
}
void SetSection(StringRef Section,
@@ -115,17 +129,13 @@ private:
SectionKind::getBSS());
EmitCodeAlignment(4, 0);
}
-
};
} // end anonymous namespace.
-WinCOFFStreamer::WinCOFFStreamer(MCContext &Context,
- MCAsmBackend &MAB,
- MCCodeEmitter &CE,
- raw_ostream &OS)
- : MCObjectStreamer(Context, MAB, OS, &CE)
- , CurSymbol(NULL) {
-}
+WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ MCCodeEmitter &CE, raw_ostream &OS)
+ : MCObjectStreamer(SK_WinCOFFStreamer, Context, MAB, OS, &CE),
+ CurSymbol(NULL) {}
void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment, bool External) {
@@ -164,6 +174,10 @@ void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
// MCStreamer interface
+void WinCOFFStreamer::InitToTextSection() {
+ SetSectionText();
+}
+
void WinCOFFStreamer::InitSections() {
SetSectionText();
SetSectionData();
@@ -176,6 +190,9 @@ void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
MCObjectStreamer::EmitLabel(Symbol);
}
+void WinCOFFStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+}
void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
llvm_unreachable("not implemented");
}
@@ -184,48 +201,6 @@ void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) {
llvm_unreachable("not implemented");
}
-void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- assert((Symbol->isInSection()
- ? Symbol->getSection().getVariant() == MCSection::SV_COFF
- : true) && "Got non COFF section in the COFF backend!");
- // FIXME: This is all very ugly and depressing. What needs to happen here
- // depends on quite a few things that are all part of relaxation, which we
- // don't really even do.
-
- if (Value->getKind() != MCExpr::SymbolRef) {
- // TODO: This is exactly the same as MachOStreamer. Consider merging into
- // MCObjectStreamer.
- getAssembler().getOrCreateSymbolData(*Symbol);
- AddValueSymbols(Value);
- Symbol->setVariableValue(Value);
- } else {
- // FIXME: This is a horrible way to do this :(. This should really be
- // handled after we are done with the MC* objects and immediately before
- // writing out the object file when we know exactly what the symbol should
- // look like in the coff symbol table. I'm not doing that now because the
- // COFF object writer doesn't have a clearly defined separation between MC
- // data structures, the object writers data structures, and the raw, POD,
- // data structures that get written to disk.
-
- // Copy over the aliased data.
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- const MCSymbolData &RealSD = getAssembler().getOrCreateSymbolData(
- dyn_cast<const MCSymbolRefExpr>(Value)->getSymbol());
-
- // FIXME: This is particularly nasty because it breaks as soon as any data
- // members of MCSymbolData change.
- SD.CommonAlign = RealSD.CommonAlign;
- SD.CommonSize = RealSD.CommonSize;
- SD.Flags = RealSD.Flags;
- SD.Fragment = RealSD.Fragment;
- SD.Index = RealSD.Index;
- SD.IsExternal = RealSD.IsExternal;
- SD.IsPrivateExtern = RealSD.IsPrivateExtern;
- SD.Offset = RealSD.Offset;
- SD.SymbolSize = RealSD.SymbolSize;
- }
-}
-
void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
assert(Symbol && "Symbol must be non-null!");
@@ -292,9 +267,10 @@ void WinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol)
{
MCDataFragment *DF = getOrCreateDataFragment();
- DF->addFixup(MCFixup::Create(DF->getContents().size(),
- MCSymbolRefExpr::Create (Symbol, getContext ()),
- FK_SecRel_4));
+ DF->getFixups().push_back(
+ MCFixup::Create(DF->getContents().size(),
+ MCSymbolRefExpr::Create (Symbol, getContext ()),
+ FK_SecRel_4));
DF->getContents().resize(DF->getContents().size() + 4, 0);
}
@@ -333,22 +309,6 @@ void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
// info will be a much larger effort.
}
-void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) {
- for (unsigned i = 0, e = Instruction.getNumOperands(); i != e; ++i)
- if (Instruction.getOperand(i).isExpr())
- AddValueSymbols(Instruction.getOperand(i).getExpr());
-
- getCurrentSectionData()->setHasInstructions(true);
-
- MCInstFragment *Fragment =
- new MCInstFragment(Instruction, getCurrentSectionData());
-
- raw_svector_ostream VecOS(Fragment->getCode());
-
- getAssembler().getEmitter().EncodeInstruction(Instruction, VecOS,
- Fragment->getFixups());
-}
-
void WinCOFFStreamer::EmitWin64EHHandlerData() {
MCStreamer::EmitWin64EHHandlerData();
diff --git a/lib/Makefile b/lib/Makefile
index fd575cd19570..57f016bc8905 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -10,8 +10,9 @@ LEVEL = ..
include $(LEVEL)/Makefile.config
-PARALLEL_DIRS := VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
- Target ExecutionEngine Linker MC Object DebugInfo
+PARALLEL_DIRS := IR AsmParser Bitcode Archive Analysis Transforms CodeGen \
+ Target ExecutionEngine Linker MC Object Option DebugInfo \
+ IRReader
include $(LEVEL)/Makefile.common
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 2a5951ada506..0e13d0540fa6 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -21,44 +21,6 @@ using namespace object;
static const char *Magic = "!<arch>\n";
-namespace {
-struct ArchiveMemberHeader {
- char Name[16];
- char LastModified[12];
- char UID[6];
- char GID[6];
- char AccessMode[8];
- char Size[10]; ///< Size of data, not including header or padding.
- char Terminator[2];
-
- ///! Get the name without looking up long names.
- StringRef getName() const {
- char EndCond;
- if (Name[0] == '/' || Name[0] == '#')
- EndCond = ' ';
- else
- EndCond = '/';
- StringRef::size_type end = StringRef(Name, sizeof(Name)).find(EndCond);
- if (end == StringRef::npos)
- end = sizeof(Name);
- assert(end <= sizeof(Name) && end > 0);
- // Don't include the EndCond if there is one.
- return StringRef(Name, end);
- }
-
- uint64_t getSize() const {
- APInt ret;
- StringRef(Size, sizeof(Size)).getAsInteger(10, ret);
- return ret.getZExtValue();
- }
-};
-}
-
-static const ArchiveMemberHeader *ToHeader(const char *base) {
- return reinterpret_cast<const ArchiveMemberHeader *>(base);
-}
-
-
static bool isInternalMember(const ArchiveMemberHeader &amh) {
static const char *const internals[] = {
"/",
@@ -76,25 +38,6 @@ static bool isInternalMember(const ArchiveMemberHeader &amh) {
void Archive::anchor() { }
-Archive::Child Archive::Child::getNext() const {
- size_t SpaceToSkip = sizeof(ArchiveMemberHeader) +
- ToHeader(Data.data())->getSize();
- // If it's odd, add 1 to make it even.
- if (SpaceToSkip & 1)
- ++SpaceToSkip;
-
- const char *NextLoc = Data.data() + SpaceToSkip;
-
- // Check to see if this is past the end of the archive.
- if (NextLoc >= Parent->Data->getBufferEnd())
- return Child(Parent, StringRef(0, 0));
-
- size_t NextSize = sizeof(ArchiveMemberHeader) +
- ToHeader(NextLoc)->getSize();
-
- return Child(Parent, StringRef(NextLoc, NextSize));
-}
-
error_code Archive::Child::getName(StringRef &Result) const {
StringRef name = ToHeader(Data.data())->getName();
// Check if it's a special name.
@@ -109,11 +52,12 @@ error_code Archive::Child::getName(StringRef &Result) const {
}
// It's a long name.
// Get the offset.
- APInt offset;
- name.substr(1).getAsInteger(10, offset);
+ std::size_t offset;
+ if (name.substr(1).rtrim(" ").getAsInteger(10, offset))
+ llvm_unreachable("Long name offset is not an integer");
const char *addr = Parent->StringTable->Data.begin()
+ sizeof(ArchiveMemberHeader)
- + offset.getZExtValue();
+ + offset;
// Verify it.
if (Parent->StringTable == Parent->end_children()
|| addr < (Parent->StringTable->Data.begin()
@@ -122,12 +66,20 @@ error_code Archive::Child::getName(StringRef &Result) const {
+ sizeof(ArchiveMemberHeader)
+ Parent->StringTable->getSize()))
return object_error::parse_failed;
- Result = addr;
+
+ // GNU long file names end with a /.
+ if (Parent->kind() == K_GNU) {
+ StringRef::size_type End = StringRef(addr).find('/');
+ Result = StringRef(addr, End);
+ } else {
+ Result = addr;
+ }
return object_error::success;
} else if (name.startswith("#1/")) {
- APInt name_size;
- name.substr(3).getAsInteger(10, name_size);
- Result = Data.substr(0, name_size.getZExtValue());
+ uint64_t name_size;
+ if (name.substr(3).rtrim(" ").getAsInteger(10, name_size))
+ llvm_unreachable("Long name length is not an ingeter");
+ Result = Data.substr(sizeof(ArchiveMemberHeader), name_size);
return object_error::success;
}
// It's a simple name.
@@ -138,36 +90,12 @@ error_code Archive::Child::getName(StringRef &Result) const {
return object_error::success;
}
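The BSD "#1/<size>" branch above deserves a worked example: for a member named "#1/20", the real filename is the 20 bytes that immediately follow the 60-byte member header, which is why Result is taken from Data at offset sizeof(ArchiveMemberHeader). A small stand-alone parser for that field, assuming C++11's std::stoul:

#include <cctype>
#include <string>

// Parse a BSD-style long-name field, e.g. "#1/20   " -> 20.
// Returns false when the field is not of that form.
bool parseBSDLongName(const std::string &Field, unsigned long &Len) {
  if (Field.compare(0, 3, "#1/") != 0)
    return false;
  std::string Digits = Field.substr(3);
  while (!Digits.empty() && Digits[Digits.size() - 1] == ' ')
    Digits.erase(Digits.size() - 1);          // mirrors rtrim(" ") above
  if (Digits.empty() || !std::isdigit(static_cast<unsigned char>(Digits[0])))
    return false;
  std::size_t Pos = 0;
  Len = std::stoul(Digits, &Pos);
  return Pos == Digits.size();
}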
-uint64_t Archive::Child::getSize() const {
- uint64_t size = ToHeader(Data.data())->getSize();
- // Don't include attached name.
- StringRef name = ToHeader(Data.data())->getName();
- if (name.startswith("#1/")) {
- APInt name_size;
- name.substr(3).getAsInteger(10, name_size);
- size -= name_size.getZExtValue();
- }
- return size;
-}
-
-MemoryBuffer *Archive::Child::getBuffer() const {
- StringRef name;
- if (getName(name)) return NULL;
- int size = sizeof(ArchiveMemberHeader);
- if (name.startswith("#1/")) {
- APInt name_size;
- name.substr(3).getAsInteger(10, name_size);
- size += name_size.getZExtValue();
- }
- return MemoryBuffer::getMemBuffer(Data.substr(size, getSize()),
- name,
- false);
-}
-
error_code Archive::Child::getAsBinary(OwningPtr<Binary> &Result) const {
OwningPtr<Binary> ret;
- if (error_code ec =
- createBinary(getBuffer(), ret))
+ OwningPtr<MemoryBuffer> Buff;
+ if (error_code ec = getMemoryBuffer(Buff))
+ return ec;
+ if (error_code ec = createBinary(Buff.take(), ret))
return ec;
Result.swap(ret);
return object_error::success;
@@ -187,15 +115,56 @@ Archive::Archive(MemoryBuffer *source, error_code &ec)
child_iterator i = begin_children(false);
child_iterator e = end_children();
- if (i != e) ++i; // Nobody cares about the first member.
- if (i != e) {
- SymbolTable = i;
- ++i;
- }
- if (i != e) {
- StringTable = i;
- }
+ StringRef name;
+ if ((ec = i->getName(name)))
+ return;
+ // Below is the pattern used to figure out the archive format.
+ // GNU archive format
+ // First member : / (points to the symbol table)
+ // Second member : // (may exist; if it does, it points to the string table)
+ // Note : The string table is used if a filename exceeds 15 characters.
+ // BSD archive format
+ // First member : __.SYMDEF (points to the symbol table)
+ // There is no string table. If a filename exceeds 15 characters or has an
+ // embedded space, the name field holds #1/<size>, where <size> is the
+ // length of the real filename that follows the archive member header.
+ // COFF archive format
+ // First member : /
+ // Second member : / (provides a directory of symbols)
+ // Third member : // (contains the string table; present even if the
+ // string table is empty)
+ if (name == "/") {
+ SymbolTable = i;
+ StringTable = e;
+ if (i != e) ++i;
+ if (i == e) {
+ ec = object_error::parse_failed;
+ return;
+ }
+ if ((ec = i->getName(name)))
+ return;
+ if (name[0] != '/') {
+ Format = K_GNU;
+ } else if ((name.size() > 1) && (name == "//")) {
+ Format = K_GNU;
+ StringTable = i;
+ ++i;
+ } else {
+ Format = K_COFF;
+ if (i != e) {
+ SymbolTable = i;
+ ++i;
+ }
+ if (i != e) {
+ StringTable = i;
+ }
+ }
+ } else if (name == "__.SYMDEF") {
+ Format = K_BSD;
+ SymbolTable = i;
+ StringTable = e;
+ }
ec = object_error::success;
}
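The constructor's classification logic, stripped to its skeleton: the first member's name selects BSD outright ("__.SYMDEF"), while "/" means GNU or COFF and the second member disambiguates. A simplified sketch that deliberately omits the error paths the real code handles:

#include <string>

enum ArchiveKind { K_GNU, K_BSD, K_COFF, K_UNKNOWN };

// First/Second are the names of the first two archive members.
ArchiveKind classifyArchive(const std::string &First,
                            const std::string &Second) {
  if (First == "__.SYMDEF")
    return K_BSD;                   // BSD keeps long names inline (#1/<size>)
  if (First == "/") {
    if (Second == "//")
      return K_GNU;                 // "//" is GNU's long-name string table
    if (Second.empty() || Second[0] != '/')
      return K_GNU;                 // GNU without a string table
    return K_COFF;                  // a second "/" is COFF's symbol directory
  }
  return K_UNKNOWN;
}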
@@ -215,26 +184,50 @@ Archive::child_iterator Archive::end_children() const {
}
error_code Archive::Symbol::getName(StringRef &Result) const {
- Result =
- StringRef(Parent->SymbolTable->getBuffer()->getBufferStart() + StringIndex);
+ Result = StringRef(Parent->SymbolTable->getBuffer().begin() + StringIndex);
return object_error::success;
}
error_code Archive::Symbol::getMember(child_iterator &Result) const {
- const char *buf = Parent->SymbolTable->getBuffer()->getBufferStart();
- uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
- const char *offsets = buf + 4;
- buf += 4 + (member_count * 4); // Skip offsets.
- const char *indicies = buf + 4;
+ const char *Buf = Parent->SymbolTable->getBuffer().begin();
+ const char *Offsets = Buf + 4;
+ uint32_t Offset = 0;
+ if (Parent->kind() == K_GNU) {
+ Offset = *(reinterpret_cast<const support::ubig32_t*>(Offsets)
+ + SymbolIndex);
+ } else if (Parent->kind() == K_BSD) {
+ llvm_unreachable("BSD format is not supported");
+ } else {
+ uint32_t MemberCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
+
+ // Skip offsets.
+ Buf += sizeof(support::ulittle32_t)
+ + (MemberCount * sizeof(support::ulittle32_t));
+
+ uint32_t SymbolCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
+
+ if (SymbolIndex >= SymbolCount)
+ return object_error::parse_failed;
+
+ // Skip SymbolCount to get to the indices table.
+ const char *Indices = Buf + sizeof(support::ulittle32_t);
+
+ // Get the index of the offset in the file member offset table for this
+ // symbol.
+ uint16_t OffsetIndex =
+ *(reinterpret_cast<const support::ulittle16_t*>(Indices)
+ + SymbolIndex);
+ // Subtract 1 since OffsetIndex is 1 based.
+ --OffsetIndex;
- uint16_t offsetindex =
- *(reinterpret_cast<const support::ulittle16_t*>(indicies)
- + SymbolIndex);
+ if (OffsetIndex >= MemberCount)
+ return object_error::parse_failed;
- uint32_t offset = *(reinterpret_cast<const support::ulittle32_t*>(offsets)
- + (offsetindex - 1));
+ Offset = *(reinterpret_cast<const support::ulittle32_t*>(Offsets)
+ + OffsetIndex);
+ }
- const char *Loc = Parent->getData().begin() + offset;
+ const char *Loc = Parent->getData().begin() + Offset;
size_t Size = sizeof(ArchiveMemberHeader) +
ToHeader(Loc)->getSize();
Result = Child(Parent, StringRef(Loc, Size));
@@ -246,27 +239,63 @@ Archive::Symbol Archive::Symbol::getNext() const {
Symbol t(*this);
// Go to one past next null.
t.StringIndex =
- Parent->SymbolTable->getBuffer()->getBuffer().find('\0', t.StringIndex) + 1;
+ Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1;
++t.SymbolIndex;
return t;
}
Archive::symbol_iterator Archive::begin_symbols() const {
- const char *buf = SymbolTable->getBuffer()->getBufferStart();
- uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
- buf += 4 + (member_count * 4); // Skip offsets.
- uint32_t symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
- buf += 4 + (symbol_count * 2); // Skip indices.
- uint32_t string_start_offset =
- buf - SymbolTable->getBuffer()->getBufferStart();
+ const char *buf = SymbolTable->getBuffer().begin();
+ if (kind() == K_GNU) {
+ uint32_t symbol_count = 0;
+ symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
+ buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
+ } else if (kind() == K_BSD) {
+ llvm_unreachable("BSD archive format is not supported");
+ } else {
+ uint32_t member_count = 0;
+ uint32_t symbol_count = 0;
+ member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (member_count * 4); // Skip offsets.
+ symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (symbol_count * 2); // Skip indices.
+ }
+ uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin();
return symbol_iterator(Symbol(this, 0, string_start_offset));
}
Archive::symbol_iterator Archive::end_symbols() const {
- const char *buf = SymbolTable->getBuffer()->getBufferStart();
- uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
- buf += 4 + (member_count * 4); // Skip offsets.
- uint32_t symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ const char *buf = SymbolTable->getBuffer().begin();
+ uint32_t symbol_count = 0;
+ if (kind() == K_GNU) {
+ symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
+ buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
+ } else if (kind() == K_BSD) {
+ llvm_unreachable("BSD archive format is not supported");
+ } else {
+ uint32_t member_count = 0;
+ member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (member_count * 4); // Skip offsets.
+ symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ }
return symbol_iterator(
Symbol(this, symbol_count, 0));
}
+
+Archive::child_iterator Archive::findSym(StringRef name) const {
+ Archive::symbol_iterator bs = begin_symbols();
+ Archive::symbol_iterator es = end_symbols();
+ Archive::child_iterator result;
+
+ StringRef symname;
+ for (; bs != es; ++bs) {
+ if (bs->getName(symname))
+ return end_children();
+ if (symname == name) {
+ if (bs->getMember(result))
+ return end_children();
+ return result;
+ }
+ }
+ return end_children();
+}
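A hypothetical caller of the new Archive::findSym, using only API visible in this patch; note that findSym reports both "symbol absent" and lookup failure as end_children():

#include "llvm/Object/Archive.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;

void printDefiner(const Archive *A, StringRef Sym) {
  Archive::child_iterator It = A->findSym(Sym);
  if (It == A->end_children())
    return;                          // absent, or the lookup failed
  StringRef Member;
  if (!It->getName(Member))          // error_code: zero means success
    outs() << Sym << " is defined in " << Member << "\n";
}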
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 0b7ee34c09af..ca90e0e3c3fc 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -267,7 +267,7 @@ error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
}
if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
- ret = ::toupper(ret);
+ ret = ::toupper(static_cast<unsigned char>(ret));
Result = ret;
return object_error::success;
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index 663b84ec8b1f..cfe0eb467e53 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/ELF.h"
+#include "llvm/Support/MathExtras.h"
namespace llvm {
@@ -22,16 +23,49 @@ ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
error_code ec;
+ std::size_t MaxAlignment =
+ 1ULL << CountTrailingZeros_64(uintptr_t(Object->getBufferStart()));
+
if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
- return new ELFObjectFile<support::little, false>(Object, ec);
+#if !LLVM_IS_UNALIGNED_ACCESS_FAST
+ if (MaxAlignment >= 4)
+ return new ELFObjectFile<ELFType<support::little, 4, false> >(Object, ec);
+ else
+#endif
+ if (MaxAlignment >= 2)
+ return new ELFObjectFile<ELFType<support::little, 2, false> >(Object, ec);
+ else
+ llvm_unreachable("Invalid alignment for ELF file!");
else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
- return new ELFObjectFile<support::big, false>(Object, ec);
+#if !LLVM_IS_UNALIGNED_ACCESS_FAST
+ if (MaxAlignment >= 4)
+ return new ELFObjectFile<ELFType<support::big, 4, false> >(Object, ec);
+ else
+#endif
+ if (MaxAlignment >= 2)
+ return new ELFObjectFile<ELFType<support::big, 2, false> >(Object, ec);
+ else
+ llvm_unreachable("Invalid alignment for ELF file!");
else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
- return new ELFObjectFile<support::big, true>(Object, ec);
+#if !LLVM_IS_UNALIGNED_ACCESS_FAST
+ if (MaxAlignment >= 8)
+ return new ELFObjectFile<ELFType<support::big, 8, true> >(Object, ec);
+ else
+#endif
+ if (MaxAlignment >= 2)
+ return new ELFObjectFile<ELFType<support::big, 2, true> >(Object, ec);
+ else
+ llvm_unreachable("Invalid alignment for ELF file!");
else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) {
- ELFObjectFile<support::little, true> *result =
- new ELFObjectFile<support::little, true>(Object, ec);
- return result;
+#if !LLVM_IS_UNALIGNED_ACCESS_FAST
+ if (MaxAlignment >= 8)
+ return new ELFObjectFile<ELFType<support::little, 8, true> >(Object, ec);
+ else
+#endif
+ if (MaxAlignment >= 2)
+ return new ELFObjectFile<ELFType<support::little, 2, true> >(Object, ec);
+ else
+ llvm_unreachable("Invalid alignment for ELF file!");
}
report_fatal_error("Buffer is not an ELF object file!");
diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp
index 00dea3fe4769..c9c341a207c7 100644
--- a/lib/Object/MachOObject.cpp
+++ b/lib/Object/MachOObject.cpp
@@ -8,14 +8,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/MachOObject.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;
@@ -44,7 +44,8 @@ static void ReadInMemoryStruct(const MachOObject &MOO,
}
// Check whether we can return a direct pointer.
- struct_type *Ptr = (struct_type *) (Buffer.data() + Base);
+ struct_type *Ptr = reinterpret_cast<struct_type *>(
+ const_cast<char *>(Buffer.data() + Base));
if (!MOO.isSwappedEndian()) {
Res = Ptr;
return;
@@ -258,6 +259,17 @@ void MachOObject::ReadLinkeditDataLoadCommand(const LoadCommandInfo &LCI,
}
template<>
+void SwapStruct(macho::LinkerOptionsLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.Count);
+}
+void MachOObject::ReadLinkerOptionsLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::LinkerOptionsLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
void SwapStruct(macho::IndirectSymbolTableEntry &Value) {
SwapValue(Value.Index);
}
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 45aeaac6b831..6501df9fb986 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -12,12 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Triple.h"
#include "llvm/Object/MachO.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
-
#include <cctype>
#include <cstring>
#include <limits>
@@ -50,7 +49,15 @@ ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err);
if (!MachOObj)
return NULL;
- return new MachOObjectFile(Buffer, MachOObj, ec);
+ // MachOObject takes ownership of the Buffer we passed to it, and
+ // MachOObjectFile does, too, so we need to make sure they don't get the
+ // same object. A MemoryBuffer is cheap (it's just a reference to memory,
+ // not a copy of the memory itself), so just make a new copy here for
+ // the MachOObjectFile.
+ MemoryBuffer *NewBuffer =
+ MemoryBuffer::getMemBuffer(Buffer->getBuffer(),
+ Buffer->getBufferIdentifier(), false);
+ return new MachOObjectFile(NewBuffer, MachOObj, ec);
}
/*===-- Symbols -----------------------------------------------------------===*/
@@ -266,7 +273,7 @@ error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
}
if (Flags & (macho::STF_External | macho::STF_PrivateExtern))
- Char = toupper(Char);
+ Char = toupper(static_cast<unsigned char>(Char));
Result = Char;
return object_error::success;
}
@@ -440,9 +447,7 @@ error_code MachOObjectFile::getSectionNext(DataRefImpl DRI,
void
MachOObjectFile::getSection(DataRefImpl DRI,
InMemoryStruct<macho::Section> &Res) const {
- InMemoryStruct<macho::SegmentLoadCommand> SLC;
LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- MachOObj->ReadSegmentLoadCommand(LCI, SLC);
MachOObj->ReadSection(LCI, DRI.d.b, Res);
}
@@ -456,9 +461,7 @@ std::size_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const {
void
MachOObjectFile::getSection64(DataRefImpl DRI,
InMemoryStruct<macho::Section64> &Res) const {
- InMemoryStruct<macho::Segment64LoadCommand> SLC;
LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- MachOObj->ReadSegment64LoadCommand(LCI, SLC);
MachOObj->ReadSection64(LCI, DRI.d.b, Res);
}
@@ -470,38 +473,61 @@ static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) {
return false;
}
+static StringRef parseSegmentOrSectionName(const char *P) {
+ if (P[15] == 0)
+ // Null terminated.
+ return P;
+ // Not null terminated, so this is a 16 char string.
+ return StringRef(P, 16);
+}
+
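parseSegmentOrSectionName exists because Mach-O stores segment and section names in fixed 16-byte fields that are NUL-terminated only when the name is shorter than the field. Restated standalone:

#include <string>

// Mach-O name fields are char[16]; a full-width name has no NUL.
std::string parseFixedName(const char *P) {
  if (P[15] == 0)
    return std::string(P);          // NUL-terminated within the field
  return std::string(P, 16);        // all 16 chars, e.g. "__objc_classrefs"
}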
error_code MachOObjectFile::getSectionName(DataRefImpl DRI,
StringRef &Result) const {
- // FIXME: thread safety.
- static char result[34];
- if (is64BitLoadCommand(MachOObj, DRI)) {
- InMemoryStruct<macho::Segment64LoadCommand> SLC;
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- MachOObj->ReadSegment64LoadCommand(LCI, SLC);
- InMemoryStruct<macho::Section64> Sect;
- MachOObj->ReadSection64(LCI, DRI.d.b, Sect);
-
- strcpy(result, Sect->SegmentName);
- strcat(result, ",");
- strcat(result, Sect->Name);
+ unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) +
+ DRI.d.b * sizeof(macho::Section64);
+ StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64));
+ const macho::Section64 *sec =
+ reinterpret_cast<const macho::Section64*>(Data.data());
+ Result = parseSegmentOrSectionName(sec->Name);
} else {
- InMemoryStruct<macho::SegmentLoadCommand> SLC;
LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- MachOObj->ReadSegmentLoadCommand(LCI, SLC);
- InMemoryStruct<macho::Section> Sect;
- MachOObj->ReadSection(LCI, DRI.d.b, Sect);
+ unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) +
+ DRI.d.b * sizeof(macho::Section);
+ StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section));
+ const macho::Section *sec =
+ reinterpret_cast<const macho::Section*>(Data.data());
+ Result = parseSegmentOrSectionName(sec->Name);
+ }
+ return object_error::success;
+}
- strcpy(result, Sect->SegmentName);
- strcat(result, ",");
- strcat(result, Sect->Name);
+error_code MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec,
+ StringRef &Res) const {
+ if (is64BitLoadCommand(MachOObj.get(), Sec)) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a);
+ unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) +
+ Sec.d.b * sizeof(macho::Section64);
+ StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64));
+ const macho::Section64 *sec =
+ reinterpret_cast<const macho::Section64*>(Data.data());
+ Res = parseSegmentOrSectionName(sec->SegmentName);
+ } else {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a);
+ unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) +
+ Sec.d.b * sizeof(macho::Section);
+ StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section));
+ const macho::Section *sec =
+ reinterpret_cast<const macho::Section*>(Data.data());
+ Res = parseSegmentOrSectionName(sec->SegmentName);
}
- Result = StringRef(result);
return object_error::success;
}
error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
uint64_t &Result) const {
- if (is64BitLoadCommand(MachOObj, DRI)) {
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
InMemoryStruct<macho::Section64> Sect;
getSection64(DRI, Sect);
Result = Sect->Address;
@@ -515,7 +541,7 @@ error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
uint64_t &Result) const {
- if (is64BitLoadCommand(MachOObj, DRI)) {
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
InMemoryStruct<macho::Section64> Sect;
getSection64(DRI, Sect);
Result = Sect->Size;
@@ -529,7 +555,7 @@ error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
StringRef &Result) const {
- if (is64BitLoadCommand(MachOObj, DRI)) {
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
InMemoryStruct<macho::Section64> Sect;
getSection64(DRI, Sect);
Result = MachOObj->getData(Sect->Offset, Sect->Size);
@@ -543,7 +569,7 @@ error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI,
uint64_t &Result) const {
- if (is64BitLoadCommand(MachOObj, DRI)) {
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
InMemoryStruct<macho::Section64> Sect;
getSection64(DRI, Sect);
Result = uint64_t(1) << Sect->Align;
@@ -557,14 +583,14 @@ error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI,
error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
bool &Result) const {
- if (is64BitLoadCommand(MachOObj, DRI)) {
+ if (is64BitLoadCommand(MachOObj.get(), DRI)) {
InMemoryStruct<macho::Section64> Sect;
getSection64(DRI, Sect);
- Result = !strcmp(Sect->Name, "__text");
+ Result = Sect->Flags & macho::SF_PureInstructions;
} else {
InMemoryStruct<macho::Section> Sect;
getSection(DRI, Sect);
- Result = !strcmp(Sect->Name, "__text");
+ Result = Sect->Flags & macho::SF_PureInstructions;
}
return object_error::success;
}
@@ -664,7 +690,7 @@ relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
}
relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
uint32_t last_reloc;
- if (is64BitLoadCommand(MachOObj, Sec)) {
+ if (is64BitLoadCommand(MachOObj.get(), Sec)) {
InMemoryStruct<macho::Section64> Sect;
getSection64(Sec, Sect);
last_reloc = Sect->NumRelocationTableEntries;
@@ -1050,6 +1076,7 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
printRelocationTargetName(RENext, fmt);
fmt << "-";
printRelocationTargetName(RE, fmt);
+ break;
}
case macho::RIT_X86_64_TLV:
printRelocationTargetName(RE, fmt);
@@ -1277,14 +1304,17 @@ StringRef MachOObjectFile::getFileFormatName() const {
}
}
+ // Make sure the cpu type has the correct mask.
+ assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64)
+ == llvm::MachO::CPUArchABI64 &&
+ "32-bit object file when we're 64-bit?");
+
switch (MachOObj->getHeader().CPUType) {
case llvm::MachO::CPUTypeX86_64:
return "Mach-O 64-bit x86-64";
case llvm::MachO::CPUTypePowerPC64:
return "Mach-O 64-bit ppc64";
default:
- assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 1 &&
- "32-bit object file when we're 64-bit?");
return "Mach-O 64-bit unknown";
}
}
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index b14df9af64f4..860c87be9846 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -33,6 +33,8 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
sys::LLVMFileType type = sys::IdentifyFileType(Object->getBufferStart(),
static_cast<unsigned>(Object->getBufferSize()));
switch (type) {
+ case sys::Unknown_FileType:
+ return 0;
case sys::ELF_Relocatable_FileType:
case sys::ELF_Executable_FileType:
case sys::ELF_SharedObject_FileType:
@@ -52,7 +54,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
case sys::COFF_FileType:
return createCOFFObjectFile(Object);
default:
- llvm_unreachable("Unknown Object File Type");
+ llvm_unreachable("Unexpected Object File Type");
}
}
diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp
new file mode 100644
index 000000000000..4c8da58f5368
--- /dev/null
+++ b/lib/Option/Arg.cpp
@@ -0,0 +1,122 @@
+//===--- Arg.cpp - Argument Implementations -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/Arg.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::opt;
+
+Arg::Arg(const Option _Opt, StringRef S, unsigned _Index, const Arg *_BaseArg)
+ : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
+ Claimed(false), OwnsValues(false) {
+}
+
+Arg::Arg(const Option _Opt, StringRef S, unsigned _Index,
+ const char *Value0, const Arg *_BaseArg)
+ : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
+ Claimed(false), OwnsValues(false) {
+ Values.push_back(Value0);
+}
+
+Arg::Arg(const Option _Opt, StringRef S, unsigned _Index,
+ const char *Value0, const char *Value1, const Arg *_BaseArg)
+ : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
+ Claimed(false), OwnsValues(false) {
+ Values.push_back(Value0);
+ Values.push_back(Value1);
+}
+
+Arg::~Arg() {
+ if (OwnsValues) {
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ delete[] Values[i];
+ }
+}
+
+void Arg::dump() const {
+ llvm::errs() << "<";
+
+ llvm::errs() << " Opt:";
+ Opt.dump();
+
+ llvm::errs() << " Index:" << Index;
+
+ llvm::errs() << " Values: [";
+ for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ if (i) llvm::errs() << ", ";
+ llvm::errs() << "'" << Values[i] << "'";
+ }
+
+ llvm::errs() << "]>\n";
+}
+
+std::string Arg::getAsString(const ArgList &Args) const {
+ SmallString<256> Res;
+ llvm::raw_svector_ostream OS(Res);
+
+ ArgStringList ASL;
+ render(Args, ASL);
+ for (ArgStringList::iterator
+ it = ASL.begin(), ie = ASL.end(); it != ie; ++it) {
+ if (it != ASL.begin())
+ OS << ' ';
+ OS << *it;
+ }
+
+ return OS.str();
+}
+
+void Arg::renderAsInput(const ArgList &Args, ArgStringList &Output) const {
+ if (!getOption().hasNoOptAsInput()) {
+ render(Args, Output);
+ return;
+ }
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i)
+ Output.push_back(getValue(i));
+}
+
+void Arg::render(const ArgList &Args, ArgStringList &Output) const {
+ switch (getOption().getRenderStyle()) {
+ case Option::RenderValuesStyle:
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i)
+ Output.push_back(getValue(i));
+ break;
+
+ case Option::RenderCommaJoinedStyle: {
+ SmallString<256> Res;
+ llvm::raw_svector_ostream OS(Res);
+ OS << getSpelling();
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ',';
+ OS << getValue(i);
+ }
+ Output.push_back(Args.MakeArgString(OS.str()));
+ break;
+ }
+
+ case Option::RenderJoinedStyle:
+ Output.push_back(Args.GetOrMakeJoinedArgString(
+ getIndex(), getSpelling(), getValue(0)));
+ for (unsigned i = 1, e = getNumValues(); i != e; ++i)
+ Output.push_back(getValue(i));
+ break;
+
+ case Option::RenderSeparateStyle:
+ Output.push_back(Args.MakeArgString(getSpelling()));
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i)
+ Output.push_back(getValue(i));
+ break;
+ }
+}
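The four render styles differ only in how the spelling and values are stitched back into argv strings. A hypothetical illustration (not LLVM API) for an option spelled "-I" carrying values {"a", "b"}:

#include <string>
#include <vector>

enum RenderStyle { Values, CommaJoined, Joined, Separate };

std::vector<std::string> renderExample(RenderStyle S) {
  switch (S) {
  case Values:      return {"a", "b"};       // values only, spelling dropped
  case CommaJoined: return {"-Ia,b"};        // spelling + comma-joined values
  case Joined:      return {"-Ia", "b"};     // first value glued to spelling
  case Separate:    return {"-I", "a", "b"}; // spelling, then each value
  }
  return std::vector<std::string>();
}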
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
new file mode 100644
index 000000000000..39b22d776ed9
--- /dev/null
+++ b/lib/Option/ArgList.cpp
@@ -0,0 +1,385 @@
+//===--- ArgList.cpp - Argument List Management ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/ArgList.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::opt;
+
+void arg_iterator::SkipToNextArg() {
+ for (; Current != Args.end(); ++Current) {
+ // Done if there are no filters.
+ if (!Id0.isValid())
+ break;
+
+ // Otherwise require a match.
+ const Option &O = (*Current)->getOption();
+ if (O.matches(Id0) ||
+ (Id1.isValid() && O.matches(Id1)) ||
+ (Id2.isValid() && O.matches(Id2)))
+ break;
+ }
+}
+
+//
+
+ArgList::ArgList() {
+}
+
+ArgList::~ArgList() {
+}
+
+void ArgList::append(Arg *A) {
+ Args.push_back(A);
+}
+
+void ArgList::eraseArg(OptSpecifier Id) {
+ for (iterator it = begin(), ie = end(); it != ie; ) {
+ if ((*it)->getOption().matches(Id)) {
+ it = Args.erase(it);
+ ie = end();
+ } else {
+ ++it;
+ }
+ }
+}
+
+Arg *ArgList::getLastArgNoClaim(OptSpecifier Id) const {
+ // FIXME: Make search efficient?
+ for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
+ if ((*it)->getOption().matches(Id))
+ return *it;
+ return 0;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) ||
+ (*it)->getOption().matches(Id3)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3,
+ OptSpecifier Id4) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) ||
+ (*it)->getOption().matches(Id3) ||
+ (*it)->getOption().matches(Id4)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3,
+ OptSpecifier Id4, OptSpecifier Id5) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) ||
+ (*it)->getOption().matches(Id3) ||
+ (*it)->getOption().matches(Id4) ||
+ (*it)->getOption().matches(Id5)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3,
+ OptSpecifier Id4, OptSpecifier Id5,
+ OptSpecifier Id6) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) ||
+ (*it)->getOption().matches(Id3) ||
+ (*it)->getOption().matches(Id4) ||
+ (*it)->getOption().matches(Id5) ||
+ (*it)->getOption().matches(Id6)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3,
+ OptSpecifier Id4, OptSpecifier Id5,
+ OptSpecifier Id6, OptSpecifier Id7) const {
+ Arg *Res = 0;
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+ if ((*it)->getOption().matches(Id0) ||
+ (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) ||
+ (*it)->getOption().matches(Id3) ||
+ (*it)->getOption().matches(Id4) ||
+ (*it)->getOption().matches(Id5) ||
+ (*it)->getOption().matches(Id6) ||
+ (*it)->getOption().matches(Id7)) {
+ Res = *it;
+ Res->claim();
+ }
+ }
+
+ return Res;
+}
+
+bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const {
+ if (Arg *A = getLastArg(Pos, Neg))
+ return A->getOption().matches(Pos);
+ return Default;
+}
+
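(Usage sketch; OPT_fpic and OPT_fno_pic stand in for tablegen-generated option IDs, which this library does not define itself.)

    // The last occurrence of either flag wins; Default applies when
    // neither appears on the command line.
    bool PIC = Args.hasFlag(OPT_fpic, OPT_fno_pic, /*Default=*/false);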
+StringRef ArgList::getLastArgValue(OptSpecifier Id,
+ StringRef Default) const {
+ if (Arg *A = getLastArg(Id))
+ return A->getValue();
+ return Default;
+}
+
+std::vector<std::string> ArgList::getAllArgValues(OptSpecifier Id) const {
+ SmallVector<const char *, 16> Values;
+ AddAllArgValues(Values, Id);
+ return std::vector<std::string>(Values.begin(), Values.end());
+}
+
+void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id) const {
+ if (Arg *A = getLastArg(Id)) {
+ A->claim();
+ A->render(*this, Output);
+ }
+}
+
+void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0,
+ OptSpecifier Id1, OptSpecifier Id2) const {
+ for (arg_iterator it = filtered_begin(Id0, Id1, Id2),
+ ie = filtered_end(); it != ie; ++it) {
+ (*it)->claim();
+ (*it)->render(*this, Output);
+ }
+}
+
+void ArgList::AddAllArgValues(ArgStringList &Output, OptSpecifier Id0,
+ OptSpecifier Id1, OptSpecifier Id2) const {
+ for (arg_iterator it = filtered_begin(Id0, Id1, Id2),
+ ie = filtered_end(); it != ie; ++it) {
+ (*it)->claim();
+ for (unsigned i = 0, e = (*it)->getNumValues(); i != e; ++i)
+ Output.push_back((*it)->getValue(i));
+ }
+}
+
+void ArgList::AddAllArgsTranslated(ArgStringList &Output, OptSpecifier Id0,
+ const char *Translation,
+ bool Joined) const {
+ for (arg_iterator it = filtered_begin(Id0),
+ ie = filtered_end(); it != ie; ++it) {
+ (*it)->claim();
+
+ if (Joined) {
+ Output.push_back(MakeArgString(StringRef(Translation) +
+ (*it)->getValue(0)));
+ } else {
+ Output.push_back(Translation);
+ Output.push_back((*it)->getValue(0));
+ }
+ }
+}
+
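(Sketch with a hypothetical OPT_L id: Joined=false emits two output strings per matching arg, Joined=true a single concatenated one.)

    // "-L/usr/lib" -> "--library-path", "/usr/lib"  (Joined = false)
    // "-L/usr/lib" -> "--library-path/usr/lib"      (Joined = true)
    Args.AddAllArgsTranslated(CmdArgs, OPT_L, "--library-path",
                              /*Joined=*/false);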
+void ArgList::ClaimAllArgs(OptSpecifier Id0) const {
+ for (arg_iterator it = filtered_begin(Id0),
+ ie = filtered_end(); it != ie; ++it)
+ (*it)->claim();
+}
+
+void ArgList::ClaimAllArgs() const {
+ for (const_iterator it = begin(), ie = end(); it != ie; ++it)
+ if (!(*it)->isClaimed())
+ (*it)->claim();
+}
+
+const char *ArgList::MakeArgString(const Twine &T) const {
+ SmallString<256> Str;
+ T.toVector(Str);
+ return MakeArgString(Str.str());
+}
+
+const char *ArgList::GetOrMakeJoinedArgString(unsigned Index,
+ StringRef LHS,
+ StringRef RHS) const {
+ StringRef Cur = getArgString(Index);
+ if (Cur.size() == LHS.size() + RHS.size() &&
+ Cur.startswith(LHS) && Cur.endswith(RHS))
+ return Cur.data();
+
+ return MakeArgString(LHS + RHS);
+}
+
+//
+
+InputArgList::InputArgList(const char* const *ArgBegin,
+ const char* const *ArgEnd)
+ : NumInputArgStrings(ArgEnd - ArgBegin) {
+ ArgStrings.append(ArgBegin, ArgEnd);
+}
+
+InputArgList::~InputArgList() {
+ // An InputArgList always owns its arguments.
+ for (iterator it = begin(), ie = end(); it != ie; ++it)
+ delete *it;
+}
+
+unsigned InputArgList::MakeIndex(StringRef String0) const {
+ unsigned Index = ArgStrings.size();
+
+ // Tuck away so we have a reliable const char *.
+ SynthesizedStrings.push_back(String0);
+ ArgStrings.push_back(SynthesizedStrings.back().c_str());
+
+ return Index;
+}
+
+unsigned InputArgList::MakeIndex(StringRef String0,
+ StringRef String1) const {
+ unsigned Index0 = MakeIndex(String0);
+ unsigned Index1 = MakeIndex(String1);
+ assert(Index0 + 1 == Index1 && "Unexpected non-consecutive indices!");
+ (void) Index1;
+ return Index0;
+}
+
+const char *InputArgList::MakeArgString(StringRef Str) const {
+ return getArgString(MakeIndex(Str));
+}
+
+//
+
+DerivedArgList::DerivedArgList(const InputArgList &_BaseArgs)
+ : BaseArgs(_BaseArgs) {
+}
+
+DerivedArgList::~DerivedArgList() {
+ // We only own the arguments we explicitly synthesized.
+ for (iterator it = SynthesizedArgs.begin(), ie = SynthesizedArgs.end();
+ it != ie; ++it)
+ delete *it;
+}
+
+const char *DerivedArgList::MakeArgString(StringRef Str) const {
+ return BaseArgs.MakeArgString(Str);
+}
+
+Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const {
+ Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+ Twine(Opt.getName())),
+ BaseArgs.MakeIndex(Opt.getName()), BaseArg);
+ SynthesizedArgs.push_back(A);
+ return A;
+}
+
+Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) const {
+ unsigned Index = BaseArgs.MakeIndex(Value);
+ Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+ Twine(Opt.getName())),
+ Index, BaseArgs.getArgString(Index), BaseArg);
+ SynthesizedArgs.push_back(A);
+ return A;
+}
+
+Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) const {
+ unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value);
+ Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+ Twine(Opt.getName())),
+ Index, BaseArgs.getArgString(Index + 1), BaseArg);
+ SynthesizedArgs.push_back(A);
+ return A;
+}
+
+Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt,
+ StringRef Value) const {
+ unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str());
+ Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+ Twine(Opt.getName())), Index,
+ BaseArgs.getArgString(Index) + Opt.getName().size(),
+ BaseArg);
+ SynthesizedArgs.push_back(A);
+ return A;
+}
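(Sketch of a driver layer synthesizing an argument; Opts and OPT_O are hypothetical, normally produced by tablegen.)

    DerivedArgList DAL(Input);
    // Forge "-O2" as if the user had written it; DAL owns the new Arg
    // and deletes it in its destructor, per above.
    DAL.append(DAL.MakeJoinedArg(/*BaseArg=*/0, Opts.getOption(OPT_O), "2"));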
diff --git a/lib/Option/CMakeLists.txt b/lib/Option/CMakeLists.txt
new file mode 100644
index 000000000000..2e7acc27a534
--- /dev/null
+++ b/lib/Option/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_library(LLVMOption
+ Arg.cpp
+ ArgList.cpp
+ Option.cpp
+ OptTable.cpp
+ )
+
+target_link_libraries(LLVMOption LLVMSupport)
diff --git a/lib/Option/LLVMBuild.txt b/lib/Option/LLVMBuild.txt
new file mode 100644
index 000000000000..0b78cf20c05f
--- /dev/null
+++ b/lib/Option/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Option/LLVMBuild.txt -------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Option
+parent = Libraries
+required_libraries = Support
diff --git a/lib/Option/Makefile b/lib/Option/Makefile
new file mode 100644
index 000000000000..255d0796e237
--- /dev/null
+++ b/lib/Option/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Option/Makefile ---------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMOption
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
new file mode 100644
index 000000000000..5c8a0eacd1f4
--- /dev/null
+++ b/lib/Option/OptTable.cpp
@@ -0,0 +1,387 @@
+//===--- OptTable.cpp - Option Table Implementation -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/OptTable.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <map>
+
+using namespace llvm;
+using namespace llvm::opt;
+
+// Ordering on Info. The ordering is *almost* lexicographic, with two
+// exceptions. First, '\0' comes at the end of the alphabet instead of
+// the beginning (thus options precede any other options which prefix
+// them). Second, for options with the same name, the less permissive
+// version should come first; a Flag option should precede a Joined
+// option, for example.
+
+static int StrCmpOptionName(const char *A, const char *B) {
+ char a = *A, b = *B;
+ while (a == b) {
+ if (a == '\0')
+ return 0;
+
+ a = *++A;
+ b = *++B;
+ }
+
+ if (a == '\0') // A is a prefix of B.
+ return 1;
+ if (b == '\0') // B is a prefix of A.
+ return -1;
+
+ // Otherwise lexicographic.
+ return (a < b) ? -1 : 1;
+}
+
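(Worked examples of the comparator, not in the source:)

    StrCmpOptionName("foo", "foz")    == -1  // ordinary lexicographic order
    StrCmpOptionName("foo", "foobar") ==  1  // "foo" is a prefix of "foobar";
                                             // '\0' sorts last, so the longer
                                             // name sorts first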
+namespace llvm {
+namespace opt {
+
+static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
+ if (&A == &B)
+ return false;
+
+ if (int N = StrCmpOptionName(A.Name, B.Name))
+ return N == -1;
+
+ for (const char * const *APre = A.Prefixes,
+ * const *BPre = B.Prefixes;
+ *APre != 0 && *BPre != 0; ++APre, ++BPre) {
+ if (int N = StrCmpOptionName(*APre, *BPre))
+ return N == -1;
+ }
+
+ // Names are the same, check that classes are in order; exactly one
+ // should be joined, and it should succeed the other.
+ assert(((A.Kind == Option::JoinedClass) ^ (B.Kind == Option::JoinedClass)) &&
+ "Unexpected classes for options with same name.");
+ return B.Kind == Option::JoinedClass;
+}
+
+// Support lower_bound between info and an option name.
+static inline bool operator<(const OptTable::Info &I, const char *Name) {
+ return StrCmpOptionName(I.Name, Name) == -1;
+}
+static inline bool operator<(const char *Name, const OptTable::Info &I) {
+ return StrCmpOptionName(Name, I.Name) == -1;
+}
+}
+}
+
+OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
+
+OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos)
+ : OptionInfos(_OptionInfos),
+ NumOptionInfos(_NumOptionInfos),
+ TheInputOptionID(0),
+ TheUnknownOptionID(0),
+ FirstSearchableIndex(0)
+{
+ // Explicitly zero initialize the error to work around a bug in array
+ // value-initialization on MinGW with gcc 4.3.5.
+
+ // Find start of normal options.
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ unsigned Kind = getInfo(i + 1).Kind;
+ if (Kind == Option::InputClass) {
+ assert(!TheInputOptionID && "Cannot have multiple input options!");
+ TheInputOptionID = getInfo(i + 1).ID;
+ } else if (Kind == Option::UnknownClass) {
+ assert(!TheUnknownOptionID && "Cannot have multiple unknown options!");
+ TheUnknownOptionID = getInfo(i + 1).ID;
+ } else if (Kind != Option::GroupClass) {
+ FirstSearchableIndex = i;
+ break;
+ }
+ }
+ assert(FirstSearchableIndex != 0 && "No searchable options?");
+
+#ifndef NDEBUG
+ // Check that everything after the first searchable option is a
+ // regular option class.
+ for (unsigned i = FirstSearchableIndex, e = getNumOptions(); i != e; ++i) {
+ Option::OptionClass Kind = (Option::OptionClass) getInfo(i + 1).Kind;
+ assert((Kind != Option::InputClass && Kind != Option::UnknownClass &&
+ Kind != Option::GroupClass) &&
+ "Special options should be defined first!");
+ }
+
+ // Check that options are in order.
+ for (unsigned i = FirstSearchableIndex + 1, e = getNumOptions(); i != e; ++i){
+ if (!(getInfo(i) < getInfo(i + 1))) {
+ getOption(i).dump();
+ getOption(i + 1).dump();
+ llvm_unreachable("Options are not in order!");
+ }
+ }
+#endif
+
+ // Build prefixes.
+ for (unsigned i = FirstSearchableIndex + 1, e = getNumOptions() + 1;
+ i != e; ++i) {
+ if (const char *const *P = getInfo(i).Prefixes) {
+ for (; *P != 0; ++P) {
+ PrefixesUnion.insert(*P);
+ }
+ }
+ }
+
+ // Build prefix chars.
+ for (llvm::StringSet<>::const_iterator I = PrefixesUnion.begin(),
+ E = PrefixesUnion.end(); I != E; ++I) {
+ StringRef Prefix = I->getKey();
+ for (StringRef::const_iterator C = Prefix.begin(), CE = Prefix.end();
+ C != CE; ++C)
+ if (std::find(PrefixChars.begin(), PrefixChars.end(), *C)
+ == PrefixChars.end())
+ PrefixChars.push_back(*C);
+ }
+}
+
+OptTable::~OptTable() {
+}
+
+const Option OptTable::getOption(OptSpecifier Opt) const {
+ unsigned id = Opt.getID();
+ if (id == 0)
+ return Option(0, 0);
+ assert((unsigned) (id - 1) < getNumOptions() && "Invalid ID.");
+ return Option(&getInfo(id), this);
+}
+
+bool OptTable::isOptionHelpHidden(OptSpecifier id) const {
+ return getInfo(id).Flags & HelpHidden;
+}
+
+static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) {
+ if (Arg == "-")
+ return true;
+ for (llvm::StringSet<>::const_iterator I = Prefixes.begin(),
+ E = Prefixes.end(); I != E; ++I)
+ if (Arg.startswith(I->getKey()))
+ return false;
+ return true;
+}
+
+/// \returns Matched size. 0 means no match.
+static unsigned matchOption(const OptTable::Info *I, StringRef Str) {
+ for (const char * const *Pre = I->Prefixes; *Pre != 0; ++Pre) {
+ StringRef Prefix(*Pre);
+ if (Str.startswith(Prefix) && Str.substr(Prefix.size()).startswith(I->Name))
+ return Prefix.size() + StringRef(I->Name).size();
+ }
+ return 0;
+}
+
+Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index) const {
+ unsigned Prev = Index;
+ const char *Str = Args.getArgString(Index);
+
+ // Anything that doesn't start with PrefixesUnion is an input, as is '-'
+ // itself.
+ if (isInput(PrefixesUnion, Str))
+ return new Arg(getOption(TheInputOptionID), Str, Index++, Str);
+
+ const Info *Start = OptionInfos + FirstSearchableIndex;
+ const Info *End = OptionInfos + getNumOptions();
+ StringRef Name = StringRef(Str).ltrim(PrefixChars);
+
+ // Search for the first option which could be a prefix.
+ Start = std::lower_bound(Start, End, Name.data());
+
+ // Options are stored in sorted order, with '\0' at the end of the
+ // alphabet. Since the only options which can accept a string must
+ // prefix it, we iteratively search for the next option which could
+ // be a prefix.
+ //
+ // FIXME: This is searching much more than necessary, but I am
+ // blanking on the simplest way to make it fast. We can solve this
+ // problem when we move to TableGen.
+ for (; Start != End; ++Start) {
+ unsigned ArgSize = 0;
+ // Scan for first option which is a proper prefix.
+ for (; Start != End; ++Start)
+ if ((ArgSize = matchOption(Start, Str)))
+ break;
+ if (Start == End)
+ break;
+
+ // See if this option matches.
+ if (Arg *A = Option(Start, this).accept(Args, Index, ArgSize))
+ return A;
+
+ // Otherwise, see if this argument was missing values.
+ if (Prev != Index)
+ return 0;
+ }
+
+ return new Arg(getOption(TheUnknownOptionID), Str, Index++, Str);
+}
+
+InputArgList *OptTable::ParseArgs(const char* const *ArgBegin,
+ const char* const *ArgEnd,
+ unsigned &MissingArgIndex,
+ unsigned &MissingArgCount) const {
+ InputArgList *Args = new InputArgList(ArgBegin, ArgEnd);
+
+ // FIXME: Handle '@' args (or at least error on them).
+
+ MissingArgIndex = MissingArgCount = 0;
+ unsigned Index = 0, End = ArgEnd - ArgBegin;
+ while (Index < End) {
+ // Ignore empty arguments (other things may still take them as arguments).
+ if (Args->getArgString(Index)[0] == '\0') {
+ ++Index;
+ continue;
+ }
+
+ unsigned Prev = Index;
+ Arg *A = ParseOneArg(*Args, Index);
+ assert(Index > Prev && "Parser failed to consume argument.");
+
+ // Check for missing argument error.
+ if (!A) {
+ assert(Index >= End && "Unexpected parser error.");
+ assert(Index - Prev - 1 && "No missing arguments!");
+ MissingArgIndex = Prev;
+ MissingArgCount = Index - Prev - 1;
+ break;
+ }
+
+ Args->append(A);
+ }
+
+ return Args;
+}
+
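(End-to-end sketch; MyOptTable is a hypothetical OptTable subclass whose Info array would normally be generated by tablegen.)

    MyOptTable Opts;
    unsigned MissingArgIndex, MissingArgCount;
    OwningPtr<InputArgList> Args(
        Opts.ParseArgs(argv + 1, argv + argc,
                       MissingArgIndex, MissingArgCount));
    if (MissingArgCount)
      llvm::errs() << "argument to '"
                   << Args->getArgString(MissingArgIndex)
                   << "' is missing, expected " << MissingArgCount
                   << " value(s)\n";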
+static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) {
+ const Option O = Opts.getOption(Id);
+ std::string Name = O.getPrefixedName();
+
+ // Add metavar, if used.
+ switch (O.getKind()) {
+ case Option::GroupClass: case Option::InputClass: case Option::UnknownClass:
+ llvm_unreachable("Invalid option with help text.");
+
+ case Option::MultiArgClass:
+ llvm_unreachable("Cannot print metavar for this kind of option.");
+
+ case Option::FlagClass:
+ break;
+
+ case Option::SeparateClass: case Option::JoinedOrSeparateClass:
+ Name += ' ';
+ // FALLTHROUGH
+ case Option::JoinedClass: case Option::CommaJoinedClass:
+ case Option::JoinedAndSeparateClass:
+ if (const char *MetaVarName = Opts.getOptionMetaVar(Id))
+ Name += MetaVarName;
+ else
+ Name += "<value>";
+ break;
+ }
+
+ return Name;
+}
+
+static void PrintHelpOptionList(raw_ostream &OS, StringRef Title,
+ std::vector<std::pair<std::string,
+ const char*> > &OptionHelp) {
+ OS << Title << ":\n";
+
+ // Find the maximum option length.
+ unsigned OptionFieldWidth = 0;
+ for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) {
+ // Skip titles.
+ if (!OptionHelp[i].second)
+ continue;
+
+ // Limit the amount of padding we are willing to give up for alignment.
+ unsigned Length = OptionHelp[i].first.size();
+ if (Length <= 23)
+ OptionFieldWidth = std::max(OptionFieldWidth, Length);
+ }
+
+ const unsigned InitialPad = 2;
+ for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) {
+ const std::string &Option = OptionHelp[i].first;
+ int Pad = OptionFieldWidth - int(Option.size());
+ OS.indent(InitialPad) << Option;
+
+ // Break on long option names.
+ if (Pad < 0) {
+ OS << "\n";
+ Pad = OptionFieldWidth + InitialPad;
+ }
+ OS.indent(Pad + 1) << OptionHelp[i].second << '\n';
+ }
+}
+
+static const char *getOptionHelpGroup(const OptTable &Opts, OptSpecifier Id) {
+ unsigned GroupID = Opts.getOptionGroupID(Id);
+
+ // If not in a group, return the default help group.
+ if (!GroupID)
+ return "OPTIONS";
+
+ // Abuse the help text of the option groups to store the "help group"
+ // name.
+ //
+ // FIXME: Split out option groups.
+ if (const char *GroupHelp = Opts.getOptionHelpText(GroupID))
+ return GroupHelp;
+
+ // Otherwise keep looking.
+ return getOptionHelpGroup(Opts, GroupID);
+}
+
+void OptTable::PrintHelp(raw_ostream &OS, const char *Name,
+ const char *Title, bool ShowHidden) const {
+ OS << "OVERVIEW: " << Title << "\n";
+ OS << '\n';
+ OS << "USAGE: " << Name << " [options] <inputs>\n";
+ OS << '\n';
+
+ // Render help text into a map of group-name to a list of (option, help)
+ // pairs.
+ typedef std::map<std::string,
+ std::vector<std::pair<std::string, const char*> > > helpmap_ty;
+ helpmap_ty GroupedOptionHelp;
+
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ unsigned Id = i + 1;
+
+ // FIXME: Split out option groups.
+ if (getOptionKind(Id) == Option::GroupClass)
+ continue;
+
+ if (!ShowHidden && isOptionHelpHidden(Id))
+ continue;
+
+ if (const char *Text = getOptionHelpText(Id)) {
+ const char *HelpGroup = getOptionHelpGroup(*this, Id);
+ const std::string &OptName = getOptionHelpName(*this, Id);
+ GroupedOptionHelp[HelpGroup].push_back(std::make_pair(OptName, Text));
+ }
+ }
+
+ for (helpmap_ty::iterator it = GroupedOptionHelp.begin(),
+ ie = GroupedOptionHelp.end(); it != ie; ++it) {
+ if (it != GroupedOptionHelp.begin())
+ OS << "\n";
+ PrintHelpOptionList(OS, it->first, it->second);
+ }
+
+ OS.flush();
+}
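(Usage sketch; the tool name and title are placeholders.)

    Opts.PrintHelp(llvm::outs(), "mytool", "My Tool Overview",
                   /*ShowHidden=*/false);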
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
new file mode 100644
index 000000000000..0e2263475e0c
--- /dev/null
+++ b/lib/Option/Option.cpp
@@ -0,0 +1,202 @@
+//===--- Option.cpp - Abstract Driver Options -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/Option.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::opt;
+
+Option::Option(const OptTable::Info *info, const OptTable *owner)
+ : Info(info), Owner(owner) {
+
+ // Multi-level aliases are not supported, and alias options cannot
+ // have groups. This just simplifies option tracking, it is not an
+ // inherent limitation.
+ assert((!Info || !getAlias().isValid() || (!getAlias().getAlias().isValid() &&
+ !getGroup().isValid())) &&
+ "Multi-level aliases and aliases with groups are unsupported.");
+}
+
+Option::~Option() {
+}
+
+void Option::dump() const {
+ llvm::errs() << "<";
+ switch (getKind()) {
+#define P(N) case N: llvm::errs() << #N; break
+ P(GroupClass);
+ P(InputClass);
+ P(UnknownClass);
+ P(FlagClass);
+ P(JoinedClass);
+ P(SeparateClass);
+ P(CommaJoinedClass);
+ P(MultiArgClass);
+ P(JoinedOrSeparateClass);
+ P(JoinedAndSeparateClass);
+#undef P
+ }
+
+ llvm::errs() << " Prefixes:[";
+ for (const char * const *Pre = Info->Prefixes; *Pre != 0; ++Pre) {
+ llvm::errs() << '"' << *Pre << (*(Pre + 1) == 0 ? "\"" : "\", ");
+ }
+ llvm::errs() << ']';
+
+ llvm::errs() << " Name:\"" << getName() << '"';
+
+ const Option Group = getGroup();
+ if (Group.isValid()) {
+ llvm::errs() << " Group:";
+ Group.dump();
+ }
+
+ const Option Alias = getAlias();
+ if (Alias.isValid()) {
+ llvm::errs() << " Alias:";
+ Alias.dump();
+ }
+
+ if (getKind() == MultiArgClass)
+ llvm::errs() << " NumArgs:" << getNumArgs();
+
+ llvm::errs() << ">\n";
+}
+
+bool Option::matches(OptSpecifier Opt) const {
+ // Aliases are never considered in matching, look through them.
+ const Option Alias = getAlias();
+ if (Alias.isValid())
+ return Alias.matches(Opt);
+
+ // Check exact match.
+ if (getID() == Opt.getID())
+ return true;
+
+ const Option Group = getGroup();
+ if (Group.isValid())
+ return Group.matches(Opt);
+ return false;
+}
+
+Arg *Option::accept(const ArgList &Args,
+ unsigned &Index,
+ unsigned ArgSize) const {
+ const Option &UnaliasedOption = getUnaliasedOption();
+ StringRef Spelling;
+ // If the option was an alias, get the spelling from the unaliased one.
+ if (getID() == UnaliasedOption.getID()) {
+ Spelling = StringRef(Args.getArgString(Index), ArgSize);
+ } else {
+ Spelling = Args.MakeArgString(Twine(UnaliasedOption.getPrefix()) +
+ Twine(UnaliasedOption.getName()));
+ }
+
+ switch (getKind()) {
+ case FlagClass:
+ if (ArgSize != strlen(Args.getArgString(Index)))
+ return 0;
+
+ return new Arg(UnaliasedOption, Spelling, Index++);
+ case JoinedClass: {
+ const char *Value = Args.getArgString(Index) + ArgSize;
+ return new Arg(UnaliasedOption, Spelling, Index++, Value);
+ }
+ case CommaJoinedClass: {
+ // Always matches.
+ const char *Str = Args.getArgString(Index) + ArgSize;
+ Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
+
+ // Parse out the comma separated values.
+ const char *Prev = Str;
+ for (;; ++Str) {
+ char c = *Str;
+
+ if (!c || c == ',') {
+ if (Prev != Str) {
+ char *Value = new char[Str - Prev + 1];
+ memcpy(Value, Prev, Str - Prev);
+ Value[Str - Prev] = '\0';
+ A->getValues().push_back(Value);
+ }
+
+ if (!c)
+ break;
+
+ Prev = Str + 1;
+ }
+ }
+ A->setOwnsValues(true);
+
+ return A;
+ }
+ case SeparateClass:
+ // Matches iff this is an exact match.
+ // FIXME: Avoid strlen.
+ if (ArgSize != strlen(Args.getArgString(Index)))
+ return 0;
+
+ Index += 2;
+ if (Index > Args.getNumInputArgStrings())
+ return 0;
+
+ return new Arg(UnaliasedOption, Spelling,
+ Index - 2, Args.getArgString(Index - 1));
+ case MultiArgClass: {
+ // Matches iff this is an exact match.
+ // FIXME: Avoid strlen.
+ if (ArgSize != strlen(Args.getArgString(Index)))
+ return 0;
+
+ Index += 1 + getNumArgs();
+ if (Index > Args.getNumInputArgStrings())
+ return 0;
+
+ Arg *A = new Arg(UnaliasedOption, Spelling, Index - 1 - getNumArgs(),
+ Args.getArgString(Index - getNumArgs()));
+ for (unsigned i = 1; i != getNumArgs(); ++i)
+ A->getValues().push_back(Args.getArgString(Index - getNumArgs() + i));
+ return A;
+ }
+ case JoinedOrSeparateClass: {
+ // If this is not an exact match, it is a joined arg.
+ // FIXME: Avoid strlen.
+ if (ArgSize != strlen(Args.getArgString(Index))) {
+ const char *Value = Args.getArgString(Index) + ArgSize;
+ return new Arg(*this, Spelling, Index++, Value);
+ }
+
+ // Otherwise it must be separate.
+ Index += 2;
+ if (Index > Args.getNumInputArgStrings())
+ return 0;
+
+ return new Arg(UnaliasedOption, Spelling,
+ Index - 2, Args.getArgString(Index - 1));
+ }
+ case JoinedAndSeparateClass:
+ // Always matches.
+ Index += 2;
+ if (Index > Args.getNumInputArgStrings())
+ return 0;
+
+ return new Arg(UnaliasedOption, Spelling, Index - 2,
+ Args.getArgString(Index - 2) + ArgSize,
+ Args.getArgString(Index - 1));
+ default:
+ llvm_unreachable("Invalid option kind!");
+ }
+}
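(Worked example for a JoinedOrSeparate option spelled "-o":)

    // argv {"-oout.o"}:     ArgSize (2) != strlen("-oout.o"), so the joined
    //                       form is taken; value "out.o", Index advances by 1.
    // argv {"-o", "out.o"}: exact match, so the separate form is taken;
    //                       value "out.o", Index advances by 2.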
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 7e8b4a3d0d29..6182e3415005 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -16,11 +16,12 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include <limits.h>
#include <cstring>
+#include <limits.h>
using namespace llvm;
@@ -101,26 +102,6 @@ decDigitValue(unsigned int c)
return c - '0';
}
-static unsigned int
-hexDigitValue(unsigned int c)
-{
- unsigned int r;
-
- r = c - '0';
- if (r <= 9)
- return r;
-
- r = c - 'A';
- if (r <= 5)
- return r + 10;
-
- r = c - 'a';
- if (r <= 5)
- return r + 10;
-
- return -1U;
-}
-
/* Return the value of a decimal exponent of the form
[+-]ddddddd.
@@ -697,6 +678,13 @@ APFloat::operator=(const APFloat &rhs)
}
bool
+APFloat::isDenormal() const {
+ return isNormal() && (exponent == semantics->minExponent) &&
+ (APInt::tcExtractBit(significandParts(),
+ semantics->precision - 1) == 0);
+}
+
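(Sanity sketch for the new predicate, illustration only:)

    // The smallest positive single-precision value is denormal; the
    // smallest *normalized* one is not.
    assert(APFloat::getSmallest(APFloat::IEEEsingle).isDenormal());
    assert(!APFloat::getSmallestNormalized(APFloat::IEEEsingle).isDenormal());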
+bool
APFloat::bitwiseIsEqual(const APFloat &rhs) const {
if (this == &rhs)
return true;
@@ -1925,6 +1913,12 @@ APFloat::convert(const fltSemantics &toSemantics,
*losesInfo = (fs != opOK);
} else if (category == fcNaN) {
*losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
+
+ // For x87 extended precision, we want to make a NaN, not a special NaN if
+ // the input wasn't special either.
+ if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
+ APInt::tcSetBit(significandParts(), semantics->precision - 1);
+
// gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
// does not give you back the same bits. This is dubious, and we
// don't currently do it. You're really supposed to get
@@ -2761,9 +2755,11 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
// normalize against the "double" minExponent first, and only *then*
// truncate the mantissa. The result of that second conversion
// may be inexact, but should never underflow.
- APFloat extended(*this);
+ // Declare fltSemantics before APFloat that uses it (and
+ // saves pointer to it) to ensure correct destruction order.
fltSemantics extendedSemantics = *semantics;
extendedSemantics.minExponent = IEEEdouble.minExponent;
+ APFloat extended(*this);
fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
@@ -3023,7 +3019,7 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
// Unless we have a special case, add in second double.
if (category == fcNormal) {
- APFloat v(APInt(64, i2));
+ APFloat v(IEEEdouble, APInt(64, i2));
fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
@@ -3176,27 +3172,43 @@ APFloat::initFromHalfAPInt(const APInt & api)
/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
/// when the size is anything else).
void
-APFloat::initFromAPInt(const APInt& api, bool isIEEE)
+APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
{
- if (api.getBitWidth() == 16)
+ if (Sem == &IEEEhalf)
return initFromHalfAPInt(api);
- else if (api.getBitWidth() == 32)
+ if (Sem == &IEEEsingle)
return initFromFloatAPInt(api);
- else if (api.getBitWidth()==64)
+ if (Sem == &IEEEdouble)
return initFromDoubleAPInt(api);
- else if (api.getBitWidth()==80)
+ if (Sem == &x87DoubleExtended)
return initFromF80LongDoubleAPInt(api);
- else if (api.getBitWidth()==128)
- return (isIEEE ?
- initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
- else
- llvm_unreachable(0);
+ if (Sem == &IEEEquad)
+ return initFromQuadrupleAPInt(api);
+ if (Sem == &PPCDoubleDouble)
+ return initFromPPCDoubleDoubleAPInt(api);
+
+ llvm_unreachable(0);
}
APFloat
APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
{
- return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
+ switch (BitWidth) {
+ case 16:
+ return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
+ case 32:
+ return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
+ case 64:
+ return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
+ case 80:
+ return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
+ case 128:
+ if (isIEEE)
+ return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
+ return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
+ default:
+ llvm_unreachable("Unknown floating bit width");
+ }
}
APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
@@ -3254,16 +3266,16 @@ APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
return Val;
}
-APFloat::APFloat(const APInt& api, bool isIEEE) {
- initFromAPInt(api, isIEEE);
+APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
+ initFromAPInt(&Sem, API);
}
APFloat::APFloat(float f) {
- initFromAPInt(APInt::floatToBits(f));
+ initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
}
APFloat::APFloat(double d) {
- initFromAPInt(APInt::doubleToBits(d));
+ initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
}
namespace {
@@ -3299,10 +3311,8 @@ namespace {
significand = significand.udiv(divisor);
- // Truncate the significand down to its active bit count, but
- // don't try to drop below 32.
- unsigned newPrecision = std::max(32U, significand.getActiveBits());
- significand = significand.trunc(newPrecision);
+ // Truncate the significand down to its active bit count.
+ significand = significand.trunc(significand.getActiveBits());
}
@@ -3439,7 +3449,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
AdjustToPrecision(significand, exp, FormatPrecision);
- llvm::SmallVector<char, 256> buffer;
+ SmallVector<char, 256> buffer;
// Fill the buffer.
unsigned precision = significand.getBitWidth();
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 38cfaed9d217..e8534753b46e 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -23,9 +23,9 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
-#include <limits>
-#include <cstring>
#include <cstdlib>
+#include <cstring>
+#include <limits>
using namespace llvm;
/// A utility function for allocating memory, checking for allocation failures,
@@ -559,12 +559,12 @@ bool APInt::slt(const APInt& RHS) const {
if (lhsNeg) {
// Sign bit is set so perform two's complement to make it positive
lhs.flipAllBits();
- lhs++;
+ ++lhs;
}
if (rhsNeg) {
// Sign bit is set so perform two's complement to make it positive
rhs.flipAllBits();
- rhs++;
+ ++rhs;
}
// Now we have unsigned values to compare so do the comparison if necessary
@@ -1876,6 +1876,17 @@ APInt APInt::udiv(const APInt& RHS) const {
return Quotient;
}
+APInt APInt::sdiv(const APInt &RHS) const {
+ if (isNegative()) {
+ if (RHS.isNegative())
+ return (-(*this)).udiv(-RHS);
+ return -((-(*this)).udiv(RHS));
+ }
+ if (RHS.isNegative())
+ return -(this->udiv(-RHS));
+ return this->udiv(RHS);
+}
+
APInt APInt::urem(const APInt& RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord()) {
@@ -1913,6 +1924,17 @@ APInt APInt::urem(const APInt& RHS) const {
return Remainder;
}
+APInt APInt::srem(const APInt &RHS) const {
+ if (isNegative()) {
+ if (RHS.isNegative())
+ return -((-(*this)).urem(-RHS));
+ return -((-(*this)).urem(RHS));
+ }
+ if (RHS.isNegative())
+ return this->urem(-RHS);
+ return this->urem(RHS);
+}
+
void APInt::udivrem(const APInt &LHS, const APInt &RHS,
APInt &Quotient, APInt &Remainder) {
// Get some size facts about the dividend and divisor
@@ -1953,6 +1975,24 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
}
+void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
+ APInt &Quotient, APInt &Remainder) {
+ if (LHS.isNegative()) {
+ if (RHS.isNegative())
+ APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
+ else {
+ APInt::udivrem(-LHS, RHS, Quotient, Remainder);
+ Quotient = -Quotient;
+ }
+ Remainder = -Remainder;
+ } else if (RHS.isNegative()) {
+ APInt::udivrem(LHS, -RHS, Quotient, Remainder);
+ Quotient = -Quotient;
+ } else {
+ APInt::udivrem(LHS, RHS, Quotient, Remainder);
+ }
+}
+
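(Sanity sketch: the new helpers implement C-style truncating division.)

    APInt A(8, -7, /*isSigned=*/true), B(8, 2);
    assert(A.sdiv(B) == APInt(8, -3, true));  // quotient truncates toward zero
    assert(A.srem(B) == APInt(8, -1, true));  // remainder keeps dividend's sign
    APInt Q(8, 0), R(8, 0);
    APInt::sdivrem(A, B, Q, R);               // Q == -3, R == -1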
APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
APInt Res = *this+RHS;
Overflow = isNonNegative() == RHS.isNonNegative() &&
@@ -2076,7 +2116,7 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
}
// If its negative, put it in two's complement form
if (isNeg) {
- (*this)--;
+ --(*this);
this->flipAllBits();
}
}
@@ -2157,7 +2197,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
// Flip the bits and add one to turn it into the equivalent positive
// value and put a '-' in the result.
Tmp.flipAllBits();
- Tmp++;
+ ++Tmp;
Str.push_back('-');
}
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index b8978302e746..3c4191b805a3 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Memory.h"
#include "llvm/Support/Recycler.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Memory.h"
#include <cstring>
namespace llvm {
@@ -82,6 +83,7 @@ void BumpPtrAllocator::Reset() {
CurSlab->NextPtr = 0;
CurPtr = (char*)(CurSlab + 1);
End = ((char*)CurSlab) + CurSlab->Size;
+ BytesAllocated = 0;
}
/// Allocate - Allocate space at the specified alignment.
@@ -102,6 +104,10 @@ void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) {
// Check if we can hold it.
if (Ptr + Size <= End) {
CurPtr = Ptr + Size;
+ // Update the allocation point of this memory block in MemorySanitizer.
+ // Without this, MemorySanitizer messages for values originated from here
+ // will point to the allocation of the entire slab.
+ __msan_allocated_memory(Ptr, Size);
return Ptr;
}
@@ -117,6 +123,7 @@ void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) {
Ptr = AlignPtr((char*)(NewSlab + 1), Alignment);
assert((uintptr_t)Ptr + Size <= (uintptr_t)NewSlab + NewSlab->Size);
+ __msan_allocated_memory(Ptr, Size);
return Ptr;
}
@@ -125,6 +132,7 @@ void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) {
Ptr = AlignPtr(CurPtr, Alignment);
CurPtr = Ptr + Size;
assert(CurPtr <= End && "Unable to allocate memory!");
+ __msan_allocated_memory(Ptr, Size);
return Ptr;
}
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 6af0f4a6c938..3746a810114f 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -8,6 +8,8 @@ add_llvm_library(LLVMSupport
circular_raw_ostream.cpp
CommandLine.cpp
ConstantRange.cpp
+ ConvertUTF.c
+ ConvertUTFWrapper.cpp
CrashRecoveryContext.cpp
DataExtractor.cpp
DataStream.cpp
@@ -50,6 +52,7 @@ add_llvm_library(LLVMSupport
Triple.cpp
Twine.cpp
YAMLParser.cpp
+ YAMLTraits.cpp
raw_os_ostream.cpp
raw_ostream.cpp
regcomp.c
@@ -80,6 +83,7 @@ add_llvm_library(LLVMSupport
Threading.cpp
TimeValue.cpp
Valgrind.cpp
+ Watchdog.cpp
Unix/Host.inc
Unix/Memory.inc
Unix/Mutex.inc
@@ -92,6 +96,7 @@ add_llvm_library(LLVMSupport
Unix/system_error.inc
Unix/ThreadLocal.inc
Unix/TimeValue.inc
+ Unix/Watchdog.inc
Windows/DynamicLibrary.inc
Windows/Host.inc
Windows/Memory.inc
@@ -105,4 +110,5 @@ add_llvm_library(LLVMSupport
Windows/system_error.inc
Windows/ThreadLocal.inc
Windows/TimeValue.inc
+ Windows/Watchdog.inc
)
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index fc4f1891d95f..560d7eb289c6 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -17,20 +17,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/Path.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
#include <cerrno>
#include <cstdlib>
using namespace llvm;
@@ -1222,14 +1222,10 @@ sortOpts(StringMap<Option*> &OptMap,
namespace {
class HelpPrinter {
- size_t MaxArgLen;
- const Option *EmptyArg;
const bool ShowHidden;
public:
- explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {
- EmptyArg = 0;
- }
+ explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {}
void operator=(bool Value) {
if (Value == false) return;
@@ -1266,7 +1262,7 @@ public:
outs() << "\n\n";
// Compute the maximum argument length...
- MaxArgLen = 0;
+ size_t MaxArgLen = 0;
for (size_t i = 0, e = Opts.size(); i != e; ++i)
MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index 720ef36c4640..5c5895026b67 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -21,7 +21,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/InstrTypes.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Support/ConvertUTF.c b/lib/Support/ConvertUTF.c
new file mode 100644
index 000000000000..23f17ca25aea
--- /dev/null
+++ b/lib/Support/ConvertUTF.c
@@ -0,0 +1,571 @@
+/*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===------------------------------------------------------------------------=*/
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/* ---------------------------------------------------------------------
+
+ Conversions between UTF32, UTF-16, and UTF-8. Source code file.
+ Author: Mark E. Davis, 1994.
+ Rev History: Rick McGowan, fixes & updates May 2001.
+ Sept 2001: fixed const & error conditions per
+ mods suggested by S. Parent & A. Lillich.
+ June 2002: Tim Dodd added detection and handling of incomplete
+ source sequences, enhanced error detection, added casts
+ to eliminate compiler warnings.
+ July 2003: slight mods to back out aggressive FFFE detection.
+ Jan 2004: updated switches in from-UTF8 conversions.
+ Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
+
+ See the header file "ConvertUTF.h" for complete documentation.
+
+------------------------------------------------------------------------ */
+
+
+#include "llvm/Support/ConvertUTF.h"
+#ifdef CVTUTF_DEBUG
+#include <stdio.h>
+#endif
+
+static const int halfShift = 10; /* used for shifting by 10 bits */
+
+static const UTF32 halfBase = 0x0010000UL;
+static const UTF32 halfMask = 0x3FFUL;
+
+#define UNI_SUR_HIGH_START (UTF32)0xD800
+#define UNI_SUR_HIGH_END (UTF32)0xDBFF
+#define UNI_SUR_LOW_START (UTF32)0xDC00
+#define UNI_SUR_LOW_END (UTF32)0xDFFF
+#define false 0
+#define true 1
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 values can't have 4 or 5 trailing bytes. The table is
+ * left as-is for anyone who may want to do such conversion, which was
+ * allowed in earlier algorithms.
+ */
+static const char trailingBytesForUTF8[256] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/*
+ * Magic values subtracted from a buffer value during UTF8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence.
+ */
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+/*
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+ * into the first byte, depending on how many bytes follow. There are
+ * as many entries in this table as there are UTF-8 sequence types.
+ * (I.e., one byte sequence, two byte... etc.). Remember that sequences
+ * for *legal* UTF-8 will be 4 or fewer bytes total.
+ */
+static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+/* --------------------------------------------------------------------- */
+
+/* The interface converts a whole buffer to avoid function-call overhead.
+ * Constants have been gathered. Loops & conditionals have been removed as
+ * much as possible for efficiency, in favor of drop-through switches.
+ * (See "Note A" at the bottom of the file for equivalent code.)
+ * If your compiler supports it, the "isLegalUTF8" call can be turned
+ * into an inline function.
+ */
+
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF32toUTF16 (
+ const UTF32** sourceStart, const UTF32* sourceEnd,
+ UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF32* source = *sourceStart;
+ UTF16* target = *targetStart;
+ while (source < sourceEnd) {
+ UTF32 ch;
+ if (target >= targetEnd) {
+ result = targetExhausted; break;
+ }
+ ch = *source++;
+ if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+ /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+ if (flags == strictConversion) {
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ } else {
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ } else {
+ *target++ = (UTF16)ch; /* normal case */
+ }
+ } else if (ch > UNI_MAX_LEGAL_UTF32) {
+ if (flags == strictConversion) {
+ result = sourceIllegal;
+ } else {
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ } else {
+ /* target is a character in range 0x10000 - 0x10FFFF. */
+ if (target + 1 >= targetEnd) {
+ --source; /* Back up source pointer! */
+ result = targetExhausted; break;
+ }
+ ch -= halfBase;
+ *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
+ *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
+ }
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
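(Worked instance of the surrogate arithmetic above, illustration only:)

    /* U+1F600: ch - halfBase = 0xF600, so
     *   high: (0xF600 >> halfShift) + UNI_SUR_HIGH_START = 0x3D  + 0xD800 = 0xD83D
     *   low:  (0xF600 &  halfMask)  + UNI_SUR_LOW_START  = 0x200 + 0xDC00 = 0xDE00
     * i.e. U+1F600 becomes the surrogate pair <0xD83D, 0xDE00>. */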
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF16toUTF32 (
+ const UTF16** sourceStart, const UTF16* sourceEnd,
+ UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF16* source = *sourceStart;
+ UTF32* target = *targetStart;
+ UTF32 ch, ch2;
+ while (source < sourceEnd) {
+ const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
+ ch = *source++;
+ /* If we have a surrogate pair, convert to UTF32 first. */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+ /* If the 16 bits following the high surrogate are in the source buffer... */
+ if (source < sourceEnd) {
+ ch2 = *source;
+ /* If it's a low surrogate, convert to UTF32. */
+ if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+ ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ + (ch2 - UNI_SUR_LOW_START) + halfBase;
+ ++source;
+ } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ }
+ } else { /* We don't have the 16 bits following the high surrogate. */
+ --source; /* return to the high surrogate */
+ result = sourceExhausted;
+ break;
+ }
+ } else if (flags == strictConversion) {
+ /* UTF-16 surrogate values are illegal in UTF-32 */
+ if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ }
+ }
+ if (target >= targetEnd) {
+ source = oldSource; /* Back up source pointer! */
+ result = targetExhausted; break;
+ }
+ *target++ = ch;
+ }
+ *sourceStart = source;
+ *targetStart = target;
+#ifdef CVTUTF_DEBUG
+if (result == sourceIllegal) {
+ fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
+ fflush(stderr);
+}
+#endif
+ return result;
+}
+ConversionResult ConvertUTF16toUTF8 (
+ const UTF16** sourceStart, const UTF16* sourceEnd,
+ UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF16* source = *sourceStart;
+ UTF8* target = *targetStart;
+ while (source < sourceEnd) {
+ UTF32 ch;
+ unsigned short bytesToWrite = 0;
+ const UTF32 byteMask = 0xBF;
+ const UTF32 byteMark = 0x80;
+ const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
+ ch = *source++;
+ /* If we have a surrogate pair, convert to UTF32 first. */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+ /* If the 16 bits following the high surrogate are in the source buffer... */
+ if (source < sourceEnd) {
+ UTF32 ch2 = *source;
+ /* If it's a low surrogate, convert to UTF32. */
+ if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+ ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ + (ch2 - UNI_SUR_LOW_START) + halfBase;
+ ++source;
+ } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ }
+ } else { /* We don't have the 16 bits following the high surrogate. */
+ --source; /* return to the high surrogate */
+ result = sourceExhausted;
+ break;
+ }
+ } else if (flags == strictConversion) {
+ /* UTF-16 surrogate values are illegal in UTF-32 */
+ if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ }
+ }
+ /* Figure out how many bytes the result will require */
+ if (ch < (UTF32)0x80) { bytesToWrite = 1;
+ } else if (ch < (UTF32)0x800) { bytesToWrite = 2;
+ } else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
+ } else if (ch < (UTF32)0x110000) { bytesToWrite = 4;
+ } else { bytesToWrite = 3;
+ ch = UNI_REPLACEMENT_CHAR;
+ }
+
+ target += bytesToWrite;
+ if (target > targetEnd) {
+ source = oldSource; /* Back up source pointer! */
+ target -= bytesToWrite; result = targetExhausted; break;
+ }
+ switch (bytesToWrite) { /* note: everything falls through. */
+ case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]);
+ }
+ target += bytesToWrite;
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF32toUTF8 (
+ const UTF32** sourceStart, const UTF32* sourceEnd,
+ UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF32* source = *sourceStart;
+ UTF8* target = *targetStart;
+ while (source < sourceEnd) {
+ UTF32 ch;
+ unsigned short bytesToWrite = 0;
+ const UTF32 byteMask = 0xBF;
+ const UTF32 byteMark = 0x80;
+ ch = *source++;
+ if (flags == strictConversion ) {
+ /* UTF-16 surrogate values are illegal in UTF-32 */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+ --source; /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ }
+ }
+ /*
+ * Figure out how many bytes the result will require. Turn any
+ * illegally large UTF32 things (> Plane 17) into replacement chars.
+ */
+ if (ch < (UTF32)0x80) { bytesToWrite = 1;
+ } else if (ch < (UTF32)0x800) { bytesToWrite = 2;
+ } else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
+ } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4;
+ } else { bytesToWrite = 3;
+ ch = UNI_REPLACEMENT_CHAR;
+ result = sourceIllegal;
+ }
+
+ target += bytesToWrite;
+ if (target > targetEnd) {
+ --source; /* Back up source pointer! */
+ target -= bytesToWrite; result = targetExhausted; break;
+ }
+ switch (bytesToWrite) { /* note: everything falls through. */
+ case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+ case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
+ }
+ target += bytesToWrite;
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
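(Worked instance of the byte-splitting switch above, illustration only:)

    /* U+20AC needs 3 bytes; working backwards through the switch:
     *   byte 3: (0x20AC | 0x80) & 0xBF = 0xAC, then ch >>= 6 -> 0x82
     *   byte 2: (0x82   | 0x80) & 0xBF = 0x82, then ch >>= 6 -> 0x02
     *   byte 1:  0x02   | firstByteMark[3] (0xE0)            = 0xE2
     * giving the UTF-8 sequence E2 82 AC. */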
+/* --------------------------------------------------------------------- */
+
+/*
+ * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+ * This must be called with the length pre-determined by the first byte.
+ * If not calling this from ConvertUTF8to*, then the length can be set by:
+ * length = trailingBytesForUTF8[*source]+1;
+ * and the sequence is illegal right away if there aren't that many bytes
+ * available.
+ * If presented with a length > 4, this returns false. The Unicode
+ * definition of UTF-8 goes up to 4-byte sequences.
+ */
+
+static Boolean isLegalUTF8(const UTF8 *source, int length) {
+ UTF8 a;
+ const UTF8 *srcptr = source+length;
+ switch (length) {
+ default: return false;
+ /* Everything else falls through when "true"... */
+ case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+ case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+ case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
+
+ switch (*source) {
+ /* no fall-through in this inner switch */
+ case 0xE0: if (a < 0xA0) return false; break;
+ case 0xED: if (a > 0x9F) return false; break;
+ case 0xF0: if (a < 0x90) return false; break;
+ case 0xF4: if (a > 0x8F) return false; break;
+ default: if (a < 0x80) return false;
+ }
+
+ case 1: if (*source >= 0x80 && *source < 0xC2) return false;
+ }
+ if (*source > 0xF4) return false;
+ return true;
+}
+
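(Worked rejection example, illustration only:)

    /* The overlong two-byte sequence C0 AF (an illegal encoding of '/')
     * is caught here: length 2 falls through to case 1, where
     * *source == 0xC0 satisfies (>= 0x80 && < 0xC2), returning false. */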
+/* --------------------------------------------------------------------- */
+
+/*
+ * Exported function to return whether a UTF-8 sequence is legal or not.
+ * This is not used here; it's just exported.
+ */
+Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
+ int length = trailingBytesForUTF8[*source]+1;
+ if (length > sourceEnd - source) {
+ return false;
+ }
+ return isLegalUTF8(source, length);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Exported function to return the total number of bytes in a codepoint
+ * represented in UTF-8, given the value of the first byte.
+ */
+unsigned getNumBytesForUTF8(UTF8 first) {
+ return trailingBytesForUTF8[first] + 1;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Exported function to return whether a UTF-8 string is legal or not.
+ * This is not used here; it's just exported.
+ */
+Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
+ while (*source != sourceEnd) {
+ int length = trailingBytesForUTF8[**source] + 1;
+ if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
+ return false;
+ *source += length;
+ }
+ return true;
+}
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF8toUTF16 (
+ const UTF8** sourceStart, const UTF8* sourceEnd,
+ UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF8* source = *sourceStart;
+ UTF16* target = *targetStart;
+ while (source < sourceEnd) {
+ UTF32 ch = 0;
+ unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+ if (extraBytesToRead >= sourceEnd - source) {
+ result = sourceExhausted; break;
+ }
+ /* Do this check whether lenient or strict */
+ if (!isLegalUTF8(source, extraBytesToRead+1)) {
+ result = sourceIllegal;
+ break;
+ }
+ /*
+ * The cases all fall through. See "Note A" below.
+ */
+ switch (extraBytesToRead) {
+ case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+ case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+ case 3: ch += *source++; ch <<= 6;
+ case 2: ch += *source++; ch <<= 6;
+ case 1: ch += *source++; ch <<= 6;
+ case 0: ch += *source++;
+ }
+ ch -= offsetsFromUTF8[extraBytesToRead];
+
+ if (target >= targetEnd) {
+ source -= (extraBytesToRead+1); /* Back up source pointer! */
+ result = targetExhausted; break;
+ }
+ if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+ /* UTF-16 surrogate values are illegal in UTF-32 */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+ if (flags == strictConversion) {
+ source -= (extraBytesToRead+1); /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ } else {
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ } else {
+ *target++ = (UTF16)ch; /* normal case */
+ }
+ } else if (ch > UNI_MAX_UTF16) {
+ if (flags == strictConversion) {
+ result = sourceIllegal;
+ source -= (extraBytesToRead+1); /* return to the start */
+ break; /* Bail out; shouldn't continue */
+ } else {
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ } else {
+ /* target is a character in range 0xFFFF - 0x10FFFF. */
+ if (target + 1 >= targetEnd) {
+ source -= (extraBytesToRead+1); /* Back up source pointer! */
+ result = targetExhausted; break;
+ }
+ ch -= halfBase;
+ *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
+ *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
+ }
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF8toUTF32 (
+ const UTF8** sourceStart, const UTF8* sourceEnd,
+ UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF8* source = *sourceStart;
+ UTF32* target = *targetStart;
+ while (source < sourceEnd) {
+ UTF32 ch = 0;
+ unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+ if (extraBytesToRead >= sourceEnd - source) {
+ result = sourceExhausted; break;
+ }
+ /* Do this check whether lenient or strict */
+ if (!isLegalUTF8(source, extraBytesToRead+1)) {
+ result = sourceIllegal;
+ break;
+ }
+ /*
+ * The cases all fall through. See "Note A" below.
+ */
+ switch (extraBytesToRead) {
+ case 5: ch += *source++; ch <<= 6;
+ case 4: ch += *source++; ch <<= 6;
+ case 3: ch += *source++; ch <<= 6;
+ case 2: ch += *source++; ch <<= 6;
+ case 1: ch += *source++; ch <<= 6;
+ case 0: ch += *source++;
+ }
+ ch -= offsetsFromUTF8[extraBytesToRead];
+
+ if (target >= targetEnd) {
+ source -= (extraBytesToRead+1); /* Back up the source pointer! */
+ result = targetExhausted; break;
+ }
+ if (ch <= UNI_MAX_LEGAL_UTF32) {
+ /*
+ * UTF-16 surrogate values are illegal in UTF-32, and anything
+ * over Plane 17 (> 0x10FFFF) is illegal.
+ */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+ if (flags == strictConversion) {
+ source -= (extraBytesToRead+1); /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ } else {
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ } else {
+ *target++ = ch;
+ }
+ } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
+ result = sourceIllegal;
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
+/* ---------------------------------------------------------------------
+
+ Note A.
+ The fall-through switches in UTF-8 reading code save a
+ temp variable, some decrements & conditionals. The switches
+ are equivalent to the following loop:
+ {
+ int tmpBytesToRead = extraBytesToRead+1;
+ do {
+ ch += *source++;
+ --tmpBytesToRead;
+ if (tmpBytesToRead) ch <<= 6;
+ } while (tmpBytesToRead > 0);
+ }
+ In UTF-8 writing code, the switches on "bytesToWrite" are
+ similarly unrolled loops.
+
+ --------------------------------------------------------------------- */
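Taken together, the converters above share one calling convention: the source and target cursors are passed by address and advanced past whatever was consumed or produced, so callers can detect partial progress and resume. A minimal sketch of driving ConvertUTF8toUTF16 under that convention; the container choice, sizing, and helper name are illustrative, not part of this file:

#include "llvm/Support/ConvertUTF.h"
#include <vector>

// Convert a UTF-8 byte string to UTF-16. One UTF-16 unit per input
// byte is a safe worst case (ASCII bytes each produce one unit).
static bool toUTF16(const std::vector<UTF8> &In, std::vector<UTF16> &Out) {
  if (In.empty()) { Out.clear(); return true; }
  Out.resize(In.size());
  const UTF8 *Src = &In[0];
  UTF16 *Dst = &Out[0];
  ConversionResult R = ConvertUTF8toUTF16(&Src, Src + In.size(),
                                          &Dst, Dst + Out.size(),
                                          strictConversion);
  Out.resize(Dst - &Out[0]);   // keep only what was actually written
  return R == conversionOK;    // on failure, Src marks the bad sequence
}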
diff --git a/lib/Support/ConvertUTFWrapper.cpp b/lib/Support/ConvertUTFWrapper.cpp
new file mode 100644
index 000000000000..458fbb0b496a
--- /dev/null
+++ b/lib/Support/ConvertUTFWrapper.cpp
@@ -0,0 +1,76 @@
+//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ConvertUTF.h"
+
+namespace llvm {
+
+bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
+ char *&ResultPtr, const UTF8 *&ErrorPtr) {
+ assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
+ ConversionResult result = conversionOK;
+ // Copy the character span over.
+ if (WideCharWidth == 1) {
+ const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.begin());
+ if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.end()))) {
+ result = sourceIllegal;
+ ErrorPtr = Pos;
+ } else {
+ memcpy(ResultPtr, Source.data(), Source.size());
+ ResultPtr += Source.size();
+ }
+ } else if (WideCharWidth == 2) {
+ const UTF8 *sourceStart = (const UTF8*)Source.data();
+ // FIXME: Make the type of the result buffer correct instead of
+ // using reinterpret_cast.
+ UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
+ ConversionFlags flags = strictConversion;
+ result = ConvertUTF8toUTF16(
+ &sourceStart, sourceStart + Source.size(),
+ &targetStart, targetStart + 2*Source.size(), flags);
+ if (result == conversionOK)
+ ResultPtr = reinterpret_cast<char*>(targetStart);
+ else
+ ErrorPtr = sourceStart;
+ } else if (WideCharWidth == 4) {
+ const UTF8 *sourceStart = (const UTF8*)Source.data();
+ // FIXME: Make the type of the result buffer correct instead of
+ // using reinterpret_cast.
+ UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
+ ConversionFlags flags = strictConversion;
+ result = ConvertUTF8toUTF32(
+ &sourceStart, sourceStart + Source.size(),
+ &targetStart, targetStart + 4*Source.size(), flags);
+ if (result == conversionOK)
+ ResultPtr = reinterpret_cast<char*>(targetStart);
+ else
+ ErrorPtr = sourceStart;
+ }
+ assert((result != targetExhausted)
+ && "ConvertUTF8toUTFXX exhausted target buffer");
+ return result == conversionOK;
+}
+
+bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
+ const UTF32 *SourceStart = &Source;
+ const UTF32 *SourceEnd = SourceStart + 1;
+ UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
+ UTF8 *TargetEnd = TargetStart + 4;
+ ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
+ &TargetStart, TargetEnd,
+ strictConversion);
+ if (CR != conversionOK)
+ return false;
+
+ ResultPtr = reinterpret_cast<char*>(TargetStart);
+ return true;
+}
+
+} // end namespace llvm
+
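For reference, a hedged sketch of how a caller might drive the wrapper just added. The helper name is hypothetical, and the output sizing deliberately matches the 2*Source.size() UTF16-unit bound the wrapper itself computes for its target end:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ConvertUTF.h"

// UTF-8 -> UTF-16 through the wrapper. The caller owns the buffer;
// ConvertUTF8toWide advances ResultPtr past what it wrote and sets
// ErrorPtr near the offending input on failure.
static bool toWide16(llvm::StringRef Source,
                     llvm::SmallVectorImpl<char> &Out) {
  Out.resize(Source.size() * 4);  // 2*size UTF16 units, 2 bytes each
  char *ResultPtr = Out.data();
  const UTF8 *ErrorPtr = 0;
  if (!llvm::ConvertUTF8toWide(2, Source, ResultPtr, ErrorPtr))
    return false;
  Out.resize(ResultPtr - Out.data());
  return true;
}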
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index e175056279cc..182c362cc755 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -10,11 +10,11 @@
#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/ThreadLocal.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <setjmp.h>
#include <cstdio>
+#include <setjmp.h>
using namespace llvm;
namespace {
diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp
index 3a38e2a66b43..0a02281c2549 100644
--- a/lib/Support/DataStream.cpp
+++ b/lib/Support/DataStream.cpp
@@ -15,13 +15,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "Data-stream"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/DataStream.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/system_error.h"
-#include <string>
#include <cerrno>
#include <cstdio>
+#include <string>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#else
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index c8e8900749bb..d9cb8a9da815 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -23,10 +23,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/circular_raw_ostream.h"
using namespace llvm;
@@ -44,7 +44,7 @@ Debug("debug", cl::desc("Enable debug output"), cl::Hidden,
//until program termination.
static cl::opt<unsigned>
DebugBufferSize("debug-buffer-size",
- cl::desc("Buffer the last N characters of debug output"
+ cl::desc("Buffer the last N characters of debug output "
"until program termination. "
"[default 0 -- immediate print-out]"),
cl::Hidden,
diff --git a/lib/Support/Disassembler.cpp b/lib/Support/Disassembler.cpp
index c6d73bcad3e4..b3244fab7df7 100644
--- a/lib/Support/Disassembler.cpp
+++ b/lib/Support/Disassembler.cpp
@@ -12,13 +12,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Config/config.h"
#include "llvm/Support/Disassembler.h"
-
+#include "llvm/Config/config.h"
#include <cassert>
#include <iomanip>
-#include <string>
#include <sstream>
+#include <string>
#if USE_UDIS86
#include <udis86.h>
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index 5c59a3ef8ef3..0f91c11ac260 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -80,8 +80,6 @@ const char *llvm::dwarf::TagString(unsigned Tag) {
case DW_TAG_hi_user: return "DW_TAG_hi_user";
case DW_TAG_auto_variable: return "DW_TAG_auto_variable";
case DW_TAG_arg_variable: return "DW_TAG_arg_variable";
- case DW_TAG_return_variable: return "DW_TAG_return_variable";
- case DW_TAG_vector_type: return "DW_TAG_vector_type";
case DW_TAG_rvalue_reference_type: return "DW_TAG_rvalue_reference_type";
case DW_TAG_template_alias: return "DW_TAG_template_alias";
case DW_TAG_MIPS_loop: return "DW_TAG_MIPS_loop";
@@ -248,6 +246,14 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
case DW_AT_APPLE_property_attribute: return "DW_AT_APPLE_property_attribute";
case DW_AT_APPLE_property: return "DW_AT_APPLE_property";
case DW_AT_APPLE_objc_complete_type: return "DW_AT_APPLE_objc_complete_type";
+
+ // DWARF5 Fission Extension Attribute
+ case DW_AT_GNU_dwo_name: return "DW_AT_GNU_dwo_name";
+ case DW_AT_GNU_dwo_id: return "DW_AT_GNU_dwo_id";
+ case DW_AT_GNU_ranges_base: return "DW_AT_GNU_ranges_base";
+ case DW_AT_GNU_addr_base: return "DW_AT_GNU_addr_base";
+ case DW_AT_GNU_pubnames: return "DW_AT_GNU_pubnames";
+ case DW_AT_GNU_pubtypes: return "DW_AT_GNU_pubtypes";
}
return 0;
}
@@ -281,6 +287,10 @@ const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
case DW_FORM_exprloc: return "DW_FORM_exprloc";
case DW_FORM_flag_present: return "DW_FORM_flag_present";
case DW_FORM_ref_sig8: return "DW_FORM_ref_sig8";
+
+ // DWARF5 Fission Extension Forms
+ case DW_FORM_GNU_addr_index: return "DW_FORM_GNU_addr_index";
+ case DW_FORM_GNU_str_index: return "DW_FORM_GNU_str_index";
}
return 0;
}
@@ -445,6 +455,10 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
case DW_OP_stack_value: return "DW_OP_stack_value";
case DW_OP_lo_user: return "DW_OP_lo_user";
case DW_OP_hi_user: return "DW_OP_hi_user";
+
+ // DWARF5 Fission Proposal Op Extensions
+ case DW_OP_GNU_addr_index: return "DW_OP_GNU_addr_index";
+ case DW_OP_GNU_const_index: return "DW_OP_GNU_const_index";
}
return 0;
}
@@ -674,6 +688,7 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
/// encodings.
const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
switch (Encoding) {
+ case DW_CFA_nop: return "DW_CFA_nop";
case DW_CFA_advance_loc: return "DW_CFA_advance_loc";
case DW_CFA_offset: return "DW_CFA_offset";
case DW_CFA_restore: return "DW_CFA_restore";
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index 45fec361c1a6..f14cb45d9dc0 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -13,11 +13,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/Mutex.h"
#include <cstdio>
#include <cstring>
@@ -46,7 +46,7 @@ void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName,
void *symbolValue) {
SmartScopedLock<true> lock(getMutex());
if (ExplicitSymbols == 0)
- ExplicitSymbols = new llvm::StringMap<void*>();
+ ExplicitSymbols = new StringMap<void*>();
(*ExplicitSymbols)[symbolName] = symbolValue;
}
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index e6cc57db8243..f4b591e777eb 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -12,14 +12,14 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Threading.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Config/config.h"
+#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdlib>
@@ -49,21 +49,21 @@ void llvm::remove_fatal_error_handler() {
ErrorHandler = 0;
}
-void llvm::report_fatal_error(const char *Reason) {
- report_fatal_error(Twine(Reason));
+void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) {
+ report_fatal_error(Twine(Reason), GenCrashDiag);
}
-void llvm::report_fatal_error(const std::string &Reason) {
- report_fatal_error(Twine(Reason));
+void llvm::report_fatal_error(const std::string &Reason, bool GenCrashDiag) {
+ report_fatal_error(Twine(Reason), GenCrashDiag);
}
-void llvm::report_fatal_error(StringRef Reason) {
- report_fatal_error(Twine(Reason));
+void llvm::report_fatal_error(StringRef Reason, bool GenCrashDiag) {
+ report_fatal_error(Twine(Reason), GenCrashDiag);
}
-void llvm::report_fatal_error(const Twine &Reason) {
+void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
if (ErrorHandler) {
- ErrorHandler(ErrorHandlerUserData, Reason.str());
+ ErrorHandler(ErrorHandlerUserData, Reason.str(), GenCrashDiag);
} else {
// Blast the result out to stderr. We don't try hard to make sure this
// succeeds (e.g. handling EINTR) and we can't use errs() here because
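With this change a registered handler also receives the new GenCrashDiag flag, so it can suppress crash diagnostics for "expected" fatal errors. A sketch of installing one, assuming the handler typedef from ErrorHandling.h in this import; the handler body is hypothetical:

#include "llvm/Support/ErrorHandling.h"
#include <cstdio>
#include <cstdlib>
#include <string>

// Honor GenCrashDiag: exit quietly for expected failures, abort (and
// let crash-reporting machinery fire) for real bugs.
static void myFatalHandler(void *UserData, const std::string &Reason,
                           bool GenCrashDiag) {
  std::fprintf(stderr, "fatal: %s\n", Reason.c_str());
  if (!GenCrashDiag)
    std::exit(1);
  std::abort();
}

int main() {
  llvm::install_fatal_error_handler(myFatalHandler, 0);
  llvm::report_fatal_error("resource limit hit", /*GenCrashDiag=*/false);
}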
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index 7dc9587caae2..1ee69b60234f 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -12,37 +12,28 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/FileOutputBuffer.h"
-
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
+using llvm::sys::fs::mapped_file_region;
namespace llvm {
-
-
-FileOutputBuffer::FileOutputBuffer(uint8_t *Start, uint8_t *End,
- StringRef Path, StringRef TmpPath)
- : BufferStart(Start), BufferEnd(End) {
- FinalPath.assign(Path);
- TempPath.assign(TmpPath);
+FileOutputBuffer::FileOutputBuffer(mapped_file_region * R,
+ StringRef Path, StringRef TmpPath)
+ : Region(R)
+ , FinalPath(Path)
+ , TempPath(TmpPath) {
}
-
FileOutputBuffer::~FileOutputBuffer() {
- // If not already commited, delete buffer and remove temp file.
- if ( BufferStart != NULL ) {
- sys::fs::unmap_file_pages((void*)BufferStart, getBufferSize());
- bool Existed;
- sys::fs::remove(Twine(TempPath), Existed);
- }
+ bool Existed;
+ sys::fs::remove(Twine(TempPath), Existed);
}
-
-error_code FileOutputBuffer::create(StringRef FilePath,
- size_t Size,
+error_code FileOutputBuffer::create(StringRef FilePath,
+ size_t Size,
OwningPtr<FileOutputBuffer> &Result,
unsigned Flags) {
// If file already exists, it must be a regular file (to be mappable).
@@ -70,34 +61,27 @@ error_code FileOutputBuffer::create(StringRef FilePath,
EC = sys::fs::remove(FilePath, Existed);
if (EC)
return EC;
-
+
// Create new file in same directory but with random name.
SmallString<128> TempFilePath;
int FD;
- EC = sys::fs::unique_file(Twine(FilePath) + ".tmp%%%%%%%",
- FD, TempFilePath, false, 0644);
+ EC = sys::fs::unique_file(Twine(FilePath) + ".tmp%%%%%%%",
+ FD, TempFilePath, false, 0644);
if (EC)
return EC;
-
- // The unique_file() interface leaks lower layers and returns a file
- // descriptor. There is no way to directly close it, so use this hack
- // to hand it off to raw_fd_ostream to close for us.
- {
- raw_fd_ostream Dummy(FD, /*shouldClose=*/true);
- }
-
- // Resize file to requested initial size
- EC = sys::fs::resize_file(Twine(TempFilePath), Size);
+
+ OwningPtr<mapped_file_region> MappedFile(new mapped_file_region(
+ FD, true, mapped_file_region::readwrite, Size, 0, EC));
if (EC)
return EC;
-
+
// If requested, make the output file executable.
if ( Flags & F_executable ) {
sys::fs::file_status Stat2;
EC = sys::fs::status(Twine(TempFilePath), Stat2);
if (EC)
return EC;
-
+
sys::fs::perms new_perms = Stat2.permissions();
if ( new_perms & sys::fs::owner_read )
new_perms |= sys::fs::owner_exe;
@@ -111,38 +95,25 @@ error_code FileOutputBuffer::create(StringRef FilePath,
return EC;
}
- // Memory map new file.
- void *Base;
- EC = sys::fs::map_file_pages(Twine(TempFilePath), 0, Size, true, Base);
- if (EC)
- return EC;
-
- // Create FileOutputBuffer object to own mapped range.
- uint8_t *Start = reinterpret_cast<uint8_t*>(Base);
- Result.reset(new FileOutputBuffer(Start, Start+Size, FilePath, TempFilePath));
-
- return error_code::success();
-}
+ Result.reset(new FileOutputBuffer(MappedFile.get(), FilePath, TempFilePath));
+ if (Result)
+ MappedFile.take();
+ return error_code::success();
+}
error_code FileOutputBuffer::commit(int64_t NewSmallerSize) {
// Unmap buffer, letting OS flush dirty pages to file on disk.
- void *Start = reinterpret_cast<void*>(BufferStart);
- error_code EC = sys::fs::unmap_file_pages(Start, getBufferSize());
- if (EC)
- return EC;
-
+ Region.reset(0);
+
// If requested, resize file as part of commit.
if ( NewSmallerSize != -1 ) {
- EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize);
+ error_code EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize);
if (EC)
return EC;
}
-
+
// Rename file to final name.
return sys::fs::rename(Twine(TempPath), Twine(FinalPath));
}
-
-
} // namespace
-
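A sketch of the intended use of this class after the rework above: the caller fills the mapped buffer, and nothing reaches the final path until commit() renames the temp file into place (if the buffer is destroyed uncommitted, the new destructor removes the temp file). Helper name is illustrative:

#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/system_error.h"
#include <cstring>
using namespace llvm;

static error_code writeFileAtomically(StringRef Path, StringRef Data) {
  OwningPtr<FileOutputBuffer> Buf;
  if (error_code EC = FileOutputBuffer::create(Path, Data.size(), Buf))
    return EC;
  // Writes land in the memory-mapped temp file, not in Path.
  memcpy(Buf->getBufferStart(), Data.data(), Data.size());
  return Buf->commit();  // flush and rename over the final path
}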
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index f9e9cf036608..4d7b2391f01e 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -13,15 +13,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/FileUtilities.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallString.h"
+#include <cctype>
#include <cstdlib>
#include <cstring>
-#include <cctype>
using namespace llvm;
static bool isSignedChar(char C) {
@@ -87,9 +87,9 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P,
// If one of the positions is at a space and the other isn't, chomp up 'til
// the end of the space.
- while (isspace(*F1P) && F1P != F1End)
+ while (isspace(static_cast<unsigned char>(*F1P)) && F1P != F1End)
++F1P;
- while (isspace(*F2P) && F2P != F2End)
+ while (isspace(static_cast<unsigned char>(*F2P)) && F2P != F2End)
++F2P;
// If we stop on numbers, compare their difference.
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 4d489a88e55d..36e33b5aafa3 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -8,9 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file implements a hash set that can be used to remove duplication of
-// nodes in a graph. This code was originally created by Chris Lattner for use
-// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code
-// set.
+// nodes in a graph.
//
//===----------------------------------------------------------------------===//
@@ -18,8 +16,8 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstring>
using namespace llvm;
@@ -150,7 +148,7 @@ unsigned FoldingSetNodeID::ComputeHash() const {
/// operator== - Used to compare two nodes to each other.
///
-bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{
+bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const {
return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
}
@@ -162,7 +160,7 @@ bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
/// Used to compare the "ordering" of two nodes as defined by the
/// profiled bits and their ordering defined by memcmp().
-bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS)const{
+bool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const {
return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
}
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index f6aaf8381171..bff182f30e35 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
-#include "llvm/Config/config.h"
using namespace llvm;
static cl::opt<bool> ViewBackground("view-background", cl::Hidden,
@@ -53,6 +53,17 @@ std::string llvm::DOT::EscapeString(const std::string &Label) {
return Str;
}
+/// \brief Get a color string for this node number. Simply round-robin selects
+/// from a reasonable number of colors.
+StringRef llvm::DOT::getColorString(unsigned ColorNumber) {
+ static const int NumColors = 20;
+ static const char* Colors[NumColors] = {
+ "aaaaaa", "aa0000", "00aa00", "aa5500", "0055ff", "aa00aa", "00aaaa",
+ "555555", "ff5555", "55ff55", "ffff55", "5555ff", "ff55ff", "55ffff",
+ "ffaaaa", "aaffaa", "ffffaa", "aaaaff", "ffaaff", "aaffff"};
+ return Colors[ColorNumber % NumColors];
+}
+
// Execute the graph viewer. Return true if successful.
static bool LLVM_ATTRIBUTE_UNUSED
ExecGraphViewer(const sys::Path &ExecPath, std::vector<const char*> &args,
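A sketch of what the new getColorString helper is for: picking stable fill colors when emitting DOT by hand. The node naming here is hypothetical; the palette strings are bare hex, so the caller supplies the leading '#':

#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"

// Emit N filled nodes, cycling through the 20-color palette above.
static void emitColoredNodes(llvm::raw_ostream &OS, unsigned N) {
  for (unsigned i = 0; i != N; ++i)
    OS << "  n" << i << " [style=filled, fillcolor=\"#"
       << llvm::DOT::getColorString(i) << "\"];\n";
}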
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 34e32b817b36..73d98d148746 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/Host.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Config/config.h"
#include "llvm/Support/DataStream.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Config/config.h"
#include <string.h>
// Include the platform-specific parts of this class.
@@ -111,6 +112,21 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
#endif
}
+static bool OSHasAVXSupport() {
+#if defined(__GNUC__)
+ // Check xgetbv; this uses a .byte sequence instead of the instruction
+ // directly because older assemblers do not include support for xgetbv and
+ // there is no easy way to conditionally compile based on the assembler used.
+ int rEAX, rEDX;
+ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
+#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219
+ unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+#else
+ int rEAX = 0; // Ensures we return false
+#endif
+ return (rEAX & 6) == 6;
+}
+
static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
unsigned &Model) {
Family = (EAX >> 8) & 0xf; // Bits 8 - 11
@@ -133,6 +149,11 @@ std::string sys::getHostCPUName() {
DetectX86FamilyModel(EAX, Family, Model);
bool HasSSE3 = (ECX & 0x1);
+ // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
+ // indicates that the AVX registers will be saved and restored on context
+ // switch, then we have full AVX support.
+ const unsigned AVXBits = (1 << 27) | (1 << 28);
+ bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport();
GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
@@ -242,11 +263,15 @@ std::string sys::getHostCPUName() {
case 42: // Intel Core i7 processor. All processors are manufactured
// using the 32 nm process.
case 45:
- return "corei7-avx";
+ // Not all Sandy Bridge processors support AVX (such as the Pentium
+ // versions instead of the i7 versions).
+ return HasAVX ? "corei7-avx" : "corei7";
// Ivy Bridge:
case 58:
- return "core-avx-i";
+ // Not all Ivy Bridge processors support AVX (such as the Pentium
+ // versions instead of the i7 versions).
+ return HasAVX ? "core-avx-i" : "corei7";
case 28: // Most 45 nm Intel Atom processors
case 38: // 45 nm Atom Lincroft
@@ -330,7 +355,10 @@ std::string sys::getHostCPUName() {
case 20:
return "btver1";
case 21:
- return "bdver1";
+ if (Model <= 15)
+ return "bdver1";
+ else if (Model <= 31)
+ return "bdver2";
default:
return "generic";
}
@@ -517,6 +545,75 @@ std::string sys::getHostCPUName() {
}
#endif
+#if defined(__linux__) && defined(__arm__)
+bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
+ std::string Err;
+ DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err);
+ if (!DS) {
+ DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << Err << "\n");
+ return false;
+ }
+
+ // Read 1024 bytes from /proc/cpuinfo, which should contain the Features line
+ // in all cases.
+ char buffer[1024];
+ size_t CPUInfoSize = DS->GetBytes((unsigned char*) buffer, sizeof(buffer));
+ delete DS;
+
+ StringRef Str(buffer, CPUInfoSize);
+
+ SmallVector<StringRef, 32> Lines;
+ Str.split(Lines, "\n");
+
+ // Look for the CPU implementer line.
+ StringRef Implementer;
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("CPU implementer"))
+ Implementer = Lines[I].substr(15).ltrim("\t :");
+
+ if (Implementer == "0x41") { // ARM Ltd.
+ SmallVector<StringRef, 32> CPUFeatures;
+
+ // Look for the CPU features.
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("Features")) {
+ Lines[I].split(CPUFeatures, " ");
+ break;
+ }
+
+ for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
+ StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
+ .Case("half", "fp16")
+ .Case("neon", "neon")
+ .Case("vfpv3", "vfp3")
+ .Case("vfpv3d16", "d16")
+ .Case("vfpv4", "vfp4")
+ .Case("idiva", "hwdiv-arm")
+ .Case("idivt", "hwdiv")
+ .Default("");
+
+ if (LLVMFeatureStr != "")
+ Features.GetOrCreateValue(LLVMFeatureStr).setValue(true);
+ }
+
+ return true;
+ }
+
+ return false;
+}
+#else
bool sys::getHostCPUFeatures(StringMap<bool> &Features){
return false;
}
+#endif
+
+std::string sys::getProcessTriple() {
+ Triple PT(LLVM_HOSTTRIPLE);
+
+ if (sizeof(void *) == 8 && PT.isArch32Bit())
+ PT = PT.get64BitArchVariant();
+ if (sizeof(void *) == 4 && PT.isArch64Bit())
+ PT = PT.get32BitArchVariant();
+
+ return PT.str();
+}
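A short sketch of querying the host through the APIs touched above. Note that getProcessTriple() can differ from the configured host triple in word size, which is exactly the normalization the new function performs, and that per this patch getHostCPUFeatures() only reports features on Linux/ARM:

#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  outs() << "process triple: " << sys::getProcessTriple() << "\n";
  outs() << "host CPU:       " << sys::getHostCPUName() << "\n";

  StringMap<bool> Features;
  if (sys::getHostCPUFeatures(Features))  // false except on Linux/ARM
    for (StringMap<bool>::iterator I = Features.begin(),
                                   E = Features.end(); I != E; ++I)
      outs() << "feature: " << I->getKey() << "\n";
  return 0;
}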
diff --git a/lib/Support/LocaleWindows.inc b/lib/Support/LocaleWindows.inc
index 6827ac15a1ac..28e429c0cb7d 100644
--- a/lib/Support/LocaleWindows.inc
+++ b/lib/Support/LocaleWindows.inc
@@ -12,4 +12,4 @@ bool isPrint(int c) {
}
}
-}
\ No newline at end of file
+}
diff --git a/lib/Support/LocaleXlocale.inc b/lib/Support/LocaleXlocale.inc
index f595e7c582ca..389fe3d1d4fd 100644
--- a/lib/Support/LocaleXlocale.inc
+++ b/lib/Support/LocaleXlocale.inc
@@ -1,5 +1,5 @@
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ManagedStatic.h"
#include <cassert>
#include <xlocale.h>
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index 59bfcfcd254c..92d8b83cf94e 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -10,8 +10,8 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <fstream>
-#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/types.h>
#if LLVM_ON_WIN32
#include <windows.h>
#endif
@@ -31,7 +31,7 @@ LockFileManager::readLockFile(StringRef LockFileName) {
// to read, so we just return.
bool Exists = false;
if (sys::fs::exists(LockFileName, Exists) || !Exists)
- return Optional<std::pair<std::string, int> >();
+ return None;
// Read the owning host and PID out of the lock file. If it appears that the
// owning process is dead, the lock file is invalid.
@@ -45,7 +45,7 @@ LockFileManager::readLockFile(StringRef LockFileName) {
// Delete the lock file. It's invalid anyway.
bool Existed;
sys::fs::remove(LockFileName, Existed);
- return Optional<std::pair<std::string, int> >();
+ return None;
}
bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) {
@@ -64,6 +64,7 @@ bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) {
LockFileManager::LockFileManager(StringRef FileName)
{
+ this->FileName = FileName;
LockFileName = FileName;
LockFileName += ".lock";
@@ -175,6 +176,7 @@ void LockFileManager::waitForUnlock() {
#endif
// Don't wait more than an hour for the file to appear.
const unsigned MaxSeconds = 3600;
+ bool LockFileGone = false;
do {
// Sleep for the designated interval, to allow the owning process time to
// finish up and remove the lock file.
@@ -185,10 +187,18 @@ void LockFileManager::waitForUnlock() {
#else
nanosleep(&Interval, NULL);
#endif
- // If the file no longer exists, we're done.
+ // If the lock file no longer exists, wait for the actual file.
bool Exists = false;
- if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists)
- return;
+ if (!LockFileGone) {
+ if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) {
+ LockFileGone = true;
+ Exists = false;
+ }
+ }
+ if (LockFileGone) {
+ if (!sys::fs::exists(FileName.str(), Exists) && Exists)
+ return;
+ }
if (!processStillExecuting((*Owner).first, (*Owner).second))
return;
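A hedged sketch of the intended calling pattern, mirroring how Clang drives this class when building module files (error handling elided; the build step is a placeholder):

#include "llvm/Support/LockFileManager.h"
using namespace llvm;

static void buildWithLock(StringRef OutputFile) {
  LockFileManager Lock(OutputFile);
  switch (Lock) {
  case LockFileManager::LFS_Error:
    return;  // could not create or read the lock file
  case LockFileManager::LFS_Owned:
    // We hold OutputFile.lock: produce OutputFile here; the
    // destructor removes the lock when we go out of scope.
    break;
  case LockFileManager::LFS_Shared:
    // Someone else owns it. With this patch, waitForUnlock() returns
    // once the lock is gone *and* OutputFile itself has appeared.
    Lock.waitForUnlock();
    return;
  }
}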
diff --git a/lib/Support/Memory.cpp b/lib/Support/Memory.cpp
index 12f083822fd4..f9a4903ad015 100644
--- a/lib/Support/Memory.cpp
+++ b/lib/Support/Memory.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Memory.h"
-#include "llvm/Support/Valgrind.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/Valgrind.h"
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index ec373e7f997c..7c5ab96a764a 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -15,26 +15,31 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Errno.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/system_error.h"
#include <cassert>
+#include <cerrno>
#include <cstdio>
#include <cstring>
-#include <cerrno>
#include <new>
-#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/types.h>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#else
#include <io.h>
-#ifndef S_ISFIFO
-#define S_ISFIFO(x) (0)
+// Simplistic definitions of these macros to allow files to be read with
+// MapInFilePages.
+#ifndef S_ISREG
+#define S_ISREG(x) (1)
+#endif
+#ifndef S_ISBLK
+#define S_ISBLK(x) (0)
#endif
#endif
#include <fcntl.h>
@@ -67,13 +72,17 @@ static void CopyStringRef(char *Memory, StringRef Data) {
Memory[Data.size()] = 0; // Null terminate string.
}
-/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it.
-template <typename T>
-static T *GetNamedBuffer(StringRef Buffer, StringRef Name,
- bool RequiresNullTerminator) {
- char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1));
- CopyStringRef(Mem + sizeof(T), Name);
- return new (Mem) T(Buffer, RequiresNullTerminator);
+namespace {
+struct NamedBufferAlloc {
+ StringRef Name;
+ NamedBufferAlloc(StringRef Name) : Name(Name) {}
+};
+}
+
+void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
+ char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1));
+ CopyStringRef(Mem + N, Alloc.Name);
+ return Mem;
}
namespace {
@@ -100,8 +109,8 @@ public:
MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
StringRef BufferName,
bool RequiresNullTerminator) {
- return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName,
- RequiresNullTerminator);
+ return new (NamedBufferAlloc(BufferName))
+ MemoryBufferMem(InputData, RequiresNullTerminator);
}
/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
@@ -178,24 +187,38 @@ error_code MemoryBuffer::getFileOrSTDIN(const char *Filename,
//===----------------------------------------------------------------------===//
namespace {
-/// MemoryBufferMMapFile - This represents a file that was mapped in with the
-/// sys::Path::MapInFilePages method. When destroyed, it calls the
-/// sys::Path::UnMapFilePages method.
-class MemoryBufferMMapFile : public MemoryBufferMem {
-public:
- MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator)
- : MemoryBufferMem(Buffer, RequiresNullTerminator) { }
+/// \brief Memory maps a file descriptor using sys::fs::mapped_file_region.
+///
+/// This handles converting the offset into a legal offset on the platform.
+class MemoryBufferMMapFile : public MemoryBuffer {
+ sys::fs::mapped_file_region MFR;
+
+ static uint64_t getLegalMapOffset(uint64_t Offset) {
+ return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
+ }
- ~MemoryBufferMMapFile() {
- static int PageSize = sys::Process::GetPageSize();
+ static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
+ return Len + (Offset - getLegalMapOffset(Offset));
+ }
- uintptr_t Start = reinterpret_cast<uintptr_t>(getBufferStart());
- size_t Size = getBufferSize();
- uintptr_t RealStart = Start & ~(PageSize - 1);
- size_t RealSize = Size + (Start - RealStart);
+ const char *getStart(uint64_t Len, uint64_t Offset) {
+ return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
+ }
- sys::Path::UnMapFilePages(reinterpret_cast<const char*>(RealStart),
- RealSize);
+public:
+ MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len,
+ uint64_t Offset, error_code EC)
+ : MFR(FD, false, sys::fs::mapped_file_region::readonly,
+ getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) {
+ if (!EC) {
+ const char *Start = getStart(Len, Offset);
+ init(Start, Start + Len, RequiresNullTerminator);
+ }
+ }
+
+ virtual const char *getBufferIdentifier() const LLVM_OVERRIDE {
+ // The name is stored after the class itself.
+ return reinterpret_cast<const char *>(this + 1);
}
virtual BufferKind getBufferKind() const LLVM_OVERRIDE {
@@ -239,6 +262,8 @@ error_code MemoryBuffer::getFile(const char *Filename,
OwningPtr<MemoryBuffer> &result,
int64_t FileSize,
bool RequiresNullTerminator) {
+ // FIXME: Review if this check is unnecessary on windows as well.
+#ifdef LLVM_ON_WIN32
// First check that the "file" is not a directory
bool is_dir = false;
error_code err = sys::fs::is_directory(Filename, is_dir);
@@ -246,6 +271,7 @@ error_code MemoryBuffer::getFile(const char *Filename,
return err;
if (is_dir)
return make_error_code(errc::is_a_directory);
+#endif
int OpenFlags = O_RDONLY;
#ifdef O_BINARY
@@ -309,7 +335,7 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
uint64_t FileSize, uint64_t MapSize,
int64_t Offset,
bool RequiresNullTerminator) {
- static int PageSize = sys::Process::GetPageSize();
+ static int PageSize = sys::process::get_self()->page_size();
// Default is to map the full file.
if (MapSize == uint64_t(-1)) {
@@ -322,9 +348,10 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
return error_code(errno, posix_category());
}
- // If this is a named pipe, we can't trust the size. Create the memory
+ // If this is not a file or a block device (e.g. it's a named pipe
+ // or character device), we can't trust the size. Create the memory
// buffer by copying off the stream.
- if (S_ISFIFO(FileInfo.st_mode)) {
+ if (!S_ISREG(FileInfo.st_mode) && !S_ISBLK(FileInfo.st_mode)) {
return getMemoryBufferForStream(FD, Filename, result);
}
@@ -335,17 +362,11 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
PageSize)) {
- off_t RealMapOffset = Offset & ~(PageSize - 1);
- off_t Delta = Offset - RealMapOffset;
- size_t RealMapSize = MapSize + Delta;
-
- if (const char *Pages = sys::Path::MapInFilePages(FD,
- RealMapSize,
- RealMapOffset)) {
- result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
- StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator));
+ error_code EC;
+ result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile(
+ RequiresNullTerminator, FD, MapSize, Offset, EC));
+ if (!EC)
return error_code::success();
- }
}
MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
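The offset legalization in the new MemoryBufferMMapFile is plain rounding: map offsets must be a multiple of the mapping alignment, so the map starts at the rounded-down offset and the requested length grows by the slack. The same arithmetic in isolation, with an illustrative alignment:

#include <cassert>
#include <stdint.h>

// Round Offset down to an Align boundary and grow Len by the slack,
// so [Offset, Offset+Len) is fully covered by the legal mapping.
static void legalizeMap(uint64_t Offset, uint64_t Len, uint64_t Align,
                        uint64_t &MapOffset, uint64_t &MapLen) {
  assert(Align && (Align & (Align - 1)) == 0 && "Align: power of two");
  MapOffset = Offset & ~(Align - 1);
  MapLen = Len + (Offset - MapOffset);
}
// e.g. Offset=5000, Len=100, Align=4096 -> MapOffset=4096, MapLen=1004.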
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index db4a56b6928c..d0703754e04f 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -12,10 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Path.h"
-#include "llvm/Support/FileSystem.h"
#include "llvm/Config/config.h"
-#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/FileSystem.h"
#include <cassert>
#include <cstring>
#include <ostream>
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
index 46571c049f12..58a6ea720e73 100644
--- a/lib/Support/PathV2.cpp
+++ b/lib/Support/PathV2.cpp
@@ -12,12 +12,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/PathV2.h"
-#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
#include <cctype>
#include <cstdio>
#include <cstring>
+#ifdef __APPLE__
+#include <unistd.h>
+#endif
namespace {
using llvm::StringRef;
@@ -44,7 +47,8 @@ namespace {
#ifdef LLVM_ON_WIN32
// C:
- if (path.size() >= 2 && std::isalpha(path[0]) && path[1] == ':')
+ if (path.size() >= 2 && std::isalpha(static_cast<unsigned char>(path[0])) &&
+ path[1] == ':')
return path.substr(0, 2);
#endif
@@ -492,6 +496,27 @@ bool is_separator(char value) {
void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.clear();
+#ifdef __APPLE__
+ // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
+ int ConfName = erasedOnReboot? _CS_DARWIN_USER_TEMP_DIR
+ : _CS_DARWIN_USER_CACHE_DIR;
+ size_t ConfLen = confstr(ConfName, 0, 0);
+ if (ConfLen > 0) {
+ do {
+ result.resize(ConfLen);
+ ConfLen = confstr(ConfName, result.data(), result.size());
+ } while (ConfLen > 0 && ConfLen != result.size());
+
+ if (ConfLen > 0) {
+ assert(result.back() == 0);
+ result.pop_back();
+ return;
+ }
+
+ result.clear();
+ }
+#endif
+
// Check whether the temporary directory is specified by an environment
// variable.
const char *EnvironmentVariable;
diff --git a/lib/Support/PluginLoader.cpp b/lib/Support/PluginLoader.cpp
index 2924cfa38897..358137f08f5f 100644
--- a/lib/Support/PluginLoader.cpp
+++ b/lib/Support/PluginLoader.cpp
@@ -12,11 +12,11 @@
//===----------------------------------------------------------------------===//
#define DONT_GET_PLUGIN_LOADER_OPTION
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PluginLoader.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
#include <vector>
using namespace llvm;
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index ef3307317c4a..23ee5ab105ae 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -12,12 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Config/config.h" // Get autoconf configuration settings
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h" // Get autoconf configuration settings
#include "llvm/Support/Signals.h"
#include "llvm/Support/ThreadLocal.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Watchdog.h"
+#include "llvm/Support/raw_ostream.h"
#ifdef HAVE_CRASHREPORTERCLIENT_H
#include <CrashReporterClient.h>
@@ -37,7 +38,10 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
if (Entry->getNextEntry())
NextID = PrintStack(Entry->getNextEntry(), OS);
OS << NextID << ".\t";
- Entry->print(OS);
+ {
+ sys::Watchdog W(5);
+ Entry->print(OS);
+ }
return NextID+1;
}
diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp
index 88ca7c3f220f..2c0d37bb3299 100644
--- a/lib/Support/Process.cpp
+++ b/lib/Support/Process.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Process.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Process.h"
-namespace llvm {
+using namespace llvm;
using namespace sys;
//===----------------------------------------------------------------------===//
@@ -22,8 +23,63 @@ using namespace sys;
//=== independent code.
//===----------------------------------------------------------------------===//
+// Empty virtual destructor to anchor the vtable for the process class.
+process::~process() {}
+
+self_process *process::get_self() {
+ // Use a function local static for thread safe initialization and allocate it
+ // as a raw pointer to ensure it is never destroyed.
+ static self_process *SP = new self_process();
+
+ return SP;
}
+#if defined(_MSC_VER)
+// Visual Studio complains that the self_process destructor never exits. This
+// doesn't make much sense, as that's the whole point of calling abort... Just
+// silence this warning.
+#pragma warning(push)
+#pragma warning(disable:4722)
+#endif
+
+// The destructor for the self_process subclass must never actually be
+// executed. There should be at most one instance of this class, and that
+// instance should live until the process terminates to avoid the potential for
+// racy accesses during shutdown.
+self_process::~self_process() {
+ llvm_unreachable("This destructor must never be executed!");
+}
+
+/// \brief A helper function to compute the elapsed wall-time since the program
+/// started.
+///
+/// Note that this routine actually computes the elapsed wall time since the
+/// first time it was called. However, we arrange to have it called during the
+/// startup of the process to get approximately correct results.
+static TimeValue getElapsedWallTime() {
+ static TimeValue &StartTime = *new TimeValue(TimeValue::now());
+ return TimeValue::now() - StartTime;
+}
+
+/// \brief A special global variable to ensure we call \c getElapsedWallTime
+/// during global initialization of the program.
+///
+/// Note that this variable is never referenced elsewhere. Doing so could
+/// create race conditions during program startup or shutdown.
+static volatile TimeValue DummyTimeValue = getElapsedWallTime();
+
+// Implement this routine by using the static helpers above. They're already
+// portable.
+TimeValue self_process::get_wall_time() const {
+ return getElapsedWallTime();
+}
+
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/Process.inc"
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index 75bc282d9bd4..201d5c0d3056 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -29,12 +29,15 @@ Program::ExecuteAndWait(const Path& path,
const Path** redirects,
unsigned secondsToWait,
unsigned memoryLimit,
- std::string* ErrMsg) {
+ std::string* ErrMsg,
+ bool *ExecutionFailed) {
Program prg;
- if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg))
+ if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg)) {
+ if (ExecutionFailed) *ExecutionFailed = false;
return prg.Wait(path, secondsToWait, ErrMsg);
- else
- return -1;
+ }
+ if (ExecutionFailed) *ExecutionFailed = true;
+ return -1;
}
void
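With the new out-parameter a caller can distinguish "the child ran and failed" from "we never managed to launch it". A sketch using the signature shown in this hunk; the program path and arguments are hypothetical:

#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static int runClangVersion() {
  sys::Path Clang("/usr/bin/clang");
  const char *Args[] = { "clang", "--version", 0 };
  std::string ErrMsg;
  bool ExecFailed = false;
  int RC = sys::Program::ExecuteAndWait(Clang, Args, 0, 0,
                                        /*secondsToWait=*/0,
                                        /*memoryLimit=*/0,
                                        &ErrMsg, &ExecFailed);
  if (ExecFailed)
    errs() << "could not launch: " << ErrMsg << "\n";
  return RC;  // -1 if the launch failed, else the child's exit status
}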
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index d293da07d684..efc8b90a0090 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Regex.h"
+#include "regex_impl.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallVector.h"
-#include "regex_impl.h"
#include <string>
using namespace llvm;
@@ -27,7 +27,9 @@ Regex::Regex(StringRef regex, unsigned Flags) {
flags |= REG_ICASE;
if (Flags & Newline)
flags |= REG_NEWLINE;
- error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND);
+ if (!(Flags & BasicRegex))
+ flags |= REG_EXTENDED;
+ error = llvm_regcomp(preg, regex.data(), flags|REG_PEND);
}
Regex::~Regex() {
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index 3b53e9ff49fe..f0fed7792ce6 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -29,13 +29,9 @@ void SmallPtrSetImpl::shrink_and_clear() {
NumElements = NumTombstones = 0;
// Install the new array. Clear all the buckets to empty.
- CurArray = (const void**)malloc(sizeof(void*) * (CurArraySize+1));
+ CurArray = (const void**)malloc(sizeof(void*) * CurArraySize);
assert(CurArray && "Failed to allocate memory?");
memset(CurArray, -1, CurArraySize*sizeof(void*));
-
- // The end pointer, always valid, is set to a valid element to help the
- // iterator.
- CurArray[CurArraySize] = 0;
}
bool SmallPtrSetImpl::insert_imp(const void * Ptr) {
@@ -139,15 +135,11 @@ void SmallPtrSetImpl::Grow(unsigned NewSize) {
bool WasSmall = isSmall();
// Install the new array. Clear all the buckets to empty.
- CurArray = (const void**)malloc(sizeof(void*) * (NewSize+1));
+ CurArray = (const void**)malloc(sizeof(void*) * NewSize);
assert(CurArray && "Failed to allocate memory?");
CurArraySize = NewSize;
memset(CurArray, -1, NewSize*sizeof(void*));
- // The end pointer, always valid, is set to a valid element to help the
- // iterator.
- CurArray[NewSize] = 0;
-
// Copy over all the elements.
if (WasSmall) {
// Small sets store their elements in order.
@@ -180,7 +172,7 @@ SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
CurArray = SmallArray;
// Otherwise, allocate new heap space (unless we were the same size)
} else {
- CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1));
+ CurArray = (const void**)malloc(sizeof(void*) * that.CurArraySize);
assert(CurArray && "Failed to allocate memory?");
}
@@ -188,7 +180,7 @@ SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
CurArraySize = that.CurArraySize;
// Copy over the contents from the other set
- memcpy(CurArray, that.CurArray, sizeof(void*)*(CurArraySize+1));
+ memcpy(CurArray, that.CurArray, sizeof(void*)*CurArraySize);
NumElements = that.NumElements;
NumTombstones = that.NumTombstones;
@@ -200,7 +192,7 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
if (isSmall() && RHS.isSmall())
assert(CurArraySize == RHS.CurArraySize &&
"Cannot assign sets with different small sizes");
-
+
// If we're becoming small, prepare to insert into our stack space
if (RHS.isSmall()) {
if (!isSmall())
@@ -209,9 +201,9 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
// Otherwise, allocate new heap space (unless we were the same size)
} else if (CurArraySize != RHS.CurArraySize) {
if (isSmall())
- CurArray = (const void**)malloc(sizeof(void*) * (RHS.CurArraySize+1));
+ CurArray = (const void**)malloc(sizeof(void*) * RHS.CurArraySize);
else
- CurArray = (const void**)realloc(CurArray, sizeof(void*)*(RHS.CurArraySize+1));
+ CurArray = (const void**)realloc(CurArray, sizeof(void*)*RHS.CurArraySize);
assert(CurArray && "Failed to allocate memory?");
}
@@ -219,7 +211,7 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
CurArraySize = RHS.CurArraySize;
// Copy over the contents from the other set
- memcpy(CurArray, RHS.CurArray, sizeof(void*)*(CurArraySize+1));
+ memcpy(CurArray, RHS.CurArray, sizeof(void*)*CurArraySize);
NumElements = RHS.NumElements;
NumTombstones = RHS.NumTombstones;
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index e4e01be03802..fac3cad5cc25 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -13,14 +13,18 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Locale.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
+static const size_t TabStop = 8;
+
namespace {
struct LineNoCacheTy {
int LastQueryBufferID;
@@ -146,7 +150,8 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
/// prefixed to the message.
SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
const Twine &Msg,
- ArrayRef<SMRange> Ranges) const {
+ ArrayRef<SMRange> Ranges,
+ ArrayRef<SMFixIt> FixIts) const {
// First thing to do: find the current buffer containing the specified
// location to pull out the source line.
@@ -193,6 +198,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
R.End = SMLoc::getFromPointer(LineEnd);
// Translate from SMLoc ranges to column ranges.
+ // FIXME: Handle multibyte characters.
ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
R.End.getPointer()-LineStart));
}
@@ -202,13 +208,13 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
LineAndCol.second-1, Kind, Msg.str(),
- LineStr, ColRanges);
+ LineStr, ColRanges, FixIts);
}
void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
const Twine &Msg, ArrayRef<SMRange> Ranges,
- bool ShowColors) const {
- SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges);
+ ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
+ SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges, FixIts);
// Report the message with the diagnostic handler if present.
if (DiagHandler) {
@@ -231,15 +237,108 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
// SMDiagnostic Implementation
//===----------------------------------------------------------------------===//
-SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN,
+SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN,
int Line, int Col, SourceMgr::DiagKind Kind,
- const std::string &Msg,
- const std::string &LineStr,
- ArrayRef<std::pair<unsigned,unsigned> > Ranges)
+ StringRef Msg, StringRef LineStr,
+ ArrayRef<std::pair<unsigned,unsigned> > Ranges,
+ ArrayRef<SMFixIt> Hints)
: SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind),
- Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()) {
+ Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()),
+ FixIts(Hints.begin(), Hints.end()) {
+ std::sort(FixIts.begin(), FixIts.end());
}
+static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
+ ArrayRef<SMFixIt> FixIts, ArrayRef<char> SourceLine){
+ if (FixIts.empty())
+ return;
+
+ const char *LineStart = SourceLine.begin();
+ const char *LineEnd = SourceLine.end();
+
+ size_t PrevHintEndCol = 0;
+
+ for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end();
+ I != E; ++I) {
+ // If the fixit contains a newline or tab, ignore it.
+ if (I->getText().find_first_of("\n\r\t") != StringRef::npos)
+ continue;
+
+ SMRange R = I->getRange();
+
+ // If the line doesn't contain any part of the range, then ignore it.
+ if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
+ continue;
+
+ // Translate from SMLoc to column.
+ // Ignore pieces of the range that go onto other lines.
+ // FIXME: Handle multibyte characters in the source line.
+ unsigned FirstCol;
+ if (R.Start.getPointer() < LineStart)
+ FirstCol = 0;
+ else
+ FirstCol = R.Start.getPointer() - LineStart;
+
+ // If we inserted a long previous hint, push this one forwards, and add
+ // an extra space to show that this is not part of the previous
+ // completion. This is sort of the best we can do when two hints appear
+ // to overlap.
+ //
+ // Note that if this hint is located immediately after the previous
+ // hint, no space will be added, since the location is more important.
+ unsigned HintCol = FirstCol;
+ if (HintCol < PrevHintEndCol)
+ HintCol = PrevHintEndCol + 1;
+
+ // FIXME: This assertion is intended to catch unintended use of multibyte
+ // characters in fixits. If we decide to do this, we'll have to track
+ // separate byte widths for the source and fixit lines.
+ assert((size_t)llvm::sys::locale::columnWidth(I->getText()) ==
+ I->getText().size());
+
+ // This relies on one byte per column in our fixit hints.
+ unsigned LastColumnModified = HintCol + I->getText().size();
+ if (LastColumnModified > FixItLine.size())
+ FixItLine.resize(LastColumnModified, ' ');
+
+ std::copy(I->getText().begin(), I->getText().end(),
+ FixItLine.begin() + HintCol);
+
+ PrevHintEndCol = LastColumnModified;
+
+ // For replacements, mark the removal range with '~'.
+ // FIXME: Handle multibyte characters in the source line.
+ unsigned LastCol;
+ if (R.End.getPointer() >= LineEnd)
+ LastCol = LineEnd - LineStart;
+ else
+ LastCol = R.End.getPointer() - LineStart;
+
+ std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~');
+ }
+}
+
+static void printSourceLine(raw_ostream &S, StringRef LineContents) {
+ // Print out the source line one character at a time, so we can expand tabs.
+ for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
+ if (LineContents[i] != '\t') {
+ S << LineContents[i];
+ ++OutCol;
+ continue;
+ }
+
+ // If we have a tab, emit at least one space, then round up to 8 columns.
+ do {
+ S << ' ';
+ ++OutCol;
+ } while ((OutCol % TabStop) != 0);
+ }
+ S << '\n';
+}
+
+static bool isNonASCII(char c) {
+ return c & 0x80;
+}
void SMDiagnostic::print(const char *ProgName, raw_ostream &S,
bool ShowColors) const {
@@ -297,43 +396,48 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S,
if (LineNo == -1 || ColumnNo == -1)
return;
+ // FIXME: If there are multibyte or multi-column characters in the source, all
+ // our ranges will be wrong. To do this properly, we'll need a byte-to-column
+ // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
+ // expanding them later, and bail out rather than show incorrect ranges and
+ // misaligned fixits for any other odd characters.
+ if (std::find_if(LineContents.begin(), LineContents.end(), isNonASCII) !=
+ LineContents.end()) {
+ printSourceLine(S, LineContents);
+ return;
+ }
+ size_t NumColumns = LineContents.size();
+
// Build the line with the caret and ranges.
- std::string CaretLine(LineContents.size()+1, ' ');
+ std::string CaretLine(NumColumns+1, ' ');
// Expand any ranges.
for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
std::pair<unsigned, unsigned> R = Ranges[r];
- for (unsigned i = R.first,
- e = std::min(R.second, (unsigned)LineContents.size())+1; i != e; ++i)
- CaretLine[i] = '~';
+ std::fill(&CaretLine[R.first],
+ &CaretLine[std::min((size_t)R.second, CaretLine.size())],
+ '~');
}
-
+
+ // Add any fix-its.
+ // FIXME: Find the beginning of the line properly for multibyte characters.
+ std::string FixItInsertionLine;
+ buildFixItLine(CaretLine, FixItInsertionLine, FixIts,
+ makeArrayRef(Loc.getPointer() - ColumnNo,
+ LineContents.size()));
+
// Finally, plop on the caret.
- if (unsigned(ColumnNo) <= LineContents.size())
+ if (unsigned(ColumnNo) <= NumColumns)
CaretLine[ColumnNo] = '^';
else
- CaretLine[LineContents.size()] = '^';
+ CaretLine[NumColumns] = '^';
// ... and remove trailing whitespace so the output doesn't wrap for it. We
// know that the line isn't completely empty because it has the caret in it at
// least.
CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
- // Print out the source line one character at a time, so we can expand tabs.
- for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
- if (LineContents[i] != '\t') {
- S << LineContents[i];
- ++OutCol;
- continue;
- }
-
- // If we have a tab, emit at least one space, then round up to 8 columns.
- do {
- S << ' ';
- ++OutCol;
- } while (OutCol & 7);
- }
- S << '\n';
+ printSourceLine(S, LineContents);
if (ShowColors)
S.changeColor(raw_ostream::GREEN, true);
@@ -350,11 +454,36 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S,
do {
S << CaretLine[i];
++OutCol;
- } while (OutCol & 7);
+ } while ((OutCol % TabStop) != 0);
}
+ S << '\n';
if (ShowColors)
S.resetColor();
+
+ // Print out the replacement line, matching tabs in the source line.
+ if (FixItInsertionLine.empty())
+ return;
+ for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i != e; ++i) {
+ if (i >= LineContents.size() || LineContents[i] != '\t') {
+ S << FixItInsertionLine[i];
+ ++OutCol;
+ continue;
+ }
+
+ // Okay, we have a tab. Insert the appropriate number of characters.
+ do {
+ S << FixItInsertionLine[i];
+ // FIXME: This is trying not to break up replacements, but then to re-sync
+ // with the tabs between replacements. This will fail, though, if two
+ // fix-it replacements are exactly adjacent, or if a fix-it contains a
+ // space. Really we should be precomputing column widths, which we'll
+ // need anyway for multibyte chars.
+ if (FixItInsertionLine[i] != ' ')
+ ++i;
+ ++OutCol;
+ } while (((OutCol % TabStop) != 0) && i != e);
+ }
S << '\n';
}
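
For reference, a minimal sketch (not part of this patch) of driving the new
fix-it rendering through SourceMgr; it assumes the SMFixIt insertion
constructor declared in llvm/Support/SourceMgr.h:

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
using namespace llvm;

int main() {
  SourceMgr SM;
  // SourceMgr takes ownership of the buffer.
  MemoryBuffer *Buf = MemoryBuffer::getMemBuffer("int x = 1 + ;\n", "demo.c");
  SM.AddNewSourceBuffer(Buf, SMLoc());
  SMLoc Loc = SMLoc::getFromPointer(Buf->getBufferStart() + 12); // at ';'
  SMFixIt Hint(Loc, "0"); // suggest inserting "0" before the ';'
  // buildFixItLine() above turns Hint into the printed insertion line.
  SM.PrintMessage(Loc, SourceMgr::DK_Error, "expected expression",
                  ArrayRef<SMRange>(), Hint);
  return 0;
}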
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index d8a6ad35ba9c..9c28176b730e 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -22,13 +22,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstring>
using namespace llvm;
@@ -40,7 +40,9 @@ namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
/// what they did.
///
static cl::opt<bool>
-Enabled("stats", cl::desc("Enable statistics output from program"));
+Enabled(
+ "stats",
+ cl::desc("Enable statistics output from program (available with Asserts)"));
namespace {
@@ -142,6 +144,7 @@ void llvm::PrintStatistics(raw_ostream &OS) {
}
void llvm::PrintStatistics() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
StatisticInfo &Stats = *StatInfo;
// Statistics not enabled?
@@ -151,4 +154,17 @@ void llvm::PrintStatistics() {
raw_ostream &OutStream = *CreateInfoOutputFile();
PrintStatistics(OutStream);
delete &OutStream; // Close the file.
+#else
+  // Check whether the -stats option is set, rather than whether
+  // !Stats.Stats.empty(): in release builds the Statistic operators
+  // do nothing, so no statistics are ever registered.
+ if (Enabled) {
+ // Get the stream to write to.
+ raw_ostream &OutStream = *CreateInfoOutputFile();
+ OutStream << "Statistics are disabled. "
+ << "Build with asserts or with -DLLVM_ENABLE_STATS\n";
+ OutStream.flush();
+ delete &OutStream; // Close the file.
+ }
+#endif
}
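
A minimal sketch (not part of the patch) of the behaviour the new message
documents; Statistic increments compile to no-ops unless asserts or
LLVM_ENABLE_STATS are enabled:

#include "llvm/ADT/Statistic.h"
#define DEBUG_TYPE "demo"
STATISTIC(NumWidgets, "Number of widgets processed");

int main() {
  llvm::EnableStatistics();   // same effect as passing -stats
  for (int i = 0; i != 3; ++i)
    ++NumWidgets;             // no-op without asserts/LLVM_ENABLE_STATS
  // With stats compiled out, this now prints the hint added above instead
  // of silently printing nothing.
  llvm::PrintStatistics();
  return 0;
}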
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index f8e920846259..d7a0bfa41005 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -9,10 +9,9 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/edit_distance.h"
-
#include <bitset>
using namespace llvm;
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index 7483225fdfb0..13fba2ea2584 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Threading.h"
+#include "llvm/Config/config.h"
#include "llvm/Support/Atomic.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/Config/config.h"
#include <cassert>
using namespace llvm;
diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp
index 1a0f7bc36394..bd8af174bcd0 100644
--- a/lib/Support/TimeValue.cpp
+++ b/lib/Support/TimeValue.cpp
@@ -17,11 +17,16 @@
namespace llvm {
using namespace sys;
+const TimeValue::SecondsType
+ TimeValue::PosixZeroTimeSeconds = -946684800;
+const TimeValue::SecondsType
+ TimeValue::Win32ZeroTimeSeconds = -12591158400ULL;
+
const TimeValue TimeValue::MinTime = TimeValue ( INT64_MIN,0 );
const TimeValue TimeValue::MaxTime = TimeValue ( INT64_MAX,0 );
const TimeValue TimeValue::ZeroTime = TimeValue ( 0,0 );
-const TimeValue TimeValue::PosixZeroTime = TimeValue ( -946684800,0 );
-const TimeValue TimeValue::Win32ZeroTime = TimeValue ( -12591158400ULL,0 );
+const TimeValue TimeValue::PosixZeroTime = TimeValue ( PosixZeroTimeSeconds,0 );
+const TimeValue TimeValue::Win32ZeroTime = TimeValue ( Win32ZeroTimeSeconds,0 );
void
TimeValue::normalize( void ) {
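
For context, both hoisted constants are offsets from TimeValue's documented
epoch of 2000-01-01: -946684800 seconds is 10957 days (30 * 365 plus 7 leap
days) back to the Unix epoch of 1970-01-01, and -12591158400 seconds is
145731 days back to the Win32 FILETIME epoch of 1601-01-01. Naming them lets
Unix/TimeValue.inc (below) refer to PosixZeroTimeSeconds directly instead of
reaching into PosixZeroTime.seconds_.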
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 598e8ad6a1a5..896d869aa1e7 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -12,15 +12,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Timer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Process.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// CreateInfoOutputFile - Return a file stream to print our output on.
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index c058c05595f1..d2508ac1ef3a 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstring>
using namespace llvm;
@@ -19,8 +19,8 @@ const char *Triple::getArchTypeName(ArchType Kind) {
switch (Kind) {
case UnknownArch: return "unknown";
+ case aarch64: return "aarch64";
case arm: return "arm";
- case cellspu: return "cellspu";
case hexagon: return "hexagon";
case mips: return "mips";
case mipsel: return "mipsel";
@@ -54,11 +54,11 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
default:
return 0;
+ case aarch64: return "aarch64";
+
case arm:
case thumb: return "arm";
- case cellspu: return "spu";
-
case ppc64:
case ppc: return "ppc";
@@ -128,7 +128,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case Haiku: return "haiku";
case Minix: return "minix";
case RTEMS: return "rtems";
- case NativeClient: return "nacl";
+ case NaCl: return "nacl";
case CNK: return "cnk";
case Bitrig: return "bitrig";
case AIX: return "aix";
@@ -143,6 +143,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
case GNU: return "gnu";
case GNUEABIHF: return "gnueabihf";
case GNUEABI: return "gnueabi";
+ case GNUX32: return "gnux32";
case EABI: return "eabi";
case MachO: return "macho";
case Android: return "android";
@@ -154,8 +155,8 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return StringSwitch<Triple::ArchType>(Name)
+ .Case("aarch64", aarch64)
.Case("arm", arm)
- .Case("cellspu", cellspu)
.Case("mips", mips)
.Case("mipsel", mipsel)
.Case("mips64", mips64)
@@ -218,13 +219,13 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("powerpc", Triple::ppc)
.Cases("powerpc64", "ppu", Triple::ppc64)
.Case("mblaze", Triple::mblaze)
+ .Case("aarch64", Triple::aarch64)
.Cases("arm", "xscale", Triple::arm)
// FIXME: It would be good to replace these with explicit names for all the
// various suffixes supported.
.StartsWith("armv", Triple::arm)
.Case("thumb", Triple::thumb)
.StartsWith("thumbv", Triple::thumb)
- .Cases("spu", "cellspu", Triple::cellspu)
.Case("msp430", Triple::msp430)
.Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
.Cases("mipsel", "mipsallegrexel", Triple::mipsel)
@@ -277,7 +278,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("haiku", Triple::Haiku)
.StartsWith("minix", Triple::Minix)
.StartsWith("rtems", Triple::RTEMS)
- .StartsWith("nacl", Triple::NativeClient)
+ .StartsWith("nacl", Triple::NaCl)
.StartsWith("cnk", Triple::CNK)
.StartsWith("bitrig", Triple::Bitrig)
.StartsWith("aix", Triple::AIX)
@@ -289,6 +290,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
.StartsWith("eabi", Triple::EABI)
.StartsWith("gnueabihf", Triple::GNUEABIHF)
.StartsWith("gnueabi", Triple::GNUEABI)
+ .StartsWith("gnux32", Triple::GNUX32)
.StartsWith("gnu", Triple::GNU)
.StartsWith("macho", Triple::MachO)
.StartsWith("android", Triple::Android)
@@ -663,7 +665,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::amdil:
case llvm::Triple::arm:
- case llvm::Triple::cellspu:
case llvm::Triple::hexagon:
case llvm::Triple::le32:
case llvm::Triple::mblaze:
@@ -680,6 +681,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::spir:
return 32;
+ case llvm::Triple::aarch64:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::nvptx64:
@@ -708,6 +710,7 @@ Triple Triple::get32BitArchVariant() const {
Triple T(*this);
switch (getArch()) {
case Triple::UnknownArch:
+ case Triple::aarch64:
case Triple::msp430:
T.setArch(UnknownArch);
break;
@@ -715,7 +718,6 @@ Triple Triple::get32BitArchVariant() const {
case Triple::amdil:
case Triple::spir:
case Triple::arm:
- case Triple::cellspu:
case Triple::hexagon:
case Triple::le32:
case Triple::mblaze:
@@ -749,7 +751,6 @@ Triple Triple::get64BitArchVariant() const {
case Triple::UnknownArch:
case Triple::amdil:
case Triple::arm:
- case Triple::cellspu:
case Triple::hexagon:
case Triple::le32:
case Triple::mblaze:
@@ -761,6 +762,7 @@ Triple Triple::get64BitArchVariant() const {
T.setArch(UnknownArch);
break;
+ case Triple::aarch64:
case Triple::spir64:
case Triple::mips64:
case Triple::mips64el:
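
A small sketch (not from the patch) exercising the new triple spellings:

#include "llvm/ADT/Triple.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  Triple A("aarch64-unknown-linux-gnu");
  // parseArch() now recognises "aarch64"; it is 64-bit only, with no
  // 32-bit variant.
  outs() << A.getArchName() << " is 64-bit: "
         << (A.isArch64Bit() ? "yes" : "no") << "\n";
  outs() << "32-bit variant: "
         << A.get32BitArchVariant().getArchName() << "\n"; // "unknown"
  Triple X("x86_64-unknown-linux-gnux32");
  outs() << "environment: " << X.getEnvironmentName() << "\n"; // "gnux32"
  return 0;
}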
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index 9a8abd27f158..e9b26bdb80f2 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -51,7 +51,18 @@ int getPosixProtectionFlags(unsigned Flags) {
llvm::sys::Memory::MF_EXEC:
return PROT_READ | PROT_WRITE | PROT_EXEC;
case llvm::sys::Memory::MF_EXEC:
+#if defined(__FreeBSD__)
+ // On PowerPC, having an executable page that has no read permission
+  // can have unintended consequences. The InvalidateInstructionCache
+  // function uses instructions dcbf and icbi, both of which are treated by
+ // the processor as loads. If the page has no read permissions,
+ // executing these instructions will result in a segmentation fault.
+ // Somehow, this problem is not present on Linux, but it does happen
+ // on FreeBSD.
+ return PROT_READ | PROT_EXEC;
+#else
return PROT_EXEC;
+#endif
default:
llvm_unreachable("Illegal memory protection flag specified!");
}
@@ -73,7 +84,7 @@ Memory::allocateMappedMemory(size_t NumBytes,
if (NumBytes == 0)
return MemoryBlock();
- static const size_t PageSize = Process::GetPageSize();
+ static const size_t PageSize = process::get_self()->page_size();
const size_t NumPages = (NumBytes+PageSize-1)/PageSize;
int fd = -1;
@@ -166,8 +177,8 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
std::string *ErrMsg) {
if (NumBytes == 0) return MemoryBlock();
- size_t pageSize = Process::GetPageSize();
- size_t NumPages = (NumBytes+pageSize-1)/pageSize;
+ size_t PageSize = process::get_self()->page_size();
+ size_t NumPages = (NumBytes+PageSize-1)/PageSize;
int fd = -1;
#ifdef NEED_DEV_ZERO_FOR_MMAP
@@ -191,10 +202,10 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
NearBlock->size() : 0;
#if defined(__APPLE__) && defined(__arm__)
- void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_EXEC,
+ void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_EXEC,
flags, fd, 0);
#else
- void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC,
+ void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC,
flags, fd, 0);
#endif
if (pa == MAP_FAILED) {
@@ -207,7 +218,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
#if defined(__APPLE__) && defined(__arm__)
kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa,
- (vm_size_t)(pageSize*NumPages), 0,
+ (vm_size_t)(PageSize*NumPages), 0,
VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
if (KERN_SUCCESS != kr) {
MakeErrMsg(ErrMsg, "vm_protect max RX failed");
@@ -215,7 +226,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
}
kr = vm_protect(mach_task_self(), (vm_address_t)pa,
- (vm_size_t)(pageSize*NumPages), 0,
+ (vm_size_t)(PageSize*NumPages), 0,
VM_PROT_READ | VM_PROT_WRITE);
if (KERN_SUCCESS != kr) {
MakeErrMsg(ErrMsg, "vm_protect RW failed");
@@ -225,7 +236,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
MemoryBlock result;
result.Address = pa;
- result.Size = NumPages*pageSize;
+ result.Size = NumPages*PageSize;
return result;
}
@@ -321,7 +332,16 @@ void Memory::InvalidateInstructionCache(const void *Addr,
__clear_cache(const_cast<char *>(Start), const_cast<char *>(End));
# elif defined(__mips__)
const char *Start = static_cast<const char *>(Addr);
+# if defined(ANDROID)
+ // The declaration of "cacheflush" in Android bionic:
+ // extern int cacheflush(long start, long end, long flags);
+ const char *End = Start + Len;
+ long LStart = reinterpret_cast<long>(const_cast<char *>(Start));
+ long LEnd = reinterpret_cast<long>(const_cast<char *>(End));
+ cacheflush(LStart, LEnd, BCACHE);
+# else
cacheflush(const_cast<char *>(Start), Len, BCACHE);
+# endif
# endif
#endif // end apple
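
A sketch (not part of the patch, using the sys::Memory interface from
llvm/Support/Memory.h) of the JIT-style sequence the FreeBSD/PowerPC comment
is protecting:

#include "llvm/Support/Memory.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
using namespace llvm::sys;

int main() {
  error_code EC;
  MemoryBlock MB = Memory::allocateMappedMemory(
      4096, 0, Memory::MF_READ | Memory::MF_WRITE, EC);
  if (EC) return 1;
  // ... emit machine code into MB.base() ...
  // Plain MF_EXEC now maps to PROT_READ|PROT_EXEC on FreeBSD, so the
  // dcbf/icbi issued by InvalidateInstructionCache (treated as loads on
  // PowerPC) can still read the page.
  Memory::protectMappedMemory(MB, Memory::MF_EXEC);
  Memory::InvalidateInstructionCache(MB.base(), MB.size());
  Memory::releaseMappedMemory(MB);
  return 0;
}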
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index d04f590f87ed..a3dfd4b0a32d 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -417,16 +417,24 @@ retry_random_path:
RandomPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15];
}
+ // Make sure we don't fall into an infinite loop by constantly trying
+ // to create the parent path.
+ bool TriedToCreateParent = false;
+
// Try to open + create the file.
rety_open_create:
int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, mode);
if (RandomFD == -1) {
+ int SavedErrno = errno;
// If the file existed, try again, otherwise, error.
- if (errno == errc::file_exists)
+ if (SavedErrno == errc::file_exists)
goto retry_random_path;
- // The path prefix doesn't exist.
- if (errno == errc::no_such_file_or_directory) {
- StringRef p(RandomPath.begin(), RandomPath.size());
+ // If path prefix doesn't exist, try to create it.
+ if (SavedErrno == errc::no_such_file_or_directory &&
+ !exists(path::parent_path(RandomPath)) &&
+ !TriedToCreateParent) {
+ TriedToCreateParent = true;
+ StringRef p(RandomPath);
SmallString<64> dir_to_create;
for (path::const_iterator i = path::begin(p),
e = --path::end(p); i != e; ++i) {
@@ -439,13 +447,15 @@ rety_open_create:
(*i)[1] == '/' &&
(*i)[2] != '/')
return make_error_code(errc::no_such_file_or_directory);
- if (::mkdir(dir_to_create.c_str(), 0700) == -1)
+ if (::mkdir(dir_to_create.c_str(), 0700) == -1 &&
+ errno != errc::file_exists)
return error_code(errno, system_category());
}
}
goto rety_open_create;
}
- return error_code(errno, system_category());
+
+ return error_code(SavedErrno, system_category());
}
// Make the path absolute.
@@ -465,12 +475,14 @@ rety_open_create:
return error_code::success();
}
-error_code mapped_file_region::init(int fd, uint64_t offset) {
- AutoFD FD(fd);
+error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
+ AutoFD ScopedFD(FD);
+ if (!CloseFD)
+ ScopedFD.take();
// Figure out how large the file is.
struct stat FileInfo;
- if (fstat(fd, &FileInfo) == -1)
+ if (fstat(FD, &FileInfo) == -1)
return error_code(errno, system_category());
uint64_t FileSize = FileInfo.st_size;
@@ -478,7 +490,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) {
Size = FileSize;
else if (FileSize < Size) {
// We need to grow the file.
- if (ftruncate(fd, Size) == -1)
+ if (ftruncate(FD, Size) == -1)
return error_code(errno, system_category());
}
@@ -487,7 +499,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) {
#ifdef MAP_FILE
flags |= MAP_FILE;
#endif
- Mapping = ::mmap(0, Size, prot, flags, fd, offset);
+ Mapping = ::mmap(0, Size, prot, flags, FD, Offset);
if (Mapping == MAP_FAILED)
return error_code(errno, system_category());
return error_code::success();
@@ -516,12 +528,13 @@ mapped_file_region::mapped_file_region(const Twine &path,
return;
}
- ec = init(ofd, offset);
+ ec = init(ofd, true, offset);
if (ec)
Mapping = 0;
}
mapped_file_region::mapped_file_region(int fd,
+ bool closefd,
mapmode mode,
uint64_t length,
uint64_t offset,
@@ -535,7 +548,7 @@ mapped_file_region::mapped_file_region(int fd,
return;
}
- ec = init(fd, offset);
+ ec = init(fd, closefd, offset);
if (ec)
Mapping = 0;
}
@@ -545,7 +558,7 @@ mapped_file_region::~mapped_file_region() {
::munmap(Mapping, Size);
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
mapped_file_region::mapped_file_region(mapped_file_region &&other)
: Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) {
other.Mapping = 0;
@@ -574,7 +587,7 @@ const char *mapped_file_region::const_data() const {
}
int mapped_file_region::alignment() {
- return Process::GetPageSize();
+ return process::get_self()->page_size();
}
error_code detail::directory_iterator_construct(detail::DirIterState &it,
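
A sketch (not from the patch) of the ownership distinction the new CloseFD
flag draws:

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
using namespace llvm::sys::fs;

void mapWithoutOwnership(int FD) {
  error_code EC;
  // closefd=false: the caller keeps FD; the region never closes it. With
  // closefd=true it would be closed once the mapping is established (or on
  // failure), as before.
  mapped_file_region Region(FD, /*closefd=*/false,
                            mapped_file_region::readonly,
                            /*length=*/0, /*offset=*/0, EC);
  if (EC)
    return;
  // FD is still valid here for ordinary reads or a second mapping.
}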
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 5204147ce316..9a4454f1c650 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -44,9 +44,49 @@
using namespace llvm;
using namespace sys;
-unsigned
-Process::GetPageSize()
-{
+
+process::id_type self_process::get_id() {
+ return getpid();
+}
+
+static std::pair<TimeValue, TimeValue> getRUsageTimes() {
+#if defined(HAVE_GETRUSAGE)
+ struct rusage RU;
+ ::getrusage(RUSAGE_SELF, &RU);
+ return std::make_pair(
+ TimeValue(
+ static_cast<TimeValue::SecondsType>(RU.ru_utime.tv_sec),
+ static_cast<TimeValue::NanoSecondsType>(
+ RU.ru_utime.tv_usec * TimeValue::NANOSECONDS_PER_MICROSECOND)),
+ TimeValue(
+ static_cast<TimeValue::SecondsType>(RU.ru_stime.tv_sec),
+ static_cast<TimeValue::NanoSecondsType>(
+ RU.ru_stime.tv_usec * TimeValue::NANOSECONDS_PER_MICROSECOND)));
+#else
+#warning Cannot get usage times on this platform
+ return std::make_pair(TimeValue(), TimeValue());
+#endif
+}
+
+TimeValue self_process::get_user_time() const {
+#if _POSIX_TIMERS > 0 && _POSIX_CPUTIME > 0
+ // Try to get a high resolution CPU timer.
+ struct timespec TS;
+ if (::clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &TS) == 0)
+ return TimeValue(static_cast<TimeValue::SecondsType>(TS.tv_sec),
+ static_cast<TimeValue::NanoSecondsType>(TS.tv_nsec));
+#endif
+
+ // Otherwise fall back to rusage based timing.
+ return getRUsageTimes().first;
+}
+
+TimeValue self_process::get_system_time() const {
+ // We can only collect system time by inspecting the results of getrusage.
+ return getRUsageTimes().second;
+}
+
+static unsigned getPageSize() {
#if defined(__CYGWIN__)
// On Cygwin, getpagesize() returns 64k but the page size for the purposes of
// memory protection and mmap() is 4k.
@@ -62,6 +102,12 @@ Process::GetPageSize()
return static_cast<unsigned>(page_size);
}
+// This constructor is guaranteed to run exactly once on a single thread, and
+// sets up various process invariants that can be queried cheaply from then on.
+self_process::self_process() : PageSize(getPageSize()) {
+}
+
+
size_t Process::GetMallocUsage() {
#if defined(HAVE_MALLINFO)
struct mallinfo mi;
@@ -86,49 +132,10 @@ size_t Process::GetMallocUsage() {
#endif
}
-size_t
-Process::GetTotalMemoryUsage()
-{
-#if defined(HAVE_MALLINFO)
- struct mallinfo mi = ::mallinfo();
- return mi.uordblks + mi.hblkhd;
-#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
- malloc_statistics_t Stats;
- malloc_zone_statistics(malloc_default_zone(), &Stats);
- return Stats.size_allocated; // darwin
-#elif defined(HAVE_GETRUSAGE) && !defined(__HAIKU__)
- struct rusage usage;
- ::getrusage(RUSAGE_SELF, &usage);
- return usage.ru_maxrss;
-#else
-#warning Cannot get total memory size on this platform
- return 0;
-#endif
-}
-
-void
-Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time,
- TimeValue& sys_time)
-{
+void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time,
+ TimeValue &sys_time) {
elapsed = TimeValue::now();
-#if defined(HAVE_GETRUSAGE)
- struct rusage usage;
- ::getrusage(RUSAGE_SELF, &usage);
- user_time = TimeValue(
- static_cast<TimeValue::SecondsType>( usage.ru_utime.tv_sec ),
- static_cast<TimeValue::NanoSecondsType>( usage.ru_utime.tv_usec *
- TimeValue::NANOSECONDS_PER_MICROSECOND ) );
- sys_time = TimeValue(
- static_cast<TimeValue::SecondsType>( usage.ru_stime.tv_sec ),
- static_cast<TimeValue::NanoSecondsType>( usage.ru_stime.tv_usec *
- TimeValue::NANOSECONDS_PER_MICROSECOND ) );
-#else
-#warning Cannot get usage times on this platform
- user_time.seconds(0);
- user_time.microseconds(0);
- sys_time.seconds(0);
- sys_time.microseconds(0);
-#endif
+ llvm::tie(user_time, sys_time) = getRUsageTimes();
}
int Process::GetCurrentUserId() {
@@ -217,6 +224,8 @@ static unsigned getColumns(int FileID) {
#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H)
// Try to determine the width of the terminal.
struct winsize ws;
+ // Zero-fill ws to avoid a false positive from MemorySanitizer.
+ memset(&ws, 0, sizeof(ws));
if (ioctl(FileID, TIOCGWINSZ, &ws) == 0)
Columns = ws.ws_col;
#endif
@@ -318,7 +327,7 @@ static unsigned GetRandomNumberSeed() {
// Otherwise, swizzle the current time and the process ID to form a reasonable
// seed.
- TimeValue Now = llvm::TimeValue::now();
+ TimeValue Now = TimeValue::now();
return hash_combine(Now.seconds(), Now.nanoseconds(), ::getpid());
}
#endif
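
A sketch (not part of the patch) of the new self_process interface, assuming
the accessors declared in llvm/Support/Process.h as extended here:

#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::sys;

int main() {
  // The process::get_self() singleton caches invariants such as the page
  // size at first use, replacing the old static Process::GetPageSize().
  self_process *SP = process::get_self();
  outs() << "pid:       " << (long)SP->get_id() << "\n"; // pid_t on Unix
  outs() << "page size: " << SP->page_size() << "\n";
  outs() << "user time: " << SP->get_user_time().seconds() << "s\n";
  outs() << "sys time:  " << SP->get_system_time().seconds() << "s\n";
  return 0;
}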
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index e5990d06ecc2..117151c91d8b 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -16,9 +16,10 @@
//=== is guaranteed to work on *all* UNIX variants.
//===----------------------------------------------------------------------===//
-#include <llvm/Config/config.h>
-#include "llvm/Support/FileSystem.h"
#include "Unix.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/FileSystem.h"
+#include <llvm/Config/config.h>
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
@@ -47,11 +48,6 @@ Program::Program() : Data_(0) {}
Program::~Program() {}
-unsigned Program::GetPid() const {
- uint64_t pid = reinterpret_cast<uint64_t>(Data_);
- return static_cast<unsigned>(pid);
-}
-
// This function just uses the PATH environment variable to find the program.
Path
Program::FindProgramByName(const std::string& progName) {
@@ -169,12 +165,16 @@ static void SetMemoryLimits (unsigned size)
setrlimit (RLIMIT_RSS, &r);
#endif
#ifdef RLIMIT_AS // e.g. NetBSD doesn't have it.
+  // Don't set a virtual memory limit if built with any sanitizer; they need
+  // 80 TB of virtual address space for shadow memory mappings.
+#if !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_ADDRESS_SANITIZER_BUILD
// Virtual memory.
getrlimit (RLIMIT_AS, &r);
r.rlim_cur = limit;
setrlimit (RLIMIT_AS, &r);
#endif
#endif
+#endif
}
bool
@@ -394,24 +394,6 @@ Program::Wait(const sys::Path &path,
#endif
}
-bool
-Program::Kill(std::string* ErrMsg) {
- if (Data_ == 0) {
- MakeErrMsg(ErrMsg, "Process not started!");
- return true;
- }
-
- uint64_t pid64 = reinterpret_cast<uint64_t>(Data_);
- pid_t pid = static_cast<pid_t>(pid64);
-
- if (kill(pid, SIGKILL) != 0) {
- MakeErrMsg(ErrMsg, "The process couldn't be killed!");
- return true;
- }
-
- return false;
-}
-
error_code Program::ChangeStdinToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
return make_error_code(errc::success);
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 9e94068c9c36..66338f17d88f 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -15,9 +15,9 @@
#include "Unix.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Mutex.h"
+#include <algorithm>
#include <string>
#include <vector>
-#include <algorithm>
#if HAVE_EXECINFO_H
# include <execinfo.h> // For backtrace().
#endif
@@ -47,17 +47,19 @@ static void (*InterruptFunction)() = 0;
static std::vector<std::string> FilesToRemove;
static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
-// IntSigs - Signals that may interrupt the program at any time.
+// IntSigs - Signals that represent requested termination. There's no bug
+// or failure, or if there is, it's not our direct responsibility. For whatever
+// reason, our continued execution is no longer desirable.
static const int IntSigs[] = {
- SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
+ SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
};
static const int *const IntSigsEnd =
IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]);
-// KillSigs - Signals that are synchronous with the program that will cause it
-// to die.
+// KillSigs - Signals that represent that we have a bug, and our prompt
+// termination has been ordered.
static const int KillSigs[] = {
- SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV
+ SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGQUIT
#ifdef SIGSYS
, SIGSYS
#endif
@@ -254,7 +256,7 @@ void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
//
// On glibc systems we have the 'backtrace' function, which works nicely, but
// doesn't demangle symbols.
-static void PrintStackTrace(void *) {
+void llvm::sys::PrintStackTrace(FILE *FD) {
#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
static void* StackTrace[256];
// Use backtrace() to output a backtrace on Linux systems with glibc.
@@ -278,26 +280,30 @@ static void PrintStackTrace(void *) {
Dl_info dlinfo;
dladdr(StackTrace[i], &dlinfo);
- fprintf(stderr, "%-2d", i);
+ fprintf(FD, "%-2d", i);
const char* name = strrchr(dlinfo.dli_fname, '/');
- if (name == NULL) fprintf(stderr, " %-*s", width, dlinfo.dli_fname);
- else fprintf(stderr, " %-*s", width, name+1);
+ if (name == NULL) fprintf(FD, " %-*s", width, dlinfo.dli_fname);
+ else fprintf(FD, " %-*s", width, name+1);
- fprintf(stderr, " %#0*lx",
+ fprintf(FD, " %#0*lx",
(int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
if (dlinfo.dli_sname != NULL) {
int res;
- fputc(' ', stderr);
+ fputc(' ', FD);
char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
- if (d == NULL) fputs(dlinfo.dli_sname, stderr);
- else fputs(d, stderr);
+ if (d == NULL) fputs(dlinfo.dli_sname, FD);
+ else fputs(d, FD);
free(d);
- fprintf(stderr, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr);
+ // FIXME: When we move to C++11, use %t length modifier. It's not in
+ // C++03 and causes gcc to issue warnings. Losing the upper 32 bits of
+ // the stack offset for a stack dump isn't likely to cause any problems.
+ fprintf(FD, " + %u",(unsigned)((char*)StackTrace[i]-
+ (char*)dlinfo.dli_saddr));
}
- fputc('\n', stderr);
+ fputc('\n', FD);
}
#else
backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
@@ -305,10 +311,14 @@ static void PrintStackTrace(void *) {
#endif
}
+static void PrintStackTraceSignalHandler(void *) {
+ PrintStackTrace(stderr);
+}
+
/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
void llvm::sys::PrintStackTraceOnErrorSignal() {
- AddSignalHandler(PrintStackTrace, 0);
+ AddSignalHandler(PrintStackTraceSignalHandler, 0);
#if defined(__APPLE__)
// Environment variable to disable any kind of crash dialog.
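
A sketch (not from the patch) of the newly public entry point, assuming the
declaration added to llvm/Support/Signals.h in this change:

#include "llvm/Support/Signals.h"
#include <cstdio>
using namespace llvm;

int main() {
  // Install the handler that now forwards to the public PrintStackTrace.
  sys::PrintStackTraceOnErrorSignal();
  // The new entry point can also be called directly, e.g. from a custom
  // fatal-error hook, and writes to the given FILE* instead of stderr.
  sys::PrintStackTrace(stderr);
  return 0;
}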
diff --git a/lib/Support/Unix/TimeValue.inc b/lib/Support/Unix/TimeValue.inc
index 5cf5a9d44ed6..df8558bf8bed 100644
--- a/lib/Support/Unix/TimeValue.inc
+++ b/lib/Support/Unix/TimeValue.inc
@@ -48,7 +48,8 @@ TimeValue TimeValue::now() {
}
return TimeValue(
- static_cast<TimeValue::SecondsType>( the_time.tv_sec + PosixZeroTime.seconds_ ),
+ static_cast<TimeValue::SecondsType>( the_time.tv_sec +
+ PosixZeroTimeSeconds ),
static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec *
NANOSECONDS_PER_MICROSECOND ) );
}
diff --git a/lib/Support/Unix/Unix.h b/lib/Support/Unix/Unix.h
index 361f297d3642..051f56f96922 100644
--- a/lib/Support/Unix/Unix.h
+++ b/lib/Support/Unix/Unix.h
@@ -21,12 +21,12 @@
#include "llvm/Config/config.h" // Get autoconf configuration settings
#include "llvm/Support/Errno.h"
-#include <cstdlib>
+#include <algorithm>
+#include <cerrno>
#include <cstdio>
+#include <cstdlib>
#include <cstring>
-#include <cerrno>
#include <string>
-#include <algorithm>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
diff --git a/lib/Support/Unix/Watchdog.inc b/lib/Support/Unix/Watchdog.inc
new file mode 100644
index 000000000000..5d89c0e51b11
--- /dev/null
+++ b/lib/Support/Unix/Watchdog.inc
@@ -0,0 +1,32 @@
+//===--- Unix/Watchdog.inc - Unix Watchdog Implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Unix implementation of the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+namespace llvm {
+ namespace sys {
+ Watchdog::Watchdog(unsigned int seconds) {
+#ifdef HAVE_UNISTD_H
+ alarm(seconds);
+#endif
+ }
+
+ Watchdog::~Watchdog() {
+#ifdef HAVE_UNISTD_H
+ alarm(0);
+#endif
+ }
+ }
+}
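
A usage sketch (not part of the patch) of the RAII behaviour above:

#include "llvm/Support/Watchdog.h"

void compileWithTimeLimit() {
  // Arms alarm(300) on entry and disarms it with alarm(0) on scope exit.
  // The Unix implementation installs no SIGALRM handler of its own; an
  // unhandled SIGALRM terminates the process, which serves as the backstop
  // for hung operations.
  llvm::sys::Watchdog W(300);
  // ... potentially non-terminating work ...
}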
diff --git a/lib/Support/Watchdog.cpp b/lib/Support/Watchdog.cpp
new file mode 100644
index 000000000000..724aa001f16e
--- /dev/null
+++ b/lib/Support/Watchdog.cpp
@@ -0,0 +1,23 @@
+//===---- Watchdog.cpp - Implement Watchdog ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Watchdog.h"
+#include "llvm/Config/config.h"
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Watchdog.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Watchdog.inc"
+#endif
diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc
index cb80f2817c02..4c5aebd5e71a 100644
--- a/lib/Support/Windows/Memory.inc
+++ b/lib/Support/Windows/Memory.inc
@@ -15,6 +15,8 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Process.h"
+
+// The Windows.h header must be the last one included.
#include "Windows.h"
namespace {
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index 2280b3417145..f4898e619abf 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -17,8 +17,8 @@
//===----------------------------------------------------------------------===//
#include "Windows.h"
-#include <malloc.h>
#include <cstdio>
+#include <malloc.h>
// We need to undo a macro defined in Windows.h, otherwise we won't compile:
#undef CopyFile
@@ -82,7 +82,7 @@ Path::isValid() const {
pos = path.rfind(':',len);
size_t rootslash = 0;
if (pos != std::string::npos) {
- if (pos != 1 || !isalpha(path[0]) || len < 3)
+ if (pos != 1 || !isalpha(static_cast<unsigned char>(path[0])) || len < 3)
return false;
rootslash = 2;
}
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index 3dfac66b77ce..23f3d14f91f0 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -328,7 +328,7 @@ error_code resize_file(const Twine &path, uint64_t size) {
path_utf16))
return ec;
- int fd = ::_wopen(path_utf16.begin(), O_BINARY, S_IREAD | S_IWRITE);
+ int fd = ::_wopen(path_utf16.begin(), O_BINARY | _O_RDWR, S_IWRITE);
if (fd == -1)
return error_code(errno, generic_category());
#ifdef HAVE__CHSIZE_S
@@ -593,6 +593,10 @@ retry_random_path:
random_path_utf16.push_back(0);
random_path_utf16.pop_back();
+ // Make sure we don't fall into an infinite loop by constantly trying
+ // to create the parent path.
+ bool TriedToCreateParent = false;
+
// Try to create + open the path.
retry_create_file:
HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(),
@@ -610,7 +614,9 @@ retry_create_file:
if (ec == windows_error::file_exists)
goto retry_random_path;
// Check for non-existing parent directories.
- if (ec == windows_error::path_not_found) {
+ if (ec == windows_error::path_not_found && !TriedToCreateParent) {
+ TriedToCreateParent = true;
+
// Create the directories using result_path as temp storage.
if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
random_path_utf16.size(), result_path))
@@ -705,13 +711,14 @@ error_code get_magic(const Twine &path, uint32_t len,
return error_code::success();
}
-error_code mapped_file_region::init(int FD, uint64_t Offset) {
+error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
FileDescriptor = FD;
// Make sure that the requested size fits within SIZE_T.
if (Size > std::numeric_limits<SIZE_T>::max()) {
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return make_error_code(errc::invalid_argument);
}
@@ -732,9 +739,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
0);
if (FileMappingHandle == NULL) {
error_code ec = windows_error(GetLastError());
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
@@ -754,9 +762,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
if (Mapping == NULL) {
error_code ec = windows_error(GetLastError());
::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
@@ -768,14 +777,24 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) {
error_code ec = windows_error(GetLastError());
::UnmapViewOfFile(Mapping);
::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor);
+ } else
::CloseHandle(FileHandle);
return ec;
}
Size = mbi.RegionSize;
}
+
+  // Close all the handles except for the view; the view alone keeps the
+  // underlying file mapping alive.
+ ::CloseHandle(FileMappingHandle);
+ if (FileDescriptor) {
+ if (CloseFD)
+ _close(FileDescriptor); // Also closes FileHandle.
+ } else
+ ::CloseHandle(FileHandle);
return error_code::success();
}
@@ -815,7 +834,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
}
FileDescriptor = 0;
- ec = init(FileDescriptor, offset);
+ ec = init(FileDescriptor, true, offset);
if (ec) {
Mapping = FileMappingHandle = 0;
FileHandle = INVALID_HANDLE_VALUE;
@@ -824,6 +843,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
}
mapped_file_region::mapped_file_region(int fd,
+ bool closefd,
mapmode mode,
uint64_t length,
uint64_t offset,
@@ -836,13 +856,14 @@ mapped_file_region::mapped_file_region(int fd,
, FileMappingHandle() {
FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
if (FileHandle == INVALID_HANDLE_VALUE) {
- _close(FileDescriptor);
+ if (closefd)
+ _close(FileDescriptor);
FileDescriptor = 0;
ec = make_error_code(errc::bad_file_descriptor);
return;
}
- ec = init(FileDescriptor, offset);
+ ec = init(FileDescriptor, closefd, offset);
if (ec) {
Mapping = FileMappingHandle = 0;
FileHandle = INVALID_HANDLE_VALUE;
@@ -853,15 +874,9 @@ mapped_file_region::mapped_file_region(int fd,
mapped_file_region::~mapped_file_region() {
if (Mapping)
::UnmapViewOfFile(Mapping);
- if (FileMappingHandle)
- ::CloseHandle(FileMappingHandle);
- if (FileDescriptor)
- _close(FileDescriptor);
- else if (FileHandle != INVALID_HANDLE_VALUE)
- ::CloseHandle(FileHandle);
}
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
mapped_file_region::mapped_file_region(mapped_file_region &&other)
: Mode(other.Mode)
, Size(other.Size)
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index e29eb6dff6d7..ad9412852f10 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "Windows.h"
-#include <psapi.h>
-#include <malloc.h>
-#include <io.h>
#include <direct.h>
+#include <io.h>
+#include <malloc.h>
+#include <psapi.h>
#ifdef __MINGW32__
#if (HAVE_LIBPSAPI != 1)
@@ -35,13 +35,47 @@
# define _HEAPOK (-2)
#endif
-namespace llvm {
+using namespace llvm;
using namespace sys;
+
+process::id_type self_process::get_id() {
+ return GetCurrentProcess();
+}
+
+static TimeValue getTimeValueFromFILETIME(FILETIME Time) {
+ ULARGE_INTEGER TimeInteger;
+ TimeInteger.LowPart = Time.dwLowDateTime;
+ TimeInteger.HighPart = Time.dwHighDateTime;
+
+  // FILETIMEs count 100-nanosecond ticks (1/10th of a microsecond).
+ return TimeValue(
+ static_cast<TimeValue::SecondsType>(TimeInteger.QuadPart / 10000000),
+ static_cast<TimeValue::NanoSecondsType>(
+ (TimeInteger.QuadPart % 10000000) * 100));
+}
+
+TimeValue self_process::get_user_time() const {
+ FILETIME ProcCreate, ProcExit, KernelTime, UserTime;
+ if (GetProcessTimes(GetCurrentProcess(), &ProcCreate, &ProcExit, &KernelTime,
+ &UserTime) == 0)
+ return TimeValue();
+
+ return getTimeValueFromFILETIME(UserTime);
+}
+
+TimeValue self_process::get_system_time() const {
+ FILETIME ProcCreate, ProcExit, KernelTime, UserTime;
+ if (GetProcessTimes(GetCurrentProcess(), &ProcCreate, &ProcExit, &KernelTime,
+ &UserTime) == 0)
+ return TimeValue();
+
+ return getTimeValueFromFILETIME(KernelTime);
+}
+
// This function retrieves the page size using GetSystemInfo and is present
-// solely so it can be called once in Process::GetPageSize to initialize the
-// static variable PageSize.
-inline unsigned GetPageSizeOnce() {
+// solely so it can be called once to initialize the self_process member below.
+static unsigned getPageSize() {
// NOTE: A 32-bit application running under WOW64 is supposed to use
// GetNativeSystemInfo. However, this interface is not present prior
// to Windows XP so to use it requires dynamic linking. It is not clear
@@ -52,12 +86,12 @@ inline unsigned GetPageSizeOnce() {
return static_cast<unsigned>(info.dwPageSize);
}
-unsigned
-Process::GetPageSize() {
- static const unsigned PageSize = GetPageSizeOnce();
- return PageSize;
+// This constructor is guaranteed to run exactly once on a single thread, and
+// sets up various process invariants that can be queried cheaply from then on.
+self_process::self_process() : PageSize(getPageSize()) {
}
+
size_t
Process::GetMallocUsage()
{
@@ -72,30 +106,17 @@ Process::GetMallocUsage()
return size;
}
-size_t
-Process::GetTotalMemoryUsage()
-{
- PROCESS_MEMORY_COUNTERS pmc;
- GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc));
- return pmc.PagefileUsage;
-}
-
-void
-Process::GetTimeUsage(
- TimeValue& elapsed, TimeValue& user_time, TimeValue& sys_time)
-{
+void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time,
+ TimeValue &sys_time) {
elapsed = TimeValue::now();
- uint64_t ProcCreate, ProcExit, KernelTime, UserTime;
- GetProcessTimes(GetCurrentProcess(), (FILETIME*)&ProcCreate,
- (FILETIME*)&ProcExit, (FILETIME*)&KernelTime,
- (FILETIME*)&UserTime);
+ FILETIME ProcCreate, ProcExit, KernelTime, UserTime;
+ if (GetProcessTimes(GetCurrentProcess(), &ProcCreate, &ProcExit, &KernelTime,
+ &UserTime) == 0)
+ return;
- // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond)
- user_time.seconds( UserTime / 10000000 );
- user_time.nanoseconds( unsigned(UserTime % 10000000) * 100 );
- sys_time.seconds( KernelTime / 10000000 );
- sys_time.nanoseconds( unsigned(KernelTime % 10000000) * 100 );
+ user_time = getTimeValueFromFILETIME(UserTime);
+ sys_time = getTimeValueFromFILETIME(KernelTime);
}
int Process::GetCurrentUserId()
@@ -255,5 +276,3 @@ const char *Process::ResetColor() {
SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors());
return 0;
}
-
-}
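
As a worked example of getTimeValueFromFILETIME above: a FILETIME of
12,345,678,901 ticks (100 ns each) splits into 12,345,678,901 / 10,000,000 =
1234 seconds, with a remainder of 5,678,901 ticks, i.e. 567,890,100
nanoseconds; those two values are exactly what the TimeValue constructor
receives.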
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index 80ccaa6ea6b1..691d6d455501 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -13,9 +13,9 @@
#include "Windows.h"
#include <cstdio>
-#include <malloc.h>
-#include <io.h>
#include <fcntl.h>
+#include <io.h>
+#include <malloc.h>
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only Win32 specific code
@@ -43,11 +43,6 @@ Program::~Program() {
}
}
-unsigned Program::GetPid() const {
- Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
- return wpi->dwProcessId;
-}
-
// This function just uses the PATH environment variable to find the program.
Path
Program::FindProgramByName(const std::string& progName) {
@@ -380,23 +375,6 @@ Program::Wait(const Path &path,
return 1;
}
-bool
-Program::Kill(std::string* ErrMsg) {
- if (Data_ == 0) {
- MakeErrMsg(ErrMsg, "Process not started!");
- return true;
- }
-
- Win32ProcessInfo* wpi = reinterpret_cast<Win32ProcessInfo*>(Data_);
- HANDLE hProcess = wpi->hProcess;
- if (TerminateProcess(hProcess, 1) == 0) {
- MakeErrMsg(ErrMsg, "The process couldn't be killed!");
- return true;
- }
-
- return false;
-}
-
error_code Program::ChangeStdinToBinary(){
int result = _setmode( _fileno(stdin), _O_BINARY );
if (result == -1)
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 38308f6abd85..3dd6660b031d 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "Windows.h"
+#include <algorithm>
#include <stdio.h>
#include <vector>
-#include <algorithm>
#ifdef __MINGW32__
#include <imagehlp.h>
@@ -295,6 +295,10 @@ void sys::PrintStackTraceOnErrorSignal() {
LeaveCriticalSection(&CriticalSection);
}
+void llvm::sys::PrintStackTrace(FILE *) {
+ // FIXME: Implement.
+}
+
void sys::SetInterruptFunction(void (*IF)()) {
RegisterHandler();
diff --git a/lib/Support/Windows/Watchdog.inc b/lib/Support/Windows/Watchdog.inc
new file mode 100644
index 000000000000..fab2bdf2a941
--- /dev/null
+++ b/lib/Support/Windows/Watchdog.inc
@@ -0,0 +1,24 @@
+//===--- Windows/Watchdog.inc - Windows Watchdog Implementation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Windows implementation of the Watchdog class.
+//
+//===----------------------------------------------------------------------===//
+
+// TODO: implement.
+// Currently this is only used by PrettyStackTrace, which is also unimplemented
+// on Windows. Roughly, a Windows implementation would use CreateWaitableTimer
+// and a second thread to run the TimerAPCProc.
+
+namespace llvm {
+ namespace sys {
+ Watchdog::Watchdog(unsigned int seconds) {}
+ Watchdog::~Watchdog() {}
+ }
+}
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 34df636a72a0..2cead20c0b21 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -12,16 +12,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/YAMLParser.h"
-
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace yaml;
@@ -252,6 +251,7 @@ namespace yaml {
class Scanner {
public:
Scanner(const StringRef Input, SourceMgr &SM);
+ Scanner(MemoryBuffer *Buffer, SourceMgr &SM_);
/// @brief Parse the next token and return it without popping it.
Token &peekNext();
@@ -708,6 +708,21 @@ Scanner::Scanner(StringRef Input, SourceMgr &sm)
End = InputBuffer->getBufferEnd();
}
+Scanner::Scanner(MemoryBuffer *Buffer, SourceMgr &SM_)
+ : SM(SM_)
+ , InputBuffer(Buffer)
+ , Current(InputBuffer->getBufferStart())
+ , End(InputBuffer->getBufferEnd())
+ , Indent(-1)
+ , Column(0)
+ , Line(0)
+ , FlowLevel(0)
+ , IsStartOfStream(true)
+ , IsSimpleKeyAllowed(true)
+ , Failed(false) {
+ SM.AddNewSourceBuffer(InputBuffer, SMLoc());
+}
+
Token &Scanner::peekNext() {
// If the current token is a possible simple key, keep parsing until we
// can confirm.
@@ -1532,6 +1547,10 @@ Stream::Stream(StringRef Input, SourceMgr &SM)
: scanner(new Scanner(Input, SM))
, CurrentDoc(0) {}
+Stream::Stream(MemoryBuffer *InputBuffer, SourceMgr &SM)
+ : scanner(new Scanner(InputBuffer, SM))
+ , CurrentDoc(0) {}
+
Stream::~Stream() {}
bool Stream::failed() { return scanner->failed(); }
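
A sketch (not from the patch) of the new MemoryBuffer-based entry point; the
stream takes ownership of the buffer and registers it with the SourceMgr so
parse diagnostics get proper locations:

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
using namespace llvm;

int main() {
  SourceMgr SM;
  MemoryBuffer *Buf =
      MemoryBuffer::getMemBuffer("- one\n- two\n", "demo.yaml");
  yaml::Stream S(Buf, SM); // ownership of Buf passes to the stream
  for (yaml::document_iterator DI = S.begin(), DE = S.end(); DI != DE; ++DI)
    if (yaml::Node *Root = DI->getRoot())
      (void)Root; // walk the document ...
  return 0;
}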
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
new file mode 100644
index 000000000000..9da2aa7c841d
--- /dev/null
+++ b/lib/Support/YAMLTraits.cpp
@@ -0,0 +1,827 @@
+//===- lib/Support/YAMLTraits.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+using namespace llvm;
+using namespace yaml;
+
+//===----------------------------------------------------------------------===//
+// IO
+//===----------------------------------------------------------------------===//
+
+IO::IO(void *Context) : Ctxt(Context) {
+}
+
+IO::~IO() {
+}
+
+void *IO::getContext() {
+ return Ctxt;
+}
+
+void IO::setContext(void *Context) {
+ Ctxt = Context;
+}
+
+//===----------------------------------------------------------------------===//
+// Input
+//===----------------------------------------------------------------------===//
+
+Input::Input(StringRef InputContent, void *Ctxt)
+ : IO(Ctxt),
+ Strm(new Stream(InputContent, SrcMgr)),
+ CurrentNode(NULL) {
+ DocIterator = Strm->begin();
+}
+
+Input::~Input() {
+
+}
+
+error_code Input::error() {
+ return EC;
+}
+
+void Input::setDiagHandler(SourceMgr::DiagHandlerTy Handler, void *Ctxt) {
+ SrcMgr.setDiagHandler(Handler, Ctxt);
+}
+
+bool Input::outputting() {
+ return false;
+}
+
+bool Input::setCurrentDocument() {
+ if (DocIterator != Strm->end()) {
+ Node *N = DocIterator->getRoot();
+ if (isa<NullNode>(N)) {
+ // Empty files are allowed and ignored
+ ++DocIterator;
+ return setCurrentDocument();
+ }
+ TopNode.reset(this->createHNodes(N));
+ CurrentNode = TopNode.get();
+ return true;
+ }
+ return false;
+}
+
+void Input::nextDocument() {
+ ++DocIterator;
+}
+
+void Input::beginMapping() {
+ if (EC)
+ return;
+ MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
+ if (MN) {
+ MN->ValidKeys.clear();
+ }
+}
+
+bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
+ void *&SaveInfo) {
+ UseDefault = false;
+ if (EC)
+ return false;
+ MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
+ if (!MN) {
+ setError(CurrentNode, "not a mapping");
+ return false;
+ }
+ MN->ValidKeys.push_back(Key);
+ HNode *Value = MN->Mapping[Key];
+ if (!Value) {
+ if (Required)
+ setError(CurrentNode, Twine("missing required key '") + Key + "'");
+ else
+ UseDefault = true;
+ return false;
+ }
+ SaveInfo = CurrentNode;
+ CurrentNode = Value;
+ return true;
+}
+
+void Input::postflightKey(void *saveInfo) {
+ CurrentNode = reinterpret_cast<HNode *>(saveInfo);
+}
+
+void Input::endMapping() {
+ if (EC)
+ return;
+ MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
+ if (!MN)
+ return;
+ for (MapHNode::NameToNode::iterator i = MN->Mapping.begin(),
+ End = MN->Mapping.end(); i != End; ++i) {
+ if (!MN->isValidKey(i->first)) {
+ setError(i->second, Twine("unknown key '") + i->first + "'");
+ break;
+ }
+ }
+}
+
+unsigned Input::beginSequence() {
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ return SQ->Entries.size();
+ }
+ return 0;
+}
+
+void Input::endSequence() {
+}
+
+bool Input::preflightElement(unsigned Index, void *&SaveInfo) {
+ if (EC)
+ return false;
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ SaveInfo = CurrentNode;
+ CurrentNode = SQ->Entries[Index];
+ return true;
+ }
+ return false;
+}
+
+void Input::postflightElement(void *SaveInfo) {
+ CurrentNode = reinterpret_cast<HNode *>(SaveInfo);
+}
+
+unsigned Input::beginFlowSequence() {
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ return SQ->Entries.size();
+ }
+ return 0;
+}
+
+bool Input::preflightFlowElement(unsigned index, void *&SaveInfo) {
+ if (EC)
+ return false;
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ SaveInfo = CurrentNode;
+ CurrentNode = SQ->Entries[index];
+ return true;
+ }
+ return false;
+}
+
+void Input::postflightFlowElement(void *SaveInfo) {
+ CurrentNode = reinterpret_cast<HNode *>(SaveInfo);
+}
+
+void Input::endFlowSequence() {
+}
+
+void Input::beginEnumScalar() {
+ ScalarMatchFound = false;
+}
+
+bool Input::matchEnumScalar(const char *Str, bool) {
+ if (ScalarMatchFound)
+ return false;
+ if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) {
+ if (SN->value().equals(Str)) {
+ ScalarMatchFound = true;
+ return true;
+ }
+ }
+ return false;
+}
+
+void Input::endEnumScalar() {
+ if (!ScalarMatchFound) {
+ setError(CurrentNode, "unknown enumerated scalar");
+ }
+}
+
+bool Input::beginBitSetScalar(bool &DoClear) {
+ BitValuesUsed.clear();
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ BitValuesUsed.insert(BitValuesUsed.begin(), SQ->Entries.size(), false);
+ } else {
+ setError(CurrentNode, "expected sequence of bit values");
+ }
+ DoClear = true;
+ return true;
+}
+
+bool Input::bitSetMatch(const char *Str, bool) {
+ if (EC)
+ return false;
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ unsigned Index = 0;
+ for (std::vector<HNode *>::iterator i = SQ->Entries.begin(),
+ End = SQ->Entries.end(); i != End; ++i) {
+ if (ScalarHNode *SN = dyn_cast<ScalarHNode>(*i)) {
+ if (SN->value().equals(Str)) {
+ BitValuesUsed[Index] = true;
+ return true;
+ }
+ } else {
+ setError(CurrentNode, "unexpected scalar in sequence of bit values");
+ }
+ ++Index;
+ }
+ } else {
+ setError(CurrentNode, "expected sequence of bit values");
+ }
+ return false;
+}
+
+void Input::endBitSetScalar() {
+ if (EC)
+ return;
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ assert(BitValuesUsed.size() == SQ->Entries.size());
+ for (unsigned i = 0; i < SQ->Entries.size(); ++i) {
+ if (!BitValuesUsed[i]) {
+ setError(SQ->Entries[i], "unknown bit value");
+ return;
+ }
+ }
+ }
+}
+
+void Input::scalarString(StringRef &S) {
+ if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) {
+ S = SN->value();
+ } else {
+ setError(CurrentNode, "unexpected scalar");
+ }
+}
+
+void Input::setError(HNode *hnode, const Twine &message) {
+ this->setError(hnode->_node, message);
+}
+
+void Input::setError(Node *node, const Twine &message) {
+ Strm->printError(node, message);
+ EC = make_error_code(errc::invalid_argument);
+}
+
+Input::HNode *Input::createHNodes(Node *N) {
+ SmallString<128> StringStorage;
+ if (ScalarNode *SN = dyn_cast<ScalarNode>(N)) {
+ StringRef KeyStr = SN->getValue(StringStorage);
+ if (!StringStorage.empty()) {
+ // Copy string to permanent storage
+ unsigned Len = StringStorage.size();
+ char *Buf = StringAllocator.Allocate<char>(Len);
+ memcpy(Buf, &StringStorage[0], Len);
+ KeyStr = StringRef(Buf, Len);
+ }
+ return new ScalarHNode(N, KeyStr);
+ } else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) {
+ SequenceHNode *SQHNode = new SequenceHNode(N);
+ for (SequenceNode::iterator i = SQ->begin(), End = SQ->end(); i != End;
+ ++i) {
+ HNode *Entry = this->createHNodes(i);
+ if (EC)
+ break;
+ SQHNode->Entries.push_back(Entry);
+ }
+ return SQHNode;
+ } else if (MappingNode *Map = dyn_cast<MappingNode>(N)) {
+ MapHNode *mapHNode = new MapHNode(N);
+ for (MappingNode::iterator i = Map->begin(), End = Map->end(); i != End;
+ ++i) {
+ ScalarNode *KeyScalar = dyn_cast<ScalarNode>(i->getKey());
+ StringStorage.clear();
+ StringRef KeyStr = KeyScalar->getValue(StringStorage);
+ if (!StringStorage.empty()) {
+ // Copy string to permanent storage
+ unsigned Len = StringStorage.size();
+ char *Buf = StringAllocator.Allocate<char>(Len);
+ memcpy(Buf, &StringStorage[0], Len);
+ KeyStr = StringRef(Buf, Len);
+ }
+ HNode *ValueHNode = this->createHNodes(i->getValue());
+ if (EC)
+ break;
+ mapHNode->Mapping[KeyStr] = ValueHNode;
+ }
+ return mapHNode;
+ } else if (isa<NullNode>(N)) {
+ return new EmptyHNode(N);
+ } else {
+ setError(N, "unknown node kind");
+ return NULL;
+ }
+}
+
+bool Input::MapHNode::isValidKey(StringRef Key) {
+ for (SmallVector<const char *, 6>::iterator i = ValidKeys.begin(),
+ End = ValidKeys.end(); i != End; ++i) {
+ if (Key.equals(*i))
+ return true;
+ }
+ return false;
+}
+
+void Input::setError(const Twine &Message) {
+ this->setError(CurrentNode, Message);
+}
+
+Input::MapHNode::~MapHNode() {
+ for (MapHNode::NameToNode::iterator i = Mapping.begin(), End = Mapping.end();
+ i != End; ++i) {
+ delete i->second;
+ }
+}
+
+Input::SequenceHNode::~SequenceHNode() {
+ for (std::vector<HNode*>::iterator i = Entries.begin(), End = Entries.end();
+ i != End; ++i) {
+ delete *i;
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Output
+//===----------------------------------------------------------------------===//
+
+Output::Output(raw_ostream &yout, void *context)
+ : IO(context),
+ Out(yout),
+ Column(0),
+ ColumnAtFlowStart(0),
+ NeedBitValueComma(false),
+ NeedFlowSequenceComma(false),
+ EnumerationMatchFound(false),
+ NeedsNewLine(false) {
+}
+
+Output::~Output() {
+}
+
+bool Output::outputting() {
+ return true;
+}
+
+void Output::beginMapping() {
+ StateStack.push_back(inMapFirstKey);
+ NeedsNewLine = true;
+}
+
+void Output::endMapping() {
+ StateStack.pop_back();
+}
+
+bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
+ bool &UseDefault, void *&) {
+ UseDefault = false;
+ if (Required || !SameAsDefault) {
+ this->newLineCheck();
+ this->paddedKey(Key);
+ return true;
+ }
+ return false;
+}
+
+void Output::postflightKey(void *) {
+ if (StateStack.back() == inMapFirstKey) {
+ StateStack.pop_back();
+ StateStack.push_back(inMapOtherKey);
+ }
+}
+
+void Output::beginDocuments() {
+ this->outputUpToEndOfLine("---");
+}
+
+bool Output::preflightDocument(unsigned index) {
+ if (index > 0)
+ this->outputUpToEndOfLine("\n---");
+ return true;
+}
+
+void Output::postflightDocument() {
+}
+
+void Output::endDocuments() {
+ output("\n...\n");
+}
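+
+// Taken together, a stream of two documents is framed as:
+//   ---
+//   <document one>
+//   ---
+//   <document two>
+//   ...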
+
+unsigned Output::beginSequence() {
+ StateStack.push_back(inSeq);
+ NeedsNewLine = true;
+ return 0;
+}
+
+void Output::endSequence() {
+ StateStack.pop_back();
+}
+
+bool Output::preflightElement(unsigned, void *&) {
+ return true;
+}
+
+void Output::postflightElement(void *) {
+}
+
+unsigned Output::beginFlowSequence() {
+ StateStack.push_back(inFlowSeq);
+ this->newLineCheck();
+ ColumnAtFlowStart = Column;
+ output("[ ");
+ NeedFlowSequenceComma = false;
+ return 0;
+}
+
+void Output::endFlowSequence() {
+ StateStack.pop_back();
+ this->outputUpToEndOfLine(" ]");
+}
+
+bool Output::preflightFlowElement(unsigned, void *&) {
+ if (NeedFlowSequenceComma)
+ output(", ");
+ if (Column > 70) {
+ output("\n");
+ for (int i = 0; i < ColumnAtFlowStart; ++i)
+ output(" ");
+ Column = ColumnAtFlowStart;
+ output(" ");
+ }
+ return true;
+}
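+
+// Flow sequences are written inline, e.g. "[ 1, 2, 3 ]"; once a line
+// passes column 70 the next element continues on a fresh line, indented
+// back to the column where the "[" was opened.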
+
+void Output::postflightFlowElement(void *) {
+ NeedFlowSequenceComma = true;
+}
+
+void Output::beginEnumScalar() {
+ EnumerationMatchFound = false;
+}
+
+bool Output::matchEnumScalar(const char *Str, bool Match) {
+ if (Match && !EnumerationMatchFound) {
+ this->newLineCheck();
+ this->outputUpToEndOfLine(Str);
+ EnumerationMatchFound = true;
+ }
+ return false;
+}
+
+void Output::endEnumScalar() {
+ if (!EnumerationMatchFound)
+ llvm_unreachable("bad runtime enum value");
+}
+
+bool Output::beginBitSetScalar(bool &DoClear) {
+ this->newLineCheck();
+ output("[ ");
+ NeedBitValueComma = false;
+ DoClear = false;
+ return true;
+}
+
+bool Output::bitSetMatch(const char *Str, bool Matches) {
+ if (Matches) {
+ if (NeedBitValueComma)
+ output(", ");
+ this->output(Str);
+ NeedBitValueComma = true;
+ }
+ return false;
+}
+
+void Output::endBitSetScalar() {
+ this->outputUpToEndOfLine(" ]");
+}
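+
+// e.g. a hypothetical flags field with two bits set is written as:
+//   flags: [ big, pointy ]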
+
+void Output::scalarString(StringRef &S) {
+ this->newLineCheck();
+ if (S.find('\n') == StringRef::npos) {
+ // No embedded new-line chars, just print string.
+ this->outputUpToEndOfLine(S);
+ return;
+ }
+ unsigned i = 0;
+ unsigned j = 0;
+ unsigned End = S.size();
+ output("'"); // Starting single quote.
+ const char *Base = S.data();
+ while (j < End) {
+ // Escape a single quote by doubling it.
+ if (S[j] == '\'') {
+ output(StringRef(&Base[i], j - i + 1));
+ output("'");
+ i = j + 1;
+ }
+ ++j;
+ }
+ output(StringRef(&Base[i], j - i));
+ this->outputUpToEndOfLine("'"); // Ending single quote.
+}
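+
+// e.g. for S = "don't\nstop" the emitted scalar is single-quoted with the
+// embedded quote doubled and the newline written through:
+//   'don''t
+//   stop'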
+
+void Output::setError(const Twine &message) {
+}
+
+void Output::output(StringRef s) {
+ Column += s.size();
+ Out << s;
+}
+
+void Output::outputUpToEndOfLine(StringRef s) {
+ this->output(s);
+ if (StateStack.empty() || StateStack.back() != inFlowSeq)
+ NeedsNewLine = true;
+}
+
+void Output::outputNewLine() {
+ Out << "\n";
+ Column = 0;
+}
+
+// If a sequence is at the top level, indent as for a map, then add "- ".
+// For a map nested inside a sequence, prefix its first key with "- " and
+// indent the remaining keys with spaces so they align beneath it.
+//
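+// e.g. a sequence of two maps renders as (key padding elided):
+//   - name: Tom
+//     size: 4
+//   - name: Dan
+//     size: 7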
+
+void Output::newLineCheck() {
+ if (!NeedsNewLine)
+ return;
+ NeedsNewLine = false;
+
+ this->outputNewLine();
+
+ assert(StateStack.size() > 0);
+ unsigned Indent = StateStack.size() - 1;
+ bool OutputDash = false;
+
+ if (StateStack.back() == inSeq) {
+ OutputDash = true;
+ } else if ((StateStack.size() > 1) && (StateStack.back() == inMapFirstKey) &&
+ (StateStack[StateStack.size() - 2] == inSeq)) {
+ --Indent;
+ OutputDash = true;
+ }
+
+ for (unsigned i = 0; i < Indent; ++i) {
+ output(" ");
+ }
+ if (OutputDash) {
+ output("- ");
+ }
+
+}
+
+void Output::paddedKey(StringRef key) {
+ output(key);
+ output(":");
+ const char *spaces = "                ";
+ if (key.size() < strlen(spaces))
+ output(&spaces[key.size()]);
+ else
+ output(" ");
+}
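+
+// Values for short keys line up in one column, e.g.:
+//   name:            Tom
+//   size:            4
+// A key longer than the pad string just gets a single trailing space.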
+
+//===----------------------------------------------------------------------===//
+// traits for built-in types
+//===----------------------------------------------------------------------===//
+
+void ScalarTraits<bool>::output(const bool &Val, void *, raw_ostream &Out) {
+ Out << (Val ? "true" : "false");
+}
+
+StringRef ScalarTraits<bool>::input(StringRef Scalar, void *, bool &Val) {
+ if (Scalar.equals("true")) {
+ Val = true;
+ return StringRef();
+ } else if (Scalar.equals("false")) {
+ Val = false;
+ return StringRef();
+ }
+ return "invalid boolean";
+}
+
+void ScalarTraits<StringRef>::output(const StringRef &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<StringRef>::input(StringRef Scalar, void *,
+ StringRef &Val) {
+ Val = Scalar;
+ return StringRef();
+}
+
+void ScalarTraits<uint8_t>::output(const uint8_t &Val, void *,
+ raw_ostream &Out) {
+ // use temp uint32_t because ostream thinks uint8_t is a character
+ uint32_t Num = Val;
+ Out << Num;
+}
+
+StringRef ScalarTraits<uint8_t>::input(StringRef Scalar, void *, uint8_t &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid number";
+ if (n > 0xFF)
+ return "out of range number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<uint16_t>::output(const uint16_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<uint16_t>::input(StringRef Scalar, void *,
+ uint16_t &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid number";
+ if (n > 0xFFFF)
+ return "out of range number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<uint32_t>::output(const uint32_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<uint32_t>::input(StringRef Scalar, void *,
+ uint32_t &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid number";
+ if (n > 0xFFFFFFFFUL)
+ return "out of range number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<uint64_t>::output(const uint64_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<uint64_t>::input(StringRef Scalar, void *,
+ uint64_t &Val) {
+ unsigned long long N;
+ if (getAsUnsignedInteger(Scalar, 0, N))
+ return "invalid number";
+ Val = N;
+ return StringRef();
+}
+
+void ScalarTraits<int8_t>::output(const int8_t &Val, void *, raw_ostream &Out) {
+ // use temp int32_t because ostream thinks int8_t is a character
+ int32_t Num = Val;
+ Out << Num;
+}
+
+StringRef ScalarTraits<int8_t>::input(StringRef Scalar, void *, int8_t &Val) {
+ long long N;
+ if (getAsSignedInteger(Scalar, 0, N))
+ return "invalid number";
+ if ((N > 127) || (N < -128))
+ return "out of range number";
+ Val = N;
+ return StringRef();
+}
+
+void ScalarTraits<int16_t>::output(const int16_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<int16_t>::input(StringRef Scalar, void *, int16_t &Val) {
+ long long N;
+ if (getAsSignedInteger(Scalar, 0, N))
+ return "invalid number";
+ if ((N > INT16_MAX) || (N < INT16_MIN))
+ return "out of range number";
+ Val = N;
+ return StringRef();
+}
+
+void ScalarTraits<int32_t>::output(const int32_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<int32_t>::input(StringRef Scalar, void *, int32_t &Val) {
+ long long N;
+ if (getAsSignedInteger(Scalar, 0, N))
+ return "invalid number";
+ if ((N > INT32_MAX) || (N < INT32_MIN))
+ return "out of range number";
+ Val = N;
+ return StringRef();
+}
+
+void ScalarTraits<int64_t>::output(const int64_t &Val, void *,
+ raw_ostream &Out) {
+ Out << Val;
+}
+
+StringRef ScalarTraits<int64_t>::input(StringRef Scalar, void *, int64_t &Val) {
+ long long N;
+ if (getAsSignedInteger(Scalar, 0, N))
+ return "invalid number";
+ Val = N;
+ return StringRef();
+}
+
+void ScalarTraits<double>::output(const double &Val, void *, raw_ostream &Out) {
+ Out << format("%g", Val);
+}
+
+StringRef ScalarTraits<double>::input(StringRef Scalar, void *, double &Val) {
+ SmallString<32> buff(Scalar.begin(), Scalar.end());
+ char *end;
+ Val = strtod(buff.c_str(), &end);
+ if (*end != '\0')
+ return "invalid floating point number";
+ return StringRef();
+}
+
+void ScalarTraits<float>::output(const float &Val, void *, raw_ostream &Out) {
+ Out << format("%g", Val);
+}
+
+StringRef ScalarTraits<float>::input(StringRef Scalar, void *, float &Val) {
+ SmallString<32> buff(Scalar.begin(), Scalar.end());
+ char *end;
+ Val = strtod(buff.c_str(), &end);
+ if (*end != '\0')
+ return "invalid floating point number";
+ return StringRef();
+}
+
+void ScalarTraits<Hex8>::output(const Hex8 &Val, void *, raw_ostream &Out) {
+ uint8_t Num = Val;
+ Out << format("0x%02X", Num);
+}
+
+StringRef ScalarTraits<Hex8>::input(StringRef Scalar, void *, Hex8 &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid hex8 number";
+ if (n > 0xFF)
+ return "out of range hex8 number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<Hex16>::output(const Hex16 &Val, void *, raw_ostream &Out) {
+ uint16_t Num = Val;
+ Out << format("0x%04X", Num);
+}
+
+StringRef ScalarTraits<Hex16>::input(StringRef Scalar, void *, Hex16 &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid hex16 number";
+ if (n > 0xFFFF)
+ return "out of range hex16 number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<Hex32>::output(const Hex32 &Val, void *, raw_ostream &Out) {
+ uint32_t Num = Val;
+ Out << format("0x%08X", Num);
+}
+
+StringRef ScalarTraits<Hex32>::input(StringRef Scalar, void *, Hex32 &Val) {
+ unsigned long long n;
+ if (getAsUnsignedInteger(Scalar, 0, n))
+ return "invalid hex32 number";
+ if (n > 0xFFFFFFFFUL)
+ return "out of range hex32 number";
+ Val = n;
+ return StringRef();
+}
+
+void ScalarTraits<Hex64>::output(const Hex64 &Val, void *, raw_ostream &Out) {
+ uint64_t Num = Val;
+ Out << format("0x%016llX", Num);
+}
+
+StringRef ScalarTraits<Hex64>::input(StringRef Scalar, void *, Hex64 &Val) {
+ unsigned long long Num;
+ if (getAsUnsignedInteger(Scalar, 0, Num))
+ return "invalid hex64 number";
+ Val = Num;
+ return StringRef();
+}
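+
+// A minimal usage sketch of the traits above ("Entry", "Size" and "Addr"
+// are hypothetical names, not defined in this file): once a MappingTraits
+// specialization exists, a struct round-trips through Output and Input.
+//
+//   struct Entry { uint32_t Size; Hex32 Addr; };
+//
+//   namespace llvm { namespace yaml {
+//   template <> struct MappingTraits<Entry> {
+//     static void mapping(IO &io, Entry &E) {
+//       io.mapRequired("size", E.Size);
+//       io.mapRequired("addr", E.Addr);
+//     }
+//   };
+//   } } // end namespace llvm::yaml
+//
+//   Entry E = { 512, Hex32(255) };
+//   Output Yout(llvm::outs());
+//   Yout << E;                    // writes "size:" / "addr:" (keys padded)
+//   Input Yin("size: 512\naddr: 0x000000FF");
+//   Yin >> E;                     // parses the document back into E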
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 7cd53648da35..a433088b1930 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/Program.h"
-#include "llvm/Support/Process.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
#include "llvm/Support/system_error.h"
-#include "llvm/ADT/STLExtras.h"
#include <cctype>
#include <cerrno>
#include <sys/stat.h>
@@ -241,7 +241,8 @@ raw_ostream &raw_ostream::operator<<(double N) {
if (cs == '+' || cs == '-') {
int c1 = buf[len - 2];
int c0 = buf[len - 1];
- if (isdigit(c1) && isdigit(c0)) {
+ if (isdigit(static_cast<unsigned char>(c1)) &&
+ isdigit(static_cast<unsigned char>(c0))) {
// Trim leading '0': "...e+012" -> "...e+12\0"
buf[len - 3] = c1;
buf[len - 2] = c0;
@@ -305,7 +306,12 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) {
size_t BytesToWrite = Size - (Size % NumBytes);
write_impl(Ptr, BytesToWrite);
- copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite);
+ size_t BytesRemaining = Size - BytesToWrite;
+ if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) {
+ // Too much left over to copy into our buffer.
+ return write(Ptr + BytesToWrite, BytesRemaining);
+ }
+ copy_to_buffer(Ptr + BytesToWrite, BytesRemaining);
return *this;
}
@@ -511,7 +517,7 @@ raw_fd_ostream::~raw_fd_ostream() {
// has_error() and clear the error flag with clear_error() before
// destructing raw_ostream objects which may have errors.
if (has_error())
- report_fatal_error("IO failure on output stream.");
+ report_fatal_error("IO failure on output stream.", /*GenCrashDiag=*/false);
}
diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c
index 46c91a9c497c..74d9186aaaa2 100644
--- a/lib/Support/regcomp.c
+++ b/lib/Support/regcomp.c
@@ -303,6 +303,7 @@ p_ere_exp(struct parse *p)
sopno pos;
int count;
int count2;
+ int backrefnum;
sopno subno;
int wascaret = 0;
@@ -370,7 +371,34 @@ p_ere_exp(struct parse *p)
case '\\':
REQUIRE(MORE(), REG_EESCAPE);
c = GETNEXT();
- ordinary(p, c);
+ if (c >= '1' && c <= '9') {
+ /* \[0-9] is taken to be a back-reference to a previously specified
+ * matching group. backrefnum will hold the number. The matching
+ * group must exist (i.e. if \4 is found there must have been at
+ * least 4 matching groups specified in the pattern previously).
+ */
+ backrefnum = c - '0';
+ if (p->pend[backrefnum] == 0) {
+ SETERROR(REG_ESUBREG);
+ break;
+ }
+
+ /* Make sure everything checks out and emit the sequence
+ * that marks a back-reference to the parse structure.
+ */
+ assert(backrefnum <= p->g->nsub);
+ EMIT(OBACK_, backrefnum);
+ assert(p->pbegin[backrefnum] != 0);
+ assert(OP(p->strip[p->pbegin[backrefnum]]) != OLPAREN);
+ assert(OP(p->strip[p->pend[backrefnum]]) != ORPAREN);
+ (void) dupl(p, p->pbegin[backrefnum]+1, p->pend[backrefnum]);
+ EMIT(O_BACK, backrefnum);
+ p->g->backrefs = 1;
+ } else {
+ /* Other chars are simply themselves when escaped with a backslash.
+ */
+ ordinary(p, c);
+ }
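+ /* For example, with this change the ERE "(ab)\1" matches "abab",
+ * while a reference to a group that has not yet been seen, such as
+ * "\2" in "(a)\2", fails with REG_ESUBREG.
+ */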
break;
case '{': /* okay as ordinary except if digit follows */
REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
diff --git a/lib/Support/system_error.cpp b/lib/Support/system_error.cpp
index 2df223ca718a..b22745afc330 100644
--- a/lib/Support/system_error.cpp
+++ b/lib/Support/system_error.cpp
@@ -13,8 +13,8 @@
#include "llvm/Support/system_error.h"
#include "llvm/Support/Errno.h"
-#include <string>
#include <cstring>
+#include <string>
namespace llvm {
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
index 0bb86b0686a0..928b1203cd8f 100644
--- a/lib/TableGen/Error.cpp
+++ b/lib/TableGen/Error.cpp
@@ -15,15 +15,20 @@
#include "llvm/TableGen/Error.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
-
#include <cstdlib>
namespace llvm {
SourceMgr SrcMgr;
+unsigned ErrorsPrinted = 0;
static void PrintMessage(ArrayRef<SMLoc> Loc, SourceMgr::DiagKind Kind,
const Twine &Msg) {
+ // Count the total number of errors printed.
+ // This is used to exit with an error code if there were any errors.
+ if (Kind == SourceMgr::DK_Error)
+ ++ErrorsPrinted;
+
SMLoc NullLoc;
if (Loc.empty())
Loc = NullLoc;
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
index d0ca756016f2..dc4167b305ca 100644
--- a/lib/TableGen/Main.cpp
+++ b/lib/TableGen/Main.cpp
@@ -64,11 +64,11 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) {
return 1;
}
DepOut.os() << OutputFilename << ":";
- const std::vector<std::string> &Dependencies = Parser.getDependencies();
- for (std::vector<std::string>::const_iterator I = Dependencies.begin(),
- E = Dependencies.end();
+ const TGLexer::DependenciesMapTy &Dependencies = Parser.getDependencies();
+ for (TGLexer::DependenciesMapTy::const_iterator I = Dependencies.begin(),
+ E = Dependencies.end();
I != E; ++I) {
- DepOut.os() << " " << (*I);
+ DepOut.os() << " " << I->first;
}
DepOut.os() << "\n";
DepOut.keep();
@@ -117,11 +117,14 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) {
if (MainFn(Out.os(), Records))
return 1;
+ if (ErrorsPrinted > 0) {
+ errs() << argv0 << ": " << ErrorsPrinted << " errors.\n";
+ return 1;
+ }
+
// Declare success.
Out.keep();
return 0;
-
- return 1;
}
}
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 11feb435421c..9ad20532d7eb 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -12,17 +12,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/TableGen/Error.h"
using namespace llvm;
@@ -95,15 +95,16 @@ ListRecTy *RecTy::getListTy() {
return ListTy;
}
+bool RecTy::baseClassOf(const RecTy *RHS) const {
+ assert(RHS && "NULL pointer");
+ return Kind == RHS->getRecTyKind();
+}
+
Init *BitRecTy::convertValue(BitsInit *BI) {
if (BI->getNumBits() != 1) return 0; // Only accept if just one bit!
return BI->getBit(0);
}
-bool BitRecTy::baseClassOf(const BitsRecTy *RHS) const {
- return RHS->getNumBits() == 1;
-}
-
Init *BitRecTy::convertValue(IntInit *II) {
int64_t Val = II->getValue();
if (Val != 0 && Val != 1) return 0; // Only accept 0 or 1 for a bit!
@@ -118,6 +119,14 @@ Init *BitRecTy::convertValue(TypedInit *VI) {
return 0;
}
+bool BitRecTy::baseClassOf(const RecTy *RHS) const {
+ if (RecTy::baseClassOf(RHS) || getRecTyKind() == IntRecTyKind)
+ return true;
+ if (const BitsRecTy *BitsTy = dyn_cast<BitsRecTy>(RHS))
+ return BitsTy->getNumBits() == 1;
+ return false;
+}
+
BitsRecTy *BitsRecTy::get(unsigned Sz) {
static std::vector<BitsRecTy*> Shared;
if (Sz >= Shared.size())
@@ -193,6 +202,13 @@ Init *BitsRecTy::convertValue(TypedInit *VI) {
return 0;
}
+bool BitsRecTy::baseClassOf(const RecTy *RHS) const {
+ if (RecTy::baseClassOf(RHS)) // argument and the receiver are the same type
+ return cast<BitsRecTy>(RHS)->Size == Size;
+ RecTyKind kind = RHS->getRecTyKind();
+ return (kind == BitRecTyKind && Size == 1) || (kind == IntRecTyKind);
+}
+
Init *IntRecTy::convertValue(BitInit *BI) {
return IntInit::get(BI->getValue());
}
@@ -214,6 +230,11 @@ Init *IntRecTy::convertValue(TypedInit *TI) {
return 0;
}
+bool IntRecTy::baseClassOf(const RecTy *RHS) const {
+ RecTyKind kind = RHS->getRecTyKind();
+ return kind == BitRecTyKind || kind == BitsRecTyKind || kind == IntRecTyKind;
+}
+
Init *StringRecTy::convertValue(UnOpInit *BO) {
if (BO->getOpcode() == UnOpInit::CAST) {
Init *L = BO->getOperand()->convertInitializerTo(this);
@@ -275,6 +296,12 @@ Init *ListRecTy::convertValue(TypedInit *TI) {
return 0;
}
+bool ListRecTy::baseClassOf(const RecTy *RHS) const {
+ if (const ListRecTy *ListTy = dyn_cast<ListRecTy>(RHS))
+ return ListTy->getElementType()->typeIsConvertibleTo(Ty);
+ return false;
+}
+
Init *DagRecTy::convertValue(TypedInit *TI) {
if (TI->getType()->typeIsConvertibleTo(this))
return TI;
@@ -328,13 +355,17 @@ Init *RecordRecTy::convertValue(TypedInit *TI) {
return 0;
}
-bool RecordRecTy::baseClassOf(const RecordRecTy *RHS) const {
- if (Rec == RHS->getRecord() || RHS->getRecord()->isSubClassOf(Rec))
+bool RecordRecTy::baseClassOf(const RecTy *RHS) const {
+ const RecordRecTy *RTy = dyn_cast<RecordRecTy>(RHS);
+ if (!RTy)
+ return false;
+
+ if (Rec == RTy->getRecord() || RTy->getRecord()->isSubClassOf(Rec))
return true;
const std::vector<Record*> &SC = Rec->getSuperClasses();
for (unsigned i = 0, e = SC.size(); i != e; ++i)
- if (RHS->getRecord()->isSubClassOf(SC[i]))
+ if (RTy->getRecord()->isSubClassOf(SC[i]))
return true;
return false;
@@ -904,6 +935,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
break;
}
+ case ADD:
case SHL:
case SRA:
case SRL: {
@@ -914,6 +946,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
int64_t Result;
switch (getOpcode()) {
default: llvm_unreachable("Bad opcode!");
+ case ADD: Result = LHSv + RHSv; break;
case SHL: Result = LHSv << RHSv; break;
case SRA: Result = LHSv >> RHSv; break;
case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break;
@@ -939,6 +972,7 @@ std::string BinOpInit::getAsString() const {
std::string Result;
switch (Opc) {
case CONCAT: Result = "!con"; break;
+ case ADD: Result = "!add"; break;
case SHL: Result = "!shl"; break;
case SRA: Result = "!sra"; break;
case SRL: Result = "!srl"; break;
@@ -1491,11 +1525,9 @@ Init *FieldInit::resolveReferences(Record &R, const RecordVal *RV) const {
return const_cast<FieldInit *>(this);
}
-void ProfileDagInit(FoldingSetNodeID &ID,
- Init *V,
- const std::string &VN,
- ArrayRef<Init *> ArgRange,
- ArrayRef<std::string> NameRange) {
+static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, const std::string &VN,
+ ArrayRef<Init *> ArgRange,
+ ArrayRef<std::string> NameRange) {
ID.AddPointer(V);
ID.AddString(VN);
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index ff322e74fba2..c6be4f8a1189 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -12,18 +12,17 @@
//===----------------------------------------------------------------------===//
#include "TGLexer.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h" // for strtoull()/strtoll() define
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/TableGen/Error.h"
#include <cctype>
+#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
-#include <cerrno>
-
-#include "llvm/Config/config.h" // for strtoull()/strtoll() define
using namespace llvm;
@@ -310,7 +309,15 @@ bool TGLexer::LexInclude() {
return true;
}
- Dependencies.push_back(IncludedFile);
+ DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile);
+ if (Found != Dependencies.end()) {
+ PrintError(getLoc(),
+ "File '" + IncludedFile + "' has already been included.");
+ SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note,
+ "previously included here");
+ return true;
+ }
+ Dependencies.insert(std::make_pair(IncludedFile, getLoc()));
// Save the line number and lex buffer of the includer.
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = CurBuf->getBufferStart();
@@ -463,6 +470,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("head", tgtok::XHead)
.Case("tail", tgtok::XTail)
.Case("con", tgtok::XConcat)
+ .Case("add", tgtok::XADD)
.Case("shl", tgtok::XSHL)
.Case("sra", tgtok::XSRA)
.Case("srl", tgtok::XSRL)
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index 8a850b5cec8e..d1bd70d2eca4 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -15,9 +15,10 @@
#define TGLEXER_H
#include "llvm/Support/DataTypes.h"
-#include <string>
-#include <vector>
+#include "llvm/Support/SMLoc.h"
#include <cassert>
+#include <map>
+#include <string>
namespace llvm {
class MemoryBuffer;
@@ -46,7 +47,7 @@ namespace tgtok {
MultiClass, String,
// !keywords.
- XConcat, XSRA, XSRL, XSHL, XStrConcat, XCast, XSubst,
+ XConcat, XADD, XSRA, XSRL, XSHL, XStrConcat, XCast, XSubst,
XForEach, XHead, XTail, XEmpty, XIf, XEq,
// Integer value.
@@ -73,9 +74,13 @@ class TGLexer {
/// CurBuffer - This is the current buffer index we're lexing from as managed
/// by the SourceMgr object.
int CurBuffer;
+
+public:
+ typedef std::map<std::string, SMLoc> DependenciesMapTy;
+private:
/// Dependencies - This is the list of all included files.
- std::vector<std::string> Dependencies;
-
+ DependenciesMapTy Dependencies;
+
public:
TGLexer(SourceMgr &SrcMgr);
~TGLexer() {}
@@ -84,7 +89,7 @@ public:
return CurCode = LexToken();
}
- const std::vector<std::string> &getDependencies() const {
+ const DependenciesMapTy &getDependencies() const {
return Dependencies;
}
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index b1f9f724efd3..86ad2a6e3c09 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "TGParser.h"
-#include "llvm/TableGen/Record.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TableGen/Record.h"
#include <algorithm>
#include <sstream>
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -26,7 +26,7 @@ using namespace llvm;
namespace llvm {
struct SubClassReference {
- SMLoc RefLoc;
+ SMRange RefRange;
Record *Rec;
std::vector<Init*> TemplateArgs;
SubClassReference() : Rec(0) {}
@@ -35,7 +35,7 @@ struct SubClassReference {
};
struct SubMultiClassReference {
- SMLoc RefLoc;
+ SMRange RefRange;
MultiClass *MC;
std::vector<Init*> TemplateArgs;
SubMultiClassReference() : MC(0) {}
@@ -150,22 +150,23 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
// Add all of the values in the subclass into the current class.
const std::vector<RecordVal> &Vals = SC->getValues();
for (unsigned i = 0, e = Vals.size(); i != e; ++i)
- if (AddValue(CurRec, SubClass.RefLoc, Vals[i]))
+ if (AddValue(CurRec, SubClass.RefRange.Start, Vals[i]))
return true;
const std::vector<Init *> &TArgs = SC->getTemplateArgs();
// Ensure that an appropriate number of template arguments are specified.
if (TArgs.size() < SubClass.TemplateArgs.size())
- return Error(SubClass.RefLoc, "More template args specified than expected");
+ return Error(SubClass.RefRange.Start,
+ "More template args specified than expected");
// Loop over all of the template arguments, setting them to the specified
// value or leaving them as the default if necessary.
for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
if (i < SubClass.TemplateArgs.size()) {
// If a value is specified for this template arg, set it now.
- if (SetValue(CurRec, SubClass.RefLoc, TArgs[i], std::vector<unsigned>(),
- SubClass.TemplateArgs[i]))
+ if (SetValue(CurRec, SubClass.RefRange.Start, TArgs[i],
+ std::vector<unsigned>(), SubClass.TemplateArgs[i]))
return true;
// Resolve it next.
@@ -175,7 +176,8 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
CurRec->removeValue(TArgs[i]);
} else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
- return Error(SubClass.RefLoc,"Value not specified for template argument #"
+ return Error(SubClass.RefRange.Start,
+ "Value not specified for template argument #"
+ utostr(i) + " (" + TArgs[i]->getAsUnquotedString()
+ ") of subclass '" + SC->getNameInitAsString() + "'!");
}
@@ -184,17 +186,18 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
// Since everything went well, we can now set the "superclass" list for the
// current record.
const std::vector<Record*> &SCs = SC->getSuperClasses();
+ ArrayRef<SMRange> SCRanges = SC->getSuperClassRanges();
for (unsigned i = 0, e = SCs.size(); i != e; ++i) {
if (CurRec->isSubClassOf(SCs[i]))
- return Error(SubClass.RefLoc,
+ return Error(SubClass.RefRange.Start,
"Already subclass of '" + SCs[i]->getName() + "'!\n");
- CurRec->addSuperClass(SCs[i]);
+ CurRec->addSuperClass(SCs[i], SCRanges[i]);
}
if (CurRec->isSubClassOf(SC))
- return Error(SubClass.RefLoc,
+ return Error(SubClass.RefRange.Start,
"Already subclass of '" + SC->getName() + "'!\n");
- CurRec->addSuperClass(SC);
+ CurRec->addSuperClass(SC, SubClass.RefRange);
return false;
}
@@ -211,7 +214,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
// Add all of the values in the subclass into the current class.
const std::vector<RecordVal> &SMCVals = SMC->Rec.getValues();
for (unsigned i = 0, e = SMCVals.size(); i != e; ++i)
- if (AddValue(CurRec, SubMultiClass.RefLoc, SMCVals[i]))
+ if (AddValue(CurRec, SubMultiClass.RefRange.Start, SMCVals[i]))
return true;
int newDefStart = CurMC->DefPrototypes.size();
@@ -226,7 +229,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
// Add all of the values in the superclass into the current def.
for (unsigned i = 0, e = MCVals.size(); i != e; ++i)
- if (AddValue(NewDef, SubMultiClass.RefLoc, MCVals[i]))
+ if (AddValue(NewDef, SubMultiClass.RefRange.Start, MCVals[i]))
return true;
CurMC->DefPrototypes.push_back(NewDef);
@@ -237,7 +240,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
// Ensure that an appropriate number of template arguments are
// specified.
if (SMCTArgs.size() < SubMultiClass.TemplateArgs.size())
- return Error(SubMultiClass.RefLoc,
+ return Error(SubMultiClass.RefRange.Start,
"More template args specified than expected");
// Loop over all of the template arguments, setting them to the specified
@@ -246,7 +249,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
if (i < SubMultiClass.TemplateArgs.size()) {
// If a value is specified for this template arg, set it in the
// superclass now.
- if (SetValue(CurRec, SubMultiClass.RefLoc, SMCTArgs[i],
+ if (SetValue(CurRec, SubMultiClass.RefRange.Start, SMCTArgs[i],
std::vector<unsigned>(),
SubMultiClass.TemplateArgs[i]))
return true;
@@ -266,7 +269,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
++j) {
Record *Def = *j;
- if (SetValue(Def, SubMultiClass.RefLoc, SMCTArgs[i],
+ if (SetValue(Def, SubMultiClass.RefRange.Start, SMCTArgs[i],
std::vector<unsigned>(),
SubMultiClass.TemplateArgs[i]))
return true;
@@ -278,7 +281,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
Def->removeValue(SMCTArgs[i]);
}
} else if (!CurRec->getValue(SMCTArgs[i])->getValue()->isComplete()) {
- return Error(SubMultiClass.RefLoc,
+ return Error(SubMultiClass.RefRange.Start,
"Value not specified for template argument #"
+ utostr(i) + " (" + SMCTArgs[i]->getAsUnquotedString()
+ ") of subclass '" + SMC->Rec.getNameInitAsString() + "'!");
@@ -379,11 +382,12 @@ static bool isObjectStart(tgtok::TokKind K) {
static std::string GetNewAnonymousName() {
static unsigned AnonCounter = 0;
- return "anonymous."+utostr(AnonCounter++);
+ unsigned Tmp = AnonCounter++; // MSVC2012 ICEs without this.
+ return "anonymous." + utostr(Tmp);
}
/// ParseObjectName - If an object name is specified, return it. Otherwise,
-/// return an anonymous name.
+/// return 0.
/// ObjectName ::= Value [ '#' Value ]*
/// ObjectName ::= /*empty*/
///
@@ -395,7 +399,7 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) {
// These are all of the tokens that can begin an object body.
// Some of these can also begin values but we disallow those cases
// because they are unlikely to be useful.
- return StringInit::get(GetNewAnonymousName());
+ return 0;
default:
break;
}
@@ -443,35 +447,18 @@ Record *TGParser::ParseClassID() {
///
MultiClass *TGParser::ParseMultiClassID() {
if (Lex.getCode() != tgtok::Id) {
- TokError("expected name for ClassID");
+ TokError("expected name for MultiClassID");
return 0;
}
MultiClass *Result = MultiClasses[Lex.getCurStrVal()];
if (Result == 0)
- TokError("Couldn't find class '" + Lex.getCurStrVal() + "'");
-
- Lex.Lex();
- return Result;
-}
-
-Record *TGParser::ParseDefmID() {
- if (Lex.getCode() != tgtok::Id) {
- TokError("expected multiclass name");
- return 0;
- }
-
- MultiClass *MC = MultiClasses[Lex.getCurStrVal()];
- if (MC == 0) {
TokError("Couldn't find multiclass '" + Lex.getCurStrVal() + "'");
- return 0;
- }
Lex.Lex();
- return &MC->Rec;
+ return Result;
}
-
/// ParseSubClassReference - Parse a reference to a subclass or to a templated
/// subclass. This returns a SubClassRefTy with a null Record* on error.
///
@@ -481,17 +468,21 @@ Record *TGParser::ParseDefmID() {
SubClassReference TGParser::
ParseSubClassReference(Record *CurRec, bool isDefm) {
SubClassReference Result;
- Result.RefLoc = Lex.getLoc();
+ Result.RefRange.Start = Lex.getLoc();
- if (isDefm)
- Result.Rec = ParseDefmID();
- else
+ if (isDefm) {
+ if (MultiClass *MC = ParseMultiClassID())
+ Result.Rec = &MC->Rec;
+ } else {
Result.Rec = ParseClassID();
+ }
if (Result.Rec == 0) return Result;
// If there is no template arg list, we're done.
- if (Lex.getCode() != tgtok::less)
+ if (Lex.getCode() != tgtok::less) {
+ Result.RefRange.End = Lex.getLoc();
return Result;
+ }
Lex.Lex(); // Eat the '<'
if (Lex.getCode() == tgtok::greater) {
@@ -512,6 +503,7 @@ ParseSubClassReference(Record *CurRec, bool isDefm) {
return Result;
}
Lex.Lex();
+ Result.RefRange.End = Lex.getLoc();
return Result;
}
@@ -526,14 +518,16 @@ ParseSubClassReference(Record *CurRec, bool isDefm) {
SubMultiClassReference TGParser::
ParseSubMultiClassReference(MultiClass *CurMC) {
SubMultiClassReference Result;
- Result.RefLoc = Lex.getLoc();
+ Result.RefRange.Start = Lex.getLoc();
Result.MC = ParseMultiClassID();
if (Result.MC == 0) return Result;
// If there is no template arg list, we're done.
- if (Lex.getCode() != tgtok::less)
+ if (Lex.getCode() != tgtok::less) {
+ Result.RefRange.End = Lex.getLoc();
return Result;
+ }
Lex.Lex(); // Eat the '<'
if (Lex.getCode() == tgtok::greater) {
@@ -554,6 +548,7 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
return Result;
}
Lex.Lex();
+ Result.RefRange.End = Lex.getLoc();
return Result;
}
@@ -918,6 +913,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
}
case tgtok::XConcat:
+ case tgtok::XADD:
case tgtok::XSRA:
case tgtok::XSRL:
case tgtok::XSHL:
@@ -933,6 +929,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
switch (OpTok) {
default: llvm_unreachable("Unhandled code!");
case tgtok::XConcat: Code = BinOpInit::CONCAT;Type = DagRecTy::get(); break;
+ case tgtok::XADD: Code = BinOpInit::ADD; Type = IntRecTy::get(); break;
case tgtok::XSRA: Code = BinOpInit::SRA; Type = IntRecTy::get(); break;
case tgtok::XSRL: Code = BinOpInit::SRL; Type = IntRecTy::get(); break;
case tgtok::XSHL: Code = BinOpInit::SHL; Type = IntRecTy::get(); break;
@@ -1148,6 +1145,7 @@ RecTy *TGParser::ParseOperatorType() {
/// SimpleValue ::= '[' ValueList ']'
/// SimpleValue ::= '(' IDValue DagArgList ')'
/// SimpleValue ::= CONCATTOK '(' Value ',' Value ')'
+/// SimpleValue ::= ADDTOK '(' Value ',' Value ')'
/// SimpleValue ::= SHLTOK '(' Value ',' Value ')'
/// SimpleValue ::= SRATOK '(' Value ',' Value ')'
/// SimpleValue ::= SRLTOK '(' Value ',' Value ')'
@@ -1214,14 +1212,16 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
return 0;
}
Lex.Lex(); // eat the '>'
+ SMLoc EndLoc = Lex.getLoc();
// Create the new record, set it as CurRec temporarily.
static unsigned AnonCounter = 0;
Record *NewRec = new Record("anonymous.val."+utostr(AnonCounter++),
NameLoc,
- Records);
+ Records,
+ /*IsAnonymous=*/true);
SubClassReference SCRef;
- SCRef.RefLoc = NameLoc;
+ SCRef.RefRange = SMRange(NameLoc, EndLoc);
SCRef.Rec = Class;
SCRef.TemplateArgs = ValueList;
// Add info about the subclass to NewRec.
@@ -1401,6 +1401,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XEmpty:
case tgtok::XCast: // Value ::= !unop '(' Value ')'
case tgtok::XConcat:
+ case tgtok::XADD:
case tgtok::XSRA:
case tgtok::XSRL:
case tgtok::XSHL:
@@ -1546,29 +1547,39 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
/// ParseDagArgList - Parse the argument list for a dag literal expression.
///
-/// ParseDagArgList ::= Value (':' VARNAME)?
-/// ParseDagArgList ::= ParseDagArgList ',' Value (':' VARNAME)?
+/// DagArg ::= Value (':' VARNAME)?
+/// DagArg ::= VARNAME
+/// DagArgList ::= DagArg
+/// DagArgList ::= DagArgList ',' DagArg
std::vector<std::pair<llvm::Init*, std::string> >
TGParser::ParseDagArgList(Record *CurRec) {
std::vector<std::pair<llvm::Init*, std::string> > Result;
while (1) {
- Init *Val = ParseValue(CurRec);
- if (Val == 0) return std::vector<std::pair<llvm::Init*, std::string> >();
-
- // If the variable name is present, add it.
- std::string VarName;
- if (Lex.getCode() == tgtok::colon) {
- if (Lex.Lex() != tgtok::VarName) { // eat the ':'
- TokError("expected variable name in dag literal");
+ // DagArg ::= VARNAME
+ if (Lex.getCode() == tgtok::VarName) {
+ // A missing value is treated like '?'.
+ Result.push_back(std::make_pair(UnsetInit::get(), Lex.getCurStrVal()));
+ Lex.Lex();
+ } else {
+ // DagArg ::= Value (':' VARNAME)?
+ Init *Val = ParseValue(CurRec);
+ if (Val == 0)
return std::vector<std::pair<llvm::Init*, std::string> >();
- }
- VarName = Lex.getCurStrVal();
- Lex.Lex(); // eat the VarName.
- }
- Result.push_back(std::make_pair(Val, VarName));
+ // If the variable name is present, add it.
+ std::string VarName;
+ if (Lex.getCode() == tgtok::colon) {
+ if (Lex.Lex() != tgtok::VarName) { // eat the ':'
+ TokError("expected variable name in dag literal");
+ return std::vector<std::pair<llvm::Init*, std::string> >();
+ }
+ VarName = Lex.getCurStrVal();
+ Lex.Lex(); // eat the VarName.
+ }
+ Result.push_back(std::make_pair(Val, VarName));
+ }
if (Lex.getCode() != tgtok::comma) break;
Lex.Lex(); // eat the ','
}
@@ -1876,6 +1887,17 @@ bool TGParser::ParseBody(Record *CurRec) {
return false;
}
+/// \brief Apply the current let bindings to \a CurRec.
+/// \returns true on error, false otherwise.
+bool TGParser::ApplyLetStack(Record *CurRec) {
+ for (unsigned i = 0, e = LetStack.size(); i != e; ++i)
+ for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j)
+ if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name,
+ LetStack[i][j].Bits, LetStack[i][j].Value))
+ return true;
+ return false;
+}
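+
+// These bindings originate from TableGen 'let' scopes, e.g. (illustrative
+// names, not from this file):
+//   let isReturn = 1, isTerminator = 1 in
+//   def RET : Instruction;
+// Every record defined inside the scope receives those values through the
+// SetValue() calls above.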
+
/// ParseObjectBody - Parse the body of a def or class. This consists of an
/// optional ClassList followed by a Body. CurRec is the current def or class
/// that is being parsed.
@@ -1906,12 +1928,8 @@ bool TGParser::ParseObjectBody(Record *CurRec) {
}
}
- // Process any variables on the let stack.
- for (unsigned i = 0, e = LetStack.size(); i != e; ++i)
- for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j)
- if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name,
- LetStack[i][j].Bits, LetStack[i][j].Value))
- return true;
+ if (ApplyLetStack(CurRec))
+ return true;
return ParseBody(CurRec);
}
@@ -1927,7 +1945,13 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
Lex.Lex(); // Eat the 'def' token.
// Parse ObjectName and make a record for it.
- Record *CurRec = new Record(ParseObjectName(CurMultiClass), DefLoc, Records);
+ Record *CurRec;
+ Init *Name = ParseObjectName(CurMultiClass);
+ if (Name)
+ CurRec = new Record(Name, DefLoc, Records);
+ else
+ CurRec = new Record(GetNewAnonymousName(), DefLoc, Records,
+ /*IsAnonymous=*/true);
if (!CurMultiClass && Loops.empty()) {
// Top-level def definition.
@@ -2160,7 +2184,12 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) {
/// ParseMultiClass - Parse a multiclass definition.
///
/// MultiClassInst ::= MULTICLASS ID TemplateArgList?
-/// ':' BaseMultiClassList '{' MultiClassDef+ '}'
+/// ':' BaseMultiClassList '{' MultiClassObject+ '}'
+/// MultiClassObject ::= DefInst
+/// MultiClassObject ::= MultiClassInst
+/// MultiClassObject ::= DefMInst
+/// MultiClassObject ::= LETCommand '{' ObjectList '}'
+/// MultiClassObject ::= LETCommand Object
///
bool TGParser::ParseMultiClass() {
assert(Lex.getCode() == tgtok::MultiClass && "Unexpected token");
@@ -2242,7 +2271,7 @@ Record *TGParser::
InstantiateMulticlassDef(MultiClass &MC,
Record *DefProto,
Init *DefmPrefix,
- SMLoc DefmPrefixLoc) {
+ SMRange DefmPrefixRange) {
// We need to preserve DefProto so it can be reused for later
// instantiations, so create a new Record to inherit from it.
@@ -2251,8 +2280,11 @@ InstantiateMulticlassDef(MultiClass &MC,
// name, substitute the prefix for #NAME#. Otherwise, use the defm name
// as a prefix.
- if (DefmPrefix == 0)
+ bool IsAnonymous = false;
+ if (DefmPrefix == 0) {
DefmPrefix = StringInit::get(GetNewAnonymousName());
+ IsAnonymous = true;
+ }
Init *DefName = DefProto->getNameInit();
@@ -2269,21 +2301,21 @@ InstantiateMulticlassDef(MultiClass &MC,
}
// Make a trail of SMLocs from the multiclass instantiations.
- SmallVector<SMLoc, 4> Locs(1, DefmPrefixLoc);
+ SmallVector<SMLoc, 4> Locs(1, DefmPrefixRange.Start);
Locs.append(DefProto->getLoc().begin(), DefProto->getLoc().end());
- Record *CurRec = new Record(DefName, Locs, Records);
+ Record *CurRec = new Record(DefName, Locs, Records, IsAnonymous);
SubClassReference Ref;
- Ref.RefLoc = DefmPrefixLoc;
+ Ref.RefRange = DefmPrefixRange;
Ref.Rec = DefProto;
AddSubClass(CurRec, Ref);
// Set the value for NAME. We don't resolve references to it 'til later,
// though, so that uses in nested multiclass names don't get
// confused.
- if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector<unsigned>(),
+ if (SetValue(CurRec, Ref.RefRange.Start, "NAME", std::vector<unsigned>(),
DefmPrefix)) {
- Error(DefmPrefixLoc, "Could not resolve "
+ Error(DefmPrefixRange.Start, "Could not resolve "
+ CurRec->getNameInitAsString() + ":NAME to '"
+ DefmPrefix->getAsUnquotedString() + "'");
return 0;
@@ -2314,7 +2346,7 @@ InstantiateMulticlassDef(MultiClass &MC,
// Ensure redefinition doesn't happen.
if (Records.getDef(CurRec->getNameInitAsString())) {
- Error(DefmPrefixLoc, "def '" + CurRec->getNameInitAsString() +
+ Error(DefmPrefixRange.Start, "def '" + CurRec->getNameInitAsString() +
"' already defined, instantiating defm with subdef '" +
DefProto->getNameInitAsString() + "'");
return 0;
@@ -2365,33 +2397,30 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC,
Record *DefProto,
SMLoc DefmPrefixLoc) {
// If the mdef is inside a 'let' expression, add to each def.
- for (unsigned i = 0, e = LetStack.size(); i != e; ++i)
- for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j)
- if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name,
- LetStack[i][j].Bits, LetStack[i][j].Value))
- return Error(DefmPrefixLoc, "when instantiating this defm");
+ if (ApplyLetStack(CurRec))
+ return Error(DefmPrefixLoc, "when instantiating this defm");
// Don't create a top level definition for defm inside multiclasses,
// instead, only update the prototypes and bind the template args
// with the new created definition.
- if (CurMultiClass) {
- for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size();
- i != e; ++i)
- if (CurMultiClass->DefPrototypes[i]->getNameInit()
- == CurRec->getNameInit())
- return Error(DefmPrefixLoc, "defm '" + CurRec->getNameInitAsString() +
- "' already defined in this multiclass!");
- CurMultiClass->DefPrototypes.push_back(CurRec);
+ if (!CurMultiClass)
+ return false;
+ for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size();
+ i != e; ++i)
+ if (CurMultiClass->DefPrototypes[i]->getNameInit()
+ == CurRec->getNameInit())
+ return Error(DefmPrefixLoc, "defm '" + CurRec->getNameInitAsString() +
+ "' already defined in this multiclass!");
+ CurMultiClass->DefPrototypes.push_back(CurRec);
- // Copy the template arguments for the multiclass into the new def.
- const std::vector<Init *> &TA =
- CurMultiClass->Rec.getTemplateArgs();
+ // Copy the template arguments for the multiclass into the new def.
+ const std::vector<Init *> &TA =
+ CurMultiClass->Rec.getTemplateArgs();
- for (unsigned i = 0, e = TA.size(); i != e; ++i) {
- const RecordVal *RV = CurMultiClass->Rec.getValue(TA[i]);
- assert(RV && "Template arg doesn't exist?");
- CurRec->addValue(*RV);
- }
+ for (unsigned i = 0, e = TA.size(); i != e; ++i) {
+ const RecordVal *RV = CurMultiClass->Rec.getValue(TA[i]);
+ assert(RV && "Template arg doesn't exist?");
+ CurRec->addValue(*RV);
}
return false;
@@ -2403,14 +2432,14 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC,
///
bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
assert(Lex.getCode() == tgtok::Defm && "Unexpected token!");
-
+ SMLoc DefmLoc = Lex.getLoc();
Init *DefmPrefix = 0;
if (Lex.Lex() == tgtok::Id) { // eat the defm.
DefmPrefix = ParseObjectName(CurMultiClass);
}
- SMLoc DefmPrefixLoc = Lex.getLoc();
+ SMLoc DefmPrefixEndLoc = Lex.getLoc();
if (Lex.getCode() != tgtok::colon)
return TokError("expected ':' after defm identifier");
@@ -2446,15 +2475,17 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
for (unsigned i = 0, e = MC->DefPrototypes.size(); i != e; ++i) {
Record *DefProto = MC->DefPrototypes[i];
- Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix, DefmPrefixLoc);
+ Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix,
+ SMRange(DefmLoc,
+ DefmPrefixEndLoc));
if (!CurRec)
return true;
- if (ResolveMulticlassDefArgs(*MC, CurRec, DefmPrefixLoc, SubClassLoc,
+ if (ResolveMulticlassDefArgs(*MC, CurRec, DefmLoc, SubClassLoc,
TArgs, TemplateVals, true/*Delete args*/))
return Error(SubClassLoc, "could not instantiate def");
- if (ResolveMulticlassDef(*MC, CurRec, DefProto, DefmPrefixLoc))
+ if (ResolveMulticlassDef(*MC, CurRec, DefProto, DefmLoc))
return Error(SubClassLoc, "could not instantiate def");
NewRecDefs.push_back(CurRec);
@@ -2493,12 +2524,8 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
if (AddSubClass(CurRec, SubClass))
return true;
- // Process any variables on the let stack.
- for (unsigned i = 0, e = LetStack.size(); i != e; ++i)
- for (unsigned j = 0, e = LetStack[i].size(); j != e; ++j)
- if (SetValue(CurRec, LetStack[i][j].Loc, LetStack[i][j].Name,
- LetStack[i][j].Bits, LetStack[i][j].Value))
- return true;
+ if (ApplyLetStack(CurRec))
+ return true;
}
if (Lex.getCode() != tgtok::comma) break;
diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
index 9c2ad43c426e..044e3a02ba4b 100644
--- a/lib/TableGen/TGParser.h
+++ b/lib/TableGen/TGParser.h
@@ -14,11 +14,11 @@
#ifndef TGPARSER_H
#define TGPARSER_H
-#include "llvm/TableGen/Record.h"
#include "TGLexer.h"
-#include "llvm/TableGen/Error.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
#include <map>
namespace llvm {
@@ -96,7 +96,7 @@ public:
bool TokError(const Twine &Msg) const {
return Error(Lex.getLoc(), Msg);
}
- const std::vector<std::string> &getDependencies() const {
+ const TGLexer::DependenciesMapTy &getDependencies() const {
return Lex.getDependencies();
}
@@ -134,7 +134,7 @@ private: // Parser methods.
Record *InstantiateMulticlassDef(MultiClass &MC,
Record *DefProto,
Init *DefmPrefix,
- SMLoc DefmPrefixLoc);
+ SMRange DefmPrefixRange);
bool ResolveMulticlassDefArgs(MultiClass &MC,
Record *DefProto,
SMLoc DefmPrefixLoc,
@@ -183,7 +183,7 @@ private: // Parser methods.
Init *ParseObjectName(MultiClass *CurMultiClass);
Record *ParseClassID();
MultiClass *ParseMultiClassID();
- Record *ParseDefmID();
+ bool ApplyLetStack(Record *CurRec);
};
} // end namespace llvm
diff --git a/lib/TableGen/TableGenBackend.cpp b/lib/TableGen/TableGenBackend.cpp
index 7c8367ab9dfe..79d567753a6c 100644
--- a/lib/TableGen/TableGenBackend.cpp
+++ b/lib/TableGen/TableGenBackend.cpp
@@ -14,13 +14,20 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/TableGenBackend.h"
+#include <algorithm>
+
using namespace llvm;
+const size_t MAX_LINE_LEN = 80U;
+
static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill,
StringRef Suffix) {
- uint64_t Pos = OS.tell();
+ size_t Pos = (size_t)OS.tell();
+ assert((MAX_LINE_LEN - Prefix.str().size() - Suffix.size() > 0) &&
+ "header line exceeds max limit");
OS << Prefix;
- for (unsigned i = OS.tell() - Pos, e = 80 - Suffix.size(); i != e; ++i)
+ const size_t e = MAX_LINE_LEN - Suffix.size();
+ for (size_t i = (size_t)OS.tell() - Pos; i < e; ++i)
OS << Fill;
OS << Suffix << '\n';
}
@@ -28,10 +35,22 @@ static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill,
void llvm::emitSourceFileHeader(StringRef Desc, raw_ostream &OS) {
printLine(OS, "/*===- TableGen'erated file ", '-', "*- C++ -*-===*\\");
printLine(OS, "|*", ' ', "*|");
- printLine(OS, "|* " + Desc, ' ', "*|");
- printLine(OS, "|*", ' ', "*|");
- printLine(OS, "|* Automatically generated file, do not edit!", ' ', "*|");
- printLine(OS, "|*", ' ', "*|");
+ size_t Pos = 0U;
+ size_t PosE;
+ StringRef Prefix("|*");
+ StringRef Suffix(" *|");
+ do {
+ size_t PSLen = Suffix.size() + Prefix.size();
+ PosE = Pos + ((MAX_LINE_LEN > (Desc.size() - PSLen)) ?
+ Desc.size() :
+ MAX_LINE_LEN - PSLen);
+ printLine(OS, Prefix + Desc.slice(Pos, PosE), ' ', Suffix);
+ Pos = PosE;
+ } while (Pos < Desc.size());
+ printLine(OS, Prefix, ' ', Suffix);
+ printLine(OS, Prefix + " Automatically generated file, do not edit!", ' ',
+ Suffix);
+ printLine(OS, Prefix, ' ', Suffix);
printLine(OS, "\\*===", '-', "===*/");
OS << '\n';
}
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
new file mode 100644
index 000000000000..4de4faa58182
--- /dev/null
+++ b/lib/Target/AArch64/AArch64.h
@@ -0,0 +1,42 @@
+//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AArch64 back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_H
+#define LLVM_TARGET_AARCH64_H
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AArch64AsmPrinter;
+class FunctionPass;
+class AArch64TargetMachine;
+class MachineInstr;
+class MCInst;
+
+FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
+
+FunctionPass *createAArch64BranchFixupPass();
+
+void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AArch64AsmPrinter &AP);
+
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
new file mode 100644
index 000000000000..e17052b4a565
--- /dev/null
+++ b/lib/Target/AArch64/AArch64.td
@@ -0,0 +1,70 @@
+//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// AArch64 Subtarget features.
+//
+
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable Advanced SIMD instructions">;
+
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable cryptographic instructions">;
+
+//===----------------------------------------------------------------------===//
+// AArch64 Processors
+//
+
+include "AArch64Schedule.td"
+
+def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AArch64RegisterInfo.td"
+
+include "AArch64CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AArch64InstrInfo.td"
+
+def AArch64InstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+
+def A64InstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def AArch64 : Target {
+ let InstructionSet = AArch64InstrInfo;
+ let AssemblyWriters = [A64InstPrinter];
+}
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
new file mode 100644
index 000000000000..47ebb826e0d0
--- /dev/null
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -0,0 +1,347 @@
+//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format AArch64 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "AArch64AsmPrinter.h"
+#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+MachineLocation
+AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ // See emitFrameIndexDebugValue in InstrInfo for where this instruction is
+ // expected to be created.
+ assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg()
+ && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+/// Try to print a floating-point register as if it belonged to a specified
+/// register-class. For example the inline asm operand modifier "b" requires its
+/// argument to be printed as "bN".
+static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
+ const TargetRegisterInfo *TRI,
+ const TargetRegisterClass &RegClass,
+ raw_ostream &O) {
+ if (!MO.isReg())
+ return true;
+
+ for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
+ if (RegClass.contains(*AR)) {
+ O << AArch64InstPrinter::getRegisterName(*AR);
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
+/// with the obvious type and an immediate 0 as either wzr or xzr.
+static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
+ const TargetRegisterInfo *TRI,
+ const TargetRegisterClass &RegClass,
+ raw_ostream &O) {
+ char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';
+
+ if (MO.isImm() && MO.getImm() == 0) {
+ O << Prefix << "zr";
+ return false;
+ } else if (MO.isReg()) {
+ if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
+ O << (Prefix == 'x' ? "sp" : "wsp");
+ return false;
+ }
+
+ for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
+ if (RegClass.contains(*AR)) {
+ O << AArch64InstPrinter::getRegisterName(*AR);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
+ bool PrintImmediatePrefix,
+ StringRef Suffix, raw_ostream &O) {
+ StringRef Name;
+ StringRef Modifier;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unexpected operand for symbolic address constraint");
+ case MachineOperand::MO_GlobalAddress:
+ Name = Mang->getSymbol(MO.getGlobal())->getName();
+
+ // Global variables may be accessed either via a GOT or in various fun and
+ // interesting TLS-model specific ways. Set the prefix modifier as
+ // appropriate here.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ if (GV->isThreadLocal()) {
+ switch (TM.getTLSModel(GV)) {
+ case TLSModel::GeneralDynamic:
+ Modifier = "tlsdesc";
+ break;
+ case TLSModel::LocalDynamic:
+ Modifier = "dtprel";
+ break;
+ case TLSModel::InitialExec:
+ Modifier = "gottprel";
+ break;
+ case TLSModel::LocalExec:
+ Modifier = "tprel";
+ break;
+ }
+ } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ Modifier = "got";
+ }
+ }
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ Name = MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ Name = GetCPISymbol(MO.getIndex())->getName();
+ break;
+ }
+
+ // Some instructions (notably ADRP) don't take the # prefix for
+ // immediates. Only print it if asked to.
+ if (PrintImmediatePrefix)
+ O << '#';
+
+  // Only need the joining "_" if both the modifier and the suffix are
+  // non-empty. This little block simply takes care of the four possible
+  // combinations involved there.
+ if (Modifier == "" && Suffix == "")
+ O << Name;
+ else if (Modifier == "" && Suffix != "")
+ O << ":" << Suffix << ':' << Name;
+ else if (Modifier != "" && Suffix == "")
+ O << ":" << Modifier << ':' << Name;
+ else
+ O << ":" << Modifier << '_' << Suffix << ':' << Name;
+
+ return false;
+}
+
+bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!ExtraCode || !ExtraCode[0]) {
+    // There's actually no operand modifier, which leads to a slightly
+    // eclectic set of behaviours that we have to handle here.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unexpected operand for inline assembly");
+ case MachineOperand::MO_Register:
+ // GCC prints the unmodified operand of a 'w' constraint as the vector
+ // register. Technically, we could allocate the argument as a VPR128, but
+ // that leads to extremely dodgy copies being generated to get the data
+ // there.
+ if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O))
+ O << AArch64InstPrinter::getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << '#' << MO.getImm();
+ break;
+ case MachineOperand::MO_FPImmediate:
+ assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
+ O << "#0.0";
+ break;
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ return printSymbolicAddress(MO, false, "", O);
+ }
+ return false;
+ }
+
+ // We have a real modifier to handle.
+ switch(ExtraCode[0]) {
+ default:
+ // See if this is a generic operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << MI->getOperand(OpNum).getImm();
+ return false;
+ case 'w':
+ // Output 32-bit general register operand, constant zero as wzr, or stack
+ // pointer as wsp. Ignored when used with other operand types.
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR32RegClass, O);
+ case 'x':
+ // Output 64-bit general register operand, constant zero as xzr, or stack
+ // pointer as sp. Ignored when used with other operand types.
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR64RegClass, O);
+ case 'H':
+ // Output higher numbered of a 64-bit general register pair
+ case 'Q':
+ // Output least significant register of a 64-bit general register pair
+ case 'R':
+ // Output most significant register of a 64-bit general register pair
+
+ // FIXME note: these three operand modifiers will require, to some extent,
+ // adding a paired GPR64 register class. Initial investigation suggests that
+ // assertions are hit unless it has a type and is made legal for that type
+ // in ISelLowering. After that step is made, the number of modifications
+ // needed explodes (operation legality, calling conventions, stores, reg
+ // copies ...).
+ llvm_unreachable("FIXME: Unimplemented register pairs");
+ case 'b':
+ // Output 8-bit FP/SIMD scalar register operand, prefixed with b.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR8RegClass, O);
+ case 'h':
+ // Output 16-bit FP/SIMD scalar register operand, prefixed with h.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR16RegClass, O);
+ case 's':
+ // Output 32-bit FP/SIMD scalar register operand, prefixed with s.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR32RegClass, O);
+ case 'd':
+ // Output 64-bit FP/SIMD scalar register operand, prefixed with d.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR64RegClass, O);
+ case 'q':
+ // Output 128-bit FP/SIMD scalar register operand, prefixed with q.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR128RegClass, O);
+ case 'A':
+ // Output symbolic address with appropriate relocation modifier (also
+ // suitable for ADRP).
+ return printSymbolicAddress(MI->getOperand(OpNum), false, "", O);
+ case 'L':
+ // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
+ // modifier.
+ return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O);
+ case 'G':
+ // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
+ // modifier (currently only for TLS local exec).
+ return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O);
+ }
+}
+
+bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ // Currently both the memory constraints (m and Q) behave the same and amount
+ // to the address as a single register. In future, we may allow "m" to provide
+ // both a base and an offset.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline assembly memory operand");
+ O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
+ return false;
+}
+
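+/// Emit a comment describing where a variable's value lives, e.g.
+/// "DEBUG_VALUE: var <- [x29+8]+0" for a frame-register + offset location.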
+void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+  assert(NOps == 4 && "Invalid number of operands for DBG_VALUE!");
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+  // Cast away const; the DI* classes do not take const operands for some
+  // reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg());
+ OS << '+' << MI->getOperand(1).getImm();
+ OS << ']';
+ OS << "+" << MI->getOperand(NOps - 2).getImm();
+}
+
+
+#include "AArch64GenMCPseudoLowering.inc"
+
+void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
+ return;
+
+ switch (MI->getOpcode()) {
+ case AArch64::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const DataLayout *TD = TM.getDataLayout();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ TD->getPointerSize(0), 0);
+ }
+ Stubs.clear();
+ }
+ }
+}
+
+bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmPrinter() {
+ RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64Target);
+}
+
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h
new file mode 100644
index 000000000000..af0c9fed066f
--- /dev/null
+++ b/lib/Target/AArch64/AArch64AsmPrinter.h
@@ -0,0 +1,80 @@
+// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AArch64 assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64ASMPRINTER_H
+#define LLVM_AARCH64ASMPRINTER_H
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const AArch64Subtarget *Subtarget;
+
+ // emitPseudoExpansionLowering - tblgen'erated.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+ public:
+ explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ }
+
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+
+ MCOperand lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Sym) const;
+
+ void EmitInstruction(const MachineInstr *MI);
+ void EmitEndOfAsmFile(Module &M);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ /// printSymbolicAddress - Given some kind of reasonably bare symbolic
+ /// reference, print out the appropriate asm string to represent it. If
+ /// appropriate, a relocation-specifier will be produced, composed of a
+ /// general class derived from the MO parameter and an instruction-specific
+ /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
+ /// given.
+ bool printSymbolicAddress(const MachineOperand &MO,
+ bool PrintImmediatePrefix,
+ StringRef Suffix, raw_ostream &O);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+ virtual const char *getPassName() const {
+ return "AArch64 Assembly Printer";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/lib/Target/AArch64/AArch64BranchFixupPass.cpp
new file mode 100644
index 000000000000..71233ba5c3dc
--- /dev/null
+++ b/lib/Target/AArch64/AArch64BranchFixupPass.cpp
@@ -0,0 +1,600 @@
+//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that fixes AArch64 branches which have ended up out
+// of range for their immediate operands.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-branch-fixup"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+
+/// Return the worst case padding that could result from unknown offset bits.
+/// This does not include alignment padding caused by known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
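+///
+/// For example, LogAlign == 2 with no known offset bits gives a worst case of
+/// (1u << 2) - (1u << 0) == 3 bytes of padding.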
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+namespace {
+ /// Due to limited PC-relative displacements, conditional branches to distant
+ /// blocks may need converting into an unconditional equivalent. For example:
+ /// tbz w1, #0, far_away
+ /// becomes
+ /// tbnz w1, #0, skip
+ /// b far_away
+ /// skip:
+ class AArch64BranchFixup : public MachineFunctionPass {
+ /// Information about the offset and size of a single basic block.
+ struct BasicBlockInfo {
+ /// Distance from the beginning of the function to the beginning of this
+ /// basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset;
+
+ /// Size of the basic block in bytes. If the block contains inline
+ /// assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// The number of low bits in Offset that are known to be exact. The
+ /// remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// When non-zero, the block contains instructions (inline asm) of unknown
+ /// size. The real size may be smaller than Size bytes by a multiple of 1
+ /// << Unalign.
+ uint8_t Unalign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
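+      /// For example, a block of Size 6 with 4 known bits can only guarantee
+      /// CountTrailingZeros_32(6) == 1 known bit at its end.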
+ unsigned internalKnownBits() const {
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known bits, assume the
+ // worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = CountTrailingZeros_32(Size);
+ return Bits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ if (!LogAlign)
+ return PO;
+ // Add alignment padding from the terminator.
+ return PO + UnknownPadding(LogAlign, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+    /// of known bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(LogAlign, internalKnownBits());
+ }
+ };
+
+ std::vector<BasicBlockInfo> BBInfo;
+
+  /// One per immediate branch: keeps the machine instruction pointer, the
+  /// number of displacement bits available, and whether the branch is
+  /// conditional.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned OffsetBits : 31;
+ bool IsCond : 1;
+ ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
+ : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
+ };
+
+ /// Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ MachineFunction *MF;
+ const AArch64InstrInfo *TII;
+ public:
+ static char ID;
+ AArch64BranchFixup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "AArch64 branch fixup pass";
+ }
+
+ private:
+ void initializeFunctionInfo();
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned OffsetBits);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+
+ void computeBlockSize(MachineBasicBlock *MBB);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify();
+ };
+ char AArch64BranchFixup::ID = 0;
+}
+
+/// Check that BBOffsets are consistent - debugging.
+void AArch64BranchFixup::verify() {
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ unsigned MBBId = MBB->getNumber();
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void AArch64BranchFixup::dumpBBs() {
+ DEBUG({
+    for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
+}
+
+/// Returns an instance of the branch fixup pass.
+FunctionPass *llvm::createAArch64BranchFixupPass() {
+ return new AArch64BranchFixup();
+}
+
+bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ DEBUG(dbgs() << "***** AArch64BranchFixup ******");
+ TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block and location of each immediate branch.
+ initializeFunctionInfo();
+
+ // Iteratively fix up branches until there is no change.
+ unsigned NoBRIters = 0;
+ bool MadeChange = false;
+ while (true) {
+ DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n');
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ report_fatal_error("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify();
+
+ DEBUG(dbgs() << '\n'; dumpBBs());
+
+ BBInfo.clear();
+ ImmBranches.clear();
+
+ return MadeChange;
+}
+
+/// Return true if the specified basic block can fallthrough into the block
+/// immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// Do the initial scan of the function, building up information about the sizes
+/// of each block, and each immediate branch.
+void AArch64BranchFixup::initializeFunctionInfo() {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ adjustBBOffsetsAfter(MF->begin());
+
+ // Now go back through the instructions and build up our data structures.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ int Opc = I->getOpcode();
+ if (I->isBranch()) {
+ bool IsCond = false;
+
+ // The offsets encoded in instructions here scale by the instruction
+ // size (4 bytes), effectively increasing their range by 2 bits.
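+          // E.g. TBZ's 14 encoded bits give a branch range of +-32KiB, Bcc's
+          // 19 bits +-1MiB, and Bimm's 26 bits +-128MiB.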
+ unsigned Bits = 0;
+ switch (Opc) {
+          default:
+            continue; // Ignore other branch types
+ case AArch64::TBZxii:
+ case AArch64::TBZwii:
+ case AArch64::TBNZxii:
+ case AArch64::TBNZwii:
+ IsCond = true;
+ Bits = 14 + 2;
+ break;
+ case AArch64::Bcc:
+ case AArch64::CBZx:
+ case AArch64::CBZw:
+ case AArch64::CBNZx:
+ case AArch64::CBNZw:
+ IsCond = true;
+ Bits = 19 + 2;
+ break;
+ case AArch64::Bimm:
+ Bits = 26 + 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
+ }
+ }
+ }
+}
+
+/// Compute the size and some alignment information for MBB. This function
+/// updates BBInfo directly.
+void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->getInstSizeInBytes(*I);
+    // For inline asm, getInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = 2;
+ }
+}
+
+/// Return the current offset of the specified machine instruction from the
+/// start of the function. This offset changes as stuff is moved around inside
+/// the function.
+unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->getInstSizeInBytes(*I);
+ }
+ return Offset;
+}
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change, and return the newly created block.
+MachineBasicBlock *
+AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF->insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ NewBB->transferSuccessors(OrigBB);
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ computeBlockSize(OrigBB);
+
+  // Figure out how large the NewBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ computeBlockSize(NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBBOffsetsAfter(OrigBB);
+
+ return NewBB;
+}
+
+void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated 2 blocks. This is the maximum number of blocks
+ // changed before calling this function.
+ if (i > BBNum + 2 &&
+ BBInfo[i].Offset == Offset &&
+ BBInfo[i].KnownBits == KnownBits)
+ break;
+
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
+ }
+}
+
+/// Returns true if the distance between the specified MI and the specified BB
+/// fits in MI's displacement field.
+bool AArch64BranchFixup::isBBInRange(MachineInstr *MI,
+ MachineBasicBlock *DestBB,
+ unsigned OffsetBits) {
+ int64_t BrOffset = getOffsetOf(MI);
+ int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " bits available=" << OffsetBits
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ return isIntN(OffsetBits, DestOffset - BrOffset);
+}
+
+/// Fix up an immediate branch whose destination is too far away to fit in its
+/// displacement field.
+bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isMBB()) {
+ DestBB = MI->getOperand(i).getMBB();
+ break;
+ }
+ }
+ assert(DestBB && "Branch with no destination BB?");
+
+ // Check to see if the DestBB is already in-range.
+ if (isBBInRange(MI, DestBB, Br.OffsetBits))
+ return false;
+
+ assert(Br.IsCond && "Only conditional branches should need fixup");
+ return fixupConditionalBr(Br);
+}
+
+/// Fix up a conditional branch whose destination is too far away to fit in its
+/// displacement field. It is converted to an inverse conditional branch + an
+/// unconditional branch to the destination.
+bool
+AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned CondBrMBBOperand = 0;
+
+ // The general idea is to add an unconditional branch to the destination and
+ // invert the conditional branch to jump over it. Complications occur around
+ // fallthrough and unreachable ends to the block.
+ // b.lt L1
+ // =>
+ // b.ge L2
+ // b L1
+ // L2:
+
+ // First we invert the conditional branch, by creating a replacement if
+ // necessary. This if statement contains all the special handling of different
+ // branch types.
+ if (MI->getOpcode() == AArch64::Bcc) {
+ // The basic block is operand number 1 for Bcc
+ CondBrMBBOperand = 1;
+
+ A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
+ CC = A64InvertCondCode(CC);
+ MI->getOperand(0).setImm(CC);
+ } else {
+ MachineInstrBuilder InvertedMI;
+ int InvertedOpcode;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown branch type");
+ case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
+ case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
+ case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
+ case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
+ case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
+ case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
+ case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
+ case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
+ }
+
+ InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
+ for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) {
+ InvertedMI.addOperand(MI->getOperand(i));
+ if (MI->getOperand(i).isMBB())
+ CondBrMBBOperand = i;
+ }
+
+ MI->eraseFromParent();
+ MI = Br.MI = InvertedMI;
+ }
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through
+ // block. Otherwise, split the MBB before the next instruction.
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == AArch64::Bimm) {
+ // Last MI in the BB is an unconditional branch. We can swap destinations:
+ // b.eq L1 (temporarily b.ne L1 after first change)
+ // b L2
+ // =>
+ // b.ne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ MachineBasicBlock::iterator MBBI = MI; ++MBBI;
+ splitBlockBeforeInstr(MBBI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->getInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size -= delta;
+ MBB->back().eraseFromParent();
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
+ }
+
+ // After splitting and removing the unconditional branch from the original BB,
+ // the structure is now:
+ // oldbb:
+ // [things]
+ // b.invertedCC L1
+ // splitbb/fallthroughbb:
+ // [old b L2/real continuation]
+ //
+ // We now have to change the conditional branch to point to splitbb and add an
+ // unconditional branch after it to L1, giving the final structure:
+ // oldbb:
+ // [things]
+ // b.invertedCC splitbb
+ // b L1
+ // splitbb/fallthroughbb:
+ // [old b L2/real continuation]
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(dbgs() << " Insert B to BB#"
+ << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new unconditional branch and fixup the destination of the
+ // conditional one. Also update the ImmBranch as well as adding a new entry
+ // for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
+ .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
+ MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
+
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
+
+ // 26 bits written down in Bimm, specifying a multiple of 4.
+ unsigned OffsetBits = 26 + 2;
+ ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
+
+ adjustBBOffsetsAfter(MBB);
+ return true;
+}
diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td
new file mode 100644
index 000000000000..b880d8373deb
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CallingConv.td
@@ -0,0 +1,196 @@
+//==-- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for AArch64 architecture.
+//===----------------------------------------------------------------------===//
+
+
+// The AArch64 Procedure Call Standard is unfortunately specified at a slightly
+// higher level of abstraction than LLVM's target interface presents. In
+// particular, it refers (like other ABIs, in fact) directly to
+// structs. However, generic LLVM code takes the liberty of lowering structure
+// arguments to the component fields before we see them.
+//
+// As a result, the obvious direct map from LLVM IR to PCS concepts can't be
+// implemented, so the goals of this calling convention are, in decreasing
+// priority order:
+// 1. Expose *some* way to express the concepts required to implement the
+// generic PCS from a front-end.
+// 2. Provide a sane ABI for pure LLVM.
+// 3. Follow the generic PCS as closely as is naturally possible.
+//
+// The suggested front-end implementation of PCS features is:
+// * Integer, float and vector arguments of all sizes which end up in
+// registers are passed and returned via the natural LLVM type.
+// * Structure arguments with size <= 16 bytes are passed and returned in
+// registers as similar integer or composite types. For example:
+// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
+// * HFAs in registers follow rules similar to small structs: appropriate
+// composite types.
+// * Structure arguments with size > 16 bytes are passed via a pointer,
+// handled completely by the front-end.
+// * Structure return values > 16 bytes via an sret pointer argument.
+// * Other stack-based arguments (not large structs) are passed using byval
+// pointers. Padding arguments are added beforehand to guarantee a large
+// struct doesn't later use integer registers.
+//
+// N.b. this means that it is the front-end's responsibility (if it cares about
+// PCS compliance) to check whether enough registers are available for an
+// argument when deciding how to pass it.
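+//
+// For example, under this scheme a front-end would pass a 12-byte struct in
+// registers as a [2 x i64] argument, and a 32-byte struct indirectly via a
+// pointer to a stack copy.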
+
+class CCIfAlign<int Align, CCAction A>:
+ CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;
+
+def CC_A64_APCS : CallingConv<[
+ // SRet is an LLVM-specific concept, so it takes precedence over general ABI
+ // concerns. However, this rule will be used by C/C++ frontends to implement
+ // structure return.
+ CCIfSRet<CCAssignToReg<[X8]>>,
+
+ // Put ByVal arguments directly on the stack. Minimum size and alignment of a
+ // slot is 64-bit.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Canonicalise the various types that live in different floating-point
+  // registers. This makes sense because the PCS does not distinguish between
+  // Short Vector and Floating-point types.
+ CCIfType<[v2i8], CCBitConvertToType<f16>>,
+ CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
+ CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCBitConvertToType<f128>>,
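+  // E.g. a v2f32 argument is treated as an f64 from this point on, landing in
+  // the low 64 bits of a D register (or on the stack once D0-D7 are taken).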
+
+ // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision
+ // Floating-point or Short Vector Type and the NSRN is less than 8, then the
+ // argument is allocated to the least significant bits of register
+ // v[NSRN]. The NSRN is incremented by one. The argument has now been
+ // allocated."
+ CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+
+ // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated
+  // SIMD and Floating-point registers (NSRN + number of elements <= 8), then the
+ // argument is allocated to SIMD and Floating-point registers (with one
+ // register per element of the HFA). The NSRN is incremented by the number of
+ // registers used. The argument has now been allocated."
+ //
+ // N.b. As above, this rule is the responsibility of the front-end.
+
+ // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of
+ // the argument is rounded up to the nearest multiple of 8 bytes."
+ //
+ // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short
+ // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural
+ // Alignment of the Argument's type."
+ //
+ // It is expected that these will be satisfied by adding dummy arguments to
+ // the prototype.
+
+ // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point
+ // type then the size of the argument is set to 8 bytes. The effect is as if
+ // the argument had been copied to the least significant bits of a 64-bit
+ // register and the remaining bits filled with unspecified values."
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
+
+ // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
+ // precision Floating-point or Short Vector Type, then the argument is copied
+ // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
+ // argument. The argument has now been allocated."
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+ CCIfType<[f128], CCAssignToStack<16, 16>>,
+
+ // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
+ // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
+ // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
+ // one. The argument has now been allocated."
+
+ // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
+ // represented as two i64s, the first one being split. If we delayed this
+  // operation, C.8 would never be reached.
+ CCIfType<[i64],
+ CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>,
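+  // E.g. an i128 argument arriving with NGRN == 1 skips X1 and is passed in
+  // X2 (low double-word) and X3 (high double-word).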
+
+ // Note: the promotion also implements C.14.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // And now the real implementation of C.7
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+
+ // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
+ // up to the next even number."
+ //
+ // "C.9: If the argument is an Integral Type, the size of the argument is
+ // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
+ // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
+ // memory representation of the argument. The NGRN is incremented by two. The
+ // argument has now been allocated."
+ //
+ // Subtlety here: what if alignment is 16 but it is not an integral type? All
+ // floating-point types have been allocated already, which leaves composite
+ // types: this is why a front-end may need to produce i128 for a struct <= 16
+ // bytes.
+
+ // PCS: "C.10 If the argument is a Composite Type and the size in double-words
+ // of the argument is not more than 8 minus NGRN, then the argument is copied
+ // into consecutive general-purpose registers, starting at x[NGRN]. The
+ // argument is passed as though it had been loaded into the registers from a
+ // double-word aligned address with an appropriate sequence of LDR
+ // instructions loading consecutive registers from memory (the contents of any
+ // unused parts of the registers are unspecified by this standard). The NGRN
+ // is incremented by the number of registers used. The argument has now been
+ // allocated."
+ //
+ // Another one that's the responsibility of the front-end (sigh).
+
+ // PCS: "C.11: The NGRN is set to 8."
+ CCCustom<"CC_AArch64NoMoreRegs">,
+
+ // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
+ // Alignment of the argument's type."
+ //
+ // PCS: "C.13: If the argument is a composite type then the argument is copied
+  // to memory at the adjusted NSAA. The NSAA is incremented by the size of
+  // the argument. The argument has now been allocated."
+ //
+ // Note that the effect of this corresponds to a memcpy rather than register
+ // stores so that the struct ends up correctly addressable at the adjusted
+ // NSAA.
+
+ // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
+ // of the argument is set to 8 bytes. The effect is as if the argument was
+ // copied to the least significant bits of a 64-bit register and the remaining
+ // bits filled with unspecified values."
+ //
+ // Integer types were widened above. Floating-point and composite types have
+ // already been allocated completely. Nothing to do.
+
+ // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
+ // is incremented by the size of the argument. The argument has now been
+ // allocated."
+ CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
+ CCIfType<[i64], CCAssignToStack<8, 8>>
+
+]>;
+
+// According to the PCS, X19-X30 are callee-saved; however, only the low 64
+// bits of vector registers 8-15 are callee-saved. The order here is picked up
+// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of
+// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at
+// [sp-16], ...
+def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
+ (sequence "D%u", 15, 8))>;
+
+
+// TLS descriptor calls are extremely restricted in what they may change, to
+// allow optimisations in the (hopefully) more common fast path where no real
+// action is needed. They actually have to preserve all registers, except for
+// the unavoidable X30 and the return register X0.
+def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
+ (sequence "Q%u", 31, 0))>;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
new file mode 100644
index 000000000000..dc41f2f60525
--- /dev/null
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -0,0 +1,633 @@
+//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64InstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
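+/// Split a stack adjustment into an initial chunk and a residual one so that
+/// subsequent callee-save LDP/STP offsets stay in range. For example, a total
+/// adjustment of 0x210 bytes splits into 0x1f0 initial + 0x20 residual.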
+void AArch64FrameLowering::splitSPAdjustments(uint64_t Total,
+ uint64_t &Initial,
+ uint64_t &Residual) const {
+ // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP
+ // instructions have a 7-bit signed immediate scaled by 8, giving a reach of
+ // 0x1f8, but stack adjustment should always be a multiple of 16.
+ if (Total <= 0x1f0) {
+ Initial = Total;
+ Residual = 0;
+ } else {
+ Initial = 0x1f0;
+ Residual = Total - Initial;
+ }
+}
+
+void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ bool NeedsFrameMoves = MMI.hasDebugInfo()
+ || MF.getFunction()->needsUnwindTableEntry();
+
+ uint64_t NumInitialBytes, NumResidualBytes;
+
+ // Currently we expect the stack to be laid out by
+ // sub sp, sp, #initial
+ // stp x29, x30, [sp, #offset]
+ // ...
+ // str xxx, [sp, #offset]
+ // sub sp, sp, #rest (possibly via extra instructions).
+ if (MFI->getCalleeSavedInfo().size()) {
+    // If there are callee-saved registers, we want to store them efficiently
+    // as a block, but virtual base assignment happens too early to do it for
+    // us, so we adjust the stack in two phases: first just for callee-saved
+    // fiddling, then to allocate the rest of the frame.
+ splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
+ } else {
+ // If there aren't any callee-saved registers, two-phase adjustment is
+ // inefficient. It's more efficient to adjust with NumInitialBytes too
+ // because when we're in a "callee pops argument space" situation, that pop
+ // must be tacked onto Initial for correctness.
+ NumInitialBytes = MFI->getStackSize();
+ NumResidualBytes = 0;
+ }
+
+ // Tell everyone else how much adjustment we're expecting them to use. In
+ // particular if an adjustment is required for a tail call the epilogue could
+ // have a different view of things.
+ FuncInfo->setInitialStackAdjust(NumInitialBytes);
+
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
+ MachineInstr::FrameSetup);
+
+ if (NeedsFrameMoves && NumInitialBytes) {
+ // We emit this update even if the CFA is set from a frame pointer later so
+ // that the CFA is valid in the interim.
+ MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(SPLabel);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, NumInitialBytes);
+ Moves.push_back(MachineMove(SPLabel, Dst, Src));
+ }
+
+  // We may also need to set the frame pointer and/or add a second stack
+  // adjustment.
+
+ bool FPNeedsSetting = hasFP(MF);
+ for (; MBBI != MBB.end(); ++MBBI) {
+ // Note that this search makes strong assumptions about the operation used
+ // to store the frame-pointer: it must be "STP x29, x30, ...". This could
+    // change in future, but until then there's no point in implementing more
+    // generic, untestable cases.
+ if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
+ && MBBI->getOperand(0).getReg() == AArch64::X29) {
+ int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
+ FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));
+
+ ++MBBI;
+ emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
+ AArch64::X29,
+ NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
+ MachineInstr::FrameSetup);
+
+ // The offset adjustment used when emitting debugging locations relative
+ // to whatever frame base is set. AArch64 uses the default frame base (FP
+ // or SP) and this adjusts the calculations to be correct.
+ MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
+ - MFI->getStackSize());
+
+ if (NeedsFrameMoves) {
+ MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(FPLabel);
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx));
+ Moves.push_back(MachineMove(FPLabel, Dst, Src));
+ }
+
+ FPNeedsSetting = false;
+ }
+
+ if (!MBBI->getFlag(MachineInstr::FrameSetup))
+ break;
+ }
+
+ assert(!FPNeedsSetting && "Frame pointer couldn't be set");
+
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
+ MachineInstr::FrameSetup);
+
+ // Now we emit the rest of the frame setup information, if necessary: we've
+ // already noted the FP and initial SP moves so we're left with the prologue's
+ // final SP update and callee-saved register locations.
+ if (!NeedsFrameMoves)
+ return;
+
+ // Reuse the label if appropriate, so create it in this outer scope.
+ MCSymbol *CSLabel = 0;
+
+ // The rest of the stack adjustment
+ if (!hasFP(MF) && NumResidualBytes) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes);
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+
+  // And any callee-saved registers (it's fine to leave them to the end here,
+  // because the old values are still valid at this point).
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.size()) {
+ if (!CSLabel) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+ }
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ MachineLocation Dst(MachineLocation::VirtualFP,
+ MFI->getObjectOffset(I->getFrameIdx()));
+ MachineLocation Src(I->getReg());
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+ }
+}
+
+void
+AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ DebugLoc DL = MBBI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+  // Initial and residual are named for consistency with the prologue. Note
+  // that in the epilogue, the residual adjustment is executed first.
+ uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
+ uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
+ uint64_t ArgumentPopSize = 0;
+ if (RetOpcode == AArch64::TC_RETURNdi ||
+ RetOpcode == AArch64::TC_RETURNxi) {
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+
+ MachineInstrBuilder MIB;
+ if (RetOpcode == AArch64::TC_RETURNdi) {
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
+ if (JumpTarget.isGlobal()) {
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else {
+ assert(JumpTarget.isSymbol() && "unexpected tail call destination");
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else {
+ assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
+ && "Unexpected tail call");
+
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
+ MIB.addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ // Add the extra operands onto the new tail call instruction even though
+ // they're not used directly (so that liveness is tracked properly etc).
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ MIB->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TC_RETURN.
+ MachineInstr *NewMI = prior(MBBI);
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+
+    // For a tail-call in a callee-pops-arguments environment, some or all of
+    // the stack may actually be in use for the call's arguments; this is
+    // calculated during LowerCall and consumed here...
+ ArgumentPopSize = StackAdjust.getImm();
+ } else {
+ // ... otherwise the amount to pop is *all* of the argument space,
+ // conveniently stored in the MachineFunctionInfo by
+ // LowerFormalArguments. This will, of course, be zero for the C calling
+ // convention.
+ ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
+ }
+
+ assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
+ && "refusing to adjust stack by misaligned amt");
+
+ // We may need to address callee-saved registers differently, so find out the
+ // bound on the frame indices.
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The "residual" stack update comes first from this direction and guarantees
+ // that SP is NumInitialBytes below its value on function entry, either by a
+ // direct update or restoring it from the frame pointer.
+ if (NumInitialBytes + ArgumentPopSize != 0) {
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
+ NumInitialBytes + ArgumentPopSize);
+ --MBBI;
+ }
+
+ // MBBI now points to the instruction just past the last callee-saved
+ // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
+ // otherwise).
+
+ // Now we need to find out where to put the bulk of the stack adjustment
+ MachineBasicBlock::iterator FirstEpilogue = MBBI;
+ while (MBBI != MBB.begin()) {
+ --MBBI;
+
+ unsigned FrameOp;
+ for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
+ if (MBBI->getOperand(FrameOp).isFI())
+ break;
+ }
+
+ // If this instruction doesn't have a frame index we've reached the end of
+ // the callee-save restoration.
+ if (FrameOp == MBBI->getNumOperands())
+ break;
+
+ // Likewise if it *is* a local reference, but not to a callee-saved object.
+ int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
+ if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
+ break;
+
+ FirstEpilogue = MBBI;
+ }
+
+  if (MFI.hasVarSizedObjects()) {
+ int64_t StaticFrameBase;
+ StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
+ emitRegUpdate(MBB, FirstEpilogue, DL, TII,
+ AArch64::XSP, AArch64::X29, AArch64::NoRegister,
+ StaticFrameBase);
+ } else {
+    emitSPUpdate(MBB, FirstEpilogue, DL, TII, AArch64::X16, NumResidualBytes);
+ }
+}
+
+int64_t
+AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
+ int FrameIndex,
+ unsigned &FrameReg,
+ int SPAdj,
+ bool IsCalleeSaveOp) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex);
+
+ assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0)
+ && "callee-saved register in unexpected place");
+
+ // If the frame for this function is particularly large, we adjust the stack
+ // in two phases which means the callee-save related operations see a
+ // different (intermediate) stack size.
+ int64_t FrameRegPos;
+ if (IsCalleeSaveOp) {
+ FrameReg = AArch64::XSP;
+ FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust());
+ } else if (useFPForAddressing(MF)) {
+ // Have to use the frame pointer since we have no idea where SP is.
+ FrameReg = AArch64::X29;
+ FrameRegPos = FuncInfo->getFramePointerOffset();
+ } else {
+ FrameReg = AArch64::XSP;
+ FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
+ }
+
+ return TopOfFrameOffset - FrameRegPos;
+}
+
+void
+AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ const AArch64RegisterInfo *RegInfo =
+ static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64InstrInfo &TII =
+ *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+
+ if (hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(AArch64::X29);
+ MF.getRegInfo().setPhysRegUsed(AArch64::X30);
+ }
+
+ // If addressing of local variables is going to be more complicated than
+ // shoving a base register and an offset into the instruction then we may well
+  // need to scavenge registers. We should either specifically add a
+  // callee-saved register for this purpose or allocate an extra spill slot.
+
+ bool BigStack =
+ (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
+ || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
+
+ if (!BigStack)
+ return;
+
+ // We certainly need some slack space for the scavenger, preferably an extra
+ // register.
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ uint16_t ExtraReg = AArch64::NoRegister;
+
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
+ !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
+ ExtraReg = CSRegs[i];
+ break;
+ }
+ }
+
+ if (ExtraReg != AArch64::NoRegister) {
+ MF.getRegInfo().setPhysRegUsed(ExtraReg);
+ } else {
+ // Create a stack slot for scavenging purposes. PrologEpilogInserter
+ // helpfully places it near either SP or FP for us to avoid an infinite
+ // regress during scavenging.
+ const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
+ unsigned Reg) const {
+ // If @llvm.returnaddress is called then it will refer to X30 by some means;
+ // the prologue store does not kill the register.
+ if (Reg == AArch64::X30) {
+ if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
+ && MBB.getParent()->getRegInfo().isLiveIn(Reg))
+ return false;
+ }
+
+ // In all other cases, physical registers are dead after they've been saved
+ // but live at the beginning of the prologue block.
+ MBB.addLiveIn(Reg);
+ return true;
+}
+
+void
+AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI,
+ LoadStoreMethod PossClasses[],
+ unsigned NumClasses) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // A certain amount of implicit contract is present here. The actual stack
+ // offsets haven't been allocated officially yet, so for strictly correct code
+ // we rely on the fact that the elements of CSI are allocated in order
+ // starting at SP, purely as dictated by size and alignment. In practice,
+ // since this function handles the only accesses to those slots, it's not
+ // quite so important.
+ //
+ // We have also ordered the Callee-saved register list in AArch64CallingConv
+ // so that the above scheme puts registers in order: in particular we want
+ // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
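+ //
+ // Illustratively, with X29 and X30 adjacent in CSI, the pairing logic below
+ // emits a single "stp x29, x30, [<frame-record slot>]" in the prologue, and
+ // the epilogue path emits the matching ldp.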
+ for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ // First we need to find out which register class the register belongs to so
+ // that we can use the correct load/store instructions.
+ unsigned ClassIdx;
+ for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
+ if (PossClasses[ClassIdx].RegClass->contains(Reg))
+ break;
+ }
+ assert(ClassIdx != NumClasses
+ && "Asked to store register in unexpected class");
+ const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
+
+ // Now we need to decide whether it's possible to emit a paired instruction:
+ // for this we want the next register to be in the same class.
+ MachineInstrBuilder NewMI;
+ bool Pair = false;
+ if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
+ Pair = true;
+ unsigned StLow = 0, StHigh = 0;
+ if (isPrologue) {
+ // Most of these registers will be live-in to the MBB and killed by our
+ // store, though there are exceptions (see determinePrologueDeath).
+ StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
+ StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+ } else {
+ StLow = RegState::Define;
+ StHigh = RegState::Define;
+ }
+
+ NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
+ .addReg(CSI[i+1].getReg(), StLow)
+ .addReg(CSI[i].getReg(), StHigh);
+
+ // If it's a paired op, we've consumed two registers
+ ++i;
+ } else {
+ unsigned State;
+ if (isPrologue) {
+ State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+ } else {
+ State = RegState::Define;
+ }
+
+ NewMI = BuildMI(MBB, MBBI, DL,
+ TII.get(PossClasses[ClassIdx].SingleOpcode))
+ .addReg(CSI[i].getReg(), State);
+ }
+
+ // Note that the FrameIdx refers to the second register in a pair: it will
+ // be allocated the smaller numeric address and so is the one an LDP/STP
+ // address must use.
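+ // (Illustratively: in "stp xA, xB, [<slot>]", <slot> and the MMO describe
+ // xA's (lower) address, with xB stored at <slot> + 8.)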
+ int FrameIdx = CSI[i].getFrameIdx();
+ MachineMemOperand::MemOperandFlags Flags =
+ isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ Flags,
+ Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
+ MFI.getObjectAlignment(FrameIdx));
+
+ NewMI.addFrameIndex(FrameIdx)
+ .addImm(0) // address-register offset
+ .addMemOperand(MMO);
+
+ if (isPrologue)
+ NewMI.setMIFlags(MachineInstr::FrameSetup);
+
+ // For aesthetic reasons, during an epilogue we want to emit complementary
+ // operations to the prologue, but in the opposite order. So we still
+ // iterate through the CalleeSavedInfo list in order, but we put the
+ // instructions successively earlier in the MBB.
+ if (!isPrologue)
+ --MBBI;
+ }
+}
+
+bool
+AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ static LoadStoreMethod PossibleClasses[] = {
+ {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
+ {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
+ };
+ unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+ emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
+ PossibleClasses, NumClasses);
+
+ return true;
+}
+
+bool
+AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ if (CSI.empty())
+ return false;
+
+ static LoadStoreMethod PossibleClasses[] = {
+ {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
+ {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
+ };
+ unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+ emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
+ PossibleClasses, NumClasses);
+
+ return true;
+}
+
+bool
+AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // This is an ABI-compliance decision. The AArch64 PCS gives various options
+ // for conformance, and even at the most stringent level more or less permits
+ // elimination for leaf functions because there's no loss of functionality
+ // (for debugging etc.).
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
+ return true;
+
+ // The following are hard-limits: incorrect code will be generated if we try
+ // to omit the frame.
+ return (RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+bool
+AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
+ return MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+bool
+AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Of the various reasons for having a frame pointer, it's actually only
+ // variable-sized objects that prevent reservation of a call frame.
+ return !(hasFP(MF) && MFI->hasVarSizedObjects());
+}
+
+void
+AArch64FrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ const AArch64InstrInfo &TII =
+ *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MI->getDebugLoc();
+ int Opcode = MI->getOpcode();
+ bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0;
+
+ if (!hasReservedCallFrame(MF)) {
+ unsigned Align = getStackAlignment();
+
+ int64_t Amount = MI->getOperand(0).getImm();
+ Amount = RoundUpToAlignment(Amount, Align);
+ if (!IsDestroy) Amount = -Amount;
+
+ // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
+ // doesn't have to pop anything), then the first operand will be zero too so
+ // this adjustment is a no-op.
+ if (CalleePopAmount == 0) {
+ // FIXME: in-function stack adjustment for calls is limited to 12-bits
+ // because there's no guaranteed temporary register available. Mostly call
+ // frames will be allocated at the start of a function so this is OK, but
+ // it is a limitation that needs dealing with.
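+ // E.g. a 16-byte outgoing-argument area becomes "sub sp, sp, #16" at the
+ // setup pseudo and "add sp, sp, #16" at the destroy, both well within the
+ // 12-bit range asserted below.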
+ assert(Amount > -0xfff && Amount < 0xfff && "call frame too large");
+ emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
+ }
+ } else if (CalleePopAmount != 0) {
+ // If the calling convention demands that the callee pops arguments from the
+ // stack, we want to add it back if we have a reserved call frame.
+ assert(CalleePopAmount < 0xfff && "call frame too large");
+ emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
+ }
+
+ MBB.erase(MI);
+}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
new file mode 100644
index 000000000000..45ea0ec8e071
--- /dev/null
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -0,0 +1,108 @@
+//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the AArch64-specific parts of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_FRAMEINFO_H
+#define LLVM_AARCH64_FRAMEINFO_H
+
+#include "AArch64Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+class AArch64Subtarget;
+
+class AArch64FrameLowering : public TargetFrameLowering {
+private:
+ // In order to unify the spilling and restoring of callee-saved registers into
+ // emitFrameMemOps, we need to be able to specify which instructions to use
+ // for the relevant memory operations on each register class. An array of the
+ // following struct is populated and passed in to achieve this.
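+ // A typical entry (as populated in AArch64FrameLowering.cpp) is
+ // { &AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR }.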
+ struct LoadStoreMethod {
+ const TargetRegisterClass *RegClass; // E.g. GPR64RegClass
+
+ // The preferred instruction.
+ unsigned PairOpcode; // E.g. LSPair64_STR
+
+ // Sometimes only a single register can be handled at once.
+ unsigned SingleOpcode; // E.g. LS64_STR
+ };
+protected:
+ const AArch64Subtarget &STI;
+
+public:
+ explicit AArch64FrameLowering(const AArch64Subtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ /// Decides how much stack adjustment to perform in each phase of the prologue
+ /// and epilogue.
+ void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
+ uint64_t &Residual) const;
+
+ int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
+ unsigned &FrameReg, int SPAdj,
+ bool IsCalleeSaveOp) const;
+
+ virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// If the register is X30 (i.e. LR) and the return address is used in the
+ /// function then the callee-save store doesn't actually kill the register,
+ /// otherwise it does.
+ bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
+
+ /// This function emits the loads or stores required during prologue and
+ /// epilogue as efficiently as possible.
+ ///
+ /// The operations involved in setting up and tearing down the frame are
+ /// similar enough to warrant a shared function, particularly as discrepancies
+ /// between the two would be disastrous.
+ void emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI,
+ LoadStoreMethod PossibleClasses[],
+ unsigned NumClasses) const;
+
+ virtual bool hasFP(const MachineFunction &MF) const;
+
+ virtual bool useFPForAddressing(const MachineFunction &MF) const;
+
+ /// On AArch64, only variable-sized objects prevent a call frame from being
+ /// reserved.
+ virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..46b822152a00
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -0,0 +1,415 @@
+//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===--------------------------------------------------------------------===//
+/// AArch64 specific code to select AArch64 machine instructions for
+/// SelectionDAG operations.
+///
+namespace {
+
+class AArch64DAGToDAGISel : public SelectionDAGISel {
+ AArch64TargetMachine &TM;
+ const AArch64InstrInfo *TII;
+
+ /// Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const AArch64Subtarget *Subtarget;
+
+public:
+ explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm),
+ TII(static_cast<const AArch64InstrInfo*>(TM.getInstrInfo())),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "AArch64 Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "AArch64GenDAGISel.inc"
+
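+ // Selects an unsigned, scaled 12-bit offset: e.g. with MemSize == 8 an
+ // offset of 32 encodes as UImm12 == 4, while 33 (misaligned) or 32768
+ // (scaled to 4096, out of range) is rejected.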
+ template<unsigned MemSize>
+ bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN || CN->getZExtValue() % MemSize != 0
+ || CN->getZExtValue() / MemSize > 0xfff)
+ return false;
+
+ UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
+ return true;
+ }
+
+ template<unsigned RegWidth>
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
+ return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
+
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth);
+
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ bool SelectLogicalImm(SDValue N, SDValue &Imm);
+
+ template<unsigned RegWidth>
+ bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
+ return SelectTSTBOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
+
+ SDNode *TrySelectToMoveImm(SDNode *N);
+ SDNode *LowerToFPLitPool(SDNode *Node);
+ SDNode *SelectToLitPool(SDNode *N);
+
+ SDNode *Select(SDNode *N);
+};
+}
+
+bool
+AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ if (!CN) return false;
+
+ // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
+ // is between 1 and 32 for a destination w-register, or 1 and 64 for an
+ // x-register.
+ //
+ // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
+ // want THIS_NODE to be 2^fbits. This is much easier to deal with using
+ // integers.
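+ //
+ // E.g. (fp_to_sint (fmul x, 65536.0)) with RegWidth == 32 gives
+ // FBits == 16, i.e. a conversion with 16 fractional bits.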
+ bool IsExact;
+
+ // fbits is between 1 and 64 in the worst-case, which means the fmul
+ // could have 2^64 as an actual operand. Need 65 bits of precision.
+ APSInt IntVal(65, true);
+ CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
+
+ // N.b. isPowerOf2 also checks for > 0.
+ if (!IsExact || !IntVal.isPowerOf2()) return false;
+ unsigned FBits = IntVal.logBase2();
+
+ // Checks above should have guaranteed that we haven't lost information in
+ // finding FBits, but it must still be in range.
+ if (FBits == 0 || FBits > RegWidth) return false;
+
+ FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
+ return true;
+}
+
+bool
+AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ switch (ConstraintCode) {
+ default: llvm_unreachable("Unrecognised AArch64 memory constraint");
+ case 'm':
+ // FIXME: more freedom is actually permitted for 'm'. We can go
+ // hunting for a base and an offset if we want. Of course, since
+ // we don't really know how the operand is going to be used we're
+ // probably restricted to the load/store pair's simm7 as an offset
+ // range anyway.
+ case 'Q':
+ OutOps.push_back(Op);
+ }
+
+ return false;
+}
+
+bool
+AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
+ ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
+ if (!Imm || !Imm->getValueAPF().isPosZero())
+ return false;
+
+ // Doesn't actually carry any information, but keeps TableGen quiet.
+ Dummy = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
+ uint32_t Bits;
+ uint32_t RegWidth = N.getValueType().getSizeInBits();
+
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) return false;
+
+ if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
+ return false;
+
+ Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
+ return true;
+}
+
+SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
+ SDNode *ResNode;
+ DebugLoc dl = Node->getDebugLoc();
+ EVT DestType = Node->getValueType(0);
+ unsigned DestWidth = DestType.getSizeInBits();
+
+ unsigned MOVOpcode;
+ EVT MOVType;
+ int UImm16, Shift;
+ uint32_t LogicalBits;
+
+ uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
+ if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
+ MOVType = DestType;
+ MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
+ } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
+ MOVType = DestType;
+ MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
+ } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
+ // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
+ // use a 32-bit instruction: "movn w0, 0xedbc".
+ MOVType = MVT::i32;
+ MOVOpcode = AArch64::MOVNwii;
+ } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
+ MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
+ uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
+
+ return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
+ CurDAG->getRegister(ZR, DestType),
+ CurDAG->getTargetConstant(LogicalBits, MVT::i32));
+ } else {
+ // Can't handle it in one instruction. There's scope for permitting two (or
+ // more) instructions, but that'll need more thought.
+ return NULL;
+ }
+
+ ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
+ CurDAG->getTargetConstant(UImm16, MVT::i32),
+ CurDAG->getTargetConstant(Shift, MVT::i32));
+
+ if (MOVType != DestType) {
+ ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
+ MVT::i64, MVT::i32, MVT::Other,
+ CurDAG->getTargetConstant(0, MVT::i64),
+ SDValue(ResNode, 0),
+ CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
+ }
+
+ return ResNode;
+}
+
+SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
+ DebugLoc DL = Node->getDebugLoc();
+ uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
+ int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
+ EVT DestType = Node->getValueType(0);
+ EVT PtrVT = TLI.getPointerTy();
+
+ // Since we may end up loading a 64-bit constant from a 32-bit entry, the
+ // constant in the pool may have a different type to the eventual node.
+ ISD::LoadExtType Extension;
+ EVT MemType;
+
+ assert((DestType == MVT::i64 || DestType == MVT::i32)
+ && "Only expect integer constants at the moment");
+
+ if (DestType == MVT::i32) {
+ Extension = ISD::NON_EXTLOAD;
+ MemType = MVT::i32;
+ } else if (UnsignedVal <= UINT32_MAX) {
+ Extension = ISD::ZEXTLOAD;
+ MemType = MVT::i32;
+ } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
+ Extension = ISD::SEXTLOAD;
+ MemType = MVT::i32;
+ } else {
+ Extension = ISD::NON_EXTLOAD;
+ MemType = MVT::i64;
+ }
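+
+ // For an i64 node, e.g. 0xffffffff takes the zext-from-i32 path,
+ // 0xffffffff80000000 the sext path, while 0x123456789 needs a full 64-bit
+ // pool entry.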
+
+ Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
+ MemType.getSizeInBits()),
+ UnsignedVal);
+ SDValue PoolAddr;
+ unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType());
+ PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ CurDAG->getConstant(Alignment, MVT::i32));
+
+ return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
+ PoolAddr,
+ MachinePointerInfo::getConstantPool(), MemType,
+ /* isVolatile = */ false,
+ /* isNonTemporal = */ false,
+ Alignment).getNode();
+}
+
+SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
+ DebugLoc DL = Node->getDebugLoc();
+ const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
+ EVT PtrVT = TLI.getPointerTy();
+ EVT DestType = Node->getValueType(0);
+
+ unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType());
+ SDValue PoolAddr;
+
+ assert(TM.getCodeModel() == CodeModel::Small &&
+ "Only small code model supported");
+ PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ CurDAG->getConstant(Alignment, MVT::i32));
+
+ return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /* isVolatile = */ false,
+ /* isNonTemporal = */ false,
+ /* isInvariant = */ true,
+ Alignment).getNode();
+}
+
+bool
+AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) return false;
+
+ uint64_t Val = CN->getZExtValue();
+
+ if (!isPowerOf2_64(Val)) return false;
+
+ unsigned TestedBit = Log2_64(Val);
+ // Checks above should have guaranteed that we haven't lost information in
+ // finding TestedBit, but it must still be in range.
+ if (TestedBit >= RegWidth) return false;
+
+ FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
+ return true;
+}
+
+SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
+ // Dump information about the Node being selected
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
+
+ if (Node->isMachineOpcode()) {
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
+ return NULL;
+ }
+
+ switch (Node->getOpcode()) {
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT PtrTy = TLI.getPointerTy();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
+ TFI, CurDAG->getTargetConstant(0, PtrTy));
+ }
+ case ISD::ConstantPool: {
+ // Constant pools are fine, just create a Target entry.
+ ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
+ const Constant *C = CN->getConstVal();
+ SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
+
+ ReplaceUses(SDValue(Node, 0), CP);
+ return NULL;
+ }
+ case ISD::Constant: {
+ SDNode *ResNode = NULL;
+ if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
+ // XZR and WZR are probably even better than an actual move: most of the
+ // time they can be folded into another instruction with *no* cost.
+
+ EVT Ty = Node->getValueType(0);
+ assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
+ uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
+ ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ Node->getDebugLoc(),
+ Register, Ty).getNode();
+ }
+
+ // Next best option is a move-immediate, see if we can do that.
+ if (!ResNode) {
+ ResNode = TrySelectToMoveImm(Node);
+ }
+
+ if (ResNode)
+ return ResNode;
+
+ // If even that fails we fall back to a lit-pool entry at the moment. Future
+ // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
+ ResNode = SelectToLitPool(Node);
+ assert(ResNode && "We need *some* way to materialise a constant");
+
+ // We want to continue selection at this point since the litpool access
+ // we generated uses generic nodes for simplicity.
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+ Node = ResNode;
+ break;
+ }
+ case ISD::ConstantFP: {
+ if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
+ // FMOV will take care of it from TableGen
+ break;
+ }
+
+ SDNode *ResNode = LowerToFPLitPool(Node);
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+
+ // We want to continue selection at this point since the litpool access
+ // we generated uses generic nodes for simplicity.
+ Node = ResNode;
+ break;
+ }
+ default:
+ break; // Let generic code handle it
+ }
+
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << "\n");
+
+ return ResNode;
+}
+
+/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for
+/// instruction scheduling.
+FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new AArch64DAGToDAGISel(TM, OptLevel);
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
new file mode 100644
index 000000000000..e9f449709c40
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -0,0 +1,2975 @@
+//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AArch64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "AArch64TargetObjectFile.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+
+using namespace llvm;
+
+static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
+ const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+
+ if (Subtarget->isTargetLinux())
+ return new AArch64LinuxTargetObjectFile();
+ if (Subtarget->isTargetELF())
+ return new TargetLoweringObjectFileELF();
+ llvm_unreachable("unknown subtarget type");
+}
+
+
+AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
+ RegInfo(TM.getRegisterInfo()),
+ Itins(TM.getInstrItineraryData()) {
+
+ // SIMD compares set the entire lane's bits to 1
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ // Scalar register <-> type mapping
+ addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
+ addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
+ addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
+ addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
+
+ computeRegisterProperties();
+
+ // Some atomic operations can be folded into load-acquire or store-release
+ // instructions on AArch64. It's marginally simpler to let LLVM expand
+ // everything out to a barrier and then recombine the (few) barriers we can.
+ setInsertFencesForAtomic(true);
+ setTargetDAGCombine(ISD::ATOMIC_FENCE);
+ setTargetDAGCombine(ISD::ATOMIC_STORE);
+
+ // We combine OR nodes for bitfield and NEON BSL operations.
+ setTargetDAGCombine(ISD::OR);
+
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::SRA);
+
+ // AArch64 does not have i1 loads, or much of anything for i1 really.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+
+ setStackPointerRegisterToSaveRestore(AArch64::XSP);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+
+ // We'll lower globals to wrappers for selection.
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ // A64 instructions have the comparison predicate attached to the user of the
+ // result, but having a separate comparison is valuable for matching.
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ // Legal floating-point operations.
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f64, Legal);
+
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FNEG, MVT::f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::f64, Legal);
+
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f128, Legal);
+
+ // Illegal floating-point operations.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f64, Expand);
+
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f64, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+
+ // Virtually no operations on f128 are legal, but LLVM can't expand them when
+ // there's a valid register class, so we need custom operations in most cases.
+ setOperationAction(ISD::FABS, MVT::f128, Expand);
+ setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+ setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FNEG, MVT::f128, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f128, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
+ setOperationAction(ISD::FRINT, MVT::f128, Expand);
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+ setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+ setOperationAction(ISD::SETCC, MVT::f128, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f128, Custom);
+ setOperationAction(ISD::SELECT, MVT::f128, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+
+ // Lowering for many of the conversions is actually specified by the non-f128
+ // type. The LowerXXX function will be trivial when f128 isn't involved.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+
+ // This prevents LLVM trying to compress double constants into a floating
+ // constant-pool entry and trying to load from there. It's of doubtful benefit
+ // for A64: we'd need LDR followed by FCVT, I believe.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+
+ setExceptionPointerRegister(AArch64::X0);
+ setExceptionSelectorRegister(AArch64::X1);
+}
+
+EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
+ // It's reasonably important that this value matches the "natural" legal
+ // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
+ // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
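+ //
+ // E.g. a setcc comparing f64 scalars yields MVT::i32, while one comparing
+ // v4f32 vectors yields v4i32.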
+ if (!VT.isVector()) return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
+ unsigned &strOpc) {
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for atomic binary op!");
+ case 1:
+ ldrOpc = AArch64::LDXR_byte;
+ strOpc = AArch64::STXR_byte;
+ break;
+ case 2:
+ ldrOpc = AArch64::LDXR_hword;
+ strOpc = AArch64::STXR_hword;
+ break;
+ case 4:
+ ldrOpc = AArch64::LDXR_word;
+ strOpc = AArch64::STXR_word;
+ break;
+ case 8:
+ ldrOpc = AArch64::LDXR_dword;
+ strOpc = AArch64::STXR_dword;
+ break;
+ }
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ const TargetRegisterClass *TRC
+ = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldxr dest, ptr
+ // <binop> scratch, dest, incr
+ // stxr stxr_status, scratch, ptr
+ // cbnz stxr_status, loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (BinOpcode) {
+ // All arithmetic operations we'll be creating are designed to take an extra
+ // shift or extend operand, which we can conveniently set to zero.
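+ // E.g. "add w2, w0, w1, lsl #0" is the plain 32-bit add used for
+ // ATOMIC_LOAD_ADD (illustrative register numbers).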
+
+ // Operand order needs to go the other way for NAND.
+ if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl)
+ BuildMI(BB, dl, TII->get(BinOpcode), scratch)
+ .addReg(incr).addReg(dest).addImm(0);
+ else
+ BuildMI(BB, dl, TII->get(BinOpcode), scratch)
+ .addReg(dest).addReg(incr).addImm(0);
+ }
+
+ // From the stxr, the register is GPR32; from the cmp it's GPR32wsp
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loopMBB);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned CmpOp,
+ A64CC::CondCodes Cond) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ unsigned oldval = dest;
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *TRC, *TRCsp;
+ if (Size == 8) {
+ TRC = &AArch64::GPR64RegClass;
+ TRCsp = &AArch64::GPR64xspRegClass;
+ } else {
+ TRC = &AArch64::GPR32RegClass;
+ TRCsp = &AArch64::GPR32wspRegClass;
+ }
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ MRI.constrainRegClass(scratch, TRCsp);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldxr dest, ptr
+ // cmp incr, dest (, sign extend if necessary)
+ // csel scratch, dest, incr, cond
+ // stxr stxr_status, scratch, ptr
+ // cbnz stxr_status, loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+
+ // Build compare and cmov instructions.
+ MRI.constrainRegClass(incr, TRCsp);
+ BuildMI(BB, dl, TII->get(CmpOp))
+ .addReg(incr).addReg(oldval).addImm(0);
+
+ BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
+ scratch)
+ .addReg(oldval).addReg(incr).addImm(Cond);
+
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status)
+ .addReg(scratch).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loopMBB);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *TRCsp;
+ TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldxr dest, [ptr]
+ // cmp dest, oldval
+ // b.ne exitMBB
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+
+ unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
+ MRI.constrainRegClass(dest, TRCsp);
+ BuildMI(BB, dl, TII->get(CmpOp))
+ .addReg(dest).addReg(oldval).addImm(0);
+ BuildMI(BB, dl, TII->get(AArch64::Bcc))
+ .addImm(A64CC::NE).addMBB(exitMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex stxr_status, newval, [ptr]
+ // cbnz stxr_status, loop1MBB
+ BB = loop2MBB;
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+ .addReg(stxr_status).addMBB(loop1MBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // We materialise the F128CSEL pseudo-instruction using conditional branches
+ // and loads, giving an instruction sequence like:
+ // str q0, [sp]
+ // b.ne IfTrue
+ // b Finish
+ // IfTrue:
+ // str q1, [sp]
+ // Finish:
+ // ldr q0, [sp]
+ //
+ // Using virtual registers would probably not be beneficial since COPY
+ // instructions are expensive for f128 (there's no actual instruction to
+ // implement them).
+ //
+ // An alternative would be to do an integer-CSEL on some address. E.g.:
+ // mov x0, sp
+ // add x1, sp, #16
+ // str q0, [x0]
+ // str q1, [x1]
+ // csel x0, x0, x1, ne
+ // ldr q0, [x0]
+ //
+ // It's unclear which approach is actually optimal.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineFunction *MF = MBB->getParent();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned IfTrueReg = MI->getOperand(1).getReg();
+ unsigned IfFalseReg = MI->getOperand(2).getReg();
+ unsigned CondCode = MI->getOperand(3).getImm();
+ bool NZCVKilled = MI->getOperand(4).isKill();
+
+ MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, TrueBB);
+ MF->insert(It, EndBB);
+
+ // Transfer rest of current basic-block to EndBB
+ EndBB->splice(EndBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // We need somewhere to store the f128 value needed.
+ int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
+
+ // [... start of incoming MBB ...]
+ // str qIFFALSE, [sp]
+ // b.cc IfTrue
+ // b Done
+ BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
+ .addReg(IfFalseReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+ BuildMI(MBB, DL, TII->get(AArch64::Bcc))
+ .addImm(CondCode)
+ .addMBB(TrueBB);
+ BuildMI(MBB, DL, TII->get(AArch64::Bimm))
+ .addMBB(EndBB);
+ MBB->addSuccessor(TrueBB);
+ MBB->addSuccessor(EndBB);
+
+ // IfTrue:
+ // str qIFTRUE, [sp]
+ BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
+ .addReg(IfTrueReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+
+ // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
+ // blocks.
+ TrueBB->addSuccessor(EndBB);
+
+ // Done:
+ // ldr qDEST, [sp]
+ // [... rest of incoming MBB ...]
+ if (!NZCVKilled)
+ EndBB->addLiveIn(AArch64::NZCV);
+ MachineInstr *StartOfEnd = EndBB->begin();
+ BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+
+ MI->eraseFromParent();
+ return EndBB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unhandled instruction with custom inserter");
+ case AArch64::F128CSEL:
+ return EmitF128CSEL(MI, MBB);
+ case AArch64::ATOMIC_LOAD_ADD_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_SUB_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_AND_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_OR_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_XOR_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_NAND_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_MIN_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
+
+ case AArch64::ATOMIC_LOAD_MAX_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
+
+ case AArch64::ATOMIC_LOAD_UMIN_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
+
+ case AArch64::ATOMIC_LOAD_UMAX_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
+
+ case AArch64::ATOMIC_SWAP_I8:
+ return emitAtomicBinary(MI, MBB, 1, 0);
+ case AArch64::ATOMIC_SWAP_I16:
+ return emitAtomicBinary(MI, MBB, 2, 0);
+ case AArch64::ATOMIC_SWAP_I32:
+ return emitAtomicBinary(MI, MBB, 4, 0);
+ case AArch64::ATOMIC_SWAP_I64:
+ return emitAtomicBinary(MI, MBB, 8, 0);
+
+ case AArch64::ATOMIC_CMP_SWAP_I8:
+ return emitAtomicCmpSwap(MI, MBB, 1);
+ case AArch64::ATOMIC_CMP_SWAP_I16:
+ return emitAtomicCmpSwap(MI, MBB, 2);
+ case AArch64::ATOMIC_CMP_SWAP_I32:
+ return emitAtomicCmpSwap(MI, MBB, 4);
+ case AArch64::ATOMIC_CMP_SWAP_I64:
+ return emitAtomicCmpSwap(MI, MBB, 8);
+ }
+}
+
+
+const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC";
+ case AArch64ISD::Call: return "AArch64ISD::Call";
+ case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV";
+ case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad";
+ case AArch64ISD::BFI: return "AArch64ISD::BFI";
+ case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
+ case AArch64ISD::Ret: return "AArch64ISD::Ret";
+ case AArch64ISD::SBFX: return "AArch64ISD::SBFX";
+ case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC";
+ case AArch64ISD::SETCC: return "AArch64ISD::SETCC";
+ case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
+ case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
+ case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
+ case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
+
+ default: return NULL;
+ }
+}
+
+static const uint16_t AArch64FPRArgRegs[] = {
+ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
+ AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
+};
+static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs);
+
+static const uint16_t AArch64ArgRegs[] = {
+ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
+ AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
+};
+static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs);
+
+static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ // Mark all remaining general purpose registers as allocated. We don't
+ // backtrack: if (for example) an i128 gets put on the stack, no subsequent
+ // i64 will go in registers (C.11).
+ for (unsigned i = 0; i < NumArgRegs; ++i)
+ State.AllocateReg(AArch64ArgRegs[i]);
+
+ return false;
+}
+
+#include "AArch64GenCallingConv.inc"
+
+CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
+  switch (CC) {
+ default: llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return CC_A64_APCS;
+ }
+}
+
+void
+AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc DL, SDValue &Chain) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+
+ SmallVector<SDValue, 8> MemOps;
+
+ unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
+ NumArgRegs);
+ unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
+ NumFPRArgRegs);
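+  // For illustration (hypothetical call, not from the PCS text): in
+  // "void f(int a, ...)" X0 is taken by 'a', so FirstVariadicGPR == 1 and the
+  // loop below saves X1-X7 into a 56-byte (8 * 7) GPR save area for va_arg.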
+
+ unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
+ int GPRIdx = 0;
+ if (GPRSaveSize != 0) {
+ GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
+
+ SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 8),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(8, getPointerTy()));
+ }
+ }
+
+ unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
+ int FPRIdx = 0;
+ if (FPRSaveSize != 0) {
+ FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
+
+ SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
+ &AArch64::FPR128RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 16),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(16, getPointerTy()));
+ }
+ }
+
+ int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true);
+
+ FuncInfo->setVariadicStackIdx(StackIdx);
+ FuncInfo->setVariadicGPRIdx(GPRIdx);
+ FuncInfo->setVariadicGPRSize(GPRSaveSize);
+ FuncInfo->setVariadicFPRIdx(FPRIdx);
+ FuncInfo->setVariadicFPRSize(FPRSaveSize);
+
+ if (!MemOps.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
+ MemOps.size());
+ }
+}
+
+SDValue
+AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+
+ SmallVector<SDValue, 16> ArgValues;
+
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+ // Byval is used for small structs and HFAs in the PCS, but the system
+ // should work in a non-compliant manner for larger structs.
+ EVT PtrTy = getPointerTy();
+ int Size = Flags.getByValSize();
+ unsigned NumRegs = (Size + 7) / 8;
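+      // e.g. an illustrative 20-byte struct gives NumRegs == 3 and hence a
+      // 24-byte stack object: byval sizes round up to whole registers.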
+
+ unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
+ VA.getLocMemOffset(),
+ false);
+ SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
+ InVals.push_back(FrameIdxN);
+
+ continue;
+ } else if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC = getRegClassFor(RegVT);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+    } else { // VA.isMemLoc()
+ assert(VA.isMemLoc());
+
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+ }
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+      ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt: {
+ unsigned DestSize = VA.getValVT().getSizeInBits();
+ unsigned DestSubReg;
+
+ switch (DestSize) {
+ case 8: DestSubReg = AArch64::sub_8; break;
+ case 16: DestSubReg = AArch64::sub_16; break;
+ case 32: DestSubReg = AArch64::sub_32; break;
+ case 64: DestSubReg = AArch64::sub_64; break;
+ default: llvm_unreachable("Unexpected argument promotion");
+ }
+
+ ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
+ VA.getValVT(), ArgValue,
+ DAG.getTargetConstant(DestSubReg, MVT::i32)),
+ 0);
+ break;
+ }
+ }
+
+ InVals.push_back(ArgValue);
+ }
+
+ if (isVarArg)
+ SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
+
+ unsigned StackArgSize = CCInfo.getNextStackOffset();
+ if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
+ // This is a non-standard ABI so by fiat I say we're allowed to make full
+ // use of the stack area to be popped, which must be aligned to 16 bytes in
+ // any case:
+ StackArgSize = RoundUpToAlignment(StackArgSize, 16);
+
+ // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
+ // a multiple of 16.
+ FuncInfo->setArgumentStackToRestore(StackArgSize);
+
+ // This realignment carries over to the available bytes below. Our own
+ // callers will guarantee the space is free by giving an aligned value to
+ // CALLSEQ_START.
+ }
+ // Even if we're not expected to free up the space, it's useful to know how
+ // much is there while considering tail calls (because we can reuse it).
+ FuncInfo->setBytesInStackArgArea(StackArgSize);
+
+ return Chain;
+}
+
+SDValue
+AArch64TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ // PCS: "If the type, T, of the result of a function is such that
+ // void func(T arg) would require that arg be passed as a value in a
+ // register (or set of registers) according to the rules in 5.4, then the
+ // result is returned in the same registers as would be used for such an
+ // argument.
+ //
+ // Otherwise, the caller shall reserve a block of memory of sufficient
+ // size and alignment to hold the result. The address of the memory block
+ // shall be passed as an additional argument to the function in x8."
+ //
+ // This is implemented in two places. The register-return values are dealt
+ // with here, more complex returns are passed as an sret parameter, which
+ // means we don't have to worry about it during actual return.
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
+
+ SDValue Arg = OutVals[i];
+
+ // There's no convenient note in the ABI about this as there is for normal
+ // arguments, but it says return values are passed in the same registers as
+ // an argument would be. I believe that includes the comments about
+ // unspecified higher bits, putting the burden of widening on the *caller*
+ // for return values.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt:
+ // Floating-point values should only be extended when they're going into
+ // memory, which can't happen here so an integer extend is acceptable.
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
+}
+
+SDValue
+AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &IsTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+ bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+
+ if (IsTailCall) {
+ IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+
+ // A sibling call is one where we're under the usual C ABI and not planning
+ // to change that but can still do a tail call:
+ if (!TailCallOpt && IsTailCall)
+ IsSibCall = true;
+ }
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+  // On AArch64 (and all other architectures I'm aware of) the most the
+  // CALLSEQ_START below has to do is adjust the stack pointer.
+ unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
+ if (IsSibCall) {
+ // Since we're not changing the ABI to make this a tail call, the memory
+ // operands are already available in the caller's incoming argument space.
+ NumBytes = 0;
+ }
+
+ // FPDiff is the byte offset of the call's argument area from the callee's.
+ // Stores to callee stack arguments will be placed in FixedStackSlots offset
+ // by this amount for a tail call. In a sibling call it must be 0 because the
+ // caller will deallocate the entire stack and the callee still expects its
+ // arguments to begin at SP+0. Completely unused for non-tail calls.
+ int FPDiff = 0;
+
+ if (IsTailCall && !IsSibCall) {
+ unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
+
+ // FPDiff will be negative if this tail call requires more space than we
+ // would automatically have in our incoming argument space. Positive if we
+ // can actually shrink the stack.
+ FPDiff = NumReusableBytes - NumBytes;
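+    // Illustrative arithmetic: with NumReusableBytes == 32 and NumBytes == 16
+    // we get FPDiff == 16 and the stack can shrink; NumBytes == 48 instead
+    // gives FPDiff == -16, i.e. the tail call needs more space than the
+    // incoming argument area provides.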
+
+ // The stack pointer must be 16-byte aligned at all times it's used for a
+ // memory operation, which in practice means at *all* times and in
+ // particular across call boundaries. Therefore our own arguments started at
+ // a 16-byte aligned SP and the delta applied for the tail call should
+ // satisfy the same constraint.
+ assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
+ }
+
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
+ getPointerTy());
+
+ SmallVector<SDValue, 8> MemOpChains;
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue Arg = OutVals[i];
+
+ // Callee does the actual widening, so all extensions just use an implicit
+ // definition of the rest of the Loc. Aesthetically, this would be nicer as
+ // an ANY_EXTEND, but that isn't valid for floating-point types and this
+ // alternative works on integer types too.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt: {
+ unsigned SrcSize = VA.getValVT().getSizeInBits();
+ unsigned SrcSubReg;
+
+ switch (SrcSize) {
+ case 8: SrcSubReg = AArch64::sub_8; break;
+ case 16: SrcSubReg = AArch64::sub_16; break;
+ case 32: SrcSubReg = AArch64::sub_32; break;
+ case 64: SrcSubReg = AArch64::sub_64; break;
+ default: llvm_unreachable("Unexpected argument promotion");
+ }
+
+ Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
+ VA.getLocVT(),
+ DAG.getUNDEF(VA.getLocVT()),
+ Arg,
+ DAG.getTargetConstant(SrcSubReg, MVT::i32)),
+ 0);
+
+ break;
+ }
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ // A normal register (sub-) argument. For now we just note it down because
+ // we want to copy things into registers as late as possible to avoid
+      // register pressure (and possibly worse).
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc() && "unexpected argument location");
+
+ SDValue DstAddr;
+ MachinePointerInfo DstInfo;
+ if (IsTailCall) {
+ uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
+ VA.getLocVT().getSizeInBits();
+ OpSize = (OpSize + 7) / 8;
+ int32_t Offset = VA.getLocMemOffset() + FPDiff;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+
+ DstAddr = DAG.getFrameIndex(FI, getPointerTy());
+ DstInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // Make sure any stack arguments overlapping with where we're storing are
+ // loaded before this eventual operation. Otherwise they'll be clobbered.
+ Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
+ } else {
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
+
+ DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset());
+ }
+
+ if (Flags.isByVal()) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
+ SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile = */ false,
+ /*alwaysInline = */ false,
+ DstInfo, MachinePointerInfo(0));
+ MemOpChains.push_back(Cpy);
+ } else {
+ // Normal stack argument, put it where it's needed.
+ SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
+ false, false, 0);
+ MemOpChains.push_back(Store);
+ }
+ }
+
+ // The loads and stores generated above shouldn't clash with each
+ // other. Combining them with this TokenFactor notes that fact for the rest of
+ // the backend.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Most of the rest of the instructions need to be glued together; we don't
+ // want assignments to actual registers used by a call to be rearranged by a
+ // well-meaning scheduler.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // The linker is responsible for inserting veneers when necessary to put a
+ // function call destination in range, so we don't need to bother with a
+ // wrapper here.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ }
+
+  // We don't usually want to end the call-sequence here because we would tidy
+  // the frame up *after* the call; however, in the ABI-changing tail-call case
+  // we've carefully laid out the parameters so that when SP is reset they'll
+  // be in the correct location.
+ if (IsTailCall && !IsSibCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // We produce the following DAG scheme for the actual call instruction:
+  // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?)
+ //
+  // Most of the register operands exist just to keep the argument values live
+  // as far as LLVM is concerned. The node is expected to be selected as simply
+  // "bl callee" (for a direct, non-tail call).
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (IsTailCall) {
+ // Each tail call may have to adjust the stack by a different amount, so
+ // this information must travel along with the operation for eventual
+ // consumption by emitEpilogue.
+ Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
+ }
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers. This
+ // is used later in codegen to constrain register-allocation.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // If we needed glue, put it in as the last argument.
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ if (IsTailCall) {
+ return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+  // Now we can reclaim the stack; we might as well do it before working out
+  // where our return value is.
+ if (!IsSibCall) {
+ uint64_t CalleePopBytes
+ = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(CalleePopBytes, true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ return LowerCallResult(Chain, InFlag, CallConv,
+ IsVarArg, Ins, dl, DAG, InVals);
+}
+
+SDValue
+AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
+
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ // Return values that are too big to fit into registers should use an sret
+ // pointer, so this can be a lot simpler than the main argument code.
+ assert(VA.isRegLoc() && "Memory locations not expected for call return");
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ case CCValAssign::SExt:
+ case CCValAssign::AExt:
+ // Floating-point arguments only get extended/truncated if they're going
+ // in memory, so using the integer operation is acceptable here.
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+bool
+AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+
+ // For CallingConv::C this function knows whether the ABI needs
+ // changing. That's not true for other conventions so they will have to opt in
+ // manually.
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *CallerF = MF.getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Byval parameters hand the function a pointer directly into the stack area
+ // we want to reuse during a tail call. Working around this *is* possible (see
+ // X86) but less efficient and uglier in LowerCall.
+ for (Function::const_arg_iterator i = CallerF->arg_begin(),
+ e = CallerF->arg_end(); i != e; ++i)
+ if (i->hasByValAttr())
+ return false;
+
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
+ if (IsTailCallConvention(CalleeCC) && CCMatch)
+ return true;
+ return false;
+ }
+
+ // Now we search for cases where we can use a tail call without changing the
+ // ABI. Sibcall is used in some places (particularly gcc) to refer to this
+ // concept.
+
+ // I want anyone implementing a new calling convention to think long and hard
+ // about this assert.
+ assert((!IsVarArg || CalleeCC == CallingConv::C)
+ && "Unexpected variadic calling convention");
+
+ if (IsVarArg && !Outs.empty()) {
+ // At least two cases here: if caller is fastcc then we can't have any
+ // memory arguments (we'd be expected to clean up the stack afterwards). If
+ // caller is C then we could potentially use its argument area.
+
+ // FIXME: for now we take the most conservative of these in both cases:
+ // disallow all variadic memory operands.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ if (!ArgLocs[i].isRegLoc())
+ return false;
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // Nothing more to check if the callee is taking no arguments
+ if (Outs.empty())
+ return true;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+
+ const AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+
+ // If the stack arguments for this call would fit into our own save area then
+ // the call can be made tail.
+ return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
+}
+
+bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
+ bool TailCallOpt) const {
+ return CallCC == CallingConv::Fast && TailCallOpt;
+}
+
+bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
+ return CallCC == CallingConv::Fast;
+}
+
+SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
+ SelectionDAG &DAG,
+ MachineFrameInfo *MFI,
+ int ClobberedFI) const {
+ SmallVector<SDValue, 8> ArgChains;
+ int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
+ int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+  // Add a chain value for each stack argument load that overlaps the byte
+  // range being clobbered.
+ for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
+ UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0) {
+ int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
+ int64_t InLastByte = InFirstByte;
+ InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
+
+ if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
+ (FirstByte <= InFirstByte && InFirstByte <= LastByte))
+ ArgChains.push_back(SDValue(L, 1));
+ }
+
+ // Build a tokenfactor for all the chains.
+ return DAG.getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+}
+
+static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETEQ: return A64CC::EQ;
+ case ISD::SETGT: return A64CC::GT;
+ case ISD::SETGE: return A64CC::GE;
+ case ISD::SETLT: return A64CC::LT;
+ case ISD::SETLE: return A64CC::LE;
+ case ISD::SETNE: return A64CC::NE;
+ case ISD::SETUGT: return A64CC::HI;
+ case ISD::SETUGE: return A64CC::HS;
+ case ISD::SETULT: return A64CC::LO;
+ case ISD::SETULE: return A64CC::LS;
+ default: llvm_unreachable("Unexpected condition code");
+ }
+}
+
+bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
+ // icmp is implemented using adds/subs immediate, which take an unsigned
+ // 12-bit immediate, optionally shifted left by 12 bits.
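+  // For example, 0xabc and 0xabc000 both encode, but 0xabc001 does not: it
+  // needs set bits both inside and outside a single 12-bit field.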
+
+ // Symmetric by using adds/subs
+ if (Val < 0)
+ Val = -Val;
+
+ return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
+}
+
+SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &A64cc,
+ SelectionDAG &DAG, DebugLoc &dl) const {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ int64_t C = 0;
+ EVT VT = RHSC->getValueType(0);
+ bool knownInvalid = false;
+
+ // I'm not convinced the rest of LLVM handles these edge cases properly, but
+ // we can at least get it right.
+ if (isSignedIntSetCC(CC)) {
+ C = RHSC->getSExtValue();
+ } else if (RHSC->getZExtValue() > INT64_MAX) {
+ // A 64-bit constant not representable by a signed 64-bit integer is far
+ // too big to fit into a SUBS immediate anyway.
+ knownInvalid = true;
+ } else {
+ C = RHSC->getZExtValue();
+ }
+
+ if (!knownInvalid && !isLegalICmpImmediate(C)) {
+      // The constant does not fit; try adjusting it by one.
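+      // e.g. "x s< 0x1001" has no encodable immediate, but the equivalent
+      // "x s<= 0x1000" does (0x1000 fits the shifted 12-bit form).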
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, VT);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, VT);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, VT);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, VT);
+ }
+ break;
+ }
+ }
+ }
+
+ A64CC::CondCodes CondCode = IntCCToA64CC(CC);
+ A64cc = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+}
+
+static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
+ A64CC::CondCodes &Alternative) {
+ A64CC::CondCodes CondCode = A64CC::Invalid;
+ Alternative = A64CC::Invalid;
+
+ switch (CC) {
+ default: llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = A64CC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = A64CC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = A64CC::GE; break;
+ case ISD::SETOLT: CondCode = A64CC::MI; break;
+ case ISD::SETOLE: CondCode = A64CC::LS; break;
+ case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
+ case ISD::SETO: CondCode = A64CC::VC; break;
+ case ISD::SETUO: CondCode = A64CC::VS; break;
+ case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
+ case ISD::SETUGT: CondCode = A64CC::HI; break;
+ case ISD::SETUGE: CondCode = A64CC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = A64CC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = A64CC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = A64CC::NE; break;
+ }
+ return CondCode;
+}
+
+SDValue
+AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small
+ && "Only small code model supported at the moment");
+
+ // The most efficient code is PC-relative anyway for the small memory model,
+  // so we don't need to worry about the relocation model.
+ return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_NO_FLAG),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_LO12),
+ DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
+}
+
+// (BRCOND chain, val, dest)
+SDValue
+AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0);
+ SDValue TheBit = Op.getOperand(1);
+ SDValue DestBB = Op.getOperand(2);
+
+ // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
+ // that as the consumer we are responsible for ignoring rubbish in higher
+ // bits.
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
+ DAG.getConstant(0, TheBit.getValueType()),
+ DAG.getCondCode(ISD::SETNE));
+
+ return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
+ A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
+ DestBB);
+}
+
+// (BR_CC chain, condcode, lhs, rhs, dest)
+SDValue
+AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue DestBB = Op.getOperand(4);
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons are lowered to runtime calls by a routine which sets
+ // LHS, RHS and CC appropriately for the rest of this function to continue.
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (RHS.getNode() == 0) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ Chain, CmpOp, A64cc, DestBB);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
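+  // e.g. SETONE has no single A64 condition and becomes MI with a GT
+  // alternative: either branch firing means "ordered and not equal".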
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ Chain, SetCC, A64cc, DestBB);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ A64BR_CC, SetCC, A64cc, DestBB);
+ }
+
+ return A64BR_CC;
+}
+
+SDValue
+AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const {
+ ArgListTy Args;
+ ArgListEntry Entry;
+ for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op.getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
+
+ Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
+
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
+  // isTailCall may be true since the callee does not reference the caller's
+  // stack frame. Check if it's in the right position.
+ SDValue TCChain = InChain;
+ bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
+ if (isTailCall)
+ InChain = TCChain;
+
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
+ 0, getLibcallCallingConv(Call), isTailCall,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Op->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
+ return CallInfo.first;
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, Op.getDebugLoc());
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned) const {
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ if (IsSigned)
+ LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
+ // we make that distinction here.
+
+ // We support the small memory model for now.
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small);
+
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GN->getGlobal();
+ unsigned Alignment = GV->getAlignment();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
+    // Weak undefined symbols can't use an ADRP/ADD pair since they should
+    // evaluate to zero when they remain undefined. In PIC mode the GOT can
+    // take care of this, but in absolute mode we use a constant pool load.
+ SDValue PoolAddr;
+ PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+ DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ DAG.getConstant(8, MVT::i32));
+ SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /*isVolatile=*/ false,
+ /*isNonTemporal=*/ true,
+ /*isInvariant=*/ true, 8);
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+
+ return GlobalAddr;
+ }
+
+ if (Alignment == 0) {
+ const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
+ if (GVPtrTy->getElementType()->isSized()) {
+ Alignment
+ = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
+ } else {
+ // Be conservative if we can't guess, not that it really matters:
+ // functions and labels aren't valid for loads, and the methods used to
+ // actually calculate an address work with any alignment.
+ Alignment = 1;
+ }
+ }
+
+ unsigned char HiFixup, LoFixup;
+ bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM);
+
+ if (UseGOT) {
+ HiFixup = AArch64II::MO_GOT;
+ LoFixup = AArch64II::MO_GOT_LO12;
+ Alignment = 8;
+ } else {
+ HiFixup = AArch64II::MO_NO_FLAG;
+ LoFixup = AArch64II::MO_LO12;
+ }
+
+ // AArch64's small model demands the following sequence:
+ // ADRP x0, somewhere
+ // ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
+ SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ HiFixup),
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ LoFixup),
+ DAG.getConstant(Alignment, MVT::i32));
+
+ if (UseGOT) {
+ GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
+ GlobalRef);
+ }
+
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+
+ return GlobalRef;
+}
+
+SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
+ SDValue DescAddr,
+ DebugLoc DL,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+
+ // The function we need to call is simply the first entry in the GOT for this
+ // descriptor, load it in preparation.
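+  // As a sketch (assuming the usual AArch64 TLSDESC convention), the final
+  // code is expected to look roughly like:
+  //   adrp x0, :tlsdesc:var
+  //   ldr  x1, [x0, #:tlsdesc_lo12:var]
+  //   add  x0, x0, #:tlsdesc_lo12:var
+  //   blr  x1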
+ SDValue Func, Chain;
+ Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
+ DescAddr);
+
+ // The function takes only one argument: the address of the descriptor itself
+ // in X0.
+ SDValue Glue;
+ Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
+ Glue = Chain.getValue(1);
+
+ // Finally, there's a special calling-convention which means that the lookup
+ // must preserve all registers (except X0, obviously).
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const AArch64RegisterInfo *A64RI
+ = static_cast<const AArch64RegisterInfo *>(TRI);
+ const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
+
+ // We're now ready to populate the argument list, as with a normal call:
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Func);
+ Ops.push_back(SymAddr);
+ Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ Ops.push_back(Glue);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0],
+ Ops.size());
+ Glue = Chain.getValue(1);
+
+ // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
+ // back to the generic handling code.
+ return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+
+ SDValue TPOff;
+ EVT PtrVT = getPointerTy();
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+
+ SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
+
+ if (Model == TLSModel::InitialExec) {
+ TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_GOTTPREL),
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_GOTTPREL_LO12),
+ DAG.getConstant(8, MVT::i32));
+ TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
+ TPOff);
+ } else if (Model == TLSModel::LocalExec) {
+ SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_TPREL_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_TPREL_G0_NC);
+
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
+ TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ } else if (Model == TLSModel::GeneralDynamic) {
+ // Accesses used in this sequence go via the TLS descriptor which lives in
+ // the GOT. Prepare an address we can use to handle this.
+ SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_TLSDESC);
+ SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_TLSDESC_LO12);
+ SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ HiDesc, LoDesc,
+ DAG.getConstant(8, MVT::i32));
+ SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
+
+ TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ } else if (Model == TLSModel::LocalDynamic) {
+ // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
+ // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
+ // the beginning of the module's TLS region, followed by a DTPREL offset
+ // calculation.
+
+ // These accesses will need deduplicating if there's more than one.
+    AArch64MachineFunctionInfo *MFI
+      = DAG.getMachineFunction().getInfo<AArch64MachineFunctionInfo>();
+ MFI->incNumLocalDynamicTLSAccesses();
+
+ // Get the location of _TLS_MODULE_BASE_:
+ SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLSDESC);
+ SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLSDESC_LO12);
+ SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ HiDesc, LoDesc,
+ DAG.getConstant(8, MVT::i32));
+ SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
+
+ ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
+
+ // Get the variable's offset from _TLS_MODULE_BASE_
+ SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_DTPREL_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_DTPREL_G0_NC);
+
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
+ TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ } else
+ llvm_unreachable("Unsupported TLS access model");
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
+}
+
+SDValue
+AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned) const {
+ if (Op.getValueType() != MVT::f128) {
+ // Legal for everything except f128.
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ if (IsSigned)
+ LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ DebugLoc dl = JT->getDebugLoc();
+
+  // When compiling PIC, jump tables get put in the code section, so a static
+  // relocation style is acceptable in both cases.
+ return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(),
+ DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()),
+ DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
+ AArch64II::MO_LO12),
+ DAG.getConstant(1, MVT::i32));
+}
+
+// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
+SDValue
+AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue IfTrue = Op.getOperand(2);
+ SDValue IfFalse = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons are lowered to libcalls, but slot in nicely here
+ // afterwards.
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (RHS.getNode() == 0) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ CmpOp, IfTrue, IfFalse, A64cc);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
+ Op.getValueType(),
+ SetCC, IfTrue, IfFalse, A64cc);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ SetCC, IfTrue, A64SELECT_CC, A64cc);
+ }
+
+ return A64SELECT_CC;
+}
+
+// (SELECT testbit, iftrue, iffalse)
+SDValue
+AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue TheBit = Op.getOperand(0);
+ SDValue IfTrue = Op.getOperand(1);
+ SDValue IfFalse = Op.getOperand(2);
+
+ // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
+ // that as the consumer we are responsible for ignoring rubbish in higher
+ // bits.
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
+ DAG.getConstant(1, MVT::i32));
+ SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
+ DAG.getConstant(0, TheBit.getValueType()),
+ DAG.getCondCode(ISD::SETNE));
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ A64CMP, IfTrue, IfFalse,
+ DAG.getConstant(A64CC::NE, MVT::i32));
+}
+
+// (SETCC lhs, rhs, condcode)
+SDValue
+AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ EVT VT = Op.getValueType();
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
+ // for the rest of the function (some i32 or i64 values).
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (RHS.getNode() == 0) {
+ assert(LHS.getValueType() == Op.getValueType() &&
+ "Unexpected setcc expansion!");
+ return LHS;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
+ CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ A64cc);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
+ CmpOp, DAG.getConstant(1, VT),
+ DAG.getConstant(0, VT), A64cc);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
+ DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
+ }
+
+ return A64SELECT_CC;
+}
+
+SDValue
+AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+ const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
+ // rather than just 8.
+ return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(32, MVT::i32), 8, false, false,
+ MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue
+AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ // The layout of the va_list struct is specified in the AArch64 Procedure Call
+ // Standard, section B.3.
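+  // That layout is effectively:
+  //   struct va_list {
+  //     void *__stack;   // offset 0:  next stack argument
+  //     void *__gr_top;  // offset 8:  end of GPR save area
+  //     void *__vr_top;  // offset 16: end of FPR save area
+  //     int __gr_offs;   // offset 24: set to -GPRSize below
+  //     int __vr_offs;   // offset 28: set to -FPRSize below
+  //   };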
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue VAList = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ SmallVector<SDValue, 4> MemOps;
+
+ // void *__stack at offset 0
+ SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
+ getPointerTy());
+ MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
+ MachinePointerInfo(SV), false, false, 0));
+
+ // void *__gr_top at offset 8
+ int GPRSize = FuncInfo->getVariadicGPRSize();
+ if (GPRSize > 0) {
+ SDValue GRTop, GRTopAddr;
+
+ GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(8, getPointerTy()));
+
+ GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
+ GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
+ DAG.getConstant(GPRSize, getPointerTy()));
+
+ MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
+ MachinePointerInfo(SV, 8),
+ false, false, 0));
+ }
+
+ // void *__vr_top at offset 16
+ int FPRSize = FuncInfo->getVariadicFPRSize();
+ if (FPRSize > 0) {
+ SDValue VRTop, VRTopAddr;
+ VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(16, getPointerTy()));
+
+ VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
+ VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
+ DAG.getConstant(FPRSize, getPointerTy()));
+
+ MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
+ MachinePointerInfo(SV, 16),
+ false, false, 0));
+ }
+
+ // int __gr_offs at offset 24
+ SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(24, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
+ GROffsAddr, MachinePointerInfo(SV, 24),
+ false, false, 0));
+
+ // int __vr_offs at offset 28
+ SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(28, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
+ VROffsAddr, MachinePointerInfo(SV, 28),
+ false, false, 0));
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
+ MemOps.size());
+}
+
+SDValue
+AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
+ case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
+ case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
+ case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+  // We're looking for an AND of an SRL, which together form a UBFX.
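+  // e.g. "(x >> 3) & 0xf" has Width == 4 and LSB == 3, and is emitted below
+  // as UBFX(x, 3, 6): the constant operands are lsb and lsb + width - 1.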
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t TruncMask = N->getConstantOperandVal(1);
+ if (!isMask_64(TruncMask))
+ return SDValue();
+
+ uint64_t Width = CountPopulation_64(TruncMask);
+ SDValue Shift = N->getOperand(0);
+
+ if (Shift.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+ return SDValue();
+ uint64_t LSB = Shift->getConstantOperandVal(1);
+
+ if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(LSB + Width - 1, MVT::i64));
+}
+
+static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // An atomic operation followed by an acquiring atomic fence can be reduced to
+ // an acquiring load. The atomic operation provides a convenient pointer to
+ // load from. If the original operation was a load anyway we can actually
+ // combine the two operations into an acquiring load.
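+  // Roughly, at the instruction level: an "ldr" followed by the fence's
+  // "dmb" can become a single "ldar" with acquire semantics.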
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue AtomicOp = FenceNode->getOperand(0);
+ AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
+
+ // A fence on its own can't be optimised
+ if (!AtomicNode)
+ return SDValue();
+
+ AtomicOrdering FenceOrder
+ = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1));
+ SynchronizationScope FenceScope
+ = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2));
+
+ if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
+ return SDValue();
+
+ // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
+ // the chain we use should be its input, otherwise we'll put our store after
+ // it so we use its output chain.
+ SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
+ AtomicNode->getChain() : AtomicOp;
+
+  // We have an acquire fence with a handy atomic operation nearby, so we can
+  // convert the fence into a load-acquire and discard the result.
+ DebugLoc DL = FenceNode->getDebugLoc();
+ SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
+ AtomicNode->getValueType(0),
+ Chain, // Chain
+ AtomicOp.getOperand(1), // Pointer
+ AtomicNode->getMemOperand(), Acquire,
+ FenceScope);
+
+ if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
+ DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
+
+ return Op.getValue(1);
+}
+
+static SDValue PerformATOMIC_STORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // A releasing atomic fence followed by an atomic store can be combined into a
+ // single store operation.
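+  //
+  // For example, (store atomic monotonic %v, %p) preceded by (fence release)
+  // folds into (store atomic release %v, %p), which should select to an STLR.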
+ SelectionDAG &DAG = DCI.DAG;
+ AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
+ SDValue FenceOp = AtomicNode->getOperand(0);
+
+ if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
+ return SDValue();
+
+ AtomicOrdering FenceOrder
+ = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1));
+ SynchronizationScope FenceScope
+ = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2));
+
+ if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
+ return SDValue();
+
+ DebugLoc DL = AtomicNode->getDebugLoc();
+ return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
+ FenceOp.getOperand(0), // Chain
+ AtomicNode->getOperand(1), // Pointer
+ AtomicNode->getOperand(2), // Value
+ AtomicNode->getMemOperand(), Release,
+ FenceScope);
+}
+
+/// For a true bitfield insert, the bits getting into that contiguous mask
+/// should come from the low part of an existing value: they must be formed
+/// from a compatible SHL operation (unless they're already low). This
+/// function checks that condition and returns the intended least-significant
+/// bit. If the operation is not a field preparation, -1 is returned.
+static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT,
+ SDValue &MaskedVal, uint64_t Mask) {
+ if (!isShiftedMask_64(Mask))
+ return -1;
+
+ // Now we need to alter MaskedVal so that it is an appropriate input for a BFI
+ // instruction. BFI will do a left-shift by LSB before applying the mask we've
+ // spotted, so in general we should pre-emptively "undo" that by making sure
+ // the incoming bits have had a right-shift applied to them.
+ //
+ // This right shift, however, will combine with existing left/right shifts. In
+ // the simplest case of a completely straight bitfield operation, it will be
+ // expected to completely cancel out with an existing SHL. More complicated
+ // cases (e.g. bitfield to bitfield copy) may still need a real shift before
+ // the BFI.
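+  //
+  // For example, with Mask == 0xff00 the BFI will insert at LSB 8, so an
+  // incoming (shl X, 8) cancels exactly (ShiftRightRequired becomes 0) and
+  // MaskedVal ends up as X itself.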
+
+ uint64_t LSB = CountTrailingZeros_64(Mask);
+ int64_t ShiftRightRequired = LSB;
+ if (MaskedVal.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
+ ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
+ MaskedVal = MaskedVal.getOperand(0);
+ } else if (MaskedVal.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
+ ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
+ MaskedVal = MaskedVal.getOperand(0);
+ }
+
+ if (ShiftRightRequired > 0)
+ MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
+ DAG.getConstant(ShiftRightRequired, MVT::i64));
+ else if (ShiftRightRequired < 0) {
+ // We could actually end up with a residual left shift, for example with
+    // "struct.bitfield = val << 1".
+ MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
+ DAG.getConstant(-ShiftRightRequired, MVT::i64));
+ }
+
+ return LSB;
+}
+
+/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
+/// a mask and an extension. Returns true if a BFI was found and provides
+/// information on its surroundings.
+static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
+ bool &Extended) {
+ Extended = false;
+ if (N.getOpcode() == ISD::ZERO_EXTEND) {
+ Extended = true;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+ Mask = N->getConstantOperandVal(1);
+ N = N.getOperand(0);
+ } else {
+ // Mask is the whole width.
+ Mask = -1ULL >> (64 - N.getValueType().getSizeInBits());
+ }
+
+ if (N.getOpcode() == AArch64ISD::BFI) {
+ BFI = N;
+ return true;
+ }
+
+ return false;
+}
+
+/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
+/// is roughly equivalent to (and (BFI ...), mask). This form is used because it
+/// can often be further combined with a larger mask. Ultimately, we want mask
+/// to be 2^32-1 or 2^64-1 so the AND can be skipped.
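+///
+/// For example, on i32 (or (and dst, 0xffff00ff), (and (shl src, 8), 0xff00))
+/// becomes (BFI dst, src, 8, 8) outright, since the two masks together cover
+/// the whole register and the AND can be dropped.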
+static SDValue tryCombineToBFI(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+ // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
+ // abandon the effort.
+ SDValue LHS = N->getOperand(0);
+ if (LHS.getOpcode() != ISD::AND)
+ return SDValue();
+
+ uint64_t LHSMask;
+ if (isa<ConstantSDNode>(LHS.getOperand(1)))
+ LHSMask = LHS->getConstantOperandVal(1);
+ else
+ return SDValue();
+
+ // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
+ // is or abandon the effort.
+ SDValue RHS = N->getOperand(1);
+ if (RHS.getOpcode() != ISD::AND)
+ return SDValue();
+
+ uint64_t RHSMask;
+ if (isa<ConstantSDNode>(RHS.getOperand(1)))
+ RHSMask = RHS->getConstantOperandVal(1);
+ else
+ return SDValue();
+
+ // Can't do anything if the masks are incompatible.
+ if (LHSMask & RHSMask)
+ return SDValue();
+
+ // Now we need one of the masks to be a contiguous field. Without loss of
+ // generality that should be the RHS one.
+ SDValue Bitfield = LHS.getOperand(0);
+ if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
+    // LHS is a candidate for the inserted value and RHS isn't yet known to
+    // be a better one, so swap them.
+ std::swap(LHS, RHS);
+ std::swap(LHSMask, RHSMask);
+ }
+
+ // We've done our best to put the right operands in the right places, all we
+ // can do now is check whether a BFI exists.
+ Bitfield = RHS.getOperand(0);
+ int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
+ if (LSB == -1)
+ return SDValue();
+
+ uint32_t Width = CountPopulation_64(RHSMask);
+ assert(Width && "Expected non-zero bitfield width");
+
+ SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+ LHS.getOperand(0), Bitfield,
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(Width, MVT::i64));
+
+ // Mask is trivial
+ if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits())))
+ return BFI;
+
+ return DAG.getNode(ISD::AND, DL, VT, BFI,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+}
+
+/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
+/// original input. This is surprisingly common because SROA splits things up
+/// into i8 chunks, so the originally detected MaskedBFI may actually only act
+/// on the low (say) byte of a word. This is then ORed into the rest of the
+/// word afterwards.
+///
+/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
+///
+/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
+/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
+/// involved.
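+///
+/// For example, if an earlier combine produced (and (BFI old, new, 8, 8),
+/// 0xffff), then (or (and old, 0xffff0000), <that node>) folds to a single
+/// (BFI old, new, 8, 8): the two masks together cover the whole 32-bit
+/// register.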
+static SDValue tryCombineToLargerBFI(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // First job is to hunt for a MaskedBFI on either the left or right. Swap
+ // operands if it's actually on the right.
+ SDValue BFI;
+ SDValue PossExtraMask;
+ uint64_t ExistingMask = 0;
+ bool Extended = false;
+ if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
+ PossExtraMask = N->getOperand(1);
+ else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
+ PossExtraMask = N->getOperand(0);
+ else
+ return SDValue();
+
+ // We can only combine a BFI with another compatible mask.
+ if (PossExtraMask.getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
+ return SDValue();
+
+ uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
+
+ // Masks must be compatible.
+ if (ExtraMask & ExistingMask)
+ return SDValue();
+
+ SDValue OldBFIVal = BFI.getOperand(0);
+ SDValue NewBFIVal = BFI.getOperand(1);
+ if (Extended) {
+ // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
+ // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
+ // need to be made compatible.
+ assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
+ && "Invalid types for BFI");
+ OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
+ NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
+ }
+
+ // We need the MaskedBFI to be combined with a mask of the *same* value.
+ if (PossExtraMask.getOperand(0) != OldBFIVal)
+ return SDValue();
+
+ BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+ OldBFIVal, NewBFIVal,
+ BFI.getOperand(2), BFI.getOperand(3));
+
+ // If the masking is trivial, we don't need to create it.
+ if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits())))
+ return BFI;
+
+ return DAG.getNode(ISD::AND, DL, VT, BFI,
+ DAG.getConstant(ExtraMask | ExistingMask, VT));
+}
+
+/// An EXTR instruction is made up of two shifts, ORed together. This helper
+/// searches for and classifies those shifts.
+static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
+ bool &FromHi) {
+ if (N.getOpcode() == ISD::SHL)
+ FromHi = false;
+ else if (N.getOpcode() == ISD::SRL)
+ FromHi = true;
+ else
+ return false;
+
+ if (!isa<ConstantSDNode>(N.getOperand(1)))
+ return false;
+
+ ShiftAmount = N->getConstantOperandVal(1);
+ Src = N->getOperand(0);
+ return true;
+}
+
+/// EXTR instruction extracts a contiguous chunk of bits from two existing
+/// registers viewed as a high/low pair. This function looks for the pattern:
+/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
+/// EXTR. Can't quite be done in TableGen because the two immediates aren't
+/// independent.
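+///
+/// For example, on i32 (or (shl hi, 20), (srl lo, 12)) becomes
+/// (EXTR hi, lo, 12): the result takes its top 12 bits from the bottom of
+/// hi and its low 20 bits from the top of lo.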
+static SDValue tryCombineToEXTR(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ SDValue LHS;
+ uint32_t ShiftLHS = 0;
+  bool LHSFromHi = false;
+ if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
+ return SDValue();
+
+ SDValue RHS;
+ uint32_t ShiftRHS = 0;
+  bool RHSFromHi = false;
+ if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
+ return SDValue();
+
+ // If they're both trying to come from the high part of the register, they're
+ // not really an EXTR.
+ if (LHSFromHi == RHSFromHi)
+ return SDValue();
+
+ if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
+ return SDValue();
+
+ if (LHSFromHi) {
+ std::swap(LHS, RHS);
+ std::swap(ShiftLHS, ShiftRHS);
+ }
+
+ return DAG.getNode(AArch64ISD::EXTR, DL, VT,
+ LHS, RHS,
+ DAG.getConstant(ShiftRHS, MVT::i64));
+}
+
+/// Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ // Attempt to recognise bitfield-insert operations.
+ SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
+ if (Res.getNode())
+ return Res;
+
+ // Attempt to combine an existing MaskedBFI operation into one with a larger
+ // mask.
+ Res = tryCombineToLargerBFI(N, DCI, Subtarget);
+ if (Res.getNode())
+ return Res;
+
+ Res = tryCombineToEXTR(N, DCI);
+ if (Res.getNode())
+ return Res;
+
+ return SDValue();
+}
+
+/// Target-specific dag combine xforms for ISD::SRA
+static SDValue PerformSRACombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+  // We're looking for an SRA/SHL pair which forms an SBFX.
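+  //
+  // For example, on i32 (sra (shl x, 24), 28) keeps four bits of x, sign
+  // extended: Width = 32 - 28 = 4 and LSB = 32 - 4 - 24 = 4, giving roughly
+  // "sbfx w0, w1, #4, #4" (immr = 4, imms = 7).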
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t ExtraSignBits = N->getConstantOperandVal(1);
+ SDValue Shift = N->getOperand(0);
+
+ if (Shift.getOpcode() != ISD::SHL)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+ return SDValue();
+
+ uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
+ uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
+ uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
+
+ if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(LSB + Width - 1, MVT::i64));
+}
+
+
+SDValue
+AArch64TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::AND: return PerformANDCombine(N, DCI);
+ case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
+ case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::SRA: return PerformSRACombine(N, DCI);
+ }
+ return SDValue();
+}
+
+AArch64TargetLowering::ConstraintType
+AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'w': // An FP/SIMD vector register
+ return C_RegisterClass;
+ case 'I': // Constant that can be used with an ADD instruction
+ case 'J': // Constant that can be used with a SUB instruction
+ case 'K': // Constant that can be used with a 32-bit logical instruction
+ case 'L': // Constant that can be used with a 64-bit logical instruction
+ case 'M': // Constant that can be used as a 32-bit MOV immediate
+ case 'N': // Constant that can be used as a 64-bit MOV immediate
+ case 'Y': // Floating point constant zero
+ case 'Z': // Integer constant zero
+ return C_Other;
+ case 'Q': // A memory reference with base register and no offset
+ return C_Memory;
+ case 'S': // A symbolic address
+ return C_Other;
+ }
+ }
+
+ // FIXME: Ump, Utf, Usa, Ush
+ // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
+ // whatever they may be
+ // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
+ // Usa: An absolute symbolic address
+ // Ush: The high part (bits 32:12) of a pc-relative symbolic address
+ assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
+ && Constraint != "Ush" && "Unimplemented constraints");
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+TargetLowering::ConstraintWeight
+AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+ const char *Constraint) const {
+
+ llvm_unreachable("Constraint weight unimplemented");
+}
+
+void
+AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Only length 1 constraints are C_Other.
+ if (Constraint.size() != 1) return;
+
+  // Only C_Other constraints get lowered like this. That means constants for
+  // us, so return early if there's no hope the constraint can be lowered.
+
+ switch(Constraint[0]) {
+ default: break;
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'Z': {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ uint64_t CVal = C->getZExtValue();
+ uint32_t Bits;
+
+ switch (Constraint[0]) {
+ default:
+ // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
+ // is a peculiarly useless SUB constraint.
+ llvm_unreachable("Unimplemented C_Other constraint");
+ case 'I':
+ if (CVal <= 0xfff)
+ break;
+ return;
+ case 'K':
+ if (A64Imms::isLogicalImm(32, CVal, Bits))
+ break;
+ return;
+ case 'L':
+ if (A64Imms::isLogicalImm(64, CVal, Bits))
+ break;
+ return;
+ case 'Z':
+ if (CVal == 0)
+ break;
+ return;
+ }
+
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+ case 'S': {
+ // An absolute symbolic address or label reference.
+ if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(),
+ GA->getValueType(0));
+ } else if (const BlockAddressSDNode *BA
+ = dyn_cast<BlockAddressSDNode>(Op)) {
+ Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
+ BA->getValueType(0));
+ } else if (const ExternalSymbolSDNode *ES
+ = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
+ ES->getValueType(0));
+ } else
+ return;
+ break;
+ }
+ case 'Y':
+ if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ if (CFP->isExactlyValue(0.0)) {
+ Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
+ break;
+ }
+ }
+ return;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // It's an unknown constraint for us. Let generic code have a go.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+AArch64TargetLowering::getRegForInlineAsmConstraint(
+ const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ if (VT.getSizeInBits() <= 32)
+ return std::make_pair(0U, &AArch64::GPR32RegClass);
+ else if (VT == MVT::i64)
+ return std::make_pair(0U, &AArch64::GPR64RegClass);
+ break;
+ case 'w':
+ if (VT == MVT::f16)
+ return std::make_pair(0U, &AArch64::FPR16RegClass);
+ else if (VT == MVT::f32)
+ return std::make_pair(0U, &AArch64::FPR32RegClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, &AArch64::FPR64RegClass);
+ else if (VT.getSizeInBits() == 64)
+ return std::make_pair(0U, &AArch64::VPR64RegClass);
+ else if (VT == MVT::f128)
+ return std::make_pair(0U, &AArch64::FPR128RegClass);
+ else if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, &AArch64::VPR128RegClass);
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
new file mode 100644
index 000000000000..4960d286e9de
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -0,0 +1,247 @@
+//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AArch64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
+#define LLVM_TARGET_AARCH64_ISELLOWERING_H
+
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+
+namespace llvm {
+namespace AArch64ISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // This is a conditional branch which also notes the flag needed
+ // (eq/sgt/...). A64 puts this information on the branches rather than
+ // compares as LLVM does.
+ BR_CC,
+
+ // A node to be selected to an actual call operation: either BL or BLR in
+ // the absence of tail calls.
+ Call,
+
+ // Indicates a floating-point immediate which fits into the format required
+ // by the FMOV instructions. First (and only) operand is the 8-bit encoded
+ // value of that immediate.
+ FPMOV,
+
+    // Corresponds directly to an EXTR instruction. Operands are an LHS, an
+    // RHS and an LSB.
+ EXTR,
+
+ // Wraps a load from the GOT, which should always be performed with a 64-bit
+ // load instruction. This prevents the DAG combiner folding a truncate to
+ // form a smaller memory access.
+ GOTLoad,
+
+ // Performs a bitfield insert. Arguments are: the value being inserted into;
+ // the value being inserted; least significant bit changed; width of the
+ // field.
+ BFI,
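+    // E.g. (BFI x, y, 8, 8) replaces bits [15:8] of x with the low 8 bits
+    // of y.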
+
+ // Simply a convenient node inserted during ISelLowering to represent
+ // procedure return. Will almost certainly be selected to "RET".
+ Ret,
+
+ /// Extracts a field of contiguous bits from the source and sign extends
+ /// them into a single register. Arguments are: source; immr; imms. Note
+ /// these are pre-encoded since DAG matching can't cope with combining LSB
+ /// and Width into these values itself.
+ SBFX,
+
+ /// This is an A64-ification of the standard LLVM SELECT_CC operation. The
+ /// main difference is that it only has the values and an A64 condition,
+ /// which will be produced by a setcc instruction.
+ SELECT_CC,
+
+ /// This serves most of the functions of the LLVM SETCC instruction, for two
+ /// purposes. First, it prevents optimisations from fiddling with the
+ /// compare after we've moved the CondCode information onto the SELECT_CC or
+ /// BR_CC instructions. Second, it gives a legal instruction for the actual
+ /// comparison.
+ ///
+ /// It keeps a record of the condition flags asked for because certain
+ /// instructions are only valid for a subset of condition codes.
+ SETCC,
+
+ // Designates a node which is a tail call: both a call and a return
+    // instruction as far as selection is concerned. It should be selected to
+    // an unconditional branch. Has the usual plethora of call operands, but:
+    // 1st is callee, 2nd is stack adjustment required immediately before
+    // branch.
+ TC_RETURN,
+
+ // Designates a call used to support the TLS descriptor ABI. The call itself
+ // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall
+ // var") must be attached somehow during code generation. It takes two
+ // operands: the callee and the symbol to be relocated against.
+ TLSDESCCALL,
+
+ // Leaf node which will be lowered to an appropriate MRS to obtain the
+ // thread pointer: TPIDR_EL0.
+ THREAD_POINTER,
+
+ /// Extracts a field of contiguous bits from the source and zero extends
+ /// them into a single register. Arguments are: source; immr; imms. Note
+ /// these are pre-encoded since DAG matching can't cope with combining LSB
+ /// and Width into these values itself.
+ UBFX,
+
+ // Wraps an address which the ISelLowering phase has decided should be
+ // created using the small absolute memory model: i.e. adrp/add or
+ // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
+ // get selected.
+ WrapperSmall
+ };
+}
+
+
+class AArch64Subtarget;
+class AArch64TargetMachine;
+
+class AArch64TargetLowering : public TargetLowering {
+public:
+ explicit AArch64TargetLowering(AArch64TargetMachine &TM);
+
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const;
+
+ SDValue LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc DL, SDValue &Chain) const;
+
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ /// Finds the incoming stack arguments which overlap the given fixed stack
+ /// object and incorporates their load into the current chain. This prevents
+ /// an upcoming store from clobbering the stack argument before it's used.
+ SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
+ MachineFrameInfo *MFI, int ClobberedFI) const;
+
+ EVT getSetCCResultType(EVT VT) const;
+
+ bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
+
+ bool IsTailCallConvention(CallingConv::ID CallCC) const;
+
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ bool isLegalICmpImmediate(int64_t Val) const;
+ SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &A64cc, SelectionDAG &DAG, DebugLoc &dl) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *
+ emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Size, unsigned Opcode) const;
+
+ MachineBasicBlock *
+ emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned CmpOp,
+ A64CC::CondCodes Cond) const;
+ MachineBasicBlock *
+ emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size) const;
+
+ MachineBasicBlock *
+ EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL,
+ SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+ const char *Constraint) const;
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+private:
+ const AArch64Subtarget *Subtarget;
+ const TargetRegisterInfo *RegInfo;
+ const InstrItineraryData *Itins;
+};
+} // namespace llvm
+
+#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
new file mode 100644
index 000000000000..cb93471058df
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -0,0 +1,961 @@
+//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file describes AArch64 instruction formats, down to the level of the
+// instruction's overall class.
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// A64 Instruction Format Definitions.
+//===----------------------------------------------------------------------===//
+
+// A64 is currently the only instruction set supported by the AArch64
+// architecture.
+class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : Instruction {
+ // All A64 instructions are 32-bit. This field will be filled in
+ // gradually going down the hierarchy.
+ field bits<32> Inst;
+
+ field bits<32> Unpredictable = 0;
+ // SoftFail is the generic name for this field, but we alias it so
+ // as to make it more obvious what it means in ARM-land.
+ field bits<32> SoftFail = Unpredictable;
+
+ // LLVM-level model of the AArch64/A64 distinction.
+ let Namespace = "AArch64";
+ let DecoderNamespace = "A64";
+ let Size = 4;
+
+ // Set the templated fields
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = patterns;
+ let Itinerary = itin;
+}
+
+class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction {
+ let Namespace = "AArch64";
+
+ let OutOperandList = outs;
+  let InOperandList = ins;
+ let Pattern = patterns;
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+// A pseudo-instruction that, for whatever reason, stands for a single A64
+// instruction; the eventual result will be a 32-bit real instruction.
+class A64PseudoInst<dag outs, dag ins, list<dag> patterns>
+ : PseudoInst<outs, ins, patterns> {
+ let Size = 4;
+}
+
+// As above, this will be a single A64 instruction, but we can actually give the
+// expansion in TableGen.
+class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result>
+ : A64PseudoInst<outs, ins, patterns>,
+ PseudoInstExpansion<Result>;
+
+
+// First, some common cross-hierarchy register formats.
+
+class A64InstRd<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rd;
+
+ let Inst{4-0} = Rd;
+}
+
+class A64InstRt<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rt;
+
+ let Inst{4-0} = Rt;
+}
+
+
+class A64InstRdn<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+  // Inherit Rd
+ bits<5> Rn;
+
+ let Inst{9-5} = Rn;
+}
+
+class A64InstRtn<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+  // Inherit Rt
+ bits<5> Rn;
+
+ let Inst{9-5} = Rn;
+}
+
+// Instructions taking Rt,Rt2,Rn
+class A64InstRtt2n<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rt2;
+
+ let Inst{14-10} = Rt2;
+}
+
+class A64InstRdnm<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rm;
+
+ let Inst{20-16} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Actual A64 Instruction Formats
+//
+
+// Format for Add-subtract (extended register) instructions.
+class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<3> Imm3;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = opt;
+ let Inst{21} = 0b1;
+ // Rm inherited in 20-16
+ let Inst{15-13} = option;
+ let Inst{12-10} = Imm3;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (immediate) instructions.
+class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<12> Imm12;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b10001;
+ let Inst{23-22} = shift;
+ let Inst{21-10} = Imm12;
+}
+
+// Format for Add-subtract (shifted register) instructions.
+class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> Imm6;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = shift;
+ let Inst{21} = 0b0;
+ // Rm inherited in 20-16
+ let Inst{15-10} = Imm6;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (with carry) instructions.
+class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010000;
+ // Rm inherited in 20-16
+ let Inst{15-10} = opcode2;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+
+// Format for Bitfield instructions
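+// For example, "ubfx x0, x1, #3, #5" is UBFM with ImmR = 3 (the LSB) and
+// ImmS = 7 (LSB + width - 1), matching the pre-encoded operands built by the
+// UBFX combine in AArch64ISelLowering.cpp.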
+class A64I_bitfield<bit sf, bits<2> opc, bit n,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<6> ImmR;
+ bits<6> ImmS;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100110;
+ let Inst{22} = n;
+ let Inst{21-16} = ImmR;
+ let Inst{15-10} = ImmS;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for compare and branch (immediate) instructions.
+class A64I_cmpbr<bit sf, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ bits<19> Label;
+
+ let Inst{31} = sf;
+ let Inst{30-25} = 0b011010;
+ let Inst{24} = op;
+ let Inst{23-5} = Label;
+ // Inherit Rt in 4-0
+}
+
+// Format for conditional branch (immediate) instructions.
+class A64I_condbr<bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<19> Label;
+ bits<4> Cond;
+
+ let Inst{31-25} = 0b0101010;
+ let Inst{24} = o1;
+ let Inst{23-5} = Label;
+ let Inst{4} = o0;
+ let Inst{3-0} = Cond;
+}
+
+// Format for conditional compare (immediate) instructions.
+class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> UImm5;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010010;
+ let Inst{20-16} = UImm5;
+ let Inst{15-12} = Cond;
+ let Inst{11} = 0b1;
+ let Inst{10} = o2;
+ let Inst{9-5} = Rn;
+ let Inst{4} = o3;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional compare (register) instructions.
+class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010010;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = Cond;
+ let Inst{11} = 0b0;
+ let Inst{10} = o2;
+ let Inst{9-5} = Rn;
+ let Inst{4} = o3;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional select instructions.
+class A64I_condsel<bit sf, bit op, bit s, bits<2> op2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<4> Cond;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010100;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = op2;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for data processing (1 source) instructions
+class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode,
+ string asmstr, dag outs, dag ins,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b1;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010110;
+ let Inst{20-16} = opcode2;
+ let Inst{15-10} = opcode;
+}
+
+// Format for data processing (2 source) instructions
+class A64I_dp_2src<bit sf, bits<6> opcode, bit S,
+ string asmstr, dag outs, dag ins,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010110;
+ let Inst{15-10} = opcode;
+}
+
+// Format for data-processing (3 source) instructions
+
+class A64I_dp3<bit sf, bits<6> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<5> Ra;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opcode{5-4};
+ let Inst{28-24} = 0b11011;
+ let Inst{23-21} = opcode{3-1};
+ // Inherits Rm in 20-16
+ let Inst{15} = opcode{0};
+ let Inst{14-10} = Ra;
+ // Inherits Rn in 9-5
+ // Inherits Rd in 4-0
+}
+
+// Format for exception generation instructions
+class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<16> UImm16;
+
+ let Inst{31-24} = 0b11010100;
+ let Inst{23-21} = opc;
+ let Inst{20-5} = UImm16;
+ let Inst{4-2} = op2;
+ let Inst{1-0} = ll;
+}
+
+// Format for extract (immediate) instructions
+class A64I_extract<bit sf, bits<3> op, bit n,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> LSB;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = op{2-1};
+ let Inst{28-23} = 0b100111;
+ let Inst{22} = n;
+ let Inst{21} = op{0};
+ // Inherits Rm in bits 20-16
+ let Inst{15-10} = LSB;
+ // Inherits Rn in 9-5
+ // Inherits Rd in 4-0
+}
+
+// Format for floating-point compare instructions.
+class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = op;
+ let Inst{13-10} = 0b1000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = opcode2;
+}
+
+// Format for floating-point conditional compare instructions.
+class A64I_fpccmp<bit m, bit s, bits<2> type, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4} = op;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for floating-point conditional select instructions.
+class A64I_fpcondsel<bit m, bit s, bits<2> type,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<4> Cond;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = 0b11;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+
+// Format for floating-point data-processing (1 source) instructions.
+class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-15} = opcode;
+ let Inst{14-10} = 0b10000;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point data-processing (2 sources) instructions.
+class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point data-processing (3 sources) instructions.
+class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<5> Ra;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11111;
+ let Inst{23-22} = type;
+ let Inst{21} = o1;
+ // Inherit Rm in 20-16
+ let Inst{15} = o0;
+ let Inst{14-10} = Ra;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point <-> fixed-point conversion instructions.
+class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<6> Scale;
+
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b0;
+ let Inst{20-19} = mode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = Scale;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point <-> integer conversion instructions.
+class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0b000000;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+
+// Format for floating-point immediate instructions.
+class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<8> Imm8;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-13} = Imm8;
+ let Inst{12-10} = 0b100;
+ let Inst{9-5} = imm5;
+ // Inherit Rd in 4-0
+}
+
+// Format for load-register (literal) instructions.
+class A64I_LDRlit<bits<2> opc, bit v,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ bits<19> Imm19;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b011;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-5} = Imm19;
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store exclusive instructions.
+class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-30} = size;
+ let Inst{29-24} = 0b001000;
+ let Inst{23} = o2;
+ let Inst{22} = L;
+ let Inst{21} = o1;
+ let Inst{15} = o0;
+}
+
+class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rt2;
+ let Inst{14-10} = Rt2;
+}
+
+class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rs;
+ let Inst{20-16} = Rs;
+}
+
+class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rt2;
+ let Inst{14-10} = Rt2;
+}
+
+// Format for load-store register (immediate post-indexed) instructions
+class A64I_LSpostind<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b01;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store register (immediate pre-indexed) instructions
+class A64I_LSpreind<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b11;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store register (unprivileged) instructions
+class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store (unscaled immediate) instructions.
+class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b00;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+
+// Format for load-store (unsigned immediate) instructions.
+class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<12> UImm12;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b01;
+ let Inst{23-22} = opc;
+ let Inst{21-10} = UImm12;
+}
+
+// Format for load-store register (register offset) instructions.
+class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rm;
+
+ // Complex operand selection needed for these instructions, so they
+ // need an "addr" field for encoding/decoding to be generated.
+ bits<3> Ext;
+ // OptionHi = Ext{2-1}
+ // S = Ext{0}
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = Ext{2-1};
+ let Inst{13} = optionlo;
+ let Inst{12} = Ext{0};
+ let Inst{11-10} = 0b10;
+ // Inherits Rn in 9-5
+ // Inherits Rt in 4-0
+
+ let AddedComplexity = 50;
+}
+
+// Format for Load-store register pair (offset) instructions
+class A64I_LSPoffset<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b010;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store register pair (post-indexed) instructions
+class A64I_LSPpostind<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b001;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store register pair (pre-indexed) instructions
+class A64I_LSPpreind<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b011;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store non-temporal register pair (offset) instructions
+class A64I_LSPnontemp<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b000;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Logical (immediate) instructions
+class A64I_logicalimm<bit sf, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bit N;
+ bits<6> ImmR;
+ bits<6> ImmS;
+
+ // N, ImmR and ImmS have no separate existence in any assembly syntax (or for
+ // selection), so we'll combine them into a single field here.
+ bits<13> Imm;
+ // N = Imm{12};
+ // ImmR = Imm{11-6};
+ // ImmS = Imm{5-0};
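+  //
+  // For example, "and x0, x1, #0xff" (a run of eight ones in a 64-bit
+  // element) encodes N = 1, ImmR = 0, ImmS = 0b000111, i.e.
+  // Imm = 0b1000000000111.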
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100100;
+ let Inst{22} = Imm{12};
+ let Inst{21-16} = Imm{11-6};
+ let Inst{15-10} = Imm{5-0};
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Logical (shifted register) instructions
+class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> Imm6;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-24} = 0b01010;
+ let Inst{23-22} = shift;
+ let Inst{21} = N;
+ // Rm inherited
+ let Inst{15-10} = Imm6;
+ // Rn inherited
+ // Rd inherited
+}
+
+// Format for Move wide (immediate)
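+// For example, "movz x0, #0x1234, lsl #16" has UImm16 = 0x1234 and
+// Shift (hw) = 0b01, selecting the second 16-bit slice of the register.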
+class A64I_movw<bit sf, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<16> UImm16;
+ bits<2> Shift; // Called "hw" officially
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100101;
+ let Inst{22-21} = Shift;
+ let Inst{20-5} = UImm16;
+ // Inherits Rd in 4-0
+}
+
+// Format for PC-relative addressing instructions, ADR and ADRP.
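+// ADR materialises a byte offset from the PC (e.g. "adr x0, lbl"), while
+// ADRP materialises a 4KB-page offset. In both, the low two bits of Label
+// (immlo) land in Inst{30-29} and the remaining nineteen (immhi) in
+// Inst{23-5}.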
+class A64I_PCADR<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<21> Label;
+
+ let Inst{31} = op;
+ let Inst{30-29} = Label{1-0};
+ let Inst{28-24} = 0b10000;
+ let Inst{23-5} = Label{20-2};
+}
+
+// Format for system instructions
+class A64I_system<bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<2> Op0;
+ bits<3> Op1;
+ bits<4> CRn;
+ bits<4> CRm;
+ bits<3> Op2;
+ bits<5> Rt;
+
+ let Inst{31-22} = 0b1101010100;
+ let Inst{21} = l;
+ let Inst{20-19} = Op0;
+ let Inst{18-16} = Op1;
+ let Inst{15-12} = CRn;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = Op2;
+ let Inst{4-0} = Rt;
+
+ // These instructions can do horrible things.
+ let hasSideEffects = 1;
+}
+
+// Format for unconditional branch (immediate) instructions
+class A64I_Bimm<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+  // Special in sharing no register fields at all with other instructions:
+  // the only operand is a 26-bit branch offset.
+ bits<26> Label;
+
+ let Inst{31} = op;
+ let Inst{30-26} = 0b00101;
+ let Inst{25-0} = Label;
+}
+
+// Format for Test & branch (immediate) instructions
+class A64I_TBimm<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+  // As well as the inherited Rt, this format carries the number of the bit
+  // being tested (split across the encoding) and a branch label.
+ bits<6> Imm;
+ bits<14> Label;
+
+ let Inst{31} = Imm{5};
+ let Inst{30-25} = 0b011011;
+ let Inst{24} = op;
+ let Inst{23-19} = Imm{4-0};
+ let Inst{18-5} = Label;
+ // Inherit Rt in 4-0
+}
+
+// Format for Unconditional branch (register) instructions, including
+// RET. Shares no fields with instructions further up the hierarchy, so it
+// sits at the top level.
+class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ // Doubly special in not even sharing register fields with other
+ // instructions, so we create our own Rn here.
+ bits<5> Rn;
+
+ let Inst{31-25} = 0b1101011;
+ let Inst{24-21} = opc;
+ let Inst{20-16} = op2;
+ let Inst{15-10} = op3;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = op4;
+}
+
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
new file mode 100644
index 000000000000..cf3a2c3707d9
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -0,0 +1,822 @@
+//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#include <algorithm>
+
+#define GET_INSTRINFO_CTOR
+#include "AArch64GenInstrInfo.inc"
+
+using namespace llvm;
+
+AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
+ : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
+ RI(*this, STI), Subtarget(STI) {}
+
+void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0;
+ unsigned ZeroReg = 0;
+ if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
+ // E.g. ADD xDst, xsp, #0 (, lsl #0)
+ BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return;
+ } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
+ // E.g. ADD wDst, wsp, #0 (, lsl #0)
+ BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return;
+ } else if (DestReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ // E.g. MSR NZCV, xSrc
+ BuildMI(MBB, I, DL, get(AArch64::MSRix))
+ .addImm(A64SysReg::NZCV)
+ .addReg(SrcReg);
+ } else if (SrcReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(DestReg));
+ // E.g. MRS xDST, NZCV
+ BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
+ .addImm(A64SysReg::NZCV);
+ } else if (AArch64::GPR64RegClass.contains(DestReg)) {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ Opc = AArch64::ORRxxx_lsl;
+ ZeroReg = AArch64::XZR;
+ } else if (AArch64::GPR32RegClass.contains(DestReg)) {
+ assert(AArch64::GPR32RegClass.contains(SrcReg));
+ Opc = AArch64::ORRwww_lsl;
+ ZeroReg = AArch64::WZR;
+ } else if (AArch64::FPR32RegClass.contains(DestReg)) {
+ assert(AArch64::FPR32RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else if (AArch64::FPR64RegClass.contains(DestReg)) {
+ assert(AArch64::FPR64RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else if (AArch64::FPR128RegClass.contains(DestReg)) {
+ assert(AArch64::FPR128RegClass.contains(SrcReg));
+
+ // FIXME: there's no good way to do this, at least without NEON:
+ // + There's no single move instruction for q-registers
+ // + We can't create a spill slot and use normal STR/LDR because stack
+ // allocation has already happened
+ // + We can't go via X-registers with FMOV because register allocation has
+ // already happened.
+ // This may not be efficient, but at least it works.
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+ .addReg(SrcReg)
+ .addReg(AArch64::XSP)
+ .addImm(0x1ff & -16);
+
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+ .addReg(AArch64::XSP, RegState::Define)
+ .addReg(AArch64::XSP)
+ .addImm(16);
+ return;
+ } else {
+ llvm_unreachable("Unknown register class in copyPhysReg");
+ }
+
+ // E.g. ORR xDst, xzr, xSrc, lsl #0
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(ZeroReg)
+ .addReg(SrcReg)
+ .addImm(0);
+}
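+
+// E.g. copying x1 into x2 falls through to the final BuildMI above and
+// becomes "orr x2, xzr, x1, lsl #0"; the w-register case uses the 32-bit
+// ORR with wzr in the same way.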
+
+MachineInstr *
+AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0)
+ .addImm(Offset)
+ .addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// Does the Opcode represent a conditional branch that we can remove and re-add
+/// at the end of a basic block?
+static bool isCondBranch(unsigned Opc) {
+ return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
+ Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
+ Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
+ Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
+}
+
+/// Takes apart a given conditional branch MachineInstr (see isCondBranch),
+/// setting TBB to the destination basic block and populating the Cond vector
+/// with data necessary to recreate the conditional branch at a later
+/// date. The first element will be the opcode, and subsequent ones define the
+/// conditions being branched on in an instruction-specific manner.
+static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ switch(I->getOpcode()) {
+ case AArch64::Bcc:
+ case AArch64::CBZw:
+ case AArch64::CBZx:
+ case AArch64::CBNZw:
+ case AArch64::CBNZx:
+ // These instructions just have one predicate operand in position 0 (either
+ // a condition code or a register being compared).
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ TBB = I->getOperand(1).getMBB();
+ return;
+ case AArch64::TBZwii:
+ case AArch64::TBZxii:
+ case AArch64::TBNZwii:
+ case AArch64::TBNZxii:
+ // These have two predicate operands: a register and a bit position.
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ Cond.push_back(I->getOperand(1));
+ TBB = I->getOperand(2).getMBB();
+ return;
+ default:
+ llvm_unreachable("Unknown conditional branch to classify");
+ }
+}
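+
+// E.g. for "tbz w3, #5, .Ltarget" this produces Cond = [TBZwii, w3, 5] and
+// TBB = the block containing .Ltarget, which is exactly what InsertBranch
+// below needs to recreate the instruction.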
+
+
+bool
+AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastOpc == AArch64::Bimm) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranch(LastOpc)) {
+ classifyCondBranch(LastInst, TBB, Cond);
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && LastOpc == AArch64::Bimm) {
+ while (SecondLastOpc == AArch64::Bimm) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ // Return now; the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (LastOpc == AArch64::Bimm) {
+ if (SecondLastOpc == AArch64::Bcc) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (isCondBranch(SecondLastOpc)) {
+ classifyCondBranch(SecondLastInst, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
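+
+// E.g. a block ending "b.eq .Lthen; b .Lelse" is reported as TBB = .Lthen,
+// FBB = .Lelse with Cond = [Bcc, EQ]; a block ending in a lone "b .Ldest"
+// gives TBB = .Ldest and leaves Cond empty.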
+
+bool AArch64InstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ switch (Cond[0].getImm()) {
+ case AArch64::Bcc: {
+ A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
+ CC = A64InvertCondCode(CC);
+ Cond[1].setImm(CC);
+ return false;
+ }
+ case AArch64::CBZw:
+ Cond[0].setImm(AArch64::CBNZw);
+ return false;
+ case AArch64::CBZx:
+ Cond[0].setImm(AArch64::CBNZx);
+ return false;
+ case AArch64::CBNZw:
+ Cond[0].setImm(AArch64::CBZw);
+ return false;
+ case AArch64::CBNZx:
+ Cond[0].setImm(AArch64::CBZx);
+ return false;
+ case AArch64::TBZwii:
+ Cond[0].setImm(AArch64::TBNZwii);
+ return false;
+ case AArch64::TBZxii:
+ Cond[0].setImm(AArch64::TBNZxii);
+ return false;
+ case AArch64::TBNZwii:
+ Cond[0].setImm(AArch64::TBZwii);
+ return false;
+ case AArch64::TBNZxii:
+ Cond[0].setImm(AArch64::TBZxii);
+ return false;
+ default:
+ llvm_unreachable("Unknown branch type");
+ }
+}
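+
+// E.g. reversing Cond = [CBZw, w0] gives [CBNZw, w0]; only the Bcc case
+// has to rewrite a second element (the condition code itself).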
+
+
+unsigned
+AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ if (FBB == 0 && Cond.empty()) {
+ BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
+ return 1;
+ } else if (FBB == 0) {
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ for (int i = 1, e = Cond.size(); i != e; ++i)
+ MIB.addOperand(Cond[i]);
+ MIB.addMBB(TBB);
+ return 1;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ for (int i = 1, e = Cond.size(); i != e; ++i)
+ MIB.addOperand(Cond[i]);
+ MIB.addMBB(TBB);
+
+ BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
+ return 2;
+}
+
+unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranch(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool
+AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AArch64::TLSDESC_BLRx: {
+ MachineInstr *NewMI =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
+ .addOperand(MI.getOperand(1));
+ MI.setDesc(get(AArch64::BLRx));
+
+ llvm::finalizeBundle(MBB, NewMI, *++MBBI);
+ return true;
+ }
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+void
+AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+ MachineMemOperand *MMO
+ = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ Align);
+
+ unsigned StoreOp = 0;
+ if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+ switch(RC->getSize()) {
+ case 4: StoreOp = AArch64::LS32_STR; break;
+ case 8: StoreOp = AArch64::LS64_STR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ } else {
+ assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+ RC->hasType(MVT::f128))
+ && "Expected integer or floating type for store");
+ switch (RC->getSize()) {
+ case 4: StoreOp = AArch64::LSFP32_STR; break;
+ case 8: StoreOp = AArch64::LSFP64_STR; break;
+ case 16: StoreOp = AArch64::LSFP128_STR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ }
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
+ NewMI.addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx)
+ .addImm(0)
+ .addMemOperand(MMO);
+
+}
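+
+// E.g. spilling x19 to frame index 2 picks AArch64::LS64_STR and emits
+// "str x19, [<fi#2>, #0]"; the frame index and zero offset are rewritten
+// to a real base register and offset during frame index elimination.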
+
+void
+AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+ MachineMemOperand *MMO
+ = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ Align);
+
+ unsigned LoadOp = 0;
+ if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+ switch(RC->getSize()) {
+ case 4: LoadOp = AArch64::LS32_LDR; break;
+ case 8: LoadOp = AArch64::LS64_LDR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ } else {
+ assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
+ || RC->hasType(MVT::f128))
+ && "Expected integer or floating type for store");
+ switch (RC->getSize()) {
+ case 4: LoadOp = AArch64::LSFP32_LDR; break;
+ case 8: LoadOp = AArch64::LSFP64_LDR; break;
+ case 16: LoadOp = AArch64::LSFP128_LDR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ }
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
+ NewMI.addFrameIndex(FrameIdx)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
+unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
+ unsigned Limit = (1 << 16) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
+ // is the largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
+ Limit = std::min(Limit, 0xfffu);
+ break;
+ }
+
+ int AccessScale, MinOffset, MaxOffset;
+ getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
+ Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));
+
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+
+void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
+ int &AccessScale, int &MinOffset,
+ int &MaxOffset) const {
+ switch (MI.getOpcode()) {
+ default: llvm_unreachable("Unknown load/store kind");
+ case TargetOpcode::DBG_VALUE:
+ AccessScale = 1;
+ MinOffset = INT_MIN;
+ MaxOffset = INT_MAX;
+ return;
+ case AArch64::LS8_LDR: case AArch64::LS8_STR:
+ case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
+ case AArch64::LDRSBw:
+ case AArch64::LDRSBx:
+ AccessScale = 1;
+ MinOffset = 0;
+ MaxOffset = 0xfff;
+ return;
+ case AArch64::LS16_LDR: case AArch64::LS16_STR:
+ case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
+ case AArch64::LDRSHw:
+ case AArch64::LDRSHx:
+ AccessScale = 2;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LS32_LDR: case AArch64::LS32_STR:
+ case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
+ case AArch64::LDRSWx:
+ case AArch64::LDPSWx:
+ AccessScale = 4;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LS64_LDR: case AArch64::LS64_STR:
+ case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
+ case AArch64::PRFM:
+ AccessScale = 8;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
+ AccessScale = 16;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
+ case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
+ AccessScale = 4;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
+ case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
+ AccessScale = 8;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
+ AccessScale = 16;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ }
+}
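+
+// Worked example: LS64_LDR gets AccessScale = 8 and MaxOffset = 0xfff * 8
+// = 32760, so "ldr x0, [x1, #32760]" is encodable while #32761 (not a
+// multiple of 8) and #32768 (out of range) are not. The pair forms are much
+// tighter, e.g. LSPair64_LDR spans only -512 to +504 in steps of 8.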
+
+unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ const MCInstrDesc &MCID = MI.getDesc();
+ const MachineBasicBlock &MBB = *MI.getParent();
+ const MachineFunction &MF = *MBB.getParent();
+ const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
+
+ if (MCID.getSize())
+ return MCID.getSize();
+
+ if (MI.getOpcode() == AArch64::INLINEASM)
+ return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
+
+ if (MI.isLabel())
+ return 0;
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case AArch64::TLSDESCCALL:
+ return 0;
+ default:
+ llvm_unreachable("Unknown instruction class");
+ }
+}
+
+unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += getInstSizeInBytes(*I);
+ }
+ return Size;
+}
+
+bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo &TII) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MFI.getObjectOffset(FrameRegIdx);
+ llvm_unreachable("Unimplemented rewriteFrameIndex");
+}
+
+void llvm::emitRegUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
+ int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
+ if (NumBytes == 0 && DstReg == SrcReg)
+ return;
+ else if (abs64(NumBytes) & ~0xffffff) {
+ // Generically, we have to materialize the offset into a temporary register
+ // and subtract it. There are a couple of ways this could be done, for now
+ // we'll use a movz/movk or movn/movk sequence.
+ uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
+ .addImm(0xffff & Bits).addImm(0)
+ .setMIFlags(MIFlags);
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(1)
+ .setMIFlags(MIFlags);
+ }
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(2)
+ .setMIFlags(MIFlags);
+ }
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(3)
+ .setMIFlags(MIFlags);
+ }
+
+ // ADD DST, SRC, xTMP (, lsl #0)
+ unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
+ BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(0)
+ .setMIFlag(MIFlags);
+ return;
+ }
+
+ // Now we know that the adjustment can be done in at most two add/sub
+ // (immediate) instructions, which is always more efficient than a
+ // literal-pool load, or even a hypothetical movz/movk/add sequence
+
+ // Decide whether we're doing addition or subtraction
+ unsigned LowOp, HighOp;
+ if (NumBytes >= 0) {
+ LowOp = AArch64::ADDxxi_lsl0_s;
+ HighOp = AArch64::ADDxxi_lsl12_s;
+ } else {
+ LowOp = AArch64::SUBxxi_lsl0_s;
+ HighOp = AArch64::SUBxxi_lsl12_s;
+ NumBytes = abs64(NumBytes);
+ }
+
+ // If we're here, at the very least a move needs to be produced, which just
+ // happens to be materializable by an ADD.
+ if ((NumBytes & 0xfff) || NumBytes == 0) {
+ BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(NumBytes & 0xfff)
+ .setMIFlag(MIFlags);
+
+ // Next update should use the register we've just defined.
+ SrcReg = DstReg;
+ }
+
+ if (NumBytes & 0xfff000) {
+ BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(NumBytes >> 12)
+ .setMIFlag(MIFlags);
+ }
+}
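+
+// E.g. NumBytes = 0x12345 fits in 24 bits and takes the two-instruction
+// path: "add xDst, xSrc, #0x345" then "add xDst, xDst, #0x12, lsl #12".
+// Larger magnitudes go through the movz/movk materialization above followed
+// by a single extended-register ADD or SUB.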
+
+void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned ScratchReg, int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags) {
+ emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
+ NumBytes, MIFlags);
+}
+
+
+namespace {
+ struct LDTLSCleanup : public MachineFunctionPass {
+ static char ID;
+ LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ AArch64MachineFunctionInfo* MFI
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+ // No point folding accesses if there aren't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+ }
+
+ // Visit the dominator subtree rooted at Node in pre-order.
+ // If TLSBaseAddrReg is non-null, then use that to replace any
+ // TLS_base_addr instructions. Otherwise, create the register
+ // when the first such instruction is seen, and then use it
+ // as we encounter more instructions.
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ switch (I->getOpcode()) {
+ case AArch64::TLSDESC_BLRx:
+ // Make sure it's a local dynamic access.
+ if (!I->getOperand(1).isSymbol() ||
+ strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ break;
+
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
+ I != E; ++I) {
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+ }
+
+ return Changed;
+ }
+
+ // Replace the TLS_base_addr instruction I with a copy from
+ // TLSBaseAddrReg, returning the new instruction.
+ MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+ // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
+ // code sequence assumes the address will be.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ AArch64::X0)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_base_addr instruction.
+ I->eraseFromParent();
+
+ return Copy;
+ }
+
+ // Create a virtual register in *TLSBaseAddrReg, and populate it by
+ // inserting a copy instruction after I. Returns the new instruction.
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
+
+ // Insert a copy from X0 to TLSBaseAddrReg for later.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ *TLSBaseAddrReg)
+ .addReg(AArch64::X0);
+
+ return Copy;
+ }
+
+ virtual const char *getPassName() const {
+ return "Local Dynamic TLS Access Clean-up";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char LDTLSCleanup::ID = 0;
+FunctionPass*
+llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
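+
+// E.g. in a function with two "_TLS_MODULE_BASE_" TLSDESC_BLRx calls, the
+// pass keeps the first call, copies its x0 result into a fresh virtual
+// register, and replaces the second call with a plain COPY from that
+// register back into x0.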
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
new file mode 100644
index 000000000000..22a2ab4cf60a
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -0,0 +1,112 @@
+//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64INSTRINFO_H
+#define LLVM_TARGET_AARCH64INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AArch64RegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AArch64GenInstrInfo.inc"
+
+namespace llvm {
+
+class AArch64Subtarget;
+
+class AArch64InstrInfo : public AArch64GenInstrInfo {
+ const AArch64RegisterInfo RI;
+ const AArch64Subtarget &Subtarget;
+public:
+ explicit AArch64InstrInfo(const AArch64Subtarget &STI);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ const AArch64Subtarget &getSubTarget() const { return Subtarget; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ /// Look through the instructions in this function and work out the largest
+ /// size the stack frame can be while still allowing local slots to be
+ /// addressed directly, without extra address computation.
+ unsigned estimateRSStackLimit(MachineFunction &MF) const;
+
+ /// getAddressConstraints - For loads and stores (and PRFMs) taking an
+ /// immediate offset, this function determines the constraints required for
+ /// the immediate. It must satisfy:
+ /// + MinOffset <= imm <= MaxOffset
+ /// + imm % AccessScale == 0
+ void getAddressConstraints(const MachineInstr &MI, int &AccessScale,
+ int &MinOffset, int &MaxOffset) const;
+
+
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const;
+
+ unsigned getInstBundleLength(const MachineInstr &MI) const;
+
+};
+
+bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo &TII);
+
+
+void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
+ int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned ScratchReg, int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
new file mode 100644
index 000000000000..37be5e4892e4
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -0,0 +1,5099 @@
+//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the AArch64 scalar instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "AArch64InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Target-specific ISD nodes and profiles
+//===----------------------------------------------------------------------===//
+
+def SDT_A64ret : SDTypeProfile<0, 0, []>;
+def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain,
+ SDNPOptInGlue,
+ SDNPVariadic]>;
+
+// (ins NZCV, Condition, Dest)
+def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>;
+def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>;
+
+// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition)
+def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<2, 3>]>;
+def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>;
+
+// (outs NZCV), (ins LHS, RHS, Condition)
+def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisSameAs<1, 2>]>;
+def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>;
+
+
+// (outs GPR64), (ins)
+def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
+
+// A64 compares don't really care about the condition (they set all the
+// flags), so a simple binary operator is useful.
+def A64cmp : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64setcc node:$lhs, node:$rhs, cond)>;
+
+
+// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN
+// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C
+// and V flags can be set differently by this operation. It comes down to
+// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are
+// then everything is fine. If not then the optimization is wrong. Thus general
+// comparisons are only valid if op2 != 0.
+
+// So, finally, the only LLVM-native comparisons that don't mention C and V are
+// SETEQ and SETNE. They're the only ones we can safely use CMN for in the
+// absence of information about op2.
+def equality_cond : PatLeaf<(cond), [{
+ return N->get() == ISD::SETEQ || N->get() == ISD::SETNE;
+}]>;
+
+def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>;
+
+// There are two layers of indirection here, driven by the following
+// considerations.
+// + TableGen does not know CodeModel or Reloc so that decision should be
+// made for a variable/address at ISelLowering.
+// + The output of ISelLowering should be selectable (hence the Wrapper,
+// rather than a bare target opcode)
+def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i32>,
+ SDTCisPtrTy<0>]>;
+
+def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>;
+
+
+def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad,
+ [SDNPHasChain]>;
+
+
+// (A64BFI LHS, RHS, LSB, Width)
+def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i64>,
+ SDTCisVT<4, i64>]>;
+
+def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>;
+
+// (A64EXTR HiReg, LoReg, LSB)
+def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i64>]>;
+def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>;
+
+// (A64[SU]BFX Field, ImmR, ImmS).
+//
+// Note that ImmR and ImmS are already encoded for the actual instructions. The
+// more natural LSB and Width mix together to form ImmR and ImmS, something
+// which TableGen can't handle.
+def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
+def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
+
+def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
+
+//===----------------------------------------------------------------------===//
+// Call sequence pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+
+def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// The TLSDESCCALL node is a variant call which goes to an indirectly calculated
+// destination but needs a relocation against a fixed symbol. As such it has two
+// certain operands: the callee and the relocated variable.
+//
+// The TLS ABI only allows it to be selected to a BLR instruction (with
+// appropriate relocation).
+def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+
+def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
+
+def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>;
+def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>;
+def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+
+
+// These pseudo-instructions have special semantics by virtue of being passed to
+// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by
+// LowerCall to (in our case) tell the back-end about stack adjustments for
+// arguments passed on the stack. Here we select those markers to
+// pseudo-instructions which explicitly set the stack, and finally in the
+// RegisterInfo we convert them to a true stack adjustment.
+let Defs = [XSP], Uses = [XSP] in {
+ def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt),
+ [(AArch64callseq_start timm:$amt)]>;
+
+ def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
+}
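+
+// E.g. a call passing 16 bytes of arguments on the stack is bracketed
+// roughly as: ADJCALLSTACKDOWN 16; <argument stores>; BL callee;
+// ADJCALLSTACKUP 16, 0. Both markers are later lowered to real SP
+// adjustments, or removed entirely if the call frame is already reserved.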
+
+//===----------------------------------------------------------------------===//
+// Atomic operation pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1 in {
+multiclass AtomicSizes<string opname> {
+ def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set i32:$dst, (!cast<SDNode>(opname # "_8") i64:$ptr, i32:$incr))]>;
+ def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set i32:$dst, (!cast<SDNode>(opname # "_16") i64:$ptr, i32:$incr))]>;
+ def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set i32:$dst, (!cast<SDNode>(opname # "_32") i64:$ptr, i32:$incr))]>;
+ def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr),
+ [(set i64:$dst, (!cast<SDNode>(opname # "_64") i64:$ptr, i64:$incr))]>;
+}
+}
+
+defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">;
+defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">;
+defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">;
+defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">;
+defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">;
+defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
+defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">;
+let Defs = [NZCV] in {
+ // These operations need a CMP to calculate the correct value
+ defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
+ defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
+ defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
+ defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
+}
+
+let usesCustomInserter = 1, Defs = [NZCV] in {
+def ATOMIC_CMP_SWAP_I8
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>;
+def ATOMIC_CMP_SWAP_I16
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>;
+def ATOMIC_CMP_SWAP_I32
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>;
+def ATOMIC_CMP_SWAP_I64
+ : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new),
+ [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>;
+}
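+
+// None of these pseudos survives to encoding: with usesCustomInserter set,
+// each is expanded into a load-exclusive/store-exclusive loop at basic
+// block emission time. In outline, ATOMIC_CMP_SWAP_I32 becomes:
+//   loop: ldxr wDst, [xPtr]
+//         cmp wDst, wOld
+//         b.ne done
+//         stxr wStatus, wNew, [xPtr]
+//         cbnz wStatus, loop
+//   done:
+// (register choices are made at expansion time).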
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (extended register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP
+
+// The RHS of these operations is conceptually a sign/zero-extended
+// register, optionally shifted left by 1-4. The extension can be a
+// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but
+// must be specified with one exception:
+
+// If one of the registers is sp/wsp then LSL is an alias for UXTW in
+// 32-bit instructions and UXTX in 64-bit versions, the shift amount
+// is not optional in that case (but can explicitly be 0), and the
+// entire suffix can be skipped (e.g. "add sp, x3, x2").
+
+multiclass extend_operands<string PREFIX, string Diag> {
+ def _asmoperand : AsmOperandClass {
+ let Name = PREFIX;
+ let RenderMethod = "addRegExtendOperands";
+ let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">";
+ let DiagnosticType = "AddSubRegExtend" # Diag;
+ }
+
+ def _operand : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> {
+ let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">";
+ let DecoderMethod = "DecodeRegExtendOperand";
+ let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand");
+ }
+}
+
+defm UXTB : extend_operands<"UXTB", "Small">;
+defm UXTH : extend_operands<"UXTH", "Small">;
+defm UXTW : extend_operands<"UXTW", "Small">;
+defm UXTX : extend_operands<"UXTX", "Large">;
+defm SXTB : extend_operands<"SXTB", "Small">;
+defm SXTH : extend_operands<"SXTH", "Small">;
+defm SXTW : extend_operands<"SXTW", "Small">;
+defm SXTX : extend_operands<"SXTX", "Large">;
+
+def LSL_extasmoperand : AsmOperandClass {
+ let Name = "RegExtendLSL";
+ let RenderMethod = "addRegExtendOperands";
+ let DiagnosticType = "AddSubRegExtendLarge";
+}
+
+def LSL_extoperand : Operand<i64> {
+ let ParserMatchClass = LSL_extasmoperand;
+}
+
+
+// The patterns for various sign-extensions are a little ugly and
+// non-uniform because everything has already been promoted to the
+// legal i64 and i32 types. We'll wrap the various variants up in a
+// class for use later.
+class extend_types {
+ dag uxtb; dag uxth; dag uxtw; dag uxtx;
+ dag sxtb; dag sxth; dag sxtw; dag sxtx;
+ ValueType ty;
+ RegisterClass GPR;
+}
+
+def extends_to_i64 : extend_types {
+ let uxtb = (and (anyext i32:$Rm), 255);
+ let uxth = (and (anyext i32:$Rm), 65535);
+ let uxtw = (zext i32:$Rm);
+ let uxtx = (i64 $Rm);
+
+ let sxtb = (sext_inreg (anyext i32:$Rm), i8);
+ let sxth = (sext_inreg (anyext i32:$Rm), i16);
+ let sxtw = (sext i32:$Rm);
+ let sxtx = (i64 $Rm);
+
+ let ty = i64;
+ let GPR = GPR64xsp;
+}
+
+
+def extends_to_i32 : extend_types {
+ let uxtb = (and i32:$Rm, 255);
+ let uxth = (and i32:$Rm, 65535);
+ let uxtw = (i32 i32:$Rm);
+ let uxtx = (i32 i32:$Rm);
+
+ let sxtb = (sext_inreg i32:$Rm, i8);
+ let sxth = (sext_inreg i32:$Rm, i16);
+ let sxtw = (i32 i32:$Rm);
+ let sxtx = (i32 i32:$Rm);
+
+ let ty = i32;
+ let GPR = GPR32wsp;
+}
+
+// Now, six of the extensions supported are easy and uniform: if the source size
+// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate
+// those instructions in one block.
+
+// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me:
+// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would
+// be impossible.
+// + Patterns are very different as well.
+// + Passing different registers would be ugly (more fields in extend_types
+// would probably be the best option).
+multiclass addsub_exts<bit sf, bit op, bit S, string asmop,
+ SDPatternOperator opfrag,
+ dag outs, extend_types exts> {
+ def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
+ NoItinerary>;
+ def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
+ NoItinerary>;
+ def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
+ NoItinerary>;
+
+ def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
+ NoItinerary>;
+ def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
+ NoItinerary>;
+ def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110,
+ outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag exts.ty:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
+ NoItinerary>;
+}
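+
+// E.g. instantiating this multiclass as ADDxx (below) produces ADDxxw_sxth
+// and friends, so "add x0, x1, w2, sxth #1" selects the w_sxth variant and
+// matches (add i64:$Rn, (shl (sext_inreg (anyext i32:$Rm), i16), 1)).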
+
+// These two could be merged in with the above, but their patterns aren't really
+// necessary and the naming-scheme would necessarily break:
+multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag,
+ dag outs> {
+ def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011,
+ outs,
+ (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))],
+ NoItinerary>;
+
+ def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111,
+ outs,
+ (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No Pattern: same as uxtx */],
+ NoItinerary>;
+}
+
+multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> {
+ def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011,
+ outs,
+ (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No pattern: probably same as uxtw */],
+ NoItinerary>;
+
+ def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111,
+ outs,
+ (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No Pattern: probably same as uxtw */],
+ NoItinerary>;
+}
+
+class SetRD<RegisterClass RC, SDPatternOperator op>
+ : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>;
+class SetNZCV<SDPatternOperator op>
+ : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>;
+
+defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
+ (outs GPR64xsp:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
+ (outs GPR64xsp:$Rd)>;
+defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>,
+ (outs GPR32wsp:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b0, 0b0, "add\t$Rd, ",
+ (outs GPR32wsp:$Rd)>;
+defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
+ (outs GPR64xsp:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
+ (outs GPR64xsp:$Rd)>;
+defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>,
+ (outs GPR32wsp:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ",
+ (outs GPR32wsp:$Rd)>;
+
+let Defs = [NZCV] in {
+defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
+ (outs GPR64:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
+ (outs GPR64:$Rd)>;
+defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>,
+ (outs GPR32:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ",
+ (outs GPR32:$Rd)>;
+defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
+ (outs GPR64:$Rd), extends_to_i64>,
+ addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
+ (outs GPR64:$Rd)>;
+defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
+ (outs GPR32:$Rd), extends_to_i32>,
+ addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ",
+ (outs GPR32:$Rd)>;
+
+
+let Rd = 0b11111, isCompare = 1 in {
+defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
+ (outs), extends_to_i64>,
+ addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
+defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
+ (outs), extends_to_i32>,
+ addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>;
+defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
+ (outs), extends_to_i64>,
+ addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>;
+defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
+ (outs), extends_to_i32>,
+ addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>;
+}
+}
+
+// Now patterns for the operation without a shift being needed. No patterns are
+// created for uxtx/sxtx since they're non-uniform and it's expected that
+// add/sub (shifted register) will handle those cases anyway.
+multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop,
+ extend_types exts> {
+ def : Pat<(nodeop exts.ty:$Rn, exts.uxtb),
+ (!cast<Instruction>(prefix # "w_uxtb") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.uxth),
+ (!cast<Instruction>(prefix # "w_uxth") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.uxtw),
+ (!cast<Instruction>(prefix # "w_uxtw") $Rn, $Rm, 0)>;
+
+ def : Pat<(nodeop exts.ty:$Rn, exts.sxtb),
+ (!cast<Instruction>(prefix # "w_sxtb") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.sxth),
+ (!cast<Instruction>(prefix # "w_sxth") $Rn, $Rm, 0)>;
+ def : Pat<(nodeop exts.ty:$Rn, exts.sxtw),
+ (!cast<Instruction>(prefix # "w_sxtw") $Rn, $Rm, 0)>;
+}
+
+defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>;
+defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>;
+defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>;
+defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>;
+
+defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>;
+defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>;
+
+// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is
+// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the
+// operation. Also permitted in this case is complete omission of the argument,
+// which implies "lsl #0".
+multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd,
+ RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
+ def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+ def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"),
+ (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+
+}
+
+defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+
+// Rd cannot be sp for flag-setting variants so only half of the aliases are
+// needed.
+defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>;
+defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>;
+
+// CMP unfortunately has to be different because the instruction doesn't have a
+// dest register.
+multiclass cmp_lsl_aliases<string asmop, Instruction inst,
+ RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
+ def : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+ (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+ def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"),
+ (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+}
+
+defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>;
+defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV
+
+// These instructions accept a 12-bit unsigned immediate, optionally shifted
+// left by 12 bits. The official assembly format specifies a 12-bit immediate with
+// one of "", "LSL #0", "LSL #12" supplementary operands.
+
+// There are surprisingly few ways to make this work with TableGen, so this
+// implementation has separate instructions for the "LSL #0" and "LSL #12"
+// variants.
+
+// If the MCInst retained a single combined immediate (which could be 0x123000,
+// for example) then both components (imm & shift) would have to be delegated to
+// a single assembly operand. This would entail a separate operand parser
+// (because the LSL would have to live in the same AArch64Operand as the
+// immediate to be accessible); assembly parsing is rather complex and
+// error-prone C++ code.
+//
+// By splitting the immediate, we can delegate handling this optional operand to
+// an InstAlias. Supporting functions to generate the correct MCInst are still
+// required, but these are essentially trivial and parsing can remain generic.
+//
+// Rejected plans with rationale:
+// ------------------------------
+//
+// In an ideal world you'd have two first-class immediate operands (in
+// InOperandList, specifying imm12 and shift). Unfortunately this is not
+// selectable by any means I could discover.
+//
+// An Instruction with two MCOperands hidden behind a single entry in
+// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional,
+// but required more C++ code to handle encoding/decoding. Parsing (the intended
+// main beneficiary) ended up equally complex because of the optional nature of
+// "LSL #0".
+//
+// Attempting to circumvent the need for a custom OperandParser above by giving
+// InstAliases without the "lsl #0" failed. add/sub could be accommodated but
+// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands
+// should be parsed: there was no way to accommodate an "lsl #12".
+
+let ParserMethod = "ParseImmWithLSLOperand",
+ RenderMethod = "addImmWithLSLOperands" in {
+ // Derived PredicateMethod fields are different for each
+ def addsubimm_lsl0_asmoperand : AsmOperandClass {
+ let Name = "AddSubImmLSL0";
+ // If an error is reported against this operand, the instruction could also
+ // register variant.
+ let DiagnosticType = "AddSubSecondSource";
+ }
+
+ def addsubimm_lsl12_asmoperand : AsmOperandClass {
+ let Name = "AddSubImmLSL12";
+ let DiagnosticType = "AddSubSecondSource";
+ }
+}
+
+def shr_12_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32);
+}]>;
+
+def shr_12_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32);
+}]>;
+
+def neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32);
+}]>;
+
+
+multiclass addsub_imm_operands<ValueType ty> {
+ let PrintMethod = "printAddSubImmLSL0Operand",
+ EncoderMethod = "getAddSubImmOpValue",
+ ParserMatchClass = addsubimm_lsl0_asmoperand in {
+ def _posimm_lsl0 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>;
+ def _negimm_lsl0 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }],
+ neg_XFORM>;
+ }
+
+ let PrintMethod = "printAddSubImmLSL12Operand",
+ EncoderMethod = "getAddSubImmOpValue",
+ ParserMatchClass = addsubimm_lsl12_asmoperand in {
+ def _posimm_lsl12 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }],
+ shr_12_XFORM>;
+
+ def _negimm_lsl12 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }],
+ shr_12_neg_XFORM>;
+ }
+}
+
+// The add operands don't need any transformation
+defm addsubimm_operand_i32 : addsub_imm_operands<i32>;
+defm addsubimm_operand_i64 : addsub_imm_operands<i64>;
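+
+// E.g. with this split, "add x0, x1, #0x123000" matches the _posimm_lsl12
+// operand: shr_12_XFORM reduces the constant to an Imm12 of 0x123 and the
+// LSL #12 lives entirely in the choice of instruction variant.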
+
+multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
+ string asmop, string cmpasmop,
+ Operand imm_operand, Operand cmp_imm_operand,
+ RegisterClass GPR, RegisterClass GPRsp,
+ AArch64Reg ZR, ValueType Ty> {
+ // All registers for non-S variants allow SP
+ def _s : A64I_addsubimm<sf, op, 0b0, shift,
+ (outs GPRsp:$Rd),
+ (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
+ [(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))],
+ NoItinerary>;
+
+
+ // S variants can read SP but would write to ZR
+ def _S : A64I_addsubimm<sf, op, 0b1, shift,
+ (outs GPR:$Rd),
+ (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
+ [(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))],
+ NoItinerary> {
+ let Defs = [NZCV];
+ }
+
+ // Note that the pattern here for ADDS is subtle. Canonically CMP
+ // a, b becomes SUBS a, b. If b < 0 then this is equivalent to
+ // ADDS a, (-b), but as with CMN above the C and V flags can differ,
+ // so the equivalence is not true in general.
+ def _cmp : A64I_addsubimm<sf, op, 0b1, shift,
+ (outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(cmpasmop, " $Rn, $Imm12"),
+ [(set NZCV,
+ (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))],
+ NoItinerary> {
+ let Rd = 0b11111;
+ let Defs = [NZCV];
+ let isCompare = 1;
+ }
+}
+
+
+multiclass addsubimm_shifts<string prefix, bit sf, bit op,
+ string asmop, string cmpasmop, string operand, string cmpoperand,
+ RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR,
+ ValueType Ty> {
+ defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
+ asmop, cmpasmop,
+ !cast<Operand>(operand # "_lsl0"),
+ !cast<Operand>(cmpoperand # "_lsl0"),
+ GPR, GPRsp, ZR, Ty>;
+
+ defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
+ asmop, cmpasmop,
+ !cast<Operand>(operand # "_lsl12"),
+ !cast<Operand>(cmpoperand # "_lsl12"),
+ GPR, GPRsp, ZR, Ty>;
+}
+
+defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
+ "addsubimm_operand_i32_posimm",
+ "addsubimm_operand_i32_negimm",
+ GPR32, GPR32wsp, WZR, i32>;
+defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
+ "addsubimm_operand_i64_posimm",
+ "addsubimm_operand_i64_negimm",
+ GPR64, GPR64xsp, XZR, i64>;
+defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
+ "addsubimm_operand_i32_negimm",
+ "addsubimm_operand_i32_posimm",
+ GPR32, GPR32wsp, WZR, i32>;
+defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
+ "addsubimm_operand_i64_negimm",
+ "addsubimm_operand_i64_posimm",
+ GPR64, GPR64xsp, XZR, i64>;
+
+multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> {
+ def _fromsp : InstAlias<"mov $Rd, $Rn",
+ (addop GPRsp:$Rd, SP:$Rn, 0),
+ 0b1>;
+
+ def _tosp : InstAlias<"mov $Rd, $Rn",
+ (addop SP:$Rd, GPRsp:$Rn, 0),
+ 0b1>;
+}
+
+// Recall Rxsp is a RegisterClass containing *just* xsp.
+defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
+defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
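+// For example, "mov sp, x0" assembles as "add sp, x0, #0" and "mov x0, sp" as
+// "add x0, sp, #0". A plain register-to-register "mov x0, x1" never involves
+// SP and is an alias of ORR instead.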
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
+
+//===-------------------------------
+// 1. The "shifed register" operands. Shared with logical insts.
+//===-------------------------------
+
+multiclass shift_operands<string prefix, string form> {
+ def _asmoperand_i32 : AsmOperandClass {
+ let Name = "Shift" # form # "i32";
+ let RenderMethod = "addShiftOperands";
+ let PredicateMethod = "isShift<A64SE::" # form # ", false>";
+ let DiagnosticType = "AddSubRegShift32";
+ }
+
+ // Note that the operand type is intentionally i64 because the DAGCombiner
+ // puts these into a canonical form.
+ def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # "_asmoperand_i32");
+ let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+ let DecoderMethod = "Decode32BitShiftOperand";
+ }
+
+ def _asmoperand_i64 : AsmOperandClass {
+ let Name = "Shift" # form # "i64";
+ let RenderMethod = "addShiftOperands";
+ let PredicateMethod = "isShift<A64SE::" # form # ", true>";
+ let DiagnosticType = "AddSubRegShift64";
+ }
+
+ def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # "_asmoperand_i64");
+ let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+ }
+}
+
+defm lsl_operand : shift_operands<"lsl_operand", "LSL">;
+defm lsr_operand : shift_operands<"lsr_operand", "LSR">;
+defm asr_operand : shift_operands<"asr_operand", "ASR">;
+
+// Not used for add/sub, but defined here for completeness. The "logical
+// (shifted register)" instructions *do* have an ROR variant.
+defm ror_operand : shift_operands<"ror_operand", "ROR">;
+
+//===-------------------------------
+// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions.
+//===-------------------------------
+
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
+ string asmop, SDPatternOperator opfrag, ValueType ty,
+ RegisterClass GPR, list<Register> defs> {
+ let isCommutable = commutable, Defs = defs in {
+ def _lsl : A64I_addsubshift<sf, op, s, 0b00,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_addsubshift<sf, op, s, 0b01,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_addsubshift<sf, op, s, 0b10,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+ GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag ty:$Rn, ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
+ string asmop, SDPatternOperator opfrag,
+ list<Register> defs> {
+ defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
+ commutable, asmop, opfrag, i64, GPR64, defs>;
+ defm www : addsub_shifts<prefix # "www", 0b0, op, s,
+ commutable, asmop, opfrag, i32, GPR32, defs>;
+}
+
+
+defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>;
+defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>;
+
+defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>;
+defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
+
+//===-------------------------------
+// 3. The NEG/NEGS aliases
+//===-------------------------------
+
+multiclass neg_alias<Instruction INST, RegisterClass GPR, Register ZR,
+ ValueType ty, Operand shift_operand, SDNode shiftop> {
+ def : InstAlias<"neg $Rd, $Rm, $Imm6",
+ (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+ def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)),
+ (INST ZR, $Rm, shift_operand:$Imm6)>;
+}
+
+defm : neg_alias<SUBwww_lsl, GPR32, WZR, i32, lsl_operand_i32, shl>;
+defm : neg_alias<SUBwww_lsr, GPR32, WZR, i32, lsr_operand_i32, srl>;
+defm : neg_alias<SUBwww_asr, GPR32, WZR, i32, asr_operand_i32, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>;
+
+defm : neg_alias<SUBxxx_lsl, GPR64, XZR, i64, lsl_operand_i64, shl>;
+defm : neg_alias<SUBxxx_lsr, GPR64, XZR, i64, lsr_operand_i64, srl>;
+defm : neg_alias<SUBxxx_asr, GPR64, XZR, i64, asr_operand_i64, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>;
+
+// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
+// be involved.
+class negs_alias<Instruction INST, RegisterClass GPR,
+ Register ZR, Operand shift_operand, SDNode shiftop>
+ : InstAlias<"negs $Rd, $Rm, $Imm6",
+ (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
+def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
+def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
+def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
+def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+
+//===-------------------------------
+// 4. The CMP/CMN aliases
+//===-------------------------------
+
+multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
+ string asmop, SDPatternOperator opfrag, ValueType ty,
+ RegisterClass GPR> {
+ let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
+ def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag ty:$Rn, ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>;
+defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>;
+
+defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>;
+defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (with carry) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS
+
+multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop> {
+ let Uses = [NZCV] in {
+ def www : A64I_addsubcarry<0b0, op, s, 0b000000,
+ (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [], NoItinerary>;
+
+ def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
+ (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [], NoItinerary>;
+ }
+}
+
+let isCommutable = 1 in {
+ defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">;
+}
+
+defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">;
+
+let Defs = [NZCV] in {
+ let isCommutable = 1 in {
+ defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">;
+ }
+
+ defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">;
+}
+
+def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+
+// Note that adde and sube can form a chain longer than two (e.g. for 256-bit
+// addition), so the flag-setting instructions are appropriate.
+def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>;
+def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>;
+def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>;
+def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>;
+
+//===----------------------------------------------------------------------===//
+// Bitfield
+//===----------------------------------------------------------------------===//
+// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL,
+// UBFIZ, UBFX
+
+// Because of the rather complicated nearly-overlapping aliases, the decoding of
+// this range of instructions is handled manually. The architectural
+// instructions are BFM, SBFM and UBFM but a disassembler should never produce
+// these.
+//
+// In the end, the best option was to use BFM instructions for decoding under
+// almost all circumstances, but to create aliasing *Instructions* for each of
+// the canonical forms and specify a completely custom decoder which would
+// substitute the correct MCInst as needed.
+//
+// This also simplifies instruction selection, parsing, etc., because the
+// MCInsts have a shape that's closer to their use in code.
+
+//===-------------------------------
+// 1. The architectural BFM instructions
+//===-------------------------------
+
+def uimm5_asmoperand : AsmOperandClass {
+ let Name = "UImm5";
+ let PredicateMethod = "isUImm<5>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm5";
+}
+
+def uimm6_asmoperand : AsmOperandClass {
+ let Name = "UImm6";
+ let PredicateMethod = "isUImm<6>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm6";
+}
+
+def bitfield32_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]> {
+ let ParserMatchClass = uimm5_asmoperand;
+
+ let DecoderMethod = "DecodeBitfield32ImmOperand";
+}
+
+
+def bitfield64_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
+ let ParserMatchClass = uimm6_asmoperand;
+
+ // Default decoder works in 64-bit case: the 6-bit field can take any value.
+}
+
+multiclass A64I_bitfieldSizes<bits<2> opc, string asmop> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ }
+}
+
+defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">;
+defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">;
+
+// BFM instructions modify the destination register rather than defining it
+// completely.
+def BFMwwii :
+ A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+ "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ let Constraints = "$src = $Rd";
+}
+
+def BFMxxii :
+ A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+ "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ let Constraints = "$src = $Rd";
+}
+
+
+//===-------------------------------
+// 2. Extend aliases to 64-bit dest
+//===-------------------------------
+
+// Unfortunately the extensions that end up as 64 bits cannot be handled by an
+// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs
+// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias
+// is not capable of such a mapping as far as I'm aware.
+
+// Note that these instructions are strictly more specific than the
+// BFM ones (in ImmR) so they can handle their own decoding.
+class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, ValueType dty,
+ string asmop, bits<6> imms, dag pattern>
+ : A64I_bitfield<sf, opc, sf,
+ (outs GPRDest:$Rd), (ins GPR32:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ [(set dty:$Rd, pattern)], NoItinerary> {
+ let ImmR = 0b000000;
+ let ImmS = imms;
+}
+
+// Signed extensions
+def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7,
+ (sext_inreg (anyext i32:$Rn), i8)>;
+def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7,
+ (sext_inreg i32:$Rn, i8)>;
+def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15,
+ (sext_inreg (anyext i32:$Rn), i16)>;
+def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15,
+ (sext_inreg i32:$Rn, i16)>;
+def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>;
+
+// Unsigned extensions
+def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7,
+ (and i32:$Rn, 255)>;
+def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15,
+ (and i32:$Rn, 65535)>;
+
+// The 64-bit unsigned variants are not strictly architectural but recommended
+// for consistency.
+let isAsmParserOnly = 1 in {
+ def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7,
+ (and (anyext i32:$Rn), 255)>;
+ def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15,
+ (and (anyext i32:$Rn), 65535)>;
+}
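+// For example, "uxtb x0, w1" can be encoded exactly like the 32-bit
+// "uxtb w0, w1" (hence sf = 0b0 above): a write to a W register zeroes the
+// upper 32 bits of the corresponding X register anyway.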
+
+// Extra patterns for when the source register is actually 64 bits
+// too. There's no architectural difference here: it's just LLVM
+// shenanigans. There's no need for equivalent zero-extension patterns
+// because they'll already be caught by logical (immediate) matching.
+def : Pat<(sext_inreg i64:$Rn, i8),
+ (SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>;
+def : Pat<(sext_inreg i64:$Rn, i16),
+ (SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>;
+def : Pat<(sext_inreg i64:$Rn, i32),
+ (SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>;
+
+
+//===-------------------------------
+// 3. Aliases for ASR and LSR (the simple shifts)
+//===-------------------------------
+
+// These also handle their own decoding because ImmS being set makes
+// them take precedence over BFM.
+multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> {
+ def wwi : A64I_bitfield<0b0, opc, 0b0,
+ (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+ [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))],
+ NoItinerary> {
+ let ImmS = 31;
+ }
+
+ def xxi : A64I_bitfield<0b1, opc, 0b1,
+ (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+ [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))],
+ NoItinerary> {
+ let ImmS = 63;
+ }
+
+}
+
+defm ASR : A64I_shift<0b00, "asr", sra>;
+defm LSR : A64I_shift<0b10, "lsr", srl>;
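+// For example, "asr w0, w1, #5" is equivalent to "sbfm w0, w1, #5, #31";
+// fixing ImmS at 31 (or 63 for the 64-bit variant) is all that's needed.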
+
+//===-------------------------------
+// 4. Aliases for LSL
+//===-------------------------------
+
+// Unfortunately LSL and subsequent aliases are much more complicated. We need
+// to be able to say that certain output instruction fields depend in a complex
+// manner on combinations of input assembly fields.
+//
+// MIOperandInfo *might* have been able to do it, but at the cost of
+// significantly more C++ code.
+
+// N.b. contrary to usual practice, these operands store the shift amount
+// rather than the machine bits in an MCInst. The complexity overhead of
+// consistency outweighed the benefits in this case (custom asmparser, printer
+// and selection vs custom encoder).
+def bitfield32_lsl_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let ParserMatchClass = uimm5_asmoperand;
+ let EncoderMethod = "getBitfield32LSLOpValue";
+}
+
+def bitfield64_lsl_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let ParserMatchClass = uimm6_asmoperand;
+ let EncoderMethod = "getBitfield64LSLOpValue";
+}
+
+class A64I_bitfield_lsl<bit sf, RegisterClass GPR, ValueType ty,
+ Operand operand>
+ : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
+ "lsl\t$Rd, $Rn, $FullImm",
+ [(set ty:$Rd, (shl ty:$Rn, operand:$FullImm))],
+ NoItinerary> {
+ bits<12> FullImm;
+ let ImmR = FullImm{5-0};
+ let ImmS = FullImm{11-6};
+
+ // No disassembler allowed because it would overlap with BFM which does the
+ // actual work.
+ let isAsmParserOnly = 1;
+}
+
+def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>;
+def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>;
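+// For example, "lsl w0, w1, #3" is equivalent to "ubfm w0, w1, #29, #28":
+// architecturally immr = (32 - shift) % 32 and imms = 31 - shift, which is
+// presumably what getBitfield32LSLOpValue computes from the stored shift.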
+
+//===-------------------------------
+// 5. Aliases for bitfield extract instructions
+//===-------------------------------
+
+def bfx32_width_asmoperand : AsmOperandClass {
+ let Name = "BFX32Width";
+ let PredicateMethod = "isBitfieldWidth<32>";
+ let RenderMethod = "addBFXWidthOperands";
+ let DiagnosticType = "Width32";
+}
+
+def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]> {
+ let PrintMethod = "printBFXWidthOperand";
+ let ParserMatchClass = bfx32_width_asmoperand;
+}
+
+def bfx64_width_asmoperand : AsmOperandClass {
+ let Name = "BFX64Width";
+ let PredicateMethod = "isBitfieldWidth<64>";
+ let RenderMethod = "addBFXWidthOperands";
+ let DiagnosticType = "Width64";
+}
+
+def bfx64_width : Operand<i64> {
+ let PrintMethod = "printBFXWidthOperand";
+ let ParserMatchClass = bfx64_width_asmoperand;
+}
+
+
+multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))],
+ NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))],
+ NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+}
+
+defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>;
+defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
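+// For example, "ubfx w0, w1, #4, #8" extracts bits [11:4] of w1 and is
+// equivalent to "ubfm w0, w1, #4, #11", i.e. imms = lsb + width - 1.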
+
+// Again, variants based on BFM modify Rd so need it as an input too.
+def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+ "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+ "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+// SBFX instructions can do a 1-instruction sign-extension of boolean values.
+def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>;
+def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>;
+def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
+ (SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>;
+
+// UBFX makes sense as an implementation of a 64-bit zero-extension too. We
+// could use either the 64-bit or the 32-bit variant, but the 32-bit one might
+// be more efficient.
+def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
+ sub_32)>;
+
+//===-------------------------------
+// 6. Aliases for bitfield insert instructions
+//===-------------------------------
+
+def bfi32_lsb_asmoperand : AsmOperandClass {
+ let Name = "BFI32LSB";
+ let PredicateMethod = "isUImm<5>";
+ let RenderMethod = "addBFILSBOperands<32>";
+ let DiagnosticType = "UImm5";
+}
+
+def bfi32_lsb : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let PrintMethod = "printBFILSBOperand<32>";
+ let ParserMatchClass = bfi32_lsb_asmoperand;
+}
+
+def bfi64_lsb_asmoperand : AsmOperandClass {
+ let Name = "BFI64LSB";
+ let PredicateMethod = "isUImm<6>";
+ let RenderMethod = "addBFILSBOperands<64>";
+ let DiagnosticType = "UImm6";
+}
+
+def bfi64_lsb : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let PrintMethod = "printBFILSBOperand<64>";
+ let ParserMatchClass = bfi64_lsb_asmoperand;
+}
+
+// Width verification is performed during conversion, so the width operand can
+// be shared between the 32/64-bit cases. It is still needed for the print
+// method, though, because ImmS encodes "width - 1".
+def bfi32_width_asmoperand : AsmOperandClass {
+ let Name = "BFI32Width";
+ let PredicateMethod = "isBitfieldWidth<32>";
+ let RenderMethod = "addBFIWidthOperands";
+ let DiagnosticType = "Width32";
+}
+
+def bfi32_width : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]> {
+ let PrintMethod = "printBFIWidthOperand";
+ let ParserMatchClass = bfi32_width_asmoperand;
+}
+
+def bfi64_width_asmoperand : AsmOperandClass {
+ let Name = "BFI64Width";
+ let PredicateMethod = "isBitfieldWidth<64>";
+ let RenderMethod = "addBFIWidthOperands";
+ let DiagnosticType = "Width64";
+}
+
+def bfi64_width : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]> {
+ let PrintMethod = "printBFIWidthOperand";
+ let ParserMatchClass = bfi64_width_asmoperand;
+}
+
+multiclass A64I_bitfield_insert<bits<2> opc, string asmop> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+}
+
+defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">;
+defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">;
+
+
+def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+ "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+ "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
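+// For example, "bfi w0, w1, #8, #4" inserts bits [3:0] of w1 at bits [11:8]
+// of w0 and is equivalent to "bfm w0, w1, #24, #3": architecturally
+// immr = (32 - lsb) % 32 and imms = width - 1.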
+
+//===----------------------------------------------------------------------===//
+// Compare and branch (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: CBZ, CBNZ
+
+class label_asmoperand<int width, int scale> : AsmOperandClass {
+ let Name = "Label" # width # "_" # scale;
+ let PredicateMethod = "isLabel<" # width # "," # scale # ">";
+ let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">";
+ let DiagnosticType = "Label";
+}
+
+def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>;
+
+// All conditional immediate branches are the same really: 19 signed bits scaled
+// by the instruction-size (4).
+def bcc_target : Operand<OtherVT> {
+ // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+ let ParserMatchClass = label_wid19_scal4_asmoperand;
+ let PrintMethod = "printLabelOperand<19, 4>";
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>";
+ let OperandType = "OPERAND_PCREL";
+}
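+// With 19 signed bits scaled by 4, these branches can reach roughly +/-1MiB
+// from the branch instruction.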
+
+multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> {
+ let isBranch = 1, isTerminator = 1 in {
+ def x : A64I_cmpbr<0b1, op,
+ (outs),
+ (ins GPR64:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+
+ def w : A64I_cmpbr<0b0, op,
+ (outs),
+ (ins GPR32:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+ }
+}
+
+defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{
+ return Imm == A64CC::EQ;
+}]> >;
+defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{
+ return Imm == A64CC::NE;
+}]> >;
+
+//===----------------------------------------------------------------------===//
+// Conditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B.cc
+
+def cond_code_asmoperand : AsmOperandClass {
+ let Name = "CondCode";
+ let DiagnosticType = "CondCode";
+}
+
+def cond_code : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm <= 15;
+}]> {
+ let PrintMethod = "printCondCodeOperand";
+ let ParserMatchClass = cond_code_asmoperand;
+}
+
+def Bcc : A64I_condbr<0b0, 0b0, (outs),
+ (ins cond_code:$Cond, bcc_target:$Label),
+ "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
+ NoItinerary> {
+ let Uses = [NZCV];
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+def uimm4_asmoperand : AsmOperandClass {
+ let Name = "UImm4";
+ let PredicateMethod = "isUImm<4>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm4";
+}
+
+def uimm4 : Operand<i32> {
+ let ParserMatchClass = uimm4_asmoperand;
+}
+
+def uimm5 : Operand<i32> {
+ let ParserMatchClass = uimm5_asmoperand;
+}
+
+// The only difference between this operand and the one for instructions like
+// B.cc is that it's parsed manually. The others get parsed implicitly as part
+// of the mnemonic handling.
+def cond_code_op_asmoperand : AsmOperandClass {
+ let Name = "CondCodeOp";
+ let RenderMethod = "addCondCodeOperands";
+ let PredicateMethod = "isCondCode";
+ let ParserMethod = "ParseCondCodeOperand";
+ let DiagnosticType = "CondCode";
+}
+
+def cond_code_op : Operand<i32> {
+ let PrintMethod = "printCondCodeOperand";
+ let ParserMatchClass = cond_code_op_asmoperand;
+}
+
+class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+ : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
+ (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+ : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
+
+// Condition code which is encoded as the inversion (semantically rather than
+// bitwise) in the instruction.
+def inv_cond_code_op_asmoperand : AsmOperandClass {
+ let Name = "InvCondCodeOp";
+ let RenderMethod = "addInvCondCodeOperands";
+ let PredicateMethod = "isCondCode";
+ let ParserMethod = "ParseCondCodeOperand";
+ let DiagnosticType = "CondCode";
+}
+
+def inv_cond_code_op : Operand<i32> {
+ let ParserMatchClass = inv_cond_code_op_asmoperand;
+}
+
+// Having a separate operand for the selectable use-case is debatable, but gives
+// consistency with cond_code.
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+ A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue());
+ return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32);
+}]>;
+
+def inv_cond_code
+ : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>;
+
+
+multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
+ SDPatternOperator select> {
+ let Uses = [NZCV] in {
+ def wwwc : A64I_condsel<0b0, op, 0b0, op2,
+ (outs GPR32:$Rd),
+ (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+ [(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
+ NoItinerary>;
+
+
+ def xxxc : A64I_condsel<0b1, op, 0b0, op2,
+ (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+ [(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
+ NoItinerary>;
+ }
+}
+
+def simple_select
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>;
+
+class complex_select<SDPatternOperator opnode>
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>;
+
+
+defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>;
+defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc",
+ complex_select<PatFrag<(ops node:$val),
+ (add node:$val, 1)>>>;
+defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>;
+defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>;
+
+// Now the instruction aliases, which fit nicely into LLVM's model:
+
+def : InstAlias<"cset $Rd, $Cond",
+ (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cset $Rd, $Cond",
+ (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"csetm $Rd, $Cond",
+ (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"csetm $Rd, $Cond",
+ (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinc $Rd, $Rn, $Cond",
+ (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinc $Rd, $Rn, $Cond",
+ (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinv $Rd, $Rn, $Cond",
+ (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinv $Rd, $Rn, $Cond",
+ (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cneg $Rd, $Rn, $Cond",
+ (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cneg $Rd, $Rn, $Cond",
+ (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
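+// For example, "cset w0, eq" becomes "csinc w0, wzr, wzr, ne": CSINC yields
+// Rm + 1 (here 0 + 1) when the inverted condition fails, i.e. 1 exactly when
+// the original condition holds.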
+
+// Finally some helper patterns.
+
+// For CSET (a.k.a. zero-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
+ (CSINCwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
+ (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
+ (CSINCxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
+ (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// For CSETM (a.k.a. sign-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+ (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+ (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+ (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+ (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
+// commutativity. The instructions are too complex for isCommutable to be used,
+// so we have to create the patterns manually:
+
+// No commutable pattern for CSEL since the commuted version is isomorphic.
+
+// CSINC
+def :Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond),
+ (CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond),
+ (CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+// CSINV
+def :Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
+ (CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
+ (CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+// CSNEG
+def :Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
+ (CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
+ (CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (1 source) instructions
+//===----------------------------------------------------------------------===//
+// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
+
+// We define a unary operator which always fails. We will use this to
+// define unary operators that cannot be matched.
+
+class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
+ list<dag> patterns, RegisterClass GPRrc,
+ InstrItinClass itin>:
+ A64I_dp_1src<sf,
+ 0,
+ 0b00000,
+ opcode,
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ (outs GPRrc:$Rd),
+ (ins GPRrc:$Rn),
+ patterns,
+ itin>;
+
+multiclass A64I_dp_1src <bits<6> opcode, string asmop> {
+ let hasSideEffects = 0 in {
+ def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
+ def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
+ }
+}
+
+defm RBIT : A64I_dp_1src<0b000000, "rbit">;
+defm CLS : A64I_dp_1src<0b000101, "cls">;
+defm CLZ : A64I_dp_1src<0b000100, "clz">;
+
+def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>;
+def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>;
+def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>;
+def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>;
+
+def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>;
+def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>;
+def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>;
+def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>;
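+// AArch64 has no dedicated count-trailing-zeros instruction, so cttz is
+// lowered as clz(rbit(x)): bit-reversing turns trailing zeros into leading
+// zeros.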
+
+
+def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
+ [(set i32:$Rd, (bswap i32:$Rn))],
+ GPR32, NoItinerary>;
+def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
+ [(set i64:$Rd, (bswap i64:$Rn))],
+ GPR64, NoItinerary>;
+def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
+ [(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))],
+ GPR64, NoItinerary>;
+def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
+ [(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))],
+ GPR32,
+ NoItinerary>;
+def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
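+// The rotate-then-bswap patterns work because, e.g. for rev32, rotating a
+// 64-bit value by 32 and then byte-swapping the whole thing reverses the
+// bytes within each 32-bit word while leaving the words in place.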
+
+//===----------------------------------------------------------------------===//
+// Data Processing (2 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL,
+// LSR, ASR, ROR
+
+
+class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
+ RegisterClass GPRsp,
+ InstrItinClass itin>:
+ A64I_dp_2src<sf,
+ opcode,
+ 0,
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ (outs GPRsp:$Rd),
+ (ins GPRsp:$Rn, GPRsp:$Rm),
+ patterns,
+ itin>;
+
+multiclass dp_2src_crc<bit c, string asmop> {
+ def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0},
+ !strconcat(asmop, "b"), [], GPR32, NoItinerary>;
+ def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1},
+ !strconcat(asmop, "h"), [], GPR32, NoItinerary>;
+ def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0},
+ !strconcat(asmop, "w"), [], GPR32, NoItinerary>;
+ def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0,
+ !strconcat(asmop, "x\t$Rd, $Rn, $Rm"),
+ (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [],
+ NoItinerary>;
+}
+
+multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
+ def www : dp_2src_impl<0b0,
+ opcode,
+ asmop,
+ [(set i32:$Rd,
+ (op i32:$Rn, (i64 (zext i32:$Rm))))],
+ GPR32,
+ NoItinerary>;
+ def xxx : dp_2src_impl<0b1,
+ opcode,
+ asmop,
+ [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
+ GPR64,
+ NoItinerary>;
+}
+
+
+multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> {
+ def www : dp_2src_impl<0b0,
+ opcode,
+ asmop,
+ [(set i32:$Rd, (op i32:$Rn, i32:$Rm))],
+ GPR32,
+ NoItinerary>;
+ def xxx : dp_2src_impl<0b1,
+ opcode,
+ asmop,
+ [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
+ GPR64,
+ NoItinerary>;
+}
+
+// Here we define the data processing 2 source instructions.
+defm CRC32 : dp_2src_crc<0b0, "crc32">;
+defm CRC32C : dp_2src_crc<0b1, "crc32c">;
+
+defm UDIV : dp_2src<0b000010, "udiv", udiv>;
+defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
+
+defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
+defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
+defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
+defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
+
+// Extra patterns for an incoming 64-bit value for a 32-bit
+// operation. Since the LLVM operations are undefined (as in C) if the
+// RHS is out of range, it's perfectly permissible to discard the high
+// bits of the GPR64.
+def : Pat<(shl i32:$Rn, i64:$Rm),
+ (LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(srl i32:$Rn, i64:$Rm),
+ (LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(sra i32:$Rn, i64:$Rm),
+ (ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+def : Pat<(rotr i32:$Rn, i64:$Rm),
+ (RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
+
+// Here we define the aliases for the data processing 2 source instructions.
+def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
+def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
+def ASR_mnemonic : MnemonicAlias<"asrv", "asr">;
+def ROR_mnemonic : MnemonicAlias<"rorv", "ror">;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
+// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
+
+class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
+ ValueType AccTy, RegisterClass SrcReg,
+ string asmop, dag pattern>
+ : A64I_dp3<sf, opcode,
+ (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
+ [(set AccTy:$Rd, pattern)], NoItinerary> {
+ RegisterClass AccGPR = AccReg;
+ RegisterClass SrcGPR = SrcReg;
+}
+
+def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd",
+ (add i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
+def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd",
+ (add i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
+
+def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub",
+ (sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
+def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub",
+ (sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
+
+def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl",
+ (add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
+def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl",
+ (sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
+
+def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl",
+ (add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
+def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl",
+ (sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
+
+let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
+ def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm),
+ "umulh\t$Rd, $Rn, $Rm",
+ [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))],
+ NoItinerary>;
+
+ def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm),
+ "smulh\t$Rd, $Rn, $Rm",
+ [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))],
+ NoItinerary>;
+}
+
+multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
+ Register ZR, dag pattern> {
+ def : InstAlias<asmop # " $Rd, $Rn, $Rm",
+ (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
+
+ def : Pat<pattern, (INST $Rn, $Rm, ZR)>;
+}
+
+defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>;
+defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>;
+
+defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR,
+ (sub 0, (mul i32:$Rn, i32:$Rm))>;
+defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR,
+ (sub 0, (mul i64:$Rn, i64:$Rm))>;
+
+defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR,
+ (mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>;
+defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR,
+ (sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
+
+defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR,
+ (mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>;
+defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR,
+ (sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
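+// For example, "mul w0, w1, w2" becomes "madd w0, w1, w2, wzr" and
+// "smull x0, w1, w2" becomes "smaddl x0, w1, w2, xzr".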
+
+
+//===----------------------------------------------------------------------===//
+// Exception generation
+//===----------------------------------------------------------------------===//
+// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
+
+def uimm16_asmoperand : AsmOperandClass {
+ let Name = "UImm16";
+ let PredicateMethod = "isUImm<16>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm16";
+}
+
+def uimm16 : Operand<i32> {
+ let ParserMatchClass = uimm16_asmoperand;
+}
+
+class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop>
+ : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16),
+ !strconcat(asmop, "\t$UImm16"), [], NoItinerary> {
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">;
+def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">;
+def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">;
+def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">;
+def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">;
+
+def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">;
+def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">;
+def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">;
+
+// The immediate is optional for the DCPS instructions, defaulting to 0.
+def : InstAlias<"dcps1", (DCPS1i 0)>;
+def : InstAlias<"dcps2", (DCPS2i 0)>;
+def : InstAlias<"dcps3", (DCPS3i 0)>;
+
+//===----------------------------------------------------------------------===//
+// Extract (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: EXTR + alias ROR
+
+def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
+ (outs GPR32:$Rd),
+ (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
+ "extr\t$Rd, $Rn, $Rm, $LSB",
+ [(set i32:$Rd,
+ (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))],
+ NoItinerary>;
+def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
+ (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
+ "extr\t$Rd, $Rn, $Rm, $LSB",
+ [(set i64:$Rd,
+ (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))],
+ NoItinerary>;
+
+def : InstAlias<"ror $Rd, $Rs, $LSB",
+ (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
+def : InstAlias<"ror $Rd, $Rs, $LSB",
+ (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;
+
+def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB),
+ (EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>;
+def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB),
+ (EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>;
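+// For example, "ror w0, w1, #4" is "extr w0, w1, w1, #4": extracting from a
+// value concatenated with itself is exactly a rotate right.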
+
+//===----------------------------------------------------------------------===//
+// Floating-point compare instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCMP, FCMPE
+
+def fpzero_asmoperand : AsmOperandClass {
+ let Name = "FPZero";
+ let ParserMethod = "ParseFPImmOperand";
+ let DiagnosticType = "FPZero";
+}
+
+def fpz32 : Operand<f32>,
+ ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
+}
+
+def fpz64 : Operand<f64>,
+ ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
+}
+
+multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
+ def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
+ (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
+ NoItinerary> {
+ let Defs = [NZCV];
+ }
+
+ def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
+ (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> {
+ let Defs = [NZCV];
+ }
+}
+
+defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
+ (set NZCV, (A64cmp f32:$Rn, f32:$Rm))>;
+defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
+ (set NZCV, (A64cmp f64:$Rn, f64:$Rm))>;
+
+// The field that would be Rm must be written as 0; note that even though it's
+// called "$Rm" here to fit in with the InstrFormats, it's actually an
+// immediate.
+defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
+ (set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>;
+
+defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
+ (set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating-point conditional compare instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCCMP, FCCMPE
+
+class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop>
+ : A64I_fpccmp<0b0, 0b0, type, op,
+ (outs),
+ (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
+def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
+def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
+def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;
+
+//===----------------------------------------------------------------------===//
+// Floating-point conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCSEL
+
+let Uses = [NZCV] in {
+ def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
+ "fcsel\t$Rd, $Rn, $Rm, $Cond",
+ [(set f32:$Rd,
+ (simple_select f32:$Rn, f32:$Rm))],
+ NoItinerary>;
+
+
+ def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
+ (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
+ "fcsel\t$Rd, $Rn, $Rm, $Cond",
+ [(set f64:$Rd,
+ (simple_select f64:$Rn, f64:$Rm))],
+ NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (1 source)
+//===----------------------------------------------------------------------===//
+// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].
+
+def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
+ [{ (void)N; return false; }]>;
+
+// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
+// syntax. Default to no pattern because most are odd enough not to have one.
+multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
+ SDPatternOperator opnode = FPNoUnop> {
+ def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
+ !strconcat(asmstr, "\t$Rd, $Rn"),
+ [(set f32:$Rd, (opnode f32:$Rn))],
+ NoItinerary>;
+
+ def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
+ !strconcat(asmstr, "\t$Rd, $Rn"),
+ [(set f64:$Rd, (opnode f64:$Rn))],
+ NoItinerary>;
+}
+
+defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">;
+defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
+defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
+defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
+
+defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
+defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
+defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
+defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
+defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
+defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
+defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;
+
+// The FCVT instructions have different source and destination register types,
+// but the fields are uniform everywhere a D-register (say) crops up. Package
+// this information in a Record.
+class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> {
+ RegisterClass Class = rc;
+ ValueType VT = vt;
+ bit t1 = fld{1};
+ bit t0 = fld{0};
+}
+
+def FCVT16 : FCVTRegType<FPR16, 0b11, f16>;
+def FCVT32 : FCVTRegType<FPR32, 0b00, f32>;
+def FCVT64 : FCVTRegType<FPR64, 0b01, f64>;
+
+class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode>
+ : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0},
+ {0,0,0,1, DestReg.t1, DestReg.t0},
+ (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
+ "fcvt\t$Rd, $Rn",
+ [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>;
+
+def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
+def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
+def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>;
+def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>;
+def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>;
+def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (2 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL
+
+def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs),
+ [{ (void)N; return false; }]>;
+
+multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
+ SDPatternOperator opnode> {
+ def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode,
+ (outs FPR32:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm),
+ !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
+ [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))],
+ NoItinerary>;
+
+ def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
+ (outs FPR64:$Rd),
+ (ins FPR64:$Rn, FPR64:$Rm),
+ !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
+ [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))],
+ NoItinerary>;
+}
+
+let isCommutable = 1 in {
+ defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
+ defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>;
+
+ // No patterns for these.
+ defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
+ defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
+ defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
+ defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
+
+ defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (fneg (fmul node:$lhs, node:$rhs))> >;
+}
+
+defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
+defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMADD, FMSUB, FNMADD, FNMSUB
+
+def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
+def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma node:$Rn, node:$Rm, (fneg node:$Ra))>;
+def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
+
+class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
+ bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
+ : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
+ (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
+ !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
+ [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))],
+ NoItinerary>;
+
+def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>;
+def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
+def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
+def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;
+
+def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>;
+def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
+def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
+def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point <-> fixed-point conversion instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
+
+// #1-#32 allowed, encoded as "64 - <specified imm>".
+def fixedpos_asmoperand_i32 : AsmOperandClass {
+ let Name = "CVTFixedPos32";
+ let RenderMethod = "addCVTFixedPosOperands";
+ let PredicateMethod = "isCVTFixedPos<32>";
+ let DiagnosticType = "CVTFixedPos32";
+}
+
+// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
+def fixedpos_asmoperand_i64 : AsmOperandClass {
+ let Name = "CVTFixedPos64";
+ let RenderMethod = "addCVTFixedPosOperands";
+ let PredicateMethod = "isCVTFixedPos<64>";
+ let DiagnosticType = "CVTFixedPos64";
+}
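+// For example, in "fcvtzs w0, s1, #3" the #3 requests three fractional bits
+// and is encoded in the scale field as 64 - 3 = 61.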
+
+// We need the Cartesian product of f32/f64 and i32/i64 operands for
+// conversions:
+// + Selection needs to use operands of the correct floating type
+// + Assembly parsing and decoding depend on the integer width
+class cvtfix_i32_op<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> {
+ let ParserMatchClass = fixedpos_asmoperand_i32;
+ let DecoderMethod = "DecodeCVT32FixedPosOperand";
+ let PrintMethod = "printCVTFixedPosOperand";
+}
+
+class cvtfix_i64_op<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> {
+ let ParserMatchClass = fixedpos_asmoperand_i64;
+ let PrintMethod = "printCVTFixedPosOperand";
+}
+
+// Because of the proliferation of weird operands, it's not really
+// worth going for a multiclass here. Oh well.
+
+class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
+ RegisterClass GPR, RegisterClass FPR,
+ ValueType DstTy, ValueType SrcTy,
+ Operand scale_op, string asmop, SDNode cvtop>
+ : A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
+ (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
+ !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
+ [(set DstTy:$Rd, (cvtop (fmul SrcTy:$Rn, scale_op:$Scale)))],
+ NoItinerary>;
+
+def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32,
+ cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
+def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32,
+ cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
+def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32,
+ cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
+def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32,
+ cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;
+
+def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64,
+ cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
+def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64,
+ cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
+def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64,
+ cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
+def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64,
+ cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;
+
+
+class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
+ RegisterClass FPR, RegisterClass GPR,
+ ValueType DstTy, ValueType SrcTy,
+ Operand scale_op, string asmop, SDNode cvtop>
+ : A64I_fpfixed<sf, 0b0, type, 0b00, opcode,
+ (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
+ !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
+ [(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))],
+ NoItinerary>;
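+
+// Correspondingly, e.g. (f32 (fdiv (sint_to_fp i32:$Rn), 8.0)) is selected as
+// "scvtf $Rd, $Rn, #3": dividing the converted value by 2^3 is exactly what a
+// #3 fixed-point conversion performs.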
+
+def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32,
+ cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
+def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64,
+ cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
+def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32,
+ cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
+def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64,
+ cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
+def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32,
+ cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
+def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64,
+ cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
+def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, f64, i32,
+ cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
+def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64,
+ cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point <-> integer conversion instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCVTNS, FCVTNU, FCVTPS, FCVTPU, FCVTMS, FCVTMU, FCVTZS, FCVTZU,
+//           FCVTAS, FCVTAU, SCVTF, UCVTF and FMOV
+
+class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
+ RegisterClass DestPR, RegisterClass SrcPR, string asmop>
+ : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>;
+
+multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> {
+ def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0},
+ GPR32, FPR32, asmop # "s">;
+ def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0},
+ GPR64, FPR32, asmop # "s">;
+ def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1},
+ GPR32, FPR32, asmop # "u">;
+ def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1},
+ GPR64, FPR32, asmop # "u">;
+
+ def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0},
+ GPR32, FPR64, asmop # "s">;
+ def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0},
+ GPR64, FPR64, asmop # "s">;
+ def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1},
+ GPR32, FPR64, asmop # "u">;
+ def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1},
+ GPR64, FPR64, asmop # "u">;
+}
+
+defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">;
+defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">;
+defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
+defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
+defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
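+
+// For example, the FCVTN defm above expands to FCVTNSws ("fcvtns w0, s0"),
+// FCVTNSxs, FCVTNUws and FCVTNUxs (plus the f64 variants): round-to-nearest
+// (ties-to-even) conversions to signed or unsigned 32/64-bit GPRs.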
+
+def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>;
+def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>;
+def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>;
+def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>;
+def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>;
+def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>;
+def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>;
+def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>;
+
+multiclass A64I_inttofp<bit o0, string asmop> {
+ def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
+ def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>;
+ def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>;
+ def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>;
+}
+
+defm S : A64I_inttofp<0b0, "scvtf">;
+defm U : A64I_inttofp<0b1, "ucvtf">;
+
+def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>;
+def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>;
+def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>;
+def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>;
+def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>;
+def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>;
+def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>;
+def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>;
+
+def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
+def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
+def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
+def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
+
+def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>;
+def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>;
+def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>;
+def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>;
+
+def lane1_asmoperand : AsmOperandClass {
+ let Name = "Lane1";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "Lane1";
+}
+
+def lane1 : Operand<i32> {
+ let ParserMatchClass = lane1_asmoperand;
+ let PrintMethod = "printBareImmOperand";
+}
+
+let DecoderMethod = "DecodeFMOVLaneInstruction" in {
+ def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
+ (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
+ "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>;
+
+ def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
+ (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
+ "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>;
+}
+
+def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
+ (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
+
+def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
+ (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point immediate instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMOV
+
+def fpimm_asmoperand : AsmOperandClass {
+ let Name = "FMOVImm";
+ let ParserMethod = "ParseFPImmOperand";
+ let DiagnosticType = "FPImm";
+}
+
+// The MCOperand for these instructions is the encoded 8-bit value.
+def SDXF_fpimm : SDNodeXForm<fpimm, [{
+ uint32_t Imm8;
+ A64Imms::isFPImm(N->getValueAPF(), Imm8);
+ return CurDAG->getTargetConstant(Imm8, MVT::i32);
+}]>;
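+
+// Only a limited set of values is representable: the usual AArch64 8-bit FP
+// immediate encodes +/-n/16 * 2^r with n in [16, 31] and r in [-3, 4], so
+// e.g. "fmov s0, #1.0" is valid but "fmov s0, #0.1" is not.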
+
+class fmov_operand<ValueType FT>
+ : Operand<i32>,
+ PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }],
+ SDXF_fpimm> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = fpimm_asmoperand;
+}
+
+def fmov32_operand : fmov_operand<f32>;
+def fmov64_operand : fmov_operand<f64>;
+
+class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
+ Operand fmov_operand>
+ : A64I_fpimm<0b0, 0b0, type, 0b00000,
+ (outs Reg:$Rd),
+ (ins fmov_operand:$Imm8),
+ "fmov\t$Rd, $Imm8",
+ [(set VT:$Rd, fmov_operand:$Imm8)],
+ NoItinerary>;
+
+def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
+def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
+
+//===----------------------------------------------------------------------===//
+// Load-register (literal) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDR, LDRSW, PRFM
+
+def ldrlit_label_asmoperand : AsmOperandClass {
+ let Name = "LoadLitLabel";
+ let RenderMethod = "addLabelOperands<19, 4>";
+ let DiagnosticType = "Label";
+}
+
+def ldrlit_label : Operand<i64> {
+ let EncoderMethod = "getLoadLitLabelOpValue";
+
+ // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<19, 4>";
+ let ParserMatchClass = ldrlit_label_asmoperand;
+ let OperandType = "OPERAND_PCREL";
+}
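+
+// With 19 bits scaled by 4, a literal load can reach roughly +/-1MiB from the
+// instruction, e.g. "ldr x0, label".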
+
+// Various instructions take an immediate value (which can always be used),
+// but some values also have symbolic names to make things easier. These
+// operands and the associated functions abstract away the differences.
+multiclass namedimm<string prefix, string mapper> {
+ def _asmoperand : AsmOperandClass {
+ let Name = "NamedImm" # prefix;
+ let PredicateMethod = "isUImm";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
+ let DiagnosticType = "NamedImm_" # prefix;
+ }
+
+ def _op : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+ let PrintMethod = "printNamedImmOperand<" # mapper # ">";
+ let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
+ }
+}
+
+defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
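+
+// For the prefetch operand this means, e.g., "prfm pldl1keep, label" and the
+// raw form "prfm #0, label" are equivalent (PLDL1KEEP has encoding 0).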
+
+class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
+ list<dag> patterns = []>
+ : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
+ "ldr\t$Rt, $Imm19", patterns, NoItinerary>;
+
+let mayLoad = 1 in {
+ def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
+ def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
+}
+
+def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
+def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
+
+let mayLoad = 1 in {
+ def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
+
+
+ def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins ldrlit_label:$Imm19),
+ "ldrsw\t$Rt, $Imm19",
+ [], NoItinerary>;
+
+ def PRFM_lit : A64I_LDRlit<0b11, 0b0,
+ (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
+ "prfm\t$Rt, $Imm19",
+ [], NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load-store exclusive instructions
+//===----------------------------------------------------------------------===//
+// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR, STXP, LDXP, STLXRB,
+// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
+// STLRH, STLR, LDARB, LDARH, LDAR
+
+// Since these instructions have the undefined register bits set to 1 in
+// their canonical form, we need a post encoder method to set those bits
+// to 1 when encoding these instructions. We do this using the
+// fixLoadStoreExclusive function. This function has template parameters:
+//
+// fixLoadStoreExclusive<int hasRs, int hasRt2>
+//
+// hasRs indicates that the instruction uses the Rs field, so we won't set
+// it to 1 (and the same for Rt2). We don't need template parameters for
+// the other register fields since Rt and Rn are always used.
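+//
+// For example, the store-exclusive definitions below use
+// fixLoadStoreExclusive<1,0>: Rs is a real operand but Rt2 is not, so the
+// encoder forces the Rt2 field to all-ones.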
+
+// This operand parses a GPR64xsp register, followed by an optional immediate
+// #0.
+def GPR64xsp0_asmoperand : AsmOperandClass {
+ let Name = "GPR64xsp0";
+ let PredicateMethod = "isWrappedReg";
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "ParseLSXAddressOperand";
+ // Diagnostics are provided by ParserMethod
+}
+
+def GPR64xsp0 : RegisterOperand<GPR64xsp> {
+ let ParserMatchClass = GPR64xsp0_asmoperand;
+}
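+
+// As a result both "ldxr w0, [sp]" and the explicit "ldxr w0, [sp, #0]" are
+// accepted and produce the same encoding.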
+
+//===----------------------------------
+// Store-exclusive (releasing & normal)
+//===----------------------------------
+
+class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_stn <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rs, $Rt, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+}
+
+multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
+ def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+                               [], NoItinerary>;
+
+ def _word: A64I_SRexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
+ (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm STXR : A64I_SRex<"stxr", 0b000, "STXR">;
+defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">;
+
+//===----------------------------------
+// Loads
+//===----------------------------------
+
+class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tn <size,
+ opcode{2}, 1, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, [$Rn]"),
+ pat, itin> {
+ let mayLoad = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+multiclass A64I_LRex<string asmstr, bits<3> opcode> {
+ def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _word: A64I_LRexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
+ (outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm LDXR : A64I_LRex<"ldxr", 0b000>;
+defm LDAXR : A64I_LRex<"ldaxr", 0b001>;
+defm LDAR : A64I_LRex<"ldar", 0b101>;
+
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ return cast<AtomicSDNode>(N)->getOrdering() == Acquire;
+}]>;
+
+def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
+def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
+def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
+def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
+
+def : Pat<(atomic_load_acquire_8 i64:$Rn), (LDAR_byte $Rn)>;
+def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>;
+def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word $Rn)>;
+def : Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>;
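+
+// E.g. a C++11 atomic load with memory_order_acquire of an i32 becomes an
+// atomic_load_acquire_32 node and is selected to "ldar w0, [x0]".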
+
+//===----------------------------------
+// Store-release (no exclusivity)
+//===----------------------------------
+
+class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tn <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ return cast<AtomicSDNode>(N)->getOrdering() == Release;
+}]>;
+
+def atomic_store_release_8 : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+def atomic_store_release_64 : releasing_store<atomic_store_64>;
+
+multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
+ def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_8 i64:$Rn, i32:$Rt)],
+ NoItinerary>;
+
+ def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_16 i64:$Rn, i32:$Rt)],
+ NoItinerary>;
+
+ def _word: A64I_SLexs_impl<0b10, opcode, asmstr,
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_32 i64:$Rn, i32:$Rt)],
+ NoItinerary>;
+
+ def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
+ (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_64 i64:$Rn, i64:$Rt)],
+ NoItinerary>;
+}
+
+defm STLR : A64I_SLex<"stlr", 0b101, "STLR">;
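+
+// Mirroring the acquire-side loads above, a store with memory_order_release
+// matches an atomic_store_release_* pattern and selects directly to STLR.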
+
+//===----------------------------------
+// Store-exclusive pair (releasing & normal)
+//===----------------------------------
+
+class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_stt2n <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+}
+
+
+multiclass A64I_SPex<string asmstr, bits<3> opcode> {
+ def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
+ (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
+ GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
+ (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
+ GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm STXP : A64I_SPex<"stxp", 0b010>;
+defm STLXP : A64I_SPex<"stlxp", 0b011>;
+
+//===----------------------------------
+// Load-exclusive pair (acquiring & normal)
+//===----------------------------------
+
+class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tt2n <size,
+ opcode{2}, 1, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"),
+                     pat, itin> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
+}
+
+multiclass A64I_LPex<string asmstr, bits<3> opcode> {
+ def _word: A64I_LPexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rt, GPR32:$Rt2),
+ (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm LDXP : A64I_LPex<"ldxp", 0b010>;
+defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unscaled immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDURB, LDURH, LDURSB, LDURSH, LDURSW, STUR, STURB, STURH and PRFUM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (register offset) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (unsigned immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate post-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+
+// Note that patterns are much later on in a completely separate section (they
+// need ADRPxi to be defined).
+
+//===-------------------------------
+// 1. Various operands needed
+//===-------------------------------
+
+//===-------------------------------
+// 1.1 Unsigned 12-bit immediate operands
+//===-------------------------------
+// The addressing mode for these instructions consists of an unsigned 12-bit
+// immediate which is scaled by the size of the memory access.
+//
+// We represent this in the MC layer by two operands:
+// 1. A base register.
+// 2. A 12-bit immediate: the byte offset already divided by the access size,
+//    so "LDR x0, [x0, #8]" has '1' in this field.
+// This means that separate functions are needed for converting representations
+// which *are* aware of the intended access size.
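+//
+// A worked example with 8-byte accesses: "ldr x0, [x1, #32]" stores 32/8 = 4
+// in the immediate field, so the largest encodable offset is 4095 * 8 = 32760.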
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+multiclass offsets_uimm12<int MemSize, string prefix> {
+ def uimm12_asmoperand : AsmOperandClass {
+ let Name = "OffsetUImm12_" # MemSize;
+ let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
+ let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreUImm12_" # MemSize;
+ }
+
+ // Pattern is really no more than an ImmLeaf, but predicated on MemSize which
+ // complicates things beyond TableGen's ken.
+ def uimm12 : Operand<i64>,
+ ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
+
+ let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
+ let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
+ }
+}
+
+defm byte_ : offsets_uimm12<1, "byte_">;
+defm hword_ : offsets_uimm12<2, "hword_">;
+defm word_ : offsets_uimm12<4, "word_">;
+defm dword_ : offsets_uimm12<8, "dword_">;
+defm qword_ : offsets_uimm12<16, "qword_">;
+
+//===-------------------------------
+// 1.2 Signed 9-bit immediate operands
+//===-------------------------------
+
+// The MCInst is expected to store the bit-wise encoding of the value,
+// which amounts to lopping off the extended sign bits.
+def SDXF_simm9 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32);
+}]>;
+
+def simm9_asmoperand : AsmOperandClass {
+ let Name = "SImm9";
+ let PredicateMethod = "isSImm<9>";
+ let RenderMethod = "addSImmOperands<9>";
+ let DiagnosticType = "LoadStoreSImm9";
+}
+
+def simm9 : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }],
+ SDXF_simm9> {
+ let PrintMethod = "printOffsetSImm9Operand";
+ let ParserMatchClass = simm9_asmoperand;
+}
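+
+// For example "ldur x0, [x1, #-1]" is in range (-256 <= -1 <= 255), and
+// SDXF_simm9 masks the value down to the 9-bit encoding 0x1ff.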
+
+
+//===-------------------------------
+// 1.3 Register offset extensions
+//===-------------------------------
+
+// The assembly-syntax for these addressing-modes is:
+// [<Xn|SP>, <R><m> {, <extend> {<amount>}}]
+//
+// The essential semantics are:
+// + <amount> is a shift: #<log(transfer size)> or #0
+// + <R> can be W or X.
+// + If <R> is W, <extend> can be UXTW or SXTW
+// + If <R> is X, <extend> can be LSL or SXTX
+//
+// The trickiest of those constraints is that Rm can be either GPR32 or GPR64,
+// which will need separate instructions for LLVM type-consistency. We'll also
+// need separate operands, of course.
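+//
+// For example, with a 4-byte transfer "ldr w0, [x1, w2, sxtw #2]" sign-extends
+// the W-register index and shifts it by log2(4) = 2; the X-register form is
+// "ldr w0, [x1, x2, lsl #2]".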
+multiclass regexts<int MemSize, int RmSize, RegisterClass GPR,
+ string Rm, string prefix> {
+ def regext_asmoperand : AsmOperandClass {
+ let Name = "AddrRegExtend_" # MemSize # "_" # Rm;
+ let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">";
+ let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize;
+ }
+
+ def regext : Operand<i64> {
+ let PrintMethod
+ = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">";
+
+ let DecoderMethod = "DecodeAddrRegExtendOperand";
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # regext_asmoperand);
+ }
+}
+
+multiclass regexts_wx<int MemSize, string prefix> {
+ // Rm is an X-register if LSL or SXTX are specified as the shift.
+ defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">;
+
+ // Rm is a W-register if UXTW or SXTW are specified as the shift.
+ defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">;
+}
+
+defm byte_ : regexts_wx<1, "byte_">;
+defm hword_ : regexts_wx<2, "hword_">;
+defm word_ : regexts_wx<4, "word_">;
+defm dword_ : regexts_wx<8, "dword_">;
+defm qword_ : regexts_wx<16, "qword_">;
+
+
+//===------------------------------
+// 2. The instructions themselves.
+//===------------------------------
+
+// We have the following instructions to implement:
+// | | B | H | W | X |
+// |-----------------+-------+-------+-------+--------|
+// | unsigned str | STRB | STRH | STR | STR |
+// | unsigned ldr | LDRB | LDRH | LDR | LDR |
+// | signed ldr to W | LDRSB | LDRSH | - | - |
+// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) |
+
+// The multiclass below instantiates the LDR/STR instructions you'd expect for
+// an unsigned datatype (first two rows above) or a floating-point register;
+// these are reasonably uniform across all access sizes.
+
+
+//===------------------------------
+// 2.1 Regular instructions
+//===------------------------------
+
+// This multiclass covers the basic unsigned or irrelevantly-signed loads and
+// stores to general-purpose and floating-point registers.
+
+class AddrParams<string prefix> {
+ Operand uimm12 = !cast<Operand>(prefix # "_uimm12");
+
+ Operand regextWm = !cast<Operand>(prefix # "_Wm_regext");
+ Operand regextXm = !cast<Operand>(prefix # "_Xm_regext");
+}
+
+def byte_addrparams : AddrParams<"byte">;
+def hword_addrparams : AddrParams<"hword">;
+def word_addrparams : AddrParams<"word">;
+def dword_addrparams : AddrParams<"dword">;
+def qword_addrparams : AddrParams<"qword">;
+
+multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
+ bit high_opc, string asmsuffix,
+ RegisterClass GPR, AddrParams params> {
+ // Unsigned immediate
+ def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+ def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Register offset (four of these: load/store and Wm/Xm).
+ let mayLoad = 1 in {
+ def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0,
+ (outs GPR:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1,
+ (outs GPR:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ let mayStore = 1 in {
+ def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm,
+ params.regextWm:$Ext),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm,
+ params.regextXm:$Ext),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ // Unaligned immediate
+ def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+ def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Post-indexed
+ def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0},
+ (outs GPR64xsp:$Rn_wb),
+ (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let mayStore = 1;
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ // Pre-indexed
+ def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0},
+ (outs GPR64xsp:$Rn_wb),
+ (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let mayStore = 1;
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+}
+
+// STRB/LDRB: First define the instructions
+defm LS8
+ : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>;
+
+// STRH/LDRH
+defm LS16
+ : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>;
+
+
+// STR/LDR to/from a W register
+defm LS32
+ : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>;
+
+// STR/LDR to/from an X register
+defm LS64
+ : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
+
+// STR/LDR to/from a B register
+defm LSFP8
+ : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
+
+// STR/LDR to/from an H register
+defm LSFP16
+ : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>;
+
+// STR/LDR to/from an S register
+defm LSFP32
+ : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>;
+// STR/LDR to/from a D register
+defm LSFP64
+ : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>;
+// STR/LDR to/from a Q register
+defm LSFP128
+ : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128,
+ qword_addrparams>;
+
+//===------------------------------
+// 2.3 Signed loads
+//===------------------------------
+
+// Byte and half-word signed loads can both go into either an X or a W register,
+// so it's worth factoring them out. Signed word loads don't fit because there
+// is no W version.
+multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
+ string prefix> {
+ // Unsigned offset
+ def w : A64I_LSunsigimm<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def x : A64I_LSunsigimm<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Register offset
+ let mayLoad = 1 in {
+ def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+
+ let mayLoad = 1 in {
+ // Unaligned offset
+ def w_U : A64I_LSunalimm<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+ def x_U : A64I_LSunalimm<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+
+ // Post-indexed
+ def w_PostInd : A64I_LSpostind<size, 0b0, 0b11,
+ (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def x_PostInd : A64I_LSpostind<size, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ // Pre-indexed
+ def w_PreInd : A64I_LSpreind<size, 0b0, 0b11,
+ (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def x_PreInd : A64I_LSpreind<size, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+ } // let mayLoad = 1
+}
+
+// LDRSB
+defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">;
+// LDRSH
+defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">;
+
+// LDRSW: load a 32-bit value and sign-extend it to 64 bits.
+def LDRSWx
+ : A64I_LSunsigimm<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, word_uimm12:$UImm12),
+ "ldrsw\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in {
+ def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext),
+ "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext),
+ "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]",
+ (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>;
+
+
+def LDURSWx
+ : A64I_LSunalimm<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldursw\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+def LDRSWx_PostInd
+ : A64I_LSpostind<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrsw\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrsw\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+//===------------------------------
+// 2.4 Prefetch operations
+//===------------------------------
+
+def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12),
+ "prfm\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"prfm $Rt, [$Rn]",
+ (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in {
+ def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR32:$Rm, dword_Wm_regext:$Ext),
+ "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, dword_Xm_regext:$Ext),
+ "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+}
+
+def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
+ (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+
+def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "prfum\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"prfum $Rt, [$Rn]",
+ (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unprivileged) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH
+
+// These instructions closely mirror the "unscaled immediate" loads, but since
+// there are no floating-point variants, we need to split them out into their
+// own section to avoid instantiating "ldtr d0, [sp]" etc.
+
+multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR,
+ string prefix> {
+ def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+
+ def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01,
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+
+ def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+}
+
+// STTRB/LDTRB: First define the instructions
+defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">;
+
+// STTRH/LDTRH
+defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">;
+
+// STTR/LDTR to/from a W register
+defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">;
+
+// STTR/LDTR to/from an X register
+defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">;
+
+// Now a multiclass for the signed instructions that can go to either 32 or 64
+// bits...
+multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> {
+ let mayLoad = 1 in {
+ def w : A64I_LSunpriv<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+ def x : A64I_LSunpriv<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+ }
+
+ def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+}
+
+// LDTRSB
+defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">;
+// LDTRSH
+defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">;
+
+// And finally LDTRSW which only goes to 64 bits.
+def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrsw\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register pair (offset) instructions
+//===----------------------------------------------------------------------===//
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register pair (post-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STP, LDP, LDPSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register pair (pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STP, LDP, LDPSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store non-temporal register pair (offset) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STNP, LDNP
+
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+multiclass offsets_simm7<string MemSize, string prefix> {
+ // The bare signed 7-bit immediate is used in post-indexed instructions, but
+  // because of the scaling performed, a generic "simm7" operand isn't
+ // appropriate here either.
+ def simm7_asmoperand : AsmOperandClass {
+ let Name = "SImm7_Scaled" # MemSize;
+ let PredicateMethod = "isSImm7Scaled<" # MemSize # ">";
+ let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreSImm7_" # MemSize;
+ }
+
+ def simm7 : Operand<i64> {
+ let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">";
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand");
+ }
+}
+
+defm word_ : offsets_simm7<"4", "word_">;
+defm dword_ : offsets_simm7<"8", "dword_">;
+defm qword_ : offsets_simm7<"16", "qword_">;
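+
+// As with uimm12 above, the stored immediate is pre-scaled: "ldp x0, x1,
+// [sp, #16]" keeps 16/8 = 2 in the SImm7 field, giving a range of -512 to
+// +504 for 8-byte accesses.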
+
+multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
+ Operand simm7, string prefix> {
+ def _STR : A64I_LSPoffset<opc, v, 0b0, (outs),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayStore = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"stp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _LDR : A64I_LSPoffset<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"ldp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0,
+ (outs GPR64xsp:$Rn_wb),
+ (ins SomeReg:$Rt, SomeReg:$Rt2,
+ GPR64xsp:$Rn,
+ simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayStore = 1;
+ let Constraints = "$Rn = $Rn_wb";
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayStore = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayStore = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"stnp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+}
+
+
+defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
+defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
+defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
+defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
+defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7,
+ "LSFPPair128">;
+
+
+def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]",
+ (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>;
+
+def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
+                                    (outs GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
+                                   (outs GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+//===----------------------------------------------------------------------===//
+// Logical (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV
+
+multiclass logical_imm_operands<string prefix, string note,
+ int size, ValueType VT> {
+ def _asmoperand : AsmOperandClass {
+ let Name = "LogicalImm" # note # size;
+ let PredicateMethod = "isLogicalImm" # note # "<" # size # ">";
+ let RenderMethod = "addLogicalImmOperands<" # size # ">";
+ let DiagnosticType = "LogicalSecondSource";
+ }
+
+ def _operand
+ : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+ let PrintMethod = "printLogicalImmOperand<" # size # ">";
+ let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">";
+ }
+}
+
+defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>;
+defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>;
+
+// The mov versions only differ in assembly parsing, where they
+// exclude values representable with either MOVZ or MOVN.
+defm logical_imm32_mov
+ : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>;
+defm logical_imm64_mov
+ : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>;
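+
+// For example "mov w0, #0xaaaaaaaa" needs this ORR-immediate form: the value
+// is a valid logical immediate (alternating bits) but has no MOVZ or MOVN
+// encoding.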
+
+
+multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> {
+ def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd),
+ (ins GPR32:$Rn, logical_imm32_operand:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [(set i32:$Rd,
+ (opnode i32:$Rn, logical_imm32_operand:$Imm))],
+ NoItinerary>;
+
+ def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd),
+ (ins GPR64:$Rn, logical_imm64_operand:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [(set i64:$Rd,
+ (opnode i64:$Rn, logical_imm64_operand:$Imm))],
+ NoItinerary>;
+}
+
+defm AND : A64I_logimmSizes<0b00, "and", and>;
+defm ORR : A64I_logimmSizes<0b01, "orr", or>;
+defm EOR : A64I_logimmSizes<0b10, "eor", xor>;
+
+let Defs = [NZCV] in {
+ def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, logical_imm32_operand:$Imm),
+ "ands\t$Rd, $Rn, $Imm",
+ [], NoItinerary>;
+
+ def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, logical_imm64_operand:$Imm),
+ "ands\t$Rd, $Rn, $Imm",
+ [], NoItinerary>;
+}
+
+
+def : InstAlias<"tst $Rn, $Imm",
+ (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>;
+def : InstAlias<"tst $Rn, $Imm",
+ (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>;
+def : InstAlias<"mov $Rd, $Imm",
+ (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>;
+def : InstAlias<"mov $Rd, $Imm",
+ (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>;
+
+//===----------------------------------------------------------------------===//
+// Logical (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV
+
+// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS"
+// behaves differently for unsigned comparisons, so we defensively only allow
+// signed or n/a as the condition. In practice "unsigned greater than 0" is
+// canonicalized to "not equal to 0", so LLVM gives us this anyway.
+def signed_cond : PatLeaf<(cond), [{
+ return !isUnsignedIntSetCC(N->get());
+}]>;
+
+
+// These instructions share their "shift" operands with add/sub (shifted
+// register instructions). They are defined there.
+
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass logical_shifts<string prefix, bit sf, bits<2> opc,
+ bit N, bit commutable,
+ string asmop, SDPatternOperator opfrag, ValueType ty,
+ RegisterClass GPR, list<Register> defs> {
+ let isCommutable = commutable, Defs = defs in {
+ def _lsl : A64I_logicalshift<sf, opc, 0b00, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_logicalshift<sf, opc, 0b01, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, opc, 0b10, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, opc, 0b11, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+ GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag ty:$Rn, ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable,
+ string asmop, SDPatternOperator opfrag,
+ list<Register> defs> {
+ defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N,
+ commutable, asmop, opfrag, i64, GPR64, defs>;
+ defm www : logical_shifts<prefix # "www", 0b0, opc, N,
+ commutable, asmop, opfrag, i32, GPR32, defs>;
+}
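+
+// Each size/shift combination becomes a separate instruction, so e.g.
+// (and i64:$Rn, (shl i64:$Rm, 4)) selects to ANDxxx_lsl, printed as
+// "and x0, x1, x2, lsl #4", with no separate shift needed.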
+
+
+defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>;
+defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>;
+defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>;
+defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands",
+ PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs),
+ [{ (void)N; return false; }]>,
+ [NZCV]>;
+
+defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (and node:$lhs, (not node:$rhs))>, []>;
+defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (or node:$lhs, (not node:$rhs))>, []>;
+defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (xor node:$lhs, (not node:$rhs))>, []>;
+defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (and node:$lhs, (not node:$rhs)),
+ [{ (void)N; return false; }]>,
+ [NZCV]>;
+
+multiclass tst_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
+ let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in {
+ def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+
+ def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+ }
+
+ def _noshift : InstAlias<"tst $Rn, $Rm",
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond),
+ (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
+}
+
+defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>;
+defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>;
+
+
+multiclass mvn_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
+ let isCommutable = 0, Rn = 0b11111 in {
+ def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (shl ty:$Rm,
+ !cast<Operand>("lsl_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+
+
+ def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (srl ty:$Rm,
+ !cast<Operand>("lsr_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (sra ty:$Rm,
+ !cast<Operand>("asr_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("ror_operand_" # ty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set ty:$Rd, (not (rotr ty:$Rm,
+                             !cast<Operand>("ror_operand_" # ty):$Imm6)))],
+ NoItinerary>;
+ }
+
+ def _noshift : InstAlias<"mvn $Rn, $Rm",
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(not ty:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") $Rm, 0)>;
+}
+
+defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>;
+defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>;
+
+def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Move wide (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: MOVN, MOVZ, MOVK + MOV aliases
+
+// A wide variety of relocations is needed for variants of these instructions,
+// so it turns out that we need a separate operand for each of them.
+multiclass movw_operands<string prefix, string instname, int width> {
+ def _imm_asmoperand : AsmOperandClass {
+ let Name = instname # width # "Shifted" # shift;
+ let PredicateMethod = "is" # instname # width # "Imm";
+ let RenderMethod = "addMoveWideImmOperands";
+ let ParserMethod = "ParseImmWithLSLOperand";
+ let DiagnosticType = "MOVWUImm16";
+ }
+
+ def _imm : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
+ let PrintMethod = "printMoveWideImmOperand";
+ let EncoderMethod = "getMoveWideImmOpValue";
+ let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">";
+
+ let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+ }
+}
+
+defm movn32 : movw_operands<"movn32", "MOVN", 32>;
+defm movn64 : movw_operands<"movn64", "MOVN", 64>;
+defm movz32 : movw_operands<"movz32", "MOVZ", 32>;
+defm movz64 : movw_operands<"movz64", "MOVZ", 64>;
+defm movk32 : movw_operands<"movk32", "MOVK", 32>;
+defm movk64 : movw_operands<"movk64", "MOVK", 64>;
+
+multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit,
+ dag ins64bit> {
+
+ def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit,
+ !strconcat(asmop, "\t$Rd, $FullImm"),
+ [], NoItinerary> {
+ bits<18> FullImm;
+ let UImm16 = FullImm{15-0};
+ let Shift = FullImm{17-16};
+ }
+
+ def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit,
+ !strconcat(asmop, "\t$Rd, $FullImm"),
+ [], NoItinerary> {
+ bits<18> FullImm;
+ let UImm16 = FullImm{15-0};
+ let Shift = FullImm{17-16};
+ }
+}
+
+let isMoveImm = 1, isReMaterializable = 1,
+ isAsCheapAsAMove = 1, hasSideEffects = 0 in {
+ defm MOVN : A64I_movwSizes<0b00, "movn",
+ (ins movn32_imm:$FullImm),
+ (ins movn64_imm:$FullImm)>;
+
+  // Some relocations are able to convert between a MOVZ and a MOVN. If these
+  // are applied, the instruction must be emitted with the corresponding bits
+  // as 0, which means a MOVZ needs to override those bits from the default.
+ let PostEncoderMethod = "fixMOVZ" in
+ defm MOVZ : A64I_movwSizes<0b10, "movz",
+ (ins movz32_imm:$FullImm),
+ (ins movz64_imm:$FullImm)>;
+}
+
+let Constraints = "$src = $Rd" in
+defm MOVK : A64I_movwSizes<0b11, "movk",
+ (ins GPR32:$src, movk32_imm:$FullImm),
+ (ins GPR64:$src, movk64_imm:$FullImm)>;
+
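+// The 18-bit $FullImm operand packs both fields of the move-wide immediate:
+// "movk x0, #0xbeef, lsl #16", for example, has UImm16 = 0xbeef and
+// Shift = 0b01.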
+
+// And now the "MOV" aliases. These also need their own operands because what
+// they accept is completely different to what the base instructions accept.
+multiclass movalias_operand<string prefix, string basename,
+ string immpredicate, int width> {
+ def _asmoperand : AsmOperandClass {
+ let Name = basename # width # "MovAlias";
+ let PredicateMethod
+ = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">";
+ let RenderMethod
+ = "addMoveWideMovAliasOperands<" # width # ", "
+ # "A64Imms::" # immpredicate # ">";
+ }
+
+ def _movimm : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+
+ let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+ }
+}
+
+defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>;
+defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>;
+defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>;
+defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>;
+
+// FIXME: these are officially canonical aliases, but TableGen is too limited
+// to print them at the moment. I believe an "AliasPredicate" method will need
+// to be implemented to allow it, as well as the more generally useful handling
+// of non-register, non-constant operands.
+class movalias<Instruction INST, RegisterClass GPR, Operand operand>
+ : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>;
+
+def : movalias<MOVZwii, GPR32, movz32_movimm>;
+def : movalias<MOVZxii, GPR64, movz64_movimm>;
+def : movalias<MOVNwii, GPR32, movn32_movimm>;
+def : movalias<MOVNxii, GPR64, movn64_movimm>;
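+
+// For illustration: "mov w0, #0x20000" can be parsed and printed as
+// "movz w0, #2, lsl #16", while "mov x0, #-3" corresponds to "movn x0, #2"
+// (MOVN writes the bitwise NOT of its shifted immediate). The predicates
+// above decide which base instruction a given "mov" immediate maps to.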
+
+//===----------------------------------------------------------------------===//
+// PC-relative addressing instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADR, ADRP
+
+def adr_label : Operand<i64> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>";
+
+ // This label is a 21-bit offset from PC, unscaled
+ let PrintMethod = "printLabelOperand<21, 1>";
+ let ParserMatchClass = label_asmoperand<21, 1>;
+ let OperandType = "OPERAND_PCREL";
+}
+
+def adrp_label_asmoperand : AsmOperandClass {
+ let Name = "AdrpLabel";
+ let RenderMethod = "addLabelOperands<21, 4096>";
+ let DiagnosticType = "Label";
+}
+
+def adrp_label : Operand<i64> {
+ let EncoderMethod = "getAdrpLabelOpValue";
+
+ // This label is a 21-bit offset from PC, scaled by the page-size: 4096.
+ let PrintMethod = "printLabelOperand<21, 4096>";
+ let ParserMatchClass = adrp_label_asmoperand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+let hasSideEffects = 0 in {
+ def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label),
+ "adr\t$Rd, $Label", [], NoItinerary>;
+
+ def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label),
+ "adrp\t$Rd, $Label", [], NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// System instructions
+//===----------------------------------------------------------------------===//
+// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS
+// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL
+
+// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values.
+def uimm3_asmoperand : AsmOperandClass {
+ let Name = "UImm3";
+ let PredicateMethod = "isUImm<3>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm3";
+}
+
+def uimm3 : Operand<i32> {
+ let ParserMatchClass = uimm3_asmoperand;
+}
+
+// The HINT alias can accept a simple unsigned 7-bit immediate.
+def uimm7_asmoperand : AsmOperandClass {
+ let Name = "UImm7";
+ let PredicateMethod = "isUImm<7>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm7";
+}
+
+def uimm7 : Operand<i32> {
+ let ParserMatchClass = uimm7_asmoperand;
+}
+
+// The namedimm multiclass was defined earlier, alongside the prefetch
+// operands. Most of these fit into the NamedImmMapper scheme well: they either
+// accept a named operand or any immediate under a particular value (which may
+// be 0, implying no immediate is allowed).
+defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">;
+defm isb : namedimm<"isb", "A64ISB::ISBMapper">;
+defm ic : namedimm<"ic", "A64IC::ICMapper">;
+defm dc : namedimm<"dc", "A64DC::DCMapper">;
+defm at : namedimm<"at", "A64AT::ATMapper">;
+defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">;
+
+// However, MRS and MSR are more complicated for a few reasons:
+// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an
+// implementation-defined effect
+// * Most registers are shared, but some are read-only or write-only.
+// * There is a variant of MSR which accepts the same register name (SPSel),
+// but which would have a different encoding.
+
+// In principle these could be resolved with more complicated subclasses of
+// NamedImmMapper; however, that would impose an overhead on the other "named
+// immediates", both concretely (virtual tables) and in unnecessary
+// abstraction.
+
+// The solution adopted here is to take the MRS/MSR Mappers out of the usual
+// hierarchy (they're not derived from NamedImmMapper) and to add logic for
+// their special situation.
+def mrs_asmoperand : AsmOperandClass {
+ let Name = "MRS";
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MRS";
+}
+
+def mrs_op : Operand<i32> {
+ let ParserMatchClass = mrs_asmoperand;
+ let PrintMethod = "printMRSOperand";
+ let DecoderMethod = "DecodeMRSOperand";
+}
+
+def msr_asmoperand : AsmOperandClass {
+ let Name = "MSRWithReg";
+
+ // Note that SPSel is valid for both this and the pstate operands, but with
+ // different immediate encodings. This is why these operands provide a string
+ // AArch64Operand rather than an immediate. The overlap is small enough that
+ // it could be resolved with hackery now, but who can say in future?
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MSR";
+}
+
+def msr_op : Operand<i32> {
+ let ParserMatchClass = msr_asmoperand;
+ let PrintMethod = "printMSROperand";
+ let DecoderMethod = "DecodeMSROperand";
+}
+
+def pstate_asmoperand : AsmOperandClass {
+ let Name = "MSRPState";
+ // See comment above about parser.
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MSR";
+}
+
+def pstate_op : Operand<i32> {
+ let ParserMatchClass = pstate_asmoperand;
+ let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>";
+ let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>";
+}
+
+// When <CRn> is specified, an assembler should accept something like "C4", not
+// the usual "#4" immediate.
+def CRx_asmoperand : AsmOperandClass {
+ let Name = "CRx";
+ let PredicateMethod = "isUImm<4>";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "ParseCRxOperand";
+ // Diagnostics are handled in all cases by ParseCRxOperand.
+}
+
+def CRx : Operand<i32> {
+ let ParserMatchClass = CRx_asmoperand;
+ let PrintMethod = "printCRxOperand";
+}
+
+
+// Finally, we can start defining the instructions.
+
+// HINT is straightforward, with a few aliases.
+def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7",
+ [], NoItinerary> {
+ bits<7> UImm7;
+ let CRm = UImm7{6-3};
+ let Op2 = UImm7{2-0};
+
+ let Op0 = 0b00;
+ let Op1 = 0b011;
+ let CRn = 0b0010;
+ let Rt = 0b11111;
+}
+
+def : InstAlias<"nop", (HINTi 0)>;
+def : InstAlias<"yield", (HINTi 1)>;
+def : InstAlias<"wfe", (HINTi 2)>;
+def : InstAlias<"wfi", (HINTi 3)>;
+def : InstAlias<"sev", (HINTi 4)>;
+def : InstAlias<"sevl", (HINTi 5)>;
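+
+// These aliases follow directly from the field packing in HINTi: "sevl", for
+// example, is "hint #5", i.e. UImm7 = 5, giving CRm = 0b0000 and Op2 = 0b101.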
+
+// Quite a few instructions then follow a similar pattern of fixing common
+// fields in the bitpattern, so we'll define a helper class for them.
+class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2,
+ Operand operand, string asmop>
+ : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"),
+ [], NoItinerary> {
+ let Op0 = op0;
+ let Op1 = op1;
+ let CRn = crn;
+ let Op2 = op2;
+ let Rt = 0b11111;
+}
+
+
+def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">;
+def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">;
+def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">;
+def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">;
+
+def : InstAlias<"clrex", (CLREXi 0b1111)>;
+def : InstAlias<"isb", (ISBi 0b1111)>;
+
+// (DMBi 0xb) is a "DMB ISH" instruction, appropriate for at least Linux SMP
+// configurations.
+def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>;
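+// (For reference, CRm = 0xb encodes the "ish" option, i.e. the inner-shareable
+// domain covering all access types, so the pattern above emits "dmb ish".)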
+
+// Any SYS bitpattern can be represented with a complex and opaque "SYS"
+// instruction.
+def SYSiccix : A64I_system<0b0, (outs),
+ (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm,
+ uimm3:$Op2, GPR64:$Rt),
+ "sys\t$Op1, $CRn, $CRm, $Op2, $Rt",
+ [], NoItinerary> {
+ let Op0 = 0b01;
+}
+
+// The Xt argument may be omitted from the generic SYS instruction, whether or
+// not that makes sense for the operation being performed.
+def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2",
+ (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>;
+
+
+// But many SYS bitpatterns have aliases, which obviously don't fit into the
+// generic instruction above.
+class SYSalias<dag ins, string asmstring>
+ : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> {
+ let isAsmParserOnly = 1;
+
+ bits<14> SysOp;
+ let Op0 = 0b01;
+ let Op1 = SysOp{13-11};
+ let CRn = SysOp{10-7};
+ let CRm = SysOp{6-3};
+ let Op2 = SysOp{2-0};
+}
+
+def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">;
+
+def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> {
+ let Rt = 0b11111;
+}
+
+def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">;
+def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">;
+
+def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">;
+
+def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> {
+ let Rt = 0b11111;
+}
+
+
+def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt),
+ (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2),
+ "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2",
+ [], NoItinerary> {
+ let Op0 = 0b01;
+}
+
+// The instructions themselves are rather simple for MSR and MRS.
+def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt),
+ "msr\t$SysReg, $Rt", [], NoItinerary> {
+ bits<16> SysReg;
+ let Op0 = SysReg{15-14};
+ let Op1 = SysReg{13-11};
+ let CRn = SysReg{10-7};
+ let CRm = SysReg{6-3};
+ let Op2 = SysReg{2-0};
+}
+
+def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg),
+ "mrs\t$Rt, $SysReg", [], NoItinerary> {
+ bits<16> SysReg;
+ let Op0 = SysReg{15-14};
+ let Op1 = SysReg{13-11};
+ let CRn = SysReg{10-7};
+ let CRm = SysReg{6-3};
+ let Op2 = SysReg{2-0};
+}
+
+def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm),
+ "msr\t$PState, $CRm", [], NoItinerary> {
+ bits<6> PState;
+
+ let Op0 = 0b00;
+ let Op1 = PState{5-3};
+ let CRn = 0b0100;
+ let Op2 = PState{2-0};
+ let Rt = 0b11111;
+}
+
+//===----------------------------------------------------------------------===//
+// Test & branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: TBZ, TBNZ
+
+// The bit to test is a simple unsigned 6-bit immediate in the X-register
+// versions.
+def uimm6 : Operand<i64> {
+ let ParserMatchClass = uimm6_asmoperand;
+}
+
+def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>;
+
+def tbimm_target : Operand<OtherVT> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>";
+
+ // This label is a 14-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<14, 4>";
+ let ParserMatchClass = label_wid14_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>;
+def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>;
+
+// These instructions correspond to patterns involving "and" with a power of
+// two, which we need to be able to select.
+def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">;
+def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">;
+
+let isBranch = 1, isTerminator = 1 in {
+ def TBZxii : A64I_TBimm<0b0, (outs),
+ (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
+ "tbz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
+ A64eq, bb:$Label)],
+ NoItinerary>;
+
+ def TBNZxii : A64I_TBimm<0b1, (outs),
+ (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
+ "tbnz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
+ A64ne, bb:$Label)],
+ NoItinerary>;
+
+
+  // Note, these instructions overlap with the above 64-bit patterns. This is
+  // intentional: "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" would
+  // both do the same thing and both are valid assembly. They also both have
+  // sensible DAG patterns.
+ def TBZwii : A64I_TBimm<0b0, (outs),
+ (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
+ "tbz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
+ A64eq, bb:$Label)],
+ NoItinerary> {
+ let Imm{5} = 0b0;
+ }
+
+ def TBNZwii : A64I_TBimm<0b1, (outs),
+ (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
+ "tbnz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
+ A64ne, bb:$Label)],
+ NoItinerary> {
+ let Imm{5} = 0b0;
+ }
+}
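+
+// A worked example of the patterns above: "tbz x3, #1, target" branches when
+// bit 1 of x3 is clear, so it is selected from a DAG of the form
+// (A64br_cc (A64cmp (and x3, 2), 0), eq, target), with SelectTSTBOperand
+// presumably recovering the bit number from the power-of-two mask.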
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B, BL
+
+def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>;
+
+def bimm_target : Operand<OtherVT> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>";
+
+ // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<26, 4>";
+ let ParserMatchClass = label_wid26_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+def blimm_target : Operand<i64> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>";
+
+ // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<26, 4>";
+ let ParserMatchClass = label_wid26_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type>
+ : A64I_Bimm<op, (outs), (ins lbl_type:$Label),
+ !strconcat(asmop, "\t$Label"), patterns,
+ NoItinerary>;
+
+let isBranch = 1 in {
+ def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ }
+
+ def BLimm : A64I_BimmImpl<0b1, "bl",
+ [(AArch64Call tglobaladdr:$Label)], blimm_target> {
+ let isCall = 1;
+ let Defs = [X30];
+ }
+}
+
+def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>;
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: BR, BLR, RET, ERET, DRPS.
+
+// Most of the notional opcode fields in the A64I_Breg format are fixed in A64
+// at the moment.
+class A64I_BregImpl<bits<4> opc,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin = NoItinerary>
+ : A64I_Breg<opc, 0b11111, 0b000000, 0b00000,
+ outs, ins, asmstr, patterns, itin> {
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// Note that these are not marked isCall or isReturn because as far as LLVM is
+// concerned they're not. "ret" is just another jump unless it has been selected
+// by LLVM as the function's return.
+
+let isBranch = 1 in {
+ def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn),
+ "br\t$Rn", [(brind i64:$Rn)]> {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ }
+
+ def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
+ "blr\t$Rn", [(AArch64Call i64:$Rn)]> {
+ let isBarrier = 0;
+ let isCall = 1;
+ let Defs = [X30];
+ }
+
+ def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn),
+ "ret\t$Rn", []> {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isReturn = 1;
+ }
+
+ // Create a separate pseudo-instruction for codegen to use so that we don't
+ // flag x30 as used in every function. It'll be restored before the RET by the
+ // epilogue if it's legitimately used.
+ def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isReturn = 1;
+ }
+
+ def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> {
+ let Rn = 0b11111;
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isReturn = 1;
+ }
+
+ def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> {
+ let Rn = 0b11111;
+ let isBarrier = 1;
+ }
+}
+
+def RETAlias : InstAlias<"ret", (RETx X30)>;
+
+
+//===----------------------------------------------------------------------===//
+// Address generation patterns
+//===----------------------------------------------------------------------===//
+
+// Primary method of address generation for the small/absolute memory model is
+// an ADRP/ADR pair:
+// ADRP x0, some_variable
+// ADD x0, x0, #:lo12:some_variable
+//
+// The load/store elision of the ADD is accomplished when selecting
+// addressing-modes. This just mops up the cases where that doesn't work and we
+// really need an address in some register.
+
+// This wrapper applies a LO12 modifier to the address. Otherwise we could just
+// use the same address.
+
+class ADRP_ADD<SDNode Wrapper, SDNode addrop>
+ : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)),
+ (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>;
+
+def : ADRP_ADD<A64WrapperSmall, tblockaddress>;
+def : ADRP_ADD<A64WrapperSmall, texternalsym>;
+def : ADRP_ADD<A64WrapperSmall, tglobaladdr>;
+def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>;
+def : ADRP_ADD<A64WrapperSmall, tjumptable>;
+
+//===----------------------------------------------------------------------===//
+// GOT access patterns
+//===----------------------------------------------------------------------===//
+
+// FIXME: Wibble
+
+class GOTLoadSmall<SDNode addrfrag>
+ : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)),
+ (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>;
+
+def : GOTLoadSmall<texternalsym>;
+def : GOTLoadSmall<tglobaladdr>;
+def : GOTLoadSmall<tglobaltlsaddr>;
+
+//===----------------------------------------------------------------------===//
+// Tail call handling
+//===----------------------------------------------------------------------===//
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in {
+ def TC_RETURNdi
+ : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff),
+ [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>;
+
+ def TC_RETURNxi
+ : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff),
+ [(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>;
+}
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ Uses = [XSP] in {
+ def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [],
+ (Bimm bimm_target:$Label)>;
+
+ def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [],
+ (BRx GPR64:$Rd)>;
+}
+
+
+def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
+ (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>;
+
+//===----------------------------------------------------------------------===//
+// Thread local storage
+//===----------------------------------------------------------------------===//
+
+// This is a pseudo-instruction representing the ".tlsdesccall" directive in
+// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the
+// current location. It should always be immediately followed by a BLR
+// instruction, and is intended solely for relaxation by the linker.
+
+def : Pat<(A64threadpointer), (MRSxi 0xde82)>;
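+// (0xde82 is the system-register encoding of TPIDR_EL0, the EL0
+// thread-pointer register.)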
+
+def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> {
+ let hasSideEffects = 1;
+}
+
+def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var),
+ [(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> {
+ let isCall = 1;
+ let Defs = [X30];
+}
+
+def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var),
+ (TLSDESC_BLRx $Rn, texternalsym:$Var)>;
+
+//===----------------------------------------------------------------------===//
+// Bitfield patterns
+//===----------------------------------------------------------------------===//
+
+def bfi32_lsb_to_immr : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64);
+}]>;
+
+def bfi64_lsb_to_immr : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64);
+}]>;
+
+def bfi_width_to_imms : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64);
+}]>;
+
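+// A worked example of the transforms above: a 32-bit A64Bfi inserting an
+// 8-bit field at lsb 8 is encoded with ImmR = (32 - 8) % 32 = 24 and
+// ImmS = 8 - 1 = 7.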
+
+// The simpler patterns deal with cases where no AND mask is actually needed
+// (either all bits are used or the low 32 bits are used).
+let AddedComplexity = 10 in {
+
+def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
+ (BFIxxii $src, $Rn,
+ (bfi64_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS)))>;
+
+def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS),
+ (BFIwwii $src, $Rn,
+ (bfi32_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS)))>;
+
+
+def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
+ (i64 4294967295)),
+ (SUBREG_TO_REG (i64 0),
+ (BFIwwii (EXTRACT_SUBREG $src, sub_32),
+ (EXTRACT_SUBREG $Rn, sub_32),
+ (bfi32_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS))),
+ sub_32)>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous patterns
+//===----------------------------------------------------------------------===//
+
+// Truncation from 64 to 32 bits just involves renaming your register.
+def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>;
+
+// Similarly, extension where we don't care about the high bits is
+// just a rename.
+def : Pat<(i64 (anyext i32:$val)),
+ (INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>;
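+
+// INSERT_SUBREG rather than SUBREG_TO_REG is used here because anyext makes no
+// promise about the high 32 bits; SUBREG_TO_REG would wrongly assert that they
+// are zero.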
+
+// SELECT instructions providing f128 types need to be handled by a
+// pseudo-instruction since the eventual code will need to introduce basic
+// blocks and control flow.
+def F128CSEL : PseudoInst<(outs FPR128:$Rd),
+ (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
+ [(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> {
+ let Uses = [NZCV];
+ let usesCustomInserter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Load/store patterns
+//===----------------------------------------------------------------------===//
+
+// There are lots of patterns here, because we need to allow at least three
+// parameters to vary independently.
+// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ...
+// 2. LLVM source: zextloadi8, anyextloadi8, ...
+// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ...
+//
+// The biggest problem turns out to be the address-generation variable. At the
+// point of instantiation we need to produce two DAGs, one for the pattern and
+// one for the instruction. Doing this at the lowest level of classes doesn't
+// work.
+//
+// Consider the simple uimm12 addressing mode, and the desire to match both (add
+// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the
+// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or
+// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this
+// operation, and PatFrags are for selection not output.
+//
+// As a result, the address-generation patterns are the final
+// instantiations. However, we do still need to vary the operand for the
+// address further down (at the point where we decide on A64WrapperSmall, we
+// don't yet know the memory width of the operation).
+
+//===------------------------------
+// 1. Basic infrastructural defs
+//===------------------------------
+
+// First, some simple classes for !foreach and !subst to use:
+class Decls {
+ dag pattern;
+}
+
+def decls : Decls;
+def ALIGN;
+def INST;
+def OFFSET;
+def SHIFT;
+
+// You can't use !subst on an actual immediate, but you *can* use it on an
+// operand record that happens to match a single immediate. So we do.
+def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>;
+def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>;
+def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>;
+def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>;
+def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>;
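+
+// These stand in for the SHIFT placeholder in the register-offset patterns
+// below: the shift is log2 of the access size, so imm_eq0 matches byte
+// accesses, imm_eq1 halfword, imm_eq2 word, imm_eq3 doubleword and imm_eq4
+// the 16-byte FP/SIMD accesses.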
+
+// If the low bits of a pointer are known to be 0 then an "or" is just as good
+// as addition for computing an offset. This fragment forwards that check for
+// TableGen's use.
+def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),
+[{
+ return CurDAG->isBaseWithConstantOffset(SDValue(N, 0));
+}]>;
+
+// Load/store (unsigned immediate) operations with relocations against global
+// symbols (for lo12) are only valid if those symbols have correct alignment
+// (since the immediate offset is divided by the access scale, it can't have a
+// remainder).
+//
+// The guaranteed alignment is provided as part of the WrapperSmall
+// operation, and checked against one of these.
+def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>;
+def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>;
+def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>;
+def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>;
+def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>;
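+
+// For example, a 32-bit load/store of "var" via #:lo12:var is only selected
+// when the wrapper guarantees at least 4-byte alignment (min_align4), because
+// the uimm12 offset of a 32-bit access is scaled by 4 and so cannot represent
+// a misaligned lo12 remainder.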
+
+// "Normal" load/store instructions can be used on atomic operations, provided
+// the ordering parameter is at most "monotonic". Anything above that needs
+// special handling with acquire/release instructions.
+class simple_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
+}]>;
+
+def atomic_load_simple_i8 : simple_load<atomic_load_8>;
+def atomic_load_simple_i16 : simple_load<atomic_load_16>;
+def atomic_load_simple_i32 : simple_load<atomic_load_32>;
+def atomic_load_simple_i64 : simple_load<atomic_load_64>;
+
+class simple_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
+}]>;
+
+def atomic_store_simple_i8 : simple_store<atomic_store_8>;
+def atomic_store_simple_i16 : simple_store<atomic_store_16>;
+def atomic_store_simple_i32 : simple_store<atomic_store_32>;
+def atomic_store_simple_i64 : simple_store<atomic_store_64>;
+
+//===------------------------------
+// 2. UImm12 and SImm9
+//===------------------------------
+
+// These instructions have two operands providing the address so they can be
+// treated similarly for most purposes.
+
+//===------------------------------
+// 2.1 Base patterns covering extend/truncate semantics
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes, so a
+// quick multiclass here allows reuse.
+multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, ValueType transty,
+ ValueType sty> {
+ def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
+ (LOAD Base, Offset)>;
+
+ def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
+ (STORE $Rt, Base, Offset)>;
+}
+
+// Instructions accessing a memory chunk smaller than a register (or, in a
+// pinch, the same size) have a characteristic set of patterns they want to
+// match: extending loads and truncating stores. This class deals with the
+// sign-neutral version of those patterns.
+//
+// It will be instantiated across multiple addressing-modes.
+multiclass ls_small_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset,
+ dag address, ValueType sty>
+ : ls_atomic_pats<LOAD, STORE, Base, Offset, address, i32, sty> {
+ def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>;
+
+ def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>;
+
+ // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
+ // register was actually set.
+ def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
+
+ def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
+
+ def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
+ (STORE $Rt, Base, Offset)>;
+
+ // For truncating store from 64-bits, we have to manually tell LLVM to
+ // ignore the high bits of the x register.
+ def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
+ (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
+}
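+
+// As a concrete instance, instantiating this class for i8 with uimm12
+// addressing (see uimm12_pats below) yields, among others, a pattern along
+// the lines of:
+//   (zextloadi8 (add i64:$Rn, byte_uimm12:$Offset)) -> (LS8_LDR $Rn, $Offset)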
+
+// Next come patterns for sign-extending loads.
+multiclass load_signed_pats<string T, string U, dag Base, dag Offset,
+ dag address, ValueType sty> {
+ def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>;
+
+ def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>;
+
+}
+
+// And finally, the "natural-width" loads and stores.
+multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, ValueType sty> {
+ def : Pat<(sty (load address)), (LOAD Base, Offset)>;
+ def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>;
+}
+
+// Integer operations also get atomic instructions to select for.
+multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, ValueType sty>
+ : ls_neutral_pats<LOAD, STORE, Base, Offset, address, sty>,
+ ls_atomic_pats<LOAD, STORE, Base, Offset, address, sty, sty>;
+
+//===------------------------------
+// 2.2. Addressing-mode instantiations
+//===------------------------------
+
+multiclass uimm12_pats<dag address, dag Base, dag Offset> {
+ defm : ls_small_pats<LS8_LDR, LS8_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, byte_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, byte_uimm12,
+ !subst(ALIGN, any_align, decls.pattern))),
+ i8>;
+ defm : ls_small_pats<LS16_LDR, LS16_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ i16>;
+ defm : ls_small_pats<LS32_LDR, LS32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ i32>;
+
+ defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ i32>;
+
+ defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, dword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, dword_uimm12,
+ !subst(ALIGN, min_align8, decls.pattern))),
+ i64>;
+
+ defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ f16>;
+
+ defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ f32>;
+
+ defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, dword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, dword_uimm12,
+ !subst(ALIGN, min_align8, decls.pattern))),
+ f64>;
+
+ defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, qword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, qword_uimm12,
+ !subst(ALIGN, min_align16, decls.pattern))),
+ f128>;
+
+ defm : load_signed_pats<"B", "", Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, byte_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, byte_uimm12,
+ !subst(ALIGN, any_align, decls.pattern))),
+ i8>;
+
+ defm : load_signed_pats<"H", "", Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ i16>;
+
+ def : Pat<(sextloadi32 !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern)))),
+ (LDRSWx Base, !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)))>;
+}
+
+// Straightforward patterns of last resort: a pointer with or without an
+// appropriate offset.
+defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>;
+defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12),
+ (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
+
+// The offset could be hidden behind an "or", of course:
+defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12),
+ (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
+
+// Global addresses under the small-absolute model should use these
+// instructions. There are ELF relocations specifically for it.
+defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN),
+ (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12,
+ ALIGN),
+ (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>;
+
+// External symbols that make it this far should also get standard relocations.
+defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12,
+ ALIGN),
+ (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
+ (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
+
+// We also want to use uimm12 instructions for local variables at the moment.
+def tframeindex_XFORM : SDNodeXForm<frameindex, [{
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->getTargetFrameIndex(FI, MVT::i64);
+}]>;
+
+defm : uimm12_pats<(i64 frameindex:$Rn),
+ (tframeindex_XFORM tframeindex:$Rn), (i64 0)>;
+
+// These can be much simpler than uimm12 because we don't have to change the
+// operand type (e.g. LDURB and LDURH take the same operands).
+multiclass simm9_pats<dag address, dag Base, dag Offset> {
+ defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
+ defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
+
+ defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address, i32>;
+ defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address, i64>;
+
+ defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address, f16>;
+ defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address, f32>;
+ defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address, f64>;
+ defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
+ f128>;
+
+ def : Pat<(i64 (zextloadi32 address)),
+ (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
+
+ def : Pat<(truncstorei32 i64:$Rt, address),
+ (LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
+
+ defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
+ defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
+ def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
+}
+
+defm : simm9_pats<(add i64:$Rn, simm9:$SImm9),
+ (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
+
+defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9),
+ (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
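+
+// These unscaled-offset patterns cover addresses the uimm12 forms cannot,
+// e.g. a 32-bit load from (add i64:$Rn, -4) becomes "ldur wD, [xN, #-4]".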
+
+
+//===------------------------------
+// 3. Register offset patterns
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes, so a
+// quick multiclass here allows reuse.
+multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag Extend, dag address,
+ ValueType transty, ValueType sty> {
+ def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
+ (STORE $Rt, Base, Offset, Extend)>;
+}
+
+// The register offset instructions take three operands giving the address,
+// and have an annoying split between instructions where Rm is 32-bit and
+// where it is 64-bit, so we need a special hierarchy to describe them. Other
+// than that, the same operations should be supported as for simm9 and uimm12
+// addressing.
+
+multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend,
+ dag address, ValueType sty>
+ : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, i32, sty> {
+ def : Pat<(!cast<SDNode>(zextload # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ def : Pat<(!cast<SDNode>(extload # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
+ // register was actually set.
+ def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+ def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+ def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
+ (STORE $Rt, Base, Offset, Extend)>;
+
+ // For truncating store from 64-bits, we have to manually tell LLVM to
+ // ignore the high bits of the x register.
+ def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
+ (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>;
+
+}
+
+// Next come patterns for sign-extending loads.
+multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
+ dag address, ValueType sty> {
+ def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+
+ def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+}
+
+// And finally, the "natural-width" loads and stores.
+multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend, dag address,
+ ValueType sty> {
+ def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>;
+ def : Pat<(store sty:$Rt, address),
+ (STORE $Rt, Base, Offset, Extend)>;
+}
+
+multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend, dag address,
+ ValueType sty>
+ : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, sty>,
+ ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, sty, sty>;
+
+multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
+ dag Extend> {
+ defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq0, decls.pattern)),
+ i8>;
+ defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ i16>;
+ defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ i32>;
+
+ defm : ro_int_neutral_pats<
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ i32>;
+
+ defm : ro_int_neutral_pats<
+ !cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq3, decls.pattern)),
+ i64>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ f16>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ f32>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq3, decls.pattern)),
+ f64>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq4, decls.pattern)),
+ f128>;
+
+ defm : ro_signed_pats<"B", Rm, Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq0, decls.pattern)),
+ i8>;
+
+ defm : ro_signed_pats<"H", Rm, Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ i16>;
+
+ def : Pat<(sextloadi32 !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern))),
+ (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+}
+
+
+// Finally we're in a position to tell LLVM exactly what addresses are reachable
+// using register-offset instructions. Essentially a base plus a possibly
+// extended, possibly shifted (by access size) offset.
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>;
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>;
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>;
+
+defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)),
+ (i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>;
+
+defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
+ (i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>;
+
+defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
+ (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
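+
+// The trailing immediates in these instantiations select the extend/shift
+// field of the register-offset instructions; judging from the pairings above,
+// the low bit is the "shift by access-size" flag, with 2/3 corresponding to
+// UXTW (or plain LSL for an X-register offset) and 6/7 to SXTW.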
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
new file mode 100644
index 000000000000..c96bf85a716c
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -0,0 +1,140 @@
+//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower AArch64 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64AsmPrinter.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+MCOperand
+AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Sym) const {
+  const MCExpr *Expr
+    = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext);
+
+ switch (MO.getTargetFlags()) {
+ case AArch64II::MO_GOT:
+ Expr = AArch64MCExpr::CreateGOT(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOT_LO12:
+ Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_LO12:
+ Expr = AArch64MCExpr::CreateLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_DTPREL_G1:
+ Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext);
+ break;
+ case AArch64II::MO_DTPREL_G0_NC:
+ Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOTTPREL:
+ Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOTTPREL_LO12:
+ Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_TLSDESC:
+ Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext);
+ break;
+ case AArch64II::MO_TLSDESC_LO12:
+ Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_TPREL_G1:
+ Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext);
+ break;
+ case AArch64II::MO_TPREL_G0_NC:
+ Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_NO_FLAG:
+ // Expr is already correct
+ break;
+ default:
+ llvm_unreachable("Unexpected MachineOperand flag");
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(),
+ OutContext),
+ OutContext);
+
+ return MCOperand::CreateExpr(Expr);
+}
+
+bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) const {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ if (MO.isImplicit())
+ return false;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex()));
+ break;
+  case MachineOperand::MO_RegisterMask:
+    // Ignore call clobbers
+    return false;
+  }
+
+ return true;
+}
+
+void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI,
+ MCInst &OutMI,
+ AArch64AsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (AP.lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..f45d8f784f42
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -0,0 +1,18 @@
+//===-- AArch64MachineFunctionInfo.cpp - AArch64 machine function info ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file just contains the anchor for the AArch64MachineFunctionInfo to
+// force vtable emission.
+//
+//===----------------------------------------------------------------------===//
+#include "AArch64MachineFunctionInfo.h"
+
+using namespace llvm;
+
+void AArch64MachineFunctionInfo::anchor() { }
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
new file mode 100644
index 000000000000..33da54f97fda
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -0,0 +1,149 @@
+//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AArch64-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AARCH64MACHINEFUNCTIONINFO_H
+#define AARCH64MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// This class is derived from MachineFunctionInfo and contains private AArch64
+/// target-specific information for each MachineFunction.
+class AArch64MachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// Number of bytes of arguments this function has on the stack. If the callee
+ /// is expected to restore the argument stack this should be a multiple of 16,
+ /// all usable during a tail call.
+ ///
+  /// The alternative would forbid tail call optimisation in some cases: if we
+  /// want to transfer control from a function with 8 bytes of stack-argument
+  /// space to a function with 16 bytes then misalignment of this value would
+  /// make a stack adjustment necessary, which could not be undone by the
+  /// callee.
+ unsigned BytesInStackArgArea;
+
+ /// The number of bytes to restore to deallocate space for incoming
+ /// arguments. Canonically 0 in the C calling convention, but non-zero when
+ /// callee is expected to pop the args.
+ unsigned ArgumentStackToRestore;
+
+ /// If the stack needs to be adjusted on frame entry in two stages, this
+ /// records the size of the first adjustment just prior to storing
+ /// callee-saved registers. The callee-saved slots are addressed assuming
+ /// SP == <incoming-SP> - InitialStackAdjust.
+ unsigned InitialStackAdjust;
+
+ /// Number of local-dynamic TLS accesses.
+ unsigned NumLocalDynamics;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of the area where LowerFormalArguments puts the
+ /// general-purpose registers that might contain variadic parameters.
+ int VariadicGPRIdx;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The size of the frame object used to store the general-purpose registers
+ /// which might contain variadic arguments. This is the offset from
+ /// VariadicGPRIdx to what's stored in __gr_top.
+ unsigned VariadicGPRSize;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of the area where LowerFormalArguments puts the
+ /// floating-point registers that might contain variadic parameters.
+ int VariadicFPRIdx;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The size of the frame object used to store the floating-point registers
+ /// which might contain variadic arguments. This is the offset from
+ /// VariadicFPRIdx to what's stored in __vr_top.
+ unsigned VariadicFPRSize;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of an object pointing just past the last known stacked
+ /// argument on entry to a variadic function. This goes into the __stack field
+ /// of the va_list type.
+ int VariadicStackIdx;
+
+ /// The offset of the frame pointer from the stack pointer on function
+ /// entry. This is expected to be negative.
+ int FramePointerOffset;
+
+public:
+ AArch64MachineFunctionInfo()
+ : BytesInStackArgArea(0),
+ ArgumentStackToRestore(0),
+ InitialStackAdjust(0),
+ NumLocalDynamics(0),
+ VariadicGPRIdx(0),
+ VariadicGPRSize(0),
+ VariadicFPRIdx(0),
+ VariadicFPRSize(0),
+ VariadicStackIdx(0),
+ FramePointerOffset(0) {}
+
+ explicit AArch64MachineFunctionInfo(MachineFunction &MF)
+ : BytesInStackArgArea(0),
+ ArgumentStackToRestore(0),
+ InitialStackAdjust(0),
+ NumLocalDynamics(0),
+ VariadicGPRIdx(0),
+ VariadicGPRSize(0),
+ VariadicFPRIdx(0),
+ VariadicFPRSize(0),
+ VariadicStackIdx(0),
+ FramePointerOffset(0) {}
+
+ unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
+ void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;}
+
+ unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; }
+ void setArgumentStackToRestore(unsigned bytes) {
+ ArgumentStackToRestore = bytes;
+ }
+
+ unsigned getInitialStackAdjust() const { return InitialStackAdjust; }
+ void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; }
+
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+
+ int getVariadicGPRIdx() const { return VariadicGPRIdx; }
+ void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; }
+
+ unsigned getVariadicGPRSize() const { return VariadicGPRSize; }
+ void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; }
+
+ int getVariadicFPRIdx() const { return VariadicFPRIdx; }
+ void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; }
+
+ unsigned getVariadicFPRSize() const { return VariadicFPRSize; }
+ void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; }
+
+ int getVariadicStackIdx() const { return VariadicStackIdx; }
+ void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; }
+
+ int getFramePointerOffset() const { return FramePointerOffset; }
+ void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; }
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
new file mode 100644
index 000000000000..20b0dcf86f46
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -0,0 +1,171 @@
+//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AArch64RegisterInfo.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "AArch64GenRegisterInfo.inc"
+
+using namespace llvm;
+
+AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo &tii,
+ const AArch64Subtarget &sti)
+ : AArch64GenRegisterInfo(AArch64::X30), TII(tii) {
+}
+
+const uint16_t *
+AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ return CSR_PCS_SaveList;
+}
+
+const uint32_t*
+AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+ return CSR_PCS_RegMask;
+}
+
+const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const {
+ return TLSDesc_RegMask;
+}
+
+const TargetRegisterClass *
+AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::FlagClassRegClass)
+ return &AArch64::GPR64RegClass;
+
+ return RC;
+}
+
+
+BitVector
+AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ Reserved.set(AArch64::XSP);
+ Reserved.set(AArch64::WSP);
+
+ Reserved.set(AArch64::XZR);
+ Reserved.set(AArch64::WZR);
+
+ if (TFI->hasFP(MF)) {
+ Reserved.set(AArch64::X29);
+ Reserved.set(AArch64::W29);
+ }
+
+ return Reserved;
+}
+
+void
+AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
+ int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet");
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64FrameLowering *TFI =
+ static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering());
+
+ // In order to work out the base and offset for addressing, the FrameLowering
+ // code needs to know (sometimes) whether the instruction is storing/loading a
+ // callee-saved register, or whether it's a more generic
+ // operation. Fortunately the frame indices are used *only* for that purpose
+ // and are contiguous, so we can check here.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (!CSI.empty()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI;
+
+ unsigned FrameReg;
+ int64_t Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
+ SPAdj, IsCalleeSaveOp);
+
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+
+ // DBG_VALUE instructions have no real restrictions so they can be handled
+ // easily.
+ if (MI.isDebugValue()) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/ false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ int MinOffset, MaxOffset, OffsetScale;
+ if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) {
+ MinOffset = 0;
+ MaxOffset = 0xfff;
+ OffsetScale = 1;
+ } else {
+ // Load/store of a stack object
+ TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset);
+ }
+
+ // The frame lowering has told us a base and offset it thinks we should use to
+ // access this variable, but it's still up to us to make sure the values are
+ // legal for the instruction in question.
+ if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) {
+ unsigned BaseReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII,
+ BaseReg, FrameReg, BaseReg, Offset);
+ FrameReg = BaseReg;
+ Offset = 0;
+ }
+
+ // Negative offsets would be expected if we addressed from the FP, but for
+ // now this assert just checks that nothing has gone horribly wrong.
+ assert(Offset >= 0 && "Unexpected negative offset from SP");
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale);
+}
+
+unsigned
+AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
+ return AArch64::X29;
+ else
+ return AArch64::XSP;
+}
+
+bool
+AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const AArch64FrameLowering *AFI
+ = static_cast<const AArch64FrameLowering*>(TFI);
+ return AFI->useFPForAddressing(MF);
+}
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
new file mode 100644
index 000000000000..bb64fd55b2c3
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -0,0 +1,76 @@
+//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H
+#define LLVM_TARGET_AARCH64REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AArch64GenRegisterInfo.inc"
+
+namespace llvm {
+
+class AArch64InstrInfo;
+class AArch64Subtarget;
+
+struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
+private:
+ const AArch64InstrInfo &TII;
+
+public:
+ AArch64RegisterInfo(const AArch64InstrInfo &tii,
+ const AArch64Subtarget &sti);
+
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+
+ const uint32_t *getTLSDescCallPreservedMask() const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *Rs = NULL) const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+ /// in the specified class to or from. Returns the original class if it is
+ /// possible to copy between two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ /// getLargestLegalSuperClass - Returns the largest super class of RC that is
+ /// legal to use in the current sub-target and has the same spill size.
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::tcGPR64RegClass)
+ return &AArch64::GPR64RegClass;
+
+ return RC;
+ }
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_AARCH64REGISTERINFO_H
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
new file mode 100644
index 000000000000..bd79546371c5
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -0,0 +1,203 @@
+//===- AArch64RegisterInfo.td - AArch64 Register defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains declarations that describe the AArch64 register file.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AArch64" in {
+def sub_128 : SubRegIndex;
+def sub_64 : SubRegIndex;
+def sub_32 : SubRegIndex;
+def sub_16 : SubRegIndex;
+def sub_8 : SubRegIndex;
+
+// The VPR registers are handled as sub-registers of FPR equivalents, but
+// they're really the same thing. We give this concept a special index.
+def sub_alias : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
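+// (Both the w and x views of a register share the same encoding: w7 and x7
+// are both number 7. The HWEncoding field below carries the value.)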
+class AArch64Reg<bits<16> enc, string n> : Register<n> {
+ let HWEncoding = enc;
+ let Namespace = "AArch64";
+}
+
+class AArch64RegWithSubs<bits<16> enc, string n, list<Register> subregs = [],
+ list<SubRegIndex> inds = []>
+ : AArch64Reg<enc, n> {
+ let SubRegs = subregs;
+ let SubRegIndices = inds;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-30 in {
+ def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>;
+}
+
+def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>;
+def WZR : AArch64Reg<31, "wzr">;
+
+// Could be combined with previous loop, but this way leaves w and x registers
+// consecutive as LLVM register numbers, which makes for easier debugging.
+foreach Index = 0-30 in {
+ def X#Index : AArch64RegWithSubs<Index, "x"#Index,
+ [!cast<Register>("W"#Index)], [sub_32]>,
+ DwarfRegNum<[Index]>;
+}
+
+def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>;
+def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>;
+
+// Most instructions treat register 31 as zero for reads and as a black hole
+// for writes.
+
+// Note that the order of registers is important for the Disassembler here:
+// tablegen uses it to form MCRegisterClass::getRegister, which we assume can
+// take an encoding value.
+def GPR32 : RegisterClass<"AArch64", [i32], 32,
+ (add (sequence "W%u", 0, 30), WZR)> {
+}
+
+def GPR64 : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 30), XZR)> {
+}
+
+def GPR32nowzr : RegisterClass<"AArch64", [i32], 32,
+ (sequence "W%u", 0, 30)> {
+}
+
+def GPR64noxzr : RegisterClass<"AArch64", [i64], 64,
+ (sequence "X%u", 0, 30)> {
+}
+
+// For tail calls, we can't use callee-saved registers or the structure-return
+// register, as they are supposed to be live across function calls and may be
+// clobbered by the epilogue.
+def tcGPR64 : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 7),
+ (sequence "X%u", 9, 18))> {
+}
+
+// Certain addressing-useful instructions accept sp directly. Again the order of
+// registers is important to the Disassembler.
+def GPR32wsp : RegisterClass<"AArch64", [i32], 32,
+ (add (sequence "W%u", 0, 30), WSP)> {
+}
+
+def GPR64xsp : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 30), XSP)> {
+}
+
+// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and
+// non-SP variants). We can't use a bare register in those patterns because
+// TableGen doesn't like it, so we need a class containing just stack
+// registers.
+def Rxsp : RegisterClass<"AArch64", [i64], 64,
+ (add XSP)> {
+}
+
+def Rwsp : RegisterClass<"AArch64", [i32], 32,
+ (add WSP)> {
+}
+
+//===----------------------------------------------------------------------===//
+// Scalar registers in the vector unit:
+// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-31 in {
+ def B # Index : AArch64Reg< Index, "b" # Index>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def H # Index : AArch64RegWithSubs<Index, "h" # Index,
+ [!cast<Register>("B" # Index)], [sub_8]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def S # Index : AArch64RegWithSubs<Index, "s" # Index,
+ [!cast<Register>("H" # Index)], [sub_16]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def D # Index : AArch64RegWithSubs<Index, "d" # Index,
+ [!cast<Register>("S" # Index)], [sub_32]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def Q # Index : AArch64RegWithSubs<Index, "q" # Index,
+ [!cast<Register>("D" # Index)], [sub_64]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
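+
+// N.b. each Qn register therefore transitively contains Dn, Sn, Hn and Bn
+// as sub-registers via the chain of indices defined above.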
+
+def FPR8 : RegisterClass<"AArch64", [i8], 8,
+ (sequence "B%u", 0, 31)> {
+}
+
+def FPR16 : RegisterClass<"AArch64", [f16], 16,
+ (sequence "H%u", 0, 31)> {
+}
+
+def FPR32 : RegisterClass<"AArch64", [f32], 32,
+ (sequence "S%u", 0, 31)> {
+}
+
+def FPR64 : RegisterClass<"AArch64", [f64], 64,
+ (sequence "D%u", 0, 31)> {
+}
+
+def FPR128 : RegisterClass<"AArch64", [f128], 128,
+ (sequence "Q%u", 0, 31)> {
+}
+
+//===----------------------------------------------------------------------===//
+// Vector registers:
+//===----------------------------------------------------------------------===//
+
+// NEON registers simply specify the overall vector, and it's expected that
+// Instructions will individually specify the acceptable data layout. In
+// principle this leaves two approaches open:
+// + An operand, giving a single ADDvvv instruction (for example). This turns
+// out to be unworkable in the assembly parser (without every Instruction
+// having a "cvt" function, at least) because the constraints can't be
+// properly enforced. It also complicates specifying patterns since each
+// instruction will accept many types.
+// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
+// details about NEON registers, but simplifies most other details.
+//
+// The second approach was taken.
+
+foreach Index = 0-31 in {
+ def V # Index : AArch64RegWithSubs<Index, "v" # Index,
+ [!cast<Register>("Q" # Index)],
+ [sub_alias]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
+
+// These two classes contain the same registers, which should be reasonably
+// sensible for MC and allocation purposes, but allow them to be treated
+// separately for things like stack spilling.
+def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64,
+ (sequence "V%u", 0, 31)>;
+
+def VPR128 : RegisterClass<"AArch64",
+ [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
+ (sequence "V%u", 0, 31)>;
+
+// Flags register
+def NZCV : Register<"nzcv"> {
+ let Namespace = "AArch64";
+}
+
+def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
+ let CopyCost = -1;
+ let isAllocatable = 0;
+}
diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td
new file mode 100644
index 000000000000..e17cdaa1f6d2
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Schedule.td
@@ -0,0 +1,10 @@
+//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
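+// A single empty itinerary: no per-unit scheduling information is provided
+// for generic AArch64 yet, so everything falls back to default latencies.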
+def GenericItineraries : ProcessorItineraries<[], [], []>;
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
new file mode 100644
index 000000000000..6bbe075a1b61
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -0,0 +1,25 @@
+//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-selectiondag-info"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+}
+
+AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {
+}
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
new file mode 100644
index 000000000000..d412ed2be180
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -0,0 +1,32 @@
+//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AArch64 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64SELECTIONDAGINFO_H
+#define LLVM_AARCH64SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class AArch64TargetMachine;
+
+class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
+ const AArch64Subtarget *Subtarget;
+public:
+ explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM);
+ ~AArch64SelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
new file mode 100644
index 000000000000..d17b73820994
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -0,0 +1,43 @@
+//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64Subtarget.h"
+#include "AArch64RegisterInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AArch64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS)
+ : AArch64GenSubtargetInfo(TT, CPU, FS)
+ , HasNEON(true)
+ , HasCrypto(true)
+ , TargetTriple(TT) {
+
+ ParseSubtargetFeatures(CPU, FS);
+}
+
+bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
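+ // Under PIC, anything that might be preemptible (i.e. neither local nor
+ // hidden) has to be reached via an indirection such as the GOT.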
+ return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility();
+}
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
new file mode 100644
index 000000000000..2e9205fc9924
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -0,0 +1,54 @@
+//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H
+#define LLVM_TARGET_AARCH64_SUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AArch64GenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+class GlobalValue;
+
+class AArch64Subtarget : public AArch64GenSubtargetInfo {
+protected:
+ bool HasNEON;
+ bool HasCrypto;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+public:
+ /// This constructor initializes the data members to match those
+ /// of the specified triple.
+ ///
+ AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_AARCH64_SUBTARGET_H
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
new file mode 100644
index 000000000000..df599d599dd6
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -0,0 +1,81 @@
+//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the AArch64TargetMachine
+// methods; principally, setting up the passes needed to generate correct
+// code on this architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeAArch64Target() {
+ RegisterTargetMachine<AArch64TargetMachine> X(TheAArch64Target);
+}
+
+AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ InstrInfo(Subtarget),
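+ // Little-endian, 64-bit pointers; i64 and f128 aligned to their sizes,
+ // i128 to 128 bits; native integer widths 32 and 64; 16-byte stack
+ // alignment.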
+ DL("e-p:64:64-i64:64:64-i128:128:128-s0:32:32-f128:128:128-n32:64-S128"),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
+}
+
+namespace {
+/// AArch64 Code Generator Pass Configuration Options.
+class AArch64PassConfig : public TargetPassConfig {
+public:
+ AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AArch64TargetMachine &getAArch64TargetMachine() const {
+ return getTM<AArch64TargetMachine>();
+ }
+
+ const AArch64Subtarget &getAArch64Subtarget() const {
+ return *getAArch64TargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AArch64PassConfig(this, PM);
+}
+
+bool AArch64PassConfig::addPreEmitPass() {
+ addPass(&UnpackMachineBundlesID);
+ addPass(createAArch64BranchFixupPass());
+ return true;
+}
+
+bool AArch64PassConfig::addInstSelector() {
+ addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel()));
+
+ // For ELF, cleanup any local-dynamic TLS accesses.
+ if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64CleanupLocalDynamicTLSPass());
+
+ return false;
+}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
new file mode 100644
index 000000000000..c1f47c2e5372
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -0,0 +1,69 @@
+//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64TARGETMACHINE_H
+#define LLVM_AARCH64TARGETMACHINE_H
+
+#include "AArch64FrameLowering.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64SelectionDAGInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AArch64TargetMachine : public LLVMTargetMachine {
+ AArch64Subtarget Subtarget;
+ AArch64InstrInfo InstrInfo;
+ const DataLayout DL;
+ AArch64TargetLowering TLInfo;
+ AArch64SelectionDAGInfo TSInfo;
+ AArch64FrameLowering FrameLowering;
+
+public:
+ AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ const AArch64InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+
+ const AArch64FrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ const AArch64TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ const AArch64SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ const DataLayout *getDataLayout() const { return &DL; }
+
+ const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
new file mode 100644
index 000000000000..b4452f514590
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -0,0 +1,24 @@
+//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file deals with any AArch64 specific requirements on object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64TargetObjectFile.h"
+
+using namespace llvm;
+
+void
+AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
new file mode 100644
index 000000000000..bf0565a79ec8
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -0,0 +1,31 @@
+//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file deals with any AArch64 specific requirements on object files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+ /// AArch64LinuxTargetObjectFile - This implementation is used for Linux
+ /// AArch64.
+ class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
new file mode 100644
index 000000000000..69bb80a48537
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -0,0 +1,2197 @@
+//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the (GNU-style) assembly parser for the AArch64
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AArch64Operand;
+
+class AArch64AsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+#define GET_ASSEMBLER_HEADER
+#include "AArch64GenAsmMatcher.inc"
+
+public:
+ enum AArch64MatchResultTy {
+ Match_FirstAArch64 = FIRST_TARGET_MATCH_RESULT_TY,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "AArch64GenAsmMatcher.inc"
+ };
+
+ AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ MCAsmParserExtension::Initialize(_Parser);
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ // These are the public interface of the MCTargetAsmParser
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool ParseDirective(AsmToken DirectiveID);
+ bool ParseDirectiveTLSDescCall(SMLoc L);
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer&Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ // The rest of the sub-parsers have more freedom over interface: they return
+ // an OperandMatchResultTy because it's less ambiguous than true/false or
+ // -1/0/1, even if it is more verbose.
+ OperandMatchResultTy
+ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic);
+
+ OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal);
+
+ OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind);
+
+ OperandMatchResultTy
+ ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t NumLanes);
+
+ OperandMatchResultTy
+ ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t &NumLanes);
+
+ OperandMatchResultTy
+ ParseImmWithLSLOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseCondCodeOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseCRxOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseFPImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ template<typename SomeNamedImmMapper> OperandMatchResultTy
+ ParseNamedImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return ParseNamedImmOperand(SomeNamedImmMapper(), Operands);
+ }
+
+ OperandMatchResultTy
+ ParseNamedImmOperand(const NamedImmMapper &Mapper,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseLSXAddressOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseShiftExtend(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ /// Scan the next token (which had better be an identifier) and determine
+ /// whether it represents a general-purpose or vector register. It returns
+ /// true if an identifier was found and populates its reference arguments. It
+ /// does not consume the token.
+ bool
+ IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec,
+ SMLoc &LayoutLoc) const;
+
+};
+
+}
+
+namespace {
+
+/// Instances of this class represent a parsed AArch64 machine instruction.
+class AArch64Operand : public MCParsedAsmOperand {
+private:
+ enum KindTy {
+ k_ImmWithLSL, // #uimm {, LSL #amt }
+ k_CondCode, // eq/ne/...
+ k_FPImmediate, // Limited-precision floating-point imm
+ k_Immediate, // Including expressions referencing symbols
+ k_Register,
+ k_ShiftExtend,
+ k_SysReg, // The register operand of MRS and MSR instructions
+ k_Token, // The mnemonic; other raw tokens for the auto-generated matcher
+ k_WrappedRegister // Load/store exclusive permit a wrapped register.
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ struct ImmWithLSLOp {
+ const MCExpr *Val;
+ unsigned ShiftAmount;
+ bool ImplicitAmount;
+ };
+
+ struct CondCodeOp {
+ A64CC::CondCodes Code;
+ };
+
+ struct FPImmOp {
+ double Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ShiftExtendOp {
+ A64SE::ShiftExtSpecifiers ShiftType;
+ unsigned Amount;
+ bool ImplicitAmount;
+ };
+
+ struct SysRegOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ union {
+ struct ImmWithLSLOp ImmWithLSL;
+ struct CondCodeOp CondCode;
+ struct FPImmOp FPImm;
+ struct ImmOp Imm;
+ struct RegOp Reg;
+ struct ShiftExtendOp ShiftExtend;
+ struct SysRegOp SysReg;
+ struct TokOp Tok;
+ };
+
+ AArch64Operand(KindTy K, SMLoc S, SMLoc E)
+ : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {}
+
+public:
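+ // N.b. this copy constructor copies no operand payload; operands are
+ // expected to be created through the Create* factories below.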
+ AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() {
+ }
+
+ SMLoc getStartLoc() const { return StartLoc; }
+ SMLoc getEndLoc() const { return EndLoc; }
+ void print(raw_ostream&) const;
+ void dump() const;
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register || Kind == k_WrappedRegister)
+ && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == k_Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ A64CC::CondCodes getCondCode() const {
+ assert(Kind == k_CondCode && "Invalid access!");
+ return CondCode.Code;
+ }
+
+ static bool isNonConstantExpr(const MCExpr *E,
+ AArch64MCExpr::VariantKind &Variant) {
+ if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(E)) {
+ Variant = A64E->getKind();
+ return true;
+ } else if (!isa<MCConstantExpr>(E)) {
+ Variant = AArch64MCExpr::VK_AARCH64_None;
+ return true;
+ }
+
+ return false;
+ }
+
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isReg() const { return Kind == k_Register; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isMem() const { return false; }
+ bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isShiftOrExtend() const { return Kind == k_ShiftExtend; }
+ bool isSysReg() const { return Kind == k_SysReg; }
+ bool isImmWithLSL() const { return Kind == k_ImmWithLSL; }
+ bool isWrappedReg() const { return Kind == k_WrappedRegister; }
+
+ bool isAddSubImmLSL0() const {
+ if (!isImmWithLSL()) return false;
+ if (ImmWithLSL.ShiftAmount != 0) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC
+ || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12;
+ }
+
+ // Otherwise it should be a real immediate in range:
+ const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ }
+
+ bool isAddSubImmLSL12() const {
+ if (!isImmWithLSL()) return false;
+ if (ImmWithLSL.ShiftAmount != 12) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12;
+ }
+
+ // Otherwise it should be a real immediate in range:
+ const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ }
+
+ template<unsigned MemSize, unsigned RmSize> bool isAddrRegExtend() const {
+ if (!isShiftOrExtend()) return false;
+
+ A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType;
+ if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW))
+ return false;
+
+ if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX))
+ return false;
+
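+ // The amount must be absent (0) or exactly log2 of the access size in
+ // bytes, e.g. 3 for an 8-byte access.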
+ return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0;
+ }
+
+ bool isAdrpLabel() const {
+ if (!isImm()) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(getImm(), Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_None
+ || Variant == AArch64MCExpr::VK_AARCH64_GOT
+ || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL
+ || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC;
+ }
+
+ return isLabel<21, 4096>();
+ }
+
+ template<unsigned RegWidth> bool isBitfieldWidth() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ }
+
+ template<int RegWidth>
+ bool isCVTFixedPos() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ }
+
+ bool isFMOVImm() const {
+ if (!isFPImm()) return false;
+
+ APFloat RealVal(FPImm.Val);
+ uint32_t ImmVal;
+ return A64Imms::isFPImm(RealVal, ImmVal);
+ }
+
+ bool isFPZero() const {
+ if (!isFPImm()) return false;
+
+ APFloat RealVal(FPImm.Val);
+ return RealVal.isPosZero();
+ }
+
+ template<unsigned field_width, unsigned scale>
+ bool isLabel() const {
+ if (!isImm()) return false;
+
+ if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) {
+ return true;
+ } else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Min = - (scale * (1LL << (field_width - 1)));
+ int64_t Max = scale * ((1LL << (field_width - 1)) - 1);
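+ // E.g. field_width=19, scale=4 (load-literal) covers -1MiB to 1MiB-4 in
+ // 4-byte steps.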
+ return (Val % scale) == 0 && Val >= Min && Val <= Max;
+ }
+
+ // N.b. this disallows explicit relocation specifications via an
+ // AArch64MCExpr; only bare symbols and suitably aligned, in-range
+ // constants are accepted here.
+ return false;
+ }
+
+ bool isLane1() const {
+ if (!isImm()) return false;
+
+ // Because it's come through custom assembly parsing, it must always be a
+ // constant expression.
+ return cast<MCConstantExpr>(getImm())->getValue() == 1;
+ }
+
+ bool isLoadLitLabel() const {
+ if (!isImm()) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(getImm(), Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_None
+ || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL;
+ }
+
+ return isLabel<19, 4>();
+ }
+
+ template<unsigned RegWidth> bool isLogicalImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+ if (!CE) return false;
+
+ uint32_t Bits;
+ return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+ }
+
+ template<unsigned RegWidth> bool isLogicalImmMOV() const {
+ if (!isLogicalImm<RegWidth>()) return false;
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+ // The move alias for ORR is only valid if the immediate cannot be
+ // represented with a move (immediate) instruction; they take priority.
+ int UImm16, Shift;
+ return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift)
+ && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift);
+ }
+
+ template<int MemSize>
+ bool isOffsetUImm12() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+ // Assume they know what they're doing for now if they've given us a
+ // non-constant expression. In principle we could check for ridiculous
+ // things that can't possibly work or relocations that would almost
+ // certainly break resulting code.
+ if (!CE)
+ return true;
+
+ int64_t Val = CE->getValue();
+
+ // Must be a multiple of the access size in bytes.
+ if ((Val & (MemSize - 1)) != 0) return false;
+
+ // Must be 12-bit unsigned
+ return Val >= 0 && Val <= 0xfff * MemSize;
+ }
+
+ template<A64SE::ShiftExtSpecifiers SHKind, bool is64Bit>
+ bool isShift() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != SHKind)
+ return false;
+
+ return is64Bit ? ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31;
+ }
+
+ bool isMOVN32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVN64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G2,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVZ32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0,
+ AArch64MCExpr::VK_AARCH64_ABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVZ64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0,
+ AArch64MCExpr::VK_AARCH64_ABS_G1,
+ AArch64MCExpr::VK_AARCH64_ABS_G2,
+ AArch64MCExpr::VK_AARCH64_ABS_G3,
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G2,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVK32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVK64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G2_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G3,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMoveWideImm(unsigned RegWidth,
+ AArch64MCExpr::VariantKind *PermittedModifiers,
+ unsigned NumModifiers) const {
+ if (!isImmWithLSL()) return false;
+
+ if (ImmWithLSL.ShiftAmount % 16 != 0) return false;
+ if (ImmWithLSL.ShiftAmount >= RegWidth) return false;
+
+ AArch64MCExpr::VariantKind Modifier;
+ if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) {
+ // E.g. "#:abs_g0:sym, lsl #16" makes no sense.
+ if (!ImmWithLSL.ImplicitAmount) return false;
+
+ for (unsigned i = 0; i < NumModifiers; ++i)
+ if (PermittedModifiers[i] == Modifier) return true;
+
+ return false;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff;
+ }
+
+ template<int RegWidth, bool (*isValidImm)(int, uint64_t, int&, int&)>
+ bool isMoveWideMovAlias() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ int UImm16, Shift;
+ uint64_t Value = CE->getValue();
+
+ // If this is a 32-bit instruction then all bits above 32 should be the
+ // same: either of these is fine because signed/unsigned values should be
+ // permitted.
+ if (RegWidth == 32) {
+ if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff)
+ return false;
+
+ Value &= 0xffffffffULL;
+ }
+
+ return isValidImm(RegWidth, Value, UImm16, Shift);
+ }
+
+ bool isMSRWithReg() const {
+ if (!isSysReg()) return false;
+
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64SysReg::MSRMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isMSRPState() const {
+ if (!isSysReg()) return false;
+
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64PState::PStateMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isMRS() const {
+ if (!isSysReg()) return false;
+
+ // First check against specific MSR-only (write-only) registers
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64SysReg::MRSMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isPRFM() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+ if (!CE)
+ return false;
+
+ return CE->getValue() >= 0 && CE->getValue() <= 31;
+ }
+
+ template<A64SE::ShiftExtSpecifiers SHKind> bool isRegExtend() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != SHKind)
+ return false;
+
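+ // Add/sub (extended register) forms permit a left shift of at most 4.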
+ return ShiftExtend.Amount <= 4;
+ }
+
+ bool isRegExtendLSL() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != A64SE::LSL)
+ return false;
+
+ return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
+ }
+
+ template<int MemSize> bool isSImm7Scaled() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ int64_t Val = CE->getValue();
+ if (Val % MemSize != 0) return false;
+
+ Val /= MemSize;
+
+ return Val >= -64 && Val < 64;
+ }
+
+ template<int BitWidth>
+ bool isSImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= -(1LL << (BitWidth - 1))
+ && CE->getValue() < (1LL << (BitWidth - 1));
+ }
+
+ template<int bitWidth>
+ bool isUImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth);
+ }
+
+ bool isUImm() const {
+ if (!isImm()) return false;
+
+ return isa<MCConstantExpr>(getImm());
+ }
+
+ static AArch64Operand *CreateImmWithLSL(const MCExpr *Val,
+ unsigned ShiftAmount,
+ bool ImplicitAmount,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E);
+ Op->ImmWithLSL.Val = Val;
+ Op->ImmWithLSL.ShiftAmount = ShiftAmount;
+ Op->ImmWithLSL.ImplicitAmount = ImplicitAmount;
+ return Op;
+ }
+
+ static AArch64Operand *CreateCondCode(A64CC::CondCodes Code,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E);
+ Op->CondCode.Code = Code;
+ return Op;
+ }
+
+ static AArch64Operand *CreateFPImm(double Val,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E);
+ Op->FPImm.Val = Val;
+ return Op;
+ }
+
+ static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E);
+ Op->Imm.Val = Val;
+ return Op;
+ }
+
+ static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_Register, S, E);
+ Op->Reg.RegNum = RegNum;
+ return Op;
+ }
+
+ static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E);
+ Op->Reg.RegNum = RegNum;
+ return Op;
+ }
+
+ static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp,
+ unsigned Amount,
+ bool ImplicitAmount,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E);
+ Op->ShiftExtend.ShiftType = ShiftTyp;
+ Op->ShiftExtend.Amount = Amount;
+ Op->ShiftExtend.ImplicitAmount = ImplicitAmount;
+ return Op;
+ }
+
+ static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) {
+ AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ return Op;
+ }
+
+ static AArch64Operand *CreateToken(StringRef Str, SMLoc S) {
+ AArch64Operand *Op = new AArch64Operand(k_Token, S, S);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ return Op;
+ }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ template<unsigned RegWidth>
+ void addBFILSBOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
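+ // BFI is an alias of BFM: the requested lsb is encoded in the immr field
+ // as -lsb modulo the register width.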
+ unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth;
+ Inst.addOperand(MCOperand::CreateImm(EncodedVal));
+ }
+
+ void addBFIWidthOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
+ void addBFXWidthOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm();
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+
+ Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCondCode()));
+ }
+
+ void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
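+ // Fixed-point conversions encode the number of fractional bits as
+ // 64 - #fbits in the scale field.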
+ Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue()));
+ }
+
+ void addFMOVImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ APFloat RealVal(FPImm.Val);
+ uint32_t ImmVal;
+ A64Imms::isFPImm(RealVal, ImmVal);
+
+ Inst.addOperand(MCOperand::CreateImm(ImmVal));
+ }
+
+ void addFPZeroOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ Inst.addOperand(MCOperand::CreateImm(0));
+ }
+
+ void addInvCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ unsigned Encoded = A64InvertCondCode(getCondCode());
+ Inst.addOperand(MCOperand::CreateImm(Encoded));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ template<int MemSize>
+ void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Val = CE->getValue() / MemSize;
+ Inst.addOperand(MCOperand::CreateImm(Val & 0x7f));
+ }
+
+ template<int BitWidth>
+ void addSImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1)));
+ }
+
+ void addImmWithLSLOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ addExpr(Inst, ImmWithLSL.Val);
+ }
+
+ template<unsigned field_width, unsigned scale>
+ void addLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+
+ if (!CE) {
+ addExpr(Inst, Imm.Val);
+ return;
+ }
+
+ int64_t Val = CE->getValue();
+ assert(Val % scale == 0 && "Unaligned immediate in instruction");
+ Val /= scale;
+
+ Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1)));
+ }
+
+ template<int MemSize>
+ void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize));
+ } else {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ }
+ }
+
+ template<unsigned RegWidth>
+ void addLogicalImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+ uint32_t Bits;
+ A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMRSOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMSRWithRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMSRPStateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMoveWideImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ addExpr(Inst, ImmWithLSL.Val);
+
+ AArch64MCExpr::VariantKind Variant;
+ if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16));
+ return;
+ }
+
+ // We know it's relocated
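+ // The modifier selects a 16-bit chunk of the symbol's value; the chunk
+ // number becomes the instruction's shift/16 field.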
+ switch (Variant) {
+ case AArch64MCExpr::VK_AARCH64_ABS_G0:
+ case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1:
+ case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
+ Inst.addOperand(MCOperand::CreateImm(1));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2:
+ case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ Inst.addOperand(MCOperand::CreateImm(2));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G3:
+ Inst.addOperand(MCOperand::CreateImm(3));
+ break;
+ default: llvm_unreachable("Inappropriate move wide relocation");
+ }
+ }
+
+ template<int RegWidth, bool isValidImm(int, uint64_t, int&, int&)>
+ void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int UImm16, Shift;
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+
+ if (RegWidth == 32) {
+ Value &= 0xffffffffULL;
+ }
+
+ bool Valid = isValidImm(RegWidth, Value, UImm16, Shift);
+ (void)Valid;
+ assert(Valid && "Invalid immediates should have been weeded out by now");
+
+ Inst.addOperand(MCOperand::CreateImm(UImm16));
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+ }
+
+ void addPRFMOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ assert(CE->getValue() >= 0 && CE->getValue() <= 31
+ && "PRFM operand should be 5-bits");
+
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ // For Add-sub (extended register) operands.
+ void addRegExtendOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ }
+
+ // For the extend in load-store (register offset) instructions.
+ template<unsigned MemSize>
+ void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const {
+ addAddrRegExtendOperands(Inst, N, MemSize);
+ }
+
+ void addAddrRegExtendOperands(MCInst &Inst, unsigned N,
+ unsigned MemSize) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ // The low bit of Option is fixed by the instruction class; the high two
+ // bits are encoded, together with S, as follows:
+ unsigned OptionHi = 0;
+ switch (ShiftExtend.ShiftType) {
+ case A64SE::UXTW:
+ case A64SE::LSL:
+ OptionHi = 1;
+ break;
+ case A64SE::SXTW:
+ case A64SE::SXTX:
+ OptionHi = 3;
+ break;
+ default:
+ llvm_unreachable("Invalid extend type for register offset");
+ }
+
+ unsigned S = 0;
+ if (MemSize == 1 && !ShiftExtend.ImplicitAmount)
+ S = 1;
+ else if (MemSize != 1 && ShiftExtend.Amount != 0)
+ S = 1;
+
+ Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S));
+ }
+
+ void addShiftOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ }
+};
+
+} // end anonymous namespace.
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+
+ // See if the operand has a custom parser
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+
+ // It could either succeed, fail or just not care.
+ if (ResTy != MatchOperand_NoMatch)
+ return ResTy;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return MatchOperand_ParseFail;
+ case AsmToken::Identifier: {
+ // It might be in the LSL/UXTB family ...
+ OperandMatchResultTy GotShift = ParseShiftExtend(Operands);
+
+ // We can only continue if no tokens were eaten.
+ if (GotShift != MatchOperand_NoMatch)
+ return GotShift;
+
+ // ... or it might be a register ...
+ uint32_t NumLanes = 0;
+ OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes);
+ assert(GotReg != MatchOperand_ParseFail
+ && "register parsing shouldn't partially succeed");
+
+ if (GotReg == MatchOperand_Success) {
+ if (Parser.getTok().is(AsmToken::LBrac))
+ return ParseNEONLane(Operands, NumLanes);
+ else
+ return MatchOperand_Success;
+ }
+
+ // ... or it might be a symbolish thing
+ }
+ // Fall through
+ case AsmToken::LParen: // E.g. (strcmp-4)
+ case AsmToken::Integer: // 1f, 2b labels
+ case AsmToken::String: // quoted labels
+ case AsmToken::Dot: // '.' is the current location
+ case AsmToken::Dollar: // '$' is the PC
+ case AsmToken::Colon: {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ const MCExpr *ImmVal = 0;
+
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
+ return MatchOperand_Success;
+ }
+ case AsmToken::Hash: { // Immediates
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ const MCExpr *ImmVal = 0;
+ Parser.Lex();
+
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
+ return MatchOperand_Success;
+ }
+ case AsmToken::LBrac: {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("[", Loc));
+ Parser.Lex(); // Eat '['
+
+ // There's no comma after a '[', so we can parse the next operand
+ // immediately.
+ return ParseOperand(Operands, Mnemonic);
+ }
+ // The following will likely be useful later, but isn't handled in these
+ // early stages:
+ case AsmToken::LCurly: // Weird SIMD lists
+ llvm_unreachable("Don't know how to deal with '{' in operand");
+ return MatchOperand_ParseFail;
+ }
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) {
+ if (getLexer().is(AsmToken::Colon)) {
+ AArch64MCExpr::VariantKind RefKind;
+
+ OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind);
+ if (ResTy != MatchOperand_Success)
+ return ResTy;
+
+ const MCExpr *SubExprVal;
+ if (getParser().parseExpression(SubExprVal))
+ return MatchOperand_ParseFail;
+
+ ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext());
+ return MatchOperand_Success;
+ }
+
+ // No weird AArch64MCExpr prefix
+ return getParser().parseExpression(ExprVal)
+ ? MatchOperand_ParseFail : MatchOperand_Success;
+}
+
+// A lane attached to a NEON register, e.g. "[2]", which should yield three
+// tokens: '[', the lane number, ']'. A '#' is not allowed to precede the
+// immediate here.
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t NumLanes) {
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand");
+ Operands.push_back(AArch64Operand::CreateToken("[", Loc));
+ Parser.Lex(); // Eat '['
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(), "expected lane number");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Parser.getTok().getIntVal() >= NumLanes) {
+ Error(Parser.getTok().getLoc(), "lane number incompatible with layout");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(),
+ getContext());
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat actual lane
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImm(Lane, S, E));
+
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(Parser.getTok().getLoc(), "expected ']' after lane");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AArch64Operand::CreateToken("]", Loc));
+ Parser.Lex(); // Eat ']'
+
+ return MatchOperand_Success;
+}
+
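+ // Parse the ":spec:" prefix of a relocated operand, e.g. the ":lo12:" in
+ // "add x0, x0, #:lo12:var".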
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) {
+ assert(getLexer().is(AsmToken::Colon) && "expected a ':'");
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(),
+ "expected relocation specifier in operand after ':'");
+ return MatchOperand_ParseFail;
+ }
+
+ std::string LowerCase = Parser.getTok().getIdentifier().lower();
+ RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase)
+ .Case("got", AArch64MCExpr::VK_AARCH64_GOT)
+ .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12)
+ .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12)
+ .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0)
+ .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC)
+ .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1)
+ .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC)
+ .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2)
+ .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC)
+ .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3)
+ .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0)
+ .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1)
+ .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2)
+ .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2)
+ .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1)
+ .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC)
+ .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0)
+ .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC)
+ .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12)
+ .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12)
+ .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC)
+ .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1)
+ .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC)
+ .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL)
+ .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12)
+ .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2)
+ .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1)
+ .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC)
+ .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0)
+ .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC)
+ .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12)
+ .Case("tprel_lo12", AArch64MCExpr::VK_AARCH64_TPREL_LO12)
+ .Case("tprel_lo12_nc", AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC)
+ .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC)
+ .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12)
+ .Default(AArch64MCExpr::VK_AARCH64_None);
+
+ if (RefKind == AArch64MCExpr::VK_AARCH64_None) {
+ Error(Parser.getTok().getLoc(),
+ "expected relocation specifier in operand after ':'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat identifier
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(),
+ "expected ':' after relocation specifier");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+ return MatchOperand_Success;
+}
+
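+ // Parse an immediate with an optional left-shift, e.g. the "#1, lsl #12"
+ // in "add x0, x1, #1, lsl #12".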
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseImmWithLSLOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // FIXME?: I want to live in a world where immediates must start with
+ // #. Please don't dash my hopes (well, do if you have a good reason).
+ if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '#'
+
+ const MCExpr *Imm;
+ if (ParseImmediate(Imm) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ else if (Parser.getTok().isNot(AsmToken::Comma)) {
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E));
+ return MatchOperand_Success;
+ }
+
+ // Eat ','
+ Parser.Lex();
+
+ // The optional operand must be "lsl #N" where N is non-negative.
+ if (Parser.getTok().isNot(AsmToken::Identifier)
+ || Parser.getTok().getIdentifier().lower() != "lsl") {
+ Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat 'lsl'
+
+ // The '#' before the shift amount is optional.
+ if (Parser.getTok().is(AsmToken::Hash))
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ int64_t ShiftAmount = Parser.getTok().getIntVal();
+
+ if (ShiftAmount < 0) {
+ Error(Parser.getTok().getLoc(), "positive shift amount required");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the number
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount,
+ false, S, E));
+ return MatchOperand_Success;
+}
+
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseCondCodeOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ StringRef Tok = Parser.getTok().getIdentifier();
+ A64CC::CondCodes CondCode = A64StringToCondCode(Tok);
+
+ if (CondCode == A64CC::Invalid)
+ return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat condition code
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E));
+ return MatchOperand_Success;
+}
+
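+ // Parse a control-register-style "cN" operand (0 <= N <= 15), as used by
+ // the SYS family, e.g. the "c7" in "sys #0, c7, c5, #0, x0".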
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseCRxOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ std::string LowerTok = Parser.getTok().getIdentifier().lower();
+ StringRef Tok(LowerTok);
+ if (Tok[0] != 'c') {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ uint32_t CRNum;
+ bool BadNum = Tok.drop_front().getAsInteger(10, CRNum);
+ if (BadNum || CRNum > 15) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext());
+
+ Parser.Lex();
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E));
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseFPImmOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ // FIXME?: I want to live in a world where immediates must start with
+ // #. Please don't dash my hopes (well, do if you have a good reason).
+ if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '#'
+
+ bool Negative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Negative = true;
+ Parser.Lex(); // Eat '-'
+ } else if (Parser.getTok().is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat '+'
+ }
+
+ if (Parser.getTok().isNot(AsmToken::Real)) {
+ Error(S, "Expected floating-point immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString());
+ if (Negative) RealVal.changeSign();
+ double DblVal = RealVal.convertToDouble();
+
+ Parser.Lex(); // Eat real number
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E));
+ return MatchOperand_Success;
+}
+
+
+// Automatically generated by TableGen (see AArch64GenAsmMatcher.inc below).
+static unsigned MatchRegisterName(StringRef Name);
+
+bool
+AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
+ StringRef &Layout,
+ SMLoc &LayoutLoc) const {
+ const AsmToken &Tok = Parser.getTok();
+
+ if (Tok.isNot(AsmToken::Identifier))
+ return false;
+
+ std::string LowerReg = Tok.getString().lower();
+ size_t DotPos = LowerReg.find('.');
+
+ RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
+ if (RegNum == AArch64::NoRegister) {
+ RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
+ .Case("ip0", AArch64::X16)
+ .Case("ip1", AArch64::X17)
+ .Case("fp", AArch64::X29)
+ .Case("lr", AArch64::X30)
+ .Default(AArch64::NoRegister);
+ }
+ if (RegNum == AArch64::NoRegister)
+ return false;
+
+ SMLoc S = Tok.getLoc();
+ RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos);
+
+ if (DotPos == StringRef::npos) {
+ Layout = StringRef();
+ } else {
+ // Everything afterwards needs to be a literal token, expected to be
+ // '.2d', '.b', etc. for vector registers.
+
+ // This StringSwitch validates the input and (perhaps more importantly)
+ // gives us a permanent string to use in the token (a pointer into LowerReg
+ // would go out of scope when we return).
+ LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
+ std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos);
+ Layout = StringSwitch<const char *>(LayoutText)
+ .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d")
+ .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s")
+ .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h")
+ .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b")
+ .Default("");
+
+ if (Layout.size() == 0) {
+ // Malformed register
+ return false;
+ }
+ }
+
+ return true;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t &NumLanes) {
+ unsigned RegNum;
+ StringRef Layout;
+ SMLoc RegEndLoc, LayoutLoc;
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc));
+
+ if (Layout.size() != 0) {
+ unsigned long long TmpLanes = 0;
+ llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes);
+ if (TmpLanes != 0) {
+ NumLanes = TmpLanes;
+ } else {
+ // If the number of lanes isn't specified explicitly, a valid instruction
+ // will have an element specifier and be capable of acting on the entire
+ // vector register.
+ switch (Layout.back()) {
+ default: llvm_unreachable("Invalid layout specifier");
+ case 'b': NumLanes = 16; break;
+ case 'h': NumLanes = 8; break;
+ case 's': NumLanes = 4; break;
+ case 'd': NumLanes = 2; break;
+ }
+ }
+
+ Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc));
+ }
+
+ Parser.Lex();
+ return MatchOperand_Success;
+}
+
+bool
+AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ // This callback is used for things like DWARF frame directives in
+ // assembly. They don't care about things like NEON layouts or lanes, they
+ // just want to be able to produce the DWARF register number.
+ StringRef LayoutSpec;
+ SMLoc RegEndLoc, LayoutLoc;
+ StartLoc = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc))
+ return true;
+
+ Parser.Lex();
+ EndLoc = Parser.getTok().getLoc();
+
+ return false;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Since these operands occur in very limited circumstances, without
+ // alternatives, we actually signal an error if there is no match. If relaxing
+ // this, beware of unintended consequences: an immediate will be accepted
+ // during matching, no matter how it gets into the AArch64Operand.
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+
+ if (Tok.is(AsmToken::Identifier)) {
+ bool ValidName;
+ uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName);
+
+ if (!ValidName) {
+ Error(S, "operand specifier not recognised");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // We're done with the identifier. Eat it
+
+ SMLoc E = Parser.getTok().getLoc();
+ const MCExpr *Imm = MCConstantExpr::Create(Code, getContext());
+ Operands.push_back(AArch64Operand::CreateImm(Imm, S, E));
+ return MatchOperand_Success;
+ } else if (Tok.is(AsmToken::Hash)) {
+ Parser.Lex();
+
+ const MCExpr *ImmVal;
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) {
+ Error(S, "Invalid immediate for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E));
+ return MatchOperand_Success;
+ }
+
+ Error(S, "unexpected operand for instruction");
+ return MatchOperand_ParseFail;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseSysRegOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+
+ // Any MSR/MRS operand will be an identifier, and we want to store it as some
+ // kind of string: SPSel is valid for two different forms of MSR with two
+ // different encodings. There's no collision at the moment, but the potential
+ // is there.
+ if (!Tok.is(AsmToken::Identifier)) {
+ return MatchOperand_NoMatch;
+ }
+
+ SMLoc S = Tok.getLoc();
+ Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S));
+ Parser.Lex(); // Eat identifier
+
+ return MatchOperand_Success;
+}
+
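+ // Parse the address operand of a load/store-exclusive instruction: a
+ // 64-bit base register in "[xN]" or "[xN, #0]" form; #0 is the only
+ // offset this addressing mode permits.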
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseLSXAddressOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ unsigned RegNum;
+ SMLoc RegEndLoc, LayoutLoc;
+ StringRef Layout;
+ if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)
+ || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum)
+ || Layout.size() != 0) {
+ // Check Layout.size because we don't want to let "x3.4s" or similar
+ // through.
+ return MatchOperand_NoMatch;
+ }
+ Parser.Lex(); // Eat register
+
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // We're done
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
+ return MatchOperand_Success;
+ }
+
+ // Otherwise, only ", #0" is valid
+
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "expected ',' or ']' after register");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat ','
+
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "expected '#0'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '#'
+
+ if (Parser.getTok().isNot(AsmToken::Integer)
+ || Parser.getTok().getIntVal() != 0) {
+ Error(Parser.getTok().getLoc(), "expected '#0'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '0'
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseShiftExtend(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef IDVal = Parser.getTok().getIdentifier();
+ std::string LowerID = IDVal.lower();
+
+ A64SE::ShiftExtSpecifiers Spec =
+ StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID)
+ .Case("lsl", A64SE::LSL)
+ .Case("lsr", A64SE::LSR)
+ .Case("asr", A64SE::ASR)
+ .Case("ror", A64SE::ROR)
+ .Case("uxtb", A64SE::UXTB)
+ .Case("uxth", A64SE::UXTH)
+ .Case("uxtw", A64SE::UXTW)
+ .Case("uxtx", A64SE::UXTX)
+ .Case("sxtb", A64SE::SXTB)
+ .Case("sxth", A64SE::SXTH)
+ .Case("sxtw", A64SE::SXTW)
+ .Case("sxtx", A64SE::SXTX)
+ .Default(A64SE::Invalid);
+
+ if (Spec == A64SE::Invalid)
+ return MatchOperand_NoMatch;
+
+ // Eat the shift
+ SMLoc S, E;
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ // Record an end location now, in case the amount is omitted below.
+ E = Parser.getTok().getLoc();
+
+ if (Spec != A64SE::LSL && Spec != A64SE::LSR &&
+ Spec != A64SE::ASR && Spec != A64SE::ROR) {
+ // The shift amount can be omitted for the extending versions, but not real
+ // shifts:
+ // add x0, x0, x0, uxtb
+ // is valid, and equivalent to
+ // add x0, x0, x0, uxtb #0
+
+ if (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::EndOfStatement) ||
+ Parser.getTok().is(AsmToken::RBrac)) {
+ Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true,
+ S, E));
+ return MatchOperand_Success;
+ }
+ }
+
+ // Eat # at beginning of immediate
+ if (!Parser.getTok().is(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(),
+ "expected #imm after shift specifier");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+
+ // Make sure we do actually have a number
+ if (!Parser.getTok().is(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(),
+ "expected integer shift amount");
+ return MatchOperand_ParseFail;
+ }
+ unsigned Amount = Parser.getTok().getIntVal();
+ Parser.Lex();
+ E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false,
+ S, E));
+
+ return MatchOperand_Success;
+}
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool AArch64AsmParser::
+validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ switch (Inst.getOpcode()) {
+ case AArch64::BFIwwii:
+ case AArch64::BFIxxii:
+ case AArch64::SBFIZwwii:
+ case AArch64::SBFIZxxii:
+ case AArch64::UBFIZwwii:
+ case AArch64::UBFIZxxii: {
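+ // For the BFI/SBFIZ/UBFIZ aliases, ImmR encodes (RegWidth - lsb) %
+ // RegWidth and ImmS encodes (width - 1); the insert overflows the
+ // register when lsb + width > RegWidth, i.e. when ImmS >= ImmR for
+ // non-zero ImmR.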
+ unsigned ImmOps = Inst.getNumOperands() - 2;
+ int64_t ImmR = Inst.getOperand(ImmOps).getImm();
+ int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
+
+ if (ImmR != 0 && ImmS >= ImmR) {
+ return Error(Operands[4]->getStartLoc(),
+ "requested insert overflows register");
+ }
+ return false;
+ }
+ case AArch64::BFXILwwii:
+ case AArch64::BFXILxxii:
+ case AArch64::SBFXwwii:
+ case AArch64::SBFXxxii:
+ case AArch64::UBFXwwii:
+ case AArch64::UBFXxxii: {
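+ // For the BFXIL/SBFX/UBFX aliases, ImmR is the lsb and ImmS is
+ // lsb + width - 1, so the extract overflows when ImmS reaches the
+ // register width (an empty range, ImmS < ImmR, is also invalid).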
+ unsigned ImmOps = Inst.getNumOperands() - 2;
+ int64_t ImmR = Inst.getOperand(ImmOps).getImm();
+ int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
+ int64_t RegWidth = 0;
+ switch (Inst.getOpcode()) {
+ case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii:
+ RegWidth = 64;
+ break;
+ case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii:
+ RegWidth = 32;
+ break;
+ }
+
+ if (ImmS >= RegWidth || ImmS < ImmR) {
+ return Error(Operands[4]->getStartLoc(),
+ "requested extract overflows register");
+ }
+ return false;
+ }
+ case AArch64::ICix: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
+ if (!A64IC::NeedsRegister(ICOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified IC op does not use a register");
+ }
+ return false;
+ }
+ case AArch64::ICi: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
+ if (A64IC::NeedsRegister(ICOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified IC op requires a register");
+ }
+ return false;
+ }
+ case AArch64::TLBIix: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
+ if (!A64TLBI::NeedsRegister(TLBIOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified TLBI op does not use a register");
+ }
+ return false;
+ }
+ case AArch64::TLBIi: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
+ if (A64TLBI::NeedsRegister(TLBIOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified TLBI op requires a register");
+ }
+ return false;
+ }
+ }
+
+ return false;
+}
+
+
+// Parses the instruction *together with* all operands, appending each parsed
+// operand to the "Operands" list
+bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
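+ // Mnemonics may carry a condition code after a '.', e.g. "b.eq", so split
+ // the name there and treat the suffix as a separate operand.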
+ size_t CondCodePos = Name.find('.');
+
+ StringRef Mnemonic = Name.substr(0, CondCodePos);
+ Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc));
+
+ if (CondCodePos != StringRef::npos) {
+ // We have a condition code
+ SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1);
+ StringRef CondStr = Name.substr(CondCodePos + 1, StringRef::npos);
+ A64CC::CondCodes Code;
+
+ Code = A64StringToCondCode(CondStr);
+
+ if (Code == A64CC::Invalid) {
+ Error(S, "invalid condition code");
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos);
+
+ Operands.push_back(AArch64Operand::CreateToken(".", DotL));
+ SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 3);
+ Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E));
+ }
+
+ // Now we parse the operands of this instruction
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ // After successfully parsing some operands there are two special cases to
+ // consider (i.e. notional operands not separated by commas). Both are due
+ // to memory specifiers:
+ // + An RBrac will end an address for load/store/prefetch
+ // + An '!' will indicate a pre-indexed operation.
+ //
+ // It's someone else's responsibility to make sure these tokens are sane
+ // in the given context!
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("]", Loc));
+ Parser.Lex();
+ }
+
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("!", Loc));
+ Parser.Lex();
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "expected comma before next operand");
+ }
+
+ // Eat the EndOfStatement
+ Parser.Lex();
+
+ return false;
+}
+
+bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".hword")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ else if (IDVal == ".word")
+ return ParseDirectiveWord(4, DirectiveID.getLoc());
+ else if (IDVal == ".xword")
+ return ParseDirectiveWord(8, DirectiveID.getLoc());
+ else if (IDVal == ".tlsdesccall")
+ return ParseDirectiveTLSDescCall(DirectiveID.getLoc());
+
+ return true;
+}
+
+/// parseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+// parseDirectiveTLSDescCall:
+// ::= .tlsdesccall symbol
+bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return Error(L, "expected symbol after directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
+
+ MCInst Inst;
+ Inst.setOpcode(AArch64::TLSDESCCALL);
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+
+ getParser().getStreamer().EmitInstruction(Inst);
+ return false;
+}
+
+
+bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult;
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
+
+ if (ErrorInfo != ~0U && ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ switch (MatchResult) {
+ default: break;
+ case Match_Success:
+ if (validateInstruction(Inst, Operands))
+ return true;
+
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction");
+
+ case Match_AddSubRegExtendSmall:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]");
+ case Match_AddSubRegExtendLarge:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]");
+ case Match_AddSubRegShift32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]");
+ case Match_AddSubRegShift64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]");
+ case Match_AddSubSecondSource:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register, symbol or integer in range [0, 4095]");
+ case Match_CVTFixedPos32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 32]");
+ case Match_CVTFixedPos64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 64]");
+ case Match_CondCode:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected AArch64 condition code");
+ case Match_FPImm:
+ // Any situation which allows a nontrivial floating-point constant also
+ // allows a register.
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register or floating-point constant");
+ case Match_FPZero:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected floating-point constant #0.0");
+ case Match_Label:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected label or encodable integer pc offset");
+ case Match_Lane1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected lane specifier '[1]'");
+ case Match_LoadStoreExtend32_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0");
+ case Match_LoadStoreExtend32_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1");
+ case Match_LoadStoreExtend32_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2");
+ case Match_LoadStoreExtend32_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3");
+ case Match_LoadStoreExtend32_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtw' with optional shift of #0 or #4");
+ case Match_LoadStoreExtend64_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0");
+ case Match_LoadStoreExtend64_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #1");
+ case Match_LoadStoreExtend64_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #2");
+ case Match_LoadStoreExtend64_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #3");
+ case Match_LoadStoreExtend64_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #4");
+ case Match_LoadStoreSImm7_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 4 in range [-256, 252]");
+ case Match_LoadStoreSImm7_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 8 in range [-512, 508]");
+ case Match_LoadStoreSImm7_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 16 in range [-1024, 1016]");
+ case Match_LoadStoreSImm9:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [-256, 255]");
+ case Match_LoadStoreUImm12_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 4095]");
+ case Match_LoadStoreUImm12_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 8190]");
+ case Match_LoadStoreUImm12_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 16380]");
+ case Match_LoadStoreUImm12_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 32760]");
+ case Match_LoadStoreUImm12_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 65520]");
+ case Match_LogicalSecondSource:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register or logical immediate");
+ case Match_MOVWUImm16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected relocated symbol or integer in range [0, 65535]");
+ case Match_MRS:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected readable system register");
+ case Match_MSR:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected writable system register or pstate");
+ case Match_NamedImm_at:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic 'at' operand: s1e[0-3][rw] or s12e[01][rw]");
+ case Match_NamedImm_dbarrier:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15] or symbolic barrier operand");
+ case Match_NamedImm_dc:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic 'dc' operand");
+ case Match_NamedImm_ic:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'ic' operand: 'ialluis', 'iallu' or 'ivau'");
+ case Match_NamedImm_isb:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15] or 'sy'");
+ case Match_NamedImm_prefetch:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected prefetch hint: p(ld|st|i)l[123](strm|keep)");
+ case Match_NamedImm_tlbi:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected translation buffer invalidation operand");
+ case Match_UImm16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 65535]");
+ case Match_UImm3:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 7]");
+ case Match_UImm4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15]");
+ case Match_UImm5:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 31]");
+ case Match_UImm6:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 63]");
+ case Match_UImm7:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 127]");
+ case Match_Width32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [<lsb>, 31]");
+ case Match_Width64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [<lsb>, 63]");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+void AArch64Operand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case k_CondCode:
+ OS << "<CondCode: " << CondCode.Code << ">";
+ break;
+ case k_FPImmediate:
+ OS << "<fpimm: " << FPImm.Val << ">";
+ break;
+ case k_ImmWithLSL:
+ OS << "<immwithlsl: imm=" << ImmWithLSL.Val
+ << ", shift=" << ImmWithLSL.ShiftAmount << ">";
+ break;
+ case k_Immediate:
+ getImm()->print(OS);
+ break;
+ case k_Register:
+ OS << "<register " << getReg() << '>';
+ break;
+ case k_Token:
+ OS << '\'' << getToken() << '\'';
+ break;
+ case k_ShiftExtend:
+ OS << "<shift: type=" << ShiftExtend.ShiftType
+ << ", amount=" << ShiftExtend.Amount << ">";
+ break;
+ case k_SysReg: {
+ StringRef Name(SysReg.Data, SysReg.Length);
+ OS << "<sysreg: " << Name << '>';
+ break;
+ }
+ default:
+ llvm_unreachable("No idea how to print this kind of operand");
+ break;
+ }
+}
+
+void AArch64Operand::dump() const {
+ print(errs());
+}
+
+
+/// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmParser() {
+ RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64Target);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "AArch64GenAsmMatcher.inc"
diff --git a/lib/Target/AArch64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..a018a0aa7b36
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64AsmParser
+ AArch64AsmParser.cpp
+ )
+
+add_dependencies(LLVMAArch64AsmParser AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..bd1fcaf1ffe8
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64AsmParser
+parent = AArch64
+required_libraries = AArch64Desc AArch64Info MC MCParser Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile
new file mode 100644
index 000000000000..56c9ef52ea58
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64AsmParser
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
new file mode 100644
index 000000000000..8164d6f73c97
--- /dev/null
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -0,0 +1,36 @@
+set(LLVM_TARGET_DEFINITIONS AArch64.td)
+
+tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
+tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering)
+tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(AArch64CommonTableGen)
+
+add_llvm_target(AArch64CodeGen
+ AArch64AsmPrinter.cpp
+ AArch64BranchFixupPass.cpp
+ AArch64FrameLowering.cpp
+ AArch64ISelDAGToDAG.cpp
+ AArch64ISelLowering.cpp
+ AArch64InstrInfo.cpp
+ AArch64MachineFunctionInfo.cpp
+ AArch64MCInstLower.cpp
+ AArch64RegisterInfo.cpp
+ AArch64SelectionDAGInfo.cpp
+ AArch64Subtarget.cpp
+ AArch64TargetMachine.cpp
+ AArch64TargetObjectFile.cpp
+ )
+
+add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
+add_subdirectory(TargetInfo)
+add_subdirectory(Utils)
\ No newline at end of file
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
new file mode 100644
index 000000000000..12c1b8f4c81a
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -0,0 +1,803 @@
+//===- AArch64Disassembler.cpp - Disassembler for AArch64 ISA -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the functions necessary to decode AArch64 instruction
+// bitpatterns into MCInsts (with the help of TableGenerated information from
+// the instruction definitions).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-disassembler"
+
+#include "AArch64.h"
+#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+/// AArch64 disassembler for all AArch64 platforms.
+class AArch64Disassembler : public MCDisassembler {
+ const MCRegisterInfo *RegInfo;
+public:
+ /// Initializes the disassembler.
+ ///
+ AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info)
+ : MCDisassembler(STI), RegInfo(Info) {
+ }
+
+ ~AArch64Disassembler() {
+ }
+
+ /// See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+};
+
+}
+
+// Forward-declarations used in the auto-generated files.
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus
+DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus
+DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
+ unsigned OptionHiS,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+ unsigned RmBits,
+ uint64_t Address,
+ const void *Decoder);
+
+template<int RegWidth>
+static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
+ unsigned FullImm,
+ uint64_t Address,
+ const void *Decoder);
+
+template<int RegWidth>
+static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
+ unsigned Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+template<typename SomeNamedImmMapper>
+static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus
+DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper,
+ llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static bool Check(DecodeStatus &Out, DecodeStatus In);
+
+#include "AArch64GenDisassemblerTables.inc"
+#include "AArch64GenInstrInfo.inc"
+
+static bool Check(DecodeStatus &Out, DecodeStatus In) {
+ switch (In) {
+ case MCDisassembler::Success:
+ // Out stays the same.
+ return true;
+ case MCDisassembler::SoftFail:
+ Out = In;
+ return true;
+ case MCDisassembler::Fail:
+ Out = In;
+ return false;
+ }
+ llvm_unreachable("Invalid DecodeStatus!");
+}
+
+DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // Encoded as a little-endian 32-bit word in the stream.
+ uint32_t insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
+
+ // Call the auto-generated decoder function.
+ DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D);
+ return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo);
+}
+
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus
+DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus
+DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
+ unsigned OptionHiS,
+ uint64_t Address,
+ const void *Decoder) {
+ // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1},
+ // S}. Hence we want to check bit 1.
+ if (!(OptionHiS & 2))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(OptionHiS));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be
+ // between 0 and 31.
+ if (Imm6Bits > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32.
+ if (Imm6Bits < 32)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+ unsigned RmBits,
+ uint64_t Address,
+ const void *Decoder) {
+ // Any bits are valid in the instruction (they're architecturally ignored),
+ // but a code generator should insert 0.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return MCDisassembler::Success;
+}
+
+
+
+template<int RegWidth>
+static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
+ unsigned FullImm,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Imm16 = FullImm & 0xffff;
+ unsigned Shift = FullImm >> 16;
+
+ if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm16));
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+ return MCDisassembler::Success;
+}
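+
+// Worked example (illustrative): FullImm = 0x2abcd unpacks to Imm16 = 0xabcd
+// and Shift = 2, i.e. "movz x0, #0xabcd, lsl #32". A 32-bit register only has
+// shifts of 0 and 16, hence the Shift > 1 rejection above.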
+
+template<int RegWidth>
+static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
+ unsigned Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ uint64_t Imm;
+ if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ return MCDisassembler::Success;
+}
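+
+// A logical immediate is a rotated run of set bits replicated across the
+// register, so (illustratively) 0x00ff00ff is representable in a 32-bit
+// operation while an arbitrary value like 0xabcd is not; isLogicalImmBits
+// rejects encodings that decode to no such pattern.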
+
+
+static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder) {
+ // Only values 0-4 are valid for this 3-bit field
+ if (ShiftAmount > 4)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder) {
+ // Only values below 32 are valid for a 32-bit register
+ if (ShiftAmount > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned ImmS = fieldFromInstruction(Insn, 10, 6);
+ unsigned ImmR = fieldFromInstruction(Insn, 16, 6);
+ unsigned SF = fieldFromInstruction(Insn, 31, 1);
+
+  // Include Undef for 0b11 just in case it occurs, so the compiler doesn't
+  // optimise out assertions that it thinks can never be hit.
+ enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc;
+ Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2);
+
+ if (!SF) {
+ // ImmR and ImmS must be between 0 and 31 for 32-bit instructions.
+ if (ImmR > 31 || ImmS > 31)
+ return MCDisassembler::Fail;
+ }
+
+ if (SF) {
+ DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ // BFM MCInsts use Rd as a source too.
+ if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
+ } else {
+ DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
+ // BFM MCInsts use Rd as a source too.
+ if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // ASR and LSR have more specific patterns so they won't get here:
+ assert(!(ImmS == 31 && !SF && Opc != BFM)
+ && "shift should have used auto decode");
+ assert(!(ImmS == 63 && SF && Opc != BFM)
+ && "shift should have used auto decode");
+
+ // Extension instructions similarly:
+ if (Opc == SBFM && ImmR == 0) {
+ assert((ImmS != 7 && ImmS != 15) && "extension got here");
+ assert((ImmS != 31 || SF == 0) && "extension got here");
+ } else if (Opc == UBFM && ImmR == 0) {
+ assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here");
+ }
+
+ if (Opc == UBFM) {
+ // It might be a LSL instruction, which actually takes the shift amount
+ // itself as an MCInst operand.
+ if (SF && (ImmS + 1) % 64 == ImmR) {
+ Inst.setOpcode(AArch64::LSLxxi);
+ Inst.addOperand(MCOperand::CreateImm(63 - ImmS));
+ return MCDisassembler::Success;
+ } else if (!SF && (ImmS + 1) % 32 == ImmR) {
+ Inst.setOpcode(AArch64::LSLwwi);
+ Inst.addOperand(MCOperand::CreateImm(31 - ImmS));
+ return MCDisassembler::Success;
+ }
+ }
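+
+  // Worked example (illustrative): "lsl x0, x1, #8" is really UBFM with
+  // ImmR = 56 and ImmS = 55; (55 + 1) % 64 == 56 matches above, and the
+  // stored shift amount is 63 - 55 = 8.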
+
+ // Otherwise it's definitely either an extract or an insert depending on which
+ // of ImmR or ImmS is larger.
+ unsigned ExtractOp, InsertOp;
+ switch (Opc) {
+ default: llvm_unreachable("unexpected instruction trying to decode bitfield");
+ case SBFM:
+ ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii;
+ InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii;
+ break;
+ case BFM:
+ ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii;
+ InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii;
+ break;
+ case UBFM:
+ ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii;
+ InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii;
+ break;
+ }
+
+ // Otherwise it's a boring insert or extract
+ Inst.addOperand(MCOperand::CreateImm(ImmR));
+ Inst.addOperand(MCOperand::CreateImm(ImmS));
+
+
+ if (ImmS < ImmR)
+ Inst.setOpcode(InsertOp);
+ else
+ Inst.setOpcode(ExtractOp);
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // This decoder exists to add the dummy Lane operand to the MCInst, which must
+ // be 1 in assembly but has no other real manifestation.
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned IsToVec = fieldFromInstruction(Insn, 16, 1);
+
+ if (IsToVec) {
+ DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
+ } else {
+ DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // Add the lane
+ Inst.addOperand(MCOperand::CreateImm(1));
+
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus Result = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(Insn, 10, 5);
+ unsigned SImm7 = fieldFromInstruction(Insn, 15, 7);
+ unsigned L = fieldFromInstruction(Insn, 22, 1);
+ unsigned V = fieldFromInstruction(Insn, 26, 1);
+ unsigned Opc = fieldFromInstruction(Insn, 30, 2);
+
+ // Not an official name, but it turns out that bit 23 distinguishes indexed
+ // from non-indexed operations.
+ unsigned Indexed = fieldFromInstruction(Insn, 23, 1);
+
+ if (Indexed && L == 0) {
+ // The MCInst for an indexed store has an out operand and 4 ins:
+ // Rn_wb, Rt, Rt2, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // You shouldn't load to the same register twice in an instruction...
+ if (L && Rt == Rt2)
+ Result = MCDisassembler::SoftFail;
+
+ // ... or do any operation that writes-back to a transfer register. But note
+ // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different.
+ if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn))
+ Result = MCDisassembler::SoftFail;
+
+ // Exactly how we decode the MCInst's registers depends on the Opc and V
+ // fields of the instruction. These also obviously determine the size of the
+ // operation so we can fill in that information while we're at it.
+ if (V) {
+ // The instruction operates on the FP/SIMD registers
+ switch (Opc) {
+ default: return MCDisassembler::Fail;
+ case 0:
+ DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 1:
+ DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 2:
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ }
+ } else {
+ switch (Opc) {
+ default: return MCDisassembler::Fail;
+ case 0:
+ DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 1:
+ assert(L && "unexpected \"store signed\" attempt");
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 2:
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ }
+ }
+
+ if (Indexed && L == 1) {
+    // The MCInst for an indexed load has 3 out operands and 2 ins:
+    //    Rt, Rt2, Rn_wb, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(SImm7));
+
+ return Result;
+}
+
+static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
+ uint32_t Val,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Val, 0, 5);
+ unsigned Rn = fieldFromInstruction(Val, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(Val, 10, 5);
+ unsigned MemSize = fieldFromInstruction(Val, 30, 2);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (Rt == Rt2) S = MCDisassembler::SoftFail;
+
+ switch (MemSize) {
+ case 2:
+ if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case 3:
+ if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction");
+ }
+
+ if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+template<typename SomeNamedImmMapper>
+static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ SomeNamedImmMapper Mapper;
+ bool ValidNamed;
+ Mapper.toString(Val, ValidNamed);
+ if (ValidNamed || Mapper.validImm(Val)) {
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+ }
+
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ bool ValidNamed;
+ Mapper.toString(Val, ValidNamed);
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+
+ return ValidNamed ? MCDisassembler::Success : MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address,
+ Decoder);
+}
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address,
+ Decoder);
+}
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Imm9 = fieldFromInstruction(Insn, 12, 9);
+
+ unsigned Opc = fieldFromInstruction(Insn, 22, 2);
+ unsigned V = fieldFromInstruction(Insn, 26, 1);
+ unsigned Size = fieldFromInstruction(Insn, 30, 2);
+
+ if (Opc == 0 || (V == 1 && Opc == 2)) {
+ // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ if (V == 0 && (Opc == 2 || Size == 3)) {
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ } else if (V == 0) {
+ DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+ } else if (V == 1 && (Opc & 2)) {
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ } else {
+ switch (Size) {
+ case 0:
+ DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 1:
+ DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ }
+ }
+
+ if (Opc != 0 && (V != 1 || Opc != 2)) {
+ // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+ Inst.addOperand(MCOperand::CreateImm(Imm9));
+
+  // N.b. The official documentation says unpredictable if Rt == Rn, but this
+  // takes place at the architectural rather than encoding level:
+ //
+ // "STR xzr, [sp], #4" is perfectly valid.
+ if (V == 0 && Rt == Rn && Rn != 31)
+ return MCDisassembler::SoftFail;
+ else
+ return MCDisassembler::Success;
+}
+
+static MCDisassembler *createAArch64Disassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new AArch64Disassembler(STI, T.createMCRegInfo(""));
+}
+
+extern "C" void LLVMInitializeAArch64Disassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheAArch64Target,
+ createAArch64Disassembler);
+}
+
+
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..d4bd163dad60
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Disassembler
+ AArch64Disassembler.cpp
+ )
+
+add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..a93e343886d0
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Disassembler
+parent = AArch64
+required_libraries = AArch64CodeGen AArch64Desc AArch64Info AArch64Utils MC Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile
new file mode 100644
index 000000000000..5c861207f836
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Disassembler
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
new file mode 100644
index 000000000000..82ce80c8b1a1
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -0,0 +1,408 @@
+//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "AArch64GenAsmWriter.inc"
+
+static int64_t unpackSignedImm(int BitWidth, uint64_t Value) {
+ assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit");
+ if (Value & (1ULL << (BitWidth - 1)))
+ return static_cast<int64_t>(Value) - (1LL << BitWidth);
+ else
+ return Value;
+}
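+
+// For example (illustrative): unpackSignedImm(9, 0x1ff) == -1 and
+// unpackSignedImm(9, 0x0ff) == 255, since bit 8 is the sign bit of a 9-bit
+// two's-complement value.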
+
+AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI, MII, MRI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void
+AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+ int32_t Imm = unpackSignedImm(9, MOImm.getImm());
+
+ O << '#' << Imm;
+}
+
+void
+AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned MemSize,
+ unsigned RmSize) {
+ unsigned ExtImm = MI->getOperand(OpNum).getImm();
+ unsigned OptionHi = ExtImm >> 1;
+ unsigned S = ExtImm & 1;
+ bool IsLSL = OptionHi == 1 && RmSize == 64;
+
+ const char *Ext;
+ switch (OptionHi) {
+ case 1:
+ Ext = (RmSize == 32) ? "uxtw" : "lsl";
+ break;
+ case 3:
+ Ext = (RmSize == 32) ? "sxtw" : "sxtx";
+ break;
+ default:
+ llvm_unreachable("Incorrect Option on load/store (reg offset)");
+ }
+ O << Ext;
+
+ if (S) {
+ unsigned ShiftAmt = Log2_32(MemSize);
+ O << " #" << ShiftAmt;
+ } else if (IsLSL) {
+ O << " #0";
+ }
+}
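+
+// For example (illustrative): an 8-byte access with S set prints " #3",
+// because the shift amount is always log2 of the memory access size.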
+
+void
+AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O) {
+ const MCOperand &Imm12Op = MI->getOperand(OpNum);
+
+ if (Imm12Op.isImm()) {
+ int64_t Imm12 = Imm12Op.getImm();
+ assert(Imm12 >= 0 && "Invalid immediate for add/sub imm");
+ O << "#" << Imm12;
+ } else {
+ assert(Imm12Op.isExpr() && "Unexpected shift operand type");
+ O << "#" << *Imm12Op.getExpr();
+ }
+}
+
+void
+AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+
+ printAddSubImmLSL0Operand(MI, OpNum, O);
+
+ O << ", lsl #12";
+}
+
+void
+AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << MO.getImm();
+}
+
+template<unsigned RegWidth> void
+AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmROp = MI->getOperand(OpNum);
+ unsigned LSB = ImmROp.getImm() == 0 ? 0 : RegWidth - ImmROp.getImm();
+
+ O << '#' << LSB;
+}
+
+void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmSOp = MI->getOperand(OpNum);
+ unsigned Width = ImmSOp.getImm() + 1;
+
+ O << '#' << Width;
+}
+
+void
+AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmSOp = MI->getOperand(OpNum);
+ const MCOperand &ImmROp = MI->getOperand(OpNum - 1);
+
+ unsigned ImmR = ImmROp.getImm();
+ unsigned ImmS = ImmSOp.getImm();
+
+ assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract");
+
+ O << '#' << (ImmS - ImmR + 1);
+}
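+
+// Worked example (illustrative): "ubfx w0, w1, #4, #8" is stored with
+// ImmR = 4 and ImmS = 11, so the width printed back is 11 - 4 + 1 = 8.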
+
+void
+AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &CRx = MI->getOperand(OpNum);
+
+ O << 'c' << CRx.getImm();
+}
+
+
+void
+AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ScaleOp = MI->getOperand(OpNum);
+
+ O << '#' << (64 - ScaleOp.getImm());
+}
+
+
+void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o) {
+ const MCOperand &MOImm8 = MI->getOperand(OpNum);
+
+ assert(MOImm8.isImm()
+ && "Immediate operand required for floating-point immediate inst");
+
+ uint32_t Imm8 = MOImm8.getImm();
+ uint32_t Fraction = Imm8 & 0xf;
+ uint32_t Exponent = (Imm8 >> 4) & 0x7;
+ uint32_t Negative = (Imm8 >> 7) & 0x1;
+
+ float Val = 1.0f + Fraction / 16.0f;
+
+ // That is:
+ // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4,
+ // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0
+ if (Exponent & 0x4) {
+ Val /= 1 << (7 - Exponent);
+ } else {
+ Val *= 1 << (Exponent + 1);
+ }
+
+ Val = Negative ? -Val : Val;
+
+ o << '#' << format("%.8f", Val);
+}
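+
+// Worked examples (illustrative): Imm8 = 0x00 decodes to +1.0 * 2^1 and
+// prints "#2.00000000"; Imm8 = 0xf0 decodes to -1.0 * 2^0 and prints
+// "#-1.00000000".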
+
+void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o) {
+ o << "#0.0";
+}
+
+void
+AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ O << A64CondCodeToString(static_cast<A64CC::CondCodes>(MO.getImm()));
+}
+
+template <unsigned field_width, unsigned scale> void
+AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (!MO.isImm()) {
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+  // The immediate of an LDR (lit) instruction is a signed 19-bit value, which
+  // is scaled by 4 (because all A64 instructions are 32 bits wide).
+ uint64_t UImm = MO.getImm();
+ uint64_t Sign = UImm & (1LL << (field_width - 1));
+ int64_t SImm = scale * ((UImm & ~Sign) - Sign);
+
+ O << "#" << SImm;
+}
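+
+// Worked example (illustrative): with field_width = 19 and scale = 4, the
+// raw immediate 0x7ffff sign-extends to -1 and is printed as "#-4", i.e. one
+// instruction back.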
+
+template<unsigned RegWidth> void
+AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ uint64_t Val;
+ A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val);
+ O << "#0x";
+ O.write_hex(Val);
+}
+
+void
+AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, int MemSize) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+
+ if (MOImm.isImm()) {
+ uint32_t Imm = MOImm.getImm() * MemSize;
+
+ O << "#" << Imm;
+ } else {
+ O << "#" << *MOImm.getExpr();
+ }
+}
+
+void
+AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O,
+ A64SE::ShiftExtSpecifiers Shift) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ // LSL #0 is not printed
+ if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0)
+ return;
+
+ switch (Shift) {
+ case A64SE::LSL: O << "lsl"; break;
+ case A64SE::LSR: O << "lsr"; break;
+ case A64SE::ASR: O << "asr"; break;
+ case A64SE::ROR: O << "ror"; break;
+ default: llvm_unreachable("Invalid shift specifier in logical instruction");
+ }
+
+ O << " #" << MO.getImm();
+}
+
+void
+AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &UImm16MO = MI->getOperand(OpNum);
+ const MCOperand &ShiftMO = MI->getOperand(OpNum + 1);
+
+ if (UImm16MO.isImm()) {
+ O << '#' << UImm16MO.getImm();
+
+ if (ShiftMO.getImm() != 0)
+ O << ", lsl #" << (ShiftMO.getImm() * 16);
+
+ return;
+ }
+
+ O << "#" << *UImm16MO.getExpr();
+}
+
+void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ bool ValidName;
+ const MCOperand &MO = MI->getOperand(OpNum);
+ StringRef Name = Mapper.toString(MO.getImm(), ValidName);
+
+ if (ValidName)
+ O << Name;
+ else
+ O << '#' << MO.getImm();
+}
+
+void
+AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ bool ValidName;
+ std::string Name = Mapper.toString(MO.getImm(), ValidName);
+ if (ValidName) {
+ O << Name;
+ return;
+ }
+}
+
+
+void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O,
+ A64SE::ShiftExtSpecifiers Ext) {
+ // FIXME: In principle TableGen should be able to detect this itself far more
+ // easily. We will only accumulate more of these hacks.
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+
+ if (isStackReg(Reg0) || isStackReg(Reg1)) {
+ A64SE::ShiftExtSpecifiers LSLEquiv;
+
+ if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP)
+ LSLEquiv = A64SE::UXTX;
+ else
+ LSLEquiv = A64SE::UXTW;
+
+ if (Ext == LSLEquiv) {
+ O << "lsl #" << MI->getOperand(OpNum).getImm();
+ return;
+ }
+ }
+
+ switch (Ext) {
+ case A64SE::UXTB: O << "uxtb"; break;
+ case A64SE::UXTH: O << "uxth"; break;
+ case A64SE::UXTW: O << "uxtw"; break;
+ case A64SE::UXTX: O << "uxtx"; break;
+ case A64SE::SXTB: O << "sxtb"; break;
+ case A64SE::SXTH: O << "sxth"; break;
+ case A64SE::SXTW: O << "sxtw"; break;
+ case A64SE::SXTX: O << "sxtx"; break;
+ default: llvm_unreachable("Unexpected shift type for printing");
+ }
+
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getImm() != 0)
+ O << " #" << MO.getImm();
+}
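+
+// For example (illustrative): when the destination is XSP, "uxtx #2" is the
+// canonical alias "lsl #2", which is what the stack-register special case
+// above prints.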
+
+template<int MemScale> void
+AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+ int32_t Imm = unpackSignedImm(7, MOImm.getImm());
+
+ O << "#" << (Imm * MemScale);
+}
+
+void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ O << getRegisterName(Reg);
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
+ }
+}
+
+
+void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ if (MI->getOpcode() == AArch64::TLSDESCCALL) {
+ // This is a special assembler directive which applies an
+ // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed
+ // form outside the normal TableGenerated scheme.
+ O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr();
+ } else if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
+
+ printAnnotation(O, Annot);
+}
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
new file mode 100644
index 000000000000..639fa869c016
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -0,0 +1,172 @@
+//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64INSTPRINTER_H
+#define LLVM_AARCH64INSTPRINTER_H
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class AArch64InstPrinter : public MCInstPrinter {
+public:
+ AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+
+ // Autogenerated by tblgen
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printRegName(raw_ostream &O, unsigned RegNum) const;
+
+ template<unsigned MemSize, unsigned RmSize>
+ void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize);
+ }
+
+
+ void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned MemSize,
+ unsigned RmSize);
+
+ void printAddSubImmLSL0Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O);
+ void printAddSubImmLSL12Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O);
+
+ void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<unsigned RegWidth>
+ void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+
+ void printCondCodeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printCRxOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
+
+ void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
+
+ template<int MemScale>
+ void printOffsetUImm12Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &o) {
+ printOffsetUImm12Operand(MI, OpNum, o, MemScale);
+ }
+
+ void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o, int MemScale);
+
+ template<unsigned field_width, unsigned scale>
+ void printLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ template<unsigned RegWidth>
+ void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<typename SomeNamedImmMapper>
+ void printNamedImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O);
+ }
+
+ void printNamedImmOperand(const NamedImmMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printMRSOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O);
+ }
+
+ void printMSROperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O);
+ }
+
+ void printShiftOperand(const char *name, const MCInst *MI,
+ unsigned OpIdx, raw_ostream &O);
+
+ void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("lsr", MI, OpNum, O);
+ }
+ void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("asr", MI, OpNum, O);
+ }
+ void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("ror", MI, OpNum, O);
+ }
+
+ template<A64SE::ShiftExtSpecifiers Shift>
+ void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand(MI, OpNum, O, Shift);
+ }
+
+ void printShiftOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, A64SE::ShiftExtSpecifiers Sh);
+
+
+ void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ template<int MemSize> void
+ printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<A64SE::ShiftExtSpecifiers EXT>
+ void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printRegExtendOperand(MI, OpNum, O, EXT);
+ }
+
+ void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, A64SE::ShiftExtSpecifiers Ext);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+ bool isStackReg(unsigned RegNo) {
+ return RegNo == AArch64::XSP || RegNo == AArch64::WSP;
+ }
+
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..d4b980a94d9b
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64AsmPrinter
+ AArch64InstPrinter.cpp
+ )
+
+add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen)
+
diff --git a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..4836c7c45d44
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64AsmPrinter
+parent = AArch64
+required_libraries = AArch64Utils MC Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile
new file mode 100644
index 000000000000..1c36a8dea798
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/InstPrinter/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64AsmPrinter
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt
new file mode 100644
index 000000000000..3b296fdddc04
--- /dev/null
+++ b/lib/Target/AArch64/LLVMBuild.txt
@@ -0,0 +1,36 @@
+;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils
+
+[component_0]
+type = TargetGroup
+name = AArch64
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+;has_jit = 1
+
+[component_1]
+type = Library
+name = AArch64CodeGen
+parent = AArch64
+required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AsmPrinter CodeGen Core MC SelectionDAG Support Target
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
new file mode 100644
index 000000000000..a3373b1087bb
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -0,0 +1,585 @@
+//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the MCAsmBackend class,
+// which is principally concerned with relaxation of the various fixup kinds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class AArch64AsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo* STI;
+public:
+ AArch64AsmBackend(const Target &T, const StringRef TT)
+ : MCAsmBackend(),
+ STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", ""))
+ {}
+
+
+ ~AArch64AsmBackend() {
+ delete STI;
+ }
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ virtual void processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved);
+};
+} // end anonymous namespace
+
+void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup,
+ const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ // The ADRP instruction adds some multiple of 0x1000 to the current PC &
+ // ~0xfff. This means that the required offset to reach a symbol can vary by
+ // up to one step depending on where the ADRP is in memory. For example:
+ //
+ // ADRP x0, there
+ // there:
+ //
+ // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
+ // we'll need that as an offset. At any other address "there" will be in the
+ // same page as the ADRP and the instruction should encode 0x0. Assuming the
+ // section isn't 0x1000-aligned, we therefore need to delegate this decision
+ // to the linker -- a relocation!
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page)
+ IsResolved = false;
+}
+
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value);
+
+namespace {
+
+class ELFAArch64AsmBackend : public AArch64AsmBackend {
+public:
+ uint8_t OSABI;
+ ELFAArch64AsmBackend(const Target &T, const StringRef TT,
+ uint8_t _OSABI)
+ : AArch64AsmBackend(T, TT), OSABI(_OSABI) { }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const;
+
+ unsigned int getNumFixupKinds() const {
+ return AArch64::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// AArch64FixupKinds.h.
+//
+// Name Offset (bits) Size (bits) Flags
+{ "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_add_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst128_lo12", 0, 32, 0 },
+{ "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_movw_uabs_g0", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g1", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g2", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g3", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g0", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g1", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g2", 0, 32, 0 },
+{ "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g2", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g0", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_hi12", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_movw_gottprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_movw_tprel_g2", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g0", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_add_tprel_hi12", 0, 32, 0 },
+{ "fixup_a64_add_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_call", 0, 0, 0 }
+ };
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8;
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ }
+ }
+
+ bool mayNeedRelaxation(const MCInst&) const {
+ return false;
+ }
+
+ void relaxInstruction(const MCInst&, llvm::MCInst&) const {
+ llvm_unreachable("Cannot relax instructions");
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createAArch64ELFObjectWriter(OS, OSABI);
+ }
+};
+
+} // end anonymous namespace
+
+bool
+ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+  // Correct for now. With all instructions a fixed 32 bits wide, only very
+  // low-level considerations could make a selection here fail, so nothing
+  // ever needs relaxing.
+ return false;
+}
+
+
+bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+  // We can only emit whole 32-bit NOP instructions, so the count must be a
+  // multiple of 4 bytes.
+ if (Count % 4 != 0)
+ return false;
+
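+  // 0xd503201f is the encoding of the A64 NOP (HINT #0) instruction.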
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0xd503201f);
+
+ return true;
+}
+
+static unsigned ADRImmBits(unsigned Value) {
+ unsigned lo2 = Value & 0x3;
+ unsigned hi19 = (Value & 0x1fffff) >> 2;
+
+ return (hi19 << 5) | (lo2 << 29);
+}
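+
+// The ADR/ADRP immediate is split in the instruction word: the low two bits
+// go in immlo (bits 30-29) and the remaining 19 bits in immhi (bits 23-5).
+// For example (illustrative): ADRImmBits(0x5) == (0x1 << 5) | (0x1 << 29).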
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_2:
+ assert((int64_t)Value >= -32768 &&
+           (int64_t)Value <= 65535 &&
+ "Out of range ABS16 fixup");
+ return Value;
+ case FK_Data_4:
+ assert((int64_t)Value >= -(1LL << 31) &&
+ (int64_t)Value <= (1LL << 32) - 1 &&
+ "Out of range ABS32 fixup");
+ return Value;
+ case FK_Data_8:
+ return Value;
+
+ case AArch64::fixup_a64_ld_gottprel_prel19:
+ // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F
+ // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20.
+ case AArch64::fixup_a64_ld_prel:
+ // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits
+ // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup");
+ return (Value & 0x1ffffc) << 3;
+
+ case AArch64::fixup_a64_adr_prel:
+ // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of
+ // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range ADR fixup");
+ return ADRImmBits(Value & 0x1fffff);
+
+ case AArch64::fixup_a64_adr_prel_page:
+ // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF
+ // F000 of the result of the operation, checking that -2^32 <= result <
+ // 2^32.
+ assert((int64_t)Value >= -(1LL << 32) &&
+ (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+ return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
+
+ case AArch64::fixup_a64_add_dtprel_hi12:
+    // R_AARCH64_TLSLD_ADD_DTPREL_HI12: Set an ADD immediate field to bits
+    // FF F000 of DTPREL(S+A), check 0 <= X < 2^24.
+  case AArch64::fixup_a64_add_tprel_hi12:
+    // R_AARCH64_TLSLE_ADD_TPREL_HI12: Set an ADD immediate field to bits
+    // FF F000 of TPREL(S+A), check 0 <= X < 2^24.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 24) && "Out of range ADD fixup");
+ return (Value & 0xfff000) >> 2;
+
+ case AArch64::fixup_a64_add_dtprel_lo12:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_add_tprel_lo12:
+    // R_AARCH64_TLSLE_ADD_TPREL_LO12: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 12) && "Out of range ADD fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_add_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_add_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+ // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits
+ // FFF of G(TLSDESC(S+A)), with no overflow check.
+ case AArch64::fixup_a64_add_lo12:
+ // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of
+ // S+A, with no overflow check.
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst8_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst8_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
+    // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFF of DTPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFF of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst8_lo12:
+ // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF
+ // of S+A, with no overflow check.
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst16_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst16_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE
+    // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFE of DTPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFE of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst16_lo12:
+ // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE
+ // of S+A, with no overflow check.
+ return (Value & 0xffe) << 9;
+
+ case AArch64::fixup_a64_ldst32_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst32_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC
+    // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFC of DTPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFC of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst32_lo12:
+ // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC
+ // of S+A, with no overflow check.
+ return (Value & 0xffc) << 8;
+
+ case AArch64::fixup_a64_ldst64_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst64_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8
+    // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FF8 of DTPREL(S+A), with no overflow check.
+  case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FF8 of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst64_lo12:
+ // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8
+ // of S+A, with no overflow check.
+ return (Value & 0xff8) << 7;
+
+ case AArch64::fixup_a64_ldst128_lo12:
+ // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0
+ // of S+A, with no overflow check.
+ return (Value & 0xff0) << 6;
+
+ case AArch64::fixup_a64_movw_uabs_g0:
+ // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A
+ // with a check that S+A < 2^16
+ assert(Value <= 0xffff && "Out of range move wide fixup");
+ return (Value & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g0_nc:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of DTPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_movw_gottprel_g0_nc:
+ // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of G(TPREL(S+A)) - GOT with no overflow check.
+ case AArch64::fixup_a64_movw_tprel_g0_nc:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of TPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_movw_uabs_g0_nc:
+ // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of
+ // S+A with no overflow check.
+ return (Value & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g1:
+ // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of
+ // S+A with a check that S+A < 2^32
+ assert(Value <= 0xffffffffull && "Out of range move wide fixup");
+ return ((Value >> 16) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g1_nc:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field
+ // to bits FFFF0000 of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_movw_tprel_g1_nc:
+    // R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: Set a MOVK immediate field
+ // to bits FFFF0000 of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_movw_uabs_g1_nc:
+ // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits
+ // FFFF0000 of S+A with no overflow check.
+ return ((Value >> 16) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g2:
+ // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000
+ // 0000 of S+A with a check that S+A < 2^48
+ assert(Value <= 0xffffffffffffull && "Out of range move wide fixup");
+ return ((Value >> 32) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g2_nc:
+    // R_AARCH64_MOVW_UABS_G2_NC: Sets a MOVK immediate field to bits
+    // FFFF 0000 0000 of S+A with no overflow check.
+ return ((Value >> 32) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g3:
+ // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000
+ // 0000 0000 of S+A (no overflow check needed)
+ return ((Value >> 48) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g0:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field
+ // to bits FFFF of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_tprel_g0:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to
+ // bits FFFF of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g0: {
+ // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of
+ // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we
+ // should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 16) && Signed < (1LL << 16)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = (Value & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ // MCCodeEmitter should have encoded a MOVN, which is fine.
+ Value = (~Value & 0xffff) << 5;
+ }
+ return Value;
+ }
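+
+  // Worked example (illustrative): Value = -2 keeps the emitted MOVN and
+  // encodes ~(-2) = 1 in imm16, while Value = 2 sets bit 30, turning the
+  // MOVN template into MOVZ with imm16 = 2.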
+
+ case AArch64::fixup_a64_movw_dtprel_g1:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field
+ // to bits FFFF0000 of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_gottprel_g1:
+ // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field
+ // to bits FFFF0000 of G(TPREL(S+A)) - GOT.
+ case AArch64::fixup_a64_movw_tprel_g1:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to
+ // bits FFFF0000 of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g1: {
+ // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000
+ // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we
+ // should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 32) && Signed < (1LL << 32)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = ((Value >> 16) & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ Value = ((~Value >> 16) & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_movw_dtprel_g2:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field
+ // to bits FFFF 0000 0000 of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_tprel_g2:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to
+ // bits FFFF 0000 0000 of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g2: {
+ // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000
+ // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that
+ // we should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 48) && Signed < (1LL << 48)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = ((Value >> 32) & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ Value = ((~Value >> 32) & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_tstbr:
+ // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to
+ // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15.
+ assert((int64_t)Value >= -(1LL << 15) &&
+ (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup");
+ return (Value & 0xfffc) << (5 - 2);
+
+ case AArch64::fixup_a64_condbr:
+ // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch
+ // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup");
+ return (Value & 0x1ffffc) << (5 - 2);
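+  // In the TBZ/TBNZ and B.cond cases above, the offset is measured in units
+  // of four bytes and the field starts at bit 5 of the instruction, so
+  // "<< (5 - 2)" performs the scaling and placement in one shift.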
+
+ case AArch64::fixup_a64_uncondbr:
+    // R_AARCH64_JUMP26: same calculation as R_AARCH64_CALL26 below; only a
+    // linker might treat the two differently.
+ case AArch64::fixup_a64_call:
+ // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P,
+ // checking that -2^27 <= S+A-P < 2^27.
+ assert((int64_t)Value >= -(1LL << 27) &&
+ (int64_t)Value < (1LL << 27) && "Out of range branch fixup");
+ return (Value & 0xffffffc) >> 2;
+
+ case AArch64::fixup_a64_adr_gottprel_page:
+ // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits
+ // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32.
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000
+ // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32.
+ case AArch64::fixup_a64_adr_prel_got_page:
+ // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits
+ // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S))-Page(GOT) <
+ // 2^32.
+ assert((int64_t)Value >= -(1LL << 32) &&
+ (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+ return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
+
+ case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
+ // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8
+ // of X, with no overflow check. Check that X & 7 == 0.
+ case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
+ // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of
+ // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0.
+ case AArch64::fixup_a64_ld64_got_lo12_nc:
+ // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of
+ // G(S) with no overflow check. Check X & 7 == 0
+ assert(((int64_t)Value & 7) == 0 && "Misaligned fixup");
+ return (Value & 0xff8) << 7;
+
+ case AArch64::fixup_a64_tlsdesc_call:
+ // R_AARCH64_TLSDESC_CALL: For relaxation only.
+ return 0;
+ }
+}
+
+MCAsmBackend *
+llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+ Triple TheTriple(TT);
+
+ return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS());
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
new file mode 100644
index 000000000000..4bcc65dfca27
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -0,0 +1,292 @@
+//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file handles ELF-specific object emission, converting LLVM's internal
+// fixups into the appropriate relocations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ AArch64ELFObjectWriter(uint8_t OSABI);
+
+ virtual ~AArch64ELFObjectWriter();
+
+protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+private:
+};
+}
+
+AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
+ /*HasRelocationAddend*/ true)
+{}
+
+AArch64ELFObjectWriter::~AArch64ELFObjectWriter()
+{}
+
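+// Map each LLVM-internal fixup to the ELF relocation type a static linker
+// expects. PC-relative and absolute references use disjoint relocation
+// families, hence the split on IsPCRel below.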
+unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_Data_8:
+ return ELF::R_AARCH64_PREL64;
+ case FK_Data_4:
+ return ELF::R_AARCH64_PREL32;
+ case FK_Data_2:
+ return ELF::R_AARCH64_PREL16;
+ case AArch64::fixup_a64_ld_prel:
+ Type = ELF::R_AARCH64_LD_PREL_LO19;
+ break;
+ case AArch64::fixup_a64_adr_prel:
+ Type = ELF::R_AARCH64_ADR_PREL_LO21;
+ break;
+ case AArch64::fixup_a64_adr_prel_page:
+ Type = ELF::R_AARCH64_ADR_PREL_PG_HI21;
+ break;
+ case AArch64::fixup_a64_adr_prel_got_page:
+ Type = ELF::R_AARCH64_ADR_GOT_PAGE;
+ break;
+ case AArch64::fixup_a64_tstbr:
+ Type = ELF::R_AARCH64_TSTBR14;
+ break;
+ case AArch64::fixup_a64_condbr:
+ Type = ELF::R_AARCH64_CONDBR19;
+ break;
+ case AArch64::fixup_a64_uncondbr:
+ Type = ELF::R_AARCH64_JUMP26;
+ break;
+ case AArch64::fixup_a64_call:
+ Type = ELF::R_AARCH64_CALL26;
+ break;
+ case AArch64::fixup_a64_adr_gottprel_page:
+ Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
+ break;
+ case AArch64::fixup_a64_ld_gottprel_prel19:
+ Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
+ break;
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_Data_8:
+ return ELF::R_AARCH64_ABS64;
+ case FK_Data_4:
+ return ELF::R_AARCH64_ABS32;
+ case FK_Data_2:
+ return ELF::R_AARCH64_ABS16;
+ case AArch64::fixup_a64_add_lo12:
+ Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ld64_got_lo12_nc:
+ Type = ELF::R_AARCH64_LD64_GOT_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_lo12:
+ Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_lo12:
+ Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_lo12:
+ Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_lo12:
+ Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst128_lo12:
+ Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g0:
+ Type = ELF::R_AARCH64_MOVW_UABS_G0;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g0_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G0_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g1:
+ Type = ELF::R_AARCH64_MOVW_UABS_G1;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g1_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g2:
+ Type = ELF::R_AARCH64_MOVW_UABS_G2;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g2_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G2_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g3:
+ Type = ELF::R_AARCH64_MOVW_UABS_G3;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g0:
+ Type = ELF::R_AARCH64_MOVW_SABS_G0;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g1:
+ Type = ELF::R_AARCH64_MOVW_SABS_G1;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g2:
+ Type = ELF::R_AARCH64_MOVW_SABS_G2;
+ break;
+
+ // TLS Local-dynamic block
+ case AArch64::fixup_a64_movw_dtprel_g2:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g1:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g1_nc:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g0:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_add_dtprel_hi12:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12;
+ break;
+ case AArch64::fixup_a64_add_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_add_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
+ break;
+
+ // TLS initial-exec block
+ case AArch64::fixup_a64_movw_gottprel_g1:
+ Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_gottprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
+ break;
+
+ // TLS local-exec block
+ case AArch64::fixup_a64_movw_tprel_g2:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g1:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g1_nc:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g0:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_add_tprel_hi12:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12;
+ break;
+ case AArch64::fixup_a64_add_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_add_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
+ break;
+
+ // TLS general-dynamic block
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ break;
+ case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
+ Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
+ break;
+ case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+ Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
+ break;
+ case AArch64::fixup_a64_tlsdesc_call:
+ Type = ELF::R_AARCH64_TLSDESC_CALL;
+ break;
+ }
+ }
+
+ return Type;
+}
+
+MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
new file mode 100644
index 000000000000..b83577af45c6
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -0,0 +1,160 @@
+//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits AArch64 ELF .o object files. It
+// differs from the generic ELF streamer in that it emits mapping symbols
+// ($x and $d) to delimit regions of data and code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
+/// the appropriate points in the object files. These symbols are defined in the
+/// AArch64 ELF ABI:
+/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf
+///
+/// In brief: $x or $d should be emitted at the start of each contiguous region
+/// of A64 code or data in a section. In practice, this emission does not rely
+/// on explicit assembler directives but on inherent properties of the
+/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an
+/// instruction).
+///
+/// As a result this system is orthogonal to the DataRegion infrastructure used
+/// by MachO. Beware!
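+///
+/// For example (a sketch; the numeric suffixes depend on emission order):
+///   add x0, x0, #1    // preceded by a "$x.0" mapping symbol
+///   .byte 0x1e        // preceded by a "$d.1" mapping symbol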
+class AArch64ELFStreamer : public MCELFStreamer {
+public:
+ AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCELFStreamer(Context, TAB, OS, Emitter),
+ MappingSymbolCounter(0), LastEMS(EMS_None) {
+ }
+
+ ~AArch64ELFStreamer() {}
+
+ virtual void ChangeSection(const MCSection *Section) {
+ // We have to keep track of the mapping symbol state of any sections we
+ // use. Each one should start off as EMS_None, which is provided as the
+ // default constructor by DenseMap::lookup.
+ LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::ChangeSection(Section);
+ }
+
+ /// This function is the one used to emit instruction data into the ELF
+ /// streamer. We override it to add the appropriate mapping symbol if
+ /// necessary.
+ virtual void EmitInstruction(const MCInst& Inst) {
+ EmitA64MappingSymbol();
+ MCELFStreamer::EmitInstruction(Inst);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
+ /// if necessary.
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitBytes(Data, AddrSpace);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
+ /// if necessary.
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+private:
+ enum ElfMappingSymbol {
+ EMS_None,
+ EMS_A64,
+ EMS_Data
+ };
+
+ void EmitDataMappingSymbol() {
+ if (LastEMS == EMS_Data) return;
+ EmitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+ }
+
+ void EmitA64MappingSymbol() {
+ if (LastEMS == EMS_A64) return;
+ EmitMappingSymbol("$x");
+ LastEMS = EMS_A64;
+ }
+
+ void EmitMappingSymbol(StringRef Name) {
+ MCSymbol *Start = getContext().CreateTempSymbol();
+ EmitLabel(Start);
+
+ MCSymbol *Symbol =
+ getContext().GetOrCreateSymbol(Name + "." +
+ Twine(MappingSymbolCounter++));
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ Symbol->setSection(*getCurrentSection());
+
+ const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ Symbol->setVariableValue(Value);
+ }
+
+ int64_t MappingSymbolCounter;
+
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS;
+
+};
+}
+
+namespace llvm {
+ MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+ }
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
new file mode 100644
index 000000000000..5a89ca50cee8
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -0,0 +1,27 @@
+//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF streamer information for the AArch64 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_ELF_STREAMER_H
+#define LLVM_AARCH64_ELF_STREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+
+ MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack);
+}
+
+#endif // LLVM_AARCH64_ELF_STREAMER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
new file mode 100644
index 000000000000..eeb122d38494
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -0,0 +1,113 @@
+//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the LLVM fixups applied to MCInsts in the AArch64
+// backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H
+#define LLVM_AARCH64_AARCH64FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+ namespace AArch64 {
+ enum Fixups {
+ fixup_a64_ld_prel = FirstTargetFixupKind,
+ fixup_a64_adr_prel,
+ fixup_a64_adr_prel_page,
+
+ fixup_a64_add_lo12,
+
+ fixup_a64_ldst8_lo12,
+ fixup_a64_ldst16_lo12,
+ fixup_a64_ldst32_lo12,
+ fixup_a64_ldst64_lo12,
+ fixup_a64_ldst128_lo12,
+
+ fixup_a64_tstbr,
+ fixup_a64_condbr,
+ fixup_a64_uncondbr,
+ fixup_a64_call,
+
+ fixup_a64_movw_uabs_g0,
+ fixup_a64_movw_uabs_g0_nc,
+ fixup_a64_movw_uabs_g1,
+ fixup_a64_movw_uabs_g1_nc,
+ fixup_a64_movw_uabs_g2,
+ fixup_a64_movw_uabs_g2_nc,
+ fixup_a64_movw_uabs_g3,
+
+ fixup_a64_movw_sabs_g0,
+ fixup_a64_movw_sabs_g1,
+ fixup_a64_movw_sabs_g2,
+
+ fixup_a64_adr_prel_got_page,
+ fixup_a64_ld64_got_lo12_nc,
+
+ // Produce offsets relative to the module's dynamic TLS area.
+ fixup_a64_movw_dtprel_g2,
+ fixup_a64_movw_dtprel_g1,
+ fixup_a64_movw_dtprel_g1_nc,
+ fixup_a64_movw_dtprel_g0,
+ fixup_a64_movw_dtprel_g0_nc,
+ fixup_a64_add_dtprel_hi12,
+ fixup_a64_add_dtprel_lo12,
+ fixup_a64_add_dtprel_lo12_nc,
+ fixup_a64_ldst8_dtprel_lo12,
+ fixup_a64_ldst8_dtprel_lo12_nc,
+ fixup_a64_ldst16_dtprel_lo12,
+ fixup_a64_ldst16_dtprel_lo12_nc,
+ fixup_a64_ldst32_dtprel_lo12,
+ fixup_a64_ldst32_dtprel_lo12_nc,
+ fixup_a64_ldst64_dtprel_lo12,
+ fixup_a64_ldst64_dtprel_lo12_nc,
+
+ // Produce the GOT entry containing a variable's address in TLS's
+ // initial-exec mode.
+ fixup_a64_movw_gottprel_g1,
+ fixup_a64_movw_gottprel_g0_nc,
+ fixup_a64_adr_gottprel_page,
+ fixup_a64_ld64_gottprel_lo12_nc,
+ fixup_a64_ld_gottprel_prel19,
+
+ // Produce offsets relative to the thread pointer: TPIDR_EL0.
+ fixup_a64_movw_tprel_g2,
+ fixup_a64_movw_tprel_g1,
+ fixup_a64_movw_tprel_g1_nc,
+ fixup_a64_movw_tprel_g0,
+ fixup_a64_movw_tprel_g0_nc,
+ fixup_a64_add_tprel_hi12,
+ fixup_a64_add_tprel_lo12,
+ fixup_a64_add_tprel_lo12_nc,
+ fixup_a64_ldst8_tprel_lo12,
+ fixup_a64_ldst8_tprel_lo12_nc,
+ fixup_a64_ldst16_tprel_lo12,
+ fixup_a64_ldst16_tprel_lo12_nc,
+ fixup_a64_ldst32_tprel_lo12,
+ fixup_a64_ldst32_tprel_lo12_nc,
+ fixup_a64_ldst64_tprel_lo12,
+ fixup_a64_ldst64_tprel_lo12_nc,
+
+ // Produce the special fixups used by the general-dynamic TLS model.
+ fixup_a64_tlsdesc_adr_page,
+ fixup_a64_tlsdesc_ld64_lo12_nc,
+ fixup_a64_tlsdesc_add_lo12_nc,
+ fixup_a64_tlsdesc_call,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
+ }
+}
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
new file mode 100644
index 000000000000..8ec8cbf1c525
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -0,0 +1,41 @@
+//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AArch64MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCAsmInfo.h"
+
+using namespace llvm;
+
+AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() {
+ PointerSize = 8;
+
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ CommentString = "//";
+ PrivateGlobalPrefix = ".L";
+ Code32Directive = ".code\t32";
+
+ Data16bitsDirective = "\t.hword\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = "\t.xword\t";
+
+ UseDataRegionDirectives = true;
+
+ WeakRefDirective = "\t.weak\t";
+
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+
+  // Exception handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
new file mode 100644
index 000000000000..a20bc471c20d
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -0,0 +1,27 @@
+//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AArch64MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64TARGETASMINFO_H
+#define LLVM_AARCH64TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+
+ struct AArch64ELFMCAsmInfo : public MCAsmInfo {
+ explicit AArch64ELFMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
new file mode 100644
index 000000000000..a5c591eee800
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -0,0 +1,502 @@
+//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class AArch64MCCodeEmitter : public MCCodeEmitter {
+ AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ MCContext &Ctx;
+
+public:
+ AArch64MCCodeEmitter(MCContext &ctx) : Ctx(ctx) {}
+
+ ~AArch64MCCodeEmitter() {}
+
+ unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ template<int MemSize>
+ unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return getOffsetUImm12OpValue(MI, OpIdx, Fixups, MemSize);
+ }
+
+ unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int MemSize) const;
+
+ unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+  // Labels are mostly handled the same way: a symbol is needed, and it
+  // simply gets a fixup attached.
+ template<AArch64::Fixups fixupDesired>
+ unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ unsigned getAddressWithFixup(const MCOperand &MO,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitInstruction(uint32_t Val, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != 4; ++i) {
+ EmitByte(Val & 0xff, OS);
+ Val >>= 8;
+ }
+ }
+
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ template<int hasRs, int hasRt2> unsigned
+ fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const;
+
+ unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue) const;
+
+ unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue) const;
+
+
+};
+
+} // end anonymous namespace
+
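+// When an operand is still a symbolic expression at encode time its final
+// value is unknown: record a fixup against the expression, encode zero bits,
+// and let the assembler (or the linker, via a relocation) patch them later.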
+unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (!MO.isExpr()) {
+ // This can occur for manually decoded or constructed MCInsts, but neither
+ // the assembly-parser nor instruction selection will currently produce an
+ // MCInst that's not a symbol reference.
+ assert(MO.isImm() && "Unexpected address requested");
+ return MO.getImm();
+ }
+
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ return 0;
+}
+
+unsigned AArch64MCCodeEmitter::
+getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int MemSize) const {
+ const MCOperand &ImmOp = MI.getOperand(OpIdx);
+ if (ImmOp.isImm())
+ return ImmOp.getImm();
+
+ assert(ImmOp.isExpr() && "Unexpected operand type");
+ const AArch64MCExpr *Expr = cast<AArch64MCExpr>(ImmOp.getExpr());
+ unsigned FixupKind;
+
+
+ switch (Expr->getKind()) {
+ default: llvm_unreachable("Unexpected operand modifier");
+ case AArch64MCExpr::VK_AARCH64_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12,
+ AArch64::fixup_a64_ldst16_lo12,
+ AArch64::fixup_a64_ldst32_lo12,
+ AArch64::fixup_a64_ldst64_lo12,
+ AArch64::fixup_a64_ldst128_lo12 };
+ assert(MemSize <= 16 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
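+    // e.g. MemSize == 4 gives Log2_32(4) == 2, selecting
+    // AArch64::fixup_a64_ldst32_lo12.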
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_GOT_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc;
+ break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12,
+ AArch64::fixup_a64_ldst16_dtprel_lo12,
+ AArch64::fixup_a64_ldst32_dtprel_lo12,
+ AArch64::fixup_a64_ldst64_dtprel_lo12 };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst16_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst32_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst64_dtprel_lo12_nc };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc;
+ break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12:{
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12,
+ AArch64::fixup_a64_ldst16_tprel_lo12,
+ AArch64::fixup_a64_ldst32_tprel_lo12,
+ AArch64::fixup_a64_ldst64_tprel_lo12 };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst16_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst32_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst64_tprel_lo12_nc };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc;
+ break;
+ }
+
+ return getAddressWithFixup(ImmOp, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ unsigned FixupKind = 0;
+ switch(cast<AArch64MCExpr>(MO.getExpr())->getKind()) {
+ default: llvm_unreachable("Invalid expression modifier");
+ case AArch64MCExpr::VK_AARCH64_LO12:
+ FixupKind = AArch64::fixup_a64_add_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_HI12:
+ FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12:
+ FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC:
+ FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_HI12:
+ FixupKind = AArch64::fixup_a64_add_tprel_hi12; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12:
+ FixupKind = AArch64::fixup_a64_add_tprel_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC:
+ FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
+ FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break;
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ unsigned Modifier = AArch64MCExpr::VK_AARCH64_None;
+ if (const AArch64MCExpr *Expr = dyn_cast<AArch64MCExpr>(MO.getExpr()))
+ Modifier = Expr->getKind();
+
+ unsigned FixupKind = 0;
+ switch(Modifier) {
+ case AArch64MCExpr::VK_AARCH64_None:
+ FixupKind = AArch64::fixup_a64_adr_prel_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_GOT:
+ FixupKind = AArch64::fixup_a64_adr_prel_got_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL:
+ FixupKind = AArch64::fixup_a64_adr_gottprel_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_TLSDESC:
+ FixupKind = AArch64::fixup_a64_tlsdesc_adr_page;
+ break;
+ default:
+ llvm_unreachable("Unknown symbol reference kind for ADRP instruction");
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
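+// "lsl #imm" is an alias of UBFM with immr == (-imm) mod 32 and
+// imms == 31 - imm (mod 64 and 63 - imm for the 64-bit form); the two
+// helpers below pack immr into the low six bits and imms from bit 6 up.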
+unsigned
+AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Only immediate expected for shift");
+
+ return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6;
+}
+
+unsigned
+AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Only immediate expected for shift");
+
+ return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6;
+}
+
+
+template<AArch64::Fixups fixupDesired> unsigned
+AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
+ unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ if (MO.isExpr())
+ return getAddressWithFixup(MO, fixupDesired, Fixups);
+
+ assert(MO.isImm());
+ return MO.getImm();
+}
+
+unsigned
+AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI,
+ unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ if (MO.isImm())
+ return MO.getImm();
+
+ assert(MO.isExpr());
+
+ unsigned FixupKind;
+ if (isa<AArch64MCExpr>(MO.getExpr())) {
+    assert(cast<AArch64MCExpr>(MO.getExpr())->getKind()
+ == AArch64MCExpr::VK_AARCH64_GOTTPREL
+ && "Invalid symbol modifier for literal load");
+ FixupKind = AArch64::fixup_a64_ld_gottprel_prel19;
+ } else {
+ FixupKind = AArch64::fixup_a64_ld_prel;
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+
+unsigned
+AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+ return 0;
+}
+
+unsigned
+AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &UImm16MO = MI.getOperand(OpIdx);
+ const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1);
+
+ unsigned Result = static_cast<unsigned>(ShiftMO.getImm()) << 16;
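+  // The hw shift selector occupies the bits above 16 in this operand
+  // encoding; the low sixteen bits hold imm16 itself, or zero while a fixup
+  // is pending.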
+
+ if (UImm16MO.isImm()) {
+ Result |= UImm16MO.getImm();
+ return Result;
+ }
+
+ const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
+ AArch64::Fixups requestedFixup;
+ switch (A64E->getKind()) {
+ default: llvm_unreachable("unexpected expression modifier");
+ case AArch64MCExpr::VK_AARCH64_ABS_G0:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G3:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break;
+ }
+
+ return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups);
+}
+
+template<int hasRs, int hasRt2> unsigned
+AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (!hasRs) EncodedValue |= 0x001F0000;
+ if (!hasRt2) EncodedValue |= 0x00007C00;
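+  // 0x001F0000 forces Rs (bits 16-20) to 0b11111 and 0x00007C00 forces Rt2
+  // (bits 10-14) likewise: the all-ones encoding the architecture expects
+  // for an unused register field.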
+
+ return EncodedValue;
+}
+
+unsigned
+AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue) const {
+ // If one of the signed fixup kinds is applied to a MOVZ instruction, the
+ // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's
+ // job to ensure that any bits possibly affected by this are 0. This means we
+ // must zero out bit 30 (essentially emitting a MOVN).
+ MCOperand UImm16MO = MI.getOperand(1);
+
+ // Nothing to do if there's no fixup.
+ if (UImm16MO.isImm())
+ return EncodedValue;
+
+ const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
+ switch (A64E->getKind()) {
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ return EncodedValue & ~(1u << 30);
+ default:
+ // Nothing to do for an unsigned fixup.
+ return EncodedValue;
+ }
+
+ llvm_unreachable("Should have returned by now");
+}
+
+unsigned
+AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI,
+ unsigned EncodedValue) const {
+ // The Ra field of SMULH and UMULH is unused: it should be assembled as 31
+ // (i.e. all bits 1) but is ignored by the processor.
+ EncodedValue |= 0x1f << 10;
+ return EncodedValue;
+}
+
+MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new AArch64MCCodeEmitter(Ctx);
+}
+
+void AArch64MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MI.getOpcode() == AArch64::TLSDESCCALL) {
+ // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
+ // following (BLR) instruction. It doesn't emit any code itself so it
+ // doesn't go through the normal TableGenerated channels.
+ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call);
+ const MCExpr *Expr;
+ Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx);
+ Fixups.push_back(MCFixup::Create(0, Expr, Fixup));
+ return;
+ }
+
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+
+ EmitInstruction(Binary, OS);
+}
+
+
+#include "AArch64GenMCCodeEmitter.inc"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
new file mode 100644
index 000000000000..c1abfe74dfdd
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -0,0 +1,178 @@
+//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the assembly expression modifiers
+// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64mcexpr"
+#include "AArch64MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/Object/ELF.h"
+
+using namespace llvm;
+
+const AArch64MCExpr*
+AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) AArch64MCExpr(Kind, Expr);
+}
+
+void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_AARCH64_GOT: OS << ":got:"; break;
+ case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break;
+ case VK_AARCH64_LO12: OS << ":lo12:"; break;
+ case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break;
+ case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break;
+ case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break;
+ case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break;
+ case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break;
+ case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break;
+ case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break;
+ case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break;
+ case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break;
+ case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break;
+ case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break;
+ case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break;
+ case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break;
+ case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break;
+ case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break;
+ case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break;
+ case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break;
+ case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break;
+ case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break;
+ case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break;
+ case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break;
+ case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break;
+ case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break;
+ case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break;
+ case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break;
+ case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break;
+ case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break;
+ case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break;
+ case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break;
+ case VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break;
+ case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break;
+ case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break;
+ }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
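+// Printing examples: a bare symbol renders as ":lo12:var", while a compound
+// subexpression gains parentheses, e.g. ":lo12:(var + 4)".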
+
+bool
+AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return getSubExpr()->EvaluateAsRelocatable(Res, *Layout);
+}
+
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+ switch (Expr->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expression");
+ break;
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ // We're known to be under a TLS fixup, so any symbol should be
+ // modified. There should be only one.
+ const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+ MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
+ MCELF::SetType(SD, ELF::STT_TLS);
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
+ switch (getKind()) {
+ default:
+ return;
+ case VK_AARCH64_DTPREL_G2:
+ case VK_AARCH64_DTPREL_G1:
+ case VK_AARCH64_DTPREL_G1_NC:
+ case VK_AARCH64_DTPREL_G0:
+ case VK_AARCH64_DTPREL_G0_NC:
+ case VK_AARCH64_DTPREL_HI12:
+ case VK_AARCH64_DTPREL_LO12:
+ case VK_AARCH64_DTPREL_LO12_NC:
+ case VK_AARCH64_GOTTPREL_G1:
+ case VK_AARCH64_GOTTPREL_G0_NC:
+ case VK_AARCH64_GOTTPREL:
+ case VK_AARCH64_GOTTPREL_LO12:
+ case VK_AARCH64_TPREL_G2:
+ case VK_AARCH64_TPREL_G1:
+ case VK_AARCH64_TPREL_G1_NC:
+ case VK_AARCH64_TPREL_G0:
+ case VK_AARCH64_TPREL_G0_NC:
+ case VK_AARCH64_TPREL_HI12:
+ case VK_AARCH64_TPREL_LO12:
+ case VK_AARCH64_TPREL_LO12_NC:
+ case VK_AARCH64_TLSDESC:
+ case VK_AARCH64_TLSDESC_LO12:
+ break;
+ }
+
+ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
+}
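+// ELF linkers expect any symbol referenced through a TLS relocation to carry
+// type STT_TLS even if the assembly never declared it with ".type"; the walk
+// above marks each referenced symbol accordingly.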
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+// FIXME: Actually do the above, now that two backends are using it.
+static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbolsImpl(BE->getLHS(), Asm);
+ AddValueSymbolsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbolsImpl(getSubExpr(), Asm);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
new file mode 100644
index 000000000000..c0e3b29474d1
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -0,0 +1,167 @@
+//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes AArch64-specific MCExprs, used for modifiers like
+// ":lo12:" or ":gottprel_g1:".
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64MCEXPR_H
+#define LLVM_AARCH64MCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class AArch64MCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_AARCH64_None,
+ VK_AARCH64_GOT, // :got: modifier in assembly
+ VK_AARCH64_GOT_LO12, // :got_lo12:
+ VK_AARCH64_LO12, // :lo12:
+
+ VK_AARCH64_ABS_G0, // :abs_g0:
+ VK_AARCH64_ABS_G0_NC, // :abs_g0_nc:
+ VK_AARCH64_ABS_G1,
+ VK_AARCH64_ABS_G1_NC,
+ VK_AARCH64_ABS_G2,
+ VK_AARCH64_ABS_G2_NC,
+ VK_AARCH64_ABS_G3,
+
+ VK_AARCH64_SABS_G0, // :abs_g0_s:
+ VK_AARCH64_SABS_G1,
+ VK_AARCH64_SABS_G2,
+
+ VK_AARCH64_DTPREL_G2, // :dtprel_g2:
+ VK_AARCH64_DTPREL_G1,
+ VK_AARCH64_DTPREL_G1_NC,
+ VK_AARCH64_DTPREL_G0,
+ VK_AARCH64_DTPREL_G0_NC,
+ VK_AARCH64_DTPREL_HI12,
+ VK_AARCH64_DTPREL_LO12,
+ VK_AARCH64_DTPREL_LO12_NC,
+
+    VK_AARCH64_GOTTPREL_G1, // :gottprel_g1:
+ VK_AARCH64_GOTTPREL_G0_NC,
+ VK_AARCH64_GOTTPREL,
+ VK_AARCH64_GOTTPREL_LO12,
+
+    VK_AARCH64_TPREL_G2, // :tprel_g2:
+ VK_AARCH64_TPREL_G1,
+ VK_AARCH64_TPREL_G1_NC,
+ VK_AARCH64_TPREL_G0,
+ VK_AARCH64_TPREL_G0_NC,
+ VK_AARCH64_TPREL_HI12,
+ VK_AARCH64_TPREL_LO12,
+ VK_AARCH64_TPREL_LO12_NC,
+
+ VK_AARCH64_TLSDESC, // :tlsdesc:
+ VK_AARCH64_TLSDESC_LO12
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr)
+ : Kind(_Kind), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_AARCH64_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_AARCH64_GOT, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOT_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOTTPREL, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TLSDESC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TPREL_G1, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+  /// getKind - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const {
+ return getSubExpr()->FindAssociatedSection();
+ }
+
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const AArch64MCExpr *) { return true; }
+
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
new file mode 100644
index 000000000000..7960db08c8d6
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -0,0 +1,194 @@
+//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AArch64 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCTargetDesc.h"
+#include "AArch64ELFStreamer.h"
+#include "AArch64MCAsmInfo.h"
+#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_REGINFO_MC_DESC
+#include "AArch64GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AArch64GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AArch64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitAArch64MCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+
+static MCInstrInfo *createAArch64MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAArch64MCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAArch64MCRegisterInfo(X, AArch64::X30);
+ return X;
+}
+
+static MCAsmInfo *createAArch64MCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ MCAsmInfo *MAI = new AArch64ELFMCAsmInfo();
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) {
+ // On ELF platforms the default static relocation model has a smart enough
+ // linker to cope with referencing external symbols defined in a shared
+ // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
+ RM = Reloc::Static;
+ }
+
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+
+static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new AArch64InstPrinter(MAI, MII, MRI, STI);
+ return 0;
+}
+
+namespace {
+
+class AArch64MCInstrAnalysis : public MCInstrAnalysis {
+public:
+ AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}
+
+ virtual bool isUnconditionalBranch(const MCInst &Inst) const {
+ if (Inst.getOpcode() == AArch64::Bcc
+ && Inst.getOperand(0).getImm() == A64CC::AL)
+ return true;
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ virtual bool isConditionalBranch(const MCInst &Inst) const {
+ if (Inst.getOpcode() == AArch64::Bcc
+ && Inst.getOperand(0).getImm() == A64CC::AL)
+ return false;
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0;
+ // FIXME: We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType
+ != MCOI::OPERAND_PCREL)
+ return -1ULL;
+
+ int64_t Imm = Inst.getOperand(LblOperand).getImm();
+
+ return Addr + Imm;
+ }
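+
+  // Worked example: for a decoded branch whose PC-relative label operand
+  // holds 0x10 and whose address is 0x4000, evaluateBranch returns 0x4010.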
+};
+
+}
+
+static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) {
+ return new AArch64MCInstrAnalysis(Info);
+}
+
+
+
+extern "C" void LLVMInitializeAArch64TargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn A(TheAArch64Target, createAArch64MCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheAArch64Target,
+ createAArch64MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheAArch64Target,
+ createAArch64MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheAArch64Target,
+ createAArch64MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ using AArch64_MC::createAArch64MCSubtargetInfo;
+ TargetRegistry::RegisterMCSubtargetInfo(TheAArch64Target,
+ createAArch64MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheAArch64Target,
+ createAArch64MCInstrAnalysis);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheAArch64Target,
+ createAArch64MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheAArch64Target,
+ createAArch64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheAArch64Target,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheAArch64Target,
+ createAArch64MCInstPrinter);
+}
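+
+// A minimal usage sketch (illustrative only; the triple below is just an
+// example): clients reach the hooks registered above through the registry:
+//   std::string Error;
+//   const Target *T = TargetRegistry::lookupTarget("aarch64-linux-gnu", Error);
+//   if (T)
+//     OwningPtr<MCRegisterInfo> MRI(T->createMCRegInfo("aarch64-linux-gnu"));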
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
new file mode 100644
index 000000000000..3849fe379513
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -0,0 +1,65 @@
+//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AArch64 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64MCTARGETDESC_H
+#define LLVM_AARCH64MCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+class raw_ostream;
+
+extern Target TheAArch64Target;
+
+namespace AArch64_MC {
+ MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI);
+
+MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT,
+ StringRef CPU);
+
+} // End llvm namespace
+
+// Defines symbolic names for AArch64 registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "AArch64GenRegisterInfo.inc"
+
+// Defines symbolic names for the AArch64 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "AArch64GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AArch64GenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..44c66a224e30
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,13 @@
+add_llvm_library(LLVMAArch64Desc
+ AArch64AsmBackend.cpp
+ AArch64ELFObjectWriter.cpp
+ AArch64ELFStreamer.cpp
+ AArch64MCAsmInfo.cpp
+ AArch64MCCodeEmitter.cpp
+ AArch64MCExpr.cpp
+ AArch64MCTargetDesc.cpp
+ )
+add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..37c8035a49f9
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Desc
+parent = AArch64
+required_libraries = AArch64AsmPrinter AArch64Info MC Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/MCTargetDesc/Makefile b/lib/Target/AArch64/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..5779ac5ac60a
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AArch64/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile
new file mode 100644
index 000000000000..641bb83c4775
--- /dev/null
+++ b/lib/Target/AArch64/Makefile
@@ -0,0 +1,30 @@
+##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMAArch64CodeGen
+TARGET = AArch64
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AArch64GenAsmMatcher.inc \
+ AArch64GenAsmWriter.inc \
+ AArch64GenCallingConv.inc \
+ AArch64GenDAGISel.inc \
+ AArch64GenDisassemblerTables.inc \
+ AArch64GenInstrInfo.inc \
+ AArch64GenMCCodeEmitter.inc \
+ AArch64GenMCPseudoLowering.inc \
+ AArch64GenRegisterInfo.inc \
+ AArch64GenSubtargetInfo.inc
+
+DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils
+
+include $(LEVEL)/Makefile.common
+
+
diff --git a/lib/Target/AArch64/README.txt b/lib/Target/AArch64/README.txt
new file mode 100644
index 000000000000..601990f17dee
--- /dev/null
+++ b/lib/Target/AArch64/README.txt
@@ -0,0 +1,2 @@
+This file will contain changes that need to be made before AArch64 can become an
+officially supported target. Currently a placeholder.
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
new file mode 100644
index 000000000000..b8099cb26b0f
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -0,0 +1,24 @@
+//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the key registration step for the architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheAArch64Target;
+
+extern "C" void LLVMInitializeAArch64TargetInfo() {
+ RegisterTarget<Triple::aarch64>
+ X(TheAArch64Target, "aarch64", "AArch64");
+}
diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..e236eed00be1
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Info
+ AArch64TargetInfo.cpp
+ )
+
+add_dependencies(LLVMAArch64Info AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..5b003f012218
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Info
+parent = AArch64
+required_libraries = MC Support Target
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/TargetInfo/Makefile b/lib/Target/AArch64/TargetInfo/Makefile
new file mode 100644
index 000000000000..9dc9aa4bccf7
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
new file mode 100644
index 000000000000..1678559aa084
--- /dev/null
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -0,0 +1,1103 @@
+//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides basic encoding and assembly information for AArch64.
+//
+//===----------------------------------------------------------------------===//
+#include "AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Regex.h"
+
+using namespace llvm;
+
+StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
+ for (unsigned i = 0; i < NumPairs; ++i) {
+ if (Pairs[i].Value == Value) {
+ Valid = true;
+ return Pairs[i].Name;
+ }
+ }
+
+ Valid = false;
+ return StringRef();
+}
+
+uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
+ std::string LowerCaseName = Name.lower();
+ for (unsigned i = 0; i < NumPairs; ++i) {
+ if (Pairs[i].Name == LowerCaseName) {
+ Valid = true;
+ return Pairs[i].Value;
+ }
+ }
+
+ Valid = false;
+ return -1;
+}
+
+bool NamedImmMapper::validImm(uint32_t Value) const {
+ return Value < TooBigImm;
+}
+
+const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = {
+ {"s1e1r", S1E1R},
+ {"s1e2r", S1E2R},
+ {"s1e3r", S1E3R},
+ {"s1e1w", S1E1W},
+ {"s1e2w", S1E2W},
+ {"s1e3w", S1E3W},
+ {"s1e0r", S1E0R},
+ {"s1e0w", S1E0W},
+ {"s12e1r", S12E1R},
+ {"s12e1w", S12E1W},
+ {"s12e0r", S12E0R},
+ {"s12e0w", S12E0W},
+};
+
+A64AT::ATMapper::ATMapper()
+ : NamedImmMapper(ATPairs, 0) {}
+
+const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = {
+ {"oshld", OSHLD},
+ {"oshst", OSHST},
+ {"osh", OSH},
+ {"nshld", NSHLD},
+ {"nshst", NSHST},
+ {"nsh", NSH},
+ {"ishld", ISHLD},
+ {"ishst", ISHST},
+ {"ish", ISH},
+ {"ld", LD},
+ {"st", ST},
+ {"sy", SY}
+};
+
+A64DB::DBarrierMapper::DBarrierMapper()
+ : NamedImmMapper(DBarrierPairs, 16u) {}
+
+const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = {
+ {"zva", ZVA},
+ {"ivac", IVAC},
+ {"isw", ISW},
+ {"cvac", CVAC},
+ {"csw", CSW},
+ {"cvau", CVAU},
+ {"civac", CIVAC},
+ {"cisw", CISW}
+};
+
+A64DC::DCMapper::DCMapper()
+ : NamedImmMapper(DCPairs, 0) {}
+
+const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = {
+ {"ialluis", IALLUIS},
+ {"iallu", IALLU},
+ {"ivau", IVAU}
+};
+
+A64IC::ICMapper::ICMapper()
+ : NamedImmMapper(ICPairs, 0) {}
+
+const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = {
+ {"sy", SY},
+};
+
+A64ISB::ISBMapper::ISBMapper()
+ : NamedImmMapper(ISBPairs, 16) {}
+
+const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = {
+ {"pldl1keep", PLDL1KEEP},
+ {"pldl1strm", PLDL1STRM},
+ {"pldl2keep", PLDL2KEEP},
+ {"pldl2strm", PLDL2STRM},
+ {"pldl3keep", PLDL3KEEP},
+ {"pldl3strm", PLDL3STRM},
+ {"plil1keep", PLIL1KEEP},
+ {"plil1strm", PLIL1STRM},
+ {"plil2keep", PLIL2KEEP},
+ {"plil2strm", PLIL2STRM},
+ {"plil3keep", PLIL3KEEP},
+ {"plil3strm", PLIL3STRM},
+ {"pstl1keep", PSTL1KEEP},
+ {"pstl1strm", PSTL1STRM},
+ {"pstl2keep", PSTL2KEEP},
+ {"pstl2strm", PSTL2STRM},
+ {"pstl3keep", PSTL3KEEP},
+ {"pstl3strm", PSTL3STRM}
+};
+
+A64PRFM::PRFMMapper::PRFMMapper()
+ : NamedImmMapper(PRFMPairs, 32) {}
+
+const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = {
+ {"spsel", SPSel},
+ {"daifset", DAIFSet},
+ {"daifclr", DAIFClr}
+};
+
+A64PState::PStateMapper::PStateMapper()
+ : NamedImmMapper(PStatePairs, 0) {}
+
+const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = {
+ {"mdccsr_el0", MDCCSR_EL0},
+ {"dbgdtrrx_el0", DBGDTRRX_EL0},
+ {"mdrar_el1", MDRAR_EL1},
+ {"oslsr_el1", OSLSR_EL1},
+ {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1},
+ {"pmceid0_el0", PMCEID0_EL0},
+ {"pmceid1_el0", PMCEID1_EL0},
+ {"midr_el1", MIDR_EL1},
+ {"ccsidr_el1", CCSIDR_EL1},
+ {"clidr_el1", CLIDR_EL1},
+ {"ctr_el0", CTR_EL0},
+ {"mpidr_el1", MPIDR_EL1},
+ {"revidr_el1", REVIDR_EL1},
+ {"aidr_el1", AIDR_EL1},
+ {"dczid_el0", DCZID_EL0},
+ {"id_pfr0_el1", ID_PFR0_EL1},
+ {"id_pfr1_el1", ID_PFR1_EL1},
+ {"id_dfr0_el1", ID_DFR0_EL1},
+ {"id_afr0_el1", ID_AFR0_EL1},
+ {"id_mmfr0_el1", ID_MMFR0_EL1},
+ {"id_mmfr1_el1", ID_MMFR1_EL1},
+ {"id_mmfr2_el1", ID_MMFR2_EL1},
+ {"id_mmfr3_el1", ID_MMFR3_EL1},
+ {"id_isar0_el1", ID_ISAR0_EL1},
+ {"id_isar1_el1", ID_ISAR1_EL1},
+ {"id_isar2_el1", ID_ISAR2_EL1},
+ {"id_isar3_el1", ID_ISAR3_EL1},
+ {"id_isar4_el1", ID_ISAR4_EL1},
+ {"id_isar5_el1", ID_ISAR5_EL1},
+ {"id_aa64pfr0_el1", ID_AA64PFR0_EL1},
+ {"id_aa64pfr1_el1", ID_AA64PFR1_EL1},
+ {"id_aa64dfr0_el1", ID_AA64DFR0_EL1},
+ {"id_aa64dfr1_el1", ID_AA64DFR1_EL1},
+ {"id_aa64afr0_el1", ID_AA64AFR0_EL1},
+ {"id_aa64afr1_el1", ID_AA64AFR1_EL1},
+ {"id_aa64isar0_el1", ID_AA64ISAR0_EL1},
+ {"id_aa64isar1_el1", ID_AA64ISAR1_EL1},
+ {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1},
+ {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1},
+ {"mvfr0_el1", MVFR0_EL1},
+ {"mvfr1_el1", MVFR1_EL1},
+ {"mvfr2_el1", MVFR2_EL1},
+ {"rvbar_el1", RVBAR_EL1},
+ {"rvbar_el2", RVBAR_EL2},
+ {"rvbar_el3", RVBAR_EL3},
+ {"isr_el1", ISR_EL1},
+ {"cntpct_el0", CNTPCT_EL0},
+ {"cntvct_el0", CNTVCT_EL0},
+
+ // Trace registers
+ {"trcstatr", TRCSTATR},
+ {"trcidr8", TRCIDR8},
+ {"trcidr9", TRCIDR9},
+ {"trcidr10", TRCIDR10},
+ {"trcidr11", TRCIDR11},
+ {"trcidr12", TRCIDR12},
+ {"trcidr13", TRCIDR13},
+ {"trcidr0", TRCIDR0},
+ {"trcidr1", TRCIDR1},
+ {"trcidr2", TRCIDR2},
+ {"trcidr3", TRCIDR3},
+ {"trcidr4", TRCIDR4},
+ {"trcidr5", TRCIDR5},
+ {"trcidr6", TRCIDR6},
+ {"trcidr7", TRCIDR7},
+ {"trcoslsr", TRCOSLSR},
+ {"trcpdsr", TRCPDSR},
+ {"trcdevaff0", TRCDEVAFF0},
+ {"trcdevaff1", TRCDEVAFF1},
+ {"trclsr", TRCLSR},
+ {"trcauthstatus", TRCAUTHSTATUS},
+ {"trcdevarch", TRCDEVARCH},
+ {"trcdevid", TRCDEVID},
+ {"trcdevtype", TRCDEVTYPE},
+ {"trcpidr4", TRCPIDR4},
+ {"trcpidr5", TRCPIDR5},
+ {"trcpidr6", TRCPIDR6},
+ {"trcpidr7", TRCPIDR7},
+ {"trcpidr0", TRCPIDR0},
+ {"trcpidr1", TRCPIDR1},
+ {"trcpidr2", TRCPIDR2},
+ {"trcpidr3", TRCPIDR3},
+ {"trccidr0", TRCCIDR0},
+ {"trccidr1", TRCCIDR1},
+ {"trccidr2", TRCCIDR2},
+ {"trccidr3", TRCCIDR3},
+
+ // GICv3 registers
+ {"icc_iar1_el1", ICC_IAR1_EL1},
+ {"icc_iar0_el1", ICC_IAR0_EL1},
+ {"icc_hppir1_el1", ICC_HPPIR1_EL1},
+ {"icc_hppir0_el1", ICC_HPPIR0_EL1},
+ {"icc_rpr_el1", ICC_RPR_EL1},
+ {"ich_vtr_el2", ICH_VTR_EL2},
+ {"ich_eisr_el2", ICH_EISR_EL2},
+ {"ich_elsr_el2", ICH_ELSR_EL2}
+};
+
+A64SysReg::MRSMapper::MRSMapper() {
+ InstPairs = &MRSPairs[0];
+ NumInstPairs = llvm::array_lengthof(MRSPairs);
+}
+
+const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = {
+ {"dbgdtrtx_el0", DBGDTRTX_EL0},
+ {"oslar_el1", OSLAR_EL1},
+ {"pmswinc_el0", PMSWINC_EL0},
+
+ // Trace registers
+ {"trcoslar", TRCOSLAR},
+ {"trclar", TRCLAR},
+
+ // GICv3 registers
+ {"icc_eoir1_el1", ICC_EOIR1_EL1},
+ {"icc_eoir0_el1", ICC_EOIR0_EL1},
+ {"icc_dir_el1", ICC_DIR_EL1},
+ {"icc_sgi1r_el1", ICC_SGI1R_EL1},
+ {"icc_asgi1r_el1", ICC_ASGI1R_EL1},
+ {"icc_sgi0r_el1", ICC_SGI0R_EL1}
+};
+
+A64SysReg::MSRMapper::MSRMapper() {
+ InstPairs = &MSRPairs[0];
+ NumInstPairs = llvm::array_lengthof(MSRPairs);
+}
+
+
+const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = {
+ {"osdtrrx_el1", OSDTRRX_EL1},
+ {"osdtrtx_el1", OSDTRTX_EL1},
+ {"teecr32_el1", TEECR32_EL1},
+ {"mdccint_el1", MDCCINT_EL1},
+ {"mdscr_el1", MDSCR_EL1},
+ {"dbgdtr_el0", DBGDTR_EL0},
+ {"oseccr_el1", OSECCR_EL1},
+ {"dbgvcr32_el2", DBGVCR32_EL2},
+ {"dbgbvr0_el1", DBGBVR0_EL1},
+ {"dbgbvr1_el1", DBGBVR1_EL1},
+ {"dbgbvr2_el1", DBGBVR2_EL1},
+ {"dbgbvr3_el1", DBGBVR3_EL1},
+ {"dbgbvr4_el1", DBGBVR4_EL1},
+ {"dbgbvr5_el1", DBGBVR5_EL1},
+ {"dbgbvr6_el1", DBGBVR6_EL1},
+ {"dbgbvr7_el1", DBGBVR7_EL1},
+ {"dbgbvr8_el1", DBGBVR8_EL1},
+ {"dbgbvr9_el1", DBGBVR9_EL1},
+ {"dbgbvr10_el1", DBGBVR10_EL1},
+ {"dbgbvr11_el1", DBGBVR11_EL1},
+ {"dbgbvr12_el1", DBGBVR12_EL1},
+ {"dbgbvr13_el1", DBGBVR13_EL1},
+ {"dbgbvr14_el1", DBGBVR14_EL1},
+ {"dbgbvr15_el1", DBGBVR15_EL1},
+ {"dbgbcr0_el1", DBGBCR0_EL1},
+ {"dbgbcr1_el1", DBGBCR1_EL1},
+ {"dbgbcr2_el1", DBGBCR2_EL1},
+ {"dbgbcr3_el1", DBGBCR3_EL1},
+ {"dbgbcr4_el1", DBGBCR4_EL1},
+ {"dbgbcr5_el1", DBGBCR5_EL1},
+ {"dbgbcr6_el1", DBGBCR6_EL1},
+ {"dbgbcr7_el1", DBGBCR7_EL1},
+ {"dbgbcr8_el1", DBGBCR8_EL1},
+ {"dbgbcr9_el1", DBGBCR9_EL1},
+ {"dbgbcr10_el1", DBGBCR10_EL1},
+ {"dbgbcr11_el1", DBGBCR11_EL1},
+ {"dbgbcr12_el1", DBGBCR12_EL1},
+ {"dbgbcr13_el1", DBGBCR13_EL1},
+ {"dbgbcr14_el1", DBGBCR14_EL1},
+ {"dbgbcr15_el1", DBGBCR15_EL1},
+ {"dbgwvr0_el1", DBGWVR0_EL1},
+ {"dbgwvr1_el1", DBGWVR1_EL1},
+ {"dbgwvr2_el1", DBGWVR2_EL1},
+ {"dbgwvr3_el1", DBGWVR3_EL1},
+ {"dbgwvr4_el1", DBGWVR4_EL1},
+ {"dbgwvr5_el1", DBGWVR5_EL1},
+ {"dbgwvr6_el1", DBGWVR6_EL1},
+ {"dbgwvr7_el1", DBGWVR7_EL1},
+ {"dbgwvr8_el1", DBGWVR8_EL1},
+ {"dbgwvr9_el1", DBGWVR9_EL1},
+ {"dbgwvr10_el1", DBGWVR10_EL1},
+ {"dbgwvr11_el1", DBGWVR11_EL1},
+ {"dbgwvr12_el1", DBGWVR12_EL1},
+ {"dbgwvr13_el1", DBGWVR13_EL1},
+ {"dbgwvr14_el1", DBGWVR14_EL1},
+ {"dbgwvr15_el1", DBGWVR15_EL1},
+ {"dbgwcr0_el1", DBGWCR0_EL1},
+ {"dbgwcr1_el1", DBGWCR1_EL1},
+ {"dbgwcr2_el1", DBGWCR2_EL1},
+ {"dbgwcr3_el1", DBGWCR3_EL1},
+ {"dbgwcr4_el1", DBGWCR4_EL1},
+ {"dbgwcr5_el1", DBGWCR5_EL1},
+ {"dbgwcr6_el1", DBGWCR6_EL1},
+ {"dbgwcr7_el1", DBGWCR7_EL1},
+ {"dbgwcr8_el1", DBGWCR8_EL1},
+ {"dbgwcr9_el1", DBGWCR9_EL1},
+ {"dbgwcr10_el1", DBGWCR10_EL1},
+ {"dbgwcr11_el1", DBGWCR11_EL1},
+ {"dbgwcr12_el1", DBGWCR12_EL1},
+ {"dbgwcr13_el1", DBGWCR13_EL1},
+ {"dbgwcr14_el1", DBGWCR14_EL1},
+ {"dbgwcr15_el1", DBGWCR15_EL1},
+ {"teehbr32_el1", TEEHBR32_EL1},
+ {"osdlr_el1", OSDLR_EL1},
+ {"dbgprcr_el1", DBGPRCR_EL1},
+ {"dbgclaimset_el1", DBGCLAIMSET_EL1},
+ {"dbgclaimclr_el1", DBGCLAIMCLR_EL1},
+ {"csselr_el1", CSSELR_EL1},
+ {"vpidr_el2", VPIDR_EL2},
+ {"vmpidr_el2", VMPIDR_EL2},
+ {"sctlr_el1", SCTLR_EL1},
+ {"sctlr_el2", SCTLR_EL2},
+ {"sctlr_el3", SCTLR_EL3},
+ {"actlr_el1", ACTLR_EL1},
+ {"actlr_el2", ACTLR_EL2},
+ {"actlr_el3", ACTLR_EL3},
+ {"cpacr_el1", CPACR_EL1},
+ {"hcr_el2", HCR_EL2},
+ {"scr_el3", SCR_EL3},
+ {"mdcr_el2", MDCR_EL2},
+ {"sder32_el3", SDER32_EL3},
+ {"cptr_el2", CPTR_EL2},
+ {"cptr_el3", CPTR_EL3},
+ {"hstr_el2", HSTR_EL2},
+ {"hacr_el2", HACR_EL2},
+ {"mdcr_el3", MDCR_EL3},
+ {"ttbr0_el1", TTBR0_EL1},
+ {"ttbr0_el2", TTBR0_EL2},
+ {"ttbr0_el3", TTBR0_EL3},
+ {"ttbr1_el1", TTBR1_EL1},
+ {"tcr_el1", TCR_EL1},
+ {"tcr_el2", TCR_EL2},
+ {"tcr_el3", TCR_EL3},
+ {"vttbr_el2", VTTBR_EL2},
+ {"vtcr_el2", VTCR_EL2},
+ {"dacr32_el2", DACR32_EL2},
+ {"spsr_el1", SPSR_EL1},
+ {"spsr_el2", SPSR_EL2},
+ {"spsr_el3", SPSR_EL3},
+ {"elr_el1", ELR_EL1},
+ {"elr_el2", ELR_EL2},
+ {"elr_el3", ELR_EL3},
+ {"sp_el0", SP_EL0},
+ {"sp_el1", SP_EL1},
+ {"sp_el2", SP_EL2},
+ {"spsel", SPSel},
+ {"nzcv", NZCV},
+ {"daif", DAIF},
+ {"currentel", CurrentEL},
+ {"spsr_irq", SPSR_irq},
+ {"spsr_abt", SPSR_abt},
+ {"spsr_und", SPSR_und},
+ {"spsr_fiq", SPSR_fiq},
+ {"fpcr", FPCR},
+ {"fpsr", FPSR},
+ {"dspsr_el0", DSPSR_EL0},
+ {"dlr_el0", DLR_EL0},
+ {"ifsr32_el2", IFSR32_EL2},
+ {"afsr0_el1", AFSR0_EL1},
+ {"afsr0_el2", AFSR0_EL2},
+ {"afsr0_el3", AFSR0_EL3},
+ {"afsr1_el1", AFSR1_EL1},
+ {"afsr1_el2", AFSR1_EL2},
+ {"afsr1_el3", AFSR1_EL3},
+ {"esr_el1", ESR_EL1},
+ {"esr_el2", ESR_EL2},
+ {"esr_el3", ESR_EL3},
+ {"fpexc32_el2", FPEXC32_EL2},
+ {"far_el1", FAR_EL1},
+ {"far_el2", FAR_EL2},
+ {"far_el3", FAR_EL3},
+ {"hpfar_el2", HPFAR_EL2},
+ {"par_el1", PAR_EL1},
+ {"pmcr_el0", PMCR_EL0},
+ {"pmcntenset_el0", PMCNTENSET_EL0},
+ {"pmcntenclr_el0", PMCNTENCLR_EL0},
+ {"pmovsclr_el0", PMOVSCLR_EL0},
+ {"pmselr_el0", PMSELR_EL0},
+ {"pmccntr_el0", PMCCNTR_EL0},
+ {"pmxevtyper_el0", PMXEVTYPER_EL0},
+ {"pmxevcntr_el0", PMXEVCNTR_EL0},
+ {"pmuserenr_el0", PMUSERENR_EL0},
+ {"pmintenset_el1", PMINTENSET_EL1},
+ {"pmintenclr_el1", PMINTENCLR_EL1},
+ {"pmovsset_el0", PMOVSSET_EL0},
+ {"mair_el1", MAIR_EL1},
+ {"mair_el2", MAIR_EL2},
+ {"mair_el3", MAIR_EL3},
+ {"amair_el1", AMAIR_EL1},
+ {"amair_el2", AMAIR_EL2},
+ {"amair_el3", AMAIR_EL3},
+ {"vbar_el1", VBAR_EL1},
+ {"vbar_el2", VBAR_EL2},
+ {"vbar_el3", VBAR_EL3},
+ {"rmr_el1", RMR_EL1},
+ {"rmr_el2", RMR_EL2},
+ {"rmr_el3", RMR_EL3},
+ {"contextidr_el1", CONTEXTIDR_EL1},
+ {"tpidr_el0", TPIDR_EL0},
+ {"tpidr_el2", TPIDR_EL2},
+ {"tpidr_el3", TPIDR_EL3},
+ {"tpidrro_el0", TPIDRRO_EL0},
+ {"tpidr_el1", TPIDR_EL1},
+ {"cntfrq_el0", CNTFRQ_EL0},
+ {"cntvoff_el2", CNTVOFF_EL2},
+ {"cntkctl_el1", CNTKCTL_EL1},
+ {"cnthctl_el2", CNTHCTL_EL2},
+ {"cntp_tval_el0", CNTP_TVAL_EL0},
+ {"cnthp_tval_el2", CNTHP_TVAL_EL2},
+ {"cntps_tval_el1", CNTPS_TVAL_EL1},
+ {"cntp_ctl_el0", CNTP_CTL_EL0},
+ {"cnthp_ctl_el2", CNTHP_CTL_EL2},
+ {"cntps_ctl_el1", CNTPS_CTL_EL1},
+ {"cntp_cval_el0", CNTP_CVAL_EL0},
+ {"cnthp_cval_el2", CNTHP_CVAL_EL2},
+ {"cntps_cval_el1", CNTPS_CVAL_EL1},
+ {"cntv_tval_el0", CNTV_TVAL_EL0},
+ {"cntv_ctl_el0", CNTV_CTL_EL0},
+ {"cntv_cval_el0", CNTV_CVAL_EL0},
+ {"pmevcntr0_el0", PMEVCNTR0_EL0},
+ {"pmevcntr1_el0", PMEVCNTR1_EL0},
+ {"pmevcntr2_el0", PMEVCNTR2_EL0},
+ {"pmevcntr3_el0", PMEVCNTR3_EL0},
+ {"pmevcntr4_el0", PMEVCNTR4_EL0},
+ {"pmevcntr5_el0", PMEVCNTR5_EL0},
+ {"pmevcntr6_el0", PMEVCNTR6_EL0},
+ {"pmevcntr7_el0", PMEVCNTR7_EL0},
+ {"pmevcntr8_el0", PMEVCNTR8_EL0},
+ {"pmevcntr9_el0", PMEVCNTR9_EL0},
+ {"pmevcntr10_el0", PMEVCNTR10_EL0},
+ {"pmevcntr11_el0", PMEVCNTR11_EL0},
+ {"pmevcntr12_el0", PMEVCNTR12_EL0},
+ {"pmevcntr13_el0", PMEVCNTR13_EL0},
+ {"pmevcntr14_el0", PMEVCNTR14_EL0},
+ {"pmevcntr15_el0", PMEVCNTR15_EL0},
+ {"pmevcntr16_el0", PMEVCNTR16_EL0},
+ {"pmevcntr17_el0", PMEVCNTR17_EL0},
+ {"pmevcntr18_el0", PMEVCNTR18_EL0},
+ {"pmevcntr19_el0", PMEVCNTR19_EL0},
+ {"pmevcntr20_el0", PMEVCNTR20_EL0},
+ {"pmevcntr21_el0", PMEVCNTR21_EL0},
+ {"pmevcntr22_el0", PMEVCNTR22_EL0},
+ {"pmevcntr23_el0", PMEVCNTR23_EL0},
+ {"pmevcntr24_el0", PMEVCNTR24_EL0},
+ {"pmevcntr25_el0", PMEVCNTR25_EL0},
+ {"pmevcntr26_el0", PMEVCNTR26_EL0},
+ {"pmevcntr27_el0", PMEVCNTR27_EL0},
+ {"pmevcntr28_el0", PMEVCNTR28_EL0},
+ {"pmevcntr29_el0", PMEVCNTR29_EL0},
+ {"pmevcntr30_el0", PMEVCNTR30_EL0},
+ {"pmccfiltr_el0", PMCCFILTR_EL0},
+ {"pmevtyper0_el0", PMEVTYPER0_EL0},
+ {"pmevtyper1_el0", PMEVTYPER1_EL0},
+ {"pmevtyper2_el0", PMEVTYPER2_EL0},
+ {"pmevtyper3_el0", PMEVTYPER3_EL0},
+ {"pmevtyper4_el0", PMEVTYPER4_EL0},
+ {"pmevtyper5_el0", PMEVTYPER5_EL0},
+ {"pmevtyper6_el0", PMEVTYPER6_EL0},
+ {"pmevtyper7_el0", PMEVTYPER7_EL0},
+ {"pmevtyper8_el0", PMEVTYPER8_EL0},
+ {"pmevtyper9_el0", PMEVTYPER9_EL0},
+ {"pmevtyper10_el0", PMEVTYPER10_EL0},
+ {"pmevtyper11_el0", PMEVTYPER11_EL0},
+ {"pmevtyper12_el0", PMEVTYPER12_EL0},
+ {"pmevtyper13_el0", PMEVTYPER13_EL0},
+ {"pmevtyper14_el0", PMEVTYPER14_EL0},
+ {"pmevtyper15_el0", PMEVTYPER15_EL0},
+ {"pmevtyper16_el0", PMEVTYPER16_EL0},
+ {"pmevtyper17_el0", PMEVTYPER17_EL0},
+ {"pmevtyper18_el0", PMEVTYPER18_EL0},
+ {"pmevtyper19_el0", PMEVTYPER19_EL0},
+ {"pmevtyper20_el0", PMEVTYPER20_EL0},
+ {"pmevtyper21_el0", PMEVTYPER21_EL0},
+ {"pmevtyper22_el0", PMEVTYPER22_EL0},
+ {"pmevtyper23_el0", PMEVTYPER23_EL0},
+ {"pmevtyper24_el0", PMEVTYPER24_EL0},
+ {"pmevtyper25_el0", PMEVTYPER25_EL0},
+ {"pmevtyper26_el0", PMEVTYPER26_EL0},
+ {"pmevtyper27_el0", PMEVTYPER27_EL0},
+ {"pmevtyper28_el0", PMEVTYPER28_EL0},
+ {"pmevtyper29_el0", PMEVTYPER29_EL0},
+ {"pmevtyper30_el0", PMEVTYPER30_EL0},
+
+ // Trace registers
+ {"trcprgctlr", TRCPRGCTLR},
+ {"trcprocselr", TRCPROCSELR},
+ {"trcconfigr", TRCCONFIGR},
+ {"trcauxctlr", TRCAUXCTLR},
+ {"trceventctl0r", TRCEVENTCTL0R},
+ {"trceventctl1r", TRCEVENTCTL1R},
+ {"trcstallctlr", TRCSTALLCTLR},
+ {"trctsctlr", TRCTSCTLR},
+ {"trcsyncpr", TRCSYNCPR},
+ {"trcccctlr", TRCCCCTLR},
+ {"trcbbctlr", TRCBBCTLR},
+ {"trctraceidr", TRCTRACEIDR},
+ {"trcqctlr", TRCQCTLR},
+ {"trcvictlr", TRCVICTLR},
+ {"trcviiectlr", TRCVIIECTLR},
+ {"trcvissctlr", TRCVISSCTLR},
+ {"trcvipcssctlr", TRCVIPCSSCTLR},
+ {"trcvdctlr", TRCVDCTLR},
+ {"trcvdsacctlr", TRCVDSACCTLR},
+ {"trcvdarcctlr", TRCVDARCCTLR},
+ {"trcseqevr0", TRCSEQEVR0},
+ {"trcseqevr1", TRCSEQEVR1},
+ {"trcseqevr2", TRCSEQEVR2},
+ {"trcseqrstevr", TRCSEQRSTEVR},
+ {"trcseqstr", TRCSEQSTR},
+ {"trcextinselr", TRCEXTINSELR},
+ {"trccntrldvr0", TRCCNTRLDVR0},
+ {"trccntrldvr1", TRCCNTRLDVR1},
+ {"trccntrldvr2", TRCCNTRLDVR2},
+ {"trccntrldvr3", TRCCNTRLDVR3},
+ {"trccntctlr0", TRCCNTCTLR0},
+ {"trccntctlr1", TRCCNTCTLR1},
+ {"trccntctlr2", TRCCNTCTLR2},
+ {"trccntctlr3", TRCCNTCTLR3},
+ {"trccntvr0", TRCCNTVR0},
+ {"trccntvr1", TRCCNTVR1},
+ {"trccntvr2", TRCCNTVR2},
+ {"trccntvr3", TRCCNTVR3},
+ {"trcimspec0", TRCIMSPEC0},
+ {"trcimspec1", TRCIMSPEC1},
+ {"trcimspec2", TRCIMSPEC2},
+ {"trcimspec3", TRCIMSPEC3},
+ {"trcimspec4", TRCIMSPEC4},
+ {"trcimspec5", TRCIMSPEC5},
+ {"trcimspec6", TRCIMSPEC6},
+ {"trcimspec7", TRCIMSPEC7},
+ {"trcrsctlr2", TRCRSCTLR2},
+ {"trcrsctlr3", TRCRSCTLR3},
+ {"trcrsctlr4", TRCRSCTLR4},
+ {"trcrsctlr5", TRCRSCTLR5},
+ {"trcrsctlr6", TRCRSCTLR6},
+ {"trcrsctlr7", TRCRSCTLR7},
+ {"trcrsctlr8", TRCRSCTLR8},
+ {"trcrsctlr9", TRCRSCTLR9},
+ {"trcrsctlr10", TRCRSCTLR10},
+ {"trcrsctlr11", TRCRSCTLR11},
+ {"trcrsctlr12", TRCRSCTLR12},
+ {"trcrsctlr13", TRCRSCTLR13},
+ {"trcrsctlr14", TRCRSCTLR14},
+ {"trcrsctlr15", TRCRSCTLR15},
+ {"trcrsctlr16", TRCRSCTLR16},
+ {"trcrsctlr17", TRCRSCTLR17},
+ {"trcrsctlr18", TRCRSCTLR18},
+ {"trcrsctlr19", TRCRSCTLR19},
+ {"trcrsctlr20", TRCRSCTLR20},
+ {"trcrsctlr21", TRCRSCTLR21},
+ {"trcrsctlr22", TRCRSCTLR22},
+ {"trcrsctlr23", TRCRSCTLR23},
+ {"trcrsctlr24", TRCRSCTLR24},
+ {"trcrsctlr25", TRCRSCTLR25},
+ {"trcrsctlr26", TRCRSCTLR26},
+ {"trcrsctlr27", TRCRSCTLR27},
+ {"trcrsctlr28", TRCRSCTLR28},
+ {"trcrsctlr29", TRCRSCTLR29},
+ {"trcrsctlr30", TRCRSCTLR30},
+ {"trcrsctlr31", TRCRSCTLR31},
+ {"trcssccr0", TRCSSCCR0},
+ {"trcssccr1", TRCSSCCR1},
+ {"trcssccr2", TRCSSCCR2},
+ {"trcssccr3", TRCSSCCR3},
+ {"trcssccr4", TRCSSCCR4},
+ {"trcssccr5", TRCSSCCR5},
+ {"trcssccr6", TRCSSCCR6},
+ {"trcssccr7", TRCSSCCR7},
+ {"trcsscsr0", TRCSSCSR0},
+ {"trcsscsr1", TRCSSCSR1},
+ {"trcsscsr2", TRCSSCSR2},
+ {"trcsscsr3", TRCSSCSR3},
+ {"trcsscsr4", TRCSSCSR4},
+ {"trcsscsr5", TRCSSCSR5},
+ {"trcsscsr6", TRCSSCSR6},
+ {"trcsscsr7", TRCSSCSR7},
+ {"trcsspcicr0", TRCSSPCICR0},
+ {"trcsspcicr1", TRCSSPCICR1},
+ {"trcsspcicr2", TRCSSPCICR2},
+ {"trcsspcicr3", TRCSSPCICR3},
+ {"trcsspcicr4", TRCSSPCICR4},
+ {"trcsspcicr5", TRCSSPCICR5},
+ {"trcsspcicr6", TRCSSPCICR6},
+ {"trcsspcicr7", TRCSSPCICR7},
+ {"trcpdcr", TRCPDCR},
+ {"trcacvr0", TRCACVR0},
+ {"trcacvr1", TRCACVR1},
+ {"trcacvr2", TRCACVR2},
+ {"trcacvr3", TRCACVR3},
+ {"trcacvr4", TRCACVR4},
+ {"trcacvr5", TRCACVR5},
+ {"trcacvr6", TRCACVR6},
+ {"trcacvr7", TRCACVR7},
+ {"trcacvr8", TRCACVR8},
+ {"trcacvr9", TRCACVR9},
+ {"trcacvr10", TRCACVR10},
+ {"trcacvr11", TRCACVR11},
+ {"trcacvr12", TRCACVR12},
+ {"trcacvr13", TRCACVR13},
+ {"trcacvr14", TRCACVR14},
+ {"trcacvr15", TRCACVR15},
+ {"trcacatr0", TRCACATR0},
+ {"trcacatr1", TRCACATR1},
+ {"trcacatr2", TRCACATR2},
+ {"trcacatr3", TRCACATR3},
+ {"trcacatr4", TRCACATR4},
+ {"trcacatr5", TRCACATR5},
+ {"trcacatr6", TRCACATR6},
+ {"trcacatr7", TRCACATR7},
+ {"trcacatr8", TRCACATR8},
+ {"trcacatr9", TRCACATR9},
+ {"trcacatr10", TRCACATR10},
+ {"trcacatr11", TRCACATR11},
+ {"trcacatr12", TRCACATR12},
+ {"trcacatr13", TRCACATR13},
+ {"trcacatr14", TRCACATR14},
+ {"trcacatr15", TRCACATR15},
+ {"trcdvcvr0", TRCDVCVR0},
+ {"trcdvcvr1", TRCDVCVR1},
+ {"trcdvcvr2", TRCDVCVR2},
+ {"trcdvcvr3", TRCDVCVR3},
+ {"trcdvcvr4", TRCDVCVR4},
+ {"trcdvcvr5", TRCDVCVR5},
+ {"trcdvcvr6", TRCDVCVR6},
+ {"trcdvcvr7", TRCDVCVR7},
+ {"trcdvcmr0", TRCDVCMR0},
+ {"trcdvcmr1", TRCDVCMR1},
+ {"trcdvcmr2", TRCDVCMR2},
+ {"trcdvcmr3", TRCDVCMR3},
+ {"trcdvcmr4", TRCDVCMR4},
+ {"trcdvcmr5", TRCDVCMR5},
+ {"trcdvcmr6", TRCDVCMR6},
+ {"trcdvcmr7", TRCDVCMR7},
+ {"trccidcvr0", TRCCIDCVR0},
+ {"trccidcvr1", TRCCIDCVR1},
+ {"trccidcvr2", TRCCIDCVR2},
+ {"trccidcvr3", TRCCIDCVR3},
+ {"trccidcvr4", TRCCIDCVR4},
+ {"trccidcvr5", TRCCIDCVR5},
+ {"trccidcvr6", TRCCIDCVR6},
+ {"trccidcvr7", TRCCIDCVR7},
+ {"trcvmidcvr0", TRCVMIDCVR0},
+ {"trcvmidcvr1", TRCVMIDCVR1},
+ {"trcvmidcvr2", TRCVMIDCVR2},
+ {"trcvmidcvr3", TRCVMIDCVR3},
+ {"trcvmidcvr4", TRCVMIDCVR4},
+ {"trcvmidcvr5", TRCVMIDCVR5},
+ {"trcvmidcvr6", TRCVMIDCVR6},
+ {"trcvmidcvr7", TRCVMIDCVR7},
+ {"trccidcctlr0", TRCCIDCCTLR0},
+ {"trccidcctlr1", TRCCIDCCTLR1},
+ {"trcvmidcctlr0", TRCVMIDCCTLR0},
+ {"trcvmidcctlr1", TRCVMIDCCTLR1},
+ {"trcitctrl", TRCITCTRL},
+ {"trcclaimset", TRCCLAIMSET},
+ {"trcclaimclr", TRCCLAIMCLR},
+
+ // GICv3 registers
+ {"icc_bpr1_el1", ICC_BPR1_EL1},
+ {"icc_bpr0_el1", ICC_BPR0_EL1},
+ {"icc_pmr_el1", ICC_PMR_EL1},
+ {"icc_ctlr_el1", ICC_CTLR_EL1},
+ {"icc_ctlr_el3", ICC_CTLR_EL3},
+ {"icc_sre_el1", ICC_SRE_EL1},
+ {"icc_sre_el2", ICC_SRE_EL2},
+ {"icc_sre_el3", ICC_SRE_EL3},
+ {"icc_igrpen0_el1", ICC_IGRPEN0_EL1},
+ {"icc_igrpen1_el1", ICC_IGRPEN1_EL1},
+ {"icc_igrpen1_el3", ICC_IGRPEN1_EL3},
+ {"icc_seien_el1", ICC_SEIEN_EL1},
+ {"icc_ap0r0_el1", ICC_AP0R0_EL1},
+ {"icc_ap0r1_el1", ICC_AP0R1_EL1},
+ {"icc_ap0r2_el1", ICC_AP0R2_EL1},
+ {"icc_ap0r3_el1", ICC_AP0R3_EL1},
+ {"icc_ap1r0_el1", ICC_AP1R0_EL1},
+ {"icc_ap1r1_el1", ICC_AP1R1_EL1},
+ {"icc_ap1r2_el1", ICC_AP1R2_EL1},
+ {"icc_ap1r3_el1", ICC_AP1R3_EL1},
+ {"ich_ap0r0_el2", ICH_AP0R0_EL2},
+ {"ich_ap0r1_el2", ICH_AP0R1_EL2},
+ {"ich_ap0r2_el2", ICH_AP0R2_EL2},
+ {"ich_ap0r3_el2", ICH_AP0R3_EL2},
+ {"ich_ap1r0_el2", ICH_AP1R0_EL2},
+ {"ich_ap1r1_el2", ICH_AP1R1_EL2},
+ {"ich_ap1r2_el2", ICH_AP1R2_EL2},
+ {"ich_ap1r3_el2", ICH_AP1R3_EL2},
+ {"ich_hcr_el2", ICH_HCR_EL2},
+ {"ich_misr_el2", ICH_MISR_EL2},
+ {"ich_vmcr_el2", ICH_VMCR_EL2},
+ {"ich_vseir_el2", ICH_VSEIR_EL2},
+ {"ich_lr0_el2", ICH_LR0_EL2},
+ {"ich_lr1_el2", ICH_LR1_EL2},
+ {"ich_lr2_el2", ICH_LR2_EL2},
+ {"ich_lr3_el2", ICH_LR3_EL2},
+ {"ich_lr4_el2", ICH_LR4_EL2},
+ {"ich_lr5_el2", ICH_LR5_EL2},
+ {"ich_lr6_el2", ICH_LR6_EL2},
+ {"ich_lr7_el2", ICH_LR7_EL2},
+ {"ich_lr8_el2", ICH_LR8_EL2},
+ {"ich_lr9_el2", ICH_LR9_EL2},
+ {"ich_lr10_el2", ICH_LR10_EL2},
+ {"ich_lr11_el2", ICH_LR11_EL2},
+ {"ich_lr12_el2", ICH_LR12_EL2},
+ {"ich_lr13_el2", ICH_LR13_EL2},
+ {"ich_lr14_el2", ICH_LR14_EL2},
+ {"ich_lr15_el2", ICH_LR15_EL2}
+};
+
+uint32_t
+A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
+ // First search the registers shared by all
+ std::string NameLower = Name.lower();
+ for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+ if (SysRegPairs[i].Name == NameLower) {
+ Valid = true;
+ return SysRegPairs[i].Value;
+ }
+ }
+
+ // Now try the instruction-specific registers (either read-only or
+ // write-only).
+ for (unsigned i = 0; i < NumInstPairs; ++i) {
+ if (InstPairs[i].Name == NameLower) {
+ Valid = true;
+ return InstPairs[i].Value;
+ }
+ }
+
+ // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits
+ // are: 11 xxx 1x11 xxxx xxx
+ Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$");
+
+ SmallVector<StringRef, 4> Ops;
+ if (!GenericRegPattern.match(NameLower, &Ops)) {
+ Valid = false;
+ return -1;
+ }
+
+ uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
+ uint32_t Bits;
+ Ops[1].getAsInteger(10, Op1);
+ Ops[2].getAsInteger(10, CRn);
+ Ops[3].getAsInteger(10, CRm);
+ Ops[4].getAsInteger(10, Op2);
+ Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
+
+ Valid = true;
+ return Bits;
+}
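+
+// Worked example: the generic name "s3_0_c11_c3_2" matches the pattern with
+// Op0=3, Op1=0, CRn=11, CRm=3, Op2=2, so the function returns
+// (3 << 14) | (0 << 11) | (11 << 7) | (3 << 3) | 2 == 0xc59a.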
+
+std::string
+A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+ for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+ if (SysRegPairs[i].Value == Bits) {
+ Valid = true;
+ return SysRegPairs[i].Name;
+ }
+ }
+
+ for (unsigned i = 0; i < NumInstPairs; ++i) {
+ if (InstPairs[i].Value == Bits) {
+ Valid = true;
+ return InstPairs[i].Name;
+ }
+ }
+
+ uint32_t Op0 = (Bits >> 14) & 0x3;
+ uint32_t Op1 = (Bits >> 11) & 0x7;
+ uint32_t CRn = (Bits >> 7) & 0xf;
+ uint32_t CRm = (Bits >> 3) & 0xf;
+ uint32_t Op2 = Bits & 0x7;
+
+ // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic
+ // name.
+ if (Op0 != 3 || (CRn != 11 && CRn != 15)) {
+ Valid = false;
+ return "";
+ }
+
+ assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg");
+
+ Valid = true;
+ return "s3_" + utostr(Op1) + "_c" + utostr(CRn)
+ + "_c" + utostr(CRm) + "_" + utostr(Op2);
+}
+
+const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = {
+ {"ipas2e1is", IPAS2E1IS},
+ {"ipas2le1is", IPAS2LE1IS},
+ {"vmalle1is", VMALLE1IS},
+ {"alle2is", ALLE2IS},
+ {"alle3is", ALLE3IS},
+ {"vae1is", VAE1IS},
+ {"vae2is", VAE2IS},
+ {"vae3is", VAE3IS},
+ {"aside1is", ASIDE1IS},
+ {"vaae1is", VAAE1IS},
+ {"alle1is", ALLE1IS},
+ {"vale1is", VALE1IS},
+ {"vale2is", VALE2IS},
+ {"vale3is", VALE3IS},
+ {"vmalls12e1is", VMALLS12E1IS},
+ {"vaale1is", VAALE1IS},
+ {"ipas2e1", IPAS2E1},
+ {"ipas2le1", IPAS2LE1},
+ {"vmalle1", VMALLE1},
+ {"alle2", ALLE2},
+ {"alle3", ALLE3},
+ {"vae1", VAE1},
+ {"vae2", VAE2},
+ {"vae3", VAE3},
+ {"aside1", ASIDE1},
+ {"vaae1", VAAE1},
+ {"alle1", ALLE1},
+ {"vale1", VALE1},
+ {"vale2", VALE2},
+ {"vale3", VALE3},
+ {"vmalls12e1", VMALLS12E1},
+ {"vaale1", VAALE1}
+};
+
+A64TLBI::TLBIMapper::TLBIMapper()
+ : NamedImmMapper(TLBIPairs, 0) {}
+
+bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) {
+ const fltSemantics &Sem = Val.getSemantics();
+ unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1;
+
+ uint32_t ExpMask;
+ switch (FracBits) {
+ case 10: // IEEE half-precision
+ ExpMask = 0x1f;
+ break;
+ case 23: // IEEE single-precision
+ ExpMask = 0xff;
+ break;
+ case 52: // IEEE double-precision
+ ExpMask = 0x7ff;
+ break;
+ case 112: // IEEE quad-precision
+    // No immediates are valid for quad precision.
+    return false;
+  default:
+    llvm_unreachable("Only half, single, double and quad precision supported");
+ }
+
+ uint32_t ExpStart = FracBits;
+ uint64_t FracMask = (1ULL << FracBits) - 1;
+
+ uint32_t Sign = Val.isNegative();
+
+  uint64_t Bits = Val.bitcastToAPInt().getLimitedValue();
+ uint64_t Fraction = Bits & FracMask;
+ int32_t Exponent = ((Bits >> ExpStart) & ExpMask);
+ Exponent -= ExpMask >> 1;
+
+ // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19)
+ // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48)
+ // This translates to: only 4 bits of fraction; -3 <= exp <= 4.
+ uint64_t A64FracStart = FracBits - 4;
+ uint64_t A64FracMask = 0xf;
+
+ // Are there too many fraction bits?
+ if (Fraction & ~(A64FracMask << A64FracStart))
+ return false;
+
+ if (Exponent < -3 || Exponent > 4)
+ return false;
+
+ uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask;
+ uint32_t PackedExp = (Exponent + 7) & 0x7;
+
+ Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction;
+ return true;
+}
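+
+// Worked example: +2.0f has single-precision bits 0x40000000 (sign 0, biased
+// exponent 128, fraction 0), so Exponent = 128 - 127 = 1, PackedExp =
+// (1 + 7) & 0x7 = 0, and Imm8Bits = 0x00.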
+
+// Encoding of the immediate for logical (immediate) instructions:
+//
+// | N | imms | immr | size | R | S |
+// |---+--------+--------+------+--------------+--------------|
+// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) |
+// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) |
+// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) |
+// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) |
+// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) |
+// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) |
+// | 0 | 11111x | - | | UNALLOCATED | |
+//
+// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in
+// which the lower S+1 bits are ones and the remaining bits are zero, then
+// rotated right by R bits, which is then replicated across the datapath.
+//
+// + Values of 'N', 'imms' and 'immr' which do not match the above table are
+// RESERVED.
+// + If all 's' bits in the imms field are set then the instruction is
+// RESERVED.
+// + The 'x' bits in the 'immr' field are IGNORED.
+
+bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) {
+ int RepeatWidth;
+ int Rotation = 0;
+ int Num1s = 0;
+
+ // Because there are S+1 ones in the replicated mask, an immediate of all
+ // zeros is not allowed. Filtering it here is probably more efficient.
+ if (Imm == 0) return false;
+
+ for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) {
+ uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1;
+ uint64_t ReplicatedMask = Imm & RepeatMask;
+
+ if (ReplicatedMask == 0) continue;
+
+ // First we have to make sure the mask is actually repeated in each slot for
+ // this width-specifier.
+ bool IsReplicatedMask = true;
+ for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) {
+ if (((Imm >> i) & RepeatMask) != ReplicatedMask) {
+ IsReplicatedMask = false;
+ break;
+ }
+ }
+ if (!IsReplicatedMask) continue;
+
+ // Now we have to work out the amount of rotation needed. The first part of
+ // this calculation is actually independent of RepeatWidth, but the complex
+ // case will depend on it.
+ Rotation = CountTrailingZeros_64(Imm);
+ if (Rotation == 0) {
+      // There were no trailing zeros, which means it's either in place or
+      // there are 1s at each end (e.g. 0x8003 needs rotating).
+ Rotation = RegWidth == 64 ? CountLeadingOnes_64(Imm)
+ : CountLeadingOnes_32(Imm);
+ Rotation = RepeatWidth - Rotation;
+ }
+
+ uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation)
+ | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
+ // Of course, they may not actually be ones, so we have to check that:
+ if (!isMask_64(ReplicatedOnes))
+ continue;
+
+ Num1s = CountTrailingOnes_64(ReplicatedOnes);
+
+ // We know we've got an almost valid encoding (certainly, if this is invalid
+ // no other parameters would work).
+ break;
+ }
+
+ // The encodings which would produce all 1s are RESERVED.
+ if (RepeatWidth == 1 || Num1s == RepeatWidth) return false;
+
+ uint32_t N = RepeatWidth == 64;
+ uint32_t ImmR = RepeatWidth - Rotation;
+ uint32_t ImmS = Num1s - 1;
+
+ switch (RepeatWidth) {
+  default: break; // No action required for widths 32 and 64.
+ case 16: ImmS |= 0x20; break; // 10ssss
+ case 8: ImmS |= 0x30; break; // 110sss
+ case 4: ImmS |= 0x38; break; // 1110ss
+ case 2: ImmS |= 0x3c; break; // 11110s
+ }
+
+ Bits = ImmS | (ImmR << 6) | (N << 12);
+
+ return true;
+}
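+
+// Worked example: for RegWidth == 32 and Imm == 0x0000ffff (sixteen
+// contiguous ones, unrotated), the loop settles on RepeatWidth == 32, giving
+// N = 0, ImmR = 0, ImmS = 15 and hence Bits == 0x00f.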
+
+
+bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits,
+ uint64_t &Imm) {
+ uint32_t N = Bits >> 12;
+ uint32_t ImmR = (Bits >> 6) & 0x3f;
+ uint32_t ImmS = Bits & 0x3f;
+
+ // N=1 encodes a 64-bit replication and is invalid for the 32-bit
+ // instructions.
+ if (RegWidth == 32 && N != 0) return false;
+
+ int Width = 0;
+ if (N == 1)
+ Width = 64;
+ else if ((ImmS & 0x20) == 0)
+ Width = 32;
+ else if ((ImmS & 0x10) == 0)
+ Width = 16;
+ else if ((ImmS & 0x08) == 0)
+ Width = 8;
+ else if ((ImmS & 0x04) == 0)
+ Width = 4;
+ else if ((ImmS & 0x02) == 0)
+ Width = 2;
+ else {
+ // ImmS is 0b11111x: UNALLOCATED
+ return false;
+ }
+
+ int Num1s = (ImmS & (Width - 1)) + 1;
+
+ // All encodings which would map to -1 (signed) are RESERVED.
+ if (Num1s == Width) return false;
+
+ int Rotation = (ImmR & (Width - 1));
+ uint64_t Mask = (1ULL << Num1s) - 1;
+ uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1;
+ Mask = (Mask >> Rotation)
+ | ((Mask << (Width - Rotation)) & WidthMask);
+
+ Imm = 0;
+ for (unsigned i = 0; i < RegWidth / Width; ++i) {
+ Imm |= Mask;
+ Mask <<= Width;
+ }
+
+ return true;
+}
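+
+// Worked example (the inverse of the encoding case above): RegWidth == 32 and
+// Bits == 0x00f decode to N = 0, ImmR = 0, ImmS = 15, so Width == 32,
+// Num1s == 16 and Imm == 0x0000ffff.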
+
+bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
+ // If high bits are set then a 32-bit MOVZ can't possibly work.
+ if (RegWidth == 32 && (Value & ~0xffffffffULL))
+ return false;
+
+ for (int i = 0; i < RegWidth; i += 16) {
+ // If the value is 0 when we mask out all the bits that could be set with
+ // the current LSL value then it's representable.
+ if ((Value & ~(0xffffULL << i)) == 0) {
+ Shift = i / 16;
+ UImm16 = (Value >> i) & 0xffff;
+ return true;
+ }
+ }
+ return false;
+}
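+
+// Worked example: for RegWidth == 64 and Value == 0xabcd00000000ULL, the
+// i == 32 iteration succeeds, giving UImm16 == 0xabcd and Shift == 2
+// (i.e. MOVZ ..., #0xabcd, LSL #32).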
+
+bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
+ // MOVN is defined to set its register to NOT(LSL(imm16, shift)).
+
+ // We have to be a little careful about a 32-bit register: 0xffff_1234 *is*
+ // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not
+ // a valid input for isMOVZImm.
+ if (RegWidth == 32 && (Value & ~0xffffffffULL))
+ return false;
+
+ uint64_t MOVZEquivalent = RegWidth == 32 ? ~Value & 0xffffffff : ~Value;
+
+ return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift);
+}
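+
+// Worked example: for RegWidth == 32 and Value == 0xffff1234, MOVZEquivalent
+// is 0x0000edcb, so UImm16 == 0xedcb and Shift == 0; MOVN with that immediate
+// reconstructs 0xffff1234.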
+
+bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value,
+ int &UImm16, int &Shift) {
+ if (isMOVZImm(RegWidth, Value, UImm16, Shift))
+ return false;
+
+ return isMOVNImm(RegWidth, Value, UImm16, Shift);
+}
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
new file mode 100644
index 000000000000..1b773d632ebe
--- /dev/null
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -0,0 +1,1068 @@
+//===-- AArch64BaseInfo.h - Top level definitions for AArch64 --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the AArch64 target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_BASEINFO_H
+#define LLVM_AARCH64_BASEINFO_H
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+// Enums corresponding to AArch64 condition codes
+namespace A64CC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ = 0, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Unsigned higher or same >, ==, or unordered
+    LO, // Unsigned lower                Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Ordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Signed greater than Greater than
+ LE, // Signed less than or equal <, ==, or unordered
+ AL, // Always (unconditional) Always (unconditional)
+ NV, // Always (unconditional) Always (unconditional)
+ // Note the NV exists purely to disassemble 0b1111. Execution
+ // is "always".
+ Invalid
+ };
+
+} // namespace A64CC
+
+inline static const char *A64CondCodeToString(A64CC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case A64CC::EQ: return "eq";
+ case A64CC::NE: return "ne";
+ case A64CC::HS: return "hs";
+ case A64CC::LO: return "lo";
+ case A64CC::MI: return "mi";
+ case A64CC::PL: return "pl";
+ case A64CC::VS: return "vs";
+ case A64CC::VC: return "vc";
+ case A64CC::HI: return "hi";
+ case A64CC::LS: return "ls";
+ case A64CC::GE: return "ge";
+ case A64CC::LT: return "lt";
+ case A64CC::GT: return "gt";
+ case A64CC::LE: return "le";
+ case A64CC::AL: return "al";
+ case A64CC::NV: return "nv";
+ }
+}
+
+inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) {
+ return StringSwitch<A64CC::CondCodes>(CondStr.lower())
+ .Case("eq", A64CC::EQ)
+    .Case("ne", A64CC::NE)
+ .Case("hs", A64CC::HS)
+ .Case("cs", A64CC::HS)
+ .Case("lo", A64CC::LO)
+ .Case("cc", A64CC::LO)
+ .Case("mi", A64CC::MI)
+ .Case("pl", A64CC::PL)
+ .Case("vs", A64CC::VS)
+ .Case("vc", A64CC::VC)
+ .Case("hi", A64CC::HI)
+ .Case("ls", A64CC::LS)
+ .Case("ge", A64CC::GE)
+ .Case("lt", A64CC::LT)
+ .Case("gt", A64CC::GT)
+ .Case("le", A64CC::LE)
+ .Case("al", A64CC::AL)
+ .Case("nv", A64CC::NV)
+ .Default(A64CC::Invalid);
+}
+
+inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
+ // It turns out that the condition codes have been designed so that in order
+ // to reverse the intent of the condition you only have to invert the low bit:
+
+ return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1);
+}
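+
+// For example, inverting EQ (0b0000) yields NE (0b0001), and inverting GE
+// (0b1010) yields LT (0b1011).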
+
+/// Instances of this class can perform bidirectional mapping from random
+/// identifier strings to operand encodings. For example "MSR" takes a named
+/// system-register which must be encoded somehow and decoded for printing. This
+/// central location means that the information for those transformations is not
+/// duplicated and remains in sync.
+///
+/// FIXME: currently the algorithm is a completely unoptimised linear
+/// search. Obviously this could be improved, but we would probably want to work
+/// out just how often these instructions are emitted before working on it. It
+/// might even be optimal to just reorder the tables for the common instructions
+/// rather than changing the algorithm.
+struct NamedImmMapper {
+ struct Mapping {
+ const char *Name;
+ uint32_t Value;
+ };
+
+ template<int N>
+ NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
+ : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+
+ StringRef toString(uint32_t Value, bool &Valid) const;
+ uint32_t fromString(StringRef Name, bool &Valid) const;
+
+  /// Many of the instructions allow an alternative assembly form consisting of
+  /// a simple immediate. Currently the only valid forms are the ranges [0, N),
+  /// where N == 0 indicates that no immediate syntax-form is allowed.
+ bool validImm(uint32_t Value) const;
+protected:
+ const Mapping *Pairs;
+ size_t NumPairs;
+ uint32_t TooBigImm;
+};
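+
+// Illustrative use (with the DBarrierMapper declared below):
+//   bool Valid;
+//   StringRef Name = A64DB::DBarrierMapper().toString(0xf, Valid);
+//   // Valid == true, Name == "sy"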
+
+namespace A64AT {
+ enum ATValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ S1E1R = 0x43c0, // 01 000 0111 1000 000
+ S1E2R = 0x63c0, // 01 100 0111 1000 000
+ S1E3R = 0x73c0, // 01 110 0111 1000 000
+ S1E1W = 0x43c1, // 01 000 0111 1000 001
+ S1E2W = 0x63c1, // 01 100 0111 1000 001
+ S1E3W = 0x73c1, // 01 110 0111 1000 001
+ S1E0R = 0x43c2, // 01 000 0111 1000 010
+ S1E0W = 0x43c3, // 01 000 0111 1000 011
+ S12E1R = 0x63c4, // 01 100 0111 1000 100
+ S12E1W = 0x63c5, // 01 100 0111 1000 101
+ S12E0R = 0x63c6, // 01 100 0111 1000 110
+ S12E0W = 0x63c7 // 01 100 0111 1000 111
+ };
+
+ struct ATMapper : NamedImmMapper {
+ const static Mapping ATPairs[];
+
+ ATMapper();
+ };
+
+}
+namespace A64DB {
+ enum DBValues {
+ Invalid = -1,
+ OSHLD = 0x1,
+ OSHST = 0x2,
+ OSH = 0x3,
+ NSHLD = 0x5,
+ NSHST = 0x6,
+ NSH = 0x7,
+ ISHLD = 0x9,
+ ISHST = 0xa,
+ ISH = 0xb,
+ LD = 0xd,
+ ST = 0xe,
+ SY = 0xf
+ };
+
+ struct DBarrierMapper : NamedImmMapper {
+ const static Mapping DBarrierPairs[];
+
+ DBarrierMapper();
+ };
+}
+
+namespace A64DC {
+ enum DCValues {
+    Invalid = -1,    // Op0  Op1  CRn   CRm   Op2
+ ZVA = 0x5ba1, // 01 011 0111 0100 001
+ IVAC = 0x43b1, // 01 000 0111 0110 001
+ ISW = 0x43b2, // 01 000 0111 0110 010
+ CVAC = 0x5bd1, // 01 011 0111 1010 001
+ CSW = 0x43d2, // 01 000 0111 1010 010
+ CVAU = 0x5bd9, // 01 011 0111 1011 001
+ CIVAC = 0x5bf1, // 01 011 0111 1110 001
+ CISW = 0x43f2 // 01 000 0111 1110 010
+ };
+
+ struct DCMapper : NamedImmMapper {
+ const static Mapping DCPairs[];
+
+ DCMapper();
+ };
+
+}
+
+namespace A64IC {
+ enum ICValues {
+ Invalid = -1, // Op1 CRn CRm Op2
+ IALLUIS = 0x0388, // 000 0111 0001 000
+ IALLU = 0x03a8, // 000 0111 0101 000
+ IVAU = 0x1ba9 // 011 0111 0101 001
+ };
+
+
+ struct ICMapper : NamedImmMapper {
+ const static Mapping ICPairs[];
+
+ ICMapper();
+ };
+
+ static inline bool NeedsRegister(ICValues Val) {
+ return Val == IVAU;
+ }
+}
+
+namespace A64ISB {
+ enum ISBValues {
+ Invalid = -1,
+ SY = 0xf
+ };
+ struct ISBMapper : NamedImmMapper {
+ const static Mapping ISBPairs[];
+
+ ISBMapper();
+ };
+}
+
+namespace A64PRFM {
+ enum PRFMValues {
+ Invalid = -1,
+ PLDL1KEEP = 0x00,
+ PLDL1STRM = 0x01,
+ PLDL2KEEP = 0x02,
+ PLDL2STRM = 0x03,
+ PLDL3KEEP = 0x04,
+ PLDL3STRM = 0x05,
+ PLIL1KEEP = 0x08,
+ PLIL1STRM = 0x09,
+ PLIL2KEEP = 0x0a,
+ PLIL2STRM = 0x0b,
+ PLIL3KEEP = 0x0c,
+ PLIL3STRM = 0x0d,
+ PSTL1KEEP = 0x10,
+ PSTL1STRM = 0x11,
+ PSTL2KEEP = 0x12,
+ PSTL2STRM = 0x13,
+ PSTL3KEEP = 0x14,
+ PSTL3STRM = 0x15
+ };
+
+ struct PRFMMapper : NamedImmMapper {
+ const static Mapping PRFMPairs[];
+
+ PRFMMapper();
+ };
+}
+
+namespace A64PState {
+ enum PStateValues {
+ Invalid = -1,
+ SPSel = 0x05,
+ DAIFSet = 0x1e,
+ DAIFClr = 0x1f
+ };
+
+ struct PStateMapper : NamedImmMapper {
+ const static Mapping PStatePairs[];
+
+ PStateMapper();
+ };
+
+}
+
+namespace A64SE {
+ enum ShiftExtSpecifiers {
+ Invalid = -1,
+ LSL,
+ LSR,
+ ASR,
+ ROR,
+
+ UXTB,
+ UXTH,
+ UXTW,
+ UXTX,
+
+ SXTB,
+ SXTH,
+ SXTW,
+ SXTX
+ };
+}
+
+namespace A64SysReg {
+ enum SysRegROValues {
+ MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000
+ DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000
+ MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000
+ OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100
+ DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110
+ PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110
+ PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111
+ MIDR_EL1 = 0xc000, // 11 000 0000 0000 000
+ CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000
+ CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001
+ CTR_EL0 = 0xd801, // 11 011 0000 0000 001
+ MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101
+ REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110
+ AIDR_EL1 = 0xc807, // 11 001 0000 0000 111
+ DCZID_EL0 = 0xd807, // 11 011 0000 0000 111
+ ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000
+ ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001
+ ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010
+ ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011
+ ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100
+ ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101
+ ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110
+ ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111
+ ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000
+ ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001
+ ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010
+ ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011
+ ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100
+ ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101
+ ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000
+ ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001
+ ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000
+ ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001
+ ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100
+ ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101
+ ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000
+ ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001
+ ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000
+ ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001
+ MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000
+ MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001
+ MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010
+ RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001
+ RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001
+ RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001
+ ISR_EL1 = 0xc608, // 11 000 1100 0001 000
+ CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001
+ CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010
+
+ // Trace registers
+ TRCSTATR = 0x8818, // 10 001 0000 0011 000
+ TRCIDR8 = 0x8806, // 10 001 0000 0000 110
+ TRCIDR9 = 0x880e, // 10 001 0000 0001 110
+ TRCIDR10 = 0x8816, // 10 001 0000 0010 110
+ TRCIDR11 = 0x881e, // 10 001 0000 0011 110
+ TRCIDR12 = 0x8826, // 10 001 0000 0100 110
+ TRCIDR13 = 0x882e, // 10 001 0000 0101 110
+ TRCIDR0 = 0x8847, // 10 001 0000 1000 111
+ TRCIDR1 = 0x884f, // 10 001 0000 1001 111
+ TRCIDR2 = 0x8857, // 10 001 0000 1010 111
+ TRCIDR3 = 0x885f, // 10 001 0000 1011 111
+ TRCIDR4 = 0x8867, // 10 001 0000 1100 111
+ TRCIDR5 = 0x886f, // 10 001 0000 1101 111
+ TRCIDR6 = 0x8877, // 10 001 0000 1110 111
+ TRCIDR7 = 0x887f, // 10 001 0000 1111 111
+ TRCOSLSR = 0x888c, // 10 001 0001 0001 100
+ TRCPDSR = 0x88ac, // 10 001 0001 0101 100
+ TRCDEVAFF0 = 0x8bd6, // 10 001 0111 1010 110
+ TRCDEVAFF1 = 0x8bde, // 10 001 0111 1011 110
+ TRCLSR = 0x8bee, // 10 001 0111 1101 110
+ TRCAUTHSTATUS = 0x8bf6, // 10 001 0111 1110 110
+ TRCDEVARCH = 0x8bfe, // 10 001 0111 1111 110
+ TRCDEVID = 0x8b97, // 10 001 0111 0010 111
+ TRCDEVTYPE = 0x8b9f, // 10 001 0111 0011 111
+ TRCPIDR4 = 0x8ba7, // 10 001 0111 0100 111
+ TRCPIDR5 = 0x8baf, // 10 001 0111 0101 111
+ TRCPIDR6 = 0x8bb7, // 10 001 0111 0110 111
+ TRCPIDR7 = 0x8bbf, // 10 001 0111 0111 111
+ TRCPIDR0 = 0x8bc7, // 10 001 0111 1000 111
+ TRCPIDR1 = 0x8bcf, // 10 001 0111 1001 111
+ TRCPIDR2 = 0x8bd7, // 10 001 0111 1010 111
+ TRCPIDR3 = 0x8bdf, // 10 001 0111 1011 111
+ TRCCIDR0 = 0x8be7, // 10 001 0111 1100 111
+ TRCCIDR1 = 0x8bef, // 10 001 0111 1101 111
+ TRCCIDR2 = 0x8bf7, // 10 001 0111 1110 111
+ TRCCIDR3 = 0x8bff, // 10 001 0111 1111 111
+
+ // GICv3 registers
+ ICC_IAR1_EL1 = 0xc660, // 11 000 1100 1100 000
+ ICC_IAR0_EL1 = 0xc640, // 11 000 1100 1000 000
+ ICC_HPPIR1_EL1 = 0xc662, // 11 000 1100 1100 010
+ ICC_HPPIR0_EL1 = 0xc642, // 11 000 1100 1000 010
+ ICC_RPR_EL1 = 0xc65b, // 11 000 1100 1011 011
+ ICH_VTR_EL2 = 0xe659, // 11 100 1100 1011 001
+ ICH_EISR_EL2 = 0xe65b, // 11 100 1100 1011 011
+ ICH_ELSR_EL2 = 0xe65d // 11 100 1100 1011 101
+ };
+
+ enum SysRegWOValues {
+ DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000
+ OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100
+ PMSWINC_EL0 = 0xdce4, // 11 011 1001 1100 100
+
+ // Trace registers
+ TRCOSLAR = 0x8884, // 10 001 0001 0000 100
+ TRCLAR = 0x8be6, // 10 001 0111 1100 110
+
+ // GICv3 registers
+ ICC_EOIR1_EL1 = 0xc661, // 11 000 1100 1100 001
+ ICC_EOIR0_EL1 = 0xc641, // 11 000 1100 1000 001
+ ICC_DIR_EL1 = 0xc659, // 11 000 1100 1011 001
+ ICC_SGI1R_EL1 = 0xc65d, // 11 000 1100 1011 101
+ ICC_ASGI1R_EL1 = 0xc65e, // 11 000 1100 1011 110
+ ICC_SGI0R_EL1 = 0xc65f // 11 000 1100 1011 111
+ };
+
+ enum SysRegValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
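+ // Each value packs the MRS/MSR operand fields shown in the comments as
+ // (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2.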
+ OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010
+ OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010
+ TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000
+ MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000
+ MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010
+ DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000
+ OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010
+ DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000
+ DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100
+ DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100
+ DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100
+ DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100
+ DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100
+ DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100
+ DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100
+ DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100
+ DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100
+ DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100
+ DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100
+ DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100
+ DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100
+ DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100
+ DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100
+ DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100
+ DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101
+ DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101
+ DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101
+ DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101
+ DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101
+ DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101
+ DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101
+ DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101
+ DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101
+ DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101
+ DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101
+ DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101
+ DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101
+ DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101
+ DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101
+ DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101
+ DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110
+ DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110
+ DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110
+ DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110
+ DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110
+ DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110
+ DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110
+ DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110
+ DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110
+ DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110
+ DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110
+ DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110
+ DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110
+ DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110
+ DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110
+ DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110
+ DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111
+ DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111
+ DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111
+ DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111
+ DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111
+ DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111
+ DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111
+ DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111
+ DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111
+ DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111
+ DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111
+ DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111
+ DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111
+ DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111
+ DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111
+ DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111
+ TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000
+ OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100
+ DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100
+ DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110
+ DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110
+ CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000
+ VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000
+ VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101
+ CPACR_EL1 = 0xc082, // 11 000 0001 0000 010
+ SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000
+ SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000
+ SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000
+ ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001
+ ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001
+ ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001
+ HCR_EL2 = 0xe088, // 11 100 0001 0001 000
+ SCR_EL3 = 0xf088, // 11 110 0001 0001 000
+ MDCR_EL2 = 0xe089, // 11 100 0001 0001 001
+ SDER32_EL3 = 0xf089, // 11 110 0001 0001 001
+ CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010
+ CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010
+ HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011
+ HACR_EL2 = 0xe08f, // 11 100 0001 0001 111
+ MDCR_EL3 = 0xf099, // 11 110 0001 0011 001
+ TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000
+ TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000
+ TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000
+ TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001
+ TCR_EL1 = 0xc102, // 11 000 0010 0000 010
+ TCR_EL2 = 0xe102, // 11 100 0010 0000 010
+ TCR_EL3 = 0xf102, // 11 110 0010 0000 010
+ VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000
+ VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010
+ DACR32_EL2 = 0xe180, // 11 100 0011 0000 000
+ SPSR_EL1 = 0xc200, // 11 000 0100 0000 000
+ SPSR_EL2 = 0xe200, // 11 100 0100 0000 000
+ SPSR_EL3 = 0xf200, // 11 110 0100 0000 000
+ ELR_EL1 = 0xc201, // 11 000 0100 0000 001
+ ELR_EL2 = 0xe201, // 11 100 0100 0000 001
+ ELR_EL3 = 0xf201, // 11 110 0100 0000 001
+ SP_EL0 = 0xc208, // 11 000 0100 0001 000
+ SP_EL1 = 0xe208, // 11 100 0100 0001 000
+ SP_EL2 = 0xf208, // 11 110 0100 0001 000
+ SPSel = 0xc210, // 11 000 0100 0010 000
+ NZCV = 0xda10, // 11 011 0100 0010 000
+ DAIF = 0xda11, // 11 011 0100 0010 001
+ CurrentEL = 0xc212, // 11 000 0100 0010 010
+ SPSR_irq = 0xe218, // 11 100 0100 0011 000
+ SPSR_abt = 0xe219, // 11 100 0100 0011 001
+ SPSR_und = 0xe21a, // 11 100 0100 0011 010
+ SPSR_fiq = 0xe21b, // 11 100 0100 0011 011
+ FPCR = 0xda20, // 11 011 0100 0100 000
+ FPSR = 0xda21, // 11 011 0100 0100 001
+ DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000
+ DLR_EL0 = 0xda29, // 11 011 0100 0101 001
+ IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001
+ AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000
+ AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000
+ AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000
+ AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001
+ AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001
+ AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001
+ ESR_EL1 = 0xc290, // 11 000 0101 0010 000
+ ESR_EL2 = 0xe290, // 11 100 0101 0010 000
+ ESR_EL3 = 0xf290, // 11 110 0101 0010 000
+ FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000
+ FAR_EL1 = 0xc300, // 11 000 0110 0000 000
+ FAR_EL2 = 0xe300, // 11 100 0110 0000 000
+ FAR_EL3 = 0xf300, // 11 110 0110 0000 000
+ HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100
+ PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000
+ PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000
+ PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001
+ PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010
+ PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011
+ PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101
+ PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000
+ PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001
+ PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010
+ PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000
+ PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001
+ PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010
+ PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011
+ MAIR_EL1 = 0xc510, // 11 000 1010 0010 000
+ MAIR_EL2 = 0xe510, // 11 100 1010 0010 000
+ MAIR_EL3 = 0xf510, // 11 110 1010 0010 000
+ AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000
+ AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000
+ AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000
+ VBAR_EL1 = 0xc600, // 11 000 1100 0000 000
+ VBAR_EL2 = 0xe600, // 11 100 1100 0000 000
+ VBAR_EL3 = 0xf600, // 11 110 1100 0000 000
+ RMR_EL1 = 0xc602, // 11 000 1100 0000 010
+ RMR_EL2 = 0xe602, // 11 100 1100 0000 010
+ RMR_EL3 = 0xf602, // 11 110 1100 0000 010
+ CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001
+ TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010
+ TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010
+ TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010
+ TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011
+ TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100
+ CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000
+ CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011
+ CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000
+ CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000
+ CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000
+ CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000
+ CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000
+ CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001
+ CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001
+ CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001
+ CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010
+ CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010
+ CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010
+ CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000
+ CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001
+ CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010
+ PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000
+ PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001
+ PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010
+ PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011
+ PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100
+ PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101
+ PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110
+ PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111
+ PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000
+ PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001
+ PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010
+ PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011
+ PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100
+ PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101
+ PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110
+ PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111
+ PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000
+ PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001
+ PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010
+ PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011
+ PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100
+ PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101
+ PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110
+ PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111
+ PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000
+ PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001
+ PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010
+ PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011
+ PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100
+ PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101
+ PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110
+ PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111
+ PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000
+ PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001
+ PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010
+ PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011
+ PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100
+ PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101
+ PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110
+ PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111
+ PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000
+ PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001
+ PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010
+ PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011
+ PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100
+ PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101
+ PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110
+ PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111
+ PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000
+ PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001
+ PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010
+ PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011
+ PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100
+ PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101
+ PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110
+ PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111
+ PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000
+ PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001
+ PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010
+ PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011
+ PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100
+ PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101
+ PMEVTYPER30_EL0 = 0xdf7e, // 11 011 1110 1111 110
+
+ // Trace registers
+ TRCPRGCTLR = 0x8808, // 10 001 0000 0001 000
+ TRCPROCSELR = 0x8810, // 10 001 0000 0010 000
+ TRCCONFIGR = 0x8820, // 10 001 0000 0100 000
+ TRCAUXCTLR = 0x8830, // 10 001 0000 0110 000
+ TRCEVENTCTL0R = 0x8840, // 10 001 0000 1000 000
+ TRCEVENTCTL1R = 0x8848, // 10 001 0000 1001 000
+ TRCSTALLCTLR = 0x8858, // 10 001 0000 1011 000
+ TRCTSCTLR = 0x8860, // 10 001 0000 1100 000
+ TRCSYNCPR = 0x8868, // 10 001 0000 1101 000
+ TRCCCCTLR = 0x8870, // 10 001 0000 1110 000
+ TRCBBCTLR = 0x8878, // 10 001 0000 1111 000
+ TRCTRACEIDR = 0x8801, // 10 001 0000 0000 001
+ TRCQCTLR = 0x8809, // 10 001 0000 0001 001
+ TRCVICTLR = 0x8802, // 10 001 0000 0000 010
+ TRCVIIECTLR = 0x880a, // 10 001 0000 0001 010
+ TRCVISSCTLR = 0x8812, // 10 001 0000 0010 010
+ TRCVIPCSSCTLR = 0x881a, // 10 001 0000 0011 010
+ TRCVDCTLR = 0x8842, // 10 001 0000 1000 010
+ TRCVDSACCTLR = 0x884a, // 10 001 0000 1001 010
+ TRCVDARCCTLR = 0x8852, // 10 001 0000 1010 010
+ TRCSEQEVR0 = 0x8804, // 10 001 0000 0000 100
+ TRCSEQEVR1 = 0x880c, // 10 001 0000 0001 100
+ TRCSEQEVR2 = 0x8814, // 10 001 0000 0010 100
+ TRCSEQRSTEVR = 0x8834, // 10 001 0000 0110 100
+ TRCSEQSTR = 0x883c, // 10 001 0000 0111 100
+ TRCEXTINSELR = 0x8844, // 10 001 0000 1000 100
+ TRCCNTRLDVR0 = 0x8805, // 10 001 0000 0000 101
+ TRCCNTRLDVR1 = 0x880d, // 10 001 0000 0001 101
+ TRCCNTRLDVR2 = 0x8815, // 10 001 0000 0010 101
+ TRCCNTRLDVR3 = 0x881d, // 10 001 0000 0011 101
+ TRCCNTCTLR0 = 0x8825, // 10 001 0000 0100 101
+ TRCCNTCTLR1 = 0x882d, // 10 001 0000 0101 101
+ TRCCNTCTLR2 = 0x8835, // 10 001 0000 0110 101
+ TRCCNTCTLR3 = 0x883d, // 10 001 0000 0111 101
+ TRCCNTVR0 = 0x8845, // 10 001 0000 1000 101
+ TRCCNTVR1 = 0x884d, // 10 001 0000 1001 101
+ TRCCNTVR2 = 0x8855, // 10 001 0000 1010 101
+ TRCCNTVR3 = 0x885d, // 10 001 0000 1011 101
+ TRCIMSPEC0 = 0x8807, // 10 001 0000 0000 111
+ TRCIMSPEC1 = 0x880f, // 10 001 0000 0001 111
+ TRCIMSPEC2 = 0x8817, // 10 001 0000 0010 111
+ TRCIMSPEC3 = 0x881f, // 10 001 0000 0011 111
+ TRCIMSPEC4 = 0x8827, // 10 001 0000 0100 111
+ TRCIMSPEC5 = 0x882f, // 10 001 0000 0101 111
+ TRCIMSPEC6 = 0x8837, // 10 001 0000 0110 111
+ TRCIMSPEC7 = 0x883f, // 10 001 0000 0111 111
+ TRCRSCTLR2 = 0x8890, // 10 001 0001 0010 000
+ TRCRSCTLR3 = 0x8898, // 10 001 0001 0011 000
+ TRCRSCTLR4 = 0x88a0, // 10 001 0001 0100 000
+ TRCRSCTLR5 = 0x88a8, // 10 001 0001 0101 000
+ TRCRSCTLR6 = 0x88b0, // 10 001 0001 0110 000
+ TRCRSCTLR7 = 0x88b8, // 10 001 0001 0111 000
+ TRCRSCTLR8 = 0x88c0, // 10 001 0001 1000 000
+ TRCRSCTLR9 = 0x88c8, // 10 001 0001 1001 000
+ TRCRSCTLR10 = 0x88d0, // 10 001 0001 1010 000
+ TRCRSCTLR11 = 0x88d8, // 10 001 0001 1011 000
+ TRCRSCTLR12 = 0x88e0, // 10 001 0001 1100 000
+ TRCRSCTLR13 = 0x88e8, // 10 001 0001 1101 000
+ TRCRSCTLR14 = 0x88f0, // 10 001 0001 1110 000
+ TRCRSCTLR15 = 0x88f8, // 10 001 0001 1111 000
+ TRCRSCTLR16 = 0x8881, // 10 001 0001 0000 001
+ TRCRSCTLR17 = 0x8889, // 10 001 0001 0001 001
+ TRCRSCTLR18 = 0x8891, // 10 001 0001 0010 001
+ TRCRSCTLR19 = 0x8899, // 10 001 0001 0011 001
+ TRCRSCTLR20 = 0x88a1, // 10 001 0001 0100 001
+ TRCRSCTLR21 = 0x88a9, // 10 001 0001 0101 001
+ TRCRSCTLR22 = 0x88b1, // 10 001 0001 0110 001
+ TRCRSCTLR23 = 0x88b9, // 10 001 0001 0111 001
+ TRCRSCTLR24 = 0x88c1, // 10 001 0001 1000 001
+ TRCRSCTLR25 = 0x88c9, // 10 001 0001 1001 001
+ TRCRSCTLR26 = 0x88d1, // 10 001 0001 1010 001
+ TRCRSCTLR27 = 0x88d9, // 10 001 0001 1011 001
+ TRCRSCTLR28 = 0x88e1, // 10 001 0001 1100 001
+ TRCRSCTLR29 = 0x88e9, // 10 001 0001 1101 001
+ TRCRSCTLR30 = 0x88f1, // 10 001 0001 1110 001
+ TRCRSCTLR31 = 0x88f9, // 10 001 0001 1111 001
+ TRCSSCCR0 = 0x8882, // 10 001 0001 0000 010
+ TRCSSCCR1 = 0x888a, // 10 001 0001 0001 010
+ TRCSSCCR2 = 0x8892, // 10 001 0001 0010 010
+ TRCSSCCR3 = 0x889a, // 10 001 0001 0011 010
+ TRCSSCCR4 = 0x88a2, // 10 001 0001 0100 010
+ TRCSSCCR5 = 0x88aa, // 10 001 0001 0101 010
+ TRCSSCCR6 = 0x88b2, // 10 001 0001 0110 010
+ TRCSSCCR7 = 0x88ba, // 10 001 0001 0111 010
+ TRCSSCSR0 = 0x88c2, // 10 001 0001 1000 010
+ TRCSSCSR1 = 0x88ca, // 10 001 0001 1001 010
+ TRCSSCSR2 = 0x88d2, // 10 001 0001 1010 010
+ TRCSSCSR3 = 0x88da, // 10 001 0001 1011 010
+ TRCSSCSR4 = 0x88e2, // 10 001 0001 1100 010
+ TRCSSCSR5 = 0x88ea, // 10 001 0001 1101 010
+ TRCSSCSR6 = 0x88f2, // 10 001 0001 1110 010
+ TRCSSCSR7 = 0x88fa, // 10 001 0001 1111 010
+ TRCSSPCICR0 = 0x8883, // 10 001 0001 0000 011
+ TRCSSPCICR1 = 0x888b, // 10 001 0001 0001 011
+ TRCSSPCICR2 = 0x8893, // 10 001 0001 0010 011
+ TRCSSPCICR3 = 0x889b, // 10 001 0001 0011 011
+ TRCSSPCICR4 = 0x88a3, // 10 001 0001 0100 011
+ TRCSSPCICR5 = 0x88ab, // 10 001 0001 0101 011
+ TRCSSPCICR6 = 0x88b3, // 10 001 0001 0110 011
+ TRCSSPCICR7 = 0x88bb, // 10 001 0001 0111 011
+ TRCPDCR = 0x88a4, // 10 001 0001 0100 100
+ TRCACVR0 = 0x8900, // 10 001 0010 0000 000
+ TRCACVR1 = 0x8910, // 10 001 0010 0010 000
+ TRCACVR2 = 0x8920, // 10 001 0010 0100 000
+ TRCACVR3 = 0x8930, // 10 001 0010 0110 000
+ TRCACVR4 = 0x8940, // 10 001 0010 1000 000
+ TRCACVR5 = 0x8950, // 10 001 0010 1010 000
+ TRCACVR6 = 0x8960, // 10 001 0010 1100 000
+ TRCACVR7 = 0x8970, // 10 001 0010 1110 000
+ TRCACVR8 = 0x8901, // 10 001 0010 0000 001
+ TRCACVR9 = 0x8911, // 10 001 0010 0010 001
+ TRCACVR10 = 0x8921, // 10 001 0010 0100 001
+ TRCACVR11 = 0x8931, // 10 001 0010 0110 001
+ TRCACVR12 = 0x8941, // 10 001 0010 1000 001
+ TRCACVR13 = 0x8951, // 10 001 0010 1010 001
+ TRCACVR14 = 0x8961, // 10 001 0010 1100 001
+ TRCACVR15 = 0x8971, // 10 001 0010 1110 001
+ TRCACATR0 = 0x8902, // 10 001 0010 0000 010
+ TRCACATR1 = 0x8912, // 10 001 0010 0010 010
+ TRCACATR2 = 0x8922, // 10 001 0010 0100 010
+ TRCACATR3 = 0x8932, // 10 001 0010 0110 010
+ TRCACATR4 = 0x8942, // 10 001 0010 1000 010
+ TRCACATR5 = 0x8952, // 10 001 0010 1010 010
+ TRCACATR6 = 0x8962, // 10 001 0010 1100 010
+ TRCACATR7 = 0x8972, // 10 001 0010 1110 010
+ TRCACATR8 = 0x8903, // 10 001 0010 0000 011
+ TRCACATR9 = 0x8913, // 10 001 0010 0010 011
+ TRCACATR10 = 0x8923, // 10 001 0010 0100 011
+ TRCACATR11 = 0x8933, // 10 001 0010 0110 011
+ TRCACATR12 = 0x8943, // 10 001 0010 1000 011
+ TRCACATR13 = 0x8953, // 10 001 0010 1010 011
+ TRCACATR14 = 0x8963, // 10 001 0010 1100 011
+ TRCACATR15 = 0x8973, // 10 001 0010 1110 011
+ TRCDVCVR0 = 0x8904, // 10 001 0010 0000 100
+ TRCDVCVR1 = 0x8924, // 10 001 0010 0100 100
+ TRCDVCVR2 = 0x8944, // 10 001 0010 1000 100
+ TRCDVCVR3 = 0x8964, // 10 001 0010 1100 100
+ TRCDVCVR4 = 0x8905, // 10 001 0010 0000 101
+ TRCDVCVR5 = 0x8925, // 10 001 0010 0100 101
+ TRCDVCVR6 = 0x8945, // 10 001 0010 1000 101
+ TRCDVCVR7 = 0x8965, // 10 001 0010 1100 101
+ TRCDVCMR0 = 0x8906, // 10 001 0010 0000 110
+ TRCDVCMR1 = 0x8926, // 10 001 0010 0100 110
+ TRCDVCMR2 = 0x8946, // 10 001 0010 1000 110
+ TRCDVCMR3 = 0x8966, // 10 001 0010 1100 110
+ TRCDVCMR4 = 0x8907, // 10 001 0010 0000 111
+ TRCDVCMR5 = 0x8927, // 10 001 0010 0100 111
+ TRCDVCMR6 = 0x8947, // 10 001 0010 1000 111
+ TRCDVCMR7 = 0x8967, // 10 001 0010 1100 111
+ TRCCIDCVR0 = 0x8980, // 10 001 0011 0000 000
+ TRCCIDCVR1 = 0x8990, // 10 001 0011 0010 000
+ TRCCIDCVR2 = 0x89a0, // 10 001 0011 0100 000
+ TRCCIDCVR3 = 0x89b0, // 10 001 0011 0110 000
+ TRCCIDCVR4 = 0x89c0, // 10 001 0011 1000 000
+ TRCCIDCVR5 = 0x89d0, // 10 001 0011 1010 000
+ TRCCIDCVR6 = 0x89e0, // 10 001 0011 1100 000
+ TRCCIDCVR7 = 0x89f0, // 10 001 0011 1110 000
+ TRCVMIDCVR0 = 0x8981, // 10 001 0011 0000 001
+ TRCVMIDCVR1 = 0x8991, // 10 001 0011 0010 001
+ TRCVMIDCVR2 = 0x89a1, // 10 001 0011 0100 001
+ TRCVMIDCVR3 = 0x89b1, // 10 001 0011 0110 001
+ TRCVMIDCVR4 = 0x89c1, // 10 001 0011 1000 001
+ TRCVMIDCVR5 = 0x89d1, // 10 001 0011 1010 001
+ TRCVMIDCVR6 = 0x89e1, // 10 001 0011 1100 001
+ TRCVMIDCVR7 = 0x89f1, // 10 001 0011 1110 001
+ TRCCIDCCTLR0 = 0x8982, // 10 001 0011 0000 010
+ TRCCIDCCTLR1 = 0x898a, // 10 001 0011 0001 010
+ TRCVMIDCCTLR0 = 0x8992, // 10 001 0011 0010 010
+ TRCVMIDCCTLR1 = 0x899a, // 10 001 0011 0011 010
+ TRCITCTRL = 0x8b84, // 10 001 0111 0000 100
+ TRCCLAIMSET = 0x8bc6, // 10 001 0111 1000 110
+ TRCCLAIMCLR = 0x8bce, // 10 001 0111 1001 110
+
+ // GICv3 registers
+ ICC_BPR1_EL1 = 0xc663, // 11 000 1100 1100 011
+ ICC_BPR0_EL1 = 0xc643, // 11 000 1100 1000 011
+ ICC_PMR_EL1 = 0xc230, // 11 000 0100 0110 000
+ ICC_CTLR_EL1 = 0xc664, // 11 000 1100 1100 100
+ ICC_CTLR_EL3 = 0xf664, // 11 110 1100 1100 100
+ ICC_SRE_EL1 = 0xc665, // 11 000 1100 1100 101
+ ICC_SRE_EL2 = 0xe64d, // 11 100 1100 1001 101
+ ICC_SRE_EL3 = 0xf665, // 11 110 1100 1100 101
+ ICC_IGRPEN0_EL1 = 0xc666, // 11 000 1100 1100 110
+ ICC_IGRPEN1_EL1 = 0xc667, // 11 000 1100 1100 111
+ ICC_IGRPEN1_EL3 = 0xf667, // 11 110 1100 1100 111
+ ICC_SEIEN_EL1 = 0xc668, // 11 000 1100 1101 000
+ ICC_AP0R0_EL1 = 0xc644, // 11 000 1100 1000 100
+ ICC_AP0R1_EL1 = 0xc645, // 11 000 1100 1000 101
+ ICC_AP0R2_EL1 = 0xc646, // 11 000 1100 1000 110
+ ICC_AP0R3_EL1 = 0xc647, // 11 000 1100 1000 111
+ ICC_AP1R0_EL1 = 0xc648, // 11 000 1100 1001 000
+ ICC_AP1R1_EL1 = 0xc649, // 11 000 1100 1001 001
+ ICC_AP1R2_EL1 = 0xc64a, // 11 000 1100 1001 010
+ ICC_AP1R3_EL1 = 0xc64b, // 11 000 1100 1001 011
+ ICH_AP0R0_EL2 = 0xe640, // 11 100 1100 1000 000
+ ICH_AP0R1_EL2 = 0xe641, // 11 100 1100 1000 001
+ ICH_AP0R2_EL2 = 0xe642, // 11 100 1100 1000 010
+ ICH_AP0R3_EL2 = 0xe643, // 11 100 1100 1000 011
+ ICH_AP1R0_EL2 = 0xe648, // 11 100 1100 1001 000
+ ICH_AP1R1_EL2 = 0xe649, // 11 100 1100 1001 001
+ ICH_AP1R2_EL2 = 0xe64a, // 11 100 1100 1001 010
+ ICH_AP1R3_EL2 = 0xe64b, // 11 100 1100 1001 011
+ ICH_HCR_EL2 = 0xe658, // 11 100 1100 1011 000
+ ICH_MISR_EL2 = 0xe65a, // 11 100 1100 1011 010
+ ICH_VMCR_EL2 = 0xe65f, // 11 100 1100 1011 111
+ ICH_VSEIR_EL2 = 0xe64c, // 11 100 1100 1001 100
+ ICH_LR0_EL2 = 0xe660, // 11 100 1100 1100 000
+ ICH_LR1_EL2 = 0xe661, // 11 100 1100 1100 001
+ ICH_LR2_EL2 = 0xe662, // 11 100 1100 1100 010
+ ICH_LR3_EL2 = 0xe663, // 11 100 1100 1100 011
+ ICH_LR4_EL2 = 0xe664, // 11 100 1100 1100 100
+ ICH_LR5_EL2 = 0xe665, // 11 100 1100 1100 101
+ ICH_LR6_EL2 = 0xe666, // 11 100 1100 1100 110
+ ICH_LR7_EL2 = 0xe667, // 11 100 1100 1100 111
+ ICH_LR8_EL2 = 0xe668, // 11 100 1100 1101 000
+ ICH_LR9_EL2 = 0xe669, // 11 100 1100 1101 001
+ ICH_LR10_EL2 = 0xe66a, // 11 100 1100 1101 010
+ ICH_LR11_EL2 = 0xe66b, // 11 100 1100 1101 011
+ ICH_LR12_EL2 = 0xe66c, // 11 100 1100 1101 100
+ ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101
+ ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110
+ ICH_LR15_EL2 = 0xe66f // 11 100 1100 1101 111
+ };
+
+ // Note that these do not inherit from NamedImmMapper. This class is
+ // sufficiently different in its behaviour that I don't believe it's worth
+ // burdening the common NamedImmMapper with abstractions only needed in
+ // this one case.
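+ //
+ // Illustrative use, given a Name StringRef and one of the mappers declared
+ // below:
+ //   bool Valid;
+ //   uint32_t Bits = Mapper.fromString(Name, Valid);  // name -> encoding
+ //   std::string Str = Mapper.toString(Bits, Valid);  // encoding -> name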
+ struct SysRegMapper {
+ static const NamedImmMapper::Mapping SysRegPairs[];
+
+ const NamedImmMapper::Mapping *InstPairs;
+ size_t NumInstPairs;
+
+ SysRegMapper() {}
+ uint32_t fromString(StringRef Name, bool &Valid) const;
+ std::string toString(uint32_t Bits, bool &Valid) const;
+ };
+
+ struct MSRMapper : SysRegMapper {
+ static const NamedImmMapper::Mapping MSRPairs[];
+ MSRMapper();
+ };
+
+ struct MRSMapper : SysRegMapper {
+ static const NamedImmMapper::Mapping MRSPairs[];
+ MRSMapper();
+ };
+
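+ // Parses a system register given in the implementation-defined generic
+ // "S<op0>_<op1>_C<n>_C<m>_<op2>" form and returns its packed encoding.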
+ uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
+}
+
+namespace A64TLBI {
+ enum TLBIValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
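+ // Values use the same (Op0 << 14) | (Op1 << 11) | (CRn << 7) |
+ // (CRm << 3) | Op2 packing as the system register enums above.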
+ IPAS2E1IS = 0x6401, // 01 100 1000 0000 001
+ IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101
+ VMALLE1IS = 0x4418, // 01 000 1000 0011 000
+ ALLE2IS = 0x6418, // 01 100 1000 0011 000
+ ALLE3IS = 0x7418, // 01 110 1000 0011 000
+ VAE1IS = 0x4419, // 01 000 1000 0011 001
+ VAE2IS = 0x6419, // 01 100 1000 0011 001
+ VAE3IS = 0x7419, // 01 110 1000 0011 001
+ ASIDE1IS = 0x441a, // 01 000 1000 0011 010
+ VAAE1IS = 0x441b, // 01 000 1000 0011 011
+ ALLE1IS = 0x641c, // 01 100 1000 0011 100
+ VALE1IS = 0x441d, // 01 000 1000 0011 101
+ VALE2IS = 0x641d, // 01 100 1000 0011 101
+ VALE3IS = 0x741d, // 01 110 1000 0011 101
+ VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110
+ VAALE1IS = 0x441f, // 01 000 1000 0011 111
+ IPAS2E1 = 0x6421, // 01 100 1000 0100 001
+ IPAS2LE1 = 0x6425, // 01 100 1000 0100 101
+ VMALLE1 = 0x4438, // 01 000 1000 0111 000
+ ALLE2 = 0x6438, // 01 100 1000 0111 000
+ ALLE3 = 0x7438, // 01 110 1000 0111 000
+ VAE1 = 0x4439, // 01 000 1000 0111 001
+ VAE2 = 0x6439, // 01 100 1000 0111 001
+ VAE3 = 0x7439, // 01 110 1000 0111 001
+ ASIDE1 = 0x443a, // 01 000 1000 0111 010
+ VAAE1 = 0x443b, // 01 000 1000 0111 011
+ ALLE1 = 0x643c, // 01 100 1000 0111 100
+ VALE1 = 0x443d, // 01 000 1000 0111 101
+ VALE2 = 0x643d, // 01 100 1000 0111 101
+ VALE3 = 0x743d, // 01 110 1000 0111 101
+ VMALLS12E1 = 0x643e, // 01 100 1000 0111 110
+ VAALE1 = 0x443f // 01 000 1000 0111 111
+ };
+
+ struct TLBIMapper : NamedImmMapper {
+ const static Mapping TLBIPairs[];
+
+ TLBIMapper();
+ };
+
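+ // Returns true if the TLBI operation takes a register operand (a virtual
+ // address or ASID); the whole-context VMALLE*/ALLE*/VMALLS12E1* variants
+ // do not.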
+ static inline bool NeedsRegister(TLBIValues Val) {
+ switch (Val) {
+ case VMALLE1IS:
+ case ALLE2IS:
+ case ALLE3IS:
+ case ALLE1IS:
+ case VMALLS12E1IS:
+ case VMALLE1:
+ case ALLE2:
+ case ALLE3:
+ case ALLE1:
+ case VMALLS12E1:
+ return false;
+ default:
+ return true;
+ }
+ }
+}
+
+namespace AArch64II {
+
+ enum TOF {
+ //===--------------------------------------------------------------===//
+ // AArch64 Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ // MO_GOT - Represents a relocation referring to the GOT entry of a given
+ // symbol. Used in adrp.
+ MO_GOT,
+
+ // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the
+ // GOT entry of a given symbol. Used in ldr only.
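+ // Together with MO_GOT this forms the usual two-instruction GOT access:
+ // an adrp followed by an ldr with the :got_lo12: offset.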
+ MO_GOT_LO12,
+
+ // MO_DTPREL_* - Represents a relocation referring to the offset from a
+ // module's dynamic thread pointer. Used in the local-dynamic TLS access
+ // model.
+ MO_DTPREL_G1,
+ MO_DTPREL_G0_NC,
+
+ // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry
+ // providing the offset of a variable from the thread-pointer. Used in
+ // initial-exec TLS model where this offset is assigned in the static thread
+ // block and thus known by the dynamic linker.
+ MO_GOTTPREL,
+ MO_GOTTPREL_LO12,
+
+ // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing
+ // a TLS descriptor chosen by the dynamic linker. Used for the
+ // general-dynamic and local-dynamic TLS access models where very little is
+ // known at link-time.
+ MO_TLSDESC,
+ MO_TLSDESC_LO12,
+
+ // MO_TPREL_* - Represents a relocation referring to the offset of a
+ // variable from the thread pointer itself. Used in the local-exec TLS
+ // access model.
+ MO_TPREL_G1,
+ MO_TPREL_G0_NC,
+
+ // MO_LO12 - On a symbol operand, this represents a relocation containing
+ // lower 12 bits of the address. Used in add/sub/ldr/str.
+ MO_LO12
+ };
+}
+
+class APFloat;
+
+namespace A64Imms {
+ bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits);
+
+ inline bool isFPImm(const APFloat &Val) {
+ uint32_t Imm8;
+ return isFPImm(Val, Imm8);
+ }
+
+ bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits);
+ bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm);
+
+ bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+ bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+ // We sometimes want to know whether the immediate is representable with a
+ // MOVN but *not* with a MOVZ (because that would take priority).
+ bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
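+ // For example, on a 64-bit register 0xffffffffffff0123 is only reachable
+ // with MOVN (it is NOT(0xfedc)), while a value like 0x0123 is matched by
+ // MOVZ first.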
+
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/lib/Target/AArch64/Utils/CMakeLists.txt b/lib/Target/AArch64/Utils/CMakeLists.txt
new file mode 100644
index 000000000000..2c28348d7d81
--- /dev/null
+++ b/lib/Target/AArch64/Utils/CMakeLists.txt
@@ -0,0 +1,5 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Utils
+ AArch64BaseInfo.cpp
+ )
diff --git a/lib/Target/AArch64/Utils/LLVMBuild.txt b/lib/Target/AArch64/Utils/LLVMBuild.txt
new file mode 100644
index 000000000000..1be537598ae5
--- /dev/null
+++ b/lib/Target/AArch64/Utils/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/AArch64/Utils/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Utils
+parent = AArch64
+required_libraries = Core Support
+add_to_library_groups = AArch64
diff --git a/lib/Target/AArch64/Utils/Makefile b/lib/Target/AArch64/Utils/Makefile
new file mode 100644
index 000000000000..0f4a64527123
--- /dev/null
+++ b/lib/Target/AArch64/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/Utils/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Utils
+
+# Hack: we need to include 'main' AArch64 target directory to grab private headers
+#CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
new file mode 100644
index 000000000000..f0d4dbe2bfb3
--- /dev/null
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -0,0 +1,704 @@
+//===- A15SDOptimizer.cpp - Optimize DPR and SPR register accesses on A15 ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Cortex-A15 processor employs a tracking scheme in its register renaming
+// in order to process each instruction's micro-ops speculatively and
+// out-of-order with appropriate forwarding. The ARM architecture allows VFP
+// instructions to read and write 32-bit S-registers. Each S-register
+// corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
+//
+// Several instruction patterns can be used to provide this capability, and
+// some of them perform better than other, potentially more direct patterns,
+// specifically when one micro-op reads a D-register operand that has recently
+// been written as one or more S-register results.
+//
+// This file defines a pre-regalloc pass which looks for SPR producers that
+// are going to be used by DPR (or QPR) consumers and rewrites them to use the
+// more optimized access pattern.
+//
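+// Schematically, a partial D-register update such as
+//   vmov.f32 s0, ...            @ SPR write
+//   vadd.f32 d1, d0, d2         @ DPR read of d0 (= s0:s1)
+// can stall on A15; the pass instead duplicates the scalar into all lanes
+// with VDUP and recombines lanes with VEXT so consumers read fully-defined
+// D-registers.
+//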
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "a15-sd-optimizer"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <set>
+
+using namespace llvm;
+
+namespace {
+ struct A15SDOptimizer : public MachineFunctionPass {
+ static char ID;
+ A15SDOptimizer() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM A15 S->D optimizer";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ bool runOnInstruction(MachineInstr *MI);
+
+ //
+ // Instruction builder helpers
+ //
+ unsigned createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane,
+ bool QPR=false);
+
+ unsigned createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC);
+
+ unsigned createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1);
+
+ unsigned createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2);
+
+ unsigned createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert);
+
+ unsigned createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL);
+
+ //
+ // Various property checkers
+ //
+ bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
+ bool hasPartialWrite(MachineInstr *MI);
+ SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
+ unsigned getDPRLaneFromSPR(unsigned SReg);
+
+ //
+ // Methods used for getting the definitions of partial registers
+ //
+
+ MachineInstr *elideCopies(MachineInstr *MI);
+ void elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs);
+
+ //
+ // Pattern optimization methods
+ //
+ unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
+ unsigned optimizeSDPattern(MachineInstr *MI);
+ unsigned getPrefSPRLane(unsigned SReg);
+
+ //
+ // Sanitizing method - used to make sure we don't leave dead code around.
+ //
+ void eraseInstrWithNoUses(MachineInstr *MI);
+
+ //
+ // A map used to track the changes done by this pass.
+ //
+ std::map<MachineInstr*, unsigned> Replacements;
+ std::set<MachineInstr *> DeadInstr;
+ };
+ char A15SDOptimizer::ID = 0;
+} // end anonymous namespace
+
+// Returns true if this operand uses a register of the given register class.
+bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
+ const TargetRegisterClass *TRC) {
+ if (!MO.isReg())
+ return false;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
+ else
+ return TRC->contains(Reg);
+}
+
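+// Returns which lane (ssub_0 or ssub_1) of its containing D-register the
+// given physical S-register occupies.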
+unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
+ &ARM::DPRRegClass);
+ if (DReg != ARM::NoRegister) return ARM::ssub_1;
+ return ARM::ssub_0;
+}
+
+// Get the subreg type that is most likely to be coalesced
+// for an SPR register that will be used in VDUP32d pseudo.
+unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
+ if (!TRI->isVirtualRegister(SReg))
+ return getDPRLaneFromSPR(SReg);
+
+ MachineInstr *MI = MRI->getVRegDef(SReg);
+ if (!MI) return ARM::ssub_0;
+ MachineOperand *MO = MI->findRegisterDefOperand(SReg);
+
+ if (!MO) return ARM::ssub_0;
+ assert(MO->isReg() && "Non register operand found!");
+
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ SReg = MI->getOperand(1).getReg();
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(SReg)) {
+ if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
+ return ARM::ssub_0;
+ }
+ return getDPRLaneFromSPR(SReg);
+}
+
+// MI is known to be dead. Figure out what instructions
+// are also made dead by this and mark them for removal.
+void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
+ SmallVector<MachineInstr *, 8> Front;
+ DeadInstr.insert(MI);
+
+ DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
+ Front.push_back(MI);
+
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // MI is already known to be dead. We need to see
+ // if other instructions can also be removed.
+ for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if ((!MO.isReg()) || (!MO.isUse()))
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+ MachineOperand *Op = MI->findRegisterDefOperand(Reg);
+
+ if (!Op)
+ continue;
+
+ MachineInstr *Def = Op->getParent();
+
+ // We don't need to do anything if we have already marked
+ // this instruction as being dead.
+ if (DeadInstr.find(Def) != DeadInstr.end())
+ continue;
+
+ // Check if all the uses of this instruction are marked as
+ // dead. If so, we can also mark this instruction as being
+ // dead.
+ bool IsDead = true;
+ for (unsigned int j = 0; j < Def->getNumOperands(); ++j) {
+ MachineOperand &MODef = Def->getOperand(j);
+ if ((!MODef.isReg()) || (!MODef.isDef()))
+ continue;
+ unsigned DefReg = MODef.getReg();
+ if (!TRI->isVirtualRegister(DefReg)) {
+ IsDead = false;
+ break;
+ }
+ for (MachineRegisterInfo::use_iterator II = MRI->use_begin(DefReg),
+ EE = MRI->use_end();
+ II != EE; ++II) {
+ // We don't care about self references.
+ if (&*II == Def)
+ continue;
+ if (DeadInstr.find(&*II) == DeadInstr.end()) {
+ IsDead = false;
+ break;
+ }
+ }
+ }
+
+ if (!IsDead) continue;
+
+ DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
+ DeadInstr.insert(Def);
+ }
+ }
+}
+
+// Creates the more optimized patterns and generally does all the code
+// transformations in this pass.
+unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
+ if (MI->isCopy()) {
+ return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
+ }
+
+ if (MI->isInsertSubreg()) {
+ unsigned DPRReg = MI->getOperand(1).getReg();
+ unsigned SPRReg = MI->getOperand(2).getReg();
+
+ if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
+ MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
+
+ if (DPRMI && SPRMI) {
+ // See if the first operand of this insert_subreg is IMPLICIT_DEF
+ MachineInstr *ECDef = elideCopies(DPRMI);
+ if (ECDef != 0 && ECDef->isImplicitDef()) {
+ // Another corner case - if we're inserting something that is purely
+ // a subreg copy of a DPR, just use that DPR.
+
+ MachineInstr *EC = elideCopies(SPRMI);
+ // Is it a subreg copy of ssub_0?
+ if (EC && EC->isCopy() &&
+ EC->getOperand(1).getSubReg() == ARM::ssub_0) {
+ DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
+
+ // Find the thing we're subreg copying out of - is it of the same
+ // regclass as DPRMI? (i.e. a DPR or QPR).
+ unsigned FullReg = SPRMI->getOperand(1).getReg();
+ const TargetRegisterClass *TRC =
+ MRI->getRegClass(MI->getOperand(1).getReg());
+ if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
+ DEBUG(dbgs() << "Subreg copy is compatible - returning ");
+ DEBUG(dbgs() << PrintReg(FullReg) << "\n");
+ eraseInstrWithNoUses(MI);
+ return FullReg;
+ }
+ }
+
+ return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
+ }
+ }
+ }
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ // See if all bar one of the operands are IMPLICIT_DEF and insert the
+ // optimizer pattern accordingly.
+ unsigned NumImplicit = 0, NumTotal = 0;
+ unsigned NonImplicitReg = ~0U;
+
+ for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
+ if (!MI->getOperand(I).isReg())
+ continue;
+ ++NumTotal;
+ unsigned OpReg = MI->getOperand(I).getReg();
+
+ if (!TRI->isVirtualRegister(OpReg))
+ break;
+
+ MachineInstr *Def = MRI->getVRegDef(OpReg);
+ if (!Def)
+ break;
+ if (Def->isImplicitDef())
+ ++NumImplicit;
+ else
+ NonImplicitReg = MI->getOperand(I).getReg();
+ }
+
+ if (NumImplicit == NumTotal - 1)
+ return optimizeAllLanesPattern(MI, NonImplicitReg);
+ else
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ assert(0 && "Unhandled update pattern!");
+ return 0;
+}
+
+// Return true if this MachineInstr inserts a scalar (SPR) value into
+// a D or Q register.
+bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
+ // The only way we can do a partial register update is through a COPY,
+ // INSERT_SUBREG or REG_SEQUENCE.
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
+ &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ return false;
+}
+
+// Looks through full copies to get the instruction that defines the input
+// operand for MI.
+MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
+ if (!MI->isFullCopy())
+ return MI;
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ return NULL;
+ MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!Def)
+ return NULL;
+ return elideCopies(Def);
+}
+
+// Look through full copies and PHIs to get the set of non-copy MachineInstrs
+// that can produce MI.
+void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs) {
+ // Looking through PHIs may create loops so we need to track what
+ // instructions we have visited before.
+ std::set<MachineInstr *> Reached;
+ SmallVector<MachineInstr *, 8> Front;
+ Front.push_back(MI);
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // If we have already explored this MachineInstr, ignore it.
+ if (Reached.find(MI) != Reached.end())
+ continue;
+ Reached.insert(MI);
+ if (MI->isPHI()) {
+ for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
+ unsigned Reg = MI->getOperand(I).getReg();
+ if (!TRI->isVirtualRegister(Reg)) {
+ continue;
+ }
+ MachineInstr *NewMI = MRI->getVRegDef(Reg);
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ }
+ } else if (MI->isFullCopy()) {
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ continue;
+ MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ } else {
+ DEBUG(dbgs() << "Found partial copy" << *MI <<"\n");
+ Outs.push_back(MI);
+ }
+ }
+}
+
+// Return the DPR virtual registers that are read by this machine instruction
+// (if any).
+SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
+ if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
+ MI->isKill())
+ return SmallVector<unsigned, 8>();
+
+ SmallVector<unsigned, 8> Defs;
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ if (!usesRegClass(MO, &ARM::DPRRegClass) &&
+ !usesRegClass(MO, &ARM::QPRRegClass))
+ continue;
+
+ Defs.push_back(MO.getReg());
+ }
+ return Defs;
+}
+
+// Broadcasts the given lane of Reg into every lane of a new DPR (or QPR)
+// register using a VDUP.
+unsigned
+A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane, bool QPR) {
+ unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
+ &ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
+ Out)
+ .addReg(Reg)
+ .addImm(Lane));
+
+ return Out;
+}
+
+// Copies the given subregister (Lane) of DReg into a new virtual register of
+// class TRC.
+unsigned
+A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC) {
+ unsigned Out = MRI->createVirtualRegister(TRC);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::COPY), Out)
+ .addReg(DReg, 0, Lane);
+
+ return Out;
+}
+
+// Takes two DPR registers and combines them into one QPR register using a
+// REG_SEQUENCE.
+unsigned
+A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::REG_SEQUENCE), Out)
+ .addReg(Reg1)
+ .addImm(ARM::dsub_0)
+ .addReg(Reg2)
+ .addImm(ARM::dsub_1);
+ return Out;
+}
+
+// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
+// and merges them into one DPR register.
+unsigned
+A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(ARM::VEXTd32), Out)
+ .addReg(Ssub0)
+ .addReg(Ssub1)
+ .addImm(1));
+ return Out;
+}
+
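+// Inserts an SPR value into the given lane of a DPR register using an
+// INSERT_SUBREG pseudo.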
+unsigned
+A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::INSERT_SUBREG), Out)
+ .addReg(DReg)
+ .addReg(ToInsert)
+ .addImm(Lane);
+
+ return Out;
+}
+
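+// Creates an undefined DPR value via IMPLICIT_DEF, to serve as the base
+// register for a following lane insert.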
+unsigned
+A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::IMPLICIT_DEF), Out);
+ return Out;
+}
+
+// This function inserts instructions in order to optimize interactions between
+// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
+// lanes, and then using VEXT instructions to recompose the result.
+unsigned
+A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
+ MachineBasicBlock::iterator InsertPt(MI);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock &MBB = *MI->getParent();
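+ // The replacement sequence is inserted immediately after MI.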
+ InsertPt++;
+ unsigned Out;
+
+ if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) {
+ unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_0, &ARM::DPRRegClass);
+ unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_1, &ARM::DPRRegClass);
+
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
+ unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
+ Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
+
+ Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
+
+ } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ } else {
+ assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
+ "Found unexpected regclass!");
+
+ unsigned PrefLane = getPrefSPRLane(Reg);
+ unsigned Lane;
+ switch (PrefLane) {
+ case ARM::ssub_0: Lane = 0; break;
+ case ARM::ssub_1: Lane = 1; break;
+ default: llvm_unreachable("Unknown preferred lane!");
+ }
+
+ bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
+
+ Out = createImplicitDef(MBB, InsertPt, DL);
+ Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
+ Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
+ eraseInstrWithNoUses(MI);
+ }
+ return Out;
+}
+
+bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
+ // We look for instructions that write S registers that are then read as
+ // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
+ // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
+ // merge two SPR values to form a DPR register. In order to avoid false
+ // positives we make sure that there is an SPR producer so we look past
+ // COPY and PHI nodes to find it.
+ //
+ // The best code pattern for when an SPR producer is going to be used by a
+ // DPR or QPR consumer depends on whether the other lanes of the
+ // corresponding DPR/QPR are currently defined.
+ //
+ // We can handle these efficiently, depending on the type of
+ // pseudo-instruction that is producing the pattern
+ //
+ // * COPY: * VDUP all lanes and merge the results together
+ // using VEXTs.
+ //
+ // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
+ // lane, and the other lane(s) of the DPR/QPR register
+ // that we are inserting in are undefined, use the
+ // original DPR/QPR value.
+ // * Otherwise, fall back on the same strategy as COPY.
+ //
+ // * REG_SEQUENCE: * If all except one of the input operands are
+ // IMPLICIT_DEFs, insert the VDUP pattern for just the
+ // defined input operand
+ // * Otherwise, fall back on the same strategy as COPY.
+ //
+
+ // First, get all the reads of D-registers done by this instruction.
+ SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
+ bool Modified = false;
+
+ for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ // Follow the def-use chain for this DPR through COPYs, and also through
+ // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
+ // we can end up with multiple defs of this DPR.
+
+ SmallVector<MachineInstr *, 8> DefSrcs;
+ if (!TRI->isVirtualRegister(*I))
+ continue;
+ MachineInstr *Def = MRI->getVRegDef(*I);
+ if (!Def)
+ continue;
+
+ elideCopiesAndPHIs(Def, DefSrcs);
+
+ for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(),
+ EE = DefSrcs.end(); II != EE; ++II) {
+ MachineInstr *MI = *II;
+
+ // If we've already analyzed and replaced this operand, don't do
+ // anything.
+ if (Replacements.find(MI) != Replacements.end())
+ continue;
+
+ // Now, work out if the instruction causes a SPR->DPR dependency.
+ if (!hasPartialWrite(MI))
+ continue;
+
+ // Collect all the uses of this MI's DPR def for updating later.
+ SmallVector<MachineOperand*, 8> Uses;
+ unsigned DPRDefReg = MI->getOperand(0).getReg();
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
+ E = MRI->use_end(); I != E; ++I)
+ Uses.push_back(&I.getOperand());
+
+ // We can optimize this.
+ unsigned NewReg = optimizeSDPattern(MI);
+
+ if (NewReg != 0) {
+ Modified = true;
+ for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ DEBUG(dbgs() << "Replacing operand "
+ << **I << " with "
+ << PrintReg(NewReg) << "\n");
+ (*I)->substVirtReg(NewReg, 0, *TRI);
+ }
+ }
+ Replacements[MI] = NewReg;
+ }
+ }
+ return Modified;
+}
+
+bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ bool Modified = false;
+
+ DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n");
+
+ DeadInstr.clear();
+ Replacements.clear();
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+
+ for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end();
+ MI != ME;) {
+ Modified |= runOnInstruction(MI++);
+ }
+
+ }
+
+ for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(),
+ E = DeadInstr.end();
+ I != E; ++I) {
+ (*I)->eraseFromParent();
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createA15SDOptimizerPass() {
+ return new A15SDOptimizer();
+}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 1446bbbb8e7c..80e5f37eb086 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
JITCodeEmitter &JCE);
+FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalBaseRegPass();
@@ -44,6 +45,9 @@ FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
+/// \brief Creates an ARM-specific Target Transformation Info pass.
+ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
+
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 23974ad9052c..68380847a022 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -89,6 +89,10 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
+def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
+ "AvoidMOVsShifterOperand", "true",
+ "Avoid movs instructions with shifter operand">;
+
// Some processors perform return stack prediction. CodeGen should avoid issue
// "normal" call instructions to callees which do not return.
def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
@@ -106,6 +110,11 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true",
"Is microcontroller profile ('M' series)">;
+// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
+// See ARMInstrInfo.td for details.
+def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
+ "NaCl trap">;
+
// ARM ISAs.
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
@@ -132,11 +141,14 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
include "ARMSchedule.td"
// ARM processor families.
+def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
+ "Cortex-A5 ARM processors",
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
- [FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
"Cortex-A9 ARM processors",
[FeatureVMLxForwarding,
@@ -147,6 +159,7 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
[FeatureNEONForFP, FeatureT2XtPk,
FeatureVFP4, FeatureMP, FeatureHWDiv,
FeatureHWDivARM, FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
FeatureHasSlowFPVMLx]>;
// FIXME: It has not been determined if A15 has these features.
@@ -154,6 +167,12 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors",
[FeatureT2XtPk, FeatureFP16,
FeatureAvoidPartialCPSR]>;
+def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
+ "Cortex-R5 ARM processors",
+ [FeatureSlowFPBrcc, FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
@@ -219,6 +238,11 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
FeatureDSPThumb2]>;
// V7a Processors.
+// FIXME: A5 has currently the same Schedule model as A8
+def : ProcessorModel<"cortex-a5", CortexA8Model,
+ [ProcA5, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureVFP4, FeatureDSPThumb2,
+ FeatureHasRAS]>;
def : ProcessorModel<"cortex-a8", CortexA8Model,
[ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureHasRAS]>;
@@ -233,6 +257,11 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
def : ProcessorModel<"cortex-a15", CortexA9Model,
[ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureHasRAS]>;
+// FIXME: R5 has currently the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r5", CortexA8Model,
+ [ProcR5, HasV7Ops, FeatureDB,
+ FeatureVFP3, FeatureDSPThumb2,
+ FeatureHasRAS]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index d439d1d7cb7e..13ec2087938a 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -25,30 +25,33 @@
#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
@@ -181,7 +184,7 @@ namespace {
const size_t TagHeaderSize = 1 + 4;
Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
- Streamer.EmitBytes(CurrentVendor, 0);
+ Streamer.EmitBytes(CurrentVendor);
Streamer.EmitIntValue(0, 1); // '\0'
Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
@@ -191,14 +194,14 @@ namespace {
// emit each field as its type (ULEB or String)
for (unsigned int i=0; i<Contents.size(); ++i) {
AttributeItemType item = Contents[i];
- Streamer.EmitULEB128IntValue(item.Tag, 0);
+ Streamer.EmitULEB128IntValue(item.Tag);
switch (item.Type) {
default: llvm_unreachable("Invalid attribute type");
case AttributeItemType::NumericAttribute:
- Streamer.EmitULEB128IntValue(item.IntValue, 0);
+ Streamer.EmitULEB128IntValue(item.IntValue);
break;
case AttributeItemType::TextAttribute:
- Streamer.EmitBytes(item.StringValue.upper(), 0);
+ Streamer.EmitBytes(item.StringValue.upper());
Streamer.EmitIntValue(0, 1); // '\0'
break;
}
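The tag and numeric values above travel as ULEB128, the variable-length encoding used throughout the ELF build-attributes section. A minimal standalone encoder for reference (a sketch of the format, not LLVM's implementation):

    #include <cstdint>
    #include <vector>

    // ULEB128: 7 payload bits per byte, high bit set while more bytes follow.
    // EmitULEB128IntValue produces exactly this byte sequence.
    static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;
    }

For example, 624485 encodes as the three bytes 0xe5 0x8e 0x26.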
@@ -339,6 +342,11 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ if (ARM::GPRPairRegClass.contains(Reg)) {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ Reg = TRI->getSubReg(Reg, ARM::gsub_0);
+ }
O << ARMInstPrinter::getRegisterName(Reg);
break;
}
@@ -398,7 +406,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
}
-MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const {
+MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
SmallString<60> Name;
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH"
<< getFunctionNumber();
@@ -527,14 +535,12 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &MO = MI->getOperand(OpNum);
if (!MO.isReg())
return true;
- const TargetRegisterClass &RC = ARM::GPRRegClass;
const MachineFunction &MF = *MI->getParent()->getParent();
const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
-
- unsigned RegIdx = TRI->getEncodingValue(MO.getReg());
- RegIdx |= 1; //The odd register is also the higher-numbered one of a pair.
-
- unsigned Reg = RC.getRegister(RegIdx);
+ unsigned Reg = MO.getReg();
+ if (!ARM::GPRPairRegClass.contains(Reg))
+ return false;
+ Reg = TRI->getSubReg(Reg, ARM::gsub_1);
O << ARMInstPrinter::getRegisterName(Reg);
return false;
}
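This hook appears to serve the GCC-style %Q and %R inline-assembly operand modifiers, which select the low (gsub_0) and high (gsub_1) words of a 64-bit value held in a register pair. A hedged usage sketch; the asm path assumes a little-endian 32-bit ARM target and a GCC-compatible compiler, and falls back to shifts elsewhere so it stays runnable:

    #include <cstdio>

    int main() {
      unsigned long long V = 0x0123456789abcdefULL;
      unsigned Lo, Hi;
    #if defined(__arm__)
      // %Q2 prints the low word of operand 2, %R2 the high word.
      __asm__("mov %0, %Q2\n\t"
              "mov %1, %R2"
              : "=r"(Lo), "=r"(Hi)
              : "r"(V));
    #else
      Lo = (unsigned)V;
      Hi = (unsigned)(V >> 32);
    #endif
      printf("lo=%#x hi=%#x\n", Lo, Hi);
      return 0;
    }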
@@ -656,7 +662,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, 4/*size*/);
else
// Internal to current translation unit.
//
@@ -666,7 +672,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// We need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
- 4/*size*/, 0/*addrspace*/);
+ 4/*size*/);
}
Stubs.clear();
@@ -684,7 +690,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext),
- 4/*size*/, 0/*addrspace*/);
+ 4/*size*/);
}
Stubs.clear();
@@ -698,6 +704,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&OutStreamer))
+ MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
}
//===----------------------------------------------------------------------===//
@@ -1051,12 +1062,10 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
OutContext);
// If this isn't a TBB or TBH, the entries are direct branch instructions.
if (OffsetWidth == 4) {
- MCInst BrInst;
- BrInst.setOpcode(ARM::t2B);
- BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
- BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- BrInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(BrInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2B)
+ .addExpr(MBBSymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
continue;
}
// Otherwise it's an offset from the dispatch instruction. Construct an
@@ -1100,18 +1109,6 @@ void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
printOperand(MI, NOps-2, OS);
}
-static void populateADROperands(MCInst &Inst, unsigned Dest,
- const MCSymbol *Label,
- unsigned pred, unsigned ccreg,
- MCContext &Ctx) {
- const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx);
- Inst.addOperand(MCOperand::CreateReg(Dest));
- Inst.addOperand(MCOperand::CreateExpr(SymbolExpr));
- // Add predicate operands.
- Inst.addOperand(MCOperand::CreateImm(pred));
- Inst.addOperand(MCOperand::CreateReg(ccreg));
-}
-
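The deletion above is representative of the whole file: once operands chain through MCInstBuilder, one-off helpers like populateADROperands have nothing left to do. A toy model of the fluent idiom, with invented types, shows why a single expression replaces each helper-plus-EmitInstruction pair:

    #include <cstdint>
    #include <vector>

    // Invented stand-ins for MCInst/MCInstBuilder: each add* call appends an
    // operand and returns *this, so a whole instruction forms in one expression.
    struct InstModel {
      unsigned Opcode = 0;
      std::vector<int64_t> Operands;
    };

    class InstBuilderModel {
      InstModel Inst;
    public:
      explicit InstBuilderModel(unsigned Opc) { Inst.Opcode = Opc; }
      InstBuilderModel &addReg(unsigned R) { Inst.Operands.push_back(R); return *this; }
      InstBuilderModel &addImm(int64_t V) { Inst.Operands.push_back(V); return *this; }
      // The real builder converts implicitly to MCInst& when handed to
      // EmitInstruction; this mirrors that.
      operator const InstModel &() const { return Inst; }
    };

An instruction then forms in place, e.g. InstBuilderModel(Opcode).addReg(1).addImm(0), mirroring the MCInstBuilder chains used throughout this file.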
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(MI->getFlag(MachineInstr::FrameSetup) &&
"Only instruction which are involved into frame setup code are allowed");
@@ -1288,129 +1285,104 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::tLEApcrel:
case ARM::t2LEApcrel: {
// FIXME: Need to also handle globals and externals
- MCInst TmpInst;
- TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR
- : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
- : ARM::ADR));
- populateADROperands(TmpInst, MI->getOperand(0).getReg(),
- GetCPISymbol(MI->getOperand(1).getIndex()),
- MI->getOperand(2).getImm(), MI->getOperand(3).getReg(),
- OutContext);
- OutStreamer.EmitInstruction(TmpInst);
+ MCSymbol *CPISymbol = GetCPISymbol(MI->getOperand(1).getIndex());
+ OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrel ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::Create(CPISymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg()));
return;
}
case ARM::LEApcrelJT:
case ARM::tLEApcrelJT:
case ARM::t2LEApcrelJT: {
- MCInst TmpInst;
- TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR
- : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
- : ARM::ADR));
- populateADROperands(TmpInst, MI->getOperand(0).getReg(),
- GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
- MI->getOperand(2).getImm()),
- MI->getOperand(3).getImm(), MI->getOperand(4).getReg(),
- OutContext);
- OutStreamer.EmitInstruction(TmpInst);
+ MCSymbol *JTIPICSymbol =
+ GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
+ MI->getOperand(2).getImm());
+ OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrelJT ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg()));
return;
}
// Darwin call instructions are just normal call instructions with different
// clobber semantics (they clobber R9).
case ARM::BX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::BX);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX)
+ .addReg(MI->getOperand(0).getReg()));
return;
}
case ARM::tBX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tBX);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX)
+ .addReg(MI->getOperand(0).getReg())
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::BMOVPCRX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
return;
}
case ARM::BMOVPCB_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::Bcc);
- const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = Mang->getSymbol(GV);
- const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
- TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+ .addReg(0));
+
+ const GlobalValue *GV = MI->getOperand(0).getGlobal();
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc)
+ .addExpr(GVSymExpr)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::MOVi16_ga_pcrel:
@@ -1498,15 +1470,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext));
// Form and emit the add.
- MCInst AddInst;
- AddInst.setOpcode(ARM::tADDhirr);
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
- // Add predicate operands.
- AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- AddInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(AddInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDhirr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::PICADD: {
@@ -1521,17 +1491,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext));
// Form and emit the add.
- MCInst AddInst;
- AddInst.setOpcode(ARM::ADDrr);
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- // Add predicate operands.
- AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
- // Add 's' bit operand (always reg0 for this)
- AddInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(AddInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg())
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
return;
}
case ARM::PICSTR:
@@ -1567,16 +1535,14 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
}
- MCInst LdStInst;
- LdStInst.setOpcode(Opcode);
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- LdStInst.addOperand(MCOperand::CreateReg(ARM::PC));
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- LdStInst.addOperand(MCOperand::CreateImm(0));
- // Add predicate operands.
- LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
- OutStreamer.EmitInstruction(LdStInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(Opcode)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg()));
return;
}
@@ -1606,29 +1572,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case ARM::t2BR_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
// Output the data for the jump table itself
EmitJump2Table(MI);
return;
}
case ARM::t2TBB_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::t2TBB);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBB)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
// Output the data for the jump table itself
EmitJump2Table(MI);
// Make sure the next instruction is 2-byte aligned.
@@ -1637,15 +1600,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case ARM::t2TBH_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::t2TBH);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBH)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
// Output the data for the jump table itself
EmitJump2Table(MI);
return;
@@ -1705,17 +1666,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::BR_JTadd: {
// Lower and emit the instruction itself, then the jump table following it.
// add pc, target, idx
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDrr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- // Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
// Output the data for the jump table itself
EmitJumpTable(MI);
@@ -1733,6 +1692,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
break;
}
+ case ARM::TRAPNaCl: {
+ // .long 0xe7fedef0 @ trap
+ uint32_t Val = 0xe7fedef0UL;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 4);
+ return;
+ }
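The constant appears to be chosen so the same word is permanently undefined however it is fetched: as an ARM word it falls in the UDF encoding space (0xe7fxxxfx), and read as little-endian Thumb halfwords the first unit is 0xdef0, a Thumb UDF, so execution traps before reaching the second halfword. That decoding is our reading of the architecture manual, not something the patch states; a quick standalone check of the split:

    #include <cstdint>
    #include <cstdio>

    // Split the NaCl trap word into the halfwords a Thumb fetch would see.
    int main() {
      uint32_t Trap = 0xe7fedef0;
      printf("thumb halfwords: %#06x %#06x\n",
             (unsigned)(Trap & 0xffff),          // 0xdef0: Thumb UDF
             (unsigned)((Trap >> 16) & 0xffff)); // 0xe7fe
      return 0;
    }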
case ARM::tTRAP: {
// Non-Darwin binutils don't yet support the "trap" mnemonic.
// FIXME: Remove this special case when they do.
@@ -1759,75 +1725,57 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
MCSymbol *Label = GetARMSJLJEHLabel();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp begin");
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tADDi3);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDi3)
+ .addReg(ValReg)
// 's' bit operand
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateImm(7));
+ .addReg(ARM::CPSR)
+ .addReg(ValReg)
+ .addImm(7)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tSTRi);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tSTRi)
+ .addReg(ValReg)
+ .addReg(SrcReg)
// The offset immediate is #4. The operand value is scaled by 4 for the
// tSTR instruction.
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVi8);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tB);
- TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVi8);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tB)
+ .addExpr(SymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp end");
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
OutStreamer.EmitLabel(Label);
return;
}
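The "adds valreg, #7" in this sequence deserves a note: Thumb reads PC as the current instruction's address plus 4, and the stored value must address the "movs r0, #1" resume point with the Thumb bit set. Tracing the 2-byte Thumb encodings gives the arithmetic below (our derivation, not the patch's commentary):

    #include <cassert>

    // Offsets of the 2-byte instructions emitted above:
    // 0: mov valreg, pc   4: str valreg, [src]   8: b Label
    // 2: adds valreg, #7  6: movs r0, #0        10: movs r0, #1  (resume)
    int main() {
      unsigned PCRead = 0 + 4;   // PC as observed by the mov at offset 0
      unsigned Resume = 10 | 1;  // resume address with the Thumb bit set
      assert(PCRead + 7 == Resume);
      return 0;
    }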
@@ -1843,69 +1791,53 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDri);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateImm(8));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
+ .addImm(8)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp begin");
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::STRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(4));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::STRi12)
+ .addReg(ValReg)
+ .addReg(SrcReg)
+ .addImm(4)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDri);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri)
+ .addReg(ARM::PC)
+ .addReg(ARM::PC)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addReg(0));
+
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp end");
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
return;
}
case ARM::Int_eh_sjlj_longjmp: {
@@ -1915,48 +1847,35 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// bx $scratch
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ScratchReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(8));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::SP)
+ .addReg(SrcReg)
+ .addImm(8)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(4));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(4)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::BX);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
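Read together with the setjmp sequence above, which stores the resume address at offset 4, the three loads pin down the buffer layout the two pseudo-instructions share. A model struct, ours rather than an LLVM type:

    #include <cstdint>

    // Layout implied by the loads: ldr r7, [src, #0]; ldr scratch, [src, #4];
    // ldr sp, [src, #8]; bx scratch.
    struct SjLjBufferModel {
      uint32_t SavedR7;       // offset 0
      uint32_t ResumeAddress; // offset 4, target of the final bx
      uint32_t SavedSP;       // offset 8
    };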
case ARM::tInt_eh_sjlj_longjmp: {
@@ -1967,60 +1886,44 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// bx $scratch
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ScratchReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
// The offset immediate is #8. The operand value is scaled by 4 for the
// tLDR instruction.
- TmpInst.addOperand(MCOperand::CreateImm(2));
+ .addImm(2)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::SP)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tBX);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
}
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index c875b2cbdffe..c945e4f28699 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===//
+//===-- ARMAsmPrinter.h - ARM implementation of AsmPrinter ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// ARM Assembly printer class.
-//
-//===----------------------------------------------------------------------===//
#ifndef ARMASMPRINTER_H
#define ARMASMPRINTER_H
@@ -54,7 +50,7 @@ public:
}
virtual const char *getPassName() const LLVM_OVERRIDE {
- return "ARM Assembly Printer";
+ return "ARM Assembly / Object Emitter";
}
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
@@ -121,7 +117,7 @@ private:
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
- MCSymbol *GetARMSJLJEHLabel(void) const;
+ MCSymbol *GetARMSJLJEHLabel() const;
MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 3c7bb24f42f8..9e68ff44890e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -18,9 +18,7 @@
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -29,12 +27,14 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
#include "ARMGenInstrInfo.inc"
@@ -106,7 +106,7 @@ CreateTargetHazardRecognizer(const TargetMachine *TM,
const InstrItineraryData *II = TM->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
}
- return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}
ScheduleHazardRecognizer *ARMBaseInstrInfo::
@@ -115,7 +115,7 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
if (Subtarget.isThumb2() || Subtarget.hasVFP2())
return (ScheduleHazardRecognizer *)
new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
- return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+ return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
MachineInstr *
@@ -464,8 +464,9 @@ PredicateInstruction(MachineInstr *MI,
unsigned Opc = MI->getOpcode();
if (isUncondBranchOpcode(Opc)) {
MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
- MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
- MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
return true;
}
@@ -1124,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!WidenVMOVS || !MI->isCopy())
+ if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
return false;
// Look for a copy between even S-registers. That is where we keep floats
@@ -1154,6 +1155,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
// All clear, widen the COPY.
DEBUG(dbgs() << "widening: " << *MI);
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
// Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
// or some other super-register.
@@ -1165,14 +1167,14 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
MI->setDesc(get(ARM::VMOVD));
MI->getOperand(0).setReg(DstRegD);
MI->getOperand(1).setReg(SrcRegD);
- AddDefaultPred(MachineInstrBuilder(MI));
+ AddDefaultPred(MIB);
// We are now reading SrcRegD instead of SrcRegS. This may upset the
// register scavenger and machine verifier, so we need to indicate that we
// are reading an undefined value from SrcRegD, but a proper value from
// SrcRegS.
MI->getOperand(1).setIsUndef();
- MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit);
+ MIB.addReg(SrcRegS, RegState::Implicit);
// SrcRegD may actually contain an unrelated value in the ssub_1
// sub-register. Don't kill it. Only kill the ssub_0 sub-register.
@@ -1269,7 +1271,7 @@ reMaterialize(MachineBasicBlock &MBB,
MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
- MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
+ MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
switch(Orig->getOpcode()) {
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
@@ -1373,6 +1375,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
/// only return true if the base pointers are the same and the only differences
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
int64_t &Offset1,
int64_t &Offset2) const {
@@ -1447,6 +1452,9 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const {
@@ -1598,7 +1606,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return NULL;
- MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
if (!MI)
return NULL;
// After swapping the MOVCC operands, also invert the condition.
@@ -1607,7 +1615,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return MI;
}
}
- return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
/// Identify instructions that can be folded into a MOVCC instruction, and
@@ -1710,7 +1718,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
// same register as operand 0.
MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
FalseReg.setImplicit();
- NewMI->addOperand(FalseReg);
+ NewMI.addOperand(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
// The caller will erase MI, but not DefMI.
@@ -2711,7 +2719,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
case ARM::t2STMDB_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
if (Subtarget.isSwift()) {
- // rdar://8402126
int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
switch (Opc) {
default: break;
@@ -3321,8 +3328,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
- if (MF->getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize))
+ if (MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
--Latency;
}
return Latency;
@@ -3726,9 +3734,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
- // A9-like cores are particularly picky about mixing the two and want these
+ // CortexA9 is particularly picky about mixing the two and wants these
// converted.
- if (Subtarget.isLikeA9() && !isPredicated(MI) &&
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
MI->getOpcode() == ARM::VMOVSR ||
MI->getOpcode() == ARM::VMOVS))
@@ -3813,7 +3821,7 @@ void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
unsigned DstReg, SrcReg, DReg;
unsigned Lane;
- MachineInstrBuilder MIB(MI);
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
const TargetRegisterInfo *TRI = &getRegisterInfo();
switch (MI->getOpcode()) {
default:
@@ -4015,14 +4023,12 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
-
-
unsigned ARMBaseInstrInfo::
getPartialRegUpdateClearance(const MachineInstr *MI,
unsigned OpNum,
const TargetRegisterInfo *TRI) const {
- // Only Swift has partial register update problems.
- if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+ if (!SwiftPartialUpdateClearance ||
+ !(Subtarget.isSwift() || Subtarget.isCortexA15()))
return 0;
assert(TRI && "Need TRI instance");
@@ -4038,7 +4044,6 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
case ARM::VLDRS:
case ARM::FCONSTS:
case ARM::VMOVSR:
- // rdar://problem/8791586
case ARM::VMOVv8i8:
case ARM::VMOVv4i16:
case ARM::VMOVv2i32:
@@ -4049,7 +4054,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
// Explicitly reads the dependency.
case ARM::VLD1LNd32:
- UseOp = 1;
+ UseOp = 3;
break;
default:
return 0;
@@ -4118,3 +4123,15 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
bool ARMBaseInstrInfo::hasNOP() const {
return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}
+
+bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
+ // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
+ if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
+ ((ShImm == 1 || ShImm == 2) &&
+ ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
+ return true;
+
+ return false;
+}
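Stripped of the operand decoding, the predicate is simple: Swift executes a shift-by-immediate operand one cycle faster only for lsl #1, lsl #2, and lsr #1, e.g. the scaled index in "add r0, r1, r2, lsl #2". A plain-C++ restatement of the check, with the ARM_AM helpers abstracted into an enum:

    // Mirror of isSwiftFastImmShift minus the shifter-operand decoding.
    enum ShiftOpModel { LSL, LSR };
    static bool isSwiftFastImmShiftModel(ShiftOpModel Op, unsigned ShImm) {
      return (Op == LSR && ShImm == 1) ||
             (Op == LSL && (ShImm == 1 || ShImm == 2));
    }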
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 6f38e35124eb..7c107bb41951 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -15,10 +15,10 @@
#define ARMBASEINSTRUCTIONINFO_H
#include "ARM.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "ARMGenInstrInfo.inc"
@@ -314,6 +314,10 @@ public:
bool canCauseFpMLxStall(unsigned Opcode) const {
return MLxHazardOpcodes.count(Opcode);
}
+
+ /// Returns true if the instruction has a shift-by-immediate operand that
+ /// Swift can execute in one cycle less.
+ bool isSwiftFastImmShift(const MachineInstr *MI) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index e5b300fc7792..b6b27f849a23 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -18,44 +18,34 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CommandLine.h"
#define GET_REGINFO_TARGET_DESC
#include "ARMGenRegisterInfo.inc"
using namespace llvm;
-static cl::opt<bool>
-ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
- cl::desc("Force use of virtual base registers for stack load/store"));
-static cl::opt<bool>
-EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
- cl::desc("Enable pre-regalloc stack frame index allocation"));
-static cl::opt<bool>
-EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
- cl::desc("Enable use of a base pointer for complex stack frames"));
-
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti),
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti),
FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
BasePtr(ARM::R6) {
}
@@ -173,154 +163,63 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-/// getRawAllocationOrder - Returns the register allocation order for a
-/// specified register class with a target-dependent hint.
-ArrayRef<uint16_t>
-ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- // Alternative register allocation orders when favoring even / odd registers
- // of register pairs.
-
- // No FP, R9 is available.
- static const uint16_t GPREven1[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
- ARM::R9, ARM::R11
- };
- static const uint16_t GPROdd1[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
- ARM::R8, ARM::R10
- };
-
- // FP is R7, R9 is available.
- static const uint16_t GPREven2[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
- ARM::R9, ARM::R11
- };
- static const uint16_t GPROdd2[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
- ARM::R8, ARM::R10
- };
-
- // FP is R11, R9 is available.
- static const uint16_t GPREven3[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
- ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
- ARM::R9
- };
- static const uint16_t GPROdd3[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
- ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
- ARM::R8
- };
-
- // No FP, R9 is not available.
- static const uint16_t GPREven4[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
- ARM::R11
- };
- static const uint16_t GPROdd4[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
- ARM::R10
- };
-
- // FP is R7, R9 is not available.
- static const uint16_t GPREven5[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R10,
- ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
- ARM::R11
- };
- static const uint16_t GPROdd5[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R11,
- ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
- ARM::R10
- };
-
- // FP is R11, R9 is not available.
- static const uint16_t GPREven6[] = {
- ARM::R0, ARM::R2, ARM::R4, ARM::R6,
- ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
- };
- static const uint16_t GPROdd6[] = {
- ARM::R1, ARM::R3, ARM::R5, ARM::R7,
- ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
- };
-
- // We only support even/odd hints for GPR and rGPR.
- if (RC != &ARM::GPRRegClass && RC != &ARM::rGPRRegClass)
- return RC->getRawAllocationOrder(MF);
-
- if (HintType == ARMRI::RegPairEven) {
- if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
- // It's no longer possible to fulfill this hint. Return the default
- // allocation order.
- return RC->getRawAllocationOrder(MF);
-
- if (!TFI->hasFP(MF)) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPREven1);
- else
- return makeArrayRef(GPREven4);
- } else if (FramePtr == ARM::R7) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPREven2);
- else
- return makeArrayRef(GPREven5);
- } else { // FramePtr == ARM::R11
- if (!STI.isR9Reserved())
- return makeArrayRef(GPREven3);
- else
- return makeArrayRef(GPREven6);
- }
- } else if (HintType == ARMRI::RegPairOdd) {
- if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
- // It's no longer possible to fulfill this hint. Return the default
- // allocation order.
- return RC->getRawAllocationOrder(MF);
-
- if (!TFI->hasFP(MF)) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPROdd1);
- else
- return makeArrayRef(GPROdd4);
- } else if (FramePtr == ARM::R7) {
- if (!STI.isR9Reserved())
- return makeArrayRef(GPROdd2);
- else
- return makeArrayRef(GPROdd5);
- } else { // FramePtr == ARM::R11
- if (!STI.isR9Reserved())
- return makeArrayRef(GPROdd3);
- else
- return makeArrayRef(GPROdd6);
- }
- }
- return RC->getRawAllocationOrder(MF);
+// Get the other register in a GPRPair.
+static unsigned getPairedGPR(unsigned Reg, bool Odd, const MCRegisterInfo *RI) {
+ for (MCSuperRegIterator Supers(Reg, RI); Supers.isValid(); ++Supers)
+ if (ARM::GPRPairRegClass.contains(*Supers))
+ return RI->getSubReg(*Supers, Odd ? ARM::gsub_1 : ARM::gsub_0);
+ return 0;
}
-/// ResolveRegAllocHint - Resolves the specified register allocation hint
-/// to a physical register. Returns the physical register if it is successful.
-unsigned
-ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
- const MachineFunction &MF) const {
- if (Reg == 0 || !isPhysicalRegister(Reg))
- return 0;
- if (Type == 0)
- return Reg;
- else if (Type == (unsigned)ARMRI::RegPairOdd)
- // Odd register.
- return getRegisterPairOdd(Reg, MF);
- else if (Type == (unsigned)ARMRI::RegPairEven)
- // Even register.
- return getRegisterPairEven(Reg, MF);
- return 0;
+// Resolve the RegPairEven / RegPairOdd register allocator hints.
+void
+ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+ unsigned Odd;
+ switch (Hint.first) {
+ case ARMRI::RegPairEven:
+ Odd = 0;
+ break;
+ case ARMRI::RegPairOdd:
+ Odd = 1;
+ break;
+ default:
+ TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
+ return;
+ }
+
+ // This register should preferably be even (Odd == 0) or odd (Odd == 1).
+ // Check if the other part of the pair has already been assigned, and provide
+ // the paired register as the first hint.
+ unsigned PairedPhys = 0;
+ if (VRM && VRM->hasPhys(Hint.second)) {
+ PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this);
+ if (PairedPhys && MRI.isReserved(PairedPhys))
+ PairedPhys = 0;
+ }
+
+ // First prefer the paired physreg.
+ if (PairedPhys &&
+ std::find(Order.begin(), Order.end(), PairedPhys) != Order.end())
+ Hints.push_back(PairedPhys);
+
+ // Then prefer even or odd registers.
+ for (unsigned I = 0, E = Order.size(); I != E; ++I) {
+ unsigned Reg = Order[I];
+ if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd)
+ continue;
+ // Don't provide hints that are paired to a reserved register.
+ unsigned Paired = getPairedGPR(Reg, !Odd, this);
+ if (!Paired || MRI.isReserved(Paired))
+ continue;
+ Hints.push_back(Reg);
+ }
}
void
@@ -371,9 +270,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- if (!EnableBasePointer)
- return false;
-
// When outgoing call frames are so large that we adjust the stack pointer
// around the call, we can no longer use the stack pointer to reach the
// emergency spill slot.
@@ -419,8 +315,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// pointer adjustments around calls.
if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
return true;
- if (!EnableBasePointer)
- return false;
// A base pointer is required and allowed. Check that it isn't too late to
// reserve it.
return MRI->canReserveReg(BasePtr);
@@ -433,7 +327,8 @@ needsStackRealignment(const MachineFunction &MF) const {
unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
- F->getFnAttributes().hasAttribute(Attributes::StackAlignment));
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
}
@@ -464,114 +359,6 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
}
-unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
- const MachineFunction &MF) const {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- switch (Reg) {
- default: break;
- // Return 0 if either register of the pair is a special register.
- // So no R12, etc.
- case ARM::R1: return ARM::R0;
- case ARM::R3: return ARM::R2;
- case ARM::R5: return ARM::R4;
- case ARM::R7:
- return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6))
- ? 0 : ARM::R6;
- case ARM::R9: return MRI.isReserved(ARM::R9) ? 0 :ARM::R8;
- case ARM::R11: return MRI.isReserved(ARM::R11) ? 0 : ARM::R10;
-
- case ARM::S1: return ARM::S0;
- case ARM::S3: return ARM::S2;
- case ARM::S5: return ARM::S4;
- case ARM::S7: return ARM::S6;
- case ARM::S9: return ARM::S8;
- case ARM::S11: return ARM::S10;
- case ARM::S13: return ARM::S12;
- case ARM::S15: return ARM::S14;
- case ARM::S17: return ARM::S16;
- case ARM::S19: return ARM::S18;
- case ARM::S21: return ARM::S20;
- case ARM::S23: return ARM::S22;
- case ARM::S25: return ARM::S24;
- case ARM::S27: return ARM::S26;
- case ARM::S29: return ARM::S28;
- case ARM::S31: return ARM::S30;
-
- case ARM::D1: return ARM::D0;
- case ARM::D3: return ARM::D2;
- case ARM::D5: return ARM::D4;
- case ARM::D7: return ARM::D6;
- case ARM::D9: return ARM::D8;
- case ARM::D11: return ARM::D10;
- case ARM::D13: return ARM::D12;
- case ARM::D15: return ARM::D14;
- case ARM::D17: return ARM::D16;
- case ARM::D19: return ARM::D18;
- case ARM::D21: return ARM::D20;
- case ARM::D23: return ARM::D22;
- case ARM::D25: return ARM::D24;
- case ARM::D27: return ARM::D26;
- case ARM::D29: return ARM::D28;
- case ARM::D31: return ARM::D30;
- }
-
- return 0;
-}
-
-unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
- const MachineFunction &MF) const {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- switch (Reg) {
- default: break;
- // Return 0 if either register of the pair is a special register.
- // So no R12, etc.
- case ARM::R0: return ARM::R1;
- case ARM::R2: return ARM::R3;
- case ARM::R4: return ARM::R5;
- case ARM::R6:
- return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6))
- ? 0 : ARM::R7;
- case ARM::R8: return MRI.isReserved(ARM::R9) ? 0 :ARM::R9;
- case ARM::R10: return MRI.isReserved(ARM::R11) ? 0 : ARM::R11;
-
- case ARM::S0: return ARM::S1;
- case ARM::S2: return ARM::S3;
- case ARM::S4: return ARM::S5;
- case ARM::S6: return ARM::S7;
- case ARM::S8: return ARM::S9;
- case ARM::S10: return ARM::S11;
- case ARM::S12: return ARM::S13;
- case ARM::S14: return ARM::S15;
- case ARM::S16: return ARM::S17;
- case ARM::S18: return ARM::S19;
- case ARM::S20: return ARM::S21;
- case ARM::S22: return ARM::S23;
- case ARM::S24: return ARM::S25;
- case ARM::S26: return ARM::S27;
- case ARM::S28: return ARM::S29;
- case ARM::S30: return ARM::S31;
-
- case ARM::D0: return ARM::D1;
- case ARM::D2: return ARM::D3;
- case ARM::D4: return ARM::D5;
- case ARM::D6: return ARM::D7;
- case ARM::D8: return ARM::D9;
- case ARM::D10: return ARM::D11;
- case ARM::D12: return ARM::D13;
- case ARM::D14: return ARM::D15;
- case ARM::D16: return ARM::D17;
- case ARM::D18: return ARM::D19;
- case ARM::D20: return ARM::D21;
- case ARM::D22: return ARM::D23;
- case ARM::D24: return ARM::D25;
- case ARM::D26: return ARM::D27;
- case ARM::D28: return ARM::D29;
- case ARM::D30: return ARM::D31;
- }
-
- return 0;
-}
-
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
void ARMBaseRegisterInfo::
@@ -611,65 +398,7 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
requiresVirtualBaseRegisters(const MachineFunction &MF) const {
- return EnableLocalStackAlloc;
-}
-
-static void
-emitSPUpdate(bool isARM,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, const ARMBaseInstrInfo &TII,
- int NumBytes,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
- if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- Pred, PredReg, TII);
- else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- Pred, PredReg, TII);
-}
-
-
-void ARMBaseRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have alloca, convert as follows:
- // ADJCALLSTACKDOWN -> sub, sp, sp, amount
- // ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc dl = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- assert(!AFI->isThumb1OnlyFunction() &&
- "This eliminateCallFramePseudoInstr does not support Thumb1!");
- bool isARM = !AFI->isThumbFunction();
-
- // Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- int PIdx = Old->findFirstPredOperandIdx();
- ARMCC::CondCodes Pred = (PIdx == -1)
- ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
- if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
- unsigned PredReg = Old->getOperand(2).getReg();
- emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg);
- } else {
- // Note: PredReg is operand 3 for ADJCALLSTACKUP.
- unsigned PredReg = Old->getOperand(3).getReg();
- assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
- emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg);
- }
- }
- }
- MBB.erase(I);
+ return true;
}
int64_t ARMBaseRegisterInfo::
@@ -750,8 +479,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
case ARM::VLDRS: case ARM::VLDRD:
case ARM::VSTRS: case ARM::VSTRD:
case ARM::tSTRspi: case ARM::tLDRspi:
- if (ForceAllBaseRegAlloc)
- return true;
break;
default:
return false;
@@ -933,8 +660,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
void
ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
- unsigned i = 0;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
@@ -943,13 +670,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
"This eliminateFrameIndex does not support Thumb1!");
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned FrameReg;
int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
@@ -959,7 +680,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// means the stack pointer cannot be used to access the emergency spill slot
// when !hasReservedCallFrame().
#ifndef NDEBUG
- if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
assert(TFI->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
@@ -971,18 +692,18 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
- MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return;
}
// Modify MI as necessary to handle as much of 'Offset' as possible
bool Done = false;
if (!AFI->isThumbFunction())
- Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
+ Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
else {
assert(AFI->isThumb2Function());
- Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
+ Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
}
if (Done)
return;
@@ -1002,7 +723,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
if (Offset == 0)
// Must be addrmode4/6.
- MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false);
else {
ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass);
if (!AFI->isThumbFunction())
@@ -1014,6 +735,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, Pred, PredReg, TII);
}
// Update the original instruction to use the scratch register.
- MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+ MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
}
}
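// Note on the signature change above: the frame-index operand position is
// now passed in as FIOperandNum by the generic frame-index elimination
// code, which already knows which operand holds the frame index, so the
// old per-target linear scan over the instruction's operands goes away.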
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index e2bdd046db57..725033b7e573 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -111,12 +111,11 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
- ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const;
-
- unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
- const MachineFunction &MF) const;
+ void getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM) const;
void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const;
@@ -169,17 +168,9 @@ public:
virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
- virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
-
-private:
- unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
-
- unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 0bd1c3ee2feb..e6e8c3d5fac6 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -18,8 +18,8 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 6adbf4f27e6e..5e8e1739a984 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -14,16 +14,13 @@
#define DEBUG_TYPE "jit"
#include "ARM.h"
-#include "ARMConstantPoolValue.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -31,7 +28,10 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -371,12 +371,16 @@ FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
}
bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
- MF.getTarget().getRelocationModel() != Reloc::Static) &&
+ TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget());
+
+ assert((Target.getRelocationModel() != Reloc::Default ||
+ Target.getRelocationModel() != Reloc::Static) &&
"JIT relocation model must be set to static or default!");
- JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo();
- II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo();
- TD = MF.getTarget().getDataLayout();
+
+ JTI = static_cast<ARMJITInfo*>(Target.getJITInfo());
+ II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo());
+ TD = Target.getDataLayout();
+
Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index a57368fdb5d8..4891609b336f 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -16,23 +16,23 @@
#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;
@@ -1468,7 +1468,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
if (CPEBB->empty()) {
BBInfo[CPEBB->getNumber()].Size = 0;
- // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ // This block no longer needs to be aligned.
CPEBB->setAlignment(0);
} else
// Entries are sorted by descending alignment, so realign from the front.
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index fa3226e37eb9..4e703ec3c1a8 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -13,11 +13,11 @@
#include "ARMConstantPoolValue.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/Constant.h"
-#include "llvm/Constants.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Type.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
@@ -206,11 +206,7 @@ ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s,
bool AddCurrentAddress)
: ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
AddCurrentAddress),
- S(strdup(s)) {}
-
-ARMConstantPoolSymbol::~ARMConstantPoolSymbol() {
- free((void*)S);
-}
+ S(s) {}
ARMConstantPoolSymbol *
ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
@@ -218,14 +214,6 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false);
}
-static bool CPV_streq(const char *S1, const char *S2) {
- if (S1 == S2)
- return true;
- if (S1 && S2 && strcmp(S1, S2) == 0)
- return true;
- return false;
-}
-
int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
unsigned AlignMask = Alignment - 1;
@@ -238,7 +226,7 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
if (!APS) continue;
- if (CPV_streq(APS->S, S) && equals(APS))
+ if (APS->S == S && equals(APS))
return i;
}
}
@@ -248,12 +236,11 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV);
- return ACPS && CPV_streq(ACPS->S, S) &&
- ARMConstantPoolValue::hasSameValue(ACPV);
+ return ACPS && ACPS->S == S && ARMConstantPoolValue::hasSameValue(ACPV);
}
void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
- ID.AddPointer(S);
+ ID.AddString(S);
ARMConstantPoolValue::addSelectionDAGCSEId(ID);
}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index ae531c4ea888..93812fe6bb37 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -161,19 +161,17 @@ public:
/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
/// symbols.
class ARMConstantPoolSymbol : public ARMConstantPoolValue {
- const char *S; // ExtSymbol being loaded.
+ const std::string S; // ExtSymbol being loaded.
ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id,
unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress);
public:
- ~ARMConstantPoolSymbol();
-
static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s,
unsigned ID, unsigned char PCAdj);
- const char *getSymbol() const { return S; }
+ const char *getSymbol() const { return S.c_str(); }
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment);
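// Note on the change above: storing S as a std::string removes the manual
// strdup()/free() ownership and the CPV_streq() helper; std::string's
// operator== and FoldingSetNodeID::AddString() supply the same comparison
// and CSE-identity semantics without manual memory management.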
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 8c45e0b98d8e..beb843ca9aa8 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -23,10 +23,10 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 6611862ca071..29fcd4009af3 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -16,31 +16,31 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
-#include "ARMTargetMachine.h"
-#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -146,6 +146,7 @@ class ARMFastISel : public FastISel {
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
+ virtual bool FastLowerArguments();
private:
#include "ARMGenFastISel.inc"
@@ -178,23 +179,24 @@ class ARMFastISel : public FastISel {
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
- void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
+ void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
bool ARMIsMemCpySmall(uint64_t Len);
- bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len);
- unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
- unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
- unsigned ARMMaterializeInt(const Constant *C, EVT VT);
- unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
- unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
- unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+ bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
+ unsigned Alignment);
+ unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, MVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
unsigned ARMSelectCallOp(bool UseReg);
- unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT);
+ unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
// Call handling routines.
private:
@@ -220,7 +222,7 @@ class ARMFastISel : public FastISel {
bool isARMNEONPred(const MachineInstr *MI);
bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
- void AddLoadStoreOperands(EVT VT, Address &Addr,
+ void AddLoadStoreOperands(MVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
unsigned Flags, bool useAM3);
};
@@ -486,7 +488,7 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
-unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
+unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
if (VT == MVT::f64) return 0;
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
@@ -496,7 +498,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
return MoveReg;
}
-unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
+unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
if (VT == MVT::i64) return 0;
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
@@ -509,7 +511,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
-unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
const APFloat Val = CFP->getValueAPF();
bool is64bit = VT == MVT::f64;
@@ -553,7 +555,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
return DestReg;
}
-unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
return false;
@@ -563,7 +565,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
const ConstantInt *CI = cast<ConstantInt>(C);
if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
- unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
+ &ARM::GPRRegClass;
+ unsigned ImmReg = createResultReg(RC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ImmReg)
.addImm(CI->getZExtValue()));
@@ -613,7 +617,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
return DestReg;
}
-unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
if (VT != MVT::i32) return 0;
@@ -716,10 +720,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
}
unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
- EVT VT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
- if (!VT.isSimple()) return 0;
+ if (!CEVT.isSimple()) return 0;
+ MVT VT = CEVT.getSimpleVT();
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return ARMMaterializeFP(CFP, VT);
@@ -895,12 +900,9 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
return Addr.Base.Reg != 0;
}
-void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
-
- assert(VT.isSimple() && "Non-simple types are invalid here!");
-
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
bool needsLowering = false;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: llvm_unreachable("Unhandled load/store type!");
case MVT::i1:
case MVT::i8:
@@ -951,13 +953,12 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
}
}
-void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
unsigned Flags, bool useAM3) {
// addrmode5 output depends on the selection dag addressing dividing the
// offset by 4 that it then later multiplies. Do this here as well.
- if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
- VT.getSimpleVT().SimpleTy == MVT::f64)
+ if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
Addr.Offset /= 4;
// Frame base works a bit differently. Handle it separately.
@@ -1000,14 +1001,13 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
AddOptionalDefs(MIB);
}
-bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment, bool isZExt, bool allocReg) {
- assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
bool useAM3 = false;
bool needVMOV = false;
const TargetRegisterClass *RC;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1:
@@ -1124,11 +1124,11 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment) {
unsigned StrOpc;
bool useAM3 = false;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
@@ -1402,8 +1402,9 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt) {
Type *Ty = Src1Value->getType();
- EVT SrcVT = TLI.getValueType(Ty, true);
- if (!SrcVT.isSimple()) return false;
+ EVT SrcEVT = TLI.getValueType(Ty, true);
+ if (!SrcEVT.isSimple()) return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
if (isFloat && !Subtarget->hasVFP2())
@@ -1440,7 +1441,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
unsigned CmpOpc;
bool isICmp = true;
bool needsExt = false;
- switch (SrcVT.getSimpleVT().SimpleTy) {
+ switch (SrcVT.SimpleTy) {
default: return false;
// TODO: Verify compares.
case MVT::f32:
@@ -1592,7 +1593,10 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
return false;
Value *Src = I->getOperand(0);
- EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ if (!SrcEVT.isSimple())
+ return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
@@ -1601,8 +1605,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
// Handle sign-extension.
if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
- EVT DestVT = MVT::i32;
- SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT,
+ SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
/*isZExt*/!isSigned);
if (SrcReg == 0) return false;
}
@@ -1665,7 +1668,6 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
// Things need to be register sized for register moves.
if (VT != MVT::i32) return false;
- const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned CondReg = getRegForValue(I->getOperand(0));
if (CondReg == 0) return false;
@@ -1698,14 +1700,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
.addReg(CondReg).addImm(0));
unsigned MovCCOpc;
+ const TargetRegisterClass *RC;
if (!UseImm) {
+ RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
} else {
- if (!isNegativeImm) {
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
+ if (!isNegativeImm)
MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
- } else {
+ else
MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
- }
}
unsigned ResultReg = createResultReg(RC);
if (!UseImm)
@@ -1807,7 +1811,9 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
}
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
- EVT VT = TLI.getValueType(I->getType(), true);
+ EVT FPVT = TLI.getValueType(I->getType(), true);
+ if (!FPVT.isSimple()) return false;
+ MVT VT = FPVT.getSimpleVT();
// We can get here in the case when we want to use NEON for our fp
// operations, but can't figure out how to. Just use the vfp instructions
@@ -1838,7 +1844,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
unsigned Op2 = getRegForValue(I->getOperand(1));
if (Op2 == 0) return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addReg(Op1).addReg(Op2));
@@ -2051,7 +2057,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
if (RVLocs.size() == 2 && RetVT == MVT::f64) {
// For this move we copy into two registers and then move into the
// double fp reg we want.
- EVT DestVT = RVLocs[0].getValVT();
+ MVT DestVT = RVLocs[0].getValVT();
const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
unsigned ResultReg = createResultReg(DstRC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -2066,7 +2072,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
UpdateValueMap(I, ResultReg);
} else {
assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
- EVT CopyVT = RVLocs[0].getValVT();
+ MVT CopyVT = RVLocs[0].getValVT();
// Special handling for extended integers.
if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
@@ -2094,11 +2100,13 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -2125,8 +2133,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
return false;
unsigned SrcReg = Reg + VA.getValNo();
- EVT RVVT = TLI.getValueType(RV->getType());
- EVT DestVT = VA.getValVT();
+ EVT RVEVT = TLI.getValueType(RV->getType());
+ if (!RVEVT.isSimple()) return false;
+ MVT RVVT = RVEVT.getSimpleVT();
+ MVT DestVT = VA.getValVT();
// Special handling for extended integers.
if (RVVT != DestVT) {
if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
@@ -2151,13 +2161,16 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
DstReg).addReg(SrcReg);
- // Mark the register as live out of the function.
- MRI.addLiveOut(VA.getLocReg());
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
}
unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(RetOpc)));
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(RetOpc));
+ AddOptionalDefs(MIB);
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
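// Note on the change above: instead of marking return-value registers as
// function live-outs via MachineRegisterInfo::addLiveOut(), they are now
// attached to the return instruction itself as implicit uses, which keeps
// the copies into the return registers alive through later passes.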
@@ -2171,7 +2184,9 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
GlobalValue::ExternalLinkage, 0, Name);
- return ARMMaterializeGV(GV, TLI.getValueType(GV->getType()));
+ EVT LCREVT = TLI.getValueType(GV->getType());
+ if (!LCREVT.isSimple()) return 0;
+ return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
}
// A quick function that will emit a call for a named libcall in F with the
@@ -2280,6 +2295,9 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Can't handle inline asm.
if (isa<InlineAsm>(Callee)) return false;
+ // Allow SelectionDAG isel to handle tail calls.
+ if (CI->isTailCall()) return false;
+
// Check the calling convention.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
@@ -2328,16 +2346,16 @@ bool ARMFastISel::SelectCall(const Instruction *I,
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attributes::SExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attributes::ZExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
// FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attributes::InReg) ||
- CS.paramHasAttr(AttrInd, Attributes::StructRet) ||
- CS.paramHasAttr(AttrInd, Attributes::Nest) ||
- CS.paramHasAttr(AttrInd, Attributes::ByVal))
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
return false;
Type *ArgTy = (*i)->getType();
@@ -2419,21 +2437,29 @@ bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
}
bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
- uint64_t Len) {
+ uint64_t Len, unsigned Alignment) {
// Make sure we don't bloat code by inlining very large memcpy's.
if (!ARMIsMemCpySmall(Len))
return false;
- // We don't care about alignment here since we just emit integer accesses.
while (Len) {
MVT VT;
- if (Len >= 4)
- VT = MVT::i32;
- else if (Len >= 2)
- VT = MVT::i16;
- else {
- assert(Len == 1);
- VT = MVT::i8;
+ if (!Alignment || Alignment >= 4) {
+ if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert (Len == 1 && "Expected a length of 1!");
+ VT = MVT::i8;
+ }
+ } else {
+ // Bound based on alignment.
+ if (Len >= 2 && Alignment == 2)
+ VT = MVT::i16;
+ else {
+ VT = MVT::i8;
+ }
}
bool RV;
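// Worked example for the alignment-aware bound above (illustrative,
// assuming Len shrinks by each emitted copy's size as in the surrounding
// loop): with Len = 7 and Alignment = 2 the loop emits i16 + i16 + i16 +
// i8 accesses, whereas with Alignment = 0 (unknown) or >= 4 it emits
// i32 + i16 + i8.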
@@ -2512,7 +2538,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
!ARMComputeAddress(MTI.getRawSource(), Src))
return false;
- if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
+ unsigned Alignment = MTI.getAlignment();
+ if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
}
}
@@ -2541,7 +2568,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
return SelectCall(&I, "memset");
}
case Intrinsic::trap: {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::TRAP));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(
+ Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
return true;
}
}
@@ -2570,18 +2598,19 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) {
return true;
}
-unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
+unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
return 0;
unsigned Opc;
bool isBoolZext = false;
- if (!SrcVT.isSimple()) return 0;
- switch (SrcVT.getSimpleVT().SimpleTy) {
+ const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ switch (SrcVT.SimpleTy) {
default: return 0;
case MVT::i16:
if (!Subtarget->hasV6Ops()) return 0;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
if (isZExt)
Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
else
@@ -2589,6 +2618,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
break;
case MVT::i8:
if (!Subtarget->hasV6Ops()) return 0;
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
if (isZExt)
Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
else
@@ -2596,6 +2626,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
break;
case MVT::i1:
if (isZExt) {
+ RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
isBoolZext = true;
break;
@@ -2603,7 +2634,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
return 0;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ unsigned ResultReg = createResultReg(RC);
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
.addReg(SrcReg);
@@ -2622,14 +2653,18 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) {
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
- EVT SrcVT, DestVT;
- SrcVT = TLI.getValueType(SrcTy, true);
- DestVT = TLI.getValueType(DestTy, true);
-
bool isZExt = isa<ZExtInst>(I);
unsigned SrcReg = getRegForValue(Src);
if (!SrcReg) return false;
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(SrcTy, true);
+ DestEVT = TLI.getValueType(DestTy, true);
+ if (!SrcEVT.isSimple()) return false;
+ if (!DestEVT.isSimple()) return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
if (ResultReg == 0) return false;
UpdateValueMap(I, ResultReg);
@@ -2809,7 +2844,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
}
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
- unsigned Align, EVT VT) {
+ unsigned Align, MVT VT) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolConstant *CPV =
ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
@@ -2849,6 +2884,80 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
return DestReg2;
}
+bool ARMFastISel::FastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ switch (CC) {
+ default:
+ return false;
+ case CallingConv::Fast:
+ case CallingConv::C:
+ case CallingConv::ARM_AAPCS_VFP:
+ case CallingConv::ARM_AAPCS:
+ case CallingConv::ARM_APCS:
+ break;
+ }
+
+ // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
+ // which are passed in r0 - r3.
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (Idx > 4)
+ return false;
+
+ if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ return false;
+
+ Type *ArgTy = I->getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ default:
+ return false;
+ }
+ }
+
+
+ static const uint16_t GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ Idx = 0;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (I->use_empty())
+ continue;
+ unsigned SrcReg = GPRArgRegs[Idx];
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(I, ResultReg);
+ }
+
+ return true;
+}
+
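// Illustrative example (not part of the diff): a signature such as
//   int f(int a, short b, char c);
// passes the checks above -- at most four scalar i8/i16/i32 arguments with
// no inreg/sret/byval attributes -- so r0-r2 are added as live-ins and
// copied into virtual registers; anything else returns false, deferring to
// the usual SelectionDAG argument lowering.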
namespace llvm {
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 9392497fd07d..7a02adf24633 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -15,17 +15,16 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Function.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -120,13 +119,14 @@ static void
emitSPUpdate(bool isARM,
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
DebugLoc dl, const ARMBaseInstrInfo &TII,
- int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
if (isARM)
emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- ARMCC::AL, 0, TII, MIFlags);
+ Pred, PredReg, TII, MIFlags);
else
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- ARMCC::AL, 0, TII, MIFlags);
+ Pred, PredReg, TII, MIFlags);
}
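// Note on the change above: emitSPUpdate gains optional Pred/PredReg
// parameters (defaulting to ARMCC::AL, i.e. unconditional), so the SP
// adjustment can carry a predicate; eliminateCallFramePseudoInstr below
// uses this to preserve the predicate of ADJCALLSTACKDOWN/UP pseudos.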
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -696,7 +696,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
if (DeleteRet) {
- MIB->copyImplicitOps(&*MI);
+ MIB.copyImplicitOps(&*MI);
MI->eraseFromParent();
}
MI = MIB;
@@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
return FnSize;
}
-/// estimateStackSize - Estimate and return the size of the frame.
-/// FIXME: Make generic?
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -MFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
- continue;
- Offset += MFI->getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += MFI->getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
@@ -1153,7 +1101,8 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
return;
// Naked functions don't spill callee-saved registers.
- if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
return;
// We are planning to use NEON instructions vst1 / vld1.
@@ -1234,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// we've used all the registers and so R4 is already used, so not marking
// it here will be OK.
// FIXME: It will be better just to find spare register here.
- unsigned StackSize = estimateStackSize(MF);
+ unsigned StackSize = MFI->estimateStackSize(MF);
if (MFI->hasVarSizedObjects() || StackSize > 508)
MRI.setPhysRegUsed(ARM::R4);
}
@@ -1329,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// worth the effort and added fragility?
bool BigStack =
(RS &&
- (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ (MFI->estimateStackSize(MF) +
+ ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
estimateRSStackSizeLimit(MF, this)))
|| MFI->hasVarSizedObjects()
|| (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
@@ -1418,7 +1368,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
const TargetRegisterClass *RC = &ARM::GPRRegClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
}
@@ -1430,3 +1380,51 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
AFI->setLRIsSpilledForFarJump(true);
}
}
+
+
+void ARMFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateCallFramePseudoInstr does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ int PIdx = Old->findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = Old->getOperand(2).getReg();
+ emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
+ Pred, PredReg);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
+ Pred, PredReg);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
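// Worked example for the rounding above (illustrative): with a stack
// alignment of Align = 8, an outgoing-argument size of Amount = 20 rounds
// up as (20 + 8 - 1) / 8 * 8 = 24, keeping SP 8-byte aligned across the
// call sequence.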
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index a1c2b93562c9..efa255a5574a 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -70,6 +70,11 @@ public:
unsigned LdrOpc, bool isVarArg, bool NoGap,
bool(*Func)(unsigned, bool),
unsigned NumAlignedDPRCS2Regs) const;
+
+ virtual void eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index efd6d2b8399e..2c51de23f7dc 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -16,24 +16,25 @@
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -78,6 +79,8 @@ public:
return "ARM Instruction Selection";
}
+ virtual void PreprocessISelDAG();
+
/// getI32Imm - Return a target constant of type i32 with the specified
/// value.
inline SDValue getI32Imm(unsigned Imm) {
@@ -255,6 +258,8 @@ private:
// Select special operations if node forms integer ABS pattern
SDNode *SelectABSOp(SDNode *N);
+ SDNode *SelectInlineAsm(SDNode *N);
+
SDNode *SelectConcatVector(SDNode *N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
@@ -265,15 +270,16 @@ private:
char ConstraintCode,
std::vector<SDValue> &OutOps);
- // Form pairs of consecutive S, D, or Q registers.
- SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
- SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
- SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+ // Form pairs of consecutive R, S, D, or Q registers.
+ SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
// Form sequences of 4 consecutive S, D, or Q registers.
- SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
// Get the alignment operand for a NEON VLD or VST instruction.
SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
@@ -326,6 +332,87 @@ static bool isScaledConstantInRange(SDValue Node, int Scale,
return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
+void ARMDAGToDAGISel::PreprocessISelDAG() {
+ if (!Subtarget->hasV6T2Ops())
+ return;
+
+ bool isThumb2 = Subtarget->isThumb();
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (N->getOpcode() != ISD::ADD)
+ continue;
+
+ // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
+ // leading zeros, followed by consecutive set bits, followed by 1 or 2
+ // trailing zeros, e.g. 1020.
+ // Transform the expression to
+ // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
+ // of trailing zeros of c2. The left shift would be folded as an shifter
+ // operand of 'add' and the 'and' and 'srl' would become a bits extraction
+ // node (UBFX).
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned And_imm = 0;
+ if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
+ if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
+ std::swap(N0, N1);
+ }
+ if (!And_imm)
+ continue;
+
+ // Check if the AND mask is an immediate of the form: 000.....1111111100
+ unsigned TZ = CountTrailingZeros_32(And_imm);
+ if (TZ != 1 && TZ != 2)
+ // Be conservative here. Shifter operands aren't always free. e.g. On
+ // Swift, a left shifter operand of 1 or 2 is free, but others are not.
+ // e.g.
+ // ubfx r3, r1, #16, #8
+ // ldr.w r3, [r0, r3, lsl #2]
+ // vs.
+ // mov.w r9, #1020
+ // and.w r2, r9, r1, lsr #14
+ // ldr r2, [r0, r2]
+ continue;
+ And_imm >>= TZ;
+ if (And_imm & (And_imm + 1))
+ continue;
+
+ // Look for (and (srl X, c1), c2).
+ SDValue Srl = N1.getOperand(0);
+ unsigned Srl_imm = 0;
+ if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
+ (Srl_imm <= 2))
+ continue;
+
+ // Make sure first operand is not a shifter operand which would prevent
+ // folding of the left shift.
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (isThumb2) {
+ if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
+ continue;
+ } else {
+ if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
+ SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
+ continue;
+ }
+
+ // Now make the transformation.
+ Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32,
+ Srl.getOperand(0),
+ CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
+ N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32,
+ Srl, CurDAG->getConstant(And_imm, MVT::i32));
+ N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32,
+ N1, CurDAG->getConstant(TZ, MVT::i32));
+ CurDAG->UpdateNodeOperands(N, N0, N1);
+ }
+}
+
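// Worked example for the rewrite above (illustrative): with c1 = 14 and
// c2 = 1020 (0b1111111100, so tz = 2 and c2 >> tz = 255),
//   (add X1, (and (srl X2, 14), 1020))
// becomes
//   (add X1, (shl (and (srl X2, 16), 255), 2))
// where (and (srl X2, 16), 255) now matches the bit-extraction pattern
// (ubfx ..., #16, #8) and the shl folds into the add as an 'lsl #2'
// shifter operand -- the ubfx/ldr.w sequence shown in the comment above.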
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
@@ -1444,9 +1531,19 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
return NULL;
}
-/// PairSRegs - Form a D register from a pair of S registers.
-///
-SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
+SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
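// Note on createGPRPairNode above (illustrative): the REG_SEQUENCE glues
// two i32 GPRs into a single GPRPair value (gsub_0 = low half, gsub_1 =
// high half), e.g. so a 64-bit inline-asm operand can be allocated to an
// even/odd register pair such as r0/r1, as ldrexd/strexd require.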
+/// \brief Form a D register from a pair of S registers.
+SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass =
CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
@@ -1456,9 +1553,8 @@ SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// PairDRegs - Form a quad register from a pair of D registers.
-///
-SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form a quad register from a pair of D registers.
+SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
@@ -1467,9 +1563,8 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
-///
-SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form 4 consecutive D registers from a pair of Q registers.
+SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
@@ -1478,9 +1573,8 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// QuadSRegs - Form 4 consecutive S registers.
-///
-SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive S registers.
+SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass =
@@ -1494,9 +1588,8 @@ SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
}
-/// QuadDRegs - Form 4 consecutive D registers.
-///
-SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive D registers.
+SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
@@ -1509,9 +1602,8 @@ SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
}
-/// QuadQRegs - Form 4 consecutive Q registers.
-///
-SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive Q registers.
+SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
@@ -1784,7 +1876,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V0 = N->getOperand(Vec0Idx + 0);
SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2)
- SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
else {
SDValue V2 = N->getOperand(Vec0Idx + 2);
// If it's a vst3, form a quad D-register and leave the last part as
@@ -1792,13 +1884,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
: N->getOperand(Vec0Idx + 3);
- SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
}
} else {
// Form a QQ register.
SDValue Q0 = N->getOperand(Vec0Idx);
SDValue Q1 = N->getOperand(Vec0Idx + 1);
- SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+ SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
}
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
@@ -1840,7 +1932,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(Vec0Idx + 3);
- SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers. This is always an updating store, so that it
// provides the address to the second store for the odd subregs.
@@ -1950,18 +2042,18 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2) {
if (is64BitVector)
- SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
else
- SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
+ SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
} else {
SDValue V2 = N->getOperand(Vec0Idx + 2);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(Vec0Idx + 3);
if (is64BitVector)
- SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
else
- SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
}
Ops.push_back(SuperReg);
Ops.push_back(getI32Imm(Lane));
@@ -2087,7 +2179,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
SDValue V0 = N->getOperand(FirstTblReg + 0);
SDValue V1 = N->getOperand(FirstTblReg + 1);
if (NumVecs == 2)
- RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
else {
SDValue V2 = N->getOperand(FirstTblReg + 2);
// If it's a vtbl3, form a quad D-register and leave the last part as
@@ -2095,7 +2187,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(FirstTblReg + 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
}
SmallVector<SDValue, 6> Ops;
@@ -2113,10 +2205,10 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
if (!Subtarget->hasV6T2Ops())
return NULL;
- unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
+ unsigned Opc = isSigned
+ ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
: (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
-
// For unsigned extracts, check for a shift right and mask
unsigned And_imm = 0;
if (N->getOpcode() == ISD::AND) {
@@ -2134,7 +2226,29 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
// Note: The width operand is encoded as width-1.
unsigned Width = CountTrailingOnes_32(And_imm) - 1;
unsigned LSB = Srl_imm;
+
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
+ // It's cheaper to use a right shift to extract the top bits.
+ if (Subtarget->isThumb()) {
+ Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ CurDAG->getTargetConstant(LSB, MVT::i32),
+ getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ }
+
+ // ARM models shift instructions as MOVsi with shifter operand.
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
+ SDValue ShOpc =
+ CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB),
+ MVT::i32);
+ SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
+ getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5);
+ }
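
A quick illustration of why a plain shift suffices here (sketch; recall the
width operand is encoded as width-1, so the condition means the extracted
field reaches the top bit of the register):

    // ubfx r0, r1, #lsb, #(32 - lsb)   ; field extends to bit 31, so
    // lsr  r0, r1, #lsb                ; a single right shift (asr in the
    //                                  ; signed case) yields the same value.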
+
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
CurDAG->getTargetConstant(Width, MVT::i32),
@@ -2411,7 +2525,7 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() || N->getNumOperands() != 2)
llvm_unreachable("unexpected CONCAT_VECTORS");
- return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
+ return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
@@ -2441,6 +2555,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
+ case ISD::INLINEASM: {
+ SDNode *ResNode = SelectInlineAsm(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
case ISD::XOR: {
// Select special operations if XOR node forms integer ABS pattern
SDNode *ResNode = SelectABSOp(N);
@@ -2790,13 +2910,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned NumElts = VecVT.getVectorNumElements();
if (EltVT == MVT::f64) {
assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
- return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
}
assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
if (NumElts == 2)
- return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
- return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3));
}
@@ -3009,17 +3129,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
SDValue Chain = N->getOperand(0);
- unsigned NewOpc = ARM::LDREXD;
- if (Subtarget->isThumb() && Subtarget->hasThumb2())
- NewOpc = ARM::t2LDREXD;
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+ unsigned NewOpc = isThumb ? ARM::t2LDREXD : ARM::LDREXD;
// arm_ldrexd returns an i64 value in {i32, i32}
std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i32);
- ResTys.push_back(MVT::i32);
+ if (isThumb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ } else
+ ResTys.push_back(MVT::Untyped);
ResTys.push_back(MVT::Other);
- // place arguments in the right order
+ // Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(getAL(CurDAG));
@@ -3032,30 +3154,33 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
- // Until there's support for specifing explicit register constraints
- // like the use of even/odd register pair, hardcode ldrexd to always
- // use the pair [R0, R1] to hold the load result.
- Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0,
- SDValue(Ld, 0), SDValue(0,0));
- Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1,
- SDValue(Ld, 1), Chain.getValue(1));
-
// Remap uses.
- SDValue Glue = Chain.getValue(1);
+ SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
if (!SDValue(N, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R0, MVT::i32, Glue);
- Glue = Result.getValue(2);
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 0);
+ else {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
+ Result = SDValue(ResNode, 0);
+ }
ReplaceUses(SDValue(N, 0), Result);
}
if (!SDValue(N, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R1, MVT::i32, Glue);
- Glue = Result.getValue(2);
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 1);
+ else {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
+ Result = SDValue(ResNode, 0);
+ }
ReplaceUses(SDValue(N, 1), Result);
}
-
- ReplaceUses(SDValue(N, 2), SDValue(Ld, 2));
+ ReplaceUses(SDValue(N, 2), OutChain);
return NULL;
}
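
To summarize the two result-plumbing strategies above (sketch):

    // Thumb2: t2LDREXD defines two i32 results; SDValue(Ld, 0) and
    //         SDValue(Ld, 1) are used directly, and the chain is result 2.
    // ARM:    LDREXD defines one Untyped GPRPair; the i32 halves are
    //         recovered with EXTRACT_SUBREG gsub_0 / gsub_1, and the
    //         register allocator enforces the even/odd pair constraint
    //         that was previously hardcoded as R0/R1.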
@@ -3066,38 +3191,25 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Val1 = N->getOperand(3);
SDValue MemAddr = N->getOperand(4);
- // Until there's support for specifing explicit register constraints
- // like the use of even/odd register pair, hardcode strexd to always
- // use the pair [R2, R3] to hold the i64 (i32, i32) value to be stored.
- Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0,
- SDValue(0, 0));
- Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1));
-
- SDValue Glue = Chain.getValue(1);
- Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R2, MVT::i32, Glue);
- Glue = Val0.getValue(1);
- Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R3, MVT::i32, Glue);
-
// Store exclusive double returns an i32 value which is the return status
// of the issued store.
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i32);
- ResTys.push_back(MVT::Other);
+ EVT ResTys[] = { MVT::i32, MVT::Other };
- // place arguments in the right order
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+ // Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
- Ops.push_back(Val0);
- Ops.push_back(Val1);
+ if (isThumb) {
+ Ops.push_back(Val0);
+ Ops.push_back(Val1);
+ } else
+ // arm_strexd uses GPRPair.
+ Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
Ops.push_back(MemAddr);
Ops.push_back(getAL(CurDAG));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
- unsigned NewOpc = ARM::STREXD;
- if (Subtarget->isThumb() && Subtarget->hasThumb2())
- NewOpc = ARM::t2STREXD;
+ unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
Ops.size());
@@ -3295,7 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
// Form a REG_SEQUENCE to force register allocation.
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
- SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
SmallVector<SDValue, 6> Ops;
Ops.push_back(RegSeq);
@@ -3325,11 +3437,152 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return SelectAtomic64(N, ARM::ATOMSWAP6432);
case ARMISD::ATOMCMPXCHG64_DAG:
return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
+
+ case ARMISD::ATOMMIN64_DAG:
+ return SelectAtomic64(N, ARM::ATOMMIN6432);
+ case ARMISD::ATOMUMIN64_DAG:
+ return SelectAtomic64(N, ARM::ATOMUMIN6432);
+ case ARMISD::ATOMMAX64_DAG:
+ return SelectAtomic64(N, ARM::ATOMMAX6432);
+ case ARMISD::ATOMUMAX64_DAG:
+ return SelectAtomic64(N, ARM::ATOMUMAX6432);
}
return SelectCode(N);
}
+SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
+ std::vector<SDValue> AsmNodeOperands;
+ unsigned Flag, Kind;
+ bool Changed = false;
+ unsigned NumOps = N->getNumOperands();
+
+ ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(
+ N->getOperand(InlineAsm::Op_AsmString));
+ StringRef AsmString = StringRef(S->getSymbol());
+
+ // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
+ // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
+ // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
+ // respectively. Since there is no constraint to explicitly specify a
+ // reg pair, we search for the %H operand inside the asm string. If it is
+ // found, the transformation below enforces a GPRPair reg class for "%r"
+ // for 64-bit data.
+ if (AsmString.find(":H}") == StringRef::npos)
+ return NULL;
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Glue = N->getOperand(NumOps-1);
+
+ // The glue node will be appended last.
+ for (unsigned i = 0; i < NumOps - 1; ++i) {
+ SDValue op = N->getOperand(i);
+ AsmNodeOperands.push_back(op);
+
+ if (i < InlineAsm::Op_FirstOperand)
+ continue;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
+ Flag = C->getZExtValue();
+ Kind = InlineAsm::getKind(Flag);
+ }
+ else
+ continue;
+
+ if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
+ && Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ continue;
+
+ unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag);
+ unsigned RC;
+ bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2)
+ continue;
+
+ assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm");
+ SDValue V0 = N->getOperand(i+1);
+ SDValue V1 = N->getOperand(i+2);
+ unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
+ unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
+ SDValue PairedReg;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ if (Kind == InlineAsm::Kind_RegDef ||
+ Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
+ // the original GPRs.
+
+ unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
+ SDValue Chain = SDValue(N, 0);
+
+ SDNode *GU = N->getGluedUser();
+ SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
+ Chain.getValue(1));
+
+ // Extract values from a GPRPair reg and copy to the original GPR reg.
+ SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
+ RegCopy);
+ SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
+ RegCopy);
+ SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
+ RegCopy.getValue(1));
+ SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
+
+ // Update the original glue user.
+ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
+ Ops.push_back(T1.getValue(1));
+ CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size());
+ GU = T1.getNode();
+ }
+ else {
+ // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ // GPRPair and then pass the GPRPair to the inline asm.
+ SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
+
+ // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
+ SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
+ Chain.getValue(1));
+ SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
+ T0.getValue(1));
+ SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
+
+ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
+ // i32 VRs of inline asm with it.
+ unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
+ Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
+
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ Glue = Chain.getValue(1);
+ }
+
+ Changed = true;
+
+ if (PairedReg.getNode()) {
+ Flag = InlineAsm::getFlagWord(Kind, 1 /*RegNum*/);
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
+ // Replace the current flag.
+ AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
+ Flag, MVT::i32);
+ // Add the new register node and skip the original two GPRs.
+ AsmNodeOperands.push_back(PairedReg);
+ i += 2;
+ }
+ }
+
+ AsmNodeOperands.push_back(Glue);
+ if (!Changed)
+ return NULL;
+
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0],
+ AsmNodeOperands.size());
+ New->setNodeId(-1);
+ return New.getNode();
+}
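
The kind of source-level inline asm this transformation targets looks roughly
like the following hypothetical example; the %H0 operand reference appears in
the lowered asm string as "${0:H}", which is what the ":H}" search above
detects:

    // Hypothetical C source (illustrative only; p points to a 64-bit value):
    //   unsigned long long v;
    //   asm volatile("ldrexd %0, %H0, [%1]"
    //                : "=&r"(v)   // %0 = low word, %H0 = high word
    //                : "r"(p));
    // SelectInlineAsm rewrites the two i32 GPR operands backing v into one
    // GPRPair operand so ldrexd receives an even/odd register pair.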
+
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
std::vector<SDValue> &OutOps) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ff99b04078e8..bb26090d2d8d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -23,14 +23,8 @@
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -40,14 +34,20 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -504,6 +504,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMA, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
@@ -515,8 +516,29 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
+ // Mark v2f32 intrinsics.
+ setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
+
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// Custom handling for some quad-vector types to detect VMULL.
@@ -539,6 +561,33 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+
+ // Custom expand long extensions to vectors.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+
+ // NEON does not have a single-instruction CTPOP for vectors with element
+ // types wider than 8 bits. However, custom lowering can leverage the
+ // v8i8/v16i8 vcnt instruction.
+ setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
+
+ // NEON only has FMA instructions as of VFP4.
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::v2f32, Expand);
+ setOperationAction(ISD::FMA, MVT::v4f32, Expand);
+ }
+
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
@@ -688,7 +737,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
// Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
setInsertFencesForAtomic(true);
@@ -762,6 +815,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
@@ -814,18 +869,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
- maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
- maxStoresPerMemset = 16;
- maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemset = 8;
+ MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
- benefitFromCodePlacementOpt = true;
-
// Prefer likely predicted branches to selects on out-of-order cores.
- predictableSelectIsExpensive = Subtarget->isLikeA9();
+ PredictableSelectIsExpensive = Subtarget->isLikeA9();
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -841,10 +897,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
-ARMTargetLowering::findRepresentativeClass(EVT VT) const{
+ARMTargetLowering::findRepresentativeClass(MVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(VT);
// Use DPR as representative register class for all floating point
@@ -1024,7 +1080,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
-const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
@@ -1557,7 +1613,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
OpFlags = ARMII::MO_PLT;
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
}
@@ -1594,8 +1650,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::MinSize);
+ bool HasMinSizeAttr = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -1875,6 +1931,17 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return true;
}
+bool
+ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
+ isVarArg));
+}
+
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -1893,15 +1960,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
isVarArg));
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
@@ -1930,10 +1991,12 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
HalfGPRs.getValue(1), Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
// Extract the 2nd half and fall through to handle it as an f64 value.
@@ -1946,6 +2009,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
Flag);
@@ -1955,15 +2019,16 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are
// stuck together, avoiding something bad.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- SDValue result;
+ // Update chain and glue.
+ RetOps[0] = Chain;
if (Flag.getNode())
- result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else // Return Void
- result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+ RetOps.push_back(Flag);
- return result;
+ return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
+ RetOps.data(), RetOps.size());
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -2214,8 +2279,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- if (RelocM == Reloc::PIC_) {
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolValue *CPV =
ARMConstantPoolConstant::Create(GV,
@@ -2259,8 +2323,6 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// FIXME: Enable this for static codegen when tool issues are fixed. Also
// update ARMFastISel::ARMMaterializeGV.
@@ -2288,6 +2350,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (RelocM == Reloc::Static) {
CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
} else {
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
ARMPCLabelIndex = AFI->createPICLabelUId();
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
ARMConstantPoolValue *CPV =
@@ -2368,7 +2431,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
SDValue CPAddr;
unsigned PCAdj = (RelocM != Reloc::PIC_)
@@ -2543,7 +2605,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
}
// The remaining GPRs hold either the beginning of variable-argument
-// data, or the beginning of an aggregate passed by value (usuall
+// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
// provided by our caller, and store the unallocated registers there.
// If this is a variadic function, the va_list pointer will begin with
@@ -2628,7 +2690,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
CCInfo.AnalyzeFormalArguments(Ins,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
-
+
SmallVector<SDValue, 16> ArgValues;
int lastInsIndex = -1;
SDValue ArgValue;
@@ -2743,7 +2805,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
} else {
int FI = MFI->CreateFixedObject(Flags.getByValSize(),
VA.getLocMemOffset(), false);
- InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
}
} else {
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
@@ -3379,6 +3441,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+/// Custom-expand long vector extensions, where size(DestVec) > 2*size(SrcVec)
+/// and size(DestVec) > 128 bits.
+/// This is achieved by doing one step of extension from the SrcVec, splitting
+/// the result, extending the halves, and then concatenating them into the
+/// destination.
+static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DestVT = N->getValueType(0);
+
+ assert(DestVT.getSizeInBits() > 128 &&
+ "Custom sext/zext expansion needs >128-bit vector.");
+ // If this is a normal length extension, use the default expansion.
+ if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
+ SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
+ return SDValue();
+
+ DebugLoc dl = N->getDebugLoc();
+ unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
+ unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
+
+ EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts);
+ EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts/2);
+ EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
+ NumElts/2);
+
+ Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
+ SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(0));
+ SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(NumElts/2));
+ ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
+ ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
+}
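
A worked instance of the expansion (sketch): sign-extending v8i8 to v8i32.

    // Mid     = sext v8i8 -> v8i16            (one SrcEltSize*2 step)
    // SplitLo = extract_subvector Mid, 0      -> v4i16
    // SplitHi = extract_subvector Mid, 4      -> v4i16
    // ExtLo   = sext SplitLo -> v4i32
    // ExtHi   = sext SplitHi -> v4i32
    // result  = concat_vectors ExtLo, ExtHi   -> v8i32 (== DestVT)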
+
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -3532,6 +3635,114 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}
+/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
+/// for each 16-bit element of the operand, repeated. The basic idea is to
+/// leverage vcnt to get the 8-bit counts, gather and add the results.
+///
+/// Trace for v4i16:
+/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
+/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
+/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
+/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
+/// [b0 b1 b2 b3 b4 b5 b6 b7]
+/// +[b1 b0 b3 b2 b5 b4 b7 b6]
+/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
+/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
+static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+ SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
+ SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
+ SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
+ return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
+}
+
+/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
+/// bit-count for each 16-bit element from the operand. We need slightly
+/// different sequencing for v4i16 and v8i16 to stay within NEON's available
+/// 64/128-bit registers.
+///
+/// Trace for v4i16:
+/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
+/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
+/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
+/// v4i16:Extracted = [k0 k1 k2 k3 ]
+static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
+ if (VT.is64BitVector()) {
+ SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
+ DAG.getIntPtrConstant(0));
+ } else {
+ SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
+ BitCounts, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
+ }
+}
+
+/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
+/// bit-count for each 32-bit element from the operand. The idea here is
+/// to split the vector into 16-bit elements, leverage the 16-bit count
+/// routine, and then combine the results.
+///
+/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
+/// input = [v0 v1 ] (vi: 32-bit elements)
+/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
+/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
+/// vrev: N0 = [k1 k0 k3 k2 ]
+/// [k0 k1 k2 k3 ]
+/// N1 =+[k1 k0 k3 k2 ]
+/// [k0 k2 k1 k3 ]
+/// N2 =+[k1 k3 k0 k2 ]
+/// [k0 k2 k1 k3 ]
+/// Extended =+[k1 k3 k0 k2 ]
+/// [k0 k2 ]
+/// Extracted=+[k1 k3 ]
+///
+static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+
+ SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
+ SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
+ SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
+ SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
+ SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
+
+ if (VT.is64BitVector()) {
+ SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
+ DAG.getIntPtrConstant(0));
+ } else {
+ SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
+ DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
+ }
+}
+
+static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+
+ assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
+ assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
+ VT == MVT::v4i16 || VT == MVT::v8i16) &&
+ "Unexpected type for custom ctpop lowering");
+
+ if (VT.getVectorElementType() == MVT::i32)
+ return lowerCTPOP32BitElements(N, DAG);
+ else
+ return lowerCTPOP16BitElements(N, DAG);
+}
+
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
@@ -4153,6 +4364,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return true;
}
+/// \return true if this is a reverse operation on a vector.
+static bool isReverseMask(ArrayRef<int> M, EVT VT) {
+ unsigned NumElts = VT.getVectorNumElements();
+ // Make sure the mask has the right size.
+ if (NumElts != M.size())
+ return false;
+
+ // Look for <15, ..., 3, -1, 1, 0>.
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
+ return false;
+
+ return true;
+}
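
For example (sketch), for v8i16 both of the following masks are accepted:

    // <7, 6, 5, 4, 3, 2, 1, 0>      full reverse
    // <7, -1, 5, 4, -1, 2, 1, 0>    reverse with undef (-1) lanes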
+
// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
@@ -4247,7 +4473,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
-
+
// Is this value dominant? (takes up more than half of the lanes)
if (++Count > (NumElts / 2)) {
hasDominantValue = true;
@@ -4275,8 +4501,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// If we are VDUPing a value that comes directly from a vector, that will
// cause an unnecessary move to and from a GPR, where instead we could
- // just use VDUPLANE.
- if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ // just use VDUPLANE. We can only do this if the lane being extracted
+ // is at a constant index, as the VDUP from lane instructions only have
+ // constant-index forms.
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Value->getOperand(1))) {
// We need to create a new undef vector to use for the VDUPLANE if the
// size of the vector from which we get the value is different than the
// size of the vector that we need to create. We will insert the element
@@ -4291,12 +4520,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
Value, DAG.getConstant(index, MVT::i32)),
DAG.getConstant(index, MVT::i32));
- } else {
+ } else
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
Value->getOperand(0), Value->getOperand(1));
- }
- }
- else
+ } else
N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
if (!usesOnlyOneValue) {
@@ -4328,7 +4555,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (usesOnlyOneValue) {
SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
if (isConstant && Val.getNode())
- return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
}
}
@@ -4548,7 +4775,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isVZIPMask(M, VT, WhichResult) ||
isVTRN_v_undef_Mask(M, VT, WhichResult) ||
isVUZP_v_undef_Mask(M, VT, WhichResult) ||
- isVZIP_v_undef_Mask(M, VT, WhichResult));
+ isVZIP_v_undef_Mask(M, VT, WhichResult) ||
+ ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -4652,6 +4880,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
&VTBLMask[0], 8));
}
+static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue OpLHS = Op.getOperand(0);
+ EVT VT = OpLHS.getValueType();
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ "Expect an v8i16/v16i8 type");
+ OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
+ // For a v16i8 type: after the VREV we have <7, ..., 0, 15, ..., 8>. Now,
+ // extract the first 8 bytes into the top double word and the last 8 bytes
+ // into the bottom double word. The v8i16 case is similar.
+ unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
+ return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
+ DAG.getConstant(ExtractNum, MVT::i32));
+}
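
Writing the input lanes of a v16i8 as e0..e15, the two steps compose as
follows (sketch):

    // OpLHS         = <e0 .. e7 | e8 .. e15>
    // VREV64 OpLHS  = <e7 .. e0 | e15 .. e8>   ; reverse within each D reg
    // VEXT #8       = <e15 .. e8 | e7 .. e0>   ; rotate the Q reg by 8 bytes
    // i.e. the fully reversed vector; for v8i16 the VEXT amount is 4 lanes.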
+
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -4789,6 +5034,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+ return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+
if (VT == MVT::v8i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
if (NewOp.getNode())
@@ -4917,16 +5165,76 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return false;
}
-/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
-/// load, or BUILD_VECTOR with extended elements, return the unextended value.
-static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
+/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
+/// We insert the required extension here to get the vector to fill a D register.
+static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
+ const EVT &OrigTy,
+ const EVT &ExtTy,
+ unsigned ExtOpcode) {
+ // The vector originally had a size of OrigTy. It was then extended to ExtTy.
+ // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
+ // 64-bits we need to insert a new extension so that it will be 64-bits.
+ assert(ExtTy.is128BitVector() && "Unexpected extension size");
+ if (OrigTy.getSizeInBits() >= 64)
+ return N;
+
+ // Must extend size to at least 64 bits to be used as an operand for VMULL.
+ MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
+ EVT NewVT;
+ switch (OrigSimpleTy) {
+ default: llvm_unreachable("Unexpected Orig Vector Type");
+ case MVT::v2i8:
+ case MVT::v2i16:
+ NewVT = MVT::v2i32;
+ break;
+ case MVT::v4i8:
+ NewVT = MVT::v4i16;
+ break;
+ }
+ return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
+}
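
A concrete case (sketch): a v4i8 value that was sign-extended to v4i32 for a
multiply.

    // OrigTy = v4i8 (32 bits) is too small for a D register, so after the
    // original sext is stripped, a new sext v4i8 -> v4i16 is inserted (the
    // NewVT chosen by the switch above); VMULL.s16 then widens
    // v4i16 x v4i16 -> v4i32 in a single instruction.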
+
+/// SkipLoadExtensionForVMULL - return a load of the original vector size that
+/// does not do any sign/zero extension. If the original vector is less
+/// than 64 bits, an appropriate extension will be added after the load to
+/// reach a total size of 64 bits. We have to add the extension separately
+/// because ARM does not have a sign/zero extending load for vectors.
+static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
+ SDValue NonExtendingLoad =
+ DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
+ unsigned ExtOp = 0;
+ switch (LD->getExtensionType()) {
+ default: llvm_unreachable("Unexpected LoadExtType");
+ case ISD::EXTLOAD:
+ case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
+ }
+ MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
+ MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
+ return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
+ MemType, ExtType, ExtOp);
+}
+
+/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
+/// extending load, or BUILD_VECTOR with extended elements, return the
+/// unextended value. The unextended vector should be 64 bits so that it can
+/// be used as an operand to a VMULL instruction. If the original vector size
+/// before extension is less than 64 bits, we add an extension to resize
+/// the vector to 64 bits.
+static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
- return N->getOperand(0);
+ return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
+ N->getOperand(0)->getValueType(0),
+ N->getValueType(0),
+ N->getOpcode());
+
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
- return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
- LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- LD->getAlignment());
+ return SkipLoadExtensionForVMULL(LD, DAG);
+
// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
// have been legalized as a BITCAST from v4i32.
if (N->getOpcode() == ISD::BITCAST) {
@@ -4981,7 +5289,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
EVT VT = Op.getValueType();
- assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ assert(VT.is128BitVector() && VT.isInteger() &&
+ "unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
@@ -5024,9 +5333,9 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// Legalize to a VMULL instruction.
DebugLoc DL = Op.getDebugLoc();
SDValue Op0;
- SDValue Op1 = SkipExtension(N1, DAG);
+ SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
if (!isMLA) {
- Op0 = SkipExtension(N0, DAG);
+ Op0 = SkipExtensionForVMULL(N0, DAG);
assert(Op0.getValueType().is64BitVector() &&
Op1.getValueType().is64BitVector() &&
"unexpected types for extended operands to VMULL");
@@ -5041,8 +5350,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// vaddl q0, d4, d5
// vmovl q1, d6
// vmul q0, q0, q1
- SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
- SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
+ SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
EVT Op1VT = Op1.getValueType();
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,
@@ -5328,6 +5637,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+ case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
@@ -5360,6 +5670,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = ExpandVectorExtension(N, DAG);
+ break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
@@ -5388,6 +5702,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_CMP_SWAP:
ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
return;
+ case ISD::ATOMIC_LOAD_MIN:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_UMIN:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_MAX:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_UMAX:
+ ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
+ return;
}
if (Res.getNode())
Results.push_back(Res);
@@ -5727,7 +6053,8 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *
ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Op1, unsigned Op2,
- bool NeedsCarry, bool IsCmpxchg) const {
+ bool NeedsCarry, bool IsCmpxchg,
+ bool IsMinMax, ARMCC::CondCodes CC) const {
// This also handles ATOMIC_SWAP, indicated by Op1==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -5751,21 +6078,17 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
- unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
- unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
-
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0;
- if (IsCmpxchg) {
+ if (IsCmpxchg || IsMinMax)
contBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ if (IsCmpxchg)
cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
- }
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
MF->insert(It, loopMBB);
- if (IsCmpxchg) {
- MF->insert(It, contBB);
- MF->insert(It, cont2BB);
- }
+ if (IsCmpxchg || IsMinMax) MF->insert(It, contBB);
+ if (IsCmpxchg) MF->insert(It, cont2BB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
@@ -5792,22 +6115,26 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// cmp storesuccess, #0
// bne- loopMBB
// fallthrough --> exitMBB
- //
- // Note that the registers are explicitly specified because there is not any
- // way to force the register allocator to allocate a register pair.
- //
- // FIXME: The hardcoded registers are not necessary for Thumb2, but we
- // need to properly enforce the restriction that the two output registers
- // for ldrexd must be different.
BB = loopMBB;
+
// Load
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(ARM::R2, RegState::Define)
- .addReg(ARM::R3, RegState::Define).addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
+ .addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr));
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
+ // Copy the pair's subregs into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
+ unsigned StoreLo, StoreHi;
if (IsCmpxchg) {
// Add early exit
for (unsigned i = 0; i < 2; i++) {
@@ -5823,26 +6150,60 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
}
// Copy to physregs for strexd
- unsigned setlo = MI->getOperand(5).getReg();
- unsigned sethi = MI->getOperand(6).getReg();
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
+ StoreLo = MI->getOperand(5).getReg();
+ StoreHi = MI->getOperand(6).getReg();
} else if (Op1) {
// Perform binary operation
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
+ unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo)
.addReg(destlo).addReg(vallo))
.addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
- .addReg(desthi).addReg(valhi)).addReg(0);
+ unsigned tmpRegHi = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi)
+ .addReg(desthi).addReg(valhi))
+ .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
+
+ StoreLo = tmpRegLo;
+ StoreHi = tmpRegHi;
} else {
// Copy to physregs for strexd
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
+ StoreLo = vallo;
+ StoreHi = valhi;
+ }
+ if (IsMinMax) {
+ // Compare and branch to exit block.
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR);
+ BB->addSuccessor(exitMBB);
+ BB->addSuccessor(contBB);
+ BB = contBB;
+ StoreLo = vallo;
+ StoreHi = valhi;
}
// Store
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+ .addReg(StoreLo).addReg(StoreHi).addReg(ptr));
+ } else {
+ // Marshal a pair...
+ unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(UndefPair)
+ .addReg(StoreLo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair)
+ .addReg(r1)
+ .addReg(StoreHi)
+ .addImm(ARM::gsub_1);
+
+ // ...and store it
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+ .addReg(StorePair).addReg(ptr));
+ }
// Cmp+jump
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(storesuccess).addImm(0));
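
For ATOMMIN6432 in ARM mode, the block structure built above comes out roughly
as follows (sketch; register names are illustrative, the allocator picks the
real even/odd pair):

    // loop:
    //   ldrexd  rd_lo, rd_hi, [ptr]      ; exclusive 64-bit load
    //   subs    t_lo, rd_lo, val_lo      ; 64-bit signed compare via
    //   sbcs    t_hi, rd_hi, val_hi      ; subtract-with-carry (sets flags)
    //   blt     exit                     ; old value is already the minimum
    //   strexd  ok, val_lo, val_hi, [ptr]
    //   cmp     ok, #0
    //   bne     loop
    // exit: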
@@ -6043,6 +6404,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
unsigned UId = AFI->createJumpTableUId();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
// Create the MBBs for the dispatch code.
@@ -6051,7 +6413,13 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
DispatchBB->setIsLandingPad();
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
- BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
+ unsigned trap_opcode;
+ if (Subtarget->isThumb())
+ trap_opcode = ARM::tTRAP;
+ else
+ trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
+
+ BuildMI(TrapBB, dl, TII->get(trap_opcode));
DispatchBB->addSuccessor(TrapBB);
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
@@ -6197,11 +6565,14 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addImm(0)
.addMemOperand(JTMMOLd));
- unsigned NewVReg6 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg5, RegState::Kill)
- .addReg(NewVReg3));
+ unsigned NewVReg6 = NewVReg5;
+ if (RelocM == Reloc::PIC_) {
+ NewVReg6 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg3));
+ }
BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
.addReg(NewVReg6, RegState::Kill)
@@ -6281,11 +6652,18 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addImm(0)
.addMemOperand(JTMMOLd));
- BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
- .addReg(NewVReg5, RegState::Kill)
- .addReg(NewVReg4)
- .addJumpTableIndex(MJTI)
- .addImm(UId);
+ if (RelocM == Reloc::PIC_) {
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg4)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ } else {
+ BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
+ .addReg(NewVReg5, RegState::Kill)
+ .addJumpTableIndex(MJTI)
+ .addImm(UId);
+ }
}
// Add the jump table entries as successors to the MBB.
@@ -6334,7 +6712,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
DefRegs[OI->getReg()] = true;
}
- MachineInstrBuilder MIB(&*II);
+ MachineInstrBuilder MIB(*MF, &*II);
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
unsigned Reg = SavedRegs[i];
@@ -6411,8 +6789,9 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
- if (!MF->getFunction()->getFnAttributes().
- hasAttribute(Attributes::NoImplicitFloat) &&
+ if (!MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16) {
ldrOpc = ARM::VLD1q32wb_fixed;
@@ -6840,6 +7219,26 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ false, /*IsCmpxchg*/true);
+ case ARM::ATOMMIN6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::LT);
+ case ARM::ATOMMAX6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::GE);
+ case ARM::ATOMUMIN6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::LO);
+ case ARM::ATOMUMAX6432:
+ return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+ isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+ /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+ /*IsMinMax*/ true, ARMCC::HS);
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
@@ -9111,7 +9510,7 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
  // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
@@ -9120,15 +9519,27 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
return false;
case MVT::i8:
case MVT::i16:
- case MVT::i32:
+ case MVT::i32: {
    // Unaligned access can use (for example) LDRB, LDRH, LDR
- return AllowsUnaligned;
+ if (AllowsUnaligned) {
+ if (Fast)
+ *Fast = Subtarget->hasV7Ops();
+ return true;
+ }
+ return false;
+ }
case MVT::f64:
- case MVT::v2f64:
+ case MVT::v2f64: {
    // For any little-endian targets with NEON, we can support unaligned ld/st
    // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
    // A big-endian target may also explicitly support unaligned accesses.
- return Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian());
+ if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
+ return false;
+ }
}
}
@@ -9140,33 +9551,59 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
// See if we can use NEON instructions for this...
- if (IsZeroVal &&
- !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat) &&
- Subtarget->hasNEON()) {
- if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
- return MVT::v4i32;
- } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
- return MVT::v2i32;
+ if ((!IsMemset || ZeroMemset) &&
+ Subtarget->hasNEON() &&
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) {
+ bool Fast;
+ if (Size >= 16 &&
+ (memOpAlign(SrcAlign, DstAlign, 16) ||
+ (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) {
+ return MVT::v2f64;
+ } else if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) {
+ return MVT::f64;
}
}
// Lowering to i32/i16 if the size permits.
- if (Size >= 4) {
+ if (Size >= 4)
return MVT::i32;
- } else if (Size >= 2) {
+ else if (Size >= 2)
return MVT::i16;
- }
// Let the target-independent logic figure it out.
return MVT::Other;
}
+bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ EVT VT1 = Val.getValueType();
+ if (!VT1.isSimple() || !VT1.isInteger() ||
+ !VT2.isSimple() || !VT2.isInteger())
+ return false;
+
+ switch (VT1.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
+ return true;
+ }
+
+ return false;
+}
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 4eb3b2cb5150..9ee17f0781b9 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -17,11 +17,11 @@
#include "ARM.h"
#include "ARMSubtarget.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <vector>
namespace llvm {
@@ -232,7 +232,11 @@ namespace llvm {
ATOMAND64_DAG,
ATOMNAND64_DAG,
ATOMSWAP64_DAG,
- ATOMCMPXCHG64_DAG
+ ATOMCMPXCHG64_DAG,
+ ATOMMIN64_DAG,
+ ATOMUMIN64_DAG,
+ ATOMMAX64_DAG,
+ ATOMUMAX64_DAG
};
}
@@ -248,7 +252,7 @@ namespace llvm {
public:
explicit ARMTargetLowering(TargetMachine &TM);
- virtual unsigned getJumpTableEncoding(void) const;
+ virtual unsigned getJumpTableEncoding() const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -281,15 +285,19 @@ namespace llvm {
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
- /// unaligned memory accesses. of the specified type.
- virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+ /// unaligned memory accesses of the specified type. Returns whether it
+ /// is "fast" by reference in the second argument.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
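A minimal sketch of the intended calling pattern for the new out-parameter; the
TLI reference and the fast-path helper are hypothetical, not part of this patch:

    bool Fast = false;
    if (TLI.allowsUnalignedMemoryAccesses(MVT::i32, &Fast) && Fast) {
      // Legal and fast (e.g. ARMv7 and later): a single unaligned LDR is fine.
      emitUnalignedLoad();  // hypothetical helper
    }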
virtual EVT getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const;
+ using TargetLowering::isZExtFree;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
@@ -358,7 +366,7 @@ namespace llvm {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- virtual const TargetRegisterClass *getRegClassFor(EVT VT) const;
+ virtual const TargetRegisterClass *getRegClassFor(MVT VT) const;
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
@@ -384,7 +392,7 @@ namespace llvm {
unsigned Intrinsic) const;
protected:
std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(EVT VT) const;
+ findRepresentativeClass(MVT VT) const;
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
@@ -495,6 +503,12 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
+
+ virtual bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
+
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -526,7 +540,9 @@ namespace llvm {
unsigned Op1,
unsigned Op2,
bool NeedsCarry = false,
- bool IsCmpxchg = false) const;
+ bool IsCmpxchg = false,
+ bool IsMinMax = false,
+ ARMCC::CondCodes CC = ARMCC::AL) const;
MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size,
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index a0b6f249a286..80f0ec74376a 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -22,8 +22,8 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index df2e55ed5c0e..11550c5ae678 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -117,7 +117,7 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
SDNPVariadic]>;
def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
@@ -239,6 +239,9 @@ def IsARM : Predicate<"!Subtarget->isThumb()">,
def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
+def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
+ AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
+def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
@@ -417,6 +420,8 @@ def reglist : Operand<i32> {
let DecoderMethod = "DecodeRegListOperand";
}
+def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">;
+
def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; }
def dpr_reglist : Operand<i32> {
let EncoderMethod = "getRegisterListOpValue";
@@ -1005,7 +1010,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1017,7 +1023,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
iir, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1032,7 +1039,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
def rsi : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1047,7 +1055,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
def rsr : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1074,7 +1083,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1086,7 +1096,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
iir, opc, "\t$Rd, $Rn, $Rm",
- [/* pattern left blank */]> {
+ [/* pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1100,7 +1111,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
def rsi : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1115,7 +1127,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
def rsr : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1140,24 +1153,28 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
bit Commutable = 0> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]>;
def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
4, iir,
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> {
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
}
def rsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
- so_reg_imm:$shift))]>;
+ so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]>;
def rsr : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
- so_reg_reg:$shift))]>;
+ so_reg_reg:$shift))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
}
}
@@ -1169,19 +1186,22 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
bit Commutable = 0> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]>;
def rsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift,
- GPR:$Rn))]>;
+ GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]>;
def rsr : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift,
- GPR:$Rn))]>;
+ GPR:$Rn))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
}
}
@@ -1194,7 +1214,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
PatFrag opnode, bit Commutable = 0> {
def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
opc, "\t$Rn, $imm",
- [(opnode GPR:$Rn, so_imm:$imm)]> {
+ [(opnode GPR:$Rn, so_imm:$imm)]>,
+ Sched<[WriteCMP, ReadALU]> {
bits<4> Rn;
bits<12> imm;
let Inst{25} = 1;
@@ -1207,7 +1228,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
}
def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
opc, "\t$Rn, $Rm",
- [(opnode GPR:$Rn, GPR:$Rm)]> {
+ [(opnode GPR:$Rn, GPR:$Rm)]>,
+ Sched<[WriteCMP, ReadALU, ReadALU]> {
bits<4> Rn;
bits<4> Rm;
let isCommutable = Commutable;
@@ -1223,7 +1245,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
def rsi : AI1<opcod, (outs),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis,
opc, "\t$Rn, $shift",
- [(opnode GPR:$Rn, so_reg_imm:$shift)]> {
+ [(opnode GPR:$Rn, so_reg_imm:$shift)]>,
+ Sched<[WriteCMPsi, ReadALU]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
@@ -1239,7 +1262,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
def rsr : AI1<opcod, (outs),
(ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis,
opc, "\t$Rn, $shift",
- [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]> {
+ [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]>,
+ Sched<[WriteCMPsr, ReadALU]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
@@ -1321,7 +1345,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1333,7 +1358,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1348,7 +1374,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
(ins GPR:$Rn, so_reg_imm:$shift),
DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1364,7 +1391,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPRnopc:$Rd, CPSR,
(opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1387,7 +1415,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
[(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1398,7 +1427,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
- [/* pattern left blank */]> {
+ [/* pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1411,7 +1441,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift),
DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1425,7 +1456,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift),
DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1622,6 +1654,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
GPR:$set1, GPR:$set2),
NoItinerary, []>;
+def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
+def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2),
+ NoItinerary, []>;
}
def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
@@ -1748,11 +1792,32 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
let Inst{3-0} = opt;
}
-// A5.4 Permanently UNDEFINED instructions.
+/*
+ * A5.4 Permanently UNDEFINED instructions.
+ *
+ * For most targets use UDF #65006, for which the OS will generate SIGTRAP.
+ * Other UDF encodings generate SIGILL.
+ *
+ * NaCl's OS instead chooses an ARM UDF encoding that's also a UDF in Thumb.
+ * Encoding A1:
+ * 1110 0111 1111 iiii iiii iiii 1111 iiii
+ * Encoding T1:
+ * 1101 1110 iiii iiii
+ * It uses the following encoding:
+ * 1110 0111 1111 1110 1101 1110 1111 0000
+ * - In ARM: UDF #60896;
+ * - In Thumb: UDF #254 followed by a branch-to-self.
+ */
+let isBarrier = 1, isTerminator = 1 in
+def TRAPNaCl : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
+ Requires<[IsARM,UseNaClTrap]> {
+ let Inst = 0xe7fedef0;
+}
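As a quick arithmetic check that 0xe7fedef0 really is UDF #60896 under encoding
A1 quoted above (the A1 immediate is imm12:imm4), a small standalone C++ sketch,
not part of the patch:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Insn = 0xe7fedef0;            // TRAPNaCl's Inst value
      uint32_t Imm12 = (Insn >> 8) & 0xFFF;  // bits 19-8 of encoding A1
      uint32_t Imm4  = Insn & 0xF;           // bits 3-0
      std::printf("UDF #%u\n", (Imm12 << 4) | Imm4);  // prints: UDF #60896
      return 0;
    }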
let isBarrier = 1, isTerminator = 1 in
def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
"trap", [(trap)]>,
- Requires<[IsARM]> {
+ Requires<[IsARM,DontUseNaClTrap]> {
let Inst = 0xe7ffdefe;
}
@@ -1804,7 +1869,8 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in
// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
// know until then which form of the instruction will be used.
def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
- MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []> {
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<14> label;
let Inst{27-25} = 0b001;
@@ -2065,6 +2131,18 @@ def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> {
let Inst{24-23} = 0b11;
}
+def : ARMInstAlias<"srsda $mode", (SRSDA imm0_31:$mode)>;
+def : ARMInstAlias<"srsda $mode!", (SRSDA_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsdb $mode", (SRSDB imm0_31:$mode)>;
+def : ARMInstAlias<"srsdb $mode!", (SRSDB_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsia $mode", (SRSIA imm0_31:$mode)>;
+def : ARMInstAlias<"srsia $mode!", (SRSIA_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsib $mode", (SRSIB imm0_31:$mode)>;
+def : ARMInstAlias<"srsib $mode!", (SRSIB_UPD imm0_31:$mode)>;
+
// Return From Exception
class RFEI<bit wb, string asm>
: XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm,
@@ -3816,28 +3894,33 @@ def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "clz", "\t$Rd, $Rm",
- [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+ [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>,
+ Sched<[WriteALU]>;
def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rbit", "\t$Rd, $Rm",
[(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
- Requires<[IsARM, HasV6T2]>;
+ Requires<[IsARM, HasV6T2]>,
+ Sched<[WriteALU]>;
def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rev", "\t$Rd, $Rm",
- [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+ [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
let AddedComplexity = 5 in
def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rev16", "\t$Rd, $Rm",
[(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
let AddedComplexity = 5 in
def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "revsh", "\t$Rd, $Rm",
[(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
(and (srl GPR:$Rm, (i32 8)), 0xFF)),
@@ -3849,7 +3932,8 @@ def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd),
[(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF),
(and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh),
0xFFFF0000)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALUsi, ReadALU]>;
// Alternate cases for PKHBT where identities eliminate some nodes.
def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)),
@@ -3865,7 +3949,8 @@ def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd),
[(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000),
(and (sra GPRnopc:$Rm, pkh_asr_amt:$sh),
0xFFFF)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALUsi, ReadALU]>;
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
@@ -4229,8 +4314,8 @@ def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "ldrex", "\t$Rt, $addr", []>;
let hasExtraDefRegAllocReq = 1 in
-def LDREXD: AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2),(ins addr_offset_none:$addr),
- NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []> {
+def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegLoad";
}
}
@@ -4244,8 +4329,8 @@ def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
let hasExtraSrcRegAllocReq = 1 in
def STREXD : AIstrex<0b01, (outs GPR:$Rd),
- (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr),
- NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> {
+ (ins GPRPairOp:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegStore";
}
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 3cf213cbffee..0411ac4e282a 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4264,6 +4264,7 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
"$Vd, $Vm, #0", NEONvceqz>;
@@ -4277,10 +4278,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
"$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
"$Vd, $Vm, #0", NEONvclez>;
+}
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -4292,10 +4295,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
"$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
"$Vd, $Vm, #0", NEONvcltz>;
+}
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
@@ -4877,12 +4882,15 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
int_arm_neon_vabs>;
-def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAD, "vabs", "f32",
- v2f32, v2f32, int_arm_neon_vabs>;
-def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAQ, "vabs", "f32",
- v4f32, v4f32, int_arm_neon_vabs>;
+def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v2f32, v2f32, fabs>;
+def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v4f32, v4f32, fabs>;
+
+def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
+def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;
// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
@@ -5737,6 +5745,10 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+// Fold extracting an element out of a v2i32 into a vfp register.
+def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
+ (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 002d64a2d039..c9d709eb5222 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -130,8 +130,9 @@ def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{
let ParserMatchClass = imm0_4095_neg_asmoperand;
}
-def imm0_255_neg : PatLeaf<(i32 imm), [{
- return (uint32_t)(-N->getZExtValue()) < 255;
+def imm1_255_neg : PatLeaf<(i32 imm), [{
+ uint32_t Val = -N->getZExtValue();
+ return (Val > 0 && Val < 255);
}], imm_neg_XFORM>;
def imm0_255_not : PatLeaf<(i32 imm), [{
@@ -1928,8 +1929,8 @@ defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
// The AddedComplexity preferences the first variant over the others since
// it can be shrunk to a 16-bit wide encoding, while the others cannot.
let AddedComplexity = 1 in
-def : T2Pat<(add GPR:$src, imm0_255_neg:$imm),
- (t2SUBri GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm1_255_neg:$imm),
+ (t2SUBri GPR:$src, imm1_255_neg:$imm)>;
def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
(t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
@@ -1938,8 +1939,8 @@ def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm),
(t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
let AddedComplexity = 1 in
-def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm),
- (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, imm1_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm1_255_neg:$imm)>;
def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
(t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm),
@@ -2314,13 +2315,15 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn",
/// changed to modify CPSR.
multiclass T2I_un_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
+ PatFrag opnode,
+ bit Cheap = 0, bit ReMat = 0, bit MoveImm = 0> {
// shifted imm
def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
opc, "\t$Rd, $imm",
[(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
+ let isMoveImm = MoveImm;
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -2354,7 +2357,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn",
IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
- UnOpFrag<(not node:$Src)>, 1, 1>;
+ UnOpFrag<(not node:$Src)>, 1, 1, 1>;
let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
@@ -3478,6 +3481,13 @@ def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary,
def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary,
"srsia","\tsp, $mode", []>;
+
+def : t2InstAlias<"srsdb${p} $mode", (t2SRSDB imm0_31:$mode, pred:$p)>;
+def : t2InstAlias<"srsdb${p} $mode!", (t2SRSDB_UPD imm0_31:$mode, pred:$p)>;
+
+def : t2InstAlias<"srsia${p} $mode", (t2SRSIA imm0_31:$mode, pred:$p)>;
+def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>;
+
// Return From Exception is a system instruction.
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 254d8f6b7c7a..351a290e2aa0 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -17,12 +17,12 @@
#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index 792818442724..23a6a9b512f4 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -15,12 +15,12 @@
#define ARMJITINFO_H
#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetJITInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
namespace llvm {
class ARMTargetMachine;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 0185289f3bd8..b7ac5d57c362 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -18,8 +18,12 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -27,19 +31,15 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
@@ -87,6 +87,53 @@ namespace {
MachineBasicBlock::iterator i)
: Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
};
+ class UnitRegsMap {
+ public:
+ UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {}
+ const SmallVector<unsigned, 4>& operator[](unsigned Reg) {
+ DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found =
+ Cache.find(Reg);
+ if (found != Cache.end())
+ return found->second;
+ else
+ return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg)))
+ .first->second;
+ }
+ private:
+ SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) {
+ SmallVector<unsigned, 4> Res;
+
+ const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg);
+ if (TRC == &ARM::QPRRegClass) {
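+        // Q8-Q15 have no S subregisters, so they split into two D halves;
+        // Q0-Q7 overlap S0-S31 and split into four S registers.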
+ if (Reg > ARM::Q7) {
+ Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0));
+ Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1));
+ return Res;
+ }
+
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3));
+
+ return Res;
+ }
+
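+      // Only D0-D14 overlap the S register file (note the check excludes
+      // D15); anything else is already treated as a single unit register.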
+ if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) {
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
+ Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
+
+ return Res;
+ }
+
+ Res.push_back(Reg);
+
+ return Res;
+
+ }
+ const TargetRegisterInfo* TRI;
+ DenseMap<unsigned, SmallVector<unsigned, 4> > Cache;
+ };
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
@@ -128,6 +175,11 @@ namespace {
MachineBasicBlock::iterator MBBI,
bool &Advance,
MachineBasicBlock::iterator &I);
+ unsigned AddMemOp(MemOpQueue& MemOps,
+                   const MemOpQueueEntry NewEntry,
+ UnitRegsMap& UnitRegsInfo,
+ SmallSet<unsigned, 4>& UsedUnitRegs,
+ unsigned At = -1U);
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
@@ -865,7 +917,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the transferred register (the loaded value, or the
  // value to be stored) is the same as the would-be writeback register.
- if (isLd && MI->getOperand(0).getReg() == Base)
+ if (MI->getOperand(0).getReg() == Base)
return false;
unsigned PredReg = 0;
@@ -1188,7 +1240,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
OddDeadKill = true;
}
// Never kill the base register in the first instruction.
- // <rdar://problem/11101911>
if (EvenReg == BaseReg)
EvenDeadKill = false;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
@@ -1214,12 +1265,103 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
return false;
}
+/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti.
+/// It adds store memops with a simple push_back/insert, without any
+/// additional logic.
+/// For load operations it does the following:
+/// 1. Adds the new load operation into the MemOps collection at position
+/// "At".
+/// 2. Removes from MemOps any earlier "load" operations that change the
+/// contents of register "Reg" prior to "At".
+/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector >
+/// UsedUnitRegs - set of unit registers currently in use.
+/// At - position at which the new entry will be added, and prior to which
+/// the clean-up is performed (for load operations).
+/// FIXME: The clean-up should also be done for store operations, but the
+/// memory address should be analyzed instead of unit registers.
+unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps,
+ const MemOpQueueEntry NewEntry,
+ UnitRegsMap& UnitRegsInfo,
+ SmallSet<unsigned, 4>& UsedUnitRegs,
+ unsigned At) {
+ unsigned Cleaned = 0;
+
+ if (At == -1U) {
+ At = MemOps.size();
+ MemOps.push_back(NewEntry);
+ } else
+ MemOps.insert(&MemOps[At], NewEntry);
+
+ // FIXME:
+  // If the operation is not a load, leave it as is for now,
+  // so 0 overridden ops are cleaned in this case.
+ if (!NewEntry.MBBI->mayLoad())
+ return 0;
+
+ const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg];
+
+ bool FoundOverriddenLoads = false;
+
+ for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i)
+ if (UsedUnitRegs.count(NewEntryUnitRegs[i])) {
+ FoundOverriddenLoads = true;
+ break;
+ }
+
+  // If this register is used by load operations that precede the new one,
+  // remove them from MemOps.
+ if (FoundOverriddenLoads) {
+ MemOpQueue UpdatedMemOps;
+
+ // Scan through MemOps entries.
+ for (unsigned i = 0; i != At; ++i) {
+ MemOpQueueEntry& MemOpEntry = MemOps[i];
+
+ // FIXME: Skip non-load operations by now.
+ if (!MemOpEntry.MBBI->mayLoad())
+ continue;
+
+ const SmallVector<unsigned, 4>& MemOpUnitRegs =
+ UnitRegsInfo[MemOpEntry.Reg];
+
+      // Look for an entry that loads into a register used by the new entry.
+ bool ReleaseThisEntry = false;
+ for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) {
+ if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(),
+ MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) {
+ ReleaseThisEntry = true;
+ ++Cleaned;
+ break;
+ }
+ }
+
+ if (ReleaseThisEntry) {
+        const SmallVector<unsigned, 4>& ReleasedRegs =
+          UnitRegsInfo[MemOpEntry.Reg];
+        for (unsigned r = 0, er = ReleasedRegs.size(); r != er; ++r)
+          UsedUnitRegs.erase(ReleasedRegs[r]);
+ } else
+ UpdatedMemOps.push_back(MemOpEntry);
+ }
+
+    // Keep everything from position At onwards unchanged.
+ for (unsigned i = At, e = MemOps.size(); i != e; ++i)
+ UpdatedMemOps.push_back(MemOps[i]);
+
+ MemOps.swap(UpdatedMemOps);
+ }
+
+ UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end());
+
+ return Cleaned;
+}
+
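To make the eviction rule concrete, a distilled, standalone C++ sketch under
simplified, hypothetical types; unlike the real AddMemOp it ignores the At
position, returns nothing, and does not erase the evicted entries' unit
registers from the in-use set:

    #include <algorithm>
    #include <set>
    #include <vector>

    struct MemOp {
      bool IsLoad;
      std::vector<unsigned> Units; // unit registers this op reads/writes
    };

    // A new load evicts earlier queued loads whose unit registers overlap
    // its own: their results would be overwritten before being used.
    void addLoad(std::vector<MemOp> &Queue, std::set<unsigned> &Used,
                 const MemOp &New) {
      auto Overlaps = [&](const MemOp &Old) {
        if (!Old.IsLoad)
          return false;
        for (unsigned U : Old.Units)
          if (std::find(New.Units.begin(), New.Units.end(), U) !=
              New.Units.end())
            return true;
        return false;
      };
      Queue.erase(std::remove_if(Queue.begin(), Queue.end(), Overlaps),
                  Queue.end());
      Queue.push_back(New);
      Used.insert(New.Units.begin(), New.Units.end());
    }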
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
unsigned NumMerges = 0;
unsigned NumMemOps = 0;
MemOpQueue MemOps;
+ UnitRegsMap UnitRegsInfo(TRI);
+ SmallSet<unsigned, 4> UsedRegUnits;
unsigned CurrBase = 0;
int CurrOpc = -1;
unsigned CurrSize = 0;
@@ -1266,8 +1408,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
CurrSize = Size;
CurrPred = Pred;
CurrPredReg = PredReg;
+
MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
++NumMemOps;
+ const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg];
+ UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end());
Advance = true;
} else {
if (Clobber) {
@@ -1279,20 +1424,24 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// No need to match PredReg.
// Continue adding to the queue.
if (Offset > MemOps.back().Offset) {
- MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
- Position, MBBI));
- ++NumMemOps;
+ unsigned OverridesCleaned =
+ AddMemOp(MemOps,
+ MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI),
+ UnitRegsInfo, UsedRegUnits) != 0;
+ NumMemOps += 1 - OverridesCleaned;
Advance = true;
} else {
- for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
- I != E; ++I) {
- if (Offset < I->Offset) {
- MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
- Position, MBBI));
- ++NumMemOps;
+ for (unsigned I = 0; I != NumMemOps; ++I) {
+ if (Offset < MemOps[I].Offset) {
+ MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI);
+ unsigned OverridesCleaned =
+ AddMemOp(MemOps, entry, UnitRegsInfo,
+ UsedRegUnits, I) != 0;
+ NumMemOps += 1 - OverridesCleaned;
+
Advance = true;
break;
- } else if (Offset == I->Offset) {
+ } else if (Offset == MemOps[I].Offset) {
// Collision! This can't be merged!
break;
}
@@ -1363,6 +1512,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
CurrPredReg = 0;
if (NumMemOps) {
MemOps.clear();
+ UsedRegUnits.clear();
NumMemOps = 0;
}
@@ -1408,7 +1558,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
PrevMI->setDesc(TII->get(NewOpc));
MO.setReg(ARM::PC);
- PrevMI->copyImplicitOps(&*MBBI);
+ PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
MBB.erase(MBBI);
return true;
}
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index e2ac9a466ed8..b6414832003d 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -15,8 +15,8 @@
#include "ARM.h"
#include "ARMAsmPrinter.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index c0ac04b6003c..88d96c0be8a7 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -15,10 +15,10 @@
#define ARMMACHINEFUNCTIONINFO_H
#include "ARMSubtarget.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 02196d06bfd3..2d088de96e27 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -6,6 +6,77 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction scheduling annotations for out-of-order CPUs.
+// These annotations are independent of the itinerary class defined below.
+// Here we define the subtarget independent read/write per-operand resources.
+// The subtarget schedule definitions will then map these to the subtarget's
+// resource usages.
+// For example:
+// The instruction cycle timings table might contain an entry for an operation
+// like the following:
+// Rd <- ADD Rn, Rm, <shift> Rs
+// Uops | Latency from register | Uops - resource requirements - latency
+// 2 | Rn: 1 Rm: 4 Rs: 4 | uop T0, Rm, Rs - P01 - 3
+// | | uopc Rd, Rn, T0 - P01 - 1
+// This is telling us that the result will be available in destination register
+// Rd after a minimum of three cycles after the result in Rm and Rs is available
+// and one cycle after the result in Rn is available. The micro-ops can execute
+// on resource P01.
+// To model this, we need to express that we need to dispatch two micro-ops,
+// that the resource P01 is needed and that the latency to Rn is different than
+// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
+// two.
+// We will do this by assigning (abstract) resources to register defs/uses.
+// ARMSchedule.td:
+// def WriteALUsr : SchedWrite;
+// def ReadAdvanceALUsr : SchedRead;
+//
+// ARMInstrInfo.td:
+// def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
+// ReadDefault]> { ...}
+// ReadAdvance read resources allow us to define "pipeline by-passes" or
+// shorter latencies to certain registers as needed in the example above.
+// The "ReadDefault" can be omitted.
+// Next, the subtarget td file assigns resources to the abstract resources
+// defined here.
+// ARMScheduleSubtarget.td:
+// // Resources.
+// def P01 : ProcResource<3>; // ALU unit (3 of it).
+// ...
+// // Resource usages.
+// def : WriteRes<WriteALUsr, [P01, P01]> {
+// Latency = 4; // Latency of 4.
+// NumMicroOps = 2; // Dispatch 2 micro-ops.
+// // The two instances of resource P01 are occupied for one cycle. It is one
+// // cycle because these resources happen to be pipelined.
+// ResourceCycles = [1, 1];
+// }
+// def : ReadAdvance<ReadAdvanceALUsr, 3>;
+
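In these terms, the def-to-use latency an operand actually sees is the write's
latency minus that operand's ReadAdvance; a one-function C++ illustration (the
clamp at zero is an assumption about the generic scheduler, included only to
make the arithmetic concrete):

    // With the numbers above: WriteRes latency 4 and ReadAdvance 3 on Rn
    // leave a 1-cycle dependence on Rn, while Rm/Rs still see 4 cycles.
    int operandLatency(int WriteLatency, int ReadAdvance) {
      int Latency = WriteLatency - ReadAdvance;
      return Latency > 0 ? Latency : 0; // assumed non-negative clamp
    }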
+// Basic ALU operation.
+def WriteALU : SchedWrite;
+def ReadALU : SchedRead;
+
+// Basic ALU with shifts.
+def WriteALUsi : SchedWrite; // Shift by immediate.
+def WriteALUsr : SchedWrite; // Shift by register.
+def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
+def ReadALUsr : SchedRead; // Some operands are read later.
+
+// Compares.
+def WriteCMP : SchedWrite;
+def WriteCMPsi : SchedWrite;
+def WriteCMPsr : SchedWrite;
+
+// Define TII for use in SchedVariant Predicates.
+def : PredicateProlog<[{
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
+ (void)TII;
+}]>;
+
+def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(MI)}]>;
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 404634fee989..9739ed20ce2e 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1887,6 +1887,9 @@ def CortexA9Model : SchedMachineModel {
let LoadLatency = 2; // Optimistic load latency assuming bypass.
                                 // This is overridden by OperandCycles if the
// Itineraries are queried instead.
+ let ILPWindow = 10; // Don't reschedule small blocks to hide
+ // latency. Minimum latency requirements are already
+ // modeled strictly by reserving resources.
let MispredictPenalty = 8; // Based on estimate of pipeline depth.
let Itineraries = CortexA9Itineraries;
@@ -1895,6 +1898,8 @@ def CortexA9Model : SchedMachineModel {
//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.
+let SchedModel = CortexA9Model in {
+
def A9UnitALU : ProcResource<2>;
def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
def A9UnitAGU : ProcResource<1>;
@@ -1915,11 +1920,11 @@ def A9WriteI : SchedWriteRes<[A9UnitALU]>;
def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
// Basic ALU.
-def A9WriteA : SchedWriteRes<[A9UnitALU]>;
+def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
// ALU with operand shifted by immediate.
-def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
+def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
// ALU with operand shifted by register.
-def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
+def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
// Multiplication
def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
@@ -2000,13 +2005,6 @@ foreach NumCycles = 2-8 in {
def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
} // foreach NumCycles
-// Define TII for use in SchedVariant Predicates.
-def : PredicateProlog<[{
- const ARMBaseInstrInfo *TII =
- static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
- (void)TII;
-}]>;
-
// Define address generation sequences and predicates for 8 flavors of LDMs.
foreach NumAddr = 1-8 in {
@@ -2251,11 +2249,11 @@ def A9WriteLMfp : SchedWriteVariant<[
// These mov immediate writers are unconditionally expanded with
// additive latency.
def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
-def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>;
+def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
// Some ALU operations can read loaded integer values one cycle early.
-def A9ReadA : SchedReadAdvance<1,
+def A9ReadALU : SchedReadAdvance<1,
[A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
@@ -2276,26 +2274,25 @@ def A9Read4 : SchedReadAdvance<3>;
// This table follows the ARM Cortex-A9 Technical Reference Manuals,
// mostly in order.
-let SchedModel = CortexA9Model in {
def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
IIC_iMVNi,IIC_iMVNsi,
IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
-def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>;
+def :ItinRW<[A9WriteI,A9ReadALU],[IIC_iMVNr]>;
def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
-def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
-def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
-def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>;
-def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
-def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>;
-def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB
-def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
-def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>;
+def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
+def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
+def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>;
+def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
+def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
+def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
+def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
+def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
// A9WriteHi ignored for MUL32.
def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
@@ -2368,7 +2365,7 @@ def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
IIC_iStore_m,
IIC_iStore_mu]>;
def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
-def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>;
+def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
@@ -2483,4 +2480,17 @@ def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
+
+// Map SchedRWs that are identical for cortexa9 to existing resources.
+def : SchedAlias<WriteALU, A9WriteALU>;
+def : SchedAlias<WriteALUsr, A9WriteALUsr>;
+def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
+def : SchedAlias<ReadALU, A9ReadALU>;
+def : SchedAlias<ReadALUsr, A9ReadALU>;
+// FIXME: need to special-case AND, ORR, EOR, BIC because they don't read
+// advance. But our InstrInfo claims they do.
+
+def : SchedAlias<WriteCMP, A9WriteALU>;
+def : SchedAlias<WriteCMPsi, A9WriteALU>;
+def : SchedAlias<WriteCMPsr, A9WriteALU>;
} // SchedModel = CortexA9Model
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index e9bc3e0f3955..7c6df410706e 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1078,8 +1078,67 @@ def SwiftModel : SchedMachineModel {
let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
let LoadLatency = 3;
+ let MispredictPenalty = 14; // A branch direction mispredict.
let Itineraries = SwiftItineraries;
}
-// TODO: Add Swift processor and scheduler resources.
+// Swift predicates.
+def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
+
+// Swift resource mapping.
+let SchedModel = SwiftModel in {
+ // Processor resources.
+ def SwiftUnitP01 : ProcResource<2>; // ALU unit.
+ def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
+ def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
+ def SwiftUnitP2 : ProcResource<1>; // LS unit.
+ def SwiftUnitDiv : ProcResource<1>;
+
+ // Generic resource requirements.
+ def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
+ def SwiftWriteP01ThreeCycleTwoUops :
+ SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ }
+ def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+ }
+
+ // 4.2.4 Arithmetic and Logical.
+ // ALU operation, register-shifted-by-immediate variant.
+ def SwiftWriteALUsi : SchedWriteVariant<[
+ // lsl #2, lsl #1, or lsr #1.
+ SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+ ]>;
+ def SwiftWriteALUsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+ def SwiftWriteALUSsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+ def SwiftReadAdvanceALUsr : SchedReadVariant<[
+ SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
+ SchedVar<NoSchedPred, [NoReadAdvance]>
+ ]>;
+ // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
+ // AND,BIC,EOR,ORN,ORR
+ // CLZ,RBIT,REV,REV16,REVSH,PKH
+ def : WriteRes<WriteALU, [SwiftUnitP01]>;
+ def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
+ def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
+ def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
+ def : ReadAdvance<ReadALU, 0>;
+ def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
+
+ // 4.2.5 Integer comparison
+ def : WriteRes<WriteCMP, [SwiftUnitP01]>;
+ def : WriteRes<WriteCMPsi, [SwiftUnitP01]>;
+ def : WriteRes<WriteCMPsr, [SwiftUnitP01]>;
+}
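
Two details of the Swift model above are worth spelling out: SwiftUnitP0 and SwiftUnitP1 declare `let Super = SwiftUnitP01`, so a micro-op issued to either pipe also consumes the shared two-wide ALU group, and the SchedWriteVariant definitions pick a different resource record depending on a predicate over the instruction (a predicated register-shifted ALU op costs 3 cycles and 2 micro-ops; an unpredicated one, 2 cycles). A small C++ sketch of the variant selection, using the numbers from the hunk:

    #include <cstdio>

    // Sketch of a SchedWriteVariant: evaluate predicates in order and
    // return the first matching record, as SwiftWriteALUsr does above.
    struct WriteRes { int Latency; int NumMicroOps; };

    static WriteRes swiftWriteALUsr(bool IsPredicated) {
      if (IsPredicated)          // SchedVar<IsPredicatedPred, ...>
        return {3, 2};           // SwiftWriteP01ThreeCycleTwoUops
      return {2, 1};             // SchedVar<NoSchedPred, ...>, 2-cycle ALU
    }

    int main() {
      WriteRes R = swiftWriteALUsr(true);
      std::printf("latency=%d uops=%d\n", R.Latency, R.NumMicroOps);
    }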
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index b33b3c915a6e..41a7e0c2c8a5 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -13,8 +13,8 @@
#define DEBUG_TYPE "arm-selectiondag-info"
#include "ARMTargetMachine.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index bcc9db4ae3e3..739300e4eff9 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -12,11 +12,14 @@
//===----------------------------------------------------------------------===//
#include "ARMSubtarget.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -40,60 +43,88 @@ StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS)
+ const std::string &FS, const TargetOptions &Options)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
- , HasV4TOps(false)
- , HasV5TOps(false)
- , HasV5TEOps(false)
- , HasV6Ops(false)
- , HasV6T2Ops(false)
- , HasV7Ops(false)
- , HasVFPv2(false)
- , HasVFPv3(false)
- , HasVFPv4(false)
- , HasNEON(false)
- , UseNEONForSinglePrecisionFP(false)
- , UseMulOps(UseFusedMulOps)
- , SlowFPVMLx(false)
- , HasVMLxForwarding(false)
- , SlowFPBrcc(false)
- , InThumbMode(false)
- , HasThumb2(false)
- , IsMClass(false)
- , NoARM(false)
- , PostRAScheduler(false)
- , IsR9Reserved(ReserveR9)
- , UseMovt(false)
- , SupportsTailCall(false)
- , HasFP16(false)
- , HasD16(false)
- , HasHardwareDivide(false)
- , HasHardwareDivideInARM(false)
- , HasT2ExtractPack(false)
- , HasDataBarrier(false)
- , Pref32BitThumb(false)
- , AvoidCPSRPartialUpdate(false)
- , HasRAS(false)
- , HasMPExtension(false)
- , FPOnlySP(false)
- , AllowsUnalignedMem(false)
- , Thumb2DSP(false)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
+ , Options(Options)
, TargetABI(ARM_ABI_APCS) {
- // Determine default and user specified characteristics
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+}
+
+void ARMSubtarget::initializeEnvironment() {
+ HasV4TOps = false;
+ HasV5TOps = false;
+ HasV5TEOps = false;
+ HasV6Ops = false;
+ HasV6T2Ops = false;
+ HasV7Ops = false;
+ HasVFPv2 = false;
+ HasVFPv3 = false;
+ HasVFPv4 = false;
+ HasNEON = false;
+ UseNEONForSinglePrecisionFP = false;
+ UseMulOps = UseFusedMulOps;
+ SlowFPVMLx = false;
+ HasVMLxForwarding = false;
+ SlowFPBrcc = false;
+ InThumbMode = false;
+ HasThumb2 = false;
+ IsMClass = false;
+ NoARM = false;
+ PostRAScheduler = false;
+ IsR9Reserved = ReserveR9;
+ UseMovt = false;
+ SupportsTailCall = false;
+ HasFP16 = false;
+ HasD16 = false;
+ HasHardwareDivide = false;
+ HasHardwareDivideInARM = false;
+ HasT2ExtractPack = false;
+ HasDataBarrier = false;
+ Pref32BitThumb = false;
+ AvoidCPSRPartialUpdate = false;
+ AvoidMOVsShifterOperand = false;
+ HasRAS = false;
+ HasMPExtension = false;
+ FPOnlySP = false;
+ AllowsUnalignedMem = false;
+ Thumb2DSP = false;
+ UseNaClTrap = false;
+ UnsafeFPMath = false;
+}
+
+void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+ !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ if (!FS.empty()) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ }
+}
+
+void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUString.empty())
CPUString = "generic";
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
// based on the architecture version.
- std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPUString);
+ std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(),
+ CPUString);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS;
+ ArchFS = ArchFS + "," + FS.str();
else
ArchFS = FS;
}
@@ -110,7 +141,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- if ((TT.find("eabi") != std::string::npos) || (isTargetIOS() && isMClass()))
+ if ((TargetTriple.getTriple().find("eabi") != std::string::npos) ||
+ (isTargetIOS() && isMClass()))
// FIXME: We might want to separate AAPCS and EABI. Some systems, e.g.
// Darwin-EABI conforms to AAPCS but not the rest of EABI.
TargetABI = ARM_ABI_AAPCS;
@@ -133,6 +165,12 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
// configuration.
if (!StrictAlign && hasV6Ops() && isTargetDarwin())
AllowsUnalignedMem = true;
+
+ // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
+ uint64_t Bits = getFeatureBits();
+ if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
+ (Options.UnsafeFPMath || isTargetDarwin()))
+ UseNEONForSinglePrecisionFP = true;
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
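
This restructuring is what makes per-function subtarget selection possible: the constructor now only records the triple and options, initializeEnvironment() resets every feature flag to its default, and resetSubtargetFeatures(MF) re-derives the flags from the function's "target-cpu" and "target-features" string attributes when present. A hedged sketch of how such a feature string decomposes, with an assumed example string (the real parsing is done by the tblgen-generated ParseSubtargetFeatures):

    #include <iostream>
    #include <sstream>
    #include <string>

    // Split a "target-features"-style string into individual +/- toggles.
    int main() {
      std::string FS = "+neon,+vfp4,-db"; // assumed example, not from the patch
      std::stringstream SS(FS);
      std::string Tok;
      while (std::getline(SS, Tok, ','))
        std::cout << (Tok[0] == '+' ? "enable " : "disable ")
                  << Tok.substr(1) << '\n';
    }

Note also the new heuristic at the end of resetSubtargetFeatures: NEON is used for single-precision FP only on Cortex-A5/A8, and only when unsafe FP math or a Darwin target makes its non-IEEE behavior acceptable.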
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8e6b6506022d..5b5ee6aeb865 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -15,9 +15,9 @@
#define ARMSUBTARGET_H
#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -26,11 +26,12 @@
namespace llvm {
class GlobalValue;
class StringRef;
+class TargetOptions;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA8, CortexA9, CortexA15, Swift
+ Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift
};
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
@@ -131,6 +132,10 @@ protected:
/// CPSR setting instruction.
bool AvoidCPSRPartialUpdate;
+ /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting
+ /// movs with shifter operand (i.e. asr, lsl, lsr).
+ bool AvoidMOVsShifterOperand;
+
/// HasRAS - Some processors perform return stack prediction. CodeGen should
/// avoid issue "normal" call instructions to callees which do not return.
bool HasRAS;
@@ -152,6 +157,12 @@ protected:
/// and such) instructions in Thumb2 code.
bool Thumb2DSP;
+ /// The NaCl TRAP instruction is generated instead of the regular TRAP.
+ bool UseNaClTrap;
+
+ /// Whether the target machine allows unsafe FP math (such as use of NEON fp).
+ bool UnsafeFPMath;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -168,6 +179,9 @@ protected:
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
+ /// Options passed via command line that could influence the target.
+ const TargetOptions &Options;
+
public:
enum {
isELF, isDarwin
@@ -182,7 +196,7 @@ protected:
/// of the specified triple.
///
ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetOptions &Options);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -195,6 +209,12 @@ protected:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ /// \brief Reset the features for the ARM target.
+ virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
+ void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+public:
void computeIssueWidth();
bool hasV4TOps() const { return HasV4TOps; }
@@ -204,12 +224,14 @@ protected:
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
+ bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isCortexA9() const { return ARMProcFamily == CortexA9; }
bool isCortexA15() const { return ARMProcFamily == CortexA15; }
bool isSwift() const { return ARMProcFamily == Swift; }
bool isCortexM3() const { return CPUString == "cortex-m3"; }
bool isLikeA9() const { return isCortexA9() || isCortexA15(); }
+ bool isCortexR5() const { return ARMProcFamily == CortexR5; }
bool hasARMOps() const { return !NoARM; }
@@ -231,9 +253,11 @@ protected:
bool isFPOnlySP() const { return FPOnlySP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRAS() const { return HasRAS; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasThumb2DSP() const { return Thumb2DSP; }
+ bool useNaClTrap() const { return UseNaClTrap; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
@@ -243,7 +267,7 @@ protected:
bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetNaCl() const {
- return TargetTriple.getOS() == Triple::NativeClient;
+ return TargetTriple.getOS() == Triple::NaCl;
}
bool isTargetELF() const { return !isTargetDarwin(); }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index b486d4fe2ef9..42c7d2c437e0 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,11 +11,11 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetMachine.h"
-#include "ARMFrameLowering.h"
#include "ARM.h"
-#include "llvm/PassManager.h"
+#include "ARMFrameLowering.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden,
cl::desc("Enable global merge pass"),
cl::init(true));
+static cl::opt<bool>
+DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
+ cl::desc("Inhibit optimization of S->D register accesses on A15"),
+ cl::init(false));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -43,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
+ Subtarget(TT, CPU, FS, Options),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
@@ -51,6 +56,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
this->Options.FloatABIType = FloatABI::Soft;
}
+void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our ARM pass. This
+ // allows the ARM pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createARMTargetTransformInfoPass(this));
+}
+
+
void ARMTargetMachine::anchor() { }
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
@@ -70,8 +84,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
"v128:64:128-v64:64:64-n32-S32")),
TLInfo(*this),
TSInfo(*this),
- FrameLowering(Subtarget),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ FrameLowering(Subtarget) {
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
@@ -103,8 +116,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
TSInfo(*this),
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
- : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
namespace {
@@ -157,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() {
addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9())
addPass(createMLxExpansionPass());
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() &&
+ getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
+ addPass(createA15SDOptimizerPass());
+ }
return true;
}
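
addAnalysisPasses() above registers the generic BasicTTI pass before the ARM-specific one; because TargetTransformInfo is an analysis group, the pass registered last sits on top, and any query it does not model falls through to the generic layer. A minimal sketch of that layering via plain virtual dispatch (hypothetical stub types, not the actual pass machinery):

    #include <iostream>

    // The target-specific layer answers what it models precisely and
    // inherits (delegates) everything else from the generic layer.
    struct BasicTTIStub {
      virtual ~BasicTTIStub() = default;
      virtual unsigned getShuffleCost() const { return 1; } // generic answer
      virtual unsigned getIntImmCost() const { return 1; }  // generic answer
    };

    struct ARMTTIStub : BasicTTIStub {
      unsigned getIntImmCost() const override { return 2; } // ARM-specific
      // getShuffleCost() falls through to BasicTTIStub.
    };

    int main() {
      ARMTTIStub T;
      std::cout << T.getIntImmCost() << " " << T.getShuffleCost() << '\n';
    }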
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index ebdd5b4d64c9..d4caf5ca6e19 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -14,20 +14,19 @@
#ifndef ARMTARGETMACHINE_H
#define ARMTARGETMACHINE_H
-#include "ARMInstrInfo.h"
#include "ARMFrameLowering.h"
-#include "ARMJITInfo.h"
-#include "ARMSubtarget.h"
#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMJITInfo.h"
#include "ARMSelectionDAGInfo.h"
-#include "Thumb1InstrInfo.h"
+#include "ARMSubtarget.h"
#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -47,10 +46,17 @@ public:
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ // Implemented by derived classes
+ llvm_unreachable("getTargetLowering not implemented");
+ }
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
+ /// \brief Register ARM analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
@@ -66,8 +72,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
ARMFrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -89,12 +93,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
virtual const ARMFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const DataLayout *getDataLayout() const { return &DL; }
};
@@ -112,8 +110,6 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
OwningPtr<ARMFrameLowering> FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -142,12 +138,6 @@ public:
virtual const ARMFrameLowering *getFrameLowering() const {
return FrameLowering.get();
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
};
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 3d85ca7d6995..dfdf6ab356a3 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -9,12 +9,14 @@
#include "ARMTargetObjectFile.h"
#include "ARMSubtarget.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
using namespace llvm;
using namespace dwarf;
@@ -38,3 +40,14 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
0,
SectionKind::getMetadata());
}
+
+const MCExpr *ARMElfTargetObjectFile::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
+
+ return MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ MCSymbolRefExpr::VK_ARM_TARGET2,
+ getContext());
+}
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index c6a7261439d7..7f60727e5305 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -28,6 +28,11 @@ public:
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ const MCExpr *
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
+
virtual const MCSection *getAttributesSection() const {
return AttributesSection;
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
new file mode 100644
index 000000000000..1019b972e957
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -0,0 +1,458 @@
+//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// ARM target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armtti"
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeARMTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class ARMTTI : public ImmutablePass, public TargetTransformInfo {
+ const ARMBaseTargetMachine *TM;
+ const ARMSubtarget *ST;
+ const ARMTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ ARMTTI(const ARMBaseTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializeARMTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
+
+ /// @}
+
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector) const {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 16;
+ return 0;
+ }
+
+ if (ST->isThumb1Only())
+ return 8;
+ return 16;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 128;
+ return 0;
+ }
+
+ return 32;
+ }
+
+ unsigned getMaximumUnrollFactor() const {
+ // These are out of order CPUs:
+ if (ST->isCortexA15() || ST->isSwift())
+ return 2;
+ return 1;
+ }
+
+ unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const;
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
+
+ unsigned getAddressComputationCost(Type *Val) const;
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti",
+ "ARM Target Transform Info", true, true, false)
+char ARMTTI::ID = 0;
+
+ImmutablePass *
+llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) {
+ return new ARMTTI(TM);
+}
+
+
+unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned Bits = Ty->getPrimitiveSizeInBits();
+ if (Bits == 0 || Bits > 32)
+ return 4;
+
+ int32_t SImmVal = Imm.getSExtValue();
+ uint32_t ZImmVal = Imm.getZExtValue();
+ if (!ST->isThumb()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getSOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ } else if (ST->isThumb2()) {
+ if ((SImmVal >= 0 && SImmVal < 65536) ||
+ (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
+ (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
+ return 1;
+ return ST->hasV6T2Ops() ? 2 : 3;
+ } else /*Thumb1*/ {
+ if (SImmVal >= 0 && SImmVal < 256)
+ return 1;
+ if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
+ return 2;
+ // Load from constantpool.
+ return 3;
+ }
+ return 2;
+}
+
+unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Single to/from double precision conversions.
+ static const CostTblEntry<MVT> NEONFltDblTbl[] = {
+ // Vector fptrunc/fpext conversions.
+ { ISD::FP_ROUND, MVT::v2f64, 2 },
+ { ISD::FP_EXTEND, MVT::v2f32, 2 },
+ { ISD::FP_EXTEND, MVT::v4f32, 4 }
+ };
+
+ if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
+ ISD == ISD::FP_EXTEND)) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * NEONFltDblTbl[Idx].Cost;
+ }
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+ // Some arithmetic, load and store operations have specific instructions
+ // to cast up/down their types automatically at no extra cost.
+ // TODO: Get these tables to know at least what the related operations are.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+
+ // The number of vmovl instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+
+ // Operations that we legalize using load/stores to the stack.
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
+
+ // Vector float <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
+
+ // Vector double <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
+ };
+
+ if (SrcTy.isVector() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl,
+ array_lengthof(NEONVectorConversionTbl),
+ ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorConversionTbl[Idx].Cost;
+ }
+
+ // Scalar float to integer conversions.
+ static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = {
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
+ };
+ if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl,
+ array_lengthof(NEONFloatConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONFloatConversionTbl[Idx].Cost;
+ }
+
+ // Scalar integer to float conversions.
+ static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
+ };
+
+ if (SrcTy.isInteger() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl,
+ array_lengthof(NEONIntegerConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONIntegerConversionTbl[Idx].Cost;
+ }
+
+ // Scalar integer conversion costs.
+ static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = {
+ // i16 -> i64 requires two dependent operations.
+ { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
+
+ // Truncates on i64 are assumed to be free.
+ { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
+ };
+
+ if (SrcTy.isInteger()) {
+ int Idx =
+ ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl,
+ array_lengthof(ARMIntegerConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return ARMIntegerConversionTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) const {
+ // Penalize inserting into a D-subregister. We end up with an estimated
+ // throughput that is three times lower on Swift.
+ if (ST->isSwift() &&
+ Opcode == Instruction::InsertElement &&
+ ValTy->isVectorTy() &&
+ ValTy->getScalarSizeInBits() <= 32)
+ return 3;
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
+}
+
+unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ // On NEON a vector select gets lowered to vbsl.
+ if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ // Lowering of some vector selects is currently far from perfect.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
+ };
+
+ EVT SelCondTy = TLI->getValueType(CondTy);
+ EVT SelValTy = TLI->getValueType(ValTy);
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
+ array_lengthof(NEONVectorSelectTbl),
+ ISD, SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorSelectTbl[Idx].Cost;
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ return LT.first;
+ }
+
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
+ // In many cases the address computation is not merged into the instruction
+ // addressing mode.
+ return 1;
+}
+
+unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ // We only handle costs of reverse shuffles for now.
+ if (Kind != SK_Reverse)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ static const CostTblEntry<MVT> NEONShuffleTbl[] = {
+ // A reverse shuffle costs one instruction if we shuffle within a double
+ // word (vrev), or two if we shuffle a quad word (vrev, vext).
+ { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },
+
+ { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
+ };
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+ int Idx = CostTableLookup<MVT>(NEONShuffleTbl, array_lengthof(NEONShuffleTbl),
+ ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ return LT.first * NEONShuffleTbl[Idx].Cost;
+}
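
The conversion, select, and shuffle costs in this new file all follow one discipline: legalize the type first (getTypeLegalizationCost), then scan a static table for an (ISD opcode, MVT) match, scaling by the legalization factor where the table is keyed on legal types; a miss falls back to the generic TargetTransformInfo implementation. A compact sketch of that lookup, with hypothetical stand-in types rather than LLVM's CostTableLookup helpers:

    #include <cstddef>
    #include <cstdio>

    // Linear scan of a static (opcode, type) -> cost table; -1 means
    // "not modeled", in which case the caller defers to the generic TTI.
    struct CostEntry { int ISD; int Ty; unsigned Cost; };

    static int costTableLookup(const CostEntry *Tbl, size_t Len,
                               int ISD, int Ty) {
      for (size_t I = 0; I != Len; ++I)
        if (Tbl[I].ISD == ISD && Tbl[I].Ty == Ty)
          return static_cast<int>(I);
      return -1;
    }

    int main() {
      enum { FP_ROUND = 1, V2F64 = 7 };          // assumed stand-in IDs
      static const CostEntry Tbl[] = {{FP_ROUND, V2F64, 2}};
      int Idx = costTableLookup(Tbl, 1, FP_ROUND, V2F64);
      std::printf("cost=%u\n", Idx == -1 ? 0u : Tbl[Idx].Cost);
    }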
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
deleted file mode 100644
index fda8536fcf6b..000000000000
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/ARMBaseInfo.h"
-
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCTargetAsmLexer.h"
-
-#include "llvm/Support/TargetRegistry.h"
-
-#include "llvm/ADT/StringSwitch.h"
-
-#include <string>
-#include <map>
-
-using namespace llvm;
-
-namespace {
-
-class ARMBaseAsmLexer : public MCTargetAsmLexer {
- const MCAsmInfo &AsmInfo;
-
- const AsmToken &lexDefinite() {
- return getLexer()->Lex();
- }
-
- AsmToken LexTokenUAL();
-protected:
- typedef std::map <std::string, unsigned> rmap_ty;
-
- rmap_ty RegisterMap;
-
- void InitRegisterMap(const MCRegisterInfo *info) {
- unsigned numRegs = info->getNumRegs();
-
- for (unsigned i = 0; i < numRegs; ++i) {
- const char *regName = info->getName(i);
- if (regName)
- RegisterMap[regName] = i;
- }
- }
-
- unsigned MatchRegisterName(StringRef Name) {
- rmap_ty::iterator iter = RegisterMap.find(Name.str());
- if (iter != RegisterMap.end())
- return iter->second;
- else
- return 0;
- }
-
- AsmToken LexToken() {
- if (!Lexer) {
- SetError(SMLoc(), "No MCAsmLexer installed");
- return AsmToken(AsmToken::Error, "", 0);
- }
-
- switch (AsmInfo.getAssemblerDialect()) {
- default:
- SetError(SMLoc(), "Unhandled dialect");
- return AsmToken(AsmToken::Error, "", 0);
- case 0:
- return LexTokenUAL();
- }
- }
-public:
- ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : MCTargetAsmLexer(T), AsmInfo(MAI) {
- }
-};
-
-class ARMAsmLexer : public ARMBaseAsmLexer {
-public:
- ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
- : ARMBaseAsmLexer(T, MAI) {
- InitRegisterMap(&MRI);
- }
-};
-
-class ThumbAsmLexer : public ARMBaseAsmLexer {
-public:
- ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI)
- : ARMBaseAsmLexer(T, MAI) {
- InitRegisterMap(&MRI);
- }
-};
-
-} // end anonymous namespace
-
-AsmToken ARMBaseAsmLexer::LexTokenUAL() {
- const AsmToken &lexedToken = lexDefinite();
-
- switch (lexedToken.getKind()) {
- default: break;
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- break;
- case AsmToken::Identifier: {
- std::string lowerCase = lexedToken.getString().lower();
-
- unsigned regID = MatchRegisterName(lowerCase);
- // Check for register aliases.
- // r13 -> sp
- // r14 -> lr
- // r15 -> pc
- // ip -> r12
- // FIXME: Some assemblers support lots of others. Do we want them all?
- if (!regID) {
- regID = StringSwitch<unsigned>(lowerCase)
- .Case("r13", ARM::SP)
- .Case("r14", ARM::LR)
- .Case("r15", ARM::PC)
- .Case("ip", ARM::R12)
- .Default(0);
- }
-
- if (regID)
- return AsmToken(AsmToken::Register,
- lexedToken.getString(),
- static_cast<int64_t>(regID));
- }
- }
-
- return AsmToken(lexedToken);
-}
-
-extern "C" void LLVMInitializeARMAsmLexer() {
- RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget);
- RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
-}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c61e3bd99d77..ed7b7ec9d2cd 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -7,31 +7,34 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -178,7 +181,8 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index,
+ SMLoc &EndLoc);
// Asm Match Converter Methods
void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
@@ -249,6 +253,13 @@ public:
// Not in an ITBlock to start with.
ITState.CurPosition = ~0U;
+
+ // Set ELF header flags.
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&Parser.getStreamer()))
+ MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
}
// Implementation of the MCTargetAsmParser interface:
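
The constructor change above stamps EF_ARM_EABI_VER5 into the ELF header's e_flags for assembled objects; the EABI version occupies the top byte of e_flags (the EF_ARM_EABIMASK field). A tiny sketch of reading that field back, assuming the standard constant value 0x05000000 for version 5:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t EF_ARM_EABI_VER5 = 0x05000000; // top byte = EABI version
      uint32_t EFlags = EF_ARM_EABI_VER5;           // as set on the streamer
      std::printf("EABI version %u\n", EFlags >> 24);
    }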
@@ -258,6 +269,7 @@ public:
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseDirective(AsmToken DirectiveID);
+ unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind);
unsigned checkTargetMatchPredicate(MCInst &Inst);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -270,7 +282,7 @@ public:
namespace {
/// ARMOperand - Instances of this class represent a parsed ARM machine
-/// instruction.
+/// operand.
class ARMOperand : public MCParsedAsmOperand {
enum KindTy {
k_CondCode,
@@ -304,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
SmallVector<unsigned, 8> Registers;
+ struct CCOp {
+ ARMCC::CondCodes Val;
+ };
+
+ struct CopOp {
+ unsigned Val;
+ };
+
+ struct CoprocOptionOp {
+ unsigned Val;
+ };
+
+ struct ITMaskOp {
+ unsigned Mask:4;
+ };
+
+ struct MBOptOp {
+ ARM_MB::MemBOpt Val;
+ };
+
+ struct IFlagsOp {
+ ARM_PROC::IFlags Val;
+ };
+
+ struct MMaskOp {
+ unsigned Val;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct VectorListOp {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
+ };
+
+ struct VectorIndexOp {
+ unsigned Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ /// Combined record for all forms of ARM address expressions.
+ struct MemoryOp {
+ unsigned BaseRegNum;
+ // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
+ // was specified.
+ const MCConstantExpr *OffsetImm; // Offset immediate value
+ unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
+ ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
+ unsigned ShiftImm; // shift for OffsetReg.
+ unsigned Alignment; // 0 = no alignment specified
+ // n = alignment in bytes (2, 4, 8, 16, or 32)
+ unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
+ };
+
+ struct PostIdxRegOp {
+ unsigned RegNum;
+ bool isAdd;
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned ShiftImm;
+ };
+
+ struct ShifterImmOp {
+ bool isASR;
+ unsigned Imm;
+ };
+
+ struct RegShiftedRegOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ };
+
+ struct RegShiftedImmOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftImm;
+ };
+
+ struct RotImmOp {
+ unsigned Imm;
+ };
+
+ struct BitfieldOp {
+ unsigned LSB;
+ unsigned Width;
+ };
+
union {
- struct {
- ARMCC::CondCodes Val;
- } CC;
-
- struct {
- unsigned Val;
- } Cop;
-
- struct {
- unsigned Val;
- } CoprocOption;
-
- struct {
- unsigned Mask:4;
- } ITMask;
-
- struct {
- ARM_MB::MemBOpt Val;
- } MBOpt;
-
- struct {
- ARM_PROC::IFlags Val;
- } IFlags;
-
- struct {
- unsigned Val;
- } MMask;
-
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- // A vector register list is a sequential list of 1 to 4 registers.
- struct {
- unsigned RegNum;
- unsigned Count;
- unsigned LaneIndex;
- bool isDoubleSpaced;
- } VectorList;
-
- struct {
- unsigned Val;
- } VectorIndex;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- /// Combined record for all forms of ARM address expressions.
- struct {
- unsigned BaseRegNum;
- // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
- // was specified.
- const MCConstantExpr *OffsetImm; // Offset immediate value
- unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
- ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
- unsigned ShiftImm; // shift for OffsetReg.
- unsigned Alignment; // 0 = no alignment specified
- // n = alignment in bytes (2, 4, 8, 16, or 32)
- unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
- } Memory;
-
- struct {
- unsigned RegNum;
- bool isAdd;
- ARM_AM::ShiftOpc ShiftTy;
- unsigned ShiftImm;
- } PostIdxReg;
-
- struct {
- bool isASR;
- unsigned Imm;
- } ShifterImm;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftReg;
- unsigned ShiftImm;
- } RegShiftedReg;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftImm;
- } RegShiftedImm;
- struct {
- unsigned Imm;
- } RotImm;
- struct {
- unsigned LSB;
- unsigned Width;
- } Bitfield;
+ struct CCOp CC;
+ struct CopOp Cop;
+ struct CoprocOptionOp CoprocOption;
+ struct MBOptOp MBOpt;
+ struct ITMaskOp ITMask;
+ struct IFlagsOp IFlags;
+ struct MMaskOp MMask;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct VectorListOp VectorList;
+ struct VectorIndexOp VectorIndex;
+ struct ImmOp Imm;
+ struct MemoryOp Memory;
+ struct PostIdxRegOp PostIdxReg;
+ struct ShifterImmOp ShifterImm;
+ struct RegShiftedRegOp RegShiftedReg;
+ struct RegShiftedImmOp RegShiftedImm;
+ struct RotImmOp RotImm;
+ struct BitfieldOp Bitfield;
};
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
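
The union rewrite above changes no layout or behavior; it only hoists the anonymous member structs out into named types (CCOp, RegOp, MemoryOp, ...) so the member types can be referred to outside the union, for example when copying one variant wholesale. A minimal sketch of the resulting pattern (a discriminated union with named member structs, hypothetical fields):

    #include <cstdio>

    struct Operand {
      enum KindTy { k_Reg, k_Imm } Kind;
      struct RegOp { unsigned RegNum; };  // named, so usable by outside code
      struct ImmOp { long Val; };
      union {
        RegOp Reg;
        ImmOp Imm;
      };
    };

    int main() {
      Operand Op;
      Op.Kind = Operand::k_Reg;
      Op.Reg = Operand::RegOp{15};        // the named type permits this init
      std::printf("r%u\n", Op.Reg.RegNum);
    }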
@@ -2450,8 +2486,8 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
StartLoc = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
RegNo = tryParseRegister();
- EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -2540,6 +2576,8 @@ int ARMAsmParser::tryParseShiftRegister(
if (!PrevOp->isReg())
return Error(PrevOp->getStartLoc(), "shift must be of a register");
int SrcReg = PrevOp->getReg();
+
+ SMLoc EndLoc;
int64_t Imm = 0;
int ShiftReg = 0;
if (ShiftTy == ARM_AM::rrx) {
@@ -2554,7 +2592,7 @@ int ARMAsmParser::tryParseShiftRegister(
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
- if (getParser().ParseExpression(ShiftExpr)) {
+ if (getParser().parseExpression(ShiftExpr, EndLoc)) {
Error(ImmLoc, "invalid immediate shift value");
return -1;
}
@@ -2579,8 +2617,9 @@ int ARMAsmParser::tryParseShiftRegister(
if (Imm == 0)
ShiftTy = ARM_AM::lsl;
} else if (Parser.getTok().is(AsmToken::Identifier)) {
- ShiftReg = tryParseRegister();
SMLoc L = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
+ ShiftReg = tryParseRegister();
if (ShiftReg == -1) {
Error (L, "expected immediate or register in shift operand");
return -1;
@@ -2595,10 +2634,10 @@ int ARMAsmParser::tryParseShiftRegister(
if (ShiftReg && ShiftTy != ARM_AM::rrx)
Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
ShiftReg, Imm,
- S, Parser.getTok().getLoc()));
+ S, EndLoc));
else
Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm,
- S, Parser.getTok().getLoc()));
+ S, EndLoc));
return 0;
}
@@ -2612,12 +2651,13 @@ int ARMAsmParser::tryParseShiftRegister(
/// parse for a specific register type.
bool ARMAsmParser::
tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &RegTok = Parser.getTok();
int RegNo = tryParseRegister();
if (RegNo == -1)
return true;
- Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
+ Operands.push_back(ARMOperand::CreateReg(RegNo, RegTok.getLoc(),
+ RegTok.getEndLoc()));
const AsmToken &ExclaimTok = Parser.getTok();
if (ExclaimTok.is(AsmToken::Exclaim)) {
@@ -2635,16 +2675,16 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat left bracket token.
const MCExpr *ImmVal;
- if (getParser().ParseExpression(ImmVal))
+ if (getParser().parseExpression(ImmVal))
return true;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
if (!MCE)
return TokError("immediate value expected for vector index");
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
@@ -2780,7 +2820,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const MCExpr *Expr;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(Expr)) {
+ if (getParser().parseExpression(Expr)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -2794,7 +2834,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Check for and consume the closing '}'
if (Parser.getTok().isNot(AsmToken::RCurly))
return MatchOperand_ParseFail;
- SMLoc E = Parser.getTok().getLoc();
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the '}'
Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E));
@@ -2891,10 +2931,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
Parser.Lex(); // Eat the minus.
- SMLoc EndLoc = Parser.getTok().getLoc();
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1)
- return Error(EndLoc, "register expected");
+ return Error(AfterMinusLoc, "register expected");
// Allow Q regs and just interpret them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
EndReg = getDRegFromQReg(EndReg) + 1;
@@ -2904,10 +2944,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
continue;
// The register must be in the same register class as the first.
if (!RC->contains(EndReg))
- return Error(EndLoc, "invalid register in register list");
+ return Error(AfterMinusLoc, "invalid register in register list");
// Ranges must go from low to high.
if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
- return Error(EndLoc, "bad range in register list");
+ return Error(AfterMinusLoc, "bad range in register list");
// Add all the registers in the range to the register list.
while (Reg != EndReg) {
@@ -2955,9 +2995,9 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc));
}
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RCurly))
- return Error(E, "'}' expected");
+ return Error(Parser.getTok().getLoc(), "'}' expected");
+ SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat '}' token.
// Push the register list operand.
@@ -2974,13 +3014,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Helper function to parse the lane index for vector lists.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
Index = 0; // Always return a defined index value.
if (Parser.getTok().is(AsmToken::LBrac)) {
Parser.Lex(); // Eat the '['.
if (Parser.getTok().is(AsmToken::RBrac)) {
// "Dn[]" is the 'all lanes' syntax.
LaneKind = AllLanes;
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ']'.
return MatchOperand_Success;
}
@@ -2992,7 +3033,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
const MCExpr *LaneIndex;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(LaneIndex)) {
+ if (getParser().parseExpression(LaneIndex)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -3005,6 +3046,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
Error(Parser.getTok().getLoc(), "']' expected");
return MatchOperand_ParseFail;
}
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ']'.
int64_t Val = CE->getValue();
@@ -3031,21 +3073,19 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// (without enclosing curly braces) as a single or double entry list,
// respectively.
if (Parser.getTok().is(AsmToken::Identifier)) {
+ SMLoc E = Parser.getTok().getEndLoc();
int Reg = tryParseRegister();
if (Reg == -1)
return MatchOperand_NoMatch;
- SMLoc E = Parser.getTok().getLoc();
if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
- OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
if (Res != MatchOperand_Success)
return Res;
switch (LaneKind) {
case NoLanes:
- E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
break;
case AllLanes:
- E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false,
S, E));
break;
@@ -3059,18 +3099,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
- OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
if (Res != MatchOperand_Success)
return Res;
switch (LaneKind) {
case NoLanes:
- E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
break;
case AllLanes:
- E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
&ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
@@ -3111,7 +3149,9 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
++Reg;
++Count;
}
- if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success)
+
+ SMLoc E;
+ if (parseVectorLane(LaneKind, LaneIndex, E) != MatchOperand_Success)
return MatchOperand_ParseFail;
while (Parser.getTok().is(AsmToken::Comma) ||
@@ -3125,10 +3165,10 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat the minus.
- SMLoc EndLoc = Parser.getTok().getLoc();
+ SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1) {
- Error(EndLoc, "register expected");
+ Error(AfterMinusLoc, "register expected");
return MatchOperand_ParseFail;
}
// Allow Q regs and just interpret them as the two D sub-registers.
@@ -3140,24 +3180,24 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
continue;
// The register must be in the same register class as the first.
if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
- Error(EndLoc, "invalid register in register list");
+ Error(AfterMinusLoc, "invalid register in register list");
return MatchOperand_ParseFail;
}
// Ranges must go from low to high.
if (Reg > EndReg) {
- Error(EndLoc, "bad range in register list");
+ Error(AfterMinusLoc, "bad range in register list");
return MatchOperand_ParseFail;
}
// Parse the lane specifier if present.
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
- Error(EndLoc, "mismatched lane index in register list");
+ Error(AfterMinusLoc, "mismatched lane index in register list");
return MatchOperand_ParseFail;
}
- EndLoc = Parser.getTok().getLoc();
// Add all the registers in the range to the register list.
Count += EndReg - Reg;
@@ -3196,11 +3236,12 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Parse the lane specifier if present.
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
- SMLoc EndLoc = Parser.getTok().getLoc();
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ SMLoc LaneLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
+ MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
- Error(EndLoc, "mismatched lane index in register list");
+ Error(LaneLoc, "mismatched lane index in register list");
return MatchOperand_ParseFail;
}
continue;
@@ -3221,7 +3262,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
SMLoc EndLoc = Parser.getTok().getLoc();
- if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != MatchOperand_Success)
return MatchOperand_ParseFail;
if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
Error(EndLoc, "mismatched lane index in register list");
@@ -3229,11 +3270,11 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
}
- SMLoc E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RCurly)) {
- Error(E, "'}' expected");
+ Error(Parser.getTok().getLoc(), "'}' expected");
return MatchOperand_ParseFail;
}
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat '}' token.
switch (LaneKind) {
@@ -3310,7 +3351,7 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc Loc = Parser.getTok().getLoc();
const MCExpr *MemBarrierID;
- if (getParser().ParseExpression(MemBarrierID)) {
+ if (getParser().parseExpression(MemBarrierID)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -3525,7 +3566,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
const MCExpr *ShiftAmount;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -3540,7 +3582,7 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
return MatchOperand_ParseFail;
}
- Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc()));
+ Operands.push_back(ARMOperand::CreateImm(CE, Loc, EndLoc));
return MatchOperand_Success;
}
@@ -3550,7 +3592,7 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
- Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ Error(S, "'be' or 'le' operand expected");
return MatchOperand_ParseFail;
}
int Val = StringSwitch<int>(Tok.getString())
@@ -3560,12 +3602,12 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the token.
if (Val == -1) {
- Error(Tok.getLoc(), "'be' or 'le' operand expected");
+ Error(S, "'be' or 'le' operand expected");
return MatchOperand_ParseFail;
}
Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val,
getContext()),
- S, Parser.getTok().getLoc()));
+ S, Tok.getEndLoc()));
return MatchOperand_Success;
}
@@ -3601,16 +3643,17 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
const MCExpr *ShiftAmount;
- SMLoc E = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
- Error(E, "malformed shift expression");
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed shift expression");
return MatchOperand_ParseFail;
}
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
if (!CE) {
- Error(E, "shift amount must be an immediate");
+ Error(ExLoc, "shift amount must be an immediate");
return MatchOperand_ParseFail;
}
@@ -3618,25 +3661,24 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (isASR) {
// Shift amount must be in [1,32]
if (Val < 1 || Val > 32) {
- Error(E, "'asr' shift amount must be in range [1,32]");
+ Error(ExLoc, "'asr' shift amount must be in range [1,32]");
return MatchOperand_ParseFail;
}
// asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
if (isThumb() && Val == 32) {
- Error(E, "'asr #32' shift amount not allowed in Thumb mode");
+ Error(ExLoc, "'asr #32' shift amount not allowed in Thumb mode");
return MatchOperand_ParseFail;
}
if (Val == 32) Val = 0;
} else {
// Shift amount must be in [0,31]
if (Val < 0 || Val > 31) {
- Error(E, "'lsr' shift amount must be in range [0,31]");
+ Error(ExLoc, "'lsr' shift amount must be in range [0,31]");
return MatchOperand_ParseFail;
}
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E));
+ Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, EndLoc));
return MatchOperand_Success;
}
@@ -3662,16 +3704,17 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
+ SMLoc ExLoc = Parser.getTok().getLoc();
const MCExpr *ShiftAmount;
- SMLoc E = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ShiftAmount)) {
- Error(E, "malformed rotate expression");
+ SMLoc EndLoc;
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
+ Error(ExLoc, "malformed rotate expression");
return MatchOperand_ParseFail;
}
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
if (!CE) {
- Error(E, "rotate amount must be an immediate");
+ Error(ExLoc, "rotate amount must be an immediate");
return MatchOperand_ParseFail;
}
@@ -3680,12 +3723,11 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// normally, zero is represented in asm by omitting the rotate operand
// entirely.
if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
- Error(E, "'ror' rotate amount must be 8, 16, or 24");
+ Error(ExLoc, "'ror' rotate amount must be 8, 16, or 24");
return MatchOperand_ParseFail;
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateRotImm(Val, S, E));
+ Operands.push_back(ARMOperand::CreateRotImm(Val, S, EndLoc));
return MatchOperand_Success;
}
@@ -3703,7 +3745,7 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const MCExpr *LSBExpr;
SMLoc E = Parser.getTok().getLoc();
- if (getParser().ParseExpression(LSBExpr)) {
+ if (getParser().parseExpression(LSBExpr)) {
Error(E, "malformed immediate expression");
return MatchOperand_ParseFail;
}
@@ -3735,7 +3777,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat hash token.
const MCExpr *WidthExpr;
- if (getParser().ParseExpression(WidthExpr)) {
+ SMLoc EndLoc;
+ if (getParser().parseExpression(WidthExpr, EndLoc)) {
Error(E, "malformed immediate expression");
return MatchOperand_ParseFail;
}
@@ -3751,9 +3794,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Error(E, "'width' operand must be in the range [1,32-lsb]");
return MatchOperand_ParseFail;
}
- E = Parser.getTok().getLoc();
- Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E));
+ Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, EndLoc));
return MatchOperand_Success;
}
@@ -3772,7 +3814,6 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Tok.getLoc();
bool haveEaten = false;
bool isAdd = true;
- int Reg = -1;
if (Tok.is(AsmToken::Plus)) {
Parser.Lex(); // Eat the '+' token.
haveEaten = true;
@@ -3781,15 +3822,15 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
isAdd = false;
haveEaten = true;
}
- if (Parser.getTok().is(AsmToken::Identifier))
- Reg = tryParseRegister();
+
+ SMLoc E = Parser.getTok().getEndLoc();
+ int Reg = tryParseRegister();
if (Reg == -1) {
if (!haveEaten)
return MatchOperand_NoMatch;
Error(Parser.getTok().getLoc(), "register expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Parser.getTok().getLoc();
ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift;
unsigned ShiftImm = 0;
@@ -3797,6 +3838,9 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the ','.
if (parseMemRegOffsetShift(ShiftTy, ShiftImm))
return MatchOperand_ParseFail;
+
+ // FIXME: Only approximates end...may include intervening whitespace.
+ E = Parser.getTok().getLoc();
}
Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy,
@@ -3829,14 +3873,14 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// differently.
bool isNegative = Parser.getTok().is(AsmToken::Minus);
const MCExpr *Offset;
- if (getParser().ParseExpression(Offset))
+ SMLoc E;
+ if (getParser().parseExpression(Offset, E))
return MatchOperand_ParseFail;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
if (!CE) {
Error(S, "constant expression expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Tok.getLoc();
// Negative zero is encoded as the flag value INT32_MIN.
int32_t Val = CE->getValue();
if (isNegative && Val == 0)
@@ -3851,7 +3895,6 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
bool haveEaten = false;
bool isAdd = true;
- int Reg = -1;
if (Tok.is(AsmToken::Plus)) {
Parser.Lex(); // Eat the '+' token.
haveEaten = true;
@@ -3860,18 +3903,18 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
isAdd = false;
haveEaten = true;
}
- if (Parser.getTok().is(AsmToken::Identifier))
- Reg = tryParseRegister();
+
+ Tok = Parser.getTok();
+ int Reg = tryParseRegister();
if (Reg == -1) {
if (!haveEaten)
return MatchOperand_NoMatch;
- Error(Parser.getTok().getLoc(), "register expected");
+ Error(Tok.getLoc(), "register expected");
return MatchOperand_ParseFail;
}
- SMLoc E = Parser.getTok().getLoc();
Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift,
- 0, S, E));
+ 0, S, Tok.getEndLoc()));
return MatchOperand_Success;
}
@@ -4218,13 +4261,14 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (BaseRegNum == -1)
return Error(BaseRegTok.getLoc(), "register expected");
- // The next token must either be a comma or a closing bracket.
+ // The next token must either be a comma, a colon or a closing bracket.
const AsmToken &Tok = Parser.getTok();
- if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac))
+ if (!Tok.is(AsmToken::Colon) && !Tok.is(AsmToken::Comma) &&
+ !Tok.is(AsmToken::RBrac))
return Error(Tok.getLoc(), "malformed memory operand");
if (Tok.is(AsmToken::RBrac)) {
- E = Tok.getLoc();
+ E = Tok.getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift,
@@ -4240,8 +4284,11 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return false;
}
- assert(Tok.is(AsmToken::Comma) && "Lost comma in memory operand?!");
- Parser.Lex(); // Eat the comma.
+ assert((Tok.is(AsmToken::Colon) || Tok.is(AsmToken::Comma)) &&
+ "Lost colon or comma in memory operand?!");
+ if (Tok.is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+ }
// If we have a ':', it's an alignment specifier.
if (Parser.getTok().is(AsmToken::Colon)) {
@@ -4249,7 +4296,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getLoc();
const MCExpr *Expr;
- if (getParser().ParseExpression(Expr))
+ if (getParser().parseExpression(Expr))
return true;
// The expression has to be a constant. Memory references with relocations
@@ -4272,9 +4319,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
// Don't worry about range checking the value here. That's handled by
@@ -4305,7 +4352,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
bool isNegative = getParser().getTok().is(AsmToken::Minus);
const MCExpr *Offset;
- if (getParser().ParseExpression(Offset))
+ if (getParser().parseExpression(Offset))
return true;
// The expression has to be a constant. Memory references with relocations
@@ -4321,9 +4368,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
CE = MCConstantExpr::Create(INT32_MIN, getContext());
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
// Don't worry about range checking the value here. That's handled by
@@ -4367,9 +4414,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Now we should have the closing ']'
- E = Parser.getTok().getLoc();
if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
+ return Error(Parser.getTok().getLoc(), "']' expected");
+ E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
@@ -4424,7 +4471,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
Parser.Lex(); // Eat hash token.
const MCExpr *Expr;
- if (getParser().ParseExpression(Expr))
+ if (getParser().parseExpression(Expr))
return true;
// Range check the immediate.
// lsl, ror: 0 <= imm <= 31
@@ -4453,7 +4500,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Anything that can accept a floating point constant as an operand
- // needs to go through here, as the regular ParseExpression is
+ // needs to go through here, as the regular parseExpression is
// integer only.
//
// This routine still creates a generic Immediate operand, containing
@@ -4546,20 +4593,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Identifier: {
- if (!tryParseRegisterWithWriteBack(Operands))
- return false;
- int Res = tryParseShiftRegister(Operands);
- if (Res == 0) // success
- return false;
- else if (Res == -1) // irrecoverable error
- return true;
- // If this is VMRS, check for the apsr_nzcv operand.
- if (Mnemonic == "vmrs" &&
- Parser.getTok().getString().equals_lower("apsr_nzcv")) {
- S = Parser.getTok().getLoc();
- Parser.Lex();
- Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
- return false;
+ // If we've seen a branch mnemonic, the next operand must be a label. This
+ // is true even if the label is a register name. So "b r1" means branch to
+ // label "r1".
+ bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl";
+ if (!ExpectLabel) {
+ if (!tryParseRegisterWithWriteBack(Operands))
+ return false;
+ int Res = tryParseShiftRegister(Operands);
+ if (Res == 0) // success
+ return false;
+ else if (Res == -1) // irrecoverable error
+ return true;
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
+ return false;
+ }
}
// Fall through for the Identifier case that is not a register or a
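In effect the new guard lets the mnemonic decide the reading; an illustrative pair of inputs:

    // "b   r1"     -> "r1" parsed as a label expression (immediate operand)
    // "mov r0, r1" -> "r1" parsed as a register operand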
@@ -4573,7 +4626,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
// identifier (like labels) as expressions and create them as immediates.
const MCExpr *IdVal;
S = Parser.getTok().getLoc();
- if (getParser().ParseExpression(IdVal))
+ if (getParser().parseExpression(IdVal))
return true;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
@@ -4592,7 +4645,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
if (Parser.getTok().isNot(AsmToken::Colon)) {
bool isNegative = Parser.getTok().is(AsmToken::Minus);
const MCExpr *ImmVal;
- if (getParser().ParseExpression(ImmVal))
+ if (getParser().parseExpression(ImmVal))
return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
if (CE) {
@@ -4602,6 +4655,15 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
}
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
+
+ // There can be a trailing '!' on operands that we want as a separate
+ // '!' Token operand. Handle that here. For example, the compatibility
+ // alias for 'srsdb sp!, #imm' is 'srsdb #imm!'.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(Parser.getTok().getString(),
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ }
return false;
}
// w/ a ':' after the '#', it's just like a plain ':'.
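So the alias form keeps the writeback marker as a separate token; for instance (the operand split is illustrative):

    // "srsdb #31!" -> operands: [srsdb] [#31] [!]
    // which the matcher can then map back onto "srsdb sp!, #31".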
@@ -4616,7 +4678,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return true;
const MCExpr *SubExprVal;
- if (getParser().ParseExpression(SubExprVal))
+ if (getParser().parseExpression(SubExprVal))
return true;
const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
@@ -4989,7 +5051,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// In Thumb1, only the branch (B) instruction can be predicated.
if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(NameLoc, "conditional execution not supported in Thumb1");
}
@@ -5003,14 +5065,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (Mnemonic == "it") {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
if (ITMask.size() > 3) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "too many conditions on IT instruction");
}
unsigned Mask = 8;
for (unsigned i = ITMask.size(); i != 0; --i) {
char pos = ITMask[i - 1];
if (pos != 't' && pos != 'e') {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
}
Mask >>= 1;
@@ -5036,14 +5098,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// If we had a carry-set on an instruction that can't do that, issue an
// error.
if (!CanAcceptCarrySet && CarrySetting) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(NameLoc, "instruction '" + Mnemonic +
"' can not set flags, but 's' suffix specified");
}
// If we had a predication code on an instruction that can't do that, issue an
// error.
if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(NameLoc, "instruction '" + Mnemonic +
"' is not predicable, but condition code specified");
}
@@ -5092,7 +5154,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
if (parseOperand(Operands, Mnemonic)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
@@ -5101,7 +5163,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Parse and remember the operand.
if (parseOperand(Operands, Mnemonic)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
}
@@ -5109,7 +5171,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -5140,50 +5202,42 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
delete Op;
}
- // The vector-compare-to-zero instructions have a literal token "#0" at
- // the end that comes to here as an immediate operand. Convert it to a
- // token to play nicely with the matcher.
- if ((Mnemonic == "vceq" || Mnemonic == "vcge" || Mnemonic == "vcgt" ||
- Mnemonic == "vcle" || Mnemonic == "vclt") && Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[5])->isImm()) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
- Operands.erase(Operands.begin() + 5);
- Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
- delete Op;
- }
- }
- // VCMP{E} does the same thing, but with a different operand count.
- if ((Mnemonic == "vcmp" || Mnemonic == "vcmpe") && Operands.size() == 5 &&
- static_cast<ARMOperand*>(Operands[4])->isImm()) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[4]);
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
- Operands.erase(Operands.begin() + 4);
- Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
- delete Op;
- }
- }
- // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
- // end. Convert it to a token here. Take care not to convert those
- // that should hit the Thumb2 encoding.
- if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[5])->isImm()) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0 &&
- (isThumbOne() ||
- // The cc_out operand matches the IT block.
- ((inITBlock() != CarrySetting) &&
- // Neither register operand is a high register.
- (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
- isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){
- Operands.erase(Operands.begin() + 5);
- Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
- delete Op;
+ // Adjust operands of ldrexd/strexd to MCK_GPRPair.
+ // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+ // a single GPRPair reg operand is used in the .td file to replace the two
+ // GPRs. However, when parsing from asm, the two GPRs cannot be automatically
+ // expressed as a GPRPair, so we have to manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (!isThumb() && Operands.size() > 4 &&
+ (Mnemonic == "ldrexd" || Mnemonic == "strexd")) {
+ bool isLoad = (Mnemonic == "ldrexd");
+ unsigned Idx = isLoad ? 2 : 3;
+ ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]);
+ ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]);
+
+ const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID);
+ // Adjust only if Op1 and Op2 are GPRs.
+ if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) &&
+ MRC.contains(Op2->getReg())) {
+ unsigned Reg1 = Op1->getReg();
+ unsigned Reg2 = Op2->getReg();
+ unsigned Rt = MRI->getEncodingValue(Reg1);
+ unsigned Rt2 = MRI->getEncodingValue(Reg2);
+
+ // Rt2 must be Rt + 1 and Rt must be even.
+ if (Rt + 1 != Rt2 || (Rt & 1)) {
+ Error(Op2->getStartLoc(), isLoad ?
+ "destination operands must be sequential" :
+ "source operands must be sequential");
+ return true;
+ }
+ unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
+ &(MRI->getRegClass(ARM::GPRPairRegClassID)));
+ Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2);
+ Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg(
+ NewReg, Op1->getStartLoc(), Op2->getEndLoc()));
+ delete Op1;
+ delete Op2;
}
}
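The pairing constraint enforced above, restated as a standalone sketch (hypothetical helper, not part of the patch):

    // ldrexd/strexd need an even/odd pair: Rt even and Rt2 == Rt + 1.
    static bool isValidGPRPairEncoding(unsigned Rt, unsigned Rt2) {
      return (Rt & 1) == 0 && Rt2 == Rt + 1;
    }
    // "ldrexd r0, r1, [r2]" passes; "ldrexd r1, r2, [r3]" fails with
    // "destination operands must be sequential".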
@@ -5274,8 +5328,7 @@ validateInstruction(MCInst &Inst,
switch (Inst.getOpcode()) {
case ARM::LDRD:
case ARM::LDRD_PRE:
- case ARM::LDRD_POST:
- case ARM::LDREXD: {
+ case ARM::LDRD_POST: {
// Rt2 must be Rt + 1.
unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
@@ -5294,8 +5347,7 @@ validateInstruction(MCInst &Inst,
return false;
}
case ARM::STRD_PRE:
- case ARM::STRD_POST:
- case ARM::STREXD: {
+ case ARM::STRD_POST: {
// Rt2 must be Rt + 1.
unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg());
@@ -5686,7 +5738,12 @@ processInstruction(MCInst &Inst,
}
// Aliases for alternate PC+imm syntax of LDR instructions.
case ARM::t2LDRpcrel:
- Inst.setOpcode(ARM::t2LDRpci);
+ // Select the narrow version if the immediate will fit.
+ if (Inst.getOperand(1).getImm() > 0 &&
+ Inst.getOperand(1).getImm() <= 0xff)
+ Inst.setOpcode(ARM::tLDRpci);
+ else
+ Inst.setOpcode(ARM::t2LDRpci);
return true;
case ARM::t2LDRBpcrel:
Inst.setOpcode(ARM::t2LDRBpci);
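The narrow/wide choice above reads as a predicate on the immediate (sketch; the 0 < imm <= 0xff bounds are taken straight from the code):

    static bool fitsNarrowPcRelLoad(int64_t Imm) {
      return Imm > 0 && Imm <= 0xff; // otherwise fall back to t2LDRpci
    }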
@@ -7483,6 +7540,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
+
MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
@@ -7595,10 +7653,10 @@ bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -7742,13 +7800,13 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
unsigned Reg;
SMLoc SRegLoc, ERegLoc;
if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(SRegLoc, "register name expected");
}
// Shouldn't be anything else.
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Parser.getTok().getLoc(),
"unexpected input in .req directive.");
}
@@ -7766,7 +7824,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
/// ::= .unreq registername
bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(L, "unexpected input in .unreq directive.");
}
RegisterReqs.erase(Parser.getTok().getIdentifier());
@@ -7786,16 +7844,31 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
return true;
}
-extern "C" void LLVMInitializeARMAsmLexer();
-
/// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() {
RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget);
RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget);
- LLVMInitializeARMAsmLexer();
}
#define GET_REGISTER_MATCHER
#define GET_SUBTARGET_FEATURE_NAME
#define GET_MATCHER_IMPLEMENTATION
#include "ARMGenAsmMatcher.inc"
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+ unsigned Kind) {
+ ARMOperand *Op = static_cast<ARMOperand*>(AsmOp);
+ // If the kind is a token for a literal immediate, check if our asm
+ // operand matches. This is for InstAliases which have a fixed-value
+ // immediate in the syntax.
+ if (Kind == MCK__35_0 && Op->isImm()) {
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (!CE)
+ return Match_InvalidOperand;
+ if (CE->getValue() == 0)
+ return Match_Success;
+ }
+ return Match_InvalidOperand;
+}
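MCK__35_0 is the tablegen-mangled name of the match class for the literal token "#0" ('#' is ASCII 35, hence _35_). A hypothetical alias that would route through this hook (illustrative .td syntax, not taken from this patch):

    // def : InstAlias<"vcmp${p}.f32 $Sd, #0", (VCMPZS SPR:$Sd, pred:$p)>;
    // An operand that parses as the constant 0 then satisfies MCK__35_0 here.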
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index e24a1b17867a..d2012c387cda 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -1,7 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARMAsmParser
- ARMAsmLexer.cpp
ARMAsmParser.cpp
)
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 377bd9243c2e..b832508a086c 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -11,11 +11,11 @@ tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel)
tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv)
tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
+ A15SDOptimizer.cpp
ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
@@ -38,6 +38,7 @@ add_llvm_target(ARMCodeGen
ARMSubtarget.cpp
ARMTargetMachine.cpp
ARMTargetObjectFile.cpp
+ ARMTargetTransformInfo.cpp
MLxExpansionPass.cpp
Thumb1FrameLowering.cpp
Thumb1InstrInfo.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index f00142de50dc..2e009e55e3b0 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -9,21 +9,20 @@
#define DEBUG_TYPE "arm-disassembler"
+#include "llvm/MC/MCDisassembler.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMBaseInfo.h"
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCExpr.h"
+#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
@@ -105,10 +104,6 @@ public:
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const;
-
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
-private:
};
/// ThumbDisassembler - Thumb disassembler for all Thumb platforms.
@@ -131,8 +126,6 @@ public:
raw_ostream &vStream,
raw_ostream &cStream) const;
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
private:
mutable ITStatus ITBlock;
DecodeStatus AddThumbPredicate(MCInst&) const;
@@ -385,7 +378,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
-#include "ARMGenEDInfo.inc"
static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
return new ARMDisassembler(STI);
@@ -395,14 +387,6 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge
return new ThumbDisassembler(STI);
}
-const EDInstInfo *ARMDisassembler::getEDInfo() const {
- return instInfoARM;
-}
-
-const EDInstInfo *ThumbDisassembler::getEDInfo() const {
- return instInfoARM;
-}
-
DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
const MemoryObject &Region,
uint64_t Address,
@@ -1281,7 +1265,13 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
unsigned lsb = fieldFromInstruction(Val, 0, 5);
DecodeStatus S = MCDisassembler::Success;
- if (lsb > msb) Check(S, MCDisassembler::SoftFail);
+ if (lsb > msb) {
+ Check(S, MCDisassembler::SoftFail);
+ // The check above causes the warning for the "potentially undefined
+ // instruction encoding", but we can't build a bad MCOperand value here
+ // with an lsb > msb, or else printing the MCInst will crash.
+ lsb = msb;
+ }
uint32_t msb_mask = 0xFFFFFFFF;
if (msb != 31) msb_mask = (1U << (msb+1)) - 1;
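The mask piece built here, in isolation (sketch mirroring the two lines above):

    static uint32_t msbMask(unsigned msb) {
      return (msb == 31) ? 0xFFFFFFFFu : ((1u << (msb + 1)) - 1);
    }
    // msbMask(3) == 0xF; clamping lsb to msb above keeps the eventual
    // [lsb, msb] field well-formed even on the SoftFail path.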
@@ -3059,9 +3049,9 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4,
+ if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4,
true, 2, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
+ Inst.addOperand(MCOperand::CreateImm(Val << 1));
return MCDisassembler::Success;
}
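cb{n}z can only branch forward, so the removed sign extension was spurious. The target computation now reduces to (sketch):

    static uint64_t cbzTargetAddress(uint64_t BranchAddr, unsigned Val) {
      // Val is the raw immediate, shifted left by one; PC reads as addr + 4.
      return BranchAddr + (Val << 1) + 4;
    }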
@@ -3288,7 +3278,7 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
if (load) {
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index dcc41d93f5ce..2afb20d6686a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -13,11 +13,11 @@
#define DEBUG_TYPE "asm-printer"
#include "ARMInstPrinter.h"
-#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/MC/MCInst.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
@@ -252,6 +252,35 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
+ // Combine 2 GPRs from the disassembler into a GPRPair to match the instr def.
+ // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+ // a single GPRPair reg operand is used in the .td file to replace the two
+ // GPRs. However, when decoding them, the two GPRs cannot be automatically
+ // expressed as a GPRPair, so we have to manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) {
+ const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
+ bool isStore = Opcode == ARM::STREXD;
+ unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
+ if (MRC.contains(Reg)) {
+ MCInst NewMI;
+ MCOperand NewReg;
+ NewMI.setOpcode(Opcode);
+
+ if (isStore)
+ NewMI.addOperand(MI->getOperand(0));
+ NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0,
+ &MRI.getRegClass(ARM::GPRPairRegClassID)));
+ NewMI.addOperand(NewReg);
+
+ // Copy the remaining operands into NewMI.
+ for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
+ NewMI.addOperand(MI->getOperand(i));
+ printInstruction(&NewMI, O);
+ return;
+ }
+ }
+
printInstruction(MI, O);
printAnnotation(O, Annot);
}
@@ -264,7 +293,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printRegName(O, Reg);
} else if (Op.isImm()) {
O << markup("<imm:")
- << '#' << Op.getImm()
+ << '#' << formatImm(Op.getImm())
<< markup(">");
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
@@ -290,7 +319,7 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
O << *MO1.getExpr();
else if (MO1.isImm()) {
O << markup("<mem:") << "[pc, "
- << markup("<imm:") << "#" << MO1.getImm()
+ << markup("<imm:") << "#" << formatImm(MO1.getImm())
<< markup(">]>", "]");
}
else
@@ -598,8 +627,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (MO2.getImm()) {
- // FIXME: Both darwin as and GNU as violate ARM docs here.
- O << ", :" << (MO2.getImm() << 3);
+ O << ":" << (MO2.getImm() << 3);
}
O << "]" << markup(">");
}
@@ -691,6 +719,15 @@ void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
O << "}";
}
+void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0));
+ O << ", ";
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1));
+}
+
+
void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
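Together with the parser-side merge earlier in this patch, the pair now round-trips (illustrative):

    // decode: LDREXD carrying the GPRPair super-register for r0/r1
    // print:  "ldrexd r0, r1, [r2]"   (gsub_0 -> r0, gsub_1 -> r1)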
@@ -873,7 +910,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << markup("<imm:")
- << "#" << MI->getOperand(OpNum).getImm() * 4
+ << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
<< markup(">");
}
@@ -881,7 +918,7 @@ void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
O << markup("<imm:")
- << "#" << (Imm == 0 ? 32 : Imm)
+ << "#" << formatImm((Imm == 0 ? 32 : Imm))
<< markup(">");
}
@@ -938,7 +975,7 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
if (unsigned ImmOffs = MO2.getImm()) {
O << ", "
<< markup("<imm:")
- << "#" << ImmOffs * Scale
+ << "#" << formatImm(ImmOffs * Scale)
<< markup(">");
}
O << "]" << markup(">");
@@ -1089,7 +1126,7 @@ void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
if (MO2.getImm()) {
O << ", "
<< markup("<imm:")
- << "#" << MO2.getImm() * 4
+ << "#" << formatImm(MO2.getImm() * 4)
<< markup(">");
}
O << "]" << markup(">");
@@ -1179,7 +1216,7 @@ void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
O << markup("<imm:")
- << "#" << Imm + 1
+ << "#" << formatImm(Imm + 1)
<< markup(">");
}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index b7bab5fdcd8e..edff75d886e9 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -124,6 +124,7 @@ public:
void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/LICENSE.TXT b/lib/Target/ARM/LICENSE.TXT
new file mode 100755
index 000000000000..68afea12ed44
--- /dev/null
+++ b/lib/Target/ARM/LICENSE.TXT
@@ -0,0 +1,47 @@
+ARM Limited
+
+Software Grant License Agreement ("Agreement")
+
+Except for the license granted herein to you, ARM Limited ("ARM") reserves all
+right, title, and interest in and to the Software (defined below).
+
+Definition
+
+"Software" means the code and documentation as well as any original work of
+authorship, including any modifications or additions to an existing work, that
+is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for
+inclusion in, or documentation of, any of the products owned or managed by LLVM
+(the "Work"). For the purposes of this definition, "submitted" means any form of
+electronic, verbal, or written communication sent to LLVM or its
+representatives, including but not limited to communication on electronic
+mailing lists, source code control systems, and issue tracking systems that are
+managed by, or on behalf of, LLVM for the purpose of discussing and improving
+the Work, but excluding communication that is conspicuously marked otherwise.
+
+1. Grant of Copyright License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants to you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable copyright license to reproduce, prepare derivative
+ works of, publicly display, publicly perform, sublicense, and distribute the
+ Software and such derivative works.
+
+2. Grant of Patent License. Subject to the terms and conditions of this
+ Agreement, ARM hereby grants you and to recipients of the Software
+ distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+ royalty-free, irrevocable (except as stated in this section) patent license
+ to make, have made, use, offer to sell, sell, import, and otherwise transfer
+ the Work, where such license applies only to those patent claims licensable
+ by ARM that are necessarily infringed by ARM's Software alone or by
+ combination of the Software with the Work to which such Software was
+ submitted. If any entity institutes patent litigation against ARM or any
+ other entity (including a cross-claim or counterclaim in a lawsuit) alleging
+ that ARM's Software, or the Work to which ARM has contributed constitutes
+ direct or contributory patent infringement, then any patent licenses granted
+ to that entity under this Agreement for the Software or Work shall terminate
+ as of the date such litigation is filed.
+
+Unless required by applicable law or agreed to in writing, the software is
+provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+either express or implied, including, without limitation, any warranties or
+conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1ba6ab039f20..e66e98567873 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -8,9 +8,11 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
@@ -21,7 +23,6 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
@@ -114,11 +115,15 @@ public:
MCValue &Target, uint64_t &Value,
bool &IsResolved);
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
bool mayNeedRelaxation(const MCInst &Inst) const;
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
@@ -161,7 +166,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
switch ((unsigned)Fixup.getKind()) {
case ARM::fixup_arm_thumb_br: {
@@ -216,7 +221,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
- const uint32_t ARMv4_NopEncoding = 0xe1a0000; // using MOV r0,r0
+ const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
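For reference, the corrected constant decodes as intended (field breakdown hedged from the ARM data-processing encoding):

    // 0xe1a00000: cond=AL (0xe), opcode=MOV (0b1101), Rd=r0, Rm=r0,
    //             i.e. "mov r0, r0"; the old 0xe1a0000 was missing a nibble.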
@@ -552,65 +557,6 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(void)adjustFixupValue(Fixup, Value, &Asm.getContext());
}
-namespace {
-
-// FIXME: This should be in a separate file.
-// ELF is an ELF of course...
-class ELFARMAsmBackend : public ARMAsmBackend {
-public:
- uint8_t OSABI;
- ELFARMAsmBackend(const Target &T, const StringRef TT,
- uint8_t _OSABI)
- : ARMAsmBackend(T, TT), OSABI(_OSABI) { }
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const;
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createARMELFObjectWriter(OS, OSABI);
- }
-};
-
-// FIXME: Raise this to share code between Darwin and ELF.
-void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value) const {
- unsigned NumBytes = 4; // FIXME: 2 for Thumb
- Value = adjustFixupValue(Fixup, Value);
- if (!Value) return; // Doesn't change encoding.
-
- unsigned Offset = Fixup.getOffset();
-
- // For each byte of the fragment that the fixup touches, mask in the bits from
- // the fixup value. The Value has been "split up" into the appropriate
- // bitfields above.
- for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
-}
-
-// FIXME: This should be in a separate file.
-class DarwinARMAsmBackend : public ARMAsmBackend {
-public:
- const object::mach::CPUSubtypeARM Subtype;
- DarwinARMAsmBackend(const Target &T, const StringRef TT,
- object::mach::CPUSubtypeARM st)
- : ARMAsmBackend(T, TT), Subtype(st) {
- HasDataInCodeSupport = true;
- }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
- object::mach::CTM_ARM,
- Subtype);
- }
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const;
-
- virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
- return false;
- }
-};
-
/// getFixupKindNumBytes - The number of bytes the fixup may change.
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
@@ -659,8 +605,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
}
}
-void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value) const {
+void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
Value = adjustFixupValue(Fixup, Value);
if (!Value) return; // Doesn't change encoding.
@@ -668,37 +614,70 @@ void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned Offset = Fixup.getOffset();
assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
- // For each byte of the fragment that the fixup touches, mask in the
- // bits from the fixup value.
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
for (unsigned i = 0; i != NumBytes; ++i)
Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
+namespace {
+
+// FIXME: This should be in a separate file.
+// ELF is an ELF of course...
+class ELFARMAsmBackend : public ARMAsmBackend {
+public:
+ uint8_t OSABI;
+ ELFARMAsmBackend(const Target &T, const StringRef TT,
+ uint8_t _OSABI)
+ : ARMAsmBackend(T, TT), OSABI(_OSABI) { }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMELFObjectWriter(OS, OSABI);
+ }
+};
+
+// FIXME: This should be in a separate file.
+class DarwinARMAsmBackend : public ARMAsmBackend {
+public:
+ const object::mach::CPUSubtypeARM Subtype;
+ DarwinARMAsmBackend(const Target &T, const StringRef TT,
+ object::mach::CPUSubtypeARM st)
+ : ARMAsmBackend(T, TT), Subtype(st) {
+ HasDataInCodeSupport = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ Subtype);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+};
+
} // end anonymous namespace
MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin()) {
- if (TheTriple.getArchName() == "armv4t" ||
- TheTriple.getArchName() == "thumbv4t")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V4T);
- else if (TheTriple.getArchName() == "armv5e" ||
- TheTriple.getArchName() == "thumbv5e")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V5TEJ);
- else if (TheTriple.getArchName() == "armv6" ||
- TheTriple.getArchName() == "thumbv6")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6);
- else if (TheTriple.getArchName() == "armv7f" ||
- TheTriple.getArchName() == "thumbv7f")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F);
- else if (TheTriple.getArchName() == "armv7k" ||
- TheTriple.getArchName() == "thumbv7k")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K);
- else if (TheTriple.getArchName() == "armv7s" ||
- TheTriple.getArchName() == "thumbv7s")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S);
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7);
+ object::mach::CPUSubtypeARM CS =
+ StringSwitch<object::mach::CPUSubtypeARM>(TheTriple.getArchName())
+ .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T)
+ .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ)
+ .Cases("armv6", "thumbv6", object::mach::CSARM_V6)
+ .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M)
+ .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM)
+ .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F)
+ .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K)
+ .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M)
+ .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S)
+ .Default(object::mach::CSARM_V7);
+
+ return new DarwinARMAsmBackend(T, TT, CS);
}
if (TheTriple.isOSWindows())
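The StringSwitch form is behavior-preserving relative to the removed if/else chain; the idiom in isolation (hypothetical mapping):

    #include "llvm/ADT/StringSwitch.h"
    static unsigned archGeneration(llvm::StringRef Arch) {
      return llvm::StringSwitch<unsigned>(Arch)
          .Cases("armv4t", "thumbv4t", 4)
          .Cases("armv6", "thumbv6", 6)
          .Default(7); // unknown names fall back, as CSARM_V7 does above
    }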
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 99e4f713f690..f98bbd204c7a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -37,7 +37,6 @@ namespace {
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend) const;
- virtual unsigned getEFlags() const;
virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
const MCValue &Target,
const MCFragment &F,
@@ -53,11 +52,6 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
ARMELFObjectWriter::~ARMELFObjectWriter() {}
-// FIXME: get the real EABI Version from the Triple.
-unsigned ARMELFObjectWriter::getEFlags() const {
- return ELF::EF_ARM_EABIMASK & DefaultEABIVersion;
-}
-
// In ARM, _MergedGlobals and other most symbols get emitted directly.
// I.e. not as an offset to a section symbol.
// This code is an approximation of what ARM/gcc does.
@@ -133,6 +127,7 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
switch (RelocType) {
default: EmitThisSym = true; break;
case ELF::R_ARM_ABS32: EmitThisSym = false; break;
+ case ELF::R_ARM_PREL31: EmitThisSym = false; break;
}
}
@@ -225,6 +220,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case FK_Data_4:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_ARM_NONE:
+ Type = ELF::R_ARM_NONE;
+ break;
case MCSymbolRefExpr::VK_ARM_GOT:
Type = ELF::R_ARM_GOT_BREL;
break;
@@ -249,7 +247,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_ARM_TARGET2:
Type = ELF::R_ARM_TARGET2;
break;
- }
+ case MCSymbolRefExpr::VK_ARM_PREL31:
+ Type = ELF::R_ARM_PREL31;
+ break;
+ }
break;
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
new file mode 100644
index 000000000000..418971df3292
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -0,0 +1,418 @@
+//===- lib/MC/ARMELFStreamer.cpp - ELF Object Output for ARM --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ARM ELF .o object files. It differs
+// from the generic ELF streamer in that it emits mapping symbols ($a, $t and
+// $d) to delimit regions of data and code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMUnwindOp.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
+/// the appropriate points in the object files. These symbols are defined in the
+/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf.
+///
+/// In brief: $a, $t or $d should be emitted at the start of each contiguous
+/// region of ARM code, Thumb code or data in a section. In practice, this
+/// emission does not rely on explicit assembler directives but on inherent
+/// properties of the directives doing the emission (e.g. ".byte" is data, "add
+/// r0, r0, r0" an instruction).
+///
+/// As a result this system is orthogonal to the DataRegion infrastructure used
+/// by MachO. Beware!
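Concretely (illustrative assembly; the symbols come from the streamer overrides below, not from explicit directives):

    // .code 32
    // add r0, r0, r0   @ "$a" emitted before the first ARM instruction
    // .code 16
    // nop              @ "$t" emitted before the first Thumb instruction
    // .byte 0xff       @ "$d" emitted before the first data byte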
+class ARMELFStreamer : public MCELFStreamer {
+public:
+ ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool IsThumb)
+ : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
+ IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0),
+ FnStart(0), Personality(0), CantUnwind(false) {}
+
+ ~ARMELFStreamer() {}
+
+ // ARM exception handling directives
+ virtual void EmitFnStart();
+ virtual void EmitFnEnd();
+ virtual void EmitCantUnwind();
+ virtual void EmitPersonality(const MCSymbol *Per);
+ virtual void EmitHandlerData();
+ virtual void EmitSetFP(unsigned NewFpReg,
+ unsigned NewSpReg,
+ int64_t Offset = 0);
+ virtual void EmitPad(int64_t Offset);
+ virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector);
+
+ virtual void ChangeSection(const MCSection *Section) {
+ // We have to keep track of the mapping symbol state of any sections we
+ // use. Each one should start off as EMS_None, which is what
+ // DenseMap::lookup returns for a section it has not seen yet.
+ LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::ChangeSection(Section);
+ }
+
+ /// This function is the one used to emit instruction data into the ELF
+ /// streamer. We override it to add the appropriate mapping symbol if
+ /// necessary.
+ virtual void EmitInstruction(const MCInst& Inst) {
+ if (IsThumb)
+ EmitThumbMappingSymbol();
+ else
+ EmitARMMappingSymbol();
+
+ MCELFStreamer::EmitInstruction(Inst);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitBytes(Data, AddrSpace);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
+ /// necessary.
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ MCELFStreamer::EmitAssemblerFlag(Flag);
+
+ switch (Flag) {
+ case MCAF_SyntaxUnified:
+ return; // no-op here.
+ case MCAF_Code16:
+ IsThumb = true;
+ return; // Change to Thumb mode
+ case MCAF_Code32:
+ IsThumb = false;
+ return; // Change to ARM mode
+ case MCAF_Code64:
+ return;
+ case MCAF_SubsectionsViaSymbols:
+ return;
+ }
+ }
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_ARMELFStreamer;
+ }
+
+private:
+ enum ElfMappingSymbol {
+ EMS_None,
+ EMS_ARM,
+ EMS_Thumb,
+ EMS_Data
+ };
+
+ void EmitDataMappingSymbol() {
+ if (LastEMS == EMS_Data) return;
+ EmitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+ }
+
+ void EmitThumbMappingSymbol() {
+ if (LastEMS == EMS_Thumb) return;
+ EmitMappingSymbol("$t");
+ LastEMS = EMS_Thumb;
+ }
+
+ void EmitARMMappingSymbol() {
+ if (LastEMS == EMS_ARM) return;
+ EmitMappingSymbol("$a");
+ LastEMS = EMS_ARM;
+ }
+
+ void EmitMappingSymbol(StringRef Name) {
+ MCSymbol *Start = getContext().CreateTempSymbol();
+ EmitLabel(Start);
+
+ MCSymbol *Symbol =
+ getContext().GetOrCreateSymbol(Name + "." +
+ Twine(MappingSymbolCounter++));
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ Symbol->setSection(*getCurrentSection());
+
+ const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ Symbol->setVariableValue(Value);
+ }
+
+ void EmitThumbFunc(MCSymbol *Func) {
+ // FIXME: Anything needed here to flag the function as thumb?
+
+ getAssembler().setIsThumbFunc(Func);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func);
+ SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
+ }
+
+ // Helper functions for ARM exception handling directives
+ void Reset();
+
+ void EmitPersonalityFixup(StringRef Name);
+
+ void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
+ SectionKind Kind, const MCSymbol &Fn);
+ void SwitchToExTabSection(const MCSymbol &FnStart);
+ void SwitchToExIdxSection(const MCSymbol &FnStart);
+
+ bool IsThumb;
+ int64_t MappingSymbolCounter;
+
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS;
+
+ // ARM Exception Handling Frame Information
+ MCSymbol *ExTab;
+ MCSymbol *FnStart;
+ const MCSymbol *Personality;
+ bool CantUnwind;
+};
+}
+
+inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
+ unsigned Type,
+ unsigned Flags,
+ SectionKind Kind,
+ const MCSymbol &Fn) {
+ const MCSectionELF &FnSection =
+ static_cast<const MCSectionELF &>(Fn.getSection());
+
+  // Create the name for the new section
+ StringRef FnSecName(FnSection.getSectionName());
+ SmallString<128> EHSecName(Prefix);
+ if (FnSecName != ".text") {
+ EHSecName += FnSecName;
+ }
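+  // e.g. for functions in ".text" this yields ".ARM.extab"/".ARM.exidx";
+  // for ".text.foo" it yields ".ARM.extab.text.foo" and so on.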
+
+ // Get .ARM.extab or .ARM.exidx section
+ const MCSectionELF *EHSection = NULL;
+ if (const MCSymbol *Group = FnSection.getGroup()) {
+ EHSection = getContext().getELFSection(
+ EHSecName, Type, Flags | ELF::SHF_GROUP, Kind,
+ FnSection.getEntrySize(), Group->getName());
+ } else {
+ EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind);
+ }
+ assert(EHSection);
+
+ // Switch to .ARM.extab or .ARM.exidx section
+ SwitchSection(EHSection);
+ EmitCodeAlignment(4, 0);
+}
+
+inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.extab",
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel(),
+ FnStart);
+}
+
+inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.exidx",
+ ELF::SHT_ARM_EXIDX,
+ ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
+ SectionKind::getDataRel(),
+ FnStart);
+}
+
+void ARMELFStreamer::Reset() {
+ ExTab = NULL;
+ FnStart = NULL;
+ Personality = NULL;
+ CantUnwind = false;
+}
+
+// Add the R_ARM_NONE fixup at the same position
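+// (an R_ARM_NONE relocation contributes no bytes to the output but creates a
+// reference that keeps the personality routine alive at link time).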
+void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
+ const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
+
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(PersonalitySym,
+ MCSymbolRefExpr::VK_ARM_NONE,
+ getContext());
+
+ AddValueSymbols(PersonalityRef);
+ MCDataFragment *DF = getOrCreateDataFragment();
+ DF->getFixups().push_back(
+ MCFixup::Create(DF->getContents().size(), PersonalityRef,
+ MCFixup::getKindForSize(4, false)));
+}
+
+void ARMELFStreamer::EmitFnStart() {
+ assert(FnStart == 0);
+ FnStart = getContext().CreateTempSymbol();
+ EmitLabel(FnStart);
+}
+
+void ARMELFStreamer::EmitFnEnd() {
+  assert(FnStart && ".fnstart must precede .fnend");
+
+ // Emit unwind opcodes if there is no .handlerdata directive
+ int PersonalityIndex = -1;
+ if (!ExTab && !CantUnwind) {
+ // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab.
+ SwitchToExTabSection(*FnStart);
+
+ // Create .ARM.extab label for offset in .ARM.exidx
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+
+ PersonalityIndex = 1;
+
+ uint32_t Entry = 0;
+ uint32_t NumExtraEntryWords = 0;
+ Entry |= NumExtraEntryWords << 24;
+ Entry |= (EHT_COMPACT | PersonalityIndex) << 16;
+
+ // TODO: This should be generated according to .save, .vsave, .setfp
+    // directives. Currently, we simply generate the FINISH opcode.
+ Entry |= UNWIND_OPCODE_FINISH << 8;
+ Entry |= UNWIND_OPCODE_FINISH;
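+    // With PersonalityIndex == 1 this packs to the word 0x0081B0B0
+    // (compact entry, personality pr1, no extra words, two FINISH opcodes).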
+
+ EmitIntValue(Entry, 4, 0);
+ }
+
+ // Emit the exception index table entry
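+  // (two words: a PREL31 reference to the function start, then either
+  // EXIDX_CANTUNWIND or a PREL31 reference to the .ARM.extab entry).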
+ SwitchToExIdxSection(*FnStart);
+
+ if (PersonalityIndex == 1)
+ EmitPersonalityFixup("__aeabi_unwind_cpp_pr1");
+
+ const MCSymbolRefExpr *FnStartRef =
+ MCSymbolRefExpr::Create(FnStart,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(FnStartRef, 4, 0);
+
+ if (CantUnwind) {
+ EmitIntValue(EXIDX_CANTUNWIND, 4, 0);
+ } else {
+ const MCSymbolRefExpr *ExTabEntryRef =
+ MCSymbolRefExpr::Create(ExTab,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+ EmitValue(ExTabEntryRef, 4, 0);
+ }
+
+  // Clear the exception handling frame information
+ Reset();
+}
+
+void ARMELFStreamer::EmitCantUnwind() {
+ CantUnwind = true;
+}
+
+void ARMELFStreamer::EmitHandlerData() {
+ SwitchToExTabSection(*FnStart);
+
+ // Create .ARM.extab label for offset in .ARM.exidx
+ assert(!ExTab);
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+
+ // Emit Personality
+  assert(Personality && ".personality directive must precede .handlerdata");
+
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(Personality,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(PersonalityRef, 4, 0);
+
+ // Emit unwind opcodes
+ uint32_t Entry = 0;
+ uint32_t NumExtraEntryWords = 0;
+
+ // TODO: This should be generated according to .save, .vsave, .setfp
+  // directives. Currently, we simply generate the FINISH opcode.
+ Entry |= NumExtraEntryWords << 24;
+ Entry |= UNWIND_OPCODE_FINISH << 16;
+ Entry |= UNWIND_OPCODE_FINISH << 8;
+ Entry |= UNWIND_OPCODE_FINISH;
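+  // With three FINISH opcodes and no extra words this is the word 0x00B0B0B0.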
+
+ EmitIntValue(Entry, 4, 0);
+}
+
+void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
+ Personality = Per;
+}
+
+void ARMELFStreamer::EmitSetFP(unsigned NewFpReg,
+ unsigned NewSpReg,
+ int64_t Offset) {
+ // TODO: Not implemented
+}
+
+void ARMELFStreamer::EmitPad(int64_t Offset) {
+ // TODO: Not implemented
+}
+
+void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool IsVector) {
+ // TODO: Not implemented
+}
+
+namespace llvm {
+ MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack,
+ bool IsThumb) {
+ ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+ }
+
+}
+
+
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
new file mode 100644
index 000000000000..77ae5d23628e
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
@@ -0,0 +1,27 @@
+//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF streamer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_ELF_STREAMER_H
+#define ARM_ELF_STREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+
+ MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack,
+ bool IsThumb);
+}
+
+#endif // ARM_ELF_STREAMER_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index d0e127a8f335..7a59a7dd5055 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -12,11 +12,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -24,8 +26,6 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -655,15 +655,28 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
int32_t offset = MO.getImm();
uint32_t Val = 0x2000;
+ int SoImmVal;
if (offset == INT32_MIN) {
Val = 0x1000;
- offset = 0;
+ SoImmVal = 0;
} else if (offset < 0) {
Val = 0x1000;
offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ if(SoImmVal == -1) {
+ Val = 0x2000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ }
+ } else {
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ if(SoImmVal == -1) {
+ Val = 0x1000;
+ offset *= -1;
+ SoImmVal = ARM_AM::getSOImmVal(offset);
+ }
}
- int SoImmVal = ARM_AM::getSOImmVal(offset);
assert(SoImmVal != -1 && "Not a valid so_imm value!");
Val |= SoImmVal;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 22e14a2281de..fc8505b052bd 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -9,8 +9,8 @@
#define DEBUG_TYPE "armmcexpr"
#include "ARMMCExpr.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
using namespace llvm;
const ARMMCExpr*
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index b404e6c6e014..cd4067a52955 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -64,6 +64,9 @@ public:
return getSubExpr()->FindAssociatedSection();
}
+ // There are no TLS ARMMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 00ffc94ac7d1..f09fb5a94fd8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -11,10 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMMCTargetDesc.h"
-#include "ARMMCAsmInfo.h"
#include "ARMBaseInfo.h"
+#include "ARMELFStreamer.h"
+#include "ARMMCAsmInfo.h"
+#include "ARMMCTargetDesc.h"
#include "InstPrinter/ARMInstPrinter.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -36,6 +38,8 @@
using namespace llvm;
std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
+ Triple triple(TT);
+
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
unsigned Len = TT.size();
@@ -118,6 +122,13 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
ARMArchFeature += ",+thumb-mode";
}
+ if (triple.isOSNaCl()) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+nacl-trap";
+ else
+ ARMArchFeature += ",+nacl-trap";
+ }
+
return ARMArchFeature;
}
@@ -144,7 +155,7 @@ static MCInstrInfo *createARMMCInstrInfo() {
static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
- InitARMMCRegisterInfo(X, ARM::LR);
+ InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC);
return X;
}
@@ -186,7 +197,8 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
llvm_unreachable("ARM does not support Windows COFF format");
}
- return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack);
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack,
+ TheTriple.getArch() == Triple::thumb);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 2154c931769a..b9efe74b41e5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -7,17 +7,18 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
new file mode 100644
index 000000000000..dad5576df4cd
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
@@ -0,0 +1,112 @@
+//===-- ARMUnwindOp.h - ARM Unwind Opcodes ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the constants for the ARM unwind opcodes and exception
+// handling table entry kinds.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_UNWIND_OP_H
+#define ARM_UNWIND_OP_H
+
+namespace llvm {
+
+ /// ARM exception handling table entry kinds
+ enum ARMEHTEntryKind {
+ EHT_GENERIC = 0x00,
+ EHT_COMPACT = 0x80
+ };
+
+ enum {
+    /// Special entry for functions that never unwind
+ EXIDX_CANTUNWIND = 0x1
+ };
+
+ /// ARM-defined frame unwinding opcodes
+ enum ARMUnwindOpcodes {
+ // Format: 00xxxxxx
+ // Purpose: vsp = vsp + ((x << 2) + 4)
+ UNWIND_OPCODE_INC_VSP = 0x00,
+
+ // Format: 01xxxxxx
+ // Purpose: vsp = vsp - ((x << 2) + 4)
+ UNWIND_OPCODE_DEC_VSP = 0x40,
+
+ // Format: 10000000 00000000
+ // Purpose: refuse to unwind
+ UNWIND_OPCODE_REFUSE = 0x8000,
+
+ // Format: 1000xxxx xxxxxxxx
+ // Purpose: pop r[15:12], r[11:4]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_REG_MASK_R4 = 0x8000,
+
+ // Format: 1001xxxx
+ // Purpose: vsp = r[x]
+ // Constraint: x != 13 && x != 15
+ UNWIND_OPCODE_SET_VSP = 0x90,
+
+ // Format: 10100xxx
+ // Purpose: pop r[(4+x):4]
+ UNWIND_OPCODE_POP_REG_RANGE_R4 = 0xa0,
+
+ // Format: 10101xxx
+ // Purpose: pop r14, r[(4+x):4]
+ UNWIND_OPCODE_POP_REG_RANGE_R4_R14 = 0xa8,
+
+ // Format: 10110000
+ // Purpose: finish
+ UNWIND_OPCODE_FINISH = 0xb0,
+
+ // Format: 10110001 0000xxxx
+ // Purpose: pop r[3:0]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_REG_MASK = 0xb100,
+
+ // Format: 10110010 x(uleb128)
+ // Purpose: vsp = vsp + ((x << 2) + 0x204)
+ UNWIND_OPCODE_INC_VSP_ULEB128 = 0xb2,
+
+ // Format: 10110011 xxxxyyyy
+ // Purpose: pop d[(x+y):x]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX = 0xb300,
+
+ // Format: 10111xxx
+ // Purpose: pop d[(8+x):8]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX_D8 = 0xb8,
+
+ // Format: 11000xxx
+ // Purpose: pop wR[(10+x):10]
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE_WR10 = 0xc0,
+
+ // Format: 11000110 xxxxyyyy
+ // Purpose: pop wR[(x+y):x]
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE = 0xc600,
+
+ // Format: 11000111 0000xxxx
+ // Purpose: pop wCGR[3:0]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_MASK = 0xc700,
+
+ // Format: 11001000 xxxxyyyy
+ // Purpose: pop d[(16+x+y):(16+x)]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 = 0xc800,
+
+ // Format: 11001001 xxxxyyyy
+ // Purpose: pop d[(x+y):x]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD = 0xc900,
+
+ // Format: 11010xxx
+ // Purpose: pop d[(8+x):8]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0
+ };
+
+}
+
+#endif // ARM_UNWIND_OP_H
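
As an illustration of how the streamer above consumes these constants: the
compact-model table entries built in EmitFnEnd and EmitHandlerData are just
byte-sized unwind opcodes shifted into a single 32-bit word. A minimal
standalone sketch (a hypothetical helper, not part of this commit):

    #include <cstdint>

    // Pack a header byte and up to three one-byte unwind opcodes into a
    // compact-model entry word, most significant byte first.
    static uint32_t packCompactEntry(uint8_t Header, uint8_t Op0,
                                     uint8_t Op1, uint8_t Op2) {
      return (uint32_t(Header) << 24) | (uint32_t(Op0) << 16) |
             (uint32_t(Op1) << 8) | uint32_t(Op2);
    }

    // packCompactEntry(0, UNWIND_OPCODE_FINISH, UNWIND_OPCODE_FINISH,
    //                  UNWIND_OPCODE_FINISH) == 0x00B0B0B0, exactly the
    //                  word EmitHandlerData emits today.
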
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index 256599412e8b..e17eb4d5e987 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMARMDesc
ARMAsmBackend.cpp
ARMELFObjectWriter.cpp
+ ARMELFStreamer.cpp
ARMMCAsmInfo.cpp
ARMMCCodeEmitter.cpp
ARMMCExpr.cpp
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 70643bcda3ac..2e266c2e9624 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -16,16 +16,16 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 3e48ed1189cc..f069535ff3c0 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -16,7 +16,7 @@ BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \
ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
- ARMGenEDInfo.inc ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
+ ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index 463c440852f5..a64707e6f34f 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -173,7 +173,6 @@ GCC is doing a couple of clever things here:
mov r1, #1
lsl r1, r1, #8
tst r2, r1
-
//===---------------------------------------------------------------------===//
@@ -196,7 +195,6 @@ This is especially bad when dynamic alloca is used. The all fixed size stack
objects are referenced off the frame pointer with negative offsets. See
oggenc for an example.
-
//===---------------------------------------------------------------------===//
Poor codegen test/CodeGen/ARM/select.ll f7:
diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index 500e3de82db3..fa5681fb12bf 100644
--- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index edd73c20c0be..2c3388cc452c 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -43,6 +43,41 @@ emitSPUpdate(MachineBasicBlock &MBB,
MRI, MIFlags);
}
+
+void Thumb1FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
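+      // e.g. Amount = 10, Align = 8 -> (10+7)/8*8 = 16.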
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
+ } else {
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -124,14 +159,17 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ bool HasFP = hasFP(MF);
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
NumBytes = DPRCSOffset;
// Adjust FP so it points to the stack slot that contains the previous FP.
- if (hasFP(MF)) {
+ if (HasFP) {
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0)
.setMIFlags(MachineInstr::FrameSetup));
@@ -146,7 +184,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
MachineInstr::FrameSetup);
- if (STI.isTargetELF() && hasFP(MF))
+ if (STI.isTargetELF() && HasFP)
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
@@ -281,7 +319,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
.addReg(ARM::R3, RegState::Kill);
AddDefaultPred(MIB);
- MIB->copyImplicitOps(&*MBBI);
+ MIB.copyImplicitOps(&*MBBI);
// erase the old tBX_RET instruction
MBB.erase(MBBI);
}
@@ -352,7 +390,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
- MIB->copyImplicitOps(&*MI);
+ MIB.copyImplicitOps(&*MI);
MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index bcfc5165fad0..5a300afd5d36 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -45,6 +45,10 @@ public:
const TargetRegisterInfo *TRI) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
};
} // End llvm namespace
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 735b255759b7..095736d52a88 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -15,8 +15,8 @@
#include "ARM.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index a39b722caef5..7452fb776ebd 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -18,21 +18,21 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
extern cl::opt<bool> ReuseFrameIndexVals;
@@ -296,47 +296,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
}
}
-static void emitSPUpdate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const TargetInstrInfo &TII, DebugLoc dl,
- const Thumb1RegisterInfo &MRI,
- int NumBytes) {
- emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
- MRI);
-}
-
-void Thumb1RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have alloca, convert as follows:
- // ADJCALLSTACKDOWN -> sub, sp, sp, amount
- // ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc dl = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- // Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
- } else {
- assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
- emitSPUpdate(MBB, I, TII, dl, *this, Amount);
- }
- }
- }
- MBB.erase(I);
-}
-
/// emitThumbConstant - Emit a series of instructions to materialize a
/// constant.
static void emitThumbConstant(MachineBasicBlock &MBB,
@@ -390,6 +349,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
@@ -417,7 +377,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset
MI.RemoveOperand(FrameRegIdx+1);
- MachineInstrBuilder MIB(&MI);
return true;
}
@@ -428,7 +387,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
if (Opcode == ARM::tADDi3) {
MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
- MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
.addImm(Offset / Scale));
} else {
@@ -457,7 +415,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
if (Opcode == ARM::tADDi3) {
MI.setDesc(TII.get(Opcode));
removeOperands(MI, FrameRegIdx);
- MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
} else {
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
@@ -595,22 +552,18 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
void
Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
unsigned VReg = 0;
- unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
+ MachineInstrBuilder MIB(*MBB.getParent(), &MI);
unsigned FrameReg = ARM::SP;
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
MF.getFrameInfo()->getStackSize() + SPAdj;
@@ -635,7 +588,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// means the stack pointer cannot be used to access the emergency spill slot
// when !hasReservedCallFrame().
#ifndef NDEBUG
- if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
@@ -647,15 +600,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
- MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
return;
}
// Modify MI as necessary to handle as much of 'Offset' as possible
assert(AFI->isThumbFunction() &&
"This eliminateFrameIndex only supports Thumb1!");
- if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+ if (rewriteFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
return;
// If we get here, the immediate doesn't fit into the instruction. We folded
@@ -688,11 +641,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
- MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true);
if (UseRR)
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
- MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
+ false);
} else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
bool UseRR = false;
@@ -709,18 +663,17 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
*this);
MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
- MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
+ MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true);
if (UseRR)
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
- MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
+ false);
} else {
llvm_unreachable("Unexpected opcode!");
}
// Add predicate back if it's needed.
- if (MI.isPredicable()) {
- MachineInstrBuilder MIB(&MI);
+ if (MI.isPredicable())
AddDefaultPred(MIB);
- }
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index f2e4b08f798e..ebbab36dd7b8 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -43,11 +43,6 @@ public:
unsigned PredReg = 0,
unsigned MIFlags = MachineInstr::NoFlags) const;
- /// Code Generation virtual methods...
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
// rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
// however much remains to be handled. Return 'true' if no further
// work is required.
@@ -62,7 +57,8 @@ public:
const TargetRegisterClass *RC,
unsigned Reg) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
};
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index d54aa935325c..97c254ce75a5 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -11,12 +11,12 @@
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumITs, "Number of IT blocks inserted");
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index e9e20ddd8783..67e8ec7c5ff2 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -51,7 +51,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
MachineBasicBlock *MBB = Tail->getParent();
ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
if (!AFI->hasITBlocks()) {
- TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
return;
}
@@ -65,7 +65,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
--MBBI;
// Actually replace the tail.
- TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
// Fix up IT.
if (CC != ARMCC::AL) {
@@ -408,7 +408,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Remove offset and remaining explicit predicate operands.
do MI.RemoveOperand(FrameRegIdx+1);
while (MI.getNumOperands() > FrameRegIdx+1);
- MachineInstrBuilder MIB(&MI);
+ MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
AddDefaultPred(MIB);
return true;
}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 29a87d016227..1a7a4d450cfe 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -16,12 +16,12 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index f18f491f4995..d50f5d972232 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -9,19 +9,21 @@
#define DEBUG_TYPE "t2-reduce-size"
#include "ARM.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/Function.h" // To access Function attributes
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
@@ -52,81 +54,79 @@ namespace {
unsigned PredCC2 : 2;
unsigned PartFlag : 1; // 16-bit instruction does partial flag update
unsigned Special : 1; // Needs to be dealt with specially
+ unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
};
static const ReduceEntry ReduceTable[] = {
- // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S
- { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 },
- { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 },
- { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 },
- { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 },
- { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 },
- { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 },
- //FIXME: Disable CMN, as CCodes are backwards from compare expectations
- //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 },
- { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 },
- // FIXME: adr.n immediate offset must be multiple of 4.
- //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
- { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 },
- // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
- // likely to cause issue in the loop. As a size / performance workaround,
- // they are not marked as such.
- { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 },
- // FIXME: Do we need the 16-bit 'S' variant?
- { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 },
- { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 },
- { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 },
- { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 },
- { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 },
- { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 },
- { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 },
- { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 },
- { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 },
- { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 },
- { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 },
-
- // FIXME: Clean this up after splitting each Thumb load / store opcode
- // into multiple ones.
- { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 },
- { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
- { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
-
- { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 },
- // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
- { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 },
- { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 },
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
+  // FIXME: Disable CMN, as CCodes are backwards from compare expectations
+ //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ // FIXME: adr.n immediate offset must be multiple of 4.
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 },
+ // FIXME: Do we need the 16-bit 'S' variant?
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+ { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+ { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+
+ // FIXME: Clean this up after splitting each Thumb load / store opcode
+ // into multiple ones.
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
+ // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
};
class Thumb2SizeReduce : public MachineFunctionPass {
@@ -147,8 +147,7 @@ namespace {
/// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
DenseMap<unsigned, unsigned> ReduceOpcodeMap;
- bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
- bool IsSelfLoop);
+ bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
bool is2Addr, ARMCC::CondCodes Pred,
@@ -158,30 +157,52 @@ namespace {
const ReduceEntry &Entry);
bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
- const ReduceEntry &Entry, bool LiveCPSR,
- MachineInstr *CPSRDef, bool IsSelfLoop);
+ const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
/// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
/// instruction.
bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
- const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
+ const ReduceEntry &Entry, bool LiveCPSR,
bool IsSelfLoop);
/// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
/// non-two-address instruction.
bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
- const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
+ const ReduceEntry &Entry, bool LiveCPSR,
bool IsSelfLoop);
+ /// ReduceMI - Attempt to reduce MI, return true on success.
+ bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, bool IsSelfLoop);
+
/// ReduceMBB - Reduce width of instructions in the specified basic block.
bool ReduceMBB(MachineBasicBlock &MBB);
+
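+  // Whether the current function is optimized (-Os) or minimized (-Oz) for
+  // size; presumably derived from the Function attributes (see the
+  // Function.h include above).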
+ bool OptimizeSize;
+ bool MinimizeSize;
+
+ // Last instruction to define CPSR in the current block.
+ MachineInstr *CPSRDef;
+ // Was CPSR last defined by a high latency instruction?
+ // When CPSRDef is null, this refers to CPSR defs in predecessors.
+ bool HighLatencyCPSR;
+
+ struct MBBInfo {
+ // The flags leaving this block have high latency.
+ bool HighLatencyCPSR;
+ // Has this block been visited yet?
+ bool Visited;
+
+ MBBInfo() : HighLatencyCPSR(false), Visited(false) {}
+ };
+
+ SmallVector<MBBInfo, 8> BlockInfo;
};
char Thumb2SizeReduce::ID = 0;
}
Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+ OptimizeSize = MinimizeSize = false;
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -196,6 +217,16 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
return false;
}
+// Check for a likely high-latency flag def.
+static bool isHighLatencyCPSR(MachineInstr *Def) {
+ switch(Def->getOpcode()) {
+ case ARM::FMSTAT:
+ case ARM::tMUL:
+ return true;
+ }
+ return false;
+}
+
/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
/// 16-bit instructions with the 's' bit set partially update CPSR. Abort the
/// transformation to avoid adding a false dependency on the last CPSR setting
@@ -214,20 +245,19 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
/// In this case it would have been ok to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w
bool
-Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
- bool FirstInSelfLoop) {
- // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
- if (!STI->avoidCPSRPartialUpdate())
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
+ // Disable the check for -Oz (aka OptimizeForSizeHarder).
+ if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
return false;
- if (!Def)
+ if (!CPSRDef)
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
- return FirstInSelfLoop;
+ return HighLatencyCPSR || FirstInSelfLoop;
SmallSet<unsigned, 2> Defs;
- for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = Def->getOperand(i);
+ for (unsigned i = 0, e = CPSRDef->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = CPSRDef->getOperand(i);
if (!MO.isReg() || MO.isUndef() || MO.isUse())
continue;
unsigned Reg = MO.getReg();
@@ -245,6 +275,16 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
return false;
}
+ // If the current CPSR has high latency, try to avoid the false dependency.
+ if (HighLatencyCPSR)
+ return true;
+
+ // tMOVi8 usually doesn't start long dependency chains, and there are a lot
+ // of them, so always shrink them when CPSR doesn't have high latency.
+ if (Use->getOpcode() == ARM::t2MOVi ||
+ Use->getOpcode() == ARM::t2MOVi16)
+ return false;
+
// No read-after-write dependency. The narrowing will add false dependency.
return true;
}
@@ -487,16 +527,15 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
- bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop) {
unsigned Opc = MI->getOpcode();
if (Opc == ARM::t2ADDri) {
// If the source register is SP, try to reduce to tADDrSPi, otherwise
// it's a normal reduce.
if (MI->getOperand(1).getReg() != ARM::SP) {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
// Try to reduce to tADDrSPi.
unsigned Imm = MI->getOperand(2).getImm();
@@ -546,12 +585,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
switch (Opc) {
default: break;
case ARM::t2ADDSri: {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
// fallthrough
}
case ARM::t2ADDSrr:
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
}
break;
@@ -563,13 +602,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2UXTB:
case ARM::t2UXTH:
if (MI->getOperand(2).getImm() == 0)
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
break;
case ARM::t2MOVi16:
// Can convert only 'pure' immediate operands, not immediates obtained as
// globals' addresses.
if (MI->getOperand(1).isImm())
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
break;
case ARM::t2CMPrr: {
// Try to reduce to the lo-reg only version first. Why there are two
@@ -578,10 +617,10 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// are prioritized, but the table assumes a unique entry for each
// source insn opcode. So for now, we hack a local entry record to use.
static const ReduceEntry NarrowEntry =
- { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
- if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
+ { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
}
return false;
@@ -590,12 +629,17 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
- bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop) {
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
+ if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+ STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing / minimizing for size.
+ return false;
+
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
// t2MUL is "special". The tied source operand is second, not first.
@@ -666,7 +710,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which have the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
+ canAddPseudoFlagDep(MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
@@ -703,11 +747,16 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
- bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop) {
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
+ if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+ STI->avoidMOVsShifterOperand())
+ // Don't issue movs with shifter operand for some CPUs unless we
+ // are optimizing / minimizing for size.
+ return false;
+
unsigned Limit = ~0U;
if (Entry.Imm1Limit)
Limit = (1 << Entry.Imm1Limit) - 1;
@@ -757,7 +806,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
+ canAddPseudoFlagDep(MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
@@ -841,14 +890,57 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
return LiveCPSR;
}
+bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+ bool LiveCPSR, bool IsSelfLoop) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+ if (OPI == ReduceOpcodeMap.end())
+ return false;
+ const ReduceEntry &Entry = ReduceTable[OPI->second];
+
+ // Don't attempt normal reductions on "special" cases for now.
+ if (Entry.Special)
+ return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+
+ // Try to transform to a 16-bit two-address instruction.
+ if (Entry.NarrowOpc2 &&
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+
+ // Try to transform to a 16-bit non-two-address instruction.
+ if (Entry.NarrowOpc1 &&
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+ return true;
+
+ return false;
+}
+
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Modified = false;
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
- MachineInstr *CPSRDef = 0;
MachineInstr *BundleMI = 0;
+ CPSRDef = 0;
+ HighLatencyCPSR = false;
+
+ // Check predecessors for the latest CPSRDef.
+ bool HasBackEdges = false;
+ for (MachineBasicBlock::pred_iterator
+ I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
+ const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
+ if (!PInfo.Visited) {
+ // Since blocks are visited in RPO, this must be a back-edge.
+ HasBackEdges = true;
+ continue;
+ }
+ if (PInfo.HighLatencyCPSR) {
+ HighLatencyCPSR = true;
+ break;
+ }
+ }
+
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
bool IsSelfLoop = MBB.isSuccessor(&MBB);
@@ -862,43 +954,25 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
BundleMI = MI;
continue;
}
+ if (MI->isDebugValue())
+ continue;
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
- unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
- if (OPI != ReduceOpcodeMap.end()) {
- const ReduceEntry &Entry = ReduceTable[OPI->second];
- // Ignore "special" cases for now.
- if (Entry.Special) {
- if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- }
- goto ProcessNext;
- }
-
- // Try to transform to a 16-bit two-address instruction.
- if (Entry.NarrowOpc2 &&
- ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- goto ProcessNext;
- }
-
- // Try to transform to a 16-bit non-two-address instruction.
- if (Entry.NarrowOpc1 &&
- ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
- Modified = true;
- MachineBasicBlock::instr_iterator I = prior(NextMII);
- MI = &*I;
- }
+ // Does NextMII belong to the same bundle as MI?
+ bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
+
+ if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
+ Modified = true;
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
+ MI = &*I;
+ // Removing and reinserting the first instruction in a bundle will break
+ // up the bundle. Fix the bundling if it was broken.
+ if (NextInSameBundle && !NextMII->isBundledWithPred())
+ NextMII->bundleWithPred();
}
- ProcessNext:
- if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
+ if (!NextInSameBundle && MI->isInsideBundle()) {
// FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
// marker is only on the BUNDLE instruction. Process the BUNDLE
// instruction as we finish with the bundled instruction to work around
@@ -915,14 +989,19 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (MI->isCall()) {
// Calls don't really set CPSR.
CPSRDef = 0;
+ HighLatencyCPSR = false;
IsSelfLoop = false;
} else if (DefCPSR) {
// This is the last CPSR defining instruction.
CPSRDef = MI;
+ HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
IsSelfLoop = false;
}
}
+ MBBInfo &Info = BlockInfo[MBB.getNumber()];
+ Info.HighLatencyCPSR = HighLatencyCPSR;
+ Info.Visited = true;
return Modified;
}
@@ -931,9 +1010,23 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
STI = &TM.getSubtarget<ARMSubtarget>();
+ // Optimizing / minimizing size?
+ AttributeSet FnAttrs = MF.getFunction()->getAttributes();
+ OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
+ MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize);
+
+ BlockInfo.clear();
+ BlockInfo.resize(MF.getNumBlockIDs());
+
+ // Visit blocks in reverse post-order so LastCPSRDef is known for all
+ // predecessors.
+ ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
bool Modified = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Modified |= ReduceMBB(*I);
+ for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
+ I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+ Modified |= ReduceMBB(**I);
return Modified;
}
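
The rewritten pass computes CPSR state per basic block instead of threading a
CPSRDef pointer through every call. Visiting blocks in reverse post-order
guarantees that every forward predecessor is processed before its successor,
so the only predecessors with unknown state are back-edges. A minimal sketch
of that traversal idiom follows; it is an illustration only, not part of the
patch, and the Visited array stands in for the pass's MBBInfo bookkeeping:

#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <vector>

using namespace llvm;

// Propagate a per-block fact in one RPO sweep. Any predecessor that is
// not yet marked Visited must reach this block via a back-edge, so its
// state is unknown and has to be treated conservatively.
static void sweepInRPO(MachineFunction &MF) {
  std::vector<bool> Visited(MF.getNumBlockIDs(), false);
  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
  for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
         I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
    MachineBasicBlock *MBB = *I;
    for (MachineBasicBlock::pred_iterator
           PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) {
      if (!Visited[(*PI)->getNumber()]) {
        // Back-edge: be conservative about this predecessor's state.
      }
    }
    Visited[MBB->getNumber()] = true;
  }
}
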
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index a85acaaa1494..02ac493b4215 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -1,16 +1,13 @@
add_llvm_library(LLVMTarget
Mangler.cpp
Target.cpp
- TargetInstrInfo.cpp
TargetIntrinsicInfo.cpp
TargetJITInfo.cpp
TargetLibraryInfo.cpp
TargetLoweringObjectFile.cpp
TargetMachine.cpp
TargetMachineC.cpp
- TargetRegisterInfo.cpp
TargetSubtargetInfo.cpp
- TargetTransformImpl.cpp
)
foreach(t ${LLVM_TARGETS_TO_BUILD})
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
deleted file mode 100644
index 1f8ca8681c09..000000000000
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS SPU.td)
-
-tablegen(LLVM SPUGenAsmWriter.inc -gen-asm-writer)
-tablegen(LLVM SPUGenCodeEmitter.inc -gen-emitter)
-tablegen(LLVM SPUGenRegisterInfo.inc -gen-register-info)
-tablegen(LLVM SPUGenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM SPUGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM SPUGenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM SPUGenCallingConv.inc -gen-callingconv)
-add_public_tablegen_target(CellSPUCommonTableGen)
-
-add_llvm_target(CellSPUCodeGen
- SPUAsmPrinter.cpp
- SPUHazardRecognizers.cpp
- SPUInstrInfo.cpp
- SPUISelDAGToDAG.cpp
- SPUISelLowering.cpp
- SPUFrameLowering.cpp
- SPUMachineFunction.cpp
- SPURegisterInfo.cpp
- SPUSubtarget.cpp
- SPUTargetMachine.cpp
- SPUSelectionDAGInfo.cpp
- SPUNopFiller.cpp
- )
-
-add_dependencies(LLVMCellSPUCodeGen intrinsics_gen)
-
-add_subdirectory(TargetInfo)
-add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td
deleted file mode 100644
index cdb4099ffbca..000000000000
--- a/lib/Target/CellSPU/CellSDKIntrinsics.td
+++ /dev/null
@@ -1,449 +0,0 @@
-//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-///--==-- Arithmetic ops intrinsics --==--
-def CellSDKah:
- RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>;
-def CellSDKahi:
- RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>;
-def CellSDKa:
- RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>;
-def CellSDKai:
- RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>;
-def CellSDKsfh:
- RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>;
-def CellSDKsfhi:
- RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>;
-def CellSDKsf:
- RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>;
-def CellSDKsfi:
- RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>;
-def CellSDKaddx:
- RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>;
-def CellSDKcg:
- RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>;
-def CellSDKcgx:
- RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>;
-def CellSDKsfx:
- RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>;
-def CellSDKbg:
- RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>;
-def CellSDKbgx:
- RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>;
-
-def CellSDKmpy:
- RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpy $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyu:
- RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyu $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))] >;
-
-def CellSDKmpyi:
- RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "mpyi $rT, $rA, $val", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA),
- i16ImmSExt10:$val))]>;
-
-def CellSDKmpyui:
- RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "mpyui $rT, $rA, $val", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA),
- i16ImmSExt10:$val))]>;
-
-def CellSDKmpya:
- RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "mpya $rT, $rA, $rB, $rC", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB),
- (v8i16 VECREG:$rC)))]>;
-
-def CellSDKmpyh:
- RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyh $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpys:
- RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpys $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyhh:
- RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhh $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyhha:
- RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhha $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave
-// as an intrinsic for the time being
-def CellSDKmpyhhu:
- RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhhu $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyhhau:
- RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhhau $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKand:
- RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "and\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKandc:
- RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "andc\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKandbi:
- RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "andbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKandhi:
- RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "andhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKandi:
- RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "andi\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKor:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "or\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKorc:
- RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
-          "orc\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKorbi:
- RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "orbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKorhi:
- RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "orhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKori:
- RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ori\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKxor:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "xor\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKxorbi:
- RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "xorbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKxorhi:
- RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "xorhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKxori:
- RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "xori\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKnor:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "nor\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKnand:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "nand\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-//===----------------------------------------------------------------------===//
-// Shift/rotate intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKshli:
- Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val),
- (SHLIv4i32 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def CellSDKshlqbi:
- Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB),
- (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>;
-
-def CellSDKshlqii:
- Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val),
- (SHLQBIIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def CellSDKshlqby:
- Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB),
- (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>;
-
-def CellSDKshlqbyi:
- Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val),
- (SHLQBYIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-
-//===----------------------------------------------------------------------===//
-// Branch/compare intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKceq:
- RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ceq\t $rT, $rA, $rB", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKceqi:
- RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ceqi\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKceqb:
- RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ceqb\t $rT, $rA, $rB", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
-
-def CellSDKceqbi:
- RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "ceqbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKceqh:
- RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ceqh\t $rT, $rA, $rB", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
-
-def CellSDKceqhi:
- RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ceqhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-def CellSDKcgth:
- RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "cgth\t $rT, $rA, $rB", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
-
-def CellSDKcgthi:
- RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "cgthi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKcgt:
- RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "cgt\t $rT, $rA, $rB", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKcgti:
- RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "cgti\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKcgtb:
- RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "cgtb\t $rT, $rA, $rB", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
-
-def CellSDKcgtbi:
- RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "cgtbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKclgth:
- RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "clgth\t $rT, $rA, $rB", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
-
-def CellSDKclgthi:
- RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "clgthi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKclgt:
- RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "clgt\t $rT, $rA, $rB", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKclgti:
- RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "clgti\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKclgtb:
- RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "clgtb\t $rT, $rA, $rB", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
-
-def CellSDKclgtbi:
- RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "clgtbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-//===----------------------------------------------------------------------===//
-// Floating-point intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKfa:
- RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fa\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfs:
- RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fs\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfm:
- RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fm\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfceq:
- RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fceq\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfcgt:
- RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fcgt\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfcmeq:
- RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fcmeq\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfcmgt:
- RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fcmgt\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfma:
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fma\t $rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rC)))]>;
-
-def CellSDKfnms:
- RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fnms\t $rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rC)))]>;
-
-def CellSDKfms:
- RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fms\t $rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rC)))]>;
-
-//===----------------------------------------------------------------------===//
-// Double precision floating-point intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKdfa:
- RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfa\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfs:
- RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfs\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfm:
- RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfm\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfma:
- RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfma\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfnma:
- RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfnma\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfnms:
- RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfnms\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfms:
- RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfms\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt
deleted file mode 100644
index 277620bf4e59..000000000000
--- a/lib/Target/CellSPU/LLVMBuild.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-;===- ./lib/Target/CellSPU/LLVMBuild.txt -----------------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[common]
-subdirectories = MCTargetDesc TargetInfo
-
-[component_0]
-type = TargetGroup
-name = CellSPU
-parent = Target
-has_asmprinter = 1
-
-[component_1]
-type = Library
-name = CellSPUCodeGen
-parent = CellSPU
-required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target
-add_to_library_groups = CellSPU
diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 0027bdbf6ca1..000000000000
--- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-add_llvm_library(LLVMCellSPUDesc
- SPUMCTargetDesc.cpp
- SPUMCAsmInfo.cpp
- )
-
-add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
deleted file mode 100644
index 71e5bbc629ca..000000000000
--- a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = CellSPUDesc
-parent = CellSPU
-required_libraries = CellSPUInfo MC
-add_to_library_groups = CellSPU
diff --git a/lib/Target/CellSPU/MCTargetDesc/Makefile b/lib/Target/CellSPU/MCTargetDesc/Makefile
deleted file mode 100644
index 10d9a42239ad..000000000000
--- a/lib/Target/CellSPU/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMCellSPUDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
deleted file mode 100644
index 4bad37eacaf7..000000000000
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the SPUMCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUMCAsmInfo.h"
-using namespace llvm;
-
-void SPULinuxMCAsmInfo::anchor() { }
-
-SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
- IsLittleEndian = false;
-
- ZeroDirective = "\t.space\t";
- Data64bitsDirective = "\t.quad\t";
- AlignmentIsInBytes = false;
-
- PCSymbol = ".";
- CommentString = "#";
- GlobalPrefix = "";
- PrivateGlobalPrefix = ".L";
-
- // Has leb128
- HasLEB128 = true;
-
- SupportsDebugInformation = true;
-
- // Exception handling is not supported on CellSPU (think about it: you only
- // have 256K for code+data. Would you support exception handling?)
- ExceptionsType = ExceptionHandling::None;
-
- // SPU assembly requires ".section" before ".bss"
- UsesELFSectionDirectiveForBSS = true;
-}
-
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
deleted file mode 100644
index f786147b9267..000000000000
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the SPUMCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPUTARGETASMINFO_H
-#define SPUTARGETASMINFO_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCAsmInfo.h"
-
-namespace llvm {
- class Target;
-
- class SPULinuxMCAsmInfo : public MCAsmInfo {
- virtual void anchor();
- public:
- explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
- };
-} // namespace llvm
-
-#endif /* SPUTARGETASMINFO_H */
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
deleted file mode 100644
index 8450e2c6634c..000000000000
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Cell SPU specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUMCTargetDesc.h"
-#include "SPUMCAsmInfo.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "SPUGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "SPUGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "SPUGenRegisterInfo.inc"
-
-using namespace llvm;
-
-static MCInstrInfo *createSPUMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitSPUMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitSPUMCRegisterInfo(X, SPU::R0);
- return X;
-}
-
-static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitSPUMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) {
- MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT);
-
- // Initial state of the frame pointer is R1.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(SPU::R1, 0);
- MAI->addInitialFrameState(0, Dst, Src);
-
- return MAI;
-}
-
-static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- // For the time being, use static relocations, since there's really no
- // support for PIC yet.
- X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
- return X;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeCellSPUTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget,
- createSPUMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget,
- createCellSPUMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
- createSPUMCSubtargetInfo);
-}
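
Although the CellSPU backend is deleted here, the file above followed the MC
registration idiom that the remaining in-tree backends still use: an
extern "C" entry point, run during static target initialization, installs
factory callbacks on the TargetRegistry keyed by the Target object. A minimal
sketch with a hypothetical target follows; TheFooTarget and InitFooMCInstrInfo
are placeholders, not real LLVM symbols:

#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;

extern Target TheFooTarget;  // hypothetical; normally defined in TargetInfo

static MCInstrInfo *createFooMCInstrInfo() {
  MCInstrInfo *X = new MCInstrInfo();
  // InitFooMCInstrInfo(X);  // tablegen-generated initializer (placeholder)
  return X;
}

// Force static initialization: InitializeAllTargetMCs() ends up calling
// this once, before any use of the target's MC components.
extern "C" void LLVMInitializeFooTargetMC() {
  TargetRegistry::RegisterMCInstrInfo(TheFooTarget, createFooMCInstrInfo);
}
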
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
deleted file mode 100644
index d26449e8908f..000000000000
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides CellSPU specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPUMCTARGETDESC_H
-#define SPUMCTARGETDESC_H
-
-namespace llvm {
-class Target;
-
-extern Target TheCellSPUTarget;
-
-} // End llvm namespace
-
-// Define symbolic names for Cell registers. This defines a mapping from
-// register name to register number.
-//
-#define GET_REGINFO_ENUM
-#include "SPUGenRegisterInfo.inc"
-
-// Defines symbolic names for the SPU instructions.
-//
-#define GET_INSTRINFO_ENUM
-#include "SPUGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "SPUGenSubtargetInfo.inc"
-
-#endif
diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile
deleted file mode 100644
index d7a8247f5702..000000000000
--- a/lib/Target/CellSPU/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMCellSPUCodeGen
-TARGET = SPU
-BUILT_SOURCES = SPUGenInstrInfo.inc SPUGenRegisterInfo.inc \
- SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \
- SPUGenDAGISel.inc \
- SPUGenSubtargetInfo.inc SPUGenCallingConv.inc
-
-DIRS = TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
deleted file mode 100644
index 3bce9609bfef..000000000000
--- a/lib/Target/CellSPU/README.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-//===- README.txt - Notes for improving CellSPU-specific code gen ---------===//
-
-This code was contributed by a team from the Computer Systems Research
-Department in The Aerospace Corporation:
-
-- Scott Michel (head bottle washer and much of the non-floating point
- instructions)
-- Mark Thomas (floating point instructions)
-- Michael AuYeung (intrinsics)
-- Chandler Carruth (LLVM expertise)
-- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
-
-Some minor fixes added by Kalle Raiskila.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR
-OTHERWISE. IN NO EVENT SHALL THE AEROSPACE CORPORATION BE LIABLE FOR DAMAGES
-OF ANY KIND OR NATURE WHETHER BASED IN CONTRACT, TORT, OR OTHERWISE ARISING
-OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE INCLUDING, WITHOUT
-LIMITATION, DAMAGES RESULTING FROM LOST OR CONTAMINATED DATA, LOST PROFITS OR
-REVENUE, COMPUTER MALFUNCTION, OR FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL,
-OR PUNITIVE DAMAGES, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES OR
-SUCH DAMAGES ARE FORESEEABLE.
-
----------------------------------------------------------------------------
---WARNING--:
---WARNING--: The CellSPU work is work-in-progress and "alpha" quality code.
---WARNING--:
-
-If you are brave enough to try this code or help to hack on it, be sure
-to add 'spu' to configure's --enable-targets option, e.g.:
-
- ./configure <your_configure_flags_here> \
- --enable-targets=x86,x86_64,powerpc,spu
-
----------------------------------------------------------------------------
-
-TODO:
-* In commit r142152 vector legalization was set to element promotion per
- default. This breaks half vectors (e.g. v2i32) badly as they get element
- promoted to much slower types (v2i64).
-
-* Many CellSPU specific codegen tests only grep & count the number of
- instructions, not checking their place with FileCheck. There have also
-  been some commits that change the CellSPU checks, some of which might
-  not have been thoroughly scrutinized w.r.t. the changes they cause in SPU
-  assembly (especially since about the time of r142152).
-
-* Some of the i64 math has huge tablegen rules, which sometimes cause
-  tablegen to run out of memory. See e.g. bug 8850. i64 arithmetic
-  should probably be done with libraries.
-
-* Create a machine pass for performing dual-pipeline scheduling specifically
- for CellSPU, and insert branch prediction instructions as needed.
-
-* i32 instructions:
-
- * i32 division (work-in-progress)
-
-* i64 support (see i64operations.c test harness):
-
- * shifts and comparison operators: done
- * sign and zero extension: done
- * addition: done
- * subtraction: needed
- * multiplication: done
-
-* i128 support:
-
- * zero extension, any extension: done
- * sign extension: done
- * arithmetic operators (add, sub, mul, div): needed
- * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed
-
- * or: done
-
-* f64 support
-
- * Comparison operators:
- SETOEQ unimplemented
- SETOGT unimplemented
- SETOGE unimplemented
- SETOLT unimplemented
- SETOLE unimplemented
- SETONE unimplemented
- SETO done (lowered)
- SETUO done (lowered)
- SETUEQ unimplemented
- SETUGT unimplemented
- SETUGE unimplemented
- SETULT unimplemented
- SETULE unimplemented
- SETUNE unimplemented
-
-* LLVM vector support
-
-  * VSETCC needs to be implemented. It's pretty straightforward to code,
-    but simply hasn't been done yet.
-
-* Intrinsics
-
-  * spu.h intrinsics added but not tested. Need to have an operational
- llvm-spu-gcc in order to write a unit test harness.
-
-===-------------------------------------------------------------------------===
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
deleted file mode 100644
index c660131706cb..000000000000
--- a/lib/Target/CellSPU/SPU.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- SPU.h - Top-level interface for Cell SPU Target ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in the LLVM
-// Cell SPU back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_IBMCELLSPU_H
-#define LLVM_TARGET_IBMCELLSPU_H
-
-#include "MCTargetDesc/SPUMCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- class SPUTargetMachine;
- class FunctionPass;
- class formatted_raw_ostream;
-
- FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
- FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
-
-}
-
-#endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/lib/Target/CellSPU/SPU.td b/lib/Target/CellSPU/SPU.td
deleted file mode 100644
index e835b9cac8e1..000000000000
--- a/lib/Target/CellSPU/SPU.td
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- SPU.td - Describe the STI Cell SPU Target Machine --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is the top level entry point for the STI Cell SPU target machine.
-//
-//===----------------------------------------------------------------------===//
-
-// Get the target-independent interfaces which we are implementing.
-//
-include "llvm/Target/Target.td"
-
-// Holder of code fragments (you'd think this'd already be in
-// a td file somewhere... :-)
-
-class CodeFrag<dag frag> {
- dag Fragment = frag;
-}
-
-//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "SPURegisterInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction formats, instructions
-//===----------------------------------------------------------------------===//
-
-include "SPUNodes.td"
-include "SPUOperands.td"
-include "SPUSchedule.td"
-include "SPUInstrFormats.td"
-include "SPUInstrInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Subtarget features:
-//===----------------------------------------------------------------------===//
-
-def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">;
-def LargeMemFeature:
- SubtargetFeature<"large_mem","UseLargeMem", "true",
- "Use large (>256) LSA memory addressing [default = false]">;
-
-def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>;
-
-//===----------------------------------------------------------------------===//
-// Calling convention:
-//===----------------------------------------------------------------------===//
-
-include "SPUCallingConv.td"
-
-// Target:
-
-def SPUInstrInfo : InstrInfo {
- let isLittleEndianEncoding = 1;
-}
-
-def SPU : Target {
- let InstructionSet = SPUInstrInfo;
-}
diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td
deleted file mode 100644
index e051e047333a..000000000000
--- a/lib/Target/CellSPU/SPU128InstrInfo.td
+++ /dev/null
@@ -1,41 +0,0 @@
-//===-- SPU128InstrInfo.td - Cell SPU 128-bit operations --*- tablegen -*--===//
-//
-// Cell SPU 128-bit operations
-//
-//===----------------------------------------------------------------------===//
-
-// zext 32->128: Zero extend 32-bit to 128-bit
-def : Pat<(i128 (zext R32C:$rSrc)),
- (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
-
-// zext 64->128: Zero extend 64-bit to 128-bit
-def : Pat<(i128 (zext R64C:$rSrc)),
- (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
-
-// zext 16->128: Zero extend 16-bit to 128-bit
-def : Pat<(i128 (zext R16C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
-
-// zext 8->128: Zero extend 8-bit to 128-bit
-def : Pat<(i128 (zext R8C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
-
-// anyext 32->128: Zero extend 32-bit to 128-bit
-def : Pat<(i128 (anyext R32C:$rSrc)),
- (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
-
-// anyext 64->128: Zero extend 64-bit to 128-bit
-def : Pat<(i128 (anyext R64C:$rSrc)),
- (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
-
-// anyext 16->128: Zero extend 16-bit to 128-bit
-def : Pat<(i128 (anyext R16C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
-
-// anyext 8->128: Zero extend 8-bit to 128-bit
-def : Pat<(i128 (anyext R8C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
-
-// Shift left
-def : Pat<(shl GPRC:$rA, R32C:$rB),
- (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
deleted file mode 100644
index bea33b5362d2..000000000000
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ /dev/null
@@ -1,408 +0,0 @@
-//====-- SPU64InstrInfo.td - Cell SPU 64-bit operations ---*- tablegen -*--===//
-//
-// Cell SPU 64-bit operations
-//
-//===----------------------------------------------------------------------===//
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// 64-bit comparisons:
-//
-// 1.  The instruction sequences for vector versus scalar differ by a
-// constant. In the scalar case, we're only interested in the
-// top two 32-bit slots, whereas we're interested in an exact
-// all-four-slot match in the vector case.
-//
-// 2. There are no "immediate" forms, since loading 64-bit constants
-// could be a constant pool load.
-//
-// 3. i64 setcc results are i32, which are subsequently converted to a FSM
-// mask when used in a select pattern.
-//
-// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
-// [Note: this may be moot, since gb produces v4i32 or r32.]
-//
-// 5. The code sequences for r64 and v2i64 are probably overly conservative,
-// compared to the code that gcc produces.
-//
-// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// selb instruction definition for i64. Note that the selection mask is
-// a vector, produced by various forms of FSM:
-def SELBr64_cond:
- SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
- [/* no pattern */]>;
-
-// The generic i64 select pattern, which assumes that the comparison result
-// is in a 32-bit register that contains a select mask pattern (i.e., gather
-// bits result):
-
-def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
- (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
-
-// select the negative condition:
-class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
- Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
- (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
-
-// setcc the negative condition:
-class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
- Pat<(cond R64C:$rA, R64C:$rB),
- (XORIr32 compare.Fragment, -1)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// The i64 seteq fragment that does the scalar->vector conversion and
-// comparison:
-def CEQr64compare:
- CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;
-
-// The i64 seteq fragment that does the vector comparison
-def CEQv2i64compare:
- CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
-
-// i64 seteq (equality): the setcc result is i32, which is converted to a
-// vector FSM mask when used in a select pattern.
-//
-// v2i64 seteq (equality): the setcc result is v4i32
-multiclass CompareEqual64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CEQr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
-}
-
-defm I64EQ: CompareEqual64;
-
-def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
-def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
-
-// i64 setne:
-def : I64SETCCNegCond<setne, I64EQr64>;
-def : I64SELECTNegCond<setne, I64EQr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setugt/setule:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CLGTr64ugt:
- CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CLGTr64eq:
- CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CLGTr64compare:
- CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
- (XSWDv2i64 CLGTr64ugt.Fragment),
- CLGTr64eq.Fragment)>;
-
-def CLGTv2i64ugt:
- CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CLGTv2i64eq:
- CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CLGTv2i64compare:
- CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
- (XSWDv2i64 CLGTr64ugt.Fragment),
- CLGTv2i64eq.Fragment)>;
-
-multiclass CompareLogicalGreaterThan64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
-}
-
-defm I64LGT: CompareLogicalGreaterThan64;
-
-def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
-//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
-// I64LGTv2i64.Fragment>;
-
-// i64 setult:
-def : I64SETCCNegCond<setule, I64LGTr64>;
-def : I64SELECTNegCond<setule, I64LGTr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setuge/setult:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CLGEr64compare:
- CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
- CLGTr64eq.Fragment)), 0xb)>;
-
-def CLGEv2i64compare:
- CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
- CLGTv2i64eq.Fragment)), 0xf)>;
-
-multiclass CompareLogicalGreaterEqual64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
-}
-
-defm I64LGE: CompareLogicalGreaterEqual64;
-
-def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
-def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
- I64LGEv2i64.Fragment>;
-
-
-// i64 setult:
-def : I64SETCCNegCond<setult, I64LGEr64>;
-def : I64SELECTNegCond<setult, I64LGEr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setgt/setle:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CGTr64sgt:
- CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CGTr64eq:
- CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CGTr64compare:
- CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
- (XSWDv2i64 CGTr64sgt.Fragment),
- CGTr64eq.Fragment)>;
-
-def CGTv2i64sgt:
- CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CGTv2i64eq:
- CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CGTv2i64compare:
- CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
- (XSWDv2i64 CGTr64sgt.Fragment),
- CGTv2i64eq.Fragment)>;
-
-multiclass CompareGreaterThan64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CGTr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
-}
-
-defm I64GT: CompareLogicalGreaterThan64;
-
-def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
-//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
-// I64GTv2i64.Fragment>;
-
-// i64 setult:
-def : I64SETCCNegCond<setle, I64GTr64>;
-def : I64SELECTNegCond<setle, I64GTr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setge/setlt:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CGEr64compare:
- CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
- CGTr64eq.Fragment)), 0xb)>;
-
-def CGEv2i64compare:
- CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
- CGTv2i64eq.Fragment)), 0xf)>;
-
-multiclass CompareGreaterEqual64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
-}
-
-defm I64GE: CompareGreaterEqual64;
-
-def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
-def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
- I64GEv2i64.Fragment>;
-
-// i64 setult:
-def : I64SETCCNegCond<setlt, I64GEr64>;
-def : I64SELECTNegCond<setlt, I64GEr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v2i64, i64 add
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class v2i64_add_cg<dag lhs, dag rhs>:
- CodeFrag<(CGv4i32 lhs, rhs)>;
-
-class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
- CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
-
-class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
- v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
-
-def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG),
- (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
-
-def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)),
- v2i64_add<(v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)>.Fragment>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v2i64, i64 subtraction
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
-
-class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
- CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
-
-def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (COPY_TO_REGCLASS
- v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG),
- v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
- (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
-
-def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)),
- v2i64_sub<(v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB),
- v2i64_sub_bg<(v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB)>.Fragment,
- (v4i32 VECREG:$rCGmask)>.Fragment>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v2i64, i64 multiply
-//
-// Note: i64 multiply is simply the vector->scalar conversion of the
-// full-on v2i64 multiply, since the entire vector has to be manipulated
-// anyway.
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class v2i64_mul_ahi64<dag rA> :
- CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
-
-class v2i64_mul_bhi64<dag rB> :
- CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
-
-class v2i64_mul_alo64<dag rB> :
- CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
-
-class v2i64_mul_blo64<dag rB> :
- CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
-
-class v2i64_mul_ashlq2<dag rA>:
- CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
-
-class v2i64_mul_ashlq4<dag rA>:
- CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
-
-class v2i64_mul_bshlq2<dag rB> :
- CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
-
-class v2i64_mul_bshlq4<dag rB> :
- CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
-
-class v2i64_highprod<dag rA, dag rB>:
- CodeFrag<(Av4i32
- (Av4i32
- (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3
- v2i64_mul_ahi64<rA>.Fragment),
- (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3
- v2i64_mul_bshlq4<rB>.Fragment)),
- (Av4i32
- (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
- v2i64_mul_ashlq4<rA>.Fragment),
- (Av4i32
- (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
- v2i64_mul_bhi64<rB>.Fragment),
- (Av4i32
- (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
- v2i64_mul_bhi64<rB>.Fragment),
- (Av4i32
- (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
- v2i64_mul_bshlq2<rB>.Fragment),
- (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
- v2i64_mul_bshlq2<rB>.Fragment))))))>;
-
-class v2i64_mul_a3_b3<dag rA, dag rB>:
- CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
- v2i64_mul_blo64<rB>.Fragment)>;
-
-class v2i64_mul_a2_b3<dag rA, dag rB>:
- CodeFrag<(SELBv4i32 (SHLQBYIv4i32
- (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
- v2i64_mul_bshlq2<rB>.Fragment), 0x2),
- (ILv4i32 0),
- (FSMBIv4i32 0xc3c3))>;
-
-class v2i64_mul_a3_b2<dag rA, dag rB>:
- CodeFrag<(SELBv4i32 (SHLQBYIv4i32
- (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
- v2i64_mul_ashlq2<rA>.Fragment), 0x2),
- (ILv4i32 0),
- (FSMBIv4i32 0xc3c3))>;
-
-class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
- v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
- v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
- v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
-
-class v2i64_mul<dag rA, dag rB, dag rCGmask>:
- v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
- (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
- (ILv4i32 0),
- (FSMBIv4i32 0x0f0f)),
- rCGmask>;
-
-def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG),
- (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
-
-def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)),
- v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)>.Fragment>;
-
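
The patterns above assemble the low 64 bits of the product from 32-bit
MPYU/MPYH partial products. As a cross-check, a minimal host-side C++ sketch
of the same schoolbook decomposition (plain scalar code, not the SPU
instruction sequence):

    #include <cstdint>

    // 64x64 -> low 64 bits, using only 32x32 partial products.
    uint64_t mul64_from_32bit_parts(uint64_t a, uint64_t b) {
      uint32_t a_lo = uint32_t(a), a_hi = uint32_t(a >> 32);
      uint32_t b_lo = uint32_t(b), b_hi = uint32_t(b >> 32);
      uint64_t low   = uint64_t(a_lo) * b_lo;
      uint64_t cross = uint64_t(a_hi) * b_lo + uint64_t(a_lo) * b_hi;
      return low + (cross << 32);  // a_hi * b_hi falls outside the low 64 bits
    }
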
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// f64 comparisons
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// selb instruction definition for i64. Note that the selection mask is
-// a vector, produced by various forms of FSM:
-def SELBf64_cond:
- SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
- [(set R64FP:$rT,
- (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
deleted file mode 100644
index 3396e8b1ef39..000000000000
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ /dev/null
@@ -1,333 +0,0 @@
-//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to Cell SPU assembly language. This printer
-// is the output mechanism used by `llc'.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asmprinter"
-#include "SPU.h"
-#include "SPUTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- class SPUAsmPrinter : public AsmPrinter {
- public:
- explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) :
- AsmPrinter(TM, Streamer) {}
-
- virtual const char *getPassName() const {
- return "STI CBEA SPU Assembly Printer";
- }
-
- /// printInstruction - This method is automatically generated by tablegen
- /// from the instruction set description.
- void printInstruction(const MachineInstr *MI, raw_ostream &OS);
- static const char *getRegisterName(unsigned RegNo);
-
-
- void EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
- }
- void printOp(const MachineOperand &MO, raw_ostream &OS);
-
- void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- if (MO.isReg()) {
- O << getRegisterName(MO.getReg());
- } else if (MO.isImm()) {
- O << MO.getImm();
- } else {
- printOp(MO, O);
- }
- }
-
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
-
-
- void
- printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- unsigned int value = MI->getOperand(OpNo).getImm();
- assert(value < (1 << 7) && "Invalid u7 argument");
- O << value;
- }
-
- void
- printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- char value = MI->getOperand(OpNo).getImm();
- O << (int) value;
- O << "(";
- printOperand(MI, OpNo+1, O);
- O << ")";
- }
-
- void
- printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- O << (short) MI->getOperand(OpNo).getImm();
- }
-
- void
- printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- O << (unsigned short)MI->getOperand(OpNo).getImm();
- }
-
- void
- printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- // When used as the base register, r0 reads constant zero rather than
- // the value contained in the register. For this reason, the darwin
- // assembler requires that we print r0 as 0 (no r) when used as the base.
- const MachineOperand &MO = MI->getOperand(OpNo);
- O << getRegisterName(MO.getReg()) << ", ";
- printOperand(MI, OpNo+1, O);
- }
-
- void
- printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- unsigned int value = MI->getOperand(OpNo).getImm();
- assert(value <= (1 << 18) - 1 && "Invalid u18 argument");
- O << value;
- }
-
- void
- printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- short value = MI->getOperand(OpNo).getImm();
- assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
- && "Invalid s10 argument");
- O << value;
- }
-
- void
- printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- short value = MI->getOperand(OpNo).getImm();
- assert((value >= 0 && value <= (1 << 10) - 1) && "Invalid u10 argument");
- O << value;
- }
-
- void
- printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- assert(MI->getOperand(OpNo).isImm() &&
- "printDFormAddr first operand is not immediate");
- int64_t value = int64_t(MI->getOperand(OpNo).getImm());
- int16_t value16 = int16_t(value);
- assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
- && "Invalid dform s10 offset argument");
- O << (value16 & ~0xf) << "(";
- printOperand(MI, OpNo+1, O);
- O << ")";
- }
-
- void
- printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- /* Note: operand 1 is an offset or symbol name. */
- if (MI->getOperand(OpNo).isImm()) {
- printS16ImmOperand(MI, OpNo, O);
- } else {
- printOp(MI->getOperand(OpNo), O);
- if (MI->getOperand(OpNo+1).isImm()) {
- int displ = int(MI->getOperand(OpNo+1).getImm());
- if (displ > 0)
- O << "+" << displ;
- else if (displ < 0)
- O << displ;
- }
- }
- }
-
- void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- // Used to generate a ".-<target>", but it turns out that the assembler
- // really wants the target.
- //
- // N.B.: This operand is used for call targets. Branch hints are another
- // animal entirely.
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- printS16ImmOperand(MI, OpNo, O);
- } else {
- printOp(MI->getOperand(OpNo), O);
- O << "@h";
- }
- }
-
- void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- printS16ImmOperand(MI, OpNo, O);
- } else {
- printOp(MI->getOperand(OpNo), O);
- O << "@l";
- }
- }
-
- /// Print local store address
- void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- int value = (int) MI->getOperand(OpNo).getImm();
- assert((value >= 0 && value < 16)
- && "Invalid negated immediate rotate 7-bit argument");
- O << -value;
- } else {
- llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm");
- }
- }
-
- void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){
- assert(MI->getOperand(OpNo).isImm() &&
- "Invalid/non-immediate rotate amount in printRotateNeg7Imm");
- int value = (int) MI->getOperand(OpNo).getImm();
- assert((value >= 0 && value <= 32)
- && "Invalid negated immediate rotate 7-bit argument");
- O << -value;
- }
- };
-} // end of anonymous namespace
-
-// Include the auto-generated portion of the assembly writer
-#include "SPUGenAsmWriter.inc"
-
-void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
- switch (MO.getType()) {
- case MachineOperand::MO_Immediate:
- report_fatal_error("printOp() does not handle immediate values");
-
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
- case MachineOperand::MO_JumpTableIndex:
- O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
- << '_' << MO.getIndex();
- return;
- case MachineOperand::MO_ConstantPoolIndex:
- O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
- << '_' << MO.getIndex();
- return;
- case MachineOperand::MO_ExternalSymbol:
- // Computing the address of an external symbol, not calling it.
- if (TM.getRelocationModel() != Reloc::Static) {
- O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName()
- << "$non_lazy_ptr";
- return;
- }
- O << *GetExternalSymbolSymbol(MO.getSymbolName());
- return;
- case MachineOperand::MO_GlobalAddress:
- // External or weakly linked global variables need non-lazily-resolved
- // stubs
- if (TM.getRelocationModel() != Reloc::Static) {
- const GlobalValue *GV = MO.getGlobal();
- if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
- GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
- O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- return;
- }
- }
- O << *Mang->getSymbol(MO.getGlobal());
- return;
- case MachineOperand::MO_MCSymbol:
- O << *(MO.getMCSymbol());
- return;
- default:
- O << "<unknown operand type: " << MO.getType() << ">";
- return;
- }
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
- // Does this asm operand have a single letter operand modifier?
- if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
- switch (ExtraCode[0]) {
- default:
- // See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
- case 'L': // Write second word of DImode reference.
- // Verify that this operand has two consecutive registers.
- if (!MI->getOperand(OpNo).isReg() ||
- OpNo+1 == MI->getNumOperands() ||
- !MI->getOperand(OpNo+1).isReg())
- return true;
- ++OpNo; // Return the high-part.
- break;
- }
- }
-
- printOperand(MI, OpNo, O);
- return false;
-}
-
-bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
- printMemRegReg(MI, OpNo, O);
- return false;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeCellSPUAsmPrinter() {
- RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget);
-}
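
printSymbolHi/printSymbolLo above emit the @h and @l halves of a symbol
address. A tiny sketch of the arithmetic those halves resolve to (hypothetical
helpers, assuming the plain OR-combined upper/lower split, as with an
ilhu/iohl pair, rather than a carry-adjusted one):

    #include <cstdint>

    uint16_t hi16(uint32_t addr) { return uint16_t(addr >> 16); }     // @h
    uint16_t lo16(uint32_t addr) { return uint16_t(addr & 0xffff); }  // @l
    // Round trip: (uint32_t(hi16(a)) << 16) | lo16(a) == a for any a.
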
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
deleted file mode 100644
index 9bc6be79860b..000000000000
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ /dev/null
@@ -1,53 +0,0 @@
-//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the STI Cell SPU architecture.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Return Value Calling Convention
-//===----------------------------------------------------------------------===//
-
-// Return-value convention for Cell SPU: return value to be passed in reg 3-74
-def RetCC_SPU : CallingConv<[
- CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
- CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
-]>;
-
-
-//===----------------------------------------------------------------------===//
-// CellSPU Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-def CCC_SPU : CallingConv<[
- CCIfType<[i8, i16, i32, i64, i128, f32, f64,
- v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
- CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
- // Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToStack<16, 16>>
-]>;
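
Both conventions use the same register window, R3 through R74. A one-line C++
sanity check of the count that window provides before values spill to stack:

    static_assert(74 - 3 + 1 == 72,
                  "R3..R74 passes up to 72 values in registers");
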
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
deleted file mode 100644
index f01199515a11..000000000000
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ /dev/null
@@ -1,256 +0,0 @@
-//===-- SPUFrameLowering.cpp - Cell SPU Frame Lowering --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Frame lowering (prologue/epilogue insertion) for the Cell SPU target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUFrameLowering.h"
-#include "SPU.h"
-#include "SPUInstrBuilder.h"
-#include "SPUInstrInfo.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// SPUFrameLowering:
-//===----------------------------------------------------------------------===//
-
-SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
- Subtarget(sti) {
- LR[0].first = SPU::R0;
- LR[0].second = 16;
-}
-
-
-//--------------------------------------------------------------------------
-// hasFP - Return true if the specified function actually has a dedicated frame
-// pointer register. This is true if the function needs a frame pointer and has
-// a non-zero stack size.
-bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- return MFI->getStackSize() &&
- (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects());
-}
-
-
-/// determineFrameLayout - Determine the size of the frame and maximum call
-/// frame size.
-void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
-
- // Get the alignments provided by the target, and the maximum alignment
- // (if any) of the fixed frame objects.
- unsigned TargetAlign = getStackAlignment();
- unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
- assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
- unsigned AlignMask = Align - 1;
-
- // Get the maximum call frame size of all the calls.
- unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
-
- // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
- // that allocations will be aligned.
- if (MFI->hasVarSizedObjects())
- maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
-
- // Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
-
- // Include call frame size in total.
- FrameSize += maxCallFrameSize;
-
- // Make sure the frame is aligned.
- FrameSize = (FrameSize + AlignMask) & ~AlignMask;
-
- // Update frame info.
- MFI->setStackSize(FrameSize);
-}
-
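
The masking in determineFrameLayout is the usual power-of-two align-up. The
arithmetic in isolation, with a worked value (a sketch, not code from this
file):

    // (size + (align - 1)) & ~(align - 1), for power-of-two align.
    unsigned alignUp(unsigned size, unsigned align) {
      unsigned mask = align - 1;
      return (size + mask) & ~mask;  // alignUp(40, 16) == 48; alignUp(48, 16) == 48
    }
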
-void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const SPUInstrInfo &TII =
- *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
- MachineModuleInfo &MMI = MF.getMMI();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Prepare for debug frame info.
- bool hasDebugInfo = MMI.hasDebugInfo();
- MCSymbol *FrameLabel = 0;
-
- // Move MBBI back to the beginning of the function.
- MBBI = MBB.begin();
-
- // Work out frame sizes.
- determineFrameLayout(MF);
- int FrameSize = MFI->getStackSize();
-
- assert((FrameSize & 0xf) == 0
- && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
-
- // the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->adjustsStack()) {
- FrameSize = -(FrameSize + SPUFrameLowering::minStackSize());
- if (hasDebugInfo) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
- }
-
- // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
- // for the ABI
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
- .addReg(SPU::R1);
- if (isInt<10>(FrameSize)) {
- // Spill $sp to adjusted $sp
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
- .addReg(SPU::R1);
- // Adjust $sp by required amount
- BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
- .addImm(FrameSize);
- } else if (isInt<16>(FrameSize)) {
- // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
- // $r2 to adjust $sp:
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
- .addImm(-16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
- .addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
- .addReg(SPU::R1)
- .addReg(SPU::R2);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
- .addReg(SPU::R2)
- .addImm(16);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- } else {
- report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
- }
-
- if (hasDebugInfo) {
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
-
- // Show update of SP.
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned Reg = CSI[I].getReg();
- if (Reg == SPU::R0) continue;
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
- }
-
- // Mark effective beginning of when frame pointer is ready.
- MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
-
- MachineLocation FPDst(SPU::R1);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
- }
- }
-}
-
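
The prologue's two code paths above hinge on whether the (negated) frame size
fits a signed 10-bit add immediate. The test in isolation (a sketch of what
isInt<10> checks, not the LLVM template itself):

    bool fitsInt10(long v) { return v >= -512 && v <= 511; }
    // true  -> a single "ai $sp, $sp, imm" adjusts the stack pointer;
    // false -> the size is materialized into $r2 with il and added with a.
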
-void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- const SPUInstrInfo &TII =
- *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int FrameSize = MFI->getStackSize();
- int LinkSlotOffset = SPUFrameLowering::stackSlotSize();
- DebugLoc dl = MBBI->getDebugLoc();
-
- assert(MBBI->getOpcode() == SPU::RET &&
- "Can only insert epilog into returning blocks");
- assert((FrameSize & 0xf) == 0 && "FrameSize not aligned");
-
- // the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->adjustsStack()) {
- FrameSize = FrameSize + SPUFrameLowering::minStackSize();
- if (isInt<10>(FrameSize + LinkSlotOffset)) {
- // Reload $lr, adjust $sp by required amount
- // Note: We use this ordering to slightly improve the chance of dual issue --
- // not by much, but it is an opportunity.
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
- .addImm(FrameSize + LinkSlotOffset)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
- .addReg(SPU::R1)
- .addImm(FrameSize);
- } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
- // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
- // $r2 to adjust $sp:
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
- .addImm(16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
- .addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
- .addReg(SPU::R1)
- .addReg(SPU::R2);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
- .addImm(16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
- .addReg(SPU::R2)
- .addImm(16);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- } else {
- report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
- }
- }
-}
-
-void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const{
- // Mark LR and SP unused, since the prolog spills them to stack and
- // we don't want anyone else to spill them for us.
- //
- // Also, unless R2 is really used someday, don't spill it automatically.
- MF.getRegInfo().setPhysRegUnused(SPU::R0);
- MF.getRegInfo().setPhysRegUnused(SPU::R1);
- MF.getRegInfo().setPhysRegUnused(SPU::R2);
-
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterClass *RC = &SPU::R32CRegClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
-}
diff --git a/lib/Target/CellSPU/SPUFrameLowering.h b/lib/Target/CellSPU/SPUFrameLowering.h
deleted file mode 100644
index 11c52818dd9c..000000000000
--- a/lib/Target/CellSPU/SPUFrameLowering.h
+++ /dev/null
@@ -1,80 +0,0 @@
-//===-- SPUFrameLowering.h - SPU Frame Lowering stuff ----------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains CellSPU frame information that doesn't fit anywhere else
-// cleanly...
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_FRAMEINFO_H
-#define SPU_FRAMEINFO_H
-
-#include "SPURegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- class SPUSubtarget;
-
- class SPUFrameLowering: public TargetFrameLowering {
- const SPUSubtarget &Subtarget;
- std::pair<unsigned, int> LR[1];
-
- public:
- SPUFrameLowering(const SPUSubtarget &sti);
-
- //! Determine the frame's layout
- void determineFrameLayout(MachineFunction &MF) const;
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- //! Predicate: Target has dedicated frame pointer
- bool hasFP(const MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
-
- //! Return a function's saved spill slots
- /*!
- For CellSPU, the only saved spill slot is the link register.
- */
- const std::pair<unsigned, int> *
- getCalleeSaveSpillSlots(unsigned &NumEntries) const;
-
- //! Stack slot size (16 bytes)
- static int stackSlotSize() {
- return 16;
- }
- //! Maximum frame offset representable by a signed 10-bit integer
- /*!
- This is the maximum frame offset that can be expressed as a 10-bit
- integer, used in D-form addresses.
- */
- static int maxFrameOffset() {
- return ((1 << 9) - 1) * stackSlotSize();
- }
- //! Minimum frame offset representable by a signed 10-bit integer
- static int minFrameOffset() {
- return -(1 << 9) * stackSlotSize();
- }
- //! Minimum frame size (enough to spill LR + SP)
- static int minStackSize() {
- return (2 * stackSlotSize());
- }
- //! Convert frame index to stack offset
- static int FItoStackOffset(int frame_index) {
- return frame_index * stackSlotSize();
- }
- };
-}
-
-#endif
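
Worked out, those constants pin the D-form window to a signed 10-bit slot
count scaled by the 16-byte slot size:

    static_assert(((1 << 9) - 1) * 16 ==  8176, "maxFrameOffset() ==  511 * 16");
    static_assert(-(1 << 9) * 16 == -8192, "minFrameOffset() == -512 * 16");
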
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
deleted file mode 100644
index 67a83f16a649..000000000000
--- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements hazard recognizers for scheduling on Cell SPU
-// processors.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "sched"
-
-#include "SPUHazardRecognizers.h"
-#include "SPU.h"
-#include "SPUInstrInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Cell SPU hazard recognizer
-//
-// This is the pipeline hazard recognizer for the Cell SPU processor. It does
-// very little right now.
-//===----------------------------------------------------------------------===//
-
-/// Return the pipeline hazard type encountered or generated by this
-/// instruction. Currently returns NoHazard.
-///
-/// \return NoHazard
-ScheduleHazardRecognizer::HazardType
-SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls)
-{
- // Initial thoughts on how to do this, but this code cannot work unless the
- // function's prolog and epilog code are also being scheduled so that we can
- // accurately determine which pipeline is being scheduled.
-#if 0
- assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead");
-
- const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
- ScheduleHazardRecognizer::HazardType retval = NoHazard;
- bool mustBeOdd = false;
-
- switch (Node->getOpcode()) {
- case SPU::LQDv16i8:
- case SPU::LQDv8i16:
- case SPU::LQDv4i32:
- case SPU::LQDv4f32:
- case SPU::LQDv2f64:
- case SPU::LQDr128:
- case SPU::LQDr64:
- case SPU::LQDr32:
- case SPU::LQDr16:
- case SPU::LQAv16i8:
- case SPU::LQAv8i16:
- case SPU::LQAv4i32:
- case SPU::LQAv4f32:
- case SPU::LQAv2f64:
- case SPU::LQAr128:
- case SPU::LQAr64:
- case SPU::LQAr32:
- case SPU::LQXv4i32:
- case SPU::LQXr128:
- case SPU::LQXr64:
- case SPU::LQXr32:
- case SPU::LQXr16:
- case SPU::STQDv16i8:
- case SPU::STQDv8i16:
- case SPU::STQDv4i32:
- case SPU::STQDv4f32:
- case SPU::STQDv2f64:
- case SPU::STQDr128:
- case SPU::STQDr64:
- case SPU::STQDr32:
- case SPU::STQDr16:
- case SPU::STQDr8:
- case SPU::STQAv16i8:
- case SPU::STQAv8i16:
- case SPU::STQAv4i32:
- case SPU::STQAv4f32:
- case SPU::STQAv2f64:
- case SPU::STQAr128:
- case SPU::STQAr64:
- case SPU::STQAr32:
- case SPU::STQAr16:
- case SPU::STQAr8:
- case SPU::STQXv16i8:
- case SPU::STQXv8i16:
- case SPU::STQXv4i32:
- case SPU::STQXv4f32:
- case SPU::STQXv2f64:
- case SPU::STQXr128:
- case SPU::STQXr64:
- case SPU::STQXr32:
- case SPU::STQXr16:
- case SPU::STQXr8:
- case SPU::RET:
- mustBeOdd = true;
- break;
- default:
- // Assume that this instruction can be on the even pipe
- break;
- }
-
- if (mustBeOdd && !EvenOdd)
- retval = Hazard;
-
- DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard "
- << retval << "\n");
- EvenOdd ^= 1;
- return retval;
-#else
- return NoHazard;
-#endif
-}
-
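
The disabled logic above alternates pipes on every issue slot and flags a
hazard when one of the listed odd-pipe instructions (loads, stores, RET) would
land on the even slot. Its core, pulled out as a freestanding sketch:

    // True when an odd-pipe-only instruction is about to issue on the even
    // slot; the slot toggles either way.
    bool oddPipeHazard(bool mustBeOdd, bool &evenOdd) {
      bool hazard = mustBeOdd && !evenOdd;
      evenOdd = !evenOdd;
      return hazard;
    }
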
-void SPUHazardRecognizer::EmitInstruction(SUnit *SU)
-{
-}
-
-void SPUHazardRecognizer::AdvanceCycle()
-{
- DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n");
-}
-
-void SPUHazardRecognizer::EmitNoop()
-{
- AdvanceCycle();
-}
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h
deleted file mode 100644
index 30acaeaa36fb..000000000000
--- a/lib/Target/CellSPU/SPUHazardRecognizers.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines hazard recognizers for scheduling on the Cell SPU
-// processor.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPUHAZRECS_H
-#define SPUHAZRECS_H
-
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-
-namespace llvm {
-
-class TargetInstrInfo;
-
-/// SPUHazardRecognizer
-class SPUHazardRecognizer : public ScheduleHazardRecognizer
-{
-public:
- SPUHazardRecognizer(const TargetInstrInfo &/*TII*/) {}
- virtual HazardType getHazardType(SUnit *SU, int Stalls);
- virtual void EmitInstruction(SUnit *SU);
- virtual void AdvanceCycle();
- virtual void EmitNoop();
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
deleted file mode 100644
index 5d5061054b08..000000000000
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ /dev/null
@@ -1,1192 +0,0 @@
-//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a pattern matching instruction selector for the Cell SPU,
-// converting from a legalized dag to a SPU-target dag.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "SPUTargetMachine.h"
-#include "SPUHazardRecognizers.h"
-#include "SPUFrameLowering.h"
-#include "SPUTargetMachine.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Constants.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
- //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
- bool
- isI32IntS10Immediate(ConstantSDNode *CN)
- {
- return isInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
- bool
- isI32IntU10Immediate(ConstantSDNode *CN)
- {
- return isUInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
- bool
- isI16IntS10Immediate(ConstantSDNode *CN)
- {
- return isInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
- bool
- isI16IntU10Immediate(ConstantSDNode *CN)
- {
- return isUInt<10>((short) CN->getZExtValue());
- }
-
- //! ConstantSDNode predicate for signed 16-bit values
- /*!
- \param CN The constant SelectionDAG node holding the value
- \param Imm The returned 16-bit value, if returning true
-
- This predicate tests the value in \a CN to see whether it can be
- represented as a 16-bit, sign-extended quantity. Returns true if
- this is the case.
- */
- bool
- isIntS16Immediate(ConstantSDNode *CN, short &Imm)
- {
- EVT vt = CN->getValueType(0);
- Imm = (short) CN->getZExtValue();
- if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) {
- return true;
- } else if (vt == MVT::i32) {
- int32_t i_val = (int32_t) CN->getZExtValue();
- return i_val == SignExtend32<16>(i_val);
- } else {
- int64_t i_val = (int64_t) CN->getZExtValue();
- return i_val == SignExtend64<16>(i_val);
- }
- }
-
- //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
- static bool
- isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
- {
- EVT vt = FPN->getValueType(0);
- if (vt == MVT::f32) {
- int val = FloatToBits(FPN->getValueAPF().convertToFloat());
- if (val == SignExtend32<16>(val)) {
- Imm = (short) val;
- return true;
- }
- }
-
- return false;
- }
-
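
Each of these predicates reduces to one question: does the value survive a
round trip through an n-bit two's-complement field? The signed form of that
test as a freestanding sketch:

    #include <cstdint>

    bool fitsSignedN(int64_t v, unsigned n) {
      int64_t lo = -(int64_t(1) << (n - 1));
      int64_t hi =  (int64_t(1) << (n - 1)) - 1;
      return v >= lo && v <= hi;  // n = 10: [-512, 511]; n = 16: [-32768, 32767]
    }
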
- //! Generate the carry-generate shuffle mask.
- SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
- SmallVector<SDValue, 16 > ShufBytes;
-
- // Create the shuffle mask for "rotating" the carry up one register slot
- // once the carry is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size());
- }
-
- //! Generate the borrow-generate shuffle mask
- SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
- SmallVector<SDValue, 16 > ShufBytes;
-
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size());
- }
-
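
Both masks lean on SHUFB's byte-selector encoding: selector bytes 0x00-0x1f
pick a byte from the concatenated inputs, while the 0x80/0xC0/0xE0 families
produce the constants 0x00/0xFF/0x80 (so 0x80808080 zeroes a word and
0x04050607 moves word 1 into slot 0). A host-side sketch of those semantics
as this file relies on them; the SPU ISA remains the authority on the
selector families:

    #include <cstdint>

    void shufb(const uint8_t a[16], const uint8_t b[16],
               const uint8_t sel[16], uint8_t out[16]) {
      for (int i = 0; i < 16; ++i) {
        uint8_t s = sel[i];
        if ((s & 0xE0) == 0xC0)      out[i] = 0xFF;  // 110xxxxx -> all ones
        else if ((s & 0xE0) == 0xE0) out[i] = 0x80;  // 111xxxxx -> 0x80
        else if (s & 0x80)           out[i] = 0x00;  // 10xxxxxx -> zero byte
        else out[i] = (s & 0x10) ? b[s & 0x0F] : a[s & 0x0F];
      }
    }
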
- //===------------------------------------------------------------------===//
- /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
- /// instructions for SelectionDAG operations.
- ///
- class SPUDAGToDAGISel :
- public SelectionDAGISel
- {
- const SPUTargetMachine &TM;
- const SPUTargetLowering &SPUtli;
- unsigned GlobalBaseReg;
-
- public:
- explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
- SelectionDAGISel(tm),
- TM(tm),
- SPUtli(*tm.getTargetLowering())
- { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- // Make sure we re-emit a set of the global base reg if necessary
- GlobalBaseReg = 0;
- SelectionDAGISel::runOnMachineFunction(MF);
- return true;
- }
-
- /// getI32Imm - Return a target constant with the specified value, of type
- /// i32.
- inline SDValue getI32Imm(uint32_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
- }
-
- /// getSmallIPtrImm - Return a target constant of pointer type.
- inline SDValue getSmallIPtrImm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
- }
-
- SDNode *emitBuildVector(SDNode *bvNode) {
- EVT vecVT = bvNode->getValueType(0);
- DebugLoc dl = bvNode->getDebugLoc();
-
- // Check to see if this vector can be represented as a CellSPU immediate
- // constant by invoking all of the instruction selection predicates:
- if (((vecVT == MVT::v8i16) &&
- (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
- ((vecVT == MVT::v4i32) &&
- ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
- ((vecVT == MVT::v2i64) &&
- ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
- (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
- (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) {
- HandleSDNode Dummy(SDValue(bvNode, 0));
- if (SDNode *N = Select(bvNode))
- return N;
- return Dummy.getValue().getNode();
- }
-
- // No, need to emit a constant pool spill:
- std::vector<Constant*> CV;
-
- for (size_t i = 0; i < bvNode->getNumOperands(); ++i) {
- ConstantSDNode *V = cast<ConstantSDNode > (bvNode->getOperand(i));
- CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
- }
-
- const Constant *CP = ConstantVector::get(CV);
- SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- SDValue CGPoolOffset =
- SPU::LowerConstantPool(CPIdx, *CurDAG, TM);
-
- HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
- CurDAG->getEntryNode(), CGPoolOffset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment));
- CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- }
-
- /// Select - Convert the specified operand from a target-independent to a
- /// target-specific node if it hasn't already been changed.
- SDNode *Select(SDNode *N);
-
- //! Emit the instruction sequence for i64 shl
- SDNode *SelectSHLi64(SDNode *N, EVT OpVT);
-
- //! Emit the instruction sequence for i64 srl
- SDNode *SelectSRLi64(SDNode *N, EVT OpVT);
-
- //! Emit the instruction sequence for i64 sra
- SDNode *SelectSRAi64(SDNode *N, EVT OpVT);
-
- //! Emit the necessary sequence for loading i64 constants:
- SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl);
-
- //! Alternate instruction emit sequence for loading i64 constants
- SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl);
-
- //! Returns true if the address N is an A-form (local store) address
- bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- //! D-form address predicate
- bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- /// Alternate D-form address using i7 offset predicate
- bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
- SDValue &Base);
-
- /// D-form address selection workhorse
- bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp,
- SDValue &Base, int minOffset, int maxOffset);
-
- //! Address predicate if N can be expressed as an indexed [r+r] operation.
- bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
- /// inline asm expressions.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
- SDValue Op0, Op1;
- switch (ConstraintCode) {
- default: return true;
- case 'm': // memory
- if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
- && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1))
- SelectXFormAddr(Op.getNode(), Op, Op0, Op1);
- break;
- case 'o': // offsetable
- if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
- && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) {
- Op0 = Op;
- Op1 = getSmallIPtrImm(0);
- }
- break;
- case 'v': // not offsetable
-#if 1
- llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled.");
-#else
- SelectAddrIdxOnly(Op, Op, Op0, Op1);
- break;
-#endif
- }
-
- OutOps.push_back(Op0);
- OutOps.push_back(Op1);
- return false;
- }
-
- virtual const char *getPassName() const {
- return "Cell SPU DAG->DAG Pattern Instruction Selection";
- }
-
- private:
- SDValue getRC( MVT );
-
- // Include the pieces autogenerated from the target description.
-#include "SPUGenDAGISel.inc"
- };
-}
-
-/*!
- \param Op The ISD instruction operand
- \param N The address to be tested
- \param Base The base address
- \param Index The base address index
- */
-bool
-SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index) {
- // These match the addr256k operand type:
- EVT OffsVT = MVT::i16;
- SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
- int64_t val;
-
- switch (N.getOpcode()) {
- case ISD::Constant:
- val = cast<ConstantSDNode>(N.getNode())->getSExtValue();
- Base = CurDAG->getTargetConstant( val , MVT::i32);
- Index = Zero;
- return true;
- case ISD::ConstantPool:
- case ISD::GlobalAddress:
- report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered.");
- /*NOTREACHED*/
-
- case ISD::TargetConstant:
- case ISD::TargetGlobalAddress:
- case ISD::TargetJumpTable:
- report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global "
- "not wrapped as A-form address.");
- /*NOTREACHED*/
-
- case SPUISD::AFormAddr:
- // Just load from memory if there's only a single use of the location,
- // otherwise, this will get handled below with D-form offset addresses
- if (N.hasOneUse()) {
- SDValue Op0 = N.getOperand(0);
- switch (Op0.getOpcode()) {
- case ISD::TargetConstantPool:
- case ISD::TargetJumpTable:
- Base = Op0;
- Index = Zero;
- return true;
-
- case ISD::TargetGlobalAddress: {
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0);
- const GlobalValue *GV = GSDN->getGlobal();
- if (GV->getAlignment() == 16) {
- Base = Op0;
- Index = Zero;
- return true;
- }
- break;
- }
- }
- }
- break;
- }
- return false;
-}
-
-bool
-SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
- SDValue &Base) {
- const int minDForm2Offset = -(1 << 7);
- const int maxDForm2Offset = (1 << 7) - 1;
- return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset,
- maxDForm2Offset);
-}
-
-/*!
- \param Op The ISD instruction (ignored)
- \param N The address to be tested
- \param Base Base address register/pointer
- \param Index Base address index
-
- Examine whether the input address can be expressed as a base register plus
- a signed 10-bit displacement, [r+I10] (a D-form address).
-
- \return true if \a N is a D-form address with \a Base and \a Index set
- to non-empty SDValue instances.
-*/
-bool
-SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index) {
- return DFormAddressPredicate(Op, N, Base, Index,
- SPUFrameLowering::minFrameOffset(),
- SPUFrameLowering::maxFrameOffset());
-}
-
-bool
-SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index, int minOffset,
- int maxOffset) {
- unsigned Opc = N.getOpcode();
- EVT PtrTy = SPUtli.getPointerTy();
-
- if (Opc == ISD::FrameIndex) {
- // The stack frame offset (frame index * 16) must fit the D-form window:
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N);
- int FI = int(FIN->getIndex());
- DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
- << FI << "\n");
- if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
- Base = CurDAG->getTargetConstant(0, PtrTy);
- Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
- return true;
- }
- } else if (Opc == ISD::ADD) {
- // Generated by getelementptr
- const SDValue Op0 = N.getOperand(0);
- const SDValue Op1 = N.getOperand(1);
-
- if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo)
- || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) {
- Base = CurDAG->getTargetConstant(0, PtrTy);
- Index = N;
- return true;
- } else if (Op1.getOpcode() == ISD::Constant
- || Op1.getOpcode() == ISD::TargetConstant) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- int32_t offset = int32_t(CN->getSExtValue());
-
- if (Op0.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0);
- int FI = int(FIN->getIndex());
- DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
- << " frame index = " << FI << "\n");
-
- if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
- return true;
- }
- } else if (offset > minOffset && offset < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = Op0;
- return true;
- }
- } else if (Op0.getOpcode() == ISD::Constant
- || Op0.getOpcode() == ISD::TargetConstant) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
- int32_t offset = int32_t(CN->getSExtValue());
-
- if (Op1.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1);
- int FI = int(FIN->getIndex());
- DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
- << " frame index = " << FI << "\n");
-
- if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
- return true;
- }
- } else if (offset > minOffset && offset < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = Op1;
- return true;
- }
- }
- } else if (Opc == SPUISD::IndirectAddr) {
- // Indirect with constant offset -> D-Form address
- const SDValue Op0 = N.getOperand(0);
- const SDValue Op1 = N.getOperand(1);
-
- if (Op0.getOpcode() == SPUISD::Hi
- && Op1.getOpcode() == SPUISD::Lo) {
- // (SPUindirect (SPUhi <arg>, 0), (SPUlo <arg>, 0))
- Base = CurDAG->getTargetConstant(0, PtrTy);
- Index = N;
- return true;
- } else if (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1)) {
- int32_t offset = 0;
- SDValue idxOp;
-
- if (isa<ConstantSDNode>(Op1)) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- offset = int32_t(CN->getSExtValue());
- idxOp = Op0;
- } else if (isa<ConstantSDNode>(Op0)) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
- offset = int32_t(CN->getSExtValue());
- idxOp = Op1;
- }
-
- if (offset >= minOffset && offset <= maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = idxOp;
- return true;
- }
- }
- } else if (Opc == SPUISD::AFormAddr) {
- Base = CurDAG->getTargetConstant(0, N.getValueType());
- Index = N;
- return true;
- } else if (Opc == SPUISD::LDRESULT) {
- Base = CurDAG->getTargetConstant(0, N.getValueType());
- Index = N;
- return true;
- } else if (Opc == ISD::Register
- || Opc == ISD::CopyFromReg
- || Opc == ISD::UNDEF
- || Opc == ISD::Constant) {
- unsigned OpOpc = Op->getOpcode();
-
- if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
- // Direct load/store without getelementptr
- SDValue Offs;
-
- Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2));
-
- if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
- if (Offs.getOpcode() == ISD::UNDEF)
- Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
-
- Base = Offs;
- Index = N;
- return true;
- }
- } else {
- /* If otherwise unadorned, default to D-form address with 0 offset: */
- if (Opc == ISD::CopyFromReg) {
- Index = N.getOperand(1);
- } else {
- Index = N;
- }
-
- Base = CurDAG->getTargetConstant(0, Index.getValueType());
- return true;
- }
- }
-
- return false;
-}
-
-/*!
- \param Op The ISD instruction operand
- \param N The address operand
- \param Base The base pointer operand
- \param Index The offset/index operand
-
- If the address \a N can be expressed as an A-form or D-form address, returns
- false. Otherwise, creates two operands, Base and Index that will become the
- (r)(r) X-form address.
-*/
-bool
-SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index) {
- if (!SelectAFormAddr(Op, N, Base, Index)
- && !SelectDFormAddr(Op, N, Base, Index)) {
- // If the address is neither A-form nor D-form, punt and use an X-form
- // address:
- Base = N.getOperand(1);
- Index = N.getOperand(0);
- return true;
- }
-
- return false;
-}
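
SelectXFormAddr makes the fallback order explicit: try the cheaper encodings
first and use register+register only as a last resort. The same cascade as
plain control flow (a sketch; the real predicates also walk node opcodes, and
the bounds assume the frame-offset window from SPUFrameLowering.h):

    enum AddrMode { AForm, DForm, XForm };

    // disp is the byte displacement a D-form encoding would need.
    AddrMode pickAddrMode(bool isAFormAddress, long disp) {
      if (isAFormAddress)                return AForm;  // absolute local-store
      if (disp >= -8192 && disp <= 8176) return DForm;  // signed 10-bit window
      return XForm;                                     // [r + r] always works
    }
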
-
-/*!
- Utility function to use with COPY_TO_REGCLASS instructions. Returns an
- SDValue to be used as the last parameter of a
- CurDAG->getMachineNode(COPY_TO_REGCLASS, ...) call.
- \param VT the value type for which we want a register class
-*/
-SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
- switch( VT.SimpleTy ) {
- case MVT::i8:
- return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
- case MVT::i16:
- return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
- case MVT::i32:
- return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
- case MVT::f32:
- return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
- case MVT::i64:
- return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
- case MVT::i128:
- return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
- case MVT::v16i8:
- case MVT::v8i16:
- case MVT::v4i32:
- case MVT::v4f32:
- case MVT::v2i64:
- case MVT::v2f64:
- return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
- default:
- assert(false && "add a new case here");
- return SDValue();
- }
-}
-
-//! Convert the operand from a target-independent to a target-specific node
-/*!
- */
-SDNode *
-SPUDAGToDAGISel::Select(SDNode *N) {
- unsigned Opc = N->getOpcode();
- int n_ops = -1;
- unsigned NewOpc = 0;
- EVT OpVT = N->getValueType(0);
- SDValue Ops[8];
- DebugLoc dl = N->getDebugLoc();
-
- if (N->isMachineOpcode())
- return NULL; // Already selected.
-
- if (Opc == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
- SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0));
-
- if (FI < 128) {
- NewOpc = SPU::AIr32;
- Ops[0] = TFI;
- Ops[1] = Imm0;
- n_ops = 2;
- } else {
- NewOpc = SPU::Ar32;
- Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
- Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
- N->getValueType(0), TFI),
- 0);
- n_ops = 2;
- }
- } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
- // Catch the i64 constants that end up here. Note: The backend doesn't
- // attempt to legalize the constant (it's useless because DAGCombiner
- // will insert 64-bit constants and we can't stop it).
- return SelectI64Constant(N, OpVT, N->getDebugLoc());
- } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
- && OpVT == MVT::i64) {
- SDValue Op0 = N->getOperand(0);
- EVT Op0VT = Op0.getValueType();
- EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- Op0VT, (128 / Op0VT.getSizeInBits()));
- EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue shufMask;
-
- switch (Op0VT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT");
- /*NOTREACHED*/
- case MVT::i32:
- shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x00010203, MVT::i32),
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x08090a0b, MVT::i32));
- break;
-
- case MVT::i16:
- shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x80800203, MVT::i32),
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x80800a0b, MVT::i32));
- break;
-
- case MVT::i8:
- shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x80808003, MVT::i32),
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x8080800b, MVT::i32));
- break;
- }
-
- SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
-
- HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
- Op0VecVT, Op0));
-
- SDValue PromScalar;
- if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
- PromScalar = SDValue(N, 0);
- else
- PromScalar = PromoteScalar.getValue();
-
- SDValue zextShuffle =
- CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
- PromScalar, PromScalar,
- SDValue(shufMaskLoad, 0));
-
- HandleSDNode Dummy2(zextShuffle);
- if (SDNode *N = SelectCode(Dummy2.getValue().getNode()))
- zextShuffle = SDValue(N, 0);
- else
- zextShuffle = Dummy2.getValue();
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
- zextShuffle));
-
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- SelectCode(Dummy.getValue().getNode());
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
-
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)));
-
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());
-
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)));
-
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
-
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)));
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::TRUNCATE) {
- SDValue Op0 = N->getOperand(0);
- if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
- && OpVT == MVT::i32
- && Op0.getValueType() == MVT::i64) {
- // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32
- //
- // Take advantage of the fact that the upper 32 bits are in the
- // i32 preferred slot and avoid shuffle gymnastics:
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
- if (CN != 0) {
- unsigned shift_amt = unsigned(CN->getZExtValue());
-
- if (shift_amt >= 32) {
- SDNode *hi32 =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
- Op0.getOperand(0), getRC(MVT::i32));
-
- shift_amt -= 32;
- if (shift_amt > 0) {
- // Take care of the additional shift, if present:
- SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
- unsigned Opc = SPU::ROTMAIr32_i32;
-
- if (Op0.getOpcode() == ISD::SRL)
- Opc = SPU::ROTMr32;
-
- hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0),
- shift);
- }
-
- return hi32;
- }
- }
- }
- } else if (Opc == ISD::SHL) {
- if (OpVT == MVT::i64)
- return SelectSHLi64(N, OpVT);
- } else if (Opc == ISD::SRL) {
- if (OpVT == MVT::i64)
- return SelectSRLi64(N, OpVT);
- } else if (Opc == ISD::SRA) {
- if (OpVT == MVT::i64)
- return SelectSRAi64(N, OpVT);
- } else if (Opc == ISD::FNEG
- && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
- DebugLoc dl = N->getDebugLoc();
- // Check if the pattern is a special form of DFNMS:
- // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
- SDValue Op0 = N->getOperand(0);
- if (Op0.getOpcode() == ISD::FSUB) {
- SDValue Op00 = Op0.getOperand(0);
- if (Op00.getOpcode() == ISD::FMUL) {
- unsigned Opc = SPU::DFNMSf64;
- if (OpVT == MVT::v2f64)
- Opc = SPU::DFNMSv2f64;
-
- return CurDAG->getMachineNode(Opc, dl, OpVT,
- Op00.getOperand(0),
- Op00.getOperand(1),
- Op0.getOperand(1));
- }
- }
-
- SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
- SDNode *signMask = 0;
- unsigned Opc = SPU::XORfneg64;
-
- if (OpVT == MVT::f64) {
- signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl);
- } else if (OpVT == MVT::v2f64) {
- Opc = SPU::XORfnegvec;
- signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
- MVT::v2i64,
- negConst, negConst).getNode());
- }
-
- return CurDAG->getMachineNode(Opc, dl, OpVT,
- N->getOperand(0), SDValue(signMask, 0));
- } else if (Opc == ISD::FABS) {
- if (OpVT == MVT::f64) {
- SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
- return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT,
- N->getOperand(0), SDValue(signMask, 0));
- } else if (OpVT == MVT::v2f64) {
- SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
- SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
- absConst, absConst);
- SDNode *signMask = emitBuildVector(absVec.getNode());
- return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT,
- N->getOperand(0), SDValue(signMask, 0));
- }
- } else if (Opc == SPUISD::LDRESULT) {
- // Custom select instructions for LDRESULT
- EVT VT = N->getValueType(0);
- SDValue Arg = N->getOperand(0);
- SDValue Chain = N->getOperand(1);
- SDNode *Result;
-
- Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
- MVT::Other, Arg,
- getRC( VT.getSimpleVT()), Chain);
- return Result;
-
- } else if (Opc == SPUISD::IndirectAddr) {
- // Look at the operands: SelectCode() will catch the cases that aren't
- // specifically handled here.
- //
- // SPUInstrInfo catches the following patterns:
- // (SPUindirect (SPUhi ...), (SPUlo ...))
- // (SPUindirect $sp, imm)
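- //
- // Anything else is selected here as a plain register-register add
- // (Ar32), or as an add-immediate (AIr32) when the constant offset fits
- // in a signed 10-bit immediate; larger constants are first materialized
- // with ILr32.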
- EVT VT = N->getValueType(0);
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- RegisterSDNode *RN;
-
- if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
- || (Op0.getOpcode() == ISD::Register
- && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
- && RN->getReg() != SPU::R1))) {
- NewOpc = SPU::Ar32;
- Ops[1] = Op1;
- if (Op1.getOpcode() == ISD::Constant) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
- if (isInt<10>(CN->getSExtValue())) {
- NewOpc = SPU::AIr32;
- Ops[1] = Op1;
- } else {
- Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
- N->getValueType(0),
- Op1),
- 0);
- }
- }
- Ops[0] = Op0;
- n_ops = 2;
- }
- }
-
- if (n_ops > 0) {
- if (N->hasOneUse())
- return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
- else
- return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops);
- } else
- return SelectCode(N);
-}
-
-/*!
- * Emit the instruction sequence for i64 left shifts. The basic algorithm
- * is to fill the bottom two word slots with zeros so that zeros are shifted
- * in as the entire quadword is shifted left.
- *
- * \note This code could also be used to implement v2i64 shl.
- *
- * @param N The shl operand
- * @param OpVT N's machine value type (doesn't need to be passed, but
- * makes life easier.)
- * @return The SDNode with the entire instruction sequence
- */
-SDNode *
-SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
- SDValue Op0 = N->getOperand(0);
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = N->getOperand(1);
- EVT ShiftAmtVT = ShiftAmt.getValueType();
- SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
- SDValue SelMaskVal;
- DebugLoc dl = N->getDebugLoc();
-
- VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
- Op0, getRC(MVT::v2i64) );
- SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
- SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
- ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
- CurDAG->getTargetConstant(0, OpVT));
- VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
- SDValue(ZeroFill, 0),
- SDValue(VecOp0, 0),
- SDValue(SelMask, 0));
-
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
- unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
- unsigned bits = unsigned(CN->getZExtValue()) & 7;
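-
- // E.g. a constant shift of 35 decomposes into a 4-byte quadword shift
- // (SHLQBYIv2i64) followed by a 3-bit shift (SHLQBIIv2i64).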
-
- if (bytes > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT,
- SDValue(VecOp0, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
- }
-
- if (bits > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : VecOp0), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
- }
- } else {
- SDNode *Bytes =
- CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(3, ShiftAmtVT));
- SDNode *Bits =
- CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(7, ShiftAmtVT));
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT,
- SDValue(VecOp0, 0), SDValue(Bytes, 0));
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(Bits, 0));
- }
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(Shift, 0), getRC(MVT::i64));
-}
-
-/*!
- * Emit the instruction sequence for i64 logical right shifts.
- *
- * @param N The srl operand
- * @param OpVT N's machine value type (doesn't need to be passed, but
- * makes life easier.)
- * @return The SDNode with the entire instruction sequence
- */
-SDNode *
-SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
- SDValue Op0 = N->getOperand(0);
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = N->getOperand(1);
- EVT ShiftAmtVT = ShiftAmt.getValueType();
- SDNode *VecOp0, *Shift = 0;
- DebugLoc dl = N->getDebugLoc();
-
- VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
- Op0, getRC(MVT::v2i64) );
-
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
- unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
- unsigned bits = unsigned(CN->getZExtValue()) & 7;
-
- if (bytes > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT,
- SDValue(VecOp0, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
- }
-
- if (bits > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : VecOp0), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
- }
- } else {
- SDNode *Bytes =
- CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(3, ShiftAmtVT));
- SDNode *Bits =
- CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(7, ShiftAmtVT));
-
- // Ensure that the shift amounts are negated!
- Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
- SDValue(Bytes, 0),
- CurDAG->getTargetConstant(0, ShiftAmtVT));
-
- Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
- SDValue(Bits, 0),
- CurDAG->getTargetConstant(0, ShiftAmtVT));
-
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT,
- SDValue(VecOp0, 0), SDValue(Bytes, 0));
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(Bits, 0));
- }
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(Shift, 0), getRC(MVT::i64));
-}
-
-/*!
- * Emit the instruction sequence for i64 arithmetic right shifts.
- *
- * @param N The sra operand
- * @param OpVT N's machine value type (doesn't need to be passed, but
- * makes life easier.)
- * @return The SDNode with the entire instruction sequence
- */
-SDNode *
-SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
- // Promote Op0 to vector
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = N->getOperand(1);
- EVT ShiftAmtVT = ShiftAmt.getValueType();
- DebugLoc dl = N->getDebugLoc();
-
- SDNode *VecOp0 =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- VecVT, N->getOperand(0), getRC(MVT::v2i64));
-
- SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
- SDNode *SignRot =
- CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
- SDValue(VecOp0, 0), SignRotAmt);
- SDNode *UpperHalfSign =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32));
-
- SDNode *UpperHalfSignMask =
- CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
- SDNode *UpperLowerMask =
- CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT,
- CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
- SDNode *UpperLowerSelect =
- CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
- SDValue(UpperHalfSignMask, 0),
- SDValue(VecOp0, 0),
- SDValue(UpperLowerMask, 0));
-
- SDNode *Shift = 0;
-
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
- unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
- unsigned bits = unsigned(CN->getZExtValue()) & 7;
-
- if (bytes > 0) {
- bytes = 31 - bytes;
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT,
- SDValue(UpperLowerSelect, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
- }
-
- if (bits > 0) {
- bits = 8 - bits;
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
- }
- } else {
- SDNode *NegShift =
- CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
- ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
-
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
- SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(NegShift, 0));
- }
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(Shift, 0), getRC(MVT::i64));
-}
-
-/*!
- Do the magic necessary to load an i64 constant
- */
-SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT,
- DebugLoc dl) {
- ConstantSDNode *CN = cast<ConstantSDNode>(N);
- return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
-}
-
-SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
- DebugLoc dl) {
- EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2);
- SDValue i64vec =
- SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
-
- // Here's where it gets interesting, because we have to parse out the
- // subtree handed back in i64vec:
-
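- // The splat lowering can hand back one of three shapes, each handled
- // below: a bitcast of a splatted value, a SHUFB of two build_vectors,
- // or a plain BUILD_VECTOR.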
- if (i64vec.getOpcode() == ISD::BITCAST) {
- // The degenerate case where the upper and lower bits in the splat are
- // identical:
- SDValue Op0 = i64vec.getOperand(0);
-
- ReplaceUses(i64vec, Op0);
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
- SDValue(emitBuildVector(Op0.getNode()), 0),
- getRC(MVT::i64));
- } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
- SDValue lhs = i64vec.getOperand(0);
- SDValue rhs = i64vec.getOperand(1);
- SDValue shufmask = i64vec.getOperand(2);
-
- if (lhs.getOpcode() == ISD::BITCAST) {
- ReplaceUses(lhs, lhs.getOperand(0));
- lhs = lhs.getOperand(0);
- }
-
- SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
- ? lhs.getNode()
- : emitBuildVector(lhs.getNode()));
-
- if (rhs.getOpcode() == ISD::BITCAST) {
- ReplaceUses(rhs, rhs.getOperand(0));
- rhs = rhs.getOperand(0);
- }
-
- SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
- ? rhs.getNode()
- : emitBuildVector(rhs.getNode()));
-
- if (shufmask.getOpcode() == ISD::BITCAST) {
- ReplaceUses(shufmask, shufmask.getOperand(0));
- shufmask = shufmask.getOperand(0);
- }
-
- SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
- ? shufmask.getNode()
- : emitBuildVector(shufmask.getNode()));
-
- SDValue shufNode =
- CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
- SDValue(lhsNode, 0), SDValue(rhsNode, 0),
- SDValue(shufMaskNode, 0));
- HandleSDNode Dummy(shufNode);
- SDNode *SN = SelectCode(Dummy.getValue().getNode());
- if (SN == 0) SN = Dummy.getValue().getNode();
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(SN, 0), getRC(MVT::i64));
- } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
- SDValue(emitBuildVector(i64vec.getNode()), 0),
- getRC(MVT::i64));
- } else {
- report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec "
- "condition");
- }
-}
-
-/// createSPUISelDag - This pass converts a legalized DAG into a
-/// SPU-specific DAG, ready for instruction scheduling.
-///
-FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
- return new SPUDAGToDAGISel(TM);
-}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
deleted file mode 100644
index 4e9fcd1bc765..000000000000
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ /dev/null
@@ -1,3266 +0,0 @@
-//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SPUTargetLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUISelLowering.h"
-#include "SPUTargetMachine.h"
-#include "SPUFrameLowering.h"
-#include "SPUMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
- // Byte offset of the preferred slot (counted from the MSB)
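- // (e.g. an i8 value lives in byte 3 of the first word, an i16 in
- // bytes 2-3, and 32-bit or wider scalars start at byte 0)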
- int prefslotOffset(EVT VT) {
- int retval = 0;
- if (VT == MVT::i1) retval = 3;
- if (VT == MVT::i8) retval = 3;
- if (VT == MVT::i16) retval = 2;
-
- return retval;
- }
-
- //! Expand a library call into an actual call DAG node
- /*!
- \note
- This code is taken from SelectionDAGLegalize, since it is not exposed as
- part of the LLVM SelectionDAG API.
- */
-
- SDValue
- ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
- bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
- // The input chain to this libcall is the entry node of the function.
- // Legalizing the call will automatically add the previous call to the
- // dependence.
- SDValue InChain = DAG.getEntryNode();
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- EVT ArgVT = Op.getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Op.getOperand(i);
- Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
-
- // Splice the libcall in wherever FindInputOutputChains tells us to.
- Type *RetTy =
- Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
- TargetLowering::CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned,
- false, false,
- 0, TLI.getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- /*doesNotRet=*/false,
- /*isReturnValueUsed=*/true,
- Callee, Args, DAG, Op.getDebugLoc());
- std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo.first;
- }
-}
-
-SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()),
- SPUTM(TM) {
-
- // Use _setjmp/_longjmp instead of setjmp/longjmp.
- setUseUnderscoreSetJmp(true);
- setUseUnderscoreLongJmp(true);
-
- // Set RTLIB libcall names as used by SPU:
- setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
-
- // Set up the SPU's register classes:
- addRegisterClass(MVT::i8, &SPU::R8CRegClass);
- addRegisterClass(MVT::i16, &SPU::R16CRegClass);
- addRegisterClass(MVT::i32, &SPU::R32CRegClass);
- addRegisterClass(MVT::i64, &SPU::R64CRegClass);
- addRegisterClass(MVT::f32, &SPU::R32FPRegClass);
- addRegisterClass(MVT::f64, &SPU::R64FPRegClass);
- addRegisterClass(MVT::i128, &SPU::GPRCRegClass);
-
- // SPU has no sign or zero extended loads for i1, i8, i16:
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
-
- setTruncStoreAction(MVT::i128, MVT::i64, Expand);
- setTruncStoreAction(MVT::i128, MVT::i32, Expand);
- setTruncStoreAction(MVT::i128, MVT::i16, Expand);
- setTruncStoreAction(MVT::i128, MVT::i8, Expand);
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- // SPU constant load actions are custom lowered:
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
-
- // SPU's loads and stores have to be custom lowered:
- for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
-
- for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
- MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
- setTruncStoreAction(VT, StoreVT, Expand);
- }
- }
-
- for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
-
- for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
- MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
- setTruncStoreAction(VT, StoreVT, Expand);
- }
- }
-
- // Expand the jumptable branches
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
-
- // Custom lower SELECT_CC for most cases, but expand by default
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-
- // SPU has no intrinsics for these particular operations:
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
-
- // SPU has no division/remainder instructions
- setOperationAction(ISD::SREM, MVT::i8, Expand);
- setOperationAction(ISD::UREM, MVT::i8, Expand);
- setOperationAction(ISD::SDIV, MVT::i8, Expand);
- setOperationAction(ISD::UDIV, MVT::i8, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
- setOperationAction(ISD::SREM, MVT::i16, Expand);
- setOperationAction(ISD::UREM, MVT::i16, Expand);
- setOperationAction(ISD::SDIV, MVT::i16, Expand);
- setOperationAction(ISD::UDIV, MVT::i16, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
- setOperationAction(ISD::SDIV, MVT::i64, Expand);
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::SREM, MVT::i128, Expand);
- setOperationAction(ISD::UREM, MVT::i128, Expand);
- setOperationAction(ISD::SDIV, MVT::i128, Expand);
- setOperationAction(ISD::UDIV, MVT::i128, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
-
- // We don't support sin/cos/sqrt/fmod
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FREM , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
- setOperationAction(ISD::FREM , MVT::f32, Expand);
-
- // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
- // for f32!)
- setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- setOperationAction(ISD::FSQRT, MVT::f32, Expand);
-
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
-
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
-
- // SPU can do rotate right and left, so legalize it... but customize for i8
- // because instructions don't exist.
-
- // FIXME: Change from "expand" to appropriate type once ROTR is supported in
- // .td files.
- setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
-
- setOperationAction(ISD::ROTL, MVT::i32, Legal);
- setOperationAction(ISD::ROTL, MVT::i16, Legal);
- setOperationAction(ISD::ROTL, MVT::i8, Custom);
-
- // SPU has no native version of shift left/right for i8
- setOperationAction(ISD::SHL, MVT::i8, Custom);
- setOperationAction(ISD::SRL, MVT::i8, Custom);
- setOperationAction(ISD::SRA, MVT::i8, Custom);
-
- // Make these operations legal and handle them during instruction selection:
- setOperationAction(ISD::SHL, MVT::i64, Legal);
- setOperationAction(ISD::SRL, MVT::i64, Legal);
- setOperationAction(ISD::SRA, MVT::i64, Legal);
-
- // Custom lower i8 multiplications; i32 and i64 are marked legal and
- // handled during instruction selection
- setOperationAction(ISD::MUL, MVT::i8, Custom);
- setOperationAction(ISD::MUL, MVT::i32, Legal);
- setOperationAction(ISD::MUL, MVT::i64, Legal);
-
- // Expand double-width multiplication
- // FIXME: It would probably be reasonable to support some of these operations
- setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::MULHU, MVT::i8, Expand);
- setOperationAction(ISD::MULHS, MVT::i8, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::MULHU, MVT::i16, Expand);
- setOperationAction(ISD::MULHS, MVT::i16, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
-
- // Need to custom handle (some) common i8, i64 math ops
- setOperationAction(ISD::ADD, MVT::i8, Custom);
- setOperationAction(ISD::ADD, MVT::i64, Legal);
- setOperationAction(ISD::SUB, MVT::i8, Custom);
- setOperationAction(ISD::SUB, MVT::i64, Legal);
-
- // SPU does not have BSWAP. It does have i32 support for CTLZ.
- // CTPOP has to be custom lowered.
- setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- setOperationAction(ISD::BSWAP, MVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i8, Custom);
- setOperationAction(ISD::CTPOP, MVT::i16, Custom);
- setOperationAction(ISD::CTPOP, MVT::i32, Custom);
- setOperationAction(ISD::CTPOP, MVT::i64, Custom);
- setOperationAction(ISD::CTPOP, MVT::i128, Expand);
-
- setOperationAction(ISD::CTTZ , MVT::i8, Expand);
- setOperationAction(ISD::CTTZ , MVT::i16, Expand);
- setOperationAction(ISD::CTTZ , MVT::i32, Expand);
- setOperationAction(ISD::CTTZ , MVT::i64, Expand);
- setOperationAction(ISD::CTTZ , MVT::i128, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
-
- setOperationAction(ISD::CTLZ , MVT::i8, Promote);
- setOperationAction(ISD::CTLZ , MVT::i16, Promote);
- setOperationAction(ISD::CTLZ , MVT::i32, Legal);
- setOperationAction(ISD::CTLZ , MVT::i64, Expand);
- setOperationAction(ISD::CTLZ , MVT::i128, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
-
- // SPU has a version of select that implements (a&~c)|(b&c), just like
- // select ought to work:
- setOperationAction(ISD::SELECT, MVT::i8, Legal);
- setOperationAction(ISD::SELECT, MVT::i16, Legal);
- setOperationAction(ISD::SELECT, MVT::i32, Legal);
- setOperationAction(ISD::SELECT, MVT::i64, Legal);
-
- setOperationAction(ISD::SETCC, MVT::i8, Legal);
- setOperationAction(ISD::SETCC, MVT::i16, Legal);
- setOperationAction(ISD::SETCC, MVT::i32, Legal);
- setOperationAction(ISD::SETCC, MVT::i64, Legal);
- setOperationAction(ISD::SETCC, MVT::f64, Custom);
-
- // Custom lower i128 -> i64 truncates
- setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
-
- // Custom lower i32/i64 -> i128 sign extend
- setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
- // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
- // to expand to a libcall, hence the custom lowering:
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
-
- // FDIV on SPU requires custom lowering
- setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
-
- // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
-
- setOperationAction(ISD::BITCAST, MVT::i32, Legal);
- setOperationAction(ISD::BITCAST, MVT::f32, Legal);
- setOperationAction(ISD::BITCAST, MVT::i64, Legal);
- setOperationAction(ISD::BITCAST, MVT::f64, Legal);
-
- // We cannot sextinreg(i1). Expand to shifts.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- // We want to legalize GlobalAddress and ConstantPool nodes into the
- // appropriate instructions to materialize the address.
- for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
-
- setOperationAction(ISD::GlobalAddress, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::JumpTable, VT, Custom);
- }
-
- // VASTART needs to be custom lowered to use the VarArgsFrameIndex
- setOperationAction(ISD::VASTART , MVT::Other, Custom);
-
- // Use the default implementation.
- setOperationAction(ISD::VAARG , MVT::Other, Expand);
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
- setOperationAction(ISD::VAEND , MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
-
- // Cell SPU has instructions for converting between i64 and fp.
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
-
- // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
-
- // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
-
- // First set operation action for all vector types to expand. Then we
- // will selectively turn on ones that can be effectively codegen'd.
- addRegisterClass(MVT::v16i8, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v8i16, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v4i32, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v2i64, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v4f32, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v2f64, &SPU::VECREGRegClass);
-
- for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
-
- // Set operation actions to legal types only.
- if (!isTypeLegal(VT)) continue;
-
- // add/sub are legal for all supported vector VT's.
- setOperationAction(ISD::ADD, VT, Legal);
- setOperationAction(ISD::SUB, VT, Legal);
- // mul is marked legal here; v2i64 multiplies are handled during selection.
- setOperationAction(ISD::MUL, VT, Legal);
-
- setOperationAction(ISD::AND, VT, Legal);
- setOperationAction(ISD::OR, VT, Legal);
- setOperationAction(ISD::XOR, VT, Legal);
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Legal);
- setOperationAction(ISD::STORE, VT, Custom);
-
- // These operations need to be expanded:
- setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
- setOperationAction(ISD::UREM, VT, Expand);
-
- // Expand all trunc stores
- for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
- MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j;
- setTruncStoreAction(VT, TargetVT, Expand);
- }
-
- // Custom lower build_vector, constant pool spills, insert and
- // extract vector elements:
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- }
-
- setOperationAction(ISD::SHL, MVT::v2i64, Expand);
-
- setOperationAction(ISD::AND, MVT::v16i8, Custom);
- setOperationAction(ISD::OR, MVT::v16i8, Custom);
- setOperationAction(ISD::XOR, MVT::v16i8, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
-
- setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
-
- setBooleanContents(ZeroOrNegativeOneBooleanContent);
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
-
- setStackPointerRegisterToSaveRestore(SPU::R1);
-
- // We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::ANY_EXTEND);
-
- setMinFunctionAlignment(3);
-
- computeRegisterProperties();
-
- // Set pre-RA register scheduler default to BURR, which produces slightly
- // better code than the default (could also be TDRR, but TargetLowering.h
- // needs a mod to support that model):
- setSchedulingPreference(Sched::RegPressure);
-}
-
-const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return 0;
- case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
- case SPUISD::Hi: return "SPUISD::Hi";
- case SPUISD::Lo: return "SPUISD::Lo";
- case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
- case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
- case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
- case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
- case SPUISD::CALL: return "SPUISD::CALL";
- case SPUISD::SHUFB: return "SPUISD::SHUFB";
- case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
- case SPUISD::CNTB: return "SPUISD::CNTB";
- case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
- case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
- case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
- case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
- case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
- case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
- case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
- case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
- case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
- case SPUISD::SELB: return "SPUISD::SELB";
- case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
- case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
- case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Return the Cell SPU's SETCC result type
-//===----------------------------------------------------------------------===//
-
-EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
- // i8, i16 and i32 are valid SETCC result types
- MVT::SimpleValueType retval;
-
- switch(VT.getSimpleVT().SimpleTy){
- case MVT::i1:
- case MVT::i8:
- retval = MVT::i8; break;
- case MVT::i16:
- retval = MVT::i16; break;
- case MVT::i32:
- default:
- retval = MVT::i32;
- }
- return retval;
-}
-
-//===----------------------------------------------------------------------===//
-// Calling convention code:
-//===----------------------------------------------------------------------===//
-
-#include "SPUGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
-// LowerOperation implementation
-//===----------------------------------------------------------------------===//
-
-/// Custom lower loads for CellSPU
-/*!
- All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
- within a 16-byte block, we have to rotate to extract the requested element.
-
- For extending loads, we also want to ensure that the following sequence is
- emitted, e.g. for MVT::f32 extending load to MVT::f64:
-
-\verbatim
-%1 v16i8,ch = load
-%2 v16i8,ch = rotate %1
-%3 v4f32,ch = bitconvert %2
-%4 f32 = vec2prefslot %3
-%5 f64 = fp_extend %4
-\endverbatim
-*/
-static SDValue
-LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- LoadSDNode *LN = cast<LoadSDNode>(Op);
- SDValue the_chain = LN->getChain();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- EVT InVT = LN->getMemoryVT();
- EVT OutVT = Op.getValueType();
- ISD::LoadExtType ExtType = LN->getExtensionType();
- unsigned alignment = LN->getAlignment();
- int pso = prefslotOffset(InVT);
- DebugLoc dl = Op.getDebugLoc();
- EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
- (128 / InVT.getSizeInBits()));
-
- // two sanity checks
- assert( LN->getAddressingMode() == ISD::UNINDEXED
- && "we should get only UNINDEXED addresses");
- // clean aligned loads can be selected as-is
- if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
- return SDValue();
-
- // Get pointerinfos to the memory chunk(s) that contain the data to load
- uint64_t mpi_offset = LN->getPointerInfo().Offset;
- mpi_offset -= mpi_offset%16;
- MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
- MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
-
- SDValue result;
- SDValue basePtr = LN->getBasePtr();
- SDValue rotate;
-
- if ((alignment%16) == 0) {
- ConstantSDNode *CN;
-
- // Special cases for a known aligned load to simplify the base pointer
- // and the rotation amount:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
- int64_t rotamt = int64_t((offset & 0xf) - pso);
-
- if (rotamt < 0)
- rotamt += 16;
-
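- // E.g. an i16 load (pso == 2) at offset 6 needs a left rotate of
- // 6 - 2 == 4 bytes to land bytes 6-7 in the preferred slot.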
- rotate = DAG.getConstant(rotamt, MVT::i16);
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
- || (basePtr.getOpcode() == SPUISD::IndirectAddr
- && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
- && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
- // Plain aligned a-form address: rotate into preferred slot
- // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
- int64_t rotamt = -pso;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getConstant(rotamt, MVT::i16);
- } else {
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- int64_t rotamt = -pso;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(rotamt, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(-pso, PtrVT));
- }
-
- // Do the load as a i128 to allow possible shifting
- SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
- lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), false, 16);
-
- // When the size is not greater than the alignment, we get all the data
- // with just one load
- if (alignment >= InVT.getSizeInBits()/8) {
- // Update the chain
- the_chain = low.getValue(1);
-
- // Rotate into the preferred slot:
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
- low.getValue(0), rotate);
-
- // Convert the loaded v16i8 vector to the appropriate vector type
- // specified by the operand:
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
- InVT, (128 / InVT.getSizeInBits()));
- result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
- DAG.getNode(ISD::BITCAST, dl, vecVT, result));
- }
- // When alignment is less than the size, we might need (known only at
- // run-time) two loads
- // TODO: if the memory address is composed only from constants, we have
- // extra knowledge, and might avoid the second load
- else {
- // storage position offset from the lower 16-byte aligned memory chunk
- SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
- basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
- // get a register full of ones (this implementation is a workaround: LLVM
- // cannot handle 128-bit signed int constants)
- SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
- ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
-
- SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
- DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(16, PtrVT)),
- highMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), false,
- 16);
-
- the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
- high.getValue(1));
-
- // Shift the (possible) high part right to compensate for the misalignment.
- // If there is no high part (i.e. the value is i64 and the offset is 4),
- // this will zero out the high value.
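- // E.g. for an i64 load at offset 12, the low chunk supplies memory
- // bytes 12-15 and the high chunk supplies bytes 16-19; the OR below
- // reassembles them into the preferred slot.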
- high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
- DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- offset
- ));
-
- // Shift the low similarly
- // TODO: add SPUISD::SHL_BYTES
- low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
-
- // Merge the two parts
- result = DAG.getNode(ISD::BITCAST, dl, vecVT,
- DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
-
- if (!InVT.isVector()) {
- result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
- }
-
- }
- // Handle extending loads by extending the scalar result:
- if (ExtType == ISD::SEXTLOAD) {
- result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
- } else if (ExtType == ISD::ZEXTLOAD) {
- result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
- } else if (ExtType == ISD::EXTLOAD) {
- unsigned NewOpc = ISD::ANY_EXTEND;
-
- if (OutVT.isFloatingPoint())
- NewOpc = ISD::FP_EXTEND;
-
- result = DAG.getNode(NewOpc, dl, OutVT, result);
- }
-
- SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
- SDValue retops[2] = {
- result,
- the_chain
- };
-
- result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
- retops, sizeof(retops) / sizeof(retops[0]));
- return result;
-}
-
-/// Custom lower stores for CellSPU
-/*!
- All CellSPU stores are aligned to 16-byte boundaries, so for elements
- within a 16-byte block, we have to generate a shuffle to insert the
- requested element into its place, then store the resulting block.
- */
-static SDValue
-LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- StoreSDNode *SN = cast<StoreSDNode>(Op);
- SDValue Value = SN->getValue();
- EVT VT = Value.getValueType();
- EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
- unsigned alignment = SN->getAlignment();
- SDValue result;
- EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
- (128 / StVT.getSizeInBits()));
- // Get pointerinfos to the memory chunk(s) that will be accessed by the store
- uint64_t mpi_offset = SN->getPointerInfo().Offset;
- mpi_offset -= mpi_offset%16;
- MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
- MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
-
-
- // two sanity checks
- assert( SN->getAddressingMode() == ISD::UNINDEXED
- && "we should get only UNINDEXED addresses");
- // clean aligned stores can be selected as-is
- if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
- return SDValue();
-
- SDValue alignLoadVec;
- SDValue basePtr = SN->getBasePtr();
- SDValue the_chain = SN->getChain();
- SDValue insertEltOffs;
-
- if ((alignment%16) == 0) {
- ConstantSDNode *CN;
- // Special cases for a known aligned load to simplify the base pointer
- // and insertion byte:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & 0xf), PtrVT));
-
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else {
- // Otherwise, assume it's at byte 0 of basePtr
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Insertion point is solely determined by basePtr's contents
- insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Load the lower part of the memory to which to store.
- SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
- lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
- false, 16);
-
- // If we don't need to store over the 16-byte boundary, one store suffices
- if (alignment >= StVT.getSizeInBits()/8) {
- // Update the chain
- the_chain = low.getValue(1);
-
- LoadSDNode *LN = cast<LoadSDNode>(low);
- SDValue theValue = SN->getValue();
-
- if (StVT != VT
- && (theValue.getOpcode() == ISD::AssertZext
- || theValue.getOpcode() == ISD::AssertSext)) {
- // Drill down and get the value for zero- and sign-extended
- // quantities
- theValue = theValue.getOperand(0);
- }
-
- // If the base pointer is already a D-form address, then just create
- // a new D-form address with a slot offset and the original base pointer.
- // Otherwise generate a D-form address with the slot offset relative
- // to the stack pointer, which is always aligned.
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "CellSPU LowerSTORE: basePtr = ";
- basePtr.getNode()->dump(&DAG);
- errs() << "\n";
- }
-#endif
-
- SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
- insertEltOffs);
- SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
- theValue);
-
- result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
- vectorizeOp, low,
- DAG.getNode(ISD::BITCAST, dl,
- MVT::v4i32, insertEltOp));
-
- result = DAG.getStore(the_chain, dl, result, basePtr,
- lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(),
- 16);
-
- }
- // Do the store when it might cross the 16-byte memory access boundary.
- else {
- // TODO: issue a warning if SN->isVolatile() == true? This is likely not
- // what the user wanted.
-
- // address offset from the nearest lower 16-byte aligned address
- SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
- SN->getBasePtr(),
- DAG.getConstant(0xf, MVT::i32));
- // 16 - offset
- SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- offset);
- // 16 - sizeof(Value)
- SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- DAG.getConstant( VT.getSizeInBits()/8,
- MVT::i32));
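-
- // E.g. for an i32 store, surplus == 12, so the SRL/SHL pair below
- // leaves ones only in bytes 0-3 of lowmask, covering just the value
- // in its preferred slot.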
- // get a register full of ones
- SDValue ones = DAG.getConstant(-1, MVT::v4i32);
- ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
-
- // Create the 128 bit masks that have ones where the data to store is
- // located.
- SDValue lowmask, himask;
- // If the value to store doesn't fill an entire 128 bits, zero out the
- // last bits of the mask so that only the value we want to store is
- // masked; this happens e.g. for a store of i32 with alignment 2.
- if (!VT.isVector()){
- Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
- lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
- lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
- surplus);
- Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
- Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
-
- }
- else {
- lowmask = ones;
- Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
- }
- // This will be zero if no data goes to the high quad.
- himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
- offset_compl);
- lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
- offset);
-
- // Load in the old data and zero out the parts that will be overwritten with
- // the new data to store.
- SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
- DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
- DAG.getConstant( 16, PtrVT)),
- highMemPtr,
- SN->isVolatile(), SN->isNonTemporal(),
- false, 16);
- the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
- hi.getValue(1));
-
- low = DAG.getNode(ISD::AND, dl, MVT::i128,
- DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
- DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
- hi = DAG.getNode(ISD::AND, dl, MVT::i128,
- DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
- DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
-
- // Shift the Value to store into place. rlow contains the parts that go to
- // the lower memory chunk, rhi has the parts that go to the upper one.
- SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
- rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
- SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
- offset_compl);
-
- // Merge the old data and the new data and store the results
- // Need to convert vectors here to integer, as 'OR'ing floats asserts
- rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
- rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
-
- low = DAG.getStore(the_chain, dl, rlow, basePtr,
- lowMemPtr,
- SN->isVolatile(), SN->isNonTemporal(), 16);
- hi = DAG.getStore(the_chain, dl, rhi,
- DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
- DAG.getConstant( 16, PtrVT)),
- highMemPtr,
- SN->isVolatile(), SN->isNonTemporal(), 16);
- result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
- hi.getValue(0));
- }
-
- return result;
-}
-
-//! Generate the address of a constant pool entry.
-static SDValue
-LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- EVT PtrVT = Op.getValueType();
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- const Constant *C = CP->getConstVal();
- SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
- SDValue Zero = DAG.getConstant(0, PtrVT);
- const TargetMachine &TM = DAG.getTarget();
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- // Just return the SDValue with the constant pool address in it.
- return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
- } else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
- }
- }
-
- llvm_unreachable("LowerConstantPool: Relocation model other than static"
- " not supported.");
-}
-
-//! Alternate entry point for generating the address of a constant pool entry
-SDValue
-SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
- return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
-}
-
-static SDValue
-LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- SDValue Zero = DAG.getConstant(0, PtrVT);
- const TargetMachine &TM = DAG.getTarget();
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
- } else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
- }
- }
-
- llvm_unreachable("LowerJumpTable: Relocation model other than static"
- " not supported.");
-}
-
-static SDValue
-LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- EVT PtrVT = Op.getValueType();
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
- PtrVT, GSDN->getOffset());
- const TargetMachine &TM = DAG.getTarget();
- SDValue Zero = DAG.getConstant(0, PtrVT);
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
- } else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
- }
- } else {
- report_fatal_error("LowerGlobalAddress: Relocation model other than static"
- " not supported.");
- /*NOTREACHED*/
- }
-}
-
-//! Custom lower double precision floating point constants
-static SDValue
-LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (VT == MVT::f64) {
- ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
-
- assert((FP != 0) &&
- "LowerConstantFP: Node is not ConstantFPSDNode");
-
- uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
- SDValue T = DAG.getConstant(dbits, MVT::i64);
- SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
- return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
- }
-
- return SDValue();
-}
-
-SDValue
-SPUTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
-
- unsigned ArgOffset = SPUFrameLowering::minStackSize();
- unsigned ArgRegIdx = 0;
- unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- // FIXME: allow for other calling conventions
- CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
-
- // Add DAG nodes to load the arguments or copy them out of registers.
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
- EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
- SDValue ArgVal;
- CCValAssign &VA = ArgLocs[ArgNo];
-
- if (VA.isRegLoc()) {
- const TargetRegisterClass *ArgRegClass;
-
- switch (ObjectVT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("LowerFormalArguments Unhandled argument type: " +
- Twine(ObjectVT.getEVTString()));
- case MVT::i8:
- ArgRegClass = &SPU::R8CRegClass;
- break;
- case MVT::i16:
- ArgRegClass = &SPU::R16CRegClass;
- break;
- case MVT::i32:
- ArgRegClass = &SPU::R32CRegClass;
- break;
- case MVT::i64:
- ArgRegClass = &SPU::R64CRegClass;
- break;
- case MVT::i128:
- ArgRegClass = &SPU::GPRCRegClass;
- break;
- case MVT::f32:
- ArgRegClass = &SPU::R32FPRegClass;
- break;
- case MVT::f64:
- ArgRegClass = &SPU::R64FPRegClass;
- break;
- case MVT::v2f64:
- case MVT::v4f32:
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- ArgRegClass = &SPU::VECREGRegClass;
- break;
- }
-
- unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++ArgRegIdx;
- } else {
- // We need to load the argument to a virtual register if we determined
- // above that we ran out of physical registers of the appropriate type
- // or we're forced to do vararg
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
- false, false, false, 0);
- ArgOffset += StackSlotSize;
- }
-
- InVals.push_back(ArgVal);
- // Update the chain
- Chain = ArgVal.getOperand(0);
- }
-
- // vararg handling:
- if (isVarArg) {
- // FIXME: we should be able to query the argument registers from
- // tablegen generated code.
- static const uint16_t ArgRegs[] = {
- SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
- SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
- SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
- SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
- SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
- SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
- SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
- SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
- SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
- SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
- SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
- };
- // size of ArgRegs array
- const unsigned NumArgRegs = 77;
-
- // We will spill (79-3)+1 registers to the stack
- SmallVector<SDValue, 79-3+1> MemOps;
-
- // Create the frame slot
- for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
- FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
- SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
- unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
- SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
- SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
- false, false, 0);
- Chain = Store.getOperand(0);
- MemOps.push_back(Store);
-
- // Increment address by stack slot size for the next stored argument
- ArgOffset += StackSlotSize;
- }
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
- }
-
- return Chain;
-}
-
-/// isLSAAddress - Return the immediate to use if the specified
- /// value is representable as an LSA address.
-static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C) return 0;
-
- int Addr = C->getZExtValue();
- if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
- (Addr << 14 >> 14) != Addr)
- return 0; // Top 14 bits have to be sext of immediate.
-
- return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
-}
-
-SDValue
-SPUTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
- SDValue Chain = CLI.Chain;
- SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
- CallingConv::ID CallConv = CLI.CallConv;
- bool isVarArg = CLI.IsVarArg;
-
- // CellSPU target does not yet support tail call optimization.
- isTailCall = false;
-
- const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
- unsigned NumOps = Outs.size();
- unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- // FIXME: allow for other calling conventions
- CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
-
- const unsigned NumArgRegs = ArgLocs.size();
-
- // Handy pointer type
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- // Set up a copy of the stack pointer for use loading and storing any
- // arguments that may not fit in the registers available for argument
- // passing.
- SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
-
- // Figure out which arguments are going to go in registers, and which in
- // memory.
- unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
- unsigned ArgRegIdx = 0;
-
- // Keep track of registers passing arguments
- std::vector<std::pair<unsigned, SDValue> > RegsToPass;
- // And the arguments passed on the stack
- SmallVector<SDValue, 8> MemOpChains;
-
- for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
- SDValue Arg = OutVals[ArgRegIdx];
- CCValAssign &VA = ArgLocs[ArgRegIdx];
-
- // PtrOff will be used to store the current argument to the stack if a
- // register cannot be found for it.
- SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected ValueType for argument!");
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- case MVT::i128:
- case MVT::f32:
- case MVT::f64:
- case MVT::v2i64:
- case MVT::v2f64:
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
- ArgOffset += StackSlotSize;
- }
- break;
- }
- }
-
- // Accumulate how many bytes are to be pushed on the stack, including the
- // linkage area, and parameter passing area. According to the SPU ABI,
- // we minimally need space for [LR] and [SP].
- unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
-
- // Insert a call sequence start
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
- true));
-
- if (!MemOpChains.empty()) {
- // Adjust the stack pointer for the stack arguments.
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- SmallVector<SDValue, 8> Ops;
- unsigned CallOpc = SPUISD::CALL;
-
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
- // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
- // node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- EVT CalleeVT = Callee.getValueType();
- SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
-
- if (!ST->usingLargeMem()) {
- // Turn calls to targets that are defined (i.e., have bodies) into BRSL
- // style calls, otherwise, external symbols are BRASL calls. This assumes
- // that declared/defined symbols are in the same compilation unit and can
- // be reached through PC-relative jumps.
- //
- // NOTE:
- // This may be an unsafe assumption for JIT and really large compilation
- // units.
- if (GV->isDeclaration()) {
- Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
- } else {
- Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
- }
- } else {
- // "Large memory" mode: Turn all calls into indirect calls with a X-form
- // address pairs:
- Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
- }
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- EVT CalleeVT = Callee.getValueType();
- SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
- Callee.getValueType());
-
- if (!ST->usingLargeMem()) {
- Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
- } else {
- Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
- }
- } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
- // If this is an absolute destination address that appears to be a legal
- // local store address, use the munged value.
- Callee = SDValue(Dest, 0);
- }
-
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
- // Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
- &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag);
- if (!Ins.empty())
- InFlag = Chain.getValue(1);
-
- // If the function returns void, just return the chain.
- if (Ins.empty())
- return Chain;
-
- // Now handle the return value(s)
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
-
- // If the call has results, copy the values out of the ret val registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign VA = RVLocs[i];
-
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
- InFlag);
- Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
- InVals.push_back(Val);
- }
-
- return Chain;
-}
-
-SDValue
-SPUTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
-
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
- SDValue Flag;
-
- // Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- OutVals[i], Flag);
- Flag = Chain.getValue(1);
- }
-
- if (Flag.getNode())
- return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else
- return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
-}
-
-
-//===----------------------------------------------------------------------===//
-// Vector related lowering:
-//===----------------------------------------------------------------------===//
-
-static ConstantSDNode *
-getVecImm(SDNode *N) {
- SDValue OpVal(0, 0);
-
- // Check to see if this buildvec has a single non-undef value in its elements.
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- if (OpVal.getNode() == 0)
- OpVal = N->getOperand(i);
- else if (OpVal != N->getOperand(i))
- return 0;
- }
-
- if (OpVal.getNode() != 0) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
- return CN;
- }
- }
-
- return 0;
-}
-
- /// get_vec_u18imm - Test if this vector is a vector filled with the same value
-/// and the value fits into an unsigned 18-bit constant, and if so, return the
-/// constant
-SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- uint64_t Value = CN->getZExtValue();
- if (ValueType == MVT::i64) {
- uint64_t UValue = CN->getZExtValue();
- uint32_t upper = uint32_t(UValue >> 32);
- uint32_t lower = uint32_t(UValue);
- if (upper != lower)
- return SDValue();
- Value = Value >> 32;
- }
- if (Value <= 0x3ffff)
- return DAG.getTargetConstant(Value, ValueType);
- }
-
- return SDValue();
-}
-
-/// get_vec_i16imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 16-bit constant, and if so, return the
-/// constant
-SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- int64_t Value = CN->getSExtValue();
- if (ValueType == MVT::i64) {
- uint64_t UValue = CN->getZExtValue();
- uint32_t upper = uint32_t(UValue >> 32);
- uint32_t lower = uint32_t(UValue);
- if (upper != lower)
- return SDValue();
- Value = Value >> 32;
- }
- if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
- return DAG.getTargetConstant(Value, ValueType);
- }
- }
-
- return SDValue();
-}
-
-/// get_vec_i10imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 10-bit constant, and if so, return the
-/// constant
-SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- int64_t Value = CN->getSExtValue();
- if (ValueType == MVT::i64) {
- uint64_t UValue = CN->getZExtValue();
- uint32_t upper = uint32_t(UValue >> 32);
- uint32_t lower = uint32_t(UValue);
- if (upper != lower)
- return SDValue();
- Value = Value >> 32;
- }
- if (isInt<10>(Value))
- return DAG.getTargetConstant(Value, ValueType);
- }
-
- return SDValue();
-}
-
-/// get_vec_i8imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 8-bit constant, and if so, return the
-/// constant.
-///
-/// @note: The incoming vector is v16i8 because that's the only way we can load
-/// constant vectors. Thus, we test to see if the upper and lower bytes are the
-/// same value.
-SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- int Value = (int) CN->getZExtValue();
- if (ValueType == MVT::i16
- && Value <= 0xffff /* truncated from uint64_t */
- && ((short) Value >> 8) == ((short) Value & 0xff))
- return DAG.getTargetConstant(Value & 0xff, ValueType);
- else if (ValueType == MVT::i8
- && (Value & 0xff) == Value)
- return DAG.getTargetConstant(Value, ValueType);
- }
-
- return SDValue();
-}
-
-/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 16-bit constant, and if so, return the
-/// constant
-SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- uint64_t Value = CN->getZExtValue();
- if ((ValueType == MVT::i32
- && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
- || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
- return DAG.getTargetConstant(Value >> 16, ValueType);
- }
-
- return SDValue();
-}
-
-/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
-SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
- }
-
- return SDValue();
-}
-
- /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
-SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
- }
-
- return SDValue();
-}
-
-//! Lower a BUILD_VECTOR instruction creatively:
-static SDValue
-LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
- DebugLoc dl = Op.getDebugLoc();
- BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
- unsigned minSplatBits = EltVT.getSizeInBits();
-
- if (minSplatBits < 16)
- minSplatBits = 16;
-
- APInt APSplatBits, APSplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
-
- if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, minSplatBits)
- || minSplatBits < SplatBitSize)
- return SDValue(); // Wasn't a constant vector or splat exceeded min
-
- uint64_t SplatBits = APSplatBits.getZExtValue();
-
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
- Twine(VT.getEVTString()));
- /*NOTREACHED*/
- case MVT::v4f32: {
- uint32_t Value32 = uint32_t(SplatBits);
- assert(SplatBitSize == 32
- && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
- // NOTE: pretend the constant is an integer. LLVM won't load FP constants
- SDValue T = DAG.getConstant(Value32, MVT::i32);
- return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
- }
- case MVT::v2f64: {
- uint64_t f64val = uint64_t(SplatBits);
- assert(SplatBitSize == 64
- && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
- // NOTE: pretend the constant is an integer. LLVM won't load FP constants
- SDValue T = DAG.getConstant(f64val, MVT::i64);
- return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
- }
- case MVT::v16i8: {
- // 8-bit constants have to be expanded to 16-bits
- unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
- SmallVector<SDValue, 8> Ops;
-
- Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
- }
- case MVT::v8i16: {
- unsigned short Value16 = SplatBits;
- SDValue T = DAG.getConstant(Value16, EltVT);
- SmallVector<SDValue, 8> Ops;
-
- Ops.assign(8, T);
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
- }
- case MVT::v4i32: {
- SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
- }
- case MVT::v2i64: {
- return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
- }
- }
-}
-
-/*!
- */
-SDValue
-SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
- DebugLoc dl) {
- uint32_t upper = uint32_t(SplatVal >> 32);
- uint32_t lower = uint32_t(SplatVal);
-
- if (upper == lower) {
- // Magic constant that can be matched by IL, ILA, et al.
- SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
- return DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- Val, Val, Val, Val));
- } else {
- bool upper_special, lower_special;
-
- // NOTE: This code creates common-case shuffle masks that can be easily
- // detected as common expressions. It is not attempting to create highly
- // specialized masks to replace any and all 0's, 0xff's and 0x80's.
-
- // Detect if the upper or lower half is a special shuffle mask pattern:
- upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
- lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
-
- // Both upper and lower are special, lower to a constant pool load:
- if (lower_special && upper_special) {
- SDValue UpperVal = DAG.getConstant(upper, MVT::i32);
- SDValue LowerVal = DAG.getConstant(lower, MVT::i32);
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- UpperVal, LowerVal, UpperVal, LowerVal);
- return DAG.getNode(ISD::BITCAST, dl, OpVT, BV);
- }
-
- SDValue LO32;
- SDValue HI32;
- SmallVector<SDValue, 16> ShufBytes;
- SDValue Result;
-
- // Create lower vector if not a special pattern
- if (!lower_special) {
- SDValue LO32C = DAG.getConstant(lower, MVT::i32);
- LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- LO32C, LO32C, LO32C, LO32C));
- }
-
- // Create upper vector if not a special pattern
- if (!upper_special) {
- SDValue HI32C = DAG.getConstant(upper, MVT::i32);
- HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- HI32C, HI32C, HI32C, HI32C));
- }
-
- // If either upper or lower are special, then the two input operands are
- // the same (basically, one of them is a "don't care")
- if (lower_special)
- LO32 = HI32;
- if (upper_special)
- HI32 = LO32;
-
- for (int i = 0; i < 4; ++i) {
- uint64_t val = 0;
- for (int j = 0; j < 4; ++j) {
- SDValue V;
- bool process_upper, process_lower;
- val <<= 8;
- process_upper = (upper_special && (i & 1) == 0);
- process_lower = (lower_special && (i & 1) == 1);
-
- if (process_upper || process_lower) {
- if ((process_upper && upper == 0)
- || (process_lower && lower == 0))
- val |= 0x80;
- else if ((process_upper && upper == 0xffffffff)
- || (process_lower && lower == 0xffffffff))
- val |= 0xc0;
- else if ((process_upper && upper == 0x80000000)
- || (process_lower && lower == 0x80000000))
- val |= (j == 0 ? 0xe0 : 0x80);
- } else
- val |= i * 4 + j + ((i & 1) * 16);
- }
-
- ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
- }
-
- return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
- }
-}
-
-/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
-/// which the Cell can operate. The code inspects V3 to ascertain whether the
-/// permutation vector, V3, is monotonically increasing with one "exception"
-/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
-/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
-/// In either case, the net result is going to eventually invoke SHUFB to
-/// permute/shuffle the bytes from V1 and V2.
-/// \note
- /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
- /// generate a control word for byte/halfword/word insertion. This takes care
- /// of a single element move from V2 into V1.
- /// \note
- /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
-static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
- const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
-
- if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
-
- // If we have a single element being moved from V1 to V2, this can be handled
- // using the C*[DX] compute mask instructions, but the vector elements have
- // to be monotonically increasing with one exception element, and the source
- // slot of the element to move must be the same as the destination.
- EVT VecVT = V1.getValueType();
- EVT EltVT = VecVT.getVectorElementType();
- unsigned EltsFromV2 = 0;
- unsigned V2EltOffset = 0;
- unsigned V2EltIdx0 = 0;
- unsigned CurrElt = 0;
- unsigned MaxElts = VecVT.getVectorNumElements();
- unsigned PrevElt = 0;
- bool monotonic = true;
- bool rotate = true;
- int rotamt = 0;
- EVT maskVT; // which of the c?d instructions to use
-
- if (EltVT == MVT::i8) {
- V2EltIdx0 = 16;
- maskVT = MVT::v16i8;
- } else if (EltVT == MVT::i16) {
- V2EltIdx0 = 8;
- maskVT = MVT::v8i16;
- } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
- V2EltIdx0 = 4;
- maskVT = MVT::v4i32;
- } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
- V2EltIdx0 = 2;
- maskVT = MVT::v2i64;
- } else
- llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
-
- for (unsigned i = 0; i != MaxElts; ++i) {
- if (SVN->getMaskElt(i) < 0)
- continue;
-
- unsigned SrcElt = SVN->getMaskElt(i);
-
- if (monotonic) {
- if (SrcElt >= V2EltIdx0) {
- // TODO: optimize for the monotonic case when several consecutive
- // elements are taken from V2. Do we ever get such a case?
- if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
- V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
- else
- monotonic = false;
- ++EltsFromV2;
- } else if (CurrElt != SrcElt) {
- monotonic = false;
- }
-
- ++CurrElt;
- }
-
- if (rotate) {
- if (PrevElt > 0 && SrcElt < MaxElts) {
- if ((PrevElt == SrcElt - 1)
- || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
- PrevElt = SrcElt;
- } else {
- rotate = false;
- }
- } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
- // First time or after a "wrap around"
- rotamt = SrcElt-i;
- PrevElt = SrcElt;
- } else {
- // This isn't a rotation; it takes elements from vector 2
- rotate = false;
- }
- }
- }
-
- if (EltsFromV2 == 1 && monotonic) {
- // Compute mask and shuffle
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- // As SHUFFLE_MASK becomes a c?d instruction, feed it an address.
- // R1 ($sp) is used here only because its low bits are guaranteed to be zero.
- SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(V2EltOffset, MVT::i32));
- SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
- maskVT, Pointer);
-
- // Use shuffle mask in SHUFB synthetic instruction:
- return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
- ShufMaskOp);
- } else if (rotate) {
- if (rotamt < 0)
- rotamt += MaxElts;
- rotamt *= EltVT.getSizeInBits()/8;
- return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
- V1, DAG.getConstant(rotamt, MVT::i16));
- } else {
- // Convert the SHUFFLE_VECTOR mask's input element units to the
- // actual bytes.
- unsigned BytesPerElement = EltVT.getSizeInBits()/8;
-
- SmallVector<SDValue, 16> ResultMask;
- for (unsigned i = 0, e = MaxElts; i != e; ++i) {
- unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
-
- for (unsigned j = 0; j < BytesPerElement; ++j)
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
- }
- SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
- &ResultMask[0], ResultMask.size());
- return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
- }
-}
-
-static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
- SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
- DebugLoc dl = Op.getDebugLoc();
-
- if (Op0.getNode()->getOpcode() == ISD::Constant) {
- // For a constant, build the appropriate constant vector, which will
- // eventually simplify to a vector register load.
-
- ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
- SmallVector<SDValue, 16> ConstVecValues;
- EVT VT;
- size_t n_copies;
-
- // Create a constant vector:
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected constant value type in "
- "LowerSCALAR_TO_VECTOR");
- case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
- case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
- case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
- case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
- case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
- case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
- }
-
- SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
- for (size_t j = 0; j < n_copies; ++j)
- ConstVecValues.push_back(CValue);
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
- &ConstVecValues[0], ConstVecValues.size());
- } else {
- // Otherwise, copy the value from one register to another:
- switch (Op0.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- case MVT::f32:
- case MVT::f64:
- return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
- }
- }
-}
-
-static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- SDValue N = Op.getOperand(0);
- SDValue Elt = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
- SDValue retval;
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
- // Constant argument:
- int EltNo = (int) C->getZExtValue();
-
- // sanity checks:
- if (VT == MVT::i8 && EltNo >= 16)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
- else if (VT == MVT::i16 && EltNo >= 8)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
- else if (VT == MVT::i32 && EltNo >= 4)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
- else if (VT == MVT::i64 && EltNo >= 2)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
-
- if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
- // i32 and i64: Element 0 is the preferred slot
- return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
- }
-
- // Need to generate shuffle mask and extract:
- int prefslot_begin = -1, prefslot_end = -1;
- int elt_byte = EltNo * VT.getSizeInBits() / 8;
-
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid value type!");
- case MVT::i8: {
- prefslot_begin = prefslot_end = 3;
- break;
- }
- case MVT::i16: {
- prefslot_begin = 2; prefslot_end = 3;
- break;
- }
- case MVT::i32:
- case MVT::f32: {
- prefslot_begin = 0; prefslot_end = 3;
- break;
- }
- case MVT::i64:
- case MVT::f64: {
- prefslot_begin = 0; prefslot_end = 7;
- break;
- }
- }
-
- assert(prefslot_begin != -1 && prefslot_end != -1 &&
- "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
-
- unsigned int ShufBytes[16] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- };
- for (int i = 0; i < 16; ++i) {
- // zero fill upper part of preferred slot, don't care about the
- // other slots:
- unsigned int mask_val;
- if (i <= prefslot_end) {
- mask_val =
- ((i < prefslot_begin)
- ? 0x80
- : elt_byte + (i - prefslot_begin));
-
- ShufBytes[i] = mask_val;
- } else
- ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
- }
-
- SDValue ShufMask[4];
- for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
- unsigned bidx = i * 4;
- unsigned int bits = ((ShufBytes[bidx] << 24) |
- (ShufBytes[bidx+1] << 16) |
- (ShufBytes[bidx+2] << 8) |
- ShufBytes[bidx+3]);
- ShufMask[i] = DAG.getConstant(bits, MVT::i32);
- }
-
- SDValue ShufMaskVec =
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
-
- retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
- N, N, ShufMaskVec));
- } else {
- // Variable index: Rotate the requested element into slot 0, then replicate
- // slot 0 across the vector
- EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector()) {
- report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
- "vector type!");
- }
-
- // Make life easier by making sure the index is zero-extended to i32
- if (Elt.getValueType() != MVT::i32)
- Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
-
- // Scale the index to a bit/byte shift quantity
- APInt scaleFactor =
- APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
- unsigned scaleShift = scaleFactor.logBase2();
- SDValue vecShift;
-
- if (scaleShift > 0) {
- // Scale the shift factor:
- Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
- DAG.getConstant(scaleShift, MVT::i32));
- }
-
- vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
-
- // Replicate the bytes starting at byte 0 across the entire vector (for
- // consistency with the notion of a unified register set)
- SDValue replicate;
-
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
- "type");
- /*NOTREACHED*/
- case MVT::i8: {
- SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i16: {
- SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i32:
- case MVT::f32: {
- SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i64:
- case MVT::f64: {
- SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
- SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- loFactor, hiFactor, loFactor, hiFactor);
- break;
- }
- }
-
- retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(SPUISD::SHUFB, dl, VecVT,
- vecShift, vecShift, replicate));
- }
-
- return retval;
-}
-
-static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- SDValue VecOp = Op.getOperand(0);
- SDValue ValOp = Op.getOperand(1);
- SDValue IdxOp = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT eltVT = ValOp.getValueType();
-
- // Use 0 when the lane to insert into is 'undef'
- int64_t Offset = 0;
- if (IdxOp.getOpcode() != ISD::UNDEF) {
- ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
- assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
- Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
- }
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Use $sp ($1) because it's always 16-byte aligned and it's available:
- SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(Offset, PtrVT));
- // widen the mask when dealing with half vectors
- EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
- 128 / VT.getVectorElementType().getSizeInBits());
- SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
-
- SDValue result =
- DAG.getNode(SPUISD::SHUFB, dl, VT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
- VecOp,
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
-
- return result;
-}
-
-static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
- const TargetLowering &TLI)
-{
- SDValue N0 = Op.getOperand(0); // Everything has at least one operand
- DebugLoc dl = Op.getDebugLoc();
- EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
-
- assert(Op.getValueType() == MVT::i8);
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled i8 math operator");
- case ISD::ADD: {
- // 8-bit addition: Promote the arguments up to 16-bits and truncate
- // the result:
- SDValue N1 = Op.getOperand(1);
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
-
- }
-
- case ISD::SUB: {
- // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
- // the result:
- SDValue N1 = Op.getOperand(1);
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::ROTR:
- case ISD::ROTL: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
- ? ISD::ZERO_EXTEND
- : ISD::TRUNCATE;
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- // Replicate lower 8-bits into upper 8:
- SDValue ExpandArg =
- DAG.getNode(ISD::OR, dl, MVT::i16, N0,
- DAG.getNode(ISD::SHL, dl, MVT::i16,
- N0, DAG.getConstant(8, MVT::i32)));
-
- // Truncate back down to i8
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
- }
- case ISD::SRL:
- case ISD::SHL: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = ISD::ZERO_EXTEND;
-
- if (N1.getValueType().bitsGT(ShiftVT))
- N1Opc = ISD::TRUNCATE;
-
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::SRA: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = ISD::SIGN_EXTEND;
-
- if (N1VT.bitsGT(ShiftVT))
- N1Opc = ISD::TRUNCATE;
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::MUL: {
- SDValue N1 = Op.getOperand(1);
-
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- }
-}
-
-//! Lower byte immediate operations for v16i8 vectors:
-static SDValue
-LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
- SDValue ConstVec;
- SDValue Arg;
- EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
-
- ConstVec = Op.getOperand(0);
- Arg = Op.getOperand(1);
- if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
- if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
- ConstVec = ConstVec.getOperand(0);
- } else {
- ConstVec = Op.getOperand(1);
- Arg = Op.getOperand(0);
- if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
- ConstVec = ConstVec.getOperand(0);
- }
- }
- }
-
- if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
- BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
- assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
-
- APInt APSplatBits, APSplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
-
- if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, minSplatBits)
- && minSplatBits <= SplatBitSize) {
- uint64_t SplatBits = APSplatBits.getZExtValue();
- SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
-
- SmallVector<SDValue, 16> tcVec;
- tcVec.assign(16, tc);
- return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
- }
- }
-
- // These operations (AND, OR, XOR) are legal, they just couldn't be custom
- // lowered. Return the operation, rather than a null SDValue.
- return Op;
-}
-
-//! Custom lowering for CTPOP (count population)
-/*!
- Custom lowering code that counts the number of ones in the input
- operand. SPU has such an instruction, but it counts the number of
- ones per byte, which then have to be accumulated.
-*/
-static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
- VT, (128 / VT.getSizeInBits()));
- DebugLoc dl = Op.getDebugLoc();
-
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid value type!");
- case MVT::i8: {
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
- }
-
- case MVT::i16: {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
-
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i16);
- SDValue Mask0 = DAG.getConstant(0x1f, MVT::i16); // popcount of 0xffff is 16, which needs 5 bits
- SDValue Shift1 = DAG.getConstant(8, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- // CNTB_result becomes the chain to which all of the virtual registers
- // CNTB_reg, SUM1_reg become associated:
- SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
-
- SDValue CNTB_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
-
- SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
-
- return DAG.getNode(ISD::AND, dl, MVT::i16,
- DAG.getNode(ISD::ADD, dl, MVT::i16,
- DAG.getNode(ISD::SRL, dl, MVT::i16,
- Tmp1, Shift1),
- Tmp1),
- Mask0);
- }
-
- case MVT::i32: {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
-
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i32);
- SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
- SDValue Shift1 = DAG.getConstant(16, MVT::i32);
- SDValue Shift2 = DAG.getConstant(8, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- // CNTB_result becomes the chain to which all of the virtual registers
- // CNTB_reg, SUM1_reg become associated:
- SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
-
- SDValue CNTB_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
-
- SDValue Comp1 =
- DAG.getNode(ISD::SRL, dl, MVT::i32,
- DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
- Shift1);
-
- SDValue Sum1 =
- DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
- DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
-
- SDValue Sum1_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
-
- SDValue Comp2 =
- DAG.getNode(ISD::SRL, dl, MVT::i32,
- DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
- Shift2);
- SDValue Sum2 =
- DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
- DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
-
- return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
- }
-
- case MVT::i64:
- break;
- }
-
- return SDValue();
-}
-
-//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
-/*!
- f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
- All conversions to i64 are expanded to a libcall.
- */
-static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- const SPUTargetLowering &TLI) {
- EVT OpVT = Op.getValueType();
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
- || OpVT == MVT::i64) {
- // Convert f32 / f64 to i32 / i64 via libcall.
- RTLIB::Libcall LC =
- (Op.getOpcode() == ISD::FP_TO_SINT)
- ? RTLIB::getFPTOSINT(Op0VT, OpVT)
- : RTLIB::getFPTOUINT(Op0VT, OpVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
- SDValue Dummy;
- return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
- }
-
- return Op;
-}
-
-//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
-/*!
- i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
- All conversions from i64 are expanded to a libcall.
- */
-static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
- const SPUTargetLowering &TLI) {
- EVT OpVT = Op.getValueType();
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
- || Op0VT == MVT::i64) {
- // Convert i32, i64 to f64 via libcall:
- RTLIB::Libcall LC =
- (Op.getOpcode() == ISD::SINT_TO_FP)
- ? RTLIB::getSINTTOFP(Op0VT, OpVT)
- : RTLIB::getUINTTOFP(Op0VT, OpVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
- SDValue Dummy;
- return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
- }
-
- return Op;
-}
-
-//! Lower ISD::SETCC
-/*!
- This handles MVT::f64 (double floating point) condition lowering
- */
-static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
- DebugLoc dl = Op.getDebugLoc();
- assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
-
- SDValue lhs = Op.getOperand(0);
- SDValue rhs = Op.getOperand(1);
- EVT lhsVT = lhs.getValueType();
- assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
-
- EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
- APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
- EVT IntVT(MVT::i64);
-
- // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
- // selected to a NOP:
- SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
- SDValue lhsHi32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
- DAG.getNode(ISD::SRL, dl, IntVT,
- i64lhs, DAG.getConstant(32, MVT::i32)));
- SDValue lhsHi32abs =
- DAG.getNode(ISD::AND, dl, MVT::i32,
- lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
- SDValue lhsLo32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
-
- // SETO and SETUO only use the lhs operand:
- if (CC->get() == ISD::SETO) {
- // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
- // SETUO
- APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
- return DAG.getNode(ISD::XOR, dl, ccResultVT,
- DAG.getSetCC(dl, ccResultVT,
- lhs, DAG.getConstantFP(0.0, lhsVT),
- ISD::SETUO),
- DAG.getConstant(ccResultAllOnes, ccResultVT));
- } else if (CC->get() == ISD::SETUO) {
- // Evaluates to true if Op0 is [SQ]NaN
- return DAG.getNode(ISD::AND, dl, ccResultVT,
- DAG.getSetCC(dl, ccResultVT,
- lhsHi32abs,
- DAG.getConstant(0x7ff00000, MVT::i32),
- ISD::SETGE),
- DAG.getSetCC(dl, ccResultVT,
- lhsLo32,
- DAG.getConstant(0, MVT::i32),
- ISD::SETGT));
- }
-
- SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
- SDValue rhsHi32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
- DAG.getNode(ISD::SRL, dl, IntVT,
- i64rhs, DAG.getConstant(32, MVT::i32)));
-
- // If a value is negative, subtract from the sign magnitude constant:
- SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
-
- // Convert the sign-magnitude representation into 2's complement:
- SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
- lhsHi32, DAG.getConstant(31, MVT::i32));
- SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
- SDValue lhsSelect =
- DAG.getNode(ISD::SELECT, dl, IntVT,
- lhsSelectMask, lhsSignMag2TC, i64lhs);
-
- SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
- rhsHi32, DAG.getConstant(31, MVT::i32));
- SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
- SDValue rhsSelect =
- DAG.getNode(ISD::SELECT, dl, IntVT,
- rhsSelectMask, rhsSignMag2TC, i64rhs);
-
- unsigned compareOp;
-
- switch (CC->get()) {
- case ISD::SETOEQ:
- case ISD::SETUEQ:
- compareOp = ISD::SETEQ; break;
- case ISD::SETOGT:
- case ISD::SETUGT:
- compareOp = ISD::SETGT; break;
- case ISD::SETOGE:
- case ISD::SETUGE:
- compareOp = ISD::SETGE; break;
- case ISD::SETOLT:
- case ISD::SETULT:
- compareOp = ISD::SETLT; break;
- case ISD::SETOLE:
- case ISD::SETULE:
- compareOp = ISD::SETLE; break;
- case ISD::SETUNE:
- case ISD::SETONE:
- compareOp = ISD::SETNE; break;
- default:
- report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
- }
-
- SDValue result =
- DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
- (ISD::CondCode) compareOp);
-
- if ((CC->get() & 0x8) == 0) {
- // Ordered comparison:
- SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
- lhs, DAG.getConstantFP(0.0, MVT::f64),
- ISD::SETO);
- SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
- rhs, DAG.getConstantFP(0.0, MVT::f64),
- ISD::SETO);
- SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
-
- result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
- }
-
- return result;
-}
-
-//! Lower ISD::SELECT_CC
-/*!
- ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
- SELB instruction.
-
- \note Need to revisit this in the future: if the code path through the true
- and false value computations is longer than the latency of a branch (6
- cycles), then it would be more advantageous to insert a new basic block and
- branch on the condition. However, this code does not make that assumption,
- given the simplistic uses so far.
- */
-
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- EVT VT = Op.getValueType();
- SDValue lhs = Op.getOperand(0);
- SDValue rhs = Op.getOperand(1);
- SDValue trueval = Op.getOperand(2);
- SDValue falseval = Op.getOperand(3);
- SDValue condition = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
-
- // NOTE: SELB's arguments: $rA, $rB, $mask
- //
- // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
- // where bits in $mask are 1. The compare result has 1s where the
- // condition was true and 0s where the condition was false. Hence, the
- // arguments to SELB get reversed.
-
- // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
- // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
- // with another "cannot select select_cc" assert:
-
- SDValue compare = DAG.getNode(ISD::SETCC, dl,
- TLI.getSetCCResultType(Op.getValueType()),
- lhs, rhs, condition);
- return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
-}
-
-//! Custom lower ISD::TRUNCATE
-static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
-{
- // Type to truncate to
- EVT VT = Op.getValueType();
- MVT simpleVT = VT.getSimpleVT();
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
- VT, (128 / VT.getSizeInBits()));
- DebugLoc dl = Op.getDebugLoc();
-
- // Type to truncate from
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
- // Create shuffle mask, least significant doubleword of quadword
- unsigned maskHigh = 0x08090a0b;
- unsigned maskLow = 0x0c0d0e0f;
- // Use a shuffle to perform the truncation
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32),
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32));
-
- SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
- Op0, Op0, shufMask);
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
- }
-
- return SDValue(); // Leave the truncate unmolested
-}
-
-/*!
- * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
- * algorithm is to duplicate the sign bit using rotmai to generate at
- * least one byte full of sign bits. Then propagate the "sign-byte" into
- * the leftmost words and the i64/i32 into the rightmost words using shufb.
- *
- * @param Op The sext operand
- * @param DAG The current DAG
- * @return The SDValue with the entire instruction sequence
- */
-static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
-{
- DebugLoc dl = Op.getDebugLoc();
-
- // Type to extend to
- MVT OpVT = Op.getValueType().getSimpleVT();
-
- // Type to extend from
- SDValue Op0 = Op.getOperand(0);
- MVT Op0VT = Op0.getValueType().getSimpleVT();
-
- // extend i8 & i16 via i32
- if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
- Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
- Op0VT = MVT::i32;
- }
-
- // The type to extend to needs to be a i128 and
- // the type to extend from needs to be i64 or i32.
- assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
- "LowerSIGN_EXTEND: input and/or output operand have wrong size");
- (void)OpVT;
-
- // Create shuffle mask
- unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
- unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
- unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- DAG.getConstant(mask1, MVT::i32),
- DAG.getConstant(mask1, MVT::i32),
- DAG.getConstant(mask2, MVT::i32),
- DAG.getConstant(mask3, MVT::i32));
-
- // Word wise arithmetic right shift to generate at least one byte
- // that contains sign bits.
- MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
- SDValue sraVal = DAG.getNode(ISD::SRA,
- dl,
- mvt,
- DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
- DAG.getConstant(31, MVT::i32));
-
- // reinterpret as an i128 (SHUFB requires it). This gets lowered away.
- SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- dl, Op0VT, Op0,
- DAG.getTargetConstant(
- SPU::GPRCRegClass.getID(),
- MVT::i32)), 0);
- // Shuffle bytes - Copy the sign bits into the upper 64 bits
- // and the input value into the lower 64 bits.
- SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
- extended, sraVal, shufMask);
- return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
-}
-
-//! Custom (target-specific) lowering entry point
-/*!
- This is where LLVM's DAG selection process calls to do target-specific
- lowering of nodes.
- */
-SDValue
-SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
-{
- unsigned Opc = (unsigned) Op.getOpcode();
- EVT VT = Op.getValueType();
-
- switch (Opc) {
- default: {
-#ifndef NDEBUG
- errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
- errs() << "Op.getOpcode() = " << Opc << "\n";
- errs() << "*Op.getNode():\n";
- Op.getNode()->dump();
-#endif
- llvm_unreachable(0);
- }
- case ISD::LOAD:
- case ISD::EXTLOAD:
- case ISD::SEXTLOAD:
- case ISD::ZEXTLOAD:
- return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::STORE:
- return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::ConstantPool:
- return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::JumpTable:
- return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::ConstantFP:
- return LowerConstantFP(Op, DAG);
-
- // i8, i64 math ops:
- case ISD::ADD:
- case ISD::SUB:
- case ISD::ROTR:
- case ISD::ROTL:
- case ISD::SRL:
- case ISD::SHL:
- case ISD::SRA: {
- if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc, *this);
- break;
- }
-
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- return LowerFP_TO_INT(Op, DAG, *this);
-
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- return LowerINT_TO_FP(Op, DAG, *this);
-
- // Vector-related lowering.
- case ISD::BUILD_VECTOR:
- return LowerBUILD_VECTOR(Op, DAG);
- case ISD::SCALAR_TO_VECTOR:
- return LowerSCALAR_TO_VECTOR(Op, DAG);
- case ISD::VECTOR_SHUFFLE:
- return LowerVECTOR_SHUFFLE(Op, DAG);
- case ISD::EXTRACT_VECTOR_ELT:
- return LowerEXTRACT_VECTOR_ELT(Op, DAG);
- case ISD::INSERT_VECTOR_ELT:
- return LowerINSERT_VECTOR_ELT(Op, DAG);
-
- // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- return LowerByteImmed(Op, DAG);
-
- // i8 multiply:
- case ISD::MUL:
- if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc, *this);
- break; // other MUL types are not custom lowered; don't fall into CTPOP
-
- case ISD::CTPOP:
- return LowerCTPOP(Op, DAG);
-
- case ISD::SELECT_CC:
- return LowerSELECT_CC(Op, DAG, *this);
-
- case ISD::SETCC:
- return LowerSETCC(Op, DAG, *this);
-
- case ISD::TRUNCATE:
- return LowerTRUNCATE(Op, DAG);
-
- case ISD::SIGN_EXTEND:
- return LowerSIGN_EXTEND(Op, DAG);
- }
-
- return SDValue();
-}
-
-void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const
-{
-#if 0
- unsigned Opc = (unsigned) N->getOpcode();
- EVT OpVT = N->getValueType(0);
-
- switch (Opc) {
- default: {
- errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
- errs() << "Op.getOpcode() = " << Opc << "\n";
- errs() << "*Op.getNode():\n";
- N->dump();
- abort();
- /*NOTREACHED*/
- }
- }
-#endif
-
- /* Otherwise, return unchanged */
-}
-
-//===----------------------------------------------------------------------===//
-// Target Optimization Hooks
-//===----------------------------------------------------------------------===//
-
-SDValue
-SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
-{
-#if 0
- TargetMachine &TM = getTargetMachine();
-#endif
- const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
- SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0); // everything has at least one operand
- EVT NodeVT = N->getValueType(0); // The node's value type
- EVT Op0VT = Op0.getValueType(); // The first operand's result
- SDValue Result; // Initially, empty result
- DebugLoc dl = N->getDebugLoc();
-
- switch (N->getOpcode()) {
- default: break;
- case ISD::ADD: {
- SDValue Op1 = N->getOperand(1);
-
- if (Op0.getOpcode() == SPUISD::IndirectAddr
- || Op1.getOpcode() == SPUISD::IndirectAddr) {
- // Normalize the operands to reduce repeated code
- SDValue IndirectArg = Op0, AddArg = Op1;
-
- if (Op1.getOpcode() == SPUISD::IndirectAddr) {
- IndirectArg = Op1;
- AddArg = Op0;
- }
-
- if (isa<ConstantSDNode>(AddArg)) {
- ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
- SDValue IndOp1 = IndirectArg.getOperand(1);
-
- if (CN0->isNullValue()) {
- // (add (SPUindirect <arg>, <arg>), 0) ->
- // (SPUindirect <arg>, <arg>)
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
- << "With: (SPUindirect <arg>, <arg>)\n";
- }
-#endif
-
- return IndirectArg;
- } else if (isa<ConstantSDNode>(IndOp1)) {
- // (add (SPUindirect <arg>, <const>), <const>) ->
- // (SPUindirect <arg>, <const + const>)
- ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
- int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
- SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
- << "), " << CN0->getSExtValue() << ")\n"
- << "With: (SPUindirect <arg>, "
- << combinedConst << ")\n";
- }
-#endif
-
- return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
- IndirectArg, combinedValue);
- }
- }
- }
- break;
- }
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND: {
- if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
- // (any_extend (SPUextract_elt0 <arg>)) ->
- // (SPUextract_elt0 <arg>)
- // Types must match, however...
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\nReplace: ";
- N->dump(&DAG);
- errs() << "\nWith: ";
- Op0.getNode()->dump(&DAG);
- errs() << "\n";
- }
-#endif
-
- return Op0;
- }
- break;
- }
- case SPUISD::IndirectAddr: {
- if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (CN != 0 && CN->isNullValue()) {
- // (SPUindirect (SPUaform <addr>, 0), 0) ->
- // (SPUaform <addr>, 0)
-
- DEBUG(errs() << "Replace: ");
- DEBUG(N->dump(&DAG));
- DEBUG(errs() << "\nWith: ");
- DEBUG(Op0.getNode()->dump(&DAG));
- DEBUG(errs() << "\n");
-
- return Op0;
- }
- } else if (Op0.getOpcode() == ISD::ADD) {
- SDValue Op1 = N->getOperand(1);
- if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
- // (SPUindirect (add <arg>, <arg>), 0) ->
- // (SPUindirect <arg>, <arg>)
- if (CN1->isNullValue()) {
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
- << "With: (SPUindirect <arg>, <arg>)\n";
- }
-#endif
-
- return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
- Op0.getOperand(0), Op0.getOperand(1));
- }
- }
- }
- break;
- }
- case SPUISD::SHL_BITS:
- case SPUISD::SHL_BYTES:
- case SPUISD::ROTBYTES_LEFT: {
- SDValue Op1 = N->getOperand(1);
-
- // Kill degenerate vector shifts:
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
- if (CN->isNullValue()) {
- Result = Op0;
- }
- }
- break;
- }
- case SPUISD::PREFSLOT2VEC: {
- switch (Op0.getOpcode()) {
- default:
- break;
- case ISD::ANY_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND: {
- // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
- // <arg>
- // but only if the SPUprefslot2vec and <arg> types match.
- SDValue Op00 = Op0.getOperand(0);
- if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
- SDValue Op000 = Op00.getOperand(0);
- if (Op000.getValueType() == NodeVT) {
- Result = Op000;
- }
- }
- break;
- }
- case SPUISD::VEC2PREFSLOT: {
- // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
- // <arg>
- Result = Op0.getOperand(0);
- break;
- }
- }
- break;
- }
- }
-
- // Otherwise, return unchanged.
-#ifndef NDEBUG
- if (Result.getNode()) {
- DEBUG(errs() << "\nReplace.SPU: ");
- DEBUG(N->dump(&DAG));
- DEBUG(errs() << "\nWith: ");
- DEBUG(Result.getNode()->dump(&DAG));
- DEBUG(errs() << "\n");
- }
-#endif
-
- return Result;
-}
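
[Editor's aside, not part of the original diff: the ISD::ADD case above folds a
constant addend into an existing indirect address, i.e. (add (SPUindirect base,
c1), c0) becomes (SPUindirect base, c0 + c1). The offset arithmetic is a plain
signed 64-bit sum of the two immediates:]

    #include <cstdint>
    // Hypothetical restatement of the offset fold performed by the combine;
    // corresponds to CN0->getSExtValue() + CN1->getSExtValue() above.
    static int64_t foldIndirectOffset(int64_t c0, int64_t c1) {
      return c0 + c1;
    }
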
-
-//===----------------------------------------------------------------------===//
-// Inline Assembly Support
-//===----------------------------------------------------------------------===//
-
-/// getConstraintType - Given a constraint letter, return the type of
-/// constraint it is for this target.
-SPUTargetLowering::ConstraintType
-SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
- if (ConstraintLetter.size() == 1) {
- switch (ConstraintLetter[0]) {
- default: break;
- case 'b':
- case 'r':
- case 'f':
- case 'v':
- case 'y':
- return C_RegisterClass;
- }
- }
- return TargetLowering::getConstraintType(ConstraintLetter);
-}
-
-/// Examine constraint type and operand type and determine a weight value.
-/// This object must already have been set up with the operand type
-/// and the current alternative constraint selected.
-TargetLowering::ConstraintWeight
-SPUTargetLowering::getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const {
- ConstraintWeight weight = CW_Invalid;
- Value *CallOperandVal = info.CallOperandVal;
- // If we don't have a value, we can't do a match,
- // but allow it at the lowest weight.
- if (CallOperandVal == NULL)
- return CW_Default;
- // Look at the constraint type.
- switch (*constraint) {
- default:
- weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
- break;
-  //FIXME: It seems the supported constraint letters were just copied
-  // from PPC, as the following doesn't correspond to the GCC docs.
-  // Leaving it as is until someone adds the corresponding lowering support.
- case 'b':
- case 'r':
- case 'f':
- case 'd':
- case 'v':
- case 'y':
- weight = CW_Register;
- break;
- }
- return weight;
-}
-
-std::pair<unsigned, const TargetRegisterClass*>
-SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const
-{
- if (Constraint.size() == 1) {
- // GCC RS6000 Constraint Letters
- switch (Constraint[0]) {
- case 'b': // R1-R31
- case 'r': // R0-R31
- if (VT == MVT::i64)
- return std::make_pair(0U, &SPU::R64CRegClass);
- return std::make_pair(0U, &SPU::R32CRegClass);
- case 'f':
- if (VT == MVT::f32)
- return std::make_pair(0U, &SPU::R32FPRegClass);
- if (VT == MVT::f64)
- return std::make_pair(0U, &SPU::R64FPRegClass);
- break;
- case 'v':
- return std::make_pair(0U, &SPU::GPRCRegClass);
- }
- }
-
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-}
-
-//! Compute used/known bits for a SPU operand
-void
-SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth ) const {
-#if 0
- const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
-
- switch (Op.getOpcode()) {
- default:
- // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
- break;
- case CALL:
- case SHUFB:
- case SHUFFLE_MASK:
- case CNTB:
- case SPUISD::PREFSLOT2VEC:
- case SPUISD::LDRESULT:
- case SPUISD::VEC2PREFSLOT:
- case SPUISD::SHLQUAD_L_BITS:
- case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_ROTL:
- case SPUISD::VEC_ROTR:
- case SPUISD::ROTBYTES_LEFT:
- case SPUISD::SELECT_MASK:
- case SPUISD::SELB:
- }
-#endif
-}
-
-unsigned
-SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
- unsigned Depth) const {
- switch (Op.getOpcode()) {
- default:
- return 1;
-
- case ISD::SETCC: {
- EVT VT = Op.getValueType();
-
- if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
- VT = MVT::i32;
- }
- return VT.getSizeInBits();
- }
- }
-}
-
-// LowerAsmOperandForConstraint
-void
-SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const {
- // Default, for the time being, to the base class handler
- TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
-}
-
-/// isLegalAddressImmediate - Return true if the integer value can be used
-/// as the offset of the target addressing mode.
-bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
- Type *Ty) const {
- // SPU's addresses are 256K:
- return (V > -(1 << 18) && V < (1 << 18) - 1);
-}
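
[Editor's aside, not part of the original diff: with 1 << 18 == 262144, the
predicate above accepts offsets in the open interval (-262144, 262143), matching
the SPU's 256K local store; note that both comparisons are strict. A standalone
restatement, for sanity checking only:]

    #include <cstdint>
    // Same range check as isLegalAddressImmediate above.
    static bool inLocalStoreRange(int64_t V) {
      return V > -(1 << 18) && V < (1 << 18) - 1;
    }
    // inLocalStoreRange(262142)  == true,   inLocalStoreRange(262143)  == false
    // inLocalStoreRange(-262143) == true,   inLocalStoreRange(-262144) == false
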
-
-bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
- return false;
-}
-
-bool
-SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- // The SPU target isn't yet aware of offsets.
- return false;
-}
-
-// Can we compare to Imm without writing it into a register?
-bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
-  // ceqi, cgti, etc. all take an s10 operand.
- return isInt<10>(Imm);
-}
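
[Editor's aside, not part of the original diff: isInt<10>(Imm) accepts exactly
the signed 10-bit range [-512, 511], i.e. the immediates that fit the s10 field
of ceqi, cgti and friends:]

    #include <cstdint>
    // Equivalent of llvm::isInt<10>, spelled out for the s10 immediate field.
    static bool fitsS10(int64_t Imm) {
      return Imm >= -512 && Imm <= 511;
    }
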
-
-bool
-SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type * ) const{
-
- // A-form: 18bit absolute address.
- if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
- return true;
-
- // D-form: reg + 14bit offset
- if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
- return true;
-
- // X-form: reg+reg
- if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0)
- return true;
-
- return false;
-}
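
[Editor's aside, not part of the original diff: reading the three accepted shapes
back out of the checks above (field names are those of TargetLowering::AddrMode):]

    // A-form: BaseGV only, no base register, no offset   (18-bit absolute)
    // D-form: HasBaseReg, BaseOffs in [-8192, 8191]      (isInt<14>)
    // X-form: HasBaseReg, Scale == 1, no offset          (register + register)
    // Anything else, e.g. Scale == 2 or reg + reg + offset, is rejected.
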
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
deleted file mode 100644
index 9f1599fa6fed..000000000000
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ /dev/null
@@ -1,178 +0,0 @@
-//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that Cell SPU uses to lower LLVM code into
-// a selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_ISELLOWERING_H
-#define SPU_ISELLOWERING_H
-
-#include "SPU.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-
-namespace llvm {
- namespace SPUISD {
- enum NodeType {
- // Start the numbering where the builtin ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // Pseudo instructions:
- RET_FLAG, ///< Return with flag, matched by bi instruction
-
- Hi, ///< High address component (upper 16)
- Lo, ///< Low address component (lower 16)
- PCRelAddr, ///< Program counter relative address
- AFormAddr, ///< A-form address (local store)
- IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)"
-
- LDRESULT, ///< Load result (value, chain)
- CALL, ///< CALL instruction
- SHUFB, ///< Vector shuffle (permute)
- SHUFFLE_MASK, ///< Shuffle mask
- CNTB, ///< Count leading ones in bytes
- PREFSLOT2VEC, ///< Promote scalar->vector
- VEC2PREFSLOT, ///< Extract element 0
- SHL_BITS, ///< Shift quad left, by bits
- SHL_BYTES, ///< Shift quad left, by bytes
- SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros.
- VEC_ROTL, ///< Vector rotate left
- VEC_ROTR, ///< Vector rotate right
- ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
- ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
- SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
- SELB, ///< Select bits -> (b & mask) | (a & ~mask)
- // Markers: These aren't used to generate target-dependent nodes, but
- // are used during instruction selection.
- ADD64_MARKER, ///< i64 addition marker
- SUB64_MARKER, ///< i64 subtraction marker
- MUL64_MARKER, ///< i64 multiply marker
- LAST_SPUISD ///< Last user-defined instruction
- };
- }
-
- //! Utility functions specific to CellSPU:
- namespace SPU {
- SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
- SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
-
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
- const SPUTargetMachine &TM);
- //! Simplify an EVT::v2i64 constant splat to CellSPU-ready form
- SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat,
- DebugLoc dl);
- }
-
- class SPUTargetMachine; // forward dec'l.
-
- class SPUTargetLowering :
- public TargetLowering
- {
- SPUTargetMachine &SPUTM;
-
- public:
- //! The venerable constructor
- /*!
- This is where the CellSPU backend sets operation handling (i.e., legal,
- custom, expand, or promote).
- */
- SPUTargetLowering(SPUTargetMachine &TM);
-
- //! Get the target machine
- SPUTargetMachine &getSPUTargetMachine() {
- return SPUTM;
- }
-
- /// getTargetNodeName() - This method returns the name of a target specific
- /// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
-
- /// getSetCCResultType - Return the ValueType for ISD::SETCC
- virtual EVT getSetCCResultType(EVT VT) const;
-
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
-
- //! Custom lowering hooks
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-
- //! Custom lowering hook for nodes with illegal result types.
- virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const;
-
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
- virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const;
-
- virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
- unsigned Depth = 0) const;
-
- ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
-
- /// Examine constraint string and operand type and determine a weight value.
- /// The operand object must already have been set up with the operand type.
- ConstraintWeight getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const;
-
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
-
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
-
- /// isLegalAddressImmediate - Return true if the integer value can be used
- /// as the offset of the target addressing mode.
- virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
- virtual bool isLegalAddressImmediate(GlobalValue *) const;
-
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
-
- virtual SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
-
- virtual bool isLegalICmpImmediate(int64_t Imm) const;
-
- virtual bool isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const;
- };
-}
-
-#endif
diff --git a/lib/Target/CellSPU/SPUInstrBuilder.h b/lib/Target/CellSPU/SPUInstrBuilder.h
deleted file mode 100644
index b495537fc2c8..000000000000
--- a/lib/Target/CellSPU/SPUInstrBuilder.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- SPUInstrBuilder.h - Aides for building Cell SPU insts ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file exposes functions that may be used with BuildMI from the
-// MachineInstrBuilder.h file to simplify generating frame and constant pool
-// references.
-//
-// For reference, the order of operands for memory references is:
-// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
-// Displacement.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_INSTRBUILDER_H
-#define SPU_INSTRBUILDER_H
-
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-
-namespace llvm {
-
-/// addFrameReference - This function is used to add a reference to the base of
-/// an abstract object on the stack frame of the current function. This
-/// reference has base register as the FrameIndex offset until it is resolved.
-/// This allows a constant offset to be specified as well...
-///
-inline const MachineInstrBuilder&
-addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
- bool mem = true) {
- if (mem)
- return MIB.addImm(Offset).addFrameIndex(FI);
- else
- return MIB.addFrameIndex(FI).addImm(Offset);
-}
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td
deleted file mode 100644
index cd3f42214345..000000000000
--- a/lib/Target/CellSPU/SPUInstrFormats.td
+++ /dev/null
@@ -1,320 +0,0 @@
-//===-- SPUInstrFormats.td - Cell SPU Instruction Formats --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//
-// Cell SPU instruction formats. Note that these are notationally similar to
-// PowerPC, like "A-Form". But the sizes of operands and fields differ.
-
-// This was kiped from the PPC instruction formats (seemed like a good idea...)
-
-class SPUInstr<dag OOL, dag IOL, string asmstr, InstrItinClass itin>
- : Instruction {
- field bits<32> Inst;
-
- let Namespace = "SPU";
- let OutOperandList = OOL;
- let InOperandList = IOL;
- let AsmString = asmstr;
- let Itinerary = itin;
-}
-
-// RR Format
-class RRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin> {
- bits<7> RA;
- bits<7> RB;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-10} = opcode;
- let Inst{11-17} = RB;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
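
[Editor's aside, not part of the original diff: the SPU ISA numbers bits from the
most significant end, so the RR layout above packs as opcode(11) | RB(7) | RA(7) |
RT(7) from the top of the 32-bit word. A hypothetical encoder for that layout:]

    #include <cstdint>
    // Pack an RR-form instruction; field widths as in RRForm above.
    static uint32_t encodeRR(uint32_t opc11, uint32_t rb, uint32_t ra, uint32_t rt) {
      return (opc11 & 0x7FF) << 21   // Inst{0-10}: 11-bit opcode
           | (rb    & 0x7F)  << 14   // Inst{11-17}: RB
           | (ra    & 0x7F)  << 7    // Inst{18-24}: RA
           | (rt    & 0x7F);         // Inst{25-31}: RT
    }
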
-
-let RB = 0 in {
- // RR Format, where RB is zeroed (don't care):
- class RRForm_1<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-
- let RA = 0 in {
- // RR Format, where RA and RB are zeroed (don't care):
- // Used for reads from status control registers (see FPSCRRr32)
- class RRForm_2<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
- }
-}
-
-let RT = 0 in {
- // RR Format, where RT is zeroed (don't care), or as the instruction handbook
- // says, "RT is a false target." Used in "Halt if" instructions
- class RRForm_3<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-}
-
-// RRR Format
-class RRRForm<bits<4> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<7> RA;
- bits<7> RB;
- bits<7> RC;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-3} = opcode;
- let Inst{4-10} = RT;
- let Inst{11-17} = RB;
- let Inst{18-24} = RA;
- let Inst{25-31} = RC;
-}
-
-// RI7 Format
-class RI7Form<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<7> i7;
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-10} = opcode;
- let Inst{11-17} = i7;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
-
-// CVTIntFp Format
-class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-9} = opcode;
- let Inst{10-17} = 0;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
-
-let RA = 0 in {
- class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
- { }
-
- let RT = 0 in {
- // Branch instruction format (without D/E flag settings)
- class BRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-
- class BIForm<bits<11> opcode, string asmstr, list<dag> pattern>
- : RRForm<opcode, (outs), (ins R32C:$func), asmstr, BranchResolv,
- pattern>
- { }
-
- let RB = 0 in {
- // Return instruction (bi, branch indirect), RA is zero (LR):
- class RETForm<string asmstr, list<dag> pattern>
- : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv,
- pattern>
- { }
- }
- }
-}
-
-// Branch indirect external data forms:
-class BISLEDForm<bits<2> DE_flag, string asmstr, list<dag> pattern>
- : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv>
-{
- bits<7> Rcalldest;
-
- let Pattern = pattern;
-
- let Inst{0-10} = 0b11010101100;
- let Inst{11} = 0;
- let Inst{12-13} = DE_flag;
- let Inst{14-17} = 0b0000;
- let Inst{18-24} = Rcalldest;
- let Inst{25-31} = 0b0000000;
-}
-
-// RI10 Format
-class RI10Form<bits<8> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<10> i10;
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-7} = opcode;
- let Inst{8-17} = i10;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
-
-// RI10 Format, where the constant is zero (or effectively ignored by the
-// SPU)
-let i10 = 0 in {
- class RI10Form_1<bits<8> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-}
-
-// RI10 Format, where RT is ignored.
-// This format is used primarily by the Halt If ... Immediate set of
-// instructions
-let RT = 0 in {
- class RI10Form_2<bits<8> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-}
-
-// RI16 Format
-class RI16Form<bits<9> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<16> i16;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-8} = opcode;
- let Inst{9-24} = i16;
- let Inst{25-31} = RT;
-}
-
-// Specialized version of the RI16 Format for unconditional branch relative and
-// branch absolute, branch and set link. Note that for branch and set link, the
-// link register doesn't have to be $lr, but this is actually hard coded into
-// the instruction pattern.
-
-let RT = 0 in {
- class UncondBranch<bits<9> opcode, dag OOL, dag IOL, string asmstr,
- list<dag> pattern>
- : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
- { }
-
- class BranchSetLink<bits<9> opcode, dag OOL, dag IOL, string asmstr,
- list<dag> pattern>
- : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
- { }
-}
-
-//===----------------------------------------------------------------------===//
-// Specialized versions of RI16:
-//===----------------------------------------------------------------------===//
-
-// RI18 Format
-class RI18Form<bits<7> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<18> i18;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-6} = opcode;
- let Inst{7-24} = i18;
- let Inst{25-31} = RT;
-}
-
-//===----------------------------------------------------------------------===//
-// Instruction formats for intrinsics:
-//===----------------------------------------------------------------------===//
-
-// RI10 Format for v8i16 intrinsics
-class RI10_Int_v8i16<bits<8> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
- !strconcat(opc, " $rT, $rA, $val"), itin,
- [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
- i16ImmSExt10:$val))] >;
-
-class RI10_Int_v4i32<bits<8> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
- !strconcat(opc, " $rT, $rA, $val"), itin,
- [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
- i32ImmSExt10:$val))] >;
-
-// RR Format for v8i16 intrinsics
-class RR_Int_v8i16<bits<11> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- !strconcat(opc, " $rT, $rA, $rB"), itin,
- [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))] >;
-
-// RR Format for v4i32 intrinsics
-class RR_Int_v4i32<bits<11> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- !strconcat(opc, " $rT, $rA, $rB"), itin,
- [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
- (v4i32 VECREG:$rB)))] >;
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions, like call frames:
-//===----------------------------------------------------------------------===//
-
-class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, NoItinerary> {
- let OutOperandList = OOL;
- let InOperandList = IOL;
- let AsmString = asmstr;
- let Pattern = pattern;
- let Inst{31-0} = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Branch hint formats
-//===----------------------------------------------------------------------===//
-// For hbrr and hbra
-class HBI16Form<bits<7> opcode, dag IOL, string asmstr>
- : Instruction {
- field bits<32> Inst;
- bits<16>i16;
- bits<9>RO;
-
- let Namespace = "SPU";
- let InOperandList = IOL;
- let OutOperandList = (outs); //no output
- let AsmString = asmstr;
- let Itinerary = BranchHints;
-
- let Inst{0-6} = opcode;
- let Inst{7-8} = RO{8-7};
- let Inst{9-24} = i16;
- let Inst{25-31} = RO{6-0};
-}
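
[Editor's aside, not part of the original diff: this hint format splits the 9-bit
RO field, with RO{8-7} landing in Inst{7-8} and RO{6-0} in Inst{25-31}. Under the
same MSB-first numbering assumed above, a hypothetical encoder:]

    #include <cstdint>
    // Pack an hbrr/hbra-style branch hint; note the two-way split of RO.
    static uint32_t encodeHBI16(uint32_t opc7, uint32_t ro9, uint32_t imm16) {
      uint32_t roHi = (ro9 >> 7) & 0x3;  // RO{8-7} -> Inst{7-8}
      uint32_t roLo = ro9 & 0x7F;        // RO{6-0} -> Inst{25-31}
      return (opc7 & 0x7F) << 25         // Inst{0-6}: 7-bit opcode
           | roHi << 23                  // Inst{7-8}
           | (imm16 & 0xFFFFu) << 7      // Inst{9-24}: 16-bit immediate
           | roLo;                       // Inst{25-31}
    }
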
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
deleted file mode 100644
index b25a6397ec3a..000000000000
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ /dev/null
@@ -1,449 +0,0 @@
-//===-- SPUInstrInfo.cpp - Cell SPU Instruction Information ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Cell SPU implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUInstrInfo.h"
-#include "SPUInstrBuilder.h"
-#include "SPUTargetMachine.h"
-#include "SPUHazardRecognizers.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define GET_INSTRINFO_CTOR
-#include "SPUGenInstrInfo.inc"
-
-using namespace llvm;
-
-namespace {
- //! Predicate for an unconditional branch instruction
- inline bool isUncondBranch(const MachineInstr *I) {
- unsigned opc = I->getOpcode();
-
- return (opc == SPU::BR
- || opc == SPU::BRA
- || opc == SPU::BI);
- }
-
- //! Predicate for a conditional branch instruction
- inline bool isCondBranch(const MachineInstr *I) {
- unsigned opc = I->getOpcode();
-
- return (opc == SPU::BRNZr32
- || opc == SPU::BRNZv4i32
- || opc == SPU::BRZr32
- || opc == SPU::BRZv4i32
- || opc == SPU::BRHNZr16
- || opc == SPU::BRHNZv8i16
- || opc == SPU::BRHZr16
- || opc == SPU::BRHZv8i16);
- }
-}
-
-SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
- : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
- TM(tm),
- RI(*TM.getSubtargetImpl(), *this)
-{ /* NOP */ }
-
-/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
-/// this target when scheduling the DAG.
-ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer(
- const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- const TargetInstrInfo *TII = TM->getInstrInfo();
- assert(TII && "No InstrInfo?");
- return new SPUHazardRecognizer(*TII);
-}
-
-unsigned
-SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case SPU::LQDv16i8:
- case SPU::LQDv8i16:
- case SPU::LQDv4i32:
- case SPU::LQDv4f32:
- case SPU::LQDv2f64:
- case SPU::LQDr128:
- case SPU::LQDr64:
- case SPU::LQDr32:
- case SPU::LQDr16: {
- const MachineOperand MOp1 = MI->getOperand(1);
- const MachineOperand MOp2 = MI->getOperand(2);
- if (MOp1.isImm() && MOp2.isFI()) {
- FrameIndex = MOp2.getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- }
- return 0;
-}
-
-unsigned
-SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case SPU::STQDv16i8:
- case SPU::STQDv8i16:
- case SPU::STQDv4i32:
- case SPU::STQDv4f32:
- case SPU::STQDv2f64:
- case SPU::STQDr128:
- case SPU::STQDr64:
- case SPU::STQDr32:
- case SPU::STQDr16:
- case SPU::STQDr8: {
- const MachineOperand MOp1 = MI->getOperand(1);
- const MachineOperand MOp2 = MI->getOperand(2);
- if (MOp1.isImm() && MOp2.isFI()) {
- FrameIndex = MOp2.getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- }
- return 0;
-}
-
-void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const
-{
- // We support cross register class moves for our aliases, such as R3 in any
- // reg class to any other reg class containing R3. This is required because
- // we instruction-select bitconvert i64 -> f64 as a noop, for example, so our
- // types have no specific meaning.
-
- BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-void
-SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- unsigned opc;
- bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
- if (RC == &SPU::GPRCRegClass)
- opc = isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128;
- else if (RC == &SPU::R64CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64;
- else if (RC == &SPU::R64FPRegClass)
- opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64;
- else if (RC == &SPU::R32CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32;
- else if (RC == &SPU::R32FPRegClass)
- opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32;
- else if (RC == &SPU::R16CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16;
- else if (RC == &SPU::R8CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8;
- else if (RC == &SPU::VECREGRegClass)
- opc = isValidFrameIdx ? SPU::STQDv16i8 : SPU::STQXv16i8;
- else
- llvm_unreachable("Unknown regclass!");
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
- addFrameReference(BuildMI(MBB, MI, DL, get(opc))
- .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
-}
-
-void
-SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- unsigned opc;
- bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
- if (RC == &SPU::GPRCRegClass)
- opc = isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128;
- else if (RC == &SPU::R64CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64;
- else if (RC == &SPU::R64FPRegClass)
- opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64;
- else if (RC == &SPU::R32CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32;
- else if (RC == &SPU::R32FPRegClass)
- opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32;
- else if (RC == &SPU::R16CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16;
- else if (RC == &SPU::R8CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8;
- else if (RC == &SPU::VECREGRegClass)
- opc = isValidFrameIdx ? SPU::LQDv16i8 : SPU::LQXv16i8;
- else
- llvm_unreachable("Unknown regclass in loadRegFromStackSlot!");
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
- addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
-}
-
-//! Branch analysis
-/*!
- \note This code was kiped from PPC. There may be more branch analysis for
- CellSPU than what's currently done here.
- */
-bool
-SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- // If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
- return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
- if (!isUnpredicatedTerminator(I))
- return false;
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = I;
-
- // If there is only one terminator instruction, process it.
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (isUncondBranch(LastInst)) {
- // Check for jump tables
- if (!LastInst->getOperand(0).isMBB())
- return true;
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- } else if (isCondBranch(LastInst)) {
- // Block ends with fall-through condbranch.
- TBB = LastInst->getOperand(1).getMBB();
- DEBUG(errs() << "Pushing LastInst: ");
- DEBUG(LastInst->dump());
- Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
- Cond.push_back(LastInst->getOperand(0));
- return false;
- }
- // Otherwise, don't know what this is.
- return true;
- }
-
- // Get the instruction before it if it's a terminator.
- MachineInstr *SecondLastInst = I;
-
- // If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() &&
- isUnpredicatedTerminator(--I))
- return true;
-
- // If the block ends with a conditional and unconditional branch, handle it.
- if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
- TBB = SecondLastInst->getOperand(1).getMBB();
- DEBUG(errs() << "Pushing SecondLastInst: ");
- DEBUG(SecondLastInst->dump());
- Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
- Cond.push_back(SecondLastInst->getOperand(0));
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
-
- // If the block ends with two unconditional branches, handle it. The second
- // one is not executed, so remove it.
- if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return false;
- }
-
- // Otherwise, can't handle this.
- return true;
-}
-
-// Search MBB for branch hint labels and branch hint ops.
-static void removeHBR( MachineBasicBlock &MBB) {
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I){
- if (I->getOpcode() == SPU::HBRA ||
- I->getOpcode() == SPU::HBR_LABEL){
- I=MBB.erase(I);
- if (I == MBB.end())
- break;
- }
- }
-}
-
-unsigned
-SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- removeHBR(MBB);
- if (I == MBB.begin())
- return 0;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return 0;
- --I;
- }
- if (!isCondBranch(I) && !isUncondBranch(I))
- return 0;
-
- // Remove the first branch.
- DEBUG(errs() << "Removing branch: ");
- DEBUG(I->dump());
- I->eraseFromParent();
- I = MBB.end();
- if (I == MBB.begin())
- return 1;
-
- --I;
- if (!(isCondBranch(I) || isUncondBranch(I)))
- return 1;
-
- // Remove the second branch.
- DEBUG(errs() << "Removing second branch: ");
- DEBUG(I->dump());
- I->eraseFromParent();
- return 2;
-}
-
-/** Find the optimal position for a hint branch instruction in a basic block.
- * This should take into account:
- *  - the branch hint delays
- *  - congestion of the memory bus
- *  - dual-issue scheduling (i.e. avoid insertion of nops)
- * Current implementation is rather simplistic.
- */
-static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB)
-{
- MachineBasicBlock::iterator J = MBB.end();
- for( int i=0; i<8; i++) {
- if( J == MBB.begin() ) return J;
- J--;
- }
- return J;
-}
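
[Editor's note, not part of the original diff: "optimal" currently just means
stepping back at most eight instructions from the end of the block. An SPU branch
hint only pays off if it is issued enough cycles ahead of the branch it targets,
and the fixed eight-slot distance is a cheap stand-in for that; the considerations
listed above are acknowledged by the comment but not implemented.]
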
-
-unsigned
-SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- // Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 2 || Cond.size() == 0) &&
- "SPU branch conditions have two components!");
-
- MachineInstrBuilder MIB;
-  // TODO: make this algorithm more accurate.
- bool haveHBR = MBB.size()>8;
-
- removeHBR(MBB);
- MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol();
- // Add a label just before the branch
- if (haveHBR)
- MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel);
-
- // One-way branch.
- if (FBB == 0) {
- if (Cond.empty()) {
- // Unconditional branch
- MIB = BuildMI(&MBB, DL, get(SPU::BR));
- MIB.addMBB(TBB);
-
- DEBUG(errs() << "Inserted one-way uncond branch: ");
- DEBUG((*MIB).dump());
-
-      // Basic blocks have just one branch, so it is safe to add the hint at its end.
- if (haveHBR) {
- MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
- MIB.addSym(branchLabel);
- MIB.addMBB(TBB);
- }
- } else {
- // Conditional branch
- MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
- MIB.addReg(Cond[1].getReg()).addMBB(TBB);
-
- if (haveHBR) {
- MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
- MIB.addSym(branchLabel);
- MIB.addMBB(TBB);
- }
-
- DEBUG(errs() << "Inserted one-way cond branch: ");
- DEBUG((*MIB).dump());
- }
- return 1;
- } else {
- MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
- MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR));
-
- // Two-way Conditional Branch.
- MIB.addReg(Cond[1].getReg()).addMBB(TBB);
- MIB2.addMBB(FBB);
-
- if (haveHBR) {
- MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
- MIB.addSym(branchLabel);
- MIB.addMBB(FBB);
- }
-
- DEBUG(errs() << "Inserted conditional branch: ");
- DEBUG((*MIB).dump());
- DEBUG(errs() << "part 2: ");
- DEBUG((*MIB2).dump());
- return 2;
- }
-}
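
[Editor's note, not part of the original diff: the two-way case above emits a
conditional branch to TBB (opcode taken from Cond[0], condition register from
Cond[1]) followed by an unconditional SPU::BR to FBB, and returns 2; the one-way
cases emit a single conditional or unconditional branch and return 1. When the
block has more than eight instructions, an HBR_LABEL marks the branch site and a
matching HBRA hint is inserted earlier in the block via findHBRPosition.]
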
-
-//! Reverses a branch's condition, returning false on success.
-bool
-SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
- const {
- // Pretty brainless way of inverting the condition, but it works, considering
- // there are only two conditions...
- static struct {
- unsigned Opc; //! The incoming opcode
- unsigned RevCondOpc; //! The reversed condition opcode
- } revconds[] = {
- { SPU::BRNZr32, SPU::BRZr32 },
- { SPU::BRNZv4i32, SPU::BRZv4i32 },
- { SPU::BRZr32, SPU::BRNZr32 },
- { SPU::BRZv4i32, SPU::BRNZv4i32 },
- { SPU::BRHNZr16, SPU::BRHZr16 },
- { SPU::BRHNZv8i16, SPU::BRHZv8i16 },
- { SPU::BRHZr16, SPU::BRHNZr16 },
- { SPU::BRHZv8i16, SPU::BRHNZv8i16 }
- };
-
- unsigned Opc = unsigned(Cond[0].getImm());
- // Pretty dull mapping between the two conditions that SPU can generate:
- for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) {
- if (revconds[i].Opc == Opc) {
- Cond[0].setImm(revconds[i].RevCondOpc);
- return false;
- }
- }
-
- return true;
-}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
deleted file mode 100644
index 85e5821aefa1..000000000000
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ /dev/null
@@ -1,84 +0,0 @@
-//===-- SPUInstrInfo.h - Cell SPU Instruction Information -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the CellSPU implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_INSTRUCTIONINFO_H
-#define SPU_INSTRUCTIONINFO_H
-
-#include "SPU.h"
-#include "SPURegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "SPUGenInstrInfo.inc"
-
-namespace llvm {
- //! Cell SPU instruction information class
- class SPUInstrInfo : public SPUGenInstrInfo {
- SPUTargetMachine &TM;
- const SPURegisterInfo RI;
- public:
- explicit SPUInstrInfo(SPUTargetMachine &tm);
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
-
- ScheduleHazardRecognizer *
- CreateTargetHazardRecognizer(const TargetMachine *TM,
- const ScheduleDAG *DAG) const;
-
- unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
- unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- //! Store a register to a stack slot, based on its register class.
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- //! Load a register from a stack slot, based on its register class.
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- //! Reverses a branch's condition, returning false on success.
- virtual
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
-
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
- };
-}
-
-#endif
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
deleted file mode 100644
index 117acd736aaa..000000000000
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ /dev/null
@@ -1,4484 +0,0 @@
-//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Cell SPU Instructions:
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// TODO Items (not urgent today, but would be nice, low priority)
-//
-// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
-// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
-// in 16-bit and 32-bit constants and reduce instruction count.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions:
-//===----------------------------------------------------------------------===//
-
-let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
- def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt),
- "${:comment} ADJCALLSTACKDOWN",
- [(callseq_start timm:$amt)]>;
- def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt),
- "${:comment} ADJCALLSTACKUP",
- [(callseq_end timm:$amt)]>;
- def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ),
- "$targ:\t${:comment}branch hint target",[ ]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Loads:
-// NB: The ordering is actually important, since the instruction selection
-// will try each of the instructions in sequence, i.e., the D-form first with
-// the 10-bit displacement, then the A-form with the 16-bit displacement, and
-// finally the X-form with register-register addressing.
-//===----------------------------------------------------------------------===//
-
-let canFoldAsLoad = 1 in {
- class LoadDFormVec<ValueType vectype>
- : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src),
- "lqd\t$rT, $src",
- LoadStore,
- [(set (vectype VECREG:$rT), (load dform_addr:$src))]>
- { }
-
- class LoadDForm<RegisterClass rclass>
- : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src),
- "lqd\t$rT, $src",
- LoadStore,
- [(set rclass:$rT, (load dform_addr:$src))]>
- { }
-
- multiclass LoadDForms
- {
- def v16i8: LoadDFormVec<v16i8>;
- def v8i16: LoadDFormVec<v8i16>;
- def v4i32: LoadDFormVec<v4i32>;
- def v2i64: LoadDFormVec<v2i64>;
- def v4f32: LoadDFormVec<v4f32>;
- def v2f64: LoadDFormVec<v2f64>;
-
- def r128: LoadDForm<GPRC>;
- def r64: LoadDForm<R64C>;
- def r32: LoadDForm<R32C>;
- def f32: LoadDForm<R32FP>;
- def f64: LoadDForm<R64FP>;
- def r16: LoadDForm<R16C>;
- def r8: LoadDForm<R8C>;
- }
-
- class LoadAFormVec<ValueType vectype>
- : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src),
- "lqa\t$rT, $src",
- LoadStore,
- [(set (vectype VECREG:$rT), (load aform_addr:$src))]>
- { }
-
- class LoadAForm<RegisterClass rclass>
- : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src),
- "lqa\t$rT, $src",
- LoadStore,
- [(set rclass:$rT, (load aform_addr:$src))]>
- { }
-
- multiclass LoadAForms
- {
- def v16i8: LoadAFormVec<v16i8>;
- def v8i16: LoadAFormVec<v8i16>;
- def v4i32: LoadAFormVec<v4i32>;
- def v2i64: LoadAFormVec<v2i64>;
- def v4f32: LoadAFormVec<v4f32>;
- def v2f64: LoadAFormVec<v2f64>;
-
- def r128: LoadAForm<GPRC>;
- def r64: LoadAForm<R64C>;
- def r32: LoadAForm<R32C>;
- def f32: LoadAForm<R32FP>;
- def f64: LoadAForm<R64FP>;
- def r16: LoadAForm<R16C>;
- def r8: LoadAForm<R8C>;
- }
-
- class LoadXFormVec<ValueType vectype>
- : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src),
- "lqx\t$rT, $src",
- LoadStore,
- [(set (vectype VECREG:$rT), (load xform_addr:$src))]>
- { }
-
- class LoadXForm<RegisterClass rclass>
- : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src),
- "lqx\t$rT, $src",
- LoadStore,
- [(set rclass:$rT, (load xform_addr:$src))]>
- { }
-
- multiclass LoadXForms
- {
- def v16i8: LoadXFormVec<v16i8>;
- def v8i16: LoadXFormVec<v8i16>;
- def v4i32: LoadXFormVec<v4i32>;
- def v2i64: LoadXFormVec<v2i64>;
- def v4f32: LoadXFormVec<v4f32>;
- def v2f64: LoadXFormVec<v2f64>;
-
- def r128: LoadXForm<GPRC>;
- def r64: LoadXForm<R64C>;
- def r32: LoadXForm<R32C>;
- def f32: LoadXForm<R32FP>;
- def f64: LoadXForm<R64FP>;
- def r16: LoadXForm<R16C>;
- def r8: LoadXForm<R8C>;
- }
-
- defm LQA : LoadAForms;
- defm LQD : LoadDForms;
- defm LQX : LoadXForms;
-
-/* Load quadword, PC relative: Not much use at this point in time.
- Might be of use later for relocatable code. It's effectively the
- same as LQA, but uses PC-relative addressing.
- def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp),
- "lqr\t$rT, $disp", LoadStore,
- [(set VECREG:$rT, (load iaddr:$disp))]>;
- */
-}
-
-//===----------------------------------------------------------------------===//
-// Stores:
-//===----------------------------------------------------------------------===//
-class StoreDFormVec<ValueType vectype>
- : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src),
- "stqd\t$rT, $src",
- LoadStore,
- [(store (vectype VECREG:$rT), dform_addr:$src)]>
-{ }
-
-class StoreDForm<RegisterClass rclass>
- : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src),
- "stqd\t$rT, $src",
- LoadStore,
- [(store rclass:$rT, dform_addr:$src)]>
-{ }
-
-multiclass StoreDForms
-{
- def v16i8: StoreDFormVec<v16i8>;
- def v8i16: StoreDFormVec<v8i16>;
- def v4i32: StoreDFormVec<v4i32>;
- def v2i64: StoreDFormVec<v2i64>;
- def v4f32: StoreDFormVec<v4f32>;
- def v2f64: StoreDFormVec<v2f64>;
-
- def r128: StoreDForm<GPRC>;
- def r64: StoreDForm<R64C>;
- def r32: StoreDForm<R32C>;
- def f32: StoreDForm<R32FP>;
- def f64: StoreDForm<R64FP>;
- def r16: StoreDForm<R16C>;
- def r8: StoreDForm<R8C>;
-}
-
-class StoreAFormVec<ValueType vectype>
- : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src),
- "stqa\t$rT, $src",
- LoadStore,
- [(store (vectype VECREG:$rT), aform_addr:$src)]>;
-
-class StoreAForm<RegisterClass rclass>
- : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src),
- "stqa\t$rT, $src",
- LoadStore,
- [(store rclass:$rT, aform_addr:$src)]>;
-
-multiclass StoreAForms
-{
- def v16i8: StoreAFormVec<v16i8>;
- def v8i16: StoreAFormVec<v8i16>;
- def v4i32: StoreAFormVec<v4i32>;
- def v2i64: StoreAFormVec<v2i64>;
- def v4f32: StoreAFormVec<v4f32>;
- def v2f64: StoreAFormVec<v2f64>;
-
- def r128: StoreAForm<GPRC>;
- def r64: StoreAForm<R64C>;
- def r32: StoreAForm<R32C>;
- def f32: StoreAForm<R32FP>;
- def f64: StoreAForm<R64FP>;
- def r16: StoreAForm<R16C>;
- def r8: StoreAForm<R8C>;
-}
-
-class StoreXFormVec<ValueType vectype>
- : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src),
- "stqx\t$rT, $src",
- LoadStore,
- [(store (vectype VECREG:$rT), xform_addr:$src)]>
-{ }
-
-class StoreXForm<RegisterClass rclass>
- : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src),
- "stqx\t$rT, $src",
- LoadStore,
- [(store rclass:$rT, xform_addr:$src)]>
-{ }
-
-multiclass StoreXForms
-{
- def v16i8: StoreXFormVec<v16i8>;
- def v8i16: StoreXFormVec<v8i16>;
- def v4i32: StoreXFormVec<v4i32>;
- def v2i64: StoreXFormVec<v2i64>;
- def v4f32: StoreXFormVec<v4f32>;
- def v2f64: StoreXFormVec<v2f64>;
-
- def r128: StoreXForm<GPRC>;
- def r64: StoreXForm<R64C>;
- def r32: StoreXForm<R32C>;
- def f32: StoreXForm<R32FP>;
- def f64: StoreXForm<R64FP>;
- def r16: StoreXForm<R16C>;
- def r8: StoreXForm<R8C>;
-}
-
-defm STQD : StoreDForms;
-defm STQA : StoreAForms;
-defm STQX : StoreXForms;
-
-/* Store quadword, PC relative: Not much use at this point in time. Might
- be useful for relocatable code.
-def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
- "stqr\t$rT, $disp", LoadStore,
- [(store VECREG:$rT, iaddr:$disp)]>;
-*/
-
-//===----------------------------------------------------------------------===//
-// Generate Controls for Insertion:
-//===----------------------------------------------------------------------===//
-
-def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cbd\t$rT, $src", ShuffleOp,
- [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cbx\t$rT, $src", ShuffleOp,
- [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "chd\t$rT, $src", ShuffleOp,
- [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
- "chx\t$rT, $src", ShuffleOp,
- [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cwd\t$rT, $src", ShuffleOp,
- [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cwx\t$rT, $src", ShuffleOp,
- [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cwd\t$rT, $src", ShuffleOp,
- [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cwx\t$rT, $src", ShuffleOp,
- [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cdd\t$rT, $src", ShuffleOp,
- [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cdx\t$rT, $src", ShuffleOp,
- [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cdd\t$rT, $src", ShuffleOp,
- [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cdx\t$rT, $src", ShuffleOp,
- [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-//===----------------------------------------------------------------------===//
-// Constant formation:
-//===----------------------------------------------------------------------===//
-
-def ILHv8i16:
- RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val),
- "ilh\t$rT, $val", ImmLoad,
- [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>;
-
-def ILHr16:
- RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val),
- "ilh\t$rT, $val", ImmLoad,
- [(set R16C:$rT, immSExt16:$val)]>;
-
-// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with
-// the right constant")
-def ILHr8:
- RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val),
- "ilh\t$rT, $val", ImmLoad,
- [(set R8C:$rT, immSExt8:$val)]>;
-
-// IL does sign extension!
-
-class ILInst<dag OOL, dag IOL, list<dag> pattern>:
- RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val",
- ImmLoad, pattern>;
-
-class ILVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
- ILInst<(outs VECREG:$rT), (ins immtype:$val),
- [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
-
-class ILRegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
- ILInst<(outs rclass:$rT), (ins immtype:$val),
- [(set rclass:$rT, xform:$val)]>;
-
-multiclass ImmediateLoad
-{
- def v2i64: ILVecInst<v2i64, s16imm_i64, v2i64SExt16Imm>;
- def v4i32: ILVecInst<v4i32, s16imm_i32, v4i32SExt16Imm>;
-
- // TODO: Need v2f64, v4f32
-
- def r64: ILRegInst<R64C, s16imm_i64, immSExt16>;
- def r32: ILRegInst<R32C, s16imm_i32, immSExt16>;
- def f32: ILRegInst<R32FP, s16imm_f32, fpimmSExt16>;
- def f64: ILRegInst<R64FP, s16imm_f64, fpimmSExt16>;
-}
-
-defm IL : ImmediateLoad;
-
-class ILHUInst<dag OOL, dag IOL, list<dag> pattern>:
- RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val",
- ImmLoad, pattern>;
-
-class ILHUVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
- ILHUInst<(outs VECREG:$rT), (ins immtype:$val),
- [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
-
-class ILHURegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
- ILHUInst<(outs rclass:$rT), (ins immtype:$val),
- [(set rclass:$rT, xform:$val)]>;
-
-multiclass ImmLoadHalfwordUpper
-{
- def v2i64: ILHUVecInst<v2i64, u16imm_i64, immILHUvec_i64>;
- def v4i32: ILHUVecInst<v4i32, u16imm_i32, immILHUvec>;
-
- def r64: ILHURegInst<R64C, u16imm_i64, hi16>;
- def r32: ILHURegInst<R32C, u16imm_i32, hi16>;
-
- // Loads the high portion of an address
- def hi: ILHURegInst<R32C, symbolHi, hi16>;
-
- // Used in custom lowering constant SFP loads:
- def f32: ILHURegInst<R32FP, f16imm, hi16_f32>;
-}
-
-defm ILHU : ImmLoadHalfwordUpper;
-
-// Immediate load address (can also be used to load 18-bit unsigned constants,
-// see the zext 16->32 pattern)
-
-class ILAInst<dag OOL, dag IOL, list<dag> pattern>:
- RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val",
- LoadNOP, pattern>;
-
-class ILAVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
- ILAInst<(outs VECREG:$rT), (ins immtype:$val),
- [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
-
-class ILARegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
- ILAInst<(outs rclass:$rT), (ins immtype:$val),
- [(set rclass:$rT, xform:$val)]>;
-
-multiclass ImmLoadAddress
-{
- def v2i64: ILAVecInst<v2i64, u18imm, v2i64Uns18Imm>;
- def v4i32: ILAVecInst<v4i32, u18imm, v4i32Uns18Imm>;
-
- def r64: ILARegInst<R64C, u18imm_i64, imm18>;
- def r32: ILARegInst<R32C, u18imm, imm18>;
- def f32: ILARegInst<R32FP, f18imm, fpimm18>;
- def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;
-
- def hi: ILARegInst<R32C, symbolHi, imm18>;
- def lo: ILARegInst<R32C, symbolLo, imm18>;
-
- def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
- [(set R32C:$rT, imm18:$val)]>;
-}
-
-defm ILA : ImmLoadAddress;
-
-// Immediate OR, Halfword Lower: The "other" part of loading large constants
-// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...>
-// Note that these are really two-operand instructions, but they're encoded
-// as three operands with the first two arguments tied to each other. (An
-// illustrative sequence follows the IOHL definitions below.)
-
-class IOHLInst<dag OOL, dag IOL, list<dag> pattern>:
- RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val",
- ImmLoad, pattern>,
- RegConstraint<"$rS = $rT">,
- NoEncode<"$rS">;
-
-class IOHLVecInst<ValueType vectype, Operand immtype /* , PatLeaf xform */>:
- IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val),
- [/* no pattern */]>;
-
-class IOHLRegInst<RegisterClass rclass, Operand immtype /* , PatLeaf xform */>:
- IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val),
- [/* no pattern */]>;
-
-multiclass ImmOrHalfwordLower
-{
- def v2i64: IOHLVecInst<v2i64, u16imm_i64>;
- def v4i32: IOHLVecInst<v4i32, u16imm_i32>;
-
- def r32: IOHLRegInst<R32C, i32imm>;
- def f32: IOHLRegInst<R32FP, f32imm>;
-
- def lo: IOHLRegInst<R32C, symbolLo>;
-}
-
-defm IOHL: ImmOrHalfwordLower;
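-
-// Illustrative sequence (not from the original source): the canonical
-// ILHU/IOHL pairing used to materialize a full 32-bit constant, here the
-// hypothetical value 0x12345678 into register $3:
-//
-//   ilhu   $3, 0x1234      // $3 = 0x12340000 (upper halfword loaded)
-//   iohl   $3, 0x5678      // $3 |= 0x00005678 -> 0x12345678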
-
-// Form select mask for bytes using immediate, used in conjunction with the
-// SELB instruction:
-
-class FSMBIVec<ValueType vectype>:
- RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
- "fsmbi\t$rT, $val",
- SelectOp,
- [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
-
-multiclass FormSelectMaskBytesImm
-{
- def v16i8: FSMBIVec<v16i8>;
- def v8i16: FSMBIVec<v8i16>;
- def v4i32: FSMBIVec<v4i32>;
- def v2i64: FSMBIVec<v2i64>;
-}
-
-defm FSMBI : FormSelectMaskBytesImm;
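-
-// Illustrative sketch (not from the original source): FSMBI expands each bit
-// of its 16-bit immediate into a byte of all ones (bit set) or all zeroes
-// (bit clear), yielding a byte-granular merge mask for SELB. Assuming the
-// immediate's most significant bit maps to byte 0, a hypothetical merge of
-// bytes 0-7 from $4 with bytes 8-15 from $5:
-//
-//   fsmbi  $6, 0xFF00      // bytes 0-7 = 0xFF, bytes 8-15 = 0x00
-//   selb   $3, $5, $4, $6  // $3 = ($6 & $4) | (~$6 & $5)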
-
-// fsmb: Form select mask for bytes. N.B.: the input operand, $rA, is 16 bits wide.
-class FSMBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp,
- pattern>;
-
-class FSMBRegInst<RegisterClass rclass, ValueType vectype>:
- FSMBInst<(outs VECREG:$rT), (ins rclass:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
-
-class FSMBVecInst<ValueType vectype>:
- FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT),
- (SPUselmask (vectype VECREG:$rA)))]>;
-
-multiclass FormSelectMaskBits {
- def v16i8_r16: FSMBRegInst<R16C, v16i8>;
- def v16i8: FSMBVecInst<v16i8>;
-}
-
-defm FSMB: FormSelectMaskBits;
-
-// fsmh: Form select mask for halfwords. N.B.: the input operand, $rA, is
-// only 8 bits wide (even though it is input as 16 bits here).
-
-class FSMHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp,
- pattern>;
-
-class FSMHRegInst<RegisterClass rclass, ValueType vectype>:
- FSMHInst<(outs VECREG:$rT), (ins rclass:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
-
-class FSMHVecInst<ValueType vectype>:
- FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT),
- (SPUselmask (vectype VECREG:$rA)))]>;
-
-multiclass FormSelectMaskHalfword {
- def v8i16_r16: FSMHRegInst<R16C, v8i16>;
- def v8i16: FSMHVecInst<v8i16>;
-}
-
-defm FSMH: FormSelectMaskHalfword;
-
-// fsm: Form select mask for words. Like the other fsm* instructions,
-// only the lower 4 bits of $rA are significant.
-
-class FSMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp,
- pattern>;
-
-class FSMRegInst<ValueType vectype, RegisterClass rclass>:
- FSMInst<(outs VECREG:$rT), (ins rclass:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
-
-class FSMVecInst<ValueType vectype>:
- FSMInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>;
-
-multiclass FormSelectMaskWord {
- def v4i32: FSMVecInst<v4i32>;
-
- def r32 : FSMRegInst<v4i32, R32C>;
- def r16 : FSMRegInst<v4i32, R16C>;
-}
-
-defm FSM : FormSelectMaskWord;
-
-// Special case when used for i64 math operations
-multiclass FormSelectMaskWord64 {
- def r32 : FSMRegInst<v2i64, R32C>;
- def r16 : FSMRegInst<v2i64, R16C>;
-}
-
-defm FSM64 : FormSelectMaskWord64;
-
-//===----------------------------------------------------------------------===//
-// Integer and Logical Operations:
-//===----------------------------------------------------------------------===//
-
-def AHv8i16:
- RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ah\t$rT, $rA, $rB", IntegerOp,
- [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>;
-
-def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (AHv8i16 VECREG:$rA, VECREG:$rB)>;
-
-def AHr16:
- RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- "ah\t$rT, $rA, $rB", IntegerOp,
- [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>;
-
-def AHIvec:
- RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ahi\t$rT, $rA, $val", IntegerOp,
- [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA),
- v8i16SExt10Imm:$val))]>;
-
-def AHIr16:
- RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- "ahi\t$rT, $rA, $val", IntegerOp,
- [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
-
-// v4i32, i32 add instruction:
-
-class AInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00000011000, OOL, IOL,
- "a\t$rT, $rA, $rB", IntegerOp,
- pattern>;
-
-class AVecInst<ValueType vectype>:
- AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class ARegInst<RegisterClass rclass>:
- AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>;
-
-multiclass AddInstruction {
- def v4i32: AVecInst<v4i32>;
- def v16i8: AVecInst<v16i8>;
- def r32: ARegInst<R32C>;
-}
-
-defm A : AddInstruction;
-
-class AIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b00111000, OOL, IOL,
- "ai\t$rT, $rA, $val", IntegerOp,
- pattern>;
-
-class AIVecInst<ValueType vectype, PatLeaf immpred>:
- AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
-
-class AIFPVecInst<ValueType vectype, PatLeaf immpred>:
- AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [/* no pattern */]>;
-
-class AIRegInst<RegisterClass rclass, PatLeaf immpred>:
- AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
- [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
-
-// This is used to add epsilons to floating point numbers in the f32 fdiv code:
-class AIFPInst<RegisterClass rclass, PatLeaf immpred>:
- AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
- [/* no pattern */]>;
-
-multiclass AddImmediate {
- def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>;
-
- def r32: AIRegInst<R32C, i32ImmSExt10>;
-
- def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>;
- def f32: AIFPInst<R32FP, i32ImmSExt10>;
-}
-
-defm AI : AddImmediate;
-
-def SFHvec:
- RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "sfh\t$rT, $rA, $rB", IntegerOp,
- [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def SFHr16:
- RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- "sfh\t$rT, $rA, $rB", IntegerOp,
- [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>;
-
-def SFHIvec:
- RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "sfhi\t$rT, $rA, $val", IntegerOp,
- [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val,
- (v8i16 VECREG:$rA)))]>;
-
-def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- "sfhi\t$rT, $rA, $val", IntegerOp,
- [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>;
-
-def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB),
- "sf\t$rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
-
-
-def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "sf\t$rT, $rA, $rB", IntegerOp,
- [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
-
-def SFIvec:
- RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "sfi\t$rT, $rA, $val", IntegerOp,
- [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val,
- (v4i32 VECREG:$rA)))]>;
-
-def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
- (ins R32C:$rA, s10imm_i32:$val),
- "sfi\t$rT, $rA, $val", IntegerOp,
- [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
-
-// ADDX: add extended, with the carry in via $rCarry; doesn't match a pattern.
-class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00000010110, OOL, IOL,
- "addx\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ADDXVecInst<ValueType vectype>:
- ADDXInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-class ADDXRegInst<RegisterClass rclass>:
- ADDXInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-multiclass AddExtended {
- def v2i64 : ADDXVecInst<v2i64>;
- def v4i32 : ADDXVecInst<v4i32>;
- def r64 : ADDXRegInst<R64C>;
- def r32 : ADDXRegInst<R32C>;
-}
-
-defm ADDX : AddExtended;
-
-// CG: Generate carry for add
-class CGInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01000011000, OOL, IOL,
- "cg\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class CGVecInst<ValueType vectype>:
- CGInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-class CGRegInst<RegisterClass rclass>:
- CGInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB),
- [/* no pattern */]>;
-
-multiclass CarryGenerate {
- def v2i64 : CGVecInst<v2i64>;
- def v4i32 : CGVecInst<v4i32>;
- def r64 : CGRegInst<R64C>;
- def r32 : CGRegInst<R32C>;
-}
-
-defm CG : CarryGenerate;
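-
-// Illustrative sketch (not from the original source): CG and ADDX are the
-// building blocks for a carry-propagating add across 32-bit word slots
-// (e.g. for i64 math). Roughly, with the carry shuffle elided:
-//
-//   cg     $t, $aLo, $bLo  // $t = per-word carry out of $aLo + $bLo
-//   ...                    // shuffle $t so the carry sits under the high word
-//   addx   $t, $aHi, $bHi  // $t = $aHi + $bHi + ($t & 1); $t is the tied carry
-//   a      $lo, $aLo, $bLo // low word of the sum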
-
-// SFX: Subtract from, extended. This is used in conjunction with BG to
-// subtract with carry (a borrow, in this case).
-class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10000010110, OOL, IOL,
- "sfx\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class SFXVecInst<ValueType vectype>:
- SFXInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-class SFXRegInst<RegisterClass rclass>:
- SFXInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-multiclass SubtractExtended {
- def v2i64 : SFXVecInst<v2i64>;
- def v4i32 : SFXVecInst<v4i32>;
- def r64 : SFXRegInst<R64C>;
- def r32 : SFXRegInst<R32C>;
-}
-
-defm SFX : SubtractExtended;
-
-// BG: borrow generate for subtract; doesn't match a pattern.
-class BGInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01000010000, OOL, IOL,
- "bg\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class BGVecInst<ValueType vectype>:
- BGInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-class BGRegInst<RegisterClass rclass>:
- BGInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB),
- [/* no pattern */]>;
-
-multiclass BorrowGenerate {
- def v4i32 : BGVecInst<v4i32>;
- def v2i64 : BGVecInst<v2i64>;
- def r64 : BGRegInst<R64C>;
- def r32 : BGRegInst<R32C>;
-}
-
-defm BG : BorrowGenerate;
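-
-// Illustrative sketch (not from the original source): BG/SFX mirror CG/ADDX
-// for subtraction with borrow. A rough 64-bit subtract of $b from $a, with
-// the borrow shuffle elided (note the rB - rA operand convention of sf):
-//
-//   bg     $t, $bLo, $aLo  // $t = borrow-generate words for $aLo - $bLo
-//   ...                    // shuffle $t so the borrow sits under the high word
-//   sfx    $t, $bHi, $aHi  // $t = $aHi - $bHi, consuming the borrow ($t tied)
-//   sf     $lo, $bLo, $aLo // low word: $aLo - $bLo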
-
-// BGX: Borrow generate, extended.
-def BGXvec:
- RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
- VECREG:$rCarry),
- "bgx\t$rT, $rA, $rB", IntegerOp,
- []>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-// Halfword multiply variants:
-// N.B.: These can be used to build up larger quantities (16 x 16 -> 32).
-
-def MPYv8i16:
- RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpy\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYr16:
- RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- "mpy\t$rT, $rA, $rB", IntegerMulDiv,
- [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
-
-// Unsigned 16-bit multiply:
-
-class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00110011110, OOL, IOL,
- "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
- pattern>;
-
-def MPYUv4i32:
- MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-def MPYUr16:
- MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
-
-def MPYUr32:
- MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-// mpyi: multiply 16 x s10imm -> 32 result.
-
-class MPYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b00101110, OOL, IOL,
- "mpyi\t$rT, $rA, $val", IntegerMulDiv,
- pattern>;
-
-def MPYIvec:
- MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
-
-def MPYIr16:
- MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
-
-// mpyui: has the same issues as the other multiplies; additionally, it
-// doesn't match a pattern, but may be used during target DAG selection or
-// lowering.
-
-class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b10101110, OOL, IOL,
- "mpyui\t$rT, $rA, $val", IntegerMulDiv,
- pattern>;
-
-def MPYUIvec:
- MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- []>;
-
-def MPYUIr16:
- MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- []>;
-
-// mpya: 16 x 16 + 16 -> 32 bit result
-class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
- RRRForm<0b0011, OOL, IOL,
- "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
- pattern>;
-
-def MPYAv4i32:
- MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (v4i32 VECREG:$rT),
- (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))),
- (v4i32 VECREG:$rC)))]>;
-
-def MPYAr32:
- MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
- [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
- R32C:$rC))]>;
-
-def MPYAr32_sext:
- MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
- [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)),
- R32C:$rC))]>;
-
-def MPYAr32_sextinreg:
- MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
- [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
- (sext_inreg R32C:$rB, i16)),
- R32C:$rC))]>;
-
-// mpyh: multiply high, used to synthesize 32-bit multiplies
-class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10100011110, OOL, IOL,
- "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
- pattern>;
-
-def MPYHv4i32:
- MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-def MPYHr32:
- MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [/* no pattern */]>;
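-
-// Illustrative sketch (not from the original source): the usual way a full
-// 32 x 32 -> 32 multiply is synthesized from these halfword multiplies,
-// assuming $a, $b are the 32-bit operands and $t1-$t3 are scratch registers:
-//
-//   mpyh   $t1, $a, $b     // (a_hi * b_lo) << 16
-//   mpyh   $t2, $b, $a     // (b_hi * a_lo) << 16
-//   mpyu   $t3, $a, $b     // a_lo * b_lo (unsigned 16 x 16 -> 32)
-//   a      $t1, $t1, $t2
-//   a      $rt, $t1, $t3   // low 32 bits of a * b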
-
-// mpys: multiply high and shift right (returns the upper 16 bits of the
-// 32-bit product of a 16-bit multiply, sign extended to 32 bits).
-
-class MPYSInst<dag OOL, dag IOL>:
- RRForm<0b11100011110, OOL, IOL,
- "mpys\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYSv4i32:
- MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYSr16:
- MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
-
-// mpyhh: multiply high-high (returns the 32-bit result of multiplying
-// the top 16 bits of $rA and $rB).
-
-class MPYHHInst<dag OOL, dag IOL>:
- RRForm<0b01100011110, OOL, IOL,
- "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHv8i16:
- MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHr32:
- MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-// mpyhha: Multiply high-high, add to $rT:
-
-class MPYHHAInst<dag OOL, dag IOL>:
- RRForm<0b01100010110, OOL, IOL,
- "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHAvec:
- MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHAr32:
- MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-// mpyhhu: Multiply high-high, unsigned, e.g.:
-//
-// +-------+-------+ +-------+-------+ +---------+
-// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
-// +-------+-------+ +-------+-------+ +---------+
-//
-// where a0, b0 are the upper 16 bits of the 32-bit word
-
-class MPYHHUInst<dag OOL, dag IOL>:
- RRForm<0b01110011110, OOL, IOL,
- "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHUv4i32:
- MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHUr32:
- MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-// mpyhhau: Multiply high-high, unsigned, add to $rT (cf. mpyhha):
-
-class MPYHHAUInst<dag OOL, dag IOL>:
- RRForm<0b01110010110, OOL, IOL,
- "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHAUvec:
- MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHAUr32:
- MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// clz: Count leading zeroes
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
- IntegerOp, pattern>;
-
-class CLZRegInst<RegisterClass rclass>:
- CLZInst<(outs rclass:$rT), (ins rclass:$rA),
- [(set rclass:$rT, (ctlz rclass:$rA))]>;
-
-class CLZVecInst<ValueType vectype>:
- CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>;
-
-multiclass CountLeadingZeroes {
- def v4i32 : CLZVecInst<v4i32>;
- def r32 : CLZRegInst<R32C>;
-}
-
-defm CLZ : CountLeadingZeroes;
-
-// cntb: Count ones in bytes (aka "population count")
-//
-// NOTE: This instruction is really a vector instruction, but the custom
-// lowering code uses it in unorthodox ways to support CTPOP for other
-// data types!
-
-def CNTBv16i8:
- RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
- "cntb\t$rT, $rA", IntegerOp,
- [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
-
-def CNTBv8i16 :
- RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
- "cntb\t$rT, $rA", IntegerOp,
- [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
-
-def CNTBv4i32 :
- RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
- "cntb\t$rT, $rA", IntegerOp,
- [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
-
-// gbb: Gather the low order bits from each byte in $rA into a single 16-bit
-// quantity stored into $rT's slot 0; the upper 16 bits of the slot are
-// zeroed, as are slots 1-3.
-//
-// Note: This instruction "pairs" with the fsmb instruction for all of the
-// various types defined here.
-//
-// Note 2: The "VecInst" and "RegInst" forms refer to the result being either
-// a vector or register.
-
-class GBBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>;
-
-class GBBRegInst<RegisterClass rclass, ValueType vectype>:
- GBBInst<(outs rclass:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-class GBBVecInst<ValueType vectype>:
- GBBInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-multiclass GatherBitsFromBytes {
- def v16i8_r32: GBBRegInst<R32C, v16i8>;
- def v16i8_r16: GBBRegInst<R16C, v16i8>;
- def v16i8: GBBVecInst<v16i8>;
-}
-
-defm GBB: GatherBitsFromBytes;
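-
-// Illustrative sketch (not from the original source): gbb and fsmb are
-// near-inverses, which is why they "pair" as noted above. A hypothetical
-// roundtrip through a byte mask:
-//
-//   gbb    $m, $v          // gather the LSB of each byte of $v into a 16-bit
-//                          // mask in $m's preferred slot
-//   fsmb   $v2, $m         // re-expand each mask bit into a 0xFF/0x00 byte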
-
-// gbh: Gather all low order bits from each halfword in $rA into a single
-// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0
-// and slots 1-3 also set to 0.
-//
-// See notes for GBBInst, above.
-
-class GBHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp,
- pattern>;
-
-class GBHRegInst<RegisterClass rclass, ValueType vectype>:
- GBHInst<(outs rclass:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-class GBHVecInst<ValueType vectype>:
- GBHInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-multiclass GatherBitsHalfword {
- def v8i16_r32: GBHRegInst<R32C, v8i16>;
- def v8i16_r16: GBHRegInst<R16C, v8i16>;
- def v8i16: GBHVecInst<v8i16>;
-}
-
-defm GBH: GatherBitsHalfword;
-
-// gb: Gather all low order bits from each word in $rA into a single
-// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0,
-// as well as slots 1-3.
-//
-// See notes for gbb, above.
-
-class GBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp,
- pattern>;
-
-class GBRegInst<RegisterClass rclass, ValueType vectype>:
- GBInst<(outs rclass:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-class GBVecInst<ValueType vectype>:
- GBInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-multiclass GatherBitsWord {
- def v4i32_r32: GBRegInst<R32C, v4i32>;
- def v4i32_r16: GBRegInst<R16C, v4i32>;
- def v4i32: GBVecInst<v4i32>;
-}
-
-defm GB: GatherBitsWord;
-
-// avgb: average bytes
-def AVGB:
- RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "avgb\t$rT, $rA, $rB", ByteOp,
- []>;
-
-// absdb: absolute difference of bytes
-def ABSDB:
- RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "absdb\t$rT, $rA, $rB", ByteOp,
- []>;
-
-// sumb: sum bytes into halfwords
-def SUMB:
- RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "sumb\t$rT, $rA, $rB", ByteOp,
- []>;
-
-// Sign extension operations:
-class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01101101010, OOL, IOL,
- "xsbh\t$rDst, $rSrc",
- IntegerOp, pattern>;
-
-class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>:
- XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
- pattern>;
-
-multiclass ExtendByteHalfword {
- def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
- [
- /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>;
- def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
- [(set R16C:$rDst, (sext R8C:$rSrc))]>;
- def r16: XSBHInRegInst<R16C,
- [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
-
- // 32-bit form for XSBH: used to sign extend 8-bit quantities all the way
- // to 32-bit quantities via a 32-bit register (see the sext 8->32 pattern
- // below). Intentionally doesn't match a pattern because we want the
- // sext 8->32 pattern to do the work for us, since we need the extra
- // XSHWr32.
- def r32: XSBHInRegInst<R32C, [/* no pattern */]>;
-
- // Same as the 32-bit version, but for i64
- def r64: XSBHInRegInst<R64C, [/* no pattern */]>;
-}
-
-defm XSBH : ExtendByteHalfword;
-
-// Sign extend halfwords to words:
-
-class XSHWInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc",
- IntegerOp, pattern>;
-
-class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>:
- XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc),
- [(set (out_vectype VECREG:$rDest),
- (sext (in_vectype VECREG:$rSrc)))]>;
-
-class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>:
- XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc),
- pattern>;
-
-class XSHWRegInst<RegisterClass rclass>:
- XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc),
- [(set rclass:$rDest, (sext R16C:$rSrc))]>;
-
-multiclass ExtendHalfwordWord {
- def v4i32: XSHWVecInst<v8i16, v4i32>;
-
- def r16: XSHWRegInst<R32C>;
-
- def r32: XSHWInRegInst<R32C,
- [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
- def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
-}
-
-defm XSHW : ExtendHalfwordWord;
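-
-// Illustrative sketch (not from the original source): the sext 8->32 lowering
-// alluded to above chains the two in-register forms, e.g. for an i8 value
-// already sitting in the 32-bit register $3:
-//
-//   xsbh   $3, $3          // sign extend the low byte to a halfword
-//   xshw   $3, $3          // then the halfword to a word: sext i8 -> i32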
-
-// Sign-extend words to doublewords (32->64 bits)
-
-class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc",
- IntegerOp, pattern>;
-
-class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
- XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
- [/*(set (out_vectype VECREG:$rDst),
- (sext (out_vectype VECREG:$rSrc)))*/]>;
-
-class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>:
- XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc),
- [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>;
-
-multiclass ExtendWordToDoubleWord {
- def v2i64: XSWDVecInst<v4i32, v2i64>;
- def r64: XSWDRegInst<R32C, R64C>;
-
- def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc),
- [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>;
-}
-
-defm XSWD : ExtendWordToDoubleWord;
-
-// AND operations
-
-class ANDInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ANDVecInst<ValueType vectype>:
- ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class ANDRegInst<RegisterClass rclass>:
- ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>;
-
-multiclass BitwiseAnd
-{
- def v16i8: ANDVecInst<v16i8>;
- def v8i16: ANDVecInst<v8i16>;
- def v4i32: ANDVecInst<v4i32>;
- def v2i64: ANDVecInst<v2i64>;
-
- def r128: ANDRegInst<GPRC>;
- def r64: ANDRegInst<R64C>;
- def r32: ANDRegInst<R32C>;
- def r16: ANDRegInst<R16C>;
- def r8: ANDRegInst<R8C>;
-
- //===---------------------------------------------
- // Special instructions to perform the fabs instruction
- def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
- [/* Intentionally does not match a pattern */]>;
-
- def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
- [/* Intentionally does not match a pattern */]>;
-
- def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* Intentionally does not match a pattern */]>;
-
- //===---------------------------------------------
-
- // Hacked form of AND to zero-extend 16-bit quantities to 32-bit
- // quantities -- see the 16->32 zext pattern.
- //
- // This pattern is somewhat artificial, since it might match some
- // compiler-generated code, but it is unlikely to do so.
-
- def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB),
- [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>;
-}
-
-defm AND : BitwiseAnd;
-
-
-def vnot_cell_conv : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>;
-
-// N.B.: vnot_cell_conv is one of those special target selection pattern
-// fragments in which we expect there to be a bit_convert on the constant.
-// Bear in mind that LLVM translates "not <reg>" to "xor <reg>, -1" (or, in
-// this case, a constant -1 vector).
-
-class ANDCInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
- ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (and (vectype VECREG:$rA),
- (vnot_frag (vectype VECREG:$rB))))]>;
-
-class ANDCRegInst<RegisterClass rclass>:
- ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>;
-
-multiclass AndComplement
-{
- def v16i8: ANDCVecInst<v16i8>;
- def v8i16: ANDCVecInst<v8i16>;
- def v4i32: ANDCVecInst<v4i32>;
- def v2i64: ANDCVecInst<v2i64>;
-
- def r128: ANDCRegInst<GPRC>;
- def r64: ANDCRegInst<R64C>;
- def r32: ANDCRegInst<R32C>;
- def r16: ANDCRegInst<R16C>;
- def r8: ANDCRegInst<R8C>;
-
- // Sometimes, the xor pattern has a bitcast constant:
- def v16i8_conv: ANDCVecInst<v16i8, vnot_cell_conv>;
-}
-
-defm ANDC : AndComplement;
-
-class ANDBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass AndByteImm
-{
- def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (v16i8 VECREG:$rT),
- (and (v16i8 VECREG:$rA),
- (v16i8 v16i8U8Imm:$val)))]>;
-
- def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
- [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>;
-}
-
-defm ANDBI : AndByteImm;
-
-class ANDHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass AndHalfwordImm
-{
- def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
-
- def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
- [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>;
-
- // Zero-extend i8 to i16:
- def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val),
- [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>;
-}
-
-defm ANDHI : AndHalfwordImm;
-
-class ANDIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-multiclass AndWordImm
-{
- def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>;
-
- def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>;
-
- // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32
- // pattern below.
- def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
- [(set R32C:$rT,
- (and (zext R8C:$rA), i32ImmSExt10:$val))]>;
-
- // Hacked form of ANDI to zero-extend i16 quantities to i32. See the
- // zext 16->32 pattern below.
- //
- // Note that this pattern is somewhat artificial, since it might match
- // something the compiler generates but is unlikely to occur in practice.
- def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
- [(set R32C:$rT,
- (and (zext R16C:$rA), i32ImmSExt10:$val))]>;
-}
-
-defm ANDI : AndWordImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Bitwise OR group:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// Bitwise "or" (N.B.: These are also register-register copy instructions...)
-class ORInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ORVecInst<ValueType vectype>:
- ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class ORRegInst<RegisterClass rclass>:
- ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
-
-
-multiclass BitwiseOr
-{
- def v16i8: ORVecInst<v16i8>;
- def v8i16: ORVecInst<v8i16>;
- def v4i32: ORVecInst<v4i32>;
- def v2i64: ORVecInst<v2i64>;
-
- def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4f32 VECREG:$rT),
- (v4f32 (bitconvert (or (v4i32 VECREG:$rA),
- (v4i32 VECREG:$rB)))))]>;
-
- def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v2f64 VECREG:$rT),
- (v2f64 (bitconvert (or (v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB)))))]>;
-
- def r128: ORRegInst<GPRC>;
- def r64: ORRegInst<R64C>;
- def r32: ORRegInst<R32C>;
- def r16: ORRegInst<R16C>;
- def r8: ORRegInst<R8C>;
-
- // OR instructions used to copy f32 and f64 registers.
- def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [/* no pattern */]>;
-
- def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- [/* no pattern */]>;
-}
-
-defm OR : BitwiseOr;
-
-//===----------------------------------------------------------------------===//
-// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers
-//===----------------------------------------------------------------------===//
-def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
- (COPY_TO_REGCLASS R8C:$rA, VECREG)>;
-
-def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
- (COPY_TO_REGCLASS R16C:$rA, VECREG)>;
-
-def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
- (COPY_TO_REGCLASS R32C:$rA, VECREG)>;
-
-def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
- (COPY_TO_REGCLASS R64C:$rA, VECREG)>;
-
-def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
- (COPY_TO_REGCLASS R32FP:$rA, VECREG)>;
-
-def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
- (COPY_TO_REGCLASS R64FP:$rA, VECREG)>;
-
-def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))),
- (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>;
-
-def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))),
- (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>;
-
-def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))),
- (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>;
-
-def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))),
- (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>;
-
-def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))),
- (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>;
-
-def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))),
- (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>;
-
-// Load Register: This is an assembler alias for a bitwise OR of a register
-// against itself. It's here because it brings some clarity to assembly
-// language output.
-
-let hasCtrlDep = 1 in {
- class LRInst<dag OOL, dag IOL>
- : SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> {
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = [/*no pattern*/];
-
- let Inst{0-10} = 0b10000010000; /* It's an OR operation */
- let Inst{11-17} = RA;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
- }
-
- class LRVecInst<ValueType vectype>:
- LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
-
- class LRRegInst<RegisterClass rclass>:
- LRInst<(outs rclass:$rT), (ins rclass:$rA)>;
-
- multiclass LoadRegister {
- def v2i64: LRVecInst<v2i64>;
- def v2f64: LRVecInst<v2f64>;
- def v4i32: LRVecInst<v4i32>;
- def v4f32: LRVecInst<v4f32>;
- def v8i16: LRVecInst<v8i16>;
- def v16i8: LRVecInst<v16i8>;
-
- def r128: LRRegInst<GPRC>;
- def r64: LRRegInst<R64C>;
- def f64: LRRegInst<R64FP>;
- def r32: LRRegInst<R32C>;
- def f32: LRRegInst<R32FP>;
- def r16: LRRegInst<R16C>;
- def r8: LRRegInst<R8C>;
- }
-
- defm LR: LoadRegister;
-}
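-
-// Illustrative note (not from the original source): because LR reuses the OR
-// opcode with $rA emitted into both source fields, the two spellings below
-// assemble to identical bits:
-//
-//   lr     $3, $4          // "load register", and equivalently...
-//   or     $3, $4, $4      // ...a register-register copy via OR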
-
-// ORC: Bitwise "or" with complement (c = a | ~b)
-
-class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ORCVecInst<ValueType vectype>:
- ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- (vnot (vectype VECREG:$rB))))]>;
-
-class ORCRegInst<RegisterClass rclass>:
- ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>;
-
-multiclass BitwiseOrComplement
-{
- def v16i8: ORCVecInst<v16i8>;
- def v8i16: ORCVecInst<v8i16>;
- def v4i32: ORCVecInst<v4i32>;
- def v2i64: ORCVecInst<v2i64>;
-
- def r128: ORCRegInst<GPRC>;
- def r64: ORCRegInst<R64C>;
- def r32: ORCRegInst<R32C>;
- def r16: ORCRegInst<R16C>;
- def r8: ORCRegInst<R8C>;
-}
-
-defm ORC : BitwiseOrComplement;
-
-// OR byte immediate
-class ORBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-class ORBIVecInst<ValueType vectype, PatLeaf immpred>:
- ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA),
- (vectype immpred:$val)))]>;
-
-multiclass BitwiseOrByteImm
-{
- def v16i8: ORBIVecInst<v16i8, v16i8U8Imm>;
-
- def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
- [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>;
-}
-
-defm ORBI : BitwiseOrByteImm;
-
-// OR halfword immediate
-class ORHIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-class ORHIVecInst<ValueType vectype, PatLeaf immpred>:
- ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- immpred:$val))]>;
-
-multiclass BitwiseOrHalfwordImm
-{
- def v8i16: ORHIVecInst<v8i16, v8i16Uns10Imm>;
-
- def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
- [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>;
-
- // Specialized ORHI form used to promote 8-bit registers to 16-bit
- def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val),
- [(set R16C:$rT, (or (anyext R8C:$rA),
- i16ImmSExt10:$val))]>;
-}
-
-defm ORHI : BitwiseOrHalfwordImm;
-
-class ORIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-class ORIVecInst<ValueType vectype, PatLeaf immpred>:
- ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- immpred:$val))]>;
-
-// Bitwise "or" with immediate
-multiclass BitwiseOrImm
-{
- def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>;
-
- def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>;
-
- // i16i32: hacked version of the ORI instruction to extend 16-bit quantities
- // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
- // infra, the "anyext 16->32" pattern).
- def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (or (anyext R16C:$rA),
- i32ImmSExt10:$val))]>;
-
- // i8i32: Hacked version of the ORI instruction to extend 8-bit quantities
- // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
- // infra, the "anyext 8->32" pattern).
- def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (or (anyext R8C:$rA),
- i32ImmSExt10:$val))]>;
-}
-
-defm ORI : BitwiseOrImm;
-
-// ORX: "or" across the vector: ORs $rA's word slots together, leaving the
-// result in $rT[0]; slots 1-3 are zeroed.
-//
-// FIXME: Needs to match an intrinsic pattern.
-def ORXv4i32:
- RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "orx\t$rT, $rA, $rB", IntegerOp,
- []>;
-
-// XOR:
-
-class XORInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class XORVecInst<ValueType vectype>:
- XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class XORRegInst<RegisterClass rclass>:
- XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>;
-
-multiclass BitwiseExclusiveOr
-{
- def v16i8: XORVecInst<v16i8>;
- def v8i16: XORVecInst<v8i16>;
- def v4i32: XORVecInst<v4i32>;
- def v2i64: XORVecInst<v2i64>;
-
- def r128: XORRegInst<GPRC>;
- def r64: XORRegInst<R64C>;
- def r32: XORRegInst<R32C>;
- def r16: XORRegInst<R16C>;
- def r8: XORRegInst<R8C>;
-
- // XOR instructions used to negate f32 and f64 quantities.
-
- def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
- [/* no pattern */]>;
-
- def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
- [/* no pattern */]>;
-
- def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern, see fneg{32,64} */]>;
-}
-
-defm XOR : BitwiseExclusiveOr;
-
-//==----------------------------------------------------------
-
-class XORBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-multiclass XorByteImm
-{
- def v16i8:
- XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>;
-
- def r8:
- XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
- [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>;
-}
-
-defm XORBI : XorByteImm;
-
-def XORHIv8i16:
- RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- "xorhi\t$rT, $rA, $val", IntegerOp,
- [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA),
- v8i16SExt10Imm:$val))]>;
-
-def XORHIr16:
- RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- "xorhi\t$rT, $rA, $val", IntegerOp,
- [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>;
-
-def XORIv4i32:
- RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val),
- "xori\t$rT, $rA, $val", IntegerOp,
- [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA),
- v4i32SExt10Imm:$val))]>;
-
-def XORIr32:
- RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- "xori\t$rT, $rA, $val", IntegerOp,
- [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>;
-
-// NAND:
-
-class NANDInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class NANDVecInst<ValueType vectype>:
- NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA),
- (vectype VECREG:$rB))))]>;
-class NANDRegInst<RegisterClass rclass>:
- NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>;
-
-multiclass BitwiseNand
-{
- def v16i8: NANDVecInst<v16i8>;
- def v8i16: NANDVecInst<v8i16>;
- def v4i32: NANDVecInst<v4i32>;
- def v2i64: NANDVecInst<v2i64>;
-
- def r128: NANDRegInst<GPRC>;
- def r64: NANDRegInst<R64C>;
- def r32: NANDRegInst<R32C>;
- def r16: NANDRegInst<R16C>;
- def r8: NANDRegInst<R8C>;
-}
-
-defm NAND : BitwiseNand;
-
-// NOR:
-
-class NORInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class NORVecInst<ValueType vectype>:
- NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA),
- (vectype VECREG:$rB))))]>;
-class NORRegInst<RegisterClass rclass>:
- NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>;
-
-multiclass BitwiseNor
-{
- def v16i8: NORVecInst<v16i8>;
- def v8i16: NORVecInst<v8i16>;
- def v4i32: NORVecInst<v4i32>;
- def v2i64: NORVecInst<v2i64>;
-
- def r128: NORRegInst<GPRC>;
- def r64: NORRegInst<R64C>;
- def r32: NORRegInst<R32C>;
- def r16: NORRegInst<R16C>;
- def r8: NORRegInst<R8C>;
-}
-
-defm NOR : BitwiseNor;
-
-// Select bits:
-class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
- IntegerOp, pattern>;
-
-class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (vectype VECREG:$rT),
- (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
- (and (vnot_frag (vectype VECREG:$rC)),
- (vectype VECREG:$rA))))]>;
-
-class SELBVecVCondInst<ValueType vectype>:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (vectype VECREG:$rT),
- (select (vectype VECREG:$rC),
- (vectype VECREG:$rB),
- (vectype VECREG:$rA)))]>;
-
-class SELBVecCondInst<ValueType vectype>:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
- [(set (vectype VECREG:$rT),
- (select R32C:$rC,
- (vectype VECREG:$rB),
- (vectype VECREG:$rA)))]>;
-
-class SELBRegInst<RegisterClass rclass>:
- SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
- [(set rclass:$rT,
- (or (and rclass:$rB, rclass:$rC),
- (and rclass:$rA, (not rclass:$rC))))]>;
-
-class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
- SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
- [(set rclass:$rT,
- (select rcond:$rC, rclass:$rB, rclass:$rA))]>;
-
-multiclass SelectBits
-{
- def v16i8: SELBVecInst<v16i8>;
- def v8i16: SELBVecInst<v8i16>;
- def v4i32: SELBVecInst<v4i32>;
- def v2i64: SELBVecInst<v2i64, vnot_cell_conv>;
-
- def r128: SELBRegInst<GPRC>;
- def r64: SELBRegInst<R64C>;
- def r32: SELBRegInst<R32C>;
- def r16: SELBRegInst<R16C>;
- def r8: SELBRegInst<R8C>;
-
- def v16i8_cond: SELBVecCondInst<v16i8>;
- def v8i16_cond: SELBVecCondInst<v8i16>;
- def v4i32_cond: SELBVecCondInst<v4i32>;
- def v2i64_cond: SELBVecCondInst<v2i64>;
-
- def v16i8_vcond: SELBVecVCondInst<v16i8>;
- def v8i16_vcond: SELBVecVCondInst<v8i16>;
- def v4i32_vcond: SELBVecVCondInst<v4i32>;
- def v2i64_vcond: SELBVecVCondInst<v2i64>;
-
- def v4f32_cond:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (v4f32 VECREG:$rT),
- (select (v4i32 VECREG:$rC),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rA)))]>;
-
- // SELBr64_cond is defined in SPU64InstrInfo.td
- def r32_cond: SELBRegCondInst<R32C, R32C>;
- def f32_cond: SELBRegCondInst<R32C, R32FP>;
- def r16_cond: SELBRegCondInst<R16C, R16C>;
- def r8_cond: SELBRegCondInst<R8C, R8C>;
-}
-
-defm SELB : SelectBits;
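-
-// Illustrative worked example (not from the original source), using 4-bit
-// values: with $rA = 0b1100, $rB = 0b1010 and mask $rC = 0b0110, selb
-// computes (rC & rB) | (~rC & rA) = 0b0010 | 0b1000 = 0b1010; each result
-// bit comes from $rB where the mask is 1 and from $rA where it is 0.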
-
-class SPUselbPatVec<ValueType vectype, SPUInstr inst>:
- Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)),
- (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
-
-def : SPUselbPatVec<v16i8, SELBv16i8>;
-def : SPUselbPatVec<v8i16, SELBv8i16>;
-def : SPUselbPatVec<v4i32, SELBv4i32>;
-def : SPUselbPatVec<v2i64, SELBv2i64>;
-
-class SPUselbPatReg<RegisterClass rclass, SPUInstr inst>:
- Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC),
- (inst rclass:$rA, rclass:$rB, rclass:$rC)>;
-
-def : SPUselbPatReg<R8C, SELBr8>;
-def : SPUselbPatReg<R16C, SELBr16>;
-def : SPUselbPatReg<R32C, SELBr32>;
-def : SPUselbPatReg<R64C, SELBr64>;
-
-// EQV: Equivalence (1 for each same bit, otherwise 0)
-//
-// Note: There are a lot of ways to match this bit operator and these patterns
-// attempt to be as exhaustive as possible.
-
-class EQVInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class EQVVecInst<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
- (and (vnot (vectype VECREG:$rA)),
- (vnot (vectype VECREG:$rB)))))]>;
-
-class EQVRegInst<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB),
- (and (not rclass:$rA), (not rclass:$rB))))]>;
-
-class EQVVecPattern1<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>;
-
-class EQVRegPattern1<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>;
-
-class EQVVecPattern2<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
- (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>;
-
-class EQVRegPattern2<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT,
- (or (and rclass:$rA, rclass:$rB),
- (not (or rclass:$rA, rclass:$rB))))]>;
-
-class EQVVecPattern3<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>;
-
-class EQVRegPattern3<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>;
-
-multiclass BitEquivalence
-{
- def v16i8: EQVVecInst<v16i8>;
- def v8i16: EQVVecInst<v8i16>;
- def v4i32: EQVVecInst<v4i32>;
- def v2i64: EQVVecInst<v2i64>;
-
- def v16i8_1: EQVVecPattern1<v16i8>;
- def v8i16_1: EQVVecPattern1<v8i16>;
- def v4i32_1: EQVVecPattern1<v4i32>;
- def v2i64_1: EQVVecPattern1<v2i64>;
-
- def v16i8_2: EQVVecPattern2<v16i8>;
- def v8i16_2: EQVVecPattern2<v8i16>;
- def v4i32_2: EQVVecPattern2<v4i32>;
- def v2i64_2: EQVVecPattern2<v2i64>;
-
- def v16i8_3: EQVVecPattern3<v16i8>;
- def v8i16_3: EQVVecPattern3<v8i16>;
- def v4i32_3: EQVVecPattern3<v4i32>;
- def v2i64_3: EQVVecPattern3<v2i64>;
-
- def r128: EQVRegInst<GPRC>;
- def r64: EQVRegInst<R64C>;
- def r32: EQVRegInst<R32C>;
- def r16: EQVRegInst<R16C>;
- def r8: EQVRegInst<R8C>;
-
- def r128_1: EQVRegPattern1<GPRC>;
- def r64_1: EQVRegPattern1<R64C>;
- def r32_1: EQVRegPattern1<R32C>;
- def r16_1: EQVRegPattern1<R16C>;
- def r8_1: EQVRegPattern1<R8C>;
-
- def r128_2: EQVRegPattern2<GPRC>;
- def r64_2: EQVRegPattern2<R64C>;
- def r32_2: EQVRegPattern2<R32C>;
- def r16_2: EQVRegPattern2<R16C>;
- def r8_2: EQVRegPattern2<R8C>;
-
- def r128_3: EQVRegPattern3<GPRC>;
- def r64_3: EQVRegPattern3<R64C>;
- def r32_3: EQVRegPattern3<R32C>;
- def r16_3: EQVRegPattern3<R16C>;
- def r8_3: EQVRegPattern3<R8C>;
-}
-
-defm EQV: BitEquivalence;
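-
-// Illustrative worked example (not from the original source): eqv is simply
-// XNOR, i.e. ~($rA ^ $rB). For the low nibble, eqv(0b1100, 0b1010) =
-// ~0b0110 = 0b1001 (with all higher bits set), which is the value every
-// pattern variant above reduces to.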
-
-//===----------------------------------------------------------------------===//
-// Vector shuffle...
-//===----------------------------------------------------------------------===//
-// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB.
-// See the SPUshuffle SDNode operand above, which sets up the DAG pattern
-// matcher to emit something when LowerVECTOR_SHUFFLE generates a node with
-// the SPUISD::SHUFB opcode.
-//===----------------------------------------------------------------------===//
-
-class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
- ShuffleOp, pattern>;
-
-class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
- SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (resultvec VECREG:$rT),
- (SPUshuffle (resultvec VECREG:$rA),
- (resultvec VECREG:$rB),
- (maskvec VECREG:$rC)))]>;
-
-class SHUFBGPRCInst:
- SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC),
- [/* no pattern */]>;
-
-multiclass ShuffleBytes
-{
- def v16i8 : SHUFBVecInst<v16i8, v16i8>;
- def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>;
- def v8i16 : SHUFBVecInst<v8i16, v16i8>;
- def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
- def v4i32 : SHUFBVecInst<v4i32, v16i8>;
- def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
- def v2i64 : SHUFBVecInst<v2i64, v16i8>;
- def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;
-
- def v4f32 : SHUFBVecInst<v4f32, v16i8>;
- def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>;
-
- def v2f64 : SHUFBVecInst<v2f64, v16i8>;
- def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
-
- def gprc : SHUFBGPRCInst;
-}
-
-defm SHUFB : ShuffleBytes;
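-
-// Illustrative note (not from the original source): each control byte of $rC
-// selects one byte of the 32-byte concatenation $rA:$rB (indices 0x00-0x0f
-// from $rA, 0x10-0x1f from $rB), while control bytes of the form 10xxxxxx,
-// 110xxxxx and 111xxxxx produce the fill values 0x00, 0xFF and 0x80. So a
-// control vector beginning 0x10 0x11 0x12 0x13 copies $rB's first word into
-// the result's first word.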
-
-//===----------------------------------------------------------------------===//
-// Shift and rotate group:
-//===----------------------------------------------------------------------===//
-
-class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-class SHLHVecInst<ValueType vectype>:
- SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
-
-multiclass ShiftLeftHalfword
-{
- def v8i16: SHLHVecInst<v8i16>;
- def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>;
- def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>;
-}
-
-defm SHLH : ShiftLeftHalfword;
-
-//===----------------------------------------------------------------------===//
-
-class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class SHLHIVecInst<ValueType vectype>:
- SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
-
-multiclass ShiftLeftHalfwordImm
-{
- def v8i16: SHLHIVecInst<v8i16>;
- def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
- [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>;
-}
-
-defm SHLHI : ShiftLeftHalfwordImm;
-
-def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
- (SHLHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>;
-
-def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
- (SHLHIr16 R16C:$rA, (TO_IMM16 uimm7:$val))>;
-
-//===----------------------------------------------------------------------===//
-
-class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-multiclass ShiftLeftWord
-{
- def v4i32:
- SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
- def r32:
- SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
-}
-
-defm SHL: ShiftLeftWord;
-
-//===----------------------------------------------------------------------===//
-
-class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-multiclass ShiftLeftWordImm
-{
- def v4i32:
- SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
- [(set (v4i32 VECREG:$rT),
- (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>;
-
- def r32:
- SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val),
- [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>;
-}
-
-defm SHLI : ShiftLeftWordImm;
-
-//===----------------------------------------------------------------------===//
-// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit
-// register) to the left. Vector form is here to ensure type correctness.
-//
-// The shift count is in the lowest 3 bits (29-31) of $rB, so only a bit shift
-// of up to 7 bits is actually possible.
-//
-// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI
-// to shift i64 and i128. SHLQBI is the residual left over after shifting by
-// bytes with SHLQBY.
-
-class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class SHLQBIVecInst<ValueType vectype>:
- SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
-
-class SHLQBIRegInst<RegisterClass rclass>:
- SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass ShiftLeftQuadByBits
-{
- def v16i8: SHLQBIVecInst<v16i8>;
- def v8i16: SHLQBIVecInst<v8i16>;
- def v4i32: SHLQBIVecInst<v4i32>;
- def v4f32: SHLQBIVecInst<v4f32>;
- def v2i64: SHLQBIVecInst<v2i64>;
- def v2f64: SHLQBIVecInst<v2f64>;
-
- def r128: SHLQBIRegInst<GPRC>;
-}
-
-defm SHLQBI : ShiftLeftQuadByBits;
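-
-// Illustrative sketch (not from the original source): a variable 128-bit
-// shift left by $n is split across the byte and bit shifters, roughly:
-//
-//   (compute $by = $n >> 3 and $bi = $n & 7 into scratch registers)
-//   shlqby $t, $a, $by     // shift left by whole bytes...
-//   shlqbi $t, $t, $bi     // ...then by the remaining 0-7 bits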
-
-// See the note above on SHLQBI. In this case, the predicate actually does
-// the enforcement, whereas with SHLQBI, we have to "take it on faith."
-class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class SHLQBIIVecInst<ValueType vectype>:
- SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>;
-
-multiclass ShiftLeftQuadByBitsImm
-{
- def v16i8 : SHLQBIIVecInst<v16i8>;
- def v8i16 : SHLQBIIVecInst<v8i16>;
- def v4i32 : SHLQBIIVecInst<v4i32>;
- def v4f32 : SHLQBIIVecInst<v4f32>;
- def v2i64 : SHLQBIIVecInst<v2i64>;
- def v2f64 : SHLQBIIVecInst<v2f64>;
-}
-
-defm SHLQBII : ShiftLeftQuadByBitsImm;
-
-// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes,
-// not by bits. See notes above on SHLQBI.
-
-class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class SHLQBYVecInst<ValueType vectype>:
- SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>;
-
-multiclass ShiftLeftQuadBytes
-{
- def v16i8: SHLQBYVecInst<v16i8>;
- def v8i16: SHLQBYVecInst<v8i16>;
- def v4i32: SHLQBYVecInst<v4i32>;
- def v4f32: SHLQBYVecInst<v4f32>;
- def v2i64: SHLQBYVecInst<v2i64>;
- def v2f64: SHLQBYVecInst<v2f64>;
- def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
- [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>;
-}
-
-defm SHLQBY: ShiftLeftQuadBytes;
-
-class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class SHLQBYIVecInst<ValueType vectype>:
- SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
-
-multiclass ShiftLeftQuadBytesImm
-{
- def v16i8: SHLQBYIVecInst<v16i8>;
- def v8i16: SHLQBYIVecInst<v8i16>;
- def v4i32: SHLQBYIVecInst<v4i32>;
- def v4f32: SHLQBYIVecInst<v4f32>;
- def v2i64: SHLQBYIVecInst<v2i64>;
- def v2f64: SHLQBYIVecInst<v2f64>;
- def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val),
- [(set GPRC:$rT,
- (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>;
-}
-
-defm SHLQBYI : ShiftLeftQuadBytesImm;
-
-class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class SHLQBYBIVecInst<ValueType vectype>:
- SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-class SHLQBYBIRegInst<RegisterClass rclass>:
- SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass ShiftLeftQuadBytesBitCount
-{
- def v16i8: SHLQBYBIVecInst<v16i8>;
- def v8i16: SHLQBYBIVecInst<v8i16>;
- def v4i32: SHLQBYBIVecInst<v4i32>;
- def v4f32: SHLQBYBIVecInst<v4f32>;
- def v2i64: SHLQBYBIVecInst<v2i64>;
- def v2f64: SHLQBYBIVecInst<v2f64>;
-
- def r128: SHLQBYBIRegInst<GPRC>;
-}
-
-defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate halfword:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-class ROTHVecInst<ValueType vectype>:
- ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl VECREG:$rA, (v8i16 VECREG:$rB)))]>;
-
-class ROTHRegInst<RegisterClass rclass>:
- ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>;
-
-multiclass RotateLeftHalfword
-{
- def v8i16: ROTHVecInst<v8i16>;
- def r16: ROTHRegInst<R16C>;
-}
-
-defm ROTH: RotateLeftHalfword;
-
-def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate halfword, immediate:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class ROTHIVecInst<ValueType vectype>:
- ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>;
-
-multiclass RotateLeftHalfwordImm
-{
- def v8i16: ROTHIVecInst<v8i16>;
- def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
- [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>;
- def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val),
- [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>;
-}
-
-defm ROTHI: RotateLeftHalfwordImm;
-
-def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
- (ROTHIv8i16 VECREG:$rA, (TO_IMM16 imm:$val))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate word:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-class ROTVecInst<ValueType vectype>:
- ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>;
-
-class ROTRegInst<RegisterClass rclass>:
- ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [(set rclass:$rT,
- (rotl rclass:$rA, R32C:$rB))]>;
-
-multiclass RotateLeftWord
-{
- def v4i32: ROTVecInst<v4i32>;
- def r32: ROTRegInst<R32C>;
-}
-
-defm ROT: RotateLeftWord;
-
-// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit,
-// or 32-bit register.
-def ROTr32_r16_anyext:
- ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB),
- [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>;
-
-def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))),
- (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
-
-def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))),
- (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
-
-def ROTr32_r8_anyext:
- ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB),
- [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>;
-
-def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))),
- (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
-
-def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
- (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate word, immediate
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
- ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>;
-
-class ROTIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
- ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
- [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>;
-
-multiclass RotateLeftWordImm
-{
- def v4i32: ROTIVecInst<v4i32, u7imm_i32, i32, uimm7>;
- def v4i32_i16: ROTIVecInst<v4i32, u7imm, i16, uimm7>;
- def v4i32_i8: ROTIVecInst<v4i32, u7imm_i8, i8, uimm7>;
-
- def r32: ROTIRegInst<R32C, u7imm_i32, i32, uimm7>;
- def r32_i16: ROTIRegInst<R32C, u7imm, i16, uimm7>;
- def r32_i8: ROTIRegInst<R32C, u7imm_i8, i8, uimm7>;
-}
-
-defm ROTI : RotateLeftWordImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad by byte (count)
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQBYGenInst<ValueType type, RegisterClass rc>:
- ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB),
- [(set (type rc:$rT),
- (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>;
-
-class ROTQBYVecInst<ValueType type>:
- ROTQBYGenInst<type, VECREG>;
-
-multiclass RotateQuadLeftByBytes
-{
- def v16i8: ROTQBYVecInst<v16i8>;
- def v8i16: ROTQBYVecInst<v8i16>;
- def v4i32: ROTQBYVecInst<v4i32>;
- def v4f32: ROTQBYVecInst<v4f32>;
- def v2i64: ROTQBYVecInst<v2i64>;
- def v2f64: ROTQBYVecInst<v2f64>;
- def i128: ROTQBYGenInst<i128, GPRC>;
-}
-
-defm ROTQBY: RotateQuadLeftByBytes;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad by byte (count), immediate
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQBYIGenInst<ValueType type, RegisterClass rclass>:
- ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val),
- [(set (type rclass:$rT),
- (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>;
-
-class ROTQBYIVecInst<ValueType vectype>:
- ROTQBYIGenInst<vectype, VECREG>;
-
-multiclass RotateQuadByBytesImm
-{
- def v16i8: ROTQBYIVecInst<v16i8>;
- def v8i16: ROTQBYIVecInst<v8i16>;
- def v4i32: ROTQBYIVecInst<v4i32>;
- def v4f32: ROTQBYIVecInst<v4f32>;
- def v2i64: ROTQBYIVecInst<v2i64>;
-  def v2f64: ROTQBYIVecInst<v2f64>;
- def i128: ROTQBYIGenInst<i128, GPRC>;
-}
-
-defm ROTQBYI: RotateQuadByBytesImm;
-
-// See ROTQBY note above.
-class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00110011100, OOL, IOL,
- "rotqbybi\t$rT, $rA, $shift",
- RotShiftQuad, pattern>;
-
-class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
- ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
- [(set (vectype VECREG:$rT),
- (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
-
-multiclass RotateQuadByBytesByBitshift {
- def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
- def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
- def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
- def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
-}
-
-defm ROTQBYBI : RotateQuadByBytesByBitshift;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// See ROTQBY note above.
-//
-// Assume that the user of this instruction knows to shift the rotate count
-// into bit 29
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQBIVecInst<ValueType vectype>:
- ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern yet */]>;
-
-class ROTQBIRegInst<RegisterClass rclass>:
- ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern yet */]>;
-
-multiclass RotateQuadByBitCount
-{
- def v16i8: ROTQBIVecInst<v16i8>;
- def v8i16: ROTQBIVecInst<v8i16>;
- def v4i32: ROTQBIVecInst<v4i32>;
- def v2i64: ROTQBIVecInst<v2i64>;
-
- def r128: ROTQBIRegInst<GPRC>;
- def r64: ROTQBIRegInst<R64C>;
-}
-
-defm ROTQBI: RotateQuadByBitCount;
-
-class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
- PatLeaf pred>:
- ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
- [/* no pattern yet */]>;
-
-class ROTQBIIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
- PatLeaf pred>:
- ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
- [/* no pattern yet */]>;
-
-multiclass RotateQuadByBitCountImm
-{
- def v16i8: ROTQBIIVecInst<v16i8, u7imm_i32, i32, uimm7>;
- def v8i16: ROTQBIIVecInst<v8i16, u7imm_i32, i32, uimm7>;
- def v4i32: ROTQBIIVecInst<v4i32, u7imm_i32, i32, uimm7>;
- def v2i64: ROTQBIIVecInst<v2i64, u7imm_i32, i32, uimm7>;
-
- def r128: ROTQBIIRegInst<GPRC, u7imm_i32, i32, uimm7>;
- def r64: ROTQBIIRegInst<R64C, u7imm_i32, i32, uimm7>;
-}
-
-defm ROTQBII : RotateQuadByBitCountImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// ROTHM v8i16 form:
-// NOTE(1): No vector rotate is generated by the C/C++ frontend (today),
-// so this only matches a synthetically generated/lowered code
-// fragment.
-// NOTE(2): $rB must be negated before the right rotate!
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-def ROTHMv8i16:
- ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
-
-// ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left
-// Note: This instruction doesn't carry a pattern because $rB must be negated
-// for the instruction to work; hence the patterns below the instruction.
-
-def ROTHMr16:
- ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- [/* see patterns below - $rB must be negated! */]>;
-
-def : Pat<(srl R16C:$rA, R32C:$rB),
- (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(srl R16C:$rA, R16C:$rB),
- (ROTHMr16 R16C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(srl R16C:$rA, R8C:$rB),
- (ROTHMr16 R16C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
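-
-// A sketch of what the patterns above produce ($tmp is illustrative):
-// SFIr32 $rB, 0 computes (0 - $rB), so a logical shift right by a variable
-// count becomes a rotate-and-mask by the negated count, e.g. for r16:
-//
-//   sfi    $tmp, $rB, 0      ; $tmp = -$rB
-//   rothm  $rT, $rA, $tmp    ; $rT = $rA >> $rB, zero fill at the left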
-
-// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is
-// that the immediate can be complemented, so that the user doesn't have to
-// worry about it.
-
-class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-def ROTHMIv8i16:
- ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
- [/* no pattern */]>;
-
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)),
- (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
-
-def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)),
- (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)),
- (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def ROTHMIr16:
- ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val),
- [/* no pattern */]>;
-
-def: Pat<(srl R16C:$rA, (i32 uimm7:$val)),
- (ROTHMIr16 R16C:$rA, uimm7:$val)>;
-
-def: Pat<(srl R16C:$rA, (i16 uimm7:$val)),
- (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
- (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-// ROTM v4i32 form: See the ROTHM v8i16 comments.
-class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-def ROTMv4i32:
- ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
- (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>;
-
-def ROTMr32:
- ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(srl R32C:$rA, R32C:$rB),
- (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(srl R32C:$rA, R16C:$rB),
- (ROTMr32 R32C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(srl R32C:$rA, R8C:$rB),
- (ROTMr32 R32C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
-
-// ROTMI v4i32 form: See the comment for ROTHM v8i16.
-def ROTMIv4i32:
- RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- "rotmi\t$rT, $rA, $val", RotShiftVec,
- [(set (v4i32 VECREG:$rT),
- (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i16 uimm7:$val)),
- (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)),
- (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
-
-// ROTMI r32 form: we know how to complement the immediate value.
-def ROTMIr32:
- RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
- "rotmi\t$rT, $rA, $val", RotShiftVec,
- [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
-
-def : Pat<(srl R32C:$rA, (i16 uimm7:$val)),
-          (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(srl R32C:$rA, (i8 uimm7:$val)),
-          (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// ROTQMBY: The vector form exists merely so that type checking succeeds when
-// it is used in an instruction pattern. This instruction assumes that the
-// user knew to negate $rB.
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQMBYVecInst<ValueType vectype>:
- ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern, $rB must be negated */]>;
-
-class ROTQMBYRegInst<RegisterClass rclass>:
- ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass RotateQuadBytes
-{
- def v16i8: ROTQMBYVecInst<v16i8>;
- def v8i16: ROTQMBYVecInst<v8i16>;
- def v4i32: ROTQMBYVecInst<v4i32>;
- def v2i64: ROTQMBYVecInst<v2i64>;
-
- def r128: ROTQMBYRegInst<GPRC>;
- def r64: ROTQMBYRegInst<R64C>;
-}
-
-defm ROTQMBY : RotateQuadBytes;
-
-def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB),
- (ROTQMBYr128 GPRC:$rA,
- (SFIr32 R32C:$rB, 0))>;
-
-class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQMBYIVecInst<ValueType vectype>:
- ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- [/* no pattern */]>;
-
-class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
- PatLeaf pred>:
- ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
- [/* no pattern */]>;
-
-// 128-bit zero extension form:
-class ROTQMBYIZExtInst<RegisterClass rclass, Operand optype, PatLeaf pred>:
- ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val),
- [/* no pattern */]>;
-
-multiclass RotateQuadBytesImm
-{
- def v16i8: ROTQMBYIVecInst<v16i8>;
- def v8i16: ROTQMBYIVecInst<v8i16>;
- def v4i32: ROTQMBYIVecInst<v4i32>;
- def v2i64: ROTQMBYIVecInst<v2i64>;
-
- def r128: ROTQMBYIRegInst<GPRC, rotNeg7imm, i32, uimm7>;
- def r64: ROTQMBYIRegInst<R64C, rotNeg7imm, i32, uimm7>;
-
- def r128_zext_r8: ROTQMBYIZExtInst<R8C, rotNeg7imm, uimm7>;
- def r128_zext_r16: ROTQMBYIZExtInst<R16C, rotNeg7imm, uimm7>;
- def r128_zext_r32: ROTQMBYIZExtInst<R32C, rotNeg7imm, uimm7>;
- def r128_zext_r64: ROTQMBYIZExtInst<R64C, rotNeg7imm, uimm7>;
-}
-
-defm ROTQMBYI : RotateQuadBytesImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad right and mask by bytes, count taken from a bit shift value
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQMBYBIVecInst<ValueType vectype>:
- ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
-                [/* no pattern */]>;
-
-multiclass RotateMaskQuadByBitCount
-{
- def v16i8: ROTQMBYBIVecInst<v16i8>;
- def v8i16: ROTQMBYBIVecInst<v8i16>;
- def v4i32: ROTQMBYBIVecInst<v4i32>;
- def v2i64: ROTQMBYBIVecInst<v2i64>;
- def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
- [/*no pattern*/]>;
-}
-
-defm ROTQMBYBI: RotateMaskQuadByBitCount;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad and mask by bits
-// Note that the rotate amount has to be negated
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQMBIVecInst<ValueType vectype>:
- ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-class ROTQMBIRegInst<RegisterClass rclass>:
- ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass RotateMaskQuadByBits
-{
- def v16i8: ROTQMBIVecInst<v16i8>;
- def v8i16: ROTQMBIVecInst<v8i16>;
- def v4i32: ROTQMBIVecInst<v4i32>;
- def v2i64: ROTQMBIVecInst<v2i64>;
-
- def r128: ROTQMBIRegInst<GPRC>;
- def r64: ROTQMBIRegInst<R64C>;
-}
-
-defm ROTQMBI: RotateMaskQuadByBits;
-
-def : Pat<(srl GPRC:$rA, R32C:$rB),
- (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA,
- (SFIr32 R32C:$rB, 0)),
- (SFIr32 R32C:$rB, 0))>;
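-
-// A sketch of the expansion above ($neg and $t are illustrative): a full
-// 128-bit logical shift right by $rB rotate-and-masks first by bits, then by
-// bytes taken from the same bit-shift value; both take the negated count:
-//
-//   sfi       $neg, $rB, 0       ; $neg = -$rB
-//   rotqmbi   $t,   $rA, $neg    ; shift right by the low 3 bits of $rB
-//   rotqmbybi $rT,  $t,  $neg    ; shift right by the remaining whole bytes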
-
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad and mask by bits, immediate
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQMBIIVecInst<ValueType vectype>:
- ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- [/* no pattern */]>;
-
-class ROTQMBIIRegInst<RegisterClass rclass>:
- ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
- [/* no pattern */]>;
-
-multiclass RotateMaskQuadByBitsImm
-{
- def v16i8: ROTQMBIIVecInst<v16i8>;
- def v8i16: ROTQMBIIVecInst<v8i16>;
- def v4i32: ROTQMBIIVecInst<v4i32>;
- def v2i64: ROTQMBIIVecInst<v2i64>;
-
- def r128: ROTQMBIIRegInst<GPRC>;
- def r64: ROTQMBIIRegInst<R64C>;
-}
-
-defm ROTQMBII: RotateMaskQuadByBitsImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate and mask algebraic (arithmetic shift right), halfword and word:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def ROTMAHv8i16:
- RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "rotmah\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
-
-def ROTMAHr16:
- RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- "rotmah\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(sra R16C:$rA, R32C:$rB),
- (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(sra R16C:$rA, R16C:$rB),
- (ROTMAHr16 R16C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(sra R16C:$rA, R8C:$rB),
- (ROTMAHr16 R16C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
-
-def ROTMAHIv8i16:
- RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
- "rotmahi\t$rT, $rA, $val", RotShiftVec,
- [(set (v8i16 VECREG:$rT),
- (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)),
- (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
- (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
-
-def ROTMAHIr16:
- RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
- "rotmahi\t$rT, $rA, $val", RotShiftVec,
- [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
-
-def : Pat<(sra R16C:$rA, (i32 uimm7:$val)),
-          (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(sra R16C:$rA, (i8 uimm7:$val)),
-          (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def ROTMAv4i32:
- RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "rotma\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
- (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>;
-
-def ROTMAr32:
- RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "rotma\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(sra R32C:$rA, R32C:$rB),
- (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(sra R32C:$rA, R16C:$rB),
- (ROTMAr32 R32C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(sra R32C:$rA, R8C:$rB),
- (ROTMAr32 R32C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
-
-class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01011110000, OOL, IOL,
- "rotmai\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
- ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
-
-class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>:
- ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
- [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
-
-multiclass RotateMaskAlgebraicImm {
- def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>;
- def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>;
- def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>;
- def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>;
-}
-
-defm ROTMAI : RotateMaskAlgebraicImm;
-
-//===----------------------------------------------------------------------===//
-// Branch and conditionals:
-//===----------------------------------------------------------------------===//
-
-let isTerminator = 1, isBarrier = 1 in {
- // Halt If Equal (r32 preferred slot only, no vector form)
- def HEQr32:
- RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB),
- "heq\t$rA, $rB", BranchResolv,
- [/* no pattern to match */]>;
-
- def HEQIr32 :
- RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val),
- "heqi\t$rA, $val", BranchResolv,
- [/* no pattern to match */]>;
-
- // HGT/HGTI: These instructions use signed arithmetic for the comparison,
- // contrasting with HLGT/HLGTI, which use unsigned comparison:
- def HGTr32:
- RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB),
- "hgt\t$rA, $rB", BranchResolv,
- [/* no pattern to match */]>;
-
- def HGTIr32:
- RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val),
- "hgti\t$rA, $val", BranchResolv,
- [/* no pattern to match */]>;
-
- def HLGTr32:
- RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB),
- "hlgt\t$rA, $rB", BranchResolv,
- [/* no pattern to match */]>;
-
- def HLGTIr32:
- RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val),
- "hlgti\t$rA, $val", BranchResolv,
- [/* no pattern to match */]>;
-}
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Comparison operators for i8, i16 and i32:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class CEQBInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpEqualByte
-{
- def v16i8 :
- CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
-             [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
-                                              (v16i8 VECREG:$rB)))]>;
-
- def r8 :
- CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
- [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>;
-}
-
-class CEQBIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpEqualByteImm
-{
- def v16i8 :
- CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
- [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
- v16i8SExt8Imm:$val))]>;
- def r8:
- CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
- [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>;
-}
-
-class CEQHInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpEqualHalfword
-{
- def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>;
-}
-
-class CEQHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpEqualHalfwordImm
-{
- def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (seteq (v8i16 VECREG:$rA),
- (v8i16 v8i16SExt10Imm:$val)))]>;
- def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>;
-}
-
-class CEQInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpEqualWord
-{
- def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
- def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>;
-}
-
-class CEQIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpEqualWordImm
-{
- def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (seteq (v4i32 VECREG:$rA),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>;
-}
-
-class CGTBInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpGtrByte
-{
- def v16i8 :
- CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
-             [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
-                                              (v16i8 VECREG:$rB)))]>;
-
- def r8 :
- CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
- [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>;
-}
-
-class CGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpGtrByteImm
-{
- def v16i8 :
- CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
- [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
- v16i8SExt8Imm:$val))]>;
- def r8:
- CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
- [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>;
-}
-
-class CGTHInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpGtrHalfword
-{
- def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>;
-}
-
-class CGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpGtrHalfwordImm
-{
- def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (setgt (v8i16 VECREG:$rA),
- (v8i16 v8i16SExt10Imm:$val)))]>;
- def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>;
-}
-
-class CGTInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpGtrWord
-{
- def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
- def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>;
-}
-
-class CGTIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpGtrWordImm
-{
- def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (setgt (v4i32 VECREG:$rA),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
-
- // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
- def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
- [/* no pattern */]>;
-}
-
-class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpLGtrByte
-{
- def v16i8 :
- CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
-              [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
-                                                (v16i8 VECREG:$rB)))]>;
-
- def r8 :
- CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
- [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>;
-}
-
-class CLGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpLGtrByteImm
-{
- def v16i8 :
- CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
- [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
- v16i8SExt8Imm:$val))]>;
- def r8:
- CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
- [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>;
-}
-
-class CLGTHInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpLGtrHalfword
-{
- def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>;
-}
-
-class CLGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpLGtrHalfwordImm
-{
- def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (setugt (v8i16 VECREG:$rA),
- (v8i16 v8i16SExt10Imm:$val)))]>;
- def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>;
-}
-
-class CLGTInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpLGtrWord
-{
- def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
- def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>;
-}
-
-class CLGTIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpLGtrWordImm
-{
- def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (setugt (v4i32 VECREG:$rA),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>;
-}
-
-defm CEQB : CmpEqualByte;
-defm CEQBI : CmpEqualByteImm;
-defm CEQH : CmpEqualHalfword;
-defm CEQHI : CmpEqualHalfwordImm;
-defm CEQ : CmpEqualWord;
-defm CEQI : CmpEqualWordImm;
-defm CGTB : CmpGtrByte;
-defm CGTBI : CmpGtrByteImm;
-defm CGTH : CmpGtrHalfword;
-defm CGTHI : CmpGtrHalfwordImm;
-defm CGT : CmpGtrWord;
-defm CGTI : CmpGtrWordImm;
-defm CLGTB : CmpLGtrByte;
-defm CLGTBI : CmpLGtrByteImm;
-defm CLGTH : CmpLGtrHalfword;
-defm CLGTHI : CmpLGtrHalfwordImm;
-defm CLGT : CmpLGtrWord;
-defm CLGTI : CmpLGtrWordImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// For SETCC primitives not supported above (setlt, setle, setge, etc.)
-// define a pattern to generate the right code, as a binary operator
-// (in a manner of speaking).
-//
-// Notes:
-// 1. This only matches the setcc set of conditionals. Special pattern
-// matching is used for select conditionals.
-//
-// 2. The "DAG" versions of these classes are almost exclusively used for
-// i64 comparisons. See the tblgen fundamentals documentation for what
-// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern
-// class for where ResultInstrs originates.
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
- SPUInstr xorinst, SPUInstr cmpare>:
- Pat<(cond rclass:$rA, rclass:$rB),
- (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>;
-
-class SETCCNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
- PatLeaf immpred, SPUInstr xorinst, SPUInstr cmpare>:
- Pat<(cond rclass:$rA, (inttype immpred:$imm)),
- (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>;
-
-def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>;
-def : SETCCNegCondImm<setne, R8C, i8, immSExt8, XORBIr8, CEQBIr8>;
-
-def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>;
-def : SETCCNegCondImm<setne, R16C, i16, i16ImmSExt10, XORHIr16, CEQHIr16>;
-
-def : SETCCNegCondReg<setne, R32C, i32, XORIr32, CEQr32>;
-def : SETCCNegCondImm<setne, R32C, i32, i32ImmSExt10, XORIr32, CEQIr32>;
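-
-// For instance, the r32 setne pattern above expands to (a sketch; $t is
-// illustrative):
-//
-//   ceq   $t,  $rA, $rB    ; $t = (rA == rB) ? -1 : 0
-//   xori  $rT, $t,  -1     ; invert: $rT = (rA != rB) ? -1 : 0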
-
-class SETCCBinOpReg<PatFrag cond, RegisterClass rclass,
- SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
- Pat<(cond rclass:$rA, rclass:$rB),
- (binop (cmpOp1 rclass:$rA, rclass:$rB),
- (cmpOp2 rclass:$rA, rclass:$rB))>;
-
-class SETCCBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
- ValueType immtype,
- SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
- Pat<(cond rclass:$rA, (immtype immpred:$imm)),
- (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)),
- (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>;
-
-def : SETCCBinOpReg<setge, R8C, ORr8, CGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setge, R8C, immSExt8, i8, ORr8, CGTBIr8, CEQBIr8>;
-def : SETCCBinOpReg<setlt, R8C, NORr8, CGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setlt, R8C, immSExt8, i8, NORr8, CGTBIr8, CEQBIr8>;
-def : Pat<(setle R8C:$rA, R8C:$rB),
- (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
-def : Pat<(setle R8C:$rA, immU8:$imm),
- (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
-
-def : SETCCBinOpReg<setge, R16C, ORr16, CGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setge, R16C, i16ImmSExt10, i16,
- ORr16, CGTHIr16, CEQHIr16>;
-def : SETCCBinOpReg<setlt, R16C, NORr16, CGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setlt, R16C, i16ImmSExt10, i16, NORr16, CGTHIr16, CEQHIr16>;
-def : Pat<(setle R16C:$rA, R16C:$rB),
- (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
-def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm),
- (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
-
-def : SETCCBinOpReg<setge, R32C, ORr32, CGTr32, CEQr32>;
-def : SETCCBinOpImm<setge, R32C, i32ImmSExt10, i32,
- ORr32, CGTIr32, CEQIr32>;
-def : SETCCBinOpReg<setlt, R32C, NORr32, CGTr32, CEQr32>;
-def : SETCCBinOpImm<setlt, R32C, i32ImmSExt10, i32, NORr32, CGTIr32, CEQIr32>;
-def : Pat<(setle R32C:$rA, R32C:$rB),
- (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
-def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm),
- (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
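-
-// A sketch of the r32 expansions above ($gt/$eq are illustrative): setge ORs
-// the "greater than" and "equal" masks, setlt NORs them, and setle simply
-// inverts "greater than":
-//
-//   cgt  $gt, $rA, $rB    ; $gt = (a > b) mask
-//   ceq  $eq, $rA, $rB    ; $eq = (a == b) mask
-//   or   $rT, $gt, $eq    ; setge: (a > b) | (a == b)
-//   nor  $rT, $gt, $eq    ; setlt: ~((a > b) | (a == b))
-//   xori $rT, $gt, -1     ; setle: ~(a > b)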
-
-def : SETCCBinOpReg<setuge, R8C, ORr8, CLGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setuge, R8C, immSExt8, i8, ORr8, CLGTBIr8, CEQBIr8>;
-def : SETCCBinOpReg<setult, R8C, NORr8, CLGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setult, R8C, immSExt8, i8, NORr8, CLGTBIr8, CEQBIr8>;
-def : Pat<(setule R8C:$rA, R8C:$rB),
- (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
-def : Pat<(setule R8C:$rA, immU8:$imm),
- (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
-
-def : SETCCBinOpReg<setuge, R16C, ORr16, CLGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setuge, R16C, i16ImmSExt10, i16,
- ORr16, CLGTHIr16, CEQHIr16>;
-def : SETCCBinOpReg<setult, R16C, NORr16, CLGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setult, R16C, i16ImmSExt10, i16, NORr16,
- CLGTHIr16, CEQHIr16>;
-def : Pat<(setule R16C:$rA, R16C:$rB),
- (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
-def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm),
- (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
-
-def : SETCCBinOpReg<setuge, R32C, ORr32, CLGTr32, CEQr32>;
-def : SETCCBinOpImm<setuge, R32C, i32ImmSExt10, i32,
- ORr32, CLGTIr32, CEQIr32>;
-def : SETCCBinOpReg<setult, R32C, NORr32, CLGTr32, CEQr32>;
-def : SETCCBinOpImm<setult, R32C, i32ImmSExt10, i32, NORr32, CLGTIr32, CEQIr32>;
-def : Pat<(setule R32C:$rA, R32C:$rB),
- (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
-def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm),
- (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// select conditional patterns:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class SELECTNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
- SPUInstr selinstr, SPUInstr cmpare>:
- Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rTrue, rclass:$rFalse,
- (cmpare rclass:$rA, rclass:$rB))>;
-
-class SELECTNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
- PatLeaf immpred, SPUInstr selinstr, SPUInstr cmpare>:
- Pat<(select (inttype (cond rclass:$rA, immpred:$imm)),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rTrue, rclass:$rFalse,
- (cmpare rclass:$rA, immpred:$imm))>;
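-
-// Note on operand order (a sketch of why this works): SELB picks bits of its
-// second operand where the mask is 1 and of its first operand where it is 0.
-// Matching a negated condition with the complementary compare and passing
-// ($rTrue, $rFalse) in that order yields the intended select, e.g. for setne:
-//
-//   ceq  $m,  $rA, $rB              ; $m = -1 iff rA == rB
-//   selb $rT, $rTrue, $rFalse, $m   ; $rT = (rA == rB) ? $rFalse : $rTrue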
-
-def : SELECTNegCondReg<setne, R8C, i8, SELBr8, CEQBr8>;
-def : SELECTNegCondImm<setne, R8C, i8, immSExt8, SELBr8, CEQBIr8>;
-def : SELECTNegCondReg<setle, R8C, i8, SELBr8, CGTBr8>;
-def : SELECTNegCondImm<setle, R8C, i8, immSExt8, SELBr8, CGTBr8>;
-def : SELECTNegCondReg<setule, R8C, i8, SELBr8, CLGTBr8>;
-def : SELECTNegCondImm<setule, R8C, i8, immU8, SELBr8, CLGTBIr8>;
-
-def : SELECTNegCondReg<setne, R16C, i16, SELBr16, CEQHr16>;
-def : SELECTNegCondImm<setne, R16C, i16, i16ImmSExt10, SELBr16, CEQHIr16>;
-def : SELECTNegCondReg<setle, R16C, i16, SELBr16, CGTHr16>;
-def : SELECTNegCondImm<setle, R16C, i16, i16ImmSExt10, SELBr16, CGTHIr16>;
-def : SELECTNegCondReg<setule, R16C, i16, SELBr16, CLGTHr16>;
-def : SELECTNegCondImm<setule, R16C, i16, i16ImmSExt10, SELBr16, CLGTHIr16>;
-
-def : SELECTNegCondReg<setne, R32C, i32, SELBr32, CEQr32>;
-def : SELECTNegCondImm<setne, R32C, i32, i32ImmSExt10, SELBr32, CEQIr32>;
-def : SELECTNegCondReg<setle, R32C, i32, SELBr32, CGTr32>;
-def : SELECTNegCondImm<setle, R32C, i32, i32ImmSExt10, SELBr32, CGTIr32>;
-def : SELECTNegCondReg<setule, R32C, i32, SELBr32, CLGTr32>;
-def : SELECTNegCondImm<setule, R32C, i32, i32ImmSExt10, SELBr32, CLGTIr32>;
-
-class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
- SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
- SPUInstr cmpOp2>:
- Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rFalse, rclass:$rTrue,
- (binop (cmpOp1 rclass:$rA, rclass:$rB),
- (cmpOp2 rclass:$rA, rclass:$rB)))>;
-
-class SELECTBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
- ValueType inttype,
- SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
- SPUInstr cmpOp2>:
- Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rFalse, rclass:$rTrue,
- (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)),
- (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>;
-
-def : SELECTBinOpReg<setge, R8C, i8, SELBr8, ORr8, CGTBr8, CEQBr8>;
-def : SELECTBinOpImm<setge, R8C, immSExt8, i8,
- SELBr8, ORr8, CGTBIr8, CEQBIr8>;
-
-def : SELECTBinOpReg<setge, R16C, i16, SELBr16, ORr16, CGTHr16, CEQHr16>;
-def : SELECTBinOpImm<setge, R16C, i16ImmSExt10, i16,
- SELBr16, ORr16, CGTHIr16, CEQHIr16>;
-
-def : SELECTBinOpReg<setge, R32C, i32, SELBr32, ORr32, CGTr32, CEQr32>;
-def : SELECTBinOpImm<setge, R32C, i32ImmSExt10, i32,
- SELBr32, ORr32, CGTIr32, CEQIr32>;
-
-def : SELECTBinOpReg<setuge, R8C, i8, SELBr8, ORr8, CLGTBr8, CEQBr8>;
-def : SELECTBinOpImm<setuge, R8C, immSExt8, i8,
- SELBr8, ORr8, CLGTBIr8, CEQBIr8>;
-
-def : SELECTBinOpReg<setuge, R16C, i16, SELBr16, ORr16, CLGTHr16, CEQHr16>;
-def : SELECTBinOpImm<setuge, R16C, i16ImmUns10, i16,
- SELBr16, ORr16, CLGTHIr16, CEQHIr16>;
-
-def : SELECTBinOpReg<setuge, R32C, i32, SELBr32, ORr32, CLGTr32, CEQr32>;
-def : SELECTBinOpImm<setuge, R32C, i32ImmUns10, i32,
- SELBr32, ORr32, CLGTIr32, CEQIr32>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-let isCall = 1,
- // All calls clobber the non-callee-saved registers:
- Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9,
- R10,R11,R12,R13,R14,R15,R16,R17,R18,R19,
- R20,R21,R22,R23,R24,R25,R26,R27,R28,R29,
- R30,R31,R32,R33,R34,R35,R36,R37,R38,R39,
- R40,R41,R42,R43,R44,R45,R46,R47,R48,R49,
- R50,R51,R52,R53,R54,R55,R56,R57,R58,R59,
- R60,R61,R62,R63,R64,R65,R66,R67,R68,R69,
- R70,R71,R72,R73,R74,R75,R76,R77,R78,R79],
- // All of these instructions use $lr (aka $0)
- Uses = [R0] in {
-  // Branch relative and set link: Used if we actually know that the target
-  // is within [-32768, 32767] bytes of the call site
- def BRSL:
- BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func),
- "brsl\t$$lr, $func",
- [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>;
-
- // Branch absolute and set link: Used if we actually know that the target
- // is an absolute address
- def BRASL:
- BranchSetLink<0b011001100, (outs), (ins calltarget:$func),
- "brasl\t$$lr, $func",
- [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
-
-  // Branch indirect and set link if external data. These instructions are not
-  // generated directly; they are only matched via an intrinsic:
- def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>;
- def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>;
- def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>;
- def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>;
-
- // Branch indirect and set link. This is the "X-form" address version of a
- // function call
- def BISL:
- BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
-}
-
-// Support calls to external symbols:
-def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)),
- (BRSL texternalsym:$func)>;
-
-def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
- (BRASL texternalsym:$func)>;
-
-// Unconditional branches:
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
- let isBarrier = 1 in {
- def BR :
- UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
- "br\t$dest",
- [(br bb:$dest)]>;
-
- // Unconditional, absolute address branch
- def BRA:
- UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
- "bra\t$dest",
- [/* no pattern */]>;
-
- // Indirect branch
- let isIndirectBranch = 1 in {
- def BI:
- BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
- }
- }
-
- // Conditional branches:
- class BRNZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest",
- BranchResolv, pattern>;
-
- class BRNZRegInst<RegisterClass rclass>:
- BRNZInst<(ins rclass:$rCond, brtarget:$dest),
- [(brcond rclass:$rCond, bb:$dest)]>;
-
- class BRNZVecInst<ValueType vectype>:
- BRNZInst<(ins VECREG:$rCond, brtarget:$dest),
- [(brcond (vectype VECREG:$rCond), bb:$dest)]>;
-
- multiclass BranchNotZero {
- def v4i32 : BRNZVecInst<v4i32>;
- def r32 : BRNZRegInst<R32C>;
- }
-
- defm BRNZ : BranchNotZero;
-
- class BRZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest",
- BranchResolv, pattern>;
-
- class BRZRegInst<RegisterClass rclass>:
- BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
-
- class BRZVecInst<ValueType vectype>:
- BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
-
- multiclass BranchZero {
- def v4i32: BRZVecInst<v4i32>;
- def r32: BRZRegInst<R32C>;
- }
-
- defm BRZ: BranchZero;
-
-  // Note: LLVM doesn't do conditional indirect branches. Otherwise these
-  // would be useful:
- /*
- class BINZInst<dag IOL, list<dag> pattern>:
- BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>;
-
- class BINZRegInst<RegisterClass rclass>:
- BINZInst<(ins rclass:$rA, brtarget:$dest),
- [(brcond rclass:$rA, R32C:$dest)]>;
-
- class BINZVecInst<ValueType vectype>:
- BINZInst<(ins VECREG:$rA, R32C:$dest),
- [(brcond (vectype VECREG:$rA), R32C:$dest)]>;
-
- multiclass BranchNotZeroIndirect {
- def v4i32: BINZVecInst<v4i32>;
- def r32: BINZRegInst<R32C>;
- }
-
- defm BINZ: BranchNotZeroIndirect;
-
- class BIZInst<dag IOL, list<dag> pattern>:
- BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>;
-
- class BIZRegInst<RegisterClass rclass>:
- BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>;
-
- class BIZVecInst<ValueType vectype>:
- BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>;
-
- multiclass BranchZeroIndirect {
- def v4i32: BIZVecInst<v4i32>;
- def r32: BIZRegInst<R32C>;
- }
-
- defm BIZ: BranchZeroIndirect;
- */
-
- class BRHNZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv,
- pattern>;
-
- class BRHNZRegInst<RegisterClass rclass>:
- BRHNZInst<(ins rclass:$rCond, brtarget:$dest),
- [(brcond rclass:$rCond, bb:$dest)]>;
-
- class BRHNZVecInst<ValueType vectype>:
- BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>;
-
- multiclass BranchNotZeroHalfword {
- def v8i16: BRHNZVecInst<v8i16>;
- def r16: BRHNZRegInst<R16C>;
- }
-
- defm BRHNZ: BranchNotZeroHalfword;
-
- class BRHZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv,
- pattern>;
-
- class BRHZRegInst<RegisterClass rclass>:
- BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
-
- class BRHZVecInst<ValueType vectype>:
- BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
-
- multiclass BranchZeroHalfword {
- def v8i16: BRHZVecInst<v8i16>;
- def r16: BRHZRegInst<R16C>;
- }
-
- defm BRHZ: BranchZeroHalfword;
-}
-
-//===----------------------------------------------------------------------===//
-// setcc and brcond patterns:
-//===----------------------------------------------------------------------===//
-
-def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
- (BRHZr16 R16C:$rA, bb:$dest)>;
-def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
- (BRHNZr16 R16C:$rA, bb:$dest)>;
-
-def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
- (BRZr32 R32C:$rA, bb:$dest)>;
-def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
- (BRNZr32 R32C:$rA, bb:$dest)>;
-
-multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
-{
- def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
-
- def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
-                (brinst16 (CEQHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
-}
-
-defm BRCONDeq : BranchCondEQ<seteq, BRHNZr16, BRNZr32>;
-defm BRCONDne : BranchCondEQ<setne, BRHZr16, BRZr32>;
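-
-// For example (a sketch; $t is illustrative), "brcond (seteq $a, $b), bb" for
-// r32 operands becomes a compare feeding a branch-if-nonzero:
-//
-//   ceq  $t, $a, $b    ; $t = -1 iff $a == $b
-//   brnz $t, bb        ; taken exactly when the compare was true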
-
-multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
-{
- def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
-
- def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
-                (brinst16 (CLGTHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
-}
-
-defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>;
-defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>;
-
-multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
- SPUInstr orinst32, SPUInstr brinst32>
-{
- def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val),
- (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
- bb:$dest)>;
-
- def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
-               (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16C:$rB),
-                                   (CEQHr16 R16C:$rA, R16C:$rB)),
- bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val),
- (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
- bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB),
- (CEQr32 R32C:$rA, R32C:$rB)),
- bb:$dest)>;
-}
-
-defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>;
-defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>;
-
-multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
-{
- def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
-
- def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
-                (brinst16 (CGTHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
-}
-
-defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>;
-defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>;
-
-multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
- SPUInstr orinst32, SPUInstr brinst32>
-{
- def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val),
- (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
- bb:$dest)>;
-
- def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
-               (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16C:$rB),
-                                   (CEQHr16 R16C:$rA, R16C:$rB)),
- bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val),
- (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
- bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB),
- (CEQr32 R32C:$rA, R32C:$rB)),
- bb:$dest)>;
-}
-
-defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>;
-defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>;
-
-let isTerminator = 1, isBarrier = 1 in {
- let isReturn = 1 in {
- def RET:
- RETForm<"bi\t$$lr", [(retflag)]>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Single precision floating point instructions
-//===----------------------------------------------------------------------===//
-
-class FAInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
- SPrecFP, pattern>;
-
-class FAVecInst<ValueType vectype>:
- FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
-
-multiclass SFPAdd
-{
- def v4f32: FAVecInst<v4f32>;
- def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
-}
-
-defm FA : SFPAdd;
-
-class FSInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
- SPrecFP, pattern>;
-
-class FSVecInst<ValueType vectype>:
- FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
-
-multiclass SFPSub
-{
- def v4f32: FSVecInst<v4f32>;
- def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
-}
-
-defm FS : SFPSub;
-
-class FMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01100011010, OOL, IOL,
- "fm\t$rT, $rA, $rB", SPrecFP,
- pattern>;
-
-class FMVecInst<ValueType type>:
- FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (type VECREG:$rT),
- (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
-
-multiclass SFPMul
-{
- def v4f32: FMVecInst<v4f32>;
- def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
-}
-
-defm FM : SFPMul;
-
-// Floating point multiply and add
-// e.g. d = c + (a * b)
-def FMAv4f32:
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fadd (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
-
-def FMAf32:
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-// FP multiply and subtract
-// Subtracts value in rC from product
-// res = a * b - c
-def FMSv4f32 :
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
- (v4f32 VECREG:$rC)))]>;
-
-def FMSf32 :
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT,
- (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
-
-// Floating Negative Multiply and Subtract
-// Subtracts product from value in rC
-// res = fneg(fms a b c)
-// = - (a * b - c)
-// = c - a * b
-// NOTE: subtraction order
-// fsub a b = a - b
-// fs a b = b - a?
-def FNMSf32 :
- RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-def FNMSv4f32 :
- RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB))))]>;
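-
-// A quick numeric check of the fused forms above, with a = 2.0, b = 3.0,
-// c = 10.0 (plain C arithmetic; matches the algebra in the comments):
-//   fms:  a * b - c  ==  6.0 - 10.0  ==  -4.0
-//   fnms: c - a * b  ==  10.0 - 6.0  ==   4.0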
-
-
-
-
-// Floating point reciprocal estimate
-
-class FRESTInst<dag OOL, dag IOL>:
- RRForm_1<0b00110111000, OOL, IOL,
- "frest\t$rT, $rA", SPrecFP,
- [/* no pattern */]>;
-
-def FRESTv4f32 :
- FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
-
-def FRESTf32 :
- FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
-
-// Floating point interpolate (used in conjunction with reciprocal estimate)
-def FIv4f32 :
- RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fi\t$rT, $rA, $rB", SPrecFP,
- [/* no pattern */]>;
-
-def FIf32 :
- RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fi\t$rT, $rA, $rB", SPrecFP,
- [/* no pattern */]>;
-
-//--------------------------------------------------------------------------
-// Basic single precision floating point comparisons:
-//
-// Note: There is no support on SPU for single precision NaN. Consequently,
-// ordered and unordered comparisons are the same.
-//--------------------------------------------------------------------------
-
-def FCEQf32 :
- RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fceq\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>;
-
-def : Pat<(setoeq R32FP:$rA, R32FP:$rB),
- (FCEQf32 R32FP:$rA, R32FP:$rB)>;
-
-def FCMEQf32 :
- RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fcmeq\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
-
-def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)),
- (FCMEQf32 R32FP:$rA, R32FP:$rB)>;
-
-def FCGTf32 :
- RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fcgt\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
-
-def : Pat<(setogt R32FP:$rA, R32FP:$rB),
- (FCGTf32 R32FP:$rA, R32FP:$rB)>;
-
-def FCMGTf32 :
- RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fcmgt\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
-
-def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)),
- (FCMGTf32 R32FP:$rA, R32FP:$rB)>;
-
-//--------------------------------------------------------------------------
-// Single precision floating point comparisons and SETCC equivalents:
-//--------------------------------------------------------------------------
-
-def : SETCCNegCondReg<setune, R32FP, i32, XORIr32, FCEQf32>;
-def : SETCCNegCondReg<setone, R32FP, i32, XORIr32, FCEQf32>;
-
-def : SETCCBinOpReg<setuge, R32FP, ORr32, FCGTf32, FCEQf32>;
-def : SETCCBinOpReg<setoge, R32FP, ORr32, FCGTf32, FCEQf32>;
-
-def : SETCCBinOpReg<setult, R32FP, NORr32, FCGTf32, FCEQf32>;
-def : SETCCBinOpReg<setolt, R32FP, NORr32, FCGTf32, FCEQf32>;
-
-def : Pat<(setule R32FP:$rA, R32FP:$rB),
- (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
-def : Pat<(setole R32FP:$rA, R32FP:$rB),
- (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
-
-// FP Status and Control Register Write
-// Why isn't rT a don't care in the ISA?
-// Should we create a special RRForm_3 for this guy and zero out the rT?
-def FSCRWf32 :
- RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA),
- "fscrwr\t$rA", SPrecFP,
- [/* This instruction requires an intrinsic. Note: rT is unused. */]>;
-
-// FP Status and Control Register Read
-def FSCRRf32 :
- RRForm_2<0b01011101110, (outs R32FP:$rT), (ins),
- "fscrrd\t$rT", SPrecFP,
- [/* This instruction requires an intrinsic */]>;
-
-// llvm instruction space
-// How do these map onto Cell instructions?
-// fdiv rA rB
-// frest rC rB # c = 1/b (both lines)
-// fi rC rB rC
-// fm rD rA rC # d = a * 1/b
-// fnms rB rD rB rA # b = - (d * b - a) --should == 0 in a perfect world
-// fma rB rB rC rD # b = b * c + d
-//                  = -(d * b - a) * c + d
-//                  = a * c - c * (a * b * c - a)
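-//
-// In C, the sequence above is one Newton-Raphson refinement of a hardware
-// reciprocal estimate (frest_est and interpolate below are illustrative
-// stand-ins for the frest/fi ops, not real intrinsics):
-//   float c = interpolate(frest_est(b), b);  // frest + fi: c ~= 1/b
-//   float d = a * c;                         // fm:   first quotient guess
-//   float e = -(d * b - a);                  // fnms: residual, ~0 if exact
-//   float q = e * c + d;                     // fma:  refined quotient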
-
-// fcopysign (???)
-
-// Library calls:
-// These llvm instructions will actually map to library calls.
-// All that's needed, then, is to check that the appropriate library is
-// imported and do a brsl to the proper function name.
-// frem # fmod(x, y): x - (x/y) * y
-// (Note: fmod(double, double), fmodf(float, float))
-// fsqrt?
-// fsin?
-// fcos?
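-// As a worked example of the frem expansion above (truncating division):
-//   fmod(7.5, 2.0) = 7.5 - trunc(7.5 / 2.0) * 2.0 = 7.5 - 3.0 * 2.0 = 1.5
-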
-// Unimplemented SPU instruction space
-// floating reciprocal absolute square root estimate (frsqest)
-
-// The following are probably just intrinsics
-// status and control register write
-// status and control register read
-
-//--------------------------------------
-// Floating Point Conversions
-// Signed conversions:
-def CSiFv4f32:
- CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "csflt\t$rT, $rA, 0", SPrecFP,
- [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>;
-
-// Convert signed integer to floating point
-def CSiFf32 :
- CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA),
- "csflt\t$rT, $rA, 0", SPrecFP,
- [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>;
-
-// Convert unsigned int to float
-def CUiFv4f32 :
- CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "cuflt\t$rT, $rA, 0", SPrecFP,
- [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>;
-
-def CUiFf32 :
- CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA),
- "cuflt\t$rT, $rA, 0", SPrecFP,
- [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>;
-
-// Convert float to unsigned int
-// Assume that scale = 0
-
-def CFUiv4f32 :
- CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "cfltu\t$rT, $rA, 0", SPrecFP,
- [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>;
-
-def CFUif32 :
- CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
- "cfltu\t$rT, $rA, 0", SPrecFP,
- [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>;
-
-// Convert float to signed int
-// Assume that scale = 0
-
-def CFSiv4f32 :
- CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "cflts\t$rT, $rA, 0", SPrecFP,
- [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>;
-
-def CFSif32 :
- CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
- "cflts\t$rT, $rA, 0", SPrecFP,
- [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>;
-
-//===----------------------------------------------------------------------==//
-// Single<->Double precision conversions
-//===----------------------------------------------------------------------==//
-
-// NOTE: We use the "vec" name suffix here to avoid confusion (e.g. input is a
-// v4f32, output is v2f64--which goes in the name?)
-
-// Floating point extend single to double
-// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it
-// operates on two double-word slots (i.e. 1st and 3rd fp numbers
-// are ignored).
-def FESDvec :
- RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "fesd\t$rT, $rA", SPrecFP,
- [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>;
-
-def FESDf32 :
- RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA),
- "fesd\t$rT, $rA", SPrecFP,
- [(set R64FP:$rT, (fextend R32FP:$rA))]>;
-
-// Floating point round double to single
-//def FRDSvec :
-// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA),
-// "frds\t$rT, $rA,", SPrecFP,
-// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>;
-
-def FRDSf64 :
- RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA),
- "frds\t$rT, $rA", SPrecFP,
- [(set R32FP:$rT, (fround R64FP:$rA))]>;
-
-// TODO: include anyextend?
-
-//===----------------------------------------------------------------------==//
-// Double precision floating point instructions
-//===----------------------------------------------------------------------==//
-def FAf64 :
- RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- "dfa\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>;
-
-def FAv2f64 :
- RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfa\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
-
-def FSf64 :
- RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- "dfs\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>;
-
-def FSv2f64 :
- RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfs\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
-
-def FMf64 :
- RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- "dfm\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>;
-
-def FMv2f64:
- RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfm\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
-
-def FMAf64:
- RRForm<0b00111010110, (outs R64FP:$rT),
- (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- "dfma\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FMAv2f64:
- RRForm<0b00111010110, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "dfma\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fadd (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FMSf64 :
- RRForm<0b10111010110, (outs R64FP:$rT),
- (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- "dfms\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FMSv2f64 :
- RRForm<0b10111010110, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "dfms\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
- (v2f64 VECREG:$rC)))]>;
-
-// DFNMS: - (a * b - c)
-// - (a * b) + c => c - (a * b)
-
-class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
- DPrecFP, pattern>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-class DFNMSVecInst<list<dag> pattern>:
- DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- pattern>;
-
-class DFNMSRegInst<list<dag> pattern>:
- DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- pattern>;
-
-multiclass DFMultiplySubtract
-{
- def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
- (fsub (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB))))]>;
-
- def f64 : DFNMSRegInst<[(set R64FP:$rT,
- (fsub R64FP:$rC,
- (fmul R64FP:$rA, R64FP:$rB)))]>;
-}
-
-defm DFNMS : DFMultiplySubtract;
-
-// - (a * b + c)
-// - (a * b) - c
-def FNMAf64 :
- RRForm<0b11111010110, (outs R64FP:$rT),
- (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- "dfnma\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FNMAv2f64 :
- RRForm<0b11111010110, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "dfnma\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fneg (fadd (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-//===----------------------------------------------------------------------==//
-// Floating point negation and absolute value
-//===----------------------------------------------------------------------==//
-
-def : Pat<(fneg (v4f32 VECREG:$rA)),
- (XORfnegvec (v4f32 VECREG:$rA),
- (v4f32 (ILHUv4i32 0x8000)))>;
-
-def : Pat<(fneg R32FP:$rA),
- (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
-
-// Floating point absolute value
-// Note: f64 fabs is custom-selected.
-
-def : Pat<(fabs R32FP:$rA),
- (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
-
-def : Pat<(fabs (v4f32 VECREG:$rA)),
- (ANDfabsvec (v4f32 VECREG:$rA),
- (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
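-
-// The mask constants above are the usual IEEE-754 single-precision
-// sign/magnitude masks:
-//   ILHU 0x8000                -> 0x80000000  (sign bit only)
-//   IOHL (ILHU 0x7fff), 0xffff -> 0x7fffffff  (everything but the sign bit)
-// so, per 32-bit lane, fneg is x ^ 0x80000000 and fabs is x & 0x7fffffff.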
-
-//===----------------------------------------------------------------------===//
-// Hint for branch instructions:
-//===----------------------------------------------------------------------===//
-def HBRA :
- HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">;
-
-//===----------------------------------------------------------------------===//
-// Execution, Load NOP (execute NOPs belong in the even pipeline, load NOPs belong
-// in the odd pipeline)
-//===----------------------------------------------------------------------===//
-
-def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> {
- let Pattern = [];
-
- let Inst{0-10} = 0b10000000010;
- let Inst{11-17} = 0;
- let Inst{18-24} = 0;
- let Inst{25-31} = 0;
-}
-
-def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> {
- let Pattern = [];
-
- let Inst{0-10} = 0b10000000000;
- let Inst{11-17} = 0;
- let Inst{18-24} = 0;
- let Inst{25-31} = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Bit conversions (type conversions between vector/packed types)
-// NOTE: Promotions are handled using the XS* instructions.
-//===----------------------------------------------------------------------===//
-def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>;
-
-def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>;
-
-def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>;
-
-def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>;
-
-def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
-
-def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-
-def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
- (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
- (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
- (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
- (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
- (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
- (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-
-def : Pat<(i32 (bitconvert R32FP:$rA)),
- (COPY_TO_REGCLASS R32FP:$rA, R32C)>;
-
-def : Pat<(f32 (bitconvert R32C:$rA)),
- (COPY_TO_REGCLASS R32C:$rA, R32FP)>;
-
-def : Pat<(i64 (bitconvert R64FP:$rA)),
- (COPY_TO_REGCLASS R64FP:$rA, R64C)>;
-
-def : Pat<(f64 (bitconvert R64C:$rA)),
- (COPY_TO_REGCLASS R64C:$rA, R64FP)>;
-
-
-//===----------------------------------------------------------------------===//
-// Instruction patterns:
-//===----------------------------------------------------------------------===//
-
-// General 32-bit constants:
-def : Pat<(i32 imm:$imm),
- (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>;
-
-// Single precision float constants:
-def : Pat<(f32 fpimm:$imm),
- (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>;
-
-// General constant 32-bit vectors
-def : Pat<(v4i32 v4i32Imm:$imm),
- (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))),
- (LO16_vec v4i32Imm:$imm))>;
-
-// 8-bit constants
-def : Pat<(i8 imm:$imm),
- (ILHr8 imm:$imm)>;
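-
-// A minimal C model of the ILHU/IOHL split used above (HI16/LO16 are the
-// immediate transforms defined in SPUOperands.td):
-//   uint32_t hi = imm >> 16;        // HI16: upper halfword, loaded by ILHU
-//   uint32_t lo = imm & 0xffff;     // LO16: lower halfword, ORed in by IOHL
-//   uint32_t r  = (hi << 16) | lo;  // == imm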
-
-//===----------------------------------------------------------------------===//
-// Zero/Any/Sign extensions
-//===----------------------------------------------------------------------===//
-
-// sext 8->32: Sign extend bytes to words
-def : Pat<(sext_inreg R32C:$rSrc, i8),
- (XSHWr32 (XSBHr32 R32C:$rSrc))>;
-
-def : Pat<(i32 (sext R8C:$rSrc)),
- (XSHWr16 (XSBHr8 R8C:$rSrc))>;
-
-// sext 8->64: Sign extend bytes to double word
-def : Pat<(sext_inreg R64C:$rSrc, i8),
- (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>;
-
-def : Pat<(i64 (sext R8C:$rSrc)),
- (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>;
-
-// zext 8->16: Zero extend bytes to halfwords
-def : Pat<(i16 (zext R8C:$rSrc)),
- (ANDHIi8i16 R8C:$rSrc, 0xff)>;
-
-// zext 8->32: Zero extend bytes to words
-def : Pat<(i32 (zext R8C:$rSrc)),
- (ANDIi8i32 R8C:$rSrc, 0xff)>;
-
-// zext 8->64: Zero extend bytes to double words
-def : Pat<(i64 (zext R8C:$rSrc)),
- (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32
- (COPY_TO_REGCLASS
- (ANDIi8i32 R8C:$rSrc,0xff), VECREG),
- 0x4),
- (ILv4i32 0x0),
- (FSMBIv4i32 0x0f0f)), R64C)>;
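-
-// One reading of the zext 8->64 sequence above: ANDI zero-extends the byte
-// into the preferred word slot, the quadword byte shift moves that word into
-// the low half of the first doubleword, and SELB against a zero vector
-// clears the remaining bytes, leaving a zero-extended i64 in the preferred
-// doubleword slot.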
-
-// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
-def : Pat<(i16 (anyext R8C:$rSrc)),
- (ORHIi8i16 R8C:$rSrc, 0)>;
-
-// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
-def : Pat<(i32 (anyext R8C:$rSrc)),
- (COPY_TO_REGCLASS R8C:$rSrc, R32C)>;
-
-// sext 16->64: Sign extend halfword to double word
-def : Pat<(sext_inreg R64C:$rSrc, i16),
- (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>;
-
-def : Pat<(sext R16C:$rSrc),
- (XSWDr64 (XSHWr16 R16C:$rSrc))>;
-
-// zext 16->32: Zero extend halfwords to words
-def : Pat<(i32 (zext R16C:$rSrc)),
- (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>;
-
-def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))),
- (ANDIi16i32 R16C:$rSrc, 0xf)>;
-
-def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))),
- (ANDIi16i32 R16C:$rSrc, 0xff)>;
-
-def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
- (ANDIi16i32 R16C:$rSrc, 0xfff)>;
-
-// anyext 16->32: Extend 16->32 bits, irrespective of sign
-def : Pat<(i32 (anyext R16C:$rSrc)),
- (COPY_TO_REGCLASS R16C:$rSrc, R32C)>;
-
-//===----------------------------------------------------------------------===//
-// Truncates:
-// These truncates are for the SPU's supported types (i8, i16, i32). i64 and
-// above are custom lowered.
-//===----------------------------------------------------------------------===//
-
-def : Pat<(i8 (trunc GPRC:$src)),
- (COPY_TO_REGCLASS
- (SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>;
-
-def : Pat<(i8 (trunc R64C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv2i64_m32
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>;
-
-def : Pat<(i8 (trunc R32C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv4i32_m32
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
-
-def : Pat<(i8 (trunc R16C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv4i32_m32
- (COPY_TO_REGCLASS R16C:$src, VECREG),
- (COPY_TO_REGCLASS R16C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
-
-def : Pat<(i16 (trunc GPRC:$src)),
- (COPY_TO_REGCLASS
- (SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>;
-
-def : Pat<(i16 (trunc R64C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv2i64_m32
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>;
-
-def : Pat<(i16 (trunc R32C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv4i32_m32
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>;
-
-def : Pat<(i32 (trunc GPRC:$src)),
- (COPY_TO_REGCLASS
- (SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>;
-
-def : Pat<(i32 (trunc R64C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv2i64_m32
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>;
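-
-// The ILHU/IOHL pairs above build SHUFB control words out of byte indices;
-// e.g. 0x04050607 replicated across the control register selects source
-// bytes 4..7 -- the low word of the preferred doubleword -- which is how
-// (i32 (trunc R64C:$src)) keeps only the low 32 bits.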
-
-//===----------------------------------------------------------------------===//
-// Address generation: SPU, like PPC, has to split addresses into high and
-// low parts in order to load them into a register.
-//===----------------------------------------------------------------------===//
-
-def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
-def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>;
-def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
-def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
-
-def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0),
- (SPUlo tglobaladdr:$in, 0)),
- (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
-
-def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0),
- (SPUlo texternalsym:$in, 0)),
- (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
-
-def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0),
- (SPUlo tjumptable:$in, 0)),
- (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
-
-def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0),
- (SPUlo tconstpool:$in, 0)),
- (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
-
-def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
- (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
-
-def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)),
- (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
-
-def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)),
- (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
-
-def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
- (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
-
-// Intrinsics:
-include "CellSDKIntrinsics.td"
-// Various math operator instruction sequences
-include "SPUMathInstr.td"
-// 64-bit "instructions"/support
-include "SPU64InstrInfo.td"
-// 128-bit "instructions"/support
-include "SPU128InstrInfo.td"
diff --git a/lib/Target/CellSPU/SPUMachineFunction.cpp b/lib/Target/CellSPU/SPUMachineFunction.cpp
deleted file mode 100644
index 3e948d071d63..000000000000
--- a/lib/Target/CellSPU/SPUMachineFunction.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//==-- SPUMachineFunctionInfo.cpp - Private data used for CellSPU ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUMachineFunction.h"
-
-using namespace llvm;
-
-void SPUFunctionInfo::anchor() { }
diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h
deleted file mode 100644
index 399684bb0887..000000000000
--- a/lib/Target/CellSPU/SPUMachineFunction.h
+++ /dev/null
@@ -1,50 +0,0 @@
-//===-- SPUMachineFunctionInfo.h - Private data used for CellSPU --*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_MACHINE_FUNCTION_INFO_H
-#define SPU_MACHINE_FUNCTION_INFO_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// SPUFunctionInfo - Cell SPU target-specific information for each
-/// MachineFunction
-class SPUFunctionInfo : public MachineFunctionInfo {
- virtual void anchor();
-
- /// UsesLR - Indicates whether LR is used in the current function.
- ///
- bool UsesLR;
-
- // VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
-
-public:
- SPUFunctionInfo(MachineFunction& MF)
- : UsesLR(false),
- VarArgsFrameIndex(0)
- {}
-
- void setUsesLR(bool U) { UsesLR = U; }
- bool usesLR() { return UsesLR; }
-
- int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
- void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
-};
-
-} // end of namespace llvm
-
-
-#endif
-
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
deleted file mode 100644
index 9a5c3976afbe..000000000000
--- a/lib/Target/CellSPU/SPUMathInstr.td
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- SPUMathInstr.td - Cell SPU math operations --------*- tablegen -*--===//
-//
-// Cell SPU math operations
-//
-// This target description file contains instruction sequences for various
-// math operations, such as vector multiplies, i32 multiply, etc., for the
-// SPU's i32, i16, i8 and corresponding vector types.
-//
-// Any resemblance to libsimdmath or the Cell SDK simdmath library is
-// purely and completely coincidental.
-//===----------------------------------------------------------------------===//
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v16i8 multiply instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
- (ORv4i32
- (ANDv4i32
- (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
- (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
- (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
- (FSMBIv8i16 0x2222)),
- (ILAv4i32 0x0000ffff)),
- (SHLIv4i32
- (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
- (ROTMAIv4i32_i32 VECREG:$rB, 16)),
- (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
- (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
- (FSMBIv8i16 0x2222)), 16))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v8i16 multiply instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
- (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
- (FSMBIv8i16 0xcccc))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v4i32, i32 multiply instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def MPYv4i32:
- Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
- (Av4i32
- (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
- (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
- (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
-
-def MPYi32:
- Pat<(mul R32C:$rA, R32C:$rB),
- (Ar32
- (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
- (MPYHr32 R32C:$rB, R32C:$rA)),
- (MPYUr32 R32C:$rA, R32C:$rB))>;
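-
-// A C model of the 16x16 decomposition used above (mpyh/mpyu mirror the SPU
-// instructions: mpyh multiplies the high halfword of a by the low halfword
-// of b and shifts the product left 16; mpyu multiplies the two low
-// halfwords):
-//   uint32_t mpyh(uint32_t a, uint32_t b) {
-//     return ((a >> 16) * (b & 0xffff)) << 16;
-//   }
-//   uint32_t mpyu(uint32_t a, uint32_t b) {
-//     return (a & 0xffff) * (b & 0xffff);
-//   }
-//   // a * b == mpyh(a, b) + mpyh(b, a) + mpyu(a, b)   (mod 2^32)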
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// f32, v4f32 divide instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// Reciprocal estimate and interpolation
-def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
-// Division estimate
-def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
-// Newton-Raphson iteration
-def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
- Interpf32.Fragment,
- DivEstf32.Fragment)>;
-// Epsilon addition
-def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
-
-def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
- (SELBf32_cond NRaphf32.Fragment,
- Epsilonf32.Fragment,
- (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
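-
-// A reading of the 'epsilon' trick above: AIf32 adds 1 to the raw bit
-// pattern of the Newton-Raphson result (+1 ulp for positive normals), and
-// the final SELB keeps whichever of the two candidates does not make
-// b * quotient overshoot a -- a standard one-ulp correction for
-// estimate-based division.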
-
-// Reciprocal estimate and interpolation
-def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
-// Division estimate
-def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
-// Newton-Raphson iteration
-def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rA)),
- Interpv4f32.Fragment,
- DivEstv4f32.Fragment)>;
-// Epsilon addition
-def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
-
-def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
- (SELBv4f32_cond NRaphv4f32.Fragment,
- Epsilonv4f32.Fragment,
- (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
- Epsilonv4f32.Fragment,
- (v4f32 VECREG:$rA)), -1))>;
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
deleted file mode 100644
index a47e9ef0167c..000000000000
--- a/lib/Target/CellSPU/SPUNodes.td
+++ /dev/null
@@ -1,159 +0,0 @@
-//=== SPUNodes.td - Specialized SelectionDAG nodes by CellSPU -*- tablegen -*-//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Type profiles and SelectionDAG nodes used by CellSPU
-//
-//===----------------------------------------------------------------------===//
-
-// Type profile for a call sequence
-def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
-
-// SPU_GenControl: Type profile for generating control words for insertions
-def SPU_GenControl : SDTypeProfile<1, 1, []>;
-def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
-
-def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPOutGlue]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
-//===----------------------------------------------------------------------===//
-// Operand constraints:
-//===----------------------------------------------------------------------===//
-
-def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-
-// Operand type constraints for vector shuffle/permute operations
-def SDT_SPUshuffle : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
-]>;
-
-// Vector binary operator type constraints (needs a further constraint to
-// ensure that operand 0 is a vector...):
-
-def SPUVecBinop: SDTypeProfile<1, 2, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
-]>;
-
-// Trinary operators, e.g., addx, carry generate
-def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
-]>;
-
-// SELECT_MASK type constraints: There are several variations for the various
-// vector types (this avoids having to bit_convert all over the place.)
-def SPUselmask_type: SDTypeProfile<1, 1, [
- SDTCisInt<1>
-]>;
-
-// SELB type constraints:
-def SPUselb_type: SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>;
-
-// SPU Vector shift pseudo-instruction type constraints
-def SPUvecshift_type: SDTypeProfile<1, 2, [
- SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
-
-// "marker" type for i64 operators that need a shuffle mask
-// (i.e., uses cg or bg or another instruction that needs to
-// use shufb to get things in the right place.)
-// Op0: The result
-// Op1, 2: LHS, RHS
-// Op3: Carry-generate shuffle mask
-
-def SPUmarker_type : SDTypeProfile<1, 3, [
- SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
-
-//===----------------------------------------------------------------------===//
-// Synthetic/pseudo-instructions
-//===----------------------------------------------------------------------===//
-
-// SPU CNTB:
-def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
-
-// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
-// SPUISelLowering.h):
-def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
-
-// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
-def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
-def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
-def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>;
-
-def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
-def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
-
-// Vector rotate left, bits shifted out of the left are rotated in on the right
-def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
- SPUvecshift_type, []>;
-
-// Vector rotate left by bytes, but the count is given in bits and the SPU
-// internally converts it to bytes (saves an instruction to mask off lower
-// three bits)
-def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
- SPUvecshift_type>;
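-// e.g. a rotate count of 24 bits becomes a 3-byte rotate; the low three
-// bits of the count are simply not used.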
-
-// Shift entire quad left by bytes/bits. Zeros are shifted in on the right.
-// SHL_BITS is the same as SHL for i128, but ISD::SHL is not implemented for i128
-def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
-def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
-def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
-
-// SPU form select mask for bytes, immediate
-def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
-
-// SPU select bits instruction
-def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
-
-def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
-def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
-
-def SPU_vec_demote : SDTypeProfile<1, 1, []>;
-def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
-
-// Address high and low components, used for [r+r] type addressing
-def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
-def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>;
-
-// PC-relative address
-def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
-
-// A-Form local store addresses
-def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
-
-// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
-def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
-
-// i64 markers: supplies extra operands used to generate the i64 operator
-// instruction sequences
-def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
-def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
-def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
-
-//===----------------------------------------------------------------------===//
-// Constraints: (taken from PPCInstrInfo.td)
-//===----------------------------------------------------------------------===//
-
-class RegConstraint<string C> {
- string Constraints = C;
-}
-
-class NoEncode<string E> {
- string DisableEncoding = E;
-}
-
-//===----------------------------------------------------------------------===//
-// Return (flag isn't quite what it means: the operations are flagged so that
-// instruction scheduling doesn't disassociate them.)
-//===----------------------------------------------------------------------===//
-
-def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp
deleted file mode 100644
index 7c58041e3b84..000000000000
--- a/lib/Target/CellSPU/SPUNopFiller.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The final pass just before assembly printing. This pass is the last
-// checkpoint where nops and lnops are added to the instruction stream to
-// satisfy the dual issue requirements. The actual dual issue scheduling is
-// done (TODO: nowhere, currently)
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "SPUTargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
- struct SPUNopFiller : public MachineFunctionPass {
-
- TargetMachine &TM;
- const TargetInstrInfo *TII;
- const InstrItineraryData *IID;
- bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd
-
- static char ID;
- SPUNopFiller(TargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
- IID(tm.getInstrItineraryData())
- {
- DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
- }
-
- virtual const char *getPassName() const {
- return "SPU nop/lnop Filler";
- }
-
- void runOnMachineBasicBlock(MachineBasicBlock &MBB);
-
- bool runOnMachineFunction(MachineFunction &F) {
- isEvenPlace = true; //all functions get an .align 3 directive at start
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI)
- runOnMachineBasicBlock(*FI);
- return true; //never-ever do any more modifications, just print it!
- }
-
- typedef enum { none = 0, // no more instructions in this function / BB
- pseudo = 1, // this does not get executed
- even = 2,
- odd = 3 } SPUOpPlace;
- SPUOpPlace getOpPlacement( MachineInstr &instr );
-
- };
- char SPUNopFiller::ID = 0;
-
-}
-
-// Fill a BasicBlock to alignment.
-// In the assembly we align the functions to 'even' addresses, but
-// basic blocks have an implicit alignment. We hereby define
-// basic blocks to have the same, even, alignment.
-void SPUNopFiller::
-runOnMachineBasicBlock(MachineBasicBlock &MBB)
-{
- assert( isEvenPlace && "basic block start from odd address");
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- {
- SPUOpPlace this_optype, next_optype;
- MachineBasicBlock::iterator J = I;
- J++;
-
- this_optype = getOpPlacement( *I );
- next_optype = none;
- while (J!=MBB.end()){
- next_optype = getOpPlacement( *J );
- ++J;
- if (next_optype != pseudo )
- break;
- }
-
-    // pad: odd(wrong), even(wrong), ...
- // to: nop(corr), odd(corr), even(corr)...
- if( isEvenPlace && this_optype == odd && next_optype == even ) {
- DEBUG( dbgs() <<"Adding NOP before: "; );
- DEBUG( I->dump(); );
- BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
- isEvenPlace=false;
- }
-
-    // pad: even(wrong), odd(wrong), ...
- // to: lnop(corr), even(corr), odd(corr)...
- else if ( !isEvenPlace && this_optype == even && next_optype == odd){
- DEBUG( dbgs() <<"Adding LNOP before: "; );
- DEBUG( I->dump(); );
- BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
- isEvenPlace=true;
- }
-
- // now go to next mem slot
- if( this_optype != pseudo )
- isEvenPlace = !isEvenPlace;
-
- }
-
-  // pad basic block end
- if( !isEvenPlace ){
- MachineBasicBlock::iterator J = MBB.end();
- J--;
- if (getOpPlacement( *J ) == odd) {
- DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
- BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP));
- }
- else {
- J++;
- DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
- BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP));
- }
- isEvenPlace=true;
- }
-}
-
-FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
- return new SPUNopFiller(tm);
-}
-
-// Figure out if 'instr' is executed in the even or odd pipeline
-SPUNopFiller::SPUOpPlace
-SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
- int sc = instr.getDesc().getSchedClass();
- const InstrStage *stage = IID->beginStage(sc);
- unsigned FUs = stage->getUnits();
- SPUOpPlace retval;
-
- switch( FUs ) {
- case 0: retval = pseudo; break;
- case 1: retval = odd; break;
- case 2: retval = even; break;
- default: retval= pseudo;
- assert( false && "got unknown FuncUnit\n");
- break;
- };
- return retval;
-}
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
deleted file mode 100644
index 6f8deef5530f..000000000000
--- a/lib/Target/CellSPU/SPUOperands.td
+++ /dev/null
@@ -1,664 +0,0 @@
-//===-- SPUOperands.td - Cell SPU Instruction Operands -----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Cell SPU Instruction Operands:
-//===----------------------------------------------------------------------===//
-
-// TO_IMM32 - Convert an i8/i16 to i32.
-def TO_IMM32 : SDNodeXForm<imm, [{
- return getI32Imm(N->getZExtValue());
-}]>;
-
-// TO_IMM16 - Convert an i8/i32 to i16.
-def TO_IMM16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i16);
-}]>;
-
-
-def LO16 : SDNodeXForm<imm, [{
- unsigned val = N->getZExtValue();
- // Transformation function: get the low 16 bits.
- return getI32Imm(val & 0xffff);
-}]>;
-
-def LO16_vec : SDNodeXForm<scalar_to_vector, [{
- SDValue OpVal(0, 0);
-
- // Transformation function: get the low 16 bit immediate from a build_vector
- // node.
- assert(N->getOpcode() == ISD::BUILD_VECTOR
- && "LO16_vec got something other than a BUILD_VECTOR");
-
- // Get first constant operand...
- for (unsigned i = 0, e = N->getNumOperands();
- OpVal.getNode() == 0 && i != e; ++i) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- if (OpVal.getNode() == 0)
- OpVal = N->getOperand(i);
- }
-
- assert(OpVal.getNode() != 0 && "LO16_vec did not locate a <defined> node");
- ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
- return getI32Imm((unsigned)CN->getZExtValue() & 0xffff);
-}]>;
-
-// Transform an immediate, returning the high 16 bits shifted down:
-def HI16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() >> 16);
-}]>;
-
-// Transformation function: shift the high 16 bit immediate from a build_vector
-// node into the low 16 bits, and return a 16-bit constant.
-def HI16_vec : SDNodeXForm<scalar_to_vector, [{
- SDValue OpVal(0, 0);
-
- assert(N->getOpcode() == ISD::BUILD_VECTOR
- && "HI16_vec got something other than a BUILD_VECTOR");
-
- // Get first constant operand...
- for (unsigned i = 0, e = N->getNumOperands();
- OpVal.getNode() == 0 && i != e; ++i) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- if (OpVal.getNode() == 0)
- OpVal = N->getOperand(i);
- }
-
- assert(OpVal.getNode() != 0 && "HI16_vec did not locate a <defined> node");
- ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
- return getI32Imm((unsigned)CN->getZExtValue() >> 16);
-}]>;
-
-// simm7 predicate - True if the immediate fits in a 7-bit signed
-// field.
-def simm7: PatLeaf<(imm), [{
- int sextVal = int(N->getSExtValue());
- return (sextVal >= -64 && sextVal <= 63);
-}]>;
-
-// uimm7 predicate - True if the immediate fits in a 7-bit unsigned
-// field.
-def uimm7: PatLeaf<(imm), [{
- return (N->getZExtValue() <= 0x7f);
-}]>;
-
-// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended
-// field.
-def immSExt8 : PatLeaf<(imm), [{
- int Value = int(N->getSExtValue());
- return (Value >= -(1 << 8) && Value <= (1 << 8) - 1);
-}]>;
-
-// immU8: immediate, unsigned 8-bit quantity
-def immU8 : PatLeaf<(imm), [{
- return (N->getZExtValue() <= 0xff);
-}]>;
-
-// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
-// extended field. Used by RI10Form instructions like 'ldq'.
-def i32ImmSExt10 : PatLeaf<(imm), [{
- return isI32IntS10Immediate(N);
-}]>;
-
-// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned
-// field. Used by RI10Form instructions like 'ldq'.
-def i32ImmUns10 : PatLeaf<(imm), [{
- return isI32IntU10Immediate(N);
-}]>;
-
-// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign
-// extended field. Used by RI10Form instructions like 'ldq'.
-def i16ImmSExt10 : PatLeaf<(imm), [{
- return isI16IntS10Immediate(N);
-}]>;
-
-// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned
-// value. Used by RI10Form instructions.
-def i16ImmUns10 : PatLeaf<(imm), [{
- return isI16IntU10Immediate(N);
-}]>;
-
-def immSExt16 : PatLeaf<(imm), [{
- // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- short Ignored;
- return isIntS16Immediate(N, Ignored);
-}]>;
-
-def immZExt16 : PatLeaf<(imm), [{
- // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
- // field.
- return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
-}], LO16>;
-
-def immU16 : PatLeaf<(imm), [{
-  // immU16 predicate - True if the immediate fits into a 16-bit unsigned field.
- return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff);
-}]>;
-
-def imm18 : PatLeaf<(imm), [{
- // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
- int Value = (int) N->getZExtValue();
- return isUInt<18>(Value);
-}]>;
-
-def lo16 : PatLeaf<(imm), [{
- // lo16 predicate - returns true if the immediate has all zeros in the
-  // high order bits and is a 32-bit constant:
- if (N->getValueType(0) == MVT::i32) {
- uint32_t val = N->getZExtValue();
- return ((val & 0x0000ffff) == val);
- }
-
- return false;
-}], LO16>;
-
-def hi16 : PatLeaf<(imm), [{
- // hi16 predicate - returns true if the immediate has all zeros in the
- // low order bits and is a 32-bit constant:
- if (N->getValueType(0) == MVT::i32) {
- uint32_t val = uint32_t(N->getZExtValue());
- return ((val & 0xffff0000) == val);
- } else if (N->getValueType(0) == MVT::i64) {
- uint64_t val = N->getZExtValue();
- return ((val & 0xffff0000ULL) == val);
- }
-
- return false;
-}], HI16>;
-
-def bitshift : PatLeaf<(imm), [{
- // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII
- // (shift left quadword by bits immediate)
- int64_t Val = N->getZExtValue();
- return (Val > 0 && Val <= 7);
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Floating point operands:
-//===----------------------------------------------------------------------===//
-
-// Transform a float, returning the high 16 bits shifted down, as if
-// the float was really an unsigned integer:
-def HI16_f32 : SDNodeXForm<fpimm, [{
- float fval = N->getValueAPF().convertToFloat();
- return getI32Imm(FloatToBits(fval) >> 16);
-}]>;
-
-// Transformation function on floats: get the low 16 bits as if the float was
-// an unsigned integer.
-def LO16_f32 : SDNodeXForm<fpimm, [{
- float fval = N->getValueAPF().convertToFloat();
- return getI32Imm(FloatToBits(fval) & 0xffff);
-}]>;
-
-def FPimm_sext16 : SDNodeXForm<fpimm, [{
- float fval = N->getValueAPF().convertToFloat();
- return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16));
-}]>;
-
-def FPimm_u18 : SDNodeXForm<fpimm, [{
- float fval = N->getValueAPF().convertToFloat();
- return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
-}]>;
-
-def fpimmSExt16 : PatLeaf<(fpimm), [{
- short Ignored;
- return isFPS16Immediate(N, Ignored);
-}], FPimm_sext16>;
-
-// Does the SFP constant only have the upper 16 bits set?
-def hi16_f32 : PatLeaf<(fpimm), [{
- if (N->getValueType(0) == MVT::f32) {
- uint32_t val = FloatToBits(N->getValueAPF().convertToFloat());
- return ((val & 0xffff0000) == val);
- }
-
- return false;
-}], HI16_f32>;
-
-// Does the SFP constant fit into 18 bits?
-def fpimm18 : PatLeaf<(fpimm), [{
- if (N->getValueType(0) == MVT::f32) {
- uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
- return isUInt<18>(Value);
- }
-
- return false;
-}], FPimm_u18>;
-
-//===----------------------------------------------------------------------===//
-// 64-bit operands (TODO):
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// build_vector operands:
-//===----------------------------------------------------------------------===//
-
-// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended
-// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
-// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
-def v16i8SExt8Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
-}]>;
-
-// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant
-// load, works in conjunction with its transform function. N.B.: This relies
-// on the incoming constant being a 16-bit quantity, where the upper and
-// lower bytes are EXACTLY the same (e.g., 0x2a2a)
-def v16i8SExt8Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
-}], v16i8SExt8Imm_xform>;
-
-// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit
-// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
-// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
-def v16i8U8Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
-}]>;
-
-// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant
-// load, works in conjunction with its transform function. N.B.: This relies
-// on the incoming constant being a 16-bit quantity, where the upper and
-// lower bytes are EXACTLY the same (e.g., 0x2a2a)
-def v16i8U8Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
-}], v16i8U8Imm_xform>;
-
-// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended
-// immediate constant load for v8i16 vectors.
-def v8i16SExt8Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v8i16SExt8Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16SExt8Imm_xform>;
-
-// v8i16SExt10Imm_xform function: convert build_vector to 16-bit sign extended
-// immediate constant load for v8i16 vectors.
-def v8i16SExt10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16SExt10Imm: Predicate test for 16-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v8i16SExt10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16SExt10Imm_xform>;
-
-// v8i16Uns10Imm_xform function: convert build_vector to 16-bit unsigned
-// immediate constant load for v8i16 vectors.
-def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16Uns10Imm: Predicate test for 16-bit unsigned immediate constant
-// load, works in conjunction with its transform function.
-def v8i16Uns10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16Uns10Imm_xform>;
-
-// v8i16SExt16Imm_xform function: convert build_vector to 16-bit sign extended
-// immediate constant load for v8i16 vectors.
-def v8i16Uns16Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v8i16SExt16Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16Uns16Imm_xform>;
-
-// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended
-// immediate constant load for v4i32 vectors.
-def v4i32SExt10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v4i32SExt10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32SExt10Imm_xform>;
-
-// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned
-// immediate constant load for v4i32 vectors.
-def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant
-// load, works in conjunction with its transform function.
-def v4i32Uns10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32Uns10Imm_xform>;
-
-// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended
-// immediate constant load for v4i32 vectors.
-def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v4i32SExt16Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32SExt16Imm_xform>;
-
-// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned
-// immediate constant load for v4i32 vectors.
-def v4i32Uns18Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
-// works in conjunction with its transform function.
-def v4i32Uns18Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32Uns18Imm_xform>;
-
-// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant
-// load.
-def ILHUvec_get_imm: SDNodeXForm<build_vector, [{
- return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32);
-}]>;
-
-/// immILHUvec: Predicate test for an ILHU constant vector.
-def immILHUvec: PatLeaf<(build_vector), [{
- return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], ILHUvec_get_imm>;
-
-// Catch-all for any other i32 vector constants
-def v4i32_get_imm: SDNodeXForm<build_vector, [{
- return SPU::get_v4i32_imm(N, *CurDAG);
-}]>;
-
-def v4i32Imm: PatLeaf<(build_vector), [{
- return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0;
-}], v4i32_get_imm>;
-
-// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended
-// immediate constant load for v2i64 vectors.
-def v2i64SExt10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64);
-}]>;
-
-// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v2i64SExt10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], v2i64SExt10Imm_xform>;
-
-// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended
-// immediate constant load for v2i64 vectors.
-def v2i64SExt16Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64);
-}]>;
-
-// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v2i64SExt16Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], v2i64SExt16Imm_xform>;
-
-// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned
-// immediate constant load for v2i64 vectors.
-def v2i64Uns18Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64);
-}]>;
-
-// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
-// works in conjunction with its transform function.
-def v2i64Uns18Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], v2i64Uns18Imm_xform>;
-
-/// immILHUvec_i64: Predicate test for an ILHU constant vector.
-def immILHUvec_i64: PatLeaf<(build_vector), [{
- return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], ILHUvec_get_imm>;
-
-// Catch-all for any other i64 vector constants
-def v2i64_get_imm: SDNodeXForm<build_vector, [{
- return SPU::get_v2i64_imm(N, *CurDAG);
-}]>;
-
-def v2i64Imm: PatLeaf<(build_vector), [{
- return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0;
-}], v2i64_get_imm>;
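-
-// How these predicate/xform pairs work: each PatLeaf's predicate (the
-// getNode() != 0 check) tests whether the build_vector folds to a suitable
-// immediate, and the attached SDNodeXForm performs the fold during
-// instruction selection. As a sketch (ILv4i32 stands in for whichever
-// immediate-load instruction the target actually defines):
-//
-//   def : Pat<(v4i32 v4i32SExt16Imm:$imm), (ILv4i32 v4i32SExt16Imm:$imm)>;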
-
-//===----------------------------------------------------------------------===//
-// Operand Definitions.
-
-def s7imm: Operand<i8> {
- let PrintMethod = "printS7ImmOperand";
-}
-
-def s7imm_i8: Operand<i8> {
- let PrintMethod = "printS7ImmOperand";
-}
-
-def u7imm: Operand<i16> {
- let PrintMethod = "printU7ImmOperand";
-}
-
-def u7imm_i8: Operand<i8> {
- let PrintMethod = "printU7ImmOperand";
-}
-
-def u7imm_i32: Operand<i32> {
- let PrintMethod = "printU7ImmOperand";
-}
-
-// Halfword, signed 10-bit constant
-def s10imm : Operand<i16> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-def s10imm_i8: Operand<i8> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-def s10imm_i32: Operand<i32> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-def s10imm_i64: Operand<i64> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-// Unsigned 10-bit integers:
-def u10imm: Operand<i16> {
- let PrintMethod = "printU10ImmOperand";
-}
-
-def u10imm_i8: Operand<i8> {
- let PrintMethod = "printU10ImmOperand";
-}
-
-def u10imm_i32: Operand<i32> {
- let PrintMethod = "printU10ImmOperand";
-}
-
-def s16imm : Operand<i16> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_i8: Operand<i8> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_i32: Operand<i32> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_i64: Operand<i64> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_f32: Operand<f32> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_f64: Operand<f64> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def u16imm_i64 : Operand<i64> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def u16imm_i32 : Operand<i32> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def u16imm : Operand<i16> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def f16imm : Operand<f32> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def s18imm : Operand<i32> {
- let PrintMethod = "printS18ImmOperand";
-}
-
-def u18imm : Operand<i32> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-def u18imm_i64 : Operand<i64> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-def f18imm : Operand<f32> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-def f18imm_f64 : Operand<f64> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-// Negated 7-bit halfword rotate immediate operands
-def rothNeg7imm : Operand<i32> {
- let PrintMethod = "printROTHNeg7Imm";
-}
-
-def rothNeg7imm_i16 : Operand<i16> {
- let PrintMethod = "printROTHNeg7Imm";
-}
-
-// Negated 7-bit word rotate immediate operands
-def rotNeg7imm : Operand<i32> {
- let PrintMethod = "printROTNeg7Imm";
-}
-
-def rotNeg7imm_i16 : Operand<i16> {
- let PrintMethod = "printROTNeg7Imm";
-}
-
-def rotNeg7imm_i8 : Operand<i8> {
- let PrintMethod = "printROTNeg7Imm";
-}
-
-def target : Operand<OtherVT> {
- let PrintMethod = "printBranchOperand";
-}
-
-// Absolute address call target
-def calltarget : Operand<iPTR> {
- let PrintMethod = "printCallOperand";
- let MIOperandInfo = (ops u18imm:$calldest);
-}
-
-// PC relative call target
-def relcalltarget : Operand<iPTR> {
- let PrintMethod = "printPCRelativeOperand";
- let MIOperandInfo = (ops s16imm:$calldest);
-}
-
-// Branch targets:
-def brtarget : Operand<OtherVT> {
- let PrintMethod = "printPCRelativeOperand";
-}
-
-// Hint for branch target
-def hbrtarget : Operand<OtherVT> {
- let PrintMethod = "printHBROperand";
-}
-
-// Indirect call target
-def indcalltarget : Operand<iPTR> {
- let PrintMethod = "printCallOperand";
- let MIOperandInfo = (ops ptr_rc:$calldest);
-}
-
-def symbolHi: Operand<i32> {
- let PrintMethod = "printSymbolHi";
-}
-
-def symbolLo: Operand<i32> {
- let PrintMethod = "printSymbolLo";
-}
-
-def symbolLSA: Operand<i32> {
- let PrintMethod = "printSymbolLSA";
-}
-
-// Shuffle address memory operand [s7imm(reg) d-format]
-def shufaddr : Operand<iPTR> {
- let PrintMethod = "printShufAddr";
- let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
-}
-
-// memory s10imm(reg) operand
-def dformaddr : Operand<iPTR> {
- let PrintMethod = "printDFormAddr";
- let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
-}
-
-// 256K local store address
-// N.B.: The tblgen code generator expects two operands, an offset and a
-// pointer. Of these, only the immediate is actually used.
-def addr256k : Operand<iPTR> {
- let PrintMethod = "printAddr256K";
- let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg);
-}
-
-// memory s18imm(reg) operand
-def memri18 : Operand<iPTR> {
- let PrintMethod = "printMemRegImmS18";
- let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg);
-}
-
-// memory register + register operand
-def memrr : Operand<iPTR> {
- let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b);
-}
-
-// Define SPU-specific addressing modes. These come in three basic
-// flavors, plus a 7-bit D-form variant:
-//
-// D-form : [r+I10] (10-bit signed offset + reg)
-// X-form : [r+r] (reg+reg)
-// A-form : abs (256K LSA offset)
-// D-form(2): [r+I7] (7-bit signed offset + reg)
-
-def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr",
- [], [SDNPWantRoot]>;
-def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr",
- [], [SDNPWantRoot]>;
-def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr",
- [], [SDNPWantRoot]>;
-def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
- [], [SDNPWantRoot]>;
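-
-// As a rough illustration (a sketch, not a pattern from this file), a
-// selection rule combines one of these ComplexPatterns with its matching
-// memory operand, e.g. for the D-form quadword vector load LQDv4i32:
-//
-//   def : Pat<(v4i32 (load dform_addr:$src)), (LQDv4i32 $src)>;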
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
deleted file mode 100644
index e6c872d0bbb7..000000000000
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ /dev/null
@@ -1,357 +0,0 @@
-//===-- SPURegisterInfo.cpp - Cell SPU Register Information ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Cell implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "reginfo"
-#include "SPURegisterInfo.h"
-#include "SPU.h"
-#include "SPUInstrBuilder.h"
-#include "SPUSubtarget.h"
-#include "SPUMachineFunction.h"
-#include "SPUFrameLowering.h"
-#include "llvm/Constants.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include <cstdlib>
-
-#define GET_REGINFO_TARGET_DESC
-#include "SPUGenRegisterInfo.inc"
-
-using namespace llvm;
-
-/// getRegisterNumbering - Given the enum value for some register, e.g.
-/// SPU::R14, return the number that it corresponds to (e.g. 14).
-unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
- using namespace SPU;
- switch (RegEnum) {
- case SPU::R0: return 0;
- case SPU::R1: return 1;
- case SPU::R2: return 2;
- case SPU::R3: return 3;
- case SPU::R4: return 4;
- case SPU::R5: return 5;
- case SPU::R6: return 6;
- case SPU::R7: return 7;
- case SPU::R8: return 8;
- case SPU::R9: return 9;
- case SPU::R10: return 10;
- case SPU::R11: return 11;
- case SPU::R12: return 12;
- case SPU::R13: return 13;
- case SPU::R14: return 14;
- case SPU::R15: return 15;
- case SPU::R16: return 16;
- case SPU::R17: return 17;
- case SPU::R18: return 18;
- case SPU::R19: return 19;
- case SPU::R20: return 20;
- case SPU::R21: return 21;
- case SPU::R22: return 22;
- case SPU::R23: return 23;
- case SPU::R24: return 24;
- case SPU::R25: return 25;
- case SPU::R26: return 26;
- case SPU::R27: return 27;
- case SPU::R28: return 28;
- case SPU::R29: return 29;
- case SPU::R30: return 30;
- case SPU::R31: return 31;
- case SPU::R32: return 32;
- case SPU::R33: return 33;
- case SPU::R34: return 34;
- case SPU::R35: return 35;
- case SPU::R36: return 36;
- case SPU::R37: return 37;
- case SPU::R38: return 38;
- case SPU::R39: return 39;
- case SPU::R40: return 40;
- case SPU::R41: return 41;
- case SPU::R42: return 42;
- case SPU::R43: return 43;
- case SPU::R44: return 44;
- case SPU::R45: return 45;
- case SPU::R46: return 46;
- case SPU::R47: return 47;
- case SPU::R48: return 48;
- case SPU::R49: return 49;
- case SPU::R50: return 50;
- case SPU::R51: return 51;
- case SPU::R52: return 52;
- case SPU::R53: return 53;
- case SPU::R54: return 54;
- case SPU::R55: return 55;
- case SPU::R56: return 56;
- case SPU::R57: return 57;
- case SPU::R58: return 58;
- case SPU::R59: return 59;
- case SPU::R60: return 60;
- case SPU::R61: return 61;
- case SPU::R62: return 62;
- case SPU::R63: return 63;
- case SPU::R64: return 64;
- case SPU::R65: return 65;
- case SPU::R66: return 66;
- case SPU::R67: return 67;
- case SPU::R68: return 68;
- case SPU::R69: return 69;
- case SPU::R70: return 70;
- case SPU::R71: return 71;
- case SPU::R72: return 72;
- case SPU::R73: return 73;
- case SPU::R74: return 74;
- case SPU::R75: return 75;
- case SPU::R76: return 76;
- case SPU::R77: return 77;
- case SPU::R78: return 78;
- case SPU::R79: return 79;
- case SPU::R80: return 80;
- case SPU::R81: return 81;
- case SPU::R82: return 82;
- case SPU::R83: return 83;
- case SPU::R84: return 84;
- case SPU::R85: return 85;
- case SPU::R86: return 86;
- case SPU::R87: return 87;
- case SPU::R88: return 88;
- case SPU::R89: return 89;
- case SPU::R90: return 90;
- case SPU::R91: return 91;
- case SPU::R92: return 92;
- case SPU::R93: return 93;
- case SPU::R94: return 94;
- case SPU::R95: return 95;
- case SPU::R96: return 96;
- case SPU::R97: return 97;
- case SPU::R98: return 98;
- case SPU::R99: return 99;
- case SPU::R100: return 100;
- case SPU::R101: return 101;
- case SPU::R102: return 102;
- case SPU::R103: return 103;
- case SPU::R104: return 104;
- case SPU::R105: return 105;
- case SPU::R106: return 106;
- case SPU::R107: return 107;
- case SPU::R108: return 108;
- case SPU::R109: return 109;
- case SPU::R110: return 110;
- case SPU::R111: return 111;
- case SPU::R112: return 112;
- case SPU::R113: return 113;
- case SPU::R114: return 114;
- case SPU::R115: return 115;
- case SPU::R116: return 116;
- case SPU::R117: return 117;
- case SPU::R118: return 118;
- case SPU::R119: return 119;
- case SPU::R120: return 120;
- case SPU::R121: return 121;
- case SPU::R122: return 122;
- case SPU::R123: return 123;
- case SPU::R124: return 124;
- case SPU::R125: return 125;
- case SPU::R126: return 126;
- case SPU::R127: return 127;
- default:
- report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering");
- }
-}
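-
-// Note: if the tblgen-generated register enum were guaranteed to be
-// contiguous, the whole switch could collapse to: return RegEnum - SPU::R0;
-// the explicit cases avoid depending on that enum ordering.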
-
-SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
- const TargetInstrInfo &tii) :
- SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii)
-{
-}
-
-/// getPointerRegClass - Return the register class to use to hold pointers.
-/// This is used for addressing modes.
-const TargetRegisterClass *
-SPURegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
- return &SPU::R32CRegClass;
-}
-
-const uint16_t *
-SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
-{
- // Cell ABI calling convention
- static const uint16_t SPU_CalleeSaveRegs[] = {
- SPU::R80, SPU::R81, SPU::R82, SPU::R83,
- SPU::R84, SPU::R85, SPU::R86, SPU::R87,
- SPU::R88, SPU::R89, SPU::R90, SPU::R91,
- SPU::R92, SPU::R93, SPU::R94, SPU::R95,
- SPU::R96, SPU::R97, SPU::R98, SPU::R99,
- SPU::R100, SPU::R101, SPU::R102, SPU::R103,
- SPU::R104, SPU::R105, SPU::R106, SPU::R107,
- SPU::R108, SPU::R109, SPU::R110, SPU::R111,
- SPU::R112, SPU::R113, SPU::R114, SPU::R115,
- SPU::R116, SPU::R117, SPU::R118, SPU::R119,
- SPU::R120, SPU::R121, SPU::R122, SPU::R123,
- SPU::R124, SPU::R125, SPU::R126, SPU::R127,
- SPU::R2, /* environment pointer */
- SPU::R1, /* stack pointer */
- SPU::R0, /* link register */
- 0 /* end */
- };
-
- return SPU_CalleeSaveRegs;
-}
-
-/*!
- R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is
- generally unused) are the Cell's reserved registers
- */
-BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
- Reserved.set(SPU::R0); // LR
- Reserved.set(SPU::R1); // SP
- Reserved.set(SPU::R2); // environment pointer
- return Reserved;
-}
-
-//===----------------------------------------------------------------------===//
-// Stack Frame Processing methods
-//===----------------------------------------------------------------------===//
-
-//--------------------------------------------------------------------------
-void
-SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I)
- const
-{
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
-void
-SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const
-{
- unsigned i = 0;
- MachineInstr &MI = *II;
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- DebugLoc dl = II->getDebugLoc();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- MachineOperand &SPOp = MI.getOperand(i);
- int FrameIndex = SPOp.getIndex();
-
- // Now add the frame object offset to the offset from r1.
- int Offset = MFI->getObjectOffset(FrameIndex);
-
- // Most instructions, except for generated FrameIndex additions using AIr32
- // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
- // immediate in operand 2.
- unsigned OpNo = 1;
- if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
- OpNo = 2;
-
- MachineOperand &MO = MI.getOperand(OpNo);
-
- // Offset is biased by $lr's slot at the bottom.
- Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize();
- assert((Offset & 0xf) == 0
- && "16-byte alignment violated in eliminateFrameIndex");
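-
-  // Worked example with illustrative numbers: an object at offset -32, an
-  // operand immediate of 0, a 112-byte frame, and an assumed 16-byte
-  // minStackSize() give Offset = -32 + 0 + 112 + 16 = 96, which passes the
-  // 16-byte alignment check above.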
-
-  // Replace the FrameIndex operand with the base register $sp (aka $r1).
- SPOp.ChangeToRegister(SPU::R1, false);
-
-  // If 'Offset' doesn't fit in the D-form instruction's immediate field,
-  // convert the instruction to X-form. If the instruction is not an AI
-  // (which takes an s10 immediate), assume it is a load/store that can
-  // take an s14 immediate.
- if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset))
- || !isInt<14>(Offset)) {
- int newOpcode = convertDFormToXForm(MI.getOpcode());
- unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
- BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg )
- .addImm(Offset);
- BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
- .addReg(tmpReg, RegState::Kill)
- .addReg(SPU::R1);
- // remove the replaced D-form instruction
- MBB.erase(II);
- } else {
- MO.ChangeToImmediate(Offset);
- }
-}
-
-unsigned
-SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
-{
- return SPU::R1;
-}
-
-int
-SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
-{
- switch(dFormOpcode)
- {
- case SPU::AIr32: return SPU::Ar32;
- case SPU::LQDr32: return SPU::LQXr32;
- case SPU::LQDr128: return SPU::LQXr128;
- case SPU::LQDv16i8: return SPU::LQXv16i8;
- case SPU::LQDv4i32: return SPU::LQXv4i32;
- case SPU::LQDv4f32: return SPU::LQXv4f32;
- case SPU::STQDr32: return SPU::STQXr32;
- case SPU::STQDr128: return SPU::STQXr128;
- case SPU::STQDv16i8: return SPU::STQXv16i8;
- case SPU::STQDv4i32: return SPU::STQXv4i32;
- case SPU::STQDv4f32: return SPU::STQXv4f32;
-
- default: assert( false && "Unhandled D to X-form conversion");
- }
- // default will assert, but need to return something to keep the
- // compiler happy.
- return dFormOpcode;
-}
-
-// TODO this is already copied from PPC. Could this convenience function
-// be moved to the RegScavenger class?
-unsigned
-SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
- RegScavenger *RS,
- const TargetRegisterClass *RC,
- int SPAdj) const
-{
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- assert( Reg && "Register scavenger failed");
- return Reg;
-}
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
deleted file mode 100644
index e9f9aba63a48..000000000000
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ /dev/null
@@ -1,106 +0,0 @@
-//===-- SPURegisterInfo.h - Cell SPU Register Information Impl --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Cell SPU implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_REGISTERINFO_H
-#define SPU_REGISTERINFO_H
-
-#include "SPU.h"
-
-#define GET_REGINFO_HEADER
-#include "SPUGenRegisterInfo.inc"
-
-namespace llvm {
- class SPUSubtarget;
- class TargetInstrInfo;
- class Type;
-
- class SPURegisterInfo : public SPUGenRegisterInfo {
- private:
- const SPUSubtarget &Subtarget;
- const TargetInstrInfo &TII;
-
- //! Predicate: Does the machine function use the link register?
- bool usesLR(MachineFunction &MF) const;
-
- public:
- SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
-
- //! Translate a register's enum value to a register number
- /*!
-     This method translates a register's enum value to its register number,
- e.g. SPU::R14 -> 14.
- */
- static unsigned getRegisterNumbering(unsigned RegEnum);
-
- /// getPointerRegClass - Return the register class to use to hold pointers.
- /// This is used for addressing modes.
- virtual const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
-
- /// After allocating this many registers, the allocator should feel
- /// register pressure. The value is a somewhat random guess, based on the
-    /// number of non-callee-saved registers in the C calling convention.
- virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
- MachineFunction &MF) const{
- return 50;
- }
-
- //! Return the array of callee-saved registers
- virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
-
- //! Allow for scavenging, so we can get scratch registers when needed.
- virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
- { return true; }
-
- //! Enable tracking of liveness after register allocation, since register
- // scavenging is enabled.
- virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const
- { return true; }
-
- //! Return the reserved registers
- BitVector getReservedRegs(const MachineFunction &MF) const;
-
- //! Eliminate the call frame setup pseudo-instructions
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-    //! Convert frame indices into machine operands
- void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS = NULL) const;
-
- //! Get the stack frame register (SP, aka R1)
- unsigned getFrameRegister(const MachineFunction &MF) const;
-
- //------------------------------------------------------------------------
- // New methods added:
- //------------------------------------------------------------------------
-
- //! Convert D-form load/store to X-form load/store
- /*!
-      Converts a register-displacement load/store into a register-indexed
-      load/store for large stack frames, when the stack frame exceeds the
-      range of an s10 displacement.
- */
- int convertDFormToXForm(int dFormOpcode) const;
-
- //! Acquire an unused register in an emergency.
- unsigned findScratchRegister(MachineBasicBlock::iterator II,
- RegScavenger *RS,
- const TargetRegisterClass *RC,
- int SPAdj) const;
-
- };
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
deleted file mode 100644
index f27b042edd63..000000000000
--- a/lib/Target/CellSPU/SPURegisterInfo.td
+++ /dev/null
@@ -1,183 +0,0 @@
-//===-- SPURegisterInfo.td - The Cell SPU Register File ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-class SPUReg<string n> : Register<n> {
- let Namespace = "SPU";
-}
-
-// The SPU's registers are all 128 bits wide, which makes specifying the
-// registers relatively easy, if relatively mundane:
-
-class SPUVecReg<bits<7> num, string n> : SPUReg<n> {
- field bits<7> Num = num;
-}
-
-def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>;
-def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>;
-def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>;
-def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>;
-def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>;
-def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>;
-def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>;
-def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>;
-def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>;
-def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>;
-def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>;
-def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>;
-def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>;
-def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>;
-def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>;
-def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>;
-def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>;
-def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>;
-def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>;
-def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>;
-def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>;
-def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>;
-def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>;
-def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>;
-def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>;
-def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>;
-def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>;
-def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>;
-def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>;
-def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>;
-def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>;
-def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>;
-def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>;
-def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>;
-def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>;
-def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>;
-def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>;
-def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>;
-def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>;
-def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>;
-def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>;
-def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>;
-def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>;
-def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>;
-def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>;
-def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>;
-def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>;
-def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>;
-def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>;
-def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>;
-def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>;
-def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>;
-def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>;
-def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>;
-def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>;
-def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>;
-def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>;
-def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>;
-def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>;
-def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>;
-def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>;
-def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>;
-def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>;
-def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>;
-def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>;
-def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>;
-def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>;
-def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>;
-def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>;
-def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>;
-def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>;
-def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>;
-def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>;
-def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>;
-def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>;
-def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>;
-def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>;
-def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>;
-def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>;
-def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>;
-def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>;
-def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>;
-def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>;
-def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>;
-def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>;
-def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>;
-def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>;
-def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>;
-def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>;
-def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>;
-def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>;
-def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>;
-def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>;
-def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>;
-def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>;
-def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>;
-def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>;
-def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>;
-def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>;
-def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>;
-def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>;
-def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>;
-def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>;
-def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>;
-def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>;
-def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>;
-def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>;
-def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>;
-def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>;
-def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>;
-def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>;
-def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>;
-def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>;
-def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>;
-def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>;
-def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>;
-def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>;
-def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>;
-def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>;
-def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>;
-def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>;
-def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>;
-def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>;
-def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>;
-def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>;
-def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>;
-def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>;
-def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>;
-
-/* Need floating point status register here: */
-/* def FPCSR : ... */
-
-// The SPU's registers are 128-bit wide entities that can function as general
-// purpose registers, where the operands sit in the "preferred slot":
-// The non-volatile registers are allocated in reverse order, like PPC does.
-def GPRC : RegisterClass<"SPU", [i128], 128,
- (add (sequence "R%u", 0, 79),
- (sequence "R%u", 127, 80))>;
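-
-// Given the sequences above, the allocation order is R0..R79 followed by
-// R127 down to R80, so the callee-saved registers (R80-R127) are handed
-// out last.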
-
-// The SPU's registers as 64-bit wide (double word integer) "preferred slot":
-def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>;
-
-// The SPU's registers as 64-bit wide (double word) FP "preferred slot":
-def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>;
-
-// The SPU's registers as 32-bit wide (word) "preferred slot":
-def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>;
-
-// The SPU's registers as single precision floating point "preferred slot":
-def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>;
-
-// The SPU's registers as 16-bit wide (halfword) "preferred slot":
-def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>;
-
-// The SPU's registers as 8-bit wide (byte) "preferred slot":
-def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>;
-
-// The SPU's registers as vector registers:
-def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128,
- (add GPRC)>;
diff --git a/lib/Target/CellSPU/SPURegisterNames.h b/lib/Target/CellSPU/SPURegisterNames.h
deleted file mode 100644
index e557ed340a28..000000000000
--- a/lib/Target/CellSPU/SPURegisterNames.h
+++ /dev/null
@@ -1,19 +0,0 @@
-//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_REGISTER_NAMES_H
-#define SPU_REGISTER_NAMES_H
-
-// Define symbolic names for Cell registers. This defines a mapping from
-// register name to register number.
-//
-#define GET_REGINFO_ENUM
-#include "SPUGenRegisterInfo.inc"
-
-#endif
diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td
deleted file mode 100644
index 9ccd0844e48e..000000000000
--- a/lib/Target/CellSPU/SPUSchedule.td
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- SPUSchedule.td - Cell Scheduling Definitions -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Even pipeline:
-
-def EVEN_UNIT : FuncUnit; // Even execution unit: (PC & 0x7 == 000)
-def ODD_UNIT : FuncUnit; // Odd execution unit: (PC & 0x7 == 100)
-
-//===----------------------------------------------------------------------===//
-// Instruction Itinerary classes used for Cell SPU
-//===----------------------------------------------------------------------===//
-
-def LoadStore : InstrItinClass; // ODD_UNIT
-def BranchHints : InstrItinClass; // ODD_UNIT
-def BranchResolv : InstrItinClass; // ODD_UNIT
-def ChanOpSPR : InstrItinClass; // ODD_UNIT
-def ShuffleOp : InstrItinClass; // ODD_UNIT
-def SelectOp : InstrItinClass; // ODD_UNIT
-def GatherOp : InstrItinClass; // ODD_UNIT
-def LoadNOP : InstrItinClass; // ODD_UNIT
-def ExecNOP : InstrItinClass; // EVEN_UNIT
-def SPrecFP : InstrItinClass; // EVEN_UNIT
-def DPrecFP : InstrItinClass; // EVEN_UNIT
-def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer)
-def ByteOp : InstrItinClass; // EVEN_UNIT
-def IntegerOp : InstrItinClass; // EVEN_UNIT
-def IntegerMulDiv: InstrItinClass; // EVEN_UNIT
-def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector
-def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad
-def ImmLoad : InstrItinClass; // EVEN_UNIT
-
-/* Note: The itinerary for the Cell SPU is somewhat contrived... */
-def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
- InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
- InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
- InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<ChanOpSPR , [InstrStage<6, [ODD_UNIT]>]>,
- InstrItinData<ShuffleOp , [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<SelectOp , [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<GatherOp , [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<LoadNOP , [InstrStage<1, [ODD_UNIT]>]>,
- InstrItinData<ExecNOP , [InstrStage<1, [EVEN_UNIT]>]>,
- InstrItinData<SPrecFP , [InstrStage<6, [EVEN_UNIT]>]>,
- InstrItinData<DPrecFP , [InstrStage<13, [EVEN_UNIT]>]>,
- InstrItinData<FPInt , [InstrStage<2, [EVEN_UNIT]>]>,
- InstrItinData<ByteOp , [InstrStage<4, [EVEN_UNIT]>]>,
- InstrItinData<IntegerOp , [InstrStage<2, [EVEN_UNIT]>]>,
- InstrItinData<RotShiftVec , [InstrStage<4, [EVEN_UNIT]>]>,
- InstrItinData<RotShiftQuad, [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<IntegerMulDiv,[InstrStage<7, [EVEN_UNIT]>]>,
- InstrItinData<ImmLoad , [InstrStage<2, [EVEN_UNIT]>]>
- ]>;
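-
-// Reading the table: InstrItinData<LoadStore, [InstrStage<6, [ODD_UNIT]>]>
-// models a load/store as occupying the odd pipeline with a 6-cycle stage.
-// The even/odd split mirrors the SPU's dual-issue rule: a fetch group can
-// dispatch one even-pipe and one odd-pipe instruction per cycle.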
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
deleted file mode 100644
index 5732fd43cdc2..000000000000
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SPUSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "cellspu-selectiondag-info"
-#include "SPUTargetMachine.h"
-using namespace llvm;
-
-SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
-
-SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
-}
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h
deleted file mode 100644
index 39257d92c400..000000000000
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the CellSPU subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef CELLSPUSELECTIONDAGINFO_H
-#define CELLSPUSELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class SPUTargetMachine;
-
-class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM);
- ~SPUSelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
deleted file mode 100644
index eec2d250be7f..000000000000
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-//===-- SPUSubtarget.cpp - STI Cell SPU Subtarget Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the CellSPU-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUSubtarget.h"
-#include "SPU.h"
-#include "SPURegisterInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "SPUGenSubtargetInfo.inc"
-
-using namespace llvm;
-
-SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS) :
- SPUGenSubtargetInfo(TT, CPU, FS),
- StackAlignment(16),
- ProcDirective(SPU::DEFAULT_PROC),
- UseLargeMem(false)
-{
-  // Should be the target SPU processor type. For now, since there's only
-  // one, simply default to "v0":
- std::string default_cpu("v0");
-
- // Parse features string.
- ParseSubtargetFeatures(default_cpu, FS);
-
- // Initialize scheduling itinerary for the specified CPU.
- InstrItins = getInstrItineraryForCPU(default_cpu);
-}
-
-/// SetJITMode - This is called to inform the subtarget info that we are
-/// producing code for the JIT.
-void SPUSubtarget::SetJITMode() {
-}
-
-/// Enable PostRA scheduling for optimization levels -O2 and -O3.
-bool SPUSubtarget::enablePostRAScheduler(
- CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
-  // CriticalPathRCs is the set of register classes for which anti-dependency
-  // breaking is performed. Do it for all register classes:
- CriticalPathRCs.clear();
- CriticalPathRCs.push_back(&SPU::R8CRegClass);
- CriticalPathRCs.push_back(&SPU::R16CRegClass);
- CriticalPathRCs.push_back(&SPU::R32CRegClass);
- CriticalPathRCs.push_back(&SPU::R32FPRegClass);
- CriticalPathRCs.push_back(&SPU::R64CRegClass);
- CriticalPathRCs.push_back(&SPU::VECREGRegClass);
- return OptLevel >= CodeGenOpt::Default;
-}
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
deleted file mode 100644
index 27d28b22dd04..000000000000
--- a/lib/Target/CellSPU/SPUSubtarget.h
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef CELLSUBTARGET_H
-#define CELLSUBTARGET_H
-
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include <string>
-
-#define GET_SUBTARGETINFO_HEADER
-#include "SPUGenSubtargetInfo.inc"
-
-namespace llvm {
- class GlobalValue;
- class StringRef;
-
- namespace SPU {
- enum {
- PROC_NONE,
- DEFAULT_PROC
- };
- }
-
- class SPUSubtarget : public SPUGenSubtargetInfo {
- protected:
-    /// StackAlignment - The minimum alignment known to hold of the stack frame
- /// on entry to the function and which must be maintained by every function.
- unsigned StackAlignment;
-
- /// Selected instruction itineraries (one entry per itinerary class.)
- InstrItineraryData InstrItins;
-
- /// Which SPU processor (this isn't really used, but it's there to keep
- /// the C compiler happy)
- unsigned ProcDirective;
-
- /// Use (assume) large memory -- effectively disables the LQA/STQA
-    /// instructions that assume a 256K local store.
- bool UseLargeMem;
-
- public:
- /// This constructor initializes the data members to match that
- /// of the specified triple.
- ///
- SPUSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- /// SetJITMode - This is called to inform the subtarget info that we are
- /// producing code for the JIT.
- void SetJITMode();
-
- /// getStackAlignment - Returns the minimum alignment known to hold of the
- /// stack frame on entry to the function and which must be maintained by
- /// every function for this subtarget.
- unsigned getStackAlignment() const { return StackAlignment; }
-
-    /// getInstrItineraryData - Return the instruction itineraries based on
-    /// subtarget selection.
- const InstrItineraryData &getInstrItineraryData() const {
- return InstrItins;
- }
-
- /// Use large memory addressing predicate
- bool usingLargeMem() const {
- return UseLargeMem;
- }
-
- /// getDataLayoutString - Return the pointer size and type alignment
- /// properties of this subtarget.
- const char *getDataLayoutString() const {
- return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
- "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
- "-s:128:128-n32:64";
- }
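-
-    // Decoding the layout string above: "E" marks big-endian, "p:32:32:128"
-    // gives 32-bit pointers with 128-bit preferred alignment, the i*/f*/v*
-    // entries pin each type's preferred alignment to 128 bits (matching the
-    // 128-bit registers), and "n32:64" lists the native integer widths.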
-
- bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const;
- };
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
deleted file mode 100644
index 918316572a2e..000000000000
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Top-level implementation for the Cell SPU target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUTargetMachine.h"
-#include "SPU.h"
-#include "llvm/PassManager.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-extern "C" void LLVMInitializeCellSPUTarget() {
- // Register the target.
- RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
-}
-
-const std::pair<unsigned, int> *
-SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
- NumEntries = 1;
- return &LR[0];
-}
-
-SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- DL(Subtarget.getDataLayoutString()),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this),
- TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
-}
-
-//===----------------------------------------------------------------------===//
-// Pass Pipeline Configuration
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// SPU Code Generator Pass Configuration Options.
-class SPUPassConfig : public TargetPassConfig {
-public:
- SPUPassConfig(SPUTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
-
- SPUTargetMachine &getSPUTargetMachine() const {
- return getTM<SPUTargetMachine>();
- }
-
- virtual bool addInstSelector();
- virtual bool addPreEmitPass();
-};
-} // namespace
-
-TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new SPUPassConfig(this, PM);
-}
-
-bool SPUPassConfig::addInstSelector() {
- // Install an instruction selector.
- addPass(createSPUISelDag(getSPUTargetMachine()));
- return false;
-}
-
-// Passes to run just before printing the assembly.
-bool SPUPassConfig::addPreEmitPass() {
-  // Load the TCE instruction scheduler, if it is available via loaded
-  // plugins.
- typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
- BuilderFunc schedulerCreator =
- (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
- "createTCESchedulerPass");
- if (schedulerCreator != NULL)
- addPass(schedulerCreator("cellspu"));
-
-  // Align instructions with nops/lnops for dual issue.
- addPass(createSPUNopFillerPass(getSPUTargetMachine()));
- return true;
-}
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
deleted file mode 100644
index 7f53ea6fbeb2..000000000000
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ /dev/null
@@ -1,96 +0,0 @@
-//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the CellSPU-specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_TARGETMACHINE_H
-#define SPU_TARGETMACHINE_H
-
-#include "SPUSubtarget.h"
-#include "SPUInstrInfo.h"
-#include "SPUISelLowering.h"
-#include "SPUSelectionDAGInfo.h"
-#include "SPUFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
-
-namespace llvm {
-
-/// SPUTargetMachine
-///
-class SPUTargetMachine : public LLVMTargetMachine {
- SPUSubtarget Subtarget;
- const DataLayout DL;
- SPUInstrInfo InstrInfo;
- SPUFrameLowering FrameLowering;
- SPUTargetLowering TLInfo;
- SPUSelectionDAGInfo TSInfo;
- InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
-public:
- SPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-
- /// Return the subtarget implementation object
- virtual const SPUSubtarget *getSubtargetImpl() const {
- return &Subtarget;
- }
- virtual const SPUInstrInfo *getInstrInfo() const {
- return &InstrInfo;
- }
- virtual const SPUFrameLowering *getFrameLowering() const {
- return &FrameLowering;
- }
- /*!
- \note Cell SPU does not support JIT today. It could support JIT at some
- point.
- */
- virtual TargetJITInfo *getJITInfo() {
- return NULL;
- }
-
- virtual const SPUTargetLowering *getTargetLowering() const {
- return &TLInfo;
- }
-
- virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
-
- virtual const SPURegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
-
- virtual const DataLayout *getDataLayout() const {
- return &DL;
- }
-
- virtual const InstrItineraryData *getInstrItineraryData() const {
- return &InstrItins;
- }
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
-
- // Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 6a98f95db664..000000000000
--- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMCellSPUInfo
- CellSPUTargetInfo.cpp
- )
-
-add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
deleted file mode 100644
index 84aadfad6f8d..000000000000
--- a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-Target llvm::TheCellSPUTarget;
-
-extern "C" void LLVMInitializeCellSPUTargetInfo() {
- RegisterTarget<Triple::cellspu>
- X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]");
-}
diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
deleted file mode 100644
index 6937e705ff7f..000000000000
--- a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-;===- ./lib/Target/CellSPU/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = CellSPUInfo
-parent = CellSPU
-required_libraries = MC Support Target
-add_to_library_groups = CellSPU
diff --git a/lib/Target/CellSPU/TargetInfo/Makefile b/lib/Target/CellSPU/TargetInfo/Makefile
deleted file mode 100644
index 9cb6827b4323..000000000000
--- a/lib/Target/CellSPU/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/CellSPU/TargetInfo/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMCellSPUInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 5c909903f94b..3e69098edcc3 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -13,25 +13,25 @@
//===----------------------------------------------------------------------===//
#include "CPPTargetMachine.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/config.h"
#include <algorithm>
#include <cstdio>
#include <map>
@@ -141,7 +141,7 @@ namespace {
std::string getCppName(const Value* val);
inline void printCppName(const Value* val);
- void printAttributes(const AttrListPtr &PAL, const std::string &name);
+ void printAttributes(const AttributeSet &PAL, const std::string &name);
void printType(Type* Ty);
void printTypes(const Module* M);
@@ -464,24 +464,25 @@ void CppWriter::printCppName(const Value* val) {
printEscapedString(getCppName(val));
}
-void CppWriter::printAttributes(const AttrListPtr &PAL,
+void CppWriter::printAttributes(const AttributeSet &PAL,
const std::string &name) {
- Out << "AttrListPtr " << name << "_PAL;";
+ Out << "AttributeSet " << name << "_PAL;";
nl(Out);
if (!PAL.isEmpty()) {
Out << '{'; in(); nl(Out);
- Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
- Out << "AttributeWithIndex PAWI;"; nl(Out);
+ Out << "SmallVector<AttributeSet, 4> Attrs;"; nl(Out);
+ Out << "AttributeSet PAS;"; in(); nl(Out);
for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
- unsigned index = PAL.getSlot(i).Index;
- AttrBuilder attrs(PAL.getSlot(i).Attrs);
- Out << "PAWI.Index = " << index << "U;\n";
- Out << " {\n AttrBuilder B;\n";
-
-#define HANDLE_ATTR(X) \
- if (attrs.hasAttribute(Attributes::X)) \
- Out << " B.addAttribute(Attributes::" #X ");\n"; \
- attrs.removeAttribute(Attributes::X);
+ unsigned index = PAL.getSlotIndex(i);
+ AttrBuilder attrs(PAL.getSlotAttributes(i), index);
+ Out << "{"; in(); nl(Out);
+ Out << "AttrBuilder B;"; nl(Out);
+
+#define HANDLE_ATTR(X) \
+ if (attrs.contains(Attribute::X)) { \
+ Out << "B.addAttribute(Attribute::" #X ");"; nl(Out); \
+ attrs.removeAttribute(Attribute::X); \
+ }
HANDLE_ATTR(SExt);
HANDLE_ATTR(ZExt);
@@ -499,6 +500,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
HANDLE_ATTR(OptimizeForSize);
HANDLE_ATTR(StackProtect);
HANDLE_ATTR(StackProtectReq);
+ HANDLE_ATTR(StackProtectStrong);
HANDLE_ATTR(NoCapture);
HANDLE_ATTR(NoRedZone);
HANDLE_ATTR(NoImplicitFloat);
@@ -509,16 +511,24 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
HANDLE_ATTR(NonLazyBind);
HANDLE_ATTR(MinSize);
#undef HANDLE_ATTR
- if (attrs.hasAttribute(Attributes::StackAlignment))
- Out << " B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")\n";
- attrs.removeAttribute(Attributes::StackAlignment);
- assert(!attrs.hasAttributes() && "Unhandled attribute!");
- Out << " PAWI.Attrs = Attributes::get(mod->getContext(), B);\n }";
- nl(Out);
- Out << "Attrs.push_back(PAWI);";
+
+ if (attrs.contains(Attribute::StackAlignment)) {
+ Out << "B.addStackAlignmentAttr(" << attrs.getStackAlignment()<<')';
+ nl(Out);
+ attrs.removeAttribute(Attribute::StackAlignment);
+ }
+
+ Out << "PAS = AttributeSet::get(mod->getContext(), ";
+ if (index == ~0U)
+ Out << "~0U,";
+ else
+ Out << index << "U,";
+ Out << " B);"; out(); nl(Out);
+ Out << "}"; out(); nl(Out);
nl(Out);
+ Out << "Attrs.push_back(PAS);"; nl(Out);
}
- Out << name << "_PAL = AttrListPtr::get(mod->getContext(), Attrs);";
+ Out << name << "_PAL = AttributeSet::get(mod->getContext(), Attrs);";
nl(Out);
out(); nl(Out);
Out << '}'; nl(Out);
@@ -1888,23 +1898,24 @@ void CppWriter::printModuleBody() {
void CppWriter::printProgram(const std::string& fname,
const std::string& mName) {
- Out << "#include <llvm/LLVMContext.h>\n";
- Out << "#include <llvm/Module.h>\n";
- Out << "#include <llvm/DerivedTypes.h>\n";
- Out << "#include <llvm/Constants.h>\n";
- Out << "#include <llvm/GlobalVariable.h>\n";
- Out << "#include <llvm/Function.h>\n";
- Out << "#include <llvm/CallingConv.h>\n";
- Out << "#include <llvm/BasicBlock.h>\n";
- Out << "#include <llvm/Instructions.h>\n";
- Out << "#include <llvm/InlineAsm.h>\n";
- Out << "#include <llvm/Support/FormattedStream.h>\n";
- Out << "#include <llvm/Support/MathExtras.h>\n";
Out << "#include <llvm/Pass.h>\n";
Out << "#include <llvm/PassManager.h>\n";
+
Out << "#include <llvm/ADT/SmallVector.h>\n";
Out << "#include <llvm/Analysis/Verifier.h>\n";
Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
+ Out << "#include <llvm/IR/BasicBlock.h>\n";
+ Out << "#include <llvm/IR/CallingConv.h>\n";
+ Out << "#include <llvm/IR/Constants.h>\n";
+ Out << "#include <llvm/IR/DerivedTypes.h>\n";
+ Out << "#include <llvm/IR/Function.h>\n";
+ Out << "#include <llvm/IR/GlobalVariable.h>\n";
+ Out << "#include <llvm/IR/InlineAsm.h>\n";
+ Out << "#include <llvm/IR/Instructions.h>\n";
+ Out << "#include <llvm/IR/LLVMContext.h>\n";
+ Out << "#include <llvm/IR/Module.h>\n";
+ Out << "#include <llvm/Support/FormattedStream.h>\n";
+ Out << "#include <llvm/Support/MathExtras.h>\n";
Out << "#include <algorithm>\n";
Out << "using namespace llvm;\n\n";
Out << "Module* " << fname << "();\n\n";
@@ -1941,14 +1952,6 @@ void CppWriter::printModule(const std::string& fname,
}
nl(Out);
- // Loop over the dependent libraries and emit them.
- Module::lib_iterator LI = TheModule->lib_begin();
- Module::lib_iterator LE = TheModule->lib_end();
- while (LI != LE) {
- Out << "mod->addLibrary(\"" << *LI << "\");";
- nl(Out);
- ++LI;
- }
printModuleBody();
nl(Out) << "return mod;";
nl(Out,-1) << "}";
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 30d765d6c9ce..477e788ee2fd 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -14,8 +14,8 @@
#ifndef CPPTARGETMACHINE_H
#define CPPTARGETMACHINE_H
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
namespace llvm {
diff --git a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
index a8ac0a282cd1..1ca74a4895c4 100644
--- a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
+++ b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "CPPTargetMachine.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 306084bb8c52..b5b887e7c7c8 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -9,6 +9,8 @@ tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer)
add_public_tablegen_target(HexagonCommonTableGen)
+set(LLVM_COMMON_DEPENDS intrinsics_gen)
+
add_llvm_target(HexagonCodeGen
HexagonAsmPrinter.cpp
HexagonCallingConvLower.cpp
@@ -16,6 +18,7 @@ add_llvm_target(HexagonCodeGen
HexagonExpandPredSpillCode.cpp
HexagonFrameLowering.cpp
HexagonHardwareLoops.cpp
+ HexagonFixupHwLoops.cpp
HexagonMachineScheduler.cpp
HexagonMCInstLower.cpp
HexagonInstrInfo.cpp
@@ -33,8 +36,6 @@ add_llvm_target(HexagonCodeGen
HexagonNewValueJump.cpp
)
-add_dependencies(LLVMHexagonCodeGen intrinsics_gen)
-
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index 45f857bab8c6..dfbefc864283 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -21,14 +21,16 @@
namespace llvm {
class FunctionPass;
+ class ModulePass;
class TargetMachine;
class MachineInstr;
- class MCInst;
+ class HexagonMCInst;
class HexagonAsmPrinter;
class HexagonTargetMachine;
class raw_ostream;
- FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM);
FunctionPass *createHexagonFPMoverPass(TargetMachine &TM);
FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM);
@@ -53,7 +55,7 @@ namespace llvm {
TargetAsmBackend *createHexagonAsmBackend(const Target &,
const std::string &);
*/
- void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI,
+ void HexagonLowerToMC(const MachineInstr *MI, HexagonMCInst &MCI,
HexagonAsmPrinter &AP);
} // end namespace llvm;
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 451e56206e60..8a5ee40590bb 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -32,6 +32,107 @@ def ArchV5 : SubtargetFeature<"v5", "HexagonArchVersion", "V5",
"Hexagon v5">;
//===----------------------------------------------------------------------===//
+// Hexagon Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
+def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
+def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
+def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
+def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
+def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
+def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget.hasV5TOps()">;
+def NoV5T : Predicate<"!Subtarget.hasV5TOps()">;
+def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
+def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">;
+
+//===----------------------------------------------------------------------===//
+// Classes used for relation maps.
+//===----------------------------------------------------------------------===//
+// PredRel - Filter class used to relate non-predicated instructions with their
+// predicated forms.
+class PredRel;
+// PredNewRel - Filter class used to relate predicated instructions with their
+// predicate-new forms.
+class PredNewRel: PredRel;
+// ImmRegRel - Filter class used to relate instructions having reg-reg form
+// with their reg-imm counterparts.
+class ImmRegRel;
+// NewValueRel - Filter class used to relate regular store instructions with
+// their new-value store form.
+class NewValueRel: PredNewRel;
+// AddrModeRel - Filter class used to relate load/store instructions having
+// different addressing modes with each other.
+class AddrModeRel: NewValueRel;
+
+//===----------------------------------------------------------------------===//
+// Generate the mapping table to relate non-predicated instructions with their
+// predicated forms - true and false.
+//
+
+def getPredOpcode : InstrMapping {
+ let FilterClass = "PredRel";
+ // Instructions with the same BaseOpcode, isNVStore and PNewValue form a row.
+ let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"];
+ // Instructions with the same predicate sense form a column.
+ let ColFields = ["PredSense"];
+ // The key column contains the unpredicated instructions.
+ let KeyCol = [""];
+ // Value columns are PredSense=true and PredSense=false
+ let ValueCols = [["true"], ["false"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicated instructions with their .new
+// format.
+//
+def getPredNewOpcode : InstrMapping {
+ let FilterClass = "PredNewRel";
+ let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
+ let ColFields = ["PNewValue"];
+ let KeyCol = [""];
+ let ValueCols = [["new"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate store instructions with their new-value
+// format.
+//
+def getNewValueOpcode : InstrMapping {
+ let FilterClass = "NewValueRel";
+ let RowFields = ["BaseOpcode", "PredSense", "PNewValue"];
+ let ColFields = ["isNVStore"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+def getBasedWithImmOffset : InstrMapping {
+ let FilterClass = "AddrModeRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore",
+ "isMEMri", "isFloat"];
+ let ColFields = ["addrMode"];
+ let KeyCol = ["Absolute"];
+ let ValueCols = [["BaseImmOffset"]];
+}
+
+def getBaseWithRegOffset : InstrMapping {
+ let FilterClass = "AddrModeRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
+ let ColFields = ["addrMode"];
+ let KeyCol = ["BaseImmOffset"];
+ let ValueCols = [["BaseRegOffset"]];
+}
+
+def getRegForm : InstrMapping {
+ let FilterClass = "ImmRegRel";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue"];
+ let ColFields = ["InputType"];
+ let KeyCol = ["imm"];
+ let ValueCols = [["reg"]];
+}
+
+//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
include "HexagonSchedule.td"
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index c15bce608f5e..88cd3fbacea0 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -17,20 +17,24 @@
#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
-#include "HexagonMCInst.h"
#include "HexagonTargetMachine.h"
#include "HexagonSubtarget.h"
+#include "MCTargetDesc/HexagonMCInst.h"
#include "InstPrinter/HexagonInstPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -38,22 +42,18 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -220,8 +220,8 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
for (unsigned Index = 0; Index < Size; Index++) {
HexagonMCInst MCI;
- MCI.setStartPacket(Index == 0);
- MCI.setEndPacket(Index == (Size-1));
+ MCI.setPacketStart(Index == 0);
+ MCI.setPacketEnd(Index == (Size-1));
HexagonLowerToMC(BundleMIs[Index], MCI, *this);
OutStreamer.EmitInstruction(MCI);
@@ -230,8 +230,8 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else {
HexagonMCInst MCI;
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
- MCI.setStartPacket(true);
- MCI.setEndPacket(true);
+ MCI.setPacketStart(true);
+ MCI.setPacketEnd(true);
}
HexagonLowerToMC(MI, MCI, *this);
OutStreamer.EmitInstruction(MCI);
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 9bca9e070709..d4078ad28b60 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -7,21 +7,22 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagon_cfg"
-#include "HexagonTargetMachine.h"
-#include "HexagonSubtarget.h"
+#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
index 73f9d9acab26..2c93d04f98e6 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -15,12 +15,12 @@
#include "HexagonCallingConvLower.h"
#include "Hexagon.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h
index 1f601e87ad68..489b3a3e5985 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.h
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.h
@@ -17,9 +17,9 @@
#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
//
// Need to handle varargs.
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index ae2ca378881d..08144217fd30 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -17,9 +17,10 @@
//
//===----------------------------------------------------------------------===//
-#include "HexagonTargetMachine.h"
-#include "HexagonSubtarget.h"
+#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -30,12 +31,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
new file mode 100644
index 000000000000..240cc9566648
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -0,0 +1,183 @@
+//===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// The loop start address in the LOOPn instruction is encoded as a distance
+// from the LOOPn instruction itself. If the start address is too far from
+// the LOOPn instruction, the loop needs to be set up manually, i.e. via
+// direct transfers to SAn and LCn.
+// This pass will identify and convert such LOOPn instructions to a proper
+// form.
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeHexagonFixupHwLoopsPass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonFixupHwLoops : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ HexagonFixupHwLoops() : MachineFunctionPass(ID) {
+ initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// \brief Maximum distance between the loop instr and the basic block.
+ /// Just an estimate.
+ static const unsigned MAX_LOOP_DISTANCE = 200;
+
+ /// \brief Check the offset between each loop instruction and
+ /// the loop basic block to determine if we can use the LOOP instruction
+ /// or if we need to set the LC/SA registers explicitly.
+ bool fixupLoopInstrs(MachineFunction &MF);
+
+ /// \brief Add the instruction to set the LC and SA registers explicitly.
+ void convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS);
+
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup",
+ "Hexagon Hardware Loops Fixup", false, false)
+
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+ return new HexagonFixupHwLoops();
+}
+
+
+/// \brief Returns true if the instruction is a hardware loop instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::LOOP0_r ||
+ MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = fixupLoopInstrs(MF);
+ return Changed;
+}
+
+
+/// \brief For Hexagon, if the loop label is too far from the
+/// loop instruction, then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of the basic block from the start.
+/// The second pass checks all the loop instructions.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+ // Map from each basic block to the offset of its first instruction.
+ DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+ // First pass - compute the offset of each basic block.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ BlockToInstOffset[MBB] = InstOffset;
+ InstOffset += (MBB->size() * 4);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to
+ // be converted.
+ InstOffset = 0;
+ bool Changed = false;
+ RegScavenger RS;
+
+ // Loop over all the basic blocks.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ InstOffset = BlockToInstOffset[MBB];
+ RS.enterBasicBlock(MBB);
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MIE = MBB->end();
+ MachineBasicBlock::iterator MII = MBB->begin();
+ while (MII != MIE) {
+ if (isHardwareLoop(MII)) {
+ RS.forward(MII);
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ unsigned Dist = Sub > 0 ? Sub : -Sub;
+ if (Dist > MAX_LOOP_DISTANCE) {
+ // Convert to explicitly setting LC0 and SA0.
+ convertLoopInstr(MF, MII, RS);
+ MII = MBB->erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ InstOffset += 4;
+ }
+ }
+
+ return Changed;
+}
+
+
+/// \brief Convert a loop instruction to a sequence of instructions that
+/// set the LC0 and SA0 registers explicitly.
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS) {
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MachineBasicBlock *MBB = MII->getParent();
+ DebugLoc DL = MII->getDebugLoc();
+ unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
+
+ // First, set the LC0 with the trip count.
+ if (MII->getOperand(1).isReg()) {
+ // Trip count is a register
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(MII->getOperand(1).getReg());
+ } else {
+ // Trip count is an immediate.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+ .addImm(MII->getOperand(1).getImm());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(Scratch);
+ }
+ // Then, set the SA0 with the loop start address.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
+ .addMBB(MII->getOperand(0).getMBB());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0)
+ .addReg(Scratch);
+}
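Putting the numbers together: fixupLoopInstrs assumes 4 bytes per instruction, so MAX_LOOP_DISTANCE = 200 corresponds to roughly 50 instructions between the LOOPn instruction and the loop start block. Past that bound, convertLoopInstr replaces LOOP0(start, count) with explicit transfers; schematically, for an immediate trip count and the scavenged scratch register:

    scratch = TFRI #count           ; move the trip count into a scratch register
    LC0     = TFCR scratch          ; set the loop count register
    scratch = CONST32_Label @start  ; materialize the loop start address
    SA0     = TFCR scratch          ; set the start address register

When the trip count is already in a register, the TFRI step is skipped and that register is transferred into LC0 directly, as in the BuildMI calls above.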
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index cd682df7a574..d6a9329cd407 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -11,28 +11,28 @@
#include "HexagonFrameLowering.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -166,7 +166,8 @@ bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
unsigned RetOpcode = MBBI->getOpcode();
- return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;}
+ return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+}
void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
@@ -326,6 +327,21 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters(
return true;
}
+void HexagonFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+
+ if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
+ // Hexagon_TODO: add code
+ } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
+ // Hexagon_TODO: add code
+ } else {
+ llvm_unreachable("Cannot handle this call frame pseudo instruction");
+ }
+ MBB.erase(I);
+}
+
int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
return MF.getFrameInfo()->getObjectOffset(FI);
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index ad87f11e2457..a62c76aaf676 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -35,6 +35,11 @@ public:
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
virtual bool
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index d756aec9bef9..178662447a7f 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -27,89 +27,202 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hwloops"
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/PassSupport.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
#include <algorithm>
+#include <vector>
using namespace llvm;
+#ifndef NDEBUG
+static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1));
+#endif
+
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+namespace llvm {
+ void initializeHexagonHardwareLoopsPass(PassRegistry&);
+}
+
namespace {
class CountValue;
struct HexagonHardwareLoops : public MachineFunctionPass {
- MachineLoopInfo *MLI;
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *MDT;
+ const HexagonTargetMachine *TM;
+ const HexagonInstrInfo *TII;
+ const HexagonRegisterInfo *TRI;
+#ifndef NDEBUG
+ static int Counter;
+#endif
public:
- static char ID; // Pass identification, replacement for typeid
+ static char ID;
- HexagonHardwareLoops() : MachineFunctionPass(ID) {}
+ HexagonHardwareLoops() : MachineFunctionPass(ID) {
+ initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const { return "Hexagon Hardware Loops"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
private:
- /// getCanonicalInductionVariable - Check to see if the loop has a canonical
- /// induction variable.
- /// Should be defined in MachineLoop. Based upon version in class Loop.
- const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const;
-
- /// getTripCount - Return a loop-invariant LLVM register indicating the
- /// number of times the loop will be executed. If the trip-count cannot
- /// be determined, this return null.
- CountValue *getTripCount(MachineLoop *L) const;
-
- /// isInductionOperation - Return true if the instruction matches the
- /// pattern for an opertion that defines an induction variable.
- bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+ /// Kinds of comparisons in the compare instructions.
+ struct Comparison {
+ enum Kind {
+ EQ = 0x01,
+ NE = 0x02,
+ L = 0x04, // Less-than property.
+ G = 0x08, // Greater-than property.
+ U = 0x40, // Unsigned property.
+ LTs = L,
+ LEs = L | EQ,
+ GTs = G,
+ GEs = G | EQ,
+ LTu = L | U,
+ LEu = L | EQ | U,
+ GTu = G | U,
+ GEu = G | EQ | U
+ };
+
+ static Kind getSwappedComparison(Kind Cmp) {
+ assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
+ if ((Cmp & L) || (Cmp & G))
+ return (Kind)(Cmp ^ (L|G));
+ return Cmp;
+ }
+ };
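A quick check of the encoding (illustrative): LEs = L|EQ = 0x05, and since getSwappedComparison XORs the direction bits, LEs ^ (L|G) = 0x05 ^ 0x0C = 0x09 = GEs. That is, swapping the operands of "a <= b" yields "b >= a", with the EQ and U bits untouched.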
- /// isInvalidOperation - Return true if the instruction is not valid within
- /// a hardware loop.
+ /// \brief Find the register that contains the loop controlling
+ /// induction variable.
+ /// If successful, it will return true and set the \p Reg, \p IVBump
+ /// and \p IVOp arguments. Otherwise it will return false.
+ /// The returned induction register is the register R that follows the
+ /// following induction pattern:
+ /// loop:
+ /// R = phi ..., [ R.next, LatchBlock ]
+ /// R.next = R + #bump
+ /// if (R.next < #N) goto loop
+ /// IVBump is the immediate value added to R, and IVOp is the instruction
+ /// "R.next = R + #bump".
+ bool findInductionRegister(MachineLoop *L, unsigned &Reg,
+ int64_t &IVBump, MachineInstr *&IVOp) const;
+
+ /// \brief Analyze the statements in a loop to determine if the loop
+ /// has a computable trip count and, if so, return a value that represents
+ /// the trip count expression.
+ CountValue *getLoopTripCount(MachineLoop *L,
+ SmallVector<MachineInstr*, 2> &OldInsts);
+
+ /// \brief Return the expression that represents the number of times
+ /// a loop iterates. The function takes the operands that represent the
+ /// loop start value, loop end value, and induction value. Based upon
+ /// these operands, the function attempts to compute the trip count.
+ /// If the trip count is not directly available (as an immediate value,
+ /// or a register), the function will attempt to insert computation of it
+ /// to the loop's preheader.
+ CountValue *computeCount(MachineLoop *Loop,
+ const MachineOperand *Start,
+ const MachineOperand *End,
+ unsigned IVReg,
+ int64_t IVBump,
+ Comparison::Kind Cmp) const;
+
+ /// \brief Return true if the instruction is not valid within a hardware
+ /// loop.
bool isInvalidLoopOperation(const MachineInstr *MI) const;
- /// containsInavlidInstruction - Return true if the loop contains an
- /// instruction that inhibits using the hardware loop.
+ /// \brief Return true if the loop contains an instruction that inhibits
+ /// using the hardware loop.
bool containsInvalidInstruction(MachineLoop *L) const;
- /// converToHardwareLoop - Given a loop, check if we can convert it to a
- /// hardware loop. If so, then perform the conversion and return true.
+ /// \brief Given a loop, check if we can convert it to a hardware loop.
+ /// If so, then perform the conversion and return true.
bool convertToHardwareLoop(MachineLoop *L);
+ /// \brief Return true if the instruction is now dead.
+ bool isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr*, 1> &DeadPhis) const;
+
+ /// \brief Remove the instruction if it is now dead.
+ void removeIfDead(MachineInstr *MI);
+
+ /// \brief Make sure that the "bump" instruction executes before the
+ /// compare. We need that for the IV fixup, so that the compare
+ /// instruction would not use a bumped value that has not yet been
+ /// defined. If the instructions are out of order, try to reorder them.
+ bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
+
+ /// \brief Get the instruction that loads an immediate value into \p R,
+ /// or 0 if such an instruction does not exist.
+ MachineInstr *defWithImmediate(unsigned R);
+
+ /// \brief Get the immediate value referenced to by \p MO, either for
+ /// immediate operands, or for register operands, where the register
+ /// was defined with an immediate value.
+ int64_t getImmediate(MachineOperand &MO);
+
+ /// \brief Reset the given machine operand to now refer to a new immediate
+ /// value. Assumes that the operand was already referencing an immediate
+ /// value, either directly, or via a register.
+ void setImmediate(MachineOperand &MO, int64_t Val);
+
+ /// \brief Fix the data flow of the induction variable.
+ /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
+ /// |
+ /// +-> back to phi
+ /// where "bump" is the increment of the induction variable:
+ /// iv = iv + #const.
+ /// Due to some prior code transformations, the actual flow may look
+ /// like this:
+ /// phi -+-> bump ---> back to phi
+ /// |
+ /// +-> comparison-in-latch (against upper_bound-bump),
+ /// i.e. the comparison that controls the loop execution may be using
+ /// the value of the induction variable from before the increment.
+ ///
+ /// Return true if the loop's flow is the desired one (i.e. it's
+ /// either been fixed, or no fixing was necessary).
+ /// Otherwise, return false. This can happen if the induction variable
+ /// couldn't be identified, or if the value in the latch's comparison
+ /// cannot be adjusted to reflect the post-bump value.
+ bool fixupInductionVariable(MachineLoop *L);
+
+ /// \brief Given a loop, if it does not have a preheader, create one.
+ /// Return the block that is the preheader.
+ MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
};
char HexagonHardwareLoops::ID = 0;
+#ifndef NDEBUG
+ int HexagonHardwareLoops::Counter = 0;
+#endif
-
- // CountValue class - Abstraction for a trip count of a loop. A
- // smaller vesrsion of the MachineOperand class without the concerns
- // of changing the operand representation.
+ /// \brief Abstraction for a trip count of a loop. A smaller version
+ /// of the MachineOperand class without the concerns of changing the
+ /// operand representation.
class CountValue {
public:
enum CountValueType {
@@ -119,101 +232,62 @@ namespace {
private:
CountValueType Kind;
union Values {
- unsigned RegNum;
- int64_t ImmVal;
- Values(unsigned r) : RegNum(r) {}
- Values(int64_t i) : ImmVal(i) {}
+ struct {
+ unsigned Reg;
+ unsigned Sub;
+ } R;
+ unsigned ImmVal;
} Contents;
- bool isNegative;
public:
- CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
- isNegative(neg) {}
- explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
- isNegative(i < 0) {}
- CountValueType getType() const { return Kind; }
+ explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
+ Kind = t;
+ if (Kind == CV_Register) {
+ Contents.R.Reg = v;
+ Contents.R.Sub = u;
+ } else {
+ Contents.ImmVal = v;
+ }
+ }
bool isReg() const { return Kind == CV_Register; }
bool isImm() const { return Kind == CV_Immediate; }
- bool isNeg() const { return isNegative; }
unsigned getReg() const {
assert(isReg() && "Wrong CountValue accessor");
- return Contents.RegNum;
+ return Contents.R.Reg;
}
- void setReg(unsigned Val) {
- Contents.RegNum = Val;
+ unsigned getSubReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.R.Sub;
}
- int64_t getImm() const {
+ unsigned getImm() const {
assert(isImm() && "Wrong CountValue accessor");
- if (isNegative) {
- return -Contents.ImmVal;
- }
return Contents.ImmVal;
}
- void setImm(int64_t Val) {
- Contents.ImmVal = Val;
- }
void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
- if (isReg()) { OS << PrintReg(getReg()); }
- if (isImm()) { OS << getImm(); }
- }
- };
-
- struct HexagonFixupHwLoops : public MachineFunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid.
-
- HexagonFixupHwLoops() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
+ if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
+ if (isImm()) { OS << Contents.ImmVal; }
}
-
- private:
- /// Maximum distance between the loop instr and the basic block.
- /// Just an estimate.
- static const unsigned MAX_LOOP_DISTANCE = 200;
-
- /// fixupLoopInstrs - Check the offset between each loop instruction and
- /// the loop basic block to determine if we can use the LOOP instruction
- /// or if we need to set the LC/SA registers explicitly.
- bool fixupLoopInstrs(MachineFunction &MF);
-
- /// convertLoopInstr - Add the instruction to set the LC and SA registers
- /// explicitly.
- void convertLoopInstr(MachineFunction &MF,
- MachineBasicBlock::iterator &MII,
- RegScavenger &RS);
-
};
+} // end anonymous namespace
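CountValue therefore carries either a (register, subregister) pair or an unsigned immediate, discriminated by the tag passed to its constructor. A minimal sketch (CountR is a hypothetical virtual register):

    CountValue TripImm(CountValue::CV_Immediate, 64);        // constant trip count
    CountValue TripReg(CountValue::CV_Register, CountR, 0);  // register form, no subreg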
- char HexagonFixupHwLoops::ID = 0;
-} // end anonymous namespace
+INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
+ "Hexagon Hardware Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
+ "Hexagon Hardware Loops", false, false)
-/// isHardwareLoop - Returns true if the instruction is a hardware loop
-/// instruction.
+/// \brief Returns true if the instruction is a hardware loop instruction.
static bool isHardwareLoop(const MachineInstr *MI) {
return MI->getOpcode() == Hexagon::LOOP0_r ||
MI->getOpcode() == Hexagon::LOOP0_i;
}
-/// isCompareEquals - Returns true if the instruction is a compare equals
-/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI) {
- return MI->getOpcode() == Hexagon::CMPEQri;
-}
-
-
-/// createHexagonHardwareLoops - Factory for creating
-/// the hardware loop phase.
FunctionPass *llvm::createHexagonHardwareLoops() {
return new HexagonHardwareLoops();
}
@@ -224,45 +298,149 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- // get the loop information
MLI = &getAnalysis<MachineLoopInfo>();
- // get the register information
MRI = &MF.getRegInfo();
- // the target specific instructio info.
- TII = MF.getTarget().getInstrInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
+ TII = static_cast<const HexagonInstrInfo*>(TM->getInstrInfo());
+ TRI = static_cast<const HexagonRegisterInfo*>(TM->getRegisterInfo());
for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
I != E; ++I) {
MachineLoop *L = *I;
- if (!L->getParentLoop()) {
+ if (!L->getParentLoop())
Changed |= convertToHardwareLoop(L);
- }
}
return Changed;
}
-/// getCanonicalInductionVariable - Check to see if the loop has a canonical
-/// induction variable. We check for a simple recurrence pattern - an
-/// integer recurrence that decrements by one each time through the loop and
-/// ends at zero. If so, return the phi node that corresponds to it.
-///
-/// Based upon the similar code in LoopInfo except this code is specific to
-/// the machine.
-/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+
+bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
+ unsigned &Reg,
+ int64_t &IVBump,
+ MachineInstr *&IVOp
+ ) const {
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ if (!Header || !Preheader || !Latch)
+ return false;
+
+ // This pair represents an induction register together with an immediate
+ // value that will be added to it in each loop iteration.
+ typedef std::pair<unsigned,int64_t> RegisterBump;
+
+ // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
+ // from an induction operation
+ // R.next = R + bump
+ // where bump is an immediate value.
+ typedef std::map<unsigned,RegisterBump> InductionMap;
+
+ InductionMap IndMap;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *Phi = &*I;
+
+ // Have a PHI instruction. Get the operand that corresponds to the
+ // latch block, and see if it is the result of an addition of form "reg+imm",
+ // where the "reg" is defined by the PHI node we are looking at.
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+ if (Phi->getOperand(i+1).getMBB() != Latch)
+ continue;
+
+ unsigned PhiOpReg = Phi->getOperand(i).getReg();
+ MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
+ unsigned UpdOpc = DI->getOpcode();
+ bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+
+ if (isAdd) {
+ // If the register operand to the add is the PHI we're
+ // looking at, this meets the induction pattern.
+ unsigned IndReg = DI->getOperand(1).getReg();
+ if (MRI->getVRegDef(IndReg) == Phi) {
+ unsigned UpdReg = DI->getOperand(0).getReg();
+ int64_t V = DI->getOperand(2).getImm();
+ IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
+ }
+ }
+ } // for (i)
+ } // for (instr)
+
+ SmallVector<MachineOperand,2> Cond;
+ MachineBasicBlock *TB = 0, *FB = 0;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return false;
+
+ unsigned CSz = Cond.size();
+ assert (CSz == 1 || CSz == 2);
+ unsigned PredR = Cond[CSz-1].getReg();
+
+ MachineInstr *PredI = MRI->getVRegDef(PredR);
+ if (!PredI->isCompare())
+ return false;
+
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int CmpImm = 0, CmpMask = 0;
+ bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2,
+ CmpMask, CmpImm);
+ // Fail if the compare was not analyzed, or it's not comparing a register
+ // with an immediate value. Not checking the mask here, since we handle
+ // the individual compare opcodes (including CMPb) later on.
+ if (!CmpAnalyzed)
+ return false;
+
+ // Exactly one of the input registers to the comparison should be among
+ // the induction registers.
+ InductionMap::iterator IndMapEnd = IndMap.end();
+ InductionMap::iterator F = IndMapEnd;
+ if (CmpReg1 != 0) {
+ InductionMap::iterator F1 = IndMap.find(CmpReg1);
+ if (F1 != IndMapEnd)
+ F = F1;
+ }
+ if (CmpReg2 != 0) {
+ InductionMap::iterator F2 = IndMap.find(CmpReg2);
+ if (F2 != IndMapEnd) {
+ if (F != IndMapEnd)
+ return false;
+ F = F2;
+ }
+ }
+ if (F == IndMapEnd)
+ return false;
+
+ Reg = F->second.first;
+ IVBump = F->second.second;
+ IVOp = MRI->getVRegDef(F->first);
+ return true;
+}
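In machine-IR terms, the pattern findInductionRegister accepts looks like the following sketch; afterwards IndMap holds R.next -> (R, #bump), and the compare feeding the latch branch must use exactly one register from that map:

    header:
      %R = PHI [ %init, %preheader ], [ %R.next, %latch ]
      ...
    latch:
      %R.next = ADD_ri %R, #bump
      %P = CMPEQri %R.next, #N      ; any compare analyzeCompare understands
      if (%P) jump header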
+
+
+/// \brief Analyze the statements in a loop to determine if the loop has
+/// a computable trip count and, if so, return a value that represents
+/// the trip count expression.
///
-const MachineInstr
-*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const {
+/// This function iterates over the phi nodes in the loop to check for
+/// induction variable patterns that are used in the calculation for
+/// the number of times the loop is executed.
+CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
+ SmallVector<MachineInstr*, 2> &OldInsts) {
MachineBasicBlock *TopMBB = L->getTopBlock();
MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
assert(PI != TopMBB->pred_end() &&
"Loop must have more than one incoming edge!");
MachineBasicBlock *Backedge = *PI++;
- if (PI == TopMBB->pred_end()) return 0; // dead loop
+ if (PI == TopMBB->pred_end()) // dead loop?
+ return 0;
MachineBasicBlock *Incoming = *PI++;
- if (PI != TopMBB->pred_end()) return 0; // multiple backedges?
+ if (PI != TopMBB->pred_end()) // multiple backedges?
+ return 0;
- // make sure there is one incoming and one backedge and determine which
+ // Make sure there is one incoming and one backedge and determine which
// is which.
if (L->contains(Incoming)) {
if (L->contains(Backedge))
@@ -271,139 +449,433 @@ const MachineInstr
} else if (!L->contains(Backedge))
return 0;
- // Loop over all of the PHI nodes, looking for a canonical induction variable:
- // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
- // - The recurrence comes from the backedge.
- // - the definition is an induction operatio.n
- for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
- I != E && I->isPHI(); ++I) {
- const MachineInstr *MPhi = &*I;
- unsigned DefReg = MPhi->getOperand(0).getReg();
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
- // Check each operand for the value from the backedge.
- MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
- if (L->contains(MBB)) { // operands comes from the backedge
- // Check if the definition is an induction operation.
- const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
- if (isInductionOperation(DI, DefReg)) {
- return MPhi;
- }
- }
+ // Look for the cmp instruction to determine if we can get a useful trip
+ // count. The trip count can be either a register or an immediate. The
+ // location of the value depends upon the type (reg or imm).
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return 0;
+
+ unsigned IVReg = 0;
+ int64_t IVBump = 0;
+ MachineInstr *IVOp;
+ bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
+ if (!FoundIV)
+ return 0;
+
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+
+ MachineOperand *InitialValue = 0;
+ MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
+ for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
+ MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
+ if (MBB == Preheader)
+ InitialValue = &IV_Phi->getOperand(i);
+ else if (MBB == Latch)
+ IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
+ }
+ if (!InitialValue)
+ return 0;
+
+ SmallVector<MachineOperand,2> Cond;
+ MachineBasicBlock *TB = 0, *FB = 0;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return 0;
+
+ MachineBasicBlock *Header = L->getHeader();
+ // TB must be non-null. If FB is also non-null, one of them must be
+ // the header. Otherwise, branch to TB could be exiting the loop, and
+ // the fall through can go to the header.
+ assert (TB && "Latch block without a branch?");
+ assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
+ if (!TB || (FB && TB != Header && FB != Header))
+ return 0;
+
+ // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
+ // to put imm(0), followed by P in the vector Cond.
+ // If TB is not the header, it means that the "not-taken" path must lead
+ // to the header.
+ bool Negated = (Cond.size() > 1) ^ (TB != Header);
+ unsigned PredReg = Cond[Cond.size()-1].getReg();
+ MachineInstr *CondI = MRI->getVRegDef(PredReg);
+ unsigned CondOpc = CondI->getOpcode();
+
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int Mask = 0, ImmValue = 0;
+ bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2,
+ Mask, ImmValue);
+ if (!AnalyzedCmp)
+ return 0;
+
+ // The comparison operator type determines how we compute the loop
+ // trip count.
+ OldInsts.push_back(CondI);
+ OldInsts.push_back(IVOp);
+
+ // Sadly, the following code gets information based on the position
+ // of the operands in the compare instruction. This has to be done
+ // this way, because the comparisons check for a specific relationship
+ // between the operands (e.g. is-less-than), rather than to find out
+ // what relationship the operands are in (as on PPC).
+ Comparison::Kind Cmp;
+ bool isSwapped = false;
+ const MachineOperand &Op1 = CondI->getOperand(1);
+ const MachineOperand &Op2 = CondI->getOperand(2);
+ const MachineOperand *EndValue = 0;
+
+ if (Op1.isReg()) {
+ if (Op2.isImm() || Op1.getReg() == IVReg)
+ EndValue = &Op2;
+ else {
+ EndValue = &Op1;
+ isSwapped = true;
}
}
- return 0;
-}
-/// getTripCount - Return a loop-invariant LLVM value indicating the
-/// number of times the loop will be executed. The trip count can
-/// be either a register or a constant value. If the trip-count
-/// cannot be determined, this returns null.
-///
-/// We find the trip count from the phi instruction that defines the
-/// induction variable. We follow the links to the CMP instruction
-/// to get the trip count.
-///
-/// Based upon getTripCount in LoopInfo.
-///
-CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
- // Check that the loop has a induction variable.
- const MachineInstr *IV_Inst = getCanonicalInductionVariable(L);
- if (IV_Inst == 0) return 0;
-
- // Canonical loops will end with a 'cmpeq_ri IV, Imm',
- // if Imm is 0, get the count from the PHI opnd
- // if Imm is -M, than M is the count
- // Otherwise, Imm is the count
- const MachineOperand *IV_Opnd;
- const MachineOperand *InitialValue;
- if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
- InitialValue = &IV_Inst->getOperand(1);
- IV_Opnd = &IV_Inst->getOperand(3);
- } else {
- InitialValue = &IV_Inst->getOperand(3);
- IV_Opnd = &IV_Inst->getOperand(1);
- }
-
- // Look for the cmp instruction to determine if we
- // can get a useful trip count. The trip count can
- // be either a register or an immediate. The location
- // of the value depends upon the type (reg or imm).
- for (MachineRegisterInfo::reg_iterator
- RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
- RI != RE; ++RI) {
- IV_Opnd = &RI.getOperand();
- const MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI)) {
- const MachineOperand &MO = MI->getOperand(2);
- assert(MO.isImm() && "IV Cmp Operand should be 0");
- int64_t ImmVal = MO.getImm();
-
- const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
- assert(L->contains(IV_DefInstr->getParent()) &&
- "IV definition should occurs in loop");
- int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
-
- if (ImmVal == 0) {
- // Make sure the induction variable changes by one on each iteration.
- if (iv_value != 1 && iv_value != -1) {
+ if (!EndValue)
+ return 0;
+
+ switch (CondOpc) {
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPEQrr:
+ Cmp = !Negated ? Comparison::EQ : Comparison::NE;
+ break;
+ case Hexagon::CMPLTrr:
+ Cmp = !Negated ? Comparison::LTs : Comparison::GEs;
+ break;
+ case Hexagon::CMPLTUrr:
+ Cmp = !Negated ? Comparison::LTu : Comparison::GEu;
+ break;
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTUrr:
+ Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
+ break;
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPGTrr:
+ Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
+ break;
+ // Very limited support for byte/halfword compares.
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPhEQri_V4: {
+ if (IVBump != 1)
+ return 0;
+
+ int64_t InitV, EndV;
+ // Since the comparisons are "ri", the EndValue should be an
+ // immediate. Check it just in case.
+ assert(EndValue->isImm() && "Unrecognized latch comparison");
+ EndV = EndValue->getImm();
+ // Allow InitialValue to be a register defined with an immediate.
+ if (InitialValue->isReg()) {
+ if (!defWithImmediate(InitialValue->getReg()))
return 0;
- }
- return new CountValue(InitialValue->getReg(), iv_value > 0);
+ InitV = getImmediate(*InitialValue);
} else {
- assert(InitialValue->isReg() && "Expecting register for init value");
- const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
- if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) {
- int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
- if ((count % iv_value) != 0) {
- return 0;
- }
- return new CountValue(count/iv_value);
- }
+ assert(InitialValue->isImm());
+ InitV = InitialValue->getImm();
+ }
+ if (InitV >= EndV)
+ return 0;
+ if (CondOpc == Hexagon::CMPbEQri_V4) {
+ if (!isInt<8>(InitV) || !isInt<8>(EndV))
+ return 0;
+ } else { // Hexagon::CMPhEQri_V4
+ if (!isInt<16>(InitV) || !isInt<16>(EndV))
+ return 0;
}
+ Cmp = !Negated ? Comparison::EQ : Comparison::NE;
+ break;
}
+ default:
+ return 0;
}
- return 0;
+
+ if (isSwapped)
+ Cmp = Comparison::getSwappedComparison(Cmp);
+
+ if (InitialValue->isReg()) {
+ unsigned R = InitialValue->getReg();
+ MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
+ if (!MDT->properlyDominates(DefBB, Header))
+ return 0;
+ OldInsts.push_back(MRI->getVRegDef(R));
+ }
+ if (EndValue->isReg()) {
+ unsigned R = EndValue->getReg();
+ MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
+ if (!MDT->properlyDominates(DefBB, Header))
+ return 0;
+ }
+
+ return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
}
-/// isInductionOperation - return true if the operation is matches the
-/// pattern that defines an induction variable:
-/// add iv, c
-///
-bool
-HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI,
- unsigned IVReg) const {
- return (MI->getOpcode() ==
- Hexagon::ADD_ri && MI->getOperand(1).getReg() == IVReg);
+/// \brief Helper function that returns the expression that represents the
+/// number of times a loop iterates. The function takes the operands that
+/// represent the loop start value, loop end value, and induction value.
+/// Based upon these operands, the function attempts to compute the trip count.
+CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
+ const MachineOperand *Start,
+ const MachineOperand *End,
+ unsigned IVReg,
+ int64_t IVBump,
+ Comparison::Kind Cmp) const {
+ // Cannot handle comparison EQ, i.e. while (A == B).
+ if (Cmp == Comparison::EQ)
+ return 0;
+
+ // Check if either the start or end values are an assignment of an immediate.
+ // If so, use the immediate value rather than the register.
+ if (Start->isReg()) {
+ const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
+ if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI)
+ Start = &StartValInstr->getOperand(1);
+ }
+ if (End->isReg()) {
+ const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
+ if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI)
+ End = &EndValInstr->getOperand(1);
+ }
+
+ assert (Start->isReg() || Start->isImm());
+ assert (End->isReg() || End->isImm());
+
+ bool CmpLess = Cmp & Comparison::L;
+ bool CmpGreater = Cmp & Comparison::G;
+ bool CmpHasEqual = Cmp & Comparison::EQ;
+
+ // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
+ // If loop executes while iv is "less" with the iv value going down, then
+ // the iv must wrap.
+ if (CmpLess && IVBump < 0)
+ return 0;
+ // If loop executes while iv is "greater" with the iv value going up, then
+ // the iv must wrap.
+ if (CmpGreater && IVBump > 0)
+ return 0;
+
+ if (Start->isImm() && End->isImm()) {
+ // Both, start and end are immediates.
+ int64_t StartV = Start->getImm();
+ int64_t EndV = End->getImm();
+ int64_t Dist = EndV - StartV;
+ if (Dist == 0)
+ return 0;
+
+ bool Exact = (Dist % IVBump) == 0;
+
+ if (Cmp == Comparison::NE) {
+ if (!Exact)
+ return 0;
+ if ((Dist < 0) ^ (IVBump < 0))
+ return 0;
+ }
+
+ // For comparisons that include the final value (i.e. include equality
+ // with the final value), we need to increase the distance by 1.
+ if (CmpHasEqual)
+ Dist = Dist > 0 ? Dist+1 : Dist-1;
+
+ // assert (CmpLess => Dist > 0);
+ assert ((!CmpLess || Dist > 0) && "Loop should never iterate!");
+ // assert (CmpGreater => Dist < 0);
+ assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!");
+
+ // "Normalized" distance, i.e. with the bump set to +-1.
+ int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump
+ : (-Dist + (-IVBump-1)) / (-IVBump);
+ assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
+
+ uint64_t Count = Dist1;
+
+ if (Count > 0xFFFFFFFFULL)
+ return 0;
+
+ return new CountValue(CountValue::CV_Immediate, Count);
+ }
+
+ // A general case: Start and End are some values, but the actual
+ // iteration count may not be available. If it is not, insert
+ // a computation of it into the preheader.
+
+ // If the induction variable bump is not a power of 2, quit.
+ // Otherwise we'd need a general integer division.
+ if (!isPowerOf2_64(abs64(IVBump)))
+ return 0;
+
+ MachineBasicBlock *PH = Loop->getLoopPreheader();
+ assert (PH && "Should have a preheader by now");
+ MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
+ DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
+ : DebugLoc();
+
+ // If Start is an immediate and End is a register, the trip count
+ // will be "reg - imm". Hexagon's "subtract immediate" instruction
+ // is actually "reg + -imm".
+
+ // If the loop IV is going downwards, i.e. if the bump is negative,
+ // then the iteration count (computed as End-Start) will need to be
+ // negated. To avoid the negation, just swap Start and End.
+ if (IVBump < 0) {
+ std::swap(Start, End);
+ IVBump = -IVBump;
+ }
+ // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
+ // Signedness, and "including equality" are preserved.
+
+ bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
+ bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
+
+ int64_t StartV = 0, EndV = 0;
+ if (Start->isImm())
+ StartV = Start->getImm();
+ if (End->isImm())
+ EndV = End->getImm();
+
+ int64_t AdjV = 0;
+ // To compute the iteration count, we would need this computation:
+ // Count = (End - Start + (IVBump-1)) / IVBump
+ // or, when CmpHasEqual:
+ // Count = (End - Start + (IVBump-1)+1) / IVBump
+ // The "IVBump-1" part is the adjustment (AdjV). We can avoid
+ // generating an instruction specifically to add it if we can adjust
+ // the immediate values for Start or End.
+
+ if (CmpHasEqual) {
+ // Need to add 1 to the total iteration count.
+ if (Start->isImm())
+ StartV--;
+ else if (End->isImm())
+ EndV++;
+ else
+ AdjV += 1;
+ }
+
+ if (Cmp != Comparison::NE) {
+ if (Start->isImm())
+ StartV -= (IVBump-1);
+ else if (End->isImm())
+ EndV += (IVBump-1);
+ else
+ AdjV += (IVBump-1);
+ }
+
+ unsigned R = 0, SR = 0;
+ if (Start->isReg()) {
+ R = Start->getReg();
+ SR = Start->getSubReg();
+ } else {
+ R = End->getReg();
+ SR = End->getSubReg();
+ }
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ // Hardware loops cannot handle 64-bit registers. If it's a double
+ // register, it has to have a subregister.
+ if (!SR && RC == &Hexagon::DoubleRegsRegClass)
+ return 0;
+ const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
+
+ // Compute DistR (register with the distance between Start and End).
+ unsigned DistR, DistSR;
+
+ // Special case: the start value is imm(0), so End already holds the distance.
+ if (Start->isImm() && StartV == 0) {
+ DistR = End->getReg();
+ DistSR = End->getSubReg();
+ } else {
+ const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) :
+ (RegToImm ? TII->get(Hexagon::SUB_ri) :
+ TII->get(Hexagon::ADD_ri));
+ unsigned SubR = MRI->createVirtualRegister(IntRC);
+ MachineInstrBuilder SubIB =
+ BuildMI(*PH, InsertPos, DL, SubD, SubR);
+
+ if (RegToReg) {
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ } else if (RegToImm) {
+ SubIB.addImm(EndV)
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ } else { // ImmToReg
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addImm(-StartV);
+ }
+ DistR = SubR;
+ DistSR = 0;
+ }
+
+ // From DistR, compute AdjR (register with the adjusted distance).
+ unsigned AdjR, AdjSR;
+
+ if (AdjV == 0) {
+ AdjR = DistR;
+ AdjSR = DistSR;
+ } else {
+ // Generate CountR = ADD DistR, AdjVal
+ unsigned AddR = MRI->createVirtualRegister(IntRC);
+ const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri);
+ BuildMI(*PH, InsertPos, DL, AddD, AddR)
+ .addReg(DistR, 0, DistSR)
+ .addImm(AdjV);
+
+ AdjR = AddR;
+ AdjSR = 0;
+ }
+
+ // From AdjR, compute CountR (register with the final count).
+ unsigned CountR, CountSR;
+
+ if (IVBump == 1) {
+ CountR = AdjR;
+ CountSR = AdjSR;
+ } else {
+ // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
+ unsigned Shift = Log2_32(IVBump);
+
+ // Generate NormR = LSR DistR, Shift.
+ unsigned LsrR = MRI->createVirtualRegister(IntRC);
+ const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri);
+ BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
+ .addReg(AdjR, 0, AdjSR)
+ .addImm(Shift);
+
+ CountR = LsrR;
+ CountSR = 0;
+ }
+
+ return new CountValue(CountValue::CV_Register, CountR, CountSR);
}
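As a rough illustration of the register case handled above, the preheader sequence for something like "for (i = start; i < end; i += 4)" with both bounds in registers could look as below (a schematic only, inferred from the opcodes used; register names are made up):

    %dist = SUB_rr %end, %start   ; DistR: End - Start
    %adj  = ADD_ri %dist, 3       ; AdjR: add IVBump-1 for the ceiling division
    %cnt  = LSR_ri %adj, 2        ; CountR: divide by IVBump via the shift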
-/// isInvalidOperation - Return true if the operation is invalid within
-/// hardware loop.
-bool
-HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+/// \brief Return true if the operation is invalid within hardware loop.
+bool HexagonHardwareLoops::isInvalidLoopOperation(
+ const MachineInstr *MI) const {
// call is not allowed because the callee may use a hardware loop
- if (MI->getDesc().isCall()) {
+ if (MI->getDesc().isCall())
return true;
- }
+
// do not allow nested hardware loops
- if (isHardwareLoop(MI)) {
+ if (isHardwareLoop(MI))
return true;
- }
+
// check if the instruction defines a hardware loop register
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef() &&
- (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 ||
- MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA0)) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned R = MO.getReg();
+ if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
+ R == Hexagon::SA0 || R == Hexagon::SA1)
return true;
- }
}
return false;
}
-/// containsInvalidInstruction - Return true if the loop contains
-/// an instruction that inhibits the use of the hardware loop function.
-///
+
+/// \brief Return true if the loop contains an instruction that inhibits
+/// the use of the hardware loop function.
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
@@ -411,126 +883,258 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
for (MachineBasicBlock::iterator
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
const MachineInstr *MI = &*MII;
- if (isInvalidLoopOperation(MI)) {
+ if (isInvalidLoopOperation(MI))
return true;
- }
}
}
return false;
}
-/// converToHardwareLoop - check if the loop is a candidate for
-/// converting to a hardware loop. If so, then perform the
-/// transformation.
+
+/// \brief Returns true if the instruction is dead. This was essentially
+/// copied from DeadMachineInstructionElim::isDead, but with special cases
+/// for inline asm, physical registers and instructions with side effects
+/// removed.
+bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr*, 1> &DeadPhis) const {
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (MRI->use_nodbg_empty(Reg))
+ continue;
+
+ typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
+
+ // This instruction has users, but if the only user is the phi node for the
+ // parent block, and the only use of that phi node is this instruction, then
+ // this instruction is dead: both it (and the phi node) can be removed.
+ use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
+ use_nodbg_iterator End = MRI->use_nodbg_end();
+ if (llvm::next(I) != End || !I.getOperand().getParent()->isPHI())
+ return false;
+
+ MachineInstr *OnePhi = I.getOperand().getParent();
+ for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
+ const MachineOperand &OPO = OnePhi->getOperand(j);
+ if (!OPO.isReg() || !OPO.isDef())
+ continue;
+
+ unsigned OPReg = OPO.getReg();
+ use_nodbg_iterator nextJ;
+ for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
+ J != End; J = nextJ) {
+ nextJ = llvm::next(J);
+ MachineOperand &Use = J.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+
+ // If the phi node has a user that is not MI, bail...
+ if (MI != UseMI)
+ return false;
+ }
+ }
+ DeadPhis.push_back(OnePhi);
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
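Schematically, the "instr -> phi -> instr" cycle this check recognizes looks as follows (register names are made up); once the hardware loop has replaced the compare and branch, the bump's only user is the PHI and vice versa, so both can be removed:

    %v1 = PHI [ %init, %preheader ], [ %v2, %latch ]   ; only user of %v2
    %v2 = ADD_ri %v1, 1                                ; only user of %v1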
+
+void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
+ // This procedure was essentially copied from DeadMachineInstructionElim.
+
+ SmallVector<MachineInstr*, 1> DeadPhis;
+ if (isDead(MI, DeadPhis)) {
+ DEBUG(dbgs() << "HW looping will remove: " << *MI);
+
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I != E; I = nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand &Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI == MI)
+ continue;
+ if (Use.isDebug())
+ UseMI->getOperand(0).setReg(0U);
+ // This may also be a "instr -> phi -> instr" case which can
+ // be removed too.
+ }
+ }
+
+ MI->eraseFromParent();
+ for (unsigned i = 0; i < DeadPhis.size(); ++i)
+ DeadPhis[i]->eraseFromParent();
+ }
+}
+
+/// \brief Check if the loop is a candidate for converting to a hardware
+/// loop. If so, then perform the transformation.
///
-/// This function works on innermost loops first. A loop can
-/// be converted if it is a counting loop; either a register
-/// value or an immediate.
+/// This function works on innermost loops first. A loop can be converted
+/// if it is a counting loop; either a register value or an immediate.
///
-/// The code makes several assumptions about the representation
-/// of the loop in llvm.
+/// The code makes several assumptions about the representation of the loop
+/// in llvm.
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+ // This is just for sanity.
+ assert(L->getHeader() && "Loop without a header?");
+
bool Changed = false;
// Process nested loops first.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
Changed |= convertToHardwareLoop(*I);
- }
+
// If a nested loop has been converted, then we can't convert this loop.
- if (Changed) {
+ if (Changed)
return Changed;
+
+#ifndef NDEBUG
+ // Stop trying after reaching the limit (if any).
+ int Limit = HWLoopLimit;
+ if (Limit >= 0) {
+ if (Counter >= HWLoopLimit)
+ return false;
+ Counter++;
}
- // Are we able to determine the trip count for the loop?
- CountValue *TripCount = getTripCount(L);
- if (TripCount == 0) {
- return false;
- }
+#endif
+
// Does the loop contain any invalid instructions?
- if (containsInvalidInstruction(L)) {
+ if (containsInvalidInstruction(L))
return false;
- }
- MachineBasicBlock *Preheader = L->getLoopPreheader();
- // No preheader means there's not place for the loop instr.
- if (Preheader == 0) {
+
+ // Is the induction variable bump feeding the latch condition?
+ if (!fixupInductionVariable(L))
return false;
- }
- MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
MachineBasicBlock *LastMBB = L->getExitingBlock();
// Don't generate hw loop if the loop has more than one exit.
- if (LastMBB == 0) {
+ if (LastMBB == 0)
return false;
- }
+
MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+ if (LastI == LastMBB->end())
+ return false;
+
+ // Ensure the loop has a preheader: the loop instruction will be
+ // placed there.
+ bool NewPreheader = false;
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = createPreheaderForLoop(L);
+ if (!Preheader)
+ return false;
+ NewPreheader = true;
+ }
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+ SmallVector<MachineInstr*, 2> OldInsts;
+ // Are we able to determine the trip count for the loop?
+ CountValue *TripCount = getLoopTripCount(L, OldInsts);
+ if (TripCount == 0)
+ return false;
+
+ // Is the trip count available in the preheader?
+ if (TripCount->isReg()) {
+ // There will be a use of the register inserted into the preheader,
+ // so make sure that the register is actually defined at that point.
+ MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
+ MachineBasicBlock *BBDef = TCDef->getParent();
+ if (!NewPreheader) {
+ if (!MDT->dominates(BBDef, Preheader))
+ return false;
+ } else {
+ // If we have just created a preheader, the dominator tree won't be
+ // aware of it. Check if the definition of the register dominates
+ // the header, but is not the header itself.
+ if (!MDT->properlyDominates(BBDef, L->getHeader()))
+ return false;
+ }
+ }
// Determine the loop start.
MachineBasicBlock *LoopStart = L->getTopBlock();
if (L->getLoopLatch() != LastMBB) {
// When the exit and latch are not the same, use the latch block as the
// start.
- // The loop start address is used only after the 1st iteration, and the loop
- // latch may contains instrs. that need to be executed after the 1st iter.
+ // The loop start address is used only after the 1st iteration, and the
+ // loop latch may contain instructions that need to be executed after the
+ // first iteration.
LoopStart = L->getLoopLatch();
// Make sure the latch is a successor of the exit, otherwise it won't work.
- if (!LastMBB->isSuccessor(LoopStart)) {
+ if (!LastMBB->isSuccessor(LoopStart))
return false;
- }
}
- // Convert the loop to a hardware loop
+ // Convert the loop to a hardware loop.
DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+ DebugLoc DL;
+ if (InsertPos != Preheader->end())
+ DL = InsertPos->getDebugLoc();
if (TripCount->isReg()) {
// Create a copy of the loop count register.
- MachineFunction *MF = LastMBB->getParent();
- const TargetRegisterClass *RC =
- MF->getRegInfo().getRegClass(TripCount->getReg());
- unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
- TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
- if (TripCount->isNeg()) {
- unsigned CountReg1 = CountReg;
- CountReg = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
- TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
- }
-
+ unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
+ .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
// Add the Loop instruction to the beginning of the loop.
- BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
- TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
+ .addMBB(LoopStart)
+ .addReg(CountReg);
} else {
- assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
- // Add the Loop immediate instruction to the beginning of the loop.
+ assert(TripCount->isImm() && "Expecting immediate value for trip count");
+ // Add the Loop immediate instruction to the beginning of the loop,
+ // if the immediate fits in the instructions. Otherwise, we need to
+ // create a new virtual register.
int64_t CountImm = TripCount->getImm();
- BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
- TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
+ if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) {
+ unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg)
+ .addImm(CountImm);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
+ .addMBB(LoopStart).addReg(CountReg);
+ } else
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i))
+ .addMBB(LoopStart).addImm(CountImm);
}
- // Make sure the loop start always has a reference in the CFG. We need to
- // create a BlockAddress operand to get this mechanism to work both the
+ // Make sure the loop start always has a reference in the CFG. We need
+ // to create a BlockAddress operand to get this mechanism to work; both the
// MachineBasicBlock and BasicBlock objects need the flag set.
LoopStart->setHasAddressTaken();
// This line is needed to set the hasAddressTaken flag on the BasicBlock
- // object
+ // object.
BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
// Replace the loop branch with an endloop instruction.
- DebugLoc dl = LastI->getDebugLoc();
- BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+ DebugLoc LastIDL = LastI->getDebugLoc();
+ BuildMI(*LastMBB, LastI, LastIDL,
+ TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
// The loop ends with either:
// - a conditional branch followed by an unconditional branch, or
// - a conditional branch to the loop start.
if (LastI->getOpcode() == Hexagon::JMP_c ||
LastI->getOpcode() == Hexagon::JMP_cNot) {
- // delete one and change/add an uncond. branch to out of the loop
+ // Delete one and change/add an uncond. branch to out of the loop.
MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
LastI = LastMBB->erase(LastI);
if (!L->contains(BranchTarget)) {
- if (LastI != LastMBB->end()) {
- TII->RemoveBranch(*LastMBB);
- }
+ if (LastI != LastMBB->end())
+ LastI = LastMBB->erase(LastI);
SmallVector<MachineOperand, 0> Cond;
- TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
+ TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL);
}
} else {
// Conditional branch to loop start; just delete it.
@@ -538,110 +1142,413 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
}
delete TripCount;
+ // The induction operation and the comparison may now be unneeded.
+ // If so, remove them.
+ for (unsigned i = 0; i < OldInsts.size(); ++i)
+ removeIfDead(OldInsts[i]);
+
++NumHWLoops;
return true;
}
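A hedged before/after sketch of the whole conversion (schematic, not exact MIR syntax; the compare opcode is a placeholder):

    Before:                         After:
      preheader:                      preheader:
        ...                             LOOP0_i %loop, #count
      loop:                           loop:
        <body>                          <body>
        %p = <cmp> %i, %n               ENDLOOP0 %loop
        JMP_c %p, %loop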
-/// createHexagonFixupHwLoops - Factory for creating the hardware loop
-/// phase.
-FunctionPass *llvm::createHexagonFixupHwLoops() {
- return new HexagonFixupHwLoops();
+
+bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
+ MachineInstr *CmpI) {
+ assert (BumpI != CmpI && "Bump and compare in the same instruction?");
+
+ MachineBasicBlock *BB = BumpI->getParent();
+ if (CmpI->getParent() != BB)
+ return false;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ // Check if things are in order to begin with.
+ for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
+ if (&*I == CmpI)
+ return true;
+
+ // Out of order.
+ unsigned PredR = CmpI->getOperand(0).getReg();
+ bool FoundBump = false;
+ instr_iterator CmpIt = CmpI, NextIt = llvm::next(CmpIt);
+ for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
+ MachineInstr *In = &*I;
+ for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = In->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ if (MO.getReg() == PredR) // Found an intervening use of PredR.
+ return false;
+ }
+ }
+
+ if (In == BumpI) {
+ instr_iterator After = BumpI;
+ instr_iterator From = CmpI;
+ BB->splice(llvm::next(After), BB, From);
+ FoundBump = true;
+ break;
+ }
+ }
+ assert (FoundBump && "Cannot determine instruction order");
+ return FoundBump;
}
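What the splice above achieves, schematically: when the compare precedes the bump and nothing in between reads the predicate, the compare is moved to just after the bump (names are made up):

    Before:  %p = <cmp> %v1, 9;  ...no uses of %p...;  %v2 = ADD_ri %v1, 1
    After:   ...no uses of %p...;  %v2 = ADD_ri %v1, 1;  %p = <cmp> %v1, 9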
-bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n");
- bool Changed = fixupLoopInstrs(MF);
- return Changed;
+MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
+ MachineInstr *DI = MRI->getVRegDef(R);
+ unsigned DOpc = DI->getOpcode();
+ switch (DOpc) {
+ case Hexagon::TFRI:
+ case Hexagon::TFRI64:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::CONST64_Int_Real:
+ return DI;
+ }
+ return 0;
}
-/// fixupLoopInsts - For Hexagon, if the loop label is to far from the
-/// loop instruction then we need to set the LC0 and SA0 registers
-/// explicitly instead of using LOOP(start,count). This function
-/// checks the distance, and generates register assignments if needed.
-///
-/// This function makes two passes over the basic blocks. The first
-/// pass computes the offset of the basic block from the start.
-/// The second pass checks all the loop instructions.
-bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
-
- // Offset of the current instruction from the start.
- unsigned InstOffset = 0;
- // Map for each basic block to it's first instruction.
- DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
-
- // First pass - compute the offset of each basic block.
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
- MBB != MBBe; ++MBB) {
- BlockToInstOffset[MBB] = InstOffset;
- InstOffset += (MBB->size() * 4);
- }
-
- // Second pass - check each loop instruction to see if it needs to
- // be converted.
- InstOffset = 0;
- bool Changed = false;
- RegScavenger RS;
-
- // Loop over all the basic blocks.
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
- MBB != MBBe; ++MBB) {
- InstOffset = BlockToInstOffset[MBB];
- RS.enterBasicBlock(MBB);
-
- // Loop over all the instructions.
- MachineBasicBlock::iterator MIE = MBB->end();
- MachineBasicBlock::iterator MII = MBB->begin();
- while (MII != MIE) {
- if (isHardwareLoop(MII)) {
- RS.forward(MII);
- assert(MII->getOperand(0).isMBB() &&
- "Expect a basic block as loop operand");
- int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
- diff = (diff > 0 ? diff : -diff);
- if ((unsigned)diff > MAX_LOOP_DISTANCE) {
- // Convert to explicity setting LC0 and SA0.
- convertLoopInstr(MF, MII, RS);
- MII = MBB->erase(MII);
- Changed = true;
- } else {
- ++MII;
+
+int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) {
+ if (MO.isImm())
+ return MO.getImm();
+ assert(MO.isReg());
+ unsigned R = MO.getReg();
+ MachineInstr *DI = defWithImmediate(R);
+ assert(DI && "Need an immediate operand");
+ // All currently supported "define-with-immediate" instructions have the
+ // actual immediate value in the operand(1).
+ int64_t v = DI->getOperand(1).getImm();
+ return v;
+}
+
+
+void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
+ if (MO.isImm()) {
+ MO.setImm(Val);
+ return;
+ }
+
+ assert(MO.isReg());
+ unsigned R = MO.getReg();
+ MachineInstr *DI = defWithImmediate(R);
+ if (MRI->hasOneNonDBGUse(R)) {
+ // If R has only one use, then just change its defining instruction to
+ // the new immediate value.
+ DI->getOperand(1).setImm(Val);
+ return;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ unsigned NewR = MRI->createVirtualRegister(RC);
+ MachineBasicBlock &B = *DI->getParent();
+ DebugLoc DL = DI->getDebugLoc();
+ BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR)
+ .addImm(Val);
+ MO.setReg(NewR);
+}
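Schematically, the multi-use path above keeps the original definition intact for its other users and gives the rewritten operand a fresh register (hypothetical registers and values):

    %r0 = TFRI 100     ; has other users, left untouched
    %r1 = TFRI 101     ; new vreg carrying the adjusted immediate
    ; the rewritten operand MO now reads %r1 instead of %r0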
+
+
+bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+
+ if (!Header || !Preheader || !Latch)
+ return false;
+
+ // These data structures follow the same concept as the corresponding
+ // ones in findInductionRegister (see the comments there).
+ typedef std::pair<unsigned,int64_t> RegisterBump;
+ typedef std::pair<unsigned,RegisterBump> RegisterInduction;
+ typedef std::set<RegisterInduction> RegisterInductionSet;
+
+ // Register candidates for induction variables, with their associated bumps.
+ RegisterInductionSet IndRegs;
+
+ // Look for induction patterns:
+ // vreg1 = PHI ..., [ latch, vreg2 ]
+ // vreg2 = ADD vreg1, imm
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *Phi = &*I;
+
+ // Have a PHI instruction.
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+ if (Phi->getOperand(i+1).getMBB() != Latch)
+ continue;
+
+ unsigned PhiReg = Phi->getOperand(i).getReg();
+ MachineInstr *DI = MRI->getVRegDef(PhiReg);
+ unsigned UpdOpc = DI->getOpcode();
+ bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+
+ if (isAdd) {
+ // If the register operand to the add/sub is the PHI we are looking
+ // at, this meets the induction pattern.
+ unsigned IndReg = DI->getOperand(1).getReg();
+ if (MRI->getVRegDef(IndReg) == Phi) {
+ unsigned UpdReg = DI->getOperand(0).getReg();
+ int64_t V = DI->getOperand(2).getImm();
+ IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
}
- } else {
- ++MII;
}
- InstOffset += 4;
+ } // for (i)
+ } // for (instr)
+
+ if (IndRegs.empty())
+ return false;
+
+ MachineBasicBlock *TB = 0, *FB = 0;
+ SmallVector<MachineOperand,2> Cond;
+ // AnalyzeBranch returns true if it fails to analyze the branch.
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return false;
+
+ // Check if the latch branch is unconditional.
+ if (Cond.empty())
+ return false;
+
+ if (TB != Header && FB != Header)
+ // The latch does not go back to the header. Not a latch we know and love.
+ return false;
+
+ // Expecting a predicate register as a condition. It won't be a hardware
+ // predicate register at this point yet, just a vreg.
+ // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
+ // into Cond, followed by the predicate register. For non-negated branches
+ // it's just the register.
+ unsigned CSz = Cond.size();
+ if (CSz != 1 && CSz != 2)
+ return false;
+
+ unsigned P = Cond[CSz-1].getReg();
+ MachineInstr *PredDef = MRI->getVRegDef(P);
+
+ if (!PredDef->isCompare())
+ return false;
+
+ SmallSet<unsigned,2> CmpRegs;
+ MachineOperand *CmpImmOp = 0;
+
+ // Go over all operands to the compare and look for immediate and register
+ // operands. Assume that if the compare has a single register use and a
+ // single immediate operand, then the register is being compared with the
+ // immediate value.
+ for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg()) {
+ // Skip all implicit references. In one case there was:
+ // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
+ if (MO.isImplicit())
+ continue;
+ if (MO.isUse()) {
+ unsigned R = MO.getReg();
+ if (!defWithImmediate(R)) {
+ CmpRegs.insert(MO.getReg());
+ continue;
+ }
+ // Consider the register to be the "immediate" operand.
+ if (CmpImmOp)
+ return false;
+ CmpImmOp = &MO;
+ }
+ } else if (MO.isImm()) {
+ if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
+ return false;
+ CmpImmOp = &MO;
}
}
- return Changed;
+ if (CmpRegs.empty())
+ return false;
+
+ // Check if the compared register follows the order we want. Fix if needed.
+ for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
+ I != E; ++I) {
+ // This is a success. If the register used in the comparison is one that
+ // we have identified as a bumped (updated) induction register, there is
+ // nothing to do.
+ if (CmpRegs.count(I->first))
+ return true;
+
+ // Otherwise, if the register being compared comes out of a PHI node,
+ // and has been recognized as following the induction pattern, and is
+ // compared against an immediate, we can fix it.
+ const RegisterBump &RB = I->second;
+ if (CmpRegs.count(RB.first)) {
+ if (!CmpImmOp)
+ return false;
+
+ int64_t CmpImm = getImmediate(*CmpImmOp);
+ int64_t V = RB.second;
+ if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit).
+ return false;
+ if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit).
+ return false;
+ CmpImm += V;
+ // Some forms of cmp-immediate allow u9 and s10. Assume the worst case
+ // scenario, i.e. an 8-bit value.
+ if (CmpImmOp->isImm() && !isInt<8>(CmpImm))
+ return false;
+
+ // Make sure that the compare happens after the bump. Otherwise,
+ // after the fixup, the compare would use a yet-undefined register.
+ MachineInstr *BumpI = MRI->getVRegDef(I->first);
+ bool Order = orderBumpCompare(BumpI, PredDef);
+ if (!Order)
+ return false;
+
+ // Finally, fix the compare instruction.
+ setImmediate(*CmpImmOp, CmpImm);
+ for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RB.first) {
+ MO.setReg(I->first);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
}
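A worked example of the fixup (hypothetical vregs): when the compare reads the pre-bump value, it is rewritten to use the post-bump register with the immediate adjusted by the bump:

    %v1 = PHI [ %start, %preheader ], [ %v2, %latch ]
    %v2 = ADD_ri %v1, 1
    %p  = <cmp> %v1, 99     ; before: compares the pre-bump value
    %p  = <cmp> %v2, 100    ; after: reads %v2, immediate bumped by +1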
-/// convertLoopInstr - convert a loop instruction to a sequence of instructions
-/// that set the lc and sa register explicitly.
-void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
- MachineBasicBlock::iterator &MII,
- RegScavenger &RS) {
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
- MachineBasicBlock *MBB = MII->getParent();
- DebugLoc DL = MII->getDebugLoc();
- unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
-
- // First, set the LC0 with the trip count.
- if (MII->getOperand(1).isReg()) {
- // Trip count is a register
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
- .addReg(MII->getOperand(1).getReg());
+
+/// \brief Create a preheader for a given loop.
+MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
+ MachineLoop *L) {
+ if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
+ return TmpPH;
+
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ MachineFunction *MF = Header->getParent();
+ DebugLoc DL;
+
+ if (!Latch || Header->hasAddressTaken())
+ return 0;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+
+ // Verify that all existing predecessors have analyzable branches
+ // (or no branches at all).
+ typedef std::vector<MachineBasicBlock*> MBBVector;
+ MBBVector Preds(Header->pred_begin(), Header->pred_end());
+ SmallVector<MachineOperand,2> Tmp1;
+ MachineBasicBlock *TB = 0, *FB = 0;
+
+ if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false))
+ return 0;
+
+ for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+ MachineBasicBlock *PB = *I;
+ if (PB != Latch) {
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
+ if (NotAnalyzed)
+ return 0;
+ }
+ }
+
+ MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
+ MF->insert(Header, NewPH);
+
+ if (Header->pred_size() > 2) {
+ // Ensure that the header has only two predecessors: the preheader and
+ // the loop latch. Any additional predecessors of the header should
+ // join at the newly created preheader. Inspect all PHI nodes from the
+ // header and create appropriate corresponding PHI nodes in the preheader.
+
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *PN = &*I;
+
+ const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
+ MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
+ NewPH->insert(NewPH->end(), NewPN);
+
+ unsigned PR = PN->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(PR);
+ unsigned NewPR = MRI->createVirtualRegister(RC);
+ NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
+
+ // Copy all non-latch operands of a header's PHI node to the newly
+ // created PHI node in the preheader.
+ for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+ unsigned PredR = PN->getOperand(i).getReg();
+ MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
+ if (PredB == Latch)
+ continue;
+
+ NewPN->addOperand(MachineOperand::CreateReg(PredR, false));
+ NewPN->addOperand(MachineOperand::CreateMBB(PredB));
+ }
+
+ // Remove copied operands from the old PHI node and add the value
+ // coming from the preheader's PHI.
+ for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
+ MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
+ if (PredB != Latch) {
+ PN->RemoveOperand(i+1);
+ PN->RemoveOperand(i);
+ }
+ }
+ PN->addOperand(MachineOperand::CreateReg(NewPR, false));
+ PN->addOperand(MachineOperand::CreateMBB(NewPH));
+ }
+
} else {
- // Trip count is an immediate.
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
- .addImm(MII->getOperand(1).getImm());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
- .addReg(Scratch);
- }
- // Then, set the SA0 with the loop start address.
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
- .addMBB(MII->getOperand(0).getMBB());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch);
+ assert(Header->pred_size() == 2);
+
+ // The header has only two predecessors, but the non-latch predecessor
+ // is not a preheader (e.g. it has other successors).
+ // In such a case we don't need any extra PHI nodes in the new preheader,
+ // all we need is to adjust existing PHIs in the header to now refer to
+ // the new preheader.
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *PN = &*I;
+ for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+ MachineOperand &MO = PN->getOperand(i+1);
+ if (MO.getMBB() != Latch)
+ MO.setMBB(NewPH);
+ }
+ }
+ }
+
+ // "Reroute" the CFG edges to link in the new preheader.
+ // If any of the predecessors falls through to the header, insert a branch
+ // to the new preheader in that place.
+ SmallVector<MachineOperand,1> Tmp2;
+ SmallVector<MachineOperand,1> EmptyCond;
+
+ TB = FB = 0;
+
+ for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+ MachineBasicBlock *PB = *I;
+ if (PB != Latch) {
+ Tmp2.clear();
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false);
+ (void)NotAnalyzed; // suppress compiler warning
+ assert (!NotAnalyzed && "Should be analyzable!");
+ if (TB != Header && (Tmp2.empty() || FB != Header))
+ TII->InsertBranch(*PB, NewPH, 0, EmptyCond, DL);
+ PB->ReplaceUsesOfBlockWith(Header, NewPH);
+ }
+ }
+
+ // It can happen that the latch block will fall through into the header.
+ // Insert an unconditional branch to the header.
+ TB = FB = 0;
+ bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
+ (void)LatchNotAnalyzed; // suppress compiler warning
+ assert (!LatchNotAnalyzed && "Should be analyzable!");
+ if (!TB && !FB)
+ TII->InsertBranch(*Latch, Header, 0, EmptyCond, DL);
+
+ // Finally, the branch from the preheader to the header.
+ TII->InsertBranch(*NewPH, Header, 0, EmptyCond, DL);
+ NewPH->addSuccessor(Header);
+
+ return NewPH;
}
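A hedged sketch of the CFG rewrite performed above (block names are hypothetical): outside predecessors are rerouted through the new preheader, and only the latch still enters the header directly.

    Before:  B1 -> header,  B2 -> header,  latch -> header
    After:   B1 -> newPH,   B2 -> newPH,   newPH -> header,  latch -> header

Header PHIs are split to match: values from B1/B2 are merged by a new PHI in newPH, leaving the header PHI with just the newPH and latch inputs.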
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 5499134eb98b..8fc9ba1ee8cf 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -12,20 +12,32 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagon-isel"
+#include "Hexagon.h"
#include "HexagonISelLowering.h"
#include "HexagonTargetMachine.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-
using namespace llvm;
+static
+cl::opt<unsigned>
+MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
+ cl::Hidden, cl::init(2),
+ cl::desc("Maximum number of uses of a global address such that we still us a"
+ "constant extended instruction"));
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
+namespace llvm {
+ void initializeHexagonDAGToDAGISelPass(PassRegistry&);
+}
+
//===--------------------------------------------------------------------===//
/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
/// instructions for SelectionDAG operations.
@@ -39,19 +51,24 @@ class HexagonDAGToDAGISel : public SelectionDAGISel {
// Keep a reference to HexagonTargetMachine.
HexagonTargetMachine& TM;
const HexagonInstrInfo *TII;
-
+ DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
public:
- explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine)
- : SelectionDAGISel(targetmachine),
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(targetmachine, OptLevel),
Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
TM(targetmachine),
TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) {
-
+ initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
+ bool hasNumUsesBelowThresGA(SDNode *N) const;
SDNode *Select(SDNode *N);
// Complex Pattern Selectors.
+ inline bool foldGlobalAddress(SDValue &N, SDValue &R);
+ inline bool foldGlobalAddressGP(SDValue &N, SDValue &R);
+ bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP);
bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
@@ -94,8 +111,56 @@ public:
SDNode *SelectConstant(SDNode *N);
SDNode *SelectConstantFP(SDNode *N);
SDNode *SelectAdd(SDNode *N);
+ bool isConstExtProfitable(SDNode *N) const;
+
+// XformMskToBitPosU5Imm - Returns the bit position which
+// the single bit 32 bit mask represents.
+// Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
+ int32_t bitPos;
+ bitPos = Log2_32(Imm);
+ assert(bitPos >= 0 && bitPos < 32 &&
+ "Constant out of range for 32 BitPos Memops");
+ return CurDAG->getTargetConstant(bitPos, MVT::i32);
+}
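For reference, a minimal standalone sketch of the mask-to-bit-position idea above (Log2_32 of a single-bit mask is its bit index); the helper name is made up and a portable shift loop stands in for Log2_32:

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    static int32_t mskToBitPos(uint32_t Imm) {
      assert(Imm != 0 && (Imm & (Imm - 1)) == 0 && "expect a single-bit mask");
      int32_t BitPos = 0;
      while ((Imm >>= 1) != 0)   // shift until the set bit falls off
        ++BitPos;
      return BitPos;
    }

    int main() {
      std::cout << mskToBitPos(0x80u) << '\n';   // prints 7
    }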
+
+// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit
+// mask represents. Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+}
+
+// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit
+// mask represents. Used in Clr and Set bit immediate memops.
+SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+}
+
+// Return true if there is exactly one bit set in V, i.e., if V is one of the
+// following integers: 2^0, 2^1, ..., 2^31.
+bool ImmIsSingleBit(uint32_t v) const {
+ uint32_t c = CountPopulation_64(v);
+ // Only return true if we counted 1 bit.
+ return c == 1;
+}
+
+// XformM5ToU5Imm - Return a target constant with the specified value, of type
+// i32 where the negative literal is transformed into a positive literal for
+// use in -= memops.
+inline SDValue XformM5ToU5Imm(signed Imm) {
+ assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+ return CurDAG->getTargetConstant( - Imm, MVT::i32);
+}
+
+
+// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
+// [1..128], used in cmpb.gtu instructions.
+inline SDValue XformU7ToU7M1Imm(signed Imm) {
+ assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
+}
- // Include the pieces autogenerated from the target description.
+// Include the pieces autogenerated from the target description.
#include "HexagonGenDAGISel.inc"
};
} // end anonymous namespace
@@ -104,10 +169,23 @@ public:
/// createHexagonISelDag - This pass converts a legalized DAG into a
/// Hexagon-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) {
- return new HexagonDAGToDAGISel(TM);
+FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new HexagonDAGToDAGISel(TM, OptLevel);
}
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "hexagon-isel",
+ &SelectionDAGISel::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
static bool IsS11_0_Offset(SDNode * S) {
ConstantSDNode *N = cast<ConstantSDNode>(S);
@@ -606,8 +684,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
// Offset value must be within representable range
// and must have correct alignment properties.
if (TII->isValidAutoIncImm(StoredVT, Val)) {
- SDValue Ops[] = { Value, Base,
- CurDAG->getTargetConstant(Val, MVT::i32), Chain};
+ SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
+ Chain};
unsigned Opcode = 0;
// Figure out the post inc version of opcode.
@@ -1507,3 +1585,79 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
OutOps.push_back(Op1);
return false;
}
+
+bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
+ unsigned UseCount = 0;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ UseCount++;
+ }
+
+ return (UseCount <= 1);
+
+}
+
+//===--------------------------------------------------------------------===//
+// Return 'true' if the use count of the global address is below the threshold.
+//===--------------------------------------------------------------------===//
+bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const {
+ assert(N->getOpcode() == ISD::TargetGlobalAddress &&
+ "Expecting a target global address");
+
+ // Always try to fold the address.
+ if (TM.getOptLevel() == CodeGenOpt::Aggressive)
+ return true;
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ DenseMap<const GlobalValue *, unsigned>::const_iterator GI =
+ GlobalAddressUseCountMap.find(GA->getGlobal());
+
+ if (GI == GlobalAddressUseCountMap.end())
+ return false;
+
+ return GI->second <= MaxNumOfUsesForConstExtenders;
+}
+
+//===--------------------------------------------------------------------===//
+// Return true if the non GP-relative global address can be folded.
+//===--------------------------------------------------------------------===//
+inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) {
+ return foldGlobalAddressImpl(N, R, false);
+}
+
+//===--------------------------------------------------------------------===//
+// Return true if the GP-relative global address can be folded.
+//===--------------------------------------------------------------------===//
+inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) {
+ return foldGlobalAddressImpl(N, R, true);
+}
+
+//===--------------------------------------------------------------------===//
+// Fold the offset of the global address if the number of uses is below the threshold.
+//===--------------------------------------------------------------------===//
+bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R,
+ bool ShouldLookForGP) {
+ if (N.getOpcode() == ISD::ADD) {
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) ||
+ (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1);
+ GlobalAddressSDNode *GA =
+ dyn_cast<GlobalAddressSDNode>(N0.getOperand(0));
+
+ if (Const && GA &&
+ (GA->getOpcode() == ISD::TargetGlobalAddress)) {
+ if ((N0.getOpcode() == HexagonISD::CONST32) &&
+ !hasNumUsesBelowThresGA(GA))
+ return false;
+ R = CurDAG->getTargetGlobalAddress(GA->getGlobal(),
+ Const->getDebugLoc(),
+ N.getValueType(),
+ GA->getOffset() +
+ (uint64_t)Const->getSExtValue());
+ return true;
+ }
+ }
+ }
+ return false;
+}
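Schematic of the fold (assuming, as the code above suggests, that CONST32/CONST32_GP wrap a TargetGlobalAddress node): the constant addend is folded into the global's offset, subject to the use-count threshold checked by hasNumUsesBelowThresGA.

    ADD (CONST32 (TargetGlobalAddress @g, 0)), (Constant 8)
      ==> TargetGlobalAddress @g, offset 8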
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 1c891f14d8fe..15858a9368ae 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -13,17 +13,10 @@
//===----------------------------------------------------------------------===//
#include "HexagonISelLowering.h"
-#include "HexagonTargetMachine.h"
#include "HexagonMachineFunctionInfo.h"
-#include "HexagonTargetObjectFile.h"
#include "HexagonSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonTargetObjectFile.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -32,6 +25,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -103,6 +103,16 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
return false;
}
+ if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
ofst = State.AllocateStack(4, 4);
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
@@ -304,15 +314,9 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
// Analyze return values of ISD::RET
CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
@@ -321,12 +325,17 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ RetOps.push_back(Flag);
- return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain);
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
@@ -608,7 +617,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
// TODO: Put this function along with the other isS* functions in
// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
-// functions defined in HexagonImmediates.td.
+// functions defined in HexagonOperands.td.
static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
ConstantSDNode *N = cast<ConstantSDNode>(S);
@@ -1016,8 +1025,8 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
DebugLoc dl = Op.getDebugLoc();
Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
- HexagonTargetObjectFile &TLOF =
- (HexagonTargetObjectFile&)getObjFileLowering();
+ const HexagonTargetObjectFile &TLOF =
+ static_cast<const HexagonTargetObjectFile &>(getObjFileLowering());
if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result);
}
@@ -1025,6 +1034,14 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
}
+SDValue
+HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32);
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), BA_SD);
+}
+
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
@@ -1053,8 +1070,8 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setPrefLoopAlignment(4);
// Limits for inline expansion of memcpy/memmove
- maxStoresPerMemcpy = 6;
- maxStoresPerMemmove = 6;
+ MaxStoresPerMemcpy = 6;
+ MaxStoresPerMemmove = 6;
//
// Library calls for unsupported operations
@@ -1298,6 +1315,7 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
// Custom legalize GlobalAddress nodes into CONST32.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
// Truncate action?
setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
@@ -1343,7 +1361,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
}
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
if (EmitJumpTables) {
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
@@ -1353,7 +1370,12 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
// Increase jump tables cutover to 5, was 4.
setMinimumJumpTableEntries(5);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
@@ -1364,11 +1386,41 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+
+ // In V4, we have double word add/sub with carry. The problem with
+ // modelling this instruction is that it produces 2 results - Rdd and Px.
+ // To model update of Px, we will have to use Defs[p0..p3] which will
+ // cause any predicate live range to spill. So, we pretend we don't
+ // have these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand);
+ setOperationAction(ISD::ADDE, MVT::i16, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i8, Expand);
+ setOperationAction(ISD::SUBE, MVT::i16, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::ADDC, MVT::i8, Expand);
+ setOperationAction(ISD::ADDC, MVT::i16, Expand);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i8, Expand);
+ setOperationAction(ISD::SUBC, MVT::i16, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -1430,6 +1482,8 @@ HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
+ case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real";
case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
@@ -1478,6 +1532,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index fe6c905adfcb..3279cc652434 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -16,9 +16,9 @@
#define Hexagon_ISELLOWERING_H
#include "Hexagon.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace HexagonISD {
@@ -27,6 +27,7 @@ namespace llvm {
CONST32,
CONST32_GP, // For marking data present in GP.
+ CONST32_Int_Real,
FCONST32,
SETCC,
ADJDYNALLOC,
@@ -50,6 +51,17 @@ namespace llvm {
BARRIER, // Memory barrier.
WrapperJT,
WrapperCP,
+ WrapperCombineII,
+ WrapperCombineRR,
+ WrapperCombineRI_V4,
+ WrapperCombineIR_V4,
+ WrapperPackhl,
+ WrapperSplatB,
+ WrapperSplatH,
+ WrapperShuffEB,
+ WrapperShuffEH,
+ WrapperShuffOB,
+ WrapperShuffOH,
TC_RETURN
};
}
@@ -95,6 +107,7 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td
deleted file mode 100644
index 18692c4dcc5e..000000000000
--- a/lib/Target/Hexagon/HexagonImmediates.td
+++ /dev/null
@@ -1,508 +0,0 @@
-//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illnois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-// From IA64's InstrInfo file
-def s32Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s16Imm : Operand<i32> {
- let PrintMethod = "printImmOperand";
-}
-
-def s12Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s10Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s9Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s8Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s8Imm64 : Operand<i64> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s6Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u64Imm : Operand<i64> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u32Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u11_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u10Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u9Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u8Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u7Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u5Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u4Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def n8Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def m6Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def nOneImm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printNOneImmOperand";
-}
-
-//
-// Immediate predicates
-//
-def s32ImmPred : PatLeaf<(i32 imm), [{
- // s32ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<32>(v);
-}]>;
-
-def s32_24ImmPred : PatLeaf<(i32 imm), [{
- // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x1000000.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<32,24>(v);
-}]>;
-
-def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
- // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x10000.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<24,16>(v);
-}]>;
-
-def s16ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<16>(v);
-}]>;
-
-
-def s13ImmPred : PatLeaf<(i32 imm), [{
- // immS13 predicate - True if the immediate fits in a 13-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<13>(v);
-}]>;
-
-
-def s12ImmPred : PatLeaf<(i32 imm), [{
- // s12ImmPred predicate - True if the immediate fits in a 12-bit sign
- // extended field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<12>(v);
-}]>;
-
-def s11_0ImmPred : PatLeaf<(i32 imm), [{
- // s11_0ImmPred predicate - True if the immediate fits in an 11-bit sign
- // extended field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<11>(v);
-}]>;
-
-
-def s11_1ImmPred : PatLeaf<(i32 imm), [{
- // s11_1ImmPred predicate - True if the immediate fits in an 11-bit sign
- // extended field that is a multiple of 2.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,1>(v);
-}]>;
-
-
-def s11_2ImmPred : PatLeaf<(i32 imm), [{
- // s11_2ImmPred predicate - True if the immediate fits in an 11-bit sign
- // extended field that is a multiple of 4.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,2>(v);
-}]>;
-
-
-def s11_3ImmPred : PatLeaf<(i32 imm), [{
- // s11_3ImmPred predicate - True if the immediate fits in an 11-bit sign
- // extended field that is a multiple of 8.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,3>(v);
-}]>;
-
-
-def s10ImmPred : PatLeaf<(i32 imm), [{
- // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<10>(v);
-}]>;
-
-
-def s9ImmPred : PatLeaf<(i32 imm), [{
- // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<9>(v);
-}]>;
-
-
-def s8ImmPred : PatLeaf<(i32 imm), [{
- // s8ImmPred predicate - True if the immediate fits in an 8-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<8>(v);
-}]>;
-
-
-def s8Imm64Pred : PatLeaf<(i64 imm), [{
- // s8Imm64Pred predicate - True if the immediate fits in an 8-bit sign
- // extended field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<8>(v);
-}]>;
-
-
-def s6ImmPred : PatLeaf<(i32 imm), [{
- // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<6>(v);
-}]>;
-
-
-def s4_0ImmPred : PatLeaf<(i32 imm), [{
- // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<4>(v);
-}]>;
-
-
-def s4_1ImmPred : PatLeaf<(i32 imm), [{
- // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 2.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,1>(v);
-}]>;
-
-
-def s4_2ImmPred : PatLeaf<(i32 imm), [{
- // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 4.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,2>(v);
-}]>;
-
-
-def s4_3ImmPred : PatLeaf<(i32 imm), [{
- // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 8.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,3>(v);
-}]>;
-
-
-def u64ImmPred : PatLeaf<(i64 imm), [{
- // u64ImmPred predicate - True for any 64-bit immediate; always satisfied.
- // Adding "N ||" to suppress gcc unused warning.
- return (N || true);
-}]>;
-
-def u32ImmPred : PatLeaf<(i32 imm), [{
- // u32ImmPred predicate - True if the immediate fits in a 32-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<32>(v);
-}]>;
-
-def u16ImmPred : PatLeaf<(i32 imm), [{
- // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<16>(v);
-}]>;
-
-def u16_s8ImmPred : PatLeaf<(i32 imm), [{
- // u16_s8ImmPred predicate - True if the immediate is a multiple of 256
- // whose quotient fits in a 16-bit unsigned field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<16,8>(v);
-}]>;
-
-def u9ImmPred : PatLeaf<(i32 imm), [{
- // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<9>(v);
-}]>;
-
-
-def u8ImmPred : PatLeaf<(i32 imm), [{
- // u8ImmPred predicate - True if the immediate fits in an 8-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<8>(v);
-}]>;
-
-def u7ImmPred : PatLeaf<(i32 imm), [{
- // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<7>(v);
-}]>;
-
-
-def u6ImmPred : PatLeaf<(i32 imm), [{
- // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<6>(v);
-}]>;
-
-def u6_0ImmPred : PatLeaf<(i32 imm), [{
- // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field. Same as u6ImmPred.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<6>(v);
-}]>;
-
-def u6_1ImmPred : PatLeaf<(i32 imm), [{
- // u6_1ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field that is 1 bit aligned - a multiple of 2.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,1>(v);
-}]>;
-
-def u6_2ImmPred : PatLeaf<(i32 imm), [{
- // u6_2ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field that is 2 bits aligned - a multiple of 4.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,2>(v);
-}]>;
-
-def u6_3ImmPred : PatLeaf<(i32 imm), [{
- // u6_3ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field that is 3 bits aligned - a multiple of 8.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,3>(v);
-}]>;
-
-def u5ImmPred : PatLeaf<(i32 imm), [{
- // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<5>(v);
-}]>;
-
-
-def u3ImmPred : PatLeaf<(i32 imm), [{
- // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<3>(v);
-}]>;
-
-
-def u2ImmPred : PatLeaf<(i32 imm), [{
- // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<2>(v);
-}]>;
-
-
-def u1ImmPred : PatLeaf<(i1 imm), [{
- // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<1>(v);
-}]>;
-
-def m6ImmPred : PatLeaf<(i32 imm), [{
- // m6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
- // field (used for negative immediates).
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<6>(v);
-}]>;
-
-// nN means negative integers in [-(2^N - 1), 0].
-def n8ImmPred : PatLeaf<(i32 imm), [{
- // n8ImmPred predicate - True if the immediate lies in the range [-255, 0].
- int64_t v = (int64_t)N->getSExtValue();
- return (-255 <= v && v <= 0);
-}]>;
-
-def nOneImmPred : PatLeaf<(i32 imm), [{
- // nOneImmPred predicate - True if the immediate is -1.
- int64_t v = (int64_t)N->getSExtValue();
- return (-1 == v);
-}]>;
-
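
A note for readers: every predicate above delegates to the isInt<N>, isUInt<N>, isShiftedInt<N,S> and isShiftedUInt<N,S> helpers from llvm/Support/MathExtras.h. The following is a minimal, self-contained sketch of their semantics as inferred from the uses above (names are suffixed so the sketch does not collide with the real headers):

#include <cassert>
#include <cstdint>

// True if x is representable as an N-bit sign-extended value (N < 64).
template <unsigned N> bool isIntN(int64_t x) {
  return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
}

// True if x is a multiple of 2^S that fits in an (N+S)-bit signed field,
// i.e. an N-bit signed immediate shifted left by S.
template <unsigned N, unsigned S> bool isShiftedIntNS(int64_t x) {
  return isIntN<N + S>(x) && (x % (INT64_C(1) << S)) == 0;
}

int main() {
  assert(isShiftedIntNS<11, 2>(44));       // multiple of 4; accepted by s11_2ImmPred
  assert(!isShiftedIntNS<11, 2>(45));      // not 4-byte aligned; rejected
  assert(!isShiftedIntNS<11, 2>(1 << 14)); // aligned but out of range
}
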
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index a64c7a18164f..587fa7d7f10e 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -13,28 +13,77 @@
// *** Must match HexagonBaseInfo.h ***
//===----------------------------------------------------------------------===//
-class Type<bits<5> t> {
+class IType<bits<5> t> {
bits<5> Value = t;
}
-def TypePSEUDO : Type<0>;
-def TypeALU32 : Type<1>;
-def TypeCR : Type<2>;
-def TypeJR : Type<3>;
-def TypeJ : Type<4>;
-def TypeLD : Type<5>;
-def TypeST : Type<6>;
-def TypeSYSTEM : Type<7>;
-def TypeXTYPE : Type<8>;
-def TypeMARKER : Type<31>;
+def TypePSEUDO : IType<0>;
+def TypeALU32 : IType<1>;
+def TypeCR : IType<2>;
+def TypeJR : IType<3>;
+def TypeJ : IType<4>;
+def TypeLD : IType<5>;
+def TypeST : IType<6>;
+def TypeSYSTEM : IType<7>;
+def TypeXTYPE : IType<8>;
+def TypeENDLOOP: IType<31>;
+
+// Maintain a list of valid subtargets for each instruction.
+class SubTarget<bits<4> value> {
+ bits<4> Value = value;
+}
+
+def HasV2SubT : SubTarget<0xf>;
+def HasV2SubTOnly : SubTarget<0x1>;
+def NoV2SubT : SubTarget<0x0>;
+def HasV3SubT : SubTarget<0xe>;
+def HasV3SubTOnly : SubTarget<0x2>;
+def NoV3SubT : SubTarget<0x1>;
+def HasV4SubT : SubTarget<0xc>;
+def NoV4SubT : SubTarget<0x3>;
+def HasV5SubT : SubTarget<0x8>;
+def NoV5SubT : SubTarget<0x7>;
+
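
The SubTarget values above are four-bit validity masks, one bit per architecture version with V2 in the least significant bit (the InstHexagon comment below spells this out). For example, HasV4SubT = 0xc sets the V4 and V5 bits, while NoV4SubT = 0x3 sets only V2 and V3. A small sketch of how such a mask is interpreted; the bit names here are chosen for illustration:

#include <cassert>
#include <cstdint>

// One validity bit per architecture version; V2 occupies the LSB.
enum SubTargetBit : uint8_t { BitV2 = 0x1, BitV3 = 0x2, BitV4 = 0x4, BitV5 = 0x8 };

// True if an instruction whose mask is 'validSubTargets' is legal on 'v'.
inline bool validOn(uint8_t validSubTargets, SubTargetBit v) {
  return (validSubTargets & v) != 0;
}

int main() {
  assert(validOn(0xc, BitV4));  // HasV4SubT: legal on V4...
  assert(validOn(0xc, BitV5));  // ...and on V5,
  assert(!validOn(0xc, BitV2)); // but not on V2.
}
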
+// Addressing modes for load/store instructions.
+class AddrModeType<bits<3> value> {
+ bits<3> Value = value;
+}
+
+def NoAddrMode : AddrModeType<0>; // No addressing mode
+def Absolute : AddrModeType<1>; // Absolute addressing mode
+def AbsoluteSet : AddrModeType<2>; // Absolute set addressing mode
+def BaseImmOffset : AddrModeType<3>; // Indirect with offset
+def BaseLongOffset : AddrModeType<4>; // Indirect with long offset
+def BaseRegOffset : AddrModeType<5>; // Indirect with register offset
+
+class MemAccessSize<bits<3> value> {
+ bits<3> Value = value;
+}
+
+def NoMemAccess : MemAccessSize<0>;// Not a memory access instruction.
+def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
+def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
+def WordAccess : MemAccessSize<3>;// Word access instruction (memw).
+def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd).
+
//===----------------------------------------------------------------------===//
// Instruction Class Declaration +
//===----------------------------------------------------------------------===//
-class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr, InstrItinClass itin, Type type> : Instruction {
- field bits<32> Inst;
+class OpcodeHexagon {
+ field bits<32> Inst = ?; // Default to an invalid insn.
+ bits<4> IClass = 0; // ICLASS
+ bits<2> IParse = 0; // Parse bits.
+
+ let Inst{31-28} = IClass;
+ let Inst{15-14} = IParse;
+
+ bits<1> zero = 0;
+}
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr, InstrItinClass itin, IType type>
+ : Instruction, OpcodeHexagon {
let Namespace = "Hexagon";
dag OutOperandList = outs;
@@ -45,20 +94,63 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let Itinerary = itin;
let Size = 4;
- // *** Must match HexagonBaseInfo.h ***
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
// Instruction type according to the ISA.
- Type HexagonType = type;
- let TSFlags{4-0} = HexagonType.Value;
+ IType Type = type;
+ let TSFlags{4-0} = Type.Value;
+
// Solo instructions, i.e., those that cannot be in a packet with others.
- bits<1> isHexagonSolo = 0;
- let TSFlags{5} = isHexagonSolo;
+ bits<1> isSolo = 0;
+ let TSFlags{5} = isSolo;
+
// Predicated instructions.
bits<1> isPredicated = 0;
let TSFlags{6} = isPredicated;
-
- // Dot new value store instructions.
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{7} = isPredicatedFalse;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{8} = isPredicatedNew;
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{9} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{10} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ bits<2> opNewBits = 0;
+ let TSFlags{15-14} = opNewBits; // New-value opcode bits location: 0, 8, 16.
+ bits<1> isNVStorable = 0;
+ let TSFlags{16} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{8} = isNVStore;
+ let TSFlags{17} = isNVStore; // New-value store insn.
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{18} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{19} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{22-20} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{23} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+ let TSFlags{28-24} = opExtentBits; // Number of bits of range before extending.
+
+ // If an instruction is valid on a subtarget (v2-v5), set the corresponding
+ // bit from validSubTargets. v2 is the least significant bit.
+ // By default, instruction is valid on all subtargets.
+ SubTarget validSubTargets = HasV2SubT;
+ let TSFlags{32-29} = validSubTargets.Value;
+
+ // Addressing mode for load/store instructions.
+ AddrModeType addrMode = NoAddrMode;
+ let TSFlags{35-33} = addrMode.Value;
+
+ // Memory access size for mem access instructions (load/store)
+ MemAccessSize accessSize = NoMemAccess;
+ let TSFlags{38-36} = accessSize.Value;
// Fields used for relation models.
string BaseOpcode = "";
@@ -66,7 +158,15 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
string PredSense = "";
string PNewValue = "";
string InputType = ""; // Input is "imm" or "reg" type.
- // *** The code above must match HexagonBaseInfo.h ***
+ string isMEMri = "false"; // Set to "true" for load/store with MEMri operand.
+ string isFloat = "false"; // Set to "true" for the floating-point load/store.
+ string isBrTaken = ""; // Set to "true"/"false" for jump instructions.
+
+ let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"),
+ "");
+ let PNewValue = !if(isPredicatedNew, "new", "");
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
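
Because each field above occupies a fixed slice of TSFlags, consumers recover them with shift-and-mask accessors; the isExtendable() change later in this patch does exactly that with HexagonII::ExtendablePos and HexagonII::ExtendableMask. A hedged sketch of one such accessor, with positions copied from the layout above (the TypePos/AddrModePos names and values are assumptions modeled on MCTargetDesc/HexagonBaseInfo.h):

#include <cassert>
#include <cstdint>

namespace HexagonII {
  // Positions mirror the TSFlags assignments in InstHexagon above.
  enum { TypePos = 0,        TypeMask = 0x1f,      // TSFlags{4-0}
         ExtendablePos = 18, ExtendableMask = 0x1, // TSFlags{18}
         AddrModePos = 33,   AddrModeMask = 0x7 }; // TSFlags{35-33}
}

inline unsigned getAddrMode(uint64_t TSFlags) {
  return (TSFlags >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;
}

int main() {
  uint64_t F = (uint64_t)3 << HexagonII::AddrModePos; // BaseImmOffset = 3
  assert(getAddrMode(F) == 3);
}
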
//===----------------------------------------------------------------------===//
@@ -75,187 +175,143 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD>;
-class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
- let mayLoad = 1;
-}
+let mayLoad = 1 in
+class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<13> imm13;
-}
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
-class STInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+let mayStore = 1 in
+class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST>;
-class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
- let mayStore = 1;
-}
+class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
-// SYSTEM Instruction Class in V4 can take SLOT0 only
-// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
-class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", SYS, TypeSYSTEM> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+let mayStore = 1 in
+class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST0, TypeST>;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
-class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<13> imm13;
-}
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
+
+// SYSTEM Instruction Class in V4 can take SLOT0 only
+// In V2/V3 we used ST for this but in V4 ST can take SLOT0 or SLOT1.
+class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, SYS, TypeSYSTEM>;
// ALU32 Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ALU32, TypeALU32> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<16> imm16;
- bits<16> imm16_2;
-}
+class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU32, TypeALU32>;
// ALU64 Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
-class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ALU64, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<16> imm16;
- bits<16> imm16_2;
-}
+class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE>;
+
+class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<16> imm16;
- bits<16> imm16_2;
-}
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
-class MInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", M, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE>;
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
-class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : MInst<outs, ins, asmstr, pattern, cstr>;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
-class SInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", S, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE>;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
-class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE> {
-// : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
-// : InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, S)> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : SInst<outs, ins, asmstr, pattern, cstr>;
// J Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class JType<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", J, TypeJ> {
- bits<16> imm16;
-}
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, J, TypeJ>;
// JR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class JRType<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", JR, TypeJR> {
- bits<5> rs;
- bits<5> pu; // Predicate register
-}
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, JR, TypeJR>;
// CR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", CR, TypeCR> {
- bits<5> rs;
- bits<10> imm10;
-}
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, CR, TypeCR>;
-class Marker<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", MARKER, TypeMARKER> {
- let isCodeGenOnly = 1;
- let isPseudo = 1;
-}
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ENDLOOP, TypeENDLOOP>;
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO, TypePSEUDO> {
- let isCodeGenOnly = 1;
- let isPseudo = 1;
-}
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr="">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>;
//===----------------------------------------------------------------------===//
// Instruction Classes Definitions -
@@ -265,75 +321,52 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
//
// ALU32 patterns
//.
-class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
-}
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
//
// ALU64 patterns.
//
-class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU64Type<outs, ins, asmstr, pattern> {
-}
-
-class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU64Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
-
-// J Type Instructions.
-class JInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : JType<outs, ins, asmstr, pattern> {
-}
-
-// JR type Instructions.
-class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : JRType<outs, ins, asmstr, pattern> {
-}
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
// Post increment ST Instruction.
-class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : STInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
-}
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
-class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : STInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
- let mayStore = 1;
-}
+let mayStore = 1 in
+class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
// Post increment LD Instruction.
-class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : LDInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
-}
-
-class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : LDInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
- let mayLoad = 1;
-}
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
//===----------------------------------------------------------------------===//
// V4 Instruction Format Definitions +
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 49741a3d1b20..9fda0da91612 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -17,9 +17,9 @@
// *** Must match BaseInfo.h ***
//----------------------------------------------------------------------------//
-def TypeMEMOP : Type<9>;
-def TypeNV : Type<10>;
-def TypePREFIX : Type<30>;
+def TypeMEMOP : IType<9>;
+def TypeNV : IType<10>;
+def TypePREFIX : IType<30>;
//----------------------------------------------------------------------------//
// Instruction Classes Definitions +
@@ -28,40 +28,39 @@ def TypePREFIX : Type<30>;
//
// NV type instructions.
//
-class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4, TypeNV> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV>;
+
+class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
// Definition of Post increment new value store.
-class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<13> imm13;
-}
+class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
// Post increment ST Instruction.
-class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : NVInstPost_V4<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
-}
+let mayStore = 1 in
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
+
+// New-value conditional branch.
+class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
-class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4, TypeMEMOP> {
- bits<5> rd;
- bits<5> rs;
- bits<6> imm6;
-}
+let mayLoad = 1, mayStore = 1 in
+class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, MEM_V4, TypeMEMOP>;
-class Immext<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX> {
- let isCodeGenOnly = 1;
+class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : MEMInst<outs, ins, asmstr, pattern, cstr>;
- bits<26> imm26;
-}
+let isCodeGenOnly = 1 in
+class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX>;
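
The removed Immext class carried a bits<26> imm26 payload; on Hexagon a constant-extender prefix supplies the upper 26 bits of a 32-bit immediate while the extended instruction keeps only the low 6. A sketch of that split, under the stated 26/6 assumption:

#include <cassert>
#include <cstdint>

// Split a 32-bit immediate between an extender prefix (upper 26 bits,
// matching the removed bits<26> imm26 field) and the base instruction
// (low 6 bits).
struct ExtendedImm { uint32_t prefixBits, insnBits; };

inline ExtendedImm splitImmediate(uint32_t imm) {
  return { imm >> 6, imm & 0x3f };
}

int main() {
  ExtendedImm e = splitImmediate(0xDEADBEEFu);
  assert(((e.prefixBits << 6) | e.insnBits) == 0xDEADBEEFu);
}
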
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 84354403084d..60b12ac01c9c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#include "HexagonInstrInfo.h"
+#include "Hexagon.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
-#include "Hexagon.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/MathExtras.h"
#define GET_INSTRINFO_CTOR
@@ -305,6 +305,88 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
}
+/// \brief For a comparison instruction, return the source registers in
+/// \p SrcReg and \p SrcReg2 if it has two register operands, and the value
+/// it compares against in \p Value. Return true if the comparison instruction
+/// can be analyzed.
+bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ unsigned Opc = MI->getOpcode();
+
+ // Set mask and the first source register.
+ switch (Opc) {
+ case Hexagon::CMPEHexagon4rr:
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPEQrr:
+ case Hexagon::CMPGT64rr:
+ case Hexagon::CMPGTU64rr:
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTUrr:
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPGTrr:
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPLTrr:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = ~0;
+ break;
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPbEQrr_sbsb_V4:
+ case Hexagon::CMPbEQrr_ubub_V4:
+ case Hexagon::CMPbGTUri_V4:
+ case Hexagon::CMPbGTUrr_V4:
+ case Hexagon::CMPbGTrr_V4:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFF;
+ break;
+ case Hexagon::CMPhEQri_V4:
+ case Hexagon::CMPhEQrr_shl_V4:
+ case Hexagon::CMPhEQrr_xor_V4:
+ case Hexagon::CMPhGTUri_V4:
+ case Hexagon::CMPhGTUrr_V4:
+ case Hexagon::CMPhGTrr_shl_V4:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFFFF;
+ break;
+ }
+
+ // Set the value/second source register.
+ switch (Opc) {
+ case Hexagon::CMPEHexagon4rr:
+ case Hexagon::CMPEQrr:
+ case Hexagon::CMPGT64rr:
+ case Hexagon::CMPGTU64rr:
+ case Hexagon::CMPGTUrr:
+ case Hexagon::CMPGTrr:
+ case Hexagon::CMPbEQrr_sbsb_V4:
+ case Hexagon::CMPbEQrr_ubub_V4:
+ case Hexagon::CMPbGTUrr_V4:
+ case Hexagon::CMPbGTrr_V4:
+ case Hexagon::CMPhEQrr_shl_V4:
+ case Hexagon::CMPhEQrr_xor_V4:
+ case Hexagon::CMPhGTUrr_V4:
+ case Hexagon::CMPhGTrr_shl_V4:
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPLTrr:
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPbGTUri_V4:
+ case Hexagon::CMPhEQri_V4:
+ case Hexagon::CMPhGTUri_V4:
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+}
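
The Mask values analyzeCompare() reports (~0, 0xFF, 0xFFFF) tell the caller how much of the register the compare actually observes: byte compares (CMPb*) see only the low 8 bits and halfword compares (CMPh*) only the low 16. A runnable illustration of the masked-equality idea (the helper name here is ours, not part of the patch):

#include <cassert>
#include <cstdint>

// Compare two register values under the width mask analyzeCompare reports.
inline bool maskedEq(uint32_t a, uint32_t b, uint32_t mask) {
  return (a & mask) == (b & mask);
}

int main() {
  assert(maskedEq(0x1234, 0xAB34, 0xFF));    // CMPbEQ*: low bytes match
  assert(!maskedEq(0x1234, 0xAB34, 0xFFFF)); // CMPhEQ*: halfwords differ
}
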
+
+
void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -314,7 +396,7 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
- BuildMI(MBB, I, DL, get(Hexagon::TFR_64), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Hexagon::TFR64), DestReg).addReg(SrcReg);
return;
}
if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
@@ -344,6 +426,18 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg);
return;
}
+ if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ Hexagon::IntRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_RsPd), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ Hexagon::PredRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_PdRs), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
llvm_unreachable("Unimplemented");
}
@@ -443,6 +537,15 @@ MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return(0);
}
+MachineInstr*
+HexagonInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Hexagon::DBG_VALUE))
+ .addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
@@ -463,270 +566,43 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
}
bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
- switch(MI->getOpcode()) {
- default: return false;
- // JMP_EQri
- case Hexagon::JMP_EQriPt_nv_V4:
- case Hexagon::JMP_EQriPnt_nv_V4:
- case Hexagon::JMP_EQriNotPt_nv_V4:
- case Hexagon::JMP_EQriNotPnt_nv_V4:
-
- // JMP_EQri - with -1
- case Hexagon::JMP_EQriPtneg_nv_V4:
- case Hexagon::JMP_EQriPntneg_nv_V4:
- case Hexagon::JMP_EQriNotPtneg_nv_V4:
- case Hexagon::JMP_EQriNotPntneg_nv_V4:
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_nv_V4:
- case Hexagon::JMP_EQrrPnt_nv_V4:
- case Hexagon::JMP_EQrrNotPt_nv_V4:
- case Hexagon::JMP_EQrrNotPnt_nv_V4:
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_nv_V4:
- case Hexagon::JMP_GTriPnt_nv_V4:
- case Hexagon::JMP_GTriNotPt_nv_V4:
- case Hexagon::JMP_GTriNotPnt_nv_V4:
-
- // JMP_GTri - with -1
- case Hexagon::JMP_GTriPtneg_nv_V4:
- case Hexagon::JMP_GTriPntneg_nv_V4:
- case Hexagon::JMP_GTriNotPtneg_nv_V4:
- case Hexagon::JMP_GTriNotPntneg_nv_V4:
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_nv_V4:
- case Hexagon::JMP_GTrrPnt_nv_V4:
- case Hexagon::JMP_GTrrNotPt_nv_V4:
- case Hexagon::JMP_GTrrNotPnt_nv_V4:
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_nv_V4:
- case Hexagon::JMP_GTrrdnPnt_nv_V4:
- case Hexagon::JMP_GTrrdnNotPt_nv_V4:
- case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_nv_V4:
- case Hexagon::JMP_GTUriPnt_nv_V4:
- case Hexagon::JMP_GTUriNotPt_nv_V4:
- case Hexagon::JMP_GTUriNotPnt_nv_V4:
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_nv_V4:
- case Hexagon::JMP_GTUrrPnt_nv_V4:
- case Hexagon::JMP_GTUrrNotPt_nv_V4:
- case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_nv_V4:
- case Hexagon::JMP_GTUrrdnPnt_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ const MCInstrDesc &MID = MI->getDesc();
+ const uint64_t F = MID.TSFlags;
+ if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
+ return true;
- // TFR_FI
+ // TODO: This is largely obsolete now and will need to be removed
+ // in subsequent patches.
+ switch(MI->getOpcode()) {
+ // TFR_FI remains a special case.
case Hexagon::TFR_FI:
return true;
+ default:
+ return false;
}
+ return false;
}
+// This returns true in two cases:
+// - The opcode itself indicates that this is an extended instruction.
+// - One of the operands has been marked with the HMOTF_ConstExtended flag.
bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
- switch(MI->getOpcode()) {
- default: return false;
- // JMP_EQri
- case Hexagon::JMP_EQriPt_ie_nv_V4:
- case Hexagon::JMP_EQriPnt_ie_nv_V4:
- case Hexagon::JMP_EQriNotPt_ie_nv_V4:
- case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
-
- // JMP_EQri - with -1
- case Hexagon::JMP_EQriPtneg_ie_nv_V4:
- case Hexagon::JMP_EQriPntneg_ie_nv_V4:
- case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
- case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_ie_nv_V4:
- case Hexagon::JMP_EQrrPnt_ie_nv_V4:
- case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
- case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_ie_nv_V4:
- case Hexagon::JMP_GTriPnt_ie_nv_V4:
- case Hexagon::JMP_GTriNotPt_ie_nv_V4:
- case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
-
- // JMP_GTri - with -1
- case Hexagon::JMP_GTriPtneg_ie_nv_V4:
- case Hexagon::JMP_GTriPntneg_ie_nv_V4:
- case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
- case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_ie_nv_V4:
- case Hexagon::JMP_GTrrPnt_ie_nv_V4:
- case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
- case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_ie_nv_V4:
- case Hexagon::JMP_GTUriPnt_ie_nv_V4:
- case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
- case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
-
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
-
- // V4 absolute set addressing.
- case Hexagon::LDrid_abs_setimm_V4:
- case Hexagon::LDriw_abs_setimm_V4:
- case Hexagon::LDrih_abs_setimm_V4:
- case Hexagon::LDrib_abs_setimm_V4:
- case Hexagon::LDriuh_abs_setimm_V4:
- case Hexagon::LDriub_abs_setimm_V4:
-
- case Hexagon::STrid_abs_setimm_V4:
- case Hexagon::STrib_abs_setimm_V4:
- case Hexagon::STrih_abs_setimm_V4:
- case Hexagon::STriw_abs_setimm_V4:
-
- // V4 global address load.
- case Hexagon::LDrid_GP_cPt_V4 :
- case Hexagon::LDrid_GP_cNotPt_V4 :
- case Hexagon::LDrid_GP_cdnPt_V4 :
- case Hexagon::LDrid_GP_cdnNotPt_V4 :
- case Hexagon::LDrib_GP_cPt_V4 :
- case Hexagon::LDrib_GP_cNotPt_V4 :
- case Hexagon::LDrib_GP_cdnPt_V4 :
- case Hexagon::LDrib_GP_cdnNotPt_V4 :
- case Hexagon::LDriub_GP_cPt_V4 :
- case Hexagon::LDriub_GP_cNotPt_V4 :
- case Hexagon::LDriub_GP_cdnPt_V4 :
- case Hexagon::LDriub_GP_cdnNotPt_V4 :
- case Hexagon::LDrih_GP_cPt_V4 :
- case Hexagon::LDrih_GP_cNotPt_V4 :
- case Hexagon::LDrih_GP_cdnPt_V4 :
- case Hexagon::LDrih_GP_cdnNotPt_V4 :
- case Hexagon::LDriuh_GP_cPt_V4 :
- case Hexagon::LDriuh_GP_cNotPt_V4 :
- case Hexagon::LDriuh_GP_cdnPt_V4 :
- case Hexagon::LDriuh_GP_cdnNotPt_V4 :
- case Hexagon::LDriw_GP_cPt_V4 :
- case Hexagon::LDriw_GP_cNotPt_V4 :
- case Hexagon::LDriw_GP_cdnPt_V4 :
- case Hexagon::LDriw_GP_cdnNotPt_V4 :
- case Hexagon::LDd_GP_cPt_V4 :
- case Hexagon::LDd_GP_cNotPt_V4 :
- case Hexagon::LDd_GP_cdnPt_V4 :
- case Hexagon::LDd_GP_cdnNotPt_V4 :
- case Hexagon::LDb_GP_cPt_V4 :
- case Hexagon::LDb_GP_cNotPt_V4 :
- case Hexagon::LDb_GP_cdnPt_V4 :
- case Hexagon::LDb_GP_cdnNotPt_V4 :
- case Hexagon::LDub_GP_cPt_V4 :
- case Hexagon::LDub_GP_cNotPt_V4 :
- case Hexagon::LDub_GP_cdnPt_V4 :
- case Hexagon::LDub_GP_cdnNotPt_V4 :
- case Hexagon::LDh_GP_cPt_V4 :
- case Hexagon::LDh_GP_cNotPt_V4 :
- case Hexagon::LDh_GP_cdnPt_V4 :
- case Hexagon::LDh_GP_cdnNotPt_V4 :
- case Hexagon::LDuh_GP_cPt_V4 :
- case Hexagon::LDuh_GP_cNotPt_V4 :
- case Hexagon::LDuh_GP_cdnPt_V4 :
- case Hexagon::LDuh_GP_cdnNotPt_V4 :
- case Hexagon::LDw_GP_cPt_V4 :
- case Hexagon::LDw_GP_cNotPt_V4 :
- case Hexagon::LDw_GP_cdnPt_V4 :
- case Hexagon::LDw_GP_cdnNotPt_V4 :
-
- // V4 global address store.
- case Hexagon::STrid_GP_cPt_V4 :
- case Hexagon::STrid_GP_cNotPt_V4 :
- case Hexagon::STrid_GP_cdnPt_V4 :
- case Hexagon::STrid_GP_cdnNotPt_V4 :
- case Hexagon::STrib_GP_cPt_V4 :
- case Hexagon::STrib_GP_cNotPt_V4 :
- case Hexagon::STrib_GP_cdnPt_V4 :
- case Hexagon::STrib_GP_cdnNotPt_V4 :
- case Hexagon::STrih_GP_cPt_V4 :
- case Hexagon::STrih_GP_cNotPt_V4 :
- case Hexagon::STrih_GP_cdnPt_V4 :
- case Hexagon::STrih_GP_cdnNotPt_V4 :
- case Hexagon::STriw_GP_cPt_V4 :
- case Hexagon::STriw_GP_cNotPt_V4 :
- case Hexagon::STriw_GP_cdnPt_V4 :
- case Hexagon::STriw_GP_cdnNotPt_V4 :
- case Hexagon::STd_GP_cPt_V4 :
- case Hexagon::STd_GP_cNotPt_V4 :
- case Hexagon::STd_GP_cdnPt_V4 :
- case Hexagon::STd_GP_cdnNotPt_V4 :
- case Hexagon::STb_GP_cPt_V4 :
- case Hexagon::STb_GP_cNotPt_V4 :
- case Hexagon::STb_GP_cdnPt_V4 :
- case Hexagon::STb_GP_cdnNotPt_V4 :
- case Hexagon::STh_GP_cPt_V4 :
- case Hexagon::STh_GP_cNotPt_V4 :
- case Hexagon::STh_GP_cdnPt_V4 :
- case Hexagon::STh_GP_cdnNotPt_V4 :
- case Hexagon::STw_GP_cPt_V4 :
- case Hexagon::STw_GP_cNotPt_V4 :
- case Hexagon::STw_GP_cdnPt_V4 :
- case Hexagon::STw_GP_cdnNotPt_V4 :
-
- // V4 predicated global address new value store.
- case Hexagon::STrib_GP_cPt_nv_V4 :
- case Hexagon::STrib_GP_cNotPt_nv_V4 :
- case Hexagon::STrib_GP_cdnPt_nv_V4 :
- case Hexagon::STrib_GP_cdnNotPt_nv_V4 :
- case Hexagon::STrih_GP_cPt_nv_V4 :
- case Hexagon::STrih_GP_cNotPt_nv_V4 :
- case Hexagon::STrih_GP_cdnPt_nv_V4 :
- case Hexagon::STrih_GP_cdnNotPt_nv_V4 :
- case Hexagon::STriw_GP_cPt_nv_V4 :
- case Hexagon::STriw_GP_cNotPt_nv_V4 :
- case Hexagon::STriw_GP_cdnPt_nv_V4 :
- case Hexagon::STriw_GP_cdnNotPt_nv_V4 :
- case Hexagon::STb_GP_cPt_nv_V4 :
- case Hexagon::STb_GP_cNotPt_nv_V4 :
- case Hexagon::STb_GP_cdnPt_nv_V4 :
- case Hexagon::STb_GP_cdnNotPt_nv_V4 :
- case Hexagon::STh_GP_cPt_nv_V4 :
- case Hexagon::STh_GP_cNotPt_nv_V4 :
- case Hexagon::STh_GP_cdnPt_nv_V4 :
- case Hexagon::STh_GP_cdnNotPt_nv_V4 :
- case Hexagon::STw_GP_cPt_nv_V4 :
- case Hexagon::STw_GP_cNotPt_nv_V4 :
- case Hexagon::STw_GP_cdnPt_nv_V4 :
- case Hexagon::STw_GP_cdnNotPt_nv_V4 :
-
- // TFR_FI
- case Hexagon::TFR_FI_immext_V4:
-
- // TFRI_F
- case Hexagon::TFRI_f:
- case Hexagon::TFRI_cPt_f:
- case Hexagon::TFRI_cNotPt_f:
- case Hexagon::CONST64_Float_Real:
+ // First check if this is a permanently extended opcode.
+ const uint64_t F = MI->getDesc().TSFlags;
+ if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+ return true;
+ // Use the operand flags to determine whether one of MI's operands
+ // has the HMOTF_ConstExtended flag set.
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
return true;
}
+ return false;
}
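
Note the operand-flag test above uses a bitwise AND: a logical getTargetFlags() && HMOTF_ConstExtended would be true whenever any target flag bit is set, not just the const-extended one. A minimal demonstration; the flag values below are illustrative, not the real encodings:

#include <cassert>

enum { HMOTF_ConstExtended = 0x80, SomeOtherFlag = 0x01 }; // illustrative values

int main() {
  unsigned flags = SomeOtherFlag;         // const-extended bit NOT set
  assert(flags && HMOTF_ConstExtended);   // logical &&: spuriously true
  assert(!(flags & HMOTF_ConstExtended)); // bitwise &: correctly false
}
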
bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
@@ -835,264 +711,6 @@ bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
}
}
-unsigned HexagonInstrInfo::getImmExtForm(const MachineInstr* MI) const {
- switch(MI->getOpcode()) {
- default: llvm_unreachable("Unknown type of instruction.");
- // JMP_EQri
- case Hexagon::JMP_EQriPt_nv_V4:
- return Hexagon::JMP_EQriPt_ie_nv_V4;
- case Hexagon::JMP_EQriNotPt_nv_V4:
- return Hexagon::JMP_EQriNotPt_ie_nv_V4;
- case Hexagon::JMP_EQriPnt_nv_V4:
- return Hexagon::JMP_EQriPnt_ie_nv_V4;
- case Hexagon::JMP_EQriNotPnt_nv_V4:
- return Hexagon::JMP_EQriNotPnt_ie_nv_V4;
-
- // JMP_EQri -- with -1
- case Hexagon::JMP_EQriPtneg_nv_V4:
- return Hexagon::JMP_EQriPtneg_ie_nv_V4;
- case Hexagon::JMP_EQriNotPtneg_nv_V4:
- return Hexagon::JMP_EQriNotPtneg_ie_nv_V4;
- case Hexagon::JMP_EQriPntneg_nv_V4:
- return Hexagon::JMP_EQriPntneg_ie_nv_V4;
- case Hexagon::JMP_EQriNotPntneg_nv_V4:
- return Hexagon::JMP_EQriNotPntneg_ie_nv_V4;
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_nv_V4:
- return Hexagon::JMP_EQrrPt_ie_nv_V4;
- case Hexagon::JMP_EQrrNotPt_nv_V4:
- return Hexagon::JMP_EQrrNotPt_ie_nv_V4;
- case Hexagon::JMP_EQrrPnt_nv_V4:
- return Hexagon::JMP_EQrrPnt_ie_nv_V4;
- case Hexagon::JMP_EQrrNotPnt_nv_V4:
- return Hexagon::JMP_EQrrNotPnt_ie_nv_V4;
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_nv_V4:
- return Hexagon::JMP_GTriPt_ie_nv_V4;
- case Hexagon::JMP_GTriNotPt_nv_V4:
- return Hexagon::JMP_GTriNotPt_ie_nv_V4;
- case Hexagon::JMP_GTriPnt_nv_V4:
- return Hexagon::JMP_GTriPnt_ie_nv_V4;
- case Hexagon::JMP_GTriNotPnt_nv_V4:
- return Hexagon::JMP_GTriNotPnt_ie_nv_V4;
-
- // JMP_GTri -- with -1
- case Hexagon::JMP_GTriPtneg_nv_V4:
- return Hexagon::JMP_GTriPtneg_ie_nv_V4;
- case Hexagon::JMP_GTriNotPtneg_nv_V4:
- return Hexagon::JMP_GTriNotPtneg_ie_nv_V4;
- case Hexagon::JMP_GTriPntneg_nv_V4:
- return Hexagon::JMP_GTriPntneg_ie_nv_V4;
- case Hexagon::JMP_GTriNotPntneg_nv_V4:
- return Hexagon::JMP_GTriNotPntneg_ie_nv_V4;
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_nv_V4:
- return Hexagon::JMP_GTrrPt_ie_nv_V4;
- case Hexagon::JMP_GTrrNotPt_nv_V4:
- return Hexagon::JMP_GTrrNotPt_ie_nv_V4;
- case Hexagon::JMP_GTrrPnt_nv_V4:
- return Hexagon::JMP_GTrrPnt_ie_nv_V4;
- case Hexagon::JMP_GTrrNotPnt_nv_V4:
- return Hexagon::JMP_GTrrNotPnt_ie_nv_V4;
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_nv_V4:
- return Hexagon::JMP_GTrrdnPt_ie_nv_V4;
- case Hexagon::JMP_GTrrdnNotPt_nv_V4:
- return Hexagon::JMP_GTrrdnNotPt_ie_nv_V4;
- case Hexagon::JMP_GTrrdnPnt_nv_V4:
- return Hexagon::JMP_GTrrdnPnt_ie_nv_V4;
- case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
- return Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4;
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_nv_V4:
- return Hexagon::JMP_GTUriPt_ie_nv_V4;
- case Hexagon::JMP_GTUriNotPt_nv_V4:
- return Hexagon::JMP_GTUriNotPt_ie_nv_V4;
- case Hexagon::JMP_GTUriPnt_nv_V4:
- return Hexagon::JMP_GTUriPnt_ie_nv_V4;
- case Hexagon::JMP_GTUriNotPnt_nv_V4:
- return Hexagon::JMP_GTUriNotPnt_ie_nv_V4;
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_nv_V4:
- return Hexagon::JMP_GTUrrPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrNotPt_nv_V4:
- return Hexagon::JMP_GTUrrNotPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrPnt_nv_V4:
- return Hexagon::JMP_GTUrrPnt_ie_nv_V4;
- case Hexagon::JMP_GTUrrNotPnt_nv_V4:
- return Hexagon::JMP_GTUrrNotPnt_ie_nv_V4;
-
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_nv_V4:
- return Hexagon::JMP_GTUrrdnPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4;
- case Hexagon::JMP_GTUrrdnPnt_nv_V4:
- return Hexagon::JMP_GTUrrdnPnt_ie_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4;
-
- case Hexagon::TFR_FI:
- return Hexagon::TFR_FI_immext_V4;
-
- case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDSUBi_MEM_V4 :
- case Hexagon::MEMw_ADDi_MEM_V4 :
- case Hexagon::MEMw_SUBi_MEM_V4 :
- case Hexagon::MEMw_ADDr_MEM_V4 :
- case Hexagon::MEMw_SUBr_MEM_V4 :
- case Hexagon::MEMw_ANDr_MEM_V4 :
- case Hexagon::MEMw_ORr_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_MEM_V4 :
- case Hexagon::MEMh_ADDi_MEM_V4 :
- case Hexagon::MEMh_SUBi_MEM_V4 :
- case Hexagon::MEMh_ADDr_MEM_V4 :
- case Hexagon::MEMh_SUBr_MEM_V4 :
- case Hexagon::MEMh_ANDr_MEM_V4 :
- case Hexagon::MEMh_ORr_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_MEM_V4 :
- case Hexagon::MEMb_ADDi_MEM_V4 :
- case Hexagon::MEMb_SUBi_MEM_V4 :
- case Hexagon::MEMb_ADDr_MEM_V4 :
- case Hexagon::MEMb_SUBr_MEM_V4 :
- case Hexagon::MEMb_ANDr_MEM_V4 :
- case Hexagon::MEMb_ORr_MEM_V4 :
- llvm_unreachable("Needs implementing.");
- }
-}
-
-unsigned HexagonInstrInfo::getNormalBranchForm(const MachineInstr* MI) const {
- switch(MI->getOpcode()) {
- default: llvm_unreachable("Unknown type of jump instruction.");
- // JMP_EQri
- case Hexagon::JMP_EQriPt_ie_nv_V4:
- return Hexagon::JMP_EQriPt_nv_V4;
- case Hexagon::JMP_EQriNotPt_ie_nv_V4:
- return Hexagon::JMP_EQriNotPt_nv_V4;
- case Hexagon::JMP_EQriPnt_ie_nv_V4:
- return Hexagon::JMP_EQriPnt_nv_V4;
- case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
- return Hexagon::JMP_EQriNotPnt_nv_V4;
-
- // JMP_EQri -- with -1
- case Hexagon::JMP_EQriPtneg_ie_nv_V4:
- return Hexagon::JMP_EQriPtneg_nv_V4;
- case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
- return Hexagon::JMP_EQriNotPtneg_nv_V4;
- case Hexagon::JMP_EQriPntneg_ie_nv_V4:
- return Hexagon::JMP_EQriPntneg_nv_V4;
- case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
- return Hexagon::JMP_EQriNotPntneg_nv_V4;
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_ie_nv_V4:
- return Hexagon::JMP_EQrrPt_nv_V4;
- case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
- return Hexagon::JMP_EQrrNotPt_nv_V4;
- case Hexagon::JMP_EQrrPnt_ie_nv_V4:
- return Hexagon::JMP_EQrrPnt_nv_V4;
- case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
- return Hexagon::JMP_EQrrNotPnt_nv_V4;
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_ie_nv_V4:
- return Hexagon::JMP_GTriPt_nv_V4;
- case Hexagon::JMP_GTriNotPt_ie_nv_V4:
- return Hexagon::JMP_GTriNotPt_nv_V4;
- case Hexagon::JMP_GTriPnt_ie_nv_V4:
- return Hexagon::JMP_GTriPnt_nv_V4;
- case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTriNotPnt_nv_V4;
-
- // JMP_GTri -- with -1
- case Hexagon::JMP_GTriPtneg_ie_nv_V4:
- return Hexagon::JMP_GTriPtneg_nv_V4;
- case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
- return Hexagon::JMP_GTriNotPtneg_nv_V4;
- case Hexagon::JMP_GTriPntneg_ie_nv_V4:
- return Hexagon::JMP_GTriPntneg_nv_V4;
- case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
- return Hexagon::JMP_GTriNotPntneg_nv_V4;
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_ie_nv_V4:
- return Hexagon::JMP_GTrrPt_nv_V4;
- case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
- return Hexagon::JMP_GTrrNotPt_nv_V4;
- case Hexagon::JMP_GTrrPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrPnt_nv_V4;
- case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrNotPnt_nv_V4;
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnPt_nv_V4;
- case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnNotPt_nv_V4;
- case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnPnt_nv_V4;
- case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTrrdnNotPnt_nv_V4;
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_ie_nv_V4:
- return Hexagon::JMP_GTUriPt_nv_V4;
- case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
- return Hexagon::JMP_GTUriNotPt_nv_V4;
- case Hexagon::JMP_GTUriPnt_ie_nv_V4:
- return Hexagon::JMP_GTUriPnt_nv_V4;
- case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTUriNotPnt_nv_V4;
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrPt_nv_V4;
- case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrNotPt_nv_V4;
- case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrPnt_nv_V4;
- case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrNotPnt_nv_V4;
-
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnPt_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPt_nv_V4;
- case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnPnt_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
- }
-}
-
-
bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
default: return false;
@@ -1101,7 +719,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STrib_indexed_nv_V4:
case Hexagon::STrib_indexed_shl_nv_V4:
case Hexagon::STrib_shl_nv_V4:
- case Hexagon::STrib_GP_nv_V4:
case Hexagon::STb_GP_nv_V4:
case Hexagon::POST_STbri_nv_V4:
case Hexagon::STrib_cPt_nv_V4:
@@ -1124,10 +741,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STb_GP_cNotPt_nv_V4:
case Hexagon::STb_GP_cdnPt_nv_V4:
case Hexagon::STb_GP_cdnNotPt_nv_V4:
- case Hexagon::STrib_GP_cPt_nv_V4:
- case Hexagon::STrib_GP_cNotPt_nv_V4:
- case Hexagon::STrib_GP_cdnPt_nv_V4:
- case Hexagon::STrib_GP_cdnNotPt_nv_V4:
case Hexagon::STrib_abs_nv_V4:
case Hexagon::STrib_abs_cPt_nv_V4:
case Hexagon::STrib_abs_cdnPt_nv_V4:
@@ -1144,7 +757,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STrih_indexed_nv_V4:
case Hexagon::STrih_indexed_shl_nv_V4:
case Hexagon::STrih_shl_nv_V4:
- case Hexagon::STrih_GP_nv_V4:
case Hexagon::STh_GP_nv_V4:
case Hexagon::POST_SThri_nv_V4:
case Hexagon::STrih_cPt_nv_V4:
@@ -1167,10 +779,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STh_GP_cNotPt_nv_V4:
case Hexagon::STh_GP_cdnPt_nv_V4:
case Hexagon::STh_GP_cdnNotPt_nv_V4:
- case Hexagon::STrih_GP_cPt_nv_V4:
- case Hexagon::STrih_GP_cNotPt_nv_V4:
- case Hexagon::STrih_GP_cdnPt_nv_V4:
- case Hexagon::STrih_GP_cdnNotPt_nv_V4:
case Hexagon::STrih_abs_nv_V4:
case Hexagon::STrih_abs_cPt_nv_V4:
case Hexagon::STrih_abs_cdnPt_nv_V4:
@@ -1187,7 +795,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STriw_indexed_nv_V4:
case Hexagon::STriw_indexed_shl_nv_V4:
case Hexagon::STriw_shl_nv_V4:
- case Hexagon::STriw_GP_nv_V4:
case Hexagon::STw_GP_nv_V4:
case Hexagon::POST_STwri_nv_V4:
case Hexagon::STriw_cPt_nv_V4:
@@ -1210,10 +817,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STw_GP_cNotPt_nv_V4:
case Hexagon::STw_GP_cdnPt_nv_V4:
case Hexagon::STw_GP_cdnNotPt_nv_V4:
- case Hexagon::STriw_GP_cPt_nv_V4:
- case Hexagon::STriw_GP_cNotPt_nv_V4:
- case Hexagon::STriw_GP_cdnPt_nv_V4:
- case Hexagon::STriw_GP_cdnNotPt_nv_V4:
case Hexagon::STriw_abs_nv_V4:
case Hexagon::STriw_abs_cPt_nv_V4:
case Hexagon::STriw_abs_cdnPt_nv_V4:
@@ -1305,6 +908,16 @@ bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
}
}
+bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
+ if (isNewValueJump(MI))
+ return true;
+
+ if (isNewValueStore(MI))
+ return true;
+
+ return false;
+}
+
bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
}
@@ -1506,26 +1119,11 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
return Hexagon::JMPR_cPt;
// V4 indexed+scaled load.
- case Hexagon::LDrid_indexed_cPt_V4:
- return Hexagon::LDrid_indexed_cNotPt_V4;
- case Hexagon::LDrid_indexed_cNotPt_V4:
- return Hexagon::LDrid_indexed_cPt_V4;
-
case Hexagon::LDrid_indexed_shl_cPt_V4:
return Hexagon::LDrid_indexed_shl_cNotPt_V4;
case Hexagon::LDrid_indexed_shl_cNotPt_V4:
return Hexagon::LDrid_indexed_shl_cPt_V4;
- case Hexagon::LDrib_indexed_cPt_V4:
- return Hexagon::LDrib_indexed_cNotPt_V4;
- case Hexagon::LDrib_indexed_cNotPt_V4:
- return Hexagon::LDrib_indexed_cPt_V4;
-
- case Hexagon::LDriub_indexed_cPt_V4:
- return Hexagon::LDriub_indexed_cNotPt_V4;
- case Hexagon::LDriub_indexed_cNotPt_V4:
- return Hexagon::LDriub_indexed_cPt_V4;
-
case Hexagon::LDrib_indexed_shl_cPt_V4:
return Hexagon::LDrib_indexed_shl_cNotPt_V4;
case Hexagon::LDrib_indexed_shl_cNotPt_V4:
@@ -1536,16 +1134,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
case Hexagon::LDriub_indexed_shl_cNotPt_V4:
return Hexagon::LDriub_indexed_shl_cPt_V4;
- case Hexagon::LDrih_indexed_cPt_V4:
- return Hexagon::LDrih_indexed_cNotPt_V4;
- case Hexagon::LDrih_indexed_cNotPt_V4:
- return Hexagon::LDrih_indexed_cPt_V4;
-
- case Hexagon::LDriuh_indexed_cPt_V4:
- return Hexagon::LDriuh_indexed_cNotPt_V4;
- case Hexagon::LDriuh_indexed_cNotPt_V4:
- return Hexagon::LDriuh_indexed_cPt_V4;
-
case Hexagon::LDrih_indexed_shl_cPt_V4:
return Hexagon::LDrih_indexed_shl_cNotPt_V4;
case Hexagon::LDrih_indexed_shl_cNotPt_V4:
@@ -1556,11 +1144,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
case Hexagon::LDriuh_indexed_shl_cNotPt_V4:
return Hexagon::LDriuh_indexed_shl_cPt_V4;
- case Hexagon::LDriw_indexed_cPt_V4:
- return Hexagon::LDriw_indexed_cNotPt_V4;
- case Hexagon::LDriw_indexed_cNotPt_V4:
- return Hexagon::LDriw_indexed_cPt_V4;
-
case Hexagon::LDriw_indexed_shl_cPt_V4:
return Hexagon::LDriw_indexed_shl_cNotPt_V4;
case Hexagon::LDriw_indexed_shl_cNotPt_V4:
@@ -1686,26 +1269,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
case Hexagon::STw_GP_cNotPt_V4:
return Hexagon::STw_GP_cPt_V4;
- case Hexagon::STrid_GP_cPt_V4:
- return Hexagon::STrid_GP_cNotPt_V4;
- case Hexagon::STrid_GP_cNotPt_V4:
- return Hexagon::STrid_GP_cPt_V4;
-
- case Hexagon::STrib_GP_cPt_V4:
- return Hexagon::STrib_GP_cNotPt_V4;
- case Hexagon::STrib_GP_cNotPt_V4:
- return Hexagon::STrib_GP_cPt_V4;
-
- case Hexagon::STrih_GP_cPt_V4:
- return Hexagon::STrih_GP_cNotPt_V4;
- case Hexagon::STrih_GP_cNotPt_V4:
- return Hexagon::STrih_GP_cPt_V4;
-
- case Hexagon::STriw_GP_cPt_V4:
- return Hexagon::STriw_GP_cNotPt_V4;
- case Hexagon::STriw_GP_cNotPt_V4:
- return Hexagon::STriw_GP_cPt_V4;
-
// Load.
case Hexagon::LDrid_cPt:
return Hexagon::LDrid_cNotPt;
@@ -1971,75 +1534,26 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
Hexagon::JMPR_cNotPt;
// V4 indexed+scaled load.
- case Hexagon::LDrid_indexed_V4:
- return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 :
- Hexagon::LDrid_indexed_cNotPt_V4;
case Hexagon::LDrid_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 :
Hexagon::LDrid_indexed_shl_cNotPt_V4;
- case Hexagon::LDrib_indexed_V4:
- return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 :
- Hexagon::LDrib_indexed_cNotPt_V4;
- case Hexagon::LDriub_indexed_V4:
- return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
- Hexagon::LDriub_indexed_cNotPt_V4;
- case Hexagon::LDriub_ae_indexed_V4:
- return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
- Hexagon::LDriub_indexed_cNotPt_V4;
case Hexagon::LDrib_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 :
Hexagon::LDrib_indexed_shl_cNotPt_V4;
case Hexagon::LDriub_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
Hexagon::LDriub_indexed_shl_cNotPt_V4;
- case Hexagon::LDriub_ae_indexed_shl_V4:
- return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
- Hexagon::LDriub_indexed_shl_cNotPt_V4;
- case Hexagon::LDrih_indexed_V4:
- return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 :
- Hexagon::LDrih_indexed_cNotPt_V4;
- case Hexagon::LDriuh_indexed_V4:
- return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
- Hexagon::LDriuh_indexed_cNotPt_V4;
- case Hexagon::LDriuh_ae_indexed_V4:
- return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
- Hexagon::LDriuh_indexed_cNotPt_V4;
case Hexagon::LDrih_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 :
Hexagon::LDrih_indexed_shl_cNotPt_V4;
case Hexagon::LDriuh_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
Hexagon::LDriuh_indexed_shl_cNotPt_V4;
- case Hexagon::LDriuh_ae_indexed_shl_V4:
- return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
- Hexagon::LDriuh_indexed_shl_cNotPt_V4;
- case Hexagon::LDriw_indexed_V4:
- return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 :
- Hexagon::LDriw_indexed_cNotPt_V4;
case Hexagon::LDriw_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 :
Hexagon::LDriw_indexed_shl_cNotPt_V4;
// V4 Load from global address
- case Hexagon::LDrid_GP_V4:
- return !invertPredicate ? Hexagon::LDrid_GP_cPt_V4 :
- Hexagon::LDrid_GP_cNotPt_V4;
- case Hexagon::LDrib_GP_V4:
- return !invertPredicate ? Hexagon::LDrib_GP_cPt_V4 :
- Hexagon::LDrib_GP_cNotPt_V4;
- case Hexagon::LDriub_GP_V4:
- return !invertPredicate ? Hexagon::LDriub_GP_cPt_V4 :
- Hexagon::LDriub_GP_cNotPt_V4;
- case Hexagon::LDrih_GP_V4:
- return !invertPredicate ? Hexagon::LDrih_GP_cPt_V4 :
- Hexagon::LDrih_GP_cNotPt_V4;
- case Hexagon::LDriuh_GP_V4:
- return !invertPredicate ? Hexagon::LDriuh_GP_cPt_V4 :
- Hexagon::LDriuh_GP_cNotPt_V4;
- case Hexagon::LDriw_GP_V4:
- return !invertPredicate ? Hexagon::LDriw_GP_cPt_V4 :
- Hexagon::LDriw_GP_cNotPt_V4;
-
case Hexagon::LDd_GP_V4:
return !invertPredicate ? Hexagon::LDd_GP_cPt_V4 :
Hexagon::LDd_GP_cNotPt_V4;
@@ -2122,19 +1636,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
Hexagon::STrid_indexed_shl_cNotPt_V4;
// V4 Store to global address
- case Hexagon::STrid_GP_V4:
- return !invertPredicate ? Hexagon::STrid_GP_cPt_V4 :
- Hexagon::STrid_GP_cNotPt_V4;
- case Hexagon::STrib_GP_V4:
- return !invertPredicate ? Hexagon::STrib_GP_cPt_V4 :
- Hexagon::STrib_GP_cNotPt_V4;
- case Hexagon::STrih_GP_V4:
- return !invertPredicate ? Hexagon::STrih_GP_cPt_V4 :
- Hexagon::STrih_GP_cNotPt_V4;
- case Hexagon::STriw_GP_V4:
- return !invertPredicate ? Hexagon::STriw_GP_cPt_V4 :
- Hexagon::STriw_GP_cNotPt_V4;
-
case Hexagon::STd_GP_V4:
return !invertPredicate ? Hexagon::STd_GP_cPt_V4 :
Hexagon::STd_GP_cNotPt_V4;
@@ -2221,38 +1722,141 @@ PredicateInstruction(MachineInstr *MI,
assert (isPredicable(MI) && "Expected predicable instruction");
bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
(Cond[0].getImm() == 0));
+
+  // This will change MI's opcode to its predicated version.
+  // However, its operand list is still the old one, i.e. the
+  // non-predicated one.
MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
- //
- // This assumes that the predicate is always the first operand
- // in the set of inputs.
- //
- MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
- int oper;
- for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) {
- MachineOperand MO = MI->getOperand(oper);
- if ((MO.isReg() && !MO.isUse() && !MO.isImplicit())) {
- break;
- }
- if (MO.isReg()) {
- MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
- MO.isImplicit(), MO.isKill(),
- MO.isDead(), MO.isUndef(),
- MO.isDebug());
- } else if (MO.isImm()) {
- MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
- } else {
- llvm_unreachable("Unexpected operand type");
+ int oper = -1;
+ unsigned int GAIdx = 0;
+
+  // Indicates whether the current MI has a GlobalAddress operand.
+ bool hasGAOpnd = false;
+ std::vector<MachineOperand> tmpOpnds;
+
+  // Indicates whether we need to shift operands to the right.
+ bool needShift = true;
+
+  // Note: the predicate is always the FIRST input operand.
+ if (MI->getNumOperands() == 0) {
+    // The non-predicated version of MI does not take any operands,
+    // i.e. no outs and no ins. In this case the predicate operand is
+    // placed directly at Operands[0], and no operand shift is needed.
+    // Example: BARRIER
+ needShift = false;
+ oper = -1;
+ }
+ else if ( MI->getOperand(MI->getNumOperands()-1).isReg()
+ && MI->getOperand(MI->getNumOperands()-1).isDef()
+ && !MI->getOperand(MI->getNumOperands()-1).isImplicit()) {
+    // The non-predicated version of MI does not have any input operands.
+    // In this case we extend the length of Operands[] by one and copy the
+    // original last operand into the newly allocated slot. At this point
+    // it is just a placeholder; later we will put the predicate operand
+    // directly into it. No operand shift is needed.
+    // Example: r0 = BARRIER (a fake instruction, used here for illustration)
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+ needShift = false;
+ oper = MI->getNumOperands() - 2;
+ }
+ else {
+    // We need to shift all input operands right by one. Duplicate the
+    // last operand into the newly allocated slot.
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+ }
+
+ if (needShift)
+ {
+    // Operands[MI->getNumOperands() - 2] has been copied into
+    // Operands[MI->getNumOperands() - 1], so we start from
+    // Operands[MI->getNumOperands() - 3].
+    // oper is a signed int, so it is fine if "MI->getNumOperands() - 3"
+    // evaluates to -3, -2, or -1.
+ for (oper = MI->getNumOperands() - 3; oper >= 0; --oper)
+ {
+ MachineOperand &MO = MI->getOperand(oper);
+
+      // Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4]   Opnd[5]   Opnd[6]   Opnd[7]
+      // <Def0>  <Def1>  <Use0>  <Use1>  <ImpDef0> <ImpDef1> <ImpUse0> <ImpUse1>
+      //                    ^
+      //                    |
+      //         the predicate operand is inserted here, right after
+      //         the last explicit def
+ if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) {
+ break;
+ }
+ if (MO.isReg()) {
+ MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
+ MO.isImplicit(), MO.isKill(),
+ MO.isDead(), MO.isUndef(),
+ MO.isDebug());
+ }
+ else if (MO.isImm()) {
+ MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
+ }
+ else if (MO.isGlobal()) {
+        // MI cannot have more than one GlobalAddress operand.
+        assert(!hasGAOpnd && "MI can only have one GlobalAddress opnd");
+
+        // MachineOperand has no member function "ChangeToGlobalAddress"
+        // (unlike "ChangeToRegister" and "ChangeToImmediate"), so we have to
+        // remove the GlobalAddress operand and the operands after it from
+        // the Operands[] list first, and add them back after the predicate
+        // operand has been inserted. tmpOpnds[] remembers these operands
+        // before we remove them.
+ tmpOpnds.push_back(MO);
+
+ // Operands[oper] is a GlobalAddress operand;
+ // Operands[oper+1] has been copied into Operands[oper+2];
+ hasGAOpnd = true;
+ GAIdx = oper;
+ continue;
+ }
+ else {
+        llvm_unreachable("Unexpected operand type");
+ }
}
}
int regPos = invertJump ? 1 : 0;
MachineOperand PredMO = Cond[regPos];
+
+  // [oper] now points to the last explicit def; the predicate operand must
+  // be located at [oper+1]. See the diagram above.
+  // This assumes that the predicate is always the first operand, i.e.
+  // Operands[0 + numResults], in the set of inputs. Ideally we would
+  // assert this here, but findFirstPredOperandIdx() would return -1, so
+  // there is no easy way to write that assert.
+  if (oper < -1) oper = -1;
MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
PredMO.isImplicit(), PredMO.isKill(),
PredMO.isDead(), PredMO.isUndef(),
PredMO.isDebug());
+ if (hasGAOpnd)
+ {
+ unsigned int i;
+
+    // Operands[GAIdx] is the original GlobalAddress operand, which is
+    // already copied into tmpOpnds[0].
+    // Operands[GAIdx] now stores a copy of Operands[GAIdx-1].
+    // Operands[GAIdx+1] has already been copied into Operands[GAIdx+2],
+    // so we start from [GAIdx+2].
+ for (i = GAIdx + 2; i < MI->getNumOperands(); ++i)
+ tmpOpnds.push_back(MI->getOperand(i));
+
+    // Remove all operands in the range [(GAIdx+1) ... (MI->getNumOperands()-1)].
+    // It is very important that we always remove from the end of Operands[].
+    // MI->getNumOperands() is at least 2 if control reaches this point.
+ for (i = MI->getNumOperands() - 1; i > GAIdx; --i)
+ MI->RemoveOperand(i);
+
+ for (i = 0; i < tmpOpnds.size(); ++i)
+ MI->addOperand(tmpOpnds[i]);
+ }
+
return true;
}
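+// A worked sketch of the predication above (operands illustrative, not
+// taken from this patch): predicating a hypothetical "r0 = add(r1, #4)"
+// with p0 rewrites the operand list as
+//   before: <Def r0> <Use r1> <Imm 4>
+//   after:  <Def r0> <Use p0> <Use r1> <Imm 4>
+// i.e. the uses are shifted right by one slot and the predicate register
+// is written into the slot immediately after the last explicit def.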
@@ -2286,6 +1890,13 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
}
+bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ assert(isPredicated(MI));
+ return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
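+// A sketch of the TSFlags idiom used above: each instruction property is
+// packed as a value << Pos bitfield in TSFlags, so it is read back with a
+// shift and a mask, e.g.
+//   uint64_t F = MI->getDesc().TSFlags;
+//   bool IsNew = (F >> HexagonII::PredicatedNewPos) &
+//                HexagonII::PredicatedNewMask;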
+
bool
HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
@@ -2354,29 +1965,34 @@ isValidOffset(const int Opcode, const int Offset) const {
// the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is
// inserted to calculate the final address. Due to this reason, the function
// assumes that the "Offset" has correct alignment.
+  // We used to assert if the offset was not properly aligned; however,
+  // there are cases where a misaligned pointer recast can cause this
+  // problem, and we need to allow for it. The front end warns about such
+  // misalignments with respect to the load size.
switch(Opcode) {
case Hexagon::LDriw:
+ case Hexagon::LDriw_indexed:
case Hexagon::LDriw_f:
+ case Hexagon::STriw_indexed:
case Hexagon::STriw:
case Hexagon::STriw_f:
- assert((Offset % 4 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
(Offset <= Hexagon_MEMW_OFFSET_MAX);
case Hexagon::LDrid:
+ case Hexagon::LDrid_indexed:
case Hexagon::LDrid_f:
case Hexagon::STrid:
+ case Hexagon::STrid_indexed:
case Hexagon::STrid_f:
- assert((Offset % 8 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
(Offset <= Hexagon_MEMD_OFFSET_MAX);
case Hexagon::LDrih:
case Hexagon::LDriuh:
case Hexagon::STrih:
- assert((Offset % 2 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
(Offset <= Hexagon_MEMH_OFFSET_MAX);
@@ -2391,54 +2007,28 @@ isValidOffset(const int Opcode, const int Offset) const {
return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
(Offset <= Hexagon_ADDI_OFFSET_MAX);
- case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDSUBi_MEM_V4 :
- case Hexagon::MEMw_ADDi_MEM_V4 :
- case Hexagon::MEMw_SUBi_MEM_V4 :
- case Hexagon::MEMw_ADDr_MEM_V4 :
- case Hexagon::MEMw_SUBr_MEM_V4 :
- case Hexagon::MEMw_ANDr_MEM_V4 :
- case Hexagon::MEMw_ORr_MEM_V4 :
- assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." );
+ case Hexagon::MemOPw_ADDi_V4 :
+ case Hexagon::MemOPw_SUBi_V4 :
+ case Hexagon::MemOPw_ADDr_V4 :
+ case Hexagon::MemOPw_SUBr_V4 :
+ case Hexagon::MemOPw_ANDr_V4 :
+ case Hexagon::MemOPw_ORr_V4 :
return (0 <= Offset && Offset <= 255);
- case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_MEM_V4 :
- case Hexagon::MEMh_ADDi_MEM_V4 :
- case Hexagon::MEMh_SUBi_MEM_V4 :
- case Hexagon::MEMh_ADDr_MEM_V4 :
- case Hexagon::MEMh_SUBr_MEM_V4 :
- case Hexagon::MEMh_ANDr_MEM_V4 :
- case Hexagon::MEMh_ORr_MEM_V4 :
- assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." );
+ case Hexagon::MemOPh_ADDi_V4 :
+ case Hexagon::MemOPh_SUBi_V4 :
+ case Hexagon::MemOPh_ADDr_V4 :
+ case Hexagon::MemOPh_SUBr_V4 :
+ case Hexagon::MemOPh_ANDr_V4 :
+ case Hexagon::MemOPh_ORr_V4 :
return (0 <= Offset && Offset <= 127);
- case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_MEM_V4 :
- case Hexagon::MEMb_ADDi_MEM_V4 :
- case Hexagon::MEMb_SUBi_MEM_V4 :
- case Hexagon::MEMb_ADDr_MEM_V4 :
- case Hexagon::MEMb_SUBr_MEM_V4 :
- case Hexagon::MEMb_ANDr_MEM_V4 :
- case Hexagon::MEMb_ORr_MEM_V4 :
+ case Hexagon::MemOPb_ADDi_V4 :
+ case Hexagon::MemOPb_SUBi_V4 :
+ case Hexagon::MemOPb_ADDr_V4 :
+ case Hexagon::MemOPb_SUBr_V4 :
+ case Hexagon::MemOPb_ANDr_V4 :
+ case Hexagon::MemOPb_ORr_V4 :
return (0 <= Offset && Offset <= 63);
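+  // The three upper bounds above all come from the same unsigned 6-bit
+  // offset field scaled by the access size (assuming the usual #u6-scaled
+  // memop encodings): 64*4-1 = 255 for words, 64*2-1 = 127 for halfwords,
+  // and 64-1 = 63 for bytes.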
  // LDri_pred and STriw_pred are pseudo operations, so they have to take the offset of
@@ -2447,6 +2037,9 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::LDriw_pred:
return true;
+ case Hexagon::LOOP0_i:
+ return isUInt<10>(Offset);
+
// INLINEASM is very special.
case Hexagon::INLINEASM:
return true;
@@ -2491,50 +2084,33 @@ isMemOp(const MachineInstr *MI) const {
switch (MI->getOpcode())
{
default: return false;
- case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDSUBi_MEM_V4 :
- case Hexagon::MEMw_ADDi_MEM_V4 :
- case Hexagon::MEMw_SUBi_MEM_V4 :
- case Hexagon::MEMw_ADDr_MEM_V4 :
- case Hexagon::MEMw_SUBr_MEM_V4 :
- case Hexagon::MEMw_ANDr_MEM_V4 :
- case Hexagon::MEMw_ORr_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_MEM_V4 :
- case Hexagon::MEMh_ADDi_MEM_V4 :
- case Hexagon::MEMh_SUBi_MEM_V4 :
- case Hexagon::MEMh_ADDr_MEM_V4 :
- case Hexagon::MEMh_SUBr_MEM_V4 :
- case Hexagon::MEMh_ANDr_MEM_V4 :
- case Hexagon::MEMh_ORr_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
- case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
- case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
- case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_MEM_V4 :
- case Hexagon::MEMb_ADDi_MEM_V4 :
- case Hexagon::MEMb_SUBi_MEM_V4 :
- case Hexagon::MEMb_ADDr_MEM_V4 :
- case Hexagon::MEMb_SUBr_MEM_V4 :
- case Hexagon::MEMb_ANDr_MEM_V4 :
- case Hexagon::MEMb_ORr_MEM_V4 :
- return true;
+ case Hexagon::MemOPw_ADDi_V4 :
+ case Hexagon::MemOPw_SUBi_V4 :
+ case Hexagon::MemOPw_ADDr_V4 :
+ case Hexagon::MemOPw_SUBr_V4 :
+ case Hexagon::MemOPw_ANDr_V4 :
+ case Hexagon::MemOPw_ORr_V4 :
+ case Hexagon::MemOPh_ADDi_V4 :
+ case Hexagon::MemOPh_SUBi_V4 :
+ case Hexagon::MemOPh_ADDr_V4 :
+ case Hexagon::MemOPh_SUBr_V4 :
+ case Hexagon::MemOPh_ANDr_V4 :
+ case Hexagon::MemOPh_ORr_V4 :
+ case Hexagon::MemOPb_ADDi_V4 :
+ case Hexagon::MemOPb_SUBi_V4 :
+ case Hexagon::MemOPb_ADDr_V4 :
+ case Hexagon::MemOPb_SUBr_V4 :
+ case Hexagon::MemOPb_ANDr_V4 :
+ case Hexagon::MemOPb_ORr_V4 :
+ case Hexagon::MemOPb_SETBITi_V4:
+ case Hexagon::MemOPh_SETBITi_V4:
+ case Hexagon::MemOPw_SETBITi_V4:
+ case Hexagon::MemOPb_CLRBITi_V4:
+ case Hexagon::MemOPh_CLRBITi_V4:
+ case Hexagon::MemOPw_CLRBITi_V4:
+ return true;
}
+ return false;
}
@@ -2661,28 +2237,16 @@ isConditionalLoad (const MachineInstr* MI) const {
case Hexagon::POST_LDriub_cPt :
case Hexagon::POST_LDriub_cNotPt :
return QRI.Subtarget.hasV4TOps();
- case Hexagon::LDrid_indexed_cPt_V4 :
- case Hexagon::LDrid_indexed_cNotPt_V4 :
case Hexagon::LDrid_indexed_shl_cPt_V4 :
case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
- case Hexagon::LDrib_indexed_cPt_V4 :
- case Hexagon::LDrib_indexed_cNotPt_V4 :
case Hexagon::LDrib_indexed_shl_cPt_V4 :
case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriub_indexed_cPt_V4 :
- case Hexagon::LDriub_indexed_cNotPt_V4 :
case Hexagon::LDriub_indexed_shl_cPt_V4 :
case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
- case Hexagon::LDrih_indexed_cPt_V4 :
- case Hexagon::LDrih_indexed_cNotPt_V4 :
case Hexagon::LDrih_indexed_shl_cPt_V4 :
case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriuh_indexed_cPt_V4 :
- case Hexagon::LDriuh_indexed_cNotPt_V4 :
case Hexagon::LDriuh_indexed_shl_cPt_V4 :
case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriw_indexed_cPt_V4 :
- case Hexagon::LDriw_indexed_cNotPt_V4 :
case Hexagon::LDriw_indexed_shl_cPt_V4 :
case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
return QRI.Subtarget.hasV4TOps();
@@ -2765,14 +2329,6 @@ isConditionalStore (const MachineInstr* MI) const {
return QRI.Subtarget.hasV4TOps();
// V4 global address store before promoting to dot new.
- case Hexagon::STrid_GP_cPt_V4 :
- case Hexagon::STrid_GP_cNotPt_V4 :
- case Hexagon::STrib_GP_cPt_V4 :
- case Hexagon::STrib_GP_cNotPt_V4 :
- case Hexagon::STrih_GP_cPt_V4 :
- case Hexagon::STrih_GP_cNotPt_V4 :
- case Hexagon::STriw_GP_cPt_V4 :
- case Hexagon::STriw_GP_cNotPt_V4 :
case Hexagon::STd_GP_cPt_V4 :
case Hexagon::STd_GP_cNotPt_V4 :
case Hexagon::STb_GP_cPt_V4 :
@@ -2813,7 +2369,33 @@ isConditionalStore (const MachineInstr* MI) const {
}
}
+// Returns true if the instruction is a dot-new instruction, i.e. either a
+// new-value (register dot-new) or a predicated dot-new instruction.
+bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const {
+ return (isNewValueInst(MI) ||
+ (isPredicated(MI) && isPredicatedNew(MI)));
+}
+
+unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ return((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask);
+}
+
+/// immediateExtend - Changes the instruction in place to one using an immediate
+/// extender.
+void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
+  assert((isExtendable(MI) || isConstExtended(MI)) &&
+ "Instruction must be extendable");
+ // Find which operand is extendable.
+ short ExtOpNum = getCExtOpNum(MI);
+ MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // This needs to be something we understand.
+ assert((MO.isMBB() || MO.isImm()) &&
+ "Branch with unknown extendable field type");
+ // Mark given operand as extended.
+ MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
+}
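+// Usage sketch (illustrative; not a call site from this patch, HII being
+// some HexagonInstrInfo pointer): a pass that wants to force an extender
+// on an out-of-range operand can do
+//   if (HII->isConstExtended(MI))
+//     HII->immediateExtend(MI);
+// after which the operand carries the HMOTF_ConstExtended target flag.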
DFAPacketizer *HexagonInstrInfo::
CreateTargetScheduleState(const TargetMachine *TM,
@@ -2840,3 +2422,155 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
}
+
+bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
+
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
+
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+ if (isExtended) // Instruction must be extended.
+ return true;
+
+ unsigned isExtendable = (F >> HexagonII::ExtendablePos)
+ & HexagonII::ExtendableMask;
+ if (!isExtendable)
+ return false;
+
+ short ExtOpNum = getCExtOpNum(MI);
+ const MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // Use MO operand flags to determine if MO
+ // has the HMOTF_ConstExtended flag set.
+  if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ return true;
+ // If this is a Machine BB address we are talking about, and it is
+ // not marked as extended, say so.
+ if (MO.isMBB())
+ return false;
+
+ // We could be using an instruction with an extendable immediate and shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+  // We currently only handle isGlobal() and isSymbol(), since these are
+  // the only kinds of objects we end up with here for now; other operand
+  // kinds may need to be handled in the future.
+ if (MO.isGlobal() || MO.isSymbol())
+ return true;
+
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int MinValue = getMinValue(MI);
+ int MaxValue = getMaxValue(MI);
+ int ImmValue = MO.getImm();
+
+ return (ImmValue < MinValue || ImmValue > MaxValue);
+}
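+// Example of the final range check (a sketch): ADD_ri is defined with a
+// signed 16-bit extent, so an immediate of 70000 exceeds getMaxValue()
+// (32767) and the instruction must be constant-extended, while #100 fits
+// and needs no extender.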
+
+// Returns true if a particular operand is extendable for an instruction.
+bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
+ unsigned short OperandNum) const {
+ // Constant extenders are allowed only for V4 and above.
+ if (!Subtarget.hasV4TOps())
+ return false;
+
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+ == OperandNum;
+}
+
+// Returns the operand index of the extendable operand for a
+// constant-extended instruction.
+unsigned short HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+// Returns the min value that doesn't need to be extended.
+int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return -1 << (bits - 1);
+ else
+ return 0;
+}
+
+// Returns the max value that doesn't need to be extended.
+int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return ~(-1 << (bits - 1));
+ else
+ return ~(-1 << bits);
+}
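+// Worked example for the two functions above: with an 8-bit extent,
+//   signed:   getMinValue() = -1 << 7     = -128
+//             getMaxValue() = ~(-1 << 7)  =  127
+//   unsigned: getMinValue() = 0
+//             getMaxValue() = ~(-1 << 8)  =  255
+// Immediates outside this window require a constant extender.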
+
+// Returns true if an instruction can be converted into a non-extended
+// equivalent instruction.
+bool HexagonInstrInfo::NonExtEquivalentExists (const MachineInstr *MI) const {
+
+ short NonExtOpcode;
+ // Check if the instruction has a register form that uses register in place
+ // of the extended operand, if so return that as the non-extended form.
+ if (Hexagon::getRegForm(MI->getOpcode()) >= 0)
+ return true;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+    // Check addressing mode and retrieve the non-ext equivalent instruction.
+
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ // Load/store with absolute addressing mode can be converted into
+ // base+offset mode.
+ NonExtOpcode = Hexagon::getBasedWithImmOffset(MI->getOpcode());
+ break;
+ case HexagonII::BaseImmOffset :
+ // Load/store with base+offset addressing mode can be converted into
+      // base+register-offset addressing mode. However, the left-shift
+      // operand should be set to 0.
+ NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ break;
+ default:
+ return false;
+ }
+ if (NonExtOpcode < 0)
+ return false;
+ return true;
+ }
+ return false;
+}
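+// For instance (illustrative, assuming the TableGen-generated relation
+// tables provide the mapping): an absolute-mode load such as
+// "r0 = memw(##foo)" can shed its constant extender by switching to the
+// base+offset form once the address has been materialized in a register.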
+
+// Returns opcode of the non-extended equivalent instruction.
+short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
+
+ // Check if the instruction has a register form that uses register in place
+ // of the extended operand, if so return that as the non-extended form.
+ short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode());
+ if (NonExtOpcode >= 0)
+ return NonExtOpcode;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+    // Check addressing mode and retrieve the non-ext equivalent instruction.
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ return Hexagon::getBasedWithImmOffset(MI->getOpcode());
+ case HexagonII::BaseImmOffset :
+ return Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ default:
+ return -1;
+ }
+ }
+ return -1;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 2bb53f899ce1..5df13a88b5d3 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -16,8 +16,8 @@
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
@@ -66,6 +66,10 @@ public:
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
+ virtual bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const;
+
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -123,6 +127,7 @@ public:
const BranchProbability &Probability) const;
virtual bool isPredicated(const MachineInstr *MI) const;
+ virtual bool isPredicatedNew(const MachineInstr *MI) const;
virtual bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
virtual bool
@@ -136,6 +141,11 @@ public:
isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles,
const BranchProbability &Probability) const;
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
virtual DFAPacketizer*
CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
@@ -165,6 +175,8 @@ public:
bool isConditionalALU32 (const MachineInstr* MI) const;
bool isConditionalLoad (const MachineInstr* MI) const;
bool isConditionalStore(const MachineInstr* MI) const;
+ bool isNewValueInst(const MachineInstr* MI) const;
+ bool isDotNewInst(const MachineInstr* MI) const;
bool isDeallocRet(const MachineInstr *MI) const;
unsigned getInvertedPredicatedOpcode(const int Opc) const;
bool isExtendable(const MachineInstr* MI) const;
@@ -173,9 +185,18 @@ public:
bool isNewValueStore(const MachineInstr* MI) const;
bool isNewValueJump(const MachineInstr* MI) const;
bool isNewValueJumpCandidate(const MachineInstr *MI) const;
- unsigned getImmExtForm(const MachineInstr* MI) const;
- unsigned getNormalBranchForm(const MachineInstr* MI) const;
+
+ void immediateExtend(MachineInstr *MI) const;
+ bool isConstExtended(MachineInstr *MI) const;
+ unsigned getAddrMode(const MachineInstr* MI) const;
+ bool isOperandExtended(const MachineInstr *MI,
+ unsigned short OperandNum) const;
+ unsigned short getCExtOpNum(const MachineInstr *MI) const;
+ int getMinValue(const MachineInstr *MI) const;
+ int getMaxValue(const MachineInstr *MI) const;
+ bool NonExtEquivalentExists (const MachineInstr *MI) const;
+ short getNonExtOpcode(const MachineInstr *MI) const;
private:
int getMatchingCondBranchOpcode(int Opc, bool sense) const;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 1d4a7060adf0..74dc0ca72a04 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -12,88 +12,7 @@
//===----------------------------------------------------------------------===//
include "HexagonInstrFormats.td"
-include "HexagonImmediates.td"
-
-//===----------------------------------------------------------------------===//
-// Classes used for relation maps.
-//===----------------------------------------------------------------------===//
-// PredRel - Filter class used to relate non-predicated instructions with their
-// predicated forms.
-class PredRel;
-// PredNewRel - Filter class used to relate predicated instructions with their
-// predicate-new forms.
-class PredNewRel: PredRel;
-// ImmRegRel - Filter class used to relate instructions having reg-reg form
-// with their reg-imm counterparts.
-class ImmRegRel;
-//===----------------------------------------------------------------------===//
-// Hexagon Instruction Predicate Definitions.
-//===----------------------------------------------------------------------===//
-def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
-def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
-def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
-def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
-def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
-def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
-def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
-def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
-def HasV5T : Predicate<"Subtarget.hasV5TOps()">;
-def NoV5T : Predicate<"!Subtarget.hasV5TOps()">;
-def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
-def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">;
-
-// Addressing modes.
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
-def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
-def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
-def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
-def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
-def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
-def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
-def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
-
-// Address operands.
-def MEMrr : Operand<i32> {
- let PrintMethod = "printMEMrrOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-// Address operands
-def MEMri : Operand<i32> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri_s11_2 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
-def FrameIndex : Operand<i32> {
- let PrintMethod = "printFrameIndexOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
-let PrintMethod = "printGlobalOperand" in
- def globaladdress : Operand<i32>;
-
-let PrintMethod = "printJumpTable" in
- def jumptablebase : Operand<i32>;
-
-def brtarget : Operand<OtherVT>;
-def calltarget : Operand<i32>;
-
-def bblabel : Operand<i32>;
-def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">;
-
-def symbolHi32 : Operand<i32> {
- let PrintMethod = "printSymbolHi";
-}
-def symbolLo32 : Operand<i32> {
- let PrintMethod = "printSymbolLo";
-}
+include "HexagonOperands.td"
// Multi-class for logical operators.
multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
@@ -122,40 +41,54 @@ multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
(OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
}
-multiclass CMP32_rr_ri_s10<string OpcStr, PatFrag OpNode> {
- def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), s10ImmPred:$c))]>;
+multiclass CMP32_rr_ri_s10<string OpcStr, string CextOp, PatFrag OpNode> {
+ let CextOpcode = CextOp in {
+ let InputType = "reg" in
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
+
+ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1,
+ opExtentBits = 10, InputType = "imm" in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Ext:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), s10ExtPred:$c))]>;
+ }
}
-multiclass CMP32_rr_ri_u9<string OpcStr, PatFrag OpNode> {
- def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), u9ImmPred:$c))]>;
+multiclass CMP32_rr_ri_u9<string OpcStr, string CextOp, PatFrag OpNode> {
+ let CextOpcode = CextOp in {
+ let InputType = "reg" in
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
+
+ let isExtendable = 1, opExtendable = 2, isExtentSigned = 0,
+ opExtentBits = 9, InputType = "imm" in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Ext:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set (i1 PredRegs:$dst),
+ (OpNode (i32 IntRegs:$b), u9ExtPred:$c))]>;
+ }
}
multiclass CMP32_ri_u8<string OpcStr, PatFrag OpNode> {
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Imm:$c),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Ext:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
[(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
- u8ImmPred:$c))]>;
+ u8ExtPred:$c))]>;
}
multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Ext:$c),
!strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
[(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
- s8ImmPred:$c))]>;
+ s8ExtPred:$c))]>;
}
}
@@ -165,8 +98,8 @@ multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
multiclass ALU32_Pbase<string mnemonic, bit isNot,
bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : ALU32_rr<(outs IntRegs:$dst),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
") $dst = ")#mnemonic#"($src2, $src3)",
@@ -174,10 +107,10 @@ multiclass ALU32_Pbase<string mnemonic, bit isNot,
}
multiclass ALU32_Pred<string mnemonic, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ALU32_Pbase<mnemonic, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ALU32_Pbase<mnemonic, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : ALU32_Pbase<mnemonic, PredNot, 1>;
+ defm _cdn#NAME : ALU32_Pbase<mnemonic, PredNot, 1>;
}
}
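+// Illustrative expansion (not spelled out in this patch): instantiated
+// through ALU32_base below, "defm SUB_rr" produces SUB_rr plus the
+// predicated forms SUB_rr_cPt/SUB_rr_cNotPt and the predicate-new forms
+// SUB_rr_cdnPt/SUB_rr_cdnNotPt.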
@@ -185,7 +118,7 @@ let InputType = "reg" in
multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_rr in {
let isPredicable = 1 in
- def #NAME# : ALU32_rr<(outs IntRegs:$dst),
+ def NAME : ALU32_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = "#mnemonic#"($src1, $src2)",
[(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
@@ -211,33 +144,35 @@ defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel;
// ALU32/ALU (ADD with register-immediate form)
//===----------------------------------------------------------------------===//
multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
- let PNewValue = #!if(isPredNew, "new", "") in
- def #NAME# : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, s8Imm: $src3),
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
") $dst = ")#mnemonic#"($src2, #$src3)",
[]>;
}
multiclass ALU32ri_Pred<string mnemonic, bit PredNot> {
- let PredSense = #!if(PredNot, "false", "true") in {
- defm _c#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 0>;
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>;
// Predicate new
- defm _cdn#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 1>;
+ defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>;
}
}
-let InputType = "imm" in
+let isExtendable = 1, InputType = "imm" in
multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_ri in {
- let isPredicable = 1 in
- def #NAME# : ALU32_ri<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s16Imm:$src2),
+ let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16,
+ isPredicable = 1 in
+ def NAME : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s16Ext:$src2),
"$dst = "#mnemonic#"($src1, #$src2)",
[(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
- (s16ImmPred:$src2)))]>;
+ (s16ExtPred:$src2)))]>;
- let neverHasSideEffects = 1, isPredicated = 1 in {
+ let opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+ neverHasSideEffects = 1, isPredicated = 1 in {
defm Pt : ALU32ri_Pred<mnemonic, 0>;
defm NotPt : ALU32ri_Pred<mnemonic, 1>;
}
@@ -246,23 +181,26 @@ multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> {
defm ADD_ri : ALU32ri_base<"add", "ADD", add>, ImmRegRel, PredNewRel;
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
+CextOpcode = "OR", InputType = "imm" in
def OR_ri : ALU32_ri<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s10Imm:$src2),
+ (ins IntRegs:$src1, s10Ext:$src2),
"$dst = or($src1, #$src2)",
[(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
- s10ImmPred:$src2))]>;
+ s10ExtPred:$src2))]>, ImmRegRel;
def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1),
"$dst = not($src1)",
[(set (i32 IntRegs:$dst), (not (i32 IntRegs:$src1)))]>;
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
+InputType = "imm", CextOpcode = "AND" in
def AND_ri : ALU32_ri<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s10Imm:$src2),
+ (ins IntRegs:$src1, s10Ext:$src2),
"$dst = and($src1, #$src2)",
[(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
- s10ImmPred:$src2))]>;
-
+ s10ExtPred:$src2))]>, ImmRegRel;
// Negate.
def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = neg($src1)",
@@ -274,27 +212,138 @@ def NOP : ALU32_rr<(outs), (ins),
[]>;
// Rd32=sub(#s10,Rs32)
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 10,
+CextOpcode = "SUB", InputType = "imm" in
def SUB_ri : ALU32_ri<(outs IntRegs:$dst),
- (ins s10Imm:$src1, IntRegs:$src2),
+ (ins s10Ext:$src1, IntRegs:$src2),
"$dst = sub(#$src1, $src2)",
- [(set IntRegs:$dst, (sub s10ImmPred:$src1, IntRegs:$src2))]>;
-
-// Transfer immediate.
-let isMoveImm = 1, isReMaterializable = 1, isPredicable = 1 in
-def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1),
- "$dst = #$src1",
- [(set (i32 IntRegs:$dst), s16ImmPred:$src1)]>;
-
-// Transfer register.
-let neverHasSideEffects = 1, isPredicable = 1 in
-def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = $src1",
- []>;
+ [(set IntRegs:$dst, (sub s10ExtPred:$src1, IntRegs:$src2))]>,
+ ImmRegRel;
-let neverHasSideEffects = 1, isPredicable = 1 in
-def TFR64 : ALU32_ri<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- "$dst = $src1",
- []>;
+
+multiclass TFR_Pred<bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
+ []>;
+ // Predicate new
+ let PNewValue = "new" in
+ def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
+ []>;
+ }
+}
+
+let InputType = "reg", neverHasSideEffects = 1 in
+multiclass TFR_base<string CextOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let isPredicable = 1 in
+ def NAME : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+
+ let isPredicated = 1 in {
+ defm Pt : TFR_Pred<0>;
+ defm NotPt : TFR_Pred<1>;
+ }
+ }
+}
+
+class T_TFR64_Pred<bit PredNot, bit isPredNew>
+ : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#
+ !if(isPredNew, ".new) ", ") ")#"$dst = $src2", []>
+{
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+
+ let IClass = 0b1111;
+ let Inst{27-24} = 0b1101;
+ let Inst{13} = isPredNew;
+ let Inst{7} = PredNot;
+ let Inst{4-0} = dst;
+ let Inst{6-5} = src1;
+ let Inst{20-17} = src2{4-1};
+ let Inst{16} = 0b1;
+ let Inst{12-9} = src2{4-1};
+ let Inst{8} = 0b0;
+}
+
+multiclass TFR64_Pred<bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : T_TFR64_Pred<PredNot, 0>;
+
+ let PNewValue = "new" in
+ def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new
+ }
+}
+
+let neverHasSideEffects = 1 in
+multiclass TFR64_base<string BaseName> {
+ let BaseOpcode = BaseName in {
+ let isPredicable = 1 in
+ def NAME : ALU32Inst <(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1),
+ "$dst = $src1" > {
+ bits<5> dst;
+ bits<5> src1;
+
+ let IClass = 0b1111;
+ let Inst{27-23} = 0b01010;
+ let Inst{4-0} = dst;
+ let Inst{20-17} = src1{4-1};
+ let Inst{16} = 0b1;
+ let Inst{12-9} = src1{4-1};
+ let Inst{8} = 0b0;
+ }
+
+ let isPredicated = 1 in {
+ defm Pt : TFR64_Pred<0>;
+ defm NotPt : TFR64_Pred<1>;
+ }
+ }
+}
+
+multiclass TFRI_Pred<bit PredNot> {
+ let isMoveImm = 1, PredSense = !if(PredNot, "false", "true") in {
+ def _c#NAME : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Ext:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2",
+ []>;
+
+ // Predicate new
+ let PNewValue = "new" in
+ def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Ext:$src2),
+ !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2",
+ []>;
+ }
+}
+
+let InputType = "imm", isExtendable = 1, isExtentSigned = 1 in
+multiclass TFRI_base<string CextOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#I in {
+ let isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16,
+ isMoveImm = 1, isPredicable = 1, isReMaterializable = 1 in
+ def NAME : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
+ "$dst = #$src1",
+ [(set (i32 IntRegs:$dst), s16ExtPred:$src1)]>;
+
+ let opExtendable = 2, opExtentBits = 12, neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ defm Pt : TFRI_Pred<0>;
+ defm NotPt : TFRI_Pred<1>;
+ }
+ }
+}
+
+defm TFRI : TFRI_base<"TFR">, ImmRegRel, PredNewRel;
+defm TFR : TFR_base<"TFR">, ImmRegRel, PredNewRel;
+defm TFR64 : TFR64_base<"TFR64">, PredNewRel;
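+// Illustrative expansion: "defm TFRI" now yields TFRI together with the
+// predicated transfers TFRI_cPt/TFRI_cNotPt and their .new forms
+// TFRI_cdnPt/TFRI_cdnNotPt, replacing the hand-written defs removed below.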
// Transfer control register.
let neverHasSideEffects = 1 in
@@ -311,17 +360,50 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
//===----------------------------------------------------------------------===//
// Combine.
-let isPredicable = 1, neverHasSideEffects = 1 in
-def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = combine($src1, $src2)",
- []>;
-let neverHasSideEffects = 1 in
-def COMBINE_ii : ALU32_ii<(outs DoubleRegs:$dst),
- (ins s8Imm:$src1, s8Imm:$src2),
- "$dst = combine(#$src1, #$src2)",
- []>;
+def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+
+def HexagonWrapperCombineII :
+ SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>;
+def HexagonWrapperCombineRR :
+ SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
+
+// Combines the two integer registers SRC1 and SRC2 into a double register.
+let isPredicable = 1 in
+def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2),
+ "$dst = combine($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2))))]>;
+
+// Rd=combine(Rt.[HL], Rs.[HL])
+class COMBINE_halves<string A, string B>: ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1,
+ IntRegs:$src2),
+ "$dst = combine($src1."# A #", $src2."# B #")", []>;
+
+let isPredicable = 1 in {
+ def COMBINE_hh : COMBINE_halves<"H", "H">;
+ def COMBINE_hl : COMBINE_halves<"H", "L">;
+ def COMBINE_lh : COMBINE_halves<"L", "H">;
+ def COMBINE_ll : COMBINE_halves<"L", "L">;
+}
+
+def : Pat<(i32 (trunc (i64 (srl (i64 DoubleRegs:$a), (i32 16))))),
+ (COMBINE_lh (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_hireg),
+ (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_loreg))>;
+
+// Combines the two immediates SRC1 and SRC2 into a double register.
+class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
+ ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
+ "$dst = combine(#$src1, #$src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
+def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
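+// Illustrative selection: an i64 built from two constants, e.g.
+// (HexagonWrapperCombineII -1, 0), becomes "r1:0 = combine(#-1, #0)";
+// the first immediate is extendable to 32 bits via a constant extender.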
// Mux.
def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
@@ -330,66 +412,92 @@ def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
"$dst = vmux($src1, $src2, $src3)",
[]>;
+let CextOpcode = "MUX", InputType = "reg" in
def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst = mux($src1, $src2, $src3)",
- [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
- (i32 IntRegs:$src2),
- (i32 IntRegs:$src3))))]>;
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))]>, ImmRegRel;
-def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "MUX", InputType = "imm" in
+def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
IntRegs:$src3),
"$dst = mux($src1, #$src2, $src3)",
- [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
- s8ImmPred:$src2,
- (i32 IntRegs:$src3))))]>;
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), s8ExtPred:$src2,
+ (i32 IntRegs:$src3))))]>, ImmRegRel;
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "MUX", InputType = "imm" in
def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
- s8Imm:$src3),
+ s8Ext:$src3),
"$dst = mux($src1, $src2, #$src3)",
- [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
- (i32 IntRegs:$src2),
- s8ImmPred:$src3)))]>;
+ [(set (i32 IntRegs:$dst),
+ (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
+ s8ExtPred:$src3)))]>, ImmRegRel;
-def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
+def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
s8Imm:$src3),
"$dst = mux($src1, #$src2, #$src3)",
[(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
- s8ImmPred:$src2,
+ s8ExtPred:$src2,
s8ImmPred:$src3)))]>;
-// Shift halfword.
-let isPredicable = 1 in
-def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = aslh($src1)",
- [(set (i32 IntRegs:$dst), (shl 16, (i32 IntRegs:$src1)))]>;
+// ALU32 - aslh, asrh, sxtb, sxth, zxtb, zxth
+multiclass ALU32_2op_Pbase<string mnemonic, bit isNot, bit isPredNew> {
+ let isPredicatedNew = isPredNew in
+ def NAME : ALU32Inst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
+ ") $dst = ")#mnemonic#"($src2)">,
+ Requires<[HasV4T]>;
+}
-let isPredicable = 1 in
-def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = asrh($src1)",
- [(set (i32 IntRegs:$dst), (sra 16, (i32 IntRegs:$src1)))]>;
+multiclass ALU32_2op_Pred<string mnemonic, bit PredNot> {
+ let isPredicatedFalse = PredNot in {
+ defm _c#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 1>;
+ }
+}
-// Sign extend.
-let isPredicable = 1 in
-def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = sxtb($src1)",
- [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i8))]>;
+multiclass ALU32_2op_base<string mnemonic> {
+ let BaseOpcode = mnemonic in {
+ let isPredicable = 1, neverHasSideEffects = 1 in
+ def NAME : ALU32Inst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1),
+ "$dst = "#mnemonic#"($src1)">;
+
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT, isPredicated = 1,
+ neverHasSideEffects = 1 in {
+ defm Pt_V4 : ALU32_2op_Pred<mnemonic, 0>;
+ defm NotPt_V4 : ALU32_2op_Pred<mnemonic, 1>;
+ }
+ }
+}
-let isPredicable = 1 in
-def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = sxth($src1)",
- [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i16))]>;
-
-// Zero extend.
-let isPredicable = 1, neverHasSideEffects = 1 in
-def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = zxtb($src1)",
- []>;
+defm ASLH : ALU32_2op_base<"aslh">, PredNewRel;
+defm ASRH : ALU32_2op_base<"asrh">, PredNewRel;
+defm SXTB : ALU32_2op_base<"sxtb">, PredNewRel;
+defm SXTH : ALU32_2op_base<"sxth">, PredNewRel;
+defm ZXTB : ALU32_2op_base<"zxtb">, PredNewRel;
+defm ZXTH : ALU32_2op_base<"zxth">, PredNewRel;
+
+def : Pat <(shl (i32 IntRegs:$src1), (i32 16)),
+ (ASLH IntRegs:$src1)>;
+
+def : Pat <(sra (i32 IntRegs:$src1), (i32 16)),
+ (ASRH IntRegs:$src1)>;
+
+def : Pat <(sext_inreg (i32 IntRegs:$src1), i8),
+ (SXTB IntRegs:$src1)>;
+
+def : Pat <(sext_inreg (i32 IntRegs:$src1), i16),
+ (SXTH IntRegs:$src1)>;
-let isPredicable = 1, neverHasSideEffects = 1 in
-def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = zxth($src1)",
- []>;
//===----------------------------------------------------------------------===//
// ALU32/PERM -
//===----------------------------------------------------------------------===//
@@ -400,98 +508,66 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
//===----------------------------------------------------------------------===//
// Conditional combine.
-
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1 in {
def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1) $dst = combine($src2, $src3)",
[]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedFalse = 1 in
def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1) $dst = combine($src2, $src3)",
[]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedNew = 1 in
def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if ($src1.new) $dst = combine($src2, $src3)",
[]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
+let isPredicatedNew = 1, isPredicatedFalse = 1 in
def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"if (!$src1.new) $dst = combine($src2, $src3)",
[]>;
+}
-// Conditional transfer.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = $src2",
- []>;
+// Compare.
+defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel;
+defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel;
+defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
+defm CMPLTU : CMP32_rr<"cmp.ltu", setult>;
+defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", "CMPEQ", seteq>, ImmRegRel;
+defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
+defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>;
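// Sketch of the ImmRegRel intent (assumed reading of the relation tables):
// tagging related defs with the same CextOpcode, e.g. "CMPGT", lets the
// TableGen relation map between the register and immediate encodings,
//   p0 = cmp.gt(r0, r1)    <->    p0 = cmp.gt(r0, #100)
// so later passes can switch forms without hand-maintained tables.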
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
- IntRegs:$src2),
- "if (!$src1) $dst = $src2",
- []>;
+def CTLZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = cl0($src1)",
+ [(set (i32 IntRegs:$dst), (ctlz (i32 IntRegs:$src1)))]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR64_cPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
- DoubleRegs:$src2),
- "if ($src1) $dst = $src2",
- []>;
+def CTTZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = ct0($src1)",
+ [(set (i32 IntRegs:$dst), (cttz (i32 IntRegs:$src1)))]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR64_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
- DoubleRegs:$src2),
- "if (!$src1) $dst = $src2",
- []>;
+def CTLZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = cl0($src1)",
+ [(set (i32 IntRegs:$dst), (i32 (trunc (ctlz (i64 DoubleRegs:$src1)))))]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2),
- "if ($src1) $dst = #$src2",
- []>;
+def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = ct0($src1)",
+ [(set (i32 IntRegs:$dst), (i32 (trunc (cttz (i64 DoubleRegs:$src1)))))]>;
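// Note (sketch): cl0/ct0 on a 64-bit register pair produce a small count
// (0..64) that fits a 32-bit register, which is why the i64 ctlz/cttz
// result is wrapped in an explicit trunc, e.g.
//   r0 = cl0(r1:0)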
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
- s12Imm:$src2),
- "if (!$src1) $dst = #$src2",
- []>;
+def TSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
- IntRegs:$src2),
- "if ($src1.new) $dst = $src2",
- []>;
+def TSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (and (shl 1, (u5ImmPred:$src2)), (i32 IntRegs:$src1)), 0))]>;
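// Worked example (illustrative): with $src2 = 3 the pattern reduces to
//   ((1 << 3) & $src1) != 0
// so p0 = tstbit(r0, #3) is true exactly when bit 3 of r0 is set.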
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
- IntRegs:$src2),
- "if (!$src1.new) $dst = $src2",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cdnPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
- s12Imm:$src2),
- "if ($src1.new) $dst = #$src2",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
- s12Imm:$src2),
- "if (!$src1.new) $dst = #$src2",
- []>;
-
-// Compare.
-defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>;
-defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>;
-defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
-defm CMPLTU : CMP32_rr<"cmp.ltu", setult>;
-defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;
-defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
-defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>;
//===----------------------------------------------------------------------===//
// ALU32/PRED -
//===----------------------------------------------------------------------===//
@@ -608,11 +684,6 @@ def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
// Subtract halfword.
-// Transfer register.
-let neverHasSideEffects = 1 in
-def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
- "$dst = $src1",
- []>;
//===----------------------------------------------------------------------===//
// ALU64/ALU -
//===----------------------------------------------------------------------===//
@@ -784,7 +855,7 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
// JR +
//===----------------------------------------------------------------------===//
def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
// Jump to address from register.
let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1,
@@ -818,241 +889,218 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
// LD +
//===----------------------------------------------------------------------===//
///
-/// Make sure that in post increment load, the first operand is always the post
-/// increment operand.
-///
-// Load doubleword.
-let isPredicable = 1 in
-def LDrid : LDInst<(outs DoubleRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memd($addr)",
- [(set (i64 DoubleRegs:$dst), (i64 (load ADDRriS11_3:$addr)))]>;
-
-let isPredicable = 1, AddedComplexity = 20 in
-def LDrid_indexed : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, s11_3Imm:$offset),
- "$dst = memd($src1+#$offset)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (load (add (i32 IntRegs:$src1),
- s11_3ImmPred:$offset))))]>;
+// Load -- MEMri operand
+multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($addr)",
+ []>;
+}
-let neverHasSideEffects = 1 in
-def LDrid_GP : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memd(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
+multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
-let neverHasSideEffects = 1 in
-def LDd_GP : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memd(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrid : LDInst2PI<(outs DoubleRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memd($src1++#$offset)",
- [],
- "$src1 = $dst2">;
+let isExtendable = 1, neverHasSideEffects = 1 in
+multiclass LD_MEMri<string mnemonic, string CextOp, RegisterClass RC,
+ bits<5> ImmBits, bits<5> PredImmBits> {
-// Load doubleword conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memd($addr)",
- []>;
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME : LDInst2<(outs RC:$dst), (ins MEMri:$addr),
+ "$dst = "#mnemonic#"($addr)",
+ []>;
+ let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : LD_MEMri_Pred<mnemonic, RC, 0 >;
+ defm NotPt : LD_MEMri_Pred<mnemonic, RC, 1 >;
+ }
+ }
+}
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cNotPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memd($addr)",
- []>;
+let addrMode = BaseImmOffset, isMEMri = "true" in {
+ defm LDrib: LD_MEMri < "memb", "LDrib", IntRegs, 11, 6>, AddrModeRel;
+ defm LDriub: LD_MEMri < "memub" , "LDriub", IntRegs, 11, 6>, AddrModeRel;
+ defm LDrih: LD_MEMri < "memh", "LDrih", IntRegs, 12, 7>, AddrModeRel;
+ defm LDriuh: LD_MEMri < "memuh", "LDriuh", IntRegs, 12, 7>, AddrModeRel;
+ defm LDriw: LD_MEMri < "memw", "LDriw", IntRegs, 13, 8>, AddrModeRel;
+ defm LDrid: LD_MEMri < "memd", "LDrid", DoubleRegs, 14, 9>, AddrModeRel;
+}
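// Rough expansion sketch (names assumed from the suffix scheme): each defm
// above produces a predicable base load plus four predicated variants, all
// tied together through BaseOpcode for AddrModeRel lookups, e.g. for LDriw:
//   LDriw            $dst = memw($addr)
//   LDriw_cPt        if ($src1) $dst = memw($addr)
//   LDriw_cNotPt     if (!$src1) $dst = memw($addr)
//   LDriw_cdnPt      if ($src1.new) $dst = memw($addr)
//   LDriw_cdnNotPt   if (!$src1.new) $dst = memw($addr)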
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if ($src1) $dst = memd($src2+#$src3)",
- []>;
+def : Pat < (i32 (sextloadi8 ADDRriS11_0:$addr)),
+ (LDrib ADDRriS11_0:$addr) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cNotPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if (!$src1) $dst = memd($src2+#$src3)",
- []>;
+def : Pat < (i32 (zextloadi8 ADDRriS11_0:$addr)),
+ (LDriub ADDRriS11_0:$addr) >;
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cPt : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if ($src1) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">;
+def : Pat < (i32 (sextloadi16 ADDRriS11_1:$addr)),
+ (LDrih ADDRriS11_1:$addr) >;
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cNotPt : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if (!$src1) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">;
+def : Pat < (i32 (zextloadi16 ADDRriS11_1:$addr)),
+ (LDriuh ADDRriS11_1:$addr) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cdnPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memd($addr)",
- []>;
+def : Pat < (i32 (load ADDRriS11_2:$addr)),
+ (LDriw ADDRriS11_2:$addr) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cdnNotPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memd($addr)",
- []>;
+def : Pat < (i64 (load ADDRriS11_3:$addr)),
+ (LDrid ADDRriS11_3:$addr) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cdnPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if ($src1.new) $dst = memd($src2+#$src3)",
- []>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cdnNotPt : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
- "if (!$src1.new) $dst = memd($src2+#$src3)",
+// Load - Base with Immediate offset addressing mode
+multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2+#$src3)",
[]>;
+}
+multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
+ }
+}
-// Load byte.
-let isPredicable = 1 in
-def LDrib : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memb($addr)",
- [(set (i32 IntRegs:$dst), (i32 (sextloadi8 ADDRriS11_0:$addr)))]>;
-
-// Load byte any-extend.
-def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
- (i32 (LDrib ADDRriS11_0:$addr)) >;
+let isExtendable = 1, neverHasSideEffects = 1 in
+multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+ bits<5> PredImmBits> {
-// Indexed load byte.
-let isPredicable = 1, AddedComplexity = 20 in
-def LDrib_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_0Imm:$offset),
- "$dst = memb($src1+#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi8 (add (i32 IntRegs:$src1),
- s11_0ImmPred:$offset))))]>;
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1, AddedComplexity = 20 in
+ def NAME : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src1+#$offset)",
+ []>;
-// Indexed load byte any-extend.
-let AddedComplexity = 20 in
-def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
- (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >;
+ let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 0 >;
+ defm NotPt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 1 >;
+ }
+ }
+}
-let neverHasSideEffects = 1 in
-def LDrib_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memb(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
+let addrMode = BaseImmOffset in {
+ defm LDrib_indexed: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext,
+ 11, 6>, AddrModeRel;
+ defm LDriub_indexed: LD_Idxd <"memub" , "LDriub", IntRegs, s11_0Ext, u6_0Ext,
+ 11, 6>, AddrModeRel;
+ defm LDrih_indexed: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext,
+ 12, 7>, AddrModeRel;
+ defm LDriuh_indexed: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext,
+ 12, 7>, AddrModeRel;
+ defm LDriw_indexed: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext,
+ 13, 8>, AddrModeRel;
+ defm LDrid_indexed: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext,
+ 14, 9>, AddrModeRel;
+}
-let neverHasSideEffects = 1 in
-def LDb_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memb(#$global)",
- []>,
- Requires<[NoV4T]>;
+let AddedComplexity = 20 in {
+def : Pat < (i32 (sextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
+ (LDrib_indexed IntRegs:$src1, s11_0ExtPred:$offset) >;
-let neverHasSideEffects = 1 in
-def LDub_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memub(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrib : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memb($src1++#$offset)",
- [],
- "$src1 = $dst2">;
+def : Pat < (i32 (zextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
+ (LDriub_indexed IntRegs:$src1, s11_0ExtPred:$offset) >;
-// Load byte conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memb($addr)",
- []>;
+def : Pat < (i32 (sextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
+ (LDrih_indexed IntRegs:$src1, s11_1ExtPred:$offset) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memb($addr)",
- []>;
+def : Pat < (i32 (zextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
+ (LDriuh_indexed IntRegs:$src1, s11_1ExtPred:$offset) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if ($src1) $dst = memb($src2+#$src3)",
- []>;
+def : Pat < (i32 (load (add IntRegs:$src1, s11_2ExtPred:$offset))),
+ (LDriw_indexed IntRegs:$src1, s11_2ExtPred:$offset) >;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if (!$src1) $dst = memb($src2+#$src3)",
- []>;
+def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
+ (LDrid_indexed IntRegs:$src1, s11_3ExtPred:$offset) >;
+}
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1) $dst1 = memb($src2++#$src3)",
- [],
- "$src2 = $dst2">;
+//===----------------------------------------------------------------------===//
+// Post increment load
+// Make sure that in a post-increment load, the first operand is always the
+// post-increment operand.
+//===----------------------------------------------------------------------===//
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1) $dst1 = memb($src2++#$src3)",
+multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2++#$offset)",
[],
"$src2 = $dst2">;
+}
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memb($addr)",
- []>;
+multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME#_V4 : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memb($addr)",
- []>;
+multiclass LD_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if ($src1.new) $dst = memb($src2+#$src3)",
- []>;
+ let BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+ let isPredicated = 1 in {
+ defm Pt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 0 >;
+ defm NotPt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 1 >;
+ }
+ }
+}
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if (!$src1.new) $dst = memb($src2+#$src3)",
- []>;
+let hasCtrlDep = 1, neverHasSideEffects = 1 in {
+ defm POST_LDrib : LD_PostInc<"memb", "LDrib", IntRegs, s4_0Imm>,
+ PredNewRel;
+ defm POST_LDriub : LD_PostInc<"memub", "LDriub", IntRegs, s4_0Imm>,
+ PredNewRel;
+ defm POST_LDrih : LD_PostInc<"memh", "LDrih", IntRegs, s4_1Imm>,
+ PredNewRel;
+ defm POST_LDriuh : LD_PostInc<"memuh", "LDriuh", IntRegs, s4_1Imm>,
+ PredNewRel;
+ defm POST_LDriw : LD_PostInc<"memw", "LDriw", IntRegs, s4_2Imm>,
+ PredNewRel;
+ defm POST_LDrid : LD_PostInc<"memd", "LDrid", DoubleRegs, s4_3Imm>,
+ PredNewRel;
+}
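// For reference (sketch): a post-increment load such as
//   r1 = memw(r0++#4)
// writes the loaded word to r1 and then advances r0 by 4; the constraint
// "$src1 = $dst2" in the multiclass is what ties the incoming base
// register to the updated-base output.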
+def : Pat< (i32 (extloadi1 ADDRriS11_0:$addr)),
+ (i32 (LDrib ADDRriS11_0:$addr)) >;
-// Load halfword.
-let isPredicable = 1 in
-def LDrih : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memh($addr)",
- [(set (i32 IntRegs:$dst), (i32 (sextloadi16 ADDRriS11_1:$addr)))]>;
+// Load byte any-extend.
+def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
+ (i32 (LDrib ADDRriS11_0:$addr)) >;
-let isPredicable = 1, AddedComplexity = 20 in
-def LDrih_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_1Imm:$offset),
- "$dst = memh($src1+#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi16 (add (i32 IntRegs:$src1),
- s11_1ImmPred:$offset))))]>;
+// Indexed load byte any-extend.
+let AddedComplexity = 20 in
+def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
+ (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >;
def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)),
(i32 (LDrih ADDRriS11_1:$addr))>;
@@ -1061,399 +1109,25 @@ let AddedComplexity = 20 in
def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))),
(i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >;
-let neverHasSideEffects = 1 in
-def LDrih_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memh(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1 in
-def LDh_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memh(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1 in
-def LDuh_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memuh(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrih : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memh($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
-// Load halfword conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if ($src1) $dst = memh($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if (!$src1) $dst = memh($src2+#$src3)",
- []>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if ($src1.new) $dst = memh($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if (!$src1.new) $dst = memh($src2+#$src3)",
- []>;
-
-// Load unsigned byte.
-let isPredicable = 1 in
-def LDriub : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memub($addr)",
- [(set (i32 IntRegs:$dst), (i32 (zextloadi8 ADDRriS11_0:$addr)))]>;
-
+let AddedComplexity = 10 in
def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
(i32 (LDriub ADDRriS11_0:$addr))>;
-let isPredicable = 1, AddedComplexity = 20 in
-def LDriub_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_0Imm:$offset),
- "$dst = memub($src1+#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi8 (add (i32 IntRegs:$src1),
- s11_0ImmPred:$offset))))]>;
-
let AddedComplexity = 20 in
def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))),
(i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>;
-let neverHasSideEffects = 1 in
-def LDriub_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memub(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriub : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memub($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
-// Load unsigned byte conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memub($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memub($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if ($src1) $dst = memub($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if (!$src1) $dst = memub($src2+#$src3)",
- []>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memub($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memub($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if ($src1.new) $dst = memub($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
- "if (!$src1.new) $dst = memub($src2+#$src3)",
- []>;
-
-// Load unsigned halfword.
-let isPredicable = 1 in
-def LDriuh : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr),
- "$dst = memuh($addr)",
- [(set (i32 IntRegs:$dst), (i32 (zextloadi16 ADDRriS11_1:$addr)))]>;
-
-// Indexed load unsigned halfword.
-let isPredicable = 1, AddedComplexity = 20 in
-def LDriuh_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_1Imm:$offset),
- "$dst = memuh($src1+#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi16 (add (i32 IntRegs:$src1),
- s11_1ImmPred:$offset))))]>;
-
-let neverHasSideEffects = 1 in
-def LDriuh_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memuh(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriuh : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memuh($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
-// Load unsigned halfword conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memuh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memuh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if ($src1) $dst = memuh($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if (!$src1) $dst = memuh($src2+#$src3)",
- []>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memuh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memuh($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if ($src1.new) $dst = memuh($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
- "if (!$src1.new) $dst = memuh($src2+#$src3)",
- []>;
-
-
-// Load word.
-let isPredicable = 1 in
-def LDriw : LDInst<(outs IntRegs:$dst),
- (ins MEMri:$addr), "$dst = memw($addr)",
- [(set IntRegs:$dst, (i32 (load ADDRriS11_2:$addr)))]>;
-
// Load predicate.
-let Defs = [R10,R11,D5], neverHasSideEffects = 1 in
-def LDriw_pred : LDInst<(outs PredRegs:$dst),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in
+def LDriw_pred : LDInst2<(outs PredRegs:$dst),
(ins MEMri:$addr),
"Error; should not emit",
[]>;
-// Indexed load.
-let isPredicable = 1, AddedComplexity = 20 in
-def LDriw_indexed : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s11_2Imm:$offset),
- "$dst = memw($src1+#$offset)",
- [(set IntRegs:$dst, (i32 (load (add IntRegs:$src1,
- s11_2ImmPred:$offset))))]>;
-
-let neverHasSideEffects = 1 in
-def LDriw_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst = memw(#$global+$offset)",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1 in
-def LDw_GP : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst = memw(#$global)",
- []>,
- Requires<[NoV4T]>;
-
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriw : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memw($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
-// Load word conditionally.
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1) $dst = memw($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1) $dst = memw($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if ($src1) $dst = memw($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if (!$src1) $dst = memw($src2+#$src3)",
- []>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if ($src1) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if (!$src1) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if ($src1.new) $dst = memw($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, MEMri:$addr),
- "if (!$src1.new) $dst = memw($addr)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if ($src1.new) $dst = memw($src2+#$src3)",
- []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
- "if (!$src1.new) $dst = memw($src2+#$src3)",
- []>;
-
// Deallocate stack frame.
let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
- def DEALLOCFRAME : LDInst2<(outs), (ins i32imm:$amt1),
+ def DEALLOCFRAME : LDInst2<(outs), (ins),
"deallocframe",
[]>;
}
@@ -1482,57 +1156,65 @@ let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
//===----------------------------------------------------------------------===//
// Multiply and use lower result.
// Rd=+mpyi(Rs,#u8)
-def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in
+def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Ext:$src2),
"$dst =+ mpyi($src1, #$src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- u8ImmPred:$src2))]>;
+ u8ExtPred:$src2))]>;
// Rd=-mpyi(Rs,#u8)
-def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2),
+def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
"$dst =- mpyi($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- n8ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (ineg (mul (i32 IntRegs:$src1),
+ u8ImmPred:$src2)))]>;
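// Example of what the rewritten pattern matches (sketch): a multiply by a
// negative 8-bit constant, e.g. r0 = r1 * -3, can now select as
//   r0 =- mpyi(r1, #3)
// via (ineg (mul ...)) with a plain u8 operand, instead of relying on the
// removed n8Imm/n8ImmPred operand kind.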
// Rd=mpyi(Rs,#m9)
// s9 is NOT the same as m9 - but it works... so far.
// The assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8),
// depending on the value of m9. See Arch Spec.
-def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
+CextOpcode = "MPYI", InputType = "imm" in
+def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
"$dst = mpyi($src1, #$src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- s9ImmPred:$src2))]>;
+ s9ExtPred:$src2))]>, ImmRegRel;
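// Sketch of the constant-extender flags above (interpretation assumed):
// opExtentBits = 9 says the natively encodable immediate is s9, and
// opExtendable = 2 marks operand 2 as extendable, so a constant that does
// not fit, e.g.
//   r0 = mpyi(r1, #100000)
// stays legal by spending a 32-bit constant-extender word in the packet.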
// Rd=mpyi(Rs,Rt)
+let CextOpcode = "MPYI", InputType = "reg" in
def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = mpyi($src1, $src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
+ (i32 IntRegs:$src2)))]>, ImmRegRel;
// Rx+=mpyi(Rs,#u8)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8,
+CextOpcode = "MPYI_acc", InputType = "imm" in
def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3),
"$dst += mpyi($src2, #$src3)",
[(set (i32 IntRegs:$dst),
- (add (mul (i32 IntRegs:$src2), u8ImmPred:$src3),
+ (add (mul (i32 IntRegs:$src2), u8ExtPred:$src3),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
// Rx+=mpyi(Rs,Rt)
+let CextOpcode = "MPYI_acc", InputType = "reg" in
def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst += mpyi($src2, $src3)",
[(set (i32 IntRegs:$dst),
(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
// Rx-=mpyi(Rs,#u8)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8 in
def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3),
"$dst -= mpyi($src2, #$src3)",
[(set (i32 IntRegs:$dst),
(sub (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
- u8ImmPred:$src3)))],
+ u8ExtPred:$src3)))],
"$src1 = $dst">;
// Multiply and use upper result.
@@ -1601,7 +1283,7 @@ def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
// Rxx-=mpyu(Rs,Rt)
def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst += mpyu($src2, $src3)",
+ "$dst -= mpyu($src2, $src3)",
[(set (i64 DoubleRegs:$dst),
(sub (i64 DoubleRegs:$src1),
(mul (i64 (anyext (i32 IntRegs:$src2))),
@@ -1609,37 +1291,43 @@ def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
"$src1 = $dst">;
+let InputType = "reg", CextOpcode = "ADD_acc" in
def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst += add($src2, $src3)",
[(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
(i32 IntRegs:$src3)),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+InputType = "imm", CextOpcode = "ADD_acc" in
def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
- IntRegs:$src2, s8Imm:$src3),
+ IntRegs:$src2, s8Ext:$src3),
"$dst += add($src2, #$src3)",
[(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
- s8ImmPred:$src3),
+ s8_16ExtPred:$src3),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
+let CextOpcode = "SUB_acc", InputType = "reg" in
def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst -= add($src2, $src3)",
[(set (i32 IntRegs:$dst),
(sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "SUB_acc", InputType = "imm" in
def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
- IntRegs:$src2, s8Imm:$src3),
+ IntRegs:$src2, s8Ext:$src3),
"$dst -= add($src2, #$src3)",
[(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1),
(add (i32 IntRegs:$src2),
- s8ImmPred:$src3)))],
- "$src1 = $dst">;
+ s8_16ExtPred:$src3)))],
+ "$src1 = $dst">, ImmRegRel;
//===----------------------------------------------------------------------===//
// MTYPE/MPYH -
@@ -1670,282 +1358,219 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
// ST +
//===----------------------------------------------------------------------===//
///
-/// Assumptions::: ****** DO NOT IGNORE ********
-/// 1. Make sure that in post increment store, the zero'th operand is always the
-/// post increment operand.
-/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the
-/// last operand.
-///
// Store doubleword.
-let isPredicable = 1 in
-def STrid : STInst<(outs),
- (ins MEMri:$addr, DoubleRegs:$src1),
- "memd($addr) = $src1",
- [(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr)]>;
-
-// Indexed store double word.
-let AddedComplexity = 10, isPredicable = 1 in
-def STrid_indexed : STInst<(outs),
- (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
- "memd($src1+#$src2) = $src3",
- [(store (i64 DoubleRegs:$src3),
- (add (i32 IntRegs:$src1), s11_3ImmPred:$src2))]>;
-let neverHasSideEffects = 1 in
-def STrid_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
- "memd(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
+//===----------------------------------------------------------------------===//
+// Post increment store
+//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in
-def STd_GP : STInst2<(outs),
- (ins globaladdress:$global, DoubleRegs:$src),
- "memd(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
-
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_STdri : STInstPI<(outs IntRegs:$dst),
- (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset),
- "memd($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_store (i64 DoubleRegs:$src1), (i32 IntRegs:$src2),
- s4_3ImmPred:$offset))],
+multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2PI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3",
+ [],
"$src2 = $dst">;
+}
-// Store doubleword conditionally.
-// if ([!]Pv) memd(Rs+#u6:3)=Rtt
-// if (Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_cPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
- "if ($src1) memd($addr) = $src2",
- []>;
+multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+    defm _c#NAME : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME#_V4 : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
- "if (!$src1) memd($addr) = $src2",
- []>;
+let hasCtrlDep = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
-// if (Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_cPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
- DoubleRegs:$src4),
- "if ($src1) memd($src2+#$src3) = $src4",
- []>;
+ let hasCtrlDep = 1, BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def NAME : STInst2PI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
+ #mnemonic#"($src1++#$offset) = $src2",
+ [],
+ "$src1 = $dst">;
+
+ let isPredicated = 1 in {
+ defm Pt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 0 >;
+ defm NotPt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 1 >;
+ }
+ }
+}
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
- DoubleRegs:$src4),
- "if (!$src1) memd($src2+#$src3) = $src4",
- []>;
+defm POST_STbri: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel;
+defm POST_SThri: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel;
+defm POST_STwri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
-// if ([!]Pv) memd(Rx++#s4:3)=Rtt
-// if (Pv) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def POST_STdri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if ($src1) memd($src3++#$offset) = $src2",
- [],
- "$src3 = $dst">;
-
-// if (!Pv) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1, isPredicated = 1,
- isPredicated = 1 in
-def POST_STdri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if (!$src1) memd($src3++#$offset) = $src2",
- [],
- "$src3 = $dst">;
+let isNVStorable = 0 in
+defm POST_STdri: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm>, AddrModeRel;
+def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2,
+                          s4_0ImmPred:$offset),
+          (POST_STbri IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>;
-// Store byte.
-// memb(Rs+#s11:0)=Rt
-let isPredicable = 1 in
-def STrib : STInst<(outs),
- (ins MEMri:$addr, IntRegs:$src1),
- "memb($addr) = $src1",
- [(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr)]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def STrib_indexed : STInst<(outs),
- (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
- "memb($src1+#$src2) = $src3",
- [(truncstorei8 (i32 IntRegs:$src3), (add (i32 IntRegs:$src1),
- s11_0ImmPred:$src2))]>;
-
-// memb(gp+#u16:0)=Rt
-let neverHasSideEffects = 1 in
-def STrib_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memb(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
+def : Pat<(post_truncsti16 (i32 IntRegs:$src1), IntRegs:$src2,
+                          s4_1ImmPred:$offset),
+          (POST_SThri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
-// memb(#global)=Rt
-let neverHasSideEffects = 1 in
-def STb_GP : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memb(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
-
-// memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1,
- IntRegs:$src2,
- s4Imm:$offset),
- "memb($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_truncsti8 (i32 IntRegs:$src1), (i32 IntRegs:$src2),
- s4_0ImmPred:$offset))],
- "$src2 = $dst">;
+def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset),
+          (POST_STwri IntRegs:$src2, s4_2ImmPred:$offset, IntRegs:$src1)>;
-// Store byte conditionally.
-// if ([!]Pv) memb(Rs+#u6:0)=Rt
-// if (Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_cPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memb($addr) = $src2",
- []>;
+def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2,
+ s4_3ImmPred:$offset),
+ (POST_STdri IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>;
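// Operand-order convention the Pats above rely on (sketch): in
//   POST_STwri base, offset, value
// the post-incremented base comes first and the stored value last, so
//   memw(r0++#4) = r1
// stores r1 and then advances r0 by 4.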
-// if (!Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memb($addr) = $src2",
+//===----------------------------------------------------------------------===//
+// Multiclass for the store instructions with the MEMri operand.
+//===----------------------------------------------------------------------===//
+multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($addr) = $src2",
[]>;
+}
-// if (Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_indexed_cPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if ($src1) memb($src2+#$src3) = $src4",
- []>;
+multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
-// if (!Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_indexed_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if (!$src1) memb($src2+#$src3) = $src4",
- []>;
+ // Predicate new
+ let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
+ defm _cdn#NAME#_V4 : ST_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
-// if ([!]Pv) memb(Rx++#s4:0)=Rt
-// if (Pv) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STbri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
+let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_MEMri<string mnemonic, string CextOp, RegisterClass RC,
+ bits<5> ImmBits, bits<5> PredImmBits> {
-// if (!Pv) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STbri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME : STInst2<(outs),
+ (ins MEMri:$addr, RC:$src),
+ mnemonic#"($addr) = $src",
+ []>;
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : ST_MEMri_Pred<mnemonic, RC, 0>;
+ defm NotPt : ST_MEMri_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
-// Store halfword.
-// memh(Rs+#s11:1)=Rt
-let isPredicable = 1 in
-def STrih : STInst<(outs),
- (ins MEMri:$addr, IntRegs:$src1),
- "memh($addr) = $src1",
- [(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr)]>;
+let addrMode = BaseImmOffset, isMEMri = "true" in {
+ defm STrib: ST_MEMri < "memb", "STrib", IntRegs, 11, 6>, AddrModeRel;
+ defm STrih: ST_MEMri < "memh", "STrih", IntRegs, 12, 7>, AddrModeRel;
+ defm STriw: ST_MEMri < "memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
+ let isNVStorable = 0 in
+ defm STrid: ST_MEMri < "memd", "STrid", DoubleRegs, 14, 9>, AddrModeRel;
+}
-let AddedComplexity = 10, isPredicable = 1 in
-def STrih_indexed : STInst<(outs),
- (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
- "memh($src1+#$src2) = $src3",
- [(truncstorei16 (i32 IntRegs:$src3), (add (i32 IntRegs:$src1),
- s11_1ImmPred:$src2))]>;
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr),
+ (STrib ADDRriS11_0:$addr, (i32 IntRegs:$src1))>;
-let neverHasSideEffects = 1 in
-def STrih_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memh(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr),
+ (STrih ADDRriS11_1:$addr, (i32 IntRegs:$src1))>;
-let neverHasSideEffects = 1 in
-def STh_GP : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memh(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
+def : Pat<(store (i32 IntRegs:$src1), ADDRriS11_2:$addr),
+ (STriw ADDRriS11_2:$addr, (i32 IntRegs:$src1))>;
-// memh(Rx++#s4:1)=Rt.H
-// memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_SThri : STInstPI<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
- "memh($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_truncsti16 (i32 IntRegs:$src1), (i32 IntRegs:$src2),
- s4_1ImmPred:$offset))],
- "$src2 = $dst">;
+def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr),
+ (STrid ADDRriS11_3:$addr, (i64 DoubleRegs:$src1))>;
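// Division of labor (reading of the defs above): the ST_MEMri instructions
// themselves carry no selection patterns; these standalone Pats map the
// generic store nodes onto them once the addressing mode folds, e.g.
//   store r1 to r0+#8   becomes   memw(r0+#8) = r1   via STriw.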
-// Store halfword conditionally.
-// if ([!]Pv) memh(Rs+#u6:1)=Rt
-// if (Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_cPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memh($addr) = $src2",
- []>;
-// if (!Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memh($addr) = $src2",
+//===----------------------------------------------------------------------===//
+// Multiclass for the store instructions with the base+immediate offset
+// addressing mode.
+//===----------------------------------------------------------------------===//
+multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = $src4",
[]>;
+}
-// if (Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_indexed_cPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if ($src1) memh($src2+#$src3) = $src4",
- []>;
+multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true"), isPredicated = 1 in {
+ defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
-// if (!Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_indexed_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if (!$src1) memh($src2+#$src3) = $src4",
+ // Predicate new
+ let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
+ defm _cdn#NAME#_V4 : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
+ }
+}
+
+let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+ bits<5> PredImmBits> {
+
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME : STInst2<(outs),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2) = $src3",
[]>;
-// if ([!]Pv) memh(Rx++#s4:1)=Rt
-// if (Pv) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_SThri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits in {
+ defm Pt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 0>;
+ defm NotPt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 1>;
+ }
+ }
+}
+
+let addrMode = BaseImmOffset, InputType = "reg" in {
+ defm STrib_indexed: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext,
+ u6_0Ext, 11, 6>, AddrModeRel, ImmRegRel;
+ defm STrih_indexed: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext,
+ u6_1Ext, 12, 7>, AddrModeRel, ImmRegRel;
+ defm STriw_indexed: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext,
+ u6_2Ext, 13, 8>, AddrModeRel, ImmRegRel;
+ let isNVStorable = 0 in
+ defm STrid_indexed: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext,
+ u6_3Ext, 14, 9>, AddrModeRel;
+}
+
+let AddedComplexity = 10 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), (add IntRegs:$src2,
+ s11_0ExtPred:$offset)),
+          (STrib_indexed IntRegs:$src2, s11_0ExtPred:$offset,
+ (i32 IntRegs:$src1))>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), (add IntRegs:$src2,
+ s11_1ExtPred:$offset)),
+          (STrih_indexed IntRegs:$src2, s11_1ExtPred:$offset,
+ (i32 IntRegs:$src1))>;
-// if (!Pv) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_SThri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
+def : Pat<(store (i32 IntRegs:$src1), (add IntRegs:$src2,
+ s11_2ExtPred:$offset)),
+          (STriw_indexed IntRegs:$src2, s11_2ExtPred:$offset,
+ (i32 IntRegs:$src1))>;
+def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2,
+ s11_3ExtPred:$offset)),
+          (STrid_indexed IntRegs:$src2, s11_3ExtPred:$offset,
+ (i64 DoubleRegs:$src1))>;
+}
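// Intent of AddedComplexity = 10 (sketch): it biases selection toward the
// base+offset store forms whenever the address is an add, so
//   memw(r2+#16) = r1
// is chosen over first materializing r2+16 into a scratch register.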
+
+// memh(Rx++#s4:1)=Rt.H
// Store word.
// Store predicate.
@@ -1955,90 +1580,6 @@ def STriw_pred : STInst2<(outs),
"Error; should not emit",
[]>;
-// memw(Rs+#s11:2)=Rt
-let isPredicable = 1 in
-def STriw : STInst<(outs),
- (ins MEMri:$addr, IntRegs:$src1),
- "memw($addr) = $src1",
- [(store (i32 IntRegs:$src1), ADDRriS11_2:$addr)]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def STriw_indexed : STInst<(outs),
- (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
- "memw($src1+#$src2) = $src3",
- [(store (i32 IntRegs:$src3),
- (add (i32 IntRegs:$src1), s11_2ImmPred:$src2))]>;
-
-let neverHasSideEffects = 1 in
-def STriw_GP : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memw(#$global+$offset) = $src",
- []>,
- Requires<[NoV4T]>;
-
-let neverHasSideEffects = 1 in
-def STw_GP : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memw(#$global) = $src",
- []>,
- Requires<[NoV4T]>;
-
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_STwri : STInstPI<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
- "memw($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_store (i32 IntRegs:$src1), (i32 IntRegs:$src2),
- s4_2ImmPred:$offset))],
- "$src2 = $dst">;
-
-// Store word conditionally.
-// if ([!]Pv) memw(Rs+#u6:2)=Rt
-// if (Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_cPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memw($addr) = $src2",
- []>;
-
-// if (!Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memw($addr) = $src2",
- []>;
-
-// if (Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_indexed_cPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if ($src1) memw($src2+#$src3) = $src4",
- []>;
-
-// if (!Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_indexed_cNotPt : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if (!$src1) memw($src2+#$src3) = $src4",
- []>;
-
-// if ([!]Pv) memw(Rx++#s4:2)=Rt
-// if (Pv) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STwri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
-
-// if (!Pv) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STwri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
-
-
-
// Allocate stack frame.
let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
def ALLOCFRAME : STInst2<(outs),
@@ -2241,7 +1782,7 @@ def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
[SDNPHasChain]>;
-let hasSideEffects = 1, isHexagonSolo = 1 in
+let hasSideEffects = 1, isSolo = 1 in
def BARRIER : SYSInst<(outs), (ins),
"barrier",
[(HexagonBARRIER)]>;
@@ -2316,9 +1857,9 @@ def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
Defs = [PC, LC0], Uses = [SA0, LC0] in {
-def ENDLOOP0 : Marker<(outs), (ins brtarget:$offset),
- ":endloop0",
- []>;
+def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset),
+ ":endloop0",
+ []>;
}
// Support for generating global address.
@@ -2406,6 +1947,10 @@ def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global),
"$dst = CONST32(#$global)",
[(set (i32 IntRegs:$dst), imm:$global) ]>;
+// Map BlockAddress lowering to CONST32_Int_Real
+def : Pat<(HexagonCONST32_GP tblockaddress:$addr),
+ (CONST32_Int_Real tblockaddress:$addr)>;
+
let isReMaterializable = 1, isMoveImm = 1 in
def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label),
"$dst = CONST32($label)",
@@ -2509,68 +2054,26 @@ def : Pat<(HexagonTCRet (i32 IntRegs:$dst)),
// Atomic load and store support
// 8 bit atomic load
-def : Pat<(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDub_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_8 ADDRriS11_0:$src1),
(i32 (LDriub ADDRriS11_0:$src1))>;
def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)),
(i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>;
-
-
// 16 bit atomic load
-def : Pat<(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDuh_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_16 ADDRriS11_1:$src1),
(i32 (LDriuh ADDRriS11_1:$src1))>;
def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)),
(i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>;
-
-
-// 32 bit atomic load
-def : Pat<(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDw_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_32 ADDRriS11_2:$src1),
(i32 (LDriw ADDRriS11_2:$src1))>;
def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)),
(i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
-
// 64 bit atomic load
-def : Pat<(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
- (i64 (LDd_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
(i64 (LDrid ADDRriS11_3:$src1))>;
@@ -2578,30 +2081,6 @@ def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
(i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
-// 64 bit atomic store
-def : Pat<(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
- (i64 DoubleRegs:$src1)),
- (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i64 DoubleRegs:$src1)),
- (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
-
-// 8 bit atomic store
-def : Pat<(atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrib_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
-
def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
(STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>;
@@ -2611,18 +2090,6 @@ def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
(i32 IntRegs:$src1))>;
-// 16 bit atomic store
-def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrih_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
-
def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
(STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>;
@@ -2631,20 +2098,6 @@ def : Pat<(atomic_store_16 (i32 IntRegs:$src1),
(STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset,
(i32 IntRegs:$src1))>;
-
-// 32 bit atomic store
-def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STw_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STriw_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
(STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>;
@@ -2713,198 +2166,8 @@ def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
(i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
-// Map from store(globaladdress + x) -> memd(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memd(#foo).
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memw(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memw(#foo + 0).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>;
-
-// Map from store(globaladdress) -> memw(#foo).
-let AddedComplexity = 100 in
-def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei16 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei16 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei8 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from store(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(truncstorei8 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memw(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDw_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memd(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo + 0).
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i64 (LDd_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd.
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i1 (TFR_PdRs (i32 (LDb_GP tglobaladdr:$global))))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDrih_GP tglobaladdr:$global, 0))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memuh(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memuh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDriuh_GP tglobaladdr:$global, 0))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDh_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memuh(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDuh_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress + x) -> memub(#foo + x).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memb(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// Map from load(globaladdress) -> memub(#foo).
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-// When the Interprocedural Global Variable optimizer realizes that a
-// certain global variable takes only two constant values, it shrinks the
-// global to a boolean. Catch those loads here in the following 3 patterns.
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP tglobaladdr:$global))>,
- Requires<[NoV4T]>;
-
// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+let AddedComplexity = 10 in
def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
(i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>;
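As a hedged illustration of the i1 mapping above (assuming the front end emits an i1 zextload for a byte-aligned bool load, which is not guaranteed for every front end), the byte is loaded and masked with 1, matching the AND_rr/TFRI pair in the pattern:

    #include <stdbool.h>
    /* Illustrative: widen a byte-aligned i1 to i32.
       Per the pattern above: r0 = memb(addr); r0 = and(r0, #1). */
    int widen_bool(bool *p) { return *p; }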
@@ -3020,12 +2283,6 @@ def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
(STrib ADDRriS11_2:$addr, (TFRI 1))>;
-let AddedComplexity = 100 in
-// Map from i1 = constant<-1>; memw(CONST32(#foo)) = i1 -> r0 = 1;
-// memw(#foo) = r0
-def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP tglobaladdr:$global, (TFRI 1))>,
- Requires<[NoV4T]>;
// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
@@ -3181,23 +2438,54 @@ def : Pat <(i32 (zext (i1 PredRegs:$src1))),
// i1 -> i64
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>;
+ (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+ Requires<[NoV4T]>;
// i32 -> i64
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+ Requires<[NoV4T]>;
// i8 -> i64
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
+
+// i1 -> i64
+def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
// i16 -> i64
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
// i32 -> i64
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
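A short C sketch of the zero-extension patterns above (illustrative names; pre-V4 per the NoV4T requirement): each widens to i64 by pairing the 32-bit value with a zero word, i.e. Rdd = combine(#0, Rs).

    /* Illustrative: i32 -> i64 and i8 -> i64 zero extension, selected
       as COMBINE_rr(TFRI 0, ...) per the patterns above. */
    unsigned long long zx_reg(unsigned x)        { return x; }
    unsigned long long zx_byte(unsigned char *p) { return p[3]; }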
def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
(i32 (LDriw ADDRriS11_0:$src1))>;
@@ -3218,15 +2506,41 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
// Any extended 64-bit load.
// anyext i32 -> i64
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
+
+// When there is an offset, prefer the pattern below over the one above.
+// The complexity of the pattern above is 13 (gleaned from
+// HexagonGenDAGISel.inc), so the complexity below is set comfortably
+// higher to ensure the pattern below is chosen. If this is not done,
+// we generate addresses such as:
+// ********************************************
+// r1 = add (r0, #4)
+// r1 = memw(r1 + #0)
+// instead of
+// r1 = memw(r0 + #4)
+// ********************************************
+let AddedComplexity = 100 in
+def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
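To make the comment above concrete, a hedged C sketch (hypothetical function name): a 32-bit load at a constant offset widened to 64 bits. Whether the widening is a sign, zero, or any extension depends on how the result is used, but the addressing-mode point is the same.

    /* Illustrative: with AddedComplexity = 100 this selects
       r1 = memw(r0 + #4) rather than r1 = add(r0, #4); r1 = memw(r1 + #0). */
    long long load_at(int *p) { return p[1]; }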
// anyext i16 -> i64.
def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
def : Pat<(i64 (zext (i32 IntRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+ Requires<[NoV4T]>;
// Multiply 64-bit unsigned and use upper result.
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
@@ -3331,6 +2645,11 @@ def BR_JT : JRInst<(outs), (ins IntRegs:$src),
"jumpr $src",
[(HexagonBR_JT (i32 IntRegs:$src))]>;
+let isBranch=1, isIndirectBranch=1, isTerminator=1 in
+def BRIND : JRInst<(outs), (ins IntRegs:$src),
+ "jumpr $src",
+ [(brind (i32 IntRegs:$src))]>;
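A hedged GNU C sketch (labels-as-values is a GCC/Clang extension; names are illustrative): the address-of-label constants become blockaddress values, materialized through CONST32 as mapped earlier in the patch, and the indirect goto selects the new BRIND (jumpr) definition.

    /* Illustrative: indirect branch through a label table. */
    void dispatch(int i) {
      static void *tbl[] = { &&done, &&work };
      goto *tbl[i & 1];     /* brind -> BRIND: jumpr r0 */
    work:
      return;
    done:
      return;
    }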
+
def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
def : Pat<(HexagonWrapperJT tjumptable:$dst),
@@ -3444,32 +2763,3 @@ include "HexagonInstrInfoV5.td"
//===----------------------------------------------------------------------===//
// V5 Instructions -
//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Generate mapping table to relate non-predicate instructions with their
-// predicated formats - true and false.
-//
-
-def getPredOpcode : InstrMapping {
- let FilterClass = "PredRel";
- // Instructions with the same BaseOpcode and isNVStore values form a row.
- let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"];
- // Instructions with the same predicate sense form a column.
- let ColFields = ["PredSense"];
- // The key column is the unpredicated instructions.
- let KeyCol = [""];
- // Value columns are PredSense=true and PredSense=false
- let ValueCols = [["true"], ["false"]];
-}
-
-//===----------------------------------------------------------------------===//
-// Generate mapping table to relate predicated instructions with their .new
-// format.
-//
-def getPredNewOpcode : InstrMapping {
- let FilterClass = "PredNewRel";
- let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
- let ColFields = ["PNewValue"];
- let KeyCol = [""];
- let ValueCols = [["new"]];
-}
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 70448fc7af38..cd0e4758968c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -12,10 +12,25 @@
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in
-def IMMEXT : Immext<(outs), (ins),
- "/* immext #... */",
- []>,
- Requires<[HasV4T]>;
+class T_Immext<dag ins> :
+ EXTENDERInst<(outs), ins, "immext(#$imm)", []>,
+ Requires<[HasV4T]>;
+
+def IMMEXT_b : T_Immext<(ins brtarget:$imm)>;
+def IMMEXT_c : T_Immext<(ins calltarget:$imm)>;
+def IMMEXT_g : T_Immext<(ins globaladdress:$imm)>;
+def IMMEXT_i : T_Immext<(ins u26_6Imm:$imm)>;
+
+// Fold (add (CONST32 tglobaladdr:$addr) <offset>) into a global address.
+def FoldGlobalAddr : ComplexPattern<i32, 1, "foldGlobalAddress", [], []>;
+
+// Fold (add (CONST32_GP tglobaladdr:$addr) <offset>) into a global address.
+def FoldGlobalAddrGP : ComplexPattern<i32, 1, "foldGlobalAddressGP", [], []>;
+
+def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr),
+ (HexagonCONST32 node:$addr), [{
+ return hasNumUsesBelowThresGA(N->getOperand(0).getNode());
+}]>;
// Hexagon V4 Architecture spec defines 8 instruction classes:
// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the
@@ -80,190 +95,63 @@ def IMMEXT : Immext<(outs), (ins),
//===----------------------------------------------------------------------===//
// ALU32 +
//===----------------------------------------------------------------------===//
-
-// Shift halfword.
-
-let isPredicated = 1 in
-def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = aslh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = asrh($src2)",
- []>,
- Requires<[HasV4T]>;
-
-// Sign extend.
-
-let isPredicated = 1 in
-def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = sxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicated = 1 in
-def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicated = 1 in
-def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = sxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-// Zero exten.
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = zxtb($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) $dst = zxth($src2)",
- []>,
- Requires<[HasV4T]>;
-
// Generate frame index addresses.
-let neverHasSideEffects = 1, isReMaterializable = 1 in
+let neverHasSideEffects = 1, isReMaterializable = 1,
+isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in
def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
(ins IntRegs:$src1, s32Imm:$offset),
"$dst = add($src1, ##$offset)",
[]>,
Requires<[HasV4T]>;
+// Rd=cmp.eq(Rs,#s8)
+let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
+isExtentSigned = 1, opExtentBits = 8 in
+def V4_A4_rcmpeqi : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = cmp.eq($Rs, #$s8)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
+ s8ExtPred:$s8)))))]>,
+ Requires<[HasV4T]>;
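A minimal C sketch (function name hypothetical, immediate chosen to fit s8) of the idiom the selection pattern above matches: a compare against an immediate whose 0/1 result lands in a general register, so a single Rd = cmp.eq(Rs, #s8) replaces a round trip through a predicate register.

    /* Illustrative: r0 = cmp.eq(r0, #42) per the pattern above. */
    int is42(int x) { return x == 42; }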
+
+// Preserve the TSTBIT generation
+def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
+ (i32 IntRegs:$src1))), 0)))),
+ (i32 (MUX_ii (i1 (TSTBIT_rr (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ 1, 0))>;
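The bit-test idiom this pattern keeps mapping to tstbit, as a hedged C sketch (see the tstbit.ll test referenced in the comment below):

    /* Illustrative: p0 = tstbit(r0, r1); r0 = mux(p0, #1, #0). */
    int testbit(int x, int n) { return ((1 << n) & x) != 0; }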
+
+// The pattern below interfered with tstbit generation; the TSTBIT pattern
+// above preserves it (see tstbit.ll).
+// Rd=cmp.ne(Rs,#s8)
+let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
+isExtentSigned = 1, opExtentBits = 8 in
+def V4_A4_rcmpneqi : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = !cmp.eq($Rs, #$s8)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
+ s8ExtPred:$s8)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=cmp.eq(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def V4_A4_rcmpeq : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = cmp.eq($Rs, $Rt)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
+ IntRegs:$Rt)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=cmp.ne(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def V4_A4_rcmpneq : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = !cmp.eq($Rs, $Rt)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
+ IntRegs:$Rt)))))]>,
+ Requires<[HasV4T]>;
//===----------------------------------------------------------------------===//
// ALU32 -
@@ -276,19 +164,44 @@ def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
// Combine
// Rdd=combine(Rs, #s8)
-let neverHasSideEffects = 1 in
-def COMBINE_ri_V4 : ALU32_ri<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, s8Imm:$src2),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+ neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_rI_V4 : ALU32_ri<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s8Ext:$src2),
"$dst = combine($src1, #$src2)",
[]>,
Requires<[HasV4T]>;
+
// Rdd=combine(#s8, Rs)
-let neverHasSideEffects = 1 in
-def COMBINE_ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
- (ins s8Imm:$src1, IntRegs:$src2),
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
+ neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
+ (ins s8Ext:$src1, IntRegs:$src2),
"$dst = combine(#$src1, $src2)",
[]>,
Requires<[HasV4T]>;
+
+def HexagonWrapperCombineRI_V4 :
+ SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
+def HexagonWrapperCombineIR_V4 :
+ SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
+
+def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
+ (COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
+ (COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>,
+ Requires<[HasV4T]>;
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6,
+ neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
+ (ins s8Imm:$src1, u6Ext:$src2),
+ "$dst = combine(#$src1, #$src2)",
+ []>,
+ Requires<[HasV4T]>;
+
//===----------------------------------------------------------------------===//
// ALU32/PERM +
//===----------------------------------------------------------------------===//
@@ -300,1436 +213,310 @@ def COMBINE_ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
// These absolute set addressing mode instructions accept immediate as
// an operand. We have duplicated these patterns to take a global address.
-let neverHasSideEffects = 1 in
+let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT in {
def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memd($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memd($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memb(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memb($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memb($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memh($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memub(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memub($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memub($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memuh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memuh($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memuh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memw(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memw($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memw($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
+}
// Following patterns are defined for absolute set addressing mode
// instruction which take global address as operand.
-let neverHasSideEffects = 1 in
+let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT in {
def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memd($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memb(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memb($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memub(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memub($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memuh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memuh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memw(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memw($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
+}
-// Load doubleword.
-//
-// Make sure that in post increment load, the first operand is always the post
-// increment operand.
-//
-// Rdd=memd(Rs+Rt<<#u2)
-// Special case pattern for indexed load without offset which is easier to
-// match. AddedComplexity of this pattern should be lower than base+offset load
-// and lower yet than the more generic version with offset/shift below
-// Similar approach is taken for all other base+index loads.
-let AddedComplexity = 10, isPredicable = 1 in
-def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memd($src1+$src2<<#0)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (load (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memd($src1+$src2<<#$offset)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (load (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-//// Load doubleword conditionally.
-// if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2)
-// if (Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memd($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memd($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memd($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memd($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// Rdd=memd(Rt<<#u2+#U6)
-
-//// Load byte.
-// Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 10, isPredicable = 1 in
-def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memb($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi8 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memub($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi8 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memub($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi8 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memb($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi8 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memub($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi8 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memub($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi8 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-//// Load byte conditionally.
-// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2)
-// if (Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memb($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memb($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memb($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memb($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-//// Load unsigned byte conditionally.
-// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2)
-// if (Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memub($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memub($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memub($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memub($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memb(Rt<<#u2+#U6)
-
-//// Load halfword
-// Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 10, isPredicable = 1 in
-def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memh($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi16 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memuh($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi16 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memuh($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi16 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-// Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 40, isPredicable = 1 in
-def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memh($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi16 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memuh($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi16 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memuh($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi16 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-//// Load halfword conditionally.
-// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2)
-// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-//// Load unsigned halfword conditionally.
-// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2)
-// if (Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memuh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memuh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memuh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memh(Rt<<#u2+#U6)
-
-//// Load word.
-// Load predicate: Fix for bug 5279.
-let neverHasSideEffects = 1 in
-def LDriw_pred_V4 : LDInst2<(outs PredRegs:$dst),
- (ins MEMri:$addr),
- "Error; should not emit",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memw(Re=#U6)
-
-// Rd=memw(Rs+Rt<<#u2)
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memw($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (load (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-// Rd=memw(Rs+Rt<<#u2)
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memw($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (load (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-//// Load word conditionally.
-// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2)
-// if (Pv) Rd=memw(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1) $dst=memw($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if ($src1.new) $dst=memw($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1) $dst=memw($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
- u2Imm:$offset),
- "if (!$src1.new) $dst=memw($src2+$src3<<#$offset)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memw(Rt<<#u2+#U6)
-
-
-// Post-inc Load, Predicated, Dot new
-
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cdnPt_V4 : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if ($src1.new) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cdnNotPt_V4 : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if (!$src1.new) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1.new) $dst1 = memb($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1.new) $dst1 = memb($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1.new) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1.new) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1.new) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1.new) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1.new) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1.new) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if ($src1.new) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if (!$src1.new) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-/// Load from global offset
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDrid_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memd(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memd(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memd(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memd(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memd(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDrib_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memb(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memb(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memb(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memb(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memb(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDriub_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memub(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDrih_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memh(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDriuh_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memuh(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDriw_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memw(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memw(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memw(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memw(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memw(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memd(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv.new) Rtt=memd(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDd_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memd(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDb_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memb(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rt=memb(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDb_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memb(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memub(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-
-// if (!Pv.new) Rt=memub(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memub(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memh(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rt=memh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memh(##$global)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memuh(#$global)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
+// multiclass for load instructions with base + register offset
+// addressing mode
+multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$offset)",
+ []>, Requires<[HasV4T]>;
+}
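// For illustration: the two !if operators above splice the predicate sense
// and the ".new" marker into the assembly string. Assuming isNot = 1,
// isPredNew = 1, and mnemonic "memw", the generated syntax should be roughly:
//
//   if (!$src1.new) $dst = memw($src2+$src3<<#$offset)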
-// if (!Pv) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
+multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
-// if (Pv.new) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
+let neverHasSideEffects = 1 in
+multiclass ld_idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+ let isPredicable = 1 in
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst = "#mnemonic#"($src1+$src2<<#$offset)",
+ []>, Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt_V4 : ld_idxd_shl_pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : ld_idxd_shl_pred<mnemonic, RC, 1>;
+ }
+ }
+}
-// if (!Pv.new) Rt=memuh(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memuh(##$global)",
- []>,
- Requires<[HasV4T]>;
+let addrMode = BaseRegOffset in {
+ defm LDrib_indexed_shl: ld_idxd_shl<"memb", "LDrib", IntRegs>, AddrModeRel;
+ defm LDriub_indexed_shl: ld_idxd_shl<"memub", "LDriub", IntRegs>, AddrModeRel;
+ defm LDrih_indexed_shl: ld_idxd_shl<"memh", "LDrih", IntRegs>, AddrModeRel;
+ defm LDriuh_indexed_shl: ld_idxd_shl<"memuh", "LDriuh", IntRegs>, AddrModeRel;
+ defm LDriw_indexed_shl: ld_idxd_shl<"memw", "LDriw", IntRegs>, AddrModeRel;
+ defm LDrid_indexed_shl: ld_idxd_shl<"memd", "LDrid", DoubleRegs>, AddrModeRel;
+}
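// For illustration: via ld_idxd_shl -> ld_idxd_shl_pred -> ld_idxd_shl_pbase,
// each defm above should expand to one predicable def plus four predicated
// variants. For LDriw_indexed_shl that is roughly:
//
//   LDriw_indexed_shl_V4           $dst = memw($src1+$src2<<#$offset)
//   LDriw_indexed_shl_cPt_V4       if ($src1) $dst = memw($src2+$src3<<#$offset)
//   LDriw_indexed_shl_cNotPt_V4    if (!$src1) ...
//   LDriw_indexed_shl_cdnPt_V4     if ($src1.new) ...
//   LDriw_indexed_shl_cdnNotPt_V4  if (!$src1.new) ...
//
// i.e. roughly the same opcode set as the hand-written defs replaced above.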
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global),
- "$dst=memw(#$global)",
- []>,
+// 'def pats' for load instructions with base + register offset and a
+// non-zero immediate value. The immediate value gives the left-shift amount
+// applied to the second register operand.
+let AddedComplexity = 40 in {
+def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDrib_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-// if (Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1) $dst=memw(##$global)",
- []>,
+def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-
-// if (!Pv) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1) $dst=memw(##$global)",
- []>,
+def : Pat <(i32 (extloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-// if (Pv.new) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if ($src1.new) $dst=memw(##$global)",
- []>,
+def : Pat <(i32 (sextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDrih_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-
-// if (!Pv.new) Rt=memw(##global)
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global),
- "if (!$src1.new) $dst=memw(##$global)",
- []>,
+def : Pat <(i32 (zextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-
-
-def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
- (i64 (LDd_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (extloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDw_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (load (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDriw_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
-def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress) -> memw(#foo + 0)
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i64 (LDd_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>,
- Requires<[HasV4T]>;
-
-// When the Interprocedural Global Variable optimizer realizes that a certain
-// global variable takes only two constant values, it shrinks the global to
-// a boolean. Catch those loads here in the following 3 patterns.
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i64 (load (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$offset)))),
+ (LDrid_indexed_shl_V4 IntRegs:$src1,
+ IntRegs:$src2, u2ImmPred:$offset)>,
Requires<[HasV4T]>;
+}
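// For illustration: these patterns cover addresses of the form
// base + (index << scale). A hypothetical C access "v = p[i]" with int *p
// gives the DAG (i32 (load (add p, (shl i, 2)))), which should be selected as
// LDriw_indexed_shl_V4 p, i, 2, i.e. "r0 = memw(r1+r2<<#2)".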
-// Map from load(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
- Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+// 'def pats' for load instructions with base + register offset and a
+// zero immediate value (no shift applied).
+let AddedComplexity = 10 in {
+def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memub(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDh_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDh_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memuh(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-// Map from load(globaladdress) -> memw(#foo)
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i32 (LDw_GP_V4 tglobaladdr:$global))>,
+def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-def : Pat <(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
+}
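// For illustration: with no explicit shl in the address, the patterns above
// reuse the same instructions with a zero shift amount, e.g.
// (i32 (load (add r1, r2))) becomes "r0 = memw(r1+r2<<#0)". The lower
// AddedComplexity (10 vs. 40) should let the shifted patterns win whenever a
// shl node is present.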
-def : Pat <(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memd(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memub(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memuh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
+// zext i1->i64
+def : Pat <(i64 (zext (i1 PredRegs:$src1))),
+ (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+ Requires<[HasV4T]>;
+
+// zext i32->i64
+def : Pat <(i64 (zext (i32 IntRegs:$src1))),
+ (i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>,
+ Requires<[HasV4T]>;
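// For illustration: COMBINE_Ir_V4 builds a 64-bit register pair from an
// immediate (high word) and a register (low word), so a zero extension to i64
// is roughly "r1:0 = combine(#0, r0)". The patterns below apply the same idea
// to zero- and any-extending loads.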
+// zextload i8->i64
+def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// zextload i1->i64
+def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// zextload i16->i64
+def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// anyext load i16->i64
+def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// zextload i32->i64
+def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
-// Map from load(globaladdress + x) -> memh(#foo + x)
let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
+def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+// anyext load i32->i64
+def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
-// Map from load(globaladdress + x) -> memuh(#foo + x)
let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
+def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
-// Map from load(globaladdress + x) -> memw(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
//===----------------------------------------------------------------------===//
@@ -1747,80 +534,192 @@ def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
/// last operand.
///
-// memd(Re=#U6)=Rtt
+// memd(Re=#U)=Rtt
+let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins DoubleRegs:$src1, u6Imm:$src2),
- "memd($dst1=#$src2) = $src1",
+ (ins DoubleRegs:$src1, u0AlwaysExt:$src2),
+ "memd($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memb(Re=#U6)=Rs
+// memb(Re=#U)=Rs
def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u6Imm:$src2),
- "memb($dst1=#$src2) = $src1",
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memb($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memh(Re=#U6)=Rs
+// memh(Re=#U)=Rs
def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u6Imm:$src2),
- "memh($dst1=#$src2) = $src1",
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memh($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memw(Re=#U6)=Rs
+// memw(Re=#U)=Rs
def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u6Imm:$src2),
- "memw($dst1=#$src2) = $src1",
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memw($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
+}
-// memd(Re=#U6)=Rtt
+// memd(Re=#U)=Rtt
+let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins DoubleRegs:$src1, globaladdress:$src2),
+ (ins DoubleRegs:$src1, globaladdressExt:$src2),
"memd($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memb(Re=#U6)=Rs
+// memb(Re=#U)=Rs
def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdress:$src2),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
"memb($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memh(Re=#U6)=Rs
+// memh(Re=#U)=Rs
def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdress:$src2),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
"memh($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memw(Re=#U6)=Rs
+// memw(Re=#U)=Rs
def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdress:$src2),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
"memw($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
+}
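// For illustration: marking these defs isExtended with u0AlwaysExt /
// globaladdressExt operands makes the stored address a constant-extended
// immediate, written "##". A hypothetical use, assuming a symbol foo:
//
//   memw(r1=##foo) = r2
//
// stores r2 to the address of foo and leaves that address in r1.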
-// memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, isPredicable = 1 in
-def STrid_indexed_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4),
- "memd($src1+$src2<<#$src3) = $src4",
- [(store (i64 DoubleRegs:$src4),
- (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2), u2ImmPred:$src3)))]>,
+// multiclass for store instructions with base + register offset addressing
+// mode
+multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ RC:$src5),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5",
+ []>,
Requires<[HasV4T]>;
+}
+
+multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let isNVStorable = 1 in
+multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+ let isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
+ mnemonic#"($src1+$src2<<#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
+
+// multiclass for new-value store instructions with base + register offset
+// addressing mode.
+multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ RC:$src5),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let mayStore = 1, isNVStore = 1 in
+multiclass ST_Idxd_shl_nv<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+ let isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
+ mnemonic#"($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 1>;
+ }
+ }
+}
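// For illustration: the _nv variants store a value produced by another
// instruction in the same packet, marked ".new" on the source. A hypothetical
// packet:
//
//   { r4 = add(r5, #1)
//     if (p0) memb(r2+r3<<#1) = r4.new }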
+
+let addrMode = BaseRegOffset, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT in {
+ defm STrib_indexed_shl: ST_Idxd_shl<"memb", "STrib", IntRegs>,
+ ST_Idxd_shl_nv<"memb", "STrib", IntRegs>, AddrModeRel;
+
+ defm STrih_indexed_shl: ST_Idxd_shl<"memh", "STrih", IntRegs>,
+ ST_Idxd_shl_nv<"memh", "STrih", IntRegs>, AddrModeRel;
+
+ defm STriw_indexed_shl: ST_Idxd_shl<"memw", "STriw", IntRegs>,
+ ST_Idxd_shl_nv<"memw", "STriw", IntRegs>, AddrModeRel;
+
+ let isNVStorable = 0 in
+ defm STrid_indexed_shl: ST_Idxd_shl<"memd", "STrid", DoubleRegs>, AddrModeRel;
+}
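// For illustration: each defm above should produce a regular family plus,
// except for memd (isNVStorable = 0), a new-value family, e.g. roughly
// STriw_indexed_shl_V4, STriw_indexed_shl_cPt_V4, ... alongside
// STriw_indexed_shl_nv_V4, STriw_indexed_shl_cPt_nv_V4, ..., all related
// through AddrModeRel.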
+
+let Predicates = [HasV4T], AddedComplexity = 10 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2,
+ u2ImmPred:$src3))),
+ (STrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2,
+ u2ImmPred:$src3))),
+ (STrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(store (i32 IntRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
+ (STriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(store (i64 DoubleRegs:$src4),
+ (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
+ (STrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+ u2ImmPred:$src3, DoubleRegs:$src4)>;
+}
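// For illustration: mirroring the load patterns earlier, a hypothetical C
// store "p[i] = v" with int *p gives the DAG
// (store v, (add p, (shl i, 2))), which the third pattern above should select
// as STriw_indexed_shl_V4 p, i, 2, v, i.e. "memw(r1+r2<<#2) = r3".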
// memd(Ru<<#u2+#U6)=Rtt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10,
+validSubTargets = HasV4SubT in
def STrid_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4),
"memd($src1<<#$src2+#$src3) = $src4",
[(store (i64 DoubleRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memd(Rx++#s4:3)=Rtt
@@ -1834,143 +733,81 @@ def STrid_shl_V4 : STInst<(outs),
// if ([!]Pv[.new]) memd(#u6)=Rtt
// TODO: needs to be implemented.
-// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt
-// if (Pv) memd(Rs+#u6:3)=Rtt
-// if (Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
- "if ($src1.new) memd($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-// if (!Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
- "if (!$src1.new) memd($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memd(Rs+#u6:3)=Rtt
-// if (Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
- DoubleRegs:$src4),
- "if ($src1.new) memd($src2+#$src3) = $src4",
+//===----------------------------------------------------------------------===//
+// multiclass for store instructions with base + immediate offset
+// addressing mode and immediate stored value.
+// mem[bhw](Rs+#u6:[012])=#S8
+// if ([!]Pv[.new]) mem[bhw](Rs+#u6:[012])=#S6
+//===----------------------------------------------------------------------===//
+multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = #$src4",
[]>,
Requires<[HasV4T]>;
+}
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-// if (!Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
- DoubleRegs:$src4),
- "if (!$src1.new) memd($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
+multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>;
+ }
+}
-// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt
-// if (Pv) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_shl_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- DoubleRegs:$src5),
- "if ($src1) memd($src2+$src3<<#$src4) = $src5",
+let isExtendable = 1, isExtentSigned = 1, neverHasSideEffects = 1 in
+multiclass ST_Imm<string mnemonic, string CextOp, Operand OffsetOp> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
+ let opExtendable = 2, opExtentBits = 8, isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins IntRegs:$src1, OffsetOp:$src2, s8Ext:$src3),
+ mnemonic#"($src1+#$src2) = #$src3",
[]>,
Requires<[HasV4T]>;
-// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_shl_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- DoubleRegs:$src5),
- "if ($src1.new) memd($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-// if (!Pv) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_shl_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- DoubleRegs:$src5),
- "if (!$src1) memd($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrid_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- DoubleRegs:$src5),
- "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 3, opExtentBits = 6, isPredicated = 1 in {
+ defm Pt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 0>;
+ defm NotPt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 1>;
+ }
+ }
+}
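// For illustration: ST_Imm covers storing an immediate to base + immediate
// offset. The unpredicated form carries an 8-bit signed value
// (opExtentBits = 8), the predicated forms only 6 bits, and both are
// constant-extendable. Hypothetical assembly:
//
//   memb(r0+#3) = #-1           // fits the 8-bit unpredicated range
//   if (!p0) memw(r2+#8) = #25  // predicated forms carry only 6 bits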
-// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt
-// if (Pv) memd(Rx++#s4:3)=Rtt
-// if (Pv.new) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def POST_STdri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if ($src1.new) memd($src3++#$offset) = $src2",
- [],
- "$src3 = $dst">,
- Requires<[HasV4T]>;
+let addrMode = BaseImmOffset, InputType = "imm",
+ validSubTargets = HasV4SubT in {
+ defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel, PredNewRel;
+ defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel, PredNewRel;
+ defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel, PredNewRel;
+}
-// if (!Pv) memd(Rx++#s4:3)=Rtt
-// if (!Pv.new) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def POST_STdri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if (!$src1.new) memd($src3++#$offset) = $src2",
- [],
- "$src3 = $dst">,
- Requires<[HasV4T]>;
+let Predicates = [HasV4T], AddedComplexity = 10 in {
+def: Pat<(truncstorei8 s8ExtPred:$src3, (add IntRegs:$src1, u6_0ImmPred:$src2)),
+ (STrib_imm_V4 IntRegs:$src1, u6_0ImmPred:$src2, s8ExtPred:$src3)>;
+def: Pat<(truncstorei16 s8ExtPred:$src3, (add IntRegs:$src1,
+ u6_1ImmPred:$src2)),
+ (STrih_imm_V4 IntRegs:$src1, u6_1ImmPred:$src2, s8ExtPred:$src3)>;
-// Store byte.
-// memb(Rs+#u6:0)=#S8
-let AddedComplexity = 10, isPredicable = 1 in
-def STrib_imm_V4 : STInst<(outs),
- (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3),
- "memb($src1+#$src2) = #$src3",
- [(truncstorei8 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
- u6_0ImmPred:$src2))]>,
- Requires<[HasV4T]>;
+def: Pat<(store s8ExtPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2)),
+ (STriw_imm_V4 IntRegs:$src1, u6_2ImmPred:$src2, s8ExtPred:$src3)>;
+}
-// memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10, isPredicable = 1 in
-def STrib_indexed_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memb($src1+$src2<<#$src3) = $src4",
- [(truncstorei8 (i32 IntRegs:$src4),
- (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$src3)))]>,
- Requires<[HasV4T]>;
+let AddedComplexity = 6 in
+def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
+ (STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ Requires<[HasV4T]>;
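// For illustration: with a bare pointer and no add node, the fallback pattern
// above uses a zero offset, so a hypothetical "*p = 5" with char *p becomes
// "memb(r0+#0) = #5". AddedComplexity = 6 merely ranks it above the generic
// selections; matching memh/memw fallbacks appear further below.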
// memb(Ru<<#u2+#U6)=Rt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STrib_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memb($src1<<#$src2+#$src3) = $src4",
[(truncstorei8 (i32 IntRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memb(Rx++#s4:0:circ(Mu))=Rt
@@ -1980,185 +817,28 @@ def STrib_shl_V4 : STInst<(outs),
// memb(gp+#u16:0)=Rt
-// Store byte conditionally.
-// if ([!]Pv[.new]) memb(#u6)=Rt
-// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6
-// if (Pv) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_imm_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
- "if ($src1) memb($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_imm_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
- "if ($src1.new) memb($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_imm_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
- "if (!$src1) memb($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_imm_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
- "if (!$src1.new) memb($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt
-// if (Pv) memb(Rs+#u6:0)=Rt
-// if (Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memb($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+#u6:0)=Rt
-// if (!Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memb($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(Rs+#u6:0)=Rt
-// if (!Pv) memb(Rs+#u6:0)=Rt
-// if (Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memb($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memb($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt
-// if (Pv) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memb($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memb($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memb($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt
-// if (Pv) memb(Rx++#s4:0)=Rt
-// if (Pv.new) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1.new) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rx++#s4:0)=Rt
-// if (!Pv.new) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1.new) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
// Store halfword.
// TODO: needs to be implemented
// memh(Re=#U6)=Rt.H
// memh(Rs+#s11:1)=Rt.H
-// memh(Rs+#u6:1)=#S8
-let AddedComplexity = 10, isPredicable = 1 in
-def STrih_imm_V4 : STInst<(outs),
- (ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3),
- "memh($src1+#$src2) = #$src3",
- [(truncstorei16 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
- u6_1ImmPred:$src2))]>,
- Requires<[HasV4T]>;
+let AddedComplexity = 6 in
+def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
+ (STrih_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ Requires<[HasV4T]>;
// memh(Rs+Ru<<#u2)=Rt.H
// TODO: needs to be implemented.
-// memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10, isPredicable = 1 in
-def STrih_indexed_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memh($src1+$src2<<#$src3) = $src4",
- [(truncstorei16 (i32 IntRegs:$src4),
- (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$src3)))]>,
- Requires<[HasV4T]>;
-
// memh(Ru<<#u2+#U6)=Rt.H
// memh(Ru<<#u2+#U6)=Rt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STrih_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memh($src1<<#$src2+#$src3) = $src4",
[(truncstorei16 (i32 IntRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memh(Rx++#s4:1:circ(Mu))=Rt.H
@@ -2173,152 +853,13 @@ def STrih_shl_V4 : STInst<(outs),
// if ([!]Pv[.new]) memh(#u6)=Rt.H
// if ([!]Pv[.new]) memh(#u6)=Rt
-// if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6
-// if (Pv) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_imm_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
- "if ($src1) memh($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_imm_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
- "if ($src1.new) memh($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_imm_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
- "if (!$src1) memh($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_imm_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
- "if (!$src1.new) memh($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
// TODO: needs to be implemented.
-// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt
-// if (Pv) memh(Rs+#u6:1)=Rt
-// if (Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memh($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=Rt
-// if (!Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memh($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memh($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memh($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H
-// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt
-// if (Pv) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memh($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memh($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memh($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
// TODO: Needs to be implemented.
-// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt
-// if (Pv) memh(Rx++#s4:1)=Rt
-// if (Pv.new) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1.new) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rx++#s4:1)=Rt
-// if (!Pv.new) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1.new) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
// Store word.
// memw(Re=#U6)=Rt
// TODO: Needs to be implemented.
@@ -2331,34 +872,20 @@ def STriw_pred_V4 : STInst2<(outs),
[]>,
Requires<[HasV4T]>;
-
-// memw(Rs+#u6:2)=#S8
-let AddedComplexity = 10, isPredicable = 1 in
-def STriw_imm_V4 : STInst<(outs),
- (ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3),
- "memw($src1+#$src2) = #$src3",
- [(store s8ImmPred:$src3, (add (i32 IntRegs:$src1),
- u6_2ImmPred:$src2))]>,
- Requires<[HasV4T]>;
-
-// memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10, isPredicable = 1 in
-def STriw_indexed_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memw($src1+$src2<<#$src3) = $src4",
- [(store (i32 IntRegs:$src4), (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$src3)))]>,
- Requires<[HasV4T]>;
+let AddedComplexity = 6 in
+def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
+ (STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+ Requires<[HasV4T]>;
// memw(Ru<<#u2+#U6)=Rt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STriw_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memw($src1<<#$src2+#$src3) = $src4",
[(store (i32 IntRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memw(Rx++#s4:2)=Rt
@@ -2366,592 +893,6 @@ def STriw_shl_V4 : STInst<(outs),
// memw(Rx++I:circ(Mu))=Rt
// memw(Rx++Mu)=Rt
// memw(Rx++Mu:brev)=Rt
-// memw(gp+#u16:2)=Rt
-
-
-// Store word conditionally.
-
-// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6
-// if (Pv) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_imm_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
- "if ($src1) memw($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_imm_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
- "if ($src1.new) memw($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_imm_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
- "if (!$src1) memw($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_imm_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
- "if (!$src1.new) memw($src2+#$src3) = #$src4",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt
-// if (Pv) memw(Rs+#u6:2)=Rt
-// if (Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memw($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=Rt
-// if (!Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memw($addr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(Rs+#u6:2)=Rt
-// if (!Pv) memw(Rs+#u6:2)=Rt
-// if (Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memw($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memw($src2+#$src3) = $src4",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt
-// if (Pv) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memw($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memw($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memw($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt
-// if (Pv) memw(Rx++#s4:2)=Rt
-// if (Pv.new) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1.new) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rx++#s4:2)=Rt
-// if (!Pv.new) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1.new) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
-/// store to global address
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STrid_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
- "memd(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if ($src1) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if (!$src1) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if ($src1.new) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if (!$src1.new) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STrib_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memb(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STrih_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memh(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STriw_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memw(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// memd(#global)=Rtt
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STd_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, DoubleRegs:$src),
- "memd(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STd_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if ($src1) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STd_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if (!$src1) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STd_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if ($src1.new) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STd_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
- "if (!$src1.new) memd(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// memb(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STb_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memb(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STb_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STb_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STb_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STb_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memb(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// memh(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STh_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memh(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STh_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STh_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STh_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STh_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memh(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// memw(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STw_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memw(#$global) = $src",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STw_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STw_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STw_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STw_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memw(##$global) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-// 64 bit atomic store
-def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
- (i64 DoubleRegs:$src1)),
- (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memd(#foo)
-let AddedComplexity = 100 in
-def : Pat <(store (i64 DoubleRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// 8 bit atomic store
-def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memb(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
-// to "r0 = 1; memw(#foo) = r0"
-let AddedComplexity = 100 in
-def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memh(#foo)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1),
- (HexagonCONST32_GP tglobaladdr:$global)),
- (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// 32 bit atomic store
-def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
- (i32 IntRegs:$src1)),
- (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress) -> memw(#foo)
-let AddedComplexity = 100 in
-def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
- (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i64 DoubleRegs:$src1)),
- (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memd(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(store (i64 DoubleRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memw(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(store (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-
//===----------------------------------------------------------------------===//
// ST -
@@ -2962,853 +903,202 @@ def : Pat<(store (i32 IntRegs:$src1),
// NV/ST +
//===----------------------------------------------------------------------===//
-// Store new-value byte.
-
-// memb(Re=#U6)=Nt.new
-// memb(Rs+#s11:0)=Nt.new
-let mayStore = 1, isPredicable = 1 in
-def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
- "memb($addr) = $src1.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, isPredicable = 1 in
-def STrib_indexed_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
- "memb($src1+#$src2) = $src3.new",
- []>,
- Requires<[HasV4T]>;
-
-// memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
-def STrib_indexed_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memb($src1+$src2<<#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// memb(Ru<<#u2+#U6)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
-def STrib_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
- "memb($src1<<#$src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
-def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4_0Imm:$offset),
- "memb($src2++#$offset) = $src1.new",
- [],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
-// memb(Rx++#s4:0:circ(Mu))=Nt.new
-// memb(Rx++I:circ(Mu))=Nt.new
-// memb(Rx++Mu)=Nt.new
-// memb(Rx++Mu:brev)=Nt.new
-
-// memb(gp+#u16:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memb(#$global+$offset) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
-// memb(#global)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memb(#$global) = $src.new",
+// Multiclass for new-value store instructions with base + immediate offset.
+//
+multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
+ Operand predImmOp, bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2+#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
+}
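+// For illustration: with isNot = 1 and isPredNew = 1, the string
+// concatenation above produces
+//   "if (!$src1.new) memb($src2+#$src3) = $src4.new"
+// for the memb instantiation.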
-// Store new-value byte conditionally.
-// if ([!]Pv[.new]) memb(#u6)=Nt.new
-// if (Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memb($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
+multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp,
+ bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>;
+ }
+}
-// if (Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memb($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
+let mayStore = 1, isNVStore = 1, neverHasSideEffects = 1, isExtendable = 1 in
+multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC,
+ Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+ bits<5> PredImmBits> {
-// if (!Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memb($addr) = $src2.new",
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2) = $src3.new",
[]>,
Requires<[HasV4T]>;
-// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memb($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+ isPredicated = 1 in {
+ defm Pt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 0>;
+ defm NotPt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 1>;
+ }
+ }
+}
-// if (Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if ($src1) memb($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
+let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in {
+ defm STrib_indexed: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext,
+ u6_0Ext, 11, 6>, AddrModeRel;
+ defm STrih_indexed: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext,
+ u6_1Ext, 12, 7>, AddrModeRel;
+ defm STriw_indexed: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext,
+ u6_2Ext, 13, 8>, AddrModeRel;
+}
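+// A sketch of the expansion for the memb case (memh/memw are analogous,
+// with halfword/word-scaled offsets):
+//   STrib_indexed_nv_V4           memb(Rs+#s11:0) = Nt.new
+//   STrib_indexed_cPt_nv_V4       if (Pv) memb(Rs+#u6:0) = Nt.new
+//   STrib_indexed_cdnPt_nv_V4     if (Pv.new) memb(Rs+#u6:0) = Nt.new
+//   STrib_indexed_cNotPt_nv_V4    if (!Pv) memb(Rs+#u6:0) = Nt.new
+//   STrib_indexed_cdnNotPt_nv_V4  if (!Pv.new) memb(Rs+#u6:0) = Nt.new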
-// if (Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memb($src2+#$src3) = $src4.new",
+// Multiclass for new-value store instructions with base + immediate offset
+// and a MEMri operand.
+multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($addr) = $src2.new",
[]>,
Requires<[HasV4T]>;
+}
-// if (!Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if (!$src1) memb($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
+multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>;
-// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memb($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
+ // Predicate new
+ defm _cdn#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 1>;
+ }
+}
+let mayStore = 1, isNVStore = 1, isExtendable = 1, neverHasSideEffects = 1 in
+multiclass ST_MEMri_nv<string mnemonic, string CextOp, RegisterClass RC,
+ bits<5> ImmBits, bits<5> PredImmBits> {
-// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new
-// if (Pv) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memb($src2+$src3<<#$src4) = $src5.new",
+ let CextOpcode = CextOp, BaseOpcode = CextOp in {
+ let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+ isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins MEMri:$addr, RC:$src),
+ mnemonic#"($addr) = $src.new",
[]>,
Requires<[HasV4T]>;
-// if (Pv.new) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memb($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+ neverHasSideEffects = 1, isPredicated = 1 in {
+ defm Pt : ST_MEMri_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_MEMri_Pred_nv<mnemonic, RC, 1>;
+ }
+ }
+}
-// if (!Pv) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memb($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
+let addrMode = BaseImmOffset, isMEMri = "true", validSubTargets = HasV4SubT,
+mayStore = 1 in {
+ defm STrib: ST_MEMri_nv<"memb", "STrib", IntRegs, 11, 6>, AddrModeRel;
+ defm STrih: ST_MEMri_nv<"memh", "STrih", IntRegs, 12, 7>, AddrModeRel;
+ defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
+}
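+// Likewise, the MEMri-based memb defm yields (roughly):
+//   STrib_nv_V4           memb($addr) = Nt.new
+//   STrib_cPt_nv_V4       if (Pv) memb($addr) = Nt.new
+//   STrib_cdnPt_nv_V4     if (Pv.new) memb($addr) = Nt.new
+//   STrib_cNotPt_nv_V4    if (!Pv) memb($addr) = Nt.new
+//   STrib_cdnNotPt_nv_V4  if (!Pv.new) memb($addr) = Nt.new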
-// if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+// memb(Ru<<#u2+#U6)=Nt.new
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
+def STrib_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
-// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new
-// if (Pv) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1.new) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// Post increment store
+// mem[bhw](Rx++#s4:[012])=Nt.new
+//===----------------------------------------------------------------------===//
-// if (!Pv.new) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1.new) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
+multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3.new",
+ [],
+ "$src2 = $dst">,
Requires<[HasV4T]>;
+}
+multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
-// Store new-value halfword.
-// memh(Re=#U6)=Nt.new
-// memh(Rs+#s11:1)=Nt.new
-let mayStore = 1, isPredicable = 1 in
-def STrih_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
- "memh($addr) = $src1.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, isPredicable = 1 in
-def STrih_indexed_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
- "memh($src1+#$src2) = $src3.new",
- []>,
- Requires<[HasV4T]>;
+let hasCtrlDep = 1, isNVStore = 1, neverHasSideEffects = 1 in
+multiclass ST_PostInc_nv<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
+
+ let BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
+ mnemonic#"($src1++#$offset) = $src2.new",
+ [],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 0 >;
+ defm NotPt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 1 >;
+ }
+ }
+}
-// memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
-def STrih_indexed_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memh($src1+$src2<<#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
+let validSubTargets = HasV4SubT in {
+defm POST_STbri: ST_PostInc_nv <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel;
+defm POST_SThri: ST_PostInc_nv <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel;
+defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
+}
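+// Post-increment expansion sketch for memb (memh/memw analogous):
+//   POST_STbri_nv_V4           memb(Rx++#s4:0) = Nt.new
+//   POST_STbri_cPt_nv_V4       if (Pv) memb(Rx++#s4:0) = Nt.new
+//   POST_STbri_cdnPt_nv_V4     if (Pv.new) memb(Rx++#s4:0) = Nt.new
+//   POST_STbri_cNotPt_nv_V4    if (!Pv) memb(Rx++#s4:0) = Nt.new
+//   POST_STbri_cdnNotPt_nv_V4  if (!Pv.new) memb(Rx++#s4:0) = Nt.new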
+// memb(Rx++#s4:0:circ(Mu))=Nt.new
+// memb(Rx++I:circ(Mu))=Nt.new
+// memb(Rx++Mu)=Nt.new
+// memb(Rx++Mu:brev)=Nt.new
// memh(Ru<<#u2+#U6)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
def STrih_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memh($src1<<#$src2+#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
-// memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
-def POST_SThri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset),
- "memh($src2++#$offset) = $src1.new",
- [],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
// memh(Rx++#s4:1:circ(Mu))=Nt.new
// memh(Rx++I:circ(Mu))=Nt.new
// memh(Rx++Mu)=Nt.new
// memh(Rx++Mu:brev)=Nt.new
-// memh(gp+#u16:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memh(#$global+$offset) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
-// memh(#global)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memh(#$global) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
-
-// Store new-value halfword conditionally.
-
-// if ([!]Pv[.new]) memh(#u6)=Nt.new
-
-// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new
-// if (Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memh($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if ($src1) memh($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memh($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if (!$src1) memh($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memh($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new
-// if (Pv) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memh($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memh($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memh($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[]) memh(Rx++#s4:1)=Nt.new
-// if (Pv) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1.new) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1.new) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
-// Store new-value word.
-
-// memw(Re=#U6)=Nt.new
-// memw(Rs+#s11:2)=Nt.new
-let mayStore = 1, isPredicable = 1 in
-def STriw_nv_V4 : NVInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$src1),
- "memw($addr) = $src1.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, isPredicable = 1 in
-def STriw_indexed_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
- "memw($src1+#$src2) = $src3.new",
- []>,
- Requires<[HasV4T]>;
-
-// memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
-def STriw_indexed_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
- "memw($src1+$src2<<#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
// memw(Ru<<#u2+#U6)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
def STriw_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memw($src1<<#$src2+#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
-// memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
-def POST_STwri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4_2Imm:$offset),
- "memw($src2++#$offset) = $src1.new",
- [],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
// memw(Rx++#s4:2:circ(Mu))=Nt.new
// memw(Rx++I:circ(Mu))=Nt.new
// memw(Rx++Mu)=Nt.new
// memw(Rx++Mu:brev)=Nt.new
-// memw(gp+#u16:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memw(#$global+$offset) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STw_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, IntRegs:$src),
- "memw(#$global) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
-// Store new-value word conditionally.
-
-// if ([!]Pv[.new]) memw(#u6)=Nt.new
-
-// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new
-// if (Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if ($src1.new) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
- "if (!$src1.new) memw($addr) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if ($src1) memw($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if ($src1.new) memw($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if (!$src1) memw($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
- isPredicated = 1 in
-def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
- "if (!$src1.new) memw($src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
-
-// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new
-// if (Pv) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1) memw($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if ($src1.new) memw($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
- isPredicated = 1 in
-def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
- IntRegs:$src5),
- "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new",
- []>,
- Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new
-// if (Pv) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1.new) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1.new) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
-
-// if (Pv) memb(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memb(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memb(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memh(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memh(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STw_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv) memw(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if ($src1.new) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(##global) = Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
- "if (!$src1.new) memw(##$global) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
//===----------------------------------------------------------------------===//
// NV/ST -
@@ -3998,31 +1288,37 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
// Add and accumulate.
// Rd=add(Rs,add(Ru,#s6))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, s6Ext:$src3),
"$dst = add($src1, add($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
- s6ImmPred:$src3)))]>,
+ s6_16ExtPred:$src3)))]>,
Requires<[HasV4T]>;
// Rd=add(Rs,sub(#s6,Ru))
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3),
"$dst = add($src1, sub(#$src2, $src3))",
[(set (i32 IntRegs:$dst),
- (add (i32 IntRegs:$src1), (sub s6ImmPred:$src2,
+ (add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2,
(i32 IntRegs:$src3))))]>,
Requires<[HasV4T]>;
// Generates the same instruction as ADDr_SUBri_V4 but matches a different
// pattern.
// Rd=add(Rs,sub(#s6,Ru))
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3),
"$dst = add($src1, sub(#$src2, $src3))",
[(set (i32 IntRegs:$dst),
- (sub (add (i32 IntRegs:$src1), s6ImmPred:$src2),
+ (sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2),
(i32 IntRegs:$src3)))]>,
Requires<[HasV4T]>;
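// Note: add($src1, sub(#$src2, $src3)) and sub(add($src1, #$src2), $src3)
// compute the same value (r1 + (6 - r2) == (r1 + 6) - r2), so both DAG
// shapes select to the same instruction.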
@@ -4036,6 +1332,7 @@ def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
// Logical doublewords.
// Rdd=and(Rtt,~Rss)
+let validSubTargets = HasV4SubT in
def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2),
"$dst = and($src1, ~$src2)",
@@ -4044,6 +1341,7 @@ def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
Requires<[HasV4T]>;
// Rdd=or(Rtt,~Rss)
+let validSubTargets = HasV4SubT in
def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2),
"$dst = or($src1, ~$src2)",
@@ -4054,6 +1352,7 @@ def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
// Logical-logical doublewords.
// Rxx^=xor(Rss,Rtt)
+let validSubTargets = HasV4SubT in
def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
"$dst ^= xor($src2, $src3)",
@@ -4066,17 +1365,20 @@ def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
// Logical-logical words.
// Rx=or(Ru,and(Rx,#s10))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT in
def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
"$dst = or($src1, and($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- s10ImmPred:$src3)))],
+ s10ExtPred:$src3)))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx[&|^]=and(Rs,Rt)
// Rx&=and(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= and($src2, $src3)",
@@ -4087,6 +1389,7 @@ def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=and(Rs,Rt)
+let validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "reg" in
def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= and($src2, $src3)",
@@ -4094,9 +1397,10 @@ def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Rx^=and(Rs,Rt)
+let validSubTargets = HasV4SubT in
def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= and($src2, $src3)",
@@ -4108,6 +1412,7 @@ def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
// Rx[&|^]=and(Rs,~Rt)
// Rx&=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= and($src2, ~$src3)",
@@ -4118,6 +1423,7 @@ def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= and($src2, ~$src3)",
@@ -4128,6 +1434,7 @@ def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx^=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= and($src2, ~$src3)",
@@ -4139,6 +1446,7 @@ def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
// Rx[&|^]=or(Rs,Rt)
// Rx&=or(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= or($src2, $src3)",
@@ -4149,6 +1457,7 @@ def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=or(Rs,Rt)
+let validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "reg" in
def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= or($src2, $src3)",
@@ -4156,9 +1465,10 @@ def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Rx^=or(Rs,Rt)
+let validSubTargets = HasV4SubT in
def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= or($src2, $src3)",
@@ -4170,6 +1480,7 @@ def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
// Rx[&|^]=xor(Rs,Rt)
// Rx&=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= xor($src2, $src3)",
@@ -4180,6 +1491,7 @@ def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= xor($src2, $src3)",
@@ -4190,6 +1502,7 @@ def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx^=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= xor($src2, $src3)",
@@ -4200,24 +1513,28 @@ def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=and(Rs,#s10)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "imm" in
def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
"$dst |= and($src2, #$src3)",
[(set (i32 IntRegs:$dst),
(or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- s10ImmPred:$src3)))],
+ s10ExtPred:$src3)))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Rx|=or(Rs,#s10)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "imm" in
def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
"$dst |= or($src2, #$src3)",
[(set (i32 IntRegs:$dst),
(or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- s10ImmPred:$src3)))],
+ s10ExtPred:$src3)))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Modulo wrap
@@ -4264,25 +1581,41 @@ def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
// Multiply and use lower result.
// Rd=add(#u6,mpyi(Rs,#U6))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
- (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3),
+ (ins u6Ext:$src1, IntRegs:$src2, u6Imm:$src3),
"$dst = add(#$src1, mpyi($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
- u6ImmPred:$src1))]>,
+ u6ExtPred:$src1))]>,
Requires<[HasV4T]>;
-// Rd=add(#u6,mpyi(Rs,Rt))
+// Rd=add(##,mpyi(Rs,#U6))
+def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
+ (HexagonCONST32 tglobaladdr:$src1)),
+ (i32 (ADDi_MPYri_V4 tglobaladdr:$src1, IntRegs:$src2,
+ u6ImmPred:$src3))>;
+// Rd=add(#u6,mpyi(Rs,Rt))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst),
- (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3),
+ (ins u6Ext:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst = add(#$src1, mpyi($src2, $src3))",
[(set (i32 IntRegs:$dst),
(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
- u6ImmPred:$src1))]>,
- Requires<[HasV4T]>;
+ u6ExtPred:$src1))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// Rd=add(##,mpyi(Rs,Rt))
+def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ (HexagonCONST32 tglobaladdr:$src1)),
+ (i32 (ADDi_MPYrr_V4 tglobaladdr:$src1, IntRegs:$src2,
+ IntRegs:$src3))>;
// Rd=add(Ru,mpyi(#u6:2,Rs))
+let validSubTargets = HasV4SubT in
def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3),
"$dst = add($src1, mpyi(#$src2, $src3))",
@@ -4292,15 +1625,18 @@ def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rd=add(Ru,mpyi(Rs,#u6))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, u6Ext:$src3),
"$dst = add($src1, mpyi($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
- u6ImmPred:$src3)))]>,
- Requires<[HasV4T]>;
+ u6ExtPred:$src3)))]>,
+ Requires<[HasV4T]>, ImmRegRel;
// Rx=add(Ru,mpyi(Rx,Rs))
+let validSubTargets = HasV4SubT, InputType = "reg", CextOpcode = "ADD_MPY" in
def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst = add($src1, mpyi($src2, $src3))",
@@ -4308,7 +1644,7 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
"$src2 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Polynomial multiply words
@@ -4351,92 +1687,107 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
// Shift by immediate and accumulate.
// Rx=add(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = add(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=add(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = add(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=sub(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = sub(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=sub(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = sub(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Shift by immediate and logical.
// Rx=and(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = and(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(and (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=and(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = and(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(and (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=or(#u8,asl(Rx,#U5))
-let AddedComplexity = 30 in
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+AddedComplexity = 30, validSubTargets = HasV4SubT in
def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = or(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(or (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=or(#u8,lsr(Rx,#U5))
-let AddedComplexity = 30 in
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+AddedComplexity = 30, validSubTargets = HasV4SubT in
def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = or(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(or (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Shift by register.
// Rd=lsl(#s6,Rt)
+let validSubTargets = HasV4SubT in {
def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
"$dst = lsl(#$src1, $src2)",
[(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1,
@@ -4484,7 +1835,7 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
(i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
-
+}
//===----------------------------------------------------------------------===//
// XTYPE/SHIFT -
@@ -4494,488 +1845,367 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
// MEMOP: Word, Half, Byte
//===----------------------------------------------------------------------===//
+def MEMOPIMM : SDNodeXForm<imm, [{
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int32_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def MEMOPIMM_HALF : SDNodeXForm<imm, [{
+ // -1 .. -31 represented as 65535..65515
+ // assigning to a short restores our desired signed value.
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int16_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def MEMOPIMM_BYTE : SDNodeXForm<imm, [{
+  // -1 .. -31 represented as 255..225
+ // assigning to a char restores our desired signed value.
+ // Call the transformation function XformM5ToU5Imm to get the negative
+ // immediate's positive counterpart.
+ int8_t imm = N->getSExtValue();
+ return XformM5ToU5Imm(imm);
+}]>;
+
+def SETMEMIMM : SDNodeXForm<imm, [{
+  // Return the bit position we will set [0-31], as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformMskToBitPosU5Imm(imm);
+}]>;
+
+def CLRMEMIMM : SDNodeXForm<imm, [{
+  // Return the bit position we will clear [0-31], as an SDNode.
+  // We bitwise-negate the value first.
+ int32_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU5Imm(imm);
+}]>;
+
+def SETMEMIMM_SHORT : SDNodeXForm<imm, [{
+  // Return the bit position we will set [0-15], as an SDNode.
+ int16_t imm = N->getSExtValue();
+ return XformMskToBitPosU4Imm(imm);
+}]>;
+
+def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{
+  // Return the bit position we will clear [0-15], as an SDNode.
+  // We bitwise-negate the value first.
+ int16_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU4Imm(imm);
+}]>;
+
+def SETMEMIMM_BYTE : SDNodeXForm<imm, [{
+  // Return the bit position we will set [0-7], as an SDNode.
+ int8_t imm = N->getSExtValue();
+ return XformMskToBitPosU3Imm(imm);
+}]>;
+
+def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{
+  // Return the bit position we will clear [0-7], as an SDNode.
+  // We bitwise-negate the value first.
+ int8_t imm = ~(N->getSExtValue());
+ return XformMskToBitPosU3Imm(imm);
+}]>;
+
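The Xform* helpers referenced above are defined elsewhere in the Hexagon backend (not in this patch), so their exact signatures are an assumption here. A minimal C++ sketch of the arithmetic the comments describe, using hypothetical standalone names:

    #include <cassert>
    #include <cstdint>

    // Sketch only: map a negative memop immediate (-1..-31) to its positive
    // counterpart, so that "mem += #m5" can be emitted as "mem -= #u5".
    static int32_t xformM5ToU5(int32_t Imm) {
      assert(Imm >= -31 && Imm <= -1 && "constant out of range for memop");
      return -Imm;
    }

    // The _HALF/_BYTE variants truncate first: assigning 65535 to an int16_t
    // (or 255 to an int8_t) restores the signed value -1 before negation.
    static int32_t xformM5ToU5Half(int64_t Raw) {
      int16_t Imm = static_cast<int16_t>(Raw); // 65535 -> -1, 65505 -> -31
      return xformM5ToU5(Imm);
    }

The setbit/clrbit transforms similarly reduce a single-bit mask to its bit position; XformMskToBitPosU5Imm and friends presumably compute the log2 of the (possibly negated) mask.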
//===----------------------------------------------------------------------===//
-// MEMOP: Word
-//
-// Implemented:
-// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
-// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
-// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
-// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
-// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
-// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
-// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5
-// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5
-// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt
-// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt
-// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt
-// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt
-//
-// Not implemented:
-// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
-// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// Template class for MemOp instructions with the register value.
+//===----------------------------------------------------------------------===//
+class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
+ string memOp, bits<2> memOpBits> :
+ MEMInst_V4<(outs),
+ (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta),
+ opc#"($base+#$offset)"#memOp#"$delta",
+ []>,
+ Requires<[HasV4T, UseMEMOP]> {
+
+ bits<5> base;
+ bits<5> delta;
+ bits<32> offset;
+ bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
+
+ let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+ !if (!eq(opcBits, 0b01), offset{6-1},
+ !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+ let IClass = 0b0011;
+ let Inst{27-24} = 0b1110;
+ let Inst{22-21} = opcBits;
+ let Inst{20-16} = base;
+ let Inst{13} = 0b0;
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = memOpBits;
+ let Inst{4-0} = delta;
+}
+
+//===----------------------------------------------------------------------===//
+// Template class for MemOp instructions with the immediate value.
//===----------------------------------------------------------------------===//
+class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
+ string memOp, bits<2> memOpBits> :
+ MEMInst_V4 <(outs),
+ (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta),
+ opc#"($base+#$offset)"#memOp#"#$delta"
+ #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')'
+ []>,
+ Requires<[HasV4T, UseMEMOP]> {
+
+ bits<5> base;
+ bits<5> delta;
+ bits<32> offset;
+ bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2
+
+ let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
+ !if (!eq(opcBits, 0b01), offset{6-1},
+ !if (!eq(opcBits, 0b10), offset{7-2},0)));
+
+ let IClass = 0b0011;
+ let Inst{27-24} = 0b1111;
+ let Inst{22-21} = opcBits;
+ let Inst{20-16} = base;
+ let Inst{13} = 0b0;
+ let Inst{12-7} = offsetBits;
+ let Inst{6-5} = memOpBits;
+ let Inst{4-0} = delta;
+}
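As the offsetBits comments note, the six encoded bits hold a scaled offset: memb stores offset{5-0} (u6:0), memh offset{6-1} (u6:1), memw offset{7-2} (u6:2). A rough C++ illustration of that encoding (function name hypothetical):

    #include <cstdint>

    // Sketch: extract the six encoded offset bits of a memop, per access size.
    // OpcBits: 0 = byte (u6:0), 1 = halfword (u6:1), 2 = word (u6:2).
    static uint32_t encodeMemOpOffset(uint32_t Offset, unsigned OpcBits) {
      switch (OpcBits) {
      case 0:  return Offset & 0x3F;        // offset{5-0}
      case 1:  return (Offset >> 1) & 0x3F; // offset{6-1}
      case 2:  return (Offset >> 2) & 0x3F; // offset{7-2}
      default: return 0;
      }
    }
    // Example: a word memop at byte offset 12 encodes offsetBits = 3.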
+// Multiclass to define MemOp instructions with a register operand.
+multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> {
+ def _ADD#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add
+ def _SUB#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub
+ def _AND#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and
+ def _OR#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or
+}
-// MEMw_ADDSUBi_indexed_V4:
-// pseudo operation for MEMw_ADDi_indexed_V4 and
-// MEMw_SUBi_indexed_V4 a later pass will change it
-// to the corresponding pattern.
-let AddedComplexity = 30 in
-def MEMw_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend),
- "Error; should not emit",
- [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- m6ImmPred:$addend),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += #U5
-let AddedComplexity = 30 in
-def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
- "memw($base+#$offset) += #$addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+// Multiclass to define MemOp instructions with an immediate operand.
+multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> {
+ def _ADD#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >;
+ def _SUB#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >;
+ def _CLRBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =clrbit(", 0b10>;
+ def _SETBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =setbit(", 0b11>;
+}
-// memw(Rs+#u6:2) -= #U5
-let AddedComplexity = 30 in
-def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
- "memw($base+#$offset) -= #$subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += Rt
-let AddedComplexity = 30 in
-def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
- "memw($base+#$offset) += $addend",
- [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$addend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) -= Rt
-let AddedComplexity = 30 in
-def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
- "memw($base+#$offset) -= $subend",
- [(store (sub (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$subend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) &= Rt
-let AddedComplexity = 30 in
-def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
- "memw($base+#$offset) &= $andend",
- [(store (and (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$andend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) |= Rt
-let AddedComplexity = 30 in
-def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
- "memw($base+#$offset) |= $orend",
- [(store (or (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- (i32 IntRegs:$orend)),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// MEMw_ADDSUBi_V4:
-// Pseudo operation for MEMw_ADDi_V4 and MEMw_SUBi_V4
-// a later pass will change it to the right pattern.
-let AddedComplexity = 30 in
-def MEMw_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, m6Imm:$addend),
- "Error; should not emit",
- [(store (add (load ADDRriU6_2:$addr), m6ImmPred:$addend),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += #U5
-let AddedComplexity = 30 in
-def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$addend),
- "memw($addr) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
+ defm r : MemOp_rr <opc, opcBits, ImmOp>;
+ defm i : MemOp_ri <opc, opcBits, ImmOp>;
+}
-// memw(Rs+#u6:2) -= #U5
-let AddedComplexity = 30 in
-def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$subend),
- "memw($addr) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) += Rt
-let AddedComplexity = 30 in
-def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$addend),
- "memw($addr) += $addend",
- [(store (add (load ADDRriU6_2:$addr), (i32 IntRegs:$addend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) -= Rt
-let AddedComplexity = 30 in
-def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$subend),
- "memw($addr) -= $subend",
- [(store (sub (load ADDRriU6_2:$addr), (i32 IntRegs:$subend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) &= Rt
-let AddedComplexity = 30 in
-def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$andend),
- "memw($addr) &= $andend",
- [(store (and (load ADDRriU6_2:$addr), (i32 IntRegs:$andend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memw(Rs+#u6:2) |= Rt
-let AddedComplexity = 30 in
-def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$orend),
- "memw($addr) |= $orend",
- [(store (or (load ADDRriU6_2:$addr), (i32 IntRegs:$orend)),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
+// Define MemOp instructions.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0,
+validSubTargets = HasV4SubT in {
+ let opExtentBits = 6, accessSize = ByteAccess in
+ defm MemOPb : MemOp_base <"memb", 0b00, u6_0Ext>;
+
+ let opExtentBits = 7, accessSize = HalfWordAccess in
+ defm MemOPh : MemOp_base <"memh", 0b01, u6_1Ext>;
+
+ let opExtentBits = 8, accessSize = WordAccess in
+ defm MemOPw : MemOp_base <"memw", 0b10, u6_2Ext>;
+}
//===----------------------------------------------------------------------===//
-// MEMOP: Halfword
-//
-// Implemented:
-// MEMh_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
-// MEMh_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
-// MEMh_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
-// MEMh_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
-// MEMh_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
-// MEMh_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
-// MEMh_ADDi_V4 : memw(Rs+#u6:2)+=#U5
-// MEMh_SUBi_V4 : memw(Rs+#u6:2)-=#U5
-// MEMh_ADDr_V4 : memw(Rs+#u6:2)+=Rt
-// MEMh_SUBr_V4 : memw(Rs+#u6:2)-=Rt
-// MEMh_CLRr_V4 : memw(Rs+#u6:2)&=Rt
-// MEMh_SETr_V4 : memw(Rs+#u6:2)|=Rt
-//
-// Not implemented:
-// MEMh_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMh_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
-// MEMh_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
-// MEMh_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// Multiclass to define 'Def Pats' for ALU operations on memory,
+// where the value used for the ALU operation is an immediate value.
+// mem[bh](Rs+#0) += #U5
+// mem[bh](Rs+#u6) += #U5
//===----------------------------------------------------------------------===//
+multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+ InstHexagon MI, SDNode OpNode> {
+ let AddedComplexity = 180 in
+ def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, #0, u5ImmPred:$addend )>;
+
+ let AddedComplexity = 190 in
+ def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)),
+ u5ImmPred:$addend),
+ (add IntRegs:$base, ExtPred:$offset)),
+ (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>;
+}
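In source terms, the two patterns cover a read-modify-write with and without a constant offset; the higher AddedComplexity on the second makes the indexed form win whenever an offset is present. Illustrative C++ (assuming word memops for concreteness):

    void f(int *p) {
      p[0] += 5; // first pattern:  memw(r0+#0)  += #5
      p[3] += 5; // second pattern: memw(r0+#12) += #5
    }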
-// MEMh_ADDSUBi_indexed_V4:
-// Pseudo operation for MEMh_ADDi_indexed_V4 and
-// MEMh_SUBi_indexed_V4 a later pass will change it
-// to the corresponding pattern.
-let AddedComplexity = 30 in
-def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- m6ImmPred:$addend),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += #U5
-let AddedComplexity = 30 in
-def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend),
- "memh($base+#$offset) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+ InstHexagon addMI, InstHexagon subMI> {
+ defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>;
+ defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>;
+}
-// memh(Rs+#u6:1) -= #U5
-let AddedComplexity = 30 in
-def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend),
- "memh($base+#$offset) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += Rt
-let AddedComplexity = 30 in
-def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
- "memh($base+#$offset) += $addend",
- [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$addend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) -= Rt
-let AddedComplexity = 30 in
-def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
- "memh($base+#$offset) -= $subend",
- [(truncstorei16 (sub (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$subend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) &= Rt
-let AddedComplexity = 30 in
-def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
- "memh($base+#$offset) += $andend",
- [(truncstorei16 (and (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$andend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) |= Rt
-let AddedComplexity = 30 in
-def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
- "memh($base+#$offset) |= $orend",
- [(truncstorei16 (or (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- (i32 IntRegs:$orend)),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// MEMh_ADDSUBi_V4:
-// Pseudo operation for MEMh_ADDi_V4 and MEMh_SUBi_V4
-// a later pass will change it to the right pattern.
-let AddedComplexity = 30 in
-def MEMh_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
- m6ImmPred:$addend), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += #U5
-let AddedComplexity = 30 in
-def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$addend),
- "memh($addr) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
+ MemOPh_ADDi_V4, MemOPh_SUBi_V4>;
+ // Byte
+ defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred,
+ MemOPb_ADDi_V4, MemOPb_SUBi_V4>;
+}
-// memh(Rs+#u6:1) -= #U5
-let AddedComplexity = 30 in
-def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$subend),
- "memh($addr) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) += Rt
-let AddedComplexity = 30 in
-def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$addend),
- "memh($addr) += $addend",
- [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$addend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) -= Rt
-let AddedComplexity = 30 in
-def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$subend),
- "memh($addr) -= $subend",
- [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$subend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) &= Rt
-let AddedComplexity = 30 in
-def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$andend),
- "memh($addr) &= $andend",
- [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$andend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memh(Rs+#u6:1) |= Rt
-let AddedComplexity = 30 in
-def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$orend),
- "memh($addr) |= $orend",
- [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr),
- (i32 IntRegs:$orend)), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
+let Predicates = [HasV4T, UseMEMOP] in {
+ defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
+ // Word
+ defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, MemOPw_ADDi_V4,
+ MemOPw_SUBi_V4>;
+}
//===----------------------------------------------------------------------===//
-// MEMOP: Byte
-//
-// Implemented:
-// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5
-// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5
-// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt
-// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt
-// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt
-// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt
-// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5
-// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5
-// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt
-// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt
-// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt
-// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt
-//
-// Not implemented:
-// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5)
-// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5)
-// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5)
-// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5)
+// Multiclass to define 'Def Pats' for ALU operations on memory,
+// where the value used for the ALU operation is a negative immediate.
+// mem[bh](Rs+#0) += #m5
+// mem[bh](Rs+#u6) += #m5
//===----------------------------------------------------------------------===//
+multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
+ PatLeaf immPred, ComplexPattern addrPred,
+ SDNodeXForm xformFunc, InstHexagon MI> {
+ let AddedComplexity = 190 in
+ def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend),
+ IntRegs:$addr),
+ (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>;
+
+ let AddedComplexity = 195 in
+ def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)),
+ immPred:$subend),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>;
+}
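These patterns fold the add of a small negative constant into the subtract form of the memop, using xformFunc to recover the positive magnitude. Roughly, for C++ source:

    void g(short *p) {
      p[0] += -3; // selected as: memh(r0+#0) -= #3
      p[2] += -3; // selected as: memh(r0+#4) -= #3
    }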
-// MEMb_ADDSUBi_indexed_V4:
-// Pseudo operation for MEMb_ADDi_indexed_V4 and
-// MEMb_SUBi_indexed_V4 a later pass will change it
-// to the corresponding pattern.
-let AddedComplexity = 30 in
-def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- m6ImmPred:$addend),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += #U5
-let AddedComplexity = 30 in
-def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend),
- "memb($base+#$offset) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred,
+ ADDRriU6_1, MEMOPIMM_HALF, MemOPh_SUBi_V4>;
+ // Byte
+ defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred,
+ ADDRriU6_0, MEMOPIMM_BYTE, MemOPb_SUBi_V4>;
+}
-// memb(Rs+#u6:0) -= #U5
-let AddedComplexity = 30 in
-def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend),
- "memb($base+#$offset) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += Rt
-let AddedComplexity = 30 in
-def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend),
- "memb($base+#$offset) += $addend",
- [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$addend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) -= Rt
-let AddedComplexity = 30 in
-def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend),
- "memb($base+#$offset) -= $subend",
- [(truncstorei8 (sub (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$subend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) &= Rt
-let AddedComplexity = 30 in
-def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend),
- "memb($base+#$offset) += $andend",
- [(truncstorei8 (and (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$andend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) |= Rt
-let AddedComplexity = 30 in
-def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend),
- "memb($base+#$offset) |= $orend",
- [(truncstorei8 (or (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- (i32 IntRegs:$orend)),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// MEMb_ADDSUBi_V4:
-// Pseudo operation for MEMb_ADDi_V4 and MEMb_SUBi_V4
-// a later pass will change it to the right pattern.
-let AddedComplexity = 30 in
-def MEMb_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
- m6ImmPred:$addend), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += #U5
-let AddedComplexity = 30 in
-def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$addend),
- "memb($addr) += $addend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
+let Predicates = [HasV4T, UseMEMOP] in {
+ defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend
-// memb(Rs+#u6:0) -= #U5
-let AddedComplexity = 30 in
-def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, u5Imm:$subend),
- "memb($addr) -= $subend",
- []>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) += Rt
-let AddedComplexity = 30 in
-def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$addend),
- "memb($addr) += $addend",
- [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$addend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) -= Rt
-let AddedComplexity = 30 in
-def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$subend),
- "memb($addr) -= $subend",
- [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$subend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) &= Rt
-let AddedComplexity = 30 in
-def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$andend),
- "memb($addr) &= $andend",
- [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$andend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
-// memb(Rs+#u6:0) |= Rt
-let AddedComplexity = 30 in
-def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, IntRegs:$orend),
- "memb($addr) |= $orend",
- [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr),
- (i32 IntRegs:$orend)), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
+ // Word
+ defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred,
+ ADDRriU6_2, MEMOPIMM, MemOPw_SUBi_V4>;
+}
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for bit operations on memory.
+// mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5)
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
+ PatLeaf extPred, ComplexPattern addrPred,
+ SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> {
+
+ // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5)
+ let AddedComplexity = 250 in
+ def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ immPred:$bitend),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>;
+
+ // mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
+ let AddedComplexity = 225 in
+ def : Pat <(stOp (OpNode (ldOp addrPred:$addr), immPred:$bitend),
+ addrPred:$addr),
+ (MI IntRegs:$addr, #0, (xformFunc immPred:$bitend))>;
+}
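The bit memop patterns recognize single-bit masks: an AND with the complement of a power of two becomes clrbit, and an OR with a power of two becomes setbit (the Clr*/Set* predicates presumably restrict the match to exactly such masks). Illustrative C++:

    void h(unsigned *p) {
      *p &= ~(1u << 4); // memw(r0+#0) = clrbit(#4)
      *p |=  (1u << 7); // memw(r0+#0) = setbit(#7)
    }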
+
+multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Byte - clrbit
+ defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred,
+ ADDRriU6_0, CLRMEMIMM_BYTE, MemOPb_CLRBITi_V4, and>;
+ // Byte - setbit
+ defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred,
+ ADDRriU6_0, SETMEMIMM_BYTE, MemOPb_SETBITi_V4, or>;
+ // Half Word - clrbit
+ defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred,
+ ADDRriU6_1, CLRMEMIMM_SHORT, MemOPh_CLRBITi_V4, and>;
+ // Half Word - setbit
+ defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred,
+ ADDRriU6_1, SETMEMIMM_SHORT, MemOPh_SETBITi_V4, or>;
+}
+
+let Predicates = [HasV4T, UseMEMOP] in {
+ // mem[bh](Rs+#0) = [clrbit|setbit](#U5)
+ // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5)
+ defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOpi_bitExtType<extloadi8, extloadi16>; // any extend
+
+ // memw(Rs+#0) = [clrbit|setbit](#U5)
+ // memw(Rs+#u6:2) = [clrbit|setbit](#U5)
+ defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2,
+ CLRMEMIMM, MemOPw_CLRBITi_V4, and>;
+ defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2,
+ SETMEMIMM, MemOPw_SETBITi_V4, or>;
+}
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'def Pats' for ALU operations on memory,
+// where the addend is a register.
+// mem[bhw](Rs+#0) [+-&|]= Rt
+// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+//===----------------------------------------------------------------------===//
+
+multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred,
+ PatLeaf extPred, InstHexagon MI, SDNode OpNode> {
+ let AddedComplexity = 141 in
+ // mem[bhw](Rs+#0) [+-&|]= Rt
+ def : Pat <(stOp (OpNode (ldOp addrPred:$addr), (i32 IntRegs:$addend)),
+ addrPred:$addr),
+ (MI IntRegs:$addr, #0, (i32 IntRegs:$addend) )>;
+
+ // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
+ let AddedComplexity = 150 in
+ def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add IntRegs:$base, extPred:$offset)),
+ (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>;
+}
+
+multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp,
+ ComplexPattern addrPred, PatLeaf extPred,
+ InstHexagon addMI, InstHexagon subMI,
+ InstHexagon andMI, InstHexagon orMI > {
+
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>;
+ defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI, or>;
+}
+
+multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
+ // Half Word
+ defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred,
+ MemOPh_ADDr_V4, MemOPh_SUBr_V4,
+ MemOPh_ANDr_V4, MemOPh_ORr_V4>;
+ // Byte
+ defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred,
+ MemOPb_ADDr_V4, MemOPb_SUBr_V4,
+ MemOPb_ANDr_V4, MemOPb_ORr_V4>;
+}
+
+// Define 'def Pats' for MemOps with register addend.
+let Predicates = [HasV4T, UseMEMOP] in {
+ // Byte, Half Word
+ defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend
+ defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
+ defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend
+ // Word
+ defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, MemOPw_ADDr_V4,
+ MemOPw_SUBr_V4, MemOPw_ANDr_V4, MemOPw_ORr_V4 >;
+}
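The register-addend memops cover the same shapes with Rt in place of an immediate, e.g. in C++:

    void k(unsigned char *p, unsigned char v) {
      p[0] += v; // memb(r0+#0) += r1
      p[6] &= v; // memb(r0+#6) &= r1
    }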
//===----------------------------------------------------------------------===//
// XTYPE/PRED +
@@ -4992,7 +2222,61 @@ def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
// incorrect code for negative numbers.
// Pd=cmpb.eq(Rs,#u8)
-let isCompare = 1 in
+// p=!cmp.eq(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.eq(r1,#s10)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotEQ_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s10Ext:$src2),
+ "$dst = !cmp.eq($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (i32 IntRegs:$src1), s10ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gt(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.gt($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gt(r1,#s10)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGT_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s10Ext:$src2),
+ "$dst = !cmp.gt($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setgt (i32 IntRegs:$src1), s10ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gtu(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.gtu($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gtu(r1,#u9)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGTU_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u9Ext:$src2),
+ "$dst = !cmp.gtu($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, u8Imm:$src2),
"$dst = cmpb.eq($src1, #$src2)",
@@ -5000,8 +2284,14 @@ def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
(seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>,
Requires<[HasV4T]>;
+def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
+ bb:$offset)>,
+ Requires<[HasV4T]>;
+
// Pd=cmpb.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.eq($src1, $src2)",
@@ -5011,7 +2301,7 @@ def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
Requires<[HasV4T]>;
// Pd=cmpb.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.eq($src1, $src2)",
@@ -5021,7 +2311,7 @@ def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
Requires<[HasV4T]>;
// Pd=cmpb.gt(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.gt($src1, $src2)",
@@ -5031,29 +2321,237 @@ def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
Requires<[HasV4T]>;
// Pd=cmpb.gtu(Rs,#u7)
-let isCompare = 1 in
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
+isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "imm" in
def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u7Imm:$src2),
+ (ins IntRegs:$src1, u7Ext:$src2),
"$dst = cmpb.gtu($src1, #$src2)",
[(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
- u7ImmPred:$src2))]>,
- Requires<[HasV4T]>;
+ u7ExtPred:$src2))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// SDNodeXForm for converting immediate C to C-1.
+def DEC_CONST_BYTE : SDNodeXForm<imm, [{
+ // Return the byte immediate const-1 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformU7ToU7M1Imm(imm);
+}]>;
+
+// For the sequence
+// zext( seteq ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.eq(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setne ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.eq(Rs, #u8)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( seteq (Rs, and(Rt, 255)))
+// Generate
+// Pd=cmpb.eq(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt),
+ (i32 (and (i32 IntRegs:$Rs), 255)))))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setne (Rs, and(Rt, 255)))
+// Generate
+// Pd=cmpb.eq(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt),
+ (i32 (and (i32 IntRegs:$Rs), 255)))))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.gtu(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( and(Rs, 254), u8))
+// Generate
+// Pd=cmpb.gtu(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setult ( Rs, Rt))
+// Generate
+// Pd=cmp.ltu(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setlt ( Rs, Rt))
+// Generate
+// Pd=cmp.lt(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
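Hexagon exposes only gt/gtu predicate compares, so the lt/ltu patterns above (and the ge/uge ones below) swap the operands and, for ge/uge, also swap the mux arms. In C++ terms:

    // a <  b  is selected as cmp.gt(b, a) with mux arms (1, 0)
    // a >= b  is selected as cmp.gt(b, a) with mux arms (0, 1)
    int slt(int a, int b) { return (b > a) ? 1 : 0; }
    int sge(int a, int b) { return (b > a) ? 0 : 1; }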
+// For the sequence
+// zext( setugt ( Rs, Rt))
+// Generate
+// Pd=cmp.gtu(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// This pattern interferes with CoreMark performance, so it is not implemented
+// at this time.
+// For the sequence
+// zext( setgt ( Rs, Rt))
+// Generate
+// Pd=cmp.gt(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+
+// For the sequence
+// zext( setuge ( Rs, Rt))
+// Generate
+// Pd=cmp.ltu(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setge ( Rs, Rt))
+// Generate
+// Pd=cmp.lt(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setule ( Rs, Rt))
+// Generate
+// Pd=cmp.gtu(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setle ( Rs, Rt))
+// Generate
+// Pd=cmp.gt(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setult ( and(Rs, 255), u8))
+// Use the isdigit transformation below
+
+// Generate code of the form 'mux_ii(cmpbgtu(Rdd, C-1),0,1)'
+// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;
+// The isdigit transformation relies on two 'clever' aspects:
+// 1) The data type is unsigned, which allows us to eliminate a zero test
+//    after biasing the expression by 48. We depend on the representation
+//    and semantics of the unsigned types.
+// 2) The front end has converted <= 9 into < 10 on entry to LLVM.
+//
+// For the C code:
+// retval = ((c>='0') & (c<='9')) ? 1 : 0;
+// The code is transformed upstream of llvm into
+// retval = (c-48) < 10 ? 1 : 0;
+let AddedComplexity = 139 in
+def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
+ u7StrictPosImmPred:$src2)))),
+ (i32 (MUX_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1),
+ (DEC_CONST_BYTE u7StrictPosImmPred:$src2))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
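A worked example of the whole chain, showing the intermediate form that the AddedComplexity = 139 pattern actually matches (DEC_CONST_BYTE supplies the 10 - 1 = 9):

    // Source:                r = (c >= '0') & (c <= '9');
    // After front-end folds: r = (unsigned char)(c - 48) < 10;
    // Selected roughly as:   p = cmpb.gtu(tmp, #9); r = mux(p, 0, 1)
    int isDigitLike(unsigned char c) {
      return (unsigned char)(c - 48) < 10;
    }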
// Pd=cmpb.gtu(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU",
+InputType = "reg" in
def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.gtu($src1, $src2)",
[(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
(and (i32 IntRegs:$src2), 255)))]>,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// The following instruction is not being extended, as it results in incorrect
// code for negative numbers.
// Signed half compare(.eq) ri.
// Pd=cmph.eq(Rs,#s8)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, s8Imm:$src2),
"$dst = cmph.eq($src1, #$src2)",
@@ -5067,7 +2565,7 @@ def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
// r0=and(r0,#0xffff)
// p0=cmp.eq(r0,#0)
// Pd=cmph.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.eq($src1, $src2)",
@@ -5082,7 +2580,7 @@ def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
// r1=asl(r1,16)
// p0=cmp.eq(r0,r1)
// Pd=cmph.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.eq($src1, $src2)",
@@ -5096,19 +2594,20 @@ used in the cmph.gt instruction.
// Signed half compare(.gt) ri.
// Pd=cmph.gt(Rs,#s8)
-let isCompare = 1 in
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+isCompare = 1, validSubTargets = HasV4SubT in
def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s8Imm:$src2),
+ (ins IntRegs:$src1, s8Ext:$src2),
"$dst = cmph.gt($src1, #$src2)",
[(set (i1 PredRegs:$dst),
(setgt (shl (i32 IntRegs:$src1), (i32 16)),
- s8ImmPred:$src2))]>,
+ s8ExtPred:$src2))]>,
Requires<[HasV4T]>;
*/
// Signed half compare(.gt) rr.
// Pd=cmph.gt(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.gt($src1, $src2)",
@@ -5119,24 +2618,41 @@ def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
// Unsigned half compare rr (.gtu).
// Pd=cmph.gtu(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
+InputType = "reg" in
def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.gtu($src1, $src2)",
[(set (i1 PredRegs:$dst),
(setugt (and (i32 IntRegs:$src1), 65535),
(and (i32 IntRegs:$src2), 65535)))]>,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Unsigned half compare ri (.gtu).
// Pd=cmph.gtu(Rs,#u7)
-let isCompare = 1 in
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
+isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
+InputType = "imm" in
def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u7Imm:$src2),
+ (ins IntRegs:$src1, u7Ext:$src2),
"$dst = cmph.gtu($src1, #$src2)",
[(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535),
- u7ImmPred:$src2))]>,
- Requires<[HasV4T]>;
+ u7ExtPred:$src2))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+let validSubTargets = HasV4SubT in
+def NTSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>,
+ Requires<[HasV4T]>;
+
+let validSubTargets = HasV4SubT in
+def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = !tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (shl 1, u5ImmPred:$src2), (i32 IntRegs:$src1)), 0))]>,
+ Requires<[HasV4T]>;
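The !tstbit instructions match a test for a clear bit; the equivalent C++ predicate is:

    // p = !tstbit(x, n)  <=>  ((1 << n) & x) == 0
    bool bitClear(unsigned x, unsigned n) {
      return ((1u << n) & x) == 0;
    }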
//===----------------------------------------------------------------------===//
// XTYPE/PRED -
@@ -5248,227 +2764,258 @@ let isReturn = 1, isTerminator = 1,
Requires<[HasV4T]>;
}
-
// Load/Store with absolute addressing mode
// memw(#u6)=Rt
-multiclass ST_abs<string OpcStr> {
- let isPredicable = 1 in
- def _abs_V4 : STInst2<(outs),
- (ins globaladdress:$absaddr, IntRegs:$src),
- !strconcat(OpcStr, "(##$absaddr) = $src"),
- []>,
- Requires<[HasV4T]>;
-
- let isPredicated = 1 in
- def _abs_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
- []>,
- Requires<[HasV4T]>;
-
- let isPredicated = 1 in
- def _abs_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
+multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"(##$absaddr) = $src2",
[]>,
Requires<[HasV4T]>;
+}
- let isPredicated = 1 in
- def _abs_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
- []>,
- Requires<[HasV4T]>;
+multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>;
+ }
+}
- let isPredicated = 1 in
- def _abs_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
+let isNVStorable = 1, isExtended = 1, neverHasSideEffects = 1 in
+multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 0, isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins globaladdressExt:$absaddr, RC:$src),
+ mnemonic#"(##$absaddr) = $src",
[]>,
Requires<[HasV4T]>;
- def _abs_nv_V4 : STInst2<(outs),
- (ins globaladdress:$absaddr, IntRegs:$src),
- !strconcat(OpcStr, "(##$absaddr) = $src.new"),
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 1, isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
- let isPredicated = 1 in
- def _abs_cPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"(##$absaddr) = $src2.new",
[]>,
Requires<[HasV4T]>;
+}
- let isPredicated = 1 in
- def _abs_cNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
- []>,
- Requires<[HasV4T]>;
+multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>;
+ }
+}
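+// The new-value variants differ only in storing "$src2.new"; e.g. with
+// mnemonic = "memw", isNot = 0, isPredNew = 0 the string above becomes
+//   if ($src1) memw(##$absaddr) = $src2.new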
- let isPredicated = 1 in
- def _abs_cdnPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+let mayStore = 1, isNVStore = 1, isExtended = 1, neverHasSideEffects = 1 in
+multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 0, isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdressExt:$absaddr, RC:$src),
+ mnemonic#"(##$absaddr) = $src.new",
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 1, isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>;
+ }
+ }
}
-let AddedComplexity = 30, isPredicable = 1 in
-def STrid_abs_V4 : STInst<(outs),
- (ins globaladdress:$absaddr, DoubleRegs:$src),
- "memd(##$absaddr) = $src",
- [(store (i64 DoubleRegs:$src),
- (HexagonCONST32 tglobaladdr:$absaddr))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if ($src1) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if (!$src1) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
+let addrMode = Absolute in {
+ defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>,
+ ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel;
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if ($src1.new) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
+ defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>,
+ ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel;
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if (!$src1.new) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
+ defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>,
+ ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel;
-defm STrib : ST_abs<"memb">;
-defm STrih : ST_abs<"memh">;
-defm STriw : ST_abs<"memw">;
+ let isNVStorable = 0 in
+ defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel;
+}
-let Predicates = [HasV4T], AddedComplexity = 30 in
+let Predicates = [HasV4T], AddedComplexity = 30 in {
def : Pat<(truncstorei8 (i32 IntRegs:$src1),
(HexagonCONST32 tglobaladdr:$absaddr)),
(STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
-let Predicates = [HasV4T], AddedComplexity = 30 in
def : Pat<(truncstorei16 (i32 IntRegs:$src1),
(HexagonCONST32 tglobaladdr:$absaddr)),
(STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
-let Predicates = [HasV4T], AddedComplexity = 30 in
def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
(STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+def : Pat<(store (i64 DoubleRegs:$src1),
+ (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>;
+}
-multiclass LD_abs<string OpcStr> {
- let isPredicable = 1 in
- def _abs_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$absaddr),
- !strconcat("$dst = ", !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
-
- let isPredicated = 1 in
- def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if ($src1) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// Multiclass for store instructions with GP-relative addressing mode.
+// mem[bhwd](#global) = Rt
+// if ([!]Pv[.new]) mem[bhwd](##global) = Rt
+//===----------------------------------------------------------------------===//
+multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp, isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins globaladdress:$global, RC:$src),
+ mnemonic#"(#$global) = $src",
+ []>;
+
+ // When GP-relative instructions are predicated, their addressing mode is
+ // changed to absolute and they are always constant extended.
+ let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
+ isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred <mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred <mnemonic, RC, 1>;
+ }
+}
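+// For example, for mnemonic = "memw" this multiclass should emit the two
+// shapes (a sketch):
+//   memw(#$global) = $src                  // GP-relative, predicable
+//   if ($src1) memw(##$absaddr) = $src2    // predicated: absolute, extended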
- let isPredicated = 1 in
- def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if (!$src1) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
+let mayStore = 1, isNVStore = 1 in
+multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp, isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins u0AlwaysExt:$global, RC:$src),
+ mnemonic#"(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if ($src1.new) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
+ // When GP-relative instructions are predicated, their addressing mode is
+ // changed to absolute and they are always constant extended.
+ let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1,
+ isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>;
+ }
+}
- let isPredicated = 1 in
- def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if (!$src1.new) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
+let validSubTargets = HasV4SubT in {
+defm STd_GP : ST_GP<"memd", "STd_GP", DoubleRegs>,
+              ST_GP_nv<"memd", "STd_GP", DoubleRegs>, NewValueRel;
+defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>,
+              ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel;
+defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>,
+              ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel;
+defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>,
+              ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel;
}
-let AddedComplexity = 30 in
-def LDrid_abs_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins globaladdress:$absaddr),
- "$dst = memd(##$absaddr)",
- [(set (i64 DoubleRegs:$dst),
- (load (HexagonCONST32 tglobaladdr:$absaddr)))]>,
- Requires<[HasV4T]>;
+// 64 bit atomic store
+def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
+ (i64 DoubleRegs:$src1)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if ($src1) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
+// Map from store(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(store (i64 DoubleRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>;
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if (!$src1) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
+// 8 bit atomic store
+def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if ($src1.new) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
+// Map from store(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if (!$src1.new) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
+// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+// to "r0 = 1; memw(#foo) = r0"
+let AddedComplexity = 100 in
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>;
+
+def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// 32 bit atomic store
+def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+// Map from store(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass for the load instructions with absolute addressing mode.
+//===----------------------------------------------------------------------===//
+multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+}
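+// For example, mnemonic = "memub", isNot = 0, isPredNew = 1 should produce
+//   if ($src1.new) $dst = memub(##$absaddr)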
-defm LDrib : LD_abs<"memb">;
-defm LDriub : LD_abs<"memub">;
-defm LDrih : LD_abs<"memh">;
-defm LDriuh : LD_abs<"memuh">;
-defm LDriw : LD_abs<"memw">;
+multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>;
+ }
+}
+
+let isExtended = 1, neverHasSideEffects = 1 in
+multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 1, isPredicable = 1 in
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
+ (ins globaladdressExt:$absaddr),
+ "$dst = "#mnemonic#"(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+ let opExtendable = 2, isPredicated = 1 in {
+ defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
+
+let addrMode = Absolute in {
+ defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel;
+ defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel;
+ defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel;
+ defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel;
+ defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel;
+ defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel;
+}
let Predicates = [HasV4T], AddedComplexity = 30 in
def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
@@ -5490,6 +3037,107 @@ let Predicates = [HasV4T], AddedComplexity=30 in
def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
(LDriuh_abs_V4 tglobaladdr:$absaddr)>;
+//===----------------------------------------------------------------------===//
+// Multiclass for load instructions with GP-relative addressing mode.
+// Rx = mem[bhwd](#global)
+// if ([!]Pv[.new]) Rx = mem[bhwd](##global)
+//===----------------------------------------------------------------------===//
+let neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> {
+ let BaseOpcode = BaseOp in {
+ let isPredicable = 1 in
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
+ (ins globaladdress:$global),
+ "$dst = "#mnemonic#"(#$global)",
+ []>;
+
+ let isExtended = 1, opExtendable = 2, isPredicated = 1 in {
+ defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
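+// E.g. "defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>" below is expected to
+// yield LDw_GP_V4 with the syntax "$dst = memw(#$global)", plus predicated
+// absolute forms; the selection patterns that follow use LDw_GP_V4 directly.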
+
+defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>;
+defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>;
+defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>;
+defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>;
+defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>;
+defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>;
+
+def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+
+def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>;
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>;
+
+// When the Interprocedural Global Variable optimizer realizes that a certain
+// global variable takes only two constant values, it shrinks the global to
+// a boolean. Catch those loads here in the following 3 patterns.
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memub(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memuh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>;
+
+// Map from load(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>;
+
// Transfer global address into a register
let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in
def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
@@ -5497,6 +3145,11 @@ def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
[(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>,
Requires<[HasV4T]>;
+// Transfer a block address into a register
+def : Pat<(HexagonCONST32_GP tblockaddress:$src1),
+ (TFRI_V4 tblockaddress:$src1)>,
+ Requires<[HasV4T]>;
+
let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in
def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, globaladdress:$src2),
@@ -5588,172 +3241,167 @@ defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>;
defm STriw_ind : ST_indirect_lo<"memw", store>;
// Store - absolute addressing mode: These instructions take a constant
-// value as the extended operand
+// value as the extended operand.
multiclass ST_absimm<string OpcStr> {
- let isPredicable = 1 in
+let isExtended = 1, opExtendable = 0, isPredicable = 1,
+validSubTargets = HasV4SubT in
def _abs_V4 : STInst2<(outs),
- (ins u6Imm:$src1, IntRegs:$src2),
- !strconcat(OpcStr, "(#$src1) = $src2"),
+ (ins u0AlwaysExt:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(##$src1) = $src2"),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
+let isExtended = 1, opExtendable = 1, isPredicated = 1,
+validSubTargets = HasV4SubT in {
def _abs_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
- !strconcat("if ($src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if ($src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
- !strconcat("if (!$src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3")),
+ !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3")),
+ !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
+}
- def _abs_nv_V4 : STInst2<(outs),
- (ins u6Imm:$src1, IntRegs:$src2),
- !strconcat(OpcStr, "(#$src1) = $src2.new"),
+let isExtended = 1, opExtendable = 0, mayStore = 1, isNVStore = 1,
+validSubTargets = HasV4SubT in
+ def _abs_nv_V4 : NVInst_V4<(outs),
+ (ins u0AlwaysExt:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(##$src1) = $src2.new"),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+let isExtended = 1, opExtendable = 1, mayStore = 1, isPredicated = 1,
+isNVStore = 1, validSubTargets = HasV4SubT in {
+ def _abs_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if ($src1)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ def _abs_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if (!$src1)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ def _abs_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ def _abs_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
}
+}
defm STrib_imm : ST_absimm<"memb">;
defm STrih_imm : ST_absimm<"memh">;
defm STriw_imm : ST_absimm<"memw">;
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1), u6ImmPred:$src2),
- (STrib_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+let Predicates = [HasV4T], AddedComplexity = 30 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STrib_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1), u6ImmPred:$src2),
- (STrih_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
-
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(store (i32 IntRegs:$src1), u6ImmPred:$src2),
- (STriw_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STrih_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STriw_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+}
// Load - absolute addressing mode: These instructions take a constant
// value as the extended operand
multiclass LD_absimm<string OpcStr> {
- let isPredicable = 1 in
+let isExtended = 1, opExtendable = 1, isPredicable = 1,
+validSubTargets = HasV4SubT in
def _abs_V4 : LDInst2<(outs IntRegs:$dst),
- (ins u6Imm:$src),
+ (ins u0AlwaysExt:$src),
!strconcat("$dst = ",
- !strconcat(OpcStr, "(#$src)")),
+ !strconcat(OpcStr, "(##$src)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
+let isExtended = 1, opExtendable = 2, isPredicated = 1,
+validSubTargets = HasV4SubT in {
def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if ($src1) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if (!$src1) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if ($src1.new) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if (!$src1.new) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
}
+}
-defm LDrib_imm : LD_absimm<"memb">;
+defm LDrib_imm : LD_absimm<"memb">;
defm LDriub_imm : LD_absimm<"memub">;
-defm LDrih_imm : LD_absimm<"memh">;
+defm LDrih_imm : LD_absimm<"memh">;
defm LDriuh_imm : LD_absimm<"memuh">;
-defm LDriw_imm : LD_absimm<"memw">;
+defm LDriw_imm : LD_absimm<"memw">;
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(i32 (load u6ImmPred:$src)),
- (LDriw_imm_abs_V4 u6ImmPred:$src)>;
+let Predicates = [HasV4T], AddedComplexity = 30 in {
+def : Pat<(i32 (load u0AlwaysExtPred:$src)),
+ (LDriw_imm_abs_V4 u0AlwaysExtPred:$src)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (sextloadi8 u6ImmPred:$src)),
- (LDrib_imm_abs_V4 u6ImmPred:$src)>;
+def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)),
+ (LDrib_imm_abs_V4 u0AlwaysExtPred:$src)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (zextloadi8 u6ImmPred:$src)),
- (LDriub_imm_abs_V4 u6ImmPred:$src)>;
+def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)),
+ (LDriub_imm_abs_V4 u0AlwaysExtPred:$src)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (sextloadi16 u6ImmPred:$src)),
- (LDrih_imm_abs_V4 u6ImmPred:$src)>;
-
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (zextloadi16 u6ImmPred:$src)),
- (LDriuh_imm_abs_V4 u6ImmPred:$src)>;
+def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)),
+ (LDrih_imm_abs_V4 u0AlwaysExtPred:$src)>;
+def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)),
+ (LDriuh_imm_abs_V4 u0AlwaysExtPred:$src)>;
+}
// Indexed store double word - global address.
// memw(Rs+#u6:2)=#S8
@@ -5775,3 +3423,109 @@ def STrih_offset_ext_V4 : STInst<(outs),
[(truncstorei16 (HexagonCONST32 tglobaladdr:$src3),
(add IntRegs:$src1, u6_1ImmPred:$src2))]>,
Requires<[HasV4T]>;
+// Map from store(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i64 DoubleRegs:$src1),
+ FoldGlobalAddrGP:$addr),
+ (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr,
+ (i64 DoubleRegs:$src1)),
+ (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i64 (load FoldGlobalAddrGP:$addr)),
+ (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr),
+ (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr),
+ (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memub(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr),
+ (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
+ (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr),
+ (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
diff --git a/lib/Target/Hexagon/HexagonMCInst.h b/lib/Target/Hexagon/HexagonMCInst.h
deleted file mode 100644
index 7a16c241ff8f..000000000000
--- a/lib/Target/Hexagon/HexagonMCInst.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class extends MCInst to allow some VLIW annotation.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef HEXAGONMCINST_H
-#define HEXAGONMCINST_H
-
-#include "llvm/MC/MCInst.h"
-#include "llvm/CodeGen/MachineInstr.h"
-
-namespace llvm {
- class HexagonMCInst: public MCInst {
- // Packet start and end markers
- unsigned startPacket: 1, endPacket: 1;
- const MachineInstr *MachineI;
- public:
- explicit HexagonMCInst(): MCInst(),
- startPacket(0), endPacket(0) {}
-
- const MachineInstr* getMI() const { return MachineI; }
-
- void setMI(const MachineInstr *MI) { MachineI = MI; }
-
- bool isStartPacket() const { return (startPacket); }
- bool isEndPacket() const { return (endPacket); }
-
- void setStartPacket(bool yes) { startPacket = yes; }
- void setEndPacket(bool yes) { endPacket = yes; }
- };
-}
-
-#endif
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 70bddcc76a59..f011d51bd61a 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -15,8 +15,9 @@
#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
-#include "llvm/Constants.h"
+#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
@@ -38,9 +39,10 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
}
// Create an MCInst from a MachineInstr
-void llvm::HexagonLowerToMC(const MachineInstr* MI, MCInst& MCI,
+void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI,
HexagonAsmPrinter& AP) {
MCI.setOpcode(MI->getOpcode());
+ MCI.setDesc(MI->getDesc());
for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
const MachineOperand &MO = MI->getOperand(i);
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 0e9ef4838d8a..1388ad4f167d 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -15,8 +15,8 @@
#define DEBUG_TYPE "misched"
#include "HexagonMachineScheduler.h"
-
-#include <queue>
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
@@ -153,7 +153,16 @@ void VLIWMachineScheduler::schedule() {
// Postprocess the DAG to add platform specific artificial dependencies.
postprocessDAG();
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ SchedImpl->initialize(this);
+
// To view Height/Depth correctly, they should be accessed at least once.
+ //
+ // FIXME: SUnit::dumpAll always recompute depth and height now. The max
+ // depth/height could be computed directly from the roots and leaves.
DEBUG(unsigned maxH = 0;
for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
if (SUnits[su].getHeight() > maxH)
@@ -167,7 +176,7 @@ void VLIWMachineScheduler::schedule() {
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
- initQueues();
+ initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
@@ -187,6 +196,7 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
DAG = static_cast<VLIWMachineScheduler*>(dag);
SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
+
Top.init(DAG, SchedModel);
Bot.init(DAG, SchedModel);
@@ -194,6 +204,8 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
// are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
const TargetMachine &TM = DAG->MF.getTarget();
+ delete Top.HazardRec;
+ delete Bot.HazardRec;
Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
@@ -678,4 +690,3 @@ void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
Bot.bumpNode(SU);
}
}
-
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index fe0242a0f74e..f68dadf29210 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -14,6 +14,9 @@
#ifndef HEXAGONASMPRINTER_H
#define HEXAGONASMPRINTER_H
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
@@ -22,14 +25,11 @@
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 1e91c3948550..5e80e48b01d5 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -22,31 +22,29 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagon-nvj"
-#include "llvm/PassSupport.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-#include "HexagonRegisterInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonInstrInfo.h"
-#include "HexagonMachineFunctionInfo.h"
-
#include <map>
-
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created");
@@ -222,7 +220,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
return false;
}
- unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
+ unsigned cmpReg1, cmpOp2;
cmpReg1 = MI->getOperand(1).getReg();
if (secondReg) {
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
new file mode 100644
index 000000000000..c79d78f21080
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -0,0 +1,858 @@
+//===- HexagonOperands.td - Hexagon immediate processing -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Immediate operands.
+
+let PrintMethod = "printImmOperand" in {
+ // f32Ext type is used to identify constant extended floating point immediates.
+ def f32Ext : Operand<f32>;
+ def s32Imm : Operand<i32>;
+ def s26_6Imm : Operand<i32>;
+ def s16Imm : Operand<i32>;
+ def s12Imm : Operand<i32>;
+ def s11Imm : Operand<i32>;
+ def s11_0Imm : Operand<i32>;
+ def s11_1Imm : Operand<i32>;
+ def s11_2Imm : Operand<i32>;
+ def s11_3Imm : Operand<i32>;
+ def s10Imm : Operand<i32>;
+ def s9Imm : Operand<i32>;
+ def m9Imm : Operand<i32>;
+ def s8Imm : Operand<i32>;
+ def s8Imm64 : Operand<i64>;
+ def s6Imm : Operand<i32>;
+ def s4Imm : Operand<i32>;
+ def s4_0Imm : Operand<i32>;
+ def s4_1Imm : Operand<i32>;
+ def s4_2Imm : Operand<i32>;
+ def s4_3Imm : Operand<i32>;
+ def u64Imm : Operand<i64>;
+ def u32Imm : Operand<i32>;
+ def u26_6Imm : Operand<i32>;
+ def u16Imm : Operand<i32>;
+ def u16_0Imm : Operand<i32>;
+ def u16_1Imm : Operand<i32>;
+ def u16_2Imm : Operand<i32>;
+ def u11_3Imm : Operand<i32>;
+ def u10Imm : Operand<i32>;
+ def u9Imm : Operand<i32>;
+ def u8Imm : Operand<i32>;
+ def u7Imm : Operand<i32>;
+ def u6Imm : Operand<i32>;
+ def u6_0Imm : Operand<i32>;
+ def u6_1Imm : Operand<i32>;
+ def u6_2Imm : Operand<i32>;
+ def u6_3Imm : Operand<i32>;
+ def u5Imm : Operand<i32>;
+ def u4Imm : Operand<i32>;
+ def u3Imm : Operand<i32>;
+ def u2Imm : Operand<i32>;
+ def u1Imm : Operand<i32>;
+ def n8Imm : Operand<i32>;
+ def m6Imm : Operand<i32>;
+}
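+// These operands carry only a value type and a print method; an instruction
+// references them in its operand list. A hypothetical example (sketch only,
+// FOO is not a real opcode):
+//   def FOO : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src, s16Imm:$imm),
+//                      "$dst = foo($src, #$imm)", []>;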
+
+let PrintMethod = "printNOneImmOperand" in
+def nOneImm : Operand<i32>;
+
+//
+// Immediate predicates
+//
+def s32ImmPred : PatLeaf<(i32 imm), [{
+ // s32ImmPred predicate - True if the immediate fits in a 32-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s32_24ImmPred : PatLeaf<(i32 imm), [{
+ // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x1000000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<32,24>(v);
+}]>;
+
+def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
+ // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x10000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<24,16>(v);
+}]>;
+
+def s26_6ImmPred : PatLeaf<(i32 imm), [{
+ // s26_6ImmPred predicate - True if the immediate fits in a 32-bit
+ // sign extended field and is a multiple of 64.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<26,6>(v);
+}]>;
+
+
+def s16ImmPred : PatLeaf<(i32 imm), [{
+ // s16ImmPred predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<16>(v);
+}]>;
+
+
+def s13ImmPred : PatLeaf<(i32 imm), [{
+ // s13ImmPred predicate - True if the immediate fits in a 13-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<13>(v);
+}]>;
+
+
+def s12ImmPred : PatLeaf<(i32 imm), [{
+ // s12ImmPred predicate - True if the immediate fits in a 12-bit
+ // sign extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<12>(v);
+}]>;
+
+def s11_0ImmPred : PatLeaf<(i32 imm), [{
+ // s11_0ImmPred predicate - True if the immediate fits in an 11-bit
+ // sign extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}]>;
+
+
+def s11_1ImmPred : PatLeaf<(i32 imm), [{
+ // s11_1ImmPred predicate - True if the immediate fits in a 12-bit
+ // sign extended field and is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}]>;
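+// Worked example: v = 22 is even and 22 >> 1 = 11 fits in a signed 11-bit
+// field, so isShiftedInt<11,1>(22) holds; v = 21 is rejected because it is
+// odd.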
+
+
+def s11_2ImmPred : PatLeaf<(i32 imm), [{
+ // s11_2ImmPred predicate - True if the immediate fits in a 13-bit
+ // sign extended field and is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}]>;
+
+
+def s11_3ImmPred : PatLeaf<(i32 imm), [{
+ // s11_3ImmPred predicate - True if the immediate fits in a 14-bit
+ // sign extended field and is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}]>;
+
+
+def s10ImmPred : PatLeaf<(i32 imm), [{
+ // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<10>(v);
+}]>;
+
+
+def s9ImmPred : PatLeaf<(i32 imm), [{
+ // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v);
+}]>;
+
+def m9ImmPred : PatLeaf<(i32 imm), [{
+ // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude
+ // field. The range of m9 is -255 to 255.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v) && (v != -256);
+}]>;
+
+def s8ImmPred : PatLeaf<(i32 imm), [{
+ // s8ImmPred predicate - True if the immediate fits in an 8-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s8Imm64Pred : PatLeaf<(i64 imm), [{
+ // s8Imm64Pred predicate - True if the immediate fits in an 8-bit sign
+ // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s6ImmPred : PatLeaf<(i32 imm), [{
+ // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+
+def s4_0ImmPred : PatLeaf<(i32 imm), [{
+ // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<4>(v);
+}]>;
+
+
+def s4_1ImmPred : PatLeaf<(i32 imm), [{
+ // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field and is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,1>(v);
+}]>;
+
+
+def s4_2ImmPred : PatLeaf<(i32 imm), [{
+ // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,2>(v);
+}]>;
+
+
+def s4_3ImmPred : PatLeaf<(i32 imm), [{
+ // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,3>(v);
+}]>;
+
+
+def u64ImmPred : PatLeaf<(i64 imm), [{
+ // Adding "N ||" to suppress gcc unused warning.
+ return (N || true);
+}]>;
+
+def u32ImmPred : PatLeaf<(i32 imm), [{
+ // u32ImmPred predicate - True if the immediate fits in a 32-bit field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u26_6ImmPred : PatLeaf<(i32 imm), [{
+ // u26_6ImmPred - True if the immediate fits in a 32-bit field and
+ // is a multiple of 64.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<26,6>(v);
+}]>;
+
+def u16ImmPred : PatLeaf<(i32 imm), [{
+ // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<16>(v);
+}]>;
+
+def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+ // u16_s8ImmPred predicate - True if the immediate is an unsigned 16-bit
+ // value shifted left by 8 bits (a multiple of 256).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<16,8>(v);
+}]>;
+
+def u9ImmPred : PatLeaf<(i32 imm), [{
+ // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<9>(v);
+}]>;
+
+
+def u8ImmPred : PatLeaf<(i32 imm), [{
+ // u8ImmPred predicate - True if the immediate fits in an 8-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<8>(v);
+}]>;
+
+def u7StrictPosImmPred : ImmLeaf<i32, [{
+ // u7StrictPosImmPred predicate - True if the immediate fits in a 7-bit
+ // unsigned field and is strictly greater than 0.
+ return isUInt<7>(Imm) && Imm > 0;
+}]>;
+
+def u7ImmPred : PatLeaf<(i32 imm), [{
+ // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<7>(v);
+}]>;
+
+
+def u6ImmPred : PatLeaf<(i32 imm), [{
+ // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_0ImmPred : PatLeaf<(i32 imm), [{
+ // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field. Same as u6ImmPred.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_1ImmPred : PatLeaf<(i32 imm), [{
+ // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+ // field that is 1 bit aligned - multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}]>;
+
+def u6_2ImmPred : PatLeaf<(i32 imm), [{
+ // u6_2ImmPred predicate - True if the immediate fits in an 8-bit unsigned
+ // field that is 2 bits aligned - multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}]>;
+
+def u6_3ImmPred : PatLeaf<(i32 imm), [{
+ // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+ // field that is 3 bits aligned - multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,3>(v);
+}]>;
+
+def u5ImmPred : PatLeaf<(i32 imm), [{
+ // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<5>(v);
+}]>;
+
+
+def u3ImmPred : PatLeaf<(i32 imm), [{
+ // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<3>(v);
+}]>;
+
+
+def u2ImmPred : PatLeaf<(i32 imm), [{
+ // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<2>(v);
+}]>;
+
+
+def u1ImmPred : PatLeaf<(i1 imm), [{
+ // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
+def m5BImmPred : PatLeaf<(i32 imm), [{
+ // m5BImmPred predicate - True if the (char) number is in the range -31 .. -1
+ // and will fit in a 5 bit field when made positive, for use in memops.
+ // This is specific to the zero extension of a negative by CombineInstr.
+ int8_t v = (int8_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+def m5HImmPred : PatLeaf<(i32 imm), [{
+ // m5HImmPred predicate - True if the (short) number is in the range -31 .. -1
+ // and will fit in a 5 bit field when made positive, for use in memops.
+ // This is specific to the zero extension of a negative by CombineInstr.
+ int16_t v = (int16_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+def m5ImmPred : PatLeaf<(i32 imm), [{
+ // m5ImmPred predicate - True if the number is in the range -31 .. -1
+ // and will fit in a 5 bit field when made positive, for use in memops.
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+// InN means negative integers in [-(2^N - 1), 0].
+def n8ImmPred : PatLeaf<(i32 imm), [{
+ // n8ImmPred predicate - True if the immediate is in the range [-255, 0].
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-255 <= v && v <= 0);
+}]>;
+
+def nOneImmPred : PatLeaf<(i32 imm), [{
+ // nOneImmPred predicate - True if the immediate is -1.
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-1 == v);
+}]>;
+
+def Set5ImmPred : PatLeaf<(i32 imm), [{
+ // Set5ImmPred predicate - True if the number is in the series of values.
+ // [ 2^0, 2^1, ... 2^31 ]
+ // For use in setbit immediate.
+ uint32_t v = (int32_t)N->getSExtValue();
+ // Constrain to 32 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr5ImmPred : PatLeaf<(i32 imm), [{
+ // Clr5ImmPred predicate - True if the number is in the series of
+ // bit negated values.
+ // [ 2^0, 2^1, ... 2^31 ]
+ // For use in clrbit immediate.
+ // Note: we are bit NOTing the value.
+ uint32_t v = ~ (int32_t)N->getSExtValue();
+ // Constrain to 32 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr5ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr5ImmPred predicate - True if the immediate is in range 0..31.
+ int32_t v = (int32_t)N->getSExtValue();
+ return (v >= 0 && v <= 31);
+}]>;
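+// Worked example: for setbit, v = 0x00010000 has exactly one bit set, so
+// Set5ImmPred accepts it; for clrbit, 0xFFFEFFFF is accepted by Clr5ImmPred
+// because ~0xFFFEFFFF = 0x00010000 is a single bit.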
+
+def Set4ImmPred : PatLeaf<(i32 imm), [{
+ // Set4ImmPred predicate - True if the number is in the series of values:
+ // [ 2^0, 2^1, ... 2^15 ].
+ // For use in setbit immediate.
+ uint16_t v = (int16_t)N->getSExtValue();
+ // Constrain to 16 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr4ImmPred : PatLeaf<(i32 imm), [{
+ // Clr4ImmPred predicate - True if the number is in the series of
+ // bit negated values:
+ // [ 2^0, 2^1, ... 2^15 ].
+ // For use in setbit and clrbit immediate.
+ uint16_t v = ~ (int16_t)N->getSExtValue();
+ // Constrain to 16 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr4ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr4ImmPred predicate - True if the immediate is in the range 0..15.
+ int16_t v = (int16_t)N->getSExtValue();
+ return (v >= 0 && v <= 15);
+}]>;
+
+def Set3ImmPred : PatLeaf<(i32 imm), [{
+ // Set3ImmPred predicate - True if the number is in the series of values:
+ // [ 2^0, 2^1, ... 2^7 ].
+ // For use in setbit immediate.
+ uint8_t v = (int8_t)N->getSExtValue();
+ // Constrain to 8 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr3ImmPred : PatLeaf<(i32 imm), [{
+ // Clr3ImmPred predicate - True if the number is in the series of
+ // bit negated values:
+ // [ 2^0, 2^1, ... 2^7 ].
+ // For use in setbit and clrbit immediate.
+ uint8_t v = ~ (int8_t)N->getSExtValue();
+ // Constrain to 8 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr3ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr3ImmPred predicate - True if the immediate is in the range 0..7.
+ int8_t v = (int8_t)N->getSExtValue();
+ return (v >= 0 && v <= 7);
+}]>;
+
+
+// Extendable immediate operands.
+
+let PrintMethod = "printExtOperand" in {
+ def s16Ext : Operand<i32>;
+ def s12Ext : Operand<i32>;
+ def s10Ext : Operand<i32>;
+ def s9Ext : Operand<i32>;
+ def s8Ext : Operand<i32>;
+ def s6Ext : Operand<i32>;
+ def s11_0Ext : Operand<i32>;
+ def s11_1Ext : Operand<i32>;
+ def s11_2Ext : Operand<i32>;
+ def s11_3Ext : Operand<i32>;
+ def u6Ext : Operand<i32>;
+ def u7Ext : Operand<i32>;
+ def u8Ext : Operand<i32>;
+ def u9Ext : Operand<i32>;
+ def u10Ext : Operand<i32>;
+ def u6_0Ext : Operand<i32>;
+ def u6_1Ext : Operand<i32>;
+ def u6_2Ext : Operand<i32>;
+ def u6_3Ext : Operand<i32>;
+}
+
+let PrintMethod = "printImmOperand" in
+def u0AlwaysExt : Operand<i32>;
+
+// Predicates for constant extendable operands
+def s16ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 16-bit sign extended field.
+ return isInt<16>(v);
+ else {
+ if (isInt<16>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
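+// Worked example: v = 70000 fails isInt<16> but passes isInt<32>, so pre-V4
+// it is rejected, while on V4 it is accepted (when isConstExtProfitable
+// holds) and emitted with a constant extender.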
+
+def s10ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 10-bit sign extended field.
+ return isInt<10>(v);
+ else {
+ if (isInt<10>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s9ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 9-bit sign extended field.
+ return isInt<9>(v);
+ else {
+ if (isInt<9>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s8ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in an 8-bit sign extended field.
+ return isInt<8>(v);
+ else {
+ if (isInt<8>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s8_16ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate fits in an 8-bit sign extended field.
+ return isInt<8>(v);
+ else {
+ if (isInt<8>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can't fit in a 16-bit signed field. This is required to avoid
+ // unnecessary constant extenders.
+ return isConstExtProfitable(Node) && !isInt<16>(v);
+ }
+}]>;
+
+def s6ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 6-bit sign extended field.
+ return isInt<6>(v);
+ else {
+ if (isInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s6_16ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate fits in a 6-bit sign extended field.
+ return isInt<6>(v);
+ else {
+ if (isInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can't fit in a 16-bit signed field. This is required to avoid
+ // unnecessary constant extenders.
+ return isConstExtProfitable(Node) && !isInt<16>(v);
+ }
+}]>;
+
+def s6_10ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 6-bit sign extended field.
+ return isInt<6>(v);
+ else {
+ if (isInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can't fit in a 10-bit signed field. This is required to avoid
+ // unnecessary constant extenders.
+ return isConstExtProfitable(Node) && !isInt<10>(v);
+ }
+}]>;
+
+def s11_0ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in an 11-bit sign extended field.
+ return isShiftedInt<11,0>(v);
+ else {
+ if (isInt<11>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit signed field.
+ return isConstExtProfitable(Node) && isInt<32>(v);
+ }
+}]>;
+
+def s11_1ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 12-bit sign extended field and
+ // is 2 byte aligned.
+ return isShiftedInt<11,1>(v);
+ else {
+ if (isInt<12>(v))
+ return isShiftedInt<11,1>(v);
+
+ // Return true if extending this immediate is profitable and the low 1 bit
+ // is zero (2-byte aligned).
+ return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0);
+ }
+}]>;
+
+def s11_2ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 13-bit sign extended field and
+ // is 4-byte aligned.
+ return isShiftedInt<11,2>(v);
+ else {
+ if (isInt<13>(v))
+ return isShiftedInt<11,2>(v);
+
+    // Return true if extending this immediate is profitable, the value fits
+    // in a 32-bit signed field, and the low 2 bits are zero (4-byte aligned).
+ return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0);
+ }
+}]>;
+
+def s11_3ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 14-bit sign extended field and
+ // is 8-byte aligned.
+ return isShiftedInt<11,3>(v);
+ else {
+ if (isInt<14>(v))
+ return isShiftedInt<11,3>(v);
+
+    // Return true if extending this immediate is profitable, the value fits
+    // in a 32-bit signed field, and the low 3 bits are zero (8-byte aligned).
+ return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0);
+ }
+}]>;
+
+def u0AlwaysExtPred : PatLeaf<(i32 imm), [{
+ // Predicate for an unsigned 32-bit value that always needs to be extended.
+ if (Subtarget.hasV4TOps()) {
+ if (isConstExtProfitable(Node)) {
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+ }
+ }
+ return false;
+}]>;
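+// Note the value is read with getSExtValue() and then checked with
+// isUInt<32>, so a negative immediate (sign-extended to 64 bits) never
+// satisfies this predicate.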
+
+def u6ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 6-bit unsigned field.
+ return isUInt<6>(v);
+ else {
+ if (isUInt<6>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u7ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 7-bit unsigned field.
+ return isUInt<7>(v);
+ else {
+ if (isUInt<7>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u8ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in an 8-bit unsigned field.
+ return isUInt<8>(v);
+ else {
+ if (isUInt<8>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u9ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 9-bit unsigned field.
+ return isUInt<9>(v);
+ else {
+ if (isUInt<9>(v))
+ return true;
+
+ // Return true if extending this immediate is profitable and the value
+ // can fit in a 32-bit unsigned field.
+ return isConstExtProfitable(Node) && isUInt<32>(v);
+ }
+}]>;
+
+def u6_1ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 7-bit unsigned field and
+ // is 2-byte aligned.
+ return isShiftedUInt<6,1>(v);
+ else {
+ if (isUInt<7>(v))
+ return isShiftedUInt<6,1>(v);
+
+    // Return true if extending this immediate is profitable, the value fits
+    // in a 32-bit unsigned field, and it is 2-byte aligned.
+ return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0);
+ }
+}]>;
+
+def u6_2ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 8-bit unsigned field and
+ // is 4-byte aligned.
+ return isShiftedUInt<6,2>(v);
+ else {
+ if (isUInt<8>(v))
+ return isShiftedUInt<6,2>(v);
+
+    // Return true if extending this immediate is profitable, the value fits
+    // in a 32-bit unsigned field, and it is 4-byte aligned.
+ return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0);
+ }
+}]>;
+
+def u6_3ExtPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (!Subtarget.hasV4TOps())
+ // Return true if the immediate can fit in a 9-bit unsigned field and
+ // is 8-byte aligned.
+ return isShiftedUInt<6,3>(v);
+ else {
+ if (isUInt<9>(v))
+ return isShiftedUInt<6,3>(v);
+
+    // Return true if extending this immediate is profitable, the value fits
+    // in a 32-bit unsigned field, and it is 8-byte aligned.
+ return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0);
+ }
+}]>;
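+// All of the *ExtPred leaves above share one shape: pre-V4 targets accept
+// exactly the encodable range, while V4 widens the range to 32 bits
+// whenever isConstExtProfitable(Node) justifies a constant-extender word.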
+
+// Addressing modes.
+
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
+def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
+def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
+def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
+def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
+def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
+def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
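+// Each Select* string names a C++ routine in the target's DAG-to-DAG
+// selector (see HexagonISelDAGToDAG.cpp) that matches a base register plus
+// an immediate of the given size; listing [frameindex] lets frame-index
+// nodes take part in the match.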
+
+// Address operands.
+
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printMEMrrOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri_s11_2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
+ let PrintMethod = "printMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
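+// MEMri_s11_2 doubles as an Operand (printing and operand layout) and a
+// ComplexPattern (instruction selection), so one def drives both the
+// printer and the SelectMEMriS11_2 matcher.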
+
+def FrameIndex : Operand<i32> {
+ let PrintMethod = "printFrameIndexOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+let PrintMethod = "printGlobalOperand" in {
+ def globaladdress : Operand<i32>;
+ def globaladdressExt : Operand<i32>;
+}
+
+let PrintMethod = "printJumpTable" in
+def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT>;
+def brtargetExt : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+def bblabel : Operand<i32>;
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
+
+def symbolHi32 : Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo32 : Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index a295015de561..576f1d7d0790 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -38,21 +38,21 @@
#define DEBUG_TYPE "hexagon-peephole"
#include "Hexagon.h"
#include "HexagonTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/PassSupport.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 37424860564f..d8b4e2fcb368 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -17,8 +17,6 @@
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonMachineFunctionInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -26,7 +24,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -117,37 +118,15 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
"architecture version");
}
-void HexagonRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- MachineInstr &MI = *I;
-
- if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
- // Hexagon_TODO: add code
- } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
- // Hexagon_TODO: add code
- } else {
- llvm_unreachable("Cannot handle this call frame pseudo instruction");
- }
- MBB.erase(I);
-}
-
void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
-
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
//
// Hexagon_TODO: Do we need to enforce this for Hexagon?
assert(SPAdj == 0 && "Unexpected");
-
- unsigned i = 0;
MachineInstr &MI = *II;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
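+  // The caller (generic frame-index elimination) supplies FIOperandNum,
+  // so no local scan for the frame-index operand is needed here.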
// Addressable stack objects are accessed using neg. offsets from %fp.
MachineFunction &MF = *MI.getParent()->getParent();
@@ -167,8 +146,9 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
!TII.isSpillPredRegOp(&MI)) {
// Replace frame index with a stack pointer reference.
- MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), false,
+ false, true);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(FrameSize+Offset);
} else {
// Replace frame index with a frame pointer reference.
if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
@@ -205,8 +185,8 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
dstReg).addReg(FrameReg).addImm(Offset);
}
- MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+      MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
} else if ((MI.getOpcode() == Hexagon::STriw_indexed) ||
(MI.getOpcode() == Hexagon::STriw) ||
(MI.getOpcode() == Hexagon::STrid) ||
@@ -233,29 +213,44 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
TII.get(Hexagon::ADD_ri),
resReg).addReg(FrameReg).addImm(Offset);
}
- MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+      MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
} else if (TII.isMemOp(&MI)) {
- unsigned resReg = HEXAGON_RESERVED_REG_1;
- if (!MFI.hasVarSizedObjects() &&
- TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
- MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false,
- true);
- MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
- } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_rr),
- resReg).addReg(FrameReg).addReg(resReg);
- MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+      // On V4 and later, use the constant extender when the instruction
+      // supports it.
+ if (Subtarget.hasV4TOps()) {
+ if (TII.isConstExtended(&MI)) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+ TII.immediateExtend(&MI);
+ } else {
+ llvm_unreachable("Need to implement for memops");
+ }
} else {
- BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
- TII.get(Hexagon::ADD_ri),
- resReg).addReg(FrameReg).addImm(Offset);
- MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+ // Only V3 and older instructions here.
+ unsigned ResReg = HEXAGON_RESERVED_REG_1;
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
+ false, false, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
+ } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr), ResReg).addReg(FrameReg).
+ addReg(ResReg);
+ MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+ true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg).
+ addImm(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false,
+ true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
+ }
}
} else {
unsigned dstReg = MI.getOperand(0).getReg();
@@ -265,14 +260,14 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
TII.get(Hexagon::ADD_rr),
dstReg).addReg(FrameReg).addReg(dstReg);
// Can we delete MI??? r2 = add (r2, #0).
- MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+      MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
}
} else {
// If the offset is small enough to fit in the immediate field, directly
// encode it.
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
}
}
@@ -310,58 +305,6 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
Moves.push_back(MachineMove(0, Dst, Src));
}
-// Get the weight in units of pressure for this register class.
-const RegClassWeight &
-HexagonRegisterInfo::getRegClassWeight(const TargetRegisterClass *RC) const {
- // Each TargetRegisterClass has a per register weight, and weight
- // limit which must be less than the limits of its pressure sets.
- static const RegClassWeight RCWeightTable[] = {
- {1, 32}, // IntRegs
- {1, 8}, // CRRegs
- {1, 4}, // PredRegs
- {2, 16}, // DoubleRegs
- {0, 0} };
- return RCWeightTable[RC->getID()];
-}
-
-/// Get the number of dimensions of register pressure.
-unsigned HexagonRegisterInfo::getNumRegPressureSets() const {
- return 4;
-}
-
-/// Get the name of this register unit pressure set.
-const char *HexagonRegisterInfo::getRegPressureSetName(unsigned Idx) const {
- static const char *const RegPressureSetName[] = {
- "IntRegsRegSet",
- "CRRegsRegSet",
- "PredRegsRegSet",
- "DoubleRegsRegSet"
- };
- assert((Idx < 4) && "Index out of bounds");
- return RegPressureSetName[Idx];
-}
-
-/// Get the register unit pressure limit for this dimension.
-/// This limit must be adjusted dynamically for reserved registers.
-unsigned HexagonRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
- static const int RegPressureLimit [] = { 16, 4, 2, 8 };
- assert((Idx < 4) && "Index out of bounds");
- return RegPressureLimit[Idx];
-}
-
-const int*
-HexagonRegisterInfo::getRegClassPressureSets(const TargetRegisterClass *RC)
- const {
- static const int RCSetsTable[] = {
- 0, -1, // IntRegs
- 1, -1, // CRRegs
- 2, -1, // PredRegs
- 0, -1, // DoubleRegs
- -1 };
- static const unsigned RCSetStartTable[] = { 0, 2, 4, 6, 0 };
- unsigned SetListStart = RCSetStartTable[RC->getID()];
- return &RCSetsTable[SetListStart];
-}
unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 8820d13e0122..8a3f94a3fd12 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -15,8 +15,8 @@
#ifndef HexagonREGISTERINFO_H
#define HexagonREGISTERINFO_H
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#define GET_REGINFO_HEADER
#include "HexagonGenRegisterInfo.inc"
@@ -56,12 +56,9 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
@@ -87,11 +84,6 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
- const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const;
- unsigned getNumRegPressureSets() const;
- const char *getRegPressureSetName(unsigned Idx) const;
- unsigned getRegPressureSetLimit(unsigned Idx) const;
- const int* getRegClassPressureSets(const TargetRegisterClass *RC) const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index 4d93dd18d4e0..34bf4eacfdc0 100644
--- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -12,11 +12,12 @@
//
//===----------------------------------------------------------------------===//
+#include "Hexagon.h"
#include "HexagonTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -50,7 +51,7 @@ bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
unsigned Idx = 1;
for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
++AI, ++Idx) {
- if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt)) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
Argument* Arg = AI;
if (!isa<PointerType>(Arg->getType())) {
for (Instruction::use_iterator UI = Arg->use_begin();
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index b5ff69a701cd..c2cfbb9710a6 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -8,10 +8,11 @@
//===----------------------------------------------------------------------===//
// Functional Units
-def LUNIT : FuncUnit;
-def LSUNIT : FuncUnit;
-def MUNIT : FuncUnit;
-def SUNIT : FuncUnit;
+def LSUNIT : FuncUnit; // SLOT0
+def LUNIT : FuncUnit; // SLOT1
+def MUNIT : FuncUnit; // SLOT2
+def SUNIT : FuncUnit; // SLOT3
+def LOOPUNIT : FuncUnit;
// Itinerary classes
def ALU32 : InstrItinClass;
@@ -20,27 +21,34 @@ def CR : InstrItinClass;
def J : InstrItinClass;
def JR : InstrItinClass;
def LD : InstrItinClass;
+def LD0 : InstrItinClass;
def M : InstrItinClass;
def ST : InstrItinClass;
+def ST0 : InstrItinClass;
def S : InstrItinClass;
def SYS : InstrItinClass;
-def MARKER : InstrItinClass;
+def ENDLOOP : InstrItinClass;
def PSEUDO : InstrItinClass;
+def PSEUDOM : InstrItinClass;
def HexagonItineraries :
- ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [
+ ProcessorItineraries<[LSUNIT, LUNIT, MUNIT, SUNIT, LOOPUNIT], [], [
InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
+ InstrItinData<LD0 , [InstrStage<1, [LSUNIT]>]>,
InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<ST0 , [InstrStage<1, [LSUNIT]>]>,
InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
InstrItinData<SYS , [InstrStage<1, [LSUNIT]>]>,
- InstrItinData<MARKER , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
- InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>
+ InstrItinData<ENDLOOP, [InstrStage<1, [LOOPUNIT]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [MUNIT, SUNIT], 0>,
+ InstrStage<1, [MUNIT, SUNIT]>]>
]>;
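+// The PSEUDOM entry uses two InstrStages with a cycle increment of 0
+// between them, so both are reserved in the same cycle: the pseudo consumes
+// two of the {MUNIT, SUNIT} slots at once, modeling a double-slot
+// instruction.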
def HexagonModel : SchedMachineModel {
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
index 5668ae81e82e..ef72cf4068bf 100644
--- a/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -28,6 +28,10 @@ def SLOT0 : FuncUnit;
def SLOT1 : FuncUnit;
def SLOT2 : FuncUnit;
def SLOT3 : FuncUnit;
+// Endloop is a pseudo instruction that is encoded with 2 bits in a packet
+// rather than taking an execution slot. This special unit is needed
+// to schedule an ENDLOOP with 4 other instructions.
+def SLOT_ENDLOOP: FuncUnit;
// Itinerary classes.
def NV_V4 : InstrItinClass;
@@ -36,22 +40,26 @@ def MEM_V4 : InstrItinClass;
def PREFIX : InstrItinClass;
def HexagonItinerariesV4 :
- ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3], [], [
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD0 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST0 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<SYS , [InstrStage<1, [SLOT0]>]>,
InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
- InstrItinData<MARKER , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ENDLOOP, [InstrStage<1, [SLOT_ENDLOOP]>]>,
InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>
]>;
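+// As in HexagonItineraries above, the PSEUDOM entry reserves two of the
+// {SLOT2, SLOT3} units in a single cycle via the 0-cycle stage increment.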
def HexagonModelV4 : SchedMachineModel {
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
index a81cd913a6ec..814249fa6832 100644
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -27,24 +27,25 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "xfer"
-#include "HexagonTargetMachine.h"
-#include "HexagonSubtarget.h"
+#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 4bacb8fa670d..07d5ce1d8ab0 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -29,8 +29,16 @@ EnableV3("enable-hexagon-v3", cl::Hidden,
static cl::opt<bool>
EnableMemOps(
"enable-hexagon-memops",
- cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed,
- cl::desc("Generate V4 memop instructions."));
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true),
+ cl::desc(
+ "Generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool>
+DisableMemOps(
+ "disable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false),
+ cl::desc(
+ "Do not generate V4 MEMOP in code generation for Hexagon target"));
static cl::opt<bool>
EnableIEEERndNear(
@@ -64,7 +72,10 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- if (EnableMemOps)
+  // UseMemOps is on by default unless explicitly disabled.
+ if (DisableMemOps)
+ UseMemOps = false;
+ else if (EnableMemOps)
UseMemOps = true;
else
UseMemOps = false;
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 5d9d6d890d98..76a8fba195f3 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -14,8 +14,8 @@
#ifndef Hexagon_SUBTARGET_H
#define Hexagon_SUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 30866e9eeba8..ce45c626f799 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -15,13 +15,13 @@
#include "Hexagon.h"
#include "HexagonISelLowering.h"
#include "HexagonMachineScheduler.h"
-#include "llvm/Module.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -35,6 +35,10 @@ opt<bool> DisableHexagonMISched("disable-hexagon-misched",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Hexagon MI Scheduling"));
+static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon CFG Optimization"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
/// library. In particular, it seems that it is not possible to get
@@ -74,21 +78,21 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget),
- InstrItins(&Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
- setMCUseCFI(false);
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ setMCUseCFI(false);
}
// addPassesForOptimizations - Allow the backend (target) to add Target
// Independent Optimization passes to the Pass Manager.
bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
-
- PM.add(createConstantPropagationPass());
- PM.add(createLoopSimplifyPass());
- PM.add(createDeadCodeEliminationPass());
- PM.add(createConstantPropagationPass());
- PM.add(createLoopUnrollPass());
- PM.add(createLoopStrengthReducePass());
+ if (getOptLevel() != CodeGenOpt::None) {
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopSimplifyPass());
+ PM.add(createDeadCodeEliminationPass());
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopUnrollPass());
+ PM.add(createLoopStrengthReducePass());
+ }
return true;
}
@@ -122,38 +126,45 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool HexagonPassConfig::addInstSelector() {
- addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
- addPass(createHexagonISelDag(getHexagonTargetMachine()));
- addPass(createHexagonPeephole());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+
+ addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel()));
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonPeephole());
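+  // RemoveExtendOps and the peephole are optimization-only; instruction
+  // selection itself always runs, and is told the current optimization
+  // level.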
+
return false;
}
bool HexagonPassConfig::addPreRegAlloc() {
- if (!DisableHardwareLoops) {
+ if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
addPass(createHexagonHardwareLoops());
- }
return false;
}
bool HexagonPassConfig::addPostRegAlloc() {
- addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
+ if (!DisableHexagonCFGOpt && getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
return true;
}
bool HexagonPassConfig::addPreSched2() {
- addPass(&IfConverterID);
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&IfConverterID);
return true;
}
bool HexagonPassConfig::addPreEmitPass() {
- if (!DisableHardwareLoops) {
+ if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
addPass(createHexagonFixupHwLoops());
- }
- addPass(createHexagonNewValueJump());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonNewValueJump());
// Expand Spill code for predicate registers.
addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
@@ -162,7 +173,8 @@ bool HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
// Create Packets.
- addPass(createHexagonPacketizer());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createHexagonPacketizer());
return false;
}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 7a4215c119a9..cf8f9aa3612f 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -14,14 +14,13 @@
#ifndef HexagonTARGETMACHINE_H
#define HexagonTARGETMACHINE_H
-#include "HexagonInstrInfo.h"
-#include "HexagonSubtarget.h"
+#include "HexagonFrameLowering.h"
#include "HexagonISelLowering.h"
+#include "HexagonInstrInfo.h"
#include "HexagonSelectionDAGInfo.h"
-#include "HexagonFrameLowering.h"
+#include "HexagonSubtarget.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -35,8 +34,6 @@ class HexagonTargetMachine : public LLVMTargetMachine {
HexagonSelectionDAGInfo TSInfo;
HexagonFrameLowering FrameLowering;
const InstrItineraryData* InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
@@ -71,14 +68,6 @@ public:
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
-
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
-
virtual const DataLayout *getDataLayout() const { return &DL; }
static unsigned getModuleMatchQuality(const Module &M);
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index f4d7761ac358..993fcfaed43e 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -14,13 +14,13 @@
#include "HexagonTargetObjectFile.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 3d5f685028ea..c0d86da1c05e 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -149,7 +149,6 @@ namespace {
bool canReserveResourcesForConstExt(MachineInstr *MI);
void reserveResourcesForConstExt(MachineInstr* MI);
bool isNewValueInst(MachineInstr* MI);
- bool isDotNewInst(MachineInstr* MI);
};
}
@@ -242,8 +241,9 @@ static bool IsIndirectCall(MachineInstr* MI) {
// reservation fail.
void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
- QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
ResourceTracker->reserveResources(PseudoMI);
@@ -257,10 +257,10 @@ void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- assert(QII->isExtended(MI) &&
+ assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
"Should only be called for constant extended instructions");
MachineFunction *MF = MI->getParent()->getParent();
- MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT),
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
MI->getDebugLoc());
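+  // Probe the resource tracker with a throwaway IMMEXT_i pseudo: if the
+  // extender word cannot be reserved in the current packet, the reservation
+  // check fails.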
bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
MF->DeleteMachineInstr(PseudoMI);
@@ -271,8 +271,9 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
// true, otherwise, return false.
bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
- QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i),
+ MI->getDebugLoc());
if (ResourceTracker->canReserveResources(PseudoMI)) {
ResourceTracker->reserveResources(PseudoMI);
@@ -349,17 +350,6 @@ static bool IsControlFlow(MachineInstr* MI) {
return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
}
-bool HexagonPacketizerList::isNewValueInst(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- if (QII->isNewValueJump(MI))
- return true;
-
- if (QII->isNewValueStore(MI))
- return true;
-
- return false;
-}
-
// Function returns true if an instruction can be promoted to the new-value
// store. It will always return false for v2 and v3.
// It lists all the conditional and unconditional stores that can be promoted
@@ -375,7 +365,6 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STrib_indexed:
case Hexagon::STrib_indexed_shl_V4:
case Hexagon::STrib_shl_V4:
- case Hexagon::STrib_GP_V4:
case Hexagon::STb_GP_V4:
case Hexagon::POST_STbri:
case Hexagon::STrib_cPt:
@@ -398,17 +387,12 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STb_GP_cNotPt_V4:
case Hexagon::STb_GP_cdnPt_V4:
case Hexagon::STb_GP_cdnNotPt_V4:
- case Hexagon::STrib_GP_cPt_V4:
- case Hexagon::STrib_GP_cNotPt_V4:
- case Hexagon::STrib_GP_cdnPt_V4:
- case Hexagon::STrib_GP_cdnNotPt_V4:
// store halfword
case Hexagon::STrih:
case Hexagon::STrih_indexed:
case Hexagon::STrih_indexed_shl_V4:
case Hexagon::STrih_shl_V4:
- case Hexagon::STrih_GP_V4:
case Hexagon::STh_GP_V4:
case Hexagon::POST_SThri:
case Hexagon::STrih_cPt:
@@ -431,17 +415,12 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STh_GP_cNotPt_V4:
case Hexagon::STh_GP_cdnPt_V4:
case Hexagon::STh_GP_cdnNotPt_V4:
- case Hexagon::STrih_GP_cPt_V4:
- case Hexagon::STrih_GP_cNotPt_V4:
- case Hexagon::STrih_GP_cdnPt_V4:
- case Hexagon::STrih_GP_cdnNotPt_V4:
// store word
case Hexagon::STriw:
case Hexagon::STriw_indexed:
case Hexagon::STriw_indexed_shl_V4:
case Hexagon::STriw_shl_V4:
- case Hexagon::STriw_GP_V4:
case Hexagon::STw_GP_V4:
case Hexagon::POST_STwri:
case Hexagon::STriw_cPt:
@@ -464,10 +443,6 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STw_GP_cNotPt_V4:
case Hexagon::STw_GP_cdnPt_V4:
case Hexagon::STw_GP_cdnNotPt_V4:
- case Hexagon::STriw_GP_cPt_V4:
- case Hexagon::STriw_GP_cNotPt_V4:
- case Hexagon::STriw_GP_cdnPt_V4:
- case Hexagon::STriw_GP_cdnNotPt_V4:
return QRI->Subtarget.hasV4TOps();
}
return false;
@@ -507,9 +482,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STrib_shl_V4:
return Hexagon::STrib_shl_nv_V4;
- case Hexagon::STrib_GP_V4:
- return Hexagon::STrib_GP_nv_V4;
-
case Hexagon::STb_GP_V4:
return Hexagon::STb_GP_nv_V4;
@@ -576,18 +548,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STb_GP_cdnNotPt_V4:
return Hexagon::STb_GP_cdnNotPt_nv_V4;
- case Hexagon::STrib_GP_cPt_V4:
- return Hexagon::STrib_GP_cPt_nv_V4;
-
- case Hexagon::STrib_GP_cNotPt_V4:
- return Hexagon::STrib_GP_cNotPt_nv_V4;
-
- case Hexagon::STrib_GP_cdnPt_V4:
- return Hexagon::STrib_GP_cdnPt_nv_V4;
-
- case Hexagon::STrib_GP_cdnNotPt_V4:
- return Hexagon::STrib_GP_cdnNotPt_nv_V4;
-
// store new value halfword
case Hexagon::STrih:
return Hexagon::STrih_nv_V4;
@@ -601,9 +561,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STrih_shl_V4:
return Hexagon::STrih_shl_nv_V4;
- case Hexagon::STrih_GP_V4:
- return Hexagon::STrih_GP_nv_V4;
-
case Hexagon::STh_GP_V4:
return Hexagon::STh_GP_nv_V4;
@@ -670,18 +627,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STh_GP_cdnNotPt_V4:
return Hexagon::STh_GP_cdnNotPt_nv_V4;
- case Hexagon::STrih_GP_cPt_V4:
- return Hexagon::STrih_GP_cPt_nv_V4;
-
- case Hexagon::STrih_GP_cNotPt_V4:
- return Hexagon::STrih_GP_cNotPt_nv_V4;
-
- case Hexagon::STrih_GP_cdnPt_V4:
- return Hexagon::STrih_GP_cdnPt_nv_V4;
-
- case Hexagon::STrih_GP_cdnNotPt_V4:
- return Hexagon::STrih_GP_cdnNotPt_nv_V4;
-
// store new value word
case Hexagon::STriw:
return Hexagon::STriw_nv_V4;
@@ -695,9 +640,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STriw_shl_V4:
return Hexagon::STriw_shl_nv_V4;
- case Hexagon::STriw_GP_V4:
- return Hexagon::STriw_GP_nv_V4;
-
case Hexagon::STw_GP_V4:
return Hexagon::STw_GP_nv_V4;
@@ -764,17 +706,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STw_GP_cdnNotPt_V4:
return Hexagon::STw_GP_cdnNotPt_nv_V4;
- case Hexagon::STriw_GP_cPt_V4:
- return Hexagon::STriw_GP_cPt_nv_V4;
-
- case Hexagon::STriw_GP_cNotPt_V4:
- return Hexagon::STriw_GP_cNotPt_nv_V4;
-
- case Hexagon::STriw_GP_cdnPt_V4:
- return Hexagon::STriw_GP_cdnPt_nv_V4;
-
- case Hexagon::STriw_GP_cdnNotPt_V4:
- return Hexagon::STriw_GP_cdnNotPt_nv_V4;
}
}
@@ -820,12 +751,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STb_GP_cNotPt_V4 :
return Hexagon::STb_GP_cdnNotPt_V4;
- case Hexagon::STrib_GP_cPt_V4 :
- return Hexagon::STrib_GP_cdnPt_V4;
-
- case Hexagon::STrib_GP_cNotPt_V4 :
- return Hexagon::STrib_GP_cdnNotPt_V4;
-
// Store doubleword conditionally
case Hexagon::STrid_cPt :
return Hexagon::STrid_cdnPt_V4;
@@ -857,12 +782,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STd_GP_cNotPt_V4 :
return Hexagon::STd_GP_cdnNotPt_V4;
- case Hexagon::STrid_GP_cPt_V4 :
- return Hexagon::STrid_GP_cdnPt_V4;
-
- case Hexagon::STrid_GP_cNotPt_V4 :
- return Hexagon::STrid_GP_cdnNotPt_V4;
-
// Store halfword conditionally
case Hexagon::STrih_cPt :
return Hexagon::STrih_cdnPt_V4;
@@ -900,12 +819,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STh_GP_cNotPt_V4 :
return Hexagon::STh_GP_cdnNotPt_V4;
- case Hexagon::STrih_GP_cPt_V4 :
- return Hexagon::STrih_GP_cdnPt_V4;
-
- case Hexagon::STrih_GP_cNotPt_V4 :
- return Hexagon::STrih_GP_cdnNotPt_V4;
-
// Store word conditionally
case Hexagon::STriw_cPt :
return Hexagon::STriw_cdnPt_V4;
@@ -943,12 +856,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STw_GP_cNotPt_V4 :
return Hexagon::STw_GP_cdnNotPt_V4;
- case Hexagon::STriw_GP_cPt_V4 :
- return Hexagon::STriw_GP_cdnPt_V4;
-
- case Hexagon::STriw_GP_cNotPt_V4 :
- return Hexagon::STriw_GP_cdnNotPt_V4;
-
// Condtional Jumps
case Hexagon::JMP_c:
return Hexagon::JMP_cdnPt;
@@ -1091,72 +998,36 @@ static int GetDotNewPredOp(const int opc) {
// V4 indexed+scaled load
- case Hexagon::LDrid_indexed_cPt_V4 :
- return Hexagon::LDrid_indexed_cdnPt_V4;
-
- case Hexagon::LDrid_indexed_cNotPt_V4 :
- return Hexagon::LDrid_indexed_cdnNotPt_V4;
-
case Hexagon::LDrid_indexed_shl_cPt_V4 :
return Hexagon::LDrid_indexed_shl_cdnPt_V4;
case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
return Hexagon::LDrid_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDrib_indexed_cPt_V4 :
- return Hexagon::LDrib_indexed_cdnPt_V4;
-
- case Hexagon::LDrib_indexed_cNotPt_V4 :
- return Hexagon::LDrib_indexed_cdnNotPt_V4;
-
case Hexagon::LDrib_indexed_shl_cPt_V4 :
return Hexagon::LDrib_indexed_shl_cdnPt_V4;
case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
return Hexagon::LDrib_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDriub_indexed_cPt_V4 :
- return Hexagon::LDriub_indexed_cdnPt_V4;
-
- case Hexagon::LDriub_indexed_cNotPt_V4 :
- return Hexagon::LDriub_indexed_cdnNotPt_V4;
-
case Hexagon::LDriub_indexed_shl_cPt_V4 :
return Hexagon::LDriub_indexed_shl_cdnPt_V4;
case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
return Hexagon::LDriub_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDrih_indexed_cPt_V4 :
- return Hexagon::LDrih_indexed_cdnPt_V4;
-
- case Hexagon::LDrih_indexed_cNotPt_V4 :
- return Hexagon::LDrih_indexed_cdnNotPt_V4;
-
case Hexagon::LDrih_indexed_shl_cPt_V4 :
return Hexagon::LDrih_indexed_shl_cdnPt_V4;
case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
return Hexagon::LDrih_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDriuh_indexed_cPt_V4 :
- return Hexagon::LDriuh_indexed_cdnPt_V4;
-
- case Hexagon::LDriuh_indexed_cNotPt_V4 :
- return Hexagon::LDriuh_indexed_cdnNotPt_V4;
-
case Hexagon::LDriuh_indexed_shl_cPt_V4 :
return Hexagon::LDriuh_indexed_shl_cdnPt_V4;
case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
return Hexagon::LDriuh_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDriw_indexed_cPt_V4 :
- return Hexagon::LDriw_indexed_cdnPt_V4;
-
- case Hexagon::LDriw_indexed_cNotPt_V4 :
- return Hexagon::LDriw_indexed_cdnNotPt_V4;
-
case Hexagon::LDriw_indexed_shl_cPt_V4 :
return Hexagon::LDriw_indexed_shl_cdnPt_V4;
@@ -1201,42 +1072,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::LDw_GP_cNotPt_V4:
return Hexagon::LDw_GP_cdnNotPt_V4;
- case Hexagon::LDrid_GP_cPt_V4:
- return Hexagon::LDrid_GP_cdnPt_V4;
-
- case Hexagon::LDrid_GP_cNotPt_V4:
- return Hexagon::LDrid_GP_cdnNotPt_V4;
-
- case Hexagon::LDrib_GP_cPt_V4:
- return Hexagon::LDrib_GP_cdnPt_V4;
-
- case Hexagon::LDrib_GP_cNotPt_V4:
- return Hexagon::LDrib_GP_cdnNotPt_V4;
-
- case Hexagon::LDriub_GP_cPt_V4:
- return Hexagon::LDriub_GP_cdnPt_V4;
-
- case Hexagon::LDriub_GP_cNotPt_V4:
- return Hexagon::LDriub_GP_cdnNotPt_V4;
-
- case Hexagon::LDrih_GP_cPt_V4:
- return Hexagon::LDrih_GP_cdnPt_V4;
-
- case Hexagon::LDrih_GP_cNotPt_V4:
- return Hexagon::LDrih_GP_cdnNotPt_V4;
-
- case Hexagon::LDriuh_GP_cPt_V4:
- return Hexagon::LDriuh_GP_cdnPt_V4;
-
- case Hexagon::LDriuh_GP_cNotPt_V4:
- return Hexagon::LDriuh_GP_cdnNotPt_V4;
-
- case Hexagon::LDriw_GP_cPt_V4:
- return Hexagon::LDriw_GP_cdnPt_V4;
-
- case Hexagon::LDriw_GP_cNotPt_V4:
- return Hexagon::LDriw_GP_cdnNotPt_V4;
-
// Conditional store new-value byte
case Hexagon::STrib_cPt_nv_V4 :
return Hexagon::STrib_cdnPt_nv_V4;
@@ -1264,12 +1099,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STb_GP_cNotPt_nv_V4 :
return Hexagon::STb_GP_cdnNotPt_nv_V4;
- case Hexagon::STrib_GP_cPt_nv_V4 :
- return Hexagon::STrib_GP_cdnPt_nv_V4;
-
- case Hexagon::STrib_GP_cNotPt_nv_V4 :
- return Hexagon::STrib_GP_cdnNotPt_nv_V4;
-
// Conditional store new-value halfword
case Hexagon::STrih_cPt_nv_V4 :
return Hexagon::STrih_cdnPt_nv_V4;
@@ -1297,12 +1126,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STh_GP_cNotPt_nv_V4 :
return Hexagon::STh_GP_cdnNotPt_nv_V4;
- case Hexagon::STrih_GP_cPt_nv_V4 :
- return Hexagon::STrih_GP_cdnPt_nv_V4;
-
- case Hexagon::STrih_GP_cNotPt_nv_V4 :
- return Hexagon::STrih_GP_cdnNotPt_nv_V4;
-
// Conditional store new-value word
case Hexagon::STriw_cPt_nv_V4 :
return Hexagon::STriw_cdnPt_nv_V4;
@@ -1330,12 +1153,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STw_GP_cNotPt_nv_V4 :
return Hexagon::STw_GP_cdnNotPt_nv_V4;
- case Hexagon::STriw_GP_cPt_nv_V4 :
- return Hexagon::STriw_GP_cdnPt_nv_V4;
-
- case Hexagon::STriw_GP_cNotPt_nv_V4 :
- return Hexagon::STriw_GP_cdnNotPt_nv_V4;
-
// Conditional add
case Hexagon::ADD_ri_cPt :
return Hexagon::ADD_ri_cdnPt;
@@ -1622,72 +1439,36 @@ static int GetDotOldOp(const int opc) {
// V4 indexed+scaled Load
- case Hexagon::LDrid_indexed_cdnPt_V4 :
- return Hexagon::LDrid_indexed_cPt_V4;
-
- case Hexagon::LDrid_indexed_cdnNotPt_V4 :
- return Hexagon::LDrid_indexed_cNotPt_V4;
-
case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
return Hexagon::LDrid_indexed_shl_cPt_V4;
case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDrid_indexed_shl_cNotPt_V4;
- case Hexagon::LDrib_indexed_cdnPt_V4 :
- return Hexagon::LDrib_indexed_cPt_V4;
-
- case Hexagon::LDrib_indexed_cdnNotPt_V4 :
- return Hexagon::LDrib_indexed_cNotPt_V4;
-
case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
return Hexagon::LDrib_indexed_shl_cPt_V4;
case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDrib_indexed_shl_cNotPt_V4;
- case Hexagon::LDriub_indexed_cdnPt_V4 :
- return Hexagon::LDriub_indexed_cPt_V4;
-
- case Hexagon::LDriub_indexed_cdnNotPt_V4 :
- return Hexagon::LDriub_indexed_cNotPt_V4;
-
case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
return Hexagon::LDriub_indexed_shl_cPt_V4;
case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDriub_indexed_shl_cNotPt_V4;
- case Hexagon::LDrih_indexed_cdnPt_V4 :
- return Hexagon::LDrih_indexed_cPt_V4;
-
- case Hexagon::LDrih_indexed_cdnNotPt_V4 :
- return Hexagon::LDrih_indexed_cNotPt_V4;
-
case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
return Hexagon::LDrih_indexed_shl_cPt_V4;
case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDrih_indexed_shl_cNotPt_V4;
- case Hexagon::LDriuh_indexed_cdnPt_V4 :
- return Hexagon::LDriuh_indexed_cPt_V4;
-
- case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
- return Hexagon::LDriuh_indexed_cNotPt_V4;
-
case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
return Hexagon::LDriuh_indexed_shl_cPt_V4;
case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDriuh_indexed_shl_cNotPt_V4;
- case Hexagon::LDriw_indexed_cdnPt_V4 :
- return Hexagon::LDriw_indexed_cPt_V4;
-
- case Hexagon::LDriw_indexed_cdnNotPt_V4 :
- return Hexagon::LDriw_indexed_cNotPt_V4;
-
case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
return Hexagon::LDriw_indexed_shl_cPt_V4;
@@ -1732,42 +1513,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::LDw_GP_cdnNotPt_V4:
return Hexagon::LDw_GP_cNotPt_V4;
- case Hexagon::LDrid_GP_cdnPt_V4:
- return Hexagon::LDrid_GP_cPt_V4;
-
- case Hexagon::LDrid_GP_cdnNotPt_V4:
- return Hexagon::LDrid_GP_cNotPt_V4;
-
- case Hexagon::LDrib_GP_cdnPt_V4:
- return Hexagon::LDrib_GP_cPt_V4;
-
- case Hexagon::LDrib_GP_cdnNotPt_V4:
- return Hexagon::LDrib_GP_cNotPt_V4;
-
- case Hexagon::LDriub_GP_cdnPt_V4:
- return Hexagon::LDriub_GP_cPt_V4;
-
- case Hexagon::LDriub_GP_cdnNotPt_V4:
- return Hexagon::LDriub_GP_cNotPt_V4;
-
- case Hexagon::LDrih_GP_cdnPt_V4:
- return Hexagon::LDrih_GP_cPt_V4;
-
- case Hexagon::LDrih_GP_cdnNotPt_V4:
- return Hexagon::LDrih_GP_cNotPt_V4;
-
- case Hexagon::LDriuh_GP_cdnPt_V4:
- return Hexagon::LDriuh_GP_cPt_V4;
-
- case Hexagon::LDriuh_GP_cdnNotPt_V4:
- return Hexagon::LDriuh_GP_cNotPt_V4;
-
- case Hexagon::LDriw_GP_cdnPt_V4:
- return Hexagon::LDriw_GP_cPt_V4;
-
- case Hexagon::LDriw_GP_cdnNotPt_V4:
- return Hexagon::LDriw_GP_cNotPt_V4;
-
// Conditional add
case Hexagon::ADD_ri_cdnPt :
@@ -1901,16 +1646,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STb_GP_cNotPt_nv_V4:
return Hexagon::STb_GP_cNotPt_V4;
- case Hexagon::STrib_GP_cdnPt_nv_V4:
- case Hexagon::STrib_GP_cdnPt_V4:
- case Hexagon::STrib_GP_cPt_nv_V4:
- return Hexagon::STrib_GP_cPt_V4;
-
- case Hexagon::STrib_GP_cdnNotPt_nv_V4:
- case Hexagon::STrib_GP_cdnNotPt_V4:
- case Hexagon::STrib_GP_cNotPt_nv_V4:
- return Hexagon::STrib_GP_cNotPt_V4;
-
// Store new-value byte - unconditional
case Hexagon::STrib_nv_V4:
return Hexagon::STrib;
@@ -1924,9 +1659,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STrib_shl_nv_V4:
return Hexagon::STrib_shl_V4;
- case Hexagon::STrib_GP_nv_V4:
- return Hexagon::STrib_GP_V4;
-
case Hexagon::STb_GP_nv_V4:
return Hexagon::STb_GP_V4;
@@ -1990,16 +1722,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STh_GP_cNotPt_nv_V4:
return Hexagon::STh_GP_cNotPt_V4;
- case Hexagon::STrih_GP_cdnPt_nv_V4:
- case Hexagon::STrih_GP_cdnPt_V4:
- case Hexagon::STrih_GP_cPt_nv_V4:
- return Hexagon::STrih_GP_cPt_V4;
-
- case Hexagon::STrih_GP_cdnNotPt_nv_V4:
- case Hexagon::STrih_GP_cdnNotPt_V4:
- case Hexagon::STrih_GP_cNotPt_nv_V4:
- return Hexagon::STrih_GP_cNotPt_V4;
-
// Store new-value halfword - unconditional
case Hexagon::STrih_nv_V4:
@@ -2014,9 +1736,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STrih_shl_nv_V4:
return Hexagon::STrih_shl_V4;
- case Hexagon::STrih_GP_nv_V4:
- return Hexagon::STrih_GP_V4;
-
case Hexagon::STh_GP_nv_V4:
return Hexagon::STh_GP_V4;
@@ -2081,16 +1800,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STw_GP_cNotPt_nv_V4:
return Hexagon::STw_GP_cNotPt_V4;
- case Hexagon::STriw_GP_cdnPt_nv_V4:
- case Hexagon::STriw_GP_cdnPt_V4:
- case Hexagon::STriw_GP_cPt_nv_V4:
- return Hexagon::STriw_GP_cPt_V4;
-
- case Hexagon::STriw_GP_cdnNotPt_nv_V4:
- case Hexagon::STriw_GP_cdnNotPt_V4:
- case Hexagon::STriw_GP_cNotPt_nv_V4:
- return Hexagon::STriw_GP_cNotPt_V4;
-
// Store new-value word - unconditional
case Hexagon::STriw_nv_V4:
@@ -2105,9 +1814,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STriw_shl_nv_V4:
return Hexagon::STriw_shl_V4;
- case Hexagon::STriw_GP_nv_V4:
- return Hexagon::STriw_GP_V4;
-
case Hexagon::STw_GP_nv_V4:
return Hexagon::STw_GP_V4;
@@ -2146,11 +1852,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STd_GP_cdnNotPt_V4 :
return Hexagon::STd_GP_cNotPt_V4;
- case Hexagon::STrid_GP_cdnPt_V4 :
- return Hexagon::STrid_GP_cPt_V4;
-
- case Hexagon::STrid_GP_cdnNotPt_V4 :
- return Hexagon::STrid_GP_cNotPt_V4;
}
}
@@ -2248,28 +1949,16 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::LDriub_indexed_cdnPt :
case Hexagon::POST_LDriub_cPt :
case Hexagon::POST_LDriub_cdnPt_V4 :
- case Hexagon::LDrid_indexed_cPt_V4 :
- case Hexagon::LDrid_indexed_cdnPt_V4 :
case Hexagon::LDrid_indexed_shl_cPt_V4 :
case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrib_indexed_cPt_V4 :
- case Hexagon::LDrib_indexed_cdnPt_V4 :
case Hexagon::LDrib_indexed_shl_cPt_V4 :
case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriub_indexed_cPt_V4 :
- case Hexagon::LDriub_indexed_cdnPt_V4 :
case Hexagon::LDriub_indexed_shl_cPt_V4 :
case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrih_indexed_cPt_V4 :
- case Hexagon::LDrih_indexed_cdnPt_V4 :
case Hexagon::LDrih_indexed_shl_cPt_V4 :
case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriuh_indexed_cPt_V4 :
- case Hexagon::LDriuh_indexed_cdnPt_V4 :
case Hexagon::LDriuh_indexed_shl_cPt_V4 :
case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriw_indexed_cPt_V4 :
- case Hexagon::LDriw_indexed_cdnPt_V4 :
case Hexagon::LDriw_indexed_shl_cPt_V4 :
case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
case Hexagon::ADD_ri_cPt :
@@ -2298,42 +1987,22 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::ZXTB_cdnPt_V4 :
case Hexagon::ZXTH_cPt_V4 :
case Hexagon::ZXTH_cdnPt_V4 :
- case Hexagon::LDrid_GP_cPt_V4 :
- case Hexagon::LDrib_GP_cPt_V4 :
- case Hexagon::LDriub_GP_cPt_V4 :
- case Hexagon::LDrih_GP_cPt_V4 :
- case Hexagon::LDriuh_GP_cPt_V4 :
- case Hexagon::LDriw_GP_cPt_V4 :
case Hexagon::LDd_GP_cPt_V4 :
case Hexagon::LDb_GP_cPt_V4 :
case Hexagon::LDub_GP_cPt_V4 :
case Hexagon::LDh_GP_cPt_V4 :
case Hexagon::LDuh_GP_cPt_V4 :
case Hexagon::LDw_GP_cPt_V4 :
- case Hexagon::STrid_GP_cPt_V4 :
- case Hexagon::STrib_GP_cPt_V4 :
- case Hexagon::STrih_GP_cPt_V4 :
- case Hexagon::STriw_GP_cPt_V4 :
case Hexagon::STd_GP_cPt_V4 :
case Hexagon::STb_GP_cPt_V4 :
case Hexagon::STh_GP_cPt_V4 :
case Hexagon::STw_GP_cPt_V4 :
- case Hexagon::LDrid_GP_cdnPt_V4 :
- case Hexagon::LDrib_GP_cdnPt_V4 :
- case Hexagon::LDriub_GP_cdnPt_V4 :
- case Hexagon::LDrih_GP_cdnPt_V4 :
- case Hexagon::LDriuh_GP_cdnPt_V4 :
- case Hexagon::LDriw_GP_cdnPt_V4 :
case Hexagon::LDd_GP_cdnPt_V4 :
case Hexagon::LDb_GP_cdnPt_V4 :
case Hexagon::LDub_GP_cdnPt_V4 :
case Hexagon::LDh_GP_cdnPt_V4 :
case Hexagon::LDuh_GP_cdnPt_V4 :
case Hexagon::LDw_GP_cdnPt_V4 :
- case Hexagon::STrid_GP_cdnPt_V4 :
- case Hexagon::STrib_GP_cdnPt_V4 :
- case Hexagon::STrih_GP_cdnPt_V4 :
- case Hexagon::STriw_GP_cdnPt_V4 :
case Hexagon::STd_GP_cdnPt_V4 :
case Hexagon::STb_GP_cdnPt_V4 :
case Hexagon::STh_GP_cdnPt_V4 :
@@ -2419,28 +2088,16 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::LDriub_indexed_cdnNotPt :
case Hexagon::POST_LDriub_cNotPt :
case Hexagon::POST_LDriub_cdnNotPt_V4 :
- case Hexagon::LDrid_indexed_cNotPt_V4 :
- case Hexagon::LDrid_indexed_cdnNotPt_V4 :
case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrib_indexed_cNotPt_V4 :
- case Hexagon::LDrib_indexed_cdnNotPt_V4 :
case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriub_indexed_cNotPt_V4 :
- case Hexagon::LDriub_indexed_cdnNotPt_V4 :
case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrih_indexed_cNotPt_V4 :
- case Hexagon::LDrih_indexed_cdnNotPt_V4 :
case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriuh_indexed_cNotPt_V4 :
- case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriw_indexed_cNotPt_V4 :
- case Hexagon::LDriw_indexed_cdnNotPt_V4 :
case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
case Hexagon::ADD_ri_cNotPt :
@@ -2470,42 +2127,22 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::ZXTH_cNotPt_V4 :
case Hexagon::ZXTH_cdnNotPt_V4 :
- case Hexagon::LDrid_GP_cNotPt_V4 :
- case Hexagon::LDrib_GP_cNotPt_V4 :
- case Hexagon::LDriub_GP_cNotPt_V4 :
- case Hexagon::LDrih_GP_cNotPt_V4 :
- case Hexagon::LDriuh_GP_cNotPt_V4 :
- case Hexagon::LDriw_GP_cNotPt_V4 :
case Hexagon::LDd_GP_cNotPt_V4 :
case Hexagon::LDb_GP_cNotPt_V4 :
case Hexagon::LDub_GP_cNotPt_V4 :
case Hexagon::LDh_GP_cNotPt_V4 :
case Hexagon::LDuh_GP_cNotPt_V4 :
case Hexagon::LDw_GP_cNotPt_V4 :
- case Hexagon::STrid_GP_cNotPt_V4 :
- case Hexagon::STrib_GP_cNotPt_V4 :
- case Hexagon::STrih_GP_cNotPt_V4 :
- case Hexagon::STriw_GP_cNotPt_V4 :
case Hexagon::STd_GP_cNotPt_V4 :
case Hexagon::STb_GP_cNotPt_V4 :
case Hexagon::STh_GP_cNotPt_V4 :
case Hexagon::STw_GP_cNotPt_V4 :
- case Hexagon::LDrid_GP_cdnNotPt_V4 :
- case Hexagon::LDrib_GP_cdnNotPt_V4 :
- case Hexagon::LDriub_GP_cdnNotPt_V4 :
- case Hexagon::LDrih_GP_cdnNotPt_V4 :
- case Hexagon::LDriuh_GP_cdnNotPt_V4 :
- case Hexagon::LDriw_GP_cdnNotPt_V4 :
case Hexagon::LDd_GP_cdnNotPt_V4 :
case Hexagon::LDb_GP_cdnNotPt_V4 :
case Hexagon::LDub_GP_cdnNotPt_V4 :
case Hexagon::LDh_GP_cdnNotPt_V4 :
case Hexagon::LDuh_GP_cdnNotPt_V4 :
case Hexagon::LDw_GP_cdnNotPt_V4 :
- case Hexagon::STrid_GP_cdnNotPt_V4 :
- case Hexagon::STrib_GP_cdnNotPt_V4 :
- case Hexagon::STrih_GP_cdnNotPt_V4 :
- case Hexagon::STriw_GP_cdnNotPt_V4 :
case Hexagon::STd_GP_cdnNotPt_V4 :
case Hexagon::STb_GP_cdnNotPt_V4 :
case Hexagon::STh_GP_cdnNotPt_V4 :
@@ -2516,203 +2153,6 @@ static bool GetPredicateSense(MachineInstr* MI,
return false;
}
-bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) {
- if (isNewValueInst(MI))
- return true;
-
- switch (MI->getOpcode()) {
- case Hexagon::TFR_cdnNotPt:
- case Hexagon::TFR_cdnPt:
- case Hexagon::TFRI_cdnNotPt:
- case Hexagon::TFRI_cdnPt:
- case Hexagon::LDrid_cdnPt :
- case Hexagon::LDrid_cdnNotPt :
- case Hexagon::LDrid_indexed_cdnPt :
- case Hexagon::LDrid_indexed_cdnNotPt :
- case Hexagon::POST_LDrid_cdnPt_V4 :
- case Hexagon::POST_LDrid_cdnNotPt_V4 :
- case Hexagon::LDriw_cdnPt :
- case Hexagon::LDriw_cdnNotPt :
- case Hexagon::LDriw_indexed_cdnPt :
- case Hexagon::LDriw_indexed_cdnNotPt :
- case Hexagon::POST_LDriw_cdnPt_V4 :
- case Hexagon::POST_LDriw_cdnNotPt_V4 :
- case Hexagon::LDrih_cdnPt :
- case Hexagon::LDrih_cdnNotPt :
- case Hexagon::LDrih_indexed_cdnPt :
- case Hexagon::LDrih_indexed_cdnNotPt :
- case Hexagon::POST_LDrih_cdnPt_V4 :
- case Hexagon::POST_LDrih_cdnNotPt_V4 :
- case Hexagon::LDrib_cdnPt :
- case Hexagon::LDrib_cdnNotPt :
- case Hexagon::LDrib_indexed_cdnPt :
- case Hexagon::LDrib_indexed_cdnNotPt :
- case Hexagon::POST_LDrib_cdnPt_V4 :
- case Hexagon::POST_LDrib_cdnNotPt_V4 :
- case Hexagon::LDriuh_cdnPt :
- case Hexagon::LDriuh_cdnNotPt :
- case Hexagon::LDriuh_indexed_cdnPt :
- case Hexagon::LDriuh_indexed_cdnNotPt :
- case Hexagon::POST_LDriuh_cdnPt_V4 :
- case Hexagon::POST_LDriuh_cdnNotPt_V4 :
- case Hexagon::LDriub_cdnPt :
- case Hexagon::LDriub_cdnNotPt :
- case Hexagon::LDriub_indexed_cdnPt :
- case Hexagon::LDriub_indexed_cdnNotPt :
- case Hexagon::POST_LDriub_cdnPt_V4 :
- case Hexagon::POST_LDriub_cdnNotPt_V4 :
-
- case Hexagon::LDrid_indexed_cdnPt_V4 :
- case Hexagon::LDrid_indexed_cdnNotPt_V4 :
- case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrib_indexed_cdnPt_V4 :
- case Hexagon::LDrib_indexed_cdnNotPt_V4 :
- case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriub_indexed_cdnPt_V4 :
- case Hexagon::LDriub_indexed_cdnNotPt_V4 :
- case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrih_indexed_cdnPt_V4 :
- case Hexagon::LDrih_indexed_cdnNotPt_V4 :
- case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriuh_indexed_cdnPt_V4 :
- case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
- case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriw_indexed_cdnPt_V4 :
- case Hexagon::LDriw_indexed_cdnNotPt_V4 :
- case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
-
-// Conditional add
- case Hexagon::ADD_ri_cdnPt:
- case Hexagon::ADD_ri_cdnNotPt:
- case Hexagon::ADD_rr_cdnPt:
- case Hexagon::ADD_rr_cdnNotPt:
-
- // Conditional logical operations
- case Hexagon::XOR_rr_cdnPt :
- case Hexagon::XOR_rr_cdnNotPt :
- case Hexagon::AND_rr_cdnPt :
- case Hexagon::AND_rr_cdnNotPt :
- case Hexagon::OR_rr_cdnPt :
- case Hexagon::OR_rr_cdnNotPt :
-
-  // Conditional subtract
- case Hexagon::SUB_rr_cdnPt :
- case Hexagon::SUB_rr_cdnNotPt :
-
- // Conditional combine
- case Hexagon::COMBINE_rr_cdnPt :
- case Hexagon::COMBINE_rr_cdnNotPt :
-
- // Conditional shift operations
- case Hexagon::ASLH_cdnPt_V4:
- case Hexagon::ASLH_cdnNotPt_V4:
- case Hexagon::ASRH_cdnPt_V4:
- case Hexagon::ASRH_cdnNotPt_V4:
- case Hexagon::SXTB_cdnPt_V4:
- case Hexagon::SXTB_cdnNotPt_V4:
- case Hexagon::SXTH_cdnPt_V4:
- case Hexagon::SXTH_cdnNotPt_V4:
- case Hexagon::ZXTB_cdnPt_V4:
- case Hexagon::ZXTB_cdnNotPt_V4:
- case Hexagon::ZXTH_cdnPt_V4:
- case Hexagon::ZXTH_cdnNotPt_V4:
-
- // Conditional stores
- case Hexagon::STrib_imm_cdnPt_V4 :
- case Hexagon::STrib_imm_cdnNotPt_V4 :
- case Hexagon::STrib_cdnPt_V4 :
- case Hexagon::STrib_cdnNotPt_V4 :
- case Hexagon::STrib_indexed_cdnPt_V4 :
- case Hexagon::STrib_indexed_cdnNotPt_V4 :
- case Hexagon::POST_STbri_cdnPt_V4 :
- case Hexagon::POST_STbri_cdnNotPt_V4 :
- case Hexagon::STrib_indexed_shl_cdnPt_V4 :
- case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
-
- // Store doubleword conditionally
- case Hexagon::STrid_indexed_cdnPt_V4 :
- case Hexagon::STrid_indexed_cdnNotPt_V4 :
- case Hexagon::STrid_indexed_shl_cdnPt_V4 :
- case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
- case Hexagon::POST_STdri_cdnPt_V4 :
- case Hexagon::POST_STdri_cdnNotPt_V4 :
-
- // Store halfword conditionally
- case Hexagon::STrih_cdnPt_V4 :
- case Hexagon::STrih_cdnNotPt_V4 :
- case Hexagon::STrih_indexed_cdnPt_V4 :
- case Hexagon::STrih_indexed_cdnNotPt_V4 :
- case Hexagon::STrih_imm_cdnPt_V4 :
- case Hexagon::STrih_imm_cdnNotPt_V4 :
- case Hexagon::STrih_indexed_shl_cdnPt_V4 :
- case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
- case Hexagon::POST_SThri_cdnPt_V4 :
- case Hexagon::POST_SThri_cdnNotPt_V4 :
-
- // Store word conditionally
- case Hexagon::STriw_cdnPt_V4 :
- case Hexagon::STriw_cdnNotPt_V4 :
- case Hexagon::STriw_indexed_cdnPt_V4 :
- case Hexagon::STriw_indexed_cdnNotPt_V4 :
- case Hexagon::STriw_imm_cdnPt_V4 :
- case Hexagon::STriw_imm_cdnNotPt_V4 :
- case Hexagon::STriw_indexed_shl_cdnPt_V4 :
- case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
- case Hexagon::POST_STwri_cdnPt_V4 :
- case Hexagon::POST_STwri_cdnNotPt_V4 :
-
- case Hexagon::LDd_GP_cdnPt_V4:
- case Hexagon::LDd_GP_cdnNotPt_V4:
- case Hexagon::LDb_GP_cdnPt_V4:
- case Hexagon::LDb_GP_cdnNotPt_V4:
- case Hexagon::LDub_GP_cdnPt_V4:
- case Hexagon::LDub_GP_cdnNotPt_V4:
- case Hexagon::LDh_GP_cdnPt_V4:
- case Hexagon::LDh_GP_cdnNotPt_V4:
- case Hexagon::LDuh_GP_cdnPt_V4:
- case Hexagon::LDuh_GP_cdnNotPt_V4:
- case Hexagon::LDw_GP_cdnPt_V4:
- case Hexagon::LDw_GP_cdnNotPt_V4:
- case Hexagon::LDrid_GP_cdnPt_V4:
- case Hexagon::LDrid_GP_cdnNotPt_V4:
- case Hexagon::LDrib_GP_cdnPt_V4:
- case Hexagon::LDrib_GP_cdnNotPt_V4:
- case Hexagon::LDriub_GP_cdnPt_V4:
- case Hexagon::LDriub_GP_cdnNotPt_V4:
- case Hexagon::LDrih_GP_cdnPt_V4:
- case Hexagon::LDrih_GP_cdnNotPt_V4:
- case Hexagon::LDriuh_GP_cdnPt_V4:
- case Hexagon::LDriuh_GP_cdnNotPt_V4:
- case Hexagon::LDriw_GP_cdnPt_V4:
- case Hexagon::LDriw_GP_cdnNotPt_V4:
-
- case Hexagon::STrid_GP_cdnPt_V4:
- case Hexagon::STrid_GP_cdnNotPt_V4:
- case Hexagon::STrib_GP_cdnPt_V4:
- case Hexagon::STrib_GP_cdnNotPt_V4:
- case Hexagon::STrih_GP_cdnPt_V4:
- case Hexagon::STrih_GP_cdnNotPt_V4:
- case Hexagon::STriw_GP_cdnPt_V4:
- case Hexagon::STriw_GP_cdnNotPt_V4:
- case Hexagon::STd_GP_cdnPt_V4:
- case Hexagon::STd_GP_cdnNotPt_V4:
- case Hexagon::STb_GP_cdnPt_V4:
- case Hexagon::STb_GP_cdnNotPt_V4:
- case Hexagon::STh_GP_cdnPt_V4:
- case Hexagon::STh_GP_cdnNotPt_V4:
- case Hexagon::STw_GP_cdnPt_V4:
- case Hexagon::STw_GP_cdnNotPt_V4:
- return true;
- }
- return false;
-}
-
static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
const HexagonInstrInfo *QII) {
assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
@@ -2883,7 +2323,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
  // sense, i.e., either both should be negated or both should be non-negated.
if (( predRegNumDst != predRegNumSrc) ||
- isDotNewInst(PacketMI) != isDotNewInst(MI) ||
+ QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) ||
GetPredicateSense(MI, QII) != GetPredicateSense(PacketMI, QII)) {
return false;
}
@@ -2993,8 +2433,9 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC )
{
- // already a dot new instruction
- if (isDotNewInst(MI) && !IsNewifyStore(MI))
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ // Already a dot new instruction.
+ if (QII->isDotNewInst(MI) && !IsNewifyStore(MI))
return false;
if (!isNewifiable(MI))
@@ -3009,7 +2450,6 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
else {
// Create a dot new machine instruction to see if resources can be
// allocated. If not, bail out now.
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
int NewOpcode = GetDotNewOp(MI->getOpcode());
const MCInstrDesc &desc = QII->get(NewOpcode);
DebugLoc dl;
@@ -3152,7 +2592,7 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
  // !p0 is not complementary to p0.new
return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) &&
(GetPredicateSense(MI1, QII) != GetPredicateSense(MI2, QII)) &&
- (isDotNewInst(MI1) == isDotNewInst(MI2)));
+ (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2)));
}
// initPacketizerState - Initialize packetizer flags
@@ -3277,13 +2717,13 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// dealloc_return and memop always take SLOT0.
// Arch spec 3.4.4.2
if (QRI->Subtarget.hasV4TOps()) {
-
- if (MCIDI.mayStore() && MCIDJ.mayStore() && isNewValueInst(J)) {
+ if (MCIDI.mayStore() && MCIDJ.mayStore() &&
+ (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
Dependence = true;
return false;
}
- if ( (QII->isMemOp(J) && MCIDI.mayStore())
+ if ((QII->isMemOp(J) && MCIDI.mayStore())
|| (MCIDJ.mayStore() && QII->isMemOp(I))
|| (QII->isMemOp(J) && QII->isMemOp(I))) {
Dependence = true;
@@ -3580,7 +3020,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) {
MachineInstr *nvjMI = MII;
assert(ResourceTracker->canReserveResources(MI));
ResourceTracker->reserveResources(MI);
- if (QII->isExtended(MI) &&
+ if ((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
!tryAllocateResourcesForConstExt(MI)) {
endPacket(MBB, MI);
ResourceTracker->reserveResources(MI);
@@ -3600,7 +3040,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) {
&& (!tryAllocateResourcesForConstExt(nvjMI)
|| !ResourceTracker->canReserveResources(nvjMI)))
|| // For non-extended instruction, no need to allocate extra 4 bytes.
- (!QII->isExtended(nvjMI) &&
+ (!QII->isExtended(nvjMI) &&
!ResourceTracker->canReserveResources(nvjMI)))
{
endPacket(MBB, MI);
@@ -3616,7 +3056,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) {
CurrentPacketMIs.push_back(MI);
CurrentPacketMIs.push_back(nvjMI);
} else {
- if ( QII->isExtended(MI)
+ if ( (QII->isExtended(MI) || QII->isConstExtended(MI))
&& ( !tryAllocateResourcesForConstExt(MI)
|| !ResourceTracker->canReserveResources(MI)))
{
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
index 035afe88d5bc..36da6dfcc3d0 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -12,14 +12,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
+#include "Hexagon.h"
#include "HexagonInstPrinter.h"
-#include "HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
@@ -28,6 +28,8 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "HexagonGenAsmWriter.inc"
+const char HexagonInstPrinter::PacketPadding = '\t';
+
StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
return MII.getName(Opcode);
}
@@ -43,43 +45,42 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
StringRef Annot) {
- const char packetPadding[] = " ";
const char startPacket = '{',
endPacket = '}';
// TODO: add outer HW loop when it's supported too.
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
    // Ending a hardware loop is different from ending a regular packet.
- assert(MI->isEndPacket() && "Loop end must also end the packet");
+ assert(MI->isPacketEnd() && "Loop-end must also end the packet");
- if (MI->isStartPacket()) {
+ if (MI->isPacketStart()) {
// There must be a packet to end a loop.
// FIXME: when shuffling is always run, this shouldn't be needed.
HexagonMCInst Nop;
StringRef NoAnnot;
Nop.setOpcode (Hexagon::NOP);
- Nop.setStartPacket (MI->isStartPacket());
+ Nop.setPacketStart (MI->isPacketStart());
printInst (&Nop, O, NoAnnot);
}
// Close the packet.
- if (MI->isEndPacket())
- O << packetPadding << endPacket;
+ if (MI->isPacketEnd())
+ O << PacketPadding << endPacket;
printInstruction(MI, O);
}
else {
// Prefix the insn opening the packet.
- if (MI->isStartPacket())
- O << packetPadding << startPacket << '\n';
+ if (MI->isPacketStart())
+ O << PacketPadding << startPacket << '\n';
printInstruction(MI, O);
// Suffix the insn closing the packet.
- if (MI->isEndPacket())
+ if (MI->isPacketEnd())
      // Always suffix the packet on a new line, since the GNU assembler has
// issues with a closing brace on the same line as CONST{32,64}.
- O << '\n' << packetPadding << endPacket;
+ O << '\n' << PacketPadding << endPacket;
}
printAnnotation(O, Annot);
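For illustration, the bracket placement above produces packets shaped roughly like this in the emitted assembly (the instructions here are hypothetical; only the brace and newline placement is what the code guarantees):

	{
	  r0 = add(r1, r2)
	  memw(r3) = r0.new
	}

The closing brace always lands on its own line, which is the CONST{32,64} workaround described in the comment above.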
@@ -102,12 +103,23 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const {
- O << MI->getOperand(OpNo).getImm();
+ const MCOperand& MO = MI->getOperand(OpNo);
+
+ if(MO.isExpr()) {
+ O << *MO.getExpr();
+ } else if(MO.isImm()) {
+ O << MI->getOperand(OpNo).getImm();
+ } else {
+ llvm_unreachable("Unknown operand");
+ }
}
void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const {
- O << MI->getOperand(OpNo).getImm();
+ const HexagonMCInst *HMCI = static_cast<const HexagonMCInst*>(MI);
+ if (HMCI->isConstExtended())
+ O << "#";
+ printOperand(MI, OpNo, O);
}
void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI,
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
index 902a32352f1c..d0cef683da95 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
@@ -14,16 +14,18 @@
#ifndef HEXAGONINSTPRINTER_H
#define HEXAGONINSTPRINTER_H
-#include "HexagonMCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
namespace llvm {
+ class HexagonMCInst;
+
class HexagonInstPrinter : public MCInstPrinter {
public:
explicit HexagonInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
+ : MCInstPrinter(MAI, MII, MRI), MII(MII) {}
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot);
@@ -65,10 +67,19 @@ namespace llvm {
void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
{ printSymbol(MI, OpNo, O, false); }
- bool isConstExtended(const MCInst *MI) const;
+ const MCInstrInfo &getMII() const {
+ return MII;
+ }
+
protected:
void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi)
const;
+
+ static const char PacketPadding;
+
+ private:
+ const MCInstrInfo &MII;
+
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
index 8678401feee4..59849aa7e1c7 100644
--- a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = HexagonAsmPrinter
parent = Hexagon
-required_libraries = MC Support
+required_libraries = HexagonDesc MC Support
add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
index 8e3da99404ee..62b9b600ce8e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMHexagonDesc
- HexagonMCTargetDesc.cpp
HexagonMCAsmInfo.cpp
+ HexagonMCInst.cpp
+ HexagonMCTargetDesc.cpp
)
add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 7221e906342e..d4a93b5c87a4 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -17,6 +17,9 @@
#ifndef HEXAGONBASEINFO_H
#define HEXAGONBASEINFO_H
+#include "HexagonMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
namespace llvm {
/// HexagonII - This namespace holds all of the target specific flags that
@@ -28,22 +31,50 @@ namespace HexagonII {
// Insn types.
// *** Must match HexagonInstrFormat*.td ***
enum Type {
- TypePSEUDO = 0,
- TypeALU32 = 1,
- TypeCR = 2,
- TypeJR = 3,
- TypeJ = 4,
- TypeLD = 5,
- TypeST = 6,
- TypeSYSTEM = 7,
- TypeXTYPE = 8,
- TypeMEMOP = 9,
- TypeNV = 10,
- TypePREFIX = 30, // Such as extenders.
- TypeMARKER = 31 // Such as end of a HW loop.
+ TypePSEUDO = 0,
+ TypeALU32 = 1,
+ TypeCR = 2,
+ TypeJR = 3,
+ TypeJ = 4,
+ TypeLD = 5,
+ TypeST = 6,
+ TypeSYSTEM = 7,
+ TypeXTYPE = 8,
+ TypeMEMOP = 9,
+ TypeNV = 10,
+ TypePREFIX = 30, // Such as extenders.
+ TypeENDLOOP = 31 // Such as end of a HW loop.
};
+ enum SubTarget {
+ HasV2SubT = 0xf,
+ HasV2SubTOnly = 0x1,
+ NoV2SubT = 0x0,
+ HasV3SubT = 0xe,
+ HasV3SubTOnly = 0x2,
+ NoV3SubT = 0x1,
+ HasV4SubT = 0xc,
+ NoV4SubT = 0x3,
+ HasV5SubT = 0x8,
+ NoV5SubT = 0x7
+ };
+ enum AddrMode {
+ NoAddrMode = 0, // No addressing mode
+ Absolute = 1, // Absolute addressing mode
+ AbsoluteSet = 2, // Absolute set addressing mode
+ BaseImmOffset = 3, // Indirect with offset
+ BaseLongOffset = 4, // Indirect with long offset
+ BaseRegOffset = 5 // Indirect with register offset
+ };
+
+ enum MemAccessSize {
+    NoMemAccess = 0,            // Not a memory access instruction.
+ ByteAccess = 1, // Byte access instruction (memb).
+ HalfWordAccess = 2, // Half word access instruction (memh).
+    WordAccess = 3,             // Word access instruction (memw).
+ DoubleWordAccess = 4 // Double word access instruction (memd)
+ };
// MCInstrDesc TSFlags
// *** Must match HexagonInstrFormat*.td ***
@@ -58,11 +89,93 @@ namespace HexagonII {
// Predicated instructions.
PredicatedPos = 6,
- PredicatedMask = 0x1
+ PredicatedMask = 0x1,
+ PredicatedFalsePos = 7,
+ PredicatedFalseMask = 0x1,
+ PredicatedNewPos = 8,
+ PredicatedNewMask = 0x1,
+
+ // New-Value consumer instructions.
+ NewValuePos = 9,
+ NewValueMask = 0x1,
+
+ // New-Value producer instructions.
+ hasNewValuePos = 10,
+ hasNewValueMask = 0x1,
+
+ // Which operand consumes or produces a new value.
+ NewValueOpPos = 11,
+ NewValueOpMask = 0x7,
+
+ // Which bits encode the new value.
+ NewValueBitsPos = 14,
+ NewValueBitsMask = 0x3,
+
+ // Stores that can become new-value stores.
+ mayNVStorePos = 16,
+ mayNVStoreMask = 0x1,
+
+ // New-value store instructions.
+ NVStorePos = 17,
+ NVStoreMask = 0x1,
+
+ // Extendable insns.
+ ExtendablePos = 18,
+ ExtendableMask = 0x1,
+
+ // Insns must be extended.
+ ExtendedPos = 19,
+ ExtendedMask = 0x1,
+
+ // Which operand may be extended.
+ ExtendableOpPos = 20,
+ ExtendableOpMask = 0x7,
+
+ // Signed or unsigned range.
+ ExtentSignedPos = 23,
+ ExtentSignedMask = 0x1,
+
+ // Number of bits of range before extending operand.
+ ExtentBitsPos = 24,
+ ExtentBitsMask = 0x1f,
+
+ // Valid subtargets
+ validSubTargetPos = 29,
+ validSubTargetMask = 0xf,
+
+ // Addressing mode for load/store instructions.
+ AddrModePos = 33,
+ AddrModeMask = 0x7,
+
+ // Access size of memory access instructions (load/store).
+ MemAccessSizePos = 36,
+ MemAccesSizeMask = 0x7
};
// *** The code above must match HexagonInstrFormat*.td *** //
+ // Hexagon specific MO operand flag mask.
+ enum HexagonMOTargetFlagVal {
+ //===------------------------------------------------------------------===//
+ // Hexagon Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ HMOTF_ConstExtended = 1,
+
+ /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation
+ /// Used for computing a global address for PIC compilations
+ MO_PCREL,
+
+ /// MO_GOT - Indicates a GOT-relative relocation
+ MO_GOT,
+
+ // Low or high part of a symbol.
+ MO_LO16, MO_HI16,
+
+ // Offset from the base of the SDA.
+ MO_GPREL
+ };
+
} // End namespace HexagonII.
} // End namespace llvm.
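A minimal sketch of how these Pos/Mask pairs are meant to be consumed (it mirrors the accessors added in HexagonMCInst.cpp later in this patch; TSFlags comes from the instruction's MCInstrDesc, and the mask is spelled MemAccesSizeMask to match the enum as committed):

  #include "MCTargetDesc/HexagonBaseInfo.h"
  #include <cstdint>

  using namespace llvm;

  // Shift the 64-bit TSFlags down to the field's bit position, then mask.
  static unsigned getAddrMode(uint64_t TSFlags) {
    return (TSFlags >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;
  }
  static unsigned getMemAccessSize(uint64_t TSFlags) {
    return (TSFlags >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask;
  }

Note that AddrModePos = 33 and MemAccessSizePos = 36 sit above bit 31, so TSFlags must be treated as a full 64-bit value throughout.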
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index 86f75d1c2d7a..3deb8d1deb42 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -31,6 +31,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
AscizDirective = "\t.string\t";
WeakRefDirective = "\t.weak\t";
+ SupportsDebugInformation = true;
UsesELFSectionDirectiveForBSS = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
new file mode 100644
index 000000000000..9260b4a27661
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
@@ -0,0 +1,175 @@
+//===- HexagonMCInst.cpp - Hexagon sub-class of MCInst --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some Hexagon VLIW annotations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+
+using namespace llvm;
+
+// Return the slots used by the insn.
+unsigned HexagonMCInst::getUnits(const HexagonTargetMachine* TM) const {
+ const HexagonInstrInfo* QII = TM->getInstrInfo();
+ const InstrItineraryData* II = TM->getInstrItineraryData();
+ const InstrStage*
+ IS = II->beginStage(QII->get(this->getOpcode()).getSchedClass());
+
+ return (IS->getUnits());
+}
+
+// Return the Hexagon ISA class for the insn.
+unsigned HexagonMCInst::getType() const {
+ const uint64_t F = MCID->TSFlags;
+
+ return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+}
+
+// Return whether the insn is an actual insn, i.e. not a pseudo, not a
+// prefix, and not an end-of-hardware-loop marker.
+bool HexagonMCInst::isCanon() const {
+ return (!MCID->isPseudo() &&
+ !isPrefix() &&
+ getType() != HexagonII::TypeENDLOOP);
+}
+
+// Return whether the insn is a prefix.
+bool HexagonMCInst::isPrefix() const {
+ return (getType() == HexagonII::TypePREFIX);
+}
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool HexagonMCInst::isSolo() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
+}
+
+// Return whether the insn is a new-value consumer.
+bool HexagonMCInst::isNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
+// Return whether the instruction is a legal new-value producer.
+bool HexagonMCInst::hasNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
+}
+
+// Return the operand that consumes or produces a new value.
+const MCOperand& HexagonMCInst::getNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ const unsigned O = (F >> HexagonII::NewValueOpPos) &
+ HexagonII::NewValueOpMask;
+ const MCOperand& MCO = getOperand(O);
+
+ assert ((isNewValue() || hasNewValue()) && MCO.isReg());
+ return (MCO);
+}
+
+// Return whether the instruction needs to be constant extended.
+// 1) Always return true if the instruction has 'isExtended' flag set.
+//
+// isExtendable:
+// 2) For immediate extended operands, return true only if the value is
+// out-of-range.
+// 3) For global address, always return true.
+
+bool HexagonMCInst::isConstExtended(void) const {
+ if (isExtended())
+ return true;
+
+ if (!isExtendable())
+ return false;
+
+ short ExtOpNum = getCExtOpNum();
+ int MinValue = getMinValue();
+ int MaxValue = getMaxValue();
+ const MCOperand& MO = getOperand(ExtOpNum);
+
+  // We could be using an instruction with an extendable immediate to shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+ // We currently only handle isGlobal() because it is the only kind of
+ // object we are going to end up with here for now.
+ // In the future we probably should add isSymbol(), etc.
+ if (MO.isExpr())
+ return true;
+
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int ImmValue = MO.getImm();
+ return (ImmValue < MinValue || ImmValue > MaxValue);
+}
+
+// Return whether the instruction must always be extended.
+bool HexagonMCInst::isExtended(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+}
+
+// Return true if the instruction may be extended based on the operand value.
+bool HexagonMCInst::isExtendable(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+}
+
+// Return number of bits in the constant extended operand.
+unsigned HexagonMCInst::getBitCount(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
+}
+
+// Return constant extended operand number.
+unsigned short HexagonMCInst::getCExtOpNum(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+// Return whether the operand can be constant extended.
+bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+ == OperandNum;
+}
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInst::getMinValue(void) const {
+ const uint64_t F = MCID->TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return -1 << (bits - 1);
+ else
+ return 0;
+}
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInst::getMaxValue(void) const {
+ const uint64_t F = MCID->TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return ~(-1 << (bits - 1));
+ else
+ return ~(-1 << bits);
+}
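As a worked example of the range computation above (an 8-bit extent is hypothetical): for a signed extendable operand with bits = 8, getMinValue() yields -1 << 7 = -128 and getMaxValue() yields ~(-1 << 7) = 127; for an unsigned operand with bits = 8, the range is 0 through ~(-1 << 8) = 255. isConstExtended() therefore returns true exactly when the immediate falls outside this window, e.g. 300 in an unsigned 8-bit extent.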
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
new file mode 100644
index 000000000000..3ca71f00b241
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
@@ -0,0 +1,100 @@
+//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some VLIW annotations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCINST_H
+#define HEXAGONMCINST_H
+
+#include "HexagonTargetMachine.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class HexagonMCInst: public MCInst {
+ // MCID is set during instruction lowering.
+ // It is needed in order to access TSFlags for
+ // use in checking MC instruction properties.
+ const MCInstrDesc *MCID;
+
+ // Packet start and end markers
+ unsigned packetStart: 1, packetEnd: 1;
+
+ public:
+ explicit HexagonMCInst():
+ MCInst(), MCID(0), packetStart(0), packetEnd(0) {};
+ HexagonMCInst(const MCInstrDesc& mcid):
+ MCInst(), MCID(&mcid), packetStart(0), packetEnd(0) {};
+
+ bool isPacketStart() const { return (packetStart); };
+ bool isPacketEnd() const { return (packetEnd); };
+ void setPacketStart(bool Y) { packetStart = Y; };
+ void setPacketEnd(bool Y) { packetEnd = Y; };
+ void resetPacket() { setPacketStart(false); setPacketEnd(false); };
+
+ // Return the slots used by the insn.
+ unsigned getUnits(const HexagonTargetMachine* TM) const;
+
+ // Return the Hexagon ISA class for the insn.
+ unsigned getType() const;
+
+ void setDesc(const MCInstrDesc& mcid) { MCID = &mcid; };
+ const MCInstrDesc& getDesc(void) const { return *MCID; };
+
+    // Return whether the insn is an actual insn, i.e. not a pseudo, prefix,
+    // or end-of-hardware-loop marker.
+ bool isCanon() const;
+
+ // Return whether the insn is a prefix.
+ bool isPrefix() const;
+
+ // Return whether the insn is solo, i.e., cannot be in a packet.
+ bool isSolo() const;
+
+ // Return whether the instruction needs to be constant extended.
+ bool isConstExtended() const;
+
+ // Return constant extended operand number.
+ unsigned short getCExtOpNum(void) const;
+
+ // Return whether the insn is a new-value consumer.
+ bool isNewValue() const;
+
+ // Return whether the instruction is a legal new-value producer.
+ bool hasNewValue() const;
+
+ // Return the operand that consumes or produces a new value.
+ const MCOperand& getNewValue() const;
+
+ // Return number of bits in the constant extended operand.
+ unsigned getBitCount(void) const;
+
+ private:
+    // Return whether the instruction must always be extended.
+ bool isExtended() const;
+
+ // Return true if the insn may be extended based on the operand value.
+ bool isExtendable() const;
+
+ // Return true if the operand can be constant extended.
+ bool isOperandExtended(const unsigned short OperandNum) const;
+
+ // Return the min value that a constant extendable operand can have
+ // without being extended.
+ int getMinValue() const;
+
+ // Return the max value that a constant extendable operand can have
+ // without being extended.
+ int getMaxValue() const;
+ };
+}
+
+#endif
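A minimal usage sketch for the packet markers (the helper and its name are hypothetical; only the HexagonMCInst API comes from the header above):

  #include "MCTargetDesc/HexagonMCInst.h"
  #include "llvm/ADT/SmallVector.h"

  // Hypothetical helper: mark the boundary insns of a packet before printing,
  // so HexagonInstPrinter::printInst() knows where to place the braces.
  static void markPacket(llvm::SmallVectorImpl<llvm::HexagonMCInst> &Packet) {
    if (Packet.empty())
      return;
    Packet.front().setPacketStart(true);
    Packet.back().setPacketEnd(true);
  }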
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 3cfa4fddd87c..6b1d2d161958 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -13,10 +13,12 @@
#include "HexagonMCTargetDesc.h"
#include "HexagonMCAsmInfo.h"
+#include "InstPrinter/HexagonInstPrinter.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index 7aa5dd3b8980..40f6c8d23ea8 100644
--- a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "Hexagon.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index 8995080974cc..c06e8bc3cdbe 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
+subdirectories = AArch64 ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
index 813767ba6d65..4a7d8e8d8887 100644
--- a/lib/Target/MBlaze/AsmParser/CMakeLists.txt
+++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -2,7 +2,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMMBlazeAsmParser
- MBlazeAsmLexer.cpp
MBlazeAsmParser.cpp
)
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
deleted file mode 100644
index 59a1ed97d3d4..000000000000
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-//===-- MBlazeAsmLexer.cpp - Tokenize MBlaze assembly to AsmTokens --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/MBlazeBaseInfo.h"
-
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCTargetAsmLexer.h"
-
-#include "llvm/Support/TargetRegistry.h"
-
-#include <string>
-#include <map>
-
-using namespace llvm;
-
-namespace {
-
- class MBlazeBaseAsmLexer : public MCTargetAsmLexer {
- const MCAsmInfo &AsmInfo;
-
- const AsmToken &lexDefinite() {
- return getLexer()->Lex();
- }
-
- AsmToken LexTokenUAL();
- protected:
- typedef std::map <std::string, unsigned> rmap_ty;
-
- rmap_ty RegisterMap;
-
- void InitRegisterMap(const MCRegisterInfo *info) {
- unsigned numRegs = info->getNumRegs();
-
- for (unsigned i = 0; i < numRegs; ++i) {
- const char *regName = info->getName(i);
- if (regName)
- RegisterMap[regName] = i;
- }
- }
-
- unsigned MatchRegisterName(StringRef Name) {
- rmap_ty::iterator iter = RegisterMap.find(Name.str());
- if (iter != RegisterMap.end())
- return iter->second;
- else
- return 0;
- }
-
- AsmToken LexToken() {
- if (!Lexer) {
- SetError(SMLoc(), "No MCAsmLexer installed");
- return AsmToken(AsmToken::Error, "", 0);
- }
-
- switch (AsmInfo.getAssemblerDialect()) {
- default:
- SetError(SMLoc(), "Unhandled dialect");
- return AsmToken(AsmToken::Error, "", 0);
- case 0:
- return LexTokenUAL();
- }
- }
- public:
- MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : MCTargetAsmLexer(T), AsmInfo(MAI) {
- }
- };
-
- class MBlazeAsmLexer : public MBlazeBaseAsmLexer {
- public:
- MBlazeAsmLexer(const Target &T, const MCRegisterInfo &MRI,
- const MCAsmInfo &MAI)
- : MBlazeBaseAsmLexer(T, MAI) {
- InitRegisterMap(&MRI);
- }
- };
-}
-
-AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
- const AsmToken &lexedToken = lexDefinite();
-
- switch (lexedToken.getKind()) {
- default:
- return AsmToken(lexedToken);
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- return AsmToken(lexedToken);
- case AsmToken::Identifier:
- {
- unsigned regID = MatchRegisterName(lexedToken.getString().lower());
-
- if (regID) {
- return AsmToken(AsmToken::Register,
- lexedToken.getString(),
- static_cast<int64_t>(regID));
- } else {
- return AsmToken(lexedToken);
- }
- }
- }
-}
-
-extern "C" void LLVMInitializeMBlazeAsmLexer() {
- RegisterMCAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
-}
-
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index f7809caeb32f..dda6e247ac4f 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -8,18 +8,18 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
namespace {
@@ -35,7 +35,8 @@ class MBlazeAsmParser : public MCTargetAsmParser {
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
MBlazeOperand *ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- MBlazeOperand *ParseRegister(unsigned &RegNo);
+ MBlazeOperand *ParseRegister();
+ MBlazeOperand *ParseRegister(SMLoc &StartLoc, SMLoc &EndLoc);
MBlazeOperand *ParseImmediate();
MBlazeOperand *ParseFsl();
MBlazeOperand* ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -81,29 +82,35 @@ struct MBlazeOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ unsigned OffReg;
+ const MCExpr *Off;
+ };
+
+ struct FslImmOp {
+ const MCExpr *Val;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- unsigned OffReg;
- const MCExpr *Off;
- } Mem;
-
- struct {
- const MCExpr *Val;
- } FslImm;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
+ struct FslImmOp FslImm;
};
MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -383,23 +390,31 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
bool MBlazeAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
- return (ParseRegister(RegNo) == 0);
+ MBlazeOperand *Reg = ParseRegister(StartLoc, EndLoc);
+ if (!Reg)
+ return true;
+ RegNo = Reg->getReg();
+ return false;
}
-MBlazeOperand *MBlazeAsmParser::ParseRegister(unsigned &RegNo) {
- SMLoc S = Parser.getTok().getLoc();
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+MBlazeOperand *MBlazeAsmParser::ParseRegister() {
+ SMLoc S, E;
+ return ParseRegister(S, E);
+}
- switch (getLexer().getKind()) {
- default: return 0;
- case AsmToken::Identifier:
- RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
- if (RegNo == 0)
- return 0;
+MBlazeOperand *MBlazeAsmParser::ParseRegister(SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
- getLexer().Lex();
- return MBlazeOperand::CreateReg(RegNo, S, E);
- }
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return 0;
+
+ unsigned RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
+ if (RegNo == 0)
+ return 0;
+
+ getLexer().Lex();
+ return MBlazeOperand::CreateReg(RegNo, StartLoc, EndLoc);
}
static unsigned MatchFslRegister(StringRef String) {
@@ -415,7 +430,7 @@ static unsigned MatchFslRegister(StringRef String) {
MBlazeOperand *MBlazeAsmParser::ParseFsl() {
SMLoc S = Parser.getTok().getLoc();
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc E = Parser.getTok().getEndLoc();
switch (getLexer().getKind()) {
default: return 0;
@@ -432,7 +447,7 @@ MBlazeOperand *MBlazeAsmParser::ParseFsl() {
MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
SMLoc S = Parser.getTok().getLoc();
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc E = Parser.getTok().getEndLoc();
const MCExpr *EVal;
switch (getLexer().getKind()) {
@@ -442,7 +457,7 @@ MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
case AsmToken::Minus:
case AsmToken::Integer:
case AsmToken::Identifier:
- if (getParser().ParseExpression(EVal))
+ if (getParser().parseExpression(EVal))
return 0;
return MBlazeOperand::CreateImm(EVal, S, E);
@@ -454,8 +469,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
MBlazeOperand *Op;
// Attempt to parse the next token as a register name
- unsigned RegNo;
- Op = ParseRegister(RegNo);
+ Op = ParseRegister();
// Attempt to parse the next token as an FSL immediate
if (!Op)
@@ -529,10 +543,10 @@ bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -548,12 +562,9 @@ bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
-extern "C" void LLVMInitializeMBlazeAsmLexer();
-
/// Force static initialization.
extern "C" void LLVMInitializeMBlazeAsmParser() {
RegisterMCAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
- LLVMInitializeMBlazeAsmLexer();
}
#define GET_REGISTER_MATCHER
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index 0bf93d71dab8..91a41f39b5d8 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -9,7 +9,6 @@ tablegen(LLVM MBlazeGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM MBlazeGenCallingConv.inc -gen-callingconv)
tablegen(LLVM MBlazeGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
-tablegen(LLVM MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
add_public_tablegen_target(MBlazeCommonTableGen)
add_llvm_target(MBlazeCodeGen
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 6b958c85eebf..c03ab3803b60 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -12,10 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MBlaze.h"
#include "MBlazeDisassembler.h"
-
-#include "llvm/MC/EDInstInfo.h"
+#include "MBlaze.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -26,7 +24,6 @@
// #include "MBlazeGenDecoderTables.inc"
// #include "MBlazeGenRegisterNames.inc"
-#include "MBlazeGenEDInfo.inc"
namespace llvm {
extern const MCInstrDesc MBlazeInsts[];
@@ -492,10 +489,6 @@ static unsigned getOPCODE(uint32_t insn) {
}
}
-const EDInstInfo *MBlazeDisassembler::getEDInfo() const {
- return instInfoMBlaze;
-}
-
//
// Public interface for the disassembler
//
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
index 5c4ae3b1ace8..b8ff8f607265 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -23,8 +23,6 @@ class MCInst;
class MemoryObject;
class raw_ostream;
-struct EDInstInfo;
-
/// MBlazeDisassembler - Disassembler for all MBlaze platforms.
class MBlazeDisassembler : public MCDisassembler {
public:
@@ -44,9 +42,6 @@ public:
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const;
-
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
};
} // namespace llvm
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
index a1f1dbc7a23b..fc2b3d51b44c 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -12,11 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "MBlaze.h"
#include "MBlazeInstPrinter.h"
-#include "llvm/MC/MCInst.h"
+#include "MBlaze.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index b679a318c3e0..7dafaef0af08 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -15,32 +15,32 @@
#define DEBUG_TYPE "mblaze-asm-printer"
#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
#include "MBlazeInstrInfo.h"
-#include "MBlazeTargetMachine.h"
-#include "MBlazeMachineFunction.h"
#include "MBlazeMCInstLower.h"
-#include "InstPrinter/MBlazeInstPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeTargetMachine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
#include <cctype>
using namespace llvm;
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index 19e787d8622d..3d0d1cecd1f1 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -16,14 +16,14 @@
#include "MBlaze.h"
#include "MBlazeTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index 9e467bf337e0..172304bd5b45 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -14,21 +14,21 @@
#define DEBUG_TYPE "mblaze-frame-lowering"
#include "MBlazeFrameLowering.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
#include "MBlazeInstrInfo.h"
#include "MBlazeMachineFunction.h"
-#include "InstPrinter/MBlazeInstPrinter.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -426,6 +426,45 @@ void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+// Eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
+void MBlazeFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+    // Turn the adjcallstackdown instruction into 'addi r1, r1, -<amt>' and
+    // the adjcallstackup instruction into 'addi r1, r1, <amt>'.
+ MachineInstr *Old = I;
+ int Amount = Old->getOperand(0).getImm() + 4;
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
+ New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-Amount);
+ } else {
+ assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
+ New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+
void MBlazeFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
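As a worked example of the round-up in eliminateCallFramePseudoInstr above (the values are hypothetical): with Amount = 20 and Align = 8, integer arithmetic gives (20 + 8 - 1) / 8 * 8 = 27 / 8 * 8 = 3 * 8 = 24, so the outgoing-argument area grows to the next 8-byte boundary; an already-aligned Amount such as 24 maps to itself, since (24 + 7) / 8 * 8 = 24.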
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h
index 01e6578a352f..f4228c5f0890 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.h
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -39,6 +39,10 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool hasFP(const MachineFunction &MF) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index 6b4349766f37..78ad24debb1b 100644
--- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -17,21 +17,21 @@
#include "MBlazeRegisterInfo.h"
#include "MBlazeSubtarget.h"
#include "MBlazeTargetMachine.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 310c25e839c3..d4f943297acb 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -15,14 +15,9 @@
#define DEBUG_TYPE "mblaze-lower"
#include "MBlazeISelLowering.h"
#include "MBlazeMachineFunction.h"
+#include "MBlazeSubtarget.h"
#include "MBlazeTargetMachine.h"
#include "MBlazeTargetObjectFile.h"
-#include "MBlazeSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,6 +25,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -81,6 +81,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FPOWI, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FLOG, MVT::f32, Expand);
@@ -159,7 +160,8 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
// Operations not directly supported by MBlaze.
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTR, MVT::i32, Expand);
@@ -1027,15 +1029,17 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
  // Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // If this function is using the interrupt_handler calling convention
+ // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
+ unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
+ : MBlazeISD::Ret;
+ unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14
+ : MBlaze::R15;
+ RetOps.push_back(DAG.getRegister(Reg, MVT::i32));
+
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1048,20 +1052,16 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// guarantee that all emitted copies are
// stuck together, avoiding something bad
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // If this function is using the interrupt_handler calling convention
- // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
- unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
- : MBlazeISD::Ret;
- unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14
- : MBlaze::R15;
- SDValue DReg = DAG.getRegister(Reg, MVT::i32);
+ RetOps[0] = Chain; // Update chain.
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg, Flag);
+ RetOps.push_back(Flag);
- return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg);
+ return DAG.getNode(Ret, dl, MVT::Other, &RetOps[0], RetOps.size());
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index a01fab567c8a..f6b4095a93dc 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -17,8 +17,8 @@
#include "MBlaze.h"
#include "MBlazeSubtarget.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index b5025fc8ee6c..79449f73f74e 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -12,15 +12,15 @@
//===----------------------------------------------------------------------===//
#include "MBlazeInstrInfo.h"
-#include "MBlazeTargetMachine.h"
#include "MBlazeMachineFunction.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
#include "MBlazeGenInstrInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 139bf7156a69..f86bc0b0b5a4 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -28,9 +28,9 @@ def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
//===----------------------------------------------------------------------===//
def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MBlazeIRet : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
[SDNPHasChain,SDNPOptInGlue,SDNPOutGlue,
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 1c2e3b26613e..8d262a01e706 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -12,13 +12,13 @@
//===----------------------------------------------------------------------===//
#include "MBlazeIntrinsicInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstring>
using namespace llvm;
@@ -104,7 +104,7 @@ Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Type **Tys,
unsigned numTy) const {
assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
- AttrListPtr AList = getAttributes(M->getContext(),
+ AttributeSet AList = getAttributes(M->getContext(),
(mblazeIntrinsic::ID) IntrID);
return cast<Function>(M->getOrInsertFunction(getName(IntrID),
getType(M->getContext(), IntrID),
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
index 6b9f42ec91a6..ad414ac40fd7 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -14,19 +14,19 @@
#include "MBlazeMCInstLower.h"
#include "MBlazeInstrInfo.h"
-#include "llvm/Constants.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
using namespace llvm;
MCSymbol *MBlazeMCInstLower::
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h
index 95cc5077cc16..10d507f37bbc 100644
--- a/lib/Target/MBlaze/MBlazeMachineFunction.h
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -16,8 +16,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index daa76e887fca..bd83afc1cc83 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -16,25 +16,25 @@
#include "MBlazeRegisterInfo.h"
#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
#include "MBlazeMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_REGINFO_TARGET_DESC
#include "MBlazeGenRegisterInfo.inc"
@@ -83,67 +83,21 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-// This function eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
-void MBlazeRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have a frame pointer, turn the adjcallstackup instruction into a
- // 'addi r1, r1, -<amt>' and the adjcallstackdown instruction into
- // 'addi r1, r1, <amt>'
- MachineInstr *Old = I;
- int Amount = Old->getOperand(0).getImm() + 4;
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- MachineInstr *New;
- if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
- New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
- .addReg(MBlaze::R1).addImm(-Amount);
- } else {
- assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
- New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
- .addReg(MBlaze::R1).addImm(Amount);
- }
-
- // Replace the pseudo instruction with a new instruction...
- MBB.insert(I, New);
- }
- }
-
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
// FrameIndex represents objects inside an abstract stack.
// We must replace FrameIndex with a direct stack/frame
// pointer reference.
void MBlazeRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FIOperandNum, RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
-
- unsigned i = 0;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned oi = i == 2 ? 1 : 2;
+ unsigned OFIOperandNum = FIOperandNum == 2 ? 1 : 2;
DEBUG(dbgs() << "\nFunction : " << MF.getName() << "\n";
dbgs() << "<--------->\n" << MI);
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
int stackSize = MFI->getStackSize();
int spOffset = MFI->getObjectOffset(FrameIndex);
@@ -159,16 +113,16 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// as explained in LowerFormalArguments, detect negative offsets
// and adjust SPOffsets considering the final stack size.
int Offset = (spOffset < 0) ? (stackSize - spOffset) : spOffset;
- Offset += MI.getOperand(oi).getImm();
+ Offset += MI.getOperand(OFIOperandNum).getImm();
DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n");
- MI.getOperand(oi).ChangeToImmediate(Offset);
- MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+ MI.getOperand(OFIOperandNum).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
}
void MBlazeRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const {
// Set the stack offset where GP must be saved/loaded from.
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
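// The signature change above removes a scan that every target used to
// duplicate: eliminateFrameIndex now receives FIOperandNum from the generic
// frame-index elimination code instead of searching for the operand itself.
// For reference, the search each backend previously performed looked like
// this (taken from the deleted lines above; sketch only):
#include "llvm/CodeGen/MachineInstr.h"
#include <cassert>

static unsigned findFrameIndexOperand(const llvm::MachineInstr &MI) {
  unsigned i = 0;
  while (!MI.getOperand(i).isFI()) {
    ++i;
    assert(i < MI.getNumOperands() &&
           "Instr doesn't have FrameIndex operand!");
  }
  return i;  // The caller passes this in as FIOperandNum.
}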
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 1d5116293516..497f3866c9ca 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -50,15 +50,13 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h
index eb375046f218..ed43d21f30c5 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.h
+++ b/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -14,8 +14,8 @@
#ifndef MBLAZESUBTARGET_H
#define MBLAZESUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index f180652f1127..bcdd32fed947 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -13,8 +13,8 @@
#include "MBlazeTargetMachine.h"
#include "MBlaze.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
@@ -42,8 +42,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT,
InstrInfo(*this),
FrameLowering(Subtarget),
TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ InstrItins(Subtarget.getInstrItineraryData()) {
}
namespace {
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index a8df4e63e3ee..956794dddaf9 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -14,17 +14,16 @@
#ifndef MBLAZE_TARGETMACHINE_H
#define MBLAZE_TARGETMACHINE_H
-#include "MBlazeSubtarget.h"
-#include "MBlazeInstrInfo.h"
+#include "MBlazeFrameLowering.h"
#include "MBlazeISelLowering.h"
-#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeInstrInfo.h"
#include "MBlazeIntrinsicInfo.h"
-#include "MBlazeFrameLowering.h"
+#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
class formatted_raw_ostream;
@@ -38,8 +37,6 @@ namespace llvm {
MBlazeSelectionDAGInfo TSInfo;
MBlazeIntrinsicInfo IntrinsicInfo;
InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
@@ -75,11 +72,6 @@ namespace llvm {
const TargetIntrinsicInfo *getIntrinsicInfo() const
{ return &IntrinsicInfo; }
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const
- { return &STTI; }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const
- { return &VTTI; }
-
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
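// The STTI/VTTI members vanish from the target machines in this patch because
// the cost-model hooks moved out of TargetMachine and into the
// TargetTransformInfo analysis group. A sketch of how a pass queries the cost
// model after this change, assuming the LLVM 3.3-era pass API
// ('CostQueryPass' is a hypothetical example, not part of this patch):
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct CostQueryPass : public FunctionPass {
  static char ID;
  CostQueryPass() : FunctionPass(ID) {}
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetTransformInfo>();  // replaces the TargetMachine hooks
    AU.setPreservesAll();
  }
  virtual bool runOnFunction(Function &F) {
    const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
    (void)TTI;  // e.g. TTI.getArithmeticInstrCost(Instruction::Add, F.getReturnType())
    return false;
  }
};
}
char CostQueryPass::ID = 0;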
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
index 899c74ee8ed7..a7a0a68b1612 100644
--- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -9,14 +9,14 @@
#include "MBlazeTargetObjectFile.h"
#include "MBlazeSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
void MBlazeTargetObjectFile::
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
index 44feeb49e7f1..6f9752c42951 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -8,9 +8,10 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
@@ -18,7 +19,6 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -54,7 +54,7 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
@@ -88,7 +88,7 @@ bool MBlazeAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// FIXME: Is this right? It's what the "generic" code was doing before,
// but is X86 specific. Is it actually true for MBlaze also, or was it
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
index 2b71d9d3c844..8faff6ade441 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "MCTargetDesc/MBlazeBaseInfo.h"
#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCFixup.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
index 9a7549b0e7cf..380750d50f4c 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "MBlazeMCTargetDesc.h"
-#include "MBlazeMCAsmInfo.h"
#include "InstPrinter/MBlazeInstPrinter.h"
+#include "MBlazeMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile
index 83c2a7d34da1..512ce9a08103 100644
--- a/lib/Target/MBlaze/Makefile
+++ b/lib/Target/MBlaze/Makefile
@@ -15,8 +15,7 @@ BUILT_SOURCES = MBlazeGenRegisterInfo.inc MBlazeGenInstrInfo.inc \
MBlazeGenAsmWriter.inc \
MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \
MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \
- MBlazeGenSubtargetInfo.inc MBlazeGenIntrinsics.inc \
- MBlazeGenEDInfo.inc
+ MBlazeGenSubtargetInfo.inc MBlazeGenIntrinsics.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
diff --git a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
index 71210d8db466..323a7f647d56 100644
--- a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
+++ b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MBlaze.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index 0930c453e954..4b12aeadd3e4 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -12,11 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "MSP430.h"
#include "MSP430InstPrinter.h"
-#include "llvm/MC/MCInst.h"
+#include "MSP430.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index 2e328cb5d6ac..3c9576056946 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
void MSP430MCAsmInfo::anchor() { }
MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
- PointerSize = 2;
+ PointerSize = CalleeSaveStackSlotSize = 2;
PrivateGlobalPrefix = ".L";
WeakRefDirective ="\t.weak\t";
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index c455f6bc24f2..530e6aae92fd 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "MSP430MCTargetDesc.h"
-#include "MSP430MCAsmInfo.h"
#include "InstPrinter/MSP430InstPrinter.h"
+#include "MSP430MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 86bc183c1bdf..0a04e5ddb75d 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -14,26 +14,26 @@
#define DEBUG_TYPE "asm-printer"
#include "MSP430.h"
+#include "InstPrinter/MSP430InstPrinter.h"
#include "MSP430InstrInfo.h"
#include "MSP430MCInstLower.h"
#include "MSP430TargetMachine.h"
-#include "InstPrinter/MSP430InstPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index bdeb0c590f2d..f128427f8066 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -18,11 +18,11 @@
#define DEBUG_TYPE "msp430-branch-select"
#include "MSP430.h"
#include "MSP430InstrInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
STATISTIC(NumExpanded, "Number of branches expanded to long format");
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
index ad27cc9122a8..b448cc4ed9b8 100644
--- a/lib/Target/MSP430/MSP430CallingConv.td
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -24,6 +24,9 @@ def RetCC_MSP430 : CallingConv<[
// MSP430 Argument Calling Conventions
//===----------------------------------------------------------------------===//
def CC_MSP430 : CallingConv<[
+ // Pass by value if the byval attribute is given
+ CCIfByVal<CCPassByVal<2, 2>>,
+
// Promote i8 arguments to i16.
CCIfType<[i8], CCPromoteToType<i16>>,
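// What the new CCIfByVal line enables at the source level: aggregates passed
// by value now get a 2-byte-aligned stack slot, and the call lowering later
// in this patch copies the data into it with an inline memcpy (see
// LowerCCCCallTo below). A hedged C++-level illustration; whether a given
// aggregate is actually marked 'byval' is the front end's ABI decision:
struct Pair { int a, b; };               // two 16-bit ints on MSP430
int sum(Pair p);                         // 'p' may be passed byval on the stack
int caller(Pair *p) { return sum(*p); }  // front end emits the byval argument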
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index 2e170f17bf9d..e504011dfdc8 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -14,15 +14,15 @@
#include "MSP430FrameLowering.h"
#include "MSP430InstrInfo.h"
#include "MSP430MachineFunctionInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -222,13 +222,73 @@ MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-void
-MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+void MSP430FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+ unsigned StackAlign = getStackAlignment();
+
+ if (!hasReservedCallFrame(MF)) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackup instruction into a 'sub SPW, <amt>' and the
+ // adjcallstackdown instruction into 'add SPW, <amt>'
+ // TODO: consider using push / pop instead of sub + store / add
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
+ // factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount)
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ }
+
+ if (New) {
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+ } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ MachineInstr *Old = I;
+ MachineInstr *New =
+ BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
+ MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void
+MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *) const {
// Create a frame entry for the FPW register that must be saved.
- if (TFI->hasFP(MF)) {
+ if (hasFP(MF)) {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
(void)FrameIdx;
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
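// The rounding used in eliminateCallFramePseudoInstr above is the classic
// round-up-to-alignment idiom; factored out, it reads (sketch only,
// 'roundUpToAlignment' is a hypothetical helper):
#include <stdint.h>

static uint64_t roundUpToAlignment(uint64_t Amount, uint64_t Align) {
  // e.g. Amount = 9, Align = 4  ->  (9 + 3) / 4 * 4 = 12
  return (Amount + Align - 1) / Align * Align;
}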
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index cb02545852b5..c673f59b5efc 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -35,6 +35,10 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -46,7 +50,8 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 5efc6a36b894..1566c096037e 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -13,22 +13,22 @@
#include "MSP430.h"
#include "MSP430TargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index fc677aec38ef..09cdf3268553 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -16,14 +16,8 @@
#include "MSP430ISelLowering.h"
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
-#include "MSP430TargetMachine.h"
#include "MSP430Subtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
+#include "MSP430TargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -32,6 +26,12 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -164,6 +164,12 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
setOperationAction(ISD::SREM, MVT::i16, Expand);
+ // varargs support
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+
// Libcalls names.
if (HWMultMode == HWMultIntr) {
setLibcallName(RTLIB::MUL_I8, "__mulqi3hw");
@@ -192,6 +198,7 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
}
@@ -297,7 +304,6 @@ MSP430TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// LowerCCCArguments - transform physical registers into virtual registers and
/// generate load operations for arguments placed on the stack.
// FIXME: struct return stuff
-// FIXME: varargs
SDValue
MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -311,6 +317,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -318,7 +325,11 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430);
- assert(!isVarArg && "Varargs not supported yet");
+ // Create frame index for the start of the first vararg value
+ if (isVarArg) {
+ unsigned Offset = CCInfo.getNextStackOffset();
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, Offset, true));
+ }
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -357,22 +368,34 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
} else {
// Sanity check
assert(VA.isMemLoc());
- // Load the argument to a virtual register
- unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
- if (ObjSize > 2) {
- errs() << "LowerFormalArguments Unhandled argument type: "
- << EVT(VA.getLocVT()).getEVTString()
- << "\n";
+
+ SDValue InVal;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+ int FI = MFI->CreateFixedObject(Flags.getByValSize(),
+ VA.getLocMemOffset(), true);
+ InVal = DAG.getFrameIndex(FI, getPointerTy());
+ } else {
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > 2) {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << EVT(VA.getLocVT()).getEVTString()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
+ InVal = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
}
- // Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
-
- // Create the SelectionDAG nodes corresponding to a load
- //from this parameter
- SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
- InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0));
+
+ InVals.push_back(InVal);
}
}
@@ -400,15 +423,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_MSP430);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -421,16 +437,19 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are stuck together,
// so that nothing can be scheduled between them.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag);
+ RetOps.push_back(Flag);
- // Return Void
- return DAG.getNode(Opc, dl, MVT::Other, Chain);
+ return DAG.getNode(Opc, dl, MVT::Other, &RetOps[0], RetOps.size());
}
/// LowerCCCCallTo - function arguments are copied from virtual regs to
@@ -498,9 +517,23 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
StackPtr,
DAG.getIntPtrConstant(VA.getLocMemOffset()));
+ SDValue MemOp;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ if (Flags.isByVal()) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i16);
+ MemOp = DAG.getMemcpy(Chain, dl, PtrOff, Arg, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile*/false,
+ /*AlwaysInline=*/true,
+ MachinePointerInfo(),
+ MachinePointerInfo());
+ } else {
+ MemOp = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(),
+ false, false, 0);
+ }
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),false, false, 0));
+ MemOpChains.push_back(MemOp);
}
}
@@ -931,6 +964,22 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
+SDValue MSP430TargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+
+ // Frame index of first vararg argument
+ SDValue FrameIndex = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Create a store of the frame index to the location operand
+ return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), FrameIndex,
+ Op.getOperand(1), MachinePointerInfo(SV),
+ false, false, 0);
+}
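// What the custom VASTART lowering above implements, at the source level: an
// MSP430 va_list is a plain pointer, and va_start stores the address of the
// first stack-passed variadic slot (VarArgsFrameIndex) into it. A minimal
// user-level sketch (VAARG and VAEND are handled by the generic Expand
// actions registered earlier in this patch):
#include <stdarg.h>

int sum_varargs(int n, ...) {
  va_list ap;
  va_start(ap, n);           // lowered through ISD::VASTART -> the store above
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += va_arg(ap, int);    // ISD::VAARG is expanded generically
  va_end(ap);                // ISD::VAEND expands to nothing
  return s;
}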
+
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
@@ -1010,6 +1059,10 @@ bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
return 0 && VT1 == MVT::i8 && VT2 == MVT::i16;
}
+bool MSP430TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ return isZExtFree(Val.getValueType(), VT2);
+}
+
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 991304c23de3..e0ed870f5653 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -73,7 +73,7 @@ namespace llvm {
public:
explicit MSP430TargetLowering(MSP430TargetMachine &TM);
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -92,6 +92,7 @@ namespace llvm {
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
TargetLowering::ConstraintType
@@ -115,6 +116,7 @@ namespace llvm {
/// out to 16 bits.
virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index be332f05b30b..a6b5f2f6d0bd 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -15,10 +15,10 @@
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
#include "MSP430TargetMachine.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index f003574eda00..e45780d05803 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -40,9 +40,9 @@ def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
// MSP430 Specific Node Definitions.
//===----------------------------------------------------------------------===//
def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index b1773fba7e92..043e5becadbb 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "MSP430MCInstLower.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -20,10 +21,9 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
using namespace llvm;
MCSymbol *MSP430MCInstLower::
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 632d6dee275f..d1697f478cc2 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -30,6 +30,9 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
/// ReturnAddrIndex - FrameIndex for return slot.
int ReturnAddrIndex;
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
public:
MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
@@ -41,6 +44,9 @@ public:
int getRAIndex() const { return ReturnAddrIndex; }
void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex;}
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 9ae238f66f57..0b3e9e259649 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -17,14 +17,14 @@
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
#include "MSP430TargetMachine.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/Support/ErrorHandling.h"
#define GET_REGINFO_TARGET_DESC
#include "MSP430GenRegisterInfo.inc"
@@ -101,83 +101,18 @@ MSP430RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
return &MSP430::GR16RegClass;
}
-void MSP430RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // If the stack pointer can be changed after prologue, turn the
- // adjcallstackup instruction into a 'sub SPW, <amt>' and the
- // adjcallstackdown instruction into 'add SPW, <amt>'
- // TODO: consider using push / pop instead of sub + store / add
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
-
- MachineInstr *New = 0;
- if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
- New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(MSP430::SUB16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(Amount);
- } else {
- assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
- // factor out the amount the callee already popped.
- uint64_t CalleeAmt = Old->getOperand(1).getImm();
- Amount -= CalleeAmt;
- if (Amount)
- New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(MSP430::ADD16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(Amount);
- }
-
- if (New) {
- // The SRW implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // Replace the pseudo instruction with a new instruction...
- MBB.insert(I, New);
- }
- }
- } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back.
- if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
- MachineInstr *Old = I;
- MachineInstr *New =
- BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
- MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
- // The SRW implicit def is dead.
- New->getOperand(3).setIsDead();
-
- MBB.insert(I, New);
- }
- }
-
- MBB.erase(I);
-}
-
void
MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
@@ -191,7 +126,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset += 2; // Skip the saved FPW
// Fold imm into offset
- Offset += MI.getOperand(i+1).getImm();
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
if (MI.getOpcode() == MSP430::ADD16ri) {
// This is actually "load effective address" of the stack slot
@@ -199,7 +134,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// expand it into mov + add
MI.setDesc(TII.get(MSP430::MOV16rr));
- MI.getOperand(i).ChangeToRegister(BasePtr, false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
if (Offset == 0)
return;
@@ -216,8 +151,8 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
- MI.getOperand(i).ChangeToRegister(BasePtr, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 64a43bcafbb4..69cccb275259 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -42,12 +42,9 @@ public:
const TargetRegisterClass*
getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 13e37b373533..164e351df952 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -13,9 +13,9 @@
#include "MSP430TargetMachine.h"
#include "MSP430.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
// FIXME: Check DataLayout string.
DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { }
+ FrameLowering(Subtarget) { }
namespace {
/// MSP430 Code Generator Pass Configuration Options.
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 186172ede428..be695a211109 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -15,16 +15,15 @@
#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
#define LLVM_TARGET_MSP430_TARGETMACHINE_H
-#include "MSP430InstrInfo.h"
-#include "MSP430ISelLowering.h"
#include "MSP430FrameLowering.h"
-#include "MSP430SelectionDAGInfo.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430InstrInfo.h"
#include "MSP430RegisterInfo.h"
+#include "MSP430SelectionDAGInfo.h"
#include "MSP430Subtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
@@ -37,8 +36,6 @@ class MSP430TargetMachine : public LLVMTargetMachine {
MSP430TargetLowering TLInfo;
MSP430SelectionDAGInfo TSInfo;
MSP430FrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
MSP430TargetMachine(const Target &T, StringRef TT,
@@ -64,12 +61,6 @@ public:
virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
}; // MSP430TargetMachine.
diff --git a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
index 8b3e01ecf52c..0d71d04ebe22 100644
--- a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
+++ b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "MSP430.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 539a1f723bdd..edfd421d8532 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -12,14 +12,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/Mangler.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/DataLayout.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
using namespace llvm;
static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) {
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 67b524883cf8..c403f216b0d6 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -13,11 +13,11 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/TargetRegistry.h"
@@ -84,15 +84,33 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseDirective(AsmToken DirectiveID);
MipsAsmParser::OperandMatchResultTy
- parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&);
+ parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned RegisterClass);
bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
StringRef Mnemonic);
- int tryParseRegister(StringRef Mnemonic);
+ int tryParseRegister(bool is64BitReg);
bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic);
+ bool is64BitReg);
bool needsExpansion(MCInst &Inst);
@@ -104,6 +122,9 @@ class MipsAsmParser : public MCTargetAsmParser {
SmallVectorImpl<MCInst> &Instructions);
void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ void expandMemInst(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+ bool isLoad,bool isImmOpnd);
bool reportParseError(StringRef ErrorMsg);
bool parseMemOffset(const MCExpr *&Res);
@@ -118,6 +139,10 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetReorderDirective();
bool parseSetNoReorderDirective();
+ bool parseSetAssignment();
+
+ bool parseDirectiveWord(unsigned Size, SMLoc L);
+
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
bool isMips64() const {
@@ -128,9 +153,11 @@ class MipsAsmParser : public MCTargetAsmParser {
return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0;
}
- int matchRegisterName(StringRef Symbol);
+ int matchRegisterName(StringRef Symbol, bool is64BitReg);
- int matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic);
+ int matchCPURegisterName(StringRef Symbol);
+
+ int matchRegisterByNumber(unsigned RegNum, unsigned RegClass);
void setFpFormat(FpFormatTy Format) {
FpFormat = Format;
@@ -146,7 +173,10 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned getReg(int RC,int RegNo);
- unsigned getATReg();
+ int getATReg();
+
+ bool processInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
@@ -166,6 +196,20 @@ namespace {
/// instruction.
class MipsOperand : public MCParsedAsmOperand {
+public:
+ enum RegisterKind {
+ Kind_None,
+ Kind_CPURegs,
+ Kind_CPU64Regs,
+ Kind_HWRegs,
+ Kind_HW64Regs,
+ Kind_FGR32Regs,
+ Kind_FGR64Regs,
+ Kind_AFGR64Regs,
+ Kind_CCRRegs
+ };
+
+private:
enum KindTy {
k_CondCode,
k_CoprocNum,
@@ -178,24 +222,30 @@ class MipsOperand : public MCParsedAsmOperand {
MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ struct Token {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ RegisterKind Kind;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ const MCExpr *Off;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- const MCExpr *Off;
- } Mem;
+ struct Token Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
SMLoc StartLoc, EndLoc;
@@ -246,6 +296,11 @@ public:
return Reg.RegNum;
}
+ void setRegKind(RegisterKind RegKind) {
+ assert((Kind == k_Register) && "Invalid access!");
+ Reg.Kind = RegKind;
+ }
+
const MCExpr *getImm() const {
assert((Kind == k_Immediate) && "Invalid access!");
return Imm.Val;
@@ -296,6 +351,45 @@ public:
return Op;
}
+ bool isCPURegsAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_CPURegs;
+ }
+ void addCPURegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isCPU64RegsAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_CPU64Regs;
+ }
+ void addCPU64RegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isHWRegsAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_HWRegs;
+ }
+ void addHWRegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isHW64RegsAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_HW64Regs;
+ }
+ void addHW64RegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ void addCCRAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isCCRAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_CCRRegs;
+ }
+
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
@@ -307,6 +401,56 @@ public:
};
}
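// Naming the union members above lets RegOp carry a RegisterKind tag that the
// new per-register-class parsers record and the is*RegsAsm() predicates test.
// A minimal usage sketch, assuming the usual MipsOperand::CreateReg factory
// (not shown in this hunk):
//
//   MipsOperand *Op = MipsOperand::CreateReg(RegNo, S, E);
//   Op->setRegKind(MipsOperand::Kind_CPURegs);  // stored in Reg.Kind
//   bool IsCPUReg = Op->isCPURegsAsm();         // checks Kind_CPURegs, as above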
+namespace llvm {
+extern const MCInstrDesc MipsInsts[];
+}
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
+ return MipsInsts[Opcode];
+}
+
+bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ Inst.setLoc(IDLoc);
+ if (MCID.mayLoad() || MCID.mayStore()) {
+ // Check the offset of the memory operand; if it is a symbol
+ // reference or an out-of-range immediate we may have to expand the instruction
+ for (unsigned i=0;i<MCID.getNumOperands();i++) {
+ const MCOperandInfo &OpInfo = MCID.OpInfo[i];
+ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
+ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+ MCOperand &Op = Inst.getOperand(i);
+ if (Op.isImm()) {
+ int MemOffset = Op.getImm();
+ if (MemOffset < -32768 || MemOffset > 32767) {
+ // The offset can't exceed a signed 16-bit value.
+ expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), true);
+ return false;
+ }
+ } else if (Op.isExpr()) {
+ const MCExpr *Expr = Op.getExpr();
+ if (Expr->getKind() == MCExpr::SymbolRef){
+ const MCSymbolRefExpr *SR =
+ static_cast<const MCSymbolRefExpr*>(Expr);
+ if (SR->getKind() == MCSymbolRefExpr::VK_None) {
+ // Expand the symbol reference.
+ expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false);
+ return false;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (needsExpansion(Inst))
+ expandInstruction(Inst, IDLoc, Instructions);
+ else
+ Instructions.push_back(Inst);
+
+ return false;
+}
+
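A minimal standalone sketch (plain C++, not the parser itself) of the range
check processInstruction applies above: MIPS load/store encodings carry a
signed 16-bit offset, so any value outside [-32768, 32767] takes the
expandMemInst path.

  #include <cstdio>

  // Illustrative only: mirrors the bounds test in processInstruction.
  static bool fitsInSigned16(int Offset) {
    return Offset >= -32768 && Offset <= 32767;
  }

  int main() {
    printf("%d\n", fitsInSigned16(32767));    // 1: encodable directly
    printf("%d\n", fitsInSigned16(0x12345));  // 0: needs expansion
    return 0;
  }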
bool MipsAsmParser::needsExpansion(MCInst &Inst) {
switch(Inst.getOpcode()) {
@@ -344,31 +488,31 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
if ( 0 <= ImmValue && ImmValue <= 65535) {
// for 0 <= j <= 65535.
// li d,j => ori d,$zero,j
- tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else if ( ImmValue < 0 && ImmValue >= -32768) {
// for -32768 <= j < 0.
// li d,j => addiu d,$zero,j
- tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files?
+ tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else {
// for any other value of j that is representable as a 32-bit integer.
// li d,j => lui d,hi16(j)
// ori d,d,lo16(j)
- tmpInst.setOpcode(isMips64() ? Mips::LUi64 : Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
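For reference, a hedged sketch (ordinary C++; assumes 32-bit immediates only)
of how the three li cases above select an expansion; the printed sequences
mirror the comments in this hunk.

  #include <cstdint>
  #include <cstdio>

  int main() {
    int32_t j = 0x12345678; // example immediate for "li d, j"
    if (j >= 0 && j <= 65535)
      printf("ori d, $zero, %d\n", j);            // single ORi
    else if (j >= -32768 && j < 0)
      printf("addiu d, $zero, %d\n", j);          // single ADDiu
    else
      printf("lui d, 0x%x\nori d, d, 0x%x\n",     // LUi + ORi pair
             (j & 0xffff0000) >> 16, j & 0xffff);
    return 0;
  }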
@@ -390,7 +534,7 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
if ( -32768 <= ImmValue && ImmValue <= 65535) {
//for -32768 <= j <= 65535.
//la d,j(s) => addiu d,s,j
- tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files?
+ tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
@@ -400,12 +544,12 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
//la d,j(s) => lui d,hi16(j)
// ori d,d,lo16(j)
// addu d,d,s
- tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
@@ -433,19 +577,19 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64()?Mips::ZERO_64:Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else {
//for any other value of j that is representable as a 32-bit integer.
//la d,j => lui d,hi16(j)
// ori d,d,lo16(j)
- tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
@@ -453,28 +597,103 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
}
}
+void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+ bool isLoad, bool isImmOpnd) {
+ const MCSymbolRefExpr *SR;
+ MCInst TempInst;
+ unsigned ImmOffset, HiOffset, LoOffset;
+ const MCExpr *ExprOffset;
+ unsigned TmpRegNum;
+ unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID:
+ Mips::CPURegsRegClassID,
+ getATReg());
+ // The first operand is either the source or the destination register.
+ assert(Inst.getOperand(0).isReg() && "expected register operand kind");
+ unsigned RegOpNum = Inst.getOperand(0).getReg();
+ // The second operand is the base register.
+ assert(Inst.getOperand(1).isReg() && "expected register operand kind");
+ unsigned BaseRegNum = Inst.getOperand(1).getReg();
+ // The third operand is either an immediate or an expression.
+ if (isImmOpnd) {
+ assert(Inst.getOperand(2).isImm() && "expected immediate operand kind");
+ ImmOffset = Inst.getOperand(2).getImm();
+ LoOffset = ImmOffset & 0x0000ffff;
+ HiOffset = (ImmOffset & 0xffff0000) >> 16;
+ // If the MSB of LoOffset is 1 (negative number), we must increment HiOffset.
+ if (LoOffset & 0x8000)
+ HiOffset++;
+ }
+ else
+ ExprOffset = Inst.getOperand(2).getExpr();
+ // All instructions will have the same location
+ TempInst.setLoc(IDLoc);
+ // The first instruction in the expansion is LUi. For a load we can use
+ // the destination register as a temporary if the base and destination
+ // registers are different, but for a store we must use $at.
+ TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum;
+ TempInst.setOpcode(Mips::LUi);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(HiOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+ SR = static_cast<const MCSymbolRefExpr*>(ExprOffset);
+ const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_HI,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ }
+ }
+ // Add the instruction to the list,
+ Instructions.push_back(TempInst);
+ // then prepare TempInst for the next instruction,
+ TempInst.clear();
+ // which adds the temporary register to the base.
+ TempInst.setOpcode(Mips::ADDu);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+ // Finally, create the original instruction with the low part
+ // of the offset and the new base.
+ TempInst.setOpcode(Inst.getOpcode());
+ TempInst.addOperand(MCOperand::CreateReg(RegOpNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(LoOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_LO,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+ }
+ }
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+}
+
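The HiOffset/LoOffset split above has one subtlety worth a standalone check
(a sketch assuming 32-bit offsets): the low half ends up in a sign-extended
16-bit field, so HiOffset must absorb the borrow whenever LoOffset's sign bit
is set.

  #include <cassert>
  #include <cstdint>

  int main() {
    int32_t Off = 0x12348abc;                // low half has bit 15 set
    uint32_t Lo = Off & 0x0000ffff;          // 0x8abc
    uint32_t Hi = ((uint32_t)Off >> 16)      // 0x1234, plus the borrow:
                  + ((Lo & 0x8000) ? 1 : 0); // -> 0x1235
    // lui loads Hi; the final load/store sign-extends Lo back in:
    assert((int32_t)((Hi << 16) + (int16_t)Lo) == Off);
    return 0;
  }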
bool MipsAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
+ SmallVector<MCInst, 8> Instructions;
unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
default: break;
case Match_Success: {
- if (needsExpansion(Inst)) {
- SmallVector<MCInst, 4> Instructions;
- expandInstruction(Inst, IDLoc, Instructions);
- for(unsigned i =0; i < Instructions.size(); i++){
- Out.EmitInstruction(Instructions[i]);
- }
- } else {
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
- }
+ if (processInstruction(Inst, IDLoc, Instructions))
+ return true;
+ for (unsigned i = 0; i < Instructions.size(); i++)
+ Out.EmitInstruction(Instructions[i]);
return false;
}
case Match_MissingFeature:
@@ -498,84 +717,72 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
-int MipsAsmParser::matchRegisterName(StringRef Name) {
-
+int MipsAsmParser::matchCPURegisterName(StringRef Name) {
int CC;
- if (!isMips64())
+
+ if (Name == "at")
+ return getATReg();
+
CC = StringSwitch<unsigned>(Name)
- .Case("zero", Mips::ZERO)
- .Case("a0", Mips::A0)
- .Case("a1", Mips::A1)
- .Case("a2", Mips::A2)
- .Case("a3", Mips::A3)
- .Case("v0", Mips::V0)
- .Case("v1", Mips::V1)
- .Case("s0", Mips::S0)
- .Case("s1", Mips::S1)
- .Case("s2", Mips::S2)
- .Case("s3", Mips::S3)
- .Case("s4", Mips::S4)
- .Case("s5", Mips::S5)
- .Case("s6", Mips::S6)
- .Case("s7", Mips::S7)
- .Case("k0", Mips::K0)
- .Case("k1", Mips::K1)
- .Case("sp", Mips::SP)
- .Case("fp", Mips::FP)
- .Case("gp", Mips::GP)
- .Case("ra", Mips::RA)
- .Case("t0", Mips::T0)
- .Case("t1", Mips::T1)
- .Case("t2", Mips::T2)
- .Case("t3", Mips::T3)
- .Case("t4", Mips::T4)
- .Case("t5", Mips::T5)
- .Case("t6", Mips::T6)
- .Case("t7", Mips::T7)
- .Case("t8", Mips::T8)
- .Case("t9", Mips::T9)
- .Case("at", Mips::AT)
- .Case("fcc0", Mips::FCC0)
- .Default(-1);
- else
+ .Case("zero", 0)
+ .Case("a0", 4)
+ .Case("a1", 5)
+ .Case("a2", 6)
+ .Case("a3", 7)
+ .Case("v0", 2)
+ .Case("v1", 3)
+ .Case("s0", 16)
+ .Case("s1", 17)
+ .Case("s2", 18)
+ .Case("s3", 19)
+ .Case("s4", 20)
+ .Case("s5", 21)
+ .Case("s6", 22)
+ .Case("s7", 23)
+ .Case("k0", 26)
+ .Case("k1", 27)
+ .Case("sp", 29)
+ .Case("fp", 30)
+ .Case("gp", 28)
+ .Case("ra", 31)
+ .Case("t0", 8)
+ .Case("t1", 9)
+ .Case("t2", 10)
+ .Case("t3", 11)
+ .Case("t4", 12)
+ .Case("t5", 13)
+ .Case("t6", 14)
+ .Case("t7", 15)
+ .Case("t8", 24)
+ .Case("t9", 25)
+ .Default(-1);
+
+ // Although the SGI documentation simply dropped t0-t3 for n32/n64,
+ // GNU remaps t0-t3 onto the register numbers o32 uses for t4-t7.
+ // We support both conventions, so t0-t3 are shifted up onto t4-t7.
+ if (isMips64() && 8 <= CC && CC <= 11)
+ CC += 4;
+
+ if (CC == -1 && isMips64())
CC = StringSwitch<unsigned>(Name)
- .Case("zero", Mips::ZERO_64)
- .Case("at", Mips::AT_64)
- .Case("v0", Mips::V0_64)
- .Case("v1", Mips::V1_64)
- .Case("a0", Mips::A0_64)
- .Case("a1", Mips::A1_64)
- .Case("a2", Mips::A2_64)
- .Case("a3", Mips::A3_64)
- .Case("a4", Mips::T0_64)
- .Case("a5", Mips::T1_64)
- .Case("a6", Mips::T2_64)
- .Case("a7", Mips::T3_64)
- .Case("t4", Mips::T4_64)
- .Case("t5", Mips::T5_64)
- .Case("t6", Mips::T6_64)
- .Case("t7", Mips::T7_64)
- .Case("s0", Mips::S0_64)
- .Case("s1", Mips::S1_64)
- .Case("s2", Mips::S2_64)
- .Case("s3", Mips::S3_64)
- .Case("s4", Mips::S4_64)
- .Case("s5", Mips::S5_64)
- .Case("s6", Mips::S6_64)
- .Case("s7", Mips::S7_64)
- .Case("t8", Mips::T8_64)
- .Case("t9", Mips::T9_64)
- .Case("kt0", Mips::K0_64)
- .Case("kt1", Mips::K1_64)
- .Case("gp", Mips::GP_64)
- .Case("sp", Mips::SP_64)
- .Case("fp", Mips::FP_64)
- .Case("s8", Mips::FP_64)
- .Case("ra", Mips::RA_64)
+ .Case("a4", 8)
+ .Case("a5", 9)
+ .Case("a6", 10)
+ .Case("a7", 11)
+ .Case("kt0", 26)
+ .Case("kt1", 27)
+ .Case("s8", 30)
.Default(-1);
+ return CC;
+}
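Illustrative only: the CC += 4 adjustment above maps the o32 names t0-t3
(numbers 8-11) onto registers 12-15, which o32 spells t4-t7. A two-line
demonstration:

  #include <cstdio>

  int main() {
    bool IsMips64 = true;
    int CC = 8;                        // "t0" under the o32 numbering
    if (IsMips64 && 8 <= CC && CC <= 11)
      CC += 4;                         // now register 12, o32's "t4"
    printf("t0 -> $%d\n", CC);
    return 0;
  }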
+int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
+
+ int CC = matchCPURegisterName(Name);
if (CC != -1)
- return CC;
+ return matchRegisterByNumber(CC, is64BitReg ? Mips::CPU64RegsRegClassID
+ : Mips::CPURegsRegClassID);
if (Name[0] == 'f') {
StringRef NumString = Name.substr(1);
@@ -639,70 +846,44 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) {
return true;
}
-unsigned MipsAsmParser::getATReg() {
- unsigned Reg = Options.getATRegNum();
- if (isMips64())
- return getReg(Mips::CPU64RegsRegClassID,Reg);
-
- return getReg(Mips::CPURegsRegClassID,Reg);
+int MipsAsmParser::getATReg() {
+ return Options.getATRegNum();
}
unsigned MipsAsmParser::getReg(int RC,int RegNo) {
return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo);
}
-int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic) {
-
- if (Mnemonic.lower() == "rdhwr") {
- // at the moment only hwreg29 is supported
- if (RegNum != 29)
- return -1;
- return Mips::HWR29;
- }
+int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) {
if (RegNum > 31)
return -1;
- // MIPS64 registers are numbered 1 after the 32-bit equivalents
- return getReg(Mips::CPURegsRegClassID, RegNum) + isMips64();
+ return getReg(RegClass, RegNum);
}
-int MipsAsmParser::tryParseRegister(StringRef Mnemonic) {
+int MipsAsmParser::tryParseRegister(bool is64BitReg) {
const AsmToken &Tok = Parser.getTok();
int RegNum = -1;
if (Tok.is(AsmToken::Identifier)) {
std::string lowerCase = Tok.getString().lower();
- RegNum = matchRegisterName(lowerCase);
+ RegNum = matchRegisterName(lowerCase, is64BitReg);
} else if (Tok.is(AsmToken::Integer))
RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()),
- Mnemonic.lower());
- else
- return RegNum; //error
- // 64 bit div operations require Mips::ZERO instead of MIPS::ZERO_64
- if (isMips64() && RegNum == Mips::ZERO_64) {
- if (Mnemonic.find("ddiv") != StringRef::npos)
- RegNum = Mips::ZERO;
- }
+ is64BitReg ? Mips::CPU64RegsRegClassID
+ : Mips::CPURegsRegClassID);
return RegNum;
}
bool MipsAsmParser::
tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic){
+ bool is64BitReg){
SMLoc S = Parser.getTok().getLoc();
int RegNo = -1;
- // FIXME: we should make a more generic method for CCR
- if ((Mnemonic == "cfc1" || Mnemonic == "ctc1")
- && Operands.size() == 2 && Parser.getTok().is(AsmToken::Integer)){
- RegNo = Parser.getTok().getIntVal(); // get the int value
- // at the moment only fcc0 is supported
- if (RegNo == 0)
- RegNo = Mips::FCC0;
- } else
- RegNo = tryParseRegister(Mnemonic);
+ RegNo = tryParseRegister(is64BitReg);
if (RegNo == -1)
return true;
@@ -734,7 +915,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
SMLoc S = Parser.getTok().getLoc();
Parser.Lex(); // Eat dollar token.
// parse register operand
- if (!tryParseRegisterOperand(Operands, Mnemonic)) {
+ if (!tryParseRegisterOperand(Operands, isMips64())) {
if (getLexer().is(AsmToken::LParen)) {
// check if it is indexed addressing operand
Operands.push_back(MipsOperand::CreateToken("(", S));
@@ -743,7 +924,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return true;
Parser.Lex(); // eat dollar
- if (tryParseRegisterOperand(Operands, Mnemonic))
+ if (tryParseRegisterOperand(Operands, isMips64()))
return true;
if (!getLexer().is(AsmToken::RParen))
@@ -757,7 +938,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
}
// maybe it is a symbol reference
StringRef Identifier;
- if (Parser.ParseIdentifier(Identifier))
+ if (Parser.parseIdentifier(Identifier))
return true;
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
@@ -772,6 +953,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return false;
}
case AsmToken::Identifier:
+ // Look for an existing symbol alias and check whether
+ // we need to assign the proper RegisterKind.
+ if (searchSymbolAlias(Operands, MipsOperand::Kind_None))
+ return false;
+ // Otherwise fall through to expression parsing.
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -780,7 +966,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
// quoted label names
const MCExpr *IdVal;
SMLoc S = Parser.getTok().getLoc();
- if (getParser().ParseExpression(IdVal))
+ if (getParser().parseExpression(IdVal))
return true;
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
@@ -832,7 +1018,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
} else
break;
}
- if (getParser().ParseParenExpression(IdVal,EndLoc))
+ if (getParser().parseParenExpression(IdVal,EndLoc))
return true;
while (getLexer().getKind() == AsmToken::RParen)
@@ -843,19 +1029,25 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
// Check the type of the expression
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) {
- // it's a constant, evaluate lo or hi value
- int Val = MCE->getValue();
+ // It's a constant; evaluate the lo or hi value.
if (Str == "lo") {
- Val = Val & 0xffff;
+ short Val = MCE->getValue();
+ Res = MCConstantExpr::Create(Val, getContext());
} else if (Str == "hi") {
+ int Val = MCE->getValue();
+ int LoSign = Val & 0x8000;
Val = (Val & 0xffff0000) >> 16;
+ // The lower part is treated as a signed int, so if it is negative
+ // we must add 1 to the hi part to compensate.
+ if (LoSign)
+ Val++;
+ Res = MCConstantExpr::Create(Val, getContext());
}
- Res = MCConstantExpr::Create(Val, getContext());
return false;
}
if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) {
- // it's a symbol, create symbolic expression from symbol
+ // It's a symbol; create a symbolic expression from the symbol.
StringRef Symbol = MSRE->getSymbol().getName();
MCSymbolRefExpr::VariantKind VK = getVariantKind(Str);
Res = MCSymbolRefExpr::Create(Symbol,VK,getContext());
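The constant %lo/%hi evaluation above can be checked in isolation (plain C++
sketch; a positive value is assumed to keep the shifts well-defined): %lo is
the value truncated to a signed 16-bit quantity, and %hi gains 1 whenever
that low half is negative, so (%hi << 16) + %lo reproduces the constant.

  #include <cassert>
  #include <cstdint>

  int main() {
    int32_t Val = 0x0002ffff;
    int16_t Lo = (int16_t)Val;                 // %lo -> -1 here
    int32_t Hi = ((Val & 0xffff0000) >> 16)
               + ((Val & 0x8000) ? 1 : 0);     // %hi -> 3, not 2
    assert((Hi << 16) + Lo == Val);
    return 0;
  }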
@@ -868,7 +1060,7 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
StartLoc = Parser.getTok().getLoc();
- RegNo = tryParseRegister("");
+ RegNo = tryParseRegister(isMips64());
EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -880,10 +1072,11 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) {
switch(getLexer().getKind()) {
default:
return true;
+ case AsmToken::Identifier:
case AsmToken::Integer:
case AsmToken::Minus:
case AsmToken::Plus:
- return (getParser().ParseExpression(Res));
+ return (getParser().parseExpression(Res));
case AsmToken::Percent:
return parseRelocOperand(Res);
case AsmToken::LParen:
@@ -907,7 +1100,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
if (Tok.isNot(AsmToken::LParen)) {
MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
if (Mnemonic->getToken() == "la") {
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer()-1);
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
return MatchOperand_Success;
}
@@ -920,7 +1113,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
const AsmToken &Tok1 = Parser.getTok(); // get next token
if (Tok1.is(AsmToken::Dollar)) {
Parser.Lex(); // Eat '$' token.
- if (tryParseRegisterOperand(Operands,"")) {
+ if (tryParseRegisterOperand(Operands, isMips64())) {
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return MatchOperand_ParseFail;
}
@@ -954,6 +1147,180 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
return MatchOperand_Success;
}
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (!isMips64())
+ return MatchOperand_NoMatch;
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands, MipsOperand::Kind_CPU64Regs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat $.
+ if (!tryParseRegisterOperand(Operands, true)) {
+ // Set the proper register kind.
+ MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ op->setRegKind(MipsOperand::Kind_CPU64Regs);
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+bool MipsAsmParser::
+searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned RegisterKind) {
+
+ MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
+ if (Sym) {
+ SMLoc S = Parser.getTok().getLoc();
+ const MCExpr *Expr;
+ if (Sym->isVariable())
+ Expr = Sym->getVariableValue();
+ else
+ return false;
+ if (Expr->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const StringRef DefSymbol = Ref->getSymbol().getName();
+ if (DefSymbol.startswith("$")) {
+ // Look up the register with the corresponding name.
+ int RegNum = matchRegisterName(DefSymbol.substr(1), isMips64());
+ if (RegNum > -1) {
+ Parser.Lex();
+ MipsOperand *op = MipsOperand::CreateReg(RegNum,S,
+ Parser.getTok().getLoc());
+ op->setRegKind((MipsOperand::RegisterKind)RegisterKind);
+ Operands.push_back(op);
+ return true;
+ }
+ }
+ } else if (Expr->getKind() == MCExpr::Constant) {
+ Parser.Lex();
+ const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr);
+ MipsOperand *op = MipsOperand::CreateImm(Const,S,
+ Parser.getTok().getLoc());
+ Operands.push_back(op);
+ return true;
+ }
+ }
+ return false;
+}
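As an illustration (hypothetical input, matching the logic above): after
".set treg, $12", an operand spelled "treg" resolves through
searchSymbolAlias to register 12. The name test itself reduces to stripping
the leading '$' before the register lookup:

  #include <cstdio>
  #include <string>

  int main() {
    // Variable value recorded for the alias "treg" by .set:
    std::string DefSymbol = "$12";
    if (DefSymbol.rfind("$", 0) == 0)          // startswith("$")
      printf("register name: %s\n", DefSymbol.substr(1).c_str());
    return 0;
  }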
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands, MipsOperand::Kind_CPURegs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat $.
+ if (!tryParseRegisterOperand(Operands, false)) {
+ // Set the proper register kind.
+ MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ op->setRegKind(MipsOperand::Kind_CPURegs);
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (isMips64())
+ return MatchOperand_NoMatch;
+
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // Get the next token.
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned RegNum = Tok.getIntVal();
+ // At the moment only hwreg29 is supported.
+ if (RegNum != 29)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_HWRegs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (!isMips64())
+ return MatchOperand_NoMatch;
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // Get the next token.
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned RegNum = Tok.getIntVal();
+ // At the moment only hwreg29 is supported.
+ if (RegNum != 29)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_HW64Regs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ unsigned RegNum;
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // Get the next token.
+ if (Tok.is(AsmToken::Integer)) {
+ RegNum = Tok.getIntVal();
+ // At the moment only fcc0 is supported.
+ if (RegNum != 0)
+ return MatchOperand_ParseFail;
+ } else if (Tok.is(AsmToken::Identifier)) {
+ // At the moment only fcc0 is supported.
+ if (Tok.getIdentifier() != "fcc0")
+ return MatchOperand_ParseFail;
+ } else
+ return MatchOperand_NoMatch;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_CCRRegs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
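The custom operand parsers above all follow the same tri-state protocol,
worth spelling out once (the names below are placeholders, not the LLVM
enum): NoMatch lets the next parser try, ParseFail is a hard error after the
tokens looked like a match, and Success means the operand was consumed.

  #include <cstdio>
  #include <string>

  enum MatchResult { Success, ParseFail, NoMatch };

  // Shaped after parseCCRRegs: only $fcc0 (or $0) is accepted today.
  static MatchResult parseCCR(const std::string &Tok) {
    if (Tok.empty() || Tok[0] != '$')
      return NoMatch;                  // not ours; others may match
    if (Tok != "$fcc0" && Tok != "$0")
      return ParseFail;                // looked like ours, but invalid
    return Success;
  }

  int main() {
    printf("%d %d %d\n",               // prints "0 1 2"
           parseCCR("$fcc0"), parseCCR("$fcc7"), parseCCR("x"));
    return 0;
  }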
MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
MCSymbolRefExpr::VariantKind VK
@@ -1023,13 +1390,13 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
// Read the first operand.
if (ParseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
if (getLexer().isNot(AsmToken::Comma)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1041,14 +1408,14 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
// Parse and remember the operand.
if (ParseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1059,16 +1426,18 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
bool MipsAsmParser::
ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef Mnemonic;
// floating point instructions: should register be treated as double?
if (requestsDoubleOperand(Name)) {
setFpFormat(FP_FORMAT_D);
Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
+ Mnemonic = Name;
}
else {
setDefaultFpFormat();
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
- StringRef Mnemonic = Name.slice(Start, Next);
+ Mnemonic = Name.slice(Start, Next);
Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc));
@@ -1108,9 +1477,9 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- if (ParseOperand(Operands, Name)) {
+ if (ParseOperand(Operands, Mnemonic)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1120,7 +1489,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
// Parse and remember the operand.
if (ParseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
}
@@ -1128,7 +1497,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1138,18 +1507,18 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, ErrorMsg);
}
bool MipsAsmParser::parseSetNoAtDirective() {
- // line should look like:
+ // Line should look like:
// .set noat
// set at reg to 0
Options.setATReg(0);
// eat noat
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report error
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
@@ -1161,28 +1530,39 @@ bool MipsAsmParser::parseSetAtDirective() {
// line can be
// .set at - defaults to $1
// or .set at=$reg
+ int AtRegNo;
getParser().Lex();
if (getLexer().is(AsmToken::EndOfStatement)) {
Options.setATReg(1);
Parser.Lex(); // Consume the EndOfStatement
return false;
} else if (getLexer().is(AsmToken::Equal)) {
- getParser().Lex(); //eat '='
+ getParser().Lex(); // eat '='
if (getLexer().isNot(AsmToken::Dollar)) {
reportParseError("unexpected token in statement");
return false;
}
- Parser.Lex(); // eat '$'
- if (getLexer().isNot(AsmToken::Integer)) {
+ Parser.Lex(); // Eat '$'
+ const AsmToken &Reg = Parser.getTok();
+ if (Reg.is(AsmToken::Identifier)) {
+ AtRegNo = matchCPURegisterName(Reg.getIdentifier());
+ } else if (Reg.is(AsmToken::Integer)) {
+ AtRegNo = Reg.getIntVal();
+ } else {
reportParseError("unexpected token in statement");
return false;
}
- const AsmToken &Reg = Parser.getTok();
- if (!Options.setATReg(Reg.getIntVal())) {
+
+ if (AtRegNo < 1 || AtRegNo > 31) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+
+ if (!Options.setATReg(AtRegNo)) {
reportParseError("unexpected token in statement");
return false;
}
- getParser().Lex(); //eat reg
+ getParser().Lex(); // Eat reg
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
@@ -1198,7 +1578,7 @@ bool MipsAsmParser::parseSetAtDirective() {
bool MipsAsmParser::parseSetReorderDirective() {
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report error
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
@@ -1247,6 +1627,31 @@ bool MipsAsmParser::parseSetNoMacroDirective() {
Parser.Lex(); // Consume the EndOfStatement
return false;
}
+
+bool MipsAsmParser::parseSetAssignment() {
+ StringRef Name;
+ const MCExpr *Value;
+
+ if (Parser.parseIdentifier(Name))
+ reportParseError("expected identifier after .set");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return reportParseError("unexpected token in .set directive");
+ Lex(); // Eat the comma.
+
+ if (Parser.parseExpression(Value))
+ reportParseError("expected valid expression after comma");
+
+ // Check whether Name already exists as a symbol.
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+ if (Sym) {
+ return reportParseError("symbol already defined");
+ }
+ Sym = getContext().GetOrCreateSymbol(Name);
+ Sym->setVariableValue(Value);
+
+ return false;
+}
bool MipsAsmParser::parseDirectiveSet() {
// get next token
@@ -1266,55 +1671,92 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetNoMacroDirective();
} else if (Tok.getString() == "nomips16") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
} else if (Tok.getString() == "nomicromips") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
+ return false;
+ } else {
+ // It is just an identifier; look for an assignment.
+ parseSetAssignment();
return false;
}
+
return true;
}
+/// parseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool MipsAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
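A sketch of the comma-separated loop in parseDirectiveWord (plain string
splitting stands in for the real expression parser): each expression becomes
one EmitValue of Size bytes.

  #include <cstdio>
  #include <sstream>
  #include <string>

  int main() {
    std::istringstream Rest("1, 2, label+4"); // operands of ".word"
    std::string Expr;                         // one expression at a time
    while (std::getline(Rest, Expr, ','))
      printf("EmitValue(%s, 4)\n", Expr.c_str());
    return 0;
  }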
bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
- if (DirectiveID.getString() == ".ent") {
+ StringRef IDVal = DirectiveID.getString();
+
+ if (IDVal == ".ent") {
// ignore this directive for now
Parser.Lex();
return false;
}
- if (DirectiveID.getString() == ".end") {
+ if (IDVal == ".end") {
// ignore this directive for now
Parser.Lex();
return false;
}
- if (DirectiveID.getString() == ".frame") {
+ if (IDVal == ".frame") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
}
- if (DirectiveID.getString() == ".set") {
+ if (IDVal == ".set") {
return parseDirectiveSet();
}
- if (DirectiveID.getString() == ".fmask") {
+ if (IDVal == ".fmask") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
}
- if (DirectiveID.getString() == ".mask") {
+ if (IDVal == ".mask") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
}
- if (DirectiveID.getString() == ".gpword") {
+ if (IDVal == ".gpword") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ if (IDVal == ".word") {
+ parseDirectiveWord(4, DirectiveID.getLoc());
return false;
}
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index ef56e752b2e4..cf8bb189e475 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -9,7 +9,6 @@ tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv)
tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info)
tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM MipsGenMCPseudoLowering.inc -gen-pseudo-lowering)
add_public_tablegen_target(MipsCommonTableGen)
@@ -17,10 +16,13 @@ add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
Mips16FrameLowering.cpp
Mips16InstrInfo.cpp
+ Mips16ISelDAGToDAG.cpp
+ Mips16ISelLowering.cpp
Mips16RegisterInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
MipsCodeEmitter.cpp
+ MipsConstantIslandPass.cpp
MipsDelaySlotFiller.cpp
MipsJITInfo.cpp
MipsInstrInfo.cpp
@@ -33,6 +35,8 @@ add_llvm_target(MipsCodeGen
MipsRegisterInfo.cpp
MipsSEFrameLowering.cpp
MipsSEInstrInfo.cpp
+ MipsSEISelDAGToDAG.cpp
+ MipsSEISelLowering.cpp
MipsSERegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
diff --git a/lib/Target/Mips/Disassembler/LLVMBuild.txt b/lib/Target/Mips/Disassembler/LLVMBuild.txt
index 048ad0ddac5b..7101c06d12ac 100644
--- a/lib/Target/Mips/Disassembler/LLVMBuild.txt
+++ b/lib/Target/Mips/Disassembler/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt --------------*- Conf -*--===;
+;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
diff --git a/lib/Target/Mips/Disassembler/Makefile b/lib/Target/Mips/Disassembler/Makefile
index a78feba1f8df..7900373dd2b2 100644
--- a/lib/Target/Mips/Disassembler/Makefile
+++ b/lib/Target/Mips/Disassembler/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Mips/Disassembler/Makefile ----------------*- Makefile -*-===##
+##===- lib/Target/Mips/Disassembler/Makefile ---------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 82dbcc5bcf7d..59e49d8ddc6c 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -12,18 +12,15 @@
//===----------------------------------------------------------------------===//
#include "Mips.h"
-#include "MipsSubtarget.h"
#include "MipsRegisterInfo.h"
-#include "llvm/MC/EDInstInfo.h"
+#include "MipsSubtarget.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/Support/MemoryObject.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/MathExtras.h"
-
-#include "MipsGenEDInfo.inc"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -42,9 +39,6 @@ public:
virtual ~MipsDisassemblerBase() {}
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
-
const MCRegisterInfo *getRegInfo() const { return RegInfo; }
private:
@@ -92,10 +86,6 @@ public:
} // end anonymous namespace
-const EDInstInfo *MipsDisassemblerBase::getEDInfo() const {
- return instInfoMips;
-}
-
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
@@ -103,6 +93,11 @@ static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -143,10 +138,10 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
@@ -332,6 +327,15 @@ static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
}
+static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+
+ return MCDisassembler::Fail;
+}
static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -480,14 +484,14 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo >= 4)
return MCDisassembler::Fail;
- unsigned Reg = getReg(Decoder, Mips::ACRegsRegClassID, RegNo);
+ unsigned Reg = getReg(Decoder, Mips::ACRegsDSPRegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 68d3ac5f3bd0..fc23cd380352 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define PRINT_ALIAS_INSTR
#include "MipsGenAsmWriter.inc"
const char* Mips::MipsFCCToString(Mips::CondCode CC) {
@@ -78,7 +79,9 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << "\t.set\tmips32r2\n";
}
- printInstruction(MI, O);
+ // Try to print any aliases first.
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
printAnnotation(O, Annot);
switch (MI->getOpcode()) {
@@ -149,6 +152,11 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
OS << ')';
}
+void MipsInstPrinter::printCPURegs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printRegName(O, MI->getOperand(OpNo).getReg());
+}
+
void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 3d8a6f918ff6..d1b561f9764e 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -87,6 +87,9 @@ public:
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printCPURegs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index be5d7e42532a..4212c94a5578 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,8 @@ add_llvm_library(LLVMMipsDesc
MipsMCCodeEmitter.cpp
MipsMCTargetDesc.cpp
MipsELFObjectWriter.cpp
+ MipsReginfo.cpp
+ MipsELFStreamer.cpp
)
add_dependencies(LLVMMipsDesc MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index c078794899d2..0b13607a572d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -37,6 +37,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_4:
case FK_Data_8:
case Mips::fixup_Mips_LO16:
+ case Mips::fixup_Mips_GPREL16:
case Mips::fixup_Mips_GPOFF_HI:
case Mips::fixup_Mips_GPOFF_LO:
case Mips::fixup_Mips_GOT_PAGE:
@@ -213,7 +214,7 @@ public:
/// fixup requires the associated instruction to be relaxed.
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// FIXME.
assert(0 && "RelaxInstruction() unimplemented");
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 94e0d20d8835..7a55efd5c330 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -121,99 +121,6 @@ namespace MipsII {
};
}
-
-/// getMipsRegisterNumbering - Given the enum value for some register,
-/// return the number that it corresponds to.
-inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
-{
- switch (RegEnum) {
- case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
- case Mips::D0: case Mips::FCC0: case Mips::AC0:
- return 0;
- case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
- case Mips::AC1:
- return 1;
- case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
- case Mips::D1: case Mips::AC2:
- return 2;
- case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
- case Mips::AC3:
- return 3;
- case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
- case Mips::D2:
- return 4;
- case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64:
- return 5;
- case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64:
- case Mips::D3:
- return 6;
- case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64:
- return 7;
- case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64:
- case Mips::D4:
- return 8;
- case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64:
- return 9;
- case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64:
- case Mips::D5:
- return 10;
- case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64:
- return 11;
- case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64:
- case Mips::D6:
- return 12;
- case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64:
- return 13;
- case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64:
- case Mips::D7:
- return 14;
- case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64:
- return 15;
- case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64:
- case Mips::D8:
- return 16;
- case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64:
- return 17;
- case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64:
- case Mips::D9:
- return 18;
- case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64:
- return 19;
- case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64:
- case Mips::D10:
- return 20;
- case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64:
- return 21;
- case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64:
- case Mips::D11:
- return 22;
- case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64:
- return 23;
- case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64:
- case Mips::D12:
- return 24;
- case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64:
- return 25;
- case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64:
- case Mips::D13:
- return 26;
- case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64:
- return 27;
- case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64:
- case Mips::D14:
- return 28;
- case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
- case Mips::HWR29:
- return 29;
- case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
- case Mips::D15:
- return 30;
- case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
- return 31;
- default: llvm_unreachable("Unknown register number!");
- }
-}
-
inline static std::pair<const MCSymbolRefExpr*, int64_t>
MipsGetSymAndOffset(const MCFixup &Fixup) {
MCFixupKind FixupKind = Fixup.getKind();
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index f82e203c23ca..6471b51583ce 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -42,7 +42,6 @@ namespace {
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend) const;
- virtual unsigned getEFlags() const;
virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
const MCValue &Target,
const MCFragment &F,
@@ -61,19 +60,6 @@ MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
MipsELFObjectWriter::~MipsELFObjectWriter() {}
-// FIXME: get the real EABI Version from the Subtarget class.
-unsigned MipsELFObjectWriter::getEFlags() const {
-
- // FIXME: We can't tell if we are PIC (dynamic) or CPIC (static)
- unsigned Flag = ELF::EF_MIPS_NOREORDER;
-
- if (is64Bit())
- Flag |= ELF::EF_MIPS_ARCH_64R2;
- else
- Flag |= ELF::EF_MIPS_ARCH_32R2;
- return Flag;
-}
-
const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
const MCValue &Target,
const MCFragment &F,
@@ -108,7 +94,13 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
Type = ELF::R_MIPS_64;
break;
case FK_GPRel_4:
- Type = ELF::R_MIPS_GPREL32;
+ if (isN64()) {
+ Type = setRType((unsigned)ELF::R_MIPS_GPREL32, Type);
+ Type = setRType2((unsigned)ELF::R_MIPS_64, Type);
+ Type = setRType3((unsigned)ELF::R_MIPS_NONE, Type);
+ }
+ else
+ Type = ELF::R_MIPS_GPREL32;
break;
case Mips::fixup_Mips_GPREL16:
Type = ELF::R_MIPS_GPREL16;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
new file mode 100644
index 000000000000..c33bc9ae3034
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -0,0 +1,89 @@
+//===-- MipsELFStreamer.cpp - MipsELFStreamer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsELFStreamer.h"
+#include "MipsSubtarget.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+ MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ MipsELFStreamer *S = new MipsELFStreamer(Context, TAB, OS, Emitter,
+ RelaxAll, NoExecStack);
+ return S;
+ }
+
+ // For llc: set a group of ELF header flags.
+ void
+ MipsELFStreamer::emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget) {
+
+ if (hasRawTextSupport())
+ return;
+
+ // Update e_header flags
+ MCAssembler& MCA = getAssembler();
+ unsigned EFlags = MCA.getELFHeaderEFlags();
+
+ if (Subtarget.inMips16Mode())
+ EFlags |= ELF::EF_MIPS_ARCH_ASE_M16;
+ else
+ EFlags |= ELF::EF_MIPS_NOREORDER;
+
+ // Architecture
+ if (Subtarget.hasMips64r2())
+ EFlags |= ELF::EF_MIPS_ARCH_64R2;
+ else if (Subtarget.hasMips64())
+ EFlags |= ELF::EF_MIPS_ARCH_64;
+ else if (Subtarget.hasMips32r2())
+ EFlags |= ELF::EF_MIPS_ARCH_32R2;
+ else
+ EFlags |= ELF::EF_MIPS_ARCH_32;
+
+ if (Subtarget.inMicroMipsMode())
+ EFlags |= ELF::EF_MIPS_MICROMIPS;
+
+ // ABI
+ if (Subtarget.isABI_O32())
+ EFlags |= ELF::EF_MIPS_ABI_O32;
+
+ // Relocation Model
+ Reloc::Model RM = Subtarget.getRelocationModel();
+ if (RM == Reloc::PIC_ || RM == Reloc::Default)
+ EFlags |= ELF::EF_MIPS_PIC;
+ else if (RM == Reloc::Static)
+ ; // Do nothing for Reloc::Static
+ else
+ llvm_unreachable("Unsupported relocation model for e_flags");
+
+ MCA.setELFHeaderEFlags(EFlags);
+ }
+
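For orientation, a standalone sketch of the flag composition above; the
EF_MIPS_* values here are the ones commonly defined by the MIPS ELF ABI, but
treat them as assumptions and check llvm/Support/ELF.h for the authoritative
definitions.

  #include <cstdio>

  int main() {
    // Assumed constants mirroring ELF::EF_MIPS_*:
    const unsigned NOREORDER = 0x00000001, PIC = 0x00000002;
    const unsigned ABI_O32 = 0x00001000, ARCH_32R2 = 0x70000000;
    unsigned EFlags = 0;
    EFlags |= NOREORDER;  // default, as in emitELFHeaderFlagsCG
    EFlags |= ARCH_32R2;  // architecture revision
    EFlags |= ABI_O32;    // ABI in use
    EFlags |= PIC;        // relocation model
    printf("e_flags = 0x%08x\n", EFlags); // 0x70001003
    return 0;
  }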
+ // For llc: set a symbol's STO flags.
+ void
+ MipsELFStreamer::emitMipsSTOCG(const MipsSubtarget &Subtarget,
+ MCSymbol *Sym,
+ unsigned Val) {
+
+ if (hasRawTextSupport())
+ return;
+
+ MCSymbolData &Data = getOrCreateSymbolData(Sym);
+ // The "other" values are stored in the last 6 bits of the second byte
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ MCELF::setOther(Data, Val >> 2);
+ }
+
+} // namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
new file mode 100644
index 000000000000..b10ccc78e665
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -0,0 +1,43 @@
+//=== MipsELFStreamer.h - MipsELFStreamer ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+#ifndef MIPSELFSTREAMER_H_
+#define MIPSELFSTREAMER_H_
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+class MipsAsmPrinter;
+class MipsSubtarget;
+class MCSymbol;
+
+class MipsELFStreamer : public MCELFStreamer {
+public:
+ MipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack)
+ : MCELFStreamer(SK_MipsELFStreamer, Context, TAB, OS, Emitter) {
+ }
+
+ ~MipsELFStreamer() {}
+ void emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget);
+ void emitMipsSTOCG(const MipsSubtarget &Subtarget,
+ MCSymbol *Sym,
+ unsigned Val);
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_MipsELFStreamer;
+ }
+};
+
+ MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack);
+}
+
+#endif /* MIPSELFSTREAMER_H_ */
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 9d67aa1856e3..5d4b32d30578 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -24,6 +24,11 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
(TheTriple.getArch() == Triple::mips64))
IsLittleEndian = false;
+ if ((TheTriple.getArch() == Triple::mips64el) ||
+ (TheTriple.getArch() == Triple::mips64)) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+
AlignmentIsInBytes = false;
Data16bitsDirective = "\t.2byte\t";
Data32bitsDirective = "\t.4byte\t";
@@ -34,7 +39,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
GPRel32Directive = "\t.gpword\t";
GPRel64Directive = "\t.gpdword\t";
WeakRefDirective = "\t.weak\t";
-
+ DebugLabelSuffix = "=.";
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
HasLEB128 = true;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index da1e4552c9d0..e198a7c983f0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -33,11 +34,13 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
MipsMCCodeEmitter(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCInstrInfo &MCII;
+ MCContext &Ctx;
bool IsLittleEndian;
public:
- MipsMCCodeEmitter(const MCInstrInfo &mcii, bool IsLittle) :
- MCII(mcii), IsLittleEndian(IsLittle) {}
+ MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_,
+ const MCSubtargetInfo &sti, bool IsLittle) :
+ MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
~MipsMCCodeEmitter() {}
@@ -93,7 +96,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx)
{
- return new MipsMCCodeEmitter(MCII, false);
+ return new MipsMCCodeEmitter(MCII, Ctx, STI, false);
}
MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
@@ -101,7 +104,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx)
{
- return new MipsMCCodeEmitter(MCII, true);
+ return new MipsMCCodeEmitter(MCII, Ctx, STI, true);
}
/// EncodeInstruction - Emit the instruction.
@@ -139,12 +142,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
llvm_unreachable("unimplemented opcode in EncodeInstruction()");
const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
- uint64_t TSFlags = Desc.TSFlags;
-
- // Pseudo instructions don't get encoded and shouldn't be here
- // in the first place!
- if ((TSFlags & MipsII::FormMask) == MipsII::Pseudo)
- llvm_unreachable("Pseudo opcode found in EncodeInstruction()");
// Get byte count of instruction
unsigned Size = Desc.getSize();
@@ -163,8 +160,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm()) return MO.getImm();
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm() >> 2;
+
assert(MO.isExpr() &&
"getBranchTargetOpValue expects only expressions or immediates");
@@ -182,8 +180,9 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm()) return MO.getImm();
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm()>>2;
+
assert(MO.isExpr() &&
"getJumpTargetOpValue expects only expressions or an immediate");
@@ -200,7 +199,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- unsigned RegNo = getMipsRegisterNumbering(Reg);
+ unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg);
return RegNo;
} else if (MO.isImm()) {
return static_cast<unsigned>(MO.getImm());
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index f634f082be5a..be83b54b6124 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -11,15 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsMCAsmInfo.h"
+#include "MCTargetDesc/MipsELFStreamer.h"
#include "MipsMCTargetDesc.h"
#include "InstPrinter/MipsInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "MipsMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -131,7 +132,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
bool NoExecStack) {
Triple TheTriple(TT);
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ return createMipsELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
extern "C" void LLVMInitializeMipsTargetMC() {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp
new file mode 100644
index 000000000000..1dc9bcb36a5f
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp
@@ -0,0 +1,80 @@
+//===-- MipsReginfo.cpp - Registerinfo handling --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// .reginfo
+// Elf32_Word ri_gprmask
+// Elf32_Word ri_cprmask[4]
+// Elf32_Word ri_gp_value
+//
+// .MIPS.options - N64
+// Elf64_Byte kind (ODK_REGINFO)
+// Elf64_Byte size (40 bytes)
+// Elf64_Section section (0)
+// Elf64_Word info (unused)
+// Elf64_Word ri_gprmask ()
+// Elf64_Word ri_pad ()
+// Elf64_Word[4] ri_cprmask ()
+// Elf64_Addr ri_gp_value ()
+//
+// .MIPS.options - N32
+// Elf32_Byte kind (ODK_REGINFO)
+// Elf32_Byte size (36 bytes)
+// Elf32_Section section (0)
+// Elf32_Word info (unused)
+// Elf32_Word ri_gprmask ()
+// Elf32_Word ri_pad ()
+// Elf32_Word[4] ri_cprmask ()
+// Elf32_Addr ri_gp_value ()
+//
+//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsReginfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetObjectFile.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+// Integrated assembler version
+void
+MipsReginfo::emitMipsReginfoSectionCG(MCStreamer &OS,
+ const TargetLoweringObjectFile &TLOF,
+ const MipsSubtarget &MST) const
+{
+
+ if (OS.hasRawTextSupport())
+ return;
+
+ const MipsTargetObjectFile &TLOFELF =
+ static_cast<const MipsTargetObjectFile &>(TLOF);
+ OS.SwitchSection(TLOFELF.getReginfoSection());
+
+ // .reginfo
+ if (MST.isABI_O32()) {
+ OS.EmitIntValue(0, 4); // ri_gprmask
+ OS.EmitIntValue(0, 4); // ri_cpr[0]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[1]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[2]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[3]mask
+ OS.EmitIntValue(0, 4); // ri_gp_value
+ }
+ // .MIPS.options
+ else if (MST.isABI_N64()) {
+ OS.EmitIntValue(1, 1); // kind
+ OS.EmitIntValue(40, 1); // size
+ OS.EmitIntValue(0, 2); // section
+ OS.EmitIntValue(0, 4); // info
+ OS.EmitIntValue(0, 4); // ri_gprmask
+ OS.EmitIntValue(0, 4); // pad
+ OS.EmitIntValue(0, 4); // ri_cpr[0]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[1]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[2]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[3]mask
+ OS.EmitIntValue(0, 8); // ri_gp_value
+ }
+ else llvm_unreachable("Unsupported ABI for reginfo");
+}
+
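As a cross-check of the O32 branch above, the six 4-byte emits correspond to this plain-struct view of the .reginfo record described in the file header (24 bytes in total). The struct is illustrative, not part of LLVM.

    #include <cstdint>

    struct Elf32RegInfo {
      uint32_t ri_gprmask;    // general-purpose registers used
      uint32_t ri_cprmask[4]; // coprocessor registers used
      int32_t  ri_gp_value;   // initial value of $gp
    };
    static_assert(sizeof(Elf32RegInfo) == 24, "matches the six 4-byte emits");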
diff --git a/lib/Target/Mips/MCTargetDesc/MipsReginfo.h b/lib/Target/Mips/MCTargetDesc/MipsReginfo.h
new file mode 100644
index 000000000000..039b8eaaf287
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsReginfo.h
@@ -0,0 +1,31 @@
+//=== MipsReginfo.h - MipsReginfo -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGINFO_H
+#define MIPSREGINFO_H
+
+namespace llvm {
+ class MCStreamer;
+ class TargetLoweringObjectFile;
+ class MipsSubtarget;
+
+ class MipsReginfo {
+ void anchor();
+ public:
+ MipsReginfo() {}
+
+ void emitMipsReginfoSectionCG(MCStreamer &OS,
+ const TargetLoweringObjectFile &TLOF,
+ const MipsSubtarget &MST) const;
+ };
+
+} // namespace llvm
+
+#endif
+
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index bd8c5173454e..bcf951e861b0 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -16,7 +16,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \
- MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \
+ MipsGenDisassemblerTables.inc \
MipsGenMCPseudoLowering.inc MipsGenAsmMatcher.inc
DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 2963f7e7fa42..8c65bb4020b5 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -27,6 +27,7 @@ namespace llvm {
FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
JITCodeEmitter &JCE);
+ FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
} // end namespace llvm;
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 90c01d5de0a9..eefb02a494ca 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -44,32 +44,29 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
"Enable n64 ABI">;
def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
-def FeatureAndroid : SubtargetFeature<"android", "IsAndroid", "true",
- "Target is android">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
"Enable 'signext in register' instructions.">;
def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
"Enable 'conditional move' instructions.">;
-def FeatureMulDivAdd : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true",
- "Enable 'multiply add/sub' instructions.">;
-def FeatureMinMax : SubtargetFeature<"minmax", "HasMinMax", "true",
- "Enable 'min/max' instructions.">;
def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
"Enable 'byte/half swap' instructions.">;
def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
"Enable 'count leading bits' instructions.">;
+def FeatureFPIdx : SubtargetFeature<"FPIdx", "HasFPIdx", "true",
+ "Enable 'FP indexed load/store' instructions.">;
def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
"Mips32 ISA Support",
[FeatureCondMov, FeatureBitCount]>;
def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
"Mips32r2", "Mips32r2 ISA Support",
- [FeatureMips32, FeatureSEInReg, FeatureSwap]>;
+ [FeatureMips32, FeatureSEInReg, FeatureSwap,
+ FeatureFPIdx]>;
def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
"Mips64", "Mips64 ISA Support",
[FeatureGP64Bit, FeatureFP64Bit,
- FeatureMips32]>;
+ FeatureMips32, FeatureFPIdx]>;
def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
"Mips64r2", "Mips64r2 ISA Support",
[FeatureMips64, FeatureMips32r2]>;
@@ -81,6 +78,9 @@ def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">;
def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true",
"Mips DSP-R2 ASE", [FeatureDSP]>;
+def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true",
+ "microMips mode">;
+
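For reference, the feature strings defined in this hunk are what llc's -mattr flag matches on; assuming a Mips-enabled build, the new microMIPS mode would be requested along these lines:

    llc -march=mips -mattr=+mips32r2,+micromips input.ll -o input.s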
//===----------------------------------------------------------------------===//
// Mips processors supported.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index 4e6b21feb55d..1bb6fe46295b 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -12,25 +12,26 @@
//===----------------------------------------------------------------------===//
#include "Mips16FrameLowering.h"
-#include "MipsInstrInfo.h"
#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
+#include "Mips16InstrInfo.h"
+#include "MipsInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -38,9 +39,35 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
// No need to allocate space on the stack.
if (StackSize == 0 && !MFI->adjustsStack()) return;
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ MachineLocation DstML, SrcML;
+
// Adjust stack.
- if (isInt<16>(-StackSize))
- BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize);
+ TII.makeFrame(Mips::SP, StackSize, MBB, MBBI);
+
+ // emit ".cfi_def_cfa_offset StackSize"
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP);
+ SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize);
+ Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML));
+
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
+ DstML = MachineLocation(MachineLocation::VirtualFP, -8);
+ SrcML = MachineLocation(Mips::S1);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+
+ DstML = MachineLocation(MachineLocation::VirtualFP, -12);
+ SrcML = MachineLocation(Mips::S0);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
+
+ DstML = MachineLocation(MachineLocation::VirtualFP, -4);
+ SrcML = MachineLocation(Mips::RA);
+ Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
if (hasFP(MF))
BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0)
@@ -52,8 +79,8 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const MipsInstrInfo &TII =
- *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
uint64_t StackSize = MFI->getStackSize();
@@ -65,9 +92,8 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(Mips::S0);
// Adjust stack.
- if (isInt<16>(StackSize))
- // assumes stacksize multiple of 8
- BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize);
+ // assumes stacksize multiple of 8
+ TII.restoreFrame(Mips::SP, StackSize, MBB, MBBI);
}
bool Mips16FrameLowering::
@@ -113,6 +139,25 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+void Mips16FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ TII.adjustStackPtr(Mips::SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
+
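A plain-C sketch of the sign convention in the function above: ADJCALLSTACKDOWN reserves argument space (the stack grows down, so the amount is negated) and ADJCALLSTACKUP releases it; with no reserved call frame each pseudo collapses to one explicit SP adjustment. The helper name is hypothetical.

    #include <cstdint>

    // Delta handed to adjustStackPtr(SP, Delta, ...) for a call-frame pseudo.
    static int64_t callFrameSPDelta(bool IsAdjDown, int64_t Amount) {
      return IsAdjDown ? -Amount : Amount;
    }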
bool
Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 01db71e8def5..54fdb7871466 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -20,13 +20,17 @@ namespace llvm {
class Mips16FrameLowering : public MipsFrameLowering {
public:
explicit Mips16FrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI) {}
+ : MipsFrameLowering(STI, 8) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..00b3449300c5
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -0,0 +1,308 @@
+//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips16ISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = Mips::Mflo16;
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = Mips::Mfhi16;
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
+
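selectMULT above glues Mflo16/Mfhi16 reads to the multiply node; a plain-C model of the computation it wires up, as a sketch only:

    #include <cstdint>

    static void mult32(int32_t A, int32_t B, uint32_t &Lo, uint32_t &Hi) {
      int64_t P = static_cast<int64_t>(A) * B; // MultRxRy16 writes HI/LO
      Lo = static_cast<uint32_t>(P);           // Mflo16
      Hi = static_cast<uint32_t>(P >> 32);     // Mfhi16
    }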
+void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC =
+ (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+ V2 = RegInfo.createVirtualRegister(RC);
+
+ BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
+ BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
+ .addReg(V1).addReg(V2);
+}
+
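A sketch of the data flow through the four instructions emitted above, with the _gp_disp relocations elided: the PC-relative low half is added to the high half shifted into place. Names are illustrative.

    #include <cstdint>

    // gp = (%hi(_gp_disp) << 16) + (pc + %lo(_gp_disp))
    static uint32_t gpValue(uint32_t Hi16, uint32_t Lo16PlusPC) {
      return Lo16PlusPC + (Hi16 << 16); // SllX16 by 16, then AdduRxRyRz16
    }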
+// Insert instructions to initialize the Mips16 SP Alias register in the
+// first MBB of the function.
+//
+void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->mips16SPAliasRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
+
+ BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
+ .addReg(Mips::SP);
+}
+
+void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+ initMips16SPAliasReg(MF);
+}
+
+/// getMips16SPAliasReg - Output the instructions required to put the
+/// SP into a Mips16 accessible aliased register.
+SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() {
+ unsigned Mips16SPAliasReg =
+ MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
+ return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
+}
+
+void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
+ SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
+ if (Parent) {
+ switch (Parent->getOpcode()) {
+ case ISD::LOAD: {
+ LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF) ?
+ AliasFPReg : getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ case ISD::STORE: {
+ StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF) ?
+ AliasFPReg : getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ }
+ }
+ AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
+}
+
+bool Mips16DAGToDAGISel::selectAddr16(
+ SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
+ SDValue &Alias) {
+ EVT ValTy = Addr.getValueType();
+
+ Alias = CurDAG->getTargetConstant(0, ValTy);
+
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ return true;
+ }
+ // on PIC code Load GA
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ }
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+
+ // If an indexed floating point load/store can be emitted, return false.
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
+
+ if (LS &&
+ (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+ Subtarget.hasFPIdx())
+ return false;
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+}
+
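A minimal sketch of the "base + constant" split performed above, under the assumption that only signed 16-bit constants may be folded into the load/store offset field; anything larger leaves the add to be materialized separately. Names are illustrative.

    #include <cstdint>
    #include <utility>

    // Returns {foldedOffset, true} when the constant fits the offset field.
    static std::pair<int64_t, bool> splitBasePlusConst(int64_t C) {
      bool Folds = C >= -32768 && C <= 32767; // isInt<16>(C)
      return std::make_pair(Folds ? C : 0, Folds);
    }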
+/// Select instructions not customized! Used for
+/// expanded, promoted and normal instructions
+std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ EVT NodeTy = Node->getValueType(0);
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::AdduRxRyRz16;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SubuRxRyRz16;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ EVT VT = LHS.getValueType();
+
+ unsigned Sltu_op = Mips::SltuRxRyRz16;
+ SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2);
+ unsigned Addu_op = Mips::AdduRxRyRz16;
+ SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT,
+ SDValue(Carry,0), RHS);
+
+ SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry,0));
+ return std::make_pair(true, Result);
+ }
+
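A plain-C picture of the ADDE expansion selected above for the 64-bit case: the carry out of the low-word add is recovered with an unsigned compare (SltuRxRyRz16), added into the high-word sum. A sketch, not the emitted code.

    #include <cstdint>

    static void add64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi,
                      uint32_t &RLo, uint32_t &RHi) {
      RLo = ALo + BLo;                      // ADDC on the low words
      uint32_t Carry = RLo < ALo ? 1u : 0u; // sltu: sum < addend => carry out
      RHi = AHi + (BHi + Carry);            // addu, then the selected ADDE op
    }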
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) {
+ return new Mips16DAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
new file mode 100644
index 000000000000..baa85877d957
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -0,0 +1,51 @@
+//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16ISELDAGTODAG_H
+#define MIPS16ISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class Mips16DAGToDAGISel : public MipsDAGToDAGISel {
+public:
+ explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDValue getMips16SPAliasReg();
+
+ void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg);
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+
+ void initMips16SPAliasReg(MachineFunction &MF);
+};
+
+FunctionPass *createMips16ISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
new file mode 100644
index 000000000000..23eb5375ac94
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -0,0 +1,689 @@
+//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-lower"
+#include "Mips16ISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <set>
+
+using namespace llvm;
+
+static cl::opt<bool>
+Mips16HardFloat("mips16-hard-float", cl::NotHidden,
+ cl::desc("MIPS: mips16 hard float enable."),
+ cl::init(false));
+
+static cl::opt<bool> DontExpandCondPseudos16(
+ "mips16-dont-expand-cond-pseudo",
+ cl::init(false),
+ cl::desc("Don't expand conditional move related "
+ "pseudos for Mips 16"),
+ cl::Hidden);
+
+namespace {
+ std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded;
+}
+
+Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ //
+ // set up as if mips32 and then revert so we can test the mechanism
+ // for switching
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+ computeRegisterProperties();
+ clearRegisterClasses();
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
+
+ if (Mips16HardFloat)
+ setMips16HardFloatLibCalls();
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMips16TargetLowering(MipsTargetMachine &TM) {
+ return new Mips16TargetLowering(TM);
+}
+
+bool
+Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ return false;
+}
+
+MachineBasicBlock *
+Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::SelBeqZ:
+ return emitSel16(Mips::BeqzRxImm16, MI, BB);
+ case Mips::SelBneZ:
+ return emitSel16(Mips::BnezRxImm16, MI, BB);
+ case Mips::SelTBteqZCmpi:
+ return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSlti:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSltiu:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBtneZCmpi:
+ return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSlti:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSltiu:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBteqZCmp:
+ return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBteqZSlt:
+ return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBteqZSltu:
+ return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::SelTBtneZCmp:
+ return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBtneZSlt:
+ return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBtneZSltu:
+ return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BteqzT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BteqzT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BtnezT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BtnezT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BtnezT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltCCRxRy16:
+ return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
+ case Mips::SltiCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SltiuCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltuCCRxRy16:
+ return emitFEXT_CCRX16_ins
+ (Mips::SltuRxRy16, MI, BB);
+ }
+}
+
+bool Mips16TargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ // No tail call optimization for mips16.
+ return false;
+}
+
+void Mips16TargetLowering::setMips16LibcallName
+ (RTLIB::Libcall L, const char *Name) {
+ setLibcallName(L, Name);
+ NoHelperNeeded.insert(Name);
+}
+
+void Mips16TargetLowering::setMips16HardFloatLibCalls() {
+ setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
+ setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
+ setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
+ setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
+ setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
+ setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
+ setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
+ setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
+ setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
+ setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
+ setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
+ setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
+ setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
+ setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
+ setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
+ setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
+ setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
+ setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
+ setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
+ setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
+ setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
+ setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
+ setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
+ setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
+ setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
+ setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
+}
+
+
+//
+// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
+// cleaner way to do all of this but it will have to wait until the traditional
+// gcc mechanism is completed.
+//
+// For PIC, in order for Mips16 code to call Mips32 code which, according to
+// the ABI, has either arguments or return values placed in floating point
+// registers, we use a set of helper functions. (This includes functions
+// which return a complex type, which on Mips is returned in a pair of
+// floating point registers.)
+//
+// This is an encoding that we inherited from gcc.
+// In Mips traditional O32, N32 ABI, floating point numbers are passed in
+// floating point argument registers 1,2 only when the first and optionally
+// the second arguments are float (sf) or double (df).
+// For Mips16 we are only concerned with the situations where floating point
+// arguments are being passed in floating point registers by the ABI, because
+// Mips16 mode code cannot execute floating point instructions to load those
+// values and hence helper functions are needed.
+// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df)
+// the helper function suffixes for these are:
+// 0, 1, 5, 9, 2, 6, 10
+// this suffix can then be calculated as follows:
+// for a given argument Arg:
+// Arg1x, Arg2x = 1 : Arg is sf
+//                2 : Arg is df
+//                0 : Arg is neither sf nor df
+// So the stub suffix is the string for the number Arg1x + Arg2x*4.
+// However, not all numbers between 0 and 10 are possible; we check anyway
+// and assert if an impossible value occurs.
+//
+
+unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
+ (ArgListTy &Args) const {
+ unsigned int resultNum = 0;
+ if (Args.size() >= 1) {
+ Type *t = Args[0].Ty;
+ if (t->isFloatTy()) {
+ resultNum = 1;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum = 2;
+ }
+ }
+ if (resultNum) {
+ if (Args.size() >= 2) {
+ Type *t = Args[1].Ty;
+ if (t->isFloatTy()) {
+ resultNum += 4;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum += 8;
+ }
+ }
+ }
+ return resultNum;
+}
+
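The suffix arithmetic restated as a tiny standalone function, matching the comment block above (1 = sf, 2 = df, second argument scaled by 4). The enum and function names are illustrative.

    enum ArgKind { NotFP = 0, SF = 1, DF = 2 };

    static unsigned stubNumber(ArgKind First, ArgKind Second) {
      unsigned N = First;
      if (N)             // a second argument only matters after an fp first
        N += 4 * Second; // e.g. (df, sf) -> 2 + 4*1 = 6
      return N;          // possible values: 0, 1, 2, 5, 6, 9, 10
    }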
+//
+// prefixes are attached to stub numbers depending on the return type.
+// return type: float sf_
+// double df_
+// single complex sc_
+// double complex dc_
+// others NO PREFIX
+//
+//
+// The full name of a helper function is __mips16_call_stub_ +
+// return-type-dependent prefix + stub number
+//
+//
+// This is something that probably should be in a different source file and
+// perhaps done differently, but my main purpose is to not waste runtime
+// on something that we can enumerate in the source. Another possibility is
+// to have a python script to generate these mapping tables. This will do
+// for now. There is a whole series of helper function mapping arrays, one
+// for each return type class as outlined above. There are 11 possible
+// entries. Entries with 0 are ones which should never be selected.
+//
+// All the arrays are similar except for the one covering functions which
+// return neither sf, df, sc, nor dc, where we only care about cases which
+// have sf or df as the first parameter.
+//
+#define P_ "__mips16_call_stub_"
+#define MAX_STUB_NUMBER 10
+#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
+#define T P "0" , T1
+#define P P_
+static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
+ {0, T1 };
+#undef P
+#define P P_ "sf_"
+static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "df_"
+static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "sc_"
+static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "dc_"
+static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#undef P_
+
+
+const char* Mips16TargetLowering::
+ getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
+ const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
+#ifndef NDEBUG
+ const unsigned int maxStubNum = 10;
+ assert(stubNum <= maxStubNum);
+ const bool validStubNum[maxStubNum+1] =
+ {true, true, true, false, false, true, true, false, false, true, true};
+ assert(validStubNum[stubNum]);
+#endif
+ const char *result;
+ if (RetTy->isFloatTy()) {
+ result = sfMips16Helper[stubNum];
+ }
+ else if (RetTy->isDoubleTy()) {
+ result = dfMips16Helper[stubNum];
+ }
+ else if (RetTy->isStructTy()) {
+ // check if it's complex
+ if (RetTy->getNumContainedTypes() == 2) {
+ if ((RetTy->getContainedType(0)->isFloatTy()) &&
+ (RetTy->getContainedType(1)->isFloatTy())) {
+ result = scMips16Helper[stubNum];
+ }
+ else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
+ (RetTy->getContainedType(1)->isDoubleTy())) {
+ result = dcMips16Helper[stubNum];
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ if (stubNum == 0) {
+ needHelper = false;
+ return "";
+ }
+ result = vMips16Helper[stubNum];
+ }
+ needHelper = true;
+ return result;
+}
+
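How the pieces above combine into a full helper name, shown with a throwaway formatter. The tag is one of "", "sf_", "df_", "sc_", "dc_", per the prefix table; e.g. a double(double, double) callee resolves to "__mips16_call_stub_df_10".

    #include <string>

    static std::string helperName(const std::string &Tag, unsigned StubNum) {
      return "__mips16_call_stub_" + Tag + std::to_string(StubNum);
    }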
+void Mips16TargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ SelectionDAG &DAG = CLI.DAG;
+ const char* Mips16HelperFunction = 0;
+ bool NeedMips16Helper = false;
+
+ if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) {
+ //
+ // currently we don't have symbols tagged with the mips16 or mips32
+ // qualifier, so we will assume that we don't know what kind it is
+ // and generate the helper.
+ //
+ bool LookupHelper = true;
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
+ if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) {
+ LookupHelper = false;
+ }
+ }
+ if (LookupHelper) Mips16HelperFunction =
+ getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper);
+
+ }
+
+ SDValue JumpTarget = Callee;
+
+ // T9 should contain the address of the callee function if
+ // -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned V0Reg = Mips::V0;
+ if (NeedMips16Helper) {
+ RegsToPass.push_front(std::make_pair(V0Reg, Callee));
+ JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
+ JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
+ } else
+ RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee));
+ }
+
+ Ops.push_back(JumpTarget);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+MachineBasicBlock *Mips16TargetLowering::
+emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
+ .addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSelT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addReg(MI->getOperand(4).getReg());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSeliT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addImm(MI->getOperand(4).getImm());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
+
+MachineBasicBlock
+ *Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ unsigned regY = MI->getOperand(1).getReg();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ int64_t imm = MI->getOperand(1).getImm();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ unsigned CmpOpc;
+ if (isUInt<8>(imm))
+ CmpOpc = CmpiOpc;
+ else if (isUInt<16>(imm))
+ CmpOpc = CmpiXOpc;
+ else
+ llvm_unreachable("immediate field not usable");
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+static unsigned Mips16WhichOp8uOr16simm
+ (unsigned shortOp, unsigned longOp, int64_t Imm) {
+ if (isUInt<8>(Imm))
+ return shortOp;
+ else if (isInt<16>(Imm))
+ return longOp;
+ else
+ llvm_unreachable("immediate field not usable");
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ unsigned regY = MI->getOperand(2).getReg();
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ int64_t Imm = MI->getOperand(2).getImm();
+ unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addImm(Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
new file mode 100644
index 000000000000..b23e2a1f37db
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -0,0 +1,80 @@
+//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Mips16ISELLOWERING_H
+#define Mips16ISELLOWERING_H
+
+#include "MipsISelLowering.h"
+
+namespace llvm {
+ class Mips16TargetLowering : public MipsTargetLowering {
+ public:
+ explicit Mips16TargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ void setMips16LibcallName(RTLIB::Libcall, const char *Name);
+
+ void setMips16HardFloatLibCalls();
+
+ unsigned int
+ getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+
+ const char *getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+ };
+}
+
+#endif // Mips16ISELLOWERING_H
diff --git a/lib/Target/Mips/Mips16InstrFormats.td b/lib/Target/Mips/Mips16InstrFormats.td
index 61602b62fb44..4ff62ef3b6f9 100644
--- a/lib/Target/Mips/Mips16InstrFormats.td
+++ b/lib/Target/Mips/Mips16InstrFormats.td
@@ -29,45 +29,13 @@
//
//===----------------------------------------------------------------------===//
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-//
-class Format16<bits<5> val> {
- bits<5> Value = val;
-}
-
-def Pseudo16 : Format16<0>;
-def FrmI16 : Format16<1>;
-def FrmRI16 : Format16<2>;
-def FrmRR16 : Format16<3>;
-def FrmRRI16 : Format16<4>;
-def FrmRRR16 : Format16<5>;
-def FrmRRI_A16 : Format16<6>;
-def FrmSHIFT16 : Format16<7>;
-def FrmI8_TYPE16 : Format16<8>;
-def FrmI8_MOVR3216 : Format16<9>;
-def FrmI8_MOV32R16 : Format16<10>;
-def FrmI8_SVRS16 : Format16<11>;
-def FrmJAL16 : Format16<12>;
-def FrmJALX16 : Format16<13>;
-def FrmEXT_I16 : Format16<14>;
-def FrmASMACRO16 : Format16<15>;
-def FrmEXT_RI16 : Format16<16>;
-def FrmEXT_RRI16 : Format16<17>;
-def FrmEXT_RRI_A16 : Format16<18>;
-def FrmEXT_SHIFT16 : Format16<19>;
-def FrmEXT_I816 : Format16<20>;
-def FrmEXT_I8_SVRS16 : Format16<21>;
-def FrmOther16 : Format16<22>; // Instruction w/ a custom format
// Base class for Mips 16 Format
// This class does not depend on the instruction size
//
class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>: Instruction
+ InstrItinClass itin>: Instruction
{
- Format16 Form = f;
let Namespace = "Mips";
@@ -78,14 +46,6 @@ class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern,
let Pattern = pattern;
let Itinerary = itin;
- //
- // Attributes specific to Mips instructions...
- //
- bits<5> FormBits = Form.Value;
-
- // TSFlags layout should be kept in sync with MipsInstrInfo.h.
- let TSFlags{4-0} = FormBits;
-
let Predicates = [InMips16Mode];
}
@@ -93,30 +53,35 @@ class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern,
// Generic Mips 16 Format
//
class MipsInst16<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>:
- MipsInst16_Base<outs, ins, asmstr, pattern, itin, f>
+ InstrItinClass itin>:
+ MipsInst16_Base<outs, ins, asmstr, pattern, itin>
{
field bits<16> Inst;
bits<5> Opcode = 0;
// Top 5 bits are the 'opcode' field
let Inst{15-11} = Opcode;
+
+ let Size = 2;
+ field bits<16> SoftFail = 0;
}
//
// For 32 bit extended instruction forms.
//
class MipsInst16_32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>:
- MipsInst16_Base<outs, ins, asmstr, pattern, itin, f>
+ InstrItinClass itin>:
+ MipsInst16_Base<outs, ins, asmstr, pattern, itin>
{
field bits<32> Inst;
-
+
+ let Size = 4;
+ field bits<32> SoftFail = 0;
}
class MipsInst16_EXTEND<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>:
- MipsInst16_32<outs, ins, asmstr, pattern, itin, f>
+ InstrItinClass itin>:
+ MipsInst16_32<outs, ins, asmstr, pattern, itin>
{
let Inst{31-27} = 0b11110;
}
@@ -125,7 +90,7 @@ class MipsInst16_EXTEND<dag outs, dag ins, string asmstr, list<dag> pattern,
// Mips Pseudo Instructions Format
class MipsPseudo16<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst16<outs, ins, asmstr, pattern, IIPseudo, Pseudo16> {
+ MipsInst16<outs, ins, asmstr, pattern, IIPseudo> {
let isCodeGenOnly = 1;
let isPseudo = 1;
}
@@ -137,7 +102,7 @@ class MipsPseudo16<dag outs, dag ins, string asmstr, list<dag> pattern>:
class FI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern,
InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<11> imm11;
@@ -152,7 +117,7 @@ class FI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern,
class FRI16<bits<5> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRI16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<8> imm8;
@@ -169,7 +134,7 @@ class FRI16<bits<5> op, dag outs, dag ins, string asmstr,
class FRR16<bits<5> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -188,7 +153,7 @@ class FRR16<bits<5> _funct, dag outs, dag ins, string asmstr,
//
class FRR_SF16<bits<5> _funct, bits<3> _subfunct, dag outs, dag ins,
string asmstr, list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> subfunct;
@@ -208,7 +173,7 @@ class FRR_SF16<bits<5> _funct, bits<3> _subfunct, dag outs, dag ins,
//
class FC16<bits<5> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<6> _code; // code is a keyword in tablegen
bits<5> funct;
@@ -226,7 +191,7 @@ class FC16<bits<5> _funct, dag outs, dag ins, string asmstr,
class FRR16_JALRC<bits<1> _nd, bits<1> _l, bits<1> r_a,
dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<1> nd;
@@ -252,7 +217,7 @@ class FRR16_JALRC<bits<1> _nd, bits<1> _l, bits<1> r_a,
class FRRI16<bits<5> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRI16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -272,7 +237,7 @@ class FRRI16<bits<5> op, dag outs, dag ins, string asmstr,
class FRRR16<bits<2> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -294,7 +259,7 @@ class FRRR16<bits<2> _f, dag outs, dag ins, string asmstr,
class FRRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRI_A16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -316,7 +281,7 @@ class FRRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
class FSHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmSHIFT16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -338,7 +303,7 @@ class FSHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
class FI816<bits<3> _func, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_TYPE16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> func;
bits<8> imm8;
@@ -356,7 +321,7 @@ class FI816<bits<3> _func, dag outs, dag ins, string asmstr,
class FI8_MOVR3216<dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_MOVR3216>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<4> ry;
@@ -378,7 +343,7 @@ class FI8_MOVR3216<dag outs, dag ins, string asmstr,
class FI8_MOV32R16<dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_MOV32R16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> func;
@@ -402,7 +367,7 @@ class FI8_MOV32R16<dag outs, dag ins, string asmstr,
class FI8_SVRS16<bits<1> _s, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_SVRS16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<1> s;
bits<1> ra = 0;
@@ -429,7 +394,7 @@ class FI8_SVRS16<bits<1> _s, dag outs, dag ins, string asmstr,
class FJAL16<bits<1> _X, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_32<outs, ins, asmstr, pattern, itin, FrmJAL16>
+ MipsInst16_32<outs, ins, asmstr, pattern, itin>
{
bits<1> X;
bits<26> imm26;
@@ -452,7 +417,7 @@ class FJAL16<bits<1> _X, dag outs, dag ins, string asmstr,
class FEXT_I16<bits<5> _eop, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_I16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<16> imm16;
bits<5> eop;
@@ -474,7 +439,7 @@ class FEXT_I16<bits<5> _eop, dag outs, dag ins, string asmstr,
class FASMACRO16<dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmASMACRO16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<3> select;
bits<3> p4;
@@ -503,7 +468,7 @@ class FASMACRO16<dag outs, dag ins, string asmstr,
class FEXT_RI16<bits<5> _op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RI16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<16> imm16;
bits<5> op;
@@ -527,7 +492,7 @@ class FEXT_RI16<bits<5> _op, dag outs, dag ins, string asmstr,
class FEXT_RRI16<bits<5> _op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RRI16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<5> op;
bits<16> imm16;
@@ -552,7 +517,7 @@ class FEXT_RRI16<bits<5> _op, dag outs, dag ins, string asmstr,
class FEXT_RRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RRI_A16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<15> imm15;
bits<3> rx;
@@ -578,7 +543,7 @@ class FEXT_RRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
class FEXT_SHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_SHIFT16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<6> sa6;
bits<3> rx;
@@ -605,7 +570,7 @@ class FEXT_SHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_I816>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<16> imm16;
bits<5> I8;
@@ -630,7 +595,7 @@ class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr,
class FEXT_I8_SVRS16<bits<1> s_, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmI8_SVRS16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<3> xsregs =0;
bits<8> framesize =0;
@@ -659,5 +624,3 @@ class FEXT_I8_SVRS16<bits<1> s_, dag outs, dag ins, string asmstr,
}
-
-
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 619646b3178a..17dd2c07967a 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -12,18 +12,29 @@
//===----------------------------------------------------------------------===//
#include "Mips16InstrInfo.h"
-#include "MipsTargetMachine.h"
-#include "MipsMachineFunction.h"
#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
using namespace llvm;
+static cl::opt<bool> NeverUseSaveRestore(
+ "mips16-never-use-save-restore",
+ cl::init(false),
+ cl::desc("For testing ability to adjust stack pointer "
+ "without save/restore instruction"),
+ cl::Hidden);
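+// (Editorial note: a hypothetical way to exercise this hidden flag; the
+// exact driver invocation below is an assumption, not part of this patch:
+//   llc -march=mipsel -mattr=+mips16 -mips16-never-use-save-restore foo.ll)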
+
Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
: MipsInstrInfo(tm, Mips::BimmX16),
RI(*tm.getSubtargetImpl(), *this) {}
@@ -87,10 +98,10 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
void Mips16InstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+ int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
@@ -99,14 +110,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = Mips::SwRxSpImmX16;
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
}
void Mips16InstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI, int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
@@ -115,13 +125,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (Mips::CPU16RegsRegClass.hasSubClassEq(RC))
Opc = Mips::LwRxSpImmX16;
assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
.addMemOperand(MMO);
}
bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineBasicBlock &MBB = *MI->getParent();
-
switch(MI->getDesc().getOpcode()) {
default:
return false;
@@ -160,20 +169,215 @@ unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
return 0;
}
+// Adjust SP by FrameSize bytes. Save RA, S0, S1
+void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ if (!NeverUseSaveRestore) {
+ if (isUInt<11>(FrameSize))
+ BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).addImm(FrameSize);
+ else {
+      int Base = 2040; // TODO: add a template helper (like isUInt) that
+                       // returns the largest possible n-bit unsigned
+                       // value, rather than hard-coding 2040 here.
+ int64_t Remainder = FrameSize - Base;
+      BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).addImm(Base);
+ if (isInt<16>(-Remainder))
+ BuildAddiuSpImm(MBB, I, -Remainder);
+ else
+ adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1);
+ }
+  } else {
+ //
+    // sw   $ra, -4($sp)
+    // sw   $s1, -8($sp)
+    // sw   $s0, -12($sp)
+
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::RA);
+ MIB1.addReg(Mips::SP);
+ MIB1.addImm(-4);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::S1);
+ MIB2.addReg(Mips::SP);
+ MIB2.addImm(-8);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::S0);
+ MIB3.addReg(Mips::SP);
+ MIB3.addImm(-12);
+ adjustStackPtrBig(SP, -FrameSize, MBB, I, Mips::V0, Mips::V1);
+ }
+}
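+
+// Illustrative example (editorial, not part of the patch): for
+// FrameSize = 5000, isUInt<11> fails (5000 > 2047), so the save path emits
+// "save $ra, $s0, $s1, 2040" followed by an addiu of the remaining -2960
+// to SP, which fits a signed 16-bit immediate.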
+
+// Adjust SP by FrameSize bytes. Restore RA, S0, S1
+void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ if (!NeverUseSaveRestore) {
+ if (isUInt<11>(FrameSize))
+ BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)).addImm(FrameSize);
+ else {
+      int Base = 2040; // TODO: add a template helper (like isUInt) that
+                       // returns the largest possible n-bit unsigned
+                       // value, rather than hard-coding 2040 here.
+ int64_t Remainder = FrameSize - Base;
+ if (isInt<16>(Remainder))
+ BuildAddiuSpImm(MBB, I, Remainder);
+ else
+ adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1);
+      BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)).addImm(Base);
+ }
+  } else {
+ adjustStackPtrBig(SP, FrameSize, MBB, I, Mips::A0, Mips::A1);
+    // lw   $a0, -4($sp)   (then move $ra, $a0)
+    // lw   $s1, -8($sp)
+    // lw   $s0, -12($sp)
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::A0);
+ MIB1.addReg(Mips::SP);
+ MIB1.addImm(-4);
+ MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16),
+ Mips::RA);
+ MIB0.addReg(Mips::A0);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::S1);
+ MIB2.addReg(Mips::SP);
+ MIB2.addImm(-8);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::S0);
+ MIB3.addReg(Mips::SP);
+ MIB3.addImm(-12);
+  }
+}
+
+// Adjust SP by Amount bytes, where Amount can be any 32-bit number.
+// This can only be called when we know that at least one register is free;
+// that is clearly the case at prologue and epilogue.
+//
+void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Reg1, unsigned Reg2) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+// MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+// unsigned Reg1 = RegInfo.createVirtualRegister(&Mips::CPU16RegsRegClass);
+// unsigned Reg2 = RegInfo.createVirtualRegister(&Mips::CPU16RegsRegClass);
+ //
+ // li reg1, constant
+ // move reg2, sp
+ // add reg1, reg1, reg2
+ // move sp, reg1
+  //
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwConstant32), Reg1);
+ MIB1.addImm(Amount);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::MoveR3216), Reg2);
+ MIB2.addReg(Mips::SP, RegState::Kill);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::AdduRxRyRz16), Reg1);
+ MIB3.addReg(Reg1);
+ MIB3.addReg(Reg2, RegState::Kill);
+ MachineInstrBuilder MIB4 = BuildMI(MBB, I, DL, get(Mips::Move32R16),
+ Mips::SP);
+ MIB4.addReg(Reg1, RegState::Kill);
+}
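+
+// Editorial note: the round trip through Reg2 is needed because SP is not
+// a MIPS16 register; MoveR3216/Move32R16 shuttle its value into and out of
+// the 16-bit register file so the add can happen there.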
+
+void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ assert(false && "adjust stack pointer amount exceeded");
+}
+
/// Adjust SP by Amount bytes.
void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- if (isInt<16>(Amount)) {
- if (Amount < 0)
- BuildMI(MBB, I, DL, get(Mips::SaveDecSpF16)). addImm(-Amount);
- else if (Amount > 0)
- BuildMI(MBB, I, DL, get(Mips::RestoreIncSpF16)).addImm(Amount);
+  if (isInt<16>(Amount)) // TODO: change to addiu sp and revisit the
+                         // isInt<16> range check.
+ BuildAddiuSpImm(MBB, I, Amount);
+ else
+ adjustStackPtrBigUnrestricted(SP, Amount, MBB, I);
+}
+
+/// This function generates the sequence of instructions needed to get the
+/// result of adding register REG and immediate IMM.
+unsigned
+Mips16InstrInfo::loadImmediate(unsigned FrameReg,
+ int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned &NewImm) const {
+ //
+  // The original instruction is:
+  //   Instr rx, T[offset], where offset is too big to encode directly.
+ //
+ // lo = offset & 0xFFFF
+ // hi = ((offset >> 16) + (lo >> 15)) & 0xFFFF;
+ //
+ // let T = temporary register
+ // li T, hi
+ // shl T, 16
+ // add T, Rx, T
+ //
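+  // Worked example (editorial): for Imm = 0x12348000, lo = 0x8000 and
+  // hi = (0x1234 + (0x8000 >> 15)) & 0xFFFF = 0x1235; the +1 compensates
+  // for lo later being consumed as a sign-extended 16-bit immediate:
+  // (0x1235 << 16) + (int16_t)0x8000 = 0x12350000 - 0x8000 = 0x12348000.
+  //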
+ RegScavenger rs;
+ int32_t lo = Imm & 0xFFFF;
+ int32_t hi = ((Imm >> 16) + (lo >> 15)) & 0xFFFF;
+ NewImm = lo;
+  unsigned Reg = 0;
+ unsigned SpReg = 0;
+ rs.enterBasicBlock(&MBB);
+ rs.forward(II);
+ //
+  // We use T0 to hold the first register if it needs to be saved away,
+  // and T1 likewise for the second register.
+ //
+  unsigned FirstRegSaved = 0, SecondRegSaved = 0;
+ unsigned FirstRegSavedTo = 0, SecondRegSavedTo = 0;
+
+ Reg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass);
+ if (Reg == 0) {
+ FirstRegSaved = Reg = Mips::V0;
+ FirstRegSavedTo = Mips::T0;
+ copyPhysReg(MBB, II, DL, FirstRegSavedTo, FirstRegSaved, true);
}
else
- // not implemented for large values yet
- assert(false && "adjust stack pointer amount exceeded");
+ rs.setUsed(Reg);
+ BuildMI(MBB, II, DL, get(Mips::LiRxImmX16), Reg).addImm(hi);
+  BuildMI(MBB, II, DL, get(Mips::SllX16), Reg).addReg(Reg).addImm(16);
+ if (FrameReg == Mips::SP) {
+ SpReg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass);
+ if (SpReg == 0) {
+ if (Reg != Mips::V1) {
+ SecondRegSaved = SpReg = Mips::V1;
+ SecondRegSavedTo = Mips::T1;
+      } else {
+ SecondRegSaved = SpReg = Mips::V0;
+ SecondRegSavedTo = Mips::T0;
+ }
+ copyPhysReg(MBB, II, DL, SecondRegSavedTo, SecondRegSaved, true);
+    } else
+ rs.setUsed(SpReg);
+
+ copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false);
+    BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg).addReg(SpReg)
+ .addReg(Reg);
+  } else
+    BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg).addReg(FrameReg)
+ .addReg(Reg, RegState::Kill);
+ if (FirstRegSaved || SecondRegSaved) {
+ II = llvm::next(II);
+ if (FirstRegSaved)
+ copyPhysReg(MBB, II, DL, FirstRegSaved, FirstRegSavedTo, true);
+ if (SecondRegSaved)
+ copyPhysReg(MBB, II, DL, SecondRegSaved, SecondRegSavedTo, true);
+ }
+ return Reg;
}
unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
@@ -194,6 +398,20 @@ void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
}
+
+const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const {
+ if (validSpImm8(Imm))
+ return get(Mips::AddiuSpImm16);
+ else
+ return get(Mips::AddiuSpImmX16);
+}
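+
+// Illustrative examples (editorial): Imm = 16 is a multiple of 8 within
+// the signed 11-bit range, so AddiuSpImm selects the 16-bit AddiuSpImm16;
+// Imm = 20 (not a multiple of 8) or Imm = 4096 (outside isInt<11>) falls
+// back to the extended AddiuSpImmX16.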
+
+void Mips16InstrInfo::BuildAddiuSpImm(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator I,
+                                      int64_t Imm) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm);
+}
+
const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
return new Mips16InstrInfo(TM);
}
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index e06ccfe61c52..a77a9043bb17 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -14,8 +14,8 @@
#ifndef MIPS16INSTRUCTIONINFO_H
#define MIPS16INSTRUCTIONINFO_H
-#include "MipsInstrInfo.h"
#include "Mips16RegisterInfo.h"
+#include "MipsInstrInfo.h"
namespace llvm {
@@ -48,31 +48,75 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+ // Adjust SP by FrameSize bytes. Save RA, S0, S1
+ void makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ // Adjust SP by FrameSize bytes. Restore RA, S0, S1
+ void restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
/// Adjust SP by Amount bytes.
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
+ /// Emit a series of instructions to load an immediate.
+  /// This is used to adjust some FrameReg; we return the new register to be
+  /// used in place of FrameReg, and the adjusted immediate field in NewImm.
+ unsigned loadImmediate(unsigned FrameReg,
+ int64_t Imm, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ unsigned &NewImm) const;
+
+ static bool validSpImm8(int offset) {
+ return ((offset & 7) == 0) && isInt<11>(offset);
+ }
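+  // (Editorial gloss: i.e. the multiples of 8 in [-1024, 1016]; the encoded
+  // field is offset >> 3, a signed 8-bit value, hence the name "SpImm8".)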
+
+  //
+  // Build the proper AddiuSpImm variant based on the Imm field.
+  //
+
+ const MCInstrDesc& AddiuSpImm(int64_t Imm) const;
+
+  void BuildAddiuSpImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                       int64_t Imm) const;
+
private:
virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Opc) const;
+
+ // Adjust SP by Amount bytes where bytes can be up to 32bit number.
+ void adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Reg1, unsigned Reg2) const;
+
+ // Adjust SP by Amount bytes where bytes can be up to 32bit number.
+ void adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
};
}
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 5defc75ea6ef..aa51aaf46565 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -15,7 +15,7 @@
// Mips Address
//
def addr16 :
- ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>;
+ ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>;
//
// Address operand
@@ -32,18 +32,76 @@ def mem16_ea : Operand<i32> {
}
//
+//
+// I8 instruction format
+//
+
+class FI816_ins_base<bits<3> _func, string asmstr,
+ string asmstr2, InstrItinClass itin>:
+ FI816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2),
+ [], itin>;
+
+
+class FI816_SP_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FI816_ins_base<_func, asmstr, "\t$$sp, $imm # 16 bit inst", itin>;
+
+//
+// RI instruction format
+//
+
+
+class FRI16_ins_base<bits<5> op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FRI16<op, (outs CPU16Regs:$rx), (ins simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FRI16_ins<bits<5> op, string asmstr,
+ InstrItinClass itin>:
+ FRI16_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>;
+
+class FRI16R_ins_base<bits<5> op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FRI16<op, (outs), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FRI16R_ins<bits<5> op, string asmstr,
+ InstrItinClass itin>:
+ FRI16R_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>;
+
+class F2RI16_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FRI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm\t# 16 bit inst"), [], itin> {
+ let Constraints = "$rx_ = $rx";
+}
+
+class FRI16_B_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FRI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm),
+ !strconcat(asmstr, "\t$rx, $imm # 16 bit inst"), [], itin>;
+//
// Compare a register and immediate and place result in CC
// Implicit use of T8
//
// EXT-CCRR Instruction format
//
-class FEXT_CCRXI16_ins<bits<5> _op, string asmstr,
- InstrItinClass itin>:
- FEXT_RI16<_op, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm),
- !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), [], itin> {
+class FEXT_CCRXI16_ins<string asmstr>:
+ MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), []> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
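
// (Editorial note: FEXT_CCRXI16_ins and the similar compare classes below
// are now pseudos with usesCustomInserter = 1, so the compare plus
// "move $cc, $t8" sequence is emitted by a custom inserter during
// selection instead of carrying a fixed MIPS16 encoding.)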
+// JAL and JALX instruction format
+//
+class FJAL16_ins<bits<1> _X, string asmstr,
+ InstrItinClass itin>:
+ FJAL16<_X, (outs), (ins simm20:$imm),
+ !strconcat(asmstr, "\t$imm\n\tnop"),[],
+ itin> {
+ let isCodeGenOnly=1;
+}
//
// EXT-I instruction format
//
@@ -57,13 +115,17 @@ class FEXT_I16_ins<bits<5> eop, string asmstr, InstrItinClass itin> :
class FEXT_I816_ins_base<bits<3> _func, string asmstr,
string asmstr2, InstrItinClass itin>:
- FEXT_I816<_func, (outs), (ins uimm16:$imm), !strconcat(asmstr, asmstr2),
+ FEXT_I816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2),
[], itin>;
class FEXT_I816_ins<bits<3> _func, string asmstr,
InstrItinClass itin>:
FEXT_I816_ins_base<_func, asmstr, "\t$imm", itin>;
+class FEXT_I816_SP_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FEXT_I816_ins_base<_func, asmstr, "\t$$sp, $imm", itin>;
+
//
// Assembler formats in alphabetical order.
// Natural and pseudos are mixed together.
@@ -73,10 +135,11 @@ class FEXT_I816_ins<bits<3> _func, string asmstr,
//
// CC-RR Instruction format
//
-class FCCRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
- FRR16<f, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry),
- !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), [], itin> {
+class FCCRR16_ins<string asmstr> :
+ MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), []> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
//
@@ -92,6 +155,15 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr,
InstrItinClass itin>:
FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $imm", itin>;
+class FEXT_RI16R_ins_base<bits<5> _op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs ), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FEXT_RI16R_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16R_ins_base<_op, asmstr, "\t$rx, $imm", itin>;
+
class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>:
FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>;
@@ -149,25 +221,25 @@ class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
//
// EXT-T8I8
//
-class FEXT_T8I816_ins<bits<3> _func, string asmstr, string asmstr2,
- InstrItinClass itin>:
- FEXT_I816<_func, (outs),
- (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm),
- !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t",
- !strconcat(asmstr, "\t$imm"))),[], itin> {
+class FEXT_T8I816_ins<string asmstr, string asmstr2>:
+ MipsPseudo16<(outs),
+ (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm),
+ !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t",
+ !strconcat(asmstr, "\t$imm"))),[]> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
//
// EXT-T8I8I
//
-class FEXT_T8I8I16_ins<bits<3> _func, string asmstr, string asmstr2,
- InstrItinClass itin>:
- FEXT_I816<_func, (outs),
- (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ),
- !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t",
- !strconcat(asmstr, "\t$targ"))), [], itin> {
+class FEXT_T8I8I16_ins<string asmstr, string asmstr2>:
+ MipsPseudo16<(outs),
+ (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ),
+ !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t",
+ !strconcat(asmstr, "\t$targ"))), []> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
//
@@ -215,9 +287,14 @@ class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
!strconcat(asmstr, "\t$rx, $ry"), [], itin> {
}
-class FRRTR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
- FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
- !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), [], itin> ;
+class FRR16R_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
+}
+
+class FRRTR16_ins<string asmstr> :
+ MipsPseudo16<(outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), []> ;
//
// maybe refactor but need a $zero as a dummy first parameter
@@ -253,7 +330,7 @@ class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
class FRR16_JALRC_ins<bits<1> nd, bits<1> l, bits<1> ra,
string asmstr, InstrItinClass itin>:
- FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx),
+ FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx),
!strconcat(asmstr, "\t $rx"), [], itin> ;
//
@@ -292,13 +369,13 @@ class FRRR16_ins<bits<2> _f, string asmstr, InstrItinClass itin> :
//
// So this pseudo class only has one operand, i.e. op
//
-class Sel<bits<5> f1, string op, InstrItinClass itin>:
- MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
- CPU16Regs:$rt),
- !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), [], itin,
- Pseudo16> {
- let isCodeGenOnly=1;
+class Sel<string op>:
+ MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+ CPU16Regs:$rt),
+ !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), []> {
+ //let isCodeGenOnly=1;
let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
}
//
@@ -316,16 +393,15 @@ class Sel<bits<5> f1, string op, InstrItinClass itin>:
// move $rd, $rs
//
//
-class SeliT<bits<5> f1, string op1, bits<5> f2, string op2,
- InstrItinClass itin>:
- MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
- CPU16Regs:$rl, simm16:$imm),
- !strconcat(op2,
- !strconcat("\t$rl, $imm\n\t",
- !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin,
- Pseudo16> {
+class SeliT<string op1, string op2>:
+ MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+ CPU16Regs:$rl, simm16:$imm),
+ !strconcat(op2,
+ !strconcat("\t$rl, $imm\n\t",
+ !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> {
let isCodeGenOnly=1;
let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
}
//
@@ -340,18 +416,30 @@ class SeliT<bits<5> f1, string op1, bits<5> f2, string op2,
// move $rd, $rs
//
//
-class SelT<bits<5> f1, string op1, bits<5> f2, string op2,
- InstrItinClass itin>:
- MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+class SelT<string op1, string op2>:
+ MipsPseudo16<(outs CPU16Regs:$rd_),
+ (ins CPU16Regs:$rd, CPU16Regs:$rs,
CPU16Regs:$rl, CPU16Regs:$rr),
- !strconcat(op2,
- !strconcat("\t$rl, $rr\n\t",
- !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin,
- Pseudo16> {
+ !strconcat(op2,
+ !strconcat("\t$rl, $rr\n\t",
+ !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> {
let isCodeGenOnly=1;
let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
}
+//
+// 32 bit constant
+//
+def imm32: Operand<i32>;
+
+def Constant32:
+ MipsPseudo16<(outs), (ins imm32:$imm), "\t.word $imm", []>;
+
+def LwConstant32:
+ MipsPseudo16<(outs), (ins CPU16Regs:$rx, imm32:$imm),
+ "lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>;
+
//
// Some general instruction class info
@@ -385,14 +473,21 @@ class MayStore {
}
//
+
// Format: ADDIU rx, immediate MIPS16e
// Purpose: Add Immediate Unsigned Word (2-Operand, Extended)
// To add a constant to a 32-bit integer.
//
def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
+def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIAlu>,
+ ArithLogic16Defs<0> {
+ let AddedComplexity = 5;
+}
def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
- ArithLogic16Defs<0>;
+ ArithLogic16Defs<0> {
+ let isCodeGenOnly = 1;
+}
def AddiuRxRyOffMemX16:
FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>;
@@ -404,6 +499,25 @@ def AddiuRxRyOffMemX16:
// To add a constant to the program counter.
//
def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
+
+//
+// Format: ADDIU sp, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (2-Operand, SP-Relative, Extended)
+// To add a constant to the stack pointer.
+//
+def AddiuSpImm16
+ : FI816_SP_ins<0b011, "addiu", IIAlu> {
+ let Defs = [SP];
+ let Uses = [SP];
+ let AddedComplexity = 5;
+}
+
+def AddiuSpImmX16
+ : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> {
+ let Defs = [SP];
+ let Uses = [SP];
+}
+
//
// Format: ADDU rz, rx, ry MIPS16e
// Purpose: Add Unsigned Word (3-Operand)
@@ -422,6 +536,14 @@ def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
//
// Format: BEQZ rx, offset MIPS16e
+// Purpose: Branch on Equal to Zero
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+
+
+//
+// Format: BEQZ rx, offset MIPS16e
// Purpose: Branch on Equal to Zero (Extended)
// To test a GPR then do a PC-relative conditional branch.
//
@@ -435,6 +557,13 @@ def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16;
//
// Format: BNEZ rx, offset MIPS16e
+// Purpose: Branch on Not Equal to Zero
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+
+//
+// Format: BNEZ rx, offset MIPS16e
// Purpose: Branch on Not Equal to Zero (Extended)
// To test a GPR then do a PC-relative conditional branch.
//
@@ -445,20 +574,22 @@ def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
// Purpose: Branch on T Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
-def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16;
+def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
+ let Uses = [T8];
+}
-def BteqzT8CmpX16: FEXT_T8I816_ins<0b000, "bteqz", "cmp", IIAlu>, cbranch16;
+def BteqzT8CmpX16: FEXT_T8I816_ins<"bteqz", "cmp">, cbranch16;
-def BteqzT8CmpiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "cmpi", IIAlu>,
+def BteqzT8CmpiX16: FEXT_T8I8I16_ins<"bteqz", "cmpi">,
cbranch16;
-def BteqzT8SltX16: FEXT_T8I816_ins<0b000, "bteqz", "slt", IIAlu>, cbranch16;
+def BteqzT8SltX16: FEXT_T8I816_ins<"bteqz", "slt">, cbranch16;
-def BteqzT8SltuX16: FEXT_T8I816_ins<0b000, "bteqz", "sltu", IIAlu>, cbranch16;
+def BteqzT8SltuX16: FEXT_T8I816_ins<"bteqz", "sltu">, cbranch16;
-def BteqzT8SltiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "slti", IIAlu>, cbranch16;
+def BteqzT8SltiX16: FEXT_T8I8I16_ins<"bteqz", "slti">, cbranch16;
-def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>,
+def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">,
cbranch16;
//
@@ -466,22 +597,52 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>,
// Purpose: Branch on T Not Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
-def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16;
+def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16 {
+ let Uses = [T8];
+}
-def BtnezT8CmpX16: FEXT_T8I816_ins<0b000, "btnez", "cmp", IIAlu>, cbranch16;
+def BtnezT8CmpX16: FEXT_T8I816_ins<"btnez", "cmp">, cbranch16;
-def BtnezT8CmpiX16: FEXT_T8I8I16_ins<0b000, "btnez", "cmpi", IIAlu>, cbranch16;
+def BtnezT8CmpiX16: FEXT_T8I8I16_ins<"btnez", "cmpi">, cbranch16;
-def BtnezT8SltX16: FEXT_T8I816_ins<0b000, "btnez", "slt", IIAlu>, cbranch16;
+def BtnezT8SltX16: FEXT_T8I816_ins<"btnez", "slt">, cbranch16;
-def BtnezT8SltuX16: FEXT_T8I816_ins<0b000, "btnez", "sltu", IIAlu>, cbranch16;
+def BtnezT8SltuX16: FEXT_T8I816_ins<"btnez", "sltu">, cbranch16;
-def BtnezT8SltiX16: FEXT_T8I8I16_ins<0b000, "btnez", "slti", IIAlu>, cbranch16;
+def BtnezT8SltiX16: FEXT_T8I8I16_ins<"btnez", "slti">, cbranch16;
-def BtnezT8SltiuX16: FEXT_T8I8I16_ins<0b000, "btnez", "sltiu", IIAlu>,
+def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">,
cbranch16;
//
+// Format: CMP rx, ry MIPS16e
+// Purpose: Compare
+// To compare the contents of two GPRs.
+//
+def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: CMPI rx, immediate MIPS16e
+// Purpose: Compare Immediate
+// To compare a constant with the contents of a GPR.
+//
+def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: CMPI rx, immediate MIPS16e
+// Purpose: Compare Immediate (Extended)
+// To compare a constant with the contents of a GPR.
+//
+def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> {
+ let Defs = [T8];
+}
+
+
+//
// Format: DIV rx, ry MIPS16e
// Purpose: Divide Word
// To divide 32-bit signed integers.
@@ -498,7 +659,19 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
let Defs = [HI, LO];
}
+//
+// Format: JAL target MIPS16e
+// Purpose: Jump and Link
+// To execute a procedure call within the current 256 MB-aligned
+// region and preserve the current ISA.
+//
+def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> {
+ let isBranch = 1;
+ let hasDelaySlot = 0; // not true, but we add the nop for now
+ let isTerminator=1;
+ let isBarrier=1;
+}
//
// Format: JR ra MIPS16e
@@ -515,7 +688,7 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> {
let isBarrier=1;
}
-def JrcRa16: FRR16_JALRC_RA_only_ins<0, 0, "jrc", IIAlu> {
+def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIAlu> {
let isBranch = 1;
let isIndirectBranch = 1;
let isTerminator=1;
@@ -533,7 +706,9 @@ def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> {
// Purpose: Load Byte (Extended)
// To load a byte from memory as a signed value.
//
-def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad;
+def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad{
+ let isCodeGenOnly = 1;
+}
//
// Format: LBU ry, offset(rx) MIPS16e
@@ -541,14 +716,18 @@ def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad;
// To load a byte from memory as an unsigned value.
//
def LbuRxRyOffMemX16:
- FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad;
+ FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad {
+ let isCodeGenOnly = 1;
+}
//
// Format: LH ry, offset(rx) MIPS16e
// Purpose: Load Halfword signed (Extended)
// To load a halfword from memory as a signed value.
//
-def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad;
+def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad{
+ let isCodeGenOnly = 1;
+}
//
// Format: LHU ry, offset(rx) MIPS16e
@@ -556,7 +735,16 @@ def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad;
// To load a halfword from memory as an unsigned value.
//
def LhuRxRyOffMemX16:
- FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad;
+ FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad {
+ let isCodeGenOnly = 1;
+}
+
+//
+// Format: LI rx, immediate MIPS16e
+// Purpose: Load Immediate
+// To load a constant into a GPR.
+//
+def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>;
//
// Format: LI rx, immediate MIPS16e
@@ -570,13 +758,17 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
// Purpose: Load Word (Extended)
// To load a word from memory as a signed value.
//
-def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad;
+def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad{
+ let isCodeGenOnly = 1;
+}
// Format: LW rx, offset(sp) MIPS16e
// Purpose: Load Word (SP-Relative, Extended)
// To load an SP-relative word from memory as a signed value.
//
-def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad;
+def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad{
+ let Uses = [SP];
+}
//
// Format: MOVE r32, rz MIPS16e
@@ -688,6 +880,8 @@ def RestoreRaF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"restore\t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad {
let isCodeGenOnly = 1;
+ let Defs = [S0, S1, RA, SP];
+ let Uses = [SP];
}
// Use Restore to increment SP since SP is not a Mips16 register, this
@@ -698,6 +892,8 @@ def RestoreIncSpF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"restore\t$frame_size", [], IILoad >, MayLoad {
let isCodeGenOnly = 1;
+ let Defs = [SP];
+ let Uses = [SP];
}
//
@@ -712,6 +908,8 @@ def SaveRaF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"save\t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore {
let isCodeGenOnly = 1;
+ let Uses = [RA, SP, S0, S1];
+ let Defs = [SP];
}
//
@@ -723,6 +921,8 @@ def SaveDecSpF16:
FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
"save\t$frame_size", [], IIStore >, MayStore {
let isCodeGenOnly = 1;
+ let Uses = [SP];
+ let Defs = [SP];
}
//
// Format: SB ry, offset(rx) MIPS16e
@@ -741,7 +941,7 @@ def SbRxRyOffMemX16:
// Purpose: if rt==0, do nothing
// else rs = rt
//
-def SelBeqZ: Sel<0b00100, "beqz", IIAlu>;
+def SelBeqZ: Sel<"beqz">;
//
// Format: SelTBteqZCmp rd, rs, rl, rr
@@ -749,7 +949,7 @@ def SelBeqZ: Sel<0b00100, "beqz", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>;
+def SelTBteqZCmp: SelT<"bteqz", "cmp">;
//
// Format: SelTBteqZCmpi rd, rs, rl, rr
@@ -757,7 +957,7 @@ def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>;
+def SelTBteqZCmpi: SeliT<"bteqz", "cmpi">;
//
// Format: SelTBteqZSlt rd, rs, rl, rr
@@ -765,7 +965,7 @@ def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>;
+def SelTBteqZSlt: SelT<"bteqz", "slt">;
//
// Format: SelTBteqZSlti rd, rs, rl, rr
@@ -773,7 +973,7 @@ def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>;
+def SelTBteqZSlti: SeliT<"bteqz", "slti">;
//
// Format: SelTBteqZSltu rd, rs, rl, rr
@@ -781,7 +981,7 @@ def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>;
+def SelTBteqZSltu: SelT<"bteqz", "sltu">;
//
// Format: SelTBteqZSltiu rd, rs, rl, rr
@@ -789,14 +989,14 @@ def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSltiu: SeliT<0b000, "bteqz", 0b01011, "sltiu", IIAlu>;
+def SelTBteqZSltiu: SeliT<"bteqz", "sltiu">;
//
// Format: SelBnez rd, rs, rt
// Purpose: if rt!=0, do nothing
// else rs = rt
//
-def SelBneZ: Sel<0b00101, "bnez", IIAlu>;
+def SelBneZ: Sel<"bnez">;
//
// Format: SelTBtneZCmp rd, rs, rl, rr
@@ -804,7 +1004,7 @@ def SelBneZ: Sel<0b00101, "bnez", IIAlu>;
// If b!=0 then do nothing.
// if b0=0 then rd = rs
//
-def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>;
+def SelTBtneZCmp: SelT<"btnez", "cmp">;
//
// Format: SelTBtnezCmpi rd, rs, rl, rr
@@ -812,7 +1012,7 @@ def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>;
+def SelTBtneZCmpi: SeliT<"btnez", "cmpi">;
//
// Format: SelTBtneZSlt rd, rs, rl, rr
@@ -820,7 +1020,7 @@ def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>;
+def SelTBtneZSlt: SelT<"btnez", "slt">;
//
// Format: SelTBtneZSlti rd, rs, rl, rr
@@ -828,7 +1028,7 @@ def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>;
+def SelTBtneZSlti: SeliT<"btnez", "slti">;
//
// Format: SelTBtneZSltu rd, rs, rl, rr
@@ -836,7 +1036,7 @@ def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>;
+def SelTBtneZSltu: SelT<"btnez", "sltu">;
//
// Format: SelTBtneZSltiu rd, rs, rl, rr
@@ -844,7 +1044,7 @@ def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSltiu: SeliT<0b001, "btnez", 0b01011, "sltiu", IIAlu>;
+def SelTBtneZSltiu: SeliT<"btnez", "sltiu">;
//
//
// Format: SH ry, offset(rx) MIPS16e
@@ -868,39 +1068,78 @@ def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
//
def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>;
+// Format: SLTI rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> {
+ let Defs = [T8];
+}
+
//
// Format: SLTI rx, immediate MIPS16e
// Purpose: Set on Less Than Immediate (Extended)
// To record the result of a less-than comparison with a constant.
//
-def SltiCCRxImmX16: FEXT_CCRXI16_ins<0b01010, "slti", IIAlu>;
+//
+def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIAlu> {
+ let Defs = [T8];
+}
+def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">;
+
+// Format: SLTIU rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate Unsigned
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: SLTIU rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate Unsigned (Extended)
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIAlu> {
+ let Defs = [T8];
+}
//
// Format: SLTIU rx, immediate MIPS16e
// Purpose: Set on Less Than Immediate Unsigned (Extended)
// To record the result of a less-than comparison with a constant.
//
-def SltiuCCRxImmX16: FEXT_CCRXI16_ins<0b01011, "sltiu", IIAlu>;
+def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">;
//
// Format: SLT rx, ry MIPS16e
// Purpose: Set on Less Than
// To record the result of a less-than comparison.
//
-def SltRxRy16: FRR16_ins<0b00010, "slt", IIAlu>;
+def SltRxRy16: FRR16R_ins<0b00010, "slt", IIAlu>{
+ let Defs = [T8];
+}
-def SltCCRxRy16: FCCRR16_ins<0b00010, "slt", IIAlu>;
+def SltCCRxRy16: FCCRR16_ins<"slt">;
// Format: SLTU rx, ry MIPS16e
// Purpose: Set on Less Than Unsigned
// To record the result of an unsigned less-than comparison.
//
-def SltuRxRyRz16: FRRTR16_ins<0b00011, "sltu", IIAlu> {
+def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIAlu>{
+ let Defs = [T8];
+}
+
+def SltuRxRyRz16: FRRTR16_ins<"sltu"> {
let isCodeGenOnly=1;
+ let Defs = [T8];
}
-def SltuCCRxRy16: FCCRR16_ins<0b00011, "sltu", IIAlu>;
+def SltuCCRxRy16: FCCRR16_ins<"sltu">;
//
// Format: SRAV ry, rx MIPS16e
// Purpose: Shift Word Right Arithmetic Variable
@@ -996,6 +1235,7 @@ class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> :
Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm),
(I CPU16Regs:$in, imm_type:$imm)>;
+def: ArithLogicI16_pat<add, immSExt8, AddiuRxRxImm16>;
def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>;
def: ArithLogicI16_pat<shl, immZExt5, SllX16>;
def: ArithLogicI16_pat<srl, immZExt5, SrlX16>;
@@ -1029,14 +1269,19 @@ def: StoreM16_pat<store, SwRxRyOffMemX16>;
// Unconditional branch
class UncondBranch16_pat<SDNode OpNode, Instruction I>:
Mips16Pat<(OpNode bb:$imm16), (I bb:$imm16)> {
- let Predicates = [RelocPIC, InMips16Mode];
+ let Predicates = [InMips16Mode];
}
+def : Mips16Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (Jal16 tglobaladdr:$dst)>;
+
+def : Mips16Pat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (Jal16 texternalsym:$dst)>;
+
// Indirect branch
def: Mips16Pat<
- (brind CPU16Regs:$rs),
- (JrcRx16 CPU16Regs:$rs)>;
-
+ (brind CPU16Regs:$rs),
+ (JrcRx16 CPU16Regs:$rs)>;
// Jump and Link (Call)
let isCall=1, hasDelaySlot=0 in
@@ -1221,14 +1466,14 @@ def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
// MipsDivRem
//
def: Mips16Pat
- <(MipsDivRem CPU16Regs:$rx, CPU16Regs:$ry),
+ <(MipsDivRem16 CPU16Regs:$rx, CPU16Regs:$ry),
(DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
//
// MipsDivRemU
//
def: Mips16Pat
- <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry),
+ <(MipsDivRemU16 CPU16Regs:$rx, CPU16Regs:$ry),
(DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
// signed a,b
@@ -1464,7 +1709,7 @@ def: Mips16Pat
//
def: Mips16Pat
<(setle CPU16Regs:$lhs, CPU16Regs:$rhs),
- (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImmX16 1))>;
+ (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImm16 1))>;
//
// setlt
@@ -1524,7 +1769,11 @@ def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
// hi/lo relocs
-def : Mips16Pat<(MipsHi tglobaltlsaddr:$in),
+def : Mips16Pat<(MipsHi tglobaladdr:$in),
+ (SllX16 (LiRxImmX16 tglobaladdr:$in), 16)>;
+def : Mips16Pat<(MipsHi tjumptable:$in),
+ (SllX16 (LiRxImmX16 tjumptable:$in), 16)>;
+def : Mips16Pat<(MipsHi tglobaltlsaddr:$in),
(SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>;
// wrapper_pic
@@ -1539,4 +1788,4 @@ def : Wrapper16Pat<tglobaltlsaddr, AddiuRxRxImmX16, CPU16Regs>;
def : Mips16Pat<(i32 (extloadi8 addr16:$src)),
(LbuRxRyOffMemX16 addr16:$src)>;
def : Mips16Pat<(i32 (extloadi16 addr16:$src)),
-  (LhuRxRyOffMemX16 addr16:$src)>;
\ No newline at end of file
+ (LhuRxRyOffMemX16 addr16:$src)>;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index d7397a32f074..6cca2276856d 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -1,3 +1,4 @@
+
//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information ---------------===//
//
// The LLVM Compiler Infrastructure
@@ -14,28 +15,30 @@
#include "Mips16RegisterInfo.h"
#include "Mips16InstrInfo.h"
#include "Mips.h"
+#include "Mips16InstrInfo.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
-#include "MipsSubtarget.h"
#include "MipsMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -43,25 +46,36 @@ Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST,
const Mips16InstrInfo &I)
: MipsRegisterInfo(ST), TII(I) {}
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void Mips16RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
+bool Mips16RegisterInfo::requiresRegisterScavenging
+ (const MachineFunction &MF) const {
+ return true;
+}
+bool Mips16RegisterInfo::requiresFrameIndexScavenging
+ (const MachineFunction &MF) const {
+ return true;
+}
- const Mips16InstrInfo *II = static_cast<const Mips16InstrInfo*>(&TII);
+bool Mips16RegisterInfo::useFPForScavengingIndex
+ (const MachineFunction &MF) const {
+ return false;
+}
- II->adjustStackPtr(Mips::SP, Amount, MBB, I);
- }
+bool Mips16RegisterInfo::saveScavengerRegister
+ (MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ DebugLoc DL;
+ TII.copyPhysReg(MBB, I, DL, Mips::T0, Reg, true);
+ TII.copyPhysReg(MBB, UseMI, DL, Reg, Mips::T0, true);
+ return true;
+}
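+
+// Editorial note: T0 can serve as the parking spot above because it lies
+// outside the MIPS16 allocatable register class (the same trick
+// loadImmediate plays with T0/T1), so it is free between I and UseMI.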
- MBB.erase(I);
+const TargetRegisterClass *
+Mips16RegisterInfo::intRegClass(unsigned Size) const {
+ assert(Size == 4);
+ return &Mips::CPU16RegsRegClass;
}
void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
@@ -114,13 +128,23 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// by adding the size of the stack:
// incoming argument, callee-saved register location or local variable.
int64_t Offset;
+ bool IsKill = false;
Offset = SPOffset + (int64_t)StackSize;
Offset += MI.getOperand(OpNo + 1).getImm();
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
- MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
+  if (!MI.isDebugValue() &&
+      ((FrameReg != Mips::SP && !isInt<16>(Offset)) ||
+       (FrameReg == Mips::SP && !isInt<15>(Offset)))) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned NewImm;
+ FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm);
+ Offset = SignExtend64<16>(NewImm);
+ IsKill = true;
+ }
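+  // Worked example (editorial): with FrameReg == SP and Offset = 0x12348000,
+  // loadImmediate leaves SP + 0x12350000 in a scratch register and sets
+  // NewImm = 0x8000, so the operands become (scratch,
+  // SignExtend64<16>(0x8000) = -0x8000), which addresses the same location.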
+ MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill);
MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index 153def20d085..2b3d2b1a4ecb 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -22,11 +22,23 @@ class Mips16InstrInfo;
class Mips16RegisterInfo : public MipsRegisterInfo {
const Mips16InstrInfo &TII;
public:
- Mips16RegisterInfo(const MipsSubtarget &Subtarget, const Mips16InstrInfo &TII);
+ Mips16RegisterInfo(const MipsSubtarget &Subtarget,
+ const Mips16InstrInfo &TII);
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+
+ bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const;
+
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
int FrameIndex, uint64_t StackSize,
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 83322eac8c62..846a8224af35 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -34,192 +34,202 @@ def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
-// Shifts
-// 64-bit shift instructions.
let DecoderNamespace = "Mips64" in {
-class shift_rotate_imm64<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode>:
- shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt6, shamt,
- CPU64Regs>;
-
-// Mul, Div
-class Mult64<bits<6> func, string instr_asm, InstrItinClass itin>:
- Mult<func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
-class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- Div<op, func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
-
-multiclass Atomic2Ops64<PatFrag Op, string Opstr> {
- def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>,
- Requires<[IsN64, HasStandardEncoding]> {
+
+multiclass Atomic2Ops64<PatFrag Op> {
+ def NAME : Atomic2Ops<Op, CPU64Regs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Atomic2Ops<Op, CPU64Regs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let isCodeGenOnly = 1;
}
}
-multiclass AtomicCmpSwap64<PatFrag Op, string Width> {
- def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass AtomicCmpSwap64<PatFrag Op> {
+ def NAME : AtomicCmpSwap<Op, CPU64Regs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : AtomicCmpSwap<Op, CPU64Regs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let isCodeGenOnly = 1;
}
}
}
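
// Editorial note: with "def NAME", a defm such as
//   defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64>;
// now expands to the records ATOMIC_LOAD_ADD_I64 (NotN64) and
// ATOMIC_LOAD_ADD_I64_P8 (IsN64), replacing the deprecated #NAME# paste
// syntax.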
-let usesCustomInserter = 1, Predicates = [HasMips64, HasStandardEncoding],
+let usesCustomInserter = 1, Predicates = [HasStdEnc],
DecoderNamespace = "Mips64" in {
- defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64, "load_add_64">;
- defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">;
- defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64, "load_and_64">;
- defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64, "load_or_64">;
- defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64, "load_xor_64">;
- defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64, "load_nand_64">;
- defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64, "swap_64">;
- defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64, "64">;
+ defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64>;
+ defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64>;
+ defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64>;
+ defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64>;
+ defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64>;
+ defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64>;
+ defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64>;
+ defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>;
+}
+
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC128 : LoadM<"load_ac128", ACRegs128>;
+ defm STORE_AC128 : StoreM<"store_ac128", ACRegs128>;
}
+def COPY_AC128 : PseudoSE<(outs ACRegs128:$dst), (ins ACRegs128:$src), []>;
+
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
let DecoderNamespace = "Mips64" in {
/// Arithmetic Instructions (ALU Immediate)
-def DADDi : ArithOverflowI<0x18, "daddi", add, simm16_64, immSExt16,
- CPU64Regs>;
-def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16,
- CPU64Regs>, IsAsCheapAsAMove;
-def DANDi : ArithLogicI<0x0c, "andi", and, uimm16_64, immZExt16, CPU64Regs>;
-def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>;
-def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>;
-def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>;
-def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>;
-def LUi64 : LoadUpper<0x0f, "lui", CPU64Regs, uimm16_64>;
+def DADDi : ArithLogicI<"daddi", simm16_64, CPU64RegsOpnd>, ADDI_FM<0x18>;
+def DADDiu : ArithLogicI<"daddiu", simm16_64, CPU64RegsOpnd, immSExt16, add>,
+ ADDI_FM<0x19>, IsAsCheapAsAMove;
+def DANDi : ArithLogicI<"andi", uimm16_64, CPU64RegsOpnd, immZExt16, and>,
+ ADDI_FM<0xc>;
+def SLTi64 : SetCC_I<"slti", setlt, simm16_64, immSExt16, CPU64Regs>,
+ SLTI_FM<0xa>;
+def SLTiu64 : SetCC_I<"sltiu", setult, simm16_64, immSExt16, CPU64Regs>,
+ SLTI_FM<0xb>;
+def ORi64 : ArithLogicI<"ori", uimm16_64, CPU64RegsOpnd, immZExt16, or>,
+ ADDI_FM<0xd>;
+def XORi64 : ArithLogicI<"xori", uimm16_64, CPU64RegsOpnd, immZExt16, xor>,
+ ADDI_FM<0xe>;
+def LUi64 : LoadUpper<"lui", CPU64Regs, uimm16_64>, LUI_FM;
/// Arithmetic Instructions (3-Operand, R-Type)
-def DADD : ArithOverflowR<0x00, 0x2C, "dadd", IIAlu, CPU64Regs, 1>;
-def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>;
-def DSUBu : ArithLogicR<0x00, 0x2f, "dsubu", sub, IIAlu, CPU64Regs>;
-def SLT64 : SetCC_R<0x00, 0x2a, "slt", setlt, CPU64Regs>;
-def SLTu64 : SetCC_R<0x00, 0x2b, "sltu", setult, CPU64Regs>;
-def AND64 : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPU64Regs, 1>;
-def OR64 : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPU64Regs, 1>;
-def XOR64 : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPU64Regs, 1>;
-def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>;
+def DADD : ArithLogicR<"dadd", CPU64RegsOpnd>, ADD_FM<0, 0x2c>;
+def DADDu : ArithLogicR<"daddu", CPU64RegsOpnd, 1, IIAlu, add>,
+ ADD_FM<0, 0x2d>;
+def DSUBu : ArithLogicR<"dsubu", CPU64RegsOpnd, 0, IIAlu, sub>,
+ ADD_FM<0, 0x2f>;
+def SLT64 : SetCC_R<"slt", setlt, CPU64Regs>, ADD_FM<0, 0x2a>;
+def SLTu64 : SetCC_R<"sltu", setult, CPU64Regs>, ADD_FM<0, 0x2b>;
+def AND64 : ArithLogicR<"and", CPU64RegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
+def OR64 : ArithLogicR<"or", CPU64RegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
+def XOR64 : ArithLogicR<"xor", CPU64RegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
+def NOR64 : LogicNOR<"nor", CPU64RegsOpnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
-def DSLL : shift_rotate_imm64<0x38, 0x00, "dsll", shl>;
-def DSRL : shift_rotate_imm64<0x3a, 0x00, "dsrl", srl>;
-def DSRA : shift_rotate_imm64<0x3b, 0x00, "dsra", sra>;
-def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>;
-def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>;
-def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>;
-let Pattern = []<dag> in {
- def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>;
- def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>;
- def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>;
-}
+def DSLL : shift_rotate_imm<"dsll", shamt, CPU64RegsOpnd, shl, immZExt6>,
+ SRA_FM<0x38, 0>;
+def DSRL : shift_rotate_imm<"dsrl", shamt, CPU64RegsOpnd, srl, immZExt6>,
+ SRA_FM<0x3a, 0>;
+def DSRA : shift_rotate_imm<"dsra", shamt, CPU64RegsOpnd, sra, immZExt6>,
+ SRA_FM<0x3b, 0>;
+def DSLLV : shift_rotate_reg<"dsllv", CPU64RegsOpnd, shl>, SRLV_FM<0x14, 0>;
+def DSRLV : shift_rotate_reg<"dsrlv", CPU64RegsOpnd, srl>, SRLV_FM<0x16, 0>;
+def DSRAV : shift_rotate_reg<"dsrav", CPU64RegsOpnd, sra>, SRLV_FM<0x17, 0>;
+def DSLL32 : shift_rotate_imm<"dsll32", shamt, CPU64RegsOpnd>, SRA_FM<0x3c, 0>;
+def DSRL32 : shift_rotate_imm<"dsrl32", shamt, CPU64RegsOpnd>, SRA_FM<0x3e, 0>;
+def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64RegsOpnd>, SRA_FM<0x3f, 0>;
}
// Rotate Instructions
-let Predicates = [HasMips64r2, HasStandardEncoding],
+let Predicates = [HasMips64r2, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def DROTR : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>;
- def DROTRV : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>;
+ def DROTR : shift_rotate_imm<"drotr", shamt, CPU64RegsOpnd, rotr, immZExt6>,
+ SRA_FM<0x3a, 1>;
+ def DROTRV : shift_rotate_reg<"drotrv", CPU64RegsOpnd, rotr>,
+ SRLV_FM<0x16, 1>;
}
let DecoderNamespace = "Mips64" in {
/// Load and Store Instructions
/// aligned
-defm LB64 : LoadM64<0x20, "lb", sextloadi8>;
-defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>;
-defm LH64 : LoadM64<0x21, "lh", sextloadi16>;
-defm LHu64 : LoadM64<0x25, "lhu", zextloadi16>;
-defm LW64 : LoadM64<0x23, "lw", sextloadi32>;
-defm LWu64 : LoadM64<0x27, "lwu", zextloadi32>;
-defm SB64 : StoreM64<0x28, "sb", truncstorei8>;
-defm SH64 : StoreM64<0x29, "sh", truncstorei16>;
-defm SW64 : StoreM64<0x2b, "sw", truncstorei32>;
-defm LD : LoadM64<0x37, "ld", load>;
-defm SD : StoreM64<0x3f, "sd", store>;
+defm LB64 : LoadM<"lb", CPU64Regs, sextloadi8>, LW_FM<0x20>;
+defm LBu64 : LoadM<"lbu", CPU64Regs, zextloadi8>, LW_FM<0x24>;
+defm LH64 : LoadM<"lh", CPU64Regs, sextloadi16>, LW_FM<0x21>;
+defm LHu64 : LoadM<"lhu", CPU64Regs, zextloadi16>, LW_FM<0x25>;
+defm LW64 : LoadM<"lw", CPU64Regs, sextloadi32>, LW_FM<0x23>;
+defm LWu64 : LoadM<"lwu", CPU64Regs, zextloadi32>, LW_FM<0x27>;
+defm SB64 : StoreM<"sb", CPU64Regs, truncstorei8>, LW_FM<0x28>;
+defm SH64 : StoreM<"sh", CPU64Regs, truncstorei16>, LW_FM<0x29>;
+defm SW64 : StoreM<"sw", CPU64Regs, truncstorei32>, LW_FM<0x2b>;
+defm LD : LoadM<"ld", CPU64Regs, load>, LW_FM<0x37>;
+defm SD : StoreM<"sd", CPU64Regs, store>, LW_FM<0x3f>;
/// load/store left/right
-let isCodeGenOnly = 1 in {
- defm LWL64 : LoadLeftRightM64<0x22, "lwl", MipsLWL>;
- defm LWR64 : LoadLeftRightM64<0x26, "lwr", MipsLWR>;
- defm SWL64 : StoreLeftRightM64<0x2a, "swl", MipsSWL>;
- defm SWR64 : StoreLeftRightM64<0x2e, "swr", MipsSWR>;
-}
-defm LDL : LoadLeftRightM64<0x1a, "ldl", MipsLDL>;
-defm LDR : LoadLeftRightM64<0x1b, "ldr", MipsLDR>;
-defm SDL : StoreLeftRightM64<0x2c, "sdl", MipsSDL>;
-defm SDR : StoreLeftRightM64<0x2d, "sdr", MipsSDR>;
+defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>;
+defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, CPU64Regs>, LW_FM<0x26>;
+defm SWL64 : StoreLeftRightM<"swl", MipsSWL, CPU64Regs>, LW_FM<0x2a>;
+defm SWR64 : StoreLeftRightM<"swr", MipsSWR, CPU64Regs>, LW_FM<0x2e>;
+
+defm LDL : LoadLeftRightM<"ldl", MipsLDL, CPU64Regs>, LW_FM<0x1a>;
+defm LDR : LoadLeftRightM<"ldr", MipsLDR, CPU64Regs>, LW_FM<0x1b>;
+defm SDL : StoreLeftRightM<"sdl", MipsSDL, CPU64Regs>, LW_FM<0x2c>;
+defm SDR : StoreLeftRightM<"sdr", MipsSDR, CPU64Regs>, LW_FM<0x2d>;
/// Load-linked, Store-conditional
-def LLD : LLBase<0x34, "lld", CPU64Regs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
-def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let isCodeGenOnly = 1;
+let Predicates = [NotN64, HasStdEnc] in {
+ def LLD : LLBase<"lld", CPU64RegsOpnd, mem>, LW_FM<0x34>;
+ def SCD : SCBase<"scd", CPU64RegsOpnd, mem>, LW_FM<0x3c>;
}
-def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
-def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let isCodeGenOnly = 1;
+
+let Predicates = [IsN64, HasStdEnc], isCodeGenOnly = 1 in {
+ def LLD_P8 : LLBase<"lld", CPU64RegsOpnd, mem64>, LW_FM<0x34>;
+ def SCD_P8 : SCBase<"scd", CPU64RegsOpnd, mem64>, LW_FM<0x3c>;
}
/// Jump and Branch Instructions
-def JR64 : IndirectBranch<CPU64Regs>;
-def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>;
-def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>;
-def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>;
-def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>;
-def BLEZ64 : CBranchZero<0x06, 0, "blez", setle, CPU64Regs>;
-def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>;
+def JR64 : IndirectBranch<CPU64Regs>, MTLO_FM<8>;
+def BEQ64 : CBranch<"beq", seteq, CPU64Regs>, BEQ_FM<4>;
+def BNE64 : CBranch<"bne", setne, CPU64Regs>, BEQ_FM<5>;
+def BGEZ64 : CBranchZero<"bgez", setge, CPU64Regs>, BGEZ_FM<1, 1>;
+def BGTZ64 : CBranchZero<"bgtz", setgt, CPU64Regs>, BGEZ_FM<7, 0>;
+def BLEZ64 : CBranchZero<"blez", setle, CPU64Regs>, BGEZ_FM<6, 0>;
+def BLTZ64 : CBranchZero<"bltz", setlt, CPU64Regs>, BGEZ_FM<1, 0>;
}
let DecoderNamespace = "Mips64" in
-def JALR64 : JumpLinkReg<0x00, 0x09, "jalr", CPU64Regs>;
-def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, IsTailCall;
+def JALR64 : JumpLinkReg<"jalr", CPU64Regs>, JALR_FM;
+def JALR64Pseudo : JumpLinkRegPseudo<CPU64Regs, JALR64, RA_64>;
+def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, MTLO_FM<8>, IsTailCall;
let DecoderNamespace = "Mips64" in {
/// Multiply and Divide Instructions.
-def DMULT : Mult64<0x1c, "dmult", IIImul>;
-def DMULTu : Mult64<0x1d, "dmultu", IIImul>;
-def DSDIV : Div64<MipsDivRem, 0x1e, "ddiv", IIIdiv>;
-def DUDIV : Div64<MipsDivRemU, 0x1f, "ddivu", IIIdiv>;
-
-def MTHI64 : MoveToLOHI<0x11, "mthi", CPU64Regs, [HI64]>;
-def MTLO64 : MoveToLOHI<0x13, "mtlo", CPU64Regs, [LO64]>;
-def MFHI64 : MoveFromLOHI<0x10, "mfhi", CPU64Regs, [HI64]>;
-def MFLO64 : MoveFromLOHI<0x12, "mflo", CPU64Regs, [LO64]>;
+def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
+ MULT_FM<0, 0x1c>;
+def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
+ MULT_FM<0, 0x1d>;
+def PseudoDMULT : MultDivPseudo<DMULT, ACRegs128, CPU64RegsOpnd, MipsMult,
+ IIImul>;
+def PseudoDMULTu : MultDivPseudo<DMULTu, ACRegs128, CPU64RegsOpnd, MipsMultu,
+ IIImul>;
+def DSDIV : Div<"ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1e>;
+def DUDIV : Div<"ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1f>;
+def PseudoDSDIV : MultDivPseudo<DSDIV, ACRegs128, CPU64RegsOpnd, MipsDivRem,
+ IIIdiv, 0>;
+def PseudoDUDIV : MultDivPseudo<DUDIV, ACRegs128, CPU64RegsOpnd, MipsDivRemU,
+ IIIdiv, 0>;
+
+def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>;
+def MTLO64 : MoveToLOHI<"mtlo", CPU64Regs, [LO64]>, MTLO_FM<0x13>;
+def MFHI64 : MoveFromLOHI<"mfhi", CPU64Regs, [HI64]>, MFLO_FM<0x10>;
+def MFLO64 : MoveFromLOHI<"mflo", CPU64Regs, [LO64]>, MFLO_FM<0x12>;
/// Sign Ext In Register Instructions.
-def SEB64 : SignExtInReg<0x10, "seb", i8, CPU64Regs>;
-def SEH64 : SignExtInReg<0x18, "seh", i16, CPU64Regs>;
+def SEB64 : SignExtInReg<"seb", i8, CPU64Regs>, SEB_FM<0x10, 0x20>;
+def SEH64 : SignExtInReg<"seh", i16, CPU64Regs>, SEB_FM<0x18, 0x20>;
/// Count Leading
-def DCLZ : CountLeading0<0x24, "dclz", CPU64Regs>;
-def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>;
+def DCLZ : CountLeading0<"dclz", CPU64RegsOpnd>, CLO_FM<0x24>;
+def DCLO : CountLeading1<"dclo", CPU64RegsOpnd>, CLO_FM<0x25>;
/// Double Word Swap Bytes/HalfWords
-def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>;
-def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>;
+def DSBH : SubwordSwap<"dsbh", CPU64RegsOpnd>, SEB_FM<2, 0x24>;
+def DSHD : SubwordSwap<"dshd", CPU64RegsOpnd>, SEB_FM<5, 0x24>;
+
+def LEA_ADDiu64 : EffectiveAddress<"daddiu", CPU64Regs, mem_ea_64>, LW_FM<0x19>;
-def LEA_ADDiu64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
}
-let Uses = [SP_64], DecoderNamespace = "Mips64" in
-def DynAlloc64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
- Requires<[IsN64, HasStandardEncoding]>;
let DecoderNamespace = "Mips64" in {
-def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
+def RDHWR64 : ReadHardware<CPU64Regs, HW64RegsOpnd>, RDHWR_FM;
-def DEXT : ExtBase<3, "dext", CPU64Regs>;
+def DEXT : ExtBase<"dext", CPU64RegsOpnd>, EXT_FM<3>;
let Pattern = []<dag> in {
- def DEXTU : ExtBase<2, "dextu", CPU64Regs>;
- def DEXTM : ExtBase<1, "dextm", CPU64Regs>;
+ def DEXTU : ExtBase<"dextu", CPU64RegsOpnd>, EXT_FM<2>;
+ def DEXTM : ExtBase<"dextm", CPU64RegsOpnd>, EXT_FM<1>;
}
-def DINS : InsBase<7, "dins", CPU64Regs>;
+def DINS : InsBase<"dins", CPU64RegsOpnd>, EXT_FM<7>;
let Pattern = []<dag> in {
- def DINSU : InsBase<6, "dinsu", CPU64Regs>;
- def DINSM : InsBase<5, "dinsm", CPU64Regs>;
+ def DINSU : InsBase<"dinsu", CPU64RegsOpnd>, EXT_FM<6>;
+ def DINSM : InsBase<"dinsm", CPU64RegsOpnd>, EXT_FM<5>;
}
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
@@ -236,13 +246,13 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
//===----------------------------------------------------------------------===//
// extended loads
-let Predicates = [NotN64, HasStandardEncoding] in {
+let Predicates = [NotN64, HasStdEnc] in {
def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>;
def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>;
}
-let Predicates = [IsN64, HasStandardEncoding] in {
+let Predicates = [IsN64, HasStdEnc] in {
def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64_P8 addr:$src)>;
@@ -293,14 +303,10 @@ defm : SetgtPats<CPU64Regs, SLT64, SLTu64>;
defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
-// select MipsDynAlloc
-def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>,
- Requires<[IsN64, HasStandardEncoding]>;
-
// truncate
def : MipsPat<(i32 (trunc CPU64Regs:$src)),
(SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>,
- Requires<[IsN64, HasStandardEncoding]>;
+ Requires<[IsN64, HasStdEnc]>;
// 32-to-64-bit extension
def : MipsPat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
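
The truncation pattern above selects an i64-to-i32 trunc as a sub-register
extract followed by a shift by zero; on MIPS64, "sll ..., 0" sign-extends the
low 32 bits through the full register, yielding the canonical form that 32-bit
values must have. A C++-level model of the selected sequence (an editor's
illustrative sketch, not part of the patch):

    #include <cstdint>
    // Model of: (i32 (trunc CPU64Regs:$src)) ->
    //           (SLL (EXTRACT_SUBREG $src, sub_32), 0)
    int32_t truncToI32(int64_t Src) {
      int32_t Lo = static_cast<int32_t>(Src); // EXTRACT_SUBREG $src, sub_32
      return Lo << 0; // SLL $dst, $lo, 0: re-canonicalizes the 64-bit register
    }
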
@@ -314,37 +320,73 @@ def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)),
// bswap MipsPattern
def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
+// mflo/hi patterns.
+def : MipsPat<(i64 (ExtractLOHI ACRegs128:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegs128:$ac, imm:$lohi_idx)>;
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
-def : InstAlias<"move $dst,$src", (DADD CPU64Regs:$dst,CPU64Regs:$src,ZERO_64)>;
-
+def : InstAlias<"move $dst, $src",
+ (DADDu CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"move $dst, $src",
+ (OR64 CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"and $rs, $rt, $imm",
+ (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"slt $rs, $rt, $imm",
+ (SLTi64 CPURegsOpnd:$rs, CPU64Regs:$rt, simm16_64:$imm), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"xor $rs, $rt, $imm",
+ (XORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"not $rt, $rs",
+ (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs), 0>, Requires<[HasMips64]>;
+def : InstAlias<"jalr $rs", (JALR64 RA_64, CPU64Regs:$rs)>,
+ Requires<[HasMips64]>;
+def : InstAlias<"jal $rs", (JALR64 RA_64, CPU64Regs:$rs), 0>,
+ Requires<[HasMips64]>;
+def : InstAlias<"jal $rd,$rs", (JALR64 CPU64Regs:$rd, CPU64Regs:$rs), 0>,
+ Requires<[HasMips64]>;
+def : InstAlias<"daddu $rs, $rt, $imm",
+ (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
+ 1>;
+def : InstAlias<"dadd $rs, $rt, $imm",
+ (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
+ 1>;
+def : InstAlias<"or $rs, $rt, $imm",
+ (ORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>, Requires<[HasMips64]>;
/// Move between CPU and coprocessor registers
+
let DecoderNamespace = "Mips64" in {
-def MFC0_3OP64 : MFC3OP<0x10, 0, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">;
-def MTC0_3OP64 : MFC3OP<0x10, 4, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"mtc0\t$rt, $rd, $sel">;
-def MFC2_3OP64 : MFC3OP<0x12, 0, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">;
-def MTC2_3OP64 : MFC3OP<0x12, 4, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"mtc2\t$rt, $rd, $sel">;
-def DMFC0_3OP64 : MFC3OP<0x10, 1, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"dmfc0\t$rt, $rd, $sel">;
-def DMTC0_3OP64 : MFC3OP<0x10, 5, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"dmtc0\t$rt, $rd, $sel">;
-def DMFC2_3OP64 : MFC3OP<0x12, 1, (outs CPU64Regs:$rt),
- (ins CPU64Regs:$rd, uimm16:$sel),"dmfc2\t$rt, $rd, $sel">;
-def DMTC2_3OP64 : MFC3OP<0x12, 5, (outs CPU64Regs:$rd, uimm16:$sel),
- (ins CPU64Regs:$rt),"dmtc2\t$rt, $rd, $sel">;
+def DMFC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt),
+ (ins CPU64RegsOpnd:$rd, uimm16:$sel),
+ "dmfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 1>;
+def DMTC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel),
+ (ins CPU64RegsOpnd:$rt),
+ "dmtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 5>;
+def DMFC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt),
+ (ins CPU64RegsOpnd:$rd, uimm16:$sel),
+ "dmfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 1>;
+def DMTC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel),
+ (ins CPU64RegsOpnd:$rt),
+ "dmtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 5>;
}
+
// Two operand (implicit 0 selector) versions:
-def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
-def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
-def : InstAlias<"dmfc0 $rt, $rd", (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"dmtc0 $rt, $rd", (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
-def : InstAlias<"dmfc2 $rt, $rd", (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"dmtc2 $rt, $rd", (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+def : InstAlias<"dmfc0 $rt, $rd",
+ (DMFC0_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>;
+def : InstAlias<"dmtc0 $rt, $rd",
+ (DMTC0_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt), 0>;
+def : InstAlias<"dmfc2 $rt, $rd",
+ (DMFC2_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>;
+def : InstAlias<"dmtc2 $rt, $rd",
+ (DMTC2_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt), 0>;
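
The two-operand aliases above let the assembler accept "dmfc0 $rt, $rd" and
friends by filling in an implicit selector of 0. A hand-built equivalent of
the MCInst the generated assembly parser produces for the dmfc0 alias (a
sketch; the register choices are arbitrary, and the Mips::* enums come from
the generated register/instruction info):

    #include "llvm/MC/MCInst.h"
    using namespace llvm;

    MCInst buildDmfc0TwoOpAlias() {
      MCInst Inst;
      Inst.setOpcode(Mips::DMFC0_3OP64);
      Inst.addOperand(MCOperand::CreateReg(Mips::T0_64)); // $rt
      Inst.addOperand(MCOperand::CreateReg(Mips::S0_64)); // $rd
      Inst.addOperand(MCOperand::CreateImm(0));           // implicit $sel
      return Inst;
    }
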
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index bf2818d61df0..1876cb6ffae4 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,31 +13,33 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-asm-printer"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsELFStreamer.h"
#include "Mips.h"
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
#include "MipsMCInstLower.h"
-#include "InstPrinter/MipsInstPrinter.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/BasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
@@ -65,19 +67,28 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- // Do any auto-generated pseudo lowerings.
- if (emitPseudoExpansionLowering(OutStreamer, MI))
- return;
-
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
do {
- MCInst TmpInst0;
- MCInstLowering.Lower(I++, TmpInst0);
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, &*I))
+ continue;
+
+      // The inMips16Mode() test is not permanent. Some instructions are
+      // currently marked as pseudo, which would make the test fail for the
+      // wrong reason; that will be fixed soon. We need the check here
+      // because we are removing another test for this situation downstream
+      // in the call chain.
+      //
+ if (I->isPseudo() && !Subtarget->inMips16Mode())
+ llvm_unreachable("Pseudo opcode found in EmitInstruction()");
+ MCInst TmpInst0;
+ MCInstLowering.Lower(I, TmpInst0);
OutStreamer.EmitInstruction(TmpInst0);
- } while ((I != E) && I->isInsideBundle()); // Delay slot check
+ } while ((++I != E) && I->isInsideBundle()); // Delay slot check
}
//===----------------------------------------------------------------------===//
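
Moving the increment into the loop condition is what makes the early continue
safe: after a pseudo expansion succeeds, ++I still runs before the bundle test,
so the loop cannot spin on one instruction. The control flow, condensed (a
sketch; handledAsPseudo and lowerAndEmit are placeholder names for the calls
in the hunk above):

    MachineBasicBlock::const_instr_iterator I = MI; // head of the bundle
    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
    do {
      if (handledAsPseudo(I)) // emitPseudoExpansionLowering(OutStreamer, &*I)
        continue;             // jumps to the condition below, which does ++I
      lowerAndEmit(I);        // MCInstLowering.Lower() + EmitInstruction()
    } while ((++I != E) && I->isInsideBundle()); // walk the delay-slot bundle
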
@@ -139,7 +150,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
if (Mips::CPURegsRegClass.contains(Reg))
break;
- unsigned RegNum = getMipsRegisterNumbering(Reg);
+ unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
if (Mips::AFGR64RegClass.contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
@@ -154,7 +165,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
// Set CPU Bitmask.
for (; i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- unsigned RegNum = getMipsRegisterNumbering(Reg);
+ unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
CPUBitmask |= (1 << RegNum);
}
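
Both bitmask loops now ask TargetRegisterInfo for the hardware encoding
instead of going through the removed getMipsRegisterNumbering() helper. For
illustration (the value shown is simply the standard MIPS numbering of $s0):

    const TargetRegisterInfo *TRI = TM.getRegisterInfo();
    unsigned RegNum = TRI->getEncodingValue(Mips::S0); // 16: $s0 is GPR 16
    CPUBitmask |= (1 << RegNum);                       // mark $s0 as saved
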
@@ -221,6 +232,11 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
// OutStreamer.EmitRawText(StringRef("\t.set\tnomicromips"));
OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
}
+
+ if (Subtarget->inMicroMipsMode())
+ if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
+ MES->emitMipsSTOCG(*Subtarget, CurrentFnSym,
+ (unsigned)ELF::STO_MIPS_MICROMIPS);
OutStreamer.EmitLabel(CurrentFnSym);
}
@@ -236,10 +252,11 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
raw_svector_ostream OS(Str);
printSavedRegsBitmask(OS);
OutStreamer.EmitRawText(OS.str());
-
- OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
- OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
- OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ if (!Subtarget->inMips16Mode()) {
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ }
}
}
@@ -250,9 +267,11 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
// always be at the function end, and we can't emit and
// break with BB logic.
if (OutStreamer.hasRawTextSupport()) {
- OutStreamer.EmitRawText(StringRef("\t.set\tat"));
- OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
- OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ if (!Subtarget->inMips16Mode()) {
+ OutStreamer.EmitRawText(StringRef("\t.set\tat"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ }
OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
}
}
@@ -540,6 +559,18 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// return to previous section
if (OutStreamer.hasRawTextSupport())
OutStreamer.EmitRawText(StringRef("\t.previous"));
+
+}
+
+void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
+
+ if (OutStreamer.hasRawTextSupport()) return;
+
+ // Emit Mips ELF register info
+ Subtarget->getMReginfo().emitMipsReginfoSectionCG(
+ OutStreamer, getObjFileLowering(), *Subtarget);
+ if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
+ MES->emitELFHeaderFlagsCG(*Subtarget);
}
MachineLocation
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 94d8bfa10569..dbdaf266b75f 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -14,8 +14,8 @@
#ifndef MIPSASMPRINTER_H
#define MIPSASMPRINTER_H
-#include "MipsMachineFunction.h"
#include "MipsMCInstLower.h"
+#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Support/Compiler.h"
@@ -80,6 +80,7 @@ public:
void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = 0);
void EmitStartOfAsmFile(Module &M);
+ void EmitEndOfAsmFile(Module &M);
virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
};
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 78cf140def60..462def76cc80 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -96,6 +96,12 @@ def RetCC_MipsN : CallingConv<[
CCIfType<[f64], CCAssignToReg<[D0_64, D2_64]>>
]>;
+// In soft-float mode, register A0_64, instead of V1_64, is used to return a
+// long double value.
+def RetCC_F128Soft : CallingConv<[
+ CCIfType<[i64], CCAssignToReg<[V0_64, A0_64]>>
+]>;
+
//===----------------------------------------------------------------------===//
// Mips EABI Calling Convention
//===----------------------------------------------------------------------===//
@@ -139,17 +145,6 @@ def RetCC_MipsEABI : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
-// Mips Android Calling Convention
-//===----------------------------------------------------------------------===//
-
-def RetCC_MipsAndroid : CallingConv<[
- // f32 are returned in registers F0, F2, F1, F3
- CCIfType<[f32], CCAssignToReg<[F0, F2, F1, F3]>>,
-
- CCDelegateTo<RetCC_MipsO32>
-]>;
-
-//===----------------------------------------------------------------------===//
// Mips FastCC Calling Convention
//===----------------------------------------------------------------------===//
def CC_MipsO32_FastCC : CallingConv<[
@@ -209,7 +204,6 @@ def RetCC_Mips : CallingConv<[
CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>,
CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
- CCIfSubtarget<"isAndroid()", CCDelegateTo<RetCC_MipsAndroid>>,
CCDelegateTo<RetCC_MipsO32>
]>;
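
TableGen turns each CallingConv record into a C++ matcher in
MipsGenCallingConv.inc. Roughly what it emits for the new RetCC_F128Soft (a
hedged reconstruction for illustration, not the literal generated code):

    static bool RetCC_F128Soft(unsigned ValNo, MVT ValVT, MVT LocVT,
                               CCValAssign::LocInfo LocInfo,
                               ISD::ArgFlagsTy ArgFlags, CCState &State) {
      if (LocVT == MVT::i64) {
        static const uint16_t RegList[] = { Mips::V0_64, Mips::A0_64 };
        if (unsigned Reg = State.AllocateReg(RegList, 2)) {
          State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
          return false; // first i64 half goes to V0_64, second to A0_64
        }
      }
      return true; // not handled
    }
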
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 05090b84dece..1d86d903c12e 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -14,22 +14,23 @@
#define DEBUG_TYPE "jit"
#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsInstrInfo.h"
#include "MipsRelocations.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -62,67 +63,77 @@ class MipsCodeEmitter : public MachineFunctionPass {
static char ID;
- public:
- MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) :
- MachineFunctionPass(ID), JTI(0),
- II((const MipsInstrInfo *) tm.getInstrInfo()),
- TD(tm.getDataLayout()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {
- }
+public:
+ MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), JTI(0),
+ II((const MipsInstrInfo *) tm.getInstrInfo()), TD(tm.getDataLayout()),
+ TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const {
- return "Mips Machine Code Emitter";
- }
+ virtual const char *getPassName() const {
+ return "Mips Machine Code Emitter";
+ }
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+ void emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB);
- void emitInstruction(const MachineInstr &MI);
+private:
- private:
+ void emitWord(unsigned Word);
- void emitWord(unsigned Word);
+ /// Routines that handle operands which add machine relocations which are
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
- /// Routines that handle operands which add machine relocations which are
- /// fixed up by the relocation stage.
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub) const;
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
- void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
- void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
+ unsigned getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const;
- unsigned getRelocation(const MachineInstr &MI,
- const MachineOperand &MO) const;
+ unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getBranchTargetOpValue(const MachineInstr &MI,
- unsigned OpNo) const;
- unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
+ int Offset) const;
- void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
- int Offset) const;
- };
+ /// Expand pseudo instructions with accumulator register operands.
+ void expandACCInstr(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB, unsigned Opc) const;
+
+ /// \brief Expand pseudo instruction. Return true if MI was expanded.
+ bool expandPseudos(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB) const;
+};
}
char MipsCodeEmitter::ID = 0;
bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- JTI = ((MipsTargetMachine&) MF.getTarget()).getJITInfo();
- II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo();
- TD = ((const MipsTargetMachine&) MF.getTarget()).getDataLayout();
+ MipsTargetMachine &Target = static_cast<MipsTargetMachine &>(
+ const_cast<TargetMachine &>(MF.getTarget()));
+
+ JTI = Target.getJITInfo();
+ II = Target.getInstrInfo();
+ TD = Target.getDataLayout();
Subtarget = &TM.getSubtarget<MipsSubtarget> ();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
@@ -139,8 +150,8 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
MBB != E; ++MBB){
MCE.StartMachineBasicBlock(MBB);
for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
- E = MBB->instr_end(); I != E; ++I)
- emitInstruction(*I);
+ E = MBB->instr_end(); I != E;)
+ emitInstruction(*I++, *MBB);
}
} while (MCE.finishFunction(MF));
@@ -209,7 +220,7 @@ unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {
if (MO.isReg())
- return getMipsRegisterNumbering(MO.getReg());
+ return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
else if (MO.isGlobal())
@@ -265,19 +276,21 @@ void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
Reloc, BB));
}
-void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
- DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+void MipsCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI);
- MCE.processDebugLoc(MI.getDebugLoc(), true);
-
- // Skip pseudo instructions.
- if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo)
+ // Expand pseudo instruction. Skip if MI was not expanded.
+ if (((MI->getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) &&
+ !expandPseudos(MI, MBB))
return;
- emitWord(getBinaryCodeForInstr(MI));
+ MCE.processDebugLoc(MI->getDebugLoc(), true);
+
+ emitWord(getBinaryCodeForInstr(*MI));
++NumEmitted; // Keep track of the # of mi's emitted
- MCE.processDebugLoc(MI.getDebugLoc(), false);
+ MCE.processDebugLoc(MI->getDebugLoc(), false);
}
void MipsCodeEmitter::emitWord(unsigned Word) {
@@ -289,6 +302,57 @@ void MipsCodeEmitter::emitWord(unsigned Word) {
MCE.emitWordBE(Word);
}
+void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB,
+ unsigned Opc) const {
+ // Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1".
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Opc))
+ .addReg(MI->getOperand(1).getReg()).addReg(MI->getOperand(2).getReg());
+}
+
+bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB) const {
+ switch (MI->getOpcode()) {
+ case Mips::NOP:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO)
+ .addReg(Mips::ZERO).addImm(0);
+ break;
+ case Mips::JALRPseudo:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA)
+ .addReg(MI->getOperand(0).getReg());
+ break;
+ case Mips::PseudoMULT:
+ expandACCInstr(MI, MBB, Mips::MULT);
+ break;
+ case Mips::PseudoMULTu:
+ expandACCInstr(MI, MBB, Mips::MULTu);
+ break;
+ case Mips::PseudoSDIV:
+ expandACCInstr(MI, MBB, Mips::SDIV);
+ break;
+ case Mips::PseudoUDIV:
+ expandACCInstr(MI, MBB, Mips::UDIV);
+ break;
+ case Mips::PseudoMADD:
+ expandACCInstr(MI, MBB, Mips::MADD);
+ break;
+ case Mips::PseudoMADDU:
+ expandACCInstr(MI, MBB, Mips::MADDU);
+ break;
+ case Mips::PseudoMSUB:
+ expandACCInstr(MI, MBB, Mips::MSUB);
+ break;
+ case Mips::PseudoMSUBU:
+ expandACCInstr(MI, MBB, Mips::MSUBU);
+ break;
+ default:
+ return false;
+ }
+
+ (MI--)->eraseFromBundle();
+ return true;
+}
+
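
The "(MI--)->eraseFromBundle()" line does two jobs: it erases the pseudo that
MI points at, and the post-decrement rewinds MI to the real instruction that
BuildMI just inserted in front of it, so the caller's fall-through path
encodes the replacement. Spelled out (a sketch; Rs and Rt are hypothetical
operand registers):

    // Layout before erasure: ..., MULT (just inserted), PseudoMULT <- MI, ...
    BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::MULT))
        .addReg(Rs).addReg(Rt); // real MULT inserted before the pseudo
    (MI--)->eraseFromBundle();  // erase the pseudo; MI now names the MULT
    // expandPseudos() returns true, so emitInstruction() falls through to
    // getBinaryCodeForInstr(*MI) and encodes the freshly inserted MULT.
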
/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
/// code to the specified MCE object.
FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index b12b1f2b5ad4..42e4c99f05d6 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -16,42 +16,37 @@
// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
// conditional move instructions.
// cond:int, data:int
-class CondMovIntInt<RegisterClass CRC, RegisterClass DRC, bits<6> funct,
- string instr_asm> :
- FR<0, funct, (outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> {
- let shamt = 0;
+class CMov_I_I_FT<string opstr, RegisterClass CRC, RegisterClass DRC,
+ InstrItinClass Itin> :
+ InstSE<(outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F),
+ !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR> {
let Constraints = "$F = $rd";
}
// cond:int, data:float
-class CondMovIntFP<RegisterClass CRC, RegisterClass DRC, bits<5> fmt,
- bits<6> func, string instr_asm> :
- FFR<0x11, func, fmt, (outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F),
- !strconcat(instr_asm, "\t$fd, $fs, $rt"), []> {
- bits<5> rt;
- let ft = rt;
+class CMov_I_F_FT<string opstr, RegisterClass CRC, RegisterClass DRC,
+ InstrItinClass Itin> :
+ InstSE<(outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F),
+ !strconcat(opstr, "\t$fd, $fs, $rt"), [], Itin, FrmFR> {
let Constraints = "$F = $fd";
}
// cond:float, data:int
-class CondMovFPInt<RegisterClass RC, SDNode cmov, bits<1> tf,
- string instr_asm> :
- FCMOV<tf, (outs RC:$rd), (ins RC:$rs, RC:$F),
- !strconcat(instr_asm, "\t$rd, $rs, $$fcc0"),
- [(set RC:$rd, (cmov RC:$rs, RC:$F))]> {
- let cc = 0;
+class CMov_F_I_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$rd), (ins RC:$rs, RC:$F),
+ !strconcat(opstr, "\t$rd, $rs, $$fcc0"),
+ [(set RC:$rd, (OpNode RC:$rs, RC:$F))], Itin, FrmFR> {
let Uses = [FCR31];
let Constraints = "$F = $rd";
}
// cond:float, data:float
-class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
- string instr_asm> :
- FFCMOV<fmt, tf, (outs RC:$fd), (ins RC:$fs, RC:$F),
- !strconcat(instr_asm, "\t$fd, $fs, $$fcc0"),
- [(set RC:$fd, (cmov RC:$fs, RC:$F))]> {
- let cc = 0;
+class CMov_F_F_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fs, RC:$F),
+ !strconcat(opstr, "\t$fd, $fs, $$fcc0"),
+ [(set RC:$fd, (OpNode RC:$fs, RC:$F))], Itin, FrmFR> {
let Uses = [FCR31];
let Constraints = "$F = $fd";
}
@@ -63,21 +58,23 @@ multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC,
Instruction SLTiuOp> {
def : MipsPat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
(MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
- def : MipsPat<
- (select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
+ def : MipsPat<(select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setgt CRC:$lhs, immSExt16Plus1:$rhs)),
+ DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, (Plus1 imm:$rhs)), DRC:$F)>;
+ def : MipsPat<(select (i32 (setugt CRC:$lhs, immSExt16Plus1:$rhs)),
+ DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lhs, (Plus1 imm:$rhs)),
+ DRC:$F)>;
}
multiclass MovzPats1<RegisterClass CRC, RegisterClass DRC,
@@ -106,88 +103,110 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
}
// Instantiation of instructions.
-def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">;
-let Predicates = [HasMips64, HasStandardEncoding],
+def MOVZ_I_I : CMov_I_I_FT<"movz", CPURegs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xa>;
+let Predicates = [HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">;
- def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> {
+ def MOVZ_I_I64 : CMov_I_I_FT<"movz", CPURegs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xa>;
+ def MOVZ_I64_I : CMov_I_I_FT<"movz", CPU64Regs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xa> {
let isCodeGenOnly = 1;
}
- def MOVZ_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0a, "movz"> {
+ def MOVZ_I64_I64 : CMov_I_I_FT<"movz", CPU64Regs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xa> {
let isCodeGenOnly = 1;
}
}
-def MOVN_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0b, "movn">;
-let Predicates = [HasMips64, HasStandardEncoding],
+def MOVN_I_I : CMov_I_I_FT<"movn", CPURegs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xb>;
+let Predicates = [HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVN_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0b, "movn">;
- def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn"> {
+ def MOVN_I_I64 : CMov_I_I_FT<"movn", CPURegs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xb>;
+ def MOVN_I64_I : CMov_I_I_FT<"movn", CPU64Regs, CPURegs, NoItinerary>,
+ ADD_FM<0, 0xb> {
let isCodeGenOnly = 1;
}
- def MOVN_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0b, "movn"> {
+ def MOVN_I64_I64 : CMov_I_I_FT<"movn", CPU64Regs, CPU64Regs, NoItinerary>,
+ ADD_FM<0, 0xb> {
let isCodeGenOnly = 1;
}
}
-def MOVZ_I_S : CondMovIntFP<CPURegs, FGR32, 16, 18, "movz.s">;
-def MOVZ_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 18, "movz.s">,
- Requires<[HasMips64, HasStandardEncoding]> {
+def MOVZ_I_S : CMov_I_F_FT<"movz.s", CPURegs, FGR32, IIFmove>,
+ CMov_I_F_FM<18, 16>;
+def MOVZ_I64_S : CMov_I_F_FT<"movz.s", CPU64Regs, FGR32, IIFmove>,
+ CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
-def MOVN_I_S : CondMovIntFP<CPURegs, FGR32, 16, 19, "movn.s">;
-def MOVN_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 19, "movn.s">,
- Requires<[HasMips64, HasStandardEncoding]> {
+def MOVN_I_S : CMov_I_F_FT<"movn.s", CPURegs, FGR32, IIFmove>,
+ CMov_I_F_FM<19, 16>;
+def MOVN_I64_S : CMov_I_F_FT<"movn.s", CPU64Regs, FGR32, IIFmove>,
+ CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
- def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">;
- def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">;
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", CPURegs, AFGR64, IIFmove>,
+ CMov_I_F_FM<18, 17>;
+ def MOVN_I_D32 : CMov_I_F_FT<"movn.d", CPURegs, AFGR64, IIFmove>,
+ CMov_I_F_FM<19, 17>;
}
-let Predicates = [IsFP64bit, HasStandardEncoding],
+let Predicates = [IsFP64bit, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">;
- def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> {
+ def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", CPURegs, FGR64, IIFmove>,
+ CMov_I_F_FM<18, 17>;
+ def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", CPU64Regs, FGR64, IIFmove>,
+ CMov_I_F_FM<18, 17> {
let isCodeGenOnly = 1;
}
- def MOVN_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 19, "movn.d">;
- def MOVN_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 19, "movn.d"> {
+ def MOVN_I_D64 : CMov_I_F_FT<"movn.d", CPURegs, FGR64, IIFmove>,
+ CMov_I_F_FM<19, 17>;
+ def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", CPU64Regs, FGR64, IIFmove>,
+ CMov_I_F_FM<19, 17> {
let isCodeGenOnly = 1;
}
}
-def MOVT_I : CondMovFPInt<CPURegs, MipsCMovFP_T, 1, "movt">;
-def MOVT_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_T, 1, "movt">,
- Requires<[HasMips64, HasStandardEncoding]> {
+def MOVT_I : CMov_F_I_FT<"movt", CPURegs, IIAlu, MipsCMovFP_T>, CMov_F_I_FM<1>;
+def MOVT_I64 : CMov_F_I_FT<"movt", CPU64Regs, IIAlu, MipsCMovFP_T>,
+ CMov_F_I_FM<1>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
-def MOVF_I : CondMovFPInt<CPURegs, MipsCMovFP_F, 0, "movf">;
-def MOVF_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_F, 0, "movf">,
- Requires<[HasMips64, HasStandardEncoding]> {
+def MOVF_I : CMov_F_I_FT<"movf", CPURegs, IIAlu, MipsCMovFP_F>, CMov_F_I_FM<0>;
+def MOVF_I64 : CMov_F_I_FT<"movf", CPU64Regs, IIAlu, MipsCMovFP_F>,
+ CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
-def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
-def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
+def MOVT_S : CMov_F_F_FT<"movt.s", FGR32, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<16, 1>;
+def MOVF_S : CMov_F_F_FT<"movf.s", FGR32, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<16, 0>;
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
- def MOVT_D32 : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
- def MOVF_D32 : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<17, 1>;
+ def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<17, 0>;
}
-let Predicates = [IsFP64bit, HasStandardEncoding],
+let Predicates = [IsFP64bit, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def MOVT_D64 : CondMovFPFP<FGR64, MipsCMovFP_T, 17, 1, "movt.d">;
- def MOVF_D64 : CondMovFPFP<FGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+ def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64, IIFmove, MipsCMovFP_T>,
+ CMov_F_F_FM<17, 1>;
+ def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64, IIFmove, MipsCMovFP_F>,
+ CMov_F_F_FM<17, 0>;
}
// Instantiation of conditional move patterns.
defm : MovzPats0<CPURegs, CPURegs, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats1<CPURegs, CPURegs, MOVZ_I_I, XOR>;
defm : MovzPats2<CPURegs, CPURegs, MOVZ_I_I, XORi>;
-let Predicates = [HasMips64, HasStandardEncoding] in {
+let Predicates = [HasMips64, HasStdEnc] in {
defm : MovzPats0<CPURegs, CPU64Regs, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats0<CPU64Regs, CPURegs, MOVZ_I_I, SLT64, SLTu64, SLTi64,
SLTiu64>;
@@ -202,7 +221,7 @@ let Predicates = [HasMips64, HasStandardEncoding] in {
}
defm : MovnPats<CPURegs, CPURegs, MOVN_I_I, XOR>;
-let Predicates = [HasMips64, HasStandardEncoding] in {
+let Predicates = [HasMips64, HasStdEnc] in {
defm : MovnPats<CPURegs, CPU64Regs, MOVN_I_I64, XOR>;
defm : MovnPats<CPU64Regs, CPURegs, MOVN_I64_I, XOR64>;
defm : MovnPats<CPU64Regs, CPU64Regs, MOVN_I64_I64, XOR64>;
@@ -211,19 +230,19 @@ let Predicates = [HasMips64, HasStandardEncoding] in {
defm : MovzPats0<CPURegs, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats1<CPURegs, FGR32, MOVZ_I_S, XOR>;
defm : MovnPats<CPURegs, FGR32, MOVN_I_S, XOR>;
-let Predicates = [HasMips64, HasStandardEncoding] in {
+let Predicates = [HasMips64, HasStdEnc] in {
defm : MovzPats0<CPU64Regs, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64,
SLTiu64>;
defm : MovzPats1<CPU64Regs, FGR32, MOVZ_I64_S, XOR64>;
defm : MovnPats<CPU64Regs, FGR32, MOVN_I64_S, XOR64>;
}
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
+let Predicates = [NotFP64bit, HasStdEnc] in {
defm : MovzPats0<CPURegs, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats1<CPURegs, AFGR64, MOVZ_I_D32, XOR>;
defm : MovnPats<CPURegs, AFGR64, MOVN_I_D32, XOR>;
}
-let Predicates = [IsFP64bit, HasStandardEncoding] in {
+let Predicates = [IsFP64bit, HasStdEnc] in {
defm : MovzPats0<CPURegs, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats0<CPU64Regs, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64,
SLTiu64>;
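
MovzPats0 maps integer selects onto a compare plus a conditional move. Taking
its first pattern, a C-level model of what the selected MIPS code computes (an
illustrative sketch only):

    // select (i32 (setge $lhs, $rhs)), $T, $F
    //   ==> MOVZ $T, (SLT $lhs, $rhs), $F
    int selectSetge(int Lhs, int Rhs, int T, int F) {
      int Cond = (Lhs < Rhs) ? 1 : 0; // SLT $cond, $lhs, $rhs
      return (Cond == 0) ? T : F;     // MOVZ moves $T in when $cond is zero
    }
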
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
new file mode 100644
index 000000000000..b5de1ebad22b
--- /dev/null
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -0,0 +1,85 @@
+//===-- MipsConstantIslandPass.cpp - Emit PC-relative loads ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+// This pass emits PC-relative loads of constants. For now, only Mips16 uses
+// it. While it shares its name and many ideas with the LLVM ARM Constant
+// Island Pass, it is not intended to reuse any of the code from the ARM
+// version.
+//
+// Loading constants inline is expensive on Mips16, and it is generally better
+// to place a constant nearby in code space, where it can be loaded with a
+// simple 16-bit load instruction.
+//
+// The constants need not be numbers; they can also be addresses of functions
+// and labels. This is particularly helpful in static relocation mode for
+// embedded non-Linux targets.
+//
+//
+
+#define DEBUG_TYPE "mips-constant-islands"
+
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+
+ class MipsConstantIslands : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ MipsConstantIslands(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm),
+ TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_),
+ ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()) {}
+
+ virtual const char *getPassName() const {
+ return "Mips Constant Islands";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+
+
+ const TargetMachine &TM;
+ const MipsInstrInfo *TII;
+ bool IsPIC;
+ unsigned ABI;
+
+ };
+
+ char MipsConstantIslands::ID = 0;
+} // end of anonymous namespace
+
+/// createMipsConstantIslandPass - Returns a pass that converts loads of
+/// constants into PC-relative loads from constant islands.
+FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) {
+ return new MipsConstantIslands(tm);
+}
+
+bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) {
+ return true;
+}
+
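
runOnMachineFunction() is still a stub; this file only establishes the pass
skeleton and its factory function. How such a factory is typically scheduled
from the backend's TargetPassConfig (a hypothetical sketch; the actual hookup
would live in MipsTargetMachine.cpp, outside this hunk):

    bool MipsPassConfig::addPreEmitPass() {
      MipsTargetMachine &TM = getMipsTargetMachine();
      addPass(createMipsConstantIslandPass(TM)); // run before code emission
      return true;
    }
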
diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td
index 8e01d06596a1..a72a763fde06 100644
--- a/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -24,8 +24,9 @@ class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
let Predicates = [HasDSP];
}
-class PseudoDSP<dag outs, dag ins, list<dag> pattern>:
- MipsPseudo<outs, ins, "", pattern> {
+class PseudoDSP<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo>:
+ MipsPseudo<outs, ins, pattern, itin> {
let Predicates = [HasDSP];
}
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index ef9402865b0d..3c116e1264b3 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -20,17 +20,18 @@ def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
// Mips-specific dsp nodes
-def SDT_MipsExtr : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
-def SDT_MipsShilo : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-def SDT_MipsDPA : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>;
+def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, untyped>]>;
+def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>;
+def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
class MipsDSPBase<string Opc, SDTypeProfile Prof> :
- SDNode<!strconcat("MipsISD::", Opc), Prof,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+ SDNode<!strconcat("MipsISD::", Opc), Prof>;
class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> :
- SDNode<!strconcat("MipsISD::", Opc), Prof,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPSideEffect]>;
+ SDNode<!strconcat("MipsISD::", Opc), Prof, [SDNPHasChain, SDNPSideEffect]>;
def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>;
def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>;
@@ -40,7 +41,7 @@ def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>;
def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>;
def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>;
-def MipsMTHLIP : MipsDSPBase<"MTHLIP", SDT_MipsShilo>;
+def MipsMTHLIP : MipsDSPSideEffectBase<"MTHLIP", SDT_MipsShilo>;
def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>;
def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>;
@@ -75,10 +76,6 @@ def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>;
def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>;
// Flags.
-class IsCommutable {
- bit isCommutable = 1;
-}
-
class UseAC {
list<Register> Uses = [AC0];
}
@@ -387,7 +384,7 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
dag OutOperandList = (outs CPURegs:$rt);
- dag InOperandList = (ins ACRegs:$ac, CPURegs:$shift_rs);
+ dag InOperandList = (ins ACRegsDSP:$ac, CPURegs:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
list<Register> Defs = [DSPCtrl];
@@ -396,46 +393,40 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
dag OutOperandList = (outs CPURegs:$rt);
- dag InOperandList = (ins ACRegs:$ac, uimm16:$shift_rs);
+ dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
list<Register> Defs = [DSPCtrl];
}
-class SHILO_R1_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins simm16:$shift), [(OpNode immSExt6:$shift)]>,
- PseudoInstExpansion<(realinst AC0, simm16:$shift)> {
- list<Register> Defs = [DSPCtrl, AC0];
- list<Register> Uses = [AC0];
- InstrItinClass Itinerary = itin;
-}
-
-class SHILO_R1_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins simm16:$shift);
+class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins simm16:$shift, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode immSExt6:$shift, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$acin = $ac";
}
-class SHILO_R2_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins CPURegs:$rs), [(OpNode CPURegs:$rs)]>,
- PseudoInstExpansion<(realinst AC0, CPURegs:$rs)> {
- list<Register> Defs = [DSPCtrl, AC0];
- list<Register> Uses = [AC0];
- InstrItinClass Itinerary = itin;
-}
-
-class SHILO_R2_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins CPURegs:$rs);
+class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$acin = $ac";
}
-class MTHLIP_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins CPURegs:$rs);
+class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
+ list<Register> Uses = [DSPCtrl];
+ string Constraints = "$acin = $ac";
}
class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -458,39 +449,41 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<Register> Defs = [DSPCtrl];
}
-class DPA_W_PH_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
- [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
- PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
- list<Register> Defs = [DSPCtrl, AC0];
- list<Register> Uses = [AC0];
- InstrItinClass Itinerary = itin;
+class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin);
+ string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
+ list<Register> Defs = [DSPCtrl];
+ string Constraints = "$acin = $ac";
}
-class DPA_W_PH_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
+class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
-}
-
-class MULT_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin,
- Instruction realinst> :
- PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt),
- [(OpNode CPURegs:$rs, CPURegs:$rt)]>,
- PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> {
- list<Register> Defs = [DSPCtrl, AC0];
+ list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, CPURegs:$rt))];
InstrItinClass Itinerary = itin;
+ int AddedComplexity = 20;
+ bit isCommutable = 1;
}
-class MULT_DESC_BASE<string instr_asm> {
- dag OutOperandList = (outs ACRegs:$ac);
- dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt);
+class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs ACRegsDSP:$ac);
+ dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
+ list<dag> Pattern = [(set ACRegsDSP:$ac,
+ (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
+ InstrItinClass Itinerary = itin;
+ int AddedComplexity = 20;
+ string Constraints = "$acin = $ac";
}
class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
- MipsPseudo<(outs CPURegs:$dst), (ins), "", [(set CPURegs:$dst, (OpNode))]> {
+ MipsPseudo<(outs CPURegs:$dst), (ins), [(set CPURegs:$dst, (OpNode))]> {
list<Register> Uses = [DSPCtrl];
bit usesCustomInserter = 1;
}
@@ -721,44 +714,40 @@ class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph,
NoItinerary, DSPRegs, DSPRegs>,
IsCommutable;
-class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph">;
+class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph",
+ MipsMULSAQ_S_W_PH>;
-class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl">;
+class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>;
-class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr">;
+class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>;
-class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl">;
+class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>;
-class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr">;
+class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>;
// Dot product with accumulate/subtract
-class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl">;
-
-class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr">;
+class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>;
-class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl">;
+class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr", MipsDPAU_H_QBR>;
-class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr">;
+class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>;
-class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph">;
+class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>;
-class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph">;
+class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>;
-class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w">;
+class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>;
-class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w">;
+class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>;
-class MULT_DSP_DESC : MULT_DESC_BASE<"mult">;
+class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>;
-class MULTU_DSP_DESC : MULT_DESC_BASE<"multu">;
-
-class MADD_DSP_DESC : MULT_DESC_BASE<"madd">;
-
-class MADDU_DSP_DESC : MULT_DESC_BASE<"maddu">;
-
-class MSUB_DSP_DESC : MULT_DESC_BASE<"msub">;
-
-class MSUBU_DSP_DESC : MULT_DESC_BASE<"msubu">;
+class MULT_DSP_DESC : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>;
+class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>;
+class MADD_DSP_DESC : MADD_DESC_BASE<"madd", MipsMAdd, NoItinerary>;
+class MADDU_DSP_DESC : MADD_DESC_BASE<"maddu", MipsMAddu, NoItinerary>;
+class MSUB_DSP_DESC : MADD_DESC_BASE<"msub", MipsMSub, NoItinerary>;
+class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>;
// Comparison
class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb",
@@ -871,11 +860,11 @@ class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
NoItinerary>;
-class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo">;
+class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>;
-class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov">;
+class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>;
-class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip">;
+class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>;
class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>;
@@ -979,23 +968,25 @@ class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
IsCommutable;
// Dot product with accumulate/subtract
-class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph">;
+class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>;
-class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph">;
+class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>;
-class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph">;
+class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>;
-class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph">;
+class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph",
+ MipsDPAQX_SA_W_PH>;
-class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph">;
+class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>;
-class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph">;
+class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>;
-class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph">;
+class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>;
-class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph">;
+class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph",
+ MipsDPSQX_SA_W_PH>;
-class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph">;
+class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>;
// Precision reduce/expand
class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
@@ -1210,71 +1201,14 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC;
}
// Pseudos.
-def MULSAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSAQ_S_W_PH, NoItinerary,
- MULSAQ_S_W_PH>;
-def MAQ_S_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHL, NoItinerary,
- MAQ_S_W_PHL>;
-def MAQ_S_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHR, NoItinerary,
- MAQ_S_W_PHR>;
-def MAQ_SA_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHL, NoItinerary,
- MAQ_SA_W_PHL>;
-def MAQ_SA_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHR, NoItinerary,
- MAQ_SA_W_PHR>;
-def DPAU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBL, NoItinerary,
- DPAU_H_QBL>;
-def DPAU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBR, NoItinerary,
- DPAU_H_QBR>;
-def DPSU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBL, NoItinerary,
- DPSU_H_QBL>;
-def DPSU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBR, NoItinerary,
- DPSU_H_QBR>;
-def DPAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_S_W_PH, NoItinerary,
- DPAQ_S_W_PH>;
-def DPSQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_S_W_PH, NoItinerary,
- DPSQ_S_W_PH>;
-def DPAQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_SA_L_W, NoItinerary,
- DPAQ_SA_L_W>;
-def DPSQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_SA_L_W, NoItinerary,
- DPSQ_SA_L_W>;
-
-def MULT_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULT, NoItinerary, MULT_DSP>,
- IsCommutable;
-def MULTU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULTU, NoItinerary, MULTU_DSP>,
- IsCommutable;
-def MADD_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADD_DSP, NoItinerary, MADD_DSP>,
- IsCommutable, UseAC;
-def MADDU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADDU_DSP, NoItinerary, MADDU_DSP>,
- IsCommutable, UseAC;
-def MSUB_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUB_DSP, NoItinerary, MSUB_DSP>,
- UseAC;
-def MSUBU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUBU_DSP, NoItinerary, MSUBU_DSP>,
- UseAC;
-
-def SHILO_PSEUDO : SHILO_R1_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILO>;
-def SHILOV_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILOV>;
-def MTHLIP_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsMTHLIP, NoItinerary, MTHLIP>;
-
-let Predicates = [HasDSPR2] in {
-
-def DPA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPA_W_PH, NoItinerary, DPA_W_PH>;
-def DPS_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPS_W_PH, NoItinerary, DPS_W_PH>;
-def DPAQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_S_W_PH, NoItinerary,
- DPAQX_S_W_PH>;
-def DPAQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_SA_W_PH, NoItinerary,
- DPAQX_SA_W_PH>;
-def DPAX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAX_W_PH, NoItinerary,
- DPAX_W_PH>;
-def DPSX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSX_W_PH, NoItinerary,
- DPSX_W_PH>;
-def DPSQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_S_W_PH, NoItinerary,
- DPSQX_S_W_PH>;
-def DPSQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_SA_W_PH, NoItinerary,
- DPSQX_SA_W_PH>;
-def MULSA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSA_W_PH, NoItinerary,
- MULSA_W_PH>;
-
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSP>;
+ defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSP>;
}
+def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>;
+
// Patterns.
class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
Pat<pattern, result>, Requires<[pred]>;
@@ -1300,10 +1234,12 @@ def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
// Extr patterns.
class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
- DSPPat<(i32 (OpNode CPURegs:$rs)), (Instr AC0, CPURegs:$rs)>;
+ DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)),
+ (Instr ACRegsDSP:$ac, CPURegs:$rs)>;
class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> :
- DSPPat<(i32 (OpNode immZExt5:$shift)), (Instr AC0, immZExt5:$shift)>;
+ DSPPat<(i32 (OpNode immZExt5:$shift, ACRegsDSP:$ac)),
+ (Instr ACRegsDSP:$ac, immZExt5:$shift)>;
def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>;
def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>;
@@ -1317,3 +1253,19 @@ def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>;
def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>;
def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>;
def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>;
+
+// mflo/mfhi patterns.
+let AddedComplexity = 20 in
+def : DSPPat<(i32 (ExtractLOHI ACRegsDSP:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegsDSP:$ac, imm:$lohi_idx)>;
+
+// Indexed load patterns.
+class IndexedLoadPat<SDPatternOperator LoadNode, Instruction Instr> :
+ DSPPat<(i32 (LoadNode (add i32:$base, i32:$index))),
+ (Instr i32:$base, i32:$index)>;
+
+let AddedComplexity = 20 in {
+ def : IndexedLoadPat<zextloadi8, LBUX>;
+ def : IndexedLoadPat<sextloadi16, LHX>;
+ def : IndexedLoadPat<load, LWX>;
+}
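The net effect of the MipsDSPInstrInfo.td changes above is that the DSP
accumulators stop being implicit side effects (pseudos pinned to AC0) and
become explicit untyped values threaded through the selection patterns, with
Constraints = "$acin = $ac" tying the incoming accumulator to the result. A
minimal C++ model of that tied-operand dataflow, assuming nothing about the
real per-element DSP arithmetic (the helper below is illustrative only):

#include <cstdint>

// Hypothetical stand-in for a dpa.w.ph-style operation: the accumulator is an
// ordinary input operand whose result is tied back to the same register.
static int64_t dpa_w_ph(int64_t AcIn, int32_t Rs, int32_t Rt) {
  return AcIn + static_cast<int64_t>(Rs) * Rt; // read-modify-write of $ac
}

int main() {
  int64_t Ac = 0;            // one virtual accumulator register
  Ac = dpa_w_ph(Ac, 3, 4);   // "$acin = $ac": each step redefines the same reg
  Ac = dpa_w_ph(Ac, 5, 6);
  return Ac == 42 ? 0 : 1;   // exits 0; the chain accumulated 12 + 30
}

Making the accumulator an ordinary tied operand is what lets the new
LOAD_AC_DSP/STORE_AC_DSP/COPY_AC_DSP pseudos spill and copy it like any other
virtual register.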
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index e3c8ed75cf91..d07a595af38a 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -1,4 +1,4 @@
-//===-- DelaySlotFiller.cpp - Mips Delay Slot Filler ----------------------===//
+//===-- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,22 +7,28 @@
//
//===----------------------------------------------------------------------===//
//
-// Simple pass to fills delay slots with useful instructions.
+// Simple pass to fill delay slots with useful instructions.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "delay-slot-filler"
#include "Mips.h"
+#include "MipsInstrInfo.h"
#include "MipsTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -33,27 +39,143 @@ STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
static cl::opt<bool> DisableDelaySlotFiller(
"disable-mips-delay-filler",
cl::init(false),
- cl::desc("Disable the delay slot filler, which attempts to fill the Mips"
- "delay slots with useful instructions."),
+ cl::desc("Fill all delay slots with NOPs."),
cl::Hidden);
-// This option can be used to silence complaints by machine verifier passes.
-static cl::opt<bool> SkipDelaySlotFiller(
- "skip-mips-delay-filler",
+static cl::opt<bool> DisableForwardSearch(
+ "disable-mips-df-forward-search",
+ cl::init(true),
+ cl::desc("Disallow MIPS delay filler to search forward."),
+ cl::Hidden);
+
+static cl::opt<bool> DisableSuccBBSearch(
+ "disable-mips-df-succbb-search",
+ cl::init(true),
+ cl::desc("Disallow MIPS delay filler to search successor basic blocks."),
+ cl::Hidden);
+
+static cl::opt<bool> DisableBackwardSearch(
+ "disable-mips-df-backward-search",
cl::init(false),
- cl::desc("Skip MIPS' delay slot filling pass."),
+ cl::desc("Disallow MIPS delay filler to search backward."),
cl::Hidden);
namespace {
- struct Filler : public MachineFunctionPass {
- typedef MachineBasicBlock::instr_iterator InstrIter;
- typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter;
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+ typedef SmallDenseMap<MachineBasicBlock*, MachineInstr*, 2> BB2BrMap;
+
+  /// \brief A functor comparing the edge weights of two blocks.
+ struct CmpWeight {
+ CmpWeight(const MachineBasicBlock &S,
+ const MachineBranchProbabilityInfo &P) : Src(S), Prob(P) {}
+
+ bool operator()(const MachineBasicBlock *Dst0,
+ const MachineBasicBlock *Dst1) const {
+ return Prob.getEdgeWeight(&Src, Dst0) < Prob.getEdgeWeight(&Src, Dst1);
+ }
- TargetMachine &TM;
- const TargetInstrInfo *TII;
- InstrIter LastFiller;
+ const MachineBasicBlock &Src;
+ const MachineBranchProbabilityInfo &Prob;
+ };
- static char ID;
+ class RegDefsUses {
+ public:
+ RegDefsUses(TargetMachine &TM);
+ void init(const MachineInstr &MI);
+
+ /// This function sets all caller-saved registers in Defs.
+ void setCallerSaved(const MachineInstr &MI);
+
+ /// This function sets all unallocatable registers in Defs.
+ void setUnallocatableRegs(const MachineFunction &MF);
+
+ /// Set bits in Uses corresponding to MBB's live-out registers except for
+ /// the registers that are live-in to SuccBB.
+ void addLiveOut(const MachineBasicBlock &MBB,
+ const MachineBasicBlock &SuccBB);
+
+ bool update(const MachineInstr &MI, unsigned Begin, unsigned End);
+
+ private:
+ bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg,
+ bool IsDef) const;
+
+    /// Returns true if Reg or any of its aliases is in RegSet.
+ bool isRegInSet(const BitVector &RegSet, unsigned Reg) const;
+
+ const TargetRegisterInfo &TRI;
+ BitVector Defs, Uses;
+ };
+
+ /// Base class for inspecting loads and stores.
+ class InspectMemInstr {
+ public:
+ InspectMemInstr(bool ForbidMemInstr_)
+ : OrigSeenLoad(false), OrigSeenStore(false), SeenLoad(false),
+ SeenStore(false), ForbidMemInstr(ForbidMemInstr_) {}
+
+    /// Return true if MI cannot be moved to the delay slot.
+ bool hasHazard(const MachineInstr &MI);
+
+ virtual ~InspectMemInstr() {}
+
+ protected:
+ /// Flags indicating whether loads or stores have been seen.
+ bool OrigSeenLoad, OrigSeenStore, SeenLoad, SeenStore;
+
+    /// Memory instructions are not allowed to move to the delay slot if this
+    /// flag is true.
+ bool ForbidMemInstr;
+
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI) = 0;
+ };
+
+ /// This subclass rejects any memory instructions.
+ class NoMemInstr : public InspectMemInstr {
+ public:
+ NoMemInstr() : InspectMemInstr(true) {}
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI) { return true; }
+ };
+
+ /// This subclass accepts loads from stacks and constant loads.
+ class LoadFromStackOrConst : public InspectMemInstr {
+ public:
+ LoadFromStackOrConst() : InspectMemInstr(false) {}
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI);
+ };
+
+ /// This subclass uses memory dependence information to determine whether a
+ /// memory instruction can be moved to a delay slot.
+ class MemDefsUses : public InspectMemInstr {
+ public:
+ MemDefsUses(const MachineFrameInfo *MFI);
+
+ private:
+ virtual bool hasHazard_(const MachineInstr &MI);
+
+    /// Update Defs and Uses. Return true if there exist dependences between V
+    /// and the values in Uses and Defs that disqualify the delay slot
+    /// candidate.
+ bool updateDefsUses(const Value *V, bool MayStore);
+
+ /// Get the list of underlying objects of MI's memory operand.
+ bool getUnderlyingObjects(const MachineInstr &MI,
+ SmallVectorImpl<const Value *> &Objects) const;
+
+ const MachineFrameInfo *MFI;
+ SmallPtrSet<const Value*, 4> Uses, Defs;
+
+ /// Flags indicating whether loads or stores with no underlying objects have
+ /// been seen.
+ bool SeenNoObjLoad, SeenNoObjStore;
+ };
+
+ class Filler : public MachineFunctionPass {
+ public:
Filler(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
@@ -61,11 +183,7 @@ namespace {
return "Mips Delay Slot Filler";
}
- bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) {
- if (SkipDelaySlotFiller)
- return false;
-
bool Changed = false;
for (MachineFunction::iterator FI = F.begin(), FE = F.end();
FI != FE; ++FI)
@@ -73,66 +191,334 @@ namespace {
return Changed;
}
- bool isDelayFiller(MachineBasicBlock &MBB,
- InstrIter candidate);
-
- void insertCallUses(InstrIter MI,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses);
-
- void insertDefsUses(InstrIter MI,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses);
-
- bool IsRegInSet(SmallSet<unsigned, 32> &RegSet,
- unsigned Reg);
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
- bool delayHasHazard(InstrIter candidate,
- bool &sawLoad, bool &sawStore,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses);
+ private:
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
- bool
- findDelayInstr(MachineBasicBlock &MBB, InstrIter slot,
- InstrIter &Filler);
+    /// This function checks whether it is valid to move Candidate into the
+    /// delay slot and returns true if it is not. It also updates memory and
+    /// register dependence information.
+ bool delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
+ InspectMemInstr &IM) const;
+
+ /// This function searches range [Begin, End) for an instruction that can be
+ /// moved to the delay slot. Returns true on success.
+ template<typename IterTy>
+ bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
+ RegDefsUses &RegDU, InspectMemInstr &IM,
+ IterTy &Filler) const;
+
+ /// This function searches in the backward direction for an instruction that
+ /// can be moved to the delay slot. Returns true on success.
+ bool searchBackward(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// This function searches MBB in the forward direction for an instruction
+ /// that can be moved to the delay slot. Returns true on success.
+ bool searchForward(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// This function searches one of MBB's successor blocks for an instruction
+ /// that can be moved to the delay slot and inserts clones of the
+ /// instruction into the successor's predecessor blocks.
+ bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const;
+
+ /// Pick a successor block of MBB. Return NULL if MBB doesn't have a
+ /// successor block that is not a landing pad.
+ MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const;
+
+    /// This function analyzes MBB and returns an instruction that branches to
+    /// Dst and has an unoccupied delay slot.
+ std::pair<MipsInstrInfo::BranchType, MachineInstr *>
+ getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const;
+
+ /// Examine Pred and see if it is possible to insert an instruction into
+    /// one of its branches' delay slots or at its end.
+ bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
+ RegDefsUses &RegDU, bool &HasMultipleSuccs,
+ BB2BrMap &BrMap) const;
+
+ bool terminateSearch(const MachineInstr &Candidate) const;
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ static char ID;
};
char Filler::ID = 0;
} // end of anonymous namespace
+static bool hasUnoccupiedSlot(const MachineInstr *MI) {
+ return MI->hasDelaySlot() && !MI->isBundledWithSucc();
+}
+
+/// This function inserts clones of Filler into predecessor blocks.
+static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) {
+ MachineFunction *MF = Filler->getParent()->getParent();
+
+ for (BB2BrMap::const_iterator I = BrMap.begin(); I != BrMap.end(); ++I) {
+ if (I->second) {
+ MIBundleBuilder(I->second).append(MF->CloneMachineInstr(&*Filler));
+ ++UsefulSlots;
+ } else {
+ I->first->insert(I->first->end(), MF->CloneMachineInstr(&*Filler));
+ }
+ }
+}
+
+/// This function adds registers Filler defines to MBB's live-in register list.
+static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
+ for (unsigned I = 0, E = Filler->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Filler->getOperand(I);
+ unsigned R;
+
+ if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg()))
+ continue;
+
+#ifndef NDEBUG
+ const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getTarget().getRegisterInfo()->getAllocatableSet(MF).test(R) &&
+ "Shouldn't move an instruction with unallocatable registers across "
+ "basic block boundaries.");
+#endif
+
+ if (!MBB.isLiveIn(R))
+ MBB.addLiveIn(R);
+ }
+}
+
+RegDefsUses::RegDefsUses(TargetMachine &TM)
+ : TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false),
+ Uses(TRI.getNumRegs(), false) {}
+
+void RegDefsUses::init(const MachineInstr &MI) {
+ // Add all register operands which are explicit and non-variadic.
+ update(MI, 0, MI.getDesc().getNumOperands());
+
+ // If MI is a call, add RA to Defs to prevent users of RA from going into
+  // the delay slot.
+ if (MI.isCall())
+ Defs.set(Mips::RA);
+
+ // Add all implicit register operands of branch instructions except
+ // register AT.
+ if (MI.isBranch()) {
+ update(MI, MI.getDesc().getNumOperands(), MI.getNumOperands());
+ Defs.reset(Mips::AT);
+ }
+}
+
+void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
+ assert(MI.isCall());
+
+ // If MI is a call, add all caller-saved registers to Defs.
+ BitVector CallerSavedRegs(TRI.getNumRegs(), true);
+
+ CallerSavedRegs.reset(Mips::ZERO);
+ CallerSavedRegs.reset(Mips::ZERO_64);
+
+ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(); *R; ++R)
+ for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI)
+ CallerSavedRegs.reset(*AI);
+
+ Defs |= CallerSavedRegs;
+}
+
+void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
+ BitVector AllocSet = TRI.getAllocatableSet(MF);
+
+ for (int R = AllocSet.find_first(); R != -1; R = AllocSet.find_next(R))
+ for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
+ AllocSet.set(*AI);
+
+ AllocSet.set(Mips::ZERO);
+ AllocSet.set(Mips::ZERO_64);
+
+ Defs |= AllocSet.flip();
+}
+
+void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
+ const MachineBasicBlock &SuccBB) {
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ if (*SI != &SuccBB)
+ for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
+ LE = (*SI)->livein_end(); LI != LE; ++LI)
+ Uses.set(*LI);
+}
+
+bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
+ BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs());
+ bool HasHazard = false;
+
+ for (unsigned I = Begin; I != End; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+
+ if (MO.isReg() && MO.getReg())
+ HasHazard |= checkRegDefsUses(NewDefs, NewUses, MO.getReg(), MO.isDef());
+ }
+
+ Defs |= NewDefs;
+ Uses |= NewUses;
+
+ return HasHazard;
+}
+
+bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses,
+ unsigned Reg, bool IsDef) const {
+ if (IsDef) {
+ NewDefs.set(Reg);
+    // Check whether Reg has already been defined or used.
+ return (isRegInSet(Defs, Reg) || isRegInSet(Uses, Reg));
+ }
+
+ NewUses.set(Reg);
+  // Check whether Reg has already been defined.
+ return isRegInSet(Defs, Reg);
+}
+
+bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const {
+ // Check Reg and all aliased Registers.
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ if (RegSet.test(*AI))
+ return true;
+ return false;
+}
+
+bool InspectMemInstr::hasHazard(const MachineInstr &MI) {
+ if (!MI.mayStore() && !MI.mayLoad())
+ return false;
+
+ if (ForbidMemInstr)
+ return true;
+
+ OrigSeenLoad = SeenLoad;
+ OrigSeenStore = SeenStore;
+ SeenLoad |= MI.mayLoad();
+ SeenStore |= MI.mayStore();
+
+ // If MI is an ordered or volatile memory reference, disallow moving
+  // subsequent loads and stores to the delay slot.
+ if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) {
+ ForbidMemInstr = true;
+ return true;
+ }
+
+ return hasHazard_(MI);
+}
+
+bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
+ if (MI.mayStore())
+ return true;
+
+ if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue())
+ return true;
+
+ const Value *V = (*MI.memoperands_begin())->getValue();
+
+ if (isa<FixedStackPseudoSourceValue>(V))
+ return false;
+
+ if (const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V))
+ return !PSV->PseudoSourceValue::isConstant(0) &&
+ (V != PseudoSourceValue::getStack());
+
+ return true;
+}
+
+MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
+ : InspectMemInstr(false), MFI(MFI_), SeenNoObjLoad(false),
+ SeenNoObjStore(false) {}
+
+bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
+ bool HasHazard = false;
+ SmallVector<const Value *, 4> Objs;
+
+ // Check underlying object list.
+ if (getUnderlyingObjects(MI, Objs)) {
+ for (SmallVector<const Value *, 4>::const_iterator I = Objs.begin();
+ I != Objs.end(); ++I)
+ HasHazard |= updateDefsUses(*I, MI.mayStore());
+
+ return HasHazard;
+ }
+
+ // No underlying objects found.
+ HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore);
+ HasHazard |= MI.mayLoad() || OrigSeenStore;
+
+ SeenNoObjLoad |= MI.mayLoad();
+ SeenNoObjStore |= MI.mayStore();
+
+ return HasHazard;
+}
+
+bool MemDefsUses::updateDefsUses(const Value *V, bool MayStore) {
+ if (MayStore)
+ return !Defs.insert(V) || Uses.count(V) || SeenNoObjStore || SeenNoObjLoad;
+
+ Uses.insert(V);
+ return Defs.count(V) || SeenNoObjStore;
+}
+
+bool MemDefsUses::
+getUnderlyingObjects(const MachineInstr &MI,
+ SmallVectorImpl<const Value *> &Objects) const {
+ if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue())
+ return false;
+
+ const Value *V = (*MI.memoperands_begin())->getValue();
+
+ SmallVector<Value *, 4> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+
+ for (SmallVector<Value*, 4>::iterator I = Objs.begin(), E = Objs.end();
+ I != E; ++I) {
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(*I)) {
+ if (PSV->isAliased(MFI))
+ return false;
+ } else if (!isIdentifiedObject(V))
+ return false;
+
+ Objects.push_back(*I);
+ }
+
+ return true;
+}
+
/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
/// We assume there is only one delay slot per delayed instruction.
-bool Filler::
-runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- LastFiller = MBB.instr_end();
-
- for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I)
- if (I->hasDelaySlot()) {
- ++FilledSlots;
- Changed = true;
-
- InstrIter D;
-
- // Delay slot filling is disabled at -O0.
- if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) &&
- findDelayInstr(MBB, I, D)) {
- MBB.splice(llvm::next(I), &MBB, D);
- ++UsefulSlots;
- } else
- BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
-
- // Record the filler instruction that filled the delay slot.
- // The instruction after it will be visited in the next iteration.
- LastFiller = ++I;
-
- // Set InsideBundle bit so that the machine verifier doesn't expect this
- // instruction to be a terminator.
- LastFiller->setIsInsideBundle();
- }
- return Changed;
+ for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
+ if (!hasUnoccupiedSlot(&*I))
+ continue;
+
+ ++FilledSlots;
+ Changed = true;
+
+ // Delay slot filling is disabled at -O0.
+ if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) {
+ if (searchBackward(MBB, I))
+ continue;
+
+ if (I->isTerminator()) {
+ if (searchSuccBBs(MBB, I))
+ continue;
+ } else if (searchForward(MBB, I)) {
+ continue;
+ }
+ }
+
+    // Bundle the NOP with the instruction that has the delay slot.
+ BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
+ MIBundleBuilder(MBB, I, llvm::next(llvm::next(I)));
+ }
+
+ return Changed;
}
/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
@@ -141,129 +527,195 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
return new Filler(tm);
}
-bool Filler::findDelayInstr(MachineBasicBlock &MBB,
- InstrIter slot,
- InstrIter &Filler) {
- SmallSet<unsigned, 32> RegDefs;
- SmallSet<unsigned, 32> RegUses;
-
- insertDefsUses(slot, RegDefs, RegUses);
-
- bool sawLoad = false;
- bool sawStore = false;
-
- for (ReverseInstrIter I(slot); I != MBB.instr_rend(); ++I) {
+template<typename IterTy>
+bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
+ RegDefsUses &RegDU, InspectMemInstr& IM,
+ IterTy &Filler) const {
+ for (IterTy I = Begin; I != End; ++I) {
// skip debug value
if (I->isDebugValue())
continue;
- // Convert to forward iterator.
- InstrIter FI(llvm::next(I).base());
-
- if (I->hasUnmodeledSideEffects()
- || I->isInlineAsm()
- || I->isLabel()
- || FI == LastFiller
- || I->isPseudo()
- //
- // Should not allow:
- // ERET, DERET or WAIT, PAUSE. Need to add these to instruction
- // list. TBD.
- )
+ if (terminateSearch(*I))
break;
- if (delayHasHazard(FI, sawLoad, sawStore, RegDefs, RegUses)) {
- insertDefsUses(FI, RegDefs, RegUses);
+ assert((!I->isCall() && !I->isReturn() && !I->isBranch()) &&
+ "Cannot put calls, returns or branches in delay slot.");
+
+ if (delayHasHazard(*I, RegDU, IM))
continue;
- }
- Filler = FI;
+ Filler = I;
return true;
}
return false;
}
-bool Filler::delayHasHazard(InstrIter candidate,
- bool &sawLoad, bool &sawStore,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses) {
- if (candidate->isImplicitDef() || candidate->isKill())
- return true;
+bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
+ if (DisableBackwardSearch)
+ return false;
- // Loads or stores cannot be moved past a store to the delay slot
- // and stores cannot be moved past a load.
- if (candidate->mayLoad()) {
- if (sawStore)
- return true;
- sawLoad = true;
- }
+ RegDefsUses RegDU(TM);
+ MemDefsUses MemDU(MBB.getParent()->getFrameInfo());
+ ReverseIter Filler;
- if (candidate->mayStore()) {
- if (sawStore)
- return true;
- sawStore = true;
- if (sawLoad)
- return true;
+ RegDU.init(*Slot);
+
+ if (searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler)) {
+ MBB.splice(llvm::next(Slot), &MBB, llvm::next(Filler).base());
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
}
- assert((!candidate->isCall() && !candidate->isReturn()) &&
- "Cannot put calls or returns in delay slot.");
+ return false;
+}
- for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
- const MachineOperand &MO = candidate->getOperand(i);
- unsigned Reg;
+bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
+ // Can handle only calls.
+ if (DisableForwardSearch || !Slot->isCall())
+ return false;
- if (!MO.isReg() || !(Reg = MO.getReg()))
- continue; // skip
+ RegDefsUses RegDU(TM);
+ NoMemInstr NM;
+ Iter Filler;
- if (MO.isDef()) {
- // check whether Reg is defined or used before delay slot.
- if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
- return true;
- }
- if (MO.isUse()) {
- // check whether Reg is defined before delay slot.
- if (IsRegInSet(RegDefs, Reg))
- return true;
- }
+ RegDU.setCallerSaved(*Slot);
+
+ if (searchRange(MBB, llvm::next(Slot), MBB.end(), RegDU, NM, Filler)) {
+ MBB.splice(llvm::next(Slot), &MBB, Filler);
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
}
+
return false;
}
-// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
-void Filler::insertDefsUses(InstrIter MI,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses) {
- // If MI is a call or return, just examine the explicit non-variadic operands.
- MCInstrDesc MCID = MI->getDesc();
- unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
- MI->getNumOperands();
+bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
+ if (DisableSuccBBSearch)
+ return false;
+
+ MachineBasicBlock *SuccBB = selectSuccBB(MBB);
+
+ if (!SuccBB)
+ return false;
+
+ RegDefsUses RegDU(TM);
+ bool HasMultipleSuccs = false;
+ BB2BrMap BrMap;
+ OwningPtr<InspectMemInstr> IM;
+ Iter Filler;
+
+ // Iterate over SuccBB's predecessor list.
+ for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
+ PE = SuccBB->pred_end(); PI != PE; ++PI)
+ if (!examinePred(**PI, *SuccBB, RegDU, HasMultipleSuccs, BrMap))
+ return false;
+
+ // Do not allow moving instructions which have unallocatable register operands
+ // across basic block boundaries.
+ RegDU.setUnallocatableRegs(*MBB.getParent());
+
+  // Only allow moving loads from the stack or from constants if any of
+  // SuccBB's predecessors have multiple successors.
+ if (HasMultipleSuccs) {
+ IM.reset(new LoadFromStackOrConst());
+ } else {
+ const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
+ IM.reset(new MemDefsUses(MFI));
+ }
- // Add RA to RegDefs to prevent users of RA from going into delay slot.
- if (MI->isCall())
- RegDefs.insert(Mips::RA);
+ if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler))
+ return false;
- for (unsigned i = 0; i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- unsigned Reg;
+ insertDelayFiller(Filler, BrMap);
+ addLiveInRegs(Filler, *SuccBB);
+ Filler->eraseFromParent();
- if (!MO.isReg() || !(Reg = MO.getReg()))
- continue;
+ return true;
+}
+
+MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
+ if (B.succ_empty())
+ return NULL;
+
+  // Select the successor with the largest edge weight.
+ CmpWeight Cmp(B, getAnalysis<MachineBranchProbabilityInfo>());
+ MachineBasicBlock *S = *std::max_element(B.succ_begin(), B.succ_end(), Cmp);
+ return S->isLandingPad() ? NULL : S;
+}
+
+std::pair<MipsInstrInfo::BranchType, MachineInstr *>
+Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const {
+ const MipsInstrInfo *TII =
+ static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
+ MachineBasicBlock *TrueBB = 0, *FalseBB = 0;
+ SmallVector<MachineInstr*, 2> BranchInstrs;
+ SmallVector<MachineOperand, 2> Cond;
+
+ MipsInstrInfo::BranchType R =
+ TII->AnalyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs);
+
+ if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch))
+ return std::make_pair(R, (MachineInstr*)NULL);
+
+ if (R != MipsInstrInfo::BT_CondUncond) {
+ if (!hasUnoccupiedSlot(BranchInstrs[0]))
+ return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+
+ assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst)));
+
+ return std::make_pair(R, BranchInstrs[0]);
+ }
+
+ assert((TrueBB == &Dst) || (FalseBB == &Dst));
- if (MO.isDef())
- RegDefs.insert(Reg);
- else if (MO.isUse())
- RegUses.insert(Reg);
+ // Examine the conditional branch. See if its slot is occupied.
+ if (hasUnoccupiedSlot(BranchInstrs[0]))
+ return std::make_pair(MipsInstrInfo::BT_Cond, BranchInstrs[0]);
+
+ // If that fails, try the unconditional branch.
+ if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst))
+ return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]);
+
+ return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+}
+
+bool Filler::examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
+ RegDefsUses &RegDU, bool &HasMultipleSuccs,
+ BB2BrMap &BrMap) const {
+ std::pair<MipsInstrInfo::BranchType, MachineInstr *> P =
+ getBranch(Pred, Succ);
+
+  // Return false if getBranch wasn't able to analyze the branches or there
+  // were no branches with unoccupied slots.
+ if (P.first == MipsInstrInfo::BT_None)
+ return false;
+
+ if ((P.first != MipsInstrInfo::BT_Uncond) &&
+ (P.first != MipsInstrInfo::BT_NoBranch)) {
+ HasMultipleSuccs = true;
+ RegDU.addLiveOut(Pred, Succ);
}
+
+ BrMap[&Pred] = P.second;
+ return true;
}
-//returns true if the Reg or its alias is in the RegSet.
-bool Filler::IsRegInSet(SmallSet<unsigned, 32> &RegSet, unsigned Reg) {
- // Check Reg and all aliased Registers.
- for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true);
- AI.isValid(); ++AI)
- if (RegSet.count(*AI))
- return true;
- return false;
+bool Filler::delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
+ InspectMemInstr &IM) const {
+ bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill());
+
+ HasHazard |= IM.hasHazard(Candidate);
+ HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands());
+
+ return HasHazard;
+}
+
+bool Filler::terminateSearch(const MachineInstr &Candidate) const {
+ return (Candidate.isTerminator() || Candidate.isCall() ||
+ Candidate.isLabel() || Candidate.isInlineAsm() ||
+ Candidate.hasUnmodeledSideEffects());
}
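The register-hazard rule at the core of the rewritten filler
(RegDefsUses::checkRegDefsUses) is compact enough to model standalone: a
candidate may not define a register the slot's context has already defined or
used, and may not use a register that has already been defined. A simplified
C++ sketch of just that rule, ignoring register aliasing and the MachineInstr
plumbing (register numbers here are made up):

#include <bitset>
#include <cassert>

constexpr unsigned NumRegs = 32; // toy register file

struct DefsUses {
  std::bitset<NumRegs> Defs, Uses;

  // Returns true if recording (Reg, IsDef) exposes a hazard. The real code
  // batches the new bits per instruction and walks alias sets; this merges
  // per register for brevity.
  bool update(unsigned Reg, bool IsDef) {
    bool Hazard = IsDef ? (Defs.test(Reg) || Uses.test(Reg)) : Defs.test(Reg);
    (IsDef ? Defs : Uses).set(Reg);
    return Hazard;
  }
};

int main() {
  DefsUses DU;
  DU.update(4, /*IsDef=*/true);           // the branch in the slot defines r4
  assert(DU.update(4, /*IsDef=*/false));  // a candidate reading r4: hazard
  assert(!DU.update(5, /*IsDef=*/true));  // defining untouched r5: safe
  return 0;
}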
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 2cad2a6264ab..eb9d49fefb2f 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -12,20 +12,20 @@
//===----------------------------------------------------------------------===//
#include "MipsFrameLowering.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
#include "MipsTargetMachine.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index df52d92da830..6a5f79d0dfc4 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -26,9 +26,8 @@ protected:
const MipsSubtarget &STI;
public:
- explicit MipsFrameLowering(const MipsSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0,
- sti.hasMips64() ? 16 : 8), STI(sti) {}
+ explicit MipsFrameLowering(const MipsSubtarget &sti, unsigned Alignment)
+ : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {}
static const MipsFrameLowering *create(MipsTargetMachine &TM,
const MipsSubtarget &ST);
@@ -39,7 +38,7 @@ protected:
uint64_t estimateStackSize(const MachineFunction &MF) const;
};
-/// Create MipsInstrInfo objects.
+/// Create MipsFrameLowering objects.
const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST);
const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST);
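The constructor change above hoists the stack-alignment choice out of the
shared base class: MipsFrameLowering no longer computes hasMips64() ? 16 : 8
itself; the value is supplied by whoever constructs the Mips16 or MipsSE
variant. A minimal sketch of the pattern with illustrative class names (not
the real LLVM types):

#include <cassert>

struct FrameLoweringBase {
  unsigned StackAlignment;
  explicit FrameLoweringBase(unsigned Alignment) : StackAlignment(Alignment) {}
};

// The creator, not the base constructor, now decides 16- vs 8-byte alignment.
struct SEFrameLowering : FrameLoweringBase {
  explicit SEFrameLowering(bool HasMips64)
      : FrameLoweringBase(HasMips64 ? 16 : 8) {}
};

int main() {
  assert(SEFrameLowering(true).StackAlignment == 16);
  assert(SEFrameLowering(false).StackAlignment == 8);
  return 0;
}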
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index c5fca7f4b27a..77b08cb11e0c 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -12,29 +12,29 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-isel"
+#include "MipsISelDAGToDAG.h"
+#include "Mips16ISelDAGToDAG.h"
+#include "MipsSEISelDAGToDAG.h"
#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -45,263 +45,11 @@ using namespace llvm;
// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
// instructions for SelectionDAG operations.
//===----------------------------------------------------------------------===//
-namespace {
-
-class MipsDAGToDAGISel : public SelectionDAGISel {
-
- /// TM - Keep a reference to MipsTargetMachine.
- MipsTargetMachine &TM;
-
- /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const MipsSubtarget &Subtarget;
-
-public:
- explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
- SelectionDAGISel(tm),
- TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
-
- // Pass Name
- virtual const char *getPassName() const {
- return "MIPS DAG->DAG Pattern Instruction Selection";
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
-private:
- // Include the pieces autogenerated from the target description.
- #include "MipsGenDAGISel.inc"
-
- /// getTargetMachine - Return a reference to the TargetMachine, casted
- /// to the target-specific type.
- const MipsTargetMachine &getTargetMachine() {
- return static_cast<const MipsTargetMachine &>(TM);
- }
-
- /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
- /// to the target-specific type.
- const MipsInstrInfo *getInstrInfo() {
- return getTargetMachine().getInstrInfo();
- }
-
- SDNode *getGlobalBaseReg();
-
- SDValue getMips16SPAliasReg();
-
- void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg);
-
- std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
- EVT Ty, bool HasLo, bool HasHi);
-
- SDNode *Select(SDNode *N);
-
- // Complex Pattern.
- bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset);
-
- bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &Alias);
-
- // getImm - Return a target constant with the specified value.
- inline SDValue getImm(const SDNode *Node, unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
- }
-
- void ProcessFunctionAfterISel(MachineFunction &MF);
- bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
- void InitGlobalBaseReg(MachineFunction &MF);
- void InitMips16SPAliasReg(MachineFunction &MF);
-
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
-};
-
-}
-
-// Insert instructions to initialize the global base register in the
-// first MBB of the function. When the ABI is O32 and the relocation model is
-// PIC, the necessary instructions are emitted later to prevent optimization
-// passes from moving them.
-void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->globalBaseRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
- const TargetRegisterClass *RC;
-
- if (Subtarget.isABI_N64())
- RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
- else if (Subtarget.inMips16Mode())
- RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
- else
- RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
-
- V0 = RegInfo.createVirtualRegister(RC);
- V1 = RegInfo.createVirtualRegister(RC);
- V2 = RegInfo.createVirtualRegister(RC);
-
- if (Subtarget.isABI_N64()) {
- MF.getRegInfo().addLiveIn(Mips::T9_64);
- MBB.addLiveIn(Mips::T9_64);
-
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // daddu $v1, $v0, $t9
- // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
- .addReg(Mips::T9_64);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- if (Subtarget.inMips16Mode()) {
- BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
- BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
- BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
- .addReg(V1).addReg(V2);
- return;
- }
-
- if (MF.getTarget().getRelocationModel() == Reloc::Static) {
- // Set global register to __gnu_local_gp.
- //
- // lui $v0, %hi(__gnu_local_gp)
- // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
- return;
- }
-
- MF.getRegInfo().addLiveIn(Mips::T9);
- MBB.addLiveIn(Mips::T9);
-
- if (Subtarget.isABI_N32()) {
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // addu $v1, $v0, $t9
- // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- assert(Subtarget.isABI_O32());
-
- // For O32 ABI, the following instruction sequence is emitted to initialize
- // the global base register:
- //
- // 0. lui $2, %hi(_gp_disp)
- // 1. addiu $2, $2, %lo(_gp_disp)
- // 2. addu $globalbasereg, $2, $t9
- //
- // We emit only the last instruction here.
- //
- // GNU linker requires that the first two instructions appear at the beginning
- // of a function and no instructions be inserted before or between them.
- // The two instructions are emitted during lowering to MC layer in order to
- // avoid any reordering.
- //
- // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
- // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
- // reads it.
- MF.getRegInfo().addLiveIn(Mips::V0);
- MBB.addLiveIn(Mips::V0);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
- .addReg(Mips::V0).addReg(Mips::T9);
-}
-
-// Insert instructions to initialize the Mips16 SP Alias register in the
-// first MBB of the function.
-//
-void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->mips16SPAliasRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
-
- BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
- .addReg(Mips::SP);
-}
-
-
-bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
- const MachineInstr& MI) {
- unsigned DstReg = 0, ZeroReg = 0;
-
- // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
- if ((MI.getOpcode() == Mips::ADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO;
- } else if ((MI.getOpcode() == Mips::DADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO_64;
- }
-
- if (!DstReg)
- return false;
-
- // Replace uses with ZeroReg.
- for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
- E = MRI->use_end(); U != E;) {
- MachineOperand &MO = U.getOperand();
- unsigned OpNo = U.getOperandNo();
- MachineInstr *MI = MO.getParent();
- ++U;
-
- // Do not replace if it is a phi's operand or is tied to def operand.
- if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
- continue;
-
- MO.setReg(ZeroReg);
- }
-
- return true;
-}
-
-void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) {
- InitGlobalBaseReg(MF);
- InitMips16SPAliasReg(MF);
-
- MachineRegisterInfo *MRI = &MF.getRegInfo();
-
- for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
- ++MFI)
- for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
- ReplaceUsesWithZeroReg(MRI, *I);
-}
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
- ProcessFunctionAfterISel(MF);
+ processFunctionAfterISel(MF);
return Ret;
}
@@ -313,230 +61,36 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-/// getMips16SPAliasReg - Output the instructions required to put the
-/// SP into a Mips16 accessible aliased register.
-SDValue MipsDAGToDAGISel::getMips16SPAliasReg() {
- unsigned Mips16SPAliasReg =
- MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
- return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
-}
-
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
-bool MipsDAGToDAGISel::
-SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
- EVT ValTy = Addr.getValueType();
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
- }
-
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
-
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
-
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0)))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
-
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
-
- // If an indexed floating point load/store can be emitted, return false.
- const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
-
- if (LS &&
- (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasMips32r2Or64())
- return false;
- }
-
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
+bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
- SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
- if (Parent) {
- switch (Parent->getOpcode()) {
- case ISD::LOAD: {
- LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- case ISD::STORE: {
- StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- }
- }
- AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
- return;
-
+bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-bool MipsDAGToDAGISel::SelectAddr16(
- SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
- SDValue &Alias) {
- EVT ValTy = Addr.getValueType();
-
- Alias = CurDAG->getTargetConstant(0, ValTy);
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- getMips16SPRefReg(Parent, Alias);
- return true;
- }
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0))) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- getMips16SPRefReg(Parent, Alias);
- }
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
- // If an indexed floating point load/store can be emitted, return false.
- const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
-
- if (LS &&
- (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasMips32r2Or64())
- return false;
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
+bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-/// Select multiply instructions.
-std::pair<SDNode*, SDNode*>
-MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty,
- bool HasLo, bool HasHi) {
- SDNode *Lo = 0, *Hi = 0;
- SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0),
- N->getOperand(1));
- SDValue InFlag = SDValue(Mul, 0);
-
- if (HasLo) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 :
- (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
- Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag);
- InFlag = SDValue(Lo, 1);
- }
- if (HasHi) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 :
- (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
- Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag);
- }
- return std::make_pair(Lo, Hi);
+bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias) {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-
/// Select instructions not customized. Used for
/// expanded, promoted and normal instructions.
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
- DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
@@ -547,167 +101,19 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
return NULL;
}
- ///
- // Instruction Selection not handled by the auto-generated
- // tablegen selection should be handled here.
- ///
- EVT NodeTy = Node->getValueType(0);
- unsigned MultOpc;
+ // See if subclasses can handle this node.
+ std::pair<bool, SDNode*> Ret = selectNode(Node);
+
+ if (Ret.first)
+ return Ret.second;
switch(Opcode) {
default: break;
- case ISD::SUBE:
- case ISD::ADDE: {
- bool inMips16Mode = Subtarget.inMips16Mode();
- SDValue InFlag = Node->getOperand(2), CmpLHS;
- unsigned Opc = InFlag.getOpcode(); (void)Opc;
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
- "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
-
- unsigned MOp;
- if (Opcode == ISD::ADDE) {
- CmpLHS = InFlag.getValue(0);
- if (inMips16Mode)
- MOp = Mips::AdduRxRyRz16;
- else
- MOp = Mips::ADDu;
- } else {
- CmpLHS = InFlag.getOperand(0);
- if (inMips16Mode)
- MOp = Mips::SubuRxRyRz16;
- else
- MOp = Mips::SUBu;
- }
-
- SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
-
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
-
- EVT VT = LHS.getValueType();
-
- unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu;
- SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2);
- unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu;
- SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT,
- SDValue(Carry,0), RHS);
-
- return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
- LHS, SDValue(AddCarry,0));
- }
-
- /// Mul with two results
- case ISD::SMUL_LOHI:
- case ISD::UMUL_LOHI: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 :
- Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
-
- std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy,
- true, true);
-
- if (!SDValue(Node, 0).use_empty())
- ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
-
- if (!SDValue(Node, 1).use_empty())
- ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
-
- return NULL;
- }
-
- /// Special Muls
- case ISD::MUL: {
- // Mips32 has a 32-bit three operand mul instruction.
- if (Subtarget.hasMips32() && NodeTy == MVT::i32)
- break;
- return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT,
- dl, NodeTy, true, false).first;
- }
- case ISD::MULHS:
- case ISD::MULHU: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::MULHU ?
- Mips::MultuRxRy16 : Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
-
- return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second;
- }
-
// Get target GOT address.
case ISD::GLOBAL_OFFSET_TABLE:
return getGlobalBaseReg();
- case ISD::ConstantFP: {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
- if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
- if (Subtarget.hasMips64()) {
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO_64, MVT::i64);
- return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero);
- }
-
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO, MVT::i32);
- return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
- Zero);
- }
- break;
- }
-
- case ISD::Constant: {
- const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
- unsigned Size = CN->getValueSizeInBits(0);
-
- if (Size == 32)
- break;
-
- MipsAnalyzeImmediate AnalyzeImm;
- int64_t Imm = CN->getSExtValue();
-
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Imm, Size, false);
-
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
- DebugLoc DL = CN->getDebugLoc();
- SDNode *RegOpnd;
- SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
-
- // The first instruction can be a LUi which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == Mips::LUi64)
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
- else
- RegOpnd =
- CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
- ImmOpnd);
-
- // The remaining instructions in the sequence are handled here.
- for (++Inst; Inst != Seq.end(); ++Inst) {
- ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- SDValue(RegOpnd, 0), ImmOpnd);
- }
-
- return RegOpnd;
- }
-
#ifndef NDEBUG
case ISD::LOAD:
case ISD::STORE:
@@ -716,31 +122,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
"Unexpected unaligned loads/stores.");
break;
#endif
-
- case MipsISD::ThreadPointer: {
- EVT PtrVT = TLI.getPointerTy();
- unsigned RdhwrOpc, SrcReg, DestReg;
-
- if (PtrVT == MVT::i32) {
- RdhwrOpc = Mips::RDHWR;
- SrcReg = Mips::HWR29;
- DestReg = Mips::V1;
- } else {
- RdhwrOpc = Mips::RDHWR64;
- SrcReg = Mips::HWR29_64;
- DestReg = Mips::V1_64;
- }
-
- SDNode *Rdhwr =
- CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
- Node->getValueType(0),
- CurDAG->getRegister(SrcReg, PtrVT));
- SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
- SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT);
- ReplaceUses(SDValue(Node, 0), ResNode);
- return ResNode.getNode();
- }
}
// Select the default instruction
@@ -766,5 +147,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
/// createMipsISelDag - This pass converts a legalized DAG into a
/// MIPS-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
- return new MipsDAGToDAGISel(TM);
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16ISelDag(TM);
+
+ return llvm::createMipsSEISelDag(TM);
}
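// The net effect of this split: MipsDAGToDAGISel becomes an abstract base
// class, the Mips16 and standard-encoding selection logic moves into
// subclasses, and createMipsISelDag picks the subclass from the subtarget.
// A stripped-down, standalone sketch of that dispatch pattern follows
// (placeholder names, not the in-tree classes):

#include <memory>

struct DAGISelBase {
  virtual ~DAGISelBase() {}
  // Each flavor supplies its own node-selection hook.
  virtual bool selectNode(int Node) = 0;
};

struct Mips16Flavor : DAGISelBase {
  bool selectNode(int) { return true; }   // Mips16-only patterns live here.
};

struct SEFlavor : DAGISelBase {
  bool selectNode(int) { return false; }  // Standard-encoding patterns here.
};

// Mirrors createMipsISelDag above: one factory, two concrete selectors.
std::unique_ptr<DAGISelBase> createISel(bool InMips16Mode) {
  if (InMips16Mode)
    return std::unique_ptr<DAGISelBase>(new Mips16Flavor());
  return std::unique_ptr<DAGISelBase>(new SEFlavor());
}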
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
new file mode 100644
index 000000000000..cf0f9c58aa9c
--- /dev/null
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -0,0 +1,93 @@
+//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSISELDAGTODAG_H
+#define MIPSISELDAGTODAG_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class MipsDAGToDAGISel : public SelectionDAGISel {
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &TM)
+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+protected:
+ SDNode *getGlobalBaseReg();
+
+ /// Keep a pointer to the MipsSubtarget around so that we can make the right
+ /// decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ // Complex Pattern.
+ /// (reg + imm).
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Fall back on this function if all else fails.
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Match integer address pattern.
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual SDNode *Select(SDNode *N);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0;
+
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+ }
+
+ virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
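// A hypothetical subclass skeleton against this interface (illustrative only;
// the real subclasses sit behind createMips16ISelDag and createMipsSEISelDag,
// and this assumes the llvm namespace plus the usual LLVM headers). Each
// flavor must implement the two pure virtual hooks and may override whichever
// address-selection routines it actually supports.

class MipsFooDAGToDAGISel : public MipsDAGToDAGISel {
public:
  explicit MipsFooDAGToDAGISel(MipsTargetMachine &TM)
    : MipsDAGToDAGISel(TM) {}

private:
  // Return {true, Replacement} when this flavor handled Node itself.
  virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) {
    return std::make_pair(false, static_cast<SDNode*>(0));
  }

  // Post-isel fixups (cf. the ReplaceUsesWithZeroReg call removed earlier
  // in this diff).
  virtual void processFunctionAfterISel(MachineFunction &MF) {}
};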
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b0dd0a766f70..e2219f257ecd 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -11,20 +11,14 @@
// selection DAG.
//
//===----------------------------------------------------------------------===//
-
#define DEBUG_TYPE "mips-lower"
#include "MipsISelLowering.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
#include "MipsTargetObjectFile.h"
-#include "MipsSubtarget.h"
-#include "InstPrinter/MipsInstPrinter.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -33,6 +27,10 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -43,10 +41,6 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
-EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
- cl::desc("MIPS: Enable tail calls."), cl::init(false));
-
-static cl::opt<bool>
LargeGOT("mxgot", cl::Hidden,
cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false));
@@ -67,7 +61,7 @@ static const uint16_t Mips64DPRegs[8] = {
// If I is a shifted mask, set the size (Size) and the first bit of the
// mask (Pos), and return true.
// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
-static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
+static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
if (!isShiftedMask_64(I))
return false;
@@ -76,7 +70,7 @@ static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
return true;
}
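// A standalone sketch of the same decomposition, assuming a C++20 toolchain
// (the in-tree code defers to isShiftedMask_64 from MathExtras.h instead):

#include <bit>
#include <cstdint>

static bool isShiftedMaskSketch(uint64_t I, uint64_t &Pos, uint64_t &Size) {
  if (I == 0)
    return false;
  unsigned TZ = std::countr_zero(I);
  uint64_t M = I >> TZ;     // Shift the candidate mask down to bit 0.
  if (M & (M + 1))          // M must look like 0b0...01...1.
    return false;
  Pos = TZ;
  Size = std::popcount(I);
  return true;
}
// For I = 0x003ff800: TZ = 11 and popcount = 11, matching the example above.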
-static SDValue GetGlobalReg(SelectionDAG &DAG, EVT Ty) {
+SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
}
@@ -111,11 +105,12 @@ static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
}
-static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
+SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG,
+ bool HasMips64) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
- SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, GetGlobalReg(DAG, Ty),
+ SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
getTargetNode(Op, DAG, GOTFlag));
SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
MachinePointerInfo::getGOT(), false, false, false,
@@ -125,21 +120,23 @@ static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
}
-static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
+SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG,
+ unsigned Flag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
- SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, GetGlobalReg(DAG, Ty),
+ SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
getTargetNode(Op, DAG, Flag));
return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt,
MachinePointerInfo::getGOT(), false, false, false, 0);
}
-static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
- unsigned HiFlag, unsigned LoFlag) {
+SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag,
+ unsigned LoFlag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag));
- Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, GetGlobalReg(DAG, Ty));
+ Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
getTargetNode(Op, DAG, LoFlag));
return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper,
@@ -155,21 +152,27 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::GPRel: return "MipsISD::GPRel";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
+ case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN";
case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
case MipsISD::FPCmp: return "MipsISD::FPCmp";
case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T";
case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F";
case MipsISD::FPRound: return "MipsISD::FPRound";
+ case MipsISD::ExtractLOHI: return "MipsISD::ExtractLOHI";
+ case MipsISD::InsertLOHI: return "MipsISD::InsertLOHI";
+ case MipsISD::Mult: return "MipsISD::Mult";
+ case MipsISD::Multu: return "MipsISD::Multu";
case MipsISD::MAdd: return "MipsISD::MAdd";
case MipsISD::MAddu: return "MipsISD::MAddu";
case MipsISD::MSub: return "MipsISD::MSub";
case MipsISD::MSubu: return "MipsISD::MSubu";
case MipsISD::DivRem: return "MipsISD::DivRem";
case MipsISD::DivRemU: return "MipsISD::DivRemU";
+ case MipsISD::DivRem16: return "MipsISD::DivRem16";
+ case MipsISD::DivRemU16: return "MipsISD::DivRemU16";
case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
case MipsISD::Wrapper: return "MipsISD::Wrapper";
- case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
case MipsISD::Sync: return "MipsISD::Sync";
case MipsISD::Ext: return "MipsISD::Ext";
case MipsISD::Ins: return "MipsISD::Ins";
@@ -191,7 +194,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::MTHLIP: return "MipsISD::MTHLIP";
case MipsISD::MULT: return "MipsISD::MULT";
case MipsISD::MULTU: return "MipsISD::MULTU";
- case MipsISD::MADD_DSP: return "MipsISD::MADD_DSPDSP";
+ case MipsISD::MADD_DSP: return "MipsISD::MADD_DSP";
case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP";
case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP";
case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP";
@@ -205,50 +208,11 @@ MipsTargetLowering(MipsTargetMachine &TM)
Subtarget(&TM.getSubtarget<MipsSubtarget>()),
HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()),
IsO32(Subtarget->isABI_O32()) {
-
// Mips does not have an i1 type, so use i32 for
// setcc operation results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- // Set up the register classes
- addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
-
- if (HasMips64)
- addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
-
- if (Subtarget->inMips16Mode()) {
- addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
- }
-
- if (Subtarget->hasDSP()) {
- MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
-
- for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
- addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
-
- // Expand all builtin opcodes.
- for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
- setOperationAction(Opc, VecTys[i], Expand);
-
- setOperationAction(ISD::LOAD, VecTys[i], Legal);
- setOperationAction(ISD::STORE, VecTys[i], Legal);
- setOperationAction(ISD::BITCAST, VecTys[i], Legal);
- }
- }
-
- if (!TM.Options.UseSoftFloat) {
- addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
-
- // When dealing with single precision only, use libcalls
- if (!Subtarget->isSingleFloat()) {
- if (HasMips64)
- addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
- else
- addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
- }
- }
-
// Load extended operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
@@ -265,6 +229,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
// Mips Custom Operations
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
@@ -281,18 +246,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- }
- else {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
- }
- if (!Subtarget->inMips16Mode()) {
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::i32, Custom);
- }
if (!TM.Options.NoNaNsFPMath) {
setOperationAction(ISD::FABS, MVT::f32, Custom);
@@ -330,8 +283,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Operations not directly supported by Mips.
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
@@ -361,6 +316,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOWI, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
@@ -383,6 +340,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
@@ -399,21 +358,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
- }
-
setInsertFencesForAtomic(true);
if (!Subtarget->hasSEInReg()) {
@@ -438,8 +382,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setTruncStoreAction(MVT::i64, MVT::i32, Custom);
}
- setTargetDAGCombine(ISD::ADDE);
- setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::SDIVREM);
setTargetDAGCombine(ISD::UDIVREM);
setTargetDAGCombine(ISD::SELECT);
@@ -450,206 +392,27 @@ MipsTargetLowering(MipsTargetMachine &TM)
setMinFunctionAlignment(HasMips64 ? 3 : 2);
setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
- computeRegisterProperties();
setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
- maxStoresPerMemcpy = 16;
+ MaxStoresPerMemcpy = 16;
}
-bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
- MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
-
- if (Subtarget->inMips16Mode())
- return false;
+const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16TargetLowering(TM);
- switch (SVT) {
- case MVT::i64:
- case MVT::i32:
- return true;
- default:
- return false;
- }
+ return llvm::createMipsSETargetLowering(TM);
}
EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
- return MVT::i32;
-}
-
-// SelectMadd -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-// (addc multLo, Lo0), (adde multHi, Hi0),
-// where,
-// multHi/Lo: product of multiplication
-// Lo0: initial value of Lo register
-// Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) {
- // ADDENode's second operand must be a flag output of an ADDC node in order
- // for the matching to be successful.
- SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
-
- if (ADDCNode->getOpcode() != ISD::ADDC)
- return false;
-
- SDValue MultHi = ADDENode->getOperand(0);
- SDValue MultLo = ADDCNode->getOperand(0);
- SDNode *MultNode = MultHi.getNode();
- unsigned MultOpc = MultHi.getOpcode();
-
- // MultHi and MultLo must be generated by the same node,
- if (MultLo.getNode() != MultNode)
- return false;
-
- // and it must be a multiplication.
- if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
- return false;
-
- // MultLo and MultHi must be the first and second output of MultNode
- // respectively.
- if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
- return false;
-
- // Transform this to a MADD only if ADDENode and ADDCNode are the only users
- // of the values of MultNode, in which case MultNode will be removed in later
- // phases.
- // If there exist users other than ADDENode or ADDCNode, this function returns
- // here, which will result in MultNode being mapped to a single MULT
- // instruction node rather than a pair of MULT and MADD instructions being
- // produced.
- if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
- return false;
-
- SDValue Chain = CurDAG->getEntryNode();
- DebugLoc dl = ADDENode->getDebugLoc();
-
- // create MipsMAdd(u) node
- MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
-
- SDValue MAdd = CurDAG->getNode(MultOpc, dl, MVT::Glue,
- MultNode->getOperand(0),// Factor 0
- MultNode->getOperand(1),// Factor 1
- ADDCNode->getOperand(1),// Lo0
- ADDENode->getOperand(1));// Hi0
-
- // create CopyFromReg nodes
- SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
- MAdd);
- SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
- Mips::HI, MVT::i32,
- CopyFromLo.getValue(2));
-
- // replace uses of adde and addc here
- if (!SDValue(ADDCNode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo);
-
- if (!SDValue(ADDENode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi);
-
- return true;
-}
-
-// SelectMsub -
-// Transforms a subgraph in CurDAG if the following pattern is found:
-// (addc Lo0, multLo), (sube Hi0, multHi),
-// where,
-// multHi/Lo: product of multiplication
-// Lo0: initial value of Lo register
-// Hi0: initial value of Hi register
-// Return true if pattern matching was successful.
-static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) {
- // SUBENode's second operand must be a flag output of an SUBC node in order
- // for the matching to be successful.
- SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
-
- if (SUBCNode->getOpcode() != ISD::SUBC)
- return false;
-
- SDValue MultHi = SUBENode->getOperand(1);
- SDValue MultLo = SUBCNode->getOperand(1);
- SDNode *MultNode = MultHi.getNode();
- unsigned MultOpc = MultHi.getOpcode();
-
- // MultHi and MultLo must be generated by the same node,
- if (MultLo.getNode() != MultNode)
- return false;
-
- // and it must be a multiplication.
- if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
- return false;
-
- // MultLo and MultHi must be the first and second output of MultNode
- // respectively.
- if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
- return false;
-
- // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
- // of the values of MultNode, in which case MultNode will be removed in later
- // phases.
- // If there exist users other than SUBENode or SUBCNode, this function returns
- // here, which will result in MultNode being mapped to a single MULT
- // instruction node rather than a pair of MULT and MSUB instructions being
- // produced.
- if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
- return false;
-
- SDValue Chain = CurDAG->getEntryNode();
- DebugLoc dl = SUBENode->getDebugLoc();
-
- // create MipsSub(u) node
- MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
-
- SDValue MSub = CurDAG->getNode(MultOpc, dl, MVT::Glue,
- MultNode->getOperand(0),// Factor 0
- MultNode->getOperand(1),// Factor 1
- SUBCNode->getOperand(0),// Lo0
- SUBENode->getOperand(0));// Hi0
-
- // create CopyFromReg nodes
- SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
- MSub);
- SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
- Mips::HI, MVT::i32,
- CopyFromLo.getValue(2));
-
- // replace uses of sube and subc here
- if (!SDValue(SUBCNode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo);
-
- if (!SDValue(SUBENode, 0).use_empty())
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi);
-
- return true;
-}
-
-static SDValue PerformADDECombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
- SelectMadd(N, &DAG))
- return SDValue(N, 0);
-
- return SDValue();
-}
-
-static SDValue PerformSUBECombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget *Subtarget) {
- if (DCI.isBeforeLegalize())
- return SDValue();
-
- if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
- SelectMsub(N, &DAG))
- return SDValue(N, 0);
-
- return SDValue();
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
}
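// Illustrative values under the new hook (assumed, not exercised by a test
// in this patch):
//   getSetCCResultType(MVT::i64)   -> MVT::i32   (all scalar compares)
//   getSetCCResultType(MVT::v2f32) -> MVT::v2i32 (per-lane integer mask)
//   getSetCCResultType(MVT::v2i16) -> MVT::v2i16 (elements already integral)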
-static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
@@ -658,18 +421,18 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG,
EVT Ty = N->getValueType(0);
unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
- unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
- MipsISD::DivRemU;
- DebugLoc dl = N->getDebugLoc();
+ unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem16 :
+ MipsISD::DivRemU16;
+ DebugLoc DL = N->getDebugLoc();
- SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue,
+ SDValue DivRem = DAG.getNode(Opc, DL, MVT::Glue,
N->getOperand(0), N->getOperand(1));
SDValue InChain = DAG.getEntryNode();
SDValue InGlue = DivRem;
// insert MFLO
if (N->hasAnyUseOfValue(0)) {
- SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, LO, Ty,
+ SDValue CopyFromLo = DAG.getCopyFromReg(InChain, DL, LO, Ty,
InGlue);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
InChain = CopyFromLo.getValue(1);
@@ -678,7 +441,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG,
// insert MFHI
if (N->hasAnyUseOfValue(1)) {
- SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl,
+ SDValue CopyFromHi = DAG.getCopyFromReg(InChain, DL,
HI, Ty, InGlue);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi);
}
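// The resulting DAG shape, sketched for a 32-bit sdivrem with both results
// live (names are placeholders):
//
//   glue = MipsISD::DivRem16 lhs, rhs          ; quotient -> LO, remainder -> HI
//   lo   = CopyFromReg entry, Mips::LO, glue   ; replaces (sdivrem ...):0
//   hi   = CopyFromReg lo.ch, Mips::HI, glue   ; replaces (sdivrem ...):1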
@@ -713,8 +476,9 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
}
-// Returns true if condition code has to be inverted.
-static bool InvertFPCondCode(Mips::CondCode CC) {
+/// This function returns true if the floating point conditional branches and
+/// conditional moves which use condition code CC should be inverted.
+static bool invertFPCondCodeUser(Mips::CondCode CC) {
if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
return false;
@@ -726,7 +490,7 @@ static bool InvertFPCondCode(Mips::CondCode CC) {
// Creates and returns an FPCmp node from a setcc node.
// Returns Op if setcc is not a floating point comparison.
-static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) {
+static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) {
// must be a SETCC node
if (Op.getOpcode() != ISD::SETCC)
return Op;
@@ -737,28 +501,27 @@ static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) {
return Op;
SDValue RHS = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
// Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of
// node if necessary.
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS,
+ return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS,
DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
}
// Creates and returns a CMovFPT/F node.
-static SDValue CreateCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
+static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
SDValue False, DebugLoc DL) {
- bool invert = InvertFPCondCode((Mips::CondCode)
- cast<ConstantSDNode>(Cond.getOperand(2))
- ->getSExtValue());
+ ConstantSDNode *CC = cast<ConstantSDNode>(Cond.getOperand(2));
+ bool invert = invertFPCondCodeUser((Mips::CondCode)CC->getSExtValue());
return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL,
True.getValueType(), True, False, Cond);
}
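// Worked example of how the two helpers compose (a sketch; OLT sits in the
// non-inverted FCOND range, so no inversion is applied):
//   (select (setcc f32 %a, %b, olt), %t, %f)
// lowers to:
//   %cond = MipsISD::FPCmp %a, %b, OLT          ; glue result
//   %res  = MipsISD::CMovFP_T %t, %f, %cond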
-static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
@@ -791,7 +554,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
}
-static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
// Pattern match EXT.
@@ -817,7 +580,7 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
- !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize))
+ !isShiftedMask(CN->getZExtValue(), SMPos, SMSize))
return SDValue();
// Return if the shifted mask does not start at bit 0 or the sum of its size
@@ -831,7 +594,7 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(SMSize, MVT::i32));
}
-static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
// Pattern match INS.
@@ -850,7 +613,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
- !IsShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0))
+ !isShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0))
return SDValue();
// See if Op's second operand matches (and (shl $src, pos), mask1).
@@ -858,7 +621,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
- !IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
+ !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
return SDValue();
// The shift masks must have the same position and size.
@@ -885,7 +648,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0));
}
-static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
// (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
@@ -915,25 +678,21 @@ static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
- unsigned opc = N->getOpcode();
+ unsigned Opc = N->getOpcode();
- switch (opc) {
+ switch (Opc) {
default: break;
- case ISD::ADDE:
- return PerformADDECombine(N, DAG, DCI, Subtarget);
- case ISD::SUBE:
- return PerformSUBECombine(N, DAG, DCI, Subtarget);
case ISD::SDIVREM:
case ISD::UDIVREM:
- return PerformDivRemCombine(N, DAG, DCI, Subtarget);
+ return performDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SELECT:
- return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+ return performSELECTCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
- return PerformANDCombine(N, DAG, DCI, Subtarget);
+ return performANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
- return PerformORCombine(N, DAG, DCI, Subtarget);
+ return performORCombine(N, DAG, DCI, Subtarget);
case ISD::ADD:
- return PerformADDCombine(N, DAG, DCI, Subtarget);
+ return performADDCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
@@ -964,30 +723,32 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
switch (Op.getOpcode())
{
- case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
- case ISD::SELECT: return LowerSELECT(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
- case ISD::FABS: return LowerFABS(Op, DAG);
- case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
- case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
- case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
- case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
- case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, true);
- case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
- case ISD::ADD: return LowerADD(Op, DAG);
+ case ISD::BR_JT: return lowerBR_JT(Op, DAG);
+ case ISD::BRCOND: return lowerBRCOND(Op, DAG);
+ case ISD::ConstantPool: return lowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return lowerBlockAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return lowerJumpTable(Op, DAG);
+ case ISD::SELECT: return lowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG);
+ case ISD::SETCC: return lowerSETCC(Op, DAG);
+ case ISD::VASTART: return lowerVASTART(Op, DAG);
+ case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG);
+ case ISD::FABS: return lowerFABS(Op, DAG);
+ case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG);
+ case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG);
+ case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG);
+ case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG);
+ case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true);
+ case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false);
+ case ISD::LOAD: return lowerLOAD(Op, DAG);
+ case ISD::STORE: return lowerSTORE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::ADD: return lowerADD(Op, DAG);
}
return SDValue();
}
@@ -996,287 +757,133 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
// Lower helper functions
//===----------------------------------------------------------------------===//
-// AddLiveIn - This helper function adds the specified physical register to the
+// addLiveIn - This helper function adds the specified physical register to the
// MachineFunction as a live in value. It also creates a corresponding
// virtual register for it.
static unsigned
-AddLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
+addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
{
- assert(RC->contains(PReg) && "Not the correct regclass!");
unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
MF.getRegInfo().addLiveIn(PReg, VReg);
return VReg;
}
-// Get fp branch code (not opcode) from condition code.
-static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
- if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
- return Mips::BRANCH_T;
-
- assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) &&
- "Invalid CondCode.");
-
- return Mips::BRANCH_F;
-}
-
-/*
-static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
- DebugLoc dl,
- const MipsSubtarget *Subtarget,
- const TargetInstrInfo *TII,
- bool isFPCmp, unsigned Opc) {
- // There is no need to expand CMov instructions if target has
- // conditional moves.
- if (Subtarget->hasCondMov())
- return BB;
-
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- // Emit the right instruction according to the type of the operands compared
- if (isFPCmp)
- BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
- else
- BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg())
- .addReg(Mips::ZERO).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- if (isFPCmp)
- BuildMI(*BB, BB->begin(), dl,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
- else
- BuildMI(*BB, BB->begin(), dl,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-*/
-
-MachineBasicBlock *
-MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
- // $bb:
- // bposge32_pseudo $vr0
- // =>
- // $bb:
- // bposge32 $tbb
- // $fbb:
- // li $vr2, 0
- // b $sink
- // $tbb:
- // li $vr1, 1
- // $sink:
- // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
-
- MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
- DebugLoc DL = MI->getDebugLoc();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, FBB);
- F->insert(It, TBB);
- F->insert(It, Sink);
-
- // Transfer the remainder of BB and its successor edges to Sink.
- Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- Sink->transferSuccessorsAndUpdatePHIs(BB);
-
- // Add successors.
- BB->addSuccessor(FBB);
- BB->addSuccessor(TBB);
- FBB->addSuccessor(Sink);
- TBB->addSuccessor(Sink);
-
- // Insert the real bposge32 instruction to $BB.
- BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
-
- // Fill $FBB.
- unsigned VR2 = RegInfo.createVirtualRegister(RC);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
- .addReg(Mips::ZERO).addImm(0);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
-
- // Fill $TBB.
- unsigned VR1 = RegInfo.createVirtualRegister(RC);
- BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
- .addReg(Mips::ZERO).addImm(1);
-
- // Insert phi function to $Sink.
- BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
- MI->getOperand(0).getReg())
- .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return Sink;
-}
-
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
- default: llvm_unreachable("Unexpected instr type to insert");
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
case Mips::ATOMIC_LOAD_ADD_I8:
case Mips::ATOMIC_LOAD_ADD_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I16:
case Mips::ATOMIC_LOAD_ADD_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I32:
case Mips::ATOMIC_LOAD_ADD_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::ADDu);
+ return emitAtomicBinary(MI, BB, 4, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I64:
case Mips::ATOMIC_LOAD_ADD_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::DADDu);
+ return emitAtomicBinary(MI, BB, 8, Mips::DADDu);
case Mips::ATOMIC_LOAD_AND_I8:
case Mips::ATOMIC_LOAD_AND_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I16:
case Mips::ATOMIC_LOAD_AND_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I32:
case Mips::ATOMIC_LOAD_AND_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::AND);
+ return emitAtomicBinary(MI, BB, 4, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I64:
case Mips::ATOMIC_LOAD_AND_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::AND64);
+ return emitAtomicBinary(MI, BB, 8, Mips::AND64);
case Mips::ATOMIC_LOAD_OR_I8:
case Mips::ATOMIC_LOAD_OR_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I16:
case Mips::ATOMIC_LOAD_OR_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I32:
case Mips::ATOMIC_LOAD_OR_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::OR);
+ return emitAtomicBinary(MI, BB, 4, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I64:
case Mips::ATOMIC_LOAD_OR_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::OR64);
+ return emitAtomicBinary(MI, BB, 8, Mips::OR64);
case Mips::ATOMIC_LOAD_XOR_I8:
case Mips::ATOMIC_LOAD_XOR_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I16:
case Mips::ATOMIC_LOAD_XOR_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I32:
case Mips::ATOMIC_LOAD_XOR_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::XOR);
+ return emitAtomicBinary(MI, BB, 4, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I64:
case Mips::ATOMIC_LOAD_XOR_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::XOR64);
+ return emitAtomicBinary(MI, BB, 8, Mips::XOR64);
case Mips::ATOMIC_LOAD_NAND_I8:
case Mips::ATOMIC_LOAD_NAND_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, 0, true);
+ return emitAtomicBinaryPartword(MI, BB, 1, 0, true);
case Mips::ATOMIC_LOAD_NAND_I16:
case Mips::ATOMIC_LOAD_NAND_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, 0, true);
+ return emitAtomicBinaryPartword(MI, BB, 2, 0, true);
case Mips::ATOMIC_LOAD_NAND_I32:
case Mips::ATOMIC_LOAD_NAND_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, 0, true);
+ return emitAtomicBinary(MI, BB, 4, 0, true);
case Mips::ATOMIC_LOAD_NAND_I64:
case Mips::ATOMIC_LOAD_NAND_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, 0, true);
+ return emitAtomicBinary(MI, BB, 8, 0, true);
case Mips::ATOMIC_LOAD_SUB_I8:
case Mips::ATOMIC_LOAD_SUB_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
+ return emitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I16:
case Mips::ATOMIC_LOAD_SUB_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
+ return emitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I32:
case Mips::ATOMIC_LOAD_SUB_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, Mips::SUBu);
+ return emitAtomicBinary(MI, BB, 4, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I64:
case Mips::ATOMIC_LOAD_SUB_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, Mips::DSUBu);
+ return emitAtomicBinary(MI, BB, 8, Mips::DSUBu);
case Mips::ATOMIC_SWAP_I8:
case Mips::ATOMIC_SWAP_I8_P8:
- return EmitAtomicBinaryPartword(MI, BB, 1, 0);
+ return emitAtomicBinaryPartword(MI, BB, 1, 0);
case Mips::ATOMIC_SWAP_I16:
case Mips::ATOMIC_SWAP_I16_P8:
- return EmitAtomicBinaryPartword(MI, BB, 2, 0);
+ return emitAtomicBinaryPartword(MI, BB, 2, 0);
case Mips::ATOMIC_SWAP_I32:
case Mips::ATOMIC_SWAP_I32_P8:
- return EmitAtomicBinary(MI, BB, 4, 0);
+ return emitAtomicBinary(MI, BB, 4, 0);
case Mips::ATOMIC_SWAP_I64:
case Mips::ATOMIC_SWAP_I64_P8:
- return EmitAtomicBinary(MI, BB, 8, 0);
+ return emitAtomicBinary(MI, BB, 8, 0);
case Mips::ATOMIC_CMP_SWAP_I8:
case Mips::ATOMIC_CMP_SWAP_I8_P8:
- return EmitAtomicCmpSwapPartword(MI, BB, 1);
+ return emitAtomicCmpSwapPartword(MI, BB, 1);
case Mips::ATOMIC_CMP_SWAP_I16:
case Mips::ATOMIC_CMP_SWAP_I16_P8:
- return EmitAtomicCmpSwapPartword(MI, BB, 2);
+ return emitAtomicCmpSwapPartword(MI, BB, 2);
case Mips::ATOMIC_CMP_SWAP_I32:
case Mips::ATOMIC_CMP_SWAP_I32_P8:
- return EmitAtomicCmpSwap(MI, BB, 4);
+ return emitAtomicCmpSwap(MI, BB, 4);
case Mips::ATOMIC_CMP_SWAP_I64:
case Mips::ATOMIC_CMP_SWAP_I64_P8:
- return EmitAtomicCmpSwap(MI, BB, 8);
- case Mips::BPOSGE32_PSEUDO:
- return EmitBPOSGE32(MI, BB);
+ return emitAtomicCmpSwap(MI, BB, 8);
}
}
// This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and
// Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true)
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode,
bool Nand) const {
assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary.");
@@ -1285,7 +892,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, AND, NOR, ZERO, BEQ;
if (Size == 4) {
@@ -1341,20 +948,20 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// sc success, storeval, 0(ptr)
// beq success, $0, loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
if (Nand) {
// and andres, oldval, incr
// nor storeval, $0, andres
- BuildMI(BB, dl, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr);
- BuildMI(BB, dl, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes);
+ BuildMI(BB, DL, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, DL, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes);
} else if (BinOpcode) {
// <binop> storeval, oldval, incr
- BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, DL, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
} else {
StoreVal = Incr;
}
- BuildMI(BB, dl, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
+ BuildMI(BB, DL, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
MI->eraseFromParent(); // The instruction is gone now.
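// For reference, the loop this expansion emits for a 32-bit ATOMIC_LOAD_ADD
// (a sketch; register names are placeholders for virtual registers):
//
// loopMBB:
//   ll    $oldval, 0($ptr)
//   addu  $storeval, $oldval, $incr
//   sc    $success, $storeval, 0($ptr)
//   beq   $success, $zero, loopMBB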
@@ -1362,7 +969,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
}
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
+MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode,
bool Nand) const {
@@ -1373,7 +980,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
@@ -1432,18 +1039,18 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// sll incr2,incr,shiftamt
int64_t MaskImm = (Size == 1) ? 255 : 65535;
- BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ BuildMI(BB, DL, TII->get(Mips::ADDiu), MaskLSB2)
.addReg(Mips::ZERO).addImm(-4);
- BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr)
.addReg(Ptr).addReg(MaskLSB2);
- BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper)
.addReg(Mips::ZERO).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Mask)
.addReg(ShiftAmt).addReg(MaskUpper);
- BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
+ BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
// atomic.load.binop
// loopMBB:
@@ -1465,32 +1072,32 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// beq success,$0,loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
if (Nand) {
// and andres, oldval, incr2
// nor binopres, $0, andres
// and newval, binopres, mask
- BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2);
- BuildMI(BB, dl, TII->get(Mips::NOR), BinOpRes)
+ BuildMI(BB, DL, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, DL, TII->get(Mips::NOR), BinOpRes)
.addReg(Mips::ZERO).addReg(AndRes);
- BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
} else if (BinOpcode) {
// <binop> binopres, oldval, incr2
// and newval, binopres, mask
- BuildMI(BB, dl, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
- BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
+ BuildMI(BB, DL, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
} else {// atomic.swap
// and newval, incr2, mask
- BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
}
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask2);
- BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal0).addReg(NewVal);
- BuildMI(BB, dl, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BEQ))
+ BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
// sinkMBB:
@@ -1501,13 +1108,13 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
BB = sinkMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
.addReg(OldVal).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
.addReg(ShiftAmt).addReg(MaskedOldVal1);
- BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
.addReg(SrlRes).addImm(ShiftImm);
- BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
.addReg(SllRes).addImm(ShiftImm);
MI->eraseFromParent(); // The instruction is gone now.
@@ -1516,7 +1123,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
}
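// A hedged sketch of the alignment and mask arithmetic the partword expansion
// performs before entering its ll/sc loop; the names are illustrative and the
// bit offset shown assumes a little-endian layout.
#include <cstdint>

struct PartwordLayout {
  uintptr_t AlignedAddr; // word containing the byte/halfword
  unsigned ShiftAmt;     // bit offset of the field inside that word
  uint32_t Mask;         // selects the field
  uint32_t Mask2;        // selects everything but the field
};

static PartwordLayout computeLayout(uintptr_t Ptr, unsigned Size) {
  uint32_t MaskImm = (Size == 1) ? 255 : 65535; // as in the code above
  PartwordLayout L;
  L.AlignedAddr = Ptr & ~uintptr_t(3);          // addiu -4; and
  L.ShiftAmt = unsigned(Ptr & 3) * 8;           // andi 3; sll 3
  L.Mask = MaskImm << L.ShiftAmt;               // ori; sllv
  L.Mask2 = ~L.Mask;                            // nor $0, mask
  return L;
}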
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
@@ -1525,7 +1132,7 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, ZERO, BNE, BEQ;
if (Size == 4) {
@@ -1579,17 +1186,17 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// ll dest, 0(ptr)
// bne dest, oldval, exitMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(LL), Dest).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(BNE))
+ BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
+ BuildMI(BB, DL, TII->get(BNE))
.addReg(Dest).addReg(OldVal).addMBB(exitMBB);
// loop2MBB:
// sc success, newval, 0(ptr)
// beq success, $0, loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(SC), Success)
.addReg(NewVal).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(BEQ))
+ BuildMI(BB, DL, TII->get(BEQ))
.addReg(Success).addReg(ZERO).addMBB(loop1MBB);
MI->eraseFromParent(); // The instruction is gone now.
@@ -1598,7 +1205,7 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
}
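// A hedged C++ analogue of the two-block compare-and-swap expanded above:
// loop1MBB loads and bails out to exitMBB on mismatch, loop2MBB attempts the
// store and branches back on SC failure.
#include <atomic>

static int cmpSwap32(std::atomic<int> &Mem, int OldVal, int NewVal) {
  int Dest = OldVal;
  // compare_exchange_strong retries internally on spurious store failure,
  // which is what the beq back to loop1MBB provides in the expansion.
  Mem.compare_exchange_strong(Dest, NewVal);
  return Dest; // the loaded value, whether or not the swap happened
}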
MachineBasicBlock *
-MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
+MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
assert((Size == 1 || Size == 2) &&
@@ -1608,7 +1215,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
@@ -1675,24 +1282,24 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// andi maskednewval,newval,255
// sll shiftednewval,maskednewval,shiftamt
int64_t MaskImm = (Size == 1) ? 255 : 65535;
- BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2)
+ BuildMI(BB, DL, TII->get(Mips::ADDiu), MaskLSB2)
.addReg(Mips::ZERO).addImm(-4);
- BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr)
+ BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr)
.addReg(Ptr).addReg(MaskLSB2);
- BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
- BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper)
+ BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3);
+ BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper)
.addReg(Mips::ZERO).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), Mask)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), Mask)
.addReg(ShiftAmt).addReg(MaskUpper);
- BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedCmpVal)
+ BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
+ BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedCmpVal)
.addReg(CmpVal).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedCmpVal)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedCmpVal)
.addReg(ShiftAmt).addReg(MaskedCmpVal);
- BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedNewVal)
+ BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedNewVal)
.addReg(NewVal).addImm(MaskImm);
- BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedNewVal)
+ BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal)
.addReg(ShiftAmt).addReg(MaskedNewVal);
// loop1MBB:
@@ -1700,10 +1307,10 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// and maskedoldval0,oldval,mask
// bne maskedoldval0,shiftedcmpval,sinkMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
+ BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask);
- BuildMI(BB, dl, TII->get(Mips::BNE))
+ BuildMI(BB, DL, TII->get(Mips::BNE))
.addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
// loop2MBB:
@@ -1712,13 +1319,13 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// sc success,storeval,0(alignedaddr)
// beq success,$0,loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1)
+ BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
.addReg(OldVal).addReg(Mask2);
- BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
+ BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal1).addReg(ShiftedNewVal);
- BuildMI(BB, dl, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BEQ))
+ BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
// sinkMBB:
@@ -1728,11 +1335,11 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
BB = sinkMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
- BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes)
+ BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
.addReg(ShiftAmt).addReg(MaskedOldVal0);
- BuildMI(BB, dl, TII->get(Mips::SLL), SllRes)
+ BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
.addReg(SrlRes).addImm(ShiftImm);
- BuildMI(BB, dl, TII->get(Mips::SRA), Dest)
+ BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
.addReg(SllRes).addImm(ShiftImm);
MI->eraseFromParent(); // The instruction is gone now.
@@ -1743,16 +1350,46 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
//===----------------------------------------------------------------------===//
// Misc Lower Operation implementation
//===----------------------------------------------------------------------===//
+SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PTy = getPointerTy();
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(*getDataLayout());
+
+ Index = DAG.getNode(ISD::MUL, DL, PTy, Index,
+ DAG.getConstant(EntrySize, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ Addr = DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT, false, false,
+ 0);
+ Chain = Addr.getValue(1);
+
+ if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || IsN64) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, DL, PTy, Addr,
+ getPICJumpTableRelocBase(Table, DAG));
+ }
+
+ return DAG.getNode(ISD::BRIND, DL, MVT::Other, Chain, Addr);
+}
+
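// A hedged sketch of the address computation lowerBR_JT builds; a 4-byte
// entry size is assumed here for illustration, and RelocBase stands in for
// getPICJumpTableRelocBase (jump table, GOT, or another global base).
#include <cstdint>

static uintptr_t jumpTarget(const int32_t *Table, unsigned Index,
                            uintptr_t RelocBase, bool PIC) {
  intptr_t Entry = Table[Index];            // sign-extending load of one entry
  return PIC ? RelocBase + uintptr_t(Entry) // BRIND(load(JT + index) + base)
             : uintptr_t(Entry);
}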
SDValue MipsTargetLowering::
-LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+lowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
// The first operand is the chain, the second is the condition, the third is
// the block to branch to if the condition is true.
SDValue Chain = Op.getOperand(0);
SDValue Dest = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
- SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1));
+ SDValue CondRes = createFPCmp(DAG, Op.getOperand(1));
// Return if flag is not set by a floating point comparison.
if (CondRes.getOpcode() != MipsISD::FPCmp)
@@ -1761,27 +1398,27 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
SDValue CCNode = CondRes.getOperand(2);
Mips::CondCode CC =
(Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
- SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
-
- return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
+ unsigned Opc = invertFPCondCodeUser(CC) ? Mips::BRANCH_F : Mips::BRANCH_T;
+ SDValue BrCode = DAG.getConstant(Opc, MVT::i32);
+ return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode,
Dest, CondRes);
}
SDValue MipsTargetLowering::
-LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+lowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
- SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0));
+ SDValue Cond = createFPCmp(DAG, Op.getOperand(0));
// Return if flag is not set by a floating point comparison.
if (Cond.getOpcode() != MipsISD::FPCmp)
return Op;
- return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
+ return createCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
Op.getDebugLoc());
}
SDValue MipsTargetLowering::
-LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
+lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getOperand(0).getValueType();
@@ -1793,8 +1430,8 @@ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
Op.getOperand(3));
}
-SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Cond = CreateFPCmp(DAG, Op);
+SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = createFPCmp(DAG, Op);
assert(Cond.getOpcode() == MipsISD::FPCmp &&
"Floating point operand expected.");
@@ -1802,13 +1439,13 @@ SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue True = DAG.getConstant(1, MVT::i32);
SDValue False = DAG.getConstant(0, MVT::i32);
- return CreateCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
+ return createCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
}
-SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
+SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
@@ -1817,12 +1454,12 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
// %gp_rel relocation
if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
MipsII::MO_GPREL);
- SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl,
+ SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, DL,
DAG.getVTList(MVT::i32), &GA, 1);
SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, GPReg, GPRelNode);
+ return DAG.getNode(ISD::ADD, DL, MVT::i32, GPReg, GPRelNode);
}
// %hi/%lo relocation
@@ -1840,7 +1477,7 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16);
}
-SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
+SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
return getAddrNonPIC(Op, DAG);
@@ -1849,14 +1486,14 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
}
SDValue MipsTargetLowering::
-LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
+lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
// If the relocation model is PIC, use the General Dynamic TLS Model or
// Local Dynamic TLS model, otherwise use the Initial Exec or
// Local Exec TLS Model.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
- DebugLoc dl = GA->getDebugLoc();
+ DebugLoc DL = GA->getDebugLoc();
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
@@ -1867,9 +1504,9 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
unsigned Flag = (model == TLSModel::LocalDynamic) ? MipsII::MO_TLSLDM
: MipsII::MO_TLSGD;
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag);
- SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT,
- GetGlobalReg(DAG, PtrVT), TGA);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, Flag);
+ SDValue Argument = DAG.getNode(MipsISD::Wrapper, DL, PtrVT,
+ getGlobalReg(DAG, PtrVT), TGA);
unsigned PtrSize = PtrVT.getSizeInBits();
IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize);
@@ -1883,9 +1520,9 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
TargetLowering::CallLoweringInfo CLI(DAG.getEntryNode(), PtrTy,
false, false, false, false, 0, CallingConv::C,
- /*isTailCall=*/false, /*doesNotRet=*/false,
+ /*IsTailCall=*/false, /*doesNotRet=*/false,
/*isReturnValueUsed=*/true,
- TlsGetAddr, Args, DAG, dl);
+ TlsGetAddr, Args, DAG, DL);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ret = CallResult.first;
@@ -1893,44 +1530,44 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (model != TLSModel::LocalDynamic)
return Ret;
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_DTPREL_HI);
- SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_DTPREL_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
- SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Ret);
- return DAG.getNode(ISD::ADD, dl, PtrVT, Add, Lo);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Ret);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Add, Lo);
}
SDValue Offset;
if (model == TLSModel::InitialExec) {
// Initial Exec TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_GOTTPREL);
- TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT),
+ TGA = DAG.getNode(MipsISD::Wrapper, DL, PtrVT, getGlobalReg(DAG, PtrVT),
TGA);
- Offset = DAG.getLoad(PtrVT, dl,
+ Offset = DAG.getLoad(PtrVT, DL,
DAG.getEntryNode(), TGA, MachinePointerInfo(),
false, false, false, 0);
} else {
// Local Exec TLS Model
assert(model == TLSModel::LocalExec);
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_TPREL_HI);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
MipsII::MO_TPREL_LO);
- SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
- Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo);
+ Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}
- SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
- return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+ SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset);
}
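// A hedged sketch of the Local Exec address the code above assembles, assuming
// TpRelHi/TpRelLo already carry the usual %hi/%lo relocation adjustment: the
// high half is placed with lui, the sign-extended low half added with addiu,
// and the result added to the thread pointer (MipsISD::ThreadPointer).
#include <cstdint>

static uintptr_t localExecAddr(uintptr_t ThreadPtr,
                               uint32_t TpRelHi, uint16_t TpRelLo) {
  uint32_t Offset = (TpRelHi << 16) + uint32_t(int16_t(TpRelLo)); // lui + addiu
  return ThreadPtr + uintptr_t(intptr_t(int32_t(Offset)));
}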
SDValue MipsTargetLowering::
-LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
return getAddrNonPIC(Op, DAG);
@@ -1939,7 +1576,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
}
SDValue MipsTargetLowering::
-LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
// gp_rel relocation
// FIXME: we should reference the constant pool using small data sections,
@@ -1957,22 +1594,22 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
return getAddrLocal(Op, DAG, HasMips64);
}
-SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
}
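// Illustrative source that produces the VASTART node lowered above: the
// va_start call becomes the single store of the VarArgsFrameIndex address.
#include <cstdarg>

static int sum(int N, ...) {
  va_list Ap;
  va_start(Ap, N); // ISD::VASTART -> the store built above
  int S = 0;
  for (int I = 0; I < N; ++I)
    S += va_arg(Ap, int);
  va_end(Ap);
  return S;
}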
-static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
EVT TyX = Op.getOperand(0).getValueType();
EVT TyY = Op.getOperand(1).getValueType();
SDValue Const1 = DAG.getConstant(1, MVT::i32);
@@ -2017,7 +1654,7 @@ static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
unsigned WidthX = Op.getOperand(0).getValueSizeInBits();
unsigned WidthY = Op.getOperand(1).getValueSizeInBits();
EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY);
@@ -2066,14 +1703,14 @@ static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
}
SDValue
-MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->hasMips64())
- return LowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
- return LowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
}
-static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
DebugLoc DL = Op.getDebugLoc();
@@ -2102,7 +1739,7 @@ static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
DebugLoc DL = Op.getDebugLoc();
@@ -2123,15 +1760,15 @@ static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
}
SDValue
-MipsTargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
+MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64))
- return LowerFABS64(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFABS64(Op, DAG, Subtarget->hasMips32r2());
- return LowerFABS32(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFABS32(Op, DAG, Subtarget->hasMips32r2());
}
SDValue MipsTargetLowering::
-LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// check the depth
assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
"Frame address can only be determined for current frame.");
@@ -2139,13 +1776,13 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
- SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
IsN64 ? Mips::FP_64 : Mips::FP, VT);
return FrameAddr;
}
-SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
+SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
// check the depth
assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
@@ -2153,7 +1790,7 @@ SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
unsigned RA = IsN64 ? Mips::RA_64 : Mips::RA;
MFI->setReturnAddressIsTaken(true);
@@ -2162,26 +1799,54 @@ SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getCopyFromReg(DAG.getEntryNode(), Op.getDebugLoc(), Reg, VT);
}
+// An EH_RETURN is the result of lowering llvm.eh.return, which in turn is
+// generated from __builtin_eh_return(offset, handler). The effect of this
+// is to adjust the stack pointer by "offset" and then branch to "handler".
+SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ MipsFI->setCallsEhReturn();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+
+ // Store stack offset in V1, store jump target in V0. Glue CopyToReg and
+ // EH_RETURN nodes, so that instructions are emitted back-to-back.
+ unsigned OffsetReg = IsN64 ? Mips::V1_64 : Mips::V1;
+ unsigned AddrReg = IsN64 ? Mips::V0_64 : Mips::V0;
+ Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue());
+ Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));
+ return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain,
+ DAG.getRegister(OffsetReg, Ty),
+ DAG.getRegister(AddrReg, getPointerTy()),
+ Chain.getValue(1));
+}
+
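// A hedged illustration of the source-level origin of EH_RETURN: the GCC/Clang
// builtin below lowers to llvm.eh.return, which this function turns into the
// V1/V0 copies plus the MipsISD::EH_RETURN node.
#include <cstdint>

static void unwindTo(intptr_t StackAdjust, void *Handler) {
  __builtin_eh_return(StackAdjust, Handler); // adjust SP, then jump to Handler
}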
// TODO: set SType according to the desired memory barrier behavior.
SDValue
-MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
+MipsTargetLowering::lowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
unsigned SType = 0;
- DebugLoc dl = Op.getDebugLoc();
- return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0),
DAG.getConstant(SType, MVT::i32));
}
-SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op,
SelectionDAG &DAG) const {
// FIXME: Need pseudo-fence for 'singlethread' fences
// FIXME: Set SType for weaker fences where supported/appropriate.
unsigned SType = 0;
- DebugLoc dl = Op.getDebugLoc();
- return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0),
DAG.getConstant(SType, MVT::i32));
}
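// Illustrative source reaching lowerATOMIC_FENCE; with SType still hard-coded
// to 0 (see the FIXMEs above), every fence becomes a full barrier.
#include <atomic>

static void fullFence() {
  std::atomic_thread_fence(std::memory_order_seq_cst); // MipsISD::Sync, SType 0
}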
-SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op,
+SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
@@ -2212,7 +1877,7 @@ SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op,
return DAG.getMergeValues(Ops, 2, DL);
}
-SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
bool IsSRA) const {
DebugLoc DL = Op.getDebugLoc();
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
@@ -2271,7 +1936,7 @@ static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
}
// Expand an unaligned 32 or 64-bit integer load node.
-SDValue MipsTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
@@ -2349,7 +2014,7 @@ static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
}
// Expand an unaligned 32 or 64-bit integer store node.
-SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
StoreSDNode *SD = cast<StoreSDNode>(Op);
EVT MemVT = SD->getMemoryVT();
@@ -2385,6 +2050,22 @@ SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
}
+static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) {
+ SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+ DAG.getConstant(0, MVT::i32));
+ SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi);
+}
+
+static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) {
+ SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+ DAG.getConstant(Mips::sub_lo, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+ DAG.getConstant(Mips::sub_hi, MVT::i32));
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+}
+
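// A hedged scalar model of the accumulator helpers above: initAccumulator
// splits an i64 into {lo,hi} halves (EXTRACT_ELEMENT 0/1) and extractLOHI
// re-pairs the LO/HI halves (BUILD_PAIR).
#include <cstdint>

static uint32_t lo32(uint64_t V) { return uint32_t(V); }
static uint32_t hi32(uint64_t V) { return uint32_t(V >> 32); }
static uint64_t pairLOHI(uint32_t Lo, uint32_t Hi) {
  return (uint64_t(Hi) << 32) | Lo;
}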
// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
@@ -2397,140 +2078,143 @@ SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
-static SDValue LowerDSPIntr(SDValue Op, SelectionDAG &DAG,
- unsigned Opc, bool HasI64In, bool HasI64Out) {
+static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
DebugLoc DL = Op.getDebugLoc();
bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
- SDValue Chain = HasChainIn ? Op->getOperand(0) : DAG.getEntryNode();
SmallVector<SDValue, 3> Ops;
+ unsigned OpNo = 0;
- if (HasI64In) {
- SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
- Op->getOperand(1 + HasChainIn),
- DAG.getConstant(0, MVT::i32));
- SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
- Op->getOperand(1 + HasChainIn),
- DAG.getConstant(1, MVT::i32));
+ // See if Op has a chain input.
+ if (HasChainIn)
+ Ops.push_back(Op->getOperand(OpNo++));
- Chain = DAG.getCopyToReg(Chain, DL, Mips::LO, InLo, SDValue());
- Chain = DAG.getCopyToReg(Chain, DL, Mips::HI, InHi, Chain.getValue(1));
+ // The next operand is the intrinsic opcode.
+ assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
- Ops.push_back(Chain);
- Ops.append(Op->op_begin() + HasChainIn + 2, Op->op_end());
- Ops.push_back(Chain.getValue(1));
- } else {
- Ops.push_back(Chain);
- Ops.append(Op->op_begin() + HasChainIn + 1, Op->op_end());
- }
+ // See if the next operand has type i64.
+ SDValue Opnd = Op->getOperand(++OpNo), In64;
+
+ if (Opnd.getValueType() == MVT::i64)
+ In64 = initAccumulator(Opnd, DL, DAG);
+ else
+ Ops.push_back(Opnd);
- if (!HasI64Out)
- return DAG.getNode(Opc, DL, Op->value_begin(), Op->getNumValues(),
- Ops.begin(), Ops.size());
+ // Push the remaining operands.
+ for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
+ Ops.push_back(Op->getOperand(OpNo));
- SDValue Intr = DAG.getNode(Opc, DL, DAG.getVTList(MVT::Other, MVT::Glue),
- Ops.begin(), Ops.size());
- SDValue OutLo = DAG.getCopyFromReg(Intr.getValue(0), DL, Mips::LO, MVT::i32,
- Intr.getValue(1));
- SDValue OutHi = DAG.getCopyFromReg(OutLo.getValue(1), DL, Mips::HI, MVT::i32,
- OutLo.getValue(2));
- SDValue Out = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, OutLo, OutHi);
+ // Add In64 to the end of the list.
+ if (In64.getNode())
+ Ops.push_back(In64);
+
+ // Scan output.
+ SmallVector<EVT, 2> ResTys;
+
+ for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
+ I != E; ++I)
+ ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
+
+ // Create node.
+ SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
+ SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
if (!HasChainIn)
return Out;
- SDValue Vals[] = { Out, OutHi.getValue(1) };
+ assert(Val->getValueType(1) == MVT::Other);
+ SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
return DAG.getMergeValues(Vals, 2, DL);
}
-SDValue MipsTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
default:
return SDValue();
case Intrinsic::mips_shilo:
- return LowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
case Intrinsic::mips_dpau_h_qbl:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
case Intrinsic::mips_dpau_h_qbr:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
case Intrinsic::mips_dpsu_h_qbl:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
case Intrinsic::mips_dpsu_h_qbr:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
case Intrinsic::mips_dpa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
case Intrinsic::mips_dps_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
case Intrinsic::mips_dpax_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
case Intrinsic::mips_dpsx_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
case Intrinsic::mips_mulsa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
case Intrinsic::mips_mult:
- return LowerDSPIntr(Op, DAG, MipsISD::MULT, false, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::Mult);
case Intrinsic::mips_multu:
- return LowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::Multu);
case Intrinsic::mips_madd:
- return LowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
case Intrinsic::mips_maddu:
- return LowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
case Intrinsic::mips_msub:
- return LowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MSub);
case Intrinsic::mips_msubu:
- return LowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
}
}
-SDValue MipsTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
default:
return SDValue();
case Intrinsic::mips_extp:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
case Intrinsic::mips_extpdp:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
case Intrinsic::mips_extr_w:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
case Intrinsic::mips_extr_r_w:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
case Intrinsic::mips_extr_rs_w:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
case Intrinsic::mips_extr_s_h:
- return LowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false);
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
case Intrinsic::mips_mthlip:
- return LowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
case Intrinsic::mips_mulsaq_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
case Intrinsic::mips_maq_s_w_phl:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
case Intrinsic::mips_maq_s_w_phr:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
case Intrinsic::mips_maq_sa_w_phl:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
case Intrinsic::mips_maq_sa_w_phr:
- return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
case Intrinsic::mips_dpaq_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
case Intrinsic::mips_dpsq_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
case Intrinsic::mips_dpaq_sa_l_w:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
case Intrinsic::mips_dpsq_sa_l_w:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
case Intrinsic::mips_dpaqx_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
case Intrinsic::mips_dpaqx_sa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
case Intrinsic::mips_dpsqx_s_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
case Intrinsic::mips_dpsqx_sa_w_ph:
- return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true);
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
}
}
-SDValue MipsTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
+SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const {
if (Op->getOperand(0).getOpcode() != ISD::FRAMEADDR
|| cast<ConstantSDNode>
(Op->getOperand(0).getOperand(0))->getZExtValue() != 0
@@ -2667,28 +2351,6 @@ static unsigned getNextIntArgReg(unsigned Reg) {
return (Reg == Mips::A0) ? Mips::A1 : Mips::A3;
}
-/// IsEligibleForTailCallOptimization - Check whether the call is eligible
-/// for tail call optimization.
-bool MipsTargetLowering::
-IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const {
- if (!EnableMipsTailCalls)
- return false;
-
- // No tail call optimization for mips16.
- if (Subtarget->inMips16Mode())
- return false;
-
- // Return false if either the callee or caller has a byval argument.
- if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
- return false;
-
- // Return true if the callee's argument area is no larger than the
- // caller's.
- return NextStackOffset <= FI.getIncomingArgSize();
-}
-
SDValue
MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
SDValue Chain, SDValue Arg, DebugLoc DL,
@@ -2707,21 +2369,65 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
/*isVolatile=*/ true, false, 0);
}
+void MipsTargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // Insert node "GP copy globalreg" before call to function.
+ //
+ // R_MIPS_CALL* operators (emitted when non-internal functions are called
+ // in PIC mode) allow symbols to be resolved via lazy binding.
+ // The lazy binding stub requires GP to point to the GOT.
+ if (IsPICCall && !InternalLinkage) {
+ unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+ RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag in necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(CLI.DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+}
+
/// LowerCall - function arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue
MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
+ DebugLoc &DL = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
+ bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
- bool isVarArg = CLI.IsVarArg;
+ bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -2730,22 +2436,24 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
- MipsCCInfo.analyzeCallOperands(Outs);
+ MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
+ getTargetMachine().Options.UseSoftFloat,
+ Callee.getNode(), CLI.Args);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
// Check if it's really possible to do a tail call.
- if (isTailCall)
- isTailCall =
- IsEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset,
+ if (IsTailCall)
+ IsTailCall =
+ isEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset,
*MF.getInfo<MipsFunctionInfo>());
- if (isTailCall)
+ if (IsTailCall)
++NumTailCalls;
// Chain is the output chain of the last Load/Store or CopyToReg node.
@@ -2755,15 +2463,15 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment);
SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
- if (!isTailCall)
+ if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal);
- SDValue StackPtr = DAG.getCopyFromReg(Chain, dl,
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, DL,
IsN64 ? Mips::SP_64 : Mips::SP,
getPointerTy());
// With EABI it is possible to have 16 args in registers.
- SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ std::deque< std::pair<unsigned, SDValue> > RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
@@ -2779,9 +2487,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
assert(ByValArg != MipsCCInfo.byval_end());
- assert(!isTailCall &&
+ assert(!IsTailCall &&
"Do not tail-call optimize if there is a byval argument.");
- passByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
+ passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
MipsCCInfo, *ByValArg, Flags, Subtarget->isLittle());
++ByValArg;
continue;
@@ -2793,12 +2501,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
case CCValAssign::Full:
if (VA.isRegLoc()) {
if ((ValVT == MVT::f32 && LocVT == MVT::i32) ||
- (ValVT == MVT::f64 && LocVT == MVT::i64))
- Arg = DAG.getNode(ISD::BITCAST, dl, LocVT, Arg);
+ (ValVT == MVT::f64 && LocVT == MVT::i64) ||
+ (ValVT == MVT::i64 && LocVT == MVT::f64))
+ Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
else if (ValVT == MVT::f64 && LocVT == MVT::i32) {
- SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
Arg, DAG.getConstant(0, MVT::i32));
- SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
Arg, DAG.getConstant(1, MVT::i32));
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
@@ -2811,13 +2520,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
break;
case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, LocVT, Arg);
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg);
break;
case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, LocVT, Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg);
break;
case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, LocVT, Arg);
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg);
break;
}
@@ -2834,25 +2543,27 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// emit ISD::STORE, which stores the
// parameter value to a stack location.
MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(),
- Chain, Arg, dl, isTailCall, DAG));
+ Chain, Arg, DL, IsTailCall, DAG));
}
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
- bool GlobalOrExternal = false;
+ bool GlobalOrExternal = false, InternalLinkage = false;
SDValue CalleeLo;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
if (IsPICCall) {
- if (G->getGlobal()->hasInternalLinkage())
+ InternalLinkage = G->getGlobal()->hasInternalLinkage();
+
+ if (InternalLinkage)
Callee = getAddrLocal(Callee, DAG, HasMips64);
else if (LargeGOT)
Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
@@ -2860,7 +2571,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else
Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
} else
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0,
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0,
MipsII::MO_NO_FLAG);
GlobalOrExternal = true;
}
@@ -2871,84 +2582,23 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else if (LargeGOT)
Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16);
- else if (HasMips64)
- Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_DISP);
- else // O32 & PIC
+ else // N64 || PIC
Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
GlobalOrExternal = true;
}
- SDValue InFlag;
-
- // T9 register operand.
- SDValue T9;
-
- // T9 should contain the address of the callee function if
- // -reloction-model=pic or it is an indirect call.
- if (IsPICCall || !GlobalOrExternal) {
- // copy to T9
- unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
- Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
- InFlag = Chain.getValue(1);
-
- if (Subtarget->inMips16Mode())
- T9 = DAG.getRegister(T9Reg, getPointerTy());
- else
- Callee = DAG.getRegister(T9Reg, getPointerTy());
- }
-
- // Insert node "GP copy globalreg" before call to function.
- // Lazy-binding stubs require GP to point to the GOT.
- if (IsPICCall) {
- unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
- EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
- RegsToPass.push_back(std::make_pair(GPReg, GetGlobalReg(DAG, Ty)));
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token
- // chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emitted instructions must be
- // stuck together.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // MipsJmpLink = #chain, #target_address, #opt_in_flags...
- // = Chain, Callee, Reg#1, Reg#2, ...
- //
- // Returns a chain & a flag for retval copy to use.
+ SmallVector<SDValue, 8> Ops(1, Chain);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
- // Add argument registers to the end of the list so that they are
- // known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
+ getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
+ CLI, Callee, Chain);
- // Add T9 register operand.
- if (T9.getNode())
- Ops.push_back(T9);
+ if (IsTailCall)
+ return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size());
- // Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
-
- if (isTailCall)
- return DAG.getNode(MipsISD::TailCall, dl, MVT::Other, &Ops[0], Ops.size());
-
- Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
+ Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size());
+ SDValue InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
@@ -2957,31 +2607,40 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
- Ins, dl, DAG, InVals);
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg,
+ Ins, DL, DAG, InVals, CLI.Callee.getNode(), CLI.RetTy);
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SDNode *CallNode,
+ const Type *RetTy) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
- CCInfo.AnalyzeCallResult(Ins, RetCC_Mips);
+ MipsCCInfo.analyzeCallResult(Ins, getTargetMachine().Options.UseSoftFloat,
+ CallNode, RetTy);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
- Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
- RVLocs[i].getValVT(), InFlag).getValue(1);
- InFlag = Chain.getValue(2);
- InVals.push_back(Chain.getValue(0));
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(),
+ RVLocs[i].getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
+ Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getValVT(), Val);
+
+ InVals.push_back(Val);
}
return Chain;
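// A hedged illustration of the ValVT != LocVT case handled above: a value
// returned in a register of a different class is re-typed with ISD::BITCAST;
// the memcpy below is the portable C++ model of that bit-for-bit move.
#include <cstdint>
#include <cstring>

static float retypeF32(uint32_t RegBits) {
  float F;
  std::memcpy(&F, &RegBits, sizeof F);
  return F;
}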
@@ -2995,9 +2654,9 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SDValue
MipsTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
- bool isVarArg,
+ bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ DebugLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -3011,16 +2670,17 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+ Function::const_arg_iterator FuncArg =
+ DAG.getMachineFunction().getFunction()->arg_begin();
+ bool UseSoftFloat = getTargetMachine().Options.UseSoftFloat;
- MipsCCInfo.analyzeFormalArguments(Ins);
+ MipsCCInfo.analyzeFormalArguments(Ins, UseSoftFloat, FuncArg);
MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
MipsCCInfo.hasByValArg());
- Function::const_arg_iterator FuncArg =
- DAG.getMachineFunction().getFunction()->arg_begin();
unsigned CurArgIdx = 0;
MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
@@ -3036,7 +2696,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
assert(Flags.getByValSize() &&
"ByVal args of size 0 should have been ignored by front-end.");
assert(ByValArg != MipsCCInfo.byval_end());
- copyByValRegs(Chain, dl, OutChains, DAG, Flags, InVals, &*FuncArg,
+ copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg,
MipsCCInfo, *ByValArg);
++ByValArg;
continue;
@@ -3049,7 +2709,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
- RC = &Mips::CPURegsRegClass;
+ RC = Subtarget->inMips16Mode()? &Mips::CPU16RegsRegClass :
+ &Mips::CPURegsRegClass;
else if (RegVT == MVT::i64)
RC = &Mips::CPU64RegsRegClass;
else if (RegVT == MVT::f32)
@@ -3061,8 +2722,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Transform the arguments stored on
// physical registers into virtual ones
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgReg, RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8 or 16-bit value, it has been passed promoted
// to 32 bits. Insert an assert[sz]ext to capture this, then
@@ -3074,22 +2735,24 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
else if (VA.getLocInfo() == CCValAssign::ZExt)
Opcode = ISD::AssertZext;
if (Opcode)
- ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ ArgValue = DAG.getNode(Opcode, DL, RegVT, ArgValue,
DAG.getValueType(ValVT));
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
+ ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue);
}
- // Handle floating point arguments passed in integer registers.
+ // Handle floating point arguments passed in integer registers and
+ // long double arguments passed in floating point registers.
if ((RegVT == MVT::i32 && ValVT == MVT::f32) ||
- (RegVT == MVT::i64 && ValVT == MVT::f64))
- ArgValue = DAG.getNode(ISD::BITCAST, dl, ValVT, ArgValue);
+ (RegVT == MVT::i64 && ValVT == MVT::f64) ||
+ (RegVT == MVT::f64 && ValVT == MVT::i64))
+ ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
else if (IsO32 && RegVT == MVT::i32 && ValVT == MVT::f64) {
- unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+ unsigned Reg2 = addLiveIn(DAG.getMachineFunction(),
getNextIntArgReg(ArgReg), RC);
- SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Chain, DL, Reg2, RegVT);
if (!Subtarget->isLittle())
std::swap(ArgValue, ArgValue2);
- ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
+ ArgValue = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64,
ArgValue, ArgValue2);
}
@@ -3105,7 +2768,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(ValVT, dl, Chain, FIN,
+ InVals.push_back(DAG.getLoad(ValVT, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0));
}
@@ -3121,18 +2784,18 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
createVirtualRegister(getRegClassFor(IsN64 ? MVT::i64 : MVT::i32));
MipsFI->setSRetReturnReg(Reg);
}
- SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
}
- if (isVarArg)
- writeVarArgRegs(OutChains, MipsCCInfo, Chain, dl, DAG);
+ if (IsVarArg)
+ writeVarArgRegs(OutChains, MipsCCInfo, Chain, DL, DAG);
// All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens on vararg functions.
if (!OutChains.empty()) {
OutChains.push_back(Chain);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&OutChains[0], OutChains.size());
}
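
The TokenFactor idiom above merges every store chain produced while lowering the arguments into one node, so later operations depend on all of the stores at once and the sizes of Ins and InVals stay in step. A toy model of that join, with stand-in types rather than the SelectionDAG classes:

    #include <cassert>
    #include <memory>
    #include <utility>
    #include <vector>

    // A TokenFactor-like join: one node whose predecessors are all of the
    // pending chains, so anything ordered after it is ordered after each.
    struct ChainNode {
      std::vector<std::shared_ptr<ChainNode>> Preds;
    };

    std::shared_ptr<ChainNode>
    joinChains(std::vector<std::shared_ptr<ChainNode>> Chains) {
      std::shared_ptr<ChainNode> Join(new ChainNode());
      Join->Preds = std::move(Chains);
      return Join;
    }

    int main() {
      std::vector<std::shared_ptr<ChainNode>> Stores;
      for (int I = 0; I < 3; ++I)
        Stores.push_back(std::shared_ptr<ChainNode>(new ChainNode()));
      assert(joinChains(Stores)->Preds.size() == 3);
      return 0;
    }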
@@ -3145,80 +2808,80 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
bool
MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
- MachineFunction &MF, bool isVarArg,
+ MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(),
RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
+ DebugLoc DL, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
// the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
+ MachineFunction &MF = DAG.getMachineFunction();
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
+ *DAG.getContext());
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
- // Analize return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
-
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
+ // Analyze return values.
+ MipsCCInfo.analyzeReturn(Outs, getTargetMachine().Options.UseSoftFloat,
+ MF.getFunction()->getReturnType());
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ SDValue Val = OutVals[i];
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+ if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
+ Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val);
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
- // guarantee that all emitted copies are
- // stuck together, avoiding something bad
+ // Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
  // The Mips ABIs for returning structs by value require that we copy
// the sret argument into $v0 for the return. We saved the argument into
// a virtual register in the entry block, so now we copy the value out
// and into $v0.
- if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
- MachineFunction &MF = DAG.getMachineFunction();
+ if (MF.getFunction()->hasStructRetAttr()) {
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
unsigned Reg = MipsFI->getSRetReturnReg();
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
- SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
unsigned V0 = IsN64 ? Mips::V0_64 : Mips::V0;
- Chain = DAG.getCopyToReg(Chain, dl, V0, Val, Flag);
+ Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag);
Flag = Chain.getValue(1);
- MF.getRegInfo().addLiveOut(V0);
+ RetOps.push_back(DAG.getRegister(V0, getPointerTy()));
}
- // Return on Mips is always a "jr $ra"
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain, Flag);
+ RetOps.push_back(Flag);
- // Return Void
- return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain);
+ // Return on Mips is always a "jr $ra"
+ return DAG.getNode(MipsISD::Ret, DL, MVT::Other, &RetOps[0], RetOps.size());
}
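
This hunk tracks an LLVM-wide change of the period, visible in the removed addLiveOut calls: return-register liveness moves from MachineRegisterInfo live-outs to explicit operands on the Ret node, built as chain first, one operand per copied register, glue last. A sketch of that operand-list shape, with strings standing in for SDValues:

    #include <cassert>
    #include <string>
    #include <vector>

    // Build the operand list for a return node: slot 0 is the chain, then
    // one operand per return register, then the trailing glue if any
    // register copies were emitted.
    std::vector<std::string> buildRetOps(const std::string &Chain,
                                         const std::vector<std::string> &Regs,
                                         const std::string &Glue) {
      std::vector<std::string> RetOps(1, Chain);
      for (size_t I = 0; I < Regs.size(); ++I)
        RetOps.push_back(Regs[I]);
      if (!Glue.empty())
        RetOps.push_back(Glue);
      return RetOps;
    }

    int main() {
      std::vector<std::string> Regs(1, "$v0");
      std::vector<std::string> Ops = buildRetOps("chain", Regs, "glue");
      assert(Ops.size() == 3 && Ops[0] == "chain" && Ops.back() == "glue");
      return 0;
    }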
//===----------------------------------------------------------------------===//
@@ -3251,6 +2914,8 @@ getConstraintType(const std::string &Constraint) const
case 'l':
case 'x':
return C_RegisterClass;
+ case 'R':
+ return C_Memory;
}
}
return TargetLowering::getConstraintType(Constraint);
@@ -3299,6 +2964,9 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
if (isa<ConstantInt>(CallOperandVal))
weight = CW_Constant;
break;
+ case 'R':
+ weight = CW_Memory;
+ break;
}
return weight;
}
@@ -3448,13 +3116,34 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
bool
+MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM, Type *Ty) const {
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (!AM.HasBaseReg) // allow "r+i".
+ break;
+ return false; // disallow "r+r" or "r+r+i".
+ default:
+ return false;
+ }
+
+ return true;
+}
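
The new isLegalAddressingMode override restricts Mips addressing to base-plus-immediate. A standalone restatement of the same predicate, with a small struct mirroring the fields it reads; the field and function names here are ours:

    #include <cassert>

    // Mips accepts "r+i" (base register plus immediate) and bare "i", but
    // not scaled indices, "r+r", or a global symbol as base.
    struct AddrMode {
      bool HasBaseGV;   // stands in for AM.BaseGV != nullptr
      bool HasBaseReg;
      int Scale;
    };

    bool isLegalMipsAddressingMode(const AddrMode &AM) {
      if (AM.HasBaseGV)
        return false;                  // no global as base
      switch (AM.Scale) {
      case 0:                          // "r+i" or just "i"
        return true;
      case 1:
        return !AM.HasBaseReg;         // scale-1 index alone is "r+i", but
                                       // "r+r" / "r+r+i" are rejected
      default:
        return false;
      }
    }

    int main() {
      assert(isLegalMipsAddressingMode({false, true, 0}));   // r+i
      assert(!isLegalMipsAddressingMode({false, true, 1}));  // r+r
      assert(!isLegalMipsAddressingMode({true, false, 0}));  // global base
      return 0;
    }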
+
+bool
MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Mips target isn't yet aware of offsets.
return false;
}
EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsZeroVal,
+ unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
if (Subtarget->hasMips64())
@@ -3478,40 +3167,62 @@ unsigned MipsTargetLowering::getJumpTableEncoding() const {
return TargetLowering::getJumpTableEncoding();
}
-MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CallConv, bool IsVarArg,
- bool IsO32, CCState &Info) : CCInfo(Info) {
- UseRegsForByval = true;
+/// This function returns true if CallSym is a long double emulation routine.
+static bool isF128SoftLibCall(const char *CallSym) {
+ const char *const LibCalls[] =
+ {"__addtf3", "__divtf3", "__eqtf2", "__extenddftf2", "__extendsftf2",
+ "__fixtfdi", "__fixtfsi", "__fixtfti", "__fixunstfdi", "__fixunstfsi",
+ "__fixunstfti", "__floatditf", "__floatsitf", "__floattitf",
+ "__floatunditf", "__floatunsitf", "__floatuntitf", "__getf2", "__gttf2",
+ "__letf2", "__lttf2", "__multf3", "__netf2", "__powitf2", "__subtf3",
+ "__trunctfdf2", "__trunctfsf2", "__unordtf2",
+ "ceill", "copysignl", "cosl", "exp2l", "expl", "floorl", "fmal", "fmodl",
+ "log10l", "log2l", "logl", "nearbyintl", "powl", "rintl", "sinl", "sqrtl",
+ "truncl"};
- if (IsO32) {
- RegSize = 4;
- NumIntArgRegs = array_lengthof(O32IntRegs);
- ReservedArgArea = 16;
- IntArgRegs = ShadowRegs = O32IntRegs;
- FixedFn = VarFn = CC_MipsO32;
- } else {
- RegSize = 8;
- NumIntArgRegs = array_lengthof(Mips64IntRegs);
- ReservedArgArea = 0;
- IntArgRegs = Mips64IntRegs;
- ShadowRegs = Mips64DPRegs;
- FixedFn = CC_MipsN;
- VarFn = CC_MipsN_VarArg;
- }
+ const char * const *End = LibCalls + array_lengthof(LibCalls);
- if (CallConv == CallingConv::Fast) {
- assert(!IsVarArg);
- UseRegsForByval = false;
- ReservedArgArea = 0;
- FixedFn = VarFn = CC_Mips_FastCC;
- }
+ // Check that LibCalls is sorted alphabetically.
+ MipsTargetLowering::LTStr Comp;
+
+#ifndef NDEBUG
+ for (const char * const *I = LibCalls; I < End - 1; ++I)
+ assert(Comp(*I, *(I + 1)));
+#endif
+
+ return std::binary_search(LibCalls, End, CallSym, Comp);
+}
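
isF128SoftLibCall is a straightforward sorted-table lookup: keep the names sorted, assert the ordering in debug builds, then std::binary_search with a strcmp predicate. The same idiom in a self-contained form, over a made-up three-entry table:

    #include <algorithm>
    #include <cassert>
    #include <cstring>

    struct LTStr {
      bool operator()(const char *S1, const char *S2) const {
        return std::strcmp(S1, S2) < 0;
      }
    };

    static bool isInTable(const char *Sym) {
      static const char *const Table[] = {"__addtf3", "__multf3", "sqrtl"};
      const char *const *End = Table + sizeof(Table) / sizeof(Table[0]);
    #ifndef NDEBUG
      // Binary search is only valid if the table really is sorted.
      for (const char *const *I = Table; I < End - 1; ++I)
        assert(LTStr()(*I, *(I + 1)) && "table must stay sorted");
    #endif
      return std::binary_search(Table, End, Sym, LTStr());
    }

    int main() {
      assert(isInTable("__multf3"));
      assert(!isInTable("memcpy"));
      return 0;
    }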
+
+/// This function returns true if Ty is fp128, or an i128 that was
+/// originally an fp128.
+static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
+ if (Ty->isFP128Ty())
+ return true;
+ const ExternalSymbolSDNode *ES =
+ dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode);
+
+ // If the Ty is i128 and the function being called is a long double emulation
+ // routine, then the original type is f128.
+ return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
+}
+
+MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CC, bool IsO32_,
+ CCState &Info)
+ : CCInfo(Info), CallConv(CC), IsO32(IsO32_) {
// Pre-allocate reserved argument area.
- CCInfo.AllocateStack(ReservedArgArea, 1);
+ CCInfo.AllocateStack(reservedArgArea(), 1);
}
void MipsTargetLowering::MipsCC::
-analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) {
+analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
+ bool IsVarArg, bool IsSoftFloat, const SDNode *CallNode,
+ std::vector<ArgListEntry> &FuncArgs) {
+ assert((CallConv != CallingConv::Fast || !IsVarArg) &&
+ "CallingConv::Fast shouldn't be used for vararg functions.");
+
unsigned NumOpnds = Args.size();
+ llvm::CCAssignFn *FixedFn = fixedArgFn(), *VarFn = varArgFn();
for (unsigned I = 0; I != NumOpnds; ++I) {
MVT ArgVT = Args[I].VT;
@@ -3523,10 +3234,13 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) {
continue;
}
- if (Args[I].IsFixed)
- R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
- else
+ if (IsVarArg && !Args[I].IsFixed)
R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+ else {
+ MVT RegVT = getRegVT(ArgVT, FuncArgs[Args[I].OrigArgIndex].Ty, CallNode,
+ IsSoftFloat);
+ R = FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo);
+ }
if (R) {
#ifndef NDEBUG
@@ -3539,19 +3253,26 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) {
}
void MipsTargetLowering::MipsCC::
-analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) {
+analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args,
+ bool IsSoftFloat, Function::const_arg_iterator FuncArg) {
unsigned NumArgs = Args.size();
+ llvm::CCAssignFn *FixedFn = fixedArgFn();
+ unsigned CurArgIdx = 0;
for (unsigned I = 0; I != NumArgs; ++I) {
MVT ArgVT = Args[I].VT;
ISD::ArgFlagsTy ArgFlags = Args[I].Flags;
+ std::advance(FuncArg, Args[I].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Args[I].OrigArgIndex;
if (ArgFlags.isByVal()) {
handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags);
continue;
}
- if (!FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo))
+ MVT RegVT = getRegVT(ArgVT, FuncArg->getType(), 0, IsSoftFloat);
+
+ if (!FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo))
continue;
#ifndef NDEBUG
@@ -3562,6 +3283,44 @@ analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) {
}
}
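
The OrigArgIndex bookkeeping above exists because one IR argument can be lowered into several ISD::InputArg pieces, so the argument iterator is advanced by the delta between original indices rather than once per piece. A self-contained sketch of that walk, using std::list as another non-random-access sequence:

    #include <cassert>
    #include <iterator>
    #include <list>

    // Return the IR argument that piece number Wanted maps back to.
    int origArgAt(const std::list<int> &IRArgs, const unsigned *OrigIdx,
                  unsigned NumPieces, unsigned Wanted) {
      std::list<int>::const_iterator FuncArg = IRArgs.begin();
      unsigned CurArgIdx = 0;
      int Result = 0;
      for (unsigned I = 0; I != NumPieces; ++I) {
        // Advance by the index delta: zero when several pieces share an arg.
        std::advance(FuncArg, OrigIdx[I] - CurArgIdx);
        CurArgIdx = OrigIdx[I];
        if (I == Wanted)
          Result = *FuncArg;
      }
      return Result;
    }

    int main() {
      std::list<int> Args;                       // IR arguments: 10, 20, 30
      Args.push_back(10); Args.push_back(20); Args.push_back(30);
      const unsigned OrigIdx[] = {0, 0, 1, 2};   // two pieces for argument 0
      assert(origArgAt(Args, OrigIdx, 4, 1) == 10);  // second piece -> arg 0
      assert(origArgAt(Args, OrigIdx, 4, 3) == 30);
      return 0;
    }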
+template<typename Ty>
+void MipsTargetLowering::MipsCC::
+analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const {
+ CCAssignFn *Fn;
+
+ if (IsSoftFloat && originalTypeIsF128(RetTy, CallNode))
+ Fn = RetCC_F128Soft;
+ else
+ Fn = RetCC_Mips;
+
+ for (unsigned I = 0, E = RetVals.size(); I < E; ++I) {
+ MVT VT = RetVals[I].VT;
+ ISD::ArgFlagsTy Flags = RetVals[I].Flags;
+ MVT RegVT = this->getRegVT(VT, RetTy, CallNode, IsSoftFloat);
+
+ if (Fn(I, VT, RegVT, CCValAssign::Full, Flags, this->CCInfo)) {
+#ifndef NDEBUG
+ dbgs() << "Call result #" << I << " has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
+void MipsTargetLowering::MipsCC::
+analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const {
+ analyzeReturn(Ins, IsSoftFloat, CallNode, RetTy);
+}
+
+void MipsTargetLowering::MipsCC::
+analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsSoftFloat,
+ const Type *RetTy) const {
+ analyzeReturn(Outs, IsSoftFloat, 0, RetTy);
+}
+
void
MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
MVT LocVT,
@@ -3570,11 +3329,12 @@ MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0.");
struct ByValArgInfo ByVal;
+ unsigned RegSize = regSize();
unsigned ByValSize = RoundUpToAlignment(ArgFlags.getByValSize(), RegSize);
unsigned Align = std::min(std::max(ArgFlags.getByValAlign(), RegSize),
RegSize * 2);
- if (UseRegsForByval)
+ if (useRegsForByval())
allocateRegs(ByVal, ByValSize, Align);
// Allocate space on caller's stack.
@@ -3585,9 +3345,38 @@ MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
ByValArgs.push_back(ByVal);
}
+unsigned MipsTargetLowering::MipsCC::numIntArgRegs() const {
+ return IsO32 ? array_lengthof(O32IntRegs) : array_lengthof(Mips64IntRegs);
+}
+
+unsigned MipsTargetLowering::MipsCC::reservedArgArea() const {
+ return (IsO32 && (CallConv != CallingConv::Fast)) ? 16 : 0;
+}
+
+const uint16_t *MipsTargetLowering::MipsCC::intArgRegs() const {
+ return IsO32 ? O32IntRegs : Mips64IntRegs;
+}
+
+llvm::CCAssignFn *MipsTargetLowering::MipsCC::fixedArgFn() const {
+ if (CallConv == CallingConv::Fast)
+ return CC_Mips_FastCC;
+
+ return IsO32 ? CC_MipsO32 : CC_MipsN;
+}
+
+llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const {
+ return IsO32 ? CC_MipsO32 : CC_MipsN_VarArg;
+}
+
+const uint16_t *MipsTargetLowering::MipsCC::shadowRegs() const {
+ return IsO32 ? O32IntRegs : Mips64DPRegs;
+}
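
These accessors replace fields the old constructor cached up front (RegSize, NumIntArgRegs, ReservedArgArea, the assign functions); after the change only CallConv and IsO32 are stored and everything else is derived on demand, so it cannot go stale. A minimal sketch of the same shape, with the constants taken from this hunk and our own class name:

    #include <cassert>

    // Store only the two inputs; compute the rest when asked.
    class CCConfig {
      bool IsO32;
      bool IsFastCC;
    public:
      CCConfig(bool O32, bool Fast) : IsO32(O32), IsFastCC(Fast) {}
      unsigned regSize() const { return IsO32 ? 4 : 8; }
      unsigned reservedArgArea() const {
        return (IsO32 && !IsFastCC) ? 16 : 0;   // O32 reserves 16 bytes
      }
    };

    int main() {
      CCConfig O32(true, false);
      assert(O32.regSize() == 4 && O32.reservedArgArea() == 16);
      CCConfig N64Fast(false, true);
      assert(N64Fast.regSize() == 8 && N64Fast.reservedArgArea() == 0);
      return 0;
    }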
+
void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal,
unsigned ByValSize,
unsigned Align) {
+ unsigned RegSize = regSize(), NumIntArgRegs = numIntArgRegs();
+ const uint16_t *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs();
  assert(!(ByValSize % RegSize) && !(Align % RegSize) &&
         "Byval argument's size and alignment should be a multiple of "
         "RegSize.");
@@ -3606,6 +3395,21 @@ void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal,
CCInfo.AllocateReg(IntArgRegs[I], ShadowRegs[I]);
}
+MVT MipsTargetLowering::MipsCC::getRegVT(MVT VT, const Type *OrigTy,
+ const SDNode *CallNode,
+ bool IsSoftFloat) const {
+ if (IsSoftFloat || IsO32)
+ return VT;
+
+ // Check if the original type was fp128.
+ if (originalTypeIsF128(OrigTy, CallNode)) {
+ assert(VT == MVT::i64);
+ return MVT::f64;
+ }
+
+ return VT;
+}
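
getRegVT has exactly one interesting case: on the 64-bit hard-float ABIs, an i64 that is really half of a softened fp128 travels in an f64 register. A standalone restatement with an enum standing in for MVT:

    #include <cassert>

    enum SimpleVT { I64, F64, OtherVT };

    // Under a hard-float non-O32 ABI, i64 pieces of an original fp128 are
    // carried in f64 registers; everything else keeps its type.
    SimpleVT getRegVT(SimpleVT VT, bool OrigTypeWasF128, bool IsSoftFloat,
                      bool IsO32) {
      if (IsSoftFloat || IsO32)
        return VT;                     // integer registers either way
      if (OrigTypeWasF128) {
        assert(VT == I64 && "softened fp128 pieces must be i64");
        return F64;
      }
      return VT;
    }

    int main() {
      assert(getRegVT(I64, true, false, false) == F64); // N64 hard-float
      assert(getRegVT(I64, true, true, false) == I64);  // soft-float
      return 0;
    }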
+
void MipsTargetLowering::
copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains,
SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags,
@@ -3633,12 +3437,12 @@ copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains,
return;
// Copy arg registers.
- EVT RegTy = MVT::getIntegerVT(CC.regSize() * 8);
+ MVT RegTy = MVT::getIntegerVT(CC.regSize() * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
for (unsigned I = 0; I < ByVal.NumRegs; ++I) {
unsigned ArgReg = CC.intArgRegs()[ByVal.FirstIdx + I];
- unsigned VReg = AddLiveIn(MF, ArgReg, RC);
+ unsigned VReg = addLiveIn(MF, ArgReg, RC);
unsigned Offset = I * CC.regSize();
SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN,
DAG.getConstant(Offset, PtrTy));
@@ -3652,7 +3456,7 @@ copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains,
// Copy byVal arg to registers and stack.
void MipsTargetLowering::
passByValArg(SDValue Chain, DebugLoc DL,
- SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const MipsCC &CC, const ByValArgInfo &ByVal,
@@ -3755,7 +3559,7 @@ MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
const CCState &CCInfo = CC.getCCInfo();
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumRegs);
unsigned RegSize = CC.regSize();
- EVT RegTy = MVT::getIntegerVT(RegSize * 8);
+ MVT RegTy = MVT::getIntegerVT(RegSize * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -3780,7 +3584,7 @@ MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
// in the caller's stack frame, while for N32/64, it is allocated in the
// callee's stack frame.
for (unsigned I = Idx; I < NumRegs; ++I, VaArgOffset += RegSize) {
- unsigned Reg = AddLiveIn(MF, ArgRegs[I], RC);
+ unsigned Reg = addLiveIn(MF, ArgRegs[I], RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true);
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 43f97e89a7bf..cab71a61e07a 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -19,7 +19,10 @@
#include "MipsSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetLowering.h"
+#include <deque>
+#include <string>
namespace llvm {
namespace MipsISD {
@@ -63,6 +66,18 @@ namespace llvm {
// Return
Ret,
+ EH_RETURN,
+
+ // Node used to extract integer from accumulator.
+ ExtractLOHI,
+
+ // Node used to insert integers to accumulator.
+ InsertLOHI,
+
+ // Mult nodes.
+ Mult,
+ Multu,
+
// MAdd/Sub nodes
MAdd,
MAddu,
@@ -72,6 +87,8 @@ namespace llvm {
// DivRem(u)
DivRem,
DivRemU,
+ DivRem16,
+ DivRemU16,
BuildPairF64,
ExtractElementF64,
@@ -147,9 +164,9 @@ namespace llvm {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ static const MipsTargetLowering *create(MipsTargetMachine &TM);
- virtual bool allowsUnalignedMemoryAccesses (EVT VT) const;
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
virtual void LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
@@ -172,7 +189,34 @@ namespace llvm {
EVT getSetCCResultType(EVT VT) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- private:
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ struct LTStr {
+ bool operator()(const char *S1, const char *S2) const {
+ return strcmp(S1, S2) < 0;
+ }
+ };
+
+ protected:
+ SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
+
+ SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const;
+
+ SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const;
+
+ SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag, unsigned LoFlag) const;
+
+ /// This function fills Ops, which is the list of operands that will later
+ /// be used when a function call node is created. It also generates
+ /// copyToReg nodes to set up argument registers.
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
/// ByValArgInfo - Byval argument information.
struct ByValArgInfo {
@@ -187,53 +231,80 @@ namespace llvm {
/// arguments and inquire about calling convention information.
class MipsCC {
public:
- MipsCC(CallingConv::ID CallConv, bool IsVarArg, bool IsO32,
- CCState &Info);
+ MipsCC(CallingConv::ID CallConv, bool IsO32, CCState &Info);
- void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs);
- void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins);
- void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags);
+ void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ bool IsVarArg, bool IsSoftFloat,
+ const SDNode *CallNode,
+ std::vector<ArgListEntry> &FuncArgs);
+ void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ bool IsSoftFloat,
+ Function::const_arg_iterator FuncArg);
+
+ void analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ bool IsSoftFloat, const SDNode *CallNode,
+ const Type *RetTy) const;
+
+ void analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ bool IsSoftFloat, const Type *RetTy) const;
const CCState &getCCInfo() const { return CCInfo; }
/// hasByValArg - Returns true if function has byval arguments.
bool hasByValArg() const { return !ByValArgs.empty(); }
- /// useRegsForByval - Returns true if the calling convention allows the
- /// use of registers to pass byval arguments.
- bool useRegsForByval() const { return UseRegsForByval; }
-
/// regSize - Size (in number of bits) of integer registers.
- unsigned regSize() const { return RegSize; }
+ unsigned regSize() const { return IsO32 ? 4 : 8; }
/// numIntArgRegs - Number of integer registers available for calls.
- unsigned numIntArgRegs() const { return NumIntArgRegs; }
+ unsigned numIntArgRegs() const;
/// reservedArgArea - The size of the area the caller reserves for
/// register arguments. This is 16-byte if ABI is O32.
- unsigned reservedArgArea() const { return ReservedArgArea; }
+ unsigned reservedArgArea() const;
- /// intArgRegs - Pointer to array of integer registers.
- const uint16_t *intArgRegs() const { return IntArgRegs; }
+ /// Return pointer to array of integer argument registers.
+ const uint16_t *intArgRegs() const;
typedef SmallVector<ByValArgInfo, 2>::const_iterator byval_iterator;
byval_iterator byval_begin() const { return ByValArgs.begin(); }
byval_iterator byval_end() const { return ByValArgs.end(); }
private:
+ void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags);
+
+ /// useRegsForByval - Returns true if the calling convention allows the
+ /// use of registers to pass byval arguments.
+ bool useRegsForByval() const { return CallConv != CallingConv::Fast; }
+
+ /// Return the function that analyzes fixed argument list functions.
+ llvm::CCAssignFn *fixedArgFn() const;
+
+ /// Return the function that analyzes variable argument list functions.
+ llvm::CCAssignFn *varArgFn() const;
+
+ const uint16_t *shadowRegs() const;
+
void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize,
unsigned Align);
+ /// Return the type of the register which is used to pass an argument or
+ /// return a value. This function returns f64 if the argument is an i64
+ /// value which has been generated as a result of softening an f128 value.
+ /// Otherwise, it just returns VT.
+ MVT getRegVT(MVT VT, const Type *OrigTy, const SDNode *CallNode,
+ bool IsSoftFloat) const;
+
+ template<typename Ty>
+ void analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
+ const SDNode *CallNode, const Type *RetTy) const;
+
CCState &CCInfo;
- bool UseRegsForByval;
- unsigned RegSize;
- unsigned NumIntArgRegs;
- unsigned ReservedArgArea;
- const uint16_t *IntArgRegs, *ShadowRegs;
+ CallingConv::ID CallConv;
+ bool IsO32;
SmallVector<ByValArgInfo, 2> ByValArgs;
- llvm::CCAssignFn *FixedFn, *VarFn;
};
// Subtarget Info
@@ -241,44 +312,49 @@ namespace llvm {
bool HasMips64, IsN64, IsO32;
+ private:
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ const SDNode *CallNode, const Type *RetTy) const;
// Lower Operand specifics
- SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
- SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
- SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
- SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
+ SDValue lowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
+ SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
bool IsSRA) const;
- SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
- /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
- bool IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const;
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const = 0;
/// copyByValArg - Copy argument registers which were used to pass a byval
/// argument to the stack. Create a stack frame object for the byval
@@ -292,7 +368,7 @@ namespace llvm {
/// passByValArg - Pass a byval argument in registers or on stack.
void passByValArg(SDValue Chain, DebugLoc DL,
- SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const MipsCC &CC, const ByValArgInfo &ByVal,
@@ -332,10 +408,6 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
-
// Inline asm support
ConstraintType getConstraintType(const std::string &Constraint) const;
@@ -357,10 +429,13 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsZeroVal,
+ unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const;
@@ -371,18 +446,20 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
- MachineBasicBlock *EmitBPOSGE32(MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
- MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
bool Nand = false) const;
- MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
- MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI,
+ MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
};
+
+ /// Create MipsTargetLowering objects.
+ const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM);
+ const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM);
}
#endif // MipsISELLOWERING_H
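
The static create() hook plus the two factory declarations at the end of the header imply a subclass dispatch between the Mips16 and standard-encoding lowerings; the selector body is not part of this excerpt, so the following is only a sketch of that pattern under that assumption, with our own names throughout:

    #include <memory>

    // One interface, two concrete lowerings, a mode flag choosing between
    // them; mirrors create()/createMips16TargetLowering()/
    // createMipsSETargetLowering() in shape only.
    struct Lowering {
      virtual ~Lowering() {}
      virtual const char *name() const = 0;
    };
    struct Mips16Lowering : Lowering {
      const char *name() const override { return "mips16"; }
    };
    struct SELowering : Lowering {
      const char *name() const override { return "standard-encoding"; }
    };

    std::unique_ptr<Lowering> createLowering(bool InMips16Mode) {
      if (InMips16Mode)
        return std::unique_ptr<Lowering>(new Mips16Lowering());
      return std::unique_ptr<Lowering>(new SELowering());
    }

    int main() { return createLowering(true)->name()[0] == 'm' ? 0 : 1; }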
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 33ee02068946..6b23057c9cdb 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -86,272 +86,320 @@ def fpimm0neg : PatLeaf<(fpimm), [{
// Only S32 and D32 are supported right now.
//===----------------------------------------------------------------------===//
-// FP load.
-let DecoderMethod = "DecodeFMem" in {
-class FPLoad<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
- FMem<op, (outs RC:$ft), (ins MemOpnd:$addr),
- !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (load addr:$addr))],
- IILoad>;
-
-// FP store.
-class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
- FMem<op, (outs), (ins RC:$ft, MemOpnd:$addr),
- !strconcat(opstr, "\t$ft, $addr"), [(store RC:$ft, addr:$addr)],
- IIStore>;
-}
-// FP indexed load.
-class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, SDPatternOperator FOp = null_frag>:
- FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index),
- !strconcat(opstr, "\t$fd, ${index}(${base})"),
- [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> {
- let fs = 0;
-}
-
-// FP indexed store.
-class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC,
- RegisterClass PRC, SDPatternOperator FOp= null_frag>:
- FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index),
- !strconcat(opstr, "\t$fs, ${index}(${base})"),
- [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> {
- let fd = 0;
-}
-
-// Instructions that convert an FP value to 32-bit fixed point.
-multiclass FFR1_W_M<bits<6> funct, string opstr> {
- def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>;
- def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>,
- Requires<[NotFP64bit, HasStandardEncoding]>;
- def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>,
- Requires<[IsFP64bit, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
+class ADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fs, $ft"),
+ [(set RC:$fd, (OpNode RC:$fs, RC:$ft))], Itin, FrmFR> {
+ let isCommutable = IsComm;
+}
+
+multiclass ADDS_M<string opstr, InstrItinClass Itin, bit IsComm,
+ SDPatternOperator OpNode = null_frag> {
+ def _D32 : ADDS_FT<opstr, AFGR64, Itin, IsComm, OpNode>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+ def _D64 : ADDS_FT<opstr, FGR64, Itin, IsComm, OpNode>,
+ Requires<[IsFP64bit, HasStdEnc]> {
+ string DecoderNamespace = "Mips64";
}
}
-// Instructions that convert an FP value to 64-bit fixed point.
-let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64" in
-multiclass FFR1_L_M<bits<6> funct, string opstr> {
- def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>;
- def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>;
-}
+class ABSS_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"),
+ [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>,
+ NeverHasSideEffects;
-// FP-to-FP conversion instructions.
-multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> {
- def _S : FFR1P<funct, 16, opstr, "s", FGR32, FGR32, OpNode>;
- def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>,
- Requires<[NotFP64bit, HasStandardEncoding]>;
- def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>,
- Requires<[IsFP64bit, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
+multiclass ABSS_M<string opstr, InstrItinClass Itin,
+ SDPatternOperator OpNode= null_frag> {
+ def _D32 : ABSS_FT<opstr, AFGR64, AFGR64, Itin, OpNode>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+ def _D64 : ABSS_FT<opstr, FGR64, FGR64, Itin, OpNode>,
+ Requires<[IsFP64bit, HasStdEnc]> {
+ string DecoderNamespace = "Mips64";
}
}
-multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> {
- let isCommutable = isComm in {
- def _S : FFR2P<funct, 16, opstr, "s", FGR32, OpNode>;
- def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>,
- Requires<[NotFP64bit, HasStandardEncoding]>;
- def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>,
- Requires<[IsFP64bit, HasStandardEncoding]> {
+multiclass ROUND_M<string opstr, InstrItinClass Itin> {
+ def _D32 : ABSS_FT<opstr, FGR32, AFGR64, Itin>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+ def _D64 : ABSS_FT<opstr, FGR32, FGR64, Itin>,
+ Requires<[IsFP64bit, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
}
-}
-// FP madd/msub/nmadd/nmsub instruction classes.
-class FMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr,
- SDNode OpNode, RegisterClass RC> :
- FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"),
- [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))]>;
-
-class FNMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr,
- SDNode OpNode, RegisterClass RC> :
- FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"),
- [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))]>;
+class MFC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>;
+
+class MTC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
+
+class MFC1_FT_CCR<string opstr, RegisterClass DstRC, RegisterOperand SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>;
+
+class MTC1_FT_CCR<string opstr, RegisterOperand DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
+
+class LW_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> {
+ let DecoderMethod = "DecodeFMem";
+}
+
+class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> {
+ let DecoderMethod = "DecodeFMem";
+}
+
+class MADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))], Itin, FrmFR>;
+
+class NMADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))],
+ Itin, FrmFR>;
+
+class LWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC,
+ InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fd, ${index}(${base})"),
+ [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI> {
+ let AddedComplexity = 20;
+}
+
+class SWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC,
+ InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fs, ${index}(${base})"),
+ [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI> {
+ let AddedComplexity = 20;
+}
+
+class BC1F_FT<string opstr, InstrItinClass Itin,
+ SDPatternOperator Op = null_frag> :
+ InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"),
+ [(MipsFPBrcond Op, bb:$offset)], Itin, FrmFI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let Defs = [AT];
+ let Uses = [FCR31];
+}
+
+class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs), (ins RC:$fs, RC:$ft, condcode:$cond),
+ !strconcat("c.$cond.", typestr, "\t$fs, $ft"),
+ [(OpNode RC:$fs, RC:$ft, imm:$cond)], Itin, FrmFR> {
+ let Defs = [FCR31];
+}
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
-defm ROUND_W : FFR1_W_M<0xc, "round">;
-defm ROUND_L : FFR1_L_M<0x8, "round">;
-defm TRUNC_W : FFR1_W_M<0xd, "trunc">;
-defm TRUNC_L : FFR1_L_M<0x9, "trunc">;
-defm CEIL_W : FFR1_W_M<0xe, "ceil">;
-defm CEIL_L : FFR1_L_M<0xa, "ceil">;
-defm FLOOR_W : FFR1_W_M<0xf, "floor">;
-defm FLOOR_L : FFR1_L_M<0xb, "floor">;
-defm CVT_W : FFR1_W_M<0x24, "cvt">, NeverHasSideEffects;
-//defm CVT_L : FFR1_L_M<0x25, "cvt">;
-
-def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>, NeverHasSideEffects;
-def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>, NeverHasSideEffects;
-def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>, NeverHasSideEffects;
-
-let Predicates = [NotFP64bit, HasStandardEncoding], neverHasSideEffects = 1 in {
- def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>;
- def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>;
- def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>;
-}
-
-let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64",
- neverHasSideEffects = 1 in {
- def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>;
- def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>;
- def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>;
- def CVT_D64_S : FFR1<0x21, 16, "cvt", "d.s", FGR64, FGR32>;
- def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>;
-}
-
-let Predicates = [NoNaNsFPMath, HasStandardEncoding] in {
- defm FABS : FFR1P_M<0x5, "abs", fabs>;
- defm FNEG : FFR1P_M<0x7, "neg", fneg>;
-}
-defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>;
+def ROUND_W_S : ABSS_FT<"round.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xc, 16>;
+def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xd, 16>;
+def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xe, 16>;
+def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xf, 16>;
+def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>;
+
+defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>;
+defm TRUNC_W : ROUND_M<"trunc.w.d", IIFcvt>, ABSS_FM<0xd, 17>;
+defm CEIL_W : ROUND_M<"ceil.w.d", IIFcvt>, ABSS_FM<0xe, 17>;
+defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>;
+defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>;
+
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def ROUND_L_S : ABSS_FT<"round.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x8, 16>;
+ def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0x8, 17>;
+ def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x9, 16>;
+ def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0x9, 17>;
+ def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xa, 16>;
+ def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0xa, 17>;
+ def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xb, 16>;
+ def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64, FGR64, IIFcvt>,
+ ABSS_FM<0xb, 17>;
+}
+
+def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32, FGR32, IIFcvt>, ABSS_FM<0x20, 20>;
+def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>;
+def CVT_L_D64 : ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>;
+
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32, AFGR64, IIFcvt>, ABSS_FM<0x20, 17>;
+ def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>;
+ def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>;
+}
+
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 17>;
+ def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 21>;
+ def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>;
+ def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>;
+ def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64, FGR64, IIFcvt>, ABSS_FM<0x21, 21>;
+}
+
+let Predicates = [NoNaNsFPMath, HasStdEnc] in {
+ def FABS_S : ABSS_FT<"abs.s", FGR32, FGR32, IIFcvt, fabs>, ABSS_FM<0x5, 16>;
+ def FNEG_S : ABSS_FT<"neg.s", FGR32, FGR32, IIFcvt, fneg>, ABSS_FM<0x7, 16>;
+ defm FABS : ABSS_M<"abs.d", IIFcvt, fabs>, ABSS_FM<0x5, 17>;
+ defm FNEG : ABSS_M<"neg.d", IIFcvt, fneg>, ABSS_FM<0x7, 17>;
+}
+
+def FSQRT_S : ABSS_FT<"sqrt.s", FGR32, FGR32, IIFsqrtSingle, fsqrt>,
+ ABSS_FM<0x4, 16>;
+defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>;
// The odd-numbered registers are only referenced when doing loads,
// stores, and moves between floating-point and integer registers.
// When defining instructions, we reference all 32-bit registers,
// regardless of register aliasing.
-class FFRGPR<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern>:
- FFR<0x11, 0x0, _fmt, outs, ins, asmstr, pattern> {
- bits<5> rt;
- let ft = rt;
- let fd = 0;
-}
-
/// Move Control Registers From/To CPU Registers
-def CFC1 : FFRGPR<0x2, (outs CPURegs:$rt), (ins CCR:$fs),
- "cfc1\t$rt, $fs", []>;
-
-def CTC1 : FFRGPR<0x6, (outs CCR:$fs), (ins CPURegs:$rt),
- "ctc1\t$rt, $fs", []>;
-
-def MFC1 : FFRGPR<0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
- "mfc1\t$rt, $fs",
- [(set CPURegs:$rt, (bitconvert FGR32:$fs))]>;
-
-def MTC1 : FFRGPR<0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
- "mtc1\t$rt, $fs",
- [(set FGR32:$fs, (bitconvert CPURegs:$rt))]>;
-
-def DMFC1 : FFRGPR<0x01, (outs CPU64Regs:$rt), (ins FGR64:$fs),
- "dmfc1\t$rt, $fs",
- [(set CPU64Regs:$rt, (bitconvert FGR64:$fs))]>;
-
-def DMTC1 : FFRGPR<0x05, (outs FGR64:$fs), (ins CPU64Regs:$rt),
- "dmtc1\t$rt, $fs",
- [(set FGR64:$fs, (bitconvert CPU64Regs:$rt))]>;
-
-def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>;
-def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>,
- Requires<[NotFP64bit, HasStandardEncoding]>;
-def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>,
- Requires<[IsFP64bit, HasStandardEncoding]> {
+def CFC1 : MFC1_FT_CCR<"cfc1", CPURegs, CCROpnd, IIFmove>, MFC1_FM<2>;
+def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegs, IIFmove>, MFC1_FM<6>;
+def MFC1 : MFC1_FT<"mfc1", CPURegs, FGR32, IIFmove, bitconvert>, MFC1_FM<0>;
+def MTC1 : MTC1_FT<"mtc1", FGR32, CPURegs, IIFmove, bitconvert>, MFC1_FM<4>;
+def DMFC1 : MFC1_FT<"dmfc1", CPU64Regs, FGR64, IIFmove, bitconvert>, MFC1_FM<1>;
+def DMTC1 : MTC1_FT<"dmtc1", FGR64, CPU64Regs, IIFmove, bitconvert>, MFC1_FM<5>;
+
+def FMOV_S : ABSS_FT<"mov.s", FGR32, FGR32, IIFmove>, ABSS_FM<0x6, 16>;
+def FMOV_D32 : ABSS_FT<"mov.d", AFGR64, AFGR64, IIFmove>, ABSS_FM<0x6, 17>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+def FMOV_D64 : ABSS_FT<"mov.d", FGR64, FGR64, IIFmove>, ABSS_FM<0x6, 17>,
+ Requires<[IsFP64bit, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
/// Floating Point Memory Instructions
-let Predicates = [IsN64, HasStandardEncoding], DecoderNamespace = "Mips64" in {
- def LWC1_P8 : FPLoad<0x31, "lwc1", FGR32, mem64>;
- def SWC1_P8 : FPStore<0x39, "swc1", FGR32, mem64>;
- def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64> {
+let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def LWC1_P8 : LW_FT<"lwc1", FGR32, IILoad, mem64, load>, LW_FM<0x31>;
+ def SWC1_P8 : SW_FT<"swc1", FGR32, IIStore, mem64, store>, LW_FM<0x39>;
+ def LDC164_P8 : LW_FT<"ldc1", FGR64, IILoad, mem64, load>, LW_FM<0x35> {
    let isCodeGenOnly = 1;
}
- def SDC164_P8 : FPStore<0x3d, "sdc1", FGR64, mem64> {
+ def SDC164_P8 : SW_FT<"sdc1", FGR64, IIStore, mem64, store>, LW_FM<0x3d> {
    let isCodeGenOnly = 1;
}
}
-let Predicates = [NotN64, HasStandardEncoding] in {
- def LWC1 : FPLoad<0x31, "lwc1", FGR32, mem>;
- def SWC1 : FPStore<0x39, "swc1", FGR32, mem>;
+let Predicates = [NotN64, HasStdEnc] in {
+ def LWC1 : LW_FT<"lwc1", FGR32, IILoad, mem, load>, LW_FM<0x31>;
+ def SWC1 : SW_FT<"swc1", FGR32, IIStore, mem, store>, LW_FM<0x39>;
}
-let Predicates = [NotN64, HasMips64, HasStandardEncoding],
+let Predicates = [NotN64, HasMips64, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>;
- def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>;
+ def LDC164 : LW_FT<"ldc1", FGR64, IILoad, mem, load>, LW_FM<0x35>;
+ def SDC164 : SW_FT<"sdc1", FGR64, IIStore, mem, store>, LW_FM<0x3d>;
}
-let Predicates = [NotN64, NotMips64, HasStandardEncoding] in {
- def LDC1 : FPLoad<0x35, "ldc1", AFGR64, mem>;
- def SDC1 : FPStore<0x3d, "sdc1", AFGR64, mem>;
+let Predicates = [NotN64, NotMips64, HasStdEnc] in {
+ def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem, load>, LW_FM<0x35>;
+ def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem, store>, LW_FM<0x3d>;
}
// Indexed loads and stores.
-let Predicates = [HasMips32r2Or64, HasStandardEncoding] in {
- def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load>;
- def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store>;
+let Predicates = [HasFPIdx, HasStdEnc] in {
+ def LWXC1 : LWXC1_FT<"lwxc1", FGR32, CPURegs, IILoad, load>, LWXC1_FM<0>;
+ def SWXC1 : SWXC1_FT<"swxc1", FGR32, CPURegs, IIStore, store>, SWXC1_FM<8>;
}
-let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in {
- def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load>;
- def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store>;
+let Predicates = [HasMips32r2, NotMips64, HasStdEnc] in {
+ def LDXC1 : LWXC1_FT<"ldxc1", AFGR64, CPURegs, IILoad, load>, LWXC1_FM<1>;
+ def SDXC1 : SWXC1_FT<"sdxc1", AFGR64, CPURegs, IIStore, store>, SWXC1_FM<9>;
}
-let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mips64" in {
- def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load>;
- def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store>;
+let Predicates = [HasMips64, NotN64, HasStdEnc], DecoderNamespace="Mips64" in {
+ def LDXC164 : LWXC1_FT<"ldxc1", FGR64, CPURegs, IILoad, load>, LWXC1_FM<1>;
+ def SDXC164 : SWXC1_FT<"sdxc1", FGR64, CPURegs, IIStore, store>, SWXC1_FM<9>;
}
// n64
-let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in {
- def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load>;
- def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load>;
- def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store>;
- def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store>;
+let Predicates = [IsN64, HasStdEnc], isCodeGenOnly=1 in {
+ def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32, CPU64Regs, IILoad, load>, LWXC1_FM<0>;
+ def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64, CPU64Regs, IILoad, load>,
+ LWXC1_FM<1>;
+ def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32, CPU64Regs, IIStore, store>,
+ SWXC1_FM<8>;
+ def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64, CPU64Regs, IIStore, store>,
+ SWXC1_FM<9>;
}
// Load/store doubleword indexed unaligned.
-let Predicates = [NotMips64, HasStandardEncoding] in {
- def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>;
- def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>;
+let Predicates = [NotMips64, HasStdEnc] in {
+ def LUXC1 : LWXC1_FT<"luxc1", AFGR64, CPURegs, IILoad>, LWXC1_FM<0x5>;
+ def SUXC1 : SWXC1_FT<"suxc1", AFGR64, CPURegs, IIStore>, SWXC1_FM<0xd>;
}
-let Predicates = [HasMips64, HasStandardEncoding],
+let Predicates = [HasMips64, HasStdEnc],
DecoderNamespace="Mips64" in {
- def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>;
- def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>;
+ def LUXC164 : LWXC1_FT<"luxc1", FGR64, CPURegs, IILoad>, LWXC1_FM<0x5>;
+ def SUXC164 : SWXC1_FT<"suxc1", FGR64, CPURegs, IIStore>, SWXC1_FM<0xd>;
}
/// Floating-point Arithmetic
-defm FADD : FFR2P_M<0x00, "add", fadd, 1>;
-defm FDIV : FFR2P_M<0x03, "div", fdiv>;
-defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>;
-defm FSUB : FFR2P_M<0x01, "sub", fsub>;
+def FADD_S : ADDS_FT<"add.s", FGR32, IIFadd, 1, fadd>, ADDS_FM<0x00, 16>;
+defm FADD : ADDS_M<"add.d", IIFadd, 1, fadd>, ADDS_FM<0x00, 17>;
+def FDIV_S : ADDS_FT<"div.s", FGR32, IIFdivSingle, 0, fdiv>, ADDS_FM<0x03, 16>;
+defm FDIV : ADDS_M<"div.d", IIFdivDouble, 0, fdiv>, ADDS_FM<0x03, 17>;
+def FMUL_S : ADDS_FT<"mul.s", FGR32, IIFmulSingle, 1, fmul>, ADDS_FM<0x02, 16>;
+defm FMUL : ADDS_M<"mul.d", IIFmulDouble, 1, fmul>, ADDS_FM<0x02, 17>;
+def FSUB_S : ADDS_FT<"sub.s", FGR32, IIFadd, 0, fsub>, ADDS_FM<0x01, 16>;
+defm FSUB : ADDS_M<"sub.d", IIFadd, 0, fsub>, ADDS_FM<0x01, 17>;
-let Predicates = [HasMips32r2, HasStandardEncoding] in {
- def MADD_S : FMADDSUB<0x4, 0, "madd", "s", fadd, FGR32>;
- def MSUB_S : FMADDSUB<0x5, 0, "msub", "s", fsub, FGR32>;
+let Predicates = [HasMips32r2, HasStdEnc] in {
+ def MADD_S : MADDS_FT<"madd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<4, 0>;
+ def MSUB_S : MADDS_FT<"msub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<5, 0>;
}
-let Predicates = [HasMips32r2, NoNaNsFPMath, HasStandardEncoding] in {
- def NMADD_S : FNMADDSUB<0x6, 0, "nmadd", "s", fadd, FGR32>;
- def NMSUB_S : FNMADDSUB<0x7, 0, "nmsub", "s", fsub, FGR32>;
+let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in {
+ def NMADD_S : NMADDS_FT<"nmadd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<6, 0>;
+ def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<7, 0>;
}
-let Predicates = [HasMips32r2, NotFP64bit, HasStandardEncoding] in {
- def MADD_D32 : FMADDSUB<0x4, 1, "madd", "d", fadd, AFGR64>;
- def MSUB_D32 : FMADDSUB<0x5, 1, "msub", "d", fsub, AFGR64>;
+let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in {
+ def MADD_D32 : MADDS_FT<"madd.d", AFGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>;
+ def MSUB_D32 : MADDS_FT<"msub.d", AFGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>;
}
-let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStandardEncoding] in {
- def NMADD_D32 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, AFGR64>;
- def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>;
+let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in {
+ def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64, IIFmulDouble, fadd>,
+ MADDS_FM<6, 1>;
+ def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64, IIFmulDouble, fsub>,
+ MADDS_FM<7, 1>;
}
-let Predicates = [HasMips32r2, IsFP64bit, HasStandardEncoding], isCodeGenOnly=1 in {
- def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>;
- def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>;
+let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in {
+ def MADD_D64 : MADDS_FT<"madd.d", FGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>;
+ def MSUB_D64 : MADDS_FT<"msub.d", FGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>;
}
-let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStandardEncoding],
+let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc],
isCodeGenOnly=1 in {
- def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, FGR64>;
- def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>;
+ def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64, IIFmulDouble, fadd>,
+ MADDS_FM<6, 1>;
+ def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64, IIFmulDouble, fsub>,
+ MADDS_FM<7, 1>;
}
//===----------------------------------------------------------------------===//
@@ -362,19 +410,9 @@ let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStandardEncoding],
def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
-/// Floating Point Branch of False/True (Likely)
-let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
- class FBRANCH<bits<1> nd, bits<1> tf, PatLeaf op, string asmstr> :
- FFI<0x11, (outs), (ins brtarget:$dst), !strconcat(asmstr, "\t$dst"),
- [(MipsFPBrcond op, bb:$dst)]> {
- let Inst{20-18} = 0;
- let Inst{17} = nd;
- let Inst{16} = tf;
-}
-
let DecoderMethod = "DecodeBC1" in {
-def BC1F : FBRANCH<0, 0, MIPS_BRANCH_F, "bc1f">;
-def BC1T : FBRANCH<0, 1, MIPS_BRANCH_T, "bc1t">;
+def BC1F : BC1F_FT<"bc1f", IIBranch, MIPS_BRANCH_F>, BC1F_FM<0, 0>;
+def BC1T : BC1F_FT<"bc1t", IIBranch, MIPS_BRANCH_T>, BC1F_FM<0, 1>;
}
//===----------------------------------------------------------------------===//
// Floating Point Flag Conditions
@@ -398,33 +436,24 @@ def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
-class FCMP<bits<5> fmt, RegisterClass RC, string typestr> :
- FCC<fmt, (outs), (ins RC:$fs, RC:$ft, condcode:$cc),
- !strconcat("c.$cc.", typestr, "\t$fs, $ft"),
- [(MipsFPCmp RC:$fs, RC:$ft, imm:$cc)]>;
-
/// Floating Point Compare
-let Defs=[FCR31] in {
- def FCMP_S32 : FCMP<0x10, FGR32, "s">;
- def FCMP_D32 : FCMP<0x11, AFGR64, "d">,
- Requires<[NotFP64bit, HasStandardEncoding]>;
- def FCMP_D64 : FCMP<0x11, FGR64, "d">,
- Requires<[IsFP64bit, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
- }
-}
+def FCMP_S32 : CEQS_FT<"s", FGR32, IIFcmp, MipsFPCmp>, CEQS_FM<16>;
+def FCMP_D32 : CEQS_FT<"d", AFGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+let DecoderNamespace = "Mips64" in
+def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>,
+ Requires<[IsFP64bit, HasStdEnc]>;
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
//===----------------------------------------------------------------------===//
-def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src),
- "# MOVCCRToCCR", []>;
+def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCROpnd:$src), []>;
// This pseudo instr gets expanded into 2 mtc1 instrs after register
// allocation.
def BuildPairF64 :
PseudoSE<(outs AFGR64:$dst),
- (ins CPURegs:$lo, CPURegs:$hi), "",
+ (ins CPURegs:$lo, CPURegs:$hi),
[(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
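+
+// For example, assuming $D0 is the ($f0, $f1) pair, "BuildPairF64 $D0, $a0,
+// $a1" would expand to "mtc1 $a0, $f0" followed by "mtc1 $a1, $f1".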
// This pseudo instr gets expanded into 2 mfc1 instrs after register
@@ -432,7 +461,7 @@ def BuildPairF64 :
// if n is 0, lower part of src is extracted.
// if n is 1, higher part of src is extracted.
def ExtractElementF64 :
- PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), "",
+ PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n),
[(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
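+
+// For example, assuming $D0 is the ($f0, $f1) pair, "ExtractElementF64 $t0,
+// $D0, 1" would expand to "mfc1 $t0, $f1"; with n = 0 it would read $f0.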
//===----------------------------------------------------------------------===//
@@ -444,7 +473,7 @@ def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
def : MipsPat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
def : MipsPat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
+let Predicates = [NotFP64bit, HasStdEnc] in {
def : MipsPat<(f64 (sint_to_fp CPURegs:$src)),
(CVT_D32_W (MTC1 CPURegs:$src))>;
def : MipsPat<(i32 (fp_to_sint AFGR64:$src)),
@@ -453,7 +482,7 @@ let Predicates = [NotFP64bit, HasStandardEncoding] in {
def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
}
-let Predicates = [IsFP64bit, HasStandardEncoding] in {
+let Predicates = [IsFP64bit, HasStdEnc] in {
def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>;
def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>;
@@ -473,3 +502,28 @@ let Predicates = [IsFP64bit, HasStandardEncoding] in {
def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
}
+
+// Patterns for loads/stores with a reg+imm operand.
+let AddedComplexity = 40 in {
+ let Predicates = [IsN64, HasStdEnc] in {
+ def : LoadRegImmPat<LWC1_P8, f32, load>;
+ def : StoreRegImmPat<SWC1_P8, f32>;
+ def : LoadRegImmPat<LDC164_P8, f64, load>;
+ def : StoreRegImmPat<SDC164_P8, f64>;
+ }
+
+ let Predicates = [NotN64, HasStdEnc] in {
+ def : LoadRegImmPat<LWC1, f32, load>;
+ def : StoreRegImmPat<SWC1, f32>;
+ }
+
+ let Predicates = [NotN64, HasMips64, HasStdEnc] in {
+ def : LoadRegImmPat<LDC164, f64, load>;
+ def : StoreRegImmPat<SDC164, f64>;
+ }
+
+ let Predicates = [NotN64, NotMips64, HasStdEnc] in {
+ def : LoadRegImmPat<LDC1, f64, load>;
+ def : StoreRegImmPat<SDC1, f64>;
+ }
+}
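+
+// For example, with the patterns above an f32 load whose address computes to
+// (add $sp, 8) can be selected directly as "lwc1 $f0, 8($sp)" instead of
+// materializing the address in a GPR first (register choices illustrative).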
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 1ecbdc2474b3..ee432c875355 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -76,20 +76,22 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
InstrItinClass itin, Format f>:
MipsInst<outs, ins, asmstr, pattern, itin, f> {
- let Predicates = [HasStandardEncoding];
+ let Predicates = [HasStdEnc];
}
// Mips Pseudo Instructions Format
-class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> {
+class MipsPseudo<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo> :
+ MipsInst<outs, ins, "", pattern, itin, Pseudo> {
let isCodeGenOnly = 1;
let isPseudo = 1;
}
// Mips32/64 Pseudo Instruction Format
-class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsPseudo<outs, ins, asmstr, pattern> {
- let Predicates = [HasStandardEncoding];
+class PseudoSE<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo>:
+ MipsPseudo<outs, ins, pattern, itin> {
+ let Predicates = [HasStdEnc];
}
// Pseudo-instructions for alternate assembly syntax (never used by codegen).
@@ -161,30 +163,28 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
// Format J instruction class in Mips : <|opcode|address|>
//===----------------------------------------------------------------------===//
-class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmJ>
+class FJ<bits<6> op>
{
- bits<26> addr;
+ bits<26> target;
- let Opcode = op;
+ bits<32> Inst;
- let Inst{25-0} = addr;
+ let Inst{31-26} = op;
+ let Inst{25-0} = target;
}
- //===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
// MFC instruction class in Mips : <|op|mf|rt|rd|0000000|sel|>
//===----------------------------------------------------------------------===//
-class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>:
- InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR>
+class MFC3OP_FM<bits<6> op, bits<5> mfmt>
{
- bits<5> mfmt;
bits<5> rt;
bits<5> rd;
bits<3> sel;
- let Opcode = op;
- let mfmt = _mfmt;
+ bits<32> Inst;
+ let Inst{31-26} = op;
let Inst{25-21} = mfmt;
let Inst{20-16} = rt;
let Inst{15-11} = rd;
@@ -192,6 +192,270 @@ class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>:
let Inst{2-0} = sel;
}
+class ADD_FM<bits<6> op, bits<6> funct> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
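+
+// For example, an instruction using ADD_FM<0, 0x20> with rd=3, rs=1, rt=2
+// ("add $3, $1, $2") encodes as |000000|00001|00010|00011|00000|100000|,
+// i.e. 0x00221820.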
+
+class ADDI_FM<bits<6> op> {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class SRA_FM<bits<6> funct, bit rotate> {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> shamt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-22} = 0;
+ let Inst{21} = rotate;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = shamt;
+ let Inst{5-0} = funct;
+}
+
+class SRLV_FM<bits<6> funct, bit rotate> {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-7} = 0;
+ let Inst{6} = rotate;
+ let Inst{5-0} = funct;
+}
+
+class BEQ_FM<bits<6> op> {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = offset;
+}
+
+class BGEZ_FM<bits<6> op, bits<5> funct> {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = funct;
+ let Inst{15-0} = offset;
+}
+
+class B_FM {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 4;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = offset;
+}
+
+class SLTI_FM<bits<6> op> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class MFLO_FM<bits<6> funct> {
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class MTLO_FM<bits<6> funct> {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class SEB_FM<bits<5> funct, bits<6> funct2> {
+ bits<5> rd;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = funct;
+ let Inst{5-0} = funct2;
+}
+
+class CLO_FM<bits<6> funct> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1c;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+ let rt = rd;
+}
+
+class LUI_FM {
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0xf;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class JALR_FM {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 9;
+}
+
+class BAL_FM {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 1;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0x11;
+ let Inst{15-0} = offset;
+}
+
+class BGEZAL_FM<bits<5> funct> {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 1;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = funct;
+ let Inst{15-0} = offset;
+}
+
+class SYNC_FM {
+ bits<5> stype;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{10-6} = stype;
+ let Inst{5-0} = 0xf;
+}
+
+class MULT_FM<bits<6> op, bits<6> funct> {
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class EXT_FM<bits<6> funct> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> pos;
+ bits<5> size;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = size;
+ let Inst{10-6} = pos;
+ let Inst{5-0} = funct;
+}
+
+class RDHWR_FM {
+ bits<5> rt;
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1f;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 0x3b;
+}
+
//===----------------------------------------------------------------------===//
//
// FLOATING POINT INSTRUCTION FORMATS
@@ -206,31 +470,6 @@ class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>:
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|>
-//===----------------------------------------------------------------------===//
-
-class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
- string asmstr, list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFR>
-{
- bits<5> fd;
- bits<5> fs;
- bits<5> ft;
- bits<5> fmt;
- bits<6> funct;
-
- let Opcode = op;
- let funct = _funct;
- let fmt = _fmt;
-
- let Inst{25-21} = fmt;
- let Inst{20-16} = ft;
- let Inst{15-11} = fs;
- let Inst{10-6} = fd;
- let Inst{5-0} = funct;
-}
-
-//===----------------------------------------------------------------------===//
// Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
//===----------------------------------------------------------------------===//
@@ -248,130 +487,179 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
let Inst{15-0} = imm16;
}
-//===----------------------------------------------------------------------===//
-// Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|>
-//===----------------------------------------------------------------------===//
-
-class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> fs;
- bits<5> ft;
- bits<4> cc;
- bits<5> fmt;
+class ADDS_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> ft;
- let Opcode = 0x11;
- let fmt = _fmt;
+ bits<32> Inst;
+ let Inst{31-26} = 0x11;
let Inst{25-21} = fmt;
let Inst{20-16} = ft;
let Inst{15-11} = fs;
- let Inst{10-6} = 0;
- let Inst{5-4} = 0b11;
- let Inst{3-0} = cc;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
}
+class ABSS_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
-class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
- list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> rd;
- bits<5> rs;
- bits<3> cc;
- bits<1> tf;
-
- let Opcode = 0;
- let tf = _tf;
+ bits<32> Inst;
- let Inst{25-21} = rs;
- let Inst{20-18} = cc;
- let Inst{17} = 0;
- let Inst{16} = tf;
- let Inst{15-11} = rd;
- let Inst{10-6} = 0;
- let Inst{5-0} = 1;
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
}
-class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
- list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> fd;
- bits<5> fs;
- bits<3> cc;
- bits<5> fmt;
- bits<1> tf;
+class MFC1_FM<bits<5> funct> {
+ bits<5> rt;
+ bits<5> fs;
- let Opcode = 17;
- let fmt = _fmt;
- let tf = _tf;
+ bits<32> Inst;
- let Inst{25-21} = fmt;
- let Inst{20-18} = cc;
- let Inst{17} = 0;
- let Inst{16} = tf;
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rt;
let Inst{15-11} = fs;
- let Inst{10-6} = fd;
- let Inst{5-0} = 17;
+ let Inst{10-0} = 0;
}
-// FP unary instructions without patterns.
-class FFR1<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
- RegisterClass DstRC, RegisterClass SrcRC> :
- FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), []> {
- let ft = 0;
-}
+class LW_FM<bits<6> op> {
+ bits<5> rt;
+ bits<21> addr;
-// FP unary instructions with patterns.
-class FFR1P<bits<6> funct, bits<5> fmt, string opstr, string fmtstr,
- RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode> :
- FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"),
- [(set DstRC:$fd, (OpNode SrcRC:$fs))]> {
- let ft = 0;
-}
+ bits<32> Inst;
-class FFR2P<bits<6> funct, bits<5> fmt, string opstr,
- string fmtstr, RegisterClass RC, SDNode OpNode> :
- FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft),
- !strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"),
- [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>;
+ let Inst{31-26} = op;
+ let Inst{25-21} = addr{20-16};
+ let Inst{20-16} = rt;
+ let Inst{15-0} = addr{15-0};
+}
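+
+// For example, in "lw $8, 16($29)" the 21-bit addr operand packs base and
+// offset: addr{20-16} = 29 ($sp) fills Inst{25-21}, addr{15-0} = 16 fills
+// Inst{15-0}, and rt = 8 ($t0) goes to Inst{20-16}.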
-// Floating point madd/msub/nmadd/nmsub.
-class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> {
+class MADDS_FM<bits<3> funct, bits<3> fmt> {
bits<5> fd;
bits<5> fr;
bits<5> fs;
bits<5> ft;
- let Opcode = 0x13;
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x13;
let Inst{25-21} = fr;
let Inst{20-16} = ft;
let Inst{15-11} = fs;
- let Inst{10-6} = fd;
- let Inst{5-3} = funct;
- let Inst{2-0} = fmt;
+ let Inst{10-6} = fd;
+ let Inst{5-3} = funct;
+ let Inst{2-0} = fmt;
}
-// FP indexed load/store instructions.
-class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr,
- list<dag> pattern> :
- InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
-{
- bits<5> base;
- bits<5> index;
- bits<5> fs;
- bits<5> fd;
+class LWXC1_FM<bits<6> funct> {
+ bits<5> fd;
+ bits<5> base;
+ bits<5> index;
- let Opcode = 0x13;
+ bits<32> Inst;
+ let Inst{31-26} = 0x13;
+ let Inst{25-21} = base;
+ let Inst{20-16} = index;
+ let Inst{15-11} = 0;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+class SWXC1_FM<bits<6> funct> {
+ bits<5> fs;
+ bits<5> base;
+ bits<5> index;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x13;
let Inst{25-21} = base;
let Inst{20-16} = index;
let Inst{15-11} = fs;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+class BC1F_FM<bit nd, bit tf> {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = 0x8;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = nd;
+ let Inst{16} = tf;
+ let Inst{15-0} = offset;
+}
+
+class CEQS_FM<bits<5> fmt> {
+ bits<5> fs;
+ bits<5> ft;
+ bits<4> cond;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-8} = 0; // cc
+ let Inst{7-4} = 0x3;
+ let Inst{3-0} = cond;
+}
+
+class CMov_I_F_FM<bits<6> funct, bits<5> fmt> {
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = fs;
let Inst{10-6} = fd;
let Inst{5-0} = funct;
}
+
+class CMov_F_I_FM<bit tf> {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 1;
+}
+
+class CMov_F_F_FM<bits<5> fmt, bit tf> {
+ bits<5> fd;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x11;
+ let Inst{25-21} = fmt;
+ let Inst{20-18} = 0; // cc
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = 0x11;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index ca80d43f36f1..ad92d41209e9 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -11,16 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
-#include "MipsTargetMachine.h"
-#include "MipsMachineFunction.h"
#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
#include "MipsGenInstrInfo.inc"
@@ -93,81 +93,11 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const
-{
-
- MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
-
- // Skip all the debug instructions.
- while (I != REnd && I->isDebugValue())
- ++I;
-
- if (I == REnd || !isUnpredicatedTerminator(&*I)) {
- // If this block ends with no branches (it just falls through to its succ)
- // just return false, leaving TBB/FBB null.
- TBB = FBB = NULL;
- return false;
- }
-
- MachineInstr *LastInst = &*I;
- unsigned LastOpc = LastInst->getOpcode();
-
- // Not an analyzable branch (must be an indirect jump).
- if (!GetAnalyzableBrOpc(LastOpc))
- return true;
-
- // Get the second to last instruction in the block.
- unsigned SecondLastOpc = 0;
- MachineInstr *SecondLastInst = NULL;
-
- if (++I != REnd) {
- SecondLastInst = &*I;
- SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
-
- // Not an analyzable branch (must be an indirect jump).
- if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
- return true;
- }
-
- // If there is only one terminator instruction, process it.
- if (!SecondLastOpc) {
- // Unconditional branch
- if (LastOpc == UncondBrOpc) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- }
-
- // Conditional branch
- AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
- return false;
- }
-
- // If we reached here, there are two branches.
- // If there are three terminators, we don't know what sort of block this is.
- if (++I != REnd && isUnpredicatedTerminator(&*I))
- return true;
-
- // If second to last instruction is an unconditional branch,
- // analyze it and remove the last instruction.
- if (SecondLastOpc == UncondBrOpc) {
- // Return if the last instruction cannot be removed.
- if (!AllowModify)
- return true;
-
- TBB = SecondLastInst->getOperand(0).getMBB();
- LastInst->eraseFromParent();
- return false;
- }
-
- // Conditional branch followed by an unconditional branch.
- // The last one must be unconditional.
- if (LastOpc != UncondBrOpc)
- return true;
-
- AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
- FBB = LastInst->getOperand(0).getMBB();
+ bool AllowModify) const {
+ SmallVector<MachineInstr*, 2> BranchInstrs;
+ BranchType BT = AnalyzeBranch(MBB, TBB, FBB, Cond, AllowModify, BranchInstrs);
- return false;
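+  // The boolean interface reports "cannot analyze" (true) for both
+  // unanalyzable (BT_None) and indirect (BT_Indirect) branches.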
+ return (BT == BT_None) || (BT == BT_Indirect);
}
void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
@@ -256,6 +186,90 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
return false;
}
+MipsInstrInfo::BranchType MipsInstrInfo::
+AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify,
+ SmallVectorImpl<MachineInstr*> &BranchInstrs) const {
+
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ if (I == REnd || !isUnpredicatedTerminator(&*I)) {
+ // This block ends with no branches (it just falls through to its succ).
+ // Leave TBB/FBB null.
+ TBB = FBB = NULL;
+ return BT_NoBranch;
+ }
+
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+ BranchInstrs.push_back(LastInst);
+
+ // Not an analyzable branch (e.g., indirect jump).
+ if (!GetAnalyzableBrOpc(LastOpc))
+ return LastInst->isIndirectBranch() ? BT_Indirect : BT_None;
+
+ // Get the second to last instruction in the block.
+ unsigned SecondLastOpc = 0;
+ MachineInstr *SecondLastInst = NULL;
+
+ if (++I != REnd) {
+ SecondLastInst = &*I;
+ SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
+ return BT_None;
+ }
+
+ // If there is only one terminator instruction, process it.
+ if (!SecondLastOpc) {
+ // Unconditional branch
+ if (LastOpc == UncondBrOpc) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return BT_Uncond;
+ }
+
+ // Conditional branch
+ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+ return BT_Cond;
+ }
+
+ // If we reached here, there are two branches.
+ // If there are three terminators, we don't know what sort of block this is.
+ if (++I != REnd && isUnpredicatedTerminator(&*I))
+ return BT_None;
+
+ BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst);
+
+ // If second to last instruction is an unconditional branch,
+ // analyze it and remove the last instruction.
+ if (SecondLastOpc == UncondBrOpc) {
+ // Return if the last instruction cannot be removed.
+ if (!AllowModify)
+ return BT_None;
+
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ LastInst->eraseFromParent();
+ BranchInstrs.pop_back();
+ return BT_Uncond;
+ }
+
+ // Conditional branch followed by an unconditional branch.
+ // The last one must be unconditional.
+ if (LastOpc != UncondBrOpc)
+ return BT_None;
+
+ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+
+ return BT_CondUncond;
+}
+
/// Return the number of bytes of code the specified instruction may occupy.
unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
switch (MI->getOpcode()) {
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index aca2bc7ae98d..8c05d97beac2 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -31,6 +31,15 @@ protected:
unsigned UncondBrOpc;
public:
+ enum BranchType {
+ BT_None, // Couldn't analyze branch.
+ BT_NoBranch, // No branches found.
+ BT_Uncond, // One unconditional branch.
+ BT_Cond, // One conditional branch.
+ BT_CondUncond, // A conditional branch followed by an unconditional branch.
+ BT_Indirect // One indirect branch.
+ };
+
explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc);
static const MipsInstrInfo *create(MipsTargetMachine &TM);
@@ -51,6 +60,12 @@ public:
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ BranchType AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify,
+ SmallVectorImpl<MachineInstr*> &BranchInstrs) const;
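+
+  // For example (sketch), a caller that only needs to know whether MBB ends
+  // in a branch might do:
+  //   SmallVector<MachineInstr*, 2> BIs;
+  //   if (AnalyzeBranch(MBB, TBB, FBB, Cond, false, BIs) == BT_NoBranch)
+  //     ; // MBB falls through to its layout successor.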
+
virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
const MDNode *MDPtr,
@@ -71,6 +86,36 @@ public:
/// Return the number of bytes of code the specified instruction may occupy.
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0);
+ }
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0);
+ }
+
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const = 0;
+
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const = 0;
+
protected:
bool isZeroImm(const MachineOperand &op) const;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index aa8881997285..3a82e8171301 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -23,18 +23,19 @@ def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisInt<4>]>;
def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDT_MipsMAddMSub : SDTypeProfile<0, 4,
- [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<1, 2>,
- SDTCisSameAs<2, 3>]>;
-def SDT_MipsDivRem : SDTypeProfile<0, 2,
- [SDTCisInt<0>,
- SDTCisSameAs<0, 1>]>;
+def SDT_ExtractLOHI : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, untyped>,
+ SDTCisVT<2, i32>]>;
+def SDT_InsertLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsMultDiv : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsMAddMSub : SDTypeProfile<1, 3,
+ [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
+ SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsDivRem16 : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>;
def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
-def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>,
- SDTCisSameAs<0, 1>]>;
def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
@@ -74,7 +75,8 @@ def MipsTprelLo : SDNode<"MipsISD::TprelLo", SDTIntUnaryOp>;
def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>;
// Return
-def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue]>;
+def MipsRet : SDNode<"MipsISD::Ret", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
@@ -83,20 +85,27 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
[SDNPHasChain, SDNPSideEffect,
SDNPOptInGlue, SDNPOutGlue]>;
+// Node used to extract integer from LO/HI register.
+def ExtractLOHI : SDNode<"MipsISD::ExtractLOHI", SDT_ExtractLOHI>;
+
+// Node used to insert 32-bit integers to LOHI register pair.
+def InsertLOHI : SDNode<"MipsISD::InsertLOHI", SDT_InsertLOHI>;
+
+// Mult nodes.
+def MipsMult : SDNode<"MipsISD::Mult", SDT_MipsMultDiv>;
+def MipsMultu : SDNode<"MipsISD::Multu", SDT_MipsMultDiv>;
+
// MAdd*/MSub* nodes
-def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
-def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub,
- [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub>;
+def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub>;
+def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub>;
+def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub>;
// DivRem(u) nodes
-def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsDivRem,
- [SDNPOutGlue]>;
-def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
+def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsMultDiv>;
+def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsMultDiv>;
+def MipsDivRem16 : SDNode<"MipsISD::DivRem16", SDT_MipsDivRem16, [SDNPOutGlue]>;
+def MipsDivRemU16 : SDNode<"MipsISD::DivRemU16", SDT_MipsDivRem16,
[SDNPOutGlue]>;
// Target constant nodes that are not part of any isel patterns and remain
@@ -111,10 +120,6 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntBinOp>;
-// Pointer to dynamically allocated stack area.
-def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
- [SDNPHasChain, SDNPInGlue]>;
-
def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain,SDNPSideEffect]>;
def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>;
@@ -148,14 +153,14 @@ def HasSwap : Predicate<"Subtarget.hasSwap()">,
AssemblerPredicate<"FeatureSwap">;
def HasCondMov : Predicate<"Subtarget.hasCondMov()">,
AssemblerPredicate<"FeatureCondMov">;
+def HasFPIdx : Predicate<"Subtarget.hasFPIdx()">,
+ AssemblerPredicate<"FeatureFPIdx">;
def HasMips32 : Predicate<"Subtarget.hasMips32()">,
AssemblerPredicate<"FeatureMips32">;
def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">,
AssemblerPredicate<"FeatureMips32r2">;
def HasMips64 : Predicate<"Subtarget.hasMips64()">,
AssemblerPredicate<"FeatureMips64">;
-def HasMips32r2Or64 : Predicate<"Subtarget.hasMips32r2Or64()">,
- AssemblerPredicate<"FeatureMips32r2,FeatureMips64">;
def NotMips64 : Predicate<"!Subtarget.hasMips64()">,
AssemblerPredicate<"!FeatureMips64">;
def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">,
@@ -172,11 +177,15 @@ def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
AssemblerPredicate<"FeatureMips32">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">,
AssemblerPredicate<"FeatureMips32">;
-def HasStandardEncoding : Predicate<"Subtarget.hasStandardEncoding()">,
- AssemblerPredicate<"!FeatureMips16">;
+def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">,
+ AssemblerPredicate<"!FeatureMips16">;
class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
- let Predicates = [HasStandardEncoding];
+ let Predicates = [HasStdEnc];
+}
+
+class IsCommutable {
+ bit isCommutable = 1;
}
class IsBranch {
@@ -234,6 +243,10 @@ def calltarget64: Operand<i64>;
def simm16 : Operand<i32> {
let DecoderMethod= "DecodeSimm16";
}
+
+def simm20 : Operand<i32> {
+}
+
def simm16_64 : Operand<i64>;
def shamt : Operand<i32>;
@@ -253,6 +266,7 @@ def mem : Operand<i32> {
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def mem64 : Operand<i64> {
@@ -260,18 +274,21 @@ def mem64 : Operand<i64> {
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def mem_ea : Operand<i32> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
}
def mem_ea_64 : Operand<i64> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
}
// size operand of ext instruction
@@ -296,10 +313,21 @@ def HI16 : SDNodeXForm<imm, [{
return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF);
}]>;
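+// For example, HI16 maps N = 0x12345678 to 0x1234 (the upper halfword).
+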
+// Transform an immediate into the immediate plus 1.
+def Plus1 : SDNodeXForm<imm, [{ return getImm(N, N->getSExtValue() + 1); }]>;
+
+// Node immediate fits as 8-bit sign extended on target immediate.
+def immSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>;
+
// Node immediate fits as 16-bit sign extended on target immediate.
// e.g. addi, andi
def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
+// Node immediate fits as 15-bit sign extended on target immediate.
+def immSExt15 : PatLeaf<(imm), [{ return isInt<15>(N->getSExtValue()); }]>;
+
// Node immediate fits as 16-bit zero extended on target immediate.
// The LO16 param means that only the lower 16 bits of the node
// immediate are caught.
@@ -320,113 +348,84 @@ def immLow16Zero : PatLeaf<(imm), [{
// shamt field must fit in 5 bits.
def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
+// True if (N + 1) fits in 16-bit field.
+def immSExt16Plus1 : PatLeaf<(imm), [{
+ return isInt<17>(N->getSExtValue()) && isInt<16>(N->getSExtValue() + 1);
+}]>;
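+// For example, N = 32766 passes (32767 fits in 16 bits), while N = 32767
+// fails because 32768 needs 17 bits.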
+
// Mips Address Mode! SDNode frameindex could possibly be a match
// since load and store instructions to/from the stack use it.
def addr :
- ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], [SDNPWantParent]>;
+ ComplexPattern<iPTR, 2, "selectIntAddr", [frameindex]>;
+
+def addrRegImm :
+ ComplexPattern<iPTR, 2, "selectAddrRegImm", [frameindex]>;
+
+def addrDefault :
+ ComplexPattern<iPTR, 2, "selectAddrDefault", [frameindex]>;
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
-/// Move Control Registers From/To CPU Registers
-def MFC0_3OP : MFC3OP<0x10, 0, (outs CPURegs:$rt),
- (ins CPURegs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">;
-def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegs:$rt, CPURegs:$rd, 0)>;
-
-def MTC0_3OP : MFC3OP<0x10, 4, (outs CPURegs:$rd, uimm16:$sel),
- (ins CPURegs:$rt),"mtc0\t$rt, $rd, $sel">;
-def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegs:$rd, 0, CPURegs:$rt)>;
-
-def MFC2_3OP : MFC3OP<0x12, 0, (outs CPURegs:$rt),
- (ins CPURegs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">;
-def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegs:$rt, CPURegs:$rd, 0)>;
-
-def MTC2_3OP : MFC3OP<0x12, 4, (outs CPURegs:$rd, uimm16:$sel),
- (ins CPURegs:$rt),"mtc2\t$rt, $rd, $sel">;
-def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegs:$rd, 0, CPURegs:$rt)>;
-
// Arithmetic and logical instructions with 3 register operands.
-class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
- InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
- FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"),
- [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], itin> {
- let shamt = 0;
+class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0,
+ InstrItinClass Itin = NoItinerary,
+ SDPatternOperator OpNode = null_frag>:
+ InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
let isCommutable = isComm;
let isReMaterializable = 1;
-}
-
-class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm,
- InstrItinClass itin, RegisterClass RC, bit isComm = 0>:
- FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], itin> {
- let shamt = 0;
- let isCommutable = isComm;
+ string BaseOpcode;
+ string Arch;
}
// Arithmetic and logical instructions with 2 register operands.
-class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode,
- Operand Od, PatLeaf imm_type, RegisterClass RC> :
- FI<op, (outs RC:$rt), (ins RC:$rs, Od:$imm16),
- !strconcat(instr_asm, "\t$rt, $rs, $imm16"),
- [(set RC:$rt, (OpNode RC:$rs, imm_type:$imm16))], IIAlu> {
+class ArithLogicI<string opstr, Operand Od, RegisterOperand RO,
+ SDPatternOperator imm_type = null_frag,
+ SDPatternOperator OpNode = null_frag> :
+ InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16),
+ !strconcat(opstr, "\t$rt, $rs, $imm16"),
+ [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> {
let isReMaterializable = 1;
}
-class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
- Operand Od, PatLeaf imm_type, RegisterClass RC> :
- FI<op, (outs RC:$rt), (ins RC:$rs, Od:$imm16),
- !strconcat(instr_asm, "\t$rt, $rs, $imm16"), [], IIAlu>;
-
// Arithmetic Multiply ADD/SUB
-let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
-class MArithR<bits<6> func, string instr_asm, SDNode op, bit isComm = 0> :
- FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
- !strconcat(instr_asm, "\t$rs, $rt"),
- [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul> {
- let rd = 0;
- let shamt = 0;
+class MArithR<string opstr, bit isComm = 0> :
+ InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt),
+ !strconcat(opstr, "\t$rs, $rt"), [], IIImul, FrmR> {
+ let Defs = [HI, LO];
+ let Uses = [HI, LO];
let isCommutable = isComm;
}
// Logical
-class LogicNOR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
- FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"),
- [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu> {
- let shamt = 0;
+class LogicNOR<string opstr, RegisterOperand RC>:
+ InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> {
let isCommutable = 1;
}
// Shifts
-class shift_rotate_imm<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode, PatFrag PF, Operand ImmOpnd,
- RegisterClass RC>:
- FR<0x00, func, (outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
- !strconcat(instr_asm, "\t$rd, $rt, $shamt"),
- [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu> {
- let rs = isRotate;
-}
-
-// 32-bit shift instructions.
-class shift_rotate_imm32<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode>:
- shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt5, shamt, CPURegs>;
-
-class shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode, RegisterClass RC>:
- FR<0x00, func, (outs RC:$rd), (ins CPURegs:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rt, $rs"),
- [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs))], IIAlu> {
- let shamt = isRotate;
-}
+class shift_rotate_imm<string opstr, Operand ImmOpnd,
+ RegisterOperand RC, SDPatternOperator OpNode = null_frag,
+ SDPatternOperator PF = null_frag> :
+ InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
+ !strconcat(opstr, "\t$rd, $rt, $shamt"),
+ [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>;
+
+class shift_rotate_reg<string opstr, RegisterOperand RC,
+ SDPatternOperator OpNode = null_frag>:
+ InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rt, $rs"),
+ [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>;
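+
+// For example, sll might be defined by pairing this class with the SRA_FM
+// encoding class from MipsInstrFormats.td (illustrative sketch):
+//   def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>,
+//             SRA_FM<0, 0>;
+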
// Load Upper Immediate
-class LoadUpper<bits<6> op, string instr_asm, RegisterClass RC, Operand Imm>:
- FI<op, (outs RC:$rt), (ins Imm:$imm16),
- !strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu>, IsAsCheapAsAMove {
- let rs = 0;
+class LoadUpper<string opstr, RegisterClass RC, Operand Imm>:
+ InstSE<(outs RC:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"),
+ [], IIAlu, FrmI>, IsAsCheapAsAMove {
let neverHasSideEffects = 1;
let isReMaterializable = 1;
}
@@ -440,66 +439,40 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
}
// Memory Load/Store
-let canFoldAsLoad = 1 in
-class LoadM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
- Operand MemOpnd, bit Pseudo>:
- FMem<op, (outs RC:$rt), (ins MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode addr:$addr))], IILoad> {
- let isPseudo = Pseudo;
-}
-
-class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
- Operand MemOpnd, bit Pseudo>:
- FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr"),
- [(OpNode RC:$rt, addr:$addr)], IIStore> {
- let isPseudo = Pseudo;
-}
-
-// 32-bit load.
-multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
+class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC,
+ Operand MemOpnd, ComplexPattern Addr> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
+ let canFoldAsLoad = 1;
+ let mayLoad = 1;
}
-// 64-bit load.
-multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
+class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC,
+ Operand MemOpnd, ComplexPattern Addr> :
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
+ let mayStore = 1;
}
-// 32-bit store.
-multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass LoadM<string opstr, RegisterClass RC,
+ SDPatternOperator OpNode = null_frag,
+ ComplexPattern Addr = addr> {
+ def NAME : Load<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Load<opstr, OpNode, RC, mem64, Addr>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
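+
+// For example (illustrative):
+//   defm LW : LoadM<"lw", CPURegs, load>, LW_FM<0x23>;
+//   defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>;
+// Each defm yields a NotN64 def plus an isCodeGenOnly _P8 variant for N64.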
-// 64-bit store.
-multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
- bit Pseudo = 0> {
- def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass StoreM<string opstr, RegisterClass RC,
+ SDPatternOperator OpNode = null_frag,
+ ComplexPattern Addr = addr> {
+ def NAME : Store<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Store<opstr, OpNode, RC, mem64, Addr>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
@@ -507,81 +480,58 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
// Load/Store Left/Right
let canFoldAsLoad = 1 in
-class LoadLeftRight<bits<6> op, string instr_asm, SDNode OpNode,
- RegisterClass RC, Operand MemOpnd> :
- FMem<op, (outs RC:$rt), (ins MemOpnd:$addr, RC:$src),
- !strconcat(instr_asm, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode addr:$addr, RC:$src))], IILoad> {
+class LoadLeftRight<string opstr, SDNode OpNode, RegisterClass RC,
+ Operand MemOpnd> :
+ InstSE<(outs RC:$rt), (ins MemOpnd:$addr, RC:$src),
+ !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, (OpNode addr:$addr, RC:$src))], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
string Constraints = "$src = $rt";
}
-class StoreLeftRight<bits<6> op, string instr_asm, SDNode OpNode,
- RegisterClass RC, Operand MemOpnd>:
- FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr"), [(OpNode RC:$rt, addr:$addr)],
- IIStore>;
-
-// 32-bit load left/right.
-multiclass LoadLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
-
-// 64-bit load left/right.
-multiclass LoadLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
+class StoreLeftRight<string opstr, SDNode OpNode, RegisterClass RC,
+ Operand MemOpnd>:
+ InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RC:$rt, addr:$addr)], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
}
-// 32-bit store left/right.
-multiclass StoreLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass LoadLeftRightM<string opstr, SDNode OpNode, RegisterClass RC> {
+ def NAME : LoadLeftRight<opstr, OpNode, RC, mem>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : LoadLeftRight<opstr, OpNode, RC, mem64>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
-// 64-bit store left/right.
-multiclass StoreLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> {
- def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass StoreLeftRightM<string opstr, SDNode OpNode, RegisterClass RC> {
+ def NAME : StoreLeftRight<opstr, OpNode, RC, mem>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : StoreLeftRight<opstr, OpNode, RC, mem64>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
}
}
// Conditional Branch
-class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
- BranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
+class CBranch<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs), (ins RC:$rs, RC:$rt, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $rt, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$offset)], IIBranch,
+ FrmI> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
let Defs = [AT];
}
-class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
- RegisterClass RC>:
- BranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
- let rt = _rt;
+class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs), (ins RC:$rs, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $offset"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$offset)], IIBranch, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -589,27 +539,23 @@ class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
}
// SetCC
-class SetCC_R<bits<6> op, bits<6> func, string instr_asm, PatFrag cond_op,
- RegisterClass RC>:
- FR<op, func, (outs CPURegs:$rd), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"),
- [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))],
- IIAlu> {
- let shamt = 0;
-}
+class SetCC_R<string opstr, PatFrag cond_op, RegisterClass RC> :
+ InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt),
+ !strconcat(opstr, "\t$rd, $rs, $rt"),
+ [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>;
-class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
- PatLeaf imm_type, RegisterClass RC>:
- FI<op, (outs CPURegs:$rt), (ins RC:$rs, Od:$imm16),
- !strconcat(instr_asm, "\t$rt, $rs, $imm16"),
- [(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))],
- IIAlu>;
+class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
+ RegisterClass RC>:
+ InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16),
+ !strconcat(opstr, "\t$rt, $rs, $imm16"),
+ [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))],
+ IIAlu, FrmI>;
// Jump
-class JumpFJ<bits<6> op, DAGOperand opnd, string instr_asm,
- SDPatternOperator operator, SDPatternOperator targetoperator>:
- FJ<op, (outs), (ins opnd:$target), !strconcat(instr_asm, "\t$target"),
- [(operator targetoperator:$target)], IIBranch> {
+class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
+ SDPatternOperator targetoperator> :
+ InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
+ [(operator targetoperator:$target)], IIBranch, FrmJ> {
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
@@ -618,27 +564,21 @@ class JumpFJ<bits<6> op, DAGOperand opnd, string instr_asm,
}
// Unconditional branch
-class UncondBranch<bits<6> op, string instr_asm>:
- BranchBase<op, (outs), (ins brtarget:$imm16),
- !strconcat(instr_asm, "\t$imm16"), [(br bb:$imm16)], IIBranch> {
- let rs = 0;
- let rt = 0;
+class UncondBranch<string opstr> :
+ InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"),
+ [(br bb:$offset)], IIBranch, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let isBarrier = 1;
let hasDelaySlot = 1;
- let Predicates = [RelocPIC, HasStandardEncoding];
+ let Predicates = [RelocPIC, HasStdEnc];
let Defs = [AT];
}
// Base class for indirect branch and return instruction classes.
let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
class JumpFR<RegisterClass RC, SDPatternOperator operator = null_frag>:
- FR<0, 0x8, (outs), (ins RC:$rs), "jr\t$rs", [(operator RC:$rs)], IIBranch> {
- let rt = 0;
- let rd = 0;
- let shamt = 0;
-}
+ InstSE<(outs), (ins RC:$rs), "jr\t$rs", [(operator RC:$rs)], IIBranch, FrmR>;
// Indirect branch
class IndirectBranch<RegisterClass RC>: JumpFR<RC, brind> {
@@ -656,465 +596,523 @@ class RetBase<RegisterClass RC>: JumpFR<RC> {
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1, Defs = [RA] in {
- class JumpLink<bits<6> op, string instr_asm>:
- FJ<op, (outs), (ins calltarget:$target),
- !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
- IIBranch> {
- let DecoderMethod = "DecodeJumpTarget";
- }
-
- class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm,
- RegisterClass RC>:
- FR<op, func, (outs), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink RC:$rs)], IIBranch> {
- let rt = 0;
- let rd = 31;
- let shamt = 0;
+ class JumpLink<string opstr> :
+ InstSE<(outs), (ins calltarget:$target), !strconcat(opstr, "\t$target"),
+ [(MipsJmpLink imm:$target)], IIBranch, FrmJ> {
+ let DecoderMethod = "DecodeJumpTarget";
}
- class BranchLink<string instr_asm, bits<5> _rt, RegisterClass RC>:
- FI<0x1, (outs), (ins RC:$rs, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch> {
- let rt = _rt;
- }
+ class JumpLinkRegPseudo<RegisterClass RC, Instruction JALRInst,
+ Register RetReg>:
+ PseudoSE<(outs), (ins RC:$rs), [(MipsJmpLink RC:$rs)], IIBranch>,
+ PseudoInstExpansion<(JALRInst RetReg, RC:$rs)>;
+
+ class JumpLinkReg<string opstr, RegisterClass RC>:
+ InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [], IIBranch, FrmR>;
+
+ class BGEZAL_FT<string opstr, RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, brtarget:$offset),
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI>;
+
+}
+
+class BAL_FT :
+ InstSE<(outs), (ins brtarget:$offset), "bal\t$offset", [], IIBranch, FrmI> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let Defs = [RA];
}
+// Sync
+let hasSideEffects = 1 in
+class SYNC_FT :
+ InstSE<(outs), (ins i32imm:$stype), "sync $stype", [(MipsSync imm:$stype)],
+ NoItinerary, FrmOther>;
+
// Mul, Div
-class Mult<bits<6> func, string instr_asm, InstrItinClass itin,
- RegisterClass RC, list<Register> DefRegs>:
- FR<0x00, func, (outs), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$rs, $rt"), [], itin> {
- let rd = 0;
- let shamt = 0;
+class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
+ list<Register> DefRegs> :
+ InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [],
+ itin, FrmR> {
let isCommutable = 1;
let Defs = DefRegs;
let neverHasSideEffects = 1;
}
-class Mult32<bits<6> func, string instr_asm, InstrItinClass itin>:
- Mult<func, instr_asm, itin, CPURegs, [HI, LO]>;
-
-class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin,
- RegisterClass RC, list<Register> DefRegs>:
- FR<0x00, func, (outs), (ins RC:$rs, RC:$rt),
- !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
- [(op RC:$rs, RC:$rt)], itin> {
- let rd = 0;
- let shamt = 0;
+// Pseudo multiply/divide instruction with explicit accumulator register
+// operands.
+class MultDivPseudo<Instruction RealInst, RegisterClass R0, RegisterOperand R1,
+ SDPatternOperator OpNode, InstrItinClass Itin,
+ bit IsComm = 1, bit HasSideEffects = 0> :
+ PseudoSE<(outs R0:$ac), (ins R1:$rs, R1:$rt),
+ [(set R0:$ac, (OpNode R1:$rs, R1:$rt))], Itin>,
+ PseudoInstExpansion<(RealInst R1:$rs, R1:$rt)> {
+ let isCommutable = IsComm;
+ let hasSideEffects = HasSideEffects;
+}
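+
+// For example (illustrative):
+//   def PseudoMULT : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult,
+//                                  IIImul>;
+// lets isel assign the 64-bit product directly to an accumulator register.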
+
+// Pseudo multiply add/sub instruction with explicit accumulator register
+// operands.
+class MAddSubPseudo<Instruction RealInst, SDPatternOperator OpNode>
+ : PseudoSE<(outs ACRegs:$ac),
+ (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin),
+ [(set ACRegs:$ac,
+ (OpNode CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin))],
+ IIImul>,
+ PseudoInstExpansion<(RealInst CPURegsOpnd:$rs, CPURegsOpnd:$rt)> {
+ string Constraints = "$acin = $ac";
+}
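+
+// For example (illustrative): def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd>;
+// The "$acin = $ac" constraint makes the pseudo read and write the same
+// accumulator, matching madd's read-modify-write semantics.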
+
+class Div<string opstr, InstrItinClass itin, RegisterOperand RO,
+ list<Register> DefRegs> :
+ InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$$zero, $rs, $rt"),
+ [], itin, FrmR> {
let Defs = DefRegs;
}
-class Div32<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- Div<op, func, instr_asm, itin, CPURegs, [HI, LO]>;
-
// Move from Hi/Lo
-class MoveFromLOHI<bits<6> func, string instr_asm, RegisterClass RC,
- list<Register> UseRegs>:
- FR<0x00, func, (outs RC:$rd), (ins),
- !strconcat(instr_asm, "\t$rd"), [], IIHiLo> {
- let rs = 0;
- let rt = 0;
- let shamt = 0;
+class MoveFromLOHI<string opstr, RegisterClass RC, list<Register> UseRegs>:
+ InstSE<(outs RC:$rd), (ins), !strconcat(opstr, "\t$rd"), [], IIHiLo, FrmR> {
let Uses = UseRegs;
let neverHasSideEffects = 1;
}
-class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC,
- list<Register> DefRegs>:
- FR<0x00, func, (outs), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rs"), [], IIHiLo> {
- let rt = 0;
- let rd = 0;
- let shamt = 0;
+class MoveToLOHI<string opstr, RegisterClass RC, list<Register> DefRegs>:
+ InstSE<(outs), (ins RC:$rs), !strconcat(opstr, "\t$rs"), [], IIHiLo, FrmR> {
let Defs = DefRegs;
let neverHasSideEffects = 1;
}
-class EffectiveAddress<bits<6> opc, string instr_asm, RegisterClass RC, Operand Mem> :
- FMem<opc, (outs RC:$rt), (ins Mem:$addr),
- instr_asm, [(set RC:$rt, addr:$addr)], IIAlu> {
- let isCodeGenOnly = 1;
+class EffectiveAddress<string opstr, RegisterClass RC, Operand Mem> :
+ InstSE<(outs RC:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RC:$rt, addr:$addr)], NoItinerary, FrmI> {
+ let isCodeGenOnly = 1;
+ let DecoderMethod = "DecodeMem";
}
// Count Leading Ones/Zeros in Word
-class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>:
- FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rd, $rs"),
- [(set RC:$rd, (ctlz RC:$rs))], IIAlu>,
- Requires<[HasBitCount, HasStandardEncoding]> {
- let shamt = 0;
- let rt = rd;
-}
+class CountLeading0<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [(set RO:$rd, (ctlz RO:$rs))], IIAlu, FrmR>,
+ Requires<[HasBitCount, HasStdEnc]>;
+
+class CountLeading1<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [(set RO:$rd, (ctlz (not RO:$rs)))], IIAlu, FrmR>,
+ Requires<[HasBitCount, HasStdEnc]>;
-class CountLeading1<bits<6> func, string instr_asm, RegisterClass RC>:
- FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
- !strconcat(instr_asm, "\t$rd, $rs"),
- [(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu>,
- Requires<[HasBitCount, HasStandardEncoding]> {
- let shamt = 0;
- let rt = rd;
-}
// Sign Extend in Register.
-class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt,
- RegisterClass RC>:
- FR<0x1f, 0x20, (outs RC:$rd), (ins RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rt"),
- [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary> {
- let rs = 0;
- let shamt = sa;
- let Predicates = [HasSEInReg, HasStandardEncoding];
+class SignExtInReg<string opstr, ValueType vt, RegisterClass RC> :
+ InstSE<(outs RC:$rd), (ins RC:$rt), !strconcat(opstr, "\t$rd, $rt"),
+ [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary, FrmR> {
+ let Predicates = [HasSEInReg, HasStdEnc];
}
// Subword Swap
-class SubwordSwap<bits<6> func, bits<5> sa, string instr_asm, RegisterClass RC>:
- FR<0x1f, func, (outs RC:$rd), (ins RC:$rt),
- !strconcat(instr_asm, "\t$rd, $rt"), [], NoItinerary> {
- let rs = 0;
- let shamt = sa;
- let Predicates = [HasSwap, HasStandardEncoding];
+class SubwordSwap<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
+ NoItinerary, FrmR> {
+ let Predicates = [HasSwap, HasStdEnc];
let neverHasSideEffects = 1;
}
// Read Hardware
-class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass>
- : FR<0x1f, 0x3b, (outs CPURegClass:$rt), (ins HWRegClass:$rd),
- "rdhwr\t$rt, $rd", [], IIAlu> {
- let rs = 0;
- let shamt = 0;
-}
+class ReadHardware<RegisterClass CPURegClass, RegisterOperand RO> :
+ InstSE<(outs CPURegClass:$rt), (ins RO:$rd), "rdhwr\t$rt, $rd", [],
+ IIAlu, FrmR>;
// Ext and Ins
-class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
- FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz),
- !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
- [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> {
- bits<5> pos;
- bits<5> sz;
- let rd = sz;
- let shamt = pos;
- let Predicates = [HasMips32r2, HasStandardEncoding];
-}
-
-class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
- FR<0x1f, _funct, (outs RC:$rt),
- (ins RC:$rs, uimm16:$pos, size_ins:$sz, RC:$src),
- !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
- [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$sz, RC:$src))],
- NoItinerary> {
- bits<5> pos;
- bits<5> sz;
- let rd = sz;
- let shamt = pos;
- let Predicates = [HasMips32r2, HasStandardEncoding];
+class ExtBase<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ext:$size),
+ !strconcat(opstr, " $rt, $rs, $pos, $size"),
+ [(set RO:$rt, (MipsExt RO:$rs, imm:$pos, imm:$size))], NoItinerary,
+ FrmR> {
+ let Predicates = [HasMips32r2, HasStdEnc];
+}
+
+class InsBase<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ins:$size, RO:$src),
+ !strconcat(opstr, " $rt, $rs, $pos, $size"),
+ [(set RO:$rt, (MipsIns RO:$rs, imm:$pos, imm:$size, RO:$src))],
+ NoItinerary, FrmR> {
+ let Predicates = [HasMips32r2, HasStdEnc];
let Constraints = "$src = $rt";
}
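// The "$src = $rt" tie reflects that ins only overwrites the selected
// bitfield of $rt and must see its previous contents.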
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
-class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC,
- RegisterClass PRC> :
+class Atomic2Ops<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
- !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
[(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
-multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
- def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass Atomic2Ops32<PatFrag Op> {
+ def NAME : Atomic2Ops<Op, CPURegs, CPURegs>, Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Atomic2Ops<Op, CPURegs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
}
// Atomic Compare & Swap.
-class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC,
- RegisterClass PRC> :
+class AtomicCmpSwap<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
- !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"),
[(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
-multiclass AtomicCmpSwap32<PatFrag Op, string Width> {
- def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>,
- Requires<[NotN64, HasStandardEncoding]>;
- def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>,
- Requires<[IsN64, HasStandardEncoding]> {
+multiclass AtomicCmpSwap32<PatFrag Op> {
+ def NAME : AtomicCmpSwap<Op, CPURegs, CPURegs>,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : AtomicCmpSwap<Op, CPURegs, CPU64Regs>,
+ Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
}
-class LLBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
- FMem<Opc, (outs RC:$rt), (ins Mem:$addr),
- !strconcat(opstring, "\t$rt, $addr"), [], IILoad> {
+class LLBase<string opstr, RegisterOperand RO, Operand Mem> :
+ InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
let mayLoad = 1;
}
-class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
- FMem<Opc, (outs RC:$dst), (ins RC:$rt, Mem:$addr),
- !strconcat(opstring, "\t$rt, $addr"), [], IIStore> {
+class SCBase<string opstr, RegisterOperand RO, Operand Mem> :
+ InstSE<(outs RO:$dst), (ins RO:$rt, Mem:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMem";
let mayStore = 1;
let Constraints = "$rt = $dst";
}
+class MFC3OP<dag outs, dag ins, string asmstr> :
+ InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR>;
+
//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//
// Return RA.
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
-def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>;
+def RetRA : PseudoSE<(outs), (ins), [(MipsRet)]>;
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt),
- "!ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- "!ADJCALLSTACKUP $amt1",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-// When handling PIC code the assembler needs .cpload and .cprestore
-// directives. If the real instructions corresponding these directives
-// are used, we have the same behavior, but get also a bunch of warnings
-// from the assembler.
-let neverHasSideEffects = 1 in
-def CPRESTORE : PseudoSE<(outs), (ins i32imm:$loc, CPURegs:$gp),
- ".cprestore\t$loc", []>;
-
let usesCustomInserter = 1 in {
- defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">;
- defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16, "load_add_16">;
- defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32, "load_add_32">;
- defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8, "load_sub_8">;
- defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16, "load_sub_16">;
- defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32, "load_sub_32">;
- defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8, "load_and_8">;
- defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16, "load_and_16">;
- defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32, "load_and_32">;
- defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8, "load_or_8">;
- defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16, "load_or_16">;
- defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32, "load_or_32">;
- defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8, "load_xor_8">;
- defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16, "load_xor_16">;
- defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32, "load_xor_32">;
- defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8, "load_nand_8">;
- defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16, "load_nand_16">;
- defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32, "load_nand_32">;
-
- defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8, "swap_8">;
- defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16, "swap_16">;
- defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32, "swap_32">;
-
- defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8, "8">;
- defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16, "16">;
- defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32, "32">;
-}
+ defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8>;
+ defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16>;
+ defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32>;
+ defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8>;
+ defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16>;
+ defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32>;
+ defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8>;
+ defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16>;
+ defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32>;
+ defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8>;
+ defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16>;
+ defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32>;
+ defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8>;
+ defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16>;
+ defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32>;
+ defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8>;
+ defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16>;
+ defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32>;
+
+ defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8>;
+ defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16>;
+ defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32>;
+
+ defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8>;
+ defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16>;
+ defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>;
+}
+
+/// Pseudo instructions for loading, storing and copying accumulator registers.
+let isPseudo = 1 in {
+ defm LOAD_AC64 : LoadM<"load_ac64", ACRegs>;
+ defm STORE_AC64 : StoreM<"store_ac64", ACRegs>;
+}
+
+def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>;
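+// These pseudos survive until frame lowering, where ExpandACCPseudo in
+// MipsSEFrameLowering.cpp (later in this patch) splits them into GPR
+// loads/stores and copies through the lo/hi subregisters.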
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
-
-class LoadImm32< string instr_asm, Operand Od, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32),
- !strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32Reg : LoadImm32<"li", shamt,CPURegs>;
-
-class LoadAddress<string instr_asm, Operand MemOpnd, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins MemOpnd:$addr),
- !strconcat(instr_asm, "\t$rt, $addr")> ;
-def LoadAddr32Reg : LoadAddress<"la", mem, CPURegs>;
-
-class LoadAddressImm<string instr_asm, Operand Od, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32),
- !strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegs>;
-
//===----------------------------------------------------------------------===//
// MipsI Instructions
//===----------------------------------------------------------------------===//
/// Arithmetic Instructions (ALU Immediate)
-def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>,
- IsAsCheapAsAMove;
-def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16, CPURegs>;
-def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16, CPURegs>;
-def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>;
-def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>;
-def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>;
-def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>;
-def LUi : LoadUpper<0x0f, "lui", CPURegs, uimm16>;
+def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>,
+ ADDI_FM<0x9>, IsAsCheapAsAMove;
+def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>;
+def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>;
+def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>;
+def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
+ ADDI_FM<0xc>;
+def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
+ ADDI_FM<0xd>;
+def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
+ ADDI_FM<0xe>;
+def LUi : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM;
/// Arithmetic Instructions (3-Operand, R-Type)
-def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>;
-def SUBu : ArithLogicR<0x00, 0x23, "subu", sub, IIAlu, CPURegs>;
-def ADD : ArithOverflowR<0x00, 0x20, "add", IIAlu, CPURegs, 1>;
-def SUB : ArithOverflowR<0x00, 0x22, "sub", IIAlu, CPURegs>;
-def SLT : SetCC_R<0x00, 0x2a, "slt", setlt, CPURegs>;
-def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult, CPURegs>;
-def AND : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPURegs, 1>;
-def OR : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPURegs, 1>;
-def XOR : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPURegs, 1>;
-def NOR : LogicNOR<0x00, 0x27, "nor", CPURegs>;
+def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>;
+def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>;
+def MUL : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, ADD_FM<0x1c, 2>;
+def ADD : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>;
+def SUB : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>;
+def SLT : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>;
+def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>;
+def AND : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
+def OR : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
+def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
+def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
-def SLL : shift_rotate_imm32<0x00, 0x00, "sll", shl>;
-def SRL : shift_rotate_imm32<0x02, 0x00, "srl", srl>;
-def SRA : shift_rotate_imm32<0x03, 0x00, "sra", sra>;
-def SLLV : shift_rotate_reg<0x04, 0x00, "sllv", shl, CPURegs>;
-def SRLV : shift_rotate_reg<0x06, 0x00, "srlv", srl, CPURegs>;
-def SRAV : shift_rotate_reg<0x07, 0x00, "srav", sra, CPURegs>;
+def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>,
+ SRA_FM<0, 0>;
+def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>,
+ SRA_FM<2, 0>;
+def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>,
+ SRA_FM<3, 0>;
+def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>;
+def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>;
+def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>;
// Rotate Instructions
-let Predicates = [HasMips32r2, HasStandardEncoding] in {
- def ROTR : shift_rotate_imm32<0x02, 0x01, "rotr", rotr>;
- def ROTRV : shift_rotate_reg<0x06, 0x01, "rotrv", rotr, CPURegs>;
+let Predicates = [HasMips32r2, HasStdEnc] in {
+ def ROTR : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>,
+ SRA_FM<2, 1>;
+ def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>;
}
/// Load and Store Instructions
/// aligned
-defm LB : LoadM32<0x20, "lb", sextloadi8>;
-defm LBu : LoadM32<0x24, "lbu", zextloadi8>;
-defm LH : LoadM32<0x21, "lh", sextloadi16>;
-defm LHu : LoadM32<0x25, "lhu", zextloadi16>;
-defm LW : LoadM32<0x23, "lw", load>;
-defm SB : StoreM32<0x28, "sb", truncstorei8>;
-defm SH : StoreM32<0x29, "sh", truncstorei16>;
-defm SW : StoreM32<0x2b, "sw", store>;
+defm LB : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>;
+defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, LW_FM<0x24>;
+defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, LW_FM<0x21>;
+defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>;
+defm LW : LoadM<"lw", CPURegs, load, addrDefault>, LW_FM<0x23>;
+defm SB : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>;
+defm SH : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>;
+defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>;
/// load/store left/right
-defm LWL : LoadLeftRightM32<0x22, "lwl", MipsLWL>;
-defm LWR : LoadLeftRightM32<0x26, "lwr", MipsLWR>;
-defm SWL : StoreLeftRightM32<0x2a, "swl", MipsSWL>;
-defm SWR : StoreLeftRightM32<0x2e, "swr", MipsSWR>;
+defm LWL : LoadLeftRightM<"lwl", MipsLWL, CPURegs>, LW_FM<0x22>;
+defm LWR : LoadLeftRightM<"lwr", MipsLWR, CPURegs>, LW_FM<0x26>;
+defm SWL : StoreLeftRightM<"swl", MipsSWL, CPURegs>, LW_FM<0x2a>;
+defm SWR : StoreLeftRightM<"swr", MipsSWR, CPURegs>, LW_FM<0x2e>;
-let hasSideEffects = 1 in
-def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype",
- [(MipsSync imm:$stype)], NoItinerary, FrmOther>
-{
- bits<5> stype;
- let Opcode = 0;
- let Inst{25-11} = 0;
- let Inst{10-6} = stype;
- let Inst{5-0} = 15;
-}
+def SYNC : SYNC_FT, SYNC_FM;
/// Load-linked, Store-conditional
-def LL : LLBase<0x30, "ll", CPURegs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
-def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
+let Predicates = [NotN64, HasStdEnc] in {
+ def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>;
+ def SC : SCBase<"sc", CPURegsOpnd, mem>, LW_FM<0x38>;
}
-def SC : SCBase<0x38, "sc", CPURegs, mem>,
- Requires<[NotN64, HasStandardEncoding]>;
-def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>,
- Requires<[IsN64, HasStandardEncoding]> {
- let DecoderNamespace = "Mips64";
+let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def LL_P8 : LLBase<"ll", CPURegsOpnd, mem64>, LW_FM<0x30>;
+ def SC_P8 : SCBase<"sc", CPURegsOpnd, mem64>, LW_FM<0x38>;
}
/// Jump and Branch Instructions
-def J : JumpFJ<0x02, jmptarget, "j", br, bb>,
- Requires<[RelocStatic, HasStandardEncoding]>, IsBranch;
-def JR : IndirectBranch<CPURegs>;
-def B : UncondBranch<0x04, "b">;
-def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
-def BNE : CBranch<0x05, "bne", setne, CPURegs>;
-def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>;
-def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>;
-def BLEZ : CBranchZero<0x06, 0, "blez", setle, CPURegs>;
-def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>;
-
-let rt = 0, rs = 0, isBranch = 1, isTerminator = 1, isBarrier = 1,
- hasDelaySlot = 1, Defs = [RA] in
-def BAL_BR: FI<0x1, (outs), (ins brtarget:$imm16), "bal\t$imm16", [], IIBranch>;
-
-def JAL : JumpLink<0x03, "jal">;
-def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>;
-def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>;
-def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>;
-def TAILCALL : JumpFJ<0x02, calltarget, "j", MipsTailCall, imm>, IsTailCall;
-def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, IsTailCall;
-
-def RET : RetBase<CPURegs>;
+def J : JumpFJ<jmptarget, "j", br, bb>, FJ<2>,
+ Requires<[RelocStatic, HasStdEnc]>, IsBranch;
+def JR : IndirectBranch<CPURegs>, MTLO_FM<8>;
+def B : UncondBranch<"b">, B_FM;
+def BEQ : CBranch<"beq", seteq, CPURegs>, BEQ_FM<4>;
+def BNE : CBranch<"bne", setne, CPURegs>, BEQ_FM<5>;
+def BGEZ : CBranchZero<"bgez", setge, CPURegs>, BGEZ_FM<1, 1>;
+def BGTZ : CBranchZero<"bgtz", setgt, CPURegs>, BGEZ_FM<7, 0>;
+def BLEZ : CBranchZero<"blez", setle, CPURegs>, BGEZ_FM<6, 0>;
+def BLTZ : CBranchZero<"bltz", setlt, CPURegs>, BGEZ_FM<1, 0>;
+
+def BAL_BR : BAL_FT, BAL_FM;
+
+def JAL : JumpLink<"jal">, FJ<3>;
+def JALR : JumpLinkReg<"jalr", CPURegs>, JALR_FM;
+def JALRPseudo : JumpLinkRegPseudo<CPURegs, JALR, RA>;
+def BGEZAL : BGEZAL_FT<"bgezal", CPURegsOpnd>, BGEZAL_FM<0x11>;
+def BLTZAL : BGEZAL_FT<"bltzal", CPURegsOpnd>, BGEZAL_FM<0x10>;
+def TAILCALL : JumpFJ<calltarget, "j", MipsTailCall, imm>, FJ<2>, IsTailCall;
+def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, MTLO_FM<8>, IsTailCall;
+
+def RET : RetBase<CPURegs>, MTLO_FM<8>;
+
+// Exception handling related node and instructions.
+// The conversion sequence is:
+// ISD::EH_RETURN -> MipsISD::EH_RETURN ->
+// MIPSeh_return -> (stack change + indirect branch)
+//
+// MIPSeh_return takes the place of the regular return instruction, but takes
+// two arguments (V1, V0) that hold the stack-adjustment offset and the
+// return address, respectively.
+def SDT_MipsEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
-/// Multiply and Divide Instructions.
-def MULT : Mult32<0x18, "mult", IIImul>;
-def MULTu : Mult32<0x19, "multu", IIImul>;
-def SDIV : Div32<MipsDivRem, 0x1a, "div", IIIdiv>;
-def UDIV : Div32<MipsDivRemU, 0x1b, "divu", IIIdiv>;
+def MIPSehret : SDNode<"MipsISD::EH_RETURN", SDT_MipsEHRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ def MIPSeh_return32 : MipsPseudo<(outs), (ins CPURegs:$spoff, CPURegs:$dst),
+ [(MIPSehret CPURegs:$spoff, CPURegs:$dst)]>;
+ def MIPSeh_return64 : MipsPseudo<(outs), (ins CPU64Regs:$spoff,
+ CPU64Regs:$dst),
+ [(MIPSehret CPU64Regs:$spoff, CPU64Regs:$dst)]>;
+}
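+// Lowering presumably moves the offset and handler into V1/V0 (the implicit
+// Uses above) before emitting MIPSehret, so these pseudos are all the
+// instruction selector needs here.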
-def MTHI : MoveToLOHI<0x11, "mthi", CPURegs, [HI]>;
-def MTLO : MoveToLOHI<0x13, "mtlo", CPURegs, [LO]>;
-def MFHI : MoveFromLOHI<0x10, "mfhi", CPURegs, [HI]>;
-def MFLO : MoveFromLOHI<0x12, "mflo", CPURegs, [LO]>;
+/// Multiply and Divide Instructions.
+def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>;
+def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>;
+def PseudoMULT : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult, IIImul>;
+def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, CPURegsOpnd, MipsMultu, IIImul>;
+def SDIV : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>;
+def UDIV : Div<"divu", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1b>;
+def PseudoSDIV : MultDivPseudo<SDIV, ACRegs, CPURegsOpnd, MipsDivRem, IIIdiv, 0>;
+def PseudoUDIV : MultDivPseudo<UDIV, ACRegs, CPURegsOpnd, MipsDivRemU, IIIdiv,
+ 0>;
+
+def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>;
+def MTLO : MoveToLOHI<"mtlo", CPURegs, [LO]>, MTLO_FM<0x13>;
+def MFHI : MoveFromLOHI<"mfhi", CPURegs, [HI]>, MFLO_FM<0x10>;
+def MFLO : MoveFromLOHI<"mflo", CPURegs, [LO]>, MFLO_FM<0x12>;
/// Sign Ext In Register Instructions.
-def SEB : SignExtInReg<0x10, "seb", i8, CPURegs>;
-def SEH : SignExtInReg<0x18, "seh", i16, CPURegs>;
+def SEB : SignExtInReg<"seb", i8, CPURegs>, SEB_FM<0x10, 0x20>;
+def SEH : SignExtInReg<"seh", i16, CPURegs>, SEB_FM<0x18, 0x20>;
/// Count Leading
-def CLZ : CountLeading0<0x20, "clz", CPURegs>;
-def CLO : CountLeading1<0x21, "clo", CPURegs>;
+def CLZ : CountLeading0<"clz", CPURegsOpnd>, CLO_FM<0x20>;
+def CLO : CountLeading1<"clo", CPURegsOpnd>, CLO_FM<0x21>;
/// Word Swap Bytes Within Halfwords
-def WSBH : SubwordSwap<0x20, 0x2, "wsbh", CPURegs>;
+def WSBH : SubwordSwap<"wsbh", CPURegsOpnd>, SEB_FM<2, 0x20>;
-/// No operation
-let addr=0 in
- def NOP : FJ<0, (outs), (ins), "nop", [], IIAlu>;
+/// No operation.
+def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
// FrameIndexes are legalized when they are operands of load/store
// instructions. The same does not happen for stack address copies, so an
// add op with a mem ComplexPattern is used so the stack address copy
// can be matched. It's similar to Sparc's LEA_ADDRi.
-def LEA_ADDiu : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>;
-
-// DynAlloc node points to dynamically allocated stack space.
-// $sp is added to the list of implicitly used registers to prevent dead code
-// elimination from removing instructions that modify $sp.
-let Uses = [SP] in
-def DynAlloc : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>;
+def LEA_ADDiu : EffectiveAddress<"addiu", CPURegs, mem_ea>, LW_FM<9>;
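+// For example, taking the address of a local and passing it to a call
+// selects LEA_ADDiu, which becomes "addiu $rt, $sp, <offset>" (or an $fp
+// base) once the frame index is eliminated; illustrative, not exhaustive.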
// MADD*/MSUB*
-def MADD : MArithR<0, "madd", MipsMAdd, 1>;
-def MADDU : MArithR<1, "maddu", MipsMAddu, 1>;
-def MSUB : MArithR<4, "msub", MipsMSub>;
-def MSUBU : MArithR<5, "msubu", MipsMSubu>;
+def MADD : MArithR<"madd", 1>, MULT_FM<0x1c, 0>;
+def MADDU : MArithR<"maddu", 1>, MULT_FM<0x1c, 1>;
+def MSUB : MArithR<"msub">, MULT_FM<0x1c, 4>;
+def MSUBU : MArithR<"msubu">, MULT_FM<0x1c, 5>;
+def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd>;
+def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu>;
+def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub>;
+def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu>;
-// MUL is a assembly macro in the current used ISAs. In recent ISA's
-// it is a real instruction.
-def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
- Requires<[HasMips32, HasStandardEncoding]>;
+def RDHWR : ReadHardware<CPURegs, HWRegsOpnd>, RDHWR_FM;
-def RDHWR : ReadHardware<CPURegs, HWRegs>;
+def EXT : ExtBase<"ext", CPURegsOpnd>, EXT_FM<0>;
+def INS : InsBase<"ins", CPURegsOpnd>, EXT_FM<4>;
-def EXT : ExtBase<0, "ext", CPURegs>;
-def INS : InsBase<4, "ins", CPURegs>;
+/// Move Control Registers From/To CPU Registers
+def MFC0_3OP : MFC3OP<(outs CPURegsOpnd:$rt),
+ (ins CPURegsOpnd:$rd, uimm16:$sel),
+ "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>;
+
+def MTC0_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel),
+ (ins CPURegsOpnd:$rt),
+ "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>;
+
+def MFC2_3OP : MFC3OP<(outs CPURegsOpnd:$rt),
+ (ins CPURegsOpnd:$rd, uimm16:$sel),
+ "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>;
+
+def MTC2_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel),
+ (ins CPURegsOpnd:$rt),
+ "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>;
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
-def : InstAlias<"move $dst,$src", (ADD CPURegs:$dst,CPURegs:$src,ZERO)>;
-def : InstAlias<"bal $offset", (BGEZAL RA,brtarget:$offset)>;
-def : InstAlias<"addu $rs,$rt,$imm",
- (ADDiu CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"add $rs,$rt,$imm",
- (ADDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"and $rs,$rt,$imm",
- (ANDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"j $rs", (JR CPURegs:$rs)>;
-def : InstAlias<"not $rt,$rs", (NOR CPURegs:$rt,CPURegs:$rs,ZERO)>;
-def : InstAlias<"neg $rt,$rs", (SUB CPURegs:$rt,ZERO,CPURegs:$rs)>;
-def : InstAlias<"negu $rt,$rs", (SUBu CPURegs:$rt,ZERO,CPURegs:$rs)>;
-def : InstAlias<"slt $rs,$rt,$imm",
- (SLTi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"xor $rs,$rt,$imm",
- (XORi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
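+// Most aliases below carry an explicit InstAlias emit bit: 1 also uses the
+// alias when printing, 0 makes it parse-only (the default is 1).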
+def : InstAlias<"move $dst, $src",
+ (ADDu CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>,
+ Requires<[NotMips64]>;
+def : InstAlias<"move $dst, $src",
+ (OR CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>,
+ Requires<[NotMips64]>;
+def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset), 1>;
+def : InstAlias<"addu $rs, $rt, $imm",
+ (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"add $rs, $rt, $imm",
+ (ADDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"and $rs, $rt, $imm",
+ (ANDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"j $rs", (JR CPURegs:$rs), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"jalr $rs", (JALR RA, CPURegs:$rs)>, Requires<[NotMips64]>;
+def : InstAlias<"jal $rs", (JALR RA, CPURegs:$rs), 0>, Requires<[NotMips64]>;
+def : InstAlias<"jal $rd,$rs", (JALR CPURegs:$rd, CPURegs:$rs), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"not $rt, $rs",
+ (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO), 1>;
+def : InstAlias<"neg $rt, $rs",
+ (SUB CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>;
+def : InstAlias<"negu $rt, $rs",
+ (SUBu CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>;
+def : InstAlias<"slt $rs, $rt, $imm",
+ (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm), 0>;
+def : InstAlias<"xor $rs, $rt, $imm",
+ (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"or $rs, $rt, $imm",
+ (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
+def : InstAlias<"mfc0 $rt, $rd",
+ (MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>;
+def : InstAlias<"mtc0 $rt, $rd",
+ (MTC0_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>;
+def : InstAlias<"mfc2 $rt, $rd",
+ (MFC2_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>;
+def : InstAlias<"mtc2 $rt, $rd",
+ (MTC2_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>;
+
+//===----------------------------------------------------------------------===//
+// Assembler Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+class LoadImm32<string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
+ !strconcat(instr_asm, "\t$rt, $imm32")>;
+def LoadImm32Reg : LoadImm32<"li", shamt, CPURegsOpnd>;
+
+class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr),
+ !strconcat(instr_asm, "\t$rt, $addr")>;
+def LoadAddr32Reg : LoadAddress<"la", mem, CPURegsOpnd>;
+
+class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
+ !strconcat(instr_asm, "\t$rt, $imm32")>;
+def LoadAddr32Imm : LoadAddressImm<"la", shamt, CPURegsOpnd>;
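+// As assembler macros these expand in the usual way: e.g. "li $4, 0x12345"
+// becomes "lui $4, 1" followed by "ori $4, $4, 0x2345" because the immediate
+// does not fit in 16 bits (illustrative expansion).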
+
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
+// Load/store pattern templates.
+class LoadRegImmPat<Instruction LoadInst, ValueType ValTy, PatFrag Node> :
+ MipsPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>;
+
+class StoreRegImmPat<Instruction StoreInst, ValueType ValTy> :
+ MipsPat<(store ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>;
+
// Small immediates
def : MipsPat<(i32 immSExt16:$in),
(ADDiu ZERO, imm:$in)>;
@@ -1194,25 +1192,25 @@ def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>;
// Mips does not have a "not" instruction, so we expand it into a nor with $zero.
def : MipsPat<(not CPURegs:$in),
- (NOR CPURegs:$in, ZERO)>;
+ (NOR CPURegsOpnd:$in, ZERO)>;
// extended loads
-let Predicates = [NotN64, HasStandardEncoding] in {
+let Predicates = [NotN64, HasStdEnc] in {
def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>;
}
-let Predicates = [IsN64, HasStandardEncoding] in {
+let Predicates = [IsN64, HasStdEnc] in {
def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>;
def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>;
def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu_P8 addr:$src)>;
}
// peepholes
-let Predicates = [NotN64, HasStandardEncoding] in {
+let Predicates = [NotN64, HasStdEnc] in {
def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
}
-let Predicates = [IsN64, HasStandardEncoding] in {
+let Predicates = [IsN64, HasStdEnc] in {
def : MipsPat<(store (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
}
@@ -1289,12 +1287,27 @@ defm : SetgtPats<CPURegs, SLT, SLTu>;
defm : SetgePats<CPURegs, SLT, SLTu>;
defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
-// select MipsDynAlloc
-def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
-
// bswap pattern
def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
+// mflo/hi patterns.
+def : MipsPat<(i32 (ExtractLOHI ACRegs:$ac, imm:$lohi_idx)),
+ (EXTRACT_SUBREG ACRegs:$ac, imm:$lohi_idx)>;
+
+// Load halfword/word patterns.
+let AddedComplexity = 40 in {
+ let Predicates = [NotN64, HasStdEnc] in {
+ def : LoadRegImmPat<LBu, i32, zextloadi8>;
+ def : LoadRegImmPat<LH, i32, sextloadi16>;
+ def : LoadRegImmPat<LW, i32, load>;
+ }
+ let Predicates = [IsN64, HasStdEnc] in {
+ def : LoadRegImmPat<LBu_P8, i32, zextloadi8>;
+ def : LoadRegImmPat<LH_P8, i32, sextloadi16>;
+ def : LoadRegImmPat<LW_P8, i32, load>;
+ }
+}
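+// The AddedComplexity of 40 biases instruction selection toward these
+// reg+imm forms whenever both they and the generic addr patterns match.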
+
//===----------------------------------------------------------------------===//
// Floating Point Support
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
index da1119df8f9f..1b2a325d3ce6 100644
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -16,12 +16,12 @@
#include "MipsInstrInfo.h"
#include "MipsRelocations.h"
#include "MipsSubtarget.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
using namespace llvm;
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 5d9f0cffb749..2efe534053a2 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -10,21 +10,21 @@
// This pass expands a branch or jump instruction into a long branch if its
// offset is too large to fit into its immediate field.
//
-// FIXME:
-// 1. Fix pc-region jump instructions which cross 256MB segment boundaries.
+// FIXME:
+// 1. Fix pc-region jump instructions which cross 256MB segment boundaries.
// 2. If the program has inline assembly statements whose size cannot be
-// determined accurately, load branch target addresses from the GOT.
+// determined accurately, load branch target addresses from the GOT.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-long-branch"
#include "Mips.h"
-#include "MipsTargetMachine.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -258,7 +258,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
BalTgtMBB->addSuccessor(TgtMBB);
int64_t TgtAddress = MBBInfos[TgtMBB->getNumber()].Address;
- int64_t Offset = TgtAddress - (I.Address + I.Size - 20);
+ unsigned BalTgtMBBSize = 5;
+ int64_t Offset = TgtAddress - (I.Address + I.Size - BalTgtMBBSize * 4);
int64_t Lo = SignExtend64<16>(Offset & 0xffff);
int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff);
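  // A minimal standalone sketch of the invariant behind the +0x8000 bias:
  // Lo is sign-extended, so Hi must absorb the 0x8000 that Lo may subtract.
  // Assumes llvm::SignExtend64 from MathExtras.h and an offset for which
  // Offset + 0x8000 still fits in 32 bits:
  //   int64_t Lo = SignExtend64<16>(Offset & 0xffff);
  //   int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff);
  //   assert((Hi << 16) + Lo == Offset && "hi/lo pair reassembles the offset");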
@@ -283,9 +284,10 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP).addImm(-8);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SW)).addReg(Mips::RA)
.addReg(Mips::SP).addImm(0);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi)
- ->setIsInsideBundle();
+
+ MIBundleBuilder(*LongBrMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
+ .append(BuildMI(*MF, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi));
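+ // Bundling keeps the LUi in BAL's delay slot attached to the branch, so
+ // later passes cannot split or reorder the pair.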
Pos = BalTgtMBB->begin();
@@ -295,9 +297,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::RA).addReg(Mips::AT);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA)
.addReg(Mips::SP).addImm(0);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
- .addReg(Mips::SP).addImm(8)->setIsInsideBundle();
+
+ MIBundleBuilder(*BalTgtMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT))
+ .append(BuildMI(*MF, DL, TII->get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(8));
} else {
// $longbr:
// daddiu $sp, $sp, -16
@@ -335,9 +339,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::AT_64).addImm(16);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
.addReg(Mips::AT_64).addImm(Hi);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
- .addReg(Mips::AT_64).addImm(16)->setIsInsideBundle();
+
+ MIBundleBuilder(*LongBrMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
+ .append(BuildMI(*MF, DL, TII->get(Mips::DSLL), Mips::AT_64)
+ .addReg(Mips::AT_64).addImm(16));
Pos = BalTgtMBB->begin();
@@ -347,10 +353,15 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::RA_64).addReg(Mips::AT_64);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64)
.addReg(Mips::SP_64).addImm(0);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64);
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
- .addReg(Mips::SP_64).addImm(16)->setIsInsideBundle();
+
+ MIBundleBuilder(*BalTgtMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64))
+ .append(BuildMI(*MF, DL, TII->get(Mips::DADDiu), Mips::SP_64)
+ .addReg(Mips::SP_64).addImm(16));
}
+
+ assert(BalTgtMBBSize == BalTgtMBB->size());
+ assert(LongBrMBB->size() + BalTgtMBBSize == LongBranchSeqSize);
} else {
// $longbr:
// j $tgt
@@ -359,8 +370,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
//
Pos = LongBrMBB->begin();
LongBrMBB->addSuccessor(TgtMBB);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle();
+ MIBundleBuilder(*LongBrMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::J)).addMBB(TgtMBB))
+ .append(BuildMI(*MF, DL, TII->get(Mips::NOP)));
+
+ assert(LongBrMBB->size() == LongBranchSeqSize);
}
if (I.Br->isUnconditionalBranch()) {
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 4162f981d1df..d836975eb7d2 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
#include "MipsMCInstLower.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 5ff19aba0267..59b23f7ad7c1 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
#include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsInstrInfo.h"
#include "MipsSubtarget.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -56,4 +56,20 @@ unsigned MipsFunctionInfo::getMips16SPAliasReg() {
return Mips16SPAliasReg = MF.getRegInfo().createVirtualRegister(RC);
}
+void MipsFunctionInfo::createEhDataRegsFI() {
+ for (int I = 0; I < 4; ++I) {
+ const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
+ const TargetRegisterClass *RC = ST.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+ EhDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment(), false);
+ }
+}
+
+bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
+ return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1]
+ || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
+}
+
void MipsFunctionInfo::anchor() { }
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index bb45f92f18fd..b05b348037d9 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -15,8 +15,8 @@
#define MIPS_MACHINE_FUNCTION_INFO_H
#include "MipsSubtarget.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include <utility>
@@ -53,10 +53,16 @@ class MipsFunctionInfo : public MachineFunctionInfo {
/// Size of incoming argument area.
unsigned IncomingArgSize;
+ /// CallsEhReturn - Whether the function calls llvm.eh.return.
+ bool CallsEhReturn;
+
+ /// Frame objects for spilling eh data registers.
+ int EhDataRegFI[4];
+
public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0),
- VarArgsFrameIndex(0)
+ VarArgsFrameIndex(0), CallsEhReturn(false)
{}
unsigned getSRetReturnReg() const { return SRetReturnReg; }
@@ -78,6 +84,14 @@ public:
}
unsigned getIncomingArgSize() const { return IncomingArgSize; }
+
+ bool callsEhReturn() const { return CallsEhReturn; }
+ void setCallsEhReturn() { CallsEhReturn = true; }
+
+ void createEhDataRegsFI();
+ int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; }
+ bool isEhDataRegFI(int FI) const;
+
};
} // end of namespace llvm
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index d8e0dd436a95..32507334e9c6 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -17,25 +17,25 @@
#include "Mips.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
-#include "MipsSubtarget.h"
#include "MipsMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_REGINFO_TARGET_DESC
#include "MipsGenRegisterInfo.inc"
@@ -47,6 +47,28 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST)
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
+
+unsigned
+MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case Mips::CPURegsRegClassID:
+ case Mips::CPU64RegsRegClassID:
+ case Mips::DSPRegsRegClassID: {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ return 28 - TFI->hasFP(MF);
+ }
+ case Mips::FGR32RegClassID:
+ return 32;
+ case Mips::AFGR64RegClassID:
+ return 16;
+ case Mips::FGR64RegClassID:
+ return 32;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Callee Saved Registers methods
//===----------------------------------------------------------------------===//
@@ -155,21 +177,14 @@ MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
// direct reference.
void MipsRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FIOperandNum, RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- unsigned i = 0;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
DEBUG(errs() << "\nFunction : " << MF.getName() << "\n";
errs() << "<--------->\n" << MI);
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
uint64_t stackSize = MF.getFrameInfo()->getStackSize();
int64_t spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
@@ -177,7 +192,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
<< "spOffset : " << spOffset << "\n"
<< "stackSize : " << stackSize << "\n");
- eliminateFI(MI, i, FrameIndex, stackSize, spOffset);
+ eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
}
unsigned MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 78adf7f18bf2..5ed51241391f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -42,6 +42,8 @@ public:
void adjustMipsStackFrame(MachineFunction &MF) const;
/// Code Generation virtual methods...
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
@@ -53,9 +55,11 @@ public:
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -64,6 +68,9 @@ public:
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
+ /// \brief Return GPR register class.
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
+
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
int FrameIndex, uint64_t StackSize,
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 391c19e07e33..64458bcef7ef 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -18,54 +18,56 @@ def sub_lo : SubRegIndex;
def sub_hi : SubRegIndex;
}
+class Unallocatable {
+ bit isAllocatable = 0;
+}
+
// We have banks of 32 registers each.
-class MipsReg<string n> : Register<n> {
- field bits<5> Num;
+class MipsReg<bits<16> Enc, string n> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "Mips";
}
-class MipsRegWithSubRegs<string n, list<Register> subregs>
+class MipsRegWithSubRegs<bits<16> Enc, string n, list<Register> subregs>
: RegisterWithSubRegs<n, subregs> {
- field bits<5> Num;
+ let HWEncoding = Enc;
let Namespace = "Mips";
}
// Mips CPU Registers
-class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
- let Num = num;
-}
+class MipsGPRReg<bits<16> Enc, string n> : MipsReg<Enc, n>;
// Mips 64-bit CPU Registers
-class Mips64GPRReg<bits<5> num, string n, list<Register> subregs>
- : MipsRegWithSubRegs<n, subregs> {
- let Num = num;
+class Mips64GPRReg<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
let SubRegIndices = [sub_32];
}
// Mips 32-bit FPU Registers
-class FPR<bits<5> num, string n> : MipsReg<n> {
- let Num = num;
-}
+class FPR<bits<16> Enc, string n> : MipsReg<Enc, n>;
// Mips 64-bit (aliased) FPU Registers
-class AFPR<bits<5> num, string n, list<Register> subregs>
- : MipsRegWithSubRegs<n, subregs> {
- let Num = num;
+class AFPR<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
let SubRegIndices = [sub_fpeven, sub_fpodd];
let CoveredBySubRegs = 1;
}
-class AFPR64<bits<5> num, string n, list<Register> subregs>
- : MipsRegWithSubRegs<n, subregs> {
- let Num = num;
+class AFPR64<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
let SubRegIndices = [sub_32];
}
-// Mips Hardware Registers
-class HWR<bits<5> num, string n> : MipsReg<n> {
- let Num = num;
+// Accumulator Registers
+class ACC<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
+ let SubRegIndices = [sub_lo, sub_hi];
+ let CoveredBySubRegs = 1;
}
+// Mips Hardware Registers
+class HWR<bits<16> Enc, string n> : MipsReg<Enc, n>;
+
//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//
@@ -228,7 +230,13 @@ let Namespace = "Mips" in {
// Hi/Lo registers
def HI : Register<"hi">, DwarfRegNum<[64]>;
+ def HI1 : Register<"hi1">, DwarfRegNum<[176]>;
+ def HI2 : Register<"hi2">, DwarfRegNum<[178]>;
+ def HI3 : Register<"hi3">, DwarfRegNum<[180]>;
def LO : Register<"lo">, DwarfRegNum<[65]>;
+ def LO1 : Register<"lo1">, DwarfRegNum<[177]>;
+ def LO2 : Register<"lo2">, DwarfRegNum<[179]>;
+ def LO3 : Register<"lo3">, DwarfRegNum<[181]>;
let SubRegIndices = [sub_32] in {
def HI64 : RegisterWithSubRegs<"hi", [HI]>;
@@ -239,21 +247,22 @@ let Namespace = "Mips" in {
def FCR31 : Register<"31">;
// fcc0 register
- def FCC0 : Register<"fcc0">;
+ def FCC0 : MipsReg<0, "fcc0">;
// PC register
def PC : Register<"pc">;
// Hardware register $29
- def HWR29 : Register<"29">;
- def HWR29_64 : Register<"29">;
+ def HWR29 : MipsReg<29, "29">;
+ def HWR29_64 : MipsReg<29, "29">;
// Accum registers
- let SubRegIndices = [sub_lo, sub_hi] in
- def AC0 : RegisterWithSubRegs<"ac0", [LO, HI]>;
- def AC1 : Register<"ac1">;
- def AC2 : Register<"ac2">;
- def AC3 : Register<"ac3">;
+ def AC0 : ACC<0, "ac0", [LO, HI]>;
+ def AC1 : ACC<1, "ac1", [LO1, HI1]>;
+ def AC2 : ACC<2, "ac2", [LO2, HI2]>;
+ def AC3 : ACC<3, "ac3", [LO3, HI3]>;
+
+ def AC0_64 : ACC<0, "ac0", [LO64, HI64]>;
def DSPCtrl : Register<"dspctrl">;
}
@@ -300,9 +309,9 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add
// Callee save
S0, S1)>;
-def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>;
+def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable;
-def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>;
+def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable;
// 64bit fp:
// * FGR64 - 32 64-bit registers
@@ -328,15 +337,70 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
// Condition Register for floating point operations
-def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>;
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable;
// Hi/Lo Registers
-def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
-def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>;
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable;
+def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable;
// Hardware registers
-def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
-def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
+def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable;
// Accumulator Registers
-def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>;
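+// The new accumulator classes are untyped, so each states its spill size
+// explicitly via Size.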
+def ACRegs : RegisterClass<"Mips", [untyped], 64, (add AC0)> {
+ let Size = 64;
+}
+
+def ACRegs128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> {
+ let Size = 128;
+}
+
+def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
+ let Size = 64;
+}
+
+def CPURegsAsmOperand : AsmOperandClass {
+ let Name = "CPURegsAsm";
+ let ParserMethod = "parseCPURegs";
+}
+
+def CPU64RegsAsmOperand : AsmOperandClass {
+ let Name = "CPU64RegsAsm";
+ let ParserMethod = "parseCPU64Regs";
+}
+
+def CCRAsmOperand : AsmOperandClass {
+ let Name = "CCRAsm";
+ let ParserMethod = "parseCCRRegs";
+}
+
+def CPURegsOpnd : RegisterOperand<CPURegs, "printCPURegs"> {
+ let ParserMatchClass = CPURegsAsmOperand;
+}
+
+def CPU64RegsOpnd : RegisterOperand<CPU64Regs, "printCPURegs"> {
+ let ParserMatchClass = CPU64RegsAsmOperand;
+}
+
+def CCROpnd : RegisterOperand<CCR, "printCPURegs"> {
+ let ParserMatchClass = CCRAsmOperand;
+}
+
+def HWRegsAsmOperand : AsmOperandClass {
+ let Name = "HWRegsAsm";
+ let ParserMethod = "parseHWRegs";
+}
+
+def HW64RegsAsmOperand : AsmOperandClass {
+ let Name = "HW64RegsAsm";
+ let ParserMethod = "parseHW64Regs";
+}
+
+def HWRegsOpnd : RegisterOperand<HWRegs, "printCPURegs"> {
+ let ParserMatchClass = HWRegsAsmOperand;
+}
+
+def HW64RegsOpnd : RegisterOperand<HWRegs64, "printCPURegs"> {
+ let ParserMatchClass = HW64RegsAsmOperand;
+}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 03f5176b2974..68ec92188802 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -12,26 +12,187 @@
//===----------------------------------------------------------------------===//
#include "MipsSEFrameLowering.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAnalyzeImmediate.h"
-#include "MipsSEInstrInfo.h"
#include "MipsMachineFunction.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
+#include "MipsSEInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+namespace {
+typedef MachineBasicBlock::iterator Iter;
+
+/// Helper class to expand accumulator pseudos.
+class ExpandACCPseudo {
+public:
+ ExpandACCPseudo(MachineFunction &MF);
+ bool expand();
+
+private:
+ bool expandInstr(MachineBasicBlock &MBB, Iter I);
+ void expandLoad(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandStore(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandCopy(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+
+ MachineFunction &MF;
+ const MipsSEInstrInfo &TII;
+ const MipsRegisterInfo &RegInfo;
+ MachineRegisterInfo &MRI;
+};
+}
+
+ExpandACCPseudo::ExpandACCPseudo(MachineFunction &MF_)
+ : MF(MF_),
+ TII(*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo())),
+ RegInfo(TII.getRegisterInfo()), MRI(MF.getRegInfo()) {}
+
+bool ExpandACCPseudo::expand() {
+ bool Expanded = false;
+
+ for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end();
+ BB != BBEnd; ++BB)
+ for (Iter I = BB->begin(), End = BB->end(); I != End;)
+ Expanded |= expandInstr(*BB, I++);
+
+ return Expanded;
+}
+
+bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
+ switch(I->getOpcode()) {
+ case Mips::LOAD_AC64:
+ case Mips::LOAD_AC64_P8:
+ case Mips::LOAD_AC_DSP:
+ case Mips::LOAD_AC_DSP_P8:
+ expandLoad(MBB, I, 4);
+ break;
+ case Mips::LOAD_AC128:
+ case Mips::LOAD_AC128_P8:
+ expandLoad(MBB, I, 8);
+ break;
+ case Mips::STORE_AC64:
+ case Mips::STORE_AC64_P8:
+ case Mips::STORE_AC_DSP:
+ case Mips::STORE_AC_DSP_P8:
+ expandStore(MBB, I, 4);
+ break;
+ case Mips::STORE_AC128:
+ case Mips::STORE_AC128_P8:
+ expandStore(MBB, I, 8);
+ break;
+ case Mips::COPY_AC64:
+ case Mips::COPY_AC_DSP:
+ expandCopy(MBB, I, 4);
+ break;
+ case Mips::COPY_AC128:
+ expandCopy(MBB, I, 8);
+ break;
+ default:
+ return false;
+ }
+
+ MBB.erase(I);
+ return true;
+}
+
+void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // load $vr0, FI
+ // copy lo, $vr0
+ // load $vr1, FI + 4
+ // copy hi, $vr1
+
+ assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ unsigned Lo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+ unsigned Hi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+
+ TII.loadRegFromStack(MBB, I, VR0, FI, RC, &RegInfo, 0);
+ BuildMI(MBB, I, DL, Desc, Lo).addReg(VR0, RegState::Kill);
+ TII.loadRegFromStack(MBB, I, VR1, FI, RC, &RegInfo, RegSize);
+ BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill);
+}
+
+void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // copy $vr0, lo
+ // store $vr0, FI
+ // copy $vr1, hi
+ // store $vr1, FI + 4
+
+ assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+ unsigned SrcKill = getKillRegState(I->getOperand(0).isKill());
+ unsigned Lo = RegInfo.getSubReg(Src, Mips::sub_lo);
+ unsigned Hi = RegInfo.getSubReg(Src, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(Lo, SrcKill);
+ TII.storeRegToStack(MBB, I, VR0, true, FI, RC, &RegInfo, 0);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(Hi, SrcKill);
+ TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize);
+}
+
+void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I,
+ unsigned RegSize) {
+ // copy $vr0, src_lo
+ // copy dst_lo, $vr0
+ // copy $vr1, src_hi
+ // copy dst_hi, $vr1
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned VR0 = MRI.createVirtualRegister(RC);
+ unsigned VR1 = MRI.createVirtualRegister(RC);
+ unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
+ unsigned SrcKill = getKillRegState(I->getOperand(1).isKill());
+ unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo);
+ unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi);
+ unsigned SrcLo = RegInfo.getSubReg(Src, Mips::sub_lo);
+ unsigned SrcHi = RegInfo.getSubReg(Src, Mips::sub_hi);
+ DebugLoc DL = I->getDebugLoc();
+
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(SrcLo, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstLo)
+ .addReg(VR0, RegState::Kill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi)
+ .addReg(VR1, RegState::Kill);
+}
+
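+// A standalone sketch of the lo/hi splitting the expansions above rely on:
+// the accumulator is handled as two RegSize-sized halves, living at FI and
+// FI + RegSize. Hypothetical helper, illustrative only (64-bit case; the
+// 128-bit case splits into two 64-bit halves the same way):
+static inline void splitAcc64(uint64_t Acc, uint32_t &Lo, uint32_t &Hi) {
+  Lo = static_cast<uint32_t>(Acc);       // copy lo, $vr0
+  Hi = static_cast<uint32_t>(Acc >> 32); // copy hi, $vr1
+}
+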
+unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const {
+ static const unsigned EhDataReg[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+ };
+ static const unsigned EhDataReg64[] = {
+ Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64
+ };
+
+ return STI.isABI_N64() ? EhDataReg64[I] : EhDataReg[I];
+}
+
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MipsRegisterInfo *RegInfo =
static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
const MipsSEInstrInfo &TII =
@@ -105,6 +266,30 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
+ if (MipsFI->callsEhReturn()) {
+ const TargetRegisterClass *RC = STI.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+ // Insert instructions that spill eh data registers.
+ for (int I = 0; I < 4; ++I) {
+ if (!MBB.isLiveIn(ehDataReg(I)))
+ MBB.addLiveIn(ehDataReg(I));
+ TII.storeRegToStackSlot(MBB, MBBI, ehDataReg(I), false,
+ MipsFI->getEhDataRegFI(I), RC, RegInfo);
+ }
+
+ // Emit .cfi_offset directives for eh data registers.
+ MCSymbol *CSLabel2 = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel2);
+ for (int I = 0; I < 4; ++I) {
+ int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I));
+ DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
+ SrcML = MachineLocation(ehDataReg(I));
+ Moves.push_back(MachineMove(CSLabel2, DstML, SrcML));
+ }
+ }
+
// if framepointer enabled, set it to point to the stack pointer.
if (hasFP(MF)) {
// Insert instruction "move $fp, $sp" at this location.
@@ -124,6 +309,9 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
@@ -144,6 +332,22 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
}
+ if (MipsFI->callsEhReturn()) {
+ const TargetRegisterClass *RC = STI.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+    // Find the first instruction that restores a callee-saved register.
+ MachineBasicBlock::iterator I = MBBI;
+ for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ --I;
+
+ // Insert instructions that restore eh data registers.
+ for (int J = 0; J < 4; ++J) {
+ TII.loadRegFromStackSlot(MBB, I, ehDataReg(J), MipsFI->getEhDataRegFI(J),
+ RC, RegInfo);
+ }
+ }
+
// Get the number of bytes from FrameInfo
uint64_t StackSize = MFI->getStackSize();
@@ -191,19 +395,59 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
// Reserve call frame if the size of the maximum call frame fits into 16-bit
// immediate field and there are no variable sized objects on the stack.
- return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
+ // Make sure the second register scavenger spill slot can be accessed with one
+ // instruction.
+ return isInt<16>(MFI->getMaxCallFrameSize() + getStackAlignment()) &&
+ !MFI->hasVarSizedObjects();
+}
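+
+// Why the getStackAlignment() slack matters: every offset that a single
+// lw/sw must reach, including the second register scavenger spill slot, has
+// to fit the 16-bit signed immediate field. Illustrative sketch of the same
+// test isInt<16> performs:
+static inline bool fitsInSImm16(int64_t Offset) {
+  return Offset >= -32768 && Offset <= 32767;
+}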
+
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+void MipsSEFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ TII.adjustStackPtr(SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
}
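+
+// Net effect of the expansion above, assuming a downward-growing stack:
+// ADJCALLSTACKDOWN moves SP down by Amount and ADJCALLSTACKUP moves it back
+// up, both via a single adjustStackPtr call. Illustrative scalar sketch:
+static inline int64_t adjustedSP(int64_t SP, int64_t Amount, bool IsDown) {
+  return IsDown ? SP - Amount : SP + Amount;
+}
+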
void MipsSEFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
// Mark $fp as used if function has dedicated frame pointer.
if (hasFP(MF))
MRI.setPhysRegUsed(FP);
+ // Create spill slots for eh data registers if function calls eh_return.
+ if (MipsFI->callsEhReturn())
+ MipsFI->createEhDataRegsFI();
+
+ // Expand pseudo instructions which load, store or copy accumulators.
+ // Add an emergency spill slot if a pseudo was expanded.
+ if (ExpandACCPseudo(MF).expand()) {
+    // The spill slot should be half the size of the accumulator. If the
+    // target is mips64, it should be 64-bit; otherwise it should be 32-bit.
+ const TargetRegisterClass *RC = STI.hasMips64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+ int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment(), false);
+ RS->addScavengingFrameIndex(FI);
+ }
+
// Set scavenging frame index if necessary.
uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() +
estimateStackSize(MF);
@@ -215,7 +459,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
&Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
RC->getAlignment(), false);
- RS->setScavengingFrameIndex(FI);
+ RS->addScavengingFrameIndex(FI);
}
const MipsFrameLowering *
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 6481a0ac86d7..193a66cc65a7 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -21,13 +21,17 @@ namespace llvm {
class MipsSEFrameLowering : public MipsFrameLowering {
public:
explicit MipsSEFrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI) {}
+ : MipsFrameLowering(STI, STI.hasMips64() ? 16 : 8) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -37,6 +41,7 @@ public:
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const;
+ unsigned ehDataReg(unsigned I) const;
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
new file mode 100644
index 000000000000..d6d220750c61
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -0,0 +1,473 @@
+//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "MipsSEISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+ const MachineInstr& MI) {
+ unsigned DstReg = 0, ZeroReg = 0;
+
+ // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+ if ((MI.getOpcode() == Mips::ADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO;
+ } else if ((MI.getOpcode() == Mips::DADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO_64;
+ }
+
+ if (!DstReg)
+ return false;
+
+ // Replace uses with ZeroReg.
+ for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+ E = MRI->use_end(); U != E;) {
+    MachineOperand &MO = U.getOperand();
+    unsigned OpNo = U.getOperandNo();
+    MachineInstr *UseMI = MO.getParent(); // Distinct name: MI also names the parameter.
+    ++U;
+
+    // Do not replace if it is a PHI operand or is tied to a def operand.
+    if (UseMI->isPHI() || UseMI->isRegTiedToDefOperand(OpNo) ||
+        UseMI->isPseudo())
+      continue;
+
+ MO.setReg(ZeroReg);
+ }
+
+ return true;
+}
+
+void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC;
+
+ if (Subtarget.isABI_N64())
+ RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
+ else
+ RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+
+ if (Subtarget.isABI_N64()) {
+ MF.getRegInfo().addLiveIn(Mips::T9_64);
+ MBB.addLiveIn(Mips::T9_64);
+
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // daddu $v1, $v0, $t9
+ // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+ .addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ if (MF.getTarget().getRelocationModel() == Reloc::Static) {
+ // Set global register to __gnu_local_gp.
+ //
+ // lui $v0, %hi(__gnu_local_gp)
+ // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+ return;
+ }
+
+ MF.getRegInfo().addLiveIn(Mips::T9);
+ MBB.addLiveIn(Mips::T9);
+
+ if (Subtarget.isABI_N32()) {
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // addu $v1, $v0, $t9
+ // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ assert(Subtarget.isABI_O32());
+
+ // For O32 ABI, the following instruction sequence is emitted to initialize
+ // the global base register:
+ //
+ // 0. lui $2, %hi(_gp_disp)
+ // 1. addiu $2, $2, %lo(_gp_disp)
+ // 2. addu $globalbasereg, $2, $t9
+ //
+ // We emit only the last instruction here.
+ //
+  // The GNU linker requires that the first two instructions appear at the
+  // beginning of a function and that no instructions be inserted before or
+  // between them.
+ // The two instructions are emitted during lowering to MC layer in order to
+ // avoid any reordering.
+ //
+ // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
+ // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
+ // reads it.
+ MF.getRegInfo().addLiveIn(Mips::V0);
+ MBB.addLiveIn(Mips::V0);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
+ .addReg(Mips::V0).addReg(Mips::T9);
+}
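+
+// The %hi/%lo pairs used above follow the standard MIPS split of a 32-bit
+// address into a high half plus a sign-extended low half. Hypothetical
+// helpers, illustrative only:
+static inline uint32_t mipsHi16(uint32_t Addr) {
+  return (Addr + 0x8000u) >> 16; // +0x8000 compensates for lo sign extension
+}
+static inline int32_t mipsLo16(uint32_t Addr) {
+  return static_cast<int16_t>(Addr & 0xffffu); // sign-extended low half
+}
+// Invariant: (mipsHi16(A) << 16) + mipsLo16(A) == A (mod 2^32).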
+
+void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+ ++MFI)
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ replaceUsesWithZeroReg(MRI, *I);
+}
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
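+
+// What MULT + MFLO/MFHI compute: a widening multiply whose 64-bit product
+// lands in the HI/LO register pair (MULTu zero-extends instead). Standalone
+// sketch of the i32 signed case, illustrative only:
+static inline void mult32(int32_t A, int32_t B, uint32_t &Lo, uint32_t &Hi) {
+  uint64_t P = static_cast<uint64_t>(static_cast<int64_t>(A) * B);
+  Lo = static_cast<uint32_t>(P);       // MFLO
+  Hi = static_cast<uint32_t>(P >> 32); // MFHI
+}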
+
+SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
+ SDValue CmpLHS, DebugLoc DL,
+ SDNode *Node) const {
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+ SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
+ SDValue(Carry, 0), RHS);
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry, 0));
+}
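+
+// The SLTu above recovers the carry that MIPS has no flags register for:
+// after an unsigned add, the sum is below an operand exactly when the add
+// wrapped (the SUBE path uses the analogous borrow test). Standalone sketch,
+// illustrative only:
+static inline uint32_t addWithCarryOut(uint32_t A, uint32_t B,
+                                       uint32_t &CarryOut) {
+  uint32_t Sum = A + B;
+  CarryOut = Sum < B; // SLTu: 1 iff the addition wrapped
+  return Sum;
+}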
+
+/// ComplexPattern used on MipsInstrInfo
+/// Used on Mips Load/Store instructions
+bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ EVT ValTy = Addr.getValueType();
+
+  // If the address is a frame index, return the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+ }
+
+  // In PIC code, use the MipsISD::Wrapper node's base and offset directly.
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
+ return true;
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ return selectAddrRegImm(Addr, Base, Offset) ||
+ selectAddrDefault(Addr, Base, Offset);
+}
+
+std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+  ///
+  // Instruction selection not handled by the auto-generated tablegen
+  // matcher should be handled here.
+  ///
+ EVT NodeTy = Node->getValueType(0);
+ SDNode *Result;
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
+
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ /// Special Muls
+ case ISD::MUL: {
+ // Mips32 has a 32-bit three operand mul instruction.
+ if (Subtarget.hasMips32() && NodeTy == MVT::i32)
+ break;
+ MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT;
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first;
+ return std::make_pair(true, Result);
+ }
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
+
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ if (Subtarget.hasMips64()) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO_64, MVT::i64);
+ Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
+ } else {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO, MVT::i32);
+ Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero,
+ Zero);
+ }
+
+ return std::make_pair(true, Result);
+ }
+ break;
+ }
+
+ case ISD::Constant: {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+ unsigned Size = CN->getValueSizeInBits(0);
+
+ if (Size == 32)
+ break;
+
+ MipsAnalyzeImmediate AnalyzeImm;
+ int64_t Imm = CN->getSExtValue();
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, false);
+
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+ DebugLoc DL = CN->getDebugLoc();
+ SDNode *RegOpnd;
+ SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+
+ // The first instruction can be a LUi which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == Mips::LUi64)
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
+ else
+ RegOpnd =
+ CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
+ ImmOpnd);
+
+ // The remaining instructions in the sequence are handled here.
+ for (++Inst; Inst != Seq.end(); ++Inst) {
+ ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ SDValue(RegOpnd, 0), ImmOpnd);
+ }
+
+ return std::make_pair(true, RegOpnd);
+ }
+
+ case MipsISD::ThreadPointer: {
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
+ }
+
+ SDNode *Rdhwr =
+ CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0),
+ CurDAG->getRegister(SrcReg, PtrVT));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return std::make_pair(true, ResNode.getNode());
+ }
+
+ case MipsISD::InsertLOHI: {
+ unsigned RCID = Subtarget.hasDSP() ? Mips::ACRegsDSPRegClassID :
+ Mips::ACRegsRegClassID;
+ SDValue RegClass = CurDAG->getTargetConstant(RCID, MVT::i32);
+ SDValue LoIdx = CurDAG->getTargetConstant(Mips::sub_lo, MVT::i32);
+ SDValue HiIdx = CurDAG->getTargetConstant(Mips::sub_hi, MVT::i32);
+ const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx,
+ Node->getOperand(1), HiIdx };
+ SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
+ MVT::Untyped, Ops, 5);
+ return std::make_pair(true, Res);
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
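+
+// The ISD::Constant case walks a MipsAnalyzeImmediate sequence of LUi64,
+// ORi and DSLL steps. A simplified standalone sketch of one legal (not
+// necessarily optimal) 16-bit-chunk materialization, illustrative only:
+static inline uint64_t materialize64(uint64_t Imm) {
+  uint64_t R = (Imm >> 48) & 0xffff;      // lui-style top chunk
+  R = (R << 16) | ((Imm >> 32) & 0xffff); // dsll 16; ori next chunk
+  R = (R << 16) | ((Imm >> 16) & 0xffff); // dsll 16; ori next chunk
+  return (R << 16) | (Imm & 0xffff);      // dsll 16; ori low chunk
+}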
+
+FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) {
+ return new MipsSEDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
new file mode 100644
index 000000000000..6137ab040bbc
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -0,0 +1,57 @@
+//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEISELDAGTODAG_H
+#define MIPSSEISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
+
+public:
+ explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
+
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS,
+ DebugLoc DL, SDNode *Node) const;
+
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+};
+
+FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
new file mode 100644
index 000000000000..4f219218d31f
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -0,0 +1,442 @@
+//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsSEISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
+ cl::desc("MIPS: Enable tail calls."), cl::init(false));
+
+MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+
+ if (HasMips64)
+ addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
+
+ if (Subtarget->hasDSP()) {
+ MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
+
+ for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
+ addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
+
+ // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, VecTys[i], Expand);
+
+ setOperationAction(ISD::LOAD, VecTys[i], Legal);
+ setOperationAction(ISD::STORE, VecTys[i], Legal);
+ setOperationAction(ISD::BITCAST, VecTys[i], Legal);
+ }
+ }
+
+ if (!TM.Options.UseSoftFloat) {
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+
+    // When only single-precision float is available, f64 operations are
+    // lowered to libcalls, so no f64 register class is added.
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
+ else
+ addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
+ }
+ }
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::i32, Custom);
+
+ if (HasMips64)
+ setOperationAction(ISD::MUL, MVT::i64, Custom);
+
+ setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ setTargetDAGCombine(ISD::ADDE);
+ setTargetDAGCombine(ISD::SUBE);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
+ return new MipsSETargetLowering(TM);
+}
+
+bool
+MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
+
+ switch (SVT) {
+ case MVT::i64:
+ case MVT::i32:
+ if (Fast)
+ *Fast = true;
+ return true;
+ default:
+ return false;
+ }
+}
+
+SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch(Op.getOpcode()) {
+ case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
+ case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
+ case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
+ case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
+ case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
+ case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
+ case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, DAG);
+ }
+
+ return MipsTargetLowering::LowerOperation(Op, DAG);
+}
+
+// selectMADD -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
+ // ADDENode's second operand must be a flag output of an ADDC node in order
+ // for the matching to be successful.
+ SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
+
+ if (ADDCNode->getOpcode() != ISD::ADDC)
+ return false;
+
+ SDValue MultHi = ADDENode->getOperand(0);
+ SDValue MultLo = ADDCNode->getOperand(0);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+  // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than ADDENode or ADDCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MADD instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ DebugLoc DL = ADDENode->getDebugLoc();
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ ADDCNode->getOperand(1),
+ ADDENode->getOperand(1));
+
+ // create MipsMAdd(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+ SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of adde and addc here
+ if (!SDValue(ADDCNode, 0).use_empty()) {
+ SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
+ SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
+ LoIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
+ }
+ if (!SDValue(ADDENode, 0).use_empty()) {
+ SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
+ SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
+ HiIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
+ }
+
+ return true;
+}
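+
+// Semantics being matched: MADD(u) folds "HI/LO += a * b" into a single
+// instruction, so the addc/adde pair above collapses into one accumulator
+// update. Standalone sketch of the signed case, illustrative only:
+static inline uint64_t madd32(uint64_t Acc, int32_t A, int32_t B) {
+  return Acc + static_cast<uint64_t>(static_cast<int64_t>(A) * B);
+}
+// selectMSUB below matches the subtracting twin, Acc - a * b.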
+
+// selectMSUB -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc Lo0, multLo), (sube Hi0, multHi),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
+ // SUBENode's second operand must be a flag output of an SUBC node in order
+ // for the matching to be successful.
+ SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
+
+ if (SUBCNode->getOpcode() != ISD::SUBC)
+ return false;
+
+ SDValue MultHi = SUBENode->getOperand(1);
+ SDValue MultLo = SUBCNode->getOperand(1);
+ SDNode *MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+  // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than SUBENode or SUBCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MSUB instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ DebugLoc DL = SUBENode->getDebugLoc();
+
+ // Initialize accumulator.
+ SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ SUBCNode->getOperand(0),
+ SUBENode->getOperand(0));
+
+  // create MipsMSub(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+  // Like MAdd above, MSub carries the accumulator as an untyped value.
+  SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Untyped,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ACCIn);
+
+ // replace uses of sube and subc here
+ if (!SDValue(SUBCNode, 0).use_empty()) {
+ SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
+ SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
+ LoIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
+ }
+ if (!SDValue(SUBENode, 0).use_empty()) {
+ SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
+ SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
+ HiIdx);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
+ }
+
+ return true;
+}
+
+static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ selectMADD(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ selectMSUB(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue
+MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ case ISD::ADDE:
+ return performADDECombine(N, DAG, DCI, Subtarget);
+ case ISD::SUBE:
+ return performSUBECombine(N, DAG, DCI, Subtarget);
+ default:
+ return MipsTargetLowering::PerformDAGCombine(N, DCI);
+ }
+}
+
+MachineBasicBlock *
+MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::BPOSGE32_PSEUDO:
+ return emitBPOSGE32(MI, BB);
+ }
+}
+
+bool MipsSETargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ if (!EnableMipsTailCalls)
+ return false;
+
+ // Return false if either the callee or caller has a byval argument.
+ if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
+ return false;
+
+ // Return true if the callee's argument area is no larger than the
+ // caller's.
+ return NextStackOffset <= FI.getIncomingArgSize();
+}
+
+void MipsSETargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // T9 should contain the address of the callee function if
+  // -relocation-model=pic is used or this is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
+ RegsToPass.push_front(std::make_pair(T9Reg, Callee));
+ } else
+ Ops.push_back(Callee);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
+ bool HasLo, bool HasHi,
+ SelectionDAG &DAG) const {
+ EVT Ty = Op.getOperand(0).getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Lo, Hi;
+
+ if (HasLo)
+ Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
+ DAG.getConstant(Mips::sub_lo, MVT::i32));
+ if (HasHi)
+ Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
+ DAG.getConstant(Mips::sub_hi, MVT::i32));
+
+ if (!HasLo || !HasHi)
+ return HasLo ? Lo : Hi;
+
+ SDValue Vals[] = { Lo, Hi };
+ return DAG.getMergeValues(Vals, 2, DL);
+}
+
+MachineBasicBlock * MipsSETargetLowering::
+emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const {
+ // $bb:
+ // bposge32_pseudo $vr0
+ // =>
+ // $bb:
+ // bposge32 $tbb
+ // $fbb:
+ // li $vr2, 0
+ // b $sink
+ // $tbb:
+ // li $vr1, 1
+ // $sink:
+ // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
+ DebugLoc DL = MI->getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, FBB);
+ F->insert(It, TBB);
+ F->insert(It, Sink);
+
+ // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add successors.
+ BB->addSuccessor(FBB);
+ BB->addSuccessor(TBB);
+ FBB->addSuccessor(Sink);
+ TBB->addSuccessor(Sink);
+
+ // Insert the real bposge32 instruction to $BB.
+ BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
+
+ // Fill $FBB.
+ unsigned VR2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
+ .addReg(Mips::ZERO).addImm(0);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+ // Fill $TBB.
+ unsigned VR1 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
+ .addReg(Mips::ZERO).addImm(1);
+
+ // Insert phi function to $Sink.
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return Sink;
+}
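+
+// The diamond built above only materializes a boolean from the DSP control
+// register's pos field: BPOSGE32 branches when pos >= 32. Equivalent scalar
+// sketch, illustrative only:
+static inline unsigned bposge32Result(unsigned DSPControlPos) {
+  return DSPControlPos >= 32 ? 1 : 0; // becomes $vr0 via the phi
+}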
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
new file mode 100644
index 000000000000..186f6a343dee
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -0,0 +1,62 @@
+//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsSEISELLOWERING_H
+#define MipsSEISELLOWERING_H
+
+#include "MipsISelLowering.h"
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+ class MipsSETargetLowering : public MipsTargetLowering {
+ public:
+ explicit MipsSETargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
+ if (VT == MVT::Untyped)
+ return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass :
+ &Mips::ACRegsRegClass;
+
+ return TargetLowering::getRepRegClassFor(VT);
+ }
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
+ SelectionDAG &DAG) const;
+
+ MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ };
+}
+
+#endif // MipsSEISELLOWERING_H
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index fb0f9df038c3..ca0315ed9f6e 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -12,14 +12,14 @@
//===----------------------------------------------------------------------===//
#include "MipsSEInstrInfo.h"
-#include "MipsTargetMachine.h"
-#include "MipsMachineFunction.h"
#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -90,7 +90,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
if (Mips::CPURegsRegClass.contains(SrcReg))
- Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
+ Opc = Mips::OR, ZeroReg = Mips::ZERO;
else if (Mips::CCRRegClass.contains(SrcReg))
Opc = Mips::CFC1;
else if (Mips::FGR32RegClass.contains(SrcReg))
@@ -120,7 +120,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::MOVCCRToCCR;
else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
if (Mips::CPU64RegsRegClass.contains(SrcReg))
- Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
+ Opc = Mips::OR64, ZeroReg = Mips::ZERO_64;
else if (SrcReg == Mips::HI64)
Opc = Mips::MFHI64, SrcReg = 0;
else if (SrcReg == Mips::LO64)
@@ -136,6 +136,12 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (Mips::FGR64RegClass.contains(DestReg))
Opc = Mips::DMTC1;
}
+ else if (Mips::ACRegsRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC64;
+ else if (Mips::ACRegsDSPRegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC_DSP;
+ else if (Mips::ACRegs128RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::COPY_AC128;
assert(Opc && "Cannot copy registers");
@@ -144,18 +150,18 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (DestReg)
MIB.addReg(DestReg, RegState::Define);
- if (ZeroReg)
- MIB.addReg(ZeroReg);
-
if (SrcReg)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
+
+ if (ZeroReg)
+ MIB.addReg(ZeroReg);
}
void MipsSEInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
+ int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
@@ -166,6 +172,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
+ else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC64_P8 : Mips::STORE_AC64;
+ else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP;
+ else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
@@ -175,15 +187,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
}
void MipsSEInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const
-{
+loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI, int64_t Offset) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
@@ -193,6 +203,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
+ else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC64_P8 : Mips::LOAD_AC64;
+ else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP;
+ else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
@@ -201,7 +217,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
.addMemOperand(MMO);
}
@@ -220,6 +236,10 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case Mips::ExtractElementF64:
ExpandExtractElementF64(MBB, MI);
break;
+ case Mips::MIPSeh_return32:
+ case Mips::MIPSeh_return64:
+ ExpandEhReturn(MBB, MI);
+ break;
}
MBB.erase(MI);
@@ -356,6 +376,35 @@ void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB,
.addReg(HiReg);
}
+void MipsSEInstrInfo::ExpandEhReturn(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // This pseudo instruction is generated as part of the lowering of
+  // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg and an
+  // indirect jump to TargetReg.
+ const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ unsigned ADDU = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned OR = STI.isABI_N64() ? Mips::OR64 : Mips::OR;
+ unsigned JR = STI.isABI_N64() ? Mips::JR64 : Mips::JR;
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA;
+ unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned OffsetReg = I->getOperand(0).getReg();
+ unsigned TargetReg = I->getOperand(1).getReg();
+
+ // or $ra, $v0, $zero
+ // addu $sp, $sp, $v1
+ // jr $ra
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(OR), T9)
+ .addReg(TargetReg).addReg(ZERO);
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(OR), RA)
+ .addReg(TargetReg).addReg(ZERO);
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP)
+ .addReg(SP).addReg(OffsetReg);
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(JR)).addReg(RA);
+}
+
const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) {
return new MipsSEInstrInfo(TM);
}
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index 55b78b2cfb97..0bf7876f0fe0 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -49,17 +49,19 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ virtual void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
+
+ virtual void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const;
virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
@@ -85,6 +87,8 @@ private:
MachineBasicBlock::iterator I) const;
void ExpandBuildPairF64(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
+ void ExpandEhReturn(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
};
}
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 56b9ba95e5de..96967380b29d 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -15,28 +15,28 @@
#include "MipsSERegisterInfo.h"
#include "Mips.h"
#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
#include "MipsSEInstrInfo.h"
#include "MipsSubtarget.h"
-#include "MipsMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -54,26 +54,13 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
return true;
}
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void MipsSERegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
-
- const MipsSEInstrInfo *II = static_cast<const MipsSEInstrInfo*>(&TII);
- unsigned SP = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+const TargetRegisterClass *
+MipsSERegisterInfo::intRegClass(unsigned Size) const {
+ if (Size == 4)
+ return &Mips::CPURegsRegClass;
- II->adjustStackPtr(SP, Amount, MBB, I);
- }
-
- MBB.erase(I);
+ assert(Size == 8);
+ return &Mips::CPU64RegsRegClass;
}
void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
@@ -83,6 +70,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
int MinCSFI = 0;
@@ -93,15 +81,18 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
}
+ bool EhDataRegFI = MipsFI->isEhDataRegFI(FrameIndex);
+
// The following stack frame objects are always referenced relative to $sp:
// 1. Outgoing arguments.
// 2. Pointer to dynamically allocated stack space.
// 3. Locations for callee-saved registers.
+ // 4. Locations for eh data registers.
// Everything else is referenced relative to whatever register
// getFrameRegister() returns.
unsigned FrameReg;
- if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
+ if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI)
FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
else
FrameReg = getFrameRegister(MF);
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index 7437bd36c333..2f7c37bb460d 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -31,9 +31,7 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 930af4dda159..e11e5d142b74 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -26,13 +26,14 @@ void MipsSubtarget::anchor() { }
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little,
- Reloc::Model RM) :
+ Reloc::Model _RM) :
MipsGenSubtargetInfo(TT, CPU, FS),
MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
- IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
- HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false),
- HasDSP(false), HasDSPR2(false), IsAndroid(false)
+ IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false),
+ HasBitCount(false), HasFPIdx(false),
+ InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
+ RM(_RM)
{
std::string CPUName = CPU;
if (CPUName.empty())
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index ff69237ec2bd..7a2e47ce5a9d 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -14,8 +14,9 @@
#ifndef MIPSSUBTARGET_H
#define MIPSSUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "MCTargetDesc/MipsReginfo.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -76,30 +77,32 @@ protected:
// HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
bool HasCondMov;
- // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
- // instructions.
- bool HasMulDivAdd;
-
- // HasMinMax - MIN and MAX instructions.
- bool HasMinMax;
-
// HasSwap - Byte and half swap instructions.
bool HasSwap;
// HasBitCount - Count leading '1' and '0' bits.
bool HasBitCount;
+ // HasFPIdx -- Floating point indexed load/store instructions.
+ bool HasFPIdx;
+
// InMips16 -- can process Mips16 instructions
bool InMips16Mode;
+ // InMicroMips -- can process MicroMips instructions
+ bool InMicroMipsMode;
+
// HasDSP, HasDSPR2 -- supports DSP ASE.
bool HasDSP, HasDSPR2;
- // IsAndroid -- target is android
- bool IsAndroid;
-
InstrItineraryData InstrItins;
+  // The instance of the register info section object
+ MipsReginfo MRI;
+
+ // Relocation Model
+ Reloc::Model RM;
+
public:
virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
@@ -127,8 +130,6 @@ public:
bool hasMips64() const { return MipsArchVersion >= Mips64; }
bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
- bool hasMips32r2Or64() const { return hasMips32r2() || hasMips64(); }
-
bool isLittle() const { return IsLittle; }
bool isFP64bit() const { return IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
@@ -137,9 +138,9 @@ public:
bool isNotSingleFloat() const { return !IsSingleFloat; }
bool hasVFPU() const { return HasVFPU; }
bool inMips16Mode() const { return InMips16Mode; }
+ bool inMicroMipsMode() const { return InMicroMipsMode; }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
- bool isAndroid() const { return IsAndroid; }
bool isLinux() const { return IsLinux; }
bool useSmallSection() const { return UseSmallSection; }
@@ -148,10 +149,15 @@ public:
/// Features related to the presence of specific instructions.
bool hasSEInReg() const { return HasSEInReg; }
bool hasCondMov() const { return HasCondMov; }
- bool hasMulDivAdd() const { return HasMulDivAdd; }
- bool hasMinMax() const { return HasMinMax; }
bool hasSwap() const { return HasSwap; }
bool hasBitCount() const { return HasBitCount; }
+ bool hasFPIdx() const { return HasFPIdx; }
+
+ // Grab MipsRegInfo object
+ const MipsReginfo &getMReginfo() const { return MRI; }
+
+ // Grab relocation model
+ Reloc::Model getRelocationModel() const {return RM;}
};
} // End llvm namespace
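The subtarget now also carries the reginfo payload and the relocation model. A hedged usage sketch (compiles only inside the Mips backend; the function name is illustrative):

    #include "MipsSubtarget.h"
    using namespace llvm;

    static void queryFeatures(const MipsSubtarget &ST) {
      bool MicroMips = ST.inMicroMipsMode();     // new microMIPS mode flag
      bool FPIdx = ST.hasFPIdx();                // indexed FP load/store (LWXC1 etc.)
      Reloc::Model RM = ST.getRelocationModel(); // cached from the constructor
      const MipsReginfo &RI = ST.getMReginfo();  // feeds the ELF reginfo section
      (void)MicroMips; (void)FPIdx; (void)RM; (void)RI;
    }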
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 983ee219412b..33363580aba7 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -15,8 +15,8 @@
#include "Mips.h"
#include "MipsFrameLowering.h"
#include "MipsInstrInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -45,15 +45,16 @@ MipsTargetMachine(const Target &T, StringRef TT,
Subtarget(TT, CPU, FS, isLittle, RM),
DL(isLittle ?
(Subtarget.isABI_N64() ?
- "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-"
+ "n32:64-S128" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64") :
(Subtarget.isABI_N64() ?
- "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-"
+ "n32:64-S128" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")),
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
- TLInfo(*this), TSInfo(*this), JITInfo(),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() {
}
void MipsebTargetMachine::anchor() { }
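Two suffixes are new in these data-layout strings: "n32:64" declares 64-bit integers as natively supported on N64, and "S64"/"S128" pins the natural stack alignment at 64/128 bits for O32/N64. Annotated copy of the little-endian N64 string from the hunk above:

    // e             little-endian ("E" in the big-endian variants)
    // p:64:64:64    64-bit pointers with 64-bit ABI/preferred alignment
    // i8:8:32 ...   sub-word integers align to 8/16 bits, preferred 32
    // f128:128:128  128-bit long double
    // n32:64        natively supported integer widths (":64" is new)
    // S128          natural stack alignment in bits (new)
    static const char N64LittleEndianDL[] =
        "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32:64-S128";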
@@ -115,6 +116,8 @@ bool MipsPassConfig::addPreEmitPass() {
// NOTE: long branch has not been implemented for mips16.
if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding())
addPass(createMipsLongBranchPass(TM));
+ if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+ addPass(createMipsConstantIslandPass(TM));
return true;
}
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index b54f5cee6d4d..7e5f19226433 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -15,15 +15,15 @@
#define MIPSTARGETMACHINE_H
#include "MipsFrameLowering.h"
-#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
+#include "MipsInstrInfo.h"
#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
#include "MipsSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
class formatted_raw_ostream;
@@ -32,13 +32,11 @@ class MipsRegisterInfo;
class MipsTargetMachine : public LLVMTargetMachine {
MipsSubtarget Subtarget;
const DataLayout DL; // Calculates type size & alignment
- const MipsInstrInfo *InstrInfo;
- const MipsFrameLowering *FrameLowering;
- MipsTargetLowering TLInfo;
+ OwningPtr<const MipsInstrInfo> InstrInfo;
+ OwningPtr<const MipsFrameLowering> FrameLowering;
+ OwningPtr<const MipsTargetLowering> TLInfo;
MipsSelectionDAGInfo TSInfo;
MipsJITInfo JITInfo;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
MipsTargetMachine(const Target &T, StringRef TT,
@@ -47,12 +45,12 @@ public:
CodeGenOpt::Level OL,
bool isLittle);
- virtual ~MipsTargetMachine() { delete InstrInfo; }
+ virtual ~MipsTargetMachine() {}
virtual const MipsInstrInfo *getInstrInfo() const
- { return InstrInfo; }
+ { return InstrInfo.get(); }
virtual const TargetFrameLowering *getFrameLowering() const
- { return FrameLowering; }
+ { return FrameLowering.get(); }
virtual const MipsSubtarget *getSubtargetImpl() const
{ return &Subtarget; }
virtual const DataLayout *getDataLayout() const
@@ -65,20 +63,13 @@ public:
}
virtual const MipsTargetLowering *getTargetLowering() const {
- return &TLInfo;
+ return TLInfo.get();
}
virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
-
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 881908b82c91..4c748c5b57cd 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -9,14 +9,14 @@
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
static cl::opt<unsigned>
@@ -38,6 +38,20 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getBSS());
+ // Register info information
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ if (Subtarget.isABI_N64() || Subtarget.isABI_N32())
+ ReginfoSection =
+ getContext().getELFSection(".MIPS.options",
+ ELF::SHT_MIPS_OPTIONS,
+ ELF::SHF_ALLOC |ELF::SHF_MIPS_NOSTRIP,
+ SectionKind::getMetadata());
+ else
+ ReginfoSection =
+ getContext().getELFSection(".reginfo",
+ ELF::SHT_MIPS_REGINFO,
+ ELF::SHF_ALLOC,
+ SectionKind::getMetadata());
}
// An address must be loaded from a small section if its size is less than the
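The decision this comment introduces is a plain size check against the cl::opt<unsigned> declared above (its name and default are elided in this hunk; the values below are assumptions). A standalone sketch:

    #include <cstdint>

    static unsigned SSThreshold = 8; // assumed default for the cl::opt above

    // An object lives in (and is loaded from) the small data/bss sections
    // when its size is non-zero and within the threshold.
    static bool fitsInSmallSection(uint64_t Size) {
      return Size > 0 && Size <= SSThreshold;
    }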
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index c394a9dc02e4..c0e9140c829c 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -17,6 +17,7 @@ namespace llvm {
class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
const MCSection *SmallDataSection;
const MCSection *SmallBSSSection;
+ const MCSection *ReginfoSection;
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM);
@@ -35,6 +36,7 @@ namespace llvm {
const TargetMachine &TM) const;
// TODO: Classify globals as mips wishes.
+ const MCSection *getReginfoSection() const { return ReginfoSection; }
};
} // end namespace llvm
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index 243632b20aac..3615c146a527 100644
--- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "Mips.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 7cb16b4dd810..7da2fed4cd57 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -22,7 +22,7 @@ set(NVPTXCodeGen_sources
NVPTXAllocaHoisting.cpp
NVPTXAsmPrinter.cpp
NVPTXUtilities.cpp
- VectorElementize.cpp
+ NVVMReflect.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index 454583850b71..b3e8b5d2622d 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -52,25 +52,24 @@ enum PropertyAnnotation {
};
const unsigned AnnotationNameLen = 8; // length of each annotation name
-const char
-PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
-  "maxntidx", // PROPERTY_MAXNTID_X
-  "maxntidy", // PROPERTY_MAXNTID_Y
-  "maxntidz", // PROPERTY_MAXNTID_Z
-  "reqntidx", // PROPERTY_REQNTID_X
-  "reqntidy", // PROPERTY_REQNTID_Y
-  "reqntidz", // PROPERTY_REQNTID_Z
-  "minctasm", // PROPERTY_MINNCTAPERSM
-  "texture",  // PROPERTY_ISTEXTURE
-  "surface",  // PROPERTY_ISSURFACE
-  "sampler",  // PROPERTY_ISSAMPLER
-  "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
-  "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
-  "kernel",   // PROPERTY_ISKERNEL_FUNCTION
-  "align",    // PROPERTY_ALIGN
-  // last property
-  "proplast", // PROPERTY_LAST
+const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
+  "maxntidx", // PROPERTY_MAXNTID_X
+  "maxntidy", // PROPERTY_MAXNTID_Y
+  "maxntidz", // PROPERTY_MAXNTID_Z
+  "reqntidx", // PROPERTY_REQNTID_X
+  "reqntidy", // PROPERTY_REQNTID_Y
+  "reqntidz", // PROPERTY_REQNTID_Z
+  "minctasm", // PROPERTY_MINNCTAPERSM
+  "texture",  // PROPERTY_ISTEXTURE
+  "surface",  // PROPERTY_ISSURFACE
+  "sampler",  // PROPERTY_ISSAMPLER
+  "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
+  "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
+  "kernel",   // PROPERTY_ISKERNEL_FUNCTION
+  "align",    // PROPERTY_ALIGN
+  // last property
+  "proplast", // PROPERTY_LAST
};
// name of named metadata used for global annotations
@@ -80,9 +79,8 @@ PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
// compiling those .cpp files, hence __attribute__((unused)).
__attribute__((unused))
#endif
-static const char* NamedMDForAnnotations = "nvvm.annotations";
+ static const char *NamedMDForAnnotations = "nvvm.annotations";
}
-
#endif
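Each PropertyAnnotation enumerator indexes straight into the reformatted table, so lookups stay O(1). Minimal usage sketch (assuming the header above is included and the names live in namespace llvm):

    #include "MCTargetDesc/NVPTXBaseInfo.h"

    // Kernels are tagged in the "nvvm.annotations" named metadata with
    // the string at PROPERTY_ISKERNEL_FUNCTION, i.e. "kernel".
    const char *KernelTag =
        llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION];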
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 1d4166575da5..459cd96cb0cd 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -23,15 +23,15 @@ bool CompileForDebugging;
// compile for debugging
static cl::opt<bool, true>
Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
- cl::location(CompileForDebugging),
- cl::init(false));
+ cl::location(CompileForDebugging), cl::init(false));
-void NVPTXMCAsmInfo::anchor() { }
+void NVPTXMCAsmInfo::anchor() {}
NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::nvptx64)
- PointerSize = 8;
+ if (TheTriple.getArch() == Triple::nvptx64) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
CommentString = "//";
@@ -54,7 +54,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
Data32bitsDirective = " .b32 ";
Data64bitsDirective = " .b64 ";
PrivateGlobalPrefix = "";
- ZeroDirective = " .b8";
+ ZeroDirective = " .b8";
AsciiDirective = " .b8";
AscizDirective = " .b8";
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 44aa01ca6e30..ccd29705df72 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -28,7 +28,6 @@
#define GET_REGINFO_MC_DESC
#include "NVPTXGenRegisterInfo.inc"
-
using namespace llvm;
static MCInstrInfo *createNVPTXMCInstrInfo() {
@@ -44,22 +43,20 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
return X;
}
-static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
+static MCSubtargetInfo *
+createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
return X;
}
-static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
+static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
+ StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
-
// Force static initialization.
extern "C" void LLVMInitializeNVPTXTargetMC() {
// Register the MC asm info.
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index b5684883fc95..d6c79b5110cc 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-
#ifndef LLVM_SUPPORT_MANAGED_STRING_H
#define LLVM_SUPPORT_MANAGED_STRING_H
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index a8d082a4d8b0..6a53a443bfb6 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -15,11 +15,11 @@
#ifndef LLVM_TARGET_NVPTX_H
#define LLVM_TARGET_NVPTX_H
-#include "llvm/Value.h"
-#include "llvm/Module.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
#include <cassert>
#include <iosfwd>
@@ -41,19 +41,24 @@ enum CondCodes {
inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
switch (CC) {
- case NVPTXCC::NE: return "ne";
- case NVPTXCC::EQ: return "eq";
- case NVPTXCC::LT: return "lt";
- case NVPTXCC::LE: return "le";
- case NVPTXCC::GT: return "gt";
- case NVPTXCC::GE: return "ge";
+ case NVPTXCC::NE:
+ return "ne";
+ case NVPTXCC::EQ:
+ return "eq";
+ case NVPTXCC::LT:
+ return "lt";
+ case NVPTXCC::LE:
+ return "le";
+ case NVPTXCC::GT:
+ return "gt";
+ case NVPTXCC::GE:
+ return "ge";
}
llvm_unreachable("Unknown condition code");
}
-FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
- llvm::CodeGenOpt::Level OptLevel);
-FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
+FunctionPass *
+createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
@@ -63,8 +68,7 @@ bool isImageOrSamplerVal(const Value *, const Module *);
extern Target TheNVPTXTarget32;
extern Target TheNVPTXTarget64;
-namespace NVPTX
-{
+namespace NVPTX {
enum DrvInterface {
NVCL,
CUDA,
@@ -103,7 +107,7 @@ enum LoadStore {
};
namespace PTXLdStInstCode {
-enum AddressSpace{
+enum AddressSpace {
GENERIC = 0,
GLOBAL = 1,
CONSTANT = 2,
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
index 7aee3595c625..d78b4e81a3e5 100644
--- a/lib/Target/NVPTX/NVPTX.td
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -26,14 +26,6 @@ include "NVPTXInstrInfo.td"
//===----------------------------------------------------------------------===//
// SM Versions
-def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10",
- "Target SM 1.0">;
-def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11",
- "Target SM 1.1">;
-def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12",
- "Target SM 1.2">;
-def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13",
- "Target SM 1.3">;
def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20",
"Target SM 2.0">;
def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
@@ -56,10 +48,6 @@ def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
-def : Proc<"sm_10", [SM10]>;
-def : Proc<"sm_11", [SM11]>;
-def : Proc<"sm_12", [SM12]>;
-def : Proc<"sm_13", [SM13]>;
def : Proc<"sm_20", [SM20]>;
def : Proc<"sm_21", [SM21]>;
def : Proc<"sm_30", [SM30]>;
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 668c39308f71..0f792ec6826e 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -11,17 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Constants.h"
#include "NVPTXAllocaHoisting.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
namespace llvm {
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
- bool functionModified = false;
- Function::iterator I = function.begin();
- TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
+ bool functionModified = false;
+ Function::iterator I = function.begin();
+ TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
for (Function::iterator E = function.end(); I != E; ++I) {
for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
@@ -37,12 +37,10 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
}
char NVPTXAllocaHoisting::ID = 1;
-RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
- "Hoisting alloca instructions in non-entry "
- "blocks to the entry block");
+RegisterPass<NVPTXAllocaHoisting>
+X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
+ "blocks to the entry block");
-FunctionPass *createAllocaHoisting() {
- return new NVPTXAllocaHoisting();
-}
+FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
} // end namespace llvm
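For readers outside the NVPTX tree, the whole pass boils down to moving fixed-size allocas out of non-entry blocks. A minimal sketch against the 3.3-era IR headers used above (not the pass itself, just its core step):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static bool hoistStaticAllocas(Function &F) {
      bool Changed = false;
      // Hoist to just before the entry block's terminator, mirroring
      // firstTerminatorInst above.
      Instruction *InsertPt = F.front().getTerminator();
      Function::iterator I = F.begin();
      for (++I; I != F.end(); ++I)
        for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
          Instruction *Inst = BI++;
          AllocaInst *AI = dyn_cast<AllocaInst>(Inst);
          // Only constant-sized allocas can be moved safely.
          if (AI && isa<ConstantInt>(AI->getArraySize())) {
            AI->moveBefore(InsertPt);
            Changed = true;
          }
        }
      return Changed;
    }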
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index c7cabf695311..19d73c5783cb 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -15,8 +15,8 @@
#define NVPTX_ALLOCA_HOISTING_H_
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
namespace llvm {
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 0a885ce1c4a6..ce5d78afa332 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -13,40 +13,40 @@
//===----------------------------------------------------------------------===//
#include "NVPTXAsmPrinter.h"
+#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
-#include "NVPTXTargetMachine.h"
+#include "NVPTXNumRegisters.h"
#include "NVPTXRegisterInfo.h"
+#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
-#include "MCTargetDesc/NVPTXMCAsmInfo.h"
-#include "NVPTXNumRegisters.h"
+#include "cl_common_defines.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Module.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Support/TimeValue.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Support/Path.h"
-#include "llvm/Assembly/Writer.h"
-#include "cl_common_defines.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TimeValue.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include <sstream>
using namespace llvm;
-
#include "NVPTXGenAsmWriter.inc"
bool RegAllocNilUsed = true;
@@ -58,21 +58,17 @@ EmitLineNumbers("nvptx-emit-line-numbers",
cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
cl::init(true));
-namespace llvm {
-bool InterleaveSrcInPtx = false;
-}
-
-static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src",
- cl::ZeroOrMore,
- cl::desc("NVPTX Specific: Emit source line in ptx file"),
- cl::location(llvm::InterleaveSrcInPtx));
+namespace llvm { bool InterleaveSrcInPtx = false; }
+static cl::opt<bool, true>
+InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Emit source line in ptx file"),
+ cl::location(llvm::InterleaveSrcInPtx));
namespace {
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
/// depends.
-void DiscoverDependentGlobals(Value *V,
- DenseSet<GlobalVariable*> &Globals) {
+void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
Globals.insert(GV);
else {
@@ -87,12 +83,12 @@ void DiscoverDependentGlobals(Value *V,
/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
/// instances to be emitted, but only after any dependents have been added
/// first.
-void VisitGlobalVariableForEmission(GlobalVariable *GV,
- SmallVectorImpl<GlobalVariable*> &Order,
- DenseSet<GlobalVariable*> &Visited,
- DenseSet<GlobalVariable*> &Visiting) {
+void VisitGlobalVariableForEmission(
+ GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order,
+ DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) {
// Have we already visited this one?
- if (Visited.count(GV)) return;
+ if (Visited.count(GV))
+ return;
// Do we have a circular dependency?
if (Visiting.count(GV))
@@ -102,12 +98,13 @@ void VisitGlobalVariableForEmission(GlobalVariable *GV,
Visiting.insert(GV);
// Make sure we visit all dependents first
- DenseSet<GlobalVariable*> Others;
+ DenseSet<GlobalVariable *> Others;
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
DiscoverDependentGlobals(GV->getOperand(i), Others);
-
- for (DenseSet<GlobalVariable*>::iterator I = Others.begin(),
- E = Others.end(); I != E; ++I)
+
+ for (DenseSet<GlobalVariable *>::iterator I = Others.begin(),
+ E = Others.end();
+ I != E; ++I)
VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
// Now we can visit ourself
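Stripped of LLVM types, VisitGlobalVariableForEmission is a post-order DFS with an explicit cycle guard. The generic shape, as a sketch:

    #include <set>
    #include <vector>

    struct Node { std::vector<Node *> Deps; };

    // Emit dependencies first; Visiting catches circular references
    // (the real code treats that as an error), Visited deduplicates.
    static void visit(Node *N, std::vector<Node *> &Order,
                      std::set<Node *> &Visited, std::set<Node *> &Visiting) {
      if (Visited.count(N))
        return;
      if (Visiting.count(N))
        return; // cycle detected
      Visiting.insert(N);
      for (size_t i = 0, e = N->Deps.size(); i != e; ++i)
        visit(N->Deps[i], Order, Visited, Visiting);
      Visiting.erase(N);
      Visited.insert(N);
      Order.push_back(N); // post-order: self after all dependencies
    }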
@@ -141,43 +138,35 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
if (CE == 0)
llvm_unreachable("Unknown constant value to lower!");
-
switch (CE->getOpcode()) {
default:
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C =
- ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
if (C != CE)
return LowerConstant(C, AP);
// Otherwise report the problem to the user.
{
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- WriteAsOperand(OS, CE, /*PrintType=*/false,
- !AP.MF ? 0 : AP.MF->getFunction()->getParent());
- report_fatal_error(OS.str());
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/ false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
const DataLayout &TD = *AP.TM.getDataLayout();
// Generate a symbolic expression for the byte address
- const Constant *PtrVal = CE->getOperand(0);
- SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
- int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
+ APInt OffsetAI(TD.getPointerSizeInBits(), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
- if (Offset == 0)
+ if (!OffsetAI)
return Base;
- // Truncate/sext the offset to the pointer size.
- if (TD.getPointerSizeInBits() != 64) {
- int SExtAmount = 64-TD.getPointerSizeInBits();
- Offset = (Offset << SExtAmount) >> SExtAmount;
- }
-
+ int64_t Offset = OffsetAI.getSExtValue();
return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
Ctx);
}
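The rewritten GetElementPtr case folds the whole constant index list into one byte offset with GEPOperator::accumulateConstantOffset, replacing the manual getIndexedOffset plus sign-extension dance. In isolation, and with the return value checked (the diff can skip the check because LowerConstant only sees all-constant GEPs):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Operator.h"
    using namespace llvm;

    // Returns true and sets Offset when every GEP index is constant.
    static bool gepByteOffset(const ConstantExpr *CE, const DataLayout &TD,
                              int64_t &Offset) {
      APInt OffsetAI(TD.getPointerSizeInBits(), 0);
      if (!cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI))
        return false; // some index was not a constant
      Offset = OffsetAI.getSExtValue(); // already at pointer width
      return true;
    }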
@@ -187,7 +176,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
// expression properly. This is important for differences between
// blockaddress labels. Since the two labels are in the same function, it
// is reasonable to treat their delta as a 32-bit value.
- // FALL THROUGH.
+ // FALL THROUGH.
case Instruction::BitCast:
return LowerConstant(CE->getOperand(0), AP);
@@ -197,7 +186,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
- false/*ZExt*/);
+ false /*ZExt*/);
return LowerConstant(Op, AP);
}
@@ -219,11 +208,12 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
// the high bits so we are sure to get a proper truncation if the input is
// a constant expr.
unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ const MCExpr *MaskExpr =
+ MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
}
- // The MC library also has a right-shift operator, but it isn't consistently
+ // The MC library also has a right-shift operator, but it isn't consistently
// signed or unsigned between different targets.
case Instruction::Add:
case Instruction::Sub:
@@ -237,24 +227,32 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
switch (CE->getOpcode()) {
- default: llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
- case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
- case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
- case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
- case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
- case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
- case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
- case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
- case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+ default:
+ llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add:
+ return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Sub:
+ return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+ case Instruction::Mul:
+ return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+ case Instruction::SDiv:
+ return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+ case Instruction::SRem:
+ return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+ case Instruction::Shl:
+ return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+ case Instruction::And:
+ return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+ case Instruction::Or:
+ return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
+ case Instruction::Xor:
+ return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
}
}
}
}
-
-void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
-{
+void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
if (!EmitLineNumbers)
return;
if (ignoreLoc(MI))
@@ -273,7 +271,6 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
if (curLoc.isUnknown())
return;
-
const MachineFunction *MF = MI.getParent()->getParent();
//const TargetMachine &TM = MF->getTarget();
@@ -294,14 +291,13 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
if (filenameMap.find(fileName.str()) == filenameMap.end())
return;
-
// Emit the line from the source file.
if (llvm::InterleaveSrcInPtx)
this->emitSrcInText(fileName.str(), curLoc.getLine());
std::stringstream temp;
- temp << "\t.loc " << filenameMap[fileName.str()]
- << " " << curLoc.getLine() << " " << curLoc.getCol();
+ temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
+ << " " << curLoc.getCol();
OutStreamer.EmitRawText(Twine(temp.str().c_str()));
}
@@ -314,9 +310,7 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitRawText(OS.str());
}
-void NVPTXAsmPrinter::printReturnValStr(const Function *F,
- raw_ostream &O)
-{
+void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
const DataLayout *TD = TM.getDataLayout();
const TargetLowering *TLI = TM.getTargetLowering();
@@ -334,53 +328,49 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F,
unsigned size = 0;
if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
size = ITy->getBitWidth();
- if (size < 32) size = 32;
+ if (size < 32)
+ size = 32;
} else {
- assert(Ty->isFloatingPointTy() &&
- "Floating point type expected here");
+ assert(Ty->isFloatingPointTy() && "Floating point type expected here");
size = Ty->getPrimitiveSizeInBits();
}
O << ".param .b" << size << " func_retval0";
- }
- else if (isa<PointerType>(Ty)) {
+ } else if (isa<PointerType>(Ty)) {
O << ".param .b" << TLI->getPointerTy().getSizeInBits()
- << " func_retval0";
+ << " func_retval0";
} else {
- if ((Ty->getTypeID() == Type::StructTyID) ||
- isa<VectorType>(Ty)) {
+ if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, Ty, vtparts);
unsigned totalsz = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
elems = vtparts[i].getVectorNumElements();
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
- totalsz += sz/8;
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ totalsz += sz / 8;
}
}
unsigned retAlignment = 0;
if (!llvm::getAlign(*F, 0, retAlignment))
retAlignment = TD->getABITypeAlignment(Ty);
- O << ".param .align "
- << retAlignment
- << " .b8 func_retval0["
- << totalsz << "]";
+ O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
+ << "]";
} else
- assert(false &&
- "Unknown return type");
+ assert(false && "Unknown return type");
}
} else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, Ty, vtparts);
unsigned idx = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -388,14 +378,16 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << ".reg .b" << sz << " func_retval" << idx;
- if (j<je-1) O << ", ";
+ if (j < je - 1)
+ O << ", ";
++idx;
}
- if (i < e-1)
+ if (i < e - 1)
O << ", ";
}
}
@@ -416,7 +408,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
// Set up
MRI = &MF->getRegInfo();
F = MF->getFunction();
- emitLinkageDirective(F,O);
+ emitLinkageDirective(F, O);
if (llvm::isKernelFunction(*F))
O << ".entry ";
else {
@@ -439,7 +431,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
void NVPTXAsmPrinter::EmitFunctionBodyStart() {
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
unsigned numRegClasses = TRI.getNumRegClasses();
- VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1];
+ VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1];
OutStreamer.EmitRawText(StringRef("{\n"));
setAndEmitFunctionVirtualRegisters(*MF);
@@ -451,54 +443,63 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() {
void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
OutStreamer.EmitRawText(StringRef("}\n"));
- delete []VRidGlobal2LocalMap;
+ delete[] VRidGlobal2LocalMap;
}
-
-void
-NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F,
- raw_ostream &O) const {
+void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
+ raw_ostream &O) const {
// If the NVVM IR has some of reqntid* specified, then output
// the reqntid directive, and set the unspecified ones to 1.
// If none of reqntid* is specified, don't output reqntid directive.
unsigned reqntidx, reqntidy, reqntidz;
bool specified = false;
- if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1;
- else specified = true;
- if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1;
- else specified = true;
- if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1;
- else specified = true;
+ if (llvm::getReqNTIDx(F, reqntidx) == false)
+ reqntidx = 1;
+ else
+ specified = true;
+ if (llvm::getReqNTIDy(F, reqntidy) == false)
+ reqntidy = 1;
+ else
+ specified = true;
+ if (llvm::getReqNTIDz(F, reqntidz) == false)
+ reqntidz = 1;
+ else
+ specified = true;
if (specified)
- O << ".reqntid " << reqntidx << ", "
- << reqntidy << ", " << reqntidz << "\n";
+ O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
+ << "\n";
// If the NVVM IR has some of maxntid* specified, then output
// the maxntid directive, and set the unspecified ones to 1.
// If none of maxntid* is specified, don't output maxntid directive.
unsigned maxntidx, maxntidy, maxntidz;
specified = false;
- if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1;
- else specified = true;
- if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1;
- else specified = true;
- if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1;
- else specified = true;
+ if (llvm::getMaxNTIDx(F, maxntidx) == false)
+ maxntidx = 1;
+ else
+ specified = true;
+ if (llvm::getMaxNTIDy(F, maxntidy) == false)
+ maxntidy = 1;
+ else
+ specified = true;
+ if (llvm::getMaxNTIDz(F, maxntidz) == false)
+ maxntidz = 1;
+ else
+ specified = true;
if (specified)
- O << ".maxntid " << maxntidx << ", "
- << maxntidy << ", " << maxntidz << "\n";
+ O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
+ << "\n";
unsigned mincta;
if (llvm::getMinCTASm(F, mincta))
O << ".minnctapersm " << mincta << "\n";
}
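The directive policy above: if any component of reqntid*/maxntid* is present in the NVVM annotations, emit the directive with missing components defaulted to 1; if none are present, emit nothing. A standalone restatement (hasX/x etc. stand in for the llvm::getMaxNTID{x,y,z} queries):

    #include <sstream>
    #include <string>

    static std::string maxntidDirective(bool hasX, unsigned x, bool hasY,
                                        unsigned y, bool hasZ, unsigned z) {
      if (!hasX && !hasY && !hasZ)
        return ""; // nothing specified: no directive at all
      std::ostringstream OS;
      OS << ".maxntid " << (hasX ? x : 1u) << ", " << (hasY ? y : 1u) << ", "
         << (hasZ ? z : 1u) << "\n";
      return OS.str();
    }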
-void
-NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
- raw_ostream &O) {
- const TargetRegisterClass * RC = MRI->getRegClass(vr);
+void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
+ raw_ostream &O) {
+ const TargetRegisterClass *RC = MRI->getRegClass(vr);
unsigned id = RC->getID();
std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
@@ -508,61 +509,41 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
O << getNVPTXRegClassStr(RC) << mapped_vr;
return;
}
- // Vector virtual register
- if (getNVPTXVectorSize(RC) == 4)
- O << "{"
- << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_3"
- << "}";
- else if (getNVPTXVectorSize(RC) == 2)
- O << "{"
- << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_1"
- << "}";
- else
- llvm_unreachable("Unsupported vector size");
+ report_fatal_error("Bad register!");
}
-void
-NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
- raw_ostream &O) {
+void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
+ raw_ostream &O) {
getVirtualRegisterName(vr, isVec, O);
}
-void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO,
- const char *Modifier,
- raw_ostream &O) {
- static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'};
- int Imm = (int)MO.getImm();
- if(0 == strcmp(Modifier, "vecelem"))
+void NVPTXAsmPrinter::printVecModifiedImmediate(
+ const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
+ static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
+ int Imm = (int) MO.getImm();
+ if (0 == strcmp(Modifier, "vecelem"))
O << "_" << vecelem[Imm];
- else if(0 == strcmp(Modifier, "vecv4comm1")) {
- if((Imm < 0) || (Imm > 3))
+ else if (0 == strcmp(Modifier, "vecv4comm1")) {
+ if ((Imm < 0) || (Imm > 3))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv4comm2")) {
- if((Imm < 4) || (Imm > 7))
+ } else if (0 == strcmp(Modifier, "vecv4comm2")) {
+ if ((Imm < 4) || (Imm > 7))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv4pos")) {
- if(Imm < 0) Imm = 0;
- O << "_" << vecelem[Imm%4];
- }
- else if(0 == strcmp(Modifier, "vecv2comm1")) {
- if((Imm < 0) || (Imm > 1))
+ } else if (0 == strcmp(Modifier, "vecv4pos")) {
+ if (Imm < 0)
+ Imm = 0;
+ O << "_" << vecelem[Imm % 4];
+ } else if (0 == strcmp(Modifier, "vecv2comm1")) {
+ if ((Imm < 0) || (Imm > 1))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv2comm2")) {
- if((Imm < 2) || (Imm > 3))
+ } else if (0 == strcmp(Modifier, "vecv2comm2")) {
+ if ((Imm < 2) || (Imm > 3))
O << "//";
- }
- else if(0 == strcmp(Modifier, "vecv2pos")) {
- if(Imm < 0) Imm = 0;
- O << "_" << vecelem[Imm%2];
- }
- else
+ } else if (0 == strcmp(Modifier, "vecv2pos")) {
+ if (Imm < 0)
+ Imm = 0;
+ O << "_" << vecelem[Imm % 2];
+ } else
llvm_unreachable("Unknown Modifier on immediate operand");
}
@@ -584,7 +565,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
emitVirtualRegister(MO.getReg(), true, O);
else
llvm_unreachable(
- "Don't know how to handle the modifier on virtual register.");
+ "Don't know how to handle the modifier on virtual register.");
}
}
return;
@@ -595,7 +576,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
else if (strstr(Modifier, "vec") == Modifier)
printVecModifiedImmediate(MO, Modifier, O);
else
- llvm_unreachable("Don't know how to handle modifier on immediate operand");
+ llvm_unreachable(
+ "Don't know how to handle modifier on immediate operand");
return;
case MachineOperand::MO_FPImmediate:
@@ -607,18 +589,16 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
break;
case MachineOperand::MO_ExternalSymbol: {
- const char * symbname = MO.getSymbolName();
+ const char *symbname = MO.getSymbolName();
if (strstr(symbname, ".PARAM") == symbname) {
unsigned index;
- sscanf(symbname+6, "%u[];", &index);
+ sscanf(symbname + 6, "%u[];", &index);
printParamName(index, O);
- }
- else if (strstr(symbname, ".HLPPARAM") == symbname) {
+ } else if (strstr(symbname, ".HLPPARAM") == symbname) {
unsigned index;
- sscanf(symbname+9, "%u[];", &index);
+ sscanf(symbname + 9, "%u[];", &index);
O << *CurrentFnSym << "_param_" << index << "_offset";
- }
- else
+ } else
O << symbname;
break;
}
@@ -632,8 +612,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
}
}
-void NVPTXAsmPrinter::
-printImplicitDef(const MachineInstr *MI, raw_ostream &O) const {
+void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI,
+ raw_ostream &O) const {
#ifndef __OPTIMIZE__
O << "\t// Implicit def :";
//printOperand(MI, 0);
@@ -647,64 +627,69 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
if (Modifier && !strcmp(Modifier, "add")) {
O << ", ";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum + 1, O);
} else {
- if (MI->getOperand(opNum+1).isImm() &&
- MI->getOperand(opNum+1).getImm() == 0)
+ if (MI->getOperand(opNum + 1).isImm() &&
+ MI->getOperand(opNum + 1).getImm() == 0)
return; // don't print ',0' or '+0'
O << "+";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum + 1, O);
}
}
void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
- raw_ostream &O, const char *Modifier)
-{
+ raw_ostream &O, const char *Modifier) {
if (Modifier) {
const MachineOperand &MO = MI->getOperand(opNum);
- int Imm = (int)MO.getImm();
+ int Imm = (int) MO.getImm();
if (!strcmp(Modifier, "volatile")) {
if (Imm)
O << ".volatile";
} else if (!strcmp(Modifier, "addsp")) {
switch (Imm) {
- case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break;
- case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break;
- case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break;
- case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break;
- case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break;
+ case NVPTX::PTXLdStInstCode::GLOBAL:
+ O << ".global";
+ break;
+ case NVPTX::PTXLdStInstCode::SHARED:
+ O << ".shared";
+ break;
+ case NVPTX::PTXLdStInstCode::LOCAL:
+ O << ".local";
+ break;
+ case NVPTX::PTXLdStInstCode::PARAM:
+ O << ".param";
+ break;
+ case NVPTX::PTXLdStInstCode::CONSTANT:
+ O << ".const";
+ break;
case NVPTX::PTXLdStInstCode::GENERIC:
if (!nvptxSubtarget.hasGenericLdSt())
O << ".global";
break;
default:
- assert("wrong value");
+ llvm_unreachable("Wrong Address Space");
}
- }
- else if (!strcmp(Modifier, "sign")) {
- if (Imm==NVPTX::PTXLdStInstCode::Signed)
+ } else if (!strcmp(Modifier, "sign")) {
+ if (Imm == NVPTX::PTXLdStInstCode::Signed)
O << "s";
- else if (Imm==NVPTX::PTXLdStInstCode::Unsigned)
+ else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
O << "u";
else
O << "f";
- }
- else if (!strcmp(Modifier, "vec")) {
- if (Imm==NVPTX::PTXLdStInstCode::V2)
+ } else if (!strcmp(Modifier, "vec")) {
+ if (Imm == NVPTX::PTXLdStInstCode::V2)
O << ".v2";
- else if (Imm==NVPTX::PTXLdStInstCode::V4)
+ else if (Imm == NVPTX::PTXLdStInstCode::V4)
O << ".v4";
- }
- else
- assert("unknown modifier");
- }
- else
- assert("unknown modifier");
+ } else
+ llvm_unreachable("Unknown Modifier");
+ } else
+ llvm_unreachable("Empty Modifier");
}
-void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
-  emitLinkageDirective(F,O);
+void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
+  emitLinkageDirective(F, O);
if (llvm::isKernelFunction(*F))
O << ".entry ";
else
@@ -715,8 +700,7 @@ void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
O << ";\n";
}
-static bool usedInGlobalVarDef(const Constant *C)
-{
+static bool usedInGlobalVarDef(const Constant *C) {
if (!C)
return false;
@@ -726,8 +710,8 @@ static bool usedInGlobalVarDef(const Constant *C)
return true;
}
- for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
- ui!=ue; ++ui) {
+ for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+ ui != ue; ++ui) {
const Constant *C = dyn_cast<Constant>(*ui);
if (usedInGlobalVarDef(C))
return true;
@@ -735,8 +719,7 @@ static bool usedInGlobalVarDef(const Constant *C)
return false;
}
-static bool usedInOneFunc(const User *U, Function const *&oneFunc)
-{
+static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
if (othergv->getName().str() == "llvm.used")
return true;
@@ -749,19 +732,17 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc)
return false;
oneFunc = curFunc;
return true;
- }
- else
+ } else
return false;
}
if (const MDNode *md = dyn_cast<MDNode>(U))
if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
- (md->getName().str() == "llvm.dbg.sp")))
+ (md->getName().str() == "llvm.dbg.sp")))
return true;
-
- for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end();
- ui!=ue; ++ui) {
+ for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end();
+ ui != ue; ++ui) {
if (usedInOneFunc(*ui, oneFunc) == false)
return false;
}
@@ -795,16 +776,18 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
static bool useFuncSeen(const Constant *C,
llvm::DenseMap<const Function *, bool> &seenMap) {
- for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
- ui!=ue; ++ui) {
+ for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+ ui != ue; ++ui) {
if (const Constant *cu = dyn_cast<Constant>(*ui)) {
if (useFuncSeen(cu, seenMap))
return true;
} else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
const BasicBlock *bb = I->getParent();
- if (!bb) continue;
+ if (!bb)
+ continue;
const Function *caller = bb->getParent();
- if (!caller) continue;
+ if (!caller)
+ continue;
if (seenMap.find(caller) != seenMap.end())
return true;
}
@@ -812,10 +795,9 @@ static bool useFuncSeen(const Constant *C,
return false;
}
-void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
llvm::DenseMap<const Function *, bool> seenMap;
- for (Module::const_iterator FI=M.begin(), FE=M.end();
- FI!=FE; ++FI) {
+ for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = FI;
if (F->isDeclaration()) {
@@ -827,8 +809,9 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
emitDeclaration(F, O);
continue;
}
- for (Value::const_use_iterator iter=F->use_begin(),
- iterEnd=F->use_end(); iter!=iterEnd; ++iter) {
+ for (Value::const_use_iterator iter = F->use_begin(),
+ iterEnd = F->use_end();
+ iter != iterEnd; ++iter) {
if (const Constant *C = dyn_cast<Constant>(*iter)) {
if (usedInGlobalVarDef(C)) {
// The use is in the initialization of a global variable
@@ -847,12 +830,15 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
}
}
- if (!isa<Instruction>(*iter)) continue;
+ if (!isa<Instruction>(*iter))
+ continue;
const Instruction *instr = cast<Instruction>(*iter);
const BasicBlock *bb = instr->getParent();
- if (!bb) continue;
+ if (!bb)
+ continue;
const Function *caller = bb->getParent();
- if (!caller) continue;
+ if (!caller)
+ continue;
// If a caller has already been seen, then the caller is
// appearing in the module before the callee, so print out
@@ -871,9 +857,10 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
DebugInfoFinder DbgFinder;
DbgFinder.processModule(M);
- unsigned i=1;
+ unsigned i = 1;
for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
- E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ E = DbgFinder.compile_unit_end();
+ I != E; ++I) {
DICompileUnit DIUnit(*I);
StringRef Filename(DIUnit.getFilename());
StringRef Dirname(DIUnit.getDirectory());
@@ -890,7 +877,8 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
}
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
- E = DbgFinder.subprogram_end(); I != E; ++I) {
+ E = DbgFinder.subprogram_end();
+ I != E; ++I) {
DISubprogram SP(*I);
StringRef Filename(SP.getFilename());
StringRef Dirname(SP.getDirectory());
@@ -906,7 +894,7 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
}
}
-bool NVPTXAsmPrinter::doInitialization (Module &M) {
+bool NVPTXAsmPrinter::doInitialization(Module &M) {
SmallString<128> Str1;
raw_svector_ostream OS1(Str1);
@@ -918,8 +906,8 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
//bool Result = AsmPrinter::doInitialization(M);
// Initialize TargetLoweringObjectFile.
- const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
- .Initialize(OutContext, TM);
+ const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
+ .Initialize(OutContext, TM);
Mang = new Mangler(OutContext, *TM.getDataLayout());
@@ -927,11 +915,9 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
emitHeader(M, OS1);
OutStreamer.EmitRawText(OS1.str());
-
// Already commented out
//bool Result = AsmPrinter::doInitialization(M);
-
if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
recordAndEmitFilenames(M);
@@ -945,16 +931,16 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
// global variable in order, and ensure that we emit it *after* its dependent
// globals. We use a little extra memory maintaining both a set and a list to
// have fast searches while maintaining a strict ordering.
- SmallVector<GlobalVariable*,8> Globals;
- DenseSet<GlobalVariable*> GVVisited;
- DenseSet<GlobalVariable*> GVVisiting;
+ SmallVector<GlobalVariable *, 8> Globals;
+ DenseSet<GlobalVariable *> GVVisited;
+ DenseSet<GlobalVariable *> GVVisiting;
// Visit each global variable, in order
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
+ ++I)
VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
- assert(GVVisited.size() == M.getGlobalList().size() &&
+ assert(GVVisited.size() == M.getGlobalList().size() &&
"Missed a global variable");
assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
@@ -965,10 +951,10 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
OS2 << '\n';
OutStreamer.EmitRawText(OS2.str());
- return false; // success
+ return false; // success
}
-void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
O << "//\n";
O << "// Generated by LLVM NVPTX Back-End\n";
O << "//\n";
@@ -1008,12 +994,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
Module::GlobalListType &global_list = M.getGlobalList();
int i, n = global_list.size();
- GlobalVariable **gv_array = new GlobalVariable* [n];
+ GlobalVariable **gv_array = new GlobalVariable *[n];
// first, back-up GlobalVariable in gv_array
i = 0;
for (Module::global_iterator I = global_list.begin(), E = global_list.end();
- I != E; ++I)
+ I != E; ++I)
gv_array[i++] = &*I;
// second, empty global_list
@@ -1024,13 +1010,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
bool ret = AsmPrinter::doFinalization(M);
// now we restore global variables
- for (i = 0; i < n; i ++)
+ for (i = 0; i < n; i++)
global_list.insert(global_list.end(), gv_array[i]);
delete[] gv_array;
return ret;
-
//bool Result = AsmPrinter::doFinalization(M);
  // Instead of calling the parent's doFinalization, we may
  // clone the parent's doFinalization and customize here.
@@ -1050,8 +1035,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
// external without init -> .extern
// appending -> not allowed, assert.
-void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
-{
+void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
+ raw_ostream &O) {
if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
if (V->hasExternalLinkage()) {
if (isa<GlobalVariable>(V)) {
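The comment table above maps linkage to PTX directives for the CUDA driver interface; only the ".extern" and "appending" rows are fully visible in this hunk, so the ".visible" case in this sketch is an assumption about the elided branch:

    #include <cassert>
    #include <string>

    static std::string linkageDirective(bool isExternal, bool hasInitializer,
                                        bool isAppending) {
      assert(!isAppending && "appending linkage -> not allowed");
      if (isExternal)
        return hasInitializer ? ".visible " : ".extern "; // .visible assumed
      return ""; // internal linkage: no directive
    }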
@@ -1078,8 +1063,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
}
}
-
-void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
+void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
bool processDemoted) {
// Skip meta data
@@ -1130,30 +1114,48 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
if (Initializer)
CI = dyn_cast<ConstantInt>(Initializer);
if (CI) {
- unsigned sample=CI->getZExtValue();
+ unsigned sample = CI->getZExtValue();
O << " = { ";
- for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >>
- __CLK_ADDRESS_BASE) ; i < 3 ; i++) {
+ for (int i = 0,
+ addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
+ i < 3; i++) {
O << "addr_mode_" << i << " = ";
switch (addr) {
- case 0: O << "wrap"; break;
- case 1: O << "clamp_to_border"; break;
- case 2: O << "clamp_to_edge"; break;
- case 3: O << "wrap"; break;
- case 4: O << "mirror"; break;
+ case 0:
+ O << "wrap";
+ break;
+ case 1:
+ O << "clamp_to_border";
+ break;
+ case 2:
+ O << "clamp_to_edge";
+ break;
+ case 3:
+ O << "wrap";
+ break;
+ case 4:
+ O << "mirror";
+ break;
}
- O <<", ";
+ O << ", ";
}
O << "filter_mode = ";
- switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) {
- case 0: O << "nearest"; break;
- case 1: O << "linear"; break;
- case 2: assert ( 0 && "Anisotropic filtering is not supported");
- default: O << "nearest"; break;
+ switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
+ case 0:
+ O << "nearest";
+ break;
+ case 1:
+ O << "linear";
+ break;
+ case 2:
+ assert(0 && "Anisotropic filtering is not supported");
+ default:
+ O << "nearest";
+ break;
}
- if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) {
+ if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
O << ", force_unnormalized_coords = 1";
}
O << " }";
@@ -1195,7 +1197,6 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
else
O << " .align " << GVar->getAlignment();
-
if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
O << " .";
O << getPTXFundamentalTypeStr(ETy, false);
@@ -1205,17 +1206,17 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
// Ptx allows variable initialization only for constant and global state
// spaces.
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
- && GVar->hasInitializer()) {
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+ GVar->hasInitializer()) {
Constant *Initializer = GVar->getInitializer();
if (!Initializer->isNullValue()) {
- O << " = " ;
+ O << " = ";
printScalarConstant(Initializer, O);
}
}
} else {
- unsigned int ElementSize =0;
+ unsigned int ElementSize = 0;
// Although PTX has direct support for struct type and array type and
// LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
@@ -1229,54 +1230,49 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
// Ptx allows variable initialization only for constant and
// global state spaces.
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
- && GVar->hasInitializer()) {
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+ GVar->hasInitializer()) {
Constant *Initializer = GVar->getInitializer();
- if (!isa<UndefValue>(Initializer) &&
- !Initializer->isNullValue()) {
+ if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
AggBuffer aggBuffer(ElementSize, O, *this);
bufferAggregateConstant(Initializer, &aggBuffer);
if (aggBuffer.numSymbols) {
if (nvptxSubtarget.is64Bit()) {
- O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ;
- O << ElementSize/8;
- }
- else {
- O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ;
- O << ElementSize/4;
+ O << " .u64 " << *Mang->getSymbol(GVar) << "[";
+ O << ElementSize / 8;
+ } else {
+ O << " .u32 " << *Mang->getSymbol(GVar) << "[";
+ O << ElementSize / 4;
}
O << "]";
- }
- else {
- O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+ } else {
+ O << " .b8 " << *Mang->getSymbol(GVar) << "[";
O << ElementSize;
O << "]";
}
- O << " = {" ;
+ O << " = {";
aggBuffer.print();
O << "}";
- }
- else {
- O << " .b8 " << *Mang->getSymbol(GVar) ;
+ } else {
+ O << " .b8 " << *Mang->getSymbol(GVar);
if (ElementSize) {
- O <<"[" ;
+ O << "[";
O << ElementSize;
O << "]";
}
}
- }
- else {
+ } else {
O << " .b8 " << *Mang->getSymbol(GVar);
if (ElementSize) {
- O <<"[" ;
+ O << "[";
O << ElementSize;
O << "]";
}
}
break;
default:
- assert( 0 && "type not supported yet");
+ assert(0 && "type not supported yet");
}
}
@@ -1289,7 +1285,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
std::vector<GlobalVariable *> &gvars = localDecls[f];
- for (unsigned i=0, e=gvars.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
O << "\t// demoted variable\n\t";
printModuleLevelGV(gvars[i], O, true);
}
@@ -1299,32 +1295,33 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
raw_ostream &O) const {
switch (AddressSpace) {
case llvm::ADDRESS_SPACE_LOCAL:
- O << "local" ;
+ O << "local";
break;
case llvm::ADDRESS_SPACE_GLOBAL:
- O << "global" ;
+ O << "global";
break;
case llvm::ADDRESS_SPACE_CONST:
// This logic should be consistent with that in
    // getCodeAddrSpace() (NVPTXISelDAGToDAG.cpp)
if (nvptxSubtarget.hasGenericLdSt())
- O << "global" ;
+ O << "global";
else
- O << "const" ;
+ O << "const";
break;
case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
- O << "const" ;
+ O << "const";
break;
case llvm::ADDRESS_SPACE_SHARED:
- O << "shared" ;
+ O << "shared";
break;
default:
- llvm_unreachable("unexpected address space");
+ report_fatal_error("Bad address space found while emitting PTX");
+ break;
}
}
-std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
- bool useB4PTR) const {
+std::string
+NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
switch (Ty->getTypeID()) {
default:
llvm_unreachable("unexpected type");
@@ -1348,17 +1345,20 @@ std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
return "f64";
case Type::PointerTyID:
if (nvptxSubtarget.is64Bit())
- if (useB4PTR) return "b64";
- else return "u64";
+ if (useB4PTR)
+ return "b64";
+ else
+ return "u64";
+ else if (useB4PTR)
+ return "b32";
else
- if (useB4PTR) return "b32";
- else return "u32";
+ return "u32";
}
llvm_unreachable("unexpected type");
return NULL;
}
-void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
+void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
const DataLayout *TD = TM.getDataLayout();
@@ -1382,7 +1382,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
return;
}
- int64_t ElementSize =0;
+ int64_t ElementSize = 0;
// Although PTX has direct support for struct type and array type and LLVM IR
// is very similar to PTX, the LLVM CodeGen does not support for targets that
@@ -1393,22 +1393,19 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
case Type::ArrayTyID:
case Type::VectorTyID:
ElementSize = TD->getTypeStoreSize(ETy);
- O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+ O << " .b8 " << *Mang->getSymbol(GVar) << "[";
if (ElementSize) {
- O << itostr(ElementSize) ;
+ O << itostr(ElementSize);
}
O << "]";
break;
default:
- assert( 0 && "type not supported yet");
+ assert(0 && "type not supported yet");
}
- return ;
+ return;
}
-
-static unsigned int
-getOpenCLAlignment(const DataLayout *TD,
- Type *Ty) {
+static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
return TD->getPrefTypeAlignment(Ty);
@@ -1422,9 +1419,9 @@ getOpenCLAlignment(const DataLayout *TD,
unsigned int numE = VTy->getNumElements();
unsigned int alignE = TD->getPrefTypeAlignment(ETy);
if (numE == 3)
- return 4*alignE;
+ return 4 * alignE;
else
- return numE*alignE;
+ return numE * alignE;
}
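The numE == 3 special case mirrors OpenCL's rule that a 3-element vector occupies a 4-element slot. A minimal standalone sketch of that rule, with illustrative element alignments:

    #include <cstdio>

    // 3-wide vectors are padded to 4 slots; other widths scale linearly.
    static unsigned vecAlign(unsigned numE, unsigned alignE) {
      return (numE == 3 ? 4 : numE) * alignE;
    }

    int main() {
      std::printf("float3 -> %u\n", vecAlign(3, 4)); // 16
      std::printf("float4 -> %u\n", vecAlign(4, 4)); // 16
      std::printf("int2   -> %u\n", vecAlign(2, 4)); // 8
      return 0;
    }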
const StructType *STy = dyn_cast<StructType>(Ty);
@@ -1432,7 +1429,7 @@ getOpenCLAlignment(const DataLayout *TD,
unsigned int alignStruct = 1;
// Go through each element of the struct and find the
// largest alignment.
- for (unsigned i=0, e=STy->getNumElements(); i != e; i++) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
Type *ETy = STy->getElementType(i);
unsigned int align = getOpenCLAlignment(TD, ETy);
if (align > alignStruct)
@@ -1476,7 +1473,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
}
for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
- if (i==paramIndex) {
+ if (i == paramIndex) {
printParamName(I, paramIndex, O);
return;
}
@@ -1484,10 +1481,9 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
llvm_unreachable("paramIndex out of bound");
}
-void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
- raw_ostream &O) {
+void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const DataLayout *TD = TM.getDataLayout();
- const AttrListPtr &PAL = F->getAttributes();
+ const AttributeSet &PAL = F->getAttributes();
const TargetLowering *TLI = TM.getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
@@ -1499,7 +1495,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
O << "(\n";
for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
- const Type *Ty = I->getType();
+ Type *Ty = I->getType();
if (!first)
O << ",\n";
@@ -1514,15 +1510,28 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
else // Default image is read_only
O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
- }
- else // Should be llvm::isSampler(*I)
+ } else // Should be llvm::isSampler(*I)
O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
- << paramIndex;
+ << paramIndex;
continue;
}
- if (PAL.getParamAttributes(paramIndex+1).
- hasAttribute(Attributes::ByVal) == false) {
+ if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+ if (Ty->isVectorTy()) {
+      // Just print .param .align <a> .b8 <name>[size];
+ // <a> = PAL.getparamalignment
+ // size = typeallocsize of element type
+ unsigned align = PAL.getParamAlignment(paramIndex + 1);
+ if (align == 0)
+ align = TD->getABITypeAlignment(Ty);
+
+ unsigned sz = TD->getTypeAllocSize(Ty);
+ O << "\t.param .align " << align << " .b8 ";
+ printParamName(I, paramIndex, O);
+ O << "[" << sz << "]";
+
+ continue;
+ }
// Just a scalar
const PointerType *PTy = dyn_cast<PointerType>(Ty);
if (isKernelFunc) {
@@ -1533,7 +1542,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
Type *ETy = PTy->getElementType();
int addrSpace = PTy->getAddressSpace();
- switch(addrSpace) {
+ switch (addrSpace) {
default:
O << ".ptr ";
break;
@@ -1548,15 +1557,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
O << ".ptr .global ";
break;
}
- O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " ";
+ O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
}
printParamName(I, paramIndex, O);
continue;
}
// non-pointer scalar to kernel func
- O << "\t.param ."
- << getPTXFundamentalTypeStr(Ty) << " ";
+ O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " ";
printParamName(I, paramIndex, O);
continue;
}
@@ -1565,9 +1573,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
unsigned sz = 0;
if (isa<IntegerType>(Ty)) {
sz = cast<IntegerType>(Ty)->getBitWidth();
- if (sz < 32) sz = 32;
- }
- else if (isa<PointerType>(Ty))
+ if (sz < 32)
+ sz = 32;
+ } else if (isa<PointerType>(Ty))
sz = thePointerTy.getSizeInBits();
else
sz = Ty->getPrimitiveSizeInBits();
@@ -1581,21 +1589,19 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
// param has byVal attribute. So should be a pointer
const PointerType *PTy = dyn_cast<PointerType>(Ty);
- assert(PTy &&
- "Param with byval attribute should be a pointer type");
+ assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
if (isABI || isKernelFunc) {
      // Just print .param .align <a> .b8 <name>[size];
// <a> = PAL.getparamalignment
// size = typeallocsize of element type
- unsigned align = PAL.getParamAlignment(paramIndex+1);
+ unsigned align = PAL.getParamAlignment(paramIndex + 1);
if (align == 0)
align = TD->getABITypeAlignment(ETy);
unsigned sz = TD->getTypeAllocSize(ETy);
- O << "\t.param .align " << align
- << " .b8 ";
+ O << "\t.param .align " << align << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
continue;
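For concreteness, with made-up values (alignment 8, a 24-byte element type, a hypothetical symbol foo_param_0), the branch above emits a directive of this shape:

    .param .align 8 .b8 foo_param_0[24]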
@@ -1606,7 +1612,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
// each vector element.
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, ETy, vtparts);
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -1614,15 +1620,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0,je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << "\t.reg .b" << sz << " ";
printParamName(I, paramIndex, O);
- if (j<je-1) O << ",\n";
+ if (j < je - 1)
+ O << ",\n";
++paramIndex;
}
- if (i<e-1)
+ if (i < e - 1)
O << ",\n";
}
--paramIndex;
@@ -1639,9 +1647,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
emitFunctionParamList(F, O);
}
-
-void NVPTXAsmPrinter::
-setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
+void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
+ const MachineFunction &MF) {
SmallString<128> Str;
raw_svector_ostream O(Str);
@@ -1654,14 +1661,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
int NumBytes = (int) MFI->getStackSize();
if (NumBytes) {
- O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t"
- << DEPOTNAME
- << getFunctionNumber() << "[" << NumBytes << "];\n";
+ O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
+ << getFunctionNumber() << "[" << NumBytes << "];\n";
if (nvptxSubtarget.is64Bit()) {
O << "\t.reg .b64 \t%SP;\n";
O << "\t.reg .b64 \t%SPL;\n";
- }
- else {
+ } else {
O << "\t.reg .b32 \t%SP;\n";
O << "\t.reg .b32 \t%SPL;\n";
}
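For a function with a 16-byte, 8-byte-aligned depot on a 64-bit target, the block above emits a preamble of this shape (DEPOTNAME is assumed here to expand to __local_depot; the trailing 0 is the function number):

    .local .align 8 .b8 __local_depot0[16];
    .reg .b64 %SP;
    .reg .b64 %SPL;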
@@ -1672,12 +1677,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
// register number and the per class virtual register number.
// We use the per class virtual register number in the ptx output.
unsigned int numVRs = MRI->getNumVirtRegs();
- for (unsigned i=0; i< numVRs; i++) {
+ for (unsigned i = 0; i < numVRs; i++) {
unsigned int vr = TRI->index2VirtReg(i);
const TargetRegisterClass *RC = MRI->getRegClass(vr);
std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
int n = regmap.size();
- regmap.insert(std::make_pair(vr, n+1));
+ regmap.insert(std::make_pair(vr, n + 1));
}
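A minimal sketch, assuming the same scheme as the loop above: each register class keeps its own map, so virtual registers get a 1-based number per class instead of their global index:

    #include <cstdio>
    #include <map>

    int main() {
      // classID -> (global vreg -> per-class number), as in VRidGlobal2LocalMap
      std::map<unsigned, std::map<unsigned, unsigned> > VRid;
      unsigned vrs[]   = { 7, 12, 31 }; // illustrative global vreg indices
      unsigned klass[] = { 0, 1, 0 };   // illustrative register class IDs
      for (int i = 0; i < 3; ++i) {
        std::map<unsigned, unsigned> &regmap = VRid[klass[i]];
        int n = regmap.size();
        regmap.insert(std::make_pair(vrs[i], n + 1));
      }
      std::printf("vr 31 -> #%u within class 0\n", VRid[0][31]); // #2
      return 0;
    }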
// Emit register declarations
@@ -1721,23 +1726,20 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
OutStreamer.EmitRawText(O.str());
}
-
void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
- APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
+ APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
bool ignored;
unsigned int numHex;
const char *lead;
- if (Fp->getType()->getTypeID()==Type::FloatTyID) {
+ if (Fp->getType()->getTypeID() == Type::FloatTyID) {
numHex = 8;
lead = "0f";
- APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
- &ignored);
+ APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
} else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
numHex = 16;
lead = "0d";
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
- &ignored);
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
} else
llvm_unreachable("unsupported fp type");
@@ -1779,7 +1781,6 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
llvm_unreachable("Not scalar type found in printScalarConstant()");
}
-
void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
@@ -1787,7 +1788,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
int s = TD->getTypeAllocSize(CPV->getType());
- if (s<Bytes)
+ if (s < Bytes)
s = Bytes;
aggBuffer->addZeros(s);
return;
@@ -1798,28 +1799,26 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
case Type::IntegerTyID: {
const Type *ETy = CPV->getType();
- if ( ETy == Type::getInt8Ty(CPV->getContext()) ){
+ if (ETy == Type::getInt8Ty(CPV->getContext())) {
unsigned char c =
(unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
ptr = &c;
aggBuffer->addBytes(ptr, 1, Bytes);
- } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) {
- short int16 =
- (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
- ptr = (unsigned char*)&int16;
+ } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
+ short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ ptr = (unsigned char *)&int16;
aggBuffer->addBytes(ptr, 2, Bytes);
- } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) {
+ } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
- int int32 =(int)(constInt->getZExtValue());
- ptr = (unsigned char*)&int32;
+ int int32 = (int)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
break;
} else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (ConstantInt *constInt =
- dyn_cast<ConstantInt>(ConstantFoldConstantExpression(
- Cexpr, TD))) {
- int int32 =(int)(constInt->getZExtValue());
- ptr = (unsigned char*)&int32;
+ if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ ConstantFoldConstantExpression(Cexpr, TD))) {
+ int int32 = (int)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
break;
}
@@ -1831,17 +1830,17 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
}
llvm_unreachable("unsupported integer const type");
- } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) {
+ } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
- long long int64 =(long long)(constInt->getZExtValue());
- ptr = (unsigned char*)&int64;
+ long long int64 = (long long)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
break;
} else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
- long long int64 =(long long)(constInt->getZExtValue());
- ptr = (unsigned char*)&int64;
+ ConstantFoldConstantExpression(Cexpr, TD))) {
+ long long int64 = (long long)(constInt->getZExtValue());
+ ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
break;
}
@@ -1860,17 +1859,16 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
case Type::FloatTyID:
case Type::DoubleTyID: {
ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
- const Type* Ty = CFP->getType();
+ const Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
- float float32 = (float)CFP->getValueAPF().convertToFloat();
- ptr = (unsigned char*)&float32;
+ float float32 = (float) CFP->getValueAPF().convertToFloat();
+ ptr = (unsigned char *)&float32;
aggBuffer->addBytes(ptr, 4, Bytes);
} else if (Ty == Type::getDoubleTy(CPV->getContext())) {
double float64 = CFP->getValueAPF().convertToDouble();
- ptr = (unsigned char*)&float64;
+ ptr = (unsigned char *)&float64;
aggBuffer->addBytes(ptr, 8, Bytes);
- }
- else {
+ } else {
llvm_unreachable("unsupported fp const type");
}
break;
@@ -1878,8 +1876,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
case Type::PointerTyID: {
if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
aggBuffer->addSymbol(GVar);
- }
- else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
Value *v = Cexpr->stripPointerCasts();
aggBuffer->addSymbol(v);
}
@@ -1895,10 +1892,9 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
isa<ConstantStruct>(CPV)) {
int ElementSize = TD->getTypeAllocSize(CPV->getType());
bufferAggregateConstant(CPV, aggBuffer);
- if ( Bytes > ElementSize )
- aggBuffer->addZeros(Bytes-ElementSize);
- }
- else if (isa<ConstantAggregateZero>(CPV))
+ if (Bytes > ElementSize)
+ aggBuffer->addZeros(Bytes - ElementSize);
+ } else if (isa<ConstantAggregateZero>(CPV))
aggBuffer->addZeros(Bytes);
else
llvm_unreachable("Unexpected Constant type");
@@ -1924,7 +1920,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
}
if (const ConstantDataSequential *CDS =
- dyn_cast<ConstantDataSequential>(CPV)) {
+ dyn_cast<ConstantDataSequential>(CPV)) {
if (CDS->getNumElements())
for (unsigned i = 0; i < CDS->getNumElements(); ++i)
bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
@@ -1932,20 +1928,18 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
return;
}
-
if (isa<ConstantStruct>(CPV)) {
if (CPV->getNumOperands()) {
StructType *ST = cast<StructType>(CPV->getType());
for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
- if ( i == (e - 1))
+ if (i == (e - 1))
Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
- TD->getTypeAllocSize(ST)
- - TD->getStructLayout(ST)->getElementOffset(i);
+ TD->getTypeAllocSize(ST) -
+ TD->getStructLayout(ST)->getElementOffset(i);
else
- Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) -
- TD->getStructLayout(ST)->getElementOffset(i);
- bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes,
- aggBuffer);
+ Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
+ TD->getStructLayout(ST)->getElementOffset(i);
+ bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
}
}
return;
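A worked instance of the span arithmetic above, assuming a typical layout for a struct of { i32, i8 } whose alloc size is 8:

    offset(0) = 0, offset(1) = 4, getTypeAllocSize = 8
    element 0 span = offset(1) - offset(0)     = 4 bytes
    element 1 span = offset(0) + 8 - offset(1) = 4 bytes (1 data byte + 3 bytes of tail padding)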
@@ -1956,15 +1950,13 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
// buildTypeNameMap - Run through symbol table looking for type names.
//
-
bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
- if (PI != TypeNameMap.end() &&
- (!PI->second.compare("struct._image1d_t") ||
- !PI->second.compare("struct._image2d_t") ||
- !PI->second.compare("struct._image3d_t")))
+ if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
+ !PI->second.compare("struct._image2d_t") ||
+ !PI->second.compare("struct._image3d_t")))
return true;
return false;
@@ -1974,10 +1966,10 @@ bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
///
bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
+ const char *ExtraCode, raw_ostream &O) {
if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
switch (ExtraCode[0]) {
default:
@@ -1993,13 +1985,11 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
-bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
+ const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier
+ return true; // Unknown modifier
O << '[';
printMemOperand(MI, OpNo, O);
@@ -2008,71 +1998,69 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
-{
- switch(MI.getOpcode()) {
+bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
return false;
- case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0:
- case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32:
- case NVPTX::CallArgF64: case NVPTX::CallArgI16:
- case NVPTX::CallArgI32: case NVPTX::CallArgI32imm:
- case NVPTX::CallArgI64: case NVPTX::CallArgI8:
- case NVPTX::CallArgParam: case NVPTX::CallVoidInst:
- case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End:
+ case NVPTX::CallArgBeginInst:
+ case NVPTX::CallArgEndInst0:
+ case NVPTX::CallArgEndInst1:
+ case NVPTX::CallArgF32:
+ case NVPTX::CallArgF64:
+ case NVPTX::CallArgI16:
+ case NVPTX::CallArgI32:
+ case NVPTX::CallArgI32imm:
+ case NVPTX::CallArgI64:
+ case NVPTX::CallArgI8:
+ case NVPTX::CallArgParam:
+ case NVPTX::CallVoidInst:
+ case NVPTX::CallVoidInstReg:
+ case NVPTX::Callseq_End:
case NVPTX::CallVoidInstReg64:
- case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst:
- case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst:
- case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst:
- case NVPTX::StoreParamF32: case NVPTX::StoreParamF64:
- case NVPTX::StoreParamI16: case NVPTX::StoreParamI32:
- case NVPTX::StoreParamI64: case NVPTX::StoreParamI8:
- case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8:
- case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16:
- case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64:
- case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32:
- case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8:
- case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16:
- case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8:
- case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64:
- case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32:
- case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8:
- case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16:
- case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8:
- case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64:
- case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32:
- case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8:
- case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64:
- case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32:
- case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8:
- case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16:
- case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8:
- case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64:
- case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32:
- case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8:
- case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16:
- case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8:
- case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64:
- case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32:
- case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64:
- case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam:
- case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64:
- case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32:
- case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8:
- case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64:
- case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32:
- case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8:
- case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64:
- case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32:
- case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8:
- case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16:
- case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8:
- case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64:
- case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32:
- case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8:
- case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16:
- case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8:
- case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE:
+ case NVPTX::DeclareParamInst:
+ case NVPTX::DeclareRetMemInst:
+ case NVPTX::DeclareRetRegInst:
+ case NVPTX::DeclareRetScalarInst:
+ case NVPTX::DeclareScalarParamInst:
+ case NVPTX::DeclareScalarRegInst:
+ case NVPTX::StoreParamF32:
+ case NVPTX::StoreParamF64:
+ case NVPTX::StoreParamI16:
+ case NVPTX::StoreParamI32:
+ case NVPTX::StoreParamI64:
+ case NVPTX::StoreParamI8:
+ case NVPTX::StoreParamS32I8:
+ case NVPTX::StoreParamU32I8:
+ case NVPTX::StoreParamS32I16:
+ case NVPTX::StoreParamU32I16:
+ case NVPTX::StoreRetvalF32:
+ case NVPTX::StoreRetvalF64:
+ case NVPTX::StoreRetvalI16:
+ case NVPTX::StoreRetvalI32:
+ case NVPTX::StoreRetvalI64:
+ case NVPTX::StoreRetvalI8:
+ case NVPTX::LastCallArgF32:
+ case NVPTX::LastCallArgF64:
+ case NVPTX::LastCallArgI16:
+ case NVPTX::LastCallArgI32:
+ case NVPTX::LastCallArgI32imm:
+ case NVPTX::LastCallArgI64:
+ case NVPTX::LastCallArgI8:
+ case NVPTX::LastCallArgParam:
+ case NVPTX::LoadParamMemF32:
+ case NVPTX::LoadParamMemF64:
+ case NVPTX::LoadParamMemI16:
+ case NVPTX::LoadParamMemI32:
+ case NVPTX::LoadParamMemI64:
+ case NVPTX::LoadParamMemI8:
+ case NVPTX::LoadParamRegF32:
+ case NVPTX::LoadParamRegF64:
+ case NVPTX::LoadParamRegI16:
+ case NVPTX::LoadParamRegI32:
+ case NVPTX::LoadParamRegI64:
+ case NVPTX::LoadParamRegI8:
+ case NVPTX::PrototypeInst:
+ case NVPTX::DBG_VALUE:
return true;
}
return false;
@@ -2084,10 +2072,9 @@ extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
}
-
void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
std::stringstream temp;
- LineReader * reader = this->getReader(filename.str());
+ LineReader *reader = this->getReader(filename.str());
temp << "\n//";
temp << filename.str();
temp << ":";
@@ -2098,29 +2085,26 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
this->OutStreamer.EmitRawText(Twine(temp.str()));
}
-
LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
- if (reader == NULL) {
- reader = new LineReader(filename);
+ if (reader == NULL) {
+ reader = new LineReader(filename);
}
if (reader->fileName() != filename) {
delete reader;
- reader = new LineReader(filename);
+ reader = new LineReader(filename);
}
return reader;
}
-
-std::string
-LineReader::readLine(unsigned lineNum) {
+std::string LineReader::readLine(unsigned lineNum) {
if (lineNum < theCurLine) {
theCurLine = 0;
- fstr.seekg(0,std::ios::beg);
+ fstr.seekg(0, std::ios::beg);
}
while (theCurLine < lineNum) {
- fstr.getline(buff,500);
+ fstr.getline(buff, 500);
theCurLine++;
}
return buff;
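A minimal standalone sketch of the rewind-and-scan behavior above (example.cu is a made-up input file, and std::string stands in for the fixed buffer):

    #include <fstream>
    #include <iostream>
    #include <string>

    int main() {
      std::ifstream fstr("example.cu");
      std::string buff;
      unsigned theCurLine = 0;
      const unsigned lineNum = 3; // the line we want
      if (lineNum < theCurLine) { // rewind if we already read past it
        theCurLine = 0;
        fstr.seekg(0, std::ios::beg);
      }
      while (theCurLine < lineNum && std::getline(fstr, buff))
        theCurLine++;
      std::cout << buff << "\n"; // the requested line, if the file has one
      return 0;
    }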
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 6488b1442580..6dc9fc0ffeff 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -16,19 +16,19 @@
#define NVPTXASMPRINTER_H
#include "NVPTX.h"
-#include "NVPTXTargetMachine.h"
#include "NVPTXSubtarget.h"
-#include "llvm/Function.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetMachine.h"
#include <fstream>
// The PTX syntax and format are very different from those usually seen in a .s
@@ -43,15 +43,15 @@
// This is defined in AsmPrinter.cpp.
// Used to process the constant expressions in initializers.
namespace nvptx {
-const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
- llvm::AsmPrinter &AP) ;
+const llvm::MCExpr *
+LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP);
}
namespace llvm {
class LineReader {
private:
- unsigned theCurLine ;
+ unsigned theCurLine;
std::ifstream fstr;
char buff[512];
std::string theFileName;
@@ -63,17 +63,12 @@ public:
theFileName = filename;
}
std::string fileName() { return theFileName; }
- ~LineReader() {
- fstr.close();
- }
+ ~LineReader() { fstr.close(); }
std::string readLine(unsigned line);
};
-
-
class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
-
class AggBuffer {
// Used to buffer the emitted string for initializing global
// aggregates.
@@ -92,7 +87,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
// Once we have this AggBuffer setup, we can choose how to print
// it out.
public:
- unsigned size; // size of the buffer in bytes
+ unsigned size; // size of the buffer in bytes
unsigned char *buffer; // the buffer
unsigned numSymbols; // number of symbol addresses
SmallVector<unsigned, 4> symbolPosInBuffer;
@@ -105,33 +100,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
public:
AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
- :O(_O),AP(_AP) {
+ : O(_O), AP(_AP) {
buffer = new unsigned char[_size];
size = _size;
curpos = 0;
numSymbols = 0;
}
- ~AggBuffer() {
- delete [] buffer;
- }
+ ~AggBuffer() { delete[] buffer; }
unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
- assert((curpos+Num) <= size);
- assert((curpos+Bytes) <= size);
- for ( int i= 0; i < Num; ++i) {
+ assert((curpos + Num) <= size);
+ assert((curpos + Bytes) <= size);
+ for (int i = 0; i < Num; ++i) {
buffer[curpos] = Ptr[i];
- curpos ++;
+ curpos++;
}
- for ( int i=Num; i < Bytes ; ++i) {
+ for (int i = Num; i < Bytes; ++i) {
buffer[curpos] = 0;
- curpos ++;
+ curpos++;
}
return curpos;
}
unsigned addZeros(int Num) {
- assert((curpos+Num) <= size);
- for ( int i= 0; i < Num; ++i) {
+ assert((curpos + Num) <= size);
+ for (int i = 0; i < Num; ++i) {
buffer[curpos] = 0;
- curpos ++;
+ curpos++;
}
return curpos;
}
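A minimal sketch of the padding contract above: addBytes copies Num real bytes and zero-fills up to Bytes, which is how sub-word initializers land in the aggregate image.

    #include <cstdio>
    #include <vector>

    struct MiniAggBuffer {
      std::vector<unsigned char> buf;
      void addBytes(const unsigned char *Ptr, int Num, int Bytes) {
        for (int i = 0; i < Num; ++i)
          buf.push_back(Ptr[i]);
        for (int i = Num; i < Bytes; ++i) // zero padding, as above
          buf.push_back(0);
      }
    };

    int main() {
      MiniAggBuffer b;
      unsigned char v = 0x2A;
      b.addBytes(&v, 1, 4); // one real byte in a 4-byte slot
      for (unsigned i = 0; i < b.buf.size(); ++i)
        std::printf("%u%s", b.buf[i], i + 1 < b.buf.size() ? ", " : "\n");
      // prints: 42, 0, 0, 0
      return 0;
    }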
@@ -143,10 +136,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
void print() {
if (numSymbols == 0) {
// print out in bytes
- for (unsigned i=0; i<size; i++) {
+ for (unsigned i = 0; i < size; i++) {
if (i)
O << ", ";
- O << (unsigned int)buffer[i];
+ O << (unsigned int) buffer[i];
}
} else {
// print out in 4-bytes or 8-bytes
@@ -156,7 +149,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned int nBytes = 4;
if (AP.nvptxSubtarget.is64Bit())
nBytes = 8;
- for (pos=0; pos<size; pos+=nBytes) {
+ for (pos = 0; pos < size; pos += nBytes) {
if (pos)
O << ", ";
if (pos == nextSymbolPos) {
@@ -164,22 +157,19 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
MCSymbol *Name = AP.Mang->getSymbol(GVar);
O << *Name;
- }
- else if (ConstantExpr *Cexpr =
- dyn_cast<ConstantExpr>(v)) {
+ } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
O << *nvptx::LowerConstant(Cexpr, AP);
} else
llvm_unreachable("symbol type unknown");
nSym++;
if (nSym >= numSymbols)
- nextSymbolPos = size+1;
+ nextSymbolPos = size + 1;
else
nextSymbolPos = symbolPosInBuffer[nSym];
- } else
- if (nBytes == 4)
- O << *(unsigned int*)(buffer+pos);
- else
- O << *(unsigned long long*)(buffer+pos);
+ } else if (nBytes == 4)
+ O << *(unsigned int *)(buffer + pos);
+ else
+ O << *(unsigned long long *)(buffer + pos);
}
}
}
@@ -189,10 +179,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
virtual void emitSrcInText(StringRef filename, unsigned line);
-private :
- virtual const char *getPassName() const {
- return "NVPTX Assembly Printer";
- }
+private:
+ virtual const char *getPassName() const { return "NVPTX Assembly Printer"; }
const Function *F;
std::string CurrentFnName;
@@ -207,31 +195,28 @@ private :
void printGlobalVariable(const GlobalVariable *GVar);
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier=0);
+ const char *Modifier = 0);
void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier=0);
- void printVecModifiedImmediate(const MachineOperand &MO,
- const char *Modifier, raw_ostream &O);
+ const char *Modifier = 0);
+ void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
+ raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier=0);
+ const char *Modifier = 0);
void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
// definition autogenerated.
void printInstruction(const MachineInstr *MI, raw_ostream &O);
- void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
- bool=false);
+ void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
void printParamName(int paramIndex, raw_ostream &O);
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
void emitHeader(Module &M, raw_ostream &O);
- void emitKernelFunctionDirectives(const Function& F,
- raw_ostream &O) const;
+ void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
void emitFunctionExternParamList(const MachineFunction &MF);
void emitFunctionParamList(const Function *, raw_ostream &O);
void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
- void emitFunctionTempData(const MachineFunction &MF,
- unsigned &FrameSize);
+ void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize);
bool isImageType(const Type *Ty);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
@@ -269,17 +254,16 @@ private:
void recordAndEmitFilenames(Module &);
void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
- void emitPTXAddressSpace(unsigned int AddressSpace,
- raw_ostream &O) const;
- std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ;
- void printScalarConstant(Constant *CPV, raw_ostream &O) ;
- void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ;
- void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ;
- void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ;
+ void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
+ std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
+ void printScalarConstant(Constant *CPV, raw_ostream &O);
+ void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
+ void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+ void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
void printOperandProper(const MachineOperand &MO);
- void emitLinkageDirective(const GlobalValue* V, raw_ostream &O);
+ void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
void emitDeclarations(Module &, raw_ostream &O);
void emitDeclaration(const Function *, raw_ostream &O);
@@ -289,10 +273,9 @@ private:
LineReader *reader;
LineReader *getReader(std::string);
public:
- NVPTXAsmPrinter(TargetMachine &TM,
- MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
CurrentBankselLabelInBasicBlock = "";
VRidGlobal2LocalMap = NULL;
reader = NULL;
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index a9abc00bf3f6..6533da5102b0 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -17,17 +17,15 @@
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
-bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
- return true;
-}
+bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
if (MF.getFrameInfo()->hasStackObjects()) {
@@ -42,35 +40,39 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
if (is64bit) {
- MachineInstr *MI = BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
- NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
- BuildMI(MBB, MI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal)
- .addReg(NVPTX::VRDepot);
+ MachineInstr *MI = BuildMI(
+ MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+ NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
} else {
- MachineInstr *MI = BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
- NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
- BuildMI(MBB, MI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal)
- .addReg(NVPTX::VRDepot);
+ MachineInstr *MI = BuildMI(
+ MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
+ NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+ BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+ NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
}
- }
- else {
+ } else {
// mov %SP, %depot;
if (is64bit)
- BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame)
- .addReg(NVPTX::VRDepot);
+ BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+ NVPTX::VRFrame).addReg(NVPTX::VRDepot);
else
- BuildMI(MBB, MBBI, dl,
- tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame)
- .addReg(NVPTX::VRDepot);
+ BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+ NVPTX::VRFrame).addReg(NVPTX::VRDepot);
}
}
}
void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
+ MachineBasicBlock &MBB) const {}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void NVPTXFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN,
+ // ADJCALLSTACKUP instructions.
+ MBB.erase(I);
}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index ee87b3997e78..819f1dd3f4be 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -16,7 +16,6 @@
#include "llvm/Target/TargetFrameLowering.h"
-
namespace llvm {
class NVPTXTargetMachine;
@@ -26,13 +25,16 @@ class NVPTXFrameLowering : public TargetFrameLowering {
public:
explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
- tm(_tm), is64bit(_is64bit) {}
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),
+ is64bit(_is64bit) {}
virtual bool hasFP(const MachineFunction &MF) const;
virtual void emitPrologue(MachineFunction &MF) const;
- virtual void emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
};
} // End llvm namespace
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 4e92f0e785fe..e862988c85d1 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -11,42 +11,36 @@
//
//===----------------------------------------------------------------------===//
-
-#include "llvm/Instructions.h"
-#include "llvm/Support/raw_ostream.h"
#include "NVPTXISelDAGToDAG.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/GlobalValue.h"
#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-isel"
using namespace llvm;
-
-static cl::opt<bool>
-UseFMADInstruction("nvptx-mad-enable",
- cl::ZeroOrMore,
- cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
- cl::init(false));
+static cl::opt<bool> UseFMADInstruction(
+ "nvptx-mad-enable", cl::ZeroOrMore,
+ cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
+ cl::init(false));
static cl::opt<int>
-FMAContractLevel("nvptx-fma-level",
- cl::ZeroOrMore,
+FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
- " 1: do it 2: do it aggressively"),
- cl::init(2));
+ " 1: do it 2: do it aggressively"),
+ cl::init(2));
-
-static cl::opt<int>
-UsePrecDivF32("nvptx-prec-divf32",
- cl::ZeroOrMore,
- cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
- " IEEE Compliant F32 div.rnd if avaiable."),
- cl::init(2));
+static cl::opt<int> UsePrecDivF32(
+ "nvptx-prec-divf32", cl::ZeroOrMore,
+ cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
+ " IEEE Compliant F32 div.rnd if avaiable."),
+ cl::init(2));
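These cl::opt declarations surface directly as llc flags; an illustrative invocation (the input file name is made up):

    llc -march=nvptx64 -nvptx-mad-enable -nvptx-fma-level=1 -nvptx-prec-divf32=2 kernel.ll -o kernel.ptx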
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
@@ -55,26 +49,22 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
return new NVPTXDAGToDAGISel(TM, OptLevel);
}
-
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
CodeGenOpt::Level OptLevel)
-: SelectionDAGISel(tm, OptLevel),
- Subtarget(tm.getSubtarget<NVPTXSubtarget>())
-{
+ : SelectionDAGISel(tm, OptLevel),
+ Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
// Always do fma.f32 fpcontract if the target supports the instruction.
// Always do fma.f64 fpcontract if the target supports the instruction.
  // Do mad.f32 if nvptx-mad-enable is specified and the target does not
// support fma.f32.
doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
- doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() &&
- (FMAContractLevel>=1);
- doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() &&
- (FMAContractLevel>=1);
- doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() &&
- (FMAContractLevel==2);
- doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() &&
- (FMAContractLevel==2);
+ doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
+ doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
+ doFMAF32AGG =
+ (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
+ doFMAF64AGG =
+ (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
@@ -92,10 +82,10 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
-SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode())
- return NULL; // Already selected.
+ return NULL; // Already selected.
SDNode *ResNode = NULL;
switch (N->getOpcode()) {
@@ -105,29 +95,48 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE:
ResNode = SelectStore(N);
break;
+ case NVPTXISD::LoadV2:
+ case NVPTXISD::LoadV4:
+ ResNode = SelectLoadVector(N);
+ break;
+ case NVPTXISD::LDGV2:
+ case NVPTXISD::LDGV4:
+ case NVPTXISD::LDUV2:
+ case NVPTXISD::LDUV4:
+ ResNode = SelectLDGLDUVector(N);
+ break;
+ case NVPTXISD::StoreV2:
+ case NVPTXISD::StoreV4:
+ ResNode = SelectStoreVector(N);
+ break;
+ default:
+ break;
}
if (ResNode)
return ResNode;
return SelectCode(N);
}
-
-static unsigned int
-getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
-{
+static unsigned int getCodeAddrSpace(MemSDNode *N,
+ const NVPTXSubtarget &Subtarget) {
const Value *Src = N->getSrcValue();
if (!Src)
return NVPTX::PTXLdStInstCode::LOCAL;
if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
- case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
- case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
- case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
+ case llvm::ADDRESS_SPACE_LOCAL:
+ return NVPTX::PTXLdStInstCode::LOCAL;
+ case llvm::ADDRESS_SPACE_GLOBAL:
+ return NVPTX::PTXLdStInstCode::GLOBAL;
+ case llvm::ADDRESS_SPACE_SHARED:
+ return NVPTX::PTXLdStInstCode::SHARED;
case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
return NVPTX::PTXLdStInstCode::CONSTANT;
- case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
- case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
+ case llvm::ADDRESS_SPACE_GENERIC:
+ return NVPTX::PTXLdStInstCode::GENERIC;
+ case llvm::ADDRESS_SPACE_PARAM:
+ return NVPTX::PTXLdStInstCode::PARAM;
case llvm::ADDRESS_SPACE_CONST:
// If the arch supports generic address space, translate it to GLOBAL
// for correctness.
@@ -138,18 +147,18 @@ getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
return NVPTX::PTXLdStInstCode::GLOBAL;
else
return NVPTX::PTXLdStInstCode::CONSTANT;
- default: break;
+ default:
+ break;
}
}
return NVPTX::PTXLdStInstCode::LOCAL;
}
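In effect (the numeric address-space convention is an assumption of this sketch):

    // i32 addrspace(1)* p  -> ADDRESS_SPACE_GLOBAL -> ld.global / st.global
    // i32 addrspace(3)* p  -> ADDRESS_SPACE_SHARED -> ld.shared / st.shared
    // no source value info -> falls back to LOCAL, per the early return above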
-
-SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
- SDNode *NVPTXLD= NULL;
+ SDNode *NVPTXLD = NULL;
// do not support pre/post inc/dec
if (LD->isIndexed())
@@ -189,7 +198,7 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned fromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned fromTypeWidth = ScalarVT.getSizeInBits();
unsigned int fromType;
if ((LD->getExtensionType() == ISD::SEXTLOAD))
fromType = NVPTX::PTXLdStInstCode::Signed;
@@ -208,121 +217,166 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
if (SelectDirectAddr(N1, Addr)) {
switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_avar; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_avar; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_avar; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_avar;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- Addr, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 7);
- } else if (Subtarget.is64Bit()?
- SelectADDRsi64(N1.getNode(), N1, Base, Offset):
- SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Addr, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_asi; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_asi; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_asi; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_asi;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 8);
- } else if (Subtarget.is64Bit()?
- SelectADDRri64(N1.getNode(), N1, Base, Offset):
- SelectADDRri(N1.getNode(), N1, Base, Offset)) {
- switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
- default: return NULL;
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_ari_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_ari;
+ break;
+ default:
+ return NULL;
+ }
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 8);
- }
- else {
- switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
- default: return NULL;
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_areg_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8:
+ Opcode = NVPTX::LD_i8_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LD_i16_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LD_i32_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LD_i64_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LD_f32_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LD_f64_areg;
+ break;
+ default:
+ return NULL;
+ }
}
- SDValue Ops[] = { getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(fromType),
- getI32Imm(fromTypeWidth),
- N1, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
- MVT::Other, Ops, 7);
+ SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(fromType),
+ getI32Imm(fromTypeWidth), N1, Chain };
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
}
if (NVPTXLD != NULL) {
@@ -334,7 +388,590 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return NVPTXLD;
}
-SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ EVT LoadedVT = MemSD->getMemoryVT();
+
+ if (!LoadedVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+ // Volatile Setting
+  // - .volatile is only available for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = LoadedVT.getSimpleVT();
+
+ // Type Setting: fromType + fromTypeWidth
+ //
+ // Sign : ISD::SEXTLOAD
+  // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
+ // type is integer
+ // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned FromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int FromType;
+ // The last operand holds the original LoadSDNode::getExtensionType() value
+ unsigned ExtensionType = cast<ConstantSDNode>(
+ N->getOperand(N->getNumOperands() - 1))->getZExtValue();
+ if (ExtensionType == ISD::SEXTLOAD)
+ FromType = NVPTX::PTXLdStInstCode::Signed;
+ else if (ScalarVT.isFloatingPoint())
+ FromType = NVPTX::PTXLdStInstCode::Float;
+ else
+ FromType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ unsigned VecType;
+
+ switch (N->getOpcode()) {
+ case NVPTXISD::LoadV2:
+ VecType = NVPTX::PTXLdStInstCode::V2;
+ break;
+ case NVPTXISD::LoadV4:
+ VecType = NVPTX::PTXLdStInstCode::V4;
+ break;
+ default:
+ return NULL;
+ }
+
+ EVT EltVT = N->getValueType(0);
+
+ if (SelectDirectAddr(Op1, Addr)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_avar;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_avar;
+ break;
+ }
+ break;
+ }
+
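+ // Operand layout shared by all the LDV forms: the ld/st flag immediates,
+ // then the address operand(s), then the chain.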
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Addr, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_asi;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_asi;
+ break;
+ }
+ break;
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Base, Offset, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_ari_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_ari_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_ari;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_ari;
+ break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Base, Offset, Chain };
+
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_areg_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_areg_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v2_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v2_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v2_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::LDV_i64_v2_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v2_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::LDV_f64_v2_areg;
+ break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::LDV_i8_v4_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::LDV_i16_v4_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::LDV_i32_v4_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::LDV_f32_v4_areg;
+ break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType), getI32Imm(FromType),
+ getI32Imm(FromTypeWidth), Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ }
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
+
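+// SelectLDGLDUVector - Select a machine node for the NVPTXISD::LDGV2/LDGV4
+// and LDUV2/LDUV4 vector nodes (ld.global.nc / ldu.global). Only the opcode
+// varies with the element type and pointer width; the operands are just the
+// address and the chain.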
+SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+
+ EVT RetVT = N->getValueType(0);
+
+ // Select opcode
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32;
+ break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
+ break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
+ break;
+ }
+ break;
+ }
+ }
+
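+ // No flag immediates here: the caching behavior is implied by the chosen
+ // LDG/LDU opcode, so only the address and the chain are passed.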
+ SDValue Ops[] = { Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(N);
EVT StoreVT = ST->getMemoryVT();
@@ -375,7 +1012,7 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
// - for integer type, always use 'u'
//
MVT ScalarVT = SimpleVT.getScalarType();
- unsigned toTypeWidth = ScalarVT.getSizeInBits();
+ unsigned toTypeWidth = ScalarVT.getSizeInBits();
unsigned int toType;
if (ScalarVT.isFloatingPoint())
toType = NVPTX::PTXLdStInstCode::Float;
@@ -394,124 +1031,166 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
if (SelectDirectAddr(N2, Addr)) {
switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_avar; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_avar; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_avar; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_avar;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- Addr, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 8);
- } else if (Subtarget.is64Bit()?
- SelectADDRsi64(N2.getNode(), N2, Base, Offset):
- SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Addr, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_asi; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_asi; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_asi; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
- default: return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_asi;
+ break;
+ default:
+ return NULL;
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- Base, Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 9);
- } else if (Subtarget.is64Bit()?
- SelectADDRri64(N2.getNode(), N2, Base, Offset):
- SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
- default: return NULL;
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_ari_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_ari;
+ break;
+ default:
+ return NULL;
+ }
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- Base, Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 9);
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), Base, Offset, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
} else {
- switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_areg_64;
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ switch (SourceVT) {
+ case MVT::i8:
+ Opcode = NVPTX::ST_i8_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::ST_i16_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::ST_i32_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::ST_i64_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::ST_f32_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::ST_f64_areg;
+ break;
+ default:
+ return NULL;
+ }
}
- SDValue Ops[] = { N1,
- getI32Imm(isVolatile),
- getI32Imm(codeAddrSpace),
- getI32Imm(vecType),
- getI32Imm(toType),
- getI32Imm(toTypeWidth),
- N2, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 8);
+ SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+ getI32Imm(vecType), getI32Imm(toType),
+ getI32Imm(toTypeWidth), N2, Chain };
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
}
if (NVPTXST != NULL) {
@@ -523,12 +1202,388 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return NVPTXST;
}
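+
+// SelectStoreVector - Select a machine node for NVPTXISD::StoreV2/StoreV4.
+// The stored values are collected first, then the ld/st flag immediates,
+// and finally the address operands for whichever addressing form matches.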
+SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *ST;
+ EVT EltVT = Op1.getValueType();
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ EVT StoreVT = MemSD->getMemoryVT();
+
+ // Address Space Setting
+ unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
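+ // PTX has no store to the constant address space, so fail loudly here
+ // rather than emit an invalid instruction.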
+ if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
+ report_fatal_error("Cannot store to pointer that points to constant "
+ "memory space");
+ }
+
+ // Volatile Setting
+ // - .volatile is only available for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
+
+ // Type Setting: toType + toTypeWidth
+ // - for integer type, always use 'u'
+ assert(StoreVT.isSimple() && "Store value is not simple");
+ MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
+ unsigned ToTypeWidth = ScalarVT.getSizeInBits();
+ unsigned ToType;
+ if (ScalarVT.isFloatingPoint())
+ ToType = NVPTX::PTXLdStInstCode::Float;
+ else
+ ToType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ SmallVector<SDValue, 12> StOps;
+ SDValue N2;
+ unsigned VecType;
+
+ switch (N->getOpcode()) {
+ case NVPTXISD::StoreV2:
+ VecType = NVPTX::PTXLdStInstCode::V2;
+ StOps.push_back(N->getOperand(1));
+ StOps.push_back(N->getOperand(2));
+ N2 = N->getOperand(3);
+ break;
+ case NVPTXISD::StoreV4:
+ VecType = NVPTX::PTXLdStInstCode::V4;
+ StOps.push_back(N->getOperand(1));
+ StOps.push_back(N->getOperand(2));
+ StOps.push_back(N->getOperand(3));
+ StOps.push_back(N->getOperand(4));
+ N2 = N->getOperand(5);
+ break;
+ default:
+ return NULL;
+ }
+
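+ // The ld/st flag immediates follow the stored values; the address
+ // operands and the chain are appended once the addressing form is known.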
+ StOps.push_back(getI32Imm(IsVolatile));
+ StOps.push_back(getI32Imm(CodeAddrSpace));
+ StOps.push_back(getI32Imm(VecType));
+ StOps.push_back(getI32Imm(ToType));
+ StOps.push_back(getI32Imm(ToTypeWidth));
+
+ if (SelectDirectAddr(N2, Addr)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_avar;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_avar;
+ break;
+ }
+ break;
+ }
+ StOps.push_back(Addr);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_asi;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_asi;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_asi;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_asi;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_asi;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_asi;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_asi;
+ break;
+ }
+ break;
+ }
+ StOps.push_back(Base);
+ StOps.push_back(Offset);
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_ari_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_ari_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_ari_64;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_ari_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_ari_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_ari_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_ari_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_ari;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_ari;
+ break;
+ }
+ break;
+ }
+ }
+ StOps.push_back(Base);
+ StOps.push_back(Offset);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_areg_64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_areg_64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_areg_64;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_areg_64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_areg_64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_areg_64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_areg_64;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v2_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v2_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v2_areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::STV_i64_v2_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v2_areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::STV_f64_v2_areg;
+ break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::STV_i8_v4_areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::STV_i16_v4_areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::STV_i32_v4_areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::STV_f32_v4_areg;
+ break;
+ }
+ break;
+ }
+ }
+ StOps.push_back(N2);
+ }
+
+ StOps.push_back(Chain);
+
+ ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return ST;
+}
+
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
// Return true if TGA or ES.
- if (N.getOpcode() == ISD::TargetGlobalAddress
- || N.getOpcode() == ISD::TargetExternalSymbol) {
+ if (N.getOpcode() == ISD::TargetGlobalAddress ||
+ N.getOpcode() == ISD::TargetExternalSymbol) {
Address = N;
return true;
}
@@ -546,12 +1601,11 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
}
// symbol+offset
-bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset,
- MVT mvt) {
+bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
+ SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- SDValue base=Addr.getOperand(0);
+ SDValue base = Addr.getOperand(0);
if (SelectDirectAddr(base, Base)) {
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
return true;
@@ -574,9 +1628,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
}
// register+offset
-bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
- SDValue &Base, SDValue &Offset,
- MVT mvt) {
+bool NVPTXDAGToDAGISel::SelectADDRri_imp(
+ SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
Offset = CurDAG->getTargetConstant(0, mvt);
@@ -584,7 +1637,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
}
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // direct calls.
+ return false; // direct calls.
if (Addr.getOpcode() == ISD::ADD) {
if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
@@ -592,7 +1645,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
}
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
if (FrameIndexSDNode *FIN =
- dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
// Constant offset from frame ref.
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
else
@@ -624,8 +1677,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
// (See SelectionDAGNodes.h). So we need to check for both.
if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Src = mN->getSrcValue();
- }
- else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
+ } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
Src = mN->getSrcValue();
}
if (!Src)
@@ -637,13 +1689,13 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
-bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
+bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
switch (ConstraintCode) {
- default: return true;
- case 'm': // memory
+ default:
+ return true;
+ case 'm': // memory
if (SelectDirectAddr(Op, Op0)) {
OutOps.push_back(Op0);
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
@@ -666,10 +1718,8 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
// pattern matcher inserts a bunch of IMOVi8rr to convert
// the imm to i8imm, and this causes instruction selection
// to fail.
-bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
- SDValue &Retval) {
- if (!(N.getOpcode() == ISD::UNDEF) &&
- !(N.getOpcode() == ISD::Constant))
+bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
+ if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
return false;
if (N.getOpcode() == ISD::UNDEF)
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index ccd69b29dd42..70e8e464297d 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -18,8 +18,8 @@
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Intrinsics.h"
using namespace llvm;
namespace {
@@ -64,16 +64,18 @@ public:
const NVPTXSubtarget &Subtarget;
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
+ virtual bool SelectInlineAsmMemoryOperand(
+ const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps);
private:
- // Include the pieces autogenerated from the target description.
+// Include the pieces autogenerated from the target description.
#include "NVPTXGenDAGISel.inc"
SDNode *Select(SDNode *N);
- SDNode* SelectLoad(SDNode *N);
- SDNode* SelectStore(SDNode *N);
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectLoadVector(SDNode *N);
+ SDNode *SelectLDGLDUVector(SDNode *N);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectStoreVector(SDNode *N);
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
@@ -96,7 +98,6 @@ private:
bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
SDValue &Offset);
-
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f1a99d77be9d..6e01a5a82071 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -11,30 +11,29 @@
//
//===----------------------------------------------------------------------===//
-
-#include "NVPTX.h"
#include "NVPTXISelLowering.h"
+#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Module.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/MC/MCSectionELF.h"
#include <sstream>
#undef DEBUG_TYPE
@@ -44,28 +43,39 @@ using namespace llvm;
static unsigned int uniqueCallSite = 0;
-static cl::opt<bool>
-RetainVectorOperands("nvptx-codegen-vectors",
- cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
- cl::init(true));
+static cl::opt<bool> sched4reg(
+ "nvptx-sched4reg",
+ cl::desc("NVPTX Specific: schedule for register pressure"), cl::init(false));
-static cl::opt<bool>
-sched4reg("nvptx-sched4reg",
- cl::desc("NVPTX Specific: schedule for register pressue"),
- cl::init(false));
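+// Return true for the vector types that the NVPTX backend lowers to PTX
+// v2/v4 vector loads and stores.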
+static bool IsPTXVectorType(MVT VT) {
+ switch (VT.SimpleTy) {
+ default:
+ return false;
+ case MVT::v2i8:
+ case MVT::v4i8:
+ case MVT::v2i16:
+ case MVT::v4i16:
+ case MVT::v2i32:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return true;
+ }
+}
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
-: TargetLowering(TM, new NVPTXTargetObjectFile()),
- nvTM(&TM),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
+ nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
// always lower memset, memcpy, and memmove intrinsics to load/store
// instructions, rather
// than generating calls to memset, memcpy or memmove.
- maxStoresPerMemset = (unsigned)0xFFFFFFFF;
- maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
- maxStoresPerMemmove = (unsigned)0xFFFFFFFF;
+ MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
+ MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
+ MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
setBooleanContents(ZeroOrNegativeOneBooleanContent);
@@ -87,82 +97,51 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
- if (RetainVectorOperands) {
- addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
- addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
- addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
- addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
- addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
- addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
- addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
- addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
- addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
- addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
-
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom);
-
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom);
- }
-
// Operations not directly supported by NVPTX.
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i8, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (nvptxSubtarget.hasROT64()) {
- setOperationAction(ISD::ROTL , MVT::i64, Legal);
- setOperationAction(ISD::ROTR , MVT::i64, Legal);
- }
- else {
- setOperationAction(ISD::ROTL , MVT::i64, Expand);
- setOperationAction(ISD::ROTR , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Legal);
+ setOperationAction(ISD::ROTR, MVT::i64, Legal);
+ } else {
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
}
if (nvptxSubtarget.hasROT32()) {
- setOperationAction(ISD::ROTL , MVT::i32, Legal);
- setOperationAction(ISD::ROTR , MVT::i32, Legal);
- }
- else {
- setOperationAction(ISD::ROTL , MVT::i32, Expand);
- setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTR, MVT::i32, Legal);
+ } else {
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
}
- setOperationAction(ISD::ROTL , MVT::i16, Expand);
- setOperationAction(ISD::ROTR , MVT::i16, Expand);
- setOperationAction(ISD::ROTL , MVT::i8, Expand);
- setOperationAction(ISD::ROTR , MVT::i8, Expand);
- setOperationAction(ISD::BSWAP , MVT::i16, Expand);
- setOperationAction(ISD::BSWAP , MVT::i32, Expand);
- setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::ROTL, MVT::i8, Expand);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i16, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
// Indirect branch is not supported.
// This also disables Jump Table creation.
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
- setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
- setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
// We want to legalize constant-related memmove and memcpy
// intrinsics.
@@ -185,92 +164,114 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setTruncStoreAction(MVT::i8, MVT::i1, Expand);
// This is legal in NVPTX
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
// TRAP can be lowered to PTX trap
- setOperationAction(ISD::TRAP, MVT::Other, Legal);
-
- // By default, CONCAT_VECTORS is implemented via store/load
- // through stack. It is slow and uses local memory. We need
- // to custom-lowering them.
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom);
-
- // Expand vector int to float and float to int conversions
- // - For SINT_TO_FP and UINT_TO_FP, the src type
- // (Node->getOperand(0).getValueType())
- // is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
- // the dest type (Node->getValueType(0)) is used.
- //
- // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
- // case, and
- // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
- //
- // That is why v4i32 or v2i32 are used here.
- //
- // The expansion for vectors happens in VectorLegalizer::LegalizeOp()
- // (LegalizeVectorOps.cpp).
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Register custom handling for vector loads/stores
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
+ ++i) {
+ MVT VT = (MVT::SimpleValueType) i;
+ if (IsPTXVectorType(VT)) {
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
+ }
+ }
// Now deduce the information based on the above mentioned
// actions
computeRegisterProperties();
}
-
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
- case NVPTXISD::CALL: return "NVPTXISD::CALL";
- case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG";
- case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper";
- case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin";
- case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam";
+ default:
+ return 0;
+ case NVPTXISD::CALL:
+ return "NVPTXISD::CALL";
+ case NVPTXISD::RET_FLAG:
+ return "NVPTXISD::RET_FLAG";
+ case NVPTXISD::Wrapper:
+ return "NVPTXISD::Wrapper";
+ case NVPTXISD::NVBuiltin:
+ return "NVPTXISD::NVBuiltin";
+ case NVPTXISD::DeclareParam:
+ return "NVPTXISD::DeclareParam";
case NVPTXISD::DeclareScalarParam:
return "NVPTXISD::DeclareScalarParam";
- case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet";
- case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
- case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall";
- case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam";
- case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam";
- case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32";
- case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32";
- case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam";
- case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin";
- case NVPTXISD::CallArg: return "NVPTXISD::CallArg";
- case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg";
- case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd";
- case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid";
- case NVPTXISD::CallVal: return "NVPTXISD::CallVal";
- case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol";
- case NVPTXISD::Prototype: return "NVPTXISD::Prototype";
- case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam";
- case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval";
- case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval";
- case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval";
- case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam";
- case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
- case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
- case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
+ case NVPTXISD::DeclareRet:
+ return "NVPTXISD::DeclareRet";
+ case NVPTXISD::DeclareRetParam:
+ return "NVPTXISD::DeclareRetParam";
+ case NVPTXISD::PrintCall:
+ return "NVPTXISD::PrintCall";
+ case NVPTXISD::LoadParam:
+ return "NVPTXISD::LoadParam";
+ case NVPTXISD::StoreParam:
+ return "NVPTXISD::StoreParam";
+ case NVPTXISD::StoreParamS32:
+ return "NVPTXISD::StoreParamS32";
+ case NVPTXISD::StoreParamU32:
+ return "NVPTXISD::StoreParamU32";
+ case NVPTXISD::MoveToParam:
+ return "NVPTXISD::MoveToParam";
+ case NVPTXISD::CallArgBegin:
+ return "NVPTXISD::CallArgBegin";
+ case NVPTXISD::CallArg:
+ return "NVPTXISD::CallArg";
+ case NVPTXISD::LastCallArg:
+ return "NVPTXISD::LastCallArg";
+ case NVPTXISD::CallArgEnd:
+ return "NVPTXISD::CallArgEnd";
+ case NVPTXISD::CallVoid:
+ return "NVPTXISD::CallVoid";
+ case NVPTXISD::CallVal:
+ return "NVPTXISD::CallVal";
+ case NVPTXISD::CallSymbol:
+ return "NVPTXISD::CallSymbol";
+ case NVPTXISD::Prototype:
+ return "NVPTXISD::Prototype";
+ case NVPTXISD::MoveParam:
+ return "NVPTXISD::MoveParam";
+ case NVPTXISD::MoveRetval:
+ return "NVPTXISD::MoveRetval";
+ case NVPTXISD::MoveToRetval:
+ return "NVPTXISD::MoveToRetval";
+ case NVPTXISD::StoreRetval:
+ return "NVPTXISD::StoreRetval";
+ case NVPTXISD::PseudoUseParam:
+ return "NVPTXISD::PseudoUseParam";
+ case NVPTXISD::RETURN:
+ return "NVPTXISD::RETURN";
+ case NVPTXISD::CallSeqBegin:
+ return "NVPTXISD::CallSeqBegin";
+ case NVPTXISD::CallSeqEnd:
+ return "NVPTXISD::CallSeqEnd";
+ case NVPTXISD::LoadV2:
+ return "NVPTXISD::LoadV2";
+ case NVPTXISD::LoadV4:
+ return "NVPTXISD::LoadV4";
+ case NVPTXISD::LDGV2:
+ return "NVPTXISD::LDGV2";
+ case NVPTXISD::LDGV4:
+ return "NVPTXISD::LDGV4";
+ case NVPTXISD::LDUV2:
+ return "NVPTXISD::LDUV2";
+ case NVPTXISD::LDUV4:
+ return "NVPTXISD::LDUV4";
+ case NVPTXISD::StoreV2:
+ return "NVPTXISD::StoreV2";
+ case NVPTXISD::StoreV4:
+ return "NVPTXISD::StoreV4";
}
}
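+
+// Vectors of i1 are split into individual elements rather than widened;
+// PTX predicate values have no vector load/store form.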
+bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const {
+ return VT == MVT::i1;
+}
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
@@ -280,10 +281,9 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}
-std::string NVPTXTargetLowering::getPrototype(Type *retTy,
- const ArgListTy &Args,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- unsigned retAlignment) const {
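+// Build the textual PTX prototype (".param"/".reg" declarations) describing
+// the callee's return value and parameters; LowerCall uses this when the
+// call site needs an explicit prototype (for example, indirect calls).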
+std::string NVPTXTargetLowering::getPrototype(
+ Type *retTy, const ArgListTy &Args,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const {
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
@@ -299,54 +299,47 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
unsigned size = 0;
if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
size = ITy->getBitWidth();
- if (size < 32) size = 32;
- }
- else {
+ if (size < 32)
+ size = 32;
+ } else {
assert(retTy->isFloatingPointTy() &&
"Floating point type expected here");
size = retTy->getPrimitiveSizeInBits();
}
O << ".param .b" << size << " _";
- }
- else if (isa<PointerType>(retTy))
- O << ".param .b" << getPointerTy().getSizeInBits()
- << " _";
+ } else if (isa<PointerType>(retTy))
+ O << ".param .b" << getPointerTy().getSizeInBits() << " _";
else {
if ((retTy->getTypeID() == Type::StructTyID) ||
isa<VectorType>(retTy)) {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, retTy, vtparts);
unsigned totalsz = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
elems = vtparts[i].getVectorNumElements();
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
- totalsz += sz/8;
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ totalsz += sz / 8;
}
}
- O << ".param .align "
- << retAlignment
- << " .b8 _["
- << totalsz << "]";
- }
- else {
- assert(false &&
- "Unknown return type");
+ O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
+ } else {
+ assert(false && "Unknown return type");
}
}
- }
- else {
+ } else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, retTy, vtparts);
unsigned idx = 0;
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -354,14 +347,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0, je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << ".reg .b" << sz << " _";
- if (j<je-1) O << ", ";
+ if (j < je - 1)
+ O << ", ";
++idx;
}
- if (i < e-1)
+ if (i < e - 1)
O << ", ";
}
}
@@ -372,7 +367,7 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
bool first = true;
MVT thePointerTy = getPointerTy();
- for (unsigned i=0,e=Args.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
const Type *Ty = Args[i].Ty;
if (!first) {
O << ", ";
@@ -383,9 +378,9 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
unsigned sz = 0;
if (isa<IntegerType>(Ty)) {
sz = cast<IntegerType>(Ty)->getBitWidth();
- if (sz < 32) sz = 32;
- }
- else if (isa<PointerType>(Ty))
+ if (sz < 32)
+ sz = 32;
+ } else if (isa<PointerType>(Ty))
sz = thePointerTy.getSizeInBits();
else
sz = Ty->getPrimitiveSizeInBits();
@@ -397,23 +392,20 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
continue;
}
const PointerType *PTy = dyn_cast<PointerType>(Ty);
- assert(PTy &&
- "Param with byval attribute should be a pointer type");
+ assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
if (isABI) {
unsigned align = Outs[i].Flags.getByValAlign();
unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
- O << ".param .align " << align
- << " .b8 ";
+ O << ".param .align " << align << " .b8 ";
O << "_";
O << "[" << sz << "]";
continue;
- }
- else {
+ } else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*this, ETy, vtparts);
- for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
if (vtparts[i].isVector()) {
@@ -421,14 +413,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
elemtype = vtparts[i].getVectorElementType();
}
- for (unsigned j=0,je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
O << ".reg .b" << sz << " ";
O << "_";
- if (j<je-1) O << ", ";
+ if (j < je - 1)
+ O << ", ";
}
- if (i<e-1)
+ if (i < e - 1)
O << ", ";
}
continue;
@@ -438,27 +432,25 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
return O.str();
}
-
-SDValue
-NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
+SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
- SDValue Chain = CLI.Chain;
- SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
- ArgListTy &Args = CLI.Args;
- Type *retTy = CLI.RetTy;
- ImmutableCallSite *CS = CLI.CS;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ ArgListTy &Args = CLI.Args;
+ Type *retTy = CLI.RetTy;
+ ImmutableCallSite *CS = CLI.CS;
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
SDValue tempChain = Chain;
- Chain = DAG.getCALLSEQ_START(Chain,
- DAG.getIntPtrConstant(uniqueCallSite, true));
+ Chain =
+ DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true));
SDValue InFlag = Chain.getValue(1);
assert((Outs.size() == Args.size()) &&
@@ -466,7 +458,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned paramCount = 0;
// Declare the .params or .reg need to pass values
// to the function
- for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
EVT VT = Outs[i].VT;
if (Outs[i].Flags.isByVal() == false) {
@@ -477,19 +469,20 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isABI)
isReg = 0;
unsigned sz = VT.getSizeInBits();
- if (VT.isInteger() && (sz < 32)) sz = 32;
+ if (VT.isInteger() && (sz < 32))
+ sz = 32;
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain,
DAG.getConstant(paramCount, MVT::i32),
DAG.getConstant(sz, MVT::i32),
- DAG.getConstant(isReg, MVT::i32),
- InFlag };
+ DAG.getConstant(isReg, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
DeclareParamOps, 5);
InFlag = Chain.getValue(1);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(0, MVT::i32), OutVals[i], InFlag };
+ DAG.getConstant(0, MVT::i32), OutVals[i],
+ InFlag };
unsigned opcode = NVPTXISD::StoreParam;
if (isReg)
@@ -509,8 +502,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// struct or vector
SmallVector<EVT, 16> vtparts;
const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
- assert(PTy &&
- "Type of a byval parameter should be pointer");
+ assert(PTy && "Type of a byval parameter should be pointer");
ComputeValueVTs(*this, PTy->getElementType(), vtparts);
if (isABI) {
@@ -520,40 +512,41 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The ByValAlign in the Outs[i].Flags is always set at this point,
// so we don't need to worry about natural alignment or not.
// See TargetLowering::LowerCallTo().
- SDValue DeclareParamOps[] = { Chain,
- DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(sz, MVT::i32),
- InFlag };
+ SDValue DeclareParamOps[] = {
+ Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
+ DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
+ InFlag
+ };
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps, 5);
InFlag = Chain.getValue(1);
unsigned curOffset = 0;
- for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
unsigned elems = 1;
EVT elemtype = vtparts[j];
if (vtparts[j].isVector()) {
elems = vtparts[j].getVectorNumElements();
elemtype = vtparts[j].getVectorElementType();
}
- for (unsigned k=0,ke=elems; k!=ke; ++k) {
+ for (unsigned k = 0, ke = elems; k != ke; ++k) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
- SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- OutVals[i],
- DAG.getConstant(curOffset,
- getPointerTy()));
- SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
- MachinePointerInfo(), false, false, false, 0);
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ SDValue srcAddr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+ DAG.getConstant(curOffset, getPointerTy()));
+ SDValue theVal =
+ DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+ MachinePointerInfo(), false, false, false, 0);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
- MVT::i32),
- DAG.getConstant(curOffset, MVT::i32),
- theVal, InFlag };
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(curOffset, MVT::i32),
+ theVal, InFlag };
Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
CopyParamOps, 5);
InFlag = Chain.getValue(1);
- curOffset += sz/8;
+ curOffset += sz / 8;
}
}
++paramCount;
@@ -562,30 +555,31 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Non-abi, struct or vector
// Declare a bunch of .reg .b<size> .param<n>
unsigned curOffset = 0;
- for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
unsigned elems = 1;
EVT elemtype = vtparts[j];
if (vtparts[j].isVector()) {
elems = vtparts[j].getVectorNumElements();
elemtype = vtparts[j].getVectorElementType();
}
- for (unsigned k=0,ke=elems; k!=ke; ++k) {
+ for (unsigned k = 0, ke = elems; k != ke; ++k) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
- MVT::i32),
- DAG.getConstant(sz, MVT::i32),
- DAG.getConstant(1, MVT::i32),
- InFlag };
+ SDValue DeclareParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(sz, MVT::i32),
+ DAG.getConstant(1, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
DeclareParamOps, 5);
InFlag = Chain.getValue(1);
- SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
- DAG.getConstant(curOffset,
- getPointerTy()));
- SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
- MachinePointerInfo(), false, false, false, 0);
+ SDValue srcAddr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+ DAG.getConstant(curOffset, getPointerTy()));
+ SDValue theVal =
+ DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(),
+ false, false, false, 0);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
DAG.getConstant(0, MVT::i32), theVal,
@@ -610,20 +604,21 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
// individual .reg .b<size> func_retval<0..> for non ABI
unsigned resultsz = 0;
- for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = resvtparts[i];
if (resvtparts[i].isVector()) {
elems = resvtparts[i].getVectorNumElements();
elemtype = resvtparts[i].getVectorElementType();
}
- for (unsigned j=0,je=elems; j!=je; ++j) {
+ for (unsigned j = 0, je = elems; j != je; ++j) {
unsigned sz = elemtype.getSizeInBits();
if (isABI == false) {
- if (elemtype.isInteger() && (sz < 32)) sz = 32;
- }
- else {
- if (elemtype.isInteger() && (sz < 8)) sz = 8;
+ if (elemtype.isInteger() && (sz < 32))
+ sz = 32;
+ } else {
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
}
if (isABI == false) {
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -641,7 +636,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
if (isABI) {
if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
- retTy->isPointerTy() ) {
+ retTy->isPointerTy()) {
// Scalar needs to be at least 32-bit wide
if (resultsz < 32)
resultsz = 32;
@@ -652,8 +647,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
DeclareRetOps, 5);
InFlag = Chain.getValue(1);
- }
- else {
+ } else {
if (Func) { // direct call
if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
retAlignment = getDataLayout()->getABITypeAlignment(retTy);
@@ -663,10 +657,10 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
retAlignment = getDataLayout()->getABITypeAlignment(retTy);
}
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
- MVT::i32),
- DAG.getConstant(resultsz/8, MVT::i32),
- DAG.getConstant(0, MVT::i32), InFlag };
+ SDValue DeclareRetOps[] = { Chain,
+ DAG.getConstant(retAlignment, MVT::i32),
+ DAG.getConstant(resultsz / 8, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
DeclareRetOps, 5);
InFlag = Chain.getValue(1);
@@ -684,24 +678,24 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// INLINEASM SDNode.
SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
- const char *asmstr = nvTM->getManagedStrPool()->
- getManagedString(proto_string.c_str())->c_str();
- SDValue InlineAsmOps[] = { Chain,
- DAG.getTargetExternalSymbol(asmstr,
- getPointerTy()),
- DAG.getMDNode(0),
- DAG.getTargetConstant(0, MVT::i32), InFlag };
+ const char *asmstr = nvTM->getManagedStrPool()
+ ->getManagedString(proto_string.c_str())->c_str();
+ SDValue InlineAsmOps[] = {
+ Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()),
+ DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag
+ };
Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
InFlag = Chain.getValue(1);
}
// Op to just print "call"
SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue PrintCallOps[] = { Chain,
- DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
- : retCount, MVT::i32),
- InFlag };
- Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl,
- PrintCallVTs, PrintCallOps, 3);
+ SDValue PrintCallOps[] = {
+ Chain,
+ DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32),
+ InFlag
+ };
+ Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
+ dl, PrintCallVTs, PrintCallOps, 3);
InFlag = Chain.getValue(1);
// Ops to print out the function name
@@ -717,31 +711,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallArgBeginOps, 2);
InFlag = Chain.getValue(1);
- for (unsigned i=0, e=paramCount; i!=e; ++i) {
+ for (unsigned i = 0, e = paramCount; i != e; ++i) {
unsigned opcode;
- if (i==(e-1))
+ if (i == (e - 1))
opcode = NVPTXISD::LastCallArg;
else
opcode = NVPTXISD::CallArg;
SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
- DAG.getConstant(i, MVT::i32),
- InFlag };
+ DAG.getConstant(i, MVT::i32), InFlag };
Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
InFlag = Chain.getValue(1);
}
SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CallArgEndOps[] = { Chain,
- DAG.getConstant(Func ? 1 : 0, MVT::i32),
+ SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
InFlag };
- Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
- 3);
+ Chain =
+ DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3);
InFlag = Chain.getValue(1);
if (!Func) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue PrototypeOps[] = { Chain,
- DAG.getConstant(uniqueCallSite, MVT::i32),
+ SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
InFlag };
Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
InFlag = Chain.getValue(1);
@@ -751,33 +742,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Ins.size() > 0) {
if (isABI) {
unsigned resoffset = 0;
- for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = Ins[i].VT.getSizeInBits();
- if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
- std::vector<EVT> LoadRetVTs;
- LoadRetVTs.push_back(Ins[i].VT);
- LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue);
- std::vector<SDValue> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ if (Ins[i].VT.isInteger() && (sz < 8))
+ sz = 8;
+ EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
+ SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(resoffset, MVT::i32), InFlag };
SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
- &LoadRetOps[0], LoadRetOps.size());
+ LoadRetOps, array_lengthof(LoadRetOps));
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
InVals.push_back(retval);
- resoffset += sz/8;
+ resoffset += sz / 8;
}
- }
- else {
+ } else {
SmallVector<EVT, 16> resvtparts;
ComputeValueVTs(*this, retTy, resvtparts);
assert(Ins.size() == resvtparts.size() &&
"Unexpected number of return values in non-ABI case");
unsigned paramNum = 0;
- for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(EVT(Ins[i].VT) == resvtparts[i] &&
"Unexpected EVT type in non-ABI case");
unsigned numelems = 1;
@@ -787,17 +773,13 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
elemtype = Ins[i].VT.getVectorElementType();
}
std::vector<SDValue> tempRetVals;
- for (unsigned j=0; j<numelems; ++j) {
- std::vector<EVT> MoveRetVTs;
- MoveRetVTs.push_back(elemtype);
- MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue);
- std::vector<SDValue> MoveRetOps;
- MoveRetOps.push_back(Chain);
- MoveRetOps.push_back(DAG.getConstant(0, MVT::i32));
- MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32));
- MoveRetOps.push_back(InFlag);
+ for (unsigned j = 0; j < numelems; ++j) {
+ EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
+ SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(paramNum, MVT::i32),
+ InFlag };
SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
- &MoveRetOps[0], MoveRetOps.size());
+ MoveRetOps, array_lengthof(MoveRetOps));
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
tempRetVals.push_back(retval);
@@ -811,9 +793,8 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
}
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(uniqueCallSite, true),
- DAG.getIntPtrConstant(uniqueCallSite+1, true),
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
+ DAG.getIntPtrConstant(uniqueCallSite + 1, true),
InFlag);
uniqueCallSite++;
@@ -826,76 +807,183 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
// We use extract/insert/build vector just as LegalizeOp() does in LLVM 2.5.
-SDValue NVPTXTargetLowering::
-LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+SDValue
+NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
SmallVector<SDValue, 8> Ops;
unsigned NumOperands = Node->getNumOperands();
- for (unsigned i=0; i < NumOperands; ++i) {
+ for (unsigned i = 0; i < NumOperands; ++i) {
SDValue SubOp = Node->getOperand(i);
EVT VVT = SubOp.getNode()->getValueType(0);
EVT EltVT = VVT.getVectorElementType();
unsigned NumSubElem = VVT.getVectorNumElements();
- for (unsigned j=0; j < NumSubElem; ++j) {
+ for (unsigned j = 0; j < NumSubElem; ++j) {
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
DAG.getIntPtrConstant(j)));
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
- &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0],
+ Ops.size());
}
-SDValue NVPTXTargetLowering::
-LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+SDValue
+NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
- case ISD::RETURNADDR: return SDValue();
- case ISD::FRAMEADDR: return SDValue();
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::INTRINSIC_W_CHAIN: return Op;
+ case ISD::RETURNADDR:
+ return SDValue();
+ case ISD::FRAMEADDR:
+ return SDValue();
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return Op;
case ISD::BUILD_VECTOR:
case ISD::EXTRACT_SUBVECTOR:
return Op;
- case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::CONCAT_VECTORS:
+ return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::STORE:
+ return LowerSTORE(Op, DAG);
+ case ISD::LOAD:
+ return LowerLOAD(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
}
+SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType() == MVT::i1)
+ return LowerLOADi1(Op, DAG);
+ else
+ return SDValue();
+}
// v = ld i1* addr
// =>
// v1 = ld i8* addr
// v = trunc v1 to i1
-SDValue NVPTXTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(Node);
DebugLoc dl = Node->getDebugLoc();
- assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ;
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
assert(Node->getValueType(0) == MVT::i1 &&
"Custom lowering for i1 load only");
- SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(),
- LD->getAlignment());
+ SDValue newLD =
+ DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
// The legalizer (the caller) is expecting two values from the legalized
// load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
// in LegalizeDAG.cpp which also uses MergeValues.
- SDValue Ops[] = {result, LD->getChain()};
+ SDValue Ops[] = { result, LD->getChain() };
return DAG.getMergeValues(Ops, 2, dl);
}
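Note: a hedged C++ illustration of the i1-load lowering above, using plain
memory instead of SelectionDAG nodes (loadI1 is a made-up helper):

#include <cstdint>

// v = ld i1* addr  becomes  v1 = ld i8* addr; v = trunc v1 to i1
bool loadI1(const std::uint8_t *addr) {
  std::uint8_t v1 = *addr; // the widened i8 load
  return (v1 & 1) != 0;    // truncate back down to the single bit
}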
+SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ EVT ValVT = Op.getOperand(1).getValueType();
+ if (ValVT == MVT::i1)
+ return LowerSTOREi1(Op, DAG);
+ else if (ValVT.isVector())
+ return LowerSTOREVector(Op, DAG);
+ else
+ return SDValue();
+}
+
+SDValue
+NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *N = Op.getNode();
+ SDValue Val = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT ValVT = Val.getValueType();
+
+ if (ValVT.isVector()) {
+ // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+ // legal. We can (and should) split that into 2 stores of <2 x double> here
+ // but I'm leaving that as a TODO for now.
+ if (!ValVT.isSimple())
+ return SDValue();
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i8:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v2f64:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ // This is a "native" vector type
+ break;
+ }
+
+ unsigned Opcode = 0;
+ EVT EltVT = ValVT.getVectorElementType();
+ unsigned NumElts = ValVT.getVectorNumElements();
+
+ // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // stored type to i16 and propagate the "real" type as the memory type.
+ bool NeedExt = false;
+ if (EltVT.getSizeInBits() < 16)
+ NeedExt = true;
+
+ switch (NumElts) {
+ default:
+ return SDValue();
+ case 2:
+ Opcode = NVPTXISD::StoreV2;
+ break;
+ case 4: {
+ Opcode = NVPTXISD::StoreV4;
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> Ops;
+
+ // First is the chain
+ Ops.push_back(N->getOperand(0));
+
+ // Then the split values
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
+ DAG.getIntPtrConstant(i));
+ if (NeedExt)
+ // ANY_EXTEND is correct here since the store will only look at the
+ // lower-order bits anyway.
+ ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
+ Ops.push_back(ExtVal);
+ }
+
+ // Then any remaining arguments
+ for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+
+ SDValue NewSt = DAG.getMemIntrinsicNode(
+ Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(),
+ MemSD->getMemoryVT(), MemSD->getMemOperand());
+
+ //return DCI.CombineTo(N, NewSt, true);
+ return NewSt;
+ }
+
+ return SDValue();
+}
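Note: why ANY_EXTEND is safe in the store path above, as a standalone
sketch (storeElement is an illustrative name): a truncating store keeps
only the low-order bits, so the choice of extension never reaches memory.

#include <cstdint>

void storeElement(std::uint8_t *addr, std::uint16_t widened) {
  *addr = static_cast<std::uint8_t>(widened); // upper bits are discarded
}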
+
// st i1 v, addr
// =>
// v1 = zxt v to i8
// st i8, addr
-SDValue NVPTXTargetLowering::
-LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(Node);
@@ -906,18 +994,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
- MVT::i8, Tmp3);
- SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
+ Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
+ SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
return Result;
}
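Note: the i1-store pattern above in plain C++ terms (storeI1 is a
hypothetical name); the boolean is zero-extended to a byte because PTX has
no single-bit store:

#include <cstdint>

void storeI1(std::uint8_t *addr, bool v) {
  std::uint8_t v1 = v ? 1 : 0; // v1 = zext v to i8
  *addr = v1;                  // st i8 v1, addr
}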
-
-SDValue
-NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
- EVT v) const {
+SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
+ int idx, EVT v) const {
std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
std::stringstream suffix;
suffix << idx;
@@ -930,19 +1014,16 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
return getExtSymb(DAG, ".PARAM", idx, v);
}
-SDValue
-NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
+SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
return getExtSymb(DAG, ".HLPPARAM", idx);
}
// Check to see if the kernel argument is image*_t or sampler_t
bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
- static const char *const specialTypes[] = {
- "struct._image2d_t",
- "struct._image3d_t",
- "struct._sampler_t"
- };
+ static const char *const specialTypes[] = { "struct._image2d_t",
+ "struct._image3d_t",
+ "struct._sampler_t" };
const Type *Ty = arg->getType();
const PointerType *PTy = dyn_cast<PointerType>(Ty);
@@ -954,7 +1035,7 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
return false;
const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
- const std::string TypeName = STy ? STy->getName() : "";
+ const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
if (TypeName == specialTypes[i])
@@ -963,17 +1044,15 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
return false;
}
-SDValue
-NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+SDValue NVPTXTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
const DataLayout *TD = getDataLayout();
const Function *F = MF.getFunction();
- const AttrListPtr &PAL = F->getAttributes();
+ const AttributeSet &PAL = F->getAttributes();
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
@@ -984,34 +1063,43 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
std::vector<Type *> argTypes;
std::vector<const Argument *> theArgs;
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I) {
+ I != E; ++I) {
theArgs.push_back(I);
argTypes.push_back(I->getType());
}
- assert(argTypes.size() == Ins.size() &&
- "Ins types and function types did not match");
+ //assert(argTypes.size() == Ins.size() &&
+ // "Ins types and function types did not match");
int idx = 0;
- for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
+ for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) {
Type *Ty = argTypes[i];
EVT ObjectVT = getValueType(Ty);
- assert(ObjectVT == Ins[i].VT &&
- "Ins type did not match function type");
+ //assert(ObjectVT == Ins[i].VT &&
+ // "Ins type did not match function type");
// If the kernel argument is image*_t or sampler_t, convert it to
// an i32 constant holding the parameter position. This can later
// be matched in the AsmPrinter to output the correct mangled name.
- if (isImageOrSamplerVal(theArgs[i],
- (theArgs[i]->getParent() ?
- theArgs[i]->getParent()->getParent() : 0))) {
+ if (isImageOrSamplerVal(
+ theArgs[i],
+ (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
+ : 0))) {
assert(isKernel && "Only kernels can have image/sampler params");
- InVals.push_back(DAG.getConstant(i+1, MVT::i32));
+ InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
continue;
}
if (theArgs[i]->use_empty()) {
// argument is dead
- InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+ if (ObjectVT.isVector()) {
+ EVT EltVT = ObjectVT.getVectorElementType();
+ unsigned NumElts = ObjectVT.getVectorNumElements();
+ for (unsigned vi = 0; vi < NumElts; ++vi) {
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT));
+ }
+ } else {
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+ }
continue;
}
@@ -1019,29 +1107,52 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
// to newly created nodes. The SDNodes for params have to
// appear in the same order as their order of appearance
// in the original function. "idx+1" holds that order.
- if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) {
+ if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
+ if (ObjectVT.isVector()) {
+ unsigned NumElts = ObjectVT.getVectorNumElements();
+ EVT EltVT = ObjectVT.getVectorElementType();
+ unsigned Offset = 0;
+ for (unsigned vi = 0; vi < NumElts; ++vi) {
+ SDValue A = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue B = DAG.getIntPtrConstant(Offset);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ //getParamSymbol(DAG, idx, EltVT),
+ //DAG.getConstant(Offset, getPointerTy()));
+ A, B);
+ Value *SrcValue = Constant::getNullValue(PointerType::get(
+ EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
+ SDValue Ld = DAG.getLoad(
+ EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false,
+ false,
+ TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
+ Offset += EltVT.getStoreSizeInBits() / 8;
+ InVals.push_back(Ld);
+ }
+ continue;
+ }
+
// A plain scalar.
if (isABI || isKernel) {
// If ABI, load from the param symbol
SDValue Arg = getParamSymbol(DAG, idx);
- Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT(
- F->getContext()),
- llvm::ADDRESS_SPACE_PARAM));
- SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
- MachinePointerInfo(srcValue), false, false,
- false,
- TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
- F->getContext())));
+ // Conjure up a value that we can get the address space from.
+ // FIXME: Using a constant here is a hack.
+ Value *srcValue = Constant::getNullValue(
+ PointerType::get(ObjectVT.getTypeForEVT(F->getContext()),
+ llvm::ADDRESS_SPACE_PARAM));
+ SDValue p = DAG.getLoad(
+ ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false,
+ false,
+ TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx+1);
+ DAG.AssignOrdering(p.getNode(), idx + 1);
InVals.push_back(p);
- }
- else {
+ } else {
// If no ABI, just move the param symbol
SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx+1);
+ DAG.AssignOrdering(p.getNode(), idx + 1);
InVals.push_back(p);
}
continue;
@@ -1058,47 +1169,49 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
- DAG.AssignOrdering(p.getNode(), idx+1);
+ DAG.AssignOrdering(p.getNode(), idx + 1);
if (isKernel)
InVals.push_back(p);
else {
- SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
- DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
- p);
+ SDValue p2 = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
+ DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
InVals.push_back(p2);
}
} else {
// Have to move a set of param symbols to registers and
// store them locally and return the local pointer in InVals
const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
- assert(elemPtrType &&
- "Byval parameter should be a pointer type");
+ assert(elemPtrType && "Byval parameter should be a pointer type");
Type *elemType = elemPtrType->getElementType();
// Compute the constituent parts
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> offsets;
ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
unsigned totalsize = 0;
- for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j)
totalsize += vtparts[j].getStoreSizeInBits();
- SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()->
- CreateStackObject(totalsize/8, 16, false),
- getPointerTy());
+ SDValue localcopy = DAG.getFrameIndex(
+ MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false),
+ getPointerTy());
unsigned sizesofar = 0;
std::vector<SDValue> theChains;
- for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
unsigned numElems = 1;
- if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
- for (unsigned k=0, ke=numElems; k!=ke; ++k) {
+ if (vtparts[j].isVector())
+ numElems = vtparts[j].getVectorNumElements();
+ for (unsigned k = 0, ke = numElems; k != ke; ++k) {
EVT tmpvt = vtparts[j];
- if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
+ if (tmpvt.isVector())
+ tmpvt = tmpvt.getVectorElementType();
SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
getParamSymbol(DAG, idx, tmpvt));
- SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
- DAG.getConstant(sizesofar, getPointerTy()));
- theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
- MachinePointerInfo(), false, false, 0));
- sizesofar += tmpvt.getStoreSizeInBits()/8;
+ SDValue addr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
+ DAG.getConstant(sizesofar, getPointerTy()));
+ theChains.push_back(DAG.getStore(
+ Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0));
+ sizesofar += tmpvt.getStoreSizeInBits() / 8;
++idx;
}
}
@@ -1118,43 +1231,42 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
//}
if (!OutChains.empty())
- DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &OutChains[0], OutChains.size()));
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0],
+ OutChains.size()));
return Chain;
}
-SDValue
-NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerReturn(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+ SelectionDAG &DAG) const {
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
unsigned sizesofar = 0;
unsigned idx = 0;
- for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
SDValue theVal = OutVals[i];
EVT theValType = theVal.getValueType();
unsigned numElems = 1;
- if (theValType.isVector()) numElems = theValType.getVectorNumElements();
- for (unsigned j=0,je=numElems; j!=je; ++j) {
+ if (theValType.isVector())
+ numElems = theValType.getVectorNumElements();
+ for (unsigned j = 0, je = numElems; j != je; ++j) {
SDValue tmpval = theVal;
if (theValType.isVector())
tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- theValType.getVectorElementType(),
- tmpval, DAG.getIntPtrConstant(j));
- Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval,
- dl, MVT::Other,
- Chain,
- DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
+ theValType.getVectorElementType(), tmpval,
+ DAG.getIntPtrConstant(j));
+ Chain = DAG.getNode(
+ isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl,
+ MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
tmpval);
if (theValType.isVector())
- sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
+ sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8;
else
- sizesofar += theValType.getStoreSizeInBits()/8;
+ sizesofar += theValType.getStoreSizeInBits() / 8;
++idx;
}
}
@@ -1162,12 +1274,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}
-void
-NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const
-{
+void NVPTXTargetLowering::LowerAsmOperandForConstraint(
+ SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
if (Constraint.length() > 1)
return;
else
@@ -1177,8 +1286,7 @@ NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// NVPTX supports vectors of legal types of any length in intrinsics
// because the NVPTX-specific type legalizer
// will legalize them to the PTX-supported length.
-bool
-NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
+bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
if (isTypeLegal(VT))
return true;
if (VT.isVector()) {
@@ -1189,15 +1297,13 @@ NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
return false;
}
-
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need information that is only available in
// the "Value" type of the destination pointer. In particular, the address
// space information.
-bool
-NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
- unsigned Intrinsic) const {
+bool NVPTXTargetLowering::getTgtMemIntrinsic(
+ IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
switch (Intrinsic) {
default:
return false;
@@ -1253,9 +1359,8 @@ NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
/// Used to guide target specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
-bool
-NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
// AddrMode - This represents an addressing mode of:
// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
@@ -1273,10 +1378,10 @@ NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
}
switch (AM.Scale) {
- case 0: // "r", "r+i" or "i" is allowed
+ case 0: // "r", "r+i" or "i" is allowed
break;
case 1:
- if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
+ if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
return false;
// Otherwise we have r+i.
break;
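Note: a condensed sketch of the rule this switch encodes, assuming larger
scale factors fall through to rejection (isLegalPTXAddrMode is an
illustrative helper, not the real hook):

bool isLegalPTXAddrMode(bool hasBaseReg, int scale) {
  if (scale == 0)
    return true;        // "r", "r+i" or "i"
  if (scale == 1)
    return !hasBaseReg; // a lone scaled register still reduces to "r+i"
  return false;         // genuinely scaled addressing is rejected
}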
@@ -1313,8 +1418,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
-
-std::pair<unsigned, const TargetRegisterClass*>
+std::pair<unsigned, const TargetRegisterClass *>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
if (Constraint.size() == 1) {
@@ -1337,9 +1441,253 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
-
-
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
return 4;
}
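Note: the value returned is a log2 alignment, so 4 here means 2^4 = 16-byte
function alignment; a one-line sketch of the convention:

unsigned alignmentInBytes(unsigned log2Align) { return 1u << log2Align; }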
+
+/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
+static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT ResVT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ assert(ResVT.isVector() && "Vector load must have vector type");
+
+ // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+ // legal. We can (and should) split that into 2 loads of <2 x double> here
+ // but I'm leaving that as a TODO for now.
+ assert(ResVT.isSimple() && "Can only handle simple types");
+ switch (ResVT.getSimpleVT().SimpleTy) {
+ default:
+ return;
+ case MVT::v2i8:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v2f64:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ // This is a "native" vector type
+ break;
+ }
+
+ EVT EltVT = ResVT.getVectorElementType();
+ unsigned NumElts = ResVT.getVectorNumElements();
+
+ // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // loaded type to i16 and propagate the "real" type as the memory type.
+ bool NeedTrunc = false;
+ if (EltVT.getSizeInBits() < 16) {
+ EltVT = MVT::i16;
+ NeedTrunc = true;
+ }
+
+ unsigned Opcode = 0;
+ SDVTList LdResVTs;
+
+ switch (NumElts) {
+ default:
+ return;
+ case 2:
+ Opcode = NVPTXISD::LoadV2;
+ LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+ break;
+ case 4: {
+ Opcode = NVPTXISD::LoadV4;
+ EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+ LdResVTs = DAG.getVTList(ListVTs, 5);
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> OtherOps;
+
+ // Copy regular operands
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OtherOps.push_back(N->getOperand(i));
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+
+ // The select routine does not have access to the LoadSDNode instance, so
+ // pass along the extension information
+ OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
+
+ SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
+ OtherOps.size(), LD->getMemoryVT(),
+ LD->getMemOperand());
+
+ SmallVector<SDValue, 4> ScalarRes;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
+
+ SDValue LoadChain = NewLD.getValue(NumElts);
+
+ SDValue BuildVec =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+ Results.push_back(BuildVec);
+ Results.push_back(LoadChain);
+}
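Note: the result layout the loop above assumes (a sketch with an
illustrative name): an N-lane LoadV2/LoadV4 node yields values 0..N-1 for
the lanes and value N for the chain.

// Chain result index for an N-lane vector load node.
constexpr unsigned chainResultIndex(unsigned numLanes) { return numLanes; }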
+
+static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Intrin = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Get the intrinsic ID
+ unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
+ switch (IntrinNo) {
+ default:
+ return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p: {
+ EVT ResVT = N->getValueType(0);
+
+ if (ResVT.isVector()) {
+ // Vector LDG/LDU
+
+ unsigned NumElts = ResVT.getVectorNumElements();
+ EVT EltVT = ResVT.getVectorElementType();
+
+ // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // loaded type to i16 and propagate the "real" type as the memory type.
+ bool NeedTrunc = false;
+ if (EltVT.getSizeInBits() < 16) {
+ EltVT = MVT::i16;
+ NeedTrunc = true;
+ }
+
+ unsigned Opcode = 0;
+ SDVTList LdResVTs;
+
+ switch (NumElts) {
+ default:
+ return;
+ case 2:
+ switch (IntrinNo) {
+ default:
+ return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ Opcode = NVPTXISD::LDGV2;
+ break;
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+ Opcode = NVPTXISD::LDUV2;
+ break;
+ }
+ LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+ break;
+ case 4: {
+ switch (IntrinNo) {
+ default:
+ return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ Opcode = NVPTXISD::LDGV4;
+ break;
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+ Opcode = NVPTXISD::LDUV4;
+ break;
+ }
+ EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+ LdResVTs = DAG.getVTList(ListVTs, 5);
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> OtherOps;
+
+ // Copy regular operands
+
+ OtherOps.push_back(Chain); // Chain
+ // Skip operand 1 (intrinsic ID)
+ // Others
+ for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
+ OtherOps.push_back(N->getOperand(i));
+
+ MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+ SDValue NewLD = DAG.getMemIntrinsicNode(
+ Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(),
+ MemSD->getMemoryVT(), MemSD->getMemOperand());
+
+ SmallVector<SDValue, 4> ScalarRes;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res =
+ DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
+
+ SDValue LoadChain = NewLD.getValue(NumElts);
+
+ SDValue BuildVec =
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+ Results.push_back(BuildVec);
+ Results.push_back(LoadChain);
+ } else {
+ // i8 LDG/LDU
+ assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
+ "Custom handling of non-i8 ldu/ldg?");
+
+ // Just copy all operands as-is
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Ops.push_back(N->getOperand(i));
+
+ // Force output to i16
+ SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
+
+ MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+ // We make sure the memory type is i8, which will be used during isel
+ // to select the proper instruction.
+ SDValue NewLD =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0],
+ Ops.size(), MVT::i8, MemSD->getMemOperand());
+
+ Results.push_back(NewLD.getValue(0));
+ Results.push_back(NewLD.getValue(1));
+ }
+ }
+ }
+}
+
+void NVPTXTargetLowering::ReplaceNodeResults(
+ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default:
+ report_fatal_error("Unhandled custom legalization");
+ case ISD::LOAD:
+ ReplaceLoadVector(N, DAG, Results);
+ return;
+ case ISD::INTRINSIC_W_CHAIN:
+ ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
+ return;
+ }
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 94a177ceb00a..3cd49d38af76 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -58,7 +58,16 @@ enum NodeType {
RETURN,
CallSeqBegin,
CallSeqEnd,
- Dummy
+ Dummy,
+
+ LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ LoadV4,
+ LDGV2, // LDG.v2
+ LDGV4, // LDG.v4
+ LDUV2, // LDU.v2
+ LDUV4, // LDU.v4
+ StoreV2,
+ StoreV4
};
}
@@ -78,7 +87,7 @@ public:
bool isTypeSupportedInIntrinsic(MVT VT) const;
- bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
unsigned Intrinsic) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
@@ -92,18 +101,19 @@ public:
virtual unsigned getFunctionAlignment(const Function *F) const;
virtual EVT getSetCCResultType(EVT VT) const {
+ if (VT.isVector())
+ return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
return MVT::i1;
}
ConstraintType getConstraintType(const std::string &Constraint) const;
- std::pair<unsigned, const TargetRegisterClass*>
+ std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
- virtual SDValue
- LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ virtual SDValue LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const;
@@ -125,22 +135,29 @@ public:
NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
- virtual MVT getShiftAmountTy(EVT LHSTy) const {
- return MVT::i32;
- }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ virtual bool shouldSplitVectorElementType(EVT VT) const;
private:
- const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
+ const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
- SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
- MVT::i32) const;
+ SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
+ EVT = MVT::i32) const;
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
};
} // namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index cd50deb26a23..33a63c26f4e2 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -16,108 +16,62 @@
#include "NVPTXTargetMachine.h"
#define GET_INSTRINFO_CTOR
#include "NVPTXGenInstrInfo.inc"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <cstdio>
-
using namespace llvm;
// FIXME: Add the subtarget support in this constructor.
NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
-: NVPTXGenInstrInfo(),
- TM(tm),
- RegInfo(*this, *TM.getSubtargetImpl()) {}
-
+ : NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {}
-void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+void NVPTXInstrInfo::copyPhysReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg, bool KillSrc) const {
if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
NVPTX::Int32RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
- NVPTX::Int8RegsRegClass.contains(SrcReg))
+ NVPTX::Int8RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
- NVPTX::Int1RegsRegClass.contains(SrcReg))
+ NVPTX::Int1RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
- NVPTX::Float32RegsRegClass.contains(SrcReg))
+ NVPTX::Float32RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
- NVPTX::Int16RegsRegClass.contains(SrcReg))
+ NVPTX::Int16RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
- NVPTX::Int64RegsRegClass.contains(SrcReg))
+ NVPTX::Int64RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
- NVPTX::Float64RegsRegClass.contains(SrcReg))
+ NVPTX::Float64RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
- NVPTX::V4F32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
- NVPTX::V4I32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
- NVPTX::V2F32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
- NVPTX::V2I32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
- NVPTX::V4I8RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
- NVPTX::V2I8RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
- NVPTX::V4I16RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
- NVPTX::V2I16RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
- NVPTX::V2I64RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
- NVPTX::V2F64RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
else {
llvm_unreachable("Don't know how to copy a register");
}
}
-bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg,
+bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DestReg) const {
// Look for the appropriate part of TSFlags
bool isMove = false;
- unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >>
- NVPTX::SimpleMoveShift;
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> NVPTX::SimpleMoveShift;
isMove = (TSFlags == 1);
if (isMove) {
@@ -134,10 +88,10 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
return false;
}
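Note: the TSFlags probe used by isMoveInstr (and by the load/store queries
below) factored into a generic sketch; mask and shift stand in for the
real NVPTX constants:

bool flagIsSet(unsigned tsFlags, unsigned mask, unsigned shift) {
  return ((tsFlags & mask) >> shift) == 1;
}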
-bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
-{
+bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const {
switch (MI.getOpcode()) {
- default: return false;
+ default:
+ return false;
case NVPTX::INT_PTX_SREG_NTID_X:
case NVPTX::INT_PTX_SREG_NTID_Y:
case NVPTX::INT_PTX_SREG_NTID_Z:
@@ -155,12 +109,11 @@ bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
}
}
-
bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
unsigned &AddrSpace) const {
bool isLoad = false;
- unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >>
- NVPTX::isLoadShift;
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> NVPTX::isLoadShift;
isLoad = (TSFlags == 1);
if (isLoad)
AddrSpace = getLdStCodeAddrSpace(MI);
@@ -170,15 +123,14 @@ bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
unsigned &AddrSpace) const {
bool isStore = false;
- unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >>
- NVPTX::isStoreShift;
+ unsigned TSFlags =
+ (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift;
isStore = (TSFlags == 1);
if (isStore)
AddrSpace = getLdStCodeAddrSpace(MI);
return isStore;
}
-
bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
unsigned addrspace = 0;
if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
@@ -192,7 +144,6 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
return true;
}
-
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
@@ -216,11 +167,9 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
/// Note that RemoveBranch and InsertBranch must be implemented to support
/// cases where this method returns success.
///
-bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
+bool NVPTXInstrInfo::AnalyzeBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
@@ -248,14 +197,13 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineInstr *SecondLastInst = I;
// If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() &&
- isUnpredicatedTerminator(--I))
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
return true;
// If the block ends with NVPTX::GOTO and NVPTX::CBranch, handle it.
if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
LastInst->getOpcode() == NVPTX::GOTO) {
- TBB = SecondLastInst->getOperand(1).getMBB();
+ TBB = SecondLastInst->getOperand(1).getMBB();
Cond.push_back(SecondLastInst->getOperand(0));
FBB = LastInst->getOperand(0).getMBB();
return false;
@@ -278,7 +226,8 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
+ if (I == MBB.begin())
+ return 0;
--I;
if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch)
return 0;
@@ -288,7 +237,8 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
I = MBB.end();
- if (I == MBB.begin()) return 1;
+ if (I == MBB.begin())
+ return 1;
--I;
if (I->getOpcode() != NVPTX::CBranch)
return 1;
@@ -298,11 +248,9 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-unsigned
-NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
+unsigned NVPTXInstrInfo::InsertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -310,17 +258,16 @@ NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// One-way branch.
if (FBB == 0) {
- if (Cond.empty()) // Unconditional branch
+ if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
- else // Conditional branch
- BuildMI(&MBB, DL, get(NVPTX::CBranch))
- .addReg(Cond[0].getReg()).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg())
+ .addMBB(TBB);
return 1;
}
// Two-way Conditional Branch.
- BuildMI(&MBB, DL, get(NVPTX::CBranch))
- .addReg(Cond[0].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB);
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
return 2;
}
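
Reviewer note: the three hooks reformatted above form one contract. AnalyzeBranch
classifies a block ending in GOTO, CBranch, or CBranch+GOTO and returns true only
when the terminators cannot be understood; RemoveBranch strips up to two of those
terminators and returns how many it erased; InsertBranch rebuilds them. A minimal
sketch of how generic codegen drives the trio, assuming a live MachineFunction and
the TargetInstrInfo signatures of this tree (the helper itself is hypothetical):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineOperand.h"
    #include "llvm/Target/TargetInstrInfo.h"
    using namespace llvm;

    // Re-point MBB's taken edge at NewDest when the branch is analyzable.
    static bool retargetBranch(const TargetInstrInfo &TII,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock *NewDest) {
      MachineBasicBlock *TBB = 0, *FBB = 0;
      SmallVector<MachineOperand, 1> Cond;
      if (TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false))
        return false;                  // unanalyzable, e.g. three terminators
      TII.RemoveBranch(MBB);           // drops the GOTO and/or CBranch
      TII.InsertBranch(MBB, NewDest, FBB, Cond, DebugLoc());
      return true;
    }
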
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 7b8e218b05b6..b1972e9b7254 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -23,8 +23,7 @@
namespace llvm {
-class NVPTXInstrInfo : public NVPTXGenInstrInfo
-{
+class NVPTXInstrInfo : public NVPTXGenInstrInfo {
NVPTXTargetMachine &TM;
const NVPTXRegisterInfo RegInfo;
public:
@@ -50,30 +49,26 @@ public:
* const TargetRegisterClass *RC) const;
*/
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const ;
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg,
+ virtual void copyPhysReg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg, bool KillSrc) const;
+ virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DestReg) const;
bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
bool isReadSpecialReg(MachineInstr &MI) const;
- virtual bool CanTailMerge(const MachineInstr *MI) const ;
+ virtual bool CanTailMerge(const MachineInstr *MI) const;
// Branch analysis.
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
+ virtual bool AnalyzeBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
+ virtual unsigned InsertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
- return MI.getOperand(2).getImm();
+ return MI.getOperand(2).getImm();
}
};
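
Reviewer note: getLdStCodeAddrSpace() above reads operand 2 of an NVPTX ld/st
machine instruction, which the LdStCode operands in the .td hunks below encode as
an immediate address-space code. A hedged sketch of a consumer; the helper and the
SpaceCode parameter are hypothetical, since the concrete encoding is owned by the
TableGen operand definitions:

    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    static bool isLdStToSpace(const MachineInstr &MI, int64_t SpaceCode) {
      // Mirrors getLdStCodeAddrSpace(): the code sits in operand 2.
      return MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == SpaceCode;
    }
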
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8a410b872925..f43abe283b58 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -52,6 +52,7 @@ def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
def hasVote : Predicate<"Subtarget.hasVote()">;
def hasDouble : Predicate<"Subtarget.hasDouble()">;
def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
+def hasLDG : Predicate<"Subtarget.hasLDG()">;
def hasLDU : Predicate<"Subtarget.hasLDU()">;
def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
@@ -2153,11 +2154,21 @@ multiclass LD<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t$dst, [$addr];"), []>;
+ def _areg_64 : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+ " \t$dst, [$addr];"), []>;
def _ari : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t$dst, [$addr+$offset];"), []>;
+ def _ari_64 : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+ " \t$dst, [$addr+$offset];"), []>;
def _asi : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2174,19 +2185,6 @@ defm LD_f32 : LD<Float32Regs>;
defm LD_f64 : LD<Float64Regs>;
}
-let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in {
-defm LD_v2i8 : LD<V2I8Regs>;
-defm LD_v4i8 : LD<V4I8Regs>;
-defm LD_v2i16 : LD<V2I16Regs>;
-defm LD_v4i16 : LD<V4I16Regs>;
-defm LD_v2i32 : LD<V2I32Regs>;
-defm LD_v4i32 : LD<V4I32Regs>;
-defm LD_v2f32 : LD<V2F32Regs>;
-defm LD_v4f32 : LD<V4F32Regs>;
-defm LD_v2i64 : LD<V2I64Regs>;
-defm LD_v2f64 : LD<V2F64Regs>;
-}
-
multiclass ST<NVPTXRegClass regclass> {
def _avar : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
@@ -2198,11 +2196,21 @@ multiclass ST<NVPTXRegClass regclass> {
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
" \t[$addr], $src;"), []>;
+ def _areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+ "\t[$addr], $src;"), []>;
def _ari : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
" \t[$addr+$offset], $src;"), []>;
+ def _ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+ "\t[$addr+$offset], $src;"), []>;
def _asi : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
@@ -2219,19 +2227,6 @@ defm ST_f32 : ST<Float32Regs>;
defm ST_f64 : ST<Float64Regs>;
}
-let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in {
-defm ST_v2i8 : ST<V2I8Regs>;
-defm ST_v4i8 : ST<V4I8Regs>;
-defm ST_v2i16 : ST<V2I16Regs>;
-defm ST_v4i16 : ST<V4I16Regs>;
-defm ST_v2i32 : ST<V2I32Regs>;
-defm ST_v4i32 : ST<V4I32Regs>;
-defm ST_v2f32 : ST<V2F32Regs>;
-defm ST_v4f32 : ST<V4F32Regs>;
-defm ST_v2i64 : ST<V2I64Regs>;
-defm ST_v2f64 : ST<V2F64Regs>;
-}
-
// The following is used only in and after vector elementizations.
// Vector elementization happens at the machine instruction level, so the
// following instruction
@@ -2247,11 +2242,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
+ def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
+ def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2269,6 +2274,12 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
+ def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2276,6 +2287,13 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
[]>;
+ def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
+ []>;
def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2304,12 +2322,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
+ def _v2_areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
def _v2_ari : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
+ def _v2_ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
+ i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
def _v2_asi : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
@@ -2328,6 +2357,12 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
+ def _v4_areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
def _v4_ari : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2335,6 +2370,13 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
[]>;
+ def _v4_ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
+ []>;
def _v4_asi : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2822,8 +2864,6 @@ def trapinst : NVPTXInst<(outs), (ins),
"trap;",
[(trap)]>;
-include "NVPTXVector.td"
-
include "NVPTXIntrinsics.td"
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 028a94bfd1bb..49e2568dfa2c 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1343,52 +1343,113 @@ defm INT_PTX_LDU_G_v4f32_ELE
: VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Float32Regs>;
-// Vector ldu
-multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp,
- NVPTXInst eleInst, NVPTXInst eleInst64> {
- def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>,
- Requires<[hasLDU]>;
- def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>,
- Requires<[hasLDU]>;
+
+//-----------------------------------
+// Support for ldg on sm_35 or later
+//-----------------------------------
+
+def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ return M->getMemoryVT() == MVT::i8;
+}]>;
+
+multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDG]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+}
+
+multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDG]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+}
+
+defm INT_PTX_LDG_GLOBAL_i8
+ : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
+defm INT_PTX_LDG_GLOBAL_i16
+ : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i32
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i64
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_f32
+ : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_f64
+ : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_p32
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
+defm INT_PTX_LDG_GLOBAL_p64
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;
+
+// vector
+
+// Elementized vector ldg
+multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
}
-let VecInstType=isVecLD.Value in {
-defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];",
- V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32,
- INT_PTX_LDU_G_v2i8_ELE_64>;
-defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];",
- V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32,
- INT_PTX_LDU_G_v4i8_ELE_64>;
-defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];",
- V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32,
- INT_PTX_LDU_G_v2i16_ELE_64>;
-defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];",
- V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32,
- INT_PTX_LDU_G_v4i16_ELE_64>;
-defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];",
- V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32,
- INT_PTX_LDU_G_v2i32_ELE_64>;
-defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];",
- V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32,
- INT_PTX_LDU_G_v4i32_ELE_64>;
-defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];",
- V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32,
- INT_PTX_LDU_G_v2f32_ELE_64>;
-defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];",
- V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32,
- INT_PTX_LDU_G_v4f32_ELE_64>;
-defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];",
- V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32,
- INT_PTX_LDU_G_v2i64_ELE_64>;
-defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];",
- V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32,
- INT_PTX_LDU_G_v2f64_ELE_64>;
+multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
}
+// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
+defm INT_PTX_LDG_G_v2i8_ELE
+ : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i16_ELE
+ : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i32_ELE
+ : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v2f32_ELE
+ : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
+defm INT_PTX_LDG_G_v2i64_ELE
+ : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
+defm INT_PTX_LDG_G_v2f64_ELE
+ : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
+defm INT_PTX_LDG_G_v4i8_ELE
+ : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i16_ELE
+ : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i32_ELE
+ : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v4f32_ELE
+ : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
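
Reviewer note: this hunk replaces the register-class-based vector LDU forms with
LDG ("ld.global.nc") support, i.e. loads through the read-only data cache on
sm_35-class hardware, gated by the hasLDG predicate (SmVersion >= 32 in the
subtarget hunk below). The i8 case goes through a PatFrag because 8-bit values
live in 16-bit registers, as the FIXME notes. A standalone restatement of that
PatFrag's C++ predicate, under a hypothetical function name:

    #include "llvm/CodeGen/SelectionDAGNodes.h"
    using namespace llvm;

    // True when the @llvm.nvvm.ldg.global.i node loads exactly one byte.
    static bool isByteSizedLDG(const SDNode *N) {
      const MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
      return M->getMemoryVT() == MVT::i8;
    }
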
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 9273931e9919..7c257b4c6a89 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -12,31 +12,28 @@
//===----------------------------------------------------------------------===//
#include "NVPTXLowerAggrCopies.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/DataLayout.h"
using namespace llvm;
-namespace llvm {
-FunctionPass *createLowerAggrCopies();
-}
+namespace llvm { FunctionPass *createLowerAggrCopies(); }
char NVPTXLowerAggrCopies::ID = 0;
// Lower MemTransferInst or load-store pair to loop
-static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
- Value *dstAddr, Value *len,
- //unsigned numLoads,
- bool srcVolatile, bool dstVolatile,
- LLVMContext &Context, Function &F) {
+static void convertTransferToLoop(
+ Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
+ //unsigned numLoads,
+ bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
Type *indType = len->getType();
BasicBlock *origBB = splitAt->getParent();
@@ -48,10 +45,8 @@ static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
// srcAddr and dstAddr are expected to be pointer types,
// so no check is made here.
- unsigned srcAS =
- dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
- unsigned dstAS =
- dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
+ unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointers to (char *)
srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
@@ -86,12 +81,11 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
origBB->getTerminator()->setSuccessor(0, loopBB);
IRBuilder<> builder(origBB, origBB->getTerminator());
- unsigned dstAS =
- dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointer to the type of value getting stored
- dstAddr = builder.CreateBitCast(dstAddr,
- PointerType::get(val->getType(), dstAS));
+ dstAddr =
+ builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
IRBuilder<> loop(loopBB);
PHINode *ind = loop.CreatePHI(len->getType(), 0);
@@ -120,24 +114,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
//BasicBlock *bb = BI;
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
- ++II) {
- if (LoadInst * load = dyn_cast<LoadInst>(II)) {
+ ++II) {
+ if (LoadInst *load = dyn_cast<LoadInst>(II)) {
- if (load->hasOneUse() == false) continue;
+ if (load->hasOneUse() == false)
+ continue;
- if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue;
+ if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ continue;
User *use = *(load->use_begin());
- if (StoreInst * store = dyn_cast<StoreInst>(use)) {
+ if (StoreInst *store = dyn_cast<StoreInst>(use)) {
if (store->getOperand(0) != load) //getValueOperand
- continue;
+ continue;
aggrLoads.push_back(load);
}
- } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) {
+ } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
Value *len = intr->getLength();
// If the number of elements being copied is greater
// than MaxAggrCopySize, lower it to a loop
- if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) {
+ if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
if (len_int->getZExtValue() >= MaxAggrCopySize) {
aggrMemcpys.push_back(intr);
}
@@ -145,9 +141,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
// turn variable length memcpy/memmov into loop
aggrMemcpys.push_back(intr);
}
- } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) {
+ } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
Value *len = memsetintr->getLength();
- if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) {
+ if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
if (len_int->getZExtValue() >= MaxAggrCopySize) {
aggrMemsets.push_back(memsetintr);
}
@@ -158,8 +154,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
}
}
}
- if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0)
- && (aggrMemsets.size() == 0)) return false;
+ if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
+ (aggrMemsets.size() == 0))
+ return false;
//
// Do the transformation of an aggr load/copy/set to a loop
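
Reviewer note: the pass above rewrites aggregate operations the PTX backend
cannot express directly -- memcpy/memmove/memset calls with variable or large
(>= MaxAggrCopySize) lengths, and oversized single-use load/store pairs -- into
explicit loops before instruction selection. Since convertTransferToLoop() casts
both pointers to (char *), the generated copy has the shape below; this sketch
omits the srcVolatile/dstVolatile qualifiers the real code threads through:

    // Conceptual shape of the loop convertTransferToLoop() emits.
    static void byteCopy(char *dst, const char *src, unsigned long len) {
      for (unsigned long i = 0; i != len; ++i)
        dst[i] = src[i];   // one elementary load + store per iteration
    }
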
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index b150c69815dd..286e753fa92b 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -15,9 +15,9 @@
#ifndef NVPTX_LOWER_AGGR_COPIES_H
#define NVPTX_LOWER_AGGR_COPIES_H
-#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h
index b4a4dbce98a9..a95c16b1e67e 100644
--- a/lib/Target/NVPTX/NVPTXNumRegisters.h
+++ b/lib/Target/NVPTX/NVPTXNumRegisters.h
@@ -11,10 +11,6 @@
#ifndef NVPTX_NUM_REGISTERS_H
#define NVPTX_NUM_REGISTERS_H
-namespace llvm {
-
-const unsigned NVPTXNumRegisters = 396;
-
-}
+namespace llvm { const unsigned NVPTXNumRegisters = 396; }
#endif
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index e3cd46f063bf..282465359b07 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -13,260 +13,88 @@
#define DEBUG_TYPE "nvptx-reg-info"
-#include "NVPTX.h"
#include "NVPTXRegisterInfo.h"
+#include "NVPTX.h"
#include "NVPTXSubtarget.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Target/TargetInstrInfo.h"
-
using namespace llvm;
-namespace llvm
-{
-std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
+namespace llvm {
+std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass) {
return ".f32";
}
if (RC == &NVPTX::Float64RegsRegClass) {
return ".f64";
- }
- else if (RC == &NVPTX::Int64RegsRegClass) {
+ } else if (RC == &NVPTX::Int64RegsRegClass) {
return ".s64";
- }
- else if (RC == &NVPTX::Int32RegsRegClass) {
+ } else if (RC == &NVPTX::Int32RegsRegClass) {
return ".s32";
- }
- else if (RC == &NVPTX::Int16RegsRegClass) {
+ } else if (RC == &NVPTX::Int16RegsRegClass) {
return ".s16";
}
- // Int8Regs become 16-bit registers in PTX
- else if (RC == &NVPTX::Int8RegsRegClass) {
+ // Int8Regs become 16-bit registers in PTX
+ else if (RC == &NVPTX::Int8RegsRegClass) {
return ".s16";
- }
- else if (RC == &NVPTX::Int1RegsRegClass) {
+ } else if (RC == &NVPTX::Int1RegsRegClass) {
return ".pred";
- }
- else if (RC == &NVPTX::SpecialRegsRegClass) {
+ } else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
- }
- else if (RC == &NVPTX::V2F32RegsRegClass) {
- return ".v2.f32";
- }
- else if (RC == &NVPTX::V4F32RegsRegClass) {
- return ".v4.f32";
- }
- else if (RC == &NVPTX::V2I32RegsRegClass) {
- return ".v2.s32";
- }
- else if (RC == &NVPTX::V4I32RegsRegClass) {
- return ".v4.s32";
- }
- else if (RC == &NVPTX::V2F64RegsRegClass) {
- return ".v2.f64";
- }
- else if (RC == &NVPTX::V2I64RegsRegClass) {
- return ".v2.s64";
- }
- else if (RC == &NVPTX::V2I16RegsRegClass) {
- return ".v2.s16";
- }
- else if (RC == &NVPTX::V4I16RegsRegClass) {
- return ".v4.s16";
- }
- else if (RC == &NVPTX::V2I8RegsRegClass) {
- return ".v2.s16";
- }
- else if (RC == &NVPTX::V4I8RegsRegClass) {
- return ".v4.s16";
- }
- else {
+ } else {
return "INTERNAL";
}
return "";
}
-std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
+std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass) {
return "%f";
}
if (RC == &NVPTX::Float64RegsRegClass) {
return "%fd";
- }
- else if (RC == &NVPTX::Int64RegsRegClass) {
+ } else if (RC == &NVPTX::Int64RegsRegClass) {
return "%rd";
- }
- else if (RC == &NVPTX::Int32RegsRegClass) {
+ } else if (RC == &NVPTX::Int32RegsRegClass) {
return "%r";
- }
- else if (RC == &NVPTX::Int16RegsRegClass) {
+ } else if (RC == &NVPTX::Int16RegsRegClass) {
return "%rs";
- }
- else if (RC == &NVPTX::Int8RegsRegClass) {
+ } else if (RC == &NVPTX::Int8RegsRegClass) {
return "%rc";
- }
- else if (RC == &NVPTX::Int1RegsRegClass) {
+ } else if (RC == &NVPTX::Int1RegsRegClass) {
return "%p";
- }
- else if (RC == &NVPTX::SpecialRegsRegClass) {
+ } else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
- }
- else if (RC == &NVPTX::V2F32RegsRegClass) {
- return "%v2f";
- }
- else if (RC == &NVPTX::V4F32RegsRegClass) {
- return "%v4f";
- }
- else if (RC == &NVPTX::V2I32RegsRegClass) {
- return "%v2r";
- }
- else if (RC == &NVPTX::V4I32RegsRegClass) {
- return "%v4r";
- }
- else if (RC == &NVPTX::V2F64RegsRegClass) {
- return "%v2fd";
- }
- else if (RC == &NVPTX::V2I64RegsRegClass) {
- return "%v2rd";
- }
- else if (RC == &NVPTX::V2I16RegsRegClass) {
- return "%v2s";
- }
- else if (RC == &NVPTX::V4I16RegsRegClass) {
- return "%v4rs";
- }
- else if (RC == &NVPTX::V2I8RegsRegClass) {
- return "%v2rc";
- }
- else if (RC == &NVPTX::V4I8RegsRegClass) {
- return "%v4rc";
- }
- else {
+ } else {
return "INTERNAL";
}
return "";
}
-
-bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return true;
- return false;
-}
-
-std::string getNVPTXElemClassName(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass);
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass);
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
- llvm_unreachable("Not a vector register class");
-}
-
-const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return (&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return (&NVPTX::Float64RegsRegClass);
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return (&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return (&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return (&NVPTX::Int64RegsRegClass);
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return (&NVPTX::Int8RegsRegClass);
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return (&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return (&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return (&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return (&NVPTX::Int8RegsRegClass);
- llvm_unreachable("Not a vector register class");
-}
-
-int getNVPTXVectorSize(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return 4;
- llvm_unreachable("Not a vector register class");
-}
}
NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
const NVPTXSubtarget &st)
- : NVPTXGenRegisterInfo(0),
- Is64Bit(st.is64Bit()) {}
+ : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {}
#define GET_REGINFO_TARGET_DESC
#include "NVPTXGenRegisterInfo.inc"
/// NVPTX Callee Saved Registers
-const uint16_t* NVPTXRegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const {
+const uint16_t *
+NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
static const uint16_t CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
// NVPTX Callee Saved Reg Classes
-const TargetRegisterClass* const*
+const TargetRegisterClass *const *
NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+ static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 };
return CalleeSavedRegClasses;
}
@@ -275,34 +103,24 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-void NVPTXRegisterInfo::
-eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj,
- RegScavenger *RS) const {
+void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
MachineFunction &MF = *MI.getParent()->getParent();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(i+1).getImm();
+ MI.getOperand(FIOperandNum + 1).getImm();
// Using I0 as the frame pointer
- MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
-
-int NVPTXRegisterInfo::
-getDwarfRegNum(unsigned RegNum, bool isEH) const {
+int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return 0;
}
@@ -310,16 +128,4 @@ unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return NVPTX::VRFrame;
}
-unsigned NVPTXRegisterInfo::getRARegister() const {
- return 0;
-}
-
-// This function eliminates ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void NVPTXRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- // Simply discard ADJCALLSTACKDOWN,
- // ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
+unsigned NVPTXRegisterInfo::getRARegister() const { return 0; }
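
Reviewer note: two substantive changes ride along with the reflowing above.
First, the vector register-class queries (isNVPTXVectorRegClass, getNVPTXElemClass
and friends) are deleted with the vector classes themselves. Second,
eliminateFrameIndex() now receives FIOperandNum from the generic frame-lowering
walk instead of scanning the instruction for the frame-index operand. The rewrite
it performs, restated as a hypothetical standalone helper:

    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // Turn <fi#N> + imm into FrameReg + (object offset + imm), in place.
    static void rewriteFrameIndex(MachineInstr &MI, unsigned FIOperandNum,
                                  unsigned FrameReg, int Offset) {
      MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/false);
      MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    }
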
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 595178335ae2..d40682066142 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -17,7 +17,6 @@
#include "ManagedStringPool.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
#define GET_REGINFO_HEADER
#include "NVPTXGenRegisterInfo.inc"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -33,34 +32,28 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
private:
bool Is64Bit;
// Hold Strings that can be free'd all together with NVPTXRegisterInfo
- ManagedStringPool ManagedStrPool;
+ ManagedStringPool ManagedStrPool;
public:
- NVPTXRegisterInfo(const TargetInstrInfo &tii,
- const NVPTXSubtarget &st);
-
+ NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st);
//------------------------------------------------------
// Pure virtual functions from TargetRegisterInfo
//------------------------------------------------------
// NVPTX callee saved registers
- virtual const uint16_t*
+ virtual const uint16_t *
getCalleeSavedRegs(const MachineFunction *MF = 0) const;
// NVPTX callee saved register classes
- virtual const TargetRegisterClass* const *
+ virtual const TargetRegisterClass *const *
getCalleeSavedRegClasses(const MachineFunction *MF) const;
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj,
- RegScavenger *RS=NULL) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
virtual unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -78,15 +71,9 @@ public:
};
-
-std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
-std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
-bool isNVPTXVectorRegClass (const TargetRegisterClass *RC);
-std::string getNVPTXElemClassName (const TargetRegisterClass *RC);
-int getNVPTXVectorSize (const TargetRegisterClass *RC);
-const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC);
+std::string getNVPTXRegClassName(const TargetRegisterClass *RC);
+std::string getNVPTXRegClassStr(const TargetRegisterClass *RC);
} // end namespace llvm
-
#endif
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index ba158258b994..8d100d631683 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -37,9 +37,6 @@ foreach i = 0-395 in {
def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
def F#i : NVPTXReg<"%f"#i>; // 32-bit float
def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float
- // Vectors
- foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in
- def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>;
// Arguments
def ia#i : NVPTXReg<"%ia"#i>;
@@ -65,44 +62,3 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
-
-class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
- NVPTXRegClass sClass,
- int e,
- string n>
- : NVPTXRegClass<regTypes, alignment, regList>
-{
- NVPTXRegClass scalarClass=sClass;
- int elems=e;
- string name=n;
-}
-def V2F32Regs
- : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
- Float32Regs, 2, ".v2.f32">;
-def V4F32Regs
- : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)),
- Float32Regs, 4, ".v4.f32">;
-def V2I32Regs
- : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)),
- Int32Regs, 2, ".v2.u32">;
-def V4I32Regs
- : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)),
- Int32Regs, 4, ".v4.u32">;
-def V2F64Regs
- : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)),
- Float64Regs, 2, ".v2.f64">;
-def V2I64Regs
- : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)),
- Int64Regs, 2, ".v2.u64">;
-def V2I16Regs
- : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)),
- Int16Regs, 2, ".v2.u16">;
-def V4I16Regs
- : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)),
- Int16Regs, 4, ".v4.u16">;
-def V2I8Regs
- : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)),
- Int8Regs, 2, ".v2.u8">;
-def V4I8Regs
- : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)),
- Int8Regs, 4, ".v4.u8">;
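
Reviewer note: the NVPTXVecRegClass hierarchy and every V2*/V4* class go away;
vector values are now carried in tuples of the scalar classes and moved through
the elementized _ELE load/store forms added earlier in the patch. A sketch of how
a machine pass would materialize the pieces of a v4f32 under this scheme; the
helper is hypothetical, and the generated NVPTX register-info headers are assumed
to be in scope:

    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    static void makeV4F32Pieces(MachineRegisterInfo &MRI, unsigned Piece[4]) {
      for (unsigned i = 0; i != 4; ++i)
        Piece[i] = MRI.createVirtualRegister(&NVPTX::Float32RegsRegClass);
    }
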
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index f1ca466266f6..e166be5a68e4 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -14,8 +14,8 @@
#ifndef LLVM_NVPTXSECTION_H
#define LLVM_NVPTXSECTION_H
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/GlobalVariable.h"
#include <vector>
namespace llvm {
@@ -38,6 +38,8 @@ public:
virtual bool isBaseAddressKnownZero() const { return true; }
virtual bool UseCodeAlign() const { return false; }
virtual bool isVirtualSection() const { return false; }
+ virtual std::string getLabelBeginName() const { return ""; }
+ virtual std::string getLabelEndName() const { return ""; }
};
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
index 2836cad4f021..83dfe120899a 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -11,19 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Support/InstIterator.h"
-#include "NVPTXUtilities.h"
#include "NVPTXSplitBBatBar.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/InstIterator.h"
using namespace llvm;
-namespace llvm {
-FunctionPass *createSplitBBatBarPass();
-}
+namespace llvm { FunctionPass *createSplitBBatBarPass(); }
char NVPTXSplitBBatBar::ID = 0;
@@ -72,6 +70,4 @@ bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
// This interface will most likely not be necessary, because this pass will
// not be invoked by the driver, but will be used as a prerequisite to
// another pass.
-FunctionPass *llvm::createSplitBBatBarPass() {
- return new NVPTXSplitBBatBar();
-}
+FunctionPass *llvm::createSplitBBatBarPass() { return new NVPTXSplitBBatBar(); }
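
Reviewer note: beyond the include shuffle, this pass's job is unchanged: as the
name suggests, it splits basic blocks at barrier intrinsics so a barrier does not
sit in the middle of a block. A hedged sketch of the core split, assuming the
barrier call has already been located (detection and iteration are elided, and
the block name is arbitrary):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    static void splitAtBarrier(BasicBlock *BB, Instruction *Barrier) {
      // Everything from Barrier onward moves to a fresh successor block,
      // so the barrier begins a basic block of its own.
      BB->splitBasicBlock(Barrier, "barrier.split");
    }
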
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.h b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
index 9e4d5a066d4c..bdafba9075a0 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.h
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
@@ -15,8 +15,8 @@
#ifndef NVPTX_SPLIT_BB_AT_BAR_H
#define NVPTX_SPLIT_BB_AT_BAR_H
-#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Pass.h"
namespace llvm {
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 7b62cce2c65c..2dcd73dcff9c 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -22,27 +22,23 @@ using namespace llvm;
// Select Driver Interface
#include "llvm/Support/CommandLine.h"
namespace {
-cl::opt<NVPTX::DrvInterface>
-DriverInterface(cl::desc("Choose driver interface:"),
- cl::values(
- clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
- clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
- clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
- clEnumValEnd),
- cl::init(NVPTX::NVCL));
+cl::opt<NVPTX::DrvInterface> DriverInterface(
+ cl::desc("Choose driver interface:"),
+ cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
+ clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
+ clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd),
+ cl::init(NVPTX::NVCL));
}
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit)
-: NVPTXGenSubtargetInfo(TT, CPU, FS),
- Is64Bit(is64Bit),
- PTXVersion(0),
- SmVersion(10) {
+ : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
+ SmVersion(20) {
drvInterface = DriverInterface;
// Provide the default CPU if none
- std::string defCPU = "sm_10";
+ std::string defCPU = "sm_20";
ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS);
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index 3cfd9718e541..670077daaa69 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -14,8 +14,8 @@
#ifndef NVPTXSUBTARGET_H
#define NVPTXSUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "NVPTX.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#define GET_SUBTARGETINFO_HEADER
#include "NVPTXGenSubtargetInfo.inc"
@@ -25,7 +25,7 @@
namespace llvm {
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
-
+
std::string TargetName;
NVPTX::DrvInterface drvInterface;
bool Is64Bit;
@@ -57,16 +57,14 @@ public:
bool hasF32FTZ() const { return SmVersion >= 20; }
bool hasFMAF32() const { return SmVersion >= 20; }
bool hasFMAF64() const { return SmVersion >= 13; }
+ bool hasLDG() const { return SmVersion >= 32; }
bool hasLDU() const { return SmVersion >= 20; }
bool hasGenericLdSt() const { return SmVersion >= 20; }
inline bool hasHWROT32() const { return false; }
- inline bool hasSWROT32() const {
- return true;
- }
- inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; }
+ inline bool hasSWROT32() const { return true; }
+ inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
-
bool is64Bit() const { return Is64Bit; }
unsigned int getSmVersion() const { return SmVersion; }
@@ -95,4 +93,4 @@ public:
} // End llvm namespace
-#endif // NVPTXSUBTARGET_H
+#endif // NVPTXSUBTARGET_H
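
Reviewer note: the functional payload of the two subtarget hunks is the new
hasLDG() predicate (SmVersion >= 32) gating the ld.global.nc patterns earlier in
the patch, plus the default-architecture bump from sm_10 to sm_20 in the .cpp.
A sketch of what that default now implies; the triple string is illustrative and
the constructor signature is the one shown in the .cpp hunk above:

    #include "NVPTXSubtarget.h"
    using namespace llvm;

    static bool defaultSubtargetHasLDG() {
      // No CPU given: ParseSubtargetFeatures now sees "sm_20".
      NVPTXSubtarget ST("nvptx64-nvidia-cuda", /*CPU=*/"", /*FS=*/"",
                        /*is64Bit=*/true);
      return ST.hasLDG();  // false; LDG still needs sm_32 or newer via -mcpu
    }
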
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index cbb490003d37..67ca6b58e5a6 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -12,27 +12,30 @@
//===----------------------------------------------------------------------===//
#include "NVPTXTargetMachine.h"
-#include "NVPTX.h"
-#include "NVPTXSplitBBatBar.h"
-#include "NVPTXLowerAggrCopies.h"
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "NVPTX.h"
#include "NVPTXAllocaHoisting.h"
-#include "llvm/PassManager.h"
+#include "NVPTXLowerAggrCopies.h"
+#include "NVPTXSplitBBatBar.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -41,14 +44,12 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
-
using namespace llvm;
+namespace llvm {
+void initializeNVVMReflectPass(PassRegistry&);
+}
extern "C" void LLVMInitializeNVPTXTarget() {
// Register the target.
@@ -58,53 +59,42 @@ extern "C" void LLVMInitializeNVPTXTarget() {
RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
+ // FIXME: This pass is really intended to be invoked during IR optimization,
+ // but it's very NVPTX-specific.
+ initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
}
-NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
- StringRef TT,
- StringRef CPU,
- StringRef FS,
- const TargetOptions& Options,
- Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64bit)
-: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64bit),
- DL(Subtarget.getDataLayout()),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit),
- STTI(&TLInfo), VTTI(&TLInfo)
-/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
-}
-
-
+NVPTXTargetMachine::NVPTXTargetMachine(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(
+ *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {}
void NVPTXTargetMachine32::anchor() {}
-NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
-: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
-}
+NVPTXTargetMachine32::NVPTXTargetMachine32(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
void NVPTXTargetMachine64::anchor() {}
-NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
-: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
-}
-
+NVPTXTargetMachine64::NVPTXTargetMachine64(
+ const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
namespace llvm {
class NVPTXPassConfig : public TargetPassConfig {
public:
NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {}
NVPTXTargetMachine &getNVPTXTargetMachine() const {
return getTM<NVPTXTargetMachine>();
@@ -125,10 +115,7 @@ bool NVPTXPassConfig::addInstSelector() {
addPass(createSplitBBatBarPass());
addPass(createAllocaHoisting());
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
- addPass(createVectorElementizePass(getNVPTXTargetMachine()));
return false;
}
-bool NVPTXPassConfig::addPreRegAlloc() {
- return false;
-}
+bool NVPTXPassConfig::addPreRegAlloc() { return false; }
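
Reviewer note: besides the constructor reflow, this file gains NVVMReflect
initialization at target-registration time (flagged by the FIXME as an IR-level
pass living in a backend) and drops createVectorElementizePass() from
addInstSelector(), consistent with the vector register classes removed above.
The registration idiom, restated; the wrapper function is hypothetical, and
initializeNVVMReflectPass is exactly the hook declared in the hunk:

    #include "llvm/PassRegistry.h"
    namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }

    static void registerNVPTXIRPasses() {
      // Make the pass known to the global registry before pipelines are built.
      llvm::initializeNVVMReflectPass(*llvm::PassRegistry::getPassRegistry());
    }
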
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 11bc9d4fa698..5fbcf735b48f 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -11,77 +11,64 @@
//
//===----------------------------------------------------------------------===//
-
#ifndef NVPTX_TARGETMACHINE_H
#define NVPTX_TARGETMACHINE_H
-#include "NVPTXInstrInfo.h"
+#include "ManagedStringPool.h"
+#include "NVPTXFrameLowering.h"
#include "NVPTXISelLowering.h"
+#include "NVPTXInstrInfo.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXSubtarget.h"
-#include "NVPTXFrameLowering.h"
-#include "ManagedStringPool.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
-#include "llvm/Target/TargetTransformImpl.h"
namespace llvm {
/// NVPTXTargetMachine
///
class NVPTXTargetMachine : public LLVMTargetMachine {
- NVPTXSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- NVPTXInstrInfo InstrInfo;
- NVPTXTargetLowering TLInfo;
- TargetSelectionDAGInfo TSInfo;
+ NVPTXSubtarget Subtarget;
+ const DataLayout DL; // Calculates type size & alignment
+ NVPTXInstrInfo InstrInfo;
+ NVPTXTargetLowering TLInfo;
+ TargetSelectionDAGInfo TSInfo;
// NVPTX does not have any call stack frame, but need a NVPTX specific
// FrameLowering class because TargetFrameLowering is abstract.
- NVPTXFrameLowering FrameLowering;
+ NVPTXFrameLowering FrameLowering;
// Hold Strings that can be free'd all together with NVPTXTargetMachine
- ManagedStringPool ManagedStrPool;
-
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
+ ManagedStringPool ManagedStrPool;
//bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
// bool DisableVerify, MCContext *&OutCtx);
public:
- NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
- StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OP,
- bool is64bit);
+ NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
- virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const DataLayout *getDataLayout() const { return &DL;}
- virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}
+ virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const DataLayout *getDataLayout() const { return &DL; }
+ virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
virtual const NVPTXRegisterInfo *getRegisterInfo() const {
return &(InstrInfo.getRegisterInfo());
}
virtual NVPTXTargetLowering *getTargetLowering() const {
- return const_cast<NVPTXTargetLowering*>(&TLInfo);
+ return const_cast<NVPTXTargetLowering *>(&TLInfo);
}
virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
//virtual bool addInstSelector(PassManagerBase &PM,
// CodeGenOpt::Level OptLevel);
@@ -89,22 +76,19 @@ public:
//virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
ManagedStringPool *getManagedStrPool() const {
- return const_cast<ManagedStringPool*>(&ManagedStrPool);
+ return const_cast<ManagedStringPool *>(&ManagedStrPool);
}
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
// Emission of machine code through JITCodeEmitter is not supported.
- virtual bool addPassesToEmitMachineCode(PassManagerBase &,
- JITCodeEmitter &,
+ virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
bool = true) {
return true;
}
// Emission of machine code through MCJIT is not supported.
- virtual bool addPassesToEmitMC(PassManagerBase &,
- MCContext *&,
- raw_ostream &,
+ virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
bool = true) {
return true;
}
@@ -129,7 +113,6 @@ public:
CodeGenOpt::Level OL);
};
-
} // end namespace llvm
#endif
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index b5698a2fc08f..6ab0e08ad091 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -46,45 +46,43 @@ public:
}
virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
- TextSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getText());
- DataSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getDataRel());
- BSSSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getBSS());
- ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getReadOnly());
+ TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
+ DataSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
+ BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS());
+ ReadOnlySection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly());
- StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- LSDASection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
- DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
- SectionKind::getMetadata());
+ StaticCtorSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ StaticDtorSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ LSDASection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ EHFrameSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfAbbrevSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfInfoSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfLineSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfFrameSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfPubTypesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfDebugInlineSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfStrSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfLocSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfARangesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfRangesSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfMacroInfoSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
@@ -93,8 +91,7 @@ public:
virtual const MCSection *
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang,
- const TargetMachine &TM) const {
+ Mangler *Mang, const TargetMachine &TM) const {
return DataSection;
}
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 3f52251cc1b2..6786eb02240c 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -12,11 +12,11 @@
#include "NVPTXUtilities.h"
#include "NVPTX.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/Constants.h"
-#include "llvm/Operator.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include <algorithm>
#include <cstring>
#include <map>
@@ -34,7 +34,6 @@ typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
ManagedStatic<per_module_annot_t> annotationCache;
-
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
assert(md && "Invalid mdnode for annotation");
assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
@@ -46,7 +45,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
assert(prop && "Annotation property not a string");
// value
- ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1));
+ ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1));
assert(Val && "Value operand not a constant int");
std::string keyname = prop->getString().str();
@@ -120,9 +119,9 @@ bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
bool llvm::isTexture(const llvm::Value &val) {
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(gv,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
- annot)) {
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
+ annot)) {
assert((annot == 1) && "Unexpected annotation on a texture symbol");
return true;
}
@@ -133,9 +132,9 @@ bool llvm::isTexture(const llvm::Value &val) {
bool llvm::isSurface(const llvm::Value &val) {
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(gv,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
- annot)) {
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
+ annot)) {
assert((annot == 1) && "Unexpected annotation on a surface symbol");
return true;
}
@@ -146,9 +145,9 @@ bool llvm::isSurface(const llvm::Value &val) {
bool llvm::isSampler(const llvm::Value &val) {
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(gv,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
- annot)) {
+ if (llvm::findOneNVVMAnnotation(
+ gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+ annot)) {
assert((annot == 1) && "Unexpected annotation on a sampler symbol");
return true;
}
@@ -156,9 +155,9 @@ bool llvm::isSampler(const llvm::Value &val) {
if (const Argument *arg = dyn_cast<Argument>(&val)) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
- if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
- annot)) {
+ if (llvm::findAllNVVMAnnotation(
+ func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+ annot)) {
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
return true;
}
@@ -171,8 +170,9 @@ bool llvm::isImageReadOnly(const llvm::Value &val) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
- annot)) {
+ llvm::PropertyAnnotationNames[
+ llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
+ annot)) {
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
return true;
}
@@ -185,8 +185,9 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
- annot)) {
+ llvm::PropertyAnnotationNames[
+ llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
+ annot)) {
if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
return true;
}
@@ -214,52 +215,44 @@ std::string llvm::getSamplerName(const llvm::Value &val) {
}
bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X],
- x));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x));
}
bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y],
- y));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y));
}
bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z],
- z));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z));
}
bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X],
- x));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x));
}
bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y],
- y));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y));
}
bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z],
- z));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z));
}
bool llvm::getMinCTASm(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM],
- x));
+ return (llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x));
}
bool llvm::isKernelFunction(const Function &F) {
unsigned x = 0;
- bool retval = llvm::findOneNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
- x);
+ bool retval = llvm::findOneNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
if (retval == false) {
// There is no NVVM metadata; check the calling convention
if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
@@ -267,20 +260,19 @@ bool llvm::isKernelFunction(const Function &F) {
else
return false;
}
- return (x==1);
+ return (x == 1);
}
bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
std::vector<unsigned> Vs;
- bool retval = llvm::findAllNVVMAnnotation(&F,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
- Vs);
+ bool retval = llvm::findAllNVVMAnnotation(
+ &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
if (retval == false)
return false;
- for (int i=0, e=Vs.size(); i<e; i++) {
+ for (int i = 0, e = Vs.size(); i < e; i++) {
unsigned v = Vs[i];
- if ( (v >> 16) == index ) {
- align = v & 0xFFFF;
+ if ((v >> 16) == index) {
+ align = v & 0xFFFF;
return true;
}
}
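The two getAlign() overloads above and below decode one packing scheme: each annotation value carries the parameter index in its high 16 bits and the alignment in its low 16 bits. A standalone C++ sketch of that packing (illustrative only, not part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Pack (index << 16) | alignment, as the "align" annotations read by
    // getAlign() do; the asserts mirror the unpacking in the code above.
    uint32_t v = (2u << 16) | 128u; // parameter index 2, alignment 128
    assert((v >> 16) == 2);         // recover the parameter index
    assert((v & 0xFFFF) == 128);    // recover the alignment
    return 0;
  }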
@@ -289,16 +281,15 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
if (MDNode *alignNode = I.getMetadata("callalign")) {
- for (int i=0, n = alignNode->getNumOperands();
- i<n; i++) {
+ for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+ dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
unsigned v = CI->getZExtValue();
- if ( (v>>16) == index ) {
+ if ((v >> 16) == index) {
align = v & 0xFFFF;
return true;
}
- if ( (v>>16) > index ) {
+ if ((v >> 16) > index) {
return false;
}
}
@@ -337,8 +328,8 @@ bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
// consider several special intrinsics in stripping pointer casts, and
// provide an option to ignore GEP indices to find out the base address only,
// which could be used in simple alias disambiguation.
-const Value *llvm::skipPointerTransfer(const Value *V,
- bool ignore_GEP_indices) {
+const Value *
+llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
V = V->stripPointerCasts();
while (true) {
if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
@@ -360,8 +351,8 @@ const Value *llvm::skipPointerTransfer(const Value *V,
// - ignore GEP indices to find out the base address only, and
// - tracking PHINode,
// which could be used in simple alias disambiguation.
-const Value *llvm::skipPointerTransfer(const Value *V,
- std::set<const Value *> &processed) {
+const Value *
+llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
if (processed.find(V) != processed.end())
return NULL;
processed.insert(V);
@@ -406,7 +397,6 @@ const Value *llvm::skipPointerTransfer(const Value *V,
return V;
}
-
// The following are some useful utilities for debugging
BasicBlock *llvm::getParentBlock(Value *v) {
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index fe6ad559e9df..a208004297d0 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -14,17 +14,16 @@
#ifndef NVPTXUTILITIES_H
#define NVPTXUTILITIES_H
-#include "llvm/Value.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
#include <cstdarg>
#include <set>
#include <string>
#include <vector>
-namespace llvm
-{
+namespace llvm {
#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
@@ -64,8 +63,7 @@ bool isBarrierIntrinsic(llvm::Intrinsic::ID);
/// to pass into type construction of CallInst ctors. This turns a null
/// terminated list of pointers (or other value types) into a real live vector.
///
-template<typename T>
-inline std::vector<T> make_vector(T A, ...) {
+template <typename T> inline std::vector<T> make_vector(T A, ...) {
va_list Args;
va_start(Args, A);
std::vector<T> Result;
@@ -78,8 +76,8 @@ inline std::vector<T> make_vector(T A, ...) {
bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
-const Value *skipPointerTransfer(const Value *V,
- std::set<const Value *> &processed);
+const Value *
+skipPointerTransfer(const Value *V, std::set<const Value *> &processed);
BasicBlock *getParentBlock(Value *v);
Function *getParentFunction(Value *v);
void dumpBlock(Value *v, char *blockName);
diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp
index 6a0e5328f62f..5f074b33a2d4 100644
--- a/lib/Target/NVPTX/NVPTXutil.cpp
+++ b/lib/Target/NVPTX/NVPTXutil.cpp
@@ -18,8 +18,7 @@ using namespace llvm;
namespace llvm {
-bool isParamLoad(const MachineInstr *MI)
-{
+bool isParamLoad(const MachineInstr *MI) {
if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
(MI->getOpcode() != NVPTX::LD_i64_avar))
return false;
@@ -30,13 +29,11 @@ bool isParamLoad(const MachineInstr *MI)
return true;
}
-#define DATA_MASK 0x7f
-#define DIGIT_WIDTH 7
-#define MORE_BYTES 0x80
+#define DATA_MASK 0x7f
+#define DIGIT_WIDTH 7
+#define MORE_BYTES 0x80
-static int encode_leb128(uint64_t val, int *nbytes,
- char *space, int splen)
-{
+static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
char *a;
char *end = space + splen;
@@ -61,29 +58,30 @@ static int encode_leb128(uint64_t val, int *nbytes,
#undef DIGIT_WIDTH
#undef MORE_BYTES
-uint64_t encode_leb128(const char *str)
-{
- union { uint64_t x; char a[8]; } temp64;
+uint64_t encode_leb128(const char *str) {
+ union {
+ uint64_t x;
+ char a[8];
+ } temp64;
temp64.x = 0;
- for (unsigned i=0,e=strlen(str); i!=e; ++i)
- temp64.a[i] = str[e-1-i];
+ for (unsigned i = 0, e = strlen(str); i != e; ++i)
+ temp64.a[i] = str[e - 1 - i];
char encoded[16];
int nbytes;
int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
- (void)retval;
- assert(retval == 0 &&
- "Encoding to leb128 failed");
+ (void) retval;
+ assert(retval == 0 && "Encoding to leb128 failed");
assert(nbytes <= 8 &&
"Cannot support register names with leb128 encoding > 8 bytes");
temp64.x = 0;
- for (int i=0; i<nbytes; ++i)
+ for (int i = 0; i < nbytes; ++i)
temp64.a[i] = encoded[i];
return temp64.x;
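For reference, encode_leb128() implements ULEB128: DIGIT_WIDTH (7) data bits per output byte, masked with DATA_MASK, and MORE_BYTES (0x80) set on every byte except the last. A minimal standalone encoder in the same scheme (a sketch, not the patch's helper):

  #include <cstdint>
  #include <vector>

  // Emit 7 data bits per byte, least-significant group first; bit 7 marks
  // that more bytes follow.
  static std::vector<uint8_t> encodeULEB128(uint64_t Val) {
    std::vector<uint8_t> Out;
    do {
      uint8_t Byte = Val & 0x7f;
      Val >>= 7;
      if (Val != 0)
        Byte |= 0x80;
      Out.push_back(Byte);
    } while (Val != 0);
    return Out;
  }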
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
new file mode 100644
index 000000000000..0ad62ce39b0d
--- /dev/null
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -0,0 +1,177 @@
+//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces occurrences of __nvvm_reflect("string") with an
+// integer based on the -nvvm-reflect-list string=<int> option given to this pass.
+// If an undefined string value is seen in a call to __nvvm_reflect("string"),
+// a default value of 0 will be used.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#define NVVM_REFLECT_FUNCTION "__nvvm_reflect"
+
+using namespace llvm;
+
+namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
+
+namespace {
+class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass {
+private:
+ StringMap<int> VarMap;
+ typedef DenseMap<std::string, int>::iterator VarMapIter;
+ Function *ReflectFunction;
+
+public:
+ static char ID;
+ NVVMReflect() : ModulePass(ID) {
+ VarMap.clear();
+ ReflectFunction = 0;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
+ virtual bool runOnModule(Module &);
+
+ void setVarMap();
+};
+}
+
+static cl::opt<bool>
+NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true),
+ cl::desc("NVVM reflection, enabled by default"));
+
+char NVVMReflect::ID = 0;
+INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
+ "Replace occurrences of __nvvm_reflect() calls with 0/1", false,
+ false)
+
+static cl::list<std::string>
+ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"),
+ cl::desc("A list of string=num assignments"),
+ cl::ValueRequired);
+
+/// The command line can look as follows:
+/// -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0 -R e=2
+/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the
+/// ReflectList vector. First, each of ReflectList[i] is 'split'
+/// using "," as the delimiter. Then each part is split
+/// using "=" as the delimiter.
+void NVVMReflect::setVarMap() {
+ for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
+ DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n");
+ SmallVector<StringRef, 4> NameValList;
+ StringRef(ReflectList[i]).split(NameValList, ",");
+ for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
+ SmallVector<StringRef, 2> NameValPair;
+ NameValList[j].split(NameValPair, "=");
+ assert(NameValPair.size() == 2 && "name=val expected");
+ std::stringstream ValStream(NameValPair[1]);
+ int Val;
+ ValStream >> Val;
+ assert((!(ValStream.fail())) && "integer value expected");
+ VarMap[NameValPair[0]] = Val;
+ }
+ }
+}
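The same name=val parsing can be sketched with only the standard library (a hypothetical helper, shown for clarity; the pass itself uses StringRef::split as above):

  #include <cassert>
  #include <map>
  #include <sstream>
  #include <string>

  // Parse "a=1,b=2" style assignments into a map, mirroring setVarMap().
  static void parseAssignments(const std::string &List,
                               std::map<std::string, int> &VarMap) {
    std::stringstream Pairs(List);
    std::string Pair;
    while (std::getline(Pairs, Pair, ',')) {
      std::size_t Eq = Pair.find('=');
      assert(Eq != std::string::npos && "name=val expected");
      VarMap[Pair.substr(0, Eq)] = std::stoi(Pair.substr(Eq + 1));
    }
  }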
+
+bool NVVMReflect::runOnModule(Module &M) {
+ if (!NVVMReflectEnabled)
+ return false;
+
+ setVarMap();
+
+ ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION);
+
+ // If the reflect function is not used, then there will be
+ // no entry in the module.
+ if (ReflectFunction == 0)
+ return false;
+
+ // Validate _reflect function
+ assert(ReflectFunction->isDeclaration() &&
+ "_reflect function should not have a body");
+ assert(ReflectFunction->getReturnType()->isIntegerTy() &&
+ "_reflect's return type should be integer");
+
+ std::vector<Instruction *> ToRemove;
+
+ // Go through the uses of ReflectFunction in this Function.
+ // Each of them should be a CallInst with a ConstantArray argument.
+ // First validate that. If the c-string corresponding to the
+ // ConstantArray can be found successfully, see if it can be
+ // found in VarMap. If so, replace the uses of CallInst with the
+ // value found in VarMap. If not, replace the use with value 0.
+ for (Value::use_iterator I = ReflectFunction->use_begin(),
+ E = ReflectFunction->use_end();
+ I != E; ++I) {
+ assert(isa<CallInst>(*I) && "Only a call instruction can use _reflect");
+ CallInst *Reflect = cast<CallInst>(*I);
+
+ assert((Reflect->getNumOperands() == 2) &&
+ "Only one operand expected for _reflect function");
+ // In CUDA, we will have an extra constant-to-generic conversion of
+ // the string.
+ const Value *conv = Reflect->getArgOperand(0);
+ assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion");
+ const CallInst *ConvCall = cast<CallInst>(conv);
+ const Value *str = ConvCall->getArgOperand(0);
+ assert(isa<ConstantExpr>(str) &&
+ "Format of _reflect function not recognized");
+ const ConstantExpr *GEP = cast<ConstantExpr>(str);
+
+ const Value *Sym = GEP->getOperand(0);
+ assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");
+
+ const Constant *SymStr = cast<Constant>(Sym);
+
+ assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&
+ "Format of _reflect function not recognized");
+
+ assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&
+ "Format of _reflect function not recognized");
+
+ std::string ReflectArg =
+ cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();
+
+ ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
+ DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
+
+ int ReflectVal = 0; // The default value is 0
+ if (VarMap.find(ReflectArg) != VarMap.end()) {
+ ReflectVal = VarMap[ReflectArg];
+ }
+ Reflect->replaceAllUsesWith(
+ ConstantInt::get(Reflect->getType(), ReflectVal));
+ ToRemove.push_back(Reflect);
+ }
+ if (ToRemove.size() == 0)
+ return false;
+
+ for (unsigned i = 0, e = ToRemove.size(); i != e; ++i)
+ ToRemove[i]->eraseFromParent();
+ return true;
+}
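Stripped of the IR plumbing, the folding above is a lookup with a default of 0 followed by replaceAllUsesWith on the resulting constant. The lookup in isolation (an illustrative sketch; the pass uses a StringMap):

  #include <map>
  #include <string>

  // A key missing from the map folds to 0, matching ReflectVal's default
  // in runOnModule().
  static int reflectValue(const std::map<std::string, int> &VarMap,
                          const std::string &Arg) {
    std::map<std::string, int>::const_iterator It = VarMap.find(Arg);
    return It == VarMap.end() ? 0 : It->second;
  }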
diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index f3624b9f23c7..cc7d4dc5ece7 100644
--- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -17,7 +17,7 @@ Target llvm::TheNVPTXTarget64;
extern "C" void LLVMInitializeNVPTXTargetInfo() {
RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
- "NVIDIA PTX 32-bit");
+ "NVIDIA PTX 32-bit");
RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
- "NVIDIA PTX 64-bit");
+ "NVIDIA PTX 64-bit");
}
diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp
deleted file mode 100644
index 8043e2de0972..000000000000
--- a/lib/Target/NVPTX/VectorElementize.cpp
+++ /dev/null
@@ -1,1248 +0,0 @@
-//===-- VectorElementize.cpp - Remove unreachable blocks for codegen --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass converts operations on vector types to operations on their
-// element types.
-//
-// For generic binary and unary vector instructions, the conversion is simple.
-// Suppose we have
-// av = bv Vop cv
-// where av, bv, and cv are vector virtual registers, and Vop is a vector op.
-// This gets converted to the following :
-// a1 = b1 Sop c1
-// a2 = b2 Sop c2
-//
-// VectorToScalarMap maintains the vector vreg to scalar vreg mapping.
-// For the above example, the map will look as follows:
-// av => [a1, a2]
-// bv => [b1, b2]
-//
-// In addition, initVectorInfo creates the following opcode->opcode map.
-// Vop => Sop
-// OtherVop => OtherSop
-// ...
-//
-// For vector-specific instructions like vecbuild, vecshuffle etc., the
-// conversion is different. Look at comments near the functions with
-// prefix createVec<...>.
-//
-//===----------------------------------------------------------------------===//
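For context on the pass being deleted here, its transformation in miniature (illustrative C++, not the pass itself): an N-lane vector op av = bv Vop cv becomes N scalar ops, one per mapped element register, as the header comment above describes.

  #include <array>
  #include <cstddef>

  // av = bv + cv scalarizes to a1 = b1 + c1, ..., aN = bN + cN.
  template <typename T, std::size_t N>
  std::array<T, N> addV(const std::array<T, N> &B, const std::array<T, N> &C) {
    std::array<T, N> A;
    for (std::size_t I = 0; I != N; ++I)
      A[I] = B[I] + C[I];
    return A;
  }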
-
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Constant.h"
-#include "llvm/Instructions.h"
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "NVPTX.h"
-#include "NVPTXTargetMachine.h"
-
-using namespace llvm;
-
-namespace {
-
-class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass {
- virtual bool runOnMachineFunction(MachineFunction &F);
-
- NVPTXTargetMachine &TM;
- MachineRegisterInfo *MRI;
- const NVPTXRegisterInfo *RegInfo;
- const NVPTXInstrInfo *InstrInfo;
-
- llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *>
- RegClassMap;
- llvm::DenseMap<unsigned, bool> SimpleMoveMap;
-
- llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap;
-
- bool isVectorInstr(MachineInstr *);
-
- SmallVector<unsigned, 4> getScalarRegisters(unsigned);
- unsigned getScalarVersion(unsigned);
- unsigned getScalarVersion(MachineInstr *);
-
- bool isVectorRegister(unsigned);
- const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC);
- unsigned numCopiesNeeded(MachineInstr *);
-
- void createLoadCopy(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
- void createStoreCopy(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- void createVecDest(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- void createCopies(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- unsigned copyProp(MachineFunction&);
- unsigned removeDeadMoves(MachineFunction&);
-
- void elementize(MachineFunction&);
-
- bool isSimpleMove(MachineInstr *);
-
- void createVecShuffle(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecExtract(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecInsert(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecBuild(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
-public:
-
- static char ID; // Pass identification, replacement for typeid
- VectorElementize(NVPTXTargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm) {}
-
- virtual const char *getPassName() const {
- return "Convert LLVM vector types to their element types";
- }
-};
-
-char VectorElementize::ID = 1;
-}
-
-static cl::opt<bool>
-RemoveRedundantMoves("nvptx-remove-redundant-moves",
- cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"),
- cl::init(true));
-
-#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \
- >> NVPTX::VecInstTypeShift)
-#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP)
-#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad)
-#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore)
-#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild)
-#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle)
-#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract)
-#define ISVECINSERT(x) (VECINST(x) == NVPTX::VecInsert)
-#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest)
-
-bool VectorElementize::isSimpleMove(MachineInstr *mi) {
- if (mi->isCopy())
- return true;
- unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask)
- >> NVPTX::SimpleMoveShift;
- return (TSFlags == 1);
-}
-
-bool VectorElementize::isVectorInstr(MachineInstr *mi) {
- if ((mi->getOpcode() == NVPTX::PHI) ||
- (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) {
- MachineOperand dest = mi->getOperand(0);
- return isVectorRegister(dest.getReg());
- }
- return ISVECINST(mi);
-}
-
-unsigned VectorElementize::getScalarVersion(MachineInstr *mi) {
- return getScalarVersion(mi->getOpcode());
-}
-
-///=============================================================================
-///Instr is assumed to be a vector instruction. For most vector instructions,
-///the size of the destination vector register gives the number of scalar copies
-///needed. For VecStore, size of getOperand(1) gives the number of scalar copies
-///needed. For VecExtract, the dest is a scalar. So getOperand(1) gives the
-///number of scalar copies needed.
-///=============================================================================
-unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) {
- unsigned numDefs=0;
- unsigned def;
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
-
- if (!oper.isReg()) continue;
- if (!oper.isDef()) continue;
- def = i;
- numDefs++;
- }
- assert((numDefs <= 1) && "Only 0 or 1 defs supported");
-
- if (numDefs == 1) {
- unsigned regnum = Instr->getOperand(def).getReg();
- if (ISVECEXTRACT(Instr))
- regnum = Instr->getOperand(1).getReg();
- return getNVPTXVectorSize(MRI->getRegClass(regnum));
- }
- else if (numDefs == 0) {
- assert(ISVECSTORE(Instr)
- && "Only 0 def instruction supported is vector store");
-
- unsigned regnum = Instr->getOperand(0).getReg();
- return getNVPTXVectorSize(MRI->getRegClass(regnum));
- }
- return 1;
-}
-
-const TargetRegisterClass *VectorElementize::
-getScalarRegClass(const TargetRegisterClass *RC) {
- assert(isNVPTXVectorRegClass(RC) &&
- "Not a vector register class");
- return getNVPTXElemClass(RC);
-}
-
-bool VectorElementize::isVectorRegister(unsigned reg) {
- const TargetRegisterClass *RC=MRI->getRegClass(reg);
- return isNVPTXVectorRegClass(RC);
-}
-
-///=============================================================================
-///For every vector register 'v' that is not already in the VectorToScalarMap,
-///create n scalar registers of the corresponding element type, where n
-///is 2 or 4 (getNVPTXVectorSize) and add it to VectorToScalarMap.
-///=============================================================================
-SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) {
- assert(isVectorRegister(regnum) && "Expecting a vector register here");
- // Create the scalar registers and put them in the map, if not already there.
- if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) {
- const TargetRegisterClass *vecClass = MRI->getRegClass(regnum);
- const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass);
-
- SmallVector<unsigned, 4> temp;
-
- for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i)
- temp.push_back(MRI->createVirtualRegister(scalarClass));
-
- VectorToScalarMap[regnum] = temp;
- }
- return VectorToScalarMap[regnum];
-}
-
-///=============================================================================
-///For a vector load of the form
-///va <= ldv2 [addr]
-///the following multi output instruction is created :
-///[v1, v2] <= LD [addr]
-///Look at NVPTXVector.td for the definitions of multi output loads.
-///=============================================================================
-void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstr *copy=copies[0];
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- // Remove the dest, which should be a vector operand.
- MachineOperand dest = copy->getOperand(0);
- unsigned regnum = dest.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) {
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));
- }
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy->addOperand(otherOperands[i]);
-
-}
-
-///=============================================================================
-///For a vector store of the form
-///stv2 va, [addr]
-///the following multi input instruction is created :
-///ST v1, v2, [addr]
-///Look at NVPTXVector.td for the definitions of multi input stores.
-///=============================================================================
-void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstr *copy=copies[0];
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- MachineOperand src = copy->getOperand(0);
- unsigned regnum = src.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false));
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy->addOperand(otherOperands[i]);
-}
-
-///=============================================================================
-///va <= shufflev2 vb, vc, <i1>, <i2>
-///gets converted to 2 moves into a1 and a2. The source of the moves depends on
-///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For
-///shufflev4 the set is {0, ..., 7}. For example, if i1=3, i2=0, the move
-///instructions will be
-///a1 <= c2
-///a2 <= b1
-///=============================================================================
-void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
- unsigned src1regnum = Instr->getOperand(1).getReg();
- unsigned src2regnum = Instr->getOperand(2).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
- SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum);
- SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum);
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++) {
- MachineInstr *copy = BuildMI(F, DL,
- InstrInfo->get(getScalarVersion(Instr)), dest[i]);
- MachineOperand which=Instr->getOperand(3+i);
- assert(which.isImm() && "Shuffle operand not a constant");
-
- int src=which.getImm();
- int elem=src%numcopies;
-
- if (which.getImm() < numcopies)
- copy->addOperand(MachineOperand::CreateReg(src1[elem], false));
- else
- copy->addOperand(MachineOperand::CreateReg(src2[elem], false));
- copies.push_back(copy);
- }
-}
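The selection arithmetic in createVecShuffle() above, in isolation (an illustrative sketch): an index in [0, 2N) picks lane Idx % N from src1 when Idx < N, and from src2 otherwise, so for N == 2 the pair i1=3, i2=0 yields a1 <= c2 and a2 <= b1.

  #include <array>
  #include <cassert>
  #include <cstddef>

  // Mirrors the elem = src % numcopies logic above.
  template <typename T, std::size_t N>
  T shuffleElem(const std::array<T, N> &Src1, const std::array<T, N> &Src2,
                unsigned Idx) {
    assert(Idx < 2 * N && "shuffle index out of range");
    return Idx < N ? Src1[Idx % N] : Src2[Idx % N];
  }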
-
-///=============================================================================
-///a <= extractv2 va, <i1>
-///gets turned into a simple move to the scalar register a. The source depends
-///on i1.
-///=============================================================================
-void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned srcregnum = Instr->getOperand(1).getReg();
-
- SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
-
- MachineOperand which = Instr->getOperand(2);
- assert(which.isImm() && "Extract operand not a constant");
-
- DebugLoc DL = Instr->getDebugLoc();
-
- MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
- Instr->getOperand(0).getReg());
- copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false));
-
- copies.push_back(copy);
-}
-
-///=============================================================================
-///va <= vecinsertv2 vb, c, <i1>
-///This instruction copies all elements of vb to va, except the 'i1'th element.
-///The scalar value c becomes the 'i1'th element of va.
-///This gets translated to 2 (4 for vecinsertv4) moves.
-///=============================================================================
-void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
- unsigned srcregnum = Instr->getOperand(1).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
- SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
-
- MachineOperand which=Instr->getOperand(3);
- assert(which.isImm() && "Insert operand not a constant");
- unsigned int elem=which.getImm();
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++) {
- MachineInstr *copy = BuildMI(F, DL,
- InstrInfo->get(getScalarVersion(Instr)), dest[i]);
-
- if (i != elem)
- copy->addOperand(MachineOperand::CreateReg(src[i], false));
- else
- copy->addOperand(Instr->getOperand(2));
-
- copies.push_back(copy);
- }
-
-}
-
-///=============================================================================
-///va <= buildv2 b1, b2
-///gets translated to
-///a1 <= b1
-///a2 <= b2
-///=============================================================================
-void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++) {
- MachineInstr *copy = BuildMI(F, DL,
- InstrInfo->get(getScalarVersion(Instr)), dest[i]);
-
- copy->addOperand(Instr->getOperand(1+i));
-
- copies.push_back(copy);
- }
-
-}
-
-///=============================================================================
-///For a tex inst of the form
-///va <= op [scalar operands]
-///the following multi output instruction is created :
-///[v1, v2] <= op' [scalar operands]
-///=============================================================================
-void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstr *copy=copies[0];
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- // Remove the dest, which should be a vector operand.
- MachineOperand dest = copy->getOperand(0);
- unsigned regnum = dest.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true));
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy->addOperand(otherOperands[i]);
-}
-
-///=============================================================================
-///Look at the vector instruction type and dispatch to the createVec<...>
-///function that creates the scalar copies.
-///=============================================================================
-void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- if (ISVECLOAD(Instr)) {
- createLoadCopy(F, Instr, copies);
- return;
- }
- if (ISVECSTORE(Instr)) {
- createStoreCopy(F, Instr, copies);
- return;
- }
- if (ISVECSHUFFLE(Instr)) {
- createVecShuffle(F, Instr, copies);
- return;
- }
- if (ISVECEXTRACT(Instr)) {
- createVecExtract(F, Instr, copies);
- return;
- }
- if (ISVECINSERT(Instr)) {
- createVecInsert(F, Instr, copies);
- return;
- }
- if (ISVECDEST(Instr)) {
- createVecDest(F, Instr, copies);
- return;
- }
- if (ISVECBUILD(Instr)) {
- createVecBuild(F, Instr, copies);
- return;
- }
-
- unsigned numcopies=numCopiesNeeded(Instr);
-
- for (unsigned i=0; i<numcopies; ++i)
- copies.push_back(F.CloneMachineInstr(Instr));
-
- for (unsigned i=0; i<numcopies; ++i) {
- MachineInstr *copy = copies[i];
-
- std::vector<MachineOperand> allOperands;
- std::vector<bool> isDef;
-
- for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) {
- MachineOperand oper = copy->getOperand(j);
- allOperands.push_back(oper);
- if (oper.isReg())
- isDef.push_back(oper.isDef());
- else
- isDef.push_back(false);
- }
-
- for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j)
- copy->RemoveOperand(0);
-
- copy->setDesc(InstrInfo->get(getScalarVersion(Instr)));
-
- for (unsigned j=0, e=allOperands.size(); j!=e; ++j) {
- MachineOperand oper=allOperands[j];
- if (oper.isReg()) {
- unsigned regnum = oper.getReg();
- if (isVectorRegister(regnum)) {
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j]));
- }
- else
- copy->addOperand(oper);
- }
- else
- copy->addOperand(oper);
- }
- }
-}
-
-///=============================================================================
-///Scan through all basic blocks, looking for vector instructions.
-///For each vector instruction I, insert the scalar copies before I, and
-///add I into toRemove vector. Finally remove all instructions in toRemove.
-///=============================================================================
-void VectorElementize::elementize(MachineFunction &F) {
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend();
- BI!=BE; ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- std::vector<MachineInstr *> copies;
- std::vector<MachineInstr *> toRemove;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end();
- II!=IE; ++II) {
- MachineInstr *Instr = &*II;
-
- if (!isVectorInstr(Instr))
- continue;
-
- copies.clear();
- createCopies(F, Instr, copies);
- for (unsigned i=0, e=copies.size(); i!=e; ++i)
- BB->insert(II, copies[i]);
-
- assert((copies.size() > 0) && "Problem in createCopies");
- toRemove.push_back(Instr);
- }
- for (unsigned i=0, e=toRemove.size(); i!=e; ++i)
- F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i]));
- }
-}
-
-///=============================================================================
-///a <= b
-///...
-///...
-///x <= op(a, ...)
-///gets converted to
-///
-///x <= op(b, ...)
-///The original move is still present. This works on SSA form machine code.
-///Note that a <= b should be a simple vreg-to-vreg move instruction.
-///TBD: I didn't find a function that can do replaceOperand, so I remove
-///all operands and add all of them again, replacing the one while adding.
-///=============================================================================
-unsigned VectorElementize::copyProp(MachineFunction &F) {
- unsigned numReplacements = 0;
-
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
- ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
- ++II) {
- MachineInstr *Instr = &*II;
-
- // Don't do copy propagation on PHI as it will cause unnecessary
- // live range overlap.
- if ((Instr->getOpcode() == TargetOpcode::PHI) ||
- (Instr->getOpcode() == TargetOpcode::DBG_VALUE))
- continue;
-
- bool needsReplacement = false;
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
- if (!oper.isReg()) continue;
- if (oper.isDef()) continue;
- if (!RegInfo->isVirtualRegister(oper.getReg())) continue;
-
- MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
-
- if (!defInstr) continue;
-
- if (!isSimpleMove(defInstr)) continue;
-
- MachineOperand defSrc = defInstr->getOperand(1);
- if (!defSrc.isReg()) continue;
- if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue;
-
- needsReplacement = true;
-
- }
- if (!needsReplacement) continue;
-
- numReplacements++;
-
- std::vector<MachineOperand> operands;
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
- bool flag = false;
- do {
- if (!(oper.isReg()))
- break;
- if (oper.isDef())
- break;
- if (!(RegInfo->isVirtualRegister(oper.getReg())))
- break;
- MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
- if (!(isSimpleMove(defInstr)))
- break;
- MachineOperand defSrc = defInstr->getOperand(1);
- if (!(defSrc.isReg()))
- break;
- if (!(RegInfo->isVirtualRegister(defSrc.getReg())))
- break;
- operands.push_back(defSrc);
- flag = true;
- } while (0);
- if (flag == false)
- operands.push_back(oper);
- }
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i)
- Instr->RemoveOperand(0);
- for (unsigned i=0, e=operands.size(); i!=e; ++i)
- Instr->addOperand(operands[i]);
-
- }
- }
- return numReplacements;
-}
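Behind the machine-operand bookkeeping, copyProp() is classic copy propagation: when a use's defining instruction is a simple vreg-to-vreg move, the use is rewritten to the move's source. The same idea over plain names (an illustrative sketch, not the pass):

  #include <cstddef>
  #include <map>
  #include <string>
  #include <vector>

  // Rewrite each use through a map of simple moves (dst -> src), counting
  // replacements as copyProp() does.
  static unsigned
  propagateCopies(std::vector<std::string> &Uses,
                  const std::map<std::string, std::string> &Moves) {
    unsigned NumReplacements = 0;
    for (std::size_t I = 0; I != Uses.size(); ++I) {
      std::map<std::string, std::string>::const_iterator It =
          Moves.find(Uses[I]);
      if (It != Moves.end()) {
        Uses[I] = It->second;
        ++NumReplacements;
      }
    }
    return NumReplacements;
  }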
-
-///=============================================================================
-///Look for simple vreg-to-vreg instructions whose use_empty() is true, add
-///them to deadMoves vector. Then remove all instructions in deadMoves.
-///=============================================================================
-unsigned VectorElementize::removeDeadMoves(MachineFunction &F) {
- std::vector<MachineInstr *> deadMoves;
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
- ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
- ++II) {
- MachineInstr *Instr = &*II;
-
- if (!isSimpleMove(Instr)) continue;
-
- MachineOperand dest = Instr->getOperand(0);
- assert(dest.isReg() && "dest of move not a register");
- assert(RegInfo->isVirtualRegister(dest.getReg()) &&
- "dest of move not a virtual register");
-
- if (MRI->use_empty(dest.getReg())) {
- deadMoves.push_back(Instr);
- }
- }
- }
-
- for (unsigned i=0, e=deadMoves.size(); i!=e; ++i)
- F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i]));
-
- return deadMoves.size();
-}
-
-///=============================================================================
-///Main function for this pass.
-///=============================================================================
-bool VectorElementize::runOnMachineFunction(MachineFunction &F) {
- MRI = &F.getRegInfo();
-
- RegInfo = TM.getRegisterInfo();
- InstrInfo = TM.getInstrInfo();
-
- VectorToScalarMap.clear();
-
- elementize(F);
-
- if (RemoveRedundantMoves)
- while (1) {
- if (copyProp(F) == 0) break;
- removeDeadMoves(F);
- }
-
- return true;
-}
-
-FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) {
- return new VectorElementize(tm);
-}
-
-unsigned VectorElementize::getScalarVersion(unsigned opcode) {
- if (opcode == NVPTX::PHI)
- return opcode;
- if (opcode == NVPTX::IMPLICIT_DEF)
- return opcode;
- switch(opcode) {
- default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td");
- case TargetOpcode::COPY: return TargetOpcode::COPY;
- case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr;
- case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr;
- case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr;
- case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr;
- case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr;
- case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr;
- case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr;
- case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr;
- case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr;
- case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr;
- case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr;
- case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr;
- case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr;
- case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr;
- case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32;
- case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32;
- case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32;
- case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32;
- case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32;
- case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr;
- case NVPTX::F32MADV2: return NVPTX::FMAD32rrr;
- case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr;
- case NVPTX::F32MADV4: return NVPTX::FMAD32rrr;
- case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr;
- case NVPTX::F32FMAV2: return NVPTX::FMA32rrr;
- case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr;
- case NVPTX::F32FMAV4: return NVPTX::FMA32rrr;
- case NVPTX::F64FMAV2: return NVPTX::FMA64rrr;
- case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32;
- case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64;
- case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32;
- case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32;
- case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64;
- case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32;
- case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32;
- case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64;
- case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32;
- case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32;
- case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64;
- case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32;
- case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32;
- case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64;
- case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32;
- case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32;
- case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64;
- case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32;
- case NVPTX::FVecNEV2F32: return NVPTX::FSetNEf32rr_toi32;
- case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64;
- case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32;
- case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32;
- case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64;
- case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32;
- case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32;
- case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64;
- case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32;
- case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32;
- case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64;
- case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32;
- case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32;
- case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64;
- case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32;
- case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32;
- case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64;
- case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32;
- case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32;
- case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64;
- case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32;
- case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32;
- case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64;
- case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32;
- case NVPTX::I16MADV2: return NVPTX::MAD16rrr;
- case NVPTX::I16MADV4: return NVPTX::MAD16rrr;
- case NVPTX::I32MADV2: return NVPTX::MAD32rrr;
- case NVPTX::I32MADV4: return NVPTX::MAD32rrr;
- case NVPTX::I64MADV2: return NVPTX::MAD64rrr;
- case NVPTX::I8MADV2: return NVPTX::MAD8rrr;
- case NVPTX::I8MADV4: return NVPTX::MAD8rrr;
- case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr;
- case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr;
- case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr;
- case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr;
- case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr;
- case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr;
- case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr;
- case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr;
- case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr;
- case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr;
- case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr;
- case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr;
- case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr;
- case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr;
- case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr;
- case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr;
- case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr;
- case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr;
- case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr;
- case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr;
- case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr;
- case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr;
- case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr;
- case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr;
- case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr;
- case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
- case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec;
- case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz;
- case NVPTX::V2F32Div: return NVPTX::FDIV32rr;
- case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr;
- case NVPTX::V2F64Div: return NVPTX::FDIV64rr;
- case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr;
- case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr;
- case NVPTX::V2I32_Select: return NVPTX::SELECTi32rr;
- case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr;
- case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr;
- case NVPTX::V2f32Extract: return NVPTX::FMOV32rr;
- case NVPTX::V2f32Insert: return NVPTX::FMOV32rr;
- case NVPTX::V2f32Mov: return NVPTX::FMOV32rr;
- case NVPTX::V2f64Extract: return NVPTX::FMOV64rr;
- case NVPTX::V2f64Insert: return NVPTX::FMOV64rr;
- case NVPTX::V2f64Mov: return NVPTX::FMOV64rr;
- case NVPTX::V2i16Extract: return NVPTX::IMOV16rr;
- case NVPTX::V2i16Insert: return NVPTX::IMOV16rr;
- case NVPTX::V2i16Mov: return NVPTX::IMOV16rr;
- case NVPTX::V2i32Extract: return NVPTX::IMOV32rr;
- case NVPTX::V2i32Insert: return NVPTX::IMOV32rr;
- case NVPTX::V2i32Mov: return NVPTX::IMOV32rr;
- case NVPTX::V2i64Extract: return NVPTX::IMOV64rr;
- case NVPTX::V2i64Insert: return NVPTX::IMOV64rr;
- case NVPTX::V2i64Mov: return NVPTX::IMOV64rr;
- case NVPTX::V2i8Extract: return NVPTX::IMOV8rr;
- case NVPTX::V2i8Insert: return NVPTX::IMOV8rr;
- case NVPTX::V2i8Mov: return NVPTX::IMOV8rr;
- case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
- case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec;
- case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz;
- case NVPTX::V4F32Div: return NVPTX::FDIV32rr;
- case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr;
- case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr;
- case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr;
- case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr;
- case NVPTX::V4f32Extract: return NVPTX::FMOV32rr;
- case NVPTX::V4f32Insert: return NVPTX::FMOV32rr;
- case NVPTX::V4f32Mov: return NVPTX::FMOV32rr;
- case NVPTX::V4i16Extract: return NVPTX::IMOV16rr;
- case NVPTX::V4i16Insert: return NVPTX::IMOV16rr;
- case NVPTX::V4i16Mov: return NVPTX::IMOV16rr;
- case NVPTX::V4i32Extract: return NVPTX::IMOV32rr;
- case NVPTX::V4i32Insert: return NVPTX::IMOV32rr;
- case NVPTX::V4i32Mov: return NVPTX::IMOV32rr;
- case NVPTX::V4i8Extract: return NVPTX::IMOV8rr;
- case NVPTX::V4i8Insert: return NVPTX::IMOV8rr;
- case NVPTX::V4i8Mov: return NVPTX::IMOV8rr;
- case NVPTX::VAddV2I16: return NVPTX::ADDi16rr;
- case NVPTX::VAddV2I32: return NVPTX::ADDi32rr;
- case NVPTX::VAddV2I64: return NVPTX::ADDi64rr;
- case NVPTX::VAddV2I8: return NVPTX::ADDi8rr;
- case NVPTX::VAddV4I16: return NVPTX::ADDi16rr;
- case NVPTX::VAddV4I32: return NVPTX::ADDi32rr;
- case NVPTX::VAddV4I8: return NVPTX::ADDi8rr;
- case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr;
- case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz;
- case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr;
- case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr;
- case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz;
- case NVPTX::VAndV2I16: return NVPTX::ANDb16rr;
- case NVPTX::VAndV2I32: return NVPTX::ANDb32rr;
- case NVPTX::VAndV2I64: return NVPTX::ANDb64rr;
- case NVPTX::VAndV2I8: return NVPTX::ANDb8rr;
- case NVPTX::VAndV4I16: return NVPTX::ANDb16rr;
- case NVPTX::VAndV4I32: return NVPTX::ANDb32rr;
- case NVPTX::VAndV4I8: return NVPTX::ANDb8rr;
- case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz;
- case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr;
- case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr;
- case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz;
- case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr;
- case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr;
- case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr;
- case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr;
- case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr;
- case NVPTX::VMultHSV4I16: return NVPTX::MULTHSi16rr;
- case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr;
- case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr;
- case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr;
- case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr;
- case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr;
- case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr;
- case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr;
- case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr;
- case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr;
- case NVPTX::VMultV2I16: return NVPTX::MULTi16rr;
- case NVPTX::VMultV2I32: return NVPTX::MULTi32rr;
- case NVPTX::VMultV2I64: return NVPTX::MULTi64rr;
- case NVPTX::VMultV2I8: return NVPTX::MULTi8rr;
- case NVPTX::VMultV4I16: return NVPTX::MULTi16rr;
- case NVPTX::VMultV4I32: return NVPTX::MULTi32rr;
- case NVPTX::VMultV4I8: return NVPTX::MULTi8rr;
- case NVPTX::VNegV2I16: return NVPTX::INEG16;
- case NVPTX::VNegV2I32: return NVPTX::INEG32;
- case NVPTX::VNegV2I64: return NVPTX::INEG64;
- case NVPTX::VNegV2I8: return NVPTX::INEG8;
- case NVPTX::VNegV4I16: return NVPTX::INEG16;
- case NVPTX::VNegV4I32: return NVPTX::INEG32;
- case NVPTX::VNegV4I8: return NVPTX::INEG8;
- case NVPTX::VNegv2f32: return NVPTX::FNEGf32;
- case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz;
- case NVPTX::VNegv2f64: return NVPTX::FNEGf64;
- case NVPTX::VNegv4f32: return NVPTX::FNEGf32;
- case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz;
- case NVPTX::VNotV2I16: return NVPTX::NOT16;
- case NVPTX::VNotV2I32: return NVPTX::NOT32;
- case NVPTX::VNotV2I64: return NVPTX::NOT64;
- case NVPTX::VNotV2I8: return NVPTX::NOT8;
- case NVPTX::VNotV4I16: return NVPTX::NOT16;
- case NVPTX::VNotV4I32: return NVPTX::NOT32;
- case NVPTX::VNotV4I8: return NVPTX::NOT8;
- case NVPTX::VOrV2I16: return NVPTX::ORb16rr;
- case NVPTX::VOrV2I32: return NVPTX::ORb32rr;
- case NVPTX::VOrV2I64: return NVPTX::ORb64rr;
- case NVPTX::VOrV2I8: return NVPTX::ORb8rr;
- case NVPTX::VOrV4I16: return NVPTX::ORb16rr;
- case NVPTX::VOrV4I32: return NVPTX::ORb32rr;
- case NVPTX::VOrV4I8: return NVPTX::ORb8rr;
- case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr;
- case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr;
- case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr;
- case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr;
- case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr;
- case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr;
- case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr;
- case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr;
- case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr;
- case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr;
- case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr;
- case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr;
- case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr;
- case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr;
- case NVPTX::VSubV2I16: return NVPTX::SUBi16rr;
- case NVPTX::VSubV2I32: return NVPTX::SUBi32rr;
- case NVPTX::VSubV2I64: return NVPTX::SUBi64rr;
- case NVPTX::VSubV2I8: return NVPTX::SUBi8rr;
- case NVPTX::VSubV4I16: return NVPTX::SUBi16rr;
- case NVPTX::VSubV4I32: return NVPTX::SUBi32rr;
- case NVPTX::VSubV4I8: return NVPTX::SUBi8rr;
- case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz;
- case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr;
- case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr;
- case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz;
- case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr;
- case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr;
- case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr;
- case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr;
- case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr;
- case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr;
- case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr;
- case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr;
- case NVPTX::VURemV2I16: return NVPTX::UREMi16rr;
- case NVPTX::VURemV2I32: return NVPTX::UREMi32rr;
- case NVPTX::VURemV2I64: return NVPTX::UREMi64rr;
- case NVPTX::VURemV2I8: return NVPTX::UREMi8rr;
- case NVPTX::VURemV4I16: return NVPTX::UREMi16rr;
- case NVPTX::VURemV4I32: return NVPTX::UREMi32rr;
- case NVPTX::VURemV4I8: return NVPTX::UREMi8rr;
- case NVPTX::VXorV2I16: return NVPTX::XORb16rr;
- case NVPTX::VXorV2I32: return NVPTX::XORb32rr;
- case NVPTX::VXorV2I64: return NVPTX::XORb64rr;
- case NVPTX::VXorV2I8: return NVPTX::XORb8rr;
- case NVPTX::VXorV4I16: return NVPTX::XORb16rr;
- case NVPTX::VXorV4I32: return NVPTX::XORb32rr;
- case NVPTX::VXorV4I8: return NVPTX::XORb8rr;
- case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16;
- case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32;
- case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64;
- case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8;
- case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16;
- case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32;
- case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8;
- case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16;
- case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32;
- case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64;
- case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8;
- case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16;
- case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32;
- case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8;
- case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16;
- case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32;
- case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64;
- case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8;
- case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16;
- case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32;
- case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8;
- case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16;
- case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32;
- case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64;
- case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8;
- case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16;
- case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32;
- case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8;
- case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16;
- case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32;
- case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64;
- case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8;
- case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16;
- case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32;
- case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8;
- case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16;
- case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32;
- case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64;
- case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8;
- case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16;
- case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32;
- case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8;
- case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr;
- case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr;
- case NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr;
- case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr;
- case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr;
- case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr;
- case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr;
- case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr;
- case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr;
- case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr;
- case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16;
- case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32;
- case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64;
- case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8;
- case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16;
- case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32;
- case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8;
- case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16;
- case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32;
- case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64;
- case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8;
- case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16;
- case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32;
- case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8;
- case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16;
- case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32;
- case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64;
- case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8;
- case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16;
- case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32;
- case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8;
- case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16;
- case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32;
- case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64;
- case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8;
- case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16;
- case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32;
- case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8;
- case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16;
- case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32;
- case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64;
- case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8;
- case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16;
- case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32;
- case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8;
- case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16;
- case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32;
- case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64;
- case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8;
- case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16;
- case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32;
- case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8;
- case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
-
- case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32;
- case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16;
- case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8;
- case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64;
- case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32;
- case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16;
- case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8;
- case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32;
- case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32;
- case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64;
- case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32;
- case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16;
- case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8;
- case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64;
- case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32;
- case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16;
- case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8;
- case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32;
- case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32;
- case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64;
- case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32;
- case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16;
- case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8;
- case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64;
- case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32;
- case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16;
- case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8;
- case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32;
- case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32;
- case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64;
- case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8;
- case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16;
- case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8;
- case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16;
- case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32;
- case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32;
-
- case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar;
- case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg;
- case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari;
- case NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi;
- case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar;
- case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg;
- case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari;
- case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi;
-
- case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar;
- case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg;
- case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari;
- case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi;
- case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar;
- case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg;
- case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari;
- case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi;
-
- case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar;
- case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg;
- case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari;
- case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi;
- case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar;
- case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg;
- case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari;
- case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi;
-
- case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar;
- case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg;
- case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari;
- case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi;
- case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar;
- case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg;
- case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari;
- case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi;
-
- case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar;
- case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg;
- case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari;
- case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi;
- case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar;
- case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg;
- case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari;
- case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi;
-
- case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar;
- case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg;
- case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari;
- case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi;
- case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar;
- case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg;
- case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari;
- case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi;
-
- case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar;
- case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg;
- case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari;
- case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi;
- case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar;
- case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg;
- case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari;
- case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi;
-
- case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar;
- case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg;
- case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari;
- case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi;
- case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar;
- case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg;
- case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari;
- case NVPTX::ST_v4i32_asi: return NVPTX::STV_i32_v4_asi;
-
- case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar;
- case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg;
- case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari;
- case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi;
- case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar;
- case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg;
- case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari;
- case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi;
-
- case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar;
- case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg;
- case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari;
- case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi;
- case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar;
- case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg;
- case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari;
- case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi;
- }
- return 0;
-}
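
The deleted function above is one large lookup: each vector pseudo-opcode maps to its scalar equivalent, and 0 signals that no mapping exists. A minimal self-contained sketch of the same pattern, with illustrative enum values rather than the real generated NVPTX opcode enums:

    // Sketch of a vector-to-scalar opcode table; names and values are
    // illustrative, not the actual NVPTX instruction enums.
    #include <cstdio>

    enum Opcode { VAddV2I32, VSubV2I32, ADDi32rr, SUBi32rr };

    static unsigned getScalarOpcode(unsigned VecOp) {
      switch (VecOp) {
      default: return 0; // no scalar equivalent
      case VAddV2I32: return ADDi32rr;
      case VSubV2I32: return SUBi32rr;
      }
    }

    int main() {
      std::printf("%u\n", getScalarOpcode(VAddV2I32)); // value of ADDi32rr
      return 0;
    }
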
diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h
index a7347efd7850..45cc0b8b67f2 100644
--- a/lib/Target/NVPTX/cl_common_defines.h
+++ b/lib/Target/NVPTX/cl_common_defines.h
@@ -24,22 +24,21 @@ enum {
CLK_LUMINANCE = 0x10B9
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
- ,
+ ,
CLK_Rx = 0x10BA,
CLK_RGx = 0x10BB,
CLK_RGBx = 0x10BC
#endif
};
-
typedef enum clk_channel_type {
// valid formats for float return types
- CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8
- CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
- CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
- CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
- CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
- CLK_FLOAT = 0x10DE, // four channel RGBA float
+ CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8
+ CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
+ CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
+ CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
+ CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
+ CLK_FLOAT = 0x10DE, // four channel RGBA float
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
CLK_UNORM_SHORT_565 = 0x10D4,
@@ -48,7 +47,7 @@ typedef enum clk_channel_type {
#endif
// valid only for integer return types
- CLK_SIGNED_INT8 = 0x10D7,
+ CLK_SIGNED_INT8 = 0x10D7,
CLK_SIGNED_INT16 = 0x10D8,
CLK_SIGNED_INT32 = 0x10D9,
CLK_UNSIGNED_INT8 = 0x10DA,
@@ -56,70 +55,68 @@ typedef enum clk_channel_type {
CLK_UNSIGNED_INT32 = 0x10DC,
// CI SPI for CPU
- __CLK_UNORM_INT8888 , // four channel ARGB unorm8
- __CLK_UNORM_INT8888R, // four channel BGRA unorm8
+ __CLK_UNORM_INT8888, // four channel ARGB unorm8
+ __CLK_UNORM_INT8888R, // four channel BGRA unorm8
__CLK_VALID_IMAGE_TYPE_COUNT,
__CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
- __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
- // represent any image type
- __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
-}clk_channel_type;
+ __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
+ // represent any image type
+ __CLK_VALID_IMAGE_TYPE_MASK = (1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS) - 1
+} clk_channel_type;
typedef enum clk_sampler_type {
- __CLK_ADDRESS_BASE = 0,
- CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
+ __CLK_ADDRESS_BASE = 0,
+ CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
- CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
+ CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
#endif
- __CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
- CLK_ADDRESS_CLAMP_TO_EDGE |
- CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
- __CLK_ADDRESS_BITS = 3, // number of bits required to
- // represent address info
-
- __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
- CLK_NORMALIZED_COORDS_FALSE = 0,
- CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
- __CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE |
- CLK_NORMALIZED_COORDS_TRUE,
- __CLK_NORMALIZED_BITS = 1, // number of bits required to
- // represent normalization
-
- __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE +
- __CLK_NORMALIZED_BITS,
- CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
- CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
- CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
- __CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
- CLK_FILTER_ANISOTROPIC,
- __CLK_FILTER_BITS = 2, // number of bits required to
- // represent address info
-
- __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
- CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
- CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
- CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
- __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
- CLK_MIP_ANISOTROPIC,
- __CLK_MIP_BITS = 2,
-
- __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
- __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
- __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
-
- __CLK_ANISOTROPIC_RATIO_BITS = 5,
- __CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >>
- (__CLK_ANISOTROPIC_RATIO_BITS-1)
+ __CLK_ADDRESS_MASK =
+ CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE |
+ CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
+ __CLK_ADDRESS_BITS = 3, // number of bits required to
+ // represent address info
+
+ __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
+ CLK_NORMALIZED_COORDS_FALSE = 0,
+ CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
+ __CLK_NORMALIZED_MASK =
+ CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE,
+ __CLK_NORMALIZED_BITS = 1, // number of bits required to
+ // represent normalization
+
+ __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS,
+ CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
+ CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
+ CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
+ __CLK_FILTER_MASK =
+ CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC,
+ __CLK_FILTER_BITS = 2, // number of bits required to
+ // represent address info
+
+ __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
+ CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
+ CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
+ CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
+ __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC,
+ __CLK_MIP_BITS = 2,
+
+ __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
+ __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
+ __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
+
+ __CLK_ANISOTROPIC_RATIO_BITS = 5,
+ __CLK_ANISOTROPIC_RATIO_MASK =
+ (int) 0x80000000 >> (__CLK_ANISOTROPIC_RATIO_BITS - 1)
} clk_sampler_type;
// Memory synchronization
-#define CLK_LOCAL_MEM_FENCE (1 << 0)
-#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+#define CLK_LOCAL_MEM_FENCE (1 << 0)
+#define CLK_GLOBAL_MEM_FENCE (1 << 1)
#endif // __CL_COMMON_DEFINES_H__
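
The clk_sampler_type above packs four independent fields into a single integer: 3 address bits at bit 0, 1 normalized-coordinates bit at bit 3, 2 filter bits at bit 4, and 2 mip bits at bit 6. A short sketch of decoding such a packed value, with the masks restated from the header and an illustrative sampler value:

    #include <cstdio>

    int main() {
      // CLK_ADDRESS_CLAMP | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR
      unsigned Sampler = (1u << 0) | (1u << 3) | (1u << 4);
      unsigned Addr   =  Sampler       & 0x7; // __CLK_ADDRESS_MASK
      unsigned Norm   = (Sampler >> 3) & 0x1;
      unsigned Filter = (Sampler >> 4) & 0x3;
      unsigned Mip    = (Sampler >> 6) & 0x3;
      std::printf("addr=%u norm=%u filter=%u mip=%u\n", Addr, Norm, Filter, Mip);
      return 0;
    }
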
diff --git a/lib/Target/NVPTX/gen-register-defs.py b/lib/Target/NVPTX/gen-register-defs.py
deleted file mode 100644
index ed0666823124..000000000000
--- a/lib/Target/NVPTX/gen-register-defs.py
+++ /dev/null
@@ -1,202 +0,0 @@
-#!/usr/bin/env python
-
-num_regs = 396
-
-outFile = open('NVPTXRegisterInfo.td', 'w')
-
-outFile.write('''
-//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the PTX register file
-//===----------------------------------------------------------------------===//
-
-class NVPTXReg<string n> : Register<n> {
- let Namespace = "NVPTX";
-}
-
-class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
- : RegisterClass <"NVPTX", regTypes, alignment, regList>;
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-
-// Special Registers used as stack pointer
-def VRFrame : NVPTXReg<"%SP">;
-def VRFrameLocal : NVPTXReg<"%SPL">;
-
-// Special Registers used as the stack
-def VRDepot : NVPTXReg<"%Depot">;
-''')
-
-# Predicates
-outFile.write('''
-//===--- Predicate --------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))
-
-# Int8
-outFile.write('''
-//===--- 8-bit ------------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))
-
-# Int16
-outFile.write('''
-//===--- 16-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))
-
-# Int32
-outFile.write('''
-//===--- 32-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))
-
-# Int64
-outFile.write('''
-//===--- 64-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))
-
-# F32
-outFile.write('''
-//===--- 32-bit float -----------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))
-
-# F64
-outFile.write('''
-//===--- 64-bit float -----------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))
-
-# Vector registers
-outFile.write('''
-//===--- Vector -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))
-
-for i in range(0, num_regs):
- outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))
-
-# Argument registers
-outFile.write('''
-//===--- Arguments --------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))
-
-outFile.write('''
-//===----------------------------------------------------------------------===//
-// Register classes
-//===----------------------------------------------------------------------===//
-''')
-
-outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('''
-// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
-''')
-
-outFile.write('''
-class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
- NVPTXRegClass sClass,
- int e,
- string n>
- : NVPTXRegClass<regTypes, alignment, regList>
-{
- NVPTXRegClass scalarClass=sClass;
- int elems=e;
- string name=n;
-}
-''')
-
-
-outFile.write('def V2F32Regs\n : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
-outFile.write('def V4F32Regs\n : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))
-
-outFile.write('def V2I32Regs\n : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
-outFile.write('def V4I32Regs\n : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))
-
-outFile.write('def V2F64Regs\n : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
-outFile.write('def V2I64Regs\n : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))
-
-outFile.write('def V2I16Regs\n : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
-outFile.write('def V4I16Regs\n : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))
-
-outFile.write('def V2I8Regs\n : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
-outFile.write('def V4I8Regs\n : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))
-
-outFile.close()
-
-
-outFile = open('NVPTXNumRegisters.h', 'w')
-outFile.write('''
-//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NVPTX_NUM_REGISTERS_H
-#define NVPTX_NUM_REGISTERS_H
-
-namespace llvm {
-
-const unsigned NVPTXNumRegisters = %d;
-
-}
-
-#endif
-''' % num_regs)
-
-outFile.close()
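
Each loop in the deleted script wrote one TableGen def per register index, so with num_regs = 396 the generated NVPTXRegisterInfo.td contained entries of the following shape (an excerpt reconstructed from the write calls above):

    def P0 : NVPTXReg<"%p0">;
    def P1 : NVPTXReg<"%p1">;
    ...
    def R395 : NVPTXReg<"%r395">;
    def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 395))>;
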
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 192d18d66440..6036428fad93 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_target(PowerPCCodeGen
PPCRegisterInfo.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
+ PPCTargetTransformInfo.cpp
PPCSelectionDAGInfo.cpp
)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 3d583060d1ef..bacc108c62b4 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -13,7 +13,7 @@
#define DEBUG_TYPE "asm-printer"
#include "PPCInstPrinter.h"
-#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -87,35 +87,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O,
const char *Modifier) {
unsigned Code = MI->getOperand(OpNo).getImm();
- if (!Modifier) {
- unsigned CCReg = MI->getOperand(OpNo+1).getReg();
- unsigned RegNo;
- switch (CCReg) {
- default: llvm_unreachable("Unknown CR register");
- case PPC::CR0: RegNo = 0; break;
- case PPC::CR1: RegNo = 1; break;
- case PPC::CR2: RegNo = 2; break;
- case PPC::CR3: RegNo = 3; break;
- case PPC::CR4: RegNo = 4; break;
- case PPC::CR5: RegNo = 5; break;
- case PPC::CR6: RegNo = 6; break;
- case PPC::CR7: RegNo = 7; break;
- }
-
- // Print the CR bit number. The Code is ((BI << 5) | BO) for a
- // BCC, but we must have the positive form here (BO == 12)
- unsigned BI = Code >> 5;
- assert((Code & 0xF) == 12 &&
- "BO in predicate bit must have the positive form");
-
- unsigned Value = 4*RegNo + BI;
- O << Value;
- return;
- }
if (StringRef(Modifier) == "cc") {
switch ((PPC::Predicate)Code) {
- case PPC::PRED_ALWAYS: return; // Don't print anything for always.
case PPC::PRED_LT: O << "lt"; return;
case PPC::PRED_LE: O << "le"; return;
case PPC::PRED_EQ: O << "eq"; return;
@@ -129,8 +103,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
assert(StringRef(Modifier) == "reg" &&
"Need to specify 'cc' or 'reg' as predicate op modifier!");
- // Don't print the register for 'always'.
- if (Code == PPC::PRED_ALWAYS) return;
printOperand(MI, OpNo+1, O);
}
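
The block removed above recovered a condition-register bit number from a packed predicate operand: Code is ((BI << 5) | BO) for a BCC, and the printed bit is 4*RegNo + BI. A worked sketch of that arithmetic (the register and bit values are illustrative):

    #include <cassert>
    #include <cstdio>

    int main() {
      unsigned BI = 2, BO = 12;       // BO == 12 is the required positive form
      unsigned Code = (BI << 5) | BO; // the packed predicate operand
      unsigned RegNo = 3;             // say the CC register operand is CR3
      assert((Code & 0xF) == 12 && "BO must have the positive form");
      unsigned Bit = 4 * RegNo + (Code >> 5);
      std::printf("%u\n", Bit);       // prints 14: bit 2 (EQ) of CR3
      return 0;
    }
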
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 87ecb13a4c76..ec2657403e0c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -13,8 +13,8 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
@@ -30,11 +30,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
- case PPC::fixup_ppc_toc:
+ case PPC::fixup_ppc_tlsreg:
+ case PPC::fixup_ppc_nofixup:
return Value;
- case PPC::fixup_ppc_lo14:
- case PPC::fixup_ppc_toc16_ds:
- return (Value & 0xffff) << 2;
case PPC::fixup_ppc_brcond14:
return Value & 0xfffc;
case PPC::fixup_ppc_br24:
@@ -46,8 +44,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case PPC::fixup_ppc_ha16:
return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
case PPC::fixup_ppc_lo16:
- case PPC::fixup_ppc_toc16:
return Value & 0xffff;
+ case PPC::fixup_ppc_lo16_ds:
+ return Value & 0xfffc;
}
}
@@ -61,7 +60,9 @@ public:
void RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) {}
+ MCValue Target, uint64_t &FixedValue) {
+ llvm_unreachable("Relocation emission for MachO/PPC unimplemented!");
+ }
};
class PPCAsmBackend : public MCAsmBackend {
@@ -78,10 +79,9 @@ public:
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_lo16", 16, 16, 0 },
{ "fixup_ppc_ha16", 16, 16, 0 },
- { "fixup_ppc_lo14", 16, 14, 0 },
- { "fixup_ppc_toc", 0, 64, 0 },
- { "fixup_ppc_toc16", 16, 16, 0 },
- { "fixup_ppc_toc16_ds", 16, 14, 0 }
+ { "fixup_ppc_lo16_ds", 16, 14, 0 },
+ { "fixup_ppc_tlsreg", 0, 0, 0 },
+ { "fixup_ppc_nofixup", 0, 0, 0 }
};
if (Kind < FirstTargetFixupKind)
@@ -92,6 +92,20 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != 4; ++i)
+ Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
+ }
+
bool mayNeedRelaxation(const MCInst &Inst) const {
// FIXME.
return false;
@@ -99,7 +113,7 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// FIXME.
llvm_unreachable("relaxInstruction() unimplemented");
@@ -135,11 +149,6 @@ namespace {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const {
- llvm_unreachable("UNIMP");
- }
-
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
return createMachObjectWriter(new PPCMachObjectWriter(
@@ -161,19 +170,6 @@ namespace {
ELFPPCAsmBackend(const Target &T, uint8_t OSABI) :
PPCAsmBackend(T), OSABI(OSABI) { }
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const {
- Value = adjustFixupValue(Fixup.getKind(), Value);
- if (!Value) return; // Doesn't change encoding.
-
- unsigned Offset = Fixup.getOffset();
-
- // For each byte of the fragment that the fixup touches, mask in the bits from
- // the fixup value. The Value has been "split up" into the appropriate
- // bitfields above.
- for (unsigned i = 0; i != 4; ++i)
- Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
- }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
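
Two details above are worth spelling out: the @ha fixup rounds the high halfword up whenever bit 15 of the value is set, so that adding the sign-extended low halfword reconstructs the original value, and applyFixup then ORs the adjusted bits into the fragment in big-endian byte order. A worked sketch of the @ha/@l round trip, with an illustrative sample value:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Value = 0x12348765; // low halfword has bit 15 set
      uint32_t Ha = ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
      uint32_t Lo = Value & 0xffff;
      int32_t Sum = (int32_t)(Ha << 16) + (int16_t)Lo; // @l sign-extends
      std::printf("ha=%x lo=%x sum=%x\n", Ha, Lo, (uint32_t)Sum);
      return 0; // prints ha=1235 lo=8765 sum=12348765
    }
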
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
deleted file mode 100644
index 9c975c089ea6..000000000000
--- a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone helper functions and enum definitions for
-// the PPC target useful for the compiler back-end and the MC libraries.
-// As such, it deliberately does not include references to LLVM core
-// code gen types, passes, etc..
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PPCBASEINFO_H
-#define PPCBASEINFO_H
-
-#include "PPCMCTargetDesc.h"
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-
-/// getPPCRegisterNumbering - Given the enum value for some register, e.g.
-/// PPC::F14, return the number that it corresponds to (e.g. 14).
-inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) {
- using namespace PPC;
- switch (RegEnum) {
- case 0: return 0;
- case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
- case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
- case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
- case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
- case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
- case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
- case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
- case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
- case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
- case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
- case R10: case X10: case F10: case V10: case CR2EQ: return 10;
- case R11: case X11: case F11: case V11: case CR2UN: return 11;
- case R12: case X12: case F12: case V12: case CR3LT: return 12;
- case R13: case X13: case F13: case V13: case CR3GT: return 13;
- case R14: case X14: case F14: case V14: case CR3EQ: return 14;
- case R15: case X15: case F15: case V15: case CR3UN: return 15;
- case R16: case X16: case F16: case V16: case CR4LT: return 16;
- case R17: case X17: case F17: case V17: case CR4GT: return 17;
- case R18: case X18: case F18: case V18: case CR4EQ: return 18;
- case R19: case X19: case F19: case V19: case CR4UN: return 19;
- case R20: case X20: case F20: case V20: case CR5LT: return 20;
- case R21: case X21: case F21: case V21: case CR5GT: return 21;
- case R22: case X22: case F22: case V22: case CR5EQ: return 22;
- case R23: case X23: case F23: case V23: case CR5UN: return 23;
- case R24: case X24: case F24: case V24: case CR6LT: return 24;
- case R25: case X25: case F25: case V25: case CR6GT: return 25;
- case R26: case X26: case F26: case V26: case CR6EQ: return 26;
- case R27: case X27: case F27: case V27: case CR6UN: return 27;
- case R28: case X28: case F28: case V28: case CR7LT: return 28;
- case R29: case X29: case F29: case V29: case CR7GT: return 29;
- case R30: case X30: case F30: case V30: case CR7EQ: return 30;
- case R31: case X31: case F31: case V31: case CR7UN: return 31;
- default:
- llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!");
- }
-}
-
-} // end namespace llvm;
-
-#endif
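
The deleted helper hard-coded the enum-to-number mapping that MCRegisterInfo::getEncodingValue now supplies from TableGen data, as the PPCMCCodeEmitter changes further down show. Its CR-bit rows encode the same 4*RegNo + Bit layout used by the instruction printer; one row checked as a quick sanity test (values restated from the table above):

    #include <cassert>

    int main() {
      // From the deleted table: CR3EQ returned 14, i.e. 4*3 + 2 (EQ is bit 2).
      unsigned RegNo = 3, EQBit = 2;
      assert(4 * RegNo + EQBit == 14);
      return 0;
    }
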
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index dc93f7124a52..84e4175e635b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -7,12 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/PPCFixupKinds.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -33,9 +34,25 @@ namespace {
const MCFixup &Fixup,
bool IsPCRel) const;
virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
+
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs);
+ };
+
+ class PPCELFRelocationEntry : public ELFRelocationEntry {
+ public:
+ PPCELFRelocationEntry(const ELFRelocationEntry &RE);
+ bool operator<(const PPCELFRelocationEntry &RE) const {
+ return (RE.r_offset < r_offset ||
+ (RE.r_offset == r_offset && RE.Type > Type));
+ }
};
}
+PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE)
+ : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol,
+ RE.r_addend, *RE.Fixup) {}
+
PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
: MCELFObjectTargetWriter(Is64Bit, OSABI,
Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
@@ -60,9 +77,14 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case PPC::fixup_ppc_br24:
Type = ELF::R_PPC_REL24;
break;
+ case FK_Data_4:
case FK_PCRel_4:
Type = ELF::R_PPC_REL32;
break;
+ case FK_Data_8:
+ case FK_PCRel_8:
+ Type = ELF::R_PPC64_REL64;
+ break;
}
} else {
switch ((unsigned)Fixup.getKind()) {
@@ -79,9 +101,24 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TPREL16_HA:
Type = ELF::R_PPC_TPREL16_HA;
break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
+ Type = ELF::R_PPC64_DTPREL16_HA;
+ break;
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC_ADDR16_HA;
break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_HA:
+ Type = ELF::R_PPC64_TOC16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA:
+ Type = ELF::R_PPC64_GOT_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA:
+ Type = ELF::R_PPC64_GOT_TLSGD16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA:
+ Type = ELF::R_PPC64_GOT_TLSLD16_HA;
+ break;
}
break;
case PPC::fixup_ppc_lo16:
@@ -90,22 +127,56 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
Type = ELF::R_PPC_TPREL16_LO;
break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO;
+ break;
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC_ADDR16_LO;
break;
+ case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+ Type = ELF::R_PPC64_TOC16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ Type = ELF::R_PPC64_TOC16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSGD16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+ break;
}
break;
- case PPC::fixup_ppc_lo14:
- Type = ELF::R_PPC_ADDR14;
- break;
- case PPC::fixup_ppc_toc:
- Type = ELF::R_PPC64_TOC;
+ case PPC::fixup_ppc_lo16_ds:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_ADDR16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+ Type = ELF::R_PPC64_TOC16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ Type = ELF::R_PPC64_TOC16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO:
+ Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
+ break;
+ }
break;
- case PPC::fixup_ppc_toc16:
- Type = ELF::R_PPC64_TOC16;
+ case PPC::fixup_ppc_tlsreg:
+ Type = ELF::R_PPC64_TLS;
break;
- case PPC::fixup_ppc_toc16_ds:
- Type = ELF::R_PPC64_TOC16_DS;
+ case PPC::fixup_ppc_nofixup:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TLSGD:
+ Type = ELF::R_PPC64_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TLSLD:
+ Type = ELF::R_PPC64_TLSLD;
+ break;
+ }
break;
case FK_Data_8:
switch (Modifier) {
@@ -162,8 +233,7 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
switch ((unsigned)Fixup.getKind()) {
case PPC::fixup_ppc_ha16:
case PPC::fixup_ppc_lo16:
- case PPC::fixup_ppc_toc16:
- case PPC::fixup_ppc_toc16_ds:
+ case PPC::fixup_ppc_lo16_ds:
RelocOffset += 2;
break;
default:
@@ -171,6 +241,34 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
}
}
+// The standard sorter only sorts on the r_offset field, but PowerPC can
+// have multiple relocations at the same offset. Sort secondarily on the
+// relocation type to avoid nondeterminism.
+void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+
+ // Copy to a temporary vector of relocation entries having a different
+ // sort function.
+ std::vector<PPCELFRelocationEntry> TmpRelocs;
+
+ for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin();
+ R != Relocs.end(); ++R) {
+ TmpRelocs.push_back(PPCELFRelocationEntry(*R));
+ }
+
+ // Sort in place by ascending r_offset and descending r_type.
+ array_pod_sort(TmpRelocs.begin(), TmpRelocs.end());
+
+ // Copy back to the original vector.
+ unsigned I = 0;
+ for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin();
+ R != TmpRelocs.end(); ++R, ++I) {
+ Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type,
+ R->Symbol, R->r_addend, *R->Fixup);
+ }
+}
+
+
MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
uint8_t OSABI) {
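
The sortRelocs override added above exists because sorting on r_offset alone leaves relocations that share an offset in unspecified order. A self-contained sketch of a two-key ordering with the same deterministic effect (the struct, values, and key directions here are illustrative; the in-tree comparator expresses its keys relative to its argument):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Reloc { unsigned Offset, Type; };

    int main() {
      std::vector<Reloc> Relocs = {{8, 10}, {4, 67}, {4, 73}};
      std::sort(Relocs.begin(), Relocs.end(),
                [](const Reloc &A, const Reloc &B) {
                  return A.Offset < B.Offset ||
                         (A.Offset == B.Offset && A.Type > B.Type);
                });
      for (const Reloc &R : Relocs)                 // 4/73, 4/67, 8/10
        std::printf("off=%u type=%u\n", R.Offset, R.Type);
      return 0;
    }
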
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 37b265e7fd38..86c44f57a5e2 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -12,6 +12,8 @@
#include "llvm/MC/MCFixup.h"
+#undef PPC
+
namespace llvm {
namespace PPC {
enum Fixups {
@@ -31,19 +33,16 @@ enum Fixups {
/// like 'lis'.
fixup_ppc_ha16,
- /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
- /// like 'std'.
- fixup_ppc_lo14,
-
- /// fixup_ppc_toc - Insert value of TOC base (.TOC.).
- fixup_ppc_toc,
+ /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with
+ /// implied 2 zero bits for instrs like 'std'.
+ fixup_ppc_lo16_ds,
- /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base.
- fixup_ppc_toc16,
+ /// fixup_ppc_tlsreg - Insert thread-pointer register number.
+ fixup_ppc_tlsreg,
- /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with
- /// implied 2 zero bits
- fixup_ppc_toc16_ds,
+ /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
+ /// to __tls_get_addr for the TLS general and local dynamic models.
+ fixup_ppc_nofixup,
// Marker
LastTargetFixupKind,
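
The renamed fixup_ppc_lo16_ds reflects that DS-form memory instructions such as 'std' take a displacement with two implied zero bits, which is why adjustFixupValue keeps Value & 0xfffc for this kind. A tiny sketch (the displacement value is illustrative):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Disp = 0x12364;            // DS form requires 4-byte alignment
      assert((Disp & 0x3) == 0 && "DS-form displacement must be word aligned");
      uint32_t Field = (uint32_t)(Disp & 0xfffc); // bits the fixup inserts
      std::printf("%x\n", Field);         // prints 2364
      return 0;
    }
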
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 215aa40c4afd..a25d7fe64f3a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -17,8 +17,9 @@ using namespace llvm;
void PPCMCAsmInfoDarwin::anchor() { }
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
- if (is64Bit)
- PointerSize = 8;
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
IsLittleEndian = false;
PCSymbol = ".";
@@ -35,8 +36,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
void PPCLinuxMCAsmInfo::anchor() { }
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
- if (is64Bit)
- PointerSize = 8;
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
IsLittleEndian = false;
// ".comm align is in bytes but .align is pow-2."
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 21183024a509..2223cd623cb5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -12,15 +12,17 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mccodeemitter"
-#include "MCTargetDesc/PPCBaseInfo.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
@@ -31,24 +33,17 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCSubtargetInfo &STI;
+ const MCContext &CTX;
Triple TT;
public:
PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
MCContext &ctx)
- : STI(sti), TT(STI.getTargetTriple()) {
+ : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) {
}
~PPCMCCodeEmitter() {}
- bool is64BitMode() const {
- return (STI.getFeatureBits() & PPC::Feature64Bit) != 0;
- }
-
- bool isSVR4ABI() const {
- return TT.isMacOSX() == 0;
- }
-
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
@@ -61,6 +56,8 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
@@ -77,11 +74,11 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
-    // BL8_NOP_ELF and BLA8_NOP_ELF are both of size 8 because of the
-    // following 'nop'.
+ // BL8_NOP etc. all have a size of 8 because of the following 'nop'.
unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
unsigned Opcode = MI.getOpcode();
- if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF)
+ if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
+ Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD)
Size = 8;
// Output the constant in big endian byte order.
@@ -114,6 +111,17 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
+
+ // For special TLS calls, add another fixup for the symbol. Apparently
+ // BL8_NOP, BL8_NOP_TLSGD, and BL8_NOP_TLSLD are sufficiently
+ // similar that TblGen will not generate a separate case for the latter
+ // two, so this is the only way to get the extra fixup generated.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) {
+ const MCOperand &MO2 = MI.getOperand(OpNo+1);
+ Fixups.push_back(MCFixup::Create(0, MO2.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_nofixup));
+ }
return 0;
}
@@ -162,12 +170,8 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
// Add a fixup for the displacement field.
- if (isSVR4ABI() && is64BitMode())
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_toc16));
- else
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo16));
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
return RegBits;
}
@@ -183,17 +187,26 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
if (MO.isImm())
return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
- // Add a fixup for the branch target.
- if (isSVR4ABI() && is64BitMode())
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_toc16_ds));
- else
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo14));
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16_ds));
return RegBits;
}
+unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the TLS register, which simply provides a relocation
+ // hint to the linker that this instruction is part of a relocation sequence.
+ // Return the thread-pointer register's encoding.
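+ // (On 64-bit SVR4, r13/X13 is the dedicated thread-pointer register.)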
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_tlsreg));
+ return CTX.getRegisterInfo().getEncodingValue(PPC::X13);
+}
+
unsigned PPCMCCodeEmitter::
get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
@@ -202,7 +215,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
MI.getOpcode() == PPC::MFOCRF ||
MI.getOpcode() == PPC::MTCRF8) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+ return 0x80 >> CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
@@ -214,7 +227,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
// The GPR operand should come through here though.
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return getPPCRegisterNumbering(MO.getReg());
+ return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
}
assert(MO.isImm() &&
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 4c2578d5dc53..2209f936ec33 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -12,14 +12,14 @@
//===----------------------------------------------------------------------===//
#include "PPCMCTargetDesc.h"
-#include "PPCMCAsmInfo.h"
#include "InstPrinter/PPCInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "PPCMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -88,6 +88,11 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
else
RM = Reloc::Static;
}
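+ // Non-Darwin 64-bit code defaults to the medium code model, which forms
+ // addresses of globals with addis/addi (or addis/ld) pairs relative to
+ // the TOC (see the ADDIStocHA/ADDItocL/LDtocL handling below).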
+ if (CM == CodeModel::Default) {
+ Triple T(TT);
+ if (!T.isOSDarwin() && T.getArch() == Triple::ppc64)
+ CM = CodeModel::Medium;
+ }
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index a0e4cf3005f2..38a7420d972d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -14,6 +14,9 @@
#ifndef PPCMCTARGETDESC_H
#define PPCMCTARGETDESC_H
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -44,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
uint8_t OSABI);
} // End llvm namespace
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
// Defines symbolic names for PowerPC registers. This defines a mapping from
// register name to register number.
//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index 12bb0a143406..d84eb9c6aa03 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -18,7 +18,6 @@ using namespace llvm;
PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
switch (Opcode) {
- default: llvm_unreachable("Unknown PPC branch opcode!");
case PPC::PRED_EQ: return PPC::PRED_NE;
case PPC::PRED_NE: return PPC::PRED_EQ;
case PPC::PRED_LT: return PPC::PRED_GE;
@@ -28,4 +27,5 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
case PPC::PRED_NU: return PPC::PRED_UN;
case PPC::PRED_UN: return PPC::PRED_NU;
}
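+ // The switch above covers every predicate, so falling out of it means an
+ // invalid opcode; keeping the unreachable here (rather than in a default
+ // case) lets compilers warn when a new predicate lacks a case.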
+ llvm_unreachable("Unknown PPC branch opcode!");
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index f872e861bfa7..ad2b01812816 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -14,11 +14,17 @@
#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
namespace llvm {
namespace PPC {
/// Predicate - These are "(BI << 5) | BO" for various predicates.
enum Predicate {
- PRED_ALWAYS = (0 << 5) | 20,
PRED_LT = (0 << 5) | 12,
PRED_LE = (1 << 5) | 4,
PRED_EQ = (2 << 5) | 12,
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 9103e1232505..446b6854fb5b 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -15,7 +15,6 @@
#ifndef LLVM_TARGET_POWERPC_H
#define LLVM_TARGET_POWERPC_H
-#include "MCTargetDesc/PPCBaseInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include <string>
@@ -25,6 +24,7 @@
namespace llvm {
class PPCTargetMachine;
class FunctionPass;
+ class ImmutablePass;
class JITCodeEmitter;
class MachineInstr;
class AsmPrinter;
@@ -37,6 +37,9 @@ namespace llvm {
JITCodeEmitter &MCE);
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
+
+ /// \brief Creates a PPC-specific Target Transformation Info pass.
+ ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
namespace PPCII {
@@ -53,25 +56,32 @@ namespace llvm {
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
/// the function's picbase, e.g. lo16(symbol-picbase).
- MO_PIC_FLAG = 4,
+ MO_PIC_FLAG = 2,
/// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
/// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
- MO_NLP_FLAG = 8,
+ MO_NLP_FLAG = 4,
/// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
/// symbol with hidden visibility. This causes a different kind of
/// non-lazy-pointer to be generated.
- MO_NLP_HIDDEN_FLAG = 16,
+ MO_NLP_HIDDEN_FLAG = 8,
/// The next are not flags but distinct values.
- MO_ACCESS_MASK = 224,
+ MO_ACCESS_MASK = 0xf0,
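+ /// (The bit flags above occupy the low nibble; the access kinds below
+ /// are encoded in the high nibble.)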
/// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
- MO_LO16 = 32, MO_HA16 = 64,
+ MO_LO16 = 1 << 4,
+ MO_HA16 = 2 << 4,
+
+ MO_TPREL16_HA = 3 << 4,
+ MO_TPREL16_LO = 4 << 4,
- MO_TPREL16_HA = 96,
- MO_TPREL16_LO = 128
+ /// These values identify relocations on immediates folded
+ /// into memory operations.
+ MO_DTPREL16_LO = 5 << 4,
+ MO_TLSLD16_LO = 6 << 4,
+ MO_TOC16_LO = 7 << 4
};
} // end namespace PPCII
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index cb15dadb7e99..389216278ee4 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -39,7 +39,12 @@ def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
"PPC::DIR_E500mc", "">;
def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
"PPC::DIR_E5500", "">;
+def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
+def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
+def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
+def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
@@ -52,12 +57,42 @@ def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
+def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true",
+ "Enable the fre instruction">;
+def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true",
+ "Enable the fres instruction">;
+def FeatureFRSQRTE : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true",
+ "Enable the frsqrte instruction">;
+def FeatureFRSQRTES : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true",
+ "Enable the frsqrtes instruction">;
+def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true",
+ "Assume higher precision reciprocal estimates">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction">;
+def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
+ "Enable the lfiwax instruction">;
+def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true",
+ "Enable the fri[mnpz] instructions">;
+def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true",
+ "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">;
def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
+def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
+ "Enable the popcnt[dw] instructions">;
+def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
+ "Enable the ldbrx instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
+def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
+ "Enable QPX instructions">;
+
+// Note: Future features to add when support is extended to more
+// recent ISA levels:
+//
+// CMPB p6, p6x, p7 cmpb
+// DFP p6, p6x, p7 decimal floating-point instructions
+// POPCNTB p5 through p7 popcntb and related instructions
+// VSX p7 vector-scalar instruction set
//===----------------------------------------------------------------------===//
// Register File Description
@@ -73,30 +108,46 @@ include "PPCInstrInfo.td"
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
FeatureBookE]>;
def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
FeatureBookE]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
-def : Processor<"603", G3Itineraries, [Directive603]>;
-def : Processor<"603e", G3Itineraries, [Directive603]>;
-def : Processor<"603ev", G3Itineraries, [Directive603]>;
-def : Processor<"604", G3Itineraries, [Directive604]>;
-def : Processor<"604e", G3Itineraries, [Directive604]>;
-def : Processor<"620", G3Itineraries, [Directive620]>;
-def : Processor<"750", G4Itineraries, [Directive750]>;
-def : Processor<"g3", G3Itineraries, [Directive750]>;
-def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"970", G5Itineraries,
+def : Processor<"603", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603e", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603ev", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604e", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"620", G3Itineraries, [Directive620,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"750", G4Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g3", G3Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : ProcessorModel<"970", G5Model,
[Directive970, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
-def : Processor<"g5", G5Itineraries,
+def : ProcessorModel<"g5", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureFRES, FeatureFRSQRTE,
Feature64Bit /*, Feature64BitRegs */]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc, FeatureMFOCRF,
@@ -104,23 +155,65 @@ def : ProcessorModel<"e500mc", PPCE500mcModel,
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
-def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
- FeatureMFOCRF, FeatureFSqrt,
- FeatureSTFIWX, FeatureISEL,
- Feature64Bit
- /*, Feature64BitRegs */]>;
-def : Processor<"pwr6", G5Itineraries,
+def : ProcessorModel<"a2", PPCA2Model,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */]>;
+def : ProcessorModel<"a2q", PPCA2Model,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */, FeatureQPX]>;
+def : ProcessorModel<"pwr3", G5Model,
+ [DirectivePwr3, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr4", G5Model,
+ [DirectivePwr4, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRES, FeatureFRSQRTE,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr5", G5Model,
+ [DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr5x", G5Model,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, FeatureFPRND, Feature64Bit]>;
+def : ProcessorModel<"pwr6", G5Model,
[DirectivePwr6, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- Feature64Bit /*, Feature64BitRegs */]>;
-def : Processor<"pwr7", G5Itineraries,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"pwr6x", G5Model,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit]>;
+def : ProcessorModel<"pwr7", G5Model,
[DirectivePwr7, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
-def : Processor<"ppc64", G5Itineraries,
+def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
+ FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 15d690bd8970..96a9f0a39006 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -18,14 +18,13 @@
#define DEBUG_TYPE "asmprinter"
#include "PPC.h"
-#include "PPCTargetMachine.h"
-#include "PPCSubtarget.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -33,28 +32,30 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/MapVector.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
@@ -72,6 +73,7 @@ namespace {
return "PowerPC Assembly Printer";
}
+ MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
virtual void EmitInstruction(const MachineInstr *MI);
@@ -309,6 +311,25 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
}
+/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
+/// exists for it. If not, create one. Then return a symbol that references
+/// the TOC entry.
+MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
+
+ MCSymbol *&TOCEntry = TOC[Sym];
+
+ // To avoid a name clash, check whether the name already exists.
+ while (TOCEntry == 0) {
+ if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+ "C" + Twine(TOCLabelID++)) == 0) {
+ TOCEntry = GetTempSymbol("C", TOCLabelID);
+ }
+ }
+
+ return TOCEntry;
+}
+
+
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
@@ -349,14 +370,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *PICBase = MF->getPICBaseSymbol();
// Emit the 'bl'.
- TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.
-
-
- // FIXME: We would like an efficient form for this, so we don't have to do
- // a lot of extra uniquing.
- TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
- Create(PICBase, OutContext)));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
OutStreamer.EmitLabel(PICBase);
@@ -382,14 +399,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
- MCSymbol *&TOCEntry = TOC[MOSymbol];
- // To avoid name clash check if the name already exists.
- while (TOCEntry == 0) {
- if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
- "C" + Twine(TOCLabelID++)) == 0) {
- TOCEntry = GetTempSymbol("C", TOCLabelID);
- }
- }
+
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY,
@@ -399,15 +410,299 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ case PPC::ADDIStocHA: {
+ // Transform %Xd = ADDIStocHA %X2, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to ADDIS8. If the global address is external,
+ // has common linkage, is a function address, or is a jump table
+ // address, then generate a TOC entry and reference that. Otherwise
+ // reference the symbol directly.
+ TmpInst.setOpcode(PPC::ADDIS8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+ "Invalid operand for ADDIStocHA!");
+ MCSymbol *MOSymbol = 0;
+ bool IsExternal = false;
+ bool IsFunction = false;
+ bool IsCommon = false;
+ bool IsAvailExt = false;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ IsExternal = GVar && !GVar->hasInitializer();
+ IsCommon = GVar && RealGValue->hasCommonLinkage();
+ IsFunction = !GVar;
+ IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage();
+ } else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+
+ if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI())
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_HA,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::LDtocL: {
+ // Transform %Xd = LDtocL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD. If the global address is external, has
+ // common linkage, or is a jump table address, then reference the
+ // associated TOC entry. Otherwise reference the symbol directly.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+ "Invalid operand for LDtocL!");
+ MCSymbol *MOSymbol = 0;
+
+ if (MO.isJTI())
+ MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+
+ if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+ RealGValue->hasAvailableExternallyLinkage())
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDItocL: {
+ // Transform %Xd = ADDItocL %Xs, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to ADDI8. If the global address is external, then
+ // generate a TOC entry and reference that. Otherwise reference the
+ // symbol directly.
+ TmpInst.setOpcode(PPC::ADDI8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
+ MCSymbol *MOSymbol = 0;
+ bool IsExternal = false;
+ bool IsFunction = false;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ MOSymbol = Mang->getSymbol(RealGValue);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ IsExternal = GVar && !GVar->hasInitializer();
+ IsFunction = !GVar;
+ } else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+
+ if (IsFunction || IsExternal)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDISgotTprelHA: {
+ // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tprel@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTprel));
+ return;
+ }
+ case PPC::LDgotTprelL: {
+ // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case PPC::ADDIStlsgdHA: {
+ // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::ADDItlsgdL: {
+ // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::GETtlsADDR: {
+ // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd)
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
+ OutContext);
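+ // The extra sym@tlsgd operand rides on the call; getDirectBrEncoding in
+ // PPCMCCodeEmitter turns it into the additional fixup described above.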
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDIStlsldHA: {
+ // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::ADDItlsldL: {
+ // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::GETtlsldADDR: {
+ // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLSLD __tls_get_addr(sym@tlsld)
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDISdtprelHA: {
+ // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X3)
+ .addExpr(SymDtprel));
+ return;
+ }
+ case PPC::ADDIdtprelL: {
+ // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@dtprel@l
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
+ return;
+ }
case PPC::MFCRpseud:
case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7
// Into: %R3 = MFCR ;; cr7
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
- TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR)
+ .addReg(MI->getOperand(0).getReg()));
return;
case PPC::SYNC:
// In Book E sync is called msync, handle this special case here...
@@ -438,14 +733,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8 /*size*/);
MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
MCSymbolRefExpr::VK_PPC_TOC, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8/*size*/);
// Emit a null environment pointer.
- OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */);
+ OutStreamer.EmitIntValue(0, 8 /* size */);
OutStreamer.SwitchSection(Current);
MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
@@ -474,6 +769,25 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
}
}
+ MachineModuleInfoELF &MMIELF =
+ MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
return AsmPrinter::doFinalization(M);
}
@@ -508,7 +822,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"ppcA2",
"ppce500mc",
"ppce5500",
+ "power3",
+ "power4",
+ "power5",
+ "power5x",
"power6",
+ "power6x",
"power7",
"ppc64"
};
@@ -523,8 +842,11 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
// FIXME: This is a total hack, finish mc'izing the PPC backend.
- if (OutStreamer.hasRawTextSupport())
+ if (OutStreamer.hasRawTextSupport()) {
+ assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) &&
+ "CPUDirectives[] might not be up-to-date!");
OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+ }
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
@@ -549,16 +871,13 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
// Remove $stub suffix, add $lazy_ptr.
- SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
- TmpStr += "$lazy_ptr";
- return Ctx.GetOrCreateSymbol(TmpStr.str());
+ StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5);
+ return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr");
}
static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
// Add $tmp suffix to $stub, yielding $stub$tmp.
- SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
- TmpStr += "$tmp";
- return Ctx.GetOrCreateSymbol(TmpStr.str());
+ return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp");
}
void PPCDarwinAsmPrinter::
@@ -589,32 +908,51 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
- // FIXME: MCize this.
- OutStreamer.EmitRawText(StringRef("\tmflr r0"));
- OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName()));
+
+ const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
+
+ // mflr r0
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
+ // bcl 20, 31, AnonSymbol
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon));
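+ // (bcl 20,31 to the immediately following label is the usual idiom for
+ // reading the PC into LR; the always-taken form is special-cased by the
+ // hardware so it does not unbalance the link-register return stack.)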
OutStreamer.EmitLabel(AnonSymbol);
- OutStreamer.EmitRawText(StringRef("\tmflr r11"));
- OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+
- "-" + AnonSymbol->getName() + ")");
- OutStreamer.EmitRawText(StringRef("\tmtlr r0"));
-
- if (isPPC64)
- OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
- "-" + AnonSymbol->getName() + ")(r11)");
- else
- OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
- "-" + AnonSymbol->getName() + ")(r11)");
- OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
- OutStreamer.EmitRawText(StringRef("\tbctr"));
-
+ // mflr r11
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+ // addis r11, r11, ha16(LazyPtr - AnonSymbol)
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext),
+ Anon, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::R11)
+ .addReg(PPC::R11)
+ .addExpr(Sub));
+ // mtlr r0
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+
+ // ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(Sub).addExpr(Sub)
+ .addReg(PPC::R11));
+ // mtctr r12
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-
- if (isPPC64)
- OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
- else
- OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
}
OutStreamer.AddBlankLine();
return;
@@ -634,23 +972,42 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
EmitAlignment(4);
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
- OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")");
- if (isPPC64)
- OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
- ")(r11)");
- else
- OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
- ")(r11)");
- OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
- OutStreamer.EmitRawText(StringRef("\tbctr"));
+ // lis r11, ha16(LazyPtr)
+ const MCExpr *LazyPtrHa16 =
+ MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_HA16,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
+ .addReg(PPC::R11)
+ .addExpr(LazyPtrHa16));
+
+ const MCExpr *LazyPtrLo16 =
+ MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_LO16,
+ OutContext);
+ // ldu r12, lo16(LazyPtr)(r11)
+ // lwzu r12, lo16(LazyPtr)(r11)
+ OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
+ .addReg(PPC::R11));
+
+ // mtctr r12
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-
- if (isPPC64)
- OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
- else
- OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
}
OutStreamer.AddBlankLine();
@@ -703,7 +1060,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
else
// Internal to current translation unit.
//
@@ -713,7 +1070,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
@@ -732,7 +1089,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 21a0fb200f20..bd1c37868110 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -17,21 +17,27 @@
#define DEBUG_TYPE "ppc-branch-select"
#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
STATISTIC(NumExpanded, "Number of branches expanded to long format");
+namespace llvm {
+ void initializePPCBSelPass(PassRegistry&);
+}
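+ // (Forward-declaring the initializer lets the constructor below register
+ // the pass with the global PassRegistry; see INITIALIZE_PASS further down.)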
+
namespace {
struct PPCBSel : public MachineFunctionPass {
static char ID;
- PPCBSel() : MachineFunctionPass(ID) {}
+ PPCBSel() : MachineFunctionPass(ID) {
+ initializePPCBSelPass(*PassRegistry::getPassRegistry());
+ }
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
@@ -45,6 +51,9 @@ namespace {
char PPCBSel::ID = 0;
}
+INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
+ false, false)
+
/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
/// Pass
///
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 2a2abb171fb1..81a54d7015b0 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -31,20 +31,20 @@
#define DEBUG_TYPE "ctrloops"
#include "PPC.h"
-#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Constants.h"
-#include "llvm/PassSupport.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/PassSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -54,6 +54,10 @@ using namespace llvm;
STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
+namespace llvm {
+ void initializePPCCTRLoopsPass(PassRegistry&);
+}
+
namespace {
class CountValue;
struct PPCCTRLoops : public MachineFunctionPass {
@@ -64,7 +68,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- PPCCTRLoops() : MachineFunctionPass(ID) {}
+ PPCCTRLoops() : MachineFunctionPass(ID) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -174,15 +180,32 @@ namespace {
};
} // end anonymous namespace
+INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
/// isCompareEquals - Returns true if the instruction is a compare equals
/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
- if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
+ bool &Int64Cmp) {
+ if (MI->getOpcode() == PPC::CMPWI) {
SignedCmp = true;
+ Int64Cmp = false;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPDI) {
+ SignedCmp = true;
+ Int64Cmp = true;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLWI) {
+ SignedCmp = false;
+ Int64Cmp = false;
return true;
- } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+ } else if (MI->getOpcode() == PPC::CMPLDI) {
SignedCmp = false;
+ Int64Cmp = true;
return true;
}
@@ -341,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
RI != RE; ++RI) {
IV_Opnd = &RI.getOperand();
- bool SignedCmp;
+ bool SignedCmp, Int64Cmp;
MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+ if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
MI->getOperand(0).getReg() == PredReg) {
OldInsts.push_back(MI);
@@ -368,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
assert(InitialValue->isReg() && "Expecting register for init value");
unsigned InitialValueReg = InitialValue->getReg();
- const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+ MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
// Here we need to look for an immediate load (an li or lis/ori pair).
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
DefInstr->getOpcode() == PPC::ORI)) {
- int64_t start = (short) DefInstr->getOperand(2).getImm();
- const MachineInstr *DefInstr2 =
- MRI->getVRegDef(DefInstr->getOperand(0).getReg());
+ int64_t start = DefInstr->getOperand(2).getImm();
+ MachineInstr *DefInstr2 =
+ MRI->getVRegDef(DefInstr->getOperand(1).getReg());
if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
DefInstr2->getOpcode() == PPC::LIS)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
@@ -387,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+ OldInsts.push_back(DefInstr2);
+
+ // count/iv_value, the trip count, should be positive here. If it
+ // is negative, that indicates that the counter will wrap.
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
}
} else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
DefInstr->getOpcode() == PPC::LI)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
- int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
+ int64_t count = ImmVal -
+ int64_t(short(DefInstr->getOperand(1).getImm()));
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
} else if (iv_value == 1 || iv_value == -1) {
// We can't determine a constant starting value.
if (ImmVal == 0) {
@@ -405,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
}
// FIXME: handle non-zero end value.
}
- // FIXME: handle non-unit increments (we might not want to introduce division
- // but we can handle some 2^n cases with shifts).
+ // FIXME: handle non-unit increments (we might not want to introduce
+ // division but we can handle some 2^n cases with shifts).
}
}
@@ -477,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI,
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (!MRI->use_nodbg_empty(Reg)) {
- // This instruction has users, but if the only user is the phi node for the
- // parent block, and the only use of that phi node is this instruction, then
- // this instruction is dead: both it (and the phi node) can be removed.
+ // This instruction has users, but if the only user is the phi node for
+ // the parent block, and the only use of that phi node is this
+ // instruction, then this instruction is dead: both it (and the phi
+ // node) can be removed.
MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
if (llvm::next(I) == MRI->use_end() &&
I.getOperand().getParent()->isPHI()) {
@@ -582,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
DEBUG(dbgs() << "failed to get trip count!\n");
return false;
}
+
+ if (TripCount->isImm()) {
+ DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
+
+ // FIXME: We currently can't form 64-bit constants
+ // (including 32-bit unsigned constants)
+ if (!isInt<32>(TripCount->getImm()))
+ return false;
+ }
+
// Does the loop contain any invalid instructions?
if (containsInvalidInstruction(L)) {
return false;
@@ -635,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
const TargetRegisterClass *SrcRC =
MF->getRegInfo().getRegClass(TripCount->getReg());
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
+ unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
(unsigned) PPC::EXTSW_32_64 :
(unsigned) TargetOpcode::COPY;
BuildMI(*Preheader, InsertPos, dl,
@@ -652,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
// Put the trip count in a register for transfer into the count register.
int64_t CountImm = TripCount->getImm();
- assert(!TripCount->isNeg() && "Constant trip count must be positive");
+ if (TripCount->isNeg())
+ CountImm = -CountImm;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- if (CountImm > 0xFFFF) {
+ if (abs64(CountImm) > 0x7FFF) {
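+ // li/li8 sign-extend a 16-bit immediate, so anything outside
+ // [-0x8000, 0x7FFF] has to be synthesized with a lis/ori pair.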
BuildMI(*Preheader, InsertPos, dl,
TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
- CountReg).addImm(CountImm >> 16);
+ CountReg).addImm((CountImm >> 16) & 0xFFFF);
unsigned CountReg1 = CountReg;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
BuildMI(*Preheader, InsertPos, dl,
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 3f87e883b1e4..c8a29a3d2cfe 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -27,9 +27,10 @@ def RetCC_PPC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
- CCIfType<[f32], CCAssignToReg<[F1]>>,
- CCIfType<[f64], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
// Vector types are always returned in V2.
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
@@ -37,49 +38,20 @@ def RetCC_PPC : CallingConv<[
//===----------------------------------------------------------------------===//
-// PowerPC Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-/*
-def CC_PPC : CallingConv<[
- // The first 8 integer arguments are passed in integer registers.
- CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
- CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
-
- // Common sub-targets passes FP values in F1 - F13
- CCIfType<[f32, f64],
- CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>,
-
- // The first 12 Vector arguments are passed in altivec registers.
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
-
-/*
- // Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToStack<16, 16>>*/
-]>;
-
-*/
-
-//===----------------------------------------------------------------------===//
-// PowerPC System V Release 4 ABI
+// PowerPC System V Release 4 32-bit ABI
//===----------------------------------------------------------------------===//
-def CC_PPC_SVR4_Common : CallingConv<[
+def CC_PPC32_SVR4_Common : CallingConv<[
// The ABI requires i64 to be passed in two adjacent registers with the first
// register having an odd register number.
- CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>,
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
// The first 8 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
// Make sure the i64 words from a long double are either both passed in
// registers or both passed on the stack.
- CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>,
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>,
// FP values are passed in F1 - F8.
CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
@@ -100,18 +72,18 @@ def CC_PPC_SVR4_Common : CallingConv<[
// This calling convention puts vector arguments always on the stack. It is used
// to assign vector arguments which belong to the variable portion of the
// parameter list of a variable argument function.
-def CC_PPC_SVR4_VarArg : CallingConv<[
- CCDelegateTo<CC_PPC_SVR4_Common>
+def CC_PPC32_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
-// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put
-// vector arguments in vector registers before putting them on the stack.
-def CC_PPC_SVR4 : CallingConv<[
+// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
+// put vector arguments in vector registers before putting them on the stack.
+def CC_PPC32_SVR4 : CallingConv<[
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
- CCDelegateTo<CC_PPC_SVR4_Common>
+ CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
// Helper "calling convention" to handle aggregate by value arguments.
@@ -122,15 +94,15 @@ def CC_PPC_SVR4 : CallingConv<[
// Still, the address of the aggregate copy in the callers stack frame is passed
// in a GPR (or in the parameter list area if all GPRs are allocated) from the
// caller to the callee. The location for the address argument is assigned by
-// the CC_PPC_SVR4 calling convention.
+// the CC_PPC32_SVR4 calling convention.
//
-// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are
+// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
// not passed by value.
-def CC_PPC_SVR4_ByVal : CallingConv<[
+def CC_PPC32_SVR4_ByVal : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
- CCCustom<"CC_PPC_SVR4_Custom_Dummy">
+ CCCustom<"CC_PPC32_SVR4_Custom_Dummy">
]>;
def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
@@ -164,3 +136,9 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV
F27, F28, F29, F30, F31, CR2, CR3, CR4,
V20, V21, V22, V23, V24, V25, V26, V27,
V28, V29, V30, V31)>;
+
+def CSR_NoRegs : CalleeSavedRegs<(add VRSAVE)>;
+def CSR_NoRegs_Darwin : CalleeSavedRegs<(add)>;
+
+def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>;
+
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 252a2d159ec3..64787185138b 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -12,15 +12,15 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCTargetMachine.h"
-#include "PPCRelocations.h"
#include "PPC.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
@@ -68,6 +68,7 @@ namespace {
unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
@@ -141,7 +142,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> getPPCRegisterNumbering(MO.getReg());
+ return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
}
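
The 0x80 >> encoding expression above builds the 8-bit FXM field for MTCRF: CR0 maps to bit 7 (0x80) and CR7 to bit 0. A quick sketch of that arithmetic, assuming encoding values 0..7 for CR0..CR7 as the assert implies:

// Sketch: FXM mask bit for a CR field, mirroring 0x80 >> encoding above.
#include <cassert>
#include <cstdio>

static unsigned crFieldMask(unsigned Enc) {
  assert(Enc < 8 && "CR field encoding out of range");
  return 0x80u >> Enc;   // CR0 -> 0x80, CR1 -> 0x40, ..., CR7 -> 0x01
}

int main() {
  for (unsigned Enc = 0; Enc < 8; ++Enc)
    std::printf("CR%u -> 0x%02X\n", Enc, crFieldMask(Enc));
  assert(crFieldMask(0) == 0x80 && crFieldMask(7) == 0x01);
}
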
MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
@@ -243,6 +244,13 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
}
+unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("TLS not supported on the old JIT.");
+ return 0;
+}
+
+
unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {
@@ -252,7 +260,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return getPPCRegisterNumbering(MO.getReg());
+ return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
}
assert(MO.isImm() &&
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index caf7bf2be793..3244b904ee64 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -12,16 +12,16 @@
//===----------------------------------------------------------------------===//
#include "PPCFrameLowering.h"
-#include "PPCInstrInfo.h"
#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -103,6 +103,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// transform this into the appropriate ORI instruction.
static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned UsedRegMask = 0;
@@ -115,16 +116,25 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
for (MachineRegisterInfo::livein_iterator
I = MF->getRegInfo().livein_begin(),
E = MF->getRegInfo().livein_end(); I != E; ++I) {
- unsigned RegNo = getPPCRegisterNumbering(I->first);
+ unsigned RegNo = TRI->getEncodingValue(I->first);
if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
}
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- unsigned RegNo = getPPCRegisterNumbering(*I);
- if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
- UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+
+ // Live out registers appear as use operands on return instructions.
+ for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
+ UsedRegMask != 0 && BI != BE; ++BI) {
+ const MachineBasicBlock &MBB = *BI;
+ if (MBB.empty() || !MBB.back().isReturn())
+ continue;
+ const MachineInstr &Ret = MBB.back();
+ for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Ret.getOperand(I);
+ if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
+ continue;
+ unsigned RegNo = TRI->getEncodingValue(MO.getReg());
+ UsedRegMask &= ~(1 << (31-RegNo));
+ }
}
// If no registers are used, turn this into a copy.
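
The mask bookkeeping above uses big-endian bit numbering: vector register N owns bit 31-N of VRSAVE, so a register's bit is cleared with UsedRegMask &= ~(1 << (31-RegNo)). A small sketch of just that bit arithmetic (the register numbers are illustrative, and the surrounding live-in/live-out semantics are left aside):

// Sketch of the VRSAVE mask math: bit (31 - N) marks vector register N.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t UsedRegMask = 0xFFFFFFFFu;      // start with everything marked
  unsigned Regs[] = {2, 3, 20};            // e.g. V2, V3, V20 need no mark
  for (unsigned RegNo : Regs)
    UsedRegMask &= ~(1u << (31 - RegNo));  // clear their VRSAVE bits
  assert((UsedRegMask & (1u << 29)) == 0); // V2's bit cleared
  assert((UsedRegMask & (1u << 11)) == 0); // V20's bit cleared
  assert((UsedRegMask & (1u << 31)) != 0); // V0 still marked
}
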
@@ -179,13 +189,31 @@ static bool spillsCR(const MachineFunction &MF) {
return FuncInfo->isCRSpilled();
}
+static bool spillsVRSAVE(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isVRSAVESpilled();
+}
+
+static bool hasSpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasSpills();
+}
+
+static bool hasNonRISpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasNonRISpills();
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF,
+ bool UseEstimate) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
+ unsigned FrameSize =
+ UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
// Get the alignments provided by the target, and the maximum alignment
// (if any) of the fixed frame objects.
@@ -198,13 +226,14 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
// to adjust the stack pointer (we fit in the Red Zone). For 64-bit
// SVR4, we also require a stack frame if we need to spill the CR,
// since this spill area is addressed relative to the stack pointer.
- bool DisableRedZone = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::NoRedZone);
- // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can
- // still generate stackless code if all local vars are reg-allocated.
- // Try: (FrameSize <= 224
- // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI()))
+ // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
+ // stackless code if all local vars are reg-allocated.
+ bool DisableRedZone = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
if (!DisableRedZone &&
+ (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
+ !Subtarget.isSVR4ABI() || // allocated locals.
+ FrameSize == 0) &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
@@ -213,8 +242,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
&& spillsCR(MF)) &&
(!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
// No need for frame
- MFI->setStackSize(0);
- return;
+ if (UpdateMF)
+ MFI->setStackSize(0);
+ return 0;
}
// Get the maximum call frame size of all the calls.
@@ -231,7 +261,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
// Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
+ if (UpdateMF)
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -240,7 +271,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
// Update frame info.
- MFI->setStackSize(FrameSize);
+ if (UpdateMF)
+ MFI->setStackSize(FrameSize);
+
+ return FrameSize;
}
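
determineFrameLayout rounds both the maximum call frame size and the final frame size with the usual power-of-two trick: (Size + AlignMask) & ~AlignMask, where AlignMask = Align - 1. A quick sketch checking the rounding:

// Sketch of the frame-size rounding used above: round Size up to a
// power-of-two alignment via (Size + Mask) & ~Mask.
#include <cassert>

static unsigned alignTo(unsigned Size, unsigned Align) {
  unsigned Mask = Align - 1;               // Align must be a power of two
  return (Size + Mask) & ~Mask;
}

int main() {
  assert(alignTo(0, 16) == 0);
  assert(alignTo(1, 16) == 16);
  assert(alignTo(224, 16) == 224);         // red-zone boundary stays put
  assert(alignTo(225, 16) == 240);
  assert(alignTo(100, 32) == 128);         // QPX/BGQ use 32-byte alignment
}
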
// hasFP - Return true if the specified function actually has a dedicated frame
@@ -261,7 +295,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
// Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
- if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked))
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
@@ -270,6 +305,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
+void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
+ bool is31 = needsFP(MF);
+ unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
+ unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI)
+ for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+ --MBBI;
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ switch (MO.getReg()) {
+ case PPC::FP:
+ MO.setReg(FPReg);
+ break;
+ case PPC::FP8:
+ MO.setReg(FP8Reg);
+ break;
+ }
+ }
+ }
+}
void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
@@ -300,13 +360,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MBBI = MBB.begin();
// Work out frame sizes.
- // FIXME: determineFrameLayout() may change the frame size. This should be
- // moved upper, to some hook.
- determineFrameLayout(MF);
- unsigned FrameSize = MFI->getStackSize();
-
+ unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
+ if (MFI->isFrameAddressTaken())
+ replaceFPWithRealFP(MF);
+
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get operating system
@@ -769,14 +828,15 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+ RegScavenger *) const {
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned LR = RegInfo->getRARegister();
FI->setMustSaveLR(MustSaveLR(MF, LR));
- MF.getRegInfo().setPhysRegUnused(LR);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.setPhysRegUnused(LR);
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
@@ -801,29 +861,24 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
- // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
- // a large stack, which will require scavenging a register to materialize a
- // large offset.
- // FIXME: this doesn't actually check stack size, so is a bit pessimistic
- // FIXME: doesn't detect whether or not we need to spill vXX, which requires
- // r0 for now.
-
- if (RegInfo->requiresRegisterScavenging(MF))
- if (needsFP(MF) || spillsCR(MF)) {
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
+ // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
+ // function uses CR 2, 3, or 4.
+ if (!isPPC64 && !isDarwinABI &&
+ (MRI.isPhysRegUsed(PPC::CR2) ||
+ MRI.isPhysRegUsed(PPC::CR3) ||
+ MRI.isPhysRegUsed(PPC::CR4))) {
+ int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ FI->setCRSpillFrameIndex(FrameIdx);
+ }
}
-void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
// Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI())
+ if (!Subtarget.isSVR4ABI()) {
+ addScavengingSpillSlot(MF, RS);
return;
+ }
// Get callee saved register information.
MachineFrameInfo *FFI = MF.getFrameInfo();
@@ -831,6 +886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Early exit if no callee saved registers are modified!
if (CSI.empty() && !needsFP(MF)) {
+ addScavengingSpillSlot(MF, RS);
return;
}
@@ -895,6 +951,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
int64_t LowerBound = 0;
@@ -914,7 +971,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
- LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8;
+ LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
}
// Check whether the frame pointer register is allocated. If so, make sure it
@@ -948,8 +1005,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
unsigned MinReg =
- std::min<unsigned>(getPPCRegisterNumbering(MinGPR),
- getPPCRegisterNumbering(MinG8R));
+ std::min<unsigned>(TRI->getEncodingValue(MinGPR),
+ TRI->getEncodingValue(MinG8R));
if (Subtarget.isPPC64()) {
LowerBound -= (31 - MinReg + 1) * 8;
@@ -1009,6 +1066,44 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
}
+
+ addScavengingSpillSlot(MF, RS);
+}
+
+void
+PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+
+ // We need to have a scavenger spill slot for spills if the frame size is
+ // large. In case there is no free register for large-offset addressing,
+ // this slot is used for the necessary emergency spill. Also, we need the
+ // slot for dynamic stack allocations.
+
+ // The scavenger might be invoked if the frame offset does not fit into
+ // the 16-bit immediate. We don't know the complete frame size here
+ // because we've not yet computed callee-saved register spills or the
+ // needed alignment padding.
+ unsigned StackSize = determineFrameLayout(MF, false, true);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
+ hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ // These kinds of spills might need two registers.
+ if (spillsCR(MF) || spillsVRSAVE(MF))
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ }
}
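
The trigger condition above hinges on whether the estimated frame size still fits a signed 16-bit displacement; isInt<16> is the usual range check. A sketch of that predicate, written under the assumption that it matches LLVM's isInt<N> semantics:

// Sketch of the isInt<16>(StackSize) gate: offsets that fit a signed
// 16-bit immediate need no scavenged register for addressing.
#include <cassert>
#include <cstdint>

template <unsigned N> static bool isIntN(int64_t V) {
  return V >= -(int64_t(1) << (N - 1)) && V < (int64_t(1) << (N - 1));
}

int main() {
  assert(isIntN<16>(32767));      // largest D-form displacement
  assert(!isIntN<16>(32768));     // one past it: scavenging may be needed
  assert(isIntN<16>(-32768));
  assert(!isIntN<16>(-32769));
}
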
bool
@@ -1046,8 +1141,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// save slot via GPR12 (available in the prolog for 32- and 64-bit).
if (Subtarget.isPPC64()) {
// 64-bit: SP+8
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12));
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW))
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12));
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8))
.addReg(PPC::X12,
getKillRegState(true))
.addImm(8)
@@ -1087,7 +1182,7 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
if (isPPC64) {
// 64-bit: SP+8
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12)
.addImm(8)
.addReg(PPC::X1));
RestoreOp = PPC::MTCRF8;
@@ -1103,15 +1198,56 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
if (CR2Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
if (CR3Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(!CR4Spilled)));
if (CR4Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
- .addReg(MoveReg));
+ .addReg(MoveReg, getKillRegState(true)));
+}
+
+void PPCFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
+ if (isInt<16>(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
}
bool
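
When CalleeAmt doesn't fit in 16 bits, eliminateCallFramePseudoInstr above materializes it with LIS (high half) followed by ORI (low half). Because ORI is a bitwise OR into a zeroed low half, hi = amt >> 16 and lo = amt & 0xFFFF reassemble the value exactly, with no carry adjustment. A sketch of that split, modeling the 32-bit view (LIS8's sign extension into the upper 32 bits is left aside):

// Sketch of the LIS/ORI split above: LIS places the high 16 bits, ORI
// ors in the low 16 bits; OR can't carry, so no @ha-style fixup is needed.
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t Amt : {70000, -70000, 32768, -32769}) {
    uint32_t U  = static_cast<uint32_t>(Amt);
    uint16_t Hi = static_cast<uint16_t>(U >> 16);     // LIS immediate
    uint16_t Lo = static_cast<uint16_t>(U & 0xFFFF);  // ORI immediate
    uint32_t Re = (static_cast<uint32_t>(Hi) << 16) | Lo;
    assert(Re == U && "LIS/ORI reassembles the amount exactly");
  }
}
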
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 4d957b91c7bb..6f5f9368c6c6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -15,9 +15,9 @@
#include "PPC.h"
#include "PPCSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/STLExtras.h"
namespace llvm {
class PPCSubtarget;
@@ -27,11 +27,14 @@ class PPCFrameLowering: public TargetFrameLowering {
public:
PPCFrameLowering(const PPCSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0),
Subtarget(sti) {
}
- void determineFrameLayout(MachineFunction &MF) const;
+ unsigned determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF = true,
+ bool UseEstimate = false) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -40,16 +43,23 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool needsFP(const MachineFunction &MF) const;
+ void replaceFPWithRealFP(MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -139,6 +149,9 @@ public:
return 0;
}
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
static const SpillSlot Offsets[] = {
// Floating-point register save area offsets.
{PPC::F31, -8},
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 6ed1fb9e6a3c..4bf1e3396429 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -179,7 +179,7 @@ getHazardType(SUnit *SU, int Stalls) {
}
// Do not allow MTCTR and BCTRL to be in the same dispatch group.
- if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
+ if (HasCTRSet && Opcode == PPC::BCTRL)
return NoopHazard;
// If this is a load following a store, make sure it's not to the same or
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 254fea67fc4e..95efc11b53c1 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -14,24 +14,30 @@
#define DEBUG_TYPE "ppc-codegen"
#include "PPC.h"
-#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+namespace llvm {
+ void initializePPCDAGToDAGISelPass(PassRegistry&);
+}
+
namespace {
//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
@@ -46,7 +52,9 @@ namespace {
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
: SelectionDAGISel(tm), TM(tm),
PPCLowering(*TM.getTargetLowering()),
- PPCSubTarget(*TM.getSubtargetImpl()) {}
+ PPCSubTarget(*TM.getSubtargetImpl()) {
+ initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF) {
// Make sure we re-emit a set of the global base reg if necessary
@@ -59,6 +67,8 @@ namespace {
return true;
}
+ virtual void PostprocessISelDAG();
+
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -110,10 +120,10 @@ namespace {
}
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
- /// immediate field. Because preinc imms have already been validated, just
- /// accept it.
+ /// immediate field. Note that the operand at this point is already the
+ /// result of a prior SelectAddressRegImm call.
bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
- if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
+ if (N.getOpcode() == ISD::TargetConstant ||
N.getOpcode() == ISD::TargetGlobalAddress) {
Out = N;
return true;
@@ -122,18 +132,6 @@ namespace {
return false;
}
- /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc
- /// index field. Because preinc imms have already been validated, just
- /// accept it.
- bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const {
- if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
- N.getOpcode() == ISD::TargetGlobalAddress)
- return false;
-
- Out = N;
- return true;
- }
-
/// SelectAddrIdx - Given the specified address, check to see if it can be
/// represented as an indexed [r+r] operation. Returns false if it can
/// be represented by [r+imm], which are preferred.
@@ -154,6 +152,12 @@ namespace {
return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
}
+ // Select an address into a single register.
+ bool SelectAddr(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+ }
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. It is always correct to compute the value into
/// a register. The case of adding a (possibly relocatable) constant to a
@@ -1040,7 +1044,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
break;
SDValue Offset = LD->getOffset();
- if (isa<ConstantSDNode>(Offset) ||
+ if (Offset.getOpcode() == ISD::TargetConstant ||
Offset.getOpcode() == ISD::TargetGlobalAddress) {
unsigned Opcode;
@@ -1107,7 +1111,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
- SDValue Ops[] = { Offset, Base, Chain };
+ SDValue Ops[] = { Base, Offset, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering.getPointerTy(),
MVT::Other, Ops, 3);
@@ -1268,11 +1272,277 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
Chain), 0);
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
+ case PPCISD::TOC_ENTRY: {
+ assert(PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
+
+ // For medium and large code model, we generate two instructions as
+ // described below. Otherwise we allow SelectCodeCommon to handle this,
+ // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+ CodeModel::Model CModel = TM.getCodeModel();
+ if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
+ break;
+
+ // The first source operand is a TargetGlobalAddress or a
+ // TargetJumpTable. If it is an externally defined symbol, a symbol
+ // with common linkage, a function address, or a jump table address,
+ // or if we are generating code for large code model, we generate:
+ // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
+ // Otherwise we generate:
+ // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
+ SDValue GA = N->getOperand(0);
+ SDValue TOCbase = N->getOperand(1);
+ SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
+ TOCbase, GA);
+
+ if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
+ return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+ SDValue(Tmp, 0));
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+ const GlobalValue *GValue = G->getGlobal();
+ const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
+ const GlobalValue *RealGValue = GAlias ?
+ GAlias->resolveAliasedGlobal(false) : GValue;
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
+ assert((GVar || isa<Function>(RealGValue)) &&
+ "Unexpected global value subclass!");
+
+ // An external variable is one without an initializer. For these,
+ // for variables with common linkage, and for Functions, generate
+ // the LDtocL form.
+ if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
+ RealGValue->hasAvailableExternallyLinkage())
+ return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+ SDValue(Tmp, 0));
+ }
+
+ return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
+ SDValue(Tmp, 0), GA);
+ }
+ case PPCISD::VADD_SPLAT: {
+ // This expands into one of three sequences, depending on whether
+ // the first operand is odd or even, positive or negative.
+ assert(isa<ConstantSDNode>(N->getOperand(0)) &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ "Invalid operand on VADD_SPLAT!");
+
+ int Elt = N->getConstantOperandVal(0);
+ int EltSize = N->getConstantOperandVal(1);
+ unsigned Opc1, Opc2, Opc3;
+ EVT VT;
+
+ if (EltSize == 1) {
+ Opc1 = PPC::VSPLTISB;
+ Opc2 = PPC::VADDUBM;
+ Opc3 = PPC::VSUBUBM;
+ VT = MVT::v16i8;
+ } else if (EltSize == 2) {
+ Opc1 = PPC::VSPLTISH;
+ Opc2 = PPC::VADDUHM;
+ Opc3 = PPC::VSUBUHM;
+ VT = MVT::v8i16;
+ } else {
+ assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
+ Opc1 = PPC::VSPLTISW;
+ Opc2 = PPC::VADDUWM;
+ Opc3 = PPC::VSUBUWM;
+ VT = MVT::v4i32;
+ }
+
+ if ((Elt & 1) == 0) {
+ // Elt is even, in the range [-32,-18] + [16,30].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp = VSPLTIS[BHW] elt
+ // VADDU[BHW]M tmp, tmp
+ // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
+ SDValue EltVal = getI32Imm(Elt >> 1);
+ SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ SDValue TmpVal = SDValue(Tmp, 0);
+ return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+ } else if (Elt > 0) {
+ // Elt is odd and positive, in the range [17,31].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt-16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VSUBU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt - 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+
+ } else {
+ // Elt is odd and negative, in the range [-31,-17].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt+16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VADDU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt + 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+ }
+ }
}
return SelectCode(N);
}
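
The three VADD_SPLAT sequences above all reduce to small integer identities over the signed 5-bit VSPLTIS immediate range [-16,15]: an even Elt is a half-value splat doubled, an odd positive Elt is splat(Elt-16) minus splat(-16), and an odd negative Elt is splat(Elt+16) plus splat(-16). A sketch verifying both the identities and the immediate ranges:

// Sketch checking the VADD_SPLAT identities used above. VSPLTIS* takes a
// signed 5-bit immediate, so each intermediate must lie in [-16,15].
#include <cassert>

static bool fitsSplat(int V) { return V >= -16 && V <= 15; }

static int expand(int Elt) {
  if ((Elt & 1) == 0) {                // even: double a half-splat
    assert(fitsSplat(Elt >> 1));
    return (Elt >> 1) + (Elt >> 1);    // VADDU[BHW]M tmp, tmp
  }
  if (Elt > 0) {                       // odd positive: (Elt-16) - (-16)
    assert(fitsSplat(Elt - 16));
    return (Elt - 16) - (-16);         // VSUBU[BHW]M tmp1, tmp2
  }
  assert(fitsSplat(Elt + 16));         // odd negative: (Elt+16) + (-16)
  return (Elt + 16) + (-16);           // VADDU[BHW]M tmp1, tmp2
}

int main() {
  for (int Elt = -32; Elt <= -17; ++Elt) assert(expand(Elt) == Elt);
  for (int Elt = 16; Elt <= 31; ++Elt)   assert(expand(Elt) == Elt);
}
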
+/// PostprocessISelDAG - Perform some late peephole optimizations
+/// on the DAG representation.
+void PPCDAGToDAGISel::PostprocessISelDAG() {
+
+ // Skip peepholes at -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // These optimizations are currently supported only for 64-bit SVR4.
+ if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
+ return;
+
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = --Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ unsigned FirstOp;
+ unsigned StorageOpcode = N->getMachineOpcode();
+
+ switch (StorageOpcode) {
+ default: continue;
+
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LD:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWA:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ FirstOp = 0;
+ break;
+
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STD:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ FirstOp = 1;
+ break;
+ }
+
+ // If this is a load or store with a zero offset, we may be able to
+ // fold an add-immediate into the memory operation.
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
+ N->getConstantOperandVal(FirstOp) != 0)
+ continue;
+
+ SDValue Base = N->getOperand(FirstOp + 1);
+ if (!Base.isMachineOpcode())
+ continue;
+
+ unsigned Flags = 0;
+ bool ReplaceFlags = true;
+
+ // When the feeding operation is an add-immediate of some sort,
+ // determine whether we need to add relocation information to the
+ // target flags on the immediate operand when we fold it into the
+ // load instruction.
+ //
+ // For something like ADDItocL, the relocation information is
+ // inferred from the opcode; when we process it in the AsmPrinter,
+ // we add the necessary relocation there. A load, though, can receive
+ // relocation from various flavors of ADDIxxx, so we need to carry
+ // the relocation information in the target flags.
+ switch (Base.getMachineOpcode()) {
+ default: continue;
+
+ case PPC::ADDI8:
+ case PPC::ADDI:
+ // In some cases (such as TLS) the relocation information
+ // is already in place on the operand, so copying the operand
+ // is sufficient.
+ ReplaceFlags = false;
+ // For these cases, the immediate may not be divisible by 4, in
+ // which case the fold is illegal for DS-form instructions. (The
+ // other cases provide aligned addresses and are always safe.)
+ if ((StorageOpcode == PPC::LWA ||
+ StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) &&
+ (!isa<ConstantSDNode>(Base.getOperand(1)) ||
+ Base.getConstantOperandVal(1) % 4 != 0))
+ continue;
+ break;
+ case PPC::ADDIdtprelL:
+ Flags = PPCII::MO_DTPREL16_LO;
+ break;
+ case PPC::ADDItlsldL:
+ Flags = PPCII::MO_TLSLD16_LO;
+ break;
+ case PPC::ADDItocL:
+ Flags = PPCII::MO_TOC16_LO;
+ break;
+ }
+
+ // We found an opportunity. Reverse the operands from the add
+ // immediate and substitute them into the load or store. If
+ // needed, update the target flags for the immediate operand to
+ // reflect the necessary relocation information.
+ DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ DEBUG(Base->dump(CurDAG));
+ DEBUG(dbgs() << "\nN: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ SDValue ImmOpnd = Base.getOperand(1);
+
+ // If the relocation information isn't already present on the
+ // immediate operand, add it now.
+ if (ReplaceFlags) {
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
+ } else if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
+ const Constant *C = CP->getConstVal();
+ ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment(),
+ 0, Flags);
+ }
+ }
+
+ if (FirstOp == 1) // Store
+ (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+ Base.getOperand(0), N->getOperand(3));
+ else // Load
+ (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
+ N->getOperand(2));
+
+ // The add-immediate may now be dead, in which case remove it.
+ if (Base.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(Base.getNode());
+ }
+}
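
The peephole above refuses to fold a plain ADDI into LWA, LD, or STD unless the immediate is a multiple of 4: those are DS-form instructions, whose displacement field drops the low two bits. A sketch of that legality gate, with the opcode distinction reduced to a flag:

// Sketch of the DS-form check in the peephole above: DS-form displacements
// are encoded in units of 4, so imm % 4 must be 0 for the fold to be legal.
#include <cassert>
#include <cstdint>

static bool canFold(bool IsDSForm, int64_t Imm) {
  if (!IsDSForm)
    return true;          // D-form (LWZ, STW, ...) takes any 16-bit imm
  return Imm % 4 == 0;    // DS-form (LD, STD, LWA) needs 4-byte multiples
}

int main() {
  assert(canFold(/*IsDSForm=*/false, 6)); // e.g. LWZ: fine
  assert(canFold(/*IsDSForm=*/true, 8));  // LD with offset 8: fine
  assert(!canFold(/*IsDSForm=*/true, 6)); // LD with offset 6: skip the fold
}
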
/// createPPCISelDag - This pass converts a legalized DAG into a
@@ -1282,3 +1552,14 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
return new PPCDAGToDAGISel(TM);
}
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0,
+ false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce);
+}
+
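
The CALL_ONCE_INITIALIZATION macro presumably guards initializePassOnce behind a thread-safe once flag; its actual expansion is not shown in this patch. A minimal sketch of the same idea using std::call_once, with a hypothetical pass name:

// Sketch, assuming CALL_ONCE_INITIALIZATION behaves like a once-guard.
#include <cstdio>
#include <mutex>

static void initializePassOnce() {
  std::puts("registering PowerPC DAG->DAG pass");  // runs exactly once
}

void initializeHypotheticalPass() {
  static std::once_flag Flag;
  std::call_once(Flag, initializePassOnce);
}

int main() {
  initializeHypotheticalPass();  // first call registers the pass
  initializeHypotheticalPass();  // later calls are no-ops
}
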
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index adf78d5233ae..16fc8a0e3726 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12,15 +12,10 @@
//===----------------------------------------------------------------------===//
#include "PPCISelLowering.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -29,6 +24,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -36,20 +36,20 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
@@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ PPCRegInfo = TM.getRegisterInfo();
setPow2DivIsCheap();
@@ -112,6 +116,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -132,11 +137,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Legal);
@@ -144,26 +151,58 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget->hasFSQRT()) {
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- }
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ if (Subtarget->hasFPRND()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+
+ // frin does not implement "ties to even." Thus, this is safe only in
+ // fast-math mode.
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+
+ // These need to set FE_INEXACT, and use a custom inserter.
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ }
+ }
+
// PowerPC does not have BSWAP, CTPOP or CTTZ
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
- setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ if (Subtarget->hasPOPCNTD()) {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
+ } else {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ }
+
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
@@ -206,6 +245,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
+ // SjLj exception handling; it is a lightweight setjmp/longjmp replacement
+ // used for continuations, user-level threading, and the like. As a result,
+ // no other SjLj exception interfaces are implemented; please don't build
+ // your own exception handling on top of them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
@@ -285,15 +332,28 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- // FIXME: disable this lowered code. This generates 64-bit register values,
- // and we don't model the fact that the top part is clobbered by calls. We
- // need to flag these together so that the value isn't live across a call.
- //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
+ // With the instructions enabled under FPCVT, we can do everything.
+ if (PPCSubTarget.hasFPCVT()) {
+ if (Subtarget->has64BitSupport()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ }
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ }
+
if (Subtarget->use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
@@ -347,6 +407,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
@@ -361,6 +436,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -373,12 +449,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
- for (unsigned i = (unsigned)MVT::FIRST_FP_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_FP_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
- setOperationAction(ISD::FSQRT, VT, Expand);
- }
-
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
@@ -393,6 +463,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
@@ -401,6 +475,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ }
+
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -429,6 +509,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
@@ -449,6 +531,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::BR_CC);
setTargetDAGCombine(ISD::BSWAP);
+ // Use reciprocal estimates.
+ if (TM.Options.UnsafeFPMath) {
+ setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::FSQRT);
+ }
+
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget->isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -482,15 +570,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
- maxStoresPerMemset = 32;
- maxStoresPerMemsetOptSize = 16;
- maxStoresPerMemcpy = 32;
- maxStoresPerMemcpyOptSize = 8;
- maxStoresPerMemmove = 32;
- maxStoresPerMemmoveOptSize = 8;
+ MaxStoresPerMemset = 32;
+ MaxStoresPerMemsetOptSize = 16;
+ MaxStoresPerMemcpy = 32;
+ MaxStoresPerMemcpyOptSize = 8;
+ MaxStoresPerMemmove = 32;
+ MaxStoresPerMemmoveOptSize = 8;
setPrefFunctionAlignment(4);
- benefitFromCodePlacementOpt = true;
}
}
@@ -521,6 +608,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::FRE: return "PPCISD::FRE";
+ case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
@@ -536,16 +625,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
- case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
- case PPCISD::STD_32: return "PPCISD::STD_32";
- case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
- case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
- case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
- case PPCISD::NOP: return "PPCISD::NOP";
+ case PPCISD::CALL: return "PPCISD::CALL";
+ case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
- case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
- case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
+ case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
+ case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFCR: return "PPCISD::MFCR";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
@@ -555,13 +641,25 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::MFFS: return "PPCISD::MFFS";
- case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
- case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
- case PPCISD::MTFSF: return "PPCISD::MTFSF";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
+ case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
+ case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
+ case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
+ case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
+ case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
+ case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
+ case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
+ case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
+ case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
+ case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
+ case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
}
}
@@ -995,7 +1093,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1044,7 +1142,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1107,7 +1205,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1145,15 +1243,19 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SelectionDAG &DAG) const {
if (DisablePPCPreinc) return false;
+ bool isLoad = true;
SDValue Ptr;
EVT VT;
+ unsigned Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
-
+ Alignment = LD->getAlignment();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
+ Alignment = ST->getAlignment();
+ isLoad = false;
} else
return false;
@@ -1161,7 +1263,25 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (VT.isVector())
return false;
- if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+ if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
+
+ // Common code will reject creating a pre-inc form if the base pointer
+ // is a frame index, or if N is a store and the base pointer is either
+ // the same as or a predecessor of the value being stored. Check for
+ // those situations here, and try with swapped Base/Offset instead.
+ bool Swap = false;
+
+ if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
+ Swap = true;
+ else if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
+ Swap = true;
+ }
+
+ if (Swap)
+ std::swap(Base, Offset);
+
AM = ISD::PRE_INC;
return true;
}
@@ -1172,6 +1292,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
return false;
} else {
+ // LDU/STU need an address with at least 4-byte alignment.
+ if (Alignment < 4)
+ return false;
+
// reg + imm * 4.
if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
return false;
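
The new Alignment < 4 bail-out above reflects that the DS-form update instructions (LDU/STDU and friends) encode their displacement in 4-byte units, so underaligned accesses can't use pre-increment. A sketch of the combined gate; the offset-multiple check itself lives in SelectAddressRegImmShift, so its appearance here is illustrative:

// Sketch of the pre-increment gate above: the DS-form update forms need
// both 4-byte alignment and a 4-byte-multiple offset (encoded as imm >> 2).
#include <cassert>
#include <cstdint>

static bool canPreInc(unsigned Alignment, int64_t Offset) {
  if (Alignment < 4)
    return false;              // LDU/STDU can't reach underaligned slots
  return Offset % 4 == 0;      // offset must survive the >> 2 encoding
}

int main() {
  assert(canPreInc(4, 8));
  assert(!canPreInc(2, 8));    // underaligned: fall back, no pre-inc
  assert(!canPreInc(4, 6));    // offset not a 4-byte multiple
}
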
@@ -1308,19 +1432,81 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
EVT PtrVT = getPointerTy();
bool is64bit = PPCSubTarget.isPPC64();
- TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+
+ if (Model == TLSModel::LocalExec) {
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_HA);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_LO);
+ SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ }
+
+ if (!is64bit)
+ llvm_unreachable("only local-exec is currently supported for ppc32");
+
+ if (Model == TLSModel::InitialExec) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
+ PtrVT, TGA, TPOffsetHi);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+ }
+
+ if (Model == TLSModel::GeneralDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLS_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
+ }
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_HA);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_LO);
+ if (Model == TLSModel::LocalDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
+ GOTEntryHi, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLSLD_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
+ Chain, ParmReg, TGA);
+ return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
+ }
- if (model != TLSModel::LocalExec)
- llvm_unreachable("only local-exec TLS mode supported");
- SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
- is64bit ? MVT::i64 : MVT::i32);
- SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
- return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ llvm_unreachable("Unknown TLS model!");
}
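// Illustrative sketch, not part of the patch: the local-exec path above
// materializes a thread-pointer-relative offset as @ha/@l halves. The low
// half is sign-extended by the consuming addi/lwz, so the high half is
// pre-adjusted by 0x8000 to compensate. The reassembly invariant, checked
// in standalone C++ (helper names are ours):
#include <cassert>
#include <cstdint>
#include <initializer_list>

static int64_t Ha16(int64_t X) { return (X + 0x8000) >> 16; }    // @ha
static int64_t Lo16(int64_t X) { return (int16_t)(X & 0xffff); } // @l, sign-extended

int main() {
  for (int64_t Off : {0LL, 1LL, 0x7fffLL, 0x8000LL, 0x12348000LL, -42LL})
    assert(Ha16(Off) * 65536 + Lo16(Off) == Off); // addis + addi recovers Off
  return 0;
}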
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -1654,18 +1840,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
return true;
}
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
@@ -1688,11 +1874,11 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
@@ -1815,7 +2001,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
- CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -1876,7 +2062,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
@@ -2068,13 +2254,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].OrigArgIndex;
unsigned CurArgOffset = ArgOffset;
@@ -2409,6 +2598,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
+ // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
+ // When passing anonymous aggregates, this is currently not true.
+ // See LowerFormalArguments_64SVR4 for a fix.
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
SDValue ArgVal;
@@ -2995,7 +3187,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
- unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
+ unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
@@ -3128,8 +3320,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Other);
NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
- CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
+ CallOpc = PPCISD::BCTRL;
Callee.setNode(0);
+ // Add use of X11 (holding environment pointer)
+ if (isSVR4ABI && isPPC64)
+ Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
@@ -3231,7 +3426,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
- // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
int BytesCalleePops =
(CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
@@ -3247,17 +3442,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// Emit tail call.
if (isTailCall) {
- // If this is the first return lowered for this function, add the regs
- // to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
assert(((Callee.getOpcode() == ISD::Register &&
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
@@ -3279,7 +3463,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
- if (CallOpc == PPCISD::BCTRL_SVR4) {
+ if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
// See PrepareCall() for more information about calls through function
@@ -3290,9 +3474,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
// Otherwise insert NOP for non-local calls.
- CallOpc = PPCISD::CALL_NOP_SVR4;
+ CallOpc = PPCISD::CALL_NOP;
}
}
@@ -3401,11 +3585,11 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
bool Result;
if (Outs[i].IsFixed) {
- Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
- CCInfo);
+ Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
} else {
- Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
- ArgFlags, CCInfo);
+ Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
}
if (Result) {
@@ -3418,7 +3602,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
}
} else {
// All arguments are treated the same.
- CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
}
// Assign locations to all of the outgoing aggregate by value arguments.
@@ -3429,7 +3613,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
// Size of the linkage area, parameter list area and the part of the local
// space variable where copies of aggregates which are passed by value are
@@ -4323,14 +4507,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -4355,12 +4533,17 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
@@ -4466,6 +4649,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
}
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
/// when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -4553,37 +4751,72 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- PPCISD::FCTIDZ,
+ (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ PPCISD::FCTIDZ),
dl, MVT::f64, Src);
break;
case MVT::i64:
- Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
break;
}
// Convert the FP value to an int value through memory.
- SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
+ bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
+ int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
// Emit a store to the stack slot.
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Chain;
+ if (i32Stack) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
+ SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
+ Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ MVT::i32, MMO);
+ } else
+ Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MPI, false, false, 0);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias.
- if (Op.getValueType() == MVT::i32)
+ if (Op.getValueType() == MVT::i32 && !i32Stack) {
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
- return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
+ MPI = MachinePointerInfo();
+ }
+
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
false, false, false, 0);
}
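// Illustrative sketch, not part of the patch: when the result cannot go out
// through stfiwx, the sequence above stores the full 8-byte FCTI*Z result and
// reloads the i32 at offset 4, because that is where the low word of an i64
// lives in PPC's big-endian layout. The same arithmetic, made host-independent
// by encoding the bytes manually:
#include <cassert>
#include <cstdint>

int main() {
  int64_t Wide = 0x0000000012345678;     // stand-in for an FCTIWZ result
  uint8_t Slot[8];
  for (int I = 0; I < 8; ++I)            // store all 8 bytes big-endian
    Slot[I] = (uint64_t)Wide >> (56 - 8 * I) & 0xff;
  uint32_t Low = 0;
  for (int I = 4; I < 8; ++I)            // reload 4 bytes at the +4 bias
    Low = Low << 8 | Slot[I];
  assert((int32_t)Low == 0x12345678);
  return 0;
}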
-SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
+SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDUS : PPCISD::FCFIDS) :
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDU : PPCISD::FCFID);
+ MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT::f32 : MVT::f64;
+
if (Op.getOperand(0).getValueType() == MVT::i64) {
SDValue SINT = Op.getOperand(0);
// When converting to single-precision, we actually need to convert
@@ -4597,6 +4830,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
+ !PPCSubTarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
@@ -4630,44 +4864,69 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
+
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
- SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
- if (Op.getValueType() == MVT::f32)
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
+
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
assert(Op.getOperand(0).getValueType() == MVT::i32 &&
- "Unhandled SINT_TO_FP type in custom expander!");
+ "Unhandled INT_TO_FP type in custom expander!");
// Since we only generate this in 64-bit mode, we can take advantage of
// 64-bit registers. In particular, sign extend the input value into the
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+ SDValue Ld;
+ if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, 4, 4);
+ SDValue Ops[] = { Store, FIdx };
+ Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::LFIWZX : PPCISD::LFIWAX,
+ dl, DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, 2, MVT::i32, MMO);
+ } else {
+ assert(PPCSubTarget.isPPC64() &&
+ "i32->FP without LFIWAX supported only on PPC64");
+
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
Op.getOperand(0));
- // STD the extended value into the stack slot.
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore, 8, 8);
- SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
- SDValue Store =
- DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
- Ops, 4, MVT::i64, MMO);
- // Load the value as a double.
- SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
- false, false, false, 0);
+ // STD the extended value into the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ // Load the value as a double.
+ Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, false, 0);
+ }
// FCFID it and return it.
- SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
- if (Op.getValueType() == MVT::f32)
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
+ if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
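// Illustrative sketch, not part of the patch: the low-11-bit twiddling above
// guards against double rounding on the i64 -> f64 -> f32 path. A concrete
// value where one rounding and two roundings disagree, assuming IEEE-754
// float and double:
#include <cassert>
#include <cstdint>

int main() {
  int64_t V = (1LL << 60) + (1LL << 36) + 1;  // just above an f32 midpoint
  float Direct = (float)V;                    // rounds once, at 24 bits
  float Twice = (float)(double)V;             // rounds at 53 bits, then at 24
  assert(Direct != Twice);                    // the f64 step eats the sticky bit
  return 0;
}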
@@ -4697,12 +4956,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- std::vector<EVT> NodeTys;
SDValue MFFSreg, InFlag;
// Save FP Control Word to register
- NodeTys.push_back(MVT::f64); // return register
- NodeTys.push_back(MVT::Glue); // unused in this context
+ EVT NodeTys[] = {
+ MVT::f64, // return register
+ MVT::Glue // unused in this context
+ };
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
// Save FP register to stack slot
@@ -4936,11 +5196,21 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// Two instruction sequences.
// If this value is in the range [-32,30] and is even, use:
- // tmp = VSPLTI[bhw], result = add tmp, tmp
- if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
- SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
- Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
+ // If this value is in the range [17,31] and is odd, use:
+ // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
+ // If this value is in the range [-31,-17] and is odd, use:
+ // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
+ // Note the last two are three-instruction sequences.
+ if (SextVal >= -32 && SextVal <= 31) {
+ // To avoid having these optimizations undone by constant folding,
+ // we convert to a pseudo that will be expanded later into one of
+ // the above forms.
+ SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
+ EVT VT = Op.getValueType();
+ int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
+ SDValue EltSize = DAG.getConstant(Size, MVT::i32);
+ return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
@@ -5036,23 +5306,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
}
- // Three instruction sequences.
-
- // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
- if (SextVal >= 0 && SextVal <= 31) {
- SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
- // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
- if (SextVal >= -31 && SextVal <= 0) {
- SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
-
return SDValue();
}
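// Illustrative sketch, not part of the patch: the VADD_SPLAT expansion
// promised above only ever needs vsplti immediates in the signed 5-bit range
// [-16,15]. A standalone check over every SextVal the code accepts:
#include <cassert>

static bool FitsVsplti(int I) { return I >= -16 && I <= 15; }

int main() {
  for (int V = -32; V <= 31; ++V) {
    if (FitsVsplti(V)) continue;           // a single vsplti suffices
    if (V % 2 == 0)                        // even: vsplti(V/2) + vsplti(V/2)
      assert(FitsVsplti(V / 2) && V / 2 + V / 2 == V);
    else if (V > 0)                        // odd [17,31]: vsplti(V-16) - vsplti(-16)
      assert(FitsVsplti(V - 16) && (V - 16) - (-16) == V);
    else                                   // odd [-31,-17]: vsplti(V+16) + vsplti(-16)
      assert(FitsVsplti(V + 16) && (V + 16) + (-16) == V);
  }
  return 0;
}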
@@ -5326,9 +5579,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(3), // RHS
DAG.getConstant(CompareOpc, MVT::i32)
};
- std::vector<EVT> VTs;
- VTs.push_back(Op.getOperand(2).getValueType());
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Now that we have the comparison, emit a copy from the CR to a GPR.
@@ -5470,11 +5721,15 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
Op.getDebugLoc());
- case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
// Lower 64-bit shifts.
@@ -5528,50 +5783,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
MVT::f64, N->getOperand(0),
DAG.getIntPtrConstant(1));
- // This sequence changes FPSCR to do round-to-zero, adds the two halves
- // of the long double, and puts FPSCR back the way it was. We do not
- // actually model FPSCR.
- std::vector<EVT> NodeTys;
- SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
-
- NodeTys.push_back(MVT::f64); // Return register
- NodeTys.push_back(MVT::Glue); // Returns a flag for later insns
- Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
- MFFSreg = Result.getValue(0);
- InFlag = Result.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = DAG.getConstant(31, MVT::i32);
- Ops[1] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
- InFlag = Result.getValue(0);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = DAG.getConstant(30, MVT::i32);
- Ops[1] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
- InFlag = Result.getValue(0);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::f64); // result of add
- NodeTys.push_back(MVT::Glue); // Returns a flag
- Ops[0] = Lo;
- Ops[1] = Hi;
- Ops[2] = InFlag;
- Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
- FPreg = Result.getValue(0);
- InFlag = Result.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::f64);
- Ops[0] = DAG.getConstant(1, MVT::i32);
- Ops[1] = MFFSreg;
- Ops[2] = FPreg;
- Ops[3] = InFlag;
- Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
- FPreg = Result.getValue(0);
+ // Add the two halves of the long double in round-to-zero mode.
+ SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
// We know the low half is about to be thrown away, so just use something
// convenient.
@@ -5663,7 +5876,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = PPCSubTarget.isPPC64();
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -5767,7 +5980,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
.addReg(TmpReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
.addReg(Tmp3Reg).addReg(Tmp2Reg);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
.addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
@@ -5782,9 +5995,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
return BB;
}
+llvm::MachineBasicBlock*
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // SjLjSetup mainMBB
+ // bl mainMBB
+ // v_restore = 1
+ // b sinkMBB
+ //
+ // mainMBB:
+ // buf[LabelOffset] = LR
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Note that the structure of the jmp_buf used here is not compatible
+ // with that used by libc, and is not designed to be. Specifically, it
+ // stores only those 'reserved' registers that LLVM does not otherwise
+ // understand how to spill. Also, by convention, by the time this
+ // intrinsic is called, Clang has already stored the frame address in the
+ // first slot of the buffer and stack address in the third. Following the
+ // X86 target code, we'll store the jump address in the second slot. We also
+ // need to save the TOC pointer (R2) to handle jumps between shared
+ // libraries, and that will be stored in the fourth slot. The thread
+ // identifier (R13) is not affected.
+
+ // thisMBB:
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ // Prepare the IP (the setjmp return-label address) in a register.
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+ unsigned BufReg = MI->getOperand(1).getReg();
+
+ if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
+ MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, /* weight */ 0);
+ thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+
+ // mainMBB:
+ // mainDstReg = 0
+ MIB = BuildMI(mainMBB, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP
+ if (PPCSubTarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
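// Illustrative sketch, not part of the patch: the slot convention shared by
// emitEHSjLjSetJmp and emitEHSjLjLongJmp, written out as a plain struct.
// This is LLVM's private layout (slot width = the pointer size), not libc's
// jmp_buf.
struct PPCSjLjBuf {
  void *FrameAddress;   // slot 0: already stored by the front end
  void *ReturnLabel;    // slot 1: LabelOffset, stored by the setjmp emitter
  void *StackPointer;   // slot 2: SPOffset, already stored by the front end
  void *TOCPointer;     // slot 3: TOCOffset, saved for cross-library jumps
};                      // r13, the thread id register, is deliberately not saved

static_assert(sizeof(PPCSjLjBuf) == 4 * sizeof(void *), "four pointer slots");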
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but NOT referenced, it's treated as a GPR.
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ unsigned BufReg = MI->getOperand(0).getReg();
+
+ // Reload FP (the jumped-to function may not have had a
+ // frame pointer, and if so, then its r31 will be restored
+ // as necessary).
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload IP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload SP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+ .addImm(SPOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // FIXME: When we also support base pointers, that register must also be
+ // restored here.
+
+ // Reload TOC
+ if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Jump
+ BuildMI(*MBB, MI, DL,
+ TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+ BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+ return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// To "insert" these instructions we actually have to insert their
@@ -5802,24 +6244,24 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned SelectPred = MI->getOperand(4).getImm();
DebugLoc dl = MI->getDebugLoc();
- // The SelectPred is ((BI << 5) | BO) for a BCC
- unsigned BO = SelectPred & 0xF;
- assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel");
-
- unsigned TrueOpNo, FalseOpNo;
- if (BO == 12) {
- TrueOpNo = 2;
- FalseOpNo = 3;
- } else {
- TrueOpNo = 3;
- FalseOpNo = 2;
- SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred);
+ unsigned SubIdx;
+ bool SwapOps;
+ switch (SelectPred) {
+ default: llvm_unreachable("invalid predicate for isel");
+ case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
+ case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
+ case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
+ case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
+ case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
+ case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
+ case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
+ case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
}
BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(TrueOpNo).getReg())
- .addReg(MI->getOperand(FalseOpNo).getReg())
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg());
+ .addReg(MI->getOperand(SwapOps? 3 : 2).getReg())
+ .addReg(MI->getOperand(SwapOps? 2 : 3).getReg())
+ .addReg(MI->getOperand(1).getReg(), 0, SubIdx);
} else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
@@ -6052,7 +6494,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
unsigned Ptr1Reg;
unsigned TmpReg = RegInfo.createVirtualRegister(RC);
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
// fallthrough --> loopMBB
@@ -6155,6 +6597,75 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = exitMBB;
BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
.addReg(ShiftReg);
+ } else if (MI->getOpcode() == PPC::FADDrtz) {
+ // This pseudo performs an FADD with rounding mode temporarily forced
+ // to round-to-zero. We emit this via custom inserter since the FPSCR
+ // is not modeled at the SelectionDAG level.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src1 = MI->getOperand(1).getReg();
+ unsigned Src2 = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
+
+ // Set rounding mode to round-to-zero.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
+
+ // Perform addition.
+ BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+
+ // Restore FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ } else if (MI->getOpcode() == PPC::FRINDrint ||
+ MI->getOpcode() == PPC::FRINSrint) {
+ bool isf32 = MI->getOpcode() == PPC::FRINSrint;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+
+ // Perform the rounding.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
+ .addReg(Src);
+
+ // Compare the results.
+ BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
+ .addReg(Dest).addReg(Src);
+
+ // If the results were not equal, then set the FPSCR XX bit.
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
+
+ BB->addSuccessor(midMBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+
+ // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
+ // the FI bit here because that will not automatically set XX also,
+ // and XX is what libm interprets as the FE_INEXACT flag.
+ BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+
+ BB->addSuccessor(exitMBB);
+
+ BB = exitMBB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
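// Illustrative sketch, not part of the patch: the FADDrtz expansion above
// relies on FPSCR's two-bit RN (rounding mode) field in IBM bits 30-31, where
// 0b01 selects round-toward-zero; mtfsb1 31 plus mtfsb0 30 forces that value
// from any starting mode. The bit arithmetic, modeled standalone:
#include <cassert>
#include <cstdint>

static uint32_t SetBit(uint32_t FPSCR, int Bit) { return FPSCR | 1u << (31 - Bit); }
static uint32_t ClrBit(uint32_t FPSCR, int Bit) { return FPSCR & ~(1u << (31 - Bit)); }

int main() {
  for (uint32_t RN = 0; RN < 4; ++RN) {  // every initial rounding mode
    uint32_t FPSCR = RN;                 // RN lives in the low two bits
    FPSCR = SetBit(FPSCR, 31);           // MTFSB1 31
    FPSCR = ClrBit(FPSCR, 30);           // MTFSB0 30
    assert((FPSCR & 3) == 1);            // 0b01: round toward zero
  }
  return 0;
}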
@@ -6167,6 +6678,139 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
+SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
+ (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton's method for finding a zero of F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal, we need to find the zero of the function:
+ // F(X) = A X - 1 [which has a zero at X = 1/A]
+ // =>
+ // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+ // does not require additional intermediate precision]
+
+ // Convergence is quadratic, so we essentially double the number of correct
+ // bits after every iteration. The minimum architected relative accuracy is
+ // 2^-5. When hasRecipPrec(), this is 2^-14. An IEEE float mantissa has 23
+ // bits and a double mantissa has 52.
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue FPOne =
+ DAG.getConstantFP(1.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPOne, FPOne, FPOne, FPOne);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
+ DCI.AddToWorklist(Est.getNode());
+
+ // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
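// Illustrative sketch, not part of the patch: the iteration above in scalar
// form, confirming that three refinements of a 2^-5-accurate seed reach
// roughly double-precision accuracy (the error squares at each step):
#include <cassert>
#include <cmath>

int main() {
  double A = 3.141592653589793;
  double Est = (1.0 / A) * (1.0 + 0.03);  // stand-in for fre, ~2^-5 rel. error
  for (int I = 0; I < 3; ++I)
    Est = Est + Est * (1.0 - A * Est);    // Est += Est * (1 - A*Est)
  assert(std::fabs(Est * A - 1.0) < 1e-11);
  return 0;
}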
+
+SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton's method for finding a zero of F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal sqrt, we need to find the zero of the function:
+ // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+ // =>
+ // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+ // As a result, we precompute A/2 prior to the iteration loop.
+
+ // Convergence is quadratic, so we essentially double the number of correct
+ // bits after every iteration. The minimum architected relative accuracy is
+ // 2^-5. When hasRecipPrec(), this is 2^-14. An IEEE float mantissa has 23
+ // bits and a double mantissa has 52.
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue FPThreeHalves =
+ DAG.getConstantFP(1.5, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPThreeHalves, FPThreeHalves,
+ FPThreeHalves, FPThreeHalves);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
+ DCI.AddToWorklist(Est.getNode());
+
+ // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
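// Illustrative sketch, not part of the patch: the matching scalar form of the
// rsqrt refinement, including the (1.5*A - A) trick that makes 1.5 the only
// FP constant the sequence needs:
#include <cassert>
#include <cmath>

int main() {
  double A = 2.0;
  double Est = (1.0 / std::sqrt(A)) * (1.0 + 0.03); // stand-in for frsqrte
  double HalfA = 1.5 * A - A;                       // == A/2
  for (int I = 0; I < 3; ++I)
    Est = Est * (1.5 - HalfA * Est * Est);          // X(1.5 - (A/2) X^2)
  assert(std::fabs(Est * Est * A - 1.0) < 1e-10);
  return 0;
}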
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
@@ -6193,7 +6837,72 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
}
break;
+ case ISD::FDIV: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+ N->getValueType(0), RV);
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+ N->getValueType(0), RV,
+ N->getOperand(1).getOperand(1));
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ }
+
+ SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+
+ }
+ break;
+ case ISD::FSQRT: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
+ // reciprocal sqrt.
+ SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAGCombineFastRecip(RV, DCI);
+ if (RV.getNode() != 0)
+ return RV;
+ }
+ }
+ break;
case ISD::SINT_TO_FP:
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
@@ -6240,8 +6949,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
- Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
- N->getOperand(2), N->getOperand(3));
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+
+ Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
return Val;
}
@@ -6251,7 +6967,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(1).getOpcode() == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
- N->getOperand(1).getValueType() == MVT::i16)) {
+ N->getOperand(1).getValueType() == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getOperand(1).getValueType() == MVT::i64))) {
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
if (BSwapOp.getValueType() == MVT::i16)
@@ -6272,7 +6991,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
- (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getValueType(0) == MVT::i64))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the byte-swapping load.
@@ -6283,8 +7005,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
SDValue BSLoad =
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
- DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
- LD->getMemoryVT(), LD->getMemOperand());
+ DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+ MVT::i64 : MVT::i32, MVT::Other),
+ Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
@@ -6384,14 +7107,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
// Create the PPCISD altivec 'dot' comparison node.
- std::vector<EVT> VTs;
SDValue Ops[] = {
LHS.getOperand(2), // LHS of compare
LHS.getOperand(3), // RHS of compare
DAG.getConstant(CompareOpc, MVT::i32)
};
- VTs.push_back(LHS.getOperand(2).getValueType());
- VTs.push_back(MVT::Glue);
+ EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Unpack the result based on how the target uses it.
@@ -6543,6 +7264,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+ return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
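// Note (editorial, not part of the patch): 'b' must exclude r0/x0 because in
// PPC D-form and X-form addressing a base-register operand of r0 is read as
// the constant 0, not the register contents, so r0 can never serve as a base
// pointer. Before this change, 'b' simply fell through into the 'r' handling.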
@@ -6727,13 +7451,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects()) &&
- MFI->getStackSize() &&
- !MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::Naked);
- unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
- (is31 ? PPC::R31 : PPC::R1);
+
+ // Naked functions never have a frame pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until PEI.
+ unsigned FrameReg;
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+ else
+ FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
+
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
PtrVT);
while (Depth--)
@@ -6754,16 +7481,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// lowering. If DstAlign is zero that means the destination alignment can
/// satisfy any constraint. Similarly, if SrcAlign is zero it means there
/// isn't a need to check it against the alignment requirement,
-/// probably because the source does not need to be loaded. If
-/// 'IsZeroVal' is true, that means it's safe to return a
-/// non-scalar-integer type, e.g. empty string source, constant, or loaded
-/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-/// constant so it does not need to be loaded.
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
if (this->PPCSubTarget.isPPC64()) {
@@ -6773,6 +7499,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ bool *Fast) const {
+ if (DisablePPCUnaligned)
+ return false;
+
+ // PowerPC supports unaligned memory access for simple non-vector types.
+ // Although accessing unaligned addresses is not as efficient as accessing
+ // aligned addresses, it is generally more efficient than manual expansion,
+ // and generally traps to software emulation only when crossing page
+ // boundaries.
+
+ if (!VT.isSimple())
+ return false;
+
+ if (VT.getSimpleVT().isVector())
+ return false;
+
+ if (VT == MVT::ppcf128)
+ return false;
+
+ if (Fast)
+ *Fast = true;
+
+ return true;
+}
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index b3c7f9c28d40..7157b70d8622 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -16,9 +16,10 @@
#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
#include "PPC.h"
+#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace PPCISD {
@@ -35,14 +36,21 @@ namespace llvm {
/// was temporarily in the f64 operand.
FCFID,
+ /// Newer FCFID[US] integer-to-floating-point conversion instructions for
+ /// unsigned integers and single-precision outputs.
+ FCFIDU, FCFIDS, FCFIDUS,
+
/// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
/// operand, producing an f64 value containing the integer representation
/// of that FP value.
FCTIDZ, FCTIWZ,
- /// STFIWX - The STFIWX instruction. The first operand is an input token
- /// chain, then an f64 value to store, then an address to store it to.
- STFIWX,
+ /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
+ /// unsigned integers.
+ FCTIDUZ, FCTIWUZ,
+
+ /// Reciprocal estimate instructions (unary FP ops).
+ FRE, FRSQRTE,
// VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
// three v4f32 operands and producing a v4f32 result.
@@ -90,17 +98,10 @@ namespace llvm {
/// code.
SRL, SRA, SHL,
- /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
- /// registers.
- EXTSW_32,
-
/// CALL - A direct function call.
- /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit
+ /// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls.
- CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4,
-
- /// NOP - Special NOP which follows 64-bit SVR4 calls.
- NOP,
+ CALL, CALL_NOP,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
@@ -108,7 +109,7 @@ namespace llvm {
/// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
/// BCTRL instruction.
- BCTRL_Darwin, BCTRL_SVR4,
+ BCTRL,
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
@@ -119,6 +120,12 @@ namespace llvm {
/// are undefined.
MFCR,
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
/// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
/// instructions. For lack of a better number, we use the opcode number
/// encoding for the OPC field to identify the compare. For example, 838
@@ -138,26 +145,13 @@ namespace llvm {
/// an optional input flag argument.
COND_BRANCH,
- // The following 5 instructions are used only as part of the
- // long double-to-int conversion sequence.
-
- /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
- /// register.
- MFFS,
-
- /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
- MTFSB0,
-
- /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
- MTFSB1,
-
- /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
- /// rounding towards zero. It has flags added so it won't move past the
- /// FPSCR-setting instructions.
+ /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+ /// towards zero. Used only as part of the long double-to-int
+ /// conversion sequence.
FADDRTZ,
- /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
- MTFSF,
+ /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+ MFFS,
/// LARX = This corresponds to PPC l{w|d}arx instruction: load and
/// reserve indexed. This is used to implement atomic operations.
@@ -178,20 +172,111 @@ namespace llvm {
CR6SET,
CR6UNSET,
- /// STD_32 - This is the STD instruction for use with "32-bit" registers.
- STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
+ /// TLS model, produces an ADDIS8 instruction that adds the GOT
+ /// base to sym@got@tprel@ha.
+ ADDIS_GOT_TPREL_HA,
+
+ /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
+ /// TLS model, produces a LD instruction with base register G8RReg
+ /// and offset sym@got@tprel@l. This completes the addition that
+ /// finds the offset of "sym" relative to the thread pointer.
+ LD_GOT_TPREL_L,
+
+ /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+ /// model, produces an ADD instruction that adds the contents of
+ /// G8RReg to the thread pointer. Symbol contains a relocation
+ /// sym@tls which is to be replaced by the thread pointer and
+ /// identifies to the linker that the instruction is part of a
+ /// TLS sequence.
+ ADD_TLS,
+
+ /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym@got@tlsgd@ha.
+ ADDIS_TLSGD_HA,
+
+ /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@tlsgd@l.
+ ADDI_TLSGD_L,
+
+ /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym@tlsgd).
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym@got@tlsld@ha.
+ ADDIS_TLSLD_HA,
+
+ /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@tlsld@l.
+ ADDI_TLSLD_L,
+
+ /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym@tlsld).
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
+ /// local-dynamic TLS model, produces an ADDIS8 instruction
+ /// that adds X3 to sym@dtprel@ha. The Chain operand is needed
+ /// to tie this in place following a copy to %X3 from the result
+ /// of a GET_TLSLD_ADDR.
+ ADDIS_DTPREL_HA,
+
+ /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym@got@dtprel@l.
+ ADDI_DTPREL_L,
+
+ /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
+ VADD_SPLAT,
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
/// i32.
- STBRX,
+ STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
/// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
/// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
/// then puts it in the bottom bits of the GPRC. TYPE can be either i16
/// or i32.
- LBRX
+ LBRX,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+ /// load which sign-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWAX,
+
+ /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
+ /// load which zero-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWZX,
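/// Sketch of the idiom these loads enable: an i32-to-f64 conversion that
/// avoids a GPR-to-FPR round trip through the stack, e.g.
///   lfiwax 1, 0, 3    # sign-extend the i32 at (r3) straight into f1
///   fcfid  1, 1       # convert to double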
+
+ /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
+ /// produces an ADDIS8 instruction that adds the TOC base register to
+ /// sym@toc@ha.
+ ADDIS_TOC_HA,
+
+ /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
+ /// produces a LD instruction with base register G8RReg and offset
+ /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ LD_TOC_L,
+
+ /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
+ /// an ADDI8 instruction that adds G8RReg to sym@toc@l.
+ /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ ADDI_TOC_L
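/// Sketch of the medium-code-model sequences formed from the TOC nodes
/// above (illustrative registers):
///   addis 3, 2, sym@toc@ha
///   addi  3, 3, sym@toc@l      # ADDI_TOC_L: materialize the address
/// or, to load the TOC entry itself,
///   ld    3, sym@toc@l(3)      # LD_TOC_L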
};
}
@@ -241,6 +326,7 @@ namespace llvm {
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &PPCSubTarget;
+ const PPCRegisterInfo *PPCRegInfo;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -249,7 +335,7 @@ namespace llvm {
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
/// getSetCCResultType - Return the ISD::SETCC ValueType
virtual EVT getSetCCResultType(EVT VT) const;
@@ -315,6 +401,12 @@ namespace llvm {
MachineBasicBlock *MBB,
bool is8bit, unsigned Opcode) const;
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
ConstraintType getConstraintType(const std::string &Constraint) const;
/// Examine constraint string and operand type and determine a weight value.
@@ -358,18 +450,21 @@ namespace llvm {
/// lowering. If DstAlign is zero, that means the destination alignment can
/// satisfy any constraint. Similarly, if SrcAlign is zero it
/// means there is no need to check it against the alignment requirement,
- /// probably because the source does not need to be loaded. If
- /// 'IsZeroVal' is true, that means it's safe to return a
- /// non-scalar-integer type, e.g. empty string source, constant, or loaded
- /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
- /// constant so it does not need to be loaded.
+ /// probably because the source does not need to be loaded. If 'IsMemset' is
+ /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+ /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
virtual EVT
- getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal, bool MemcpyStrSrc,
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// Is unaligned memory access allowed for the given type, and is it fast
+ /// relative to software emulation.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
@@ -415,7 +510,7 @@ namespace llvm {
const PPCSubtarget &Subtarget) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
- SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
@@ -525,6 +620,12 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
+ SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
};
}
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 9711452ec46a..fa5b65f0ba2d 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -30,8 +30,12 @@ def symbolLo64 : Operand<i64> {
let EncoderMethod = "getLO16Encoding";
}
def tocentry : Operand<iPTR> {
- let MIOperandInfo = (ops i32imm:$imm);
+ let MIOperandInfo = (ops i64imm:$imm);
}
+def tlsreg : Operand<i64> {
+ let EncoderMethod = "getTLSRegEncoding";
+}
+def tlsgd : Operand<i64> {}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
@@ -62,123 +66,112 @@ def HI48_64 : SDNodeXForm<imm, [{
// Calls.
//
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+}
+
let Defs = [LR8] in
def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
PPC970_Unit_BRU;
-// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
- // Convenient aliases for call instructions
- let Uses = [RM] in {
- def BL8_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA8_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
- }
- let Uses = [CTR8, RM] in {
- def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>;
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let Defs = [CTR8], Uses = [CTR8] in {
+ def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
}
}
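// Usage sketch (not part of this change): the CTR branches implement
// counted loops with a single decrement-and-branch instruction:
//   mtctr 4           # CTR = r4, the trip count
// loop:
//   ...
//   bdnz  loop        # --CTR; branch while CTR != 0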
-// ELF 64 ABI Calls = Darwin ABI Calls
-// Used to define BL8_ELF and BLA8_ELF
let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
- def BL8_ELF : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
+ def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", BrB, []>; // See Pat patterns below.
- let isCodeGenOnly = 1 in
- def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ def BLA8 : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ "bla $func", BrB, [(PPCcall (i64 imm:$func))]>;
+ }
+ let Uses = [RM], isCodeGenOnly = 1 in {
+ def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func),
"bl $func\n\tnop", BrB, []>;
- def BLA8_ELF : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
+ def BL8_NOP_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, tlsgd:$sym),
+ "bl $func($sym)\n\tnop", BrB, []>;
- let isCodeGenOnly = 1 in
- def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ def BL8_NOP_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, tlsgd:$sym),
+ "bl $func($sym)\n\tnop", BrB, []>;
+
+ def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
(outs), (ins aaddr:$func),
"bla $func\n\tnop", BrB,
- [(PPCcall_nop_SVR4 (i64 imm:$func))]>;
+ [(PPCcall_nop (i64 imm:$func))]>;
}
- let Uses = [X11, CTR8, RM] in {
- def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>;
+ let Uses = [CTR8, RM] in {
+ def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", BrB, [(PPCbctrl)]>,
+ Requires<[In64BitMode]>;
}
}
// Calls
-def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)),
- (BL8_Darwin tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
- (BL8_Darwin texternalsym:$dst)>;
+def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
+ (BL8 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)),
+ (BL8_NOP tglobaladdr:$dst)>;
-def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
- (BL8_ELF tglobaladdr:$dst)>;
-def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)),
- (BL8_NOP_ELF tglobaladdr:$dst)>;
-
-def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
- (BL8_ELF texternalsym:$dst)>;
-def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)),
- (BL8_NOP_ELF texternalsym:$dst)>;
-
-def : Pat<(PPCnop),
- (NOP)>;
+def : Pat<(PPCcall (i64 texternalsym:$dst)),
+ (BL8 texternalsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
+ (BL8_NOP texternalsym:$dst)>;
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64",
- [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_SUB_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64",
- [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_OR_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64",
- [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_XOR_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64",
- [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_AND_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64",
- [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_NAND_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64",
- [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
+ [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_CMP_SWAP_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64",
- [(set G8RC:$dst,
- (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+ [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
def ATOMIC_SWAP_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64",
- [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
+ [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
}
}
// Instructions to support atomic operations
def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
"ldarx $rD, $ptr", LdStLDARX,
- [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
+ [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
let Defs = [CR0] in
def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
"stdcx. $rS, $dst", LdStSTDCX,
- [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+ [(PPCstcx i64:$rS, xoaddr:$dst)]>,
isDOT;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
@@ -197,17 +190,12 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
+let isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
- isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in {
- let isReturn = 1 in {
- def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
- Requires<[In64BitMode]>;
- }
-
- def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
- Requires<[In64BitMode]>;
-}
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
@@ -223,6 +211,8 @@ def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
"ba $dst", BrB,
[]>;
+}
+
def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
@@ -232,20 +222,13 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
- let Defs = [CTR8], Uses = [CTR8] in {
- def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz $dst">;
- def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz $dst">;
- }
-}
-// 64-but CR instructions
+// 64-bit CR instructions
def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let isCodeGenOnly = 1 in
def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
"#MFCR8pseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -254,6 +237,18 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
"mfcr $rT", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+}
+
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -262,13 +257,13 @@ def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
"mfctr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in {
+let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Pattern = [(set G8RC:$rT, readcyclecounter)] in
+let Pattern = [(set i64:$rT, readcyclecounter)] in
def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
"mfspr $rT, 268", SprMFTB>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -279,8 +274,8 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
let Defs = [X1], Uses = [X1] in
def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8",
- [(set G8RC:$result,
- (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
+ [(set i64:$result,
+ (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
let Defs = [LR8] in {
def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
@@ -302,126 +297,129 @@ let PPC970_Unit = 1 in { // FXU Operations.
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
"li $rD, $imm", IntSimple,
- [(set G8RC:$rD, immSExt16:$imm)]>;
+ [(set i64:$rD, immSExt16:$imm)]>;
def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
"lis $rD, $imm", IntSimple,
- [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+ [(set i64:$rD, imm16ShiftedSExt:$imm)]>;
}
// Logical ops.
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"nand $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"and $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"andc $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
+ [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"or $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"nor $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"orc $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
+ [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"eqv $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
+ [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
"xor $rA, $rS, $rB", IntSimple,
- [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
+ [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
// Logical ops with immediate.
def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IntGeneral,
- [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>,
+ [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
isDOT;
def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IntGeneral,
- [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
+ [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
+ [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"oris $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"xori $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
+ [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IntSimple,
- [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"add $rT, $rA, $rB", IntSimple,
- [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
+// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
+// initial-exec thread-local storage model.
+let isCodeGenOnly = 1 in
+def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
+ "add $rT, $rA, $rB@tls", IntSimple,
+ [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
let Defs = [CARRY] in {
def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"addc $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
+ [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
PPC970_DGroup_Cracked;
def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"addic $rD, $rA, $imm", IntGeneral,
- [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
+ [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>;
}
-def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
- "addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm),
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+ [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+ [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
- [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
+ [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>;
def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subfc $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
+ [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
PPC970_DGroup_Cracked;
}
def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subf $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
+ [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"neg $rT, $rA", IntSimple,
- [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+ [(set i64:$rT, (ineg i64:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"adde $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, -1))]>;
+ [(set i64:$rT, (adde i64:$rA, -1))]>;
def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addze $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
+ [(set i64:$rT, (adde i64:$rA, 0))]>;
def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"subfe $rT, $rA, $rB", IntGeneral,
- [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
+ [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (sube -1, G8RC:$rA))]>;
+ [(set i64:$rT, (sube -1, i64:$rA))]>;
def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfze $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
+ [(set i64:$rT, (sube 0, i64:$rA))]>;
}
def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulhd $rT, $rA, $rB", IntMulHW,
- [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulhdu $rT, $rA, $rB", IntMulHWU,
- [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>;
+ [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
"cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
@@ -434,54 +432,60 @@ def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"sld $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"srd $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
let Defs = [CARRY] in {
def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
"srad $rA, $rS, $rB", IntRotateD,
- [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+ [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
}
def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
"extsb $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
+ [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
"extsh $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
+ [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
"extsw $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
-/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
-def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsw $rA, $rS", IntSimple,
- [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
+ [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
"extsw $rA, $rS", IntSimple,
- [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
+ [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
let Defs = [CARRY] in {
def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
"sradi $rA, $rS, $SH", IntRotateDI,
- [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
+ [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
}
def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
"cntlzd $rA, $rS", IntGeneral,
- [(set G8RC:$rA, (ctlz G8RC:$rS))]>;
+ [(set i64:$rA, (ctlz i64:$rS))]>;
+def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS),
+ "popcntd $rA, $rS", IntGeneral,
+ [(set i64:$rA, (ctpop i64:$rS))]>;
+
+// popcntw also does a population count on the high 32 bits (storing the
+// result in the high 32 bits of the output). We'll ignore that here (which is
+// safe because we never separately use the high part of the 64-bit registers).
+def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS),
+ "popcntw $rA, $rS", IntGeneral,
+ [(set i32:$rA, (ctpop i32:$rS))]>;
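// Usage sketch with illustrative registers:
//   popcntd 3, 4      # r3 = number of set bits across all 64 bits of r4
//   popcntw 3, 4      # each word of r3 = popcount of that word of r4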
def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"divd $rT, $rA, $rB", IntDivD,
- [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"divdu $rT, $rA, $rB", IntDivD,
- [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"mulld $rT, $rA, $rB", IntMulHD,
- [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64;
+ [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
let isCommutable = 1 in {
@@ -512,7 +516,7 @@ def RLWINM8 : MForm_2<21,
[]>;
def ISEL8 : AForm_4<31, 15,
- (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
+ (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
} // End FXU Operations.
@@ -527,94 +531,96 @@ def ISEL8 : AForm_4<31, 15,
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>,
+ [(set i64:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
"lwa $rD, $src", LdStLWA,
- [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ [(set i64:$rD,
+ (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>,
+ [(set i64:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
"lwax $rD, $src", LdStLHA,
- [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// Update forms.
-let mayLoad = 1 in
-def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
- ptr_rc:$rA),
- "lhau $rD, $disp($rA)", LdStLHAU,
- []>, RegConstraint<"$rA = $ea_result">,
+let mayLoad = 1 in {
+def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memri:$addr),
+ "lhau $rD, $addr", LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
-def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
+}
// Zero extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
"lbz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
+ [(set i64:$rD, (zextloadi8 iaddr:$src))]>;
def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
"lhz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
+ [(set i64:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
"lwz $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+ [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
"lbzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi8 xaddr:$src))]>;
def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
"lhzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
"lwzx $rD, $src", LdStLoad,
- [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
+ [(set i64:$rD, (zextloadi32 xaddr:$src))]>;
// Update forms.
let mayLoad = 1 in {
-def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
}
@@ -624,25 +630,28 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
"ld $rD, $src", LdStLD,
- [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+ [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
+// The following three definitions are selected for small code model only.
+// Otherwise, we need to create two instructions to form a 32-bit offset,
+// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
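// Sketch: under the small code model every TOC entry is within a 16-bit
// offset of the TOC pointer in X2, so a single
//   ld 3, sym@toc(2)
// suffices; the medium/large models use the ADDIStocHA/LDtocL pair
// defined further below instead.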
def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtoc",
- [(set G8RC:$rD,
- (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtocJTI",
- [(set G8RC:$rD,
- (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"#LDtocCPT",
- [(set G8RC:$rD,
- (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64;
+ [(set i64:$rD,
+ (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
-let hasSideEffects = 1 in {
+let hasSideEffects = 1, isCodeGenOnly = 1 in {
let RST = 2, DS = 2 in
def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg),
"ld 2, 8($reg)", LdStLD,
- [(PPCload_toc G8RC:$reg)]>, isPPC64;
+ [(PPCload_toc i64:$reg)]>, isPPC64;
let RST = 2, DS = 10, RA = 1 in
def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
@@ -651,18 +660,21 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
}
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
- [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
-
+ [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src),
+ "ldbrx $rD, $src", LdStLoad,
+ [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
+
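// Sketch (illustrative registers): ldbrx/stdbrx give byte-reversed
// doubleword accesses, e.g. reading a little-endian 64-bit field on this
// big-endian target:
//   ldbrx 3, 0, 4     # r3 = the 8 bytes at (r4), byte-swapped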
let mayLoad = 1 in
-def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
"ldu $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
-def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"ldux $rD, $addr", LdStLDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -671,118 +683,168 @@ def : Pat<(PPCload ixaddr:$src),
def : Pat<(PPCload xaddr:$src),
(LDX xaddr:$src)>;
+// Support for medium and large code model.
+def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+ "#ADDIStocHA",
+ [(set i64:$rD,
+ (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>,
+ isPPC64;
+def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg),
+ "#LDtocL",
+ [(set i64:$rD,
+ (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+ "#ADDItocL",
+ [(set i64:$rD,
+ (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
+
+// Support for thread-local storage.
+def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDISgotTprelHA",
+ [(set i64:$rD,
+ (PPCaddisGotTprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg),
+ "#LDgotTprelL",
+ [(set i64:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
+ isPPC64;
+def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
+ (ADD8TLS $in, tglobaltlsaddr:$g)>;
+def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDIStlsgdHA",
+ [(set i64:$rD,
+ (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDItlsgdL",
+ [(set i64:$rD,
+ (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+ "#GETtlsADDR",
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDIStlsldHA",
+ [(set i64:$rD,
+ (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDItlsldL",
+ [(set i64:$rD,
+ (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+ "#GETtlsldADDR",
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+ "#ADDISdtprelHA",
+ [(set i64:$rD,
+ (PPCaddisDtprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+ "#ADDIdtprelL",
+ [(set i64:$rD,
+ (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+
let PPC970_Unit = 2 in {
// Truncating stores.
def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
"stb $rS, $src", LdStStore,
- [(truncstorei8 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei8 i64:$rS, iaddr:$src)]>;
def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
"sth $rS, $src", LdStStore,
- [(truncstorei16 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei16 i64:$rS, iaddr:$src)]>;
def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
"stw $rS, $src", LdStStore,
- [(truncstorei32 G8RC:$rS, iaddr:$src)]>;
+ [(truncstorei32 i64:$rS, iaddr:$src)]>;
def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
- [(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei8 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
"sthx $rS, $dst", LdStStore,
- [(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei16 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
- [(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
+ [(truncstorei32 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
"std $rS, $dst", LdStSTD,
- [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+ [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
"stdx $rS, $dst", LdStSTD,
- [(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
+ [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdbrx $rS, $dst", LdStStore,
+ [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
}
-let PPC970_Unit = 2 in {
-
-def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- s16immX4:$ptroff, ptr_rc:$ptrreg),
- "stdu $rS, $ptroff($ptrreg)", LdStSTDU,
- [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
- isPPC64;
-
-
-def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+// Stores with Update (pre-inc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst),
+ "stdu $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-
-def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stdux $rS, $ptroff, $ptrreg", LdStSTDU,
- [(set ptr_rc:$ea_res,
- (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
-
-// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
-def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
- "std $rT, $dst", LdStSTD,
- [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
-def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
- "stdx $rT, $dst", LdStSTD,
- [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
- PPC970_DGroup_Cracked;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STDU $rS, iaddroff:$ptroff, $ptrreg)>;
+
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STDUX $rS, $ptrreg, $ptroff)>;
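// Usage sketch: a pre-increment store writes through the updated address
// and leaves it in the base register, as in the common prologue idiom
//   stdu 1, -112(1)   # store r1 at r1-112, then r1 -= 112
// shown here with an illustrative frame size.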
//===----------------------------------------------------------------------===//
@@ -793,10 +855,26 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations.
def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
"fcfid $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
"fctidz $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64;
+ [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+
+def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fcfidu $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB),
+ "fcfids $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB),
+ "fcfidus $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiduz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiwuz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
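// Sketch (illustrative registers) of the kind of f64 -> u32
// convert-and-store sequence fctiwuz enables together with STFIWX:
//   fctiwuz 0, 1      # low word of f0 = (u32)f1, rounded toward zero
//   stfiwx  0, 0, 3   # store that 32-bit word to (r3)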
}
@@ -805,13 +883,13 @@ def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
//
// Extensions and truncates to/from 32-bit regs.
-def : Pat<(i64 (zext GPRC:$in)),
- (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32),
+def : Pat<(i64 (zext i32:$in)),
+ (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
0, 32)>;
-def : Pat<(i64 (anyext GPRC:$in)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32)>;
-def : Pat<(i32 (trunc G8RC:$in)),
- (EXTRACT_SUBREG G8RC:$in, sub_32)>;
+def : Pat<(i64 (anyext i32:$in)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>;
+def : Pat<(i32 (trunc i64:$in)),
+ (EXTRACT_SUBREG $in, sub_32)>;
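// Sketch of what the zext pattern above ultimately emits: a
// rotate-and-clear that zeroes the high 32 bits while keeping the low
// word intact, e.g. (illustrative register)
//   rldicl 3, 3, 0, 32   # r3 &= 0xFFFFFFFF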
// Extending loads with i64 targets.
def : Pat<(zextloadi1 iaddr:$src),
@@ -838,24 +916,24 @@ def : Pat<(extloadi32 xaddr:$src),
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
// amounts.
-def : Pat<(sra G8RC:$rS, GPRC:$rB),
- (SRAD G8RC:$rS, GPRC:$rB)>;
-def : Pat<(srl G8RC:$rS, GPRC:$rB),
- (SRD G8RC:$rS, GPRC:$rB)>;
-def : Pat<(shl G8RC:$rS, GPRC:$rB),
- (SLD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(sra i64:$rS, i32:$rB),
+ (SRAD $rS, $rB)>;
+def : Pat<(srl i64:$rS, i32:$rB),
+ (SRD $rS, $rB)>;
+def : Pat<(shl i64:$rS, i32:$rB),
+ (SLD $rS, $rB)>;
// SHL/SRL
-def : Pat<(shl G8RC:$in, (i32 imm:$imm)),
- (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>;
-def : Pat<(srl G8RC:$in, (i32 imm:$imm)),
- (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>;
+def : Pat<(shl i64:$in, (i32 imm:$imm)),
+ (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>;
// ROTL
-def : Pat<(rotl G8RC:$in, GPRC:$sh),
- (RLDCL G8RC:$in, GPRC:$sh, 0)>;
-def : Pat<(rotl G8RC:$in, (i32 imm:$imm)),
- (RLDICL G8RC:$in, imm:$imm, 0)>;
+def : Pat<(rotl i64:$in, i32:$sh),
+ (RLDCL $in, $sh, 0)>;
+def : Pat<(rotl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, imm:$imm, 0)>;
// Hi and Lo for Darwin Global Addresses.
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
@@ -866,15 +944,25 @@ def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
-def : Pat<(PPChi tglobaltlsaddr:$g, G8RC:$in),
- (ADDIS8 G8RC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(PPClo tglobaltlsaddr:$g, G8RC:$in),
- (ADDI8L G8RC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
- (ADDIS8 G8RC:$in, tglobaladdr:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
- (ADDIS8 G8RC:$in, tconstpool:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
- (ADDIS8 G8RC:$in, tjumptable:$g)>;
-def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
- (ADDIS8 G8RC:$in, tblockaddress:$g)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in),
+ (ADDIS8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in),
+ (ADDI8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 $in, tglobaladdr:$g)>;
+def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 $in, tconstpool:$g)>;
+def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 $in, tjumptable:$g)>;
+def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS8 $in, tblockaddress:$g)>;
+
+// Patterns to match r+r indexed loads and stores for
+// addresses without at least 4-byte alignment.
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+ (LWAX xoaddr:$src)>;
+def : Pat<(i64 (unaligned4load xoaddr:$src)),
+ (LDX xoaddr:$src)>;
+def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+ (STDX $rS, xoaddr:$dst)>;
+
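// Why these patterns exist: a DS-form displacement is encoded in
// multiples of 4, so for example
//   ld  3, 6(4)       # not encodable: offset is not a multiple of 4
//   ldx 3, 4, 5       # fine: X-form r+r addressing has no such limit
// hence the X-form fallback when 4-byte alignment cannot be proven.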
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index ba58c3e4ac88..a5ba4c8aebef 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -158,34 +158,75 @@ def vecspltisw : PatLeaf<(build_vector), [{
return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
}], VSPLTISW_get_imm>;
-def V_immneg0 : PatLeaf<(build_vector), [{
- return PPC::isAllNegativeZeroVector(N);
-}]>;
-
//===----------------------------------------------------------------------===//
// Helpers for defining instructions that directly correspond to intrinsics.
-// VA1a_Int - A VAForm_1a intrinsic definition.
-class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
+// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
+class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
: VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
!strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
- [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
-// VX1_Int - A VXForm_1 intrinsic definition.
-class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
+// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
+
+// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two
+// input types and an output type.
+class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set OutTy:$vD,
+ (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
+
+// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
+class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
+
+// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
: VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
!strconcat(opc, " $vD, $vA, $vB"), VecFP,
- [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
-// VX2_Int - A VXForm_2 intrinsic definition.
-class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
+// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two
+// input types and an output type.
+class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
+
+// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
+class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set v4f32:$vD, (IntID v4f32:$vB))]>;
+
+// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
: VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
!strconcat(opc, " $vD, $vB"), VecFP,
- [(set VRRC:$vD, (IntID VRRC:$vB))]>;
+ [(set OutTy:$vD, (IntID InTy:$vB))]>;
//===----------------------------------------------------------------------===//
// Instruction Definitions.
+def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">;
+let Predicates = [HasAltivec] in {
+
+let isCodeGenOnly = 1 in {
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
"dss $STRM", LdStLoad /*FIXME*/, []>;
@@ -217,129 +258,136 @@ def DSTST64 : DSS_Form<374, (outs),
def DSTSTT64 : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
"dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+}
def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
"mfvscr $vD", LdStStore,
- [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
+ [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
"mtvscr $vB", LdStLoad,
- [(int_ppc_altivec_mtvscr VRRC:$vB)]>;
+ [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
"lvebx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+ [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
"lvehx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+ [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
"lvewx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
"lvx $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
"lvxl $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+ [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
}
def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
"lvsl $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
PPC970_Unit_LSU;
def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
"lvsr $vD, $src", LdStLoad,
- [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
let PPC970_Unit = 2 in { // Stores.
def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
"stvebx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
"stvehx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
"stvewx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
"stvx $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
"stvxl $rS, $dst", LdStStore,
- [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
+ [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
}
let PPC970_Unit = 5 in { // VALU Operations.
// VA-Form instructions. 3-input AltiVec ops.
def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>;
+ [(set v4f32:$vD,
+ (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
+
+// FIXME: The fma+fneg pattern won't match because fneg is not legal.
def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vnmsubfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC,
- (fneg VRRC:$vB))))]>;
+ [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
+ (fneg v4f32:$vB))))]>;
+
-def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
-def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
-def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>;
-def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>;
-def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
+def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
+ v8i16>;
+def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
+def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
+ v4i32, v4i32, v16i8>;
+def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
// Shuffles.
def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
"vsldoi $vD, $vA, $vB, $SH", VecFP,
- [(set VRRC:$vD,
- (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
+ [(set v16i8:$vD,
+ (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
// VX-Form instructions. AltiVec arithmetic ops.
def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vaddfp $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>;
+ [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vaddubm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>;
+ [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vadduhm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>;
+ [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vadduwm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
-def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>;
-def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>;
-def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>;
-def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>;
-def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>;
-def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>;
-def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>;
+def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
+def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
+def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>;
+def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>;
+def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>;
+def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
+def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vand $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vandc $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (and (v4i32 VRRC:$vA),
- (vnot_ppc VRRC:$vB)))]>;
+ [(set v4i32:$vD, (and v4i32:$vA,
+ (vnot_ppc v4i32:$vB)))]>;
def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vcfsx $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vcfux $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vctsxs $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vctuxs $vD, $vB, $UIMM", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
// Defines with the UIM field set to 0 for floating-point
// to integer (fp_to_sint/fp_to_uint) conversions and integer
@@ -347,203 +395,237 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
let VA = 0 in {
def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB),
"vcfsx $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB),
"vctuxs $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB),
"vcfux $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vcfux VRRC:$vB, 0))]>;
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB),
"vctsxs $vD, $vB, 0", VecFP,
- [(set VRRC:$vD,
- (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>;
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
}
-def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
-def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>;
-
-def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>;
-def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>;
-def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>;
-def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>;
-def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>;
-def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>;
-
-def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>;
-def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>;
-def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>;
-def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>;
-def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>;
-def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>;
-def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>;
-def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>;
-def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>;
-def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>;
-def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>;
-def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>;
-def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>;
-def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
+def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>;
+def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>;
+def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>;
+def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>;
+def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>;
+def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>;
+
+def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>;
+def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>;
+def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>;
+def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>;
+def VMAXUB : VX1_Int_Ty< 2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>;
+def VMAXUH : VX1_Int_Ty< 66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>;
+def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>;
+def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>;
+def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>;
+def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>;
+def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>;
+def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
+def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
+def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghb $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghh $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghw $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglb $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglh $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
+ [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglw $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
-
-def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
-def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
-def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
-def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
-def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
-def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
-
-def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
-def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
-def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
-def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
-def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
-def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
-def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>;
-def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+ [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
+
+def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
+ v4i32, v16i8, v4i32>;
+def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm,
+ v4i32, v8i16, v4i32>;
+def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
+ v4i32, v8i16, v4i32>;
+def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm,
+ v4i32, v16i8, v4i32>;
+def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
+ v4i32, v8i16, v4i32>;
+def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
+ v4i32, v8i16, v4i32>;
+
+def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
+ v8i16, v16i8>;
+def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh,
+ v4i32, v8i16>;
+def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub,
+ v8i16, v16i8>;
+def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh,
+ v4i32, v8i16>;
+def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb,
+ v8i16, v16i8>;
+def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh,
+ v4i32, v8i16>;
+def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
+ v8i16, v16i8>;
+def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
+ v4i32, v8i16>;
-def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>;
-def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>;
-def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>;
-def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>;
-def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>;
-def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
-def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>;
+def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubfp $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+ [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsububm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+ [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubuhm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+ [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vsubuwm $vD, $vA, $vB", VecGeneral,
- [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
-def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
-def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
-def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
-def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
-def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
-def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
-def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
-def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
-def VSUM4SBS: VX1_Int<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs>;
-def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>;
-def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
+def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
+def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
+def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>;
+def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>;
+def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>;
+def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
+
+def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
+def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
+
+def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+ v4i32, v16i8, v4i32>;
+def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
+ v4i32, v8i16, v4i32>;
+def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
+ v4i32, v16i8, v4i32>;
def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vnor $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA),
- VRRC:$vB)))]>;
+ [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA,
+ v4i32:$vB)))]>;
def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vor $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vxor $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>;
+ [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
+
-def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>;
-def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>;
-def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>;
+def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>;
+def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>;
+def VRLW : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>;
-def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >;
-def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>;
+def VSL : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl, v4i32 >;
+def VSLO : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>;
-def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>;
-def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>;
-def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
+def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>;
+def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
+def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vspltb $vD, $vB, $UIMM", VecPerm,
- [(set VRRC:$vD,
- (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+ [(set v16i8:$vD,
+ (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vsplth $vD, $vB, $UIMM", VecPerm,
- [(set VRRC:$vD,
- (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+ [(set v16i8:$vD,
+ (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vspltw $vD, $vB, $UIMM", VecPerm,
- [(set VRRC:$vD,
- (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+ [(set v16i8:$vD,
+ (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
-def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
-def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>;
-def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>;
-def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>;
-def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>;
-def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>;
-def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>;
+def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>;
+def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>;
+
+def VSRAB : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>;
+def VSRAH : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>;
+def VSRAW : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>;
+def VSRB : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>;
+def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>;
+def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltisb $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>;
+ [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltish $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>;
+ [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
"vspltisw $vD, $SIMM", VecPerm,
- [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>;
+ [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
// Vector Pack.
-def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>;
-def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>;
-def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>;
-def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
-def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
+def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
+ v8i16, v4i32>;
+def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
+ v16i8, v8i16>;
+def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
+ v16i8, v8i16>;
+def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
+ v16i8, v4i32>;
+def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
+ v8i16, v4i32>;
def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuhum $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD,
- (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
-def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
+ [(set v16i8:$vD,
+ (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
+ v16i8, v8i16>;
def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuwum $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD,
- (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
-def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
+ [(set v16i8:$vD,
+ (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
+ v8i16, v4i32>;
// Vector Unpack.
-def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
-def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
-def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
-def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
-def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
-def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
+ v4i32, v8i16>;
+def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb,
+ v8i16, v16i8>;
+def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh,
+ v4i32, v8i16>;
+def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx,
+ v4i32, v8i16>;
+def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb,
+ v8i16, v16i8>;
+def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
+ v4i32, v8i16>;
// Altivec Comparisons.
class VCMP<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
- [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+ [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
: VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
- [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+ [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
let Defs = [CR6];
let RC = 1;
}
@@ -582,10 +664,16 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+let isCodeGenOnly = 1 in
def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
"vxor $vD, $vD, $vD", VecFP,
- [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+ [(set v4i32:$vD, (v4i32 immAllZerosV))]>;
+let IMM=-1 in {
+def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins),
+ "vspltisw $vD, -1", VecFP,
+ [(set v4i32:$vD, (v4i32 immAllOnesV))]>;
}
+} // VALU Operations.
//===----------------------------------------------------------------------===//
// Additional Altivec Patterns
@@ -596,31 +684,31 @@ def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
// * 32-bit
-def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
- (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTT 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT 1, imm:$STRM, $rA, $rB)>;
// * 64-bit
-def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
-def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM),
- (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM),
+ (DST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTT64 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM),
+ (DSTST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT64 1, imm:$STRM, $rA, $rB)>;
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
// Stores.
-def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
- (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
+def : Pat<(store v4i32:$rS, xoaddr:$dst),
+ (STVX $rS, xoaddr:$dst)>;
// Bit conversions.
def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
@@ -642,82 +730,99 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
// Shuffles.
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
-def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
- (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
-def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VPKUWUM VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef),
+ (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm $in))>;
+def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
+ (VPKUWUM $vA, $vA)>;
+def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
+ (VPKUHUM $vA, $vA)>;
// Match vmrg*(x,x)
-def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLB VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLH VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGLW VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHB VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHH VRRC:$vA, VRRC:$vA)>;
-def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
- (VMRGHW VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
+ (VMRGLB $vA, $vA)>;
+def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef),
+ (VMRGLH $vA, $vA)>;
+def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef),
+ (VMRGLW $vA, $vA)>;
+def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef),
+ (VMRGHB $vA, $vA)>;
+def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
+ (VMRGHH $vA, $vA)>;
+def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
+ (VMRGHW $vA, $vA)>;
// Logical Operations
-def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>;
-def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
- (VNOR VRRC:$A, VRRC:$B)>;
-def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
- (VANDC VRRC:$A, VRRC:$B)>;
+def : Pat<(vnot_ppc (or v4i32:$A, v4i32:$B)),
+ (VNOR $A, $B)>;
+def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)),
+ (VANDC $A, $B)>;
-def : Pat<(fmul VRRC:$vA, VRRC:$vB),
- (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>;
+def : Pat<(fmul v4f32:$vA, v4f32:$vB),
+ (VMADDFP $vA, $vB,
+ (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>;
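// Note on the pattern above (assumed rationale): VSLW of an all-ones vector
// by itself shifts each 0xFFFFFFFF word left by 31 (only the low five bits
// of each shift amount are used), leaving 0x80000000 in every lane, i.e. a
// splat of -0.0f. Adding -0.0 (unlike +0.0) leaves every product unchanged,
// including the sign of zero results, so VMADDFP can implement a plain fmul.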
// Fused multiply add and multiply sub for packed float. These are represented
// separately from the real instructions above, for operations that must have
// the additional precision, such as Newton-Raphson (used by divide, sqrt)
-def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
-def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
+
-def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
-def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
- (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
-def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
- (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
+def : Pat<(PPCvperm v16i8:$vA, v16i8:$vB, v16i8:$vC),
+ (VPERM $vA, $vB, $vC)>;
+def : Pat<(PPCfre v4f32:$A), (VREFP $A)>;
+def : Pat<(PPCfrsqrte v4f32:$A), (VRSQRTEFP $A)>;
// Vector shifts
-def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>;
-
-def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>;
-
-def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
- (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
- (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>;
-def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
- (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSLB $vA, $vB))>;
+def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSLH $vA, $vB))>;
+def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSLW $vA, $vB))>;
+
+def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRB $vA, $vB))>;
+def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRH $vA, $vB))>;
+def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRW $vA, $vB))>;
+
+def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRAB $vA, $vB))>;
+def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRAH $vA, $vB))>;
+def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRAW $vA, $vB))>;
// Float to integer and integer to float conversions
-def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))),
- (VCTSXS_0 VRRC:$vA)>;
-def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))),
- (VCTUXS_0 VRRC:$vA)>;
-def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))),
- (VCFSX_0 VRRC:$vA)>;
-def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))),
- (VCFUX_0 VRRC:$vA)>;
+def : Pat<(v4i32 (fp_to_sint v4f32:$vA)),
+ (VCTSXS_0 $vA)>;
+def : Pat<(v4i32 (fp_to_uint v4f32:$vA)),
+ (VCTUXS_0 $vA)>;
+def : Pat<(v4f32 (sint_to_fp v4i32:$vA)),
+ (VCFSX_0 $vA)>;
+def : Pat<(v4f32 (uint_to_fp v4i32:$vA)),
+ (VCFUX_0 $vA)>;
+
+// Floating-point rounding
+def : Pat<(v4f32 (ffloor v4f32:$vA)),
+ (VRFIM $vA)>;
+def : Pat<(v4f32 (fceil v4f32:$vA)),
+ (VRFIP $vA)>;
+def : Pat<(v4f32 (ftrunc v4f32:$vA)),
+ (VRFIZ $vA)>;
+def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
+ (VRFIN $vA)>;
+
+} // end HasAltivec
+
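// A minimal sketch (not part of the patch): everything inside the
// "let Predicates = [HasAltivec] in { ... }" region that just closed is only
// considered by instruction selection when PPCSubTarget.hasAltivec() is true.
// A definition outside the region would carry the guard explicitly:
//
//   let Predicates = [HasAltivec] in
//   def : Pat<(v4f32 (fnearbyint v4f32:$vA)), (VRFIN $vA)>;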
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index c3c171cd21fc..400b7e367bfe 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
let CR = 0;
}
+class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = bi;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
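// An illustrative instantiation (hedged: the record is hypothetical here,
// though the in-tree CTR branches are the intended users): BForm_2 fits
// branches that hard-wire BO/BI and carry only the 14-bit displacement:
//
//   def BDZ : BForm_2<16, 18, 0, 0, 0, (outs),
//                     (ins condbrtarget:$dst), "bdz $dst">;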
+
// 1.7.4 D-Form
class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -664,14 +676,13 @@ class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
// This is probably 1.7.9, but I don't have the reference that uses this
// numbering scheme...
class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- string cstr, InstrItinClass itin, list<dag>pattern>
+ InstrItinClass itin, list<dag>pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<8> FM;
bits<5> rT;
bit RC = 0; // set by isDOT
let Pattern = pattern;
- let Constraints = cstr;
let Inst{6} = 0;
let Inst{7-14} = FM;
@@ -765,16 +776,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
bits<5> RT;
bits<5> RA;
bits<5> RB;
- bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12).
- bits<3> CR;
+ bits<5> COND;
let Pattern = pattern;
let Inst{6-10} = RT;
let Inst{11-15} = RA;
let Inst{16-20} = RB;
- let Inst{21-23} = CR;
- let Inst{24-25} = BIBO{6-5};
+ let Inst{21-25} = COND;
let Inst{26-30} = xo;
let Inst{31} = 0;
}
@@ -987,6 +996,7 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
//===----------------------------------------------------------------------===//
class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
+ let isCodeGenOnly = 1;
let PPC64 = 0;
let Pattern = pattern;
let Inst{31-0} = 0;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d9d68446f536..69c54ed084be 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -12,12 +12,13 @@
//===----------------------------------------------------------------------===//
#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
+#include "PPCHazardRecognizers.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
-#include "PPCHazardRecognizers.h"
-#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -28,16 +29,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_INSTRINFO_CTOR
#include "PPCGenInstrInfo.inc"
-namespace llvm {
-extern cl::opt<bool> DisablePPC32RS;
-extern cl::opt<bool> DisablePPC64RS;
-}
-
using namespace llvm;
static cl::
@@ -60,7 +55,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
return new PPCScoreboardHazardRecognizer(II, DAG);
}
- return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
@@ -99,12 +94,18 @@ bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
+ // Note: This list must be kept consistent with LoadRegFromStackSlot.
switch (MI->getOpcode()) {
default: break;
case PPC::LD:
case PPC::LWZ:
case PPC::LFS:
case PPC::LFD:
+ case PPC::RESTORE_CR:
+ case PPC::LVX:
+ case PPC::RESTORE_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset, which defaults to 0).
if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
MI->getOperand(2).isFI()) {
FrameIndex = MI->getOperand(2).getIndex();
@@ -117,12 +118,18 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
+ // Note: This list must be kept consistent with StoreRegToStackSlot.
switch (MI->getOpcode()) {
default: break;
case PPC::STD:
case PPC::STW:
case PPC::STFS:
case PPC::STFD:
+ case PPC::SPILL_CR:
+ case PPC::STVX:
+ case PPC::SPILL_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset, which defaults to 0).
if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
MI->getOperand(2).isFI()) {
FrameIndex = MI->getOperand(2).getIndex();
@@ -141,7 +148,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Normal instructions can be commuted the obvious way.
if (MI->getOpcode() != PPC::RLWIMI)
- return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
// Cannot commute if it has a non-zero rotate count.
if (MI->getOperand(3).getImm() != 0)
@@ -444,40 +451,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional store instructions are added here,
+ // update isStoreToStackSlot.
+
DebugLoc DL;
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
- if (SrcReg != PPC::LR) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- } else {
- // FIXME: this spills LR immediately to memory in one step. To do this,
- // we use R11, which we know cannot be used in the prolog/epilog. This is
- // a hack.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11));
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
- .addReg(PPC::R11,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
} else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
- if (SrcReg != PPC::LR8) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- } else {
- // FIXME: this spills LR immediately to memory in one step. To do this,
- // we use X11, which we know cannot be used in the prolog/epilog. This is
- // a hack.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
- .addReg(PPC::X11,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
.addReg(SrcReg,
@@ -489,47 +478,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- return true;
- } else {
- // FIXME: We need a scratch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
-
- bool is64Bit = TM.getSubtargetImpl()->isPPC64();
- // We need to store the CR in the low 4-bits of the saved value. First,
- // issue a MFCR to save all of the CRBits.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- (is64Bit ? PPC::X2 : PPC::R2) :
- (is64Bit ? PPC::X0 : PPC::R0);
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
- PPC::MFCRpseud), ScratchReg)
- .addReg(SrcReg, getKillRegState(isKill)));
-
- // If the saved register wasn't CR0, shift the bits left so that they are
- // in CR0's slot.
- if (SrcReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
- // rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
- PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(ShiftBits)
- .addImm(0).addImm(31));
- }
-
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
- PPC::STW8 : PPC::STW))
- .addReg(ScratchReg,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
@@ -562,23 +515,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
Reg = PPC::CR7;
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // STVX VAL, 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
- .addReg(SrcReg, getKillRegState(isKill))
- .addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(TM.getSubtargetImpl()->isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ SpillsVRS = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -595,10 +547,19 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
- if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
FuncInfo->setSpillsCR();
- }
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
@@ -616,25 +577,17 @@ bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional load instructions are added here,
+ // update isLoadFromStackSlot.
+
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
- if (DestReg != PPC::LR) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- DestReg), FrameIdx));
- } else {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- PPC::R11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
} else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
- if (DestReg != PPC::LR8) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
- FrameIdx));
- } else {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
- PPC::X11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
FrameIdx));
@@ -642,37 +595,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
- get(PPC::RESTORE_CR), DestReg)
- , FrameIdx));
- return true;
- } else {
- // FIXME: We need a scratch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
- }
-
- NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
- PPC::MTCRF8 : PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -702,21 +628,20 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
Reg = PPC::CR7;
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // Dest = LVX 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(TM.getSubtargetImpl()->isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_VRSAVE),
+ DestReg),
+ FrameIdx));
+ SpillsVRS = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -734,10 +659,21 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
FuncInfo->setSpillsCR();
- }
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
@@ -786,8 +722,8 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case PPC::GC_LABEL:
case PPC::DBG_VALUE:
return 0;
- case PPC::BL8_NOP_ELF:
- case PPC::BLA8_NOP_ELF:
+ case PPC::BL8_NOP:
+ case PPC::BLA8_NOP:
return 8;
default:
return 4; // PowerPC instructions are all 4 bytes
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 374213ea435b..635e3480b06d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
public:
explicit PPCInstrInfo(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 6ee045a2c7c9..ab907622beeb 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -20,6 +20,10 @@ include "PPCInstrFormats.td"
def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
SDTCisVT<0, f64>, SDTCisPtrTy<1>
]>;
+def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -36,10 +40,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
]>;
def SDT_PPClbrx : SDTypeProfile<1, 2, [
- SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPCstbrx : SDTypeProfile<0, 3, [
- SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPClarx : SDTypeProfile<1, 1, [
@@ -53,32 +57,36 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
-def SDT_PPCnop : SDTypeProfile<0, 0, []>;
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
-def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;
+def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
+
+def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
+def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
+def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>;
+def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>;
def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
+def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
[SDNPHasChain, SDNPMayStore]>;
+def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+// Extract FPSCR (not modeled at the DAG level).
+def PPCmffs : SDNode<"PPCISD::MFFS",
+ SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
+
+// Perform FADD in round-to-zero mode.
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
-// This sequence is used for long double->int conversions. It changes the
-// bits in the FPSCR which is not modelled.
-def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
- [SDNPOutGlue]>;
-def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
- [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3,
- [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
- SDTCisVT<3, f64>]>,
- [SDNPInGlue]>;
def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
@@ -91,6 +99,20 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>;
+def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
+ [SDNPMayLoad]>;
+def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
+def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
+def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
+def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
+def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
+def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
+def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
+ [SDNPHasChain]>;
+def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
+
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@@ -99,10 +121,6 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
-def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
-def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore,
- [SDNPHasChain, SDNPMayStore]>;
-
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
@@ -110,16 +128,12 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
-def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
+def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
@@ -130,13 +144,9 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-
-def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
+def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -144,6 +154,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
@@ -167,6 +185,12 @@ def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
[SDNPHasChain, SDNPMayStore]>;
+// Instructions to support medium and large code model
+def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
+def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
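// For example (illustrative, not part of this patch), a medium-code-model
// global address is materialized from the TOC pointer (r2) as:
//   addis r3, r2, sym@toc@ha        ; ADDIS_TOC_HA
//   addi  r3, r3, sym@toc@l         ; ADDI_TOC_L
// or, when the address itself is stored in the TOC:
//   ld    r3, sym@toc@l(r3)         ; LD_TOC_L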
+
+
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
@@ -258,6 +282,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
}], HI16>;
+// Some r+i load/store instructions (such as LD, STD, and LDU) that require
+// restricted memrix (offset/4) constants are alignment-sensitive. If these
+// offsets are hidden behind TOC entries then the values of the lower-order
+// bits cannot be checked directly. As a result, we also need to incorporate
+// an alignment check into the relevant patterns.
+
+def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4pre_store : PatFrag<
+ (ops node:$val, node:$base, node:$offset),
+ (pre_store node:$val, node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
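
// As an illustrative sketch (assumed names; the LD/LDX definitions live in
// the 64-bit instruction file), a DS-form load pattern can then be guarded
// on alignment, with an indexed-form fallback for the unaligned case:
//   def : Pat<(i64 (aligned4load ixaddr:$src)), (LD ixaddr:$src)>;
//   def : Pat<(i64 (unaligned4load xoaddr:$src)), (LDX xoaddr:$src)>;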
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@@ -294,9 +350,6 @@ def s16imm : Operand<i32> {
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
}
-def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
- let PrintMethod = "printS16X4ImmOperand";
-}
def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
@@ -324,26 +377,37 @@ def crbitm: Operand<i8> {
let EncoderMethod = "get_crbitm_encoding";
}
// Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def ptr_rc_nor0 : PointerLikeRegClass<1>;
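// Background note (a fact about the ISA, not new in this patch): when the
// base-register field of a D-form or X-form memory access encodes 0, the
// hardware uses a literal zero rather than the contents of r0, e.g.
//   lwz r3, 8(0)    ; loads from absolute address 8, not 8 + r0
// so register classes used for such bases must exclude R0 (and X0).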
+
+def dispRI : Operand<iPTR>;
+def dispRIX : Operand<iPTR>;
+
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIEncoding";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg);
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg);
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
- let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
-// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
-// that doesn't matter.
-def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
- (ops (i32 20), (i32 zero_reg))> {
+// A single-register address. This is used with the SjLj
+// pseudo-instructions.
+def memr : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc:$ptrreg);
+}
+
+// PowerPC Predicate operand.
+def pred : Operand<OtherVT> {
let PrintMethod = "printPredicateOperand";
+ let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg);
}
// Define PowerPC specific addressing mode.
@@ -352,9 +416,12 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+// The address in a single register. This is used with the SjLj
+// pseudo-instructions.
+def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
+
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
-def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
@@ -381,17 +448,22 @@ def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
let Defs = [R1], Uses = [R1] in
def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC",
- [(set GPRC:$result,
- (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+ [(set i32:$result,
+ (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
let usesCustomInserter = 1, // Expanded after instruction selection.
PPC970_Single = 1 in {
- def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+ // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
+ // because either operand might become the first operand in an isel, and
+ // that operand cannot be r0.
+ def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond,
+ GPRC_NOR0:$T, GPRC_NOR0:$F,
i32imm:$BROPC), "#SELECT_CC_I4",
[]>;
- def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+ def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond,
+ G8RC_NOX0:$T, G8RC_NOX0:$F,
i32imm:$BROPC), "#SELECT_CC_I8",
[]>;
def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
@@ -418,10 +490,9 @@ def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
- let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in
- def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
- "b${p:cc}lr ${p:reg}", BrB,
- [(retflag)]>;
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB,
+ [(retflag)]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
}
@@ -453,46 +524,29 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
}
-// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
- // Convenient aliases for call instructions
- let Uses = [RM] in {
- def BL_Darwin : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA_Darwin : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
- }
- let Uses = [CTR, RM] in {
- def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
+// The unconditional BCL used by the SjLj setjmp code.
+let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in {
+ let Defs = [LR], Uses = [RM] in {
+ def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bcl 20, 31, $dst">;
}
}
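// Background note (not from this patch): BO=20 encodes "branch always", so
// with LK=1 the instruction "bcl 20, 31, $dst" is the conventional idiom
// for capturing the address of the next instruction in LR without
// performing a real call.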
-// SVR4 ABI Calls.
let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
- def BL_SVR4 : IForm<18, 0, 1,
- (outs), (ins calltarget:$func),
- "bl $func", BrB, []>; // See Pat patterns below.
- def BLA_SVR4 : IForm<18, 1, 1,
- (outs), (ins aaddr:$func),
- "bla $func", BrB,
- [(PPCcall_SVR4 (i32 imm:$func))]>;
+ def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ "bla $func", BrB, [(PPCcall (i32 imm:$func))]>;
}
let Uses = [CTR, RM] in {
- def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
- (outs), (ins),
- "bctrl", BrB,
- [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
+ def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", BrB, [(PPCbctrl)]>,
+ Requires<[In32BitMode]>;
}
}
-
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi :Pseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
@@ -511,6 +565,8 @@ def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
[]>;
+let isCodeGenOnly = 1 in {
+
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
@@ -524,6 +580,7 @@ def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
"b $dst", BrB,
[]>;
+}
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
@@ -531,6 +588,22 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
"ba $dst", BrB,
[]>;
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
@@ -566,93 +639,90 @@ let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8",
- [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8",
- [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8",
- [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8",
- [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8",
- [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8",
- [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16",
- [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16",
- [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16",
- [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16",
- [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16",
- [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16",
- [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32",
- [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32",
- [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32",
- [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32",
- [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32",
- [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32",
- [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
+ [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_CMP_SWAP_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8",
- [(set GPRC:$dst,
- (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
- [(set GPRC:$dst,
- (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
- [(set GPRC:$dst,
- (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_SWAP_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8",
- [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I16 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16",
- [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I32 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32",
- [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
+ [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
}
}
// Instructions to support atomic operations
def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
"lwarx $rD, $src", LdStLWARX,
- [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
+ [(set i32:$rD, (PPClarx xoaddr:$src))]>;
let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
"stwcx. $rS, $dst", LdStSTWCX,
- [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+ [(PPCstcx i32:$rS, xoaddr:$dst)]>,
isDOT;
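
// As a sketch of the loop the custom inserter builds for the atomic pseudos
// above (illustrative, not part of this patch; ADD shown as the operation):
//   loop:
//     lwarx  rD, 0, rPtr      ; load and reserve the old value
//     add    rT, rD, rIncr    ; apply the read-modify-write operation
//     stwcx. rT, 0, rPtr      ; store conditionally on the reservation
//     bne-   cr0, loop        ; retry if the reservation was lost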
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
@@ -666,94 +736,94 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
"lbz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+ [(set i32:$rD, (zextloadi8 iaddr:$src))]>;
def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
- [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+ [(set i32:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
"lhz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+ [(set i32:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
"lwz $rD, $src", LdStLoad,
- [(set GPRC:$rD, (load iaddr:$src))]>;
+ [(set i32:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
"lfs $rD, $src", LdStLFD,
- [(set F4RC:$rD, (load iaddr:$src))]>;
+ [(set f32:$rD, (load iaddr:$src))]>;
def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
"lfd $rD, $src", LdStLFD,
- [(set F8RC:$rD, (load iaddr:$src))]>;
+ [(set f64:$rD, (load iaddr:$src))]>;
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1 in {
-def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfsu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfdu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// Indexed (r+r) Loads with Update (preinc).
-def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
+def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfsux $rD, $addr", LdStLFDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
+def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfdux $rD, $addr", LdStLFDU,
- []>, RegConstraint<"$addr.offreg = $ea_result">,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
}
@@ -763,32 +833,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
"lbzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+ [(set i32:$rD, (zextloadi8 xaddr:$src))]>;
def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
- [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+ [(set i32:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
"lhzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+ [(set i32:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
"lwzx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (load xaddr:$src))]>;
+ [(set i32:$rD, (load xaddr:$src))]>;
def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
"lhbrx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
"lwbrx $rD, $src", LdStLoad,
- [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
"lfsx $frD, $src", LdStLFD,
- [(set F4RC:$frD, (load xaddr:$src))]>;
+ [(set f32:$frD, (load xaddr:$src))]>;
def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
"lfdx $frD, $src", LdStLFD,
- [(set F8RC:$frD, (load xaddr:$src))]>;
+ [(set f64:$frD, (load xaddr:$src))]>;
+
+def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src),
+ "lfiwax $frD, $src", LdStLFD,
+ [(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
+def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src),
+ "lfiwzx $frD, $src", LdStLFD,
+ [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
}
//===----------------------------------------------------------------------===//
@@ -799,137 +876,128 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
let PPC970_Unit = 2 in {
def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
"stb $rS, $src", LdStStore,
- [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+ [(truncstorei8 i32:$rS, iaddr:$src)]>;
def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
"sth $rS, $src", LdStStore,
- [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+ [(truncstorei16 i32:$rS, iaddr:$src)]>;
def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
"stw $rS, $src", LdStStore,
- [(store GPRC:$rS, iaddr:$src)]>;
+ [(store i32:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
"stfs $rS, $dst", LdStSTFD,
- [(store F4RC:$rS, iaddr:$dst)]>;
+ [(store f32:$rS, iaddr:$dst)]>;
def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
"stfd $rS, $dst", LdStSTFD,
- [(store F8RC:$rS, iaddr:$dst)]>;
+ [(store f64:$rS, iaddr:$dst)]>;
}
// Unindexed (r+i) Stores with Update (preinc).
-let PPC970_Unit = 2 in {
-def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst),
+ "stfsu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst),
+ "stfdu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFSU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFDU $rS, iaddroff:$ptroff, $ptrreg)>;
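// Note that the result operands are written (offset, base): each of these
// instructions takes a single memri operand whose sub-operands are
// (dispRI:$imm, ptr_rc_nor0:$reg), so the Pat lists the displacement before
// the pointer register.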
// Indexed (r+r) Stores.
-//
let PPC970_Unit = 2 in {
def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
- [(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
+ [(truncstorei8 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
"sthx $rS, $dst", LdStStore,
- [(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
+ [(truncstorei16 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
- [(store GPRC:$rS, xaddr:$dst)]>,
- PPC970_DGroup_Cracked;
-
-def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ [(store i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
- (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
- (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStStore,
- [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
PPC970_DGroup_Cracked;
def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
"stwbrx $rS, $dst", LdStStore,
- [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
PPC970_DGroup_Cracked;
def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
"stfiwx $frS, $dst", LdStSTFD,
- [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+ [(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
"stfsx $frS, $dst", LdStSTFD,
- [(store F4RC:$frS, xaddr:$dst)]>;
+ [(store f32:$frS, xaddr:$dst)]>;
def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
"stfdx $frS, $dst", LdStSTFD,
- [(store F8RC:$frS, xaddr:$dst)]>;
+ [(store f64:$frS, xaddr:$dst)]>;
+}
+
+// Indexed (r+r) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst),
+ "stfsux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst),
+ "stfdux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFDUX $rS, $ptrreg, $ptroff)>;
+
def SYNC : XForm_24_sync<31, 598, (outs), (ins),
"sync", LdStSync,
[(int_ppc_sync)]>;
@@ -939,68 +1007,66 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
//
let PPC970_Unit = 1 in { // FXU Operations.
-def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
- "addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
-def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm),
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+ [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic $rD, $rA, $imm", IntGeneral,
- [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
+ [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>,
PPC970_DGroup_Cracked;
def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IntGeneral,
[]>;
}
-def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
-def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+ [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
+let isCodeGenOnly = 1 in
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym),
"la $rD, $sym($rA)", IntGeneral,
- [(set GPRC:$rD, (add GPRC:$rA,
+ [(set i32:$rD, (add i32:$rA,
(PPClo tglobaladdr:$sym, 0)))]>;
def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"mulli $rD, $rA, $imm", IntMulLI,
- [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+ [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
- [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+ [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
"li $rD, $imm", IntSimple,
- [(set GPRC:$rD, immSExt16:$imm)]>;
+ [(set i32:$rD, immSExt16:$imm)]>;
def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
"lis $rD, $imm", IntSimple,
- [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+ [(set i32:$rD, imm16ShiftedSExt:$imm)]>;
}
}
let PPC970_Unit = 1 in { // FXU Operations.
def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IntGeneral,
- [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+ [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
isDOT;
def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IntGeneral,
- [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+ [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+ [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"oris $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+ [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"xori $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+ [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IntSimple,
- [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+ [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
[]>;
def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
@@ -1013,38 +1079,38 @@ def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
let PPC970_Unit = 1 in { // FXU Operations.
def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"nand $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"and $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"andc $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
+ [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"or $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"nor $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"orc $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
+ [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"eqv $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
+ [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"xor $rA, $rS, $rB", IntSimple,
- [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"slw $rA, $rS, $rB", IntGeneral,
- [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"srw $rA, $rS, $rB", IntGeneral,
- [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
let Defs = [CARRY] in {
def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
"sraw $rA, $rS, $rB", IntShift,
- [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+ [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
}
}
@@ -1052,17 +1118,17 @@ let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CARRY] in {
def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
"srawi $rA, $rS, $SH", IntShift,
- [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+ [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
}
def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
"cntlzw $rA, $rS", IntGeneral,
- [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
+ [(set i32:$rA, (ctlz i32:$rS))]>;
def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
"extsb $rA, $rS", IntSimple,
- [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
+ [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
"extsh $rA, $rS", IntSimple,
- [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
+ [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
"cmpw $crD, $rA, $rB", IntCompare>;
@@ -1080,16 +1146,54 @@ def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
let Uses = [RM] in {
def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
"fctiwz $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
+ [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+
def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
"frsp $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fround F8RC:$frB))]>;
+ [(set f32:$frD, (fround f64:$frB))]>;
+
+ // The frin -> nearbyint mapping is valid only in fast-math mode.
+ def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frin $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fnearbyint f64:$frB))]>;
+ def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frin $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fnearbyint f32:$frB))]>;
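  // (Background, not from this patch: frin rounds to nearest with ties away
  // from zero, while nearbyint rounds according to the current rounding
  // mode, so the two agree only when fast-math allows ignoring that
  // difference in tie handling.)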
+
+ // These pseudos expand to rint but also set FE_INEXACT when the result does
+ // not equal the argument.
+ let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
+ def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB),
+ "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
+ def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB),
+ "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
+ }
+
+ def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frip $frD, $frB", FPGeneral,
+ [(set f64:$frD, (fceil f64:$frB))]>;
+ def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frip $frD, $frB", FPGeneral,
+ [(set f32:$frD, (fceil f32:$frB))]>;
+ def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB),
+ "friz $frD, $frB", FPGeneral,
+ [(set f64:$frD, (ftrunc f64:$frB))]>;
+ def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB),
+ "friz $frD, $frB", FPGeneral,
+ [(set f32:$frD, (ftrunc f32:$frB))]>;
+ def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frim $frD, $frB", FPGeneral,
+ [(set f64:$frD, (ffloor f64:$frB))]>;
+ def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frim $frD, $frB", FPGeneral,
+ [(set f32:$frD, (ffloor f32:$frB))]>;
+
def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
"fsqrt $frD, $frB", FPSqrt,
- [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
+ [(set f64:$frD, (fsqrt f64:$frB))]>;
def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
"fsqrts $frD, $frB", FPSqrt,
- [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+ [(set f32:$frD, (fsqrt f32:$frB))]>;
}
}
@@ -1099,31 +1203,44 @@ let Uses = [RM] in {
/// sneak into a d-group with a store).
def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
"fmr $frD, $frB", FPGeneral,
- []>, // (set F4RC:$frD, F4RC:$frB)
+ []>, // (set f32:$frD, f32:$frB)
PPC970_Unit_Pseudo;
let PPC970_Unit = 3 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
"fabs $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fabs F4RC:$frB))]>;
+ [(set f32:$frD, (fabs f32:$frB))]>;
def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
"fabs $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fabs F8RC:$frB))]>;
+ [(set f64:$frD, (fabs f64:$frB))]>;
def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
"fnabs $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
+ [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
"fnabs $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
+ [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
"fneg $frD, $frB", FPGeneral,
- [(set F4RC:$frD, (fneg F4RC:$frB))]>;
+ [(set f32:$frD, (fneg f32:$frB))]>;
def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
"fneg $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fneg F8RC:$frB))]>;
+ [(set f64:$frD, (fneg f64:$frB))]>;
+
+// Reciprocal estimates.
+def FRE : XForm_26<63, 24, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fre $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfre f64:$frB))]>;
+def FRES : XForm_26<59, 24, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fres $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfre f32:$frB))]>;
+def FRSQRTE : XForm_26<63, 26, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frsqrte $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
+def FRSQRTES : XForm_26<59, 26, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frsqrtes $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
}
-
// XL-Form instructions. condition register logical ops.
//
@@ -1141,6 +1258,7 @@ def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
"cror $CRD, $CRA, $CRB", BrCR,
[]>;
+let isCodeGenOnly = 1 in {
def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
"creqv $dst, $dst, $dst", BrCR,
[]>;
@@ -1158,6 +1276,7 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
"crxor 6, 6, 6", BrCR,
[(PPCcr6unset)]>;
}
+}
// XFX-Form instructions. Instructions that deal with SPRs.
//
@@ -1166,7 +1285,7 @@ def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
"mfctr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in {
+let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -1193,6 +1312,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
+let isCodeGenOnly = 1 in {
+ def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
+ (outs VRSAVERC:$reg), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT),
+ (ins VRSAVERC:$reg),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
+// so we'll need to scavenge a register for it.
+let mayStore = 1 in
+def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+ "#SPILL_VRSAVE", []>;
+
+// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+ "#RESTORE_VRSAVE", []>;
+
def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1207,6 +1349,7 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
// instruction to keep the register allocator from becoming confused.
//
// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
+let isCodeGenOnly = 1 in
def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
"#MFCRpseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1219,38 +1362,29 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
-// Instructions to manipulate FPSCR. Only long double handling uses these.
-// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+// Pseudo instruction to perform FADD in round-to-zero mode.
+let usesCustomInserter = 1, Uses = [RM] in {
+ def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "",
+ [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+}
+// The above pseudo gets expanded to make use of the following instructions
+// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
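// Roughly (an illustrative sketch, not the literal expansion), the custom
// inserter for FADDrtz emits:
//   mffs   f0             ; save the current FPSCR
//   mtfsb1 31             ; RN = 0b01: round toward zero
//   mtfsb0 30
//   fadd   fD, fA, fB     ; the addition, now rounded toward zero
//   mtfsf  255, f0        ; restore the saved FPSCR (mask illustrative)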
let Uses = [RM], Defs = [RM] in {
def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
- "mtfsb0 $FM", IntMTFSB0,
- [(PPCmtfsb0 (i32 imm:$FM))]>,
+ "mtfsb0 $FM", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
- "mtfsb1 $FM", IntMTFSB0,
- [(PPCmtfsb1 (i32 imm:$FM))]>,
+ "mtfsb1 $FM", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- // MTFSF does not actually produce an FP result. We pretend it copies
- // input reg B to the output. If we didn't do this it would look like the
- // instruction had no outputs (because we aren't modelling the FPSCR) and
- // it would be deleted.
- def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA),
- (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
- "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
- [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM),
- F8RC:$rT, F8RC:$FRB))]>,
+ def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT),
+ "mtfsf $FM, $rT", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let Uses = [RM] in {
def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
"mffs $rT", IntMFFS,
- [(set F8RC:$rT, (PPCmffs))]>,
- PPC970_DGroup_Single, PPC970_Unit_FPU;
- def FADDrtz: AForm_2<63, 21,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fadd $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+ [(set f64:$rT, (PPCmffs))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
@@ -1261,61 +1395,61 @@ let PPC970_Unit = 1 in { // FXU Operations.
//
def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"add $rT, $rA, $rB", IntSimple,
- [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
let Defs = [CARRY] in {
def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"addc $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
}
def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"divw $rT, $rA, $rB", IntDivW,
- [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"divwu $rT, $rA, $rB", IntDivW,
- [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
PPC970_DGroup_First, PPC970_DGroup_Cracked;
def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mulhw $rT, $rA, $rB", IntMulHW,
- [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mulhwu $rT, $rA, $rB", IntMulHWU,
- [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"mullw $rT, $rA, $rB", IntMulHW,
- [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subf $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+ [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
let Defs = [CARRY] in {
def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subfc $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
+ [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
PPC970_DGroup_Cracked;
}
def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"neg $rT, $rA", IntSimple,
- [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+ [(set i32:$rT, (ineg i32:$rA))]>;
let Uses = [CARRY], Defs = [CARRY] in {
def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"adde $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+ [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, -1))]>;
+ [(set i32:$rT, (adde i32:$rA, -1))]>;
def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addze $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
+ [(set i32:$rT, (adde i32:$rA, 0))]>;
def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
"subfe $rT, $rA, $rB", IntGeneral,
- [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+ [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (sube -1, GPRC:$rA))]>;
+ [(set i32:$rT, (sube -1, i32:$rA))]>;
def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfze $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+ [(set i32:$rT, (sube 0, i32:$rA))]>;
}
}
@@ -1327,43 +1461,41 @@ let Uses = [RM] in {
def FMADD : AForm_1<63, 29,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
def FMADDS : AForm_1<59, 29,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
def FMSUB : AForm_1<63, 28,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>;
+ [(set f64:$FRT,
+ (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
def FMSUBS : AForm_1<59, 28,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>;
+ [(set f32:$FRT,
+ (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
def FNMADD : AForm_1<63, 31,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT,
- (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>;
+ [(set f64:$FRT,
+ (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
def FNMADDS : AForm_1<59, 31,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT,
- (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>;
+ [(set f32:$FRT,
+ (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
def FNMSUB : AForm_1<63, 30,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC,
- (fneg F8RC:$FRB))))]>;
+ [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+ (fneg f64:$FRB))))]>;
def FNMSUBS : AForm_1<59, 30,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC,
- (fneg F4RC:$FRB))))]>;
+ [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+ (fneg f32:$FRB))))]>;
}
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
// having 4 of these, force the comparison to always be an 8-byte double (code
@@ -1372,50 +1504,50 @@ let Uses = [RM] in {
def FSELD : AForm_1<63, 23,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
+ [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
def FSELS : AForm_1<63, 23,
(outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
+ [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
let Uses = [RM] in {
def FADD : AForm_2<63, 21,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fadd $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
def FADDS : AForm_2<59, 21,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fadds $FRT, $FRA, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
def FDIV : AForm_2<63, 18,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fdiv $FRT, $FRA, $FRB", FPDivD,
- [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
def FDIVS : AForm_2<59, 18,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fdivs $FRT, $FRA, $FRB", FPDivS,
- [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
def FMUL : AForm_3<63, 25,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
"fmul $FRT, $FRA, $FRC", FPFused,
- [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>;
+ [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
def FMULS : AForm_3<59, 25,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
"fmuls $FRT, $FRA, $FRC", FPGeneral,
- [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>;
+ [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
def FSUB : AForm_2<63, 20,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fsub $FRT, $FRA, $FRB", FPAddSub,
- [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
+ [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
def FSUBS : AForm_2<59, 20,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
"fsubs $FRT, $FRA, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+ [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
}
}
let PPC970_Unit = 1 in { // FXU Operations.
def ISEL : AForm_4<31, 15,
- (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
+ (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
}
@@ -1455,47 +1587,43 @@ def : Pat<(i32 imm:$imm),
(ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
// Implement the 'not' operation with the NOR instruction.
-def NOT : Pat<(not GPRC:$in),
- (NOR GPRC:$in, GPRC:$in)>;
+def NOT : Pat<(not i32:$in),
+ (NOR $in, $in)>;
// ADD an arbitrary immediate.
-def : Pat<(add GPRC:$in, imm:$imm),
- (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+def : Pat<(add i32:$in, imm:$imm),
+ (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
// OR an arbitrary immediate.
-def : Pat<(or GPRC:$in, imm:$imm),
- (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+def : Pat<(or i32:$in, imm:$imm),
+ (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
// XOR an arbitrary immediate.
-def : Pat<(xor GPRC:$in, imm:$imm),
- (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+def : Pat<(xor i32:$in, imm:$imm),
+ (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
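
For reference, the LO16/HI16/HA16 transforms these patterns lean on can be sketched in C++ (helper names here are illustrative, not the TableGen definitions): ADDI sign-extends its 16-bit immediate, so the add pattern pairs LO16 with the high-adjusted HA16, while ORI/ORIS zero-extend and pair LO16 with plain HI16.

    #include <cstdint>
    // Low half as a signed 16-bit value (what ADDI sees, sign-extended).
    static inline int16_t  lo16(uint32_t Imm) { return (int16_t)Imm; }
    // Plain high half; pairs with ORIS, which zero-extends.
    static inline uint16_t hi16(uint32_t Imm) { return (uint16_t)(Imm >> 16); }
    // High-adjusted half; compensates for lo16's sign extension under ADDIS.
    static inline uint16_t ha16(uint32_t Imm) { return (uint16_t)((Imm + 0x8000) >> 16); }
    // Invariant: ((uint32_t)ha16(Imm) << 16) + (int32_t)lo16(Imm) == Imm.
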
// SUBFIC
-def : Pat<(sub immSExt16:$imm, GPRC:$in),
- (SUBFIC GPRC:$in, imm:$imm)>;
+def : Pat<(sub immSExt16:$imm, i32:$in),
+ (SUBFIC $in, imm:$imm)>;
// SHL/SRL
-def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
-def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
+def : Pat<(shl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>;
// ROTL
-def : Pat<(rotl GPRC:$in, GPRC:$sh),
- (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
-def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
- (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+def : Pat<(rotl i32:$in, i32:$sh),
+ (RLWNM $in, $sh, 0, 31)>;
+def : Pat<(rotl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, 31)>;
// RLWNM
-def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
- (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
+def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
+ (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
// Calls
-def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)),
- (BL_Darwin tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)),
- (BL_Darwin texternalsym:$dst)>;
-def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)),
- (BL_SVR4 tglobaladdr:$dst)>;
-def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)),
- (BL_SVR4 texternalsym:$dst)>;
+def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
+ (BL tglobaladdr:$dst)>;
+def : Pat<(PPCcall (i32 texternalsym:$dst)),
+ (BL texternalsym:$dst)>;
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
@@ -1518,28 +1646,28 @@ def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
-def : Pat<(PPChi tglobaltlsaddr:$g, GPRC:$in),
- (ADDIS GPRC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(PPClo tglobaltlsaddr:$g, GPRC:$in),
- (ADDIL GPRC:$in, tglobaltlsaddr:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
- (ADDIS GPRC:$in, tglobaladdr:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
- (ADDIS GPRC:$in, tconstpool:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
- (ADDIS GPRC:$in, tjumptable:$g)>;
-def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
- (ADDIS GPRC:$in, tblockaddress:$g)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in),
+ (ADDIS $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in),
+ (ADDI $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS $in, tglobaladdr:$g)>;
+def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS $in, tconstpool:$g)>;
+def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS $in, tjumptable:$g)>;
+def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS $in, tblockaddress:$g)>;
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
-def : Pat<(sra GPRC:$rS, GPRC:$rB),
- (SRAW GPRC:$rS, GPRC:$rB)>;
-def : Pat<(srl GPRC:$rS, GPRC:$rB),
- (SRW GPRC:$rS, GPRC:$rB)>;
-def : Pat<(shl GPRC:$rS, GPRC:$rB),
- (SLW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(sra i32:$rS, i32:$rB),
+ (SRAW $rS, $rB)>;
+def : Pat<(srl i32:$rS, i32:$rB),
+ (SRW $rS, $rB)>;
+def : Pat<(shl i32:$rS, i32:$rB),
+ (SLW $rS, $rB)>;
def : Pat<(zextloadi1 iaddr:$src),
(LBZ iaddr:$src)>;
@@ -1562,8 +1690,8 @@ def : Pat<(f64 (extloadf32 iaddr:$src)),
def : Pat<(f64 (extloadf32 xaddr:$src)),
(COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
-def : Pat<(f64 (fextend F4RC:$src)),
- (COPY_TO_REGCLASS F4RC:$src, F8RC)>;
+def : Pat<(f64 (fextend f32:$src)),
+ (COPY_TO_REGCLASS $src, F8RC)>;
// Memory barriers
def : Pat<(membarrier (i32 imm /*ll*/),
@@ -1575,5 +1703,15 @@ def : Pat<(membarrier (i32 imm /*ll*/),
def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
+// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+
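
A quick numeric check of the identity noted above, using the C library's fma; this is a standalone sketch, not part of the patch.

    #include <cassert>
    #include <cmath>
    int main() {
      double a = 2.0, c = 3.0, b = 1.0;
      // fma(-a, c, b) computes -a*c + b; FNMSUB computes -(a*c - b).
      assert(std::fma(-a, c, b) == -(a * c - b));  // both are -5.0
      return 0;
    }
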
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index aba27399d6da..cfcd7490ed0d 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -15,10 +15,10 @@
#include "PPCJITInfo.h"
#include "PPCRelocations.h"
#include "PPCTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/Support/Memory.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -115,7 +115,7 @@ asm(
"lwz r2, 208(r1)\n" // stub's frame
"lwz r4, 8(r2)\n" // stub's lr
"li r5, 0\n" // 0 == 32 bit
- "bl _PPCCompilationCallbackC\n"
+ "bl _LLVMPPCCompilationCallback\n"
"mtctr r3\n"
// Restore all int arg registers
"lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
@@ -178,7 +178,7 @@ asm(
"lwz 5, 104(1)\n" // stub's frame
"lwz 4, 4(5)\n" // stub's lr
"li 5, 0\n" // 0 == 32 bit
- "bl PPCCompilationCallbackC\n"
+ "bl LLVMPPCCompilationCallback\n"
"mtctr 3\n"
// Restore all int arg registers
"lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
@@ -259,10 +259,10 @@ asm(
"ld 4, 16(5)\n" // stub's lr
"li 5, 1\n" // 1 == 64 bit
#ifdef __ELF__
- "bl PPCCompilationCallbackC\n"
+ "bl LLVMPPCCompilationCallback\n"
"nop\n"
#else
- "bl _PPCCompilationCallbackC\n"
+ "bl _LLVMPPCCompilationCallback\n"
#endif
"mtctr 3\n"
// Restore all int arg registers
@@ -292,9 +292,10 @@ void PPC64CompilationCallback() {
#endif
extern "C" {
-static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
- unsigned *OrigCallAddrPlus4,
- bool is64Bit) {
+LLVM_LIBRARY_VISIBILITY void *
+LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
// Adjust the pointer to the address of the call instruction in the stub
// emitted by emitFunctionStub, rather than the instruction after it.
unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
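
Background for the rename, as a hedged sketch (names below are hypothetical and the asm is PPC-only): the old callback was static and survived only via LLVM_ATTRIBUTE_USED, because its sole references are inside asm strings the optimizer cannot see; an external, library-visibility symbol with a stable name removes that fragility.

    // A function referenced only from an asm string gets no ordinary uses,
    // so an internal-linkage version must be pinned with attribute((used)).
    extern "C" void demo_callback(void) {}
    void enter_stub(void) {
      // The compiler sees only an opaque string here; the external symbol
      // name is what lets the reference resolve at link time.
      asm volatile("bl demo_callback" ::: "lr");
    }
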
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 2f8243a597e6..46d4a08eb687 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -14,8 +14,8 @@
#ifndef POWERPC_JITINFO_H
#define POWERPC_JITINFO_H
-#include "llvm/Target/TargetJITInfo.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
class PPCTargetMachine;
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 19ec993ba00f..9b0df3e86a75 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,14 +13,15 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
using namespace llvm;
static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
@@ -114,6 +115,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
break;
+ case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO;
+ break;
+ case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO;
+ break;
+ case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO;
+ break;
}
// FIXME: This isn't right, but we don't have a good way to express this in
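
The RefKind selected here is then wrapped around the symbol when the operand is built; roughly (a sketch of the MC API of this era, not text from the patch):

    const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, AP.OutContext);
    return MCOperand::CreateExpr(Expr);
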
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 24caffa3f0f2..ee18eadf6e5f 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -37,9 +37,19 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// PEI.
bool MustSaveLR;
+ /// Does this function have any stack spills?
+ bool HasSpills;
+
+ /// Does this function spill using instructions with only r+r (not r+i)
+ /// forms?
+ bool HasNonRISpills;
+
/// SpillsCR - Indicates whether CR is spilled in the current function.
bool SpillsCR;
+ /// Indicates whether VRSAVE is spilled in the current function.
+ bool SpillsVRSAVE;
+
/// LRStoreRequired - The bool indicates whether there is some explicit use of
/// the LR/LR8 stack slot that is not obvious from scanning the code. This
/// requires that the code generator produce a store of LR to the stack on
@@ -71,11 +81,17 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// register for parameter passing.
unsigned VarArgsNumFPR;
+ /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
+ int CRSpillFrameIndex;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
ReturnAddrSaveIndex(0),
+ HasSpills(false),
+ HasNonRISpills(false),
SpillsCR(false),
+ SpillsVRSAVE(false),
LRStoreRequired(false),
MinReservedArea(0),
TailCallSPDelta(0),
@@ -83,7 +99,8 @@ public:
VarArgsFrameIndex(0),
VarArgsStackOffset(0),
VarArgsNumGPR(0),
- VarArgsNumFPR(0) {}
+ VarArgsNumFPR(0),
+ CRSpillFrameIndex(0) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -105,9 +122,18 @@ public:
void setMustSaveLR(bool U) { MustSaveLR = U; }
bool mustSaveLR() const { return MustSaveLR; }
+ void setHasSpills() { HasSpills = true; }
+ bool hasSpills() const { return HasSpills; }
+
+ void setHasNonRISpills() { HasNonRISpills = true; }
+ bool hasNonRISpills() const { return HasNonRISpills; }
+
void setSpillsCR() { SpillsCR = true; }
bool isCRSpilled() const { return SpillsCR; }
+ void setSpillsVRSAVE() { SpillsVRSAVE = true; }
+ bool isVRSAVESpilled() const { return SpillsVRSAVE; }
+
void setLRStoreRequired() { LRStoreRequired = true; }
bool isLRStoreRequired() const { return LRStoreRequired; }
@@ -125,6 +151,9 @@ public:
unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
+
+ int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
+ void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
};
} // end of namespace llvm
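
A hedged usage sketch for the new accessors (the surrounding caller code is assumed): spill lowering records the facts, and frame lowering later queries them.

    PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
    FuncInfo->setHasSpills();          // recorded when a spill is emitted
    FuncInfo->setSpillsVRSAVE();       // recorded when VRSAVE is spilled
    // 32-bit SVR4 frame lowering later retrieves the shared CR spill slot:
    int FrameIdx = FuncInfo->getCRSpillFrameIndex();
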
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 459c3589d3f6..1d61a3a8eac2 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -15,63 +15,45 @@
#define DEBUG_TYPE "reginfo"
#include "PPCRegisterInfo.h"
#include "PPC.h"
+#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
-#include "PPCFrameLowering.h"
#include "PPCSubtarget.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-namespace llvm {
-cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC32 register scavenger"),
- cl::Hidden);
-cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC64 register scavenger"),
- cl::Hidden);
-}
-
using namespace llvm;
-// FIXME (64-bit): Should be inlined.
-bool
-PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
- (!DisablePPC64RS && Subtarget.isPPC64()));
-}
-
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
const TargetInstrInfo &tii)
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
ST.isPPC64() ? 0 : 1,
ST.isPPC64() ? 0 : 1),
- Subtarget(ST), TII(tii), CRSpillFrameIdx(0) {
+ Subtarget(ST), TII(tii) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -86,20 +68,20 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
- ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
}
-bool
-PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- return requiresRegisterScavenging(MF);
-}
-
-
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const {
+ if (Kind == 1) {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RC_NOX0RegClass;
+ return &PPC::GPRC_NOR0RegClass;
+ }
+
if (Subtarget.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
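
A usage sketch (caller assumed): instruction definitions whose address operand must not be r0, because the hardware reads r0 as the constant 0 in that position, request pointer class Kind 1.

    // Kind 0: any GPR; Kind 1: GPRs excluding r0 (plus the ZERO placeholder).
    const TargetRegisterClass *PtrRC = TRI->getPointerRegClass(MF, /*Kind=*/1);
    unsigned VReg = MF.getRegInfo().createVirtualRegister(PtrRC);
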
@@ -111,11 +93,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
CSR_Darwin32_SaveList;
- // For 32-bit SVR4, also initialize the frame index associated with
- // the CR spill slot.
- if (!Subtarget.isPPC64())
- CRSpillFrameIdx = 0;
-
return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
}
@@ -128,12 +105,35 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
}
+const uint32_t*
+PPCRegisterInfo::getNoPreservedMask() const {
+ // The naming here is inverted: The CSR_NoRegs_Altivec has the
+ // Altivec registers masked so that they're not saved and restored around
+ // instructions with this preserved mask.
+
+ if (!Subtarget.hasAltivec())
+ return CSR_NoRegs_Altivec_RegMask;
+
+ if (Subtarget.isDarwin())
+ return CSR_NoRegs_Darwin_RegMask;
+ return CSR_NoRegs_RegMask;
+}
+
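
A hedged sketch of how such a mask is consumed (the standard SelectionDAG call-lowering idiom; not code from this patch): attached as a register-mask operand, it tells the allocator that every register in the mask is clobbered across the node.

    const uint32_t *Mask = TRI->getNoPreservedMask();
    Ops.push_back(DAG.getRegisterMask(Mask));  // appended to the call's operands
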
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
- Reserved.set(PPC::R0);
+ // The ZERO register is not really a register, but the representation of r0
+ // when used in instructions that treat r0 as the constant 0.
+ Reserved.set(PPC::ZERO);
+ Reserved.set(PPC::ZERO8);
+
+ // The FP register is also not really a register, but is the representation
+ // of the frame pointer register used by ISD::FRAMEADDR.
+ Reserved.set(PPC::FP);
+ Reserved.set(PPC::FP8);
+
Reserved.set(PPC::R1);
Reserved.set(PPC::LR);
Reserved.set(PPC::LR8);
@@ -144,35 +144,21 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R2); // System-reserved register
Reserved.set(PPC::R13); // Small Data Area pointer register
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::R2);
- }
// On PPC64, r13 is the thread pointer. Never allocate this register.
- // Note that this is over conservative, as it also prevents allocation of R31
- // when the FP is not needed.
if (Subtarget.isPPC64()) {
Reserved.set(PPC::R13);
- Reserved.set(PPC::R31);
- Reserved.set(PPC::X0);
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
- Reserved.set(PPC::X31);
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::X31);
// The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve X2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::X2);
- }
}
if (PPCFI->needsFP(MF))
@@ -190,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
switch (RC->getID()) {
default:
return 0;
+ case PPC::G8RC_NOX0RegClassID:
+ case PPC::GPRC_NOR0RegClassID:
case PPC::G8RCRegClassID:
case PPC::GPRCRegClassID: {
unsigned FP = TFI->hasFP(MF) ? 1 : 0;
@@ -204,77 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-bool
-PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
- switch (RC->getID()) {
- case PPC::G8RCRegClassID:
- case PPC::GPRCRegClassID:
- case PPC::F8RCRegClassID:
- case PPC::F4RCRegClassID:
- case PPC::VRRCRegClassID:
- return true;
- default:
- return false;
- }
-}
-
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-void PPCRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- if (MF.getTarget().Options.GuaranteedTailCallOpt &&
- I->getOpcode() == PPC::ADJCALLSTACKUP) {
- // Add (actually subtract) back the amount the callee popped on return.
- if (int CalleeAmt = I->getOperand(1).getImm()) {
- bool is64Bit = Subtarget.isPPC64();
- CalleeAmt *= -1;
- unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
- unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
- unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
- unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
- unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
- MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
-
- if (isInt<16>(CalleeAmt)) {
- BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
- .addReg(StackReg, RegState::Kill)
- .addImm(CalleeAmt);
- } else {
- MachineBasicBlock::iterator MBBI = I;
- BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
- .addImm(CalleeAmt >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
- .addReg(TmpReg, RegState::Kill)
- .addImm(CalleeAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
- .addReg(StackReg, RegState::Kill)
- .addReg(TmpReg);
- }
- }
- }
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
-/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
-/// register first and then a spilled callee-saved register if that fails.
-static
-unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
- const TargetRegisterClass *RC, int SPAdj) {
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- // FIXME: move ARM callee-saved reg scan to target independent code, then
- // search for already spilled CS register here.
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- return Reg;
-}
-
/// lowerDynamicAlloc - Generate the code for allocating an object in the
/// current frame. The sequence of code will be in the general form
///
@@ -282,8 +203,7 @@ unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
/// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size
/// addi Rnew, SP, \#maxCallFrameSize ; get the top of the allocation
///
-void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Get the instruction.
MachineInstr &MI = *II;
// Get the instruction's basic block.
@@ -315,28 +235,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
-
- // FIXME (64-bit): Use "findScratchRegister"
- unsigned Reg;
- if (requiresRegisterScavenging(MF))
- Reg = findScratchRegister(II, RS, RC, SPAdj);
- else
- Reg = PPC::R0;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
.addReg(PPC::R31)
.addImm(FrameSize);
} else if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
- .addImm(0)
- .addReg(PPC::X1);
- else
- BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
- .addImm(0)
- .addReg(PPC::X1);
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
} else {
BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
.addImm(0)
@@ -346,17 +254,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Grow the stack and update the stack pointer link, then determine the
// address of new allocated space.
if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(Reg, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
- else
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
-
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
.addReg(PPC::X1)
@@ -398,23 +299,19 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
/// stw rA, FI ; Store rA to the frame.
///
void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
- unsigned FrameIndex, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FrameIndex) const {
// Get the instruction.
MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
DebugLoc dl = MI.getDebugLoc();
- // FIXME: Once LLVM supports creating virtual registers here, or the register
- // scavenger can return multiple registers, stop using reserved registers
- // here.
- (void) SPAdj;
- (void) RS;
-
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
@@ -424,16 +321,20 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
- if (SrcReg != PPC::CR0)
+ if (SrcReg != PPC::CR0) {
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
// rlwinm rA, rA, ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(getPPCRegisterNumbering(SrcReg) * 4)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg) * 4)
.addImm(0)
.addImm(31);
+ }
addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
- .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+ .addReg(Reg, RegState::Kill),
FrameIndex);
// Discard the pseudo instruction.
@@ -441,23 +342,19 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
}
void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
- unsigned FrameIndex, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FrameIndex) const {
// Get the instruction.
MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
DebugLoc dl = MI.getDebugLoc();
- // FIXME: Once LLVM supports creating virtual registers here, or the register
- // scavenger can return multiple registers, stop using reserved registers
- // here.
- (void) SPAdj;
- (void) RS;
-
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_CR does not define its destination");
@@ -468,15 +365,67 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ unsigned ShiftBits = getEncodingValue(DestReg)*4;
// rlwinm r11, r11, 32-ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
- .addReg(Reg).addImm(32-ShiftBits).addImm(0)
+ .addReg(Reg1, RegState::Kill).addImm(32-ShiftBits).addImm(0)
.addImm(31);
}
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
- .addReg(Reg);
+ .addReg(Reg, RegState::Kill);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
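
Worked numbers for the rotate amounts used above (illustrative): each CR field is 4 bits wide, so for CR2, whose encoding is 2, the spill rotates left by 8 to move the field into CR0's slot, and the restore rotates by 32-8 to move it back.

    unsigned Enc = 2;              // getEncodingValue(PPC::CR2)
    unsigned ShiftBits = Enc * 4;  // == 8
    // Spill:   rlwinm rA, rA, 8, 0, 31    (CR2's bits into CR0's slot)
    // Restore: rlwinm rA, rA, 24, 0, 31   (32 - 8; rotate back)
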
+void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_VRSAVE does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
+ .addReg(Reg, RegState::Kill);
// Discard the pseudo instruction.
MBB.erase(II);
@@ -489,18 +438,14 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
// For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
- // is arbitrary and will be subsequently ignored. For 32-bit, we must
- // create exactly one stack slot and return its FrameIdx for all
- // nonvolatiles.
+ // is arbitrary and will be subsequently ignored. For 32-bit, we have
+ // previously created the stack slot if needed, so return its FrameIdx.
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (Subtarget.isPPC64()) {
+ if (Subtarget.isPPC64())
FrameIdx = 0;
- } else if (CRSpillFrameIdx) {
- FrameIdx = CRSpillFrameIdx;
- } else {
- MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo();
- FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
- CRSpillFrameIdx = FrameIdx;
+ else {
+ const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ FrameIdx = FI->getCRSpillFrameIndex();
}
return true;
}
@@ -509,7 +454,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
// Get the instruction.
@@ -523,20 +469,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
- // Find out which operand is the frame index.
- unsigned FIOperandNo = 0;
- while (!MI.getOperand(FIOperandNo).isFI()) {
- ++FIOperandNo;
- assert(FIOperandNo != MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
// Take into account whether it's an add or mem instruction
- unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2;
+ unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
if (MI.isInlineAsm())
- OffsetOperandNo = FIOperandNo-1;
+ OffsetOperandNo = FIOperandNum-1;
// Get the frame index.
- int FrameIndex = MI.getOperand(FIOperandNo).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Get the frame pointer save index. Users of this index are primarily
// DYNALLOC instructions.
@@ -548,25 +487,29 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special case for dynamic alloca.
if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
- lowerDynamicAlloc(II, SPAdj, RS);
+ lowerDynamicAlloc(II);
return;
}
- // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
- if (requiresRegisterScavenging(MF)) {
- if (OpC == PPC::SPILL_CR) {
- lowerCRSpilling(II, FrameIndex, SPAdj, RS);
- return;
- } else if (OpC == PPC::RESTORE_CR) {
- lowerCRRestore(II, FrameIndex, SPAdj, RS);
- return;
- }
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::SPILL_VRSAVE) {
+ lowerVRSAVESpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_VRSAVE) {
+ lowerVRSAVERestore(II, FrameIndex);
+ return;
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
bool is64Bit = Subtarget.isPPC64();
- MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
+ MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ?
(is64Bit ? PPC::X31 : PPC::R31) :
(is64Bit ? PPC::X1 : PPC::R1),
false);
@@ -579,11 +522,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case PPC::LWA:
case PPC::LD:
case PPC::STD:
- case PPC::STD_32:
isIXAddr = true;
break;
}
-
+
+ // If the instruction is not present in ImmToIdxMap, then it has no immediate
+ // form (and must be r+r).
+ bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC);
+
// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
if (!isIXAddr)
@@ -596,7 +542,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// to Offset to get the correct offset.
// Naked functions have stack size 0, although getStackSize may not reflect that
// because we didn't call all the pieces that compute it for naked functions.
- if (!MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked))
Offset += MFI->getStackSize();
// If we can, encode the offset directly into the instruction. If this is a
@@ -606,7 +553,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
- (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
+ (!noImmForm &&
+ isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -616,19 +564,17 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- unsigned SReg;
- if (requiresRegisterScavenging(MF)) {
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
- } else
- SReg = is64Bit ? PPC::X0 : PPC::R0;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
+ unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC),
+ SReg = MF.getRegInfo().createVirtualRegister(RC);
// Insert a set of rA with the full offset value before the ld, st, or add
- BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
.addImm(Offset >> 16);
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
- .addReg(SReg, RegState::Kill)
+ .addReg(SRegHi, RegState::Kill)
.addImm(Offset);
// Convert into indexed form of the instruction:
@@ -637,7 +583,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
unsigned OperandBase;
- if (OpC != TargetOpcode::INLINEASM) {
+ if (noImmForm)
+ OperandBase = 1;
+ else if (OpC != TargetOpcode::INLINEASM) {
assert(ImmToIdxMap.count(OpC) &&
"No indexed form of load or store available!");
unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
@@ -647,7 +595,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
OperandBase = OffsetOperandNo;
}
- unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
+ unsigned StackReg = MI.getOperand(FIOperandNum).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index a8fd796d9e97..7e6683eeb2ef 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -15,8 +15,8 @@
#ifndef POWERPC32_REGISTERINFO_H
#define POWERPC32_REGISTERINFO_H
+#include "llvm/ADT/DenseMap.h"
#include "PPC.h"
-#include <map>
#define GET_REGINFO_HEADER
#include "PPCGenRegisterInfo.inc"
@@ -27,10 +27,9 @@ class TargetInstrInfo;
class Type;
class PPCRegisterInfo : public PPCGenRegisterInfo {
- std::map<unsigned, unsigned> ImmToIdxMap;
+ DenseMap<unsigned, unsigned> ImmToIdxMap;
const PPCSubtarget &Subtarget;
const TargetInstrInfo &TII;
- mutable int CRSpillFrameIdx;
public:
PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
@@ -45,31 +44,38 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
+ const uint32_t *getNoPreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+ /// We require the register scavenger.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+ }
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
- /// requiresRegisterScavenging - We require a register scavenger.
- /// FIXME (64-bit): Should be inlined.
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
-
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- void lowerDynamicAlloc(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const;
- void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
- int SPAdj, RegScavenger *RS) const;
- void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
- int SPAdj, RegScavenger *RS) const;
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 5ca387629b6c..57a25f5143fa 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -27,178 +27,72 @@ class PPCReg<string n> : Register<n> {
// GPR - One of the 32 32-bit general-purpose registers
class GPR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// GP8 - One of the 32 64-bit general-purpose registers
class GP8<GPR SubReg, string n> : PPCReg<n> {
- field bits<5> Num = SubReg.Num;
+ let HWEncoding = SubReg.HWEncoding;
let SubRegs = [SubReg];
let SubRegIndices = [sub_32];
}
// SPR - One of the 32-bit special-purpose registers
class SPR<bits<10> num, string n> : PPCReg<n> {
- field bits<10> Num = num;
+ let HWEncoding{9-0} = num;
}
// FPR - One of the 32 64-bit floating-point registers
class FPR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// VR - One of the 32 128-bit vector registers
class VR<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
- field bits<3> Num = num;
+ let HWEncoding{2-0} = num;
let SubRegs = subregs;
}
// CRBIT - One of the 32 1-bit condition register fields
class CRBIT<bits<5> num, string n> : PPCReg<n> {
- field bits<5> Num = num;
+ let HWEncoding{4-0} = num;
}
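
With the encodings now stored in the standard HWEncoding field, the generic MCRegisterInfo accessor replaces the old PPC-specific numbering helper; a hedged C++ sketch (value illustrative):

    // getEncodingValue() reads HWEncoding from these register definitions,
    // e.g. in the CR spill/restore lowering shown earlier:
    unsigned Enc = TRI->getEncodingValue(PPC::CR2);  // 2, from HWEncoding{2-0}
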
-
// General-purpose registers
-def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>;
-def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>;
-def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>;
-def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>;
-def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>;
-def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>;
-def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>;
-def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>;
-def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>;
-def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>;
-def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>;
-def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>;
-def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>;
-def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>;
-def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>;
-def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>;
-def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>;
-def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>;
-def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>;
-def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>;
-def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>;
-def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>;
-def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>;
-def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>;
-def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>;
-def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>;
-def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>;
-def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>;
-def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>;
-def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>;
-def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>;
-def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>;
+foreach Index = 0-31 in {
+ def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
+}
// 64-bit General-purpose registers
-def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>;
-def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>;
-def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>;
-def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>;
-def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>;
-def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>;
-def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>;
-def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>;
-def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>;
-def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>;
-def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>;
-def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>;
-def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>;
-def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>;
-def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>;
-def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>;
-def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>;
-def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>;
-def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>;
-def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>;
-def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>;
-def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>;
-def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>;
-def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>;
-def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>;
-def X25 : GP8<R25, "r25">, DwarfRegNum<[25, -2]>;
-def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>;
-def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>;
-def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>;
-def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>;
-def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>;
-def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>;
+foreach Index = 0-31 in {
+ def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>,
+ DwarfRegNum<[Index, -2]>;
+}
// Floating-point registers
-def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>;
-def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>;
-def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>;
-def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>;
-def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>;
-def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>;
-def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>;
-def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>;
-def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>;
-def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>;
-def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>;
-def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>;
-def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>;
-def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>;
-def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>;
-def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>;
-def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>;
-def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>;
-def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>;
-def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>;
-def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>;
-def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>;
-def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>;
-def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>;
-def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>;
-def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>;
-def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>;
-def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>;
-def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>;
-def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>;
-def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>;
-def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>;
+foreach Index = 0-31 in {
+ def F#Index : FPR<Index, "f"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
// Vector registers
-def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>;
-def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>;
-def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>;
-def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>;
-def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>;
-def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>;
-def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>;
-def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>;
-def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>;
-def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>;
-def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>;
-def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>;
-def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>;
-def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>;
-def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>;
-def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>;
-def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>;
-def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>;
-def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>;
-def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>;
-def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>;
-def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>;
-def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>;
-def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>;
-def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>;
-def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>;
-def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>;
-def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>;
-def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>;
-def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>;
-def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>;
-def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>;
+foreach Index = 0-31 in {
+ def V#Index : VR<Index, "v"#Index>,
+ DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+}
+
+// The representation of r0 when treated as the constant 0.
+def ZERO : GPR<0, "0">;
+def ZERO8 : GP8<ZERO, "0">;
+
+// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
+def FP8 : GP8<FP, "**FRAME POINTER**">;
// Condition register bits
def CR0LT : CRBIT< 0, "0">;
@@ -278,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">;
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
(sequence "R%u", 30, 13),
- R31, R0, R1, LR)>;
+ R31, R0, R1, FP)>;
def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
(sequence "X%u", 30, 14),
- X31, X13, X0, X1, LR8)>;
+ X31, X13, X0, X1, FP8)>;
+
+// For some instructions r0 is special (representing the value 0 instead of
+// the value in the r0 register), and we use these register subclasses to
+// prevent r0 from being allocated for use by those instructions.
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index ba63b5cd8faf..ae084aa0e8c1 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -749,3 +749,18 @@ def PPCA2Itineraries : ProcessorItineraries<
[15, 7],
[FPR_Bypass, FPR_Bypass]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// A2 machine model for scheduling and other instruction cost heuristics.
+
+def PPCA2Model : SchedMachineModel {
+ let IssueWidth = 1; // 1 instruction is dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 6; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 6;
+
+ let Itineraries = PPCA2Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index 7c02ea099c14..c64998d52a0c 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -92,3 +92,18 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
]>;
+
+// ===---------------------------------------------------------------------===//
+// G5 machine model for scheduling and other instruction cost heuristics.
+
+def G5Model : SchedMachineModel {
+ let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle.
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ let Itineraries = G5Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 9c8cb92cc7ea..a8f2b3f47d1b 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "PPCSubtarget.h"
-#include "PPCRegisterInfo.h"
#include "PPC.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetMachine.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
#define GET_SUBTARGETINFO_TARGET_DESC
@@ -36,9 +36,20 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, Use64BitRegs(false)
, IsPPC64(is64Bit)
, HasAltivec(false)
+ , HasQPX(false)
, HasFSQRT(false)
+ , HasFRE(false)
+ , HasFRES(false)
+ , HasFRSQRTE(false)
+ , HasFRSQRTES(false)
+ , HasRecipPrec(false)
, HasSTFIWX(false)
+ , HasLFIWAX(false)
+ , HasFPRND(false)
+ , HasFPCVT(false)
, HasISEL(false)
+ , HasPOPCNTD(false)
+ , HasLDBRX(false)
, IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
@@ -82,6 +93,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
// Set up darwin-specific properties.
if (isDarwin())
HasLazyResolverStubs = true;
+
+ // QPX requires a 32-byte aligned stack. Note that this is needed even
+ // when compiling for a BG/Q system with QPX disabled, because external
+ // functions will assume this alignment.
+ if (hasQPX() || isBGQ())
+ StackAlignment = 32;
}
/// SetJITMode - This is called to inform the subtarget info that we are
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index b9e22f43c39e..65b4d211fc6a 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -14,9 +14,9 @@
#ifndef POWERPCSUBTARGET_H
#define POWERPCSUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -43,7 +43,12 @@ namespace PPC {
DIR_A2,
DIR_E500mc,
DIR_E5500,
+ DIR_PWR3,
+ DIR_PWR4,
+ DIR_PWR5,
+ DIR_PWR5X,
DIR_PWR6,
+ DIR_PWR6X,
DIR_PWR7,
DIR_64
};
@@ -70,9 +75,17 @@ protected:
bool Use64BitRegs;
bool IsPPC64;
bool HasAltivec;
+ bool HasQPX;
bool HasFSQRT;
+ bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
+ bool HasRecipPrec;
bool HasSTFIWX;
+ bool HasLFIWAX;
+ bool HasFPRND;
+ bool HasFPCVT;
bool HasISEL;
+ bool HasPOPCNTD;
+ bool HasLDBRX;
bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
@@ -148,10 +161,21 @@ public:
// Specific obvious features.
bool hasFSQRT() const { return HasFSQRT; }
+ bool hasFRE() const { return HasFRE; }
+ bool hasFRES() const { return HasFRES; }
+ bool hasFRSQRTE() const { return HasFRSQRTE; }
+ bool hasFRSQRTES() const { return HasFRSQRTES; }
+ bool hasRecipPrec() const { return HasRecipPrec; }
bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasLFIWAX() const { return HasLFIWAX; }
+ bool hasFPRND() const { return HasFPRND; }
+ bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
+ bool hasQPX() const { return HasQPX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
+ bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -160,6 +184,8 @@ public:
bool isDarwin() const { return TargetTriple.isMacOSX(); }
/// isBGP - True if this is a BG/P platform.
bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
+ /// isBGQ - True if this is a BG/Q platform.
+ bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 3fc977ee2b41..fe851c1b6fb8 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -13,13 +13,13 @@
#include "PPCTargetMachine.h"
#include "PPC.h"
-#include "llvm/PassManager.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static cl::
@@ -43,8 +43,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ InstrItins(Subtarget.getInstrItineraryData()) {
// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
@@ -127,3 +126,12 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
return false;
}
+
+void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // First add the target-independent BasicTTI pass, then our PPC pass. This
+ // allows the PPC pass to delegate to the target-independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createPPCTargetTransformInfoPass(this));
+}
+
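
A hedged driver-side sketch (the standard LLVM 3.x pass-manager flow; names assumed): tools invoke this hook once so that later TTI queries resolve through the PPC implementation before falling back to BasicTTI.

    PassManager PM;               // from llvm/PassManager.h
    TM->addAnalysisPasses(PM);    // BasicTTI first, then PPCTTI layered on top
    PM.run(M);                    // M is the llvm::Module being compiled
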
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index c168433a71b3..606ccb314126 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -15,14 +15,13 @@
#define PPC_TARGETMACHINE_H
#include "PPCFrameLowering.h"
-#include "PPCSubtarget.h"
-#include "PPCJITInfo.h"
-#include "PPCInstrInfo.h"
#include "PPCISelLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCJITInfo.h"
#include "PPCSelectionDAGInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
namespace llvm {
@@ -37,8 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine {
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -66,17 +63,14 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
+
+ /// \brief Register PPC analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
new file mode 100644
index 000000000000..2504ba70c25a
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -0,0 +1,240 @@
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// PPC target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the
+/// target-independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppctti"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializePPCTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class PPCTTI : public ImmutablePass, public TargetTransformInfo {
+ const PPCTargetMachine *TM;
+ const PPCSubtarget *ST;
+ const PPCTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ PPCTTI(const PPCTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializePPCTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
+ "PPC Target Transform Info", true, true, false)
+char PPCTTI::ID = 0;
+
+ImmutablePass *
+llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
+ return new PPCTTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// PPC cost model.
+//
+//===----------------------------------------------------------------------===//
+
+PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ if (ST->hasPOPCNTD() && TyWidth <= 64)
+ return PSK_FastHardware;
+ return PSK_Software;
+}
+
+unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasAltivec())
+ return 0;
+ return 32;
+}
+
+unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAltivec()) return 128;
+ return 0;
+ }
+
+ if (ST->isPPC64())
+ return 64;
+ return 32;
+
+}
+
+unsigned PPCTTI::getMaximumUnrollFactor() const {
+ unsigned Directive = ST->getDarwinDirective();
+ // The 440 has no SIMD support, but floating-point instructions
+ // have a 5-cycle latency, so unroll by 5x for latency hiding.
+ if (Directive == PPC::DIR_440)
+ return 5;
+
+ // The A2 has no SIMD support, but floating-point instructions
+ // have a 6-cycle latency, so unroll by 6x for latency hiding.
+ if (Directive == PPC::DIR_A2)
+ return 6;
+
+ // FIXME: For lack of any better information, do no harm...
+ if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
+ return 1;
+
+ // For most things, modern systems have two execution units (and
+ // out-of-order execution).
+ return 2;
+}
+
+unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ // Fallback to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+ Op2Info);
+}
+
+unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Estimated cost of a load-hit-store delay. This was obtained
+ // experimentally as a minimum needed to prevent unprofitable
+ // vectorization for the paq8p benchmark. It may need to be
+ // raised further if other unprofitable cases remain.
+ unsigned LHSPenalty = 12;
+
+ // Vector element insert/extract with Altivec is very expensive,
+ // because they require store and reload with the attendant
+ // processor stall for load-hit-store. Until VSX is available,
+ // these need to be estimated as very costly.
+ if (ISD == ISD::EXTRACT_VECTOR_ELT ||
+ ISD == ISD::INSERT_VECTOR_ELT)
+ return LHSPenalty +
+ TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ // Each load/store unit costs 1.
+ unsigned Cost = LT.first * 1;
+
+ // PPC in general does not support unaligned loads and stores. They'll need
+ // to be decomposed based on the alignment factor.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (SrcBytes && Alignment && Alignment < SrcBytes)
+ Cost *= (SrcBytes/Alignment);
+
+ return Cost;
+}
+
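
As a sanity check of the cost rule in getMemoryOpCost(), here is a hedged stand-alone model; NumLegalOps stands in for LT.first and all the numbers are illustrative:

  // Scalar model of the PPC memory-op cost: one unit per legalized
  // operation, multiplied when the access is underaligned.
  static unsigned modelMemoryOpCost(unsigned NumLegalOps,
                                    unsigned StoreBytes,
                                    unsigned Alignment) {
    unsigned Cost = NumLegalOps;
    if (StoreBytes && Alignment && Alignment < StoreBytes)
      Cost *= StoreBytes / Alignment;  // decomposed accesses
    return Cost;
  }

  // e.g. a 16-byte vector access at 4-byte alignment models as 4x the
  // aligned cost: modelMemoryOpCost(1, 16, 4) == 4.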
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index b6763aa73802..cc2ff966332e 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -1,7 +1,6 @@
//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
TODO:
-* gpr0 allocation
* lmw/stmw pass a la arm load store optimizer for prolog/epilog
===-------------------------------------------------------------------------===
@@ -204,12 +203,6 @@ http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
===-------------------------------------------------------------------------===
-Implement Newton-Rhapson method for improving estimate instructions to the
-correct accuracy, and implementing divide as multiply by reciprocal when it has
-more than one use. Itanium would want this too.
-
-===-------------------------------------------------------------------------===
-
Compile offsets from allocas:
int *%test() {
@@ -536,20 +529,6 @@ void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
===-------------------------------------------------------------------------===
-Complete the signed i32 to FP conversion code using 64-bit registers
-transformation, good for PI. See PPCISelLowering.cpp, this comment:
-
- // FIXME: disable this lowered code. This generates 64-bit register values,
- // and we don't model the fact that the top part is clobbered by calls. We
- // need to flag these together so that the value isn't live across a call.
- //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
-
-Also, if the registers are spilled to the stack, we have to ensure that all
-64-bits of them are save/restored, otherwise we will miscompile the code. It
-sounds like we need to get the 64-bit register classes going.
-
-===-------------------------------------------------------------------------===
-
%struct.B = type { i8, [3 x i8] }
define void @bar(%struct.B* %b) {
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 5dc8568d83f2..fa44331b8af6 100644
--- a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
new file mode 100644
index 000000000000..0b01433cc926
--- /dev/null
+++ b/lib/Target/R600/AMDGPU.h
@@ -0,0 +1,51 @@
+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_H
+#define AMDGPU_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class FunctionPass;
+class AMDGPUTargetMachine;
+
+// R600 Passes
+FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
+
+// SI Passes
+FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+FunctionPass *createSIInsertWaits(TargetMachine &tm);
+
+// Passes common to R600 and SI
+Pass *createAMDGPUStructurizeCFGPass();
+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm);
+
+} // End namespace llvm
+
+namespace ShaderType {
+ enum Type {
+ PIXEL = 0,
+ VERTEX = 1,
+ GEOMETRY = 2,
+ COMPUTE = 3
+ };
+}
+
+#endif // AMDGPU_H
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
new file mode 100644
index 000000000000..1a26c77d6bb2
--- /dev/null
+++ b/lib/Target/R600/AMDGPU.td
@@ -0,0 +1,41 @@
+//===-- AMDGPU.td - AMDGPU Tablegen files --*- tablegen -*-----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+// Include AMDIL TD files
+include "AMDILBase.td"
+
+
+def AMDGPUInstrInfo : InstrInfo {
+ let guessInstructionProperties = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+def AMDGPUAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
+def AMDGPU : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AMDGPUInstrInfo;
+ let AssemblyWriters = [AMDGPUAsmWriter];
+}
+
+// Include AMDGPU TD files
+include "R600Schedule.td"
+include "SISchedule.td"
+include "Processors.td"
+include "AMDGPUInstrInfo.td"
+include "AMDGPUIntrinsics.td"
+include "AMDGPURegisterInfo.td"
+include "AMDGPUInstructions.td"
+include "AMDGPUCallingConv.td"
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
new file mode 100644
index 000000000000..f6001445f4b3
--- /dev/null
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -0,0 +1,145 @@
+//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// The AMDGPUAsmPrinter is used to emit both textual assembly and binary
+/// code. When passed an MCAsmStreamer it prints assembly and when passed
+/// an MCObjectStreamer it outputs binary code.
+//
+//===----------------------------------------------------------------------===//
+//
+
+
+#include "AMDGPUAsmPrinter.h"
+#include "AMDGPU.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+
+static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
+ MCStreamer &Streamer) {
+ return new AMDGPUAsmPrinter(tm, Streamer);
+}
+
+extern "C" void LLVMInitializeR600AsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
+}
+
+/// We need to override this function so we can avoid
+/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+ if (STM.dumpCode()) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ MF.dump();
+#endif
+ }
+ SetupMachineFunction(MF);
+ if (OutStreamer.hasRawTextSupport()) {
+ OutStreamer.EmitRawText("@" + MF.getName() + ":");
+ }
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+ if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ EmitProgramInfo(MF);
+ }
+ EmitFunctionBody();
+ return false;
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
+ unsigned MaxSGPR = 0;
+ unsigned MaxVGPR = 0;
+ bool VCCUsed = false;
+ const SIRegisterInfo * RI =
+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+
+ unsigned numOperands = MI.getNumOperands();
+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+ MachineOperand & MO = MI.getOperand(op_idx);
+ unsigned maxUsed;
+ unsigned width = 0;
+ bool isSGPR = false;
+ unsigned reg;
+ unsigned hwReg;
+ if (!MO.isReg()) {
+ continue;
+ }
+ reg = MO.getReg();
+ if (reg == AMDGPU::VCC) {
+ VCCUsed = true;
+ continue;
+ }
+ switch (reg) {
+ default: break;
+ case AMDGPU::EXEC:
+ case AMDGPU::M0:
+ continue;
+ }
+
+ if (AMDGPU::SReg_32RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 1;
+ } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 1;
+ } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 2;
+ } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 2;
+ } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 4;
+ } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 4;
+ } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 8;
+ } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 8;
+ } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 16;
+ } else {
+ assert(!"Unknown register class");
+ }
+ hwReg = RI->getEncodingValue(reg) & 0xff;
+ maxUsed = hwReg + width - 1;
+ if (isSGPR) {
+ MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
+ } else {
+ MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
+ }
+ }
+ }
+ }
+ if (VCCUsed) {
+ MaxSGPR += 2;
+ }
+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+ OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
+ OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
+ OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
+}
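
The core of the scan above reduces to tracking the highest register index touched, where an operand of width W starting at hardware register R covers R..R+W-1. A hedged model with invented inputs:

  #include <algorithm>
  #include <utility>
  #include <vector>

  // (hwReg, width) pairs as extracted from machine operands.
  static unsigned maxRegisterUsed(
      const std::vector<std::pair<unsigned, unsigned> > &Uses) {
    unsigned Max = 0;
    for (size_t i = 0, e = Uses.size(); i != e; ++i)
      Max = std::max(Max, Uses[i].first + Uses[i].second - 1);
    return Max;
  }

  // A 4-wide operand at hardware register 6 yields 9, matching the
  // `hwReg + width - 1` computation in EmitProgramInfo().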
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
new file mode 100644
index 000000000000..3812282b1798
--- /dev/null
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -0,0 +1,44 @@
+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_ASMPRINTER_H
+#define AMDGPU_ASMPRINTER_H
+
+#include "llvm/CodeGen/AsmPrinter.h"
+
+namespace llvm {
+
+class AMDGPUAsmPrinter : public AsmPrinter {
+
+public:
+ explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "AMDGPU Assembly Printer";
+ }
+
+ /// \brief Emit register usage information so that the GPU driver
+ /// can correctly setup the GPU state.
+ void EmitProgramInfo(MachineFunction &MF);
+
+ /// Implemented in AMDGPUMCInstLower.cpp
+ virtual void EmitInstruction(const MachineInstr *MI);
+};
+
+} // End namespace llvm
+
+#endif //AMDGPU_ASMPRINTER_H
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
new file mode 100644
index 000000000000..45ae37ef0c7f
--- /dev/null
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -0,0 +1,42 @@
+//===- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the AMD Radeon GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+// Inversion of CCIfInReg
+class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
+
+// Calling convention for SI
+def CC_SI : CallingConv<[
+
+ CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
+ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
+ ]>>>,
+
+ CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+    [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+ >>>,
+
+ CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
+ ]>>>
+
+]>;
+
+def CC_AMDGPU : CallingConv<[
+ CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().device()"#
+ "->getGeneration() == AMDGPUDeviceInfo::HD7XXX", CCDelegateTo<CC_SI>>
+]>;
diff --git a/lib/Target/R600/AMDGPUConvertToISA.cpp b/lib/Target/R600/AMDGPUConvertToISA.cpp
new file mode 100644
index 000000000000..50297d1f60c8
--- /dev/null
+++ b/lib/Target/R600/AMDGPUConvertToISA.cpp
@@ -0,0 +1,62 @@
+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass lowers AMDIL machine instructions to the appropriate
+/// hardware instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUConvertToISAPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ TargetMachine &TM;
+
+public:
+ AMDGPUConvertToISAPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUConvertToISAPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
+ return new AMDGPUConvertToISAPass(tm);
+}
+
+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
+ const AMDGPUInstrInfo * TII =
+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp
new file mode 100644
index 000000000000..815d6f71c3be
--- /dev/null
+++ b/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -0,0 +1,122 @@
+//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on an AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
+ int LAO, unsigned TransAl)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
+
+AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
+
+unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
+
+ // XXX: Hardcoding to 1 for now.
+ //
+  // I think the StackWidth should be stored as metadata associated with the
+ // MachineFunction. This metadata can either be added by a frontend, or
+ // calculated by a R600 specific LLVM IR pass.
+ //
+ // The StackWidth determines how stack objects are laid out in memory.
+ // For a vector stack variable, like: int4 stack[2], the data will be stored
+ // in the following ways depending on the StackWidth.
+ //
+ // StackWidth = 1:
+ //
+ // T0.X = stack[0].x
+ // T1.X = stack[0].y
+ // T2.X = stack[0].z
+ // T3.X = stack[0].w
+ // T4.X = stack[1].x
+ // T5.X = stack[1].y
+ // T6.X = stack[1].z
+ // T7.X = stack[1].w
+ //
+ // StackWidth = 2:
+ //
+ // T0.X = stack[0].x
+ // T0.Y = stack[0].y
+ // T1.X = stack[0].z
+ // T1.Y = stack[0].w
+ // T2.X = stack[1].x
+ // T2.Y = stack[1].y
+ // T3.X = stack[1].z
+ // T3.Y = stack[1].w
+ //
+ // StackWidth = 4:
+ // T0.X = stack[0].x
+ // T0.Y = stack[0].y
+ // T0.Z = stack[0].z
+ // T0.W = stack[0].w
+ // T1.X = stack[1].x
+ // T1.Y = stack[1].y
+ // T1.Z = stack[1].z
+ // T1.W = stack[1].w
+ return 1;
+}
+
+/// \returns The offset, in stack registers, of the object at index \p FI.
+int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Offset = 0;
+ int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
+
+ for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
+ const AllocaInst *Alloca = MFI->getObjectAllocation(i);
+ unsigned ArrayElements;
+ const Type *AllocaType = Alloca->getAllocatedType();
+ const Type *ElementType;
+
+ if (AllocaType->isArrayTy()) {
+ ArrayElements = AllocaType->getArrayNumElements();
+ ElementType = AllocaType->getArrayElementType();
+ } else {
+ ArrayElements = 1;
+ ElementType = AllocaType;
+ }
+
+ unsigned VectorElements;
+ if (ElementType->isVectorTy()) {
+ VectorElements = ElementType->getVectorNumElements();
+ } else {
+ VectorElements = 1;
+ }
+
+ Offset += (VectorElements / getStackWidth(MF)) * ArrayElements;
+ }
+ return Offset;
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 0;
+ return 0;
+}
+void
+AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+void
+AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
+
+bool
+AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ return false;
+}
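
To make the layout comment in getStackWidth() and the loop in getFrameIndexOffset() concrete, here is a hedged stand-alone model of the offset computation; the ObjectShape struct and example object are invented for illustration:

  #include <vector>

  struct ObjectShape {
    unsigned ArrayElements;   // 1 for non-arrays
    unsigned VectorElements;  // 1 for non-vectors
  };

  // Offset, in stack registers, at which the next object would start.
  static unsigned frameOffsetInRegs(const std::vector<ObjectShape> &Objs,
                                    unsigned StackWidth) {
    unsigned Offset = 0;
    for (size_t i = 0, e = Objs.size(); i != e; ++i)
      Offset += (Objs[i].VectorElements / StackWidth) *
                Objs[i].ArrayElements;
    return Offset;
  }

  // With StackWidth = 1, an `int4 stack[2]` object occupies
  // (4 / 1) * 2 = 8 registers, matching the T0.X..T7.X layout above.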
diff --git a/lib/Target/R600/AMDGPUFrameLowering.h b/lib/Target/R600/AMDGPUFrameLowering.h
new file mode 100644
index 000000000000..cf5742ee0952
--- /dev/null
+++ b/lib/Target/R600/AMDGPUFrameLowering.h
@@ -0,0 +1,44 @@
+//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface to describe a layout of a stack frame on an AMDIL target
+/// machine.
+//
+//===----------------------------------------------------------------------===//
+#ifndef AMDILFRAME_LOWERING_H
+#define AMDILFRAME_LOWERING_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+/// \brief Information about the stack frame layout on the AMDGPU targets.
+///
+/// It holds the direction of the stack growth, the known stack alignment on
+/// entry to each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
+class AMDGPUFrameLowering : public TargetFrameLowering {
+public:
+ AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
+ unsigned TransAl = 1);
+ virtual ~AMDGPUFrameLowering();
+
+ /// \returns The number of 32-bit sub-registers that are used when storing
+ /// values to the stack.
+ virtual unsigned getStackWidth(const MachineFunction &MF) const;
+ virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool hasFP(const MachineFunction &MF) const;
+};
+} // namespace llvm
+#endif // AMDILFRAME_LOWERING_H
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
new file mode 100644
index 000000000000..a266df535d56
--- /dev/null
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -0,0 +1,414 @@
+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This is the parent TargetLowering class for hardware code gen
+/// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+using namespace llvm;
+
+#include "AMDGPUGenCallingConv.inc"
+
+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
+ TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+
+ // Initialize target lowering borrowed from AMDIL
+ InitAMDILLowering();
+
+ // We need to custom lower some of the intrinsics
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Library functions. These default to Expand, but we have instructions
+ // for them.
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FEXP2, MVT::f32, Legal);
+ setOperationAction(ISD::FPOW, MVT::f32, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+
+ // Lower floating point store/load to integer store/load to reduce the number
+ // of patterns in tablegen.
+ setOperationAction(ISD::STORE, MVT::f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
+
+ setOperationAction(ISD::STORE, MVT::v4f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
+
+ setOperationAction(ISD::LOAD, MVT::f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
+
+ setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+}
+
+//===---------------------------------------------------------------------===//
+// TargetLowering Callbacks
+//===---------------------------------------------------------------------===//
+
+void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const {
+
+ State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
+}
+
+SDValue AMDGPUTargetLowering::LowerReturn(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const {
+ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
+}
+
+//===---------------------------------------------------------------------===//
+// Target specific lowering
+//===---------------------------------------------------------------------===//
+
+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+ const {
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+    assert(0 && "Custom lowering code for this "
+           "instruction is not implemented yet!");
+ break;
+ // AMDIL DAG lowering
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::SREM: return LowerSREM(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ // AMDGPU DAG lowering
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ }
+ return Op;
+}
+
+SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ switch (IntrinsicID) {
+ default: return Op;
+ case AMDGPUIntrinsic::AMDIL_abs:
+ return LowerIntrinsicIABS(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_exp:
+ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_lrp:
+ return LowerIntrinsicLRP(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_fraction:
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDIL_max:
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imax:
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umax:
+ return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_min:
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imin:
+ return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umin:
+ return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_round_nearest:
+ return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
+ }
+}
+
+/// IABS(a) = SMAX(sub(0, a), a)
+SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ Op.getOperand(1));
+
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
+}
+
+/// Linear Interpolation
+/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
+SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(1.0f, MVT::f32),
+ Op.getOperand(1));
+ SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
+ Op.getOperand(3));
+ return DAG.getNode(ISD::FADD, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
+ OneSubAC);
+}
+
+/// \brief Generate Min/Max node
+SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+
+ if (VT != MVT::f32 ||
+ !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
+ return SDValue();
+ }
+
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ switch (CCOpcode) {
+ case ISD::SETOEQ:
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ case ISD::SETUEQ:
+ case ISD::SETEQ:
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2:
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2:
+ case ISD::SETUO:
+ case ISD::SETO:
+ assert(0 && "Operation should already be optimised !");
+ case ISD::SETULE:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETOLT:
+ case ISD::SETLE:
+ case ISD::SETLT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
+ else
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
+ }
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGE:
+ case ISD::SETOGE:
+ case ISD::SETUGT:
+ case ISD::SETOGT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
+ else
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
+ }
+ case ISD::SETCC_INVALID:
+ assert(0 && "Invalid setcc condcode !");
+ }
+ return Op;
+}
+
+
+
+SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue Num = Op.getOperand(0);
+ SDValue Den = Op.getOperand(1);
+
+ SmallVector<SDValue, 8> Results;
+
+ // RCP = URECIP(Den) = 2^32 / Den + e
+ // e is rounding error.
+ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
+
+  // RCP_LO = umulo(RCP, Den)
+ SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
+
+  // RCP_HI = mulhu(RCP, Den)
+ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
+
+ // NEG_RCP_LO = -RCP_LO
+ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ RCP_LO);
+
+ // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
+ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ NEG_RCP_LO, RCP_LO,
+ ISD::SETEQ);
+ // Calculate the rounding error from the URECIP instruction
+ // E = mulhu(ABS_RCP_LO, RCP)
+ SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
+
+ // RCP_A_E = RCP + E
+ SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
+
+ // RCP_S_E = RCP - E
+ SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
+
+  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
+ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ RCP_A_E, RCP_S_E,
+ ISD::SETEQ);
+ // Quotient = mulhu(Tmp0, Num)
+ SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
+
+ // Num_S_Remainder = Quotient * Den
+ SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
+
+ // Remainder = Num - Num_S_Remainder
+ SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
+
+ // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
+ SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
+ DAG.getConstant(0, VT),
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
+ SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
+ Remainder_GE_Zero);
+
+ // Calculate Division result:
+
+ // Quotient_A_One = Quotient + 1
+ SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Quotient_S_One = Quotient - 1
+ SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
+ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Quotient, Quotient_A_One, ISD::SETEQ);
+
+ // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
+ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Quotient_S_One, Div, ISD::SETEQ);
+
+ // Calculate Rem result:
+
+ // Remainder_S_Den = Remainder - Den
+ SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
+
+ // Remainder_A_Den = Remainder + Den
+ SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
+
+ // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
+ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Remainder, Remainder_S_Den, ISD::SETEQ);
+
+ // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
+ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Remainder_A_Den, Rem, ISD::SETEQ);
+ SDValue Ops[2];
+ Ops[0] = Div;
+ Ops[1] = Rem;
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->isExactlyValue(1.0);
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isAllOnesValue();
+ }
+ return false;
+}
+
+bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->getValueAPF().isZero();
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isNullValue();
+ }
+ return false;
+}
+
+SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
+ const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned VirtualRegister;
+ if (!MRI.isLiveIn(Reg)) {
+ VirtualRegister = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(Reg, VirtualRegister);
+ } else {
+ VirtualRegister = MRI.getLiveInVirtReg(Reg);
+ }
+ return DAG.getRegister(VirtualRegister, VT);
+}
+
+#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
+
+const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ // AMDIL DAG nodes
+  NODE_NAME_CASE(CALL)
+  NODE_NAME_CASE(UMUL)
+  NODE_NAME_CASE(DIV_INF)
+  NODE_NAME_CASE(RET_FLAG)
+  NODE_NAME_CASE(BRANCH_COND)
+
+ // AMDGPU DAG nodes
+ NODE_NAME_CASE(DWORDADDR)
+ NODE_NAME_CASE(FRACT)
+ NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(SMAX)
+ NODE_NAME_CASE(UMAX)
+ NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(SMIN)
+ NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(URECIP)
+ NODE_NAME_CASE(EXPORT)
+ NODE_NAME_CASE(CONST_ADDRESS)
+ NODE_NAME_CASE(REGISTER_LOAD)
+ NODE_NAME_CASE(REGISTER_STORE)
+ }
+}
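
Since the comments in LowerUDIVREM() only narrate the DAG nodes, a stand-alone scalar model may help. This is a hedged sketch, not the production lowering: it assumes (as the single +1 correction in the DAG sequence does) that the refined reciprocal estimate never overshoots the true quotient and undershoots by at most one, which lets the signed SETGE selects be folded into plain unsigned comparisons:

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  // MULHU: high 32 bits of an unsigned 32x32 multiply.
  static uint32_t mulhu(uint32_t a, uint32_t b) {
    return (uint32_t)(((uint64_t)a * b) >> 32);
  }

  static void udivrem32(uint32_t Num, uint32_t Den,
                        uint32_t *Div, uint32_t *Rem) {
    assert(Den != 0 && "division by zero");
    uint32_t RCP = (uint32_t)(((uint64_t)1 << 32) / Den);  // URECIP
    uint32_t RCP_LO = RCP * Den;        // low half of RCP * Den
    uint32_t RCP_HI = mulhu(RCP, Den);  // high half of RCP * Den
    // Estimate the reciprocal's rounding error E and refine with it.
    uint32_t ABS_RCP_LO = (RCP_HI == 0) ? (0u - RCP_LO) : RCP_LO;
    uint32_t E = mulhu(ABS_RCP_LO, RCP);
    uint32_t Tmp0 = (RCP_HI == 0) ? (RCP + E) : (RCP - E);
    uint32_t Quotient = mulhu(Tmp0, Num);
    uint32_t Remainder = Num - Quotient * Den;
    // One correction step, relying on the at-most-one undershoot.
    if (Remainder >= Den) { ++Quotient; Remainder -= Den; }
    *Div = Quotient;
    *Rem = Remainder;
  }

  int main() {
    uint32_t d, r;
    udivrem32(0xFFFFFFFFu, 7u, &d, &r);
    printf("%u %u\n", d, r);  // expected: 613566756 3
    return 0;
  }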
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
new file mode 100644
index 000000000000..f31b6466bd46
--- /dev/null
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -0,0 +1,140 @@
+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition of the TargetLowering class that is common
+/// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUISELLOWERING_H
+#define AMDGPUISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class MachineRegisterInfo;
+
+class AMDGPUTargetLowering : public TargetLowering {
+private:
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+
+protected:
+
+ /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
+ /// MachineFunction.
+ ///
+ /// \returns a RegisterSDNode representing Reg.
+ SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
+ unsigned Reg, EVT VT) const;
+
+ bool isHWTrueValue(SDValue Op) const;
+ bool isHWFalseValue(SDValue Op) const;
+
+ void AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const;
+
+public:
+ AMDGPUTargetLowering(TargetMachine &TM);
+
+ virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const;
+ virtual SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ CLI.Callee.dump();
+ llvm_unreachable("Undefined function");
+ }
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
+ virtual const char* getTargetNodeName(unsigned Opcode) const;
+
+ virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const {
+ return N;
+ }
+
+// Functions defined in AMDILISelLowering.cpp
+public:
+
+ /// \brief Determine which of the bits specified in \p Mask are known to be
+ /// either zero or one and return them in the \p KnownZero and \p KnownOne
+ /// bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const;
+
+ /// We want to mark f32/f64 floating point values as legal.
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+ /// We don't want to shrink f64/f32 constants.
+ bool ShouldShrinkFPConstant(EVT VT) const;
+
+private:
+ void InitAMDILLowering();
+ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+};
+
+namespace AMDGPUISD {
+
+enum {
+ // AMDIL ISD Opcodes
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CALL, // Function call based on a single integer
+ UMUL, // 32bit unsigned multiplication
+ DIV_INF, // Divide with infinity returned on zero divisor
+ RET_FLAG,
+ BRANCH_COND,
+ // End AMDIL ISD Opcodes
+ BITALIGN,
+ DWORDADDR,
+ FRACT,
+ FMAX,
+ SMAX,
+ UMAX,
+ FMIN,
+ SMIN,
+ UMIN,
+ URECIP,
+ EXPORT,
+ CONST_ADDRESS,
+ REGISTER_LOAD,
+ REGISTER_STORE,
+ LAST_AMDGPU_ISD_NUMBER
+};
+
+
+} // End namespace AMDGPUISD
+
+} // End namespace llvm
+
+#endif // AMDGPUISELLOWERING_H
diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
new file mode 100644
index 000000000000..ed6c8ec55dd2
--- /dev/null
+++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
@@ -0,0 +1,343 @@
+//===-- AMDGPUIndirectAddressing.cpp - Indirect Addressing Support --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Instructions can use indirect addressing to index the register file as if it
+/// were memory. This pass lowers RegisterLoad and RegisterStore instructions
+/// to either a COPY or a MOV that uses indirect addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const AMDGPUInstrInfo *TII;
+
+ bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
+
+public:
+ AMDGPUIndirectAddressingPass(TargetMachine &tm) :
+ MachineFunctionPass(ID),
+ TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo()))
+ { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "R600 Handle indirect addressing"; }
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUIndirectAddressingPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
+ return new AMDGPUIndirectAddressingPass(tm);
+}
+
+bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ int IndirectBegin = TII->getIndirectIndexBegin(MF);
+ int IndirectEnd = TII->getIndirectIndexEnd(MF);
+
+ if (IndirectBegin == -1) {
+ // No indirect addressing, we can skip this pass
+ assert(IndirectEnd == -1);
+ return false;
+ }
+
+ // The map keeps track of the indirect address that is represented by
+ // each virtual register. The key is the register and the value is the
+ // indirect address it uses.
+ std::map<unsigned, unsigned> RegisterAddressMap;
+
+ // First pass - Lower all of the RegisterStore instructions and track which
+ // registers are live.
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ // This map keeps track of the current live indirect registers.
+ // The key is the address and the value is the register
+ std::map<unsigned, unsigned> LiveAddressRegisterMap;
+ MachineBasicBlock &MBB = *BB;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+
+ if (!TII->isRegisterStore(MI)) {
+ continue;
+ }
+
+ // Lower RegisterStore
+
+ unsigned RegIndex = MI.getOperand(2).getImm();
+ unsigned Channel = MI.getOperand(3).getImm();
+ unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+ const TargetRegisterClass *IndirectStoreRegClass =
+ TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
+
+ if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+ // Direct register access.
+ unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
+ .addOperand(MI.getOperand(0));
+
+ RegisterAddressMap[DstReg] = Address;
+ LiveAddressRegisterMap[Address] = DstReg;
+ } else {
+ // Indirect register access.
+ MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
+ MI.getOperand(0).getReg(), // Value
+ Address,
+ MI.getOperand(1).getReg()); // Offset
+ for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+ unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+ unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+ MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
+ RegisterAddressMap[DstReg] = Addr;
+ LiveAddressRegisterMap[Addr] = DstReg;
+ }
+ }
+ MI.eraseFromParent();
+ }
+
+    // Update the live-ins of the successor blocks
+ for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
+ SuccEnd = MBB.succ_end();
+ SuccEnd != Succ; ++Succ) {
+ std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
+ for (Key = LiveAddressRegisterMap.begin(),
+ KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
+ (*Succ)->addLiveIn(Key->second);
+ }
+ }
+ }
+
+ // Second pass - Lower the RegisterLoad instructions
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ // Key is the address and the value is the register
+ std::map<unsigned, unsigned> LiveAddressRegisterMap;
+ MachineBasicBlock &MBB = *BB;
+
+ MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
+ while (LI != MBB.livein_end()) {
+ std::vector<unsigned> PhiRegisters;
+
+ // Make sure this live in is used for indirect addressing
+ if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
+ ++LI;
+ continue;
+ }
+
+ unsigned Address = RegisterAddressMap[*LI];
+ LiveAddressRegisterMap[Address] = *LI;
+ PhiRegisters.push_back(*LI);
+
+ // Check if there are other live in registers which map to the same
+ // indirect address.
+ for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
+ LE = MBB.livein_end();
+ LJ != LE; ++LJ) {
+ unsigned Reg = *LJ;
+ if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
+ continue;
+ }
+
+ if (RegisterAddressMap[Reg] == Address) {
+ PhiRegisters.push_back(Reg);
+ }
+ }
+
+ if (PhiRegisters.size() == 1) {
+ // We don't need to insert a Phi instruction, so we can just add the
+ // registers to the live list for the block.
+ LiveAddressRegisterMap[Address] = *LI;
+ MBB.removeLiveIn(*LI);
+ } else {
+ // We need to insert a PHI, because we have the same address being
+ // written in multiple predecessor blocks.
+ const TargetRegisterClass *PhiDstClass =
+ TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
+ unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
+ MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
+ MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::PHI), PhiDstReg);
+
+ for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
+ RE = PhiRegisters.end();
+ RI != RE; ++RI) {
+ unsigned Reg = *RI;
+ MachineInstr *DefInst = MRI.getVRegDef(Reg);
+ assert(DefInst);
+ MachineBasicBlock *RegBlock = DefInst->getParent();
+ Phi.addReg(Reg);
+ Phi.addMBB(RegBlock);
+ MBB.removeLiveIn(Reg);
+ }
+ RegisterAddressMap[PhiDstReg] = Address;
+ LiveAddressRegisterMap[Address] = PhiDstReg;
+ }
+ LI = MBB.livein_begin();
+ }
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+
+ if (!TII->isRegisterLoad(MI)) {
+ if (MI.getOpcode() == AMDGPU::PHI) {
+ continue;
+ }
+ // Check for indirect register defs
+ for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
+ OpIdx < NumOperands; ++OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isReg() && MO.isDef() &&
+ RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
+ unsigned Reg = MO.getReg();
+ unsigned LiveAddress = RegisterAddressMap[Reg];
+ // Chain the live-ins
+          if (LiveAddressRegisterMap.find(LiveAddress) !=
+              LiveAddressRegisterMap.end()) {
+ MI.addOperand(MachineOperand::CreateReg(
+ LiveAddressRegisterMap[LiveAddress],
+ false, // isDef
+ true, // isImp
+ true)); // isKill
+ }
+ LiveAddressRegisterMap[LiveAddress] = Reg;
+ }
+ }
+ continue;
+ }
+
+ const TargetRegisterClass *SuperIndirectRegClass =
+ TII->getSuperIndirectRegClass();
+ const TargetRegisterClass *IndirectLoadRegClass =
+ TII->getIndirectAddrLoadRegClass();
+ unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
+
+ unsigned RegIndex = MI.getOperand(2).getImm();
+ unsigned Channel = MI.getOperand(3).getImm();
+ unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+
+ if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+ // Direct register access
+ unsigned Reg = LiveAddressRegisterMap[Address];
+ unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
+
+ if (regHasExplicitDef(MRI, Reg)) {
+ // If the register we are reading from has an explicit def, then that
+ // means it was written via a direct register access (i.e. COPY
+ // or other instruction that doesn't use indirect addressing). In
+ // this case we know where the value has been stored, so we can just
+ // issue a copy.
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(Reg);
+ } else {
+        // If the register we are reading has an implicit def, then that
+        // means it was written by an indirect register access (i.e. an
+        // instruction that uses indirect addressing).
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(AddrReg)
+ .addReg(Reg, RegState::Implicit);
+ }
+ } else {
+ // Indirect register access
+
+      // Note on REG_SEQUENCE instructions: You can't actually use the register
+ // it defines unless you have an instruction that takes the defined
+ // register class as an operand.
+
+ MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDGPU::REG_SEQUENCE),
+ IndirectReg);
+ for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+ unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+ if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
+ continue;
+ }
+ unsigned Reg = LiveAddressRegisterMap[Addr];
+
+ // We only need to use REG_SEQUENCE for explicit defs, since the
+ // register coalescer won't do anything with the implicit defs.
+ if (!regHasExplicitDef(MRI, Reg)) {
+ continue;
+ }
+
+        // Insert a REG_SEQUENCE instruction to force the register allocator
+ // to allocate the virtual register to the correct physical register.
+ Sequence.addReg(LiveAddressRegisterMap[Addr]);
+ Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
+ }
+ MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
+ MI.getOperand(0).getReg(), // Value
+ Address,
+ MI.getOperand(1).getReg()); // Offset
+
+ Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
+ Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit);
+
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
+
+bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+
+ if (!DefInstr) {
+ return false;
+ }
+
+ if (DefInstr->getOpcode() == AMDGPU::PHI) {
+ bool Explicit = false;
+ for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(),
+ E = DefInstr->operands_end();
+ I != E; ++I) {
+ const MachineOperand &MO = *I;
+ if (!MO.isReg() || MO.isDef()) {
+ continue;
+ }
+
+ Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg());
+ }
+ return Explicit;
+ }
+
+ return DefInstr->getOperand(0).isReg() &&
+ DefInstr->getOperand(0).getReg() == Reg;
+}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
new file mode 100644
index 000000000000..30f736c84c25
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -0,0 +1,267 @@
+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Implementation of the TargetInstrInfo class that is common to all
+/// AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define GET_INSTRINFO_CTOR
+#define GET_INSTRMAP_INFO
+#include "AMDGPUGenInstrInfo.inc"
+
+using namespace llvm;
+
+AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
+ : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
+
+const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+  // TODO: Implement this function
+ return false;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+  // TODO: Implement this function
+ return 0;
+}
+
+unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+  // TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+  // TODO: Implement this function
+ return false;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+  // TODO: Implement this function
+ return 0;
+}
+unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+  // TODO: Implement this function
+ return 0;
+}
+bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+  // TODO: Implement this function
+ return false;
+}
+
+MachineInstr *
+AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+  // TODO: Implement this function
+ return NULL;
+}
+bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const {
+ while (iter != MBB.end()) {
+ switch (iter->getOpcode()) {
+ default:
+ break;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32:
+ case AMDGPU::BRANCH:
+ return true;
+    }
+ ++iter;
+ }
+ return false;
+}
+
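+/// Walk backwards over any trailing run of flow control instructions
+/// (ENDLOOP, ENDIF, ELSE) and return an iterator to the start of that run,
+/// i.e. just past the last ordinary instruction; returns MBB->end() for an
+/// empty block or one that does not end in flow control.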
+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator tmp = MBB->end();
+ if (!MBB->size()) {
+ return MBB->end();
+ }
+ while (--tmp) {
+ if (tmp->getOpcode() == AMDGPU::ENDLOOP
+ || tmp->getOpcode() == AMDGPU::ENDIF
+ || tmp->getOpcode() == AMDGPU::ELSE) {
+ if (tmp == MBB->begin()) {
+ return tmp;
+ } else {
+ continue;
+ }
+ } else {
+ return ++tmp;
+ }
+ }
+ return MBB->end();
+}
+
+void
+AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(!"Not Implemented");
+}
+
+void
+AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ assert(!"Not Implemented");
+}
+
+MachineInstr *
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+// TODO: Implement this function
+ return 0;
+}
+MachineInstr*
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // TODO: Implement this function
+ return 0;
+}
+bool
+AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad,
+ bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool
+AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ // TODO: Implement this function
+ return false;
+}
+
+unsigned
+AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ // TODO: Implement this function
+ return 0;
+}
+
+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ assert(Offset2 > Offset1
+ && "Second offset should be larger than first offset!");
+  // If we have fewer than 16 loads in a row, and the offsets are within 16,
+  // then schedule together.
+  // TODO: Make the loads schedule near if they fit in a cacheline
+ return (NumLoads < 16 && (Offset2 - Offset1) < 16);
+}
+
+bool
+AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // TODO: Implement this function
+ return true;
+}
+void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ // TODO: Implement this function
+}
+
+bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
+ // TODO: Implement this function
+ return false;
+}
+bool
+AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2)
+ const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // TODO: Implement this function
+ return false;
+}
+
+bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
+ // TODO: Implement this function
+ return MI->getDesc().isPredicable();
+}
+
+bool
+AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // TODO: Implement this function
+ return true;
+}
+
+bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
+}
+
+bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
+}
+
+void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const AMDGPURegisterInfo & RI = getRegisterInfo();
+
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ MachineOperand &MO = MI.getOperand(i);
+ // Convert dst regclass to one that is supported by the ISA
+ if (MO.isReg() && MO.isDef()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
+ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
+
+ assert(newRegClass);
+
+ MRI.setRegClass(MO.getReg(), newRegClass);
+ }
+ }
+ }
+}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
new file mode 100644
index 000000000000..3909e4e105ee
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -0,0 +1,206 @@
+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Contains the definition of a TargetInstrInfo class that is common
+/// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTRUCTIONINFO_H
+#define AMDGPUINSTRUCTIONINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <map>
+
+#define GET_INSTRINFO_HEADER
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
+#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
+#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
+#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
+
+class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
+private:
+ const AMDGPURegisterInfo RI;
+ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
+ MachineBasicBlock &MBB) const;
+protected:
+ TargetMachine &TM;
+public:
+ explicit AMDGPUInstrInfo(TargetMachine &tm);
+
+ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DstReg, unsigned &SubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+ bool hasStoreFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
+
+ MachineInstr *
+ convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const = 0;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+protected:
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const;
+public:
+ bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const;
+ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode *> &NewNodes) const;
+ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = 0) const;
+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool isPredicated(const MachineInstr *MI) const;
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ bool isPredicable(MachineInstr *MI) const;
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ // Helper functions that check the opcode for status information
+ bool isLoadInst(llvm::MachineInstr *MI) const;
+ bool isExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isSExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isZExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isAExtLoadInst(llvm::MachineInstr *MI) const;
+ bool isStoreInst(llvm::MachineInstr *MI) const;
+ bool isTruncStoreInst(llvm::MachineInstr *MI) const;
+ bool isRegisterStore(const MachineInstr &MI) const;
+ bool isRegisterLoad(const MachineInstr &MI) const;
+
+//===---------------------------------------------------------------------===//
+// Pure virtual functions to be implemented by sub-classes.
+//===---------------------------------------------------------------------===//
+
+ virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const = 0;
+ virtual unsigned getIEQOpcode() const = 0;
+ virtual bool isMov(unsigned opcode) const = 0;
+
+ /// \returns the smallest register index that will be accessed by an indirect
+  /// read or write, or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
+
+ /// \returns the largest register index that will be accessed by an indirect
+  /// read or write, or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
+
+ /// \brief Calculate the "Indirect Address" for the given \p RegIndex and
+ /// \p Channel
+ ///
+ /// We model indirect addressing using a virtual address space that can be
+  /// accessed with loads and stores. The "Indirect Address" is the memory
+ /// address in this virtual address space that maps to the given \p RegIndex
+ /// and \p Channel.
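+  ///
+  /// For example, a hypothetical implementation might pack the two values as
+  /// Address = RegIndex * 4 + Channel; the actual mapping is target-specific.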
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const = 0;
+
+ /// \returns The register class to be used for storing values to an
+ /// "Indirect Address" .
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const = 0;
+
+ /// \returns The register class to be used for loading values from
+ /// an "Indirect Address" .
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
+
+ /// \brief Build instruction(s) for an indirect register write.
+ ///
+ /// \returns The instruction that performs the indirect register write
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const = 0;
+
+ /// \brief Build instruction(s) for an indirect register read.
+ ///
+ /// \returns The instruction that performs the indirect register read
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const = 0;
+
+ /// \returns the register class whose sub registers are the set of all
+ /// possible registers that can be used for indirect addressing.
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
+
+ /// \brief Convert the AMDIL MachineInstr to a supported ISA
+ /// MachineInstr
+ virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const;
+
+};
+
+} // End llvm namespace
+
+#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
+#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
+
+#endif // AMDGPUINSTRUCTIONINFO_H
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
new file mode 100644
index 000000000000..b66ae879dc20
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -0,0 +1,82 @@
+//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains DAG node definitions for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Profiles
+//===----------------------------------------------------------------------===//
+
+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Nodes
+//
+
+// out = ((a << 32) | b) >> c
+//
+// Can be used to optimize rotl:
+// rotl(a, b) = bitalign(a, a, 32 - b)
+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
+
+// The argument to this node is a dword address.
+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+
+// out = a - floor(a)
+def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
+
+// out = max(a, b) a and b are floats
+def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are signed ints
+def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are unsigned ints
+def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are floats
+def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are signed ints
+def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are unsigned ints
+def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// urecip - This operation is a helper for integer division; it returns the
+// result of 1 / a as a fractional unsigned integer.
+// out = (2^32 / a) + e
+// where e is the rounding error
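+//
+// A typical use (a sketch, not taken from this file): an unsigned division
+// b / a can be approximated as mulhi(b, urecip(a)) plus a fix-up step for
+// the rounding error e.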
+def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
+
+def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
+
+def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
+ SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
+ SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayStore]>;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
new file mode 100644
index 000000000000..e740348717c7
--- /dev/null
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -0,0 +1,266 @@
+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction defs that are common to all hw codegen
+// targets.
+//
+//===----------------------------------------------------------------------===//
+
+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
+ field bit isRegisterLoad = 0;
+ field bit isRegisterStore = 0;
+
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = NullALU;
+
+ let TSFlags{63} = isRegisterLoad;
+ let TSFlags{62} = isRegisterStore;
+}
+
+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
+ : AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<32> Inst = 0xffffffff;
+
+}
+
+def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
+
+def COND_EQ : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOEQ: case ISD::SETUEQ:
+ case ISD::SETEQ: return true;}}}]
+>;
+
+def COND_NE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETONE: case ISD::SETUNE:
+ case ISD::SETNE: return true;}}}]
+>;
+def COND_GT : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOGT: case ISD::SETUGT:
+ case ISD::SETGT: return true;}}}]
+>;
+
+def COND_GE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOGE: case ISD::SETUGE:
+ case ISD::SETGE: return true;}}}]
+>;
+
+def COND_LT : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOLT: case ISD::SETULT:
+ case ISD::SETLT: return true;}}}]
+>;
+
+def COND_LE : PatLeaf <
+ (cond),
+ [{switch(N->get()){{default: return false;
+ case ISD::SETOLE: case ISD::SETULE:
+ case ISD::SETLE: return true;}}}]
+>;
+
+def COND_NULL : PatLeaf <
+ (cond),
+ [{return false;}]
+>;
+
+//===----------------------------------------------------------------------===//
+// Load/Store Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
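+// IEEE-754 single-precision bit patterns: 0x40c90fdb is 2*pi, 0x40490fdb is
+// pi and 0x3e22f983 is 1/(2*pi).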
+class Constants {
+int TWO_PI = 0x40c90fdb;
+int PI = 0x40490fdb;
+int TWO_PI_INV = 0x3e22f983;
+}
+def CONST : Constants;
+
+def FP_ZERO : PatLeaf <
+ (fpimm),
+ [{return N->getValueAPF().isZero();}]
+>;
+
+def FP_ONE : PatLeaf <
+ (fpimm),
+ [{return N->isExactlyValue(1.0);}]
+>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+let usesCustomInserter = 1 in {
+
+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "CLAMP $dst, $src0",
+ [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+>;
+
+class FABS <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FABS $dst, $src0",
+ [(set rc:$dst, (fabs rc:$src0))]
+>;
+
+class FNEG <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FNEG $dst, $src0",
+ [(set rc:$dst, (fneg rc:$src0))]
+>;
+
+} // usesCustomInserter = 1
+
+multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
+ ComplexPattern addrPat> {
+ def RegisterLoad : AMDGPUShaderInst <
+ (outs dstClass:$dst),
+ (ins addrClass:$addr, i32imm:$chan),
+ "RegisterLoad $dst, $addr",
+ [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
+ (i32 timm:$chan)))]
+ > {
+ let isRegisterLoad = 1;
+ }
+
+ def RegisterStore : AMDGPUShaderInst <
+ (outs),
+ (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
+ "RegisterStore $val, $addr",
+ [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))]
+ > {
+ let isRegisterStore = 1;
+ }
+}
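+// A target instantiates the load/store pair above with its own register and
+// address operand classes, along the lines of (an illustrative sketch; the
+// operand names are placeholders):
+//   defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;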
+
+} // End isCodeGenOnly = 1, isPseudo = 1
+
+/* Generic helper patterns for intrinsics */
+/* -------------------------------------- */
+
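+/* POW_Common expands pow(a, b) as exp(b * log(a)), using whichever IEEE
+   log, exp and mul instructions the target supplies. */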
+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
+ RegisterClass rc> : Pat <
+ (fpow rc:$src0, rc:$src1),
+ (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
+>;
+
+/* Other helper patterns */
+/* --------------------- */
+
+/* Extract element pattern */
+class Extract_Element <ValueType sub_type, ValueType vec_type,
+ RegisterClass vec_class, int sub_idx,
+ SubRegIndex sub_reg>: Pat<
+ (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
+ (EXTRACT_SUBREG vec_class:$src, sub_reg)
+>;
+
+/* Insert element pattern */
+class Insert_Element <ValueType elem_type, ValueType vec_type,
+ RegisterClass elem_class, RegisterClass vec_class,
+ int sub_idx, SubRegIndex sub_reg> : Pat <
+
+ (vec_type (vector_insert (vec_type vec_class:$vec),
+ (elem_type elem_class:$elem), sub_idx)),
+ (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
+>;
+
+// Vector Build pattern
+class Vector1_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$src))),
+ (vecType elemClass:$src)
+>;
+
+class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))),
+ (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
+>;
+
+class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
+ (elemType elemClass:$z), (elemType elemClass:$w))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1),
+ elemClass:$z, sub2), elemClass:$w, sub3)
+>;
+
+class Vector8_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
+ (elemType elemClass:$sub2), (elemType elemClass:$sub3),
+ (elemType elemClass:$sub4), (elemType elemClass:$sub5),
+ (elemType elemClass:$sub6), (elemType elemClass:$sub7))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
+ elemClass:$sub2, sub2), elemClass:$sub3, sub3),
+ elemClass:$sub4, sub4), elemClass:$sub5, sub5),
+ elemClass:$sub6, sub6), elemClass:$sub7, sub7)
+>;
+
+class Vector16_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
+ (elemType elemClass:$sub2), (elemType elemClass:$sub3),
+ (elemType elemClass:$sub4), (elemType elemClass:$sub5),
+ (elemType elemClass:$sub6), (elemType elemClass:$sub7),
+ (elemType elemClass:$sub8), (elemType elemClass:$sub9),
+ (elemType elemClass:$sub10), (elemType elemClass:$sub11),
+ (elemType elemClass:$sub12), (elemType elemClass:$sub13),
+ (elemType elemClass:$sub14), (elemType elemClass:$sub15))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
+ elemClass:$sub2, sub2), elemClass:$sub3, sub3),
+ elemClass:$sub4, sub4), elemClass:$sub5, sub5),
+ elemClass:$sub6, sub6), elemClass:$sub7, sub7),
+ elemClass:$sub8, sub8), elemClass:$sub9, sub9),
+ elemClass:$sub10, sub10), elemClass:$sub11, sub11),
+ elemClass:$sub12, sub12), elemClass:$sub13, sub13),
+ elemClass:$sub14, sub14), elemClass:$sub15, sub15)
+>;
+
+// bitconvert pattern
+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
+ (dt (bitconvert (st rc:$src0))),
+ (dt rc:$src0)
+>;
+
+class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
+ (vt (AMDGPUdwordaddr (vt rc:$addr))),
+ (vt rc:$addr)
+>;
+
+include "R600Instructions.td"
+
+include "SIInstrInfo.td"
+
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
new file mode 100644
index 000000000000..eecb25b04f79
--- /dev/null
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -0,0 +1,60 @@
+//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines intrinsics that are used by all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "AMDGPU", isTarget = 1 in {
+
+ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
+ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
+ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
+ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+}
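+// TableGen derives the IR-level intrinsic names from these defs by replacing
+// underscores with dots, so e.g. int_AMDGPU_rcp is called from IR as
+// (a usage sketch):
+//   %r = call float @llvm.AMDGPU.rcp(float %x)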
+
+let TargetPrefix = "TGSI", isTarget = 1 in {
+
+ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
+}
+
+include "SIIntrinsics.td"
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
new file mode 100644
index 000000000000..1dc1c657dfe5
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -0,0 +1,83 @@
+//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUMCInstLower.h"
+#include "AMDGPUAsmPrinter.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx):
+ Ctx(ctx)
+{ }
+
+void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_FPImmediate: {
+ const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
+ assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
+ "Only floating point immediates are supported at the moment.");
+ MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_Register:
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ }
+ OutMI.addOperand(MCOp);
+ }
+}
+
+void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ AMDGPUMCInstLower MCInstLowering(OutContext);
+
+ if (MI->isBundle()) {
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator I = MI;
+ ++I;
+ while (I != MBB->end() && I->isInsideBundle()) {
+ MCInst MCBundleInst;
+ const MachineInstr *BundledInst = I;
+ MCInstLowering.lower(BundledInst, MCBundleInst);
+ OutStreamer.EmitInstruction(MCBundleInst);
+ ++I;
+ }
+ } else {
+ MCInst TmpInst;
+ MCInstLowering.lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+}
diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h
new file mode 100644
index 000000000000..d7d538e92599
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMCInstLower.h
@@ -0,0 +1,34 @@
+//===- AMDGPUMCInstLower.h - MachineInstr Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_MCINSTLOWER_H
+#define AMDGPU_MCINSTLOWER_H
+
+namespace llvm {
+
+class MCInst;
+class MCContext;
+class MachineInstr;
+
+class AMDGPUMCInstLower {
+
+ MCContext &Ctx;
+
+public:
+ AMDGPUMCInstLower(MCContext &ctx);
+
+ /// \brief Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+};
+
+} // End namespace llvm
+
+#endif // AMDGPU_MCINSTLOWER_H
diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp
new file mode 100644
index 000000000000..0223ec8e4f3f
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -0,0 +1,22 @@
+#include "AMDGPUMachineFunction.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+
+const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType";
+
+AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
+  MachineFunctionInfo(),
+  // Default the shader type to 0 when the attribute is absent, so the field
+  // is never read uninitialized (0 is an assumed default here).
+  ShaderType(0) {
+ AttributeSet Set = MF.getFunction()->getAttributes();
+ Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
+ ShaderTypeAttribute);
+
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ if (Str.getAsInteger(0, ShaderType))
+ llvm_unreachable("Can't parse shader type!");
+ }
+}
+
+}
diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
new file mode 100644
index 000000000000..21c8c51dae45
--- /dev/null
+++ b/lib/Target/R600/AMDGPUMachineFunction.h
@@ -0,0 +1,29 @@
+//===-- AMDGPUMachineFunction.h - AMDGPU Machine Function Info ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUMACHINEFUNCTION_H
+#define AMDGPUMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class AMDGPUMachineFunction : public MachineFunctionInfo {
+private:
+ static const char *ShaderTypeAttribute;
+public:
+ AMDGPUMachineFunction(const MachineFunction &MF);
+ unsigned ShaderType;
+};
+
+}
+#endif // AMDGPUMACHINEFUNCTION_H
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
new file mode 100644
index 000000000000..fe994d2d05a1
--- /dev/null
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -0,0 +1,75 @@
+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPUGenRegisterInfo(0),
+ TM(tm),
+ TII(tii)
+ { }
+
+//===----------------------------------------------------------------------===//
+// Function handling callbacks - Functions are a seldom-used feature of GPUs,
+// so they are not supported at this time.
+//===----------------------------------------------------------------------===//
+
+const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
+
+const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ return &CalleeSavedReg;
+}
+
+void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(!"Subroutines not supported yet");
+}
+
+unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ assert(!"Subroutines not supported yet");
+ return 0;
+}
+
+unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
+  switch (IndirectIndex) {
+ case 0: return AMDGPU::sub0;
+ case 1: return AMDGPU::sub1;
+ case 2: return AMDGPU::sub2;
+ case 3: return AMDGPU::sub3;
+ case 4: return AMDGPU::sub4;
+ case 5: return AMDGPU::sub5;
+ case 6: return AMDGPU::sub6;
+ case 7: return AMDGPU::sub7;
+ case 8: return AMDGPU::sub8;
+ case 9: return AMDGPU::sub9;
+ case 10: return AMDGPU::sub10;
+ case 11: return AMDGPU::sub11;
+ case 12: return AMDGPU::sub12;
+ case 13: return AMDGPU::sub13;
+ case 14: return AMDGPU::sub14;
+ case 15: return AMDGPU::sub15;
+ default: llvm_unreachable("indirect index out of range");
+ }
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "AMDGPUGenRegisterInfo.inc"
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
new file mode 100644
index 000000000000..1fc88e7455b9
--- /dev/null
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -0,0 +1,66 @@
+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
+/// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUREGISTERINFO_H
+#define AMDGPUREGISTERINFO_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
+ TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ static const uint16_t CalleeSavedReg;
+
+ AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+ assert(!"Unimplemented"); return BitVector();
+ }
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns The ISA reg class that is equivalent to \p RC.
+ virtual const TargetRegisterClass * getISARegClass(
+ const TargetRegisterClass * RC) const {
+ assert(!"Unimplemented"); return NULL;
+ }
+
+ virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
+ assert(!"Unimplemented"); return NULL;
+ }
+
+ const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ unsigned getIndirectSubReg(unsigned IndirectIndex) const;
+
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUREGISTERINFO_H
diff --git a/lib/Target/R600/AMDGPURegisterInfo.td b/lib/Target/R600/AMDGPURegisterInfo.td
new file mode 100644
index 000000000000..b5aca0347fb0
--- /dev/null
+++ b/lib/Target/R600/AMDGPURegisterInfo.td
@@ -0,0 +1,25 @@
+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tablegen register definitions common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AMDGPU" in {
+
+foreach Index = 0-15 in {
+ def sub#Index : SubRegIndex;
+}
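+// The foreach above expands to SubRegIndex defs sub0 through sub15, matching
+// the indices handled by AMDGPURegisterInfo::getIndirectSubReg().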
+
+def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
+
+}
+
+include "R600RegisterInfo.td"
+include "SIRegisterInfo.td"
diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
new file mode 100644
index 000000000000..dea43b874c6f
--- /dev/null
+++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
@@ -0,0 +1,896 @@
+//===-- AMDGPUStructurizeCFG.cpp - Structurize the CFG --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The pass implemented in this file transforms the program's control flow
+/// graph into a form that's suitable for code generation on hardware that
+/// implements control flow by execution masking. This currently includes all
+/// AMD GPUs but may also be useful for other types of hardware.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/PatternMatch.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+namespace {
+
+// Definition of the complex types used in this pass.
+
+typedef std::pair<BasicBlock *, Value *> BBValuePair;
+
+typedef SmallVector<RegionNode*, 8> RNVector;
+typedef SmallVector<BasicBlock*, 8> BBVector;
+typedef SmallVector<BranchInst*, 8> BranchVector;
+typedef SmallVector<BBValuePair, 2> BBValueVector;
+
+typedef SmallPtrSet<BasicBlock *, 8> BBSet;
+
+typedef MapVector<PHINode *, BBValueVector> PhiMap;
+typedef MapVector<BasicBlock *, BBVector> BB2BBVecMap;
+
+typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap;
+typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
+typedef DenseMap<BasicBlock *, Value *> BBPredicates;
+typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
+typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap;
+
+// The name for newly created blocks.
+
+static const char *FlowBlockName = "Flow";
+
+/// \brief Find the nearest common dominator for multiple BasicBlocks
+///
+/// Helper class for AMDGPUStructurizeCFG
+/// TODO: Maybe move into common code
+class NearestCommonDominator {
+
+ DominatorTree *DT;
+
+ DTN2UnsignedMap IndexMap;
+
+ BasicBlock *Result;
+ unsigned ResultIndex;
+ bool ExplicitMentioned;
+
+public:
+ /// \brief Start a new query
+ NearestCommonDominator(DominatorTree *DomTree) {
+ DT = DomTree;
+ Result = 0;
+ }
+
+  /// \brief Add BB as a candidate to the dominator query
+ void addBlock(BasicBlock *BB, bool Remember = true) {
+
+ DomTreeNode *Node = DT->getNode(BB);
+
+ if (Result == 0) {
+ unsigned Numbering = 0;
+ for (;Node;Node = Node->getIDom())
+ IndexMap[Node] = ++Numbering;
+ Result = BB;
+ ResultIndex = 1;
+ ExplicitMentioned = Remember;
+ return;
+ }
+
+ for (;Node;Node = Node->getIDom())
+ if (IndexMap.count(Node))
+ break;
+ else
+ IndexMap[Node] = 0;
+
+ assert(Node && "Dominator tree invalid!");
+
+ unsigned Numbering = IndexMap[Node];
+ if (Numbering > ResultIndex) {
+ Result = Node->getBlock();
+ ResultIndex = Numbering;
+ ExplicitMentioned = Remember && (Result == BB);
+ } else if (Numbering == ResultIndex) {
+ ExplicitMentioned |= Remember;
+ }
+ }
+
+ /// \brief Is "Result" one of the BBs added with "Remember" = True?
+ bool wasResultExplicitMentioned() {
+ return ExplicitMentioned;
+ }
+
+ /// \brief Get the query result
+ BasicBlock *getResult() {
+ return Result;
+ }
+};
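+
+// Typical use (a sketch): seed the query with the first candidate block, add
+// the remaining candidates, then read back the nearest common dominator:
+//   NearestCommonDominator Dominator(DT);
+//   Dominator.addBlock(A);
+//   Dominator.addBlock(B);
+//   BasicBlock *Dom = Dominator.getResult();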
+
+/// \brief Transforms the control flow graph on one single entry/exit region
+/// at a time.
+///
+/// After the transform all "If"/"Then"/"Else" style control flow looks like
+/// this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 |
+/// | /
+/// |/
+/// 3
+/// || Where:
+/// | | 1 = "If" block, calculates the condition
+/// 4 | 2 = "Then" subregion, runs if the condition is true
+/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
+/// |/ 4 = "Else" optional subregion, runs if the condition is false
+/// 5 5 = "End" block, also rejoins the control flow
+/// \endverbatim
+///
+/// Control flow is expressed as a branch where the true exit goes into the
+/// "Then"/"Else" region, while the false exit skips the region
+/// The condition for the optional "Else" region is expressed as a PHI node.
+/// The incomming values of the PHI node are true for the "If" edge and false
+/// for the "Then" edge.
+///
+/// In addition, even complicated loops look like this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 ^ Where:
+/// | / 1 = "Entry" block
+/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
+/// 3 3 = "Flow" block, with back edge to entry block
+/// |
+/// \endverbatim
+///
+/// The back edge of the "Flow" block is always on the false side of the branch
+/// while the true side continues the general flow. So the loop condition
+/// consists of a network of PHI nodes where the true incoming values express
+/// breaks and the false values express continue states.
+class AMDGPUStructurizeCFG : public RegionPass {
+
+ static char ID;
+
+ Type *Boolean;
+ ConstantInt *BoolTrue;
+ ConstantInt *BoolFalse;
+ UndefValue *BoolUndef;
+
+ Function *Func;
+ Region *ParentRegion;
+
+ DominatorTree *DT;
+
+ RNVector Order;
+ BBSet Visited;
+
+ BBPhiMap DeletedPhis;
+ BB2BBVecMap AddedPhis;
+
+ PredMap Predicates;
+ BranchVector Conditions;
+
+ BB2BBMap Loops;
+ PredMap LoopPreds;
+ BranchVector LoopConds;
+
+ RegionNode *PrevNode;
+
+ void orderNodes();
+
+ void analyzeLoops(RegionNode *N);
+
+ Value *invert(Value *Condition);
+
+ Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
+
+ void gatherPredicates(RegionNode *N);
+
+ void collectInfos();
+
+ void insertConditions(bool Loops);
+
+ void delPhiValues(BasicBlock *From, BasicBlock *To);
+
+ void addPhiValues(BasicBlock *From, BasicBlock *To);
+
+ void setPhiValues();
+
+ void killTerminator(BasicBlock *BB);
+
+ void changeExit(RegionNode *Node, BasicBlock *NewExit,
+ bool IncludeDominator);
+
+ BasicBlock *getNextFlow(BasicBlock *Dominator);
+
+ BasicBlock *needPrefix(bool NeedEmpty);
+
+ BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
+
+ void setPrevNode(BasicBlock *BB);
+
+ bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
+
+ bool isPredictableTrue(RegionNode *Node);
+
+ void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
+
+ void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
+
+ void createFlow();
+
+ void rebuildSSA();
+
+public:
+ AMDGPUStructurizeCFG():
+ RegionPass(ID) {
+
+ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ using Pass::doInitialization;
+ virtual bool doInitialization(Region *R, RGPassManager &RGM);
+
+ virtual bool runOnRegion(Region *R, RGPassManager &RGM);
+
+ virtual const char *getPassName() const {
+ return "AMDGPU simplify control flow";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ RegionPass::getAnalysisUsage(AU);
+ }
+
+};
+
+} // end anonymous namespace
+
+char AMDGPUStructurizeCFG::ID = 0;
+
+/// \brief Initialize the types and constants used in the pass
+bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
+ LLVMContext &Context = R->getEntry()->getContext();
+
+ Boolean = Type::getInt1Ty(Context);
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+
+ return false;
+}
+
+/// \brief Build up the general order of nodes
+void AMDGPUStructurizeCFG::orderNodes() {
+ scc_iterator<Region *> I = scc_begin(ParentRegion),
+ E = scc_end(ParentRegion);
+ for (Order.clear(); I != E; ++I) {
+ std::vector<RegionNode *> &Nodes = *I;
+ Order.append(Nodes.begin(), Nodes.end());
+ }
+}
+
+/// \brief Determine the end of the loops
+void AMDGPUStructurizeCFG::analyzeLoops(RegionNode *N) {
+
+ if (N->isSubRegion()) {
+ // Test for exit as back edge
+ BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
+ if (Visited.count(Exit))
+ Loops[Exit] = N->getEntry();
+
+ } else {
+    // Test for successors as back edge
+ BasicBlock *BB = N->getNodeAs<BasicBlock>();
+ BranchInst *Term = cast<BranchInst>(BB->getTerminator());
+
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+
+ if (Visited.count(Succ))
+ Loops[Succ] = BB;
+ }
+ }
+}
+
+/// \brief Invert the given condition
+Value *AMDGPUStructurizeCFG::invert(Value *Condition) {
+
+ // First: Check if it's a constant
+ if (Condition == BoolTrue)
+ return BoolFalse;
+
+ if (Condition == BoolFalse)
+ return BoolTrue;
+
+ if (Condition == BoolUndef)
+ return BoolUndef;
+
+ // Second: If the condition is already inverted, return the original value
+ if (match(Condition, m_Not(m_Value(Condition))))
+ return Condition;
+
+ // Third: Check all the users for an invert
+ BasicBlock *Parent = cast<Instruction>(Condition)->getParent();
+ for (Value::use_iterator I = Condition->use_begin(),
+ E = Condition->use_end(); I != E; ++I) {
+
+ Instruction *User = dyn_cast<Instruction>(*I);
+ if (!User || User->getParent() != Parent)
+ continue;
+
+ if (match(*I, m_Not(m_Specific(Condition))))
+ return *I;
+ }
+
+ // Last option: Create a new instruction
+ return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
+}
+
+/// \brief Build the condition for one edge
+Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
+ bool Invert) {
+ Value *Cond = Invert ? BoolFalse : BoolTrue;
+ if (Term->isConditional()) {
+ Cond = Term->getCondition();
+
+ if (Idx != Invert)
+ Cond = invert(Cond);
+ }
+ return Cond;
+}
+
+/// \brief Analyze the predecessors of each block and build up predicates
+void AMDGPUStructurizeCFG::gatherPredicates(RegionNode *N) {
+
+ RegionInfo *RI = ParentRegion->getRegionInfo();
+ BasicBlock *BB = N->getEntry();
+ BBPredicates &Pred = Predicates[BB];
+ BBPredicates &LPred = LoopPreds[BB];
+
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
+
+ // Ignore it if it's a branch from outside into our region entry
+ if (!ParentRegion->contains(*PI))
+ continue;
+
+ Region *R = RI->getRegionFor(*PI);
+ if (R == ParentRegion) {
+
+ // It's a top level block in our region
+ BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+ if (Succ != BB)
+ continue;
+
+ if (Visited.count(*PI)) {
+ // Normal forward edge
+ if (Term->isConditional()) {
+ // Try to treat it like an ELSE block
+ BasicBlock *Other = Term->getSuccessor(!i);
+ if (Visited.count(Other) && !Loops.count(Other) &&
+ !Pred.count(Other) && !Pred.count(*PI)) {
+
+ Pred[Other] = BoolFalse;
+ Pred[*PI] = BoolTrue;
+ continue;
+ }
+ }
+ Pred[*PI] = buildCondition(Term, i, false);
+
+ } else {
+ // Back edge
+ LPred[*PI] = buildCondition(Term, i, true);
+ }
+ }
+
+ } else {
+
+ // It's an exit from a sub region
+      while (R->getParent() != ParentRegion)
+ R = R->getParent();
+
+ // Edge from inside a subregion to its entry, ignore it
+ if (R == N)
+ continue;
+
+ BasicBlock *Entry = R->getEntry();
+ if (Visited.count(Entry))
+ Pred[Entry] = BoolTrue;
+ else
+ LPred[Entry] = BoolFalse;
+ }
+ }
+}
+
+/// \brief Collect various loop and predicate infos
+void AMDGPUStructurizeCFG::collectInfos() {
+
+  // Reset the predicate and loop infos
+  Predicates.clear();
+  Loops.clear();
+  LoopPreds.clear();
+
+ // Reset the visited nodes
+ Visited.clear();
+
+ for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
+ OI != OE; ++OI) {
+
+ // Analyze all the conditions leading to a node
+ gatherPredicates(*OI);
+
+ // Remember that we've seen this node
+ Visited.insert((*OI)->getEntry());
+
+ // Find the last back edges
+ analyzeLoops(*OI);
+ }
+}
+
+/// \brief Insert the missing branch conditions
+void AMDGPUStructurizeCFG::insertConditions(bool Loops) {
+ BranchVector &Conds = Loops ? LoopConds : Conditions;
+ Value *Default = Loops ? BoolTrue : BoolFalse;
+ SSAUpdater PhiInserter;
+
+ for (BranchVector::iterator I = Conds.begin(),
+ E = Conds.end(); I != E; ++I) {
+
+ BranchInst *Term = *I;
+ assert(Term->isConditional());
+
+ BasicBlock *Parent = Term->getParent();
+ BasicBlock *SuccTrue = Term->getSuccessor(0);
+ BasicBlock *SuccFalse = Term->getSuccessor(1);
+
+ PhiInserter.Initialize(Boolean, "");
+ PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
+ PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
+
+ BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
+
+ NearestCommonDominator Dominator(DT);
+ Dominator.addBlock(Parent, false);
+
+ Value *ParentValue = 0;
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+
+ if (PI->first == Parent) {
+ ParentValue = PI->second;
+ break;
+ }
+ PhiInserter.AddAvailableValue(PI->first, PI->second);
+ Dominator.addBlock(PI->first);
+ }
+
+ if (ParentValue) {
+ Term->setCondition(ParentValue);
+ } else {
+ if (!Dominator.wasResultExplicitMentioned())
+ PhiInserter.AddAvailableValue(Dominator.getResult(), Default);
+
+ Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
+ }
+ }
+}
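+
+// Note on the SSAUpdater usage above (a reading of the code, not upstream
+// docs): every known predicate is registered as the available value in its
+// defining block, and GetValueInMiddleOfBlock then materializes whatever
+// phi nodes are needed to route the proper predicate into the flow block
+// that still carries an undefined branch condition.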
+
+/// \brief Remove all PHI values coming from "From" into "To" and remember
+/// them in DeletedPhis
+void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
+ PhiMap &Map = DeletedPhis[To];
+ for (BasicBlock::iterator I = To->begin(), E = To->end();
+ I != E && isa<PHINode>(*I);) {
+
+ PHINode &Phi = cast<PHINode>(*I++);
+ while (Phi.getBasicBlockIndex(From) != -1) {
+ Value *Deleted = Phi.removeIncomingValue(From, false);
+ Map[&Phi].push_back(std::make_pair(From, Deleted));
+ }
+ }
+}
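+
+// Sketch of the intended effect (hypothetical IR): given
+//   %v = phi i32 [ %a, %From ], [ %b, %Other ]
+// delPhiValues(From, To) rewrites it to
+//   %v = phi i32 [ %b, %Other ]
+// and records the pair (From, %a) in DeletedPhis[To], so that setPhiValues
+// can later reconstruct the routing through the new flow blocks.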
+
+/// \brief Add a dummy PHI value as soon as we know the new predecessor
+void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
+ for (BasicBlock::iterator I = To->begin(), E = To->end();
+ I != E && isa<PHINode>(*I);) {
+
+ PHINode &Phi = cast<PHINode>(*I++);
+ Value *Undef = UndefValue::get(Phi.getType());
+ Phi.addIncoming(Undef, From);
+ }
+ AddedPhis[To].push_back(From);
+}
+
+/// \brief Add the real PHI value as soon as everything is set up
+void AMDGPUStructurizeCFG::setPhiValues() {
+
+ SSAUpdater Updater;
+ for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end();
+ AI != AE; ++AI) {
+
+ BasicBlock *To = AI->first;
+ BBVector &From = AI->second;
+
+ if (!DeletedPhis.count(To))
+ continue;
+
+ PhiMap &Map = DeletedPhis[To];
+ for (PhiMap::iterator PI = Map.begin(), PE = Map.end();
+ PI != PE; ++PI) {
+
+ PHINode *Phi = PI->first;
+ Value *Undef = UndefValue::get(Phi->getType());
+ Updater.Initialize(Phi->getType(), "");
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(To, Undef);
+
+ NearestCommonDominator Dominator(DT);
+ Dominator.addBlock(To, false);
+ for (BBValueVector::iterator VI = PI->second.begin(),
+ VE = PI->second.end(); VI != VE; ++VI) {
+
+ Updater.AddAvailableValue(VI->first, VI->second);
+ Dominator.addBlock(VI->first);
+ }
+
+ if (!Dominator.wasResultExplicitMentioned())
+ Updater.AddAvailableValue(Dominator.getResult(), Undef);
+
+ for (BBVector::iterator FI = From.begin(), FE = From.end();
+ FI != FE; ++FI) {
+
+ int Idx = Phi->getBasicBlockIndex(*FI);
+ assert(Idx != -1);
+ Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI));
+ }
+ }
+
+ DeletedPhis.erase(To);
+ }
+ assert(DeletedPhis.empty());
+}
+
+/// \brief Remove phi values from all successors and then remove the terminator.
+void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (!Term)
+ return;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+
+ delPhiValues(BB, *SI);
+ }
+
+ Term->eraseFromParent();
+}
+
+/// \brief Let node exit(s) point to NewExit
+void AMDGPUStructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
+ bool IncludeDominator) {
+
+ if (Node->isSubRegion()) {
+ Region *SubRegion = Node->getNodeAs<Region>();
+ BasicBlock *OldExit = SubRegion->getExit();
+ BasicBlock *Dominator = 0;
+
+ // Find all the edges from the sub region to the exit
+ for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit);
+ I != E;) {
+
+ BasicBlock *BB = *I++;
+ if (!SubRegion->contains(BB))
+ continue;
+
+ // Modify the edges to point to the new exit
+ delPhiValues(BB, OldExit);
+ BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
+ addPhiValues(BB, NewExit);
+
+ // Find the new dominator (if requested)
+ if (IncludeDominator) {
+ if (!Dominator)
+ Dominator = BB;
+ else
+ Dominator = DT->findNearestCommonDominator(Dominator, BB);
+ }
+ }
+
+ // Change the dominator (if requested)
+ if (Dominator)
+ DT->changeImmediateDominator(NewExit, Dominator);
+
+ // Update the region info
+ SubRegion->replaceExit(NewExit);
+
+ } else {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+ killTerminator(BB);
+ BranchInst::Create(NewExit, BB);
+ addPhiValues(BB, NewExit);
+ if (IncludeDominator)
+ DT->changeImmediateDominator(NewExit, BB);
+ }
+}
+
+/// \brief Create a new flow node and update dominator tree and region info
+BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Dominator) {
+ LLVMContext &Context = Func->getContext();
+ BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
+ Order.back()->getEntry();
+ BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
+ Func, Insert);
+ DT->addNewBlock(Flow, Dominator);
+ ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
+ return Flow;
+}
+
+/// \brief Create a new flow node or reuse the previous one
+BasicBlock *AMDGPUStructurizeCFG::needPrefix(bool NeedEmpty) {
+
+ BasicBlock *Entry = PrevNode->getEntry();
+
+ if (!PrevNode->isSubRegion()) {
+ killTerminator(Entry);
+ if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
+ return Entry;
+
+ }
+
+ // create a new flow node
+ BasicBlock *Flow = getNextFlow(Entry);
+
+ // and wire it up
+ changeExit(PrevNode, Flow, true);
+ PrevNode = ParentRegion->getBBNode(Flow);
+ return Flow;
+}
+
+/// \brief Returns the region exit if possible, otherwise just a new flow node
+BasicBlock *AMDGPUStructurizeCFG::needPostfix(BasicBlock *Flow,
+ bool ExitUseAllowed) {
+
+ if (Order.empty() && ExitUseAllowed) {
+ BasicBlock *Exit = ParentRegion->getExit();
+ DT->changeImmediateDominator(Exit, Flow);
+ addPhiValues(Flow, Exit);
+ return Exit;
+ }
+ return getNextFlow(Flow);
+}
+
+/// \brief Set the previous node
+void AMDGPUStructurizeCFG::setPrevNode(BasicBlock *BB) {
+ PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0;
+}
+
+/// \brief Does BB dominate all the predicates of Node?
+bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
+ BBPredicates &Preds = Predicates[Node->getEntry()];
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+
+ if (!DT->dominates(BB, PI->first))
+ return false;
+ }
+ return true;
+}
+
+/// \brief Can we predict that this node will always be executed?
+bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Node) {
+
+ BBPredicates &Preds = Predicates[Node->getEntry()];
+ bool Dominated = false;
+
+  // The region entry is always true
+ if (PrevNode == 0)
+ return true;
+
+ for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+
+ if (I->second != BoolTrue)
+ return false;
+
+ if (!Dominated && DT->dominates(I->first, PrevNode->getEntry()))
+ Dominated = true;
+ }
+
+ // TODO: The dominator check is too strict
+ return Dominated;
+}
+
+/// Take one node from the order vector and wire it up
+void AMDGPUStructurizeCFG::wireFlow(bool ExitUseAllowed,
+ BasicBlock *LoopEnd) {
+
+ RegionNode *Node = Order.pop_back_val();
+ Visited.insert(Node->getEntry());
+
+ if (isPredictableTrue(Node)) {
+ // Just a linear flow
+ if (PrevNode) {
+ changeExit(PrevNode, Node->getEntry(), true);
+ }
+ PrevNode = Node;
+
+ } else {
+ // Insert extra prefix node (or reuse last one)
+ BasicBlock *Flow = needPrefix(false);
+
+ // Insert extra postfix node (or use exit instead)
+ BasicBlock *Entry = Node->getEntry();
+ BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
+
+ // let it point to entry and next block
+ Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
+ addPhiValues(Flow, Entry);
+ DT->changeImmediateDominator(Entry, Flow);
+
+ PrevNode = Node;
+ while (!Order.empty() && !Visited.count(LoopEnd) &&
+ dominatesPredicates(Entry, Order.back())) {
+ handleLoops(false, LoopEnd);
+ }
+
+ changeExit(PrevNode, Next, false);
+ setPrevNode(Next);
+ }
+}
+
+void AMDGPUStructurizeCFG::handleLoops(bool ExitUseAllowed,
+ BasicBlock *LoopEnd) {
+ RegionNode *Node = Order.back();
+ BasicBlock *LoopStart = Node->getEntry();
+
+ if (!Loops.count(LoopStart)) {
+ wireFlow(ExitUseAllowed, LoopEnd);
+ return;
+ }
+
+ if (!isPredictableTrue(Node))
+ LoopStart = needPrefix(true);
+
+ LoopEnd = Loops[Node->getEntry()];
+ wireFlow(false, LoopEnd);
+ while (!Visited.count(LoopEnd)) {
+ handleLoops(false, LoopEnd);
+ }
+
+ // Create an extra loop end node
+ LoopEnd = needPrefix(false);
+ BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
+ LoopConds.push_back(BranchInst::Create(Next, LoopStart,
+ BoolUndef, LoopEnd));
+ addPhiValues(LoopEnd, LoopStart);
+ setPrevNode(Next);
+}
+
+/// After this function control flow looks like it should be, but
+/// branches and PHI nodes only have undefined conditions.
+void AMDGPUStructurizeCFG::createFlow() {
+
+ BasicBlock *Exit = ParentRegion->getExit();
+ bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
+
+ DeletedPhis.clear();
+ AddedPhis.clear();
+ Conditions.clear();
+ LoopConds.clear();
+
+ PrevNode = 0;
+ Visited.clear();
+
+ while (!Order.empty()) {
+ handleLoops(EntryDominatesExit, 0);
+ }
+
+ if (PrevNode)
+ changeExit(PrevNode, Exit, EntryDominatesExit);
+ else
+ assert(EntryDominatesExit);
+}
+
+/// Handle a rare case where instructions of the disintegrated nodes
+/// no longer dominate all their uses. Not sure if this is really necessary.
+void AMDGPUStructurizeCFG::rebuildSSA() {
+ SSAUpdater Updater;
+ for (Region::block_iterator I = ParentRegion->block_begin(),
+ E = ParentRegion->block_end();
+ I != E; ++I) {
+
+ BasicBlock *BB = *I;
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+ II != IE; ++II) {
+
+ bool Initialized = false;
+ for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
+
+ Next = I->getNext();
+
+ Instruction *User = cast<Instruction>(I->getUser());
+ if (User->getParent() == BB) {
+ continue;
+
+ } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (UserPN->getIncomingBlock(*I) == BB)
+ continue;
+ }
+
+ if (DT->dominates(II, User))
+ continue;
+
+ if (!Initialized) {
+ Value *Undef = UndefValue::get(II->getType());
+ Updater.Initialize(II->getType(), "");
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(BB, II);
+ Initialized = true;
+ }
+ Updater.RewriteUseAfterInsertions(*I);
+ }
+ }
+ }
+}
+
+/// \brief Run the transformation for each region found
+bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
+ if (R->isTopLevelRegion())
+ return false;
+
+ Func = R->getEntry()->getParent();
+ ParentRegion = R;
+
+ DT = &getAnalysis<DominatorTree>();
+
+ orderNodes();
+ collectInfos();
+ createFlow();
+ insertConditions(false);
+ insertConditions(true);
+ setPhiValues();
+ rebuildSSA();
+
+ // Cleanup
+ Order.clear();
+ Visited.clear();
+ DeletedPhis.clear();
+ AddedPhis.clear();
+ Predicates.clear();
+ Conditions.clear();
+ Loops.clear();
+ LoopPreds.clear();
+ LoopConds.clear();
+
+ return true;
+}
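+
+// Pipeline summary (inferred from the calls above): orderNodes and
+// collectInfos analyze the region, createFlow rewires control flow using
+// placeholder (undef) branch conditions, the two insertConditions calls
+// fill in the structured and loop conditions respectively, and
+// setPhiValues/rebuildSSA restore a consistent SSA form afterwards.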
+
+/// \brief Create the pass
+Pass *llvm::createAMDGPUStructurizeCFGPass() {
+ return new AMDGPUStructurizeCFG();
+}
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
new file mode 100644
index 000000000000..0f356a1c3f11
--- /dev/null
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -0,0 +1,87 @@
+//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AMDGPUGenSubtargetInfo.inc"
+
+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
+ AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
+ InstrItins = getInstrItineraryForCPU(CPU);
+
+ memset(CapsOverride, 0, sizeof(*CapsOverride)
+ * AMDGPUDeviceInfo::MaxNumberCapabilities);
+ // Default card
+ StringRef GPU = CPU;
+ Is64bit = false;
+ DefaultSize[0] = 64;
+ DefaultSize[1] = 1;
+ DefaultSize[2] = 1;
+ ParseSubtargetFeatures(GPU, FS);
+ DevName = GPU;
+ Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
+}
+
+AMDGPUSubtarget::~AMDGPUSubtarget() {
+ delete Device;
+}
+
+bool
+AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
+ assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
+ "Caps index is out of bounds!");
+ return CapsOverride[caps];
+}
+bool
+AMDGPUSubtarget::is64bit() const {
+ return Is64bit;
+}
+bool
+AMDGPUSubtarget::isTargetELF() const {
+ return false;
+}
+size_t
+AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
+ if (dim > 3) {
+ return 1;
+ } else {
+ return DefaultSize[dim];
+ }
+}
+
+std::string
+AMDGPUSubtarget::getDataLayout() const {
+ if (!Device) {
+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
+ }
+ return Device->getDataLayout();
+}
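+
+// Decoding the fallback string above, per the generic LLVM data layout
+// grammar: "e" selects little-endian, "p:32:32:32" requests 32-bit pointers
+// with 32-bit ABI and preferred alignment, and entries such as "i32:32:32"
+// or "v128:128:128" pin the ABI/preferred alignment of the corresponding
+// integer and vector types.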
+
+std::string
+AMDGPUSubtarget::getDeviceName() const {
+ return DevName;
+}
+const AMDGPUDevice *
+AMDGPUSubtarget::device() const {
+ return Device;
+}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
new file mode 100644
index 000000000000..1973fc6d544c
--- /dev/null
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -0,0 +1,65 @@
+//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUSUBTARGET_H
+#define AMDGPUSUBTARGET_H
+#include "AMDILDevice.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#define MAX_CB_SIZE (1 << 16)
+
+namespace llvm {
+
+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
+private:
+ bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
+ const AMDGPUDevice *Device;
+ size_t DefaultSize[3];
+ std::string DevName;
+ bool Is64bit;
+ bool Is32on64bit;
+ bool DumpCode;
+ bool R600ALUInst;
+
+ InstrItineraryData InstrItins;
+
+public:
+ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+ virtual ~AMDGPUSubtarget();
+
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ virtual void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool isOverride(AMDGPUDeviceInfo::Caps) const;
+ bool is64bit() const;
+
+ // Helper functions to simplify if statements
+ bool isTargetELF() const;
+ const AMDGPUDevice* device() const;
+ std::string getDataLayout() const;
+ std::string getDeviceName() const;
+ virtual size_t getDefaultSize(uint32_t dim) const;
+ bool dumpCode() const { return DumpCode; }
+ bool r600ALUEncoding() const { return R600ALUInst; }
+
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUSUBTARGET_H
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
new file mode 100644
index 000000000000..e7ea876e2abb
--- /dev/null
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -0,0 +1,164 @@
+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The AMDGPU target machine contains all of the hardware specific
+/// information needed to emit code for R600 and SI GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "R600MachineScheduler.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeR600Target() {
+ // Register the target
+ RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
+}
+
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new R600SchedStrategy());
+}
+
+static MachineSchedRegistry
+SchedCustomRegistry("r600", "Run R600's custom scheduler",
+ createR600MachineScheduler);
+
+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
+                                         StringRef CPU, StringRef FS,
+                                         TargetOptions Options,
+                                         Reloc::Model RM, CodeModel::Model CM,
+                                         CodeGenOpt::Level OptLevel)
+  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+ Subtarget(TT, CPU, FS),
+ Layout(Subtarget.getDataLayout()),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ Subtarget.device()->getStackAlignment(), 0),
+ IntrinsicInfo(this),
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ // TLInfo uses InstrInfo so it must be initialized after.
+ if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ InstrInfo = new R600InstrInfo(*this);
+ TLInfo = new R600TargetLowering(*this);
+ } else {
+ InstrInfo = new SIInstrInfo(*this);
+ TLInfo = new SITargetLowering(*this);
+ }
+}
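+
+// The HD6XXX comparison above is the R600-versus-SI split (as inferred from
+// this constructor and the pass setup below): generations up to and
+// including HD6XXX take the R600 code paths, anything newer takes the SI
+// ones.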
+
+AMDGPUTargetMachine::~AMDGPUTargetMachine() {
+}
+
+namespace {
+class AMDGPUPassConfig : public TargetPassConfig {
+public:
+ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ enablePass(&MachineSchedulerID);
+ MachineSchedRegistry::setDefault(createR600MachineScheduler);
+ }
+ }
+
+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
+ return getTM<AMDGPUTargetMachine>();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // End of anonymous namespace
+
+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AMDGPUPassConfig(this, PM);
+}
+
+bool
+AMDGPUPassConfig::addPreISel() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createAMDGPUStructurizeCFGPass());
+ addPass(createSIAnnotateControlFlowPass());
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addInstSelector() {
+ addPass(createAMDGPUPeepholeOpt(*TM));
+ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+    // The callbacks this pass uses are not implemented yet on SI.
+ addPass(createAMDGPUIndirectAddressingPass(*TM));
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreRegAlloc() {
+ addPass(createAMDGPUConvertToISAPass(*TM));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPostRegAlloc() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createSIInsertWaits(*TM));
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreSched2() {
+
+ addPass(&IfConverterID);
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreEmitPass() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createAMDGPUCFGPreparationPass(*TM));
+ addPass(createAMDGPUCFGStructurizerPass(*TM));
+ addPass(createR600EmitClauseMarkers(*TM));
+ addPass(createR600ExpandSpecialInstrsPass(*TM));
+ addPass(createR600ControlFlowFinalizer(*TM));
+ addPass(&FinalizeMachineBundlesID);
+ } else {
+ addPass(createSILowerControlFlowPass(*TM));
+ }
+
+ return false;
+}
+
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
new file mode 100644
index 000000000000..2afe7873a90c
--- /dev/null
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -0,0 +1,70 @@
+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The AMDGPU TargetMachine interface definition for hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_TARGET_MACHINE_H
+#define AMDGPU_TARGET_MACHINE_H
+
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILIntrinsicInfo.h"
+#include "R600ISelLowering.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DataLayout.h"
+
+namespace llvm {
+
+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
+
+class AMDGPUTargetMachine : public LLVMTargetMachine {
+
+ AMDGPUSubtarget Subtarget;
+ const DataLayout Layout;
+ AMDGPUFrameLowering FrameLowering;
+ AMDGPUIntrinsicInfo IntrinsicInfo;
+ const AMDGPUInstrInfo * InstrInfo;
+ AMDGPUTargetLowering * TLInfo;
+ const InstrItineraryData* InstrItins;
+
+public:
+  AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+                      StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ ~AMDGPUTargetMachine();
+ virtual const AMDGPUFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
+ return &IntrinsicInfo;
+ }
+ virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
+ virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
+ virtual const AMDGPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+ virtual AMDGPUTargetLowering * getTargetLowering() const {
+ return TLInfo;
+ }
+ virtual const InstrItineraryData* getInstrItineraryData() const {
+ return InstrItins;
+ }
+ virtual const DataLayout* getDataLayout() const { return &Layout; }
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+} // End namespace llvm
+
+#endif // AMDGPU_TARGET_MACHINE_H
diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h
new file mode 100644
index 000000000000..39ab664d1018
--- /dev/null
+++ b/lib/Target/R600/AMDIL.h
@@ -0,0 +1,121 @@
+//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// This file contains the entry points for global functions defined in the LLVM
+/// AMDGPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H
+#define AMDIL_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+
+// The next three values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID 1
+#define DEFAULT_GDS_ID 1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS 8
+
+#define OCL_DEVICE_RV710 0x0001
+#define OCL_DEVICE_RV730 0x0002
+#define OCL_DEVICE_RV770 0x0004
+#define OCL_DEVICE_CEDAR 0x0008
+#define OCL_DEVICE_REDWOOD 0x0010
+#define OCL_DEVICE_JUNIPER 0x0020
+#define OCL_DEVICE_CYPRESS 0x0040
+#define OCL_DEVICE_CAICOS 0x0080
+#define OCL_DEVICE_TURKS 0x0100
+#define OCL_DEVICE_BARTS 0x0200
+#define OCL_DEVICE_CAYMAN 0x0400
+#define OCL_DEVICE_ALL 0x3FFF
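+
+// The per-device OCL_DEVICE_* values are single-bit masks, so device sets
+// compose with bitwise OR; e.g. (OCL_DEVICE_RV710 | OCL_DEVICE_RV730)
+// denotes both chips at once (illustrative use, not taken from this file).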
+
+/// The number of function IDs that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+namespace llvm {
+class AMDGPUInstrPrinter;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+// Instruction selection passes.
+FunctionPass*
+ createAMDGPUISelDag(TargetMachine &TM);
+FunctionPass*
+ createAMDGPUPeepholeOpt(TargetMachine &TM);
+
+// Pre emit passes.
+FunctionPass*
+ createAMDGPUCFGPreparationPass(TargetMachine &TM);
+FunctionPass*
+ createAMDGPUCFGStructurizerPass(TargetMachine &TM);
+
+extern Target TheAMDGPUTarget;
+} // end namespace llvm;
+
+// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm {
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a separate piece of memory that is unique from other
+/// memory locations.
+namespace AMDGPUAS {
+enum AddressSpaces {
+ PRIVATE_ADDRESS = 0, ///< Address space for private memory.
+ GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
+ CONSTANT_ADDRESS = 2, ///< Address space for constant memory
+ LOCAL_ADDRESS = 3, ///< Address space for local memory.
+ REGION_ADDRESS = 4, ///< Address space for region memory.
+ ADDRESS_NONE = 5, ///< Address space for unknown memory.
+  PARAM_D_ADDRESS = 6, ///< Address space for directly addressable parameter memory (CONST0)
+  PARAM_I_ADDRESS = 7, ///< Address space for indirectly addressable parameter memory (VTX1)
+ CONSTANT_BUFFER_0 = 8,
+ CONSTANT_BUFFER_1 = 9,
+ CONSTANT_BUFFER_2 = 10,
+ CONSTANT_BUFFER_3 = 11,
+ CONSTANT_BUFFER_4 = 12,
+ CONSTANT_BUFFER_5 = 13,
+ CONSTANT_BUFFER_6 = 14,
+ CONSTANT_BUFFER_7 = 15,
+ CONSTANT_BUFFER_8 = 16,
+ CONSTANT_BUFFER_9 = 17,
+ CONSTANT_BUFFER_10 = 18,
+ CONSTANT_BUFFER_11 = 19,
+ CONSTANT_BUFFER_12 = 20,
+ CONSTANT_BUFFER_13 = 21,
+ CONSTANT_BUFFER_14 = 22,
+ CONSTANT_BUFFER_15 = 23,
+ LAST_ADDRESS = 24
+};
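+
+// Illustrative mapping to IR (a sketch, not upstream text): these values
+// surface as pointer address spaces, so a load from global memory in the
+// typed-pointer IR of this era would read
+//   %v = load i32 addrspace(1)* %ptr
+// while PRIVATE_ADDRESS (0) is the default and needs no annotation.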
+
+} // namespace AMDGPUAS
+
+} // end namespace llvm
+#endif // AMDIL_H
diff --git a/lib/Target/R600/AMDIL7XXDevice.cpp b/lib/Target/R600/AMDIL7XXDevice.cpp
new file mode 100644
index 000000000000..ea6ac34f570c
--- /dev/null
+++ b/lib/Target/R600/AMDIL7XXDevice.cpp
@@ -0,0 +1,115 @@
+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDIL7XXDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevice.h"
+
+using namespace llvm;
+
+AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
+ setCaps();
+ std::string name = mSTM->getDeviceName();
+ if (name == "rv710") {
+ DeviceFlag = OCL_DEVICE_RV710;
+ } else if (name == "rv730") {
+ DeviceFlag = OCL_DEVICE_RV730;
+ } else {
+ DeviceFlag = OCL_DEVICE_RV770;
+ }
+}
+
+AMDGPU7XXDevice::~AMDGPU7XXDevice() {
+}
+
+void AMDGPU7XXDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+}
+
+size_t AMDGPU7XXDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_700;
+ }
+ return 0;
+}
+
+size_t AMDGPU7XXDevice::getWavefrontSize() const {
+ return AMDGPUDevice::HalfWavefrontSize;
+}
+
+uint32_t AMDGPU7XXDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD4XXX;
+}
+
+uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
+ switch (DeviceID) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case GLOBAL_ID:
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ case ARENA_UAV_ID:
+ break;
+ case LDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ }
+ break;
+ case SCRATCH_ID:
+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ }
+ break;
+ case GDS_ID:
+ assert(0 && "GDS UAV ID is not supported on this chip");
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ }
+ break;
+  }
+
+ return 0;
+}
+
+uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
+ return 1;
+}
+
+AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
+ setCaps();
+}
+
+AMDGPU770Device::~AMDGPU770Device() {
+}
+
+void AMDGPU770Device::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ }
+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
+ mHWBits.reset(AMDGPUDeviceInfo::LongOps);
+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+}
+
+size_t AMDGPU770Device::getWavefrontSize() const {
+ return AMDGPUDevice::WavefrontSize;
+}
+
+AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
+}
+
+AMDGPU710Device::~AMDGPU710Device() {
+}
+
+size_t AMDGPU710Device::getWavefrontSize() const {
+ return AMDGPUDevice::QuarterWavefrontSize;
+}
diff --git a/lib/Target/R600/AMDIL7XXDevice.h b/lib/Target/R600/AMDIL7XXDevice.h
new file mode 100644
index 000000000000..1cf4ca415a4c
--- /dev/null
+++ b/lib/Target/R600/AMDIL7XXDevice.h
@@ -0,0 +1,72 @@
+//==-- AMDIL7XXDevice.h - Define the 7XX Device for AMDIL ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDIL7XXDEVICEIMPL_H
+#define AMDIL7XXDEVICEIMPL_H
+#include "AMDILDevice.h"
+
+namespace llvm {
+class AMDGPUSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
+///
+/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
+/// support the minimal features that are required to be considered OpenCL 1.0
+/// compliant and nothing more.
+class AMDGPU7XXDevice : public AMDGPUDevice {
+public:
+ AMDGPU7XXDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU7XXDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getResourceID(uint32_t DeviceID) const;
+ virtual uint32_t getMaxNumUAVs() const;
+
+protected:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPU770Device class represents the RV770 chip and its
+/// derivative cards.
+///
+/// The difference between this device and the base class is that this device
+/// adds support for double precision and has a larger wavefront size.
+class AMDGPU770Device : public AMDGPU7XXDevice {
+public:
+ AMDGPU770Device(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU770Device();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPU710Device class derives from the 7XX base class.
+///
+/// This class is a smaller derivative, so we need to overload some of the
+/// functions in order to correctly specify this information.
+class AMDGPU710Device : public AMDGPU7XXDevice {
+public:
+ AMDGPU710Device(AMDGPUSubtarget *ST);
+ virtual ~AMDGPU710Device();
+ virtual size_t getWavefrontSize() const;
+};
+
+} // namespace llvm
+#endif // AMDIL7XXDEVICEIMPL_H
diff --git a/lib/Target/R600/AMDILBase.td b/lib/Target/R600/AMDILBase.td
new file mode 100644
index 000000000000..c12cedcf7fd5
--- /dev/null
+++ b/lib/Target/R600/AMDILBase.td
@@ -0,0 +1,85 @@
+//===- AMDILBase.td - AMDIL Target Machine ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// AMDIL Subtarget features.
+//===----------------------------------------------------------------------===//
+def FeatureFP64 : SubtargetFeature<"fp64",
+ "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
+ "true",
+ "Enable 64bit double precision operations">;
+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
+ "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
+ "true",
+ "Enable byte addressable stores">;
+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
+ "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
+ "true",
+        "Enable duplicate barrier detection (HD5XXX or later).">;
+def FeatureImages : SubtargetFeature<"images",
+ "CapsOverride[AMDGPUDeviceInfo::Images]",
+ "true",
+ "Enable image functions">;
+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
+ "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
+ "true",
+        "Generate multiple UAV code (HD5XXX family or later)">;
+def FeatureMacroDB : SubtargetFeature<"macrodb",
+ "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
+ "true",
+ "Use internal macrodb, instead of macrodb in driver">;
+def FeatureNoAlias : SubtargetFeature<"noalias",
+ "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
+ "true",
+        "Assert that all kernel argument pointers are not aliased">;
+def FeatureNoInline : SubtargetFeature<"no-inline",
+ "CapsOverride[AMDGPUDeviceInfo::NoInline]",
+ "true",
+        "Specify whether to disable inlining of functions">;
+
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+ "Is64bit",
+ "false",
+ "Specify if 64bit addressing should be used.">;
+
+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
+ "Is32on64bit",
+ "false",
+ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+def FeatureDebug : SubtargetFeature<"debug",
+ "CapsOverride[AMDGPUDeviceInfo::Debug]",
+ "true",
+ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+ "DumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter">;
+
+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
+ "R600ALUInst",
+ "false",
+ "Older version of ALU instructions encoding.">;
+
+
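+// These features plug into the generic subtarget machinery, so they can be
+// toggled from the command line; a plausible (illustrative) invocation:
+//   llc -march=r600 -mcpu=rv770 -mattr=+fp64 kernel.ll
+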
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+
+include "AMDILRegisterInfo.td"
+include "AMDILInstrInfo.td"
+
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
new file mode 100644
index 000000000000..b0cd0f9756a4
--- /dev/null
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -0,0 +1,3051 @@
+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+
+#define DEBUGME 0
+#define DEBUG_TYPE "structcfg"
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDIL.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+ "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+ "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+ "pattern matched");
+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
+ "pattern matched");
+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
+ "matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct {
+#define SHOWNEWINSTR(i) \
+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+ if (b) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(errs()); \
+ errs() << "\n"; \
+ } \
+}
+
+#define INVALIDSCCNUM -1
+#define INVALIDREGNUM 0
+
+template<class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
+ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
+ iterEnd = LoopInfo.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->print(OS, 0);
+ }
+}
+
+template<class NodeT>
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
+ size_t sz = Src.size();
+ for (size_t i = 0; i < sz/2; ++i) {
+ NodeT *t = Src[i];
+ Src[i] = Src[sz - i - 1];
+ Src[sz - i - 1] = t;
+ }
+}
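+
+// Note: this helper is equivalent to std::reverse(Src.begin(), Src.end());
+// it is presumably kept local to avoid pulling in <algorithm> here.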
+
+} //end namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+template<class PassT>
+struct CFGStructTraits {
+};
+
+template <class InstrT>
+class BlockInformation {
+public:
+ bool isRetired;
+ int sccNum;
+ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
+ //Instructions defining the corresponding successor.
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+};
+
+template <class BlockT, class InstrT, class RegiT>
+class LandInformation {
+public:
+ BlockT *landBlk;
+  std::set<RegiT> breakInitRegs; // Registers that need "reg = 0" before
+                                 // WHILELOOP(thisloop), i.e. initialized
+                                 // before entering this loop.
+  std::set<RegiT> contInitRegs;  // Registers that need "reg = 0" after
+                                 // WHILELOOP(thisloop), i.e. initialized
+                                 // after entering this loop.
+  std::set<RegiT> endbranchInitRegs; // Initialized before entering this loop,
+                                     // at the loop land block; the branch
+                                     // condition tests these registers.
+  std::set<RegiT> breakOnRegs; // Registers that need "if (reg) break endif"
+                               // after ENDLOOP(thisloop) to break out of
+                               // outerLoopOf(thisLoop).
+  std::set<RegiT> contOnRegs;  // Registers that need "if (reg) continue endif"
+                               // after ENDLOOP(thisloop) to continue on
+                               // outerLoopOf(thisLoop).
+ LandInformation() : landBlk(NULL) {}
+};
+
+} //end of namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
+template<class PassT>
+class CFGStructurizer {
+public:
+ typedef enum {
+ Not_SinglePath = 0,
+ SinglePath_InPath = 1,
+ SinglePath_NotInPath = 2
+ } PathToKind;
+
+public:
+ typedef typename PassT::InstructionType InstrT;
+ typedef typename PassT::FunctionType FuncT;
+ typedef typename PassT::DominatortreeType DomTreeT;
+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
+ typedef typename PassT::LoopinfoType LoopInfoT;
+
+ typedef GraphTraits<FuncT *> FuncGTraits;
+ //typedef FuncGTraits::nodes_iterator BlockIterator;
+ typedef typename FuncT::iterator BlockIterator;
+
+ typedef typename FuncGTraits::NodeType BlockT;
+ typedef GraphTraits<BlockT *> BlockGTraits;
+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
+ //typedef BlockGTraits::succ_iterator InstructionIterator;
+ typedef typename BlockT::iterator InstrIterator;
+
+ typedef CFGStructTraits<PassT> CFGTraits;
+ typedef BlockInformation<InstrT> BlockInfo;
+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
+
+ typedef int RegiT;
+ typedef typename PassT::LoopType LoopT;
+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
+ //landing info for loop break
+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
+
+public:
+ CFGStructurizer();
+ ~CFGStructurizer();
+
+ /// Perform the CFG structurization
+ bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+
+ /// Perform the CFG preparation
+ bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
+
+private:
+ void reversePredicateSetter(typename BlockT::iterator);
+ void orderBlocks();
+ void printOrderedBlocks(llvm::raw_ostream &OS);
+ int patternMatch(BlockT *CurBlock);
+ int patternMatchGroup(BlockT *CurBlock);
+
+ int serialPatternMatch(BlockT *CurBlock);
+ int ifPatternMatch(BlockT *CurBlock);
+ int switchPatternMatch(BlockT *CurBlock);
+ int loopendPatternMatch(BlockT *CurBlock);
+ int loopPatternMatch(BlockT *CurBlock);
+
+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ //int loopWithoutBreak(BlockT *);
+
+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
+ BlockT *ContBlock, LoopT *contLoop);
+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT **LandBlockPtr);
+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT *LandBlock,
+ bool Detail = false);
+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
+
+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
+ BlockT *TrueBlock, BlockT *FalseBlock,
+ BlockT *LandBlock);
+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
+ BlockT *ExitLandBlock, RegiT SetReg);
+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
+ RegiT SetReg);
+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
+ std::set<BlockT*> &ExitBlockSet,
+ BlockT *ExitLandBlk);
+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
+ BlockTSmallerVector &ExitingBlocks,
+ BlockTSmallerVector &ExitBlocks);
+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
+ void removeUnconditionalBranch(BlockT *SrcBlock);
+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
+
+ void removeSuccessor(BlockT *SrcBlock);
+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
+
+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
+ InstrIterator InsertPos);
+
+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
+ int getSCCNum(BlockT *srcBlk);
+
+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
+ bool isRetiredBlock(BlockT *SrcBlock);
+ bool isActiveLoophead(BlockT *CurBlock);
+ bool needMigrateBlock(BlockT *Block);
+
+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
+ BlockTSmallerVector &exitBlocks,
+ std::set<BlockT*> &ExitBlockSet);
+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
+ BlockT *getLoopLandBlock(LoopT *LoopRep);
+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
+
+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
+
+ bool hasBackEdge(BlockT *curBlock);
+ unsigned getLoopDepth (LoopT *LoopRep);
+ int countActiveBlock(
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+
+private:
+ DomTreeT *domTree;
+ PostDomTreeT *postDomTree;
+ LoopInfoT *loopInfo;
+ PassT *passRep;
+ FuncT *funcRep;
+
+ BlockInfoMap blockInfoMap;
+ LoopLandInfoMap loopLandInfoMap;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
+ const AMDGPURegisterInfo *TRI;
+
+}; //template class CFGStructurizer
+
+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
+ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
+}
+
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
+ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
+ E = blockInfoMap.end(); I != E; ++I) {
+ delete I->second;
+ }
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
+ const AMDGPURegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ bool changed = false;
+
+ //FIXME: if not reducible flow graph, make it so ???
+
+ if (DEBUGME) {
+ errs() << "AMDGPUCFGStructurizer::prepare\n";
+ }
+
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
+
+ for (typename LoopInfoT::iterator iter = loopInfo->begin(),
+ iterEnd = loopInfo->end();
+ iter != iterEnd; ++iter) {
+ LoopT* loopRep = (*iter);
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (exitingBlks.size() == 0) {
+ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
+ if (dummyExitBlk != NULL)
+ retBlks.push_back(dummyExitBlk);
+ }
+ }
+
+ // Remove unconditional branch instr.
+ // Add dummy exit block iff there are multiple returns.
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
+ iterBlk != iterEndBlk;
+ ++iterBlk) {
+ BlockT *curBlk = *iterBlk;
+ removeUnconditionalBranch(curBlk);
+ removeRedundantConditionalBranch(curBlk);
+ if (CFGTraits::isReturnBlock(curBlk)) {
+ retBlks.push_back(curBlk);
+ }
+ assert(curBlk->succ_size() <= 2);
+ } //for
+
+ if (retBlks.size() >= 2) {
+ addDummyExitBlock(retBlks);
+ changed = true;
+ }
+
+ return changed;
+} //CFGStructurizer::prepare
+
+template<class PassT>
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
+ const AMDGPURegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ //Assume reducible CFG...
+ if (DEBUGME) {
+ errs() << "AMDGPUCFGStructurizer::run\n";
+ func.viewCFG();
+ }
+
+ domTree = CFGTraits::getDominatorTree(pass);
+ if (DEBUGME) {
+ domTree->print(errs(), (const llvm::Module*)0);
+ }
+
+ postDomTree = CFGTraits::getPostDominatorTree(pass);
+ if (DEBUGME) {
+ postDomTree->print(errs());
+ }
+
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+#ifdef STRESSTEST
+  // Use the worst block ordering to test the algorithm.
+ ReverseVector(orderedBlks);
+#endif
+
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+ int numIter = 0;
+ bool finish = false;
+ BlockT *curBlk;
+ bool makeProgress = false;
+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
+ orderedBlks.end());
+
+ do {
+ ++numIter;
+ if (DEBUGME) {
+ errs() << "numIter = " << numIter
+             << ", numRemainedBlk = " << numRemainedBlk << "\n";
+ }
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin();
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlkEnd = orderedBlks.end();
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ sccBeginIter = iterBlk;
+ BlockT *sccBeginBlk = NULL;
+    int sccNumBlk = 0; // Number of active blocks in the current SCC,
+                       // (re)initialized to the maximum possible number
+                       // whenever a new SCC begins.
+    int sccNumIter; // Number of iterations spent on this SCC.
+
+ while (iterBlk != iterBlkEnd) {
+ curBlk = *iterBlk;
+
+ if (sccBeginBlk == NULL) {
+ sccBeginIter = iterBlk;
+ sccBeginBlk = curBlk;
+ sccNumIter = 0;
+ sccNumBlk = numRemainedBlk; // Init to maximum possible number.
+ if (DEBUGME) {
+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
+ errs() << "\n";
+ }
+ }
+
+ if (!isRetiredBlock(curBlk)) {
+ patternMatch(curBlk);
+ }
+
+ ++iterBlk;
+
+ bool contNextScc = true;
+ if (iterBlk == iterBlkEnd
+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+        // Just finished one SCC.
+ ++sccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
+ if (DEBUGME) {
+ errs() << "Can't reduce SCC " << getSCCNum(curBlk)
+ << ", sccNumIter = " << sccNumIter;
+          errs() << ", doesn't make any progress\n";
+ }
+ contNextScc = true;
+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
+ sccNumBlk = sccRemainedNumBlk;
+ iterBlk = sccBeginIter;
+ contNextScc = false;
+ if (DEBUGME) {
+ errs() << "repeat processing SCC" << getSCCNum(curBlk)
+                 << ", sccNumIter = " << sccNumIter << "\n";
+ func.viewCFG();
+ }
+ } else {
+ // Finish the current scc.
+ contNextScc = true;
+ }
+ } else {
+ // Continue on next component in the current scc.
+ contNextScc = false;
+ }
+
+ if (contNextScc) {
+ sccBeginBlk = NULL;
+ }
+ } //while, "one iteration" over the function.
+
+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
+ if (entryBlk->succ_size() == 0) {
+ finish = true;
+ if (DEBUGME) {
+ errs() << "Reduce to one block\n";
+ }
+ } else {
+ int newnumRemainedBlk
+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ // consider cloned blocks ??
+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
+ makeProgress = true;
+ numRemainedBlk = newnumRemainedBlk;
+ } else {
+ makeProgress = false;
+ if (DEBUGME) {
+ errs() << "No progress\n";
+ }
+ }
+ }
+ } while (!finish && makeProgress);
+
+ // Misc wrap up to maintain the consistency of the Function representation.
+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+
+ // Detach retired Block, release memory.
+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ if ((*iterMap).second && (*iterMap).second->isRetired) {
+ assert(((*iterMap).first)->getNumber() != -1);
+ if (DEBUGME) {
+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
+ }
+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
+ }
+ delete (*iterMap).second;
+ }
+ blockInfoMap.clear();
+
+ // clear loopLandInfoMap
+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ delete (*iterMap).second;
+ }
+ loopLandInfoMap.clear();
+
+ if (DEBUGME) {
+ func.viewCFG();
+ }
+
+ if (!finish) {
+    assert(!"IRREDUCIBLE_CF");
+ }
+
+ return true;
+} //CFGStructurizer::run
+
+/// Print the ordered Blocks.
+///
+template<class PassT>
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
+ size_t i = 0;
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
+ iterBlk != iterBlkEnd;
+ ++iterBlk, ++i) {
+ os << "BB" << (*iterBlk)->getNumber();
+ os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
+ if (i != 0 && i % 10 == 0) {
+ os << "\n";
+ } else {
+ os << " ";
+ }
+ }
+} //printOrderedBlocks
+
+/// Compute the reversed DFS post order of Blocks
+///
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
+ int sccNum = 0;
+ BlockT *bb;
+ for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
+ sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
+ std::vector<BlockT *> &sccNext = *sccIter;
+ for (typename std::vector<BlockT *>::const_iterator
+ blockIter = sccNext.begin(), blockEnd = sccNext.end();
+ blockIter != blockEnd; ++blockIter) {
+ bb = *blockIter;
+ orderedBlks.push_back(bb);
+ recordSccnum(bb, sccNum);
+ }
+ }
+
+  // Walk through all the blocks in the function to check for unreachable blocks.
+ for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
+ blockEnd1 = FuncGTraits::nodes_end(funcRep);
+ blockIter1 != blockEnd1; ++blockIter1) {
+ BlockT *bb = &(*blockIter1);
+ sccNum = getSCCNum(bb);
+ if (sccNum == INVALIDSCCNUM) {
+ errs() << "unreachable block BB" << bb->getNumber() << "\n";
+ }
+ }
+} //orderBlocks
+
+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
+ int numMatch = 0;
+ int curMatch;
+
+ if (DEBUGME) {
+ errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
+ }
+
+ while ((curMatch = patternMatchGroup(curBlk)) > 0) {
+ numMatch += curMatch;
+ }
+
+ if (DEBUGME) {
+ errs() << "End patternMatch BB" << curBlk->getNumber()
+ << ", numMatch = " << numMatch << "\n";
+ }
+
+ return numMatch;
+} //patternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
+ int numMatch = 0;
+ numMatch += serialPatternMatch(curBlk);
+ numMatch += ifPatternMatch(curBlk);
+ numMatch += loopendPatternMatch(curBlk);
+ numMatch += loopPatternMatch(curBlk);
+ return numMatch;
+}//patternMatchGroup
+
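+// A serial pattern is a straight-line pair of blocks:
+//
+// curBlk (exactly one successor)
+// |
+// childBlk (exactly one predecessor, not an active loop header)
+//
+// The two blocks are merged into one by mergeSerialBlock.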
+template<class PassT>
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 1) {
+ return 0;
+ }
+
+ BlockT *childBlk = *curBlk->succ_begin();
+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
+ return 0;
+ }
+
+ mergeSerialBlock(curBlk, childBlk);
+ ++numSerialPatternMatch;
+ return 1;
+} //serialPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
+ // An if-pattern requires exactly two successor edges.
+ if (curBlk->succ_size() != 2) {
+ return 0;
+ }
+
+ if (hasBackEdge(curBlk)) {
+ return 0;
+ }
+
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
+ if (branchInstr == NULL) {
+ return 0;
+ }
+
+ assert(CFGTraits::isCondBranch(branchInstr));
+
+ BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
+ BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
+ BlockT *landBlk;
+ int cloned = 0;
+
+ // TODO: Simplify
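+ // Identify the landing block of the if-pattern:
+ // (a) both arms rejoin at a common successor,
+ // (b) both arms end with no successors (landBlk == NULL),
+ // (c) one arm jumps directly to the other (if-then with no else),
+ // (d) one arm is a detached continue/break of the same loop.
+ // Any other shape is a jump into the middle of an if and is handled by
+ // handleJumpintoIf.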
+ if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
+ && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
+ landBlk = *trueBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
+ landBlk = NULL;
+ } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
+ landBlk = falseBlk;
+ falseBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && *falseBlk->succ_begin() == trueBlk) {
+ landBlk = trueBlk;
+ trueBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
+ landBlk = *falseBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
+ landBlk = *trueBlk->succ_begin();
+ } else {
+ return handleJumpintoIf(curBlk, trueBlk, falseBlk);
+ }
+
+ // improveSimpleJumpintoIf can handle the case where landBlk == NULL, but the
+ // new BB created for the landBlk == NULL case may introduce a new challenge
+ // to the reduction process.
+ if (landBlk != NULL &&
+ ((trueBlk && trueBlk->pred_size() > 1)
+ || (falseBlk && falseBlk->pred_size() > 1))) {
+ cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
+ }
+
+ if (trueBlk && trueBlk->pred_size() > 1) {
+ trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
+ ++cloned;
+ }
+
+ if (falseBlk && falseBlk->pred_size() > 1) {
+ falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
+ ++cloned;
+ }
+
+ mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
+
+ ++numIfPatternMatch;
+
+ numClonedBlock += cloned;
+
+ return 1 + cloned;
+} //ifPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
+ return 0;
+} //switchPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ typename std::vector<LoopT *> nestedLoops;
+ while (loopRep) {
+ nestedLoops.push_back(loopRep);
+ loopRep = loopRep->getParentLoop();
+ }
+
+ if (nestedLoops.size() == 0) {
+ return 0;
+ }
+
+ // Process nested loops from outside to inside, so a "continue" to an outer
+ // loop won't be mistaken for a "break" out of the current loop.
+ int num = 0;
+ for (typename std::vector<LoopT *>::reverse_iterator
+ iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
+ iter != iterEnd; ++iter) {
+ loopRep = *iter;
+
+ if (getLoopLandBlock(loopRep) != NULL) {
+ continue;
+ }
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
+
+ if (numBreak == -1) {
+ break;
+ }
+
+ int numCont = loopcontPatternMatch(loopRep, loopHeader);
+ num += numBreak + numCont;
+ }
+
+ return num;
+} //loopendPatternMatch
+
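+// A loop pattern is matched when curBlk has no remaining successors and heads
+// one or more loops whose landing blocks have been recorded; each such loop
+// is collapsed into the header by mergeLooplandBlock, innermost first.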
+template<class PassT>
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 0) {
+ return 0;
+ }
+
+ int numLoop = 0;
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+ if (loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ mergeLooplandBlock(curBlk, loopLand);
+ ++numLoop;
+ }
+ }
+ loopRep = loopRep->getParentLoop();
+ }
+
+ numLoopPatternMatch += numLoop;
+
+ return numLoop;
+} //loopPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
+ }
+
+ if (exitingBlks.size() == 0) {
+ setLoopLandBlock(loopRep);
+ return 0;
+ }
+
+ // Compute the corresponding exitBlks and exit block set.
+ BlockTSmallerVector exitBlks;
+ std::set<BlockT *> exitBlkSet;
+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *exitingBlk = *iter;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ exitBlks.push_back(exitBlk);
+ exitBlkSet.insert(exitBlk); //non-duplicate insert
+ }
+
+ assert(exitBlkSet.size() > 0);
+ assert(exitBlks.size() == exitingBlks.size());
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+ }
+
+ // Find exitLandBlk.
+ BlockT *exitLandBlk = NULL;
+ int numCloned = 0;
+ int numSerial = 0;
+
+ if (exitBlkSet.size() == 1) {
+ exitLandBlk = *exitBlkSet.begin();
+ } else {
+ exitLandBlk = findNearestCommonPostDom(exitBlkSet);
+
+ if (exitLandBlk == NULL) {
+ return -1;
+ }
+
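+ // Classify the exit blocks: either all of them lie on a single path to
+ // exitLandBlk (in-path) or all of them dead-end without reaching
+ // exitLandBlk (not-in-path). A mixture of the two cannot be reduced here.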
+ bool allInPath = true;
+ bool allNotInPath = true;
+ for (typename std::set<BlockT*>::const_iterator
+ iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+
+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
+ if (DEBUGME) {
+ errs() << "BB" << exitBlk->getNumber()
+ << " to BB" << exitLandBlk->getNumber() << " PathToKind="
+ << pathKind << "\n";
+ }
+
+ allInPath = allInPath && (pathKind == SinglePath_InPath);
+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
+
+ if (!allInPath && !allNotInPath) {
+ if (DEBUGME) {
+ errs() << "singlePath check fail\n";
+ }
+ return -1;
+ }
+ } // check all exit blocks
+
+ if (allNotInPath) {
+
+ // TODO: Simplify, maybe separate function?
+ LoopT *parentLoopRep = loopRep->getParentLoop();
+ BlockT *parentLoopHeader = NULL;
+ if (parentLoopRep)
+ parentLoopHeader = parentLoopRep->getHeader();
+
+ if (exitLandBlk == parentLoopHeader &&
+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
+ loopRep,
+ exitBlkSet,
+ exitLandBlk)) != NULL) {
+ if (DEBUGME) {
+ errs() << "relocateLoopcontBlock success\n";
+ }
+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+ exitingBlks,
+ exitBlks)) != NULL) {
+ if (DEBUGME) {
+ errs() << "insertEndbranchBlock success\n";
+ }
+ } else {
+ if (DEBUGME) {
+ errs() << "loop exit fail\n";
+ }
+ return -1;
+ }
+ }
+
+ // Handle side entry to exit path.
+ exitBlks.clear();
+ exitBlkSet.clear();
+ for (typename BlockTSmallerVector::iterator iterExiting =
+ exitingBlks.begin(),
+ iterExitingEnd = exitingBlks.end();
+ iterExiting != iterExitingEnd; ++iterExiting) {
+ BlockT *exitingBlk = *iterExiting;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ BlockT *newExitBlk = exitBlk;
+
+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
+ ++numCloned;
+ }
+
+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
+
+ exitBlks.push_back(newExitBlk);
+ exitBlkSet.insert(newExitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ numSerial += serialPatternMatch(exitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ if (exitBlk->pred_size() > 1) {
+ if (exitBlk != exitLandBlk) {
+ return -1;
+ }
+ } else {
+ if (exitBlk != exitLandBlk &&
+ (exitBlk->succ_size() != 1 ||
+ *exitBlk->succ_begin() != exitLandBlk)) {
+ return -1;
+ }
+ }
+ }
+ } // else
+
+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
+
+ // Fold the break into the breaking block. This also handles breaks that
+ // cross loop-nesting levels.
+ assert(exitingBlks.size() == exitBlks.size());
+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
+ BlockT *exitBlk = *iterExit;
+ BlockT *exitingBlk = *iterExiting;
+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
+ }
+
+ int numBreak = static_cast<int>(exitingBlks.size());
+ numLoopbreakPatternMatch += numBreak;
+ numClonedBlock += numCloned;
+ return numBreak + numSerial + numCloned;
+} //loopbreakPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ int numCont = 0;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
+ for (typename InvBlockGTraits::ChildIteratorType iter =
+ InvBlockGTraits::child_begin(loopHeader),
+ iterEnd = InvBlockGTraits::child_end(loopHeader);
+ iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ if (loopRep->contains(curBlk)) {
+ handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
+ loopHeader, loopRep);
+ contBlk.push_back(curBlk);
+ ++numCont;
+ }
+ }
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
+ iter = contBlk.begin(), iterEnd = contBlk.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->removeSuccessor(loopHeader);
+ }
+
+ numLoopcontPatternMatch += numCont;
+
+ return numCont;
+} //loopcontPatternMatch
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
+ BlockT *src2Blk) {
+ // Return true iff src1Blk->succ_size() == 0 and src1Blk and src2Blk are in
+ // the same loop that has a LoopLandInfo entry. Without explicitly keeping
+ // track of loopContBlks and loopBreakBlks, this is a way to recover that
+ // information.
+ //
+ if (src1Blk->succ_size() == 0) {
+ LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
+ if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+ if (theEntry != NULL) {
+ if (DEBUGME) {
+ errs() << "isLoopContBreakBlock yes src1 = BB"
+ << src1Blk->getNumber()
+ << " src2 = BB" << src2Blk->getNumber() << "\n";
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+} //isSameloopDetachedContbreak
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
+ if (num == 0) {
+ if (DEBUGME) {
+ errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+ }
+ num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
+ }
+ return num;
+}
+
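+// Walk down from trueBlk along its single-successor chain looking for a block
+// downBlk that falseBlk reaches on a single path. Once found, side entries
+// into both paths are cloned away so that headBlk heads a clean if-pattern
+// that ifPatternMatch can reduce.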
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = 0;
+ BlockT *downBlk;
+
+ //trueBlk could be the common post dominator
+ downBlk = trueBlk;
+
+ if (DEBUGME) {
+ errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
+ << " true = BB" << trueBlk->getNumber()
+ << ", numSucc=" << trueBlk->succ_size()
+ << " false = BB" << falseBlk->getNumber() << "\n";
+ }
+
+ while (downBlk) {
+ if (DEBUGME) {
+ errs() << "check down = BB" << downBlk->getNumber();
+ }
+
+ if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
+ if (DEBUGME) {
+ errs() << " working\n";
+ }
+
+ num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
+ num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
+
+ numClonedBlock += num;
+ num += serialPatternMatch(*headBlk->succ_begin());
+ num += serialPatternMatch(*(++headBlk->succ_begin()));
+ num += ifPatternMatch(headBlk);
+ assert(num > 0);
+
+ break;
+ }
+ if (DEBUGME) {
+ errs() << " not working\n";
+ }
+ downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
+ } // walk down the postDomTree
+
+ return num;
+} //handleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk,
+ bool detail) {
+ errs() << "head = BB" << headBlk->getNumber()
+ << " size = " << headBlk->size();
+ if (detail) {
+ errs() << "\n";
+ headBlk->print(errs());
+ errs() << "\n";
+ }
+
+ if (trueBlk) {
+ errs() << ", true = BB" << trueBlk->getNumber() << " size = "
+ << trueBlk->size() << " numPred = " << trueBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ trueBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (falseBlk) {
+ errs() << ", false = BB" << falseBlk->getNumber() << " size = "
+ << falseBlk->size() << " numPred = " << falseBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ falseBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (landBlk) {
+ errs() << ", land = BB" << landBlk->getNumber() << " size = "
+ << landBlk->size() << " numPred = " << landBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ landBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+
+ errs() << "\n";
+} //showImproveSimpleJumpintoIf
+
+template<class PassT>
+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT **plandBlk) {
+ bool migrateTrue = false;
+ bool migrateFalse = false;
+
+ BlockT *landBlk = *plandBlk;
+
+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
+ && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+
+ if (trueBlk == falseBlk) {
+ return 0;
+ }
+
+ migrateTrue = needMigrateBlock(trueBlk);
+ migrateFalse = needMigrateBlock(falseBlk);
+
+ if (!migrateTrue && !migrateFalse) {
+ return 0;
+ }
+
+ // If we need to migrate either trueBlk or falseBlk, also migrate any of the
+ // two that has more than one predecessor. Without doing this, a predecessor
+ // other than headBlk would leave an undefined value in initReg.
+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
+ migrateTrue = true;
+ }
+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
+ migrateFalse = true;
+ }
+
+ if (DEBUGME) {
+ errs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, false);
+ }
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ // {initReg = 0; org falseBlk branch}
+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ // => org landBlk
+ // if landBlk->pred_size() > 2, put the above if-else inside
+ // if (initReg != 2) {...}
+ //
+ // add initReg = initVal to headBlk
+
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ unsigned initReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ if (!migrateTrue || !migrateFalse) {
+ int initVal = migrateTrue ? 0 : 1;
+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+ }
+
+ int numNewBlk = 0;
+
+ if (landBlk == NULL) {
+ landBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(landBlk); //insert to function
+
+ if (trueBlk) {
+ trueBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ if (falseBlk) {
+ falseBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ numNewBlk ++;
+ }
+
+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+
+ // Insert AMDGPU::ENDIF to avoid the special case "input landBlk == NULL".
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos
+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
+
+ if (landBlkHasOtherPred) {
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
+ unsigned cmpResReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+
+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
+ initReg, immReg);
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos,
+ AMDGPU::IF_PREDICATE_SET, passRep,
+ cmpResReg, DebugLoc());
+ }
+
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
+ passRep, initReg, DebugLoc());
+
+ if (migrateTrue) {
+ migrateInstruction(trueBlk, landBlk, insertPos);
+ // Need to unconditionally insert the assignment to ensure that a path from
+ // a predecessor other than headBlk has a valid value in initReg if
+ // (initVal != 1).
+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+ }
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
+
+ if (migrateFalse) {
+ migrateInstruction(falseBlk, landBlk, insertPos);
+ // Need to unconditionally insert the assignment to ensure that a path from
+ // a predecessor other than headBlk has a valid value in initReg if
+ // (initVal != 0).
+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+ }
+
+ if (landBlkHasOtherPred) {
+ // add endif
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
+
+ // Set initReg = 2 in the other predecessors of landBlk.
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
+ ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (curBlk != trueBlk && curBlk != falseBlk) {
+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
+ }
+ } //for
+ }
+ if (DEBUGME) {
+ errs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, false);
+ }
+
+ // update landBlk
+ *plandBlk = landBlk;
+
+ return numNewBlk;
+} //improveSimpleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
+ LoopT *exitingLoop,
+ BlockT *exitBlk,
+ LoopT *exitLoop,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
+ << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
+ }
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT initReg = INVALIDREGNUM;
+ if (exitingLoop != exitLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopBreakInitReg(exitLoop, initReg);
+ while (exitingLoop != exitLoop && exitingLoop) {
+ addLoopBreakOnReg(exitingLoop, initReg);
+ exitingLoop = exitingLoop->getParentLoop();
+ }
+ assert(exitingLoop == exitLoop);
+ }
+
+ mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
+
+} //handleLoopbreak
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
+ LoopT *contingLoop,
+ BlockT *contBlk,
+ LoopT *contLoop) {
+ if (DEBUGME) {
+ errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
+ << " header = BB" << contBlk->getNumber() << "\n";
+
+ errs() << "Trying to continue loop-depth = "
+ << getLoopDepth(contLoop)
+ << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
+ }
+
+ RegiT initReg = INVALIDREGNUM;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (contingLoop != contLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopContInitReg(contLoop, initReg);
+ while (contingLoop && contingLoop->getParentLoop() != contLoop) {
+ addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
+ contingLoop = contingLoop->getParentLoop();
+ }
+ assert(contingLoop && contingLoop->getParentLoop() == contLoop);
+ addLoopContOnReg(contingLoop, initReg);
+ }
+
+ settleLoopcontBlock(contingBlk, contBlk, initReg);
+} //handleLoopcontBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "serialPattern BB" << dstBlk->getNumber()
+ << " <= BB" << srcBlk->getNumber() << "\n";
+ }
+ dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
+
+ dstBlk->removeSuccessor(srcBlk);
+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
+
+ removeSuccessor(srcBlk);
+ retireBlock(dstBlk, srcBlk);
+} //mergeSerialBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
+ BlockT *curBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "ifPattern BB" << curBlk->getNumber();
+ errs() << "{ ";
+ if (trueBlk) {
+ errs() << "BB" << trueBlk->getNumber();
+ }
+ errs() << " } else ";
+ errs() << "{ ";
+ if (falseBlk) {
+ errs() << "BB" << falseBlk->getNumber();
+ }
+ errs() << " }\n ";
+ errs() << "landBlock: ";
+ if (landBlk == NULL) {
+ errs() << "NULL";
+ } else {
+ errs() << "BB" << landBlk->getNumber();
+ }
+ errs() << "\n";
+ }
+
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc branchDL = branchInstr->getDebugLoc();
+
+// transform to
+// if cond
+// trueBlk
+// else
+// falseBlk
+// endif
+// landBlk
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(curBlk, branchInstr);
+ CFGTraits::insertCondBranchBefore(branchInstrPos,
+ CFGTraits::getBranchNzeroOpcode(oldOpcode),
+ passRep,
+ branchDL);
+
+ if (trueBlk) {
+ curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
+ curBlk->removeSuccessor(trueBlk);
+ if (landBlk && trueBlk->succ_size()!=0) {
+ trueBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, trueBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
+
+ if (falseBlk) {
+ curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
+ falseBlk->end());
+ curBlk->removeSuccessor(falseBlk);
+ if (landBlk && falseBlk->succ_size() != 0) {
+ falseBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, falseBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
+
+ branchInstr->eraseFromParent();
+
+ if (landBlk && trueBlk && falseBlk) {
+ curBlk->addSuccessor(landBlk);
+ }
+
+} //mergeIfthenelseBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+ LoopLandInfo *loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+
+ if (DEBUGME) {
+ errs() << "loopPattern header = BB" << dstBlk->getNumber()
+ << " land = BB" << landBlk->getNumber() << "\n";
+ }
+
+ // Loop contInitRegs are initialized at the beginning of the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->contInitRegs.begin(),
+ iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* We last inserted the DebugLoc in the BREAK_LOGICALZ_i32 or
+ * AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk. Search for the
+ * DebugLoc in that statement; if not found, insert the empty/default
+ * DebugLoc. */
+ InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+ DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
+ // Loop breakInitRegs are initialized before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakInitRegs.begin(),
+ iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+ // Loop endbranchInitRegs are initialized before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->endbranchInitRegs.begin(),
+ iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* We last inserted the DebugLoc in the continue statement in the current
+ * dstBlk. Search for the DebugLoc in the continue statement; if not found,
+ * insert the empty/default DebugLoc. */
+ InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+ DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
+ // Loop breakOnRegs are checked after the ENDLOOP: break the loop outside
+ // this loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakOnRegs.begin(),
+ iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
+ *iter);
+ }
+
+ // Loop contOnRegs are checked after the ENDLOOP: continue the loop outside
+ // this loop.
+ for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
+ iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
+ passRep, *iter);
+ }
+
+ dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+ for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+ iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
+ }
+
+ removeSuccessor(landBlk);
+ retireBlock(dstBlk, landBlk);
+} //mergeLooplandBlock
+
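+// Walk backwards from I to the most recent PRED_X instruction and invert its
+// predicate condition (zero <-> not-zero), reversing the sense of the branch
+// that consumes the predicate.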
+template<class PassT>
+void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
+ while (I--) {
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
+ return;
+ case OPCODE_IS_NOT_ZERO_INT:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
+ return;
+ case OPCODE_IS_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
+ return;
+ case OPCODE_IS_NOT_ZERO:
+ static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
+ return;
+ default:
+ assert(0 && "PRED_X Opcode invalid!");
+ }
+ }
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
+ BlockT *exitBlk,
+ BlockT *exitLandBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
+ << " exit = BB" << exitBlk->getNumber()
+ << " land = BB" << exitLandBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+
+ // transform exitingBlk to
+ // if ( ) {
+ // exitBlk (if exitBlk != exitLandBlk)
+ // setReg = 1
+ // break
+ // } endif
+ // successor = {orgSuccessor(exitingBlk) - exitBlk}
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(exitingBlk, branchInstr);
+
+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
+ //break_logical
+
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstrPos);
+ }
+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
+ } else {
+ if (trueBranch != exitBlk) {
+ reversePredicateSetter(branchInstr);
+ }
+ CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
+ exitBlk->end());
+ }
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
+ } //if_logical
+
+ // Now branchInstr can be erased safely.
+ branchInstr->eraseFromParent();
+
+ // Now take care of successors and retire blocks.
+ exitingBlk->removeSuccessor(exitBlk);
+ if (exitBlk != exitLandBlk) {
+ // exitBlk's instructions were spliced into exitingBlk above.
+ exitBlk->removeSuccessor(exitLandBlk);
+ retireBlock(exitingBlk, exitBlk);
+ }
+
+} //mergeLoopbreakBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
+ BlockT *contBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "settleLoopcontBlock conting = BB"
+ << contingBlk->getNumber()
+ << ", cont = BB" << contBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
+ if (branchInstr) {
+ assert(CFGTraits::isCondBranch(branchInstr));
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(contingBlk, branchInstr);
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ // transform contingBlk to
+ // if () {
+ // move instr after branchInstr
+ // continue
+ // or
+ // setReg = 1
+ // break
+ // } endif
+ // successor = {orgSuccessor(contingBlk) - loopHeader}
+
+ bool useContinueLogical =
+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
+
+ if (!useContinueLogical) {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ // insertEnd ensures phi-moves, if they exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
+ } else {
+ // insertEnd ensures phi-moves, if they exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
+ }
+
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
+ } else {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
+ : CFGTraits::getContinueZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+ }
+
+ branchInstr->eraseFromParent();
+ } else {
+ // If we've arrived here, the branch instruction has already been erased.
+ // Travel back up the basic block to find the last reference to our debug
+ // location; we've just inserted that reference, so it should be
+ // representative.
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
+ // insertEnd ensures phi-moves, if they exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ } else {
+ // insertEnd ensures phi-moves, if they exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ }
+ } //else
+
+} //settleLoopcontBlock
+
+// The BBs in exitBlkSet have been determined to be in the break path of
+// loopRep. Before we can place those BBs inside the loop body of loopRep,
+// check whether they were determined earlier to be cont-BBs for
+// parentLoopRep. If so, generate a new BB newBlk and:
+// (1) make newBlk the common successor of the BBs in exitBlkSet,
+// (2) change the continue-instr in the BBs in exitBlkSet to a break-instr,
+// (3) generate a continue-instr in newBlk.
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
+ LoopT *loopRep,
+ std::set<BlockT *> &exitBlkSet,
+ BlockT *exitLandBlk) {
+ std::set<BlockT *> endBlkSet;
+
+ for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+ BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
+
+ if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
+ return NULL;
+
+ endBlkSet.insert(endBlk);
+ }
+
+ BlockT *newBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newBlk); //insert to function
+ CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
+ SHOWNEWBLK(newBlk, "New continue block: ");
+
+ for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
+ iterEnd = endBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *endBlk = *iter;
+ InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
+ if (contInstr) {
+ contInstr->eraseFromParent();
+ }
+ endBlk->addSuccessor(newBlk);
+ if (DEBUGME) {
+ errs() << "Add new continue Block to BB"
+ << endBlk->getNumber() << " successors\n";
+ }
+ }
+
+ return newBlk;
+} //relocateLoopcontBlock
+
+// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as the
+// LoopLandBlock. This BB branches on the loop endBranchInit register to the
+// paths corresponding to the loop exiting branches.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+ BlockTSmallerVector &exitingBlks,
+ BlockTSmallerVector &exitBlks) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT endBranchReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(endBranchReg >= 0);
+
+ // reg = 0 before entering the loop
+ addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+ uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+ assert(numBlks >=2 && numBlks == exitBlks.size());
+
+ BlockT *preExitingBlk = exitingBlks[0];
+ BlockT *preExitBlk = exitBlks[0];
+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(preBranchBlk); //insert to function
+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+ BlockT *newLandBlk = preBranchBlk;
+
+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+ newLandBlk);
+ preExitingBlk->removeSuccessor(preExitBlk);
+ preExitingBlk->addSuccessor(newLandBlk);
+
+ // It is redundant to add reg = 0 to exitingBlks[0].
+
+ // For the 1..n-th exiting paths (the last iteration handles two paths),
+ // create the branch to the previous path and the current path.
+ for (uint32_t i = 1; i < numBlks; ++i) {
+ BlockT *curExitingBlk = exitingBlks[i];
+ BlockT *curExitBlk = exitBlks[i];
+ BlockT *curBranchBlk;
+
+ if (i == numBlks - 1) {
+ curBranchBlk = curExitBlk;
+ } else {
+ curBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(curBranchBlk); //insert to function
+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+ }
+
+ // Add reg = i to exitingBlks[i].
+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
+ endBranchReg, i);
+
+ // Remove the edge (exitingBlks[i], exitBlks[i]); add the new edge
+ // (exitingBlks[i], newLandBlk).
+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+ newLandBlk);
+ curExitingBlk->removeSuccessor(curExitBlk);
+ curExitingBlk->addSuccessor(newLandBlk);
+
+ // add to preBranchBlk the branch instruction:
+ // if (endBranchReg == preVal)
+ // preExitBlk
+ // else
+ // curBranchBlk
+ //
+ // preValReg = i - 1
+
+ DebugLoc DL;
+ RegiT preValReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+
+ preBranchBlk->insert(preBranchBlk->begin(),
+ tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
+ i - 1));
+
+ // condResReg = (endBranchReg == preValReg)
+ RegiT condResReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
+ .addReg(endBranchReg).addReg(preValReg);
+
+ BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
+ .addMBB(preExitBlk).addReg(condResReg);
+
+ preBranchBlk->addSuccessor(preExitBlk);
+ preBranchBlk->addSuccessor(curBranchBlk);
+
+ // Update preExitingBlk, preExitBlk, preBranchBlk.
+ preExitingBlk = curExitingBlk;
+ preExitBlk = curExitBlk;
+ preBranchBlk = curBranchBlk;
+
+ } //end for 1 .. n blocks
+
+ return newLandBlk;
+} //addLoopEndbranchBlock
+
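+// Follow the single-successor chain starting at srcBlk:
+// SinglePath_InPath - the chain reaches dstBlk;
+// SinglePath_NotInPath - the chain dead-ends without reaching dstBlk;
+// Not_SinglePath - the chain branches, or (when side entries are not
+// allowed) a block on it has multiple predecessors.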
+template<class PassT>
+typename CFGStructurizer<PassT>::PathToKind
+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return Not_SinglePath;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return SinglePath_NotInPath;
+ }
+
+ return Not_SinglePath;
+} //singlePathTo
+
+// If there is a single path from srcBlk to dstBlk, return the last block
+// before dstBlk. If there is a single path from srcBlk to the function end
+// that does not pass through dstBlk, return the last block on that path.
+// Otherwise, return NULL.
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return srcBlk;
+ }
+
+ if (srcBlk->succ_size() == 0) {
+ return srcBlk;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ BlockT *preBlk = srcBlk;
+
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == NULL) {
+ return preBlk;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return NULL;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return srcBlk;
+ }
+
+ return NULL;
+
+} //singlePathEnd
+
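+// Walk the single-successor path from srcBlk toward dstBlk, cloning every
+// block on the path that has multiple predecessors so that preBlk's copy of
+// the path has no side entries. Returns the number of blocks cloned.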
+template<class PassT>
+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
+ BlockT *dstBlk) {
+ int cloned = 0;
+ assert(preBlk->isSuccessor(srcBlk));
+ while (srcBlk && srcBlk != dstBlk) {
+ assert(srcBlk->succ_size() == 1);
+ if (srcBlk->pred_size() > 1) {
+ srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
+ ++cloned;
+ }
+
+ preBlk = srcBlk;
+ srcBlk = *srcBlk->succ_begin();
+ }
+
+ return cloned;
+} //cloneOnSideEntryTo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
+ BlockT *predBlk) {
+ assert(predBlk->isSuccessor(curBlk) &&
+ "predBlk is not a predecessor of curBlk");
+
+ BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
+ CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+ //srcBlk, oldBlk, newBlk
+
+ predBlk->removeSuccessor(curBlk);
+ predBlk->addSuccessor(cloneBlk);
+
+ // Add all successors to cloneBlk.
+ CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+
+ numClonedInstr += curBlk->size();
+
+ if (DEBUGME) {
+ errs() << "Cloned block: " << "BB"
+ << curBlk->getNumber() << "size " << curBlk->size() << "\n";
+ }
+
+ SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+
+ return cloneBlk;
+} //cloneBlockForPredecessor
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
+ BlockT *exitingBlk) {
+ BlockT *exitBlk = NULL;
+
+ for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
+ iterSuccEnd = exitingBlk->succ_end();
+ iterSucc != iterSuccEnd; ++iterSucc) {
+ BlockT *curBlk = *iterSucc;
+ if (!loopRep->contains(curBlk)) {
+ assert(exitBlk == NULL);
+ exitBlk = curBlk;
+ }
+ }
+
+ assert(exitBlk != NULL);
+
+ return exitBlk;
+} //exitingBlock2ExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
+ BlockT *dstBlk,
+ InstrIterator insertPos) {
+ InstrIterator spliceEnd;
+ //look for the input branchinstr, not the AMDGPU branchinstr
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ if (branchInstr == NULL) {
+ if (DEBUGME) {
+ errs() << "migrateInstruction don't see branch instr\n" ;
+ }
+ spliceEnd = srcBlk->end();
+ } else {
+ if (DEBUGME) {
+ errs() << "migrateInstruction see branch instr\n" ;
+ branchInstr->dump();
+ }
+ spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+ }
+ if (DEBUGME) {
+ errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+
+ //splice insert before insertPos
+ dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+ if (DEBUGME) {
+ errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit changes
+// B1:
+// uncond_br LoopHeader
+//
+// to
+// B1:
+// cond_br 1 LoopHeader dummyExit
+// and returns the newly added dummy exit block.
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
+ BlockT *loopHeader = LoopRep->getHeader();
+ BlockT *loopLatch = LoopRep->getLoopLatch();
+ BlockT *dummyExitBlk = NULL;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (loopHeader!=NULL && loopLatch!=NULL) {
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
+ if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
+ dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+ if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
+
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos(loopLatch, branchInstr);
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
+ InstrT *newInstr =
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
+ MachineInstrBuilder MIB(*funcRep, newInstr);
+ MIB.addMBB(loopHeader);
+ MIB.addReg(immReg, false);
+
+ SHOWNEWINSTR(newInstr);
+
+ branchInstr->eraseFromParent();
+ loopLatch->addSuccessor(dummyExitBlk);
+ }
+ }
+
+ return dummyExitBlk;
+} //normalizeInfiniteLoopExit
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
+ InstrT *branchInstr;
+
+ // Two unconditional branches have been seen in one basic block (e.g. in
+ // test_fc_do_while_or.c); the upstream producer should be fixed so this
+ // loop can be removed.
+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
+ && CFGTraits::isUncondBranch(branchInstr)) {
+ if (DEBUGME) {
+ errs() << "Removing unconditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ }
+} //removeUnconditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
+ if (srcBlk->succ_size() == 2) {
+ BlockT *blk1 = *srcBlk->succ_begin();
+ BlockT *blk2 = *(++srcBlk->succ_begin());
+
+ if (blk1 == blk2) {
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+ if (DEBUGME) {
+ errs() << "Removing unneeded conditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ SHOWNEWBLK(blk1, "Removing redundant successor");
+ srcBlk->removeSuccessor(blk1);
+ }
+ }
+} //removeRedundantConditionalBranch
+
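+// Give the function a single exit: create a dummy block that ends in RETURN,
+// delete the return instruction in each block of retBlks, and make the dummy
+// block their common successor.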
+template<class PassT>
+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
+ DEFAULT_VEC_SLOTS> &retBlks) {
+ BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
+ retBlks.begin(),
+ iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
+ if (curInstr) {
+ curInstr->eraseFromParent();
+ }
+ curBlk->addSuccessor(dummyExitBlk);
+ if (DEBUGME) {
+ errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
+ << " successors\n";
+ }
+ } //for
+
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
+} //addDummyExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
+ while (srcBlk->succ_size()) {
+ srcBlk->removeSuccessor(*srcBlk->succ_begin());
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->sccNum = sccNum;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
+ }
+
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->isRetired = true;
+ assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+ && "can't retire block yet");
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return (srcBlkInfo && srcBlkInfo->isRetired);
+}
+
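+// curBlk is an active loop head while some loop headed at curBlk either has
+// no landing-block information yet or has a landing block that is not
+// retired, i.e. that loop has not been fully reduced.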
+template<class PassT>
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+
+ if(loopLand == NULL)
+ return true;
+
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ return true;
+ }
+
+ loopRep = loopRep->getParentLoop();
+ }
+
+ return false;
+} //isActiveLoophead
+
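+// Heuristic: a block with multiple predecessors is migrated, rather than
+// cloned into each predecessor, when it is large enough that cloning it
+// would exceed the size thresholds below.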
+template<class PassT>
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
+ const unsigned blockSizeThreshold = 30;
+ const unsigned cloneInstrThreshold = 100;
+
+ bool multiplePreds = blk && (blk->pred_size() > 1);
+
+ if(!multiplePreds)
+ return false;
+
+ unsigned blkSize = blk->size();
+ return ((blkSize > blockSizeThreshold)
+ && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
+} //needMigrateBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
+ BlockTSmallerVector &exitBlks,
+ std::set<BlockT *> &exitBlkSet) {
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
+
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end();
+ predIter != predIterEnd; ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
+ inpathBlks.push_back(curBlk);
+ }
+ } //for
+
+ // If landBlk has predecessors that are not in the given loop, create a new
+ // block.
+ BlockT *newLandBlk = landBlk;
+ if (inpathBlks.size() != landBlk->pred_size()) {
+ newLandBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newLandBlk); //insert to function
+ newLandBlk->addSuccessor(landBlk);
+ for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
+ inpathBlks.begin(),
+ iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
+ //srcBlk, oldBlk, newBlk
+ curBlk->removeSuccessor(landBlk);
+ curBlk->addSuccessor(newLandBlk);
+ }
+ for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
+ if (exitBlks[i] == landBlk) {
+ exitBlks[i] = newLandBlk;
+ }
+ }
+ SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
+ }
+
+ setLoopLandBlock(loopRep, newLandBlk);
+
+ return newLandBlk;
+} // recordLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ assert(theEntry->landBlk == NULL);
+
+ if (blk == NULL) {
+ blk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(blk); //insert to function
+ SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
+ }
+
+ theEntry->landBlk = blk;
+
+ if (DEBUGME) {
+ errs() << "setLoopLandBlock loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " landing-block = BB" << blk->getNumber() << "\n";
+ }
+} // setLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+
+ theEntry->breakOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->breakInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
+ RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->endbranchInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopEndbranchInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopEndbranchInitReg
+
+template<class PassT>
+typename CFGStructurizer<PassT>::LoopLandInfo *
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry;
+} // getLoopLandInfo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry ? theEntry->landBlk : NULL;
+} // getLoopLandBlock
+
+template<class PassT>
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ if (loopRep == NULL)
+ return false;
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ return curBlk->isSuccessor(loopHeader);
+
+} //hasBackEdge
+
+template<class PassT>
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
+ return loopRep ? loopRep->getLoopDepth() : 0;
+} //getLoopDepth
+
+template<class PassT>
+int CFGStructurizer<PassT>::countActiveBlock
+(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
+ int count = 0;
+ while (iterStart != iterEnd) {
+ if (!isRetiredBlock(*iterStart)) {
+ ++count;
+ }
+ ++iterStart;
+ }
+
+ return count;
+} //countActiveBlock
+
+// This is a workaround for findNearestCommonDominator not being available
+// for post-dominator trees; a proper fix should go into Dominators.h.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT*
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
+
+ if (postDomTree->dominates(blk1, blk2)) {
+ return blk1;
+ }
+ if (postDomTree->dominates(blk2, blk1)) {
+ return blk2;
+ }
+
+ DomTreeNodeT *node1 = postDomTree->getNode(blk1);
+ DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+
+ // Handle newly cloned node.
+ if (node1 == NULL && blk1->succ_size() == 1) {
+ return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
+ }
+ if (node2 == NULL && blk2->succ_size() == 1) {
+ return findNearestCommonPostDom(blk1, *blk2->succ_begin());
+ }
+
+ if (node1 == NULL || node2 == NULL) {
+ return NULL;
+ }
+
+ node1 = node1->getIDom();
+ while (node1) {
+ if (postDomTree->dominates(node1, node2)) {
+ return node1->getBlock();
+ }
+ node1 = node1->getIDom();
+ }
+
+ return NULL;
+}
+
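+// Reduce the set pairwise: fold each block into the running common
+// post-dominator, yielding the nearest common post-dominator of the whole
+// set (or NULL if none exists).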
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::findNearestCommonPostDom
+(typename std::set<BlockT *> &blks) {
+ BlockT *commonDom;
+ typename std::set<BlockT *>::const_iterator iter = blks.begin();
+ typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
+ for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
+ BlockT *curBlk = *iter;
+ if (curBlk != commonDom) {
+ commonDom = findNearestCommonPostDom(curBlk, commonDom);
+ }
+ }
+
+ if (DEBUGME) {
+ errs() << "Common post dominator for exit blocks is ";
+ if (commonDom) {
+ errs() << "BB" << commonDom->getNumber() << "\n";
+ } else {
+ errs() << "NULL\n";
+ }
+ }
+
+ return commonDom;
+} //findNearestCommonPostDom
+
+} //end namespace llvm
+
+//todo: move-end
+
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer for AMDGPU
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGStructurizer : public MachineFunctionPass {
+public:
+ typedef MachineInstr InstructionType;
+ typedef MachineFunction FunctionType;
+ typedef MachineBasicBlock BlockType;
+ typedef MachineLoopInfo LoopinfoType;
+ typedef MachineDominatorTree DominatortreeType;
+ typedef MachinePostDominatorTree PostDominatortreeType;
+ typedef MachineDomTreeNode DomTreeNodeType;
+ typedef MachineLoop LoopType;
+
+protected:
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const AMDGPURegisterInfo *TRI;
+
+public:
+ AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
+ const TargetInstrInfo *getTargetInstrInfo() const;
+
+private:
+
+};
+
+} //end of namespace llvm
+AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
+: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
+ TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
+}
+
+const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
+ return TII;
+}
+//===----------------------------------------------------------------------===//
+//
+// CFGPrepare
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
+public:
+ static char ID;
+
+public:
+ AMDGPUCFGPrepare(TargetMachine &tm);
+
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+};
+
+char AMDGPUCFGPrepare::ID = 0;
+} //end of namespace llvm
+
+AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
+ : AMDGPUCFGStructurizer(ID, tm) {
+}
+const char *AMDGPUCFGPrepare::getPassName() const {
+ return "AMD IL Control Flow Graph Preparation Pass";
+}
+
+void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGPerform
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm {
+class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
+public:
+ static char ID;
+
+public:
+ AMDGPUCFGPerform(TargetMachine &tm);
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+};
+
+char AMDGPUCFGPerform::ID = 0;
+} //end of namespace llvm
+
+AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
+  : AMDGPUCFGStructurizer(ID, tm) {
+}
+
+const char *AMDGPUCFGPerform::getPassName() const {
+ return "AMD IL Control Flow Graph structurizer Pass";
+}
+
+void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructTraits<AMDGPUCFGStructurizer>
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct {
+// This class is tailored to the AMDGPU backend.
+template<>
+struct CFGStructTraits<AMDGPUCFGStructurizer> {
+ typedef int RegiT;
+
+ static int getBranchNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static int getBranchZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static int getContinueNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
+ default:
+ assert(0 && "internal error");
+    }
+ return -1;
+ }
+
+ static int getContinueZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
+ default:
+ assert(0 && "internal error");
+ }
+ return -1;
+ }
+
+ static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
+ return instr->getOperand(0).getMBB();
+ }
+
+ static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
+ instr->getOperand(0).setMBB(blk);
+ }
+
+ static MachineBasicBlock *
+ getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(blk->succ_size() == 2);
+ MachineBasicBlock *trueBranch = getTrueBranch(instr);
+ MachineBasicBlock::succ_iterator iter = blk->succ_begin();
+ MachineBasicBlock::succ_iterator iterNext = iter;
+ ++iterNext;
+
+ return (*iter == trueBranch) ? *iterNext : *iter;
+ }
+
+ static bool isCondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDGPU::JUMP_COND:
+ case AMDGPU::BRANCH_COND_i32:
+ case AMDGPU::BRANCH_COND_f32:
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static bool isUncondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDGPU::JUMP:
+ case AMDGPU::BRANCH:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+    // Get the DebugLoc from the last instruction in the block that has one.
+ DebugLoc DL;
+ for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getDebugLoc().isUnknown() == false) {
+ DL = instr->getDebugLoc();
+ }
+ }
+ return DL;
+ }
+
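+  // Returns the trailing instruction of the block if it is a branch,
+  // otherwise NULL.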
+  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+    if (iter != blk->rend()) {
+      MachineInstr *instr = &(*iter);
+      if (isCondBranch(instr) || isUncondBranch(instr)) {
+        return instr;
+      }
+    }
+    return NULL;
+  }
+
+  // A more accurate name for this would be getPossibleLoopendBlockBranchInstr:
+  //
+  // a BB with a backward edge may have move instructions after the branch
+  // instruction; such move instructions "belong to" the loop's backward edge.
+  //
+ static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
+ const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
+ blk->getParent()->getTarget().getInstrInfo());
+
+ for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
+ iterEnd = blk->rend(); iter != iterEnd; ++iter) {
+      MachineInstr *instr = &(*iter);
+      if (isCondBranch(instr) || isUncondBranch(instr)) {
+        return instr;
+      } else if (!TII->isMov(instr->getOpcode())) {
+        break;
+      }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::RETURN) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::CONTINUE) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
+ for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static bool isReturnBlock(MachineBasicBlock *blk) {
+ MachineInstr *instr = getReturnInstr(blk);
+ bool isReturn = (blk->succ_size() == 0);
+ if (instr) {
+ assert(isReturn);
+ } else if (isReturn) {
+ if (DEBUGME) {
+        errs() << "BB" << blk->getNumber()
+               << " is return block without RETURN instr\n";
+ }
+ }
+
+ return isReturn;
+ }
+
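+  // Returns an iterator pointing at instr inside blk; asserts that instr is
+  // actually found in the block.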
+ static MachineBasicBlock::iterator
+ getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(instr->getParent() == blk && "instruction doesn't belong to block");
+ MachineBasicBlock::iterator iter = blk->begin();
+ MachineBasicBlock::iterator iterEnd = blk->end();
+ while (&(*iter) != instr && iter != iterEnd) {
+ ++iter;
+ }
+
+ assert(iter != iterEnd);
+ return iter;
+ }//getInstrPos
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+    return insertInstrBefore(blk, newOpcode, passRep, DebugLoc());
+ } //insertInstrBefore
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+    insertInstrEnd(blk, newOpcode, passRep, DebugLoc());
+ } //insertInstrEnd
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr = blk->getParent()
+ ->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->push_back(newInstr);
+    // Assume the instruction doesn't take any register operands.
+
+ SHOWNEWINSTR(newInstr);
+ } //insertInstrEnd
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DebugLoc());
+
+ blk->insert(instrPos, newInstr);
+    // Assume the instruction doesn't take any register operands.
+
+ SHOWNEWINSTR(newInstr);
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ DebugLoc DL) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->insert(instrPos, newInstr);
+ MachineInstrBuilder MIB(*MF, newInstr);
+ MIB.addReg(oldInstr->getOperand(1).getReg(), false);
+
+ SHOWNEWINSTR(newInstr);
+    // oldInstr is erased later by the caller: oldInstr->eraseFromParent().
+ } //insertCondBranchBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator insertPos,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum,
+ DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineFunction *MF = blk->getParent();
+
+ MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ //insert before
+ blk->insert(insertPos, newInstr);
+ MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchBefore
+
+ static void insertCondBranchEnd(MachineBasicBlock *blk,
+ int newOpcode,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr =
+ MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
+
+ blk->push_back(newInstr);
+ MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchEnd
+
+
+ static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ blk->insert(instrPos, newInstr);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertAssignInstrBefore
+
+ static void insertAssignInstrBefore(MachineBasicBlock *blk,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+  } //insertAssignInstrBefore
+
+ static void insertCompareInstrBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator instrPos,
+ AMDGPUCFGStructurizer *passRep,
+ RegiT dstReg, RegiT src1Reg,
+ RegiT src2Reg) {
+ const AMDGPUInstrInfo *tii =
+ static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineFunction *MF = blk->getParent();
+ MachineInstr *newInstr =
+ MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
+
+ MachineInstrBuilder MIB(*MF, newInstr);
+ MIB.addReg(dstReg, RegState::Define); //set target
+ MIB.addReg(src1Reg); //set src value
+ MIB.addReg(src2Reg); //set src value
+
+ blk->insert(instrPos, newInstr);
+ SHOWNEWINSTR(newInstr);
+
+ } //insertCompareInstrBefore
+
+ static void cloneSuccessorList(MachineBasicBlock *dstBlk,
+ MachineBasicBlock *srcBlk) {
+ for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
+ iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
+ }
+ } //cloneSuccessorList
+
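+  // Clones srcBlk: creates a fresh block in the same function and copies
+  // every instruction into it. The successor list is not cloned here; see
+  // cloneSuccessorList above.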
+ static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
+ MachineFunction *func = srcBlk->getParent();
+ MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
+ func->push_back(newBlk); //insert to function
+ for (MachineBasicBlock::iterator iter = srcBlk->begin(),
+ iterEnd = srcBlk->end();
+ iter != iterEnd; ++iter) {
+ MachineInstr *instr = func->CloneMachineInstr(iter);
+ newBlk->push_back(instr);
+ }
+ return newBlk;
+ }
+
+  // MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose here
+  // because the AMDGPU branch instruction is not recognized as a terminator.
+  // FIXME: fix that and retire this routine.
+ static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
+ MachineBasicBlock *oldBlk,
+ MachineBasicBlock *newBlk) {
+ MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
+ if (branchInstr && isCondBranch(branchInstr) &&
+ getTrueBranch(branchInstr) == oldBlk) {
+ setTrueBranch(branchInstr, newBlk);
+ }
+ }
+
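+  // Post-structurization cleanup: a CONTINUE that immediately precedes an
+  // ENDLOOP is redundant (the fall-through is implicit), so collect and
+  // erase such instructions.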
+ static void wrapup(MachineBasicBlock *entryBlk) {
+ assert((!entryBlk->getParent()->getJumpTableInfo()
+ || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
+ && "found a jump table");
+
+    // Collect each CONTINUE that appears immediately before an ENDLOOP.
+ SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
+ MachineBasicBlock::iterator pre = entryBlk->begin();
+ MachineBasicBlock::iterator iterEnd = entryBlk->end();
+ MachineBasicBlock::iterator iter = pre;
+ while (iter != iterEnd) {
+ if (pre->getOpcode() == AMDGPU::CONTINUE
+ && iter->getOpcode() == AMDGPU::ENDLOOP) {
+ contInstr.push_back(pre);
+ }
+ pre = iter;
+ ++iter;
+ } //end while
+
+    // Delete those CONTINUE instructions.
+ for (unsigned i = 0; i < contInstr.size(); ++i) {
+ contInstr[i]->eraseFromParent();
+ }
+
+    // TODO: Fix up the jump table so later phases won't be confused. If
+    // jumpTableInfo->isEmpty() is false, the jump table needs to be cleaned,
+    // but there is no such interface yet; alternatively, replace all the
+    // other blocks in the jump table with entryBlk.
+
+ } //wrapup
+
+ static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineDominatorTree>();
+ }
+
+ static MachinePostDominatorTree*
+ getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachinePostDominatorTree>();
+ }
+
+ static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineLoopInfo>();
+ }
+}; // template class CFGStructTraits
+} //end of namespace llvm
+
+// createAMDGPUCFGPreparationPass - Returns a pass that prepares the CFG for
+// structurization.
+FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm) {
+  return new AMDGPUCFGPrepare(tm);
+}
+
+bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
+ *this,
+ TRI);
+}
+
+// createAMDGPUCFGStructurizerPass - Returns a pass that structurizes the CFG.
+FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
+  return new AMDGPUCFGPerform(tm);
+}
+
+bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
+ *this,
+ TRI);
+}
diff --git a/lib/Target/R600/AMDILDevice.cpp b/lib/Target/R600/AMDILDevice.cpp
new file mode 100644
index 000000000000..db8e01ea4043
--- /dev/null
+++ b/lib/Target/R600/AMDILDevice.cpp
@@ -0,0 +1,132 @@
+//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevice.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+// Default implementation for all of the classes.
+AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST) {
+ mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
+ mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
+ setCaps();
+ DeviceFlag = OCL_DEVICE_ALL;
+}
+
+AMDGPUDevice::~AMDGPUDevice() {
+ mHWBits.clear();
+ mSWBits.clear();
+}
+
+size_t AMDGPUDevice::getMaxGDSSize() const {
+ return 0;
+}
+
+uint32_t
+AMDGPUDevice::getDeviceFlag() const {
+ return DeviceFlag;
+}
+
+size_t AMDGPUDevice::getMaxNumCBs() const {
+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
+ return HW_MAX_NUM_CB;
+ }
+
+ return 0;
+}
+
+size_t AMDGPUDevice::getMaxCBSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
+ return MAX_CB_SIZE;
+ }
+
+ return 0;
+}
+
+size_t AMDGPUDevice::getMaxScratchSize() const {
+ return 65536;
+}
+
+uint32_t AMDGPUDevice::getStackAlignment() const {
+ return 16;
+}
+
+void AMDGPUDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::HalfOps);
+ mSWBits.set(AMDGPUDeviceInfo::ByteOps);
+ mSWBits.set(AMDGPUDeviceInfo::ShortOps);
+ mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
+ mSWBits.set(AMDGPUDeviceInfo::NoInline);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
+ mSWBits.set(AMDGPUDeviceInfo::MacroDB);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
+ }
+ mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
+}
+
+AMDGPUDeviceInfo::ExecutionMode
+AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const {
+ if (mHWBits[Caps]) {
+ assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDGPUDeviceInfo::Hardware;
+ }
+
+ if (mSWBits[Caps]) {
+ assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDGPUDeviceInfo::Software;
+ }
+
+ return AMDGPUDeviceInfo::Unsupported;
+
+}
+
+bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
+}
+
+bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
+}
+
+bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const {
+ return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
+}
+
+std::string
+AMDGPUDevice::getDataLayout() const {
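+  // 32-bit pointers with natural alignment for scalar and vector types; the
+  // f64 entry is appended below only when doubles are done in hardware.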
+ std::string DataLayout = std::string(
+ "e"
+ "-p:32:32:32"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128"
+ "-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n32:64"
+ );
+
+ if (usesHardware(AMDGPUDeviceInfo::DoubleOps)) {
+ DataLayout.append("-f64:64:64");
+ }
+
+ return DataLayout;
+}
diff --git a/lib/Target/R600/AMDILDevice.h b/lib/Target/R600/AMDILDevice.h
new file mode 100644
index 000000000000..97df98cafb2a
--- /dev/null
+++ b/lib/Target/R600/AMDILDevice.h
@@ -0,0 +1,117 @@
+//===---- AMDILDevice.h - Define Device Data for AMDGPU -----*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDILDEVICEIMPL_H
+#define AMDILDEVICEIMPL_H
+#include "AMDIL.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+ class MCStreamer;
+//===----------------------------------------------------------------------===//
+// Interface for data that is specific to a single device
+//===----------------------------------------------------------------------===//
+class AMDGPUDevice {
+public:
+ AMDGPUDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUDevice();
+
+ // Enum values for the various memory types.
+ enum {
+ RAW_UAV_ID = 0,
+ ARENA_UAV_ID = 1,
+ LDS_ID = 2,
+ GDS_ID = 3,
+ SCRATCH_ID = 4,
+ CONSTANT_ID = 5,
+ GLOBAL_ID = 6,
+ MAX_IDS = 7
+ } IO_TYPE_IDS;
+
+ /// \returns The max LDS size that the hardware supports. Size is in
+ /// bytes.
+ virtual size_t getMaxLDSSize() const = 0;
+
+ /// \returns The max GDS size that the hardware supports if the GDS is
+ /// supported by the hardware. Size is in bytes.
+ virtual size_t getMaxGDSSize() const;
+
+ /// \returns The max number of hardware constant address spaces that
+ /// are supported by this device.
+ virtual size_t getMaxNumCBs() const;
+
+ /// \returns The max number of bytes a single hardware constant buffer
+ /// can support. Size is in bytes.
+ virtual size_t getMaxCBSize() const;
+
+ /// \returns The max number of bytes allowed by the hardware scratch
+ /// buffer. Size is in bytes.
+ virtual size_t getMaxScratchSize() const;
+
+ /// \brief Get the flag that corresponds to the device.
+ virtual uint32_t getDeviceFlag() const;
+
+ /// \returns The number of work-items that exist in a single hardware
+ /// wavefront.
+ virtual size_t getWavefrontSize() const = 0;
+
+ /// \brief Get the generational name of this specific device.
+ virtual uint32_t getGeneration() const = 0;
+
+ /// \brief Get the stack alignment of this specific device.
+ virtual uint32_t getStackAlignment() const;
+
+ /// \brief Get the resource ID for this specific device.
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
+
+ /// \brief Get the max number of UAV's for this device.
+ virtual uint32_t getMaxNumUAVs() const = 0;
+
+
+  // API exposing the more detailed capabilities of each family of
+  // cards. If a capability is supported, then either usesHardware or
+  // usesSoftware returns true; if usesHardware returns true, then
+  // usesSoftware must return false for the same capability. Hardware
+  // execution means that the feature is done natively by the hardware
+  // and is not emulated in software. Software execution means that the
+  // feature is emulated in software, possibly using the hardware for
+  // support, since the hardware does not fully comply with the OpenCL
+  // spec.
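+  //
+  // Illustrative usage (a sketch, not code from this tree):
+  //   if (Dev->usesHardware(AMDGPUDeviceInfo::DoubleOps))
+  //     ... emit native double-precision instructions ...
+  //   else if (Dev->usesSoftware(AMDGPUDeviceInfo::DoubleOps))
+  //     ... emit an emulation sequence ...
+  //   else
+  //     ... DoubleOps is unsupported on this device ...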
+
+ bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
+ bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
+ bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
+ virtual std::string getDataLayout() const;
+ static const unsigned int MAX_LDS_SIZE_700 = 16384;
+ static const unsigned int MAX_LDS_SIZE_800 = 32768;
+ static const unsigned int WavefrontSize = 64;
+ static const unsigned int HalfWavefrontSize = 32;
+ static const unsigned int QuarterWavefrontSize = 16;
+protected:
+ virtual void setCaps();
+ BitVector mHWBits;
+ llvm::BitVector mSWBits;
+ AMDGPUSubtarget *mSTM;
+ uint32_t DeviceFlag;
+private:
+ AMDGPUDeviceInfo::ExecutionMode
+ getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
+};
+
+} // namespace llvm
+#endif // AMDILDEVICEIMPL_H
diff --git a/lib/Target/R600/AMDILDeviceInfo.cpp b/lib/Target/R600/AMDILDeviceInfo.cpp
new file mode 100644
index 000000000000..9605fbe63340
--- /dev/null
+++ b/lib/Target/R600/AMDILDeviceInfo.cpp
@@ -0,0 +1,94 @@
+//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Function that creates DeviceInfo from a device name and other information.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDGPUSubtarget.h"
+
+using namespace llvm;
+namespace llvm {
+namespace AMDGPUDeviceInfo {
+
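+// Note: the R7XX path below appears to assume device names of the form
+// "rv7xx", keying on the third and fourth characters of the name.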
+AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
+ AMDGPUSubtarget *ptr,
+ bool is64bit, bool is64on32bit) {
+ if (deviceName.c_str()[2] == '7') {
+ switch (deviceName.c_str()[3]) {
+ case '1':
+ return new AMDGPU710Device(ptr);
+ case '7':
+ return new AMDGPU770Device(ptr);
+ default:
+ return new AMDGPU7XXDevice(ptr);
+ }
+ } else if (deviceName == "cypress") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCypressDevice(ptr);
+ } else if (deviceName == "juniper") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUEvergreenDevice(ptr);
+ } else if (deviceName == "redwood") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPURedwoodDevice(ptr);
+ } else if (deviceName == "cedar") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCedarDevice(ptr);
+ } else if (deviceName == "barts" || deviceName == "turks") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUNIDevice(ptr);
+ } else if (deviceName == "cayman") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUCaymanDevice(ptr);
+ } else if (deviceName == "caicos") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPUNIDevice(ptr);
+ } else if (deviceName == "SI") {
+ return new AMDGPUSIDevice(ptr);
+ } else {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDGPU7XXDevice(ptr);
+ }
+}
+} // End namespace AMDGPUDeviceInfo
+} // End namespace llvm
diff --git a/lib/Target/R600/AMDILDeviceInfo.h b/lib/Target/R600/AMDILDeviceInfo.h
new file mode 100644
index 000000000000..4b2c3a53c79f
--- /dev/null
+++ b/lib/Target/R600/AMDILDeviceInfo.h
@@ -0,0 +1,88 @@
+//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#ifndef AMDILDEVICEINFO_H
+#define AMDILDEVICEINFO_H
+
+
+#include <string>
+
+namespace llvm {
+ class AMDGPUDevice;
+ class AMDGPUSubtarget;
+ namespace AMDGPUDeviceInfo {
+    /// Each capability can be executed using a hardware instruction,
+ /// emulated with a sequence of software instructions, or not
+ /// supported at all.
+ enum ExecutionMode {
+      Unsupported = 0, ///< Unsupported feature on the card (default value).
+ /// This is the execution mode that is set if the feature is emulated in
+ /// software.
+ Software,
+ /// This execution mode is set if the feature exists natively in hardware
+ Hardware
+ };
+
+ enum Caps {
+ HalfOps = 0x1, ///< Half float is supported or not.
+ DoubleOps = 0x2, ///< Double is supported or not.
+      ByteOps      = 0x3,  ///< Byte (char) is supported or not.
+ ShortOps = 0x4, ///< Short is supported or not.
+ LongOps = 0x5, ///< Long is supported or not.
+ Images = 0x6, ///< Images are supported or not.
+ ByteStores = 0x7, ///< ByteStores available(!HD4XXX).
+ ConstantMem = 0x8, ///< Constant/CB memory.
+ LocalMem = 0x9, ///< Local/LDS memory.
+ PrivateMem = 0xA, ///< Scratch/Private/Stack memory.
+ RegionMem = 0xB, ///< OCL GDS Memory Extension.
+ FMA = 0xC, ///< Use HW FMA or SW FMA.
+ ArenaSegment = 0xD, ///< Use for Arena UAV per pointer 12-1023.
+ MultiUAV = 0xE, ///< Use for UAV per Pointer 0-7.
+ Reserved0 = 0xF, ///< ReservedFlag
+ NoAlias = 0x10, ///< Cached loads.
+ Signed24BitOps = 0x11, ///< Peephole Optimization.
+      /// Debug mode implies that no hardware features or optimizations
+      /// are performed and that all memory accesses go through a single
+      /// UAV (Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
+ Debug = 0x12,
+ CachedMem = 0x13, ///< Cached mem is available or not.
+ BarrierDetect = 0x14, ///< Detect duplicate barriers.
+ Reserved1 = 0x15, ///< Reserved flag
+ ByteLDSOps = 0x16, ///< Flag to specify if byte LDS ops are available.
+ ArenaVectors = 0x17, ///< Flag to specify if vector loads from arena work.
+ TmrReg = 0x18, ///< Flag to specify if Tmr register is supported.
+ NoInline = 0x19, ///< Flag to specify that no inlining should occur.
+ MacroDB = 0x1A, ///< Flag to specify that backend handles macrodb.
+ HW64BitDivMod = 0x1B, ///< Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, ///< Flag to specify that arena uav is supported.
+ PrivateUAV = 0x1D, ///< Flag to specify that private memory uses uav's.
+ /// If more capabilities are required, then
+ /// this number needs to be increased.
+ /// All capabilities must come before this
+ /// number.
+ MaxNumberCapabilities = 0x20
+ };
+ /// These have to be in order with the older generations
+ /// having the lower number enumerations.
+ enum Generation {
+ HD4XXX = 0, ///< 7XX based devices.
+ HD5XXX, ///< Evergreen based devices.
+ HD6XXX, ///< NI/Evergreen+ based devices.
+ HD7XXX, ///< Southern Islands based devices.
+ HDTEST, ///< Experimental feature testing device.
+ HDNUMGEN
+ };
+
+
+ AMDGPUDevice*
+ getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
+ bool is64bit = false, bool is64on32bit = false);
+  } // namespace AMDGPUDeviceInfo
+} // namespace llvm
+#endif // AMDILDEVICEINFO_H
diff --git a/lib/Target/R600/AMDILDevices.h b/lib/Target/R600/AMDILDevices.h
new file mode 100644
index 000000000000..636fa6d35947
--- /dev/null
+++ b/lib/Target/R600/AMDILDevices.h
@@ -0,0 +1,19 @@
+//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#ifndef AMDIL_DEVICES_H
+#define AMDIL_DEVICES_H
+// Include all of the device specific header files
+#include "AMDIL7XXDevice.h"
+#include "AMDILDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSIDevice.h"
+
+#endif // AMDIL_DEVICES_H
diff --git a/lib/Target/R600/AMDILEvergreenDevice.cpp b/lib/Target/R600/AMDILEvergreenDevice.cpp
new file mode 100644
index 000000000000..c5213a041005
--- /dev/null
+++ b/lib/Target/R600/AMDILEvergreenDevice.cpp
@@ -0,0 +1,169 @@
+//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILEvergreenDevice.h"
+
+using namespace llvm;
+
+AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
+: AMDGPUDevice(ST) {
+ setCaps();
+ std::string name = ST->getDeviceName();
+ if (name == "cedar") {
+ DeviceFlag = OCL_DEVICE_CEDAR;
+ } else if (name == "redwood") {
+ DeviceFlag = OCL_DEVICE_REDWOOD;
+ } else if (name == "cypress") {
+ DeviceFlag = OCL_DEVICE_CYPRESS;
+ } else {
+ DeviceFlag = OCL_DEVICE_JUNIPER;
+ }
+}
+
+AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
+}
+
+size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
+ return 12;
+}
+
+uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
+ switch(id) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ return GLOBAL_RETURN_RAW_UAV_ID;
+ case GLOBAL_ID:
+ case ARENA_UAV_ID:
+ return DEFAULT_ARENA_UAV_ID;
+ case LDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case GDS_ID:
+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case SCRATCH_ID:
+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+  }
+ return 0;
+}
+
+size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
+ return AMDGPUDevice::WavefrontSize;
+}
+
+uint32_t AMDGPUEvergreenDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD5XXX;
+}
+
+void AMDGPUEvergreenDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
+ mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
+ mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
+ mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
+ mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
+ mHWBits.set(AMDGPUDeviceInfo::ByteStores);
+ }
+ if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
+ mSWBits.set(AMDGPUDeviceInfo::RegionMem);
+ } else {
+ mHWBits.set(AMDGPUDeviceInfo::LocalMem);
+ mHWBits.set(AMDGPUDeviceInfo::RegionMem);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::Images);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
+ mHWBits.set(AMDGPUDeviceInfo::NoAlias);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::CachedMem);
+ if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
+ mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
+ mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
+ mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
+ mHWBits.set(AMDGPUDeviceInfo::LongOps);
+ mSWBits.reset(AMDGPUDeviceInfo::LongOps);
+ mHWBits.set(AMDGPUDeviceInfo::TmrReg);
+}
+
+AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCypressDevice::~AMDGPUCypressDevice() {
+}
+
+void AMDGPUCypressDevice::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ mHWBits.set(AMDGPUDeviceInfo::FMA);
+ }
+}
+
+
+AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCedarDevice::~AMDGPUCedarDevice() {
+}
+
+void AMDGPUCedarDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+}
+
+size_t AMDGPUCedarDevice::getWavefrontSize() const {
+ return AMDGPUDevice::QuarterWavefrontSize;
+}
+
+AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDGPURedwoodDevice::~AMDGPURedwoodDevice() {
+}
+
+void AMDGPURedwoodDevice::setCaps() {
+ mSWBits.set(AMDGPUDeviceInfo::FMA);
+}
+
+size_t AMDGPURedwoodDevice::getWavefrontSize() const {
+ return AMDGPUDevice::HalfWavefrontSize;
+}
diff --git a/lib/Target/R600/AMDILEvergreenDevice.h b/lib/Target/R600/AMDILEvergreenDevice.h
new file mode 100644
index 000000000000..ea90f774a856
--- /dev/null
+++ b/lib/Target/R600/AMDILEvergreenDevice.h
@@ -0,0 +1,93 @@
+//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef AMDILEVERGREENDEVICE_H
+#define AMDILEVERGREENDEVICE_H
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevice.h"
+
+namespace llvm {
+ class AMDGPUSubtarget;
+//===----------------------------------------------------------------------===//
+// Evergreen generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+
+/// \brief The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
+/// series of cards.
+///
+/// This class contains information required to differentiate
+/// the Evergreen device from the generic AMDGPUDevice. This device represents
+/// the capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDGPUEvergreenDevice : public AMDGPUDevice {
+public:
+ AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUEvergreenDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getMaxGDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getMaxNumUAVs() const;
+ virtual uint32_t getResourceID(uint32_t) const;
+protected:
+ virtual void setCaps();
+};
+
+/// The AMDGPUCypressDevice is similar to the AMDGPUEvergreenDevice, except it has
+/// support for double precision operations. This device is used to represent
+/// both the Cypress and Hemlock cards, which are commercially known as HD58XX
+/// and HD59XX cards.
+class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUCypressDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUCypressDevice();
+private:
+ virtual void setCaps();
+};
+
+
+/// \brief The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
+/// devices.
+///
+/// This class differs from the base AMDGPUEvergreenDevice in that the
+/// device is roughly a quarter of a 'Juniper'. These are commercially known
+/// as the HD54XX and HD53XX series of cards.
+class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUCedarDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPUCedarDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+/// \brief The AMDGPURedwoodDevice is the class that represents all of the 'Redwood' based
+/// devices.
+///
+/// This class differs from the base class, in that these devices are
+/// considered about half of a 'Juniper' device. These are commercially known as
+/// the HD55XX and HD56XX series of cards.
+class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
+ virtual ~AMDGPURedwoodDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+};
+
+} // namespace llvm
+#endif // AMDILEVERGREENDEVICE_H
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
new file mode 100644
index 000000000000..fa8f62de9c0a
--- /dev/null
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -0,0 +1,643 @@
+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Defines an instruction selector for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
+#include "AMDGPURegisterInfo.h"
+#include "AMDILDevices.h"
+#include "R600InstrInfo.h"
+#include "SIISelLowering.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include <list>
+#include <queue>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// AMDGPU specific code to select AMDGPU machine instructions for
+/// SelectionDAG operations.
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const AMDGPUSubtarget &Subtarget;
+public:
+ AMDGPUDAGToDAGISel(TargetMachine &TM);
+ virtual ~AMDGPUDAGToDAGISel();
+
+ SDNode *Select(SDNode *N);
+ virtual const char *getPassName() const;
+ virtual void PostprocessISelDAG();
+
+private:
+ inline SDValue getSmallIPtrImm(unsigned Imm);
+ bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
+
+ // Complex pattern selectors
+ bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
+ bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+
+ static bool checkType(const Value *ptr, unsigned int addrspace);
+ static const Value *getBasePointerValue(const Value *V);
+
+ static bool isGlobalStore(const StoreSDNode *N);
+ static bool isPrivateStore(const StoreSDNode *N);
+ static bool isLocalStore(const StoreSDNode *N);
+ static bool isRegionStore(const StoreSDNode *N);
+
+ static bool isCPLoad(const LoadSDNode *N);
+ static bool isConstantLoad(const LoadSDNode *N, int cbID);
+ static bool isGlobalLoad(const LoadSDNode *N);
+ static bool isParamLoad(const LoadSDNode *N);
+ static bool isPrivateLoad(const LoadSDNode *N);
+ static bool isLocalLoad(const LoadSDNode *N);
+ static bool isRegionLoad(const LoadSDNode *N);
+
+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+ bool SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue &BaseReg, SDValue& Offset);
+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+/// \brief This pass converts a legalized DAG into an AMDGPU-specific
+/// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
+ return new AMDGPUDAGToDAGISel(TM);
+}
+
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
+}
+
+AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
+}
+
+SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRParam(
+ SDValue Addr, SDValue& R1, SDValue& R2) {
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+ return SelectADDRParam(Addr, R1, R2);
+}
+
+
+bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ return true;
+}
+
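+// Custom selection: on R600-class devices (generation <= HD6XXX) this expands
+// BUILD_VECTOR into a REG_SEQUENCE and folds immediates into the operands of
+// their ALU users; everything else falls through to the generated matcher.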
+SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+ switch (Opc) {
+ default: break;
+ case ISD::BUILD_VECTOR: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+      // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF plus 4
+      // INSERT_SUBREGs, which adds a 128-bit register copy when going through
+      // the TwoAddressInstructions pass. We want to avoid 128-bit copies as
+      // much as possible because they can't be bundled by our scheduler.
+ SDValue RegSeqArgs[9] = {
+ CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
+ };
+ bool IsRegSeq = true;
+ for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+ IsRegSeq = false;
+ break;
+ }
+ RegSeqArgs[2 * i + 1] = N->getOperand(i);
+ }
+ if (!IsRegSeq)
+ break;
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
+ RegSeqArgs, 2 * N->getNumOperands() + 1);
+ }
+ case ISD::ConstantFP:
+ case ISD::Constant: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ // XXX: Custom immediate lowering not implemented yet. Instead we use
+ // pseudo instructions defined in SIInstructions.td
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+
+ uint64_t ImmValue = 0;
+ unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+
+ if (N->getOpcode() == ISD::ConstantFP) {
+ // XXX: 64-bit Immediates not supported yet
+ assert(N->getValueType(0) != MVT::f64);
+
+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
+ APFloat Value = C->getValueAPF();
+ float FloatValue = Value.convertToFloat();
+ if (FloatValue == 0.0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (FloatValue == 0.5) {
+ ImmReg = AMDGPU::HALF;
+ } else if (FloatValue == 1.0) {
+ ImmReg = AMDGPU::ONE;
+ } else {
+ ImmValue = Value.bitcastToAPInt().getZExtValue();
+ }
+ } else {
+ // XXX: 64-bit Immediates not supported yet
+ assert(N->getValueType(0) != MVT::i64);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ if (C->getZExtValue() == 0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (C->getZExtValue() == 1) {
+ ImmReg = AMDGPU::ONE_INT;
+ } else {
+ ImmValue = C->getZExtValue();
+ }
+ }
+
+ for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
+ Use != SDNode::use_end(); Use = Next) {
+ Next = llvm::next(Use);
+ std::vector<SDValue> Ops;
+ for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
+ Ops.push_back(Use->getOperand(i));
+ }
+
+ if (!Use->isMachineOpcode()) {
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ // We can only use literal constants (e.g. AMDGPU::ZERO,
+ // AMDGPU::ONE, etc) in machine opcodes.
+ continue;
+ }
+ } else {
+ if (!TII->isALUInstr(Use->getMachineOpcode()) ||
+ (TII->get(Use->getMachineOpcode()).TSFlags &
+ R600_InstFlag::VECTOR)) {
+ continue;
+ }
+
+ int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM);
+ assert(ImmIdx != -1);
+
+ // subtract one from ImmIdx, because the DST operand is usually index
+ // 0 for MachineInstrs, but we have no DST in the Ops vector.
+ ImmIdx--;
+
+ // Check that we aren't already using an immediate.
+ // XXX: It's possible for an instruction to have more than one
+ // immediate operand, but this is not supported yet.
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
+ assert(C);
+
+ if (C->getZExtValue() != 0) {
+ // This instruction is already using an immediate.
+ continue;
+ }
+
+ // Set the immediate value
+ Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
+ }
+ }
+ // Set the immediate register
+ Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
+
+ CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
+ }
+ break;
+ }
+ }
+ SDNode *Result = SelectCode(N);
+
+ // Fold operands of selected node
+
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+ if (Result && Result->isMachineOpcode() &&
+ !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
+ && TII->isALUInstr(Result->getMachineOpcode())) {
+ // Fold FNEG/FABS/CONST_ADDRESS
+ // TODO: Isel can generate multiple MachineInst, we need to recursively
+ // parse Result
+ bool IsModified = false;
+ do {
+ std::vector<SDValue> Ops;
+ for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
+ I != E; ++I)
+ Ops.push_back(*I);
+ IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
+ if (IsModified) {
+ Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
+ }
+ } while (IsModified);
+
+ // If node has a single use which is CLAMP_R600, folds it
+ if (Result->hasOneUse() && Result->isMachineOpcode()) {
+ SDNode *PotentialClamp = *Result->use_begin();
+ if (PotentialClamp->isMachineOpcode() &&
+ PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
+ unsigned ClampIdx =
+ TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
+ std::vector<SDValue> Ops;
+ unsigned NumOp = Result->getNumOperands();
+ for (unsigned i = 0; i < NumOp; ++i) {
+ Ops.push_back(Result->getOperand(i));
+ }
+ Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ Result = CurDAG->SelectNodeTo(PotentialClamp,
+ Result->getMachineOpcode(), PotentialClamp->getVTList(),
+ Ops.data(), NumOp);
+ }
+ }
+ }
+ }
+
+ return Result;
+}
+
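+// Tries to fold a CONST_ADDRESS, FNEG, FABS or BITCAST operand of an R600
+// ALU instruction directly into the instruction's source and modifier
+// operands. Returns true if Ops was modified.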
+bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
+ const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2)
+ };
+ int SelIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
+ };
+ int NegIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2_NEG)
+ };
+ int AbsIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS),
+ -1
+ };
+
+ for (unsigned i = 0; i < 3; i++) {
+ if (OperandIdx[i] < 0)
+ return false;
+ SDValue Operand = Ops[OperandIdx[i] - 1];
+ switch (Operand.getOpcode()) {
+ case AMDGPUISD::CONST_ADDRESS: {
+ SDValue CstOffset;
+ if (Operand.getValueType().isVector() ||
+ !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
+ break;
+
+      // Gather the other constant values.
+ std::vector<unsigned> Consts;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = OperandIdx[j];
+ if (SrcIdx < 0)
+ break;
+ if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
+ }
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts))
+ break;
+
+ Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Ops[SelIdx[i] - 1] = CstOffset;
+ return true;
+ }
+ case ISD::FNEG:
+ if (NegIdx[i] < 0)
+ break;
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ return true;
+ case ISD::FABS:
+ if (AbsIdx[i] < 0)
+ break;
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ return true;
+ case ISD::BITCAST:
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ return true;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
+ if (!ptr) {
+ return false;
+ }
+ Type *ptrType = ptr->getType();
+  return cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
+}
+
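+// Walks backwards through casts, constant expressions and instruction
+// operands looking for the underlying base pointer: a pointer Argument, a
+// GlobalVariable or an AllocaInst. Returns NULL if none is found.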
+const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) {
+ if (!V) {
+ return NULL;
+ }
+ const Value *ret = NULL;
+ ValueMap<const Value *, bool> ValueBitMap;
+ std::queue<const Value *, std::list<const Value *> > ValueQueue;
+ ValueQueue.push(V);
+ while (!ValueQueue.empty()) {
+ V = ValueQueue.front();
+ if (ValueBitMap.find(V) == ValueBitMap.end()) {
+ ValueBitMap[V] = true;
+ if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
+ ret = V;
+ break;
+ } else if (dyn_cast<GlobalVariable>(V)) {
+ ret = V;
+ break;
+ } else if (dyn_cast<Constant>(V)) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
+ if (CE) {
+ ValueQueue.push(CE->getOperand(0));
+ }
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ ret = AI;
+ break;
+ } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ uint32_t numOps = I->getNumOperands();
+ for (uint32_t x = 0; x < numOps; ++x) {
+ ValueQueue.push(I->getOperand(x));
+ }
+ } else {
+ assert(!"Found a Value that we didn't know how to handle!");
+ }
+ }
+ ValueQueue.pop();
+ }
+ return ret;
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
+ return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
+}
+
+bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
+ if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
+ return true;
+ }
+ MachineMemOperand *MMO = N->getMemOperand();
+ const Value *V = MMO->getValue();
+ const Value *BV = getBasePointerValue(V);
+ if (MMO
+ && MMO->getValue()
+ && ((V && dyn_cast<GlobalValue>(V))
+ || (BV && dyn_cast<GlobalValue>(
+ getBasePointerValue(MMO->getValue()))))) {
+ return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
+ } else {
+ return false;
+ }
+}
+
+bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+}
+
+bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
+ MachineMemOperand *MMO = N->getMemOperand();
+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ if (MMO) {
+ const Value *V = MMO->getValue();
+ const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
+ if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
+ if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ // Check to make sure we are not a constant pool load or a constant load
+ // that is marked as a private load
+ if (isCPLoad(N) || isConstantLoad(N, -1)) {
+ return false;
+ }
+ }
+ if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
+ return true;
+ }
+ return false;
+}
+
+const char *AMDGPUDAGToDAGISel::getPassName() const {
+ return "AMDGPU DAG->DAG Pattern Instruction Selection";
+}
+
+#ifdef DEBUGTMP
+#undef INT64_C
+#endif
+#undef DEBUGTMP
+
+///==== AMDGPU Functions ====///
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+ SDValue& IntPtr) {
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+ return true;
+ }
+ return false;
+}
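+
+// Note the division by 4 above: the constant byte offset appears to be
+// converted into an index of 32-bit elements for the constant-offset form.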
+
+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue& BaseReg, SDValue &Offset) {
+  if (!isa<ConstantSDNode>(Addr)) {
+ BaseReg = Addr;
+ Offset = CurDAG->getIntPtrConstant(0, true);
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode * IMMOffset;
+
+ if (Addr.getOpcode() == ISD::ADD
+ && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ // If the pointer address is constant, we can move it to the offset field.
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ CurDAG->getEntryNode().getDebugLoc(),
+ AMDGPU::ZERO, MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
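+
+// A rough summary of the cases SelectADDRVTX_READ accepts:
+//   (add %ptr, imm) --> Base = %ptr, Offset = imm   (imm fits in 16 bits)
+//   imm             --> Base = ZERO, Offset = imm   (imm fits in 16 bits)
+//   anything else   --> Base = Addr, Offset = 0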
+
+bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *C;
+
+ if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+ Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ } else {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+
+ return true;
+}
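+
+// SelectADDRIndirect never fails; it decomposes the address as follows:
+//   imm                  --> Base = INDIRECT_BASE_ADDR, Offset = imm
+//   (add/or %base, imm)  --> Base = %base,              Offset = imm
+//   anything else        --> Base = Addr,               Offset = 0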
+
+void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
+
+ // Go over all selected nodes and try to fold them a bit more
+ const AMDGPUTargetLowering& Lowering = ((const AMDGPUTargetLowering&)TLI);
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
+
+ MachineSDNode *Node = dyn_cast<MachineSDNode>(I);
+ if (!Node)
+ continue;
+
+ SDNode *ResNode = Lowering.PostISelFolding(Node, *CurDAG);
+ if (ResNode != Node)
+ ReplaceUses(Node, ResNode);
+ }
+}
+
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
new file mode 100644
index 000000000000..922cac12b98e
--- /dev/null
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -0,0 +1,647 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief TargetLowering functions borrowed from AMDIL.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+void AMDGPUTargetLowering::InitAMDILLowering() {
+ int types[] = {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::f32,
+ (int)MVT::f64,
+ (int)MVT::i64,
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+
+ int IntTypes[] = {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::i64
+ };
+
+ int FloatTypes[] = {
+ (int)MVT::f32,
+ (int)MVT::f64
+ };
+
+ int VectorTypes[] = {
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+ size_t NumTypes = sizeof(types) / sizeof(*types);
+ size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+ size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+ size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+ const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
+ // These are the current register classes that are
+ // supported
+
+ for (unsigned int x = 0; x < NumTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+ //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+ // We cannot sextinreg, expand to shifts
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::BRCOND, VT, Custom);
+ setOperationAction(ISD::BR_JT, VT, Expand);
+ setOperationAction(ISD::BRIND, VT, Expand);
+ // TODO: Implement custom UREM/SREM routines
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ if (VT != MVT::i64 && VT != MVT::v2i64) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ }
+ }
+ for (unsigned int x = 0; x < NumFloatTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+ // IL does not have these operations for floating point types
+ setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+ setOperationAction(ISD::SETOLT, VT, Expand);
+ setOperationAction(ISD::SETOGE, VT, Expand);
+ setOperationAction(ISD::SETOGT, VT, Expand);
+ setOperationAction(ISD::SETOLE, VT, Expand);
+ setOperationAction(ISD::SETULT, VT, Expand);
+ setOperationAction(ISD::SETUGE, VT, Expand);
+ setOperationAction(ISD::SETUGT, VT, Expand);
+ setOperationAction(ISD::SETULE, VT, Expand);
+ }
+
+ for (unsigned int x = 0; x < NumIntTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+ // GPU also does not have divrem function for signed or unsigned
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+
+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ // GPU doesn't have a rotl, rotr, or byteswap instruction
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ // GPU doesn't have any counting operators
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ // setOperationAction(ISD::VSETCC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+
+ }
+ if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+ setOperationAction(ISD::Constant , MVT::i64 , Legal);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+ }
+ if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
+ // we support loading/storing v2f64 but not operations on the type
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
+ // We want to expand vector conversions into their scalar
+ // counterparts.
+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ }
+ // TODO: Fix the UDIV24 algorithm so it works for these
+ // types correctly. This needs vector comparisons
+ // for this to work correctly.
+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+ setOperationAction(ISD::SUBC, MVT::Other, Expand);
+ setOperationAction(ISD::ADDE, MVT::Other, Expand);
+ setOperationAction(ISD::ADDC, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
+
+ // Use the default implementation.
+ setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
+ setOperationAction(ISD::Constant , MVT::i32 , Legal);
+
+ setSchedulingPreference(Sched::RegPressure);
+ setPow2DivIsCheap(false);
+ setSelectIsExpensive(true);
+ setJumpIsExpensive(true);
+
+ MaxStoresPerMemcpy = 4096;
+ MaxStoresPerMemmove = 4096;
+ MaxStoresPerMemset = 4096;
+
+}
+
+bool
+AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const {
+ return false;
+}
+
+// The backend supports 32- and 64-bit floating point immediates.
+bool
+AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool
+AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return false;
+ } else {
+ return true;
+ }
+}
+
+
+// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
+// be zero or one. Op is expected to be a target-specific node. Used by the
+// DAG combiner.
+
+void
+AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt KnownZero2;
+ APInt KnownOne2;
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::SELECT_CC:
+ DAG.ComputeMaskedBits(
+ Op.getOperand(1),
+ KnownZero,
+ KnownOne,
+ Depth + 1
+ );
+    DAG.ComputeMaskedBits(
+      Op.getOperand(0),
+      KnownZero2,
+      KnownOne2,
+      Depth + 1
+    );
+ assert((KnownZero & KnownOne) == 0
+ && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0
+ && "Bits known to be one AND zero?");
+ // Only known if known in both the LHS and RHS
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerSDIV24(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerSREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerSREM8(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Data = Op.getOperand(0);
+ VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+ DebugLoc DL = Op.getDebugLoc();
+ EVT DVT = Data.getValueType();
+ EVT BVT = BaseType->getVT();
+ unsigned baseBits = BVT.getScalarType().getSizeInBits();
+ unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+ unsigned shiftBits = srcBits - baseBits;
+ if (srcBits < 32) {
+    // If the operand is less than 32 bits, it needs to be zero-extended to
+    // 32 bits so the shift sequence below keeps the upper bits valid.
+ EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+ Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+ shiftBits = 32 - baseBits;
+ DVT = IVT;
+ }
+ SDValue Shift = DAG.getConstant(shiftBits, DVT);
+ // Shift left by 'Shift' bits.
+ Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+ // Signed shift Right by 'Shift' bits.
+ Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+ if (srcBits < 32) {
+ // Once the sign extension is done, the op needs to be converted to
+ // its original type.
+ Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+ }
+ return Data;
+}
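+
+// Worked example of the shift-based sign extension above, extending the low
+// 8 bits of an i32 value:
+//   Data      = 0x000000AB   (baseBits = 8, so shiftBits = 24)
+//   after SHL = 0xAB000000
+//   after SRA = 0xFFFFFFAB   (the i8 value -85 sign-extended to i32)
+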
+EVT
+AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
+ int iSize = (size * numEle);
+ int vEle = (iSize >> ((size == 64) ? 6 : 5));
+ if (!vEle) {
+ vEle = 1;
+ }
+ if (size == 64) {
+ if (vEle == 1) {
+ return EVT(MVT::i64);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i64, vEle));
+ }
+ } else {
+ if (vEle == 1) {
+ return EVT(MVT::i32);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i32, vEle));
+ }
+ }
+}
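+
+// genIntType maps a scalar bit width and element count onto the i32/i64
+// based types the backend works with, for example:
+//   genIntType(32, 1) --> i32      genIntType(32, 4) --> v4i32
+//   genIntType(64, 2) --> v2i64    genIntType(8, 2)  --> i32
+// (in the last case the 16 packed bits round down to one 32-bit element).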
+
+SDValue
+AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+ SDValue Result;
+ Result = DAG.getNode(
+ AMDGPUISD::BRANCH_COND,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Chain, Jump, Cond);
+ return Result;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+  } else {
+    // Guard against other vector widths so INTTY and FLTTY are never used
+    // uninitialized.
+    llvm_unreachable("Unhandled vector size in LowerSDIV24");
+  }
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // int ib = (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+ DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+ // int cv = fr >= fb;
+  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+  // The LowerSDIV32 function generates code equivalent to the following IL:
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSelectCC(DL,
+ r0, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSelectCC(DL,
+ r1, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
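+
+// Worked example of the sequence above for LHS = -7, RHS = 2:
+//   r10 = -1 (LHS < 0), r11 = 0 (RHS >= 0)
+//   r0  = (-7 + -1) ^ -1 = 7 and r1 = (2 + 0) ^ 0 = 2   (absolute values)
+//   r0  = 7 udiv 2 = 3
+//   r10 = -1 ^ 0 = -1                                   (sign of the result)
+//   DST = (3 + -1) ^ -1 = -3, matching -7 sdiv 2.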
+
+SDValue
+AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i8) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i8) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i16) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i16) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+  // The LowerSREM32 function generates code equivalent to the following IL:
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
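+
+// Worked example of the sequence above for LHS = -7, RHS = 2:
+//   r10 = -1, r11 = 0, so the abs steps give r0 = 7 and r1 = 2
+//   r20 = 7 udiv 2 = 3, then r20 = 3 * 2 = 6 and r0 = 7 - 6 = 1
+//   DST = (1 + -1) ^ -1 = -1, matching -7 srem 2 (the remainder takes the
+//   sign of the dividend, carried in r10).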
+
+SDValue
+AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td
new file mode 100644
index 000000000000..110f1476513b
--- /dev/null
+++ b/lib/Target/R600/AMDILInstrInfo.td
@@ -0,0 +1,207 @@
+//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file describes the AMDIL instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+// AMDIL Instruction Predicate Definitions
+// Predicate that is set to true if the hardware supports double precision
+// divide
+def HasHWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports double precision,
+// but not double precision divide, in hardware
+def HasSWDDiv : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
+ "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;
+
+// Predicate that is set to true if the hardware supports 24-bit signed
+// math ops. Otherwise a software expansion to 32-bit math ops is used instead.
+def HasHWSign24Bit : Predicate<"Subtarget.device()"
+ "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;
+
+// Predicates that are set depending on whether 64-bit operations are
+// supported in hardware or in software
+def HasHW64Bit : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::LongOps)">;
+def HasSW64Bit : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDGPUDeviceInfo::LongOps)">;
+
+// Predicate that is set to true if the timer register is supported
+def HasTmrRegister : Predicate<"Subtarget.device()"
+ "->isSupported(AMDGPUDeviceInfo::TmrReg)">;
+// Predicate that is true if the device is at least the Evergreen series
+def HasDeviceIDInst : Predicate<"Subtarget.device()"
+ "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;
+
+// Predicate that is true if we have region address space.
+def hasRegionAS : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
+
+// Predicate that is false if we don't have region address space.
+def noRegionAS : Predicate<"!Subtarget.device()"
+ "->isSupported(AMDGPUDeviceInfo::RegionMem)">;
+
+
+// Predicates that are set depending on whether 64-bit multiply is supported
+// in the IL
+def HasHW64Mul : Predicate<"Subtarget.calVersion()"
+ ">= CAL_VERSION_SC_139"
+ "&& Subtarget.device()"
+ "->getGeneration() >="
+ "AMDGPUDeviceInfo::HD5XXX">;
+def HasSW64Mul : Predicate<"Subtarget.calVersion()"
+ "< CAL_VERSION_SC_139">;
+// Predicates that are set depending on whether 64-bit divide/modulo is
+// supported in the IL
+def HasHW64DivMod : Predicate<"Subtarget.device()"
+ "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
+def HasSW64DivMod : Predicate<"Subtarget.device()"
+ "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;
+
+// Predicates that are set depending on whether 64-bit pointers are used.
+def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
+def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+def brtarget : Operand<OtherVT>;
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Type Profiles
+//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Generic Profile Types
+//===----------------------------------------------------------------------===//
+
+def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+ ]>;
+def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
+ ]>;
+def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
+ SDTCisEltOfVec<1, 0>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// Flow Control Profile Types
+//===----------------------------------------------------------------------===//
+// Branch instruction where second and third are basic blocks
+def SDTIL_BRCond : SDTypeProfile<0, 2, [
+ SDTCisVT<0, OtherVT>
+ ]>;
+
+//===--------------------------------------------------------------------===//
+// Custom Selection DAG Nodes
+//===--------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Flow Control DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Call/Return DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+//===--------------------------------------------------------------------===//
+// Instructions
+//===--------------------------------------------------------------------===//
+// Floating point math functions
+def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Integer functions
+//===----------------------------------------------------------------------===//
+def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+
+//===--------------------------------------------------------------------===//
+// Custom Pattern DAG Nodes
+//===--------------------------------------------------------------------===//
+def global_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Load pattern fragments
+//===----------------------------------------------------------------------===//
+// Global address space loads
+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+// Constant address space loads
+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
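+
+// These fragments let an instruction pattern match a generic load or store
+// only when the address-space predicate holds. As a sketch (the exact
+// pattern here is hypothetical), a global load instruction would match with
+// something like:
+//   (set GPRI32:$dst, (global_load ADDR:$ptr))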
+
+//===----------------------------------------------------------------------===//
+// Complex addressing mode patterns
+//===----------------------------------------------------------------------===//
+def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
+def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
+def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
+def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
+
+//===----------------------------------------------------------------------===//
+// Instruction format classes
+//===----------------------------------------------------------------------===//
+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+ let Namespace = "AMDGPU";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n");
+ let isPseudo = 1;
+ let Itinerary = NullALU;
+ bit hasIEEEFlag = 0;
+ bit hasZeroOpFlag = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// Multiclass Instruction formats
+//===--------------------------------------------------------------------===//
+// Multiclass that handles branch instructions
+multiclass BranchConditional<SDNode Op> {
+ def _i32 : ILFormat<(outs),
+ (ins brtarget:$target, GPRI32:$src0),
+ "; i32 Pseudo branch instruction",
+ [(Op bb:$target, GPRI32:$src0)]>;
+ def _f32 : ILFormat<(outs),
+ (ins brtarget:$target, GPRF32:$src0),
+ "; f32 Pseudo branch instruction",
+ [(Op bb:$target, GPRF32:$src0)]>;
+}
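+
+// As a sketch, a target instantiates the multiclass above with the branch
+// node defined earlier (the defm name here is hypothetical):
+//   defm BRANCH_COND : BranchConditional<IL_brcond>;
+// which expands into BRANCH_COND_i32 and BRANCH_COND_f32 pseudo branches.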
+
+// Only scalar types should generate flow control
+multiclass BranchInstr<string name> {
+ def _i32 : ILFormat<(outs), (ins GPRI32:$src),
+ !strconcat(name, " $src"), []>;
+ def _f32 : ILFormat<(outs), (ins GPRF32:$src),
+ !strconcat(name, " $src"), []>;
+}
+// Only scalar types should generate flow control
+multiclass BranchInstr2<string name> {
+ def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+ def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+}
+
+//===--------------------------------------------------------------------===//
+// Intrinsics support
+//===--------------------------------------------------------------------===//
+include "AMDILIntrinsics.td"
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.cpp b/lib/Target/R600/AMDILIntrinsicInfo.cpp
new file mode 100644
index 000000000000..4ddb057d80a7
--- /dev/null
+++ b/lib/Target/R600/AMDILIntrinsicInfo.cpp
@@ -0,0 +1,79 @@
+//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Implementation of the IntrinsicInfo class.
+//
+//===-----------------------------------------------------------------------===//
+
+#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDIL.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+
+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
+ : TargetIntrinsicInfo() {
+}
+
+std::string
+AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
+ unsigned int numTys) const {
+ static const char* const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+  if (IntrID < Intrinsic::num_intrinsics) {
+    // Not an AMDGPU intrinsic; return an empty name rather than constructing
+    // a std::string from a null pointer.
+    return "";
+  }
+ assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
+ && "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned int
+AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const {
+#define GET_FUNCTION_RECOGNIZER
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ AMDGPUIntrinsic::ID IntrinsicID
+ = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
+ IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name);
+
+ if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
+ return IntrinsicID;
+ }
+ return 0;
+}
+
+bool
+AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
+ // Overload Table
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+}
+
+Function*
+AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTys) const {
+ llvm_unreachable("Not implemented");
+}
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.h b/lib/Target/R600/AMDILIntrinsicInfo.h
new file mode 100644
index 000000000000..35559e23fceb
--- /dev/null
+++ b/lib/Target/R600/AMDILIntrinsicInfo.h
@@ -0,0 +1,49 @@
+//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
+//
+//===-----------------------------------------------------------------------===//
+#ifndef AMDIL_INTRINSICS_H
+#define AMDIL_INTRINSICS_H
+
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+class TargetMachine;
+
+namespace AMDGPUIntrinsic {
+enum ID {
+ last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "AMDGPUGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_AMDGPU_intrinsics
+};
+
+} // end namespace AMDGPUIntrinsic
+
+class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
+public:
+ AMDGPUIntrinsicInfo(TargetMachine *tm);
+ std::string getName(unsigned int IntrId, Type **Tys = 0,
+ unsigned int numTys = 0) const;
+ unsigned int lookupName(const char *Name, unsigned int Len) const;
+ bool isOverloaded(unsigned int IID) const;
+ Function *getDeclaration(Module *M, unsigned int ID,
+ Type **Tys = 0,
+ unsigned int numTys = 0) const;
+};
+
+} // end namespace llvm
+
+#endif // AMDIL_INTRINSICS_H
+
diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td
new file mode 100644
index 000000000000..6ec3559af24c
--- /dev/null
+++ b/lib/Target/R600/AMDILIntrinsics.td
@@ -0,0 +1,232 @@
+//===- AMDILIntrinsics.td - Defines AMDIL Intrinsics -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines all of the amdil-specific intrinsics
+//
+//===---------------------------------------------------------------===//
+//===--------------------------------------------------------------------===//
+// Intrinsic classes
+// Generic intrinsic class templates for target-specific intrinsics, used
+// instead of SDNode patterns.
+//===--------------------------------------------------------------------===//
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ class VoidIntLong :
+ Intrinsic<[llvm_i64_ty], [], []>;
+ class VoidIntInt :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class VoidIntBool :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class UnaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class ConvertIntFTOI :
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
+ class ConvertIntITOF :
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
+ class UnaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty], []>;
+ class UnaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty], []>;
+ class BinaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
+ class BinaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
+ class TernaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class TernaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class QuaternaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ class UnaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
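+
+// For reference: an intrinsic declared below with the BinaryIntInt class,
+// such as int_AMDIL_min_i32, takes two operands of one overloadable integer
+// type, returns that same type, and is marked IntrNoMem, so the optimizer
+// may freely CSE and reorder calls to it.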
+
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
+
+ def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
+ TernaryIntInt;
+ def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
+ UnaryIntInt;
+ def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
+ UnaryIntInt;
+ def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
+ UnaryIntInt;
+ def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
+ TernaryIntInt;
+ def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
+ TernaryIntInt;
+ def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
+ QuaternaryIntInt;
+ def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
+ TernaryIntInt;
+ def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
+ BinaryIntInt;
+ def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
+ BinaryIntInt;
+ def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
+ BinaryIntInt;
+ def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
+ BinaryIntInt;
+ def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
+ BinaryIntInt;
+ def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
+ BinaryIntInt;
+ def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
+ BinaryIntFloat;
+ def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
+ BinaryIntInt;
+ def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
+ BinaryIntInt;
+ def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
+ BinaryIntFloat;
+ def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
+ TernaryIntInt;
+ def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
+ TernaryIntInt;
+ def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
+ UnaryIntFloat;
+ def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
+ TernaryIntFloat;
+ def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
+ UnaryIntFloat;
+ def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
+ UnaryIntFloat;
+ def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
+ UnaryIntFloat;
+ def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
+ UnaryIntFloat;
+ def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
+ UnaryIntFloat;
+ def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
+ UnaryIntFloat;
+ def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
+ UnaryIntFloat;
+ def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
+ UnaryIntFloat;
+ def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
+ UnaryIntFloat;
+ def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
+ UnaryIntFloat;
+ def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
+ def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
+ def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
+ def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
+ UnaryIntFloat;
+ def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
+ UnaryIntFloat;
+ def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
+ UnaryIntFloat;
+ def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
+ UnaryIntFloat;
+ def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
+ UnaryIntFloat;
+ def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
+ TernaryIntFloat;
+ def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
+ UnaryIntFloat;
+ def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
+ UnaryIntFloat;
+ def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
+ UnaryIntFloat;
+ def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
+ TernaryIntFloat;
+ def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i32_ty], []>;
+
+ def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
+ def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
+ def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+ def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
+ ConvertIntITOF;
+ def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
+ ConvertIntFTOI;
+ def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
+ def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
+ ConvertIntITOF;
+ def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
+ ConvertIntITOF;
+ def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty, llvm_float_ty], []>;
+ def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty], []>;
+ def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+ def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
+}
diff --git a/lib/Target/R600/AMDILNIDevice.cpp b/lib/Target/R600/AMDILNIDevice.cpp
new file mode 100644
index 000000000000..47c3f7f209d6
--- /dev/null
+++ b/lib/Target/R600/AMDILNIDevice.cpp
@@ -0,0 +1,65 @@
+//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILNIDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+
+using namespace llvm;
+
+AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+ std::string name = ST->getDeviceName();
+ if (name == "caicos") {
+ DeviceFlag = OCL_DEVICE_CAICOS;
+ } else if (name == "turks") {
+ DeviceFlag = OCL_DEVICE_TURKS;
+ } else if (name == "cayman") {
+ DeviceFlag = OCL_DEVICE_CAYMAN;
+ } else {
+ DeviceFlag = OCL_DEVICE_BARTS;
+ }
+}
+AMDGPUNIDevice::~AMDGPUNIDevice() {
+}
+
+size_t
+AMDGPUNIDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDGPUNIDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD6XXX;
+}
+
+
+AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
+ : AMDGPUNIDevice(ST) {
+ setCaps();
+}
+
+AMDGPUCaymanDevice::~AMDGPUCaymanDevice() {
+}
+
+void
+AMDGPUCaymanDevice::setCaps() {
+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
+ mHWBits.set(AMDGPUDeviceInfo::FMA);
+ }
+ mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
+ mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
+ mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
+}
+
diff --git a/lib/Target/R600/AMDILNIDevice.h b/lib/Target/R600/AMDILNIDevice.h
new file mode 100644
index 000000000000..24a640845eab
--- /dev/null
+++ b/lib/Target/R600/AMDILNIDevice.h
@@ -0,0 +1,57 @@
+//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef AMDILNIDEVICE_H
+#define AMDILNIDEVICE_H
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+
+namespace llvm {
+
+class AMDGPUSubtarget;
+//===---------------------------------------------------------------------===//
+// NI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+/// \brief The AMDGPUNIDevice is the base class for all Northern Islands
+/// series cards.
+///
+/// It is very similar to the AMDGPUEvergreenDevice, with the major exception
+/// being differences in wavefront size and hardware capabilities. The NI
+/// devices all have 64-wide wavefronts and also add support for signed 24-bit
+/// integer operations.
+class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUNIDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUNIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+};
+
+/// Just as the AMDGPUCypressDevice is the double-capable version of the
+/// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double-capable
+/// version of the AMDGPUNIDevice. The other major difference is that the
+/// Cayman device has 4-wide ALUs, whereas the rest of the NI family has
+/// 5-wide ALUs.
+class AMDGPUCaymanDevice: public AMDGPUNIDevice {
+public:
+ AMDGPUCaymanDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUCaymanDevice();
+private:
+ virtual void setCaps();
+};
+
+static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
+} // namespace llvm
+#endif // AMDILNIDEVICE_H
diff --git a/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
new file mode 100644
index 000000000000..3a28038666f7
--- /dev/null
+++ b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
@@ -0,0 +1,1215 @@
+//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "PeepholeOpt"
+#ifdef DEBUG
+#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
+#else
+#define DEBUGME 0
+#endif
+
+#include "AMDILDevices.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+#include <sstream>
+
+#if 0
+STATISTIC(PointerAssignments, "Number of dynamic pointer "
+          "assignments discovered");
+STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
+#endif
+
+using namespace llvm;
+// The peephole optimization pass is used to do simple last-minute
+// optimizations that are required for correct code or to remove redundant
+// functions.
+namespace {
+
+class OpaqueType;
+
+class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
+public:
+ TargetMachine &TM;
+ static char ID;
+ AMDGPUPeepholeOpt(TargetMachine &tm);
+ ~AMDGPUPeepholeOpt();
+ const char *getPassName() const;
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+protected:
+private:
+ // Function to initiate all of the instruction level optimizations.
+ bool instLevelOptimizations(BasicBlock::iterator *inst);
+  // Quick check to see if we need to dump all of the pointers into the
+  // arena. If so, we set all pointers to exist in the arena. This is a
+  // workaround for aliasing of pointers in a struct/union.
+ bool dumpAllIntoArena(Function &F);
+  // Because we don't want to invalidate any pointers while inside
+  // safeNestedForEach, we push atomic conversions onto a vector and handle
+  // them later. This function does the conversions if required.
+ void doAtomicConversionIfNeeded(Function &F);
+  // Because __amdil_is_constant cannot be properly evaluated if
+  // optimizations are disabled, the calls are placed in a vector and
+  // evaluated after the __amdil_image* functions are evaluated, which
+  // should allow the __amdil_is_constant function to be evaluated
+  // correctly.
+ void doIsConstCallConversionIfNeeded();
+ bool mChanged;
+ bool mDebug;
+ bool mConvertAtomics;
+ CodeGenOpt::Level optLevel;
+ // Run a series of tests to see if we can optimize a CALL instruction.
+ bool optimizeCallInst(BasicBlock::iterator *bbb);
+ // A peephole optimization to optimize bit extract sequences.
+ bool optimizeBitExtract(Instruction *inst);
+ // A peephole optimization to optimize bit insert sequences.
+ bool optimizeBitInsert(Instruction *inst);
+ bool setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift);
+ // Expand the bit field insert instruction on versions of OpenCL that
+ // don't support it.
+ bool expandBFI(CallInst *CI);
+ // Expand the bit field mask instruction on version of OpenCL that
+ // don't support it.
+ bool expandBFM(CallInst *CI);
+  // 7XX and 8XX devices do not have 24-bit signed operations, so in this
+  // case we need to expand them. These functions check for 24-bit operations
+  // and then expand them.
+ bool isSigned24BitOps(CallInst *CI);
+ void expandSigned24BitOps(CallInst *CI);
+ // One optimization that can occur is that if the required workgroup size is
+ // specified then the result of get_local_size is known at compile time and
+ // can be returned accordingly.
+ bool isRWGLocalOpt(CallInst *CI);
+  // On Northern Islands cards the division is slightly less accurate than on
+  // previous generations, so we need to utilize a more accurate division
+  // there, and can translate the accurate divide to a normal divide on all
+  // other cards.
+ bool convertAccurateDivide(CallInst *CI);
+ void expandAccurateDivide(CallInst *CI);
+ // If the alignment is set incorrectly, it can produce really inefficient
+ // code. This checks for this scenario and fixes it if possible.
+ bool correctMisalignedMemOp(Instruction *inst);
+
+ // If we are in no opt mode, then we need to make sure that
+ // local samplers are properly propagated as constant propagation
+ // doesn't occur and we need to know the value of kernel defined
+ // samplers at compile time.
+ bool propagateSamplerInst(CallInst *CI);
+
+ // Helper functions
+
+  // Group of functions that recursively calculate the size of a structure
+  // based on its sub-types.
+ size_t getTypeSize(Type * const T, bool dereferencePtr = false);
+ size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
+ size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
+ size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
+ size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
+ size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
+ size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
+ size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
+
+ LLVMContext *mCTX;
+ Function *mF;
+ const AMDGPUSubtarget *mSTM;
+ SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
+ SmallVector<CallInst *, 16> isConstVec;
+}; // class AMDGPUPeepholeOpt
+ char AMDGPUPeepholeOpt::ID = 0;
+
+// A template function that has two levels of looping before calling the
+// given functor with a pointer to the current iterator. The functor must
+// advance the iterator itself (and return true) whenever it erases the
+// element being visited; when it returns false the loop advances the
+// iterator.
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+ SecondIterator S, Function F) {
+ for ( ; First != Last; ++First) {
+ SecondIterator sf, sl;
+ for (sf = First->begin(), sl = First->end();
+ sf != sl; ) {
+ if (!F(&sf)) {
+ ++sf;
+ }
+ }
+ }
+ return F;
+}
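+
+// Typical use (see runOnFunction below): visit every instruction of every
+// basic block in a function while tolerating erasure of the instruction
+// being visited:
+//   safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+//       std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
+//                    this));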
+
+} // anonymous namespace
+
+namespace llvm {
+ FunctionPass *
+ createAMDGPUPeepholeOpt(TargetMachine &tm) {
+ return new AMDGPUPeepholeOpt(tm);
+ }
+} // llvm namespace
+
+AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
+ : FunctionPass(ID), TM(tm) {
+ mDebug = DEBUGME;
+ optLevel = TM.getOptLevel();
+
+}
+
+AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() {
+}
+
+const char *
+AMDGPUPeepholeOpt::getPassName() const {
+ return "AMDGPU PeepHole Optimization Pass";
+}
+
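+// Returns true if Ty is, or transitively contains, a pointer type; e.g. a
+// struct { i32*, i32 } or a [4 x i8*] array contains a pointer, while a
+// plain i32 or <4 x float> does not.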
+bool
+containsPointerType(Type *Ty) {
+ if (!Ty) {
+ return false;
+ }
+ switch(Ty->getTypeID()) {
+ default:
+ return false;
+ case Type::StructTyID: {
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ for (StructType::element_iterator stb = ST->element_begin(),
+ ste = ST->element_end(); stb != ste; ++stb) {
+ if (!containsPointerType(*stb)) {
+ continue;
+ }
+ return true;
+ }
+ break;
+ }
+ case Type::VectorTyID:
+ case Type::ArrayTyID:
+ return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
+ case Type::PointerTyID:
+ return true;
+  }
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) {
+ bool dumpAll = false;
+ for (Function::const_arg_iterator cab = F.arg_begin(),
+ cae = F.arg_end(); cab != cae; ++cab) {
+ const Argument *arg = cab;
+ const PointerType *PT = dyn_cast<PointerType>(arg->getType());
+ if (!PT) {
+ continue;
+ }
+ Type *DereferencedType = PT->getElementType();
+ if (!dyn_cast<StructType>(DereferencedType)
+ ) {
+ continue;
+ }
+ if (!containsPointerType(DereferencedType)) {
+ continue;
+ }
+ // FIXME: Because a pointer inside of a struct/union may be aliased to
+  // another pointer, we need to take the conservative approach and place all
+ // pointers into the arena until more advanced detection is implemented.
+ dumpAll = true;
+ }
+ return dumpAll;
+}
+void
+AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() {
+ if (isConstVec.empty()) {
+ return;
+ }
+ for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
+ CallInst *CI = isConstVec[x];
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ CI->eraseFromParent();
+ }
+ isConstVec.clear();
+}
+void
+AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) {
+ // Don't do anything if we don't have any atomic operations.
+ if (atomicFuncs.empty()) {
+ return;
+ }
+ // Change the function name for the atomic if it is required
+ uint32_t size = atomicFuncs.size();
+ for (uint32_t x = 0; x < size; ++x) {
+ atomicFuncs[x].first->setOperand(
+ atomicFuncs[x].first->getNumOperands()-1,
+ atomicFuncs[x].second);
+
+ }
+ mChanged = true;
+ if (mConvertAtomics) {
+ return;
+ }
+}
+
+bool
+AMDGPUPeepholeOpt::runOnFunction(Function &MF) {
+ mChanged = false;
+ mF = &MF;
+ mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
+ if (mDebug) {
+ MF.dump();
+ }
+ mCTX = &MF.getType()->getContext();
+ mConvertAtomics = true;
+ safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
+ std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
+ this));
+
+ doAtomicConversionIfNeeded(MF);
+ doIsConstCallConversionIfNeeded();
+
+ if (mDebug) {
+ MF.dump();
+ }
+ return mChanged;
+}
+
+bool
+AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) {
+ Instruction *inst = (*bbb);
+ CallInst *CI = dyn_cast<CallInst>(inst);
+ if (!CI) {
+ return false;
+ }
+ if (isSigned24BitOps(CI)) {
+ expandSigned24BitOps(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (propagateSamplerInst(CI)) {
+ return false;
+ }
+ if (expandBFI(CI) || expandBFM(CI)) {
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ if (convertAccurateDivide(CI)) {
+ expandAccurateDivide(CI);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+
+ StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
+ if (calleeName.startswith("__amdil_is_constant")) {
+    // If optimizations are disabled, this cannot be properly
+    // evaluated yet, so we add the call instruction to a vector
+    // and process it at the end of processing, after the samplers
+    // have been correctly handled.
+ if (optLevel == CodeGenOpt::None) {
+ isConstVec.push_back(CI);
+ return false;
+ } else {
+ Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
+ : ConstantInt::get(aType, 0);
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ }
+
+ if (calleeName.equals("__amdil_is_asic_id_i32")) {
+ ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
+ Type *aType = Type::getInt32Ty(*mCTX);
+ Value *Val = CV;
+ if (Val) {
+ Val = ConstantInt::get(aType,
+ mSTM->device()->getDeviceFlag() & CV->getZExtValue());
+ } else {
+ Val = ConstantInt::get(aType, 0);
+ }
+ CI->replaceAllUsesWith(Val);
+ ++(*bbb);
+ CI->eraseFromParent();
+ return true;
+ }
+ Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
+ if (!F) {
+ return false;
+ }
+ if (F->getName().startswith("__atom") && !CI->getNumUses()
+ && F->getName().find("_xchg") == StringRef::npos) {
+ std::string buffer(F->getName().str() + "_noret");
+ F = dyn_cast<Function>(
+ F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
+ atomicFuncs.push_back(std::make_pair(CI, F));
+ }
+
+ if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
+ && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
+ return false;
+ }
+ if (!mConvertAtomics) {
+ return false;
+ }
+ StringRef name = F->getName();
+ if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
+ mConvertAtomics = false;
+ }
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
+ Instruction *&src,
+ Constant *&mask,
+ Constant *&shift) {
+ if (!base) {
+ if (mDebug) {
+ dbgs() << "Null pointer passed into function.\n";
+ }
+ return false;
+ }
+ bool andOp = false;
+ if (base->getOpcode() == Instruction::Shl) {
+ shift = dyn_cast<Constant>(base->getOperand(1));
+ } else if (base->getOpcode() == Instruction::And) {
+ mask = dyn_cast<Constant>(base->getOperand(1));
+ andOp = true;
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
+ }
+    // If the base is neither a Shl nor an And, it doesn't fit any of the patterns above.
+ return false;
+ }
+ src = dyn_cast<Instruction>(base->getOperand(0));
+ if (!src) {
+ if (mDebug) {
+ dbgs() << "Failed setup since the base operand is not an instruction!\n";
+ }
+ return false;
+ }
+ // If we find an 'and' operation, then we don't need to
+ // find the next operation as we already know the
+ // bits that are valid at this point.
+ if (andOp) {
+ return true;
+ }
+ if (src->getOpcode() == Instruction::Shl && !shift) {
+ shift = dyn_cast<Constant>(src->getOperand(1));
+ src = dyn_cast<Instruction>(src->getOperand(0));
+ } else if (src->getOpcode() == Instruction::And && !mask) {
+ mask = dyn_cast<Constant>(src->getOperand(1));
+ }
+ if (!mask && !shift) {
+ if (mDebug) {
+ dbgs() << "Failed setup since both mask and shift are NULL!\n";
+ }
+ // Did not find a constant mask or a shift.
+ return false;
+ }
+ return true;
+}
+bool
+AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) {
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::Or) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+  // We want to do an optimization on a sequence of ops that together equal a
+  // single ISA instruction.
+ // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
+ // Some simplified versions of this pattern are as follows:
+ // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
+ // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
+ // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
+ // (A & B) | (D << F) when (1 << F) >= B
+ // (A << C) | (D & E) when (1 << C) >= E
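+  // As a concrete sketch: (A & 0xFFFF0000) | (D & 0x0000FFFF) yields
+  // lhsMaskWidth = 16, lhsMaskOffset = 16, rhsMaskWidth = 16 and
+  // rhsMaskOffset = 0 below, so it can be rewritten as
+  //   __amdil_ubit_insert(16, 16, A >> 16, D & 0x0000FFFF)
+  // which inserts the top half of A above the bottom half of D in a single
+  // instruction.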
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
+ // The HD4XXX hardware doesn't support the ubit_insert instruction.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = 1;
+ // This optimization only works on 32bit integers.
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+    // We currently cannot support more than 4 elements in an intrinsic and we
+    // cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ // TODO: Handle vectors.
+ if (isVector) {
+ if (mDebug) {
+ dbgs() << "!!! Vectors are not supported yet!\n";
+ }
+ return false;
+ }
+ Instruction *LHSSrc = NULL, *RHSSrc = NULL;
+ Constant *LHSMask = NULL, *RHSMask = NULL;
+ Constant *LHSShift = NULL, *RHSShift = NULL;
+ Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
+ Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
+ if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (LHS) { LHS->dump(); }
+ if (LHSSrc) { LHSSrc->dump(); }
+ if (LHSMask) { LHSMask->dump(); }
+ if (LHSShift) { LHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
+ if (mDebug) {
+ dbgs() << "Found an OR Operation that failed setup!\n";
+ inst->dump();
+ if (RHS) { RHS->dump(); }
+ if (RHSSrc) { RHSSrc->dump(); }
+ if (RHSMask) { RHSMask->dump(); }
+ if (RHSShift) { RHSShift->dump(); }
+ }
+ // There was an issue with the setup for BitInsert.
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
+ dbgs() << "Op: "; inst->dump();
+ dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
+ dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
+ }
+ Constant *offset = NULL;
+ Constant *width = NULL;
+ uint32_t lhsMaskVal = 0, rhsMaskVal = 0;
+ uint32_t lhsShiftVal = 0, rhsShiftVal = 0;
+ uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
+ uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
+ lhsMaskVal = (LHSMask
+ ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
+ rhsMaskVal = (RHSMask
+ ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
+ lhsShiftVal = (LHSShift
+ ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
+ rhsShiftVal = (RHSShift
+ ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
+ lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
+ rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
+ lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
+ rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
+  // TODO: Handle the case of A & B | D & ~B (i.e. inverted masks).
+ if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
+ return false;
+ }
+ if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
+ offset = ConstantInt::get(aType, lhsMaskOffset, false);
+ width = ConstantInt::get(aType, lhsMaskWidth, false);
+ RHSSrc = RHS;
+ if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
+ return false;
+ }
+ if (!LHSShift) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", LHS);
+ } else if (lhsShiftVal != lhsMaskOffset) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", LHS);
+ }
+ if (mDebug) {
+ dbgs() << "Optimizing LHS!\n";
+ }
+ } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
+ offset = ConstantInt::get(aType, rhsMaskOffset, false);
+ width = ConstantInt::get(aType, rhsMaskWidth, false);
+ LHSSrc = RHSSrc;
+ RHSSrc = LHS;
+ if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
+ return false;
+ }
+ if (!RHSShift) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", RHS);
+ } else if (rhsShiftVal != rhsMaskOffset) {
+ LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
+ "MaskShr", RHS);
+ }
+ if (mDebug) {
+ dbgs() << "Optimizing RHS!\n";
+ }
+ } else {
+ if (mDebug) {
+ dbgs() << "Failed constraint 3!\n";
+ }
+ return false;
+ }
+ if (mDebug) {
+ dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
+ }
+ if (!offset || !width) {
+ if (mDebug) {
+ dbgs() << "Either width or offset are NULL, failed detection!\n";
+ }
+ return false;
+ }
+  // Let's create the function signature.
+ std::vector<Type *> callTypes;
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+ std::string name = "__amdil_ubit_insert";
+ if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[4] = {
+ width,
+ offset,
+ LHSSrc,
+ RHSSrc
+ };
+ CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
+ if (mDebug) {
+ dbgs() << "Old Inst: ";
+ inst->dump();
+ dbgs() << "New Inst: ";
+ CI->dump();
+ dbgs() << "\n\n";
+ }
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) {
+ if (!inst) {
+ return false;
+ }
+ if (!inst->isBinaryOp()) {
+ return false;
+ }
+ if (inst->getOpcode() != Instruction::And) {
+ return false;
+ }
+ if (optLevel == CodeGenOpt::None) {
+ return false;
+ }
+  // We want to do some simple optimizations on shift-right/and patterns. The
+  // basic optimization is to turn (A >> B) & C, where A is a 32-bit type, B
+  // is a value smaller than 32 and C is a constant mask. If C is a constant
+  // value, then the following transformation can occur. For signed integers,
+  // it turns into the function call
+  //   dst = __amdil_ibit_extract(width, B, A)
+  // and for unsigned integers it turns into the function call
+  //   dst = __amdil_ubit_extract(width, B, A)
+  // where width is the number of set bits in C (i.e. log2(C + 1)). The
+  // __amdil_[u|i]bit_extract functions are described in Section 7.9 of the
+  // ATI IL spec in the Stream SDK for Evergreen hardware.
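+  // For instance, (A >> 8) & 0xFF extracts an 8-bit field at offset 8: C =
+  // 0xFF has 8 set bits, so for a logical shift right this becomes
+  //   dst = __amdil_ubit_extract(8, 8, A)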
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
+ // This does not work on HD4XXX hardware.
+ return false;
+ }
+ Type *aType = inst->getType();
+ bool isVector = aType->isVectorTy();
+
+ // XXX Support vector types
+ if (isVector) {
+ return false;
+ }
+ int numEle = 1;
+ // This only works on 32bit integers
+ if (aType->getScalarType()
+ != Type::getInt32Ty(inst->getContext())) {
+ return false;
+ }
+ if (isVector) {
+ const VectorType *VT = dyn_cast<VectorType>(aType);
+ numEle = VT->getNumElements();
+    // We currently cannot support more than 4 elements in an intrinsic and we
+    // cannot support Vec3 types.
+ if (numEle > 4 || numEle == 3) {
+ return false;
+ }
+ }
+ BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
+ // If the first operand is not a shift instruction, then we can return as it
+ // doesn't match this pattern.
+ if (!ShiftInst || !ShiftInst->isShift()) {
+ return false;
+ }
+  // If it is a shift left, then it doesn't match this pattern.
+ if (ShiftInst->getOpcode() == Instruction::Shl) {
+ return false;
+ }
+ bool isSigned = ShiftInst->isArithmeticShift();
+ Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
+ Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
+  // Let's make sure that the shift value and the AND mask are constant integers.
+ if (!AndMask || !ShrVal) {
+ return false;
+ }
+ Constant *newMaskConst;
+ Constant *shiftValConst;
+ if (isVector) {
+ // Handle the vector case
+ std::vector<Constant *> maskVals;
+ std::vector<Constant *> shiftVals;
+ ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
+ ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
+ Type *scalarType = AndMaskVec->getType()->getScalarType();
+ assert(AndMaskVec->getNumOperands() ==
+ ShrValVec->getNumOperands() && "cannot have a "
+ "combination where the number of elements to a "
+ "shift and an and are different!");
+ for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
+ ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
+ ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
+ if (!AndCI || !ShiftIC) {
+ return false;
+ }
+ uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+ uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
+ // If the mask or shiftval is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+      // If the mask val is greater than the number of original bits left
+      // then this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
+ shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
+ }
+ newMaskConst = ConstantVector::get(maskVals);
+ shiftValConst = ConstantVector::get(shiftVals);
+ } else {
+ // Handle the scalar case
+ uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
+ // This must be a mask value where all lower bits are set to 1 and then any
+ // bit higher is set to 0.
+ if (!isMask_32(maskVal)) {
+ return false;
+ }
+ maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
+    // Count the number of bits set in the mask; this is the width of the
+    // resulting bit field that is extracted from the source value.
+ uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
+ // If the mask or shift val is greater than the bitcount, then break out.
+ if (maskVal >= 32 || shiftVal >= 32) {
+ return false;
+ }
+    // If the mask val is greater than the number of original bits left then
+    // this optimization is invalid.
+ if (maskVal > (32 - shiftVal)) {
+ return false;
+ }
+ newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
+ shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
+ }
+  // Let's create the function signature.
+ std::vector<Type *> callTypes;
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ callTypes.push_back(aType);
+ FunctionType *funcType = FunctionType::get(aType, callTypes, false);
+ std::string name = "llvm.AMDGPU.bit.extract.u32";
+ if (isVector) {
+ name += ".v" + itostr(numEle) + "i32";
+ } else {
+ name += ".";
+ }
+  // Let's create the function.
+ Function *Func =
+ dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[3] = {
+ ShiftInst->getOperand(0),
+ shiftValConst,
+ newMaskConst
+ };
+  // Let's create the call with the operands.
+ CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
+ CI->setDoesNotAccessMemory();
+ CI->insertBefore(inst);
+ inst->replaceAllUsesWith(CI);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::expandBFI(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfi")) {
+ return false;
+ }
+ Type* type = CI->getOperand(0)->getType();
+ Constant *negOneConst = NULL;
+ if (type->isVectorTy()) {
+ std::vector<Constant *> negOneVals;
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ negOneVals.push_back(negOneConst);
+ }
+ negOneConst = ConstantVector::get(negOneVals);
+ } else {
+ negOneConst = ConstantInt::get(CI->getContext(),
+ APInt(32, StringRef("-1"), 10));
+ }
+ // __amdil_bfi => (A & B) | (~A & C)
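+  // bfi is a per-bit select: where A has a 1 the result takes the bit from
+  // B, otherwise from C. For example, with A = 0x0000FFFF the result
+  // combines the low 16 bits of B with the high 16 bits of C.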
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ CI->getOperand(1), "bfi_and", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
+ "bfi_not", CI);
+ rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
+ "bfi_and", CI);
+ lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::expandBFM(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ if (!LHS->getName().startswith("__amdil_bfm")) {
+ return false;
+ }
+  // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1F)
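+  // bfm builds a mask of src0 consecutive bits starting at bit src1; e.g.
+  // src0 = 8 and src1 = 4 give ((1 << 8) - 1) << 4 = 0x00000FF0.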
+ Constant *newMaskConst = NULL;
+ Constant *newShiftConst = NULL;
+ Type* type = CI->getOperand(0)->getType();
+ if (type->isVectorTy()) {
+ std::vector<Constant*> newMaskVals, newShiftVals;
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ for (size_t x = 0,
+ y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
+ newMaskVals.push_back(newMaskConst);
+ newShiftVals.push_back(newShiftConst);
+ }
+ newMaskConst = ConstantVector::get(newMaskVals);
+ newShiftConst = ConstantVector::get(newShiftVals);
+ } else {
+ newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
+ newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
+ }
+ BinaryOperator *lhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(0),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
+ lhs, "bfm_shl", CI);
+ lhs = BinaryOperator::Create(Instruction::Sub, lhs,
+ newShiftConst, "bfm_sub", CI);
+ BinaryOperator *rhs =
+ BinaryOperator::Create(Instruction::And, CI->getOperand(1),
+ newMaskConst, "bfm_mask", CI);
+ lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
+ CI->replaceAllUsesWith(lhs);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) {
+ Instruction *inst = (*bbb);
+ if (optimizeCallInst(bbb)) {
+ return true;
+ }
+ if (optimizeBitExtract(inst)) {
+ return false;
+ }
+ if (optimizeBitInsert(inst)) {
+ return false;
+ }
+ if (correctMisalignedMemOp(inst)) {
+ return false;
+ }
+ return false;
+}
+bool
+AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) {
+ LoadInst *linst = dyn_cast<LoadInst>(inst);
+ StoreInst *sinst = dyn_cast<StoreInst>(inst);
+ unsigned alignment;
+ Type* Ty = inst->getType();
+ if (linst) {
+ alignment = linst->getAlignment();
+ Ty = inst->getType();
+ } else if (sinst) {
+ alignment = sinst->getAlignment();
+ Ty = sinst->getValueOperand()->getType();
+ } else {
+ return false;
+ }
+ unsigned size = getTypeSize(Ty);
+  if (size <= alignment) {
+ return false;
+ }
+ if (!Ty->isStructTy()) {
+ return false;
+ }
+ if (alignment < 4) {
+ if (linst) {
+ linst->setAlignment(0);
+ return true;
+ } else if (sinst) {
+ sinst->setAlignment(0);
+ return true;
+ }
+ }
+ return false;
+}
+bool
+AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
+ std::string namePrefix = LHS->getName().substr(0, 14);
+ if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
+      && namePrefix != "__amdil_imul24_high") {
+ return false;
+ }
+ if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
+ return false;
+ }
+ return true;
+}
+
+void
+AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) {
+ assert(isSigned24BitOps(CI) && "Must be a "
+ "signed 24 bit operation to call this function!");
+ Value *LHS = CI->getOperand(CI->getNumOperands()-1);
+  // On 7XX and 8XX we do not have signed 24-bit operations, so we need to
+  // expand them as follows:
+ // imul24 turns into 32bit imul
+ // imad24 turns into 32bit imad
+ // imul24_high turns into 32bit imulhigh
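+  // For example, t = __amdil_imul24(a, b) is rewritten below as a plain
+  // 32-bit multiply, t = mul i32 a, b, which produces the same low 32 bits
+  // for operands that fit in 24 bits.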
+ if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
+ Type *aType = CI->getOperand(0)->getType();
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ callTypes.push_back(CI->getOperand(2)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imad";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[3] = {
+ CI->getOperand(0),
+ CI->getOperand(1),
+ CI->getOperand(2)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+ } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
+ BinaryOperator *mulOp =
+ BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
+ CI->getOperand(1), "imul24", CI);
+ CI->replaceAllUsesWith(mulOp);
+ } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
+ Type *aType = CI->getOperand(0)->getType();
+
+ bool isVector = aType->isVectorTy();
+ int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
+ std::vector<Type*> callTypes;
+ callTypes.push_back(CI->getOperand(0)->getType());
+ callTypes.push_back(CI->getOperand(1)->getType());
+ FunctionType *funcType =
+ FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
+ std::string name = "__amdil_imul_high";
+ if (isVector) {
+ name += "_v" + itostr(numEle) + "i32";
+ } else {
+ name += "_i32";
+ }
+ Function *Func = dyn_cast<Function>(
+ CI->getParent()->getParent()->getParent()->
+ getOrInsertFunction(StringRef(name), funcType));
+ Value *Operands[2] = {
+ CI->getOperand(0),
+ CI->getOperand(1)
+ };
+ CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
+ nCI->insertBefore(CI);
+ CI->replaceAllUsesWith(nCI);
+ }
+}
+
+bool
+AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) {
+ return (CI != NULL
+ && CI->getOperand(CI->getNumOperands() - 1)->getName()
+ == "__amdil_get_local_size_int");
+}
+
+bool
+AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) {
+ if (!CI) {
+ return false;
+ }
+ if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
+ && (mSTM->getDeviceName() == "cayman")) {
+ return false;
+ }
+ return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
+ == "__amdil_improved_div";
+}
+
+void
+AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) {
+ assert(convertAccurateDivide(CI)
+ && "expanding accurate divide can only happen if it is expandable!");
+ BinaryOperator *divOp =
+ BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
+ CI->getOperand(1), "fdiv32", CI);
+ CI->replaceAllUsesWith(divOp);
+}
+
+bool
+AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) {
+ if (optLevel != CodeGenOpt::None) {
+ return false;
+ }
+
+ if (!CI) {
+ return false;
+ }
+
+  unsigned funcNameIdx = CI->getNumOperands() - 1;
+ StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
+ if (calleeName != "__amdil_image2d_read_norm"
+ && calleeName != "__amdil_image2d_read_unnorm"
+ && calleeName != "__amdil_image3d_read_norm"
+ && calleeName != "__amdil_image3d_read_unnorm") {
+ return false;
+ }
+
+  unsigned samplerIdx = 1;
+ Value *sampler = CI->getOperand(samplerIdx);
+ LoadInst *lInst = dyn_cast<LoadInst>(sampler);
+ if (!lInst) {
+ return false;
+ }
+
+ if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return false;
+ }
+
+ GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
+ // If we are loading from what is not a global value, then we
+ // fail and return.
+ if (!gv) {
+ return false;
+ }
+
+  // If we don't have an initializer, or the initializer is not a 32-bit
+  // integer, we fail.
+ if (!gv->hasInitializer()
+ || !gv->getInitializer()->getType()->isIntegerTy(32)) {
+ return false;
+ }
+
+  // Now that we have the global variable initializer, let's replace
+ // all uses of the load instruction with the samplerVal and
+ // reparse the __amdil_is_constant() function.
+ Constant *samplerVal = gv->getInitializer();
+ lInst->replaceAllUsesWith(samplerVal);
+ return true;
+}
+
+bool
+AMDGPUPeepholeOpt::doInitialization(Module &M) {
+ return false;
+}
+
+bool
+AMDGPUPeepholeOpt::doFinalization(Module &M) {
+ return false;
+}
+
+void
+AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
+ size_t size = 0;
+ if (!T) {
+ return size;
+ }
+ switch (T->getTypeID()) {
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ assert(0 && "These types are not supported by this backend");
+ default:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ size = T->getPrimitiveSizeInBits() >> 3;
+ break;
+ case Type::PointerTyID:
+ size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
+ break;
+ case Type::IntegerTyID:
+ size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
+ break;
+ case Type::StructTyID:
+ size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
+ break;
+ case Type::ArrayTyID:
+ size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
+ break;
+ case Type::FunctionTyID:
+ size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
+ break;
+ case Type::VectorTyID:
+ size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
+ break;
+  }
+ return size;
+}
+
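+// For example, a struct of { i32, float, i16 } is reported as 4 + 4 + 2 = 10
+// bytes: the element sizes are simply summed and no padding or alignment is
+// added.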
+size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
+ bool dereferencePtr) {
+ size_t size = 0;
+ if (!ST) {
+ return size;
+ }
+ Type *curType;
+ StructType::element_iterator eib;
+ StructType::element_iterator eie;
+ for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
+ curType = *eib;
+ size += getTypeSize(curType, dereferencePtr);
+ }
+ return size;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
+ bool dereferencePtr) {
+ return IT ? (IT->getBitWidth() >> 3) : 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
+ bool dereferencePtr) {
+ assert(0 && "Should not be able to calculate the size of an function type");
+ return 0;
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
+ bool dereferencePtr) {
+ return (size_t)(AT ? (getTypeSize(AT->getElementType(),
+ dereferencePtr) * AT->getNumElements())
+ : 0);
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
+ bool dereferencePtr) {
+ return VT ? (VT->getBitWidth() >> 3) : 0;
+}
+
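+// A pointer to a struct in the private address space is counted as the size
+// of the pointed-to struct; any other pointer counts as 4 bytes unless
+// dereferencePtr is set, in which case the sizes of the contained types are
+// summed instead.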
+size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
+ bool dereferencePtr) {
+ if (!PT) {
+ return 0;
+ }
+ Type *CT = PT->getElementType();
+ if (CT->getTypeID() == Type::StructTyID &&
+ PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ return getTypeSize(dyn_cast<StructType>(CT));
+ } else if (dereferencePtr) {
+ size_t size = 0;
+ for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
+ size += getTypeSize(PT->getContainedType(x), dereferencePtr);
+ }
+ return size;
+ } else {
+ return 4;
+ }
+}
+
+size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
+ bool dereferencePtr) {
+ //assert(0 && "Should not be able to calculate the size of an opaque type");
+ return 4;
+}
diff --git a/lib/Target/R600/AMDILRegisterInfo.td b/lib/Target/R600/AMDILRegisterInfo.td
new file mode 100644
index 000000000000..b9d033432e8c
--- /dev/null
+++ b/lib/Target/R600/AMDILRegisterInfo.td
@@ -0,0 +1,107 @@
+//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Declarations that describe the AMDIL register file
+//
+//===----------------------------------------------------------------------===//
+
+class AMDILReg<bits<16> num, string n> : Register<n> {
+ field bits<16> Value;
+ let Value = num;
+ let Namespace = "AMDGPU";
+}
+
+// We will start with 8 registers for each class before expanding to more.
+// Since the swizzle is added based on the register class, we can leave it
+// off here and just specify different registers for different register classes.
+def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
+def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
+def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
+def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
+def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
+def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
+def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
+def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
+def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
+def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
+def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
+def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
+def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
+def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
+def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
+def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
+def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
+def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
+
+// All registers between 1000 and 1024 are reserved and cannot be used
+// unless uncommented in this section.
+// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local IDs
+// r1020 is used to hold the frame index for local arrays
+// r1019 is used to hold the dynamic stack allocation pointer
+// r1018 is used as a temporary register for handwritten code
+// r1017 is used as a temporary register for handwritten code
+// r1016 is used as a temporary register for load/store code
+// r1015 is used as a temporary register for data segment offset
+// r1014 is used as a temporary register for store code
+// r1013 is used as the section data pointer register
+// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
+// r1009 is used as the frame pointer register
+// r999 is used as the mem register.
+// r998 is used as the return address register.
+//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
+//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
+//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
+//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
+//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
+//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
+def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
+def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
+def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
+def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
+def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
+def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
+def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
+def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
+def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
+def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
+def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
+def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
+def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
+def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
+def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
+def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
+def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
+def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
+def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
+def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
+def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
+def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
+def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
+def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
+ (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
+ let AltOrders = [(add (sequence "R%u", 1, 20))];
+ let AltOrderSelect = [{
+ return 1;
+ }];
+ }
diff --git a/lib/Target/R600/AMDILSIDevice.cpp b/lib/Target/R600/AMDILSIDevice.cpp
new file mode 100644
index 000000000000..0d1de3d11eb4
--- /dev/null
+++ b/lib/Target/R600/AMDILSIDevice.cpp
@@ -0,0 +1,48 @@
+//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//==-----------------------------------------------------------------------===//
+#include "AMDILSIDevice.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+
+using namespace llvm;
+
+AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
+ : AMDGPUEvergreenDevice(ST) {
+}
+AMDGPUSIDevice::~AMDGPUSIDevice() {
+}
+
+size_t
+AMDGPUSIDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_900;
+ } else {
+ return 0;
+ }
+}
+
+uint32_t
+AMDGPUSIDevice::getGeneration() const {
+ return AMDGPUDeviceInfo::HD7XXX;
+}
+
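+// In the layout string below, "e" requests little-endian encoding,
+// "-p:64:64:64" gives pointers a 64-bit size with 64-bit ABI and preferred
+// alignments, each "-iN:A:P" / "-vN:A:P" / "-fN:A:P" entry sets the ABI and
+// preferred alignment of the corresponding integer, vector or float width,
+// and "-n32:64" lists the target's native integer widths.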
+std::string
+AMDGPUSIDevice::getDataLayout() const {
+ return std::string(
+ "e"
+ "-p:64:64:64"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128"
+ "-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024"
+ "-v2048:2048:2048"
+ "-n32:64"
+ );
+}
diff --git a/lib/Target/R600/AMDILSIDevice.h b/lib/Target/R600/AMDILSIDevice.h
new file mode 100644
index 000000000000..5b2cb2502211
--- /dev/null
+++ b/lib/Target/R600/AMDILSIDevice.h
@@ -0,0 +1,39 @@
+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface for the subtarget data classes.
+///
+/// This file will define the interface that each generation needs to
+/// implement in order to correctly answer queries on the capabilities of the
+/// specific hardware.
+//===---------------------------------------------------------------------===//
+#ifndef AMDILSIDEVICE_H
+#define AMDILSIDEVICE_H
+#include "AMDILEvergreenDevice.h"
+
+namespace llvm {
+class AMDGPUSubtarget;
+//===---------------------------------------------------------------------===//
+// SI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+/// \brief The AMDGPUSIDevice is the base class for all Southern Island series
+/// of cards.
+class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
+public:
+ AMDGPUSIDevice(AMDGPUSubtarget*);
+ virtual ~AMDGPUSIDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual std::string getDataLayout() const;
+};
+
+} // namespace llvm
+#endif // AMDILSIDEVICE_H
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
new file mode 100644
index 000000000000..8efba5846bef
--- /dev/null
+++ b/lib/Target/R600/CMakeLists.txt
@@ -0,0 +1,59 @@
+set(LLVM_TARGET_DEFINITIONS AMDGPU.td)
+
+tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
+tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
+tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
+add_public_tablegen_target(AMDGPUCommonTableGen)
+
+add_llvm_target(R600CodeGen
+ AMDIL7XXDevice.cpp
+ AMDILCFGStructurizer.cpp
+ AMDILDevice.cpp
+ AMDILDeviceInfo.cpp
+ AMDILEvergreenDevice.cpp
+ AMDILIntrinsicInfo.cpp
+ AMDILISelDAGToDAG.cpp
+ AMDILISelLowering.cpp
+ AMDILNIDevice.cpp
+ AMDILPeepholeOptimizer.cpp
+ AMDILSIDevice.cpp
+ AMDGPUAsmPrinter.cpp
+ AMDGPUFrameLowering.cpp
+ AMDGPUIndirectAddressing.cpp
+ AMDGPUMCInstLower.cpp
+ AMDGPUMachineFunction.cpp
+ AMDGPUSubtarget.cpp
+ AMDGPUStructurizeCFG.cpp
+ AMDGPUTargetMachine.cpp
+ AMDGPUISelLowering.cpp
+ AMDGPUConvertToISA.cpp
+ AMDGPUInstrInfo.cpp
+ AMDGPURegisterInfo.cpp
+ R600ControlFlowFinalizer.cpp
+ R600EmitClauseMarkers.cpp
+ R600ExpandSpecialInstrs.cpp
+ R600InstrInfo.cpp
+ R600ISelLowering.cpp
+ R600MachineFunctionInfo.cpp
+ R600MachineScheduler.cpp
+ R600RegisterInfo.cpp
+ SIAnnotateControlFlow.cpp
+ SIInsertWaits.cpp
+ SIInstrInfo.cpp
+ SIISelLowering.cpp
+ SILowerControlFlow.cpp
+ SIMachineFunctionInfo.cpp
+ SIRegisterInfo.cpp
+ )
+
+add_dependencies(LLVMR600CodeGen intrinsics_gen)
+
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
new file mode 100644
index 000000000000..10547a598805
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -0,0 +1,172 @@
+//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+
+using namespace llvm;
+
+void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot) {
+ printInstruction(MI, OS);
+
+ printAnnotation(OS, Annot);
+}
+
+void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ switch (Op.getReg()) {
+ // This is the default predicate state, so we don't need to print it.
+ case AMDGPU::PRED_SEL_OFF: break;
+ default: O << getRegisterName(Op.getReg()); break;
+ }
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ O << Op.getFPImm();
+ } else if (Op.isExpr()) {
+ const MCExpr *Exp = Op.getExpr();
+ Exp->print(O);
+ } else {
+ assert(!"unknown operand type in printOperand");
+ }
+}
+
+void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+
+ if (Imm == 2) {
+ O << "P0";
+ } else if (Imm == 1) {
+ O << "P20";
+ } else if (Imm == 0) {
+ O << "P10";
+ } else {
+ assert(!"Invalid interpolation parameter slot");
+ }
+}
+
+void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo + 1, O);
+}
+
+void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, StringRef Asm) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert(Op.isImm());
+ if (Op.getImm() == 1) {
+ O << Asm;
+ }
+}
+
+void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "|");
+}
+
+void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "_SAT");
+}
+
+void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ union Literal {
+ float f;
+ int32_t i;
+ } L;
+
+ L.i = MI->getOperand(OpNo).getImm();
+ O << L.i << "(" << L.f << ")";
+}
+
+void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, " *");
+}
+
+void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "-");
+}
+
+void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ switch (MI->getOperand(OpNo).getImm()) {
+ default: break;
+ case 1:
+ O << " * 2.0";
+ break;
+ case 2:
+ O << " * 4.0";
+ break;
+ case 3:
+ O << " / 2.0";
+ break;
+ }
+}
+
+void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "+");
+}
+
+void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "ExecMask,");
+}
+
+void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printIfSet(MI, OpNo, O, "Pred,");
+}
+
+void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.getImm() == 0) {
+ O << " (MASKED)";
+ }
+}
+
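+// The sel operand encodes a channel in its low two bits and a source select
+// in the remaining bits. For illustration, an immediate of 2050 decodes as
+// chan = 2050 & 3 = 2 and sel = 2050 >> 2 = 512, which selects constant
+// buffer 0, element 0, so the operand is printed as "0[0].Z".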
+void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const char * chans = "XYZW";
+ int sel = MI->getOperand(OpNo).getImm();
+
+ int chan = sel & 3;
+ sel >>= 2;
+
+ if (sel >= 512) {
+ sel -= 512;
+ int cb = sel >> 12;
+ sel &= 4095;
+ O << cb << "[" << sel << "]";
+ } else if (sel >= 448) {
+ sel -= 448;
+ O << sel;
+  } else if (sel >= 0) {
+ O << sel;
+ }
+
+ if (sel >= 0)
+ O << "." << chans[chan];
+}
+
+#include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
new file mode 100644
index 000000000000..767a7082cc2c
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -0,0 +1,54 @@
+//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTPRINTER_H
+#define AMDGPUINSTPRINTER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class AMDGPUInstPrinter : public MCInstPrinter {
+public:
+ AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+  // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+private:
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm);
+ void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUINSTPRINTER_H
diff --git a/lib/Target/R600/InstPrinter/CMakeLists.txt b/lib/Target/R600/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..069c55ba948e
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMR600AsmPrinter
+ AMDGPUInstPrinter.cpp
+ )
+
+add_dependencies(LLVMR600AsmPrinter AMDGPUCommonTableGen)
diff --git a/lib/Target/R600/InstPrinter/LLVMBuild.txt b/lib/Target/R600/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..ec0be89f104c
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/R600/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = R600AsmPrinter
+parent = R600
+required_libraries = MC Support
+add_to_library_groups = R600
+
diff --git a/lib/Target/R600/InstPrinter/Makefile b/lib/Target/R600/InstPrinter/Makefile
new file mode 100644
index 000000000000..a794cc1124ed
--- /dev/null
+++ b/lib/Target/R600/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+#===- lib/Target/R600/InstPrinter/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMR600AsmPrinter
+
+# Hack: we need to include the 'main' R600 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/LLVMBuild.txt b/lib/Target/R600/LLVMBuild.txt
new file mode 100644
index 000000000000..f2a7554e5269
--- /dev/null
+++ b/lib/Target/R600/LLVMBuild.txt
@@ -0,0 +1,32 @@
;===- ./lib/Target/R600/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = R600
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = R600CodeGen
+parent = R600
+required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC R600AsmPrinter R600Desc R600Info
+add_to_library_groups = R600
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
new file mode 100644
index 000000000000..98fca432670d
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -0,0 +1,90 @@
+//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUMCObjectWriter : public MCObjectWriter {
+public:
+ AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ //XXX: Implement if necessary.
+ }
+ virtual void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ assert(!"Not implemented");
+ }
+
+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+
+};
+
+class AMDGPUAsmBackend : public MCAsmBackend {
+public:
+ AMDGPUAsmBackend(const Target &T)
+ : MCAsmBackend() {}
+
+ virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
+  virtual unsigned getNumFixupKinds() const { return 0; }
+ virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+ virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ return false;
+ }
+ virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ assert(!"Not implemented");
+ }
+ virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
+ virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ return true;
+ }
+};
+
+} //End anonymous namespace
+
+void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
+ Asm.writeSectionData(I, Layout);
+ }
+}
+
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU) {
+ return new AMDGPUAsmBackend(T);
+}
+
+AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
+ raw_ostream &OS) const {
+ return new AMDGPUMCObjectWriter(OS);
+}
+
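+// The fixup value is stored as a 16-bit word count. For example, a fixup
+// value of 20 bytes is encoded as (20 - 4) / 4 = 4, i.e. the PC-relative
+// offset is apparently expressed in units of 4-byte words, biased by one
+// word.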
+void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+
+ uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
+ assert(Fixup.getKind() == FK_PCRel_4);
+ *Dst = (Value - 4) / 4;
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
new file mode 100644
index 000000000000..b7cdd7c8cde9
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -0,0 +1,83 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCAsmInfo.h"
+
+using namespace llvm;
+AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() {
+ HasSingleParameterDotFile = false;
+ WeakDefDirective = 0;
+ //===------------------------------------------------------------------===//
+ HasSubsectionsViaSymbols = true;
+ HasMachoZeroFillDirective = false;
+ HasMachoTBSSDirective = false;
+ HasStaticCtorDtorReferenceInStaticMode = false;
+ LinkerRequiresNonEmptyDwarfLines = true;
+ MaxInstLength = 16;
+ PCSymbol = "$";
+ SeparatorString = "\n";
+ CommentColumn = 40;
+ CommentString = ";";
+ LabelSuffix = ":";
+ GlobalPrefix = "@";
+ PrivateGlobalPrefix = ";.";
+ LinkerPrivateGlobalPrefix = "!";
+ InlineAsmStart = ";#ASMSTART";
+ InlineAsmEnd = ";#ASMEND";
+ AssemblerDialect = 0;
+ AllowQuotesInName = false;
+ AllowNameToStartWithDigit = false;
+ AllowPeriodsInName = false;
+
+ //===--- Data Emission Directives -------------------------------------===//
+ ZeroDirective = ".zero";
+ AsciiDirective = ".ascii\t";
+ AscizDirective = ".asciz\t";
+ Data8bitsDirective = ".byte\t";
+ Data16bitsDirective = ".short\t";
+ Data32bitsDirective = ".long\t";
+ Data64bitsDirective = ".quad\t";
+ GPRel32Directive = 0;
+ SunStyleELFSectionSwitchSyntax = true;
+ UsesELFSectionDirectiveForBSS = true;
+ HasMicrosoftFastStdCallMangling = false;
+
+ //===--- Alignment Information ----------------------------------------===//
+ AlignDirective = ".align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+
+ //===--- Global Variable Emission Directives --------------------------===//
+ GlobalDirective = ".global";
+ ExternDirective = ".extern";
+ HasSetDirective = false;
+ HasAggressiveSymbolFolding = true;
+ COMMDirectiveAlignmentIsInBytes = false;
+ HasDotTypeDotSizeDirective = false;
+ HasNoDeadStrip = true;
+ HasSymbolResolver = false;
+ WeakRefDirective = ".weakref\t";
+ LinkOnceDirective = 0;
+ //===--- Dwarf Emission Directives -----------------------------------===//
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+ DwarfSectionOffsetDirective = ".offset";
+
+}
+
+const char*
+AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const {
+ return 0;
+}
+
+const MCSection*
+AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
+ return 0;
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
new file mode 100644
index 000000000000..3ad0fa6824ab
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUMCASMINFO_H
+#define AMDGPUMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+namespace llvm {
+
+class Target;
+class StringRef;
+
+class AMDGPUMCAsmInfo : public MCAsmInfo {
+public:
+ explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
+ const char* getDataASDirective(unsigned int Size, unsigned int AS) const;
+ const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
+};
+} // namespace llvm
+#endif // AMDGPUMCASMINFO_H
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
new file mode 100644
index 000000000000..cd3a7ce65aa5
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -0,0 +1,40 @@
+//===-- AMDGPUMCCodeEmitter.h - AMDGPU Code Emitter interface -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUCODEEMITTER_H
+#define AMDGPUCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class MCInst;
+class MCOperand;
+
+class AMDGPUMCCodeEmitter : public MCCodeEmitter {
+public:
+
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 0;
+ }
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUCODEEMITTER_H
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
new file mode 100644
index 000000000000..072ee49b6311
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -0,0 +1,113 @@
+//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This file provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "AMDGPUMCAsmInfo.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AMDGPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createAMDGPUMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAMDGPUMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAMDGPUMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo * X = new MCSubtargetInfo();
+ InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new AMDGPUInstPrinter(MAI, MII, MRI);
+}
+
+static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
+ return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
+ } else {
+ return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
+ }
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
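+  // The R600 object writer just dumps raw section bytes, so a "pure"
+  // streamer with no relocation or symbol handling appears to be enough.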
+ return createPureStreamer(Ctx, MAB, _OS, _Emitter);
+}
+
+extern "C" void LLVMInitializeR600TargetMC() {
+
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
+
+ TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
+
+ TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
+
+ TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
+
+ TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
+
+ TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
+
+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
+
+ TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
+
+ TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
new file mode 100644
index 000000000000..363a4af3f3a4
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -0,0 +1,55 @@
+//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Provides AMDGPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef AMDGPUMCTARGETDESC_H
+#define AMDGPUMCTARGETDESC_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+
+extern Target TheAMDGPUTarget;
+
+MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU);
+} // End llvm namespace
+
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AMDGPUGenSubtargetInfo.inc"
+
+#endif // AMDGPUMCTARGETDESC_H
diff --git a/lib/Target/R600/MCTargetDesc/CMakeLists.txt b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..37e714c2e7b8
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,10 @@
+
+add_llvm_library(LLVMR600Desc
+ AMDGPUAsmBackend.cpp
+ AMDGPUMCTargetDesc.cpp
+ AMDGPUMCAsmInfo.cpp
+ R600MCCodeEmitter.cpp
+ SIMCCodeEmitter.cpp
+ )
+
+add_dependencies(LLVMR600Desc AMDGPUCommonTableGen)
diff --git a/lib/Target/R600/MCTargetDesc/LLVMBuild.txt b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..b1beab0bb301
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/R600/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = R600Desc
+parent = R600
+required_libraries = R600AsmPrinter R600Info MC
+add_to_library_groups = R600
diff --git a/lib/Target/R600/MCTargetDesc/Makefile b/lib/Target/R600/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..8894a7607f4f
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/R600/MCTargetDesc/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMR600Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
new file mode 100644
index 000000000000..927bcbd8305c
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -0,0 +1,585 @@
+//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This code emitter outputs bytecode that is understood by the r600g driver
+/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
+/// but it still needs to be run through a finalizer in order to be executed
+/// by the GPU.
+///
+/// [1] http://www.mesa3d.org/
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600Defines.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include <stdio.h>
+
+#define SRC_BYTE_COUNT 11
+#define DST_BYTE_COUNT 5
+
+using namespace llvm;
+
+namespace {
+
+class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
+ R600MCCodeEmitter(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+ const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+
+public:
+
+ R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ const MCSubtargetInfo &sti, MCContext &ctx)
+ : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+
+ /// \brief Encode the instruction and write it to the OS.
+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// \returns the encoding for an MCOperand.
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+private:
+
+ void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const;
+ void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
+ void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
+ raw_ostream &OS) const;
+ void EmitDst(const MCInst &MI, raw_ostream &OS) const;
+ void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
+
+ void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
+
+ void EmitByte(unsigned int byte, raw_ostream &OS) const;
+
+ void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
+
+ void Emit(uint32_t value, raw_ostream &OS) const;
+ void Emit(uint64_t value, raw_ostream &OS) const;
+
+ unsigned getHWRegChan(unsigned reg) const;
+ unsigned getHWReg(unsigned regNo) const;
+
+ bool isFCOp(unsigned opcode) const;
+ bool isTexOp(unsigned opcode) const;
+ bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
+
+};
+
+} // End anonymous namespace
+
+enum RegElement {
+ ELEMENT_X = 0,
+ ELEMENT_Y,
+ ELEMENT_Z,
+ ELEMENT_W
+};
+
+enum InstrTypes {
+ INSTR_ALU = 0,
+ INSTR_TEX,
+ INSTR_FC,
+ INSTR_NATIVE,
+ INSTR_VTX,
+ INSTR_EXPORT,
+ INSTR_CFALU
+};
+
+enum FCInstr {
+ FC_IF_PREDICATE = 0,
+ FC_ELSE,
+ FC_ENDIF,
+ FC_BGNLOOP,
+ FC_ENDLOOP,
+ FC_BREAK_PREDICATE,
+ FC_CONTINUE
+};
+
+enum TextureTypes {
+ TEXTURE_1D = 1,
+ TEXTURE_2D,
+ TEXTURE_3D,
+ TEXTURE_CUBE,
+ TEXTURE_RECT,
+ TEXTURE_SHADOW1D,
+ TEXTURE_SHADOW2D,
+ TEXTURE_SHADOWRECT,
+ TEXTURE_1D_ARRAY,
+ TEXTURE_2D_ARRAY,
+ TEXTURE_SHADOW1D_ARRAY,
+ TEXTURE_SHADOW2D_ARRAY
+};
+
+MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new R600MCCodeEmitter(MCII, MRI, STI, Ctx);
+}
+
+void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+  if (isFCOp(MI.getOpcode())) {
+ EmitFCInstr(MI, OS);
+ } else if (MI.getOpcode() == AMDGPU::RETURN ||
+ MI.getOpcode() == AMDGPU::BUNDLE ||
+ MI.getOpcode() == AMDGPU::KILL) {
+ return;
+ } else {
+    switch (MI.getOpcode()) {
+ case AMDGPU::STACK_SIZE: {
+ EmitByte(MI.getOperand(0).getImm(), OS);
+ break;
+ }
+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
+ uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(inst, OS);
+ break;
+ }
+ case AMDGPU::CONSTANT_LOAD_eg:
+ case AMDGPU::VTX_READ_PARAM_8_eg:
+ case AMDGPU::VTX_READ_PARAM_16_eg:
+ case AMDGPU::VTX_READ_PARAM_32_eg:
+ case AMDGPU::VTX_READ_PARAM_128_eg:
+ case AMDGPU::VTX_READ_GLOBAL_8_eg:
+ case AMDGPU::VTX_READ_GLOBAL_32_eg:
+ case AMDGPU::VTX_READ_GLOBAL_128_eg:
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF : {
+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+ uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
+
+ EmitByte(INSTR_VTX, OS);
+ Emit(InstWord01, OS);
+ Emit(InstWord2, OS);
+ break;
+ }
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V: {
+ unsigned Opcode = MI.getOpcode();
+ bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
+ unsigned OpOffset = HasOffsets ? 3 : 0;
+ int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
+ int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
+
+ uint32_t SrcSelect[4] = {0, 1, 2, 3};
+ uint32_t Offsets[3] = {0, 0, 0};
+ uint64_t CoordType[4] = {1, 1, 1, 1};
+
+ if (HasOffsets)
+ for (unsigned i = 0; i < 3; i++) {
+ int SignedOffset = MI.getOperand(i + 2).getImm();
+ Offsets[i] = (SignedOffset & 0x1F);
+ }
+
+
+ if (TextureType == TEXTURE_RECT ||
+ TextureType == TEXTURE_SHADOWRECT) {
+ CoordType[ELEMENT_X] = 0;
+ CoordType[ELEMENT_Y] = 0;
+ }
+
+ if (TextureType == TEXTURE_1D_ARRAY ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) {
+ if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
+ Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
+ CoordType[ELEMENT_Y] = 0;
+ } else {
+ CoordType[ELEMENT_Z] = 0;
+ SrcSelect[ELEMENT_Z] = ELEMENT_Y;
+ }
+ } else if (TextureType == TEXTURE_2D_ARRAY ||
+ TextureType == TEXTURE_SHADOW2D_ARRAY) {
+ CoordType[ELEMENT_Z] = 0;
+ }
+
+
+ if ((TextureType == TEXTURE_SHADOW1D ||
+ TextureType == TEXTURE_SHADOW2D ||
+ TextureType == TEXTURE_SHADOWRECT ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_L &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
+ SrcSelect[ELEMENT_W] = ELEMENT_Z;
+ }
+
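+      // Pack the per-element coordinate-type bits into the top of the
+      // instruction word; the second word carries the sampler id, the
+      // swizzle selects, and the texel offsets.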
+ uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
+ CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
+ CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
+ uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
+ SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
+ SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
+ Offsets[2] << 10;
+
+ EmitByte(INSTR_TEX, OS);
+ Emit(Word01, OS);
+ Emit(Word2, OS);
+ break;
+ }
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportSwz:
+ case AMDGPU::EG_ExportBuf:
+ case AMDGPU::R600_ExportBuf: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_EXPORT, OS);
+ Emit(Inst, OS);
+ break;
+ }
+ case AMDGPU::CF_ALU:
+ case AMDGPU::CF_ALU_PUSH_BEFORE: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_CFALU, OS);
+ Emit(Inst, OS);
+ break;
+ }
+ case AMDGPU::CF_TC:
+ case AMDGPU::CF_VC:
+ case AMDGPU::CF_CALL_FS:
+ return;
+ case AMDGPU::WHILE_LOOP:
+ case AMDGPU::END_LOOP:
+ case AMDGPU::LOOP_BREAK:
+ case AMDGPU::CF_CONTINUE:
+ case AMDGPU::CF_JUMP:
+ case AMDGPU::CF_ELSE:
+ case AMDGPU::POP: {
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ EmitByte(INSTR_NATIVE, OS);
+ Emit(Inst, OS);
+ break;
+ }
+ default:
+ EmitALUInstr(MI, Fixups, OS);
+ break;
+ }
+ }
+}
+
+void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ raw_ostream &OS) const {
+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
+
+ // Emit instruction type
+ EmitByte(INSTR_ALU, OS);
+
+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+
+  // Older ALUs use a different encoding for instructions with one or two
+  // source operands.
+ if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
+ !(MCDesc.TSFlags & R600_InstFlag::OP3)) {
+ uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
+ InstWord01 &= ~(0x3FFULL << 39);
+ InstWord01 |= ISAOpCode << 1;
+ }
+
+ unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
+ MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
+
+ EmitByte(SrcNum, OS);
+
+ const unsigned SrcOps[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL}
+ };
+
+ for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
+ unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
+ unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
+ EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
+ }
+
+ Emit(InstWord01, OS);
+ return;
+}
+
+void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
+ raw_ostream &OS) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ union {
+ float f;
+ uint32_t i;
+ } Value;
+ Value.i = 0;
+ // Emit the source select (2 bytes). For GPRs, this is the register index.
+  // For other potential instruction operands (e.g. constant registers), the
+  // value of the source select is defined in the r600isa docs.
+ if (MO.isReg()) {
+ unsigned reg = MO.getReg();
+ EmitTwoBytes(getHWReg(reg), OS);
+ if (reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+ if (ImmOp.isFPImm()) {
+ Value.f = ImmOp.getFPImm();
+ } else {
+ assert(ImmOp.isImm());
+ Value.i = ImmOp.getImm();
+ }
+ }
+ } else {
+ // XXX: Handle other operand types.
+ EmitTwoBytes(0, OS);
+ }
+
+ // Emit the source channel (1 byte)
+ if (MO.isReg()) {
+ EmitByte(getHWRegChan(MO.getReg()), OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit isNegated (1 byte)
+  if (!isFlagSet(MI, OpIdx, MO_FLAG_ABS) &&
+      (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
+       (MO.isReg() &&
+        (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))) {
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // Emit isAbsolute (1 byte)
+ if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
+ EmitByte(1, OS);
+ } else {
+ EmitByte(0, OS);
+ }
+
+ // XXX: Emit relative addressing mode (1 byte)
+ EmitByte(0, OS);
+
+  // Emit kc_bank. This will be adjusted later by r600_asm.
+ EmitByte(0, OS);
+
+ // Emit the literal value, if applicable (4 bytes).
+ Emit(Value.i, OS);
+}
+
+void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
+ unsigned SelOpIdx, raw_ostream &OS) const {
+ const MCOperand &RegMO = MI.getOperand(RegOpIdx);
+ const MCOperand &SelMO = MI.getOperand(SelOpIdx);
+
+ union {
+ float f;
+ uint32_t i;
+ } InlineConstant;
+ InlineConstant.i = 0;
+  // Emit the source type (1 byte) and source select (4 bytes). For GPRs the
+  // type is 0 and the select is 0 (the GPR index is encoded in the
+  // instruction encoding). For constants the type is 1 and the select is the
+  // original const select passed from the driver.
+ unsigned Reg = RegMO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ EmitByte(1, OS);
+ uint32_t Sel = SelMO.getImm();
+ Emit(Sel, OS);
+ } else {
+ EmitByte(0, OS);
+ Emit((uint32_t)0, OS);
+ }
+
+ if (Reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+ if (ImmOp.isFPImm()) {
+ InlineConstant.f = ImmOp.getFPImm();
+ } else {
+ assert(ImmOp.isImm());
+ InlineConstant.i = ImmOp.getImm();
+ }
+ }
+
+ // Emit the literal value, if applicable (4 bytes).
+ Emit(InlineConstant.i, OS);
+}
+
+void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
+
+ // Emit instruction type
+ EmitByte(INSTR_FC, OS);
+
+ // Emit SRC
+ unsigned NumOperands = MI.getNumOperands();
+ if (NumOperands > 0) {
+ assert(NumOperands == 1);
+ EmitSrc(MI, 0, OS);
+ } else {
+ EmitNullBytes(SRC_BYTE_COUNT, OS);
+ }
+
+ // Emit FC Instruction
+ enum FCInstr instr;
+ switch (MI.getOpcode()) {
+ case AMDGPU::PREDICATED_BREAK:
+ instr = FC_BREAK_PREDICATE;
+ break;
+ case AMDGPU::CONTINUE:
+ instr = FC_CONTINUE;
+ break;
+ case AMDGPU::IF_PREDICATE_SET:
+ instr = FC_IF_PREDICATE;
+ break;
+ case AMDGPU::ELSE:
+ instr = FC_ELSE;
+ break;
+ case AMDGPU::ENDIF:
+ instr = FC_ENDIF;
+ break;
+ case AMDGPU::ENDLOOP:
+ instr = FC_ENDLOOP;
+ break;
+ case AMDGPU::WHILELOOP:
+ instr = FC_BGNLOOP;
+ break;
+ default:
+ abort();
+ break;
+ }
+ EmitByte(instr, OS);
+}
+
+void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
+ raw_ostream &OS) const {
+
+ for (unsigned int i = 0; i < ByteCount; i++) {
+ EmitByte(0, OS);
+ }
+}
+
+void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
+ OS.write((uint8_t) Byte & 0xff);
+}
+
+void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
+ raw_ostream &OS) const {
+ OS.write((uint8_t) (Bytes & 0xff));
+ OS.write((uint8_t) ((Bytes >> 8) & 0xff));
+}
+
+void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
+ for (unsigned i = 0; i < 4; i++) {
+ OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
+ }
+}
+
+void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
+ for (unsigned i = 0; i < 8; i++) {
+ EmitByte((Value >> (8 * i)) & 0xff, OS);
+ }
+}
+
+unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
+ return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT;
+}
+
+unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
+ return MRI.getEncodingValue(RegNo) & HW_REG_MASK;
+}
+
+uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixup) const {
+ if (MO.isReg()) {
+ if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) {
+ return MRI.getEncodingValue(MO.getReg());
+ } else {
+ return getHWReg(MO.getReg());
+ }
+ } else if (MO.isImm()) {
+ return MO.getImm();
+ } else {
+ assert(0);
+ return 0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Encoding helper functions
+//===----------------------------------------------------------------------===//
+
+bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
+  switch (opcode) {
+ default: return false;
+ case AMDGPU::PREDICATED_BREAK:
+ case AMDGPU::CONTINUE:
+ case AMDGPU::IF_PREDICATE_SET:
+ case AMDGPU::ELSE:
+ case AMDGPU::ENDIF:
+ case AMDGPU::ENDLOOP:
+ case AMDGPU::WHILELOOP:
+ return true;
+ }
+}
+
+bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
+  switch (opcode) {
+ default: return false;
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ return true;
+ }
+}
+
+bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
+ unsigned Flag) const {
+ const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
+ unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
+ if (FlagIndex == 0) {
+ return false;
+ }
+ assert(MI.getOperand(FlagIndex).isImm());
+ return !!((MI.getOperand(FlagIndex).getImm() >>
+ (NUM_MO_FLAGS * Operand)) & Flag);
+}
+
+#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
new file mode 100644
index 000000000000..5af83209a0d5
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -0,0 +1,201 @@
+//===-- SIMCCodeEmitter.cpp - SI Code Emitter -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief The SI code emitter produces machine code that can be executed
+/// directly on the GPU device.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief Helper type used in encoding
+typedef union {
+ int32_t I;
+ float F;
+} IntFloatUnion;
+
+class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
+ SIMCCodeEmitter(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ const MCInstrInfo &MCII;
+ const MCRegisterInfo &MRI;
+
+ /// \brief Can this operand also contain immediate values?
+ bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
+
+ /// \brief Encode an fp or int literal
+ uint32_t getLitEncoding(const MCOperand &MO) const;
+
+public:
+ SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ const MCSubtargetInfo &sti, MCContext &ctx)
+ : MCII(mcii), MRI(mri) { }
+
+ ~SIMCCodeEmitter() { }
+
+  /// \brief Encode the instruction and write it to the OS.
+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// \returns the encoding for an MCOperand.
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // End anonymous namespace
+
+MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
+}
+
+bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
+ unsigned OpNo) const {
+
+ unsigned RegClass = Desc.OpInfo[OpNo].RegClass;
+ return (AMDGPU::SSrc_32RegClassID == RegClass) ||
+ (AMDGPU::SSrc_64RegClassID == RegClass) ||
+ (AMDGPU::VSrc_32RegClassID == RegClass) ||
+ (AMDGPU::VSrc_64RegClassID == RegClass);
+}
+
+uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
+
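+  // Inline constants: 128 + n encodes the integers 0..64, 192 + |n| encodes
+  // the integers -1..-16, 240..247 encode a few common float immediates, and
+  // 255 means a separate 32-bit literal follows the instruction.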
+ IntFloatUnion Imm;
+ if (MO.isImm())
+ Imm.I = MO.getImm();
+ else if (MO.isFPImm())
+ Imm.F = MO.getFPImm();
+ else
+ return ~0;
+
+ if (Imm.I >= 0 && Imm.I <= 64)
+ return 128 + Imm.I;
+
+ if (Imm.I >= -16 && Imm.I <= -1)
+ return 192 + abs(Imm.I);
+
+ if (Imm.F == 0.5f)
+ return 240;
+
+ if (Imm.F == -0.5f)
+ return 241;
+
+ if (Imm.F == 1.0f)
+ return 242;
+
+ if (Imm.F == -1.0f)
+ return 243;
+
+ if (Imm.F == 2.0f)
+ return 244;
+
+ if (Imm.F == -2.0f)
+ return 245;
+
+ if (Imm.F == 4.0f)
+ return 246;
+
+ if (Imm.F == -4.0f)
+ return 247;
+
+ return 255;
+}
+
+void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ unsigned bytes = Desc.getSize();
+
+ for (unsigned i = 0; i < bytes; i++) {
+ OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
+ }
+
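+  // Only 32-bit encodings can be followed by a trailing literal constant.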
+ if (bytes > 4)
+ return;
+
+ // Check for additional literals in SRC0/1/2 (Op 1/2/3)
+ for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
+
+ // Check if this operand should be encoded as [SV]Src
+ if (!isSrcOperand(Desc, i))
+ continue;
+
+ // Is this operand a literal immediate?
+ const MCOperand &Op = MI.getOperand(i);
+ if (getLitEncoding(Op) != 255)
+ continue;
+
+ // Yes! Encode it
+ IntFloatUnion Imm;
+ if (Op.isImm())
+ Imm.I = Op.getImm();
+ else
+ Imm.F = Op.getFPImm();
+
+ for (unsigned j = 0; j < 4; j++) {
+ OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff));
+ }
+
+ // Only one literal value allowed
+ break;
+ }
+}
+
+uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg())
+ return MRI.getEncodingValue(MO.getReg());
+
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FK_PCRel_4);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+ return 0;
+ }
+
+ // Figure out the operand number, needed for isSrcOperand check
+ unsigned OpNo = 0;
+ for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
+ if (&MO == &MI.getOperand(OpNo))
+ break;
+ }
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ if (isSrcOperand(Desc, OpNo)) {
+ uint32_t Enc = getLitEncoding(MO);
+ if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
+ return Enc;
+
+ } else if (MO.isImm())
+ return MO.getImm();
+
+ llvm_unreachable("Encoding of this operand type is not supported yet.");
+ return 0;
+}
+
diff --git a/lib/Target/R600/Makefile b/lib/Target/R600/Makefile
new file mode 100644
index 000000000000..1b3ebbe8c8f3
--- /dev/null
+++ b/lib/Target/R600/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/R600/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMR600CodeGen
+TARGET = AMDGPU
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
+ AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \
+ AMDGPUGenMCCodeEmitter.inc AMDGPUGenCallingConv.inc \
+ AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
+ AMDGPUGenAsmWriter.inc
+
+DIRS = InstPrinter TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
new file mode 100644
index 000000000000..868810c613b3
--- /dev/null
+++ b/lib/Target/R600/Processors.td
@@ -0,0 +1,30 @@
+//===-- Processors.td - R600 Processor definitions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 and SI processors supported.
+//
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
+: Processor<Name, itin, Features>;
+def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>;
+def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
+def : Proc<"rv710", R600_EG_Itin, []>;
+def : Proc<"rv730", R600_EG_Itin, []>;
+def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
+def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
+
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
new file mode 100644
index 000000000000..3a6c7eac730f
--- /dev/null
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -0,0 +1,268 @@
+//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass turns all control flow pseudo instructions into native ones,
+/// computing their addresses on the fly; it also sets the STACK_SIZE info.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "r600cf"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600ControlFlowFinalizer : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+ unsigned MaxFetchInst;
+
+ bool isFetch(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF:
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TXD:
+ case AMDGPU::TXD_SHADOW:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ MachineBasicBlock::iterator
+ MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned CfAddress) const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ unsigned AluInstCount = 0;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (!isFetch(I))
+ break;
+      AluInstCount++;
+ if (AluInstCount > MaxFetchInst)
+ break;
+ }
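+    // Emit a CF_TC clause header in front of the fetch instructions that
+    // were just counted.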
+ BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+ TII->get(AMDGPU::CF_TC))
+ .addImm(CfAddress) // ADDR
+ .addImm(AluInstCount); // COUNT
+ return I;
+ }
+ void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
+ MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
+ }
+ void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
+ const {
+ for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
+ It != E; ++It) {
+ MachineInstr *MI = *It;
+ CounterPropagateAddr(MI, Addr);
+ }
+ }
+
+public:
+ R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+ const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
+ MaxFetchInst = 8;
+ else
+ MaxFetchInst = 16;
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ unsigned MaxStack = 0;
+ unsigned CurrentStack = 0;
+ for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
+ ++MB) {
+ MachineBasicBlock &MBB = *MB;
+ unsigned CfCount = 0;
+ std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
+ std::vector<MachineInstr * > IfThenElseStack;
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ if (MFI->ShaderType == 1) {
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::CF_CALL_FS));
+ CfCount++;
+ }
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E;) {
+ if (isFetch(I)) {
+ DEBUG(dbgs() << CfCount << ":"; I->dump(););
+ I = MakeFetchClause(MBB, I, 0);
+ CfCount++;
+ continue;
+ }
+
+ MachineBasicBlock::iterator MI = I;
+ I++;
+ switch (MI->getOpcode()) {
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
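+            // Fall through: CF_ALU_PUSH_BEFORE is counted like CF_ALU below.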
+ case AMDGPU::CF_ALU:
+ case AMDGPU::EG_ExportBuf:
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportBuf:
+ case AMDGPU::R600_ExportSwz:
+ DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+ CfCount++;
+ break;
+ case AMDGPU::WHILELOOP: {
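+            // Reserve a stack slot for the loop and remember the WHILE_LOOP
+            // instruction so the matching ENDLOOP can patch its target
+            // address later.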
+ CurrentStack++;
+ MaxStack = std::max(MaxStack, CurrentStack);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::WHILE_LOOP))
+ .addImm(2);
+ std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
+ std::set<MachineInstr *>());
+ Pair.second.insert(MIb);
+ LoopStack.push_back(Pair);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDLOOP: {
+ CurrentStack--;
+ std::pair<unsigned, std::set<MachineInstr *> > Pair =
+ LoopStack.back();
+ LoopStack.pop_back();
+ CounterPropagateAddr(Pair.second, CfCount);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
+ .addImm(Pair.first + 1);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::IF_PREDICATE_SET: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_JUMP))
+ .addImm(0)
+ .addImm(0);
+ IfThenElseStack.push_back(MIb);
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ELSE: {
+ MachineInstr * JumpInst = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(JumpInst, CfCount);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_ELSE))
+ .addImm(0)
+ .addImm(1);
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ IfThenElseStack.push_back(MIb);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::ENDIF: {
+ CurrentStack--;
+ MachineInstr *IfOrElseInst = IfThenElseStack.back();
+ IfThenElseStack.pop_back();
+ CounterPropagateAddr(IfOrElseInst, CfCount + 1);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::POP))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ case AMDGPU::PREDICATED_BREAK: {
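+            // A predicated break expands into a CF_JUMP / LOOP_BREAK / POP
+            // triple, so advance the CF counter by three up front.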
+ CurrentStack--;
+ CfCount += 3;
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
+ .addImm(CfCount)
+ .addImm(1);
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::LOOP_BREAK))
+ .addImm(0);
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+ .addImm(CfCount)
+ .addImm(1);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::CONTINUE: {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ TII->get(AMDGPU::CF_CONTINUE))
+ .addImm(0);
+ LoopStack.back().second.insert(MIb);
+ MI->eraseFromParent();
+ CfCount++;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::STACK_SIZE))
+ .addImm(MaxStack);
+ }
+
+ return false;
+ }
+
+ const char *getPassName() const {
+ return "R600 Control Flow Finalizer Pass";
+ }
+};
+
+char R600ControlFlowFinalizer::ID = 0;
+
+} // End namespace llvm
+
+llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
+ return new R600ControlFlowFinalizer(TM);
+}
+
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
new file mode 100644
index 000000000000..16cfcf59eb3d
--- /dev/null
+++ b/lib/Target/R600/R600Defines.h
@@ -0,0 +1,97 @@
+//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef R600DEFINES_H_
+#define R600DEFINES_H_
+
+#include "llvm/MC/MCRegisterInfo.h"
+
+// Operand Flags
+#define MO_FLAG_CLAMP (1 << 0)
+#define MO_FLAG_NEG (1 << 1)
+#define MO_FLAG_ABS (1 << 2)
+#define MO_FLAG_MASK (1 << 3)
+#define MO_FLAG_PUSH (1 << 4)
+#define MO_FLAG_NOT_LAST (1 << 5)
+#define MO_FLAG_LAST (1 << 6)
+#define NUM_MO_FLAGS 7
+
+/// \brief Helper for getting the operand index for the instruction flags
+/// operand.
+#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
+
+namespace R600_InstFlag {
+ enum TIF {
+ TRANS_ONLY = (1 << 0),
+ TEX = (1 << 1),
+ REDUCTION = (1 << 2),
+ FC = (1 << 3),
+ TRIG = (1 << 4),
+ OP3 = (1 << 5),
+ VECTOR = (1 << 6),
+    // FlagOperand bits 7, 8
+ NATIVE_OPERANDS = (1 << 9),
+ OP1 = (1 << 10),
+ OP2 = (1 << 11)
+ };
+}
+
+#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
+
+/// \brief Defines for extracting register information from the register
+/// encoding.
+#define HW_REG_MASK 0x1ff
+#define HW_CHAN_SHIFT 9
+
+#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
+#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
+
+namespace R600Operands {
+ enum Ops {
+ DST,
+ UPDATE_EXEC_MASK,
+ UPDATE_PREDICATE,
+ WRITE,
+ OMOD,
+ DST_REL,
+ CLAMP,
+ SRC0,
+ SRC0_NEG,
+ SRC0_REL,
+ SRC0_ABS,
+ SRC0_SEL,
+ SRC1,
+ SRC1_NEG,
+ SRC1_REL,
+ SRC1_ABS,
+ SRC1_SEL,
+ SRC2,
+ SRC2_NEG,
+ SRC2_REL,
+ SRC2_SEL,
+ LAST,
+ PRED_SEL,
+ IMM,
+ COUNT
+ };
+
+ const static int ALUOpTable[3][R600Operands::COUNT] = {
+// W C S S S S S S S S S S S
+// R O D L S R R R R S R R R R S R R R L P
+// D U I M R A R C C C C R C C C C R C C C A R I
+// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
+// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
+ {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
+ {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
+ {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
+ };
+
+}
+
+#endif // R600DEFINES_H_
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
new file mode 100644
index 000000000000..3fdc678b9ef1
--- /dev/null
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -0,0 +1,255 @@
+//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Add CF_ALU. R600 ALU instructions are grouped into clauses, each of which
+/// can hold up to 128 ALU instructions; these instructions can access up to
+/// 4 prefetched lines of 16 registers from the constant buffers. Such ALU
+/// clauses are initiated by CF_ALU instructions.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+class R600EmitClauseMarkersPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ unsigned OccupiedDwords(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return 4;
+ case AMDGPU::KILL:
+ return 0;
+ default:
+ break;
+ }
+
+    if (TII->isVector(*MI) ||
+ TII->isCubeOp(MI->getOpcode()) ||
+ TII->isReductionOp(MI->getOpcode()))
+ return 4;
+
+ unsigned NumLiteral = 0;
+ for (MachineInstr::mop_iterator It = MI->operands_begin(),
+ E = MI->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ ++NumLiteral;
+ }
+ return 1 + NumLiteral;
+ }
+
+ bool isALU(const MachineInstr *MI) const {
+ if (TII->isALUInstr(MI->getOpcode()))
+ return true;
+ if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode()))
+ return true;
+ switch (MI->getOpcode()) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::COPY:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool IsTrivialInst(MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::KILL:
+ case AMDGPU::RETURN:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // Register Idx, then Const value
+ std::vector<std::pair<unsigned, unsigned> > ExtractConstRead(MachineInstr *MI)
+ const {
+ const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+ std::vector<std::pair<unsigned, unsigned> > Result;
+
+ if (!TII->isALUInstr(MI->getOpcode()))
+ return Result;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+ unsigned Const = MI->getOperand(
+ TII->getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Result.push_back(std::pair<unsigned, unsigned>(SrcIdx, Const));
+ }
+ }
+ return Result;
+ }
+
+ std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
+ // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
+ // (See also R600ISelLowering.cpp)
+ // ConstIndex value is in [0, 4095];
+ return std::pair<unsigned, unsigned>(
+ ((Sel >> 2) - 512) >> 12, // KC_BANK
+      // Line number of ConstIndex
+      // A line contains 16 constant registers, but a KC bank can lock two
+      // lines at a time; thus we want an even line number. The line number
+      // would be (>>4); using (>>5)<<1 rounds it down to an even number.
+ ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
+ }
+
+ bool SubstituteKCacheBank(MachineInstr *MI,
+ std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const {
+ std::vector<std::pair<unsigned, unsigned> > UsedKCache;
+ std::vector<std::pair<unsigned, unsigned> > Consts = ExtractConstRead(MI);
+ assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const");
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned Sel = Consts[i].second;
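+      // Unpack Sel (see getAccessedBankLine): the low two bits select the
+      // channel, the next five bits index into the locked two-line window.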
+ unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
+ unsigned KCacheIndex = Index * 4 + Chan;
+ const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
+ if (CachedConsts.empty()) {
+ CachedConsts.push_back(BankLine);
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts[0] == BankLine) {
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts.size() == 1) {
+ CachedConsts.push_back(BankLine);
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+ continue;
+ }
+ if (CachedConsts[1] == BankLine) {
+ UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
+ continue;
+ }
+ return false;
+ }
+
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+      switch (UsedKCache[i].first) {
+ case 0:
+ MI->getOperand(Consts[i].first).setReg(
+ AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[i].second));
+ break;
+ case 1:
+ MI->getOperand(Consts[i].first).setReg(
+ AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[i].second));
+ break;
+ default:
+ llvm_unreachable("Wrong Cache Line");
+ }
+ }
+ return true;
+ }
+
+ MachineBasicBlock::iterator
+ MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
+ bool PushBeforeModifier = false;
+ unsigned AluInstCount = 0;
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+ if (IsTrivialInst(I))
+ continue;
+ if (!isALU(I))
+ break;
+ if (AluInstCount > TII->getMaxAlusPerClause())
+ break;
+ if (I->getOpcode() == AMDGPU::PRED_X) {
+ if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH)
+ PushBeforeModifier = true;
+        AluInstCount++;
+ continue;
+ }
+ if (I->getOpcode() == AMDGPU::KILLGT) {
+ I++;
+ break;
+ }
+ if (TII->isALUInstr(I->getOpcode()) &&
+ !SubstituteKCacheBank(I, KCacheBanks))
+ break;
+ AluInstCount += OccupiedDwords(I);
+ }
+ unsigned Opcode = PushBeforeModifier ?
+ AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
+ BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
+ .addImm(0) // ADDR
+ .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
+ .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
+ .addImm(KCacheBanks.empty()?0:2) // KM0
+ .addImm((KCacheBanks.size() < 2)?0:2) // KM1
+ .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
+ .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
+ .addImm(AluInstCount); // COUNT
+ return I;
+ }
+
+public:
+ R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin();
+ if (I->getOpcode() == AMDGPU::CF_ALU)
+ continue; // BB was already parsed
+ for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
+ if (isALU(I))
+ I = MakeALUClause(MBB, I);
+ else
+ ++I;
+ }
+ }
+ return false;
+ }
+
+ const char *getPassName() const {
+ return "R600 Emit Clause Markers Pass";
+ }
+};
+
+char R600EmitClauseMarkersPass::ID = 0;
+
+} // End namespace llvm
+
+llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) {
+ return new R600EmitClauseMarkersPass(TM);
+}
+
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
new file mode 100644
index 000000000000..f8c900f72776
--- /dev/null
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -0,0 +1,297 @@
+//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Vector, Reduction, and Cube instructions need to fill the entire instruction
+/// group to work correctly. This pass expands these individual instructions
+/// into several instructions that will completely fill the instruction group.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ bool ExpandInputPerspective(MachineInstr& MI);
+ bool ExpandInputConstant(MachineInstr& MI);
+
+public:
+ R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "R600 Expand special instructions pass";
+ }
+};
+
+} // End anonymous namespace
+
+char R600ExpandSpecialInstrsPass::ID = 0;
+
+FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
+ return new R600ExpandSpecialInstrsPass(TM);
+}
+
+bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
+
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin();
+ while (I != MBB.end()) {
+ MachineInstr &MI = *I;
+ I = llvm::next(I);
+
+ switch (MI.getOpcode()) {
+ default: break;
+ // Expand PRED_X to one of the PRED_SET instructions.
+ case AMDGPU::PRED_X: {
+ uint64_t Flags = MI.getOperand(3).getImm();
+ // The native opcode used by PRED_X is stored as an immediate in the
+ // third operand.
+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
+ MI.getOperand(2).getImm(), // opcode
+ MI.getOperand(0).getReg(), // dst
+ MI.getOperand(1).getReg(), // src0
+ AMDGPU::ZERO); // src1
+ TII->addFlag(PredSet, 0, MO_FLAG_MASK);
+ if (Flags & MO_FLAG_PUSH) {
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
+ } else {
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1);
+ }
+ MI.eraseFromParent();
+ continue;
+ }
+ case AMDGPU::BREAK: {
+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
+ AMDGPU::PRED_SETE_INT,
+ AMDGPU::PREDICATE_BIT,
+ AMDGPU::ZERO,
+ AMDGPU::ZERO);
+ TII->addFlag(PredSet, 0, MO_FLAG_MASK);
+ TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDGPU::PREDICATED_BREAK))
+ .addReg(AMDGPU::PREDICATE_BIT);
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_PAIR_XY: {
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(2).getImm());
+
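+ // Emit one INTERP_XY per channel. Channels 1-3 are bundled with the
+ // first instruction so all four ops issue as a single ALU group; writes
+ // the original instruction did not request are masked off.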
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ unsigned DstReg;
+
+ if (Chan < 2)
+ DstReg = MI.getOperand(Chan).getReg();
+ else
+ DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W;
+
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY,
+ DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
+
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan >= 2)
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_PAIR_ZW: {
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(2).getImm());
+
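+ // Same bundling scheme as INTERP_PAIR_XY above, but this pair produces
+ // the Z and W channels and the X/Y slots are masked.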
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ unsigned DstReg;
+
+ if (Chan < 2)
+ DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y;
+ else
+ DstReg = MI.getOperand(Chan-2).getReg();
+
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW,
+ DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
+
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan < 2)
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_VEC_LOAD: {
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(1).getImm());
+ unsigned DstReg = MI.getOperand(0).getReg();
+
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0,
+ TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg);
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+ }
+
+ bool IsReduction = TII->isReductionOp(MI.getOpcode());
+ bool IsVector = TII->isVector(MI);
+ bool IsCube = TII->isCubeOp(MI.getOpcode());
+ if (!IsReduction && !IsVector && !IsCube) {
+ continue;
+ }
+
+ // Expand the instruction
+ //
+ // Reduction instructions:
+ // T0_X = DP4 T1_XYZW, T2_XYZW
+ // becomes:
+ // T0_X = DP4 T1_X, T2_X
+ // T0_Y (write masked) = DP4 T1_Y, T2_Y
+ // T0_Z (write masked) = DP4 T1_Z, T2_Z
+ // T0_W (write masked) = DP4 T1_W, T2_W
+ //
+ // Vector instructions:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // becomes:
+ // T0_X = MULLO_INT T1_X, T2_X
+ // T0_Y (write masked) = MULLO_INT T1_X, T2_X
+ // T0_Z (write masked) = MULLO_INT T1_X, T2_X
+ // T0_W (write masked) = MULLO_INT T1_X, T2_X
+ //
+ // Cube instructions:
+ // T0_XYZW = CUBE T1_XYZW
+ // becomes:
+ // T0_X = CUBE T1_Z, T1_Y
+ // T0_Y = CUBE T1_Z, T1_X
+ // T0_Z = CUBE T1_X, T1_Z
+ // T0_W = CUBE T1_Y, T1_Z
+ for (unsigned Chan = 0; Chan < 4; Chan++) {
+ unsigned DstReg = MI.getOperand(
+ TII->getOperandIdx(MI, R600Operands::DST)).getReg();
+ unsigned Src0 = MI.getOperand(
+ TII->getOperandIdx(MI, R600Operands::SRC0)).getReg();
+ unsigned Src1 = 0;
+
+ // Determine the correct source registers
+ if (!IsCube) {
+ int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1);
+ if (Src1Idx != -1) {
+ Src1 = MI.getOperand(Src1Idx).getReg();
+ }
+ }
+ if (IsReduction) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex);
+ Src1 = TRI.getSubReg(Src1, SubRegIndex);
+ } else if (IsCube) {
+ static const int CubeSrcSwz[] = {2, 2, 0, 1};
+ unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
+ unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
+ Src1 = TRI.getSubReg(Src0, SubRegIndex1);
+ Src0 = TRI.getSubReg(Src0, SubRegIndex0);
+ }
+
+ // Determine the correct destination registers.
+ bool Mask = false;
+ bool NotLast = true;
+ if (IsCube) {
+ unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
+ DstReg = TRI.getSubReg(DstReg, SubRegIndex);
+ } else {
+ // Mask the write if the original instruction does not write to
+ // the current Channel.
+ Mask = (Chan != TRI.getHWRegChan(DstReg));
+ unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
+ DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+ }
+
+ // Set the IsLast bit
+ NotLast = (Chan != 3);
+
+ // Add the new instruction
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AMDGPU::CUBE_r600_pseudo:
+ Opcode = AMDGPU::CUBE_r600_real;
+ break;
+ case AMDGPU::CUBE_eg_pseudo:
+ Opcode = AMDGPU::CUBE_eg_real;
+ break;
+ case AMDGPU::DOT4_r600_pseudo:
+ Opcode = AMDGPU::DOT4_r600_real;
+ break;
+ case AMDGPU::DOT4_eg_pseudo:
+ Opcode = AMDGPU::DOT4_eg_real;
+ break;
+ default:
+ break;
+ }
+
+ MachineInstr *NewMI =
+ TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
+
+ if (Chan != 0)
+ NewMI->bundleWithPred();
+ if (Mask) {
+ TII->addFlag(NewMI, 0, MO_FLAG_MASK);
+ }
+ if (NotLast) {
+ TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
+ }
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
new file mode 100644
index 000000000000..53e6e51dd2b1
--- /dev/null
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -0,0 +1,1106 @@
+//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Custom DAG lowering for R600
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600ISelLowering.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
+ addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
+ computeRegisterProperties();
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Expand);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::ADD, MVT::v4i32, Expand);
+ setOperationAction(ISD::AND, MVT::v4i32, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
+ setOperationAction(ISD::UREM, MVT::v4i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
+
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+
+ setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+
+ // Legalize loads and stores to the private address space.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
+ setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::SELECT_CC);
+
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setSchedulingPreference(Sched::VLIW);
+}
+
+MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const {
+ MachineFunction * MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineBasicBlock::iterator I = *MI;
+
+ switch (MI->getOpcode()) {
+ default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDGPU::CLAMP_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
+ break;
+ }
+
+ case AMDGPU::FABS_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_ABS);
+ break;
+ }
+
+ case AMDGPU::FNEG_R600: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
+ AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg());
+ TII->addFlag(NewMI, 0, MO_FLAG_NEG);
+ break;
+ }
+
+ case AMDGPU::MASK_WRITE: {
+ unsigned maskedRegister = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
+ MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
+ TII->addFlag(defInstr, 0, MO_FLAG_MASK);
+ break;
+ }
+
+ case AMDGPU::MOV_IMM_F32:
+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
+ MI->getOperand(1).getFPImm()->getValueAPF()
+ .bitcastToAPInt().getZExtValue());
+ break;
+ case AMDGPU::MOV_IMM_I32:
+ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+ break;
+ case AMDGPU::CONST_COPY: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
+ MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
+ TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
+ MI->getOperand(1).getImm());
+ break;
+ }
+
+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
+ unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(EOP); // Set End of program bit
+ break;
+ }
+
+ case AMDGPU::TXD: {
+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addReg(T0, RegState::Implicit)
+ .addReg(T1, RegState::Implicit);
+ break;
+ }
+
+ case AMDGPU::TXD_SHADOW: {
+ unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6));
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addReg(T0, RegState::Implicit)
+ .addReg(T1, RegState::Implicit);
+ break;
+ }
+
+ case AMDGPU::BRANCH:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ .addOperand(MI->getOperand(0));
+ break;
+
+ case AMDGPU::BRANCH_COND_f32: {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
+ AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_NOT_ZERO)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+
+ case AMDGPU::BRANCH_COND_i32: {
+ MachineInstr *NewMI =
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
+ AMDGPU::PREDICATE_BIT)
+ .addOperand(MI->getOperand(1))
+ .addImm(OPCODE_IS_NOT_ZERO_INT)
+ .addImm(0); // Flags
+ TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+ .addOperand(MI->getOperand(0))
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ break;
+ }
+
+ case AMDGPU::EG_ExportSwz:
+ case AMDGPU::R600_ExportSwz: {
+ // The instruction is left unmodified if it's not the last one of its type.
+ bool isLastInstructionOfItsType = true;
+ unsigned InstExportType = MI->getOperand(1).getImm();
+ for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
+ EndBlock = BB->end(); NextExportInst != EndBlock;
+ NextExportInst = llvm::next(NextExportInst)) {
+ if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
+ NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
+ unsigned CurrentInstExportType = NextExportInst->getOperand(1)
+ .getImm();
+ if (CurrentInstExportType == InstExportType) {
+ isLastInstructionOfItsType = false;
+ break;
+ }
+ }
+ }
+ bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN);
+ if (!EOP && !isLastInstructionOfItsType)
+ return BB;
+ unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(4))
+ .addOperand(MI->getOperand(5))
+ .addOperand(MI->getOperand(6))
+ .addImm(CfInst)
+ .addImm(EOP);
+ break;
+ }
+ case AMDGPU::RETURN: {
+ // RETURN instructions must have the live-out registers as implicit uses,
+ // otherwise they appear dead.
+ R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+ MachineInstrBuilder MIB(*MF, MI);
+ for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
+ MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
+ return BB;
+ }
+ }
+
+ MI->eraseFromParent();
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+using namespace llvm::Intrinsic;
+using namespace llvm::AMDGPUIntrinsic;
+
+SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::ROTL: return LowerROTL(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
+ case ISD::INTRINSIC_VOID: {
+ SDValue Chain = Op.getOperand(0);
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::AMDGPU_store_output: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ MFI->LiveOuts.push_back(Reg);
+ return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
+ }
+ case AMDGPUIntrinsic::R600_store_swizzle: {
+ const SDValue Args[8] = {
+ Chain,
+ Op.getOperand(2), // Export Value
+ Op.getOperand(3), // ArrayBase
+ Op.getOperand(4), // Type
+ DAG.getConstant(0, MVT::i32), // SWZ_X
+ DAG.getConstant(1, MVT::i32), // SWZ_Y
+ DAG.getConstant(2, MVT::i32), // SWZ_Z
+ DAG.getConstant(3, MVT::i32) // SWZ_W
+ };
+ return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(),
+ Args, 8);
+ }
+
+ // default for switch(IntrinsicID)
+ default: break;
+ }
+ // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ switch(IntrinsicID) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case AMDGPUIntrinsic::R600_load_input: {
+ int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
+ }
+
+ case AMDGPUIntrinsic::R600_interp_input: {
+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
+ MachineSDNode *interp;
+ if (ijb < 0) {
+ interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
+ MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
+ return DAG.getTargetExtractSubreg(
+ TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
+ DL, MVT::f32, SDValue(interp, 0));
+ }
+
+ if (slot % 4 < 2)
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
+ else
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
+
+ return SDValue(interp, slot % 2);
+ }
+
+ case r600_read_ngroups_x:
+ return LowerImplicitParameter(DAG, VT, DL, 0);
+ case r600_read_ngroups_y:
+ return LowerImplicitParameter(DAG, VT, DL, 1);
+ case r600_read_ngroups_z:
+ return LowerImplicitParameter(DAG, VT, DL, 2);
+ case r600_read_global_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 3);
+ case r600_read_global_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 4);
+ case r600_read_global_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 5);
+ case r600_read_local_size_x:
+ return LowerImplicitParameter(DAG, VT, DL, 6);
+ case r600_read_local_size_y:
+ return LowerImplicitParameter(DAG, VT, DL, 7);
+ case r600_read_local_size_z:
+ return LowerImplicitParameter(DAG, VT, DL, 8);
+
+ case r600_read_tgid_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_X, VT);
+ case r600_read_tgid_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Y, VT);
+ case r600_read_tgid_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T1_Z, VT);
+ case r600_read_tidig_x:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_X, VT);
+ case r600_read_tidig_y:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Y, VT);
+ case r600_read_tidig_z:
+ return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::T0_Z, VT);
+ }
+ // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
+ break;
+ }
+ } // end switch(Op.getOpcode())
+ return SDValue();
+}
+
+void R600TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default: return;
+ case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ return;
+ case ISD::LOAD: {
+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ Results.push_back(SDValue(Node, 1));
+ // XXX: LLVM does not seem to replace the chain value inside
+ // CustomWidenLowerNode, so do it manually here.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+ return;
+ }
+ case ISD::STORE:
+ SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ return;
+ }
+}
+
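+// FP_TO_UINT is only marked Custom for i1 results (see the constructor
+// above), so lowering it reduces to a compare against 0.0f.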
+SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
+ return DAG.getNode(
+ ISD::SETCC,
+ Op.getDebugLoc(),
+ MVT::i1,
+ Op, DAG.getConstantFP(0.0f, MVT::f32),
+ DAG.getCondCode(ISD::SETNE)
+ );
+}
+
+SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ DebugLoc DL,
+ unsigned DwordOffset) const {
+ unsigned ByteOffset = DwordOffset * 4;
+ PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+ AMDGPUAS::PARAM_I_ADDRESS);
+
+ // We shouldn't be using an offset wider than 16 bits for implicit parameters.
+ assert(isInt<16>(ByteOffset));
+
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(),
+ DAG.getConstant(ByteOffset, MVT::i32), // PTR
+ MachinePointerInfo(ConstantPointerNull::get(PtrType)),
+ false, false, false, 0);
+}
+
+SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
+
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
+ assert(FIN);
+
+ unsigned FrameIndex = FIN->getIndex();
+ unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
+}
+
+SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
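+ // A minimal sketch of the mapping, assuming BITALIGN funnel-shifts the
+ // 64-bit concatenation (src0:src1) right by src2 bits:
+ //   rotl(x, n) == bitalign(x, x, 32 - n)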
+ return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
+ Op.getOperand(0),
+ Op.getOperand(0),
+ DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(32, MVT::i32),
+ Op.getOperand(1)));
+}
+
+bool R600TargetLowering::isZero(SDValue Op) const {
+ if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
+ return Cst->isNullValue();
+ } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
+ return CstFP->isZero();
+ } else {
+ return false;
+ }
+}
+
+SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ SDValue Temp;
+
+ // LHS and RHS are guaranteed to be the same value type
+ EVT CompareVT = LHS.getValueType();
+
+ // Check if we can lower this to a native operation.
+
+ // Try to lower to a SET* instruction:
+ //
+ // SET* can match the following patterns:
+ //
+ // select_cc f32, f32, -1, 0, cc_any
+ // select_cc f32, f32, 1.0f, 0.0f, cc_any
+ // select_cc i32, i32, -1, 0, cc_any
+ //
+
+ // Move hardware True/False values to the correct operand.
+ if (isHWTrueValue(False) && isHWFalseValue(True)) {
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ std::swap(False, True);
+ CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
+ }
+
+ if (isHWTrueValue(True) && isHWFalseValue(False) &&
+ (CompareVT == VT || VT == MVT::i32)) {
+ // This can be matched by a SET* instruction.
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+ }
+
+ // Try to lower to a CND* instruction:
+ //
+ // CND* can match the following patterns:
+ //
+ // select_cc f32, 0.0, f32, f32, cc_any
+ // select_cc f32, 0.0, i32, i32, cc_any
+ // select_cc i32, 0, f32, f32, cc_any
+ // select_cc i32, 0, i32, i32, cc_any
+ //
+ if (isZero(LHS) || isZero(RHS)) {
+ SDValue Cond = (isZero(LHS) ? RHS : LHS);
+ SDValue Zero = (isZero(LHS) ? LHS : RHS);
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ if (CompareVT != VT) {
+ // Bitcast True / False to the correct types. This ends up being a nop,
+ // but it lets us define a single pattern in the .TD files for each CND*
+ // instruction rather than one pattern for integer True/False and another
+ // for fp True/False.
+ True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
+ False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
+ }
+ if (isZero(LHS)) {
+ CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
+ }
+
+ switch (CCOpcode) {
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETOLT:
+ case ISD::SETLE:
+ case ISD::SETLT:
+ CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
+ Temp = True;
+ True = False;
+ False = Temp;
+ break;
+ default:
+ break;
+ }
+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
+ Cond, Zero,
+ True, False,
+ DAG.getCondCode(CCOpcode));
+ return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
+ }
+
+
+ // Possible Min/Max pattern
+ SDValue MinMax = LowerMinMax(Op, DAG);
+ if (MinMax.getNode()) {
+ return MinMax;
+ }
+
+ // If we make it this far, it means we have no native instructions to handle
+ // this SELECT_CC, so we must lower it.
+ SDValue HWTrue, HWFalse;
+
+ if (CompareVT == MVT::f32) {
+ HWTrue = DAG.getConstantFP(1.0f, CompareVT);
+ HWFalse = DAG.getConstantFP(0.0f, CompareVT);
+ } else if (CompareVT == MVT::i32) {
+ HWTrue = DAG.getConstant(-1, CompareVT);
+ HWFalse = DAG.getConstant(0, CompareVT);
+ } else {
+ assert(!"Unhandled value type in LowerSELECT_CC");
+ }
+
+ // Lower this unsupported SELECT_CC into a combination of two supported
+ // SELECT_CC operations.
+ SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
+
+ return DAG.getNode(ISD::SELECT_CC, DL, VT,
+ Cond, HWFalse,
+ True, False,
+ DAG.getCondCode(ISD::SETNE));
+}
+
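+// select(Cond, True, False) is lowered to select_cc(Cond, 0, True, False,
+// setne) so it can reuse the SELECT_CC lowering above.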
+SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ return DAG.getNode(ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32),
+ Op.getOperand(1),
+ Op.getOperand(2),
+ DAG.getCondCode(ISD::SETNE));
+}
+
+/// LLVM generates byte-addressed pointers. For indirect addressing, we need
+/// to convert these pointers to a register index. Each register holds
+/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
+/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
+/// for indirect addressing.
+SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
+ unsigned StackWidth,
+ SelectionDAG &DAG) const {
+ unsigned SRLPad;
+ switch(StackWidth) {
+ case 1:
+ SRLPad = 2;
+ break;
+ case 2:
+ SRLPad = 3;
+ break;
+ case 4:
+ SRLPad = 4;
+ break;
+ default: llvm_unreachable("Invalid stack width");
+ }
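+ // For example, with StackWidth == 2 each register index covers two 32-bit
+ // channels (8 bytes), so a byte pointer becomes Ptr >> 3.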
+
+ return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
+ DAG.getConstant(SRLPad, MVT::i32));
+}
+
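+// Maps element ElemIdx of a stack slot to a channel and an increment for the
+// running register index. Note that PtrIncr is relative to the previous
+// element: with StackWidth == 2, elements 0..3 map to (chan 0, +0),
+// (chan 1, +0), (chan 0, +1), (chan 1, +0).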
+void R600TargetLowering::getStackAddress(unsigned StackWidth,
+ unsigned ElemIdx,
+ unsigned &Channel,
+ unsigned &PtrIncr) const {
+ switch (StackWidth) {
+ default:
+ case 1:
+ Channel = 0;
+ if (ElemIdx > 0) {
+ PtrIncr = 1;
+ } else {
+ PtrIncr = 0;
+ }
+ break;
+ case 2:
+ Channel = ElemIdx % 2;
+ if (ElemIdx == 2) {
+ PtrIncr = 1;
+ } else {
+ PtrIncr = 0;
+ }
+ break;
+ case 4:
+ Channel = ElemIdx;
+ PtrIncr = 0;
+ break;
+ }
+}
+
+SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue Ptr = Op.getOperand(2);
+
+ if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
+ // Convert pointer from byte address to dword address.
+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
+ DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(2, MVT::i32)));
+
+ if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
+ assert(!"Truncated and indexed stores not supported yet");
+ } else {
+ Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
+ }
+ return Chain;
+ }
+
+ EVT ValueVT = Value.getValueType();
+
+ if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ // Lowering for indirect addressing
+
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+ getTargetMachine().getFrameLowering());
+ unsigned StackWidth = TFL->getStackWidth(MF);
+
+ Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+ if (ValueVT.isVector()) {
+ unsigned NumElemVT = ValueVT.getVectorNumElements();
+ EVT ElemVT = ValueVT.getVectorElementType();
+ SDValue Stores[4];
+
+ assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+ "vector width in store");
+
+ for (unsigned i = 0; i < NumElemVT; ++i) {
+ unsigned Channel, PtrIncr;
+ getStackAddress(StackWidth, i, Channel, PtrIncr);
+ Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+ DAG.getConstant(PtrIncr, MVT::i32));
+ SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
+ Value, DAG.getConstant(i, MVT::i32));
+
+ Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Elem, Ptr,
+ DAG.getTargetConstant(Channel, MVT::i32));
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
+ } else {
+ if (ValueVT == MVT::i8) {
+ Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
+ }
+ Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
+ DAG.getTargetConstant(0, MVT::i32)); // Channel
+ }
+
+ return Chain;
+}
+
+// Returns 512 + (kc_bank << 12) for the given constant-buffer address space,
+// or -1 if the address space is not a constant buffer.
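+// (For example, CONSTANT_BUFFER_3 maps to 512 + (3 << 12) = 12800.)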
+static int
+ConstantAddressBlock(unsigned AddressSpace) {
+ switch (AddressSpace) {
+ case AMDGPUAS::CONSTANT_BUFFER_0:
+ return 512;
+ case AMDGPUAS::CONSTANT_BUFFER_1:
+ return 512 + 4096;
+ case AMDGPUAS::CONSTANT_BUFFER_2:
+ return 512 + 4096 * 2;
+ case AMDGPUAS::CONSTANT_BUFFER_3:
+ return 512 + 4096 * 3;
+ case AMDGPUAS::CONSTANT_BUFFER_4:
+ return 512 + 4096 * 4;
+ case AMDGPUAS::CONSTANT_BUFFER_5:
+ return 512 + 4096 * 5;
+ case AMDGPUAS::CONSTANT_BUFFER_6:
+ return 512 + 4096 * 6;
+ case AMDGPUAS::CONSTANT_BUFFER_7:
+ return 512 + 4096 * 7;
+ case AMDGPUAS::CONSTANT_BUFFER_8:
+ return 512 + 4096 * 8;
+ case AMDGPUAS::CONSTANT_BUFFER_9:
+ return 512 + 4096 * 9;
+ case AMDGPUAS::CONSTANT_BUFFER_10:
+ return 512 + 4096 * 10;
+ case AMDGPUAS::CONSTANT_BUFFER_11:
+ return 512 + 4096 * 11;
+ case AMDGPUAS::CONSTANT_BUFFER_12:
+ return 512 + 4096 * 12;
+ case AMDGPUAS::CONSTANT_BUFFER_13:
+ return 512 + 4096 * 13;
+ case AMDGPUAS::CONSTANT_BUFFER_14:
+ return 512 + 4096 * 14;
+ case AMDGPUAS::CONSTANT_BUFFER_15:
+ return 512 + 4096 * 15;
+ default:
+ return -1;
+ }
+}
+
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+
+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+ if (ConstantBlock > -1) {
+ SDValue Result;
+ if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
+ dyn_cast<Constant>(LoadNode->getSrcValue()) ||
+ dyn_cast<ConstantSDNode>(Ptr)) {
+ SDValue Slots[4];
+ for (unsigned i = 0; i < 4; i++) {
+ // We want the constant position encoded with the following formula:
+ //   (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+ // where const_index is Ptr, which LLVM computes with an alignment of 16.
+ // Thus we add ConstantBlock * 16 + chan * 4 here and then divide by 4
+ // at the ISel step.
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
+ } else {
+ // A non-constant pointer can't be folded; keep it as a full vec4 load.
+ Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
+ DAG.getConstant(LoadNode->getAddressSpace() -
+ AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
+ );
+ }
+
+ if (!VT.isVector()) {
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ SDValue MergedValues[2] = {
+ Result,
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+ }
+
+ if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ // Lowering for indirect addressing
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+ getTargetMachine().getFrameLowering());
+ unsigned StackWidth = TFL->getStackWidth(MF);
+
+ Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+ if (VT.isVector()) {
+ unsigned NumElemVT = VT.getVectorNumElements();
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Loads[4];
+
+ assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+ "vector width in load");
+
+ for (unsigned i = 0; i < NumElemVT; ++i) {
+ unsigned Channel, PtrIncr;
+ getStackAddress(StackWidth, i, Channel, PtrIncr);
+ Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+ DAG.getConstant(PtrIncr, MVT::i32));
+ Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
+ Chain, Ptr,
+ DAG.getTargetConstant(Channel, MVT::i32),
+ Op.getOperand(2));
+ }
+ for (unsigned i = NumElemVT; i < 4; ++i) {
+ Loads[i] = DAG.getUNDEF(ElemVT);
+ }
+ EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
+ LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
+ } else {
+ LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
+ Chain, Ptr,
+ DAG.getTargetConstant(0, MVT::i32), // Channel
+ Op.getOperand(2));
+ }
+
+ SDValue Ops[2];
+ Ops[0] = LoweredLoad;
+ Ops[1] = Chain;
+
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+/// XXX Only kernel functions are supported, so we can assume for now that
+/// every function is a kernel function, but in the future we should use
+/// separate calling conventions for kernel and non-kernel functions.
+SDValue R600TargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
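+ // Explicit arguments begin after the nine implicit kernel parameters,
+ // i.e. at byte offset 9 * 4 == 36 (see LowerImplicitParameter).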
+ unsigned ParamOffsetBytes = 36;
+ Function::const_arg_iterator FuncArg =
+ DAG.getMachineFunction().getFunction()->arg_begin();
+ for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
+ EVT VT = Ins[i].VT;
+ Type *ArgType = FuncArg->getType();
+ unsigned ArgSizeInBits = ArgType->isPointerTy() ?
+ 32 : ArgType->getPrimitiveSizeInBits();
+ unsigned ArgBytes = ArgSizeInBits >> 3;
+ EVT ArgVT;
+ if (ArgSizeInBits < VT.getSizeInBits()) {
+ assert(!ArgType->isFloatTy() &&
+ "Extending floating point arguments not supported yet");
+ ArgVT = MVT::getIntegerVT(ArgSizeInBits);
+ } else {
+ ArgVT = VT;
+ }
+ PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
+ AMDGPUAS::PARAM_I_ADDRESS);
+ SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
+ DAG.getConstant(ParamOffsetBytes, MVT::i32),
+ MachinePointerInfo(UndefValue::get(PtrTy)),
+ ArgVT, false, false, ArgBytes);
+ InVals.push_back(Arg);
+ ParamOffsetBytes += ArgBytes;
+ }
+ return Chain;
+}
+
+EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
+ if (!VT.isVector()) return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ switch (N->getOpcode()) {
+ // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
+ case ISD::FP_ROUND: {
+ SDValue Arg = N->getOperand(0);
+ if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
+ return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
+ Arg.getOperand(0));
+ }
+ break;
+ }
+
+ // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
+ // (i32 select_cc f32, f32, -1, 0 cc)
+ //
+ // Mesa's GLSL frontend generates the above pattern a lot and we can lower
+ // this to one of the SET*_DX10 instructions.
+ case ISD::FP_TO_SINT: {
+ SDValue FNeg = N->getOperand(0);
+ if (FNeg.getOpcode() != ISD::FNEG) {
+ return SDValue();
+ }
+ SDValue SelectCC = FNeg.getOperand(0);
+ if (SelectCC.getOpcode() != ISD::SELECT_CC ||
+ SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
+ SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
+ !isHWTrueValue(SelectCC.getOperand(2)) ||
+ !isHWFalseValue(SelectCC.getOperand(3))) {
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
+ SelectCC.getOperand(0), // LHS
+ SelectCC.getOperand(1), // RHS
+ DAG.getConstant(-1, MVT::i32), // True
+ DAG.getConstant(0, MVT::i32), // False
+ SelectCC.getOperand(4)); // CC
+
+ break;
+ }
+ // Extract_vec (Build_vector) generated by custom lowering
+ // also needs to be custom-combined.
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Arg = N->getOperand(0);
+ if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned Element = Const->getZExtValue();
+ return Arg->getOperand(Element);
+ }
+ }
+ if (Arg.getOpcode() == ISD::BITCAST &&
+ Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned Element = Const->getZExtValue();
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
+ Arg->getOperand(0).getOperand(Element));
+ }
+ }
+ break;
+ }
+
+ case ISD::SELECT_CC: {
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
+ // selectcc x, y, a, b, inv(cc)
+ //
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
+ // selectcc x, y, a, b, cc
+ SDValue LHS = N->getOperand(0);
+ if (LHS.getOpcode() != ISD::SELECT_CC) {
+ return SDValue();
+ }
+
+ SDValue RHS = N->getOperand(1);
+ SDValue True = N->getOperand(2);
+ SDValue False = N->getOperand(3);
+ ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ if (LHS.getOperand(2).getNode() != True.getNode() ||
+ LHS.getOperand(3).getNode() != False.getNode() ||
+ RHS.getNode() != False.getNode()) {
+ return SDValue();
+ }
+
+ switch (NCC) {
+ default: return SDValue();
+ case ISD::SETNE: return LHS;
+ case ISD::SETEQ: {
+ ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
+ LHSCC = ISD::getSetCCInverse(LHSCC,
+ LHS.getOperand(0).getValueType().isInteger());
+ return DAG.getSelectCC(N->getDebugLoc(),
+ LHS.getOperand(0),
+ LHS.getOperand(1),
+ LHS.getOperand(2),
+ LHS.getOperand(3),
+ LHSCC);
+ }
+ }
+ }
+ case AMDGPUISD::EXPORT: {
+ SDValue Arg = N->getOperand(1);
+ if (Arg.getOpcode() != ISD::BUILD_VECTOR)
+ break;
+ SDValue NewBldVec[4] = {
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32)
+ };
+ SDValue NewArgs[8] = {
+ N->getOperand(0), // Chain
+ SDValue(),
+ N->getOperand(2), // ArrayBase
+ N->getOperand(3), // Type
+ N->getOperand(4), // SWZ_X
+ N->getOperand(5), // SWZ_Y
+ N->getOperand(6), // SWZ_Z
+ N->getOperand(7) // SWZ_W
+ };
+ for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
+ if (C->isZero()) {
+ NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
+ } else if (C->isExactlyValue(1.0)) {
+ NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_1
+ } else {
+ NewBldVec[i] = Arg.getOperand(i);
+ }
+ } else {
+ NewBldVec[i] = Arg.getOperand(i);
+ }
+ }
+ DebugLoc DL = N->getDebugLoc();
+ NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
+ return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
+ }
+ }
+ return SDValue();
+}
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
new file mode 100644
index 000000000000..2c09acb9af30
--- /dev/null
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -0,0 +1,74 @@
+//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600ISELLOWERING_H
+#define R600ISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+
+namespace llvm {
+
+class R600InstrInfo;
+
+class R600TargetLowering : public AMDGPUTargetLowering {
+public:
+ R600TargetLowering(TargetMachine &TM);
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock * BB) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ void ReplaceNodeResults(SDNode * N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
+ virtual SDValue LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
+private:
+ const R600InstrInfo * TII;
+
+ /// Each OpenCL kernel has nine implicit parameters that are stored in the
+ /// first nine dwords of a Vertex Buffer. These implicit parameters are
+ /// lowered to load instructions which retrieve the values from the Vertex
+ /// Buffer.
+ SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
+ DebugLoc DL, unsigned DwordOffset) const;
+
+ void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
+ MachineRegisterInfo & MRI, unsigned dword_offset) const;
+
+ /// \brief Lower ROTL opcode to BITALIGN
+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
+ SelectionDAG &DAG) const;
+ void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
+ unsigned &Channel, unsigned &PtrIncr) const;
+ bool isZero(SDValue Op) const;
+};
+
+} // End namespace llvm
+
+#endif // R600ISELLOWERING_H
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
new file mode 100644
index 000000000000..b232188a2641
--- /dev/null
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -0,0 +1,841 @@
+//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600InstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AMDGPUGenDFAPacketizer.inc"
+
+using namespace llvm;
+
+R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this)
+ { }
+
+const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
+}
+
+bool R600InstrInfo::isVector(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
+}
+
+void
+R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
+ && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
+ for (unsigned I = 0; I < 4; I++) {
+ unsigned SubRegIndex = RI.getSubRegFromChannel(I);
+ buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ RI.getSubReg(DestReg, SubRegIndex),
+ RI.getSubReg(SrcReg, SubRegIndex))
+ .addReg(DestReg,
+ RegState::Define | RegState::Implicit);
+ }
+ } else {
+
+ // We can't copy vec4 registers
+ assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
+ && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
+
+ MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ DestReg, SrcReg);
+ NewMI->getOperand(getOperandIdx(*NewMI, R600Operands::SRC0))
+ .setIsKill(KillSrc);
+ }
+}
+
+MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
+ unsigned DstReg, int64_t Imm) const {
+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
+ MachineInstrBuilder MIB(*MF, MI);
+ MIB.addReg(DstReg, RegState::Define);
+ MIB.addReg(AMDGPU::ALU_LITERAL_X);
+ MIB.addImm(Imm);
+ MIB.addReg(0); // PREDICATE_BIT
+
+ return MI;
+}
+
+unsigned R600InstrInfo::getIEQOpcode() const {
+ return AMDGPU::SETE_INT;
+}
+
+bool R600InstrInfo::isMov(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::MOV:
+ case AMDGPU::MOV_IMM_F32:
+ case AMDGPU::MOV_IMM_I32:
+ return true;
+ }
+}
+
+// Some instructions act as placeholders to emulate operations that the GPU
+// hardware does automatically. This function can be used to check if
+// an opcode falls into this category.
+bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return false;
+ case AMDGPU::RETURN:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::DOT4_r600_pseudo:
+ case AMDGPU::DOT4_eg_pseudo:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::CUBE_r600_pseudo:
+ case AMDGPU::CUBE_r600_real:
+ case AMDGPU::CUBE_eg_pseudo:
+ case AMDGPU::CUBE_eg_real:
+ return true;
+ }
+}
+
+bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
+ return ((TargetFlags & R600_InstFlag::OP1) |
+ (TargetFlags & R600_InstFlag::OP2) |
+ (TargetFlags & R600_InstFlag::OP3));
+}
+
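+// Conservative check against the hardware's constant-read limits: each
+// address is reduced to a 128-bit const slot (low two bits dropped) plus a
+// 64-bit half selector (bit 1), and an instruction group appears to be
+// allowed to read from at most two distinct halves.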
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
+ const {
+ assert(Consts.size() <= 12 && "Too many operands in instruction group");
+ unsigned Pair1 = 0, Pair2 = 0;
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned ReadConstHalf = Consts[i] & 2;
+ unsigned ReadConstIndex = Consts[i] & (~3);
+ unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
+ if (!Pair1) {
+ Pair1 = ReadHalfConst;
+ continue;
+ }
+ if (Pair1 == ReadHalfConst)
+ continue;
+ if (!Pair2) {
+ Pair2 = ReadHalfConst;
+ continue;
+ }
+ if (Pair2 != ReadHalfConst)
+ return false;
+ }
+ return true;
+}
+
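+// A set of ALU instructions may only be bundled into one instruction group
+// if the kcache constants they read collectively satisfy
+// fitsConstReadLimitations, so gather every ALU_CONST source selector first.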
+bool
+R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+ std::vector<unsigned> Consts;
+ for (unsigned i = 0, n = MIs.size(); i < n; i++) {
+ const MachineInstr *MI = MIs[i];
+
+ const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+
+ if (!isALUInstr(MI->getOpcode()))
+ continue;
+
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+ unsigned Const = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Consts.push_back(Const);
+ }
+ }
+ }
+ return fitsConstReadLimitations(Consts);
+}
+
+DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
+}
+
+static bool
+isPredicateSetter(unsigned Opcode) {
+ switch (Opcode) {
+ case AMDGPU::PRED_X:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static MachineInstr *
+findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ while (I != MBB.begin()) {
+ --I;
+ MachineInstr *MI = I;
+ if (isPredicateSetter(MI->getOpcode()))
+ return MI;
+ }
+
+ return NULL;
+}
+
+static
+bool isJump(unsigned Opcode) {
+ return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
+}
+
+bool
+R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Most of the following comes from the ARM implementation of AnalyzeBranch
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
+ return false;
+ }
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() ||
+ !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
+ if (LastOpc == AMDGPU::JUMP) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastOpc == AMDGPU::JUMP_COND) {
+ MachineInstr *predSet = I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
+ MachineInstr *predSet = --I;
+ while (!isPredicateSetter(predSet->getOpcode())) {
+ predSet = --I;
+ }
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ FBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(predSet->getOperand(1));
+ Cond.push_back(predSet->getOperand(2));
+ Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
+ const MachineInstr *MI = op.getParent();
+
+ switch (MI->getDesc().OpInfo->RegClass) {
+ default: // FIXME: fallthrough??
+ case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
+ case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
+ };
+}
+
+unsigned
+R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
+ return 1;
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate!");
+ addFlag(PredSet, 0, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ return 1;
+ }
+ } else {
+ MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
+ assert(PredSet && "No previous predicate!");
+ addFlag(PredSet, 0, MO_FLAG_PUSH);
+ PredSet->getOperand(2).setImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+ .addMBB(TBB)
+ .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
+ return 2;
+ }
+}
+
+unsigned
+R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+
+ // Note: we leave PRED* instructions in place; they may be needed when
+ // predicating instructions.
+
+ MachineBasicBlock::iterator I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 0;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ default:
+ return 0;
+ case AMDGPU::JUMP_COND: {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ I->eraseFromParent();
+ break;
+ }
+ case AMDGPU::JUMP:
+ I->eraseFromParent();
+ break;
+ }
+ I = MBB.end();
+
+ if (I == MBB.begin()) {
+ return 1;
+ }
+ --I;
+ switch (I->getOpcode()) {
+ // FIXME: only one case??
+ default:
+ return 1;
+ case AMDGPU::JUMP_COND: {
+ MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
+ clearFlag(predSet, 0, MO_FLAG_PUSH);
+ I->eraseFromParent();
+ break;
+ }
+ case AMDGPU::JUMP:
+ I->eraseFromParent();
+ break;
+ }
+ return 2;
+}
+
+bool
+R600InstrInfo::isPredicated(const MachineInstr *MI) const {
+ int idx = MI->findFirstPredOperandIdx();
+ if (idx < 0)
+ return false;
+
+ unsigned Reg = MI->getOperand(idx).getReg();
+ switch (Reg) {
+ default: return false;
+ case AMDGPU::PRED_SEL_ONE:
+ case AMDGPU::PRED_SEL_ZERO:
+ case AMDGPU::PREDICATE_BIT:
+ return true;
+ }
+}
+
+bool
+R600InstrInfo::isPredicable(MachineInstr *MI) const {
+ // XXX: KILL* instructions can be predicated, but they must be the last
+ // instruction in a clause, so this means any instructions after them cannot
+ // be predicated. Until we have proper support for instruction clauses in the
+ // backend, we will mark KILL* instructions as unpredicable.
+
+ if (MI->getOpcode() == AMDGPU::KILLGT) {
+ return false;
+ } else if (isVector(*MI)) {
+ return false;
+ } else {
+ return AMDGPUInstrInfo::isPredicable(MI);
+ }
+}
+
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+                                   unsigned NumCycles,
+                                   unsigned ExtraPredCycles,
+                                   const BranchProbability &Probability) const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+                                         unsigned NumCycles,
+                                         const BranchProbability &Probability)
+                                         const {
+ return true;
+}
+
+bool
+R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ return false;
+}
+
+
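+// Invert the condition produced by AnalyzeBranch: flip the compare opcode
+// stored in Cond[1] and swap the PRED_SEL_ZERO/PRED_SEL_ONE selector in
+// Cond[2]. Returns true if either value is unrecognized.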
+bool
+R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ MachineOperand &MO = Cond[1];
+ switch (MO.getImm()) {
+ case OPCODE_IS_ZERO_INT:
+ MO.setImm(OPCODE_IS_NOT_ZERO_INT);
+ break;
+ case OPCODE_IS_NOT_ZERO_INT:
+ MO.setImm(OPCODE_IS_ZERO_INT);
+ break;
+ case OPCODE_IS_ZERO:
+ MO.setImm(OPCODE_IS_NOT_ZERO);
+ break;
+ case OPCODE_IS_NOT_ZERO:
+ MO.setImm(OPCODE_IS_ZERO);
+ break;
+ default:
+ return true;
+ }
+
+ MachineOperand &MO2 = Cond[2];
+ switch (MO2.getReg()) {
+ case AMDGPU::PRED_SEL_ZERO:
+ MO2.setReg(AMDGPU::PRED_SEL_ONE);
+ break;
+ case AMDGPU::PRED_SEL_ONE:
+ MO2.setReg(AMDGPU::PRED_SEL_ZERO);
+ break;
+ default:
+ return true;
+ }
+ return false;
+}
+
+bool
+R600InstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ return isPredicateSetter(MI->getOpcode());
+}
+
+
+bool
+R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ return false;
+}
+
+
+bool
+R600InstrInfo::PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setReg(Pred[2].getReg());
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ return true;
+ }
+
+ return false;
+}
+
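+// Flat latency model: report a fixed two-cycle latency (and predicate cost)
+// for every instruction.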
+unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (PredCost)
+ *PredCost = 2;
+ return 2;
+}
+
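+// The first register index available for indirect addressing is one past
+// the highest register index that is live into the function; with no frame
+// objects there is no indirect range at all.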
+int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = 0;
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ if (MRI.livein_empty()) {
+ return 0;
+ }
+
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ LE = MRI.livein_end();
+ LI != LE; ++LI) {
+ Offset = std::max(Offset,
+ GET_REG_INDEX(RI.getEncodingValue(LI->first)));
+ }
+
+ return Offset + 1;
+}
+
+int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ int Offset = 0;
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Variable-sized objects are not supported.
+ assert(!MFI->hasVarSizedObjects());
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+
+ return getIndirectIndexBegin(MF) + Offset;
+}
+
+std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
+ const MachineFunction &MF) const {
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
+ std::vector<unsigned> Regs;
+
+ unsigned StackWidth = TFL->getStackWidth(MF);
+ int End = getIndirectIndexEnd(MF);
+
+ if (End == -1) {
+ return Regs;
+ }
+
+ for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
+ unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
+ Regs.push_back(SuperReg);
+ for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
+ Regs.push_back(Reg);
+ }
+ }
+ return Regs;
+}
+
+unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ // XXX: Remove when we support a stack width > 2
+ assert(Channel == 0);
+ return RegIndex;
+}
+
+const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const {
+ return &AMDGPU::R600_TReg32RegClass;
+}
+
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
+ return &AMDGPU::TRegMemRegClass;
+}
+
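+// Indirect writes lower to a MOVA_INT_eg that loads the offset into AR_X,
+// followed by a relative MOV into the selected address register;
+// buildIndirectRead below is the mirror image, using SRC0_REL.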
+MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const {
+ unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+ AMDGPU::AR_X, OffsetReg);
+ setImmOperand(MOVA, R600Operands::WRITE, 0);
+
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ AddrReg, ValueReg)
+ .addReg(AMDGPU::AR_X, RegState::Implicit);
+ setImmOperand(Mov, R600Operands::DST_REL, 1);
+ return Mov;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const {
+ unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+ AMDGPU::AR_X,
+ OffsetReg);
+ setImmOperand(MOVA, R600Operands::WRITE, 0);
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ ValueReg,
+ AddrReg)
+ .addReg(AMDGPU::AR_X, RegState::Implicit);
+ setImmOperand(Mov, R600Operands::SRC0_REL, 1);
+
+ return Mov;
+}
+
+const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
+ return &AMDGPU::IndirectRegRegClass;
+}
+
+unsigned R600InstrInfo::getMaxAlusPerClause() const {
+ return 115;
+}
+
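+// Keep the operand order here in sync with the (ins ...) lists of the
+// R600_1OP/R600_2OP classes in R600Instructions.td; the trailing comments
+// name the TableGen operand each value corresponds to.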
+MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opcode,
+ unsigned DstReg,
+ unsigned Src0Reg,
+ unsigned Src1Reg) const {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
+ DstReg); // $dst
+
+ if (Src1Reg) {
+ MIB.addImm(0) // $update_exec_mask
+ .addImm(0); // $update_predicate
+ }
+ MIB.addImm(1) // $write
+ .addImm(0) // $omod
+ .addImm(0) // $dst_rel
+ .addImm(0) // $dst_clamp
+ .addReg(Src0Reg) // $src0
+ .addImm(0) // $src0_neg
+ .addImm(0) // $src0_rel
+ .addImm(0) // $src0_abs
+ .addImm(-1); // $src0_sel
+
+ if (Src1Reg) {
+ MIB.addReg(Src1Reg) // $src1
+ .addImm(0) // $src1_neg
+ .addImm(0) // $src1_rel
+ .addImm(0) // $src1_abs
+ .addImm(-1); // $src1_sel
+ }
+
+  // XXX: The r600g finalizer expects this to be 1. Once we've moved the
+  // scheduling to the backend, we can change the default to 0.
+ MIB.addImm(1) // $last
+ .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
+ .addImm(0); // $literal
+
+ return MIB;
+}
+
+MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const {
+ MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
+ AMDGPU::ALU_LITERAL_X);
+ setImmOperand(MovImm, R600Operands::IMM, Imm);
+ return MovImm;
+}
+
+int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
+ R600Operands::Ops Op) const {
+ return getOperandIdx(MI.getOpcode(), Op);
+}
+
+int R600InstrInfo::getOperandIdx(unsigned Opcode,
+ R600Operands::Ops Op) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+ unsigned OpTableIdx;
+
+ if (!HAS_NATIVE_OPERANDS(TargetFlags)) {
+ switch (Op) {
+ case R600Operands::DST: return 0;
+ case R600Operands::SRC0: return 1;
+ case R600Operands::SRC1: return 2;
+ case R600Operands::SRC2: return 3;
+ default:
+ assert(!"Unknown operand type for instruction");
+ return -1;
+ }
+ }
+
+ if (TargetFlags & R600_InstFlag::OP1) {
+ OpTableIdx = 0;
+ } else if (TargetFlags & R600_InstFlag::OP2) {
+ OpTableIdx = 1;
+ } else {
+ assert((TargetFlags & R600_InstFlag::OP3) && "OP1, OP2, or OP3 not defined "
+ "for this instruction");
+ OpTableIdx = 2;
+ }
+
+ return R600Operands::ALUOpTable[OpTableIdx][Op];
+}
+
+void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
+ int64_t Imm) const {
+ int Idx = getOperandIdx(*MI, Op);
+ assert(Idx != -1 && "Operand not supported for this instruction.");
+ assert(MI->getOperand(Idx).isImm());
+ MI->getOperand(Idx).setImm(Imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction flag getters/setters
+//===----------------------------------------------------------------------===//
+
+bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
+ return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
+}
+
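+// Locate the immediate operand holding the requested flag: with native
+// operands each flag maps to its own modifier operand, otherwise all flags
+// share the single operand named by GET_FLAG_OPERAND_IDX.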
+MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ int FlagIndex = 0;
+ if (Flag != 0) {
+ // If we pass something other than the default value of Flag to this
+    // function, it means we want to set a flag on an instruction
+ // that uses native encoding.
+ assert(HAS_NATIVE_OPERANDS(TargetFlags));
+ bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
+ switch (Flag) {
+ case MO_FLAG_CLAMP:
+ FlagIndex = getOperandIdx(*MI, R600Operands::CLAMP);
+ break;
+ case MO_FLAG_MASK:
+ FlagIndex = getOperandIdx(*MI, R600Operands::WRITE);
+ break;
+ case MO_FLAG_NOT_LAST:
+ case MO_FLAG_LAST:
+ FlagIndex = getOperandIdx(*MI, R600Operands::LAST);
+ break;
+ case MO_FLAG_NEG:
+ switch (SrcIdx) {
+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_NEG); break;
+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_NEG); break;
+ case 2: FlagIndex = getOperandIdx(*MI, R600Operands::SRC2_NEG); break;
+ }
+ break;
+
+ case MO_FLAG_ABS:
+ assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
+ "instructions.");
+ (void)IsOP3;
+ switch (SrcIdx) {
+ case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_ABS); break;
+ case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_ABS); break;
+ }
+ break;
+
+ default:
+ FlagIndex = -1;
+ break;
+ }
+ assert(FlagIndex != -1 && "Flag not supported for this instruction");
+ } else {
+ FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
+ assert(FlagIndex != 0 &&
+ "Instruction flags not supported for this instruction");
+ }
+
+ MachineOperand &FlagOp = MI->getOperand(FlagIndex);
+ assert(FlagOp.isImm());
+ return FlagOp;
+}
+
+void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ if (Flag == 0) {
+ return;
+ }
+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
+ if (Flag == MO_FLAG_NOT_LAST) {
+ clearFlag(MI, Operand, MO_FLAG_LAST);
+ } else if (Flag == MO_FLAG_MASK) {
+ clearFlag(MI, Operand, Flag);
+ } else {
+ FlagOp.setImm(1);
+ }
+ } else {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand);
+ FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
+ }
+}
+
+void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
+ unsigned Flag) const {
+ unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
+ if (HAS_NATIVE_OPERANDS(TargetFlags)) {
+ MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
+ FlagOp.setImm(0);
+ } else {
+ MachineOperand &FlagOp = getFlagOp(MI);
+ unsigned InstFlags = FlagOp.getImm();
+ InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
+ FlagOp.setImm(InstFlags);
+ }
+}
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
new file mode 100644
index 000000000000..dbae90013d22
--- /dev/null
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -0,0 +1,204 @@
+//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for R600InstrInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600INSTRUCTIONINFO_H_
+#define R600INSTRUCTIONINFO_H_
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDIL.h"
+#include "R600Defines.h"
+#include "R600RegisterInfo.h"
+#include <map>
+
+namespace llvm {
+
+ class AMDGPUTargetMachine;
+ class DFAPacketizer;
+ class ScheduleDAG;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineInstrBuilder;
+
+ class R600InstrInfo : public AMDGPUInstrInfo {
+ private:
+ const R600RegisterInfo RI;
+
+ int getBranchInstr(const MachineOperand &op) const;
+
+ public:
+ explicit R600InstrInfo(AMDGPUTargetMachine &tm);
+
+ const R600RegisterInfo &getRegisterInfo() const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ bool isTrig(const MachineInstr &MI) const;
+ bool isPlaceHolderOpcode(unsigned opcode) const;
+ bool isReductionOp(unsigned opcode) const;
+ bool isCubeOp(unsigned opcode) const;
+
+ /// \returns true if this \p Opcode represents an ALU instruction.
+ bool isALUInstr(unsigned Opcode) const;
+
+ bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
+ bool canBundle(const std::vector<MachineInstr *> &) const;
+
+  /// \brief Vector instructions are instructions that must fill all
+ /// instruction slots within an instruction group.
+ bool isVector(const MachineInstr &MI) const;
+
+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const;
+
+ virtual unsigned getIEQOpcode() const;
+ virtual bool isMov(unsigned Opcode) const;
+
+ DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
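+  /// AnalyzeBranch encodes the condition as three operands: two operands
+  /// copied from the predicate setter (Cond[1] holds the compare opcode as
+  /// an immediate) followed by the PRED_SEL register to branch on.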
+  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify) const;
+
+  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                        MachineBasicBlock *FBB,
+                        const SmallVectorImpl<MachineOperand> &Cond,
+                        DebugLoc DL) const;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ bool isPredicated(const MachineInstr *MI) const;
+
+ bool isPredicable(MachineInstr *MI) const;
+
+ bool
+  isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                            const BranchProbability &Probability) const;
+
+  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                           unsigned ExtraPredCycles,
+                           const BranchProbability &Probability) const;
+
+ bool
+ isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const;
+
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ unsigned int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const;
+
+ virtual int getInstrLatency(const InstrItineraryData *ItinData,
+                              SDNode *Node) const { return 1; }
+
+  /// \returns a list of all the registers that may be accessed using indirect
+ /// addressing.
+ std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
+
+ unsigned getMaxAlusPerClause() const;
+
+  /// buildDefaultInstruction - This function returns a MachineInstr with
+ /// all the instruction modifiers initialized to their default values.
+ /// You can use this function to avoid manually specifying each instruction
+ /// modifier operand when building a new instruction.
+ ///
+ /// \returns a MachineInstr with all the instruction modifiers initialized
+ /// to their default values.
+ MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned Opcode,
+ unsigned DstReg,
+ unsigned Src0Reg,
+ unsigned Src1Reg = 0) const;
+
+ MachineInstr *buildMovImm(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ uint64_t Imm) const;
+
+ /// \brief Get the index of Op in the MachineInstr.
+ ///
+ /// \returns -1 if the Instruction does not contain the specified \p Op.
+ int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const;
+
+ /// \brief Get the index of \p Op for the given Opcode.
+ ///
+ /// \returns -1 if the Instruction does not contain the specified \p Op.
+ int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const;
+
+ /// \brief Helper function for setting instruction flag values.
+ void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;
+
+ /// \returns true if this instruction has an operand for storing target flags.
+ bool hasFlagOperand(const MachineInstr &MI) const;
+
+  /// \brief Add one of the MO_FLAG* flags to the specified \p Operand.
+ void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+
+  /// \brief Determine if the specified \p Flag is set on this \p Operand.
+ bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
+
+  /// \param SrcIdx The register source to set the flag on (e.g. src0, src1, src2)
+ /// \param Flag The flag being set.
+ ///
+ /// \returns the operand containing the flags for this instruction.
+ MachineOperand &getFlagOp(MachineInstr *MI, unsigned SrcIdx = 0,
+ unsigned Flag = 0) const;
+
+ /// \brief Clear the specified flag on the instruction.
+ void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
+};
+
+} // End llvm namespace
+
+#endif // R600INSTRINFO_H_
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
new file mode 100644
index 000000000000..663b41a66d6f
--- /dev/null
+++ b/lib/Target/R600/R600Instructions.td
@@ -0,0 +1,2267 @@
+//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Tablegen instruction definitions
+//
+//===----------------------------------------------------------------------===//
+
+include "R600Intrinsics.td"
+
+class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin>
+ : AMDGPUInst <outs, ins, asm, pattern> {
+
+ field bits<64> Inst;
+ bit Trig = 0;
+ bit Op3 = 0;
+ bit isVector = 0;
+ bits<2> FlagOperandIdx = 0;
+ bit Op1 = 0;
+ bit Op2 = 0;
+ bit HasNativeOperands = 0;
+
+ bits<11> op_code = inst;
+ //let Inst = inst;
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = itin;
+
+ let TSFlags{4} = Trig;
+ let TSFlags{5} = Op3;
+
+ // Vector instructions are instructions that must fill all slots in an
+ // instruction group
+ let TSFlags{6} = isVector;
+ let TSFlags{8-7} = FlagOperandIdx;
+ let TSFlags{9} = HasNativeOperands;
+ let TSFlags{10} = Op1;
+ let TSFlags{11} = Op2;
+}
+
+class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst <outs, ins, asm, pattern> {
+ field bits<64> Inst;
+
+ let Namespace = "AMDGPU";
+}
+
+def MEMxi : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
+ let PrintMethod = "printMemOperand";
+}
+
+def MEMrr : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
+}
+
+// Operands for non-registers
+
+class InstFlag<string PM = "printOperand", int Default = 0>
+ : OperandWithDefaultOps <i32, (ops (i32 Default))> {
+ let PrintMethod = PM;
+}
+
+// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
+def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
+ let PrintMethod = "printSel";
+}
+
+def LITERAL : InstFlag<"printLiteral">;
+
+def WRITE : InstFlag <"printWrite", 1>;
+def OMOD : InstFlag <"printOMOD">;
+def REL : InstFlag <"printRel">;
+def CLAMP : InstFlag <"printClamp">;
+def NEG : InstFlag <"printNeg">;
+def ABS : InstFlag <"printAbs">;
+def UEM : InstFlag <"printUpdateExecMask">;
+def UP : InstFlag <"printUpdatePred">;
+
+// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
+// Once we start using the packetizer in this backend we should change this
+// default to 0.
+def LAST : InstFlag<"printLast", 1>;
+
+def FRAMEri : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
+}
+
+def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
+def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
+def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
+
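+// Layout helpers for the two 32-bit words of the hardware ALU encoding.
+// Word0 packs the source selects/channels, negate and relative bits, the
+// index mode, the predicate select and the last-in-group bit.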
+class R600ALU_Word0 {
+ field bits<32> Word0;
+
+ bits<11> src0;
+ bits<1> src0_neg;
+ bits<1> src0_rel;
+ bits<11> src1;
+ bits<1> src1_rel;
+ bits<1> src1_neg;
+ bits<3> index_mode = 0;
+ bits<2> pred_sel;
+ bits<1> last;
+
+ bits<9> src0_sel = src0{8-0};
+ bits<2> src0_chan = src0{10-9};
+ bits<9> src1_sel = src1{8-0};
+ bits<2> src1_chan = src1{10-9};
+
+ let Word0{8-0} = src0_sel;
+ let Word0{9} = src0_rel;
+ let Word0{11-10} = src0_chan;
+ let Word0{12} = src0_neg;
+ let Word0{21-13} = src1_sel;
+ let Word0{22} = src1_rel;
+ let Word0{24-23} = src1_chan;
+ let Word0{25} = src1_neg;
+ let Word0{28-26} = index_mode;
+ let Word0{30-29} = pred_sel;
+ let Word0{31} = last;
+}
+
+class R600ALU_Word1 {
+ field bits<32> Word1;
+
+ bits<11> dst;
+ bits<3> bank_swizzle = 0;
+ bits<1> dst_rel;
+ bits<1> clamp;
+
+ bits<7> dst_sel = dst{6-0};
+ bits<2> dst_chan = dst{10-9};
+
+ let Word1{20-18} = bank_swizzle;
+ let Word1{27-21} = dst_sel;
+ let Word1{28} = dst_rel;
+ let Word1{30-29} = dst_chan;
+ let Word1{31} = clamp;
+}
+
+class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{
+
+ bits<1> src0_abs;
+ bits<1> src1_abs;
+ bits<1> update_exec_mask;
+ bits<1> update_pred;
+ bits<1> write;
+ bits<2> omod;
+
+ let Word1{0} = src0_abs;
+ let Word1{1} = src1_abs;
+ let Word1{2} = update_exec_mask;
+ let Word1{3} = update_pred;
+ let Word1{4} = write;
+ let Word1{6-5} = omod;
+ let Word1{17-7} = alu_inst;
+}
+
+class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
+
+ bits<11> src2;
+ bits<1> src2_rel;
+ bits<1> src2_neg;
+
+ bits<9> src2_sel = src2{8-0};
+ bits<2> src2_chan = src2{10-9};
+
+ let Word1{8-0} = src2_sel;
+ let Word1{9} = src2_rel;
+ let Word1{11-10} = src2_chan;
+ let Word1{12} = src2_neg;
+ let Word1{17-13} = alu_inst;
+}
+
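+// Encoding words for vertex-fetch instructions: VTX_WORD0 selects the fetch
+// type and source GPR, VTX_WORD1_GPR describes the destination GPR and the
+// data format of the fetched value.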
+class VTX_WORD0 {
+ field bits<32> Word0;
+ bits<7> SRC_GPR;
+ bits<5> VC_INST;
+ bits<2> FETCH_TYPE;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> BUFFER_ID;
+ bits<1> SRC_REL;
+ bits<2> SRC_SEL_X;
+ bits<6> MEGA_FETCH_COUNT;
+
+ let Word0{4-0} = VC_INST;
+ let Word0{6-5} = FETCH_TYPE;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = BUFFER_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{25-24} = SRC_SEL_X;
+ let Word0{31-26} = MEGA_FETCH_COUNT;
+}
+
+class VTX_WORD1_GPR {
+ field bits<32> Word1;
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<1> USE_CONST_FIELDS;
+ bits<6> DATA_FORMAT;
+ bits<2> NUM_FORMAT_ALL;
+ bits<1> FORMAT_COMP_ALL;
+ bits<1> SRF_MODE_ALL;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{8} = 0; // Reserved
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{21} = USE_CONST_FIELDS;
+ let Word1{27-22} = DATA_FORMAT;
+ let Word1{29-28} = NUM_FORMAT_ALL;
+ let Word1{30} = FORMAT_COMP_ALL;
+ let Word1{31} = SRF_MODE_ALL;
+}
+
+class TEX_WORD0 {
+ field bits<32> Word0;
+
+ bits<5> TEX_INST;
+ bits<2> INST_MOD;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> RESOURCE_ID;
+ bits<7> SRC_GPR;
+ bits<1> SRC_REL;
+ bits<1> ALT_CONST;
+ bits<2> RESOURCE_INDEX_MODE;
+ bits<2> SAMPLER_INDEX_MODE;
+
+ let Word0{4-0} = TEX_INST;
+ let Word0{6-5} = INST_MOD;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = RESOURCE_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{24} = ALT_CONST;
+ let Word0{26-25} = RESOURCE_INDEX_MODE;
+ let Word0{28-27} = SAMPLER_INDEX_MODE;
+}
+
+class TEX_WORD1 {
+ field bits<32> Word1;
+
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<7> LOD_BIAS;
+ bits<1> COORD_TYPE_X;
+ bits<1> COORD_TYPE_Y;
+ bits<1> COORD_TYPE_Z;
+ bits<1> COORD_TYPE_W;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{27-21} = LOD_BIAS;
+ let Word1{28} = COORD_TYPE_X;
+ let Word1{29} = COORD_TYPE_Y;
+ let Word1{30} = COORD_TYPE_Z;
+ let Word1{31} = COORD_TYPE_W;
+}
+
+class TEX_WORD2 {
+ field bits<32> Word2;
+
+ bits<5> OFFSET_X;
+ bits<5> OFFSET_Y;
+ bits<5> OFFSET_Z;
+ bits<5> SAMPLER_ID;
+ bits<3> SRC_SEL_X;
+ bits<3> SRC_SEL_Y;
+ bits<3> SRC_SEL_Z;
+ bits<3> SRC_SEL_W;
+
+ let Word2{4-0} = OFFSET_X;
+ let Word2{9-5} = OFFSET_Y;
+ let Word2{14-10} = OFFSET_Z;
+ let Word2{19-15} = SAMPLER_ID;
+ let Word2{22-20} = SRC_SEL_X;
+ let Word2{25-23} = SRC_SEL_Y;
+ let Word2{28-26} = SRC_SEL_Z;
+ let Word2{31-29} = SRC_SEL_W;
+}
+
+/*
+XXX: R600 subtarget uses a slightly different encoding than the other
+subtargets. We currently handle this in R600MCCodeEmitter, but we may
+want to use these instruction classes in the future.
+
+class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 {
+
+ bits<1> fog_merge;
+ bits<10> alu_inst;
+
+ let Inst{37} = fog_merge;
+ let Inst{39-38} = omod;
+ let Inst{49-40} = alu_inst;
+}
+
+class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
+
+ bits<11> alu_inst;
+
+ let Inst{38-37} = omod;
+ let Inst{49-39} = alu_inst;
+}
+*/
+
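+// Default predicate operand for ALU instructions; PRED_SEL_OFF means the
+// instruction executes unconditionally.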
+def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
+ (ops PRED_SEL_OFF)>;
+
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+
+// Class for instructions with only one source register.
+// If you add new ins to this instruction, make sure they are listed before
+// $literal, because the backend currently assumes that the last operand is
+// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in
+// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
+// and R600InstrInfo::getOperandIdx().
+class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <0,
+ (outs R600_Reg32:$dst),
+ (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(" ", opName,
+ "$clamp $dst$write$dst_rel$omod, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <inst> {
+
+ let src1 = 0;
+ let src1_rel = 0;
+ let src1_neg = 0;
+ let src1_abs = 0;
+ let update_exec_mask = 0;
+ let update_pred = 0;
+ let HasNativeOperands = 1;
+ let Op1 = 1;
+ let DisableEncoding = "$literal";
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itin = AnyALU> :
+ R600_1OP <inst, opName,
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0))]
+>;
+
+// If you add or change the operands for R600_2OP instructions, you must
+// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
+// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
+class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
+ OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(" ", opName,
+ "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
+ "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP2 <inst> {
+
+ let HasNativeOperands = 1;
+ let Op2 = 1;
+ let DisableEncoding = "$literal";
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
+ InstrItinClass itim = AnyALU> :
+ R600_2OP <inst, opName,
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
+ R600_Reg32:$src1))]
+>;
+
+// If you add or change the operands for R600_3OP instructions, you must
+// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
+// R600InstrInfo::buildDefaultInstruction(), and
+// R600InstrInfo::getOperandIdx().
+class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <0,
+ (outs R600_Reg32:$dst),
+ (ins REL:$dst_rel, CLAMP:$clamp,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
+ R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ !strconcat(" ", opName, "$clamp $dst$dst_rel, "
+ "$src0_neg$src0$src0_rel, "
+ "$src1_neg$src1$src1_rel, "
+ "$src2_neg$src2$src2_rel, "
+ "$literal $pred_sel$last"),
+ pattern,
+ itin>,
+ R600ALU_Word0,
+ R600ALU_Word1_OP3<inst>{
+
+ let HasNativeOperands = 1;
+ let DisableEncoding = "$literal";
+ let Op3 = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
+ InstrItinClass itin = VecALU> :
+ InstR600 <inst,
+ (outs R600_Reg32:$dst),
+ ins,
+ asm,
+ pattern,
+ itin>;
+
+class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
+ InstrItinClass itin = AnyALU> :
+ InstR600 <inst,
+ (outs R600_Reg128:$DST_GPR),
+ (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
+ !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
+ pattern,
+ itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+ let TEX_INST = inst{4-0};
+ let SRC_REL = 0;
+ let DST_REL = 0;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let LOD_BIAS = 0;
+
+ let INST_MOD = 0;
+ let FETCH_WHOLE_QUAD = 0;
+ let ALT_CONST = 0;
+ let SAMPLER_INDEX_MODE = 0;
+
+ let COORD_TYPE_X = 0;
+ let COORD_TYPE_Y = 0;
+ let COORD_TYPE_Z = 0;
+ let COORD_TYPE_W = 0;
+ }
+
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
+
+def TEX_SHADOW : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13);
+ }]
+>;
+
+def TEX_RECT : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 5;
+ }]
+>;
+
+def TEX_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 9 || TType == 10 || TType == 15 || TType == 16;
+ }]
+>;
+
+def TEX_SHADOW_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 11 || TType == 12 || TType == 17;
+ }]
+>;
+
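+// Evergreen CF RAT (Random Access Target) encoding: Word0 carries the RAT
+// id/instruction and the GPR fields, Word1 the array/burst controls and the
+// CF opcode.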
+class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
+ dag ins, string asm, list<dag> pattern> :
+ InstR600ISA <outs, ins, asm, pattern> {
+ bits<7> RW_GPR;
+ bits<7> INDEX_GPR;
+
+ bits<2> RIM;
+ bits<2> TYPE;
+ bits<1> RW_REL;
+ bits<2> ELEM_SIZE;
+
+ bits<12> ARRAY_SIZE;
+ bits<4> COMP_MASK;
+ bits<4> BURST_COUNT;
+ bits<1> VPM;
+ bits<1> eop;
+ bits<1> MARK;
+ bits<1> BARRIER;
+
+ // CF_ALLOC_EXPORT_WORD0_RAT
+ let Inst{3-0} = rat_id;
+ let Inst{9-4} = rat_inst;
+ let Inst{10} = 0; // Reserved
+ let Inst{12-11} = RIM;
+ let Inst{14-13} = TYPE;
+ let Inst{21-15} = RW_GPR;
+ let Inst{22} = RW_REL;
+ let Inst{29-23} = INDEX_GPR;
+ let Inst{31-30} = ELEM_SIZE;
+
+ // CF_ALLOC_EXPORT_WORD1_BUF
+ let Inst{43-32} = ARRAY_SIZE;
+ let Inst{47-44} = COMP_MASK;
+ let Inst{51-48} = BURST_COUNT;
+ let Inst{52} = VPM;
+ let Inst{53} = eop;
+ let Inst{61-54} = cf_inst;
+ let Inst{62} = MARK;
+ let Inst{63} = BARRIER;
+}
+
+class LoadParamFrag <PatFrag load_type> : PatFrag <
+ (ops node:$ptr), (load_type node:$ptr),
+ [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
+>;
+
+def load_param : LoadParamFrag<load>;
+def load_param_zexti8 : LoadParamFrag<zextloadi8>;
+def load_param_zexti16 : LoadParamFrag<zextloadi16>;
+
+def isR600 : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">;
+def isR700 : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
+ "Subtarget.device()->getDeviceFlag()"
+ ">= OCL_DEVICE_RV710">;
+def isEG : Predicate<
+ "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && "
+ "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && "
+ "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">;
+
+def isCayman : Predicate<"Subtarget.device()"
+ "->getDeviceFlag() == OCL_DEVICE_CAYMAN">;
+def isEGorCayman : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD5XXX"
+ "|| Subtarget.device()->getGeneration() =="
+ "AMDGPUDeviceInfo::HD6XXX">;
+
+def isR600toCayman : Predicate<
+ "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
+
+//===----------------------------------------------------------------------===//
+// R600 SDNodes
+//===----------------------------------------------------------------------===//
+
+def INTERP_PAIR_XY : AMDGPUShaderInst <
+ (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1",
+ []>;
+
+def INTERP_PAIR_ZW : AMDGPUShaderInst <
+ (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
+ []>;
+
+def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
+ SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
+ [SDNPVariadic]
+>;
+
+//===----------------------------------------------------------------------===//
+// Interpolation Instructions
+//===----------------------------------------------------------------------===//
+
+def INTERP_VEC_LOAD : AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins i32imm:$src0),
+ "INTERP_LOAD $src0 : $dst",
+ []>;
+
+def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
+ let bank_swizzle = 5;
+}
+
+def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
+ let bank_swizzle = 5;
+}
+
+def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
+
+//===----------------------------------------------------------------------===//
+// Export Instructions
+//===----------------------------------------------------------------------===//
+
+def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
+
+def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+class ExportWord0 {
+ field bits<32> Word0;
+
+ bits<13> arraybase;
+ bits<2> type;
+ bits<7> gpr;
+ bits<2> elem_size;
+
+ let Word0{12-0} = arraybase;
+ let Word0{14-13} = type;
+ let Word0{21-15} = gpr;
+ let Word0{22} = 0; // RW_REL
+ let Word0{29-23} = 0; // INDEX_GPR
+ let Word0{31-30} = elem_size;
+}
+
+class ExportSwzWord1 {
+ field bits<32> Word1;
+
+ bits<3> sw_x;
+ bits<3> sw_y;
+ bits<3> sw_z;
+ bits<3> sw_w;
+ bits<1> eop;
+ bits<8> inst;
+
+ let Word1{2-0} = sw_x;
+ let Word1{5-3} = sw_y;
+ let Word1{8-6} = sw_z;
+ let Word1{11-9} = sw_w;
+}
+
+class ExportBufWord1 {
+ field bits<32> Word1;
+
+ bits<12> arraySize;
+ bits<4> compMask;
+ bits<1> eop;
+ bits<8> inst;
+
+ let Word1{11-0} = arraySize;
+ let Word1{15-12} = compMask;
+}
+
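+// Patterns mapping the R600 export intrinsics onto a target's export
+// instruction. The pixel depth/stencil stores write a single channel to
+// array base 61 with the remaining channels masked out (swizzle 7).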
+multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
+ def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
+ 0, 61, 0, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
+ (ExportInst
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
+ 0, 61, 7, 0, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_dummy (i32 imm:$type)),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(int_R600_store_dummy 1),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
+ (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
+ imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
+ >;
+
+}
+
+multiclass SteamOutputExportPattern<Instruction ExportInst,
+ bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
+// Stream0
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf0inst, 0)>;
+// Stream1
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf1inst, 0)>;
+// Stream2
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf2inst, 0)>;
+// Stream3
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
+ 4095, imm:$mask, buf3inst, 0)>;
+}
+
+let usesCustomInserter = 1 in {
+
+class ExportSwzInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
+ i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst,
+ i32imm:$eop),
+ !strconcat("EXPORT", " $gpr"),
+ []>, ExportWord0, ExportSwzWord1 {
+ let elem_size = 3;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+} // End usesCustomInserter = 1
+
+class ExportBufInst : InstR600ISA<(
+ outs),
+ (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
+ i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
+ !strconcat("EXPORT", " $gpr"),
+ []>, ExportWord0, ExportBufWord1 {
+ let elem_size = 0;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions
+//===----------------------------------------------------------------------===//
+
+class CF_ALU_WORD0 {
+ field bits<32> Word0;
+
+ bits<22> ADDR;
+ bits<4> KCACHE_BANK0;
+ bits<4> KCACHE_BANK1;
+ bits<2> KCACHE_MODE0;
+
+ let Word0{21-0} = ADDR;
+ let Word0{25-22} = KCACHE_BANK0;
+ let Word0{29-26} = KCACHE_BANK1;
+ let Word0{31-30} = KCACHE_MODE0;
+}
+
+class CF_ALU_WORD1 {
+ field bits<32> Word1;
+
+ bits<2> KCACHE_MODE1;
+ bits<8> KCACHE_ADDR0;
+ bits<8> KCACHE_ADDR1;
+ bits<7> COUNT;
+ bits<1> ALT_CONST;
+ bits<4> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
+ bits<1> BARRIER;
+
+ let Word1{1-0} = KCACHE_MODE1;
+ let Word1{9-2} = KCACHE_ADDR0;
+ let Word1{17-10} = KCACHE_ADDR1;
+ let Word1{24-18} = COUNT;
+ let Word1{25} = ALT_CONST;
+ let Word1{29-26} = CF_INST;
+ let Word1{30} = WHOLE_QUAD_MODE;
+ let Word1{31} = BARRIER;
+}
+
+class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
+(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1,
+i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
+!strconcat(OpName, " $COUNT, @$ADDR, "
+"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]"
+", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"),
+[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let ALT_CONST = 0;
+ let WHOLE_QUAD_MODE = 0;
+ let BARRIER = 1;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+class CF_WORD0 {
+ field bits<32> Word0;
+
+ bits<24> ADDR;
+ bits<3> JUMPTABLE_SEL;
+
+ let Word0{23-0} = ADDR;
+ let Word0{26-24} = JUMPTABLE_SEL;
+}
+
+class CF_WORD1 {
+ field bits<32> Word1;
+
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<6> COUNT;
+ bits<1> VALID_PIXEL_MODE;
+ bits<8> CF_INST;
+ bits<1> BARRIER;
+
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{15-10} = COUNT;
+ let Word1{20} = VALID_PIXEL_MODE;
+ let Word1{29-22} = CF_INST;
+ let Word1{31} = BARRIER;
+}
+
+class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
+ field bits<64> Inst;
+
+ let CF_INST = inst;
+ let BARRIER = 1;
+ let JUMPTABLE_SEL = 0;
+ let CF_CONST = 0;
+ let VALID_PIXEL_MODE = 0;
+ let COND = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+}
+
+def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+"TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+
+def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+"VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+}
+
+def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+}
+
+def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
+ let ADDR = 0;
+ let COUNT = 0;
+ let POP_COUNT = 0;
+}
+
+def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+}
+
+def CF_ALU : ALU_CLAUSE<8, "ALU">;
+def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
+
+def STACK_SIZE : AMDGPUInst <(outs),
+(ins i32imm:$num), "nstack $num", [] > {
+ field bits<8> Inst;
+ bits<8> num;
+ let Inst = num;
+}
+
+let Predicates = [isR600toCayman] in {
+
+//===----------------------------------------------------------------------===//
+// Common Instructions R600, R700, Evergreen, Cayman
+//===----------------------------------------------------------------------===//
+
+def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
+// Non-IEEE MUL: 0 * anything = 0
+def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
+def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
+def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
+def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
+
+// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
+// so some of the instruction names don't match the asm string.
+// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
+def SETE : R600_2OP <
+ 0x08, "SETE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_EQ))]
+>;
+
+def SGT : R600_2OP <
+ 0x09, "SETGT",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_GT))]
+>;
+
+def SGE : R600_2OP <
+ 0xA, "SETGE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_GE))]
+>;
+
+def SNE : R600_2OP <
+ 0xB, "SETNE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
+ COND_NE))]
+>;
+
+def SETE_DX10 : R600_2OP <
+ 0xC, "SETE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_EQ))]
+>;
+
+def SETGT_DX10 : R600_2OP <
+ 0xD, "SETGT_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GT))]
+>;
+
+def SETGE_DX10 : R600_2OP <
+ 0xE, "SETGE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GE))]
+>;
+
+def SETNE_DX10 : R600_2OP <
+ 0xF, "SETNE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_NE))]
+>;
+
+def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
+def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
+def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
+def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
+def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
+
+def MOV : R600_1OP <0x19, "MOV", []>;
+
+let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
+
+class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
+ (outs R600_Reg32:$dst),
+ (ins immType:$imm),
+ "",
+ []
+>;
+
+} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
+
+def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
+def : Pat <
+ (imm:$val),
+ (MOV_IMM_I32 imm:$val)
+>;
+
+def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
+def : Pat <
+ (fpimm:$val),
+ (MOV_IMM_F32 fpimm:$val)
+>;
+
+def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
+def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
+def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
+def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;
+
+let hasSideEffects = 1 in {
+
+def KILLGT : R600_2OP <0x2D, "KILLGT", []>;
+
+} // end hasSideEffects
+
+def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
+def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
+def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
+def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
+def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
+def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
+def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>;
+def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>;
+def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>;
+def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
+
+def SETE_INT : R600_2OP <
+ 0x3A, "SETE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
+>;
+
+def SETGT_INT : R600_2OP <
+ 0x3B, "SETGT_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
+>;
+
+def SETGE_INT : R600_2OP <
+ 0x3C, "SETGE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
+>;
+
+def SETNE_INT : R600_2OP <
+ 0x3D, "SETNE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
+>;
+
+def SETGT_UINT : R600_2OP <
+ 0x3E, "SETGT_UINT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
+>;
+
+def SETGE_UINT : R600_2OP <
+ 0x3F, "SETGE_UINT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
+>;
+
+def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
+def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
+def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
+def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
+
+def CNDE_INT : R600_3OP <
+ 0x1C, "CNDE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_EQ))]
+>;
+
+def CNDGE_INT : R600_3OP <
+ 0x1E, "CNDGE_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_GE))]
+>;
+
+def CNDGT_INT : R600_3OP <
+ 0x1D, "CNDGT_INT",
+ [(set (i32 R600_Reg32:$dst),
+ (selectcc (i32 R600_Reg32:$src0), 0,
+ (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
+ COND_GT))]
+>;
+
+//===----------------------------------------------------------------------===//
+// Texture instructions
+//===----------------------------------------------------------------------===//
+
+def TEX_LD : R600_TEX <
+ 0x03, "TEX_LD",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR,
+ imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
+ imm:$SAMPLER_ID, imm:$textureTarget))]
+> {
+let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z,"
+ "$RESOURCE_ID, $SAMPLER_ID, $textureTarget";
+let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
+ i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
+ i32imm:$textureTarget);
+}
+
+def TEX_GET_TEXTURE_RESINFO : R600_TEX <
+ 0x04, "TEX_GET_TEXTURE_RESINFO",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_GET_GRADIENTS_H : R600_TEX <
+ 0x07, "TEX_GET_GRADIENTS_H",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_GET_GRADIENTS_V : R600_TEX <
+ 0x08, "TEX_GET_GRADIENTS_V",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SET_GRADIENTS_H : R600_TEX <
+ 0x0B, "TEX_SET_GRADIENTS_H",
+ []
+>;
+
+def TEX_SET_GRADIENTS_V : R600_TEX <
+ 0x0C, "TEX_SET_GRADIENTS_V",
+ []
+>;
+
+def TEX_SAMPLE : R600_TEX <
+ 0x10, "TEX_SAMPLE",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C : R600_TEX <
+ 0x18, "TEX_SAMPLE_C",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_L : R600_TEX <
+ 0x11, "TEX_SAMPLE_L",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C_L : R600_TEX <
+ 0x19, "TEX_SAMPLE_C_L",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_LB : R600_TEX <
+ 0x12, "TEX_SAMPLE_LB",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
+>;
+
+def TEX_SAMPLE_C_LB : R600_TEX <
+ 0x1A, "TEX_SAMPLE_C_LB",
+ [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
+>;
+
+def TEX_SAMPLE_G : R600_TEX <
+ 0x14, "TEX_SAMPLE_G",
+ []
+>;
+
+def TEX_SAMPLE_C_G : R600_TEX <
+ 0x1C, "TEX_SAMPLE_C_G",
+ []
+>;
+
+//===----------------------------------------------------------------------===//
+// Helper classes for common instructions
+//===----------------------------------------------------------------------===//
+
+class MUL_LIT_Common <bits<5> inst> : R600_3OP <
+ inst, "MUL_LIT",
+ []
+>;
+
+class MULADD_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD",
+ []
+>;
+
+class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD_IEEE",
+ [(set (f32 R600_Reg32:$dst),
+ (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
+>;
+
+class CNDE_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_EQ))]
+>;
+
+class CNDGT_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDGT",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_GT))]
+>;
+
+class CNDGE_Common <bits<5> inst> : R600_3OP <
+ inst, "CNDGE",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
+ (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
+ COND_GE))]
+>;
+
+multiclass DOT4_Common <bits<11> inst> {
+
+ def _pseudo : R600_REDUCTION <inst,
+ (ins R600_Reg128:$src0, R600_Reg128:$src1),
+ "DOT4 $dst $src0, $src1",
+ [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
+ >;
+
+ def _real : R600_2OP <inst, "DOT4", []>;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+multiclass CUBE_Common <bits<11> inst> {
+
+ def _pseudo : InstR600 <
+ inst,
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src),
+ "CUBE $dst $src",
+ [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+ VecALU
+ > {
+ let isPseudo = 1;
+ }
+
+ def _real : R600_2OP <inst, "CUBE", []>;
+}
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
+
+class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "EXP_IEEE", fexp2
+>;
+
+class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "FLT_TO_INT", fp_to_sint
+>;
+
+class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "INT_TO_FLT", sint_to_fp
+>;
+
+class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "FLT_TO_UINT", fp_to_uint
+>;
+
+class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "UINT_TO_FLT", uint_to_fp
+>;
+
+class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
+ inst, "LOG_CLAMPED", []
+>;
+
+class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "LOG_IEEE", flog2
+>;
+
+class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
+class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
+class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
+class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI_INT", mulhs
+>;
+class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI", mulhu
+>;
+class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULLO_INT", mul
+>;
+class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>;
+
+class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIP_CLAMPED", []
+>;
+
+class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))]
+>;
+
+class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIP_UINT", AMDGPUurecip
+>;
+
+class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
+>;
+
+class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
+ inst, "RECIPSQRT_IEEE", []
+>;
+
+class SIN_Common <bits<11> inst> : R600_1OP <
+ inst, "SIN", []>{
+ let Trig = 1;
+}
+
+class COS_Common <bits<11> inst> : R600_1OP <
+ inst, "COS", []> {
+ let Trig = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper patterns for complex intrinsics
+//===----------------------------------------------------------------------===//
+
+multiclass DIV_Common <InstR600 recip_ieee> {
+def : Pat<
+ (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
+ (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+>;
+
+def : Pat<
+ (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
+ (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+>;
+}
+
+class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
+ (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
+ (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
+>;
+
+//===----------------------------------------------------------------------===//
+// R600 / R700 Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isR600] in {
+
+ def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
+ def MULADD_r600 : MULADD_Common<0x10>;
+ def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
+ def CNDE_r600 : CNDE_Common<0x18>;
+ def CNDGT_r600 : CNDGT_Common<0x19>;
+ def CNDGE_r600 : CNDGE_Common<0x1A>;
+ defm DOT4_r600 : DOT4_Common<0x50>;
+ defm CUBE_r600 : CUBE_Common<0x52>;
+ def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
+ def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
+ def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
+ def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
+ def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
+ def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
+ def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
+ def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
+ def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
+ def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
+ def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
+ def SIN_r600 : SIN_Common<0x6E>;
+ def COS_r600 : COS_Common<0x6F>;
+ def ASHR_r600 : ASHR_Common<0x70>;
+ def LSHR_r600 : LSHR_Common<0x71>;
+ def LSHL_r600 : LSHL_Common<0x72>;
+ def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
+ def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
+ def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
+ def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
+ def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
+
+ defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
+ def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>;
+ def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
+
+ def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
+
+ def R600_ExportSwz : ExportSwzInst {
+ let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{21} = eop;
+ let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{30-23} = inst;
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : ExportPattern<R600_ExportSwz, 39>;
+
+ def R600_ExportBuf : ExportBufInst {
+ let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{21} = eop;
+ let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{30-23} = inst;
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
+}
+
+// Helper pattern for normalizing inputs to trigonometric instructions for
+// R700+ cards.
+class COS_PAT <InstR600 trig> : Pat<
+ (fcos R600_Reg32:$src),
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+>;
+
+class SIN_PAT <InstR600 trig> : Pat<
+ (fsin R600_Reg32:$src),
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+>;
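+// Worked example: assuming CONST.TWO_PI_INV encodes 1/(2*pi), fsin(x) above
+// becomes SIN(x * (1/(2*pi))), i.e. the angle is rescaled from radians to
+// revolutions before it reaches the hardware trig unit.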
+
+//===----------------------------------------------------------------------===//
+// R700 Only instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isR700] in {
+ def SIN_r700 : SIN_Common<0x6E>;
+ def COS_r700 : COS_Common<0x6F>;
+
+ // R700 normalizes inputs to SIN/COS the same as EG
+ def : SIN_PAT <SIN_r700>;
+ def : COS_PAT <COS_r700>;
+}
+
+//===----------------------------------------------------------------------===//
+// Evergreen Only instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEG] in {
+
+def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
+defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
+
+def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
+def MULHI_INT_eg : MULHI_INT_Common<0x90>;
+def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
+def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
+def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
+def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
+def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_eg : SIN_Common<0x8D>;
+def COS_eg : COS_Common<0x8E>;
+
+def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>;
+def : SIN_PAT <SIN_eg>;
+def : COS_PAT <COS_eg>;
+def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
+} // End Predicates = [isEG]
+
+//===----------------------------------------------------------------------===//
+// Evergreen / Cayman Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEGorCayman] in {
+
+ // BFE_UINT - bit_extract, an optimization for mask and shift
+ // Src0 = Input
+ // Src1 = Offset
+ // Src2 = Width
+ //
+ // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
+ //
+ // Example Usage:
+ // (Offset, Width)
+ //
+ // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
+ // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
+  // (16, 8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
+  // (24, 8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
+ def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
+ [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
+ R600_Reg32:$src1,
+ R600_Reg32:$src2))],
+ VecALU
+ >;
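+  // Worked example of the formula above: with Input = 0x12345678, Offset = 8
+  // and Width = 8, (Input << 16) >> 24 = 0x56, which matches
+  // (Input & 0xffff) >> 8.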
+
+ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
+ [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
+ R600_Reg32:$src2))],
+ VecALU
+ >;
+
+ def MULADD_eg : MULADD_Common<0x14>;
+ def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
+ def ASHR_eg : ASHR_Common<0x15>;
+ def LSHR_eg : LSHR_Common<0x16>;
+ def LSHL_eg : LSHL_Common<0x17>;
+ def CNDE_eg : CNDE_Common<0x19>;
+ def CNDGT_eg : CNDGT_Common<0x1A>;
+ def CNDGE_eg : CNDGE_Common<0x1B>;
+ def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
+ def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
+ defm DOT4_eg : DOT4_Common<0xBE>;
+ defm CUBE_eg : CUBE_Common<0xC0>;
+
+let hasSideEffects = 1 in {
+ def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
+}
+
+ def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
+
+ def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
+ let Pattern = [];
+ }
+
+ def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
+
+ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
+ let Pattern = [];
+ }
+
+ def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
+
+ // TRUNC is used for the FLT_TO_INT instructions to work around a
+ // perceived problem where the rounding modes are applied differently
+ // depending on the instruction and the slot they are in.
+ // See:
+ // https://bugs.freedesktop.org/show_bug.cgi?id=50232
+ // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c
+ //
+ // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
+ // which do not need to be truncated since the fp values are 0.0f or 1.0f.
+ // We should look into handling these cases separately.
+ def : Pat<(fp_to_sint R600_Reg32:$src0),
+ (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>;
+
+ def : Pat<(fp_to_uint R600_Reg32:$src0),
+ (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
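+  // For example, fp_to_sint(-1.5f) becomes FLT_TO_INT_eg(TRUNC(-1.5f)):
+  // TRUNC performs the round-toward-zero step explicitly (giving -1.0f), so
+  // the result is -1 no matter which slot FLT_TO_INT executes in.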
+
+ def EG_ExportSwz : ExportSwzInst {
+ let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{21} = eop;
+ let Word1{29-22} = inst;
+ let Word1{30} = 0; // MARK
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : ExportPattern<EG_ExportSwz, 83>;
+
+ def EG_ExportBuf : ExportBufInst {
+ let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{21} = eop;
+ let Word1{29-22} = inst;
+ let Word1{30} = 0; // MARK
+ let Word1{31} = 1; // BARRIER
+ }
+ defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
+
+//===----------------------------------------------------------------------===//
+// Memory read/write instructions
+//===----------------------------------------------------------------------===//
+let usesCustomInserter = 1 in {
+
+class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
+ list<dag> pattern>
+ : EG_CF_RAT <0x57, 0x2, 0, (outs), ins,
+ !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> {
+ let RIM = 0;
+ // XXX: Have a separate instruction for non-indexed writes.
+ let TYPE = 1;
+ let RW_REL = 0;
+ let ELEM_SIZE = 0;
+
+ let ARRAY_SIZE = 0;
+ let COMP_MASK = comp_mask;
+ let BURST_COUNT = 0;
+ let VPM = 0;
+ let MARK = 0;
+ let BARRIER = 1;
+}
+
+} // End usesCustomInserter = 1
+
+// 32-bit store
+def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
+ (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ 0x1, "RAT_WRITE_CACHELESS_32_eg",
+ [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]
+>;
+
+// 128-bit store
+def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
+ 0xf, "RAT_WRITE_CACHELESS_128",
+ [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)]
+>;
+
+class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
+ : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>,
+ VTX_WORD1_GPR, VTX_WORD0 {
+
+ // Static fields
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = buffer_id;
+ let SRC_REL = 0;
+ // XXX: We can infer this field based on the SRC_GPR. This would allow us
+ // to store vertex addresses in any channel, not just X.
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+  // The docs say that if this bit is set, the DATA_FORMAT, NUM_FORMAT_ALL,
+  // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored.
+  // In my testing, however, when USE_CONST_FIELDS is set all of these
+  // fields need to be set to 0.
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 1;
+ let FORMAT_COMP_ALL = 0;
+ let SRF_MODE_ALL = 0;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+ // LLVM can only encode 64-bit instructions, so these fields are manually
+ // encoded in R600CodeEmitter
+ //
+ // bits<16> OFFSET;
+ // bits<2> ENDIAN_SWAP = 0;
+ // bits<1> CONST_BUF_NO_STRIDE = 0;
+ // bits<1> MEGA_FETCH = 0;
+ // bits<1> ALT_CONST = 0;
+ // bits<2> BUFFER_INDEX_MODE = 0;
+
+ // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+  // is done in R600CodeEmitter)
+ //
+ // Inst{79-64} = OFFSET;
+ // Inst{81-80} = ENDIAN_SWAP;
+ // Inst{82} = CONST_BUF_NO_STRIDE;
+ // Inst{83} = MEGA_FETCH;
+ // Inst{84} = ALT_CONST;
+ // Inst{86-85} = BUFFER_INDEX_MODE;
+ // Inst{95-86} = 0; Reserved
+
+ // VTX_WORD3 (Padding)
+ //
+ // Inst{127-96} = 0;
+}
+
+class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 1;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 1; // FMT_8
+}
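+// Judging from the variants here, MEGA_FETCH_COUNT appears to be the fetch
+// size in bytes (1, 2, 4, and 16 for the 8-, 16-, 32- and 128-bit reads),
+// with the unused destination channels masked via DST_SEL_* = 7.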
+
+class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+ let MEGA_FETCH_COUNT = 2;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 5; // FMT_16
+
+}
+
+class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 4;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_W = 7; // Masked
+ let DATA_FORMAT = 0xD; // COLOR_32
+
+ // This is not really necessary, but there were some GPU hangs that appeared
+ // to be caused by ALU instructions in the next instruction group that wrote
+ // to the $ptr registers of the VTX_READ.
+  // For example:
+  //   %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
+  //   %T2_X<def> = MOV %ZERO
+  // Adding this constraint prevents this from happening.
+ let Constraints = "$ptr.ptr = $dst";
+}
+
+class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst),
+ pattern> {
+
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
+
+ // XXX: Need to force VTX_READ_128 instructions to write to the same register
+ // that holds its buffer address to avoid potential hangs. We can't use
+ // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
+ // registers are different sizes.
+}
+
+//===----------------------------------------------------------------------===//
+// VTX Read from parameter memory space
+//===----------------------------------------------------------------------===//
+
+def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
+ [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
+def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+ [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// VTX Read from global memory space
+//===----------------------------------------------------------------------===//
+
+// 8-bit reads
+def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
+>;
+
+// 32-bit reads
+def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
+>;
+
+// 128-bit reads
+def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
+ [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// Constant Loads
+// XXX: We are currently storing all constants in the global address space.
+//===----------------------------------------------------------------------===//
+
+def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
+ [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
+>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Register loads and stores - for indirect addressing
+//===----------------------------------------------------------------------===//
+
+defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+
+let Predicates = [isCayman] in {
+
+let isVector = 1 in {
+
+def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
+
+def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
+def MULHI_INT_cm : MULHI_INT_Common<0x90>;
+def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
+def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
+def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
+def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
+def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
+def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>;
+def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>;
+def SIN_cm : SIN_Common<0x8D>;
+def COS_cm : COS_Common<0x8E>;
+} // End isVector = 1
+
+def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>;
+def : SIN_PAT <SIN_cm>;
+def : COS_PAT <COS_cm>;
+
+defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
+
+// RECIP_UINT emulation for Cayman
+def : Pat <
+ (AMDGPUurecip R600_Reg32:$src0),
+ (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
+ (MOV_IMM_I32 0x4f800000)))
+>;
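+// 0x4f800000 is the IEEE-754 single-precision encoding of 2^32, so the
+// sequence above approximates 2^32 / src0 in floating point and then
+// converts the result back to an unsigned integer.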
+
+
+def : Pat<(fsqrt R600_Reg32:$src),
+ (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
+
+} // End isCayman
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+
+
+def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src),
+ "IF_PREDICATE_SET $src", []>;
+
+def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src),
+ "PREDICATED_BREAK $src", []>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+let isPseudo = 1 in {
+
+def PRED_X : InstR600 <
+ 0, (outs R600_Predicate_Bit:$dst),
+ (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
+ "", [], NullALU> {
+ let FlagOperandIdx = 3;
+}
+
+let isTerminator = 1, isBranch = 1 in {
+def JUMP_COND : InstR600 <0x10,
+ (outs),
+ (ins brtarget:$target, R600_Predicate_Bit:$p),
+ "JUMP $target ($p)",
+ [], AnyALU
+ >;
+
+def JUMP : InstR600 <0x10,
+ (outs),
+ (ins brtarget:$target),
+ "JUMP $target",
+ [], AnyALU
+  > {
+ let isPredicable = 1;
+ let isBarrier = 1;
+}
+
+} // End isTerminator = 1, isBranch = 1
+
+let usesCustomInserter = 1 in {
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
+
+def MASK_WRITE : AMDGPUShaderInst <
+ (outs),
+ (ins R600_Reg32:$src),
+ "MASK_WRITE $src",
+ []
+>;
+
+} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
+
+
+def TXD: AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
+>;
+
+def TXD_SHADOW: AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
+ [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
+>;
+
+} // End isPseudo = 1
+} // End usesCustomInserter = 1
+
+def CLAMP_R600 : CLAMP <R600_Reg32>;
+def FABS_R600 : FABS<R600_Reg32>;
+def FNEG_R600 : FNEG<R600_Reg32>;
+
+//===---------------------------------------------------------------------===//
+// Return instruction
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
+ usesCustomInserter = 1 in {
+ def RETURN : ILFormat<(outs), (ins variable_ops),
+ "RETURN", [(IL_retflag)]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+def CONST_COPY : Instruction {
+ let OutOperandList = (outs R600_Reg32:$dst);
+ let InOperandList = (ins i32imm:$src);
+ let Pattern =
+ [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
+ let AsmString = "CONST_COPY";
+ let neverHasSideEffects = 1;
+ let isAsCheapAsAMove = 1;
+ let Itinerary = NullALU;
+}
+} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+
+def TEX_VTX_CONSTBUF :
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
+ [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
+ VTX_WORD1_GPR, VTX_WORD0 {
+
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let SRC_REL = 0;
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 2;
+ let FORMAT_COMP_ALL = 1;
+ let SRF_MODE_ALL = 1;
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 35;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter)
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-86} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+def TEX_VTX_TEXBUF:
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
+ [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
+  VTX_WORD1_GPR, VTX_WORD0 {
+
+  let VC_INST = 0;
+  let FETCH_TYPE = 2;
+  let FETCH_WHOLE_QUAD = 0;
+  let SRC_REL = 0;
+  let SRC_SEL_X = 0;
+  let DST_REL = 0;
+  let USE_CONST_FIELDS = 1;
+  let NUM_FORMAT_ALL = 0;
+  let FORMAT_COMP_ALL = 0;
+  let SRF_MODE_ALL = 1;
+  let MEGA_FETCH_COUNT = 16;
+  let DST_SEL_X = 0;
+  let DST_SEL_Y = 1;
+  let DST_SEL_Z = 2;
+  let DST_SEL_W = 3;
+  let DATA_FORMAT = 0;
+
+  let Inst{31-0} = Word0;
+  let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter)
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-86} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// Instruction support
+//===--------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
+// Custom inserter for branches and returns; this will eventually be a
+// separate pass.
+//===---------------------------------------------------------------------===//
+let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
+ def BRANCH : ILFormat<(outs), (ins brtarget:$target),
+ "; Pseudo unconditional branch instruction",
+ [(br bb:$target)]>;
+ defm BRANCH_COND : BranchConditional<IL_brcond>;
+}
+
+//===---------------------------------------------------------------------===//
+// Flow and Program control Instructions
+//===---------------------------------------------------------------------===//
+let isTerminator = 1 in {
+ def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
+ !strconcat("SWITCH", " $src"), []>;
+ def CASE : ILFormat< (outs), (ins GPRI32:$src),
+ !strconcat("CASE", " $src"), []>;
+ def BREAK : ILFormat< (outs), (ins),
+ "BREAK", []>;
+ def CONTINUE : ILFormat< (outs), (ins),
+ "CONTINUE", []>;
+ def DEFAULT : ILFormat< (outs), (ins),
+ "DEFAULT", []>;
+ def ELSE : ILFormat< (outs), (ins),
+ "ELSE", []>;
+ def ENDSWITCH : ILFormat< (outs), (ins),
+ "ENDSWITCH", []>;
+ def ENDMAIN : ILFormat< (outs), (ins),
+ "ENDMAIN", []>;
+ def END : ILFormat< (outs), (ins),
+ "END", []>;
+ def ENDFUNC : ILFormat< (outs), (ins),
+ "ENDFUNC", []>;
+ def ENDIF : ILFormat< (outs), (ins),
+ "ENDIF", []>;
+ def WHILELOOP : ILFormat< (outs), (ins),
+ "WHILE", []>;
+ def ENDLOOP : ILFormat< (outs), (ins),
+ "ENDLOOP", []>;
+ def FUNC : ILFormat< (outs), (ins),
+ "FUNC", []>;
+ def RETDYN : ILFormat< (outs), (ins),
+ "RET_DYN", []>;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
+ // This opcode has custom swizzle pattern encoded in Swizzle Encoder
+ defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
+ defm IFC : BranchInstr2<"IFC">;
+ defm BREAKC : BranchInstr2<"BREAKC">;
+ defm CONTINUEC : BranchInstr2<"CONTINUEC">;
+}
+
+//===----------------------------------------------------------------------===//
+// ISel Patterns
+//===----------------------------------------------------------------------===//
+
+// CND*_INT patterns for f32 true / false values
+
+class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
+ (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1),
+ R600_Reg32:$src2, cc),
+ (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+>;
+
+def : CND_INT_f32 <CNDE_INT, SETEQ>;
+def : CND_INT_f32 <CNDGT_INT, SETGT>;
+def : CND_INT_f32 <CNDGE_INT, SETGE>;
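+// For example, the first def above selects
+//   (select (i32 a) == 0 ? (f32 x) : (f32 y))
+// to (CNDE_INT a, x, y): the comparison is on an integer while the values
+// being selected are floats.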
+
+// CNDGE_INT extra pattern
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
+ (i32 R600_Reg32:$src2), COND_GT),
+ (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+>;
+
+// KIL Patterns
+def KILP : Pat <
+ (int_AMDGPU_kilp),
+ (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
+>;
+
+def KIL : Pat <
+ (int_AMDGPU_kill R600_Reg32:$src0),
+ (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
+>;
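+// Assuming KILLGT discards the pixel when its first operand is greater than
+// its second: KILP compares 1.0 > 0.0 and so always kills, while KIL kills
+// only when 0.0 > src0, i.e. when src0 is negative.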
+
+// SGT Reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
+ (SGT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
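+// That is, "a < b ? 1.0 : 0.0" is emitted as (SGT b, a), since a < b is
+// equivalent to b > a; the reversed-argument patterns below apply the same
+// trick to the GE/LE and DX10/INT/UINT variants.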
+
+// SGE Reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
+ (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
+ (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
+ (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_INT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
+ (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_INT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
+ (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_UINT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
+ (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_UINT reverse args
+def : Pat <
+ (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
+ (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// The following patterns are special cases for handling 'true if ordered'
+// and 'true if unordered' conditionals. The assumption here is that the
+// behavior of SETE and SNE conforms to the Direct3D 10 rules for floating
+// point values described here:
+// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
+// We assume that SETE returns false when one of the operands is NaN and
+// that SNE returns true when one of the operands is NaN.
+
+// SETE - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
+ (SETE R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+// SETE_DX10 - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
+ (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+// SNE - 'true if unordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
+ (SNE R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+// SETNE_DX10 - 'true if unordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
+ (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
+
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
+
+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
+
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
+
+def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
+def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
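+// Each vector lane corresponds to a subregister index (element 0 <-> sub0,
+// ..., element 3 <-> sub3); Vector4_Build assembles a 128-bit vector from
+// four 32-bit scalars using the same indices.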
+
+// bitconvert patterns
+
+def : BitConvert <i32, f32, R600_Reg32>;
+def : BitConvert <f32, i32, R600_Reg32>;
+def : BitConvert <v4f32, v4i32, R600_Reg128>;
+def : BitConvert <v4i32, v4f32, R600_Reg128>;
+
+// DWORDADDR pattern
+def : DwordAddrPat <i32, R600_Reg32>;
+
+} // End isR600toCayman Predicate
diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td
new file mode 100644
index 000000000000..dc8980aef146
--- /dev/null
+++ b/lib/Target/R600/R600Intrinsics.td
@@ -0,0 +1,31 @@
+//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "R600", isTarget = 1 in {
+ def int_R600_load_input :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_interp_input :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_load_texbuf :
+ Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_store_swizzle :
+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_R600_store_stream_output :
+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_R600_store_pixel_depth :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_pixel_stencil :
+ Intrinsic<[], [llvm_float_ty], []>;
+ def int_R600_store_dummy :
+ Intrinsic<[], [llvm_i32_ty], []>;
+}
diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..018b40363363
--- /dev/null
+++ b/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -0,0 +1,18 @@
+//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
+ : AMDGPUMachineFunction(MF) { }
+
+
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
new file mode 100644
index 000000000000..99c1f91b09b1
--- /dev/null
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -0,0 +1,32 @@
+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINEFUNCTIONINFO_H
+#define R600MACHINEFUNCTIONINFO_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "AMDGPUMachineFunction.h"
+#include <vector>
+
+namespace llvm {
+
+class R600MachineFunctionInfo : public AMDGPUMachineFunction {
+public:
+ R600MachineFunctionInfo(const MachineFunction &MF);
+ SmallVector<unsigned, 4> LiveOuts;
+ std::vector<unsigned> IndirectRegs;
+};
+
+} // End llvm namespace
+
+#endif //R600MACHINEFUNCTIONINFO_H
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
new file mode 100644
index 000000000000..a777142a9e70
--- /dev/null
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -0,0 +1,427 @@
+//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+// TODO: Scheduling is optimized for VLIW4; modify it to support the TRANS slot
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "R600MachineScheduler.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+
+using namespace llvm;
+
+void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
+
+ DAG = dag;
+ TII = static_cast<const R600InstrInfo*>(DAG->TII);
+ TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
+ MRI = &DAG->MRI;
+ Available[IDAlu]->clear();
+ Available[IDFetch]->clear();
+ Available[IDOther]->clear();
+ CurInstKind = IDOther;
+ CurEmitted = 0;
+ OccupedSlotsMask = 15;
+ InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
+
+
+ const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
+  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
+    InstKindLimit[IDFetch] = 7; // 8 minus 1 for a safety margin
+  } else {
+    InstKindLimit[IDFetch] = 15; // 16 minus 1 for a safety margin
+  }
+}
+
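+// Move every SUnit from QSrc to QDst, clearing QSrc's queue-ID bit on each
+// node so it is no longer considered a member of the source queue.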
+void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst)
+{
+ if (QSrc->empty())
+ return;
+ for (ReadyQueue::iterator I = QSrc->begin(),
+ E = QSrc->end(); I != E; ++I) {
+ (*I)->NodeQueueId &= ~QSrc->getID();
+ QDst->push(*I);
+ }
+ QSrc->clear();
+}
+
+SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
+ SUnit *SU = 0;
+ IsTopNode = true;
+ NextInstKind = IDOther;
+
+ // check if we might want to switch current clause type
+ bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
+ (CurEmitted > InstKindLimit[CurInstKind]) ||
+ (Available[CurInstKind]->empty());
+ bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
+ (!Available[IDFetch]->empty() || !Available[IDOther]->empty());
+
+ if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
+ (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
+ // try to pick ALU
+ SU = pickAlu();
+ if (SU) {
+ if (CurEmitted > InstKindLimit[IDAlu])
+ CurEmitted = 0;
+ NextInstKind = IDAlu;
+ }
+ }
+
+ if (!SU) {
+ // try to pick FETCH
+ SU = pickOther(IDFetch);
+ if (SU)
+ NextInstKind = IDFetch;
+ }
+
+ // try to pick other
+ if (!SU) {
+ SU = pickOther(IDOther);
+ if (SU)
+ NextInstKind = IDOther;
+ }
+
+ DEBUG(
+ if (SU) {
+ dbgs() << "picked node: ";
+ SU->dump(DAG);
+ } else {
+ dbgs() << "NO NODE ";
+ for (int i = 0; i < IDLast; ++i) {
+ Available[i]->dump();
+ Pending[i]->dump();
+ }
+ for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
+ const SUnit &S = DAG->SUnits[i];
+ if (!S.isScheduled)
+ S.dump(DAG);
+ }
+ }
+ );
+
+ return SU;
+}
+
+void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+
+ DEBUG(dbgs() << "scheduled: ");
+ DEBUG(SU->dump(DAG));
+
+ if (NextInstKind != CurInstKind) {
+ DEBUG(dbgs() << "Instruction Type Switch\n");
+ if (NextInstKind != IDAlu)
+ OccupedSlotsMask = 15;
+ CurEmitted = 0;
+ CurInstKind = NextInstKind;
+ }
+
+ if (CurInstKind == IDAlu) {
+ switch (getAluKind(SU)) {
+ case AluT_XYZW:
+ CurEmitted += 4;
+ break;
+ case AluDiscarded:
+ break;
+ default: {
+ ++CurEmitted;
+ for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
+ E = SU->getInstr()->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ ++CurEmitted;
+ }
+ }
+ }
+ } else {
+ ++CurEmitted;
+ }
+
+
+ DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
+
+ if (CurInstKind != IDFetch) {
+ MoveUnits(Pending[IDFetch], Available[IDFetch]);
+ }
+ MoveUnits(Pending[IDOther], Available[IDOther]);
+}
+
+void R600SchedStrategy::releaseTopNode(SUnit *SU) {
+ int IK = getInstKind(SU);
+
+ DEBUG(dbgs() << IK << " <= ");
+ DEBUG(SU->dump(DAG));
+
+ Pending[IK]->push(SU);
+}
+
+void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
+}
+
+bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
+ const TargetRegisterClass *RC) const {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->contains(Reg);
+ } else {
+ return MRI->getRegClass(Reg) == RC;
+ }
+}
+
+R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
+ MachineInstr *MI = SU->getInstr();
+
+ switch (MI->getOpcode()) {
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ return AluT_XYZW;
+ case AMDGPU::COPY:
+ if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
+ // %vregX = COPY Tn_X is likely to be discarded in favor of an
+      // assignment of Tn_X to %vregX; don't consider it when scheduling.
+ return AluDiscarded;
+ }
+ else if (MI->getOperand(1).isUndef()) {
+      // MI will become a KILL; don't consider it when scheduling.
+ return AluDiscarded;
+ }
+ default:
+ break;
+ }
+
+  // Does the instruction take a whole instruction group (IG)?
+  if (TII->isVector(*MI) ||
+ TII->isCubeOp(MI->getOpcode()) ||
+ TII->isReductionOp(MI->getOpcode()))
+ return AluT_XYZW;
+
+  // Is the result already assigned to a channel?
+ unsigned DestSubReg = MI->getOperand(0).getSubReg();
+ switch (DestSubReg) {
+ case AMDGPU::sub0:
+ return AluT_X;
+ case AMDGPU::sub1:
+ return AluT_Y;
+ case AMDGPU::sub2:
+ return AluT_Z;
+ case AMDGPU::sub3:
+ return AluT_W;
+ default:
+ break;
+ }
+
+  // Is the result already a member of an X/Y/Z/W class?
+ unsigned DestReg = MI->getOperand(0).getReg();
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
+ regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
+ return AluT_X;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
+ return AluT_Y;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
+ return AluT_Z;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
+ return AluT_W;
+ if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
+ return AluT_XYZW;
+
+ return AluAny;
+
+}
+
+int R600SchedStrategy::getInstKind(SUnit* SU) {
+ int Opcode = SU->getInstr()->getOpcode();
+
+ if (TII->isALUInstr(Opcode)) {
+ return IDAlu;
+ }
+
+ switch (Opcode) {
+ case AMDGPU::COPY:
+ case AMDGPU::CONST_COPY:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::DOT4_eg_pseudo:
+ case AMDGPU::DOT4_r600_pseudo:
+ return IDAlu;
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF:
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TXD:
+ case AMDGPU::TXD_SHADOW:
+ return IDFetch;
+ default:
+ DEBUG(
+ dbgs() << "other inst: ";
+ SU->dump(DAG);
+ );
+ return IDOther;
+ }
+}
+
+SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
+ if (Q.empty())
+ return NULL;
+  for (std::multiset<SUnit *, CompareSUnit>::iterator It = Q.begin(),
+       E = Q.end(); It != E; ++It) {
+ SUnit *SU = *It;
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ if (TII->canBundle(InstructionsGroupCandidate)) {
+ InstructionsGroupCandidate.pop_back();
+ Q.erase(It);
+ return SU;
+ } else {
+ InstructionsGroupCandidate.pop_back();
+ }
+ }
+ return NULL;
+}
+
+void R600SchedStrategy::LoadAlu() {
+ ReadyQueue *QSrc = Pending[IDAlu];
+ for (ReadyQueue::iterator I = QSrc->begin(),
+ E = QSrc->end(); I != E; ++I) {
+ (*I)->NodeQueueId &= ~QSrc->getID();
+ AluKind AK = getAluKind(*I);
+ AvailableAlus[AK].insert(*I);
+ }
+ QSrc->clear();
+}
+
+void R600SchedStrategy::PrepareNextSlot() {
+ DEBUG(dbgs() << "New Slot\n");
+ assert (OccupedSlotsMask && "Slot wasn't filled");
+ OccupedSlotsMask = 0;
+ InstructionsGroupCandidate.clear();
+ LoadAlu();
+}
+
+void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
+ unsigned DestReg = MI->getOperand(0).getReg();
+  // PressureRegister crashes if an operand is both defined and used in the
+  // same instruction and we try to constrain its register class.
+ for (MachineInstr::mop_iterator It = MI->operands_begin(),
+ E = MI->operands_end(); It != E; ++It) {
+ MachineOperand &MO = *It;
+ if (MO.isReg() && !MO.isDef() &&
+ MO.getReg() == MI->getOperand(0).getReg())
+ return;
+ }
+  // Constrain the register class of DestReg so it is assigned to Slot.
+ switch (Slot) {
+ case 0:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
+ break;
+ case 1:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
+ break;
+ case 2:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
+ break;
+ case 3:
+ MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
+ break;
+ }
+}
+
+SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
+ static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
+ SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
+ SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
+ if (!UnslotedSU) {
+ return SlotedSU;
+ } else if (!SlotedSU) {
+ AssignSlot(UnslotedSU->getInstr(), Slot);
+ return UnslotedSU;
+ } else {
+      // Determine which one to pick (the one CompareSUnit orders first).
+ if (CompareSUnit()(SlotedSU, UnslotedSU)) {
+ AvailableAlus[AluAny].insert(UnslotedSU);
+ return SlotedSU;
+ } else {
+ AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
+ AssignSlot(UnslotedSU->getInstr(), Slot);
+ return UnslotedSU;
+ }
+ }
+}
+
+bool R600SchedStrategy::isAvailablesAluEmpty() const {
+ return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
+ AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
+ AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
+ AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
+}
+
+SUnit* R600SchedStrategy::pickAlu() {
+ while (!isAvailablesAluEmpty()) {
+ if (!OccupedSlotsMask) {
+ // Flush physical reg copies (RA will discard them)
+ if (!AvailableAlus[AluDiscarded].empty()) {
+ OccupedSlotsMask = 15;
+ return PopInst(AvailableAlus[AluDiscarded]);
+ }
+ // If there is a T_XYZW alu available, use it
+ if (!AvailableAlus[AluT_XYZW].empty()) {
+ OccupedSlotsMask = 15;
+ return PopInst(AvailableAlus[AluT_XYZW]);
+ }
+ }
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ bool isOccupied = OccupedSlotsMask & (1 << Chan);
+ if (!isOccupied) {
+ SUnit *SU = AttemptFillSlot(Chan);
+ if (SU) {
+ OccupedSlotsMask |= (1 << Chan);
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ return SU;
+ }
+ }
+ }
+ PrepareNextSlot();
+ }
+ return NULL;
+}
+
+SUnit* R600SchedStrategy::pickOther(int QID) {
+ SUnit *SU = 0;
+ ReadyQueue *AQ = Available[QID];
+
+ if (AQ->empty()) {
+ MoveUnits(Pending[QID], AQ);
+ }
+ if (!AQ->empty()) {
+ SU = *AQ->begin();
+ AQ->remove(AQ->begin());
+ }
+ return SU;
+}
+
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
new file mode 100644
index 000000000000..3d0367fd8ebf
--- /dev/null
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -0,0 +1,120 @@
+//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINESCHEDULER_H_
+#define R600MACHINESCHEDULER_H_
+
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+class CompareSUnit {
+public:
+ bool operator()(const SUnit *S1, const SUnit *S2) {
+ return S1->getDepth() > S2->getDepth();
+ }
+};
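+// Because the comparator orders by decreasing depth, a multiset built with
+// it yields the SUnit deepest in the dependence graph first, which tends to
+// favor the critical path when picking instructions.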
+
+class R600SchedStrategy : public MachineSchedStrategy {
+
+ const ScheduleDAGMI *DAG;
+ const R600InstrInfo *TII;
+ const R600RegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ enum InstQueue {
+ QAlu = 1,
+ QFetch = 2,
+ QOther = 4
+ };
+
+ enum InstKind {
+ IDAlu,
+ IDFetch,
+ IDOther,
+ IDLast
+ };
+
+ enum AluKind {
+ AluAny,
+ AluT_X,
+ AluT_Y,
+ AluT_Z,
+ AluT_W,
+ AluT_XYZW,
+    AluDiscarded, // LLVM instructions that are going to be eliminated
+ AluLast
+ };
+
+ ReadyQueue *Available[IDLast], *Pending[IDLast];
+ std::multiset<SUnit *, CompareSUnit> AvailableAlus[AluLast];
+
+ InstKind CurInstKind;
+ int CurEmitted;
+ InstKind NextInstKind;
+
+ int InstKindLimit[IDLast];
+
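+  // Bitmask of the four channel slots (bit N = channel N); 15 = 0b1111
+  // means every slot of the current instruction group is occupied.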
+ int OccupedSlotsMask;
+
+public:
+ R600SchedStrategy() :
+ DAG(0), TII(0), TRI(0), MRI(0) {
+ Available[IDAlu] = new ReadyQueue(QAlu, "AAlu");
+ Available[IDFetch] = new ReadyQueue(QFetch, "AFetch");
+ Available[IDOther] = new ReadyQueue(QOther, "AOther");
+ Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu");
+ Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch");
+ Pending[IDOther] = new ReadyQueue(QOther<<4, "POther");
+ }
+
+ virtual ~R600SchedStrategy() {
+ for (unsigned I = 0; I < IDLast; ++I) {
+ delete Available[I];
+ delete Pending[I];
+ }
+ }
+
+ virtual void initialize(ScheduleDAGMI *dag);
+ virtual SUnit *pickNode(bool &IsTopNode);
+ virtual void schedNode(SUnit *SU, bool IsTopNode);
+ virtual void releaseTopNode(SUnit *SU);
+ virtual void releaseBottomNode(SUnit *SU);
+
+private:
+ std::vector<MachineInstr *> InstructionsGroupCandidate;
+
+ int getInstKind(SUnit *SU);
+ bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
+ AluKind getAluKind(SUnit *SU) const;
+ void LoadAlu();
+ bool isAvailablesAluEmpty() const;
+ SUnit *AttemptFillSlot (unsigned Slot);
+ void PrepareNextSlot();
+ SUnit *PopInst(std::multiset<SUnit *, CompareSUnit> &Q);
+
+ void AssignSlot(MachineInstr *MI, unsigned Slot);
+ SUnit* pickAlu();
+ SUnit* pickOther(int QID);
+ void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
+};
+
+} // namespace llvm
+
+#endif /* R600MACHINESCHEDULER_H_ */
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
new file mode 100644
index 000000000000..bbd7995d7d51
--- /dev/null
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -0,0 +1,99 @@
+//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief R600 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600RegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+
+ Reserved.set(AMDGPU::ZERO);
+ Reserved.set(AMDGPU::HALF);
+ Reserved.set(AMDGPU::ONE);
+ Reserved.set(AMDGPU::ONE_INT);
+ Reserved.set(AMDGPU::NEG_HALF);
+ Reserved.set(AMDGPU::NEG_ONE);
+ Reserved.set(AMDGPU::PV_X);
+ Reserved.set(AMDGPU::ALU_LITERAL_X);
+ Reserved.set(AMDGPU::ALU_CONST);
+ Reserved.set(AMDGPU::PREDICATE_BIT);
+ Reserved.set(AMDGPU::PRED_SEL_OFF);
+ Reserved.set(AMDGPU::PRED_SEL_ZERO);
+ Reserved.set(AMDGPU::PRED_SEL_ONE);
+
+ for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
+ E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(),
+ E = AMDGPU::TRegMemRegClass.end();
+ I != E; ++I) {
+ Reserved.set(*I);
+ }
+
+ const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(&TII);
+ std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
+ for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
+ E = IndirectRegs.end();
+ I != E; ++I) {
+ Reserved.set(*I);
+ }
+ return Reserved;
+}
+
+const TargetRegisterClass *
+R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
+ switch (rc->getID()) {
+ case AMDGPU::GPRF32RegClassID:
+ case AMDGPU::GPRI32RegClassID:
+ return &AMDGPU::R600_Reg32RegClass;
+ default: return rc;
+ }
+}
+
+unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
+ return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
+}
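+// Illustration, assuming HW_CHAN_SHIFT (from R600Defines.h) matches the
+// chan_encoding position HWEncoding{10-9} set up in R600RegisterInfo.td:
+// a register encoded as (2 << 9) | 5 (T5.Z) yields channel 2 here.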
+
+const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const {
+ switch(VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
+ }
+}
+
+unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
+ switch (Channel) {
+ default: assert(!"Invalid channel index"); return 0;
+ case 0: return AMDGPU::sub0;
+ case 1: return AMDGPU::sub1;
+ case 2: return AMDGPU::sub2;
+ case 3: return AMDGPU::sub3;
+ }
+}
+
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
new file mode 100644
index 000000000000..f9ca918f246b
--- /dev/null
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -0,0 +1,55 @@
+//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for R600RegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600REGISTERINFO_H_
+#define R600REGISTERINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+namespace llvm {
+
+class R600TargetMachine;
+class TargetInstrInfo;
+
+struct R600RegisterInfo : public AMDGPURegisterInfo {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns the R600 reg class that is equivalent to \p RC.
+ virtual const TargetRegisterClass *getISARegClass(
+ const TargetRegisterClass *RC) const;
+
+ /// \brief get the HW encoding for a register's channel.
+ unsigned getHWRegChan(unsigned reg) const;
+
+ /// \brief get the register class of the specified type to use in the
+ /// CFGStructurizer
+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+
+ /// \returns the sub reg enum value for the given \p Channel
+  /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
+ unsigned getSubRegFromChannel(unsigned Channel) const;
+
+};
+
+} // End namespace llvm
+
+#endif // R600REGISTERINFO_H_
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
new file mode 100644
index 000000000000..03f49761ea40
--- /dev/null
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -0,0 +1,209 @@
+
+class R600Reg <string name, bits<16> encoding> : Register<name> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+class R600RegWithChan <string name, bits<9> sel, string chan> :
+ Register <name> {
+
+ field bits<2> chan_encoding = !if(!eq(chan, "X"), 0,
+ !if(!eq(chan, "Y"), 1,
+ !if(!eq(chan, "Z"), 2,
+ !if(!eq(chan, "W"), 3, 0))));
+ let HWEncoding{8-0} = sel;
+ let HWEncoding{10-9} = chan_encoding;
+ let Namespace = "AMDGPU";
+}
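+// Worked example: T5_Z is defined with sel = 5 and chan_encoding = 2, so
+// its HWEncoding comes out as (2 << 9) | 5 = 0x405.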
+
+class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
+ RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices = [sub0, sub1, sub2, sub3];
+ let HWEncoding = encoding;
+}
+
+foreach Index = 0-127 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
+
+ // Indirect addressing offset registers
+ def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
+ Index, Chan>;
+ def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
+ Chan>;
+ }
+ // 128-bit Temporary Registers
+ def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
+ [!cast<Register>("T"#Index#"_X"),
+ !cast<Register>("T"#Index#"_Y"),
+ !cast<Register>("T"#Index#"_Z"),
+ !cast<Register>("T"#Index#"_W")],
+ Index>;
+}
+
+// KCACHE_BANK0
+foreach Index = 159-128 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def KC0_#Index#_#Chan : R600RegWithChan <"KC0["#Index#"-128]."#Chan, Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def KC0_#Index#_XYZW : R600Reg_128 <"KC0["#Index#"-128].XYZW",
+ [!cast<Register>("KC0_"#Index#"_X"),
+ !cast<Register>("KC0_"#Index#"_Y"),
+ !cast<Register>("KC0_"#Index#"_Z"),
+ !cast<Register>("KC0_"#Index#"_W")],
+ Index>;
+}
+
+// KCACHE_BANK1
+foreach Index = 191-160 in {
+ foreach Chan = [ "X", "Y", "Z", "W" ] in {
+ // 32-bit Temporary Registers
+ def KC1_#Index#_#Chan : R600RegWithChan <"KC1["#Index#"-160]."#Chan, Index, Chan>;
+ }
+ // 128-bit Temporary Registers
+ def KC1_#Index#_XYZW : R600Reg_128 <"KC1["#Index#"-160].XYZW",
+ [!cast<Register>("KC1_"#Index#"_X"),
+ !cast<Register>("KC1_"#Index#"_Y"),
+ !cast<Register>("KC1_"#Index#"_Z"),
+ !cast<Register>("KC1_"#Index#"_W")],
+ Index>;
+}
+
+
+// Array Base Register holding input in FS
+foreach Index = 448-480 in {
+ def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
+}
+
+
+// Special Registers
+
+def ZERO : R600Reg<"0.0", 248>;
+def ONE : R600Reg<"1.0", 249>;
+def NEG_ONE : R600Reg<"-1.0", 249>;
+def ONE_INT : R600Reg<"1", 250>;
+def HALF : R600Reg<"0.5", 252>;
+def NEG_HALF : R600Reg<"-0.5", 252>;
+def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
+def PV_X : R600Reg<"pv.x", 254>;
+def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
+def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
+def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
+def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
+def AR_X : R600Reg<"AR.x", 0>;
+
+def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "ArrayBase%u", 448, 480))>;
+// special registers for ALU src operands
+// const buffer reference, SRCx_SEL contains index
+def ALU_CONST : R600Reg<"CBuf", 0>;
+// interpolation param reference, SRCx_SEL contains index
+def ALU_PARAM : R600Reg<"Param", 0>;
+
+let isAllocatable = 0 in {
+
+// XXX: Only use the X channel, until we support wider stack widths
+def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
+
+} // End isAllocatable = 0
+
+def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_X", 128, 159))>;
+
+def R600_KC0_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_Y", 128, 159))>;
+
+def R600_KC0_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_Z", 128, 159))>;
+
+def R600_KC0_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC0_%u_W", 128, 159))>;
+
+def R600_KC0 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_KC0_X, R600_KC0_Y,
+ R600_KC0_Z, R600_KC0_W)>;
+
+def R600_KC1_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_X", 160, 191))>;
+
+def R600_KC1_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_Y", 160, 191))>;
+
+def R600_KC1_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_Z", 160, 191))>;
+
+def R600_KC1_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "KC1_%u_W", 160, 191))>;
+
+def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_KC1_X, R600_KC1_Y,
+ R600_KC1_Z, R600_KC1_W)>;
+
+def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_X", 0, 127), AR_X)>;
+
+def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_Y", 0, 127))>;
+
+def R600_TReg32_Z : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_Z", 0, 127))>;
+
+def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (add (sequence "T%u_W", 0, 127))>;
+
+def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
+ (interleave R600_TReg32_X, R600_TReg32_Y,
+ R600_TReg32_Z, R600_TReg32_W)>;
+
+def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
+ R600_TReg32,
+ R600_ArrayBase,
+ R600_Addr,
+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
+ ALU_CONST, ALU_PARAM
+ )>;
+
+def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
+ PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
+
+def R600_Predicate_Bit : RegisterClass <"AMDGPU", [i32], 32, (add
+ PREDICATE_BIT)>;
+
+def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
+ (add (sequence "T%u_XYZW", 0, 127))> {
+ let CopyCost = -1;
+}
+
+//===----------------------------------------------------------------------===//
+// Register classes for indirect addressing
+//===----------------------------------------------------------------------===//
+
+// Super register for all the Indirect Registers. This register class is used
+// by the REG_SEQUENCE instruction to specify the registers to use for direct
+// reads / writes which may be written / read by an indirect address.
+class IndirectSuper<string n, list<Register> subregs> :
+ RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices =
+ [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
+}
+
+def IndirectSuperReg : IndirectSuper<"Indirect",
+ [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
+ TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
+ TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
+>;
+
+def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
+
+// This register class defines the registers that are the storage units for
+// the "Indirect Addressing" pseudo memory space.
+// XXX: Only use the X channel, until we support wider stack widths
+def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "TRegMem%u_X", 0, 16))
+>;
diff --git a/lib/Target/R600/R600Schedule.td b/lib/Target/R600/R600Schedule.td
new file mode 100644
index 000000000000..7ede181c51dc
--- /dev/null
+++ b/lib/Target/R600/R600Schedule.td
@@ -0,0 +1,36 @@
+//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 has a VLIW architecture. On pre-Cayman cards there are 5 instruction
+// slots: ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. On Cayman cards, the TRANS
+// slot has been removed.
+//
+//===----------------------------------------------------------------------===//
+
+
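+// The itinerary classes below map onto these slots: AnyALU instructions may
+// issue to any of the five slots, VecALU only to one of the four vector
+// slots, and TransALU only to TRANS.
+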
+def ALU_X : FuncUnit;
+def ALU_Y : FuncUnit;
+def ALU_Z : FuncUnit;
+def ALU_W : FuncUnit;
+def TRANS : FuncUnit;
+
+def AnyALU : InstrItinClass;
+def VecALU : InstrItinClass;
+def TransALU : InstrItinClass;
+
+def R600_EG_Itin : ProcessorItineraries <
+ [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
+ [],
+ [
+ InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
+    InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
+ InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
+ InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
+ ]
+>;
diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp
new file mode 100644
index 000000000000..2477e2a9dcc3
--- /dev/null
+++ b/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -0,0 +1,329 @@
+//===-- SIAnnotateControlFlow.cpp - Annotate SI control flow --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Annotates the control flow with hardware-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+namespace {
+
+// Complex types used in this pass
+typedef std::pair<BasicBlock *, Value *> StackEntry;
+typedef SmallVector<StackEntry, 16> StackVector;
+
+// Intrinsic names the control flow is annotated with
+static const char *IfIntrinsic = "llvm.SI.if";
+static const char *ElseIntrinsic = "llvm.SI.else";
+static const char *BreakIntrinsic = "llvm.SI.break";
+static const char *IfBreakIntrinsic = "llvm.SI.if.break";
+static const char *ElseBreakIntrinsic = "llvm.SI.else.break";
+static const char *LoopIntrinsic = "llvm.SI.loop";
+static const char *EndCfIntrinsic = "llvm.SI.end.cf";
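+
+// For reference, openIf() below rewrites a conditional branch roughly as
+// follows (an illustrative sketch, not verbatim pass output):
+//   %ret  = call { i1, i64 } @llvm.SI.if(i1 %cond)
+//   %cond = extractvalue { i1, i64 } %ret, 0  ; becomes the new condition
+//   %mask = extractvalue { i1, i64 } %ret, 1  ; pushed on the stack
+// The saved mask is later consumed by closeControlFlow() through
+//   call void @llvm.SI.end.cf(i64 %mask)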
+
+class SIAnnotateControlFlow : public FunctionPass {
+
+ static char ID;
+
+ Type *Boolean;
+ Type *Void;
+ Type *Int64;
+ Type *ReturnStruct;
+
+ ConstantInt *BoolTrue;
+ ConstantInt *BoolFalse;
+ UndefValue *BoolUndef;
+ Constant *Int64Zero;
+
+ Constant *If;
+ Constant *Else;
+ Constant *Break;
+ Constant *IfBreak;
+ Constant *ElseBreak;
+ Constant *Loop;
+ Constant *EndCf;
+
+ DominatorTree *DT;
+ StackVector Stack;
+ SSAUpdater PhiInserter;
+
+ bool isTopOfStack(BasicBlock *BB);
+
+ Value *popSaved();
+
+ void push(BasicBlock *BB, Value *Saved);
+
+ bool isElse(PHINode *Phi);
+
+ void eraseIfUnused(PHINode *Phi);
+
+ void openIf(BranchInst *Term);
+
+ void insertElse(BranchInst *Term);
+
+ void handleLoopCondition(Value *Cond);
+
+ void handleLoop(BranchInst *Term);
+
+ void closeControlFlow(BasicBlock *BB);
+
+public:
+ SIAnnotateControlFlow():
+ FunctionPass(ID) { }
+
+ virtual bool doInitialization(Module &M);
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual const char *getPassName() const {
+ return "SI annotate control flow";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+};
+
+} // end anonymous namespace
+
+char SIAnnotateControlFlow::ID = 0;
+
+/// \brief Initialize all the types and constants used in the pass
+bool SIAnnotateControlFlow::doInitialization(Module &M) {
+ LLVMContext &Context = M.getContext();
+
+ Void = Type::getVoidTy(Context);
+ Boolean = Type::getInt1Ty(Context);
+ Int64 = Type::getInt64Ty(Context);
+ ReturnStruct = StructType::get(Boolean, Int64, (Type *)0);
+
+ BoolTrue = ConstantInt::getTrue(Context);
+ BoolFalse = ConstantInt::getFalse(Context);
+ BoolUndef = UndefValue::get(Boolean);
+ Int64Zero = ConstantInt::get(Int64, 0);
+
+ If = M.getOrInsertFunction(
+ IfIntrinsic, ReturnStruct, Boolean, (Type *)0);
+
+ Else = M.getOrInsertFunction(
+ ElseIntrinsic, ReturnStruct, Int64, (Type *)0);
+
+ Break = M.getOrInsertFunction(
+ BreakIntrinsic, Int64, Int64, (Type *)0);
+
+ IfBreak = M.getOrInsertFunction(
+ IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)0);
+
+ ElseBreak = M.getOrInsertFunction(
+ ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)0);
+
+ Loop = M.getOrInsertFunction(
+ LoopIntrinsic, Boolean, Int64, (Type *)0);
+
+ EndCf = M.getOrInsertFunction(
+ EndCfIntrinsic, Void, Int64, (Type *)0);
+
+ return false;
+}
+
+/// \brief Is BB the last block saved on the stack?
+bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) {
+ return !Stack.empty() && Stack.back().first == BB;
+}
+
+/// \brief Pop the last saved value from the control flow stack
+Value *SIAnnotateControlFlow::popSaved() {
+ return Stack.pop_back_val().second;
+}
+
+/// \brief Push a BB and saved value to the control flow stack
+void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) {
+ Stack.push_back(std::make_pair(BB, Saved));
+}
+
+/// \brief Can the condition represented by this PHI node be treated like
+/// an "Else" block?
+bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
+ BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock();
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+ if (Phi->getIncomingBlock(i) == IDom) {
+
+ if (Phi->getIncomingValue(i) != BoolTrue)
+ return false;
+
+ } else {
+ if (Phi->getIncomingValue(i) != BoolFalse)
+ return false;
+
+ }
+ }
+ return true;
+}
+
+/// \brief Erase "Phi" if it is not used anymore
+void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
+ if (!Phi->hasNUsesOrMore(1))
+ Phi->eraseFromParent();
+}
+
+/// \brief Open a new "If" block
+void SIAnnotateControlFlow::openIf(BranchInst *Term) {
+ Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+}
+
+/// \brief Close the last "If" block and open a new "Else" block
+void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
+ Value *Ret = CallInst::Create(Else, popSaved(), "", Term);
+ Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
+ push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
+}
+
+/// \brief Recursively handle the condition leading to a loop
+void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) {
+ if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
+
+    // Handle all non-constant incoming values first
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = Phi->getIncomingValue(i);
+ if (isa<ConstantInt>(Incoming))
+ continue;
+
+ Phi->setIncomingValue(i, BoolFalse);
+ handleLoopCondition(Incoming);
+ }
+
+ BasicBlock *Parent = Phi->getParent();
+ BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
+
+ for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
+
+ Value *Incoming = Phi->getIncomingValue(i);
+ if (Incoming != BoolTrue)
+ continue;
+
+ BasicBlock *From = Phi->getIncomingBlock(i);
+ if (From == IDom) {
+ CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
+ if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
+ Value *Args[] = {
+ OldEnd->getArgOperand(0),
+ PhiInserter.GetValueAtEndOfBlock(Parent)
+ };
+ Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
+ PhiInserter.AddAvailableValue(Parent, Ret);
+ continue;
+ }
+ }
+
+ TerminatorInst *Insert = From->getTerminator();
+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(From);
+ Value *Ret = CallInst::Create(Break, Arg, "", Insert);
+ PhiInserter.AddAvailableValue(From, Ret);
+ }
+ eraseIfUnused(Phi);
+
+ } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
+ BasicBlock *Parent = Inst->getParent();
+ TerminatorInst *Insert = Parent->getTerminator();
+ Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) };
+ Value *Ret = CallInst::Create(IfBreak, Args, "", Insert);
+ PhiInserter.AddAvailableValue(Parent, Ret);
+
+ } else {
+ assert(0 && "Unhandled loop condition!");
+ }
+}
+
+/// \brief Handle a back edge (loop)
+void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
+ BasicBlock *Target = Term->getSuccessor(1);
+ PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
+
+ PhiInserter.Initialize(Int64, "");
+ PhiInserter.AddAvailableValue(Target, Broken);
+
+ Value *Cond = Term->getCondition();
+ Term->setCondition(BoolTrue);
+ handleLoopCondition(Cond);
+
+ BasicBlock *BB = Term->getParent();
+ Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB);
+ for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
+ PI != PE; ++PI) {
+
+ Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI);
+ }
+
+ Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
+ push(Term->getSuccessor(0), Arg);
+}
+
+/// \brief Close the last opened control flow
+void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
+ CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
+}
+
+/// \brief Annotate the control flow with intrinsics so the backend can
+/// recognize if/then/else and loops.
+bool SIAnnotateControlFlow::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTree>();
+
+ for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
+ E = df_end(&F.getEntryBlock()); I != E; ++I) {
+
+ BranchInst *Term = dyn_cast<BranchInst>((*I)->getTerminator());
+
+ if (!Term || Term->isUnconditional()) {
+ if (isTopOfStack(*I))
+ closeControlFlow(*I);
+ continue;
+ }
+
+ if (I.nodeVisited(Term->getSuccessor(1))) {
+ if (isTopOfStack(*I))
+ closeControlFlow(*I);
+ handleLoop(Term);
+ continue;
+ }
+
+ if (isTopOfStack(*I)) {
+ PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
+ if (Phi && Phi->getParent() == *I && isElse(Phi)) {
+ insertElse(Term);
+ eraseIfUnused(Phi);
+ continue;
+ }
+ closeControlFlow(*I);
+ }
+ openIf(Term);
+ }
+
+ assert(Stack.empty());
+ return true;
+}
+
+/// \brief Create the annotation pass
+FunctionPass *llvm::createSIAnnotateControlFlowPass() {
+ return new SIAnnotateControlFlow();
+}
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
new file mode 100644
index 000000000000..6f0c30761506
--- /dev/null
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -0,0 +1,670 @@
+//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Custom DAG lowering for SI
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIISelLowering.h"
+#include "AMDIL.h"
+#include "AMDGPU.h"
+#include "AMDILIntrinsicInfo.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+using namespace llvm;
+
+SITargetLowering::SITargetLowering(TargetMachine &TM) :
+ AMDGPUTargetLowering(TM),
+ TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())),
+ TRI(TM.getRegisterInfo()) {
+
+ addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
+ addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
+
+ addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
+ addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
+
+ addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
+
+ addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass);
+
+ addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
+
+ addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+
+ addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
+ addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
+
+ addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
+ addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
+
+ computeRegisterProperties();
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::ADD, MVT::i32, Legal);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setTargetDAGCombine(ISD::SELECT_CC);
+
+ setTargetDAGCombine(ISD::SETCC);
+
+ setSchedulingPreference(Sched::RegPressure);
+}
+
+SDValue SITargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ FunctionType *FType = MF.getFunction()->getFunctionType();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ assert(CallConv == CallingConv::C);
+
+ SmallVector<ISD::InputArg, 16> Splits;
+ uint32_t Skipped = 0;
+
+ for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
+ const ISD::InputArg &Arg = Ins[i];
+
+    // First, check if it's a PS input addr
+ if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
+
+ assert((PSInputNum <= 15) && "Too many PS inputs!");
+
+ if (!Arg.Used) {
+        // We can safely skip PS inputs
+ Skipped |= 1 << i;
+ ++PSInputNum;
+ continue;
+ }
+
+ Info->PSInputAddr |= 1 << PSInputNum++;
+ }
+
+    // Second, split vertices into their elements
+ if (Arg.VT.isVector()) {
+ ISD::InputArg NewArg = Arg;
+ NewArg.Flags.setSplit();
+ NewArg.VT = Arg.VT.getVectorElementType();
+
+ // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
+ // three or five element vertex only needs three or five registers,
+      // NOT four or eight.
+ Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ for (unsigned j = 0; j != NumElements; ++j) {
+ Splits.push_back(NewArg);
+ NewArg.PartOffset += NewArg.VT.getStoreSize();
+ }
+
+ } else {
+ Splits.push_back(Arg);
+ }
+ }
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // At least one interpolation mode must be enabled or else the GPU will hang.
+ if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) {
+ Info->PSInputAddr |= 1;
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ }
+
+ AnalyzeFormalArguments(CCInfo, Splits);
+
+ for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+
+ if (Skipped & (1 << i)) {
+ InVals.push_back(SDValue());
+ continue;
+ }
+
+ CCValAssign &VA = ArgLocs[ArgIdx++];
+ assert(VA.isRegLoc() && "Parameter must be in a register!");
+
+ unsigned Reg = VA.getLocReg();
+ MVT VT = VA.getLocVT();
+
+ if (VT == MVT::i64) {
+ // For now assume it is a pointer
+ Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
+ &AMDGPU::SReg_64RegClass);
+ Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
+ InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ continue;
+ }
+
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+
+ Reg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+
+ const ISD::InputArg &Arg = Ins[i];
+ if (Arg.VT.isVector()) {
+
+ // Build a vector from the registers
+ Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ SmallVector<SDValue, 4> Regs;
+ Regs.push_back(Val);
+ for (unsigned j = 1; j != NumElements; ++j) {
+ Reg = ArgLocs[ArgIdx++].getLocReg();
+ Reg = MF.addLiveIn(Reg, RC);
+ Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ }
+
+ // Fill up the missing vector elements
+ NumElements = Arg.VT.getVectorNumElements() - NumElements;
+ for (unsigned j = 0; j != NumElements; ++j)
+ Regs.push_back(DAG.getUNDEF(VT));
+
+ InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
+ Regs.data(), Regs.size()));
+ continue;
+ }
+
+ InVals.push_back(Val);
+ }
+ return Chain;
+}
+
+MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
+ MachineInstr * MI, MachineBasicBlock * BB) const {
+
+ switch (MI->getOpcode()) {
+ default:
+ return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case AMDGPU::BRANCH: return BB;
+ }
+ return BB;
+}
+
+EVT SITargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+}
+
+MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
+ return MVT::i32;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG Lowering Operations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ }
+ return SDValue();
+}
+
+/// \brief Helper function for LowerBRCOND
+static SDNode *findUser(SDValue Value, unsigned Opcode) {
+
+ SDNode *Parent = Value.getNode();
+ for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
+ I != E; ++I) {
+
+ if (I.getUse().get() != Value)
+ continue;
+
+ if (I->getOpcode() == Opcode)
+ return *I;
+ }
+ return 0;
+}
+
+/// This transforms the control flow intrinsics to get the branch destination
+/// as the last parameter; it also switches the branch target with BR if the
+/// need arises.
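+///
+/// (A sketch of the rewrite: "brcond (intrinsic ...), %target" becomes an
+/// intrinsic node carrying %target as an additional operand, and an existing
+/// BR user is morphed to branch to that target directly.)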
+SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
+ SelectionDAG &DAG) const {
+
+ DebugLoc DL = BRCOND.getDebugLoc();
+
+ SDNode *Intr = BRCOND.getOperand(1).getNode();
+ SDValue Target = BRCOND.getOperand(2);
+ SDNode *BR = 0;
+
+ if (Intr->getOpcode() == ISD::SETCC) {
+ // As long as we negate the condition everything is fine
+ SDNode *SetCC = Intr;
+ assert(SetCC->getConstantOperandVal(1) == 1);
+ assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
+ ISD::SETNE);
+ Intr = SetCC->getOperand(0).getNode();
+
+ } else {
+ // Get the target from BR if we don't negate the condition
+ BR = findUser(BRCOND, ISD::BR);
+ Target = BR->getOperand(1);
+ }
+
+ assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+
+  // Build the result types of the new intrinsic call
+ SmallVector<EVT, 4> Res;
+ for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
+ Res.push_back(Intr->getValueType(i));
+
+  // Build the operands of the new intrinsic call
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(BRCOND.getOperand(0));
+ for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
+ Ops.push_back(Intr->getOperand(i));
+ Ops.push_back(Target);
+
+  // Build the new intrinsic call
+ SDNode *Result = DAG.getNode(
+ Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
+ DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode();
+
+ if (BR) {
+ // Give the branch instruction our target
+ SDValue Ops[] = {
+ BR->getOperand(0),
+ BRCOND.getOperand(2)
+ };
+ DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
+ }
+
+ SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
+
+ // Copy the intrinsic results to registers
+ for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
+ SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
+ if (!CopyToReg)
+ continue;
+
+ Chain = DAG.getCopyToReg(
+ Chain, DL,
+ CopyToReg->getOperand(1),
+ SDValue(Result, i - 1),
+ SDValue());
+
+ DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
+ }
+
+ // Remove the old intrinsic from the chain
+ DAG.ReplaceAllUsesOfValueWith(
+ SDValue(Intr, Intr->getNumValues() - 1),
+ Intr->getOperand(0));
+
+ return Chain;
+}
+
+SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Possible Min/Max pattern
+ SDValue MinMax = LowerMinMax(Op, DAG);
+ if (MinMax.getNode()) {
+ return MinMax;
+ }
+
+ SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+}
+
+//===----------------------------------------------------------------------===//
+// Custom DAG optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::SELECT_CC: {
+ ConstantSDNode *True, *False;
+ // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
+ if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ && True->isAllOnesValue()
+ && False->isNullValue()
+ && VT == MVT::i1) {
+ return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(4));
+
+ }
+ break;
+ }
+ case ISD::SETCC: {
+ SDValue Arg0 = N->getOperand(0);
+ SDValue Arg1 = N->getOperand(1);
+ SDValue CC = N->getOperand(2);
+    ConstantSDNode *C = NULL;
+    ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
+
+ // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
+ if (VT == MVT::i1
+ && Arg0.getOpcode() == ISD::SIGN_EXTEND
+ && Arg0.getOperand(0).getValueType() == MVT::i1
+ && (C = dyn_cast<ConstantSDNode>(Arg1))
+ && C->isNullValue()
+ && CCOp == ISD::SETNE) {
+ return SimplifySetCC(VT, Arg0.getOperand(0),
+ DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
+ }
+ break;
+ }
+ }
+ return SDValue();
+}
+
+/// \brief Test if RegClass is one of the VSrc classes
+static bool isVSrc(unsigned RegClass) {
+ return AMDGPU::VSrc_32RegClassID == RegClass ||
+ AMDGPU::VSrc_64RegClassID == RegClass;
+}
+
+/// \brief Test if RegClass is one of the SSrc classes
+static bool isSSrc(unsigned RegClass) {
+ return AMDGPU::SSrc_32RegClassID == RegClass ||
+ AMDGPU::SSrc_64RegClassID == RegClass;
+}
+
+/// \brief Analyze the possible immediate value Op
+///
+/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
+/// and the immediate value if it's a literal immediate
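+///
+/// For example, per the checks below: -16, 7, 64, 0.5f and -4.0f are inline
+/// immediates, while 65 or 3.14f would be literal immediates.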
+int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
+
+ union {
+ int32_t I;
+ float F;
+ } Imm;
+
+ if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N))
+ Imm.I = Node->getSExtValue();
+ else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
+ Imm.F = Node->getValueAPF().convertToFloat();
+ else
+ return -1; // It isn't an immediate
+
+ if ((Imm.I >= -16 && Imm.I <= 64) ||
+ Imm.F == 0.5f || Imm.F == -0.5f ||
+ Imm.F == 1.0f || Imm.F == -1.0f ||
+ Imm.F == 2.0f || Imm.F == -2.0f ||
+ Imm.F == 4.0f || Imm.F == -4.0f)
+ return 0; // It's an inline immediate
+
+ return Imm.I; // It's a literal immediate
+}
+
+/// \brief Try to fold an immediate directly into an instruction
+bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
+ bool &ScalarSlotUsed) const {
+
+ MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
+ if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode()))
+ return false;
+
+ const SDValue &Op = Mov->getOperand(0);
+ int32_t Value = analyzeImmediate(Op.getNode());
+ if (Value == -1) {
+ // Not an immediate at all
+ return false;
+
+ } else if (Value == 0) {
+    // Inline immediates can always be folded
+ Operand = Op;
+ return true;
+
+ } else if (Value == Immediate) {
+    // Literal immediate has already been folded
+ Operand = Op;
+ return true;
+
+ } else if (!ScalarSlotUsed && !Immediate) {
+ // Fold this literal immediate
+ ScalarSlotUsed = true;
+ Immediate = Value;
+ Operand = Op;
+ return true;
+
+ }
+
+ return false;
+}
+
+/// \brief Does "Op" fit into register class "RegClass" ?
+bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op,
+ unsigned RegClass) const {
+
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ SDNode *Node = Op.getNode();
+
+ const TargetRegisterClass *OpClass;
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) {
+ const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode());
+ int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
+ if (OpClassID == -1)
+ OpClass = getRegClassFor(Op.getSimpleValueType());
+ else
+ OpClass = TRI->getRegClass(OpClassID);
+
+ } else if (Node->getOpcode() == ISD::CopyFromReg) {
+ RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode());
+ OpClass = MRI.getRegClass(Reg->getReg());
+
+ } else
+ return false;
+
+ return TRI->getRegClass(RegClass)->hasSubClassEq(OpClass);
+}
+
+/// \brief Make sure that we don't exceed the number of allowed scalars
+void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
+ unsigned RegClass,
+ bool &ScalarSlotUsed) const {
+
+ // First map the operands register class to a destination class
+ if (RegClass == AMDGPU::VSrc_32RegClassID)
+ RegClass = AMDGPU::VReg_32RegClassID;
+ else if (RegClass == AMDGPU::VSrc_64RegClassID)
+ RegClass = AMDGPU::VReg_64RegClassID;
+ else
+ return;
+
+  // Nothing to do if they fit naturally
+ if (fitsRegClass(DAG, Operand, RegClass))
+ return;
+
+ // If the scalar slot isn't used yet use it now
+ if (!ScalarSlotUsed) {
+ ScalarSlotUsed = true;
+ return;
+ }
+
+  // This is a conservative approach; it is possible that we can't determine
+  // the correct register class and copy too often, but better safe than sorry.
+ SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
+ SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DebugLoc(),
+ Operand.getValueType(), Operand, RC);
+ Operand = SDValue(Node, 0);
+}
+
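+/// \brief Fold operands of Node after instruction selection.
+///
+/// A summary of the steps below: try to fold immediates into VSrc/SSrc
+/// operands, swap commutable operands when that enables a fold, and promote
+/// to the e64 encoding when needed.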
+SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
+
+ // Original encoding (either e32 or e64)
+ int Opcode = Node->getMachineOpcode();
+ const MCInstrDesc *Desc = &TII->get(Opcode);
+
+ unsigned NumDefs = Desc->getNumDefs();
+ unsigned NumOps = Desc->getNumOperands();
+
+ // Commuted opcode if available
+ int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
+ const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);
+
+ assert(!DescRev || DescRev->getNumDefs() == NumDefs);
+ assert(!DescRev || DescRev->getNumOperands() == NumOps);
+
+ // e64 version if available, -1 otherwise
+ int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
+ const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
+
+ assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
+ assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4));
+
+ int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
+ bool HaveVSrc = false, HaveSSrc = false;
+
+  // First figure out what we already have in this instruction
+ for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
+ i != e && Op < NumOps; ++i, ++Op) {
+
+ unsigned RegClass = Desc->OpInfo[Op].RegClass;
+ if (isVSrc(RegClass))
+ HaveVSrc = true;
+ else if (isSSrc(RegClass))
+ HaveSSrc = true;
+ else
+ continue;
+
+ int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
+ if (Imm != -1 && Imm != 0) {
+ // Literal immediate
+ Immediate = Imm;
+ }
+ }
+
+ // If we neither have VSrc nor SSrc it makes no sense to continue
+ if (!HaveVSrc && !HaveSSrc)
+ return Node;
+
+ // No scalar allowed when we have both VSrc and SSrc
+ bool ScalarSlotUsed = HaveVSrc && HaveSSrc;
+
+ // Second go over the operands and try to fold them
+ std::vector<SDValue> Ops;
+ bool Promote2e64 = false;
+ for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
+ i != e && Op < NumOps; ++i, ++Op) {
+
+ const SDValue &Operand = Node->getOperand(i);
+ Ops.push_back(Operand);
+
+    // Already folded immediate?
+ if (isa<ConstantSDNode>(Operand.getNode()) ||
+ isa<ConstantFPSDNode>(Operand.getNode()))
+ continue;
+
+    // Is this a VSrc or SSrc operand?
+ unsigned RegClass = Desc->OpInfo[Op].RegClass;
+ if (isVSrc(RegClass) || isSSrc(RegClass)) {
+ // Try to fold the immediates
+ if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
+      // Folding didn't work; make sure we don't hit the SReg limit
+ ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
+ }
+ continue;
+ }
+
+ if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
+
+ unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
+ assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
+
+ // Test if it makes sense to swap operands
+ if (foldImm(Ops[1], Immediate, ScalarSlotUsed) ||
+ (!fitsRegClass(DAG, Ops[1], RegClass) &&
+ fitsRegClass(DAG, Ops[1], OtherRegClass))) {
+
+ // Swap commutable operands
+ SDValue Tmp = Ops[1];
+ Ops[1] = Ops[0];
+ Ops[0] = Tmp;
+
+ Desc = DescRev;
+ DescRev = 0;
+ continue;
+ }
+ }
+
+ if (DescE64 && !Immediate) {
+
+ // Test if it makes sense to switch to e64 encoding
+ unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
+ if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
+ continue;
+
+ int32_t TmpImm = -1;
+ if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) ||
+ (!fitsRegClass(DAG, Ops[i], RegClass) &&
+           fitsRegClass(DAG, Ops[i], OtherRegClass))) {
+
+ // Switch to e64 encoding
+ Immediate = -1;
+ Promote2e64 = true;
+ Desc = DescE64;
+ DescE64 = 0;
+ }
+ }
+ }
+
+ if (Promote2e64) {
+ // Add the modifier flags while promoting
+ for (unsigned i = 0; i < 4; ++i)
+ Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
+ }
+
+ // Add optional chain and glue
+ for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
+ Ops.push_back(Node->getOperand(i));
+
+  // Create a completely new instruction
+ return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(),
+ Node->getVTList(), Ops.data(), Ops.size());
+}
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
new file mode 100644
index 000000000000..5ad2f40f0f3a
--- /dev/null
+++ b/lib/Target/R600/SIISelLowering.h
@@ -0,0 +1,58 @@
+//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI DAG Lowering interface definition
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SIISELLOWERING_H
+#define SIISELLOWERING_H
+
+#include "AMDGPUISelLowering.h"
+#include "SIInstrInfo.h"
+
+namespace llvm {
+
+class SITargetLowering : public AMDGPUTargetLowering {
+ const SIInstrInfo * TII;
+ const TargetRegisterInfo * TRI;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+
+ bool foldImm(SDValue &Operand, int32_t &Immediate,
+ bool &ScalarSlotUsed) const;
+ bool fitsRegClass(SelectionDAG &DAG, SDValue &Op, unsigned RegClass) const;
+ void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
+ unsigned RegClass, bool &ScalarSlotUsed) const;
+
+public:
+ SITargetLowering(TargetMachine &tm);
+
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
+ MachineBasicBlock * BB) const;
+ virtual EVT getSetCCResultType(EVT VT) const;
+ virtual MVT getScalarShiftAmountTy(EVT VT) const;
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
+
+ int32_t analyzeImmediate(const SDNode *N) const;
+};
+
+} // End namespace llvm
+
+#endif //SIISELLOWERING_H
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
new file mode 100644
index 000000000000..98bd3dbb6646
--- /dev/null
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -0,0 +1,358 @@
+//===-- SIInsertWaits.cpp - Insert waits for memory operations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Insert wait instructions for memory reads and writes.
+///
+/// Memory reads and writes are issued asynchronously, so we need to insert
+/// S_WAITCNT instructions when we want to access any of their results or
+/// overwrite any register that's used asynchronously.
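+///
+/// For example (an illustrative sketch): a buffer load increments VM_CNT
+/// when it is issued; before its destination register may be read again, an
+/// S_WAITCNT instruction with a small enough VM_CNT value has to be
+/// inserted.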
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief One variable for each of the hardware counters
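+///
+/// The union form lets the pass refer to each counter by name (Named.VM
+/// etc.) while iterating over all three uniformly through Array[i].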
+typedef union {
+ struct {
+ unsigned VM;
+ unsigned EXP;
+ unsigned LGKM;
+ } Named;
+ unsigned Array[3];
+
+} Counters;
+
+typedef Counters RegCounters[512];
+typedef std::pair<unsigned, unsigned> RegInterval;
+
+class SIInsertWaits : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const SIInstrInfo *TII;
+ const SIRegisterInfo &TRI;
+ const MachineRegisterInfo *MRI;
+
+ /// \brief Constant hardware limits
+ static const Counters WaitCounts;
+
+ /// \brief Constant zero value
+ static const Counters ZeroCounts;
+
+ /// \brief Counter values we have already waited on.
+ Counters WaitedOn;
+
+ /// \brief Counter values for last instruction issued.
+ Counters LastIssued;
+
+ /// \brief Registers used by async instructions.
+ RegCounters UsedRegs;
+
+ /// \brief Registers defined by async instructions.
+ RegCounters DefinedRegs;
+
+ /// \brief Different export instruction types seen since last wait.
+ unsigned ExpInstrTypesSeen;
+
+ /// \brief Get increment/decrement amount for this instruction.
+ Counters getHwCounts(MachineInstr &MI);
+
+ /// \brief Is operand relevant for async execution?
+ bool isOpRelevant(MachineOperand &Op);
+
+ /// \brief Get register interval an operand affects.
+ RegInterval getRegInterval(MachineOperand &Op);
+
+  /// \brief Handle an instruction's async components
+ void pushInstruction(MachineInstr &MI);
+
+ /// \brief Insert the actual wait instruction
+ bool insertWait(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const Counters &Counts);
+
+ /// \brief Do we need def2def checks?
+ bool unorderedDefines(MachineInstr &MI);
+
+ /// \brief Resolve all operand dependencies to counter requirements
+ Counters handleOperands(MachineInstr &MI);
+
+public:
+ SIInsertWaits(TargetMachine &tm) :
+ MachineFunctionPass(ID),
+ TII(static_cast<const SIInstrInfo*>(tm.getInstrInfo())),
+ TRI(TII->getRegisterInfo()) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "SI insert wait instructions";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SIInsertWaits::ID = 0;
+
+const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
+const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
+
+FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
+ return new SIInsertWaits(tm);
+}
+
+Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
+
+ uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
+ Counters Result;
+
+ Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
+
+ // Only consider stores or EXP for EXP_CNT
+ Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
+ (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
+
+  // LGKM may use larger values
+ if (TSFlags & SIInstrFlags::LGKM_CNT) {
+
+ MachineOperand &Op = MI.getOperand(0);
+ assert(Op.isReg() && "First LGKM operand must be a register!");
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
+ Result.Named.LGKM = Size > 4 ? 2 : 1;
+
+ } else {
+ Result.Named.LGKM = 0;
+ }
+
+ return Result;
+}
+
+bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
+
+ // Constants are always irrelevant
+ if (!Op.isReg())
+ return false;
+
+ // Defines are always relevant
+ if (Op.isDef())
+ return true;
+
+ // For exports all registers are relevant
+ MachineInstr &MI = *Op.getParent();
+ if (MI.getOpcode() == AMDGPU::EXP)
+ return true;
+
+ // For stores the stored value is also relevant
+ if (!MI.getDesc().mayStore())
+ return false;
+
+ for (MachineInstr::mop_iterator I = MI.operands_begin(),
+ E = MI.operands_end(); I != E; ++I) {
+
+ if (I->isReg() && I->isUse())
+ return Op.isIdenticalTo(*I);
+ }
+
+ return false;
+}
+
+RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
+
+ if (!Op.isReg())
+ return std::make_pair(0, 0);
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
+
+ assert(Size >= 4);
+
+ RegInterval Result;
+ Result.first = TRI.getEncodingValue(Reg);
+ Result.second = Result.first + Size / 4;
+
+ return Result;
+}
+
+void SIInsertWaits::pushInstruction(MachineInstr &MI) {
+
+ // Get the hardware counter increments and sum them up
+ Counters Increment = getHwCounts(MI);
+ unsigned Sum = 0;
+
+ for (unsigned i = 0; i < 3; ++i) {
+ LastIssued.Array[i] += Increment.Array[i];
+ Sum += Increment.Array[i];
+ }
+
+ // If we don't increase anything then that's it
+ if (Sum == 0)
+ return;
+
+ // Remember which export instructions we have seen
+ if (Increment.Named.EXP) {
+ ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+
+ MachineOperand &Op = MI.getOperand(i);
+ if (!isOpRelevant(Op))
+ continue;
+
+ RegInterval Interval = getRegInterval(Op);
+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
+
+ // Remember which registers we define
+ if (Op.isDef())
+ DefinedRegs[j] = LastIssued;
+
+      // and which ones we are using
+ if (Op.isUse())
+ UsedRegs[j] = LastIssued;
+ }
+ }
+}
+
+bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const Counters &Required) {
+
+ // End of program? No need to wait on anything
+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
+ return false;
+
+ // Figure out if the async instructions execute in order
+ bool Ordered[3];
+
+ // VM_CNT is always ordered
+ Ordered[0] = true;
+
+ // EXP_CNT is unordered if we have both EXP & VM-writes
+ Ordered[1] = ExpInstrTypesSeen == 3;
+
+ // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
+ Ordered[2] = false;
+
+ // The values we are going to put into the S_WAITCNT instruction
+ Counters Counts = WaitCounts;
+
+ // Do we really need to wait?
+ bool NeedWait = false;
+
+ for (unsigned i = 0; i < 3; ++i) {
+
+ if (Required.Array[i] <= WaitedOn.Array[i])
+ continue;
+
+ NeedWait = true;
+
+ if (Ordered[i]) {
+ unsigned Value = LastIssued.Array[i] - Required.Array[i];
+
+      // Adjust the value to the real hardware possibilities
+ Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
+
+ } else
+ Counts.Array[i] = 0;
+
+    // Remember what we have waited on
+ WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
+ }
+
+ if (!NeedWait)
+ return false;
+
+ // Reset EXP_CNT instruction types
+ if (Counts.Named.EXP == 0)
+ ExpInstrTypesSeen = 0;
+
+ // Build the wait instruction
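+  // The immediate packs the counters as: VM_CNT in bits 3:0, EXP_CNT in
+  // bits 6:4 and LGKM_CNT in bits 10:8 (matching the masks and shifts
+  // below).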
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm((Counts.Named.VM & 0xF) |
+ ((Counts.Named.EXP & 0x7) << 4) |
+ ((Counts.Named.LGKM & 0x7) << 8));
+
+ return true;
+}
+
+/// \brief Helper function for handleOperands
+static void increaseCounters(Counters &Dst, const Counters &Src) {
+
+ for (unsigned i = 0; i < 3; ++i)
+ Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
+}
+
+Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
+
+ Counters Result = ZeroCounts;
+
+ // For each register affected by this
+ // instruction increase the result sequence
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+
+ MachineOperand &Op = MI.getOperand(i);
+ RegInterval Interval = getRegInterval(Op);
+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
+
+ if (Op.isDef()) {
+ increaseCounters(Result, UsedRegs[j]);
+ increaseCounters(Result, DefinedRegs[j]);
+ }
+
+ if (Op.isUse())
+ increaseCounters(Result, DefinedRegs[j]);
+ }
+ }
+
+ return Result;
+}
+
+bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
+
+ bool Changes = false;
+
+ MRI = &MF.getRegInfo();
+
+ WaitedOn = ZeroCounts;
+ LastIssued = ZeroCounts;
+
+ memset(&UsedRegs, 0, sizeof(UsedRegs));
+ memset(&DefinedRegs, 0, sizeof(DefinedRegs));
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+
+ Changes |= insertWait(MBB, I, handleOperands(*I));
+ pushInstruction(*I);
+ }
+
+ // Wait for everything at the end of the MBB
+ Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+ }
+
+ return Changes;
+}
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
new file mode 100644
index 000000000000..3891ddb2dbe2
--- /dev/null
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -0,0 +1,426 @@
+//===-- SIInstrFormats.td - SI Instruction Encodings ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction format definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<1> VM_CNT = 0;
+ field bits<1> EXP_CNT = 0;
+ field bits<1> LGKM_CNT = 0;
+
+ let TSFlags{0} = VM_CNT;
+ let TSFlags{1} = EXP_CNT;
+ let TSFlags{2} = LGKM_CNT;
+}
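+
+// Note that these TSFlags bits are read back by the SIInsertWaits pass to
+// decide which hardware counters an instruction increments.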
+
+class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ field bits<32> Inst;
+ let Size = 4;
+}
+
+class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ field bits<64> Inst;
+ let Size = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Scalar operations
+//===----------------------------------------------------------------------===//
+
+class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<8> SSRC0;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = op;
+ let Inst{22-16} = SDST;
+  let Inst{31-23} = 0x17d; // encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<8> SSRC0;
+ bits<8> SSRC1;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = SDST;
+ let Inst{29-23} = op;
+ let Inst{31-30} = 0x2; // encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<8> SSRC0;
+ bits<8> SSRC1;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17e;
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
+  Enc32 <outs, ins, asm, pattern> {
+
+ bits <7> SDST;
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = SDST;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
+ (outs),
+ ins,
+ asm,
+ pattern > {
+
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17f; // encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
+ list<dag> pattern> : Enc32<outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<7> SBASE;
+ bits<8> OFFSET;
+
+ let Inst{7-0} = OFFSET;
+ let Inst{8} = imm;
+ let Inst{14-9} = SBASE{6-1};
+ let Inst{21-15} = SDST;
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+
+ let LGKM_CNT = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector ALU operations
+//===----------------------------------------------------------------------===//
+
+let Uses = [EXEC] in {
+
+class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = op;
+ let Inst{24-17} = VDST;
+ let Inst{31-25} = 0x3f; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = VDST;
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<9> SRC1;
+ bits<9> SRC2;
+ bits<3> ABS;
+ bits<1> CLAMP;
+ bits<2> OMOD;
+ bits<3> NEG;
+
+ let Inst{7-0} = VDST;
+ let Inst{10-8} = ABS;
+ let Inst{11} = CLAMP;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = SRC0;
+ let Inst{49-41} = SRC1;
+ let Inst{58-50} = SRC2;
+ let Inst{60-59} = OMOD;
+ let Inst{63-61} = NEG;
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<9> SRC1;
+ bits<9> SRC2;
+ bits<7> SDST;
+ bits<2> OMOD;
+ bits<3> NEG;
+
+ let Inst{7-0} = VDST;
+ let Inst{14-8} = SDST;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = SRC0;
+ let Inst{49-41} = SRC1;
+ let Inst{58-50} = SRC2;
+ let Inst{60-59} = OMOD;
+ let Inst{63-61} = NEG;
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
+ Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
+
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e;
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<8> VSRC;
+ bits<2> ATTRCHAN;
+ bits<6> ATTR;
+
+ let Inst{7-0} = VSRC;
+ let Inst{9-8} = ATTRCHAN;
+ let Inst{15-10} = ATTR;
+ let Inst{17-16} = op;
+ let Inst{25-18} = VDST;
+ let Inst{31-26} = 0x32; // encoding
+
+ let neverHasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+} // End Uses = [EXEC]
+
+//===----------------------------------------------------------------------===//
+// Vector I/O operations
+//===----------------------------------------------------------------------===//
+
+let Uses = [EXEC] in {
+
+class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<1> LDS;
+ bits<8> VADDR;
+ bits<7> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{16} = LDS;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC{6-2};
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+
+ let neverHasSideEffects = 1;
+}
+
+class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<4> DFMT;
+ bits<3> NFMT;
+ bits<8> VADDR;
+ bits<7> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{18-16} = op;
+ let Inst{22-19} = DFMT;
+ let Inst{25-23} = NFMT;
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC{6-2};
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+
+ let neverHasSideEffects = 1;
+}
+
+class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<4> DMASK;
+ bits<1> UNORM;
+ bits<1> GLC;
+ bits<1> DA;
+ bits<1> R128;
+ bits<1> TFE;
+ bits<1> LWE;
+ bits<1> SLC;
+ bits<8> VADDR;
+ bits<7> SRSRC;
+ bits<7> SSAMP;
+
+ let Inst{11-8} = DMASK;
+ let Inst{12} = UNORM;
+ let Inst{13} = GLC;
+ let Inst{14} = DA;
+ let Inst{15} = R128;
+ let Inst{16} = TFE;
+ let Inst{17} = LWE;
+ let Inst{24-18} = op;
+ let Inst{25} = SLC;
+ let Inst{31-26} = 0x3c;
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC{6-2};
+ let Inst{57-53} = SSAMP{6-2};
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+}
+
+def EXP : Enc64<
+ (outs),
+ (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
+ [] > {
+
+ bits<4> EN;
+ bits<6> TGT;
+ bits<1> COMPR;
+ bits<1> DONE;
+ bits<1> VM;
+ bits<8> VSRC0;
+ bits<8> VSRC1;
+ bits<8> VSRC2;
+ bits<8> VSRC3;
+
+ let Inst{3-0} = EN;
+ let Inst{9-4} = TGT;
+ let Inst{10} = COMPR;
+ let Inst{11} = DONE;
+ let Inst{12} = VM;
+ let Inst{31-26} = 0x3e;
+ let Inst{39-32} = VSRC0;
+ let Inst{47-40} = VSRC1;
+ let Inst{55-48} = VSRC2;
+ let Inst{63-56} = VSRC3;
+
+ let EXP_CNT = 1;
+}
+
+} // End Uses = [EXEC]
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
new file mode 100644
index 000000000000..0bfcef562f04
--- /dev/null
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -0,0 +1,264 @@
+//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI Implementation of TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIInstrInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include <stdio.h>
+
+using namespace llvm;
+
+SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
+ : AMDGPUInstrInfo(tm),
+ RI(tm, *this)
+ { }
+
+const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
+void
+SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+
+ // If we are trying to copy to or from SCC, there is a bug somewhere else in
+ // the backend. While it may be theoretically possible to do this, it should
+ // never be necessary.
+ assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
+
+ const int16_t Sub0_15[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
+ };
+
+ const int16_t Sub0_7[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
+ };
+
+ const int16_t Sub0_3[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
+ };
+
+ const int16_t Sub0_1[] = {
+ AMDGPU::sub0, AMDGPU::sub1, 0
+ };
+
+ unsigned Opcode;
+ const int16_t *SubIndices;
+
+ if (AMDGPU::M0 == DestReg) {
+    // Check whether M0 is already set to this value.
+ for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
+ I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
+
+ if (!I->definesRegister(AMDGPU::M0))
+ continue;
+
+ unsigned Opc = I->getOpcode();
+ if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
+ break;
+
+ if (!I->readsRegister(SrcReg))
+ break;
+
+ // The copy isn't necessary
+ return;
+ }
+ }
+
+ if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::S_MOV_B32;
+ SubIndices = Sub0_15;
+
+ } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+
+ } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_64RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_1;
+
+ } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_128RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_3;
+
+ } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_256RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_7;
+
+ } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_512RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_15;
+
+ } else {
+ llvm_unreachable("Can't copy register!");
+ }
+
+ while (unsigned SubIdx = *SubIndices++) {
+ MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
+ get(Opcode), RI.getSubReg(DestReg, SubIdx));
+
+ Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
+
+ if (*SubIndices)
+ Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
+ }
+}
+
+unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+
+ int NewOpc;
+
+ // Try to map original to commuted opcode
+ if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
+ return NewOpc;
+
+ // Try to map commuted to original opcode
+ if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
+ return NewOpc;
+
+ return Opcode;
+}
+
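+// Commute the first two source operands and, when the opcode has a distinct
+// reversed form recorded in the getCommuteRev/getCommuteOrig tables, switch
+// to that opcode so the printed operand order stays correct.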
+MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+
+ if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() ||
+ !MI->getOperand(2).isReg())
+ return 0;
+
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+
+ if (MI)
+ MI->setDesc(get(commuteOpcode(MI->getOpcode())));
+
+ return MI;
+}
+
+MachineInstr *SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+                                          int64_t Imm) const {
+  MachineInstr *MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_B32_e32),
+                                            DebugLoc());
+  MachineInstrBuilder MIB(*MF, MI);
+  MIB.addReg(DstReg, RegState::Define);
+  MIB.addImm(Imm);
+
+  return MI;
+}
+
+bool SIInstrInfo::isMov(unsigned Opcode) const {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ return true;
+ }
+}
+
+bool
+SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ return RC != &AMDGPU::EXECRegRegClass;
+}
+
+//===----------------------------------------------------------------------===//
+// Indirect addressing callbacks
+//===----------------------------------------------------------------------===//
+
+unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ assert(Channel == 0);
+ return RegIndex;
+}
+
+
+int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ llvm_unreachable("Unimplemented");
+}
+
+int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
+ llvm_unreachable("Unimplemented");
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address, unsigned OffsetReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectRead(
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address, unsigned OffsetReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
+ llvm_unreachable("Unimplemented");
+}
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
new file mode 100644
index 000000000000..d4e60e508634
--- /dev/null
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -0,0 +1,97 @@
+//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for SIInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SIINSTRINFO_H
+#define SIINSTRINFO_H
+
+#include "AMDGPUInstrInfo.h"
+#include "SIRegisterInfo.h"
+
+namespace llvm {
+
+class SIInstrInfo : public AMDGPUInstrInfo {
+private:
+ const SIRegisterInfo RI;
+
+public:
+ explicit SIInstrInfo(AMDGPUTargetMachine &tm);
+
+ const SIRegisterInfo &getRegisterInfo() const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ unsigned commuteOpcode(unsigned Opcode) const;
+
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI,
+ bool NewMI=false) const;
+
+ virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
+ int64_t Imm) const;
+
+  virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0; }
+ virtual bool isMov(unsigned Opcode) const;
+
+ virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
+};
+
+namespace AMDGPU {
+
+ int getVOPe64(uint16_t Opcode);
+ int getCommuteRev(uint16_t Opcode);
+ int getCommuteOrig(uint16_t Opcode);
+
+} // End namespace AMDGPU
+
+} // End namespace llvm
+
+namespace SIInstrFlags {
+ enum Flags {
+ // First 4 bits are the instruction encoding
+ VM_CNT = 1 << 0,
+ EXP_CNT = 1 << 1,
+ LGKM_CNT = 1 << 2
+ };
+}
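+
+// These flags are expected to be mirrored into each instruction's TSFlags by
+// the VM_CNT/EXP_CNT/LGKM_CNT fields set in SIInstrFormats.td, so a typical
+// (illustrative) C++ query would be:
+//   if (TII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP_CNT) { ... }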
+
+#endif // SIINSTRINFO_H
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
new file mode 100644
index 000000000000..617f0b871c25
--- /dev/null
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -0,0 +1,356 @@
+//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI DAG Nodes
+//===----------------------------------------------------------------------===//
+
+// SMRD takes a 64-bit memory address and can only add a 32-bit offset.
+def SIadd64bit32bit : SDNode<"ISD::ADD",
+ SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]>
+>;
+
+// Transformation function: extract the lower 32 bits of a 64-bit immediate.
+def LO32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
+}]>;
+
+// Transformation function: extract the upper 32 bits of a 64-bit immediate.
+def HI32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
+}]>;
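+
+// Illustrative use only (not a pattern defined in this file): a 64-bit
+// immediate can be materialized in two halves with these transforms, e.g.
+// (S_MOV_B32 (LO32 imm:$imm)) for the low half and (S_MOV_B32 (HI32
+// imm:$imm)) for the high half.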
+
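+// Matches a dword-aligned byte offset that fits in 8 bits once scaled; the
+// transform converts the byte offset into a dword offset by dividing by 4.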
+def IMM8bitDWORD : ImmLeaf <
+ i32, [{
+ return (Imm & ~0x3FC) == 0;
+ }], SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ N->getZExtValue() >> 2, MVT::i32);
+ }]>
+>;
+
+def IMM12bit : ImmLeaf <
+ i16,
+ [{return isUInt<12>(Imm);}]
+>;
+
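+// Matches only immediates that can be encoded inline in the instruction word
+// (analyzeImmediate returns 0 for such values).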
+class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
+ return ((const SITargetLowering &)TLI).analyzeImmediate(N) == 0;
+}]>;
+
+//===----------------------------------------------------------------------===//
+// SI assembler operands
+//===----------------------------------------------------------------------===//
+
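+// Common scalar source-operand encodings: 0x80 is the inline constant zero
+// and 0x6A selects VCC.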
+def SIOperand {
+ int ZERO = 0x80;
+ int VCC = 0x6A;
+}
+
+include "SIInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction multiclass helpers.
+//
+// Instructions with _32 take 32-bit operands.
+// Instructions with _64 take 64-bit operands.
+//
+// VOP_* instructions can use either a 32-bit or a 64-bit encoding. The 32-bit
+// encoding is the standard encoding, but instructions that make use of any of
+// the instruction modifiers must use the 64-bit encoding.
+//
+// Instructions with _e32 use the 32-bit encoding.
+// Instructions with _e64 use the 64-bit encoding.
+//
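+// As an illustration only (V_EXAMPLE_F32 is not an opcode defined here):
+//
+//   defm V_EXAMPLE_F32 : VOP1_32 <0x3f, "V_EXAMPLE_F32", []>;
+//
+// expands to V_EXAMPLE_F32_e32 (VOP1 encoding) and V_EXAMPLE_F32_e64 (VOP3
+// encoding), both tagged with the same OpName so the getVOPe64 mapping at
+// the end of this file can translate between them.
+//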
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Scalar classes
+//===----------------------------------------------------------------------===//
+
+class SOP1_32 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
+ op, (outs SReg_32:$dst), (ins SSrc_32:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
+ op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC <
+ op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
+class SOPK_32 <bits<5> op, string opName, list<dag> pattern> : SOPK <
+ op, (outs SReg_32:$dst), (ins i16imm:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
+ op, (outs SReg_64:$dst), (ins i16imm:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
+ RegisterClass dstClass> {
+ def _IMM : SMRD <
+ op, 1, (outs dstClass:$dst),
+ (ins baseClass:$sbase, i32imm:$offset),
+ asm#" $dst, $sbase, $offset", []
+ >;
+
+ def _SGPR : SMRD <
+ op, 0, (outs dstClass:$dst),
+ (ins baseClass:$sbase, SReg_32:$soff),
+ asm#" $dst, $sbase, $soff", []
+ >;
+}
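+
+// For example, "defm S_LOAD_DWORD : SMRD_Helper <...>" below yields
+// S_LOAD_DWORD_IMM (immediate offset) and S_LOAD_DWORD_SGPR (offset taken
+// from a scalar register).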
+
+//===----------------------------------------------------------------------===//
+// Vector ALU classes
+//===----------------------------------------------------------------------===//
+
+class VOP <string opName> {
+ string OpName = opName;
+}
+
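+// Ties a commutable instruction to the name of its reversed form and records
+// whether this def is the original; the getCommuteRev and getCommuteOrig
+// mappings at the end of this file are built from these fields.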
+class VOP2_REV <string revOp, bit isOrig> {
+ string RevOp = revOp;
+ bit IsOrig = isOrig;
+}
+
+multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
+ string opName, list<dag> pattern> {
+
+ def _e32 : VOP1 <
+ op, (outs drc:$dst), (ins src:$src0),
+ opName#"_e32 $dst, $src0", pattern
+ >, VOP <opName>;
+
+ def _e64 : VOP3 <
+ {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs drc:$dst),
+ (ins src:$src0,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName> {
+ let SRC1 = SIOperand.ZERO;
+ let SRC2 = SIOperand.ZERO;
+ }
+}
+
+multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+
+multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+
+multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern, string revOp> {
+ def _e32 : VOP2 <
+ op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
+ opName#"_e32 $dst, $src0, $src1", pattern
+ >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _e64 : VOP3 <
+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs vrc:$dst),
+ (ins arc:$src0, arc:$src1,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+ let SRC2 = SIOperand.ZERO;
+ }
+}
+
+multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName>
+ : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
+
+multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName>
+ : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
+
+multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
+ string revOp = opName> {
+
+ def _e32 : VOP2 <
+ op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
+ opName#"_e32 $dst, $src0, $src1", pattern
+ >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _e64 : VOP3b <
+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+ let SRC2 = SIOperand.ZERO;
+    // The VOP2 variant puts the carry out into VCC; the VOP3 variant can
+    // write it into any SGPR. We currently don't use the carry out, so for
+    // now hard-code it to VCC as well.
+ let SDST = SIOperand.VCC;
+ }
+}
+
+multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
+ string opName, ValueType vt, PatLeaf cond> {
+
+ def _e32 : VOPC <
+ op, (ins arc:$src0, vrc:$src1),
+ opName#"_e32 $dst, $src0, $src1", []
+ >, VOP <opName>;
+
+ def _e64 : VOP3 <
+ {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs SReg_64:$dst),
+ (ins arc:$src0, arc:$src1,
+ InstFlag:$abs, InstFlag:$clamp,
+ InstFlag:$omod, InstFlag:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg",
+ !if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>,
+ [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
+ )
+ >, VOP <opName> {
+ let SRC2 = SIOperand.ZERO;
+ }
+}
+
+multiclass VOPC_32 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond>;
+
+multiclass VOPC_64 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
+
+class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
+ op, (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
+ i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+ opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+>, VOP <opName>;
+
+class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
+ op, (outs VReg_64:$dst),
+ (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2,
+ i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+ opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+>, VOP <opName>;
+
+//===----------------------------------------------------------------------===//
+// Vector I/O classes
+//===----------------------------------------------------------------------===//
+
+class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+ op,
+ (outs),
+ (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
+ i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
+ SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
+ []> {
+ let mayStore = 1;
+ let mayLoad = 0;
+}
+
+class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
+ op,
+ (outs regClass:$dst),
+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i1imm:$lds, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc,
+ i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, "
+ #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+ op,
+ (outs regClass:$dst),
+ (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc,
+ i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
+ op,
+ (outs VReg_128:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
+ SReg_256:$srsrc, SReg_128:$ssamp),
+ asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+ #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector instruction mappings
+//===----------------------------------------------------------------------===//
+
+// Maps an opcode in e32 form to its e64 equivalent
+def getVOPe64 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["Size"];
+ let KeyCol = ["4"];
+ let ValueCols = [["8"]];
+}
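+
+// Illustrative C++ query, via the lookup declared in SIInstrInfo.h:
+//   AMDGPU::getVOPe64(AMDGPU::V_MOV_B32_e32) == AMDGPU::V_MOV_B32_e64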
+
+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
+// Maps a commuted opcode to its original version
+def getCommuteOrig : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
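+
+// Together these let SIInstrInfo::commuteOpcode() translate in either
+// direction, e.g. (illustratively) between a V_SUB-style opcode and its
+// V_SUBREV-style reversed counterpart.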
+
+include "SIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
new file mode 100644
index 000000000000..4f734f91245a
--- /dev/null
+++ b/lib/Target/R600/SIInstructions.td
@@ -0,0 +1,1607 @@
+//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file was originally auto-generated from a GPU register header file,
+// with all of the instruction definitions commented out. Instructions that
+// are not yet supported remain commented out.
+//===----------------------------------------------------------------------===//
+
+class InterpSlots {
+  int P0 = 2;
+  int P10 = 0;
+  int P20 = 1;
+}
+def INTERP : InterpSlots;
+
+def InterpSlot : Operand<i32> {
+ let PrintMethod = "printInterpSlot";
+}
+
+def isSI : Predicate<"Subtarget.device()"
+ "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
+
+let Predicates = [isSI] in {
+
+let neverHasSideEffects = 1 in {
+
+let isMoveImm = 1 in {
+def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
+def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
+def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
+def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
+} // End isMoveImm = 1
+
+def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
+def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
+def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
+def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
+def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
+} // End neverHasSideEffects = 1
+
+////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
+////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
+////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
+////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
+////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
+////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
+////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
+////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
+//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
+//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
+def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
+//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
+//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
+//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
+////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
+////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
+////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
+////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>;
+def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>;
+def S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>;
+def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>;
+def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>;
+
+let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in {
+
+def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>;
+def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>;
+def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>;
+def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>;
+def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>;
+def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>;
+def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>;
+def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>;
+
+} // End hasSideEffects = 1
+
+def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>;
+def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>;
+def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>;
+def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>;
+def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>;
+def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
+//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>;
+def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
+def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
+def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
+def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
+def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
+
+/*
+This instruction is disabled for now until we can figure out how to teach the
+instruction selector to choose correctly between the S_CMP* and V_CMP*
+instructions.
+
+When this instruction is enabled the code generator sometimes produces this
+invalid sequence:
+
+SCC = S_CMPK_EQ_I32 SGPR0, imm
+VCC = COPY SCC
+VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
+
+def S_CMPK_EQ_I32 : SOPK <
+ 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
+ "S_CMPK_EQ_I32",
+ [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
+>;
+*/
+
+let isCompare = 1 in {
+def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
+def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
+def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
+def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>;
+def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>;
+def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>;
+def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>;
+def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
+def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
+def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
+def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
+} // End isCompare = 1
+
+def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
+def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
+def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
+def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
+def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
+//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
+//def EXP : EXP_ <0x00000000, "EXP", []>;
+
+let isCompare = 1 in {
+
+defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">;
+defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_LT>;
+defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_EQ>;
+defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_LE>;
+defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_GT>;
+defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", f32, COND_NE>;
+defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_GE>;
+defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32">;
+defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32">;
+defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">;
+defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">;
+defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">;
+defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">;
+defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_NE>;
+defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">;
+defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">;
+defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">;
+defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">;
+defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">;
+defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">;
+defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">;
+defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">;
+defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">;
+defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32">;
+defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">;
+defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">;
+defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">;
+defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">;
+defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">;
+defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">;
+defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">;
+defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64">;
+defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64">;
+defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64">;
+defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64">;
+defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">;
+defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64">;
+defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64">;
+defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64">;
+defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">;
+defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">;
+defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">;
+defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">;
+defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64">;
+defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">;
+defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">;
+defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">;
+defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">;
+defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64">;
+defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">;
+defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">;
+defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">;
+defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">;
+defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">;
+defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">;
+defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">;
+defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">;
+defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">;
+defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">;
+defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">;
+defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">;
+defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">;
+defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32">;
+defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32">;
+defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32">;
+defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32">;
+defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32">;
+defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32">;
+defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32">;
+defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32">;
+defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32">;
+defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32">;
+defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32">;
+defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">;
+defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">;
+defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">;
+defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">;
+defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">;
+defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">;
+defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">;
+defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">;
+defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">;
+defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">;
+defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">;
+defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">;
+defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">;
+defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">;
+defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">;
+defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">;
+defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">;
+defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">;
+defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">;
+defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64">;
+defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64">;
+defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64">;
+defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64">;
+defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64">;
+defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64">;
+defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64">;
+defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64">;
+defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64">;
+defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64">;
+defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64">;
+defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64">;
+defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64">;
+defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64">;
+defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64">;
+defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64">;
+defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64">;
+defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64">;
+defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64">;
+defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64">;
+defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64">;
+defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64">;
+defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64">;
+defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64">;
+defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64">;
+defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64">;
+defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64">;
+defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64">;
+defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">;
+defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_LT>;
+defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>;
+defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_LE>;
+defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_GT>;
+defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>;
+defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_GE>;
+defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">;
+defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">;
+defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">;
+defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">;
+defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">;
+defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">;
+defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">;
+defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">;
+defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64">;
+defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64">;
+defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64">;
+defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64">;
+defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64">;
+defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64">;
+defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">;
+defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">;
+defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">;
+defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">;
+defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">;
+defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">;
+defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">;
+defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">;
+defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32">;
+defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32">;
+defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32">;
+defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32">;
+defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32">;
+defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32">;
+defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">;
+defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">;
+defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">;
+defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">;
+defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">;
+defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">;
+defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">;
+defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">;
+defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64">;
+defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64">;
+defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64">;
+defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64">;
+defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64">;
+defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64">;
+defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">;
+defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">;
+defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">;
+defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">;
+defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">;
+defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">;
+defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">;
+defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">;
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+} // End isCompare = 1
+
+//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
+//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
+//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
+def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>;
+//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
+//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
+//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
+//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>;
+//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
+//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
+//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
+def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
+def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
+def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
+//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
+//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
+//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
+//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
+//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
+//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
+//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
+//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
+//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
+//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
+//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
+//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>;
+//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>;
+//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>;
+//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>;
+//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>;
+//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>;
+//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>;
+//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>;
+//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>;
+//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>;
+//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>;
+//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>;
+//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>;
+//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>;
+//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>;
+//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>;
+//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>;
+//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>;
+//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>;
+//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>;
+//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>;
+//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>;
+//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>;
+//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>;
+//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>;
+//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>;
+//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
+//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
+//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
+def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
+//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
+//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
+//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
+//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
+
+let mayLoad = 1 in {
+
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
+
+defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
+ 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+>;
+
+defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
+ 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
+>;
+
+defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
+ 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
+>;
+
+defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
+ 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
+>;
+
+defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
+ 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
+>;
+
+} // mayLoad = 1
+
+//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
+//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
+//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
+//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>;
+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>;
+//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>;
+//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
+//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>;
+//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
+//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
+//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
+//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>;
+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>;
+//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>;
+//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>;
+//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>;
+//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>;
+//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>;
+//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>;
+//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>;
+//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>;
+//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>;
+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
+def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">;
+//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
+def IMAGE_SAMPLE_D : MIMG_Load_Helper <0x00000022, "IMAGE_SAMPLE_D">;
+//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
+def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">;
+def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">;
+//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
+//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
+def IMAGE_SAMPLE_C : MIMG_Load_Helper <0x00000028, "IMAGE_SAMPLE_C">;
+//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
+//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
+//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
+def IMAGE_SAMPLE_C_L : MIMG_Load_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
+def IMAGE_SAMPLE_C_B : MIMG_Load_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
+//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
+//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
+//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
+//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>;
+//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>;
+//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>;
+//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>;
+//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>;
+//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>;
+//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>;
+//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>;
+//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>;
+//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>;
+//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>;
+//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>;
+//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
+//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
+//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
+//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
+//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
+//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
+//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
+//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
+//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
+//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
+//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
+//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
+//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
+//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
+//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
+//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
+//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
+//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
+//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
+//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
+//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
+//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
+//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
+//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
+//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
+//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
+//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
+//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
+//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
+//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
+//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
+//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>;
+//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>;
+//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>;
+//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>;
+//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
+//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
+
+let neverHasSideEffects = 1, isMoveImm = 1 in {
+defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
+} // End neverHasSideEffects = 1, isMoveImm = 1
+
+defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
+//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
+//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
+defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
+ [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
+>;
+//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
+//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
+ [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
+>;
+defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
+////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
+//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
+//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
+//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
+//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
+//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>;
+//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>;
+//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
+//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
+//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
+//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
+//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
+//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
+defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
+ [(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))]
+>;
+defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
+defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
+ [(set VReg_32:$dst, (fceil VSrc_32:$src0))]
+>;
+defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
+ [(set VReg_32:$dst, (frint VSrc_32:$src0))]
+>;
+defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
+ [(set VReg_32:$dst, (ffloor VSrc_32:$src0))]
+>;
+defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
+ [(set VReg_32:$dst, (fexp2 VSrc_32:$src0))]
+>;
+defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
+defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
+ [(set VReg_32:$dst, (flog2 VSrc_32:$src0))]
+>;
+defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
+defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
+defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
+ [(set VReg_32:$dst, (fdiv FP_ONE, VSrc_32:$src0))]
+>;
+defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
+defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
+defm V_RSQ_LEGACY_F32 : VOP1_32 <
+ 0x0000002d, "V_RSQ_LEGACY_F32",
+ [(set VReg_32:$dst, (int_AMDGPU_rsq VSrc_32:$src0))]
+>;
+defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
+defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
+defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
+defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
+defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
+defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>;
+defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>;
+defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
+defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
+defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
+defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
+defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
+defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>;
+defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>;
+//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>;
+defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>;
+defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>;
+//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>;
+defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>;
+//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>;
+defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
+defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
+defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
+
+def V_INTERP_P1_F32 : VINTRP <
+ 0x00000000,
+ (outs VReg_32:$dst),
+ (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]",
+ []> {
+ let DisableEncoding = "$m0";
+}
+
+def V_INTERP_P2_F32 : VINTRP <
+ 0x00000001,
+ (outs VReg_32:$dst),
+ (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
+ []> {
+
+ let Constraints = "$src0 = $dst";
+ let DisableEncoding = "$src0,$m0";
+
+}
+
+def V_INTERP_MOV_F32 : VINTRP <
+ 0x00000002,
+ (outs VReg_32:$dst),
+ (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]",
+ []> {
+ let DisableEncoding = "$m0";
+}
+
+//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
+
+let isTerminator = 1 in {
+
+def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
+ [(IL_retflag)]> {
+ let SIMM16 = 0;
+ let isBarrier = 1;
+ let hasCtrlDep = 1;
+}
+
+let isBranch = 1 in {
+def S_BRANCH : SOPP <
+ 0x00000002, (ins brtarget:$target), "S_BRANCH $target",
+ [(br bb:$target)]> {
+ let isBarrier = 1;
+}
+
+let DisableEncoding = "$scc" in {
+def S_CBRANCH_SCC0 : SOPP <
+ 0x00000004, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC0 $target", []
+>;
+def S_CBRANCH_SCC1 : SOPP <
+ 0x00000005, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC1 $target",
+ []
+>;
+} // End DisableEncoding = "$scc"
+
+def S_CBRANCH_VCCZ : SOPP <
+ 0x00000006, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCZ $target",
+ []
+>;
+def S_CBRANCH_VCCNZ : SOPP <
+ 0x00000007, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCNZ $target",
+ []
+>;
+
+let DisableEncoding = "$exec" in {
+def S_CBRANCH_EXECZ : SOPP <
+ 0x00000008, (ins brtarget:$target, EXECReg:$exec),
+ "S_CBRANCH_EXECZ $target",
+ []
+>;
+def S_CBRANCH_EXECNZ : SOPP <
+ 0x00000009, (ins brtarget:$target, EXECReg:$exec),
+ "S_CBRANCH_EXECNZ $target",
+ []
+>;
+} // End DisableEncoding = "$exec"
+
+
+} // End isBranch = 1
+} // End isTerminator = 1
+
+//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>;
+let hasSideEffects = 1 in {
+def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
+ []
+>;
+} // End hasSideEffects
+//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
+//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
+//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
+//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
+//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
+//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
+//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
+//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
+//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
+//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+
+def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
+ "V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]",
+ []
+> {
+ let DisableEncoding = "$vcc";
+}
+
+def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2,
+ InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
+ "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
+ [(set (i32 VReg_32:$dst), (select (i1 SSrc_64:$src2),
+ VSrc_32:$src1, VSrc_32:$src0))]
+>;
+
+// f32 pattern for V_CNDMASK_B32_e64
+def : Pat <
+ (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)),
+ (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2)
+>;
+
+defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
+defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
+
+let isCommutable = 1 in {
+defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
+ [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
+ [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))]
+>;
+defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
+} // End isCommutable = 1
+
+defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
+
+let isCommutable = 1 in {
+
+defm V_MUL_LEGACY_F32 : VOP2_32 <
+ 0x00000007, "V_MUL_LEGACY_F32",
+ [(set VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
+ [(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+} // End isCommutable = 1
+
+//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>;
+//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
+//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
+//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
+
+let isCommutable = 1 in {
+
+defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
+ [(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
+ [(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
+defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
+
+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
+ [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
+
+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
+ [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
+
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
+ [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
+defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
+
+defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
+ [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))]
+>;
+defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
+ [(set VReg_32:$dst, (or VSrc_32:$src0, VReg_32:$src1))]
+>;
+defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
+ [(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))]
+>;
+
+} // End isCommutable = 1
+
+defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
+defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
+defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
+defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
+//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
+//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
+//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
+
+let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
+ [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
+>;
+
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
+ [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
+>;
+defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
+
+let Uses = [VCC] in { // Carry-out comes from VCC
+defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
+defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
+defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
+} // End Uses = [VCC]
+} // End isCommutable = 1, Defs = [VCC]
+
+defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
+////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
+////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
+////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
+ [(set VReg_32:$dst, (int_SI_packf16 VSrc_32:$src0, VReg_32:$src1))]
+>;
+////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
+////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>;
+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>;
+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>;
+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>;
+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>;
+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>;
+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>;
+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>;
+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>;
+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>;
+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>;
+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
+//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+
+let neverHasSideEffects = 1 in {
+
+def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
+def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>;
+//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>;
+//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>;
+
+} // End neverHasSideEffects
+def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
+def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
+def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
+def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
+def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
+def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
+def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
+def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
+def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
+//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
+def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
+def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
+def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
+////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
+////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
+////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
+////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>;
+////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>;
+////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>;
+////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>;
+////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>;
+////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>;
+//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
+//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
+//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
+def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
+////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
+def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
+def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
+def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>;
+def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>;
+def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>;
+def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>;
+def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>;
+def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>;
+def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
+def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
+
+let isCommutable = 1 in {
+
+def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
+def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
+def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
+def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+
+} // isCommutable = 1
+
+def : Pat <
+ (mul VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
+def : Pat <
+ (mulhu VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
+def : Pat <
+ (mulhs VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
+
+def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
+def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
+//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
+//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
+//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
+def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
+def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
+def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
+def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>;
+def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>;
+def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>;
+def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>;
+def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
+def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
+def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
+def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
+
+def S_CSELECT_B32 : SOP2 <
+ 0x0000000a, (outs SReg_32:$dst),
+ (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
+ [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
+ SReg_32:$src0, SReg_32:$src1))]
+>;
+
+def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
+
+// f32 pattern for S_CSELECT_B32
+def : Pat <
+ (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
+ (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
+>;
+
+def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
+
+def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
+ [(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))]
+>;
+
+def : Pat <
+ (i1 (and SSrc_64:$src0, SSrc_64:$src1)),
+ (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1)
+>;
+
+def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
+def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
+def : Pat <
+ (i1 (or SSrc_64:$src0, SSrc_64:$src1)),
+ (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1)
+>;
+def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
+def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
+def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
+def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
+def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
+def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>;
+def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
+def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
+def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
+def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
+def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
+def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
+def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
+def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
+def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
+def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
+def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
+def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
+def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
+def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
+def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
+def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
+//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
+def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+def LOAD_CONST : AMDGPUShaderInst <
+ (outs GPRF32:$dst),
+ (ins i32imm:$src),
+ "LOAD_CONST $dst, $src",
+ [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
+>;
+
+// SI Pseudo instructions. These are used by the CFG structurizer pass
+// and should be lowered to ISA instructions prior to codegen.
+
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
+ Uses = [EXEC], Defs = [EXEC] in {
+
+let isBranch = 1, isTerminator = 1 in {
+
+def SI_IF : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$vcc, brtarget:$target),
+ "SI_IF $dst, $vcc, $target",
+ [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))]
+>;
+
+def SI_ELSE : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src, brtarget:$target),
+ "SI_ELSE $dst, $src, $target",
+ [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> {
+
+ let Constraints = "$src = $dst";
+}
+
+def SI_LOOP : InstSI <
+ (outs),
+ (ins SReg_64:$saved, brtarget:$target),
+ "SI_LOOP $saved, $target",
+ [(int_SI_loop SReg_64:$saved, bb:$target)]
+>;
+
+} // end isBranch = 1, isTerminator = 1
+
+def SI_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src),
+ "SI_ELSE $dst, $src",
+ [(set SReg_64:$dst, (int_SI_break SReg_64:$src))]
+>;
+
+def SI_IF_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$vcc, SReg_64:$src),
+ "SI_IF_BREAK $dst, $vcc, $src",
+ [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))]
+>;
+
+def SI_ELSE_BREAK : InstSI <
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src0, SReg_64:$src1),
+ "SI_ELSE_BREAK $dst, $src0, $src1",
+ [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))]
+>;
+
+def SI_END_CF : InstSI <
+ (outs),
+ (ins SReg_64:$saved),
+ "SI_END_CF $saved",
+ [(int_SI_end_cf SReg_64:$saved)]
+>;
+
+def SI_KILL : InstSI <
+ (outs),
+ (ins VReg_32:$src),
+ "SI_KIL $src",
+ [(int_AMDGPU_kill VReg_32:$src)]
+>;
+
+} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
+ // Uses = [EXEC], Defs = [EXEC]
+
+let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+
+def SI_INDIRECT_SRC : InstSI <
+ (outs VReg_32:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
+ []
+>;
+
+class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
+ (outs rc:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
+ "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
+ []
+> {
+ let Constraints = "$src = $dst";
+}
+
+def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
+def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
+def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
+def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
+
+} // End Uses = [EXEC], Defs = [EXEC,VCC,M0]
+
+} // End isCodeGenOnly = 1, isPseudo = 1
+
+def : Pat<
+ (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
+ (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0))
+>;
+
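+// int_AMDGPU_kilp kills the pixel unconditionally; 0xbf800000 is the IEEE-754
+// encoding of -1.0f, which is always negative and so always triggers SI_KILL.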
+def : Pat <
+ (int_AMDGPU_kilp),
+ (SI_KILL (V_MOV_B32_e32 0xbf800000))
+>;
+
+/* int_SI_vs_load_input */
+def : Pat<
+ (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
+ VReg_32:$buf_idx_vgpr),
+ (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
+ VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
+ 0, 0, 0)
+>;
+
+/* int_SI_export */
+def : Pat <
+ (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
+ VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
+>;
+
+
+/* int_SI_sample for simple 1D texture lookup */
+def : Pat <
+ (int_SI_sample imm:$writemask, VReg_32:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler, imm),
+ (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, imm),
+ (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
+ (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY),
+ (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleShadowPattern<Intrinsic name, MIMG opcode,
+ RegisterClass addr_class, ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW),
+ (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
+ RegisterClass addr_class, ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY),
+ (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+/* int_SI_sample* for texture lookups consuming more address parameters */
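+// The variants below differ only in which immediate flag operand is set (rect
+// vs. array addressing) or in selecting the IMAGE_SAMPLE_C* compare opcodes
+// for shadow lookups.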
+multiclass SamplePatterns<RegisterClass addr_class, ValueType addr_type> {
+ def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
+
+ def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
+
+ def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
+}
+
+defm : SamplePatterns<VReg_64, v2i32>;
+defm : SamplePatterns<VReg_128, v4i32>;
+defm : SamplePatterns<VReg_256, v8i32>;
+defm : SamplePatterns<VReg_512, v16i32>;
+
+/********** ============================================ **********/
+/********** Extraction, Insertion, Building and Casting **********/
+/********** ============================================ **********/
+
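+// Each foreach below instantiates one Extract_Element/Insert_Element pattern
+// per vector lane, tying lane N to subregister index subN.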
+foreach Index = 0-2 in {
+ def Extract_Element_v2i32_#Index : Extract_Element <
+ i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2i32_#Index : Insert_Element <
+ i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v2f32_#Index : Extract_Element <
+ f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2f32_#Index : Insert_Element <
+ f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-3 in {
+ def Extract_Element_v4i32_#Index : Extract_Element <
+ i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4i32_#Index : Insert_Element <
+ i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v4f32_#Index : Extract_Element <
+ f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4f32_#Index : Insert_Element <
+ f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-7 in {
+ def Extract_Element_v8i32_#Index : Extract_Element <
+ i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8i32_#Index : Insert_Element <
+ i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v8f32_#Index : Extract_Element <
+ f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8f32_#Index : Insert_Element <
+ f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-15 in {
+ def Extract_Element_v16i32_#Index : Extract_Element <
+ i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16i32_#Index : Insert_Element <
+ i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v16f32_#Index : Extract_Element <
+ f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16f32_#Index : Insert_Element <
+ f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
+def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
+def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>;
+def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>;
+def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
+def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>;
+def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
+def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>;
+
+def : BitConvert <i32, f32, SReg_32>;
+def : BitConvert <i32, f32, VReg_32>;
+
+def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <f32, i32, VReg_32>;
+
+/********** =================== **********/
+/********** Src & Dst modifiers **********/
+/********** =================== **********/
+
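+// Clamp, fabs and fneg are all selected to the VOP3 form of V_ADD_F32, using
+// its CLAMP, ABS and NEG encoding fields rather than any actual arithmetic.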
+def : Pat <
+ (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+>;
+
+def : Pat <
+ (fabs VReg_32:$src),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+>;
+
+def : Pat <
+ (fneg VReg_32:$src),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
+>;
+
+/********** ================== **********/
+/********** Immediate Patterns **********/
+/********** ================== **********/
+
+def : Pat <
+ (i32 imm:$imm),
+ (V_MOV_B32_e32 imm:$imm)
+>;
+
+def : Pat <
+ (f32 fpimm:$imm),
+ (V_MOV_B32_e32 fpimm:$imm)
+>;
+
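+// i1 values are per-wavefront condition masks held in 64-bit scalar
+// registers (one bit per lane), hence the 64-bit move.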
+def : Pat <
+ (i1 imm:$imm),
+ (S_MOV_B64 imm:$imm)
+>;
+
+def : Pat <
+ (i64 InlineImm<i64>:$imm),
+ (S_MOV_B64 InlineImm<i64>:$imm)
+>;
+
+// i64 immediates aren't supported in hardware; split them into two 32-bit values
+def : Pat <
+ (i64 imm:$imm),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (S_MOV_B32 (i32 (LO32 imm:$imm))), sub0),
+ (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1)
+>;
+
+/********** ====================== **********/
+/********** Interpolation Patterns **********/
+/********** ====================== **********/
+
+def : Pat <
+ (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
+ (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
+>;
+
+def : Pat <
+ (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij),
+ (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0),
+ imm:$attr_chan, imm:$attr, M0Reg:$params),
+ (EXTRACT_SUBREG VReg_64:$ij, sub1),
+ imm:$attr_chan, imm:$attr, M0Reg:$params)
+>;
+
+/********** ================== **********/
+/********** Intrinsic Patterns **********/
+/********** ================== **********/
+
+/* llvm.AMDGPU.pow */
+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
+
+def : Pat <
+ (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1),
+ (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1))
+>;
+
+def : Pat<
+ (fdiv VSrc_32:$src0, VSrc_32:$src1),
+ (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1))
+>;
+
+def : Pat <
+ (fcos VSrc_32:$src0),
+ (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+>;
+
+def : Pat <
+ (fsin VSrc_32:$src0),
+ (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+>;
+
+def : Pat <
+ (int_AMDGPU_cube VReg_128:$src),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
+ (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub0),
+ (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub1),
+ (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub2),
+ (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub3)
+>;
+
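+// Sign-extending an i1 mask yields 0 or -1 per lane, which is exactly a
+// conditional select between the two inline constants.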
+def : Pat <
+ (i32 (sext (i1 SReg_64:$src0))),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
+>;
+
+// 1. Offset as an 8-bit DWORD immediate
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
+>;
+
+// 2. Offset loaded in a 32-bit SGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, imm:$offset),
+ (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
+>;
+
+// 3. Offset in a 32-bit VGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
+ (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
+>;
+
+/********** ================== **********/
+/********** VOP3 Patterns **********/
+/********** ================== **********/
+
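+// Fold fadd(fmul(a, b), c) into a single MAD; the four trailing zeros are
+// presumably the VOP3 abs/clamp/omod/neg modifier operands left cleared.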
+def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
+ (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
+ 0, 0, 0, 0)>;
+
+/********** ================== **********/
+/********** SMRD Patterns **********/
+/********** ================== **********/
+
+multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+  // 1. Offset as an 8-bit DWORD immediate
+ def : Pat <
+ (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)),
+ (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset))
+ >;
+
+  // 2. Offset loaded in a 32-bit SGPR
+ def : Pat <
+ (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)),
+ (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset)))
+ >;
+
+ // 3. No offset at all
+ def : Pat <
+ (constant_load SReg_64:$sbase),
+ (vt (Instr_IMM SReg_64:$sbase, 0))
+ >;
+}
+
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
+defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+
+/********** ====================== **********/
+/********** Indirect addressing **********/
+/********** ====================== **********/
+
+multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
+ SI_INDIRECT_DST IndDst> {
+ // 1. Extract with offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
+ >;
+
+ // 2. Extract without offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
+ >;
+
+ // 3. Insert with offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
+ >;
+
+ // 4. Insert without offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
+ >;
+}
+
+defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
+
+/********** =============== **********/
+/********** Conditions **********/
+/********** =============== **********/
+
+def : Pat<
+ (i1 (setcc f32:$src0, f32:$src1, SETO)),
+ (V_CMP_O_F32_e64 f32:$src0, f32:$src1)
+>;
+
+def : Pat<
+ (i1 (setcc f32:$src0, f32:$src1, SETUO)),
+ (V_CMP_U_F32_e64 f32:$src0, f32:$src1)
+>;
+
+} // End isSI predicate
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
new file mode 100644
index 000000000000..0af378edfe2e
--- /dev/null
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -0,0 +1,42 @@
+//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SI Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+
+let TargetPrefix = "SI", isTarget = 1 in {
+
+ def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
+
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
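+  // Operand order: writemask, address vector, 256-bit resource descriptor,
+  // 128-bit sampler descriptor, texture-type immediate (TEX_RECT etc.).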
+
+ def int_SI_sample : Sample;
+ def int_SI_sampleb : Sample;
+ def int_SI_samplel : Sample;
+
+ /* Interpolation Intrinsics */
+
+ def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>;
+
+ /* Control flow Intrinsics */
+
+ def int_SI_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;
+ def int_SI_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>;
+ def int_SI_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
+ def int_SI_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>;
+ def int_SI_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>;
+ def int_SI_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>;
+ def int_SI_end_cf : Intrinsic<[], [llvm_i64_ty], []>;
+}
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
new file mode 100644
index 000000000000..2b60eb9fb375
--- /dev/null
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -0,0 +1,501 @@
+//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass lowers the pseudo control flow instructions to real
+/// machine instructions.
+///
+/// All control flow is handled using predicated instructions and
+/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
+/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
+/// by writing to the 64-bit EXEC register (each bit corresponds to a
+/// single vector ALU). Typically, for predicates, a vector ALU will write
+/// to its bit of the VCC register (like EXEC, VCC is 64 bits, one for each
+/// Vector ALU) and then the Scalar ALU will AND the VCC register with
+/// EXEC to update the predicates.
+///
+/// For example:
+/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
+/// %SGPR0 = SI_IF %VCC
+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
+/// %SGPR0 = SI_ELSE %SGPR0
+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
+/// SI_END_CF %SGPR0
+///
+/// becomes:
+///
+/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask
+/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
+/// S_CBRANCH_EXECZ label0 // This instruction is an optional
+/// // optimization which allows us to
+/// // branch if all the bits of
+/// // EXEC are zero.
+/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
+///
+/// label0:
+/// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC // Restore the exec mask for the Then block
+/// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask
+/// S_CBRANCH_EXECZ label1 // Use our branch optimization
+/// // instruction again.
+/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block
+/// label1:
+/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerControlFlowPass : public MachineFunctionPass {
+
+private:
+ static const unsigned SkipThreshold = 12;
+
+ static char ID;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
+
+ void Skip(MachineInstr &From, MachineOperand &To);
+ void SkipIfDead(MachineInstr &MI);
+
+ void If(MachineInstr &MI);
+ void Else(MachineInstr &MI);
+ void Break(MachineInstr &MI);
+ void IfBreak(MachineInstr &MI);
+ void ElseBreak(MachineInstr &MI);
+ void Loop(MachineInstr &MI);
+ void EndCf(MachineInstr &MI);
+
+ void Kill(MachineInstr &MI);
+ void Branch(MachineInstr &MI);
+
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+ void IndirectSrc(MachineInstr &MI);
+ void IndirectDst(MachineInstr &MI);
+
+public:
+ SILowerControlFlowPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TRI(tm.getRegisterInfo()),
+ TII(tm.getInstrInfo()) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "SI Lower control flow instructions";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SILowerControlFlowPass::ID = 0;
+
+FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
+ return new SILowerControlFlowPass(tm);
+}
+
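+// Walks the fall-through chain from From towards To, counting instructions;
+// returns true once SkipThreshold is reached, i.e. when the region is long
+// enough that branching over it beats executing it with EXEC == 0.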
+bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+
+ unsigned NumInstr = 0;
+
+ for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
+ MBB = *MBB->succ_begin()) {
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ NumInstr < SkipThreshold && I != E; ++I) {
+
+ if (I->isBundle() || !I->isBundled())
+ if (++NumInstr >= SkipThreshold)
+ return true;
+ }
+ }
+
+ return false;
+}
+
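+// Emit an S_CBRANCH_EXECZ over the region ending at To, but only when
+// shouldSkip judges the branch worthwhile.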
+void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+
+ if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
+ return;
+
+ DebugLoc DL = From.getDebugLoc();
+ BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ .addOperand(To)
+ .addReg(AMDGPU::EXEC);
+}
+
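+// If every lane of the wavefront has been killed, export a null pixel and end
+// the program early instead of executing the remaining (dead) instructions.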
+void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ if (!shouldSkip(&MBB, &MBB.getParent()->back()))
+ return;
+
+ MachineBasicBlock::iterator Insert = &MI;
+ ++Insert;
+
+ // If the exec mask is non-zero, skip the next two instructions
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(3)
+ .addReg(AMDGPU::EXEC);
+
+ // Exec mask is zero: Export to NULL target...
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
+ .addImm(0)
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addImm(0)
+ .addImm(1)
+ .addImm(1)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0);
+
+ // ... and terminate wavefront
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+}
+
+void SILowerControlFlowPass::If(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned Vcc = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg)
+ .addReg(Vcc);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Reg);
+
+ Skip(MI, MI.getOperand(2));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Else(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, MBB.getFirstNonPHI(), DL,
+ TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
+ .addReg(Src); // Saved EXEC
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Dst);
+
+ Skip(MI, MI.getOperand(2));
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Break(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IfBreak(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vcc = MI.getOperand(1).getReg();
+ unsigned Src = MI.getOperand(2).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(Vcc)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Saved = MI.getOperand(1).getReg();
+ unsigned Src = MI.getOperand(2).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(Saved)
+ .addReg(Src);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Loop(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Src = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Src);
+
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addOperand(MI.getOperand(1))
+ .addReg(AMDGPU::EXEC);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Reg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, MBB.getFirstNonPHI(), DL,
+ TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Reg);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::Branch(MachineInstr &MI) {
+ MachineBasicBlock *Next = MI.getParent()->getNextNode();
+ MachineBasicBlock *Target = MI.getOperand(0).getMBB();
+ if (Target == Next)
+ MI.eraseFromParent();
+ else
+ assert(0);
+}
+
+void SILowerControlFlowPass::Kill(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ // Kill is only allowed in pixel shaders
+ assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
+ ShaderType::PIXEL);
+
+ // Clear this pixel from the exec mask if the operand is negative
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+ .addImm(0)
+ .addOperand(MI.getOperand(0));
+
+ MI.eraseFromParent();
+}
+
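+// Loads the index operand into M0 so that MovRel can use it. A uniform
+// (SGPR) index is a single S_MOV_B32; a divergent (VGPR) index requires the
+// "waterfall" loop below, which handles one index value per iteration until
+// every active lane has been serviced.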
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I = MI;
+
+ unsigned Save = MI.getOperand(1).getReg();
+ unsigned Idx = MI.getOperand(3).getReg();
+
+ if (AMDGPU::SReg_32RegClass.contains(Idx)) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ MBB.insert(I, MovRel);
+ MI.eraseFromParent();
+ return;
+ }
+
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VReg_32RegClass.contains(Idx));
+
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
+
+ // Read the next variant into VCC (lower 32 bits) <- also loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
+ .addReg(Idx);
+
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC);
+
+ // Compare the just read M0 value to all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
+
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+
+ // Do the actual move
+ MBB.insert(I, MovRel);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
+
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+
+ MI.eraseFromParent();
+}
+
+void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vec = MI.getOperand(2).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+ .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Vec, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
+void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+ unsigned Val = MI.getOperand(5).getReg();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
+ .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(Val)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Dst, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
+bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
+
+ bool HaveKill = false;
+ bool NeedWQM = false;
+ unsigned Depth = 0;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::SI_IF:
+ ++Depth;
+ If(MI);
+ break;
+
+ case AMDGPU::SI_ELSE:
+ Else(MI);
+ break;
+
+ case AMDGPU::SI_BREAK:
+ Break(MI);
+ break;
+
+ case AMDGPU::SI_IF_BREAK:
+ IfBreak(MI);
+ break;
+
+ case AMDGPU::SI_ELSE_BREAK:
+ ElseBreak(MI);
+ break;
+
+ case AMDGPU::SI_LOOP:
+ ++Depth;
+ Loop(MI);
+ break;
+
+ case AMDGPU::SI_END_CF:
+ if (--Depth == 0 && HaveKill) {
+ SkipIfDead(MI);
+ HaveKill = false;
+ }
+ EndCf(MI);
+ break;
+
+ case AMDGPU::SI_KILL:
+ if (Depth == 0)
+ SkipIfDead(MI);
+ else
+ HaveKill = true;
+ Kill(MI);
+ break;
+
+ case AMDGPU::S_BRANCH:
+ Branch(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_SRC:
+ IndirectSrc(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_DST_V2:
+ case AMDGPU::SI_INDIRECT_DST_V4:
+ case AMDGPU::SI_INDIRECT_DST_V8:
+ case AMDGPU::SI_INDIRECT_DST_V16:
+ IndirectDst(MI);
+ break;
+
+ case AMDGPU::V_INTERP_P1_F32:
+ case AMDGPU::V_INTERP_P2_F32:
+ case AMDGPU::V_INTERP_MOV_F32:
+ NeedWQM = true;
+ break;
+
+ }
+ }
+ }
+
+ if (NeedWQM) {
+ MachineBasicBlock &MBB = MF.front();
+ BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
+ AMDGPU::EXEC).addReg(AMDGPU::EXEC);
+ }
+
+ return true;
+}
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..ee0e30755f01
--- /dev/null
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -0,0 +1,18 @@
+//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+
+#include "SIMachineFunctionInfo.h"
+
+using namespace llvm;
+
+SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
+ : AMDGPUMachineFunction(MF),
+ PSInputAddr(0) { }
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
new file mode 100644
index 000000000000..6da9f7f9a14d
--- /dev/null
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -0,0 +1,33 @@
+//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIMACHINEFUNCTIONINFO_H_
+#define SIMACHINEFUNCTIONINFO_H_
+
+#include "AMDGPUMachineFunction.h"
+
+namespace llvm {
+
+/// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
+/// tells the hardware which interpolation parameters to load.
+class SIMachineFunctionInfo : public AMDGPUMachineFunction {
+public:
+ SIMachineFunctionInfo(const MachineFunction &MF);
+ unsigned PSInputAddr;
+};
+
+} // End namespace llvm
+
+
+#endif // SIMACHINEFUNCTIONINFO_H_
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
new file mode 100644
index 000000000000..99278ae8dceb
--- /dev/null
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -0,0 +1,53 @@
+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "SIRegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDGPURegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
+
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved;
+}
+
+unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ return RC->getNumRegs();
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
+ switch (rc->getID()) {
+ case AMDGPU::GPRF32RegClassID:
+ return &AMDGPU::VReg_32RegClass;
+ default: return rc;
+ }
+}
+
+const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
+ MVT VT) const {
+ switch(VT.SimpleTy) {
+ default:
+ case MVT::i32: return &AMDGPU::VReg_32RegClass;
+ }
+}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
new file mode 100644
index 000000000000..caec22841345
--- /dev/null
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -0,0 +1,50 @@
+//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Interface definition for SIRegisterInfo
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SIREGISTERINFO_H_
+#define SIREGISTERINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct SIRegisterInfo : public AMDGPURegisterInfo {
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
+ /// \param RC is an AMDIL reg class.
+ ///
+ /// \returns the SI register class that is equivalent to \p RC.
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass *RC) const;
+
+  /// \brief Get the register class of the specified type to use in the
+ /// CFGStructurizer
+ virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+};
+
+} // End namespace llvm
+
+#endif // SIREGISTERINFO_H_
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
new file mode 100644
index 000000000000..4f14931a9c48
--- /dev/null
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -0,0 +1,182 @@
+//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the SI registers
+//===----------------------------------------------------------------------===//
+
+class SIReg <string n, bits<16> encoding = 0> : Register<n> {
+ let Namespace = "AMDGPU";
+ let HWEncoding = encoding;
+}
+
+// Special Registers
+def VCC : SIReg<"VCC", 106>;
+def EXEC : SIReg<"EXEC", 126>;
+def SCC : SIReg<"SCC", 253>;
+def M0 : SIReg <"M0", 124>;
+
+// SGPR registers
+foreach Index = 0-101 in {
+ def SGPR#Index : SIReg <"SGPR"#Index, Index>;
+}
+
+// VGPR registers
+foreach Index = 0-255 in {
+ def VGPR#Index : SIReg <"VGPR"#Index, Index> {
+ let HWEncoding{8} = 1;
+ }
+}
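+// Note (illustrative): with bit 8 set, the VGPR hardware encodings occupy
+// 256-511, disjoint from the scalar encodings above, which is presumably how
+// operand encodings distinguish VGPR sources from SGPRs.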
+
+//===----------------------------------------------------------------------===//
+// Groupings using register classes and tuples
+//===----------------------------------------------------------------------===//
+
+// SGPR 32-bit registers
+def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "SGPR%u", 0, 101))>;
+
+// SGPR 64-bit registers
+def SGPR_64 : RegisterTuples<[sub0, sub1],
+ [(add (decimate (trunc SGPR_32, 101), 2)),
+ (add (decimate (shl SGPR_32, 1), 2))]>;
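+// Illustrative note: trunc limits SGPR_32 to its first 101 registers, shl
+// drops leading elements, and decimate keeps every 2nd, so the tuples above
+// pair even/odd registers: SGPR0_SGPR1, SGPR2_SGPR3, ..., SGPR100_SGPR101.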
+
+// SGPR 128-bit registers
+def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+ [(add (decimate (trunc SGPR_32, 99), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4))]>;
+
+// SGPR 256-bit registers
+def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+ [(add (decimate (trunc SGPR_32, 95), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4))]>;
+
+// SGPR 512-bit registers
+def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add (decimate (trunc SGPR_32, 87), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4)),
+ (add (decimate (shl SGPR_32, 8), 4)),
+ (add (decimate (shl SGPR_32, 9), 4)),
+ (add (decimate (shl SGPR_32, 10), 4)),
+ (add (decimate (shl SGPR_32, 11), 4)),
+ (add (decimate (shl SGPR_32, 12), 4)),
+ (add (decimate (shl SGPR_32, 13), 4)),
+ (add (decimate (shl SGPR_32, 14), 4)),
+ (add (decimate (shl SGPR_32, 15), 4))]>;
+
+// VGPR 32-bit registers
+def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "VGPR%u", 0, 255))>;
+
+// VGPR 64-bit registers
+def VGPR_64 : RegisterTuples<[sub0, sub1],
+ [(add (trunc VGPR_32, 255)),
+ (add (shl VGPR_32, 1))]>;
+
+// VGPR 128-bit registers
+def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+ [(add (trunc VGPR_32, 253)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3))]>;
+
+// VGPR 256-bit registers
+def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+ [(add (trunc VGPR_32, 249)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7))]>;
+
+// VGPR 512-bit registers
+def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add (trunc VGPR_32, 241)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7)),
+ (add (shl VGPR_32, 8)),
+ (add (shl VGPR_32, 9)),
+ (add (shl VGPR_32, 10)),
+ (add (shl VGPR_32, 11)),
+ (add (shl VGPR_32, 12)),
+ (add (shl VGPR_32, 13)),
+ (add (shl VGPR_32, 14)),
+ (add (shl VGPR_32, 15))]>;
+
+//===----------------------------------------------------------------------===//
+// Register classes used as source and destination
+//===----------------------------------------------------------------------===//
+
+// Special register classes for predicates and the M0 register
+def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
+def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
+def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
+def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
+
+// Register class for all scalar registers (SGPRs + Special Registers)
+def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add SGPR_32, M0Reg)
+>;
+
+def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
+ (add SGPR_64, VCCReg, EXECReg)
+>;
+
+def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>;
+
+def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
+
+def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
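+// Note: the byte-vector value types above presumably model opaque resource
+// and sampler descriptors rather than genuine byte vectors.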
+
+// Register class for all vector registers (VGPRs + Interpolation Registers)
+def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
+
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
+
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
+
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
+
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
+
+//===----------------------------------------------------------------------===//
+// [SV]Src_* register classes: can have either an immediate or a register
+//===----------------------------------------------------------------------===//
+
+def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
+
+def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>;
+
+def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
+
+def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+
diff --git a/lib/Target/R600/SISchedule.td b/lib/Target/R600/SISchedule.td
new file mode 100644
index 000000000000..28b65b825855
--- /dev/null
+++ b/lib/Target/R600/SISchedule.td
@@ -0,0 +1,15 @@
+//===-- SISchedule.td - SI Scheduling definitions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: This is just a placeholder for now.
+//
+//===----------------------------------------------------------------------===//
+
+
+def SI_Itin : ProcessorItineraries <[], [], []>;
diff --git a/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp b/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
new file mode 100644
index 000000000000..46b1f18c6263
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
@@ -0,0 +1,26 @@
+//===-- TargetInfo/AMDGPUTargetInfo.cpp - TargetInfo for AMDGPU -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+/// \brief The target for the AMDGPU backend
+Target llvm::TheAMDGPUTarget;
+
+/// \brief Extern function to initialize the targets for the AMDGPU backend
+extern "C" void LLVMInitializeR600TargetInfo() {
+ RegisterTarget<Triple::r600, false>
+ R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
+}
diff --git a/lib/Target/R600/TargetInfo/CMakeLists.txt b/lib/Target/R600/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..3d1584eba346
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMR600Info
+ AMDGPUTargetInfo.cpp
+ )
+
+add_dependencies(LLVMR600Info AMDGPUCommonTableGen intrinsics_gen)
diff --git a/lib/Target/R600/TargetInfo/LLVMBuild.txt b/lib/Target/R600/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..4c6fea4aa08c
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/R600/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = R600Info
+parent = R600
+required_libraries = MC Support
+add_to_library_groups = R600
diff --git a/lib/Target/R600/TargetInfo/Makefile b/lib/Target/R600/TargetInfo/Makefile
new file mode 100644
index 000000000000..b8ac4e782302
--- /dev/null
+++ b/lib/Target/R600/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/R600/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMR600Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 8165f5b8cc97..a9aab86abdac 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -262,22 +262,7 @@ unsigned countbits_slow(unsigned v) {
c += v & 1;
return c;
}
-unsigned countbits_fast(unsigned v){
- unsigned c;
- for (c = 0; v; c++)
- v &= v - 1; // clear the least significant bit set
- return c;
-}
-BITBOARD = unsigned long long
-int PopCnt(register BITBOARD a) {
- register int c=0;
- while(a) {
- c++;
- a &= a - 1;
- }
- return c;
-}
unsigned int popcount(unsigned int input) {
unsigned int count = 0;
for (unsigned int i = 0; i < 4 * 8; i++)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 7bf8c3f85eca..6123773d5f4b 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -14,14 +14,14 @@
#define DEBUG_TYPE "delay-slot-filler"
#include "Sparc.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index 9a729bd87044..1325b98cf0ee 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -14,14 +14,14 @@
#define DEBUG_TYPE "fpmover"
#include "Sparc.h"
#include "SparcSubtarget.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
STATISTIC(NumFpDs , "Number of instructions translated");
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index f5e10fc3a465..3d4bfdcd5e6d 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -21,8 +21,9 @@ void SparcELFMCAsmInfo::anchor() { }
SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
IsLittleEndian = false;
Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::sparcv9)
- PointerSize = 8;
+ if (TheTriple.getArch() == Triple::sparcv9) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 25548625e760..e14b3cbf161d 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -16,15 +16,15 @@
#include "Sparc.h"
#include "SparcInstrInfo.h"
#include "SparcTargetMachine.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index d4712208126f..b38ac616dcf4 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -22,6 +22,14 @@ def RetCC_Sparc32 : CallingConv<[
CCIfType<[f64], CCAssignToReg<[D0, D1]>>
]>;
+// Sparc 64-bit C return-value convention.
+def RetCC_Sparc64 : CallingConv<[
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
// Sparc 32-bit C Calling convention.
def CC_Sparc32 : CallingConv<[
//Custom assign SRet to [sp+64].
@@ -34,3 +42,15 @@ def CC_Sparc32 : CallingConv<[
// Alternatively, they are assigned to the stack in 4-byte aligned units.
CCAssignToStack<4, 4>
]>;
+
+// Sparc 64-bit C Calling convention.
+def CC_Sparc64 : CallingConv<[
+ // All integers are promoted to i64 by the caller.
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ // Integer arguments get passed in integer registers if there is space.
+ CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ // FIXME: Floating point arguments.
+
+ // Alternatively, they are assigned to the stack in 8-byte aligned units.
+ CCAssignToStack<8, 8>
+]>;
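+
+// Illustrative example (not in the original source): given
+//   long f(int a, long b, int c);
+// the caller promotes a and c to i64, so the callee sees a, b and c in
+// %i0, %i1 and %i2, while a seventh integer argument would land in an
+// 8-byte aligned stack slot.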
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 716c79f43a26..a0dae6e9480c 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -14,15 +14,15 @@
#include "SparcFrameLowering.h"
#include "SparcInstrInfo.h"
#include "SparcMachineFunctionInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -67,6 +67,22 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
+void SparcFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
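+ // The ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos carry the call frame size;
+ // fold it into the stack pointer (%o6) with a single add, then erase the
+ // pseudo instruction.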
+ MachineInstr &MI = *I;
+ DebugLoc dl = MI.getDebugLoc();
+ int Size = MI.getOperand(0).getImm();
+ if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+ Size = -Size;
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (Size)
+ BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+ MBB.erase(I);
+}
+
+
void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 6b593c95bb10..464233e7da35 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -32,6 +32,10 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool hasFP(const MachineFunction &MF) const { return false; }
};
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 93710c4e0b0f..5fa545d30160 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "SparcTargetMachine.h"
-#include "llvm/Intrinsics.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 8e5619e6bc8d..325f13424b42 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -13,11 +13,8 @@
//===----------------------------------------------------------------------===//
#include "SparcISelLowering.h"
-#include "SparcTargetMachine.h"
#include "SparcMachineFunctionInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
+#include "SparcTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -25,6 +22,9 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -93,17 +93,13 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
DAG.getTarget(), RVLocs, *DAG.getContext());
 // Analyze return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
-
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (MF.getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
+ CCInfo.AnalyzeReturn(Outs, Subtarget->is64Bit() ?
+ RetCC_Sparc64 : RetCC_Sparc32);
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+ // Make room for the return address offset.
+ RetOps.push_back(SDValue());
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -115,6 +111,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
unsigned RetAddrOffset = 8; //Call Inst + Delay Slot
@@ -127,32 +124,47 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
Flag = Chain.getValue(1);
- if (MF.getRegInfo().liveout_empty())
- MF.getRegInfo().addLiveOut(SP::I0);
+ RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy()));
RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
}
- SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32);
+ RetOps[0] = Chain; // Update chain.
+ RetOps[1] = DAG.getConstant(RetAddrOffset, MVT::i32);
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
- RetAddrOffsetNode, Flag);
- return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
- RetAddrOffsetNode);
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
-/// LowerFormalArguments - V8 uses a very simple ABI, where all values are
+SDValue SparcTargetLowering::
+LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ if (Subtarget->is64Bit())
+ return LowerFormalArguments_64(Chain, CallConv, IsVarArg, Ins,
+ DL, DAG, InVals);
+ return LowerFormalArguments_32(Chain, CallConv, IsVarArg, Ins,
+ DL, DAG, InVals);
+}
+
+/// LowerFormalArguments_32 - V8 uses a very simple ABI, where all values are
/// passed in either one or two GPRs, including FP values. TODO: we should
/// pass FP values in FP registers for fastcc functions.
-SDValue
-SparcTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
+SDValue SparcTargetLowering::
+LowerFormalArguments_32(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
@@ -344,6 +356,63 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
return Chain;
}
+// Lower formal arguments for the 64-bit ABI.
+SDValue SparcTargetLowering::
+LowerFormalArguments_64(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Analyze arguments according to CC_Sparc64.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ // This argument is passed in a register.
+ // All integer register arguments are promoted by the caller to i64.
+
+ // Create a virtual register for the promoted live-in value.
+ unsigned VReg = MF.addLiveIn(VA.getLocReg(),
+ getRegClassFor(VA.getLocVT()));
+ SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
+
+ // The caller promoted the argument, so insert an Assert?ext SDNode so we
+ // won't promote the value again in this function.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ default:
+ break;
+ }
+
+ // Truncate the register down to the argument type.
+ if (VA.isExtInLoc())
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
+
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ // The registers are exhausted. This argument was passed on the stack.
+ assert(VA.isMemLoc());
+ }
+ return Chain;
+}
+
SDValue
SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -692,11 +761,14 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ Subtarget = &TM.getSubtarget<SparcSubtarget>();
// Set up the register classes.
addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
+ if (Subtarget->is64Bit())
+ addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
// Turn FP extload into load/fextend
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
@@ -752,6 +824,11 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ }
+
// FIXME: There are instructions available for ATOMIC_FENCE
// on SparcV8 and later.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
@@ -759,10 +836,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
@@ -819,8 +898,10 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::CMPICC: return "SPISD::CMPICC";
case SPISD::CMPFCC: return "SPISD::CMPFCC";
case SPISD::BRICC: return "SPISD::BRICC";
+ case SPISD::BRXCC: return "SPISD::BRXCC";
case SPISD::BRFCC: return "SPISD::BRFCC";
case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
+ case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC";
case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
case SPISD::Hi: return "SPISD::Hi";
case SPISD::Lo: return "SPISD::Lo";
@@ -847,6 +928,7 @@ void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
switch (Op.getOpcode()) {
default: break;
case SPISD::SELECT_ICC:
+ case SPISD::SELECT_XCC:
case SPISD::SELECT_FCC:
DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
@@ -867,7 +949,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
if (isa<ConstantSDNode>(RHS) &&
cast<ConstantSDNode>(RHS)->isNullValue() &&
CC == ISD::SETNE &&
- ((LHS.getOpcode() == SPISD::SELECT_ICC &&
+ (((LHS.getOpcode() == SPISD::SELECT_ICC ||
+ LHS.getOpcode() == SPISD::SELECT_XCC) &&
LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
(LHS.getOpcode() == SPISD::SELECT_FCC &&
LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
@@ -955,14 +1038,13 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
// Get the condition flag.
SDValue CompareFlag;
- if (LHS.getValueType() == MVT::i32) {
- std::vector<EVT> VTs;
- VTs.push_back(MVT::i32);
- VTs.push_back(MVT::Glue);
+ if (LHS.getValueType().isInteger()) {
+ EVT VTs[] = { LHS.getValueType(), MVT::Glue };
SDValue Ops[2] = { LHS, RHS };
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
- Opc = SPISD::BRICC;
+ // 32-bit compares use the icc flags, 64-bit compares use the xcc flags.
+ Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC;
} else {
CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
@@ -986,13 +1068,13 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
LookThroughSetCC(LHS, RHS, CC, SPCC);
SDValue CompareFlag;
- if (LHS.getValueType() == MVT::i32) {
- std::vector<EVT> VTs;
- VTs.push_back(LHS.getValueType()); // subcc returns a value
- VTs.push_back(MVT::Glue);
+ if (LHS.getValueType().isInteger()) {
+ // subcc returns a value
+ EVT VTs[] = { LHS.getValueType(), MVT::Glue };
SDValue Ops[2] = { LHS, RHS };
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
- Opc = SPISD::SELECT_ICC;
+ Opc = LHS.getValueType() == MVT::i32 ?
+ SPISD::SELECT_ICC : SPISD::SELECT_XCC;
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
} else {
CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 09148ea54027..aa2ef711a080 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -19,14 +19,18 @@
#include "llvm/Target/TargetLowering.h"
namespace llvm {
+ class SparcSubtarget;
+
namespace SPISD {
enum {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- CMPICC, // Compare two GPR operands, set icc.
+ CMPICC, // Compare two GPR operands, set icc+xcc.
CMPFCC, // Compare two FP operands, set fcc.
BRICC, // Branch to dest on icc condition
+ BRXCC, // Branch to dest on xcc condition (64-bit only).
BRFCC, // Branch to dest on fcc condition
SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_XCC, // Select between two values using the current XCC flags.
SELECT_FCC, // Select between two values using the current FCC flags.
Hi, Lo, // Hi/Lo operations, typically on a global address.
@@ -42,6 +46,7 @@ namespace llvm {
}
class SparcTargetLowering : public TargetLowering {
+ const SparcSubtarget *Subtarget;
public:
SparcTargetLowering(TargetMachine &TM);
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -74,6 +79,18 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerFormalArguments_32(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerFormalArguments_64(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td
new file mode 100644
index 000000000000..ca1153b3fe8f
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -0,0 +1,285 @@
+//===-- SparcInstr64Bit.td - 64-bit instructions for Sparc Target ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction definitions and patterns needed for 64-bit
+// code generation on SPARC v9.
+//
+// Some SPARC v9 instructions are defined in SparcInstrInfo.td because they can
+// also be used in 32-bit code running on a SPARC v9 CPU.
+//
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+// The same integer registers are used for i32 and i64 values.
+// When registers hold i32 values, the high bits are don't care.
+// This gives us free trunc and anyext.
+def : Pat<(i64 (anyext i32:$val)), (COPY_TO_REGCLASS $val, I64Regs)>;
+def : Pat<(i32 (trunc i64:$val)), (COPY_TO_REGCLASS $val, IntRegs)>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Shift Instructions.
+//===----------------------------------------------------------------------===//
+//
+// The 32-bit shift instructions are still available. The left shift sll
+// instruction shifts all 64 bits, but it only accepts a 5-bit shift amount.
+//
+// The srl instructions only shift the low 32 bits and clear the high 32 bits.
+// Finally, sra shifts the low 32 bits and sign-extends to 64 bits.
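+//
+// For example (illustrative), with %x = 0xffffffff00000001:
+//   srl %x, 0 -> 0x0000000000000001 (high word cleared)
+//   sra %x, 0 -> 0x0000000000000001 (low word sign-extended)
+//   sllx %x, 4 -> 0xfffffff000000010 (all 64 bits shifted)
+// This is why the zext/sext patterns below can use a zero-count SRLri/SRAri.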
+
+let Predicates = [Is64Bit] in {
+
+def : Pat<(i64 (zext i32:$val)), (SRLri $val, 0)>;
+def : Pat<(i64 (sext i32:$val)), (SRAri $val, 0)>;
+
+defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>;
+defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, I64Regs>;
+defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Immediates.
+//===----------------------------------------------------------------------===//
+//
+// All 32-bit immediates can be materialized with sethi+or, but 64-bit
+// immediates may require more code. There may be a point where it is
+// preferable to use a constant pool load instead, depending on the
+// microarchitecture.
+
+// The %g0 register is constant 0.
+// This is useful for stx %g0, [...], for example.
+def : Pat<(i64 0), (i64 G0)>, Requires<[Is64Bit]>;
+
+// Single-instruction patterns.
+
+// The ALU instructions want their simm13 operands as i32 immediates.
+def as_i32imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
+}]>;
+def : Pat<(i64 simm13:$val), (ORri (i64 G0), (as_i32imm $val))>;
+def : Pat<(i64 SETHIimm:$val), (SETHIi (HI22 $val))>;
+
+// Double-instruction patterns.
+
+// All unsigned i32 immediates can be handled by sethi+or.
+def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
+def : Pat<(i64 uimm32:$val), (ORri (SETHIi (HI22 $val)), (LO10 $val))>,
+ Requires<[Is64Bit]>;
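+// For example (illustrative), i64 0xdeadbeef would materialize as:
+//   sethi %hi(0xdeadbeef), %rd ! %rd = 0xdeadbc00 (bits 31-10)
+//   or %rd, %lo(0xdeadbeef), %rd ! fill in bits 9-0 -> 0xdeadbeef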
+
+// All negative i33 immediates can be handled by sethi+xor.
+def nimm33 : PatLeaf<(imm), [{
+ int64_t Imm = N->getSExtValue();
+ return Imm < 0 && isInt<33>(Imm);
+}]>;
+// Bits 10-31 inverted. Same as assembler's %hix.
+def HIX22 : SDNodeXForm<imm, [{
+ uint64_t Val = (~N->getZExtValue() >> 10) & ((1u << 22) - 1);
+ return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+// Bits 0-9 with ones in bits 10-31. Same as assembler's %lox.
+def LOX10 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~(~N->getZExtValue() & 0x3ff), MVT::i32);
+}]>;
+def : Pat<(i64 nimm33:$val), (XORri (SETHIi (HIX22 $val)), (LOX10 $val))>,
+ Requires<[Is64Bit]>;
+
+// More possible patterns:
+//
+// (sllx sethi, n)
+// (sllx simm13, n)
+//
+// 3 instrs:
+//
+// (xor (sllx sethi), simm13)
+// (sllx (xor sethi, simm13))
+//
+// 4 instrs:
+//
+// (or sethi, (sllx sethi))
+// (xnor sethi, (sllx sethi))
+//
+// 5 instrs:
+//
+// (or (sllx sethi), (or sethi, simm13))
+// (xnor (sllx sethi), (or sethi, simm13))
+// (or (sllx sethi), (sllx sethi))
+// (xnor (sllx sethi), (sllx sethi))
+//
+// Worst case is 6 instrs:
+//
+// (or (sllx (or sethi, simm13)), (or sethi, simm13))
+
+// Bits 42-63, same as assembler's %hh.
+def HH22 : SDNodeXForm<imm, [{
+ uint64_t Val = (N->getZExtValue() >> 42) & ((1u << 22) - 1);
+ return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+// Bits 32-41, same as assembler's %hm.
+def HM10 : SDNodeXForm<imm, [{
+ uint64_t Val = (N->getZExtValue() >> 32) & ((1u << 10) - 1);
+ return CurDAG->getTargetConstant(Val, MVT::i32);
+}]>;
+def : Pat<(i64 imm:$val),
+ (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i64 32)),
+ (ORri (SETHIi (HI22 $val)), (LO10 $val)))>,
+ Requires<[Is64Bit]>;
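+// The generic pattern above is the 6-instruction worst case: sethi+or build
+// bits 63-32 (%hh/%hm), sllx moves them into place, a second sethi+or pair
+// builds bits 31-0 (%hi/%lo), and a final or merges the two halves.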
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Integer Arithmetic and Logic.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+
+// Register-register instructions.
+
+def : Pat<(and i64:$a, i64:$b), (ANDrr $a, $b)>;
+def : Pat<(or i64:$a, i64:$b), (ORrr $a, $b)>;
+def : Pat<(xor i64:$a, i64:$b), (XORrr $a, $b)>;
+
+def : Pat<(and i64:$a, (not i64:$b)), (ANDNrr $a, $b)>;
+def : Pat<(or i64:$a, (not i64:$b)), (ORNrr $a, $b)>;
+def : Pat<(xor i64:$a, (not i64:$b)), (XNORrr $a, $b)>;
+
+def : Pat<(add i64:$a, i64:$b), (ADDrr $a, $b)>;
+def : Pat<(sub i64:$a, i64:$b), (SUBrr $a, $b)>;
+
+// Add/sub with carry were renamed to addc/subc in SPARC v9.
+def : Pat<(adde i64:$a, i64:$b), (ADDXrr $a, $b)>;
+def : Pat<(sube i64:$a, i64:$b), (SUBXrr $a, $b)>;
+
+def : Pat<(addc i64:$a, i64:$b), (ADDCCrr $a, $b)>;
+def : Pat<(subc i64:$a, i64:$b), (SUBCCrr $a, $b)>;
+
+def : Pat<(SPcmpicc i64:$a, i64:$b), (SUBCCrr $a, $b)>;
+
+// Register-immediate instructions.
+
+def : Pat<(and i64:$a, (i64 simm13:$b)), (ANDri $a, (as_i32imm $b))>;
+def : Pat<(or i64:$a, (i64 simm13:$b)), (ORri $a, (as_i32imm $b))>;
+def : Pat<(xor i64:$a, (i64 simm13:$b)), (XORri $a, (as_i32imm $b))>;
+
+def : Pat<(add i64:$a, (i64 simm13:$b)), (ADDri $a, (as_i32imm $b))>;
+def : Pat<(sub i64:$a, (i64 simm13:$b)), (SUBri $a, (as_i32imm $b))>;
+
+def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (SUBCCri $a, (as_i32imm $b))>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Loads and Stores.
+//===----------------------------------------------------------------------===//
+//
+// All the 32-bit loads and stores are available. The extending loads are sign
+// or zero-extending to 64 bits. The LDrr and LDri instructions load 32 bits
+// zero-extended to i64. Their mnemonic is lduw in SPARC v9 (Load Unsigned
+// Word).
+//
+// SPARC v9 adds 64-bit loads as well as sign-extending ldsw i32 loads.
+
+let Predicates = [Is64Bit] in {
+
+// 64-bit loads.
+def LDXrr : F3_1<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMrr:$addr),
+ "ldx [$addr], $dst",
+ [(set i64:$dst, (load ADDRrr:$addr))]>;
+def LDXri : F3_2<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMri:$addr),
+ "ldx [$addr], $dst",
+ [(set i64:$dst, (load ADDRri:$addr))]>;
+
+// Extending loads to i64.
+def : Pat<(i64 (zextloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>;
+def : Pat<(i64 (sextloadi8 ADDRrr:$addr)), (LDSBrr ADDRrr:$addr)>;
+def : Pat<(i64 (sextloadi8 ADDRri:$addr)), (LDSBri ADDRri:$addr)>;
+
+def : Pat<(i64 (zextloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>;
+def : Pat<(i64 (sextloadi16 ADDRrr:$addr)), (LDSHrr ADDRrr:$addr)>;
+def : Pat<(i64 (sextloadi16 ADDRri:$addr)), (LDSHri ADDRri:$addr)>;
+
+def : Pat<(i64 (zextloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>;
+def : Pat<(i64 (zextloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>;
+
+// Sign-extending load of i32 into i64 is a new SPARC v9 instruction.
+def LDSWrr : F3_1<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMrr:$addr),
+ "ldsw [$addr], $dst",
+ [(set i64:$dst, (sextloadi32 ADDRrr:$addr))]>;
+def LDSWri : F3_2<3, 0b001011,
+ (outs I64Regs:$dst), (ins MEMri:$addr),
+ "ldsw [$addr], $dst",
+ [(set i64:$dst, (sextloadi32 ADDRri:$addr))]>;
+
+// 64-bit stores.
+def STXrr : F3_1<3, 0b001110,
+ (outs), (ins MEMrr:$addr, I64Regs:$src),
+ "stx $src, [$addr]",
+ [(store i64:$src, ADDRrr:$addr)]>;
+def STXri : F3_2<3, 0b001110,
+ (outs), (ins MEMri:$addr, I64Regs:$src),
+ "stx $src, [$addr]",
+ [(store i64:$src, ADDRri:$addr)]>;
+
+// Truncating stores from i64 are identical to the i32 stores.
+def : Pat<(truncstorei8 i64:$src, ADDRrr:$addr), (STBrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei8 i64:$src, ADDRri:$addr), (STBri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRrr:$addr), (STHrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRri:$addr), (STHri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRrr:$addr), (STrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRri:$addr), (STri ADDRri:$addr, $src)>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Conditionals.
+//===----------------------------------------------------------------------===//
+//
+// Flag-setting instructions like subcc and addcc set both icc and xcc flags.
+// The icc flags correspond to the 32-bit result, and the xcc flags are for
+// the full 64-bit result.
+//
+// We reuse CMPICC SDNodes for compares, but use new BRXCC branch nodes for
+// 64-bit compares. See LowerBR_CC.
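+//
+// For example (illustrative), a 64-bit compare-and-branch might lower to:
+//   subcc %i0, %i1, %g0 ! sets both icc and xcc
+//   bpg %xcc, target ! branch on the 64-bit (xcc) flags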
+
+let Predicates = [Is64Bit] in {
+
+let Uses = [ICC] in
+def BPXCC : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "bp$cc %xcc, $dst",
+ [(SPbrxcc bb:$dst, imm:$cc)]>;
+
+// Conditional moves on %xcc.
+let Uses = [ICC], Constraints = "$f = $rd" in {
+def MOVXCCrr : Pseudo<(outs IntRegs:$rd),
+ (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond),
+ "mov$cond %xcc, $rs2, $rd",
+ [(set i32:$rd,
+ (SPselectxcc i32:$rs2, i32:$f, imm:$cond))]>;
+def MOVXCCri : Pseudo<(outs IntRegs:$rd),
+ (ins i32imm:$i, IntRegs:$f, CCOp:$cond),
+ "mov$cond %xcc, $i, $rd",
+ [(set i32:$rd,
+ (SPselectxcc simm11:$i, i32:$f, imm:$cond))]>;
+} // Uses, Constraints
+
+def : Pat<(SPselectxcc i64:$t, i64:$f, imm:$cond),
+ (MOVXCCrr $t, $f, imm:$cond)>;
+def : Pat<(SPselectxcc (i64 simm11:$t), i64:$f, imm:$cond),
+ (MOVXCCri (as_i32imm $t), $f, imm:$cond)>;
+
+} // Predicates = [Is64Bit]
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
index dce331228b8f..f1018569153c 100644
--- a/lib/Target/Sparc/SparcInstrFormats.td
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -111,4 +111,41 @@ class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
let Inst{4-0} = rs2;
}
+// Shift by register rs2.
+class F3_Sr<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bit x = xVal; // 1 for 64-bit shifts.
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 0; // i field = 0
+ let Inst{12} = x; // extended registers.
+ let Inst{4-0} = rs2;
+}
+// Shift by immediate.
+class F3_Si<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bit x = xVal; // 1 for 64-bit shifts.
+ bits<6> shcnt; // shcnt32 / shcnt64.
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 1; // i field = 1
+ let Inst{12} = x; // extended registers.
+ let Inst{5-0} = shcnt;
+}
+
+// Define rr and ri shift instructions with patterns.
+multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode,
+ ValueType VT, RegisterClass RC> {
+ def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, RC:$rs2),
+ !strconcat(OpcStr, " $rs, $rs2, $rd"),
+ [(set VT:$rd, (OpNode VT:$rs, VT:$rs2))]>;
+ def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, unknown:$shcnt),
+ !strconcat(OpcStr, " $rs, $shcnt, $rd"),
+ [(set VT:$rd, (OpNode VT:$rs, (VT imm:$shcnt)))]>;
+}
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index f8674d0bd660..39d7329f2663 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -15,12 +15,12 @@
#include "Sparc.h"
#include "SparcMachineFunctionInfo.h"
#include "SparcSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#define GET_INSTRINFO_CTOR
#include "SparcGenInstrInfo.inc"
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index e64c140e4921..5ff439583c5c 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -21,6 +21,12 @@ include "SparcInstrFormats.td"
// Feature predicates.
//===----------------------------------------------------------------------===//
+// True when generating 32-bit code.
+def Is32Bit : Predicate<"!Subtarget.is64Bit()">;
+
+// True when generating 64-bit code. This also implies HasV9.
+def Is64Bit : Predicate<"Subtarget.is64Bit()">;
+
// HasV9 - This predicate is true when the target processor supports V9
// instructions. Note that the machine may be running in 32-bit mode.
def HasV9 : Predicate<"Subtarget.isV9()">;
@@ -63,17 +69,17 @@ def SETHIimm : PatLeaf<(imm), [{
}], HI22>;
// Addressing modes.
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRrr : ComplexPattern<iPTR, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [frameindex], []>;
// Address operands
-def MEMrr : Operand<i32> {
+def MEMrr : Operand<iPTR> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
+ let MIOperandInfo = (ops ptr_rc, ptr_rc);
}
-def MEMri : Operand<i32> {
+def MEMri : Operand<iPTR> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops IntRegs, i32imm);
+ let MIOperandInfo = (ops ptr_rc, i32imm);
}
// Branch targets have OtherVT type.
@@ -98,6 +104,7 @@ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>;
def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrxcc : SDNode<"SPISD::BRXCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
@@ -107,6 +114,7 @@ def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>;
def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
// These are target-independent nodes, but have target-specific formats.
@@ -126,7 +134,7 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall,
def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
[SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
@@ -182,11 +190,11 @@ multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
def rr : F3_1<2, Op3Val,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
!strconcat(OpcStr, " $b, $c, $dst"),
- [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ [(set i32:$dst, (OpNode i32:$b, i32:$c))]>;
def ri : F3_2<2, Op3Val,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
!strconcat(OpcStr, " $b, $c, $dst"),
- [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+ [(set i32:$dst, (OpNode i32:$b, (i32 simm13:$c)))]>;
}
/// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
@@ -243,10 +251,10 @@ let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
"!FpMOVD $src, $dst", []>;
def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
"!FpNEGD $src, $dst",
- [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ [(set f64:$dst, (fneg f64:$src))]>;
def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
"!FpABSD $src, $dst",
- [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+ [(set f64:$dst, (fabs f64:$src))]>;
}
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
@@ -257,19 +265,16 @@ let Uses = [ICC], usesCustomInserter = 1 in {
def SELECT_CC_Int_ICC
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
"; SELECT_CC_Int_ICC PSEUDO!",
- [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
- imm:$Cond))]>;
+ [(set i32:$dst, (SPselecticc i32:$T, i32:$F, imm:$Cond))]>;
def SELECT_CC_FP_ICC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
"; SELECT_CC_FP_ICC PSEUDO!",
- [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
- imm:$Cond))]>;
+ [(set f32:$dst, (SPselecticc f32:$T, f32:$F, imm:$Cond))]>;
def SELECT_CC_DFP_ICC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_ICC PSEUDO!",
- [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
- imm:$Cond))]>;
+ [(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>;
}
let usesCustomInserter = 1, Uses = [FCC] in {
@@ -277,19 +282,16 @@ let usesCustomInserter = 1, Uses = [FCC] in {
def SELECT_CC_Int_FCC
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
"; SELECT_CC_Int_FCC PSEUDO!",
- [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
- imm:$Cond))]>;
+ [(set i32:$dst, (SPselectfcc i32:$T, i32:$F, imm:$Cond))]>;
def SELECT_CC_FP_FCC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
"; SELECT_CC_FP_FCC PSEUDO!",
- [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
- imm:$Cond))]>;
+ [(set f32:$dst, (SPselectfcc f32:$T, f32:$F, imm:$Cond))]>;
def SELECT_CC_DFP_FCC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_FCC PSEUDO!",
- [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
- imm:$Cond))]>;
+ [(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>;
}
@@ -309,111 +311,111 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
def LDSBrr : F3_1<3, 0b001001,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ldsb [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>;
+ [(set i32:$dst, (sextloadi8 ADDRrr:$addr))]>;
def LDSBri : F3_2<3, 0b001001,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ldsb [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>;
+ [(set i32:$dst, (sextloadi8 ADDRri:$addr))]>;
def LDSHrr : F3_1<3, 0b001010,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ldsh [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>;
+ [(set i32:$dst, (sextloadi16 ADDRrr:$addr))]>;
def LDSHri : F3_2<3, 0b001010,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ldsh [$addr], $dst",
- [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>;
+ [(set i32:$dst, (sextloadi16 ADDRri:$addr))]>;
def LDUBrr : F3_1<3, 0b000001,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ldub [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>;
+ [(set i32:$dst, (zextloadi8 ADDRrr:$addr))]>;
def LDUBri : F3_2<3, 0b000001,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ldub [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>;
+ [(set i32:$dst, (zextloadi8 ADDRri:$addr))]>;
def LDUHrr : F3_1<3, 0b000010,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"lduh [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>;
+ [(set i32:$dst, (zextloadi16 ADDRrr:$addr))]>;
def LDUHri : F3_2<3, 0b000010,
(outs IntRegs:$dst), (ins MEMri:$addr),
"lduh [$addr], $dst",
- [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>;
+ [(set i32:$dst, (zextloadi16 ADDRri:$addr))]>;
def LDrr : F3_1<3, 0b000000,
(outs IntRegs:$dst), (ins MEMrr:$addr),
"ld [$addr], $dst",
- [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+ [(set i32:$dst, (load ADDRrr:$addr))]>;
def LDri : F3_2<3, 0b000000,
(outs IntRegs:$dst), (ins MEMri:$addr),
"ld [$addr], $dst",
- [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+ [(set i32:$dst, (load ADDRri:$addr))]>;
// Section B.2 - Load Floating-point Instructions, p. 92
def LDFrr : F3_1<3, 0b100000,
(outs FPRegs:$dst), (ins MEMrr:$addr),
"ld [$addr], $dst",
- [(set FPRegs:$dst, (load ADDRrr:$addr))]>;
+ [(set f32:$dst, (load ADDRrr:$addr))]>;
def LDFri : F3_2<3, 0b100000,
(outs FPRegs:$dst), (ins MEMri:$addr),
"ld [$addr], $dst",
- [(set FPRegs:$dst, (load ADDRri:$addr))]>;
+ [(set f32:$dst, (load ADDRri:$addr))]>;
def LDDFrr : F3_1<3, 0b100011,
(outs DFPRegs:$dst), (ins MEMrr:$addr),
"ldd [$addr], $dst",
- [(set DFPRegs:$dst, (load ADDRrr:$addr))]>;
+ [(set f64:$dst, (load ADDRrr:$addr))]>;
def LDDFri : F3_2<3, 0b100011,
(outs DFPRegs:$dst), (ins MEMri:$addr),
"ldd [$addr], $dst",
- [(set DFPRegs:$dst, (load ADDRri:$addr))]>;
+ [(set f64:$dst, (load ADDRri:$addr))]>;
// Section B.4 - Store Integer Instructions, p. 95
def STBrr : F3_1<3, 0b000101,
(outs), (ins MEMrr:$addr, IntRegs:$src),
"stb $src, [$addr]",
- [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>;
+ [(truncstorei8 i32:$src, ADDRrr:$addr)]>;
def STBri : F3_2<3, 0b000101,
(outs), (ins MEMri:$addr, IntRegs:$src),
"stb $src, [$addr]",
- [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>;
+ [(truncstorei8 i32:$src, ADDRri:$addr)]>;
def STHrr : F3_1<3, 0b000110,
(outs), (ins MEMrr:$addr, IntRegs:$src),
"sth $src, [$addr]",
- [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>;
+ [(truncstorei16 i32:$src, ADDRrr:$addr)]>;
def STHri : F3_2<3, 0b000110,
(outs), (ins MEMri:$addr, IntRegs:$src),
"sth $src, [$addr]",
- [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>;
+ [(truncstorei16 i32:$src, ADDRri:$addr)]>;
def STrr : F3_1<3, 0b000100,
(outs), (ins MEMrr:$addr, IntRegs:$src),
"st $src, [$addr]",
- [(store IntRegs:$src, ADDRrr:$addr)]>;
+ [(store i32:$src, ADDRrr:$addr)]>;
def STri : F3_2<3, 0b000100,
(outs), (ins MEMri:$addr, IntRegs:$src),
"st $src, [$addr]",
- [(store IntRegs:$src, ADDRri:$addr)]>;
+ [(store i32:$src, ADDRri:$addr)]>;
// Section B.5 - Store Floating-point Instructions, p. 97
def STFrr : F3_1<3, 0b100100,
(outs), (ins MEMrr:$addr, FPRegs:$src),
"st $src, [$addr]",
- [(store FPRegs:$src, ADDRrr:$addr)]>;
+ [(store f32:$src, ADDRrr:$addr)]>;
def STFri : F3_2<3, 0b100100,
(outs), (ins MEMri:$addr, FPRegs:$src),
"st $src, [$addr]",
- [(store FPRegs:$src, ADDRri:$addr)]>;
+ [(store f32:$src, ADDRri:$addr)]>;
def STDFrr : F3_1<3, 0b100111,
(outs), (ins MEMrr:$addr, DFPRegs:$src),
"std $src, [$addr]",
- [(store DFPRegs:$src, ADDRrr:$addr)]>;
+ [(store f64:$src, ADDRrr:$addr)]>;
def STDFri : F3_2<3, 0b100111,
(outs), (ins MEMri:$addr, DFPRegs:$src),
"std $src, [$addr]",
- [(store DFPRegs:$src, ADDRri:$addr)]>;
+ [(store f64:$src, ADDRri:$addr)]>;
// Section B.9 - SETHI Instruction, p. 104
def SETHIi: F2_1<0b100,
(outs IntRegs:$dst), (ins i32imm:$src),
"sethi $src, $dst",
- [(set IntRegs:$dst, SETHIimm:$src)]>;
+ [(set i32:$dst, SETHIimm:$src)]>;
// Section B.10 - NOP Instruction, p. 105
// (It's a special case of SETHI)
@@ -426,7 +428,7 @@ defm AND : F3_12<"and", 0b000001, and>;
def ANDNrr : F3_1<2, 0b000101,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
"andn $b, $c, $dst",
- [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>;
+ [(set i32:$dst, (and i32:$b, (not i32:$c)))]>;
def ANDNri : F3_2<2, 0b000101,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
"andn $b, $c, $dst", []>;
@@ -436,7 +438,7 @@ defm OR : F3_12<"or", 0b000010, or>;
def ORNrr : F3_1<2, 0b000110,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
"orn $b, $c, $dst",
- [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>;
+ [(set i32:$dst, (or i32:$b, (not i32:$c)))]>;
def ORNri : F3_2<2, 0b000110,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
"orn $b, $c, $dst", []>;
@@ -445,7 +447,7 @@ defm XOR : F3_12<"xor", 0b000011, xor>;
def XNORrr : F3_1<2, 0b000111,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
"xnor $b, $c, $dst",
- [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+ [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>;
def XNORri : F3_2<2, 0b000111,
(outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
"xnor $b, $c, $dst", []>;
@@ -462,7 +464,7 @@ defm ADD : F3_12<"add", 0b000000, add>;
def LEA_ADDri : F3_2<2, 0b000000,
(outs IntRegs:$dst), (ins MEMri:$addr),
"add ${addr:arith}, $dst",
- [(set IntRegs:$dst, ADDRri:$addr)]>;
+ [(set i32:$dst, ADDRri:$addr)]>;
let Defs = [ICC] in
defm ADDCC : F3_12<"addcc", 0b010000, addc>;
@@ -603,11 +605,11 @@ def FDTOI : F3_3<2, 0b110100, 0b011010010,
def FSTOD : F3_3<2, 0b110100, 0b011001001,
(outs DFPRegs:$dst), (ins FPRegs:$src),
"fstod $src, $dst",
- [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+ [(set f64:$dst, (fextend f32:$src))]>;
def FDTOS : F3_3<2, 0b110100, 0b011000110,
(outs FPRegs:$dst), (ins DFPRegs:$src),
"fdtos $src, $dst",
- [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+ [(set f32:$dst, (fround f64:$src))]>;
// Floating-point Move Instructions, p. 144
def FMOVS : F3_3<2, 0b110100, 0b000000001,
@@ -616,22 +618,22 @@ def FMOVS : F3_3<2, 0b110100, 0b000000001,
def FNEGS : F3_3<2, 0b110100, 0b000000101,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fnegs $src, $dst",
- [(set FPRegs:$dst, (fneg FPRegs:$src))]>;
+ [(set f32:$dst, (fneg f32:$src))]>;
def FABSS : F3_3<2, 0b110100, 0b000001001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fabss $src, $dst",
- [(set FPRegs:$dst, (fabs FPRegs:$src))]>;
+ [(set f32:$dst, (fabs f32:$src))]>;
// Floating-point Square Root Instructions, p.145
def FSQRTS : F3_3<2, 0b110100, 0b000101001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fsqrts $src, $dst",
- [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>;
+ [(set f32:$dst, (fsqrt f32:$src))]>;
def FSQRTD : F3_3<2, 0b110100, 0b000101010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fsqrtd $src, $dst",
- [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>;
+ [(set f64:$dst, (fsqrt f64:$src))]>;
@@ -639,42 +641,42 @@ def FSQRTD : F3_3<2, 0b110100, 0b000101010,
def FADDS : F3_3<2, 0b110100, 0b001000001,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fadds $src1, $src2, $dst",
- [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fadd f32:$src1, f32:$src2))]>;
def FADDD : F3_3<2, 0b110100, 0b001000010,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"faddd $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fadd f64:$src1, f64:$src2))]>;
def FSUBS : F3_3<2, 0b110100, 0b001000101,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fsubs $src1, $src2, $dst",
- [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fsub f32:$src1, f32:$src2))]>;
def FSUBD : F3_3<2, 0b110100, 0b001000110,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fsubd $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fsub f64:$src1, f64:$src2))]>;
// Floating-point Multiply and Divide Instructions, p. 147
def FMULS : F3_3<2, 0b110100, 0b001001001,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fmuls $src1, $src2, $dst",
- [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fmul f32:$src1, f32:$src2))]>;
def FMULD : F3_3<2, 0b110100, 0b001001010,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fmuld $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fmul f64:$src1, f64:$src2))]>;
def FSMULD : F3_3<2, 0b110100, 0b001101001,
(outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fsmuld $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1),
- (fextend FPRegs:$src2)))]>;
+ [(set f64:$dst, (fmul (fextend f32:$src1),
+ (fextend f32:$src2)))]>;
def FDIVS : F3_3<2, 0b110100, 0b001001101,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fdivs $src1, $src2, $dst",
- [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>;
+ [(set f32:$dst, (fdiv f32:$src1, f32:$src2))]>;
def FDIVD : F3_3<2, 0b110100, 0b001001110,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fdivd $src1, $src2, $dst",
- [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>;
+ [(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>;
// Floating-point Compare Instructions, p. 148
// Note: the 2nd template arg is different for these guys.
@@ -685,11 +687,11 @@ let Defs = [FCC] in {
def FCMPS : F3_3<2, 0b110101, 0b001010001,
(outs), (ins FPRegs:$src1, FPRegs:$src2),
"fcmps $src1, $src2\n\tnop",
- [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+ [(SPcmpfcc f32:$src1, f32:$src2)]>;
def FCMPD : F3_3<2, 0b110101, 0b001010010,
(outs), (ins DFPRegs:$src1, DFPRegs:$src2),
"fcmpd $src1, $src2\n\tnop",
- [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+ [(SPcmpfcc f64:$src1, f64:$src2)]>;
}
//===----------------------------------------------------------------------===//
@@ -704,52 +706,45 @@ let Predicates = [HasV9], Constraints = "$T = $dst" in {
def MOVICCrr
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
"mov$cc %icc, $F, $dst",
- [(set IntRegs:$dst,
- (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselecticc i32:$F, i32:$T, imm:$cc))]>;
def MOVICCri
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
"mov$cc %icc, $F, $dst",
- [(set IntRegs:$dst,
- (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselecticc simm11:$F, i32:$T, imm:$cc))]>;
}
let Uses = [FCC] in {
def MOVFCCrr
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
"mov$cc %fcc0, $F, $dst",
- [(set IntRegs:$dst,
- (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselectfcc i32:$F, i32:$T, imm:$cc))]>;
def MOVFCCri
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
"mov$cc %fcc0, $F, $dst",
- [(set IntRegs:$dst,
- (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ [(set i32:$dst, (SPselectfcc simm11:$F, i32:$T, imm:$cc))]>;
}
let Uses = [ICC] in {
def FMOVS_ICC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
"fmovs$cc %icc, $F, $dst",
- [(set FPRegs:$dst,
- (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ [(set f32:$dst,
+ (SPselecticc f32:$F, f32:$T, imm:$cc))]>;
def FMOVD_ICC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
"fmovd$cc %icc, $F, $dst",
- [(set DFPRegs:$dst,
- (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ [(set f64:$dst, (SPselecticc f64:$F, f64:$T, imm:$cc))]>;
}
let Uses = [FCC] in {
def FMOVS_FCC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
"fmovs$cc %fcc0, $F, $dst",
- [(set FPRegs:$dst,
- (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ [(set f32:$dst, (SPselectfcc f32:$F, f32:$T, imm:$cc))]>;
def FMOVD_FCC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
"fmovd$cc %fcc0, $F, $dst",
- [(set DFPRegs:$dst,
- (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ [(set f64:$dst, (SPselectfcc f64:$F, f64:$T, imm:$cc))]>;
}
}
@@ -762,11 +757,11 @@ let Predicates = [HasV9] in {
def FNEGD : F3_3<2, 0b110100, 0b000000110,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fnegd $src, $dst",
- [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ [(set f64:$dst, (fneg f64:$src))]>;
def FABSD : F3_3<2, 0b110100, 0b000001010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fabsd $src, $dst",
- [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+ [(set f64:$dst, (fabs f64:$src))]>;
}
// POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear
@@ -774,8 +769,8 @@ let Predicates = [HasV9] in {
def POPCrr : F3_1<2, 0b101110,
(outs IntRegs:$dst), (ins IntRegs:$src),
"popc $src, $dst", []>, Requires<[HasV9]>;
-def : Pat<(ctpop IntRegs:$src),
- (POPCrr (SLLri IntRegs:$src, 0))>;
+def : Pat<(ctpop i32:$src),
+ (POPCrr (SLLri $src, 0))>;
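(Editor's note: the pattern above pairs with the truncated comment introducing POPCrr: popc counts set bits across all 64 bits of the register, so the 32-bit input must reach it with its upper half cleared. A minimal sketch of the intended arithmetic, assuming a GCC/Clang-style popcount builtin; illustration only, not code from this commit:

    #include <cstdint>
    unsigned popcountI32ViaPOPC(uint32_t Src) {
      uint64_t Widened = Src;               // upper 32 bits are zero, which is
                                            // what the shift-by-zero aims for
      return __builtin_popcountll(Widened); // POPC counts bits 63..0
    }
)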
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -783,28 +778,28 @@ def : Pat<(ctpop IntRegs:$src),
// Small immediates.
def : Pat<(i32 simm13:$val),
- (ORri G0, imm:$val)>;
+ (ORri (i32 G0), imm:$val)>;
// Arbitrary immediates.
def : Pat<(i32 imm:$val),
(ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
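(Editor's note: this two-instruction sequence works because SETHI writes its 22-bit immediate into bits 31..10 of the destination and clears bits 9..0, and the OR immediate then supplies the low 10 bits; HI22 and LO10 are the matching operand transforms. A sketch of the split, using a hypothetical value:

    #include <cassert>
    #include <cstdint>
    void splitImm(uint32_t Val) {
      uint32_t Hi22 = Val >> 10;   // SETHI operand: bits 31..10
      uint32_t Lo10 = Val & 0x3FF; // OR immediate: bits 9..0
      assert(((Hi22 << 10) | Lo10) == Val && "sethi+or reassembles the value");
    }
)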
// subc
-def : Pat<(subc IntRegs:$b, IntRegs:$c),
- (SUBCCrr IntRegs:$b, IntRegs:$c)>;
-def : Pat<(subc IntRegs:$b, simm13:$val),
- (SUBCCri IntRegs:$b, imm:$val)>;
+def : Pat<(subc i32:$b, i32:$c),
+ (SUBCCrr $b, $c)>;
+def : Pat<(subc i32:$b, simm13:$val),
+ (SUBCCri $b, imm:$val)>;
// Global addresses, constant pool entries
def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
-def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>;
def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
-def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>;
// Add reg, lo. This is used when taking the addr of a global/constpool entry.
-def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)),
- (ADDri IntRegs:$r, tglobaladdr:$in)>;
-def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)),
- (ADDri IntRegs:$r, tconstpool:$in)>;
+def : Pat<(add i32:$r, (SPlo tglobaladdr:$in)),
+ (ADDri $r, tglobaladdr:$in)>;
+def : Pat<(add i32:$r, (SPlo tconstpool:$in)),
+ (ADDri $r, tconstpool:$in)>;
// Calls:
def : Pat<(call tglobaladdr:$dst),
@@ -823,3 +818,5 @@ def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>;
// zextload bool -> zextload byte
def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+
+include "SparcInstr64Bit.td"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index ff8d3c533f3d..db9b30eb4330 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -14,14 +14,14 @@
#include "SparcRegisterInfo.h"
#include "Sparc.h"
#include "SparcSubtarget.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
#define GET_REGINFO_TARGET_DESC
#include "SparcGenRegisterInfo.inc"
@@ -56,45 +56,33 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-void SparcRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- MachineInstr &MI = *I;
- DebugLoc dl = MI.getDebugLoc();
- int Size = MI.getOperand(0).getImm();
- if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
- Size = -Size;
- if (Size)
- BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
- MBB.erase(I);
+const TargetRegisterClass*
+SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
}
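(Editor's note: this hook resolves operands declared with a pointer register class on a per-function basis; the generic lookup in TargetInstrInfo::getRegClass, visible in the file deleted further down in this diff, forwards to it, which is how SPARC gets 64-bit pointer registers in 64-bit mode. A hedged usage sketch, with TRI and MF standing for the usual TargetRegisterInfo and MachineFunction references:

    const TargetRegisterClass *PtrRC =
        TRI->getPointerRegClass(MF, /*Kind=*/0); // IntRegs or I64Regs here
)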
void
SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Addressable stack objects are accessed using neg. offsets from %fp
MachineFunction &MF = *MI.getParent()->getParent();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(i+1).getImm();
+ MI.getOperand(FIOperandNum + 1).getImm();
// Replace frame index with a frame pointer reference.
if (Offset >= -4096 && Offset <= 4095) {
// If the offset is small enough to fit in the immediate field, directly
// encode it.
- MI.getOperand(i).ChangeToRegister(SP::I6, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::I6, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
} else {
// Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
// scavenge a register here instead of reserving G1 all of the time.
@@ -104,8 +92,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
.addReg(SP::I6);
// Insert: G1+%lo(offset) into the user.
- MI.getOperand(i).ChangeToRegister(SP::G1, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1));
}
}
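(Editor's note: the fast path above exists because the SPARC immediate field is 13 bits signed, hence the [-4096, 4095] range check; anything larger is split into a SETHI %hi part and the 10-bit %lo mask `Offset & ((1 << 10)-1)`. A sketch of that split, using unsigned arithmetic to avoid shifting negative values:

    #include <cassert>
    #include <cstdint>
    void splitOffset(int Offset) {
      uint32_t Off  = static_cast<uint32_t>(Offset);
      uint32_t Hi22 = Off >> 10;              // materialized into G1 via SETHI
      uint32_t Lo10 = Off & ((1u << 10) - 1); // folded into the user instruction
      assert(((Hi22 << 10) | Lo10) == Off);
    }
)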
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 9515ad33dcc2..f91df5398953 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -36,14 +36,15 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index 81bff6c51c9d..497e7c5d5612 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -43,7 +43,7 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
}
// Control Registers
-def ICC : SparcCtrlReg<"ICC">;
+def ICC : SparcCtrlReg<"ICC">; // This represents icc and xcc in 64-bit code.
def FCC : SparcCtrlReg<"FCC">;
// Y register
@@ -140,7 +140,10 @@ def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
// FIXME: the register order should be defined in terms of the preferred
// allocation order...
//
-def IntRegs : RegisterClass<"SP", [i32], 32,
+// This register class should not be used to hold i64 values, use the I64Regs
+// register class for that. The i64 type is included here to allow i64 patterns
+// using the integer instructions.
+def IntRegs : RegisterClass<"SP", [i32, i64], 32,
(add L0, L1, L2, L3, L4, L5, L6,
L7, I0, I1, I2, I3, I4, I5,
O0, O1, O2, O3, O4, O5, O7,
@@ -155,6 +158,13 @@ def IntRegs : RegisterClass<"SP", [i32], 32,
G5, G6, G7 // reserved for kernel
)>;
+// Register class for 64-bit mode, with a 64-bit spill slot size.
+// These are the same as the 32-bit registers, so TableGen will consider this
+// to be a sub-class of IntRegs. That works out because requiring a 64-bit
+// spill slot is a stricter constraint than only requiring a 32-bit spill slot.
+def I64Regs : RegisterClass<"SP", [i64], 64, (add IntRegs)>;
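(Editor's note: if TableGen infers the sub-class relationship the comment describes, the generated register info should agree. A hypothetical sanity check, assuming the generated SP::*RegClass objects; not part of this commit:

    assert(SP::IntRegsRegClass.hasSubClassEq(&SP::I64RegsRegClass) &&
           "I64Regs should be inferred as a sub-class of IntRegs");
)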
+
+// Floating point register classes.
def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 45c962471dda..60bceb708fbc 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -12,8 +12,8 @@
#include "SparcTargetMachine.h"
#include "Sparc.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayout()),
InstrInfo(Subtarget),
TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) {
+ FrameLowering(Subtarget) {
}
namespace {
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 0fbe2d7cda36..081075de2dc8 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -14,15 +14,14 @@
#ifndef SPARCTARGETMACHINE_H
#define SPARCTARGETMACHINE_H
-#include "SparcInstrInfo.h"
-#include "SparcISelLowering.h"
#include "SparcFrameLowering.h"
+#include "SparcISelLowering.h"
+#include "SparcInstrInfo.h"
#include "SparcSelectionDAGInfo.h"
#include "SparcSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -33,8 +32,6 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -55,12 +52,6 @@ public:
virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index c9d5b7bdfb3d..bb714632349a 100644
--- a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "Sparc.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 393178a4692e..9a78ebc3facb 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -14,11 +14,11 @@
#include "llvm-c/Target.h"
#include "llvm-c/Initialization.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/LLVMContext.h"
#include <cstring>
using namespace llvm;
@@ -26,7 +26,6 @@ using namespace llvm;
void llvm::initializeTarget(PassRegistry &Registry) {
initializeDataLayoutPass(Registry);
initializeTargetLibraryInfoPass(Registry);
- initializeTargetTransformInfoPass(Registry);
}
void LLVMInitializeTarget(LLVMPassRegistryRef R) {
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
deleted file mode 100644
index f1d1d07c38ae..000000000000
--- a/lib/Target/TargetInstrInfo.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cctype>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// TargetInstrInfo
-//
-// Methods that depend on CodeGen are implemented in
-// TargetInstrInfoImpl.cpp. Invoking them without linking libCodeGen raises a
-// link error.
-// ===----------------------------------------------------------------------===//
-
-TargetInstrInfo::~TargetInstrInfo() {
-}
-
-const TargetRegisterClass*
-TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
- const TargetRegisterInfo *TRI,
- const MachineFunction &MF) const {
- if (OpNum >= MCID.getNumOperands())
- return 0;
-
- short RegClass = MCID.OpInfo[OpNum].RegClass;
- if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
- return TRI->getPointerRegClass(MF, RegClass);
-
- // Instructions like INSERT_SUBREG do not have fixed register classes.
- if (RegClass < 0)
- return 0;
-
- // Otherwise just look it up normally.
- return TRI->getRegClass(RegClass);
-}
-
-/// insertNoop - Insert a noop into the instruction stream at the specified
-/// point.
-void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- llvm_unreachable("Target didn't implement insertNoop!");
-}
-
-/// Measure the specified inline asm to determine an approximation of its
-/// length.
-/// Comments (which run till the next SeparatorString or newline) do not
-/// count as an instruction.
-/// Any other non-whitespace text is considered an instruction, with
-/// multiple instructions separated by SeparatorString or newlines.
-/// Variable-length instructions are not handled here; this function
-/// may be overloaded in the target code to do that.
-unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const {
-
-
- // Count the number of instructions in the asm.
- bool atInsnStart = true;
- unsigned Length = 0;
- for (; *Str; ++Str) {
- if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
- strlen(MAI.getSeparatorString())) == 0)
- atInsnStart = true;
- if (atInsnStart && !std::isspace(*Str)) {
- Length += MAI.getMaxInstLength();
- atInsnStart = false;
- }
- if (atInsnStart && strncmp(Str, MAI.getCommentString(),
- strlen(MAI.getCommentString())) == 0)
- atInsnStart = false;
- }
-
- return Length;
-}
diff --git a/lib/Target/TargetIntrinsicInfo.cpp b/lib/Target/TargetIntrinsicInfo.cpp
index e049a1d3b62f..64bd56f6e7df 100644
--- a/lib/Target/TargetIntrinsicInfo.cpp
+++ b/lib/Target/TargetIntrinsicInfo.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Function.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
TargetIntrinsicInfo::TargetIntrinsicInfo() {
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 6d4eab12045c..ee88ce77c09f 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -24,6 +24,8 @@ void TargetLibraryInfo::anchor() { }
const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
{
+ "_IO_getc",
+ "_IO_putc",
"_ZdaPv",
"_ZdlPv",
"_Znaj",
@@ -38,7 +40,14 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__cxa_guard_abort",
"__cxa_guard_acquire",
"__cxa_guard_release",
+ "__isoc99_scanf",
+ "__isoc99_sscanf",
"__memcpy_chk",
+ "__strdup",
+ "__strndup",
+ "__strtok_r",
+ "abs",
+ "access",
"acos",
"acosf",
"acosh",
@@ -60,6 +69,13 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"atanhf",
"atanhl",
"atanl",
+ "atof",
+ "atoi",
+ "atol",
+ "atoll",
+ "bcmp",
+ "bcopy",
+ "bzero",
"calloc",
"cbrt",
"cbrtf",
@@ -67,6 +83,10 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"ceil",
"ceilf",
"ceill",
+ "chmod",
+ "chown",
+ "clearerr",
+ "closedir",
"copysign",
"copysignf",
"copysignl",
@@ -76,6 +96,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"coshf",
"coshl",
"cosl",
+ "ctermid",
"exp",
"exp10",
"exp10f",
@@ -91,18 +112,67 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"fabs",
"fabsf",
"fabsl",
+ "fclose",
+ "fdopen",
+ "feof",
+ "ferror",
+ "fflush",
+ "ffs",
+ "ffsl",
+ "ffsll",
+ "fgetc",
+ "fgetpos",
+ "fgets",
+ "fileno",
"fiprintf",
+ "flockfile",
"floor",
"floorf",
"floorl",
"fmod",
"fmodf",
"fmodl",
+ "fopen",
+ "fopen64",
+ "fprintf",
"fputc",
"fputs",
+ "fread",
"free",
+ "frexp",
+ "frexpf",
+ "frexpl",
+ "fscanf",
+ "fseek",
+ "fseeko",
+ "fseeko64",
+ "fsetpos",
+ "fstat",
+ "fstat64",
+ "fstatvfs",
+ "fstatvfs64",
+ "ftell",
+ "ftello",
+ "ftello64",
+ "ftrylockfile",
+ "funlockfile",
"fwrite",
+ "getc",
+ "getc_unlocked",
+ "getchar",
+ "getenv",
+ "getitimer",
+ "getlogin_r",
+ "getpwnam",
+ "gets",
+ "htonl",
+ "htons",
"iprintf",
+ "isascii",
+ "isdigit",
+ "labs",
+ "lchown",
+ "llabs",
"log",
"log10",
"log10f",
@@ -118,30 +188,64 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"logbl",
"logf",
"logl",
+ "lstat",
+ "lstat64",
"malloc",
+ "memalign",
+ "memccpy",
"memchr",
"memcmp",
"memcpy",
"memmove",
+ "memrchr",
"memset",
"memset_pattern16",
+ "mkdir",
+ "mktime",
+ "modf",
+ "modff",
+ "modfl",
"nearbyint",
"nearbyintf",
"nearbyintl",
+ "ntohl",
+ "ntohs",
+ "open",
+ "open64",
+ "opendir",
+ "pclose",
+ "perror",
+ "popen",
"posix_memalign",
"pow",
"powf",
"powl",
+ "pread",
+ "printf",
+ "putc",
"putchar",
"puts",
+ "pwrite",
+ "qsort",
+ "read",
+ "readlink",
"realloc",
"reallocf",
+ "realpath",
+ "remove",
+ "rename",
+ "rewind",
"rint",
"rintf",
"rintl",
+ "rmdir",
"round",
"roundf",
"roundl",
+ "scanf",
+ "setbuf",
+ "setitimer",
+ "setvbuf",
"sin",
"sinf",
"sinh",
@@ -149,17 +253,28 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"sinhl",
"sinl",
"siprintf",
+ "snprintf",
+ "sprintf",
"sqrt",
"sqrtf",
"sqrtl",
+ "sscanf",
+ "stat",
+ "stat64",
+ "statvfs",
+ "statvfs64",
"stpcpy",
+ "stpncpy",
+ "strcasecmp",
"strcat",
"strchr",
"strcmp",
+ "strcoll",
"strcpy",
"strcspn",
"strdup",
"strlen",
+ "strncasecmp",
"strncat",
"strncmp",
"strncpy",
@@ -171,21 +286,43 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"strstr",
"strtod",
"strtof",
+ "strtok",
+ "strtok_r",
"strtol",
"strtold",
"strtoll",
"strtoul",
"strtoull",
+ "strxfrm",
+ "system",
"tan",
"tanf",
"tanh",
"tanhf",
"tanhl",
"tanl",
+ "times",
+ "tmpfile",
+ "tmpfile64",
+ "toascii",
"trunc",
"truncf",
"truncl",
- "valloc"
+ "uname",
+ "ungetc",
+ "unlink",
+ "unsetenv",
+ "utime",
+ "utimes",
+ "valloc",
+ "vfprintf",
+ "vfscanf",
+ "vprintf",
+ "vscanf",
+ "vsnprintf",
+ "vsprintf",
+ "vsscanf",
+ "write"
};
/// initialize - Initialize the set of available library functions based on the
@@ -247,7 +384,9 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::fabsl);
TLI.setUnavailable(LibFunc::floorl);
TLI.setUnavailable(LibFunc::fmodl);
+ TLI.setUnavailable(LibFunc::frexpl);
TLI.setUnavailable(LibFunc::logl);
+ TLI.setUnavailable(LibFunc::modfl);
TLI.setUnavailable(LibFunc::powl);
TLI.setUnavailable(LibFunc::sinl);
TLI.setUnavailable(LibFunc::sinhl);
@@ -324,9 +463,116 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::tanhf);
}
- // Win32 does *not* provide stpcpy. It is provided on POSIX systems:
- // http://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html
+ // Win32 does *not* provide these functions, but they are
+ // generally available on POSIX-compliant systems:
+ TLI.setUnavailable(LibFunc::access);
+ TLI.setUnavailable(LibFunc::bcmp);
+ TLI.setUnavailable(LibFunc::bcopy);
+ TLI.setUnavailable(LibFunc::bzero);
+ TLI.setUnavailable(LibFunc::chmod);
+ TLI.setUnavailable(LibFunc::chown);
+ TLI.setUnavailable(LibFunc::closedir);
+ TLI.setUnavailable(LibFunc::ctermid);
+ TLI.setUnavailable(LibFunc::fdopen);
+ TLI.setUnavailable(LibFunc::ffs);
+ TLI.setUnavailable(LibFunc::fileno);
+ TLI.setUnavailable(LibFunc::flockfile);
+ TLI.setUnavailable(LibFunc::fseeko);
+ TLI.setUnavailable(LibFunc::fstat);
+ TLI.setUnavailable(LibFunc::fstatvfs);
+ TLI.setUnavailable(LibFunc::ftello);
+ TLI.setUnavailable(LibFunc::ftrylockfile);
+ TLI.setUnavailable(LibFunc::funlockfile);
+ TLI.setUnavailable(LibFunc::getc_unlocked);
+ TLI.setUnavailable(LibFunc::getitimer);
+ TLI.setUnavailable(LibFunc::getlogin_r);
+ TLI.setUnavailable(LibFunc::getpwnam);
+ TLI.setUnavailable(LibFunc::htonl);
+ TLI.setUnavailable(LibFunc::htons);
+ TLI.setUnavailable(LibFunc::lchown);
+ TLI.setUnavailable(LibFunc::lstat);
+ TLI.setUnavailable(LibFunc::memccpy);
+ TLI.setUnavailable(LibFunc::mkdir);
+ TLI.setUnavailable(LibFunc::ntohl);
+ TLI.setUnavailable(LibFunc::ntohs);
+ TLI.setUnavailable(LibFunc::open);
+ TLI.setUnavailable(LibFunc::opendir);
+ TLI.setUnavailable(LibFunc::pclose);
+ TLI.setUnavailable(LibFunc::popen);
+ TLI.setUnavailable(LibFunc::pread);
+ TLI.setUnavailable(LibFunc::pwrite);
+ TLI.setUnavailable(LibFunc::read);
+ TLI.setUnavailable(LibFunc::readlink);
+ TLI.setUnavailable(LibFunc::realpath);
+ TLI.setUnavailable(LibFunc::rmdir);
+ TLI.setUnavailable(LibFunc::setitimer);
+ TLI.setUnavailable(LibFunc::stat);
+ TLI.setUnavailable(LibFunc::statvfs);
TLI.setUnavailable(LibFunc::stpcpy);
+ TLI.setUnavailable(LibFunc::stpncpy);
+ TLI.setUnavailable(LibFunc::strcasecmp);
+ TLI.setUnavailable(LibFunc::strncasecmp);
+ TLI.setUnavailable(LibFunc::times);
+ TLI.setUnavailable(LibFunc::uname);
+ TLI.setUnavailable(LibFunc::unlink);
+ TLI.setUnavailable(LibFunc::unsetenv);
+ TLI.setUnavailable(LibFunc::utime);
+ TLI.setUnavailable(LibFunc::utimes);
+ TLI.setUnavailable(LibFunc::write);
+
+ // Win32 does *not* provide these functions, but they are
+ // specified by C99:
+ TLI.setUnavailable(LibFunc::atoll);
+ TLI.setUnavailable(LibFunc::frexpf);
+ TLI.setUnavailable(LibFunc::llabs);
+ }
+
+ // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
+ // Linux (GLIBC):
+ // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html
+ switch (T.getOS()) {
+ case Triple::Darwin:
+ case Triple::MacOSX:
+ case Triple::IOS:
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsl);
+ }
+
+ // ffsll is available on at least FreeBSD and Linux (GLIBC):
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html
+ switch (T.getOS()) {
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsll);
+ }
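(Editor's note: optimization passes consult these tables through TargetLibraryInfo before forming library calls, so a setUnavailable here is what stops a transform from emitting the function on the wrong triple. A hedged sketch of the query side, with TLI standing for a TargetLibraryInfo reference held by the pass:

    if (TLI.has(LibFunc::ffsl)) {
      // Only now is it safe to synthesize a call to ffsl on this target.
    }
)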
+
+ // The following functions are available on at least Linux:
+ if (T.getOS() != Triple::Linux) {
+ TLI.setUnavailable(LibFunc::dunder_strdup);
+ TLI.setUnavailable(LibFunc::dunder_strtok_r);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf);
+ TLI.setUnavailable(LibFunc::under_IO_getc);
+ TLI.setUnavailable(LibFunc::under_IO_putc);
+ TLI.setUnavailable(LibFunc::memalign);
+ TLI.setUnavailable(LibFunc::fopen64);
+ TLI.setUnavailable(LibFunc::fseeko64);
+ TLI.setUnavailable(LibFunc::fstat64);
+ TLI.setUnavailable(LibFunc::fstatvfs64);
+ TLI.setUnavailable(LibFunc::ftello64);
+ TLI.setUnavailable(LibFunc::lstat64);
+ TLI.setUnavailable(LibFunc::open64);
+ TLI.setUnavailable(LibFunc::stat64);
+ TLI.setUnavailable(LibFunc::statvfs64);
+ TLI.setUnavailable(LibFunc::tmpfile64);
}
}
@@ -351,11 +597,40 @@ TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
CustomNames = TLI.CustomNames;
}
+namespace {
+struct StringComparator {
+ /// Compare two strings and return true if LHS is lexicographically less than
+ /// RHS. Requires that RHS doesn't contain any zero bytes.
+ bool operator()(const char *LHS, StringRef RHS) const {
+ // Compare prefixes with strncmp. If prefixes match we know that LHS is
+ // greater or equal to RHS as RHS can't contain any '\0'.
+ return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ }
+
+ // Provided for compatibility with MSVC's debug mode.
+ bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; }
+ bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; }
+ bool operator()(const char *LHS, const char *RHS) const {
+ return std::strcmp(LHS, RHS) < 0;
+ }
+};
+}
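(Editor's note: the comparator's prefix trick relies on the invariant stated in its comment: the query contains no zero bytes, so when strncmp ties on the first RHS.size() characters, LHS is either exactly RHS or a longer string, and in both cases not less than RHS. A standalone illustration, with std::string standing in for StringRef; a sketch, not library code:

    #include <cstring>
    #include <string>
    bool lessThan(const char *LHS, const std::string &RHS) {
      // A tie on the prefix means LHS >= RHS, so "less than" is false.
      return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
    }
)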
+
bool TargetLibraryInfo::getLibFunc(StringRef funcName,
LibFunc::Func &F) const {
const char **Start = &StandardNames[0];
const char **End = &StandardNames[LibFunc::NumLibFuncs];
- const char **I = std::lower_bound(Start, End, funcName);
+
+ // Filter out empty names and names containing null bytes, those can't be in
+ // our table.
+ if (funcName.empty() || funcName.find('\0') != StringRef::npos)
+ return false;
+
+ // Check for \01 prefix that is used to mangle __asm declarations and
+ // strip it if present.
+ if (funcName.front() == '\01')
+ funcName = funcName.substr(1);
+ const char **I = std::lower_bound(Start, End, funcName, StringComparator());
if (I != End && *I == funcName) {
F = (LibFunc::Func)(I - Start);
return true;
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 9d7e2b825f41..f5121e34f77f 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -13,21 +13,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -285,35 +285,35 @@ TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
return DataSection;
}
-/// getExprForDwarfGlobalReference - Return an MCExpr to use for a
+/// getTTypeGlobalReference - Return an MCExpr to use for a
/// reference to the specified global variable from exception
/// handling information.
const MCExpr *TargetLoweringObjectFile::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const {
- const MCSymbol *Sym = Mang->getSymbol(GV);
- return getExprForDwarfReference(Sym, Encoding, Streamer);
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ const MCSymbolRefExpr *Ref =
+ MCSymbolRefExpr::Create(Mang->getSymbol(GV), getContext());
+
+ return getTTypeReference(Ref, Encoding, Streamer);
}
const MCExpr *TargetLoweringObjectFile::
-getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
- MCStreamer &Streamer) const {
- const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext());
-
+getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const {
switch (Encoding & 0x70) {
default:
report_fatal_error("We do not support this DWARF encoding yet!");
case dwarf::DW_EH_PE_absptr:
// Do nothing special
- return Res;
+ return Sym;
case dwarf::DW_EH_PE_pcrel: {
// Emit a label to the streamer for the current position. This gives us
// .-foo addressing.
MCSymbol *PCSym = getContext().CreateTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
- return MCBinaryExpr::CreateSub(Res, PC, getContext());
+ return MCBinaryExpr::CreateSub(Sym, PC, getContext());
}
}
}
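(Editor's note: for DW_EH_PE_pcrel the streamer emits the symbol minus a label planted at the entry's own position, and the consumer adds that position back at run time. A sketch of the round trip, with hypothetical addresses and an assumption about the unwinder side; not code from this commit:

    #include <cassert>
    #include <cstdint>
    void roundTrip(intptr_t SymAddr, intptr_t EntryAddr) {
      intptr_t Stored    = SymAddr - EntryAddr; // what the streamer emits
      intptr_t Recovered = EntryAddr + Stored;  // what the unwinder computes
      assert(Recovered == SymAddr);
    }
)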
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 382571982b96..e7282519d597 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,12 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -61,6 +63,30 @@ TargetMachine::~TargetMachine() {
delete AsmInfo;
}
+/// \brief Reset the target options based on the function's attributes.
+void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
+ const Function *F = MF->getFunction();
+ TargetOptions &TO = MF->getTarget().Options;
+
+#define RESET_OPTION(X, Y) \
+ do { \
+ if (F->hasFnAttribute(Y)) \
+ TO.X = \
+ (F->getAttributes(). \
+ getAttribute(AttributeSet::FunctionIndex, \
+ Y).getValueAsString() == "true"); \
+ } while (0)
+
+ RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim");
+ RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf");
+ RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad");
+ RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
+ RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
+ RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
+ RESET_OPTION(UseSoftFloat, "use-soft-float");
+ RESET_OPTION(DisableTailCalls, "disable-tail-calls");
+}
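(Editor's note: written out, one invocation of the macro is just an attribute lookup plus a string compare; for example, RESET_OPTION(UnsafeFPMath, "unsafe-fp-math") expands, modulo whitespace, to:

    if (F->hasFnAttribute("unsafe-fp-math"))
      TO.UnsafeFPMath =
          (F->getAttributes()
               .getAttribute(AttributeSet::FunctionIndex, "unsafe-fp-math")
               .getValueAsString() == "true");
)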
+
/// getRelocationModel - Returns the code generation relocation model. The
/// choices are static, PIC, and dynamic-no-pic, and target default.
Reloc::Model TargetMachine::getRelocationModel() const {
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index f69c2abd50d2..79f74bd66127 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -11,17 +11,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm-c/TargetMachine.h"
#include "llvm-c/Core.h"
#include "llvm-c/Target.h"
-#include "llvm-c/TargetMachine.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
@@ -184,7 +184,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
}
if (TM->addPassesToEmitFile(pass, destf, ft)) {
- error = "No DataLayout in TargetMachine";
+ error = "TargetMachine can't emit a file of this type";
*ErrorMessage = strdup(error.c_str());
return true;
}
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
deleted file mode 100644
index be8b58289039..000000000000
--- a/lib/Target/TargetRegisterInfo.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TargetRegisterInfo interface.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
- regclass_iterator RCB, regclass_iterator RCE,
- const char *const *SRINames,
- const unsigned *SRILaneMasks)
- : InfoDesc(ID), SubRegIndexNames(SRINames),
- SubRegIndexLaneMasks(SRILaneMasks),
- RegClassBegin(RCB), RegClassEnd(RCE) {
-}
-
-TargetRegisterInfo::~TargetRegisterInfo() {}
-
-void PrintReg::print(raw_ostream &OS) const {
- if (!Reg)
- OS << "%noreg";
- else if (TargetRegisterInfo::isStackSlot(Reg))
- OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
- else if (TargetRegisterInfo::isVirtualRegister(Reg))
- OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
- else if (TRI && Reg < TRI->getNumRegs())
- OS << '%' << TRI->getName(Reg);
- else
- OS << "%physreg" << Reg;
- if (SubIdx) {
- if (TRI)
- OS << ':' << TRI->getSubRegIndexName(SubIdx);
- else
- OS << ":sub(" << SubIdx << ')';
- }
-}
-
-void PrintRegUnit::print(raw_ostream &OS) const {
- // Generic printout when TRI is missing.
- if (!TRI) {
- OS << "Unit~" << Unit;
- return;
- }
-
- // Check for invalid register units.
- if (Unit >= TRI->getNumRegUnits()) {
- OS << "BadUnit~" << Unit;
- return;
- }
-
- // Normal units have at least one root.
- MCRegUnitRootIterator Roots(Unit, TRI);
- assert(Roots.isValid() && "Unit has no roots.");
- OS << TRI->getName(*Roots);
- for (++Roots; Roots.isValid(); ++Roots)
- OS << '~' << TRI->getName(*Roots);
-}
-
-/// getAllocatableClass - Return the maximal subclass of the given register
-/// class that is allocatable, or NULL.
-const TargetRegisterClass *
-TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
- if (!RC || RC->isAllocatable())
- return RC;
-
- const unsigned *SubClass = RC->getSubClassMask();
- for (unsigned Base = 0, BaseE = getNumRegClasses();
- Base < BaseE; Base += 32) {
- unsigned Idx = Base;
- for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) {
- unsigned Offset = CountTrailingZeros_32(Mask);
- const TargetRegisterClass *SubRC = getRegClass(Idx + Offset);
- if (SubRC->isAllocatable())
- return SubRC;
- Mask >>= Offset;
- Idx += Offset + 1;
- }
- }
- return NULL;
-}
-
-/// getMinimalPhysRegClass - Returns the Register Class of a physical
-/// register of the given type, picking the most sub register class of
-/// the right type that contains this physreg.
-const TargetRegisterClass *
-TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
- assert(isPhysicalRegister(reg) && "reg must be a physical register");
-
- // Pick the most sub register class of the right type that contains
- // this physreg.
- const TargetRegisterClass* BestRC = 0;
- for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
- const TargetRegisterClass* RC = *I;
- if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
- (!BestRC || BestRC->hasSubClass(RC)))
- BestRC = RC;
- }
-
- assert(BestRC && "Couldn't find the register class");
- return BestRC;
-}
-
-/// getAllocatableSetForRC - Toggle the bits that represent allocatable
-/// registers for the specific register class.
-static void getAllocatableSetForRC(const MachineFunction &MF,
- const TargetRegisterClass *RC, BitVector &R){
- assert(RC->isAllocatable() && "invalid for nonallocatable sets");
- ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
- for (unsigned i = 0; i != Order.size(); ++i)
- R.set(Order[i]);
-}
-
-BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
- const TargetRegisterClass *RC) const {
- BitVector Allocatable(getNumRegs());
- if (RC) {
- // A register class with no allocatable subclass returns an empty set.
- const TargetRegisterClass *SubClass = getAllocatableClass(RC);
- if (SubClass)
- getAllocatableSetForRC(MF, SubClass, Allocatable);
- } else {
- for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
- E = regclass_end(); I != E; ++I)
- if ((*I)->isAllocatable())
- getAllocatableSetForRC(MF, *I, Allocatable);
- }
-
- // Mask out the reserved registers
- BitVector Reserved = getReservedRegs(MF);
- Allocatable &= Reserved.flip();
-
- return Allocatable;
-}
-
-static inline
-const TargetRegisterClass *firstCommonClass(const uint32_t *A,
- const uint32_t *B,
- const TargetRegisterInfo *TRI) {
- for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
- if (unsigned Common = *A++ & *B++)
- return TRI->getRegClass(I + CountTrailingZeros_32(Common));
- return 0;
-}
-
-const TargetRegisterClass *
-TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B) const {
- // First take care of the trivial cases.
- if (A == B)
- return A;
- if (!A || !B)
- return 0;
-
- // Register classes are ordered topologically, so the largest common
- // sub-class is the common sub-class with the smallest ID.
- return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
-}
-
-const TargetRegisterClass *
-TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B,
- unsigned Idx) const {
- assert(A && B && "Missing register class");
- assert(Idx && "Bad sub-register index");
-
- // Find Idx in the list of super-register indices.
- for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI)
- if (RCI.getSubReg() == Idx)
- // The bit mask contains all register classes that are projected into B
- // by Idx. Find a class that is also a sub-class of A.
- return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
- return 0;
-}
-
-const TargetRegisterClass *TargetRegisterInfo::
-getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
- const TargetRegisterClass *RCB, unsigned SubB,
- unsigned &PreA, unsigned &PreB) const {
- assert(RCA && SubA && RCB && SubB && "Invalid arguments");
-
- // Search all pairs of sub-register indices that project into RCA and RCB
- // respectively. This is quadratic, but usually the sets are very small. On
- // most targets like X86, there will only be a single sub-register index
- // (e.g., sub_16bit projecting into GR16).
- //
- // The worst case is a register class like DPR on ARM.
- // We have indices dsub_0..dsub_7 projecting into that class.
- //
- // It is very common that one register class is a sub-register of the other.
- // Arrange for RCA to be the larger register so the answer will be found in
- // the first iteration. This makes the search linear for the most common
- // case.
- const TargetRegisterClass *BestRC = 0;
- unsigned *BestPreA = &PreA;
- unsigned *BestPreB = &PreB;
- if (RCA->getSize() < RCB->getSize()) {
- std::swap(RCA, RCB);
- std::swap(SubA, SubB);
- std::swap(BestPreA, BestPreB);
- }
-
- // Also terminate the search once we have found a register class as small as
- // RCA.
- unsigned MinSize = RCA->getSize();
-
- for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
- unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
- for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) {
- // Check if a common super-register class exists for this index pair.
- const TargetRegisterClass *RC =
- firstCommonClass(IA.getMask(), IB.getMask(), this);
- if (!RC || RC->getSize() < MinSize)
- continue;
-
- // The indexes must compose identically: PreA+SubA == PreB+SubB.
- unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB);
- if (FinalA != FinalB)
- continue;
-
- // Is RC a better candidate than BestRC?
- if (BestRC && RC->getSize() >= BestRC->getSize())
- continue;
-
- // Yes, RC is the smallest super-register seen so far.
- BestRC = RC;
- *BestPreA = IA.getSubReg();
- *BestPreB = IB.getSubReg();
-
- // Bail early if we reached MinSize. We won't find a better candidate.
- if (BestRC->getSize() == MinSize)
- return BestRC;
- }
- }
- return BestRC;
-}
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index 59ffdea00ea6..af0cef62d552 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -22,6 +22,10 @@ TargetSubtargetInfo::TargetSubtargetInfo() {}
TargetSubtargetInfo::~TargetSubtargetInfo() {}
+bool TargetSubtargetInfo::enableMachineScheduler() const {
+ return false;
+}
+
bool TargetSubtargetInfo::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
deleted file mode 100644
index b36e6f858f72..000000000000
--- a/lib/Target/TargetTransformImpl.cpp
+++ /dev/null
@@ -1,353 +0,0 @@
-// llvm/Target/TargetTransformImpl.cpp - Target Loop Trans Info ---*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/Target/TargetLowering.h"
-#include <utility>
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-//
-// Calls used by scalar transformations.
-//
-//===----------------------------------------------------------------------===//
-
-bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const {
- return TLI->isLegalAddImmediate(imm);
-}
-
-bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const {
- return TLI->isLegalICmpImmediate(imm);
-}
-
-bool ScalarTargetTransformImpl::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
- return TLI->isLegalAddressingMode(AM, Ty);
-}
-
-bool ScalarTargetTransformImpl::isTruncateFree(Type *Ty1, Type *Ty2) const {
- return TLI->isTruncateFree(Ty1, Ty2);
-}
-
-bool ScalarTargetTransformImpl::isTypeLegal(Type *Ty) const {
- EVT T = TLI->getValueType(Ty);
- return TLI->isTypeLegal(T);
-}
-
-unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const {
- return TLI->getJumpBufAlignment();
-}
-
-unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
- return TLI->getJumpBufSize();
-}
-
-bool ScalarTargetTransformImpl::shouldBuildLookupTables() const {
- return TLI->supportJumpTables() &&
- (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
- TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
-}
-
-//===----------------------------------------------------------------------===//
-//
-// Calls used by the vectorizers.
-//
-//===----------------------------------------------------------------------===//
-int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const {
- enum InstructionOpcodes {
-#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
-#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
-#include "llvm/Instruction.def"
- };
- switch (static_cast<InstructionOpcodes>(Opcode)) {
- case Ret: return 0;
- case Br: return 0;
- case Switch: return 0;
- case IndirectBr: return 0;
- case Invoke: return 0;
- case Resume: return 0;
- case Unreachable: return 0;
- case Add: return ISD::ADD;
- case FAdd: return ISD::FADD;
- case Sub: return ISD::SUB;
- case FSub: return ISD::FSUB;
- case Mul: return ISD::MUL;
- case FMul: return ISD::FMUL;
- case UDiv: return ISD::UDIV;
- case SDiv: return ISD::UDIV;
- case FDiv: return ISD::FDIV;
- case URem: return ISD::UREM;
- case SRem: return ISD::SREM;
- case FRem: return ISD::FREM;
- case Shl: return ISD::SHL;
- case LShr: return ISD::SRL;
- case AShr: return ISD::SRA;
- case And: return ISD::AND;
- case Or: return ISD::OR;
- case Xor: return ISD::XOR;
- case Alloca: return 0;
- case Load: return ISD::LOAD;
- case Store: return ISD::STORE;
- case GetElementPtr: return 0;
- case Fence: return 0;
- case AtomicCmpXchg: return 0;
- case AtomicRMW: return 0;
- case Trunc: return ISD::TRUNCATE;
- case ZExt: return ISD::ZERO_EXTEND;
- case SExt: return ISD::SIGN_EXTEND;
- case FPToUI: return ISD::FP_TO_UINT;
- case FPToSI: return ISD::FP_TO_SINT;
- case UIToFP: return ISD::UINT_TO_FP;
- case SIToFP: return ISD::SINT_TO_FP;
- case FPTrunc: return ISD::FP_ROUND;
- case FPExt: return ISD::FP_EXTEND;
- case PtrToInt: return ISD::BITCAST;
- case IntToPtr: return ISD::BITCAST;
- case BitCast: return ISD::BITCAST;
- case ICmp: return ISD::SETCC;
- case FCmp: return ISD::SETCC;
- case PHI: return 0;
- case Call: return 0;
- case Select: return ISD::SELECT;
- case UserOp1: return 0;
- case UserOp2: return 0;
- case VAArg: return 0;
- case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
- case InsertElement: return ISD::INSERT_VECTOR_ELT;
- case ShuffleVector: return ISD::VECTOR_SHUFFLE;
- case ExtractValue: return ISD::MERGE_VALUES;
- case InsertValue: return ISD::MERGE_VALUES;
- case LandingPad: return 0;
- }
-
- llvm_unreachable("Unknown instruction type encountered!");
-}
-
-std::pair<unsigned, MVT>
-VectorTargetTransformImpl::getTypeLegalizationCost(Type *Ty) const {
-
- LLVMContext &C = Ty->getContext();
- EVT MTy = TLI->getValueType(Ty);
-
- unsigned Cost = 1;
- // We keep legalizing the type until we find a legal kind. We assume that
- // the only operation that costs anything is the split. After splitting
- // we need to handle two types.
- while (true) {
- TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, MTy);
-
- if (LK.first == TargetLowering::TypeLegal)
- return std::make_pair(Cost, MTy.getSimpleVT());
-
- if (LK.first == TargetLowering::TypeSplitVector ||
- LK.first == TargetLowering::TypeExpandInteger)
- Cost *= 2;
-
- // Keep legalizing the type.
- MTy = LK.second;
- }
-}
-
-unsigned
-VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty,
- bool Insert,
- bool Extract) const {
- assert (Ty->isVectorTy() && "Can only scalarize vectors");
- unsigned Cost = 0;
-
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- if (Insert)
- Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
- if (Extract)
- Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
- }
-
- return Cost;
-}
-
-unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode,
- Type *Ty) const {
- // Check if any of the operands are vector operands.
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty);
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1. Multiply
- // by the type-legalization overhead.
- return LT.first * 1;
- }
-
- // Else, assume that we need to scalarize this op.
- if (Ty->isVectorTy()) {
- unsigned Num = Ty->getVectorNumElements();
- unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
- // return the cost of multiple scalar invocation plus the cost of inserting
- // and extracting the values.
- return getScalarizationOverhead(Ty, true, true) + Num * Cost;
- }
-
- // We don't know anything about this scalar instruction.
- return 1;
-}
-
-unsigned VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const {
- return 1;
-}
-
-unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- std::pair<unsigned, MVT> SrcLT = getTypeLegalizationCost(Src);
- std::pair<unsigned, MVT> DstLT = getTypeLegalizationCost(Dst);
-
- // Handle scalar conversions.
- if (!Src->isVectorTy() && !Dst->isVectorTy()) {
-
- // Scalar bitcasts are usually free.
- if (Opcode == Instruction::BitCast)
- return 0;
-
- if (Opcode == Instruction::Trunc &&
- TLI->isTruncateFree(SrcLT.second, DstLT.second))
- return 0;
-
- if (Opcode == Instruction::ZExt &&
- TLI->isZExtFree(SrcLT.second, DstLT.second))
- return 0;
-
- // Just check the op cost. If the operation is legal then assume it costs 1.
- if (!TLI->isOperationExpand(ISD, DstLT.second))
- return 1;
-
- // Assume that illegal scalar instruction are expensive.
- return 4;
- }
-
- // Check vector-to-vector casts.
- if (Dst->isVectorTy() && Src->isVectorTy()) {
-
- // If the cast is between same-sized registers, then the check is simple.
- if (SrcLT.first == DstLT.first &&
- SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
-
- // Bitcast between types that are legalized to the same type are free.
- if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
- return 0;
-
- // Assume that Zext is done using AND.
- if (Opcode == Instruction::ZExt)
- return 1;
-
- // Assume that sext is done using SHL and SRA.
- if (Opcode == Instruction::SExt)
- return 2;
-
- // Just check the op cost. If the operation is legal then assume it costs
- // 1 and multiply by the type-legalization overhead.
- if (!TLI->isOperationExpand(ISD, DstLT.second))
- return SrcLT.first * 1;
- }
-
- // If we are converting vectors and the operation is illegal, or
- // if the vectors are legalized to different types, estimate the
- // scalarization costs.
- unsigned Num = Dst->getVectorNumElements();
- unsigned Cost = getCastInstrCost(Opcode, Dst->getScalarType(),
- Src->getScalarType());
-
- // Return the cost of multiple scalar invocation plus the cost of
- // inserting and extracting the values.
- return getScalarizationOverhead(Dst, true, true) + Num * Cost;
- }
-
- // We already handled vector-to-vector and scalar-to-scalar conversions. This
- // is where we handle bitcast between vectors and scalars. We need to assume
- // that the conversion is scalarized in one way or another.
- if (Opcode == Instruction::BitCast)
- // Illegal bitcasts are done by storing and loading from a stack slot.
- return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
- (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);
-
- llvm_unreachable("Unhandled cast");
- }
-
-unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const {
- return 1;
-}
-
-unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
- Type *ValTy,
- Type *CondTy) const {
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- // Selects on vectors are actually vector selects.
- if (ISD == ISD::SELECT) {
- assert(CondTy && "CondTy must exist");
- if (CondTy->isVectorTy())
- ISD = ISD::VSELECT;
- }
-
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy);
-
- if (!TLI->isOperationExpand(ISD, LT.second)) {
- // The operation is legal. Assume it costs 1. Multiply
- // by the type-legalization overhead.
- return LT.first * 1;
- }
-
- // Otherwise, assume that the cast is scalarized.
- if (ValTy->isVectorTy()) {
- unsigned Num = ValTy->getVectorNumElements();
- if (CondTy)
- CondTy = CondTy->getScalarType();
- unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
- CondTy);
-
- // Return the cost of multiple scalar invocation plus the cost of inserting
- // and extracting the values.
- return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
- }
-
- // Unknown scalar opcode.
- return 1;
-}
-
-unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode,
- Type *Val,
- unsigned Index) const {
- return 1;
-}
-
-unsigned
-VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
- Type *Ty2) const {
- return 1;
-}
-
-unsigned
-VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src);
-
- // Assume that all loads of legal types cost 1.
- return LT.first;
-}
-
-unsigned
-VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const {
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Tp);
- return LT.first;
-}
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index 47489bb06c4e..54204d4b6390 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -1,7 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMX86AsmParser
- X86AsmLexer.cpp
X86AsmParser.cpp
)
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
deleted file mode 100644
index 66ad35370936..000000000000
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-//===-- X86AsmLexer.cpp - Tokenize X86 assembly to AsmTokens --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/X86BaseInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCTargetAsmLexer.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/SmallVector.h"
-
-using namespace llvm;
-
-namespace {
-
-class X86AsmLexer : public MCTargetAsmLexer {
- const MCAsmInfo &AsmInfo;
-
- bool tentativeIsValid;
- AsmToken tentativeToken;
-
- const AsmToken &lexTentative() {
- tentativeToken = getLexer()->Lex();
- tentativeIsValid = true;
- return tentativeToken;
- }
-
- const AsmToken &lexDefinite() {
- if (tentativeIsValid) {
- tentativeIsValid = false;
- return tentativeToken;
- }
- return getLexer()->Lex();
- }
-
- AsmToken LexTokenATT();
- AsmToken LexTokenIntel();
-protected:
- AsmToken LexToken() {
- if (!Lexer) {
- SetError(SMLoc(), "No MCAsmLexer installed");
- return AsmToken(AsmToken::Error, "", 0);
- }
-
- switch (AsmInfo.getAssemblerDialect()) {
- default:
- SetError(SMLoc(), "Unhandled dialect");
- return AsmToken(AsmToken::Error, "", 0);
- case 0:
- return LexTokenATT();
- case 1:
- return LexTokenIntel();
- }
- }
-public:
- X86AsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
- : MCTargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) {
- }
-};
-
-} // end anonymous namespace
-
-#define GET_REGISTER_MATCHER
-#include "X86GenAsmMatcher.inc"
-
-AsmToken X86AsmLexer::LexTokenATT() {
- AsmToken lexedToken = lexDefinite();
-
- switch (lexedToken.getKind()) {
- default:
- return lexedToken;
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- return lexedToken;
-
- case AsmToken::Percent: {
- const AsmToken &nextToken = lexTentative();
- if (nextToken.getKind() != AsmToken::Identifier)
- return lexedToken;
-
- if (unsigned regID = MatchRegisterName(nextToken.getString())) {
- lexDefinite();
-
- // FIXME: This is completely wrong when there is a space or other
- // punctuation between the % and the register name.
- StringRef regStr(lexedToken.getString().data(),
- lexedToken.getString().size() +
- nextToken.getString().size());
-
- return AsmToken(AsmToken::Register, regStr,
- static_cast<int64_t>(regID));
- }
-
- // Match register name failed. If this is "db[0-7]", match it as an alias
- // for dr[0-7].
- if (nextToken.getString().size() == 3 &&
- nextToken.getString().startswith("db")) {
- int RegNo = -1;
- switch (nextToken.getString()[2]) {
- case '0': RegNo = X86::DR0; break;
- case '1': RegNo = X86::DR1; break;
- case '2': RegNo = X86::DR2; break;
- case '3': RegNo = X86::DR3; break;
- case '4': RegNo = X86::DR4; break;
- case '5': RegNo = X86::DR5; break;
- case '6': RegNo = X86::DR6; break;
- case '7': RegNo = X86::DR7; break;
- }
-
- if (RegNo != -1) {
- lexDefinite();
-
- // FIXME: This is completely wrong when there is a space or other
- // punctuation between the % and the register name.
- StringRef regStr(lexedToken.getString().data(),
- lexedToken.getString().size() +
- nextToken.getString().size());
- return AsmToken(AsmToken::Register, regStr,
- static_cast<int64_t>(RegNo));
- }
- }
-
-
- return lexedToken;
- }
- }
-}
-
-AsmToken X86AsmLexer::LexTokenIntel() {
- const AsmToken &lexedToken = lexDefinite();
-
- switch(lexedToken.getKind()) {
- default:
- return lexedToken;
- case AsmToken::Error:
- SetError(Lexer->getErrLoc(), Lexer->getErr());
- return lexedToken;
- case AsmToken::Identifier: {
- unsigned regID = MatchRegisterName(lexedToken.getString().lower());
-
- if (regID)
- return AsmToken(AsmToken::Register,
- lexedToken.getString(),
- static_cast<int64_t>(regID));
- return lexedToken;
- }
- }
-}
-
-extern "C" void LLVMInitializeX86AsmLexer() {
- RegisterMCAsmLexer<X86AsmLexer> X(TheX86_32Target);
- RegisterMCAsmLexer<X86AsmLexer> Y(TheX86_64Target);
-}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index ce446e75737c..e4623228b397 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -8,21 +8,22 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/X86BaseInfo.h"
-#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -57,11 +58,18 @@ private:
X86Operand *ParseATTOperand();
X86Operand *ParseIntelOperand();
X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
- X86Operand *ParseIntelTypeOperator(SMLoc StartLoc);
- X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc);
- X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
+ X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind);
+ X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
+ SMLoc StartLoc);
+ X86Operand *ParseIntelBracExpression(unsigned SegReg, uint64_t ImmDisp,
+ unsigned Size);
+ X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp,
+ SMLoc &IdentStart);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+ X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End,
+ SMLoc SizeDirLoc, unsigned Size);
+
bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
SmallString<64> &Err);
@@ -168,31 +176,35 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
SMLoc OffsetOfLoc;
+ bool AddressOf;
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNo;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ unsigned Size;
+ };
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNo;
- } Reg;
-
- struct {
- const MCExpr *Val;
- bool NeedAsmRewrite;
- } Imm;
-
- struct {
- unsigned SegReg;
- const MCExpr *Disp;
- unsigned BaseReg;
- unsigned IndexReg;
- unsigned Scale;
- unsigned Size;
- bool NeedSizeDir;
- } Mem;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
X86Operand(KindTy K, SMLoc Start, SMLoc End)
@@ -230,11 +242,6 @@ struct X86Operand : public MCParsedAsmOperand {
return Imm.Val;
}
- bool needAsmRewrite() const {
- assert(Kind == Immediate && "Invalid access!");
- return Imm.NeedAsmRewrite;
- }
-
const MCExpr *getMemDisp() const {
assert(Kind == Memory && "Invalid access!");
return Mem.Disp;
@@ -331,18 +338,12 @@ struct X86Operand : public MCParsedAsmOperand {
return isImmSExti64i32Value(CE->getValue());
}
- unsigned getMemSize() const {
- assert(Kind == Memory && "Invalid access!");
- return Mem.Size;
- }
-
bool isOffsetOf() const {
return OffsetOfLoc.getPointer();
}
- bool needSizeDirective() const {
- assert(Kind == Memory && "Invalid access!");
- return Mem.NeedSizeDir;
+ bool needAddressOf() const {
+ return AddressOf;
}
bool isMem() const { return Kind == Memory; }
@@ -463,7 +464,7 @@ struct X86Operand : public MCParsedAsmOperand {
}
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
- SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size() - 1);
+ SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
@@ -471,24 +472,24 @@ struct X86Operand : public MCParsedAsmOperand {
}
static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
+ bool AddressOf = false,
SMLoc OffsetOfLoc = SMLoc()) {
X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
+ Res->AddressOf = AddressOf;
Res->OffsetOfLoc = OffsetOfLoc;
return Res;
}
- static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc,
- bool NeedRewrite = true){
+ static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
Res->Imm.Val = Val;
- Res->Imm.NeedAsmRewrite = NeedRewrite;
return Res;
}
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false){
+ unsigned Size = 0) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -496,7 +497,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
- Res->Mem.NeedSizeDir = NeedSizeDir;
+ Res->AddressOf = false;
return Res;
}
@@ -504,7 +505,7 @@ struct X86Operand : public MCParsedAsmOperand {
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false) {
+ unsigned Size = 0) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -519,7 +520,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
- Res->Mem.NeedSizeDir = NeedSizeDir;
+ Res->AddressOf = false;
return Res;
}
};
@@ -558,10 +559,12 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
Parser.Lex(); // Eat percent token.
const AsmToken &Tok = Parser.getTok();
+ EndLoc = Tok.getEndLoc();
+
if (Tok.isNot(AsmToken::Identifier)) {
if (isParsingIntelSyntax()) return true;
return Error(StartLoc, "invalid register name",
- SMRange(StartLoc, Tok.getEndLoc()));
+ SMRange(StartLoc, EndLoc));
}
RegNo = MatchRegisterName(Tok.getString());
@@ -582,13 +585,12 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
X86II::isX86_64ExtendedReg(RegNo))
return Error(StartLoc, "register %"
+ Tok.getString() + " is only available in 64-bit mode",
- SMRange(StartLoc, Tok.getEndLoc()));
+ SMRange(StartLoc, EndLoc));
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
RegNo = X86::ST0;
- EndLoc = Tok.getLoc();
Parser.Lex(); // Eat 'st'
// Check to see if we have '(4)' after %st.
@@ -615,11 +617,13 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (getParser().Lex().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
- EndLoc = Tok.getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat ')'
return false;
}
+ EndLoc = Parser.getTok().getEndLoc();
+
// If this is "db[0-7]", match it as an alias
// for dr[0-7].
if (RegNo == 0 && Tok.getString().size() == 3 &&
@@ -636,7 +640,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
}
if (RegNo != 0) {
- EndLoc = Tok.getLoc();
+ EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat it.
return false;
}
@@ -645,10 +649,9 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (RegNo == 0) {
if (isParsingIntelSyntax()) return true;
return Error(StartLoc, "invalid register name",
- SMRange(StartLoc, Tok.getEndLoc()));
+ SMRange(StartLoc, EndLoc));
}
- EndLoc = Tok.getEndLoc();
Parser.Lex(); // Eat identifier token.
return false;
}
@@ -673,115 +676,354 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
return Size;
}
-X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
+enum IntelBracExprState {
+ IBES_START,
+ IBES_LBRAC,
+ IBES_RBRAC,
+ IBES_REGISTER,
+ IBES_REGISTER_STAR,
+ IBES_REGISTER_STAR_INTEGER,
+ IBES_INTEGER,
+ IBES_INTEGER_STAR,
+ IBES_INDEX_REGISTER,
+ IBES_IDENTIFIER,
+ IBES_DISP_EXPR,
+ IBES_MINUS,
+ IBES_ERROR
+};
+
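+// Example: parsing "[eax + ebx*4 + 16]" drives the machine through
+//   START -> REGISTER (eax) -> START ('+', BaseReg = eax)
+//   -> REGISTER (ebx) -> REGISTER_STAR ('*') -> INDEX_REGISTER (Scale = 4)
+//   -> START ('+') -> INTEGER (16) -> RBRAC (']', Disp += 16),
+// a valid end state with BaseReg=eax, IndexReg=ebx, Scale=4, Disp=16.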
+class IntelBracExprStateMachine {
+ IntelBracExprState State;
+ unsigned BaseReg, IndexReg, Scale;
+ int64_t Disp;
+
+ unsigned TmpReg;
+ int64_t TmpInteger;
+
+ bool isPlus;
+
+public:
+ IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
+ State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(disp),
+ TmpReg(0), TmpInteger(0), isPlus(true) {}
+
+ unsigned getBaseReg() { return BaseReg; }
+ unsigned getIndexReg() { return IndexReg; }
+ unsigned getScale() { return Scale; }
+ int64_t getDisp() { return Disp; }
+ bool isValidEndState() { return State == IBES_RBRAC; }
+
+ void onPlus() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_INTEGER:
+ State = IBES_START;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_START;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert(!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_START;
+ break;
+ }
+ isPlus = true;
+ }
+ void onMinus() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_MINUS;
+ break;
+ case IBES_INTEGER:
+ State = IBES_START;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_START;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert(!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_START;
+ break;
+ }
+ isPlus = false;
+ }
+ void onRegister(unsigned Reg) {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_REGISTER;
+ TmpReg = Reg;
+ break;
+ case IBES_INTEGER_STAR:
+ assert(!IndexReg && "IndexReg already set!");
+ State = IBES_INDEX_REGISTER;
+ IndexReg = Reg;
+ Scale = TmpInteger;
+ break;
+ }
+ }
+ void onDispExpr() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_DISP_EXPR;
+ break;
+ }
+ }
+ void onInteger(int64_t TmpInt) {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_INTEGER;
+ TmpInteger = TmpInt;
+ break;
+ case IBES_MINUS:
+ State = IBES_INTEGER;
+ TmpInteger = TmpInt;
+ break;
+ case IBES_REGISTER_STAR:
+ assert(!IndexReg && "IndexReg already set!");
+ State = IBES_INDEX_REGISTER;
+ IndexReg = TmpReg;
+ Scale = TmpInt;
+ break;
+ }
+ }
+ void onStar() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_INTEGER:
+ State = IBES_INTEGER_STAR;
+ break;
+ case IBES_REGISTER:
+ State = IBES_REGISTER_STAR;
+ break;
+ }
+ }
+ void onLBrac() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_RBRAC:
+ State = IBES_START;
+ isPlus = true;
+ break;
+ }
+ }
+ void onRBrac() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_DISP_EXPR:
+ State = IBES_RBRAC;
+ break;
+ case IBES_INTEGER:
+ State = IBES_RBRAC;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_RBRAC;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert(!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_RBRAC;
+ break;
+ }
+ }
+};
+
+X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start,
+ SMLoc End, SMLoc SizeDirLoc,
+ unsigned Size) {
+ bool NeedSizeDir = false;
+ bool IsVarDecl = false;
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other than an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ unsigned tLength, tSize, tType;
+ SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
+ tSize, tType, IsVarDecl);
+ if (!Size) {
+ Size = tType * 8; // Size is in terms of bits in this context.
+ NeedSizeDir = Size > 0;
+ }
+ }
+
+ // If this is not a VarDecl then assume it is a FuncDecl or some other label
+ // reference. We need an 'r' constraint here, so we need to create a register
+ // operand to ensure proper matching. Just pick a GPR based on the size of a
+ // pointer.
+ if (!IsVarDecl) {
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
+ }
+
+ if (NeedSizeDir)
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
+ /*Len*/0, Size));
+
+ // When parsing inline assembly we set the base register to a non-zero value
+ // as we don't know the actual value at this time. This is necessary to
+ // get the matching correct in some cases.
+ return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
+ /*Scale*/1, Start, End, Size);
+}
+
+X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
+ uint64_t ImmDisp,
unsigned Size) {
- unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
const AsmToken &Tok = Parser.getTok();
- SMLoc Start = Tok.getLoc(), End;
-
- const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
- // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ]
+ SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
// Eat '['
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(Start, "Expected '[' token!");
Parser.Lex();
+ unsigned TmpReg = 0;
+
+ // Try to handle '[' 'Symbol' ']'
if (getLexer().is(AsmToken::Identifier)) {
- // Parse BaseReg
- if (ParseRegister(BaseReg, Start, End)) {
- // Handle '[' 'symbol' ']'
- if (getParser().ParseExpression(Disp, End)) return 0;
+ if (ParseRegister(TmpReg, Start, End)) {
+ const MCExpr *Disp;
+ SMLoc IdentStart = Tok.getLoc();
+ if (getParser().parseExpression(Disp, End))
+ return 0;
+
+ if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
+ return Err;
+
if (getLexer().isNot(AsmToken::RBrac))
- return ErrorOperand(Start, "Expected ']' token!");
- Parser.Lex();
- End = Tok.getLoc();
- return X86Operand::CreateMem(Disp, Start, End, Size);
- }
- } else if (getLexer().is(AsmToken::Integer)) {
- int64_t Val = Tok.getIntVal();
+ return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!");
+
+ // FIXME: We don't handle 'ImmDisp' '[' 'Symbol' ']'.
+ if (ImmDisp)
+ return ErrorOperand(Start, "Unsupported immediate displacement!");
+
+ // Adjust the EndLoc due to the ']'.
+ End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1);
Parser.Lex();
- SMLoc Loc = Tok.getLoc();
- if (getLexer().is(AsmToken::RBrac)) {
- // Handle '[' number ']'
- Parser.Lex();
- End = Tok.getLoc();
- const MCExpr *Disp = MCConstantExpr::Create(Val, getContext());
- if (SegReg)
- return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale,
- Start, End, Size);
+ if (!isParsingInlineAsm())
return X86Operand::CreateMem(Disp, Start, End, Size);
- } else if (getLexer().is(AsmToken::Star)) {
- // Handle '[' Scale*IndexReg ']'
- Parser.Lex();
- SMLoc IdxRegLoc = Tok.getLoc();
- if (ParseRegister(IndexReg, IdxRegLoc, End))
- return ErrorOperand(IdxRegLoc, "Expected register");
- Scale = Val;
- } else
- return ErrorOperand(Loc, "Unexpected token");
- }
- // Parse ][ as a plus.
- bool ExpectRBrac = true;
- if (getLexer().is(AsmToken::RBrac)) {
- ExpectRBrac = false;
- Parser.Lex();
- End = Tok.getLoc();
+ // We want the size directive before the '['.
+ SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1);
+ return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size);
+ }
}
- if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) ||
- getLexer().is(AsmToken::LBrac)) {
- ExpectRBrac = true;
- bool isPlus = getLexer().is(AsmToken::Plus) ||
- getLexer().is(AsmToken::LBrac);
- Parser.Lex();
- SMLoc PlusLoc = Tok.getLoc();
- if (getLexer().is(AsmToken::Integer)) {
+ // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an
+ // immediate displacement before the bracketed expression.
+ bool Done = false;
+ IntelBracExprStateMachine SM(Parser, ImmDisp);
+
+ // If we parsed a register, then the end loc has already been set and
+ // the identifier has already been lexed. We also need to update the
+ // state.
+ if (TmpReg)
+ SM.onRegister(TmpReg);
+
+ const MCExpr *Disp = 0;
+ while (!Done) {
+ bool UpdateLocLex = true;
+
+ // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
+ // identifier. Don't try to parse it as a register.
+ if (Tok.getString().startswith("."))
+ break;
+
+ switch (getLexer().getKind()) {
+ default: {
+ if (SM.isValidEndState()) {
+ Done = true;
+ break;
+ }
+ return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+ }
+ case AsmToken::Identifier: {
+ // This could be a register or a displacement expression.
+ if (!ParseRegister(TmpReg, Start, End)) {
+ SM.onRegister(TmpReg);
+ UpdateLocLex = false;
+ break;
+ } else if (!getParser().parseExpression(Disp, End)) {
+ SM.onDispExpr();
+ UpdateLocLex = false;
+ break;
+ }
+ return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
+ }
+ case AsmToken::Integer: {
int64_t Val = Tok.getIntVal();
- Parser.Lex();
- if (getLexer().is(AsmToken::Star)) {
- Parser.Lex();
- SMLoc IdxRegLoc = Tok.getLoc();
- if (ParseRegister(IndexReg, IdxRegLoc, End))
- return ErrorOperand(IdxRegLoc, "Expected register");
- Scale = Val;
- } else if (getLexer().is(AsmToken::RBrac)) {
- const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext());
- Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext());
- } else
- return ErrorOperand(PlusLoc, "unexpected token after +");
- } else if (getLexer().is(AsmToken::Identifier)) {
- // This could be an index register or a displacement expression.
- End = Tok.getLoc();
- if (!IndexReg)
- ParseRegister(IndexReg, Start, End);
- else if (getParser().ParseExpression(Disp, End)) return 0;
+ SM.onInteger(Val);
+ break;
}
- }
-
- // Parse ][ as a plus.
- if (getLexer().is(AsmToken::RBrac)) {
- ExpectRBrac = false;
- Parser.Lex();
- End = Tok.getLoc();
- if (getLexer().is(AsmToken::LBrac)) {
- ExpectRBrac = true;
- Parser.Lex();
- if (getParser().ParseExpression(Disp, End))
- return 0;
+ case AsmToken::Plus: SM.onPlus(); break;
+ case AsmToken::Minus: SM.onMinus(); break;
+ case AsmToken::Star: SM.onStar(); break;
+ case AsmToken::LBrac: SM.onLBrac(); break;
+ case AsmToken::RBrac: SM.onRBrac(); break;
+ }
+ if (!Done && UpdateLocLex) {
+ End = Tok.getLoc();
+ Parser.Lex(); // Consume the token.
}
- } else if (ExpectRBrac) {
- if (getParser().ParseExpression(Disp, End))
- return 0;
}
- if (ExpectRBrac) {
- if (getLexer().isNot(AsmToken::RBrac))
- return ErrorOperand(End, "expected ']' token!");
- Parser.Lex();
- End = Tok.getLoc();
- }
+ if (!Disp)
+ Disp = MCConstantExpr::Create(SM.getDisp(), getContext());
// Parse the dot operator (e.g., [ebx].foo.bar).
if (Tok.getString().startswith(".")) {
@@ -790,22 +1032,73 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
if (ParseIntelDotOperator(Disp, &NewDisp, Err))
return ErrorOperand(Tok.getLoc(), Err);
+ End = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the field.
Disp = NewDisp;
}
- End = Tok.getLoc();
+ int BaseReg = SM.getBaseReg();
+ int IndexReg = SM.getIndexReg();
// handle [-42]
- if (!BaseReg && !IndexReg)
- return X86Operand::CreateMem(Disp, Start, End, Size);
+ if (!BaseReg && !IndexReg) {
+ if (!SegReg)
+ return X86Operand::CreateMem(Disp, Start, End);
+ else
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
+ }
+ int Scale = SM.getScale();
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
Start, End, Size);
}
+// Inline assembly may use variable names with namespace alias qualifiers.
+X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp,
+ SMLoc &IdentStart) {
+ // We should only see Foo::Bar if we're parsing inline assembly.
+ if (!isParsingInlineAsm())
+ return 0;
+
+ // If we don't see a ':' then there can't be a qualifier.
+ if (getLexer().isNot(AsmToken::Colon))
+ return 0;
+
+ bool Done = false;
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc IdentEnd = Tok.getEndLoc();
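+ // E.g. for "Foo::Bar" we consume ':', ':' and the trailing identifier; the
+ // fully qualified name is then rebuilt from IdentStart..IdentEnd below.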
+ while (!Done) {
+ switch (getLexer().getKind()) {
+ default:
+ Done = true;
+ break;
+ case AsmToken::Colon:
+ getLexer().Lex(); // Consume ':'.
+ if (getLexer().isNot(AsmToken::Colon))
+ return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
+ getLexer().Lex(); // Consume second ':'.
+ if (getLexer().isNot(AsmToken::Identifier))
+ return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
+ break;
+ case AsmToken::Identifier:
+ IdentEnd = Tok.getEndLoc();
+ getLexer().Lex(); // Consume the identifier.
+ break;
+ }
+ }
+ size_t Len = IdentEnd.getPointer() - IdentStart.getPointer();
+ StringRef Identifier(IdentStart.getPointer(), Len);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
+ return 0;
+}
+
/// ParseIntelMemOperand - Parse intel style memory operand.
-X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
+X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
+ uint64_t ImmDisp,
+ SMLoc Start) {
const AsmToken &Tok = Parser.getTok();
SMLoc End;
@@ -817,8 +1110,21 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
Parser.Lex();
}
+ // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
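+ // (E.g. the MASM form "mov eax, 4[ebx]", where the displacement is written
+ // before the brackets.)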
+ if (getLexer().is(AsmToken::Integer)) {
+ const AsmToken &IntTok = Parser.getTok();
+ if (isParsingInlineAsm())
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
+ IntTok.getLoc()));
+ uint64_t ImmDisp = IntTok.getIntVal();
+ Parser.Lex(); // Eat the integer.
+ if (getLexer().isNot(AsmToken::LBrac))
+ return ErrorOperand(Start, "Expected '[' token!");
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+ }
+
if (getLexer().is(AsmToken::LBrac))
- return ParseIntelBracExpression(SegReg, Size);
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
if (!ParseRegister(SegReg, Start, End)) {
// Handle SegReg : [ ... ]
@@ -827,32 +1133,21 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
Parser.Lex(); // Eat :
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(Start, "Expected '[' token!");
- return ParseIntelBracExpression(SegReg, Size);
+ return ParseIntelBracExpression(SegReg, ImmDisp, Size);
}
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
- if (getParser().ParseExpression(Disp, End)) return 0;
- End = Parser.getTok().getLoc();
+ SMLoc IdentStart = Tok.getLoc();
+ if (getParser().parseExpression(Disp, End))
+ return 0;
- bool NeedSizeDir = false;
- if (!Size && isParsingInlineAsm()) {
- if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
- const MCSymbol &Sym = SymRef->getSymbol();
- // FIXME: The SemaLookup will fail if the name is anything other than an
- // identifier.
- // FIXME: Pass a valid SMLoc.
- SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size);
- NeedSizeDir = Size > 0;
- }
- }
if (!isParsingInlineAsm())
return X86Operand::CreateMem(Disp, Start, End, Size);
- else
- // When parsing inline assembly we set the base register to a non-zero value
- // as we don't know the actual value at this time. This is necessary to
- // get the matching correct in some cases.
- return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
- /*Scale*/1, Start, End, Size, NeedSizeDir);
+
+ if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
+ return Err;
+
+ return CreateMemForInlineAsm(Disp, Start, End, Start, Size);
}
/// Parse the '.' operator.
@@ -918,11 +1213,9 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
SMLoc End;
const MCExpr *Val;
- if (getParser().ParseExpression(Val, End))
+ if (getParser().parseExpression(Val, End))
return ErrorOperand(Start, "Unable to parse expression!");
- End = Parser.getTok().getLoc();
-
// Don't emit the offset operator.
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
@@ -930,13 +1223,23 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
// register operand to ensure proper matching. Just pick a GPR based on
// the size of a pointer.
unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
- return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc);
+ return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
+ OffsetOfLoc);
}
-/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or
-/// C++ type or variable. If the variable is an array, TYPE returns the size of
-/// a single element of the array.
-X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) {
+enum IntelOperatorKind {
+ IOK_LENGTH,
+ IOK_SIZE,
+ IOK_TYPE
+};
+
+/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
+/// returns the number of elements in an array. It returns the value 1 for
+/// non-array variables. The SIZE operator returns the size of a C or C++
+/// variable. A variable's size is the product of its LENGTH and TYPE. The
+/// TYPE operator returns the size of a C or C++ type or variable. If the
+/// variable is an array, TYPE returns the size of a single element.
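+/// For example, given the C declaration 'int arr[4]' (with 4-byte ints):
+///   LENGTH arr -> 4, TYPE arr -> 4, SIZE arr -> 16 (LENGTH * TYPE).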
+X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
SMLoc TypeLoc = Start;
Parser.Lex(); // Eat the operator.
Start = Parser.getTok().getLoc();
@@ -944,76 +1247,92 @@ X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) {
SMLoc End;
const MCExpr *Val;
- if (getParser().ParseExpression(Val, End))
+ if (getParser().parseExpression(Val, End))
return 0;
- End = Parser.getTok().getLoc();
-
- unsigned Size = 0;
+ unsigned Length = 0, Size = 0, Type = 0;
if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
const MCSymbol &Sym = SymRef->getSymbol();
// FIXME: The SemaLookup will fail if the name is anything other than an
// identifier.
// FIXME: Pass a valid SMLoc.
- if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size))
- return ErrorOperand(Start, "Unable to lookup TYPE of expr!");
-
- Size /= 8; // Size is in terms of bits, but we want bytes in the context.
+ bool IsVarDecl;
+ if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
+ Size, Type, IsVarDecl))
+ return ErrorOperand(Start, "Unable to lookup expr!");
+ }
+ unsigned CVal;
+ switch(OpKind) {
+ default: llvm_unreachable("Unexpected operand kind!");
+ case IOK_LENGTH: CVal = Length; break;
+ case IOK_SIZE: CVal = Size; break;
+ case IOK_TYPE: CVal = Type; break;
}
// Rewrite the operator and the C or C++ type or variable in terms of an
// immediate. E.g. TYPE foo -> $$4
unsigned Len = End.getPointer() - TypeLoc.getPointer();
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size));
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
- const MCExpr *Imm = MCConstantExpr::Create(Size, getContext());
- return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false);
+ const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
+ return X86Operand::CreateImm(Imm, Start, End);
}
X86Operand *X86AsmParser::ParseIntelOperand() {
SMLoc Start = Parser.getTok().getLoc(), End;
-
- // offset operator.
StringRef AsmTokStr = Parser.getTok().getString();
- if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") &&
- isParsingInlineAsm())
- return ParseIntelOffsetOfOperator(Start);
-
- // Type directive.
- if ((AsmTokStr == "type" || AsmTokStr == "TYPE") &&
- isParsingInlineAsm())
- return ParseIntelTypeOperator(Start);
-
- // Unsupported directives.
- if (isParsingIntelSyntax() &&
- (AsmTokStr == "size" || AsmTokStr == "SIZE" ||
- AsmTokStr == "length" || AsmTokStr == "LENGTH"))
- return ErrorOperand(Start, "Unsupported directive!");
-
- // immediate.
+
+ // Offset, length, type and size operators.
+ if (isParsingInlineAsm()) {
+ if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
+ return ParseIntelOffsetOfOperator(Start);
+ if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
+ return ParseIntelOperator(Start, IOK_LENGTH);
+ if (AsmTokStr == "size" || AsmTokStr == "SIZE")
+ return ParseIntelOperator(Start, IOK_SIZE);
+ if (AsmTokStr == "type" || AsmTokStr == "TYPE")
+ return ParseIntelOperator(Start, IOK_TYPE);
+ }
+
+ // Immediate.
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
getLexer().is(AsmToken::Minus)) {
const MCExpr *Val;
- if (!getParser().ParseExpression(Val, End)) {
- End = Parser.getTok().getLoc();
- return X86Operand::CreateImm(Val, Start, End);
+ bool isInteger = getLexer().is(AsmToken::Integer);
+ if (!getParser().parseExpression(Val, End)) {
+ if (isParsingInlineAsm())
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
+ // Immediate.
+ if (getLexer().isNot(AsmToken::LBrac))
+ return X86Operand::CreateImm(Val, Start, End);
+
+ // Only positive immediates are valid.
+ if (!isInteger) {
+ Error(Parser.getTok().getLoc(), "expected a positive immediate "
+ "displacement before bracketed expr.");
+ return 0;
+ }
+
+ // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+ if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
+ return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
}
}
- // register
+ // Register.
unsigned RegNo = 0;
if (!ParseRegister(RegNo, Start, End)) {
// If this is a segment register followed by a ':', then this is the start
// of a memory reference, otherwise this is a normal register reference.
if (getLexer().isNot(AsmToken::Colon))
- return X86Operand::CreateReg(RegNo, Start, Parser.getTok().getLoc());
+ return X86Operand::CreateReg(RegNo, Start, End);
getParser().Lex(); // Eat the colon.
- return ParseIntelMemOperand(RegNo, Start);
+ return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
}
- // mem operand
- return ParseIntelMemOperand(0, Start);
+ // Memory operand.
+ return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
}
X86Operand *X86AsmParser::ParseATTOperand() {
@@ -1037,7 +1356,6 @@ X86Operand *X86AsmParser::ParseATTOperand() {
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
-
getParser().Lex(); // Eat the colon.
return ParseMemOperand(RegNo, Start);
}
@@ -1046,7 +1364,7 @@ X86Operand *X86AsmParser::ParseATTOperand() {
SMLoc Start = Parser.getTok().getLoc(), End;
Parser.Lex();
const MCExpr *Val;
- if (getParser().ParseExpression(Val, End))
+ if (getParser().parseExpression(Val, End))
return 0;
return X86Operand::CreateImm(Val, Start, End);
}
@@ -1064,7 +1382,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
if (getLexer().isNot(AsmToken::LParen)) {
SMLoc ExprEnd;
- if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
+ if (getParser().parseExpression(Disp, ExprEnd)) return 0;
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
@@ -1090,7 +1408,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc ExprEnd;
// It must be an parenthesized expression, parse it now.
- if (getParser().ParseParenExpression(Disp, ExprEnd))
+ if (getParser().parseParenExpression(Disp, ExprEnd))
return 0;
// After parsing the base expression we could either have a parenthesized
@@ -1150,7 +1468,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc Loc = Parser.getTok().getLoc();
int64_t ScaleVal;
- if (getParser().ParseAbsoluteExpression(ScaleVal)){
+ if (getParser().parseAbsoluteExpression(ScaleVal)){
Error(Loc, "expected scale expression");
return 0;
}
@@ -1169,7 +1487,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc Loc = Parser.getTok().getLoc();
int64_t Value;
- if (getParser().ParseAbsoluteExpression(Value))
+ if (getParser().parseAbsoluteExpression(Value))
return 0;
if (Value != 1)
@@ -1183,7 +1501,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
return 0;
}
- SMLoc MemEnd = Parser.getTok().getLoc();
+ SMLoc MemEnd = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ')'.
// If we have both a base register and an index register make sure they are
@@ -1310,7 +1628,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
else {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
@@ -1321,14 +1639,14 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
else {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
}
@@ -1509,245 +1827,78 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
return false;
}
-bool X86AsmParser::
-processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
- switch (Inst.getOpcode()) {
- default: return false;
- case X86::AND16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
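+// Shared helper for the convert*ri8 routines below: rewrites an
+// ALU-with-accumulator form (e.g. "and ax, imm16") into its sign-extended
+// 8-bit immediate encoding. Compare instructions have no destination
+// register operand, hence the isCmp flag.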
+static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
+ bool isCmp) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ if (!isCmp)
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
+bool X86AsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
+ switch (Inst.getOpcode()) {
+ default: return false;
+ case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
+ case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
+ case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
+ case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
+ case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
+ case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
+ case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
+ case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
+ case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
+ case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
+ case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
+ case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
+ case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
+ case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
+ case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
+ case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
+ case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
+ case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
+ case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
+ case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
+ case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
+ case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
+ case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
+ case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
}
}
+static const char *getSubtargetFeatureName(unsigned Val);
bool X86AsmParser::
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -1809,10 +1960,21 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Out.EmitInstruction(Inst);
Opcode = Inst.getOpcode();
return false;
- case Match_MissingFeature:
- Error(IDLoc, "instruction requires a CPU feature not currently enabled",
- EmptyRanges, MatchingInlineAsm);
- return true;
+ case Match_MissingFeature: {
+ assert(ErrorInfo && "Unknown missing feature!");
+ // Special case the error message for the very common case where only
+ // a single subtarget feature is missing.
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
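+ // ErrorInfo is a bitmask of missing subtarget features; append the name of
+ // each set bit to the diagnostic.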
+ for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+ if (ErrorInfo & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
+ }
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
@@ -1843,19 +2005,32 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Check for the various suffix matches.
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
+ unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
unsigned Match1, Match2, Match3, Match4;
Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match1 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[1];
Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match2 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[2];
Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match3 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[3];
Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match4 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
// Restore the old token.
Op->setTokenValue(Base);
@@ -1936,9 +2111,16 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// missing feature.
if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
(Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
- Error(IDLoc, "instruction requires a CPU feature not currently enabled",
- EmptyRanges, MatchingInlineAsm);
- return true;
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
+ if (ErrorInfoMissingFeature & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
}
// If one instruction matched with an invalid operand, report this as an
@@ -1986,10 +2168,10 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -2027,16 +2209,13 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
return false;
}
-
-extern "C" void LLVMInitializeX86AsmLexer();
-
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
- LLVMInitializeX86AsmLexer();
}
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
+#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index f4d03a602cf5..d14899d28a23 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -10,7 +10,6 @@ tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM X86GenEDInfo.inc -gen-enhanced-disassembly-info)
add_public_tablegen_target(X86CommonTableGen)
set(sources
@@ -26,11 +25,13 @@ set(sources
X86JITInfo.cpp
X86MCInstLower.cpp
X86MachineFunctionInfo.cpp
+ X86PadShortFunction.cpp
X86RegisterInfo.cpp
X86SelectionDAGInfo.cpp
X86Subtarget.cpp
X86TargetMachine.cpp
X86TargetObjectFile.cpp
+ X86TargetTransformInfo.cpp
X86VZeroUpper.cpp
)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index f13692739a17..ca6f80ce3e58 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -16,11 +16,9 @@
#include "X86Disassembler.h"
#include "X86DisassemblerDecoder.h"
-
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -33,7 +31,6 @@
#include "X86GenRegisterInfo.inc"
#define GET_INSTRINFO_ENUM
#include "X86GenInstrInfo.inc"
-#include "X86GenEDInfo.inc"
using namespace llvm;
using namespace llvm::X86Disassembler;
@@ -84,10 +81,6 @@ X86GenericDisassembler::~X86GenericDisassembler() {
delete MII;
}
-const EDInstInfo *X86GenericDisassembler::getEDInfo() const {
- return instInfoX86;
-}
-
/// regionReader - a callback function that wraps the readByte method from
/// MemoryObject.
///
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 981701f52764..b92427a7e91a 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -95,8 +95,6 @@ class MCSubtargetInfo;
class MemoryObject;
class raw_ostream;
-struct EDInstInfo;
-
namespace X86Disassembler {
/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
@@ -122,8 +120,6 @@ public:
raw_ostream &vStream,
raw_ostream &cStream) const;
- /// getEDInfo - See MCDisassembler.
- const EDInstInfo *getEDInfo() const;
private:
DisassemblerMode fMode;
};
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 85d8a991dd6e..e40edba6d689 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -61,7 +61,7 @@ static int modRMRequired(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode) {
const struct ContextDecision* decision = 0;
-
+
switch (type) {
case ONEBYTE:
decision = &ONEBYTE_SYM;
@@ -102,7 +102,7 @@ static InstrUID decode(OpcodeType type,
uint8_t opcode,
uint8_t modRM) {
const struct ModRMDecision* dec = 0;
-
+
switch (type) {
case ONEBYTE:
dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
@@ -123,7 +123,7 @@ static InstrUID decode(OpcodeType type,
dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
}
-
+
switch (dec->modrm_type) {
default:
debug("Corrupt table! Unknown modrm_type");
@@ -171,10 +171,10 @@ static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
*/
static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
-
+
if (!ret)
++(insn->readerCursor);
-
+
return ret;
}
@@ -238,19 +238,19 @@ CONSUME_FUNC(consumeUInt64, uint64_t)
*/
static void dbgprintf(struct InternalInstruction* insn,
const char* format,
- ...) {
+ ...) {
char buffer[256];
va_list ap;
-
+
if (!insn->dlog)
return;
-
+
va_start(ap, format);
(void)vsnprintf(buffer, sizeof(buffer), format, ap);
va_end(ap);
-
+
insn->dlog(insn->dlogArg, buffer);
-
+
return;
}
@@ -305,27 +305,40 @@ static int readPrefixes(struct InternalInstruction* insn) {
BOOL prefixGroups[4] = { FALSE };
uint64_t prefixLocation;
uint8_t byte = 0;
-
+
BOOL hasAdSize = FALSE;
BOOL hasOpSize = FALSE;
-
+
dbgprintf(insn, "readPrefixes()");
-
+
while (isPrefix) {
prefixLocation = insn->readerCursor;
-
+
if (consumeByte(insn, &byte))
return -1;
/*
- * If the first byte is a LOCK prefix break and let it be disassembled
- * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>.
- * FIXME there is currently no way to get the disassembler to print the
- * lock prefix if it is not the first byte.
+ * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
+ * break and let it be disassembled as a normal "instruction".
*/
- if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
- break;
-
+ if (insn->readerCursor - 1 == insn->startLocation
+ && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) {
+ uint8_t nextByte;
+ if (byte == 0xf0)
+ break;
+ if (lookAtByte(insn, &nextByte))
+ return -1;
+ if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
+ if (consumeByte(insn, &nextByte))
+ return -1;
+ if (lookAtByte(insn, &nextByte))
+ return -1;
+ unconsumeByte(insn);
+ }
+ if (nextByte != 0x0f && nextByte != 0x90)
+ break;
+ }
+
switch (byte) {
case 0xf0: /* LOCK */
case 0xf2: /* REPNE/REPNZ */
@@ -387,21 +400,21 @@ static int readPrefixes(struct InternalInstruction* insn) {
isPrefix = FALSE;
break;
}
-
+
if (isPrefix)
dbgprintf(insn, "Found prefix 0x%hhx", byte);
}
-
+
insn->vexSize = 0;
-
+
if (byte == 0xc4) {
uint8_t byte1;
-
+
if (lookAtByte(insn, &byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
-
+
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexSize = 3;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
@@ -410,67 +423,67 @@ static int readPrefixes(struct InternalInstruction* insn) {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
-
+
if (insn->vexSize == 3) {
insn->vexPrefix[0] = byte;
consumeByte(insn, &insn->vexPrefix[1]);
consumeByte(insn, &insn->vexPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
-
+
if (insn->mode == MODE_64BIT) {
- insn->rexPrefix = 0x40
+ insn->rexPrefix = 0x40
| (wFromVEX3of3(insn->vexPrefix[2]) << 3)
| (rFromVEX2of3(insn->vexPrefix[1]) << 2)
| (xFromVEX2of3(insn->vexPrefix[1]) << 1)
| (bFromVEX2of3(insn->vexPrefix[1]) << 0);
}
-
+
switch (ppFromVEX3of3(insn->vexPrefix[2]))
{
default:
break;
case VEX_PREFIX_66:
- hasOpSize = TRUE;
+ hasOpSize = TRUE;
break;
}
-
+
dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
}
}
else if (byte == 0xc5) {
uint8_t byte1;
-
+
if (lookAtByte(insn, &byte1)) {
dbgprintf(insn, "Couldn't read second byte of VEX");
return -1;
}
-
+
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
insn->vexSize = 2;
}
else {
unconsumeByte(insn);
}
-
+
if (insn->vexSize == 2) {
insn->vexPrefix[0] = byte;
consumeByte(insn, &insn->vexPrefix[1]);
-
+
if (insn->mode == MODE_64BIT) {
- insn->rexPrefix = 0x40
+ insn->rexPrefix = 0x40
| (rFromVEX2of2(insn->vexPrefix[1]) << 2);
}
-
+
switch (ppFromVEX2of2(insn->vexPrefix[1]))
{
default:
break;
case VEX_PREFIX_66:
- hasOpSize = TRUE;
+ hasOpSize = TRUE;
break;
}
-
+
dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
}
}
@@ -478,17 +491,17 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (insn->mode == MODE_64BIT) {
if ((byte & 0xf0) == 0x40) {
uint8_t opcodeByte;
-
+
if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
dbgprintf(insn, "Redundant REX prefix");
return -1;
}
-
+
insn->rexPrefix = byte;
insn->necessaryPrefixLocation = insn->readerCursor - 2;
-
+
dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
- } else {
+ } else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
@@ -526,7 +539,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->immediateSize = (hasOpSize ? 2 : 4);
}
}
-
+
return 0;
}
@@ -537,22 +550,22 @@ static int readPrefixes(struct InternalInstruction* insn) {
* @param insn - The instruction whose opcode is to be read.
* @return - 0 if the opcode could be read successfully; nonzero otherwise.
*/
-static int readOpcode(struct InternalInstruction* insn) {
+static int readOpcode(struct InternalInstruction* insn) {
/* Determine the length of the primary opcode */
-
+
uint8_t current;
-
+
dbgprintf(insn, "readOpcode()");
-
+
insn->opcodeType = ONEBYTE;
-
+
if (insn->vexSize == 3)
{
switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
{
default:
dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
- return -1;
+ return -1;
case 0:
break;
case VEX_LOB_0F:
@@ -564,7 +577,7 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->threeByteEscape = 0x38;
insn->opcodeType = THREEBYTE_38;
return consumeByte(insn, &insn->opcode);
- case VEX_LOB_0F3A:
+ case VEX_LOB_0F3A:
insn->twoByteEscape = 0x0f;
insn->threeByteEscape = 0x3a;
insn->opcodeType = THREEBYTE_3A;
@@ -577,68 +590,68 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
}
-
+
if (consumeByte(insn, &current))
return -1;
-
+
if (current == 0x0f) {
dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
-
+
insn->twoByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
if (current == 0x38) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_38;
} else if (current == 0x3a) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_3A;
} else if (current == 0xa6) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_A6;
} else if (current == 0xa7) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
-
+
insn->threeByteEscape = current;
-
+
if (consumeByte(insn, &current))
return -1;
-
+
insn->opcodeType = THREEBYTE_A7;
} else {
dbgprintf(insn, "Didn't find a three-byte escape prefix");
-
+
insn->opcodeType = TWOBYTE;
}
}
-
+
/*
* At this point we have consumed the full opcode.
* Anything we consume from here on must be unconsumed.
*/
-
+
insn->opcode = current;
-
+
return 0;
}
@@ -660,19 +673,19 @@ static int getIDWithAttrMask(uint16_t* instructionID,
struct InternalInstruction* insn,
uint8_t attrMask) {
BOOL hasModRMExtension;
-
+
uint8_t instructionClass;
instructionClass = contextForAttrs(attrMask);
-
+
hasModRMExtension = modRMRequired(insn->opcodeType,
instructionClass,
insn->opcode);
-
+
if (hasModRMExtension) {
if (readModRM(insn))
return -1;
-
+
*instructionID = decode(insn->opcodeType,
instructionClass,
insn->opcode,
@@ -683,7 +696,7 @@ static int getIDWithAttrMask(uint16_t* instructionID,
insn->opcode,
0);
}
-
+
return 0;
}
@@ -696,7 +709,7 @@ static int getIDWithAttrMask(uint16_t* instructionID,
*/
static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
off_t i;
-
+
for (i = 0;; i++) {
if (orig[i] == '\0' && equiv[i] == '\0')
return TRUE;
@@ -715,8 +728,8 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
}
/*
- * getID - Determines the ID of an instruction, consuming the ModR/M byte as
- * appropriate for extended and escape opcodes. Determines the attributes and
+ * getID - Determines the ID of an instruction, consuming the ModR/M byte as
+ * appropriate for extended and escape opcodes. Determines the attributes and
* context for the instruction before doing so.
*
* @param insn - The instruction whose ID is to be determined.
@@ -726,21 +739,21 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
static int getID(struct InternalInstruction* insn, const void *miiArg) {
uint8_t attrMask;
uint16_t instructionID;
-
+
dbgprintf(insn, "getID()");
-
+
attrMask = ATTR_NONE;
if (insn->mode == MODE_64BIT)
attrMask |= ATTR_64BIT;
-
+
if (insn->vexSize) {
attrMask |= ATTR_VEX;
if (insn->vexSize == 3) {
switch (ppFromVEX3of3(insn->vexPrefix[2])) {
case VEX_PREFIX_66:
- attrMask |= ATTR_OPSIZE;
+ attrMask |= ATTR_OPSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
@@ -749,14 +762,14 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
attrMask |= ATTR_XD;
break;
}
-
+
if (lFromVEX3of3(insn->vexPrefix[2]))
attrMask |= ATTR_VEXL;
}
else if (insn->vexSize == 2) {
switch (ppFromVEX2of2(insn->vexPrefix[1])) {
case VEX_PREFIX_66:
- attrMask |= ATTR_OPSIZE;
+ attrMask |= ATTR_OPSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
@@ -765,7 +778,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
attrMask |= ATTR_XD;
break;
}
-
+
if (lFromVEX2of2(insn->vexPrefix[1]))
attrMask |= ATTR_VEXL;
}
@@ -836,26 +849,26 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
* conservative, but in the specific case where OpSize is present but not
* in the right place we check if there's a 16-bit operation.
*/
-
+
const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
const char *specName, *specWithOpSizeName;
-
+
spec = specifierForUID(instructionID);
-
+
if (getIDWithAttrMask(&instructionIDWithOpsize,
insn,
attrMask | ATTR_OPSIZE)) {
- /*
+ /*
* ModRM required with OpSize but not present; give up and return version
* without OpSize set
*/
-
+
insn->instructionID = instructionID;
insn->spec = spec;
return 0;
}
-
+
specName = x86DisassemblerGetInstrName(instructionID, miiArg);
specWithOpSizeName =
x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
@@ -882,10 +895,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
const struct InstructionSpecifier *specWithNewOpcode;
spec = specifierForUID(instructionID);
-
+
/* Borrow opcode from one of the other XCHGar opcodes */
insn->opcode = 0x91;
-
+
if (getIDWithAttrMask(&instructionIDWithNewOpcode,
insn,
attrMask)) {
@@ -906,10 +919,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
return 0;
}
-
+
insn->instructionID = instructionID;
insn->spec = specifierForUID(insn->instructionID);
-
+
return 0;
}
@@ -924,14 +937,14 @@ static int readSIB(struct InternalInstruction* insn) {
SIBIndex sibIndexBase = 0;
SIBBase sibBaseBase = 0;
uint8_t index, base;
-
+
dbgprintf(insn, "readSIB()");
-
+
if (insn->consumedSIB)
return 0;
-
+
insn->consumedSIB = TRUE;
-
+
switch (insn->addressSize) {
case 2:
dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
@@ -949,9 +962,9 @@ static int readSIB(struct InternalInstruction* insn) {
if (consumeByte(insn, &insn->sib))
return -1;
-
+
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
-
+
switch (index) {
case 0x4:
insn->sibIndex = SIB_INDEX_NONE;
@@ -963,7 +976,7 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibIndex = SIB_INDEX_NONE;
break;
}
-
+
switch (scaleFromSIB(insn->sib)) {
case 0:
insn->sibScale = 1;
@@ -978,9 +991,9 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibScale = 8;
break;
}
-
+
base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
-
+
switch (base) {
case 0x5:
switch (modFromModRM(insn->modRM)) {
@@ -990,12 +1003,12 @@ static int readSIB(struct InternalInstruction* insn) {
break;
case 0x1:
insn->eaDisplacement = EA_DISP_8;
- insn->sibBase = (insn->addressSize == 4 ?
+ insn->sibBase = (insn->addressSize == 4 ?
SIB_BASE_EBP : SIB_BASE_RBP);
break;
case 0x2:
insn->eaDisplacement = EA_DISP_32;
- insn->sibBase = (insn->addressSize == 4 ?
+ insn->sibBase = (insn->addressSize == 4 ?
SIB_BASE_EBP : SIB_BASE_RBP);
break;
case 0x3:
@@ -1007,7 +1020,7 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
}
-
+
return 0;
}
@@ -1015,22 +1028,22 @@ static int readSIB(struct InternalInstruction* insn) {
* readDisplacement - Consumes the displacement of an instruction.
*
* @param insn - The instruction whose displacement is to be read.
- * @return - 0 if the displacement byte was successfully read; nonzero
+ * @return - 0 if the displacement byte was successfully read; nonzero
* otherwise.
*/
-static int readDisplacement(struct InternalInstruction* insn) {
+static int readDisplacement(struct InternalInstruction* insn) {
int8_t d8;
int16_t d16;
int32_t d32;
-
+
dbgprintf(insn, "readDisplacement()");
-
+
if (insn->consumedDisplacement)
return 0;
-
+
insn->consumedDisplacement = TRUE;
insn->displacementOffset = insn->readerCursor - insn->startLocation;
-
+
switch (insn->eaDisplacement) {
case EA_DISP_NONE:
insn->consumedDisplacement = FALSE;
@@ -1051,7 +1064,7 @@ static int readDisplacement(struct InternalInstruction* insn) {
insn->displacement = d32;
break;
}
-
+
insn->consumedDisplacement = TRUE;
return 0;
}
@@ -1063,22 +1076,22 @@ static int readDisplacement(struct InternalInstruction* insn) {
* @param insn - The instruction whose addressing information is to be read.
* @return - 0 if the information was successfully read; nonzero otherwise.
*/
-static int readModRM(struct InternalInstruction* insn) {
+static int readModRM(struct InternalInstruction* insn) {
uint8_t mod, rm, reg;
-
+
dbgprintf(insn, "readModRM()");
-
+
if (insn->consumedModRM)
return 0;
-
+
if (consumeByte(insn, &insn->modRM))
return -1;
insn->consumedModRM = TRUE;
-
+
mod = modFromModRM(insn->modRM);
rm = rmFromModRM(insn->modRM);
reg = regFromModRM(insn->modRM);
-
+
/*
* This goes by insn->registerSize to pick the correct register, which messes
* up if we're using (say) XMM or 8-bit register operands. That gets fixed in
@@ -1098,16 +1111,16 @@ static int readModRM(struct InternalInstruction* insn) {
insn->eaRegBase = EA_REG_RAX;
break;
}
-
+
reg |= rFromREX(insn->rexPrefix) << 3;
rm |= bFromREX(insn->rexPrefix) << 3;
-
+
insn->reg = (Reg)(insn->regBase + reg);
-
+
switch (insn->addressSize) {
case 2:
insn->eaBaseBase = EA_BASE_BX_SI;
-
+
switch (mod) {
case 0x0:
if (rm == 0x6) {
@@ -1142,14 +1155,14 @@ static int readModRM(struct InternalInstruction* insn) {
case 4:
case 8:
insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
-
+
switch (mod) {
case 0x0:
insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
switch (rm) {
case 0x4:
case 0xc: /* in case REXW.b is set */
- insn->eaBase = (insn->addressSize == 4 ?
+ insn->eaBase = (insn->addressSize == 4 ?
EA_BASE_sib : EA_BASE_sib64);
readSIB(insn);
if (readDisplacement(insn))
@@ -1191,7 +1204,7 @@ static int readModRM(struct InternalInstruction* insn) {
}
break;
} /* switch (insn->addressSize) */
-
+
return 0;
}
@@ -1274,12 +1287,12 @@ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
* @return - 0 if fixup was successful; -1 if the register returned was
* invalid for its class.
*/
-static int fixupReg(struct InternalInstruction *insn,
+static int fixupReg(struct InternalInstruction *insn,
const struct OperandSpecifier *op) {
uint8_t valid;
-
+
dbgprintf(insn, "fixupReg()");
-
+
switch ((OperandEncoding)op->encoding) {
default:
debug("Expected a REG or R/M encoding in fixupReg");
@@ -1311,12 +1324,12 @@ static int fixupReg(struct InternalInstruction *insn,
}
break;
}
-
+
return 0;
}
/*
- * readOpcodeModifier - Reads an operand from the opcode field of an
+ * readOpcodeModifier - Reads an operand from the opcode field of an
* instruction. Handles AddRegFrm instructions.
*
* @param insn - The instruction whose opcode field is to be read.
@@ -1326,12 +1339,12 @@ static int fixupReg(struct InternalInstruction *insn,
*/
static int readOpcodeModifier(struct InternalInstruction* insn) {
dbgprintf(insn, "readOpcodeModifier()");
-
+
if (insn->consumedOpcodeModifier)
return 0;
-
+
insn->consumedOpcodeModifier = TRUE;
-
+
switch (insn->spec->modifierType) {
default:
debug("Unknown modifier type.");
@@ -1345,11 +1358,11 @@ static int readOpcodeModifier(struct InternalInstruction* insn) {
case MODIFIER_MODRM:
insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
return 0;
- }
+ }
}
/*
- * readOpcodeRegister - Reads an operand from the opcode field of an
+ * readOpcodeRegister - Reads an operand from the opcode field of an
* instruction and interprets it appropriately given the operand width.
* Handles AddRegFrm instructions.
*
@@ -1364,39 +1377,39 @@ static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
if (readOpcodeModifier(insn))
return -1;
-
+
if (size == 0)
size = insn->registerSize;
-
+
switch (size) {
case 1:
- insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
+ insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
- if (insn->rexPrefix &&
+ if (insn->rexPrefix &&
insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
insn->opcodeRegister < MODRM_REG_AL + 0x8) {
insn->opcodeRegister = (Reg)(MODRM_REG_SPL
+ (insn->opcodeRegister - MODRM_REG_AL - 4));
}
-
+
break;
case 2:
insn->opcodeRegister = (Reg)(MODRM_REG_AX
- + ((bFromREX(insn->rexPrefix) << 3)
+ + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
case 4:
insn->opcodeRegister = (Reg)(MODRM_REG_EAX
- + ((bFromREX(insn->rexPrefix) << 3)
+ + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
case 8:
- insn->opcodeRegister = (Reg)(MODRM_REG_RAX
- + ((bFromREX(insn->rexPrefix) << 3)
+ insn->opcodeRegister = (Reg)(MODRM_REG_RAX
+ + ((bFromREX(insn->rexPrefix) << 3)
| insn->opcodeModifier));
break;
}
-
+
return 0;
}
@@ -1414,20 +1427,20 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
uint16_t imm16;
uint32_t imm32;
uint64_t imm64;
-
+
dbgprintf(insn, "readImmediate()");
-
+
if (insn->numImmediatesConsumed == 2) {
debug("Already consumed two immediates");
return -1;
}
-
+
if (size == 0)
size = insn->immediateSize;
else
insn->immediateSize = size;
insn->immediateOffset = insn->readerCursor - insn->startLocation;
-
+
switch (size) {
case 1:
if (consumeByte(insn, &imm8))
@@ -1450,9 +1463,9 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
insn->immediates[insn->numImmediatesConsumed] = imm64;
break;
}
-
+
insn->numImmediatesConsumed++;
-
+
return 0;
}
@@ -1465,7 +1478,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
*/
static int readVVVV(struct InternalInstruction* insn) {
dbgprintf(insn, "readVVVV()");
-
+
if (insn->vexSize == 3)
insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
else if (insn->vexSize == 2)
@@ -1490,14 +1503,14 @@ static int readOperands(struct InternalInstruction* insn) {
int index;
int hasVVVV, needVVVV;
int sawRegImm = 0;
-
+
dbgprintf(insn, "readOperands()");
/* If non-zero vvvv specified, need to make sure one of the operands
uses it. */
hasVVVV = !readVVVV(insn);
needVVVV = hasVVVV && (insn->vvvv != 0);
-
+
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
switch (x86OperandSets[insn->spec->operands][index].encoding) {
case ENCODING_NONE:
@@ -1599,7 +1612,7 @@ static int readOperands(struct InternalInstruction* insn) {
/* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
if (needVVVV) return -1;
-
+
return 0;
}
@@ -1607,7 +1620,7 @@ static int readOperands(struct InternalInstruction* insn) {
* decodeInstruction - Reads and interprets a full instruction provided by the
* user.
*
- * @param insn - A pointer to the instruction to be populated. Must be
+ * @param insn - A pointer to the instruction to be populated. Must be
* pre-allocated.
* @param reader - The function to be used to read the instruction's bytes.
* @param readerArg - A generic argument to be passed to the reader to store
@@ -1632,7 +1645,7 @@ int decodeInstruction(struct InternalInstruction* insn,
uint64_t startLoc,
DisassemblerMode mode) {
memset(insn, 0, sizeof(struct InternalInstruction));
-
+
insn->reader = reader;
insn->readerArg = readerArg;
insn->dlog = logger;
@@ -1641,7 +1654,7 @@ int decodeInstruction(struct InternalInstruction* insn,
insn->readerCursor = startLoc;
insn->mode = mode;
insn->numImmediatesConsumed = 0;
-
+
if (readPrefixes(insn) ||
readOpcode(insn) ||
getID(insn, miiArg) ||
@@ -1650,14 +1663,14 @@ int decodeInstruction(struct InternalInstruction* insn,
return -1;
insn->operands = &x86OperandSets[insn->spec->operands][0];
-
+
insn->length = insn->readerCursor - insn->startLocation;
-
+
dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
startLoc, insn->readerCursor, insn->length);
-
+
if (insn->length > 15)
dbgprintf(insn, "Instruction exceeds 15-byte limit");
-
+
return 0;
}
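Most of the decoder churn above is trailing-whitespace cleanup; the entry point still runs readPrefixes, readOpcode, getID, and readOperands in sequence and fails if any stage does. A minimal sketch of driving decodeInstruction from a flat byte buffer (bufferReader, code, codeSize, and mii are illustrative assumptions, not part of this patch):

  struct BufferRegion { const uint8_t *bytes; uint64_t size; };

  /* Reader callback: fetch the byte at 'address' from the backing buffer. */
  static int bufferReader(const void *arg, uint8_t *byte, uint64_t address) {
    const struct BufferRegion *r = (const struct BufferRegion *)arg;
    if (address >= r->size)
      return -1;                       /* past the end: decode fails */
    *byte = r->bytes[address];
    return 0;
  }

  /* ... */
  struct InternalInstruction insn;
  struct BufferRegion region = { code, codeSize };
  if (!decodeInstruction(&insn, bufferReader, &region,
                         /*logger=*/0, /*loggerArg=*/0, /*miiArg=*/mii,
                         /*startLoc=*/0, MODE_64BIT)) {
    /* success: insn.length bytes consumed, insn.instructionID set */
  }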
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index a4bd1147bc51..e357710b20eb 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -14,12 +14,12 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
-#include "X86InstComments.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
-#include "llvm/MC/MCInst.h"
+#include "X86InstComments.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
@@ -131,7 +131,7 @@ void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm())
- O << Op.getImm();
+ O << formatImm(Op.getImm());
else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
// If a symbolic branch target was added as a constant expression then print
@@ -157,7 +157,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else if (Op.isImm()) {
// Print X86 immediates as signed values.
O << markup("<imm:")
- << '$' << (int64_t)Op.getImm()
+ << '$' << formatImm((int64_t)Op.getImm())
<< markup(">");
if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
@@ -189,7 +189,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
if (DispSpec.isImm()) {
int64_t DispVal = DispSpec.getImm();
if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
- O << DispVal;
+ O << formatImm(DispVal);
} else {
assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
O << *DispSpec.getExpr();
@@ -207,7 +207,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
if (ScaleVal != 1) {
O << ','
<< markup("<imm:")
- << ScaleVal
+ << ScaleVal // never printed in hex.
<< markup(">");
}
}
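The printPCRelImm/printOperand/printMemReference changes route every immediate through formatImm() instead of streaming the raw integer, so a single printer-level switch decides between decimal and hexadecimal output (ScaleVal is deliberately left raw, as the new comment notes). Conceptually the hook behaves like this sketch (an assumption about its behavior, not the MCInstPrinter source):

  #include <inttypes.h>
  #include <stdio.h>

  /* Sketch: one formatting choke point for all printed immediates. */
  static void formatImm(char *buf, size_t n, int64_t v, int printHex) {
    if (printHex)
      snprintf(buf, n, "0x%" PRIx64, (uint64_t)v);  /* --print-imm-hex style */
    else
      snprintf(buf, n, "%" PRId64, v);              /* default decimal */
  }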
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 64ac5e685f76..0f6eeb19bccd 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -34,10 +34,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
switch (MI->getOpcode()) {
case X86::INSERTPSrr:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- Src2Name = getRegName(MI->getOperand(2).getReg());
- DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
- break;
case X86::VINSERTPSrr:
DestName = getRegName(MI->getOperand(0).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -46,10 +42,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::MOVLHPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeMOVLHPSMask(2, ShuffleMask);
- break;
case X86::VMOVLHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -58,10 +50,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::MOVHLPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeMOVHLPSMask(2, ShuffleMask);
- break;
case X86::VMOVHLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -69,6 +57,29 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeMOVHLPSMask(2, ShuffleMask);
break;
+ case X86::PALIGNR128rr:
+ case X86::VPALIGNR128rr:
+ Src1Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PALIGNR128rm:
+ case X86::VPALIGNR128rm:
+ Src2Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePALIGNRMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPALIGNR256rr:
+ Src1Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPALIGNR256rm:
+ Src2Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePALIGNRMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
case X86::PSHUFDri:
case X86::VPSHUFDri:
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -131,15 +142,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::PUNPCKHBWrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHBWrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
- break;
case X86::VPUNPCKHBWrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHBWrm:
case X86::VPUNPCKHBWrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -154,15 +160,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKHWDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHWDrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
- break;
case X86::VPUNPCKHWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHWDrm:
case X86::VPUNPCKHWDrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -177,15 +178,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKHDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
- break;
case X86::VPUNPCKHDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHDQrm:
case X86::VPUNPCKHDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -200,15 +196,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKHQDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHQDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
- break;
case X86::VPUNPCKHQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHQDQrm:
case X86::VPUNPCKHQDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -224,15 +215,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::PUNPCKLBWrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLBWrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
- break;
case X86::VPUNPCKLBWrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLBWrm:
case X86::VPUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -247,15 +233,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLWDrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
- break;
case X86::VPUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLWDrm:
case X86::VPUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -270,15 +251,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
- break;
case X86::VPUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLDQrm:
case X86::VPUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -293,15 +269,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLQDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
- break;
case X86::VPUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLQDQrm:
case X86::VPUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -317,16 +288,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::SHUFPDrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::SHUFPDrmi:
- DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VSHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::SHUFPDrmi:
case X86::VSHUFPDrmi:
DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
@@ -344,16 +309,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::SHUFPSrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::SHUFPSrmi:
- DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VSHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::SHUFPSrmi:
case X86::VSHUFPSrmi:
DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
@@ -371,15 +330,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::UNPCKLPDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKLPDrm:
- DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKLPDrm:
case X86::VUNPCKLPDrm:
DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -394,15 +348,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKLPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKLPSrm:
- DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKLPSrm:
case X86::VUNPCKLPSrm:
DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -417,15 +366,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKHPDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKHPDrm:
- DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKHPDrm:
case X86::VUNPCKHPDrm:
DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -440,15 +384,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKHPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKHPSrm:
- DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKHPSrm:
case X86::VUNPCKHPSrm:
DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index d67aec7f10ef..141f4a4dd856 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -14,11 +14,11 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
-#include "X86InstComments.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
-#include "llvm/MC/MCInst.h"
+#include "X86InstComments.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 467edadc7e09..598ddee56d21 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -113,7 +113,7 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const;
void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
@@ -255,7 +255,7 @@ bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
- const MCInstFragment *DF,
+ const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const {
// Relax if the value is too big for a (signed) i8.
return int64_t(Value) != int64_t(int8_t(Value));
@@ -279,9 +279,9 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res.setOpcode(RelaxedOp);
}
-/// writeNopData - Write optimal nops to the output file for the \p Count
-/// bytes. This returns the number of bytes written. It may return 0 if
-/// the \p Count is more than the maximum optimal nops.
+/// \brief Write a sequence of optimal nops to the output, covering \p Count
+/// bytes.
+/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
static const uint8_t Nops[10][10] = {
// nop
@@ -315,18 +315,18 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
- // Write an optimal sequence for the first 15 bytes.
- const uint64_t OptimalCount = (Count < 16) ? Count : 15;
- const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10;
- for (uint64_t i = 0, e = Prefixes; i != e; i++)
- OW->Write8(0x66);
- const uint64_t Rest = OptimalCount - Prefixes;
- for (uint64_t i = 0, e = Rest; i != e; i++)
- OW->Write8(Nops[Rest - 1][i]);
-
- // Finish with single byte nops.
- for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
- OW->Write8(0x90);
+ // 15 is the longest single nop instruction. Emit as many 15-byte nops as
+ // needed, then emit a nop of the remaining length.
+ do {
+ const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t) 15);
+ const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
+ for (uint8_t i = 0; i < Prefixes; i++)
+ OW->Write8(0x66);
+ const uint8_t Rest = ThisNopLength - Prefixes;
+ for (uint8_t i = 0; i < Rest; i++)
+ OW->Write8(Nops[Rest - 1][i]);
+ Count -= ThisNopLength;
+ } while (Count != 0);
return true;
}
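The rewritten loop removes the old 15-byte cap on total padding: each iteration emits one nop of up to 15 bytes (0x66 operand-size prefixes stretch the 10-byte table entry), then repeats until Count is exhausted. A standalone sketch of the same split, padding 33 bytes into 15 + 15 + 3:

  #include <stdint.h>
  #include <stdio.h>

  int main(void) {
    uint64_t count = 33;
    while (count != 0) {
      uint64_t thisNop = count < 15 ? count : 15;           /* longest single nop */
      uint64_t prefixes = thisNop <= 10 ? 0 : thisNop - 10; /* 0x66 prefixes */
      printf("%llu-byte nop = %llu prefix byte(s) + %llu-byte core nop\n",
             (unsigned long long)thisNop, (unsigned long long)prefixes,
             (unsigned long long)(thisNop - prefixes));
      count -= thisNop;
    }
    return 0;
  }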
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 7ea1961dec90..36695600707e 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -104,7 +104,7 @@ namespace X86II {
/// MO_TLSLD - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the TLS index for the module that
- /// contains the symbol. When this index is passed to a call to to
+ /// contains the symbol. When this index is passed to a call to
/// __tls_get_addr, the function will return the base address of the TLS
/// block for the symbol. Used in the x86-64 local dynamic TLS access model.
///
@@ -114,7 +114,7 @@ namespace X86II {
/// MO_TLSLDM - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the TLS index for the module that
- /// contains the symbol. When this index is passed to a call to to
+ /// contains the symbol. When this index is passed to a call to
/// ___tls_get_addr, the function will return the base address of the TLS
/// block for the symbol. Used in the IA32 local dynamic TLS access model.
///
@@ -276,9 +276,9 @@ namespace X86II {
MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36,
MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40,
MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46,
- MRM_D4 = 47, MRM_D5 = 48, MRM_D8 = 49, MRM_D9 = 50,
- MRM_DA = 51, MRM_DB = 52, MRM_DC = 53, MRM_DD = 54,
- MRM_DE = 55, MRM_DF = 56,
+ MRM_D4 = 47, MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50,
+ MRM_D9 = 51, MRM_DA = 52, MRM_DB = 53, MRM_DC = 54,
+ MRM_DD = 55, MRM_DE = 56, MRM_DF = 57,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
/// immediates, the first of which is a 16-bit immediate (specified by
@@ -574,16 +574,13 @@ namespace X86II {
++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV).
return FirstMemOp;
}
- case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4:
- case X86II::MRM_C8: case X86II::MRM_C9:
- case X86II::MRM_E8: case X86II::MRM_F0:
- case X86II::MRM_F8: case X86II::MRM_F9:
- case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D5:
- case X86II::MRM_D8: case X86II::MRM_D9:
- case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD:
+ case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
+ case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8:
+ case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
+ case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA:
+ case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD:
case X86II::MRM_DE: case X86II::MRM_DF:
return -1;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 16488eb7ae7e..7815ae98c9bd 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -44,7 +44,7 @@ void X86MCAsmInfoDarwin::anchor() { }
X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
bool is64Bit = T.getArch() == Triple::x86_64;
if (is64Bit)
- PointerSize = 8;
+ PointerSize = CalleeSaveStackSlotSize = 8;
AssemblerDialect = AsmWriterFlavor;
@@ -76,8 +76,16 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
void X86ELFMCAsmInfo::anchor() { }
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
- if (T.getArch() == Triple::x86_64)
- PointerSize = 8;
+ bool is64Bit = T.getArch() == Triple::x86_64;
+ bool isX32 = T.getEnvironment() == Triple::GNUX32;
+
+ // For ELF, x86-64 pointer size depends on the ABI.
+ // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64
+ // with the x32 ABI, pointer size remains the default 4.
+ PointerSize = (is64Bit && !isX32) ? 8 : 4;
+
+ // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
+ CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
AssemblerDialect = AsmWriterFlavor;
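The net effect of the ELF hunk, restated as a sketch (the helper names are mine, not LLVM's): pointer size follows the data model while the callee-save slot size follows the instruction set, so x32 (64-bit ISA, 32-bit pointers) gets 4-byte pointers but keeps 8-byte stack slots.

  /* Sketch of the size selection above for the three ELF cases. */
  static unsigned pointerSize(int is64Bit, int isX32) {
    return (is64Bit && !isX32) ? 8 : 4;   /* x86: 4, x86-64: 8, x32: 4 */
  }
  static unsigned calleeSaveSlotSize(int is64Bit) {
    return is64Bit ? 8 : 4;               /* x32 still uses 8-byte slots */
  }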
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 122204ae75c8..776cee1e35cc 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// dst(ModR/M), src1(ModR/M)
// dst(ModR/M), src1(ModR/M), imm8
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
CurOp++;
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMDestReg:
EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
EmitRegModRMByte(MI.getOperand(CurOp),
- GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
- CurOp += 2;
+ GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
break;
case X86II::MRMDestMem:
@@ -1117,16 +1136,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
TSFlags, CurByte, OS, Fixups);
CurOp += X86::AddrNumOperands;
break;
- case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4:
- case X86II::MRM_C8: case X86II::MRM_C9:
- case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D5:
- case X86II::MRM_D8: case X86II::MRM_D9:
- case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD:
- case X86II::MRM_DE: case X86II::MRM_DF:
- case X86II::MRM_E8: case X86II::MRM_F0:
+ case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
+ case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
+ case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
+ case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
+ case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
+ case X86II::MRM_DF: case X86II::MRM_E8: case X86II::MRM_F0:
case X86II::MRM_F8: case X86II::MRM_F9:
EmitByte(BaseOpcode, CurByte, OS);
@@ -1143,6 +1159,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_D1: MRM = 0xD1; break;
case X86II::MRM_D4: MRM = 0xD4; break;
case X86II::MRM_D5: MRM = 0xD5; break;
+ case X86II::MRM_D6: MRM = 0xD6; break;
case X86II::MRM_D8: MRM = 0xD8; break;
case X86II::MRM_D9: MRM = 0xD9; break;
case X86II::MRM_DA: MRM = 0xDA; break;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 287c9f137a58..5e84530cd729 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -12,19 +12,19 @@
//===----------------------------------------------------------------------===//
#include "X86MCTargetDesc.h"
-#include "X86MCAsmInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "InstPrinter/X86IntelInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "X86MCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Support/Host.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_REGINFO_MC_DESC
@@ -257,7 +257,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitX86MCRegisterInfo(X, RA,
X86_MC::getDwarfRegFlavour(TT, false),
- X86_MC::getDwarfRegFlavour(TT, true));
+ X86_MC::getDwarfRegFlavour(TT, true),
+ RA);
X86_MC::InitLLVM2SEHRegisterMapping(X);
return X;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 7ff058edbc23..64f005c469bc 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -7,18 +7,18 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/X86FixupKinds.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
-#include "llvm/MC/MCAssembler.h"
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Object/MachOFormat.h"
using namespace llvm;
using namespace llvm::object;
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index 949661eb99e9..e518fecf044f 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -16,8 +16,7 @@ BUILT_SOURCES = X86GenRegisterInfo.inc X86GenInstrInfo.inc \
X86GenAsmWriter.inc X86GenAsmMatcher.inc \
X86GenAsmWriter1.inc X86GenDAGISel.inc \
X86GenDisassemblerTables.inc X86GenFastISel.inc \
- X86GenCallingConv.inc X86GenSubtargetInfo.inc \
- X86GenEDInfo.inc
+ X86GenCallingConv.inc X86GenSubtargetInfo.inc
DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 40110353fc62..496b704ee85f 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -953,3 +953,12 @@ similarly, v[0]-v[1] should match to hsubpd, and {v[0]-v[1], w[0]-w[1]} should
turn into hsubpd also.
//===---------------------------------------------------------------------===//
+
+define <2 x i32> @foo(<2 x double> %in) {
+ %x = fptosi <2 x double> %in to <2 x i32>
+ ret <2 x i32> %x
+}
+
+Should compile into cvttpd2dq instead of being scalarized into 2 cvttsd2si.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 6a8a4fdf2520..b4285a071879 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1568,43 +1568,6 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
Core 2, and "Generic"
//===---------------------------------------------------------------------===//
-
-Testcase:
-int a(int x) { return (x & 127) > 31; }
-
-Current output:
- movl 4(%esp), %eax
- andl $127, %eax
- cmpl $31, %eax
- seta %al
- movzbl %al, %eax
- ret
-
-Ideal output:
- xorl %eax, %eax
- testl $96, 4(%esp)
- setne %al
- ret
-
-This should definitely be done in instcombine, canonicalizing the range
-condition into a != condition. We get this IR:
-
-define i32 @a(i32 %x) nounwind readnone {
-entry:
- %0 = and i32 %x, 127 ; <i32> [#uses=1]
- %1 = icmp ugt i32 %0, 31 ; <i1> [#uses=1]
- %2 = zext i1 %1 to i32 ; <i32> [#uses=1]
- ret i32 %2
-}
-
-Instcombine prefers to strength reduce relational comparisons to equality
-comparisons when possible, this should be another case of that. This could
-be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it
-looks like InstCombiner::visitICmpInstWithInstAndIntCst should really already
-be redesigned to use ComputeMaskedBits and friends.
-
-
-//===---------------------------------------------------------------------===//
Testcase:
int x(int a) { return (a&0xf0)>>4; }
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index 52a67f763b0a..815d23588f11 100644
--- a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 8b87c1f9c8ad..bbd490411f2d 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -61,6 +61,24 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(NElts+i);
}
+void DecodePALIGNRMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
+
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Base = i + Offset;
+ // if i+offset is out of this lane then we actually need the other source
+ if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
+ ShuffleMask.push_back(Base + l);
+ }
+ }
+}
+
/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
/// VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
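A worked example of the new decoder (not part of the patch): for v16i8 with imm = 4, Offset is 4 bytes, there is a single 128-bit lane, and the loop yields the mask <4..15, 16..19>, where indices >= 16 select the second source in LLVM's two-input shuffle-mask convention:

  /* Worked example: mask produced by DecodePALIGNRMask for v16i8, imm = 4. */
  unsigned Imm = 4, NumElts = 16, NumLaneElts = 16;   /* one 128-bit lane */
  unsigned Offset = Imm * 1;                          /* 1-byte elements */
  for (unsigned i = 0; i != NumLaneElts; ++i) {
    unsigned Base = i + Offset;                       /* 4, 5, ..., 19 */
    if (Base >= NumLaneElts)
      Base += NumElts - NumLaneElts;                  /* no-op: single lane */
    /* mask[i] = Base */
  }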
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 70d8171a8154..017ab325ec51 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -35,6 +35,8 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
// <0,2> or <0,1,4,5>
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
+void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index dce5b4d2b008..1f9919f15955 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -63,11 +63,12 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
///
FunctionPass *createEmitX86CodeToMemory();
-/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass
-/// which determines whether the frame pointer register should be
-/// reserved in case dynamic stack alignment is later required.
-///
-FunctionPass *createX86MaxStackAlignmentHeuristicPass();
+/// \brief Creates an X86-specific Target Transformation Info pass.
+ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM);
+
+/// createX86PadShortFunctions - Return a pass that pads short functions
+/// with NOOPs. This will prevent a stall when returning on the Atom.
+FunctionPass *createX86PadShortFunctions();
} // End llvm namespace
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 8ad0bc08ac57..1dcc344e7f0d 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -120,11 +120,25 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
+def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true",
+ "Support HLE">;
+def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
+ "Support ADX instructions">;
+def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
+ "Support PRFCHW instructions">;
+def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
+ "Support RDSEED instruction">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
- "HasSlowDivide", "true",
- "Use small divide for positive values less than 256">;
+ "HasSlowDivide", "true",
+ "Use small divide for positive values less than 256">;
+def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
+ "PadShortFunctions", "true",
+ "Pad short functions">;
+def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
+ "CallRegIndirect", "true",
+ "Call register indirect">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -138,9 +152,6 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
-class AtomProc<string Name, list<SubtargetFeature> Features>
- : ProcessorModel<Name, AtomModel, Features>;
-
def : Proc<"generic", []>;
def : Proc<"i386", []>;
def : Proc<"i486", []>;
@@ -155,47 +166,63 @@ def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
-def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
- FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
- FeatureSlowDivide]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem]>;
+// Intel Core Duo.
+def : ProcessorModel<"yonah", SandyBridgeModel,
+ [FeatureSSE3, FeatureSlowBTMem]>;
+
+// NetBurst.
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Intel Core 2 Solo/Duo.
+def : ProcessorModel<"core2", SandyBridgeModel,
+ [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+def : ProcessorModel<"penryn", SandyBridgeModel,
+ [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Atom.
+def : ProcessorModel<"atom", AtomModel,
+ [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
+ FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
+ FeatureSlowDivide,
+ FeatureCallRegIndirect,
+ FeaturePadShortFunctions]>;
+
// "Arrandale" along with corei3 and corei5
-def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES]>;
-def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem,
- FeaturePOPCNT]>;
+def : ProcessorModel<"corei7", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT, FeatureAES]>;
+
+def : ProcessorModel<"nehalem", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem,
- FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
+def : ProcessorModel<"westmere", SandyBridgeModel,
+ [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+ FeatureFastUAMem, FeaturePOPCNT, FeatureAES,
+ FeaturePCLMUL]>;
// Sandy Bridge
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
-def : Proc<"corei7-avx", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL]>;
+def : ProcessorModel<"corei7-avx", SandyBridgeModel,
+ [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
// Ivy Bridge
-def : Proc<"core-avx-i", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL,
- FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>;
+def : ProcessorModel<"core-avx-i", SandyBridgeModel,
+ [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+ FeatureF16C, FeatureFSGSBase]>;
// Haswell
-def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL, FeatureRDRAND,
- FeatureF16C, FeatureFSGSBase,
- FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2, FeatureFMA,
- FeatureRTM]>;
+def : ProcessorModel<"core-avx2", HaswellModel,
+ [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+ FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
+ FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
+ FeatureHLE]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
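
Each SubtargetFeature above ties an -mattr name to a boolean the backend can query, and a processor definition is just a named bundle of such features. A toy C++ model of that resolution, with invented feature and CPU names:

#include <bitset>
#include <cassert>
#include <map>
#include <string>
#include <vector>

enum Feature { SSE42, CMPXCHG16B, MOVBE, HLE, NumFeatures };

int main() {
  // Mirrors def : ProcessorModel<"core-avx2", ..., [..., FeatureHLE]>:
  // selecting a CPU sets every feature bit its definition lists.
  std::map<std::string, std::vector<Feature>> Procs = {
      {"toy-atom", {SSE42, CMPXCHG16B, MOVBE}},
      {"toy-avx2", {SSE42, CMPXCHG16B, HLE}},
  };
  std::bitset<NumFeatures> Bits;
  for (Feature F : Procs["toy-avx2"])
    Bits.set(F);
  assert(Bits.test(HLE) && !Bits.test(MOVBE));
}
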
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index fdd712520b44..6b228b0b0329 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,33 +13,33 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -201,7 +201,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLVP_PIC_BASE:
O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
break;
- case X86II::MO_SECREL: O << "@SECREL"; break;
+ case X86II::MO_SECREL: O << "@SECREL32"; break;
}
}
@@ -252,14 +252,15 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
}
case MachineOperand::MO_Immediate:
- O << '$' << MO.getImm();
+ if (AsmVariant == 0) O << '$';
+ O << MO.getImm();
return;
case MachineOperand::MO_JumpTableIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol: {
- O << '$';
+ if (AsmVariant == 0) O << '$';
printSymbolOperand(MO, O);
break;
}
@@ -355,19 +356,23 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op,
NeedPlus = true;
}
- assert (DispSpec.isImm() && "Displacement is not an immediate!");
- int64_t DispVal = DispSpec.getImm();
- if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
- if (NeedPlus) {
- if (DispVal > 0)
- O << " + ";
- else {
- O << " - ";
- DispVal = -DispVal;
+ if (!DispSpec.isImm()) {
+ if (NeedPlus) O << " + ";
+ printOperand(MI, Op+3, O, Modifier, AsmVariant);
+ } else {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
}
+ O << DispVal;
}
- O << DispVal;
- }
+ }
O << ']';
}
@@ -543,7 +548,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
MCSA_IndirectSymbol);

// hlt; hlt; hlt; hlt; hlt  (hlt = 0xf4).
const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4";
- OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
+ OutStreamer.EmitBytes(StringRef(HltInsts, 5));
}
Stubs.clear();
@@ -569,7 +574,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// .long 0
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, 4/*size*/);
else
// Internal to current translation unit.
//
@@ -578,8 +583,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// using NLPs. However, sometimes the types are local to the file. So
// we need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
- OutContext),
- 4/*size*/, 0/*addrspace*/);
+ OutContext), 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
@@ -596,8 +600,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// .long _foo
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
- OutContext),
- 4/*size*/, 0/*addrspace*/);
+ OutContext), 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
@@ -663,7 +666,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
name += ",DATA";
else
name += ",data";
- OutStreamer.EmitBytes(name, 0);
+ OutStreamer.EmitBytes(name);
}
for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
@@ -672,7 +675,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
else
name = " -export:";
name += DLLExportedFns[i]->getName();
- OutStreamer.EmitBytes(name, 0);
+ OutStreamer.EmitBytes(name);
}
}
}
@@ -692,7 +695,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(), 0);
+ TD->getPointerSize());
}
Stubs.clear();
}
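
The reworked printIntelMemReference logic above only emits a signed displacement when something already precedes it inside the brackets. A self-contained approximation of that formatting rule (simplified: no index register or scale, names hypothetical):

#include <cassert>
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

// Toy Intel-syntax memory-reference printer: emit " + d" or " - d" only
// when a base register was already printed; otherwise print the raw value.
std::string memRef(const char *Base, int64_t Disp) {
  std::ostringstream O;
  O << '[';
  bool NeedPlus = false;
  if (Base && *Base) { O << Base; NeedPlus = true; }
  if (Disp || !NeedPlus) {
    if (NeedPlus)
      O << (Disp > 0 ? " + " : " - ");
    O << (NeedPlus && Disp < 0 ? -Disp : Disp);
  }
  O << ']';
  return O.str();
}

int main() {
  assert(memRef("rbp", -8) == "[rbp - 8]");
  assert(memRef("rsp", 16) == "[rsp + 16]");
  assert(memRef("rbp", 0) == "[rbp]");   // zero disp elided after a base
  assert(memRef(nullptr, 0) == "[0]");   // absolute zero is still printed
  std::cout << memRef("rbp", -8) << "\n";
}
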
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 61eb14e036d0..bc7496bad144 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===//
+//===-- X86AsmPrinter.h - X86 implementation of AsmPrinter ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// AT&T assembly code printer class.
-//
-//===----------------------------------------------------------------------===//
#ifndef X86ASMPRINTER_H
#define X86ASMPRINTER_H
@@ -35,7 +31,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
}
virtual const char *getPassName() const LLVM_OVERRIDE {
- return "X86 AT&T-Style Assembly Printer";
+ return "X86 Assembly / Object Emitter";
}
const X86Subtarget &getSubtarget() const { return *Subtarget; }
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index a5a8dc18e41d..0dfeb42f1a4d 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -15,8 +15,8 @@
#define X86COFF_MACHINEMODULEINFO_H
#include "X86MachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
namespace llvm {
class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 6786756c7faf..9eafbd55a5ae 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -103,6 +103,15 @@ def RetCC_Intel_OCL_BI : CallingConv<[
CCDelegateTo<RetCC_X86Common>
]>;
+// X86-32 HiPE return-value convention.
+def RetCC_X86_32_HiPE : CallingConv<[
+ // Promote all types to i32
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Return: HP, P, VAL1, VAL2
+ CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
+]>;
+
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
@@ -123,17 +132,30 @@ def RetCC_X86_Win64_C : CallingConv<[
CCDelegateTo<RetCC_X86_64_C>
]>;
+// X86-64 HiPE return-value convention.
+def RetCC_X86_64_HiPE : CallingConv<[
+ // Promote all types to i64
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Return: HP, P, VAL1, VAL2
+ CCIfType<[i64], CCAssignToReg<[R15, RBP, RAX, RDX]>>
+]>;
// This is the root return-value convention for the X86-32 backend.
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // If HiPE, use RetCC_X86_32_HiPE.
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
+
// Otherwise, use RetCC_X86_32_C.
CCDelegateTo<RetCC_X86_32_C>
]>;
// This is the root return-value convention for the X86-64 backend.
def RetCC_X86_64 : CallingConv<[
+ // HiPE uses RetCC_X86_64_HiPE
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_64_HiPE>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
@@ -254,29 +276,6 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[f80], CCAssignToStack<0, 0>>
]>;
-// X86-64 Intel OpenCL built-ins calling convention.
-def CC_Intel_OCL_BI : CallingConv<[
- CCIfType<[i32], CCIfSubtarget<"isTargetWin32()", CCAssignToStack<4, 4>>>,
-
- CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>,
- CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8, R9 ]>>>,
-
- CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX]>>,
- CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX]>>,
-
- // The SSE vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
-
- // The 256-bit vector arguments are passed in YMM registers.
- CCIfType<[v8f32, v4f64, v8i32, v4i64],
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,
-
- CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
- CCDelegateTo<CC_X86_64_C>
-]>;
-
-
def CC_X86_64_GHC : CallingConv<[
// Promote i8/i16/i32 arguments to i64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
@@ -291,6 +290,18 @@ def CC_X86_64_GHC : CallingConv<[
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
]>;
+def CC_X86_64_HiPE : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2, ARG3
+ CCIfType<[i64], CCAssignToReg<[R15, RBP, RSI, RDX, RCX, R8]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
+
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//
@@ -376,8 +387,8 @@ def CC_X86_32_ThisCall : CallingConv<[
// Promote i8/i16 arguments to i32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
- // Pass sret arguments indirectly through EAX
- CCIfSRet<CCAssignToReg<[EAX]>>,
+ // Pass sret arguments indirectly through stack.
+ CCIfSRet<CCAssignToStack<4, 4>>,
// The first integer argument is passed in ECX
CCIfType<[i32], CCAssignToReg<[ECX]>>,
@@ -422,6 +433,42 @@ def CC_X86_32_GHC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
]>;
+def CC_X86_32_HiPE : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2
+ CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX, ECX]>>,
+
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>
+]>;
+
+// X86-64 Intel OpenCL built-ins calling convention.
+def CC_Intel_OCL_BI : CallingConv<[
+
+ CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>,
+ CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8, R9 ]>>>,
+
+ CCIfType<[i32], CCIfSubtarget<"is64Bit()", CCAssignToReg<[EDI, ESI, EDX, ECX]>>>,
+ CCIfType<[i64], CCIfSubtarget<"is64Bit()", CCAssignToReg<[RDI, RSI, RDX, RCX]>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+
+ // The SSE vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v8f32, v4f64, v8i32, v4i64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,
+
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64_C>>,
+ CCDelegateTo<CC_X86_32_C>
+]>;
+
//===----------------------------------------------------------------------===//
// X86 Root Argument Calling Conventions
//===----------------------------------------------------------------------===//
@@ -432,6 +479,7 @@ def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
// Otherwise, drop to normal X86-32 CC
CCDelegateTo<CC_X86_32_C>
@@ -440,6 +488,7 @@ def CC_X86_32 : CallingConv<[
// This is the root argument convention for the X86-64 backend.
def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_64_HiPE>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
@@ -470,6 +519,9 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
(sequence "XMM%u", 6, 15))>;
+def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10,
+ R11, R12, R13, R14, R15, RBP,
+ (sequence "XMM%u", 0, 15))>;
// Standard C + YMM6-15
def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
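
In the HiPE conventions above, CCAssignToReg hands out a fixed register list and CCAssignToStack takes over once it runs dry. A toy model of that cascade for the 64-bit argument convention (structure simplified; this is not LLVM's CC machinery):

#include <cassert>
#include <string>
#include <vector>

struct Loc { bool InReg; std::string Reg; unsigned StackOff; };

// Hand out HP, P, ARG0..ARG3 first, then 8-byte stack slots.
std::vector<Loc> assignHiPE64(unsigned NumArgs) {
  static const char *Regs[] = {"R15", "RBP", "RSI", "RDX", "RCX", "R8"};
  std::vector<Loc> Locs;
  unsigned StackOff = 0;
  for (unsigned i = 0; i != NumArgs; ++i) {
    if (i < 6)
      Locs.push_back({true, Regs[i], 0});
    else {
      Locs.push_back({false, "", StackOff});
      StackOff += 8;
    }
  }
  return Locs;
}

int main() {
  auto L = assignHiPE64(8);
  assert(L[0].Reg == "R15" && L[5].Reg == "R8");
  assert(!L[6].InReg && L[6].StackOff == 0 && L[7].StackOff == 8);
}
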
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 44db563818b1..2518e02e2a40 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -13,23 +13,23 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "x86-emitter"
+#include "X86.h"
#include "X86InstrInfo.h"
#include "X86JITInfo.h"
+#include "X86Relocations.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "X86Relocations.h"
-#include "X86.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -124,7 +124,7 @@ template<class CodeEmitter>
} // end anonymous namespace.
/// createX86JITCodeEmitterPass - Return a pass that emits the collected X86 code
-/// to the specified templated MachineCodeEmitter object.
+/// to the specified JITCodeEmitter object.
FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
JITCodeEmitter &JCE) {
return new Emitter<JITCodeEmitter>(TM, JCE);
@@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
const MCInstrDesc *Desc) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
+
+ unsigned SrcRegNum = CurOp+1;
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(CurOp+1).getReg()));
- CurOp += 2;
+ getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
+ CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMDestMem: {
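
The MRMDestReg fix above is about which operands land in the ModR/M byte once VEX_4V consumes one source. For reference, the byte itself packs three fields; a minimal encoder of that layout:

#include <cassert>
#include <cstdint>

// ModR/M layout: mod[7:6] reg[5:3] rm[2:0]; register-direct forms such as
// MRMDestReg use mod == 3.
static uint8_t modRM(uint8_t Mod, uint8_t Reg, uint8_t RM) {
  return uint8_t(Mod << 6 | (Reg & 7) << 3 | (RM & 7));
}

int main() {
  // "mov eax, ecx" (89 C8): mod=3, reg=ecx(1), rm=eax(0).
  assert(modRM(3, 1, 0) == 0xC8);
}
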
diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm
index f321778db24b..69b4c71651d7 100644
--- a/lib/Target/X86/X86CompilationCallback_Win64.asm
+++ b/lib/Target/X86/X86CompilationCallback_Win64.asm
@@ -11,7 +11,7 @@
;;
;;===----------------------------------------------------------------------===
-extrn X86CompilationCallback2: PROC
+extrn LLVMX86CompilationCallback2: PROC
.code
X86CompilationCallback proc
@@ -42,7 +42,7 @@ X86CompilationCallback proc
; Pass prev frame and return address.
mov rcx, rbp
mov rdx, qword ptr [rbp+8]
- call X86CompilationCallback2
+ call LLVMX86CompilationCallback2
; Restore all XMM arg registers.
movaps xmm3, [rsp+48+32]
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index d4627c74cb1c..cadec682a435 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -14,24 +14,24 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
-#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
+#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
@@ -75,6 +75,8 @@ public:
virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
+ virtual bool FastLowerArguments();
+
#include "X86GenFastISel.inc"
private:
@@ -297,7 +299,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
case MVT::i32: Opc = X86::MOV32mi; break;
case MVT::i64:
// Must be a 32-bit sign extended value.
- if ((int)CI->getSExtValue() == CI->getSExtValue())
+ if (isInt<32>(CI->getSExtValue()))
Opc = X86::MOV64mi32;
break;
}
@@ -326,12 +328,11 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned &ResultReg) {
unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
Src, /*TODO: Kill=*/false);
-
- if (RR != 0) {
- ResultReg = RR;
- return true;
- } else
+ if (RR == 0)
return false;
+
+ ResultReg = RR;
+ return true;
}
/// X86SelectAddress - Attempt to fill in an address from the given value.
@@ -727,7 +728,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Don't handle popping bytes on return for now.
if (X86MFInfo->getBytesToPopOnReturn() != 0)
- return 0;
+ return false;
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
@@ -738,10 +739,12 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (F.isVarArg())
return false;
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -806,25 +809,30 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
DstReg).addReg(SrcReg);
- // Mark the register as live out of the function.
- MRI.addLiveOut(VA.getLocReg());
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
}
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. We saved the argument into
// a virtual register in the entry block, so now we copy the value out
- // and into %rax.
- if (Subtarget->is64Bit() && F.hasStructRetAttr()) {
+ // and into %rax. We also do the same with %eax for Win32.
+ if (F.hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
+ unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- X86::RAX).addReg(Reg);
- MRI.addLiveOut(X86::RAX);
+ RetReg).addReg(Reg);
+ RetRegs.push_back(RetReg);
}
// Now emit the RET.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
@@ -1373,7 +1381,6 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
else if (Len >= 2)
VT = MVT::i16;
else {
- assert(Len == 1);
VT = MVT::i8;
}
@@ -1517,6 +1524,81 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
}
}
+bool X86FastISel::FastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C)
+ return false;
+
+ if (!Subtarget->is64Bit())
+ return false;
+
+  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (Idx > 6)
+ return false;
+
+ if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+ return false;
+
+ Type *ArgTy = I->getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ case MVT::i64:
+ break;
+ default:
+ return false;
+ }
+ }
+
+ static const uint16_t GPR32ArgRegs[] = {
+ X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
+ };
+ static const uint16_t GPR64ArgRegs[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
+ };
+
+ Idx = 0;
+ const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (I->use_empty())
+ continue;
+ bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
+ const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
+ unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(I, ResultReg);
+ }
+ return true;
+}
+
bool X86FastISel::X86SelectCall(const Instruction *I) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
@@ -1529,6 +1611,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
return X86VisitIntrinsicCall(*II);
+ // Allow SelectionDAG isel to handle tail calls.
+ if (cast<CallInst>(I)->isTailCall())
+ return false;
+
return DoSelectCall(I, 0);
}
@@ -1541,9 +1627,9 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
CallingConv::ID CC = CS.getCallingConv();
if (CC == CallingConv::Fast || CC == CallingConv::GHC)
return 0;
- if (!CS.paramHasAttr(1, Attributes::StructRet))
+ if (!CS.paramHasAttr(1, Attribute::StructRet))
return 0;
- if (CS.paramHasAttr(1, Attributes::InReg))
+ if (CS.paramHasAttr(1, Attribute::InReg))
return 0;
return 4;
}
@@ -1581,8 +1667,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
*FuncInfo.MF, FTy->isVarArg(),
Outs, FTy->getContext());
@@ -1622,12 +1707,12 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
Value *ArgVal = *i;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attributes::SExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attributes::ZExt))
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
- if (CS.paramHasAttr(AttrInd, Attributes::ByVal)) {
+ if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
PointerType *Ty = cast<PointerType>(ArgVal->getType());
Type *ElementTy = Ty->getElementType();
unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
@@ -1641,9 +1726,9 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
return false;
}
- if (CS.paramHasAttr(AttrInd, Attributes::InReg))
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg))
Flags.setInReg();
- if (CS.paramHasAttr(AttrInd, Attributes::Nest))
+ if (CS.paramHasAttr(AttrInd, Attribute::Nest))
Flags.setNest();
// If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
@@ -1905,17 +1990,17 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
ComputeValueVTs(TLI, I->getType(), RetTys);
for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
EVT VT = RetTys[i];
- EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
for (unsigned j = 0; j != NumRegs; ++j) {
ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.VT = RegisterVT;
MyFlags.Used = !CS.getInstruction()->use_empty();
- if (CS.paramHasAttr(0, Attributes::SExt))
+ if (CS.paramHasAttr(0, Attribute::SExt))
MyFlags.Flags.setSExt();
- if (CS.paramHasAttr(0, Attributes::ZExt))
+ if (CS.paramHasAttr(0, Attribute::ZExt))
MyFlags.Flags.setZExt();
- if (CS.paramHasAttr(0, Attributes::InReg))
+ if (CS.paramHasAttr(0, Attribute::InReg))
MyFlags.Flags.setInReg();
Ins.push_back(MyFlags);
}
@@ -2154,13 +2239,13 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
MVT VT;
if (!isTypeLegal(CF->getType(), VT))
- return false;
+ return 0;
// Get opcode and regclass for the given zero.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
switch (VT.SimpleTy) {
- default: return false;
+ default: return 0;
case MVT::f32:
if (X86ScalarSSEf32) {
Opc = X86::FsFLD0SS;
@@ -2181,7 +2266,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
break;
case MVT::f80:
// No f80 support yet.
- return false;
+ return 0;
}
unsigned ResultReg = createResultReg(RC);
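
The isInt<32> rewrite above replaces a truncate-and-compare with a width check. A stand-in for that template, showing what MOV64mi32's sign-extension requirement means (assumed semantics; llvm/Support/MathExtras.h holds the real one):

#include <cassert>
#include <cstdint>

// Does a 64-bit immediate survive sign-extension from its low N bits?
template <unsigned N> constexpr bool isIntN(int64_t x) {
  return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
}

int main() {
  assert(isIntN<32>(-1));                // fits: MOV64mi32 is usable
  assert(!isIntN<32>(INT64_C(1) << 40)); // needs a full 64-bit materialize
}
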
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 791f5982af7c..0585b43a4640 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -26,17 +26,17 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
-#include "llvm/InlineAsm.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -111,7 +111,7 @@ namespace {
EdgeBundles *Bundles;
// Return a bitmask of FP registers in block's live-in list.
- unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
unsigned Mask = 0;
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I) {
@@ -198,7 +198,7 @@ namespace {
}
/// getScratchReg - Return an FP register that is not currently in use.
- unsigned getScratchReg() {
+ unsigned getScratchReg() const {
for (int i = NumFPRegs - 1; i >= 8; --i)
if (!isLive(i))
return i;
@@ -206,7 +206,7 @@ namespace {
}
/// isScratchReg - Returns true if RegNo is a scratch FP register.
- bool isScratchReg(unsigned RegNo) {
+ static bool isScratchReg(unsigned RegNo) {
return RegNo > 8 && RegNo < NumFPRegs;
}
@@ -311,7 +311,7 @@ namespace {
void handleSpecialFP(MachineBasicBlock::iterator &I);
// Check if a COPY instruction is using FP registers.
- bool isFPCopy(MachineInstr *MI) {
+ static bool isFPCopy(MachineInstr *MI) {
unsigned DstReg = MI->getOperand(0).getReg();
unsigned SrcReg = MI->getOperand(1).getReg();
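
calcLiveInMask above folds a block's FP live-ins into a bitmask over the seven stackable registers. The same computation detached from MachineBasicBlock, with illustrative register numbers:

#include <cassert>

static unsigned liveInMask(const unsigned *Regs, unsigned N) {
  unsigned Mask = 0;
  for (unsigned i = 0; i != N; ++i)
    if (Regs[i] < 7) // only FP0..FP6 participate in stackification
      Mask |= 1u << Regs[i];
  return Mask;
}

int main() {
  unsigned Regs[] = {0, 2, 9}; // 9 stands in for a non-FP live-in
  assert(liveInMask(Regs, 3) == 5u); // bits 0 and 2 set
}
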
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 369589d469a6..54cbd40274a7 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -17,18 +17,18 @@
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -50,13 +50,13 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken() ||
+ MFI->isFrameAddressTaken() || MF.hasMSInlineAsm() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MMI.callsUnwindInit() || MMI.callsEHReturn());
}
-static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
+static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
+ if (IsLP64) {
if (isInt<8>(Imm))
return X86::SUB64ri8;
return X86::SUB64ri32;
@@ -67,8 +67,8 @@ static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
}
}
-static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
+static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
+ if (IsLP64) {
if (isInt<8>(Imm))
return X86::ADD64ri8;
return X86::ADD64ri32;
@@ -79,8 +79,8 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
}
}
-static unsigned getLEArOpcode(unsigned is64Bit) {
- return is64Bit ? X86::LEA64r : X86::LEA32r;
+static unsigned getLEArOpcode(unsigned IsLP64) {
+ return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
@@ -145,17 +145,17 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, int64_t NumBytes,
- bool Is64Bit, bool UseLEA,
+ bool Is64Bit, bool IsLP64, bool UseLEA,
const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
unsigned Opc;
if (UseLEA)
- Opc = getLEArOpcode(Is64Bit);
+ Opc = getLEArOpcode(IsLP64);
else
Opc = isSub
- ? getSUBriOpcode(Is64Bit, Offset)
- : getADDriOpcode(Is64Bit, Offset);
+ ? getSUBriOpcode(IsLP64, Offset)
+ : getADDriOpcode(IsLP64, Offset);
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -625,6 +625,22 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
return CompactUnwindEncoding;
}
+/// usesTheStack - Check whether any user of EFLAGS is a COPY instruction.
+/// The code that lowers a COPY of EFLAGS has to use the stack, and if we
+/// don't adjust the stack we clobber the first frame index.
+/// See X86InstrInfo::copyPhysReg.
+static bool usesTheStack(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS),
+ re = MRI.reg_end(); ri != re; ++ri)
+ if (ri->isCopy())
+ return true;
+
+ return false;
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -644,6 +660,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
bool Is64Bit = STI.is64Bit();
+ bool IsLP64 = STI.isTarget64BitLP64();
bool IsWin64 = STI.isTargetWin64();
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
@@ -673,12 +690,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
- // stack pointer (we fit in the Red Zone).
- if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attributes::NoRedZone) &&
+ // stack pointer (we fit in the Red Zone). We also check that we don't
+ // push and pop from the stack.
+ if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!IsWin64 && // Win64 has no Red Zone
+ !usesTheStack(MF) && // Don't push and pop.
!MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
@@ -692,7 +712,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (TailCallReturnAddrDelta < 0) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
- TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
+ TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)),
StackPtr)
.addReg(StackPtr)
.addImm(-TailCallReturnAddrDelta)
@@ -908,7 +928,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// MSVC x64's __chkstk needs to adjust %rsp.
// FIXME: %rax preserves the offset and should be available.
if (isSPUpdateNeeded)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
UseLEA, TII, *RegInfo);
if (isEAXAlive) {
@@ -920,7 +940,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MBB.insert(MBBI, MI);
}
} else if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
UseLEA, TII, *RegInfo);
// If we need a base pointer, set it up here. It's whatever the value
@@ -977,6 +997,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
bool Is64Bit = STI.is64Bit();
+ bool IsLP64 = STI.isTarget64BitLP64();
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
@@ -1062,7 +1083,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (RegInfo->needsStackRealignment(MF))
MBBI = FirstCSPop;
if (CSSize != 0) {
- unsigned Opc = getLEArOpcode(Is64Bit);
+ unsigned Opc = getLEArOpcode(IsLP64);
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
FramePtr, false, -CSSize);
} else {
@@ -1072,7 +1093,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA,
+ TII, *RegInfo);
}
// We're returning from function via eh_return.
@@ -1107,7 +1129,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, UseLEA, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64,
+ UseLEA, TII, *RegInfo);
}
// Jump to label or value in register.
@@ -1138,7 +1161,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
MachineInstr *NewMI = prior(MBBI);
- NewMI->copyImplicitOps(MBBI);
+ NewMI->copyImplicitOps(MF, MBBI);
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
@@ -1150,7 +1173,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, UseLEA, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII,
+ *RegInfo);
}
}
@@ -1362,17 +1386,25 @@ HasNestArgument(const MachineFunction *MF) {
return false;
}
-
-/// GetScratchRegister - Get a register for performing work in the segmented
-/// stack prologue. Depending on platform and the properties of the function
-/// either one or two registers will be needed. Set primary to true for
-/// the first register, false for the second.
+/// GetScratchRegister - Get a temp register for performing work in the
+/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
+/// and the properties of the function either one or two registers will be
+/// needed. Set primary to true for the first register, false for the second.
static unsigned
GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+ CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+
+ // Erlang stuff.
+ if (CallingConvention == CallingConv::HiPE) {
+ if (Is64Bit)
+ return Primary ? X86::R14 : X86::R13;
+ else
+ return Primary ? X86::EBX : X86::EDI;
+ }
+
if (Is64Bit)
return Primary ? X86::R11 : X86::R12;
- CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
bool IsNested = HasNestArgument(&MF);
if (CallingConvention == CallingConv::X86_FastCall ||
@@ -1400,7 +1432,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
bool Is64Bit = STI.is64Bit();
unsigned TlsReg, TlsOffset;
DebugLoc DL;
- const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
@@ -1408,8 +1439,8 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
- if (!ST->isTargetLinux() && !ST->isTargetDarwin() &&
- !ST->isTargetWin32() && !ST->isTargetFreeBSD())
+ if (!STI.isTargetLinux() && !STI.isTargetDarwin() &&
+ !STI.isTargetWin32() && !STI.isTargetFreeBSD())
report_fatal_error("Segmented stacks not supported on this platform.");
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
@@ -1447,13 +1478,13 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// Read the limit off the current stacklet off the stack_guard location.
if (Is64Bit) {
- if (ST->isTargetLinux()) {
+ if (STI.isTargetLinux()) {
TlsReg = X86::FS;
TlsOffset = 0x70;
- } else if (ST->isTargetDarwin()) {
+ } else if (STI.isTargetDarwin()) {
TlsReg = X86::GS;
TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
- } else if (ST->isTargetFreeBSD()) {
+ } else if (STI.isTargetFreeBSD()) {
TlsReg = X86::FS;
TlsOffset = 0x18;
} else {
@@ -1469,16 +1500,16 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
- if (ST->isTargetLinux()) {
+ if (STI.isTargetLinux()) {
TlsReg = X86::GS;
TlsOffset = 0x30;
- } else if (ST->isTargetDarwin()) {
+ } else if (STI.isTargetDarwin()) {
TlsReg = X86::GS;
TlsOffset = 0x48 + 90*4;
- } else if (ST->isTargetWin32()) {
+ } else if (STI.isTargetWin32()) {
TlsReg = X86::FS;
TlsOffset = 0x14; // pvArbitrary, reserved for application use
- } else if (ST->isTargetFreeBSD()) {
+ } else if (STI.isTargetFreeBSD()) {
report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
} else {
report_fatal_error("Segmented stacks not supported on this platform.");
@@ -1490,10 +1521,10 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
- if (ST->isTargetLinux() || ST->isTargetWin32()) {
+ if (STI.isTargetLinux() || STI.isTargetWin32()) {
BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
- } else if (ST->isTargetDarwin()) {
+ } else if (STI.isTargetDarwin()) {
// TlsOffset doesn't fit into a mod r/m byte so we need an extra register
unsigned ScratchReg2;
@@ -1579,3 +1610,228 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.verify();
#endif
}
+
+/// Erlang programs may need a special prologue to handle the stack size they
+/// might need at runtime. That is because Erlang/OTP does not implement a C
+/// stack but uses a custom implementation of hybrid stack/heap architecture.
+/// (for more information see Eric Stenman's Ph.D. thesis:
+/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
+///
+/// CheckStack:
+/// temp0 = sp - MaxStack
+/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+/// OldStart:
+/// ...
+/// IncStack:
+/// call inc_stack # doubles the stack space
+/// temp0 = sp - MaxStack
+/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize();
+ const bool Is64Bit = STI.is64Bit();
+ DebugLoc DL;
+ // HiPE-specific values
+ const unsigned HipeLeafWords = 24;
+ const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
+ const unsigned Guaranteed = HipeLeafWords * SlotSize;
+ unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
+ MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
+ unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
+
+ assert(STI.isTargetLinux() &&
+ "HiPE prologue is only supported on Linux operating systems.");
+
+ // Compute the largest caller's frame that is needed to fit the callees'
+ // frames. This 'MaxStack' is computed from:
+ //
+ // a) the fixed frame size, which is the space needed for all spilled temps,
+ // b) outgoing on-stack parameter areas, and
+ // c) the minimum stack space this function needs to make available for the
+ // functions it calls (a tunable ABI property).
+ if (MFI->hasCalls()) {
+ unsigned MoreStackForCalls = 0;
+
+ for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
+ MBBI != MBBE; ++MBBI)
+ for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
+ MI != ME; ++MI) {
+ if (!MI->isCall())
+ continue;
+
+ // Get callee operand.
+ const MachineOperand &MO = MI->getOperand(0);
+
+ // Only take account of global function calls (no closures etc.).
+ if (!MO.isGlobal())
+ continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (!F)
+ continue;
+
+ // Do not update 'MaxStack' for primitive and built-in functions
+ // (encoded with names either starting with "erlang."/"bif_" or not
+ // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
+ // "_", such as the BIF "suspend_0") as they are executed on another
+ // stack.
+ if (F->getName().find("erlang.") != StringRef::npos ||
+ F->getName().find("bif_") != StringRef::npos ||
+ F->getName().find_first_of("._") == StringRef::npos)
+ continue;
+
+ unsigned CalleeStkArity =
+ F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
+ if (HipeLeafWords - 1 > CalleeStkArity)
+ MoreStackForCalls = std::max(MoreStackForCalls,
+ (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
+ }
+ MaxStack += MoreStackForCalls;
+ }
+
+  // If the stack frame needed is larger than the guaranteed size, runtime
+  // checks and calls to the "inc_stack_0" BIF should be inserted in the
+  // assembly prologue.
+ if (MaxStack > Guaranteed) {
+ MachineBasicBlock &prologueMBB = MF.front();
+ MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
+
+ for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
+ E = prologueMBB.livein_end(); I != E; I++) {
+ stackCheckMBB->addLiveIn(*I);
+ incStackMBB->addLiveIn(*I);
+ }
+
+ MF.push_front(incStackMBB);
+ MF.push_front(stackCheckMBB);
+
+ unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
+ unsigned LEAop, CMPop, CALLop;
+ if (Is64Bit) {
+ SPReg = X86::RSP;
+ PReg = X86::RBP;
+ LEAop = X86::LEA64r;
+ CMPop = X86::CMP64rm;
+ CALLop = X86::CALL64pcrel32;
+ SPLimitOffset = 0x90;
+ } else {
+ SPReg = X86::ESP;
+ PReg = X86::EBP;
+ LEAop = X86::LEA32r;
+ CMPop = X86::CMP32rm;
+ CALLop = X86::CALLpcrel32;
+ SPLimitOffset = 0x4c;
+ }
+
+ ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+ assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+ "HiPE prologue scratch register is live-in");
+
+ // Create new MBB for StackCheck:
+ addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
+ SPReg, false, -MaxStack);
+ // SPLimitOffset is in a fixed heap location (pointed by BP).
+ addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
+ .addReg(ScratchReg), PReg, false, SPLimitOffset);
+ BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB);
+
+ // Create new MBB for IncStack:
+ BuildMI(incStackMBB, DL, TII.get(CALLop)).
+ addExternalSymbol("inc_stack_0");
+ addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
+ SPReg, false, -MaxStack);
+ addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
+ .addReg(ScratchReg), PReg, false, SPLimitOffset);
+ BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB);
+
+ stackCheckMBB->addSuccessor(&prologueMBB, 99);
+ stackCheckMBB->addSuccessor(incStackMBB, 1);
+ incStackMBB->addSuccessor(&prologueMBB, 99);
+ incStackMBB->addSuccessor(incStackMBB, 1);
+ }
+#ifdef XDEBUG
+ MF.verify();
+#endif
+}
+
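
The MaxStack bound computed in the HiPE prologue above is plain arithmetic over argument counts; here it is standalone, with the constants that prologue uses (HipeLeafWords = 24, six registered args on x86-64; names are ours, not LLVM's):

#include <cassert>

// Stack words a HiPE caller must guarantee before calling into this frame.
static unsigned maxStack(unsigned FrameSize, unsigned ArgCount,
                         unsigned SlotSize) {
  const unsigned CCRegisteredArgs = 6; // x86-64; 5 on x86-32
  unsigned CallerStkArity =
      ArgCount > CCRegisteredArgs ? ArgCount - CCRegisteredArgs : 0;
  return FrameSize + CallerStkArity * SlotSize + SlotSize;
}

int main() {
  // 8 args: two spill to the stack; one extra slot is always reserved.
  assert(maxStack(/*FrameSize=*/64, /*ArgCount=*/8, /*SlotSize=*/8) ==
         64 + 2 * 8 + 8);
}
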
+void X86FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ const X86RegisterInfo &RegInfo = *TM.getRegisterInfo();
+ unsigned StackPtr = RegInfo.getStackRegister();
+  bool reserveCallFrame = hasReservedCallFrame(MF);
+ int Opcode = I->getOpcode();
+ bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ bool IsLP64 = STI.isTarget64BitLP64();
+ DebugLoc DL = I->getDebugLoc();
+  uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
+ uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+ I = MBB.erase(I);
+
+  if (!reserveCallFrame) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackup instruction into a 'sub ESP, <amt>' and the
+ // adjcallstackdown instruction into 'add ESP, <amt>'
+ // TODO: consider using push / pop instead of sub + store / add
+ if (Amount == 0)
+ return;
+
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+
+ MachineInstr *New = 0;
+ if (Opcode == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
+ } else {
+ assert(Opcode == TII.getCallFrameDestroyOpcode());
+
+ // Factor out the amount the callee already popped.
+ Amount -= CalleeAmt;
+
+ if (Amount) {
+ unsigned Opc = getADDriOpcode(IsLP64, Amount);
+ New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ }
+ }
+
+ if (New) {
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction.
+ MBB.insert(I, New);
+ }
+
+ return;
+ }
+
+ if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
+ MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(CalleeAmt);
+
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // We are not tracking the stack pointer adjustment by the callee, so make
+  // sure we restore the stack pointer immediately after the call; there may
+ // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+ MachineBasicBlock::iterator B = MBB.begin();
+ while (I != B && !llvm::prior(I)->isCall())
+ --I;
+ MBB.insert(I, New);
+ }
+}
+
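
eliminateCallFramePseudoInstr above rounds the outgoing-argument area up to the stack alignment before rewriting the pseudo into a real sub or add. The rounding expression in isolation:

#include <cassert>
#include <cstdint>

// Round Amount up to the next multiple of Align; same integer form as the
// StackAlign computation in the function above.
static uint64_t roundUp(uint64_t Amount, uint64_t Align) {
  return (Amount + Align - 1) / Align * Align;
}

int main() {
  assert(roundUp(20, 16) == 32); // padded up to the next boundary
  assert(roundUp(32, 16) == 32); // already aligned, unchanged
}
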
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index dc515dc39c79..3f08b9a2e8d2 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -43,6 +43,8 @@ public:
void adjustForSegmentedStacks(MachineFunction &MF) const;
+ void adjustForHiPEPrologue(MachineFunction &MF) const;
+
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
@@ -63,6 +65,10 @@ public:
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const;
uint32_t getCompactUnwindEncoding(MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 99f557417b7c..6041669f8182 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -19,24 +19,21 @@
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
@@ -283,13 +280,13 @@ namespace {
/// getTargetMachine - Return a reference to the TargetMachine, casted
/// to the target-specific type.
- const X86TargetMachine &getTargetMachine() {
+ const X86TargetMachine &getTargetMachine() const {
return static_cast<const X86TargetMachine &>(TM);
}
/// getInstrInfo - Return a reference to the TargetInstrInfo, casted
/// to the target-specific type.
- const X86InstrInfo *getInstrInfo() {
+ const X86InstrInfo *getInstrInfo() const {
return getTargetMachine().getInstrInfo();
}
};
@@ -423,6 +420,11 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
if (!Chain.getNumOperands())
return false;
+  // Since we are not using alias analysis here, conservatively abort if the
+  // chain writes to memory: it's not safe to move the callee (a load) across
+  // a store.
+ if (isa<MemSDNode>(Chain.getNode()) &&
+ cast<MemSDNode>(Chain.getNode())->writeMem())
+ return false;
if (Chain.getOperand(0).getNode() == Callee.getNode())
return true;
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
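The writeMem() guard added in this hunk is conservative by design; a standalone sketch of the rule it enforces (hypothetical struct, the real code queries MemSDNode on the chain node):

    // Without alias analysis we cannot prove a store doesn't overlap the
    // loaded callee address, so any memory-writing node on the chain blocks
    // moving the load.
    struct ChainNodeInfo { bool IsMemNode; bool WritesMem; };

    bool mayMoveLoadAcross(const ChainNodeInfo &N) {
      return !(N.IsMemNode && N.WritesMem);
    }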
@@ -434,17 +436,19 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ OptForSize = MF->getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
if (OptLevel != CodeGenOpt::None &&
- (N->getOpcode() == X86ISD::CALL ||
+         // Only do this when the target doesn't favor register-indirect
+         // calls.
+ ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
- // Only does this if load can be foled into TC_RETURN.
+            // Only do this if the load can be folded into TC_RETURN.
(Subtarget->is64Bit() ||
getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
/// Also try moving call address load from outside callseq_start to just
@@ -1040,8 +1044,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.IndexReg = ShVal;
return false;
}
- break;
}
+ break;
case ISD::SRL: {
// Scale must not be used already.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b35fb514bf94..69341869aa3e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -14,20 +14,15 @@
#define DEBUG_TYPE "x86-isel"
#include "X86ISelLowering.h"
+#include "Utils/X86ShuffleDecode.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
-#include "Utils/X86ShuffleDecode.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/VariadicFunction.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -35,14 +30,19 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/VariadicFunction.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -85,6 +85,11 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
* ElemsPerChunk);
+ // If the input is a buildvector just emit a smaller one.
+ if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+ Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk);
+
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
VecIdx);
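The BUILD_VECTOR shortcut added above avoids emitting an EXTRACT_SUBVECTOR node entirely; a sketch with plain containers standing in for SDNode operands:

    #include <vector>

    // Extracting a 128-bit chunk of a BUILD_VECTOR just rebuilds a smaller
    // BUILD_VECTOR from ElemsPerChunk operands starting at NormalizedIdxVal.
    std::vector<int> extractChunk(const std::vector<int> &Ops,
                                  unsigned NormalizedIdxVal,
                                  unsigned ElemsPerChunk) {
      return std::vector<int>(Ops.begin() + NormalizedIdxVal,
                              Ops.begin() + NormalizedIdxVal + ElemsPerChunk);
    }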
@@ -181,9 +186,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
- // Bypass i32 with i8 on Atom when compiling with O2
- if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
+ // Bypass expensive divides on Atom when compiling with O2
+ if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
addBypassSlowDiv(32, 8);
+ if (Subtarget->is64Bit())
+ addBypassSlowDiv(64, 16);
+ }
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
// Setup Windows compiler runtime calls.
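The widened divide bypass configured above works by a cheap runtime width test; a hedged C++ sketch of the transform (illustrative only, the real lowering builds this in the DAG):

    // On Atom, a full-width divide is slow. When both operands happen to
    // fit in the narrow type, the 32->8 (and on x86-64, 64->16) bypass
    // dispatches to the much faster narrow divide.
    unsigned divideWithBypass(unsigned A, unsigned B) {
      if (((A | B) >> 8) == 0)                       // both fit in 8 bits
        return (unsigned char)A / (unsigned char)B;  // fast narrow path
      return A / B;                                  // slow full divide
    }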
@@ -368,7 +376,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
- setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f80, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i8, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i16, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i64, Expand);
setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
@@ -456,7 +470,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
- // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intened to support
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
 // support continuation, user-level threading, and so on. As a result, no
// other SjLj exception interfaces are implemented and please don't build
@@ -605,10 +619,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
@@ -633,8 +649,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
@@ -644,8 +661,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
@@ -659,10 +677,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f32 , Expand);
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f32 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
@@ -699,8 +719,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f80 , Expand);
- setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f80, Expand);
+ setOperationAction(ISD::FCOS , MVT::f80, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
}
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
@@ -725,74 +746,81 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
- for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
- VT <= MVT::LAST_VECTOR_VALUETYPE; ++VT) {
- setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FMA, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FFLOOR, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SETCC, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand);
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ setOperationAction(ISD::ADD , VT, Expand);
+ setOperationAction(ISD::SUB , VT, Expand);
+ setOperationAction(ISD::FADD, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSUB, VT, Expand);
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::FMUL, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::LOAD, VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
+    setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+    setOperationAction(ISD::FCOS, VT, Expand);
+    setOperationAction(ISD::FSINCOS, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FMA, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::SHL, VT, Expand);
+ setOperationAction(ISD::SRA, VT, Expand);
+ setOperationAction(ISD::SRL, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ setOperationAction(ISD::TRUNCATE, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction((MVT::SimpleValueType)VT,
+ setTruncStoreAction(VT,
(MVT::SimpleValueType)InnerVT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -865,6 +893,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
@@ -973,7 +1002,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1016,26 +1053,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
setOperationAction(ISD::SRA, MVT::v16i8, Custom);
- if (Subtarget->hasAVX2()) {
- setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SRL, MVT::v4i32, Legal);
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
- setOperationAction(ISD::SHL, MVT::v2i64, Legal);
- setOperationAction(ISD::SHL, MVT::v4i32, Legal);
-
- setOperationAction(ISD::SRA, MVT::v4i32, Legal);
- } else {
- setOperationAction(ISD::SRL, MVT::v2i64, Custom);
- setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
- setOperationAction(ISD::SHL, MVT::v2i64, Custom);
- setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
- setOperationAction(ISD::SRA, MVT::v4i32, Custom);
- }
+ setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
}
- if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
@@ -1053,6 +1085,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
setOperationAction(ISD::FABS, MVT::v8f32, Custom);
@@ -1062,14 +1098,20 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
@@ -1088,6 +1130,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
setOperationAction(ISD::SRA, MVT::v32i8, Custom);
+ setOperationAction(ISD::SDIV, MVT::v16i16, Custom);
+
setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
@@ -1102,16 +1146,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
+
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
- setOperationAction(ISD::FMA, MVT::v8f32, Custom);
- setOperationAction(ISD::FMA, MVT::v4f64, Custom);
- setOperationAction(ISD::FMA, MVT::v4f32, Custom);
- setOperationAction(ISD::FMA, MVT::v2f64, Custom);
- setOperationAction(ISD::FMA, MVT::f32, Custom);
- setOperationAction(ISD::FMA, MVT::f64, Custom);
+ setOperationAction(ISD::FMA, MVT::v8f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::f32, Legal);
+ setOperationAction(ISD::FMA, MVT::f64, Legal);
}
- if (Subtarget->hasAVX2()) {
+ if (Subtarget->hasInt256()) {
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
setOperationAction(ISD::ADD, MVT::v16i16, Legal);
@@ -1129,13 +1180,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
- setOperationAction(ISD::SRL, MVT::v4i64, Legal);
- setOperationAction(ISD::SRL, MVT::v8i32, Legal);
-
- setOperationAction(ISD::SHL, MVT::v4i64, Legal);
- setOperationAction(ISD::SHL, MVT::v8i32, Legal);
-
- setOperationAction(ISD::SRA, MVT::v8i32, Legal);
+ setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
@@ -1151,15 +1196,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
// Don't lower v32i8 because there is no 128-bit byte mul
+ }
- setOperationAction(ISD::SRL, MVT::v4i64, Custom);
- setOperationAction(ISD::SRL, MVT::v8i32, Custom);
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
- setOperationAction(ISD::SHL, MVT::v4i64, Custom);
- setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
- setOperationAction(ISD::SRA, MVT::v8i32, Custom);
- }
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
// Custom lower several nodes for 256-bit types.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
@@ -1217,7 +1264,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
-
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
//
@@ -1246,6 +1292,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setLibcallName(RTLIB::SRA_I128, 0);
}
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->isTargetDarwin()) {
+      // For MacOSX, we don't want the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret to avoid memory
+ // traffic.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
+
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
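The FSINCOS combining enabled in this hunk is easiest to see at source level; a sketch assuming a libm that provides sincos(3) (on Darwin the lowering instead targets __sincos_stret, per the comment above, so both results come back in registers rather than through memory):

    #include <cmath>

    // When sin(x) and cos(x) of the same operand are both live, the two
    // libcalls are merged into one sincos call.
    void polarToCartesian(double R, double Theta, double &X, double &Y) {
      double S, C;
      sincos(Theta, &S, &C);   // one libcall instead of sin() + cos()
      X = R * C;
      Y = R * S;
    }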
@@ -1266,6 +1325,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
@@ -1277,28 +1337,25 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   // On Darwin, -Os means optimize for size without hurting performance;
   // do not reduce the limit.
- maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
- maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
- maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
- maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
- maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+ MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(4); // 2^4 bytes.
- benefitFromCodePlacementOpt = true;
   // Predictable cmovs don't hurt on Atom because it's in-order.
- predictableSelectIsExpensive = !Subtarget->isAtom();
+ PredictableSelectIsExpensive = !Subtarget->isAtom();
setPrefFunctionAlignment(4); // 2^4 bytes.
}
-
EVT X86TargetLowering::getSetCCResultType(EVT VT) const {
if (!VT.isVector()) return MVT::i8;
return VT.changeVectorElementTypeToInteger();
}
-
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
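The MaxStoresPer* knobs set above bound how far the memory intrinsics are expanded inline; a sketch of the user-visible effect (sizes are illustrative):

    #include <cstring>

    // Below the threshold the backend emits a short run of wide stores;
    // above it (or for unknown sizes) the memset libcall is kept.
    void clearSmall(char *P) { std::memset(P, 0, 32); }               // inlined stores
    void clearLarge(char *P, std::size_t N) { std::memset(P, 0, N); } // libcall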
@@ -1348,34 +1405,30 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
 /// lowering. If DstAlign is zero, it means the destination alignment can
 /// satisfy any constraint. Similarly, if SrcAlign is zero it means there
 /// isn't a need to check it against the alignment requirement,
-/// probably because the source does not need to be loaded. If
-/// 'IsZeroVal' is true, that means it's safe to return a
-/// non-scalar-integer type, e.g. empty string source, constant, or loaded
-/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
-/// constant so it does not need to be loaded.
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal,
+ bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
- // linux. This is because the stack realignment code can't handle certain
- // cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
- if (IsZeroVal &&
- !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
+ if ((!IsMemset || ZeroMemset) &&
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
- (SrcAlign == 0 || SrcAlign >= 16))) &&
- Subtarget->getStackAlignment() >= 16) {
- if (Subtarget->getStackAlignment() >= 32) {
- if (Subtarget->hasAVX2())
+ (SrcAlign == 0 || SrcAlign >= 16)))) {
+ if (Size >= 32) {
+ if (Subtarget->hasInt256())
return MVT::v8i32;
- if (Subtarget->hasAVX())
+ if (Subtarget->hasFp256())
return MVT::v8f32;
}
if (Subtarget->hasSSE2())
@@ -1384,7 +1437,6 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
- Subtarget->getStackAlignment() >= 8 &&
Subtarget->hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
@@ -1396,6 +1448,21 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::i32;
}
+bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
+ if (VT == MVT::f32)
+ return X86ScalarSSEf32;
+ else if (VT == MVT::f64)
+ return X86ScalarSSEf64;
+ return true;
+}
+
+bool
+X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ if (Fast)
+ *Fast = Subtarget->isUnalignedMemAccessFast();
+ return true;
+}
+
/// getJumpTableEncoding - Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
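A condensed sketch of the selection policy getOptimalMemOpType now implements (the NoImplicitFloat and constant-source checks are elided; plain booleans stand in for the subtarget queries):

    // Pick the widest profitable type for an inlined memset/memcpy.
    enum class MemVT { v8i32, v8f32, v4i32, v4f32, f64, i64, i32 };

    MemVT pickMemOpType(unsigned long long Size, bool FastOrAligned16,
                        bool HasInt256, bool HasFp256, bool HasSSE2,
                        bool HasSSE1, bool Is64Bit) {
      if (Size >= 16 && FastOrAligned16) {
        if (Size >= 32) {                    // try 256-bit ops first
          if (HasInt256) return MemVT::v8i32;
          if (HasFp256)  return MemVT::v8f32;
        }
        if (HasSSE2) return MemVT::v4i32;
        if (HasSSE1) return MemVT::v4f32;
      }
      if (Size >= 8 && !Is64Bit && HasSSE2)
        return MemVT::f64;                   // lower 8-byte chunks via f64
      return Is64Bit ? MemVT::i64 : MemVT::i32;
    }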
@@ -1449,10 +1516,10 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
 // FIXME: Why is this routine here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
-X86TargetLowering::findRepresentativeClass(EVT VT) const{
+X86TargetLowering::findRepresentativeClass(MVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
@@ -1494,7 +1561,6 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
return true;
}
-
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1526,14 +1592,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
- // Add the regs to the liveout set for the function.
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg()))
- MRI.addLiveOut(RVLocs[i].getLocReg());
-
SDValue Flag;
-
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
@@ -1602,14 +1661,16 @@ X86TargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. We saved the argument into
- // a virtual register in the entry block, so now we copy the value out
- // and into %rax.
- if (Subtarget->is64Bit() &&
- DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+  // Win32 requires us to put the sret argument in %eax as well.
+ // We saved the argument into a virtual register in the entry block,
+ // so now we copy the value out and into %rax/%eax.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
@@ -1617,11 +1678,14 @@ X86TargetLowering::LowerReturn(SDValue Chain,
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
- Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+ unsigned RetValReg
+ = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
+ X86::RAX : X86::EAX;
+ Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
- // RAX now acts like a return value.
- MRI.addLiveOut(X86::RAX);
+ // RAX/EAX now acts like a return value.
+ RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
}
RetOps[0] = Chain; // Update chain.
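For context, a hedged source-level example of what exercises the sret path above (the register is RAX on x86-64, EAX for ILP32 targets and Win32):

    // A struct too large for registers is returned through a hidden sret
    // pointer argument; the ABI also requires the callee to hand that
    // pointer back in %rax/%eax, which is the CopyToReg emitted above.
    struct Big { long V[8]; };

    Big makeBig() {
      Big B = {};   // caller-provided storage, written through sret
      return B;
    }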
@@ -1666,8 +1730,8 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return true;
}
-EVT
-X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+MVT
+X86TargetLowering::getTypeForExtArgOrReturn(MVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT;
// TODO: Is this also valid on 32-bit?
@@ -1676,7 +1740,7 @@ X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
else
ReturnMVT = MVT::i32;
- EVT MinVT = getRegisterType(Context, ReturnMVT);
+ MVT MinVT = getRegisterType(ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
@@ -1698,7 +1762,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
@@ -1742,7 +1806,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
return Chain;
}
-
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
@@ -1806,7 +1869,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
/// IsTailCallConvention - Return true if the calling convention is one that
/// supports tail call optimization.
static bool IsTailCallConvention(CallingConv::ID CC) {
- return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+ return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
+ CC == CallingConv::HiPE);
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
@@ -1893,7 +1957,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
- "Var args not supported with calling convention fastcc or ghc");
+ "Var args not supported with calling convention fastcc, ghc or hipe");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -1955,10 +2019,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
- if (RegVT.isVector()) {
- ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
- ArgValue);
- } else
+ if (RegVT.isVector())
+ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
+ else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
} else {
@@ -1974,14 +2037,18 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(ArgValue);
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. Save the argument into
- // a virtual register so that we can access it from the return points.
- if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+  // Win32 requires us to put the sret argument in %eax as well.
+ // Save the argument into a virtual register so that we can access it
+ // from the return points.
+ if (MF.getFunction()->hasStructRetAttr() &&
+ (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
- Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ MVT PtrTy = getPointerTy();
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
@@ -2034,8 +2101,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
- bool NoImplicitFloatOps = Fn->getFnAttributes().
- hasAttribute(Attributes::NoImplicitFloat);
+ bool NoImplicitFloatOps = Fn->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
@@ -2238,7 +2305,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
- "Var args not supported with calling convention fastcc or ghc");
+ "Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -2513,8 +2580,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
OpFlags = X86II::MO_DARWIN_STUB;
} else if (Subtarget->isPICStyleRIPRel() &&
isa<Function>(GV) &&
- cast<Function>(GV)->getFnAttributes().
- hasAttribute(Attributes::NonLazyBind)) {
+ cast<Function>(GV)->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NonLazyBind)) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
@@ -2594,8 +2662,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
- return DAG.getNode(X86ISD::TC_RETURN, dl,
- NodeTys, &Ops[0], Ops.size());
+ return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
@@ -2632,7 +2699,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ins, dl, DAG, InVals);
}
-
//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//
@@ -2754,7 +2820,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const {
+ SelectionDAG &DAG) const {
if (!IsTailCallConvention(CalleeCC) &&
CalleeCC != CallingConv::C)
return false;
@@ -2793,7 +2859,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// An stdcall caller is expected to clean up its arguments; the callee
// isn't going to do that.
- if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ if (!CCMatch && CallerCC == CallingConv::X86_StdCall)
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
@@ -2913,9 +2979,15 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments so watch out for those.
if (!Subtarget->is64Bit() &&
- !isa<GlobalAddressSDNode>(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee)) {
+ ((!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) ||
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)) {
unsigned NumInRegs = 0;
+      // In PIC mode we need an extra register to form the address computation
+      // for the callee.
+ unsigned MaxInRegs =
+ (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
@@ -2924,7 +2996,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
- if (++NumInRegs == 3)
+ if (++NumInRegs == MaxInRegs)
return false;
break;
}
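A restatement of the cap the loop above enforces, as a standalone check (a sketch; register names per the surrounding comments, and note that reaching the cap fails, mirroring the ++NumInRegs == MaxInRegs early-out):

    // On 32-bit x86 the sibcall callee address is resolved after the
    // callee-saved restore, using one of EAX/ECX/EDX. Under PIC one of
    // those three must also stay free for the address computation itself.
    bool inRegArgsFitForSibcall(unsigned NumInRegArgs, bool IsPIC) {
      const unsigned MaxInRegs = IsPIC ? 2 : 3;  // PIC reserves one register
      return NumInRegArgs < MaxInRegs;
    }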
@@ -2941,7 +3013,6 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
return X86::createFastISel(funcInfo, libInfo);
}
-
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
@@ -2961,7 +3032,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
@@ -3011,7 +3082,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::SHUFP:
case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
@@ -3052,7 +3123,6 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}
-
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
@@ -3103,6 +3173,8 @@ bool X86::isCalleePop(CallingConv::ID CallingConv,
return TailCallOpt;
case CallingConv::GHC:
return TailCallOpt;
+ case CallingConv::HiPE:
+ return TailCallOpt;
}
}
@@ -3233,9 +3305,7 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) {
/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
/// specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
- if (Val < 0 || Val == CmpVal)
- return true;
- return false;
+ return (Val < 0 || Val == CmpVal);
}
/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
@@ -3262,8 +3332,8 @@ static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
- if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
+static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+ if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
// Lower quadword copied in order or undef.
@@ -3291,8 +3361,8 @@ static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
- if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
+static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+ if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
// Upper quadword copied in order.
@@ -3322,8 +3392,8 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
/// is suitable for input to PALIGNR.
static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
const X86Subtarget *Subtarget) {
- if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) ||
- (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()))
+ if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
+ (VT.is256BitVector() && !Subtarget->hasInt256()))
return false;
unsigned NumElts = VT.getVectorNumElements();
@@ -3410,9 +3480,9 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
/// specifies a shuffle of elements that is suitable for input to 128/256-bit
/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
/// reverse of what x86 shuffles want.
-static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX,
+static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
bool Commuted = false) {
- if (!HasAVX && VT.getSizeInBits() == 256)
+ if (!HasFp256 && VT.is256BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
@@ -3547,7 +3617,7 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
static
SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
DebugLoc dl = SVOp->getDebugLoc();
if (VT != MVT::v8i32 && VT != MVT::v8f32)
@@ -3591,14 +3661,14 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
- bool HasAVX2, bool V2IsSplat = false) {
+ bool HasInt256, bool V2IsSplat = false) {
unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3630,14 +3700,14 @@ static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
- bool HasAVX2, bool V2IsSplat = false) {
+ bool HasInt256, bool V2IsSplat = false) {
unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3667,22 +3737,22 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
- bool HasAVX2) {
+static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
+ bool Is256BitVec = VT.is256BitVector();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
// FIXME: Need a better way to get rid of this, there's no latency difference
  // between UNPCKLPD and MOVDDUP, the latter should always be checked first and
// the former later. We should also remove the "_undef" special mask.
- if (NumElts == 4 && VT.getSizeInBits() == 256)
+ if (NumElts == 4 && Is256BitVec)
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3710,14 +3780,14 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
+static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3766,8 +3836,8 @@ static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
 /// The first half comes from the second half of V1 and the second half from
 /// the second half of V2.
-static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX || !VT.is256BitVector())
+static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256 || !VT.is256BitVector())
return false;
// The shuffle result is divided into half A and half B. In total the two
@@ -3798,7 +3868,7 @@ static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned HalfSize = VT.getVectorNumElements()/2;
@@ -3826,13 +3896,13 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
-static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX)
+static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256)
return false;
unsigned NumElts = VT.getVectorNumElements();
// Only match 256-bit with 32/64-bit types
- if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8))
+ if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8))
return false;
unsigned NumLanes = VT.getSizeInBits()/128;
@@ -3888,8 +3958,8 @@ static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
- if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
- (VT.getSizeInBits() == 256 && NumElems != 8))
+ if ((VT.is128BitVector() && NumElems != 4) ||
+ (VT.is256BitVector() && NumElems != 8))
return false;
// "i+1" is the value the indexed mask element must have
@@ -3911,8 +3981,8 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
- if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
- (VT.getSizeInBits() == 256 && NumElems != 8))
+ if ((VT.is128BitVector() && NumElems != 4) ||
+ (VT.is256BitVector() && NumElems != 8))
return false;
// "i" is the value the indexed mask element must have
@@ -3927,8 +3997,8 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
-static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX || !VT.is256BitVector())
+static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256 || !VT.is256BitVector())
return false;
unsigned NumElts = VT.getVectorNumElements();
@@ -3972,9 +4042,8 @@ bool X86::isVEXTRACTF128Index(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- unsigned VL = N->getValueType(0).getVectorNumElements();
- unsigned VBits = N->getValueType(0).getSizeInBits();
- unsigned ElSize = VBits / VL;
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % 128 == 0;
return Result;
@@ -3991,9 +4060,8 @@ bool X86::isVINSERTF128Index(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- unsigned VL = N->getValueType(0).getVectorNumElements();
- unsigned VBits = N->getValueType(0).getSizeInBits();
- unsigned ElSize = VBits / VL;
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % 128 == 0;
return Result;
@@ -4003,7 +4071,7 @@ bool X86::isVINSERTF128Index(SDNode *N) {
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
/// Handles 128-bit and 256-bit.
static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for PSHUF/SHUFP");
@@ -4033,7 +4101,7 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4057,7 +4125,7 @@ static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4081,7 +4149,7 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
unsigned NumElts = VT.getVectorNumElements();
@@ -4112,8 +4180,8 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- EVT VecVT = N->getOperand(0).getValueType();
- EVT ElVT = VecVT.getVectorElementType();
+ MVT VecVT = N->getOperand(0).getValueType().getSimpleVT();
+ MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
return Index / NumElemsPerChunk;
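// A sketch with assumed types: extracting the upper half of a v8i32 has
// ElVT = i32, so NumElemsPerChunk = 128 / 32 = 4, and element Index 4 maps
// to immediate 4 / 4 = 1, i.e. the high 128-bit lane.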
@@ -4129,8 +4197,8 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- EVT VecVT = N->getValueType(0);
- EVT ElVT = VecVT.getVectorElementType();
+ MVT VecVT = N->getValueType(0).getSimpleVT();
+ MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
return Index / NumElemsPerChunk;
@@ -4140,7 +4208,7 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions.
/// Handles 256-bit.
static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
unsigned NumElts = VT.getVectorNumElements();
@@ -4160,17 +4228,18 @@ static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
- return ((isa<ConstantSDNode>(Elt) &&
- cast<ConstantSDNode>(Elt)->isNullValue()) ||
- (isa<ConstantFPSDNode>(Elt) &&
- cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Elt))
+ return CN->isNullValue();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
+ return CFP->getValueAPF().isPosZero();
+ return false;
}
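// Note that the FP case intentionally matches only +0.0; a -0.0 element has
// a different bit pattern and must not be folded into a zero vector. A
// minimal illustration of the intent (hypothetical nodes):
//   isZeroNode(DAG.getConstantFP(+0.0, MVT::f32))  --> true
//   isZeroNode(DAG.getConstantFP(-0.0, MVT::f32))  --> false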
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> MaskVec;
@@ -4319,12 +4388,11 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- unsigned Size = VT.getSizeInBits();
// Always build SSE zero vectors as <4 x i32> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
- if (Size == 128) { // SSE
+ if (VT.is128BitVector()) { // SSE
if (Subtarget->hasSSE2()) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
@@ -4332,8 +4400,8 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
- } else if (Size == 256) { // AVX
- if (Subtarget->hasAVX2()) { // AVX2
+ } else if (VT.is256BitVector()) { // AVX
+ if (Subtarget->hasInt256()) { // AVX2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
@@ -4354,22 +4422,21 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted into an <8 x i32> appropriately.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG,
+static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- unsigned Size = VT.getSizeInBits();
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
- if (Size == 256) {
- if (HasAVX2) { // AVX2
+ if (VT.is256BitVector()) {
+ if (HasInt256) { // AVX2
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
} else { // AVX
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
}
- } else if (Size == 128) {
+ } else if (VT.is128BitVector()) {
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else
llvm_unreachable("Unexpected vector type");
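// A sketch of the AVX-only branch, assuming a v8f32 request: the v4i32
// all-ones BUILD_VECTOR is doubled by Concat128BitVectors into a v8i32 of
// all ones, and the final bitcast back to v8f32 preserves the bit pattern,
// so every ones vector funnels through one canonical node and gets CSE'd.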
@@ -4448,14 +4515,13 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
EVT VT = V.getValueType();
DebugLoc dl = V.getDebugLoc();
- unsigned Size = VT.getSizeInBits();
- if (Size == 128) {
+ if (VT.is128BitVector()) {
V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
&SplatMask[0]);
- } else if (Size == 256) {
+ } else if (VT.is256BitVector()) {
// To use VPERMILPS to splat scalars, the second half of indices must
// refer to the higher part, which is a duplication of the lower one,
// because VPERMILPS can only handle in-lane permutations.
@@ -4479,14 +4545,14 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
int EltNo = SV->getSplatIndex();
int NumElems = SrcVT.getVectorNumElements();
- unsigned Size = SrcVT.getSizeInBits();
+ bool Is256BitVec = SrcVT.is256BitVector();
- assert(((Size == 128 && NumElems > 4) || Size == 256) &&
- "Unknown how to promote splat for type");
+ assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) &&
+ "Unknown how to promote splat for type");
// Extract the 128-bit part containing the splat element and update
// the splat element index when it refers to the higher register.
- if (Size == 256) {
+ if (Is256BitVec) {
V1 = Extract128BitVector(V1, EltNo, DAG, dl);
if (EltNo >= NumElems/2)
EltNo -= NumElems/2;
@@ -4503,7 +4569,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// Recreate the 256-bit vector and place the same 128-bit vector
// into the low and high part. This is necessary because we want
// to use VPERM* to shuffle the vectors
- if (Size == 256) {
+ if (Is256BitVec) {
V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
}
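// Tracing the 256-bit path as a sketch: to splat element 5 of a v8i32, the
// upper 128-bit half is extracted (EltNo becomes 5 - 4 = 1), CONCAT_VECTORS
// rebuilds a 256-bit value with that same half in both lanes, and the
// follow-up getLegalSplat can then broadcast lane 1 with a purely in-lane
// mask.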
@@ -4555,6 +4621,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::MOVLHPS:
DecodeMOVLHPSMask(NumElems, Mask);
break;
+ case X86ISD::PALIGNR:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
case X86ISD::PSHUFD:
case X86ISD::VPERMILP:
ImmN = N->getOperand(N->getNumOperands()-1);
@@ -4598,7 +4668,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::MOVLPS:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
- case X86ISD::PALIGN:
// Not yet implemented
return false;
default: llvm_unreachable("unknown target shuffle node");
@@ -4893,7 +4962,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
DAG.getConstant(NumBits,
- TLI.getShiftAmountTy(SrcOp.getValueType()))));
+ TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
}
SDValue
@@ -5063,10 +5132,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
/// or SDValue() otherwise.
SDValue
X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
- if (!Subtarget->hasAVX())
+ if (!Subtarget->hasFp256())
return SDValue();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
@@ -5109,7 +5178,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
Sc.getOpcode() != ISD::BUILD_VECTOR) {
- if (!Subtarget->hasAVX2())
+ if (!Subtarget->hasInt256())
return SDValue();
// Use the register form of the broadcast instruction available on AVX2.
@@ -5136,7 +5205,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
// Handle broadcasting a single constant scalar from the constant pool
// into a vector. On Sandybridge it is still better to load a constant vector
// from the constant pool and not to broadcast it from a scalar.
- if (ConstSplatVal && Subtarget->hasAVX2()) {
+ if (ConstSplatVal && Subtarget->hasInt256()) {
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
unsigned ScalarSize = CVT.getSizeInBits();
@@ -5164,7 +5233,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
// Handle AVX2 in-register broadcasts.
- if (!IsLoad && Subtarget->hasAVX2() &&
+ if (!IsLoad && Subtarget->hasInt256() &&
(ScalarSize == 32 || (Is256 && ScalarSize == 64)))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
@@ -5177,7 +5246,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
// The integer check is needed for the 64-bit into 128-bit case, so it doesn't
// match double, since there is no vbroadcastsd xmm instruction.
- if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) {
+ if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
@@ -5264,8 +5333,8 @@ SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT ExtVT = VT.getVectorElementType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT ExtVT = VT.getVectorElementType();
unsigned NumElems = Op.getNumOperands();
// Vectors containing all zeros can be matched by pxor and xorps later
@@ -5281,11 +5350,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
- if (ISD::isBuildVectorAllOnes(Op.getNode())) {
- if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasAVX2()))
+ if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
+ if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
return Op;
- return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
+ return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
}
SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
@@ -5596,7 +5665,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
- EVT ResVT = Op.getValueType();
+ MVT ResVT = Op.getValueType().getSimpleVT();
assert(ResVT.is256BitVector() && "Value type must be 256-bit wide");
@@ -5623,63 +5692,51 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
- if (!Subtarget->hasSSE41())
+ if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
+ return SDValue();
+ if (!Subtarget->hasInt256() && VT == MVT::v16i16)
return SDValue();
- unsigned ISDNo = 0;
- MVT OpTy;
-
- switch (VT.SimpleTy) {
- default: return SDValue();
- case MVT::v8i16:
- ISDNo = X86ISD::BLENDPW;
- OpTy = MVT::v8i16;
- break;
- case MVT::v4i32:
- case MVT::v4f32:
- ISDNo = X86ISD::BLENDPS;
- OpTy = MVT::v4f32;
- break;
- case MVT::v2i64:
- case MVT::v2f64:
- ISDNo = X86ISD::BLENDPD;
- OpTy = MVT::v2f64;
- break;
- case MVT::v8i32:
- case MVT::v8f32:
- if (!Subtarget->hasAVX())
- return SDValue();
- ISDNo = X86ISD::BLENDPS;
- OpTy = MVT::v8f32;
- break;
- case MVT::v4i64:
- case MVT::v4f64:
- if (!Subtarget->hasAVX())
- return SDValue();
- ISDNo = X86ISD::BLENDPD;
- OpTy = MVT::v4f64;
- break;
- }
- assert(ISDNo && "Invalid Op Number");
+  // Check that the mask is a BLEND mask and build the immediate value.
+ unsigned MaskValue = 0;
+ // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
+ unsigned NumLanes = (NumElems-1)/8 + 1;
+ unsigned NumElemsInLane = NumElems / NumLanes;
- unsigned MaskVals = 0;
+  // Blend for v16i16 should be symmetric for both lanes.
+ for (unsigned i = 0; i < NumElemsInLane; ++i) {
- for (unsigned i = 0; i != NumElems; ++i) {
+ int SndLaneEltIdx = (NumLanes == 2) ?
+ SVOp->getMaskElt(i + NumElemsInLane) : -1;
int EltIdx = SVOp->getMaskElt(i);
- if (EltIdx == (int)i || EltIdx < 0)
- MaskVals |= (1<<i);
- else if (EltIdx == (int)(i + NumElems))
- continue; // Bit is set to zero;
+
+ if ((EltIdx < 0 || EltIdx == (int)i) &&
+ (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
+ continue;
+
+ if (((unsigned)EltIdx == (i + NumElems)) &&
+ (SndLaneEltIdx < 0 ||
+ (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
+ MaskValue |= (1<<i);
else
return SDValue();
}
- V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
- V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2);
- SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2,
- DAG.getConstant(MaskVals, MVT::i32));
+  // Convert i32 vectors to floating point when AVX2 is not available.
+  // AVX2 introduced the VPBLENDD instruction for 128- and 256-bit vectors.
+ MVT BlendVT = VT;
+ if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
+ BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
+ NumElems);
+    V1 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V1);
+    V2 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V2);
+ }
+
+ SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
+ DAG.getConstant(MaskValue, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
}
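// A worked example of the mask loop above (a sketch, not from the source):
// for a v8i16 shuffle mask <0, 9, 2, 11, 4, 13, 6, 15> there is a single
// lane; elements 1, 3, 5 and 7 satisfy EltIdx == i + NumElems, so MaskValue
// becomes 0b10101010 = 0xAA and the result is (BLENDI v8i16 V1, V2, 0xAA).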
@@ -5814,6 +5871,11 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
}
}
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
@@ -5829,7 +5891,7 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
int EltIdx = MaskVals[i] * 2;
int Idx0 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx;
int Idx1 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx+1;
- pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8));
}
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
@@ -5947,6 +6009,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
DebugLoc dl = SVOp->getDebugLoc();
ArrayRef<int> MaskVals = SVOp->getMask();
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
@@ -6065,7 +6132,7 @@ static
SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
@@ -6079,7 +6146,7 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
// (1) one of input vector is undefined or zeroinitializer.
// The mask value 0x80 puts 0 in the corresponding slot of the vector.
// And (2) the mask indexes don't cross the 128-bit lane.
- if (VT != MVT::v32i8 || !Subtarget->hasAVX2() ||
+ if (VT != MVT::v32i8 || !Subtarget->hasInt256() ||
(!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
return SDValue();
@@ -6112,8 +6179,9 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG, DebugLoc dl) {
+ SelectionDAG &DAG) {
MVT VT = SVOp->getValueType(0).getSimpleVT();
+ DebugLoc dl = SVOp->getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
MVT NewVT;
unsigned Scale;
@@ -6149,7 +6217,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
/// getVZextMovL - Return a zero-extending vector move low node.
///
-static SDValue getVZextMovL(EVT VT, EVT OpVT,
+static SDValue getVZextMovL(MVT VT, EVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, DebugLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
@@ -6191,14 +6259,14 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
if (NewOp.getNode())
return NewOp;
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLaneElems = NumElems / 2;
DebugLoc dl = SVOp->getDebugLoc();
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
+ MVT EltVT = VT.getVectorElementType();
+ MVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
SDValue Output[2];
SmallVector<int, 16> Mask;
@@ -6303,7 +6371,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
assert(VT.is128BitVector() && "Unsupported vector size");
@@ -6452,23 +6520,6 @@ static bool MayFoldVectorLoad(SDValue V) {
return MayFoldLoad(V);
}
-// FIXME: the version above should always be used. Since there's
-// a bug where several vector shuffles can't be folded because the
-// DAG is not updated during lowering and a node claims to have two
-// uses while it only has one, use this version, and let isel match
-// another instruction if the load really happens to have more than
-// one use. Remove this version after this bug get fixed.
-// rdar://8434668, PR8156
-static bool RelaxedMayFoldVectorLoad(SDValue V) {
- if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
- if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
- V = V.getOperand(0);
- if (ISD::isNormalLoad(V.getNode()))
- return true;
- return false;
-}
-
static
SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
@@ -6574,7 +6625,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
// Reduce a vector shuffle to zext.
SDValue
-X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
// PMOVZX is only available from SSE41.
if (!Subtarget->hasSSE41())
return SDValue();
@@ -6582,7 +6633,7 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// Only AVX2 support 256-bit vector integer extending.
- if (!Subtarget->hasAVX2() && VT.is256BitVector())
+ if (!Subtarget->hasInt256() && VT.is256BitVector())
return SDValue();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -6618,9 +6669,10 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+ LLVMContext *Context = DAG.getContext();
unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
- EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
+ EVT NeVT = EVT::getIntegerVT(*Context, NBits);
+ EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift);
if (!isTypeLegal(NVT))
return SDValue();
@@ -6639,8 +6691,21 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
// If it's foldable, i.e. a normal load with a single use, we will let code
// selection fold it. Otherwise, we will shorten the conversion sequence.
if (CIdx && CIdx->getZExtValue() == 0 &&
- (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()))
+ (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
+ if (V.getValueSizeInBits() > V1.getValueSizeInBits()) {
+ // The "ext_vec_elt" node is wider than the result node.
+      // In this case we should extract a subvector from V.
+      // (bitcast (scalar_to_vector (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
+ unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits();
+ EVT FullVT = V.getValueType();
+ EVT SubVecVT = EVT::getVectorVT(*Context,
+ FullVT.getVectorElementType(),
+ FullVT.getVectorNumElements()/Ratio);
+ V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
+ DAG.getIntPtrConstant(0));
+ }
V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ }
}
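// A sketch of the subvector shortening, with assumed types: if V is a
// 256-bit v4i64 feeding a 128-bit v2i64 result, Ratio = 2 and
// SubVecVT = v2i64, so the EXTRACT_SUBVECTOR keeps only the low 128 bits of
// V before it is bitcast to V1's type.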
return DAG.getNode(ISD::BITCAST, DL, VT,
@@ -6650,7 +6715,7 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -6660,25 +6725,14 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// Handle splat operations
if (SVOp->isSplat()) {
- unsigned NumElem = VT.getVectorNumElements();
- int Size = VT.getSizeInBits();
-
// Use vbroadcast whenever the splat comes from a foldable load
SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
if (Broadcast.getNode())
return Broadcast;
-
- // Handle splats by matching through known shuffle masks
- if ((Size == 128 && NumElem <= 4) ||
- (Size == 256 && NumElem < 8))
- return SDValue();
-
-    // All remaining splats are promoted to target-supported vector shuffles.
- return PromoteSplat(SVOp, DAG);
}
// Check integer expanding shuffles.
- SDValue NewOp = lowerVectorIntExtend(Op, DAG);
+ SDValue NewOp = LowerVectorIntExtend(Op, DAG);
if (NewOp.getNode())
return NewOp;
@@ -6686,7 +6740,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
VT == MVT::v16i16 || VT == MVT::v32i8) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
} else if ((VT == MVT::v4i32 ||
@@ -6694,18 +6748,18 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- EVT NewVT = NewOp.getValueType();
+ MVT NewVT = NewOp.getValueType().getSimpleVT();
if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
NewVT, true, false))
return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
DAG, Subtarget, dl);
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- EVT NewVT = NewOp.getValueType();
+ MVT NewVT = NewOp.getValueType().getSimpleVT();
if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
DAG, Subtarget, dl);
@@ -6720,7 +6774,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
@@ -6728,11 +6782,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool V1IsSplat = false;
bool V2IsSplat = false;
bool HasSSE2 = Subtarget->hasSSE2();
- bool HasAVX = Subtarget->hasAVX();
- bool HasAVX2 = Subtarget->hasAVX2();
+ bool HasFp256 = Subtarget->hasFp256();
+ bool HasInt256 = Subtarget->hasInt256();
MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize);
+ bool OptForSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
@@ -6766,20 +6820,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
- if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+ if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
- if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+ if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (isMOVDDUPMask(M, VT) && Subtarget->hasSSE3() &&
- V2IsUndef && RelaxedMayFoldVectorLoad(V1))
+ V2IsUndef && MayFoldVectorLoad(V1))
return getMOVDDup(Op, dl, V1, DAG);
if (isMOVHLPS_v_undef_Mask(M, VT))
return getMOVHighToLow(Op, dl, DAG);
// Used to match splats
- if (HasSSE2 && isUNPCKHMask(M, VT, HasAVX2) && V2IsUndef &&
+ if (HasSSE2 && isUNPCKHMask(M, VT, HasInt256) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
@@ -6792,12 +6846,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
- if (HasAVX && (VT == MVT::v4f32 || VT == MVT::v2f64))
- return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
-
if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+ if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask,
+ DAG);
+
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
TargetMask, DAG);
}
@@ -6810,7 +6865,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
@@ -6828,7 +6883,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
// FIXME: fold these into legal mask.
- if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasAVX2))
+ if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasInt256))
return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
if (isMOVHLPSMask(M, VT))
@@ -6849,7 +6904,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (isShift) {
// No better options. Use a vshldq / vsrldq.
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
@@ -6878,10 +6933,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (isUNPCKLMask(M, VT, HasAVX2))
+ if (isUNPCKLMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (isUNPCKHMask(M, VT, HasAVX2))
+ if (isUNPCKHMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
if (V2IsSplat) {
@@ -6890,9 +6945,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// new vector_shuffle with the corrected mask.
SmallVector<int, 8> NewMask(M.begin(), M.end());
NormalizeMask(NewMask, NumElems);
- if (isUNPCKLMask(NewMask, VT, HasAVX2, true))
+ if (isUNPCKLMask(NewMask, VT, HasInt256, true))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (isUNPCKHMask(NewMask, VT, HasAVX2, true))
+ if (isUNPCKHMask(NewMask, VT, HasInt256, true))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
@@ -6904,15 +6959,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
std::swap(V1IsSplat, V2IsSplat);
Commuted = false;
- if (isUNPCKLMask(M, VT, HasAVX2))
+ if (isUNPCKLMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (isUNPCKHMask(M, VT, HasAVX2))
+ if (isUNPCKHMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
// Normalize the node to match x86 shuffle ops if needed
- if (!V2IsUndef && (isSHUFPMask(M, VT, HasAVX, /* Commuted */ true)))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, HasFp256, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
@@ -6920,7 +6975,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// nodes, and remove one by one until they don't return Op anymore.
if (isPALIGNRMask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+ return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2,
getShufflePALIGNRImmediate(SVOp),
DAG);
@@ -6930,23 +6985,23 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
- if (isPSHUFHWMask(M, VT, HasAVX2))
+ if (isPSHUFHWMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
getShufflePSHUFHWImmediate(SVOp),
DAG);
- if (isPSHUFLWMask(M, VT, HasAVX2))
+ if (isPSHUFLWMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
getShufflePSHUFLWImmediate(SVOp),
DAG);
- if (isSHUFPMask(M, VT, HasAVX))
+ if (isSHUFPMask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
getShuffleSHUFImmediate(SVOp), DAG);
- if (isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+ if (isUNPCKL_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
- if (isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+ if (isUNPCKH_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
//===--------------------------------------------------------------------===//
@@ -6955,12 +7010,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
//
// Handle VMOVDDUPY permutations
- if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX))
+ if (V2IsUndef && isMOVDDUPYMask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
// Handle VPERMILPS/D* permutations
- if (isVPERMILPMask(M, VT, HasAVX)) {
- if (HasAVX2 && VT == MVT::v8i32)
+ if (isVPERMILPMask(M, VT, HasFp256)) {
+ if (HasInt256 && VT == MVT::v8i32)
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
@@ -6968,7 +7023,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
// Handle VPERM2F128/VPERM2I128 permutations
- if (isVPERM2X128Mask(M, VT, HasAVX))
+ if (isVPERM2X128Mask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
@@ -6976,7 +7031,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (BlendOp.getNode())
return BlendOp;
- if (V2IsUndef && HasAVX2 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
+ if (V2IsUndef && HasInt256 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
SmallVector<SDValue, 8> permclMask;
for (unsigned i = 0; i != 8; ++i) {
permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32));
@@ -6988,11 +7043,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
}
- if (V2IsUndef && HasAVX2 && (VT == MVT::v4i64 || VT == MVT::v4f64))
+ if (V2IsUndef && HasInt256 && (VT == MVT::v4i64 || VT == MVT::v4f64))
return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1,
getShuffleCLImmediate(SVOp), DAG);
-
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
// lower it into other known shuffles. FIXME: this isn't true yet, but
@@ -7030,13 +7084,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue
-X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
+static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
- if (!Op.getOperand(0).getValueType().is128BitVector())
+ if (!Op.getOperand(0).getValueType().getSimpleVT().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
@@ -7094,7 +7146,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
return SDValue();
}
-
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
@@ -7102,7 +7153,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return SDValue();
SDValue Vec = Op.getOperand(0);
- EVT VecVT = Vec.getValueType();
+ MVT VecVT = Vec.getValueType().getSimpleVT();
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
@@ -7129,7 +7180,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return Res;
}
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
@@ -7142,7 +7193,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
- EVT EltVT = MVT::i32;
+ MVT EltVT = MVT::i32;
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
@@ -7157,7 +7208,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// SHUFPS the element to the lowest double word, then movss.
int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 };
- EVT VVT = Op.getOperand(0).getValueType();
+ MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7176,7 +7227,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
- EVT VVT = Op.getOperand(0).getValueType();
+ MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7186,11 +7237,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return SDValue();
}
-SDValue
-X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
+static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
@@ -7243,8 +7292,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
@@ -7292,7 +7341,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT OpVT = Op.getValueType();
+ MVT OpVT = Op.getValueType().getSimpleVT();
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
@@ -7323,7 +7372,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
// upper bits of a vector.
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- if (Subtarget->hasAVX()) {
+ if (Subtarget->hasFp256()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
SDValue Vec = Op.getNode()->getOperand(0);
SDValue Idx = Op.getNode()->getOperand(1);
@@ -7343,7 +7392,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
// the upper bits of a vector.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- if (Subtarget->hasAVX()) {
+ if (Subtarget->hasFp256()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
SDValue Vec = Op.getNode()->getOperand(0);
SDValue SubVec = Op.getNode()->getOperand(1);
@@ -7459,7 +7508,6 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->is64Bit()) {
@@ -7508,8 +7556,7 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
- int64_t Offset,
- SelectionDAG &DAG) const {
+ int64_t Offset, SelectionDAG &DAG) const {
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
unsigned char OpFlags =
@@ -7729,7 +7776,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
Subtarget->is64Bit(),
- getTargetMachine().getRelocationModel() == Reloc::PIC_);
+ getTargetMachine().getRelocationModel() == Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
@@ -7779,7 +7826,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
Chain.getValue(1));
}
- if (Subtarget->isTargetWindows()) {
+ if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) {
// Just use the implicit TLS architecture
// Need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
@@ -7799,18 +7846,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = DAG.getEntryNode();
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
- // %gs:0x58 (64-bit).
+ // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly
+ // use its literal value of 0x2C.
Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
- SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain,
- Subtarget->is64Bit()
- ? DAG.getIntPtrConstant(0x58)
- : DAG.getExternalSymbol("_tls_array",
- getPointerTy()),
+ SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) :
+ (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) :
+ DAG.getExternalSymbol("_tls_array", getPointerTy()));
+
+ SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
MachinePointerInfo(Ptr),
false, false, false, 0);
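// As a sketch, the 32-bit MinGW path assembled here amounts to roughly:
//   mov eax, dword ptr fs:[0x2C]   ; ThreadLocalStoragePointer in the TEB
//   mov eax, [eax + 4*_tls_index]
//   add eax, <offset of the TLS variable>
// while 64-bit targets keep using gs:[0x58], as in the comment further up.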
@@ -7846,7 +7894,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("TLS not implemented for this target.");
}
-
/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
@@ -8013,9 +8060,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SmallVector<Constant*,2> CV1;
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
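// The two constants are 2^52 and 2^84 (biased exponents 0x433 and 0x453).
// This sets up the classic uint64 -> double trick: OR the low 32 bits into a
// 2^52-scaled double and the high 32 bits into a 2^84-scaled one, so that
//   ((2^84 + hi*2^32) - 2^84) + ((2^52 + lo) - 2^52)  ==  hi*2^32 + lo
// with every intermediate step exact and a single final rounding.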
@@ -8109,7 +8158,8 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
"Custom UINT_TO_FP is not supported!");
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements());
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ SVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
@@ -8202,8 +8252,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
}
-std::pair<SDValue,SDValue> X86TargetLowering::
-FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const {
+std::pair<SDValue,SDValue>
+X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned, bool IsReplace) const {
DebugLoc DL = Op.getDebugLoc();
EVT DstTy = Op.getValueType();
@@ -8295,46 +8346,197 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co
}
}
-SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ MVT VT = Op->getValueType(0).getSimpleVT();
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getValueType().getSimpleVT();
+ DebugLoc dl = Op->getDebugLoc();
+
+ // Optimize vectors in AVX mode:
+ //
+ // v8i16 -> v8i32
+ // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
+ // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
+ // Concat upper and lower parts.
+ //
+ // v4i32 -> v4i64
+ // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
+ // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
+ // Concat upper and lower parts.
+ //
+
+ if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
+ ((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
+ return SDValue();
+
+ if (Subtarget->hasInt256())
+ return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In);
+
+ SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
+ SDValue Undef = DAG.getUNDEF(InVT);
+ bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
+ SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
+ SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
+
+ MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+}
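+// A sketch of the pre-AVX2 path above, assuming a v8i16 -> v8i32 zero_extend:
+// unpcklwd(In, Zero) interleaves the low four words with zeros and
+// unpckhwd(In, Zero) the high four; each result's bit pattern is already a
+// v4i32 zero-extension, so bitcasting both to HVT = v4i32 and concatenating
+// produces the v8i32 result with no dedicated extend instruction.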
+
+SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Subtarget->hasFp256()) {
+ SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
+ if (Res.getNode())
+ return Res;
+ }
+
+ return SDValue();
+}
+SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
SDValue In = Op.getOperand(0);
- EVT SVT = In.getValueType();
+ MVT SVT = In.getValueType().getSimpleVT();
+
+ if (Subtarget->hasFp256()) {
+ SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
+ if (Res.getNode())
+ return Res;
+ }
if (!VT.is256BitVector() || !SVT.is128BitVector() ||
VT.getVectorNumElements() != SVT.getVectorNumElements())
return SDValue();
- assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+ assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
// AVX2 has better support for integer extending.
- if (Subtarget->hasAVX2())
+ if (Subtarget->hasInt256())
return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
- DAG.getVectorShuffle(MVT::v8i16, DL, In, DAG.getUNDEF(MVT::v8i16), &Mask[0]));
+ DAG.getVectorShuffle(MVT::v8i16, DL, In,
+ DAG.getUNDEF(MVT::v8i16),
+ &Mask[0]));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
}
-SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT SVT = Op.getOperand(0).getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ SDValue In = Op.getOperand(0);
+ MVT SVT = In.getValueType().getSimpleVT();
- if (!VT.is128BitVector() || !SVT.is256BitVector() ||
- VT.getVectorNumElements() != SVT.getVectorNumElements())
+ if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
+ // On AVX2, v4i64 -> v4i32 becomes VPERMD.
+ if (Subtarget->hasInt256()) {
+ static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In);
+ In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
+ ShufMask);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // On AVX, v4i64 -> v4i32 becomes a sequence that uses PSHUFD and MOVLHPS.
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(0));
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(2));
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+
+ // The PSHUFD mask:
+ static const int ShufMask1[] = {0, 2, 0, 0};
+ SDValue Undef = DAG.getUNDEF(VT);
+ OpLo = DAG.getVectorShuffle(VT, DL, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, DL, OpHi, Undef, ShufMask1);
+
+ // The MOVLHPS mask:
+ static const int ShufMask2[] = {0, 1, 4, 5};
+ return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
+ }
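+  // Tracing the AVX path as a sketch, with In = <a, b, c, d> : v4i64. OpLo
+  // and OpHi are <a, b> and <c, d> viewed as v4i32, i.e. <a.lo, a.hi, b.lo,
+  // b.hi> and <c.lo, c.hi, d.lo, d.hi>. ShufMask1 {0,2,0,0} keeps the low
+  // dwords, and ShufMask2 {0,1,4,5} merges the two halves into the
+  // truncated <a.lo, b.lo, c.lo, d.lo>.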
+
+ if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
+    // On AVX2, v8i32 -> v8i16 becomes PSHUFB.
+ if (Subtarget->hasInt256()) {
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
+
+ SmallVector<SDValue,32> pshufbMask;
+ for (unsigned i = 0; i < 2; ++i) {
+ pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
+ for (unsigned j = 0; j < 8; ++j)
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ }
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8,
+ &pshufbMask[0], 32);
+ In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
+ In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In);
+
+ static const int ShufMask[] = {0, 2, -1, -1};
+ In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64),
+ &ShufMask[0]);
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::BITCAST, DL, VT, In);
+ }
+
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(0));
+
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(4));
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi);
+
+ // The PSHUFB mask:
+ static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+
+ SDValue Undef = DAG.getUNDEF(MVT::v16i8);
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1);
+
+ OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
+
+  // The MOVLHPS mask:
+ static const int ShufMask2[] = {0, 1, 4, 5};
+ SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res);
+ }
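+  // A sketch of the AVX2 branch above: within each 128-bit lane, the PSHUFB
+  // mask gathers bytes {0,1, 4,5, 8,9, 12,13} (the low word of every dword)
+  // into the lane's low 8 bytes and zeroes the rest (0x80). The packed
+  // words then sit in qwords 0 and 2 of the v4i64 view, so the {0,2,-1,-1}
+  // shuffle makes them contiguous and the low 128 bits are the v8i16 result.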
+
+ // Handle truncation of V256 to V128 using shuffles.
+ if (!VT.is128BitVector() || !SVT.is256BitVector())
return SDValue();
- assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+  assert(VT.getVectorNumElements() == SVT.getVectorNumElements() &&
+ "Invalid op");
+ assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
NumElems * 2);
- SDValue In = Op.getOperand(0);
SmallVector<int, 16> MaskVec(NumElems * 2, -1);
// Prepare truncation shuffle mask
for (unsigned i = 0; i != NumElems; ++i)
@@ -8348,9 +8550,10 @@ SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isVector()) {
- if (Op.getValueType() == MVT::v8i16)
- return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), Op.getValueType(),
+ MVT VT = Op.getValueType().getSimpleVT();
+ if (VT.isVector()) {
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), VT,
DAG.getNode(ISD::FP_TO_SINT, Op.getDebugLoc(),
MVT::v8i32, Op.getOperand(0)));
return SDValue();
@@ -8389,12 +8592,11 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
return FIST;
}
-SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
SDValue In = Op.getOperand(0);
- EVT SVT = In.getValueType();
+ MVT SVT = In.getValueType().getSimpleVT();
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
@@ -8406,8 +8608,8 @@ SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT EltVT = VT;
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
@@ -8415,9 +8617,11 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
}
Constant *C;
if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, ~(1ULL << 63))));
else
- C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, ~(1U << 31))));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
@@ -8438,8 +8642,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT EltVT = VT;
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
@@ -8447,9 +8651,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
}
Constant *C;
if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 1ULL << 63)));
else
- C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, 1U << 31)));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
@@ -8473,8 +8679,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT SrcVT = Op1.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT SrcVT = Op1.getValueType().getSimpleVT();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
@@ -8493,13 +8699,15 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// First get the sign bit of second operand.
SmallVector<Constant*,4> CV;
if (SrcVT == MVT::f64) {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8522,13 +8730,17 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8544,7 +8756,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
// Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
@@ -8554,7 +8766,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
//
-SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op,
+ SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget->hasSSE41())
@@ -8899,6 +9112,11 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
+static bool isAllOnes(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isAllOnesValue();
+}
+
/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
/// if it's possible.
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
@@ -8947,6 +9165,14 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
}
if (LHS.getNode()) {
+ // If the LHS is of the form (x ^ -1) then replace the LHS with x and flip
+ // the condition code later.
+ bool Invert = false;
+ if (LHS.getOpcode() == ISD::XOR && isAllOnes(LHS.getOperand(1))) {
+ Invert = true;
+ LHS = LHS.getOperand(0);
+ }
+
// If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok. We extend to i32 because
@@ -8962,7 +9188,10 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
- unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ // Flip the condition if the LHS was a NOT instruction.
+ if (Invert)
+ Cond = X86::GetOppositeBranchCondition(Cond);
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(Cond, MVT::i8), BT);
}
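// Illustrative sketch (editorial, not patch content): the rewrite above is
// sound because bit N of (x ^ -1) is always the complement of bit N of x,
// so BT can test x directly with a flipped condition. Standalone check:
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Xs[] = {0u, 1u, 0x80000000u, 0xdeadbeefu};
  for (unsigned K = 0; K != 4; ++K)
    for (unsigned N = 0; N != 32; ++N)
      assert((((Xs[K] ^ ~0u) >> N) & 1) == !((Xs[K] >> N) & 1));
  return 0;
}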
@@ -8970,65 +9199,10 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
return SDValue();
}
-SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
-
- if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG);
-
- assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
- // Optimize to BT if possible.
- // Lower (X & (1 << N)) == 0 to BT(X, N).
- // Lower ((X >>u N) & 1) != 0 to BT(X, N).
- // Lower ((X >>s N) & 1) != 0 to BT(X, N).
- if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
- Op1.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op1)->isNullValue() &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
- if (NewSetCC.getNode())
- return NewSetCC;
- }
-
- // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
- // these.
- if (Op1.getOpcode() == ISD::Constant &&
- (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
- cast<ConstantSDNode>(Op1)->isNullValue()) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
-
- // If the input is a setcc, then reuse the input setcc or use a new one with
- // the inverted condition.
- if (Op0.getOpcode() == X86ISD::SETCC) {
- X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
- bool Invert = (CC == ISD::SETNE) ^
- cast<ConstantSDNode>(Op1)->isNullValue();
- if (!Invert) return Op0;
-
- CCode = X86::GetOppositeBranchCondition(CCode);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
- }
- }
-
- bool isFP = Op1.getValueType().isFloatingPoint();
- unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
- if (X86CC == X86::COND_INVALID)
- return SDValue();
-
- SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
- EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), EFLAGS);
-}
-
// Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128
// ones, and then concatenate the result back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
@@ -9048,27 +9222,27 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
// Issue the operation on the smaller types and concatenate the result back
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ MVT EltVT = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
-
-SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDValue Cond;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ bool isFP = Op.getOperand(1).getValueType().getSimpleVT().isFloatingPoint();
DebugLoc dl = Op.getDebugLoc();
if (isFP) {
#ifndef NDEBUG
- EVT EltVT = Op0.getValueType().getVectorElementType();
+ MVT EltVT = Op0.getValueType().getVectorElementType().getSimpleVT();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
@@ -9133,7 +9307,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
}
// Break 256-bit integer vector compare into smaller ones.
- if (VT.is256BitVector() && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntVSETCC(Op, DAG);
// We are handling one of the integer comparisons here. Since SSE only has
@@ -9163,8 +9337,28 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::v2i64) {
if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
return SDValue();
- if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
- return SDValue();
+ if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
+ // If pcmpeqq is missing but pcmpeqd is available, synthesize pcmpeqq with
+ // pcmpeqd + pshufd + pand.
+ assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
+
+ // First cast everything to the right type.
+ Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+
+ // Do the compare.
+ SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
+
+ // Make sure the lower and upper halves are both all-ones.
+ const int Mask[] = { 1, 0, 3, 2 };
+ SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
+ Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, MVT::v4i32);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
}
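// Illustrative sketch (editorial, not patch content): a 64-bit lane ends up
// all-ones under this sequence exactly when both of its 32-bit halves
// compared equal. Scalar model of pcmpeqd + pshufd{1,0,3,2} + pand:
#include <cassert>
#include <cstdint>

static uint64_t pcmpeqqModel(uint64_t A, uint64_t B) {
  uint32_t EqLo = ((uint32_t)A == (uint32_t)B) ? ~0u : 0u;                 // pcmpeqd, lane 0
  uint32_t EqHi = ((uint32_t)(A >> 32) == (uint32_t)(B >> 32)) ? ~0u : 0u; // lane 1
  // pshufd swaps the halves; pand keeps all-ones only if both matched.
  return ((uint64_t)(EqHi & EqLo) << 32) | (EqLo & EqHi);
}

int main() {
  assert(pcmpeqqModel(42, 42) == ~0ULL);
  assert(pcmpeqqModel(1ULL << 40, 0) == 0);
  assert(pcmpeqqModel(1ULL << 40, 1) == 0); // both halves must match
  return 0;
}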
// Since SSE has no unsigned integer comparisons, we need to flip the sign
@@ -9189,6 +9383,63 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
return Result;
}
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+ MVT VT = Op.getValueType().getSimpleVT();
+
+ if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
+
+ assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Optimize to BT if possible.
+ // Lower (X & (1 << N)) == 0 to BT(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BT(X, N).
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op1)->isNullValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+ if (NewSetCC.getNode())
+ return NewSetCC;
+ }
+
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
+ // these.
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ // If the input is a setcc, then reuse the input setcc or use a new one with
+ // the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (!Invert) return Op0;
+
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
+ }
+
+ bool isFP = Op1.getValueType().getSimpleVT().isFloatingPoint();
+ unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+ if (X86CC == X86::COND_INVALID)
+ return SDValue();
+
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
+ EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
+}
+
// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getNode()->getOpcode();
@@ -9220,11 +9471,6 @@ static bool isZero(SDValue V) {
return C && C->isNullValue();
}
-static bool isAllOnes(SDValue V) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
- return C && C->isAllOnesValue();
-}
-
static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
if (V.getOpcode() != ISD::TRUNCATE)
return false;
@@ -9316,7 +9562,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
@@ -9425,6 +9671,53 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
+SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ MVT VT = Op->getValueType(0).getSimpleVT();
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getValueType().getSimpleVT();
+ DebugLoc dl = Op->getDebugLoc();
+
+ if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
+ (VT != MVT::v8i32 || InVT != MVT::v8i16))
+ return SDValue();
+
+ if (Subtarget->hasInt256())
+ return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In);
+
+ // Optimize vectors in AVX mode:
+ // sign extend v8i16 to v8i32 and v4i32 to v4i64.
+ //
+ // Divide the input vector into two parts; for v4i32 the shuffle masks
+ // will be {0, 1, -1, -1} and {2, 3, -1, -1}. Use the vpmovsx
+ // instruction to extend v4i32 -> v2i64 and v8i16 -> v4i32, then
+ // concatenate the halves back to the original VT.
+
+ unsigned NumElems = InVT.getVectorNumElements();
+ SDValue Undef = DAG.getUNDEF(InVT);
+
+ SmallVector<int,8> ShufMask1(NumElems, -1);
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ ShufMask1[i] = i;
+
+ SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]);
+
+ SmallVector<int,8> ShufMask2(NumElems, -1);
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ ShufMask2[i] = i + NumElems/2;
+
+ SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]);
+
+ MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
+ OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+}
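// Illustrative sketch (editorial, not patch content): the two shuffles above
// simply split the input in half so each half fits a single vpmovsx. A
// scalar model over arrays:
#include <cassert>
#include <cstdint>

int main() {
  int16_t In[8] = {-1, 2, -3, 4, -5, 6, -7, 8};
  int32_t Out[8];
  for (int i = 0; i != 4; ++i) Out[i] = In[i];          // ShufMask1 half, vpmovsxwd
  for (int i = 0; i != 4; ++i) Out[4 + i] = In[4 + i];  // ShufMask2 half, vpmovsxwd
  for (int i = 0; i != 8; ++i) assert(Out[i] == In[i]); // sign is preserved
  return 0;
}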
+
// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
// from the AND / OR.
@@ -9713,7 +10006,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
Chain, Dest, CC, Cond);
}
-
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
@@ -9876,8 +10168,9 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
- .getFunction()->getFnAttributes()
- .hasAttribute(Attributes::NoImplicitFloat)) &&
+ .getFunction()->getAttributes()
+ .hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat)) &&
Subtarget->hasSSE1());
}
@@ -9925,7 +10218,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
-// getTargetVShiftNOde - Handle vector element shifts where the shift amount
+// getTargetVShiftNode - Handle vector element shifts where the shift amount
// may or may not be a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue SrcOp, SDValue ShAmt,
@@ -10082,6 +10375,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ // SSE2/AVX2 sub with unsigned saturation intrinsics
+ case Intrinsic::x86_sse2_psubus_b:
+ case Intrinsic::x86_sse2_psubus_w:
+ case Intrinsic::x86_avx2_psubus_b:
+ case Intrinsic::x86_avx2_psubus_w:
+ return DAG.getNode(X86ISD::SUBUS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
// SSE3/AVX horizontal add/sub intrinsics
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
@@ -10131,6 +10432,100 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
+ // SSE2/SSE41/AVX2 integer max/min intrinsics.
+ case Intrinsic::x86_sse2_pmaxu_b:
+ case Intrinsic::x86_sse41_pmaxuw:
+ case Intrinsic::x86_sse41_pmaxud:
+ case Intrinsic::x86_avx2_pmaxu_b:
+ case Intrinsic::x86_avx2_pmaxu_w:
+ case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_sse2_pminu_b:
+ case Intrinsic::x86_sse41_pminuw:
+ case Intrinsic::x86_sse41_pminud:
+ case Intrinsic::x86_avx2_pminu_b:
+ case Intrinsic::x86_avx2_pminu_w:
+ case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_sse41_pmaxsb:
+ case Intrinsic::x86_sse2_pmaxs_w:
+ case Intrinsic::x86_sse41_pmaxsd:
+ case Intrinsic::x86_avx2_pmaxs_b:
+ case Intrinsic::x86_avx2_pmaxs_w:
+ case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_sse41_pminsb:
+ case Intrinsic::x86_sse2_pmins_w:
+ case Intrinsic::x86_sse41_pminsd:
+ case Intrinsic::x86_avx2_pmins_b:
+ case Intrinsic::x86_avx2_pmins_w:
+ case Intrinsic::x86_avx2_pmins_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_pmaxu_b:
+ case Intrinsic::x86_sse41_pmaxuw:
+ case Intrinsic::x86_sse41_pmaxud:
+ case Intrinsic::x86_avx2_pmaxu_b:
+ case Intrinsic::x86_avx2_pmaxu_w:
+ case Intrinsic::x86_avx2_pmaxu_d:
+ Opcode = X86ISD::UMAX;
+ break;
+ case Intrinsic::x86_sse2_pminu_b:
+ case Intrinsic::x86_sse41_pminuw:
+ case Intrinsic::x86_sse41_pminud:
+ case Intrinsic::x86_avx2_pminu_b:
+ case Intrinsic::x86_avx2_pminu_w:
+ case Intrinsic::x86_avx2_pminu_d:
+ Opcode = X86ISD::UMIN;
+ break;
+ case Intrinsic::x86_sse41_pmaxsb:
+ case Intrinsic::x86_sse2_pmaxs_w:
+ case Intrinsic::x86_sse41_pmaxsd:
+ case Intrinsic::x86_avx2_pmaxs_b:
+ case Intrinsic::x86_avx2_pmaxs_w:
+ case Intrinsic::x86_avx2_pmaxs_d:
+ Opcode = X86ISD::SMAX;
+ break;
+ case Intrinsic::x86_sse41_pminsb:
+ case Intrinsic::x86_sse2_pmins_w:
+ case Intrinsic::x86_sse41_pminsd:
+ case Intrinsic::x86_avx2_pmins_b:
+ case Intrinsic::x86_avx2_pmins_w:
+ case Intrinsic::x86_avx2_pmins_d:
+ Opcode = X86ISD::SMIN;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // SSE/SSE2/AVX floating point max/min intrinsics.
+ case Intrinsic::x86_sse_max_ps:
+ case Intrinsic::x86_sse2_max_pd:
+ case Intrinsic::x86_avx_max_ps_256:
+ case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_sse_min_ps:
+ case Intrinsic::x86_sse2_min_pd:
+ case Intrinsic::x86_avx_min_ps_256:
+ case Intrinsic::x86_avx_min_pd_256: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse_max_ps:
+ case Intrinsic::x86_sse2_max_pd:
+ case Intrinsic::x86_avx_max_ps_256:
+ case Intrinsic::x86_avx_max_pd_256:
+ Opcode = X86ISD::FMAX;
+ break;
+ case Intrinsic::x86_sse_min_ps:
+ case Intrinsic::x86_sse2_min_pd:
+ case Intrinsic::x86_avx_min_ps_256:
+ case Intrinsic::x86_avx_min_pd_256:
+ Opcode = X86ISD::FMIN;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
// AVX2 variable shift intrinsics
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
@@ -10198,6 +10593,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
+ case Intrinsic::x86_sse_sqrt_ps:
+ case Intrinsic::x86_sse2_sqrt_pd:
+ case Intrinsic::x86_avx_sqrt_ps_256:
+ case Intrinsic::x86_avx_sqrt_pd_256:
+ return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1));
+
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
// or testp pattern and a setcc for the result.
@@ -10513,16 +10914,23 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
- // RDRAND intrinsics.
+ // RDRAND/RDSEED intrinsics.
case Intrinsic::x86_rdrand_16:
case Intrinsic::x86_rdrand_32:
- case Intrinsic::x86_rdrand_64: {
+ case Intrinsic::x86_rdrand_64:
+ case Intrinsic::x86_rdseed_16:
+ case Intrinsic::x86_rdseed_32:
+ case Intrinsic::x86_rdseed_64: {
+ unsigned Opcode = (IntNo == Intrinsic::x86_rdseed_16 ||
+ IntNo == Intrinsic::x86_rdseed_32 ||
+ IntNo == Intrinsic::x86_rdseed_64) ? X86ISD::RDSEED :
+ X86ISD::RDRAND;
// Emit the node with the right value type.
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
- SDValue Result = DAG.getNode(X86ISD::RDRAND, dl, VTs, Op.getOperand(0));
+ SDValue Result = DAG.getNode(Opcode, dl, VTs, Op.getOperand(0));
- // If the value returned by RDRAND was valid (CF=1), return 1. Otherwise
- // return the value from Rand, which is always 0, casted to i32.
+ // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
+ // Otherwise return the value from Rand, which is always 0, cast to i32.
SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
DAG.getConstant(1, Op->getValueType(1)),
DAG.getConstant(X86::COND_B, MVT::i32),
@@ -10535,6 +10943,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
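// Illustrative sketch (editorial, not patch content): the CF-based protocol
// above is what the user-level intrinsic exposes. A retry loop with
// GCC/Clang's _rdrand32_step (compile with -mrdrnd):
#include <immintrin.h>
#include <cstdio>

int main() {
  unsigned R;
  while (!_rdrand32_step(&R)) // returns 0 when CF==0 (no entropy yet)
    ;
  std::printf("%u\n", R);
  return 0;
}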
+
+ // XTEST intrinsics.
+ case Intrinsic::x86_xtest: {
+ SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
+ SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0));
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_NE, MVT::i8),
+ InTrans);
+ SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
+ Ret, SDValue(InTrans.getNode(), 1));
+ }
}
}
@@ -10710,7 +11130,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Check that ECX wasn't needed by an 'inreg' parameter.
FunctionType *FTy = Func->getFunctionType();
- const AttrListPtr &Attrs = Func->getAttributes();
+ const AttributeSet &Attrs = Func->getAttributes();
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
@@ -10718,7 +11138,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
- if (Attrs.getParamAttributes(Idx).hasAttribute(Attributes::InReg))
+ if (Attrs.hasAttribute(Idx, Attribute::InReg))
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
@@ -10808,7 +11228,6 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, 2, 2);
@@ -10841,7 +11260,6 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
DAG.getConstant(1, MVT::i16)),
DAG.getConstant(3, MVT::i16));
-
return DAG.getNode((VT.getSizeInBits() < 16 ?
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
@@ -10970,17 +11388,43 @@ static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.is256BitVector() && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntArith(Op, DAG);
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
+ if (VT == MVT::v4i32) {
+ assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() &&
+ "Should not custom lower when pmuldq is available!");
+
+ // Extract the odd parts.
+ const int UnpackMask[] = { 1, -1, 3, -1 };
+ SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
+ SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);
+
+ // Multiply the even parts.
+ SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B);
+ // Now multiply odd parts.
+ SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds);
+
+ Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens);
+ Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds);
+
+ // Merge the two vectors back together with a shuffle. This expands into 2
+ // shuffles.
+ const int ShufMask[] = { 0, 4, 2, 6 };
+ return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
+ }
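// Illustrative sketch (editorial, not patch content): pmuludq only multiplies
// the even 32-bit lanes, so the odd lanes are shuffled into even positions,
// multiplied separately, and interleaved back. Scalar model of the sequence
// above:
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A[4] = {7, 0xffffffffu, 3, 100000};
  uint32_t B[4] = {6, 2, 0x80000000u, 100000};
  uint64_t Evens[2] = {(uint64_t)A[0] * B[0], (uint64_t)A[2] * B[2]}; // pmuludq(A, B)
  uint64_t Odds[2]  = {(uint64_t)A[1] * B[1], (uint64_t)A[3] * B[3]}; // pmuludq(Aodds, Bodds)
  uint32_t R[4] = {(uint32_t)Evens[0], (uint32_t)Odds[0],             // shuffle {0, 4, 2, 6}
                   (uint32_t)Evens[1], (uint32_t)Odds[1]};
  for (int i = 0; i != 4; ++i)
    assert(R[i] == A[i] * B[i]); // i32 multiply wraps mod 2^32
  return 0;
}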
+
assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
"Only know how to lower V2I64/V4I64 multiply");
- DebugLoc dl = Op.getDebugLoc();
-
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
//
@@ -10992,9 +11436,6 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
// AhiBlo = psllqi(AhiBlo, 32);
// return AloBlo + AloBhi + AhiBlo;
- SDValue A = Op.getOperand(0);
- SDValue B = Op.getOperand(1);
-
SDValue ShAmt = DAG.getConstant(32, MVT::i32);
SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt);
@@ -11018,16 +11459,55 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
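// Illustrative sketch (editorial, not patch content): modulo 2^64 the
// ahi*bhi partial product vanishes, which is why three pmuludq results
// suffice. Scalar check of the recipe in the comment above:
#include <cassert>
#include <cstdint>

static uint64_t mulModel(uint64_t A, uint64_t B) {
  uint64_t Alo = (uint32_t)A, Ahi = A >> 32;
  uint64_t Blo = (uint32_t)B, Bhi = B >> 32;
  uint64_t AloBlo = Alo * Blo;          // pmuludq(a, b)
  uint64_t AloBhi = (Alo * Bhi) << 32;  // pmuludq(a, bhi); psllqi 32
  uint64_t AhiBlo = (Ahi * Blo) << 32;  // pmuludq(ahi, b); psllqi 32
  return AloBlo + AloBhi + AhiBlo;
}

int main() {
  assert(mulModel(0xdeadbeefcafebabeULL, 0x0123456789abcdefULL) ==
         0xdeadbeefcafebabeULL * 0x0123456789abcdefULL);
  assert(mulModel(~0ULL, ~0ULL) == ~0ULL * ~0ULL);
  return 0;
}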
-SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltTy = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Lower sdiv X, pow2-const.
+ BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
+ if (!C)
+ return SDValue();
+
+ APInt SplatValue, SplatUndef;
+ unsigned MinSplatBits;
+ bool HasAnyUndefs;
+ if (!C->isConstantSplat(SplatValue, SplatUndef, MinSplatBits, HasAnyUndefs))
+ return SDValue();
+
+ if ((SplatValue != 0) &&
+ (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
+ unsigned lg2 = SplatValue.countTrailingZeros();
+ // Splat the sign bit.
+ SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32);
+ SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG);
+ // Add (N0 < 0) ? (2^lg2 - 1) : 0, so the final SRA rounds toward zero.
+ SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32);
+ SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG);
+ SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
+ SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32);
+ SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG);
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (SplatValue.isNonNegative())
+ return SRA;
+
+ SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts);
+ return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA);
+ }
+ return SDValue();
+}
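// Illustrative sketch (editorial, not patch content): for 1 <= lg2 <= 31,
// signed division by 2^lg2 needs a bias of 2^lg2 - 1 for negative inputs so
// the arithmetic shift rounds toward zero, which is what the
// VSRAI/VSRLI/ADD/VSRAI chain above computes. Scalar model (assumes
// arithmetic right shift of signed values):
#include <cassert>
#include <cstdint>

static int32_t sdivPow2Model(int32_t X, unsigned Lg2) {
  int32_t Sgn = X >> 31;                        // splat the sign bit (VSRAI)
  uint32_t Bias = (uint32_t)Sgn >> (32 - Lg2);  // 2^Lg2 - 1 if X < 0 (VSRLI)
  return (int32_t)((uint32_t)X + Bias) >> Lg2;  // ADD + VSRAI
}

int main() {
  const int32_t Xs[] = {-100, -8, -7, -1, 0, 1, 7, 8, 100};
  for (int32_t X : Xs)
    assert(sdivPow2Model(X, 3) == X / 8); // C division truncates toward zero
  return 0;
}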
+static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- LLVMContext *Context = DAG.getContext();
-
- if (!Subtarget->hasSSE2())
- return SDValue();
// Optimize shl/srl/sra with constant shift amount.
if (isSplatVector(Amt.getNode())) {
@@ -11036,7 +11516,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
uint64_t ShiftAmt = C->getZExtValue();
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
- (Subtarget->hasAVX2() &&
+ (Subtarget->hasInt256() &&
(VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
if (Op.getOpcode() == ISD::SHL)
return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
@@ -11093,7 +11573,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Unknown shift opcode.");
}
- if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
+ if (Subtarget->hasInt256() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
@@ -11139,19 +11619,229 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
+ uint64_t ShiftAmt = 0;
+ for (unsigned i = 0; i != Ratio; ++i) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
+ }
+ // Check remaining shift amounts.
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ uint64_t ShAmt = 0;
+ for (unsigned j = 0; j != Ratio; ++j) {
+ ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+ }
+ if (ShAmt != ShiftAmt)
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget* Subtarget) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+
+ if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ (Subtarget->hasInt256() &&
+ ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ SDValue BaseShAmt;
+ EVT EltVT = VT.getVectorElementType();
+
+ if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i, j;
+ for (i = 0; i != NumElts; ++i) {
+ if (Amt.getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ break;
+ }
+ for (j = i; j != NumElts; ++j) {
+ SDValue Arg = Amt.getOperand(j);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ if (Arg != Amt.getOperand(i))
+ break;
+ }
+ if (i != NumElts && j == NumElts)
+ BaseShAmt = Amt.getOperand(i);
+ } else {
+ if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ Amt = Amt.getOperand(0);
+ if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(Amt)->isSplat()) {
+ SDValue InVec = Amt.getOperand(0);
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = InVec.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned SplatIdx =
+ cast<ShuffleVectorSDNode>(Amt)->getSplatIndex();
+ if (C->getZExtValue() == SplatIdx)
+ BaseShAmt = InVec.getOperand(1);
+ }
+ }
+ if (BaseShAmt.getNode() == 0)
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ if (BaseShAmt.getNode()) {
+ if (EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt);
+ else if (EltVT.bitsLT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRA:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ }
+ }
+ }
+
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ std::vector<SDValue> Vals(Ratio);
+ for (unsigned i = 0; i != Ratio; ++i)
+ Vals[i] = Amt.getOperand(i);
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ for (unsigned j = 0; j != Ratio; ++j)
+ if (Vals[j] != Amt.getOperand(i + j))
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
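// Illustrative sketch (editorial, not patch content): the i/j scan above
// finds a single shift amount that every defined BUILD_VECTOR element agrees
// on, treating UNDEF operands as wildcards. Scalar model with -1 standing in
// for UNDEF:
#include <cassert>

static bool getSplat(const int *Elts, int N, int &Splat) {
  int i = 0;
  while (i != N && Elts[i] == -1) ++i; // skip leading UNDEFs
  if (i == N) return false;            // all UNDEF: no base amount
  Splat = Elts[i];
  for (int j = i; j != N; ++j)
    if (Elts[j] != -1 && Elts[j] != Splat)
      return false;                    // a defined element disagrees
  return true;
}

int main() {
  int S;
  const int A[4] = {-1, 5, -1, 5}, B[4] = {5, 6, 5, 5};
  assert(getSplat(A, 4, S) && S == 5);
  assert(!getSplat(B, 4, S));
  return 0;
}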
+
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+ SDValue V;
+
+ if (!Subtarget->hasSSE2())
+ return SDValue();
+
+ V = LowerScalarImmediateShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ V = LowerScalarVariableShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ // AVX2 has VPSLLV/VPSRAV/VPSRLV.
+ if (Subtarget->hasInt256()) {
+ if (Op.getOpcode() == ISD::SRL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SHL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32))
+ return Op;
+ }
+
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
- Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
- DAG.getConstant(23, MVT::i32));
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
- const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U};
- Constant *C = ConstantDataVector::get(*Context, CV);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
-
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT));
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
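// Illustrative sketch (editorial, not patch content): (amt << 23) +
// 0x3f800000 builds the IEEE-754 bits of 2.0^amt, so converting back to
// integer yields 1 << amt and the variable shift becomes a multiply.
// Scalar model, assuming IEEE-754 float:
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t shlModel(uint32_t X, uint32_t Amt) {
  uint32_t Bits = (Amt << 23) + 0x3f800000u; // exponent field holds Amt
  float F;
  std::memcpy(&F, &Bits, sizeof F);
  return X * (uint32_t)F;                    // fp_to_int(2^Amt) == 1 << Amt
}

int main() {
  for (uint32_t Amt = 0; Amt != 31; ++Amt)
    assert(shlModel(3, Amt) == (3u << Amt));
  return 0;
}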
@@ -11160,8 +11850,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
// a = a << 5;
- Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1),
- DAG.getConstant(5, MVT::i32));
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT));
Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
// Turn 'a' into a mask suitable for VSELECT
@@ -11336,9 +12025,9 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
default: return SDValue();
case MVT::v8i32:
case MVT::v16i16:
- if (!Subtarget->hasAVX())
+ if (!Subtarget->hasFp256())
return SDValue();
- if (!Subtarget->hasAVX2()) {
+ if (!Subtarget->hasInt256()) {
// needs to be split
unsigned NumElems = VT.getVectorNumElements();
@@ -11364,14 +12053,28 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
// fall through
case MVT::v4i32:
case MVT::v8i16: {
- SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
- Op.getOperand(0), ShAmt, DAG);
+ // (sext (vzext x)) -> (vsext x)
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op00 = Op0.getOperand(0);
+ SDValue Tmp1;
+ // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
+ Tmp1 = LowerVectorIntExtend(Op00, DAG);
+ if (Tmp1.getNode()) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+
+ // If the above didn't work, then just use Shift-Left + Shift-Right.
+ Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
}
}
}
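// Illustrative sketch (editorial, not patch content): the Shift-Left +
// Shift-Right fallback above is the classic sign_extend_inreg lowering.
// Scalar model (assumes arithmetic right shift of signed values):
#include <cassert>
#include <cstdint>

static int32_t sextInRegModel(int32_t X, unsigned FromBits) {
  unsigned Sh = 32 - FromBits;                // the ShAmt used above
  return (int32_t)((uint32_t)X << Sh) >> Sh;  // VSHLI then VSRAI
}

int main() {
  assert(sextInRegModel(0x80, 8) == -128); // the i8 sign bit propagates
  assert(sextInRegModel(0x7f, 8) == 127);
  return 0;
}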
-
static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
@@ -11456,7 +12159,6 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
}
-
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
EVT T = Op.getValueType();
@@ -11595,6 +12297,43 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
+SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
+
+ // For MacOSX, we want to call an alternative entry point: __sincos_stret,
+ // which returns the values in two XMM registers.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Only optimize x86_64 for now. i386 is a bit messy. For f32,
+ // the small struct {f32, f32} is returned in (eax, edx). For f64,
+ // the results are returned via SRet in memory.
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
+ false, false, false, false, 0,
+ CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.first;
+}
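// Illustrative sketch (editorial, not patch content): user code like the
// following is what benefits; with the right relaxed-math flags, the two
// calls below can become one FSINCOS node and hence a single
// __sincos_stret call on Darwin x86_64:
#include <cmath>
#include <cstdio>

int main() {
  double X = 1.0;
  double S = std::sin(X), C = std::cos(X); // candidates for one combined call
  std::printf("sin=%f cos=%f\n", S, C);
  return 0;
}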
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -11624,11 +12363,13 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
- case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG);
- case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG);
+ case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
+ case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
- case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -11674,6 +12415,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
}
}
@@ -11727,6 +12470,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
DebugLoc dl = N->getDebugLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
@@ -11760,7 +12504,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::UINT_TO_FP: {
- if (N->getOperand(0).getValueType() != MVT::v2i32 &&
+ assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ if (N->getOperand(0).getValueType() != MVT::v2i32 ||
N->getValueType(0) != MVT::v2f32)
return;
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
@@ -11776,6 +12521,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_ROUND: {
+ if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
+ return;
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
return;
@@ -11942,13 +12689,16 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDV: return "X86ISD::BLENDV";
- case X86ISD::BLENDPW: return "X86ISD::BLENDPW";
- case X86ISD::BLENDPS: return "X86ISD::BLENDPS";
- case X86ISD::BLENDPD: return "X86ISD::BLENDPD";
+ case X86ISD::BLENDI: return "X86ISD::BLENDI";
+ case X86ISD::SUBUS: return "X86ISD::SUBUS";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
+ case X86ISD::UMAX: return "X86ISD::UMAX";
+ case X86ISD::UMIN: return "X86ISD::UMIN";
+ case X86ISD::SMAX: return "X86ISD::SMAX";
+ case X86ISD::SMIN: return "X86ISD::SMIN";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FMAXC: return "X86ISD::FMAXC";
@@ -12001,14 +12751,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
- case X86ISD::ANDN: return "X86ISD::ANDN";
case X86ISD::BLSI: return "X86ISD::BLSI";
case X86ISD::BLSMSK: return "X86ISD::BLSMSK";
case X86ISD::BLSR: return "X86ISD::BLSR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
- case X86ISD::PALIGN: return "X86ISD::PALIGN";
+ case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
@@ -12039,6 +12788,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
+ case X86ISD::RDSEED: return "X86ISD::RDSEED";
case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";
@@ -12047,6 +12797,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
+ case X86ISD::XTEST: return "X86ISD::XTEST";
}
}
@@ -12104,24 +12855,21 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
return true;
}
-
bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
- return Imm == (int32_t)Imm;
+ return isInt<32>(Imm);
}
bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Can also use sub to handle negated immediates.
- return Imm == (int32_t)Imm;
+ return isInt<32>(Imm);
}
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
@@ -12129,9 +12877,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
@@ -12144,6 +12890,30 @@ bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
}
+bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ EVT VT1 = Val.getValueType();
+ if (isZExtFree(VT1, VT2))
+ return true;
+
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ if (!VT1.isSimple() || !VT1.isInteger() ||
+ !VT2.isSimple() || !VT2.isInteger())
+ return false;
+
+ switch (VT1.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // X86 has 8, 16, and 32-bit zero-extending loads.
+ return true;
+ }
+
+ return false;
+}
+
bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
@@ -12164,15 +12934,15 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
return (VT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, VT) ||
- isSHUFPMask(M, VT, Subtarget->hasAVX()) ||
+ isSHUFPMask(M, VT, Subtarget->hasFp256()) ||
isPSHUFDMask(M, VT) ||
- isPSHUFHWMask(M, VT, Subtarget->hasAVX2()) ||
- isPSHUFLWMask(M, VT, Subtarget->hasAVX2()) ||
+ isPSHUFHWMask(M, VT, Subtarget->hasInt256()) ||
+ isPSHUFLWMask(M, VT, Subtarget->hasInt256()) ||
isPALIGNRMask(M, VT, Subtarget) ||
- isUNPCKLMask(M, VT, Subtarget->hasAVX2()) ||
- isUNPCKHMask(M, VT, Subtarget->hasAVX2()) ||
- isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasAVX2()) ||
- isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasAVX2()));
+ isUNPCKLMask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKHMask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasInt256()));
}
bool
@@ -12185,8 +12955,8 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
if (NumElts == 4 && VT.is128BitVector()) {
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
- isSHUFPMask(Mask, VT, Subtarget->hasAVX()) ||
- isSHUFPMask(Mask, VT, Subtarget->hasAVX(), /* Commuted */ true));
+ isSHUFPMask(Mask, VT, Subtarget->hasFp256()) ||
+ isSHUFPMask(Mask, VT, Subtarget->hasFp256(), /* Commuted */ true));
}
return false;
}
@@ -12379,13 +13149,16 @@ static unsigned getPseudoCMOVOpc(EVT VT) {
// to
//
// ...
-// EAX = LOAD MI.addr
+// t1 = LOAD MI.addr
// loop:
-// t1 = OP MI.val, EAX
-// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+// t4 = phi(t1, t3 / loop)
+// t2 = OP MI.val, t4
+// EAX = t4
+// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
+// t3 = EAX
// JNE loop
// sink:
-// dst = EAX
+// dst = t3
// ...
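// Illustrative sketch (editorial, not patch content): the MI-level loop
// above is the usual compare-exchange idiom, e.g. for ATOMNAND in C++11
// terms:
#include <atomic>
#include <cassert>

static int fetchNandModel(std::atomic<int> &A, int Val) {
  int Old = A.load();                           // t1 = LOAD MI.addr
  int New;
  do {
    New = ~(Old & Val);                         // t2 = OP MI.val, t4
  } while (!A.compare_exchange_weak(Old, New)); // LCMPXCHG; Old refreshed on failure
  return Old;                                   // dst = t3
}

int main() {
  std::atomic<int> A(0xff);
  assert(fetchNandModel(A, 0x0f) == 0xff && A.load() == ~0x0f);
  return 0;
}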
MachineBasicBlock *
X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
@@ -12400,7 +13173,7 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
MachineFunction::iterator I = MBB;
++I;
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
@@ -12422,7 +13195,11 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
MVT::SimpleValueType VT = *RC->vt_begin();
- unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
+ unsigned t1 = MRI.createVirtualRegister(RC);
+ unsigned t2 = MRI.createVirtualRegister(RC);
+ unsigned t3 = MRI.createVirtualRegister(RC);
+ unsigned t4 = MRI.createVirtualRegister(RC);
+ unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT);
unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
unsigned LOADOpc = getLoadOpcode(VT);
@@ -12430,12 +13207,16 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
// For the atomic load-arith operator, we generate
//
// thisMBB:
- // EAX = LOAD [MI.addr]
+ // t1 = LOAD [MI.addr]
// mainMBB:
+ // t4 = phi(t1 / thisMBB, t3 / mainMBB)
// t2 = OP MI.val, t4
+ // EAX = t4
// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
+ // t3 = EAX
// JNE mainMBB
// sinkMBB:
+ // dst = t3
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
@@ -12451,23 +13232,34 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
+ unsigned flags = (*MMOI)->getFlags();
+ flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
+ (*MMOI)->getSize(),
+ (*MMOI)->getBaseAlignment(),
+ (*MMOI)->getTBAAInfo(),
+ (*MMOI)->getRanges());
+ MIB.addMemOperand(MMO);
+ }
thisMBB->addSuccessor(mainMBB);
// mainMBB:
MachineBasicBlock *origMainMBB = mainMBB;
- mainMBB->addLiveIn(AccPhyReg);
- // Copy AccPhyReg as it is used more than once.
- unsigned AccReg = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg)
- .addReg(AccPhyReg);
+ // Add a PHI.
+ MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4)
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
- unsigned t1 = MRI.createVirtualRegister(RC);
unsigned Opc = MI->getOpcode();
switch (Opc) {
default:
@@ -12485,20 +13277,20 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
case X86::ATOMXOR32:
case X86::ATOMXOR64: {
unsigned ARITHOpc = getNonAtomicOpcode(Opc);
- BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg)
- .addReg(AccReg);
+ BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg)
+ .addReg(t4);
break;
}
case X86::ATOMNAND8:
case X86::ATOMNAND16:
case X86::ATOMNAND32:
case X86::ATOMNAND64: {
- unsigned t2 = MRI.createVirtualRegister(RC);
+ unsigned Tmp = MRI.createVirtualRegister(RC);
unsigned NOTOpc;
unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
- BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg)
- .addReg(AccReg);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2);
+ BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg)
+ .addReg(t4);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp);
break;
}
case X86::ATOMMAX8:
@@ -12522,20 +13314,22 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
BuildMI(mainMBB, DL, TII->get(CMPOpc))
.addReg(SrcReg)
- .addReg(AccReg);
+ .addReg(t4);
if (Subtarget->hasCMov()) {
if (VT != MVT::i8) {
// Native support
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1)
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
.addReg(SrcReg)
- .addReg(AccReg);
+ .addReg(t4);
} else {
// Promote i8 to i32 to use CMOV32
- const TargetRegisterClass *RC32 = getRegClassFor(MVT::i32);
+ const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterClass *RC32 =
+ TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit);
unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
unsigned AccReg32 = MRI.createVirtualRegister(RC32);
- unsigned t2 = MRI.createVirtualRegister(RC32);
+ unsigned Tmp = MRI.createVirtualRegister(RC32);
unsigned Undef = MRI.createVirtualRegister(RC32);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
@@ -12546,15 +13340,15 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
.addImm(X86::sub_8bit);
BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
.addReg(Undef)
- .addReg(AccReg)
+ .addReg(t4)
.addImm(X86::sub_8bit);
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp)
.addReg(SrcReg32)
.addReg(AccReg32);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t1)
- .addReg(t2, 0, X86::sub_8bit);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2)
+ .addReg(Tmp, 0, X86::sub_8bit);
}
} else {
// Use pseudo select and lower them.
@@ -12563,36 +13357,47 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
unsigned SelOpc = getPseudoCMOVOpc(VT);
X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
- MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1)
- .addReg(SrcReg).addReg(AccReg)
+ MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2)
+ .addReg(SrcReg).addReg(t4)
.addImm(CC);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ // Replace the original PHI node as mainMBB is changed after CMOV
+ // lowering.
+ BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4)
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
+ Phi->eraseFromParent();
}
break;
}
}
- // Copy AccPhyReg back from virtual register.
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg)
- .addReg(AccReg);
+ // Copy PhyReg back from virtual register.
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg)
+ .addReg(t4);
MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
- MIB.addReg(t1);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ MIB.addReg(t2);
MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Copy PhyReg back to virtual register.
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3)
+ .addReg(PhyReg);
+
BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
mainMBB->addSuccessor(origMainMBB);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
- sinkMBB->addLiveIn(AccPhyReg);
-
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstReg)
- .addReg(AccPhyReg);
+ .addReg(t3);
MI->eraseFromParent();
return sinkMBB;
@@ -12609,15 +13414,24 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
// to
//
// ...
-// EAX = LOAD [MI.addr + 0]
-// EDX = LOAD [MI.addr + 4]
+// t1L = LOAD [MI.addr + 0]
+// t1H = LOAD [MI.addr + 4]
// loop:
-// EBX = OP MI.val.lo, EAX
-// ECX = OP MI.val.hi, EDX
+// t4L = phi(t1L, t3L / loop)
+// t4H = phi(t1H, t3H / loop)
+// t2L = OP MI.val.lo, t4L
+// t2H = OP MI.val.hi, t4H
+// EAX = t4L
+// EDX = t4H
+// EBX = t2L
+// ECX = t2H
// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+// t3L = EAX
+// t3H = EDX
// JNE loop
// sink:
-// dst = EDX:EAX
+// dstL = t3L
+// dstH = t3H
// ...
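// Illustrative sketch (editorial, not patch content): on i386 this is the
// loop a 64-bit std::atomic read-modify-write compiles to, with the value
// split across EDX:EAX / ECX:EBX as in the comment above:
#include <atomic>
#include <cassert>
#include <cstdint>

int main() {
  std::atomic<uint64_t> A(5);
  uint64_t Old = A.load();
  while (!A.compare_exchange_weak(Old, Old + (7ULL << 32))) // LCMPXCHG8B
    ; // on failure, Old is refreshed from EDX:EAX
  assert(A.load() == 5 + (7ULL << 32));
  return 0;
}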
MachineBasicBlock *
X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
@@ -12632,7 +13446,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
MachineFunction::iterator I = MBB;
++I;
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
@@ -12658,20 +13472,37 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
const TargetRegisterClass *RC = &X86::GR32RegClass;
const TargetRegisterClass *RC8 = &X86::GR8RegClass;
+ unsigned t1L = MRI.createVirtualRegister(RC);
+ unsigned t1H = MRI.createVirtualRegister(RC);
+ unsigned t2L = MRI.createVirtualRegister(RC);
+ unsigned t2H = MRI.createVirtualRegister(RC);
+ unsigned t3L = MRI.createVirtualRegister(RC);
+ unsigned t3H = MRI.createVirtualRegister(RC);
+ unsigned t4L = MRI.createVirtualRegister(RC);
+ unsigned t4H = MRI.createVirtualRegister(RC);
+
unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
unsigned LOADOpc = X86::MOV32rm;
// For the atomic load-arith operator, we generate
//
// thisMBB:
- // EAX = LOAD [MI.addr + 0]
- // EDX = LOAD [MI.addr + 4]
+ // t1L = LOAD [MI.addr + 0]
+ // t1H = LOAD [MI.addr + 4]
// mainMBB:
- // EBX = OP MI.vallo, EAX
- // ECX = OP MI.valhi, EDX
+ // t4L = phi(t1L / thisMBB, t3L / mainMBB)
+ // t4H = phi(t1H / thisMBB, t3H / mainMBB)
+ // t2L = OP MI.val.lo, t4L
+ // t2H = OP MI.val.hi, t4H
+ // EAX = t4L
+ // EDX = t4H
+ // EBX = t2L
+ // ECX = t2H
// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
- // JNE mainMBB
+ // t3L = EAX
+ // t3H = EDX
+ // JNE mainMBB
// sinkMBB:
+ // dstL = t3L
+ // dstH = t3H
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
@@ -12688,35 +13519,50 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
// thisMBB:
// Lo
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
+ for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
+ unsigned flags = (*MMOI)->getFlags();
+ flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
+ (*MMOI)->getSize(),
+ (*MMOI)->getBaseAlignment(),
+ (*MMOI)->getTBAAInfo(),
+ (*MMOI)->getRanges());
+ MIB.addMemOperand(MMO);
+ }
+ MachineInstr *LowMI = MIB;
+
// Hi
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- if (i == X86::AddrDisp)
+ if (i == X86::AddrDisp) {
MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
- else
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ } else {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
}
- MIB.setMemRefs(MMOBegin, MMOEnd);
+ MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end());
thisMBB->addSuccessor(mainMBB);
// mainMBB:
MachineBasicBlock *origMainMBB = mainMBB;
- mainMBB->addLiveIn(X86::EAX);
- mainMBB->addLiveIn(X86::EDX);
-
- // Copy EDX:EAX as they are used more than once.
- unsigned LoReg = MRI.createVirtualRegister(RC);
- unsigned HiReg = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX);
- unsigned t1L = MRI.createVirtualRegister(RC);
- unsigned t1H = MRI.createVirtualRegister(RC);
+ // Add PHIs.
+ MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L)
+ .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
+ MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H)
+ .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -12729,19 +13575,23 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
case X86::ATOMSUB6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(LoReg).addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(HiReg).addReg(SrcHiReg);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L)
+ .addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H)
+ .addReg(SrcHiReg);
break;
}
case X86::ATOMNAND6432: {
unsigned HiOpc, NOTOpc;
unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
- unsigned t2L = MRI.createVirtualRegister(RC);
- unsigned t2H = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
+ unsigned TmpL = MRI.createVirtualRegister(RC);
+ unsigned TmpH = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg)
+ .addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg)
+ .addReg(t4H);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH);
break;
}
case X86::ATOMMAX6432:
@@ -12757,12 +13607,12 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
unsigned cc = MRI.createVirtualRegister(RC);
// cl := cmp src_lo, lo
BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcLoReg).addReg(LoReg);
+ .addReg(SrcLoReg).addReg(t4L);
BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
// ch := cmp src_hi, hi
BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcHiReg).addReg(HiReg);
+ .addReg(SrcHiReg).addReg(t4H);
BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
// cc := if (src_hi == hi) ? cl : ch;
@@ -12777,58 +13627,74 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
}
BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
if (Subtarget->hasCMov()) {
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L)
- .addReg(SrcLoReg).addReg(LoReg);
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H)
- .addReg(SrcHiReg).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L)
+ .addReg(SrcLoReg).addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H)
+ .addReg(SrcHiReg).addReg(t4H);
} else {
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L)
- .addReg(SrcLoReg).addReg(LoReg)
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L)
+ .addReg(SrcLoReg).addReg(t4L)
.addImm(X86::COND_NE);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H)
- .addReg(SrcHiReg).addReg(HiReg)
+ // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the
+ // 2nd CMOV lowering.
+ mainMBB->addLiveIn(X86::EFLAGS);
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H)
+ .addReg(SrcHiReg).addReg(t4H)
.addImm(X86::COND_NE);
mainMBB = EmitLoweredSelect(MIB, mainMBB);
+      // Replace the original PHI nodes, as mainMBB is changed after CMOV
+      // lowering.
+ BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L)
+ .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
+ BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H)
+ .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
+ PhiL->eraseFromParent();
+ PhiH->eraseFromParent();
}
break;
}
case X86::ATOMSWAP6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg);
break;
}
}
  // Copy EDX:EAX back from t4H:t4L
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H);
  // Copy ECX:EBX from t2H:t2L
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H);
MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+ if (NewMO.isReg())
+ NewMO.setIsKill(false);
+ MIB.addOperand(NewMO);
+ }
MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Copy EDX:EAX back to t3H:t3L
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX);
+
BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
mainMBB->addSuccessor(origMainMBB);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
- sinkMBB->addLiveIn(X86::EAX);
- sinkMBB->addLiveIn(X86::EDX);
-
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstLoReg)
- .addReg(X86::EAX);
+ .addReg(t3L);
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), DstHiReg)
- .addReg(X86::EDX);
+ .addReg(t3H);
MI->eraseFromParent();
return sinkMBB;
@@ -13239,7 +14105,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MBB->addSuccessor(EndMBB);
}
- unsigned MOVOpc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
@@ -14203,6 +15069,18 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
Ld->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
+
+ // Make sure the newly-created LOAD is in the same position as Ld in
+ // terms of dependency. We create a TokenFactor for Ld and ResNode,
+ // and update uses of Ld's output chain to use the TokenFactor.
+ if (Ld->hasAnyUseOfValue(1)) {
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ SDValue(Ld, 1), SDValue(ResNode.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
+ DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
+ SDValue(ResNode.getNode(), 1));
+ }
+
return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
}
}
@@ -14248,7 +15126,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
- if (Subtarget->hasAVX() && VT.is256BitVector() &&
+ if (Subtarget->hasFp256() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
@@ -14266,127 +15144,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
-
/// PerformTruncateCombine - Converts a truncate operation to
/// a sequence of vector shuffle operations.
/// This is possible when we truncate a 256-bit vector to a 128-bit vector.
static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
-
- if (!Subtarget->hasAVX())
- return SDValue();
-
- EVT VT = N->getValueType(0);
- SDValue Op = N->getOperand(0);
- EVT OpVT = Op.getValueType();
- DebugLoc dl = N->getDebugLoc();
-
- if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
-
- if (Subtarget->hasAVX2()) {
- // AVX2: v4i64 -> v4i32
-
- // VPERMD
- static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v8i32, Op);
- Op = DAG.getVectorShuffle(MVT::v8i32, dl, Op, DAG.getUNDEF(MVT::v8i32),
- ShufMask);
-
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
- DAG.getIntPtrConstant(0));
- }
-
- // AVX: v4i64 -> v4i32
- SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
- DAG.getIntPtrConstant(0));
-
- SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
- DAG.getIntPtrConstant(2));
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
-
- // PSHUFD
- static const int ShufMask1[] = {0, 2, 0, 0};
-
- SDValue Undef = DAG.getUNDEF(VT);
- OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1);
- OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1);
-
- // MOVLHPS
- static const int ShufMask2[] = {0, 1, 4, 5};
-
- return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2);
- }
-
- if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
-
- if (Subtarget->hasAVX2()) {
- // AVX2: v8i32 -> v8i16
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v32i8, Op);
-
- // PSHUFB
- SmallVector<SDValue,32> pshufbMask;
- for (unsigned i = 0; i < 2; ++i) {
- pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
- for (unsigned j = 0; j < 8; ++j)
- pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
- }
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v32i8,
- &pshufbMask[0], 32);
- Op = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, Op, BV);
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i64, Op);
-
- static const int ShufMask[] = {0, 2, -1, -1};
- Op = DAG.getVectorShuffle(MVT::v4i64, dl, Op, DAG.getUNDEF(MVT::v4i64),
- &ShufMask[0]);
-
- Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
- DAG.getIntPtrConstant(0));
-
- return DAG.getNode(ISD::BITCAST, dl, VT, Op);
- }
-
- SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
- DAG.getIntPtrConstant(0));
-
- SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
- DAG.getIntPtrConstant(4));
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi);
-
- // PSHUFB
- static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
- -1, -1, -1, -1, -1, -1, -1, -1};
-
- SDValue Undef = DAG.getUNDEF(MVT::v16i8);
- OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1);
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
-
- // MOVLHPS
- static const int ShufMask2[] = {0, 1, 4, 5};
-
- SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2);
- return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res);
- }
-
return SDValue();
}
@@ -14581,6 +15344,76 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// \brief Matches a VSELECT onto min/max or returns 0 if the node doesn't match.
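+/// For example (illustrative): on SSE4.1, (vselect (setult x, y), x, y)
+/// matches X86ISD::UMIN, and the reversed-arm form (vselect (setult x, y),
+/// y, x) matches X86ISD::UMAX.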
+static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ if (!VT.isVector())
+ return 0;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::v32i8:
+ case MVT::v16i16:
+ case MVT::v8i32:
+ if (!Subtarget->hasAVX2())
+ return 0;
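+    // Fall through: with AVX2 the SSE2 check below trivially passes.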
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ if (!Subtarget->hasSSE2())
+ return 0;
+ }
+
+ // SSE2 has only a small subset of the operations.
+ bool hasUnsigned = Subtarget->hasSSE41() ||
+ (Subtarget->hasSSE2() && VT == MVT::v16i8);
+ bool hasSigned = Subtarget->hasSSE41() ||
+ (Subtarget->hasSSE2() && VT == MVT::v8i16);
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // Check for x CC y ? x : y.
+ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ return hasUnsigned ? X86ISD::UMIN : 0;
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ return hasUnsigned ? X86ISD::UMAX : 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ return hasSigned ? X86ISD::SMIN : 0;
+ case ISD::SETGT:
+ case ISD::SETGE:
+ return hasSigned ? X86ISD::SMAX : 0;
+ }
+ // Check for x CC y ? y : x -- a min/max with reversed arms.
+ } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(0))) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ return hasUnsigned ? X86ISD::UMAX : 0;
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ return hasUnsigned ? X86ISD::UMIN : 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ return hasSigned ? X86ISD::SMAX : 0;
+ case ISD::SETGT:
+ case ISD::SETGE:
+ return hasSigned ? X86ISD::SMIN : 0;
+ }
+ }
+
+ return 0;
+}
+
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
@@ -14861,6 +15694,67 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ // Match VSELECTs into subs with unsigned saturation.
+ if (!DCI.isBeforeLegalize() &&
+ N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
+ ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ // Check if one of the arms of the VSELECT is a zero vector. If it's on the
+    // left side, invert the predicate to simplify the logic below.
+ SDValue Other;
+ if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
+ Other = RHS;
+ CC = ISD::getSetCCInverse(CC, true);
+ } else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
+ Other = LHS;
+ }
+
+ if (Other.getNode() && Other->getNumOperands() == 2 &&
+ DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
+ SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
+ SDValue CondRHS = Cond->getOperand(1);
+
+ // Look for a general sub with unsigned saturation first.
+ // x >= y ? x-y : 0 --> subus x, y
+ // x > y ? x-y : 0 --> subus x, y
+ if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
+ Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
+
+ // If the RHS is a constant we have to reverse the const canonicalization.
+ // x > C-1 ? x+-C : 0 --> subus x, C
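+      // (E.g. with C == 16: x > 15 ? x + (-16) : 0 --> subus x, 16;
+      // illustrative for any splat constant C.)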
+ if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
+ isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
+ APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+ if (CondRHS.getConstantOperandVal(0) == -A-1)
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
+ DAG.getConstant(-A, VT));
+ }
+
+ // Another special case: If C was a sign bit, the sub has been
+ // canonicalized into a xor.
+ // FIXME: Would it be better to use ComputeMaskedBits to determine whether
+ // it's safe to decanonicalize the xor?
+ // x s< 0 ? x^C : 0 --> subus x, C
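+      // (When x s< 0 its sign bit is set, so x ^ SignBit == x - SignBit with
+      // no underflow; when x s>= 0, x is unsigned-less-than SignBit and the
+      // subus result is 0, matching the select.)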
+ if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
+ isSplatVector(OpRHS.getNode())) {
+ APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
+ if (A.isSignBit())
+ return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+
+ // Try to match a min/max vector operation.
+ if (!DCI.isBeforeLegalize() &&
+ N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC)
+ if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
+ return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
+
// If we know that this node is legal then we know that it is going to be
// matched by one of the SSE/AVX BLEND instructions. These instructions only
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
@@ -14935,8 +15829,9 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
  // Quit if the constant is neither 0 nor 1.
return SDValue();
- // Skip 'zext' node.
- if (SetCC.getOpcode() == ISD::ZERO_EXTEND)
+ // Skip 'zext' or 'trunc' node.
+ if (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
+ SetCC.getOpcode() == ISD::TRUNCATE)
SetCC = SetCC.getOperand(0);
switch (SetCC.getOpcode()) {
@@ -14955,9 +15850,15 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
return SDValue();
// Quit if false value is not a constant.
if (!FVal) {
- // A special case for rdrand, where 0 is set if false cond is found.
SDValue Op = SetCC.getOperand(0);
- if (Op.getOpcode() != X86ISD::RDRAND)
+ // Skip 'zext' or 'trunc' node.
+ if (Op.getOpcode() == ISD::ZERO_EXTEND ||
+ Op.getOpcode() == ISD::TRUNCATE)
+ Op = Op.getOperand(0);
+ // A special case for rdrand/rdseed, where 0 is set if false cond is
+ // found.
+ if ((Op.getOpcode() != X86ISD::RDRAND &&
+ Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
return SDValue();
}
// Quit if false value is not the constant 0 or 1.
@@ -15137,7 +16038,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
ConstantSDNode *CmpAgainst = 0;
if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
(CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
- dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) {
+ !isa<ConstantSDNode>(Cond.getOperand(0))) {
if (CC == X86::COND_NE &&
CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
@@ -15158,7 +16059,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-
/// PerformMulCombine - Optimize a single multiply with constant into two
/// in order to implement it with two cheaper instructions, e.g.
/// LEA + SHL, LEA + LEA.
@@ -15247,7 +16147,6 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
}
}
-
// Hardware support for vector shifts is sparse which makes us scalarize the
// vector operations in many cases. Also, on sandybridge ADD is faster than
// shl.
@@ -15271,127 +16170,14 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SHL) {
SDValue V = PerformSHLCombine(N, DAG);
if (V.getNode()) return V;
}
- // On X86 with SSE2 support, we can transform this to a vector shift if
- // all elements are shifted by the same amount. We can't do this in legalize
- // because the a constant vector is typically transformed to a constant pool
- // so we have no knowledge of the shift amount.
- if (!Subtarget->hasSSE2())
- return SDValue();
-
- if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
- (!Subtarget->hasAVX2() ||
- (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
- return SDValue();
-
- SDValue ShAmtOp = N->getOperand(1);
- EVT EltVT = VT.getVectorElementType();
- DebugLoc DL = N->getDebugLoc();
- SDValue BaseShAmt = SDValue();
- if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = VT.getVectorNumElements();
- unsigned i = 0;
- for (; i != NumElts; ++i) {
- SDValue Arg = ShAmtOp.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- BaseShAmt = Arg;
- break;
- }
- // Handle the case where the build_vector is all undef
- // FIXME: Should DAG allow this?
- if (i == NumElts)
- return SDValue();
-
- for (; i != NumElts; ++i) {
- SDValue Arg = ShAmtOp.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- if (Arg != BaseShAmt) {
- return SDValue();
- }
- }
- } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
- cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
- SDValue InVec = ShAmtOp.getOperand(0);
- if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = InVec.getValueType().getVectorNumElements();
- unsigned i = 0;
- for (; i != NumElts; ++i) {
- SDValue Arg = InVec.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- BaseShAmt = Arg;
- break;
- }
- } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
- if (C->getZExtValue() == SplatIdx)
- BaseShAmt = InVec.getOperand(1);
- }
- }
- if (BaseShAmt.getNode() == 0) {
- // Don't create instructions with illegal types after legalize
- // types has run.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) &&
- !DCI.isBeforeLegalize())
- return SDValue();
-
- BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
- DAG.getIntPtrConstant(0));
- }
- } else
- return SDValue();
-
- // The shift amount is an i32.
- if (EltVT.bitsGT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
- else if (EltVT.bitsLT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
-
- // The shift amount is identical so we can do a vector shift.
- SDValue ValOp = N->getOperand(0);
- switch (N->getOpcode()) {
- default:
- llvm_unreachable("Unknown shift opcode!");
- case ISD::SHL:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- case ISD::SRA:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- case ISD::SRL:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- }
+ return SDValue();
}
-
// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
// and friends. Likewise for OR -> CMPNEQSS.
@@ -15420,8 +16206,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
if (VT == MVT::f32 || VT == MVT::f64) {
bool ExpectingFlags = false;
// Check for any users that want flags:
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end();
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
!ExpectingFlags && UI != UE; ++UI)
switch (UI->getOpcode()) {
default:
@@ -15500,9 +16285,92 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) {
return false;
}
+// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
+// register. In most cases we actually compare or select YMM-sized registers
+// and mixing the two types creates horrible code. This method optimizes
+// some of the transition sequences.
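+// For instance (roughly): (zext (and (trunc x), (trunc y))) becomes
+// (and x, y) masked back down to the narrow element width, so the logic op
+// itself runs on the wide YMM type.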
+static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.is256BitVector())
+ return SDValue();
+
+ assert((N->getOpcode() == ISD::ANY_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
+
+ SDValue Narrow = N->getOperand(0);
+ EVT NarrowVT = Narrow->getValueType(0);
+ if (!NarrowVT.is128BitVector())
+ return SDValue();
+
+ if (Narrow->getOpcode() != ISD::XOR &&
+ Narrow->getOpcode() != ISD::AND &&
+ Narrow->getOpcode() != ISD::OR)
+ return SDValue();
+
+ SDValue N0 = Narrow->getOperand(0);
+ SDValue N1 = Narrow->getOperand(1);
+ DebugLoc DL = Narrow->getDebugLoc();
+
+  // The left side has to be a trunc.
+ if (N0.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+
+ // The type of the truncated inputs.
+ EVT WideVT = N0->getOperand(0)->getValueType(0);
+ if (WideVT != VT)
+ return SDValue();
+
+ // The right side has to be a 'trunc' or a constant vector.
+ bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
+ bool RHSConst = (isSplatVector(N1.getNode()) &&
+ isa<ConstantSDNode>(N1->getOperand(0)));
+ if (!RHSTrunc && !RHSConst)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT))
+ return SDValue();
+
+ // Set N0 and N1 to hold the inputs to the new wide operation.
+ N0 = N0->getOperand(0);
+ if (RHSConst) {
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
+ N1->getOperand(0));
+ SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, &C[0], C.size());
+ } else if (RHSTrunc) {
+ N1 = N1->getOperand(0);
+ }
+
+ // Generate the wide operation.
+ SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1);
+ unsigned Opcode = N->getOpcode();
+ switch (Opcode) {
+ case ISD::ANY_EXTEND:
+ return Op;
+ case ISD::ZERO_EXTEND: {
+ unsigned InBits = NarrowVT.getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(InBits);
+ Mask = Mask.zext(VT.getScalarType().getSizeInBits());
+ return DAG.getNode(ISD::AND, DL, VT,
+ Op, DAG.getConstant(Mask, VT));
+ }
+ case ISD::SIGN_EXTEND:
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
+ Op, DAG.getValueType(NarrowVT));
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -15510,9 +16378,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- EVT VT = N->getValueType(0);
-
- // Create ANDN, BLSI, and BLSR instructions
+  // Create BLSI and BLSR instructions
// BLSI is X & (-X)
// BLSR is X & (X-1)
if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
@@ -15520,13 +16386,6 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
- // Check LHS for not
- if (N0.getOpcode() == ISD::XOR && isAllOnes(N0.getOperand(1)))
- return DAG.getNode(X86ISD::ANDN, DL, VT, N0.getOperand(0), N1);
- // Check RHS for not
- if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1)))
- return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0);
-
// Check LHS for neg
if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
isZero(N0.getOperand(0)))
@@ -15579,6 +16438,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -15586,15 +16446,13 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- EVT VT = N->getValueType(0);
-
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// look for psign/blend
if (VT == MVT::v2i64 || VT == MVT::v4i64) {
if (!Subtarget->hasSSSE3() ||
- (VT == MVT::v4i64 && !Subtarget->hasAVX2()))
+ (VT == MVT::v4i64 && !Subtarget->hasInt256()))
return SDValue();
// Canonicalize pandn to RHS
@@ -15628,13 +16486,19 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
// Validate that the Mask operand is a vector sra node.
// FIXME: what to do for bytes, since there is a psignb/pblendvb, but
// there is no psrai.b
- if (Mask.getOpcode() != X86ISD::VSRAI)
- return SDValue();
-
- // Check that the SRA is all signbits.
- SDValue SraC = Mask.getOperand(1);
- unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ unsigned SraAmt = ~0;
+ if (Mask.getOpcode() == ISD::SRA) {
+ SDValue Amt = Mask.getOperand(1);
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt))
+ SraAmt = C->getZExtValue();
+ }
+ } else if (Mask.getOpcode() == X86ISD::VSRAI) {
+ SDValue SraC = Mask.getOperand(1);
+ SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ }
if ((SraAmt + 1) != EltBits)
return SDValue();
@@ -15762,6 +16626,7 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -15775,8 +16640,6 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
if (!Subtarget->hasBMI())
return SDValue();
- EVT VT = N->getValueType(0);
-
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
@@ -15807,23 +16670,61 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
EVT MemVT = Ld->getMemoryVT();
DebugLoc dl = Ld->getDebugLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned RegSz = RegVT.getSizeInBits();
+  // On Sandy Bridge, unaligned 256-bit loads are inefficient.
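+  // If so, split the load into two 16-byte halves and rebuild the wide value
+  // with two 128-bit inserts, as done below.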
ISD::LoadExtType Ext = Ld->getExtensionType();
+ unsigned Alignment = Ld->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
+ if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
+ !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
+ unsigned NumElems = RegVT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Increment = DAG.getConstant(16, TLI.getPointerTy());
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ NumElems/2);
+ SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ std::min(16U, Alignment));
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Load1.getValue(1),
+ Load2.getValue(1));
+
+ SDValue NewVec = DAG.getUNDEF(RegVT);
+ NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl);
+ NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl);
+ return DCI.CombineTo(N, NewVec, TF, true);
+ }
// If this is a vector EXT Load then attempt to optimize it using a
- // shuffle. We need SSSE3 shuffles.
+ // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the
+ // expansion is still better than scalar code.
+ // We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll
+  // emit a shuffle and an arithmetic shift.
// TODO: It is possible to support ZExt by zeroing the undef values
// during the shuffle phase or after the shuffle.
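  // E.g. (illustrative): a sextload from v4i8 to v4i32 shuffles each byte
  // into the top byte of its i32 lane and then arithmetic-shifts each lane
  // right by 24.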
- if (RegVT.isVector() && RegVT.isInteger() &&
- Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) {
+ if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() &&
+ (Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) {
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
unsigned NumElems = RegVT.getVectorNumElements();
- unsigned RegSz = RegVT.getSizeInBits();
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
+ if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256())
+ return SDValue();
+
// All sizes must be a power of two.
if (!isPowerOf2_32(RegSz * MemSz * NumElems))
return SDValue();
@@ -15847,16 +16748,23 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// Calculate the number of scalar loads that we need to perform
// in order to load our vector from memory.
unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
+ if (Ext == ISD::SEXTLOAD && NumLoads > 1)
+ return SDValue();
+
+    unsigned loadRegSize = RegSz;
+    if (Ext == ISD::SEXTLOAD && RegSz == 256)
+      loadRegSize /= 2;
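+    // (A 256-bit sextload only reaches this point with AVX2, where it is
+    // emitted as a VSEXT from a 128-bit input, so only half of the register
+    // width needs to be loaded from memory.)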
// Represent our vector as a sequence of elements which are the
// largest scalar that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
- RegSz/SclrLoadTy.getSizeInBits());
+                                     loadRegSize/SclrLoadTy.getSizeInBits());
// Represent the data using the same element type that is stored in
  // memory. In practice, we "widen" MemVT.
- EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- RegSz/MemVT.getScalarType().getSizeInBits());
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+                       loadRegSize/MemVT.getScalarType().getSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
@@ -15897,6 +16805,39 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
unsigned SizeRatio = RegSz/MemSz;
+ if (Ext == ISD::SEXTLOAD) {
+ // If we have SSE4.1 we can directly emit a VSEXT node.
+ if (Subtarget->hasSSE41()) {
+ SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
+ return DCI.CombineTo(N, Sext, TF, true);
+ }
+
+ // Otherwise we'll shuffle the small elements in the high bits of the
+ // larger type and perform an arithmetic shift. If the shift is not legal
+ // it's better to scalarize.
+ if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT))
+ return SDValue();
+
+ // Redistribute the loaded elements into the different locations.
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i*SizeRatio + SizeRatio-1] = i;
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
+
+ Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
+
+ // Build the arithmetic shift.
+ unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
+ MemVT.getVectorElementType().getSizeInBits();
+ Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
+ DAG.getConstant(Amt, RegVT));
+
+ return DCI.CombineTo(N, Shuff, TF, true);
+ }
+
// Redistribute the loaded elements into the different locations.
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
@@ -15930,11 +16871,16 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
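  // Below, such an unaligned 256-bit store is split into two 16-byte stores
  // of the extracted 128-bit halves, mirroring the load split above.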
- if (VT.is256BitVector() && !Subtarget->hasAVX2() &&
- StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
- StoredVal.getNumOperands() == 2) {
- SDValue Value0 = StoredVal.getOperand(0);
- SDValue Value1 = StoredVal.getOperand(1);
+ unsigned Alignment = St->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
+ if (VT.is256BitVector() && !Subtarget->hasInt256() &&
+ StVT == VT && !IsAligned) {
+ unsigned NumElems = VT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
+ SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
SDValue Ptr0 = St->getBasePtr();
@@ -15942,10 +16888,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(), Alignment);
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(),
+ std::min(16U, Alignment));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
@@ -16030,7 +16977,6 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
Chains.size());
}
-
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
@@ -16041,8 +16987,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->getFnAttributes().
- hasAttribute(Attributes::NoImplicitFloat);
+ bool NoImplicitFloatOps = F->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
@@ -16278,7 +17224,7 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal adds from adds of shuffles.
if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
- (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, true))
return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -16293,7 +17239,7 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal subs from subs of shuffles.
if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
- (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, false))
return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -16336,7 +17282,6 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
N->getOperand(0), N->getOperand(1));
}
-
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
@@ -16382,58 +17327,57 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget *Subtarget) {
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
-
- if (!Subtarget->hasAVX())
+static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector())
return SDValue();
- EVT VT = N->getValueType(0);
- SDValue Op = N->getOperand(0);
- EVT OpVT = Op.getValueType();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
DebugLoc dl = N->getDebugLoc();
- if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
- (VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
-
- if (Subtarget->hasAVX2())
- return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, Op);
-
- // Optimize vectors in AVX mode
- // Sign extend v8i16 to v8i32 and
- // v4i32 to v4i64
- //
- // Divide input vector into two parts
- // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
- // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
- // concat the vectors to original VT
-
- unsigned NumElems = OpVT.getVectorNumElements();
- SDValue Undef = DAG.getUNDEF(OpVT);
-
- SmallVector<int,8> ShufMask1(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask1[i] = i;
-
- SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]);
+  // The SIGN_EXTEND_INREG to v4i64 is an expensive operation on
+  // both SSE and AVX2, since there is no sign-extended shift right
+  // operation on a vector with 64-bit elements.
+  // (sext_in_reg (v4i64 anyext (v4i32 x)), ExtraVT) ->
+  //   (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT)))
+ if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)) {
+ SDValue N00 = N0.getOperand(0);
- SmallVector<int,8> ShufMask2(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask2[i] = i + NumElems/2;
+    // EXTLOAD has a better solution on AVX2: it may be replaced with an
+    // X86ISD::VSEXT node.
+ if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
+ if (!ISD::isNormalLoad(N00.getNode()))
+ return SDValue();
- SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]);
+ if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
+ SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
+ N00, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
+ }
+ }
+ return SDValue();
+}
- EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
- VT.getVectorNumElements()/2);
+static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
- OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
- OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
+ if (!Subtarget->hasFp256())
+ return SDValue();
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() && VT.getSizeInBits() == 256) {
+ SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
}
+
return SDValue();
}
@@ -16487,58 +17431,26 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
DebugLoc dl = N->getDebugLoc();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- EVT OpVT = N0.getValueType();
if (N0.getOpcode() == ISD::AND &&
N0.hasOneUse() &&
N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() != X86ISD::SETCC_CARRY)
- return SDValue();
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!C || C->getZExtValue() != 1)
- return SDValue();
- return DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
- N00.getOperand(0), N00.getOperand(1)),
- DAG.getConstant(1, VT));
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 1)
+ return SDValue();
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
+ N00.getOperand(0), N00.getOperand(1)),
+ DAG.getConstant(1, VT));
+ }
}
- // Optimize vectors in AVX mode:
- //
- // v8i16 -> v8i32
- // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
- // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
- // Concat upper and lower parts.
- //
- // v4i32 -> v4i64
- // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
- // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
- // Concat upper and lower parts.
- //
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
-
- if (!Subtarget->hasAVX())
- return SDValue();
-
- if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
- ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
-
- if (Subtarget->hasAVX2())
- return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0);
-
- SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
- SDValue OpLo = getUnpackl(DAG, dl, OpVT, N0, ZeroVec);
- SDValue OpHi = getUnpackh(DAG, dl, OpVT, N0, ZeroVec);
-
- EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- VT.getVectorNumElements()/2);
-
- OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
- OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
-
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+ if (VT.is256BitVector()) {
+ SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
}
return SDValue();
@@ -16570,8 +17482,8 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
-// as "sbb reg,reg", since it can be extended without zext and produces
+// Helper function of PerformSETCCCombine. It materializes "setb reg"
+// as "sbb reg,reg", since it can be extended without zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
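// E.g. (illustrative): "sbb %eax, %eax" yields all-ones when CF is set and 0
// otherwise; the trailing AND with 1 recovers the plain 0/1 "setb" value.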
static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, MVT::i8,
@@ -16589,13 +17501,13 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
SDValue EFLAGS = N->getOperand(1);
if (CC == X86::COND_A) {
- // Try to convert COND_A into COND_B in an attempt to facilitate
+ // Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(),
@@ -16751,7 +17663,7 @@ static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal adds from adds of shuffles.
if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
- (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
@@ -16784,7 +17696,7 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
  // Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0);
if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
- (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
@@ -16803,7 +17715,8 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
if (In.getOpcode() != X86ISD::VZEXT)
return SDValue();
- return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0));
+ return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0),
+ In.getOperand(0));
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
@@ -16841,13 +17754,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
+ case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
@@ -17030,7 +17944,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
- std::sort(AsmPieces.begin(), AsmPieces.end());
+ array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
@@ -17048,7 +17962,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
- std::sort(AsmPieces.begin(), AsmPieces.end());
+ array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
@@ -17074,8 +17988,6 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
return false;
}
-
-
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
@@ -17152,17 +18064,17 @@ TargetLowering::ConstraintWeight
case 'f':
case 't':
case 'u':
- if (type->isFloatingPointTy())
- weight = CW_SpecificReg;
- break;
+ if (type->isFloatingPointTy())
+ weight = CW_SpecificReg;
+ break;
case 'y':
- if (type->isX86_MMXTy() && Subtarget->hasMMX())
- weight = CW_SpecificReg;
- break;
+ if (type->isX86_MMXTy() && Subtarget->hasMMX())
+ weight = CW_SpecificReg;
+ break;
case 'x':
case 'Y':
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
- ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasAVX()))
+ ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasFp256()))
weight = CW_Register;
break;
case 'I':
@@ -17530,7 +18442,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// really want an 8-bit or 32-bit register, map to the appropriate register
// class and return the appropriate register.
if (Res.second == &X86::GR16RegClass) {
- if (VT == MVT::i8) {
+ if (VT == MVT::i8 || VT == MVT::i1) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
@@ -17543,7 +18455,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Res.first = DestReg;
Res.second = &X86::GR8RegClass;
}
- } else if (VT == MVT::i32) {
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
@@ -17560,7 +18472,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Res.first = DestReg;
Res.second = &X86::GR32RegClass;
}
- } else if (VT == MVT::i64) {
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
@@ -17598,207 +18510,3 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return Res;
}
-
-//===----------------------------------------------------------------------===//
-//
-// X86 cost model.
-//
-//===----------------------------------------------------------------------===//
-
-struct X86CostTblEntry {
- int ISD;
- MVT Type;
- unsigned Cost;
-};
-
-static int
-FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
- for (unsigned int i = 0; i < len; ++i)
- if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
- return i;
-
- // Could not find an entry.
- return -1;
-}
-
-struct X86TypeConversionCostTblEntry {
- int ISD;
- MVT Dst;
- MVT Src;
- unsigned Cost;
-};
-
-static int
-FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
- int ISD, MVT Dst, MVT Src) {
- for (unsigned int i = 0; i < len; ++i)
- if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
- return i;
-
- // Could not find an entry.
- return -1;
-}
-
-unsigned
-X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
- Type *Ty) const {
- // Legalize the type.
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty);
-
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
- static const X86CostTblEntry AVX1CostTable[] = {
- // We don't have to scalarize unsupported ops. We can issue two half-sized
- // operations and we only need to extract the upper YMM half.
- // Two ops + 1 extract + 1 insert = 4.
- { ISD::MUL, MVT::v8i32, 4 },
- { ISD::SUB, MVT::v8i32, 4 },
- { ISD::ADD, MVT::v8i32, 4 },
- { ISD::MUL, MVT::v4i64, 4 },
- { ISD::SUB, MVT::v4i64, 4 },
- { ISD::ADD, MVT::v4i64, 4 },
- };
-
- // Look for AVX1 lowering tricks.
- if (ST.hasAVX()) {
- int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
- LT.second);
- if (Idx != -1)
- return LT.first * AVX1CostTable[Idx].Cost;
- }
- // Fallback to the default implementation.
- return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
-}
-
-unsigned
-X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
- assert(Val->isVectorTy() && "This must be a vector type");
-
- if (Index != -1U) {
- // Legalize the type.
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Val);
-
- // This type is legalized to a scalar type.
- if (!LT.second.isVector())
- return 0;
-
- // The type may be split. Normalize the index to the new type.
- unsigned Width = LT.second.getVectorNumElements();
- Index = Index % Width;
-
- // Floating point scalars are already located in index #0.
- if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
- return 0;
- }
-
- return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);
-}
-
-unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode,
- Type *ValTy,
- Type *CondTy) const {
- // Legalize the type.
- std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy);
-
- MVT MTy = LT.second;
-
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- const X86Subtarget &ST =
- TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
- static const X86CostTblEntry SSE42CostTbl[] = {
- { ISD::SETCC, MVT::v2f64, 1 },
- { ISD::SETCC, MVT::v4f32, 1 },
- { ISD::SETCC, MVT::v2i64, 1 },
- { ISD::SETCC, MVT::v4i32, 1 },
- { ISD::SETCC, MVT::v8i16, 1 },
- { ISD::SETCC, MVT::v16i8, 1 },
- };
-
- static const X86CostTblEntry AVX1CostTbl[] = {
- { ISD::SETCC, MVT::v4f64, 1 },
- { ISD::SETCC, MVT::v8f32, 1 },
- // AVX1 does not support 8-wide integer compare.
- { ISD::SETCC, MVT::v4i64, 4 },
- { ISD::SETCC, MVT::v8i32, 4 },
- { ISD::SETCC, MVT::v16i16, 4 },
- { ISD::SETCC, MVT::v32i8, 4 },
- };
-
- static const X86CostTblEntry AVX2CostTbl[] = {
- { ISD::SETCC, MVT::v4i64, 1 },
- { ISD::SETCC, MVT::v8i32, 1 },
- { ISD::SETCC, MVT::v16i16, 1 },
- { ISD::SETCC, MVT::v32i8, 1 },
- };
-
- if (ST.hasSSE42()) {
- int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * SSE42CostTbl[Idx].Cost;
- }
-
- if (ST.hasAVX()) {
- int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX1CostTbl[Idx].Cost;
- }
-
- if (ST.hasAVX2()) {
- int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX2CostTbl[Idx].Cost;
- }
-
- return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy);
-}
-
-unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode,
- Type *Dst,
- Type *Src) const {
- int ISD = InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- EVT SrcTy = TLI->getValueType(Src);
- EVT DstTy = TLI->getValueType(Dst);
-
- if (!SrcTy.isSimple() || !DstTy.isSimple())
- return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
-
- const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
-
- static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
- { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
- { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
- { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
- { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
- { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
- { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 },
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
- { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
- { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
- { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
- { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
- };
-
- if (ST.hasAVX()) {
- int Idx = FindInConvertTable(AVXConversionTbl,
- array_lengthof(AVXConversionTbl),
- ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return AVXConversionTbl[Idx].Cost;
- }
-
- return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src);
-}
-
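The cost hooks deleted above all follow one idiom: legalize the type, look up the pair (ISD opcode, simple value type) in a small static table, and scale any hit by the legalization split factor LT.first; getVectorInstrCost additionally normalizes a split element index with Index % Width. A minimal, self-contained sketch of that lookup, mirroring the deleted FindInTable and X86CostTblEntry (the surrounding scaffolding is illustrative, not the LLVM sources):

#include <cstddef>

struct X86CostTblEntry {
  int ISD;        // target-independent opcode, e.g. ISD::SETCC
  unsigned Type;  // simple value type, e.g. MVT::v4i32
  unsigned Cost;  // estimated cost of one legalized operation
};

// Linear scan over a small static table; returns the index of the first
// entry matching (ISD, Type), or -1 when there is no match.
static int FindInTable(const X86CostTblEntry *Tbl, size_t Len,
                       int ISD, unsigned Type) {
  for (size_t I = 0; I != Len; ++I)
    if (Tbl[I].ISD == ISD && Tbl[I].Type == Type)
      return static_cast<int>(I);
  return -1;
}

// On a hit the caller returns LT.first * Tbl[Idx].Cost, so a type that
// legalization splits in two is charged twice the per-operation cost.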
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 465c6036ada6..5725f7aea581 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -15,15 +15,14 @@
#ifndef X86ISELLOWERING_H
#define X86ISELLOWERING_H
-#include "X86Subtarget.h"
-#include "X86RegisterInfo.h"
#include "X86MachineFunctionInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/Target/TargetOptions.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
namespace llvm {
namespace X86ISD {
@@ -176,13 +175,14 @@ namespace llvm {
/// PSIGN - Copy integer sign.
PSIGN,
- /// BLENDV - Blend where the selector is an XMM.
+ /// BLENDV - Blend where the selector is a register.
BLENDV,
- /// BLENDxx - Blend where the selector is an immediate.
- BLENDPW,
- BLENDPS,
- BLENDPD,
+ /// BLENDI - Blend where the selector is an immediate.
+ BLENDI,
+
+ /// SUBUS - Integer sub with unsigned saturation.
+ SUBUS,
/// HADD - Integer horizontal add.
HADD,
@@ -196,6 +196,12 @@ namespace llvm {
/// FHSUB - Floating point horizontal sub.
FHSUB,
+ /// UMAX, UMIN - Unsigned integer max and min.
+ UMAX, UMIN,
+
+ /// SMAX, SMIN - Signed integer max and min.
+ SMAX, SMIN,
+
/// FMAX, FMIN - Floating point max and min.
///
FMAX, FMIN,
@@ -228,11 +234,8 @@ namespace llvm {
// EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
EH_SJLJ_LONGJMP,
- /// TC_RETURN - Tail call return.
- /// operand #0 chain
- /// operand #1 callee (register or absolute)
- /// operand #2 stack adjustment
- /// operand #3 optional in flag
+ /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for
+ /// the list of operands.
TC_RETURN,
// VZEXT_MOVL - Vector move low and zero extend.
@@ -272,8 +275,6 @@ namespace llvm {
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
- ANDN, // ANDN - Bitwise AND NOT with FLAGS results.
-
BLSI, // BLSI - Extract lowest set isolated bit
BLSMSK, // BLSMSK - Get mask up to lowest set bit
BLSR, // BLSR - Reset lowest set bit
@@ -290,7 +291,7 @@ namespace llvm {
TESTP,
// Several flavors of instructions with vector shuffle behaviors.
- PALIGN,
+ PALIGNR,
PSHUFD,
PSHUFHW,
PSHUFLW,
@@ -355,10 +356,17 @@ namespace llvm {
// RDRAND - Get a random integer and indicate whether it is valid in CF.
RDRAND,
+ // RDSEED - Get a NIST SP800-90B & C compliant random integer and
+ // indicate whether it is valid in CF.
+ RDSEED,
+
// PCMP*STRI
PCMPISTRI,
PCMPESTRI,
+ // XTEST - Test if in transactional execution.
+ XTEST,
+
// ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
// ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
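Both RDRAND and the newly added RDSEED report validity through CF rather than through a result register the programmer inspects directly; the Intel intrinsics fold that carry bit into their return value. A hedged usage sketch (_rdrand32_step is the real intrinsic; the retry loop and function name are illustrative, and compiling it requires the rdrnd feature, e.g. -mrdrnd):

#include <immintrin.h>

unsigned int random_u32() {
  unsigned int v;
  while (!_rdrand32_step(&v))  // returns 0 when CF was clear, i.e. no value yet
    ;                          // retry until the hardware delivers a valid value
  return v;
}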
@@ -470,7 +478,7 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
virtual const MCExpr *
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
@@ -496,23 +504,29 @@ namespace llvm {
/// lowering. If DstAlign is zero that means it's safe to destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
- /// probably because the source does not need to be loaded. If
- /// 'IsZeroVal' is true, that means it's safe to return a
- /// non-scalar-integer type, e.g. empty string source, constant, or loaded
- /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
- /// constant so it does not need to be loaded.
+ /// probably because the source does not need to be loaded. If 'IsMemset' is
+ /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+ /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
virtual EVT
- getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsZeroVal, bool MemcpyStrSrc,
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// isSafeMemOpType - Returns true if it's safe to use load / store of the
+ /// specified type to expand memcpy / memset inline. This is mostly true
+ /// for all types except for some special cases. For example, on X86
+ /// targets without SSE2 f64 load / store are done with fldl / fstpl which
+ /// also does type conversion. Note the specified type doesn't have to be
+ /// legal as the hook is used before type legalization.
+ virtual bool isSafeMemOpType(MVT VT) const;
+
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
- /// unaligned memory accesses. of the specified type.
- virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
- return true;
- }
+ /// unaligned memory accesses of the specified type. Returns whether it
+ /// is "fast" by reference in the second argument.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
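The reworked allowsUnalignedMemoryAccesses hook splits two questions that the old always-true boolean conflated: whether the unaligned access is legal at all, and whether it is fast. A sketch of that contract, with EVT reduced to a plain bit width so the example stands alone (the real hook takes an llvm::EVT and is a virtual override on the target's lowering class):

// Legality is the return value; speed is reported through *Fast, which
// callers may pass as null when they only care about legality.
bool allowsUnalignedMemoryAccesses(unsigned SizeInBits, bool *Fast) {
  if (Fast)
    *Fast = SizeInBits <= 128;  // pretend wider unaligned accesses are slow
  return true;                  // always legal in this illustrative target
}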
@@ -630,6 +644,7 @@ namespace llvm {
/// result out to 64 bits.
virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
@@ -710,7 +725,7 @@ namespace llvm {
protected:
std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(EVT VT) const;
+ findRepresentativeClass(MVT VT) const;
private:
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
@@ -783,9 +798,7 @@ namespace llvm {
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
@@ -800,18 +813,18 @@ namespace llvm {
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToBT(SDValue And, ISD::CondCode CC,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
@@ -828,8 +841,9 @@ namespace llvm {
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
-
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
// Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
@@ -838,7 +852,7 @@ namespace llvm {
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -861,9 +875,8 @@ namespace llvm {
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
- virtual EVT
- getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
- ISD::NodeType ExtendKind) const;
+ virtual MVT
+ getTypeForExtArgOrReturn(MVT VT, ISD::NodeType ExtendKind) const;
virtual bool
CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
@@ -932,23 +945,6 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
-
- class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
- public:
- explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
- VectorTargetTransformImpl(TL) {}
-
- virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
-
- virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const;
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const;
-
- virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const;
- };
}
#endif // X86ISELLOWERING_H
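The BLENDV/BLENDI comment change in the header above tracks a real ISA distinction that is visible at the intrinsics level: SSE4.1 offers one blend whose per-lane selector lives in a register and one whose selector is an instruction immediate. A small illustration (both intrinsics are real; compile with SSE4.1 enabled, e.g. -msse4.1):

#include <smmintrin.h>

__m128 blend_variable(__m128 a, __m128 b, __m128 mask) {
  return _mm_blendv_ps(a, b, mask);  // per-lane selector in a register (BLENDV-style)
}

__m128 blend_constant(__m128 a, __m128 b) {
  return _mm_blend_ps(a, b, 0x5);    // selector is the immediate 0b0101 (BLENDI-style)
}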
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 54b91c3edb8b..ba1aede3c1a0 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -84,15 +84,16 @@ defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
-def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
+ [(int_x86_mmx_femms)]>;
-def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
- "prefetch $addr", []>;
+def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
+ "prefetch\t$addr",
+ [(prefetch addr:$addr, (i32 0), imm, (i32 1))]>;
-// FIXME: Diassembler gets a bogus decode conflict.
-let isAsmParserOnly = 1 in
-def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
- "prefetchw $addr", []>;
+def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
+ [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))]>, TB,
+ Requires<[HasPrefetchW]>;
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
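The rewritten PREFETCHW pattern above matches the generic prefetch node with rw = 1 (a write prefetch), locality 3, and the data cache, which is exactly the node shape __builtin_prefetch produces. A sketch (the builtin is standard in GCC and Clang; actually emitting prefetchw additionally requires the HasPrefetchW feature):

void warm_for_write(void *p) {
  __builtin_prefetch(p, /*rw=*/1, /*locality=*/3);  // write prefetch, maximal locality
}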
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index f790611b8f8c..225e9720da0c 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
// LEA - Load Effective Address
-
+let SchedRW = [WriteLEA] in {
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
(outs GR16:$dst), (ins i32mem:$src),
@@ -29,48 +29,59 @@ def LEA32r : I<0x8D, MRMSrcMem,
def LEA64_32r : I<0x8D, MRMSrcMem,
(outs GR32:$dst), (ins lea64_32mem:$src),
"lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)], IIC_LEA>,
+ [(set GR32:$dst, lea64_32addr:$src)], IIC_LEA>,
Requires<[In64BitMode]>;
let isReMaterializable = 1 in
-def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
"lea{q}\t{$src|$dst}, {$dst|$src}",
[(set GR64:$dst, lea64addr:$src)], IIC_LEA>;
-
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions.
//
+// SchedModel info for instructions that load one value and get the second
+// (and possibly third) value from a register.
+// This is used for instructions that put the memory operands before other
+// uses.
+class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Register reads (implicit or explicit).
+ ReadAfterLd, ReadAfterLd]>;
+
// Extra precision multiplication
// AL is really implied by AX, but the registers in Defs must match the
// SDNode results (i8, i32).
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// FIXME: Used for 8-bit mul, ignore result upper 8 bits.
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8
-
+ (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
- "mul{w}\t$src",
- [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16
-
+ "mul{w}\t$src",
+ [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
- "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ "mul{l}\t$src",
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/],
- IIC_MUL32_REG>;
+ IIC_MUL32_REG>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
- "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ "mul{q}\t$src",
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/],
- IIC_MUL64>;
-
+ IIC_MUL64>, Sched<[WriteIMul]>;
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
"mul{b}\t$src",
@@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8]
-
+ (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let mayLoad = 1, neverHasSideEffects = 1 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
"mul{w}\t$src",
- [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16]
-
+ [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
"mul{l}\t$src",
- [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64]
+ "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>;
}
let neverHasSideEffects = 1 in {
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [],
- IIC_IMUL8>; // AL,AH = AL*GR8
+ IIC_IMUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [],
- IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16
+ IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [],
- IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32
+ IIC_IMUL32_RR>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [],
- IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64
+ IIC_IMUL64_RR>, Sched<[WriteIMul]>;
let mayLoad = 1 in {
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8]
+ "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize;
- // AX,DX = AX*[mem16]
+ "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize,
+ SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64]
+ "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>;
}
} // neverHasSideEffects
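The "AL,AH = AL*GR8" style comments hoisted above the defs describe the implicit-register contract of one-operand MUL/IMUL: the 8-bit form reads AL and writes the full 16-bit product to AX. A plain C++ restatement of that 8-bit case (function and parameter names are illustrative):

#include <cstdint>

void mul8(uint8_t al, uint8_t src, uint8_t &lo /*AL*/, uint8_t &hi /*AH*/) {
  uint16_t ax = uint16_t(al) * uint16_t(src);  // full double-width product
  lo = uint8_t(ax & 0xFF);  // low byte of the product, as MUL leaves it in AL
  hi = uint8_t(ax >> 8);    // high byte, as MUL leaves it in AH
}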
@@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+let isCommutable = 1, SchedRW = [WriteIMul] in {
+// X = IMUL Y, Z --> X = IMUL Z, Y
// Register-Register Signed Integer Multiply
def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>,
TB;
-}
+} // isCommutable, SchedRW
// Register-Memory Signed Integer Multiply
+let SchedRW = [WriteIMulLd, ReadAfterLd] in {
def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -158,7 +180,7 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(X86smul_flag GR16:$src1, (load addr:$src2)))],
IIC_IMUL16_RM>,
TB, OpSize;
-def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
@@ -172,18 +194,20 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(X86smul_flag GR64:$src1, (load addr:$src2)))],
IIC_IMUL64_RM>,
TB;
+} // SchedRW
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
// Surprisingly enough, these are not two address instructions!
let Defs = [EFLAGS] in {
+let SchedRW = [WriteIMul] in {
// Register-Integer Signed Integer Multiply
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, imm:$src2))],
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))],
IIC_IMUL16_RRI>, OpSize;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
@@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt8:$src2))],
IIC_IMUL64_RRI>;
-
+} // SchedRW
// Memory-Integer Signed Integer Multiply
+let SchedRW = [WriteIMulLd] in {
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -260,12 +285,15 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(X86smul_flag (load addr:$src1),
i64immSExt8:$src2))],
IIC_IMUL64_RMI>;
+} // SchedRW
} // Defs = [EFLAGS]
// unsigned division/remainder
+let hasSideEffects = 1 in { // so that we don't speculatively execute
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"div{b}\t$src", [], IIC_DIV8_REG>;
@@ -279,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
"div{q}\t$src", [], IIC_DIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", [], IIC_DIV8_MEM>;
+ "div{b}\t$src", [], IIC_DIV8_MEM>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", [], IIC_DIV16>, OpSize;
+ "div{w}\t$src", [], IIC_DIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", [], IIC_DIV32>;
+ "div{l}\t$src", [], IIC_DIV32>,
+ SchedLoadReg<WriteIDivLd>;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", [], IIC_DIV64>;
+ "div{q}\t$src", [], IIC_DIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
// Signed division/remainder.
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"idiv{b}\t$src", [], IIC_IDIV8>;
@@ -310,21 +344,27 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
"idiv{q}\t$src", [], IIC_IDIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", [], IIC_IDIV8>;
+ "idiv{b}\t$src", [], IIC_IDIV8>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
-def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", [], IIC_IDIV32>;
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
+ "idiv{l}\t$src", [], IIC_IDIV32>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", [], IIC_IDIV64>;
+ "idiv{q}\t$src", [], IIC_IDIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
+} // hasSideEffects = 1
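The hasSideEffects = 1 wrapper closed above exists because x86 DIV/IDIV raise a #DE fault on a zero (or overflowing) divisor; marking them as having side effects keeps the optimizer from speculatively hoisting them past their guards. A minimal illustration of the pattern that must stay intact:

int safe_div(int n, int d) {
  if (d != 0)
    return n / d;  // the hardware divide may only execute once the guard passed
  return 0;        // speculating the divide above the branch could trap on d == 0
}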
//===----------------------------------------------------------------------===//
// Two address Instructions.
@@ -333,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
// unary instructions
let CodeSize = 2 in {
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
"neg{b}\t$dst",
[(set GR8:$dst, (ineg GR8:$src1)),
@@ -349,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
[(set GR64:$dst, (ineg GR64:$src1)),
(implicit EFLAGS)], IIC_UNARY_REG>;
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+// Read-modify-write negate.
+let SchedRW = [WriteALULd, WriteRMW] in {
def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
"neg{b}\t$dst",
[(store (ineg (loadi8 addr:$dst)), addr:$dst),
@@ -366,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
[(store (ineg (loadi64 addr:$dst)), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
+} // SchedRW
} // Defs = [EFLAGS]
// Note: NOT does not set EFLAGS!
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
// Match xor -1 to not. Favors these over a move imm + xor to save code size.
let AddedComplexity = 15 in {
def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -386,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
[(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>;
}
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+let SchedRW = [WriteALULd, WriteRMW] in {
def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
"not{b}\t$dst",
[(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
@@ -400,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
[(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
[(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
+} // SchedRW
} // CodeSize
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
@@ -412,11 +457,11 @@ def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>,
OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
IIC_UNARY_REG>,
@@ -430,31 +475,31 @@ def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
// In 64-bit mode, single byte INC and DEC cannot be encoded.
let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
// Can transform into LEA.
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))],
IIC_UNARY_REG>,
OpSize, Requires<[In64BitMode]>;
-def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
IIC_UNARY_REG>,
Requires<[In64BitMode]>;
-def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
IIC_UNARY_REG>,
OpSize, Requires<[In64BitMode]>;
-def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
IIC_UNARY_REG>,
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -469,7 +514,7 @@ let CodeSize = 2 in {
def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
[(store (add (loadi64 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
-
+
// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
// how to unfold them.
// FIXME: What is this for??
@@ -489,21 +534,21 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In64BitMode]>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
[(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))],
IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
IIC_UNARY_REG>,
OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
IIC_UNARY_REG>,
@@ -512,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
IIC_UNARY_REG>;
} // CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -530,7 +575,7 @@ let CodeSize = 2 in {
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
} // Defs = [EFLAGS]
@@ -544,57 +589,57 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> {
/// VT - This is the value type itself.
ValueType VT = vt;
-
+
/// InstrSuffix - This is the suffix used on instructions with this type. For
/// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
string InstrSuffix = instrsuffix;
-
+
/// RegClass - This is the register class associated with this type. For
/// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
RegisterClass RegClass = regclass;
-
+
/// LoadNode - This is the load node associated with this type. For
/// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
PatFrag LoadNode = loadnode;
-
+
/// MemOperand - This is the memory operand associated with this type. For
/// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
X86MemOperand MemOperand = memoperand;
-
+
/// ImmEncoding - This is the encoding of an immediate of this type. For
/// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
/// since the immediate fields of i64 instructions is a 32-bit sign extended
/// value.
ImmType ImmEncoding = immkind;
-
+
/// ImmOperand - This is the operand kind of an immediate of this type. For
/// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
/// i64i32imm since the immediate fields of i64 instructions is a 32-bit sign
/// extended value.
Operand ImmOperand = immoperand;
-
+
/// ImmOperator - This is the operator that should be used to match an
/// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
SDPatternOperator ImmOperator = immoperator;
-
+
/// Imm8Operand - This is the operand kind to use for an imm8 of this type.
/// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
/// only used for instructions that have a sign-extended imm8 field form.
Operand Imm8Operand = imm8operand;
-
+
/// Imm8Operator - This is the operator that should be used to match an 8-bit
/// sign extended immediate of this kind in a pattern (e.g. imm16immSExt8).
SDPatternOperator Imm8Operator = imm8operator;
-
+
/// HasOddOpcode - This bit is true if the instruction should have an odd (as
/// opposed to even) opcode. Operations on i8 are usually even, operations on
/// other datatypes are odd.
bit HasOddOpcode = hasOddOpcode;
-
+
/// HasOpSizePrefix - This bit is set to true if the instruction should have
/// the 0x66 operand size prefix. This is set for i16 types.
bit HasOpSizePrefix = hasOpSizePrefix;
-
+
/// HasREX_WPrefix - This bit is set to true if the instruction should have
/// the 0x40 REX prefix. This is set for i64 types.
bit HasREX_WPrefix = hasREX_WPrefix;
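X86TypeInfo is TableGen's equivalent of a traits class: one record per integer width, consumed by the ITy template defined a little further down. A rough C++ analogy, purely illustrative, selecting fields that mirror InstrSuffix, HasOpSizePrefix, and HasREX_WPrefix above:

#include <cstdint>

template <typename T> struct X86TypeTraits;  // one specialization per width

template <> struct X86TypeTraits<uint16_t> {
  static constexpr const char *Suffix = "w";     // InstrSuffix for i16
  static constexpr bool HasOpSizePrefix = true;  // i16 needs the 0x66 prefix
  static constexpr bool HasREX_WPrefix = false;
};

template <> struct X86TypeTraits<uint64_t> {
  static constexpr const char *Suffix = "q";     // InstrSuffix for i64
  static constexpr bool HasOpSizePrefix = false;
  static constexpr bool HasREX_WPrefix = true;   // i64 needs REX.W
};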
@@ -624,12 +669,12 @@ def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
/// 3. Infers whether the instruction should have a 0x40 REX_W prefix.
/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
/// or 1 (for i16,i32,i64 operations).
-class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
string mnemonic, string args, list<dag> pattern,
InstrItinClass itin = IIC_BIN_NONMEM>
: I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
- f, outs, ins,
+ f, outs, ins,
!strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern,
itin> {
@@ -644,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f = MRMDestReg>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
+ Sched<[WriteALU]>;
// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
// just a regclass (no eflags) as a result.
@@ -687,18 +733,22 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo,
(outs typeinfo.RegClass:$dst),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
}
// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding).
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
}
// BinOpRM - Instructions like "add reg, reg, [mem]".
@@ -706,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, list<dag> pattern>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
// BinOpRM_R - Instructions like "add reg, reg, [mem]".
class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -742,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -764,13 +816,13 @@ class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
- [(set typeinfo.RegClass:$dst, EFLAGS,
+ [(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
- [(set typeinfo.RegClass:$dst, EFLAGS,
+ [(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
EFLAGS))]>;
@@ -779,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -789,7 +842,7 @@ class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst,
(opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
-
+
// BinOpRI8_F - Instructions like "cmp reg, imm8".
class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
@@ -817,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
list<dag> pattern>
: ITy<opcode, MRMDestMem, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>;
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]>;
// BinOpMR_RMW - Instructions like "add [mem], reg".
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -845,21 +899,22 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern, bits<8> opcode = 0x80>
: ITy<opcode, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
let ImmT = typeinfo.ImmEncoding;
}
// BinOpMI_RMW - Instructions like "add [mem], imm".
class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
- : BinOpMI<mnemonic, typeinfo, f,
+ : BinOpMI<mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src), addr:$dst),
(implicit EFLAGS)]>;
// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
- : BinOpMI<mnemonic, typeinfo, f,
+ : BinOpMI<mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
(implicit EFLAGS)]>;
@@ -867,7 +922,7 @@ class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
// BinOpMI_F - Instructions like "cmp [mem], imm".
class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
- : BinOpMI<mnemonic, typeinfo, f,
+ : BinOpMI<mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src))],
opcode>;
@@ -877,7 +932,8 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern>
: ITy<0x82, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -909,10 +965,11 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Register areg, string operands>
: ITy<opcode, RawFrm, typeinfo,
(outs), (ins typeinfo.ImmOperand:$src),
- mnemonic, operands, []> {
+ mnemonic, operands, []>, Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
let Uses = [areg];
let Defs = [areg];
+ let hasSideEffects = 0;
}
/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
@@ -928,61 +985,61 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR,
isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def #NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
- def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
- def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
- def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
+ def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
+ def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
} // isCommutable
- def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
- def #NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
- def #NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
- def #NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
- def #NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+ def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
+ def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
+ def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
- def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
- def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
-
- def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
- def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
- def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
- def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+
+ def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+ def NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
}
} // Constraints = "$src1 = $dst"
- def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
- def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|AL, $src}">;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|AX, $src}">;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|EAX, $src}">;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|RAX, $src}">;
- }
+ def NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
}
/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
@@ -999,61 +1056,61 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR,
isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
} // isCommutable
- def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
- def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
- def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
- def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
- def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
+ def NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
+ def NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
+ def NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
-
- def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
- def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
}
} // Constraints = "$src1 = $dst"
- def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
- def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|AL, $src}">;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|AX, $src}">;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|EAX, $src}">;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|RAX, $src}">;
- }
+ def NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
}
/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
@@ -1067,60 +1124,60 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
let Defs = [EFLAGS] in {
let isCommutable = CommutableRR,
isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
- def #NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
} // isCommutable
- def #NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
- def #NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
- def #NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
- def #NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
- def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
- def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
- def #NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
- def #NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+ def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
+ def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
+ def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
-
- def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
- def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
- def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
- def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
}
- def #NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
- def #NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
- def #NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
- def #NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def #NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
- def #NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
- def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
- def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
-
- def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|AL, $src}">;
- def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|AX, $src}">;
- def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|EAX, $src}">;
- def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|RAX, $src}">;
- }
+ def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+ def NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|AL, $src}">;
+ def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|AX, $src}">;
+ def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|EAX, $src}">;
+ def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|RAX, $src}">;
+ }
}
@@ -1180,7 +1237,7 @@ let isCompare = 1, Defs = [EFLAGS] in {
def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>;
def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
-
+
def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL,
"{$src, %al|AL, $src}">;
def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX,
@@ -1194,7 +1251,7 @@ let isCompare = 1, Defs = [EFLAGS] in {
// register class is constrained to GR8_NOREX.
let isPseudo = 1 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
- "", [], IIC_BIN_NONMEM>;
+ "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
}
//===----------------------------------------------------------------------===//
@@ -1204,12 +1261,13 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
PatFrag ld_frag> {
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))],
- IIC_BIN_NONMEM>;
+ [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))],
+ IIC_BIN_NONMEM>, Sched<[WriteALU]>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
- (X86andn_flag RC:$src1, (ld_frag addr:$src2)))], IIC_BIN_MEM>;
+ (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -1217,6 +1275,17 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W;
}
+let Predicates = [HasBMI] in {
+ def : Pat<(and (not GR32:$src1), GR32:$src2),
+ (ANDN32rr GR32:$src1, GR32:$src2)>;
+ def : Pat<(and (not GR64:$src1), GR64:$src2),
+ (ANDN64rr GR64:$src1, GR64:$src2)>;
+ def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
+ (ANDN32rm GR32:$src1, addr:$src2)>;
+ def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
+ (ANDN64rm GR64:$src1, addr:$src2)>;
+}
+
//===----------------------------------------------------------------------===//
// MULX Instruction
//
@@ -1225,12 +1294,12 @@ let neverHasSideEffects = 1 in {
let isCommutable = 1 in
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- [], IIC_MUL8>, T8XD, VEX_4V;
+ [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMul]>;
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- [], IIC_MUL8>, T8XD, VEX_4V;
+ [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMulLd]>;
}
}
@@ -1240,3 +1309,53 @@ let Predicates = [HasBMI2] in {
let Uses = [RDX] in
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W;
}
+
+//===----------------------------------------------------------------------===//
+// ADCX Instruction
+//
+let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
+ def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "adcx{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8, OpSize;
+
+ def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "adcx{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
+
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
+ def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "adcx{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8, OpSize;
+
+ def ADCX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "adcx{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ADOX Instruction
+//
+let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
+ def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "adox{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8XS;
+
+ def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "adox{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
+
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
+ def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "adox{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8XS;
+
+ def ADOX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "adox{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+ }
+}
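
Two things happen in the X86InstrArithmetic.td diff above. The ADX hunks add ADCX and ADOX with empty patterns, hasSideEffects = 0, and Defs = [EFLAGS], so at this point they are visible to the assembler and disassembler but never produced by instruction selection. The rest is a mechanical migration from TableGen's legacy #NAME# token to the bare NAME operand of the # paste operator; both expand to the name supplied at the defm site. A minimal self-contained sketch of the idiom (HypoInst and HYPO are made-up names, not from this patch):

class HypoInst<string suffix> { string AsmSuffix = suffix; }
multiclass HypoSizes {
  // NAME is the name given at the defm site; # concatenates strings.
  def NAME#8  : HypoInst<"b">;
  def NAME#16 : HypoInst<"w">;
}
defm HYPO : HypoSizes;   // instantiates records HYPO8 and HYPO16
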
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index adeaf5410dcc..a967a4da5cf7 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -16,20 +16,20 @@
// SetCC instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
- isCommutable = 1 in {
- def #NAME#16rr
+ isCommutable = 1, SchedRW = [WriteALU] in {
+ def NAME#16rr
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))],
IIC_CMOV16_RR>,TB,OpSize;
- def #NAME#32rr
+ def NAME#32rr
: I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst,
(X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))],
IIC_CMOV32_RR>, TB;
- def #NAME#64rr
+ def NAME#64rr
:RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst,
@@ -37,19 +37,20 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
IIC_CMOV32_RR>, TB;
}
- let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
- def #NAME#16rm
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ SchedRW = [WriteALULd, ReadAfterLd] in {
+ def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
CondNode, EFLAGS))], IIC_CMOV16_RM>,
TB, OpSize;
- def #NAME#32rm
+ def NAME#32rm
: I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
CondNode, EFLAGS))], IIC_CMOV32_RM>, TB;
- def #NAME#64rm
+ def NAME#64rm
:RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
@@ -83,11 +84,11 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
!strconcat(Mnemonic, "\t$dst"),
[(set GR8:$dst, (X86setcc OpNode, EFLAGS))],
- IIC_SET_R>, TB;
+ IIC_SET_R>, TB, Sched<[WriteALU]>;
def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
!strconcat(Mnemonic, "\t$dst"),
[(store (X86setcc OpNode, EFLAGS), addr:$dst)],
- IIC_SET_M>, TB;
+ IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>;
} // Uses = [EFLAGS]
}
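
The CMOV and SETcc hunks show the scheduling theme of the whole patch: register-register forms are tagged WriteALU, while load-folding forms get WriteALULd plus ReadAfterLd so that operands other than the address may be read after the load issues. Two equivalent spellings appear, a surrounding let and a trailing Sched<[...]> mixin whose SchedRW field overrides the one declared on Instruction. A reduced self-contained sketch, all names made up:

class HypoSched;
def HypoWriteALU   : HypoSched;
def HypoWriteALULd : HypoSched;
class HypoSchedMixin<list<HypoSched> rw> { list<HypoSched> SchedRW = rw; }
class HypoInstBase { list<HypoSched> SchedRW = []; }

let SchedRW = [HypoWriteALU] in        // spelling 1: surrounding let
def HYPO_RR : HypoInstBase;
def HYPO_RM : HypoInstBase,            // spelling 2: trailing mixin;
    HypoSchedMixin<[HypoWriteALULd]>;  // the later parent's value wins
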
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 9e6f27988f71..d9ff0c63c55f 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -149,11 +149,12 @@ let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in {
//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
//
+let SchedRW = [WriteSystem] in {
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)], IIC_RET>;
+ [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
@@ -161,7 +162,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR64:$addr)], IIC_RET>;
+ [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
@@ -186,6 +187,7 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
Requires<[In64BitMode]>;
}
}
+} // SchedRW
let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
@@ -220,7 +222,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
// encoding and avoids a partial-register update sometimes, but doing so
@@ -229,11 +231,12 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
// to an MCInst.
def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
"",
- [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
+ [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize,
+ Sched<[WriteZero]>;
// FIXME: Set encoding to pseudo.
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
}
// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -245,7 +248,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
let Defs = [EFLAGS], isCodeGenOnly=1,
AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -254,10 +257,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
"", [(set GR64:$dst, i64immZExt32:$src)],
- IIC_ALU_NONMEM>;
+ IIC_ALU_NONMEM>, Sched<[WriteALU]>;
// Use sbb to materialize carry bit.
-let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in {
+let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
// However, Pat<> can't replicate the destination reg into the inputs of the
// result.
@@ -320,6 +323,7 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
+let SchedRW = [WriteMicrocoded] in {
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
[(X86rep_movs i8)], IIC_REP_MOVS>, REP,
@@ -382,6 +386,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
[(X86rep_stos i64)], IIC_REP_STOS>, REP,
Requires<[In64BitMode]>;
}
+} // SchedRW
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
@@ -513,18 +518,22 @@ def CMOV_RFP80 : I<0, Pseudo,
multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
- def #NAME#8 : I<0, Pseudo, (outs GR8:$dst),
- (ins i8mem:$ptr, GR8:$val),
- !strconcat(mnemonic, "8 PSEUDO!"), []>;
- def #NAME#16 : I<0, Pseudo,(outs GR16:$dst),
- (ins i16mem:$ptr, GR16:$val),
- !strconcat(mnemonic, "16 PSEUDO!"), []>;
- def #NAME#32 : I<0, Pseudo, (outs GR32:$dst),
- (ins i32mem:$ptr, GR32:$val),
- !strconcat(mnemonic, "32 PSEUDO!"), []>;
- def #NAME#64 : I<0, Pseudo, (outs GR64:$dst),
- (ins i64mem:$ptr, GR64:$val),
- !strconcat(mnemonic, "64 PSEUDO!"), []>;
+ let Defs = [EFLAGS, AL] in
+ def NAME#8 : I<0, Pseudo, (outs GR8:$dst),
+ (ins i8mem:$ptr, GR8:$val),
+ !strconcat(mnemonic, "8 PSEUDO!"), []>;
+ let Defs = [EFLAGS, AX] in
+ def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
+ (ins i16mem:$ptr, GR16:$val),
+ !strconcat(mnemonic, "16 PSEUDO!"), []>;
+ let Defs = [EFLAGS, EAX] in
+ def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
+ (ins i32mem:$ptr, GR32:$val),
+ !strconcat(mnemonic, "32 PSEUDO!"), []>;
+ let Defs = [EFLAGS, RAX] in
+ def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
+ (ins i64mem:$ptr, GR64:$val),
+ !strconcat(mnemonic, "64 PSEUDO!"), []>;
}
}
@@ -559,10 +568,11 @@ defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
- let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in
- def #NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- !strconcat(mnemonic, "6432 PSEUDO!"), []>;
+ let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX],
+ mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
+ def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ !strconcat(mnemonic, "6432 PSEUDO!"), []>;
}
defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
@@ -589,12 +599,13 @@ defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
"or{l}\t{$zero, $dst|$dst, $zero}",
- [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
+ [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK,
+ Sched<[WriteALULd, WriteRMW]>;
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
"#MEMBARRIER",
- [(X86MemBarrier)]>;
+ [(X86MemBarrier)]>, Sched<[WriteLoad]>;
// RegOpc corresponds to the mr version of the instruction
// ImmOpc corresponds to the mi version of the instruction
@@ -602,79 +613,80 @@ def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
// ImmMod corresponds to the instruction format of the mi and mi8 versions
multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
Format ImmMod, string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
-
-def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
- RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
- MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- !strconcat(mnemonic, "{b}\t",
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
+
+def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
+ MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ !strconcat(mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_NONMEM>, LOCK;
+def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_NONMEM>, OpSize, LOCK;
+def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ !strconcat(mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
-def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
- MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- !strconcat(mnemonic, "{w}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_NONMEM>, OpSize, LOCK;
-def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
- RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
- MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- !strconcat(mnemonic, "{l}\t",
+ MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_NONMEM>, LOCK;
-def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
- RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
- MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- !strconcat(mnemonic, "{q}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_NONMEM>, LOCK;
-
-def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
- ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
- !strconcat(mnemonic, "{b}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
-
-def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
- ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
- !strconcat(mnemonic, "{w}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, OpSize, LOCK;
-
-def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
- ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
- !strconcat(mnemonic, "{l}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
-
-def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
- ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
- ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
- !strconcat(mnemonic, "{q}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
-def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
- ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
- !strconcat(mnemonic, "{w}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, OpSize, LOCK;
-def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
+ ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
+ !strconcat(mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, OpSize, LOCK;
+
+def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
+ !strconcat(mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
+ !strconcat(mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+
+def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, OpSize, LOCK;
+def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
+ !strconcat(mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [], IIC_ALU_MEM>, LOCK;
+def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
- !strconcat(mnemonic, "{l}\t",
+ ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
+ !strconcat(mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
[], IIC_ALU_MEM>, LOCK;
-def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
- ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
- !strconcat(mnemonic, "{q}\t",
- "{$src2, $dst|$dst, $src2}"),
- [], IIC_ALU_MEM>, LOCK;
}
@@ -689,20 +701,21 @@ defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
// Optimized codegen when the non-memory output is not used.
multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
-
-def #NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
- !strconcat(mnemonic, "{b}\t$dst"),
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
+
+def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
+ !strconcat(mnemonic, "{b}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
+ !strconcat(mnemonic, "{w}\t$dst"),
+ [], IIC_UNARY_MEM>, OpSize, LOCK;
+def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
+ !strconcat(mnemonic, "{l}\t$dst"),
+ [], IIC_UNARY_MEM>, LOCK;
+def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
+ !strconcat(mnemonic, "{q}\t$dst"),
[], IIC_UNARY_MEM>, LOCK;
-def #NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
- !strconcat(mnemonic, "{w}\t$dst"),
- [], IIC_UNARY_MEM>, OpSize, LOCK;
-def #NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
- !strconcat(mnemonic, "{l}\t$dst"),
- [], IIC_UNARY_MEM>, LOCK;
-def #NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
- !strconcat(mnemonic, "{q}\t$dst"),
- [], IIC_UNARY_MEM>, LOCK;
}
}
@@ -714,43 +727,44 @@ multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
SDPatternOperator frag, X86MemOperand x86memop,
InstrItinClass itin> {
let isCodeGenOnly = 1 in {
- def #NAME# : I<Opc, Form, (outs), (ins x86memop:$ptr),
- !strconcat(mnemonic, "\t$ptr"),
- [(frag addr:$ptr)], itin>, TB, LOCK;
+ def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
+ !strconcat(mnemonic, "\t$ptr"),
+ [(frag addr:$ptr)], itin>, TB, LOCK;
}
}
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag,
InstrItinClass itin8, InstrItinClass itin> {
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in {
let Defs = [AL, EFLAGS], Uses = [AL] in
- def #NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
- !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
+ def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
+ !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK;
let Defs = [AX, EFLAGS], Uses = [AX] in
- def #NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
- !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
+ def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
+ !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK;
let Defs = [EAX, EFLAGS], Uses = [EAX] in
- def #NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
- !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
+ def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
+ !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK;
let Defs = [RAX, EFLAGS], Uses = [RAX] in
- def #NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
- !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
- [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
+ def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
+ !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
+ [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK;
}
}
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
+ SchedRW = [WriteALULd, WriteRMW] in {
defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
X86cas8, i64mem,
IIC_CMPX_LOCK_8B>;
}
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
- Predicates = [HasCmpxchg16b] in {
+ Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in {
defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
X86cas16, i128mem,
IIC_CMPX_LOCK_16B>, REX_W;
@@ -763,34 +777,35 @@ defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg",
multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
string frag,
InstrItinClass itin8, InstrItinClass itin> {
- let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
- def #NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
- (ins GR8:$val, i8mem:$ptr),
- !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
- [(set GR8:$dst,
- (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
- itin8>;
- def #NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$val, i16mem:$ptr),
- !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1,
+ SchedRW = [WriteALULd, WriteRMW] in {
+ def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+ [(set GR8:$dst,
+ (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+ itin8>;
+ def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR16:$dst,
+ (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+ itin>, OpSize;
+ def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR32:$dst,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ itin>;
+ def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
[(set
- GR16:$dst,
- (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
- itin>, OpSize;
- def #NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$val, i32mem:$ptr),
- !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR32:$dst,
- (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ GR64:$dst,
+ (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
itin>;
- def #NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$val, i64mem:$ptr),
- !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR64:$dst,
- (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
- itin>;
}
}
@@ -985,9 +1000,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
// This corresponds to add $foo@tpoff, %rax
def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
-// This corresponds to mov foo@tpoff(%rbx), %eax
-def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
- (MOV64rm tglobaltlsaddr :$dst)>;
// Direct PC relative function call for small code model. 32-bit displacement
@@ -1076,12 +1088,14 @@ def : Pat<(X86cmp GR64:$src1, 0),
// inverted.
multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
Instruction Inst64> {
- def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
- (Inst16 GR16:$src2, addr:$src1)>;
- def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
- (Inst32 GR32:$src2, addr:$src1)>;
- def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
- (Inst64 GR64:$src2, addr:$src1)>;
+ let Predicates = [HasCMov] in {
+ def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
+ (Inst16 GR16:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
+ (Inst32 GR32:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
+ (Inst64 GR64:$src2, addr:$src1)>;
+ }
}
defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
@@ -1185,7 +1199,8 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
+// Try this before selecting to OR.
+let AddedComplexity = 5, SchedRW = [WriteALU] in {
let isConvertibleToThreeAddress = 1,
Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
@@ -1232,7 +1247,7 @@ def ADD64ri32_DB : I<0, Pseudo,
[(set GR64:$dst, (or_is_add GR64:$src1,
i64immSExt32:$src2))]>;
}
-} // AddedComplexity
+} // AddedComplexity, SchedRW
//===----------------------------------------------------------------------===//
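
The or_is_add fragment named in the last hunk's context accepts an OR only when the two operands are known to share no set bits; then no bit position can produce a carry, so x | y equals x + y and the OR can be selected as the ADD pseudos above (which are convertible to three-address LEA). A hypothetical fragment of the same shape, assuming the usual TargetSelectionDAG.td context; the predicate body sketches the known-bits reasoning with the ComputeMaskedBits API of this era and is not the tree's exact code:

def hypo_or_is_add : PatFrag<(ops node:$lhs, node:$rhs),
                             (or node:$lhs, node:$rhs), [{
  // Bits possibly set in each operand are ~KnownZero; if those sets are
  // disjoint, no carry is generated and or == add.
  APInt KnownZero0, KnownOne0, KnownZero1, KnownOne1;
  CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0);
  CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1);
  return (~KnownZero0 & ~KnownZero1) == 0;
}]>;
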
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index bfe954114c55..0e696513d47c 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -20,7 +20,7 @@
// The X86retflag return instructions are variadic because we may add ST0 and
// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, FPForm = SpecialFP in {
+ hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
[(X86retflag 0)], IIC_RET>;
@@ -46,7 +46,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
}
// Unconditional branches.
-let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
"jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
@@ -58,7 +58,7 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
}
// Conditional Branches.
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [],
IIC_Jcc>;
@@ -85,7 +85,7 @@ defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
// jcx/jecx/jrcx instructions.
-let isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
// These are the 32-bit versions of this instruction for the asmparser. In
// 32-bit mode, the address size prefix is jcxz and the unprefixed version is
// jecxz.
@@ -110,36 +110,46 @@ let isBranch = 1, isTerminator = 1 in {
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
- [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>;
+ [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>,
+ Sched<[WriteJump]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
- [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>;
+ [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>,
+ Requires<[In32BitMode]>, Sched<[WriteJumpLd]>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
- [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>;
+ [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>,
+ Sched<[WriteJump]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
- [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, Requires<[In64BitMode]>;
+ [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>,
+ Requires<[In64BitMode]>, Sched<[WriteJumpLd]>;
def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
- "ljmp{w}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>, OpSize;
+ "ljmp{w}\t{$seg, $off|$off, $seg}", [],
+ IIC_JMP_FAR_PTR>, OpSize, Sched<[WriteJump]>;
def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
- "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>;
+ "ljmp{l}\t{$seg, $off|$off, $seg}", [],
+ IIC_JMP_FAR_PTR>, Sched<[WriteJump]>;
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
- "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
+ "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
+ Sched<[WriteJump]>;
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
- "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize;
+ "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize,
+ Sched<[WriteJumpLd]>;
def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
- "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
+ "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
+ Sched<[WriteJumpLd]>;
}
// Loop instructions
-
+let SchedRW = [WriteJump] in {
def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>;
def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>;
def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>;
+}
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -152,27 +162,32 @@ let isCall = 1 in
let Uses = [ESP] in {
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i32imm_pcrel:$dst),
- "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>;
+ "call{l}\t$dst", [], IIC_CALL_RI>,
+ Requires<[In32BitMode]>, Sched<[WriteJump]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>,
- Requires<[In32BitMode]>;
+ Requires<[In32BitMode]>, Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
- "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>,
- Requires<[In32BitMode]>;
+ "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))],
+ IIC_CALL_MEM>,
+ Requires<[In32BitMode,FavorMemIndirectCall]>,
+ Sched<[WriteJumpLd]>;
def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"lcall{w}\t{$seg, $off|$off, $seg}", [],
- IIC_CALL_FAR_PTR>, OpSize;
+ IIC_CALL_FAR_PTR>, OpSize, Sched<[WriteJump]>;
def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"lcall{l}\t{$seg, $off|$off, $seg}", [],
- IIC_CALL_FAR_PTR>;
+ IIC_CALL_FAR_PTR>, Sched<[WriteJump]>;
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
- "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize;
+ "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize,
+ Sched<[WriteJumpLd]>;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
- "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
+ "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>,
+ Sched<[WriteJumpLd]>;
// callw for 16 bit code for the assembler.
let isAsmParserOnly = 1 in
@@ -185,7 +200,7 @@ let isCall = 1 in
// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1 in
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP] in {
def TCRETURNdi : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset), []>;
@@ -216,7 +231,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
// RSP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead. Uses for argument
// registers are added manually.
-let isCall = 1, Uses = [RSP] in {
+let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
// NOTE: this pattern doesn't match "X86call imm", because we do not know
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
@@ -231,7 +246,7 @@ let isCall = 1, Uses = [RSP] in {
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))],
IIC_CALL_MEM>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode,FavorMemIndirectCall]>;
def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
"lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
@@ -245,13 +260,12 @@ let isCall = 1, isCodeGenOnly = 1 in
def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst),
"call{q}\t$dst", [], IIC_CALL_RI>,
- Requires<[IsWin64]>;
+ Requires<[IsWin64]>, Sched<[WriteJump]>;
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1 in
- let Uses = [RSP],
- usesCustomInserter = 1 in {
+ isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1,
+ SchedRW = [WriteJump] in {
def TCRETURNdi64 : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset),
[]>;
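
One functional change hides among the scheduling edits in X86InstrControl.td: CALL32m and CALL64m now also require FavorMemIndirectCall, so a load is folded into an indirect call only on targets that prefer the memory form. Requires<[...]> simply overrides the Predicates list declared on Instruction, and every listed predicate must hold for isel to use the pattern. A reduced self-contained sketch with made-up names:

class HypoPredicate;
def HypoIn64BitMode  : HypoPredicate;
def HypoFavorMemCall : HypoPredicate;
class HypoRequires<list<HypoPredicate> ps> {
  list<HypoPredicate> Predicates = ps;
}
class HypoCall { list<HypoPredicate> Predicates = []; }
// Usable only when both predicates evaluate true:
def HYPO_CALL64m : HypoCall,
                   HypoRequires<[HypoIn64BitMode, HypoFavorMemCall]>;
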
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 2eb454ded21b..6dc7175357b3 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -42,48 +42,54 @@ let neverHasSideEffects = 1 in {
let neverHasSideEffects = 1 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALULd]>;
} // neverHasSideEffects = 1
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALULd]>;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
let neverHasSideEffects = 1 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALULd]>;
} // neverHasSideEffects = 1
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALULd]>;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
@@ -92,12 +98,12 @@ let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [], IIC_MOVZX>, TB;
+ [], IIC_MOVZX>, TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [], IIC_MOVZX>, TB;
+ [], IIC_MOVZX>, TB, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
@@ -106,38 +112,42 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+ [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+ [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>;
+ [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>,
+ Sched<[WriteALU]>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>;
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>,
+ Sched<[WriteALULd]>;
// movzbq and movzwq encodings for the disassembler
def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALU]>;
def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALU]>;
def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// FIXME: These should be Pat patterns.
let isCodeGenOnly = 1 in {
@@ -145,17 +155,19 @@ let isCodeGenOnly = 1 in {
// Use movzbl instead of movzbq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+ "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// Use movzwl instead of movzwq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+ "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
- IIC_MOVZX>, TB;
+ IIC_MOVZX>, TB, Sched<[WriteALULd]>;
// There's no movzlq instruction, but movl can be used for this purpose, using
// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
@@ -165,9 +177,10 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
// necessarily all zero. In such cases, we fall back to these explicit zext
// instructions.
def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
- "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>;
+ "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>,
+ Sched<[WriteALU]>;
def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
- IIC_MOVZX>;
+ IIC_MOVZX>, Sched<[WriteALULd]>;
}
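
The FIXME in the X86InstrExtension.td hunk notes that these isCodeGenOnly zero-extension defs would be better written as patterns, since a 32-bit move already zeroes bits 63:32 of the 64-bit destination. A hypothetical sketch of that Pat-based shape, using real register-class and instruction names but not code from this tree:

// zext i32 -> i64 as a plain 32-bit move re-tagged as a 64-bit value.
def : Pat<(i64 (zext GR32:$src)),
          (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;
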
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 959d91a9ab6b..7759a8a2dabb 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -60,14 +60,14 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256> {
defm r213 : fma3p_rm<opc213,
- !strconcat(OpcodeStr, !strconcat("213", PackTy)),
+ !strconcat(OpcodeStr, "213", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
let neverHasSideEffects = 1 in {
defm r132 : fma3p_rm<opc132,
- !strconcat(OpcodeStr, !strconcat("132", PackTy)),
+ !strconcat(OpcodeStr, "132", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
defm r231 : fma3p_rm<opc231,
- !strconcat(OpcodeStr, !strconcat("231", PackTy)),
+ !strconcat(OpcodeStr, "231", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
} // neverHasSideEffects = 1
}
@@ -160,15 +160,15 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
ComplexPattern mem_cpat> {
let neverHasSideEffects = 1 in {
- defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)),
+ defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
x86memop, RC, OpVT, mem_frag>;
- defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)),
+ defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
x86memop, RC, OpVT, mem_frag>;
}
-defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
x86memop, RC, OpVT, mem_frag, OpNode>,
- fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+ fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
memop, mem_cpat, Int, RC>;
}
@@ -220,7 +220,7 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>;
// For disassembler
-let isCodeGenOnly = 1 in
+let isCodeGenOnly = 1, hasSideEffects = 0 in
def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
@@ -294,7 +294,7 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L;
// For disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -307,8 +307,6 @@ let isCodeGenOnly = 1 in {
} // isCodeGenOnly = 1
}
-let Predicates = [HasFMA4] in {
-
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
int_x86_fma_vfmadd_ss>;
@@ -338,29 +336,33 @@ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
int_x86_fma_vfnmsub_sd>;
-defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-} // HasFMA4
+let ExeDomain = SSEPackedSingle in {
+ defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+}
+
+let ExeDomain = SSEPackedDouble in {
+ defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+}
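
Besides splitting the FMA4 packed defs into ExeDomain blocks (so the execution-domain fix pass can tell single from double precision), the fma3 hunks flatten nested !strconcat calls: !strconcat is variadic, so the nested and flat spellings build the same string. A self-contained sketch with made-up names:

class HypoNames<string base, string order, string pack> {
  string Nested = !strconcat(base, !strconcat(order, pack));
  string Flat   = !strconcat(base, order, pack);   // identical result
}
def HypoFMA : HypoNames<"vfmadd", "213", "ps">;    // both: "vfmadd213ps"
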
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 568726e08ece..2224a08d59f4 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -422,7 +422,7 @@ def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
}
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src",
IIC_FLD>;
def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src",
@@ -436,7 +436,7 @@ def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src",
def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src",
IIC_FILD>;
}
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst",
IIC_FST>;
def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst",
@@ -481,7 +481,7 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
} // Predicates = [HasSSE3]
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst",
IIC_FST>;
def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst",
@@ -491,6 +491,7 @@ def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
}
// FP Stack manipulation instructions.
+let SchedRW = [WriteMove] in {
def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op",
IIC_FLD>, D9;
def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op",
@@ -499,6 +500,7 @@ def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op",
IIC_FST>, DD;
def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op",
IIC_FXCH>, D9;
+}
// Floating point constant loads.
let isReMaterializable = 1 in {
@@ -516,19 +518,23 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm1)]>;
}
+let SchedRW = [WriteZero] in {
def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz", IIC_FLDZ>, D9;
def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1", IIC_FIST>, D9;
-
+}
// Floating point compares.
+let SchedRW = [WriteFAdd] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
+} // SchedRW
} // Defs = [FPSW]
+let SchedRW = [WriteFAdd] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
@@ -566,8 +572,10 @@ def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
"fcompi\t$reg", IIC_FCOMI>, DF;
}
+} // SchedRW
// Floating point flag ops.
+let SchedRW = [WriteALU] in {
let Defs = [AX], Uses = [FPSW] in
def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags
(outs), (ins), "fnstsw %ax",
@@ -576,23 +584,26 @@ def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags
 def FNSTCW16m : I<0xD9, MRM7m,                 // [mem16] = X87 control word
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
[(X86fp_cwd_get16 addr:$dst)], IIC_FNSTCW>;
-
+} // SchedRW
let mayLoad = 1 in
 def FLDCW16m : I<0xD9, MRM5m,                   // X87 control word = [mem16]
- (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>;
+ (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>,
+ Sched<[WriteLoad]>;
// FPU control instructions
+let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in
def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", [], IIC_FNINIT>, DB;
def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
"ffree\t$reg", IIC_FFREE>, DD;
-
// Clear exceptions
let Defs = [FPSW] in
def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", [], IIC_FNCLEX>, DB;
+} // SchedRW
// Operandless floating-point instructions for the disassembler.
+let SchedRW = [WriteMicrocoded] in {
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", [], IIC_WAIT>;
def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", [], IIC_FNOP>, D9;
@@ -627,6 +638,7 @@ def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
"fxrstorq\t$src", [], IIC_FXRSTOR>, TB, REX_W,
Requires<[In64BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 268e9fc9c017..0ef9491eb7fc 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -45,14 +45,15 @@ def MRM_D0 : Format<45>;
def MRM_D1 : Format<46>;
def MRM_D4 : Format<47>;
def MRM_D5 : Format<48>;
-def MRM_D8 : Format<49>;
-def MRM_D9 : Format<50>;
-def MRM_DA : Format<51>;
-def MRM_DB : Format<52>;
-def MRM_DC : Format<53>;
-def MRM_DD : Format<54>;
-def MRM_DE : Format<55>;
-def MRM_DF : Format<56>;
+def MRM_D6 : Format<49>;
+def MRM_D8 : Format<50>;
+def MRM_D9 : Format<51>;
+def MRM_DA : Format<52>;
+def MRM_DB : Format<53>;
+def MRM_DC : Format<54>;
+def MRM_DD : Format<55>;
+def MRM_DE : Format<56>;
+def MRM_DF : Format<57>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -208,47 +209,47 @@ class PseudoI<dag oops, dag iops, list<dag> pattern>
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
Domain d = GenericDomain>
: X86Inst<o, f, NoImm, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
Domain d = GenericDomain>
: X86Inst<o, f, Imm8, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
@@ -257,12 +258,12 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
// FPStack Instruction Templates:
// FPI - Floating Point Instruction template.
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm,
- InstrItinClass itin = IIC_DEFAULT>
+ InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, [], itin> {}
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
- InstrItinClass itin = IIC_DEFAULT>
+ InstrItinClass itin = NoItinerary>
: X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> {
let FPForm = fp;
let Pattern = pattern;
@@ -275,14 +276,14 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
@@ -292,7 +293,7 @@ def __xs : XS;
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
@@ -303,7 +304,7 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
// SIi8 - SSE 1 & 2 scalar instructions
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
@@ -350,25 +351,25 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// VPSI - SSE1 instructions with TB prefix in AVX form.
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
Requires<[UseSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
Requires<[UseSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
@@ -388,42 +389,42 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
// MMX operands.
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>;
class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>;
class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[UseSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[UseSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
Requires<[HasAVX]>;
class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
OpSize, Requires<[HasAVX]>;
class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
@@ -433,15 +434,15 @@ class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// S3DI - SSE3 instructions with XD prefix.
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
Requires<[UseSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
Requires<[UseSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[UseSSE3]>;
@@ -458,19 +459,19 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
// classes. They need to be enabled even if AVX is enabled.
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[UseSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[UseSSSE3]>;
class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasSSSE3]>;
class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasSSSE3]>;
@@ -480,11 +481,11 @@ class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[UseSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[UseSSE41]>;
@@ -492,19 +493,19 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[UseSSE42]>;
// SS42FI - SSE 4.2 instructions with T8XD prefix.
// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>;
// SS42AI - SSE 4.2 instructions with TA prefix.
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[UseSSE42]>;
@@ -514,11 +515,11 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
// AVX8I - AVX instructions with T8 and OpSize prefix.
// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8.
class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
Requires<[HasAVX]>;
class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
Requires<[HasAVX]>;
@@ -528,11 +529,11 @@ class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// AVX28I - AVX2 instructions with T8 and OpSize prefix.
// AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8.
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
Requires<[HasAVX2]>;
class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
Requires<[HasAVX2]>;
@@ -541,53 +542,53 @@ class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// AES8I
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasAES]>;
// PCLMUL Instruction Templates
class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
OpSize, Requires<[HasPCLMUL]>;
class AVXPCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
OpSize, VEX_4V, Requires<[HasAVX, HasPCLMUL]>;
// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, T8,
- OpSize, VEX_4V, Requires<[HasFMA]>;
+ OpSize, VEX_4V, FMASC, Requires<[HasFMA]>;
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
- : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
- OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, FMASC, Requires<[HasFMA4]>;
// XOP 2, 3 and 4 Operand Instruction Template
class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
XOP, XOP9, Requires<[HasXOP]>;
// XOP 2, 3 and 4 Operand Instruction Templates with imm byte
class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
XOP, XOP8, Requires<[HasXOP]>;
// XOP 5 operand instruction (VEX encoding!)
class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag>pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>;
@@ -595,33 +596,33 @@ class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
//
class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii32<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm64, outs, ins, asm, itin>, REX_W {
let Pattern = pattern;
let CodeSize = 3;
}
class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: SSI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: SDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: PDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: VPDI<o, F, outs, ins, asm, pattern, itin>, VEX_W;
// MMX Instruction templates
@@ -635,23 +636,23 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
// MMXID - MMX instructions with XD prefix.
// MMXIS - MMX instructions with XS prefix.
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>;
class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, REX_W, Requires<[HasMMX]>;
class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, OpSize, Requires<[HasMMX]>;
class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 73ba0011df1b..2a72fb6f7b2a 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -27,6 +27,11 @@ def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>]>;
+def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>;
+def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>;
+def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>;
+def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>;
+
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
@@ -128,6 +133,7 @@ def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
+def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
@@ -154,7 +160,7 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
-def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
+def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
@@ -187,9 +193,7 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
-def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
-def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
-def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5a99ff004d48..7ba542c87520 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -17,15 +17,15 @@
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
@@ -297,7 +297,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
{ X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
{ X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
- { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE },
{ X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
@@ -355,7 +355,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
{ X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
// AVX 128-bit versions of foldable instructions
- { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE },
{ X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
@@ -467,9 +467,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::RSQRTSSr, X86::RSQRTSSm, 0 },
{ X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
{ X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
- { X86::SQRTPDr_Int, X86::SQRTPDm_Int, TB_ALIGN_16 },
{ X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
- { X86::SQRTPSr_Int, X86::SQRTPSm_Int, TB_ALIGN_16 },
{ X86::SQRTSDr, X86::SQRTSDm, 0 },
{ X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
{ X86::SQRTSSr, X86::SQRTSSm, 0 },
@@ -510,27 +508,25 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
{ X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 },
{ X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 },
- { X86::VMOVUPDrr, X86::VMOVUPDrm, TB_ALIGN_16 },
+ { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
{ X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
{ X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
{ X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
{ X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
- { X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 },
- { X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 },
- { X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 },
- { X86::VPERMILPDri, X86::VPERMILPDmi, TB_ALIGN_16 },
- { X86::VPERMILPSri, X86::VPERMILPSmi, TB_ALIGN_16 },
- { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 },
- { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 },
- { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 },
- { X86::VRCPPSr, X86::VRCPPSm, TB_ALIGN_16 },
- { X86::VRCPPSr_Int, X86::VRCPPSm_Int, TB_ALIGN_16 },
- { X86::VRSQRTPSr, X86::VRSQRTPSm, TB_ALIGN_16 },
- { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, TB_ALIGN_16 },
- { X86::VSQRTPDr, X86::VSQRTPDm, TB_ALIGN_16 },
- { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, TB_ALIGN_16 },
- { X86::VSQRTPSr, X86::VSQRTPSm, TB_ALIGN_16 },
- { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 },
+ { X86::VPABSBrr128, X86::VPABSBrm128, 0 },
+ { X86::VPABSDrr128, X86::VPABSDrm128, 0 },
+ { X86::VPABSWrr128, X86::VPABSWrm128, 0 },
+ { X86::VPERMILPDri, X86::VPERMILPDmi, 0 },
+ { X86::VPERMILPSri, X86::VPERMILPSmi, 0 },
+ { X86::VPSHUFDri, X86::VPSHUFDmi, 0 },
+ { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 },
+ { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
+ { X86::VRCPPSr, X86::VRCPPSm, 0 },
+ { X86::VRCPPSr_Int, X86::VRCPPSm_Int, 0 },
+ { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
+ { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, 0 },
+ { X86::VSQRTPDr, X86::VSQRTPDm, 0 },
+ { X86::VSQRTPSr, X86::VSQRTPSm, 0 },
{ X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
{ X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
{ X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
@@ -541,28 +537,41 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
{ X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
- { X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 },
- { X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 },
+ { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
+ { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
// AVX2 foldable instructions
- { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_32 },
- { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 },
- { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_32 },
- { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_32 },
- { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_32 },
- { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_32 },
- { X86::VRCPPSYr, X86::VRCPPSYm, TB_ALIGN_32 },
- { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, TB_ALIGN_32 },
- { X86::VRSQRTPSYr, X86::VRSQRTPSYm, TB_ALIGN_32 },
- { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, TB_ALIGN_32 },
- { X86::VSQRTPDYr, X86::VSQRTPDYm, TB_ALIGN_32 },
- { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 },
- { X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 },
- { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 },
+ { X86::VPABSBrr256, X86::VPABSBrm256, 0 },
+ { X86::VPABSDrr256, X86::VPABSDrm256, 0 },
+ { X86::VPABSWrr256, X86::VPABSWrm256, 0 },
+ { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
+ { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
+ { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
- // BMI/BMI2 foldable instructions
+ // BMI/BMI2/LZCNT/POPCNT foldable instructions
+ { X86::BEXTR32rr, X86::BEXTR32rm, 0 },
+ { X86::BEXTR64rr, X86::BEXTR64rm, 0 },
+ { X86::BLSI32rr, X86::BLSI32rm, 0 },
+ { X86::BLSI64rr, X86::BLSI64rm, 0 },
+ { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
+ { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
+ { X86::BLSR32rr, X86::BLSR32rm, 0 },
+ { X86::BLSR64rr, X86::BLSR64rm, 0 },
+ { X86::BZHI32rr, X86::BZHI32rm, 0 },
+ { X86::BZHI64rr, X86::BZHI64rm, 0 },
+ { X86::LZCNT16rr, X86::LZCNT16rm, 0 },
+ { X86::LZCNT32rr, X86::LZCNT32rm, 0 },
+ { X86::LZCNT64rr, X86::LZCNT64rm, 0 },
+ { X86::POPCNT16rr, X86::POPCNT16rm, 0 },
+ { X86::POPCNT32rr, X86::POPCNT32rm, 0 },
+ { X86::POPCNT64rr, X86::POPCNT64rm, 0 },
{ X86::RORX32ri, X86::RORX32mi, 0 },
{ X86::RORX64ri, X86::RORX64mi, 0 },
{ X86::SARX32rr, X86::SARX32rm, 0 },
@@ -571,6 +580,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::SHRX64rr, X86::SHRX64rm, 0 },
{ X86::SHLX32rr, X86::SHLX32rm, 0 },
{ X86::SHLX64rr, X86::SHLX64rm, 0 },
+ { X86::TZCNT16rr, X86::TZCNT16rm, 0 },
+ { X86::TZCNT32rr, X86::TZCNT32rm, 0 },
+ { X86::TZCNT64rr, X86::TZCNT64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -691,21 +703,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
{ X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
- { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 },
{ X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
- { X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 },
{ X86::MAXSDrr, X86::MAXSDrm, 0 },
- { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
{ X86::MAXSSrr, X86::MAXSSrm, 0 },
- { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
{ X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
- { X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 },
{ X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
- { X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 },
{ X86::MINSDrr, X86::MINSDrm, 0 },
- { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
{ X86::MINSSrr, X86::MINSSrm, 0 },
- { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
{ X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
@@ -756,6 +760,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
{ X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
{ X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
+ { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
+ { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
+ { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
+ { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
+ { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
+ { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
+ { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
+ { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
{ X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
{ X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 },
{ X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
@@ -827,31 +839,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 },
- { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
+ { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
{ X86::VSQRTSSr, X86::VSQRTSSm, 0 },
- { X86::VADDPDrr, X86::VADDPDrm, TB_ALIGN_16 },
- { X86::VADDPSrr, X86::VADDPSrm, TB_ALIGN_16 },
+ { X86::VADDPDrr, X86::VADDPDrm, 0 },
+ { X86::VADDPSrr, X86::VADDPSrm, 0 },
{ X86::VADDSDrr, X86::VADDSDrm, 0 },
{ X86::VADDSSrr, X86::VADDSSrm, 0 },
- { X86::VADDSUBPDrr, X86::VADDSUBPDrm, TB_ALIGN_16 },
- { X86::VADDSUBPSrr, X86::VADDSUBPSrm, TB_ALIGN_16 },
- { X86::VANDNPDrr, X86::VANDNPDrm, TB_ALIGN_16 },
- { X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 },
- { X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 },
- { X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 },
- { X86::VBLENDPDrri, X86::VBLENDPDrmi, TB_ALIGN_16 },
- { X86::VBLENDPSrri, X86::VBLENDPSrmi, TB_ALIGN_16 },
- { X86::VBLENDVPDrr, X86::VBLENDVPDrm, TB_ALIGN_16 },
- { X86::VBLENDVPSrr, X86::VBLENDVPSrm, TB_ALIGN_16 },
- { X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 },
- { X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 },
+ { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 },
+ { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 },
+ { X86::VANDNPDrr, X86::VANDNPDrm, 0 },
+ { X86::VANDNPSrr, X86::VANDNPSrm, 0 },
+ { X86::VANDPDrr, X86::VANDPDrm, 0 },
+ { X86::VANDPSrr, X86::VANDPSrm, 0 },
+ { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 },
+ { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 },
+ { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 },
+ { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 },
+ { X86::VCMPPDrri, X86::VCMPPDrmi, 0 },
+ { X86::VCMPPSrri, X86::VCMPPSrmi, 0 },
{ X86::VCMPSDrr, X86::VCMPSDrm, 0 },
{ X86::VCMPSSrr, X86::VCMPSSrm, 0 },
- { X86::VDIVPDrr, X86::VDIVPDrm, TB_ALIGN_16 },
- { X86::VDIVPSrr, X86::VDIVPSrm, TB_ALIGN_16 },
+ { X86::VDIVPDrr, X86::VDIVPDrm, 0 },
+ { X86::VDIVPSrr, X86::VDIVPSrm, 0 },
{ X86::VDIVSDrr, X86::VDIVSDrm, 0 },
{ X86::VDIVSSrr, X86::VDIVSSrm, 0 },
{ X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 },
@@ -862,263 +874,267 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 },
{ X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 },
{ X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 },
- { X86::VHADDPDrr, X86::VHADDPDrm, TB_ALIGN_16 },
- { X86::VHADDPSrr, X86::VHADDPSrm, TB_ALIGN_16 },
- { X86::VHSUBPDrr, X86::VHSUBPDrm, TB_ALIGN_16 },
- { X86::VHSUBPSrr, X86::VHSUBPSrm, TB_ALIGN_16 },
+ { X86::VHADDPDrr, X86::VHADDPDrm, 0 },
+ { X86::VHADDPSrr, X86::VHADDPSrm, 0 },
+ { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
+ { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 },
{ X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 },
{ X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 },
- { X86::VMAXPDrr, X86::VMAXPDrm, TB_ALIGN_16 },
- { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, TB_ALIGN_16 },
- { X86::VMAXPSrr, X86::VMAXPSrm, TB_ALIGN_16 },
- { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, TB_ALIGN_16 },
+ { X86::VMAXPDrr, X86::VMAXPDrm, 0 },
+ { X86::VMAXPSrr, X86::VMAXPSrm, 0 },
{ X86::VMAXSDrr, X86::VMAXSDrm, 0 },
- { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 },
{ X86::VMAXSSrr, X86::VMAXSSrm, 0 },
- { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 },
- { X86::VMINPDrr, X86::VMINPDrm, TB_ALIGN_16 },
- { X86::VMINPDrr_Int, X86::VMINPDrm_Int, TB_ALIGN_16 },
- { X86::VMINPSrr, X86::VMINPSrm, TB_ALIGN_16 },
- { X86::VMINPSrr_Int, X86::VMINPSrm_Int, TB_ALIGN_16 },
+ { X86::VMINPDrr, X86::VMINPDrm, 0 },
+ { X86::VMINPSrr, X86::VMINPSrm, 0 },
{ X86::VMINSDrr, X86::VMINSDrm, 0 },
- { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
- { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
- { X86::VMPSADBWrri, X86::VMPSADBWrmi, TB_ALIGN_16 },
- { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 },
- { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 },
+ { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
+ { X86::VMULPDrr, X86::VMULPDrm, 0 },
+ { X86::VMULPSrr, X86::VMULPSrm, 0 },
{ X86::VMULSDrr, X86::VMULSDrm, 0 },
{ X86::VMULSSrr, X86::VMULSSrm, 0 },
- { X86::VORPDrr, X86::VORPDrm, TB_ALIGN_16 },
- { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 },
- { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 },
- { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 },
- { X86::VPACKUSDWrr, X86::VPACKUSDWrm, TB_ALIGN_16 },
- { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 },
- { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 },
- { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 },
- { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 },
- { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 },
- { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 },
- { X86::VPADDUSBrr, X86::VPADDUSBrm, TB_ALIGN_16 },
- { X86::VPADDUSWrr, X86::VPADDUSWrm, TB_ALIGN_16 },
- { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 },
- { X86::VPALIGNR128rr, X86::VPALIGNR128rm, TB_ALIGN_16 },
- { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 },
- { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 },
- { X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 },
- { X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 },
- { X86::VPBLENDWrri, X86::VPBLENDWrmi, TB_ALIGN_16 },
- { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 },
- { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 },
- { X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 },
- { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 },
- { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 },
- { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
- { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 },
- { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
- { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 },
- { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 },
- { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 },
- { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 },
- { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 },
- { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 },
- { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 },
- { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 },
- { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
- { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 },
- { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 },
- { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 },
- { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 },
- { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 },
- { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 },
- { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 },
- { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 },
- { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 },
- { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 },
- { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 },
- { X86::VPMULLWrr, X86::VPMULLWrm, TB_ALIGN_16 },
- { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 },
- { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 },
- { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 },
- { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 },
- { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 },
- { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 },
- { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 },
- { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 },
- { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 },
- { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 },
- { X86::VPSRADrr, X86::VPSRADrm, TB_ALIGN_16 },
- { X86::VPSRAWrr, X86::VPSRAWrm, TB_ALIGN_16 },
- { X86::VPSRLDrr, X86::VPSRLDrm, TB_ALIGN_16 },
- { X86::VPSRLQrr, X86::VPSRLQrm, TB_ALIGN_16 },
- { X86::VPSRLWrr, X86::VPSRLWrm, TB_ALIGN_16 },
- { X86::VPSUBBrr, X86::VPSUBBrm, TB_ALIGN_16 },
- { X86::VPSUBDrr, X86::VPSUBDrm, TB_ALIGN_16 },
- { X86::VPSUBSBrr, X86::VPSUBSBrm, TB_ALIGN_16 },
- { X86::VPSUBSWrr, X86::VPSUBSWrm, TB_ALIGN_16 },
- { X86::VPSUBWrr, X86::VPSUBWrm, TB_ALIGN_16 },
- { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, TB_ALIGN_16 },
- { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, TB_ALIGN_16 },
- { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, TB_ALIGN_16 },
- { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, TB_ALIGN_16 },
- { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, TB_ALIGN_16 },
- { X86::VPXORrr, X86::VPXORrm, TB_ALIGN_16 },
- { X86::VSHUFPDrri, X86::VSHUFPDrmi, TB_ALIGN_16 },
- { X86::VSHUFPSrri, X86::VSHUFPSrmi, TB_ALIGN_16 },
- { X86::VSUBPDrr, X86::VSUBPDrm, TB_ALIGN_16 },
- { X86::VSUBPSrr, X86::VSUBPSrm, TB_ALIGN_16 },
+ { X86::VORPDrr, X86::VORPDrm, 0 },
+ { X86::VORPSrr, X86::VORPSrm, 0 },
+ { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 },
+ { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 },
+ { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 },
+ { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 },
+ { X86::VPADDBrr, X86::VPADDBrm, 0 },
+ { X86::VPADDDrr, X86::VPADDDrm, 0 },
+ { X86::VPADDQrr, X86::VPADDQrm, 0 },
+ { X86::VPADDSBrr, X86::VPADDSBrm, 0 },
+ { X86::VPADDSWrr, X86::VPADDSWrm, 0 },
+ { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 },
+ { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 },
+ { X86::VPADDWrr, X86::VPADDWrm, 0 },
+ { X86::VPALIGNR128rr, X86::VPALIGNR128rm, 0 },
+ { X86::VPANDNrr, X86::VPANDNrm, 0 },
+ { X86::VPANDrr, X86::VPANDrm, 0 },
+ { X86::VPAVGBrr, X86::VPAVGBrm, 0 },
+ { X86::VPAVGWrr, X86::VPAVGWrm, 0 },
+ { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 },
+ { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 },
+ { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 },
+ { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 },
+ { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 },
+ { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 },
+ { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 },
+ { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 },
+ { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 },
+ { X86::VPHADDDrr, X86::VPHADDDrm, 0 },
+ { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 },
+ { X86::VPHADDWrr, X86::VPHADDWrm, 0 },
+ { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 },
+ { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 },
+ { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 },
+ { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 },
+ { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 },
+ { X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
+ { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 },
+ { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
+ { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
+ { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
+ { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
+ { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
+ { X86::VPMINSBrr, X86::VPMINSBrm, 0 },
+ { X86::VPMINSDrr, X86::VPMINSDrm, 0 },
+ { X86::VPMINUDrr, X86::VPMINUDrm, 0 },
+ { X86::VPMINUWrr, X86::VPMINUWrm, 0 },
+ { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
+ { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
+ { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
+ { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
+ { X86::VPMULDQrr, X86::VPMULDQrm, 0 },
+ { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, 0 },
+ { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
+ { X86::VPMULHWrr, X86::VPMULHWrm, 0 },
+ { X86::VPMULLDrr, X86::VPMULLDrm, 0 },
+ { X86::VPMULLWrr, X86::VPMULLWrm, 0 },
+ { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 },
+ { X86::VPORrr, X86::VPORrm, 0 },
+ { X86::VPSADBWrr, X86::VPSADBWrm, 0 },
+ { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 },
+ { X86::VPSIGNBrr, X86::VPSIGNBrm, 0 },
+ { X86::VPSIGNWrr, X86::VPSIGNWrm, 0 },
+ { X86::VPSIGNDrr, X86::VPSIGNDrm, 0 },
+ { X86::VPSLLDrr, X86::VPSLLDrm, 0 },
+ { X86::VPSLLQrr, X86::VPSLLQrm, 0 },
+ { X86::VPSLLWrr, X86::VPSLLWrm, 0 },
+ { X86::VPSRADrr, X86::VPSRADrm, 0 },
+ { X86::VPSRAWrr, X86::VPSRAWrm, 0 },
+ { X86::VPSRLDrr, X86::VPSRLDrm, 0 },
+ { X86::VPSRLQrr, X86::VPSRLQrm, 0 },
+ { X86::VPSRLWrr, X86::VPSRLWrm, 0 },
+ { X86::VPSUBBrr, X86::VPSUBBrm, 0 },
+ { X86::VPSUBDrr, X86::VPSUBDrm, 0 },
+ { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 },
+ { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 },
+ { X86::VPSUBWrr, X86::VPSUBWrm, 0 },
+ { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 },
+ { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 },
+ { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 },
+ { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 },
+ { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 },
+ { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 },
+ { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
+ { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
+ { X86::VPXORrr, X86::VPXORrm, 0 },
+ { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
+ { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
+ { X86::VSUBPDrr, X86::VSUBPDrm, 0 },
+ { X86::VSUBPSrr, X86::VSUBPSrm, 0 },
{ X86::VSUBSDrr, X86::VSUBSDrm, 0 },
{ X86::VSUBSSrr, X86::VSUBSSrm, 0 },
- { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, TB_ALIGN_16 },
- { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, TB_ALIGN_16 },
- { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 },
- { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 },
- { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 },
- { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 },
+ { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 },
+ { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 },
+ { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 },
+ { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 },
+ { X86::VXORPDrr, X86::VXORPDrm, 0 },
+ { X86::VXORPSrr, X86::VXORPSrm, 0 },
// AVX 256-bit foldable instructions
- { X86::VADDPDYrr, X86::VADDPDYrm, TB_ALIGN_32 },
- { X86::VADDPSYrr, X86::VADDPSYrm, TB_ALIGN_32 },
- { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, TB_ALIGN_32 },
- { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, TB_ALIGN_32 },
- { X86::VANDNPDYrr, X86::VANDNPDYrm, TB_ALIGN_32 },
- { X86::VANDNPSYrr, X86::VANDNPSYrm, TB_ALIGN_32 },
- { X86::VANDPDYrr, X86::VANDPDYrm, TB_ALIGN_32 },
- { X86::VANDPSYrr, X86::VANDPSYrm, TB_ALIGN_32 },
- { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, TB_ALIGN_32 },
- { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, TB_ALIGN_32 },
- { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, TB_ALIGN_32 },
- { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, TB_ALIGN_32 },
- { X86::VCMPPDYrri, X86::VCMPPDYrmi, TB_ALIGN_32 },
- { X86::VCMPPSYrri, X86::VCMPPSYrmi, TB_ALIGN_32 },
- { X86::VDIVPDYrr, X86::VDIVPDYrm, TB_ALIGN_32 },
- { X86::VDIVPSYrr, X86::VDIVPSYrm, TB_ALIGN_32 },
- { X86::VHADDPDYrr, X86::VHADDPDYrm, TB_ALIGN_32 },
- { X86::VHADDPSYrr, X86::VHADDPSYrm, TB_ALIGN_32 },
- { X86::VHSUBPDYrr, X86::VHSUBPDYrm, TB_ALIGN_32 },
- { X86::VHSUBPSYrr, X86::VHSUBPSYrm, TB_ALIGN_32 },
- { X86::VINSERTF128rr, X86::VINSERTF128rm, TB_ALIGN_32 },
- { X86::VMAXPDYrr, X86::VMAXPDYrm, TB_ALIGN_32 },
- { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, TB_ALIGN_32 },
- { X86::VMAXPSYrr, X86::VMAXPSYrm, TB_ALIGN_32 },
- { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, TB_ALIGN_32 },
- { X86::VMINPDYrr, X86::VMINPDYrm, TB_ALIGN_32 },
- { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, TB_ALIGN_32 },
- { X86::VMINPSYrr, X86::VMINPSYrm, TB_ALIGN_32 },
- { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, TB_ALIGN_32 },
- { X86::VMULPDYrr, X86::VMULPDYrm, TB_ALIGN_32 },
- { X86::VMULPSYrr, X86::VMULPSYrm, TB_ALIGN_32 },
- { X86::VORPDYrr, X86::VORPDYrm, TB_ALIGN_32 },
- { X86::VORPSYrr, X86::VORPSYrm, TB_ALIGN_32 },
- { X86::VPERM2F128rr, X86::VPERM2F128rm, TB_ALIGN_32 },
- { X86::VPERMILPDYrr, X86::VPERMILPDYrm, TB_ALIGN_32 },
- { X86::VPERMILPSYrr, X86::VPERMILPSYrm, TB_ALIGN_32 },
- { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, TB_ALIGN_32 },
- { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, TB_ALIGN_32 },
- { X86::VSUBPDYrr, X86::VSUBPDYrm, TB_ALIGN_32 },
- { X86::VSUBPSYrr, X86::VSUBPSYrm, TB_ALIGN_32 },
- { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, TB_ALIGN_32 },
- { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, TB_ALIGN_32 },
- { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, TB_ALIGN_32 },
- { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, TB_ALIGN_32 },
- { X86::VXORPDYrr, X86::VXORPDYrm, TB_ALIGN_32 },
- { X86::VXORPSYrr, X86::VXORPSYrm, TB_ALIGN_32 },
+ { X86::VADDPDYrr, X86::VADDPDYrm, 0 },
+ { X86::VADDPSYrr, X86::VADDPSYrm, 0 },
+ { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 },
+ { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 },
+ { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 },
+ { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 },
+ { X86::VANDPDYrr, X86::VANDPDYrm, 0 },
+ { X86::VANDPSYrr, X86::VANDPSYrm, 0 },
+ { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 },
+ { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 },
+ { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 },
+ { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 },
+ { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 },
+ { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
+ { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 },
+ { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
+ { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 },
+ { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 },
+ { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 },
+ { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
+ { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
+ { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
+ { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
+ { X86::VMINPDYrr, X86::VMINPDYrm, 0 },
+ { X86::VMINPSYrr, X86::VMINPSYrm, 0 },
+ { X86::VMULPDYrr, X86::VMULPDYrm, 0 },
+ { X86::VMULPSYrr, X86::VMULPSYrm, 0 },
+ { X86::VORPDYrr, X86::VORPDYrm, 0 },
+ { X86::VORPSYrr, X86::VORPSYrm, 0 },
+ { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 },
+ { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 },
+ { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 },
+ { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 },
+ { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 },
+ { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 },
+ { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 },
+ { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 },
+ { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 },
+ { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 },
+ { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 },
+ { X86::VXORPDYrr, X86::VXORPDYrm, 0 },
+ { X86::VXORPSYrr, X86::VXORPSYrm, 0 },
// AVX2 foldable instructions
- { X86::VINSERTI128rr, X86::VINSERTI128rm, TB_ALIGN_16 },
- { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_32 },
- { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_32 },
- { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_32 },
- { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_32 },
- { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_32 },
- { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_32 },
- { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_32 },
- { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_32 },
- { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_32 },
- { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_32 },
- { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_32 },
- { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_32 },
- { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_32 },
- { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_32 },
- { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_32 },
- { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_32 },
- { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_32 },
- { X86::VPBLENDDrri, X86::VPBLENDDrmi, TB_ALIGN_32 },
- { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, TB_ALIGN_32 },
- { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, TB_ALIGN_32 },
- { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_32 },
- { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_32 },
- { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_32 },
- { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_32 },
- { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_32 },
- { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_32 },
- { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_32 },
- { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 },
- { X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 },
- { X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 },
- { X86::VPERMPDYri, X86::VPERMPDYmi, TB_ALIGN_32 },
- { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 },
- { X86::VPERMQYri, X86::VPERMQYmi, TB_ALIGN_32 },
- { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 },
- { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 },
- { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 },
- { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 },
- { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 },
- { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 },
- { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 },
- { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 },
- { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 },
- { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 },
- { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 },
- { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 },
- { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 },
- { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 },
- { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 },
- { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_32 },
- { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_32 },
- { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_32 },
- { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_32 },
- { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 },
- { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 },
- { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 },
- { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 },
- { X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 },
- { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 },
- { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 },
- { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 },
- { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 },
- { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 },
- { X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 },
- { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_32 },
- { X86::VPSLLVQrr, X86::VPSLLVQrm, TB_ALIGN_16 },
- { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_32 },
- { X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 },
- { X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 },
- { X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 },
- { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_32 },
- { X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 },
- { X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 },
- { X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 },
- { X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 },
- { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_32 },
- { X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 },
- { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_32 },
- { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_32 },
- { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_32 },
- { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_32 },
- { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_32 },
- { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_32 },
- { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_32 },
- { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_32 },
- { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 },
- { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_32 },
- { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
- { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
+ { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 },
+ { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 },
+ { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 },
+ { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 },
+ { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 },
+ { X86::VPADDBYrr, X86::VPADDBYrm, 0 },
+ { X86::VPADDDYrr, X86::VPADDDYrm, 0 },
+ { X86::VPADDQYrr, X86::VPADDQYrm, 0 },
+ { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 },
+ { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 },
+ { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 },
+ { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 },
+ { X86::VPADDWYrr, X86::VPADDWYrm, 0 },
+ { X86::VPALIGNR256rr, X86::VPALIGNR256rm, 0 },
+ { X86::VPANDNYrr, X86::VPANDNYrm, 0 },
+ { X86::VPANDYrr, X86::VPANDYrm, 0 },
+ { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 },
+ { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 },
+ { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 },
+ { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 },
+ { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
+ { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 },
+ { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 },
+ { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 },
+ { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 },
+ { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 },
+ { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 },
+ { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 },
+ { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 },
+ { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 },
+ { X86::VPERMDYrr, X86::VPERMDYrm, 0 },
+ { X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
+ { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 },
+ { X86::VPERMQYri, X86::VPERMQYmi, 0 },
+ { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 },
+ { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 },
+ { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 },
+ { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 },
+ { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 },
+ { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
+ { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, 0 },
+ { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
+ { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
+ { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
+ { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
+ { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
+ { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 },
+ { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 },
+ { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 },
+ { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 },
+ { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
+ { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
+ { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
+ { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
+ { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
+ { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
+ { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, 0 },
+ { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 },
+ { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 },
+ { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 },
+ { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 },
+ { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 },
+ { X86::VPORYrr, X86::VPORYrm, 0 },
+ { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 },
+ { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 },
+ { X86::VPSIGNBYrr, X86::VPSIGNBYrm, 0 },
+ { X86::VPSIGNWYrr, X86::VPSIGNWYrm, 0 },
+ { X86::VPSIGNDYrr, X86::VPSIGNDYrm, 0 },
+ { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 },
+ { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 },
+ { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 },
+ { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 },
+ { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 },
+ { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 },
+ { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 },
+ { X86::VPSRADYrr, X86::VPSRADYrm, 0 },
+ { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 },
+ { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 },
+ { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 },
+ { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 },
+ { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 },
+ { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 },
+ { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 },
+ { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 },
+ { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 },
+ { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 },
+ { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 },
+ { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 },
+ { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 },
+ { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 },
+ { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 },
+ { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 },
+ { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 },
+ { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 },
+ { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 },
+ { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 },
+ { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 },
+ { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 },
+ { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 },
+ { X86::VPXORYrr, X86::VPXORYrm, 0 },
// FIXME: add AVX 256-bit foldable instructions
// FMA4 foldable patterns
@@ -1156,8 +1172,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 },
// BMI/BMI2 foldable instructions
+ { X86::ANDN32rr, X86::ANDN32rm, 0 },
+ { X86::ANDN64rr, X86::ANDN64rm, 0 },
{ X86::MULX32rr, X86::MULX32rm, 0 },
{ X86::MULX64rr, X86::MULX64rm, 0 },
+ { X86::PDEP32rr, X86::PDEP32rm, 0 },
+ { X86::PDEP64rr, X86::PDEP64rm, 0 },
+ { X86::PEXT32rr, X86::PEXT32rm, 0 },
+ { X86::PEXT64rr, X86::PEXT64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -1551,16 +1573,19 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVUPSrm:
case X86::MOVAPDrm:
case X86::MOVDQArm:
+ case X86::MOVDQUrm:
case X86::VMOVSSrm:
case X86::VMOVSDrm:
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::FsVMOVAPSrm:
@@ -2159,7 +2184,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
}
MI->setDesc(get(Opc));
MI->getOperand(3).setImm(Size-Amt);
- return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
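// Worked example (illustrative only, not part of the patch): commuting a
// 32-bit double shift flips the opcode and complements the amount, since
// with swapped operands
//   SHRD32rri8 %eax, %ebx, 5  ==  SHLD32rri8 %ebx, %eax, 27   (Size-Amt = 32-5)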
@@ -2238,7 +2263,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Fallthrough intended.
}
default:
- return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
}
@@ -2840,6 +2865,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
// Moving EFLAGS to / from another register requires a push and a pop.
+ // Notice that we have to adjust the stack if we don't want to clobber the
+ // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
if (SrcReg == X86::EFLAGS) {
if (X86::GR64RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
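// Hedged illustration (assumption, not from the patch): with a 64-bit GPR
// destination the copy completes as a flag push followed by a pop into the
// destination register, e.g.:
//   BuildMI(MBB, MI, DL, get(X86::PUSHF64));
//   BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);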
@@ -3149,19 +3176,15 @@ inline static bool isDefConvertible(MachineInstr *MI) {
case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
- case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
- case X86::DEC64m: case X86::DEC32m: case X86::DEC16m: case X86::DEC8m:
+ case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
case X86::DEC64_32r: case X86::DEC64_16r:
- case X86::DEC64_32m: case X86::DEC64_16m:
case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
- case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
- case X86::INC64m: case X86::INC32m: case X86::INC16m: case X86::INC8m:
+ case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
case X86::INC64_32r: case X86::INC64_16r:
- case X86::INC64_32m: case X86::INC64_16m:
case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
@@ -3177,6 +3200,8 @@ inline static bool isDefConvertible(MachineInstr *MI) {
case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
+ case X86::ANDN32rr: case X86::ANDN32rm:
+ case X86::ANDN64rr: case X86::ANDN64rm:
return true;
}
}
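// Illustrative sketch (assumption, not from the patch): isDefConvertible
// feeds the compare optimizer, letting it drop a redundant test once the
// defining instruction is known to set EFLAGS, e.g.:
//   %eax = ANDN32rr %ebx, %ecx   ; defines EFLAGS as a side effect
//   TEST32rr %eax, %eax          ; now removable
//   JE %bb.exit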
@@ -3499,43 +3524,44 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
/// to:
/// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
///
-static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) {
+static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
+ const MCInstrDesc &Desc) {
assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
- unsigned Reg = MI->getOperand(0).getReg();
- MI->setDesc(Desc);
+ unsigned Reg = MIB->getOperand(0).getReg();
+ MIB->setDesc(Desc);
// MachineInstr::addOperand() will insert explicit operands before any
// implicit operands.
- MachineInstrBuilder(MI).addReg(Reg, RegState::Undef)
- .addReg(Reg, RegState::Undef);
+ MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
// But we don't trust that.
- assert(MI->getOperand(1).getReg() == Reg &&
- MI->getOperand(2).getReg() == Reg && "Misplaced operand");
+ assert(MIB->getOperand(1).getReg() == Reg &&
+ MIB->getOperand(2).getReg() == Reg && "Misplaced operand");
return true;
}
bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
switch (MI->getOpcode()) {
case X86::SETB_C8r:
- return Expand2AddrUndef(MI, get(X86::SBB8rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:
- return Expand2AddrUndef(MI, get(X86::SBB16rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB16rr));
case X86::SETB_C32r:
- return Expand2AddrUndef(MI, get(X86::SBB32rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB32rr));
case X86::SETB_C64r:
- return Expand2AddrUndef(MI, get(X86::SBB64rr));
+ return Expand2AddrUndef(MIB, get(X86::SBB64rr));
case X86::V_SET0:
case X86::FsFLD0SS:
case X86::FsFLD0SD:
- return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
+ return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::AVX_SET0:
assert(HasAVX && "AVX not supported");
- return Expand2AddrUndef(MI, get(X86::VXORPSYrr));
+ return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
case X86::V_SETALLONES:
- return Expand2AddrUndef(MI, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
+ return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
- return Expand2AddrUndef(MI, get(X86::VPCMPEQDYrr));
+ return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
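// Hedged before/after sketch for the V_SET0 expansion handled above
// (illustrative only):
//   %xmm4 = V_SET0
// becomes, after expandPostRAPseudo:
//   %xmm4 = XORPSrr undef %xmm4, undef %xmm4   ; VXORPSrr when AVX is on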
@@ -3561,9 +3587,10 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MachineInstr *MI,
const TargetInstrInfo &TII) {
// Create the base instruction with the memory operand as the first part.
+ // Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
- MachineInstrBuilder MIB(NewMI);
+ MachineInstrBuilder MIB(MF, NewMI);
unsigned NumAddrOps = MOs.size();
for (unsigned i = 0; i != NumAddrOps; ++i)
MIB.addOperand(MOs[i]);
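// Hedged note (assumption): the trailing 'true' passed to CreateMachineInstr
// above is the NoImplicit flag -- it suppresses the implicit operands BuildMI
// would normally append, so the fused instruction lays out its operands by
// hand:
//   MachineInstr *NewMI =
//       MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(),
//                             /*NoImplicit=*/true);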
@@ -3587,9 +3614,10 @@ static MachineInstr *FuseInst(MachineFunction &MF,
unsigned Opcode, unsigned OpNo,
const SmallVectorImpl<MachineOperand> &MOs,
MachineInstr *MI, const TargetInstrInfo &TII) {
+ // Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
- MachineInstrBuilder MIB(NewMI);
+ MachineInstrBuilder MIB(MF, NewMI);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -3627,7 +3655,16 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Align) const {
const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect();
bool isTwoAddrFold = false;
+
+ // Atom favors the register form of call, so we do not fold loads into
+ // calls when the subtarget is Atom.
+ if (isCallRegIndirect &&
+ (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) {
+ return NULL;
+ }
+
unsigned NumOps = MI->getDesc().getNumOperands();
bool isTwoAddr = NumOps > 1 &&
MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
@@ -3836,8 +3873,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize) &&
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
@@ -3878,8 +3915,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize) &&
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
return 0;
@@ -3982,6 +4019,21 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
break;
}
default: {
+ if ((LoadMI->getOpcode() == X86::MOVSSrm ||
+ LoadMI->getOpcode() == X86::VMOVSSrm) &&
+ MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
+ > 4)
+ // These instructions only load 32 bits, so we can't fold them if the
+ // destination register is wider than 32 bits (4 bytes).
+ return NULL;
+ if ((LoadMI->getOpcode() == X86::MOVSDrm ||
+ LoadMI->getOpcode() == X86::VMOVSDrm) &&
+ MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
+ > 8)
+ // These instructions only load 64 bits, so we can't fold them if the
+ // destination register is wider than 64 bits (8 bytes).
+ return NULL;
+
// Folding a normal load. Just copy the load's address operands.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
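// Worked example (illustrative only): folding the 4-byte load below into a
// full-width XMM op would widen the memory access past the original load:
//   %xmm1 = MOVSSrm <fi#0>          ; reads 4 bytes
//   %xmm2 = ADDPSrr %xmm0, %xmm1
// Folding into ADDPSrm would read 16 bytes from <fi#0>, possibly out of
// bounds, so the checks above reject destinations wider than the load.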
@@ -4049,7 +4101,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
return true;
- return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
+ return TargetInstrInfo::canFoldMemoryOperand(MI, Ops);
}
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
@@ -4114,7 +4166,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the data processing instruction.
MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
- MachineInstrBuilder MIB(DataMI);
+ MachineInstrBuilder MIB(MF, DataMI);
if (FoldedStore)
MIB.addReg(Reg, RegState::Define);
@@ -4620,13 +4672,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::DIVSSrr:
case X86::DIVSSrr_Int:
case X86::SQRTPDm:
- case X86::SQRTPDm_Int:
case X86::SQRTPDr:
- case X86::SQRTPDr_Int:
case X86::SQRTPSm:
- case X86::SQRTPSm_Int:
case X86::SQRTPSr:
- case X86::SQRTPSr_Int:
case X86::SQRTSDm:
case X86::SQRTSDm_Int:
case X86::SQRTSDr:
@@ -4645,13 +4693,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::VDIVSSrr:
case X86::VDIVSSrr_Int:
case X86::VSQRTPDm:
- case X86::VSQRTPDm_Int:
case X86::VSQRTPDr:
- case X86::VSQRTPDr_Int:
case X86::VSQRTPSm:
- case X86::VSQRTPSm_Int:
case X86::VSQRTPSr:
- case X86::VSQRTPSr_Int:
case X86::VSQRTSDm:
case X86::VSQRTSDm_Int:
case X86::VSQRTSDr:
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 650fa95d7f23..ccc1aa2e35a5 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -142,6 +142,9 @@ def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>;
def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
[SDNPHasChain, SDNPSideEffect]>;
+def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
@@ -247,9 +250,9 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
-def X86blsi_flag : SDNode<"X86ISD::BLSI", SDTUnaryArithWithFlags>;
-def X86blsmsk_flag : SDNode<"X86ISD::BLSMSK", SDTUnaryArithWithFlags>;
-def X86blsr_flag : SDNode<"X86ISD::BLSR", SDTUnaryArithWithFlags>;
+def X86blsi : SDNode<"X86ISD::BLSI", SDTIntUnaryOp>;
+def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>;
+def X86blsr : SDNode<"X86ISD::BLSR", SDTIntUnaryOp>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -525,6 +528,13 @@ def lea64_32mem : Operand<i32> {
let ParserMatchClass = X86MemAsmOperand;
}
+// Memory operands that use 64-bit pointers in both ILP32 and LP64.
+def lea64mem : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
@@ -535,6 +545,12 @@ def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex],
[]>;
+// In 64-bit mode 32-bit LEAs can use RIP-relative addressing.
+def lea64_32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or,
+ frameindex, X86WrapperRIP],
+ []>;
+
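+// Hedged example (assumption, not from the patch): with X86WrapperRIP in the
+// match list, a 32-bit address computation in 64-bit mode can select, e.g.:
+//   leal sym(%rip), %eax
+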
def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
@@ -590,13 +606,19 @@ def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
+def HasHLE : Predicate<"Subtarget->hasHLE()">;
+def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
+def HasADX : Predicate<"Subtarget->hasADX()">;
+def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
+def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
- AssemblerPredicate<"!Mode64Bit">;
+ AssemblerPredicate<"!Mode64Bit", "32-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
- AssemblerPredicate<"Mode64Bit">;
+ AssemblerPredicate<"Mode64Bit", "64-bit mode">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
@@ -612,6 +634,7 @@ def OptForSize : Predicate<"OptForSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -744,7 +767,7 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
//
// Nop
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteZero] in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>;
def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero),
"nop{w}\t$zero", [], IIC_NOP>, TB, OpSize;
@@ -755,8 +778,9 @@ let neverHasSideEffects = 1 in {
// Constructing a stack frame.
def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
- "enter\t$len, $lvl", [], IIC_ENTER>;
+ "enter\t$len, $lvl", [], IIC_ENTER>, Sched<[WriteMicrocoded]>;
+let SchedRW = [WriteALU] in {
let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
def LEAVE : I<0xC9, RawFrm,
(outs), (ins), "leave", [], IIC_LEAVE>,
@@ -766,13 +790,14 @@ let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
def LEAVE64 : I<0xC9, RawFrm,
(outs), (ins), "leave", [], IIC_LEAVE>,
Requires<[In64BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//
let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
IIC_POP_REG16>, OpSize;
def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
@@ -789,9 +814,9 @@ def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [],
def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize;
def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
Requires<[In32BitMode]>;
-}
+} // mayLoad, SchedRW
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
IIC_PUSH_REG>, OpSize;
def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
@@ -818,29 +843,30 @@ def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>,
def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>,
Requires<[In32BitMode]>;
-}
+} // mayStore, SchedRW
}
let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP64r : I<0x58, AddRegFrm,
(outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>;
def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
IIC_POP_REG>;
def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [],
IIC_POP_MEM>;
-}
-let mayStore = 1 in {
+} // mayLoad, SchedRW
+let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH64r : I<0x50, AddRegFrm,
(outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>;
def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>;
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
IIC_PUSH_MEM>;
-}
+} // mayStore, SchedRW
}
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1,
+ SchedRW = [WriteStore] in {
def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
"push{q}\t$imm", [], IIC_PUSH_IMM>;
def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
@@ -851,25 +877,24 @@ def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode]>, Sched<[WriteLoad]>;
let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
- Requires<[In64BitMode]>;
-
-
+ Requires<[In64BitMode]>, Sched<[WriteStore]>;
let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
- mayLoad=1, neverHasSideEffects=1 in {
-def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", [], IIC_POP_A>,
+ mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>,
Requires<[In32BitMode]>;
}
let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
- mayStore=1, neverHasSideEffects=1 in {
-def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", [], IIC_PUSH_A>,
+ mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>,
Requires<[In32BitMode]>;
}
-let Constraints = "$src = $dst" in { // GR32 = bswap GR32
+let Constraints = "$src = $dst", SchedRW = [WriteALU] in {
+// GR32 = bswap GR32
def BSWAP32r : I<0xC8, AddRegFrm,
(outs GR32:$dst), (ins GR32:$src),
"bswap{l}\t$dst",
@@ -878,60 +903,63 @@ def BSWAP32r : I<0xC8, AddRegFrm,
def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
"bswap{q}\t$dst",
[(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
// Bit scan instructions.
let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))],
- IIC_BSF>, TB, OpSize;
+ IIC_BSF>, TB, OpSize, Sched<[WriteShift]>;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))],
- IIC_BSF>, TB, OpSize;
+ IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>;
def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB,
+ Sched<[WriteShift]>;
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))],
- IIC_BSF>, TB;
+ IIC_BSF>, TB, Sched<[WriteShiftLd]>;
def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))],
- IIC_BSF>, TB;
+ IIC_BSF>, TB, Sched<[WriteShift]>;
def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))],
- IIC_BSF>, TB;
+ IIC_BSF>, TB, Sched<[WriteShiftLd]>;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteShift]>;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))],
IIC_BSR>, TB,
- OpSize;
+ OpSize, Sched<[WriteShiftLd]>;
def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB,
+ Sched<[WriteShift]>;
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))],
- IIC_BSR>, TB;
+ IIC_BSR>, TB, Sched<[WriteShiftLd]>;
def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB,
+ Sched<[WriteShift]>;
def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))],
- IIC_BSR>, TB;
+ IIC_BSR>, TB, Sched<[WriteShiftLd]>;
} // Defs = [EFLAGS]
-
+let SchedRW = [WriteMicrocoded] in {
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", [], IIC_MOVS>;
@@ -959,12 +987,12 @@ def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", [], IIC_CMPS>;
def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", [], IIC_CMPS>, OpSize;
def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", [], IIC_CMPS>;
def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>;
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Move Instructions.
//
-
+let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in {
def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
@@ -975,6 +1003,7 @@ def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
+
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
@@ -992,7 +1021,9 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>;
}
+} // SchedRW
+let SchedRW = [WriteStore] in {
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>;
@@ -1005,9 +1036,11 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
/// 32-bit offset from the PC. These are only valid in x86-32 mode.
+let SchedRW = [WriteALU] in {
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
@@ -1026,6 +1059,7 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
+}
// FIXME: These definitions are utterly broken
// Just leave them commented out for now because they're useless outside
@@ -1043,7 +1077,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
*/
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1054,7 +1088,7 @@ def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>;
@@ -1069,6 +1103,7 @@ def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
[(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>;
}
+let SchedRW = [WriteStore] in {
def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(store GR8:$src, addr:$dst)], IIC_MOV_MEM>;
@@ -1081,6 +1116,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store GR64:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
@@ -1089,34 +1125,37 @@ let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
- "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>;
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>,
+ Sched<[WriteMove]>;
let mayStore = 1 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
- IIC_MOV_MEM>;
+ IIC_MOV_MEM>, Sched<[WriteStore]>;
let mayLoad = 1, neverHasSideEffects = 1,
canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
- IIC_MOV_MEM>;
+ IIC_MOV_MEM>, Sched<[WriteLoad]>;
}
// Condition code ops, incl. set if equal/not equal/...
+let SchedRW = [WriteALU] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
[(set EFLAGS, (X86sahf AH))], IIC_AHF>;
let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
IIC_AHF>; // AH = flags
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Bit tests instructions: BT, BTS, BTR, BTC.
let Defs = [EFLAGS] in {
+let SchedRW = [WriteALU] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>,
@@ -1127,31 +1166,35 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB;
+} // SchedRW
// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
// perspective, this is pretty bizarre. Make these instructions disassembly
// only for now.
-def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "bt{w}\t{$src2, $src1|$src1, $src2}",
-// [(X86bt (loadi16 addr:$src1), GR16:$src2),
-// (implicit EFLAGS)]
- [], IIC_BT_MR
- >, OpSize, TB, Requires<[FastBTMem]>;
-def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "bt{l}\t{$src2, $src1|$src1, $src2}",
-// [(X86bt (loadi32 addr:$src1), GR32:$src2),
-// (implicit EFLAGS)]
- [], IIC_BT_MR
- >, TB, Requires<[FastBTMem]>;
-def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
-// [(X86bt (loadi64 addr:$src1), GR64:$src2),
-// (implicit EFLAGS)]
- [], IIC_BT_MR
- >, TB;
-
+let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
+ def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi16 addr:$src1), GR16:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, OpSize, TB, Requires<[FastBTMem]>;
+ def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi32 addr:$src1), GR32:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, TB, Requires<[FastBTMem]>;
+ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ // [(X86bt (loadi64 addr:$src1), GR64:$src2),
+ // (implicit EFLAGS)]
+ [], IIC_BT_MR
+ >, TB;
+}
+
+let SchedRW = [WriteALU] in {
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))],
@@ -1164,10 +1207,12 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))],
IIC_BT_RI>, TB;
+} // SchedRW
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
+let SchedRW = [WriteALU] in {
def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
@@ -1180,8 +1225,10 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi64 addr:$src1),
i64immSExt8:$src2))], IIC_BT_MI>, TB;
+} // SchedRW
-
+let hasSideEffects = 0 in {
+let SchedRW = [WriteALU] in {
def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1189,6 +1236,9 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1196,6 +1246,9 @@ def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
+let SchedRW = [WriteALU] in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1203,6 +1256,9 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1210,7 +1266,9 @@ def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
+let SchedRW = [WriteALU] in {
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1218,6 +1276,9 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1225,6 +1286,9 @@ def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
+let SchedRW = [WriteALU] in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1232,6 +1296,9 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1239,7 +1306,9 @@ def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
+let SchedRW = [WriteALU] in {
def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1247,6 +1316,9 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1254,6 +1326,9 @@ def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
+}
+
+let SchedRW = [WriteALU] in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1261,6 +1336,9 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1268,6 +1346,8 @@ def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
+}
+} // hasSideEffects = 0
} // Defs = [EFLAGS]
@@ -1279,41 +1359,42 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
// operand is referenced, the atomicity is ensured.
multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag,
InstrItinClass itin> {
- let Constraints = "$val = $dst" in {
- def #NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
- (ins GR8:$val, i8mem:$ptr),
- !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR8:$dst,
- (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
- itin>;
- def #NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$val, i16mem:$ptr),
- !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ let Constraints = "$val = $dst", SchedRW = [WriteALULd, WriteRMW] in {
+ def NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR8:$dst,
+ (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))],
+ itin>;
+ def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR16:$dst,
+ (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
+ itin>, OpSize;
+ def NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR32:$dst,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ itin>;
+ def NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
[(set
- GR16:$dst,
- (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))],
- itin>, OpSize;
- def #NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$val, i32mem:$ptr),
- !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR32:$dst,
- (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))],
+ GR64:$dst,
+ (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
itin>;
- def #NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$val, i64mem:$ptr),
- !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR64:$dst,
- (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))],
- itin>;
}
}
defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>;
// Swap between registers.
+let SchedRW = [WriteALU] in {
let Constraints = "$val = $dst" in {
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
"xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>;
@@ -1338,9 +1419,9 @@ def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src),
Requires<[In64BitMode]>;
def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
"xchg{q}\t{$src, %rax|RAX, $src}", [], IIC_XCHG_REG>;
+} // SchedRW
-
-
+let SchedRW = [WriteALU] in {
def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1350,8 +1431,9 @@ def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB;
def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
@@ -1364,6 +1446,7 @@ def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
}
+let SchedRW = [WriteALU] in {
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
IIC_CMPXCHG_REG8>, TB;
@@ -1376,7 +1459,9 @@ def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"cmpxchg{q}\t{$src, $dst|$dst, $src}", [],
IIC_CMPXCHG_REG>, TB;
+} // SchedRW
+let SchedRW = [WriteALULd, WriteRMW] in {
let mayLoad = 1, mayStore = 1 in {
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", [],
@@ -1400,7 +1485,7 @@ let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
"cmpxchg16b\t$dst", [], IIC_CMPXCHG_16B>,
TB, Requires<[HasCmpxchg16b]>;
-
+} // SchedRW
// Lock instruction prefix
@@ -1423,17 +1508,21 @@ def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
// String manipulation instructions
+let SchedRW = [WriteMicrocoded] in {
def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", [], IIC_LODS>;
def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", [], IIC_LODS>, OpSize;
def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", [], IIC_LODS>;
def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", [], IIC_LODS>;
+}
+let SchedRW = [WriteSystem] in {
def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", [], IIC_OUTS>;
def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", [], IIC_OUTS>, OpSize;
def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", [], IIC_OUTS>;
-
+}
// Flag instructions
+let SchedRW = [WriteALU] in {
def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>;
def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>;
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>;
@@ -1443,10 +1532,13 @@ def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>;
def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>;
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
+}
// Table lookup instructions
-def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>;
+def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>,
+ Sched<[WriteLoad]>;
+let SchedRW = [WriteMicrocoded] in {
// ASCII Adjust After Addition
// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>,
@@ -1476,7 +1568,9 @@ def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>,
// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>,
Requires<[In32BitMode]>;
+} // SchedRW
+let SchedRW = [WriteSystem] in {
// Check Array Index Against Bounds
def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize,
@@ -1486,17 +1580,19 @@ def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
Requires<[In32BitMode]>;
// Adjust RPL Field of Segment Selector
-def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst),
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>,
Requires<[In32BitMode]>;
-def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
+def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>,
Requires<[In32BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// MOVBE Instructions
//
let Predicates = [HasMOVBE] in {
+ let SchedRW = [WriteALULd] in {
def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>,
@@ -1509,6 +1605,8 @@ let Predicates = [HasMOVBE] in {
"movbe{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>,
T8;
+ }
+ let SchedRW = [WriteStore] in {
def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>,
@@ -1521,6 +1619,7 @@ let Predicates = [HasMOVBE] in {
"movbe{q}\t{$src, $dst|$dst, $src}",
[(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>,
T8;
+ }
}
//===----------------------------------------------------------------------===//
@@ -1539,6 +1638,21 @@ let Predicates = [HasRDRAND], Defs = [EFLAGS] in {
}
//===----------------------------------------------------------------------===//
+// RDSEED Instruction
+//
+let Predicates = [HasRDSEED], Defs = [EFLAGS] in {
+ def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins),
+ "rdseed{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize, TB;
+ def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
+ "rdseed{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86rdseed))]>, TB;
+ def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins),
+ "rdseed{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86rdseed))]>, TB;
+}
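+// Hedged usage sketch (assumption, not part of the patch): the patterns
+// above are what the rdseed step intrinsics ultimately select, e.g. in C++:
+//   #include <immintrin.h>
+//   int get_seed(unsigned *out) {
+//     return _rdseed32_step(out);  // 1 on success; CF -> the EFLAGS def above
+//   }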
+
+//===----------------------------------------------------------------------===//
// LZCNT Instruction
//
let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
@@ -1605,26 +1719,26 @@ multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
PatFrag ld_frag> {
def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, EFLAGS, (OpNode RC:$src))]>, T8, VEX_4V;
+ [(set RC:$dst, (OpNode RC:$src)), (implicit EFLAGS)]>, T8, VEX_4V;
def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, EFLAGS, (OpNode (ld_frag addr:$src)))]>,
+ [(set RC:$dst, (OpNode (ld_frag addr:$src))), (implicit EFLAGS)]>,
T8, VEX_4V;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem,
- X86blsr_flag, loadi32>;
+ X86blsr, loadi32>;
defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem,
- X86blsr_flag, loadi64>, VEX_W;
+ X86blsr, loadi64>, VEX_W;
defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem,
- X86blsmsk_flag, loadi32>;
+ X86blsmsk, loadi32>;
defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem,
- X86blsmsk_flag, loadi64>, VEX_W;
+ X86blsmsk, loadi64>, VEX_W;
defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem,
- X86blsi_flag, loadi32>;
+ X86blsi, loadi32>;
defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem,
- X86blsi_flag, loadi64>, VEX_W;
+ X86blsi, loadi64>, VEX_W;
}
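// Illustrative contrast (not part of the patch): the old patterns glued the
// flags result into the node, while the new ones model EFLAGS as an
// implicit def:
//   old: [(set RC:$dst, EFLAGS, (OpNode RC:$src))]
//   new: [(set RC:$dst, (OpNode RC:$src)), (implicit EFLAGS)]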
multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
@@ -1886,6 +2000,8 @@ def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>;
def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>;
def : InstAlias<"fxch", (XCH_F ST1)>;
+def : InstAlias<"fcom", (COM_FST0r ST1)>;
+def : InstAlias<"fcomp", (COMP_FST0r ST1)>;
def : InstAlias<"fcomi", (COM_FIr ST1)>;
def : InstAlias<"fcompi", (COM_FIPr ST1)>;
def : InstAlias<"fucom", (UCOM_Fr ST1)>;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 127af6f7f93a..49721df7c118 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -20,6 +20,7 @@
// MMX Multiclasses
//===----------------------------------------------------------------------===//
+let Sched = WriteVecALU in {
def MMX_INTALU_ITINS : OpndItins<
IIC_MMX_ALU_RR, IIC_MMX_ALU_RM
>;
@@ -35,11 +36,14 @@ def MMX_PHADDSUBW : OpndItins<
def MMX_PHADDSUBD : OpndItins<
IIC_MMX_PHADDSUBD_RR, IIC_MMX_PHADDSUBD_RM
>;
+}
+let Sched = WriteVecIMul in
def MMX_PMUL_ITINS : OpndItins<
IIC_MMX_PMUL, IIC_MMX_PMUL
>;
+let Sched = WriteVecALU in {
def MMX_PSADBW_ITINS : OpndItins<
IIC_MMX_PSADBW, IIC_MMX_PSADBW
>;
@@ -47,11 +51,13 @@ def MMX_PSADBW_ITINS : OpndItins<
def MMX_MISC_FUNC_ITINS : OpndItins<
IIC_MMX_MISC_FUNC_MEM, IIC_MMX_MISC_FUNC_REG
>;
+}
def MMX_SHIFT_ITINS : ShiftOpndItins<
IIC_MMX_SHIFT_RR, IIC_MMX_SHIFT_RM, IIC_MMX_SHIFT_RI
>;
+let Sched = WriteShuffle in {
def MMX_UNPCK_H_ITINS : OpndItins<
IIC_MMX_UNPCK_H_RR, IIC_MMX_UNPCK_H_RM
>;
@@ -67,7 +73,9 @@ def MMX_PCK_ITINS : OpndItins<
def MMX_PSHUF_ITINS : OpndItins<
IIC_MMX_PSHUF, IIC_MMX_PSHUF
>;
+} // Sched
+let Sched = WriteCvtF2I in {
def MMX_CVT_PD_ITINS : OpndItins<
IIC_MMX_CVT_PD_RR, IIC_MMX_CVT_PD_RM
>;
@@ -75,6 +83,7 @@ def MMX_CVT_PD_ITINS : OpndItins<
def MMX_CVT_PS_ITINS : OpndItins<
IIC_MMX_CVT_PS_RR, IIC_MMX_CVT_PS_RM
>;
+}
let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
@@ -84,7 +93,8 @@ let Constraints = "$src1 = $dst" in {
def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr> {
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[itins.Sched]> {
let isCommutable = Commutable;
}
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
@@ -92,7 +102,7 @@ let Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
@@ -101,17 +111,19 @@ let Constraints = "$src1 = $dst" in {
def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>;
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[WriteVecShift]>;
def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>;
+ [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>,
+ Sched<[WriteVecShift]>;
}
}
@@ -120,13 +132,14 @@ multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, OpndItins itins> {
def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>;
+ [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (memopmmx addr:$src))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
/// Binary MMX instructions requiring SSSE3.
@@ -137,13 +150,15 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>;
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
(IntId64 VR64:$src1,
- (bitconvert (memopmmx addr:$src2))))], itins.rm>;
+ (bitconvert (memopmmx addr:$src2))))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -164,9 +179,11 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, OpndItins itins, Domain d> {
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>;
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>,
+ Sched<[itins.Sched]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>;
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>,
+ Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -174,11 +191,11 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
PatFrag ld_frag, string asm, Domain d> {
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
- IIC_DEFAULT, d>;
+ NoItinerary, d>;
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
- IIC_DEFAULT, d>;
+ NoItinerary, d>;
}
//===----------------------------------------------------------------------===//
@@ -197,16 +214,17 @@ def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector GR32:$src)))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
let canFoldAsLoad = 1 in
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector (loadi32 addr:$src))))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
- "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>;
+ "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>,
+ Sched<[WriteStore]>;
// Low word of MMX to GPR.
def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
@@ -214,16 +232,18 @@ def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst,
- (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>;
+ (MMX_X86movd2w (x86mmx VR64:$src)))],
+ IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>;
let neverHasSideEffects = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
- [], IIC_MMX_MOV_MM_RM>;
+ [], IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
// These are 64 bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
// movd.
+let SchedRW = [WriteMove] in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -237,6 +257,9 @@ let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
IIC_MMX_MOVQ_RR>;
+} // SchedRW
+
+let SchedRW = [WriteLoad] in {
let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -246,7 +269,9 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)],
IIC_MMX_MOVQ_RM>;
+} // SchedRW
+let SchedRW = [WriteMove] in {
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
@@ -271,11 +296,12 @@ def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[], IIC_MMX_MOVQ_RR>;
+} // SchedRW
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)],
- IIC_MMX_MOVQ_RM>;
+ IIC_MMX_MOVQ_RM>, Sched<[WriteStore]>;
let AddedComplexity = 15 in
// movd to MMX register zero-extends
@@ -283,7 +309,7 @@ def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
let AddedComplexity = 20 in
def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
(ins i32mem:$src),
@@ -291,7 +317,7 @@ def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
[(set VR64:$dst,
(x86mmx (X86vzmovl (x86mmx
(scalar_to_vector (loadi32 addr:$src))))))],
- IIC_MMX_MOV_MM_RM>;
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
// Arithmetic Instructions
defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
@@ -491,14 +517,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w VR64:$src1, imm:$src2))],
- IIC_MMX_PSHUF>;
+ IIC_MMX_PSHUF>, Sched<[WriteShuffle]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
imm:$src2))],
- IIC_MMX_PSHUF>;
+ IIC_MMX_PSHUF>, Sched<[WriteShuffleLd]>;
@@ -532,7 +558,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
(iPTR imm:$src2)))],
- IIC_MMX_PEXTR>;
+ IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
let Constraints = "$src1 = $dst" in {
def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
@@ -540,7 +566,7 @@ let Constraints = "$src1 = $dst" in {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
GR32:$src2, (iPTR imm:$src3)))],
- IIC_MMX_PINSRW>;
+ IIC_MMX_PINSRW>, Sched<[WriteShuffle]>;
def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
@@ -549,7 +575,7 @@ let Constraints = "$src1 = $dst" in {
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
(iPTR imm:$src3)))],
- IIC_MMX_PINSRW>;
+ IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
// Mask creation
@@ -570,6 +596,7 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
(x86mmx (MMX_MOVQ64rm addr:$src))>;
// Misc.
+let SchedRW = [WriteShuffle] in {
let Uses = [EDI] in
def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
@@ -580,6 +607,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)],
IIC_MMX_MASKMOV>;
+}
// 64-bit bit convert.
let Predicates = [HasSSE2] in {
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 6f48d7ed7fe1..384238741b18 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -16,6 +16,8 @@
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
InstrItinClass rr = arg_rr;
InstrItinClass rm = arg_rm;
+ // InstrSchedModel info.
+ X86FoldableSchedWrite Sched = WriteFAdd;
}
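// [Editorial sketch, not part of the patch: the new Sched field gives
// each itinerary bundle a default SchedWrite, so a single
// `let Sched = ... in` region can retarget a whole group of OpndItins
// defs; instruction multiclasses then read itins.Sched for register
// forms and itins.Sched.Folded for load-folded forms. Hypothetical
// bundle in that style:]
let Sched = WriteFMul in
def HYPO_MUL_ITINS : OpndItins<IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM>;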
class SizeItins<OpndItins arg_s, OpndItins arg_d> {
@@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
// scalar
+let Sched = WriteFAdd in {
def SSE_ALU_F32S : OpndItins<
IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;
@@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins<
def SSE_ALU_F64S : OpndItins<
IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
>;
+}
def SSE_ALU_ITINS_S : SizeItins<
SSE_ALU_F32S, SSE_ALU_F64S
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32S : OpndItins<
IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
>;
@@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins<
def SSE_MUL_F64S : OpndItins<
IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;
+}
def SSE_MUL_ITINS_S : SizeItins<
SSE_MUL_F32S, SSE_MUL_F64S
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32S : OpndItins<
IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM
>;
@@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins<
def SSE_DIV_F64S : OpndItins<
IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;
+}
def SSE_DIV_ITINS_S : SizeItins<
SSE_DIV_F32S, SSE_DIV_F64S
>;
// parallel
+let Sched = WriteFAdd in {
def SSE_ALU_F32P : OpndItins<
IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;
@@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins<
def SSE_ALU_F64P : OpndItins<
IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
>;
+}
def SSE_ALU_ITINS_P : SizeItins<
SSE_ALU_F32P, SSE_ALU_F64P
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32P : OpndItins<
IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM
>;
@@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins<
def SSE_MUL_F64P : OpndItins<
IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;
+}
def SSE_MUL_ITINS_P : SizeItins<
SSE_MUL_F32P, SSE_MUL_F64P
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32P : OpndItins<
IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM
>;
@@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins<
def SSE_DIV_F64P : OpndItins<
IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;
+}
def SSE_DIV_ITINS_P : SizeItins<
SSE_DIV_F32P, SSE_DIV_F64P
@@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins<
IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;
+let Sched = WriteVecALU in {
def SSE_INTALU_ITINS_P : OpndItins<
IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
@@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins<
def SSE_INTALUQ_ITINS_P : OpndItins<
IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;
+}
+let Sched = WriteVecIMul in
def SSE_INTMUL_ITINS_P : OpndItins<
IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;
@@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
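// [Editorial sketch, not part of the patch: itins.Sched.Folded selects
// the SchedWrite variant that includes the folded load, and ReadAfterLd
// marks the surviving register operand as read only after the load
// resolves, so a late-completing producer of $src1 does not stall the
// machine model. A standalone hypothetical equivalent:]
let Constraints = "$src1 = $dst" in
def HYPO_ADDSSrm : SI<0x58, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1, f32mem:$src2),
                      "addss\t{$src2, $dst|$dst, $src2}",
                      [(set FR32:$dst, (fadd FR32:$src1,
                                             (loadf32 addr:$src2)))],
                      IIC_SSE_ALU_F32S_RM>,
                  Sched<[WriteFAddLd, ReadAfterLd]>;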
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
@@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], itins.rr>;
+ RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, mem_cpat:$src2))], itins.rm>;
+ RC:$src1, mem_cpat:$src2))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_packed - SSE 1 & 2 packed instructions class
@@ -189,54 +210,36 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
+ Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
- itins.rm, d>;
+ itins.rm, d>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
string OpcodeStr, X86MemOperand x86memop,
list<dag> pat_rr, list<dag> pat_rm,
- bit Is2Addr = 1,
- bit rr_hasSideEffects = 0> {
- let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in
+ bit Is2Addr = 1> {
+ let isCommutable = 1, hasSideEffects = 0 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rr, IIC_DEFAULT, d>;
+ pat_rr, NoItinerary, d>,
+ Sched<[WriteVecLogic]>;
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rm, IIC_DEFAULT, d>;
-}
-
-/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
-multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
- string asm, string SSEVer, string FPSizeStr,
- X86MemOperand x86memop, PatFrag mem_frag,
- Domain d, OpndItins itins, bit Is2Addr = 1> {
- def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!cast<Intrinsic>(
- !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], IIC_DEFAULT, d>;
- def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!cast<Intrinsic>(
- !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>;
+ pat_rm, NoItinerary, d>,
+ Sched<[WriteVecLogicLd, ReadAfterLd]>;
}
//===----------------------------------------------------------------------===//
@@ -367,7 +370,7 @@ let Predicates = [HasAVX] in {
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
[(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
@@ -384,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
@@ -401,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasAVX] in {
+ isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8f32 immAllZerosV))]>;
}
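// [Editorial note, not part of the patch: WriteZero is the scheduling
// class for these zeroing idioms; per the comment above they are
// resolved at register-rename time without occupying an execution
// unit, so the model should not charge them an ALU slot. A hypothetical
// pseudo using the same tag:]
let isReMaterializable = 1, isPseudo = 1, SchedRW = [WriteZero] in
def HYPO_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                  [(set VR128:$dst, (v4i32 immAllZerosV))]>;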
@@ -439,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
let Predicates = [HasAVX2] in
@@ -458,93 +461,70 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
-class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> :
- SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
- [(set VR128:$dst, (vt (OpNode VR128:$src1,
- (scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>;
+multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, string base_opc,
+ string asm_opr> {
+ def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, RC:$src2),
+ !strconcat(base_opc, asm_opr),
+ [(set VR128:$dst, (vt (OpNode VR128:$src1,
+ (scalar_to_vector RC:$src2))))],
+ IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
-// Loading from memory automatically zeroing upper bits.
-class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_pat, string OpcodeStr> :
- SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>;
-
-// AVX
-def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
- VEX_LIG;
-def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
- VEX_LIG;
-
-// For the disassembler
-let isCodeGenOnly = 1 in {
- def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XS, VEX_4V, VEX_LIG;
- def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XD, VEX_4V, VEX_LIG;
+ // For the disassembler
+ let isCodeGenOnly = 1, hasSideEffects = 0 in
+ def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, RC:$src2),
+ !strconcat(base_opc, asm_opr),
+ [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX,
- VEX_LIG;
- let AddedComplexity = 20 in
- def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX,
- VEX_LIG;
-}
+multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, string OpcodeStr> {
+ // AVX
+ defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+ VEX_4V, VEX_LIG;
-def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- XS, VEX, VEX_LIG;
-def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- XD, VEX, VEX_LIG;
+ def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ VEX, VEX_LIG, Sched<[WriteStore]>;
+ // SSE1 & 2
+ let Constraints = "$src1 = $dst" in {
+ defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}">;
+ }
-// SSE1 & 2
-let Constraints = "$src1 = $dst" in {
- def MOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
- "movss\t{$src2, $dst|$dst, $src2}">, XS;
- def MOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
- "movsd\t{$src2, $dst|$dst, $src2}">, XD;
+ def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ Sched<[WriteStore]>;
+}
- // For the disassembler
- let isCodeGenOnly = 1 in {
- def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $dst|$dst, $src2}", [],
- IIC_SSE_MOV_S_RR>, XS;
- def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}", [],
- IIC_SSE_MOV_S_RR>, XD;
- }
+// Loading from memory automatically zeroing upper bits.
+multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_pat, string OpcodeStr> {
+ def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
+ def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
}
+defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
+defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;
+
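// [Editorial note, not part of the patch: the `#` paste operator builds
// record names from the defm prefix, so each instantiation above emits
// both encodings. For NAME = "MOVSS", sse12_move produces:
//   MOVSSrr, MOVSSrr_REV, MOVSSmr       (legacy SSE, $src1 = $dst)
//   VMOVSSrr, VMOVSSrr_REV, VMOVSSmr    (VEX three-operand forms)
// and the sse12_move_rm defms below add MOVSSrm/VMOVSSrm the same way.]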
let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+ defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
let AddedComplexity = 20 in
- def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+ defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
}
-def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
-def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
-
// Patterns
let Predicates = [HasAVX] in {
let AddedComplexity = 15 in {
@@ -791,11 +771,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>;
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
+ Sched<[WriteMove]>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>;
+ [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
+ Sched<[WriteLoad]>;
}
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
@@ -836,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
TB, OpSize;
+let SchedRW = [WriteStore] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -868,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+} // SchedRW
// For disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
@@ -926,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
(VMOVUPDYmr addr:$dst, VR256:$src)>;
+let SchedRW = [WriteStore] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -942,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>;
+} // SchedRW
// For disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -1055,7 +1041,7 @@ let Predicates = [HasAVX] in {
(VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v8i16 (extract_subvector
(v16i16 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v16i8 (extract_subvector
(v32i8 VR256:$src), (iPTR 0))), addr:$dst),
(VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
@@ -1090,7 +1076,7 @@ let Predicates = [UseSSE1] in {
// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>, VEX;
@@ -1107,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
let isCodeGenOnly = 1 in {
def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
@@ -1132,36 +1118,46 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//
-multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
- SDNode psnode, SDNode pdnode, string base_opc,
- string asm_opr, InstrItinClass itin> {
+multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, string asm_opr,
+ InstrItinClass itin> {
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
- [(set RC:$dst,
- (psnode RC:$src1,
+ [(set VR128:$dst,
+ (psnode VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
- itin, SSEPackedSingle>, TB;
+ itin, SSEPackedSingle>, TB,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
def PDrm : PI<opc, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
- [(set RC:$dst, (v2f64 (pdnode RC:$src1,
+ [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
- itin, SSEPackedDouble>, TB, OpSize;
+ itin, SSEPackedDouble>, TB, OpSize,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+
}
-let AddedComplexity = 20 in {
- defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- IIC_SSE_MOV_LH>, VEX_4V;
+multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, InstrItinClass itin> {
+ defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ itin>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $dst|$dst, $src2}",
+ itin>;
}
-let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
- "\t{$src2, $dst|$dst, $src2}",
- IIC_SSE_MOV_LH>;
+
+let AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp",
+ IIC_SSE_MOV_LH>;
}
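// [Editorial note, not part of the patch: the wrapper encodes the usual
// SSE/AVX split in one place: the legacy NAME forms tie $src1 to $dst
// (two-address asm "{$src2, $dst|$dst, $src2}"), while the V#NAME forms
// are three-operand VEX ("{$src2, $src1, $dst|$dst, $src1, $src2}").
// The single defm above therefore yields MOVLPSrm/MOVLPDrm plus
// VMOVLPSrm/VMOVLPDrm.]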
+let SchedRW = [WriteStore] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -1182,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>;
+} // SchedRW
let Predicates = [HasAVX] in {
// Shuffle with VMOVLPS
@@ -1257,16 +1254,11 @@ let Predicates = [UseSSE2] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 20 in {
- defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- IIC_SSE_MOV_LH>, VEX_4V;
-}
-let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
- "\t{$src2, $dst|$dst, $src2}",
- IIC_SSE_MOV_LH>;
+ defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp",
+ IIC_SSE_MOV_LH>;
}
+let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -1291,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
+} // SchedRW
let Predicates = [HasAVX] in {
// VMOVHPS patterns
@@ -1341,14 +1334,14 @@ let AddedComplexity = 20 in {
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -1356,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
}
let Predicates = [HasAVX] in {
@@ -1397,22 +1390,27 @@ def SSE_CVT_PD : OpndItins<
IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_PS : OpndItins<
IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_Scalar : OpndItins<
IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_32 : OpndItins<
IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_64 : OpndItins<
IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SD2SI : OpndItins<
IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
>;
@@ -1422,10 +1420,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (OpNode SrcRC:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
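// [Editorial note, not part of the patch: unlike the two-operand
// arithmetic multiclasses, a folded convert consumes only the loaded
// value, so the rm form here uses Sched<[itins.Sched.Folded]> with no
// trailing ReadAfterLd: once the load is folded there is no register
// source left whose read needs to be deferred past the load.]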
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1433,10 +1431,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
OpndItins itins> {
let neverHasSideEffects = 1 in {
def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [], itins.rr, d>;
+ [], itins.rr, d>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [], itins.rm, d>;
+ [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
}
}
@@ -1444,11 +1442,13 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
let neverHasSideEffects = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2F]>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2FLd, ReadAfterLd]>;
} // neverHasSideEffects = 1
}
@@ -1457,7 +1457,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
SSE_CVT_SS2SI_32>,
XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>,
XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
@@ -1465,26 +1465,43 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
SSE_CVT_SD2SI>,
XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>,
XD, VEX, VEX_W, VEX_LIG;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
+
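// [Editorial note, not part of the patch: the trailing 0 clears the
// alias's emit bit (the third InstAlias argument, which defaults to 1),
// so the suffixed spellings are accepted by the assembler but never
// selected by the printer; the canonical unsuffixed mnemonics above
// remain the output form. Shape of such a parse-only alias:
//   def : InstAlias<"vcvttss2si{l}\t...", (VCVTTSS2SIrr ...), 0>;]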
// The assembler can recognize rr 64-bit instructions by seeing an rxx
// register, but the same isn't true when only memory operands are used,
// so provide explicit "l" and "q" assembly forms to address this where
// appropriate.
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">,
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">,
XS, VEX_4V, VEX_LIG;
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
XS, VEX_4V, VEX_W, VEX_LIG;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
XD, VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
XD, VEX_4V, VEX_W, VEX_LIG;
-def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>;
-def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SSrm FR32:$dst, FR32:$src1, i32mem:$src)>;
+def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
let Predicates = [HasAVX] in {
@@ -1511,27 +1528,49 @@ defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_32>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SD2SI>, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
- "cvtsi2ss\t{$src, $dst|$dst, $src}",
+ "cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XS;
defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
- "cvtsi2sd\t{$src, $dst|$dst, $src}",
+ "cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XD;
defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
SSE_CVT_Scalar>, XD, REX_W;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
+
+def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
+ (CVTSI2SSrm FR32:$dst, i32mem:$src)>;
+def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
+ (CVTSI2SDrm FR64:$dst, i32mem:$src)>;
+
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
@@ -1540,10 +1579,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>;
+ [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -1555,38 +1596,38 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
- int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}",
+ int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}",
+ int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
+ sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+ sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss",
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
SSE_CVT_Scalar, 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
SSE_CVT_Scalar, 0>, XS, VEX_4V,
VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd",
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
SSE_CVT_Scalar, 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
@@ -1596,13 +1637,13 @@ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
- "cvtsi2ss", SSE_CVT_Scalar>, XS;
+ "cvtsi2ss{l}", SSE_CVT_Scalar>, XS;
defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64,
"cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32,
- "cvtsi2sd", SSE_CVT_Scalar>, XD;
+ "cvtsi2sd{l}", SSE_CVT_Scalar>, XD;
defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64,
"cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
@@ -1616,40 +1657,40 @@ defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
- "cvttss2si{q}", SSE_CVT_SS2SI_64>,
+ "cvttss2si", SSE_CVT_SS2SI_64>,
XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
sdmem, sse_load_f64, "cvttsd2si",
SSE_CVT_SD2SI>, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
- "cvttsd2si{q}", SSE_CVT_SD2SI>,
+ "cvttsd2si", SSE_CVT_SD2SI>,
XD, VEX, VEX_W;
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
- "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W;
+ "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
sdmem, sse_load_f64, "cvttsd2si",
SSE_CVT_SD2SI>, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
- "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
+ "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- ssmem, sse_load_f32, "cvtss2si{l}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
- ssmem, sse_load_f32, "cvtss2si{q}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- ssmem, sse_load_f32, "cvtss2si{l}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
- ssmem, sse_load_f32, "cvtss2si{q}",
+ ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, REX_W;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
@@ -1666,6 +1707,40 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
SSEPackedSingle, SSE_CVT_PS>,
TB, Requires<[UseSSE2]>;
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
+
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTSD2SI64rm GR64:$dst, sdmem:$src)>;
+
/// SSE 2 Only
// Convert scalar double to scalar single
@@ -1673,13 +1748,15 @@ let neverHasSideEffects = 1 in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG;
+ IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
- XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+ XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
@@ -1688,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))],
- IIC_SSE_CVT_Scalar_RR>;
+ IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))],
IIC_SSE_CVT_Scalar_RM>,
XD,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
@@ -1715,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
// Convert scalar single to scalar double
@@ -1731,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RR>,
- XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
+ XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
- XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+ XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
def : Pat<(f64 (fextend FR32:$src)),
@@ -1756,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))],
IIC_SSE_CVT_Scalar_RR>, XS,
- Requires<[UseSSE2]>;
+ Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))],
IIC_SSE_CVT_Scalar_RM>, XS,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
// extload f32 -> f64. This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
@@ -1778,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
// Convert Packed Double FP to Packed DW Integers
@@ -1839,7 +1926,7 @@ let Predicates = [HasAVX] in {
def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
- VEX;
+ VEX, Sched<[WriteCvtF2I]>;
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
@@ -1847,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX;
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX,
+ Sched<[WriteCvtF2ILd]>;
// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L;
+ (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtF2I]>;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
- VEX, VEX_L;
+ VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
}
@@ -1867,11 +1956,11 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
@@ -1879,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
(memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
(memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
+ Sched<[WriteCvtF2ILd]>;
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
@@ -1954,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
@@ -1967,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
@@ -1993,12 +2083,13 @@ let Predicates = [HasAVX] in {
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>,
+ Sched<[WriteCvtF2ILd]>;
// Convert packed single to packed double
let Predicates = [HasAVX] in {
@@ -2006,32 +2097,32 @@ let Predicates = [HasAVX] in {
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB, VEX;
+ IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB, VEX;
+ IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}
let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB;
+ IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB;
+ IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>;
}
// Convert Packed DW Integers to Packed Double FP
@@ -2039,30 +2130,33 @@ let Predicates = [HasAVX] in {
let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
- []>, VEX;
+ []>, VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX;
+ (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
+ Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtdq2_pd_256
- (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L;
+ (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L;
+ (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2F]>;
}
let neverHasSideEffects = 1, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
@@ -2079,7 +2173,7 @@ let Predicates = [HasAVX] in {
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
@@ -2088,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2psx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;
// AVX 256-bit register conversion intrinsics
@@ -2165,22 +2259,24 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
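// For example, "cmpps $3, %xmm1, %xmm0" (equivalent to "cmpunordps") is
// only matched by the *_alt forms below, which take a raw i8imm instead of
// a condition-code operand.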
let neverHasSideEffects = 1 in {
def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
- IIC_SSE_ALU_F32S_RR>;
+ IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
- IIC_SSE_ALU_F32S_RM>;
+ IIC_SSE_ALU_F32S_RM>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -2213,12 +2309,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
(ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
VR128:$src, imm:$cc))],
- itins.rr>;
+ itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
(load addr:$src), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
// Aliases to match intrinsics which expect XMM operand(s).
@@ -2248,12 +2346,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
- IIC_SSE_COMIS_RR, d>;
+ IIC_SSE_COMIS_RR, d>,
+ Sched<[WriteFAdd]>;
def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))],
- IIC_SSE_COMIS_RM, d>;
+ IIC_SSE_COMIS_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
}
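// There is apparently no dedicated SchedWrite for FP compares in this
// model, so the COMIS forms reuse WriteFAdd/WriteFAddLd as a stand-in of
// presumably similar latency.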
let Defs = [EFLAGS] in {
@@ -2310,20 +2410,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
- IIC_SSE_CMPP_RR, d>;
+ IIC_SSE_CMPP_RR, d>,
+ Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
- IIC_SSE_CMPP_RM, d>;
+ IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rri_alt : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RR, d>;
+ asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>;
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RM, d>;
+ asm_alt, [], IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
}
}
@@ -2399,12 +2502,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffle]>;
}
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2488,13 +2593,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1, RC:$src2)))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>;
def rm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
(mem_frag addr:$src2))))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
@@ -2585,10 +2691,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>;
+ [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
+ Sched<[WriteVecLogic]>;
def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
- IIC_SSE_MOVMSK, d>, REX_W;
+ IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>;
}
let Predicates = [HasAVX] in {
@@ -2616,18 +2723,18 @@ let Predicates = [HasAVX] in {
// Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX;
+ SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>;
def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
SSEPackedDouble>, TB,
- OpSize, VEX;
+ OpSize, VEX, Sched<[WriteVecLogic]>;
def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX, VEX_L;
+ SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>;
def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
SSEPackedDouble>, TB,
- OpSize, VEX, VEX_L;
+ OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>;
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
@@ -2657,17 +2764,16 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop,
- OpndItins itins,
- bit IsCommutable = 0,
- bit Is2Addr = 1> {
+ X86MemOperand x86memop, OpndItins itins,
+ bit IsCommutable, bit Is2Addr> {
let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>;
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
@@ -2675,44 +2781,35 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)))))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
-// These are ordered here for pattern ordering requirements with the fp versions
+multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
+ ValueType OpVT128, ValueType OpVT256,
+ OpndItins itins, bit IsCommutable = 0> {
+let Predicates = [HasAVX] in
+ defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
+ VR128, memopv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
-let Predicates = [HasAVX] in {
-defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
-defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
+ memopv2i64, i128mem, itins, IsCommutable, 1>;
+
+let Predicates = [HasAVX2] in
+ defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
+ OpVT256, VR256, memopv4i64, i256mem, itins,
+ IsCommutable, 0>, VEX_4V, VEX_L;
}
-let Constraints = "$src1 = $dst" in {
-defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 1>;
-defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
- i128mem, SSE_BIT_ITINS_P, 0>;
-} // Constraints = "$src1 = $dst"
+// These are ordered here for pattern ordering requirements with the fp versions
-let Predicates = [HasAVX2] in {
-defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
- i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
+defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, SSE_BIT_ITINS_P, 1>;
+defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
+ SSE_BIT_ITINS_P, 0>;
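+// Each defm above expands through the V#NAME/V#NAME#Y pastes into the SSE
+// form (e.g. PANDrr/PANDrm), the AVX form (VPANDrr/VPANDrm) and the AVX2
+// 256-bit form (VPANDYrr/VPANDYrm).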
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
@@ -2757,6 +2854,20 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
+ defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f256mem,
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
+
+ defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f256mem,
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ TB, OpSize, VEX_4V, VEX_L;
+
// In AVX no need to add a pattern for 128-bit logical rr ps, because they
// are all promoted to v2i64, and the patterns are covered by the int
// version. This is needed in SSE only, because v2i64 isn't supported on
@@ -2764,7 +2875,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, [],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V;
+ (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
@@ -2773,6 +2884,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
(memopv2i64 addr:$src2)))], 0>,
TB, OpSize, VEX_4V;
+
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
@@ -2789,31 +2901,6 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
}
}
-/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
-///
-multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
- defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f256mem,
- [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
- [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
-
- defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f256mem,
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (bc_v4i64 (v4f64 VR256:$src2))))],
- [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>,
- TB, OpSize, VEX_4V, VEX_L;
-}
-
-// AVX 256-bit packed logical ops forms
-defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
-defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
-defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
-defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
-
defm AND : sse12_fp_packed_logical<0x54, "and", and>;
defm OR : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
@@ -2848,26 +2935,32 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
itins.d, Is2Addr>, XD;
}
-multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SizeItins itins,
- bit Is2Addr = 1> {
+multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SizeItins itins> {
+let Predicates = [HasAVX] in {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ VR128, v4f32, f128mem, memopv4f32,
+ SSEPackedSingle, itins.s, 0>, TB, VEX_4V;
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ VR128, v2f64, f128mem, memopv2f64,
+ SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V;
+
+ defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
+ OpNode, VR256, v8f32, f256mem, memopv8f32,
+ SSEPackedSingle, itins.s, 0>, TB, VEX_4V, VEX_L;
+ defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
+ OpNode, VR256, v4f64, f256mem, memopv4f64,
+ SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
- v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>,
- TB;
+ v4f32, f128mem, memopv4f32, SSEPackedSingle,
+ itins.s, 1>, TB;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
- v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>,
- TB, OpSize;
+ v2f64, f128mem, memopv2f64, SSEPackedDouble,
+ itins.d, 1>, TB, OpSize;
}
-
-multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- SizeItins itins> {
- defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
- v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>,
- TB, VEX_L;
- defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
- v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>,
- TB, OpSize, VEX_L;
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
@@ -2881,116 +2974,69 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
itins.d, Is2Addr>, XD;
}
-multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
- SizeItins itins,
- bit Is2Addr = 1> {
- defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
- SSEPackedSingle, itins.s, Is2Addr>,
- TB;
-
- defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
- SSEPackedDouble, itins.d, Is2Addr>,
- TB, OpSize;
+// Binary Arithmetic instructions
+defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>;
+defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>;
+let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>;
+ defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>;
+ defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>;
+ defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>;
}
-multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr,
- SizeItins itins> {
- defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
- !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
- SSEPackedSingle, itins.s, 0>, TB, VEX_L;
-
- defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
- !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
- SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_L;
+let isCodeGenOnly = 1 in {
+ defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
+ defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
}
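// X86fmaxc/X86fminc are the commutable variants of X86fmax/X86fmin, used
// when operand order is known not to matter (e.g. no NaN or signed-zero
// concerns), so these isCodeGenOnly definitions may swap their sources.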
-// Binary Arithmetic instructions
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
-defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x58, "add", fadd, SSE_ALU_ITINS_P>,
- VEX_4V;
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
VEX_4V, VEX_LIG;
-defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
- VEX_4V;
let isCommutable = 0 in {
defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
- VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
- VEX_4V;
defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>,
- VEX_4V;
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
- VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
- basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
let isCommutable = 0 in {
defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
- basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>;
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
- basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>;
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
}
}
let isCodeGenOnly = 1 in {
defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V;
defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
VEX_4V, VEX_LIG;
- defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
- defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
+ defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
+ defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
}
}
@@ -3002,6 +3048,7 @@ let isCodeGenOnly = 1 in {
///
/// And, we have a special variant form for a full-vector intrinsic form.
+let Sched = WriteFSqrt in {
def SSE_SQRTP : OpndItins<
IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
>;
@@ -3009,7 +3056,9 @@ def SSE_SQRTP : OpndItins<
def SSE_SQRTS : OpndItins<
IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
>;
+}
+let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
>;
@@ -3017,13 +3066,36 @@ def SSE_RCPP : OpndItins<
def SSE_RCPS : OpndItins<
IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
>;
+}
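+// OpndItins now also carries a default SchedWrite in its Sched field; the
+// multiclasses below attach Sched<[itins.Sched]> to register forms and
+// Sched<[itins.Sched.Folded]> (plus ReadAfterLd when a register source
+// remains) to the load-folding forms.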
/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
+ let mayLoad = 1 in {
+ def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+                        (ins FR32:$src1, f32mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+}
+
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
// For scalar unary operations, fold a load into the operation
// only in OptForSize mode. It eliminates an instruction, but it also
// eliminates a whole-register clobber (the load), so it introduces a
@@ -3031,204 +3103,238 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
-/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
-multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+/// sse1_fp_unop_rw - SSE1 unops where the vector form has a read-write operand.
+multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
- def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+                      (ins FR32:$src1, f32mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+}
+
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
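+  // For example, folding the load into "rsqrtss (%rax), %xmm0" drops the
+  // separate whole-register "movss" load, but because rsqrtss writes only
+  // the low lane it now depends on the prior value of %xmm0.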
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
+ let Constraints = "$src1 = $dst" in {
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rr>, Sched<[itins.Sched]>;
+ let mayLoad = 1, hasSideEffects = 0 in
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
- def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
- def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
+let Predicates = [HasAVX] in {
+ def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
+ itins.rr>, VEX, Sched<[itins.Sched]>;
+ def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
-/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
-multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
- def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
- itins.rr>, VEX_L;
- def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
+ def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
- itins.rm>, VEX_L;
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int, OpndItins itins> {
+ Intrinsic V4F32Int, Intrinsic V8F32Int,
+ OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))],
+ itins.rr>, VEX, Sched<[itins.Sched]>;
+ def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V8F32Int VR256:$src))],
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
+}
+
def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
- itins.rm>;
-}
-
-/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
-multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int, OpndItins itins> {
- def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int VR256:$src))],
- itins.rr>, VEX_L;
- def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))],
- itins.rm>, VEX_L;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
+let Predicates = [HasAVX], hasSideEffects = 0 in {
+ def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
+ let mayLoad = 1 in {
+ def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+                          (ins FR64:$src1, f64mem:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat("v", OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ }
+}
+
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
+ [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
// See the comments in sse1_fp_unop_s for why this is OptForSize.
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
-}
-
-/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
-let hasSideEffects = 0 in
-multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
- def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- let mayLoad = 1 in {
- def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- }
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, OpndItins itins> {
+let Predicates = [HasAVX] in {
+ def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
+ itins.rr>, VEX, Sched<[itins.Sched]>;
+ def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat("v", OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
+}
+
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
-/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
-multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
- def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
- itins.rr>, VEX_L;
- def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
- itins.rm>, VEX_L;
-}
-
-/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
-multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int, OpndItins itins> {
- def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int VR128:$src))],
- itins.rr>;
- def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))],
- itins.rm>;
-}
-
-/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
-multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int, OpndItins itins> {
- def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int VR256:$src))],
- itins.rr>, VEX_L;
- def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))],
- itins.rm>, VEX_L;
-}
+// Square root.
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
+ SSE_SQRTS>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
+ SSE_SQRTS>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>;
-let Predicates = [HasAVX] in {
- // Square root.
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">,
- sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
-
- defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps,
- SSE_SQRTP>,
- sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd,
- SSE_SQRTP>,
- sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256,
- SSE_SQRTP>,
- sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256,
- SSE_SQRTP>,
- VEX;
-
- // Reciprocal approximations. Note that these typically require refinement
- // in order to obtain suitable precision.
- defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
- defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
- sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256,
- SSE_SQRTP>,
- sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps,
- SSE_SQRTP>, VEX;
-
- defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
- defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, SSE_RCPP>,
- sse1_fp_unop_p_y<0x53, "vrcp", X86frcp, SSE_RCPP>,
- sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256,
- SSE_RCPP>,
- sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps,
- SSE_RCPP>, VEX;
-}
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
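+// A typical refinement is one Newton-Raphson step, e.g. for rcpps
+//   x1 = x0 * (2.0 - d * x0)
+// and for rsqrtps
+//   x1 = x0 * (1.5 - 0.5 * d * x0 * x0)
+// roughly doubling the ~12 bits of precision of the hardware estimate.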
+defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
+ int_x86_avx_rsqrt_ps_256, SSE_SQRTP>;
+defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
+ int_x86_avx_rcp_ps_256, SSE_RCPP>;
def : Pat<(f32 (fsqrt FR32:$src)),
(VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
@@ -3283,59 +3389,11 @@ let Predicates = [HasAVX] in {
(VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}
-// Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
- SSE_SQRTS>,
- sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
- sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps, SSE_SQRTS>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
- SSE_SQRTS>,
- sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
- sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
-
-/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
-multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
- Intrinsic F32Int, OpndItins itins> {
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
- // For scalar unary operations, fold a load into the operation
- // only in OptForSize mode. It eliminates an instruction, but it also
- // eliminates a whole-register clobber (the load), so it introduces a
- // partial register update condition.
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
- let Constraints = "$src1 = $dst" in {
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rr>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rm>;
- }
-}
-
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
- SSE_SQRTS>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
- sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
- SSE_SQRTS>;
let Predicates = [UseSSE1] in {
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
(RSQRTSSr_Int VR128:$src, VR128:$src)>;
-}
-
-defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
- SSE_RCPS>,
- sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
- sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
-let Predicates = [UseSSE1] in {
def : Pat<(int_x86_sse_rcp_ss VR128:$src),
(RCPSSr_Int VR128:$src, VR128:$src)>;
}
@@ -3347,52 +3405,48 @@ let Predicates = [UseSSE1] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 400 in { // Prefer non-temporal versions
- def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
- def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2i64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
-
- def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4i64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
-}
+let SchedRW = [WriteStore] in {
+def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2i64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4i64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
-let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
@@ -3408,9 +3462,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
IIC_SSE_MOVNT>;
-def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
-
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
@@ -3422,14 +3473,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
[(nontemporalstore (i64 GR64:$src), addr:$dst)],
IIC_SSE_MOVNT>,
TB, Requires<[HasSSE2]>;
-}
+} // SchedRW = [WriteStore]
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
+} // AddedComplexity
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-let Predicates = [HasSSE1] in {
+let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
IIC_SSE_PREFETCH>, TB;
@@ -3444,6 +3502,8 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
IIC_SSE_PREFETCH>, TB;
}
+// FIXME: How should these memory instructions be modeled?
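+// For now they borrow WriteLoad so the machine models at least see a
+// memory-unit cost; none of clflush or the fences is actually a load.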
+let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
@@ -3463,6 +3523,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
TB, Requires<[HasSSE2]>;
+} // SchedRW
def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
@@ -3474,17 +3535,17 @@ def : Pat<(X86MFence), (MFENCE)>;
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
- IIC_SSE_LDMXCSR>, VEX;
+ IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
- IIC_SSE_STMXCSR>, VEX;
+ IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>;
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
- IIC_SSE_LDMXCSR>;
+ IIC_SSE_LDMXCSR>, Sched<[WriteLoad]>;
def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
- IIC_SSE_STMXCSR>;
+ IIC_SSE_STMXCSR>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
@@ -3492,23 +3553,23 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
VEX;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
VEX, VEX_L;
-}
def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
VEX;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
VEX, VEX_L;
+}
// For Disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>,
@@ -3525,7 +3586,8 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}
-let canFoldAsLoad = 1, mayLoad = 1 in {
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
VEX;
@@ -3542,7 +3604,7 @@ let Predicates = [HasAVX] in {
}
}
-let mayStore = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
@@ -3561,6 +3623,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
}
}
+let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
@@ -3570,7 +3633,7 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
// For Disassembler
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -3579,8 +3642,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
}
+} // SchedRW
-let canFoldAsLoad = 1, mayLoad = 1 in {
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
@@ -3592,7 +3657,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
XS, Requires<[UseSSE2]>;
}
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
@@ -3604,30 +3669,23 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
XS, Requires<[UseSSE2]>;
}
-// Intrinsic forms of MOVDQU load and store
-def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
- IIC_SSE_MOVU_P_MR>,
- XS, VEX, Requires<[HasAVX]>;
-
-def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
- IIC_SSE_MOVU_P_MR>,
- XS, Requires<[UseSSE2]>;
-
} // ExeDomain = SSEPackedInt
let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
+ (VMOVDQUmr addr:$dst, VR128:$src)>;
def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
(VMOVDQUYmr addr:$dst, VR256:$src)>;
}
+let Predicates = [UseSSE2] in
+def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
+ (MOVDQUmr addr:$dst, VR128:$src)>;
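For reference, int_x86_sse2_storeu_dq is the LLVM intrinsic behind the
standard C unaligned-store intrinsic; a minimal sketch of the user-level
operation these Pat<> entries now select directly (helper name illustrative):

    #include <immintrin.h>

    /* Unaligned 128-bit integer store; lowers to (v)movdqu, which the
       patterns above now pick for int_x86_sse2_storeu_dq. */
    void store_unaligned(__m128i *p, __m128i v) {
        _mm_storeu_si128(p, v);
    }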
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteVecIMul in
def SSE_PMADD : OpndItins<
IIC_SSE_PMADD, IIC_SSE_PMADD
>;
@@ -3646,14 +3704,33 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
+multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
+ Intrinsic IntId256, OpndItins itins,
+ bit IsCommutable = 0> {
+let Predicates = [HasAVX] in
+ defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128,
+ VR128, memopv2i64, i128mem, itins,
+ IsCommutable, 0>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64,
+ i128mem, itins, IsCommutable, 1>;
+
+let Predicates = [HasAVX2] in
+ defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256,
+ VR256, memopv4i64, i256mem, itins,
+ IsCommutable, 0>, VEX_4V, VEX_L;
}
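A single defm through PDI_binop_all_int (like PDI_binop_all) covers the SSE2
two-address form, the AVX three-operand VEX form, and the AVX2 256-bit form.
Roughly how the variants surface from C, sketched with the pavgb family that
is instantiated below (the 256-bit form assumes -mavx2):

    #include <immintrin.h>

    __m128i avg128(__m128i a, __m128i b) {
        return _mm_avg_epu8(a, b);     /* pavgb / vpavgb xmm (SSE2 / AVX) */
    }
    __m256i avg256(__m256i a, __m256i b) {
        return _mm256_avg_epu8(a, b);  /* vpavgb ymm (AVX2, VEX_L) */
    }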
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
@@ -3669,23 +3746,25 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
- itins.rr>;
+ itins.rr>, Sched<[WriteVecShift]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1,
- (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
+ (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
+ Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
(ins RC:$src1, i32i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>,
+ Sched<[WriteVecShift]>;
}
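The rr, rm, and ri defs above model shift-by-register, shift-by-memory, and
shift-by-immediate encodings. In C the register and immediate forms map to
distinct intrinsics; a small sketch for pslld (function names illustrative):

    #include <immintrin.h>

    __m128i shift_imm(__m128i a) {
        return _mm_slli_epi32(a, 4);   /* pslld $4, %xmm0    (ri form) */
    }
    __m128i shift_reg(__m128i a, __m128i cnt) {
        return _mm_sll_epi32(a, cnt);  /* pslld %xmm1, %xmm0 (rr form) */
    }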
-/// PDI_binop_rm - Simple SSE2 binary operator with different src and dst types
+/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType DstVT, ValueType SrcVT, RegisterClass RC,
PatFrag memop_frag, X86MemOperand x86memop,
@@ -3697,260 +3776,88 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>;
+ (bitconvert (memop_frag addr:$src2)))))]>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
-// 128-bit Integer Arithmetic
+defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
+ SSE_INTALUQ_ITINS_P, 1>;
+defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
+ SSE_INTMUL_ITINS_P, 1>;
+defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
+ SSE_INTALUQ_ITINS_P, 0>;
+defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
-let Predicates = [HasAVX] in {
-defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0 /*3addr*/>,
- VEX_4V;
-defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+// Intrinsic forms
+defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
+ int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>;
+defm PSUBSW : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
+ int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>;
+defm PADDSB : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
+ int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>;
+defm PADDSW : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w,
+ int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>;
+defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
+ int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>;
+defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
+ int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>;
+defm PMULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
+ int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>;
+defm PMULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
+ int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>;
+defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
+ int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
+defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
+ int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>;
+defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
+ int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
+defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
+ int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>;
+
+let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
VEX_4V;
-
-// Intrinsic forms
-defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
-defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd,
- VR128, memopv2i64, i128mem,
- SSE_PMADD, 1, 0>, VEX_4V;
-defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
-defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
- i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
- i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
- i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
- i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V, VEX_L;
+let Predicates = [HasAVX2] in
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
VR256, memopv4i64, i256mem,
SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-
-// Intrinsic forms
-defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
- VR256, memopv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
- VR256, memopv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
- VR256, memopv4i64, i256mem,
- SSE_PMADD, 1, 0>, VEX_4V, VEX_L;
-defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
-defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P, 1>;
-defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P, 1>;
-defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTMUL_ITINS_P, 1>;
-defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
- i128mem, SSE_INTALU_ITINS_P>;
-defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64,
- i128mem, SSE_INTALUQ_ITINS_P>;
+let Constraints = "$src1 = $dst" in
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
-// Intrinsic forms
-defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1>;
-defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
- VR128, memopv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1>;
-defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
- VR128, memopv2i64, i128mem,
- SSE_PMADD, 1>;
-defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
-
-} // Constraints = "$src1 = $dst"
-
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
@@ -3983,7 +3890,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def VPSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -4029,7 +3936,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR256, v8i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 256-bit logical shifts.
def VPSLLDQYri : PDIi8<0x73, MRM7r,
(outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
@@ -4075,7 +3982,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P>;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def PSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -4132,186 +4039,109 @@ let Predicates = [UseSSE2] in {
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX] in {
- defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
- defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
- defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
- defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
- defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L;
- defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
- defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
- defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
- defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 1>;
- defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
- defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
- defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-} // Constraints = "$src1 = $dst"
+defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 1>;
+defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 0>;
+defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 0>;
+defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
+ SSE_INTALU_ITINS_P, 0>;
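These compares write all-ones or all-zero element masks rather than setting
flags; a minimal sketch of the usual C usage:

    #include <immintrin.h>

    /* pcmpeqd: each 32-bit lane becomes 0xFFFFFFFF on equality, else 0. */
    __m128i eq_mask(__m128i a, __m128i b) {
        return _mm_cmpeq_epi32(a, b);
    }

    /* pcmpgtb: signed greater-than, one mask byte per input byte. */
    __m128i gt_mask(__m128i a, __m128i b) {
        return _mm_cmpgt_epi8(a, b);
    }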
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Pack Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX] in {
-defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
-defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb,
- VR256, memopv4i64, i256mem,
- SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
-defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
- VR128, memopv2i64, i128mem,
- SSE_INTALU_ITINS_P>;
-} // Constraints = "$src1 = $dst"
+defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
+ int_x86_avx2_packsswb, SSE_INTALU_ITINS_P, 0>;
+defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
+ int_x86_avx2_packssdw, SSE_INTALU_ITINS_P, 0>;
+defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
+ int_x86_avx2_packuswb, SSE_INTALU_ITINS_P, 0>;
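The pack instructions narrow each element with saturation; packsswb in C as
a sketch:

    #include <immintrin.h>

    /* packsswb: 8 x i16 from a and 8 x i16 from b, each clamped to
       [-128, 127], packed into 16 x i8 (a's lanes in the low half). */
    __m128i pack_sat(__m128i a, __m128i b) {
        return _mm_packs_epi16(a, b);
    }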
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
//===---------------------------------------------------------------------===//
let ExeDomain = SSEPackedInt in {
-multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, SDNode OpNode> {
-def ri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
-def mi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (vt (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
-}
-
-multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> {
-def Yri : Ii8<0x70, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>;
-def Ymi : Ii8<0x70, MRMSrcMem,
- (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst,
- (vt (OpNode (bitconvert (memopv4i64 addr:$src1)),
- (i8 imm:$src2))))]>;
-}
-} // ExeDomain = SSEPackedInt
-
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
+ SDNode OpNode> {
let Predicates = [HasAVX] in {
- let AddedComplexity = 5 in
- defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX;
-
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX;
-
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX;
-
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>;
+ def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
+ def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX2] in {
- defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>,
- TB, OpSize, VEX,VEX_L;
- defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>,
- XS, VEX, VEX_L;
- defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>,
- XD, VEX, VEX_L;
+ def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>;
+ def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src1, i8imm:$src2),
+ !strconcat("v", OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [UseSSE2] in {
- let AddedComplexity = 5 in
- defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize;
+ def ri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
+ def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>,
+ Sched<[WriteShuffleLd]>;
+}
+}
+} // ExeDomain = SSEPackedInt
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS;
+defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, TB, OpSize;
+defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS;
+defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD;
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD;
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+}
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
+let Predicates = [UseSSE2] in {
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
}
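pshufd selects each destination dword with a 2-bit field of the immediate;
a sketch of the C form using the usual _MM_SHUFFLE helper:

    #include <immintrin.h>

    /* pshufd $0x1B: reverse the four 32-bit lanes.
       _MM_SHUFFLE(d,c,b,a) packs the lane selectors high-to-low. */
    __m128i reverse_lanes(__m128i v) {
        return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3));
    }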
//===---------------------------------------------------------------------===//
@@ -4327,7 +4157,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>, Sched<[WriteShuffle]>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -4336,7 +4166,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
[(set VR128:$dst, (OpNode VR128:$src1,
(bc_frag (memopv2i64
addr:$src2))))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
@@ -4344,12 +4175,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
def Yrr : PDI<opc, MRMSrcReg,
(outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>,
+ Sched<[WriteShuffle]>;
def Yrm : PDI<opc, MRMSrcMem,
(outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (OpNode VR256:$src1,
- (bc_frag (memopv4i64 addr:$src2))))]>;
+ (bc_frag (memopv4i64 addr:$src2))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
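The unpack forms interleave elements from the low (or high) halves of the
two sources; punpcklbw sketched in C:

    #include <immintrin.h>

    /* punpcklbw: result = a0,b0,a1,b1,...,a7,b7 (low 8 bytes of each). */
    __m128i interleave_lo(__m128i a, __m128i b) {
        return _mm_unpacklo_epi8(a, b);
    }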
let Predicates = [HasAVX] in {
@@ -4426,7 +4259,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>;
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffle]>;
def rmi : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
@@ -4435,7 +4269,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))], IIC_SSE_PINSRW>;
+ imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
// Extract
@@ -4444,12 +4279,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>, TB, OpSize, VEX;
+ imm:$src2))]>, TB, OpSize, VEX,
+ Sched<[WriteShuffle]>;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))], IIC_SSE_PEXTRW>;
+ imm:$src2))], IIC_SSE_PEXTRW>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
// Insert
let Predicates = [HasAVX] in {
@@ -4457,7 +4294,7 @@ let Predicates = [HasAVX] in {
def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, TB, OpSize, VEX_4V;
+ []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst" in
@@ -4469,7 +4306,7 @@ let Constraints = "$src1 = $dst" in
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
@@ -4497,7 +4334,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
let Uses = [EDI] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
@@ -4536,41 +4373,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteMove]>;
def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>;
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ Sched<[WriteMove]>;
def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
@@ -4578,22 +4416,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
@@ -4601,26 +4439,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
+ Sched<[WriteMove]>;
def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
- VEX;
+                                  VEX, Sched<[WriteStore]>;
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
+ Sched<[WriteMove]>;
def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOVDQ>;
+                  IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
+let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"vmov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
@@ -4633,6 +4474,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>;
+} // SchedRW
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
@@ -4641,28 +4483,28 @@ let Predicates = [HasAVX] in
def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVD_ToGP>;
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
@@ -4670,23 +4512,24 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>, VEX;
+ IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>;
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
//
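movd from a GPR writes the low 32 bits and zeroes the rest of the XMM
register, which is what the X86vzmovl patterns below capture; in C:

    #include <immintrin.h>

    /* movd %edi, %xmm0: lane 0 = x, lanes 1-3 = 0. */
    __m128i from_int(int x) {
        return _mm_cvtsi32_si128(x);
    }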
+let SchedRW = [WriteMove] in {
let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -4712,8 +4555,9 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
(v2i64 (scalar_to_vector GR64:$src)))))],
IIC_SSE_MOVDQ>;
}
+} // SchedRW
-let AddedComplexity = 20 in {
+let AddedComplexity = 20, SchedRW = [WriteLoad] in {
def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4726,7 +4570,7 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
(loadi32 addr:$src))))))],
IIC_SSE_MOVDQ>;
-}
+} // AddedComplexity, SchedRW
let Predicates = [HasAVX] in {
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
@@ -4775,6 +4619,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//
+
+let SchedRW = [WriteLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4786,10 +4632,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (scalar_to_vector (loadi64 addr:$src))))],
IIC_SSE_MOVDQ>, XS,
Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
+} // SchedRW
//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
+let SchedRW = [WriteStore] in {
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -4800,17 +4648,19 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>;
+} // SchedRW
//===---------------------------------------------------------------------===//
// Store / copy the lower 64 bits of an XMM register.
//
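int_x86_sse2_storel_dq stores only the low quadword; the equivalent
user-level form, as a sketch:

    #include <immintrin.h>

    /* movq: write the low 64 bits of v to *p; the high half is ignored. */
    void store_low64(__m128i *p, __m128i v) {
        _mm_storel_epi64(p, v);
    }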
def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX,
+ Sched<[WriteStore]>;
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
let AddedComplexity = 20 in
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4819,7 +4669,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, VEX, Requires<[HasAVX]>;
+ XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>;
let AddedComplexity = 20 in
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4828,7 +4678,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, Requires<[UseSSE2]>;
+ XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
let Predicates = [HasAVX], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
@@ -4858,6 +4708,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)),
// Moving from XMM to XMM and clearing the upper 64 bits. Note: there is a
// bug in the IA32 documentation; movq xmm1, xmm2 does clear the high bits.
//
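The zeroing behavior the comment describes is exactly what the C move
intrinsic exposes; a minimal sketch:

    #include <immintrin.h>

    /* movq %xmm1, %xmm0: copy the low 64 bits and clear bits 127:64,
       regardless of what the (buggy) manual text suggested. */
    __m128i copy_low_zero_high(__m128i v) {
        return _mm_move_epi64(v);
    }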
+let SchedRW = [WriteVecLogic] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -4870,7 +4721,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
XS, Requires<[UseSSE2]>;
+} // SchedRW
+let SchedRW = [WriteVecLogicLd] in {
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -4886,6 +4739,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>;
}
+} // SchedRW
let AddedComplexity = 20 in {
let Predicates = [HasAVX] in {
@@ -4903,6 +4757,7 @@ let AddedComplexity = 20 in {
}
// Instructions to match in the assembler
+let SchedRW = [WriteMove] in {
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVDQ>, VEX, VEX_W;
@@ -4913,16 +4768,19 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVDQ>, VEX, VEX_W;
+} // SchedRW
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
+let SchedRW = [WriteMove] in {
let Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
+} // SchedRW
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
@@ -4933,11 +4791,11 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (vt (OpNode RC:$src)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (OpNode (mem_frag addr:$src)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX] in {
@@ -4993,25 +4851,27 @@ multiclass sse3_replicate_dfp<string OpcodeStr> {
let neverHasSideEffects = 1 in
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [], IIC_SSE_MOV_LH>;
+ [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(v2f64 (X86Movddup
(scalar_to_vector (loadf64 addr:$src)))))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}
// FIXME: Merge with the above class when there are patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>;
+ [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
+ Sched<[WriteShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(v4f64 (X86Movddup
- (scalar_to_vector (loadf64 addr:$src)))))]>;
+ (scalar_to_vector (loadf64 addr:$src)))))]>,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX] in {
@@ -5059,6 +4919,7 @@ let Predicates = [UseSSE3] in {
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//
+let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
@@ -5072,6 +4933,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
IIC_SSE_LDDQU>;
+}
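lddqu is an unaligned integer load that may over-read within aligned blocks
to avoid cache-line-split penalties on some microarchitectures; the SSE3
intrinsic in C (assumes -msse3):

    #include <immintrin.h>

    __m128i load_lddqu(const __m128i *p) {
        return _mm_lddqu_si128(p);  /* lddqu (%rdi), %xmm0 */
    }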
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
@@ -5085,13 +4947,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : I<0xD0, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>;
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -5128,14 +4992,15 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
- IIC_SSE_HADDSUB_RM>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
@@ -5143,14 +5008,15 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
- IIC_SSE_HADDSUB_RM>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
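The horizontal forms add (or subtract) adjacent pairs within and across the
two sources; haddps in C as a sketch (assumes -msse3):

    #include <immintrin.h>

    /* haddps: result = { a0+a1, a2+a3, b0+b1, b2+b3 }. */
    __m128 hadd(__m128 a, __m128 b) {
        return _mm_hadd_ps(a, b);
    }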
let Predicates = [HasAVX] in {
@@ -5199,7 +5065,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>,
- OpSize;
+ OpSize, Sched<[WriteVecALU]>;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src),
@@ -5207,7 +5073,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst,
(IntId128
(bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>,
- OpSize;
+ OpSize, Sched<[WriteVecALULd]>;
}
/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
@@ -5217,14 +5083,15 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
(ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (IntId256 VR256:$src))]>,
- OpSize;
+ OpSize, Sched<[WriteVecALU]>;
def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(IntId256
- (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize,
+ Sched<[WriteVecALULd]>;
}
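The SSSE3 absolute-value ops are plain unary intrinsics; pabsw in C
(assumes -mssse3):

    #include <immintrin.h>

    /* pabsw: per-lane |x| on 8 x i16; note |INT16_MIN| stays INT16_MIN. */
    __m128i abs16(__m128i v) {
        return _mm_abs_epi16(v);
    }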
let Predicates = [HasAVX] in {
@@ -5256,6 +5123,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteVecALU in {
def SSE_PHADDSUBD : OpndItins<
IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
>;
@@ -5265,12 +5133,16 @@ def SSE_PHADDSUBSW : OpndItins<
def SSE_PHADDSUBW : OpndItins<
IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
>;
+}
+let Sched = WriteShuffle in
def SSE_PSHUFB : OpndItins<
IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
>;
+let Sched = WriteVecALU in
def SSE_PSIGN : OpndItins<
IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
>;
+let Sched = WriteVecIMul in
def SSE_PMULHRSW : OpndItins<
IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
>;
@@ -5287,7 +5159,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
- OpSize;
+ OpSize, Sched<[itins.Sched]>;
def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
@@ -5295,7 +5167,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize;
+ (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
@@ -5309,7 +5182,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize;
+ OpSize, Sched<[itins.Sched]>;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -5317,7 +5190,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
@@ -5451,7 +5325,7 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
-multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
+multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
let neverHasSideEffects = 1 in {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
@@ -5459,7 +5333,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
@@ -5467,63 +5341,63 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
-multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
+multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
let neverHasSideEffects = 1 in {
def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+ []>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+ []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
let Predicates = [HasAVX] in
- defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
+ defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V;
let Predicates = [HasAVX2] in
- defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L;
+ defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
- defm PALIGN : ssse3_palign<"palignr">;
+ defm PALIGN : ssse3_palignr<"palignr">;
let Predicates = [HasAVX2] in {
-def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
}
let Predicates = [HasAVX] in {
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
let Predicates = [UseSSSE3] in {
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
@@ -5531,6 +5405,7 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
// SSE3 - Thread synchronization
//===---------------------------------------------------------------------===//
+let SchedRW = [WriteSystem] in {
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
[(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
@@ -5544,6 +5419,7 @@ let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
[(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
TB, Requires<[HasSSE3]>;
+} // SchedRW
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
@@ -5850,6 +5726,55 @@ defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>;
+
+ def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
+
+ def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),
+ (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
+ (VPMOVSXWDYrm addr:$src)>;
+ def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
+ (VPMOVSXDQYrm addr:$src)>;
+
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXBDYrm addr:$src)>;
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXBDYrm addr:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXWQYrm addr:$src)>;
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXWQYrm addr:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBQYrm addr:$src)>;
+}
+
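The predicate block above selects AVX2's 256-bit PMOVSX forms for LLVM's X86vsext node. As a reference point, here is a scalar model of the narrowest form, pmovsxbw; the function is our illustration and not part of the patch:

    #include <array>
    #include <cstdint>

    // Scalar model of pmovsxbw: each signed 8-bit lane of the source is
    // sign-extended into a 16-bit lane of the destination. The other
    // pmovsx{bd,bq,wd,wq,dq} forms differ only in lane width and count.
    static std::array<int16_t, 8> pmovsxbw(const std::array<int8_t, 8> &Src) {
      std::array<int16_t, 8> Dst;
      for (int I = 0; I < 8; ++I)
        Dst[I] = static_cast<int16_t>(Src[I]); // implicit sign extension
      return Dst;
    }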
let Predicates = [HasAVX] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
@@ -5864,6 +5789,15 @@ let Predicates = [HasAVX] in {
}
let Predicates = [UseSSE41] in {
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
+
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
@@ -5874,6 +5808,34 @@ let Predicates = [UseSSE41] in {
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
(PMOVZXBQrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXWDrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXWQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (extloadi32i16 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXDQrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (PMOVSXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (PMOVSXBWrm addr:$src)>;
}
let Predicates = [HasAVX2] in {
@@ -5934,6 +5896,44 @@ let Predicates = [HasAVX] in {
(VPMOVZXDQrm addr:$src)>;
def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
(VPMOVZXDQrm addr:$src)>;
+
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXWDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXDQrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))),
+ (VPMOVSXBWrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))),
+ (VPMOVSXBWrm addr:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXBDrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32
+ (scalar_to_vector (loadi32 addr:$src))))))),
+ (VPMOVSXWQrm addr:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32
+ (scalar_to_vector (extloadi32i16 addr:$src))))))),
+ (VPMOVSXBQrm addr:$src)>;
}
let Predicates = [UseSSE41] in {
@@ -6273,6 +6273,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
// Operation, reg.
+ let hasSideEffects = 0 in
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
!if(Is2Addr,
@@ -6306,6 +6307,7 @@ let ExeDomain = GenericDomain in {
OpSize;
// Operation, reg.
+ let hasSideEffects = 0 in
def SDr : SS4AIi8<opcsd, MRMSrcReg,
(outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
!if(Is2Addr,
@@ -6378,12 +6380,47 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v4f32 (fnearbyint VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0xC))>;
+ def : Pat<(v4f32 (fceil VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x2))>;
+ def : Pat<(v4f32 (frint VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x4))>;
+ def : Pat<(v4f32 (ftrunc VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x3))>;
+
def : Pat<(v2f64 (ffloor VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (fnearbyint VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0xC))>;
+ def : Pat<(v2f64 (fceil VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x2))>;
+ def : Pat<(v2f64 (frint VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x4))>;
+ def : Pat<(v2f64 (ftrunc VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x3))>;
+
def : Pat<(v8f32 (ffloor VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0x1))>;
+ def : Pat<(v8f32 (fnearbyint VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0xC))>;
+ def : Pat<(v8f32 (fceil VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x2))>;
+ def : Pat<(v8f32 (frint VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x4))>;
+ def : Pat<(v8f32 (ftrunc VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x3))>;
+
def : Pat<(v4f64 (ffloor VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0x1))>;
+ def : Pat<(v4f64 (fnearbyint VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0xC))>;
+ def : Pat<(v4f64 (fceil VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x2))>;
+ def : Pat<(v4f64 (frint VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x4))>;
+ def : Pat<(v4f64 (ftrunc VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x3))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6417,8 +6454,25 @@ let Predicates = [UseSSE41] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v4f32 (fnearbyint VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0xC))>;
+ def : Pat<(v4f32 (fceil VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x2))>;
+ def : Pat<(v4f32 (frint VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x4))>;
+ def : Pat<(v4f32 (ftrunc VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x3))>;
+
def : Pat<(v2f64 (ffloor VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (fnearbyint VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0xC))>;
+ def : Pat<(v2f64 (fceil VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x2))>;
+ def : Pat<(v2f64 (frint VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x4))>;
+ def : Pat<(v2f64 (ftrunc VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x3))>;
}
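The VROUND patterns above map each rounding node to one immediate. Per the SSE4.1 encoding, immediate bits 1:0 select the mode (00 nearest, 01 down, 10 up, 11 truncate), bit 2 defers to the rounding mode in MXCSR, and bit 3 suppresses the precision (inexact) exception, which is why 0x4 models rint and 0xC models nearbyint. A minimal sketch with the standard intrinsics; the wrapper names are ours:

    #include <smmintrin.h> // SSE4.1 intrinsics; compile with -msse4.1

    static __m128 floor4(__m128 V)     { return _mm_round_ps(V, 0x1); }
    static __m128 ceil4(__m128 V)      { return _mm_round_ps(V, 0x2); }
    static __m128 trunc4(__m128 V)     { return _mm_round_ps(V, 0x3); }
    static __m128 rint4(__m128 V)      { return _mm_round_ps(V, 0x4); } // may raise inexact
    static __m128 nearbyint4(__m128 V) { return _mm_round_ps(V, 0xC); } // inexact suppressed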
//===----------------------------------------------------------------------===//
@@ -6575,67 +6629,6 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
(bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
-let Predicates = [HasAVX] in {
- let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
- 0>, VEX_4V;
- defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
- 0>, VEX_4V;
- defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
- 0>, VEX_4V;
- defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
- 0>, VEX_4V;
- defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
- 0>, VEX_4V;
- defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
- 0>, VEX_4V;
- defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
- 0>, VEX_4V;
- defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
- 0>, VEX_4V;
- defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
- 0>, VEX_4V;
- defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
- 0>, VEX_4V;
-}
-
-let Predicates = [HasAVX2] in {
- let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
- int_x86_avx2_packusdw>, VEX_4V, VEX_L;
- defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
- int_x86_avx2_pmins_b>, VEX_4V, VEX_L;
- defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
- int_x86_avx2_pmins_d>, VEX_4V, VEX_L;
- defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud",
- int_x86_avx2_pminu_d>, VEX_4V, VEX_L;
- defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw",
- int_x86_avx2_pminu_w>, VEX_4V, VEX_L;
- defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
- int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L;
- defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
- int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L;
- defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
- int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L;
- defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
- int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L;
- defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
- int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
- let isCommutable = 0 in
- defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
- defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
- defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
- defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
- defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
- defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
- defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
- defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
- defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
- defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
-}
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -6659,6 +6652,76 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
let Predicates = [HasAVX] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+ 0>, VEX_4V;
+ defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+ 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
+ int_x86_avx2_packusdw>, VEX_4V, VEX_L;
+ defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
+ int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
+ memopv2i64, i128mem>;
+ defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
+ memopv2i64, i128mem>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+}
+
+let Predicates = [HasAVX] in {
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
memopv2i64, i128mem, 0>, VEX_4V;
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
@@ -6776,7 +6839,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
- IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+ NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
@@ -6785,7 +6848,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
[(set RC:$dst,
(IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
RC:$src3))],
- IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+ NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
}
let Predicates = [HasAVX] in {
@@ -6839,31 +6902,31 @@ let Predicates = [HasAVX] in {
(v4f64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
+ def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
(imm:$mask))),
- (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
- def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
+ (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+ def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
(imm:$mask))),
- (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+ (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
- def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+ def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
(imm:$mask))),
- (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+ (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
(imm:$mask))),
- (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+ (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
(imm:$mask))),
- (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
(v32i8 VR256:$src2))),
- (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
+ (VPBLENDVBYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
+ def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
(imm:$mask))),
- (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+ (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}
/// SS41I_ternary_int - SSE 4.1 ternary operator
@@ -6927,15 +6990,15 @@ let Predicates = [UseSSE41] in {
(v2f64 VR128:$src2))),
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
- def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+ def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
(imm:$mask))),
- (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+ (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
(imm:$mask))),
- (BLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+ (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
(imm:$mask))),
- (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+ (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}
@@ -7821,6 +7884,13 @@ defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
VR256, memopv4i64, i256mem>, VEX_L;
}
+def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
+ imm:$mask)),
+ (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
+def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
+ imm:$mask)),
+ (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+
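The blend changes above fold the old X86Blendps/X86Blendpd/X86Blendpw nodes into a single X86Blendi and stop swapping $src1/$src2 when emitting the instruction. A scalar sketch of the immediate blend, as we read the SSE4.1 semantics, shows why the order matters: a set bit selects the second source, so the old swapped emission inverted the selection.

    #include <array>
    #include <cstdint>

    // Scalar model of BLENDPS: bit I of the immediate picks the lane from
    // the second source; a clear bit keeps the first source's lane.
    static std::array<float, 4> blendps(const std::array<float, 4> &Src1,
                                        const std::array<float, 4> &Src2,
                                        uint8_t Imm) {
      std::array<float, 4> Dst;
      for (int I = 0; I < 4; ++I)
        Dst[I] = (Imm & (1u << I)) ? Src2[I] : Src1[I];
      return Dst;
    }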
//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index 893488c159ea..5b6298b541bc 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -15,7 +15,7 @@
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
"shl{b}\t{%cl, $dst|$dst, CL}",
@@ -51,6 +51,7 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
// NOTE: We don't include patterns for shifts of a register by one, because
// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
+let hasSideEffects = 0 in {
def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
"shl{b}\t$dst", [], IIC_SR>;
def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
@@ -59,10 +60,12 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
"shl{l}\t$dst", [], IIC_SR>;
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t$dst", [], IIC_SR>;
+} // hasSideEffects = 0
} // isConvertibleToThreeAddress = 1
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
// using CL?
let Uses = [CL] in {
@@ -116,8 +119,9 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
"shl{q}\t$dst",
[(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
"shr{b}\t{%cl, $dst|$dst, CL}",
@@ -161,9 +165,10 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
"shr{q}\t$dst",
[(set GR64:$dst, (srl GR64:$src1, (i8 1)))], IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t{%cl, $dst|$dst, CL}",
@@ -214,8 +219,9 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
"shr{q}\t$dst",
[(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t{%cl, $dst|$dst, CL}",
@@ -271,9 +277,10 @@ def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
"sar{q}\t$dst",
[(set GR64:$dst, (sra GR64:$src1, (i8 1)))],
IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t{%cl, $dst|$dst, CL}",
@@ -328,12 +335,14 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
"sar{q}\t$dst",
[(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Rotate instructions
//===----------------------------------------------------------------------===//
-let Constraints = "$src1 = $dst" in {
+let hasSideEffects = 0 in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t$dst", [], IIC_SR>;
def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
@@ -402,6 +411,7 @@ def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
} // Constraints = "$src = $dst"
+let SchedRW = [WriteShiftLd, WriteRMW] in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t$dst", [], IIC_SR>;
def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
@@ -455,8 +465,10 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
}
+} // SchedRW
+} // hasSideEffects = 0
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
// FIXME: provide shorter instructions when imm8 == 1
let Uses = [CL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -508,8 +520,9 @@ def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
"rol{q}\t$dst",
[(set GR64:$dst, (rotl GR64:$src1, (i8 1)))],
IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t{%cl, $dst|$dst, CL}",
@@ -564,8 +577,9 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
"rol{q}\t$dst",
[(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t{%cl, $dst|$dst, CL}",
@@ -616,8 +630,9 @@ def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
"ror{q}\t$dst",
[(set GR64:$dst, (rotr GR64:$src1, (i8 1)))],
IIC_SR>;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t{%cl, $dst|$dst, CL}",
@@ -672,13 +687,14 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
"ror{q}\t$dst",
[(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)],
IIC_SR>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Double shift instructions (generalizations of rotate)
//===----------------------------------------------------------------------===//
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
@@ -761,8 +777,9 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(i8 imm:$src3)))], IIC_SHD64_REG_IM>,
TB;
}
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
+let SchedRW = [WriteShiftLd, WriteRMW] in {
let Uses = [CL] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
@@ -836,6 +853,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD64_MEM_IM>,
TB;
+} // SchedRW
} // Defs = [EFLAGS]
@@ -853,12 +871,12 @@ multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
let neverHasSideEffects = 1 in {
def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TAXD, VEX;
+ []>, TAXD, VEX, Sched<[WriteShift]>;
let mayLoad = 1 in
def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
(ins x86memop:$src1, i8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TAXD, VEX;
+ []>, TAXD, VEX, Sched<[WriteShiftLd]>;
}
}
@@ -866,11 +884,17 @@ multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
let neverHasSideEffects = 1 in {
def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX_4VOp3;
+ VEX_4VOp3, Sched<[WriteShift]>;
let mayLoad = 1 in
def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX_4VOp3;
+ VEX_4VOp3,
+ Sched<[WriteShiftLd,
+ // x86memop:$src1
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault,
+ // RC:$src2
+ ReadAfterLd]>;
}
}
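Beyond attaching WriteShift to the register forms and WriteShiftLd/WriteRMW to the load-op-store forms, the hunks above keep the long-standing NOTE that register shift-by-one patterns are omitted on purpose. The identity that NOTE relies on is easy to check; this throwaway test is ours, not from the tree:

    #include <cassert>
    #include <cstdint>

    // x << 1 == x + x for all unsigned x, so instruction selection can use
    // the cheaper 'add reg,reg' instead of 'shl reg,1'.
    int main() {
      for (uint32_t X = 0; X != 1u << 20; ++X)
        assert((X << 1) == X + X);
      return 0;
    }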
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index ea716bfd6bd8..053417ccde63 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>,
TB;
@@ -35,6 +36,7 @@ let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
[(int_x86_int (i8 3))], IIC_INT3>;
+} // SchedRW
def : Pat<(debugtrap),
(INT3)>;
@@ -43,6 +45,7 @@ def : Pat<(debugtrap),
// FIXME: This doesn't work because InstAlias can't match immediate constants.
//def : InstAlias<"int\t$3", (INT3)>;
+let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)], IIC_INT>;
@@ -65,11 +68,13 @@ def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>;
def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
Requires<[In64BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Input/Output Instructions.
//
+let SchedRW = [WriteSystem] in {
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins),
"in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>;
@@ -113,10 +118,12 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>;
def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize;
def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from debug registers
+let SchedRW = [WriteSystem] in {
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB;
def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
@@ -126,10 +133,12 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from control registers
+let SchedRW = [WriteSystem] in {
def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB;
def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
@@ -139,6 +148,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Segment override instruction prefixes
@@ -155,6 +165,7 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
// Moves to and from segment registers.
//
+let SchedRW = [WriteMove] in {
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize;
def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
@@ -182,10 +193,12 @@ def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Segmentation support instructions.
+let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
@@ -347,16 +360,18 @@ def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
"verw\t$seg", [], IIC_VERW_MEM>, TB;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
"verw\t$seg", [], IIC_VERW_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Descriptor-table support instructions
+let SchedRW = [WriteSystem] in {
def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
- "sgdtw\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
"sgdt\t$dst", [], IIC_SGDT>, TB;
def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
- "sidtw\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "sidt{w}\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>;
def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
"sidt\t$dst", []>, TB;
def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
@@ -374,20 +389,22 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
"sldt{q}\t$dst", [], IIC_SLDT>, TB;
def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
- "lgdtw\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "lgdt{w}\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>;
def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
"lgdt\t$src", [], IIC_LGDT>, TB;
def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
- "lidtw\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "lidt{w}\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>;
def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
"lidt\t$src", [], IIC_LIDT>, TB;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
"lldt{w}\t$src", [], IIC_LLDT_REG>, TB;
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
"lldt{w}\t$src", [], IIC_LLDT_MEM>, TB;
-
+} // SchedRW
+
//===----------------------------------------------------------------------===//
// Specialized register support
+let SchedRW = [WriteSystem] in {
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB;
@@ -410,14 +427,18 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Cache instructions
+let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// XSAVE instructions
+let SchedRW = [WriteSystem] in {
let Defs = [RDX, RAX], Uses = [RCX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
@@ -438,6 +459,7 @@ let Uses = [RDX, RAX] in {
def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
"xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
}
+} // SchedRW
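The sgdt/sidt/lgdt/lidt fixes above move the 16-bit suffix into braces, the multi-dialect convention X86 AsmStrings use: text in braces is split on '|' with the AT&T alternative first, and a lone alternative such as "{w}" applies to AT&T only. The renderer below models that convention; it is our sketch, not LLVM's asm-writer code, and it assumes a well-formed template:

    #include <cassert>
    #include <string>

    static std::string renderVariant(const std::string &Tmpl, unsigned Dialect) {
      std::string Out;
      for (std::string::size_type I = 0; I < Tmpl.size(); ++I) {
        if (Tmpl[I] != '{') {
          Out += Tmpl[I];
          continue;
        }
        std::string::size_type End = Tmpl.find('}', I);
        std::string Inner = Tmpl.substr(I + 1, End - I - 1);
        std::string::size_type Bar = Inner.find('|');
        if (Bar == std::string::npos)
          Out += (Dialect == 0) ? Inner : ""; // lone suffix is AT&T-only
        else
          Out += (Dialect == 0) ? Inner.substr(0, Bar) : Inner.substr(Bar + 1);
        I = End;
      }
      return Out;
    }

    int main() {
      assert(renderVariant("sgdt{w}\t$dst", 0) == "sgdtw\t$dst"); // AT&T
      assert(renderVariant("sgdt{w}\t$dst", 1) == "sgdt\t$dst");  // Intel
      return 0;
    }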
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index ad55058ede6c..363a190aa854 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -15,6 +15,9 @@
//===----------------------------------------------------------------------===//
// TSX instructions
+def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
let usesCustomInserter = 1 in
def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
"# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
@@ -22,11 +25,15 @@ def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
let isBranch = 1, isTerminator = 1, Defs = [EAX] in
def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst),
- "xbegin\t$dst", []>;
+ "xbegin\t$dst", []>, Requires<[HasRTM]>;
def XEND : I<0x01, MRM_D5, (outs), (ins),
"xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
+let Defs = [EFLAGS] in
+def XTEST : I<0x01, MRM_D6, (outs), (ins),
+ "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasTSX]>;
+
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
[(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
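The XBEGIN/XEND/XTEST definitions above are what the RTM intrinsics lower to. A hedged usage sketch with the standard <immintrin.h> interface; it needs -mrtm to build and RTM hardware to run, and a production caller would check CPUID first:

    #include <cstdio>
    #include <immintrin.h>

    static int Counter;

    int main() {
      if (_xbegin() == _XBEGIN_STARTED) {
        ++Counter;    // executes transactionally
        if (_xtest()) // XTEST: true only inside a transaction
          _xend();    // commit
      } else {
        ++Counter;    // abort / fallback path, runs non-transactionally
      }
      std::printf("%d\n", Counter);
      return 0;
    }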
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 764aa5d4f236..44d8cce05413 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -16,7 +16,7 @@
#include "X86Relocations.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Valgrind.h"
@@ -79,7 +79,7 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
# define CFI(x)
#endif
-// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
+// Provide a wrapper for LLVMX86CompilationCallback2 that saves non-traditional
// callee saved registers, for the fastcc calling convention.
extern "C" {
#if defined(X86_64_JIT)
@@ -131,12 +131,12 @@ extern "C" {
"subq $32, %rsp\n"
"movq %rbp, %rcx\n" // Pass prev frame and return address
"movq 8(%rbp), %rdx\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
"addq $32, %rsp\n"
#else
"movq %rbp, %rdi\n" // Pass prev frame and return address
"movq 8(%rbp), %rsi\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
#endif
// Restore all XMM arg registers
"movaps 112(%rsp), %xmm7\n"
@@ -213,7 +213,7 @@ extern "C" {
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
"movl %ebp, %esp\n" // Restore ESP
CFI(".cfi_def_cfa_register %esp\n")
"subl $12, %esp\n"
@@ -269,7 +269,7 @@ extern "C" {
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
"addl $16, %esp\n"
"movaps 48(%esp), %xmm3\n"
CFI(".cfi_restore %xmm3\n")
@@ -300,10 +300,7 @@ extern "C" {
SIZE(X86CompilationCallback_SSE)
);
# else
- // the following function is called only from this translation unit,
- // unless we are under 64bit Windows with MSC, where there is
- // no support for inline assembly
- static void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
+ void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
_declspec(naked) void X86CompilationCallback(void) {
__asm {
@@ -317,7 +314,7 @@ extern "C" {
mov eax, dword ptr [ebp+4]
mov dword ptr [esp+4], eax
mov dword ptr [esp], ebp
- call X86CompilationCallback2
+ call LLVMX86CompilationCallback2
mov esp, ebp
sub esp, 12
pop ecx
@@ -337,20 +334,17 @@ extern "C" {
#endif
}
-/// X86CompilationCallback2 - This is the target-specific function invoked by the
+/// This is the target-specific function invoked by the
/// function stub when we did not know the real target of a call. This function
/// must locate the start of the stub or call site and pass it into the JIT
/// compiler function.
extern "C" {
-#if !(defined (X86_64_JIT) && defined(_MSC_VER))
- // the following function is called only from this translation unit,
- // unless we are under 64bit Windows with MSC, where there is
- // no support for inline assembly
-static
-#endif
-void LLVM_ATTRIBUTE_USED
-X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+LLVM_LIBRARY_VISIBILITY void LLVMX86CompilationCallback2(intptr_t *StackPtr,
+ intptr_t RetAddr) {
intptr_t *RetAddrLoc = &StackPtr[1];
+ // We are reading raw stack data here. Tell MemorySanitizer that it is
+ // sufficiently initialized.
+ __msan_unpoison(RetAddrLoc, sizeof(*RetAddrLoc));
assert(*RetAddrLoc == RetAddr &&
"Could not find return address on the stack!");
@@ -517,7 +511,7 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
// This used to use 0xCD, but that value is used by JITMemoryManager to
// initialize the buffer with garbage, which means it may follow a
- // noreturn function call, confusing X86CompilationCallback2. PR 4929.
+ // noreturn function call, confusing LLVMX86CompilationCallback2. PR 4929.
JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
return Result;
}
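The __msan_unpoison call added above must vanish when MemorySanitizer is not enabled. LLVM carries its own wrapper for this; the guard below is the common idiom, shown as a sketch with a macro name of our choosing:

    // Expands to __msan_unpoison only under MemorySanitizer builds.
    #if defined(__has_feature)
    # if __has_feature(memory_sanitizer)
    #  include <sanitizer/msan_interface.h>
    #  define MSAN_UNPOISON(addr, size) __msan_unpoison(addr, size)
    # endif
    #endif
    #ifndef MSAN_UNPOISON
    # define MSAN_UNPOISON(addr, size) ((void)0)
    #endif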
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index d7c08dfb0fdf..f916327378a9 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -14,8 +14,8 @@
#ifndef X86JITINFO_H
#define X86JITINFO_H
-#include "llvm/Function.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/IR/Function.h"
#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index cfd68f74b7b2..a8a9fd8accde 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -13,19 +13,20 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "X86COFFMachineModuleInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
-#include "llvm/Type.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Target/Mangler.h"
using namespace llvm;
namespace {
@@ -238,7 +239,8 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
if (!MI->getOperand(OpNo+i).isReg()) continue;
unsigned Reg = MI->getOperand(OpNo+i).getReg();
- if (Reg == 0) continue;
+ // LEAs can use RIP-relative addressing, and RIP has no sub/super register.
+ if (Reg == 0 || Reg == X86::RIP) continue;
MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
}
@@ -405,6 +407,57 @@ ReSimplify:
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
+ // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
+ // if one of the registers is extended, but the other isn't.
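+ // (Background, not from this patch: the 2-byte VEX prefix can encode
+ // VEX.R but not VEX.B, so putting the extended register in the ModRM
+ // reg field keeps the shorter prefix usable.)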
+ case X86::VMOVAPDrr:
+ case X86::VMOVAPDYrr:
+ case X86::VMOVAPSrr:
+ case X86::VMOVAPSYrr:
+ case X86::VMOVDQArr:
+ case X86::VMOVDQAYrr:
+ case X86::VMOVDQUrr:
+ case X86::VMOVDQUYrr:
+ case X86::VMOVUPDrr:
+ case X86::VMOVUPDYrr:
+ case X86::VMOVUPSrr:
+ case X86::VMOVUPSYrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+ case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+
// TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
// inputs modeled as normal uses instead of implicit uses. As such, truncate
// off all but the first operand (the callee). FIXME: Change isel.
@@ -549,18 +602,14 @@ ReSimplify:
OutMI.setOpcode(X86::RET);
break;
- case X86::MORESTACK_RET_RESTORE_R10: {
- MCInst retInst;
-
+ case X86::MORESTACK_RET_RESTORE_R10:
OutMI.setOpcode(X86::MOV64rr);
OutMI.addOperand(MCOperand::CreateReg(X86::R10));
OutMI.addOperand(MCOperand::CreateReg(X86::RAX));
- retInst.setOpcode(X86::RET);
- AsmPrinter.OutStreamer.EmitInstruction(retInst);
+ AsmPrinter.OutStreamer.EmitInstruction(MCInstBuilder(X86::RET));
break;
}
- }
}
static void LowerTlsAddr(MCStreamer &OutStreamer,
@@ -574,11 +623,8 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
MCContext &context = OutStreamer.getContext();
- if (needsPadding) {
- MCInst prefix;
- prefix.setOpcode(X86::DATA16_PREFIX);
- OutStreamer.EmitInstruction(prefix);
- }
+ if (needsPadding)
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
MCSymbolRefExpr::VariantKind SRVK;
switch (MI.getOpcode()) {
@@ -628,20 +674,11 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
OutStreamer.EmitInstruction(LEA);
if (needsPadding) {
- MCInst prefix;
- prefix.setOpcode(X86::DATA16_PREFIX);
- OutStreamer.EmitInstruction(prefix);
- prefix.setOpcode(X86::DATA16_PREFIX);
- OutStreamer.EmitInstruction(prefix);
- prefix.setOpcode(X86::REX64_PREFIX);
- OutStreamer.EmitInstruction(prefix);
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::REX64_PREFIX));
}
- MCInst call;
- if (is64Bits)
- call.setOpcode(X86::CALL64pcrel32);
- else
- call.setOpcode(X86::CALLpcrel32);
StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
const MCSymbolRefExpr *tlsRef =
@@ -649,8 +686,9 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
MCSymbolRefExpr::VK_PLT,
context);
- call.addOperand(MCOperand::CreateExpr(tlsRef));
- OutStreamer.EmitInstruction(call);
+ OutStreamer.EmitInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32
+ : X86::CALLpcrel32)
+ .addExpr(tlsRef));
}
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -694,7 +732,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
case X86::MOVPC32r: {
- MCInst TmpInst;
// This is a pseudo op for a two instruction sequence with a label, which
// looks like:
// call "L1$pb"
@@ -703,20 +740,17 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Emit the call.
MCSymbol *PICBase = MF->getPICBaseSymbol();
- TmpInst.setOpcode(X86::CALLpcrel32);
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
- TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase,
- OutContext)));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::CALLpcrel32)
+ .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
OutStreamer.EmitLabel(PICBase);
// popl $reg
- TmpInst.setOpcode(X86::POP32r);
- TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg());
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::POP32r)
+ .addReg(MI->getOperand(0).getReg()));
return;
}
@@ -746,12 +780,10 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
DotExpr, OutContext);
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD32ri);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- TmpInst.addOperand(MCOperand::CreateExpr(DotExpr));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::ADD32ri)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(DotExpr));
return;
}
}
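The MCInstLower changes above are a mechanical move to MCInstBuilder, which chains operands and converts implicitly to MCInst. A side-by-side sketch; the API names match the 3.3-era tree this import tracks:

    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstBuilder.h"

    // Before: build the instruction one statement at a time.
    static llvm::MCInst buildManually(unsigned Opc, unsigned Reg) {
      llvm::MCInst I;
      I.setOpcode(Opc);
      I.addOperand(llvm::MCOperand::CreateReg(Reg));
      return I;
    }

    // After: one fluent expression with the same result.
    static llvm::MCInst buildFluently(unsigned Opc, unsigned Reg) {
      return llvm::MCInstBuilder(Opc).addReg(Reg);
    }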
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
new file mode 100644
index 000000000000..83e75ea994ca
--- /dev/null
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -0,0 +1,212 @@
+//===-------- X86PadShortFunction.cpp - pad short functions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which will pad short functions to prevent
+// a stall if a function returns before the return address is ready. This
+// is needed for some Intel Atom processors.
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+
+#define DEBUG_TYPE "x86-pad-short-functions"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumBBsPadded, "Number of basic blocks padded");
+
+namespace {
+ struct VisitedBBInfo {
+ // HasReturn - Whether the BB contains a return instruction
+ bool HasReturn;
+
+ // Cycles - Number of cycles until return if HasReturn is true, otherwise
+ // number of cycles until end of the BB
+ unsigned int Cycles;
+
+ VisitedBBInfo() : HasReturn(false), Cycles(0) {}
+ VisitedBBInfo(bool HasReturn, unsigned int Cycles)
+ : HasReturn(HasReturn), Cycles(Cycles) {}
+ };
+
+ struct PadShortFunc : public MachineFunctionPass {
+ static char ID;
+ PadShortFunc() : MachineFunctionPass(ID),
+ Threshold(4), TM(0), TII(0) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "X86 Atom pad short functions";
+ }
+
+ private:
+ void findReturns(MachineBasicBlock *MBB,
+ unsigned int Cycles = 0);
+
+ bool cyclesUntilReturn(MachineBasicBlock *MBB,
+ unsigned int &Cycles);
+
+ void addPadding(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned int NOOPsToAdd);
+
+ const unsigned int Threshold;
+
+ // ReturnBBs - Maps basic blocks that return to the minimum number of
+ // cycles until the return, starting from the entry block.
+ DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs;
+
+ // VisitedBBs - Cache of previously visited BBs.
+ DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs;
+
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ };
+
+ char PadShortFunc::ID = 0;
+}
+
+FunctionPass *llvm::createX86PadShortFunctions() {
+ return new PadShortFunc();
+}
+
+/// runOnMachineFunction - Loop over all of the basic blocks, inserting
+/// NOOP instructions before early exits.
+bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
+ const AttributeSet &FnAttrs = MF.getFunction()->getAttributes();
+ if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize)) {
+ return false;
+ }
+
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+
+ // Search through basic blocks and mark the ones that have early returns
+ ReturnBBs.clear();
+ VisitedBBs.clear();
+ findReturns(MF.begin());
+
+ bool MadeChange = false;
+
+ MachineBasicBlock *MBB;
+ unsigned int Cycles = 0;
+
+ // Pad the identified basic blocks with NOOPs
+ for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
+ I != ReturnBBs.end(); ++I) {
+ MBB = I->first;
+ Cycles = I->second;
+
+ if (Cycles < Threshold) {
+ // BB ends in a return. Skip over any DBG_VALUE instructions
+ // trailing the terminator.
+ assert(MBB->size() > 0 &&
+ "Basic block should contain at least a RET but is empty");
+ MachineBasicBlock::iterator ReturnLoc = --MBB->end();
+
+ while (ReturnLoc->isDebugValue())
+ --ReturnLoc;
+ assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() &&
+ "Basic block does not end with RET");
+
+ addPadding(MBB, ReturnLoc, Threshold - Cycles);
+ NumBBsPadded++;
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// findReturns - Starting at MBB, follow control flow and add all
+/// basic blocks that contain a return to ReturnBBs.
+void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
+ // If this BB has a return, note how many cycles it takes to get there.
+ bool hasReturn = cyclesUntilReturn(MBB, Cycles);
+ if (Cycles >= Threshold)
+ return;
+
+ if (hasReturn) {
+ ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles);
+ return;
+ }
+
+ // Follow branches in BB and look for returns
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin();
+ I != MBB->succ_end(); ++I) {
+ if (*I == MBB)
+ continue;
+ findReturns(*I, Cycles);
+ }
+}
+
+/// cyclesUntilReturn - Return true if the MBB has a return instruction,
+/// false otherwise.
+/// Cycles will be incremented by the number of cycles taken to reach the
+/// return or the end of the BB, whichever occurs first.
+bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
+ unsigned int &Cycles) {
+ // Return cached result if BB was previously visited
+ DenseMap<MachineBasicBlock*, VisitedBBInfo>::iterator it
+ = VisitedBBs.find(MBB);
+ if (it != VisitedBBs.end()) {
+ VisitedBBInfo BBInfo = it->second;
+ Cycles += BBInfo.Cycles;
+ return BBInfo.HasReturn;
+ }
+
+ unsigned int CyclesToEnd = 0;
+
+ for (MachineBasicBlock::iterator MBBI = MBB->begin();
+ MBBI != MBB->end(); ++MBBI) {
+ MachineInstr *MI = MBBI;
+ // Mark basic blocks with a return instruction. Calls to other
+ // functions do not count because the called function will be padded,
+ // if necessary.
+ if (MI->isReturn() && !MI->isCall()) {
+ VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd);
+ Cycles += CyclesToEnd;
+ return true;
+ }
+
+ CyclesToEnd += TII->getInstrLatency(TM->getInstrItineraryData(), MI);
+ }
+
+ VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
+ Cycles += CyclesToEnd;
+ return false;
+}
+
+/// addPadding - Add the given number of NOOP instructions to the function
+/// just prior to the return at MBBI
+void PadShortFunc::addPadding(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned int NOOPsToAdd) {
+ DebugLoc DL = MBBI->getDebugLoc();
+
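+ // Each cycle of padding is filled with two NOOPs: the Atom model issues
+ // two instructions per cycle, so a NOOP pair covers one cycle.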
+ while (NOOPsToAdd-- > 0) {
+ BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP));
+ BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP));
+ }
+}
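
The padding decision above reduces to a small rule; a standalone sketch using the pass's hard-coded Threshold of 4 cycles:

// Number of padding cycles runOnMachineFunction() inserts before a RET that
// is reachable in CyclesToReturn cycles from the entry block.
unsigned paddingCycles(unsigned CyclesToReturn, unsigned Threshold = 4) {
  return CyclesToReturn < Threshold ? Threshold - CyclesToReturn : 0;
}
// e.g. paddingCycles(1) == 3: three cycles of NOOPs precede the return.
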
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 73ac7477427f..16886e432d19 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -19,25 +19,25 @@
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/CommandLine.h"
#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"
@@ -50,16 +50,18 @@ ForceStackAlign("force-align-stack",
" needed for the function."),
cl::init(false), cl::Hidden);
-cl::opt<bool>
+static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
- : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::RIP : X86::EIP,
+ : X86GenRegisterInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::RIP : X86::EIP),
X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false),
- X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true)),
+ X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true),
+ (tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::RIP : X86::EIP)),
TM(tm), TII(tii) {
X86_MC::InitLLVM2SEHRegisterMapping(this);
@@ -175,21 +177,27 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const {
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
switch (Kind) {
default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
case 0: // Normal GPRs.
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (Subtarget.isTarget64BitLP64())
return &X86::GR64RegClass;
return &X86::GR32RegClass;
case 1: // Normal GPRs except the stack pointer (for encoding reasons).
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (Subtarget.isTarget64BitLP64())
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
case 2: // Available for tailcall (not callee-saved GPRs).
- if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+ if (Subtarget.isTargetWin64())
return &X86::GR64_TCW64RegClass;
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ else if (Subtarget.is64Bit())
return &X86::GR64_TCRegClass;
+
+ const Function *F = MF.getFunction();
+ bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
+ if (hasHipeCC)
+ return &X86::GR32RegClass;
return &X86::GR32_TCRegClass;
}
}
@@ -227,36 +235,40 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const uint16_t *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- bool callsEHReturn = false;
- bool ghcCall = false;
- bool oclBiCall = false;
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
-
- if (MF) {
- callsEHReturn = MF->getMMI().callsEHReturn();
- const Function *F = MF->getFunction();
- ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
- oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false);
- }
-
- if (ghcCall)
+ switch (MF->getFunction()->getCallingConv()) {
+ case CallingConv::GHC:
+ case CallingConv::HiPE:
return CSR_NoRegs_SaveList;
- if (oclBiCall) {
+
+ case CallingConv::Intel_OCL_BI: {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
if (HasAVX && IsWin64)
- return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
+ return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
if (HasAVX && Is64Bit)
- return CSR_64_Intel_OCL_BI_AVX_SaveList;
+ return CSR_64_Intel_OCL_BI_AVX_SaveList;
if (!HasAVX && !IsWin64 && Is64Bit)
- return CSR_64_Intel_OCL_BI_SaveList;
+ return CSR_64_Intel_OCL_BI_SaveList;
+ break;
+ }
+
+ case CallingConv::Cold:
+ if (Is64Bit)
+ return CSR_MostRegs_64_SaveList;
+ break;
+
+ default:
+ break;
}
+
+ bool CallsEHReturn = MF->getMMI().callsEHReturn();
if (Is64Bit) {
if (IsWin64)
return CSR_Win64_SaveList;
- if (callsEHReturn)
+ if (CallsEHReturn)
return CSR_64EHRet_SaveList;
return CSR_64_SaveList;
}
- if (callsEHReturn)
+ if (CallsEHReturn)
return CSR_32EHRet_SaveList;
return CSR_32_SaveList;
}
@@ -273,10 +285,12 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (!HasAVX && !IsWin64 && Is64Bit)
return CSR_64_Intel_OCL_BI_RegMask;
}
- if (CC == CallingConv::GHC)
+ if (CC == CallingConv::GHC || CC == CallingConv::HiPE)
return CSR_NoRegs_RegMask;
if (!Is64Bit)
return CSR_32_RegMask;
+ if (CC == CallingConv::Cold)
+ return CSR_MostRegs_64_RegMask;
if (IsWin64)
return CSR_Win64_RegMask;
return CSR_64_RegMask;
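
The GHC/HiPE and Cold cases above are selected purely by the function's calling convention; a short sketch of opting in through the IR API:

#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"

// Marking a function HiPE makes the X86 backend use the empty callee-saved
// list and register mask added in this hunk.
void useHiPEConvention(llvm::Function &F) {
  F.setCallingConv(llvm::CallingConv::HiPE);
}
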
@@ -380,7 +394,13 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// When we need stack realignment and there are dynamic allocas, we can't
// reference off of the stack pointer, so we reserve a base pointer.
- if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ //
+ // This is also true if the function contains MS-style inline assembly. We
+ // do this because if any stack changes occur in the inline assembly, e.g.,
+ // "pusha", then any C local variable or C argument references in the
+ // inline assembly will be wrong because the SP is not properly tracked.
+ if ((needsStackRealignment(MF) && MFI->hasVarSizedObjects()) ||
+ MF.hasMSInlineAsm())
return true;
return false;
@@ -410,7 +430,8 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
- F->getFnAttributes().hasAttribute(Attributes::StackAlignment));
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
@@ -430,123 +451,16 @@ bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
return false;
}
-static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
- if (isInt<8>(Imm))
- return X86::SUB64ri8;
- return X86::SUB64ri32;
- } else {
- if (isInt<8>(Imm))
- return X86::SUB32ri8;
- return X86::SUB32ri;
- }
-}
-
-static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
- if (isInt<8>(Imm))
- return X86::ADD64ri8;
- return X86::ADD64ri32;
- } else {
- if (isInt<8>(Imm))
- return X86::ADD32ri8;
- return X86::ADD32ri;
- }
-}
-
-void X86RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- bool reseveCallFrame = TFI->hasReservedCallFrame(MF);
- int Opcode = I->getOpcode();
- bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
- DebugLoc DL = I->getDebugLoc();
- uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
- uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
- I = MBB.erase(I);
-
- if (!reseveCallFrame) {
- // If the stack pointer can be changed after prologue, turn the
- // adjcallstackup instruction into a 'sub ESP, <amt>' and the
- // adjcallstackdown instruction into 'add ESP, <amt>'
- // TODO: consider using push / pop instead of sub + store / add
- if (Amount == 0)
- return;
-
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
- Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
-
- MachineInstr *New = 0;
- if (Opcode == TII.getCallFrameSetupOpcode()) {
- New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Amount);
- } else {
- assert(Opcode == TII.getCallFrameDestroyOpcode());
-
- // Factor out the amount the callee already popped.
- Amount -= CalleeAmt;
-
- if (Amount) {
- unsigned Opc = getADDriOpcode(Is64Bit, Amount);
- New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(Amount);
- }
- }
-
- if (New) {
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // Replace the pseudo instruction with a new instruction.
- MBB.insert(I, New);
- }
-
- return;
- }
-
- if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back. We do this until we have
- // more advanced stack pointer tracking ability.
- unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
- MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(CalleeAmt);
-
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // We are not tracking the stack pointer adjustment by the callee, so make
- // sure we restore the stack pointer immediately after the call, there may
- // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
- MachineBasicBlock::iterator B = MBB.begin();
- while (I != B && !llvm::prior(I)->isCall())
- --I;
- MBB.insert(I, New);
- }
-}
-
void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned BasePtr;
unsigned Opc = MI.getOpcode();
@@ -562,7 +476,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add an offset to the offset.
- MI.getOperand(i).ChangeToRegister(BasePtr, false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
// Now add the frame object offset to the offset from EBP.
int FIOffset;
@@ -573,17 +487,18 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else
FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
- if (MI.getOperand(i+3).isImm()) {
+ if (MI.getOperand(FIOperandNum+3).isImm()) {
// Offset is a 32-bit integer.
- int Imm = (int)(MI.getOperand(i + 3).getImm());
+ int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
int Offset = FIOffset + Imm;
assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
"Requesting 64-bit offset in 32-bit immediate!");
- MI.getOperand(i + 3).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
- uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
- MI.getOperand(i+3).setOffset(Offset);
+ uint64_t Offset = FIOffset +
+ (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
+ MI.getOperand(FIOperandNum + 3).setOffset(Offset);
}
}
@@ -608,7 +523,15 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
case MVT::i8:
if (High) {
switch (Reg) {
- default: return getX86SubSuperRegister(Reg, MVT::i64, High);
+ default: return getX86SubSuperRegister(Reg, MVT::i64);
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -728,22 +651,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
return X86::R15D;
}
case MVT::i64:
- // For 64-bit mode if we've requested a "high" register and the
- // Q or r constraints we want one of these high registers or
- // just the register name otherwise.
- if (High) {
- switch (Reg) {
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SP;
- // Fallthrough.
- }
- }
switch (Reg) {
default: llvm_unreachable("Unexpected register");
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
@@ -782,46 +689,3 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
}
}
-
-namespace {
- struct MSAH : public MachineFunctionPass {
- static char ID;
- MSAH() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- const X86TargetMachine *TM =
- static_cast<const X86TargetMachine *>(&MF.getTarget());
- const TargetFrameLowering *TFI = TM->getFrameLowering();
- MachineRegisterInfo &RI = MF.getRegInfo();
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- unsigned StackAlignment = TFI->getStackAlignment();
-
- // Be over-conservative: scan over all vreg defs and find whether vector
- // registers are used. If yes, there is a possibility that vector register
- // will be spilled and thus require dynamic stack realignment.
- for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
- FuncInfo->setForceFramePointer(true);
- return true;
- }
- }
- // Nothing to do
- return false;
- }
-
- virtual const char *getPassName() const {
- return "X86 Maximal Stack Alignment Check";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-
- char MSAH::ID = 0;
-}
-
-FunctionPass*
-llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
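
A usage sketch for the consolidated getX86SubSuperRegister() mapping; the return values follow directly from the switch cases shown above:

// 8-bit "high" request on RAX yields the high-byte register:
unsigned HighByte = getX86SubSuperRegister(X86::RAX, MVT::i8, /*High=*/true);
// HighByte == X86::AH

// SI has no high-byte sibling, so the new cases return SI itself instead of
// recursing through the i64 mapping:
unsigned SiPart = getX86SubSuperRegister(X86::ESI, MVT::i8, /*High=*/true);
// SiPart == X86::SI
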
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 7932ede8dd65..b9d7b8cf8b9a 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -117,12 +117,9 @@ public:
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
new file mode 100644
index 000000000000..7de6791f2e48
--- /dev/null
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -0,0 +1,126 @@
+//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Haswell to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def HaswellModel : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and HW can decode 4
+ // instructions per cycle.
+ let IssueWidth = 4;
+ let MinLatency = 0; // 0 = Out-of-order execution.
+ let LoadLatency = 4;
+ let ILPWindow = 40;
+ let MispredictPenalty = 16;
+}
+
+let SchedModel = HaswellModel in {
+
+// Haswell can issue micro-ops to 8 different ports in one cycle.
+
+// Ports 0, 1, 5, 6 and 7 handle all computation.
+// Port 4 gets the data half of stores. Store data can be available later than
+// the store address, but since we don't model the latency of stores, we can
+// ignore that.
+// Ports 2 and 3 are identical. They handle loads and the address half of
+// stores. Port 7 can handle address calculations.
+def HWPort0 : ProcResource<1>;
+def HWPort1 : ProcResource<1>;
+def HWPort2 : ProcResource<1>;
+def HWPort3 : ProcResource<1>;
+def HWPort4 : ProcResource<1>;
+def HWPort5 : ProcResource<1>;
+def HWPort6 : ProcResource<1>;
+def HWPort7 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>;
+def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
+def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>;
+def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
+def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>;
+def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
+def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
+
+// Integer division issued on port 0.
+def HWDivider : ProcResource<1>;
+
+// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+ // The register variant uses a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> {
+ let Latency = !add(Lat, 4);
+ }
+}
+
+// A folded store needs a cycle on port 4 for the store data, but it does not
+// need an extra port 2/3 cycle to recompute the address.
+def : WriteRes<WriteRMW, [HWPort4]>;
+
+def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
+def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 4; }
+def : WriteRes<WriteMove, [HWPort0156]>;
+def : WriteRes<WriteZero, []>;
+
+defm : HWWriteResPair<WriteALU, HWPort0156, 1>;
+defm : HWWriteResPair<WriteIMul, HWPort1, 3>;
+defm : HWWriteResPair<WriteShift, HWPort056, 1>;
+defm : HWWriteResPair<WriteJump, HWPort5, 1>;
+
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [HWPort15]>;
+
+// This is quite rough; the latency depends on the dividend.
+def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 10];
+}
+def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> {
+ let Latency = 29;
+ let ResourceCycles = [1, 1, 10];
+}
+
+// Scalar and vector floating point.
+defm : HWWriteResPair<WriteFAdd, HWPort1, 3>;
+defm : HWWriteResPair<WriteFMul, HWPort0, 5>;
+defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles.
+defm : HWWriteResPair<WriteFRcp, HWPort0, 5>;
+defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>;
+defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>;
+defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>;
+defm : HWWriteResPair<WriteCvtF2F, HWPort1, 3>;
+
+// Vector integer operations.
+defm : HWWriteResPair<WriteVecShift, HWPort05, 1>;
+defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>;
+defm : HWWriteResPair<WriteVecALU, HWPort15, 1>;
+defm : HWWriteResPair<WriteVecIMul, HWPort0, 5>;
+defm : HWWriteResPair<WriteShuffle, HWPort15, 1>;
+
+def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
+} // SchedModel
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
new file mode 100644
index 000000000000..74d5f1b6eba8
--- /dev/null
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -0,0 +1,122 @@
+//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Sandy Bridge to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def SandyBridgeModel : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and SB can decode 4
+ // instructions per cycle.
+ // FIXME: Identify instructions that aren't a single fused micro-op.
+ let IssueWidth = 4;
+ let MinLatency = 0; // 0 = Out-of-order execution.
+ let LoadLatency = 4;
+ let ILPWindow = 30;
+ let MispredictPenalty = 16;
+}
+
+let SchedModel = SandyBridgeModel in {
+
+// Sandy Bridge can issue micro-ops to 6 different ports in one cycle.
+
+// Ports 0, 1, and 5 handle all computation.
+def SBPort0 : ProcResource<1>;
+def SBPort1 : ProcResource<1>;
+def SBPort5 : ProcResource<1>;
+
+// Ports 2 and 3 are identical. They handle loads and the address half of
+// stores.
+def SBPort23 : ProcResource<2>;
+
+// Port 4 gets the data half of stores. Store data can be available later than
+// the store address, but since we don't model the latency of stores, we can
+// ignore that.
+def SBPort4 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>;
+def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>;
+def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;
+
+// Integer division issued on port 0.
+def SBDivider : ProcResource<1>;
+
+// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 4>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+ // The register variant uses a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> {
+ let Latency = !add(Lat, 4);
+ }
+}
+
+// A folded store needs a cycle on port 4 for the store data, but it does not
+// need an extra port 2/3 cycle to recompute the address.
+def : WriteRes<WriteRMW, [SBPort4]>;
+
+def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
+def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 4; }
+def : WriteRes<WriteMove, [SBPort015]>;
+def : WriteRes<WriteZero, []>;
+
+defm : SBWriteResPair<WriteALU, SBPort015, 1>;
+defm : SBWriteResPair<WriteIMul, SBPort1, 3>;
+defm : SBWriteResPair<WriteShift, SBPort05, 1>;
+defm : SBWriteResPair<WriteJump, SBPort5, 1>;
+
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [SBPort15]>;
+
+// This is quite rough; the latency depends on the dividend.
+def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 10];
+}
+def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> {
+ let Latency = 29;
+ let ResourceCycles = [1, 1, 10];
+}
+
+// Scalar and vector floating point.
+defm : SBWriteResPair<WriteFAdd, SBPort1, 3>;
+defm : SBWriteResPair<WriteFMul, SBPort0, 5>;
+defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles.
+defm : SBWriteResPair<WriteFRcp, SBPort0, 5>;
+defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>;
+defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
+defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
+defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;
+
+// Vector integer operations.
+defm : SBWriteResPair<WriteVecShift, SBPort05, 1>;
+defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
+defm : SBWriteResPair<WriteVecALU, SBPort15, 1>;
+defm : SBWriteResPair<WriteVecIMul, SBPort0, 5>;
+defm : SBWriteResPair<WriteShuffle, SBPort15, 1>;
+
+def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
+} // SchedModel
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index c14407f9ac1b..9fbde88b7100 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -7,9 +7,94 @@
//
//===----------------------------------------------------------------------===//
+// InstrSchedModel annotations for out-of-order CPUs.
+//
+// These annotations are independent of the itinerary classes defined below.
+
+// Instructions with folded loads need to read the memory operand immediately,
+// but other register operands don't have to be read until the load is ready.
+// These operands are marked with ReadAfterLd.
+def ReadAfterLd : SchedRead;
+
+// Instructions with both a load and a store folded are modeled as a folded
+// load + WriteRMW.
+def WriteRMW : SchedWrite;
+
+// Most instructions can fold loads, so almost every SchedWrite comes in two
+// variants: With and without a folded load.
+// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
+// with a folded load.
+class X86FoldableSchedWrite : SchedWrite {
+ // The SchedWrite to use when a load is folded into the instruction.
+ SchedWrite Folded;
+}
+
+// Multiclass that produces a linked pair of SchedWrites.
+multiclass X86SchedWritePair {
+ // Register-Memory operation.
+ def Ld : SchedWrite;
+ // Register-Register operation.
+ def NAME : X86FoldableSchedWrite {
+ let Folded = !cast<SchedWrite>(NAME#"Ld");
+ }
+}
+
+// Arithmetic.
+defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
+defm WriteIMul : X86SchedWritePair; // Integer multiplication.
+defm WriteIDiv : X86SchedWritePair; // Integer division.
+def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+
+// Integer shifts and rotates.
+defm WriteShift : X86SchedWritePair;
+
+// Loads, stores, and moves, not folded with other operations.
+def WriteLoad : SchedWrite;
+def WriteStore : SchedWrite;
+def WriteMove : SchedWrite;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def WriteZero : SchedWrite;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm WriteJump : X86SchedWritePair;
+
+// Floating point. This covers both scalar and vector operations.
+defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
+defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
+defm WriteFDiv : X86SchedWritePair; // Floating point division.
+defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
+defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal.
+defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
+
+// FMA Scheduling helper class.
+class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
+
+// Vector integer operations.
+defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
+defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
+
+// Vector bitwise operations.
+// These are often used on both floating point and integer vectors.
+defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.
+defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends.
+
+// Conversion between integer and float.
+defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
+defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
+defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
+
+// Catch-all for expensive system instructions.
+def WriteSystem : SchedWrite;
+
+// Old microcoded instructions that nobody uses.
+def WriteMicrocoded : SchedWrite;
+
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for X86
-def IIC_DEFAULT : InstrItinClass;
def IIC_ALU_MEM : InstrItinClass;
def IIC_ALU_NONMEM : InstrItinClass;
def IIC_LEA : InstrItinClass;
@@ -470,12 +555,19 @@ def IIC_NOP : InstrItinClass;
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
+// ILPWindow=10 is an arbitrary threshold that approximates cycles of
+// latency hidden by instruction buffers. The actual value is not very
+// important but should be zero for in-order and nonzero for OOO processors.
+//
// The GenericModel contains no instruction itineraries.
def GenericModel : SchedMachineModel {
let IssueWidth = 4;
let MinLatency = 0;
let LoadLatency = 4;
let HighLatency = 10;
+ let ILPWindow = 10;
}
include "X86ScheduleAtom.td"
+include "X86SchedSandyBridge.td"
+include "X86SchedHaswell.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 87102614cc8b..cce8f1b11436 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -33,7 +33,6 @@ def AtomItineraries : ProcessorItineraries<
// InstrItinData<class, [InstrStage<N, [P0], 0>, InstrStage<N, [P1]>] >,
//
// Default is 1 cycle, port0 or port1
- InstrItinData<IIC_DEFAULT, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >,
@@ -525,6 +524,7 @@ def AtomModel : SchedMachineModel {
// OperandCycles may be used for expected latency.
let LoadLatency = 3; // Expected cycles, may be overridden by OperandCycles.
let HighLatency = 30; // Expected, may be overridden by OperandCycles.
+ let ILPWindow = 0; // Always try to hide expected latency.
let Itineraries = AtomItineraries;
}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 723e50cc1886..f934fdd85914 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -13,8 +13,8 @@
#define DEBUG_TYPE "x86-selectiondag-info"
#include "X86TargetMachine.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
@@ -202,6 +202,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SrcPtrInfo.getAddrSpace() >= 256)
return SDValue();
+ // ESI might be used as a base pointer; in that case we can't simply
+ // overwrite the register. Fall back to generic code.
+ const X86RegisterInfo *TRI =
+ static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
+ if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
+ TRI->getBaseRegister() == X86::ESI)
+ return SDValue();
+
MVT AVT;
if (Align & 1)
AVT = MVT::i8;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index d1ed68028771..14619b63927b 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -14,11 +14,13 @@
#define DEBUG_TYPE "subtarget"
#include "X86Subtarget.h"
#include "X86InstrInfo.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -35,8 +37,7 @@ using namespace llvm;
/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
/// current subtarget according to how we should reference it in a non-pcrel
/// context.
-unsigned char X86Subtarget::
-ClassifyBlockAddressReference() const {
+unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
if (isPICStyleGOT()) // 32-bit ELF targets.
return X86II::MO_GOTOFF;
@@ -155,6 +156,12 @@ const char *X86Subtarget::getBZeroEntry() const {
return 0;
}
+bool X86Subtarget::hasSinCos() const {
+ return getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 9) &&
+ is64Bit();
+}
+
/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
/// to immediate address.
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
@@ -234,12 +241,20 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
ToggleFeature(X86::FeatureSlowBTMem);
}
- // If it's Nehalem, unaligned memory access is fast.
- // Include Westmere and Sandy Bridge as well.
- // FIXME: add later processors.
- if (IsIntel && ((Family == 6 && Model == 26) ||
- (Family == 6 && Model == 44) ||
- (Family == 6 && Model == 42))) {
+ // On Intel chips from Nehalem onward (excluding Atom), unaligned memory
+ // access is fast. We hard-code model numbers here because Intel model
+ // numbers are not strictly increasing across generations.
+ if (IsIntel &&
+ ((Family == 6 && Model == 0x1E) || // Nehalem: Clarksfield, Lynnfield,
+ // Jasper Forest
+ (Family == 6 && Model == 0x1A) || // Nehalem: Bloomfield, Nehalem-EP
+ (Family == 6 && Model == 0x2E) || // Nehalem: Nehalem-EX
+ (Family == 6 && Model == 0x25) || // Westmere: Arrandale, Clarksdale
+ (Family == 6 && Model == 0x2C) || // Westmere: Gulftown, Westmere-EP
+ (Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
+ (Family == 6 && Model == 0x2A) || // SandyBridge
+ (Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
+ (Family == 6 && Model == 0x3A))) {// IvyBridge
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
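
The Family and Model values compared above follow the standard CPUID leaf-1 decoding; a sketch of that decoding, assuming GCC/Clang's <cpuid.h> helpers (not part of this patch):

#include <cpuid.h>

// Decode DisplayFamily/DisplayModel per the Intel SDM conventions used by
// the model table above.
bool x86FamilyModel(unsigned &Family, unsigned &Model) {
  unsigned EAX, EBX, ECX, EDX;
  if (!__get_cpuid(1, &EAX, &EBX, &ECX, &EDX))
    return false;
  Family = (EAX >> 8) & 0xF;
  Model = (EAX >> 4) & 0xF;
  if (Family == 0xF)
    Family += (EAX >> 20) & 0xFF;      // extended family
  if (Family == 0x6 || Family == 0xF)
    Model += ((EAX >> 16) & 0xF) << 4; // extended model
  return true;
}
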
@@ -267,6 +282,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasLZCNT = true;
ToggleFeature(X86::FeatureLZCNT);
}
+ if (IsIntel && ((ECX >> 8) & 0x1)) {
+ HasPRFCHW = true;
+ ToggleFeature(X86::FeaturePRFCHW);
+ }
if (IsAMD) {
if ((ECX >> 6) & 0x1) {
HasSSE4A = true;
@@ -294,6 +313,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasBMI = true;
ToggleFeature(X86::FeatureBMI);
}
+ if ((EBX >> 4) & 0x1) {
+ HasHLE = true;
+ ToggleFeature(X86::FeatureHLE);
+ }
if (IsIntel && ((EBX >> 5) & 0x1)) {
X86SSELevel = AVX2;
ToggleFeature(X86::FeatureAVX2);
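
The EBX bits tested in these hunks come from CPUID leaf 7, sub-leaf 0: bit 4 is HLE, bit 5 AVX2, bit 18 RDSEED, and bit 19 ADX. A sketch of reading that word, again assuming <cpuid.h>:

#include <cpuid.h>

// Returns the structured extended feature flags, or 0 if leaf 7 is
// unsupported on this CPU.
unsigned x86Leaf7EBX() {
  if (__get_cpuid_max(0, 0) < 7)
    return 0;
  unsigned EAX, EBX, ECX, EDX;
  __cpuid_count(7, 0, EAX, EBX, ECX, EDX);
  return EBX;
}
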
@@ -306,48 +329,35 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasRTM = true;
ToggleFeature(X86::FeatureRTM);
}
+ if (IsIntel && ((EBX >> 19) & 0x1)) {
+ HasADX = true;
+ ToggleFeature(X86::FeatureADX);
+ }
+ if (IsIntel && ((EBX >> 18) & 0x1)) {
+ HasRDSEED = true;
+ ToggleFeature(X86::FeatureRDSEED);
+ }
}
}
}
-X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS,
- unsigned StackAlignOverride, bool is64Bit)
- : X86GenSubtargetInfo(TT, CPU, FS)
- , X86ProcFamily(Others)
- , PICStyle(PICStyles::None)
- , X86SSELevel(NoMMXSSE)
- , X863DNowLevel(NoThreeDNow)
- , HasCMov(false)
- , HasX86_64(false)
- , HasPOPCNT(false)
- , HasSSE4A(false)
- , HasAES(false)
- , HasPCLMUL(false)
- , HasFMA(false)
- , HasFMA4(false)
- , HasXOP(false)
- , HasMOVBE(false)
- , HasRDRAND(false)
- , HasF16C(false)
- , HasFSGSBase(false)
- , HasLZCNT(false)
- , HasBMI(false)
- , HasBMI2(false)
- , HasRTM(false)
- , IsBTMemSlow(false)
- , IsUAMemFast(false)
- , HasVectorUAMem(false)
- , HasCmpxchg16b(false)
- , UseLeaForSP(false)
- , HasSlowDivide(false)
- , PostRAScheduler(false)
- , stackAlignment(4)
- // FIXME: this is a known good value for Yonah. How about others?
- , MaxInlineSizeThreshold(128)
- , TargetTriple(TT)
- , In64BitMode(is64Bit) {
- // Determine default and user specified characteristics
+void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) {
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+ !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ if (!FS.empty()) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ }
+}
+
+void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (!FS.empty() || !CPU.empty()) {
if (CPUName.empty()) {
@@ -424,6 +434,57 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
stackAlignment = 16;
}
+void X86Subtarget::initializeEnvironment() {
+ X86SSELevel = NoMMXSSE;
+ X863DNowLevel = NoThreeDNow;
+ HasCMov = false;
+ HasX86_64 = false;
+ HasPOPCNT = false;
+ HasSSE4A = false;
+ HasAES = false;
+ HasPCLMUL = false;
+ HasFMA = false;
+ HasFMA4 = false;
+ HasXOP = false;
+ HasMOVBE = false;
+ HasRDRAND = false;
+ HasF16C = false;
+ HasFSGSBase = false;
+ HasLZCNT = false;
+ HasBMI = false;
+ HasBMI2 = false;
+ HasRTM = false;
+ HasHLE = false;
+ HasADX = false;
+ HasPRFCHW = false;
+ HasRDSEED = false;
+ IsBTMemSlow = false;
+ IsUAMemFast = false;
+ HasVectorUAMem = false;
+ HasCmpxchg16b = false;
+ UseLeaForSP = false;
+ HasSlowDivide = false;
+ PostRAScheduler = false;
+ PadShortFunctions = false;
+ CallRegIndirect = false;
+ stackAlignment = 4;
+ // FIXME: this is a known good value for Yonah. How about others?
+ MaxInlineSizeThreshold = 128;
+}
+
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit)
+ : X86GenSubtargetInfo(TT, CPU, FS)
+ , X86ProcFamily(Others)
+ , PICStyle(PICStyles::None)
+ , TargetTriple(TT)
+ , StackAlignOverride(StackAlignOverride)
+ , In64BitMode(is64Bit) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+}
+
bool X86Subtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
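
The new resetSubtargetFeatures(const MachineFunction *) keys off per-function "target-cpu" and "target-features" string attributes. A sketch of producing them; the two-argument addFnAttr shown here is from later LLVM and is used purely as an illustration for this tree:

#include "llvm/IR/Function.h"

// Tag a function so the subtarget is re-derived for it. CPU and feature
// names are examples, not mandated by the patch.
void tagFunctionTarget(llvm::Function &F) {
  F.addFnAttr("target-cpu", "core-avx-i");
  F.addFnAttr("target-features", "+avx,+popcnt");
}
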
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 8bf4cc77f762..6fbdb1d5f00f 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -14,8 +14,8 @@
#ifndef X86SUBTARGET_H
#define X86SUBTARGET_H
-#include "llvm/CallingConv.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -121,6 +121,18 @@ protected:
/// HasRTM - Processor has RTM instructions.
bool HasRTM;
+ /// HasHLE - Processor has HLE.
+ bool HasHLE;
+
+ /// HasADX - Processor has ADX instructions.
+ bool HasADX;
+
+ /// HasPRFCHW - Processor has PRFCHW instructions.
+ bool HasPRFCHW;
+
+ /// HasRDSEED - Processor has RDSEED instructions.
+ bool HasRDSEED;
+
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -146,6 +158,14 @@ protected:
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
+ /// PadShortFunctions - True if short functions should be padded to prevent
+ /// a stall when returning too early.
+ bool PadShortFunctions;
+
+ /// CallRegIndirect - True if calls with a memory reference should be
+ /// converted to a register-based indirect call.
+ bool CallRegIndirect;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -161,11 +181,13 @@ protected:
InstrItineraryData InstrItins;
private:
+ /// StackAlignOverride - Override the stack alignment.
+ unsigned StackAlignOverride;
+
/// In64BitMode - True if compiling for 64-bit, false for 32-bit.
bool In64BitMode;
public:
-
/// This constructor initializes the data members to match that
/// of the specified triple.
///
@@ -190,7 +212,26 @@ public:
/// instruction.
void AutoDetectSubtargetFeatures();
- bool is64Bit() const { return In64BitMode; }
+ /// \brief Reset the features for the X86 target.
+ virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
+ void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+public:
+ /// Is this x86_64? (disregarding specific ABI / programming model)
+ bool is64Bit() const {
+ return In64BitMode;
+ }
+
+ /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
+ bool isTarget64BitILP32() const {
+ return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32);
+ }
+
+ /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
+ bool isTarget64BitLP64() const {
+ return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32);
+ }
PICStyles::Style getPICStyle() const { return PICStyle; }
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
@@ -205,6 +246,8 @@ public:
bool hasSSE42() const { return X86SSELevel >= SSE42; }
bool hasAVX() const { return X86SSELevel >= AVX; }
bool hasAVX2() const { return X86SSELevel >= AVX2; }
+ bool hasFp256() const { return hasAVX(); }
+ bool hasInt256() const { return hasAVX2(); }
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
@@ -223,12 +266,18 @@ public:
bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; }
bool hasRTM() const { return HasRTM; }
+ bool hasHLE() const { return HasHLE; }
+ bool hasADX() const { return HasADX; }
+ bool hasPRFCHW() const { return HasPRFCHW; }
+ bool hasRDSEED() const { return HasRDSEED; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
bool hasSlowDivide() const { return HasSlowDivide; }
+ bool padShortFunctions() const { return PadShortFunctions; }
+ bool callRegIndirect() const { return CallRegIndirect; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
@@ -247,7 +296,7 @@ public:
}
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
bool isTargetNaCl() const {
- return TargetTriple.getOS() == Triple::NativeClient;
+ return TargetTriple.getOS() == Triple::NaCl;
}
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
@@ -308,6 +357,10 @@ public:
/// memset with zero passed as the second argument. Otherwise it
/// returns null.
const char *getBZeroEntry() const;
+
+ /// This function returns true if the target has a sincos() routine in its
+ /// compiler runtime or math libraries.
+ bool hasSinCos() const;
/// enablePostRAScheduler - run for Atom optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
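
The new ILP32/LP64 predicates depend only on the triple's environment; a small sketch:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"

// x86-64 with the x32 ABI: 64-bit mode, 32-bit pointers.
bool isX32Triple(llvm::StringRef TripleStr) {
  llvm::Triple T(TripleStr);
  return T.getArch() == llvm::Triple::x86_64 &&
         T.getEnvironment() == llvm::Triple::GNUX32;
}
// isX32Triple("x86_64-unknown-linux-gnux32") returns true.
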
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 158f9dc06693..8aa58a204260 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -13,13 +13,13 @@
#include "X86TargetMachine.h"
#include "X86.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
extern "C" void LLVMInitializeX86Target() {
@@ -46,10 +46,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
"e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-"
"n8:16:32-S128"),
InstrInfo(*this),
- TSInfo(*this),
TLInfo(*this),
- JITInfo(*this),
- STTI(&TLInfo), VTTI(&TLInfo) {
+ TSInfo(*this),
+ JITInfo(*this) {
}
void X86_64TargetMachine::anchor() { }
@@ -60,13 +59,16 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
- DL("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
- "n8:16:32:64-S128"),
+ // The x32 ABI dictates the ILP32 programming model for x64.
+ DL(getSubtargetImpl()->isTarget64BitILP32() ?
+ "e-p:32:32-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ "n8:16:32:64-S128" :
+ "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ "n8:16:32:64-S128"),
InstrInfo(*this),
- TSInfo(*this),
TLInfo(*this),
- JITInfo(*this),
- STTI(&TLInfo), VTTI(&TLInfo){
+ TSInfo(*this),
+ JITInfo(*this) {
}
/// X86TargetMachine ctor - Create an X86 target.
@@ -121,6 +123,19 @@ X86EarlyIfConv("x86-early-ifcvt",
cl::desc("Enable early if-conversion on X86"));
//===----------------------------------------------------------------------===//
+// X86 Analysis Pass Setup
+//===----------------------------------------------------------------------===//
+
+void X86TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // First add the target-independent BasicTTI pass, then our X86 pass. This
+ // allows the X86 pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createX86TargetTransformInfoPass(this));
+}
+
+
+//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
@@ -140,6 +155,7 @@ public:
}
virtual bool addInstSelector();
+ virtual bool addILPOpts();
virtual bool addPreRegAlloc();
virtual bool addPostRegAlloc();
virtual bool addPreEmitPass();
@@ -147,12 +163,7 @@ public:
} // namespace
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
- X86PassConfig *PC = new X86PassConfig(this, PM);
-
- if (X86EarlyIfConv && Subtarget.hasCMov())
- PC->enablePass(&EarlyIfConverterID);
-
- return PC;
+ return new X86PassConfig(this, PM);
}
bool X86PassConfig::addInstSelector() {
@@ -170,8 +181,15 @@ bool X86PassConfig::addInstSelector() {
return false;
}
+bool X86PassConfig::addILPOpts() {
+ if (X86EarlyIfConv && getX86Subtarget().hasCMov()) {
+ addPass(&EarlyIfConverterID);
+ return true;
+ }
+ return false;
+}
+
bool X86PassConfig::addPreRegAlloc() {
- addPass(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
@@ -192,6 +210,12 @@ bool X86PassConfig::addPreEmitPass() {
ShouldPrint = true;
}
+ if (getOptLevel() != CodeGenOpt::None &&
+ getX86Subtarget().padShortFunctions()) {
+ addPass(createX86PadShortFunctions());
+ ShouldPrint = true;
+ }
+
return ShouldPrint;
}
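
A sketch of the intended call site for the new addAnalysisPasses() hook, in a hypothetical driver (not code from this patch):

#include "llvm/PassManager.h"
#include "llvm/Target/TargetMachine.h"

// Let the target register BasicTTI followed by X86TTI so later passes can
// query TargetTransformInfo through the analysis group.
void buildPipeline(llvm::TargetMachine &TM, llvm::PassManager &PM) {
  TM.addAnalysisPasses(PM);
  // ... populate the rest of the optimization pipeline here ...
}
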
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 12311a1abfbd..174d3918318d 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -15,16 +15,15 @@
#define X86TARGETMACHINE_H
#include "X86.h"
-#include "X86InstrInfo.h"
-#include "X86ISelLowering.h"
#include "X86FrameLowering.h"
+#include "X86ISelLowering.h"
+#include "X86InstrInfo.h"
#include "X86JITInfo.h"
#include "X86SelectionDAGInfo.h"
#include "X86Subtarget.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -65,6 +64,9 @@ public:
return &InstrItins;
}
+ /// \brief Register X86 analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+
// Set up the pass pipeline.
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
@@ -78,11 +80,9 @@ class X86_32TargetMachine : public X86TargetMachine {
virtual void anchor();
const DataLayout DL; // Calculates type size & alignment
X86InstrInfo InstrInfo;
- X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
+ X86SelectionDAGInfo TSInfo;
X86JITInfo JITInfo;
- ScalarTargetTransformImpl STTI;
- X86VectorTargetTransformInfo VTTI;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -101,12 +101,6 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
};
/// X86_64TargetMachine - X86 64-bit target machine.
@@ -115,11 +109,9 @@ class X86_64TargetMachine : public X86TargetMachine {
virtual void anchor();
const DataLayout DL; // Calculates type size & alignment
X86InstrInfo InstrInfo;
- X86SelectionDAGInfo TSInfo;
X86TargetLowering TLInfo;
+ X86SelectionDAGInfo TSInfo;
X86JITInfo JITInfo;
- ScalarTargetTransformImpl STTI;
- X86VectorTargetTransformInfo VTTI;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -138,12 +130,6 @@ public:
virtual X86JITInfo *getJITInfo() {
return &JITInfo;
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 92aee0dd3fcf..871dacd6a1c1 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -8,23 +8,19 @@
//===----------------------------------------------------------------------===//
#include "X86TargetObjectFile.h"
-#include "X86TargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Target/Mangler.h"
#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/Target/Mangler.h"
+
using namespace llvm;
using namespace dwarf;
const MCExpr *X86_64MachoTargetObjectFile::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const {
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
// On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
// is an indirect pc-relative reference.
@@ -37,7 +33,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
}
return TargetLoweringObjectFileMachO::
- getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
MCSymbol *X86_64MachoTargetObjectFile::
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 2d320c594cb9..9d26d389d4de 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -11,8 +11,8 @@
#define LLVM_TARGET_X86_TARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -21,9 +21,9 @@ namespace llvm {
class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
virtual const MCExpr *
- getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const;
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
// getCFIPersonalitySymbol - The symbol that gets passed to
// .cfi_personality.
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
new file mode 100644
index 000000000000..a98c6991192c
--- /dev/null
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -0,0 +1,495 @@
+//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// X86 target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86tti"
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeX86TTIPass(PassRegistry &);
+}
+
+namespace {
+
+class X86TTI : public ImmutablePass, public TargetTransformInfo {
+ const X86TargetMachine *TM;
+ const X86Subtarget *ST;
+ const X86TargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ X86TTI(const X86TargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializeX86TTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
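+ /// With multiple inheritance, the TargetTransformInfo base can sit at a
+ /// nonzero offset inside this object, so analysis lookup must go through
+ /// this adjustment.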
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti",
+ "X86 Target Transform Info", true, true, false)
+char X86TTI::ID = 0;
+
+ImmutablePass *
+llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
+ return new X86TTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// X86 cost model.
+//
+//===----------------------------------------------------------------------===//
+
+X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ // TODO: Currently the __builtin_popcount() implementation using SSE3
+ // instructions is inefficient. Once the problem is fixed, we should
+ // call ST->hasSSE3() instead of ST->hasSSE41().
+ return ST->hasSSE41() ? PSK_FastHardware : PSK_Software;
+}
+
+unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasSSE1())
+ return 0;
+
+ if (ST->is64Bit())
+ return 16;
+ return 8;
+}
+
+unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAVX()) return 256;
+ if (ST->hasSSE1()) return 128;
+ return 0;
+ }
+
+ if (ST->is64Bit())
+ return 64;
+ return 32;
+}
+
+unsigned X86TTI::getMaximumUnrollFactor() const {
+ if (ST->isAtom())
+ return 1;
+
+ // Sandybridge and Haswell have multiple execution ports and pipelined
+ // vector units.
+ if (ST->hasAVX())
+ return 4;
+
+ return 2;
+}
+
+unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+ // Legalize the type.
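+ // LT.first counts how many legal-typed pieces the operation splits into
+ // (the factor applied to each table cost below); LT.second is the
+ // legalized value type used for the table lookups.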
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ static const CostTblEntry<MVT> AVX2CostTable[] = {
+ // Shifts on v4i64/v8i32 are legal on AVX2, but we declare them as custom so
+ // we can detect the cases where the shift amount is a scalar.
+ { ISD::SHL, MVT::v4i32, 1 },
+ { ISD::SRL, MVT::v4i32, 1 },
+ { ISD::SRA, MVT::v4i32, 1 },
+ { ISD::SHL, MVT::v8i32, 1 },
+ { ISD::SRL, MVT::v8i32, 1 },
+ { ISD::SRA, MVT::v8i32, 1 },
+ { ISD::SHL, MVT::v2i64, 1 },
+ { ISD::SRL, MVT::v2i64, 1 },
+ { ISD::SHL, MVT::v4i64, 1 },
+ { ISD::SRL, MVT::v4i64, 1 },
+
+ { ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence.
+ { ISD::SHL, MVT::v16i16, 16*10 }, // Scalarized.
+
+ { ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRL, MVT::v16i16, 8*10 }, // Scalarized.
+
+ { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized.
+ { ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized.
+ { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
+ };
+
+ // Look for AVX2 lowering tricks.
+ if (ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX2CostTable[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT> SSE2UniformConstCostTable[] = {
+ // We don't correctly identify costs of casts because they are marked as
+ // custom.
+ // Constant splats are cheaper for the following instructions.
+ { ISD::SHL, MVT::v16i8, 1 }, // psllw.
+ { ISD::SHL, MVT::v8i16, 1 }, // psllw.
+ { ISD::SHL, MVT::v4i32, 1 }, // pslld
+ { ISD::SHL, MVT::v2i64, 1 }, // psllq.
+
+ { ISD::SRL, MVT::v16i8, 1 }, // psrlw.
+ { ISD::SRL, MVT::v8i16, 1 }, // psrlw.
+ { ISD::SRL, MVT::v4i32, 1 }, // psrld.
+ { ISD::SRL, MVT::v2i64, 1 }, // psrlq.
+
+ { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
+ { ISD::SRA, MVT::v8i16, 1 }, // psraw.
+ { ISD::SRA, MVT::v4i32, 1 }, // psrad.
+ };
+
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasSSE2()) {
+ int Idx = CostTableLookup<MVT>(SSE2UniformConstCostTable,
+ array_lengthof(SSE2UniformConstCostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * SSE2UniformConstCostTable[Idx].Cost;
+ }
+
+
+ static const CostTblEntry<MVT> SSE2CostTable[] = {
+ // We don't correctly identify costs of casts because they are marked as
+ // custom.
+ // In some cases where the shift amount is a scalar we could generate better
+ // code. Unfortunately, when this is the case, the value (the splat) will get
+ // hoisted out of the loop, thereby making it invisible to ISel. The cost
+ // model must return worst-case assumptions because it is used for
+ // vectorization and we don't want to make vectorized code worse than scalar
+ // code.
+ { ISD::SHL, MVT::v16i8, 30 }, // cmpeqb sequence.
+ { ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
+ { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
+
+ { ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
+ { ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
+ { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
+
+ { ISD::SRA, MVT::v16i8, 16*10 }, // Scalarized.
+ { ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized.
+ { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
+ { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
+ };
+
+ if (ST->hasSSE2()) {
+ int Idx = CostTableLookup<MVT>(SSE2CostTable, array_lengthof(SSE2CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * SSE2CostTable[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT> AVX1CostTable[] = {
+ // We don't have to scalarize unsupported ops. We can issue two half-sized
+ // operations plus one extract of the upper YMM half and one insert to
+ // rebuild the result. Two ops + 1 extract + 1 insert = 4.
+ { ISD::MUL, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v8i32, 4 },
+ { ISD::ADD, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v4i64, 4 },
+ { ISD::ADD, MVT::v4i64, 4 },
+ // A v4i64 multiply is custom lowered as two split v2i64 halves, each
+ // lowered as a series of long multiplies (3), shifts (4) and adds (2).
+ // Because we believe v4i64 to be a legal type, we must also include the
+ // split factor of two in the cost table. Therefore, the cost here is 18
+ // instead of 9.
+ { ISD::MUL, MVT::v4i64, 18 },
+ };
+
+ // Look for AVX1 lowering tricks.
+ if (ST->hasAVX() && !ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX1CostTable[Idx].Cost;
+ }
+
+ // Custom lowering of vectors.
+ static const CostTblEntry<MVT> CustomLowered[] = {
+ // A v2i64/v4i64 multiply is custom lowered as a series of long
+ // multiplies (3), shifts (4) and adds (2).
+ { ISD::MUL, MVT::v2i64, 9 },
+ { ISD::MUL, MVT::v4i64, 9 },
+ };
+ int Idx = CostTableLookup<MVT>(CustomLowered, array_lengthof(CustomLowered),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * CustomLowered[Idx].Cost;
+
+ // Special lowering of v4i32 mul on SSE2/SSE3: lower v4i32 mul as 2x shuffle,
+ // 2x pmuludq, 2x shuffle.
+ if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
+ !ST->hasSSE41())
+ return 6;
+
+ // Fallback to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+ Op2Info);
+}
+
+unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ // We only estimate the cost of reverse shuffles.
+ if (Kind != SK_Reverse)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ unsigned Cost = 1;
+ if (LT.second.getSizeInBits() > 128)
+ Cost = 3; // Extract + insert + copy.
+
+ // Multiply by the number of parts.
+ return Cost * LT.first;
+}
+
+unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+ static const TypeConversionCostTblEntry<MVT> AVXConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
+
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
+
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
+ };
+
+ if (ST->hasAVX()) {
+ int Idx = ConvertCostTableLookup<MVT>(AVXConversionTbl,
+ array_lengthof(AVXConversionTbl),
+ ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return AVXConversionTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+
+ MVT MTy = LT.second;
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ static const CostTblEntry<MVT> SSE42CostTbl[] = {
+ { ISD::SETCC, MVT::v2f64, 1 },
+ { ISD::SETCC, MVT::v4f32, 1 },
+ { ISD::SETCC, MVT::v2i64, 1 },
+ { ISD::SETCC, MVT::v4i32, 1 },
+ { ISD::SETCC, MVT::v8i16, 1 },
+ { ISD::SETCC, MVT::v16i8, 1 },
+ };
+
+ static const CostTblEntry<MVT> AVX1CostTbl[] = {
+ { ISD::SETCC, MVT::v4f64, 1 },
+ { ISD::SETCC, MVT::v8f32, 1 },
+ // AVX1 does not support 8-wide integer compare.
+ { ISD::SETCC, MVT::v4i64, 4 },
+ { ISD::SETCC, MVT::v8i32, 4 },
+ { ISD::SETCC, MVT::v16i16, 4 },
+ { ISD::SETCC, MVT::v32i8, 4 },
+ };
+
+ static const CostTblEntry<MVT> AVX2CostTbl[] = {
+ { ISD::SETCC, MVT::v4i64, 1 },
+ { ISD::SETCC, MVT::v8i32, 1 },
+ { ISD::SETCC, MVT::v16i16, 1 },
+ { ISD::SETCC, MVT::v32i8, 1 },
+ };
+
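+ // Query the most specific feature set first, so newer subtargets hit their
+ // cheaper entries before falling back to the older tables.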
+ if (ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX2CostTbl[Idx].Cost;
+ }
+
+ if (ST->hasAVX()) {
+ int Idx = CostTableLookup<MVT>(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX1CostTbl[Idx].Cost;
+ }
+
+ if (ST->hasSSE42()) {
+ int Idx = CostTableLookup<MVT>(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
+ if (Idx != -1)
+ return LT.first * SSE42CostTbl[Idx].Cost;
+ }
+
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ if (Index != -1U) {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
+
+ // This type is legalized to a scalar type.
+ if (!LT.second.isVector())
+ return 0;
+
+ // The type may be split. Normalize the index to the new type.
+ unsigned Width = LT.second.getVectorNumElements();
+ Index = Index % Width;
+
+ // Floating point scalars are already located in index #0.
+ if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
+ return 0;
+ }
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ // Each load/store unit costs 1.
+ unsigned Cost = LT.first * 1;
+
+ // On Sandy Bridge, 256-bit loads/stores are double pumped
+ // (but not on Haswell).
+ if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
+ Cost *= 2;
+
+ return Cost;
+}
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index c4a58874a414..0f77948c0eff 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -120,9 +120,19 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
return false;
}
+static bool clobbersAllYmmRegs(const MachineOperand &MO) {
+ for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
+ if (!MO.clobbersPhysReg(reg))
+ return false;
+ }
+ return true;
+}
+
static bool hasYmmReg(MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
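+ // A call whose register mask leaves any YMM register unclobbered may have
+ // a YMM value live across it, so conservatively treat it as a YMM use.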
+ if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
+ return true;
if (!MO.isReg())
continue;
if (MO.isDebug())
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index ca94f03a6496..099ad390d2a7 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -2,6 +2,7 @@ set(LLVM_TARGET_DEFINITIONS XCore.td)
tablegen(LLVM XCoreGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM XCoreGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM XCoreGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM XCoreGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM XCoreGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM XCoreGenCallingConv.inc -gen-callingconv)
@@ -15,6 +16,7 @@ add_llvm_target(XCoreCodeGen
XCoreISelDAGToDAG.cpp
XCoreISelLowering.cpp
XCoreMachineFunctionInfo.cpp
+ XCoreMCInstLower.cpp
XCoreRegisterInfo.cpp
XCoreSubtarget.cpp
XCoreTargetMachine.cpp
@@ -24,5 +26,7 @@ add_llvm_target(XCoreCodeGen
add_dependencies(LLVMXCoreCodeGen intrinsics_gen)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/Disassembler/CMakeLists.txt b/lib/Target/XCore/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..cdc5d993b8bf
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMXCoreDisassembler
+ XCoreDisassembler.cpp
+ )
+
+add_dependencies(LLVMXCoreDisassembler XCoreCommonTableGen)
diff --git a/lib/Target/XCore/Disassembler/LLVMBuild.txt b/lib/Target/XCore/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..028de2cb3433
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/XCore/Disassembler/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = XCoreDisassembler
+parent = XCore
+required_libraries = MC Support XCoreInfo
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/Disassembler/Makefile b/lib/Target/XCore/Disassembler/Makefile
new file mode 100644
index 000000000000..4caffdd1da6a
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/XCore/Disassembler/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreDisassembler
+
+# Hack: we need to include 'main' XCore target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
new file mode 100644
index 000000000000..7b99967c4f32
--- /dev/null
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -0,0 +1,800 @@
+//===- XCoreDisassembler.cpp - Disassembler for XCore -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file is part of the XCore Disassembler.
+///
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreRegisterInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+
+/// \brief A disassembler class for XCore.
+class XCoreDisassembler : public MCDisassembler {
+ const MCRegisterInfo *RegInfo;
+public:
+ XCoreDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) :
+ MCDisassembler(STI), RegInfo(Info) {}
+
+ /// \brief See MCDisassembler.
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+};
+}
+
+static bool readInstruction16(const MemoryObject &region,
+ uint64_t address,
+ uint64_t &size,
+ uint16_t &insn) {
+ uint8_t Bytes[4];
+
+ // We want to read exactly 2 bytes of data.
+ if (region.readBytes(address, 2, Bytes, NULL) == -1) {
+ size = 0;
+ return false;
+ }
+ // Encoded as a little-endian 16-bit word in the stream.
+ insn = (Bytes[0] << 0) | (Bytes[1] << 8);
+ return true;
+}
+
+static bool readInstruction32(const MemoryObject &region,
+ uint64_t address,
+ uint64_t &size,
+ uint32_t &insn) {
+ uint8_t Bytes[4];
+
+ // We want to read exactly 4 bytes of data.
+ if (region.readBytes(address, 4, Bytes, NULL) == -1) {
+ size = 0;
+ return false;
+ }
+ // Encoded as a little-endian 32-bit word in the stream.
+ insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
+ (Bytes[3] << 24);
+ return true;
+}
+
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const XCoreDisassembler *Dis = static_cast<const XCoreDisassembler*>(D);
+ return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
+}
+
+static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus Decode2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RImmInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeR2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRUSSrcDstBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLR2RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode3RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode3RImmInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL3RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL6RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL5RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
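+// Pulls in the TableGen-generated DecoderTable16/DecoderTable32 tables and
+// the decodeInstruction() helper used by getInstruction() below.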
+#include "XCoreGenDisassemblerTables.inc"
+
+static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder)
+{
+ if (RegNo > 11)
+ return MCDisassembler::Fail;
+ unsigned Reg = getReg(Decoder, XCore::GRRegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder)
+{
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+ unsigned Reg = getReg(Decoder, XCore::RRegsRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ if (Val > 11)
+ return MCDisassembler::Fail;
+ static const unsigned Values[] = {
+ 32 /*bpw*/, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 32
+ };
+ Inst.addOperand(MCOperand::CreateImm(Values[Val]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return MCDisassembler::Success;
+}
+
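+// In the short 2-operand formats the high two bits of both operands are
+// folded into a single base-3 value stored in bits 6-10 (27-31, with bit 5
+// selecting a second block); the low two bits sit in bits 2-3 and 0-1.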
+static DecodeStatus
+Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) {
+ unsigned Combined = fieldFromInstruction(Insn, 6, 5);
+ if (Combined < 27)
+ return MCDisassembler::Fail;
+ if (fieldFromInstruction(Insn, 5, 1)) {
+ if (Combined == 31)
+ return MCDisassembler::Fail;
+ Combined += 5;
+ }
+ Combined -= 27;
+ unsigned Op1High = Combined % 3;
+ unsigned Op2High = Combined / 3;
+ Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 2, 2);
+ Op2 = (Op2High << 2) | fieldFromInstruction(Insn, 0, 2);
+ return MCDisassembler::Success;
+}
+
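+// The 3-operand formats use the same scheme: bits 6-10 hold a base-3 value
+// (0-26) giving the three operands' high bits, with the low two bits of each
+// operand in bits 4-5, 2-3 and 0-1 respectively.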
+static DecodeStatus
+Decode3OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2,
+ unsigned &Op3) {
+ unsigned Combined = fieldFromInstruction(Insn, 6, 5);
+ if (Combined >= 27)
+ return MCDisassembler::Fail;
+
+ unsigned Op1High = Combined % 3;
+ unsigned Op2High = (Combined / 3) % 3;
+ unsigned Op3High = Combined / 9;
+ Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 4, 2);
+ Op2 = (Op2High << 2) | fieldFromInstruction(Insn, 2, 2);
+ Op3 = (Op3High << 2) | fieldFromInstruction(Insn, 0, 2);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ // Try to decode as a 3R / 2RUS instruction.
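+ // The short formats keep their 5-bit major opcode in bits 11-15.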
+ unsigned Opcode = fieldFromInstruction(Insn, 11, 5);
+ switch (Opcode) {
+ case 0x0:
+ Inst.setOpcode(XCore::STW_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x1:
+ Inst.setOpcode(XCore::LDW_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x2:
+ Inst.setOpcode(XCore::ADD_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x3:
+ Inst.setOpcode(XCore::SUB_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x4:
+ Inst.setOpcode(XCore::SHL_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x5:
+ Inst.setOpcode(XCore::SHR_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x6:
+ Inst.setOpcode(XCore::EQ_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x7:
+ Inst.setOpcode(XCore::AND_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x8:
+ Inst.setOpcode(XCore::OR_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x9:
+ Inst.setOpcode(XCore::LDW_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x10:
+ Inst.setOpcode(XCore::LD16S_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x11:
+ Inst.setOpcode(XCore::LD8U_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x12:
+ Inst.setOpcode(XCore::ADD_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x13:
+ Inst.setOpcode(XCore::SUB_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x14:
+ Inst.setOpcode(XCore::SHL_2rus);
+ return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x15:
+ Inst.setOpcode(XCore::SHR_2rus);
+ return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x16:
+ Inst.setOpcode(XCore::EQ_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x17:
+ Inst.setOpcode(XCore::TSETR_3r);
+ return Decode3RImmInstruction(Inst, Insn, Address, Decoder);
+ case 0x18:
+ Inst.setOpcode(XCore::LSS_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x19:
+ Inst.setOpcode(XCore::LSU_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
+Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ Inst.addOperand(MCOperand::CreateImm(Op1));
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op2, Op1);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op2));
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeBitpOperand(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeBitpOperand(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ // Try to decode as an L3R / L2RUS instruction.
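+ // Long-format opcodes are split across the 32-bit word: bits 16-19 give
+ // the low nibble and bits 27-31 the high bits.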
+ unsigned Opcode = fieldFromInstruction(Insn, 16, 4) |
+ fieldFromInstruction(Insn, 27, 5) << 4;
+ switch (Opcode) {
+ case 0x0c:
+ Inst.setOpcode(XCore::STW_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x1c:
+ Inst.setOpcode(XCore::XOR_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x2c:
+ Inst.setOpcode(XCore::ASHR_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x3c:
+ Inst.setOpcode(XCore::LDAWF_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x4c:
+ Inst.setOpcode(XCore::LDAWB_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x5c:
+ Inst.setOpcode(XCore::LDA16F_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x6c:
+ Inst.setOpcode(XCore::LDA16B_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x7c:
+ Inst.setOpcode(XCore::MUL_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x8c:
+ Inst.setOpcode(XCore::DIVS_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x9c:
+ Inst.setOpcode(XCore::DIVU_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x10c:
+ Inst.setOpcode(XCore::ST16_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x11c:
+ Inst.setOpcode(XCore::ST8_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x12c:
+ Inst.setOpcode(XCore::ASHR_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x12d:
+ Inst.setOpcode(XCore::OUTPW_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x12e:
+ Inst.setOpcode(XCore::INPW_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x13c:
+ Inst.setOpcode(XCore::LDAWF_l2rus);
+ return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x14c:
+ Inst.setOpcode(XCore::LDAWB_l2rus);
+ return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x15c:
+ Inst.setOpcode(XCore::CRC_l3r);
+ return DecodeL3RSrcDstInstruction(Inst, Insn, Address, Decoder);
+ case 0x18c:
+ Inst.setOpcode(XCore::REMS_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x19c:
+ Inst.setOpcode(XCore::REMU_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
+DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
+ Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
+ Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ Inst.addOperand(MCOperand::CreateImm(Op1));
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op3));
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeBitpOperand(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op3));
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeBitpOperand(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3, Op4, Op5, Op6;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S != MCDisassembler::Success)
+ return S;
+ S = Decode3OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5, Op6);
+ if (S != MCDisassembler::Success)
+ return S;
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op6, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ // Try to decode as an L6R instruction.
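+ // Clear any operands that a partially-successful decode already added.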
+ Inst.clear();
+ unsigned Opcode = fieldFromInstruction(Insn, 27, 5);
+ switch (Opcode) {
+ case 0x00:
+ Inst.setOpcode(XCore::LMUL_l6r);
+ return DecodeL6RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
+DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3, Op4, Op5;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S != MCDisassembler::Success)
+ return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder);
+ S = Decode2OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5);
+ if (S != MCDisassembler::Success)
+ return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ unsigned Op4 = fieldFromInstruction(Insn, 16, 4);
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ }
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ unsigned Op4 = fieldFromInstruction(Insn, 16, 4);
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ }
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+MCDisassembler::DecodeStatus
+XCoreDisassembler::getInstruction(MCInst &instr,
+ uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const {
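+ // XCore instructions are 16 or 32 bits wide; try a 16-bit decode first and
+ // fall back to a full 32-bit decode if the short table rejects the word.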
+ uint16_t insn16;
+
+ if (!readInstruction16(Region, Address, Size, insn16)) {
+ return Fail;
+ }
+
+ // Call the auto-generated decoder function.
+ DecodeStatus Result = decodeInstruction(DecoderTable16, instr, insn16,
+ Address, this, STI);
+ if (Result != Fail) {
+ Size = 2;
+ return Result;
+ }
+
+ uint32_t insn32;
+
+ if (!readInstruction32(Region, Address, Size, insn32)) {
+ return Fail;
+ }
+
+ // Call the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTable32, instr, insn32, Address, this, STI);
+ if (Result != Fail) {
+ Size = 4;
+ return Result;
+ }
+
+ return Fail;
+}
+
+namespace llvm {
+ extern Target TheXCoreTarget;
+}
+
+static MCDisassembler *createXCoreDisassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new XCoreDisassembler(STI, T.createMCRegInfo(""));
+}
+
+extern "C" void LLVMInitializeXCoreDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheXCoreTarget,
+ createXCoreDisassembler);
+}
diff --git a/lib/Target/XCore/InstPrinter/CMakeLists.txt b/lib/Target/XCore/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..930e733cd7f1
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMXCoreAsmPrinter
+ XCoreInstPrinter.cpp
+ )
+
+add_dependencies(LLVMXCoreAsmPrinter XCoreCommonTableGen)
diff --git a/lib/Target/XCore/InstPrinter/LLVMBuild.txt b/lib/Target/XCore/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..8750bc7acedc
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/XCore/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = XCoreAsmPrinter
+parent = XCore
+required_libraries = MC Support
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/InstPrinter/Makefile b/lib/Target/XCore/InstPrinter/Makefile
new file mode 100644
index 000000000000..1c1c61299c39
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/XCore/InstPrinter/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreAsmPrinter
+
+# Hack: we need to include 'main' XCore target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
new file mode 100644
index 000000000000..1592351c3861
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -0,0 +1,97 @@
+//===-- XCoreInstPrinter.cpp - Convert XCore MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an XCore MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCoreInstPrinter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#include "XCoreGenAsmWriter.inc"
+
+void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << StringRef(getRegisterName(RegNo)).lower();
+}
+
+void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void XCoreInstPrinter::
+printInlineJT(const MCInst *MI, int opNum, raw_ostream &O) {
+ report_fatal_error("can't handle InlineJT");
+}
+
+void XCoreInstPrinter::
+printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O) {
+ report_fatal_error("can't handle InlineJT32");
+}
+
+static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
+ int Offset = 0;
+ const MCSymbolRefExpr *SRE;
+
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+ SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+ assert(SRE && CE && "Binary expression must be sym+const.");
+ Offset = CE->getValue();
+ } else {
+ SRE = dyn_cast<MCSymbolRefExpr>(Expr);
+ assert(SRE && "Unexpected MCExpr type.");
+ }
+ assert(SRE->getKind() == MCSymbolRefExpr::VK_None);
+
+ OS << SRE->getSymbol();
+
+ if (Offset) {
+ if (Offset > 0)
+ OS << '+';
+ OS << Offset;
+ }
+}
+
+void XCoreInstPrinter::
+printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ printExpr(Op.getExpr(), O);
+}
+
+void XCoreInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ printOperand(MI, opNum, O);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << "+";
+ printOperand(MI, opNum+1, O);
+}
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
new file mode 100644
index 000000000000..772c515b5c9e
--- /dev/null
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -0,0 +1,44 @@
+//== XCoreInstPrinter.h - Convert XCore MCInst to assembly syntax -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the XCoreInstPrinter class,
+/// which is used to print XCore MCInst to a .s file.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREINSTPRINTER_H
+#define XCOREINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class TargetMachine;
+
+class XCoreInstPrinter : public MCInstPrinter {
+public:
+ XCoreInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+private:
+ void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
+ void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt
index 53b4a9e3f5f7..59e64ad0855c 100644
--- a/lib/Target/XCore/LLVMBuild.txt
+++ b/lib/Target/XCore/LLVMBuild.txt
@@ -16,13 +16,14 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = MCTargetDesc TargetInfo
+subdirectories = Disassembler InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = XCore
parent = Target
has_asmprinter = 1
+has_disassembler = 1
[component_1]
type = Library
diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
index a80c939b4372..8213f9e42883 100644
--- a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = XCoreDesc
parent = XCore
-required_libraries = MC XCoreInfo
+required_libraries = MC XCoreAsmPrinter XCoreInfo
add_to_library_groups = XCore
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index bbfdd4356f2a..b5b072dcbda6 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "XCoreMCTargetDesc.h"
+#include "InstPrinter/XCoreInstPrinter.h"
#include "XCoreMCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -69,6 +70,15 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
+static MCInstPrinter *createXCoreMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new XCoreInstPrinter(MAI, MII, MRI);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeXCoreTargetMC() {
// Register the MC asm info.
@@ -87,4 +97,8 @@ extern "C" void LLVMInitializeXCoreTargetMC() {
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
createXCoreMCSubtargetInfo);
+
+ // Register the MCInstPrinter
+ TargetRegistry::RegisterMCInstPrinter(TheXCoreTarget,
+ createXCoreMCInstPrinter);
}
diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile
index b823c4ed37e9..92ddc8860876 100644
--- a/lib/Target/XCore/Makefile
+++ b/lib/Target/XCore/Makefile
@@ -14,10 +14,10 @@ TARGET = XCore
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = XCoreGenRegisterInfo.inc XCoreGenInstrInfo.inc \
XCoreGenAsmWriter.inc \
- XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
- XCoreGenSubtargetInfo.inc
+ XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
+ XCoreGenDisassemblerTables.inc XCoreGenSubtargetInfo.inc
-DIRS = TargetInfo MCTargetDesc
+DIRS = Disassembler InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index 9a0971d1e45f..00e34e04fbe5 100644
--- a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "XCore.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
index 04a1dd5e95be..e9a6d88fd68e 100644
--- a/lib/Target/XCore/XCore.td
+++ b/lib/Target/XCore/XCore.td
@@ -41,7 +41,13 @@ def : Proc<"xs1b-generic", []>;
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
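+// Use an MCInst-based writer: with AsmWriterClassName "InstPrinter", tblgen
+// emits XCoreInstPrinter::printInstruction into XCoreGenAsmWriter.inc.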
+def XCoreAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
def XCore : Target {
// Pull in Instruction Info:
let InstructionSet = XCoreInstrInfo;
+ let AssemblyWriters = [XCoreAsmWriter];
}
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index caae56227214..0d146ba4d98d 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -14,31 +14,34 @@
#define DEBUG_TYPE "asm-printer"
#include "XCore.h"
+#include "InstPrinter/XCoreInstPrinter.h"
#include "XCoreInstrInfo.h"
+#include "XCoreMCInstLower.h"
#include "XCoreSubtarget.h"
#include "XCoreTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include <algorithm>
#include <cctype>
using namespace llvm;
@@ -52,16 +55,17 @@ static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
namespace {
class XCoreAsmPrinter : public AsmPrinter {
const XCoreSubtarget &Subtarget;
+ XCoreMCInstLower MCInstLowering;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
public:
explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){}
+ : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()),
+ MCInstLowering(*this) {}
virtual const char *getPassName() const {
return "XCore Assembly Printer";
}
- void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
const std::string &directive = ".jmptable");
void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) {
@@ -75,18 +79,14 @@ namespace {
void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
virtual void EmitGlobalVariable(const GlobalVariable *GV);
- void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
- static const char *getRegisterName(unsigned RegNo);
-
void EmitFunctionEntryLabel();
void EmitInstruction(const MachineInstr *MI);
+ void EmitFunctionBodyStart();
void EmitFunctionBodyEnd();
virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
};
} // end of anonymous namespace
-#include "XCoreGenAsmWriter.inc"
-
void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
assert(((GV->hasExternalLinkage() ||
GV->hasWeakLinkage()) ||
@@ -171,12 +171,16 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// The ABI requires that unsigned scalar types smaller than 32 bits
// are padded to 32 bits.
if (Size < 4)
- OutStreamer.EmitZeros(4 - Size, 0);
+ OutStreamer.EmitZeros(4 - Size);
// Mark the end of the global
OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data");
}
+void XCoreAsmPrinter::EmitFunctionBodyStart() {
+ MCInstLowering.Initialize(Mang, &MF->getContext());
+}
+
/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
/// the last basic block in the function.
void XCoreAsmPrinter::EmitFunctionBodyEnd() {
@@ -192,17 +196,6 @@ void XCoreAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitLabel(CurrentFnSym);
}
-void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- printOperand(MI, opNum, O);
-
- if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
- return;
-
- O << "+";
- printOperand(MI, opNum+1, O);
-}
-
void XCoreAsmPrinter::
printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
const std::string &directive) {
@@ -225,7 +218,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
- O << getRegisterName(MO.getReg());
+ O << XCoreInstPrinter::getRegisterName(MO.getReg());
break;
case MachineOperand::MO_Immediate:
O << MO.getImm();
@@ -270,7 +263,7 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
}
-printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, O);
return false;
}
@@ -317,15 +310,30 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case XCore::ADD_2rus:
if (MI->getOperand(2).getImm() == 0) {
- O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
- << getRegisterName(MI->getOperand(1).getReg());
+ O << "\tmov "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg());
OutStreamer.EmitRawText(O.str());
return;
}
break;
+ case XCore::BR_JT:
+ case XCore::BR_JT32:
+ O << "\tbru "
+ << XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg()) << '\n';
+ if (MI->getOpcode() == XCore::BR_JT)
+ printInlineJT(MI, 0, O);
+ else
+ printInlineJT32(MI, 0, O);
+ O << '\n';
+ OutStreamer.EmitRawText(O.str());
+ return;
}
- printInstruction(MI, O);
- OutStreamer.EmitRawText(O.str());
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+
+ OutStreamer.EmitInstruction(TmpInst);
}
// Force static initialization.
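
EmitInstruction now goes through the MC layer: the MachineInstr is lowered to an MCInst and handed to the streamer instead of being printed as raw text (only the mov and jump-table special cases keep EmitRawText). XCoreMCInstLower.cpp is outside this excerpt; the following is a minimal sketch, assuming the usual shape of such lowering helpers, of roughly what MCInstLowering.Lower(MI, TmpInst) does:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;

static void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
  OutMI.setOpcode(MI->getOpcode());
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    switch (MO.getType()) {
    case MachineOperand::MO_Register:
      if (MO.isImplicit())
        break;                               // drop implicit operands
      OutMI.addOperand(MCOperand::CreateReg(MO.getReg()));
      break;
    case MachineOperand::MO_Immediate:
      OutMI.addOperand(MCOperand::CreateImm(MO.getImm()));
      break;
    default:
      break; // globals/symbols need MCSymbolRefExpr handling, omitted here
    }
  }
}
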
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index e18d97384d3d..beeb07f831c6 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -16,16 +16,16 @@
#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMachineFunctionInfo.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -98,13 +98,10 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
bool FP = hasFP(MF);
- const AttrListPtr &PAL = MF.getFunction()->getAttributes();
+ const AttributeSet &PAL = MF.getFunction()->getAttributes();
- for (unsigned I = 0, E = PAL.getNumAttrs(); I != E; ++I)
- if (PAL.getAttributesAtIndex(I).hasAttribute(Attributes::Nest)) {
- loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
- break;
- }
+ if (PAL.hasAttrSomewhere(Attribute::Nest))
+ loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
// Work out frame sizes.
int FrameSize = MFI->getStackSize();
@@ -264,7 +261,7 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
MBB.erase(MBBI);
} else {
- int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ int Opcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
}
}
@@ -335,6 +332,58 @@ bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void XCoreFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
+ // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ assert(Amount%4 == 0);
+ Amount /= 4;
+
+ bool isU6 = isImmU6(Amount);
+ if (!isU6 && !isImmU16(Amount)) {
+ // FIX could emit multiple instructions in this case.
+#ifndef NDEBUG
+ errs() << "eliminateCallFramePseudoInstr size too big: "
+ << Amount << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MachineInstr *New;
+ if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
+ int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
+ .addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
+ int Opcode = isU6 ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
+ .addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
void
XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -360,7 +409,7 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
if (RegInfo->requiresRegisterScavenging(MF)) {
// Reserve a slot close to SP or frame pointer.
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
}
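
The size math in eliminateCallFramePseudoInstr above is easy to check by hand: the byte adjustment is rounded up to the stack alignment, then divided by 4 because EXTSP and LDAWSP immediates count 32-bit words, not bytes. A standalone sketch of that arithmetic (callFrameWords is a hypothetical helper, not part of the patch):

#include <cassert>
#include <cstdint>

uint64_t callFrameWords(uint64_t amountBytes, unsigned align) {
  // Round up to the next alignment boundary, as in the patch.
  uint64_t rounded = (amountBytes + align - 1) / align * align;
  assert(rounded % 4 == 0 && "XCore stack adjustments are whole words");
  return rounded / 4; // e.g. 10 bytes, 8-byte alignment -> 16 bytes -> 4 words
}
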
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index db1bbb60d968..ebad62f2fa53 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -39,6 +39,10 @@ namespace llvm {
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool hasFP(const MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 7564fbad7d45..fbf86c523054 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -13,23 +13,23 @@
#include "XCore.h"
#include "XCoreTargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
using namespace llvm;
/// XCoreDAGToDAGISel - XCore specific code to select XCore machine
@@ -211,15 +211,10 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
Ops, 4);
}
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IntNo) {
- case Intrinsic::xcore_crc8:
- SDValue Ops[] = { N->getOperand(1), N->getOperand(2), N->getOperand(3) };
- return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
- Ops, 3);
- }
- break;
+ case XCoreISD::CRC8: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
}
case ISD::BRIND:
if (SDNode *ResNode = SelectBRIND(N))
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 9e7816e21f80..a5d2be88db7d 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -14,17 +14,11 @@
#define DEBUG_TYPE "xcore-lower"
#include "XCoreISelLowering.h"
-#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
-#include "XCoreTargetObjectFile.h"
-#include "XCoreTargetMachine.h"
+#include "XCoreMachineFunctionInfo.h"
#include "XCoreSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
+#include "XCoreTargetMachine.h"
+#include "XCoreTargetObjectFile.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -33,6 +27,12 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -54,6 +54,7 @@ getTargetNodeName(unsigned Opcode) const
case XCoreISD::LMUL : return "XCoreISD::LMUL";
case XCoreISD::MACCU : return "XCoreISD::MACCU";
case XCoreISD::MACCS : return "XCoreISD::MACCS";
+ case XCoreISD::CRC8 : return "XCoreISD::CRC8";
case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
default : return NULL;
@@ -83,7 +84,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
// XCore does not have the NodeTypes below.
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::ADDC, MVT::i32, Expand);
setOperationAction(ISD::ADDE, MVT::i32, Expand);
@@ -152,9 +153,12 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
- maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
- maxStoresPerMemmove = maxStoresPerMemmoveOptSize
- = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2;
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 4;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize
+ = MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 2;
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::STORE);
@@ -167,24 +171,25 @@ SDValue XCoreTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode())
{
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
- case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::BR_JT: return LowerBR_JT(Op, DAG);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::VAARG: return LowerVAARG(Op, DAG);
- case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
- case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
+ case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG);
// FIXME: Remove these when LegalizeDAGTypes lands.
case ISD::ADD:
- case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
- case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
- case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
}
@@ -225,20 +230,16 @@ getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
{
// FIXME there is no actual debug info here
DebugLoc dl = GA.getDebugLoc();
- if (isa<Function>(GV)) {
- return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+ const GlobalValue *UnderlyingGV = GV;
+ // If GV is an alias then use the aliasee to determine the wrapper type
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ UnderlyingGV = GA->resolveAliasedGlobal();
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(UnderlyingGV)) {
+ if (GVar->isConstant())
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
}
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar) {
- // If GV is an alias then use the aliasee to determine constness
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
- }
- bool isConst = GVar && GVar->isConstant();
- if (isConst) {
- return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
- }
- return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
}
SDValue XCoreTargetLowering::
@@ -740,13 +741,13 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
XCoreISD::LSUB;
SDValue Zero = DAG.getConstant(0, MVT::i32);
- SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
- LHSL, RHSL, Zero);
- SDValue Lo(Carry.getNode(), 1);
+ SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSL, RHSL, Zero);
+ SDValue Carry(Lo.getNode(), 1);
- SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
- LHSH, RHSH, Carry);
- SDValue Hi(Ignored.getNode(), 1);
+ SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSH, RHSH, Carry);
+ SDValue Ignored(Hi.getNode(), 1);
// Merge the pieces
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}
@@ -862,6 +863,23 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5);
}
+SDValue XCoreTargetLowering::
+LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ case Intrinsic::xcore_crc8:
+ EVT VT = Op.getValueType();
+ SDValue Data =
+ DAG.getNode(XCoreISD::CRC8, DL, DAG.getVTList(VT, VT),
+ Op.getOperand(1), Op.getOperand(2) , Op.getOperand(3));
+ SDValue Crc(Data.getNode(), 1);
+ SDValue Results[] = { Crc, Data };
+ return DAG.getMergeValues(Results, 2, DL);
+ }
+ return SDValue();
+}
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1231,15 +1249,11 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Return on XCore is always a "retsp 0"
+ RetOps.push_back(DAG.getConstant(0, MVT::i32));
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1252,15 +1266,17 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// guarantee that all emitted copies are
// stuck together, avoiding something bad
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // Return on XCore is always a "retsp 0"
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
- Chain, DAG.getConstant(0, MVT::i32), Flag);
- else // Return Void
- return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
- Chain, DAG.getConstant(0, MVT::i32));
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
//===----------------------------------------------------------------------===//
@@ -1357,13 +1373,13 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Carry = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2,
DAG.getConstant(1, VT));
- SDValue Ops [] = { Carry, Result };
+ SDValue Ops[] = { Result, Carry };
return DAG.getMergeValues(Ops, 2, dl);
}
// fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
@@ -1371,7 +1387,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
if ((KnownZero & Mask) == Mask) {
SDValue Carry = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
- SDValue Ops [] = { Carry, Result };
+ SDValue Ops[] = { Result, Carry };
return DAG.getMergeValues(Ops, 2, dl);
}
}
@@ -1395,14 +1411,14 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Borrow = N2;
SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, VT), N2);
- SDValue Ops [] = { Borrow, Result };
+ SDValue Ops[] = { Result, Borrow };
return DAG.getMergeValues(Ops, 2, dl);
}
}
// fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
@@ -1410,7 +1426,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
if ((KnownZero & Mask) == Mask) {
SDValue Borrow = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
- SDValue Ops [] = { Borrow, Result };
+ SDValue Ops[] = { Result, Borrow };
return DAG.getMergeValues(Ops, 2, dl);
}
}
@@ -1436,11 +1452,15 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// If the high result is unused fold to add(a, b)
if (N->hasNUsesOfValue(0, 0)) {
SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3);
- SDValue Ops [] = { Lo, Lo };
+ SDValue Ops[] = { Lo, Lo };
return DAG.getMergeValues(Ops, 2, dl);
}
// Otherwise fold to ladd(a, b, 0)
- return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
+ SDValue Result =
+ DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
+ SDValue Carry(Result.getNode(), 1);
+ SDValue Ops[] = { Carry, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
}
}
break;
@@ -1534,7 +1554,7 @@ void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
default: break;
case XCoreISD::LADD:
case XCoreISD::LSUB:
- if (Op.getResNo() == 0) {
+ if (Op.getResNo() == 1) {
// Top bits of carry / borrow are clear.
KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
KnownZero.getBitWidth() - 1);
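
The LADD/LSUB hunks above all follow one convention change: result 0 of the node is now the 32-bit sum or difference and result 1 is the carry or borrow, which is why the merged-value orders flip to { Result, Carry } and why computeMaskedBitsForTargetNode now reports clear high bits for ResNo == 1 rather than 0. A plain C++ model of the intended semantics (ladd here is illustrative, not XCore code):

#include <cstdint>
#include <utility>

// Result 0 is the low 32 bits of the sum; result 1 is the carry-out,
// always 0 or 1, so every bit above bit 0 is known zero.
std::pair<uint32_t, uint32_t> ladd(uint32_t a, uint32_t b, uint32_t cin) {
  uint64_t wide = (uint64_t)a + (uint64_t)b + (cin & 1);
  return std::make_pair((uint32_t)wide, (uint32_t)(wide >> 32));
}
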
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 2874f00e4763..8d258f5054c1 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -63,6 +63,9 @@ namespace llvm {
// Corresponds to MACCS instruction
MACCS,
+ // Corresponds to CRC8 instruction
+ CRC8,
+
// Jumptable branch.
BR_JT,
@@ -81,7 +84,7 @@ namespace llvm {
explicit XCoreTargetLowering(XCoreTargetMachine &TM);
virtual unsigned getJumpTableEncoding() const;
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -147,6 +150,7 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
std::pair<unsigned, const TargetRegisterClass*>
diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td
index 1963a70fb30d..379cc39aa617 100644
--- a/lib/Target/XCore/XCoreInstrFormats.td
+++ b/lib/Target/XCore/XCoreInstrFormats.td
@@ -10,7 +10,7 @@
//===----------------------------------------------------------------------===//
// Instruction format superclass
//===----------------------------------------------------------------------===//
-class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+class InstXCore<int sz, dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {
field bits<32> Inst;
@@ -19,102 +19,259 @@ class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
dag InOperandList = ins;
let AsmString = asmstr;
let Pattern = pattern;
+ let Size = sz;
+ field bits<32> SoftFail = 0;
}
// XCore pseudo instructions format
class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern>;
+ : InstXCore<0, outs, ins, asmstr, pattern> {
+ let isPseudo = 1;
+}
//===----------------------------------------------------------------------===//
// Instruction formats
//===----------------------------------------------------------------------===//
-class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _F3R<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc;
+ let DecoderMethod = "Decode3RInstruction";
+}
+
+// 3R with first operand as an immediate. Used for TSETR where the first
+// operand is treated as an immediate since it refers to a register number in
+// another thread.
+class _F3RImm<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F3R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode3RImmInstruction";
+}
+
+class _FL3R<bits<9> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{8-4};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{3-0};
+
+ let Inst{15-11} = 0b11111;
+ let DecoderMethod = "DecodeL3RInstruction";
+}
+
+// L3R with first operand as both a source and a destination.
+class _FL3RSrcDst<bits<9> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : _FL3R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL3RSrcDstInstruction";
+}
+
+class _F2RUS<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc;
+ let DecoderMethod = "Decode2RUSInstruction";
+}
+
+// 2RUS with bitp operand
+class _F2RUSBitp<bits<5> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _F2RUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RUSBitpInstruction";
+}
+
+class _FL2RUS<bits<9> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{8-4};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{3-0};
+
+ let Inst{15-11} = 0b11111;
+ let DecoderMethod = "DecodeL2RUSInstruction";
+}
+
+// L2RUS with bitp operand
+class _FL2RUSBitp<bits<9> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL2RUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL2RUSBitpInstruction";
+}
+
+class _FRU6<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<4> a;
+ bits<6> b;
+
+ let Inst{15-10} = opc;
+ let Inst{9-6} = a;
+ let Inst{5-0} = b;
}
-class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FLRU6<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<4> a;
+ bits<16> b;
+
+ let Inst{31-26} = opc;
+ let Inst{25-22} = a;
+ let Inst{21-16} = b{5-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = b{15-6};
}
-class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FU6<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<6> a;
+
+ let Inst{15-6} = opc;
+ let Inst{5-0} = a;
}
-class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FLU6<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<16> a;
+
+ let Inst{31-22} = opc;
+ let Inst{21-16} = a{5-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = a{15-6};
}
-class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FU10<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<10> a;
+
+ let Inst{15-10} = opc;
+ let Inst{9-0} = a;
}
-class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FLU10<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<20> a;
+
+ let Inst{31-26} = opc;
+ let Inst{25-16} = a{9-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = a{19-10};
}
-class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _F2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{5-1};
+ let Inst{4} = opc{0};
+ let DecoderMethod = "Decode2RInstruction";
}
-class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// 2R with first operand as an immediate. Used for TSETMR where the first
+// operand is treated as an immediate since it refers to a register number in
+// another thread.
+class _F2RImm<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RImmInstruction";
}
-class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// 2R with first operand as both a source and a destination.
+class _F2RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RSrcDstInstruction";
}
-class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// Same as 2R with last two operands swapped
+class _FR2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeR2RInstruction";
}
-class _F2R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FRUS<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{5-1};
+ let Inst{4} = opc{0};
+ let DecoderMethod = "DecodeRUSInstruction";
}
-class _FRUS<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// RUS with bitp operand
+class _FRUSBitp<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FRUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeRUSBitpInstruction";
}
-class _FL2R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// RUS with first operand as both a source and a destination and a bitp second
+// operand
+class _FRUSSrcDstBitp<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FRUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeRUSSrcDstBitpInstruction";
}
-class _F1R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FL2R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{9-5};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{4-1};
+
+ let Inst{15-11} = 0b11111;
+ let Inst{4} = opc{0};
+ let DecoderMethod = "DecodeL2RInstruction";
}
-class _F0R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+// Same as L2R with last two operands swapped
+class _FLR2R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _FL2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeLR2RInstruction";
}
-class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _F1R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<4> a;
+
+ let Inst{15-11} = opc{5-1};
+ let Inst{10-5} = 0b111111;
+ let Inst{4} = opc{0};
+ let Inst{3-0} = a;
}
-class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _F0R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc{9-5};
+ let Inst{10-5} = 0b111111;
+ let Inst{4-0} = opc{4-0};
}
-class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstXCore<outs, ins, asmstr, pattern> {
- let Inst{31-0} = 0;
+class _FL4R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<4> d;
+
+ let Inst{31-27} = opc{5-1};
+ let Inst{26-21} = 0b111111;
+ let Inst{20} = opc{0};
+ let Inst{19-16} = d;
+ let Inst{15-11} = 0b11111;
+}
+
+// L4R with 4th operand as both a source and a destination.
+class _FL4RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL4R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL4RSrcDstInstruction";
+}
+
+// L4R with 1st and 4th operand as both a source and a destination.
+class _FL4RSrcDstSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL4R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL4RSrcDstSrcDstInstruction";
+}
+
+class _FL5R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{5-1};
+ let Inst{20} = opc{0};
+ let Inst{15-11} = 0b11111;
+
+ let DecoderMethod = "DecodeL5RInstruction";
+}
+
+class _FL6R<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc;
+ let Inst{15-11} = 0b11111;
+
+ let DecoderMethod = "DecodeL6RInstruction";
}
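
The new format classes pin down real encodings where the old ones simply zeroed Inst. For the long immediate forms the operand is split across the two 16-bit halves of the 32-bit instruction, with a 0b111100 prefix marker in bits 15-10. A sketch of the _FLU6 layout defined above as a plain encoder (encodeLU6 is illustrative, not generated code):

#include <cstdint>

uint32_t encodeLU6(uint32_t opc10, uint16_t imm16) {
  uint32_t inst = 0;
  inst |= (opc10 & 0x3FF) << 22;            // Inst{31-22} = opc
  inst |= (uint32_t)(imm16 & 0x3F) << 16;   // Inst{21-16} = a{5-0}
  inst |= 0x3Cu << 10;                      // Inst{15-10} = 0b111100 prefix
  inst |= ((uint32_t)imm16 >> 6) & 0x3FF;   // Inst{9-0}  = a{15-6}
  return inst;
}
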
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 0a3008d7ab33..e457e0dbf027 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "XCoreInstrInfo.h"
-#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "XCoreMachineFunctionInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 3e7666bdb936..03653cb2b3de 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -32,8 +32,8 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
- [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad]>;
+def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
+ [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad, SDNPVariadic]>;
def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
@@ -182,6 +182,7 @@ def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper],
// Address operands
def MEMii : Operand<i32> {
let PrintMethod = "printMemOperand";
+ let DecoderMethod = "DecodeMEMiiOperand";
let MIOperandInfo = (ops i32imm, i32imm);
}
@@ -200,154 +201,117 @@ def InlineJT32 : Operand<i32> {
// Three operand short
-multiclass F3R_2RUS<string OpcStr, SDNode OpNode> {
- def _3r: _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _2rus : _F2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+multiclass F3R_2RUS<bits<5> opc1, bits<5> opc2, string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
}
-multiclass F3R_2RUS_np<string OpcStr> {
- def _3r: _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- []>;
- def _2rus : _F2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- []>;
+multiclass F3R_2RUS_np<bits<5> opc1, bits<5> opc2, string OpcStr> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
+ def _2rus : _F2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
}
-multiclass F3R_2RBITP<string OpcStr, SDNode OpNode> {
- def _3r: _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _2rus : _F2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+multiclass F3R_2RBITP<bits<5> opc1, bits<5> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUSBitp<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
}
-class F3R<string OpcStr, SDNode OpNode> : _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+class F3R<bits<5> opc, string OpcStr, SDNode OpNode> :
+ _F3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
-class F3R_np<string OpcStr> : _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- []>;
+class F3R_np<bits<5> opc, string OpcStr> :
+ _F3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
// Three operand long
/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
-multiclass FL3R_L2RUS<string OpcStr, SDNode OpNode> {
- def _l3r: _FL3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _l2rus : _FL2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+multiclass FL3R_L2RUS<bits<9> opc1, bits<9> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _l3r: _FL3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
}
/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
-multiclass FL3R_L2RBITP<string OpcStr, SDNode OpNode> {
- def _l3r: _FL3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _l2rus : _FL2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+multiclass FL3R_L2RBITP<bits<9> opc1, bits<9> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _l3r: _FL3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUSBitp<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
}
-class FL3R<string OpcStr, SDNode OpNode> : _FL3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+class FL3R<bits<9> opc, string OpcStr, SDNode OpNode> :
+ _FL3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
// Register - U6
// Operand register - U6
-multiclass FRU6_LRU6_branch<string OpcStr> {
- def _ru6: _FRU6<
- (outs), (ins GRRegs:$cond, brtarget:$dest),
- !strconcat(OpcStr, " $cond, $dest"),
- []>;
- def _lru6: _FLRU6<
- (outs), (ins GRRegs:$cond, brtarget:$dest),
- !strconcat(OpcStr, " $cond, $dest"),
- []>;
+multiclass FRU6_LRU6_branch<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, $b"), []>;
+ def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, $b"), []>;
}
-multiclass FRU6_LRU6_cp<string OpcStr> {
- def _ru6: _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$a),
- !strconcat(OpcStr, " $dst, cp[$a]"),
- []>;
- def _lru6: _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$a),
- !strconcat(OpcStr, " $dst, cp[$a]"),
- []>;
+multiclass FRU6_LRU6_backwards_branch<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, -$b"), []>;
+ def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, -$b"), []>;
}
-// U6
-multiclass FU6_LU6<string OpcStr, SDNode OpNode> {
- def _u6: _FU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(OpNode immU6:$b)]>;
- def _lu6: _FLU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(OpNode immU16:$b)]>;
+multiclass FRU6_LRU6_cp<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs RRegs:$a), (ins i32imm:$b),
+ !strconcat(OpcStr, " $a, cp[$b]"), []>;
+ def _lru6: _FLRU6<opc, (outs RRegs:$a), (ins i32imm:$b),
+ !strconcat(OpcStr, " $a, cp[$b]"), []>;
}
-multiclass FU6_LU6_int<string OpcStr, Intrinsic Int> {
- def _u6: _FU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(Int immU6:$b)]>;
- def _lu6: _FLU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(Int immU16:$b)]>;
+
+// U6
+multiclass FU6_LU6<bits<10> opc, string OpcStr, SDNode OpNode> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(OpNode immU6:$a)]>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(OpNode immU16:$a)]>;
}
-multiclass FU6_LU6_np<string OpcStr> {
- def _u6: _FU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
- def _lu6: _FLU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
+multiclass FU6_LU6_int<bits<10> opc, string OpcStr, Intrinsic Int> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(Int immU6:$a)]>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(Int immU16:$a)]>;
}
-// U10
-multiclass FU10_LU10_np<string OpcStr> {
- def _u10: _FU10<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
- def _lu10: _FLU10<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
+multiclass FU6_LU6_np<bits<10> opc, string OpcStr> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), []>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), []>;
}
// Two operand short
-class F2R_np<string OpcStr> : _F2R<
- (outs GRRegs:$dst), (ins GRRegs:$b),
- !strconcat(OpcStr, " $dst, $b"),
- []>;
+class F2R_np<bits<6> opc, string OpcStr> :
+ _F2R<opc, (outs GRRegs:$dst), (ins GRRegs:$b),
+ !strconcat(OpcStr, " $dst, $b"), []>;
// Two operand long
@@ -357,23 +321,23 @@ class F2R_np<string OpcStr> : _F2R<
let Defs = [SP], Uses = [SP] in {
def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt),
- "${:comment} ADJCALLSTACKDOWN $amt",
+ "# ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- "${:comment} ADJCALLSTACKUP $amt1",
+ "# ADJCALLSTACKUP $amt1",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
def LDWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
- "${:comment} LDWFI $dst, $addr",
+ "# LDWFI $dst, $addr",
[(set GRRegs:$dst, (load ADDRspii:$addr))]>;
def LDAWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
- "${:comment} LDAWFI $dst, $addr",
+ "# LDAWFI $dst, $addr",
[(set GRRegs:$dst, ADDRspii:$addr)]>;
def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
- "${:comment} STWFI $src, $addr",
+ "# STWFI $src, $addr",
[(store GRRegs:$src, ADDRspii:$addr)]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
@@ -381,7 +345,7 @@ def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
let usesCustomInserter = 1 in {
def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst),
(ins GRRegs:$cond, GRRegs:$T, GRRegs:$F),
- "${:comment} SELECT_CC PSEUDO!",
+ "# SELECT_CC PSEUDO!",
[(set GRRegs:$dst,
(select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
}
@@ -391,572 +355,564 @@ let usesCustomInserter = 1 in {
//===----------------------------------------------------------------------===//
// Three operand short
-defm ADD : F3R_2RUS<"add", add>;
-defm SUB : F3R_2RUS<"sub", sub>;
+defm ADD : F3R_2RUS<0b00010, 0b10010, "add", add>;
+defm SUB : F3R_2RUS<0b00011, 0b10011, "sub", sub>;
let neverHasSideEffects = 1 in {
-defm EQ : F3R_2RUS_np<"eq">;
-def LSS_3r : F3R_np<"lss">;
-def LSU_3r : F3R_np<"lsu">;
+defm EQ : F3R_2RUS_np<0b00110, 0b10110, "eq">;
+def LSS_3r : F3R_np<0b11000, "lss">;
+def LSU_3r : F3R_np<0b11001, "lsu">;
}
-def AND_3r : F3R<"and", and>;
-def OR_3r : F3R<"or", or>;
+def AND_3r : F3R<0b00111, "and", and>;
+def OR_3r : F3R<0b01000, "or", or>;
let mayLoad=1 in {
-def LDW_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ldw $dst, $addr[$offset]",
- []>;
+def LDW_3r : _F3R<0b01001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldw $dst, $addr[$offset]", []>;
-def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset),
- "ldw $dst, $addr[$offset]",
- []>;
+def LDW_2rus : _F2RUS<0b00001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldw $dst, $addr[$offset]", []>;
-def LD16S_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ld16s $dst, $addr[$offset]",
- []>;
+def LD16S_3r : _F3R<0b10000, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ld16s $dst, $addr[$offset]", []>;
-def LD8U_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ld8u $dst, $addr[$offset]",
- []>;
+def LD8U_3r : _F3R<0b10001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ld8u $dst, $addr[$offset]", []>;
}
let mayStore=1 in {
-def STW_3r : _F3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
- "stw $val, $addr[$offset]",
- []>;
+def STW_l3r : _FL3R<0b000001100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "stw $val, $addr[$offset]", []>;
-def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
- "stw $val, $addr[$offset]",
- []>;
+def STW_2rus : _F2RUS<0b0000, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
+ "stw $val, $addr[$offset]", []>;
}
-defm SHL : F3R_2RBITP<"shl", shl>;
-defm SHR : F3R_2RBITP<"shr", srl>;
-// TODO tsetr
+defm SHL : F3R_2RBITP<0b00100, 0b10100, "shl", shl>;
+defm SHR : F3R_2RBITP<0b00101, 0b10101, "shr", srl>;
+
+// The first operand is treated as an immediate since it refers to a register
+// number in another thread.
+def TSETR_3r : _F3RImm<0b10111, (outs), (ins i32imm:$a, GRRegs:$b, GRRegs:$c),
+ "set t[$c]:r$a, $b", []>;
// Three operand long
-def LDAWF_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ldaw $dst, $addr[$offset]",
- [(set GRRegs:$dst, (ldawf GRRegs:$addr, GRRegs:$offset))]>;
+def LDAWF_l3r : _FL3R<0b000111100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[$offset]",
+ [(set GRRegs:$dst,
+ (ldawf GRRegs:$addr, GRRegs:$offset))]>;
let neverHasSideEffects = 1 in
-def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst),
- (ins GRRegs:$addr, i32imm:$offset),
- "ldaw $dst, $addr[$offset]",
- []>;
+def LDAWF_l2rus : _FL2RUS<0b100111100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[$offset]", []>;
-def LDAWB_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ldaw $dst, $addr[-$offset]",
- [(set GRRegs:$dst, (ldawb GRRegs:$addr, GRRegs:$offset))]>;
+def LDAWB_l3r : _FL3R<0b001001100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ [(set GRRegs:$dst,
+ (ldawb GRRegs:$addr, GRRegs:$offset))]>;
let neverHasSideEffects = 1 in
-def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst),
- (ins GRRegs:$addr, i32imm:$offset),
- "ldaw $dst, $addr[-$offset]",
- []>;
-
-def LDA16F_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "lda16 $dst, $addr[$offset]",
- [(set GRRegs:$dst, (lda16f GRRegs:$addr, GRRegs:$offset))]>;
-
-def LDA16B_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "lda16 $dst, $addr[-$offset]",
- [(set GRRegs:$dst, (lda16b GRRegs:$addr, GRRegs:$offset))]>;
-
-def MUL_l3r : FL3R<"mul", mul>;
+def LDAWB_l2rus : _FL2RUS<0b101001100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[-$offset]", []>;
+
+def LDA16F_l3r : _FL3R<0b001011100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[$offset]",
+ [(set GRRegs:$dst,
+ (lda16f GRRegs:$addr, GRRegs:$offset))]>;
+
+def LDA16B_l3r : _FL3R<0b001101100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[-$offset]",
+ [(set GRRegs:$dst,
+ (lda16b GRRegs:$addr, GRRegs:$offset))]>;
+
+def MUL_l3r : FL3R<0b001111100, "mul", mul>;
// Instructions which may trap are marked as side effecting.
let hasSideEffects = 1 in {
-def DIVS_l3r : FL3R<"divs", sdiv>;
-def DIVU_l3r : FL3R<"divu", udiv>;
-def REMS_l3r : FL3R<"rems", srem>;
-def REMU_l3r : FL3R<"remu", urem>;
+def DIVS_l3r : FL3R<0b010001100, "divs", sdiv>;
+def DIVU_l3r : FL3R<0b010011100, "divu", udiv>;
+def REMS_l3r : FL3R<0b110001100, "rems", srem>;
+def REMU_l3r : FL3R<0b110011100, "remu", urem>;
}
-def XOR_l3r : FL3R<"xor", xor>;
-defm ASHR : FL3R_L2RBITP<"ashr", sra>;
+def XOR_l3r : FL3R<0b000011100, "xor", xor>;
+defm ASHR : FL3R_L2RBITP<0b000101100, 0b100101100, "ashr", sra>;
let Constraints = "$src1 = $dst" in
-def CRC_l3r : _FL3R<(outs GRRegs:$dst),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "crc32 $dst, $src2, $src3",
- [(set GRRegs:$dst,
- (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2,
- GRRegs:$src3))]>;
+def CRC_l3r : _FL3RSrcDst<0b101011100, (outs GRRegs:$dst),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "crc32 $dst, $src2, $src3",
+ [(set GRRegs:$dst,
+ (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2,
+ GRRegs:$src3))]>;
-// TODO inpw, outpw
let mayStore=1 in {
-def ST16_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
- "st16 $val, $addr[$offset]",
- []>;
+def ST16_l3r : _FL3R<0b100001100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st16 $val, $addr[$offset]", []>;
-def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
- "st8 $val, $addr[$offset]",
- []>;
+def ST8_l3r : _FL3R<0b100011100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st8 $val, $addr[$offset]", []>;
}
-// Four operand long
-let Constraints = "$src1 = $dst1,$src2 = $dst2" in {
-def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
- GRRegs:$src4),
- "maccu $dst1, $dst2, $src3, $src4",
- []>;
+def INPW_l2rus : _FL2RUSBitp<0b100101110, (outs GRRegs:$a),
+ (ins GRRegs:$b, i32imm:$c), "inpw $a, res[$b], $c",
+ []>;
-def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
- GRRegs:$src4),
- "maccs $dst1, $dst2, $src3, $src4",
- []>;
+def OUTPW_l2rus : _FL2RUSBitp<0b100101101, (outs),
+ (ins GRRegs:$a, GRRegs:$b, i32imm:$c),
+ "outpw res[$b], $a, $c", []>;
+
+// Four operand long
+let Constraints = "$e = $a,$f = $b" in {
+def MACCU_l4r : _FL4RSrcDstSrcDst<
+ 0b000001, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccu $a, $b, $c, $d", []>;
+
+def MACCS_l4r : _FL4RSrcDstSrcDst<
+ 0b000010, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccs $a, $b, $c, $d", []>;
}
-let Constraints = "$src1 = $dst1" in
-def CRC8_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "crc8 $dst1, $dst2, $src2, $src3",
- []>;
+let Constraints = "$e = $b" in
+def CRC8_l4r : _FL4RSrcDst<0b000000, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$c, GRRegs:$d),
+ "crc8 $b, $a, $c, $d", []>;
// Five operand long
-def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "ladd $dst1, $dst2, $src1, $src2, $src3",
- []>;
+def LADD_l5r : _FL5R<0b000001, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ladd $dst2, $dst1, $src1, $src2, $src3",
+ []>;
-def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "lsub $dst1, $dst2, $src1, $src2, $src3",
- []>;
+def LSUB_l5r : _FL5R<0b000010, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "lsub $dst2, $dst1, $src1, $src2, $src3", []>;
-def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "ldiv $dst1, $dst2, $src1, $src2, $src3",
- []>;
+def LDIVU_l5r : _FL5R<0b000000, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ldivu $dst1, $dst2, $src3, $src1, $src2", []>;
// Six operand long
-def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
- GRRegs:$src4),
- "lmul $dst1, $dst2, $src1, $src2, $src3, $src4",
- []>;
+def LMUL_l6r : _FL6R<
+ 0b00000, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, GRRegs:$src4),
+ "lmul $dst1, $dst2, $src1, $src2, $src3, $src4", []>;
// Register - U6
//let Uses = [DP] in ...
let neverHasSideEffects = 1, isReMaterializable = 1 in
-def LDAWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
- "ldaw $dst, dp[$a]",
- []>;
+def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
+ "ldaw $a, dp[$b]", []>;
let isReMaterializable = 1 in
-def LDAWDP_lru6: _FLRU6<
- (outs GRRegs:$dst), (ins MEMii:$a),
- "ldaw $dst, dp[$a]",
- [(set GRRegs:$dst, ADDRdpii:$a)]>;
+def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
+ "ldaw $a, dp[$b]",
+ [(set RRegs:$a, ADDRdpii:$b)]>;
let mayLoad=1 in
-def LDWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
- "ldw $dst, dp[$a]",
- []>;
-
-def LDWDP_lru6: _FLRU6<
- (outs GRRegs:$dst), (ins MEMii:$a),
- "ldw $dst, dp[$a]",
- [(set GRRegs:$dst, (load ADDRdpii:$a))]>;
+def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
+ "ldw $a, dp[$b]", []>;
+
+def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
+ "ldw $a, dp[$b]",
+ [(set RRegs:$a, (load ADDRdpii:$b))]>;
let mayStore=1 in
-def STWDP_ru6 : _FRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
- "stw $val, dp[$addr]",
- []>;
+def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
+ "stw $a, dp[$b]", []>;
-def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
- "stw $val, dp[$addr]",
- [(store GRRegs:$val, ADDRdpii:$addr)]>;
+def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
+ "stw $a, dp[$b]",
+ [(store RRegs:$a, ADDRdpii:$b)]>;
//let Uses = [CP] in ...
let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in
-defm LDWCP : FRU6_LRU6_cp<"ldw">;
+defm LDWCP : FRU6_LRU6_cp<0b011011, "ldw">;
let Uses = [SP] in {
let mayStore=1 in {
-def STWSP_ru6 : _FRU6<
- (outs), (ins GRRegs:$val, i32imm:$index),
- "stw $val, sp[$index]",
- [(XCoreStwsp GRRegs:$val, immU6:$index)]>;
-
-def STWSP_lru6 : _FLRU6<
- (outs), (ins GRRegs:$val, i32imm:$index),
- "stw $val, sp[$index]",
- [(XCoreStwsp GRRegs:$val, immU16:$index)]>;
+def STWSP_ru6 : _FRU6<0b010101, (outs), (ins RRegs:$a, i32imm:$b),
+ "stw $a, sp[$b]",
+ [(XCoreStwsp RRegs:$a, immU6:$b)]>;
+
+def STWSP_lru6 : _FLRU6<0b010101, (outs), (ins RRegs:$a, i32imm:$b),
+ "stw $a, sp[$b]",
+ [(XCoreStwsp RRegs:$a, immU16:$b)]>;
}
let mayLoad=1 in {
-def LDWSP_ru6 : _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldw $dst, sp[$b]",
- []>;
+def LDWSP_ru6 : _FRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
+ "ldw $a, sp[$b]", []>;
-def LDWSP_lru6 : _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldw $dst, sp[$b]",
- []>;
+def LDWSP_lru6 : _FLRU6<0b010111, (outs RRegs:$a), (ins i32imm:$b),
+ "ldw $a, sp[$b]", []>;
}
let neverHasSideEffects = 1 in {
-def LDAWSP_ru6 : _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
+def LDAWSP_ru6 : _FRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
+ "ldaw $a, sp[$b]", []>;
-def LDAWSP_lru6 : _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
-
-def LDAWSP_ru6_RRegs : _FRU6<
- (outs RRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
-
-def LDAWSP_lru6_RRegs : _FLRU6<
- (outs RRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
+def LDAWSP_lru6 : _FLRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
+ "ldaw $a, sp[$b]", []>;
}
}
let isReMaterializable = 1 in {
-def LDC_ru6 : _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldc $dst, $b",
- [(set GRRegs:$dst, immU6:$b)]>;
-
-def LDC_lru6 : _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldc $dst, $b",
- [(set GRRegs:$dst, immU16:$b)]>;
+def LDC_ru6 : _FRU6<0b011010, (outs RRegs:$a), (ins i32imm:$b),
+ "ldc $a, $b", [(set RRegs:$a, immU6:$b)]>;
+
+def LDC_lru6 : _FLRU6<0b011010, (outs RRegs:$a), (ins i32imm:$b),
+ "ldc $a, $b", [(set RRegs:$a, immU16:$b)]>;
}
-def SETC_ru6 : _FRU6<(outs), (ins GRRegs:$r, i32imm:$val),
- "setc res[$r], $val",
- [(int_xcore_setc GRRegs:$r, immU6:$val)]>;
+def SETC_ru6 : _FRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b),
+ "setc res[$a], $b",
+ [(int_xcore_setc GRRegs:$a, immU6:$b)]>;
-def SETC_lru6 : _FLRU6<(outs), (ins GRRegs:$r, i32imm:$val),
- "setc res[$r], $val",
- [(int_xcore_setc GRRegs:$r, immU16:$val)]>;
+def SETC_lru6 : _FLRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b),
+ "setc res[$a], $b",
+ [(int_xcore_setc GRRegs:$a, immU16:$b)]>;
// Operand register - U6
let isBranch = 1, isTerminator = 1 in {
-defm BRFT: FRU6_LRU6_branch<"bt">;
-defm BRBT: FRU6_LRU6_branch<"bt">;
-defm BRFF: FRU6_LRU6_branch<"bf">;
-defm BRBF: FRU6_LRU6_branch<"bf">;
+defm BRFT: FRU6_LRU6_branch<0b011100, "bt">;
+defm BRBT: FRU6_LRU6_backwards_branch<0b011101, "bt">;
+defm BRFF: FRU6_LRU6_branch<0b011110, "bf">;
+defm BRBF: FRU6_LRU6_backwards_branch<0b011111, "bf">;
}
// U6
let Defs = [SP], Uses = [SP] in {
let neverHasSideEffects = 1 in
-defm EXTSP : FU6_LU6_np<"extsp">;
+defm EXTSP : FU6_LU6_np<0b0111011110, "extsp">;
+
let mayStore = 1 in
-defm ENTSP : FU6_LU6_np<"entsp">;
+defm ENTSP : FU6_LU6_np<0b0111011101, "entsp">;
let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in {
-defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
+defm RETSP : FU6_LU6<0b0111011111, "retsp", XCoreRetsp>;
}
}
-// TODO extdp, kentsp, krestsp, blat
-// getsr, kalli
+let neverHasSideEffects = 1 in
+defm EXTDP : FU6_LU6_np<0b0111001110, "extdp">;
+
+let Uses = [R11], isCall=1 in
+defm BLAT : FU6_LU6_np<0b0111001101, "blat">;
+
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
-def BRBU_u6 : _FU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
-def BRBU_lu6 : _FLU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
-def BRFU_u6 : _FU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRFU_u6 : _FU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
-def BRFU_lu6 : _FLU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRFU_lu6 : _FLU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
}
//let Uses = [CP] in ...
let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in
-def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a),
- "ldaw r11, cp[$a]",
+def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
[]>;
let Defs = [R11], isReMaterializable = 1 in
-def LDAWCP_lu6: _FLRU6<
- (outs), (ins MEMii:$a),
- "ldaw r11, cp[$a]",
- [(set R11, ADDRcpii:$a)]>;
+def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
+ [(set R11, ADDRcpii:$a)]>;
+
+let Defs = [R11] in
+defm GETSR : FU6_LU6_np<0b0111111100, "getsr r11,">;
-defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>;
+defm SETSR : FU6_LU6_int<0b0111101101, "setsr", int_xcore_setsr>;
-defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>;
+defm CLRSR : FU6_LU6_int<0b0111101100, "clrsr", int_xcore_clrsr>;
// setsr may cause a branch if it is used to enable events. clrsr may
// branch if it is executed while events are enabled.
-let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in {
-defm SETSR_branch : FU6_LU6_np<"setsr">;
-defm CLRSR_branch : FU6_LU6_np<"clrsr">;
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
+ isCodeGenOnly = 1 in {
+defm SETSR_branch : FU6_LU6_np<0b0111101101, "setsr">;
+defm CLRSR_branch : FU6_LU6_np<0b0111101100, "clrsr">;
}
+defm KCALL : FU6_LU6_np<0b0111001111, "kcall">;
+
+let Uses = [SP], Defs = [SP], mayStore = 1 in
+defm KENTSP : FU6_LU6_np<0b0111101110, "kentsp">;
+
+let Uses = [SP], Defs = [SP], mayLoad = 1 in
+defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">;
+
// U10
-// TODO ldwcpl, blacp
let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in
-def LDAP_u10 : _FU10<
- (outs),
- (ins i32imm:$addr),
- "ldap r11, $addr",
- []>;
+def LDAPF_u10 : _FU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", []>;
let Defs = [R11], isReMaterializable = 1 in
-def LDAP_lu10 : _FLU10<
- (outs),
- (ins i32imm:$addr),
- "ldap r11, $addr",
- [(set R11, (pcrelwrapper tglobaladdr:$addr))]>;
+def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+ [(set R11, (pcrelwrapper tglobaladdr:$a))]>;
-let Defs = [R11], isReMaterializable = 1 in
-def LDAP_lu10_ba : _FLU10<(outs),
- (ins i32imm:$addr),
- "ldap r11, $addr",
- [(set R11, (pcrelwrapper tblockaddress:$addr))]>;
+let Defs = [R11], isReMaterializable = 1, isCodeGenOnly = 1 in
+def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+ [(set R11, (pcrelwrapper tblockaddress:$a))]>;
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
-def BL_u10 : _FU10<
- (outs), (ins calltarget:$target),
- "bl $target",
- [(XCoreBranchLink immU10:$target)]>;
-
-def BL_lu10 : _FLU10<
- (outs), (ins calltarget:$target),
- "bl $target",
- [(XCoreBranchLink immU20:$target)]>;
+def BLACP_u10 : _FU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
+
+def BLACP_lu10 : _FLU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
+
+def BLRF_u10 : _FU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+ [(XCoreBranchLink immU10:$a)]>;
+
+def BLRF_lu10 : _FLU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+ [(XCoreBranchLink immU20:$a)]>;
+}
+
+let Defs = [R11], mayLoad = 1, isReMaterializable = 1,
+ neverHasSideEffects = 1 in {
+def LDWCP_u10 : _FU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]", []>;
+
+def LDWCP_lu10 : _FLU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]",
+ []>;
}
// Two operand short
-// TODO eet, eef, tsetmr
-def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
- "not $dst, $b",
- [(set GRRegs:$dst, (not GRRegs:$b))]>;
+def NOT : _F2R<0b100010, (outs GRRegs:$dst), (ins GRRegs:$b),
+ "not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>;
-def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
- "neg $dst, $b",
- [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
+def NEG : _F2R<0b100100, (outs GRRegs:$dst), (ins GRRegs:$b),
+ "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
let Constraints = "$src1 = $dst" in {
-def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
- "sext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
- immBitp:$src2))]>;
-
-def SEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
- "sext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
- GRRegs:$src2))]>;
-
-def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
- "zext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
- immBitp:$src2))]>;
-
-def ZEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
- "zext $dst, $src2",
- [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
- GRRegs:$src2))]>;
-
-def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
- "andnot $dst, $src2",
- [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
+def SEXT_rus :
+ _FRUSSrcDstBitp<0b001101, (outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def SEXT_2r :
+ _F2RSrcDst<0b001100, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "sext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, GRRegs:$src2))]>;
+
+def ZEXT_rus :
+ _FRUSSrcDstBitp<0b010001, (outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1,
+ immBitp:$src2))]>;
+
+def ZEXT_2r :
+ _F2RSrcDst<0b010000, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "zext $dst, $src2",
+ [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, GRRegs:$src2))]>;
+
+def ANDNOT_2r :
+ _F2RSrcDst<0b001010, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "andnot $dst, $src2",
+ [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
}
let isReMaterializable = 1, neverHasSideEffects = 1 in
-def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size),
- "mkmsk $dst, $size",
- []>;
+def MKMSK_rus : _FRUSBitp<0b101001, (outs GRRegs:$dst), (ins i32imm:$size),
+ "mkmsk $dst, $size", []>;
-def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
- "mkmsk $dst, $size",
- [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>;
+def MKMSK_2r : _F2R<0b101000, (outs GRRegs:$dst), (ins GRRegs:$size),
+ "mkmsk $dst, $size",
+ [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>;
-def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
- "getr $dst, $type",
- [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
+def GETR_rus : _FRUS<0b100000, (outs GRRegs:$dst), (ins i32imm:$type),
+ "getr $dst, $type",
+ [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
-def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
- "getts $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
+def GETTS_2r : _F2R<0b001110, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "getts $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
-def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "setpt res[$r], $val",
- [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
+def SETPT_2r : _FR2R<0b001111, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setpt res[$r], $val",
+ [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
-def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "outct res[$r], $val",
- [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
+def OUTCT_2r : _F2R<0b010010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
-def OUTCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
- "outct res[$r], $val",
- [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
+def OUTCT_rus : _FRUS<0b010011, (outs), (ins GRRegs:$r, i32imm:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
-def OUTT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "outt res[$r], $val",
- [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
+def OUTT_2r : _FR2R<0b000011, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "outt res[$r], $val",
+ [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
-def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "out res[$r], $val",
- [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
+def OUT_2r : _FR2R<0b101010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "out res[$r], $val",
+ [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
let Constraints = "$src = $dst" in
-def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
- "outshr res[$r], $src",
- [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r,
- GRRegs:$src))]>;
+def OUTSHR_2r :
+ _F2RSrcDst<0b101011, (outs GRRegs:$dst), (ins GRRegs:$src, GRRegs:$r),
+ "outshr res[$r], $src",
+ [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>;
-def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
- "inct $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
+def INCT_2r : _F2R<0b100001, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "inct $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
-def INT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
- "int $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
+def INT_2r : _F2R<0b100011, (outs GRRegs:$dst), (ins GRRegs:$r),
+ "int $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
-def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+def IN_2r : _F2R<0b101100, (outs GRRegs:$dst), (ins GRRegs:$r),
"in $dst, res[$r]",
[(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>;
let Constraints = "$src = $dst" in
-def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
- "inshr $dst, res[$r]",
- [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r,
- GRRegs:$src))]>;
+def INSHR_2r :
+ _F2RSrcDst<0b101101, (outs GRRegs:$dst), (ins GRRegs:$src, GRRegs:$r),
+ "inshr $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>;
-def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "chkct res[$r], $val",
- [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
+def CHKCT_2r : _F2R<0b110010, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
-def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
- "chkct res[$r], $val",
- [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
+def CHKCT_rus : _FRUSBitp<0b110011, (outs), (ins GRRegs:$r, i32imm:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
-def TESTCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+def TESTCT_2r : _F2R<0b101111, (outs GRRegs:$dst), (ins GRRegs:$src),
"testct $dst, res[$src]",
[(set GRRegs:$dst, (int_xcore_testct GRRegs:$src))]>;
-def TESTWCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+def TESTWCT_2r : _F2R<0b110001, (outs GRRegs:$dst), (ins GRRegs:$src),
"testwct $dst, res[$src]",
[(set GRRegs:$dst, (int_xcore_testwct GRRegs:$src))]>;
-def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "setd res[$r], $val",
- [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
+def SETD_2r : _FR2R<0b000101, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setd res[$r], $val",
+ [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
-def GETST_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+def SETPSC_2r : _FR2R<0b110000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setpsc res[$src1], $src2",
+ [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+
+def GETST_2r : _F2R<0b000001, (outs GRRegs:$dst), (ins GRRegs:$r),
"getst $dst, res[$r]",
[(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>;
-def INITSP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITSP_2r : _F2R<0b000100, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:sp, $src",
[(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>;
-def INITPC_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITPC_2r : _F2R<0b000000, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:pc, $src",
[(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>;
-def INITCP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITCP_2r : _F2R<0b000110, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:cp, $src",
[(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>;
-def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def INITDP_2r : _F2R<0b000010, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:dp, $src",
[(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>;
+def PEEK_2r : _F2R<0b101110, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "peek $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>;
+
+def ENDIN_2r : _F2R<0b100101, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "endin $dst, res[$src]",
+ [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>;
+
+def EEF_2r : _F2R<0b001011, (outs), (ins GRRegs:$a, GRRegs:$b),
+ "eef $a, res[$b]", []>;
+
+def EET_2r : _F2R<0b001001, (outs), (ins GRRegs:$a, GRRegs:$b),
+ "eet $a, res[$b]", []>;
+
+def TSETMR_2r : _F2RImm<0b000111, (outs), (ins i32imm:$a, GRRegs:$b),
+ "tsetmr r$a, $b", []>;
+
// Two operand long
-// getd, testlcl
-def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "bitrev $dst, $src",
- [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
+def BITREV_l2r : _FL2R<0b0000011000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "bitrev $dst, $src",
+ [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
+
+def BYTEREV_l2r : _FL2R<0b0000011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "byterev $dst, $src",
+ [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
-def BYTEREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "byterev $dst, $src",
- [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
+def CLZ_l2r : _FL2R<0b000111000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "clz $dst, $src",
+ [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
-def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "clz $dst, $src",
- [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
+def GETD_l2r : _FL2R<0b0001111001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "getd $dst, res[$src]", []>;
-def SETC_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "setc res[$r], $val",
- [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
+def GETN_l2r : _FL2R<0b0011011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "getn $dst, res[$src]", []>;
-def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
- "settw res[$r], $val",
- [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
+def SETC_l2r : _FL2R<0b0010111001, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
-def GETPS_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "get $dst, ps[$src]",
- [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
+def SETTW_l2r : _FLR2R<0b0010011001, (outs), (ins GRRegs:$r, GRRegs:$val),
+ "settw res[$r], $val",
+ [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
-def SETPS_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "set ps[$src1], $src2",
- [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
+def GETPS_l2r : _FL2R<0b0001011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "get $dst, ps[$src]",
+ [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
-def INITLR_l2r : _FL2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+def SETPS_l2r : _FLR2R<0b0001111000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "set ps[$src1], $src2",
+ [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
+
+def INITLR_l2r : _FL2R<0b0001011000, (outs), (ins GRRegs:$src, GRRegs:$t),
"init t[$t]:lr, $src",
[(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>;
-def SETCLK_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "setclk res[$src1], $src2",
- [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
-
-def SETRDY_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "setrdy res[$src1], $src2",
- [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
+def SETCLK_l2r : _FLR2R<0b0000111001, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setclk res[$src1], $src2",
+ [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
-def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
- "setpsc res[$src1], $src2",
- [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+def SETN_l2r : _FLR2R<0b0011011000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setn res[$src1], $src2", []>;
-def PEEK_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "peek $dst, res[$src]",
- [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>;
+def SETRDY_l2r : _FLR2R<0b0010111000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setrdy res[$src1], $src2",
+ [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
-def ENDIN_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
- "endin $dst, res[$src]",
- [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>;
+def TESTLCL_l2r : _FL2R<0b0010011000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "testlcl $dst, res[$src]", []>;
// One operand short
-// TODO edu, eeu, waitet, waitef, tstart, clrtp
-// setdp, setcp, setev, kcall
-// dgetreg
-def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i),
- "msync res[$i]",
- [(int_xcore_msync GRRegs:$i)]>;
-def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i),
- "mjoin res[$i]",
- [(int_xcore_mjoin GRRegs:$i)]>;
+def MSYNC_1r : _F1R<0b000111, (outs), (ins GRRegs:$a),
+ "msync res[$a]",
+ [(int_xcore_msync GRRegs:$a)]>;
+def MJOIN_1r : _F1R<0b000101, (outs), (ins GRRegs:$a),
+ "mjoin res[$a]",
+ [(int_xcore_mjoin GRRegs:$a)]>;
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
-def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
- "bau $addr",
- [(brind GRRegs:$addr)]>;
+def BAU_1r : _F1R<0b001001, (outs), (ins GRRegs:$a),
+ "bau $a",
+ [(brind GRRegs:$a)]>;
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i),
@@ -968,88 +924,150 @@ def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
"bru $i\n$t",
[(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>;
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BRU_1r : _F1R<0b001010, (outs), (ins GRRegs:$a), "bru $a", []>;
+
let Defs=[SP], neverHasSideEffects=1 in
-def SETSP_1r : _F1R<(outs), (ins GRRegs:$src),
- "set sp, $src",
- []>;
+def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a), "set sp, $a", []>;
+
+let neverHasSideEffects=1 in
+def SETDP_1r : _F1R<0b001100, (outs), (ins GRRegs:$a), "set dp, $a", []>;
+
+let neverHasSideEffects=1 in
+def SETCP_1r : _F1R<0b001101, (outs), (ins GRRegs:$a), "set cp, $a", []>;
let hasCtrlDep = 1 in
-def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src),
- "ecallt $src",
+def ECALLT_1r : _F1R<0b010011, (outs), (ins GRRegs:$a),
+ "ecallt $a",
[]>;
let hasCtrlDep = 1 in
-def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
- "ecallf $src",
+def ECALLF_1r : _F1R<0b010010, (outs), (ins GRRegs:$a),
+ "ecallf $a",
[]>;
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
-def BLA_1r : _F1R<(outs), (ins GRRegs:$addr),
- "bla $addr",
- [(XCoreBranchLink GRRegs:$addr)]>;
+def BLA_1r : _F1R<0b001000, (outs), (ins GRRegs:$a),
+ "bla $a",
+ [(XCoreBranchLink GRRegs:$a)]>;
}
-def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r),
- "syncr res[$r]",
- [(int_xcore_syncr GRRegs:$r)]>;
+def SYNCR_1r : _F1R<0b100001, (outs), (ins GRRegs:$a),
+ "syncr res[$a]",
+ [(int_xcore_syncr GRRegs:$a)]>;
-def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
- "freer res[$r]",
- [(int_xcore_freer GRRegs:$r)]>;
+def FREER_1r : _F1R<0b000100, (outs), (ins GRRegs:$a),
+ "freer res[$a]",
+ [(int_xcore_freer GRRegs:$a)]>;
let Uses=[R11] in {
-def SETV_1r : _F1R<(outs), (ins GRRegs:$r),
- "setv res[$r], r11",
- [(int_xcore_setv GRRegs:$r, R11)]>;
+def SETV_1r : _F1R<0b010001, (outs), (ins GRRegs:$a),
+ "setv res[$a], r11",
+ [(int_xcore_setv GRRegs:$a, R11)]>;
-def SETEV_1r : _F1R<(outs), (ins GRRegs:$r),
- "setev res[$r], r11",
- [(int_xcore_setev GRRegs:$r, R11)]>;
+def SETEV_1r : _F1R<0b001111, (outs), (ins GRRegs:$a),
+ "setev res[$a], r11",
+ [(int_xcore_setev GRRegs:$a, R11)]>;
}
-def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
- "eeu res[$r]",
- [(int_xcore_eeu GRRegs:$r)]>;
+def DGETREG_1r : _F1R<0b001110, (outs GRRegs:$a), (ins), "dgetreg $a", []>;
+
+def EDU_1r : _F1R<0b000000, (outs), (ins GRRegs:$a), "edu res[$a]", []>;
+
+def EEU_1r : _F1R<0b000001, (outs), (ins GRRegs:$a),
+ "eeu res[$a]",
+ [(int_xcore_eeu GRRegs:$a)]>;
+
+def KCALL_1r : _F1R<0b010000, (outs), (ins GRRegs:$a), "kcall $a", []>;
+
+def WAITEF_1R : _F1R<0b000011, (outs), (ins GRRegs:$a), "waitef $a", []>;
+
+def WAITET_1R : _F1R<0b000010, (outs), (ins GRRegs:$a), "waitet $a", []>;
+
+def TSTART_1R : _F1R<0b000110, (outs), (ins GRRegs:$a), "start t[$a]", []>;
+
+def CLRPT_1R : _F1R<0b100000, (outs), (ins GRRegs:$a), "clrpt res[$a]", []>;
// Zero operand short
-// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
-// stet, getkep, getksp, setkep, getid, kret, dcall, dret,
-// dentsp, drestsp
-def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>;
+def CLRE_0R : _F0R<0b0000001101, (outs), (ins), "clre", [(int_xcore_clre)]>;
+
+def DCALL_0R : _F0R<0b0000011100, (outs), (ins), "dcall", []>;
+
+let Defs = [SP], Uses = [SP] in
+def DENTSP_0R : _F0R<0b0001001100, (outs), (ins), "dentsp", []>;
+
+let Defs = [SP] in
+def DRESTSP_0R : _F0R<0b0001001101, (outs), (ins), "drestsp", []>;
+
+def DRET_0R : _F0R<0b0000011110, (outs), (ins), "dret", []>;
+
+def FREET_0R : _F0R<0b0000001111, (outs), (ins), "freet", []>;
let Defs = [R11] in {
-def GETID_0R : _F0R<(outs), (ins),
+def GETID_0R : _F0R<0b0001001110, (outs), (ins),
"get r11, id",
[(set R11, (int_xcore_getid))]>;
-def GETED_0R : _F0R<(outs), (ins),
+def GETED_0R : _F0R<0b0000111110, (outs), (ins),
"get r11, ed",
[(set R11, (int_xcore_geted))]>;
-def GETET_0R : _F0R<(outs), (ins),
+def GETET_0R : _F0R<0b0000111111, (outs), (ins),
"get r11, et",
[(set R11, (int_xcore_getet))]>;
+
+def GETKEP_0R : _F0R<0b0001001111, (outs), (ins),
+ "get r11, kep", []>;
+
+def GETKSP_0R : _F0R<0b0001011100, (outs), (ins),
+ "get r11, ksp", []>;
+}
+
+let Defs = [SP] in
+def KRET_0R : _F0R<0b0000011101, (outs), (ins), "kret", []>;
+
+let Uses = [SP], mayLoad = 1 in {
+def LDET_0R : _F0R<0b0001011110, (outs), (ins), "ldw et, sp[4]", []>;
+
+def LDSED_0R : _F0R<0b0001011101, (outs), (ins), "ldw sed, sp[3]", []>;
+
+def LDSPC_0R : _F0R<0b0000101100, (outs), (ins), "ldw spc, sp[1]", []>;
+
+def LDSSR_0R : _F0R<0b0000101110, (outs), (ins), "ldw ssr, sp[2]", []>;
}
-def SSYNC_0r : _F0R<(outs), (ins),
+let Uses=[R11] in
+def SETKEP_0R : _F0R<0b0000011111, (outs), (ins), "set kep, r11", []>;
+
+def SSYNC_0r : _F0R<0b0000001110, (outs), (ins),
"ssync",
[(int_xcore_ssync)]>;
+let Uses = [SP], mayStore = 1 in {
+def STET_0R : _F0R<0b0000111101, (outs), (ins), "stw et, sp[4]", []>;
+
+def STSED_0R : _F0R<0b0000111100, (outs), (ins), "stw sed, sp[3]", []>;
+
+def STSPC_0R : _F0R<0b0000101101, (outs), (ins), "stw spc, sp[1]", []>;
+
+def STSSR_0R : _F0R<0b0000101111, (outs), (ins), "stw ssr, sp[2]", []>;
+}
+
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
hasSideEffects = 1 in
-def WAITEU_0R : _F0R<(outs), (ins),
- "waiteu",
- [(brind (int_xcore_waitevent))]>;
+def WAITEU_0R : _F0R<0b0000001100, (outs), (ins),
+ "waiteu",
+ [(brind (int_xcore_waitevent))]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
-def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BL_lu10 tglobaladdr:$addr)>;
-def : Pat<(XCoreBranchLink texternalsym:$addr), (BL_lu10 texternalsym:$addr)>;
+def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BLRF_lu10 tglobaladdr:$addr)>;
+def : Pat<(XCoreBranchLink texternalsym:$addr), (BLRF_lu10 texternalsym:$addr)>;
/// sext_inreg
def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>;
@@ -1091,7 +1109,7 @@ def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr),
(ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)),
- (STW_3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+ (STW_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)),
(STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>;
def : Pat<(store GRRegs:$val, GRRegs:$addr),
diff --git a/lib/Target/XCore/XCoreMCInstLower.cpp b/lib/Target/XCore/XCoreMCInstLower.cpp
new file mode 100644
index 000000000000..f96eda9fcb9f
--- /dev/null
+++ b/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -0,0 +1,117 @@
+//===-- XCoreMCInstLower.cpp - Convert XCore MachineInstr to MCInst -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains code to lower XCore MachineInstrs to their
+/// corresponding MCInst records.
+///
+//===----------------------------------------------------------------------===//
+#include "XCoreMCInstLower.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+XCoreMCInstLower::XCoreMCInstLower(class AsmPrinter &asmprinter)
+: Printer(asmprinter) {}
+
+void XCoreMCInstLower::Initialize(Mangler *M, MCContext *C) {
+ Mang = M;
+ Ctx = C;
+}
+
+MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy,
+ unsigned Offset) const {
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ const MCSymbol *Symbol;
+
+ switch (MOTy) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Symbol = Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+ Offset += MO.getOffset();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = Printer.GetJTISymbol(MO.getIndex());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = Printer.GetCPISymbol(MO.getIndex());
+ Offset += MO.getOffset();
+ break;
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx);
+
+ if (!Offset)
+ return MCOperand::CreateExpr(MCSym);
+
+ // Assume offset is never negative.
+ assert(Offset > 0);
+
+ const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
+ const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
+ return MCOperand::CreateExpr(Add);
+}
+
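+// A hypothetical example of the offset folding above (illustrative values,
+// not part of the patch): a MO_GlobalAddress operand for a global g that
+// carries offset 4 lowers to the MCExpr g+4, i.e.
+//   LowerSymbolOperand(MO, MachineOperand::MO_GlobalAddress, 4)
+// returns MCOperand::CreateExpr() of MCBinaryExpr::CreateAdd(@g, 4, *Ctx).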
+MCOperand XCoreMCInstLower::LowerOperand(const MachineOperand &MO,
+ unsigned offset) const {
+ MachineOperandType MOTy = MO.getType();
+
+ switch (MOTy) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) break;
+ return MCOperand::CreateReg(MO.getReg());
+ case MachineOperand::MO_Immediate:
+ return MCOperand::CreateImm(MO.getImm() + offset);
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return LowerSymbolOperand(MO, MOTy, offset);
+ case MachineOperand::MO_RegisterMask:
+ break;
+ }
+
+ return MCOperand();
+}
+
+void XCoreMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp = LowerOperand(MO);
+
+ if (MCOp.isValid())
+ OutMI.addOperand(MCOp);
+ }
+}
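
A minimal usage sketch (not part of the patch) of how an AsmPrinter subclass
would typically drive this lowering; `MCInstLowering` is a hypothetical member
of type XCoreMCInstLower, and the construct-then-Initialize() split exists
because the Mangler and MCContext only become available after the printer's
initialization has run:

    void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
      MCInst TmpInst;
      MCInstLowering.Lower(MI, TmpInst);    // operands via LowerOperand()
      OutStreamer.EmitInstruction(TmpInst); // emit through the MC layer
    }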
diff --git a/lib/Target/XCore/XCoreMCInstLower.h b/lib/Target/XCore/XCoreMCInstLower.h
new file mode 100644
index 000000000000..28e702bb9884
--- /dev/null
+++ b/lib/Target/XCore/XCoreMCInstLower.h
@@ -0,0 +1,42 @@
+//===-- XCoreMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMCINSTLOWER_H
+#define XCOREMCINSTLOWER_H
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MachineInstr;
+ class MachineFunction;
+ class Mangler;
+ class AsmPrinter;
+
+/// \brief This class is used to lower a MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY XCoreMCInstLower {
+ typedef MachineOperand::MachineOperandType MachineOperandType;
+ MCContext *Ctx;
+ Mangler *Mang;
+ AsmPrinter &Printer;
+public:
+ XCoreMCInstLower(class AsmPrinter &asmprinter);
+ void Initialize(Mangler *mang, MCContext *C);
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
+
+private:
+ MCOperand LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy, unsigned Offset) const;
+};
+}
+
+#endif
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index f869fcf26de3..69d5de3e03ad 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -14,8 +14,8 @@
#ifndef XCOREMACHINEFUNCTIONINFO_H
#define XCOREMACHINEFUNCTIONINFO_H
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include <vector>
namespace llvm {
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index be5855abcd0b..49b563497c0b 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -12,25 +12,25 @@
//===----------------------------------------------------------------------===//
#include "XCoreRegisterInfo.h"
-#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_REGINFO_TARGET_DESC
#include "XCoreGenRegisterInfo.inc"
@@ -101,72 +101,14 @@ XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
return false;
}
-// This function eliminates ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void XCoreRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
- // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- assert(Amount%4 == 0);
- Amount /= 4;
-
- bool isU6 = isImmU6(Amount);
- if (!isU6 && !isImmU16(Amount)) {
- // FIX could emit multiple instructions in this case.
-#ifndef NDEBUG
- errs() << "eliminateCallFramePseudoInstr size too big: "
- << Amount << "\n";
-#endif
- llvm_unreachable(0);
- }
-
- MachineInstr *New;
- if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
- int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
- New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
- .addImm(Amount);
- } else {
- assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
- int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
- New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
- .addImm(Amount);
- }
-
- // Replace the pseudo instruction with a new instruction...
- MBB.insert(I, New);
- }
- }
-
- MBB.erase(I);
-}
-
void
XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
- unsigned i = 0;
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- MachineOperand &FrameOp = MI.getOperand(i);
+ MachineOperand &FrameOp = MI.getOperand(FIOperandNum);
int FrameIndex = FrameOp.getIndex();
MachineFunction &MF = *MI.getParent()->getParent();
@@ -190,14 +132,14 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special handling of DBG_VALUE instructions.
if (MI.isDebugValue()) {
- MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return;
}
// fold constant into offset.
- Offset += MI.getOperand(i + 1).getImm();
- MI.getOperand(i + 1).ChangeToImmediate(0);
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
assert(Offset%4 == 0 && "Misaligned stack offset");
@@ -231,7 +173,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
.addReg(ScratchReg, RegState::Kill);
break;
case XCore::STWFI:
- BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r))
.addReg(Reg, getKillRegState(isKill))
.addReg(FrameReg)
.addReg(ScratchReg, RegState::Kill);
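
A sketch of what motivates the new FIOperandNum parameter, under the
assumption that the generic frame-index replacement loop (in
PrologEpilogInserter, not shown in this patch) now locates the frame-index
operand once and passes its position, so each target no longer rescans:

    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
      if (MI->getOperand(i).isFI()) {
        TRI.eliminateFrameIndex(MI, SPAdj, i, RS); // FIOperandNum == i
        break;
      }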
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index c4dcb6b533c2..1db32489cf8d 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -54,12 +54,9 @@ public:
bool useFPForScavengingIndex(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index 9edfda1f5007..6694b2882aca 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -45,12 +45,15 @@ def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
def GRRegs : RegisterClass<"XCore", [i32], 32,
// Return values and arguments
(add R0, R1, R2, R3,
- // Not preserved across procedure calls
- R11,
// Callee save
- R4, R5, R6, R7, R8, R9, R10)>;
+ R4, R5, R6, R7, R8, R9, R10,
+ // Not preserved across procedure calls
+ R11)>;
// Reserved
-def RRegs : RegisterClass<"XCore", [i32], 32, (add CP, DP, SP, LR)> {
+def RRegs : RegisterClass<"XCore", [i32], 32,
+ (add R0, R1, R2, R3,
+ R4, R5, R6, R7, R8, R9, R10,
+ R11, CP, DP, SP, LR)> {
let isAllocatable = 0;
}
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 8d0f254e087a..5ac4dbc4bc07 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -14,8 +14,8 @@
#ifndef XCORESUBTARGET_H
#define XCORESUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index d5a932c5189d..28c3d12c05fe 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -12,9 +12,9 @@
#include "XCoreTargetMachine.h"
#include "XCore.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
InstrInfo(),
FrameLowering(Subtarget),
TLInfo(*this),
- TSInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) {
+ TSInfo(*this) {
}
namespace {
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index c60c6a37f95b..eb9a1aa420eb 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -15,13 +15,12 @@
#define XCORETARGETMACHINE_H
#include "XCoreFrameLowering.h"
-#include "XCoreSubtarget.h"
-#include "XCoreInstrInfo.h"
#include "XCoreISelLowering.h"
+#include "XCoreInstrInfo.h"
#include "XCoreSelectionDAGInfo.h"
+#include "XCoreSubtarget.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
namespace llvm {
@@ -32,8 +31,6 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreFrameLowering FrameLowering;
XCoreTargetLowering TLInfo;
XCoreSelectionDAGInfo TSInfo;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -56,12 +53,6 @@ public:
virtual const TargetRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index 7f4e1c1b4fd7..820389935b38 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -11,8 +11,8 @@
#include "XCoreSubtarget.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt
index de1353e6c12d..2bb6e9059094 100644
--- a/lib/Transforms/CMakeLists.txt
+++ b/lib/Transforms/CMakeLists.txt
@@ -5,3 +5,4 @@ add_subdirectory(Scalar)
add_subdirectory(IPO)
add_subdirectory(Vectorize)
add_subdirectory(Hello)
+add_subdirectory(ObjCARC)
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index b0e22de8d7ed..9f2343b3b313 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -13,10 +13,10 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hello"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
-#include "llvm/Function.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(HelloCounter, "Counts number of functions greeted");
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index be48b2063fb6..e6fa4edf612e 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -31,21 +31,21 @@
#define DEBUG_TYPE "argpromotion"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include <set>
using namespace llvm;
@@ -153,8 +153,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
SmallPtrSet<Argument*, 8> ArgsToPromote;
SmallPtrSet<Argument*, 8> ByValArgsToTransform;
for (unsigned i = 0; i != PointerArgs.size(); ++i) {
- bool isByVal=F->getParamAttributes(PointerArgs[i].second+1).
- hasAttribute(Attributes::ByVal);
+ bool isByVal=F->getAttributes().
+ hasAttribute(PointerArgs[i].second+1, Attribute::ByVal);
Argument *PtrArg = PointerArgs[i].first;
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
@@ -511,17 +511,16 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// what the new GEP/Load instructions we are inserting look like.
std::map<IndicesVector, LoadInst*> OriginalLoads;
- // Attributes - Keep track of the parameter attributes for the arguments
+ // Attribute - Keep track of the parameter attributes for the arguments
// that we are *not* promoting. For the ones that we do promote, the parameter
// attributes are lost
- SmallVector<AttributeWithIndex, 8> AttributesVec;
- const AttrListPtr &PAL = F->getAttributes();
+ SmallVector<AttributeSet, 8> AttributesVec;
+ const AttributeSet &PAL = F->getAttributes();
// Add any return attributes.
- Attributes attrs = PAL.getRetAttributes();
- if (attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
- attrs));
+ if (PAL.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ PAL.getRetAttributes()));
// First, determine the new argument list
unsigned ArgIndex = 1;
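
The same AttrListPtr-to-AttributeSet migration recurs throughout the rest of
this patch; condensed into one sketch (assuming an LLVMContext Ctx and a
Function *F, names illustrative), the new idiom for rebuilding an attribute
list is:

    SmallVector<AttributeSet, 8> AttrVec;
    const AttributeSet &PAL = F->getAttributes();
    if (PAL.hasAttributes(AttributeSet::ReturnIndex))   // return attributes
      AttrVec.push_back(AttributeSet::get(Ctx, PAL.getRetAttributes()));
    if (PAL.hasAttributes(AttributeSet::FunctionIndex)) // function attributes
      AttrVec.push_back(AttributeSet::get(Ctx, PAL.getFnAttributes()));
    F->setAttributes(AttributeSet::get(Ctx, AttrVec));  // rebuild the list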
@@ -537,9 +536,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
} else if (!ArgsToPromote.count(I)) {
// Unchanged argument
Params.push_back(I->getType());
- Attributes attrs = PAL.getParamAttributes(ArgIndex);
- if (attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
+ AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
+ if (attrs.hasAttributes(ArgIndex)) {
+ AttrBuilder B(attrs, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+ }
} else if (I->use_empty()) {
// Dead argument (which are always marked as promotable)
++NumArgumentsDead;
@@ -591,10 +593,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
// Add any function attributes.
- attrs = PAL.getFnAttributes();
- if (attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
- attrs));
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(FTy->getContext(),
+ PAL.getFnAttributes()));
Type *RetTy = FTy->getReturnType();
@@ -611,7 +612,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Recompute the parameter attributes list based on the new arguments for
// the function.
- NF->setAttributes(AttrListPtr::get(F->getContext(), AttributesVec));
+ NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
AttributesVec.clear();
F->getParent()->getFunctionList().insert(F, NF);
@@ -636,13 +637,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
CallSite CS(F->use_back());
assert(CS.getCalledFunction() == F);
Instruction *Call = CS.getInstruction();
- const AttrListPtr &CallPAL = CS.getAttributes();
+ const AttributeSet &CallPAL = CS.getAttributes();
// Add any return attributes.
- Attributes attrs = CallPAL.getRetAttributes();
- if (attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
- attrs));
+ if (CallPAL.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ CallPAL.getRetAttributes()));
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
@@ -653,10 +653,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
Args.push_back(*AI); // Unmodified argument
- Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
- if (Attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
-
+ if (CallPAL.hasAttributes(ArgIndex)) {
+ AttrBuilder B(CallPAL, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
} else if (ByValArgsToTransform.count(I)) {
// Emit a GEP and load for each element of the struct.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
@@ -715,28 +716,29 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Push any varargs arguments on the list.
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
Args.push_back(*AI);
- Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
- if (Attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ if (CallPAL.hasAttributes(ArgIndex)) {
+ AttrBuilder B(CallPAL, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
}
// Add any function attributes.
- attrs = CallPAL.getFnAttributes();
- if (attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
- attrs));
+ if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+ CallPAL.getFnAttributes()));
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
Args, "", Call);
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(II->getContext(),
+ cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
AttributesVec));
} else {
New = CallInst::Create(NF, Args, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(AttrListPtr::get(New->getContext(),
+ cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
AttributesVec));
if (cast<CallInst>(Call)->isTailCall())
cast<CallInst>(New)->setTailCall();
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index e2f012657fdd..8336d3ad3479 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -19,15 +19,15 @@
#define DEBUG_TYPE "constmerge"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
using namespace llvm;
STATISTIC(NumMerged, "Number of global constants merged");
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 4cfd0b235ab8..49ef1e75f1cd 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -19,23 +19,23 @@
#define DEBUG_TYPE "deadargelim"
#include "llvm/Transforms/IPO.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constant.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/DIBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
#include <map>
#include <set>
using namespace llvm;
@@ -271,16 +271,15 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
// Drop any attributes that were on the vararg arguments.
- AttrListPtr PAL = CS.getAttributes();
- if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) {
- SmallVector<AttributeWithIndex, 8> AttributesVec;
- for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
- AttributesVec.push_back(PAL.getSlot(i));
- Attributes FnAttrs = PAL.getFnAttributes();
- if (FnAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
- FnAttrs));
- PAL = AttrListPtr::get(Fn.getContext(), AttributesVec);
+ AttributeSet PAL = CS.getAttributes();
+ if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
+ SmallVector<AttributeSet, 8> AttributesVec;
+ for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
+ AttributesVec.push_back(PAL.getSlotAttributes(i));
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Fn.getContext(),
+ PAL.getFnAttributes()));
+ PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
}
Instruction *New;
@@ -351,7 +350,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
if (Fn.use_empty())
return false;
- llvm::SmallVector<unsigned, 8> UnusedArgs;
+ SmallVector<unsigned, 8> UnusedArgs;
for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
I != E; ++I) {
Argument *Arg = I;
@@ -697,15 +696,10 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
std::vector<Type*> Params;
// Set up to build a new list of parameter attributes.
- SmallVector<AttributeWithIndex, 8> AttributesVec;
- const AttrListPtr &PAL = F->getAttributes();
-
- // The existing function return attributes.
- Attributes RAttrs = PAL.getRetAttributes();
- Attributes FnAttrs = PAL.getFnAttributes();
+ SmallVector<AttributeSet, 8> AttributesVec;
+ const AttributeSet &PAL = F->getAttributes();
// Find out the new return value.
-
Type *RetTy = FTy->getReturnType();
Type *NRetTy = NULL;
unsigned RetCount = NumRetVals(F);
@@ -759,22 +753,29 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
assert(NRetTy && "No new return type found?");
+ // The existing function return attributes.
+ AttributeSet RAttrs = PAL.getRetAttributes();
+
// Remove any incompatible attributes, but only if we removed all return
// values. Otherwise, ensure that we don't have any conflicting attributes
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
RAttrs =
- Attributes::get(NRetTy->getContext(), AttrBuilder(RAttrs).
- removeAttributes(Attributes::typeIncompatible(NRetTy)));
+ AttributeSet::get(NRetTy->getContext(), AttributeSet::ReturnIndex,
+ AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+ removeAttributes(AttributeFuncs::
+ typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex));
else
- assert(!AttrBuilder(RAttrs).
- hasAttributes(Attributes::typeIncompatible(NRetTy)) &&
+ assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+ hasAttributes(AttributeFuncs::
+ typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex) &&
"Return attributes no longer compatible?");
- if (RAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
- RAttrs));
+ if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs));
// Remember which arguments are still alive.
SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
@@ -791,9 +792,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Get the original parameter attributes (skipping the first one, that is
      // for the return value).
- Attributes Attrs = PAL.getParamAttributes(i + 1);
- if (Attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
+ if (PAL.hasAttributes(i + 1)) {
+ AttrBuilder B(PAL, i + 1);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+ }
} else {
++NumArgumentsEliminated;
DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
@@ -801,12 +804,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
}
- if (FnAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
- FnAttrs));
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ PAL.getFnAttributes()));
// Reconstruct the AttributesList based on the vector we constructed.
- AttrListPtr NewPAL = AttrListPtr::get(F->getContext(), AttributesVec);
+ AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
// Create the new function type based on the recomputed parameters.
FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
@@ -833,18 +836,21 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
Instruction *Call = CS.getInstruction();
AttributesVec.clear();
- const AttrListPtr &CallPAL = CS.getAttributes();
+ const AttributeSet &CallPAL = CS.getAttributes();
// The call return attributes.
- Attributes RAttrs = CallPAL.getRetAttributes();
- Attributes FnAttrs = CallPAL.getFnAttributes();
+ AttributeSet RAttrs = CallPAL.getRetAttributes();
+
// Adjust in case the function was changed to return void.
RAttrs =
- Attributes::get(NF->getContext(), AttrBuilder(RAttrs).
- removeAttributes(Attributes::typeIncompatible(NF->getReturnType())));
- if (RAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
- RAttrs));
+ AttributeSet::get(NF->getContext(), AttributeSet::ReturnIndex,
+ AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+ removeAttributes(AttributeFuncs::
+ typeIncompatible(NF->getReturnType(),
+ AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex));
+ if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
// Declare these outside of the loops, so we can reuse them for the second
    // loop, which loops over the varargs.
@@ -856,25 +862,29 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[i]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- Attributes Attrs = CallPAL.getParamAttributes(i + 1);
- if (Attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ if (CallPAL.hasAttributes(i + 1)) {
+ AttrBuilder B(CallPAL, i + 1);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
}
// Push any varargs arguments on the list. Don't forget their attributes.
for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
Args.push_back(*I);
- Attributes Attrs = CallPAL.getParamAttributes(i + 1);
- if (Attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ if (CallPAL.hasAttributes(i + 1)) {
+ AttrBuilder B(CallPAL, i + 1);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
}
- if (FnAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
- FnAttrs));
+ if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+ CallPAL.getFnAttributes()));
// Reconstruct the AttributesList based on the vector we constructed.
- AttrListPtr NewCallPAL = AttrListPtr::get(F->getContext(), AttributesVec);
+ AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec);
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
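Two details of the rewritten DeleteDeadVarargs hunk are worth noting: getSlotIndex(i) and getSlotAttributes(i) replace the old getSlot(i).Index/.Attrs pair, and the unguarded loop condition is safe only because the enclosing if already established that the final slot's index exceeds NumArgs, so the scan stops before running past getNumSlots(). A hedged sketch of the slot-walking shape (dumpSlots is an illustrative name):

    // Sketch: visit every slot of an AttributeSet.
    #include "llvm/IR/Attributes.h"
    using namespace llvm;

    void dumpSlots(const AttributeSet &PAL) {
      for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) {
        unsigned Index = PAL.getSlotIndex(i);         // param no., ReturnIndex,
                                                      // or FunctionIndex
        AttributeSet Slot = PAL.getSlotAttributes(i); // attributes at Index
        (void)Index; (void)Slot;  // inspect or copy as the passes above do
      }
    }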
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 6716deb9e47b..fa3d72ddcf16 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Constants.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include <algorithm>
using namespace llvm;
@@ -60,7 +60,7 @@ namespace {
continue;
}
- bool Local = I->hasLocalLinkage();
+ bool Local = I->isDiscardableIfUnused();
if (Local)
I->setVisibility(GlobalValue::HiddenVisibility);
@@ -80,7 +80,7 @@ namespace {
continue;
}
- bool Local = I->hasLocalLinkage();
+ bool Local = I->isDiscardableIfUnused();
if (Local)
I->setVisibility(GlobalValue::HiddenVisibility);
@@ -97,7 +97,7 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- if (CurI->hasLocalLinkage()) {
+ if (CurI->isDiscardableIfUnused()) {
CurI->setVisibility(GlobalValue::HiddenVisibility);
CurI->setLinkage(GlobalValue::ExternalLinkage);
}
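Switching the test from hasLocalLinkage() to isDiscardableIfUnused() widens what ExtractGV treats as private to the module: linkonce definitions, which the linker may drop when unreferenced, are now hidden and externalized alongside internal and private ones. A condensed sketch of the pattern the pass applies to globals, functions, and aliases (hideDiscardable is an illustrative name):

    // Sketch: externalize every discardable global variable.
    #include "llvm/IR/Module.h"
    using namespace llvm;

    void hideDiscardable(Module &M) {
      for (Module::global_iterator I = M.global_begin(), E = M.global_end();
           I != E; ++I)
        if (I->isDiscardableIfUnused()) {
          I->setVisibility(GlobalValue::HiddenVisibility);
          I->setLinkage(GlobalValue::ExternalLinkage);
        }
    }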
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 18409f77b3fa..bc5109b4d48d 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1,4 +1,4 @@
-//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===//
+//===- FunctionAttrs.cpp - Pass which marks function attributes -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,30 +14,34 @@
// to the function does not create any copies of the pointer value that
// outlive the call. This more or less means that the pointer is only
// dereferenced, and not returned from the function or stored in a global.
+// Finally, well-known library call declarations are marked with all
+// attributes that are consistent with the function's standard definition.
// This pass is implemented as a bottom-up traversal of the call-graph.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "functionattrs"
#include "llvm/Transforms/IPO.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
+STATISTIC(NumAnnotated, "Number of attributes added to library functions");
namespace {
struct FunctionAttrs : public CallGraphSCCPass {
@@ -62,14 +66,63 @@ namespace {
// AddNoAliasAttrs - Deduce noalias attributes for the SCC.
bool AddNoAliasAttrs(const CallGraphSCC &SCC);
+ // Utility methods used by inferPrototypeAttributes to add attributes
+ // and maintain annotation statistics.
+
+ void setDoesNotAccessMemory(Function &F) {
+ if (!F.doesNotAccessMemory()) {
+ F.setDoesNotAccessMemory();
+ ++NumAnnotated;
+ }
+ }
+
+ void setOnlyReadsMemory(Function &F) {
+ if (!F.onlyReadsMemory()) {
+ F.setOnlyReadsMemory();
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotThrow(Function &F) {
+ if (!F.doesNotThrow()) {
+ F.setDoesNotThrow();
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotCapture(Function &F, unsigned n) {
+ if (!F.doesNotCapture(n)) {
+ F.setDoesNotCapture(n);
+ ++NumAnnotated;
+ }
+ }
+
+ void setDoesNotAlias(Function &F, unsigned n) {
+ if (!F.doesNotAlias(n)) {
+ F.setDoesNotAlias(n);
+ ++NumAnnotated;
+ }
+ }
+
+ // inferPrototypeAttributes - Analyze the name and prototype of the
+ // given function and set any applicable attributes. Returns true
+ // if any attributes were set and false otherwise.
+ bool inferPrototypeAttributes(Function &F);
+
+ // annotateLibraryCalls - Adds attributes to well-known standard library
+ // call declarations.
+ bool annotateLibraryCalls(const CallGraphSCC &SCC);
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
private:
AliasAnalysis *AA;
+ TargetLibraryInfo *TLI;
};
}
@@ -77,6 +130,7 @@ char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -213,16 +267,15 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// Clear out any existing attributes.
AttrBuilder B;
- B.addAttribute(Attributes::ReadOnly)
- .addAttribute(Attributes::ReadNone);
- F->removeAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(F->getContext(), B));
+ B.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
+ F->removeAttributes(AttributeSet::FunctionIndex,
+ AttributeSet::get(F->getContext(),
+ AttributeSet::FunctionIndex, B));
// Add in the new attribute.
- B.clear();
- B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone);
- F->addAttribute(AttrListPtr::FunctionIndex,
- Attributes::get(F->getContext(), B));
+ F->addAttribute(AttributeSet::FunctionIndex,
+ ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
if (ReadsMemory)
++NumReadOnly;
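The rewritten AddReadAttrs tail is slightly asymmetric on purpose: clearing still goes through removeAttributes with a set built from an AttrBuilder, while re-adding uses the terse addAttribute(Index, Kind) overload, which makes the old B.clear()-and-reuse dance unnecessary. An equivalent sketch, assuming Function &F and a ReadsMemory flag:

    // Sketch: clear both memory attributes, then set the surviving one.
    AttrBuilder B;
    B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
    F.removeAttributes(AttributeSet::FunctionIndex,
                       AttributeSet::get(F.getContext(),
                                         AttributeSet::FunctionIndex, B));
    F.addAttribute(AttributeSet::FunctionIndex,
                   ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);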
@@ -358,7 +411,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
ArgumentGraph AG;
AttrBuilder B;
- B.addAttribute(Attributes::NoCapture);
+ B.addAttribute(Attribute::NoCapture);
// Check each function in turn, determining which pointer arguments are not
// captured.
@@ -381,7 +434,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
A != E; ++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
- A->addAttr(Attributes::get(F->getContext(), B));
+ A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
}
@@ -396,7 +449,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(Attributes::get(F->getContext(), B));
+ A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo()+1, B));
++NumNoCapture;
Changed = true;
} else {
@@ -431,7 +484,9 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
ArgumentSCC[0]->
Definition->
- addAttr(Attributes::get(ArgumentSCC[0]->Definition->getContext(), B));
+ addAttr(AttributeSet::get(ArgumentSCC[0]->Definition->getContext(),
+ ArgumentSCC[0]->Definition->getArgNo() + 1,
+ B));
++NumNoCapture;
Changed = true;
}
@@ -473,7 +528,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- A->addAttr(Attributes::get(A->getContext(), B));
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
}
@@ -530,7 +585,7 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
case Instruction::Call:
case Instruction::Invoke: {
CallSite CS(RVI);
- if (CS.paramHasAttr(0, Attributes::NoAlias))
+ if (CS.paramHasAttr(0, Attribute::NoAlias))
break;
if (CS.getCalledFunction() &&
SCCNodes.count(CS.getCalledFunction()))
@@ -597,10 +652,693 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
return MadeChange;
}
+/// inferPrototypeAttributes - Analyze the name and prototype of the
+/// given function and set any applicable attributes. Returns true
+/// if any attributes were set and false otherwise.
+bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
+ FunctionType *FTy = F.getFunctionType();
+ LibFunc::Func TheLibFunc;
+ if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc)))
+ return false;
+
+ switch (TheLibFunc) {
+ case LibFunc::strlen:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::strchr:
+ case LibFunc::strrchr:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isIntegerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::strcpy:
+ case LibFunc::stpcpy:
+ case LibFunc::strcat:
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strncat:
+ case LibFunc::strncpy:
+ case LibFunc::stpncpy:
+ case LibFunc::strtoull:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strxfrm:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strcmp:
+ case LibFunc::strspn:
+ case LibFunc::strncmp:
+ case LibFunc::strcspn:
+ case LibFunc::strcoll:
+ case LibFunc::strcasecmp:
+ case LibFunc::strncasecmp:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strstr:
+ case LibFunc::strpbrk:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::strtok:
+ case LibFunc::strtok_r:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::scanf:
+ case LibFunc::setbuf:
+ case LibFunc::setvbuf:
+ if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::strdup:
+ case LibFunc::strndup:
+ if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::stat:
+ case LibFunc::sscanf:
+ case LibFunc::sprintf:
+ case LibFunc::statvfs:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::snprintf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::setitimer:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::system:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "system" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::malloc:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::memcmp:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::memchr:
+ case LibFunc::memrchr:
+ if (FTy->getNumParams() != 3)
+ return false;
+ setOnlyReadsMemory(F);
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::modf:
+ case LibFunc::modff:
+ case LibFunc::modfl:
+ case LibFunc::memcpy:
+ case LibFunc::memccpy:
+ case LibFunc::memmove:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::memalign:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::mkdir:
+ case LibFunc::mktime:
+ if (FTy->getNumParams() == 0 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::realloc:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::read:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "read" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::rmdir:
+ case LibFunc::rewind:
+ case LibFunc::remove:
+ case LibFunc::realpath:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::rename:
+ case LibFunc::readlink:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::write:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; "write" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bcopy:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bcmp:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::bzero:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::calloc:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::chmod:
+ case LibFunc::chown:
+ case LibFunc::ctermid:
+ case LibFunc::clearerr:
+ case LibFunc::closedir:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::atoi:
+ case LibFunc::atol:
+ case LibFunc::atof:
+ case LibFunc::atoll:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::access:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::fopen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fdopen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::feof:
+ case LibFunc::free:
+ case LibFunc::fseek:
+ case LibFunc::ftell:
+ case LibFunc::fgetc:
+ case LibFunc::fseeko:
+ case LibFunc::ftello:
+ case LibFunc::fileno:
+ case LibFunc::fflush:
+ case LibFunc::fclose:
+ case LibFunc::fsetpos:
+ case LibFunc::flockfile:
+ case LibFunc::funlockfile:
+ case LibFunc::ftrylockfile:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::ferror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setOnlyReadsMemory(F);
+ break;
+ case LibFunc::fputc:
+ case LibFunc::fstat:
+ case LibFunc::frexp:
+ case LibFunc::frexpf:
+ case LibFunc::frexpl:
+ case LibFunc::fstatvfs:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fgets:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+    setDoesNotCapture(F, 3);
+    break;
+ case LibFunc::fread:
+ case LibFunc::fwrite:
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+    setDoesNotCapture(F, 4);
+    break;
+ case LibFunc::fputs:
+ case LibFunc::fscanf:
+ case LibFunc::fprintf:
+ case LibFunc::fgetpos:
+ if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::getc:
+ case LibFunc::getlogin_r:
+ case LibFunc::getc_unlocked:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::getenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setOnlyReadsMemory(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::gets:
+ case LibFunc::getchar:
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::getitimer:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::getpwnam:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::ungetc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::uname:
+ case LibFunc::unlink:
+ case LibFunc::unsetenv:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::utime:
+ case LibFunc::utimes:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::puts:
+ case LibFunc::printf:
+ case LibFunc::perror:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::pread:
+ case LibFunc::pwrite:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ // May throw; these are valid pthread cancellation points.
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::putchar:
+ setDoesNotThrow(F);
+ break;
+ case LibFunc::popen:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::pclose:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vscanf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vsscanf:
+ case LibFunc::vfscanf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::valloc:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::vprintf:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::vfprintf:
+ case LibFunc::vsprintf:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::vsnprintf:
+ if (FTy->getNumParams() != 4 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 3);
+ break;
+ case LibFunc::open:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::opendir:
+ if (FTy->getNumParams() != 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::tmpfile:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::times:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::htonl:
+ case LibFunc::htons:
+ case LibFunc::ntohl:
+ case LibFunc::ntohs:
+ setDoesNotThrow(F);
+ setDoesNotAccessMemory(F);
+ break;
+ case LibFunc::lstat:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::lchown:
+ if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::qsort:
+ if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+ return false;
+    // May throw; makes a call through a function pointer.
+ setDoesNotCapture(F, 4);
+ break;
+ case LibFunc::dunder_strdup:
+ case LibFunc::dunder_strndup:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::dunder_strtok_r:
+ if (FTy->getNumParams() != 3 ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::under_IO_getc:
+ if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::under_IO_putc:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::dunder_isoc99_scanf:
+ if (FTy->getNumParams() < 1 ||
+ !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::stat64:
+ case LibFunc::lstat64:
+ case LibFunc::statvfs64:
+ case LibFunc::dunder_isoc99_sscanf:
+    if (FTy->getNumParams() < 2 ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fopen64:
+ if (FTy->getNumParams() != 2 ||
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ setDoesNotCapture(F, 1);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::fseeko64:
+ case LibFunc::ftello64:
+ if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 1);
+ break;
+ case LibFunc::tmpfile64:
+ if (!FTy->getReturnType()->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotAlias(F, 0);
+ break;
+ case LibFunc::fstat64:
+ case LibFunc::fstatvfs64:
+ if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+ return false;
+ setDoesNotThrow(F);
+ setDoesNotCapture(F, 2);
+ break;
+ case LibFunc::open64:
+ if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+ return false;
+ // May throw; "open" is a valid pthread cancellation point.
+ setDoesNotCapture(F, 1);
+ break;
+ default:
+ // Didn't mark any attributes.
+ return false;
+ }
+
+ return true;
+}
+
+/// annotateLibraryCalls - Adds attributes to well-known standard library
+/// call declarations.
+bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
+ bool MadeChange = false;
+
+  // Check each function in turn, annotating well-known library function
+ // declarations with attributes.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+
+ if (F != 0 && F->isDeclaration())
+ MadeChange |= inferPrototypeAttributes(*F);
+ }
+
+ return MadeChange;
+}
+
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
AA = &getAnalysis<AliasAnalysis>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
- bool Changed = AddReadAttrs(SCC);
+ bool Changed = annotateLibraryCalls(SCC);
+ Changed |= AddReadAttrs(SCC);
Changed |= AddNoCaptureAttrs(SCC);
Changed |= AddNoAliasAttrs(SCC);
return Changed;
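The net effect for a well-known declaration: a prototype the front end emitted with no attributes picks up the same markings a definition would be inferred to carry. For strlen, the case above reduces to the following (annotateStrlen is an illustrative name; the real pass first validates the prototype and funnels through the counting setters):

    // Sketch: what inferPrototypeAttributes establishes for strlen.
    #include "llvm/IR/Function.h"
    using namespace llvm;

    void annotateStrlen(Function &F) {
      F.setOnlyReadsMemory();  // readonly: strlen only inspects memory
      F.setDoesNotThrow();     // nounwind
      F.setDoesNotCapture(1);  // param 1 does not escape (0 = return value)
      // Roughly: declare i64 @strlen(i8* nocapture) nounwind readonly
    }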
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 18c1c7b00051..dc99492990a3 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -17,11 +17,11 @@
#define DEBUG_TYPE "globaldce"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
using namespace llvm;
STATISTIC(NumAliases , "Number of global aliases removed");
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 591278fa62c8..b035a821b4cf 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -15,29 +15,29 @@
#define DEBUG_TYPE "globalopt"
#include "llvm/Transforms/IPO.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
@@ -148,17 +148,13 @@ struct GlobalStatus {
/// an instruction (e.g. a constant expr or GV initializer).
bool HasNonInstructionUser;
- /// HasPHIUser - Set to true if this global has a user that is a PHI node.
- bool HasPHIUser;
-
/// AtomicOrdering - Set to the strongest atomic ordering requirement.
AtomicOrdering Ordering;
GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
StoredOnceValue(0), AccessingFunction(0),
HasMultipleAccessingFunctions(false),
- HasNonInstructionUser(false), HasPHIUser(false),
- Ordering(NotAtomic) {}
+ HasNonInstructionUser(false), Ordering(NotAtomic) {}
};
}
@@ -200,11 +196,11 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
const User *U = *UI;
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
GS.HasNonInstructionUser = true;
-
+
// If the result of the constantexpr isn't pointer type, then we won't
// know to expect it in various places. Just reject early.
if (!isa<PointerType>(CE->getType())) return true;
-
+
if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
} else if (const Instruction *I = dyn_cast<Instruction>(U)) {
if (!GS.HasMultipleAccessingFunctions) {
@@ -274,7 +270,6 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
// have to be careful about infinite recursion.
if (PHIUsers.insert(PN)) // Not already visited.
if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- GS.HasPHIUser = true;
} else if (isa<CmpInst>(I)) {
GS.isCompared = true;
} else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
@@ -453,8 +448,8 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
Dead[i].second->eraseFromParent();
Instruction *I = Dead[i].first;
do {
- if (isAllocationFn(I, TLI))
- break;
+ if (isAllocationFn(I, TLI))
+ break;
Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
if (!J)
break;
@@ -475,8 +470,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
DataLayout *TD, TargetLibraryInfo *TLI) {
bool Changed = false;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
- User *U = *UI++;
+ SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end());
+ while (!WorkList.empty()) {
+ User *U = WorkList.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (Init) {
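The SmallVector rewrite fixes a subtle iterator problem rather than just restyling the loop: the body can destroy constants, and destroying a user while walking use_begin()/use_end() invalidates the use iterator; snapshotting the users first makes the traversal immune to such mutation. The pattern in isolation (visitUsersSafely is an illustrative name):

    // Sketch: visit users while they may be erased underneath us.
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/User.h"
    using namespace llvm;

    void visitUsersSafely(Value *V) {
      SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end());
      while (!WorkList.empty()) {
        User *U = WorkList.pop_back_val();
        // ... transform or erase U; the snapshot stays valid ...
        (void)U;
      }
    }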
@@ -539,7 +535,6 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// us, and if they are all dead, nuke them without remorse.
if (SafeToDestroyConstant(C)) {
C->destroyConstant();
- // This could have invalidated UI, start over from scratch.
CleanupConstantGlobalUsers(V, Init, TD, TLI);
return true;
}
@@ -1830,7 +1825,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
GlobalValue::InternalLinkage,
ConstantInt::getFalse(GV->getContext()),
GV->getName()+".b",
- GV->getThreadLocalMode());
+ GV->getThreadLocalMode(),
+ GV->getType()->getAddressSpace());
GV->getParent()->getGlobalList().insert(GV, NewGV);
Constant *InitVal = GV->getInitializer();
@@ -1850,10 +1846,10 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
bool StoringOther = SI->getOperand(0) == OtherVal;
// Only do this if we weren't storing a loaded value.
Value *StoreVal;
- if (StoringOther || SI->getOperand(0) == InitVal)
+ if (StoringOther || SI->getOperand(0) == InitVal) {
StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
StoringOther);
- else {
+ } else {
// Otherwise, we are storing a previously loaded copy. To do this,
// change the copy from copying the original value to just copying the
// bool.
@@ -1892,6 +1888,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
UI->eraseFromParent();
}
+ // Retain the name of the old global variable. People who are debugging their
+ // programs may expect these variables to be named the same.
+ NewGV->takeName(GV);
GV->eraseFromParent();
return true;
}
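takeName moves the name rather than copying it: the ".b" temporary adopts the old global's exact symbol name and GV is left unnamed, so erasing GV immediately afterwards cannot collide. In isolation:

    // Sketch: swap a replacement global in without losing the symbol name.
    #include "llvm/IR/GlobalVariable.h"
    using namespace llvm;

    void replaceKeepingName(GlobalVariable *GV, GlobalVariable *NewGV) {
      NewGV->takeName(GV);    // NewGV adopts GV's name; GV becomes nameless
      GV->eraseFromParent();  // safe: the name moved, so no clash remains
    }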
@@ -1994,7 +1993,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return Changed;
} else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
- DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
+ DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
@@ -2070,14 +2069,14 @@ static void ChangeCalleesToFastCall(Function *F) {
}
}
-static AttrListPtr StripNest(LLVMContext &C, const AttrListPtr &Attrs) {
+static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
- if (!Attrs.getSlot(i).Attrs.hasAttribute(Attributes::Nest))
+ unsigned Index = Attrs.getSlotIndex(i);
+ if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
continue;
// There can be only one.
- return Attrs.removeAttr(C, Attrs.getSlot(i).Index,
- Attributes::get(C, Attributes::Nest));
+ return Attrs.removeAttribute(C, Index, Attribute::Nest);
}
return Attrs;
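The early return in StripNest is sound because the IR verifier permits at most one parameter to carry 'nest', so the first hit is the only one. The new single-attribute removal call, in isolation (dropNest is an illustrative name):

    // Sketch: remove one enum attribute at a known index.
    #include "llvm/IR/Attributes.h"
    using namespace llvm;

    AttributeSet dropNest(LLVMContext &C, AttributeSet Attrs, unsigned Index) {
      return Attrs.removeAttribute(C, Index, Attribute::Nest);
    }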
@@ -2118,7 +2117,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
Changed = true;
}
- if (F->getAttributes().hasAttrSomewhere(Attributes::Nest) &&
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
!F->hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
@@ -2157,7 +2156,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
if (GV == 0) return 0;
-
+
// Verify that the initializer is simple enough for us to handle. We are
// only allowed to optimize the initializer if it is unique.
if (!GV->hasUniqueInitializer()) return 0;
@@ -2263,7 +2262,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
}
-static inline bool
+static inline bool
isSimpleEnoughValueToCommit(Constant *C,
SmallPtrSet<Constant*, 8> &SimpleConstants,
const DataLayout *TD);
@@ -2285,7 +2284,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
isa<GlobalValue>(C))
return true;
-
+
// Aggregate values are safe if all their elements are.
if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
isa<ConstantVector>(C)) {
@@ -2296,7 +2295,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
}
return true;
}
-
+
// We don't know exactly what relocations are allowed in constant expressions,
// so we allow &global+constantoffset, which is safe and uniformly supported
// across targets.
@@ -2314,14 +2313,14 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
return false;
return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
-
+
// GEP is fine if it is simple + constant offset.
case Instruction::GetElementPtr:
for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
if (!isa<ConstantInt>(CE->getOperand(i)))
return false;
return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
-
+
case Instruction::Add:
// We allow simple+cst.
if (!isa<ConstantInt>(CE->getOperand(1)))
@@ -2331,7 +2330,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
return false;
}
-static inline bool
+static inline bool
isSimpleEnoughValueToCommit(Constant *C,
SmallPtrSet<Constant*, 8> &SimpleConstants,
const DataLayout *TD) {
@@ -2379,7 +2378,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
-
+
// A constantexpr bitcast from a pointer to another pointer is a no-op,
// and we know how to evaluate it by moving the bitcast from the pointer
// operand to the value operand.
@@ -2390,7 +2389,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
}
}
-
+
return false;
}
@@ -2420,7 +2419,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
// Return the modified struct.
return ConstantStruct::get(STy, Elts);
}
-
+
ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
SequentialType *InitTy = cast<SequentialType>(Init->getType());
@@ -2589,31 +2588,45 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
while (1) {
Constant *InstResult = 0;
+ DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
+
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
- if (!SI->isSimple()) return false; // no volatile/atomic accesses.
+ if (!SI->isSimple()) {
+ DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
Constant *Ptr = getVal(SI->getOperand(1));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
- if (!isSimpleEnoughPointerToCommit(Ptr))
+ DEBUG(dbgs() << "; To: " << *Ptr << "\n");
+ }
+ if (!isSimpleEnoughPointerToCommit(Ptr)) {
// If this is too complex for us to commit, reject it.
+        DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.\n");
return false;
-
+ }
+
Constant *Val = getVal(SI->getOperand(0));
// If this might be too difficult for the backend to handle (e.g. the addr
// of one global variable divided by another) then we can't commit it.
- if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD))
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) {
+ DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
+ << "\n");
return false;
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ }
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
if (CE->getOpcode() == Instruction::BitCast) {
+ DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
// If we're evaluating a store through a bitcast, then we need
// to pull the bitcast off the pointer type and push it onto the
// stored value.
Ptr = CE->getOperand(0);
-
+
Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
-
+
// In order to push the bitcast onto the stored value, a bitcast
// from NewTy to Val's type must be legal. If it's not, we can try
// introspecting NewTy to find a legal conversion.
@@ -2635,32 +2648,45 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// If we can't improve the situation by introspecting NewTy,
// we have to give up.
} else {
+ DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+ "evaluate.\n");
return false;
}
}
-
+
// If we found compatible types, go ahead and push the bitcast
// onto the stored value.
Val = ConstantExpr::getBitCast(Val, NewTy);
+
+ DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
}
-
+ }
+
MutatedMemory[Ptr] = Val;
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
InstResult = ConstantExpr::get(BO->getOpcode(),
getVal(BO->getOperand(0)),
getVal(BO->getOperand(1)));
+ DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
+ << "\n");
} else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
InstResult = ConstantExpr::getCompare(CI->getPredicate(),
getVal(CI->getOperand(0)),
getVal(CI->getOperand(1)));
+ DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
+ << "\n");
} else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
InstResult = ConstantExpr::getCast(CI->getOpcode(),
getVal(CI->getOperand(0)),
CI->getType());
+ DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
+ << "\n");
} else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
getVal(SI->getOperand(1)),
getVal(SI->getOperand(2)));
+ DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
+ << "\n");
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
Constant *P = getVal(GEP->getOperand(0));
SmallVector<Constant*, 8> GEPOps;
@@ -2670,41 +2696,70 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
InstResult =
ConstantExpr::getGetElementPtr(P, GEPOps,
cast<GEPOperator>(GEP)->isInBounds());
+ DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
+ << "\n");
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
- if (!LI->isSimple()) return false; // no volatile/atomic accesses.
+
+ if (!LI->isSimple()) {
+ DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
+
Constant *Ptr = getVal(LI->getOperand(0));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+ DEBUG(dbgs() << "Found a constant pointer expression, constant "
+ "folding: " << *Ptr << "\n");
+ }
InstResult = ComputeLoadResult(Ptr);
- if (InstResult == 0) return false; // Could not evaluate load.
+ if (InstResult == 0) {
+ DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
+ "\n");
+ return false; // Could not evaluate load.
+ }
+
+ DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
- if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
+ if (AI->isArrayAllocation()) {
+ DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
+ return false; // Cannot handle array allocs.
+ }
Type *Ty = AI->getType()->getElementType();
AllocaTmps.push_back(new GlobalVariable(Ty, false,
GlobalValue::InternalLinkage,
UndefValue::get(Ty),
AI->getName()));
InstResult = AllocaTmps.back();
+ DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
} else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
CallSite CS(CurInst);
// Debug info can safely be ignored here.
if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
+ DEBUG(dbgs() << "Ignoring debug info.\n");
++CurInst;
continue;
}
// Cannot handle inline asm.
- if (isa<InlineAsm>(CS.getCalledValue())) return false;
+ if (isa<InlineAsm>(CS.getCalledValue())) {
+ DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
+ return false;
+ }
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
- if (MSI->isVolatile()) return false;
+ if (MSI->isVolatile()) {
+ DEBUG(dbgs() << "Can not optimize a volatile memset " <<
+ "intrinsic.\n");
+ return false;
+ }
Constant *Ptr = getVal(MSI->getDest());
Constant *Val = getVal(MSI->getValue());
Constant *DestVal = ComputeLoadResult(getVal(Ptr));
if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
// This memset is a no-op.
+ DEBUG(dbgs() << "Ignoring no-op memset.\n");
++CurInst;
continue;
}
@@ -2712,6 +2767,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
++CurInst;
continue;
}
@@ -2719,8 +2775,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (II->getIntrinsicID() == Intrinsic::invariant_start) {
// We don't insert an entry into Values, as it doesn't have a
// meaningful return value.
- if (!II->use_empty())
+ if (!II->use_empty()) {
+          DEBUG(dbgs() << "Found unused invariant_start. Can not evaluate.\n");
return false;
+ }
ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
Value *PtrArg = getVal(II->getArgOperand(1));
Value *Ptr = PtrArg->stripPointerCasts();
@@ -2728,20 +2786,30 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
if (!Size->isAllOnesValue() &&
Size->getValue().getLimitedValue() >=
- TD->getTypeStoreSize(ElemTy))
+ TD->getTypeStoreSize(ElemTy)) {
Invariants.insert(GV);
+ DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
+ << "\n");
+ } else {
+ DEBUG(dbgs() << "Found a global var, but can not treat it as an "
+ "invariant.\n");
+ }
}
// Continue even if we do nothing.
++CurInst;
continue;
}
+
+ DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
return false;
}
// Resolve function pointers.
Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
- if (!Callee || Callee->mayBeOverridden())
+ if (!Callee || Callee->mayBeOverridden()) {
+ DEBUG(dbgs() << "Can not resolve function pointer.\n");
return false; // Cannot resolve.
+ }
SmallVector<Constant*, 8> Formals;
for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
@@ -2751,22 +2819,38 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// If this is a function we can constant fold, do it.
if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
InstResult = C;
+ DEBUG(dbgs() << "Constant folded function call. Result: " <<
+ *InstResult << "\n");
} else {
+ DEBUG(dbgs() << "Can not constant fold function call.\n");
return false;
}
} else {
- if (Callee->getFunctionType()->isVarArg())
+ if (Callee->getFunctionType()->isVarArg()) {
+ DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
return false;
+ }
- Constant *RetVal;
+ Constant *RetVal = 0;
// Execute the call, if successful, use the return value.
ValueStack.push_back(new DenseMap<Value*, Constant*>);
- if (!EvaluateFunction(Callee, RetVal, Formals))
+ if (!EvaluateFunction(Callee, RetVal, Formals)) {
+ DEBUG(dbgs() << "Failed to evaluate function.\n");
return false;
+ }
delete ValueStack.pop_back_val();
InstResult = RetVal;
+
+ if (InstResult != NULL) {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
+          *InstResult << "\n\n");
+ } else {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
+ }
}
} else if (isa<TerminatorInst>(CurInst)) {
+ DEBUG(dbgs() << "Found a terminator instruction.\n");
+
if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
if (BI->isUnconditional()) {
NextBB = BI->getSuccessor(0);
@@ -2792,26 +2876,31 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
NextBB = 0;
} else {
// invoke, unwind, resume, unreachable.
+        DEBUG(dbgs() << "Can not handle terminator.\n");
return false; // Cannot handle this terminator.
}
// We succeeded at evaluating this block!
+ DEBUG(dbgs() << "Successfully evaluated block.\n");
return true;
} else {
// Did not know how to evaluate this!
+ DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
+ "\n");
return false;
}
if (!CurInst->use_empty()) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
-
+
setVal(CurInst, InstResult);
}
// If we just processed an invoke, we finished evaluating the block.
if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
NextBB = II->getNormalDest();
+ DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
return true;
}
@@ -2850,6 +2939,8 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
while (1) {
BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
+ DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+
if (!EvaluateBlock(CurInst, NextBB))
return false;
@@ -2891,7 +2982,7 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD,
Constant *RetValDummy;
bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
SmallVector<Constant*, 0>());
-
+
if (EvalSuccess) {
// We succeeded at evaluation: commit the result.
DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
@@ -2929,6 +3020,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
}
break;
}
+ DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
// We cannot simplify external ctor functions.
if (F->empty()) continue;
@@ -3011,13 +3103,13 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
return 0;
Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
-
+
if (!Fn)
return 0;
FunctionType *FTy = Fn->getFunctionType();
-
- // Checking that the function has the right return type, the right number of
+
+ // Checking that the function has the right return type, the right number of
// parameters and that they all have pointer types should be enough.
if (!FTy->getReturnType()->isIntegerTy() ||
FTy->getNumParams() != 3 ||
@@ -3092,7 +3184,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
// and remove them.
bool Changed = false;
- for (Function::use_iterator I = CXAAtExitFn->use_begin(),
+ for (Function::use_iterator I = CXAAtExitFn->use_begin(),
E = CXAAtExitFn->use_end(); I != E;) {
// We're only interested in calls. Theoretically, we could handle invoke
// instructions as well, but neither llvm-gcc nor clang generate invokes
@@ -3101,7 +3193,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
if (!CI)
continue;
- Function *DtorFn =
+ Function *DtorFn =
dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
if (!DtorFn)
continue;
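
The GlobalOpt hunks above mostly add DEBUG(dbgs() << ...) tracing to the static-constructor evaluator. A minimal sketch of the mechanism, assuming an illustrative DEBUG_TYPE of "myopt" (not one defined by this patch): such statements compile to nothing in non-asserts builds and print only under -debug or -debug-only=<type> otherwise.

    #define DEBUG_TYPE "myopt"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void traceStep(int N) {
      // Printed only in +Asserts builds when the tool runs with -debug or
      // -debug-only=myopt; otherwise this expands to an empty statement.
      DEBUG(dbgs() << "Evaluated step " << N << "\n");
    }
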
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index d757e1fdb1da..4ac1dfc09682 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -17,14 +17,14 @@
#define DEBUG_TYPE "ipconstprop"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
STATISTIC(NumArgumentsProped, "Number of args turned into constants");
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index b1c36c15db0b..a0095dad1af7 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -13,47 +13,58 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "inline"
-#include "llvm/CallingConv.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
namespace {
- // AlwaysInliner only inlines functions that are mark as "always inline".
- class AlwaysInliner : public Inliner {
- public:
- // Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) {
- initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
- }
- AlwaysInliner(bool InsertLifetime) : Inliner(ID, -2000000000,
- InsertLifetime) {
- initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
- }
- static char ID; // Pass identification, replacement for typeid
- virtual InlineCost getInlineCost(CallSite CS);
- virtual bool doFinalization(CallGraph &CG) {
- return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
- }
- virtual bool doInitialization(CallGraph &CG);
- };
+/// \brief Inliner pass which only handles "always inline" functions.
+class AlwaysInliner : public Inliner {
+ InlineCostAnalysis *ICA;
+
+public:
+ // Use extremely low threshold.
+ AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), ICA(0) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ AlwaysInliner(bool InsertLifetime)
+ : Inliner(ID, -2000000000, InsertLifetime), ICA(0) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+
+ virtual InlineCost getInlineCost(CallSite CS);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnSCC(CallGraphSCC &SCC);
+
+ using llvm::Pass::doFinalization;
+ virtual bool doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
+ }
+};
+
}
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
@@ -63,35 +74,6 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
return new AlwaysInliner(InsertLifetime);
}
-/// \brief Minimal filter to detect invalid constructs for inlining.
-static bool isInlineViable(Function &F) {
- bool ReturnsTwice =F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice);
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
- // Disallow inlining of functions which contain an indirect branch.
- if (isa<IndirectBrInst>(BI->getTerminator()))
- return false;
-
- for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
- ++II) {
- CallSite CS(II);
- if (!CS)
- continue;
-
- // Disallow recursive calls.
- if (&F == CS.getCalledFunction())
- return false;
-
- // Disallow calls which expose returns-twice to a function not previously
- // attributed as such.
- if (!ReturnsTwice && CS.isCall() &&
- cast<CallInst>(CS.getInstruction())->canReturnTwice())
- return false;
- }
- }
-
- return true;
-}
-
/// \brief Get the inline cost for the always-inliner.
///
/// The always inliner *only* handles functions which are marked with the
@@ -106,27 +88,25 @@ static bool isInlineViable(Function &F) {
/// likely not worth it in practice.
InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
Function *Callee = CS.getCalledFunction();
- // We assume indirect calls aren't calling an always-inline function.
- if (!Callee) return InlineCost::getNever();
- // We can't inline calls to external functions.
- // FIXME: We shouldn't even get here.
- if (Callee->isDeclaration()) return InlineCost::getNever();
+ // Only inline direct calls to functions with always-inline attributes
+ // that are viable for inlining. FIXME: We shouldn't even get here for
+ // declarations.
+ if (Callee && !Callee->isDeclaration() &&
+ Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::AlwaysInline) &&
+ ICA->isInlineViable(*Callee))
+ return InlineCost::getAlways();
- // Return never for anything not marked as always inline.
- if (!Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline))
- return InlineCost::getNever();
-
- // Do some minimal analysis to preclude non-viable functions.
- if (!isInlineViable(*Callee))
- return InlineCost::getNever();
+ return InlineCost::getNever();
+}
- // Otherwise, force inlining.
- return InlineCost::getAlways();
+bool AlwaysInliner::runOnSCC(CallGraphSCC &SCC) {
+ ICA = &getAnalysis<InlineCostAnalysis>();
+ return Inliner::runOnSCC(SCC);
}
-// doInitialization - Initializes the vector of functions that have not
-// been annotated with the "always inline" attribute.
-bool AlwaysInliner::doInitialization(CallGraph &CG) {
- return false;
+void AlwaysInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<InlineCostAnalysis>();
+ Inliner::getAnalysisUsage(AU);
}
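
For context, the attribute this pass keys on normally originates from a source-level annotation. A hedged illustration in Clang/GCC syntax (the function names are hypothetical):

    // square() receives the IR-level alwaysinline attribute, for which
    // AlwaysInliner::getInlineCost() now returns getAlways(), provided the
    // callee is a definition and InlineCostAnalysis::isInlineViable() holds.
    __attribute__((always_inline)) inline int square(int x) { return x * x; }

    int caller(int x) {
      return square(x); // expected to be inlined regardless of cost
    }
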
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index bf0b1f91a210..a4f702604188 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -12,44 +12,57 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "inline"
-#include "llvm/CallingConv.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/DataLayout.h"
using namespace llvm;
namespace {
- class SimpleInliner : public Inliner {
- InlineCostAnalyzer CA;
- public:
- SimpleInliner() : Inliner(ID) {
- initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
- }
- SimpleInliner(int Threshold) : Inliner(ID, Threshold,
- /*InsertLifetime*/true) {
- initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
- }
- static char ID; // Pass identification, replacement for typeid
- InlineCost getInlineCost(CallSite CS) {
- return CA.getInlineCost(CS, getInlineThreshold(CS));
- }
- virtual bool doInitialization(CallGraph &CG);
- };
-}
+/// \brief Actual inliner pass implementation.
+///
+/// The common implementation of the inlining logic is shared between this
+/// inliner pass and the always inliner pass. The two passes use different cost
+/// analyses to determine when to inline.
+class SimpleInliner : public Inliner {
+ InlineCostAnalysis *ICA;
+
+public:
+ SimpleInliner() : Inliner(ID), ICA(0) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ SimpleInliner(int Threshold)
+ : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(0) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+
+ InlineCost getInlineCost(CallSite CS) {
+ return ICA->getInlineCost(CS, getInlineThreshold(CS));
+ }
+
+ virtual bool runOnSCC(CallGraphSCC &SCC);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+} // end anonymous namespace
char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
@@ -59,10 +72,12 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
return new SimpleInliner(Threshold);
}
-// doInitialization - Initializes the vector of functions that have been
-// annotated with the noinline attribute.
-bool SimpleInliner::doInitialization(CallGraph &CG) {
- CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
- return false;
+bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
+ ICA = &getAnalysis<InlineCostAnalysis>();
+ return Inliner::runOnSCC(SCC);
}
+void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<InlineCostAnalysis>();
+ Inliner::getAnalysisUsage(AU);
+}
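
A usage sketch for the reworked pass, built on the factory declared above (the threshold value is illustrative, and the legacy PassManager of this era is assumed):

    #include "llvm/IR/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO.h"
    using namespace llvm;

    void runInliner(Module &M) {
      PassManager PM;
      // InlineCostAnalysis is now a required analysis, so the pass manager
      // schedules it automatically via getAnalysisUsage() above.
      PM.add(createFunctionInliningPass(/*Threshold=*/275));
      PM.run(M);
    }
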
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index abcb25fd4555..663ddb75f423 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -14,22 +14,22 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "inline"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
STATISTIC(NumInlined, "Number of functions inlined");
@@ -64,14 +64,48 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
/// getAnalysisUsage - For this class, we declare that we require and preserve
/// the call graph. If the derived class implements this method, it should
/// always explicitly call the implementation here.
-void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
- CallGraphSCCPass::getAnalysisUsage(Info);
+void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ CallGraphSCCPass::getAnalysisUsage(AU);
}
typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
InlinedArrayAllocasTy;
+/// \brief If the inlined function had a higher stack protection level than the
+/// calling function, then bump up the caller's stack protection level.
+static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
+ // If upgrading the SSP attribute, clear out the old SSP Attributes first.
+ // Having multiple SSP attributes doesn't actually hurt, but it adds useless
+ // clutter to the IR.
+ AttrBuilder B;
+ B.addAttribute(Attribute::StackProtect)
+ .addAttribute(Attribute::StackProtectStrong);
+ AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
+ AttributeSet::FunctionIndex,
+ B);
+ AttributeSet CallerAttr = Caller->getAttributes(),
+ CalleeAttr = Callee->getAttributes();
+
+ if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq)) {
+ Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller->addFnAttr(Attribute::StackProtectReq);
+ } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq)) {
+ Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller->addFnAttr(Attribute::StackProtectStrong);
+ } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtect) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong))
+ Caller->addFnAttr(Attribute::StackProtect);
+}
+
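+// For example, a caller attributed plain ssp that inlines an sspstrong
+// callee ends up sspstrong, while an sspreq caller is left untouched,
+// since sspreq already dominates both weaker attributes.
+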
/// InlineCallIfPossible - If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
@@ -91,13 +125,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
if (!InlineFunction(CS, IFI, InsertLifetime))
return false;
- // If the inlined function had a higher stack protection level than the
- // calling function, then bump up the caller's stack protection level.
- if (Callee->getFnAttributes().hasAttribute(Attributes::StackProtectReq))
- Caller->addFnAttr(Attributes::StackProtectReq);
- else if (Callee->getFnAttributes().hasAttribute(Attributes::StackProtect) &&
- !Caller->getFnAttributes().hasAttribute(Attributes::StackProtectReq))
- Caller->addFnAttr(Attributes::StackProtect);
+ AdjustCallerSSPLevel(Caller, Callee);
// Look at all of the allocas that we inlined through this call site. If we
// have already inlined other allocas through other calls into this function,
@@ -209,16 +237,21 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// would decrease the threshold.
Function *Caller = CS.getCaller();
bool OptSize = Caller && !Caller->isDeclaration() &&
- Caller->getFnAttributes().hasAttribute(Attributes::OptimizeForSize);
+ Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
OptSizeThreshold < thres)
thres = OptSizeThreshold;
- // Listen to the inlinehint attribute when it would increase the threshold.
+ // Listen to the inlinehint attribute when it would increase the threshold
+ // and the caller does not need to minimize its size.
Function *Callee = CS.getCalledFunction();
bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->getFnAttributes().hasAttribute(Attributes::InlineHint);
- if (InlineHint && HintThreshold > thres)
+ Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::InlineHint);
+ if (InlineHint && HintThreshold > thres
+ && !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize))
thres = HintThreshold;
return thres;
@@ -534,7 +567,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// about always-inline functions. This is a bit of a hack to share code
// between here and the InlineAlways pass.
if (AlwaysInlineOnly &&
- !F->getFnAttributes().hasAttribute(Attributes::AlwaysInline))
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::AlwaysInline))
continue;
// If the only remaining users of the function are dead constants, remove
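
Distilled into a standalone sketch, the threshold selection above behaves roughly as follows (names are hypothetical; the real code reads the attributes shown in the hunk and also honors an explicit -inline-threshold override, omitted here):

    // optsize callers shrink the inlining budget; inlinehint callees grow
    // it, unless the caller is attributed minsize.
    int computeThreshold(int Base, bool CallerOptSize, bool CalleeInlineHint,
                         bool CallerMinSize, int OptSizeThreshold,
                         int HintThreshold) {
      int Thres = Base;
      if (CallerOptSize && OptSizeThreshold < Thres)
        Thres = OptSizeThreshold;
      if (CalleeInlineHint && HintThreshold > Thres && !CallerMinSize)
        Thres = HintThreshold;
      return Thres;
    }
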
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index aa629cc0c6fb..4bfab5b0afbd 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -14,14 +14,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "internalize"
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
#include <fstream>
#include <set>
using namespace llvm;
@@ -48,8 +48,10 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit InternalizePass();
- explicit InternalizePass(const std::vector <const char *>& exportList);
+ explicit InternalizePass(ArrayRef<const char *> exportList);
void LoadFile(const char *Filename);
+ void ClearExportList();
+ void AddToExportList(const std::string &val);
virtual bool runOnModule(Module &M);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -72,10 +74,10 @@ InternalizePass::InternalizePass()
ExternalNames.insert(APIList.begin(), APIList.end());
}
-InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
+InternalizePass::InternalizePass(ArrayRef<const char *> exportList)
: ModulePass(ID){
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
- for(std::vector<const char *>::const_iterator itr = exportList.begin();
+ for(ArrayRef<const char *>::const_iterator itr = exportList.begin();
itr != exportList.end(); itr++) {
ExternalNames.insert(*itr);
}
@@ -97,6 +99,14 @@ void InternalizePass::LoadFile(const char *Filename) {
}
}
+void InternalizePass::ClearExportList() {
+ ExternalNames.clear();
+}
+
+void InternalizePass::AddToExportList(const std::string &val) {
+ ExternalNames.insert(val);
+}
+
bool InternalizePass::runOnModule(Module &M) {
CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
@@ -173,6 +183,6 @@ ModulePass *llvm::createInternalizePass() {
return new InternalizePass();
}
-ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) {
+ModulePass *llvm::createInternalizePass(ArrayRef<const char *> el) {
return new InternalizePass(el);
}
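
A usage sketch for the new ArrayRef overload (pass setup is assumed; "main" is an illustrative export):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO.h"
    using namespace llvm;

    // Keep "main" externally visible and internalize everything else.
    // ArrayRef binds directly to the C array, which is what motivated the
    // signature change away from std::vector.
    static const char *Exports[] = { "main" };

    void addInternalize(PassManager &PM) {
      PM.add(createInternalizePass(Exports));
    }
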
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index b18c9150f440..124cbb6f0549 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = IPO
parent = Transforms
library_name = ipo
-required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils
+required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils ObjCARC
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 97d7cdced0e3..8282a8e6fabc 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -16,16 +16,16 @@
#define DEBUG_TYPE "loop-extract"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
-#include "llvm/ADT/Statistic.h"
#include <fstream>
#include <set>
using namespace llvm;
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 44283ddce7ae..892100f0585a 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -45,25 +45,25 @@
#define DEBUG_TYPE "mergefunc"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include <vector>
using namespace llvm;
@@ -346,13 +346,11 @@ bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
const GEPOperator *GEP2) {
// When we have target data, we can reduce the GEP down to the value in bytes
// added to the address.
- if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) {
- SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end());
- SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end());
- uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
- Indices1);
- uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
- Indices2);
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 1;
+ APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
+ if (TD &&
+ GEP1->accumulateConstantOffset(*TD, Offset1) &&
+ GEP2->accumulateConstantOffset(*TD, Offset2)) {
return Offset1 == Offset2;
}
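
The replacement API can be sketched in isolation roughly as follows (assuming a valid DataLayout; names are illustrative):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Operator.h"
    using namespace llvm;

    // Returns true and fills Offset when every GEP index is constant,
    // mirroring how isEquivalentGEP now reduces each GEP to a byte offset.
    bool gepByteOffset(const DataLayout &TD, const GEPOperator *GEP,
                       APInt &Offset) {
      Offset = APInt(TD.getPointerSizeInBits(), 0);
      return GEP->accumulateConstantOffset(TD, Offset);
    }
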
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 9c9910bd5cc8..fa518cb0abb6 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -14,14 +14,14 @@
#define DEBUG_TYPE "partialinlining"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CFG.h"
using namespace llvm;
STATISTIC(NumPartialInlined, "Number of functions partially inlined");
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 05253fcddab3..027a9f2a6871 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -14,21 +14,17 @@
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-
#include "llvm-c/Transforms/PassManagerBuilder.h"
-
-#include "llvm/PassManager.h"
-#include "llvm/DefaultPasses.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Vectorize.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@@ -190,10 +186,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
- if (LoopVectorize) {
+ if (LoopVectorize && OptLevel > 2)
MPM.add(createLoopVectorizePass());
- MPM.add(createLICMPass());
- }
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
@@ -220,6 +214,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createGVNPass()); // Remove redundancies
else
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
}
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
@@ -323,7 +321,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createGlobalDCEPass());
}
-LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void) {
+LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
PassManagerBuilder *PMB = new PassManagerBuilder();
return wrap(PMB);
}
@@ -393,9 +391,9 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM,
- bool Internalize,
- bool RunInliner) {
+ LLVMBool Internalize,
+ LLVMBool RunInliner) {
PassManagerBuilder *Builder = unwrap(PMB);
PassManagerBase *LPM = unwrap(PM);
- Builder->populateLTOPassManager(*LPM, Internalize, RunInliner);
+ Builder->populateLTOPassManager(*LPM, Internalize != 0, RunInliner != 0);
}
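
Since LLVMBool is a plain int in the C API, callers pass 0/1 rather than a C++ bool, hence the explicit "!= 0" conversions above. A minimal usage sketch (the surrounding pass manager setup is assumed):

    #include "llvm-c/Transforms/PassManagerBuilder.h"

    void populateLTO(LLVMPassManagerRef PM) {
      LLVMPassManagerBuilderRef B = LLVMPassManagerBuilderCreate();
      LLVMPassManagerBuilderSetOptLevel(B, 2);
      LLVMPassManagerBuilderPopulateLTOPassManager(B, PM, /*Internalize=*/1,
                                                   /*RunInliner=*/1);
      LLVMPassManagerBuilderDispose(B);
    }
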
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index fb4ecbfe7b08..73d9323195bb 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -16,16 +16,16 @@
#define DEBUG_TYPE "prune-eh"
#include "llvm/Transforms/IPO.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CFG.h"
#include <algorithm>
using namespace llvm;
@@ -140,15 +140,17 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
AttrBuilder NewAttributes;
if (!SCCMightUnwind)
- NewAttributes.addAttribute(Attributes::NoUnwind);
+ NewAttributes.addAttribute(Attribute::NoUnwind);
if (!SCCMightReturn)
- NewAttributes.addAttribute(Attributes::NoReturn);
+ NewAttributes.addAttribute(Attribute::NoReturn);
Function *F = (*I)->getFunction();
- const AttrListPtr &PAL = F->getAttributes();
- const AttrListPtr &NPAL = PAL.addAttr(F->getContext(), ~0,
- Attributes::get(F->getContext(),
- NewAttributes));
+ const AttributeSet &PAL = F->getAttributes();
+ const AttributeSet &NPAL =
+ PAL.addAttributes(F->getContext(), AttributeSet::FunctionIndex,
+ AttributeSet::get(F->getContext(),
+ AttributeSet::FunctionIndex,
+ NewAttributes));
if (PAL != NPAL) {
MadeChange = true;
F->setAttributes(NPAL);
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index b5f09ecccaf2..f00830aadaad 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -16,9 +16,9 @@
#define DEBUG_TYPE "strip-dead-prototypes"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Pass.h"
-#include "llvm/Module.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
using namespace llvm;
STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 80bfc1cdb2c5..5f8681ff454e 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -21,17 +21,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
-#include "llvm/TypeFinder.h"
-#include "llvm/ValueSymbolTable.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
namespace {
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 7467eca7ab1f..1f6a3a5e335d 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -11,12 +11,12 @@
#define INSTCOMBINE_INSTCOMBINE_H
#include "InstCombineWorklist.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
-#include "llvm/Pass.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Support/InstVisitor.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
#include "llvm/Support/TargetFolder.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
@@ -27,7 +27,7 @@ namespace llvm {
class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
-
+
/// SelectPatternFlavor - We can match a variety of different patterns for
/// select operations.
enum SelectPatternFlavor {
@@ -36,7 +36,7 @@ enum SelectPatternFlavor {
SPF_SMAX, SPF_UMAX
//SPF_ABS - TODO.
};
-
+
/// getComplexity: Assign a complexity or rank value to LLVM Values...
/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
static inline unsigned getComplexity(Value *V) {
@@ -51,23 +51,23 @@ static inline unsigned getComplexity(Value *V) {
return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
}
-
+
/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
/// just like the normal insertion helper, but also adds any new instructions
/// to the instcombine worklist.
-class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
+class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
: public IRBuilderDefaultInserter<true> {
InstCombineWorklist &Worklist;
public:
InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
-
+
void InsertHelper(Instruction *I, const Twine &Name,
BasicBlock *BB, BasicBlock::iterator InsertPt) const {
IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
Worklist.Add(I);
}
};
-
+
/// InstCombiner - The -instcombine pass.
class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
@@ -76,6 +76,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
TargetLibraryInfo *TLI;
bool MadeIRChange;
LibCallSimplifier *Simplifier;
+ bool MinimizeSize;
public:
/// Worklist - All of the instructions that need to be simplified.
InstCombineWorklist Worklist;
@@ -84,15 +85,16 @@ public:
/// instructions into the worklist when they are created.
typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
BuilderTy *Builder;
-
+
static char ID; // Pass identification, replacement for typeid
InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
+ MinimizeSize = false;
initializeInstCombinerPass(*PassRegistry::getPassRegistry());
}
public:
virtual bool runOnFunction(Function &F);
-
+
bool DoOneIteration(Function &F, unsigned ItNum);
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -114,6 +116,8 @@ public:
Instruction *visitSub(BinaryOperator &I);
Instruction *visitFSub(BinaryOperator &I);
Instruction *visitMul(BinaryOperator &I);
+ Value *foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
+ Instruction *InsertBefore);
Instruction *visitFMul(BinaryOperator &I);
Instruction *visitURem(BinaryOperator &I);
Instruction *visitSRem(BinaryOperator &I);
@@ -207,11 +211,11 @@ public:
private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
- Value *dyn_castFNegVal(Value *V) const;
- Type *FindElementAtOffset(Type *Ty, int64_t Offset,
+ Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
+ Type *FindElementAtOffset(Type *Ty, int64_t Offset,
SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
-
+
/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
/// results in any code being generated and is interesting to optimize out. If
/// the cast can be eliminated by some other simple transformation, we prefer
@@ -243,7 +247,7 @@ public:
return New;
}
- // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
+ // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
// debug loc.
//
Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
@@ -259,10 +263,10 @@ public:
//
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
-
+
// If we are replacing the instruction with itself, this must be in a
// segment of unreachable code, so just clobber the instruction.
- if (&I == V)
+ if (&I == V)
V = UndefValue::get(I.getType());
DEBUG(errs() << "IC: Replacing " << I << "\n"
@@ -292,13 +296,13 @@ public:
MadeIRChange = true;
return 0; // Don't do anything with FI
}
-
+
void ComputeMaskedBits(Value *V, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0) const {
return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
}
-
- bool MaskedValueIsZero(Value *V, const APInt &Mask,
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask,
unsigned Depth = 0) const {
return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
}
@@ -321,21 +325,26 @@ private:
/// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
/// based on the demanded bits.
- Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt& KnownZero, APInt& KnownOne,
unsigned Depth);
- bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
APInt& KnownZero, APInt& KnownOne,
unsigned Depth=0);
-
+ /// Helper routine of SimplifyDemandedUseBits. It tries to simplify the
+ /// demanded bits for the "r1 = shr x, c1; r2 = shl r1, c2" instruction
+ /// sequence.
+ Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
+ APInt DemandedMask, APInt &KnownZero,
+ APInt &KnownOne);
+
/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
/// SimplifyDemandedBits knows about. See if the instruction has any
/// properties that allow us to simplify its operands.
bool SimplifyDemandedInstructionBits(Instruction &Inst);
-
+
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt& UndefElts, unsigned Depth = 0);
-
+
// FoldOpIntoPhi - Given a binary operator, cast instruction, or select
// which has a PHI node as operand #0, see if we can fold the instruction
// into the PHI (which is only possible if all operands to the PHI are
@@ -351,10 +360,10 @@ private:
Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
-
+
Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
ConstantInt *AndRHS, BinaryOperator &TheAnd);
-
+
Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
bool isSub, Instruction &I);
Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
@@ -373,8 +382,8 @@ private:
Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
};
-
-
+
+
} // end namespace llvm.
#endif
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d8257e64d837..7595da08d3e8 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -13,16 +13,840 @@
#include "InstCombine.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
+namespace {
+
+ /// Class representing the coefficient of a floating-point addend.
+ /// This class needs to be highly efficient, which is especially true of
+ /// the constructor. As of this writing, the cost of the default
+ /// constructor is merely a 4-byte zero store (assuming the compiler is
+ /// able to perform write-merging).
+ ///
+ class FAddendCoef {
+ public:
+ // The constructor has to initialize an APFloat, which is unnecessary for
+ // most addends, whose coefficient is either 1 or -1. So the constructor
+ // is expensive. In order to avoid that cost, we should reuse instances
+ // whenever possible. The pre-created instances FAddCombine::Add[0-5]
+ // embody this idea.
+ //
+ FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
+ ~FAddendCoef();
+
+ void set(short C) {
+ assert(!insaneIntVal(C) && "Insane coefficient");
+ IsFp = false; IntVal = C;
+ }
+
+ void set(const APFloat& C);
+
+ void negate();
+
+ bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
+ Value *getValue(Type *) const;
+
+ // If possible, don't define operator+/operator- etc., because these
+ // operators inevitably call FAddendCoef's constructor, which is not cheap.
+ void operator=(const FAddendCoef &A);
+ void operator+=(const FAddendCoef &A);
+ void operator-=(const FAddendCoef &A);
+ void operator*=(const FAddendCoef &S);
+
+ bool isOne() const { return isInt() && IntVal == 1; }
+ bool isTwo() const { return isInt() && IntVal == 2; }
+ bool isMinusOne() const { return isInt() && IntVal == -1; }
+ bool isMinusTwo() const { return isInt() && IntVal == -2; }
+
+ private:
+ bool insaneIntVal(int V) { return V > 4 || V < -4; }
+ APFloat *getFpValPtr(void)
+ { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); }
+ const APFloat *getFpValPtr(void) const
+ { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); }
+
+ const APFloat &getFpVal(void) const {
+ assert(IsFp && BufHasFpVal && "Incorret state");
+ return *getFpValPtr();
+ }
+
+ APFloat &getFpVal(void)
+ { assert(IsFp && BufHasFpVal && "Incorrect state"); return *getFpValPtr(); }
+
+ bool isInt() const { return !IsFp; }
+
+ // If the coefficient is represented by an integer, promote it to a
+ // floating point.
+ void convertToFpType(const fltSemantics &Sem);
+
+ // Construct an APFloat from a signed integer.
+ // TODO: We should get rid of this function when APFloat can be constructed
+ // from a *SIGNED* integer.
+ APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
+ private:
+
+ bool IsFp;
+
+ // True iff FpValBuf contains an instance of APFloat.
+ bool BufHasFpVal;
+
+ // The integer coefficient of an individual addend is either 1 or -1,
+ // and we try to simplify at most 4 addends from at most two neighboring
+ // instructions. So the range of <IntVal> falls in [-4, 4]. APInt would
+ // be overkill here.
+ short IntVal;
+
+ AlignedCharArrayUnion<APFloat> FpValBuf;
+ };
+
+ /// FAddend is used to represent a floating-point addend. An addend is
+ /// represented as <C, V>, where V is a symbolic value and C is a
+ /// constant coefficient. A constant addend is represented as <C, 0>.
+ ///
+ class FAddend {
+ public:
+ FAddend() { Val = 0; }
+
+ Value *getSymVal (void) const { return Val; }
+ const FAddendCoef &getCoef(void) const { return Coeff; }
+
+ bool isConstant() const { return Val == 0; }
+ bool isZero() const { return Coeff.isZero(); }
+
+ void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; }
+ void set(const APFloat& Coefficient, Value *V)
+ { Coeff.set(Coefficient); Val = V; }
+ void set(const ConstantFP* Coefficient, Value *V)
+ { Coeff.set(Coefficient->getValueAPF()); Val = V; }
+
+ void negate() { Coeff.negate(); }
+
+ /// Drill down the U-D chain one step to find the definition of V, and
+ /// try to break the definition into one or two addends.
+ static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
+
+ /// Similar to FAddend::drillValueDownOneStep() except that the value
+ /// being split is the addend itself.
+ unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
+
+ void operator+=(const FAddend &T) {
+ assert((Val == T.Val) && "Symbolic-values disagree");
+ Coeff += T.Coeff;
+ }
+
+ private:
+ void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
+
+ // This addend has the value of "Coeff * Val".
+ Value *Val;
+ FAddendCoef Coeff;
+ };
+
+ /// FAddCombine is the class for optimizing an unsafe fadd/fsub along
+ /// with at most two of its neighboring instructions.
+ ///
+ class FAddCombine {
+ public:
+ FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
+ Value *simplify(Instruction *FAdd);
+
+ private:
+ typedef SmallVector<const FAddend*, 4> AddendVect;
+
+ Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
+
+ Value *performFactorization(Instruction *I);
+
+ /// Convert given addend to a Value
+ Value *createAddendVal(const FAddend &A, bool& NeedNeg);
+
+ /// Return the number of instructions needed to emit the N-ary addition.
+ unsigned calcInstrNumber(const AddendVect& Vect);
+ Value *createFSub(Value *Opnd0, Value *Opnd1);
+ Value *createFAdd(Value *Opnd0, Value *Opnd1);
+ Value *createFMul(Value *Opnd0, Value *Opnd1);
+ Value *createFDiv(Value *Opnd0, Value *Opnd1);
+ Value *createFNeg(Value *V);
+ Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
+ void createInstPostProc(Instruction *NewInst);
+
+ InstCombiner::BuilderTy *Builder;
+ Instruction *Instr;
+
+ private:
+ // Debugging support is clustered here.
+ #ifndef NDEBUG
+ unsigned CreateInstrNum;
+ void initCreateInstNum() { CreateInstrNum = 0; }
+ void incCreateInstNum() { CreateInstrNum++; }
+ #else
+ void initCreateInstNum() {}
+ void incCreateInstNum() {}
+ #endif
+ };
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of
+// {FAddendCoef, FAddend, FAddition, FAddCombine}.
+//
+//===----------------------------------------------------------------------===//
+FAddendCoef::~FAddendCoef() {
+ if (BufHasFpVal)
+ getFpValPtr()->~APFloat();
+}
+
+void FAddendCoef::set(const APFloat& C) {
+ APFloat *P = getFpValPtr();
+
+ if (isInt()) {
+ // As the buffer is a meaningless byte stream, we cannot call
+ // APFloat::operator=().
+ new(P) APFloat(C);
+ } else
+ *P = C;
+
+ IsFp = BufHasFpVal = true;
+}
+
+void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
+ if (!isInt())
+ return;
+
+ APFloat *P = getFpValPtr();
+ if (IntVal > 0)
+ new(P) APFloat(Sem, IntVal);
+ else {
+ new(P) APFloat(Sem, 0 - IntVal);
+ P->changeSign();
+ }
+ IsFp = BufHasFpVal = true;
+}
+
+APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
+ if (Val >= 0)
+ return APFloat(Sem, Val);
+
+ APFloat T(Sem, 0 - Val);
+ T.changeSign();
+
+ return T;
+}
+
+void FAddendCoef::operator=(const FAddendCoef &That) {
+ if (That.isInt())
+ set(That.IntVal);
+ else
+ set(That.getFpVal());
+}
+
+void FAddendCoef::operator+=(const FAddendCoef &That) {
+ enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
+ if (isInt() == That.isInt()) {
+ if (isInt())
+ IntVal += That.IntVal;
+ else
+ getFpVal().add(That.getFpVal(), RndMode);
+ return;
+ }
+
+ if (isInt()) {
+ const APFloat &T = That.getFpVal();
+ convertToFpType(T.getSemantics());
+ getFpVal().add(T, RndMode);
+ return;
+ }
+
+ APFloat &T = getFpVal();
+ T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
+}
+
+void FAddendCoef::operator-=(const FAddendCoef &That) {
+ enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven;
+ if (isInt() == That.isInt()) {
+ if (isInt())
+ IntVal -= That.IntVal;
+ else
+ getFpVal().subtract(That.getFpVal(), RndMode);
+ return;
+ }
+
+ if (isInt()) {
+ const APFloat &T = That.getFpVal();
+ convertToFpType(T.getSemantics());
+ getFpVal().subtract(T, RndMode);
+ return;
+ }
+
+ APFloat &T = getFpVal();
+ T.subtract(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
+}
+
+void FAddendCoef::operator*=(const FAddendCoef &That) {
+ if (That.isOne())
+ return;
+
+ if (That.isMinusOne()) {
+ negate();
+ return;
+ }
+
+ if (isInt() && That.isInt()) {
+ int Res = IntVal * (int)That.IntVal;
+ assert(!insaneIntVal(Res) && "Insane int value");
+ IntVal = Res;
+ return;
+ }
+
+ const fltSemantics &Semantic =
+ isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
+
+ if (isInt())
+ convertToFpType(Semantic);
+ APFloat &F0 = getFpVal();
+
+ if (That.isInt())
+ F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
+ APFloat::rmNearestTiesToEven);
+ else
+ F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
+
+ return;
+}
+
+void FAddendCoef::negate() {
+ if (isInt())
+ IntVal = 0 - IntVal;
+ else
+ getFpVal().changeSign();
+}
+
+Value *FAddendCoef::getValue(Type *Ty) const {
+ return isInt() ?
+ ConstantFP::get(Ty, float(IntVal)) :
+ ConstantFP::get(Ty->getContext(), getFpVal());
+}
+
+// The definition of <Val> Addends
+// =========================================
+// A + B <1, A>, <1, B>
+// A - B <1, A>, <-1, B>
+// 0 - B <-1, B>
+// C * A <C, A>
+// A + C <1, A>, <C, NULL>
+// 0 +/- 0 <0, NULL> (corner case)
+//
+// Legend: A and B are not constant, C is constant
+//
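+// For example, "fsub float 0.0, %x" yields the single addend <-1, %x>,
+// while "fadd float %x, 4.0" yields the pair <1, %x> and <4.0, NULL>.
+//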
+unsigned FAddend::drillValueDownOneStep
+ (Value *Val, FAddend &Addend0, FAddend &Addend1) {
+ Instruction *I = 0;
+ if (Val == 0 || !(I = dyn_cast<Instruction>(Val)))
+ return 0;
+
+ unsigned Opcode = I->getOpcode();
+
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) {
+ ConstantFP *C0, *C1;
+ Value *Opnd0 = I->getOperand(0);
+ Value *Opnd1 = I->getOperand(1);
+ if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
+ Opnd0 = 0;
+
+ if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
+ Opnd1 = 0;
+
+ if (Opnd0) {
+ if (!C0)
+ Addend0.set(1, Opnd0);
+ else
+ Addend0.set(C0, 0);
+ }
+
+ if (Opnd1) {
+ FAddend &Addend = Opnd0 ? Addend1 : Addend0;
+ if (!C1)
+ Addend.set(1, Opnd1);
+ else
+ Addend.set(C1, 0);
+ if (Opcode == Instruction::FSub)
+ Addend.negate();
+ }
+
+ if (Opnd0 || Opnd1)
+ return Opnd0 && Opnd1 ? 2 : 1;
+
+ // Both operands are zero. Weird!
+ Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0);
+ return 1;
+ }
+
+ if (I->getOpcode() == Instruction::FMul) {
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) {
+ Addend0.set(C, V1);
+ return 1;
+ }
+
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) {
+ Addend0.set(C, V0);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+// Try to break *this* addend into two addends. For example, suppose this
+// addend is <2.3, V> and V = X + Y; calling this function yields the two
+// addends <2.3, X> and <2.3, Y>.
+//
+unsigned FAddend::drillAddendDownOneStep
+ (FAddend &Addend0, FAddend &Addend1) const {
+ if (isConstant())
+ return 0;
+
+ unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
+ if (!BreakNum || Coeff.isOne())
+ return BreakNum;
+
+ Addend0.Scale(Coeff);
+
+ if (BreakNum == 2)
+ Addend1.Scale(Coeff);
+
+ return BreakNum;
+}
+
+// Try to perform the following optimization on the input instruction I.
+// Return the simplified expression if successful; otherwise, return 0.
+//
+// Instruction "I" is Simplified into
+// -------------------------------------------------------
+// (x * y) +/- (x * z) x * (y +/- z)
+// (y / x) +/- (z / x) (y +/- z) / x
+//
+Value *FAddCombine::performFactorization(Instruction *I) {
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
+ Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
+
+ if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
+ return 0;
+
+ bool isMpy = false;
+ if (I0->getOpcode() == Instruction::FMul)
+ isMpy = true;
+ else if (I0->getOpcode() != Instruction::FDiv)
+ return 0;
+
+ Value *Opnd0_0 = I0->getOperand(0);
+ Value *Opnd0_1 = I0->getOperand(1);
+ Value *Opnd1_0 = I1->getOperand(0);
+ Value *Opnd1_1 = I1->getOperand(1);
+
+ // Input Instr I Factor AddSub0 AddSub1
+ // ----------------------------------------------
+ // (x*y) +/- (x*z) x y z
+ // (y/x) +/- (z/x) x y z
+ //
+ Value *Factor = 0;
+ Value *AddSub0 = 0, *AddSub1 = 0;
+
+ if (isMpy) {
+ if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
+ Factor = Opnd0_0;
+ else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
+ Factor = Opnd0_1;
+
+ if (Factor) {
+ AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
+ AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
+ }
+ } else if (Opnd0_1 == Opnd1_1) {
+ Factor = Opnd0_1;
+ AddSub0 = Opnd0_0;
+ AddSub1 = Opnd1_0;
+ }
+
+ if (!Factor)
+ return 0;
+
+ // Create expression "NewAddSub = AddSub0 +/- AddsSub1"
+ Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
+ createFAdd(AddSub0, AddSub1) :
+ createFSub(AddSub0, AddSub1);
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
+ const APFloat &F = CFP->getValueAPF();
+ if (!F.isNormal() || F.isDenormal())
+ return 0;
+ }
+
+ if (isMpy)
+ return createFMul(Factor, NewAddSub);
+
+ return createFDiv(NewAddSub, Factor);
+}
+
+Value *FAddCombine::simplify(Instruction *I) {
+ assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
+
+ // Currently we are not able to handle vector types.
+ if (I->getType()->isVectorTy())
+ return 0;
+
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ // Save the instruction before calling other member-functions.
+ Instr = I;
+
+ FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
+
+ unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1);
+
+ // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1.
+ unsigned Opnd0_ExpNum = 0;
+ unsigned Opnd1_ExpNum = 0;
+
+ if (!Opnd0.isConstant())
+ Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
+
+ // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
+ if (OpndNum == 2 && !Opnd1.isConstant())
+ Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1);
+
+ // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1
+ if (Opnd0_ExpNum && Opnd1_ExpNum) {
+ AddendVect AllOpnds;
+ AllOpnds.push_back(&Opnd0_0);
+ AllOpnds.push_back(&Opnd1_0);
+ if (Opnd0_ExpNum == 2)
+ AllOpnds.push_back(&Opnd0_1);
+ if (Opnd1_ExpNum == 2)
+ AllOpnds.push_back(&Opnd1_1);
+
+ // Compute instruction quota. We should save at least one instruction.
+ unsigned InstQuota = 0;
+
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+ InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
+ (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
+
+ if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
+ return R;
+ }
+
+ if (OpndNum != 2) {
+ // The input instruction is "I = 0.0 +/- V". If "V" could be split into
+ // two addends, say "V = X - Y", the instruction would have been
+ // optimized into "I = Y - X" in the previous steps.
+ //
+ const FAddendCoef &CE = Opnd0.getCoef();
+ return CE.isOne() ? Opnd0.getSymVal() : 0;
+ }
+
+ // Step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
+ if (Opnd1_ExpNum) {
+ AddendVect AllOpnds;
+ AllOpnds.push_back(&Opnd0);
+ AllOpnds.push_back(&Opnd1_0);
+ if (Opnd1_ExpNum == 2)
+ AllOpnds.push_back(&Opnd1_1);
+
+ if (Value *R = simplifyFAdd(AllOpnds, 1))
+ return R;
+ }
+
+ // Step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
+ if (Opnd0_ExpNum) {
+ AddendVect AllOpnds;
+ AllOpnds.push_back(&Opnd1);
+ AllOpnds.push_back(&Opnd0_0);
+ if (Opnd0_ExpNum == 2)
+ AllOpnds.push_back(&Opnd0_1);
+
+ if (Value *R = simplifyFAdd(AllOpnds, 1))
+ return R;
+ }
+
+ // Step 6: Try factorization as the last resort.
+ return performFactorization(I);
+}
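+
+// A worked example of the pipeline above: under fast-math, the expression
+// "(%x + %y) - %y" drills down to the addends <1, %x>, <1, %y> and
+// <-1, %y>; the two %y addends fold to zero and the whole expression
+// simplifies to %x without emitting any new instruction.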
+
+Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
+
+ unsigned AddendNum = Addends.size();
+ assert(AddendNum <= 4 && "Too many addends");
+
+ // For saving intermediate results.
+ unsigned NextTmpIdx = 0;
+ FAddend TmpResult[3];
+
+ // Points to the constant addend of the resulting simplified expression.
+ // If the resulting expression has a constant addend, it is desirable for
+ // that addend to reside at the top of the resulting expression tree:
+ // placing constants close to the super-expression(s) will potentially
+ // reveal optimization opportunities in the super-expression(s).
+ //
+ const FAddend *ConstAdd = 0;
+
+ // Simplified addends are placed in <SimpVect>.
+ AddendVect SimpVect;
+
+ // The outer loop works on one symbolic-value at a time. Suppose the input
+ // addends are: <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
+ // The symbolic-values will be processed in this order: x, y, z.
+ //
+ for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
+
+ const FAddend *ThisAddend = Addends[SymIdx];
+ if (!ThisAddend) {
+ // This addend was processed before.
+ continue;
+ }
+
+ Value *Val = ThisAddend->getSymVal();
+ unsigned StartIdx = SimpVect.size();
+ SimpVect.push_back(ThisAddend);
+
+ // The inner loop collects addends sharing the same symbolic value; these
+ // addends will later be folded into a single addend. Following the above
+ // example, if the symbolic value "y" is being processed, the inner loop
+ // will collect the two addends "<b1,y>" and "<b2,y>", which will later be
+ // folded into "<b1+b2, y>".
+ //
+ for (unsigned SameSymIdx = SymIdx + 1;
+ SameSymIdx < AddendNum; SameSymIdx++) {
+ const FAddend *T = Addends[SameSymIdx];
+ if (T && T->getSymVal() == Val) {
+ // Set to null so that the next iteration of the outer loop will not
+ // process this addend again.
+ Addends[SameSymIdx] = 0;
+ SimpVect.push_back(T);
+ }
+ }
+
+ // If multiple addends share the same symbolic value, fold them together.
+ if (StartIdx + 1 != SimpVect.size()) {
+ FAddend &R = TmpResult[NextTmpIdx ++];
+ R = *SimpVect[StartIdx];
+ for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++)
+ R += *SimpVect[Idx];
+
+ // Pop all addends being folded and push the resulting folded addend.
+ SimpVect.resize(StartIdx);
+ if (Val != 0) {
+ if (!R.isZero()) {
+ SimpVect.push_back(&R);
+ }
+ } else {
+ // Don't push the constant addend at this time. It will become the last
+ // element of <SimpVect>.
+ ConstAdd = &R;
+ }
+ }
+ }
+
+ assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
+ "out-of-bound access");
+
+ if (ConstAdd)
+ SimpVect.push_back(ConstAdd);
+
+ Value *Result;
+ if (!SimpVect.empty())
+ Result = createNaryFAdd(SimpVect, InstrQuota);
+ else {
+ // The addition is folded to 0.0.
+ Result = ConstantFP::get(Instr->getType(), 0.0);
+ }
+
+ return Result;
+}
+
+Value *FAddCombine::createNaryFAdd
+ (const AddendVect &Opnds, unsigned InstrQuota) {
+ assert(!Opnds.empty() && "Expect at least one addend");
+
+ // Step 1: Check if the # of instructions needed exceeds the quota.
+ //
+ unsigned InstrNeeded = calcInstrNumber(Opnds);
+ if (InstrNeeded > InstrQuota)
+ return 0;
+
+ initCreateInstNum();
+
+ // Step 2: Emit the N-ary addition.
+ // Note that at most three instructions are involved in Fadd-InstCombine: the
+ // addition in question, and at most two neighboring instructions.
+ // The resulting optimized addition should have at least one fewer instruction
+ // than the original addition expression tree. This implies that the resulting
+ // N-ary addition has at most two instructions, and we don't need to worry
+ // about tree-height when constructing the N-ary addition.
+
+ Value *LastVal = 0;
+ bool LastValNeedNeg = false;
+
+ // Iterate over the addends, creating fadd/fsub from adjacent pairs of addends.
+ for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
+ I != E; I++) {
+ bool NeedNeg;
+ Value *V = createAddendVal(**I, NeedNeg);
+ if (!LastVal) {
+ LastVal = V;
+ LastValNeedNeg = NeedNeg;
+ continue;
+ }
+
+ if (LastValNeedNeg == NeedNeg) {
+ LastVal = createFAdd(LastVal, V);
+ continue;
+ }
+
+ if (LastValNeedNeg)
+ LastVal = createFSub(V, LastVal);
+ else
+ LastVal = createFSub(LastVal, V);
+
+ LastValNeedNeg = false;
+ }
+
+ if (LastValNeedNeg) {
+ LastVal = createFNeg(LastVal);
+ }
+
+ #ifndef NDEBUG
+ assert(CreateInstrNum == InstrNeeded &&
+ "Inconsistent instruction count");
+ #endif
+
+ return LastVal;
+}
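
The control flow of this loop can be modeled in isolation. Below is a hypothetical standalone sketch, not LLVM code (all names and the string-based value representation are invented here), that keeps the same LastVal/LastValNeedNeg bookkeeping to choose between an add and the two orientations of a subtract:

    #include <string>
    #include <utility>
    #include <vector>

    // Standalone model of the emission loop above. Each operand carries a
    // needs-negation flag; the partial sum and its pending-negation state
    // pick between fadd and the two orientations of fsub.
    std::string emitNary(const std::vector<std::pair<std::string, bool>> &Opnds) {
      std::string Last;        // current partial sum
      bool LastNeg = false;    // partial sum still needs negation?
      for (const auto &P : Opnds) {
        if (Last.empty()) { Last = P.first; LastNeg = P.second; continue; }
        if (LastNeg == P.second) {                 // same sign: fadd
          Last = "(" + Last + " + " + P.first + ")";
          continue;
        }
        if (LastNeg)                               // -Last + V ==> V - Last
          Last = "(" + P.first + " - " + Last + ")";
        else                                       // Last + (-V) ==> Last - V
          Last = "(" + Last + " - " + P.first + ")";
        LastNeg = false;
      }
      if (LastNeg)                                 // final fneg as 0 - x
        Last = "(0 - " + Last + ")";
      return Last;
    }

    // emitNary({{"a", false}, {"b", true}}) yields "(a - b)"; a lone negated
    // operand {{"a", true}} yields "(0 - a)", matching the closing createFNeg.
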
+
+Value *FAddCombine::createFSub
+ (Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFSub(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFNeg(Value *V) {
+ Value *Zero = cast<Value>(ConstantFP::get(V->getType(), 0.0));
+ return createFSub(Zero, V);
+}
+
+Value *FAddCombine::createFAdd
+ (Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFMul(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+void FAddCombine::createInstPostProc(Instruction *NewInstr) {
+ NewInstr->setDebugLoc(Instr->getDebugLoc());
+
+ // Keep track of the number of instructions created.
+ incCreateInstNum();
+
+ // Propagate fast-math flags
+ NewInstr->setFastMathFlags(Instr->getFastMathFlags());
+}
+
+// Return the number of instructions needed to emit the N-ary addition.
+// NOTE: Keep this function in sync with createAddendVal().
+unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
+ unsigned OpndNum = Opnds.size();
+ unsigned InstrNeeded = OpndNum - 1;
+
+ // The number of addends in the form of "(-1)*x".
+ unsigned NegOpndNum = 0;
+
+ // Adjust the number of instructions needed to emit the N-ary add.
+ for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
+ I != E; I++) {
+ const FAddend *Opnd = *I;
+ if (Opnd->isConstant())
+ continue;
+
+ const FAddendCoef &CE = Opnd->getCoef();
+ if (CE.isMinusOne() || CE.isMinusTwo())
+ NegOpndNum++;
+
+ // Let the addend be "c * x". If "c == +/-1", the value of the addend
+ // is immediately available; otherwise, it needs exactly one instruction
+ // to evaluate the value.
+ if (!CE.isMinusOne() && !CE.isOne())
+ InstrNeeded++;
+ }
+ if (NegOpndNum == OpndNum)
+ InstrNeeded++;
+ return InstrNeeded;
+}
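
A worked instance of this count: for the addend set {<2, x>, <-1, y>, <C>}, we start from OpndNum - 1 = 2; the coefficient 2 on x is neither +1 nor -1, so it costs one extra instruction, while the -1 coefficient on y and the constant cost nothing, giving InstrNeeded = 3. That matches the emission: "fadd x, x", one fsub against y, and one fadd of the constant.
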
+
+// Input Addend        Value            NeedNeg (output)
+// ================================================================
+//  Constant C          C               false
+//  <+/-1, V>           V               coefficient is -1
+//  <2/-2, V>           "fadd V, V"     coefficient is -2
+//  <C, V>              "fmul V, C"     false
+//
+// NOTE: Keep this function in sync with FAddCombine::calcInstrNumber.
+Value *FAddCombine::createAddendVal
+ (const FAddend &Opnd, bool &NeedNeg) {
+ const FAddendCoef &Coeff = Opnd.getCoef();
+
+ if (Opnd.isConstant()) {
+ NeedNeg = false;
+ return Coeff.getValue(Instr->getType());
+ }
+
+ Value *OpndVal = Opnd.getSymVal();
+
+ if (Coeff.isMinusOne() || Coeff.isOne()) {
+ NeedNeg = Coeff.isMinusOne();
+ return OpndVal;
+ }
+
+ if (Coeff.isTwo() || Coeff.isMinusTwo()) {
+ NeedNeg = Coeff.isMinusTwo();
+ return createFAdd(OpndVal, OpndVal);
+ }
+
+ NeedNeg = false;
+ return createFMul(OpndVal, Coeff.getValue(Instr->getType()));
+}
+
/// AddOne - Add one to a ConstantInt.
static Constant *AddOne(Constant *C) {
return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
+
/// SubOne - Subtract one from a ConstantInt.
static Constant *SubOne(ConstantInt *C) {
return ConstantInt::get(C->getContext(), C->getValue()-1);
@@ -37,10 +861,10 @@ static Constant *SubOne(ConstantInt *C) {
static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
if (!V->hasOneUse() || !V->getType()->isIntegerTy())
return 0;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return 0;
-
+
if (I->getOpcode() == Instruction::Mul)
if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
return I->getOperand(0);
@@ -64,22 +888,22 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
// There are different heuristics we can use for this. Here are some simple
// ones.
-
- // Add has the property that adding any two 2's complement numbers can only
+
+ // Add has the property that adding any two 2's complement numbers can only
// have one carry bit which can change a sign. As such, if LHS and RHS each
// have at least two sign bits, we know that the addition of the two values
// will sign extend fine.
if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
return true;
-
-
+
+
// If one of the operands only has one non-zero bit, and if the other operand
// has a known-zero bit in a more significant place than it (not including the
// sign bit) the ripple may go up to and fill the zero, but won't change the
// sign. For example, (X & ~4) + 1.
-
+
// TODO: Implement.
-
+
return false;
}
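
For intuition on the sign-bit check: in i8, a value with at least two sign bits lies in [-64, 63], so the sum of two such values lies in [-128, 126] and can never wrap the i8 range [-128, 127].
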
@@ -100,7 +924,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
const APInt &Val = CI->getValue();
if (Val.isSignBit())
return BinaryOperator::CreateXor(LHS, RHS);
-
+
// See if SimplifyDemandedBits can simplify this. This handles stuff like
// (X & 254)+1 -> (X&254)|1
if (SimplifyDemandedInstructionBits(I))
@@ -110,7 +934,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
if (ZI->getSrcTy()->isIntegerTy(1))
return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
-
+
Value *XorLHS = 0; ConstantInt *XorRHS = 0;
if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
@@ -124,13 +948,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
else if (XorRHS->getValue().isPowerOf2())
ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
}
-
+
if (ExtendAmt) {
APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
if (!MaskedValueIsZero(XorLHS, Mask))
ExtendAmt = 0;
}
-
+
if (ExtendAmt) {
Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
@@ -175,7 +999,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
return BinaryOperator::CreateNeg(NewAdd);
}
-
+
return BinaryOperator::CreateSub(RHS, LHSV);
}
@@ -209,7 +1033,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
APInt RHSKnownOne(IT->getBitWidth(), 0);
APInt RHSKnownZero(IT->getBitWidth(), 0);
ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
-
+
// No bits in common -> bitwise or.
if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
return BinaryOperator::CreateOr(LHS, RHS);
@@ -251,7 +1075,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// See if all bits from the first bit set in the Add RHS up are included
// in the mask. First, get the rightmost bit.
const APInt &AddRHSV = CRHS->getValue();
-
+
// Form a mask of all bits from the lowest bit added through the top.
APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
@@ -289,7 +1113,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the true select value.
return SelectInst::Create(SI->getCondition(), N, A);
-
+
if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the false select value.
return SelectInst::Create(SI->getCondition(), A, N);
@@ -301,18 +1125,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
// (add (sext x), cst) --> (sext (add x, cst'))
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
- Constant *CI =
+ Constant *CI =
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
// Insert the new, smaller add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
return new SExtInst(NewAdd, I.getType());
}
}
-
+
// (add (sext x), (sext y)) --> (sext (add int x, y))
if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
 // Only do this if x/y have the same type, if at least one of them has a
@@ -323,7 +1147,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0))) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), "addconv");
return new SExtInst(NewAdd, I.getType());
}
@@ -351,18 +1175,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
- // X + 0 --> X
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->isExactlyValue(ConstantFP::getNegativeZero
- (I.getType())->getValueAPF()))
- return ReplaceInstUsesWith(I, LHS);
- }
+ if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD))
+ return ReplaceInstUsesWith(I, V);
- if (isa<PHINode>(LHS))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- }
+ if (isa<Constant>(RHS) && isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
// -A + B --> B - A
// -A + -B --> -(A + B)
@@ -374,11 +1192,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (Value *V = dyn_castFNegVal(RHS))
return BinaryOperator::CreateFSub(LHS, V);
- // Check for X+0.0. Simplify it to X if we know X is not -0.0.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
- if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
- return ReplaceInstUsesWith(I, LHS);
-
// Check for (fadd double (sitofp x), y), see if we can merge this into an
// integer add followed by a promotion.
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
@@ -388,7 +1201,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// requires a constant pool load, and generally allows the add to be better
// instcombined.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
- Constant *CI =
+ Constant *CI =
ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
@@ -399,7 +1212,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
return new SIToFPInst(NewAdd, I.getType());
}
}
-
+
// (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
 // Only do this if x/y have the same type, if at least one of them has a
@@ -410,13 +1223,18 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0))) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0),"addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
}
-
+
+ if (I.hasUnsafeAlgebra()) {
+ if (Value *V = FAddCombine(Builder).simplify(&I))
+ return ReplaceInstUsesWith(I, V);
+ }
+
return Changed ? &I : 0;
}
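
To illustrate what the new FAddCombine entry point can do under unsafe algebra (a plausible outcome, not a guaranteed one; the actual result is gated by the instruction-quota check in createNaryFAdd):

    %t = fadd fast float %x, %x
    %r = fadd fast float %t, %x

decomposes into the addends <1,x>, <1,x>, <1,x>, which fold to <3,x> and re-emit as a single "fmul fast float %x, 3.0", per the <C, V> row of the table above.
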
@@ -428,7 +1246,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Type *Ty) {
assert(TD && "Must have target data info for this");
-
+
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
@@ -451,7 +1269,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
}
}
}
-
+
if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
// X - (gep X, ...)
if (RHSGEP->getOperand(0) == LHS) {
@@ -467,16 +1285,16 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
}
}
}
-
+
// Avoid duplicating the arithmetic if GEP2 has non-constant indices and
// multiple users.
if (GEP1 == 0 ||
(GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse()))
return 0;
-
+
// Emit the offset of the GEP and an intptr_t.
Value *Result = EmitGEPOffset(GEP1);
-
+
// If we had a constant expression GEP on the other side offsetting the
// pointer, subtract it from the offset we have.
if (GEP2) {
@@ -517,7 +1335,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Replace (-1 - A) with (~A).
if (match(Op0, m_AllOnes()))
return BinaryOperator::CreateNot(Op1);
-
+
if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
// C - ~X == X + (1+C)
Value *X = 0;
@@ -551,20 +1369,30 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (SimplifyDemandedInstructionBits(I))
return &I;
+
+ // Fold (sub 0, (zext bool to B)) --> (sext bool to B)
+ if (C->isZero() && match(Op1, m_ZExt(m_Value(X))))
+ if (X->getType()->isIntegerTy(1))
+ return CastInst::CreateSExtOrBitCast(X, Op1->getType());
+
+ // Fold (sub 0, (sext bool to B)) --> (zext bool to B)
+ if (C->isZero() && match(Op1, m_SExt(m_Value(X))))
+ if (X->getType()->isIntegerTy(1))
+ return CastInst::CreateZExtOrBitCast(X, Op1->getType());
}
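
These two folds are correct because, for an i1 %b, "zext %b" evaluates to 0 or 1, so "sub 0, (zext %b)" evaluates to 0 or -1, which is exactly "sext %b"; symmetrically, "sub 0, (sext %b)" evaluates to 0 or 1, i.e. "zext %b".
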
-
+
{ Value *Y;
// X-(X+Y) == -Y X-(Y+X) == -Y
if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
return BinaryOperator::CreateNeg(Y);
-
+
// (X-Y)-X == -Y
if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
return BinaryOperator::CreateNeg(Y);
}
-
+
if (Op1->hasOneUse()) {
Value *X = 0, *Y = 0, *Z = 0;
Constant *C = 0;
@@ -581,7 +1409,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
match(Op1, m_And(m_Specific(Op0), m_Value(Y))))
return BinaryOperator::CreateAnd(Op0,
Builder->CreateNot(Y, Y->getName() + ".not"));
-
+
// 0 - (X sdiv C) -> (X sdiv -C)
if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) &&
match(Op0, m_Zero()))
@@ -604,14 +1432,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI));
return BinaryOperator::CreateMul(Op0, C);
}
-
+
// X - A*-B -> X + A*B
// X - -A*B -> X + A*B
Value *A, *B;
if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) ||
match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B))))
return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
-
+
// X - A*CI -> X + A*-CI
// X - CI*A -> X + A*-CI
if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) ||
@@ -630,7 +1458,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (X == dyn_castFoldableMul(Op1, C2))
return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
}
-
+
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
if (TD) {
@@ -639,23 +1467,31 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
match(Op1, m_PtrToInt(m_Value(RHSOp))))
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
return ReplaceInstUsesWith(I, Res);
-
+
// trunc(p)-trunc(q) -> trunc(p-q)
if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
return ReplaceInstUsesWith(I, Res);
}
-
+
return 0;
}
Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD))
+ return ReplaceInstUsesWith(I, V);
+
// If this is a 'B = x-(-A)', change to B = x+A...
if (Value *V = dyn_castFNegVal(Op1))
return BinaryOperator::CreateFAdd(Op0, V);
+ if (I.hasUnsafeAlgebra()) {
+ if (Value *V = FAddCombine(Builder).simplify(&I))
+ return ReplaceInstUsesWith(I, V);
+ }
+
return 0;
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 7d0af0d80226..990cbc3d594e 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -12,18 +12,18 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/Intrinsics.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
using namespace llvm;
using namespace PatternMatch;
/// AddOne - Add one to a ConstantInt.
-static Constant *AddOne(Constant *C) {
- return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+static Constant *AddOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue() + 1);
}
/// SubOne - Subtract one from a ConstantInt.
static Constant *SubOne(ConstantInt *C) {
@@ -36,15 +36,15 @@ static inline bool isFreeToInvert(Value *V) {
// ~(~(X)) -> X.
if (BinaryOperator::isNot(V))
return true;
-
+
// Constants can be considered to be not'ed values.
if (isa<ConstantInt>(V))
return true;
-
+
// Compares can be inverted if they have a single use.
if (CmpInst *CI = dyn_cast<CmpInst>(V))
return CI->hasOneUse();
-
+
return false;
}
@@ -56,7 +56,7 @@ static inline Value *dyn_castNotVal(Value *V) {
if (!isFreeToInvert(Operand))
return Operand;
}
-
+
// Constants can be considered to be not'ed values...
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
return ConstantInt::get(C->getType(), ~C->getValue());
@@ -91,7 +91,7 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
}
/// getNewICmpValue - This is the complement of getICmpCode, which turns an
-/// opcode and two operands into either a constant true or false, or a brand
+/// opcode and two operands into either a constant true or false, or a brand
/// new ICmp instruction. The sign is passed in to determine which kind
/// of predicate to use in the new icmp instruction.
static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
@@ -118,7 +118,7 @@ static Value *getFCmpValue(bool isordered, unsigned code,
case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
- case 7:
+ case 7:
if (!isordered) return ConstantInt::getTrue(LHS->getContext());
Pred = FCmpInst::FCMP_ORD; break;
}
@@ -154,7 +154,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
Or->takeName(Op);
return BinaryOperator::CreateAnd(Or, AndRHS);
}
-
+
ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
if (TogetherCI && !TogetherCI->isZero()){
// (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
@@ -166,7 +166,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
return BinaryOperator::CreateOr(And, OpRHS);
}
}
-
+
break;
case Instruction::Add:
if (Op->hasOneUse()) {
@@ -215,7 +215,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
if (CI->getValue() == ShlMask)
// Masking out bits that the shift already masks.
return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
-
+
if (CI != AndRHS) { // Reducing bits set in and.
TheAnd.setOperand(1, CI);
return &TheAnd;
@@ -236,7 +236,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
if (CI->getValue() == ShrMask)
// Masking out bits that the shift already masks.
return ReplaceInstUsesWith(TheAnd, Op);
-
+
if (CI != AndRHS) {
TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
return &TheAnd;
@@ -269,22 +269,22 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
-/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
+/// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
/// whether to treat V, Lo, and Hi as signed or not. IB is the location to
/// insert new instructions.
Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
bool isSigned, bool Inside) {
- assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
+ assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
"Lo is not <= Hi in range emission code!");
-
+
if (Inside) {
if (Lo == Hi) // Trivially false.
return ConstantInt::getFalse(V->getContext());
// V >= Min && V < Hi --> V < Hi
if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
- ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::Predicate pred = (isSigned ?
ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
return Builder->CreateICmp(pred, V, Hi);
}
@@ -302,7 +302,7 @@ Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
// V < Min || V >= Hi -> V > Hi-1
Hi = SubOne(cast<ConstantInt>(Hi));
if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
- ICmpInst::Predicate pred = (isSigned ?
+ ICmpInst::Predicate pred = (isSigned ?
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
return Builder->CreateICmp(pred, V, Hi);
}
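
A concrete instance of the (V-Lo) <u Hi-Lo trick: the range test "V >= 5 && V < 10" is emitted as "(V - 5) <u 5". The subtraction rotates the valid interval down to [0, 5); any V below 5 wraps around to a large unsigned value, so the single unsigned compare is exact.
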
@@ -327,14 +327,14 @@ static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
// look for the first zero bit after the run of ones
MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
// look for the first non-zero bit
- ME = V.getActiveBits();
+ ME = V.getActiveBits();
return true;
}
/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
/// where isSub determines whether the operator is a sub. If we can fold one of
/// the following xforms:
-///
+///
/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
@@ -355,8 +355,8 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
case Instruction::And:
if (ConstantExpr::getAnd(N, Mask) == Mask) {
// If the AndRHS is a power of two minus one (0+1+), this is simple.
- if ((Mask->getValue().countLeadingZeros() +
- Mask->getValue().countPopulation()) ==
+ if ((Mask->getValue().countLeadingZeros() +
+ Mask->getValue().countPopulation()) ==
Mask->getValue().getBitWidth())
break;
@@ -375,33 +375,33 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
case Instruction::Or:
case Instruction::Xor:
// If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
- if ((Mask->getValue().countLeadingZeros() +
+ if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
&& ConstantExpr::getAnd(N, Mask)->isNullValue())
break;
return 0;
}
-
+
if (isSub)
return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
}
/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
-/// One of A and B is considered the mask, the other the value. This is
-/// described as the "AMask" or "BMask" part of the enum. If the enum
+/// One of A and B is considered the mask, the other the value. This is
+/// described as the "AMask" or "BMask" part of the enum. If the enum
/// contains only "Mask", then both A and B can be considered masks.
/// If A is the mask, then it has been proven that (A & C) == C. This
/// is trivial if C == A or C == 0. If both A and C are constants, this
/// proof is also easy.
/// For the following explanations we assume that A is the mask.
-/// The part "AllOnes" declares, that the comparison is true only
+/// The part "AllOnes" declares that the comparison is true only
/// if (A & B) == A, or all bits of A are set in B.
/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
-/// The part "AllZeroes" declares, that the comparison is true only
+/// The part "AllZeroes" declares that the comparison is true only
/// if (A & B) == 0, or all bits of A are cleared in B.
/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
-/// The part "Mixed" declares, that (A & B) == C and C might or might not
+/// The part "Mixed" declares that (A & B) == C and C might or might not
/// contain any number of one bits and zero bits.
/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
/// The part "Not" means that in the above descriptions "==" should be replaced
@@ -425,16 +425,16 @@ enum MaskedICmpType {
/// return the set of pattern classes (from MaskedICmpType)
/// that (icmp SCC (A & B), C) satisfies
-static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
ICmpInst::Predicate SCC)
{
ConstantInt *ACst = dyn_cast<ConstantInt>(A);
ConstantInt *BCst = dyn_cast<ConstantInt>(B);
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
- bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
+ bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
ACst->getValue().isPowerOf2());
- bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
+ bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
BCst->getValue().isPowerOf2());
unsigned result = 0;
if (CCst != 0 && CCst->isZero()) {
@@ -449,12 +449,12 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
FoldMskICmp_BMask_NotMixed));
if (icmp_abit)
result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
- FoldMskICmp_AMask_NotMixed)
+ FoldMskICmp_AMask_NotMixed)
: (FoldMskICmp_AMask_AllOnes |
FoldMskICmp_AMask_Mixed));
if (icmp_bbit)
result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
- FoldMskICmp_BMask_NotMixed)
+ FoldMskICmp_BMask_NotMixed)
: (FoldMskICmp_BMask_AllOnes |
FoldMskICmp_BMask_Mixed));
return result;
@@ -469,26 +469,23 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
FoldMskICmp_AMask_NotMixed)
: (FoldMskICmp_Mask_AllZeroes |
FoldMskICmp_AMask_Mixed));
- }
- else if (ACst != 0 && CCst != 0 &&
- ConstantExpr::getAnd(ACst, CCst) == CCst) {
+ } else if (ACst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(ACst, CCst) == CCst) {
result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
: FoldMskICmp_AMask_NotMixed);
}
- if (B == C)
- {
+ if (B == C) {
result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
FoldMskICmp_BMask_Mixed)
: (FoldMskICmp_BMask_NotAllOnes |
FoldMskICmp_BMask_NotMixed));
if (icmp_bbit)
result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
- FoldMskICmp_BMask_NotMixed)
+ FoldMskICmp_BMask_NotMixed)
: (FoldMskICmp_Mask_AllZeroes |
FoldMskICmp_BMask_Mixed));
- }
- else if (BCst != 0 && CCst != 0 &&
- ConstantExpr::getAnd(BCst, CCst) == CCst) {
+ } else if (BCst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(BCst, CCst) == CCst) {
result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
: FoldMskICmp_BMask_NotMixed);
}
@@ -531,7 +528,7 @@ static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// return the set of pattern classes (from MaskedICmpType)
/// that both LHS and RHS satisfy
-static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
+static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
Value*& B, Value*& C,
Value*& D, Value*& E,
ICmpInst *LHS, ICmpInst *RHS,
@@ -542,10 +539,10 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
// Here comes the tricky part:
- // LHS might be of the form L11 & L12 == X, X == L21 & L22,
+ // LHS might be of the form L11 & L12 == X, X == L21 & L22,
// and L11 & L12 == L21 & L22. The same goes for RHS.
// Now we must find those components L** and R**, that are equal, so
- // that we can extract the parameters A, B, C, D, and E for the canonical
+ // that we can extract the parameters A, B, C, D, and E for the canonical
// above.
Value *L1 = LHS->getOperand(0);
Value *L2 = LHS->getOperand(1);
@@ -610,14 +607,11 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
if (L11 == A) {
B = L12; C = L2;
- }
- else if (L12 == A) {
+ } else if (L12 == A) {
B = L11; C = L2;
- }
- else if (L21 == A) {
+ } else if (L21 == A) {
B = L22; C = L1;
- }
- else if (L22 == A) {
+ } else if (L22 == A) {
B = L21; C = L1;
}
@@ -643,32 +637,32 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
mask >>= 1; // treat "Not"-states as normal states
if (mask & FoldMskICmp_Mask_AllZeroes) {
- // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
+ // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
// -> (icmp eq (A & (B|D)), 0)
Value* newOr = Builder->CreateOr(B, D);
Value* newAnd = Builder->CreateAnd(A, newOr);
// we can't use C as zero, because we might actually handle
- // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
// with B and D having a single bit set
Value* zero = Constant::getNullValue(A->getType());
return Builder->CreateICmp(NEWCC, newAnd, zero);
}
- else if (mask & FoldMskICmp_BMask_AllOnes) {
- // (icmp eq (A & B), B) & (icmp eq (A & D), D)
+ if (mask & FoldMskICmp_BMask_AllOnes) {
+ // (icmp eq (A & B), B) & (icmp eq (A & D), D)
// -> (icmp eq (A & (B|D)), (B|D))
Value* newOr = Builder->CreateOr(B, D);
Value* newAnd = Builder->CreateAnd(A, newOr);
return Builder->CreateICmp(NEWCC, newAnd, newOr);
- }
- else if (mask & FoldMskICmp_AMask_AllOnes) {
- // (icmp eq (A & B), A) & (icmp eq (A & D), A)
+ }
+ if (mask & FoldMskICmp_AMask_AllOnes) {
+ // (icmp eq (A & B), A) & (icmp eq (A & D), A)
// -> (icmp eq (A & (B&D)), A)
Value* newAnd1 = Builder->CreateAnd(B, D);
Value* newAnd = Builder->CreateAnd(A, newAnd1);
return Builder->CreateICmp(NEWCC, newAnd, A);
}
- else if (mask & FoldMskICmp_BMask_Mixed) {
- // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ if (mask & FoldMskICmp_BMask_Mixed) {
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
// We already know that B & C == C && D & E == E.
// If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
// C and E, which are shared by both the mask B and the mask D, don't
@@ -680,7 +674,7 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
ConstantInt *DCst = dyn_cast<ConstantInt>(D);
if (DCst == 0) return 0;
// we can't simply use C and E, because we might actually handle
- // (icmp ne (A & B), B) & (icmp eq (A & D), D)
+ // (icmp ne (A & B), B) & (icmp eq (A & D), D)
// with B and D having a single bit set
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
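
The rewrites above are easy to sanity-check exhaustively at a narrow width. The snippet below is an illustrative standalone checker, not part of the patch (the 8-bit width and the masks 0x0C and 0x30 are arbitrary choices for this sketch); it verifies the Mask_AllZeroes rewrite for every 8-bit value:

    #include <cassert>
    #include <cstdint>

    // Exhaustive 8-bit check of the Mask_AllZeroes rewrite:
    //   (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
    //     -> (icmp eq (A & (B|D)), 0)
    int main() {
      const uint8_t B = 0x0C, D = 0x30;
      for (unsigned A = 0; A < 256; ++A) {
        const bool Original = ((A & B) == 0) && ((A & D) == 0);
        const bool Folded = (A & (B | D)) == 0;
        assert(Original == Folded);
      }
      return 0;
    }
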
@@ -727,13 +721,13 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
return V;
-
+
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
if (LHSCst == 0 || RHSCst == 0) return 0;
-
+
if (LHSCst == RHSCst && LHSCC == RHSCC) {
// (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
// where C is a power of 2
@@ -742,7 +736,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
-
+
// (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
Value *NewOr = Builder->CreateOr(Val, Val2);
@@ -759,14 +753,13 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
// (trunc x) == C1 & (and x, CA) == C2
+ // (and x, CA) == C2 & (trunc x) == C1
if (match(Val2, m_Trunc(m_Value(V))) &&
match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
SmallCst = RHSCst;
BigCst = LHSCst;
- }
- // (and x, CA) == C2 & (trunc x) == C1
- else if (match(Val, m_Trunc(m_Value(V))) &&
- match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
+ } else if (match(Val, m_Trunc(m_Value(V))) &&
+ match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
SmallCst = LHSCst;
BigCst = RHSCst;
}
@@ -789,7 +782,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// From here on, we only handle:
// (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
if (Val != Val2) return 0;
-
+
// ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
@@ -799,9 +792,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// Make a constant range that's the intersection of the two icmp ranges.
// If the intersection is empty, we know that the result is false.
- ConstantRange LHSRange =
+ ConstantRange LHSRange =
ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
- ConstantRange RHSRange =
+ ConstantRange RHSRange =
ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
if (LHSRange.intersectWith(RHSRange).isEmptySet())
@@ -810,16 +803,16 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// We can't fold (ugt x, C) & (sgt x, C2).
if (!PredicatesFoldable(LHSCC, RHSCC))
return 0;
-
+
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
if (CmpInst::isSigned(LHSCC) ||
- (ICmpInst::isEquality(LHSCC) &&
+ (ICmpInst::isEquality(LHSCC) &&
CmpInst::isSigned(RHSCC)))
ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
else
ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
-
+
if (ShouldSwap) {
std::swap(LHS, RHS);
std::swap(LHSCst, RHSCst);
@@ -829,8 +822,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// At this point, we know we have two icmp instructions
// comparing a value against two constants and and'ing the result
// together. Because of the above check, we know that we only have
- // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know
- // (from the icmp folding check above), that the two constants
+ // icmp eq, icmp ne, icmp [su]lt, and icmp [su]gt here. We also know
+ // (from the icmp folding check above) that the two constants
// are not equal and that the larger constant is on the RHS
assert(LHSCst != RHSCst && "Compares not folded above?");
@@ -932,7 +925,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
break;
}
-
+
return 0;
}
@@ -951,7 +944,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
return ConstantInt::getFalse(LHS->getContext());
return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
}
-
+
// Handle vector zeros. This occurs because the canonical form of
// "fcmp ord x,x" is "fcmp ord x, 0".
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
@@ -959,18 +952,18 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
return 0;
}
-
+
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
-
-
+
+
if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
// Swap RHS operands to match LHS.
Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
std::swap(Op1LHS, Op1RHS);
}
-
+
if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) {
// Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
if (Op0CC == Op1CC)
@@ -981,7 +974,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
return RHS;
if (Op1CC == FCmpInst::FCMP_TRUE)
return LHS;
-
+
bool Op0Ordered;
bool Op1Ordered;
unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered);
@@ -1001,7 +994,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
return LHS;
if (Op0Ordered && (Op0Ordered == Op1Ordered))
return RHS;
-
+
// uno && oeq -> uno && (ord && eq) -> false
if (!Op0Ordered)
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
@@ -1025,10 +1018,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = SimplifyUsingDistributiveLaws(I))
return ReplaceInstUsesWith(I, V);
- // See if we can simplify any instructions used by the instruction whose sole
+ // See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
- return &I;
+ return &I;
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt &AndRHSMask = AndRHS->getValue();
@@ -1043,7 +1036,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
case Instruction::Or: {
// If the mask is only needed on one incoming arm, push it up.
if (!Op0I->hasOneUse()) break;
-
+
APInt NotAndRHS(~AndRHSMask);
if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
// Not masking anything out for the LHS, move to RHS.
@@ -1103,12 +1096,12 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
break;
}
-
+
if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
return Res;
}
-
+
// If this is an integer truncation, and if the source is an 'and' with
// immediate, transform it. This frequently occurs for bitfield accesses.
{
@@ -1116,7 +1109,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
// Change: and (trunc (and X, YC) to T), C2
// into : and (trunc X to T), trunc(YC) & C2
- // This will fold the two constants together, which may allow
+ // This will fold the two constants together, which may allow
// other simplifications.
Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
@@ -1143,7 +1136,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
I.getName()+".demorgan");
return BinaryOperator::CreateNot(Or);
}
-
+
{
Value *A = 0, *B = 0, *C = 0, *D = 0;
// (A|B) & ~(A&B) -> A^B
@@ -1151,13 +1144,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
((A == C && B == D) || (A == D && B == C)))
return BinaryOperator::CreateXor(A, B);
-
+
// ~(A&B) & (A|B) -> A^B
if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
((A == C && B == D) || (A == D && B == C)))
return BinaryOperator::CreateXor(A, B);
-
+
// A&(A^B) => A & ~B
{
Value *tmpOp0 = Op0;
@@ -1193,19 +1186,19 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
return BinaryOperator::CreateAnd(A, Op0);
}
-
+
if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0))
if (Value *Res = FoldAndOfICmps(LHS, RHS))
return ReplaceInstUsesWith(I, Res);
-
+
// If and'ing two fcmp, try combine them into one.
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
if (Value *Res = FoldAndOfFCmps(LHS, RHS))
return ReplaceInstUsesWith(I, Res);
-
-
+
+
// fold (and (cast A), (cast B)) -> (cast (and A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
@@ -1214,21 +1207,21 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
SrcTy == Op1C->getOperand(0)->getType() &&
SrcTy->isIntOrIntVectorTy()) {
Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
-
+
// Only do this if the casts both really cause code to be generated.
if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
-
+
// If this is and(cast(icmp), cast(icmp)), try to fold this even if the
// cast is otherwise not optimizable. This happens for vector sexts.
if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
if (Value *Res = FoldAndOfICmps(LHS, RHS))
return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-
+
// If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the
// cast is otherwise not optimizable. This happens for vector sexts.
if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
@@ -1237,21 +1230,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
}
}
-
+
// (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
- if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
SI0->getOperand(1) == SI1->getOperand(1) &&
(SI0->hasOneUse() || SI1->hasOneUse())) {
Value *NewOp =
Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0),
SI0->getName());
- return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
SI1->getOperand(1));
}
}
+ {
+ Value *X = 0;
+ bool OpsSwapped = false;
+ // Canonicalize SExt or Not to the LHS
+ if (match(Op1, m_SExt(m_Value())) ||
+ match(Op1, m_Not(m_Value()))) {
+ std::swap(Op0, Op1);
+ OpsSwapped = true;
+ }
+
+ // Fold (and (sext bool to A), B) --> (select bool, B, 0)
+ if (match(Op0, m_SExt(m_Value(X))) &&
+ X->getType()->getScalarType()->isIntegerTy(1)) {
+ Value *Zero = Constant::getNullValue(Op1->getType());
+ return SelectInst::Create(X, Op1, Zero);
+ }
+
+ // Fold (and ~(sext bool to A), B) --> (select bool, 0, B)
+ if (match(Op0, m_Not(m_SExt(m_Value(X)))) &&
+ X->getType()->getScalarType()->isIntegerTy(1)) {
+ Value *Zero = Constant::getNullValue(Op0->getType());
+ return SelectInst::Create(X, Zero, Op1);
+ }
+
+ if (OpsSwapped)
+ std::swap(Op0, Op1);
+ }
+
return Changed ? &I : 0;
}
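
The select folds introduced here rest on the fact that sext of an i1 yields all-ones for true and all-zeros for false: and'ing B with all-ones gives B and with zero gives 0, which is precisely "select bool, B, 0". The Not variant inverts the mask and therefore simply swaps the two select arms.
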
@@ -1288,11 +1309,11 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
ByteValues);
}
-
+
// If this is a logical shift by a constant multiple of 8, recurse with
// OverallLeftShift and ByteMask adjusted.
if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
- unsigned ShAmt =
+ unsigned ShAmt =
cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
// Ensure the shift amount is defined and of a byte value.
if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
@@ -1313,7 +1334,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
if (OverallLeftShift >= (int)ByteValues.size()) return true;
if (OverallLeftShift <= -(int)ByteValues.size()) return true;
- return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
ByteValues);
}
@@ -1325,20 +1346,20 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
unsigned NumBytes = ByteValues.size();
APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
-
+
for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
// If this byte is masked out by a later operation, we don't care what
// the and mask is.
if ((ByteMask & (1 << i)) == 0)
continue;
-
+
// If the AndMask is all zeros for this byte, clear the bit.
APInt MaskB = AndMask & Byte;
if (MaskB == 0) {
ByteMask &= ~(1U << i);
continue;
}
-
+
// If the AndMask is not all ones for this byte, it's not a bytezap.
if (MaskB != Byte)
return true;
@@ -1346,11 +1367,11 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
// Otherwise, this byte is kept.
}
- return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
+ return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
ByteValues);
}
}
-
+
// Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
// the input value to the bswap. Some observations: 1) if more than one byte
// is demanded from this input, then it could not be successfully assembled
@@ -1358,7 +1379,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
// their ultimate destination.
if (!isPowerOf2_32(ByteMask)) return true;
unsigned InputByteNo = CountTrailingZeros_32(ByteMask);
-
+
// 2) The input and ultimate destinations must line up: if byte 3 of an i32
// is demanded, it needs to go into byte 0 of the result. This means that the
// byte needs to be shifted until it lands in the right byte bucket. The
@@ -1368,7 +1389,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
unsigned DestByteNo = InputByteNo + OverallLeftShift;
if (ByteValues.size()-1-DestByteNo != InputByteNo)
return true;
-
+
// If the destination byte value is already defined, the values are or'd
// together, which isn't a bswap (unless it's an or of the same bits).
if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V)
@@ -1381,25 +1402,25 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
/// If so, insert the new bswap intrinsic and return it.
Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
- if (!ITy || ITy->getBitWidth() % 16 ||
+ if (!ITy || ITy->getBitWidth() % 16 ||
// ByteMask only allows up to 32-byte values.
- ITy->getBitWidth() > 32*8)
+ ITy->getBitWidth() > 32*8)
return 0; // Can only bswap pairs of bytes. Can't do vectors.
-
+
/// ByteValues - For each byte of the result, we keep track of which value
/// defines each byte.
SmallVector<Value*, 8> ByteValues;
ByteValues.resize(ITy->getBitWidth()/8);
-
+
// Try to find all the pieces corresponding to the bswap.
uint32_t ByteMask = ~0U >> (32-ByteValues.size());
if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
return 0;
-
+
// Check to see if all of the bytes come from the same value.
Value *V = ByteValues[0];
if (V == 0) return 0; // Didn't find a byte? Must be zero.
-
+
// Check to make sure that all of the bytes come from the same value.
for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
if (ByteValues[i] != V)
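
For reference, a classic i32 byte-swap idiom of the kind this matcher is intended to recognize (whether a particular form matches depends on the shift/mask checks above) is:

    (x >> 24) | ((x >> 8) & 0xFF00) | ((x << 8) & 0xFF0000) | (x << 24)

Each byte is routed to its mirrored position, and the whole or-tree is replaced by a single call to the llvm.bswap.i32 intrinsic.
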
@@ -1425,7 +1446,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
return SelectInst::Create(Cond, C, B);
if (match(D, m_SExt(m_Not(m_Specific(Cond)))))
return SelectInst::Create(Cond, C, B);
-
+
// ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
if (match(B, m_Not(m_SExt(m_Specific(Cond)))))
return SelectInst::Create(Cond, C, D);
@@ -1483,33 +1504,33 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// From here on, we only handle:
// (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
if (Val != Val2) return 0;
-
+
// ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
return 0;
-
+
// We can't fold (ugt x, C) | (sgt x, C2).
if (!PredicatesFoldable(LHSCC, RHSCC))
return 0;
-
+
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
if (CmpInst::isSigned(LHSCC) ||
- (ICmpInst::isEquality(LHSCC) &&
+ (ICmpInst::isEquality(LHSCC) &&
CmpInst::isSigned(RHSCC)))
ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
else
ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
-
+
if (ShouldSwap) {
std::swap(LHS, RHS);
std::swap(LHSCst, RHSCst);
std::swap(LHSCC, RHSCC);
}
-
+
// At this point, we know we have two icmp instructions
// comparing a value against two constants and or'ing the result
// together. Because of the above check, we know that we only have
@@ -1531,6 +1552,20 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
return Builder->CreateICmpULT(Add, AddCST);
}
+
+ if (LHS->getOperand(0) == RHS->getOperand(0)) {
+ // if LHSCst and RHSCst differ only by one bit:
+ // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
+ assert(LHSCst->getValue().ule(RHSCst->getValue()));
+
+ APInt Xor = LHSCst->getValue() ^ RHSCst->getValue();
+ if (Xor.isPowerOf2()) {
+ Value *NegCst = Builder->getInt(~Xor);
+ Value *And = Builder->CreateAnd(LHS->getOperand(0), NegCst);
+ return Builder->CreateICmp(ICmpInst::ICMP_EQ, And, LHSCst);
+ }
+ }
+
break; // (X == 13 | X == 15) -> no change
case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
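
Worked example of this new fold: for "(X == 13 | X == 15)", 13 ^ 15 = 2 is a power of two, so the result is "(X & ~2) == 13"; clearing bit 1 maps both 13 (0b1101) and 15 (0b1111) to 13 and maps every other value elsewhere.
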
@@ -1632,7 +1667,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
/// function.
Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
- RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ RHS->getPredicate() == FCmpInst::FCMP_UNO &&
LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) {
if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
@@ -1640,25 +1675,25 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
// true.
if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
return ConstantInt::getTrue(LHS->getContext());
-
+
// Otherwise, no need to compare the two constants, compare the
// rest.
return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
}
-
+
// Handle vector zeros. This occurs because the canonical form of
// "fcmp uno x,x" is "fcmp uno x, 0".
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
isa<ConstantAggregateZero>(RHS->getOperand(1)))
return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
-
+
return 0;
}
-
+
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1);
FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate();
-
+
if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) {
// Swap RHS operands to match LHS.
Op1CC = FCmpInst::getSwappedPredicate(Op1CC);
@@ -1692,7 +1727,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
/// ((A | B) & C1) | (B & C2)
///
/// into:
-///
+///
/// (A & C1) | B
///
/// when the XOR of the two constants is "all ones" (-1).
@@ -1727,7 +1762,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyUsingDistributiveLaws(I))
return ReplaceInstUsesWith(I, V);
- // See if we can simplify any instructions used by the instruction whose sole
+ // See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
@@ -1741,7 +1776,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Op0->hasOneUse()) {
Value *Or = Builder->CreateOr(X, RHS);
Or->takeName(Op0);
- return BinaryOperator::CreateAnd(Or,
+ return BinaryOperator::CreateAnd(Or,
ConstantInt::get(I.getContext(),
RHS->getValue() | C1->getValue()));
}
@@ -1778,7 +1813,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Instruction *BSwap = MatchBSwap(I))
return BSwap;
}
-
+
// (X^C)|Y -> (X|Y)^C iff Y&C == 0
if (Op0->hasOneUse() &&
match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
@@ -1827,7 +1862,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return ReplaceInstUsesWith(I, B);
}
}
-
+
if ((C1->getValue() & C2->getValue()) == 0) {
// ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
// iff (C1&C2) == 0 and (N&~C1) == 0
@@ -1844,7 +1879,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateAnd(B,
ConstantInt::get(B->getContext(),
C1->getValue()|C2->getValue()));
-
+
// ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
// iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
ConstantInt *C3 = 0, *C4 = 0;
@@ -1904,16 +1939,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Ret) return Ret;
}
}
-
+
// (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) {
if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0))
- if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
+ if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&
SI0->getOperand(1) == SI1->getOperand(1) &&
(SI0->hasOneUse() || SI1->hasOneUse())) {
Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0),
SI0->getName());
- return BinaryOperator::Create(SI1->getOpcode(), NewOp,
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
SI1->getOperand(1));
}
}
@@ -1975,13 +2010,13 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (Value *Res = FoldOrOfICmps(LHS, RHS))
return ReplaceInstUsesWith(I, Res);
-
+
// (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y)
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
if (Value *Res = FoldOrOfFCmps(LHS, RHS))
return ReplaceInstUsesWith(I, Res);
-
+
// fold (or (cast A), (cast B)) -> (cast (or A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
CastInst *Op1C = dyn_cast<CastInst>(Op1);
@@ -1999,14 +2034,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
-
+
// If this is or(cast(icmp), cast(icmp)), try to fold this even if the
// cast is otherwise not optimizable. This happens for vector sexts.
if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
if (Value *Res = FoldOrOfICmps(LHS, RHS))
return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-
+
// If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
// cast is otherwise not optimizable. This happens for vector sexts.
if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
@@ -2035,7 +2070,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Inner->takeName(Op0);
return BinaryOperator::CreateOr(Inner, C1);
}
-
+
+ // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
+ // Since this OR statement hasn't been optimized further yet, we hope
+ // that this transformation will allow the new ORs to be optimized.
+ {
+ Value *X = 0, *Y = 0;
+ if (Op0->hasOneUse() && Op1->hasOneUse() &&
+ match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
+ match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
+ Value *orTrue = Builder->CreateOr(A, C);
+ Value *orFalse = Builder->CreateOr(B, D);
+ return SelectInst::Create(X, orTrue, orFalse);
+ }
+ }
+
return Changed ? &I : 0;
}
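
A small example of this distribution: with a shared condition %c, "or (select %c, %a, %b), (select %c, %x, %y)" becomes "select %c, (or %a, %x), (or %b, %y)", after which each arm can be simplified independently by later visits.
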
@@ -2050,7 +2099,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyUsingDistributiveLaws(I))
return ReplaceInstUsesWith(I, V);
- // See if we can simplify any instructions used by the instruction whose sole
+ // See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
@@ -2058,7 +2107,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// Is this a ~ operation?
if (Value *NotOp = dyn_castNotVal(&I)) {
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
- if (Op0I->getOpcode() == Instruction::And ||
+ if (Op0I->getOpcode() == Instruction::And ||
Op0I->getOpcode() == Instruction::Or) {
// ~(~X & Y) --> (X | ~Y) - De Morgan's Law
// ~(~X | Y) === (X & ~Y) - De Morgan's Law
@@ -2072,10 +2121,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return BinaryOperator::CreateOr(Op0NotVal, NotY);
return BinaryOperator::CreateAnd(Op0NotVal, NotY);
}
-
+
// ~(X & Y) --> (~X | ~Y) - De Morgan's Law
// ~(X | Y) === (~X & ~Y) - De Morgan's Law
- if (isFreeToInvert(Op0I->getOperand(0)) &&
+ if (isFreeToInvert(Op0I->getOperand(0)) &&
isFreeToInvert(Op0I->getOperand(1))) {
Value *NotX =
Builder->CreateNot(Op0I->getOperand(0), "notlhs");
@@ -2093,8 +2142,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
}
-
-
+
+
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
if (RHS->isOne() && Op0->hasOneUse())
// xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
@@ -2109,7 +2158,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (CI->hasOneUse() && Op0C->hasOneUse()) {
Instruction::CastOps Opcode = Op0C->getOpcode();
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
- (RHS == ConstantExpr::getCast(Opcode,
+ (RHS == ConstantExpr::getCast(Opcode,
ConstantInt::getTrue(I.getContext()),
Op0C->getDestTy()))) {
CI->setPredicate(CI->getInversePredicate());
@@ -2128,7 +2177,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
ConstantInt::get(I.getType(), 1));
return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
}
-
+
if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
if (Op0I->getOpcode() == Instruction::Add) {
// ~(X-c) --> (-c-1)-X
@@ -2152,13 +2201,34 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// Anything in both C1 and C2 is known to be zero, remove it from
// NewRHS.
Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
- NewRHS = ConstantExpr::getAnd(NewRHS,
+ NewRHS = ConstantExpr::getAnd(NewRHS,
ConstantExpr::getNot(CommonBits));
Worklist.Add(Op0I);
I.setOperand(0, Op0I->getOperand(0));
I.setOperand(1, NewRHS);
return &I;
}
+ } else if (Op0I->getOpcode() == Instruction::LShr) {
+ // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
+ // E1 = "X ^ C1"
+ BinaryOperator *E1;
+ ConstantInt *C1;
+ if (Op0I->hasOneUse() &&
+ (E1 = dyn_cast<BinaryOperator>(Op0I->getOperand(0))) &&
+ E1->getOpcode() == Instruction::Xor &&
+ (C1 = dyn_cast<ConstantInt>(E1->getOperand(1)))) {
+ // fold (C1 >> C2) ^ C3
+ ConstantInt *C2 = Op0CI, *C3 = RHS;
+ APInt FoldConst = C1->getValue().lshr(C2->getValue());
+ FoldConst ^= C3->getValue();
+ // Prepare the two operands.
+ Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2);
+ Opnd0->takeName(Op0I);
+ cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc());
+ Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst);
+
+ return BinaryOperator::CreateXor(Opnd0, FoldVal);
+ }
}
}
}
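
  A worked example of the new lshr fold (hypothetical IR): with C1 = 255,
  C2 = 4 and C3 = 12, the folded constant is (255 >> 4) ^ 12 = 15 ^ 12 = 3:

    define i32 @xor_lshr(i32 %x) {
      %e1 = xor i32 %x, 255    ; X ^ C1, feeding a single-use shift
      %sh = lshr i32 %e1, 4    ; (X ^ C1) >> C2
      %r  = xor i32 %sh, 12    ; ... ^ C3
      ret i32 %r
    }
    ; folds to: %sh = lshr i32 %x, 4
    ;           %r  = xor i32 %sh, 3
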
@@ -2184,7 +2254,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
I.swapOperands(); // Simplified below.
std::swap(Op0, Op1);
}
- } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
+ } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
Op1I->hasOneUse()){
if (A == Op0) { // A^(A&B) -> A^(B&A)
Op1I->swapOperands();
@@ -2196,7 +2266,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
}
-
+
BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
if (Op0I) {
Value *A, *B;
@@ -2206,7 +2276,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
std::swap(A, B);
if (B == Op1) // (A|B)^B == A & ~B
return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1));
- } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
+ } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
Op0I->hasOneUse()){
if (A == Op1) // (A&B)^A -> (B&A)^A
std::swap(A, B);
@@ -2216,31 +2286,31 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
}
-
+
// (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
- if (Op0I && Op1I && Op0I->isShift() &&
- Op0I->getOpcode() == Op1I->getOpcode() &&
+ if (Op0I && Op1I && Op0I->isShift() &&
+ Op0I->getOpcode() == Op1I->getOpcode() &&
Op0I->getOperand(1) == Op1I->getOperand(1) &&
(Op0I->hasOneUse() || Op1I->hasOneUse())) {
Value *NewOp =
Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0),
Op0I->getName());
- return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
+ return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
Op1I->getOperand(1));
}
-
+
if (Op0I && Op1I) {
Value *A, *B, *C, *D;
// (A & B)^(A | B) -> A ^ B
if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
- if ((A == C && B == D) || (A == D && B == C))
+ if ((A == C && B == D) || (A == D && B == C))
return BinaryOperator::CreateXor(A, B);
}
// (A | B)^(A & B) -> A ^ B
if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
match(Op1I, m_And(m_Value(C), m_Value(D)))) {
- if ((A == C && B == D) || (A == D && B == C))
+ if ((A == C && B == D) || (A == D && B == C))
return BinaryOperator::CreateXor(A, B);
}
}
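
  Both identities cancel the shared bits, e.g. (illustrative IR):

    define i32 @and_xor_or(i32 %a, i32 %b) {
      %and = and i32 %a, %b
      %or  = or i32 %a, %b
      %r   = xor i32 %and, %or   ; (A & B) ^ (A | B)
      ret i32 %r
    }
    ; folds to: %r = xor i32 %a, %b
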
@@ -2257,7 +2327,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
- return ReplaceInstUsesWith(I,
+ return ReplaceInstUsesWith(I,
getNewICmpValue(isSigned, Code, Op0, Op1,
Builder));
}
@@ -2270,9 +2340,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Type *SrcTy = Op0C->getOperand(0)->getType();
if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() &&
// Only do this if the casts both really cause code to be generated.
- ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0),
I.getType()) &&
- ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType())) {
Value *NewOp = Builder->CreateXor(Op0C->getOperand(0),
Op1C->getOperand(0), I.getName());
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 48f270429e5a..64cd1bd27891 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -12,12 +12,17 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/DataLayout.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/PatternMatch.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+using namespace PatternMatch;
+
+STATISTIC(NumSimplified, "Number of library calls simplified");
/// getPromotedType - Return the specified type promoted as it would be to pass
/// through a va_arg area.
@@ -273,25 +278,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
return 0;
}
- case Intrinsic::bswap:
+ case Intrinsic::bswap: {
+ Value *IIOperand = II->getArgOperand(0);
+ Value *X = 0;
+
// bswap(bswap(x)) -> x
- if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
- if (Operand->getIntrinsicID() == Intrinsic::bswap)
- return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
+ if (match(IIOperand, m_BSwap(m_Value(X))))
+ return ReplaceInstUsesWith(CI, X);
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
- if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
- if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
- if (Operand->getIntrinsicID() == Intrinsic::bswap) {
- unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
- TI->getType()->getPrimitiveSizeInBits();
- Value *CV = ConstantInt::get(Operand->getType(), C);
- Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
- return new TruncInst(V, TI->getType());
- }
+ if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
+ unsigned C = X->getType()->getPrimitiveSizeInBits() -
+ IIOperand->getType()->getPrimitiveSizeInBits();
+ Value *CV = ConstantInt::get(X->getType(), C);
+ Value *V = Builder->CreateLShr(X, CV);
+ return new TruncInst(V, IIOperand->getType());
}
-
break;
+ }
+
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// powi(x, 0) -> 1.0
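
  For the rewritten bswap case above, the shift amount C is the width
  difference of the two types; a hypothetical i32 -> i16 example, not part of
  the patch:

    declare i32 @llvm.bswap.i32(i32)
    declare i16 @llvm.bswap.i16(i16)

    define i16 @bswap_trunc_bswap(i32 %x) {
      %b1 = call i32 @llvm.bswap.i32(i32 %x)
      %t  = trunc i32 %b1 to i16
      %b2 = call i16 @llvm.bswap.i16(i16 %t)
      ret i16 %b2
    }
    ; folds to: %s = lshr i32 %x, 16
    ;           %r = trunc i32 %s to i16
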
@@ -690,7 +695,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Splat->isOne()) {
if (Zext)
return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
- // else
+ // else
return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
}
}
@@ -785,8 +790,10 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
if (CI->getCalledFunction() == 0) return 0;
- if (Value *With = Simplifier->optimizeCall(CI))
- return ReplaceInstUsesWith(*CI, With);
+ if (Value *With = Simplifier->optimizeCall(CI)) {
+ ++NumSimplified;
+ return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
+ }
return 0;
}
@@ -894,7 +901,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
new StoreInst(ConstantInt::getTrue(Callee->getContext()),
UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
OldCall);
- // If OldCall dues not return void then replaceAllUsesWith undef.
+ // If OldCall does not return void then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust themselves.
if (!OldCall->getType()->isVoidTy())
ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
@@ -977,7 +984,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (Callee == 0)
return false;
Instruction *Caller = CS.getInstruction();
- const AttrListPtr &CallerPAL = CS.getAttributes();
+ const AttributeSet &CallerPAL = CS.getAttributes();
// Okay, this is a cast from a function to a different type. Unless doing so
// would cause a type conversion of one of our arguments, change this call to
@@ -1007,8 +1014,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this return value.
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
- AttrBuilder RAttrs = CallerPAL.getRetAttributes();
- if (RAttrs.hasAttributes(Attributes::typeIncompatible(NewRetTy)))
+ AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
+ if (RAttrs.
+ hasAttributes(AttributeFuncs::
+ typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex))
return false; // Attribute not compatible with transformed value.
}
@@ -1037,14 +1047,16 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CastInst::isCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
- Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
- if (AttrBuilder(Attrs).
- hasAttributes(Attributes::typeIncompatible(ParamTy)))
+ if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
+ hasAttributes(AttributeFuncs::
+ typeIncompatible(ParamTy, i + 1), i + 1))
return false; // Attribute not compatible with transformed value.
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
- if (ParamTy != ActTy && Attrs.hasAttribute(Attributes::ByVal)) {
+ if (ParamTy != ActTy &&
+ CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
+ Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
return false;
@@ -1093,10 +1105,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// won't be dropping them. Check that these extra arguments have attributes
// that are compatible with being a vararg call argument.
for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
- if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+ unsigned Index = CallerPAL.getSlotIndex(i - 1);
+ if (Index <= FT->getNumParams())
break;
- Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
- if (PAttrs.hasIncompatibleWithVarArgsAttrs())
+
+ // Check if it has an attribute that's incompatible with varargs.
+ AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
+ if (PAttrs.hasAttribute(Index, Attribute::StructRet))
return false;
}
@@ -1105,21 +1120,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// inserting cast instructions as necessary.
std::vector<Value*> Args;
Args.reserve(NumActualArgs);
- SmallVector<AttributeWithIndex, 8> attrVec;
+ SmallVector<AttributeSet, 8> attrVec;
attrVec.reserve(NumCommonArgs);
// Get any return attributes.
- AttrBuilder RAttrs = CallerPAL.getRetAttributes();
+ AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs.removeAttributes(Attributes::typeIncompatible(NewRetTy));
+ RAttrs.
+ removeAttributes(AttributeFuncs::
+ typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex);
// Add the new return attributes.
if (RAttrs.hasAttributes())
- attrVec.push_back(
- AttributeWithIndex::get(AttrListPtr::ReturnIndex,
- Attributes::get(FT->getContext(), RAttrs)));
+ attrVec.push_back(AttributeSet::get(Caller->getContext(),
+ AttributeSet::ReturnIndex, RAttrs));
AI = CS.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
@@ -1133,9 +1150,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
// Add any parameter attributes.
- Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+ AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
if (PAttrs.hasAttributes())
- attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
+ PAttrs));
}
// If the function takes more arguments than the call was taking, add them
@@ -1145,10 +1163,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// If we are removing arguments to the function, emit an obnoxious warning.
if (FT->getNumParams() < NumActualArgs) {
- if (!FT->isVarArg()) {
- errs() << "WARNING: While resolving call to function '"
- << Callee->getName() << "' arguments were dropped!\n";
- } else {
+ // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
+ if (FT->isVarArg()) {
// Add all of the arguments in their promoted form to the arg list.
for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
Type *PTy = getPromotedType((*AI)->getType());
@@ -1162,23 +1178,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
// Add any parameter attributes.
- Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+ AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
if (PAttrs.hasAttributes())
- attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
+ PAttrs));
}
}
}
- Attributes FnAttrs = CallerPAL.getFnAttributes();
- if (FnAttrs.hasAttributes())
- attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
- FnAttrs));
+ AttributeSet FnAttrs = CallerPAL.getFnAttributes();
+ if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
+ attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
- const AttrListPtr &NewCallerPAL = AttrListPtr::get(Callee->getContext(),
- attrVec);
+ const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
+ attrVec);
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -1238,13 +1254,12 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
Value *Callee = CS.getCalledValue();
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- const AttrListPtr &Attrs = CS.getAttributes();
+ const AttributeSet &Attrs = CS.getAttributes();
// If the call already has the 'nest' attribute somewhere then give up -
// otherwise 'nest' would occur twice after splicing in the chain.
- for (unsigned I = 0, E = Attrs.getNumAttrs(); I != E; ++I)
- if (Attrs.getAttributesAtIndex(I).hasAttribute(Attributes::Nest))
- return 0;
+ if (Attrs.hasAttrSomewhere(Attribute::Nest))
+ return 0;
assert(Tramp &&
"transformCallThroughTrampoline called with incorrect CallSite.");
@@ -1253,16 +1268,16 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
PointerType *NestFPTy = cast<PointerType>(NestF->getType());
FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
- const AttrListPtr &NestAttrs = NestF->getAttributes();
+ const AttributeSet &NestAttrs = NestF->getAttributes();
if (!NestAttrs.isEmpty()) {
unsigned NestIdx = 1;
Type *NestTy = 0;
- Attributes NestAttr;
+ AttributeSet NestAttr;
// Look for a parameter marked with the 'nest' attribute.
for (FunctionType::param_iterator I = NestFTy->param_begin(),
E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
- if (NestAttrs.getParamAttributes(NestIdx).hasAttribute(Attributes::Nest)){
+ if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
// Record the parameter type and any other attributes.
NestTy = *I;
NestAttr = NestAttrs.getParamAttributes(NestIdx);
@@ -1274,17 +1289,16 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
std::vector<Value*> NewArgs;
NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
- SmallVector<AttributeWithIndex, 8> NewAttrs;
+ SmallVector<AttributeSet, 8> NewAttrs;
NewAttrs.reserve(Attrs.getNumSlots() + 1);
// Insert the nest argument into the call argument list, which may
// mean appending it. Likewise for attributes.
// Add any result attributes.
- Attributes Attr = Attrs.getRetAttributes();
- if (Attr.hasAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
- Attr));
+ if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ Attrs.getRetAttributes()));
{
unsigned Idx = 1;
@@ -1296,7 +1310,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
if (NestVal->getType() != NestTy)
NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
NewArgs.push_back(NestVal);
- NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ NestAttr));
}
if (I == E)
@@ -1304,20 +1319,21 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Add the original argument and attributes.
NewArgs.push_back(*I);
- Attr = Attrs.getParamAttributes(Idx);
- if (Attr.hasAttributes())
- NewAttrs.push_back
- (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+ AttributeSet Attr = Attrs.getParamAttributes(Idx);
+ if (Attr.hasAttributes(Idx)) {
+ AttrBuilder B(Attr, Idx);
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ Idx + (Idx >= NestIdx), B));
+ }
++Idx, ++I;
} while (1);
}
// Add any function attributes.
- Attr = Attrs.getFnAttributes();
- if (Attr.hasAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
- Attr));
+ if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
+ Attrs.getFnAttributes()));
// The trampoline may have been bitcast to a bogus type (FTy).
// Handle this by synthesizing a new function type, equal to FTy
@@ -1356,7 +1372,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
- const AttrListPtr &NewPAL = AttrListPtr::get(FTy->getContext(), NewAttrs);
+ const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs);
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index bb59db8e7ba1..2ee1278d23dc 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -13,9 +13,9 @@
#include "InstCombine.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
@@ -30,7 +30,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
Scale = 0;
return ConstantInt::get(Val->getType(), 0);
}
-
+
if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
// Cannot look past anything that might overflow.
OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
@@ -47,19 +47,19 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
Offset = 0;
return I->getOperand(0);
}
-
+
if (I->getOpcode() == Instruction::Mul) {
// This value is scaled by 'RHS'.
Scale = RHS->getZExtValue();
Offset = 0;
return I->getOperand(0);
}
-
+
if (I->getOpcode() == Instruction::Add) {
- // We have X+C. Check to see if we really have (X*C2)+C1,
+ // We have X+C. Check to see if we really have (X*C2)+C1,
// where C1 is divisible by C2.
unsigned SubScale;
- Value *SubVal =
+ Value *SubVal =
DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
Offset += RHS->getZExtValue();
Scale = SubScale;
@@ -82,7 +82,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
if (!TD) return 0;
PointerType *PTy = cast<PointerType>(CI.getType());
-
+
BuilderTy AllocaBuilder(*Builder);
AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
@@ -104,13 +104,19 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+ // If the allocation has multiple uses, only promote it if we're not
+ // shrinking the amount of memory being allocated.
+ uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy);
+ if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
+
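
  A sketch of what the new guard refuses (hypothetical IR, assuming a 64-bit
  data layout): the alloca has a second use and the cast element type has the
  smaller store size, so the rewrite is now skipped:

    define i64 @multi_use() {
      %a = alloca i64                 ; element store size 8
      %p = bitcast i64* %a to i32*    ; element store size 4
      store i32 1, i32* %p
      %v = load i64* %a               ; second use of %a
      ret i64 %v
    }
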
// See if we can satisfy the modulus by pulling a scale out of the array
// size argument.
unsigned ArraySizeScale;
uint64_t ArrayOffset;
Value *NumElements = // See if the array size is a decomposable linear expr.
DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
-
+
// If we can now satisfy the modulus by using a non-1 scale, we really can
// do the xform.
if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
@@ -125,17 +131,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// Insert before the alloca, not before the cast.
Amt = AllocaBuilder.CreateMul(Amt, NumElements);
}
-
+
if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
Offset, true);
Amt = AllocaBuilder.CreateAdd(Amt, Off);
}
-
+
AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
New->setAlignment(AI.getAlignment());
New->takeName(&AI);
-
+
// If the allocation has multiple real uses, insert a cast and change all
// things that used it to use the new cast. This will also hack on CI, but it
// will die soon.
@@ -148,10 +154,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
return ReplaceInstUsesWith(CI, New);
}
-/// EvaluateInDifferentType - Given an expression that
+/// EvaluateInDifferentType - Given an expression that
/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
/// insert the code to evaluate the expression.
-Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
+Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V)) {
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
@@ -181,7 +187,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
break;
- }
+ }
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -190,7 +196,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
// new.
if (I->getOperand(0)->getType() == Ty)
return I->getOperand(0);
-
+
// Otherwise, must be the same type of cast, so just reinsert a new one.
// This also handles the case of zext(trunc(x)) -> zext(x).
Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
@@ -212,11 +218,11 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
Res = NPN;
break;
}
- default:
+ default:
// TODO: Can handle more cases here.
llvm_unreachable("Unreachable!");
}
-
+
Res->takeName(I);
return InsertNewInstWith(Res, *I);
}
@@ -224,7 +230,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
/// This function is a wrapper around CastInst::isEliminableCastPair. It
/// simply extracts arguments and returns what that function returns.
-static Instruction::CastOps
+static Instruction::CastOps
isEliminableCastPair(
const CastInst *CI, ///< The first cast instruction
unsigned opcode, ///< The opcode of the second cast instruction
@@ -253,7 +259,7 @@ isEliminableCastPair(
if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
(Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
Res = 0;
-
+
return Instruction::CastOps(Res);
}
@@ -265,18 +271,18 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
Type *Ty) {
// Noop casts and casts of constants should be eliminated trivially.
if (V->getType() == Ty || isa<Constant>(V)) return false;
-
+
// If this is another cast that can be eliminated, we prefer to have it
// eliminated.
if (const CastInst *CI = dyn_cast<CastInst>(V))
if (isEliminableCastPair(CI, opc, Ty, TD))
return false;
-
+
// If this is a vector sext from a compare, then we don't want to break the
// idiom where each element of the extended vector is either zero or all ones.
if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
return false;
-
+
return true;
}
@@ -288,7 +294,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
// Many cases of "cast of a cast" are eliminable. If it's eliminable we just
// eliminate it now.
if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
- if (Instruction::CastOps opc =
+ if (Instruction::CastOps opc =
isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
@@ -311,7 +317,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
}
-
+
return 0;
}
@@ -330,15 +336,15 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
// We can always evaluate constants in another type.
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
Type *OrigTy = V->getType();
-
+
// If this is an extension from the dest type, we can eliminate it, even if it
// has multiple uses.
- if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
I->getOperand(0)->getType() == Ty)
return true;
@@ -423,29 +429,29 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
// TODO: Can handle more cases here.
break;
}
-
+
return false;
}
Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
-
- // See if we can simplify any instructions used by the input whose sole
+
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType(), *SrcTy = Src->getType();
-
+
// Attempt to truncate the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
CanEvaluateTruncated(Src, DestTy)) {
-
+
// If this cast is a truncate, evaluating in a different type always
// eliminates the cast, so it is always a win.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -462,7 +468,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
-
+
// Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
Value *A = 0; ConstantInt *Cst = 0;
if (Src->hasOneUse() &&
@@ -472,7 +478,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// ASize < MidSize and MidSize > ResultSize, but don't know the relation
// between ASize and ResultSize.
unsigned ASize = A->getType()->getPrimitiveSizeInBits();
-
+
// If the shift amount is larger than the size of A, then the result is
// known to be zero because all the input bits got shifted out.
if (Cst->getZExtValue() >= ASize)
@@ -485,7 +491,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Shift->takeName(Src);
return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
}
-
+
// Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
// type isn't non-native.
if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
@@ -508,7 +514,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// cast to integer to avoid the comparison.
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
const APInt &Op1CV = Op1C->getValue();
-
+
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
@@ -538,14 +544,14 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
// zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
// zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
// This only works for EQ and NE
ICI->isEquality()) {
// If Op1C is some other power of two, convert:
uint32_t BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
-
+
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
if (!DoXform) return ICI;
@@ -559,7 +565,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
Res = ConstantExpr::getZExt(Res, CI.getType());
return ReplaceInstUsesWith(CI, Res);
}
-
+
uint32_t ShiftAmt = KnownZeroMask.logBase2();
Value *In = ICI->getOperand(0);
if (ShiftAmt) {
@@ -568,12 +574,12 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
In->getName()+".lobit");
}
-
+
if ((Op1CV != 0) == isNE) { // Toggle the low bit.
Constant *One = ConstantInt::get(In->getType(), 1);
In = Builder->CreateXor(In, One);
}
-
+
if (CI.getType() == In->getType())
return ReplaceInstUsesWith(CI, In);
return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
@@ -646,19 +652,19 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
BitsToClear = 0;
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
// If the input is a truncate from the destination type, we can trivially
// eliminate it.
if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
-
+
// We can't extend or shrink something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
if (!I->hasOneUse()) return false;
-
+
unsigned Opc = I->getOpcode(), Tmp;
switch (Opc) {
case Instruction::ZExt: // zext(zext(x)) -> zext(x).
@@ -678,7 +684,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
// These can all be promoted if neither operand has 'bits to clear'.
if (BitsToClear == 0 && Tmp == 0)
return true;
-
+
// If the operation is an AND/OR/XOR and the bits to clear are zero in the
// other side, BitsToClear is ok.
if (Tmp == 0 &&
@@ -691,10 +697,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
APInt::getHighBitsSet(VSize, BitsToClear)))
return true;
}
-
+
// Otherwise, we don't know how to analyze this BitsToClear case yet.
return false;
-
+
case Instruction::LShr:
// We can promote lshr(x, cst) if we can promote x. This requires the
// ultimate 'and' to clear out the high zero bits we're clearing out though.
@@ -716,7 +722,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
Tmp != BitsToClear)
return false;
return true;
-
+
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// get into trouble with cyclic PHIs here because we only consider
@@ -739,48 +745,48 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
}
Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
- // If this zero extend is only used by a truncate, let the truncate by
+ // If this zero extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this zext.
if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
return 0;
-
+
// If one of the common conversion will work, do it.
if (Instruction *Result = commonCastTransforms(CI))
return Result;
- // See if we can simplify any instructions used by the input whose sole
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
-
+
// Attempt to extend the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
unsigned BitsToClear;
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
"Unreasonable BitsToClear");
-
+
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
" to avoid zero extend: " << CI);
Value *Res = EvaluateInDifferentType(Src, DestTy, false);
assert(Res->getType() == DestTy);
-
+
uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
+
// If the high bits are already filled with zeros, just replace this
// cast with the result.
if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
DestBitSize-SrcBitsKept)))
return ReplaceInstUsesWith(CI, Res);
-
+
// We need to emit an AND to clear the high bits.
Constant *C = ConstantInt::get(Res->getType(),
APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
@@ -792,7 +798,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// 'and' which will be much cheaper than the pair of casts.
if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
// TODO: Subsume this into EvaluateInDifferentType.
-
+
// Get the sizes of the types involved. We know that the intermediate type
// will be smaller than A or C, but don't know the relation between A and C.
Value *A = CSrc->getOperand(0);
@@ -809,7 +815,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
return new ZExtInst(And, CI.getType());
}
-
+
if (SrcSize == DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
@@ -818,7 +824,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
if (SrcSize > DstSize) {
Value *Trunc = Builder->CreateTrunc(A, CI.getType());
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
- return BinaryOperator::CreateAnd(Trunc,
+ return BinaryOperator::CreateAnd(Trunc,
ConstantInt::get(Trunc->getType(),
AndValue));
}
@@ -876,7 +882,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *New = Builder->CreateZExt(X, CI.getType());
return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
}
-
+
return 0;
}
@@ -989,14 +995,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// If this is a constant, it can be trivially promoted.
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
// If this is a truncate from the dest type, we can trivially eliminate it.
if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
-
+
// We can't extend or shrink something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
if (!I->hasOneUse()) return false;
@@ -1015,14 +1021,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// These operators can all arbitrarily be extended if their inputs can.
return CanEvaluateSExtd(I->getOperand(0), Ty) &&
CanEvaluateSExtd(I->getOperand(1), Ty);
-
+
//case Instruction::Shl: TODO
//case Instruction::LShr: TODO
-
+
case Instruction::Select:
return CanEvaluateSExtd(I->getOperand(1), Ty) &&
CanEvaluateSExtd(I->getOperand(2), Ty);
-
+
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// get into trouble with cyclic PHIs here because we only consider
@@ -1036,24 +1042,24 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// TODO: Can handle more cases here.
break;
}
-
+
return false;
}
Instruction *InstCombiner::visitSExt(SExtInst &CI) {
- // If this sign extend is only used by a truncate, let the truncate by
- // eliminated before we try to optimize this zext.
+ // If this sign extend is only used by a truncate, let the truncate be
+ // eliminated before we try to optimize this sext.
if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
return 0;
-
+
if (Instruction *I = commonCastTransforms(CI))
return I;
-
- // See if we can simplify any instructions used by the input whose sole
+
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
@@ -1076,7 +1082,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// cast with the result.
if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
return ReplaceInstUsesWith(CI, Res);
-
+
// We need to emit a shl + ashr to do the sign extend.
Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
@@ -1089,7 +1095,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
+
// We need to emit a shl + ashr to do the sign extend.
Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
@@ -1125,7 +1131,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
A = Builder->CreateShl(A, ShAmtV, CI.getName());
return BinaryOperator::CreateAShr(A, ShAmtV);
}
-
+
return 0;
}
@@ -1147,7 +1153,7 @@ static Value *LookThroughFPExtensions(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::FPExt)
return LookThroughFPExtensions(I->getOperand(0));
-
+
// If this value is a constant, return the constant in the smallest FP type
// that can accurately represent it. This allows us to turn
// (float)((double)X+2.0) into x+2.0f.
@@ -1166,14 +1172,14 @@ static Value *LookThroughFPExtensions(Value *V) {
return V;
// Don't try to shrink to various long double types.
}
-
+
return V;
}
Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
-
+
// If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
// smaller than the destination type, we can eliminate the truncate by doing
// the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
@@ -1190,7 +1196,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Type *SrcTy = OpI->getType();
Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
- if (LHSTrunc->getType() != SrcTy &&
+ if (LHSTrunc->getType() != SrcTy &&
RHSTrunc->getType() != SrcTy) {
unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If the source types were both smaller than the destination type of
@@ -1202,10 +1208,36 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
}
}
- break;
+ break;
+ }
+
+ // (fptrunc (fneg x)) -> (fneg (fptrunc x))
+ if (BinaryOperator::isFNeg(OpI)) {
+ Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1),
+ CI.getType());
+ return BinaryOperator::CreateFNeg(InnerTrunc);
}
}
-
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
+ if (II) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::fabs: {
+ // (fptrunc (fabs x)) -> (fabs (fptrunc x))
+ Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
+ CI.getType());
+ Type *IntrinsicType[] = { CI.getType() };
+ Function *Overload =
+ Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(),
+ II->getIntrinsicID(), IntrinsicType);
+
+ Value *Args[] = { InnerTrunc };
+ return CallInst::Create(Overload, Args, II->getName());
+ }
+ }
+ }
+
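
  In this release fneg is spelled as an fsub from -0.0, so the two new fptrunc
  folds fire on IR such as (illustrative only):

    define float @fneg_trunc(double %x) {
      %neg = fsub double -0.000000e+00, %x    ; fneg
      %t   = fptrunc double %neg to float
      ret float %t
    }
    ; folds to: %t   = fptrunc double %x to float
    ;           %neg = fsub float -0.000000e+00, %t
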
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
@@ -1220,7 +1252,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Arg->getOperand(0)->getType()->isFloatTy()) {
Function *Callee = Call->getCalledFunction();
Module *M = CI.getParent()->getParent()->getParent();
- Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
Callee->getAttributes(),
Builder->getFloatTy(),
Builder->getFloatTy(),
@@ -1228,15 +1260,15 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
"sqrtfcall");
ret->setAttributes(Callee->getAttributes());
-
-
+
+
// Remove the old Call. With -fmath-errno, it won't get marked readnone.
ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
EraseInstFromFunction(*Call);
return ret;
}
}
-
+
return 0;
}
@@ -1254,7 +1286,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
// This is safe if the intermediate type has enough bits in its mantissa to
// accurately represent all values of X. For example, do not do this with
// i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
+ // 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
(int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
@@ -1268,19 +1300,19 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
if (OpI == 0)
return commonCastTransforms(FI);
-
+
// fptosi(sitofp(X)) --> X
// fptosi(uitofp(X)) --> X
// This is safe if the intermediate type has enough bits in its mantissa to
// accurately represent all values of X. For example, do not do this with
// i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
+ // 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
(int)FI.getType()->getScalarSizeInBits() <=
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
-
+
return commonCastTransforms(FI);
}
@@ -1296,21 +1328,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
- if (TD) {
- if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
- TD->getPointerSizeInBits()) {
- Value *P = Builder->CreateTrunc(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
- return new IntToPtrInst(P, CI.getType());
- }
- if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
- TD->getPointerSizeInBits()) {
- Value *P = Builder->CreateZExt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
- return new IntToPtrInst(P, CI.getType());
- }
+ if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ TD->getPointerSizeInBits()) {
+ Type *Ty = TD->getIntPtrType(CI.getContext());
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
}
-
+
if (Instruction *I = commonCastTransforms(CI))
return I;
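
  Assuming a target whose intptr_t is i64 (hypothetical module, not from the
  commit), the canonicalization exposes the extension explicitly:

    target datalayout = "e-p:64:64:64"

    define i8* @int2ptr(i32 %x) {
      %p = inttoptr i32 %x to i8*
      ret i8* %p
    }
    ; becomes: %1 = zext i32 %x to i64
    ;          %p = inttoptr i64 %1 to i8*
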
@@ -1320,34 +1347,32 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
-
+
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
// If casting the result of a getelementptr instruction with no offset, turn
// this into a cast of the original pointer!
if (GEP->hasAllZeroIndices()) {
// Changing the cast operand is usually not a good idea but it is safe
- // here because the pointer operand is being replaced with another
+ // here because the pointer operand is being replaced with another
// pointer operand so the opcode doesn't need to change.
Worklist.Add(GEP);
CI.setOperand(0, GEP->getOperand(0));
return &CI;
}
-
+
// If the GEP has a single use, and the base pointer is a bitcast, and the
// GEP computes a constant offset, see if we can convert these three
// instructions into fewer. This typically happens with unions and other
// non-type-safe code.
+ APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
- GEP->hasAllConstantIndices()) {
- SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
- int64_t Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
-
+ GEP->accumulateConstantOffset(*TD, Offset)) {
// Get the base pointer input of the bitcast, and the type it points to.
Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
Type *GEPIdxTy =
cast<PointerType>(OrigBase->getType())->getElementType();
SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) {
+ if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) {
// If we were able to index down into an element, create the GEP
// and bitcast the result. This eliminates one bitcast, potentially
// two.
@@ -1355,15 +1380,15 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
Builder->CreateGEP(OrigBase, NewIndices);
NGEP->takeName(GEP);
-
+
if (isa<BitCastInst>(CI))
return new BitCastInst(NGEP, CI.getType());
assert(isa<PtrToIntInst>(CI));
return new PtrToIntInst(NGEP, CI.getType());
- }
+ }
}
}
-
+
return commonCastTransforms(CI);
}
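
  The zero-offset case at the top of this function covers IR of this shape
  (types are hypothetical):

    %struct.S = type { i32, i32 }

    define i8* @zero_gep(%struct.S* %s) {
      %g = getelementptr %struct.S* %s, i32 0, i32 0   ; no byte offset
      %c = bitcast i32* %g to i8*
      ret i8* %c
    }
    ; the cast now takes %s directly: bitcast %struct.S* %s to i8*
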
@@ -1371,19 +1396,15 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (TD) {
- if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
- return new TruncInst(P, CI.getType());
- }
- if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
- return new ZExtInst(P, CI.getType());
- }
+ if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) {
+ Type *Ty = TD->getIntPtrType(CI.getContext());
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty);
+ return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false);
}
-
+
return commonPointerCastTransforms(CI);
}
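
  Symmetrically, on the same hypothetical 64-bit target a too-narrow ptrtoint
  is split into a full-width ptrtoint plus a trunc:

    define i32 @ptr2int(i8* %p) {
      %i = ptrtoint i8* %p to i32
      ret i32 %i
    }
    ; becomes: %1 = ptrtoint i8* %p to i64
    ;          %i = trunc i64 %1 to i32
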
@@ -1398,33 +1419,33 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
// element size, or the input is a multiple of the output element size.
// Convert the input type to have the same element type as the output.
VectorType *SrcTy = cast<VectorType>(InVal->getType());
-
+
if (SrcTy->getElementType() != DestTy->getElementType()) {
// The input types don't need to be identical, but for now they must be the
// same size. There is no specific reason we couldn't handle things like
// <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
- // there yet.
+ // there yet.
if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
DestTy->getElementType()->getPrimitiveSizeInBits())
return 0;
-
+
SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
}
-
+
// Now that the element types match, get the shuffle mask and RHS of the
// shuffle to use, which depends on whether we're increasing or decreasing the
// size of the input.
SmallVector<uint32_t, 16> ShuffleMask;
Value *V2;
-
+
if (SrcTy->getNumElements() > DestTy->getNumElements()) {
// If we're shrinking the number of elements, just shuffle in the low
// elements from the input and use undef as the second shuffle input.
V2 = UndefValue::get(SrcTy);
for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
ShuffleMask.push_back(i);
-
+
} else {
// If we're increasing the number of elements, shuffle in all of the
// elements from InVal and fill the rest of the result elements with zeros
@@ -1438,7 +1459,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
ShuffleMask.push_back(SrcElts);
}
-
+
return new ShuffleVectorInst(InVal, V2,
ConstantDataVector::get(V2->getContext(),
ShuffleMask));
@@ -1465,7 +1486,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
Type *VecEltTy) {
// Undef values never contribute useful bits to the result.
if (isa<UndefValue>(V)) return true;
-
+
// If we got down to a value of the right type, we win; try inserting into the
// right element.
if (V->getType() == VecEltTy) {
@@ -1473,15 +1494,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (Constant *C = dyn_cast<Constant>(V))
if (C->isNullValue())
return true;
-
+
// Fail if multiple elements are inserted into this slot.
if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
return false;
-
+
Elements[ElementIndex] = V;
return true;
}
-
+
if (Constant *C = dyn_cast<Constant>(V)) {
// Figure out the # elements this provides, and bitcast it or slice it up
// as required.
@@ -1492,7 +1513,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
ElementIndex, Elements, VecEltTy);
-
+
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
if (!isa<IntegerType>(C->getType()))
@@ -1500,7 +1521,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
C->getType()->getPrimitiveSizeInBits()));
unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
-
+
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
i*ElementSize));
@@ -1510,23 +1531,23 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
}
return true;
}
-
+
if (!V->hasOneUse()) return false;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return false;
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ Elements, VecEltTy);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ Elements, VecEltTy);
case Instruction::Or:
return CollectInsertionElements(I->getOperand(0), ElementIndex,
Elements, VecEltTy) &&
@@ -1538,11 +1559,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (CI == 0) return false;
if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
-
+
return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
Elements, VecEltTy);
}
-
+
}
}
@@ -1577,11 +1598,11 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
Value *Result = Constant::getNullValue(CI.getType());
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
if (Elements[i] == 0) continue; // Unset element.
-
+
Result = IC.Builder->CreateInsertElement(Result, Elements[i],
IC.Builder->getInt32(i));
}
-
+
return Result;
}
@@ -1589,6 +1610,9 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
/// bitcast. The various long double bitcasts can't get in here.
static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ // We need to know the target byte order to perform this optimization.
+ if (!IC.getDataLayout()) return 0;
+
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType();
@@ -1609,11 +1633,14 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
VecTy->getPrimitiveSizeInBits() / DestWidth);
VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
}
-
- return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+
+ unsigned Elt = 0;
+ if (IC.getDataLayout()->isBigEndian())
+ Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
}
-
+
// bitcast(trunc(lshr(bitcast(somevector), cst))
ConstantInt *ShAmt = 0;
if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
@@ -1630,8 +1657,10 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
VecTy->getPrimitiveSizeInBits() / DestWidth);
VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
}
-
+
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ if (IC.getDataLayout()->isBigEndian())
+ Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
}
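
  Both endianness fixes pick the vector element that actually holds the
  demanded bytes; a sketch (illustrative IR, not from the commit):

    define float @int2float(<2 x float> %v) {
      %i = bitcast <2 x float> %v to i64
      %t = trunc i64 %i to i32
      %f = bitcast i32 %t to float
      ret float %f
    }
    ; little-endian: extractelement <2 x float> %v, i32 0
    ; big-endian:    extractelement <2 x float> %v, i32 1
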
@@ -1654,12 +1683,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
PointerType *SrcPTy = cast<PointerType>(SrcTy);
Type *DstElTy = DstPTy->getElementType();
Type *SrcElTy = SrcPTy->getElementType();
-
+
// If the address spaces don't match, don't eliminate the bitcast, which is
// required for changing types.
if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
return 0;
-
+
// If we are casting an alloca to a pointer to a type of the same
// size, rewrite the allocation instruction to allocate the "right" type.
// There is no need to modify malloc calls because it is their bitcast that
@@ -1667,14 +1696,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
return V;
-
+
// If the source and destination are pointers, and this cast is equivalent
// to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
// This can enhance SROA and other transforms that want type-safe pointers.
Constant *ZeroUInt =
Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
unsigned NumZeros = 0;
- while (SrcElTy != DstElTy &&
+ while (SrcElTy != DstElTy &&
isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
SrcElTy->getNumContainedTypes() /* not "{}" */) {
SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
@@ -1687,7 +1716,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
return GetElementPtrInst::CreateInBounds(Src, Idxs);
}
}
-
+
// Try to optimize int -> float bitcasts.
if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
@@ -1700,7 +1729,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
// FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
}
-
+
if (isa<IntegerType>(SrcTy)) {
// If this is a cast from an integer to vector, check to see if the input
// is a trunc or zext of a bitcast from vector. If so, we can replace all
@@ -1713,7 +1742,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
cast<VectorType>(DestTy), *this))
return I;
}
-
+
// If the input is an 'or' instruction, we may be doing shifts and ors to
// assemble the elements of the vector manually. Try to rip the code out
// and replace it with insertelements.
@@ -1723,18 +1752,29 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
- if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
- Value *Elem =
- Builder->CreateExtractElement(Src,
- Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
- return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+ if (SrcVTy->getNumElements() == 1) {
+ // If our destination is not a vector, then make this a straight
+ // scalar-scalar cast.
+ if (!DestTy->isVectorTy()) {
+ Value *Elem =
+ Builder->CreateExtractElement(Src,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+ }
+
+ // Otherwise, see if our source is an insert. If so, then use the scalar
+ // component directly.
+ if (InsertElementInst *IEI =
+ dyn_cast<InsertElementInst>(CI.getOperand(0)))
+ return CastInst::Create(Instruction::BitCast, IEI->getOperand(1),
+ DestTy);
}
}
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitcast to a vector with the same # elts.
- if (SVI->hasOneUse() && DestTy->isVectorTy() &&
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
cast<VectorType>(DestTy)->getNumElements() ==
SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
@@ -1743,9 +1783,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow
// us to eliminate at least one cast.
- if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+ if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
Tmp->getOperand(0)->getType() == DestTy) ||
- ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+ ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
Tmp->getOperand(0)->getType() == DestTy)) {
Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
@@ -1755,7 +1795,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
}
-
+
if (SrcTy->isPointerTy())
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7c3f8fe15d30..a96e754f3dd0 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -12,15 +12,15 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
@@ -139,6 +139,31 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
}
}
+/// Returns true if the exploded icmp can be expressed as a signed comparison
+/// to zero and updates the predicate accordingly.
+/// The signedness of the comparison is preserved.
+static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) {
+ if (!ICmpInst::isSigned(pred))
+ return false;
+
+ if (RHS->isZero())
+ return ICmpInst::isRelational(pred);
+
+ if (RHS->isOne()) {
+ if (pred == ICmpInst::ICMP_SLT) {
+ pred = ICmpInst::ICMP_SLE;
+ return true;
+ }
+ } else if (RHS->isAllOnesValue()) {
+ if (pred == ICmpInst::ICMP_SGT) {
+ pred = ICmpInst::ICMP_SGE;
+ return true;
+ }
+ }
+
+ return false;
+}
+
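
As a sanity check of the rewrites above, a minimal standalone program (illustrative only, not LLVM code) confirming that the adjusted predicates are equivalent sign tests on any signed range:

    #include <cassert>

    int main() {
      for (int x = -128; x <= 127; ++x) {
        assert((x < 1)  == (x <= 0)); // SLT 1  becomes SLE 0
        assert((x > -1) == (x >= 0)); // SGT -1 becomes SGE 0
      }
    }
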
// isHighOnes - Return true if the constant is of the form 1+0+.
// This is the same as lowones(~X).
static bool isHighOnes(const ConstantInt *CI) {
@@ -443,20 +468,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
}
- // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
+ // If a magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
- if (ArrayElementCount <= 32 ||
- (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) {
- Type *Ty;
- if (ArrayElementCount <= 32)
+ {
+ Type *Ty = 0;
+
+ // Look for an appropriate type:
+ // - The type of Idx if the magic fits
+ // - The smallest fitting legal type if we have a DataLayout
+ // - Default to i32
+ if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
+ Ty = Idx->getType();
+ else if (TD)
+ Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+ else if (ArrayElementCount <= 32)
Ty = Type::getInt32Ty(Init->getContext());
- else
- Ty = Type::getInt64Ty(Init->getContext());
- Value *V = Builder->CreateIntCast(Idx, Ty, false);
- V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
- V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
- return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+
+ if (Ty != 0) {
+ Value *V = Builder->CreateIntCast(Idx, Ty, false);
+ V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
+ V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
+ return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
+ }
}
return 0;
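
The magic-bitvector idea restated as a self-contained sketch (the array contents and the compared constant are made up): bit i of the constant records whether element i satisfies the comparison, so the load and compare collapse to a shift and mask.

    #include <cassert>
    #include <cstdint>

    int main() {
      const int A[8] = {3, 42, 7, 42, 0, 42, 9, 1}; // stand-in constant array

      // Bit i of Magic is set iff A[i] == 42.
      uint32_t Magic = 0;
      for (unsigned i = 0; i != 8; ++i)
        if (A[i] == 42)
          Magic |= 1u << i;

      // The rewritten form ((Magic >> i) & 1) != 0 agrees with the direct
      // compare for every index.
      for (unsigned i = 0; i != 8; ++i)
        assert((((Magic >> i) & 1) != 0) == (A[i] == 42));
    }
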
@@ -1226,6 +1260,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
ICI.setOperand(0, NewAnd);
return &ICI;
}
+
+ // Replace ((X & AndCST) > RHSV) with ((X & AndCST) != 0), if any
+ // bit set in (X & AndCST) will produce a result greater than RHSV.
+ if (ICI.getPredicate() == ICmpInst::ICMP_UGT) {
+ unsigned NTZ = AndCST->getValue().countTrailingZeros();
+ if ((NTZ < AndCST->getBitWidth()) &&
+ APInt::getOneBitSet(AndCST->getBitWidth(), NTZ).ugt(RHSV))
+ return new ICmpInst(ICmpInst::ICMP_NE, LHSI,
+ Constant::getNullValue(RHS->getType()));
+ }
}
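
A quick standalone check of the new fold with arbitrary illustrative constants: the smallest nonzero value of (x & 0xF0) is 16, which already exceeds 7, so the unsigned greater-than collapses to a nonzero test.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Mask = 0xF0; // lowest set bit is 1 << 4 == 16
      const uint32_t Bound = 7;   // 16 u> 7, so the fold applies
      for (uint32_t x = 0; x != 256; ++x)
        assert(((x & Mask) > Bound) == ((x & Mask) != 0));
    }
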
// Try to optimize things like "A[i]&42 == 0" to index computations.
@@ -1263,6 +1307,23 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
break;
}
+ case Instruction::Mul: { // (icmp pred (mul X, Val), CI)
+ ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!Val) break;
+
+ // If this is a signed comparison to 0 and the mul is sign preserving,
+ // use the mul LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) && !Val->isZero() &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(Val->isNegative() ?
+ ICmpInst::getSwappedPredicate(pred) : pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
+ break;
+ }
+
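
The Mul case with concrete numbers (a sketch; the ranges are kept small so the multiply never wraps, which is what the nsw check guarantees): the sign of the product is determined by x alone, with the predicate swapped for a negative multiplier.

    #include <cassert>

    int main() {
      for (int x = -100; x <= 100; ++x) {
        assert((x * 3 < 0) == (x < 0));  // positive multiplier: same predicate
        assert((x * -3 < 0) == (x > 0)); // negative multiplier: swapped
      }
    }
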
case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
if (!ShAmt) break;
@@ -1294,6 +1355,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
ConstantExpr::getLShr(RHS, ShAmt));
+ // If the shift is NSW and we compare to 0, then it is just shifting out
+ // sign bits, no need for an AND either.
+ if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0)
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
@@ -1308,6 +1375,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
}
+ // If this is a signed comparison to 0 and the shift is sign preserving,
+ // use the shift LHS operand instead.
+ ICmpInst::Predicate pred = ICI.getPredicate();
+ if (isSignTest(pred, RHS) &&
+ cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
+ return new ICmpInst(pred,
+ LHSI->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+
// Otherwise, if this is a comparison of the sign bit, simplify to and/test.
bool TrueIfSigned = false;
if (LHSI->hasOneUse() &&
@@ -1321,6 +1397,26 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
And, Constant::getNullValue(And->getType()));
}
+
+ // Transform (icmp pred iM (shl iM %v, N), CI)
+      //    -> (icmp pred i(M-N) (trunc iM %v to i(M-N)), (trunc (CI>>N)))
+      // Do this only when (trunc (CI>>N)) loses no bits, i.e. the low N bits
+      // of CI are zero. This lets us replace the shift with a trunc, which
+      // can be free on the target. It has the additional benefit of comparing
+      // against a smaller constant, which tends to be more target friendly.
+ unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
+ if (LHSI->hasOneUse() &&
+ Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
+ Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
+ Constant *NCI = ConstantExpr::getTrunc(
+ ConstantExpr::getAShr(RHS,
+ ConstantInt::get(RHS->getType(), Amt)),
+ NTy);
+ return new ICmpInst(ICI.getPredicate(),
+ Builder->CreateTrunc(LHSI->getOperand(0), NTy),
+ NCI);
+ }
+
break;
}
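
One concrete instance of the shl-to-trunc rewrite (widths and constants chosen for illustration; the i24 truncation is modeled with a mask): because the low 8 bits of the constant are zero, comparing the 32-bit shift result equals comparing the 24-bit truncation against the constant shifted right.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 0x00123400; // low 8 bits zero, so no bits are lost
      const uint32_t Samples[] = {0, 0x1234, 0xFF001234u, 0xDEADBEEFu};
      for (uint32_t x : Samples) {
        bool Wide   = (x << 8) == C;              // icmp eq (shl i32 x, 8), C
        bool Narrow = (x & 0xFFFFFF) == (C >> 8); // icmp eq (trunc to i24), C>>8
        assert(Wide == Narrow);
      }
    }
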
@@ -1502,6 +1598,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return new ICmpInst(pred, X, NegX);
}
}
+ break;
+ case Instruction::Mul:
+ if (RHSV == 0 && BO->hasNoSignedWrap()) {
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ // The trivial case (mul X, 0) is handled by InstSimplify
+ // General case : (mul X, C) != 0 iff X != 0
+ // (mul X, C) == 0 iff X == 0
+ if (!BOC->isZero())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ Constant::getNullValue(RHS->getType()));
+ }
+ }
+ break;
default: break;
}
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 4d106fc18853..337cfe32a869 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumDeadStore, "Number of dead stores eliminated");
@@ -150,26 +150,6 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
return 0;
}
-/// getPointeeAlignment - Compute the minimum alignment of the value pointed
-/// to by the given pointer.
-static unsigned getPointeeAlignment(Value *V, const DataLayout &TD) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
- (CE->getOpcode() == Instruction::GetElementPtr &&
- cast<GEPOperator>(CE)->hasAllZeroIndices()))
- return getPointeeAlignment(CE->getOperand(0), TD);
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- if (!GV->isDeclaration())
- return TD.getPreferredAlignment(GV);
-
- if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
- if (PT->getElementType()->isSized())
- return TD.getABITypeAlignment(PT->getElementType());
-
- return 0;
-}
-
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
@@ -265,7 +245,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
}
}
- if (TD) {
+ if (AI.getAlignment()) {
// Check to see if this allocation is only modified by a memcpy/memmove from
// a constant global whose alignment is equal to or exceeds that of the
// allocation. If this is the case, we can change all users to use
@@ -274,7 +254,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// is only subsequently read.
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
- if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+ unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
+ AI.getAlignment(), TD);
+ if (AI.getAlignment() <= SourceAlign) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
@@ -820,6 +802,13 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
InsertNewInstBefore(NewSI, *BBI);
NewSI->setDebugLoc(OtherStore->getDebugLoc());
+ // If the two stores had the same TBAA tag, preserve it.
+ if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
+ if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
+ OtherStore->getMetadata(LLVMContext::MD_tbaa))))
+ NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+
// Nuke the old stores.
EraseInstFromFunction(SI);
EraseInstFromFunction(*OtherStore);
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cefe45ec862c..173f2bf63304 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -37,7 +37,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
m_Value(B))) &&
// The "1" can be any value known to be a power of 2.
- isPowerOfTwo(PowerOf2, IC.getDataLayout())) {
+ isKnownToBeAPowerOfTwo(PowerOf2)) {
A = IC.Builder->CreateSub(A, B);
return IC.Builder->CreateShl(PowerOf2, A);
}
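
The identity behind this rewrite, checked exhaustively for a 32-bit power of two (valid only under the function's known-non-zero precondition, i.e. nothing is shifted out, so B <= A):

    #include <cassert>

    int main() {
      for (unsigned A = 0; A != 32; ++A)
        for (unsigned B = 0; B <= A; ++B) // B <= A keeps the value non-zero
          assert(((1u << A) >> B) == (1u << (A - B)));
    }
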
@@ -45,8 +45,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
- if (I->isLogicalShift() &&
- isPowerOfTwo(I->getOperand(0), IC.getDataLayout())) {
+ if (I->isLogicalShift() && isKnownToBeAPowerOfTwo(I->getOperand(0))) {
// We know that this is an exact/nuw shift and that the input is a
// non-zero context as well.
if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) {
@@ -252,24 +251,136 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return Changed ? &I : 0;
}
+//
+// Detect the pattern:
+//
+//   log2(Y * 0.5)
+//
+// and check that the required fast-math flags are present.
+//
+
+static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
+
+ if (!Op->hasOneUse())
+ return;
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op);
+ if (!II)
+ return;
+ if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra())
+ return;
+ Log2 = II;
+
+ Value *OpLog2Of = II->getArgOperand(0);
+ if (!OpLog2Of->hasOneUse())
+ return;
+
+ Instruction *I = dyn_cast<Instruction>(OpLog2Of);
+ if (!I)
+ return;
+ if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+ return;
+
+ ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
+ if (CFP && CFP->isExactlyValue(0.5)) {
+ Y = I->getOperand(1);
+ return;
+ }
+ CFP = dyn_cast<ConstantFP>(I->getOperand(1));
+ if (CFP && CFP->isExactlyValue(0.5))
+ Y = I->getOperand(0);
+}
+
+/// Helper function of InstCombiner::visitFMul(BinaryOperator &I). It returns
+/// true iff the given value is FMul or FDiv with one and only one operand
+/// being a normal constant (i.e. not Zero/NaN/Infinity).
+static bool isFMulOrFDivWithConstant(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || (I->getOpcode() != Instruction::FMul &&
+ I->getOpcode() != Instruction::FDiv))
+ return false;
+
+ ConstantFP *C0 = dyn_cast<ConstantFP>(I->getOperand(0));
+ ConstantFP *C1 = dyn_cast<ConstantFP>(I->getOperand(1));
+
+ if (C0 && C1)
+ return false;
+
+ return (C0 && C0->getValueAPF().isNormal()) ||
+ (C1 && C1->getValueAPF().isNormal());
+}
+
+static bool isNormalFp(const ConstantFP *C) {
+ const APFloat &Flt = C->getValueAPF();
+ return Flt.isNormal() && !Flt.isDenormal();
+}
+
+/// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
+/// The input \p FMulOrDiv is an FMul/FDiv with one and only one operand
+/// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true).
+/// This function simplifies "FMulOrDiv * C" and returns the resulting
+/// expression. Note that this function could return NULL in case the
+/// constants cannot be folded into a normal floating-point value.
+///
+Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
+ Instruction *InsertBefore) {
+ assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid");
+
+ Value *Opnd0 = FMulOrDiv->getOperand(0);
+ Value *Opnd1 = FMulOrDiv->getOperand(1);
+
+ ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0);
+ ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1);
+
+ BinaryOperator *R = 0;
+
+ // (X * C0) * C => X * (C0*C)
+ if (FMulOrDiv->getOpcode() == Instruction::FMul) {
+ Constant *F = ConstantExpr::getFMul(C1 ? C1 : C0, C);
+ if (isNormalFp(cast<ConstantFP>(F)))
+ R = BinaryOperator::CreateFMul(C1 ? Opnd0 : Opnd1, F);
+ } else {
+ if (C0) {
+ // (C0 / X) * C => (C0 * C) / X
+ ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
+ if (isNormalFp(F))
+ R = BinaryOperator::CreateFDiv(F, Opnd1);
+ } else {
+ // (X / C1) * C => X * (C/C1) if C/C1 is not a denormal
+ ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1));
+ if (isNormalFp(F)) {
+ R = BinaryOperator::CreateFMul(Opnd0, F);
+ } else {
+ // (X / C1) * C => X / (C1/C)
+ Constant *F = ConstantExpr::getFDiv(C1, C);
+ if (isNormalFp(cast<ConstantFP>(F)))
+ R = BinaryOperator::CreateFDiv(Opnd0, F);
+ }
+ }
+ }
+
+ if (R) {
+ R->setHasUnsafeAlgebra(true);
+ InsertNewInstWith(R, *InsertBefore);
+ }
+
+ return R;
+}
+
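
Two concrete instances of these foldings with made-up constants (the values below are chosen so every step is exact; in general the rewrite needs unsafe algebra plus the normal-FP checks above):

    #include <cassert>

    int main() {
      const double Xs[] = {1.5, -2.25, 1024.0}; // chosen so each op is exact
      for (double x : Xs) {
        assert((x * 3.0) * 2.0 == x * 6.0); // (X*C0)*C => X*(C0*C)
        assert((x / 4.0) * 2.0 == x * 0.5); // (X/C1)*C => X*(C/C1)
      }
    }
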
Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // Simplify mul instructions with a constant RHS.
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
- if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
- // "In IEEE floating point, x*1 is not equivalent to x for nans. However,
- // ANSI says we can drop signals, so we can do this anyway." (from GCC)
- if (Op1F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0'
- } else if (ConstantDataVector *Op1V = dyn_cast<ConstantDataVector>(Op1C)) {
- // As above, vector X*splat(1.0) -> X in all defined cases.
- if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue()))
- if (F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0);
- }
+ if (isa<Constant>(Op0))
+ std::swap(Op0, Op1);
+
+ if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ bool AllowReassociate = I.hasUnsafeAlgebra();
+ // Simplify mul instructions with a constant RHS.
+ if (isa<Constant>(Op1)) {
// Try to fold constant mul into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -278,11 +389,146 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (isa<PHINode>(Op0))
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
+
+ ConstantFP *C = dyn_cast<ConstantFP>(Op1);
+ if (C && AllowReassociate && C->getValueAPF().isNormal()) {
+ // Let MDC denote an expression in one of these forms:
+ // X * C, C/X, X/C, where C is a constant.
+ //
+ // Try to simplify "MDC * Constant"
+ if (isFMulOrFDivWithConstant(Op0)) {
+ Value *V = foldFMulConst(cast<Instruction>(Op0), C, &I);
+ if (V)
+ return ReplaceInstUsesWith(I, V);
+ }
+
+ // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C)
+ Instruction *FAddSub = dyn_cast<Instruction>(Op0);
+ if (FAddSub &&
+ (FAddSub->getOpcode() == Instruction::FAdd ||
+ FAddSub->getOpcode() == Instruction::FSub)) {
+ Value *Opnd0 = FAddSub->getOperand(0);
+ Value *Opnd1 = FAddSub->getOperand(1);
+ ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0);
+ ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1);
+ bool Swap = false;
+ if (C0) {
+ std::swap(C0, C1);
+ std::swap(Opnd0, Opnd1);
+ Swap = true;
+ }
+
+ if (C1 && C1->getValueAPF().isNormal() &&
+ isFMulOrFDivWithConstant(Opnd0)) {
+ Value *M1 = ConstantExpr::getFMul(C1, C);
+ Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
+ foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
+ 0;
+ if (M0 && M1) {
+ if (Swap && FAddSub->getOpcode() == Instruction::FSub)
+ std::swap(M0, M1);
+
+ Value *R = (FAddSub->getOpcode() == Instruction::FAdd) ?
+ BinaryOperator::CreateFAdd(M0, M1) :
+ BinaryOperator::CreateFSub(M0, M1);
+ Instruction *RI = cast<Instruction>(R);
+ RI->copyFastMathFlags(&I);
+ return RI;
+ }
+ }
+ }
+ }
+ }
+
+
+ // Under unsafe algebra do:
+ // X * log2(0.5*Y) = X*log2(Y) - X
+ if (I.hasUnsafeAlgebra()) {
+ Value *OpX = NULL;
+ Value *OpY = NULL;
+ IntrinsicInst *Log2;
+ detectLog2OfHalf(Op0, OpY, Log2);
+ if (OpY) {
+ OpX = Op1;
+ } else {
+ detectLog2OfHalf(Op1, OpY, Log2);
+ if (OpY) {
+ OpX = Op0;
+ }
+ }
+    // If the pattern was detected, emit the alternate sequence.
+ if (OpX && OpY) {
+ Log2->setArgOperand(0, OpY);
+ Value *FMulVal = Builder->CreateFMul(OpX, Log2);
+ Instruction *FMul = cast<Instruction>(FMulVal);
+ FMul->copyFastMathFlags(Log2);
+ Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX);
+ FSub->copyFastMathFlags(Log2);
+ return FSub;
+ }
}
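
The identity behind the rewrite, checked numerically (a sketch; the two sides differ by rounding, which is exactly why the transform demands the unsafe-algebra flag): log2(0.5*y) = log2(y) - 1, hence x*log2(0.5*y) = x*log2(y) - x.

    #include <cassert>
    #include <cmath>

    int main() {
      const double Ys[] = {0.75, 2.0, 12345.0};
      const double Xs[] = {-3.0, 0.5, 7.0};
      for (double y : Ys)
        for (double x : Xs) {
          double Before = x * std::log2(0.5 * y);
          double After  = x * std::log2(y) - x;
          // Equal only up to rounding error.
          assert(std::fabs(Before - After) <= 1e-9 * (1.0 + std::fabs(Before)));
        }
    }
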
- if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y
- if (Value *Op1v = dyn_castFNegVal(Op1))
- return BinaryOperator::CreateFMul(Op0v, Op1v);
+ // Handle symmetric situation in a 2-iteration loop
+ Value *Opnd0 = Op0;
+ Value *Opnd1 = Op1;
+ for (int i = 0; i < 2; i++) {
+ bool IgnoreZeroSign = I.hasNoSignedZeros();
+ if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
+ Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
+ Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
+
+ // -X * -Y => X*Y
+ if (N1)
+ return BinaryOperator::CreateFMul(N0, N1);
+
+ if (Opnd0->hasOneUse()) {
+ // -X * Y => -(X*Y) (Promote negation as high as possible)
+ Value *T = Builder->CreateFMul(N0, Opnd1);
+ cast<Instruction>(T)->setDebugLoc(I.getDebugLoc());
+ Instruction *Neg = BinaryOperator::CreateFNeg(T);
+ if (I.getFastMathFlags().any()) {
+ cast<Instruction>(T)->copyFastMathFlags(&I);
+ Neg->copyFastMathFlags(&I);
+ }
+ return Neg;
+ }
+ }
+
+ // (X*Y) * X => (X*X) * Y where Y != X
+ // The purpose is two-fold:
+ // 1) to form a power expression (of X).
+ // 2) potentially shorten the critical path: After transformation, the
+ // latency of the instruction Y is amortized by the expression of X*X,
+ // and therefore Y is in a "less critical" position compared to what it
+ // was before the transformation.
+ //
+ if (AllowReassociate) {
+ Value *Opnd0_0, *Opnd0_1;
+ if (Opnd0->hasOneUse() &&
+ match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) {
+ Value *Y = 0;
+ if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1)
+ Y = Opnd0_1;
+ else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1)
+ Y = Opnd0_0;
+
+ if (Y) {
+ Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1));
+ T->copyFastMathFlags(&I);
+ T->setDebugLoc(I.getDebugLoc());
+
+ Instruction *R = BinaryOperator::CreateFMul(T, Y);
+ R->copyFastMathFlags(&I);
+ return R;
+ }
+ }
+ }
+
+ if (!isa<Constant>(Op1))
+ std::swap(Opnd0, Opnd1);
+ else
+ break;
+ }
return Changed ? &I : 0;
}
@@ -567,21 +813,140 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return 0;
}
+/// CvtFDivConstToReciprocal tries to convert X/C into X*(1/C) if C is not a
+/// special FP value and:
+///    1) 1/C is exact, or
+///    2) reciprocal is allowed.
+/// If the conversion was successful, the simplified expression "X * 1/C" is
+/// returned; otherwise, NULL is returned.
+///
+static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
+ ConstantFP *Divisor,
+ bool AllowReciprocal) {
+ const APFloat &FpVal = Divisor->getValueAPF();
+ APFloat Reciprocal(FpVal.getSemantics());
+ bool Cvt = FpVal.getExactInverse(&Reciprocal);
+
+ if (!Cvt && AllowReciprocal && FpVal.isNormal()) {
+ Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
+ (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
+ Cvt = !Reciprocal.isDenormal();
+ }
+
+ if (!Cvt)
+ return 0;
+
+ ConstantFP *R;
+ R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal);
+ return BinaryOperator::CreateFMul(Dividend, R);
+}
+
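
Why the exact-inverse check matters, as a standalone illustration (constants arbitrary): 0.25 is exactly 1/4, so x/4 and x*0.25 agree bit for bit, while 1/3 has no exact representation, so x/3 and x*(1.0/3.0) can differ in the last ulp. The latter form is only emitted when reciprocals are explicitly allowed.

    #include <cstdio>

    int main() {
      bool ExactOK = true, InexactOK = true;
      for (int i = 0; i != 1000; ++i) {
        double x = 1.0 + i * 0.001;
        ExactOK   = ExactOK   && (x / 4.0 == x * 0.25);
        InexactOK = InexactOK && (x / 3.0 == x * (1.0 / 3.0));
      }
      std::printf("x/4 == x*0.25 for all samples: %d\n", ExactOK);    // 1
      std::printf("x/3 == x*(1/3) for all samples: %d\n", InexactOK); // usually 0
    }
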
Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ bool AllowReassociate = I.hasUnsafeAlgebra();
+ bool AllowReciprocal = I.hasAllowReciprocal();
+
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- const APFloat &Op1F = Op1C->getValueAPF();
-
- // If the divisor has an exact multiplicative inverse we can turn the fdiv
- // into a cheaper fmul.
- APFloat Reciprocal(Op1F.getSemantics());
- if (Op1F.getExactInverse(&Reciprocal)) {
- ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal);
- return BinaryOperator::CreateFMul(Op0, RFP);
+ if (AllowReassociate) {
+ ConstantFP *C1 = 0;
+ ConstantFP *C2 = Op1C;
+ Value *X;
+ Instruction *Res = 0;
+
+ if (match(Op0, m_FMul(m_Value(X), m_ConstantFP(C1)))) {
+ // (X*C1)/C2 => X * (C1/C2)
+ //
+ Constant *C = ConstantExpr::getFDiv(C1, C2);
+ const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
+ if (F.isNormal() && !F.isDenormal())
+ Res = BinaryOperator::CreateFMul(X, C);
+ } else if (match(Op0, m_FDiv(m_Value(X), m_ConstantFP(C1)))) {
+ // (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed]
+ //
+ Constant *C = ConstantExpr::getFMul(C1, C2);
+ const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
+ if (F.isNormal() && !F.isDenormal()) {
+ Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
+ AllowReciprocal);
+ if (!Res)
+ Res = BinaryOperator::CreateFDiv(X, C);
+ }
+ }
+
+ if (Res) {
+ Res->setFastMathFlags(I.getFastMathFlags());
+ return Res;
+ }
+ }
+
+ // X / C => X * 1/C
+ if (Instruction *T = CvtFDivConstToReciprocal(Op0, Op1C, AllowReciprocal))
+ return T;
+
+ return 0;
+ }
+
+ if (AllowReassociate && isa<ConstantFP>(Op0)) {
+ ConstantFP *C1 = cast<ConstantFP>(Op0), *C2;
+ Constant *Fold = 0;
+ Value *X;
+ bool CreateDiv = true;
+
+ // C1 / (X*C2) => (C1/C2) / X
+ if (match(Op1, m_FMul(m_Value(X), m_ConstantFP(C2))))
+ Fold = ConstantExpr::getFDiv(C1, C2);
+ else if (match(Op1, m_FDiv(m_Value(X), m_ConstantFP(C2)))) {
+ // C1 / (X/C2) => (C1*C2) / X
+ Fold = ConstantExpr::getFMul(C1, C2);
+ } else if (match(Op1, m_FDiv(m_ConstantFP(C2), m_Value(X)))) {
+ // C1 / (C2/X) => (C1/C2) * X
+ Fold = ConstantExpr::getFDiv(C1, C2);
+ CreateDiv = false;
+ }
+
+ if (Fold) {
+ const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
+ if (FoldC.isNormal() && !FoldC.isDenormal()) {
+ Instruction *R = CreateDiv ?
+ BinaryOperator::CreateFDiv(Fold, X) :
+ BinaryOperator::CreateFMul(X, Fold);
+ R->setFastMathFlags(I.getFastMathFlags());
+ return R;
+ }
+ }
+ return 0;
+ }
+
+ if (AllowReassociate) {
+ Value *X, *Y;
+ Value *NewInst = 0;
+ Instruction *SimpR = 0;
+
+ if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) {
+ // (X/Y) / Z => X / (Y*Z)
+ //
+ if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op1)) {
+ NewInst = Builder->CreateFMul(Y, Op1);
+ SimpR = BinaryOperator::CreateFDiv(X, NewInst);
+ }
+ } else if (Op1->hasOneUse() && match(Op1, m_FDiv(m_Value(X), m_Value(Y)))) {
+ // Z / (X/Y) => Z*Y / X
+ //
+ if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op0)) {
+ NewInst = Builder->CreateFMul(Op0, Y);
+ SimpR = BinaryOperator::CreateFDiv(NewInst, X);
+ }
+ }
+
+ if (NewInst) {
+ if (Instruction *T = dyn_cast<Instruction>(NewInst))
+ T->setDebugLoc(I.getDebugLoc());
+ SimpR->setFastMathFlags(I.getFastMathFlags());
+ return SimpR;
}
}
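
The (X/Y)/Z rewrite with concrete values (illustrative; again valid only under unsafe algebra, since the two sides agree only up to rounding). The payoff is trading one of the two divisions for a multiply.

    #include <cassert>
    #include <cmath>

    int main() {
      double X = 355.0, Y = 113.0, Z = 7.0;
      double Before = (X / Y) / Z; // two divides
      double After  = X / (Y * Z); // one multiply, one divide
      assert(std::fabs(Before - After) <= 1e-12 * std::fabs(Before));
    }
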
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index de9c77e6005a..b0a998cca76e 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/DataLayout.h"
using namespace llvm;
/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index a2d4c888f2cf..121aa1f8d73f 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/Support/PatternMatch.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -127,13 +127,14 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
// If this is a non-volatile load or a cast from the same type,
// merge.
if (TI->isCast()) {
- if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType())
+ Type *FIOpndTy = FI->getOperand(0)->getType();
+ if (TI->getOperand(0)->getType() != FIOpndTy)
return 0;
// The select condition may be a vector. We may only change the operand
// type if the vector width remains the same (and matches the condition).
Type *CondTy = SI.getCondition()->getType();
- if (CondTy->isVectorTy() && CondTy->getVectorNumElements() !=
- FI->getOperand(0)->getType()->getVectorNumElements())
+ if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() ||
+ CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
return 0;
} else {
return 0; // unknown unary op.
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 57021f1bef84..8cf76e5e8a9f 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -49,7 +49,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
I.setOperand(1, Rem);
return &I;
}
-
+
return 0;
}
@@ -70,10 +70,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// We can always evaluate constants shifted.
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
// If this is the opposite shift, we can directly reuse the input of the shift
// if the needed bits are already zero in the input. This allows us to reuse
// the value which means that we don't care if the shift has multiple uses.
@@ -95,14 +95,14 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
return CanEvaluateTruncated(I->getOperand(0), Ty);
}
#endif
-
+
}
}
-
+
// We can't mutate something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
if (!I->hasOneUse()) return false;
-
+
switch (I->getOpcode()) {
default: return false;
case Instruction::And:
@@ -111,7 +111,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
      // Bitwise operators can always be evaluated shifted.
return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
-
+
case Instruction::Shl: {
// We can often fold the shift into shifts-by-a-constant.
CI = dyn_cast<ConstantInt>(I->getOperand(1));
@@ -119,10 +119,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
if (isLeftShift) return true;
-
+
// We can always turn shl(c)+shr(c) -> and(c2).
if (CI->getValue() == NumBits) return true;
-
+
unsigned TypeWidth = I->getType()->getScalarSizeInBits();
// We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
@@ -133,20 +133,20 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
return true;
}
-
+
return false;
}
case Instruction::LShr: {
// We can often fold the shift into shifts-by-a-constant.
CI = dyn_cast<ConstantInt>(I->getOperand(1));
if (CI == 0) return false;
-
+
// We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
if (!isLeftShift) return true;
-
+
// We can always turn lshr(c)+shl(c) -> and(c2).
if (CI->getValue() == NumBits) return true;
-
+
unsigned TypeWidth = I->getType()->getScalarSizeInBits();
// We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
@@ -157,7 +157,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
return true;
}
-
+
return false;
}
case Instruction::Select: {
@@ -175,7 +175,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
return false;
return true;
}
- }
+ }
}
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
@@ -194,7 +194,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
IC.getTargetLibraryInfo());
return V;
}
-
+
Instruction *I = cast<Instruction>(V);
IC.Worklist.Add(I);
@@ -207,7 +207,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
return I;
-
+
case Instruction::Shl: {
BinaryOperator *BO = cast<BinaryOperator>(I);
unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
@@ -227,7 +227,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
BO->setHasNoSignedWrap(false);
return I;
}
-
+
// We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
// zeros.
if (CI->getValue() == NumBits) {
@@ -240,7 +240,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
}
return V;
}
-
+
// We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
// the and won't be needed.
assert(CI->getZExtValue() > NumBits);
@@ -255,19 +255,19 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
// We only accept shifts-by-a-constant in CanEvaluateShifted.
ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
-
+
// We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
if (!isLeftShift) {
      // If this is an oversized composite shift, then unsigned shifts get 0.
unsigned NewShAmt = NumBits+CI->getZExtValue();
if (NewShAmt >= TypeWidth)
return Constant::getNullValue(BO->getType());
-
+
BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
BO->setIsExact(false);
return I;
}
-
+
// We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
// zeros.
if (CI->getValue() == NumBits) {
@@ -280,7 +280,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
}
return V;
}
-
+
// We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
// the and won't be needed.
assert(CI->getZExtValue() > NumBits);
@@ -289,7 +289,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
BO->setIsExact(false);
return BO;
}
-
+
case Instruction::Select:
I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
@@ -304,7 +304,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
NumBits, isLeftShift, IC));
return PN;
}
- }
+ }
}
@@ -312,24 +312,24 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
BinaryOperator &I) {
bool isLeftShift = I.getOpcode() == Instruction::Shl;
-
-
+
+
// See if we can propagate this shift into the input, this covers the trivial
// cast of lshr(shl(x,c1),c2) as well as other more complex cases.
if (I.getOpcode() != Instruction::AShr &&
CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
-
- return ReplaceInstUsesWith(I,
+
+ return ReplaceInstUsesWith(I,
GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
}
-
-
- // See if we can simplify any instructions used by the instruction whose sole
+
+
+ // See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
-
+
// shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
// a signed shift.
//
@@ -340,14 +340,14 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
return &I;
}
-
+
// ((X*C1) << C2) == (X * (C1 << C2))
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
if (BO->getOpcode() == Instruction::Mul && isLeftShift)
if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
return BinaryOperator::CreateMul(BO->getOperand(0),
ConstantExpr::getShl(BOOp, Op1));
-
+
// Try to fold constant and into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -355,7 +355,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
if (isa<PHINode>(Op0))
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
-
+
// Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
@@ -364,7 +364,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// require that the input operand is a shift-by-constant so that we have
// confidence that the shifts will get folded together. We could do this
// xform in more cases, but it is unlikely to be profitable.
- if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
+ if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
isa<ConstantInt>(TrOp->getOperand(1))) {
// Okay, we'll do this xform. Make the shift of shift.
Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
@@ -378,7 +378,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
unsigned DstSize = TI->getType()->getScalarSizeInBits();
APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
-
+
// The mask we constructed says what the trunc would do if occurring
// between the shifts. We want to know the effect *after* the second
// shift. We know that it is a logical shift by a constant, so adjust the
@@ -399,7 +399,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
return new TruncInst(And, I.getType());
}
}
-
+
if (Op0->hasOneUse()) {
if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
// Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
@@ -425,14 +425,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
}
-
+
// Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
Value *Op0BOOp1 = Op0BO->getOperand(1);
if (isLeftShift && Op0BOOp1->hasOneUse() &&
- match(Op0BOOp1,
- m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
- m_ConstantInt(CC))) &&
- cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
+ match(Op0BOOp1,
+ m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
+ m_ConstantInt(CC)))) {
Value *YS = // (Y << C)
Builder->CreateShl(Op0BO->getOperand(0), Op1,
Op0BO->getName());
@@ -442,7 +441,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
}
}
-
+
// FALL THROUGH.
case Instruction::Sub: {
// Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
@@ -458,34 +457,32 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
}
-
+
        // Turn (((X >> C)&CC) + Y) << C  ->  ((X & (CC << C)) + (Y << C))
if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
match(Op0BO->getOperand(0),
- m_And(m_Shr(m_Value(V1), m_Value(V2)),
- m_ConstantInt(CC))) && V2 == Op1 &&
- cast<BinaryOperator>(Op0BO->getOperand(0))
- ->getOperand(0)->hasOneUse()) {
+ m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
+ m_ConstantInt(CC))) && V2 == Op1) {
Value *YS = // (Y << C)
Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
// X & (CC << C)
Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
V1->getName()+".mask");
-
+
return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
}
-
+
break;
}
}
-
-
+
+
    // If the operand is a bitwise operator with a constant RHS, and the
// shift is the only use, we can pull it out of the shift.
if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
bool isValid = true; // Valid only for And, Or, Xor
bool highBitSet = false; // Transform if high bit of constant set?
-
+
switch (Op0BO->getOpcode()) {
default: isValid = false; break; // Do not perform transform!
case Instruction::Add:
@@ -499,7 +496,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
highBitSet = true;
break;
}
-
+
// If this is a signed shift right, and the high bit is modified
// by the logical operation, do not perform the transformation.
// The highBitSet boolean indicates the value of the high bit of
@@ -508,26 +505,26 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
//
if (isValid && I.getOpcode() == Instruction::AShr)
isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
-
+
if (isValid) {
Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
-
+
Value *NewShift =
Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
NewShift->takeName(Op0BO);
-
+
return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
NewRHS);
}
}
}
}
-
+
// Find out if this is a shift of a shift by a constant.
BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
if (ShiftOp && !ShiftOp->isShift())
ShiftOp = 0;
-
+
if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
// This is a constant shift of a constant shift. Be careful about hiding
@@ -548,9 +545,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
Value *X = ShiftOp->getOperand(0);
-
+
IntegerType *Ty = cast<IntegerType>(I.getType());
-
+
// Check for (X << c1) << c2 and (X >> c1) >> c2
if (I.getOpcode() == ShiftOp->getOpcode()) {
uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
@@ -561,11 +558,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
}
-
+
return BinaryOperator::Create(I.getOpcode(), X,
ConstantInt::get(Ty, AmtSum));
}
-
+
if (ShiftAmt1 == ShiftAmt2) {
// If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
if (I.getOpcode() == Instruction::LShr &&
@@ -605,7 +602,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
return NewLShr;
}
Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);
-
+
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
@@ -653,12 +650,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
return NewShl;
}
Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
-
+
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
}
-
+
// We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
// we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
if (I.getOpcode() == Instruction::AShr &&
@@ -682,21 +679,21 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
TD))
return ReplaceInstUsesWith(I, V);
-
+
if (Instruction *V = commonShiftTransforms(I))
return V;
-
+
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
unsigned ShAmt = Op1C->getZExtValue();
-
+
// If the shifted-out value is known-zero, then this is a NUW shift.
- if (!I.hasNoUnsignedWrap() &&
+ if (!I.hasNoUnsignedWrap() &&
MaskedValueIsZero(I.getOperand(0),
APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) {
I.setHasNoUnsignedWrap();
return &I;
}
-
+
// If the shifted out value is all signbits, this is a NSW shift.
if (!I.hasNoSignedWrap() &&
ComputeNumSignBits(I.getOperand(0)) > ShAmt) {
@@ -712,7 +709,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
match(I.getOperand(1), m_Constant(C2)))
return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
- return 0;
+ return 0;
}
Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
@@ -722,9 +719,9 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
if (Instruction *R = commonShiftTransforms(I))
return R;
-
+
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
+
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
unsigned ShAmt = Op1C->getZExtValue();
@@ -743,15 +740,15 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
return new ZExtInst(Cmp, II->getType());
}
}
-
+
// If the shifted-out value is known-zero, then this is an exact shift.
- if (!I.isExact() &&
+ if (!I.isExact() &&
MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
I.setIsExact();
return &I;
- }
+ }
}
-
+
return 0;
}
@@ -762,12 +759,12 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (Instruction *R = commonShiftTransforms(I))
return R;
-
+
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
unsigned ShAmt = Op1C->getZExtValue();
-
+
// If the input is a SHL by the same constant (ashr (shl X, C), C), then we
// have a sign-extend idiom.
Value *X;
@@ -791,23 +788,23 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
}
// If the shifted-out value is known-zero, then this is an exact shift.
- if (!I.isExact() &&
+ if (!I.isExact() &&
MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
I.setIsExact();
return &I;
}
- }
-
+ }
+
// See if we can turn a signed shr into an unsigned shr.
if (MaskedValueIsZero(Op0,
APInt::getSignBit(I.getType()->getScalarSizeInBits())))
return BinaryOperator::CreateLShr(Op0, Op1);
-
+
// Arithmetic shifting an all-sign-bit value is a no-op.
unsigned NumSignBits = ComputeNumSignBits(Op0);
if (NumSignBits == Op0->getType()->getScalarSizeInBits())
return ReplaceInstUsesWith(I, Op0);
-
+
return 0;
}
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 602b20337144..8add1ea618d3 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -14,17 +14,18 @@
#include "InstCombine.h"
-#include "llvm/DataLayout.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/PatternMatch.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
-
-/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer. If so, check to see if there
/// are any bits set in the constant that are not demanded. If so, shrink the
/// constant and return true.
-static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
+static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
APInt Demanded) {
assert(I && "No instruction?");
assert(OpNo < I->getNumOperands() && "Operand index too large");
@@ -53,8 +54,8 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
-
- Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
+
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
KnownZero, KnownOne, 0);
if (V == 0) return false;
if (V == &Inst) return true;
@@ -65,7 +66,7 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
/// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the
/// specified instruction operand if possible, updating it in place. It returns
/// true if it made any change and false otherwise.
-bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
+bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) {
Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
@@ -86,7 +87,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
/// to be one in the expression. KnownZero contains all the bits that are known
/// to be zero in the expression. These are provided to potentially allow the
/// caller (which might recursively be SimplifyDemandedBits itself) to simplify
-/// the expression. KnownOne and KnownZero always follow the invariant that
+/// the expression. KnownOne and KnownZero always follow the invariant that
/// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that
/// the bits in KnownOne and KnownZero may only be accurate for those bits set
/// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero
@@ -133,10 +134,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return 0;
return UndefValue::get(VTy);
}
-
+
if (Depth == 6) // Limit search depth.
return 0;
-
+
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
@@ -158,61 +159,74 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If either the LHS or the RHS are Zero, the result is zero.
ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
-
+
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
// context.
- if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
(DemandedMask & ~LHSKnownZero))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
(DemandedMask & ~RHSKnownZero))
return I->getOperand(1);
-
+
// If all of the demanded bits in the inputs are known zeros, return zero.
if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
return Constant::getNullValue(VTy);
-
+
} else if (I->getOpcode() == Instruction::Or) {
// We can simplify (X|Y) -> X or Y in the user's context if we know that
// only bits from X or Y are demanded.
-
+
// If either the LHS or the RHS are One, the result is One.
ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
-
+
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
// context.
- if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
(DemandedMask & ~LHSKnownOne))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
(DemandedMask & ~RHSKnownOne))
return I->getOperand(1);
-
+
// If all of the potentially set bits on one side are known to be set on
// the other side, just use the 'other' side.
- if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
(DemandedMask & (~RHSKnownZero)))
return I->getOperand(0);
- if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
(DemandedMask & (~LHSKnownZero)))
return I->getOperand(1);
+ } else if (I->getOpcode() == Instruction::Xor) {
+ // We can simplify (X^Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other.
+ if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ return I->getOperand(0);
+ if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ return I->getOperand(1);
}
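
The new xor case restated with masks (the demanded bits and operands are illustrative): when every demanded bit of one xor operand is known zero, the xor contributes nothing on those bits, so the other operand can stand in for the whole expression.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Demanded = 0x00FF;   // only the low 8 bits are demanded
      for (uint32_t x = 0; x != 1u << 16; ++x) {
        uint32_t y = x << 8;              // y's low 8 bits are known zero
        assert(((x ^ y) & Demanded) == (x & Demanded));
      }
    }
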
-
+
// Compute the KnownZero/KnownOne bits to simplify things downstream.
ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
return 0;
}
-
+
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedMask to all bits so that we can try to simplify the
// operands. This allows visitTruncInst (for example) to simplify the
// operand of a trunc without duplicating all the logic below.
if (Depth == 0 && !V->hasOneUse())
DemandedMask = APInt::getAllOnesValue(BitWidth);
-
+
switch (I->getOpcode()) {
default:
ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
@@ -224,26 +238,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
LHSKnownZero, LHSKnownOne, Depth+1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
- if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
(DemandedMask & ~LHSKnownZero))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
(DemandedMask & ~RHSKnownZero))
return I->getOperand(1);
-
+
// If all of the demanded bits in the inputs are known zeros, return zero.
if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
return Constant::getNullValue(VTy);
-
+
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
return I;
-
+
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne = RHSKnownOne & LHSKnownOne;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
@@ -251,36 +265,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::Or:
// If either the LHS or the RHS are One, the result is One.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
LHSKnownZero, LHSKnownOne, Depth+1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
-
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
- if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
(DemandedMask & ~LHSKnownOne))
return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
(DemandedMask & ~RHSKnownOne))
return I->getOperand(1);
// If all of the potentially set bits on one side are known to be set on
// the other side, just use the 'other' side.
- if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
(DemandedMask & (~RHSKnownZero)))
return I->getOperand(0);
- if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
(DemandedMask & (~LHSKnownZero)))
return I->getOperand(1);
-
+
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(I, 1, DemandedMask))
return I;
-
+
// Output known-0 bits are only known if clear in both the LHS & RHS.
KnownZero = RHSKnownZero & LHSKnownZero;
// Output known-1 are known to be set if set in either the LHS | RHS.
@@ -289,34 +303,34 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Instruction::Xor: {
if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
LHSKnownZero, LHSKnownOne, Depth+1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
-
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if ((DemandedMask & RHSKnownZero) == DemandedMask)
return I->getOperand(0);
if ((DemandedMask & LHSKnownZero) == DemandedMask)
return I->getOperand(1);
-
+
// If all of the demanded bits are known to be zero on one side or the
// other, turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
- Instruction *Or =
+ Instruction *Or =
BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
return InsertNewInstWith(Or, *I);
}
-
+
// If all of the demanded bits on one side are known, and all of the set
// bits on that side are also known to be set on the other side, turn this
// into an AND, as we know the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
- if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
+ if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {
// all known
if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
Constant *AndC = Constant::getIntegerValue(VTy,
@@ -325,12 +339,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return InsertNewInstWith(And, *I);
}
}
-
+
// If the RHS is a constant, see if we can simplify it.
// FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
if (ShrinkDemandedConstant(I, 1, DemandedMask))
return I;
-
+
// If our LHS is an 'and' and if it has one use, and if any of the bits we
// are flipping are known to be set, then the xor is just resetting those
// bits to zero. We can just knock out bits from the 'and' and the 'xor',
@@ -343,12 +357,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
-
+
Constant *AndC =
ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
InsertNewInstWith(NewAnd, *I);
-
+
Constant *XorC =
ConstantInt::get(I->getType(), NewMask & XorRHS->getValue());
Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
@@ -364,17 +378,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Instruction::Select:
if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
LHSKnownZero, LHSKnownOne, Depth+1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
-
+ assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
ShrinkDemandedConstant(I, 2, DemandedMask))
return I;
-
+
// Only known if known in both the LHS and RHS.
KnownOne = RHSKnownOne & LHSKnownOne;
KnownZero = RHSKnownZero & LHSKnownZero;
@@ -384,13 +398,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.zext(truncBf);
KnownZero = KnownZero.zext(truncBf);
KnownOne = KnownOne.zext(truncBf);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
KnownZero, KnownOne, Depth+1))
return I;
DemandedMask = DemandedMask.trunc(BitWidth);
KnownZero = KnownZero.trunc(BitWidth);
KnownOne = KnownOne.trunc(BitWidth);
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
break;
}
case Instruction::BitCast:
@@ -413,12 +427,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
KnownZero, KnownOne, Depth+1))
return I;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
break;
case Instruction::ZExt: {
// Compute the bits in the result that are not present in the input.
      unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
-
+
DemandedMask = DemandedMask.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
@@ -428,7 +442,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// The top bits are known to be zero.
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
break;
@@ -436,8 +450,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Instruction::SExt: {
// Compute the bits in the result that are not present in the input.
      unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
-
- APInt InputDemandedBits = DemandedMask &
+
+ APInt InputDemandedBits = DemandedMask &
APInt::getLowBitsSet(BitWidth, SrcBitWidth);
APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
@@ -445,7 +459,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// bit is demanded.
if ((NewBits & DemandedMask) != 0)
InputDemandedBits.setBit(SrcBitWidth-1);
-
+
InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
@@ -455,8 +469,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
InputDemandedBits = InputDemandedBits.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
-
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
@@ -476,7 +490,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// are not demanded, then the add doesn't demand them from its input
// either.
unsigned NLZ = DemandedMask.countLeadingZeros();
-
+
// If there is a constant on the RHS, there are a variety of xformations
// we can do.
if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -484,13 +498,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// won't work if the RHS is zero.
if (RHS->isZero())
break;
-
+
// If the top bit of the output is demanded, demand everything from the
// input. Otherwise, we demand all the input bits except NLZ top bits.
APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
// Find information about known zero/one bits in the input.
- if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
+ if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
LHSKnownZero, LHSKnownOne, Depth+1))
return I;
@@ -498,11 +512,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// the constant.
if (ShrinkDemandedConstant(I, 1, InDemandedBits))
return I;
-
+
// Avoid excess work.
if (LHSKnownZero == 0 && LHSKnownOne == 0)
break;
-
+
// Turn it into OR if input bits are zero.
if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
Instruction *Or =
@@ -510,26 +524,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
I->getName());
return InsertNewInstWith(Or, *I);
}
-
+
// We can say something about the output known-zero and known-one bits,
// depending on potential carries from the input constant and the
// unknowns. For example if the LHS is known to have at most the 0x0F0F0
// bits set and the RHS constant is 0x01001, then we know we have a known
// one mask of 0x00001 and a known zero mask of 0xE0F0E.
-
+
// To compute this, we first compute the potential carry bits. These are
// the bits which may be modified. I'm not aware of a better way to do
// this scan.
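      // (Underlying identity: for any A and B, (A + B) ^ (A ^ B) is exactly
      // the set of bit positions that receive a carry-in; here A is taken to
      // be ~LHSKnownZero, the largest value the LHS can assume.)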
const APInt &RHSVal = RHS->getValue();
APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
-
+
// Now that we know which bits have carries, compute the known-1/0 sets.
-
+
// Bits are known one if they are known zero in one operand and one in the
// other, and there is no input carry.
- KnownOne = ((LHSKnownZero & RHSVal) |
+ KnownOne = ((LHSKnownZero & RHSVal) |
(LHSKnownOne & ~RHSVal)) & ~CarryBits;
-
+
// Bits are known zero if they are known zero in both operands and there
// is no input carry.
KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
@@ -580,17 +594,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::Shl:
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ {
+ Value *VarX; ConstantInt *C1;
+ if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
+ Instruction *Shr = cast<Instruction>(I->getOperand(0));
+ Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
+ KnownZero, KnownOne);
+ if (R)
+ return R;
+ }
+ }
+
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
-
+
// If the shift is NUW/NSW, then it does demand the high bits.
ShlOperator *IOp = cast<ShlOperator>(I);
if (IOp->hasNoSignedWrap())
DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
else if (IOp->hasNoUnsignedWrap())
DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
-
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
+
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
@@ -605,15 +630,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// For a logical shift right
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
-
+
// Unsigned shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
-
+
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (cast<LShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
-
+
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
@@ -637,28 +662,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Instruction *NewVal = BinaryOperator::CreateLShr(
I->getOperand(0), I->getOperand(1), I->getName());
return InsertNewInstWith(NewVal, *I);
- }
+ }
// If the sign bit is the only bit demanded by this ashr, then there is no
// need to do it, the shift doesn't change the high bit.
if (DemandedMask.isSignBit())
return I->getOperand(0);
-
+
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
-
+
// Signed shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
// If any of the "high bits" are demanded, we should set the sign bit as
// demanded.
if (DemandedMask.countLeadingZeros() <= ShiftAmt)
DemandedMaskIn.setBit(BitWidth-1);
-
+
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (cast<AShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
-
+
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
@@ -667,15 +692,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
-
+
// Handle the sign bits.
APInt SignBit(APInt::getSignBit(BitWidth));
// Adjust to where it is now in the mask.
- SignBit = APIntOps::lshr(SignBit, ShiftAmt);
-
+ SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
- if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
+ if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
(HighBits & ~DemandedMask) == HighBits) {
// Perform the logical shift right.
BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
@@ -718,7 +743,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
KnownOne |= ~LowBits;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
}
}
@@ -756,7 +781,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// just shift the input byte into position to eliminate the bswap.
unsigned NLZ = DemandedMask.countLeadingZeros();
unsigned NTZ = DemandedMask.countTrailingZeros();
-
+
// Round NTZ down to the next byte. If we have 11 trailing zeros, then
// we need all the bits down to bit 8. Likewise, round NLZ. If we
// have 14 leading zeros, round to 8.
@@ -766,7 +791,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (BitWidth-NLZ-NTZ == 8) {
unsigned ResultBit = NTZ;
unsigned InputBit = BitWidth-NTZ-8;
-
+
// Replace this with either a left or right shift to get the byte into
// the right place.
Instruction *NewVal;
@@ -779,7 +804,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
NewVal->takeName(I);
return InsertNewInstWith(NewVal, *I);
}
-
+
// TODO: Could compute known zero/one bits based on the input.
break;
}
@@ -792,7 +817,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
break;
}
-
+
// If the client is only demanding bits that we know, return the known
// constant.
if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
@@ -800,6 +825,81 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return 0;
}
+/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
+/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
+/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
+/// of "C2-C1".
+///
+/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
+/// ..., bn}, without considering the specific value X holds.
+/// This transformation is legal iff one of the following conditions holds:
+/// 1) All the bits in S are 0; in this case E1 == E2.
+/// 2) We don't care about the bits in S, per the input DemandedMask.
+/// 3) A combination of 1) and 2): some bits in S are 0, and we don't care
+///    about the rest.
+///
+/// Currently we only test condition 2).
+///
+/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
+/// not successful.
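+///
+/// For instance, E1 = (X lsr 4) << 8 and E2 = X << 4 differ only in bits
+/// 4..7, i.e. S = {b4, b5, b6, b7}; if DemandedMask has those four bits
+/// clear, E1 can be rewritten as E2.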
+Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
+ Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
+
+ unsigned ShlAmt = cast<ConstantInt>(Shl->getOperand(1))->getZExtValue();
+ unsigned ShrAmt = cast<ConstantInt>(Shr->getOperand(1))->getZExtValue();
+
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
+ KnownZero &= DemandedMask;
+
+ if (ShlAmt == 0 || ShrAmt == 0)
+ return 0;
+
+ Value *VarX = Shr->getOperand(0);
+ Type *Ty = VarX->getType();
+
+ APInt BitMask1(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
+ APInt BitMask2(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
+
+ bool isLshr = (Shr->getOpcode() == Instruction::LShr);
+ BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
+ (BitMask1.ashr(ShrAmt) << ShlAmt);
+
+ if (ShrAmt <= ShlAmt) {
+ BitMask2 <<= (ShlAmt - ShrAmt);
+ } else {
+ BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
+ BitMask2.ashr(ShrAmt - ShlAmt);
+ }
+
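+ // (BitMask1 is all-ones pushed through the original shr-then-shl pair;
+ // BitMask2 is all-ones pushed through the single replacement shift. Where
+ // the two agree under DemandedMask, E1 and E2 are indistinguishable.
+ // E.g., for a 16-bit X with ShrAmt = 4 and ShlAmt = 8, BitMask1 = 0xFF00
+ // and BitMask2 = 0xFFF0, differing exactly in bits 4..7.)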
+ // Check if condition 2) (see the comment on this function) is satisfied.
+ if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
+ if (ShrAmt == ShlAmt)
+ return VarX;
+
+ if (!Shr->hasOneUse())
+ return 0;
+
+ BinaryOperator *New;
+ if (ShrAmt < ShlAmt) {
+ Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
+ New = BinaryOperator::CreateShl(VarX, Amt);
+ BinaryOperator *Orig = cast<BinaryOperator>(Shl);
+ New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
+ } else {
+ Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
+ New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
+ BinaryOperator::CreateAShr(VarX, Amt);
+ if (cast<BinaryOperator>(Shr)->isExact())
+ New->setIsExact(true);
+ }
+
+ return InsertNewInstWith(New, *Shl);
+ }
+
+ return 0;
+}
/// SimplifyDemandedVectorElts - The specified value produces a vector with
/// any number of elements. DemandedElts contains the set of elements that are
@@ -821,14 +921,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts = EltMask;
return 0;
}
-
+
if (DemandedElts == 0) { // If nothing is demanded, provide undef.
UndefElts = EltMask;
return UndefValue::get(V->getType());
}
UndefElts = 0;
-
+
// Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
if (Constant *C = dyn_cast<Constant>(V)) {
// Check if this is identity. If so, return 0 since we are not simplifying
@@ -838,7 +938,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Type *EltTy = cast<VectorType>(V->getType())->getElementType();
Constant *Undef = UndefValue::get(EltTy);
-
+
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0; i != VWidth; ++i) {
if (!DemandedElts[i]) { // If not demanded, set to undef.
@@ -846,10 +946,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts.setBit(i);
continue;
}
-
+
Constant *Elt = C->getAggregateElement(i);
if (Elt == 0) return 0;
-
+
if (isa<UndefValue>(Elt)) { // Already undef.
Elts.push_back(Undef);
UndefElts.setBit(i);
@@ -857,12 +957,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Elts.push_back(Elt);
}
}
-
+
// If we changed the constant, return it.
Constant *NewCV = ConstantVector::get(Elts);
return NewCV != C ? NewCV : 0;
}
-
+
// Limit search depth.
if (Depth == 10)
return 0;
@@ -881,16 +981,16 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Conservatively assume that all elements are needed.
DemandedElts = EltMask;
}
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return 0; // Only analyze instructions.
-
+
bool MadeChange = false;
APInt UndefElts2(VWidth, 0);
Value *TmpV;
switch (I->getOpcode()) {
default: break;
-
+
case Instruction::InsertElement: {
// If this is a variable index, we don't know which element it overwrites.
// demand exactly the same input as we produce.
@@ -903,7 +1003,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
}
-
+
// If this is inserting an element that isn't demanded, remove this
// insertelement.
unsigned IdxNo = Idx->getZExtValue();
@@ -911,7 +1011,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Worklist.Add(I);
return I->getOperand(0);
}
-
+
// Otherwise, the element inserted overwrites whatever was there, so the
// input demanded set is simpler than the output set.
APInt DemandedElts2 = DemandedElts;
@@ -1007,7 +1107,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
UndefElts2, Depth+1);
if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
-
+
// Output elements are undefined if both are undefined.
UndefElts &= UndefElts2;
break;
@@ -1028,7 +1128,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
} else if (VWidth > InVWidth) {
// Untested so far.
break;
-
+
// If there are more elements in the result than there are in the source,
// then an input element is live if any of the corresponding output
// elements are live.
@@ -1040,7 +1140,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
} else {
// Untested so far.
break;
-
+
// If there are more elements in the source than there are in the result,
// then an input element is live if the corresponding output element is
// live.
@@ -1049,7 +1149,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (DemandedElts[InIdx/Ratio])
InputDemandedElts.setBit(InIdx);
}
-
+
// div/rem demand all inputs, because they don't want divide by zero.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
UndefElts2, Depth+1);
@@ -1057,7 +1157,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
I->setOperand(0, TmpV);
MadeChange = true;
}
-
+
UndefElts = UndefElts2;
if (VWidth > InVWidth) {
llvm_unreachable("Unimp");
@@ -1092,7 +1192,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
UndefElts2, Depth+1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
-
+
// Output elements are undefined if both are undefined. Consider things
// like undef&0. The result is known zero, not undef.
UndefElts &= UndefElts2;
@@ -1103,13 +1203,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts, Depth+1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
-
+
case Instruction::Call: {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
if (!II) break;
switch (II->getIntrinsicID()) {
default: break;
-
+
// Binary vector operations that work column-wise. A dest element is a
// function of the corresponding input elements from the two inputs.
case Intrinsic::x86_sse_sub_ss:
@@ -1140,11 +1240,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Value *LHS = II->getArgOperand(0);
Value *RHS = II->getArgOperand(1);
// Extract the element as scalars.
- LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
+ LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
-
+
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Case stmts out of sync!");
case Intrinsic::x86_sse_sub_ss:
@@ -1158,7 +1258,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
II->getName()), *II);
break;
}
-
+
Instruction *New =
InsertElementInst::Create(
UndefValue::get(II->getType()), TmpV,
@@ -1166,9 +1266,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
II->getName());
InsertNewInstWith(New, *II);
return New;
- }
+ }
}
-
+
// Output elements are undefined if both are undefined. Consider things
// like undef&0. The result is known zero, not undef.
UndefElts &= UndefElts2;
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index dd7ea14e8a89..4f71db1a4b09 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -13,7 +13,9 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
+#include "llvm/Support/PatternMatch.h"
using namespace llvm;
+using namespace PatternMatch;
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
/// is to leave as a vector operation. isConstant indicates whether we're
@@ -92,6 +94,13 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
}
+ // Extract a value from a vector add operation with a constant zero.
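+ // E.g., lane 1 of (%v + <i32 7, i32 0>) is simply lane 1 of %v, because
+ // the constant contributes zero in that lane.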
+ Value *Val = 0; Constant *Con = 0;
+ if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
+ if (Constant *Elt = Con->getAggregateElement(EltNo))
+ if (Elt->isNullValue())
+ return FindScalarElement(Val, EltNo);
+ }
+
// Otherwise, we don't know.
return 0;
}
@@ -295,12 +304,12 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
return V;
}
-
+
if (isa<ConstantAggregateZero>(V)) {
Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
return V;
}
-
+
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
// If this is an insert of an extract from some other vector, include it.
Value *VecOp = IEI->getOperand(0);
@@ -595,12 +604,12 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// ShuffleVectorInst is equivalent to the original one.
for (unsigned i = 0; i < VWidth; ++i) {
int eltMask;
- if (Mask[i] == -1) {
+ if (Mask[i] < 0) {
// This element is an undef value.
eltMask = -1;
} else if (Mask[i] < (int)LHSWidth) {
// This element is from left hand side vector operand.
- //
+ //
// If LHS is going to be replaced (case 1, 2, or 4), calculate the
// new mask value for the element.
if (newLHS != LHS) {
@@ -609,8 +618,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// with a -1 mask value.
if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
eltMask = -1;
- }
- else
+ } else
eltMask = Mask[i];
} else {
// This element is from right hand side vector operand
@@ -630,8 +638,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
&& "should have been check above");
eltMask = -1;
}
- }
- else
+ } else
eltMask = Mask[i]-LHSWidth;
// If LHS's width is changed, shift the mask value accordingly.
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index ea654ae9ed0a..49efce5c4f22 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -11,28 +11,28 @@
#define INSTCOMBINE_WORKLIST_H
#define DEBUG_TYPE "instcombine"
-#include "llvm/Instruction.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
-
+
/// InstCombineWorklist - This is the worklist management logic for
/// InstCombine.
class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
SmallVector<Instruction*, 256> Worklist;
DenseMap<Instruction*, unsigned> WorklistMap;
-
+
void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION;
InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION;
public:
InstCombineWorklist() {}
-
+
bool isEmpty() const { return Worklist.empty(); }
-
+
/// Add - Add the specified instruction to the worklist if it isn't already
/// in it.
void Add(Instruction *I) {
@@ -41,12 +41,12 @@ public:
Worklist.push_back(I);
}
}
-
+
void AddValue(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
Add(I);
}
-
+
  /// AddInitialGroup - Add the specified batch of stuff in reverse order,
/// which should only be done when the worklist is empty and when the group
/// has no duplicates.
@@ -61,25 +61,25 @@ public:
Worklist.push_back(I);
}
}
-
+
// Remove - remove I from the worklist if it exists.
void Remove(Instruction *I) {
DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
if (It == WorklistMap.end()) return; // Not in worklist.
-
+
// Don't bother moving everything down, just null out the slot.
Worklist[It->second] = 0;
-
+
WorklistMap.erase(It);
}
-
+
Instruction *RemoveOne() {
Instruction *I = Worklist.back();
Worklist.pop_back();
WorklistMap.erase(I);
return I;
}
-
+
/// AddUsersToWorkList - When an instruction is simplified, add all users of
  /// the instruction to the worklist because they might now simplify
  /// further.
@@ -89,18 +89,18 @@ public:
UI != UE; ++UI)
Add(cast<Instruction>(*UI));
}
-
-
+
+
/// Zap - check that the worklist is empty and nuke the backing store for
/// the map if it is large.
void Zap() {
assert(WorklistMap.empty() && "Worklist empty, but map not?");
-
+
// Do an explicit clear, this shrinks the map if needed.
WorklistMap.clear();
}
};
-
+
} // end namespace llvm.
#endif
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9a46f25e66ff..c6115e3e91fe 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -36,22 +36,23 @@
#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Scalar.h"
#include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm-c/Initialization.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <climits>
using namespace llvm;
@@ -65,6 +66,11 @@ STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc , "Number of reassociations");
+static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable unsafe double to float "
+ "shrinking for math lib calls"));
+
// Initialization Routines
void llvm::initializeInstCombine(PassRegistry &Registry) {
initializeInstCombinerPass(Registry);
@@ -156,6 +162,21 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
return !Overflow;
}
+/// Conservatively clears subclassOptionalData after a reassociation or
+/// commutation. Fast-math flags are preserved when applicable, since
+/// reassociation does not invalidate them.
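+/// E.g., after reassociating integer adds, nsw/nuw are dropped because they
+/// may no longer hold, while a 'fast' flag on an fadd survives.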
+static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
+ FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
+ if (!FPMO) {
+ I.clearSubclassOptionalData();
+ return;
+ }
+
+ FastMathFlags FMF = I.getFastMathFlags();
+ I.clearSubclassOptionalData();
+ I.setFastMathFlags(FMF);
+}
+
/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
/// operators which are associative or commutative:
//
@@ -213,7 +234,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.clearSubclassOptionalData();
I.setHasNoSignedWrap(true);
} else {
- I.clearSubclassOptionalData();
+ ClearSubclassDataAfterReassociation(I);
}
Changed = true;
@@ -235,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, C);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- I.clearSubclassOptionalData();
+ ClearSubclassDataAfterReassociation(I);
Changed = true;
++NumReassoc;
continue;
@@ -257,7 +278,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, B);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- I.clearSubclassOptionalData();
+ ClearSubclassDataAfterReassociation(I);
Changed = true;
++NumReassoc;
continue;
@@ -277,7 +298,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, V);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- I.clearSubclassOptionalData();
+ ClearSubclassDataAfterReassociation(I);
Changed = true;
++NumReassoc;
continue;
@@ -304,7 +325,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, Folded);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- I.clearSubclassOptionalData();
+ ClearSubclassDataAfterReassociation(I);
Changed = true;
continue;
@@ -510,8 +531,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
// instruction if the LHS is a constant negative zero (which is the 'negate'
// form).
//
-Value *InstCombiner::dyn_castFNegVal(Value *V) const {
- if (BinaryOperator::isFNeg(V))
+Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
+ if (BinaryOperator::isFNeg(V, IgnoreZeroSign))
return BinaryOperator::getFNegArgument(V);
// Constants can be considered to be negated values if they can be folded.
@@ -1303,17 +1324,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
/// into a gep of the original struct. This is important for SROA and alias
/// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+ APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
if (TD &&
- !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() &&
+ !isa<BitCastInst>(BCI->getOperand(0)) &&
+ GEP.accumulateConstantOffset(*TD, Offset) &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
- // Determine how much the GEP moves the pointer.
- SmallVector<Value*, 8> Ops(GEP.idx_begin(), GEP.idx_end());
- int64_t Offset = TD->getIndexedOffset(GEP.getPointerOperandType(), Ops);
-
// If this GEP instruction doesn't move the pointer, just replace the GEP
// with a bitcast of the real input to the dest type.
- if (Offset == 0) {
+ if (!Offset) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
if (isa<AllocaInst>(BCI->getOperand(0)) ||
@@ -1337,7 +1356,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> NewIndices;
Type *InTy =
cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
- if (FindElementAtOffset(InTy, Offset, NewIndices)) {
+ if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) {
Value *NGEP = GEP.isInBounds() ?
Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
Builder->CreateGEP(BCI->getOperand(0), NewIndices);
@@ -1471,6 +1490,62 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
return 0;
}
+/// \brief Move the call to free before a NULL test.
+///
+/// Check if this call to free is reached only after its argument has been
+/// tested against NULL (property 0).
+/// If so, it is legal to move this call into its predecessor block.
+///
+/// The move is performed only if the block containing the call to free
+/// will be removed, i.e.:
+/// 1. it has only one predecessor P, and P has two successors
+/// 2. it contains the call and an unconditional branch
+/// 3. its successor is the same as its predecessor's successor
+///
+/// Profitability is not a concern here; this function should be called
+/// only if the caller knows the transformation would be profitable
+/// (e.g., for code size).
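+///
+/// A sketch of the matched shape (block and value names are illustrative):
+///   pred:    %c = icmp eq i8* %p, null
+///            br i1 %c, label %succ, label %free_bb
+///   free_bb: call void @free(i8* %p)
+///            br label %succ
+///   succ:    ...
+/// After the transformation the call to free sits at the end of pred,
+/// immediately before its terminator.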
+static Instruction *
+tryToMoveFreeBeforeNullTest(CallInst &FI) {
+ Value *Op = FI.getArgOperand(0);
+ BasicBlock *FreeInstrBB = FI.getParent();
+ BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
+
+ // Validate part of constraint #1: Only one predecessor.
+ // FIXME: We could extend this to multiple predecessors, but then we would
+ // duplicate the call to free in each of them, which may not be profitable
+ // even for code size.
+ if (!PredBB)
+ return 0;
+
+ // Validate constraint #2: Does this block contain only the call to
+ // free and an unconditional branch?
+ // FIXME: We could check if we can speculate everything in the
+ // predecessor block
+ if (FreeInstrBB->size() != 2)
+ return 0;
+ BasicBlock *SuccBB;
+ if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB)))
+ return 0;
+
+ // Validate the rest of constraint #1 by matching on the pred branch.
+ TerminatorInst *TI = PredBB->getTerminator();
+ BasicBlock *TrueBB, *FalseBB;
+ ICmpInst::Predicate Pred;
+ if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB)))
+ return 0;
+ if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
+ return 0;
+
+ // Validate constraint #3: Ensure the null case just falls through.
+ if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
+ return 0;
+ assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
+ "Broken CFG: missing edge from predecessor to successor");
+
+ FI.moveBefore(TI);
+ return &FI;
+}
Instruction *InstCombiner::visitFree(CallInst &FI) {
@@ -1489,6 +1564,16 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
if (isa<ConstantPointerNull>(Op))
return EraseInstFromFunction(FI);
+ // If we optimize for code size, try to move the call to free before the
+ // null test so that SimplifyCFG can remove the empty block and dead code
+ // elimination can remove the branch. I.e., this helps to turn:
+ //   if (foo) free(foo);
+ // into
+ //   free(foo);
+ if (MinimizeSize)
+ if (Instruction *I = tryToMoveFreeBeforeNullTest(FI))
+ return I;
+
return 0;
}
@@ -2374,7 +2459,7 @@ public:
InstCombinerLibCallSimplifier(const DataLayout *TD,
const TargetLibraryInfo *TLI,
InstCombiner *IC)
- : LibCallSimplifier(TD, TLI) {
+ : LibCallSimplifier(TD, TLI, UnsafeFPShrink) {
this->IC = IC;
}
@@ -2389,6 +2474,9 @@ public:
bool InstCombiner::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<DataLayout>();
TLI = &getAnalysis<TargetLibraryInfo>();
+ // Minimizing size?
+ MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize);
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b7be4625ca8d..623c4705061e 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -15,41 +15,47 @@
#define DEBUG_TYPE "asan"
-#include "BlackList.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BlackList.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
-
-#include <string>
#include <algorithm>
+#include <string>
using namespace llvm;
static const uint64_t kDefaultShadowScale = 3;
static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
-static const uint64_t kDefaultShadowOffsetAndroid = 0;
+static const uint64_t kDefaultShort64bitShadowOffset = 0x7FFF8000; // < 2G.
+static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
@@ -59,16 +65,22 @@ static const char *kAsanModuleCtorName = "asan.module_ctor";
static const char *kAsanModuleDtorName = "asan.module_dtor";
static const int kAsanCtorAndCtorPriority = 1;
static const char *kAsanReportErrorTemplate = "__asan_report_";
+static const char *kAsanReportLoadN = "__asan_report_load_n";
+static const char *kAsanReportStoreN = "__asan_report_store_n";
static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *kAsanInitName = "__asan_init";
+static const char *kAsanInitName = "__asan_init_v3";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *kAsanMappingScaleName = "__asan_mapping_scale";
static const char *kAsanStackMallocName = "__asan_stack_malloc";
static const char *kAsanStackFreeName = "__asan_stack_free";
+static const char *kAsanGenPrefix = "__asan_gen_";
+static const char *kAsanPoisonStackMemoryName = "__asan_poison_stack_memory";
+static const char *kAsanUnpoisonStackMemoryName =
+ "__asan_unpoison_stack_memory";
static const int kAsanStackLeftRedzoneMagic = 0xf1;
static const int kAsanStackMidRedzoneMagic = 0xf2;
@@ -112,9 +124,10 @@ static cl::opt<bool> ClInitializers("asan-initialization-order",
cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClMemIntrin("asan-memintrin",
cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
-// This flag may need to be replaced with -fasan-blacklist.
-static cl::opt<std::string> ClBlackListFile("asan-blacklist",
- cl::desc("File containing the list of functions to ignore "
+static cl::opt<bool> ClRealignStack("asan-realign-stack",
+ cl::desc("Realign stack to 32"), cl::Hidden, cl::init(true));
+static cl::opt<std::string> ClBlacklistFile("asan-blacklist",
+ cl::desc("File containing the list of objects to ignore "
"during instrumentation"), cl::Hidden);
// These flags allow to change the shadow mapping.
@@ -124,6 +137,9 @@ static cl::opt<int> ClMappingScale("asan-mapping-scale",
cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
static cl::opt<int> ClMappingOffsetLog("asan-mapping-offset-log",
cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1));
+static cl::opt<bool> ClShort64BitOffset("asan-short-64bit-mapping-offset",
+ cl::desc("Use short immediate constant as the mapping offset for 64bit"),
+ cl::Hidden, cl::init(true));
// Optimization flags. Not user visible, used mostly for testing
// and benchmarking the tool.
@@ -135,6 +151,10 @@ static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp",
static cl::opt<bool> ClOptGlobals("asan-opt-globals",
cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClCheckLifetime("asan-check-lifetime",
+ cl::desc("Use llvm.lifetime intrinsics to insert extra checks"),
+ cl::Hidden, cl::init(false));
+
// Debug flags.
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
cl::init(0));
@@ -148,74 +168,332 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
cl::Hidden, cl::init(-1));
namespace {
+/// A set of dynamically initialized globals extracted from metadata.
+class SetOfDynamicallyInitializedGlobals {
+ public:
+ void Init(Module& M) {
+ // Clang generates metadata identifying all dynamically initialized globals.
+ NamedMDNode *DynamicGlobals =
+ M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
+ if (!DynamicGlobals)
+ return;
+ for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
+ MDNode *MDN = DynamicGlobals->getOperand(i);
+ assert(MDN->getNumOperands() == 1);
+ Value *VG = MDN->getOperand(0);
+ // The optimizer may optimize away a global entirely, in which case we
+ // cannot instrument access to it.
+ if (!VG)
+ continue;
+ DynInitGlobals.insert(cast<GlobalVariable>(VG));
+ }
+ }
+ bool Contains(GlobalVariable *G) { return DynInitGlobals.count(G) != 0; }
+ private:
+ SmallSet<GlobalValue*, 32> DynInitGlobals;
+};
+
+/// This struct defines the shadow mapping using the rule:
+/// shadow = (mem >> Scale) ADD-or-OR Offset.
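+/// E.g., with Scale = 3 and Offset = 0x7FFF8000 (the short x86-64 offset
+/// defined above), the shadow byte for address 0x400000 lives at
+/// (0x400000 >> 3) + 0x7FFF8000 = 0x80078000.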
+struct ShadowMapping {
+ int Scale;
+ uint64_t Offset;
+ bool OrShadowOffset;
+};
+
+static ShadowMapping getShadowMapping(const Module &M, int LongSize,
+ bool ZeroBaseShadow) {
+ llvm::Triple TargetTriple(M.getTargetTriple());
+ bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
+ bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX;
+ bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64;
+ bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64;
+
+ ShadowMapping Mapping;
+
+ // OR-ing in the shadow offset is more efficient (at least on x86), but on
+ // ppc64 we have to use add, since the shadow offset is not necessarily
+ // 1/8-th of the address space.
+ Mapping.OrShadowOffset = !IsPPC64 && !ClShort64BitOffset;
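+ // (X | Y equals X + Y whenever X & Y == 0, so the OR form is a valid
+ // stand-in for the ADD only on platforms where the shifted address and
+ // the offset never share set bits.)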
+
+ Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 :
+ (LongSize == 32 ? kDefaultShadowOffset32 :
+ IsPPC64 ? kPPC64_ShadowOffset64 : kDefaultShadowOffset64);
+ if (!ZeroBaseShadow && ClShort64BitOffset && IsX86_64 && !IsMacOSX) {
+ assert(LongSize == 64);
+ Mapping.Offset = kDefaultShort64bitShadowOffset;
+ }
+ if (!ZeroBaseShadow && ClMappingOffsetLog >= 0) {
+ // Zero offset log is the special case.
+ Mapping.Offset = (ClMappingOffsetLog == 0) ? 0 : 1ULL << ClMappingOffsetLog;
+ }
+
+ Mapping.Scale = kDefaultShadowScale;
+ if (ClMappingScale) {
+ Mapping.Scale = ClMappingScale;
+ }
+
+ return Mapping;
+}
+
+static size_t RedzoneSizeForScale(int MappingScale) {
+ // Redzone used for stack and globals is at least 32 bytes.
+ // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
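+ // E.g., the default scale of 3 yields std::max(32, 1 << 3) = 32 bytes.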
+ return std::max(32U, 1U << MappingScale);
+}
+
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public FunctionPass {
- AddressSanitizer();
- virtual const char *getPassName() const;
+ AddressSanitizer(bool CheckInitOrder = true,
+ bool CheckUseAfterReturn = false,
+ bool CheckLifetime = false,
+ StringRef BlacklistFile = StringRef(),
+ bool ZeroBaseShadow = false)
+ : FunctionPass(ID),
+ CheckInitOrder(CheckInitOrder || ClInitializers),
+ CheckUseAfterReturn(CheckUseAfterReturn || ClUseAfterReturn),
+ CheckLifetime(CheckLifetime || ClCheckLifetime),
+ BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
+ : BlacklistFile),
+ ZeroBaseShadow(ZeroBaseShadow) {}
+ virtual const char *getPassName() const {
+ return "AddressSanitizerFunctionPass";
+ }
void instrumentMop(Instruction *I);
- void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,
- Value *Addr, uint32_t TypeSize, bool IsWrite);
+ void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
+ Value *Addr, uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
- bool IsWrite, size_t AccessSizeIndex);
+ bool IsWrite, size_t AccessSizeIndex,
+ Value *SizeArgument);
bool instrumentMemIntrinsic(MemIntrinsic *MI);
void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
Value *Size,
Instruction *InsertBefore, bool IsWrite);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool runOnFunction(Function &F);
- void createInitializerPoisonCalls(Module &M,
- Value *FirstAddr, Value *LastAddr);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
- bool poisonStackInFunction(Function &F);
+ void emitShadowMapping(Module &M, IRBuilder<> &IRB) const;
virtual bool doInitialization(Module &M);
- virtual bool doFinalization(Module &M);
- bool insertGlobalRedzones(Module &M);
static char ID; // Pass identification, replacement for typeid
private:
- uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
- Type *Ty = AI->getAllocatedType();
- uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
- return SizeInBytes;
- }
- uint64_t getAlignedSize(uint64_t SizeInBytes) {
- return ((SizeInBytes + RedzoneSize - 1)
- / RedzoneSize) * RedzoneSize;
- }
- uint64_t getAlignedAllocaSize(AllocaInst *AI) {
- uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
- return getAlignedSize(SizeInBytes);
- }
+ void initializeCallbacks(Module &M);
- Function *checkInterfaceFunction(Constant *FuncOrBitcast);
bool ShouldInstrumentGlobal(GlobalVariable *G);
- void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
- Value *ShadowBase, bool DoPoison);
bool LooksLikeCodeInBug11395(Instruction *I);
void FindDynamicInitializers(Module &M);
- bool HasDynamicInitializer(GlobalVariable *G);
+
+ bool CheckInitOrder;
+ bool CheckUseAfterReturn;
+ bool CheckLifetime;
+ SmallString<64> BlacklistFile;
+ bool ZeroBaseShadow;
LLVMContext *C;
DataLayout *TD;
- uint64_t MappingOffset;
- int MappingScale;
- size_t RedzoneSize;
int LongSize;
Type *IntptrTy;
- Type *IntptrPtrTy;
+ ShadowMapping Mapping;
Function *AsanCtorFunction;
Function *AsanInitFunction;
- Function *AsanStackMallocFunc, *AsanStackFreeFunc;
Function *AsanHandleNoReturnFunc;
- Instruction *CtorInsertBefore;
OwningPtr<BlackList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
+ // This array is indexed by AccessIsWrite.
+ Function *AsanErrorCallbackSized[2];
InlineAsm *EmptyAsm;
- SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals;
- SmallSet<GlobalValue*, 32> GlobalsCreatedByAsan;
+ SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+
+ friend struct FunctionStackPoisoner;
+};
+
+class AddressSanitizerModule : public ModulePass {
+ public:
+ AddressSanitizerModule(bool CheckInitOrder = true,
+ StringRef BlacklistFile = StringRef(),
+ bool ZeroBaseShadow = false)
+ : ModulePass(ID),
+ CheckInitOrder(CheckInitOrder || ClInitializers),
+ BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
+ : BlacklistFile),
+ ZeroBaseShadow(ZeroBaseShadow) {}
+ bool runOnModule(Module &M);
+ static char ID; // Pass identification, replacement for typeid
+ virtual const char *getPassName() const {
+ return "AddressSanitizerModule";
+ }
+
+ private:
+ void initializeCallbacks(Module &M);
+
+ bool ShouldInstrumentGlobal(GlobalVariable *G);
+ void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
+ size_t RedzoneSize() const {
+ return RedzoneSizeForScale(Mapping.Scale);
+ }
+
+ bool CheckInitOrder;
+ SmallString<64> BlacklistFile;
+ bool ZeroBaseShadow;
+
+ OwningPtr<BlackList> BL;
+ SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+ Type *IntptrTy;
+ LLVMContext *C;
+ DataLayout *TD;
+ ShadowMapping Mapping;
+ Function *AsanPoisonGlobals;
+ Function *AsanUnpoisonGlobals;
+ Function *AsanRegisterGlobals;
+ Function *AsanUnregisterGlobals;
+};
+
+// Stack poisoning does not play well with exception handling.
+// When an exception is thrown, we essentially bypass the code
+// that unpoisons the stack. This is why the run-time library has
+// to intercept __cxa_throw (as well as longjmp, etc) and unpoison the entire
+// stack in the interceptor. This, however, does not work inside the
+// actual function that catches the exception, most likely because the
+// compiler hoists the load of the shadow value somewhere too high.
+// This causes asan to report a non-existent bug on 453.povray.
+// It sounds like an LLVM bug.
+struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
+ Function &F;
+ AddressSanitizer &ASan;
+ DIBuilder DIB;
+ LLVMContext *C;
+ Type *IntptrTy;
+ Type *IntptrPtrTy;
+ ShadowMapping Mapping;
+
+ SmallVector<AllocaInst*, 16> AllocaVec;
+ SmallVector<Instruction*, 8> RetVec;
+ uint64_t TotalStackSize;
+ unsigned StackAlignment;
+
+ Function *AsanStackMallocFunc, *AsanStackFreeFunc;
+ Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
+
+ // Stores the location and arguments of a poisoning/unpoisoning call for
+ // an alloca.
+ struct AllocaPoisonCall {
+ IntrinsicInst *InsBefore;
+ uint64_t Size;
+ bool DoPoison;
+ };
+ SmallVector<AllocaPoisonCall, 8> AllocaPoisonCallVec;
+
+ // Maps a Value to the AllocaInst from which the Value originates.
+ typedef DenseMap<Value*, AllocaInst*> AllocaForValueMapTy;
+ AllocaForValueMapTy AllocaForValue;
+
+ FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
+ : F(F), ASan(ASan), DIB(*F.getParent()), C(ASan.C),
+ IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)),
+ Mapping(ASan.Mapping),
+ TotalStackSize(0), StackAlignment(1 << Mapping.Scale) {}
+
+ bool runOnFunction() {
+ if (!ClStack) return false;
+ // Collect alloca, ret, lifetime instructions etc.
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+ BasicBlock *BB = *DI;
+ visit(*BB);
+ }
+ if (AllocaVec.empty()) return false;
+
+ initializeCallbacks(*F.getParent());
+
+ poisonStack();
+
+ if (ClDebugStack) {
+ DEBUG(dbgs() << F);
+ }
+ return true;
+ }
+
+ // Finds all static Alloca instructions, puts poisoned red zones around
+ // all of them, and then unpoisons everything before the function returns.
+ void poisonStack();
+
+ // ----------------------- Visitors.
+ /// \brief Collect all Ret instructions.
+ void visitReturnInst(ReturnInst &RI) {
+ RetVec.push_back(&RI);
+ }
+
+ /// \brief Collect Alloca instructions we want (and can) handle.
+ void visitAllocaInst(AllocaInst &AI) {
+ if (!isInterestingAlloca(AI)) return;
+
+ StackAlignment = std::max(StackAlignment, AI.getAlignment());
+ AllocaVec.push_back(&AI);
+ uint64_t AlignedSize = getAlignedAllocaSize(&AI);
+ TotalStackSize += AlignedSize;
+ }
+
+ /// \brief Collect lifetime intrinsic calls to check for use-after-scope
+ /// errors.
+ void visitIntrinsicInst(IntrinsicInst &II) {
+ if (!ASan.CheckLifetime) return;
+ Intrinsic::ID ID = II.getIntrinsicID();
+ if (ID != Intrinsic::lifetime_start &&
+ ID != Intrinsic::lifetime_end)
+ return;
+ // Found lifetime intrinsic, add ASan instrumentation if necessary.
+ ConstantInt *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
+ // A size argument of -1 means the size is undefined; don't do anything.
+ if (Size->isMinusOne()) return;
+ // Check that size doesn't saturate uint64_t and can
+ // be stored in IntptrTy.
+ const uint64_t SizeValue = Size->getValue().getLimitedValue();
+ if (SizeValue == ~0ULL ||
+ !ConstantInt::isValueValidForType(IntptrTy, SizeValue))
+ return;
+ // Find the alloca instruction that corresponds to the llvm.lifetime argument.
+ AllocaInst *AI = findAllocaForValue(II.getArgOperand(1));
+ if (!AI) return;
+ bool DoPoison = (ID == Intrinsic::lifetime_end);
+ AllocaPoisonCall APC = {&II, SizeValue, DoPoison};
+ AllocaPoisonCallVec.push_back(APC);
+ }
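+ // For reference, the calls matched above look roughly like this in IR
+ // (%x.i8 and the 32-byte size are illustrative):
+ //   call void @llvm.lifetime.start(i64 32, i8* %x.i8)
+ //   call void @llvm.lifetime.end(i64 32, i8* %x.i8)
+ // A size of i64 -1 denotes "unknown" and is rejected above.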
+
+ // ---------------------- Helpers.
+ void initializeCallbacks(Module &M);
+
+ // Check if we want (and can) handle this alloca.
+ bool isInterestingAlloca(AllocaInst &AI) {
+ return (!AI.isArrayAllocation() &&
+ AI.isStaticAlloca() &&
+ AI.getAllocatedType()->isSized());
+ }
+
+ size_t RedzoneSize() const {
+ return RedzoneSizeForScale(Mapping.Scale);
+ }
+ uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t SizeInBytes = ASan.TD->getTypeAllocSize(Ty);
+ return SizeInBytes;
+ }
+ uint64_t getAlignedSize(uint64_t SizeInBytes) {
+ size_t RZ = RedzoneSize();
+ return ((SizeInBytes + RZ - 1) / RZ) * RZ;
+ }
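+ // e.g. with RedzoneSize() == 32 (the value for the default Scale of 3):
+ // getAlignedSize(1) == 32, getAlignedSize(32) == 32, getAlignedSize(33) == 64.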
+ uint64_t getAlignedAllocaSize(AllocaInst *AI) {
+ uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
+ return getAlignedSize(SizeInBytes);
+ }
+ /// Finds the alloca instruction from which the given value originates.
+ AllocaInst *findAllocaForValue(Value *V);
+ void poisonRedZones(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
+ Value *ShadowBase, bool DoPoison);
+ void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB, bool DoPoison);
};
} // namespace
@@ -224,13 +502,21 @@ char AddressSanitizer::ID = 0;
INITIALIZE_PASS(AddressSanitizer, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
false, false)
-AddressSanitizer::AddressSanitizer() : FunctionPass(ID) { }
-FunctionPass *llvm::createAddressSanitizerPass() {
- return new AddressSanitizer();
+FunctionPass *llvm::createAddressSanitizerFunctionPass(
+ bool CheckInitOrder, bool CheckUseAfterReturn, bool CheckLifetime,
+ StringRef BlacklistFile, bool ZeroBaseShadow) {
+ return new AddressSanitizer(CheckInitOrder, CheckUseAfterReturn,
+ CheckLifetime, BlacklistFile, ZeroBaseShadow);
}
-const char *AddressSanitizer::getPassName() const {
- return "AddressSanitizer";
+char AddressSanitizerModule::ID = 0;
+INITIALIZE_PASS(AddressSanitizerModule, "asan-module",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs."
+ "ModulePass", false, false)
+ModulePass *llvm::createAddressSanitizerModulePass(
+ bool CheckInitOrder, StringRef BlacklistFile, bool ZeroBaseShadow) {
+ return new AddressSanitizerModule(CheckInitOrder, BlacklistFile,
+ ZeroBaseShadow);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -242,38 +528,44 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
// Create a constant for Str so that we can pass it to the run-time lib.
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
- return new GlobalVariable(M, StrConst->getType(), true,
- GlobalValue::PrivateLinkage, StrConst, "");
+ GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true,
+ GlobalValue::PrivateLinkage, StrConst,
+ kAsanGenPrefix);
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
+ return GV;
+}
+
+static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
+ return G->getName().find(kAsanGenPrefix) == 0;
}
Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
// Shadow >> scale
- Shadow = IRB.CreateLShr(Shadow, MappingScale);
- if (MappingOffset == 0)
+ Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
+ if (Mapping.Offset == 0)
return Shadow;
// (Shadow >> scale) | offset
- return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy,
- MappingOffset));
+ if (Mapping.OrShadowOffset)
+ return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
+ else
+ return IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
}
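+// A worked example (constants assumed, not prescriptive): with Scale == 3 and
+// Offset == 0x20000000, memToShadow(0x4000) yields
+//   (0x4000 >> 3) + 0x20000000 == 0x20000800;
+// the OR form is equivalent whenever the shifted address and the offset have
+// no bits in common.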
void AddressSanitizer::instrumentMemIntrinsicParam(
Instruction *OrigIns,
Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
+ IRBuilder<> IRB(InsertBefore);
+ if (Size->getType() != IntptrTy)
+ Size = IRB.CreateIntCast(Size, IntptrTy, false);
// Check the first byte.
- {
- IRBuilder<> IRB(InsertBefore);
- instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);
- }
+ instrumentAddress(OrigIns, InsertBefore, Addr, 8, IsWrite, Size);
// Check the last byte.
- {
- IRBuilder<> IRB(InsertBefore);
- Value *SizeMinusOne = IRB.CreateSub(
- Size, ConstantInt::get(Size->getType(), 1));
- SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
- Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne);
- instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);
- }
+ IRB.SetInsertPoint(InsertBefore);
+ Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1));
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ Value *AddrLast = IRB.CreateAdd(AddrLong, SizeMinusOne);
+ instrumentAddress(OrigIns, InsertBefore, AddrLast, 8, IsWrite, Size);
}
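+// e.g. for a memset of n bytes at p, this checks the shadow for p[0] and
+// p[n-1], passing n along so that a report can show the full intended size.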
// Instrument memset/memmove/memcpy
@@ -328,30 +620,6 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
return NULL;
}
-void AddressSanitizer::FindDynamicInitializers(Module& M) {
- // Clang generates metadata identifying all dynamically initialized globals.
- NamedMDNode *DynamicGlobals =
- M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
- if (!DynamicGlobals)
- return;
- for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
- MDNode *MDN = DynamicGlobals->getOperand(i);
- assert(MDN->getNumOperands() == 1);
- Value *VG = MDN->getOperand(0);
- // The optimizer may optimize away a global entirely, in which case we
- // cannot instrument access to it.
- if (!VG)
- continue;
-
- GlobalVariable *G = cast<GlobalVariable>(VG);
- DynamicallyInitializedGlobals.insert(G);
- }
-}
-// Returns true if a global variable is initialized dynamically in this TU.
-bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) {
- return DynamicallyInitializedGlobals.count(G);
-}
-
void AddressSanitizer::instrumentMop(Instruction *I) {
bool IsWrite = false;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
@@ -360,14 +628,12 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
// If initialization order checking is disabled, a simple access to a
// dynamically initialized global is always valid.
- if (!ClInitializers)
+ if (!CheckInitOrder)
return;
// If a global variable does not have dynamic initialization we don't
- // have to instrument it. However, if a global has external linkage, we
- // assume it has dynamic initialization, as it may have an initializer
- // in a different TU.
- if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
- !HasDynamicInitializer(G))
+ // have to instrument it. However, if a global does not have an initializer
+ // at all, we assume it has a dynamic initializer (in another TU).
+ if (G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G))
return;
}
}
@@ -378,21 +644,31 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
assert(OrigTy->isSized());
uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
- if (TypeSize != 8 && TypeSize != 16 &&
- TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
- // Ignore all unusual sizes.
- return;
- }
+ assert((TypeSize % 8) == 0);
+ // Instrument a 1-, 2-, 4-, 8-, or 16-byte access with one check.
+ if (TypeSize == 8 || TypeSize == 16 ||
+ TypeSize == 32 || TypeSize == 64 || TypeSize == 128)
+ return instrumentAddress(I, I, Addr, TypeSize, IsWrite, 0);
+ // Instrument an unusual size (but still a multiple of 8).
+ // We cannot do it with a single check, so we do a 1-byte check for the first
+ // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
+ // to report the actual access size.
IRBuilder<> IRB(I);
- instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);
+ Value *LastByte = IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePointerCast(Addr, IntptrTy),
+ ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+ OrigPtrTy);
+ Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
+ instrumentAddress(I, I, Addr, 8, IsWrite, Size);
+ instrumentAddress(I, I, LastByte, 8, IsWrite, Size);
}
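+// e.g. a 10-byte (80-bit) access yields two 1-byte checks, on Addr and on
+// Addr + 9, each carrying Size == 10 so that the __asan_report_*_n callback
+// can report the true access size instead of 8.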
// Validate the result of Module::getOrInsertFunction called for an interface
// function of AddressSanitizer. If the instrumented module defines a function
// with the same name, their prototypes must match, otherwise
// getOrInsertFunction returns a bitcast.
-Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) {
+static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast);
FuncOrBitcast->dump();
report_fatal_error("trying to redefine an AddressSanitizer "
@@ -401,10 +677,12 @@ Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) {
Instruction *AddressSanitizer::generateCrashCode(
Instruction *InsertBefore, Value *Addr,
- bool IsWrite, size_t AccessSizeIndex) {
+ bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument) {
IRBuilder<> IRB(InsertBefore);
- CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex],
- Addr);
+ CallInst *Call = SizeArgument
+ ? IRB.CreateCall2(AsanErrorCallbackSized[IsWrite], Addr, SizeArgument)
+ : IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr);
+
// We don't do Call->setDoesNotReturn() because the BB already has
// UnreachableInst at the end.
// This EmptyAsm is required to avoid callback merge.
@@ -415,7 +693,7 @@ Instruction *AddressSanitizer::generateCrashCode(
Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue,
uint32_t TypeSize) {
- size_t Granularity = 1 << MappingScale;
+ size_t Granularity = 1 << Mapping.Scale;
// Addr & (Granularity - 1)
Value *LastAccessedByte = IRB.CreateAnd(
AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
@@ -431,12 +709,14 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
}
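+// Shadow-byte semantics behind this slow path (values illustrative, assuming
+// the default 8-byte granularity): a shadow value k in 1..7 means only the
+// first k bytes of the granule are addressable. For a 2-byte access at
+// Addr % 8 == 4 with k == 5, the last accessed byte is 4 + 2 - 1 == 5,
+// and 5 >= k flags the access.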
void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
- IRBuilder<> &IRB, Value *Addr,
- uint32_t TypeSize, bool IsWrite) {
+ Instruction *InsertBefore,
+ Value *Addr, uint32_t TypeSize,
+ bool IsWrite, Value *SizeArgument) {
+ IRBuilder<> IRB(InsertBefore);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
Type *ShadowTy = IntegerType::get(
- *C, std::max(8U, TypeSize >> MappingScale));
+ *C, std::max(8U, TypeSize >> Mapping.Scale));
Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
Value *ShadowPtr = memToShadow(AddrLong, IRB);
Value *CmpVal = Constant::getNullValue(ShadowTy);
@@ -445,7 +725,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
- size_t Granularity = 1 << MappingScale;
+ size_t Granularity = 1 << Mapping.Scale;
TerminatorInst *CrashTerm = 0;
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
@@ -464,14 +744,13 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
CrashTerm = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), true);
}
- Instruction *Crash =
- generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex);
+ Instruction *Crash = generateCrashCode(
+ CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument);
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
-void AddressSanitizer::createInitializerPoisonCalls(Module &M,
- Value *FirstAddr,
- Value *LastAddr) {
+void AddressSanitizerModule::createInitializerPoisonCalls(
+ Module &M, GlobalValue *ModuleName) {
// We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
// If that function is not present, this TU contains no globals, or they have
@@ -482,16 +761,9 @@ void AddressSanitizer::createInitializerPoisonCalls(Module &M,
// Set up the arguments to our poison/unpoison functions.
IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
- // Declare our poisoning and unpoisoning functions.
- Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
- AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
- Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
- AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
-
// Add a call to poison all external globals before the given function starts.
- IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr);
+ Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy);
+ IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr);
// Add calls to unpoison all globals before each return instruction.
for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
@@ -502,14 +774,14 @@ void AddressSanitizer::createInitializerPoisonCalls(Module &M,
}
}
-bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
+bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
Type *Ty = cast<PointerType>(G->getType())->getElementType();
DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
if (BL->isIn(*G)) return false;
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
- if (GlobalsCreatedByAsan.count(G)) return false; // Our own global.
+ if (GlobalWasGeneratedByAsan(G)) return false; // Our own global.
// Touch only those globals that will not be defined in other modules.
// Don't handle ODR type linkages since other modules may be built w/o asan.
if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
@@ -522,7 +794,7 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
if (G->isThreadLocal())
return false;
// For now, just ignore this Alloca if the alignment is large.
- if (G->getAlignment() > RedzoneSize) return false;
+ if (G->getAlignment() > RedzoneSize()) return false;
// Ignore all the globals with the names starting with "\01L_OBJC_".
// Many of those are put into the .cstring section. The linker compresses
@@ -561,10 +833,43 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
return true;
}
+void AddressSanitizerModule::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(*C);
+ // Declare our poisoning and unpoisoning functions.
+ AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, NULL));
+ AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
+ AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
+ AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
+ // Declare functions that register/unregister globals.
+ AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanRegisterGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, NULL));
+ AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
+ AsanUnregisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanUnregisterGlobalsName,
+ IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+}
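+// Assuming the conventional __asan_* names behind these kAsan* constants,
+// the run-time side of this interface is roughly:
+//   void __asan_before_dynamic_init(uptr module_name);  // poison
+//   void __asan_after_dynamic_init();                   // unpoison
+//   void __asan_register_globals(uptr globals, uptr n);
+//   void __asan_unregister_globals(uptr globals, uptr n);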
+
// This function replaces all global variables with new variables that have
// trailing redzones. It also creates a function that poisons
// redzones and inserts this function into llvm.global_ctors.
-bool AddressSanitizer::insertGlobalRedzones(Module &M) {
+bool AddressSanitizerModule::runOnModule(Module &M) {
+ if (!ClGlobals) return false;
+ TD = getAnalysisIfAvailable<DataLayout>();
+ if (!TD)
+ return false;
+ BL.reset(new BlackList(BlacklistFile));
+ if (BL->isIn(M)) return false;
+ C = &(M.getContext());
+ int LongSize = TD->getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow);
+ initializeCallbacks(M);
+ DynamicallyInitializedGlobals.Init(M);
+
SmallVector<GlobalVariable *, 16> GlobalsToChange;
for (Module::GlobalListType::iterator G = M.global_begin(),
@@ -581,32 +886,48 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
// size_t size;
// size_t size_with_redzone;
// const char *name;
+ // const char *module_name;
// size_t has_dynamic_init;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
IntptrTy, IntptrTy,
- IntptrTy, NULL);
+ IntptrTy, IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n), DynamicInit;
- IRBuilder<> IRB(CtorInsertBefore);
- if (ClInitializers)
- FindDynamicInitializers(M);
+ Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
+ assert(CtorFunc);
+ IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
+
+ bool HasDynamicallyInitializedGlobals = false;
- // The addresses of the first and last dynamically initialized globals in
- // this TU. Used in initialization order checking.
- Value *FirstDynamic = 0, *LastDynamic = 0;
+ GlobalVariable *ModuleName = createPrivateGlobalForString(
+ M, M.getModuleIdentifier());
+ // We shouldn't merge the same module names, as this string serves as a
+ // unique module ID at run time.
+ ModuleName->setUnnamedAddr(false);
for (size_t i = 0; i < n; i++) {
+ static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
PointerType *PtrTy = cast<PointerType>(G->getType());
Type *Ty = PtrTy->getElementType();
uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
- uint64_t RightRedzoneSize = RedzoneSize +
- (RedzoneSize - (SizeInBytes % RedzoneSize));
+ uint64_t MinRZ = RedzoneSize();
+ // Choose RZ such that MinRZ <= RZ <= kMaxGlobalRedzone,
+ // aiming for RZ to be ~ 1/4 of SizeInBytes.
+ uint64_t RZ = std::max(MinRZ,
+ std::min(kMaxGlobalRedzone,
+ (SizeInBytes / MinRZ / 4) * MinRZ));
+ uint64_t RightRedzoneSize = RZ;
+ // Round up to MinRZ
+ if (SizeInBytes % MinRZ)
+ RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
+ assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0);
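+ // Worked example (MinRZ assumed 32): a 17-byte global gets RZ == 32 and
+ // RightRedzoneSize == 32 + (32 - 17) == 47, so 17 + 47 == 64, a multiple of
+ // MinRZ; a 1 MiB global is capped at kMaxGlobalRedzone == 1 << 18 bytes.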
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
// Determine whether this global should be poisoned in initialization.
- bool GlobalHasDynamicInitializer = HasDynamicInitializer(G);
+ bool GlobalHasDynamicInitializer =
+ DynamicallyInitializedGlobals.Contains(G);
// Don't check initialization order if this global is blacklisted.
GlobalHasDynamicInitializer &= !BL->isInInit(*G);
@@ -615,18 +936,14 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
NewTy, G->getInitializer(),
Constant::getNullValue(RightRedZoneTy), NULL);
- SmallString<2048> DescriptionOfGlobal = G->getName();
- DescriptionOfGlobal += " (";
- DescriptionOfGlobal += M.getModuleIdentifier();
- DescriptionOfGlobal += ")";
- GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
+ GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
// Create a new global variable with enough space for a redzone.
GlobalVariable *NewGlobal = new GlobalVariable(
M, NewTy, G->isConstant(), G->getLinkage(),
NewInitializer, "", G, G->getThreadLocalMode());
NewGlobal->copyAttributesFrom(G);
- NewGlobal->setAlignment(RedzoneSize);
+ NewGlobal->setAlignment(MinRZ);
Value *Indices2[2];
Indices2[0] = IRB.getInt32(0);
@@ -643,15 +960,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantExpr::getPointerCast(ModuleName, IntptrTy),
ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
NULL);
// Populate the first and last globals declared in this TU.
- if (ClInitializers && GlobalHasDynamicInitializer) {
- LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy);
- if (FirstDynamic == 0)
- FirstDynamic = LastDynamic;
- }
+ if (CheckInitOrder && GlobalHasDynamicInitializer)
+ HasDynamicallyInitializedGlobals = true;
DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
}
@@ -662,14 +977,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
// Create calls for poisoning before initializers run and unpoisoning after.
- if (ClInitializers && FirstDynamic && LastDynamic)
- createInitializerPoisonCalls(M, FirstDynamic, LastDynamic);
-
- Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, NULL));
- AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
-
+ if (CheckInitOrder && HasDynamicallyInitializedGlobals)
+ createInitializerPoisonCalls(M, ModuleName);
IRB.CreateCall2(AsanRegisterGlobals,
IRB.CreatePointerCast(AllGlobals, IntptrTy),
ConstantInt::get(IntptrTy, n));
@@ -681,12 +990,6 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
- Function *AsanUnregisterGlobals =
- checkInterfaceFunction(M.getOrInsertFunction(
- kAsanUnregisterGlobalsName,
- IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
- AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
-
IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
IRB.CreatePointerCast(AllGlobals, IntptrTy),
ConstantInt::get(IntptrTy, n));
@@ -696,33 +999,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
return true;
}
-// virtual
-bool AddressSanitizer::doInitialization(Module &M) {
- // Initialize the private fields. No one has accessed them before.
- TD = getAnalysisIfAvailable<DataLayout>();
-
- if (!TD)
- return false;
- BL.reset(new BlackList(ClBlackListFile));
-
- C = &(M.getContext());
- LongSize = TD->getPointerSizeInBits();
- IntptrTy = Type::getIntNTy(*C, LongSize);
- IntptrPtrTy = PointerType::get(IntptrTy, 0);
-
- AsanCtorFunction = Function::Create(
- FunctionType::get(Type::getVoidTy(*C), false),
- GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
- BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
- CtorInsertBefore = ReturnInst::Create(*C, AsanCtorBB);
-
- // call __asan_init in the module ctor.
- IRBuilder<> IRB(CtorInsertBefore);
- AsanInitFunction = checkInterfaceFunction(
- M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL));
- AsanInitFunction->setLinkage(Function::ExternalLinkage);
- IRB.CreateCall(AsanInitFunction);
-
+void AddressSanitizer::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(*C);
// Create __asan_report* callbacks.
for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
@@ -736,74 +1014,68 @@ bool AddressSanitizer::doInitialization(Module &M) {
FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
}
}
+ AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
- AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL));
- AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanStackFreeName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, IntptrTy, NULL));
AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
-
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
+}
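+// The callbacks created above are __asan_report_{load,store}{1,2,4,8,16},
+// plus the sized pair __asan_report_load_n / __asan_report_store_n; each
+// takes the faulting address, and the sized pair also the access size.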
- llvm::Triple targetTriple(M.getTargetTriple());
- bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android;
+void AddressSanitizer::emitShadowMapping(Module &M, IRBuilder<> &IRB) const {
+ // Tell the run-time the values of the mapping offset and scale.
+ GlobalValue *asan_mapping_offset =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, Mapping.Offset),
+ kAsanMappingOffsetName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_offset, true);
+
+ GlobalValue *asan_mapping_scale =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, Mapping.Scale),
+ kAsanMappingScaleName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_scale, true);
+}
- MappingOffset = isAndroid ? kDefaultShadowOffsetAndroid :
- (LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64);
- if (ClMappingOffsetLog >= 0) {
- if (ClMappingOffsetLog == 0) {
- // special case
- MappingOffset = 0;
- } else {
- MappingOffset = 1ULL << ClMappingOffsetLog;
- }
- }
- MappingScale = kDefaultShadowScale;
- if (ClMappingScale) {
- MappingScale = ClMappingScale;
- }
- // Redzone used for stack and globals is at least 32 bytes.
- // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
- RedzoneSize = std::max(32, (int)(1 << MappingScale));
+// virtual
+bool AddressSanitizer::doInitialization(Module &M) {
+ // Initialize the private fields. No one has accessed them before.
+ TD = getAnalysisIfAvailable<DataLayout>();
+ if (!TD)
+ return false;
+ BL.reset(new BlackList(BlacklistFile));
+ DynamicallyInitializedGlobals.Init(M);
- if (ClMappingOffsetLog >= 0) {
- // Tell the run-time the current values of mapping offset and scale.
- GlobalValue *asan_mapping_offset =
- new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
- ConstantInt::get(IntptrTy, MappingOffset),
- kAsanMappingOffsetName);
- // Read the global, otherwise it may be optimized away.
- IRB.CreateLoad(asan_mapping_offset, true);
- }
- if (ClMappingScale) {
- GlobalValue *asan_mapping_scale =
- new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
- ConstantInt::get(IntptrTy, MappingScale),
- kAsanMappingScaleName);
- // Read the global, otherwise it may be optimized away.
- IRB.CreateLoad(asan_mapping_scale, true);
- }
+ C = &(M.getContext());
+ LongSize = TD->getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
- appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
+ AsanCtorFunction = Function::Create(
+ FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
+ BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
+ // call __asan_init in the module ctor.
+ IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB));
+ AsanInitFunction = checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL));
+ AsanInitFunction->setLinkage(Function::ExternalLinkage);
+ IRB.CreateCall(AsanInitFunction);
- return true;
-}
+ Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow);
+ emitShadowMapping(M, IRB);
-bool AddressSanitizer::doFinalization(Module &M) {
- // We transform the globals at the very end so that the optimization analysis
- // works on the original globals.
- if (ClGlobals)
- return insertGlobalRedzones(M);
- return false;
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
+ return true;
}
-
bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// For each NSObject descendant having a +load method, this method is invoked
// by the ObjC runtime before any of the static constructors is called.
@@ -823,12 +1095,15 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
+ initializeCallbacks(*F.getParent());
- // If needed, insert __asan_init before checking for AddressSafety attr.
+ // If needed, insert __asan_init before checking for SanitizeAddress attr.
maybeInsertAsanInitAtFunctionEntry(F);
- if (!F.getFnAttributes().hasAttribute(Attributes::AddressSafety))
+ if (!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::SanitizeAddress))
return false;
if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
@@ -857,12 +1132,12 @@ bool AddressSanitizer::runOnFunction(Function &F) {
} else if (isa<MemIntrinsic>(BI) && ClMemIntrin) {
// ok, take it.
} else {
- if (CallInst *CI = dyn_cast<CallInst>(BI)) {
+ CallSite CS(BI);
+ if (CS) {
// A call inside BB.
TempsToInstrument.clear();
- if (CI->doesNotReturn()) {
- NoReturnCalls.push_back(CI);
- }
+ if (CS.doesNotReturn())
+ NoReturnCalls.push_back(CS.getInstruction());
}
continue;
}
@@ -887,7 +1162,8 @@ bool AddressSanitizer::runOnFunction(Function &F) {
NumInstrumented++;
}
- bool ChangedStack = poisonStackInFunction(F);
+ FunctionStackPoisoner FSP(F, *this);
+ bool ChangedStack = FSP.runOnFunction();
// We must unpoison the stack before every NoReturn call (throw, _exit, etc).
// See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37
@@ -912,10 +1188,10 @@ static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) {
static void PoisonShadowPartialRightRedzone(uint8_t *Shadow,
size_t Size,
- size_t RedzoneSize,
+ size_t RZSize,
size_t ShadowGranularity,
uint8_t Magic) {
- for (size_t i = 0; i < RedzoneSize;
+ for (size_t i = 0; i < RZSize;
i+= ShadowGranularity, Shadow++) {
if (i + ShadowGranularity <= Size) {
*Shadow = 0; // fully addressable
@@ -927,10 +1203,35 @@ static void PoisonShadowPartialRightRedzone(uint8_t *Shadow,
}
}
-void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec,
- IRBuilder<> IRB,
- Value *ShadowBase, bool DoPoison) {
- size_t ShadowRZSize = RedzoneSize >> MappingScale;
+// Workaround for bug 11395: we don't want to instrument the stack in functions
+// with large assembly blobs (32-bit only); otherwise reg alloc may crash.
+// FIXME: remove once bug 11395 is fixed.
+bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
+ if (LongSize != 32) return false;
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI || !CI->isInlineAsm()) return false;
+ if (CI->getNumArgOperands() <= 5) return false;
+ // We have inline assembly with quite a few arguments.
+ return true;
+}
+
+void FunctionStackPoisoner::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(*C);
+ AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL));
+ AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackFreeName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, IntptrTy, NULL));
+ AsanPoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+}
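+// Assuming uptr stands for the pointer-sized integer used throughout, the
+// run-time counterparts look roughly like:
+//   uptr __asan_stack_malloc(uptr size, uptr real_stack);
+//   void __asan_stack_free(uptr ptr, uptr size, uptr real_stack);
+//   void __asan_poison_stack_memory(uptr addr, uptr size);
+//   void __asan_unpoison_stack_memory(uptr addr, uptr size);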
+
+void FunctionStackPoisoner::poisonRedZones(
+ const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase,
+ bool DoPoison) {
+ size_t ShadowRZSize = RedzoneSize() >> Mapping.Scale;
assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8);
Type *RZPtrTy = PointerType::get(RZTy, 0);
@@ -946,12 +1247,12 @@ void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec,
IRB.CreateStore(PoisonLeft, IRB.CreateIntToPtr(ShadowBase, RZPtrTy));
// poison all other red zones.
- uint64_t Pos = RedzoneSize;
+ uint64_t Pos = RedzoneSize();
for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
AllocaInst *AI = AllocaVec[i];
uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
uint64_t AlignedSize = getAlignedAllocaSize(AI);
- assert(AlignedSize - SizeInBytes < RedzoneSize);
+ assert(AlignedSize - SizeInBytes < RedzoneSize());
Value *Ptr = NULL;
Pos += AlignedSize;
@@ -961,13 +1262,13 @@ void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec,
// Poison the partial redzone at right
Ptr = IRB.CreateAdd(
ShadowBase, ConstantInt::get(IntptrTy,
- (Pos >> MappingScale) - ShadowRZSize));
- size_t AddressableBytes = RedzoneSize - (AlignedSize - SizeInBytes);
+ (Pos >> Mapping.Scale) - ShadowRZSize));
+ size_t AddressableBytes = RedzoneSize() - (AlignedSize - SizeInBytes);
uint32_t Poison = 0;
if (DoPoison) {
PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes,
- RedzoneSize,
- 1ULL << MappingScale,
+ RedzoneSize(),
+ 1ULL << Mapping.Scale,
kAsanStackPartialRedzoneMagic);
}
Value *PartialPoison = ConstantInt::get(RZTy, Poison);
@@ -976,76 +1277,23 @@ void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec,
// Poison the full redzone at right.
Ptr = IRB.CreateAdd(ShadowBase,
- ConstantInt::get(IntptrTy, Pos >> MappingScale));
- Value *Poison = i == AllocaVec.size() - 1 ? PoisonRight : PoisonMid;
+ ConstantInt::get(IntptrTy, Pos >> Mapping.Scale));
+ bool LastAlloca = (i == AllocaVec.size() - 1);
+ Value *Poison = LastAlloca ? PoisonRight : PoisonMid;
IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
- Pos += RedzoneSize;
+ Pos += RedzoneSize();
}
}
-// Workaround for bug 11395: we don't want to instrument stack in functions
-// with large assembly blobs (32-bit only), otherwise reg alloc may crash.
-// FIXME: remove once the bug 11395 is fixed.
-bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
- if (LongSize != 32) return false;
- CallInst *CI = dyn_cast<CallInst>(I);
- if (!CI || !CI->isInlineAsm()) return false;
- if (CI->getNumArgOperands() <= 5) return false;
- // We have inline assembly with quite a few arguments.
- return true;
-}
-
-// Find all static Alloca instructions and put
-// poisoned red zones around all of them.
-// Then unpoison everything back before the function returns.
-//
-// Stack poisoning does not play well with exception handling.
-// When an exception is thrown, we essentially bypass the code
-// that unpoisones the stack. This is why the run-time library has
-// to intercept __cxa_throw (as well as longjmp, etc) and unpoison the entire
-// stack in the interceptor. This however does not work inside the
-// actual function which catches the exception. Most likely because the
-// compiler hoists the load of the shadow value somewhere too high.
-// This causes asan to report a non-existing bug on 453.povray.
-// It sounds like an LLVM bug.
-bool AddressSanitizer::poisonStackInFunction(Function &F) {
- if (!ClStack) return false;
- SmallVector<AllocaInst*, 16> AllocaVec;
- SmallVector<Instruction*, 8> RetVec;
- uint64_t TotalSize = 0;
+void FunctionStackPoisoner::poisonStack() {
+ uint64_t LocalStackSize = TotalStackSize +
+ (AllocaVec.size() + 1) * RedzoneSize();
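+ // e.g. two allocas of aligned size 32 with RedzoneSize() == 32 give
+ // LocalStackSize == 64 + (2 + 1) * 32 == 160: RZ | a0 | RZ | a1 | RZ.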
- // Filter out Alloca instructions we want (and can) handle.
- // Collect Ret instructions.
- for (Function::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI) {
- BasicBlock &BB = *FI;
- for (BasicBlock::iterator BI = BB.begin(), BE = BB.end();
- BI != BE; ++BI) {
- if (isa<ReturnInst>(BI)) {
- RetVec.push_back(BI);
- continue;
- }
-
- AllocaInst *AI = dyn_cast<AllocaInst>(BI);
- if (!AI) continue;
- if (AI->isArrayAllocation()) continue;
- if (!AI->isStaticAlloca()) continue;
- if (!AI->getAllocatedType()->isSized()) continue;
- if (AI->getAlignment() > RedzoneSize) continue;
- AllocaVec.push_back(AI);
- uint64_t AlignedSize = getAlignedAllocaSize(AI);
- TotalSize += AlignedSize;
- }
- }
-
- if (AllocaVec.empty()) return false;
-
- uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize;
-
- bool DoStackMalloc = ClUseAfterReturn
+ bool DoStackMalloc = ASan.CheckUseAfterReturn
&& LocalStackSize <= kMaxStackMallocSize;
+ assert(AllocaVec.size() > 0);
Instruction *InsBefore = AllocaVec[0];
IRBuilder<> IRB(InsBefore);
@@ -1053,7 +1301,9 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
AllocaInst *MyAlloca =
new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore);
- MyAlloca->setAlignment(RedzoneSize);
+ if (ClRealignStack && StackAlignment < RedzoneSize())
+ StackAlignment = RedzoneSize();
+ MyAlloca->setAlignment(StackAlignment);
assert(MyAlloca->isStaticAlloca());
Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy);
Value *LocalStackBase = OrigStackBase;
@@ -1063,12 +1313,24 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
}
- // This string will be parsed by the run-time (DescribeStackAddress).
+ // This string will be parsed by the run-time (DescribeAddressIfStack).
SmallString<2048> StackDescriptionStorage;
raw_svector_ostream StackDescription(StackDescriptionStorage);
- StackDescription << F.getName() << " " << AllocaVec.size() << " ";
+ StackDescription << AllocaVec.size() << " ";
+
+ // Insert the poison/unpoison calls recorded for the lifetime intrinsics.
+ bool HavePoisonedAllocas = false;
+ for (size_t i = 0, n = AllocaPoisonCallVec.size(); i < n; i++) {
+ const AllocaPoisonCall &APC = AllocaPoisonCallVec[i];
+ IntrinsicInst *II = APC.InsBefore;
+ AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
+ assert(AI);
+ IRBuilder<> IRB(II);
+ poisonAlloca(AI, APC.Size, IRB, APC.DoPoison);
+ HavePoisonedAllocas |= APC.DoPoison;
+ }
- uint64_t Pos = RedzoneSize;
+ uint64_t Pos = RedzoneSize();
// Replace Alloca instructions with base+offset.
for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
AllocaInst *AI = AllocaVec[i];
@@ -1077,57 +1339,115 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
StackDescription << Pos << " " << SizeInBytes << " "
<< Name.size() << " " << Name << " ";
uint64_t AlignedSize = getAlignedAllocaSize(AI);
- assert((AlignedSize % RedzoneSize) == 0);
- AI->replaceAllUsesWith(
- IRB.CreateIntToPtr(
+ assert((AlignedSize % RedzoneSize()) == 0);
+ Value *NewAllocaPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)),
- AI->getType()));
- Pos += AlignedSize + RedzoneSize;
+ AI->getType());
+ replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB);
+ AI->replaceAllUsesWith(NewAllocaPtr);
+ Pos += AlignedSize + RedzoneSize();
}
assert(Pos == LocalStackSize);
- // Write the Magic value and the frame description constant to the redzone.
+ // The left-most redzone has enough space for at least 4 pointers.
+ // Write the Magic value to redzone[0].
Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy);
IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
BasePlus0);
- Value *BasePlus1 = IRB.CreateAdd(LocalStackBase,
- ConstantInt::get(IntptrTy, LongSize/8));
- BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
+ // Write the frame description constant to redzone[1].
+ Value *BasePlus1 = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)),
+ IntptrPtrTy);
GlobalVariable *StackDescriptionGlobal =
createPrivateGlobalForString(*F.getParent(), StackDescription.str());
- GlobalsCreatedByAsan.insert(StackDescriptionGlobal);
- Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
+ Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal,
+ IntptrTy);
IRB.CreateStore(Description, BasePlus1);
+ // Write the PC to redzone[2].
+ Value *BasePlus2 = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy,
+ 2 * ASan.LongSize/8)),
+ IntptrPtrTy);
+ IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
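+ // Frame header layout after the three stores above, one word each:
+ //   word 0: kCurrentStackFrameMagic
+ //   word 1: pointer to the frame description string
+ //   word 2: the function address, standing in for a PC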
// Poison the stack redzones at the entry.
- Value *ShadowBase = memToShadow(LocalStackBase, IRB);
- PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true);
+ Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB);
+ poisonRedZones(AllocaVec, IRB, ShadowBase, true);
// Unpoison the stack before all ret instructions.
for (size_t i = 0, n = RetVec.size(); i < n; i++) {
Instruction *Ret = RetVec[i];
IRBuilder<> IRBRet(Ret);
-
// Mark the current frame as retired.
IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic),
BasePlus0);
// Unpoison the stack.
- PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRBRet, ShadowBase, false);
-
+ poisonRedZones(AllocaVec, IRBRet, ShadowBase, false);
if (DoStackMalloc) {
+ // In use-after-return mode, mark the whole stack frame unaddressable.
IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase,
ConstantInt::get(IntptrTy, LocalStackSize),
OrigStackBase);
+ } else if (HavePoisonedAllocas) {
+ // If we poisoned some allocas while handling llvm.lifetime intrinsics,
+ // unpoison the whole stack frame now.
+ assert(LocalStackBase == OrigStackBase);
+ poisonAlloca(LocalStackBase, LocalStackSize, IRBRet, false);
}
}
// We are done. Remove the old unused alloca instructions.
for (size_t i = 0, n = AllocaVec.size(); i < n; i++)
AllocaVec[i]->eraseFromParent();
+}
- if (ClDebugStack) {
- DEBUG(dbgs() << F);
- }
+void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
+ IRBuilder<> IRB, bool DoPoison) {
+ // For now just insert the call to ASan runtime.
+ Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy);
+ Value *SizeArg = ConstantInt::get(IntptrTy, Size);
+ IRB.CreateCall2(DoPoison ? AsanPoisonStackMemoryFunc
+ : AsanUnpoisonStackMemoryFunc,
+ AddrArg, SizeArg);
+}
- return true;
+// Handling llvm.lifetime intrinsics for a given %alloca:
+// (1) collect all llvm.lifetime.xxx(%size, %value) calls describing the alloca.
+// (2) if %size is constant, poison the memory for llvm.lifetime.end (to detect
+// invalid accesses) and unpoison it for llvm.lifetime.start (the memory
+// could be poisoned by a previous llvm.lifetime.end instruction, as the
+// variable may go in and out of scope several times, e.g. in loops).
+// (3) if we poisoned at least one %alloca in a function,
+// unpoison the whole stack frame at function exit.
+
+AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ // We're interested only in allocas we can handle.
+ return isInterestingAlloca(*AI) ? AI : 0;
+ // See if we've already calculated (or started to calculate) alloca for a
+ // given value.
+ AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
+ if (I != AllocaForValue.end())
+ return I->second;
+ // Store 0 while we're calculating alloca for value V to avoid
+ // infinite recursion if the value references itself.
+ AllocaForValue[V] = 0;
+ AllocaInst *Res = 0;
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ Res = findAllocaForValue(CI->getOperand(0));
+ else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *IncValue = PN->getIncomingValue(i);
+ // Allow self-referencing phi-nodes.
+ if (IncValue == PN) continue;
+ AllocaInst *IncValueAI = findAllocaForValue(IncValue);
+ // The allocas for the incoming values should exist and should all be equal.
+ if (IncValueAI == 0 || (Res != 0 && IncValueAI != Res))
+ return 0;
+ Res = IncValueAI;
+ }
+ }
+ if (Res != 0)
+ AllocaForValue[V] = Res;
+ return Res;
}
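+// e.g. given
+//   %a = alloca i32
+//   %p = bitcast i32* %a to i8*
+// findAllocaForValue(%p) returns %a; a phi merging casts of the same alloca
+// also resolves to it, while a phi merging two different allocas yields 0.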
diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp
index ef34b8a56d88..927982d2af47 100644
--- a/lib/Transforms/Instrumentation/BlackList.cpp
+++ b/lib/Transforms/Instrumentation/BlackList.cpp
@@ -13,26 +13,26 @@
//
//===----------------------------------------------------------------------===//
-#include <utility>
-#include <string>
-
-#include "BlackList.h"
+#include "llvm/Transforms/Utils/BlackList.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
+#include <string>
+#include <utility>
namespace llvm {
BlackList::BlackList(const StringRef Path) {
// Validate and open blacklist file.
- if (!Path.size()) return;
+ if (Path.empty()) return;
OwningPtr<MemoryBuffer> File;
if (error_code EC = MemoryBuffer::getFile(Path, File)) {
report_fatal_error("Can't open blacklist file: " + Path + ": " +
@@ -52,6 +52,10 @@ BlackList::BlackList(const StringRef Path) {
std::pair<StringRef, StringRef> SplitLine = I->split(":");
StringRef Prefix = SplitLine.first;
std::string Regexp = SplitLine.second;
+ if (Regexp.empty()) {
+ // Missing ':' in the line.
+ report_fatal_error("malformed blacklist line: " + SplitLine.first);
+ }
// Replace * with .*
for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
@@ -68,38 +72,54 @@ BlackList::BlackList(const StringRef Path) {
}
// Add this regexp into the proper group by its prefix.
- if (Regexps[Prefix].size())
+ if (!Regexps[Prefix].empty())
Regexps[Prefix] += "|";
Regexps[Prefix] += Regexp;
}
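+  // e.g. the line "fun:*_ZN4base6subtle*" files the pattern under prefix
+  // "fun" as ".*_ZN4base6subtle.*", joined to any earlier patterns with '|'.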
// Iterate through each of the prefixes, and create Regexs for them.
- for (StringMap<std::string>::iterator I = Regexps.begin(), E = Regexps.end();
- I != E; ++I) {
+ for (StringMap<std::string>::const_iterator I = Regexps.begin(),
+ E = Regexps.end(); I != E; ++I) {
Entries[I->getKey()] = new Regex(I->getValue());
}
}
-bool BlackList::isIn(const Function &F) {
+bool BlackList::isIn(const Function &F) const {
return isIn(*F.getParent()) || inSection("fun", F.getName());
}
-bool BlackList::isIn(const GlobalVariable &G) {
+bool BlackList::isIn(const GlobalVariable &G) const {
return isIn(*G.getParent()) || inSection("global", G.getName());
}
-bool BlackList::isIn(const Module &M) {
+bool BlackList::isIn(const Module &M) const {
return inSection("src", M.getModuleIdentifier());
}
-bool BlackList::isInInit(const GlobalVariable &G) {
- return isIn(*G.getParent()) || inSection("global-init", G.getName());
+static StringRef GetGVTypeString(const GlobalVariable &G) {
+ // Types of GlobalVariables are always pointer types.
+ Type *GType = G.getType()->getElementType();
+ // For now we support blacklisting struct types only.
+ if (StructType *SGType = dyn_cast<StructType>(GType)) {
+ if (!SGType->isLiteral())
+ return SGType->getName();
+ }
+ return "<unknown type>";
+}
+
+bool BlackList::isInInit(const GlobalVariable &G) const {
+ return (isIn(*G.getParent()) ||
+ inSection("global-init", G.getName()) ||
+ inSection("global-init-type", GetGVTypeString(G)));
}
bool BlackList::inSection(const StringRef Section,
- const StringRef Query) {
- Regex *FunctionRegex = Entries[Section];
- return FunctionRegex ? FunctionRegex->match(Query) : false;
+ const StringRef Query) const {
+ StringMap<Regex*>::const_iterator I = Entries.find(Section);
+ if (I == Entries.end()) return false;
+
+ Regex *FunctionRegex = I->getValue();
+ return FunctionRegex->match(Query);
}
} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h
deleted file mode 100644
index f3c05a5058cc..000000000000
--- a/lib/Transforms/Instrumentation/BlackList.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//===----------------------------------------------------------------------===//
-//
-// This is a utility class for instrumentation passes (like AddressSanitizer
-// or ThreadSanitizer) to avoid instrumenting some functions or global
-// variables based on a user-supplied blacklist.
-//
-// The blacklist disables instrumentation of various functions and global
-// variables. Each line contains a prefix, followed by a wild card expression.
-// Empty lines and lines starting with "#" are ignored.
-// ---
-// # Blacklisted items:
-// fun:*_ZN4base6subtle*
-// global:*global_with_bad_access_or_initialization*
-// global-init:*global_with_initialization_issues*
-// src:file_with_tricky_code.cc
-// ---
-// Note that the wild card is in fact an llvm::Regex, but * is automatically
-// replaced with .*
-// This is similar to the "ignore" feature of ThreadSanitizer.
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "llvm/ADT/StringMap.h"
-
-namespace llvm {
-class Function;
-class GlobalVariable;
-class Module;
-class Regex;
-class StringRef;
-
-class BlackList {
- public:
- BlackList(const StringRef Path);
- // Returns whether either this function or it's source file are blacklisted.
- bool isIn(const Function &F);
- // Returns whether either this global or it's source file are blacklisted.
- bool isIn(const GlobalVariable &G);
- // Returns whether this module is blacklisted by filename.
- bool isIn(const Module &M);
- // Returns whether a global should be excluded from initialization checking.
- bool isInInit(const GlobalVariable &G);
- private:
- StringMap<Regex*> Entries;
-
- bool inSection(const StringRef Section, const StringRef Query);
-};
-
-} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 7810b1b8a3ef..b094d42568f0 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -13,19 +13,19 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "bounds-checking"
-#include "llvm/IRBuilder.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Pass.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/TargetFolder.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Instrumentation.h"
using namespace llvm;
static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
@@ -41,7 +41,7 @@ namespace {
struct BoundsChecking : public FunctionPass {
static char ID;
- BoundsChecking(unsigned _Penalty = 5) : FunctionPass(ID), Penalty(_Penalty){
+ BoundsChecking() : FunctionPass(ID) {
initializeBoundsCheckingPass(*PassRegistry::getPassRegistry());
}
@@ -59,7 +59,6 @@ namespace {
BuilderTy *Builder;
Instruction *Inst;
BasicBlock *TrapBB;
- unsigned Penalty;
BasicBlock *getTrapBB();
void emitBranchToTrap(Value *Cmp = 0);
@@ -109,6 +108,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
else
Cmp = 0; // unconditional branch
}
+ ++ChecksAdded;
Instruction *Inst = Builder->GetInsertPoint();
BasicBlock *OldBB = Inst->getParent();
@@ -163,7 +163,6 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
}
emitBranchToTrap(Or);
- ++ChecksAdded;
return true;
}
@@ -208,6 +207,6 @@ bool BoundsChecking::runOnFunction(Function &F) {
return MadeChange;
}
-FunctionPass *llvm::createBoundsCheckingPass(unsigned Penalty) {
- return new BoundsChecking(Penalty);
+FunctionPass *llvm::createBoundsCheckingPass() {
+ return new BoundsChecking();
}
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 058f68c7cecd..1c9e0536794a 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMInstrumentation
BoundsChecking.cpp
EdgeProfiling.cpp
GCOVProfiling.cpp
+ MemorySanitizer.cpp
Instrumentation.cpp
OptimalEdgeProfiling.cpp
PathProfiling.cpp
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index e8ef2654d256..a2459fbafe18 100644
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -18,13 +18,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "insert-edge-profiling"
+#include "llvm/Transforms/Instrumentation.h"
#include "ProfilingUtils.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/ADT/Statistic.h"
#include <set>
using namespace llvm;
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index e9192e5cdd52..2edd151869e0 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -16,21 +16,23 @@
#define DEBUG_TYPE "insert-gcov-profiling"
-#include "ProfilingUtils.h"
#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "ProfilingUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/UniqueVector.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/PathV2.h"
#include "llvm/Support/raw_ostream.h"
@@ -39,30 +41,57 @@
#include <utility>
using namespace llvm;
+static cl::opt<std::string>
+DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
+ cl::ValueRequired);
+
+GCOVOptions GCOVOptions::getDefault() {
+ GCOVOptions Options;
+ Options.EmitNotes = true;
+ Options.EmitData = true;
+ Options.UseCfgChecksum = false;
+ Options.NoRedZone = false;
+ Options.FunctionNamesInData = true;
+
+ if (DefaultGCOVVersion.size() != 4) {
+ llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
+ DefaultGCOVVersion);
+ }
+ memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
+ return Options;
+}
+
namespace {
class GCOVProfiler : public ModulePass {
public:
static char ID;
- GCOVProfiler()
- : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
- UseExtraChecksum(false) {
+ GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) {
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
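+    // e.g. the default version "402*" is stored reversed as "*204",
+    // presumably matching the byte order in which the 4-character version
+    // word is written into the .gcno/.gcda files.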
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false,
- bool useExtraChecksum = false)
- : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
- Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) {
- assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
+ GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){
+ assert((Options.EmitNotes || Options.EmitData) &&
+ "GCOVProfiler asked to do nothing?");
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
virtual const char *getPassName() const {
return "GCOV Profiler";
}
+
private:
bool runOnModule(Module &M);
- // Create the GCNO files for the Module based on DebugInfo.
- void emitGCNO();
+ // Create the .gcno files for the Module based on DebugInfo.
+ void emitProfileNotes();
// Modify the program to track transitions along edges and call into the
// profiling runtime to emit .gcda files when run.
@@ -73,6 +102,8 @@ namespace {
Constant *getIncrementIndirectCounterFunc();
Constant *getEmitFunctionFunc();
Constant *getEmitArcsFunc();
+ Constant *getDeleteWriteoutFunctionListFunc();
+ Constant *getDeleteFlushFunctionListFunc();
Constant *getEndFileFunc();
// Create or retrieve an i32 state value that is used to represent the
@@ -83,21 +114,22 @@ namespace {
// block number.
GlobalVariable *buildEdgeLookupTable(Function *F,
GlobalVariable *Counter,
- const UniqueVector<BasicBlock *> &Preds,
- const UniqueVector<BasicBlock *> &Succs);
+ const UniqueVector<BasicBlock *>&Preds,
+ const UniqueVector<BasicBlock*>&Succs);
// Add the function to write out all our counters to the global destructor
// list.
- void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
+ Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*,
+ MDNode*> >);
+ Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
void insertIndirectCounterIncrement();
- void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
std::string mangleName(DICompileUnit CU, const char *NewStem);
- bool EmitNotes;
- bool EmitData;
- bool Use402Format;
- bool UseExtraChecksum;
+ GCOVOptions Options;
+
+ // Reversed, NUL-terminated copy of Options.Version.
+ char ReversedVersion[5];
Module *M;
LLVMContext *Ctx;
@@ -108,10 +140,14 @@ char GCOVProfiler::ID = 0;
INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
-ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
- bool Use402Format,
- bool UseExtraChecksum) {
- return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum);
+ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
+ return new GCOVProfiler(Options);
+}
+
+static std::string getFunctionName(DISubprogram SP) {
+ if (!SP.getLinkageName().empty())
+ return SP.getLinkageName();
+ return SP.getName();
}
namespace {
@@ -249,8 +285,8 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(DISubprogram SP, raw_ostream *os,
- bool Use402Format, bool UseExtraChecksum) {
+ GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
+ bool UseCfgChecksum) {
this->os = os;
Function *F = SP.getFunction();
@@ -262,17 +298,16 @@ namespace {
ReturnBlock = new GCOVBlock(i++, os);
writeBytes(FunctionTag, 4);
- uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
+ uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
++BlockLen;
write(BlockLen);
- uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
write(Ident);
write(0); // lineno checksum
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
write(0); // cfg checksum
- writeGCOVString(SP.getName());
+ writeGCOVString(getFunctionName(SP));
writeGCOVString(SP.getFilename());
write(SP.getLineNumber());
}
@@ -347,19 +382,23 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
SmallString<128> Filename = CU.getFilename();
sys::path::replace_extension(Filename, NewStem);
- return sys::path::filename(Filename.str());
+ StringRef FName = sys::path::filename(Filename);
+ SmallString<128> CurPath;
+ if (sys::fs::current_path(CurPath)) return FName;
+ sys::path::append(CurPath, FName.str());
+ return CurPath.str();
}
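+// e.g. for a compile unit built from "/src/foo.c" with NewStem "gcno", this
+// now yields "<cwd>/foo.gcno" instead of the bare "foo.gcno" (paths are
+// illustrative).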
bool GCOVProfiler::runOnModule(Module &M) {
this->M = &M;
Ctx = &M.getContext();
- if (EmitNotes) emitGCNO();
- if (EmitData) return emitProfileArcs();
+ if (Options.EmitNotes) emitProfileNotes();
+ if (Options.EmitData) return emitProfileArcs();
return false;
}
-void GCOVProfiler::emitGCNO() {
+void GCOVProfiler::emitProfileNotes() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes) return;
@@ -372,10 +411,9 @@ void GCOVProfiler::emitGCNO() {
std::string ErrorInfo;
raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
raw_fd_ostream::F_Binary);
- if (!Use402Format)
- out.write("oncg*404MVLL", 12);
- else
- out.write("oncg*204MVLL", 12);
+ out.write("oncg", 4);
+ out.write(ReversedVersion, 4);
+ out.write("MVLL", 4);
DIArray SPs = CU.getSubprograms();
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
@@ -384,7 +422,7 @@ void GCOVProfiler::emitGCNO() {
Function *F = SP.getFunction();
if (!F) continue;
- GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum);
+ GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
GCOVBlock &Block = Func.getBlock(BB);
@@ -465,21 +503,18 @@ bool GCOVProfiler::emitProfileArcs() {
Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge);
Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count,
- ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- Value *Sel = Builder.CreateSelect(
- BI->getCondition(),
- ConstantInt::get(Type::getInt64Ty(*Ctx), Edge),
- ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1));
+ Value *Sel = Builder.CreateSelect(BI->getCondition(),
+ Builder.getInt64(Edge),
+ Builder.getInt64(Edge + 1));
SmallVector<Value *, 2> Idx;
- Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
+ Idx.push_back(Builder.getInt64(0));
Idx.push_back(Sel);
Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx);
Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count,
- ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
} else {
ComplexEdgePreds.insert(BB);
@@ -496,10 +531,9 @@ bool GCOVProfiler::emitProfileArcs() {
ComplexEdgePreds, ComplexEdgeSuccs);
GlobalVariable *EdgeState = getEdgeStateValue();
- Type *Int32Ty = Type::getInt32Ty(*Ctx);
for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
- Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
+ Builder.CreateStore(Builder.getInt32(i), EdgeState);
}
for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
// call runtime to perform increment
@@ -518,8 +552,38 @@ bool GCOVProfiler::emitProfileArcs() {
}
}
- insertCounterWriteout(CountersBySP);
- insertFlush(CountersBySP);
+ Function *WriteoutF = insertCounterWriteout(CountersBySP);
+ Function *FlushF = insertFlush(CountersBySP);
+
+  // Create a small bit of code that registers the "__llvm_gcov_writeout"
+  // function to be executed at exit and the "__llvm_gcov_flush" function to
+  // be executed when "__gcov_flush" is called.
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_init", M);
+ F->setUnnamedAddr(true);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ F->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ IRBuilder<> Builder(BB);
+
+ FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Type *Params[] = {
+ PointerType::get(FTy, 0),
+ PointerType::get(FTy, 0)
+ };
+ FTy = FunctionType::get(Builder.getVoidTy(), Params, false);
+
+  // Initialize the environment and register the local writeout and flush
+ // functions.
+ Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
+ Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
+ Builder.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
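+
+  // The generated initializer behaves roughly like this C sketch (the
+  // __llvm_gcov_* symbols are the functions built above; llvm_gcov_init
+  // itself lives in the profiling runtime):
+  //
+  //   static void __llvm_gcov_init(void) {
+  //     llvm_gcov_init(__llvm_gcov_writeout, __llvm_gcov_flush);
+  //   }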
}
if (InsertIndCounterIncrCode)
@@ -540,13 +604,13 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
// read it. Threads and invoke make this untrue.
// emit [(succs * preds) x i64*], logically [succ x [pred x i64*]].
+ size_t TableSize = Succs.size() * Preds.size();
Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
- ArrayType *EdgeTableTy = ArrayType::get(
- Int64PtrTy, Succs.size() * Preds.size());
+ ArrayType *EdgeTableTy = ArrayType::get(Int64PtrTy, TableSize);
- Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()];
+ OwningArrayPtr<Constant *> EdgeTable(new Constant*[TableSize]);
Constant *NullValue = Constant::getNullValue(Int64PtrTy);
- for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i)
+ for (size_t i = 0; i != TableSize; ++i)
EdgeTable[i] = NullValue;
unsigned Edge = 0;
@@ -556,8 +620,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) {
for (int i = 0; i != Successors; ++i) {
BasicBlock *Succ = TI->getSuccessor(i);
- IRBuilder<> builder(Succ);
- Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ IRBuilder<> Builder(Succ);
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge + i);
EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) +
(Preds.idFor(BB)-1)] = cast<Constant>(Counter);
@@ -566,7 +630,7 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
Edge += Successors;
}
- ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size());
+ ArrayRef<Constant*> V(&EdgeTable[0], TableSize);
GlobalVariable *EdgeTableGV =
new GlobalVariable(
*M, EdgeTableTy, true, GlobalValue::InternalLinkage,
@@ -577,8 +641,11 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
}
Constant *GCOVProfiler::getStartFileFunc() {
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
- Type::getInt8PtrTy(*Ctx), false);
+ Type *Args[] = {
+ Type::getInt8PtrTy(*Ctx), // const char *orig_filename
+ Type::getInt8PtrTy(*Ctx), // const char version[4]
+ };
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
}
@@ -594,9 +661,10 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
}
Constant *GCOVProfiler::getEmitFunctionFunc() {
- Type *Args[2] = {
+ Type *Args[3] = {
Type::getInt32Ty(*Ctx), // uint32_t ident
Type::getInt8PtrTy(*Ctx), // const char *function_name
+ Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
@@ -607,11 +675,20 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
Type::getInt32Ty(*Ctx), // uint32_t num_counters
Type::getInt64PtrTy(*Ctx), // uint64_t *counters
};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
- Args, false);
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
}
+Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy);
+}
+
+Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy);
+}
+
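+// For reference, the C signatures these bindings assume on the runtime side
+// (a sketch; the actual profiling runtime lives out of tree):
+//
+//   void llvm_gcda_start_file(const char *orig_filename,
+//                             const char version[4]);
+//   void llvm_gcda_emit_function(uint32_t ident, const char *function_name,
+//                                uint8_t use_extra_checksum);
+//   void llvm_gcda_emit_arcs(uint32_t num_counters, uint64_t *counters);
+//   void llvm_delete_writeout_function_list(void);
+//   void llvm_delete_flush_function_list(void);
+//   void llvm_gcda_end_file(void);
+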
Constant *GCOVProfiler::getEndFileFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
@@ -630,7 +707,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
return GV;
}
-void GCOVProfiler::insertCounterWriteout(
+Function *GCOVProfiler::insertCounterWriteout(
ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
@@ -638,6 +715,9 @@ void GCOVProfiler::insertCounterWriteout(
WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage,
"__llvm_gcov_writeout", M);
WriteoutF->setUnnamedAddr(true);
+ WriteoutF->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ WriteoutF->addFnAttr(Attribute::NoRedZone);
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
IRBuilder<> Builder(BB);
@@ -652,48 +732,31 @@ void GCOVProfiler::insertCounterWriteout(
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit CU(CU_Nodes->getOperand(i));
std::string FilenameGcda = mangleName(CU, "gcda");
- Builder.CreateCall(StartFile,
- Builder.CreateGlobalStringPtr(FilenameGcda));
- for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
- I = CountersBySP.begin(), E = CountersBySP.end();
- I != E; ++I) {
- DISubprogram SP(I->second);
- intptr_t ident = reinterpret_cast<intptr_t>(I->second);
- Builder.CreateCall2(EmitFunction,
- ConstantInt::get(Type::getInt32Ty(*Ctx), ident),
- Builder.CreateGlobalStringPtr(SP.getName()));
-
- GlobalVariable *GV = I->first;
+ Builder.CreateCall2(StartFile,
+ Builder.CreateGlobalStringPtr(FilenameGcda),
+ Builder.CreateGlobalStringPtr(ReversedVersion));
+ for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
+ DISubprogram SP(CountersBySP[j].second);
+ Builder.CreateCall3(
+ EmitFunction, Builder.getInt32(j),
+ Options.FunctionNamesInData ?
+ Builder.CreateGlobalStringPtr(getFunctionName(SP)) :
+ Constant::getNullValue(Builder.getInt8PtrTy()),
+ Builder.getInt8(Options.UseCfgChecksum));
+
+ GlobalVariable *GV = CountersBySP[j].first;
unsigned Arcs =
cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
Builder.CreateCall2(EmitArcs,
- ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
+ Builder.getInt32(Arcs),
Builder.CreateConstGEP2_64(GV, 0, 0));
}
Builder.CreateCall(EndFile);
}
}
- Builder.CreateRetVoid();
- // Create a small bit of code that registers the "__llvm_gcov_writeout"
- // function to be executed at exit.
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__llvm_gcov_init", M);
- F->setUnnamedAddr(true);
- F->setLinkage(GlobalValue::InternalLinkage);
- F->addFnAttr(Attributes::NoInline);
-
- BB = BasicBlock::Create(*Ctx, "entry", F);
- Builder.SetInsertPoint(BB);
-
- FTy = FunctionType::get(Type::getInt32Ty(*Ctx),
- PointerType::get(FTy, 0), false);
- Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
- Builder.CreateCall(AtExitFn, WriteoutF);
Builder.CreateRetVoid();
-
- appendToGlobalCtors(*M, F, 0);
+ return WriteoutF;
}
void GCOVProfiler::insertIndirectCounterIncrement() {
@@ -701,11 +764,9 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
cast<Function>(GCOVProfiler::getIncrementIndirectCounterFunc());
Fn->setUnnamedAddr(true);
Fn->setLinkage(GlobalValue::InternalLinkage);
- Fn->addFnAttr(Attributes::NoInline);
-
- Type *Int32Ty = Type::getInt32Ty(*Ctx);
- Type *Int64Ty = Type::getInt64Ty(*Ctx);
- Constant *NegOne = ConstantInt::get(Int32Ty, 0xffffffff);
+ Fn->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ Fn->addFnAttr(Attribute::NoRedZone);
// Create basic blocks for function.
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", Fn);
@@ -720,26 +781,27 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Argument *Arg = Fn->arg_begin();
Arg->setName("predecessor");
Value *Pred = Builder.CreateLoad(Arg, "pred");
- Value *Cond = Builder.CreateICmpEQ(Pred, NegOne);
+ Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff));
BranchInst::Create(Exit, PredNotNegOne, Cond, BB);
Builder.SetInsertPoint(PredNotNegOne);
// uint64_t *counter = counters[pred];
// if (!counter) return;
- Value *ZExtPred = Builder.CreateZExt(Pred, Int64Ty);
+ Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
Arg = llvm::next(Fn->arg_begin());
Arg->setName("counters");
Value *GEP = Builder.CreateGEP(Arg, ZExtPred);
Value *Counter = Builder.CreateLoad(GEP, "counter");
Cond = Builder.CreateICmpEQ(Counter,
- Constant::getNullValue(Int64Ty->getPointerTo()));
+ Constant::getNullValue(
+ Builder.getInt64Ty()->getPointerTo()));
Builder.CreateCondBr(Cond, Exit, CounterEnd);
// ++*counter;
Builder.SetInsertPoint(CounterEnd);
Value *Add = Builder.CreateAdd(Builder.CreateLoad(Counter),
- ConstantInt::get(Int64Ty, 1));
+ Builder.getInt64(1));
Builder.CreateStore(Add, Counter);
Builder.CreateBr(Exit);
@@ -748,16 +810,19 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Builder.CreateRetVoid();
}
-void GCOVProfiler::
+Function *GCOVProfiler::
insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *FlushF = M->getFunction("__gcov_flush");
+ Function *FlushF = M->getFunction("__llvm_gcov_flush");
if (!FlushF)
FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__gcov_flush", M);
+ "__llvm_gcov_flush", M);
else
FlushF->setLinkage(GlobalValue::InternalLinkage);
FlushF->setUnnamedAddr(true);
+ FlushF->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ FlushF->addFnAttr(Attribute::NoRedZone);
BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
@@ -781,8 +846,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
if (RetTy == Type::getVoidTy(*Ctx))
Builder.CreateRetVoid();
else if (RetTy->isIntegerTy())
- // Used if __gcov_flush was implicitly declared.
+ // Used if __llvm_gcov_flush was implicitly declared.
Builder.CreateRet(ConstantInt::get(RetTy, 0));
else
- report_fatal_error("invalid return type for __gcov_flush");
+ report_fatal_error("invalid return type for __llvm_gcov_flush");
+
+ return FlushF;
}
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index 1e0b4a348a17..8ba102559bb6 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -21,11 +21,13 @@ using namespace llvm;
/// library.
void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeAddressSanitizerPass(Registry);
+ initializeAddressSanitizerModulePass(Registry);
initializeBoundsCheckingPass(Registry);
initializeEdgeProfilerPass(Registry);
initializeGCOVProfilerPass(Registry);
initializeOptimalEdgeProfilerPass(Registry);
initializePathProfilerPass(Registry);
+ initializeMemorySanitizerPass(Registry);
initializeThreadSanitizerPass(Registry);
}
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index a4bb5a66af6d..363539b2886f 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -15,10 +15,10 @@
#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
-#include "llvm/BasicBlock.h"
#include "llvm/ADT/EquivalenceClasses.h"
-#include <vector>
+#include "llvm/IR/BasicBlock.h"
#include <algorithm>
+#include <vector>
namespace llvm {
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
new file mode 100644
index 000000000000..4e75904ded4f
--- /dev/null
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -0,0 +1,1985 @@
+//===-- MemorySanitizer.cpp - detector of uninitialized reads -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file is a part of MemorySanitizer, a detector of uninitialized
+/// reads.
+///
+/// Status: early prototype.
+///
+/// The algorithm of the tool is similar to Memcheck
+/// (http://goo.gl/QKbem). We associate a few shadow bits with every
+/// byte of the application memory, poison the shadow of the malloc-ed
+/// or alloca-ed memory, load the shadow bits on every memory read,
+/// propagate the shadow bits through some of the arithmetic
+/// instructions (including MOV), store the shadow bits on every memory
+/// write, report a bug on some other instructions (e.g. JMP) if the
+/// associated shadow is poisoned.
+///
+/// But there are differences too. The first and the major one:
+/// compiler instrumentation instead of binary instrumentation. This
+/// gives us much better register allocation, possible compiler
+/// optimizations and a fast start-up. But it also brings a major issue:
+/// msan needs to see all program events, including system
+/// calls and reads/writes in system libraries, so we either need to
+/// compile *everything* with msan or use a binary translation
+/// component (e.g. DynamoRIO) to instrument pre-built libraries.
+/// Another difference from Memcheck is that we use 8 shadow bits per
+/// byte of application memory and use a direct shadow mapping. This
+/// greatly simplifies the instrumentation code and avoids races on
+/// shadow updates (Memcheck is single-threaded so races are not a
+/// concern there. Memcheck uses 2 shadow bits per byte with a slow
+/// path storage that uses 8 bits per byte).
+///
+/// The default value of shadow is 0, which means "clean" (not poisoned).
+///
+/// Every module initializer should call __msan_init to ensure that the
+/// shadow memory is ready. On error, __msan_warning is called. Since
+/// parameters and return values may be passed via registers, we have a
+/// specialized thread-local shadow for return values
+/// (__msan_retval_tls) and parameters (__msan_param_tls).
+///
+/// Origin tracking.
+///
+/// MemorySanitizer can track origins (allocation points) of all uninitialized
+/// values. This behavior is controlled with a flag (msan-track-origins) and is
+/// disabled by default.
+///
+/// Origins are 4-byte values created and interpreted by the runtime library.
+/// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
+/// of application memory. Propagation of origins is basically a bunch of
+/// "select" instructions that pick the origin of a dirty argument, if an
+/// instruction has one.
+///
+/// Every 4 aligned, consecutive bytes of application memory have one origin
+/// value associated with them. If these bytes contain uninitialized data
+/// coming from 2 different allocations, the last store wins. Because of this,
+/// MemorySanitizer reports can show unrelated origins, but this is unlikely in
+/// practice.
+///
+/// Origins are meaningless for fully initialized values, so MemorySanitizer
+/// avoids storing origin to memory when a fully initialized value is stored.
+/// This way it avoids needlessly overwriting the origin of the 4-byte region
+/// on a short (i.e. 1 byte) clean store, and it is also good for performance.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msan"
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BlackList.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+static const uint64_t kShadowMask32 = 1ULL << 31;
+static const uint64_t kShadowMask64 = 1ULL << 46;
+static const uint64_t kOriginOffset32 = 1ULL << 30;
+static const uint64_t kOriginOffset64 = 1ULL << 45;
+static const unsigned kMinOriginAlignment = 4;
+static const unsigned kShadowTLSAlignment = 8;
+
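+// A minimal sketch of the address arithmetic these constants imply (helper
+// names are illustrative; the real computations are in getShadowPtr and
+// getOriginPtr below):
+//
+//   uint64_t shadowAddr(uint64_t App) { return App & ~kShadowMask64; }
+//   uint64_t originAddr(uint64_t App) {
+//     return (shadowAddr(App) + kOriginOffset64) & ~3ULL;
+//   }
+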
+/// \brief Track origins of uninitialized values.
+///
+/// Adds a section to MemorySanitizer report that points to the allocation
+/// (stack or heap) the uninitialized bits came from originally.
+static cl::opt<bool> ClTrackOrigins("msan-track-origins",
+ cl::desc("Track origins (allocation sites) of poisoned memory"),
+ cl::Hidden, cl::init(false));
+static cl::opt<bool> ClKeepGoing("msan-keep-going",
+ cl::desc("keep going after reporting a UMR"),
+ cl::Hidden, cl::init(false));
+static cl::opt<bool> ClPoisonStack("msan-poison-stack",
+ cl::desc("poison uninitialized stack variables"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
+ cl::desc("poison uninitialized stack variables with a call"),
+ cl::Hidden, cl::init(false));
+static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
+ cl::desc("poison uninitialized stack variables with the given patter"),
+ cl::Hidden, cl::init(0xff));
+static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
+ cl::desc("poison undef temps"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
+ cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
+ cl::desc("exact handling of relational integer ICmp"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClStoreCleanOrigin("msan-store-clean-origin",
+ cl::desc("store origin for clean (fully initialized) values"),
+ cl::Hidden, cl::init(false));
+
+// This flag controls whether we check the shadow of the address
+// operand of a load or store. Such bugs are very rare, since a load from
+// a garbage address typically results in SEGV, but they still happen
+// (e.g. only the lower bits of the address are garbage, or the access
+// happens early at program startup where malloc-ed memory is more likely
+// to be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
+static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
+ cl::desc("report accesses through a pointer which has poisoned shadow"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
+ cl::desc("print out instructions with default strict semantics"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<std::string> ClBlacklistFile("msan-blacklist",
+ cl::desc("File containing the list of functions where MemorySanitizer "
+ "should not report bugs"), cl::Hidden);
+
+namespace {
+
+/// \brief An instrumentation pass implementing detection of uninitialized
+/// reads.
+///
+/// MemorySanitizer: instrument the code in module to find
+/// uninitialized reads.
+class MemorySanitizer : public FunctionPass {
+ public:
+ MemorySanitizer(bool TrackOrigins = false,
+ StringRef BlacklistFile = StringRef())
+ : FunctionPass(ID),
+ TrackOrigins(TrackOrigins || ClTrackOrigins),
+ TD(0),
+ WarningFn(0),
+ BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
+ : BlacklistFile) { }
+ const char *getPassName() const { return "MemorySanitizer"; }
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ static char ID; // Pass identification, replacement for typeid.
+
+ private:
+ void initializeCallbacks(Module &M);
+
+ /// \brief Track origins (allocation points) of uninitialized values.
+ bool TrackOrigins;
+
+ DataLayout *TD;
+ LLVMContext *C;
+ Type *IntptrTy;
+ Type *OriginTy;
+ /// \brief Thread-local shadow storage for function parameters.
+ GlobalVariable *ParamTLS;
+ /// \brief Thread-local origin storage for function parameters.
+ GlobalVariable *ParamOriginTLS;
+ /// \brief Thread-local shadow storage for function return value.
+ GlobalVariable *RetvalTLS;
+ /// \brief Thread-local origin storage for function return value.
+ GlobalVariable *RetvalOriginTLS;
+ /// \brief Thread-local shadow storage for in-register va_arg function
+ /// parameters (x86_64-specific).
+ GlobalVariable *VAArgTLS;
+ /// \brief Thread-local shadow storage for va_arg overflow area
+ /// (x86_64-specific).
+ GlobalVariable *VAArgOverflowSizeTLS;
+ /// \brief Thread-local space used to pass origin value to the UMR reporting
+ /// function.
+ GlobalVariable *OriginTLS;
+
+ /// \brief The run-time callback to print a warning.
+ Value *WarningFn;
+ /// \brief Run-time helper that copies origin info for a memory range.
+ Value *MsanCopyOriginFn;
+ /// \brief Run-time helper that generates a new origin value for a stack
+ /// allocation.
+ Value *MsanSetAllocaOriginFn;
+ /// \brief Run-time helper that poisons stack on function entry.
+ Value *MsanPoisonStackFn;
+ /// \brief MSan runtime replacements for memmove, memcpy and memset.
+ Value *MemmoveFn, *MemcpyFn, *MemsetFn;
+
+ /// \brief Address mask used in application-to-shadow address calculation.
+ /// ShadowAddr is computed as ApplicationAddr & ~ShadowMask.
+ uint64_t ShadowMask;
+ /// \brief Offset of the origin shadow from the "normal" shadow.
+ /// OriginAddr is computed as (ShadowAddr + OriginOffset) & ~3ULL
+ uint64_t OriginOffset;
+ /// \brief Branch weights for error reporting.
+ MDNode *ColdCallWeights;
+ /// \brief Branch weights for origin store.
+ MDNode *OriginStoreWeights;
+  /// \brief Path to the blacklist file.
+ SmallString<64> BlacklistFile;
+ /// \brief The blacklist.
+ OwningPtr<BlackList> BL;
+ /// \brief An empty volatile inline asm that prevents callback merge.
+ InlineAsm *EmptyAsm;
+
+ friend struct MemorySanitizerVisitor;
+ friend struct VarArgAMD64Helper;
+};
+} // namespace
+
+char MemorySanitizer::ID = 0;
+INITIALIZE_PASS(MemorySanitizer, "msan",
+ "MemorySanitizer: detects uninitialized reads.",
+ false, false)
+
+FunctionPass *llvm::createMemorySanitizerPass(bool TrackOrigins,
+ StringRef BlacklistFile) {
+ return new MemorySanitizer(TrackOrigins, BlacklistFile);
+}
+
+/// \brief Create a non-const global initialized with the given string.
+///
+/// Creates a writable global for Str so that we can pass it to the
+/// run-time lib. The runtime uses the first 4 bytes of the string to store
+/// the frame ID, so the string needs to be mutable.
+static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
+ StringRef Str) {
+ Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+ return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
+ GlobalValue::PrivateLinkage, StrConst, "");
+}
+
+
+/// \brief Insert extern declaration of runtime-provided functions and globals.
+void MemorySanitizer::initializeCallbacks(Module &M) {
+ // Only do this once.
+ if (WarningFn)
+ return;
+
+ IRBuilder<> IRB(*C);
+ // Create the callback.
+ // FIXME: this function should have "Cold" calling conv,
+ // which is not yet implemented.
+ StringRef WarningFnName = ClKeepGoing ? "__msan_warning"
+ : "__msan_warning_noreturn";
+ WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL);
+
+ MsanCopyOriginFn = M.getOrInsertFunction(
+ "__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, NULL);
+ MsanSetAllocaOriginFn = M.getOrInsertFunction(
+ "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
+ IRB.getInt8PtrTy(), NULL);
+ MsanPoisonStackFn = M.getOrInsertFunction(
+ "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL);
+ MemmoveFn = M.getOrInsertFunction(
+ "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, NULL);
+ MemcpyFn = M.getOrInsertFunction(
+ "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IntptrTy, NULL);
+ MemsetFn = M.getOrInsertFunction(
+ "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
+ IntptrTy, NULL);
+
+ // Create globals.
+ RetvalTLS = new GlobalVariable(
+ M, ArrayType::get(IRB.getInt64Ty(), 8), false,
+ GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0,
+ GlobalVariable::GeneralDynamicTLSModel);
+ RetvalOriginTLS = new GlobalVariable(
+ M, OriginTy, false, GlobalVariable::ExternalLinkage, 0,
+ "__msan_retval_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+
+ ParamTLS = new GlobalVariable(
+ M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
+ GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0,
+ GlobalVariable::GeneralDynamicTLSModel);
+ ParamOriginTLS = new GlobalVariable(
+ M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage,
+ 0, "__msan_param_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+
+ VAArgTLS = new GlobalVariable(
+ M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
+ GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0,
+ GlobalVariable::GeneralDynamicTLSModel);
+ VAArgOverflowSizeTLS = new GlobalVariable(
+ M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0,
+ "__msan_va_arg_overflow_size_tls", 0,
+ GlobalVariable::GeneralDynamicTLSModel);
+ OriginTLS = new GlobalVariable(
+ M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0,
+ "__msan_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+
+ // We insert an empty inline asm after __msan_report* to avoid callback merge.
+ EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
+ StringRef(""), StringRef(""),
+ /*hasSideEffects=*/true);
+}
+
+/// \brief Module-level initialization.
+///
+/// Inserts a call to __msan_init into the module's constructor list.
+bool MemorySanitizer::doInitialization(Module &M) {
+ TD = getAnalysisIfAvailable<DataLayout>();
+ if (!TD)
+ return false;
+ BL.reset(new BlackList(BlacklistFile));
+ C = &(M.getContext());
+ unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0);
+ switch (PtrSize) {
+ case 64:
+ ShadowMask = kShadowMask64;
+ OriginOffset = kOriginOffset64;
+ break;
+ case 32:
+ ShadowMask = kShadowMask32;
+ OriginOffset = kOriginOffset32;
+ break;
+ default:
+ report_fatal_error("unsupported pointer size");
+ break;
+ }
+
+ IRBuilder<> IRB(*C);
+ IntptrTy = IRB.getIntPtrTy(TD);
+ OriginTy = IRB.getInt32Ty();
+
+ ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
+ OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
+
+ // Insert a call to __msan_init/__msan_track_origins into the module's CTORs.
+ appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction(
+ "__msan_init", IRB.getVoidTy(), NULL)), 0);
+
+ new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(TrackOrigins), "__msan_track_origins");
+
+ new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(ClKeepGoing), "__msan_keep_going");
+
+ return true;
+}
+
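+// The two WeakODR globals above let the runtime discover how the module was
+// instrumented; the matching runtime-side declarations would presumably be:
+//
+//   extern const int __msan_track_origins;  // 1 if -msan-track-origins
+//   extern const int __msan_keep_going;     // 1 if -msan-keep-going
+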
+namespace {
+
+/// \brief A helper class that handles instrumentation of VarArg
+/// functions on a particular platform.
+///
+/// Implementations are expected to insert the instrumentation
+/// necessary to propagate argument shadow through VarArg function
+/// calls. Visit* methods are called during an InstVisitor pass over
+/// the function, and should avoid creating new basic blocks. A new
+/// instance of this class is created for each instrumented function.
+struct VarArgHelper {
+ /// \brief Visit a CallSite.
+ virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0;
+
+ /// \brief Visit a va_start call.
+ virtual void visitVAStartInst(VAStartInst &I) = 0;
+
+ /// \brief Visit a va_copy call.
+ virtual void visitVACopyInst(VACopyInst &I) = 0;
+
+ /// \brief Finalize function instrumentation.
+ ///
+ /// This method is called after visiting all interesting (see above)
+ /// instructions in a function.
+ virtual void finalizeInstrumentation() = 0;
+
+ virtual ~VarArgHelper() {}
+};
+
+struct MemorySanitizerVisitor;
+
+VarArgHelper*
+CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+ MemorySanitizerVisitor &Visitor);
+
+/// This class does all the work for a given function. Store and Load
+/// instructions store and load corresponding shadow and origin
+/// values. Most instructions propagate shadow from arguments to their
+/// return values. Certain instructions (most importantly, BranchInst)
+/// test their argument shadow and print reports (with a runtime call) if it's
+/// non-zero.
+struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
+ Function &F;
+ MemorySanitizer &MS;
+ SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
+ ValueMap<Value*, Value*> ShadowMap, OriginMap;
+ bool InsertChecks;
+ bool LoadShadow;
+ OwningPtr<VarArgHelper> VAHelper;
+
+ struct ShadowOriginAndInsertPoint {
+ Instruction *Shadow;
+ Instruction *Origin;
+ Instruction *OrigIns;
+ ShadowOriginAndInsertPoint(Instruction *S, Instruction *O, Instruction *I)
+ : Shadow(S), Origin(O), OrigIns(I) { }
+ ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { }
+ };
+ SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
+ SmallVector<Instruction*, 16> StoreList;
+
+ MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
+ : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
+ LoadShadow = InsertChecks =
+ !MS.BL->isIn(F) &&
+ F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::SanitizeMemory);
+
+ DEBUG(if (!InsertChecks)
+ dbgs() << "MemorySanitizer is not inserting checks into '"
+ << F.getName() << "'\n");
+ }
+
+ void materializeStores() {
+ for (size_t i = 0, n = StoreList.size(); i < n; i++) {
+ StoreInst& I = *dyn_cast<StoreInst>(StoreList[i]);
+
+ IRBuilder<> IRB(&I);
+ Value *Val = I.getValueOperand();
+ Value *Addr = I.getPointerOperand();
+ Value *Shadow = getShadow(Val);
+ Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
+
+ StoreInst *NewSI =
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
+ DEBUG(dbgs() << " STORE: " << *NewSI << "\n");
+ (void)NewSI;
+
+ if (ClCheckAccessAddress)
+ insertCheck(Addr, &I);
+
+ if (MS.TrackOrigins) {
+ unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
+ if (ClStoreCleanOrigin || isa<StructType>(Shadow->getType())) {
+ IRB.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRB),
+ Alignment);
+ } else {
+ Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+
+ Constant *Cst = dyn_cast_or_null<Constant>(ConvertedShadow);
+ // TODO(eugenis): handle non-zero constant shadow by inserting an
+        // unconditional check (we cannot simply fail compilation, as this
+        // could be in dead code).
+ if (Cst)
+ continue;
+
+ Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
+ getCleanShadow(ConvertedShadow), "_mscmp");
+ Instruction *CheckTerm =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false,
+ MS.OriginStoreWeights);
+ IRBuilder<> IRBNew(CheckTerm);
+ IRBNew.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRBNew),
+ Alignment);
+ }
+ }
+ }
+ }
+
+ void materializeChecks() {
+ for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
+ Instruction *Shadow = InstrumentationList[i].Shadow;
+ Instruction *OrigIns = InstrumentationList[i].OrigIns;
+ IRBuilder<> IRB(OrigIns);
+ DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
+ Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+ DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
+ Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
+ getCleanShadow(ConvertedShadow), "_mscmp");
+ Instruction *CheckTerm =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp),
+ /* Unreachable */ !ClKeepGoing,
+ MS.ColdCallWeights);
+
+ IRB.SetInsertPoint(CheckTerm);
+ if (MS.TrackOrigins) {
+ Instruction *Origin = InstrumentationList[i].Origin;
+ IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
+ MS.OriginTLS);
+ }
+ CallInst *Call = IRB.CreateCall(MS.WarningFn);
+ Call->setDebugLoc(OrigIns->getDebugLoc());
+ IRB.CreateCall(MS.EmptyAsm);
+ DEBUG(dbgs() << " CHECK: " << *Cmp << "\n");
+ }
+ DEBUG(dbgs() << "DONE:\n" << F);
+ }
+
+ /// \brief Add MemorySanitizer instrumentation to a function.
+ bool runOnFunction() {
+ MS.initializeCallbacks(*F.getParent());
+ if (!MS.TD) return false;
+
+ // In the presence of unreachable blocks, we may see Phi nodes with
+ // incoming nodes from such blocks. Since InstVisitor skips unreachable
+ // blocks, such nodes will not have any shadow value associated with them.
+ // It's easier to remove unreachable blocks than deal with missing shadow.
+ removeUnreachableBlocks(F);
+
+ // Iterate all BBs in depth-first order and create shadow instructions
+ // for all instructions (where applicable).
+ // For PHI nodes we create dummy shadow PHIs which will be finalized later.
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+ BasicBlock *BB = *DI;
+ visit(*BB);
+ }
+
+ // Finalize PHI nodes.
+ for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) {
+ PHINode *PN = ShadowPHINodes[i];
+ PHINode *PNS = cast<PHINode>(getShadow(PN));
+ PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : 0;
+ size_t NumValues = PN->getNumIncomingValues();
+ for (size_t v = 0; v < NumValues; v++) {
+ PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
+ if (PNO)
+ PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
+ }
+ }
+
+ VAHelper->finalizeInstrumentation();
+
+ // Delayed instrumentation of StoreInst.
+ // This may add new checks to be inserted later.
+ materializeStores();
+
+ // Insert shadow value checks.
+ materializeChecks();
+
+ return true;
+ }
+
+ /// \brief Compute the shadow type that corresponds to a given Value.
+ Type *getShadowTy(Value *V) {
+ return getShadowTy(V->getType());
+ }
+
+ /// \brief Compute the shadow type that corresponds to a given Type.
+ Type *getShadowTy(Type *OrigTy) {
+ if (!OrigTy->isSized()) {
+ return 0;
+ }
+ // For integer type, shadow is the same as the original type.
+ // This may return weird-sized types like i1.
+ if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
+ return IT;
+ if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
+ uint32_t EltSize = MS.TD->getTypeSizeInBits(VT->getElementType());
+ return VectorType::get(IntegerType::get(*MS.C, EltSize),
+ VT->getNumElements());
+ }
+ if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
+ SmallVector<Type*, 4> Elements;
+ for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+ Elements.push_back(getShadowTy(ST->getElementType(i)));
+ StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
+ DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
+ return Res;
+ }
+ uint32_t TypeSize = MS.TD->getTypeSizeInBits(OrigTy);
+ return IntegerType::get(*MS.C, TypeSize);
+ }
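+  // Examples of the mapping implemented above (illustrative):
+  //   i32          ==> i32
+  //   float        ==> i32
+  //   <4 x float>  ==> <4 x i32>
+  //   {i8, double} ==> {i8, i64}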
+
+ /// \brief Flatten a vector type.
+ Type *getShadowTyNoVec(Type *ty) {
+ if (VectorType *vt = dyn_cast<VectorType>(ty))
+ return IntegerType::get(*MS.C, vt->getBitWidth());
+ return ty;
+ }
+
+  /// \brief Convert a shadow value to its flattened variant.
+ Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
+ Type *Ty = V->getType();
+ Type *NoVecTy = getShadowTyNoVec(Ty);
+ if (Ty == NoVecTy) return V;
+ return IRB.CreateBitCast(V, NoVecTy);
+ }
+
+ /// \brief Compute the shadow address that corresponds to a given application
+ /// address.
+ ///
+ /// Shadow = Addr & ~ShadowMask.
+ Value *getShadowPtr(Value *Addr, Type *ShadowTy,
+ IRBuilder<> &IRB) {
+ Value *ShadowLong =
+ IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
+ return IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
+ }
+
+ /// \brief Compute the origin address that corresponds to a given application
+ /// address.
+ ///
+ /// OriginAddr = (ShadowAddr + OriginOffset) & ~3ULL
+ Value *getOriginPtr(Value *Addr, IRBuilder<> &IRB) {
+ Value *ShadowLong =
+ IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
+ Value *Add =
+ IRB.CreateAdd(ShadowLong,
+ ConstantInt::get(MS.IntptrTy, MS.OriginOffset));
+ Value *SecondAnd =
+ IRB.CreateAnd(Add, ConstantInt::get(MS.IntptrTy, ~3ULL));
+ return IRB.CreateIntToPtr(SecondAnd, PointerType::get(IRB.getInt32Ty(), 0));
+ }
+
+ /// \brief Compute the shadow address for a given function argument.
+ ///
+ /// Shadow = ParamTLS+ArgOffset.
+ Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
+ int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
+ "_msarg");
+ }
+
+ /// \brief Compute the origin address for a given function argument.
+ Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
+ int ArgOffset) {
+ if (!MS.TrackOrigins) return 0;
+ Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+ "_msarg_o");
+ }
+
+ /// \brief Compute the shadow address for a retval.
+ Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
+ Value *Base = IRB.CreatePointerCast(MS.RetvalTLS, MS.IntptrTy);
+ return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
+ "_msret");
+ }
+
+ /// \brief Compute the origin address for a retval.
+ Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
+ // We keep a single origin for the entire retval. Might be too optimistic.
+ return MS.RetvalOriginTLS;
+ }
+
+ /// \brief Set SV to be the shadow value for V.
+ void setShadow(Value *V, Value *SV) {
+ assert(!ShadowMap.count(V) && "Values may only have one shadow");
+ ShadowMap[V] = SV;
+ }
+
+ /// \brief Set Origin to be the origin value for V.
+ void setOrigin(Value *V, Value *Origin) {
+ if (!MS.TrackOrigins) return;
+ assert(!OriginMap.count(V) && "Values may only have one origin");
+ DEBUG(dbgs() << "ORIGIN: " << *V << " ==> " << *Origin << "\n");
+ OriginMap[V] = Origin;
+ }
+
+ /// \brief Create a clean shadow value for a given value.
+ ///
+ /// Clean shadow (all zeroes) means all bits of the value are defined
+ /// (initialized).
+ Constant *getCleanShadow(Value *V) {
+ Type *ShadowTy = getShadowTy(V);
+ if (!ShadowTy)
+ return 0;
+ return Constant::getNullValue(ShadowTy);
+ }
+
+ /// \brief Create a dirty shadow of a given shadow type.
+ Constant *getPoisonedShadow(Type *ShadowTy) {
+ assert(ShadowTy);
+ if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
+ return Constant::getAllOnesValue(ShadowTy);
+ StructType *ST = cast<StructType>(ShadowTy);
+ SmallVector<Constant *, 4> Vals;
+ for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+ Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
+ return ConstantStruct::get(ST, Vals);
+ }
+
+ /// \brief Create a dirty shadow for a given value.
+ Constant *getPoisonedShadow(Value *V) {
+ Type *ShadowTy = getShadowTy(V);
+ if (!ShadowTy)
+ return 0;
+ return getPoisonedShadow(ShadowTy);
+ }
+
+ /// \brief Create a clean (zero) origin.
+ Value *getCleanOrigin() {
+ return Constant::getNullValue(MS.OriginTy);
+ }
+
+ /// \brief Get the shadow value for a given Value.
+ ///
+ /// This function either returns the value set earlier with setShadow,
+  /// or extracts it from ParamTLS (for function arguments).
+ Value *getShadow(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // For instructions the shadow is already stored in the map.
+ Value *Shadow = ShadowMap[V];
+ if (!Shadow) {
+ DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
+ (void)I;
+ assert(Shadow && "No shadow for a value");
+ }
+ return Shadow;
+ }
+ if (UndefValue *U = dyn_cast<UndefValue>(V)) {
+ Value *AllOnes = ClPoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
+ DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
+ (void)U;
+ return AllOnes;
+ }
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ // For arguments we compute the shadow on demand and store it in the map.
+ Value **ShadowPtr = &ShadowMap[V];
+ if (*ShadowPtr)
+ return *ShadowPtr;
+ Function *F = A->getParent();
+ IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI());
+ unsigned ArgOffset = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ if (!AI->getType()->isSized()) {
+ DEBUG(dbgs() << "Arg is not sized\n");
+ continue;
+ }
+ unsigned Size = AI->hasByValAttr()
+ ? MS.TD->getTypeAllocSize(AI->getType()->getPointerElementType())
+ : MS.TD->getTypeAllocSize(AI->getType());
+ if (A == AI) {
+ Value *Base = getShadowPtrForArgument(AI, EntryIRB, ArgOffset);
+ if (AI->hasByValAttr()) {
+ // ByVal pointer itself has clean shadow. We copy the actual
+ // argument shadow to the underlying memory.
+ Value *Cpy = EntryIRB.CreateMemCpy(
+ getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB),
+ Base, Size, AI->getParamAlignment());
+ DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
+ (void)Cpy;
+ *ShadowPtr = getCleanShadow(V);
+ } else {
+ *ShadowPtr = EntryIRB.CreateLoad(Base);
+ }
+ DEBUG(dbgs() << " ARG: " << *AI << " ==> " <<
+ **ShadowPtr << "\n");
+ if (MS.TrackOrigins) {
+ Value* OriginPtr = getOriginPtrForArgument(AI, EntryIRB, ArgOffset);
+ setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
+ }
+ }
+ ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
+ }
+ assert(*ShadowPtr && "Could not find shadow for an argument");
+ return *ShadowPtr;
+ }
+ // For everything else the shadow is zero.
+ return getCleanShadow(V);
+ }
+
+ /// \brief Get the shadow for i-th argument of the instruction I.
+ Value *getShadow(Instruction *I, int i) {
+ return getShadow(I->getOperand(i));
+ }
+
+ /// \brief Get the origin for a value.
+ Value *getOrigin(Value *V) {
+ if (!MS.TrackOrigins) return 0;
+ if (isa<Instruction>(V) || isa<Argument>(V)) {
+ Value *Origin = OriginMap[V];
+ if (!Origin) {
+ DEBUG(dbgs() << "NO ORIGIN: " << *V << "\n");
+ Origin = getCleanOrigin();
+ }
+ return Origin;
+ }
+ return getCleanOrigin();
+ }
+
+ /// \brief Get the origin for i-th argument of the instruction I.
+ Value *getOrigin(Instruction *I, int i) {
+ return getOrigin(I->getOperand(i));
+ }
+
+ /// \brief Remember the place where a shadow check should be inserted.
+ ///
+ /// This location will be later instrumented with a check that will print a
+ /// UMR warning in runtime if the value is not fully defined.
+ void insertCheck(Value *Val, Instruction *OrigIns) {
+ assert(Val);
+ if (!InsertChecks) return;
+ Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
+ if (!Shadow) return;
+#ifndef NDEBUG
+ Type *ShadowTy = Shadow->getType();
+ assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
+ "Can only insert checks for integer and vector shadow types");
+#endif
+ Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
+ InstrumentationList.push_back(
+ ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
+ }
+
+ // ------------------- Visitors.
+
+ /// \brief Instrument LoadInst
+ ///
+ /// Loads the corresponding shadow and (optionally) origin.
+ /// Optionally, checks that the load address is fully defined.
+ void visitLoadInst(LoadInst &I) {
+ assert(I.getType()->isSized() && "Load type must have size");
+ IRBuilder<> IRB(&I);
+ Type *ShadowTy = getShadowTy(&I);
+ Value *Addr = I.getPointerOperand();
+ if (LoadShadow) {
+ Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
+ setShadow(&I,
+ IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld"));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+
+ if (ClCheckAccessAddress)
+ insertCheck(I.getPointerOperand(), &I);
+
+ if (MS.TrackOrigins) {
+ if (LoadShadow) {
+ unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
+ setOrigin(&I,
+ IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment));
+ } else {
+ setOrigin(&I, getCleanOrigin());
+ }
+ }
+ }
+
+ /// \brief Instrument StoreInst
+ ///
+ /// Stores the corresponding shadow and (optionally) origin.
+ /// Optionally, checks that the store address is fully defined.
+ void visitStoreInst(StoreInst &I) {
+ StoreList.push_back(&I);
+ }
+
+ // Vector manipulation.
+ void visitExtractElementInst(ExtractElementInst &I) {
+ insertCheck(I.getOperand(1), &I);
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
+ "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitInsertElementInst(InsertElementInst &I) {
+ insertCheck(I.getOperand(2), &I);
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
+ I.getOperand(2), "_msprop"));
+ setOriginForNaryOp(I);
+ }
+
+ void visitShuffleVectorInst(ShuffleVectorInst &I) {
+ insertCheck(I.getOperand(2), &I);
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
+ I.getOperand(2), "_msprop"));
+ setOriginForNaryOp(I);
+ }
+
+ // Casts.
+ void visitSExtInst(SExtInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitZExtInst(ZExtInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitTruncInst(TruncInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitBitCastInst(BitCastInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitPtrToIntInst(PtrToIntInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+ "_msprop_ptrtoint"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitIntToPtrInst(IntToPtrInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+ "_msprop_inttoptr"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
+ void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
+ void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
+ void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
+ void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
+ void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
+
+ /// \brief Propagate shadow for bitwise AND.
+ ///
+ /// This code is exact, i.e. if, for example, a bit in the left argument
+  /// is defined and 0, then neither the value nor the definedness of the
+  /// corresponding bit in B affects the resulting shadow.
+ void visitAnd(BinaryOperator &I) {
+ IRBuilder<> IRB(&I);
+ // "And" of 0 and a poisoned value results in unpoisoned value.
+ // 1&1 => 1; 0&1 => 0; p&1 => p;
+ // 1&0 => 0; 0&0 => 0; p&0 => 0;
+ // 1&p => p; 0&p => 0; p&p => p;
+ // S = (S1 & S2) | (V1 & S2) | (S1 & V2)
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ Value *V1 = I.getOperand(0);
+ Value *V2 = I.getOperand(1);
+ if (V1->getType() != S1->getType()) {
+ V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+ V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+ }
+ Value *S1S2 = IRB.CreateAnd(S1, S2);
+ Value *V1S2 = IRB.CreateAnd(V1, S2);
+ Value *S1V2 = IRB.CreateAnd(S1, V2);
+ setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+ setOriginForNaryOp(I);
+ }
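+  // Worked example (illustrative): if A is fully defined with V1 = 0b1100
+  // (so S1 = 0) and B has shadow S2 = 0b1010, then
+  //   S = (0 & S2) | (V1 & S2) | (S1 & V2) = 0b1000,
+  // i.e. only bits where A is 1 can expose B's poison.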
+
+ void visitOr(BinaryOperator &I) {
+ IRBuilder<> IRB(&I);
+ // "Or" of 1 and a poisoned value results in unpoisoned value.
+ // 1|1 => 1; 0|1 => 1; p|1 => 1;
+ // 1|0 => 1; 0|0 => 0; p|0 => p;
+ // 1|p => 1; 0|p => p; p|p => p;
+ // S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ Value *V1 = IRB.CreateNot(I.getOperand(0));
+ Value *V2 = IRB.CreateNot(I.getOperand(1));
+ if (V1->getType() != S1->getType()) {
+ V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+ V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+ }
+ Value *S1S2 = IRB.CreateAnd(S1, S2);
+ Value *V1S2 = IRB.CreateAnd(V1, S2);
+ Value *S1V2 = IRB.CreateAnd(S1, V2);
+ setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+ setOriginForNaryOp(I);
+ }
+
+ /// \brief Default propagation of shadow and/or origin.
+ ///
+ /// This class implements the general case of shadow propagation, used in all
+ /// cases where we don't know and/or don't care about what the operation
+ /// actually does. It converts all input shadow values to a common type
+ /// (extending or truncating as necessary), and bitwise OR's them.
+ ///
+ /// This is much cheaper than inserting checks (i.e. requiring inputs to be
+ /// fully initialized), and less prone to false positives.
+ ///
+ /// This class also implements the general case of origin propagation. For a
+ /// Nary operation, result origin is set to the origin of an argument that is
+  /// not entirely initialized. If there is more than one such argument, the
+ /// rightmost of them is picked. It does not matter which one is picked if all
+ /// arguments are initialized.
+ template <bool CombineShadow>
+ class Combiner {
+ Value *Shadow;
+ Value *Origin;
+ IRBuilder<> &IRB;
+ MemorySanitizerVisitor *MSV;
+
+ public:
+ Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) :
+ Shadow(0), Origin(0), IRB(IRB), MSV(MSV) {}
+
+ /// \brief Add a pair of shadow and origin values to the mix.
+ Combiner &Add(Value *OpShadow, Value *OpOrigin) {
+ if (CombineShadow) {
+ assert(OpShadow);
+ if (!Shadow)
+ Shadow = OpShadow;
+ else {
+ OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
+ Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
+ }
+ }
+
+ if (MSV->MS.TrackOrigins) {
+ assert(OpOrigin);
+ if (!Origin) {
+ Origin = OpOrigin;
+ } else {
+ Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
+ Value *Cond = IRB.CreateICmpNE(FlatShadow,
+ MSV->getCleanShadow(FlatShadow));
+ Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
+ }
+ }
+ return *this;
+ }
+
+ /// \brief Add an application value to the mix.
+ Combiner &Add(Value *V) {
+ Value *OpShadow = MSV->getShadow(V);
+ Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : 0;
+ return Add(OpShadow, OpOrigin);
+ }
+
+ /// \brief Set the current combined values as the given instruction's shadow
+ /// and origin.
+ void Done(Instruction *I) {
+ if (CombineShadow) {
+ assert(Shadow);
+ Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
+ MSV->setShadow(I, Shadow);
+ }
+ if (MSV->MS.TrackOrigins) {
+ assert(Origin);
+ MSV->setOrigin(I, Origin);
+ }
+ }
+ };
+
+ typedef Combiner<true> ShadowAndOriginCombiner;
+ typedef Combiner<false> OriginCombiner;
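+
+  // Usage sketch (illustrative; mirrors handleShadowOr below): a visitor
+  // folds the shadows and origins of two operands A and B into
+  // instruction I with
+  //   IRBuilder<> IRB(&I);
+  //   ShadowAndOriginCombiner SC(this, IRB);
+  //   SC.Add(A).Add(B).Done(&I);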
+
+ /// \brief Propagate origin for arbitrary operation.
+ void setOriginForNaryOp(Instruction &I) {
+ if (!MS.TrackOrigins) return;
+ IRBuilder<> IRB(&I);
+ OriginCombiner OC(this, IRB);
+ for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
+ OC.Add(OI->get());
+ OC.Done(&I);
+ }
+
+ size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
+ assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
+ "Vector of pointers is not a valid shadow type");
+ return Ty->isVectorTy() ?
+ Ty->getVectorNumElements() * Ty->getScalarSizeInBits() :
+ Ty->getPrimitiveSizeInBits();
+ }
+
+ /// \brief Cast between two shadow types, extending or truncating as
+ /// necessary.
+ Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy) {
+ Type *srcTy = V->getType();
+ if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
+ return IRB.CreateIntCast(V, dstTy, false);
+ if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
+ dstTy->getVectorNumElements() == srcTy->getVectorNumElements())
+ return IRB.CreateIntCast(V, dstTy, false);
+ size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
+ size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
+ Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
+ Value *V2 =
+ IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), false);
+    // TODO: handle struct types.
+    return IRB.CreateBitCast(V2, dstTy);
+ }
+
+ /// \brief Propagate shadow for arbitrary operation.
+ void handleShadowOr(Instruction &I) {
+ IRBuilder<> IRB(&I);
+ ShadowAndOriginCombiner SC(this, IRB);
+ for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
+ SC.Add(OI->get());
+ SC.Done(&I);
+ }
+
+ void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
+ void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
+ void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
+ void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
+ void visitSub(BinaryOperator &I) { handleShadowOr(I); }
+ void visitXor(BinaryOperator &I) { handleShadowOr(I); }
+ void visitMul(BinaryOperator &I) { handleShadowOr(I); }
+
+ void handleDiv(Instruction &I) {
+ IRBuilder<> IRB(&I);
+ // Strict on the second argument.
+ insertCheck(I.getOperand(1), &I);
+ setShadow(&I, getShadow(&I, 0));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
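+
+  // For example, for "%q = udiv i32 %a, %b" this emits a check that the
+  // shadow of %b is fully clean (a poisoned divisor can arbitrarily change
+  // or trap the result), while the quotient simply inherits the shadow and
+  // origin of %a.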
+
+ void visitUDiv(BinaryOperator &I) { handleDiv(I); }
+ void visitSDiv(BinaryOperator &I) { handleDiv(I); }
+ void visitFDiv(BinaryOperator &I) { handleDiv(I); }
+ void visitURem(BinaryOperator &I) { handleDiv(I); }
+ void visitSRem(BinaryOperator &I) { handleDiv(I); }
+ void visitFRem(BinaryOperator &I) { handleDiv(I); }
+
+ /// \brief Instrument == and != comparisons.
+ ///
+ /// Sometimes the comparison result is known even if some of the bits of the
+ /// arguments are not.
+ void handleEqualityComparison(ICmpInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *A = I.getOperand(0);
+ Value *B = I.getOperand(1);
+ Value *Sa = getShadow(A);
+ Value *Sb = getShadow(B);
+
+ // Get rid of pointers and vectors of pointers.
+ // For ints (and vectors of ints), types of A and Sa match,
+ // and this is a no-op.
+ A = IRB.CreatePointerCast(A, Sa->getType());
+ B = IRB.CreatePointerCast(B, Sb->getType());
+
+ // A == B <==> (C = A^B) == 0
+ // A != B <==> (C = A^B) != 0
+ // Sc = Sa | Sb
+ Value *C = IRB.CreateXor(A, B);
+ Value *Sc = IRB.CreateOr(Sa, Sb);
+    // Now we are dealing with i = (C == 0); the C != 0 case is symmetric.
+ // Result is defined if one of the following is true
+ // * there is a defined 1 bit in C
+ // * C is fully defined
+ // Si = !(C & ~Sc) && Sc
+ Value *Zero = Constant::getNullValue(Sc->getType());
+ Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
+ Value *Si =
+ IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
+ IRB.CreateICmpEQ(
+ IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
+ Si->setName("_msprop_icmp");
+ setShadow(&I, Si);
+ setOriginForNaryOp(I);
+ }
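+
+  // Illustrative worked example (not part of the original change): let
+  // A = 0b10?? (Sa = 0b0011, two low bits poisoned) and B = 0b0100 be
+  // fully defined. C = A^B then has a defined 1 in bit 3 and Sc = 0b0011,
+  // so C & ~Sc is non-zero, the ICmpEQ above yields false, and Si = 0:
+  // the values already differ in a defined bit, so A != B is known even
+  // though A is partially poisoned.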
+
+  /// \brief Build the lowest possible value of A, taking into account A's
+  /// uninitialized bits.
+ Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
+ bool isSigned) {
+ if (isSigned) {
+ // Split shadow into sign bit and other bits.
+ Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
+ Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
+      // Maximize the undefined sign bit, minimize other undefined bits.
+ return
+ IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
+ } else {
+ // Minimize undefined bits.
+ return IRB.CreateAnd(A, IRB.CreateNot(Sa));
+ }
+ }
+
+  /// \brief Build the highest possible value of A, taking into account A's
+  /// uninitialized bits.
+ Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
+ bool isSigned) {
+ if (isSigned) {
+ // Split shadow into sign bit and other bits.
+ Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
+ Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
+      // Minimize the undefined sign bit, maximize other undefined bits.
+ return
+ IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
+ } else {
+ // Maximize undefined bits.
+ return IRB.CreateOr(A, Sa);
+ }
+ }
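+
+  // Illustrative worked example (not part of the original change): for a
+  // signed 4-bit A = 0b?01? with Sa = 0b1001, the sign bit and bit 0 are
+  // poisoned. The lowest value sets the poisoned sign bit and clears the
+  // other poisoned bit, giving 0b1010 (-6); the highest clears the sign
+  // bit and sets bit 0, giving 0b0011 (+3). These bounds feed the interval
+  // comparison below.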
+
+ /// \brief Instrument relational comparisons.
+ ///
+ /// This function does exact shadow propagation for all relational
+ /// comparisons of integers, pointers and vectors of those.
+ /// FIXME: output seems suboptimal when one of the operands is a constant
+ void handleRelationalComparisonExact(ICmpInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *A = I.getOperand(0);
+ Value *B = I.getOperand(1);
+ Value *Sa = getShadow(A);
+ Value *Sb = getShadow(B);
+
+ // Get rid of pointers and vectors of pointers.
+ // For ints (and vectors of ints), types of A and Sa match,
+ // and this is a no-op.
+ A = IRB.CreatePointerCast(A, Sa->getType());
+ B = IRB.CreatePointerCast(B, Sb->getType());
+
+ // Let [a0, a1] be the interval of possible values of A, taking into account
+ // its undefined bits. Let [b0, b1] be the interval of possible values of B.
+ // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
+ bool IsSigned = I.isSigned();
+ Value *S1 = IRB.CreateICmp(I.getPredicate(),
+ getLowestPossibleValue(IRB, A, Sa, IsSigned),
+ getHighestPossibleValue(IRB, B, Sb, IsSigned));
+ Value *S2 = IRB.CreateICmp(I.getPredicate(),
+ getHighestPossibleValue(IRB, A, Sa, IsSigned),
+ getLowestPossibleValue(IRB, B, Sb, IsSigned));
+ Value *Si = IRB.CreateXor(S1, S2);
+ setShadow(&I, Si);
+ setOriginForNaryOp(I);
+ }
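+
+  // Illustrative worked example (not part of the original change): for an
+  // unsigned "A < B" with A = 0b01?0 (Sa = 0b0010, so A's interval is
+  // [4, 6]) and B = 8 fully defined ([8, 8]): both (4 < 8) and (6 < 8)
+  // hold, so S1 == S2, Si = S1 xor S2 = 0, and the result is defined
+  // (true) despite the poisoned bit in A.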
+
+ /// \brief Instrument signed relational comparisons.
+ ///
+ /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by
+ /// propagating the highest bit of the shadow. Everything else is delegated
+ /// to handleShadowOr().
+ void handleSignedRelationalComparison(ICmpInst &I) {
+ Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
+ Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
+ Value* op = NULL;
+ CmpInst::Predicate pre = I.getPredicate();
+ if (constOp0 && constOp0->isNullValue() &&
+ (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) {
+ op = I.getOperand(1);
+ } else if (constOp1 && constOp1->isNullValue() &&
+ (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) {
+ op = I.getOperand(0);
+ }
+ if (op) {
+ IRBuilder<> IRB(&I);
+ Value* Shadow =
+ IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), "_msprop_icmpslt");
+ setShadow(&I, Shadow);
+ setOrigin(&I, getOrigin(op));
+ } else {
+ handleShadowOr(I);
+ }
+ }
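+
+  // For example, (x < 0) on a signed x is just a sign-bit test, so the
+  // shadow emitted above is "getShadow(x) slt 0": the result is poisoned
+  // exactly when the sign bit of x is poisoned, regardless of the other
+  // bits.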
+
+ void visitICmpInst(ICmpInst &I) {
+ if (!ClHandleICmp) {
+ handleShadowOr(I);
+ return;
+ }
+ if (I.isEquality()) {
+ handleEqualityComparison(I);
+ return;
+ }
+
+ assert(I.isRelational());
+ if (ClHandleICmpExact) {
+ handleRelationalComparisonExact(I);
+ return;
+ }
+ if (I.isSigned()) {
+ handleSignedRelationalComparison(I);
+ return;
+ }
+
+ assert(I.isUnsigned());
+ if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
+ handleRelationalComparisonExact(I);
+ return;
+ }
+
+ handleShadowOr(I);
+ }
+
+ void visitFCmpInst(FCmpInst &I) {
+ handleShadowOr(I);
+ }
+
+ void handleShift(BinaryOperator &I) {
+ IRBuilder<> IRB(&I);
+ // If any of the S2 bits are poisoned, the whole thing is poisoned.
+ // Otherwise perform the same shift on S1.
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
+ S2->getType());
+ Value *V2 = I.getOperand(1);
+ Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
+ setShadow(&I, IRB.CreateOr(Shift, S2Conv));
+ setOriginForNaryOp(I);
+ }
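+
+  // Illustrative worked example (not part of the original change): for
+  // "%r = shl i8 %a, %b" with S1 = 0b00000011 and %b = 2 fully defined,
+  // S2Conv is 0 and the result shadow is S1 << 2 = 0b00001100. If any bit
+  // of %b were poisoned, S2Conv would become all-ones and the whole result
+  // would be poisoned.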
+
+ void visitShl(BinaryOperator &I) { handleShift(I); }
+ void visitAShr(BinaryOperator &I) { handleShift(I); }
+ void visitLShr(BinaryOperator &I) { handleShift(I); }
+
+ /// \brief Instrument llvm.memmove
+ ///
+ /// At this point we don't know if llvm.memmove will be inlined or not.
+ /// If we don't instrument it and it gets inlined,
+ /// our interceptor will not kick in and we will lose the memmove.
+ /// If we instrument the call here, but it does not get inlined,
+  /// we will memmove the shadow twice, which is bad in case
+ /// of overlapping regions. So, we simply lower the intrinsic to a call.
+ ///
+ /// Similar situation exists for memcpy and memset.
+ void visitMemMoveInst(MemMoveInst &I) {
+ IRBuilder<> IRB(&I);
+ IRB.CreateCall3(
+ MS.MemmoveFn,
+ IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ I.eraseFromParent();
+ }
+
+ // Similar to memmove: avoid copying shadow twice.
+  // This is somewhat unfortunate as it may slow down small constant memcpys.
+ // FIXME: consider doing manual inline for small constant sizes and proper
+ // alignment.
+ void visitMemCpyInst(MemCpyInst &I) {
+ IRBuilder<> IRB(&I);
+ IRB.CreateCall3(
+ MS.MemcpyFn,
+ IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ I.eraseFromParent();
+ }
+
+ // Same as memcpy.
+ void visitMemSetInst(MemSetInst &I) {
+ IRBuilder<> IRB(&I);
+ IRB.CreateCall3(
+ MS.MemsetFn,
+ IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+ I.eraseFromParent();
+ }
+
+ void visitVAStartInst(VAStartInst &I) {
+ VAHelper->visitVAStartInst(I);
+ }
+
+ void visitVACopyInst(VACopyInst &I) {
+ VAHelper->visitVACopyInst(I);
+ }
+
+ enum IntrinsicKind {
+ IK_DoesNotAccessMemory,
+ IK_OnlyReadsMemory,
+ IK_WritesMemory
+ };
+
+ static IntrinsicKind getIntrinsicKind(Intrinsic::ID iid) {
+ const int DoesNotAccessMemory = IK_DoesNotAccessMemory;
+ const int OnlyReadsArgumentPointees = IK_OnlyReadsMemory;
+ const int OnlyReadsMemory = IK_OnlyReadsMemory;
+ const int OnlyAccessesArgumentPointees = IK_WritesMemory;
+ const int UnknownModRefBehavior = IK_WritesMemory;
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#define ModRefBehavior IntrinsicKind
+#include "llvm/IR/Intrinsics.gen"
+#undef ModRefBehavior
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
+ }
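+
+  // The include above expands to a generated switch over all intrinsic IDs
+  // (roughly "switch (iid) { case ...: return OnlyReadsMemory; ... }"),
+  // which the local constants remap onto the three IntrinsicKind values;
+  // e.g. anything with UnknownModRefBehavior is conservatively treated as
+  // IK_WritesMemory.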
+
+ /// \brief Handle vector store-like intrinsics.
+ ///
+ /// Instrument intrinsics that look like a simple SIMD store: writes memory,
+ /// has 1 pointer argument and 1 vector argument, returns void.
+ bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value* Addr = I.getArgOperand(0);
+ Value *Shadow = getShadow(&I, 1);
+ Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
+
+ // We don't know the pointer alignment (could be unaligned SSE store!).
+    // Have to assume the worst case.
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);
+
+ if (ClCheckAccessAddress)
+ insertCheck(Addr, &I);
+
+ // FIXME: use ClStoreCleanOrigin
+ // FIXME: factor out common code from materializeStores
+ if (MS.TrackOrigins)
+ IRB.CreateStore(getOrigin(&I, 1), getOriginPtr(Addr, IRB));
+ return true;
+ }
+
+ /// \brief Handle vector load-like intrinsics.
+ ///
+ /// Instrument intrinsics that look like a simple SIMD load: reads memory,
+ /// has 1 pointer argument, returns a vector.
+ bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Addr = I.getArgOperand(0);
+
+ Type *ShadowTy = getShadowTy(&I);
+ if (LoadShadow) {
+ Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
+ // We don't know the pointer alignment (could be unaligned SSE load!).
+      // Have to assume the worst case.
+ setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld"));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+
+ if (ClCheckAccessAddress)
+ insertCheck(Addr, &I);
+
+ if (MS.TrackOrigins) {
+ if (LoadShadow)
+ setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB)));
+ else
+ setOrigin(&I, getCleanOrigin());
+ }
+ return true;
+ }
+
+ /// \brief Handle (SIMD arithmetic)-like intrinsics.
+ ///
+ /// Instrument intrinsics with any number of arguments of the same type,
+ /// equal to the return type. The type should be simple (no aggregates or
+ /// pointers; vectors are fine).
+ /// Caller guarantees that this intrinsic does not access memory.
+ bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
+ Type *RetTy = I.getType();
+ if (!(RetTy->isIntOrIntVectorTy() ||
+ RetTy->isFPOrFPVectorTy() ||
+ RetTy->isX86_MMXTy()))
+ return false;
+
+ unsigned NumArgOperands = I.getNumArgOperands();
+
+ for (unsigned i = 0; i < NumArgOperands; ++i) {
+ Type *Ty = I.getArgOperand(i)->getType();
+ if (Ty != RetTy)
+ return false;
+ }
+
+ IRBuilder<> IRB(&I);
+ ShadowAndOriginCombiner SC(this, IRB);
+ for (unsigned i = 0; i < NumArgOperands; ++i)
+ SC.Add(I.getArgOperand(i));
+ SC.Done(&I);
+
+ return true;
+ }
+
+ /// \brief Heuristically instrument unknown intrinsics.
+ ///
+ /// The main purpose of this code is to do something reasonable with all
+  /// random intrinsics we might encounter, most importantly SIMD intrinsics.
+  /// We recognize several classes of intrinsics by their argument types and
+  /// ModRefBehavior and apply special instrumentation when we are reasonably
+ /// sure that we know what the intrinsic does.
+ ///
+ /// We special-case intrinsics where this approach fails. See llvm.bswap
+ /// handling as an example of that.
+ bool handleUnknownIntrinsic(IntrinsicInst &I) {
+ unsigned NumArgOperands = I.getNumArgOperands();
+ if (NumArgOperands == 0)
+ return false;
+
+ Intrinsic::ID iid = I.getIntrinsicID();
+ IntrinsicKind IK = getIntrinsicKind(iid);
+ bool OnlyReadsMemory = IK == IK_OnlyReadsMemory;
+ bool WritesMemory = IK == IK_WritesMemory;
+ assert(!(OnlyReadsMemory && WritesMemory));
+
+ if (NumArgOperands == 2 &&
+ I.getArgOperand(0)->getType()->isPointerTy() &&
+ I.getArgOperand(1)->getType()->isVectorTy() &&
+ I.getType()->isVoidTy() &&
+ WritesMemory) {
+ // This looks like a vector store.
+ return handleVectorStoreIntrinsic(I);
+ }
+
+ if (NumArgOperands == 1 &&
+ I.getArgOperand(0)->getType()->isPointerTy() &&
+ I.getType()->isVectorTy() &&
+ OnlyReadsMemory) {
+ // This looks like a vector load.
+ return handleVectorLoadIntrinsic(I);
+ }
+
+ if (!OnlyReadsMemory && !WritesMemory)
+ if (maybeHandleSimpleNomemIntrinsic(I))
+ return true;
+
+ // FIXME: detect and handle SSE maskstore/maskload
+ return false;
+ }
+
+ void handleBswap(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Op = I.getArgOperand(0);
+ Type *OpType = Op->getType();
+ Function *BswapFunc = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::bswap, ArrayRef<Type*>(&OpType, 1));
+ setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
+ setOrigin(&I, getOrigin(Op));
+ }
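+
+  // Bswap permutes whole bytes without mixing bits across byte lanes, so
+  // the shadow of bswap(x) is exactly bswap(shadow(x)), computed above by
+  // calling the same intrinsic on the shadow.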
+
+ void visitIntrinsicInst(IntrinsicInst &I) {
+ switch (I.getIntrinsicID()) {
+ case llvm::Intrinsic::bswap:
+ handleBswap(I);
+ break;
+ default:
+ if (!handleUnknownIntrinsic(I))
+ visitInstruction(I);
+ break;
+ }
+ }
+
+ void visitCallSite(CallSite CS) {
+ Instruction &I = *CS.getInstruction();
+ assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
+ if (CS.isCall()) {
+ CallInst *Call = cast<CallInst>(&I);
+
+ // For inline asm, do the usual thing: check argument shadow and mark all
+ // outputs as clean. Note that any side effects of the inline asm that are
+ // not immediately visible in its constraints are not handled.
+ if (Call->isInlineAsm()) {
+ visitInstruction(I);
+ return;
+ }
+
+ // Allow only tail calls with the same types, otherwise
+ // we may have a false positive: shadow for a non-void RetVal
+ // will get propagated to a void RetVal.
+ if (Call->isTailCall() && Call->getType() != Call->getParent()->getType())
+ Call->setTailCall(false);
+
+ assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");
+
+ // We are going to insert code that relies on the fact that the callee
+ // will become a non-readonly function after it is instrumented by us. To
+ // prevent this code from being optimized out, mark that function
+ // non-readonly in advance.
+ if (Function *Func = Call->getCalledFunction()) {
+ // Clear out readonly/readnone attributes.
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
+ Func->removeAttributes(AttributeSet::FunctionIndex,
+ AttributeSet::get(Func->getContext(),
+ AttributeSet::FunctionIndex,
+ B));
+ }
+ }
+ IRBuilder<> IRB(&I);
+ unsigned ArgOffset = 0;
+ DEBUG(dbgs() << " CallSite: " << I << "\n");
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ unsigned i = ArgIt - CS.arg_begin();
+ if (!A->getType()->isSized()) {
+ DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n");
+ continue;
+ }
+ unsigned Size = 0;
+ Value *Store = 0;
+ // Compute the Shadow for arg even if it is ByVal, because
+ // in that case getShadow() will copy the actual arg shadow to
+ // __msan_param_tls.
+ Value *ArgShadow = getShadow(A);
+ Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
+ DEBUG(dbgs() << " Arg#" << i << ": " << *A <<
+ " Shadow: " << *ArgShadow << "\n");
+ if (CS.paramHasAttr(i + 1, Attribute::ByVal)) {
+ assert(A->getType()->isPointerTy() &&
+ "ByVal argument is not a pointer!");
+ Size = MS.TD->getTypeAllocSize(A->getType()->getPointerElementType());
+ unsigned Alignment = CS.getParamAlignment(i + 1);
+ Store = IRB.CreateMemCpy(ArgShadowBase,
+ getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
+ Size, Alignment);
+ } else {
+ Size = MS.TD->getTypeAllocSize(A->getType());
+ Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
+ kShadowTLSAlignment);
+ }
+ if (MS.TrackOrigins)
+ IRB.CreateStore(getOrigin(A),
+ getOriginPtrForArgument(A, IRB, ArgOffset));
+ (void)Store;
+ assert(Size != 0 && Store != 0);
+ DEBUG(dbgs() << " Param:" << *Store << "\n");
+ ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
+ }
+ DEBUG(dbgs() << " done with call args\n");
+
+ FunctionType *FT =
+      cast<FunctionType>(CS.getCalledValue()->getType()->getContainedType(0));
+ if (FT->isVarArg()) {
+ VAHelper->visitCallSite(CS, IRB);
+ }
+
+ // Now, get the shadow for the RetVal.
+ if (!I.getType()->isSized()) return;
+ IRBuilder<> IRBBefore(&I);
+    // Until we have full dynamic coverage, make sure the retval shadow is 0.
+ Value *Base = getShadowPtrForRetval(&I, IRBBefore);
+ IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
+ Instruction *NextInsn = 0;
+ if (CS.isCall()) {
+ NextInsn = I.getNextNode();
+ } else {
+ BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
+ if (!NormalDest->getSinglePredecessor()) {
+ // FIXME: this case is tricky, so we are just conservative here.
+ // Perhaps we need to split the edge between this BB and NormalDest,
+ // but a naive attempt to use SplitEdge leads to a crash.
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ return;
+ }
+ NextInsn = NormalDest->getFirstInsertionPt();
+ assert(NextInsn &&
+ "Could not find insertion point for retval shadow load");
+ }
+ IRBuilder<> IRBAfter(NextInsn);
+ Value *RetvalShadow =
+ IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter),
+ kShadowTLSAlignment, "_msret");
+ setShadow(&I, RetvalShadow);
+ if (MS.TrackOrigins)
+ setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
+ }
+
+ void visitReturnInst(ReturnInst &I) {
+ IRBuilder<> IRB(&I);
+ if (Value *RetVal = I.getReturnValue()) {
+ // Set the shadow for the RetVal.
+ Value *Shadow = getShadow(RetVal);
+ Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
+ DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n");
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ if (MS.TrackOrigins)
+ IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
+ }
+ }
+
+ void visitPHINode(PHINode &I) {
+ IRBuilder<> IRB(&I);
+ ShadowPHINodes.push_back(&I);
+ setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
+ "_msphi_s"));
+ if (MS.TrackOrigins)
+ setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
+ "_msphi_o"));
+ }
+
+ void visitAllocaInst(AllocaInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ if (!ClPoisonStack) return;
+ IRBuilder<> IRB(I.getNextNode());
+ uint64_t Size = MS.TD->getTypeAllocSize(I.getAllocatedType());
+ if (ClPoisonStackWithCall) {
+ IRB.CreateCall2(MS.MsanPoisonStackFn,
+ IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
+ ConstantInt::get(MS.IntptrTy, Size));
+ } else {
+ Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB);
+ IRB.CreateMemSet(ShadowBase, IRB.getInt8(ClPoisonStackPattern),
+ Size, I.getAlignment());
+ }
+
+ if (MS.TrackOrigins) {
+ setOrigin(&I, getCleanOrigin());
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ // We create a string with a description of the stack allocation and
+ // pass it into __msan_set_alloca_origin.
+ // It will be printed by the run-time if stack-originated UMR is found.
+ // The first 4 bytes of the string are set to '----' and will be replaced
+ // by __msan_va_arg_overflow_size_tls at the first call.
+ StackDescription << "----" << I.getName() << "@" << F.getName();
+ Value *Descr =
+ createPrivateNonConstGlobalForString(*F.getParent(),
+ StackDescription.str());
+ IRB.CreateCall3(MS.MsanSetAllocaOriginFn,
+ IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
+ ConstantInt::get(MS.IntptrTy, Size),
+ IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()));
+ }
+ }
+
+ void visitSelectInst(SelectInst& I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateSelect(I.getCondition(),
+ getShadow(I.getTrueValue()), getShadow(I.getFalseValue()),
+ "_msprop"));
+ if (MS.TrackOrigins) {
+ // Origins are always i32, so any vector conditions must be flattened.
+ // FIXME: consider tracking vector origins for app vectors?
+ Value *Cond = I.getCondition();
+ if (Cond->getType()->isVectorTy()) {
+ Value *ConvertedShadow = convertToShadowTyNoVec(Cond, IRB);
+ Cond = IRB.CreateICmpNE(ConvertedShadow,
+ getCleanShadow(ConvertedShadow), "_mso_select");
+ }
+ setOrigin(&I, IRB.CreateSelect(Cond,
+ getOrigin(I.getTrueValue()), getOrigin(I.getFalseValue())));
+ }
+ }
+
+ void visitLandingPadInst(LandingPadInst &I) {
+ // Do nothing.
+ // See http://code.google.com/p/memory-sanitizer/issues/detail?id=1
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void visitGetElementPtrInst(GetElementPtrInst &I) {
+ handleShadowOr(I);
+ }
+
+ void visitExtractValueInst(ExtractValueInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Agg = I.getAggregateOperand();
+ DEBUG(dbgs() << "ExtractValue: " << I << "\n");
+ Value *AggShadow = getShadow(Agg);
+ DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n");
+ Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
+ DEBUG(dbgs() << " ResShadow: " << *ResShadow << "\n");
+ setShadow(&I, ResShadow);
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void visitInsertValueInst(InsertValueInst &I) {
+ IRBuilder<> IRB(&I);
+ DEBUG(dbgs() << "InsertValue: " << I << "\n");
+ Value *AggShadow = getShadow(I.getAggregateOperand());
+ Value *InsShadow = getShadow(I.getInsertedValueOperand());
+ DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n");
+ DEBUG(dbgs() << " InsShadow: " << *InsShadow << "\n");
+ Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
+ DEBUG(dbgs() << " Res: " << *Res << "\n");
+ setShadow(&I, Res);
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void dumpInst(Instruction &I) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
+ } else {
+ errs() << "ZZZ " << I.getOpcodeName() << "\n";
+ }
+ errs() << "QQQ " << I << "\n";
+ }
+
+ void visitResumeInst(ResumeInst &I) {
+ DEBUG(dbgs() << "Resume: " << I << "\n");
+ // Nothing to do here.
+ }
+
+ void visitInstruction(Instruction &I) {
+ // Everything else: stop propagating and check for poisoned shadow.
+ if (ClDumpStrictInstructions)
+ dumpInst(I);
+ DEBUG(dbgs() << "DEFAULT: " << I << "\n");
+ for (size_t i = 0, n = I.getNumOperands(); i < n; i++)
+ insertCheck(I.getOperand(i), &I);
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+};
+
+/// \brief AMD64-specific implementation of VarArgHelper.
+struct VarArgAMD64Helper : public VarArgHelper {
+ // An unfortunate workaround for asymmetric lowering of va_arg stuff.
+ // See a comment in visitCallSite for more details.
+ static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
+ static const unsigned AMD64FpEndOffset = 176;
+
+ Function &F;
+ MemorySanitizer &MS;
+ MemorySanitizerVisitor &MSV;
+ Value *VAArgTLSCopy;
+ Value *VAArgOverflowSize;
+
+ SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+ VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV)
+ : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(0), VAArgOverflowSize(0) { }
+
+ enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+ ArgKind classifyArgument(Value* arg) {
+ // A very rough approximation of X86_64 argument classification rules.
+ Type *T = arg->getType();
+ if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
+ return AK_FloatingPoint;
+ if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+ return AK_GeneralPurpose;
+ if (T->isPointerTy())
+ return AK_GeneralPurpose;
+ return AK_Memory;
+ }
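+
+  // Per this rough approximation (illustrative): an i32 or an i8*
+  // classifies as AK_GeneralPurpose, a double or <4 x float> as
+  // AK_FloatingPoint, and an i128 or <4 x i32> as AK_Memory.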
+
+ // For VarArg functions, store the argument shadow in an ABI-specific format
+ // that corresponds to va_list layout.
+ // We do this because Clang lowers va_arg in the frontend, and this pass
+  // only sees the low-level code that deals with va_list internals.
+ // A much easier alternative (provided that Clang emits va_arg instructions)
+ // would have been to associate each live instance of va_list with a copy of
+ // MSanParamTLS, and extract shadow on va_arg() call in the argument list
+ // order.
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) {
+ unsigned GpOffset = 0;
+ unsigned FpOffset = AMD64GpEndOffset;
+ unsigned OverflowOffset = AMD64FpEndOffset;
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ ArgKind AK = classifyArgument(A);
+ if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
+ AK = AK_Memory;
+ if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
+ AK = AK_Memory;
+ Value *Base;
+ switch (AK) {
+ case AK_GeneralPurpose:
+ Base = getShadowPtrForVAArgument(A, IRB, GpOffset);
+ GpOffset += 8;
+ break;
+ case AK_FloatingPoint:
+ Base = getShadowPtrForVAArgument(A, IRB, FpOffset);
+ FpOffset += 16;
+ break;
+ case AK_Memory:
+ uint64_t ArgSize = MS.TD->getTypeAllocSize(A->getType());
+ Base = getShadowPtrForVAArgument(A, IRB, OverflowOffset);
+ OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8);
+ }
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ }
+ Constant *OverflowSize =
+ ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
+ IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+ }
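+
+  // Illustrative worked example (hypothetical call, not from the original
+  // change): for printf(fmt, i32 x, double d, i32 y), the argument shadows
+  // land at GP offsets 0 (fmt), 8 (x) and 16 (y), d's shadow goes to FP
+  // offset 48 (AMD64GpEndOffset), nothing spills past AMD64FpEndOffset,
+  // and the recorded overflow size is 0.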
+
+ /// \brief Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Value *A, IRBuilder<> &IRB,
+ int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(A), 0),
+ "_msarg");
+ }
+
+ void visitVAStartInst(VAStartInst &I) {
+ IRBuilder<> IRB(&I);
+ VAStartInstrumentationList.push_back(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants.
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */24, /* alignment */8, false);
+ }
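+
+  // The 24 bytes unpoisoned above cover the x86_64 __va_list_tag layout:
+  // i32 gp_offset, i32 fp_offset, i8* overflow_arg_area, i8* reg_save_area
+  // (4 + 4 + 8 + 8), matching the offsets 8 and 16 used in
+  // finalizeInstrumentation below.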
+
+ void visitVACopyInst(VACopyInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants.
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */24, /* alignment */8, false);
+ }
+
+ void finalizeInstrumentation() {
+ assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+ "finalizeInstrumentation called twice");
+ if (!VAStartInstrumentationList.empty()) {
+ // If there is a va_start in this function, make a backup copy of
+ // va_arg_tls somewhere in the function entry block.
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ Value *CopySize =
+ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
+ VAArgOverflowSize);
+ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+ }
+
+ // Instrument va_start.
+ // Copy va_list shadow from the backup copy of the TLS contents.
+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+ CallInst *OrigInst = VAStartInstrumentationList[i];
+ IRBuilder<> IRB(OrigInst->getNextNode());
+ Value *VAListTag = OrigInst->getArgOperand(0);
+
+ Value *RegSaveAreaPtrPtr =
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, 16)),
+ Type::getInt64PtrTy(*MS.C));
+ Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+ Value *RegSaveAreaShadowPtr =
+ MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+ IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy,
+ AMD64FpEndOffset, 16);
+
+ Value *OverflowArgAreaPtrPtr =
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, 8)),
+ Type::getInt64PtrTy(*MS.C));
+ Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
+ Value *OverflowArgAreaShadowPtr =
+ MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
+ Value *SrcPtr =
+ getShadowPtrForVAArgument(VAArgTLSCopy, IRB, AMD64FpEndOffset);
+ IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
+ }
+ }
+};
+
+VarArgHelper* CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+ MemorySanitizerVisitor &Visitor) {
+ return new VarArgAMD64Helper(Func, Msan, Visitor);
+}
+
+} // namespace
+
+bool MemorySanitizer::runOnFunction(Function &F) {
+ MemorySanitizerVisitor Visitor(F, *this);
+
+ // Clear out readonly/readnone attributes.
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
+ F.removeAttributes(AttributeSet::FunctionIndex,
+ AttributeSet::get(F.getContext(),
+ AttributeSet::FunctionIndex, B));
+
+ return Visitor.runOnFunction();
+}
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 1fe12545d294..b45aef65bc76 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -13,20 +13,20 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "insert-optimal-edge-profiling"
+#include "llvm/Transforms/Instrumentation.h"
+#include "MaximumSpanningTree.h"
#include "ProfilingUtils.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "MaximumSpanningTree.h"
using namespace llvm;
STATISTIC(NumEdgesInserted, "The # of edges inserted.");
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index cc27146ebcf0..7de73269cf2b 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -45,24 +45,23 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "insert-path-profiling"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "ProfilingUtils.h"
#include "llvm/Analysis/PathNumbering.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/InstrTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeBuilder.h"
#include "llvm/Pass.h"
-#include "llvm/TypeBuilder.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Instrumentation.h"
#include <vector>
#define HASH_THRESHHOLD 100000
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index de57cd173483..4b3de6d7fc38 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -15,11 +15,11 @@
//===----------------------------------------------------------------------===//
#include "ProfilingUtils.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
GlobalValue *Array,
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 9e10fc4416de..299060a42fe8 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -21,31 +21,32 @@
#define DEBUG_TYPE "tsan"
-#include "BlackList.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BlackList.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
-static cl::opt<std::string> ClBlackListFile("tsan-blacklist",
+static cl::opt<std::string> ClBlacklistFile("tsan-blacklist",
cl::desc("Blacklist file"), cl::Hidden);
static cl::opt<bool> ClInstrumentMemoryAccesses(
"tsan-instrument-memory-accesses", cl::init(true),
@@ -56,6 +57,9 @@ static cl::opt<bool> ClInstrumentFuncEntryExit(
static cl::opt<bool> ClInstrumentAtomics(
"tsan-instrument-atomics", cl::init(true),
cl::desc("Instrument atomics"), cl::Hidden);
+static cl::opt<bool> ClInstrumentMemIntrinsics(
+ "tsan-instrument-memintrinsics", cl::init(true),
+ cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
@@ -63,6 +67,7 @@ STATISTIC(NumOmittedReadsBeforeWrite,
"Number of reads ignored due to following writes");
STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size");
STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes");
+STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads");
STATISTIC(NumOmittedReadsFromConstantGlobals,
"Number of reads from constant globals");
STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads");
@@ -71,21 +76,29 @@ namespace {
/// ThreadSanitizer: instrument the code in module to find races.
struct ThreadSanitizer : public FunctionPass {
- ThreadSanitizer();
+ ThreadSanitizer(StringRef BlacklistFile = StringRef())
+ : FunctionPass(ID),
+ TD(0),
+ BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
+ : BlacklistFile) { }
const char *getPassName() const;
bool runOnFunction(Function &F);
bool doInitialization(Module &M);
static char ID; // Pass identification, replacement for typeid.
private:
+ void initializeCallbacks(Module &M);
bool instrumentLoadOrStore(Instruction *I);
bool instrumentAtomic(Instruction *I);
+ bool instrumentMemIntrinsic(Instruction *I);
void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
SmallVectorImpl<Instruction*> &All);
bool addrPointsToConstantData(Value *Addr);
int getMemoryAccessFuncIndex(Value *Addr);
DataLayout *TD;
+ Type *IntptrTy;
+ SmallString<64> BlacklistFile;
OwningPtr<BlackList> BL;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
@@ -102,6 +115,8 @@ struct ThreadSanitizer : public FunctionPass {
Function *TsanAtomicThreadFence;
Function *TsanAtomicSignalFence;
Function *TsanVptrUpdate;
+ Function *TsanVptrLoad;
+ Function *MemmoveFn, *MemcpyFn, *MemsetFn;
};
} // namespace
@@ -114,13 +129,8 @@ const char *ThreadSanitizer::getPassName() const {
return "ThreadSanitizer";
}
-ThreadSanitizer::ThreadSanitizer()
- : FunctionPass(ID),
- TD(NULL) {
-}
-
-FunctionPass *llvm::createThreadSanitizerPass() {
- return new ThreadSanitizer();
+FunctionPass *llvm::createThreadSanitizerPass(StringRef BlacklistFile) {
+ return new ThreadSanitizer(BlacklistFile);
}
static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
@@ -130,18 +140,8 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
report_fatal_error("ThreadSanitizer interface function redefined");
}
-bool ThreadSanitizer::doInitialization(Module &M) {
- TD = getAnalysisIfAvailable<DataLayout>();
- if (!TD)
- return false;
- BL.reset(new BlackList(ClBlackListFile));
-
- // Always insert a call to __tsan_init into the module's CTORs.
+void ThreadSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(M.getContext());
- Value *TsanInit = M.getOrInsertFunction("__tsan_init",
- IRB.getVoidTy(), NULL);
- appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
-
// Initialize the callbacks.
TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
@@ -188,6 +188,8 @@ bool ThreadSanitizer::doInitialization(Module &M) {
NamePart = "_fetch_or";
else if (op == AtomicRMWInst::Xor)
NamePart = "_fetch_xor";
+ else if (op == AtomicRMWInst::Nand)
+ NamePart = "_fetch_nand";
else
continue;
SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
@@ -198,15 +200,42 @@ bool ThreadSanitizer::doInitialization(Module &M) {
SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) +
"_compare_exchange_val");
TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction(
- AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, NULL));
+ AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, NULL));
}
TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), NULL));
+ TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL));
TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL));
+
+ MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy, NULL));
+ MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IntptrTy, NULL));
+ MemsetFn = checkInterfaceFunction(M.getOrInsertFunction(
+ "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
+ IntptrTy, NULL));
+}
+
+bool ThreadSanitizer::doInitialization(Module &M) {
+ TD = getAnalysisIfAvailable<DataLayout>();
+ if (!TD)
+ return false;
+ BL.reset(new BlackList(BlacklistFile));
+
+ // Always insert a call to __tsan_init into the module's CTORs.
+ IRBuilder<> IRB(M.getContext());
+ IntptrTy = IRB.getIntPtrTy(TD);
+ Value *TsanInit = M.getOrInsertFunction("__tsan_init",
+ IRB.getVoidTy(), NULL);
+ appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
+
return true;
}
@@ -297,10 +326,12 @@ static bool isAtomic(Instruction *I) {
bool ThreadSanitizer::runOnFunction(Function &F) {
if (!TD) return false;
if (BL->isIn(F)) return false;
+ initializeCallbacks(*F.getParent());
SmallVector<Instruction*, 8> RetVec;
SmallVector<Instruction*, 8> AllLoadsAndStores;
SmallVector<Instruction*, 8> LocalLoadsAndStores;
SmallVector<Instruction*, 8> AtomicAccesses;
+ SmallVector<Instruction*, 8> MemIntrinCalls;
bool Res = false;
bool HasCalls = false;
@@ -317,6 +348,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
else if (isa<ReturnInst>(BI))
RetVec.push_back(BI);
else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+ if (isa<MemIntrinsic>(BI))
+ MemIntrinCalls.push_back(BI);
HasCalls = true;
chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
}
@@ -340,6 +373,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
Res |= instrumentAtomic(AtomicAccesses[i]);
}
+ if (ClInstrumentMemIntrinsics)
+ for (size_t i = 0, n = MemIntrinCalls.size(); i < n; ++i) {
+ Res |= instrumentMemIntrinsic(MemIntrinCalls[i]);
+ }
+
// Instrument function entry/exit points if there were instrumented accesses.
if ((Res || HasCalls) && ClInstrumentFuncEntryExit) {
IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
@@ -378,6 +416,12 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
NumInstrumentedVtableWrites++;
return true;
}
+ if (!IsWrite && isVtableAccess(I)) {
+ IRB.CreateCall(TsanVptrLoad,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+ NumInstrumentedVtableReads++;
+ return true;
+ }
Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
if (IsWrite) NumInstrumentedWrites++;
@@ -391,7 +435,7 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
case NotAtomic: assert(false);
case Unordered: // Fall-through.
case Monotonic: v = 0; break;
- // case Consume: v = 1; break; // Not specified yet.
+ // case Consume: v = 1; break; // Not specified yet.
case Acquire: v = 2; break;
case Release: v = 3; break;
case AcquireRelease: v = 4; break;
@@ -400,6 +444,55 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
return IRB->getInt32(v);
}
+static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
+ uint32_t v = 0;
+ switch (ord) {
+ case NotAtomic: assert(false);
+ case Unordered: // Fall-through.
+ case Monotonic: v = 0; break;
+ // case Consume: v = 1; break; // Not specified yet.
+ case Acquire: v = 2; break;
+ case Release: v = 0; break;
+ case AcquireRelease: v = 2; break;
+ case SequentiallyConsistent: v = 5; break;
+ }
+ return IRB->getInt32(v);
+}
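+
+// For example, a seq_cst cmpxchg keeps failure ordering 5 (seq_cst), while
+// a release cmpxchg downgrades its failure ordering to 0 (relaxed): a
+// failed CAS performs no store, so there is nothing to release.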
+
+// If a memset intrinsic gets inlined by the code gen, we will miss races on it.
+// So, we either need to ensure the intrinsic is not inlined, or instrument it.
+// We do not instrument memset/memmove/memcpy intrinsics (too complicated);
+// instead we simply replace them with regular function calls, which are then
+// intercepted by the run-time.
+// Since tsan is running after everyone else, the calls should not be
+// replaced back with intrinsics. If that becomes wrong at some point,
+// we will need to call e.g. __tsan_memset to avoid the intrinsics.
+bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
+ IRBuilder<> IRB(I);
+ if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
+ IRB.CreateCall3(MemsetFn,
+ IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ I->eraseFromParent();
+ } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) {
+ IRB.CreateCall3(isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
+ IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false));
+ I->eraseFromParent();
+ }
+ return false;
+}
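+
+// For example (roughly), "call void @llvm.memset.p0i8.i64(i8* %p, i8 0,
+// i64 %n, ...)" becomes a plain "call i8* @memset(i8* %p, i32 0, i64 %n)"
+// that the run-time can intercept.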
+
+// Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x
+// standards. For background, see the C++11 standard. A slightly older, publicly
+// available draft of the standard (not entirely up-to-date, but close enough
+// for casual browsing) is available here:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
+// The following page contains more background information:
+// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
+
bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
IRBuilder<> IRB(I);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
@@ -461,7 +554,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false),
IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false),
- createOrdering(&IRB, CASI->getOrdering())};
+ createOrdering(&IRB, CASI->getOrdering()),
+ createFailOrdering(&IRB, CASI->getOrdering())};
CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args));
ReplaceInstWithInst(I, C);
} else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt
index f7bca064c7e1..15e9fba0a765 100644
--- a/lib/Transforms/LLVMBuild.txt
+++ b/lib/Transforms/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize
+subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC
[component_0]
type = Group
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
index 8b1df92fa28b..c390517d07cd 100644
--- a/lib/Transforms/Makefile
+++ b/lib/Transforms/Makefile
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
-PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello
+PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello ObjCARC
include $(LEVEL)/Makefile.config
diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt
new file mode 100644
index 000000000000..233deb398011
--- /dev/null
+++ b/lib/Transforms/ObjCARC/CMakeLists.txt
@@ -0,0 +1,13 @@
+add_llvm_library(LLVMObjCARCOpts
+ ObjCARC.cpp
+ ObjCARCOpts.cpp
+ ObjCARCExpand.cpp
+ ObjCARCAPElim.cpp
+ ObjCARCAliasAnalysis.cpp
+ ObjCARCUtil.cpp
+ ObjCARCContract.cpp
+ DependencyAnalysis.cpp
+ ProvenanceAnalysis.cpp
+ )
+
+add_dependencies(LLVMObjCARCOpts intrinsics_gen)
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
new file mode 100644
index 000000000000..8f917aeb3725
--- /dev/null
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -0,0 +1,262 @@
+//===- DependencyAnalysis.cpp - ObjC ARC Optimization ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines special dependency analysis routines used in Objective C
+/// ARC Optimizations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-dependency"
+#include "ObjCARC.h"
+#include "DependencyAnalysis.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/Support/CFG.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// Test whether the given instruction can result in a reference count
+/// modification (positive or negative) for the pointer's object.
+bool
+llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ InstructionClass Class) {
+ switch (Class) {
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_IntrinsicUser:
+ case IC_User:
+ // These operations never directly modify a reference count.
+ return false;
+ default: break;
+ }
+
+ ImmutableCallSite CS = static_cast<const Value *>(Inst);
+ assert(CS && "Only calls can alter reference counts!");
+
+ // See if AliasAnalysis can help us with the call.
+ AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
+ if (AliasAnalysis::onlyReadsMemory(MRB))
+ return false;
+ if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I) {
+ const Value *Op = *I;
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ }
+
+ // Assume the worst.
+ return true;
+}
+
+/// Test whether the given instruction can "use" the given pointer's object in a
+/// way that requires the reference count to be positive.
+bool
+llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, InstructionClass Class) {
+ // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers.
+ if (Class == IC_Call)
+ return false;
+
+ // Consider various instructions which may have pointer arguments which are
+ // not "uses".
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) {
+ // Comparing a pointer with null, or any other constant, isn't really a use,
+ // because we don't care what the pointer points to, or about the values
+ // of any other dynamic reference-counted pointers.
+ if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA()))
+ return false;
+ } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
+ // For calls, just check the arguments (and not the callee operand).
+ for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
+ OE = CS.arg_end(); OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // Special-case stores, because we don't care about the stored value, just
+ // the store address.
+ const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
+ // If we can't tell what the underlying object was, assume there is a
+ // dependence.
+ return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr);
+ }
+
+ // Check each operand for a match.
+ for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
+ OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+}
+
+/// Test if there can be dependencies on Inst through Arg. This function only
+/// tests dependencies relevant for removing pairs of calls.
+bool
+llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
+ const Value *Arg, ProvenanceAnalysis &PA) {
+ // If we've reached the definition of Arg, stop.
+ if (Inst == Arg)
+ return true;
+
+ switch (Flavor) {
+ case NeedsPositiveRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanUse(Inst, Arg, PA, Class);
+ }
+ }
+
+ case AutoreleasePoolBoundary: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ // These mark the end and begin of an autorelease pool scope.
+ return true;
+ default:
+ // Nothing else does this.
+ return false;
+ }
+ }
+
+ case CanChangeRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ // Conservatively assume this can decrement any count.
+ return true;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanAlterRefCount(Inst, Arg, PA, Class);
+ }
+ }
+
+ case RetainAutoreleaseDep:
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ // Don't merge an objc_autorelease with an objc_retain inside a different
+ // autoreleasepool scope.
+ return true;
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Nothing else matters for objc_retainAutorelease formation.
+ return false;
+ }
+
+ case RetainAutoreleaseRVDep: {
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Anything that can autorelease interrupts
+ // retainAutoreleaseReturnValue formation.
+ return CanInterruptRV(Class);
+ }
+ }
+
+ case RetainRVDep:
+ return CanInterruptRV(GetBasicInstructionClass(Inst));
+ }
+
+ llvm_unreachable("Invalid dependence flavor");
+}
+
+/// Walk up the CFG from StartPos (which is in StartBB) and find local and
+/// non-local dependencies on Arg.
+///
+/// TODO: Cache results?
+void
+llvm::objcarc::FindDependencies(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSet<Instruction *, 4> &DependingInsts,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ BasicBlock::iterator StartPos = StartInst;
+
+ SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
+ Worklist.push_back(std::make_pair(StartBB, StartPos));
+ do {
+ std::pair<BasicBlock *, BasicBlock::iterator> Pair =
+ Worklist.pop_back_val();
+ BasicBlock *LocalStartBB = Pair.first;
+ BasicBlock::iterator LocalStartPos = Pair.second;
+ BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+ for (;;) {
+ if (LocalStartPos == StartBBBegin) {
+ pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+ if (PI == PE)
+ // If we've reached the function entry, produce a null dependence.
+ DependingInsts.insert(0);
+ else
+ // Add the predecessors to the worklist.
+ do {
+ BasicBlock *PredBB = *PI;
+ if (Visited.insert(PredBB))
+ Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+ } while (++PI != PE);
+ break;
+ }
+
+ Instruction *Inst = --LocalStartPos;
+ if (Depends(Flavor, Inst, Arg, PA)) {
+ DependingInsts.insert(Inst);
+ break;
+ }
+ }
+ } while (!Worklist.empty());
+
+ // Determine whether the original StartBB post-dominates all of the blocks we
+ // visited. If not, insert a sentinel indicating that most optimizations are
+ // not safe.
+ for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+ E = Visited.end(); I != E; ++I) {
+ const BasicBlock *BB = *I;
+ if (BB == StartBB)
+ continue;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ const BasicBlock *Succ = *SI;
+ if (Succ != StartBB && !Visited.count(Succ)) {
+ DependingInsts.insert(reinterpret_cast<Instruction *>(-1));
+ return;
+ }
+ }
+ }
+}
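The walk above encodes two sentinels that callers must decode: a null entry in DependingInsts means the search reached the function entry, and (Instruction *)-1 means StartBB does not post-dominate every visited block, so most optimizations are unsafe. A minimal client-side sketch, assuming an Arg, a Retain call, and a ProvenanceAnalysis PA already in scope (the size() == 1 test mirrors the one ObjCARCContract applies below):

SmallPtrSet<Instruction *, 4> DependingInsts;
SmallPtrSet<const BasicBlock *, 4> Visited;
FindDependencies(NeedsPositiveRetainCount, Arg,
                 Retain->getParent(), Retain,
                 DependingInsts, Visited, PA);
if (DependingInsts.size() == 1) {
  Instruction *Dep = *DependingInsts.begin();
  // Reject both sentinels before reasoning about the dependence.
  if (Dep && Dep != reinterpret_cast<Instruction *>(-1)) {
    // Exactly one well-defined local dependence on Arg.
  }
}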
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.h b/lib/Transforms/ObjCARC/DependencyAnalysis.h
new file mode 100644
index 000000000000..24d358b30ab1
--- /dev/null
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -0,0 +1,79 @@
+//===- DependencyAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares special dependency analysis routines used in Objective C
+/// ARC Optimizations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_DEPENDENCYANALYSIS_H
+#define LLVM_TRANSFORMS_OBJCARC_DEPENDENCYANALYSIS_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+ class BasicBlock;
+ class Instruction;
+ class Value;
+}
+
+namespace llvm {
+namespace objcarc {
+
+class ProvenanceAnalysis;
+
+/// \enum DependenceKind
+/// \brief Defines different dependence kinds among various ARC constructs.
+///
+/// There are several kinds of dependence-like concepts in use here.
+///
+enum DependenceKind {
+ NeedsPositiveRetainCount,
+ AutoreleasePoolBoundary,
+ CanChangeRetainCount,
+ RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
+ RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
+ RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue.
+};
+
+void FindDependencies(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSet<Instruction *, 4> &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA);
+
+bool
+Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
+ ProvenanceAnalysis &PA);
+
+/// Test whether the given instruction can "use" the given pointer's object in a
+/// way that requires the reference count to be positive.
+bool
+CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
+ InstructionClass Class);
+
+/// Test whether the given instruction can result in a reference count
+/// modification (positive or negative) for the pointer's object.
+bool
+CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, InstructionClass Class);
+
+} // namespace objcarc
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_OBJCARC_DEPENDENCYANALYSIS_H
diff --git a/lib/Transforms/ObjCARC/LLVMBuild.txt b/lib/Transforms/ObjCARC/LLVMBuild.txt
new file mode 100644
index 000000000000..90a233851a3c
--- /dev/null
+++ b/lib/Transforms/ObjCARC/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Transforms/ObjCARC/LLVMBuild.txt -------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ObjCARC
+parent = Transforms
+library_name = ObjCARCOpts
+required_libraries = Analysis Core Support TransformUtils
diff --git a/lib/Transforms/ObjCARC/Makefile b/lib/Transforms/ObjCARC/Makefile
new file mode 100644
index 000000000000..2a34e21714f1
--- /dev/null
+++ b/lib/Transforms/ObjCARC/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/ObjCARC/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMObjCARCOpts
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
new file mode 100644
index 000000000000..53a31b0de178
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -0,0 +1,48 @@
+//===-- ObjCARC.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMObjCARCOpts.a, which
+// implements the ObjC ARC optimization passes over the LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "llvm-c/Core.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+ class PassRegistry;
+}
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// \brief A handy option to enable/disable all ARC Optimizations.
+bool llvm::objcarc::EnableARCOpts;
+static cl::opt<bool, true>
+EnableARCOptimizations("enable-objc-arc-opts",
+ cl::location(EnableARCOpts),
+ cl::init(true));
+
+/// initializeObjCARCOpts - Initialize all passes linked into the
+/// ObjCARCOpts library.
+void llvm::initializeObjCARCOpts(PassRegistry &Registry) {
+ initializeObjCARCAliasAnalysisPass(Registry);
+ initializeObjCARCAPElimPass(Registry);
+ initializeObjCARCExpandPass(Registry);
+ initializeObjCARCContractPass(Registry);
+ initializeObjCARCOptPass(Registry);
+}
+
+void LLVMInitializeObjCARCOpts(LLVMPassRegistryRef R) {
+ initializeObjCARCOpts(*unwrap(R));
+}
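A minimal sketch of a C API client pulling these passes in before building a pass pipeline; LLVMGetGlobalPassRegistry comes from llvm-c/Core.h, and the LLVMInitializeObjCARCOpts binding is defined above:

#include "llvm-c/Core.h"
#include "llvm-c/Initialization.h"

void RegisterObjCARCPasses(void) {
  LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();
  // Registers all five ObjCARC passes with the global registry.
  LLVMInitializeObjCARCOpts(R);
}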
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
new file mode 100644
index 000000000000..39670f339e9f
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -0,0 +1,395 @@
+//===- ObjCARC.h - ObjC ARC Optimization --------------*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines common definitions/declarations used by the ObjC ARC
+/// Optimizer. ARC stands for Automatic Reference Counting and is a system for
+/// managing reference counts for objects in Objective C.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_OBJCARC_H
+#define LLVM_TRANSFORMS_SCALAR_OBJCARC_H
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Transforms/ObjCARC.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+namespace llvm {
+class raw_ostream;
+}
+
+namespace llvm {
+namespace objcarc {
+
+/// \brief A handy option to enable/disable all ARC Optimizations.
+extern bool EnableARCOpts;
+
+/// \brief Test if the given module looks interesting to run ARC optimization
+/// on.
+static inline bool ModuleHasARC(const Module &M) {
+ return
+ M.getNamedValue("objc_retain") ||
+ M.getNamedValue("objc_release") ||
+ M.getNamedValue("objc_autorelease") ||
+ M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
+ M.getNamedValue("objc_retainBlock") ||
+ M.getNamedValue("objc_autoreleaseReturnValue") ||
+ M.getNamedValue("objc_autoreleasePoolPush") ||
+ M.getNamedValue("objc_loadWeakRetained") ||
+ M.getNamedValue("objc_loadWeak") ||
+ M.getNamedValue("objc_destroyWeak") ||
+ M.getNamedValue("objc_storeWeak") ||
+ M.getNamedValue("objc_initWeak") ||
+ M.getNamedValue("objc_moveWeak") ||
+ M.getNamedValue("objc_copyWeak") ||
+ M.getNamedValue("objc_retainedObject") ||
+ M.getNamedValue("objc_unretainedObject") ||
+ M.getNamedValue("objc_unretainedPointer") ||
+ M.getNamedValue("clang.arc.use");
+}
+
+/// \enum InstructionClass
+/// \brief A simple classification for instructions.
+enum InstructionClass {
+ IC_Retain, ///< objc_retain
+ IC_RetainRV, ///< objc_retainAutoreleasedReturnValue
+ IC_RetainBlock, ///< objc_retainBlock
+ IC_Release, ///< objc_release
+ IC_Autorelease, ///< objc_autorelease
+ IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue
+ IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
+ IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop
+ IC_NoopCast, ///< objc_retainedObject, etc.
+ IC_FusedRetainAutorelease, ///< objc_retainAutorelease
+ IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+ IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
+ IC_StoreWeak, ///< objc_storeWeak (primitive)
+ IC_InitWeak, ///< objc_initWeak (derived)
+ IC_LoadWeak, ///< objc_loadWeak (derived)
+ IC_MoveWeak, ///< objc_moveWeak (derived)
+ IC_CopyWeak, ///< objc_copyWeak (derived)
+ IC_DestroyWeak, ///< objc_destroyWeak (derived)
+ IC_StoreStrong, ///< objc_storeStrong (derived)
+ IC_IntrinsicUser, ///< clang.arc.use
+ IC_CallOrUser, ///< could call objc_release and/or "use" pointers
+ IC_Call, ///< could call objc_release
+ IC_User, ///< could "use" a pointer
+ IC_None ///< anything else
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class);
+
+/// \brief Test if the given class is a kind of user.
+static inline bool IsUser(InstructionClass Class) {
+ return Class == IC_User ||
+ Class == IC_CallOrUser ||
+ Class == IC_IntrinsicUser;
+}
+
+/// \brief Test if the given class is objc_retain or equivalent.
+static inline bool IsRetain(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV;
+}
+
+/// \brief Test if the given class is objc_autorelease or equivalent.
+static inline bool IsAutorelease(InstructionClass Class) {
+ return Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// \brief Test if the given class represents instructions which return their
+/// argument verbatim.
+static inline bool IsForwarding(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_NoopCast;
+}
+
+/// \brief Test if the given class represents instructions which do nothing if
+/// passed a null pointer.
+static inline bool IsNoopOnNull(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock;
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the "tail" keyword.
+static inline bool IsAlwaysTail(InstructionClass Class) {
+ // IC_RetainBlock may be given a stack argument.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// \brief Test if the given class represents instructions which are never safe
+/// to mark with the "tail" keyword.
+static inline bool IsNeverTail(InstructionClass Class) {
+ // It is never safe to tail call objc_autorelease: tail calling it also
+ // tail calls -[NSObject autorelease], which supports fast autoreleasing.
+ // Fast autoreleasing can reclaim the object from the autorelease pool
+ // early, which violates the semantics of __autoreleasing types in ARC.
+ return Class == IC_Autorelease;
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the nounwind attribute.
+static inline bool IsNoThrow(InstructionClass Class) {
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_AutoreleasepoolPush ||
+ Class == IC_AutoreleasepoolPop;
+}
+
+/// Test whether the given instruction can autorelease any pointer or cause an
+/// autoreleasepool pop.
+static inline bool
+CanInterruptRV(InstructionClass Class) {
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_CallOrUser:
+ case IC_Call:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// \brief Determine if F is one of the special known Functions. If it isn't,
+/// return IC_CallOrUser.
+InstructionClass GetFunctionClass(const Function *F);
+
+/// \brief Determine which objc runtime call instruction class V belongs to.
+///
+/// This is similar to GetInstructionClass except that it only detects objc
+/// runtime calls. This allows it to be faster.
+///
+static inline InstructionClass GetBasicInstructionClass(const Value *V) {
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (const Function *F = CI->getCalledFunction())
+ return GetFunctionClass(F);
+ // Otherwise, be conservative.
+ return IC_CallOrUser;
+ }
+
+ // Otherwise, be conservative.
+ return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
+}
+
+/// \brief Determine what kind of construct V is.
+InstructionClass GetInstructionClass(const Value *V);
+
+/// \brief This is a wrapper around getUnderlyingObject which also knows how to
+/// look through objc_retain and objc_autorelease calls, which we know to return
+/// their argument verbatim.
+static inline const Value *GetUnderlyingObjCPtr(const Value *V) {
+ for (;;) {
+ V = GetUnderlyingObject(V);
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+
+ return V;
+}
+
+/// \brief This is a wrapper around Value::stripPointerCasts which also knows
+/// how to look through objc_retain and objc_autorelease calls, which we know to
+/// return their argument verbatim.
+static inline const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// \brief This is a wrapper around Value::stripPointerCasts which also knows
+/// how to look through objc_retain and objc_autorelease calls, which we know to
+/// return their argument verbatim.
+static inline Value *StripPointerCastsAndObjCCalls(Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// \brief Assuming the given instruction is one of the special calls such as
+/// objc_retain or objc_release, return the argument value, stripped of no-op
+/// casts and forwarding calls.
+static inline Value *GetObjCArg(Value *Inst) {
+ return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+}
+
+static inline bool IsNullOrUndef(const Value *V) {
+ return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+static inline bool IsNoopInstruction(const Instruction *I) {
+ return isa<BitCastInst>(I) ||
+ (isa<GetElementPtrInst>(I) &&
+ cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
+
+/// \brief Erase the given instruction.
+///
+/// Many ObjC calls return their argument verbatim, so if it's such a call
+/// and the return value has users, replace them with the argument value.
+static inline void EraseInstruction(Instruction *CI) {
+ Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+
+ bool Unused = CI->use_empty();
+
+ if (!Unused) {
+ // Replace the return value with the argument.
+ assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+ "Can't delete non-forwarding instruction with users!");
+ CI->replaceAllUsesWith(OldArg);
+ }
+
+ CI->eraseFromParent();
+
+ if (Unused)
+ RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
+
+/// \brief Test whether the given value is possibly a retainable object
+/// pointer.
+static inline bool IsPotentialRetainableObjPtr(const Value *Op) {
+ // Pointers to static or stack storage are not valid retainable object
+ // pointers.
+ if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+ return false;
+ // Special arguments cannot be valid retainable object pointers.
+ if (const Argument *Arg = dyn_cast<Argument>(Op))
+ if (Arg->hasByValAttr() ||
+ Arg->hasNestAttr() ||
+ Arg->hasStructRetAttr())
+ return false;
+ // Only consider values with pointer types.
+ //
+ // It seems intuitive to exclude function pointer types as well, since
+ // functions are never retainable object pointers; however, clang
+ // occasionally bitcasts retainable object pointers to function-pointer
+ // types temporarily.
+ PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+ if (!Ty)
+ return false;
+ // Conservatively assume anything else is a potential retainable object
+ // pointer.
+ return true;
+}
+
+static inline bool IsPotentialRetainableObjPtr(const Value *Op,
+ AliasAnalysis &AA) {
+ // First make the rudimentary check.
+ if (!IsPotentialRetainableObjPtr(Op))
+ return false;
+
+ // Objects in constant memory are not reference-counted.
+ if (AA.pointsToConstantMemory(Op))
+ return false;
+
+ // Pointers in constant memory are not pointing to reference-counted objects.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
+ if (AA.pointsToConstantMemory(LI->getPointerOperand()))
+ return false;
+
+ // Otherwise assume the worst.
+ return true;
+}
+
+/// \brief Helper for GetInstructionClass. Determines what kind of construct CS
+/// is.
+static inline InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ if (IsPotentialRetainableObjPtr(*I))
+ return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+ return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
+
+/// \brief Return true if this value refers to a distinct and identifiable
+/// object.
+///
+/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses
+/// special knowledge of ObjC conventions.
+static inline bool IsObjCIdentifiedObject(const Value *V) {
+ // Assume that call results and arguments have their own "provenance".
+ // Constants (including GlobalVariables) and Allocas are never
+ // reference-counted.
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
+ isa<Argument>(V) || isa<Constant>(V) ||
+ isa<AllocaInst>(V))
+ return true;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ const Value *Pointer =
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+ // A constant pointer can't be pointing to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (GV->isConstant())
+ return true;
+ StringRef Name = GV->getName();
+ // These special variables are known to hold values which are not
+ // reference-counted pointers.
+ if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
+ Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
+ Name.startswith("\01l_objc_msgSend_fixup_"))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_OBJCARC_H
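A minimal sketch (hypothetical helper, not part of the import) composing the inline utilities declared above in the same way the passes in this directory do: classify each instruction cheaply, then strip no-op casts and forwarding calls from ARC call arguments:

#include "ObjCARC.h"
#include "llvm/Support/InstIterator.h"

static void ClassifyARCCalls(llvm::Function &F) {
  using namespace llvm;
  using namespace llvm::objcarc;
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
    InstructionClass Class = GetBasicInstructionClass(&*I);
    if (IsRetain(Class) || IsAutorelease(Class)) {
      // These calls return their argument verbatim, so the stripped
      // argument names the object whose count is being manipulated.
      Value *Obj = GetObjCArg(&*I);
      (void)Obj;
    }
  }
}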
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
new file mode 100644
index 000000000000..00d9864953dc
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -0,0 +1,175 @@
+//===- ObjCARCAPElim.cpp - ObjC ARC Optimization --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// This specific file implements optimizations which remove extraneous
+/// autorelease pools.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-ap-elim"
+#include "ObjCARC.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+namespace {
+ /// \brief Autorelease pool elimination.
+ class ObjCARCAPElim : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnModule(Module &M);
+
+ static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0);
+ static bool OptimizeBB(BasicBlock *BB);
+
+ public:
+ static char ID;
+ ObjCARCAPElim() : ModulePass(ID) {
+ initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCAPElim::ID = 0;
+INITIALIZE_PASS(ObjCARCAPElim,
+ "objc-arc-apelim",
+ "ObjC ARC autorelease pool elimination",
+ false, false)
+
+Pass *llvm::createObjCARCAPElimPass() {
+ return new ObjCARCAPElim();
+}
+
+void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+/// Interprocedurally determine if calls made by the given call site can
+/// possibly produce autoreleases.
+bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
+ if (const Function *Callee = CS.getCalledFunction()) {
+ if (Callee->isDeclaration() || Callee->mayBeOverridden())
+ return true;
+ for (Function::const_iterator I = Callee->begin(), E = Callee->end();
+ I != E; ++I) {
+ const BasicBlock *BB = I;
+ for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
+ J != F; ++J)
+ if (ImmutableCallSite JCS = ImmutableCallSite(J))
+ // This recursion depth limit is arbitrary. It's just deep
+ // enough to cover the known interesting test cases.
+ if (Depth < 3 &&
+ !JCS.onlyReadsMemory() &&
+ MayAutorelease(JCS, Depth + 1))
+ return true;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
+ bool Changed = false;
+
+ Instruction *Push = 0;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPush:
+ Push = Inst;
+ break;
+ case IC_AutoreleasepoolPop:
+ // If this pop matches a push and nothing in between can autorelease,
+ // zap the pair.
+ if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop "
+ "autorelease pair:\n"
+ " Pop: " << *Inst << "\n"
+ << " Push: " << *Push << "\n");
+ Inst->eraseFromParent();
+ Push->eraseFromParent();
+ }
+ Push = 0;
+ break;
+ case IC_CallOrUser:
+ if (MayAutorelease(ImmutableCallSite(Inst)))
+ Push = 0;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+bool ObjCARCAPElim::runOnModule(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!ModuleHasARC(M))
+ return false;
+
+ // Find the llvm.global_ctors variable, as the first step in
+ // identifying the global constructors. In theory, unnecessary autorelease
+ // pools could occur anywhere, but in practice it's pretty rare. Global
+ // ctors are a place where autorelease pools get inserted automatically,
+ // so it's pretty common for them to be unnecessary, and it's pretty
+ // profitable to eliminate them.
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return false;
+
+ assert(GV->hasDefinitiveInitializer() &&
+ "llvm.global_ctors is uncooperative!");
+
+ bool Changed = false;
+
+ // Dig the constructor functions out of GV's initializer.
+ ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+ for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
+ OI != OE; ++OI) {
+ Value *Op = *OI;
+ // llvm.global_ctors is an array of pairs where the second members
+ // are constructor functions.
+ Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
+ // If the user used a constructor function with the wrong signature and
+ // it got bitcasted or whatever, look the other way.
+ if (!F)
+ continue;
+ // Only look at function definitions.
+ if (F->isDeclaration())
+ continue;
+ // Only look at functions with one basic block.
+ if (llvm::next(F->begin()) != F->end())
+ continue;
+ // Ok, a single-block constructor function definition. Try to optimize it.
+ Changed |= OptimizeBB(F->begin());
+ }
+
+ return Changed;
+}
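The zapped pair has the shape below. A sketch of driving the pass, assuming the 3.3-era legacy PassManager API:

#include "llvm/PassManager.h"
#include "llvm/Transforms/ObjCARC.h"

// Pattern removed inside a single-block global constructor:
//   %pool = call i8* @objc_autoreleasePoolPush()
//   ...no intervening call that may autorelease...
//   call void @objc_autoreleasePoolPop(i8* %pool)
static bool RunAPElim(llvm::Module &M) {
  llvm::PassManager PM;
  PM.add(llvm::createObjCARCAPElimPass());
  return PM.run(M); // true if any push/pop pair was zapped
}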
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
new file mode 100644
index 000000000000..46b2de713745
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -0,0 +1,162 @@
+//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -*- mode: c++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a simple ARC-aware AliasAnalysis using special knowledge
+/// of Objective C to enhance other optimization passes which rely on the Alias
+/// Analysis infrastructure.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-aa"
+#include "ObjCARC.h"
+#include "ObjCARCAliasAnalysis.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/PassSupport.h"
+
+namespace llvm {
+ class Function;
+ class Value;
+}
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+// Register this pass...
+char ObjCARCAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
+ return new ObjCARCAliasAnalysis();
+}
+
+void
+ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+AliasAnalysis::AliasResult
+ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making a
+ // precise alias query.
+ const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
+ const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
+ AliasResult Result =
+ AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag),
+ Location(SB, LocB.Size, LocB.TBAATag));
+ if (Result != MayAlias)
+ return Result;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *UA = GetUnderlyingObjCPtr(SA);
+ const Value *UB = GetUnderlyingObjCPtr(SB);
+ if (UA != SA || UB != SB) {
+ Result = AliasAnalysis::alias(Location(UA), Location(UB));
+ // We can't use MustAlias or PartialAlias results here because
+ // GetUnderlyingObjCPtr may return a pointer offset from the underlying
+ // object.
+ if (Result == NoAlias)
+ return NoAlias;
+ }
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return MayAlias;
+}
+
+bool
+ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making
+ // a precise alias query.
+ const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
+ if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag),
+ OrLocal))
+ return true;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *U = GetUnderlyingObjCPtr(S);
+ if (U != S)
+ return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return false;
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ // We have nothing to do. Just chain to the next AliasAnalysis.
+ return AliasAnalysis::getModRefBehavior(CS);
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::getModRefBehavior(F);
+
+ switch (GetFunctionClass(F)) {
+ case IC_NoopCast:
+ return DoesNotAccessMemory;
+ default:
+ break;
+ }
+
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ switch (GetBasicInstructionClass(CS.getInstruction())) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_NoopCast:
+ case IC_AutoreleasepoolPush:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ // These functions don't access any memory visible to the compiler.
+ // Note that this doesn't include objc_retainBlock, because it updates
+ // pointers when it copies block data.
+ return NoModRef;
+ default:
+ break;
+ }
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // TODO: Theoretically we could check for dependencies between objc_* calls
+ // and OnlyAccessesArgumentPointees calls or other well-behaved calls.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
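Since this analysis is an immutable member of the AliasAnalysis group, clients schedule it next to a fallback implementation, and it chains any query it cannot sharpen. A sketch under the 3.3-era legacy pass manager, mirroring "opt -basicaa -objc-arc-aa":

#include "llvm/Analysis/Passes.h"
#include "llvm/PassManager.h"

static void AddARCAwareAA(llvm::PassManager &PM) {
  // The most recently added AA is queried first; it chains to basicaa
  // (and ultimately no-aa) for anything ARC knowledge cannot answer.
  PM.add(llvm::createBasicAliasAnalysisPass());
  PM.add(llvm::createObjCARCAliasAnalysisPass());
}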
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
new file mode 100644
index 000000000000..7abe995a5ce7
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
@@ -0,0 +1,74 @@
+//===- ObjCARCAliasAnalysis.h - ObjC ARC Optimization -*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares a simple ARC-aware AliasAnalysis using special knowledge
+/// of Objective C to enhance other optimization passes which rely on the Alias
+/// Analysis infrastructure.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
+#define LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+namespace objcarc {
+
+ /// \brief This is a simple alias analysis implementation that uses knowledge
+ /// of ARC constructs to answer queries.
+ ///
+ /// TODO: This class could be generalized to know about other ObjC-specific
+ /// tricks, such as knowing that ivars in the non-fragile ABI are
+ /// non-aliasing even though their offsets are dynamic.
+ class ObjCARCAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ObjCARCAliasAnalysis() : ImmutablePass(ID) {
+ initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ /// This method is used when a pass implements an analysis interface through
+ /// multiple inheritance. If needed, it should override this to adjust the
+ /// this pointer as needed for the specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return static_cast<AliasAnalysis *>(this);
+ return this;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
+ };
+
+} // namespace objcarc
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
new file mode 100644
index 000000000000..b96c64fe81de
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -0,0 +1,541 @@
+//===- ObjCARCContract.cpp - ObjC ARC Optimization ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines late ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// This specific file mainly deals with "contracting" multiple lower-level
+/// operations into single higher-level operations through pattern matching.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+// TODO: ObjCARCContract could insert PHI nodes when uses aren't
+// dominated by single calls.
+
+#define DEBUG_TYPE "objc-arc-contract"
+#include "ObjCARC.h"
+#include "DependencyAnalysis.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
+
+namespace {
+ /// \brief Late ARC optimizations
+ ///
+ /// These change the IR in a way that makes it difficult to be analyzed by
+ /// ObjCARCOpt, so it's run late.
+ class ObjCARCContract : public FunctionPass {
+ bool Changed;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ ProvenanceAnalysis PA;
+
+ /// A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// Declarations for ObjC runtime functions, for use in creating calls to
+ /// them. These are initialized lazily to avoid cluttering up the Module
+ /// with unused declarations.
+
+ /// Declaration for objc_storeStrong().
+ Constant *StoreStrongCallee;
+ /// Declaration for objc_retainAutorelease().
+ Constant *RetainAutoreleaseCallee;
+ /// Declaration for objc_retainAutoreleaseReturnValue().
+ Constant *RetainAutoreleaseRVCallee;
+
+ /// The inline asm string to insert between calls and RetainRV calls to make
+ /// the optimization work on targets which need it.
+ const MDString *RetainRVMarker;
+
+ /// The set of inserted objc_storeStrong calls. If at the end of walking the
+ /// function we have found no alloca instructions, these calls can be marked
+ /// "tail".
+ SmallPtrSet<CallInst *, 8> StoreStrongCalls;
+
+ Constant *getStoreStrongCallee(Module *M);
+ Constant *getRetainAutoreleaseCallee(Module *M);
+ Constant *getRetainAutoreleaseRVCallee(Module *M);
+
+ bool ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited);
+
+ void ContractRelease(Instruction *Release,
+ inst_iterator &Iter);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ public:
+ static char ID;
+ ObjCARCContract() : FunctionPass(ID) {
+ initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCContract::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+
+Pass *llvm::createObjCARCContractPass() {
+ return new ObjCARCContract();
+}
+
+void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+}
+
+Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
+ if (!StoreStrongCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ Type *Params[] = { I8XX, I8X };
+
+ AttributeSet Attr = AttributeSet()
+ .addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind)
+ .addAttribute(M->getContext(), 1, Attribute::NoCapture);
+
+ StoreStrongCallee =
+ M->getOrInsertFunction(
+ "objc_storeStrong",
+ FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+ Attr);
+ }
+ return StoreStrongCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
+ if (!RetainAutoreleaseCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainAutoreleaseCallee =
+ M->getOrInsertFunction("objc_retainAutorelease", FTy, Attribute);
+ }
+ return RetainAutoreleaseCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
+ if (!RetainAutoreleaseRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainAutoreleaseRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
+ Attribute);
+ }
+ return RetainAutoreleaseRVCallee;
+}
+
+/// Merge an autorelease with a retain into a fused call.
+bool
+ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited) {
+ const Value *Arg = GetObjCArg(Autorelease);
+
+ // Check that there are no instructions between the retain and the
+ // autorelease (such as an objc_autoreleasePoolPop) which may change the
+ // count.
+ CallInst *Retain = 0;
+ if (Class == IC_AutoreleaseRV)
+ FindDependencies(RetainAutoreleaseRVDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+ else
+ FindDependencies(RetainAutoreleaseDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+
+ Visited.clear();
+ if (DependingInstructions.size() != 1) {
+ DependingInstructions.clear();
+ return false;
+ }
+
+ Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+ DependingInstructions.clear();
+
+ if (!Retain ||
+ GetBasicInstructionClass(Retain) != IC_Retain ||
+ GetObjCArg(Retain) != Arg)
+ return false;
+
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing "
+ "retain/autorelease. Erasing: " << *Autorelease << "\n"
+ " Old Retain: "
+ << *Retain << "\n");
+
+ if (Class == IC_AutoreleaseRV)
+ Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent()));
+ else
+ Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent()));
+
+ DEBUG(dbgs() << " New Retain: "
+ << *Retain << "\n");
+
+ EraseInstruction(Autorelease);
+ return true;
+}
+
+/// Attempt to merge an objc_release with a store, load, and objc_retain to form
+/// an objc_storeStrong. This can be a little tricky because the instructions
+/// don't always appear in order, and there may be unrelated intervening
+/// instructions.
+void ObjCARCContract::ContractRelease(Instruction *Release,
+ inst_iterator &Iter) {
+ LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
+ if (!Load || !Load->isSimple()) return;
+
+ // For now, require everything to be in one basic block.
+ BasicBlock *BB = Release->getParent();
+ if (Load->getParent() != BB) return;
+
+ // Walk down to find the store and the release, which may be in either order.
+ BasicBlock::iterator I = Load, End = BB->end();
+ ++I;
+ AliasAnalysis::Location Loc = AA->getLocation(Load);
+ StoreInst *Store = 0;
+ bool SawRelease = false;
+ for (; !Store || !SawRelease; ++I) {
+ if (I == End)
+ return;
+
+ Instruction *Inst = I;
+ if (Inst == Release) {
+ SawRelease = true;
+ continue;
+ }
+
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+
+ // Unrelated retains are harmless.
+ if (IsRetain(Class))
+ continue;
+
+ if (Store) {
+ // The store is the point where we're going to put the objc_storeStrong,
+ // so make sure there are no uses after it.
+ if (CanUse(Inst, Load, PA, Class))
+ return;
+ } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) {
+ // We are moving the load down to the store, so check for anything
+ // else which writes to the memory between the load and the store.
+ Store = dyn_cast<StoreInst>(Inst);
+ if (!Store || !Store->isSimple()) return;
+ if (Store->getPointerOperand() != Loc.Ptr) return;
+ }
+ }
+
+ Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
+
+ // Walk up to find the retain.
+ I = Store;
+ BasicBlock::iterator Begin = BB->begin();
+ while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
+ --I;
+ Instruction *Retain = I;
+ if (GetBasicInstructionClass(Retain) != IC_Retain) return;
+ if (GetObjCArg(Retain) != New) return;
+
+ Changed = true;
+ ++NumStoreStrongs;
+
+ LLVMContext &C = Release->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+
+ Value *Args[] = { Load->getPointerOperand(), New };
+ if (Args[0]->getType() != I8XX)
+ Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
+ if (Args[1]->getType() != I8X)
+ Args[1] = new BitCastInst(Args[1], I8X, "", Store);
+ CallInst *StoreStrong =
+ CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
+ Args, "", Store);
+ StoreStrong->setDoesNotThrow();
+ StoreStrong->setDebugLoc(Store->getDebugLoc());
+
+ // We can't set the tail flag yet, because we haven't yet determined
+ // whether there are any escaping allocas. Remember this call, so that
+ // we can set the tail flag once we know it's safe.
+ StoreStrongCalls.insert(StoreStrong);
+
+ if (&*Iter == Store) ++Iter;
+ Store->eraseFromParent();
+ Release->eraseFromParent();
+ EraseInstruction(Retain);
+ if (Load->use_empty())
+ Load->eraseFromParent();
+}
+
+bool ObjCARCContract::doInitialization(Module &M) {
+ // If nothing in the Module uses ARC, don't do anything.
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // These are initialized lazily.
+ StoreStrongCallee = 0;
+ RetainAutoreleaseCallee = 0;
+ RetainAutoreleaseRVCallee = 0;
+
+ // Initialize RetainRVMarker.
+ RetainRVMarker = 0;
+ if (NamedMDNode *NMD =
+ M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
+ if (NMD->getNumOperands() == 1) {
+ const MDNode *N = NMD->getOperand(0);
+ if (N->getNumOperands() == 1)
+ if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
+ RetainRVMarker = S;
+ }
+
+ return false;
+}
+
+bool ObjCARCContract::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // Track whether it's ok to mark objc_storeStrong calls with the "tail"
+ // keyword. Be conservative if the function has variadic arguments.
+ // Functions which "return twice" are also unsafe for the "tail"
+ // keyword, because they use setjmp, which may need to return to an
+ // earlier stack state.
+ bool TailOkForStoreStrongs = !F.isVarArg() &&
+ !F.callsFunctionThatReturnsTwice();
+
+ // For ObjC library calls which return their argument, replace uses of the
+ // argument with uses of the call return value, if it dominates the use. This
+ // reduces register pressure.
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+
+ DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n");
+
+ // Only these library routines return their argument. In particular,
+ // objc_retainBlock does not necessarily return its argument.
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ break;
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
+ continue;
+ break;
+ case IC_RetainRV: {
+ // If we're compiling for a target which needs a special inline-asm
+ // marker to do the retainAutoreleasedReturnValue optimization,
+ // insert it now.
+ if (!RetainRVMarker)
+ break;
+ BasicBlock::iterator BBI = Inst;
+ BasicBlock *InstParent = Inst->getParent();
+
+ // Step up to see if the call immediately precedes the RetainRV call.
+ // If it's an invoke, we have to cross a block boundary. And we have
+ // to carefully dodge no-op instructions.
+ do {
+ if (&*BBI == InstParent->begin()) {
+ BasicBlock *Pred = InstParent->getSinglePredecessor();
+ if (!Pred)
+ goto decline_rv_optimization;
+ BBI = Pred->getTerminator();
+ break;
+ }
+ --BBI;
+ } while (IsNoopInstruction(BBI));
+
+ if (&*BBI == GetObjCArg(Inst)) {
+ DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for "
+ "retainAutoreleasedReturnValue optimization.\n");
+ Changed = true;
+ InlineAsm *IA =
+ InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
+ /*isVarArg=*/false),
+ RetainRVMarker->getString(),
+ /*Constraints=*/"", /*hasSideEffects=*/true);
+ CallInst::Create(IA, "", Inst);
+ }
+ decline_rv_optimization:
+ break;
+ }
+ case IC_InitWeak: {
+ // objc_initWeak(p, null) => *p = null
+ CallInst *CI = cast<CallInst>(Inst);
+ if (IsNullOrUndef(CI->getArgOperand(1))) {
+ Value *Null =
+ ConstantPointerNull::get(cast<PointerType>(CI->getType()));
+ Changed = true;
+ new StoreInst(Null, CI->getArgOperand(0), CI);
+
+ DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
+ << " New = " << *Null << "\n");
+
+ CI->replaceAllUsesWith(Null);
+ CI->eraseFromParent();
+ }
+ continue;
+ }
+ case IC_Release:
+ ContractRelease(Inst, I);
+ continue;
+ case IC_User:
+ // Be conservative if the function has any alloca instructions.
+ // Technically we only care about escaping alloca instructions,
+ // but this is sufficient to handle some interesting cases.
+ if (isa<AllocaInst>(Inst))
+ TailOkForStoreStrongs = false;
+ continue;
+ case IC_IntrinsicUser:
+ // Remove calls to @clang.arc.use(...).
+ Inst->eraseFromParent();
+ continue;
+ default:
+ continue;
+ }
+
+ DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n");
+
+ // Don't use GetObjCArg because we don't want to look through bitcasts
+ // and such; to do the replacement, the argument must have type i8*.
+ const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+ for (;;) {
+ // If we're compiling bugpointed code, don't get in trouble.
+ if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
+ break;
+ // Look through the uses of the pointer.
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ) {
+ Use &U = UI.getUse();
+ unsigned OperandNo = UI.getOperandNo();
+ ++UI; // Increment UI now, because we may unlink its element.
+
+ // If the call's return value dominates a use of the call's argument
+ // value, rewrite the use to use the return value. We check for
+ // reachability here because an unreachable call is considered to
+ // trivially dominate itself, which would lead us to rewriting its
+ // argument in terms of its return value, which would lead to
+ // infinite loops in GetObjCArg.
+ if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
+ Changed = true;
+ Instruction *Replacement = Inst;
+ Type *UseTy = U.get()->getType();
+ if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
+ // For PHI nodes, insert the bitcast in the predecessor block.
+ unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+ BasicBlock *BB = PHI->getIncomingBlock(ValNo);
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ &BB->back());
+ // While we're here, rewrite all edges for this PHI, rather
+ // than just one use at a time, to minimize the number of
+ // bitcasts we emit.
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (PHI->getIncomingBlock(i) == BB) {
+ // Keep the UI iterator valid.
+ if (&PHI->getOperandUse(
+ PHINode::getOperandNumForIncomingValue(i)) ==
+ &UI.getUse())
+ ++UI;
+ PHI->setIncomingValue(i, Replacement);
+ }
+ } else {
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ cast<Instruction>(U.getUser()));
+ U.set(Replacement);
+ }
+ }
+ }
+
+ // If Arg is a no-op casted pointer, strip one level of casts and iterate.
+ if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
+ Arg = BI->getOperand(0);
+ else if (isa<GEPOperator>(Arg) &&
+ cast<GEPOperator>(Arg)->hasAllZeroIndices())
+ Arg = cast<GEPOperator>(Arg)->getPointerOperand();
+ else if (isa<GlobalAlias>(Arg) &&
+ !cast<GlobalAlias>(Arg)->mayBeOverridden())
+ Arg = cast<GlobalAlias>(Arg)->getAliasee();
+ else
+ break;
+ }
+ }
+
+ // If this function has no escaping allocas or suspicious vararg usage,
+ // objc_storeStrong calls can be marked with the "tail" keyword.
+ if (TailOkForStoreStrongs)
+ for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
+ E = StoreStrongCalls.end(); I != E; ++I)
+ (*I)->setTailCall();
+ StoreStrongCalls.clear();
+
+ return Changed;
+}
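For reference, the shape ContractRelease matches and its replacement, plus a sketch (hypothetical driver, 3.3-era FunctionPassManager) of running contraction late, after the main ARC optimizer:

#include "llvm/PassManager.h"
#include "llvm/Transforms/ObjCARC.h"

// ContractRelease rewrites this pattern (values hypothetical):
//   %old = load i8** %ptr
//   %0 = call i8* @objc_retain(i8* %new)
//   store i8* %new, i8** %ptr
//   call void @objc_release(i8* %old)
// into a single call:
//   call void @objc_storeStrong(i8** %ptr, i8* %new)
static void RunContract(llvm::Module &M, llvm::Function &F) {
  llvm::FunctionPassManager FPM(&M);
  FPM.add(llvm::createObjCARCContractPass());
  FPM.doInitialization();
  FPM.run(F);
  FPM.doFinalization();
}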
diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
new file mode 100644
index 000000000000..39bf8f38735b
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -0,0 +1,128 @@
+//===- ObjCARCExpand.cpp - ObjC ARC Optimization --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// This specific file deals with early optimizations which perform certain
+/// cleanup operations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-expand"
+
+#include "ObjCARC.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+ class Module;
+}
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+namespace {
+ /// \brief Early ARC transformations.
+ class ObjCARCExpand : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ /// A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ public:
+ static char ID;
+ ObjCARCExpand() : FunctionPass(ID) {
+ initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand,
+ "objc-arc-expand", "ObjC ARC expansion", false, false)
+
+Pass *llvm::createObjCARCExpandPass() {
+ return new ObjCARCExpand();
+}
+
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+bool ObjCARCExpand::doInitialization(Module &M) {
+ Run = ModuleHasARC(M);
+ return false;
+}
+
+bool ObjCARCExpand::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ bool Changed = false;
+
+ DEBUG(dbgs() << "ObjCARCExpand: Visiting Function: " << F.getName() << "\n");
+
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ Instruction *Inst = &*I;
+
+ DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n");
+
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV: {
+ // These calls return their argument verbatim, as a low-level
+ // optimization. However, this makes high-level optimizations
+ // harder. Undo any uses of this optimization that the front-end
+ // emitted; we'll redo them in the contract pass.
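+ //
+ // Illustrative IR (assumed shape), after the replaceAllUsesWith below:
+ //
+ //   %y = call i8* @objc_retain(i8* %x)
+ //   call void @use(i8* %y)
+ // becomes
+ //   %y = call i8* @objc_retain(i8* %x)
+ //   call void @use(i8* %x)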
+ Changed = true;
+ Value *Operand = cast<CallInst>(Inst)->getArgOperand(0);
+ DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst << "\n"
+ " New = " << *Operand << "\n");
+ Inst->replaceAllUsesWith(Operand);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ DEBUG(dbgs() << "ObjCARCExpand: Finished List.\n\n");
+
+ return Changed;
+}
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
new file mode 100644
index 000000000000..92d6fc4767c2
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -0,0 +1,3026 @@
+//===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective-C.
+///
+/// The optimizations performed include elimination of redundant, partially
+/// redundant, and inconsequential reference count operations, elimination of
+/// redundant weak pointer operations, and numerous minor simplifications.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-opts"
+#include "ObjCARC.h"
+#include "DependencyAnalysis.h"
+#include "ObjCARCAliasAnalysis.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific.
+/// @{
+
+namespace {
+ /// \brief An associative container with fast insertion-order (deterministic)
+ /// iteration over its elements, plus the special blot operation.
+ template<class KeyT, class ValueT>
+ class MapVector {
+ /// Map keys to indices in Vector.
+ typedef DenseMap<KeyT, size_t> MapTy;
+ MapTy Map;
+
+ typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
+ /// Keys and values.
+ VectorTy Vector;
+
+ public:
+ typedef typename VectorTy::iterator iterator;
+ typedef typename VectorTy::const_iterator const_iterator;
+ iterator begin() { return Vector.begin(); }
+ iterator end() { return Vector.end(); }
+ const_iterator begin() const { return Vector.begin(); }
+ const_iterator end() const { return Vector.end(); }
+
+#ifdef XDEBUG
+ ~MapVector() {
+ assert(Vector.size() >= Map.size()); // May differ due to blotting.
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
+ I != E; ++I) {
+ assert(I->second < Vector.size());
+ assert(Vector[I->second].first == I->first);
+ }
+ for (typename VectorTy::const_iterator I = Vector.begin(),
+ E = Vector.end(); I != E; ++I)
+ assert(!I->first ||
+ (Map.count(I->first) &&
+ Map[I->first] == size_t(I - Vector.begin())));
+ }
+#endif
+
+ ValueT &operator[](const KeyT &Arg) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Arg, size_t(0)));
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
+ Vector.push_back(std::make_pair(Arg, ValueT()));
+ return Vector[Num].second;
+ }
+ return Vector[Pair.first->second].second;
+ }
+
+ std::pair<iterator, bool>
+ insert(const std::pair<KeyT, ValueT> &InsertPair) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(InsertPair.first, size_t(0)));
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
+ Vector.push_back(InsertPair);
+ return std::make_pair(Vector.begin() + Num, true);
+ }
+ return std::make_pair(Vector.begin() + Pair.first->second, false);
+ }
+
+ const_iterator find(const KeyT &Key) const {
+ typename MapTy::const_iterator It = Map.find(Key);
+ if (It == Map.end()) return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ /// This is similar to erase, but instead of removing the element from the
+ /// vector, it just zeros out the key in the vector. This leaves iterators
+ /// intact, but clients must be prepared for zeroed-out keys when iterating.
+ void blot(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end()) return;
+ Vector[It->second].first = KeyT();
+ Map.erase(It);
+ }
+
+ void clear() {
+ Map.clear();
+ Vector.clear();
+ }
+ };
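+
+ // Illustrative use of blot (a sketch; P1 and P2 are hypothetical keys):
+ // erasing a key mid-iteration leaves a null-keyed slot behind instead of
+ // invalidating iterators.
+ //
+ //   MapVector<const Value *, int> MV;
+ //   MV[P1] = 1;
+ //   MV[P2] = 2;
+ //   MV.blot(P1); // Iterators stay valid; P1's slot now has a null key.
+ //   for (MapVector<const Value *, int>::iterator I = MV.begin(),
+ //        E = MV.end(); I != E; ++I)
+ //     if (I->first) // Skip blotted entries, as clients must.
+ //       ... use *I ...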
+}
+
+/// @}
+///
+/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
+/// @{
+
+/// \brief This is similar to StripPointerCastsAndObjCCalls but it stops as soon
+/// as it finds a value with multiple uses.
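+///
+/// Illustrative sketch (assumed IR shape): given single-use values
+///   %0 = call i8* @returnsObject()
+///   %1 = call i8* @objc_retain(i8* %0)
+///   %2 = bitcast i8* %1 to i8*
+/// FindSingleUseIdentifiedObject(%2) looks through the bitcast and the
+/// forwarding retain back to %0, returning %0 if it is an ObjC-identified
+/// object and 0 otherwise.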
+static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
+ if (Arg->hasOneUse()) {
+ if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
+ return FindSingleUseIdentifiedObject(BC->getOperand(0));
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
+ if (GEP->hasAllZeroIndices())
+ return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
+ if (IsForwarding(GetBasicInstructionClass(Arg)))
+ return FindSingleUseIdentifiedObject(
+ cast<CallInst>(Arg)->getArgOperand(0));
+ if (!IsObjCIdentifiedObject(Arg))
+ return 0;
+ return Arg;
+ }
+
+ // If we found an identifiable object that has multiple uses, but all of
+ // those uses are trivial, we can still consider this to be a single-use
+ // value.
+ if (IsObjCIdentifiedObject(Arg)) {
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
+ return 0;
+ }
+
+ return Arg;
+ }
+
+ return 0;
+}
+
+/// \brief Test whether the given retainable object pointer escapes.
+///
+/// This differs from regular escape analysis in that a use as an
+/// argument to a call is not considered an escape.
+///
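+/// Illustrative classification (assumed IR shapes):
+///
+///   call void @foo(i8* %ptr)     ; plain call use: not an escape
+///   store i8* %ptr, i8** @g      ; stored as a value: treated as an escape
+///   %c = bitcast i8* %ptr to i8* ; escapes only if %c escapes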
+static bool DoesRetainableObjPtrEscape(const User *Ptr) {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Target: " << *Ptr << "\n");
+
+ // Walk the def-use chains.
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(Ptr);
+ // If Ptr has any operands add them as well.
+ for (User::const_op_iterator I = Ptr->op_begin(), E = Ptr->op_end(); I != E;
+ ++I) {
+ Worklist.push_back(*I);
+ }
+
+ // Ensure we do not visit any value twice.
+ SmallPtrSet<const Value *, 8> VisitedSet;
+
+ do {
+ const Value *V = Worklist.pop_back_val();
+
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n");
+
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const User *UUser = *UI;
+
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n");
+
+ // Special - Use by a call (callee or argument) is not considered
+ // to be an escape.
+ switch (GetBasicInstructionClass(UUser)) {
+ case IC_StoreWeak:
+ case IC_InitWeak:
+ case IC_StoreStrong:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV: {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer "
+ "arguments. Pointer Escapes!\n");
+ // These special functions make copies of their pointer arguments.
+ return true;
+ }
+ case IC_IntrinsicUser:
+ // Use by the use intrinsic is not an escape.
+ continue;
+ case IC_User:
+ case IC_None:
+ // Use by an instruction which copies the value is an escape if the
+ // result is an escape.
+ if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
+ isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
+
+ if (VisitedSet.insert(UUser)) {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. "
+ "Ptr escapes if result escapes. Adding to list.\n");
+ Worklist.push_back(UUser);
+ } else {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node."
+ "\n");
+ }
+ continue;
+ }
+ // Use by a load is not an escape.
+ if (isa<LoadInst>(UUser))
+ continue;
+ // Use by a store is not an escape if the use is the address.
+ if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
+ if (V != SI->getValueOperand())
+ continue;
+ break;
+ default:
+ // Regular calls and other stuff are not considered escapes.
+ continue;
+ }
+ // Otherwise, conservatively assume an escape.
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n");
+ return true;
+ }
+ } while (!Worklist.empty());
+
+ // No escapes found.
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n");
+ return false;
+}
+
+/// @}
+///
+/// \defgroup ARCOpt ARC Optimization.
+/// @{
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimal insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could be generalized to be interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
+
+// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+// TODO: Delete release+retain pairs (rare).
+
+STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
+STATISTIC(NumRets, "Number of return value forwarding "
+ "retain+autoreleases eliminated");
+STATISTIC(NumRRs, "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+
+namespace {
+ /// \enum Sequence
+ ///
+ /// \brief A sequence of states that a pointer may go through in which an
+ /// objc_retain and objc_release are actually needed.
+ enum Sequence {
+ S_None,
+ S_Retain, ///< objc_retain(x).
+ S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement.
+ S_Use, ///< any use of x.
+ S_Stop, ///< like S_Release, but code motion is stopped.
+ S_Release, ///< objc_release(x).
+ S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
+ };
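+
+ // Illustrative bottom-up walk for one pointer (program order shown; the
+ // analysis visits it in reverse; a sketch, not from the original source):
+ //
+ //   objc_retain(x)  // sequence completes here; the pair is a candidate
+ //   call foo()      // S_CanRelease -- foo() might decrement x's count
+ //   use(x)          // S_Use
+ //   objc_release(x) // S_Release (S_MovableRelease if imprecise)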
+
+ raw_ostream &operator<<(raw_ostream &OS, const Sequence S)
+ LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<<(raw_ostream &OS, const Sequence S) {
+ switch (S) {
+ case S_None:
+ return OS << "S_None";
+ case S_Retain:
+ return OS << "S_Retain";
+ case S_CanRelease:
+ return OS << "S_CanRelease";
+ case S_Use:
+ return OS << "S_Use";
+ case S_Release:
+ return OS << "S_Release";
+ case S_MovableRelease:
+ return OS << "S_MovableRelease";
+ case S_Stop:
+ return OS << "S_Stop";
+ }
+ llvm_unreachable("Unknown sequence type.");
+ }
+}
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+ // The easy cases.
+ if (A == B)
+ return A;
+ if (A == S_None || B == S_None)
+ return S_None;
+
+ if (A > B) std::swap(A, B);
+ if (TopDown) {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Retain || A == S_CanRelease) &&
+ (B == S_CanRelease || B == S_Use))
+ return B;
+ } else {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Use || A == S_CanRelease) &&
+ (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
+ return A;
+ // If both sides are releases, choose the more conservative one.
+ if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+ return A;
+ if (A == S_Release && B == S_MovableRelease)
+ return A;
+ }
+
+ return S_None;
+}
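+
+// Illustrative merges (a sketch of the rules above): top-down,
+// MergeSeqs(S_Retain, S_Use, true) == S_Use, since the side further along
+// wins; bottom-up, MergeSeqs(S_Use, S_Release, false) == S_Use; and any
+// merge involving S_None yields S_None.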
+
+namespace {
+ /// \brief Unidirectional information about either a
+ /// retain-decrement-use-release sequence or release-use-decrement-retain
+ /// reverse sequence.
+ struct RRInfo {
+ /// After an objc_retain, the reference count of the referenced
+ /// object is known to be positive. Similarly, before an objc_release, the
+ /// reference count of the referenced object is known to be positive. If
+ /// there are retain-release pairs in code regions where the retain count
+ /// is known to be positive, they can be eliminated, regardless of any side
+ /// effects between them.
+ ///
+ /// Also, a retain+release pair nested within another retain+release
+ /// pair all on the known same pointer value can be eliminated, regardless
+ /// of any intervening side effects.
+ ///
+ /// KnownSafe is true when either of these conditions is satisfied.
+ bool KnownSafe;
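+
+ // Illustrative nested pair covered by KnownSafe (assumed shape):
+ //   objc_retain(x)
+ //   objc_retain(x)
+ //   ... arbitrary side effects ...
+ //   objc_release(x)
+ //   objc_release(x)
+ // The inner retain/release pair can be removed regardless of the
+ // intervening code.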
+
+ /// True if the objc_release calls are all marked with the "tail" keyword.
+ bool IsTailCallRelease;
+
+ /// If the Calls are objc_release calls and they all have a
+ /// clang.imprecise_release tag, this is the metadata tag.
+ MDNode *ReleaseMetadata;
+
+ /// For a top-down sequence, the set of objc_retains or
+ /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+ SmallPtrSet<Instruction *, 2> Calls;
+
+ /// The set of optimal insert positions for moving calls in the opposite
+ /// sequence.
+ SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+ RRInfo() :
+ KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}
+
+ void clear();
+ };
+}
+
+void RRInfo::clear() {
+ KnownSafe = false;
+ IsTailCallRelease = false;
+ ReleaseMetadata = 0;
+ Calls.clear();
+ ReverseInsertPts.clear();
+}
+
+namespace {
+ /// \brief This class summarizes several per-pointer runtime properties which
+ /// are propagated through the flow graph.
+ class PtrState {
+ /// True if the reference count is known to be incremented.
+ bool KnownPositiveRefCount;
+
+ /// True if we've seen an opportunity for partial RR elimination, such as
+ /// pushing calls into a CFG triangle or into one side of a CFG diamond.
+ bool Partial;
+
+ /// The current position in the sequence.
+ Sequence Seq : 8;
+
+ public:
+ /// Unidirectional information about the current sequence.
+ ///
+ /// TODO: Encapsulate this better.
+ RRInfo RRI;
+
+ PtrState() : KnownPositiveRefCount(false), Partial(false),
+ Seq(S_None) {}
+
+ void SetKnownPositiveRefCount() {
+ KnownPositiveRefCount = true;
+ }
+
+ void ClearKnownPositiveRefCount() {
+ KnownPositiveRefCount = false;
+ }
+
+ bool HasKnownPositiveRefCount() const {
+ return KnownPositiveRefCount;
+ }
+
+ void SetSeq(Sequence NewSeq) {
+ Seq = NewSeq;
+ }
+
+ Sequence GetSeq() const {
+ return Seq;
+ }
+
+ void ClearSequenceProgress() {
+ ResetSequenceProgress(S_None);
+ }
+
+ void ResetSequenceProgress(Sequence NewSeq) {
+ Seq = NewSeq;
+ Partial = false;
+ RRI.clear();
+ }
+
+ void Merge(const PtrState &Other, bool TopDown);
+ };
+}
+
+void
+PtrState::Merge(const PtrState &Other, bool TopDown) {
+ Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+ KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
+
+ // If we're not in a sequence (anymore), drop all associated state.
+ if (Seq == S_None) {
+ Partial = false;
+ RRI.clear();
+ } else if (Partial || Other.Partial) {
+ // If we're doing a merge on a path that's previously seen a partial
+ // merge, conservatively drop the sequence, to avoid doing partial
+ // RR elimination. If the branch predicates for the two merges differ,
+ // mixing them is unsafe.
+ ClearSequenceProgress();
+ } else {
+ // Conservatively merge the ReleaseMetadata information.
+ if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
+ RRI.ReleaseMetadata = 0;
+
+ RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
+ RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
+ Other.RRI.IsTailCallRelease;
+ RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+
+ // Merge the insert point sets. If there are any differences,
+ // that makes this a partial merge.
+ Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size();
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ I = Other.RRI.ReverseInsertPts.begin(),
+ E = Other.RRI.ReverseInsertPts.end(); I != E; ++I)
+ Partial |= RRI.ReverseInsertPts.insert(*I);
+ }
+}
+
+namespace {
+ /// \brief Per-BasicBlock state.
+ class BBState {
+ /// The number of unique control paths from the entry which can reach this
+ /// block.
+ unsigned TopDownPathCount;
+
+ /// The number of unique control paths to exits from this block.
+ unsigned BottomUpPathCount;
+
+ /// A type for PerPtrTopDown and PerPtrBottomUp.
+ typedef MapVector<const Value *, PtrState> MapTy;
+
+ /// The top-down traversal uses this to record information known about a
+ /// pointer at the bottom of each block.
+ MapTy PerPtrTopDown;
+
+ /// The bottom-up traversal uses this to record information known about a
+ /// pointer at the top of each block.
+ MapTy PerPtrBottomUp;
+
+ /// Effective predecessors of the current block ignoring ignorable edges and
+ /// ignored backedges.
+ SmallVector<BasicBlock *, 2> Preds;
+ /// Effective successors of the current block ignoring ignorable edges and
+ /// ignored backedges.
+ SmallVector<BasicBlock *, 2> Succs;
+
+ public:
+ BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
+
+ typedef MapTy::iterator ptr_iterator;
+ typedef MapTy::const_iterator ptr_const_iterator;
+
+ ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
+ ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
+ ptr_const_iterator top_down_ptr_begin() const {
+ return PerPtrTopDown.begin();
+ }
+ ptr_const_iterator top_down_ptr_end() const {
+ return PerPtrTopDown.end();
+ }
+
+ ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
+ ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
+ ptr_const_iterator bottom_up_ptr_begin() const {
+ return PerPtrBottomUp.begin();
+ }
+ ptr_const_iterator bottom_up_ptr_end() const {
+ return PerPtrBottomUp.end();
+ }
+
+ /// Mark this block as being an entry block, which has one path from the
+ /// entry by definition.
+ void SetAsEntry() { TopDownPathCount = 1; }
+
+ /// Mark this block as being an exit block, which has one path to an exit by
+ /// definition.
+ void SetAsExit() { BottomUpPathCount = 1; }
+
+ PtrState &getPtrTopDownState(const Value *Arg) {
+ return PerPtrTopDown[Arg];
+ }
+
+ PtrState &getPtrBottomUpState(const Value *Arg) {
+ return PerPtrBottomUp[Arg];
+ }
+
+ void clearBottomUpPointers() {
+ PerPtrBottomUp.clear();
+ }
+
+ void clearTopDownPointers() {
+ PerPtrTopDown.clear();
+ }
+
+ void InitFromPred(const BBState &Other);
+ void InitFromSucc(const BBState &Other);
+ void MergePred(const BBState &Other);
+ void MergeSucc(const BBState &Other);
+
+ /// Return the number of possible unique paths from an entry to an exit
+ /// which pass through this block. This is only valid after both the
+ /// top-down and bottom-up traversals are complete.
+ unsigned GetAllPathCount() const {
+ assert(TopDownPathCount != 0);
+ assert(BottomUpPathCount != 0);
+ return TopDownPathCount * BottomUpPathCount;
+ }
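+
+ // Illustrative example: in a CFG diamond entry -> {B, C} -> exit, the
+ // exit block has TopDownPathCount == 2 and BottomUpPathCount == 1, so
+ // GetAllPathCount() returns 2, one per entry-to-exit path through it.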
+
+ // Specialized CFG utilities.
+ typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator;
+ edge_iterator pred_begin() { return Preds.begin(); }
+ edge_iterator pred_end() { return Preds.end(); }
+ edge_iterator succ_begin() { return Succs.begin(); }
+ edge_iterator succ_end() { return Succs.end(); }
+
+ void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
+ void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
+
+ bool isExit() const { return Succs.empty(); }
+ };
+}
+
+void BBState::InitFromPred(const BBState &Other) {
+ PerPtrTopDown = Other.PerPtrTopDown;
+ TopDownPathCount = Other.TopDownPathCount;
+}
+
+void BBState::InitFromSucc(const BBState &Other) {
+ PerPtrBottomUp = Other.PerPtrBottomUp;
+ BottomUpPathCount = Other.BottomUpPathCount;
+}
+
+/// The top-down traversal uses this to merge information about predecessors to
+/// form the initial state for a new block.
+void BBState::MergePred(const BBState &Other) {
+ // Other.TopDownPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ TopDownPathCount += Other.TopDownPathCount;
+
+ // Check for overflow. If we have overflow, fall back to conservative
+ // behavior.
+ if (TopDownPathCount < Other.TopDownPathCount) {
+ clearTopDownPointers();
+ return;
+ }
+
+ // For each entry in the other set, if our set has an entry with the same key,
+ // merge the entries. Otherwise, copy the entry and merge it with an empty
+ // entry.
+ for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
+ ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/true);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry with the
+ // same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = top_down_ptr_begin(),
+ ME = top_down_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/true);
+}
+
+/// The bottom-up traversal uses this to merge information about successors to
+/// form the initial state for a new block.
+void BBState::MergeSucc(const BBState &Other) {
+ // Other.BottomUpPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ BottomUpPathCount += Other.BottomUpPathCount;
+
+ // Check for overflow. If we have overflow, fall back to conservative
+ // behavior.
+ if (BottomUpPathCount < Other.BottomUpPathCount) {
+ clearBottomUpPointers();
+ return;
+ }
+
+ // For each entry in the other set, if our set has an entry with the
+ // same key, merge the entries. Otherwise, copy the entry and merge
+ // it with an empty entry.
+ for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
+ ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/false);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry
+ // with the same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = bottom_up_ptr_begin(),
+ ME = bottom_up_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/false);
+}
+
+// Only enable ARC Annotations if we are building a debug version of
+// libObjCARCOpts.
+#ifndef NDEBUG
+#define ARC_ANNOTATIONS
+#endif
+
+// Define some macros along the lines of DEBUG and some helper functions to make
+// it cleaner to create annotations in the source code and to no-op when not
+// building in debug mode.
+#ifdef ARC_ANNOTATIONS
+
+#include "llvm/Support/CommandLine.h"
+
+/// Enable/disable ARC sequence annotations.
+static cl::opt<bool>
+EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false));
+
+/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an
+/// instruction so that we can track backwards when post-processing via the
+/// llvm arc annotation processor tool. If Ptr is an Argument rather than the
+/// result of an instruction, a hash based on the argument's parent function
+/// and name is returned instead.
+static MDString *AppendMDNodeToSourcePtr(unsigned NodeId,
+ Value *Ptr) {
+ MDString *Hash = 0;
+
+ // If the pointer is the result of an instruction and it does not have a
+ // source MDNode attached to it, attach a new MDNode to it. If the pointer
+ // is the result of an instruction and does have a source MDNode attached
+ // to it, return a reference to said node. Otherwise just return 0.
+ if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) {
+ MDNode *Node;
+ if (!(Node = Inst->getMetadata(NodeId))) {
+ // We do not have any node. Generate and attach the hash MDString to the
+ // instruction.
+
+ // We just use an MDString to ensure that this metadata gets written out
+ // of line at the module level and to provide a very simple format
+ // encoding the information herein. Both of these make it simpler for a
+ // simple external program to parse the annotations.
+ std::string Str;
+ raw_string_ostream os(Str);
+ os << "(" << Inst->getParent()->getParent()->getName() << ",%"
+ << Inst->getName() << ")";
+
+ Hash = MDString::get(Inst->getContext(), os.str());
+ Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash));
+ } else {
+ // We have a node. Grab its hash and return it.
+ assert(Node->getNumOperands() == 1 &&
+ "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand.");
+ Hash = cast<MDString>(Node->getOperand(0));
+ }
+ } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
+ std::string str;
+ raw_string_ostream os(str);
+ os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName()
+ << ")";
+ Hash = MDString::get(Arg->getContext(), os.str());
+ }
+
+ return Hash;
+}
+
+static std::string SequenceToString(Sequence A) {
+ std::string str;
+ raw_string_ostream os(str);
+ os << A;
+ return os.str();
+}
+
+/// Helper function to change a Sequence into a String object using our overload
+/// for raw_ostream so we only have printing code in one location.
+static MDString *SequenceToMDString(LLVMContext &Context,
+ Sequence A) {
+ return MDString::get(Context, SequenceToString(A));
+}
+
+/// A simple function to generate an MDNode which describes the change in state
+/// for Value *Ptr caused by Instruction *Inst.
+static void AppendMDNodeToInstForPtr(unsigned NodeId,
+ Instruction *Inst,
+ Value *Ptr,
+ MDString *PtrSourceMDNodeID,
+ Sequence OldSeq,
+ Sequence NewSeq) {
+ MDNode *Node = 0;
+ Value *tmp[3] = {PtrSourceMDNodeID,
+ SequenceToMDString(Inst->getContext(),
+ OldSeq),
+ SequenceToMDString(Inst->getContext(),
+ NewSeq)};
+ Node = MDNode::get(Inst->getContext(),
+ ArrayRef<Value*>(tmp, 3));
+
+ Inst->setMetadata(NodeId, Node);
+}
+
+/// Add to the beginning of the basic block llvm.ptr.annotation calls which
+/// show the state of a pointer at the entrance to the basic block.
+static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
+ Value *Ptr, Sequence Seq) {
+ Module *M = BB->getParent()->getParent();
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ Type *Params[] = {I8XX, I8XX};
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
+ ArrayRef<Type*>(Params, 2),
+ /*isVarArg=*/false);
+ Constant *Callee = M->getOrInsertFunction(Name, FTy);
+
+ IRBuilder<> Builder(BB, BB->getFirstInsertionPt());
+
+ Value *PtrName;
+ StringRef Tmp = Ptr->getName();
+ if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
+ Tmp + "_STR");
+ PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), Tmp);
+ }
+
+ Value *S;
+ std::string SeqStr = SequenceToString(Seq);
+ if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
+ SeqStr + "_STR");
+ S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), SeqStr);
+ }
+
+ Builder.CreateCall2(Callee, PtrName, S);
+}
+
+/// Add to the end of the basic block llvm.ptr.annotations which show the state
+/// of the pointer at the bottom of the basic block.
+static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
+ Value *Ptr, Sequence Seq) {
+ Module *M = BB->getParent()->getParent();
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ Type *Params[] = {I8XX, I8XX};
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(C),
+ ArrayRef<Type*>(Params, 2),
+ /*isVarArg=*/false);
+ Constant *Callee = M->getOrInsertFunction(Name, FTy);
+
+ IRBuilder<> Builder(BB, llvm::prior(BB->end()));
+
+ Value *PtrName;
+ StringRef Tmp = Ptr->getName();
+ if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
+ Tmp + "_STR");
+ PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), Tmp);
+ }
+
+ Value *S;
+ std::string SeqStr = SequenceToString(Seq);
+ if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
+ SeqStr + "_STR");
+ S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
+ cast<Constant>(ActualPtrName), SeqStr);
+ }
+ Builder.CreateCall2(Callee, PtrName, S);
+}
+
+/// Adds a source annotation to pointer and a state change annotation to Inst
+/// referencing the source annotation and the old/new state of pointer.
+static void GenerateARCAnnotation(unsigned InstMDId,
+ unsigned PtrMDId,
+ Instruction *Inst,
+ Value *Ptr,
+ Sequence OldSeq,
+ Sequence NewSeq) {
+ if (EnableARCAnnotations) {
+ // First generate the source annotation on our pointer. This will return an
+ // MDString* if Ptr comes from an instruction or an Argument, implying we
+ // can put in a source annotation. If AppendMDNodeToSourcePtr returns 0
+ // (i.e. NULL), then the pointer has no source annotation we can refer to.
+ //
+ // The point of this is to make it easy for the
+ // llvm-arc-annotation-processor tool to cross-reference where the source
+ // pointer is in the LLVM IR, since the LLVM IR parser does not provide
+ // such information via debug info for backends to use (there is little
+ // need for such a thing from LLVM IR outside of non-standard cases
+ // [i.e. this]).
+ MDString *SourcePtrMDNode =
+ AppendMDNodeToSourcePtr(PtrMDId, Ptr);
+ AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq,
+ NewSeq);
+ }
+}
+
+// The actual interface for accessing the above functionality is defined via
+// some simple macros which are defined below. We do this so that the user does
+// not need to pass in which metadata id is needed, resulting in cleaner code,
+// and because it provides an easy way to conditionally no-op all annotation
+// support in a non-debug build.
+
+/// Use this macro to annotate a sequence state change when processing
+/// instructions bottom up.
+#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \
+ GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \
+ ARCAnnotationProvenanceSourceMDKind, (inst), \
+ const_cast<Value*>(ptr), (old), (new))
+/// Use this macro to annotate a sequence state change when processing
+/// instructions top down.
+#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \
+ GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \
+ ARCAnnotationProvenanceSourceMDKind, (inst), \
+ const_cast<Value*>(ptr), (old), (new))
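+
+// Illustrative call site (assumed shape): a bottom-up visit that moves Ptr
+// from S_Use to S_CanRelease at Inst would record the transition as:
+//   ANNOTATE_BOTTOMUP(Inst, Ptr, S_Use, S_CanRelease);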
+
+#define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \
+ do { \
+ if (EnableARCAnnotations) { \
+ for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
+ E = (_states)._direction##_ptr_end(); I != E; ++I) { \
+ Value *Ptr = const_cast<Value*>(I->first); \
+ Sequence Seq = I->second.GetSeq(); \
+ GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \
+ } \
+ } \
+} while (0)
+
+#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \
+ Entrance, bottom_up)
+#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \
+ Terminator, bottom_up)
+#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \
+ Entrance, top_down)
+#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \
+ Terminator, top_down)
+
+#else // !ARC_ANNOTATIONS
+// If annotations are off, no-op.
+#define ANNOTATE_BOTTOMUP(inst, ptr, old, new)
+#define ANNOTATE_TOPDOWN(inst, ptr, old, new)
+#define ANNOTATE_BOTTOMUP_BBSTART(states, basicblock)
+#define ANNOTATE_BOTTOMUP_BBEND(states, basicblock)
+#define ANNOTATE_TOPDOWN_BBSTART(states, basicblock)
+#define ANNOTATE_TOPDOWN_BBEND(states, basicblock)
+#endif // !ARC_ANNOTATIONS
+
+namespace {
+ /// \brief The main ARC optimization pass.
+ class ObjCARCOpt : public FunctionPass {
+ bool Changed;
+ ProvenanceAnalysis PA;
+
+ /// A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// Declarations for ObjC runtime functions, for use in creating calls to
+ /// them. These are initialized lazily to avoid cluttering up the Module
+ /// with unused declarations.
+
+ /// Declaration for ObjC runtime function
+ /// objc_retainAutoreleasedReturnValue.
+ Constant *RetainRVCallee;
+ /// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
+ Constant *AutoreleaseRVCallee;
+ /// Declaration for ObjC runtime function objc_release.
+ Constant *ReleaseCallee;
+ /// Declaration for ObjC runtime function objc_retain.
+ Constant *RetainCallee;
+ /// Declaration for ObjC runtime function objc_retainBlock.
+ Constant *RetainBlockCallee;
+ /// Declaration for ObjC runtime function objc_autorelease.
+ Constant *AutoreleaseCallee;
+
+ /// Flags which determine whether each of the interesting runtime functions
+ /// is in fact used in the current function.
+ unsigned UsedInThisFunction;
+
+ /// The Metadata Kind for clang.imprecise_release metadata.
+ unsigned ImpreciseReleaseMDKind;
+
+ /// The Metadata Kind for clang.arc.copy_on_escape metadata.
+ unsigned CopyOnEscapeMDKind;
+
+ /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
+ unsigned NoObjCARCExceptionsMDKind;
+
+#ifdef ARC_ANNOTATIONS
+ /// The Metadata Kind for llvm.arc.annotation.bottomup metadata.
+ unsigned ARCAnnotationBottomUpMDKind;
+ /// The Metadata Kind for llvm.arc.annotation.topdown metadata.
+ unsigned ARCAnnotationTopDownMDKind;
+ /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata.
+ unsigned ARCAnnotationProvenanceSourceMDKind;
+#endif // ARC_ANNOTATIONS
+
+ Constant *getRetainRVCallee(Module *M);
+ Constant *getAutoreleaseRVCallee(Module *M);
+ Constant *getReleaseCallee(Module *M);
+ Constant *getRetainCallee(Module *M);
+ Constant *getRetainBlockCallee(Module *M);
+ Constant *getAutoreleaseCallee(Module *M);
+
+ bool IsRetainBlockOptimizable(const Instruction *Inst);
+
+ void OptimizeRetainCall(Function &F, Instruction *Retain);
+ bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+ void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
+ InstructionClass &Class);
+ bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock,
+ InstructionClass &Class);
+ void OptimizeIndividualCalls(Function &F);
+
+ void CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const;
+ bool VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates);
+ bool VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains);
+ bool VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates);
+ bool VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases);
+ bool Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ Module *M);
+
+ bool ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M,
+ SmallVector<Instruction *, 4> &NewRetains,
+ SmallVector<Instruction *, 4> &NewReleases,
+ SmallVector<Instruction *, 8> &DeadInsts,
+ RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove,
+ Value *Arg,
+ bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated);
+
+ bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M);
+
+ void OptimizeWeakCalls(Function &F);
+
+ bool OptimizeSequences(Function &F);
+
+ void OptimizeReturns(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual void releaseMemory();
+
+ public:
+ static char ID;
+ ObjCARCOpt() : FunctionPass(ID) {
+ initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_END(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+
+Pass *llvm::createObjCARCOptPass() {
+ return new ObjCARCOpt();
+}
+
+void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ObjCARCAliasAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ // ARC optimization doesn't currently split critical edges.
+ AU.setPreservesCFG();
+}
+
+bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
+ // Without the magic metadata tag, we have to assume this might be an
+ // objc_retainBlock call inserted to convert a block pointer to an id,
+ // in which case it really is needed.
+ if (!Inst->getMetadata(CopyOnEscapeMDKind))
+ return false;
+
+ // If the pointer "escapes" (not including being used in a call),
+ // the copy may be needed.
+ if (DoesRetainableObjPtrEscape(Inst))
+ return false;
+
+ // Otherwise, it's not needed.
+ return true;
+}
+
+Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
+ if (!RetainRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+ Attribute);
+ }
+ return RetainRVCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
+ if (!AutoreleaseRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ AutoreleaseRVCallee =
+ M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
+ Attribute);
+ }
+ return AutoreleaseRVCallee;
+}
+
+Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
+ if (!ReleaseCallee) {
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ ReleaseCallee =
+ M->getOrInsertFunction(
+ "objc_release",
+ FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+ Attribute);
+ }
+ return ReleaseCallee;
+}
+
+Constant *ObjCARCOpt::getRetainCallee(Module *M) {
+ if (!RetainCallee) {
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainCallee =
+ M->getOrInsertFunction(
+ "objc_retain",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attribute);
+ }
+ return RetainCallee;
+}
+
+Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
+ if (!RetainBlockCallee) {
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ RetainBlockCallee =
+ M->getOrInsertFunction(
+ "objc_retainBlock",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ AttributeSet());
+ }
+ return RetainBlockCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
+ if (!AutoreleaseCallee) {
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ AutoreleaseCallee =
+ M->getOrInsertFunction(
+ "objc_autorelease",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attribute);
+ }
+ return AutoreleaseCallee;
+}
+
+/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
+/// return value.
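+///
+/// Illustrative IR (assumed shape):
+///
+///   %x = call i8* @returnsObject()
+///   %y = call i8* @objc_retain(i8* %x)
+/// =>
+///   %y = call i8* @objc_retainAutoreleasedReturnValue(i8* %x)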
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+ ImmutableCallSite CS(GetObjCArg(Retain));
+ const Instruction *Call = CS.getInstruction();
+ if (!Call) return;
+ if (Call->getParent() != Retain->getParent()) return;
+
+ // Check that the call is next to the retain.
+ BasicBlock::const_iterator I = Call;
+ ++I;
+ while (IsNoopInstruction(I)) ++I;
+ if (&*I != Retain)
+ return;
+
+ // Turn it into an objc_retainAutoreleasedReturnValue.
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming "
+ "objc_retain => objc_retainAutoreleasedReturnValue"
+ " since the operand is a return value.\n"
+ " Old: "
+ << *Retain << "\n");
+
+ cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+
+ DEBUG(dbgs() << " New: "
+ << *Retain << "\n");
+}
+
+/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
+/// not a return value. Or, if it can be paired with an
+/// objc_autoreleaseReturnValue, delete the pair and return true.
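+///
+/// Illustrative IR for the deletable pair (assumed shape):
+///
+///   %x = call i8* @objc_autoreleaseReturnValue(i8* %p)
+///   %y = call i8* @objc_retainAutoreleasedReturnValue(i8* %x)
+///
+/// Both calls forward %p, so the pair can be erased outright.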
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+ // Check for the argument being from an immediately preceding call or invoke.
+ const Value *Arg = GetObjCArg(RetainRV);
+ ImmutableCallSite CS(Arg);
+ if (const Instruction *Call = CS.getInstruction()) {
+ if (Call->getParent() == RetainRV->getParent()) {
+ BasicBlock::const_iterator I = Call;
+ ++I;
+ while (IsNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false;
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock *RetainRVParent = RetainRV->getParent();
+ if (II->getNormalDest() == RetainRVParent) {
+ BasicBlock::const_iterator I = RetainRVParent->begin();
+ while (IsNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false;
+ }
+ }
+ }
+
+ // Check for being preceded by an objc_autoreleaseReturnValue on the same
+ // pointer. In this case, we can delete the pair.
+ BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+ if (I != Begin) {
+ do --I; while (I != Begin && IsNoopInstruction(I));
+ if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+ GetObjCArg(I) == Arg) {
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n"
+ << " Erasing " << *RetainRV
+ << "\n");
+
+ EraseInstruction(I);
+ EraseInstruction(RetainRV);
+ return true;
+ }
+ }
+
+ // Turn it into a plain objc_retain.
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming "
+ "objc_retainAutoreleasedReturnValue => "
+ "objc_retain since the operand is not a return value.\n"
+ " Old: "
+ << *RetainRV << "\n");
+
+ cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+
+ DEBUG(dbgs() << " New: "
+ << *RetainRV << "\n");
+
+ return false;
+}
+
+/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
+/// used as a return value.
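+///
+/// Illustrative IR (assumed shape):
+///
+///   %x = call i8* @objc_autoreleaseReturnValue(i8* %p)
+///   store i8* %x, i8** @g   ; %x is not used as a return value
+/// =>
+///   %x = call i8* @objc_autorelease(i8* %p)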
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
+ InstructionClass &Class) {
+ // Check for a return of the pointer value.
+ const Value *Ptr = GetObjCArg(AutoreleaseRV);
+ SmallVector<const Value *, 2> Users;
+ Users.push_back(Ptr);
+ do {
+ Ptr = Users.pop_back_val();
+ for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+ UI != UE; ++UI) {
+ const User *I = *UI;
+ if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+ return;
+ if (isa<BitCastInst>(I))
+ Users.push_back(I);
+ }
+ } while (!Users.empty());
+
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming "
+ "objc_autoreleaseReturnValue => "
+ "objc_autorelease since its operand is not used as a return "
+ "value.\n"
+ " Old: "
+ << *AutoreleaseRV << "\n");
+
+ CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
+ AutoreleaseRVCI->
+ setCalledFunction(getAutoreleaseCallee(F.getParent()));
+ AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
+ Class = IC_Autorelease;
+
+ DEBUG(dbgs() << " New: "
+ << *AutoreleaseRV << "\n");
+
+}
+
+// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain
+// calls.
+//
+// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and
+// does not escape (following the rules of block escaping), strength reduce the
+// objc_retainBlock to an objc_retain.
+//
+// TODO: If an objc_retainBlock call is dominated outright by a previous
+// objc_retainBlock call, strength reduce the objc_retainBlock to an
+// objc_retain.
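+//
+// Illustrative IR for the handled case (assumed shape):
+//
+//   %b = call i8* @objc_retainBlock(i8* %blk), !clang.arc.copy_on_escape !0
+// becomes, when %blk is known not to escape:
+//   %b = call i8* @objc_retain(i8* %blk)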
+bool
+ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst,
+ InstructionClass &Class) {
+ assert(GetBasicInstructionClass(Inst) == Class);
+ assert(IC_RetainBlock == Class);
+
+ // If we cannot optimize Inst, return false.
+ if (!IsRetainBlockOptimizable(Inst))
+ return false;
+
+ CallInst *RetainBlock = cast<CallInst>(Inst);
+ RetainBlock->setCalledFunction(getRetainCallee(F.getParent()));
+ // Remove copy_on_escape metadata.
+ RetainBlock->setMetadata(CopyOnEscapeMDKind, 0);
+ Class = IC_Retain;
+
+ return true;
+}
+
+/// Visit each call, one at a time, and make simplifications without doing any
+/// additional analysis.
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+ // Reset all the flags in preparation for recomputing them.
+ UsedInThisFunction = 0;
+
+ // Visit all objc_* calls in F.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: "
+ << Class << "; " << *Inst << "\n");
+
+ switch (Class) {
+ default: break;
+
+ // Delete no-op casts. These function calls have special semantics, but
+ // the semantics are entirely implemented via lowering in the front-end,
+ // so by the time they reach the optimizer, they are just no-op calls
+ // which return their argument.
+ //
+ // There are gray areas here, as the ability to cast reference-counted
+ // pointers to raw void* and back allows code to break ARC assumptions;
+ // however, these are currently considered to be unimportant.
+ case IC_NoopCast:
+ Changed = true;
+ ++NumNoops;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:"
+ " " << *Inst << "\n");
+ EraseInstruction(Inst);
+ continue;
+
+ // If the pointer-to-weak-pointer is null, it's undefined behavior.
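+ //
+ // Illustrative IR (assumed shape): a call such as
+ //   %v = call i8* @objc_loadWeak(i8** null)
+ // is replaced below by a store of undef through the null pointer and all
+ // uses of %v by undef.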
+ case IC_StoreWeak:
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained:
+ case IC_InitWeak:
+ case IC_DestroyWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (IsNullOrUndef(CI->getArgOperand(0))) {
+ Changed = true;
+ Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI);
+ llvm::Value *NewValue = UndefValue::get(CI->getType());
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
+ "pointer-to-weak-pointer is undefined behavior.\n"
+ " Old = " << *CI <<
+ "\n New = " <<
+ *NewValue << "\n");
+ CI->replaceAllUsesWith(NewValue);
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_CopyWeak:
+ case IC_MoveWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (IsNullOrUndef(CI->getArgOperand(0)) ||
+ IsNullOrUndef(CI->getArgOperand(1))) {
+ Changed = true;
+ Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI);
+
+ llvm::Value *NewValue = UndefValue::get(CI->getType());
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
+ "pointer-to-weak-pointer is undefined behavior.\n"
+ " Old = " << *CI <<
+ "\n New = " <<
+ *NewValue << "\n");
+
+ CI->replaceAllUsesWith(NewValue);
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_RetainBlock:
+ // If we strength reduce an objc_retainBlock to an objc_retain, continue
+ // on to the objc_retain peephole optimizations. Otherwise break.
+ if (!OptimizeRetainBlockCall(F, Inst, Class))
+ break;
+ // FALLTHROUGH
+ case IC_Retain:
+ OptimizeRetainCall(F, Inst);
+ break;
+ case IC_RetainRV:
+ if (OptimizeRetainRVCall(F, Inst))
+ continue;
+ break;
+ case IC_AutoreleaseRV:
+ OptimizeAutoreleaseRVCall(F, Inst, Class);
+ break;
+ }
+
+ // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
+ if (IsAutorelease(Class) && Inst->use_empty()) {
+ CallInst *Call = cast<CallInst>(Inst);
+ const Value *Arg = Call->getArgOperand(0);
+ Arg = FindSingleUseIdentifiedObject(Arg);
+ if (Arg) {
+ Changed = true;
+ ++NumAutoreleases;
+
+ // Create the declaration lazily.
+ LLVMContext &C = Inst->getContext();
+ CallInst *NewCall =
+ CallInst::Create(getReleaseCallee(F.getParent()),
+ Call->getArgOperand(0), "", Call);
+ NewCall->setMetadata(ImpreciseReleaseMDKind,
+ MDNode::get(C, ArrayRef<Value *>()));
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing "
+ "objc_autorelease(x) with objc_release(x) since x is "
+ "otherwise unused.\n"
+ " Old: " << *Call <<
+ "\n New: " <<
+ *NewCall << "\n");
+
+ EraseInstruction(Call);
+ Inst = NewCall;
+ Class = IC_Release;
+ }
+ }
+
+ // For functions which can never be passed stack arguments, add
+ // a tail keyword.
+ if (IsAlwaysTail(Class)) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword"
+ " to function since it can never be passed stack args: " << *Inst <<
+ "\n");
+ cast<CallInst>(Inst)->setTailCall();
+ }
+
+ // Ensure that functions that can never have a "tail" keyword due to the
+ // semantics of ARC truly are not marked with one.
+ if (IsNeverTail(Class)) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail "
+ "keyword from function: " << *Inst <<
+ "\n");
+ cast<CallInst>(Inst)->setTailCall(false);
+ }
+
+ // Set nounwind as needed.
+ if (IsNoThrow(Class)) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw"
+ " class. Setting nounwind on: " << *Inst << "\n");
+ cast<CallInst>(Inst)->setDoesNotThrow();
+ }
+
+ if (!IsNoopOnNull(Class)) {
+ UsedInThisFunction |= 1 << Class;
+ continue;
+ }
+
+ const Value *Arg = GetObjCArg(Inst);
+
+ // ARC calls with null are no-ops. Delete them.
+ if (IsNullOrUndef(Arg)) {
+ Changed = true;
+ ++NumNoops;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with "
+ "null are no-ops. Erasing: " << *Inst << "\n");
+ EraseInstruction(Inst);
+ continue;
+ }
+
+ // Keep track of which of retain, release, autorelease, and retain_block
+ // are actually present in this function.
+ UsedInThisFunction |= 1 << Class;
+
+ // If Arg is a PHI, and one or more incoming values to the
+ // PHI are null, and the call is control-equivalent to the PHI, and there
+ // are no relevant side effects between the PHI and the call, the call
+ // could be pushed up to just those paths with non-null incoming values.
+ // For now, don't bother splitting critical edges for this.
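+ //
+ // Illustrative IR (assumed shape): the release below can be cloned into
+ // the predecessor supplying %obj and the original call erased:
+ //
+ //   merge:
+ //     %p = phi i8* [ %obj, %bb1 ], [ null, %bb2 ]
+ //     call void @objc_release(i8* %p)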
+ SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
+ Worklist.push_back(std::make_pair(Inst, Arg));
+ do {
+ std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
+ Inst = Pair.first;
+ Arg = Pair.second;
+
+ const PHINode *PN = dyn_cast<PHINode>(Arg);
+ if (!PN) continue;
+
+ // Determine if the PHI has any null operands, or any incoming
+ // critical edges.
+ bool HasNull = false;
+ bool HasCriticalEdges = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (IsNullOrUndef(Incoming))
+ HasNull = true;
+ else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
+ .getNumSuccessors() != 1) {
+ HasCriticalEdges = true;
+ break;
+ }
+ }
+ // If we have null operands and no critical edges, optimize.
+ if (!HasCriticalEdges && HasNull) {
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+
+ // Check that there is nothing that cares about the reference
+ // count between the call and the phi.
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainBlock:
+ // These can always be moved up.
+ break;
+ case IC_Release:
+ // These can't be moved across things that care about the retain
+ // count.
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_Autorelease:
+ // These can't be moved across autorelease pool scope boundaries.
+ FindDependencies(AutoreleasePoolBoundary, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_RetainRV:
+ case IC_AutoreleaseRV:
+ // Don't move these; the RV optimization depends on the autoreleaseRV
+ // being tail called, and the retainRV being immediately after a call
+ // (which might still happen if we get lucky with codegen layout, but
+ // it's not worth taking the chance).
+ continue;
+ default:
+ llvm_unreachable("Invalid dependence flavor");
+ }
+
+ if (DependingInstructions.size() == 1 &&
+ *DependingInstructions.begin() == PN) {
+ Changed = true;
+ ++NumPartialNoops;
+ // Clone the call into each predecessor that has a non-null value.
+ CallInst *CInst = cast<CallInst>(Inst);
+ Type *ParamTy = CInst->getArgOperand(0)->getType();
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (!IsNullOrUndef(Incoming)) {
+ CallInst *Clone = cast<CallInst>(CInst->clone());
+ Value *Op = PN->getIncomingValue(i);
+ Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
+ if (Op->getType() != ParamTy)
+ Op = new BitCastInst(Op, ParamTy, "", InsertPos);
+ Clone->setArgOperand(0, Op);
+ Clone->insertBefore(InsertPos);
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning "
+ << *CInst << "\n"
+ " And inserting "
+ "clone at " << *InsertPos << "\n");
+ Worklist.push_back(std::make_pair(Clone, Incoming));
+ }
+ }
+ // Erase the original call.
+ DEBUG(dbgs() << "Erasing: " << *CInst << "\n");
+ EraseInstruction(CInst);
+ continue;
+ }
+ }
+ } while (!Worklist.empty());
+ }
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n");
+}
+
+/// Check for critical edges, loop boundaries, irreducible control flow, or
+/// other CFG structures where moving code across the edge would result in it
+/// being executed more often than intended.
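+/// For example (a sketch):
+/// \code
+/// loop:
+/// %0 = call i8* @objc_retain(i8* %x)
+/// ...
+/// br i1 %done, label %exit, label %loop
+/// exit:
+/// call void @objc_release(i8* %x)
+/// \endcode
+/// Moving the release backwards across the loop's exit edge would make it
+/// execute once per iteration rather than once.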
+void
+ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const {
+ // If any top-down local-use or possible-dec has a succ which is earlier in
+ // the sequence, forget it.
+ for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
+ E = MyStates.top_down_ptr_end(); I != E; ++I)
+ switch (I->second.GetSeq()) {
+ default: break;
+ case S_Use: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ PtrState &S = I->second;
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ for (; SI != SE; ++SI) {
+ Sequence SuccSSeq = S_None;
+ bool SuccSRRIKnownSafe = false;
+ // If VisitBottomUp has pointer information for this successor, take
+ // what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI =
+ BBStates.find(*SI);
+ assert(BBI != BBStates.end());
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+ switch (SuccSSeq) {
+ case S_None:
+ case S_CanRelease: {
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+ S.ClearSequenceProgress();
+ break;
+ }
+ continue;
+ }
+ case S_Use:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ S.ClearSequenceProgress();
+ break;
+ }
+ case S_CanRelease: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ PtrState &S = I->second;
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ for (; SI != SE; ++SI) {
+ Sequence SuccSSeq = S_None;
+ bool SuccSRRIKnownSafe = false;
+ // If VisitBottomUp has pointer information for this successor, take
+ // what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI =
+ BBStates.find(*SI);
+ assert(BBI != BBStates.end());
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+ switch (SuccSSeq) {
+ case S_None: {
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+ S.ClearSequenceProgress();
+ break;
+ }
+ continue;
+ }
+ case S_CanRelease:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ S.ClearSequenceProgress();
+ break;
+ }
+ }
+}
+
+bool
+ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+ // If we see two releases in a row on the same pointer, make
+ // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second release, which may allow us to
+ // eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
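+ // For example (a sketch), scanning bottom-up:
+ // call void @objc_release(i8* %x)
+ // ...
+ // call void @objc_release(i8* %x)
+ // The later release puts %x in S_Release, so reaching the earlier one
+ // while still in that state signals nesting.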
+ if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
+ DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested "
+ "releases (i.e. a release pair)\n");
+ NestingDetected = true;
+ }
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
+ ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq);
+ S.ResetSequenceProgress(NewSeq);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
+ S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.Calls.insert(Inst);
+ S.SetKnownPositiveRefCount();
+ break;
+ }
+ case IC_RetainBlock:
+ // In OptimizeIndividualCalls, we have strength reduced all optimizable
+ // objc_retainBlocks to objc_retains. Thus at this point any
+ // objc_retainBlocks that we see are not optimizable.
+ break;
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ S.SetKnownPositiveRefCount();
+
+ Sequence OldSeq = S.GetSeq();
+ switch (OldSeq) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_CanRelease:
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV)
+ Retains[Inst] = S.RRI;
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq());
+ return NestingDetected;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearBottomUpPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ S.ClearKnownPositiveRefCount();
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq());
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ assert(S.RRI.ReverseInsertPts.empty());
+ // If this is an invoke instruction, we're scanning it as part of
+ // one of its successor blocks, since we can't insert code after it
+ // in its own block, and we don't want to split critical edges.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ S.SetSeq(S_Use);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
+ } else if (Seq == S_Release && IsUser(Class)) {
+ // Non-movable releases depend on any possible objc pointer use.
+ S.SetSeq(S_Stop);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
+ assert(S.RRI.ReverseInsertPts.empty());
+ // As above; handle invoke specially.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ S.SetSeq(S_Use);
+ ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
+ }
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each successor to compute the initial state
+ // for the current block.
+ BBState::edge_iterator SI(MyStates.succ_begin()),
+ SE(MyStates.succ_end());
+ if (SI != SE) {
+ const BasicBlock *Succ = *SI;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+ assert(I != BBStates.end());
+ MyStates.InitFromSucc(I->second);
+ ++SI;
+ for (; SI != SE; ++SI) {
+ Succ = *SI;
+ I = BBStates.find(Succ);
+ assert(I != BBStates.end());
+ MyStates.MergeSucc(I->second);
+ }
+ }
+
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // bottom of the basic block.
+ ANNOTATE_BOTTOMUP_BBEND(MyStates, BB);
+
+ // Visit all the instructions, bottom-up.
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+ Instruction *Inst = llvm::prior(I);
+
+ // Invoke instructions are visited as part of their successors (below).
+ if (isa<InvokeInst>(Inst))
+ continue;
+
+ DEBUG(dbgs() << "ObjCARCOpt::VisitButtonUp: Visiting " << *Inst << "\n");
+
+ NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+ }
+
+ // If there's a predecessor with an invoke, visit the invoke as if it were
+ // part of this block, since we can't insert code after an invoke in its own
+ // block, and we don't want to split critical edges.
+ for (BBState::edge_iterator PI(MyStates.pred_begin()),
+ PE(MyStates.pred_end()); PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
+ NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
+ }
+
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // top of the basic block.
+ ANNOTATE_BOTTOMUP_BBSTART(MyStates, BB);
+
+ return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_RetainBlock:
+ // In OptimizeIndividualCalls, we have strength reduced all optimizable
+ // objc_retainBlocks to objc_retains. Thus at this point any
+ // objc_retainBlocks that we see are not optimizable.
+ break;
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ // If we see two retains in a row on the same pointer, make
+ // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Retain)
+ NestingDetected = true;
+
+ ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain);
+ S.ResetSequenceProgress(S_Retain);
+ S.RRI.KnownSafe = S.HasKnownPositiveRefCount();
+ S.RRI.Calls.insert(Inst);
+ }
+
+ S.SetKnownPositiveRefCount();
+
+ // A retain can be a potential use; proceed to the generic checking
+ // code below.
+ break;
+ }
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ S.ClearKnownPositiveRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Retain:
+ case S_CanRelease:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_Use:
+ S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ Releases[Inst] = S.RRI;
+ ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearTopDownPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ S.ClearKnownPositiveRefCount();
+ switch (Seq) {
+ case S_Retain:
+ S.SetSeq(S_CanRelease);
+ ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease);
+ assert(S.RRI.ReverseInsertPts.empty());
+ S.RRI.ReverseInsertPts.insert(Inst);
+
+ // One call can't cause a transition from S_Retain to S_CanRelease
+ // and S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ continue;
+ case S_Use:
+ case S_CanRelease:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_CanRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ S.SetSeq(S_Use);
+ ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use);
+ }
+ break;
+ case S_Retain:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+
+ return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each predecessor to compute the initial state
+ // for the current block.
+ BBState::edge_iterator PI(MyStates.pred_begin()),
+ PE(MyStates.pred_end());
+ if (PI != PE) {
+ const BasicBlock *Pred = *PI;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+ assert(I != BBStates.end());
+ MyStates.InitFromPred(I->second);
+ ++PI;
+ for (; PI != PE; ++PI) {
+ Pred = *PI;
+ I = BBStates.find(Pred);
+ assert(I != BBStates.end());
+ MyStates.MergePred(I->second);
+ }
+ }
+
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // top of the basic block.
+ ANNOTATE_TOPDOWN_BBSTART(MyStates, BB);
+
+ // Visit all the instructions, top-down.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ Instruction *Inst = I;
+
+ DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n");
+
+ NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
+ }
+
+ // If ARC Annotations are enabled, output the current state of pointers at the
+ // bottom of the basic block.
+ ANNOTATE_TOPDOWN_BBEND(MyStates, BB);
+
+ CheckForCFGHazards(BB, BBStates, MyStates);
+ return NestingDetected;
+}
+
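+/// Compute the postorder and reverse-CFG postorder of F's blocks, recording
+/// in BBStates the successor and predecessor edges that the traversals
+/// should use.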
+static void
+ComputePostOrders(Function &F,
+ SmallVectorImpl<BasicBlock *> &PostOrder,
+ SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
+ unsigned NoObjCARCExceptionsMDKind,
+ DenseMap<const BasicBlock *, BBState> &BBStates) {
+ /// The visited set, for doing DFS walks.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+
+ // Do DFS, computing the PostOrder.
+ SmallPtrSet<BasicBlock *, 16> OnStack;
+ SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
+
+ // Functions always have exactly one entry block, and we don't have
+ // any other block that we treat like an entry block.
+ BasicBlock *EntryBB = &F.getEntryBlock();
+ BBState &MyStates = BBStates[EntryBB];
+ MyStates.SetAsEntry();
+ TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back());
+ SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
+ Visited.insert(EntryBB);
+ OnStack.insert(EntryBB);
+ do {
+ dfs_next_succ:
+ BasicBlock *CurrBB = SuccStack.back().first;
+ TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back());
+ succ_iterator SE(TI, false);
+
+ while (SuccStack.back().second != SE) {
+ BasicBlock *SuccBB = *SuccStack.back().second++;
+ if (Visited.insert(SuccBB)) {
+ TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back());
+ SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
+ BBStates[CurrBB].addSucc(SuccBB);
+ BBState &SuccStates = BBStates[SuccBB];
+ SuccStates.addPred(CurrBB);
+ OnStack.insert(SuccBB);
+ goto dfs_next_succ;
+ }
+
+ if (!OnStack.count(SuccBB)) {
+ BBStates[CurrBB].addSucc(SuccBB);
+ BBStates[SuccBB].addPred(CurrBB);
+ }
+ }
+ OnStack.erase(CurrBB);
+ PostOrder.push_back(CurrBB);
+ SuccStack.pop_back();
+ } while (!SuccStack.empty());
+
+ Visited.clear();
+
+ // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+ // Functions may have many exits, and there are also blocks which we
+ // treat as exits due to ignored edges.
+ SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *ExitBB = I;
+ BBState &MyStates = BBStates[ExitBB];
+ if (!MyStates.isExit())
+ continue;
+
+ MyStates.SetAsExit();
+
+ PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
+ Visited.insert(ExitBB);
+ while (!PredStack.empty()) {
+ reverse_dfs_next_succ:
+ BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
+ while (PredStack.back().second != PE) {
+ BasicBlock *BB = *PredStack.back().second++;
+ if (Visited.insert(BB)) {
+ PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
+ goto reverse_dfs_next_succ;
+ }
+ }
+ ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
+ }
+ }
+}
+
+// Visit the function both top-down and bottom-up.
+bool
+ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+
+ // Use reverse-postorder traversals, because we magically know that loops
+ // will be well behaved, i.e. they won't repeatedly call retain on a single
+ // pointer without doing a release. We can't use the ReversePostOrderTraversal
+ // class here because we want the reverse-CFG postorder to consider each
+ // function exit point, and we want to ignore selected cycle edges.
+ SmallVector<BasicBlock *, 16> PostOrder;
+ SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
+ ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
+ NoObjCARCExceptionsMDKind,
+ BBStates);
+
+ // Use reverse-postorder on the reverse CFG for bottom-up.
+ bool BottomUpNestingDetected = false;
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+ I != E; ++I)
+ BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
+
+ // Use reverse-postorder for top-down.
+ bool TopDownNestingDetected = false;
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ PostOrder.rbegin(), E = PostOrder.rend();
+ I != E; ++I)
+ TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
+
+ return TopDownNestingDetected && BottomUpNestingDetected;
+}
+
+/// Move the calls in RetainsToMove and ReleasesToMove.
+void ObjCARCOpt::MoveCalls(Value *Arg,
+ RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ Module *M) {
+ Type *ArgTy = Arg->getType();
+ Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
+
+ // Insert the new retain and release calls.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = ReleasesToMove.ReverseInsertPts.begin(),
+ PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *InsertPt = *PI;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call =
+ CallInst::Create(getRetainCallee(M), MyArg, "", InsertPt);
+ Call->setDoesNotThrow();
+ Call->setTailCall();
+
+ DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call
+ << "\n"
+ " At insertion point: " << *InsertPt
+ << "\n");
+ }
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = RetainsToMove.ReverseInsertPts.begin(),
+ PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *InsertPt = *PI;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+ "", InsertPt);
+ // Attach a clang.imprecise_release metadata tag, if appropriate.
+ if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+ Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setDoesNotThrow();
+ if (ReleasesToMove.IsTailCallRelease)
+ Call->setTailCall();
+
+ DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call
+ << "\n"
+ " At insertion point: " << *InsertPt
+ << "\n");
+ }
+
+ // Delete the original retain and release calls.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = RetainsToMove.Calls.begin(),
+ AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRetain = *AI;
+ Retains.blot(OrigRetain);
+ DeadInsts.push_back(OrigRetain);
+ DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain <<
+ "\n");
+ }
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = ReleasesToMove.Calls.begin(),
+ AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRelease = *AI;
+ Releases.erase(OrigRelease);
+ DeadInsts.push_back(OrigRelease);
+ DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease
+ << "\n");
+ }
+}
+
+bool
+ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
+ &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M,
+ SmallVector<Instruction *, 4> &NewRetains,
+ SmallVector<Instruction *, 4> &NewReleases,
+ SmallVector<Instruction *, 8> &DeadInsts,
+ RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove,
+ Value *Arg,
+ bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated) {
+ // If a pair happens in a region where it is known that the reference count
+ // is already incremented, we can similarly ignore possible decrements.
+ bool KnownSafeTD = true, KnownSafeBU = true;
+
+ // Connect the dots between the top-down-collected RetainsToMove and
+ // bottom-up-collected ReleasesToMove to form sets of related calls.
+ // This is an iterative process so that we connect multiple releases
+ // to multiple retains if needed.
+ unsigned OldDelta = 0;
+ unsigned NewDelta = 0;
+ unsigned OldCount = 0;
+ unsigned NewCount = 0;
+ bool FirstRelease = true;
+ for (;;) {
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+ Instruction *NewRetain = *NI;
+ MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+ assert(It != Retains.end());
+ const RRInfo &NewRetainRRI = It->second;
+ KnownSafeTD &= NewRetainRRI.KnownSafe;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewRetainRRI.Calls.begin(),
+ LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewRetainRelease = *LI;
+ DenseMap<Value *, RRInfo>::const_iterator Jt =
+ Releases.find(NewRetainRelease);
+ if (Jt == Releases.end())
+ return false;
+ const RRInfo &NewRetainReleaseRRI = Jt->second;
+ assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+ if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+ OldDelta -=
+ BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+ // Merge the ReleaseMetadata and IsTailCallRelease values.
+ if (FirstRelease) {
+ ReleasesToMove.ReleaseMetadata =
+ NewRetainReleaseRRI.ReleaseMetadata;
+ ReleasesToMove.IsTailCallRelease =
+ NewRetainReleaseRRI.IsTailCallRelease;
+ FirstRelease = false;
+ } else {
+ if (ReleasesToMove.ReleaseMetadata !=
+ NewRetainReleaseRRI.ReleaseMetadata)
+ ReleasesToMove.ReleaseMetadata = 0;
+ if (ReleasesToMove.IsTailCallRelease !=
+ NewRetainReleaseRRI.IsTailCallRelease)
+ ReleasesToMove.IsTailCallRelease = false;
+ }
+
+ // Collect the optimal insertion points.
+ if (!KnownSafe)
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+ RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+ RI != RE; ++RI) {
+ Instruction *RIP = *RI;
+ if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+ NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+ }
+ NewReleases.push_back(NewRetainRelease);
+ }
+ }
+ }
+ NewRetains.clear();
+ if (NewReleases.empty()) break;
+
+ // Back the other way.
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+ Instruction *NewRelease = *NI;
+ DenseMap<Value *, RRInfo>::const_iterator It =
+ Releases.find(NewRelease);
+ assert(It != Releases.end());
+ const RRInfo &NewReleaseRRI = It->second;
+ KnownSafeBU &= NewReleaseRRI.KnownSafe;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewReleaseRRI.Calls.begin(),
+ LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewReleaseRetain = *LI;
+ MapVector<Value *, RRInfo>::const_iterator Jt =
+ Retains.find(NewReleaseRetain);
+ if (Jt == Retains.end())
+ return false;
+ const RRInfo &NewReleaseRetainRRI = Jt->second;
+ assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+ if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+ unsigned PathCount =
+ BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+ OldDelta += PathCount;
+ OldCount += PathCount;
+
+ // Collect the optimal insertion points.
+ if (!KnownSafe)
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
+ RE = NewReleaseRetainRRI.ReverseInsertPts.end();
+ RI != RE; ++RI) {
+ Instruction *RIP = *RI;
+ if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
+ PathCount = BBStates[RIP->getParent()].GetAllPathCount();
+ NewDelta += PathCount;
+ NewCount += PathCount;
+ }
+ }
+ NewRetains.push_back(NewReleaseRetain);
+ }
+ }
+ }
+ NewReleases.clear();
+ if (NewRetains.empty()) break;
+ }
+
+ // If the pointer is known incremented or nested, we can safely delete the
+ // pair regardless of what's between them.
+ if (KnownSafeTD || KnownSafeBU) {
+ RetainsToMove.ReverseInsertPts.clear();
+ ReleasesToMove.ReverseInsertPts.clear();
+ NewCount = 0;
+ } else {
+ // Determine whether the new insertion points we computed preserve the
+ // balance of retain and release calls through the program.
+ // TODO: If the fully aggressive solution isn't valid, try to find a
+ // less aggressive solution which is.
+ if (NewDelta != 0)
+ return false;
+ }
+
+ // Determine whether the original call points are balanced in the retain and
+ // release calls through the program. If not, conservatively don't touch
+ // them.
+ // TODO: It's theoretically possible to do code motion in this case, as
+ // long as the existing imbalances are maintained.
+ if (OldDelta != 0)
+ return false;
+
+ Changed = true;
+ assert(OldCount != 0 && "Unreachable code?");
+ NumRRs += OldCount - NewCount;
+ // Set to true if we completely removed any RR pairs.
+ AnyPairsCompletelyEliminated = NewCount == 0;
+
+ // We can move calls!
+ return true;
+}
+
+/// Identify pairings between the retains and releases, and delete and/or move
+/// them.
+bool
+ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
+ &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M) {
+ bool AnyPairsCompletelyEliminated = false;
+ RRInfo RetainsToMove;
+ RRInfo ReleasesToMove;
+ SmallVector<Instruction *, 4> NewRetains;
+ SmallVector<Instruction *, 4> NewReleases;
+ SmallVector<Instruction *, 8> DeadInsts;
+
+ // Visit each retain.
+ for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
+ E = Retains.end(); I != E; ++I) {
+ Value *V = I->first;
+ if (!V) continue; // blotted
+
+ Instruction *Retain = cast<Instruction>(V);
+
+ DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain
+ << "\n");
+
+ Value *Arg = GetObjCArg(Retain);
+
+ // If the object being released is in static or stack storage, we know it's
+ // not being managed by ObjC reference counting, so we can delete pairs
+ // regardless of what possible decrements or uses lie between them.
+ bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+
+ // A constant pointer can't be pointing to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand())))
+ if (GV->isConstant())
+ KnownSafe = true;
+
+ // Connect the dots between the top-down-collected RetainsToMove and
+ // bottom-up-collected ReleasesToMove to form sets of related calls.
+ NewRetains.push_back(Retain);
+ bool PerformMoveCalls =
+ ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains,
+ NewReleases, DeadInsts, RetainsToMove,
+ ReleasesToMove, Arg, KnownSafe,
+ AnyPairsCompletelyEliminated);
+
+#ifdef ARC_ANNOTATIONS
+ // Do not move calls if ARC annotations are requested. If we were to move
+ // calls in this case, we would not be able to verify the annotations,
+ // since the annotated program points would no longer match the code.
+ PerformMoveCalls = PerformMoveCalls && !EnableARCAnnotations;
+#endif // ARC_ANNOTATIONS
+
+ if (PerformMoveCalls) {
+ // Ok, everything checks out and we're all set. Let's move/delete some
+ // code!
+ MoveCalls(Arg, RetainsToMove, ReleasesToMove,
+ Retains, Releases, DeadInsts, M);
+ }
+
+ // Clean up state for next retain.
+ NewReleases.clear();
+ NewRetains.clear();
+ RetainsToMove.clear();
+ ReleasesToMove.clear();
+ }
+
+ // Now that we're done moving everything, we can delete the newly dead
+ // instructions, as we no longer need them as insert points.
+ while (!DeadInsts.empty())
+ EraseInstruction(DeadInsts.pop_back_val());
+
+ return AnyPairsCompletelyEliminated;
+}
+
+/// Weak pointer optimizations.
+void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+ // First, do memdep-style RLE and S2L optimizations. We can't use memdep
+ // itself because it uses AliasAnalysis and we need to do provenance
+ // queries instead.
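+ // For example (a sketch), the second load here is fully redundant and can
+ // be replaced with the first load's value:
+ // %1 = call i8* @objc_loadWeak(i8** %p)
+ // %2 = call i8* @objc_loadWeak(i8** %p)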
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst <<
+ "\n");
+
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
+ continue;
+
+ // Delete objc_loadWeak calls with no users.
+ if (Class == IC_LoadWeak && Inst->use_empty()) {
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // TODO: For now, just look for an earlier available version of this value
+ // within the same block. Theoretically, we could do memdep-style non-local
+ // analysis too, but that would want caching. A better approach would be to
+ // use the technique that EarlyCSE uses.
+ inst_iterator Current = llvm::prior(I);
+ BasicBlock *CurrentBB = Current.getBasicBlockIterator();
+ for (BasicBlock::iterator B = CurrentBB->begin(),
+ J = Current.getInstructionIterator();
+ J != B; --J) {
+ Instruction *EarlierInst = &*llvm::prior(J);
+ InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
+ switch (EarlierClass) {
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained: {
+ // If this is loading from the same pointer, replace this load's value
+ // with that one.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall);
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_StoreWeak:
+ case IC_InitWeak: {
+ // If this is storing to the same pointer and has the same size etc.
+ // replace this load's value with the stored value.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_MoveWeak:
+ case IC_CopyWeak:
+ // TODO: Grab the copied value.
+ goto clobbered;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ case IC_IntrinsicUser:
+ case IC_User:
+ // Weak pointers are only modified through the weak entry points
+ // (and arbitrary calls, which could call the weak entry points).
+ break;
+ default:
+ // Anything else could modify the weak pointer.
+ goto clobbered;
+ }
+ }
+ clobbered:;
+ }
+
+ // Then, for each destroyWeak with an alloca operand, check to see if
+ // the alloca and all its users can be zapped.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_DestroyWeak)
+ continue;
+
+ CallInst *Call = cast<CallInst>(Inst);
+ Value *Arg = Call->getArgOperand(0);
+ if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ++UI) {
+ const Instruction *UserInst = cast<Instruction>(*UI);
+ switch (GetBasicInstructionClass(UserInst)) {
+ case IC_InitWeak:
+ case IC_StoreWeak:
+ case IC_DestroyWeak:
+ continue;
+ default:
+ goto done;
+ }
+ }
+ Changed = true;
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ) {
+ CallInst *UserInst = cast<CallInst>(*UI++);
+ switch (GetBasicInstructionClass(UserInst)) {
+ case IC_InitWeak:
+ case IC_StoreWeak:
+ // These functions return their second argument.
+ UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
+ break;
+ case IC_DestroyWeak:
+ // No return value.
+ break;
+ default:
+ llvm_unreachable("alloca really is used!");
+ }
+ UserInst->eraseFromParent();
+ }
+ Alloca->eraseFromParent();
+ done:;
+ }
+ }
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
+
+}
+
+/// Identify program paths which execute sequences of retains and releases
+/// that can be eliminated.
+bool ObjCARCOpt::OptimizeSequences(Function &F) {
+ /// Releases, Retains - These are used to store the results of the main flow
+ /// analysis. These use Value* as the key instead of Instruction* so that the
+ /// map stays valid when we get around to rewriting code and calls get
+ /// replaced by arguments.
+ DenseMap<Value *, RRInfo> Releases;
+ MapVector<Value *, RRInfo> Retains;
+
+ /// This is used during the traversal of the function to track the
+ /// states for each identified object at each block.
+ DenseMap<const BasicBlock *, BBState> BBStates;
+
+ // Analyze the CFG of the function, and all instructions.
+ bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+
+ // Transform.
+ return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
+ NestingDetected;
+}
+
+/// Check for an earlier dependent call with nothing in between it and the
+/// Retain that can affect the reference count of their shared pointer
+/// argument. Note that Retain need not be in BB.
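+/// For example (a sketch):
+/// \code
+/// %call = call i8* @something(...)
+/// %0 = call i8* @objc_retain(i8* %call)
+/// \endcode
+/// Here the retain's argument is the call's return value, and nothing in
+/// between can affect its reference count.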
+static bool
+HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
+ SmallPtrSet<Instruction *, 4> &DepInsts,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
+ DepInsts, Visited, PA);
+ if (DepInsts.size() != 1)
+ return false;
+
+ CallInst *Call =
+ dyn_cast_or_null<CallInst>(*DepInsts.begin());
+
+ // Check that the pointer is the return value of the call.
+ if (!Call || Arg != Call)
+ return false;
+
+ // Check that the call is a regular call.
+ InstructionClass Class = GetBasicInstructionClass(Call);
+ if (Class != IC_CallOrUser && Class != IC_Call)
+ return false;
+
+ return true;
+}
+
+/// Find a dependent retain that precedes the given autorelease for which there
+/// is nothing in between the two instructions that can affect the ref count of
+/// Arg.
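+/// For example (a sketch):
+/// \code
+/// %0 = call i8* @objc_retain(i8* %x)
+/// %1 = call i8* @objc_autorelease(i8* %0)
+/// \endcode
+/// Here the retain is found for the autorelease because both strip down to
+/// the same argument and nothing in between can change its ref count.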
+static CallInst *
+FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
+ Instruction *Autorelease,
+ SmallPtrSet<Instruction *, 4> &DepInsts,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ FindDependencies(CanChangeRetainCount, Arg,
+ BB, Autorelease, DepInsts, Visited, PA);
+ if (DepInsts.size() != 1)
+ return 0;
+
+ CallInst *Retain =
+ dyn_cast_or_null<CallInst>(*DepInsts.begin());
+
+ // Check that we found a retain with the same argument.
+ if (!Retain ||
+ !IsRetain(GetBasicInstructionClass(Retain)) ||
+ GetObjCArg(Retain) != Arg) {
+ return 0;
+ }
+
+ return Retain;
+}
+
+/// Look for an ``autorelease'' instruction dependent on Arg such that there are
+/// no instructions dependent on Arg that need a positive ref count in between
+/// the autorelease and the ret.
+static CallInst *
+FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
+ ReturnInst *Ret,
+ SmallPtrSet<Instruction *, 4> &DepInsts,
+ SmallPtrSet<const BasicBlock *, 4> &V,
+ ProvenanceAnalysis &PA) {
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ BB, Ret, DepInsts, V, PA);
+ if (DepInsts.size() != 1)
+ return 0;
+
+ CallInst *Autorelease =
+ dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ if (!Autorelease)
+ return 0;
+ InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
+ if (!IsAutorelease(AutoreleaseClass))
+ return 0;
+ if (GetObjCArg(Autorelease) != Arg)
+ return 0;
+
+ return Autorelease;
+}
+
+/// Look for this pattern:
+/// \code
+/// %call = call i8* @something(...)
+/// %2 = call i8* @objc_retain(i8* %call)
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+/// \endcode
+/// And delete the retain and autorelease.
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+ if (!F.getReturnType()->isPointerTy())
+ return;
+
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ BasicBlock *BB = FI;
+ ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+ if (!Ret)
+ continue;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
+
+ const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+
+ // Look for an ``autorelease'' instruction that is a predecessor of Ret and
+ // dependent on Arg such that there are no instructions dependent on Arg
+ // that need a positive ref count in between the autorelease and Ret.
+ CallInst *Autorelease =
+ FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret,
+ DependingInstructions, Visited,
+ PA);
+ if (Autorelease) {
+ DependingInstructions.clear();
+ Visited.clear();
+
+ CallInst *Retain =
+ FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
+ DependingInstructions, Visited, PA);
+ if (Retain) {
+ DependingInstructions.clear();
+ Visited.clear();
+
+ // Check that there is nothing that can affect the reference count
+ // between the retain and the call. Note that Retain need not be in BB.
+ if (HasSafePathToPredecessorCall(Arg, Retain, DependingInstructions,
+ Visited, PA)) {
+ // If so, we can zap the retain and autorelease.
+ Changed = true;
+ ++NumRets;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
+ << "\n Erasing: "
+ << *Autorelease << "\n");
+ EraseInstruction(Retain);
+ EraseInstruction(Autorelease);
+ }
+ }
+ }
+
+ DependingInstructions.clear();
+ Visited.clear();
+ }
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
+
+}
+
+bool ObjCARCOpt::doInitialization(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // Identify the imprecise release metadata kind.
+ ImpreciseReleaseMDKind =
+ M.getContext().getMDKindID("clang.imprecise_release");
+ CopyOnEscapeMDKind =
+ M.getContext().getMDKindID("clang.arc.copy_on_escape");
+ NoObjCARCExceptionsMDKind =
+ M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
+#ifdef ARC_ANNOTATIONS
+ ARCAnnotationBottomUpMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.bottomup");
+ ARCAnnotationTopDownMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.topdown");
+ ARCAnnotationProvenanceSourceMDKind =
+ M.getContext().getMDKindID("llvm.arc.annotation.provenancesource");
+#endif // ARC_ANNOTATIONS
+
+ // Intuitively, objc_retain and others are nocapture, however in practice
+ // they are not, because they return their argument value. And objc_release
+ // calls finalizers which can have arbitrary side effects.
+
+ // These are initialized lazily.
+ RetainRVCallee = 0;
+ AutoreleaseRVCallee = 0;
+ ReleaseCallee = 0;
+ RetainCallee = 0;
+ RetainBlockCallee = 0;
+ AutoreleaseCallee = 0;
+
+ return false;
+}
+
+bool ObjCARCOpt::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+
+ DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n");
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // This pass performs several distinct transformations. As a compile-time aid
+ // when compiling code that isn't ObjC, skip these if the relevant ObjC
+ // library functions aren't declared.
+
+ // Preliminary optimizations. This also computes UsedInThisFunction.
+ OptimizeIndividualCalls(F);
+
+ // Optimizations for weak pointers.
+ if (UsedInThisFunction & ((1 << IC_LoadWeak) |
+ (1 << IC_LoadWeakRetained) |
+ (1 << IC_StoreWeak) |
+ (1 << IC_InitWeak) |
+ (1 << IC_CopyWeak) |
+ (1 << IC_MoveWeak) |
+ (1 << IC_DestroyWeak)))
+ OptimizeWeakCalls(F);
+
+ // Optimizations for retain+release pairs.
+ if (UsedInThisFunction & ((1 << IC_Retain) |
+ (1 << IC_RetainRV) |
+ (1 << IC_RetainBlock)))
+ if (UsedInThisFunction & (1 << IC_Release))
+ // Run OptimizeSequences until it either stops making changes or
+ // no retain+release pair nesting is detected.
+ while (OptimizeSequences(F)) {}
+
+ // Optimizations if objc_autorelease is used.
+ if (UsedInThisFunction & ((1 << IC_Autorelease) |
+ (1 << IC_AutoreleaseRV)))
+ OptimizeReturns(F);
+
+ DEBUG(dbgs() << "\n");
+
+ return Changed;
+}
+
+void ObjCARCOpt::releaseMemory() {
+ PA.clear();
+}
+
+/// @}
+///
diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
new file mode 100644
index 000000000000..03e12d4fd763
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
@@ -0,0 +1,252 @@
+//===- ObjCARCUtil.cpp - ObjC ARC Optimization --------*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines several utility functions used by various ARC
+/// optimizations which are IMHO too big to be in a header file.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
+ const InstructionClass Class) {
+ switch (Class) {
+ case IC_Retain:
+ return OS << "IC_Retain";
+ case IC_RetainRV:
+ return OS << "IC_RetainRV";
+ case IC_RetainBlock:
+ return OS << "IC_RetainBlock";
+ case IC_Release:
+ return OS << "IC_Release";
+ case IC_Autorelease:
+ return OS << "IC_Autorelease";
+ case IC_AutoreleaseRV:
+ return OS << "IC_AutoreleaseRV";
+ case IC_AutoreleasepoolPush:
+ return OS << "IC_AutoreleasepoolPush";
+ case IC_AutoreleasepoolPop:
+ return OS << "IC_AutoreleasepoolPop";
+ case IC_NoopCast:
+ return OS << "IC_NoopCast";
+ case IC_FusedRetainAutorelease:
+ return OS << "IC_FusedRetainAutorelease";
+ case IC_FusedRetainAutoreleaseRV:
+ return OS << "IC_FusedRetainAutoreleaseRV";
+ case IC_LoadWeakRetained:
+ return OS << "IC_LoadWeakRetained";
+ case IC_StoreWeak:
+ return OS << "IC_StoreWeak";
+ case IC_InitWeak:
+ return OS << "IC_InitWeak";
+ case IC_LoadWeak:
+ return OS << "IC_LoadWeak";
+ case IC_MoveWeak:
+ return OS << "IC_MoveWeak";
+ case IC_CopyWeak:
+ return OS << "IC_CopyWeak";
+ case IC_DestroyWeak:
+ return OS << "IC_DestroyWeak";
+ case IC_StoreStrong:
+ return OS << "IC_StoreStrong";
+ case IC_CallOrUser:
+ return OS << "IC_CallOrUser";
+ case IC_Call:
+ return OS << "IC_Call";
+ case IC_User:
+ return OS << "IC_User";
+ case IC_IntrinsicUser:
+ return OS << "IC_IntrinsicUser";
+ case IC_None:
+ return OS << "IC_None";
+ }
+ llvm_unreachable("Unknown instruction class!");
+}
+
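+/// Classify an ARC library function by its name and prototype. For example
+/// (a sketch), a declaration of i8* @objc_retain(i8*) is classified as
+/// IC_Retain, while any unrecognized function falls back to IC_CallOrUser.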
+InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No (mandatory) arguments.
+ if (AI == AE)
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+ .Case("clang.arc.use", IC_IntrinsicUser)
+ .Default(IC_CallOrUser);
+
+ // One argument.
+ const Argument *A0 = AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_retain", IC_Retain)
+ .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
+ .Case("objc_retainBlock", IC_RetainBlock)
+ .Case("objc_release", IC_Release)
+ .Case("objc_autorelease", IC_Autorelease)
+ .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
+ .Case("objc_retainedObject", IC_NoopCast)
+ .Case("objc_unretainedObject", IC_NoopCast)
+ .Case("objc_unretainedPointer", IC_NoopCast)
+ .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
+ .Default(IC_CallOrUser);
+
+ // Argument is i8**
+ if (PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
+ .Case("objc_loadWeak", IC_LoadWeak)
+ .Case("objc_destroyWeak", IC_DestroyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Two arguments, first is i8**.
+ const Argument *A1 = AI++;
+ if (AI == AE)
+ if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (Pte->getElementType()->isIntegerTy(8))
+ if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ Type *ETy1 = PTy1->getElementType();
+ // Second argument is i8*
+ if (ETy1->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_storeWeak", IC_StoreWeak)
+ .Case("objc_initWeak", IC_InitWeak)
+ .Case("objc_storeStrong", IC_StoreStrong)
+ .Default(IC_CallOrUser);
+ // Second argument is i8**.
+ if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (Pte1->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_moveWeak", IC_MoveWeak)
+ .Case("objc_copyWeak", IC_CopyWeak)
+ // Ignore annotation calls. This is important to stop the
+ // optimizer from treating annotations as uses, which would
+ // make the state of the pointers they are attempting to
+ // elucidate incorrect.
+ .Case("llvm.arc.annotation.topdown.bbstart", IC_None)
+ .Case("llvm.arc.annotation.topdown.bbend", IC_None)
+ .Case("llvm.arc.annotation.bottomup.bbstart", IC_None)
+ .Case("llvm.arc.annotation.bottomup.bbend", IC_None)
+ .Default(IC_CallOrUser);
+ }
+
+ // Anything else.
+ return IC_CallOrUser;
+}
+
+/// \brief Determine what kind of construct V is.
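+/// For example (a sketch), a call to a declared i8* @objc_autorelease(i8*)
+/// classifies as IC_Autorelease, while an instruction with no ObjC pointer
+/// operands of interest classifies as IC_None.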
+InstructionClass
+llvm::objcarc::GetInstructionClass(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Any instruction other than a bitcast or GEP with a pointer operand has a
+ // use of an objc pointer. Bitcasts, GEPs, Selects, and PHIs transfer a
+ // pointer to a subsequent use, rather than using it themselves, in this
+ // sense.
+ // As a short cut, several other opcodes are known to have no pointer
+ // operands of interest. And ret is never followed by a release, so it's
+ // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ // Check for calls to special functions.
+ if (const Function *F = CI->getCalledFunction()) {
+ InstructionClass Class = GetFunctionClass(F);
+ if (Class != IC_CallOrUser)
+ return Class;
+
+ // None of the intrinsic functions do objc_release. For intrinsics, the
+ // only question is whether or not they may be users.
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::returnaddress: case Intrinsic::frameaddress:
+ case Intrinsic::stacksave: case Intrinsic::stackrestore:
+ case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
+ case Intrinsic::objectsize: case Intrinsic::prefetch:
+ case Intrinsic::stackprotector:
+ case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64:
+ case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa:
+ case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext:
+ case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline:
+ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: case Intrinsic::invariant_end:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return IC_None;
+ default:
+ break;
+ }
+ }
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select: case Instruction::PHI:
+ case Instruction::Ret: case Instruction::Br:
+ case Instruction::Switch: case Instruction::IndirectBr:
+ case Instruction::Alloca: case Instruction::VAArg:
+ case Instruction::Add: case Instruction::FAdd:
+ case Instruction::Sub: case Instruction::FSub:
+ case Instruction::Mul: case Instruction::FMul:
+ case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
+ case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
+ case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
+ case Instruction::And: case Instruction::Or: case Instruction::Xor:
+ case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
+ case Instruction::IntToPtr: case Instruction::FCmp:
+ case Instruction::FPTrunc: case Instruction::FPExt:
+ case Instruction::FPToUI: case Instruction::FPToSI:
+ case Instruction::UIToFP: case Instruction::SIToFP:
+ case Instruction::InsertElement: case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ if (IsPotentialRetainableObjPtr(I->getOperand(1)))
+ return IC_User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ // Note that this includes both operands of a Store: while the first
+ // operand isn't actually being dereferenced, it is being stored to
+ // memory where we can no longer track who might read it and dereference
+ // it, so we have to consider it potentially used.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialRetainableObjPtr(*OI))
+ return IC_User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return IC_None;
+}
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
new file mode 100644
index 000000000000..ae3c6282cf83
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -0,0 +1,177 @@
+//===- ProvenanceAnalysis.cpp - ObjC ARC Optimization ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a special form of Alias Analysis called ``Provenance
+/// Analysis''. The word ``provenance'' refers to the history of the ownership
+/// of an object. Thus ``Provenance Analysis'' is an analysis which attempts to
+/// use various techniques to determine whether two local values may refer to
+/// the same object.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A,
+ const Value *B) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for relations between the values on corresponding arms.
+ if (const SelectInst *SB = dyn_cast<SelectInst>(B))
+ if (A->getCondition() == SB->getCondition())
+ return related(A->getTrueValue(), SB->getTrueValue()) ||
+ related(A->getFalseValue(), SB->getFalseValue());
+
+ // Check both arms of the Select node individually.
+ return related(A->getTrueValue(), B) ||
+ related(A->getFalseValue(), B);
+}
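+
+ // For illustration (assumed IR, not from the sources): given
+ //   %x = select i1 %c, i8* %a, i8* %b
+ //   %y = select i1 %c, i8* %p, i8* %q
+ // only the pairs (%a, %p) and (%b, %q) can be observed together, so the
+ // same-condition fast path never compares %a against %q or %b against %p.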
+
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
+ const Value *B) {
+ // If the values are PHIs in the same block, we can do a check that is both
+ // more precise and more efficient: just check for relations between the
+ // values on corresponding edges.
+ if (const PHINode *PNB = dyn_cast<PHINode>(B))
+ if (PNB->getParent() == A->getParent()) {
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
+ if (related(A->getIncomingValue(i),
+ PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+ return true;
+ return false;
+ }
+
+ // Check each unique source of the PHI node against B.
+ SmallPtrSet<const Value *, 4> UniqueSrc;
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
+ const Value *PV1 = A->getIncomingValue(i);
+ if (UniqueSrc.insert(PV1) && related(PV1, B))
+ return true;
+ }
+
+ // All of the arms checked out.
+ return false;
+}
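+
+ // Similarly (illustrative): two PHIs in the same block choose their values
+ // along the same incoming edge, so only values from matching predecessor
+ // blocks can ever be observed together.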
+
+/// Test if the value of P, or any value covered by its provenance, is ever
+/// stored within the function (not counting callees).
+static bool IsStoredObjCPointer(const Value *P) {
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Worklist;
+ Worklist.push_back(P);
+ Visited.insert(P);
+ do {
+ P = Worklist.pop_back_val();
+ for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
+ UI != UE; ++UI) {
+ const User *Ur = *UI;
+ if (isa<StoreInst>(Ur)) {
+ if (UI.getOperandNo() == 0)
+ // The pointer is stored.
+ return true;
+ // The pointer is stored through.
+ continue;
+ }
+ if (isa<CallInst>(Ur))
+ // The pointer is passed as an argument, ignore this.
+ continue;
+ if (isa<PtrToIntInst>(P))
+ // Assume the worst.
+ return true;
+ if (Visited.insert(Ur))
+ Worklist.push_back(Ur);
+ }
+ } while (!Worklist.empty());
+
+ // Everything checked out.
+ return false;
+}
+
+bool ProvenanceAnalysis::relatedCheck(const Value *A,
+ const Value *B) {
+ // Skip past provenance pass-throughs.
+ A = GetUnderlyingObjCPtr(A);
+ B = GetUnderlyingObjCPtr(B);
+
+ // Quick check.
+ if (A == B)
+ return true;
+
+ // Ask regular AliasAnalysis, for a first approximation.
+ switch (AA->alias(A, B)) {
+ case AliasAnalysis::NoAlias:
+ return false;
+ case AliasAnalysis::MustAlias:
+ case AliasAnalysis::PartialAlias:
+ return true;
+ case AliasAnalysis::MayAlias:
+ break;
+ }
+
+ bool AIsIdentified = IsObjCIdentifiedObject(A);
+ bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+ // An ObjC-Identified object can't alias a load if it is never locally stored.
+ if (AIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(B))
+ return IsStoredObjCPointer(A);
+ if (BIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
+ return IsStoredObjCPointer(B);
+ // Both pointers are identified and escapes aren't an evident problem.
+ return false;
+ }
+ } else if (BIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
+ return IsStoredObjCPointer(B);
+ }
+
+ // Special handling for PHI and Select.
+ if (const PHINode *PN = dyn_cast<PHINode>(A))
+ return relatedPHI(PN, B);
+ if (const PHINode *PN = dyn_cast<PHINode>(B))
+ return relatedPHI(PN, A);
+ if (const SelectInst *S = dyn_cast<SelectInst>(A))
+ return relatedSelect(S, B);
+ if (const SelectInst *S = dyn_cast<SelectInst>(B))
+ return relatedSelect(S, A);
+
+ // Conservative.
+ return true;
+}
+
+bool ProvenanceAnalysis::related(const Value *A,
+ const Value *B) {
+ // Begin by inserting a conservative value into the map. If the insertion
+ // fails, we have the answer already. If it succeeds, leave it there until we
+ // compute the real answer to guard against recursive queries.
+ if (A > B) std::swap(A, B);
+ std::pair<CachedResultsTy::iterator, bool> Pair =
+ CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ bool Result = relatedCheck(A, B);
+ CachedResults[ValuePairTy(A, B)] = Result;
+ return Result;
+}
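+
+ // The insert-before-compute idiom above also bounds recursion: a cycle of
+ // PHIs that re-enters related(A, B) finds the provisional "true" entry and
+ // conservatively reports the values as related instead of looping forever.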
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
new file mode 100644
index 000000000000..ec449fd8e747
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -0,0 +1,80 @@
+//===- ProvenanceAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares a special form of Alias Analysis called ``Provenance
+/// Analysis''. The word ``provenance'' refers to the history of the ownership
+/// of an object. Thus ``Provenance Analysis'' is an analysis which attempts to
+/// use various techniques to determine whether or not two pointers have the
+/// same provenance source and thus could potentially be related.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
+#define LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+ class Value;
+ class AliasAnalysis;
+ class PHINode;
+ class SelectInst;
+}
+
+namespace llvm {
+namespace objcarc {
+
+/// \brief This is similar to BasicAliasAnalysis, and it uses many of the same
+/// techniques, except it uses special ObjC-specific reasoning about pointer
+/// relationships.
+///
+/// In this context ``Provenance'' is defined as the history of an object's
+/// ownership. Thus ``Provenance Analysis'' is defined by using the notion of
+/// an ``independent provenance source'' of a pointer to determine whether or
+/// not two pointers have the same provenance source and thus could
+/// potentially be related.
+class ProvenanceAnalysis {
+ AliasAnalysis *AA;
+
+ typedef std::pair<const Value *, const Value *> ValuePairTy;
+ typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
+ CachedResultsTy CachedResults;
+
+ bool relatedCheck(const Value *A, const Value *B);
+ bool relatedSelect(const SelectInst *A, const Value *B);
+ bool relatedPHI(const PHINode *A, const Value *B);
+
+ void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+ ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+
+public:
+ ProvenanceAnalysis() {}
+
+ void setAA(AliasAnalysis *aa) { AA = aa; }
+
+ AliasAnalysis *getAA() const { return AA; }
+
+ bool related(const Value *A, const Value *B);
+
+ void clear() {
+ CachedResults.clear();
+ }
+};
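+
+ // Minimal usage sketch (illustrative only; assumes a pass with an
+ // AliasAnalysis instance available, e.g. via getAnalysis<AliasAnalysis>()):
+ //
+ //   ProvenanceAnalysis PA;
+ //   PA.setAA(&getAnalysis<AliasAnalysis>());
+ //   if (!PA.related(PtrA, PtrB)) {
+ //     // The pointers are provably unrelated for ARC purposes.
+ //   }
+ //   PA.clear(); // drop cached results before analyzing another function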
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index b344952cc5fc..a09730864051 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -16,16 +16,16 @@
#define DEBUG_TYPE "adce"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstIterator.h"
using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed");
diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
index cee550265622..e755008808f6 100644
--- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -27,12 +27,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "block-placement"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Transforms/Scalar.h"
#include <set>
using namespace llvm;
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index b3fc6e338c00..fd55e082ac7d 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -21,7 +21,6 @@ add_llvm_library(LLVMScalarOpts
LoopUnswitch.cpp
LowerAtomic.cpp
MemCpyOptimizer.cpp
- ObjCARC.cpp
Reassociate.cpp
Reg2Mem.cpp
SCCP.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 123ed0f4f3de..015fd2e6e6fc 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -15,22 +15,23 @@
#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -38,10 +39,8 @@
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
@@ -106,6 +105,8 @@ namespace {
}
bool runOnFunction(Function &F);
+ const char *getPassName() const { return "CodeGen Prepare"; }
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
@@ -125,7 +126,7 @@ namespace {
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
bool OptimizeSelectInst(SelectInst *SI);
- bool DupRetToEnableTailCallOpts(ReturnInst *RI);
+ bool DupRetToEnableTailCallOpts(BasicBlock *BB);
bool PlaceDbgValues(Function &F);
};
}
@@ -148,11 +149,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
- OptSize = F.getFnAttributes().hasAttribute(Attributes::OptimizeForSize);
+ OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
- if (TLI && TLI->isSlowDivBypassed()) {
+ if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
for (Function::iterator I = F.begin(); I != F.end(); I++)
@@ -194,9 +196,20 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
WorkList.insert(*II);
}
- for (SmallPtrSet<BasicBlock*, 8>::iterator
- I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
- DeleteDeadBlock(*I);
+ // Delete the dead blocks and any of their dead successors.
+ MadeChange |= !WorkList.empty();
+ while (!WorkList.empty()) {
+ BasicBlock *BB = *WorkList.begin();
+ WorkList.erase(BB);
+ SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
+
+ DeleteDeadBlock(BB);
+
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+ if (pred_begin(*II) == pred_end(*II))
+ WorkList.insert(*II);
+ }
// Merge pairs of basic blocks with unconditional branches, connected by
// a single edge.
@@ -689,10 +702,14 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
/// @endcode
-bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
if (!TLI)
return false;
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI)
+ return false;
+
PHINode *PN = 0;
BitCastInst *BCI = 0;
Value *V = RI->getReturnValue();
@@ -706,16 +723,15 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
return false;
}
- BasicBlock *BB = RI->getParent();
if (PN && PN->getParent() != BB)
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
// See llvm::isInTailCallPosition().
const Function *F = BB->getParent();
- Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if (CallerRetAttr.hasAttribute(Attributes::ZExt) ||
- CallerRetAttr.hasAttribute(Attributes::SExt))
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
return false;
// Make sure there are no instructions between the PHI and return, or that the
@@ -772,11 +788,11 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
- Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
- if (AttrBuilder(CalleeRetAttr).
- removeAttribute(Attributes::NoAlias) !=
- AttrBuilder(CallerRetAttr).
- removeAttribute(Attributes::NoAlias))
+ AttributeSet CalleeAttrs = CS.getAttributes();
+ if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias))
continue;
// Make sure the call instruction is followed by an unconditional branch to
@@ -803,6 +819,629 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
// Memory Optimization
//===----------------------------------------------------------------------===//
+namespace {
+
+/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
+/// which holds actual Value*'s for register values.
+struct ExtAddrMode : public TargetLowering::AddrMode {
+ Value *BaseReg;
+ Value *ScaledReg;
+ ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
+ void print(raw_ostream &OS) const;
+ void dump() const;
+
+ bool operator==(const ExtAddrMode& O) const {
+ return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
+ (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
+ (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
+ }
+};
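+
+ // For illustration: an address such as [%base + 4*%idx + GV + 16] would be
+ // described here as BaseReg=%base, ScaledReg=%idx, Scale=4, BaseGV=GV and
+ // BaseOffs=16.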
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
+ AM.print(OS);
+ return OS;
+}
+
+void ExtAddrMode::print(raw_ostream &OS) const {
+ bool NeedPlus = false;
+ OS << "[";
+ if (BaseGV) {
+ OS << (NeedPlus ? " + " : "")
+ << "GV:";
+ WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ if (BaseOffs)
+ OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+
+ if (BaseReg) {
+ OS << (NeedPlus ? " + " : "")
+ << "Base:";
+ WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+ if (Scale) {
+ OS << (NeedPlus ? " + " : "")
+ << Scale << "*";
+ WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ OS << ']';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ExtAddrMode::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+#endif
+
+
+/// \brief A helper class for matching addressing modes.
+///
+/// This encapsulates the logic for matching the target-legal addressing modes.
+class AddressingModeMatcher {
+ SmallVectorImpl<Instruction*> &AddrModeInsts;
+ const TargetLowering &TLI;
+
+ /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
+ /// the memory instruction that we're computing this address for.
+ Type *AccessTy;
+ Instruction *MemoryInst;
+
+ /// AddrMode - This is the addressing mode that we're building up. This is
+ /// part of the return value of this addressing mode matching stuff.
+ ExtAddrMode &AddrMode;
+
+ /// IgnoreProfitability - This is set to true when we should not do
+ /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
+ /// always returns true.
+ bool IgnoreProfitability;
+
+ AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
+ const TargetLowering &T, Type *AT,
+ Instruction *MI, ExtAddrMode &AM)
+ : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
+ IgnoreProfitability = false;
+ }
+public:
+
+ /// Match - Find the maximal addressing mode that a load/store of V can fold,
+ /// given an access type of AccessTy. This returns a list of involved
+ /// instructions in AddrModeInsts.
+ static ExtAddrMode Match(Value *V, Type *AccessTy,
+ Instruction *MemoryInst,
+ SmallVectorImpl<Instruction*> &AddrModeInsts,
+ const TargetLowering &TLI) {
+ ExtAddrMode Result;
+
+ bool Success =
+ AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
+ MemoryInst, Result).MatchAddr(V, 0);
+ (void)Success; assert(Success && "Couldn't select *anything*?");
+ return Result;
+ }
+private:
+ bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
+ bool MatchAddr(Value *V, unsigned Depth);
+ bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
+ bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
+ ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter);
+ bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
+};
+
+/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
+/// Return true and update AddrMode if this addr mode is legal for the target,
+/// false if not.
+bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+ unsigned Depth) {
+ // If Scale is 1, then this is the same as adding ScaleReg to the addressing
+ // mode. Just process that directly.
+ if (Scale == 1)
+ return MatchAddr(ScaleReg, Depth);
+
+ // If the scale is 0, it takes nothing to add this.
+ if (Scale == 0)
+ return true;
+
+ // If we already have a scale of this value, we can add to it, otherwise, we
+ // need an available scale field.
+ if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+ return false;
+
+ ExtAddrMode TestAddrMode = AddrMode;
+
+ // Add scale to turn X*4+X*3 -> X*7. This could also do things like
+ // [A+B + A*7] -> [B+A*8].
+ TestAddrMode.Scale += Scale;
+ TestAddrMode.ScaledReg = ScaleReg;
+
+ // If the new address isn't legal, bail out.
+ if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+ return false;
+
+ // It was legal, so commit it.
+ AddrMode = TestAddrMode;
+
+ // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
+ // to see if ScaleReg is actually X+C. If so, we can turn this into adding
+ // X*Scale + C*Scale to addr mode.
+ ConstantInt *CI = 0; Value *AddLHS = 0;
+ if (isa<Instruction>(ScaleReg) && // not a constant expr.
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+ TestAddrMode.ScaledReg = AddLHS;
+ TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+
+ // If this addressing mode is legal, commit it and remember that we folded
+ // this instruction.
+ if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+ AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ }
+
+ // Otherwise, not (x+c)*scale, just return what we have.
+ return true;
+}
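+
+ // For illustration (assumed values): matching ScaleReg = "%x = add i32 %a, 4"
+ // with Scale = 2 yields ScaledReg=%a, Scale=2 and BaseOffs increased by 8,
+ // provided the target reports the resulting addressing mode as legal.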
+
+/// MightBeFoldableInst - This is a little filter, which returns true if an
+/// addressing computation involving I might be folded into a load/store
+/// accessing it. This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ // Don't touch identity bitcasts.
+ if (I->getType() == I->getOperand(0)->getType())
+ return false;
+ return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return true;
+ case Instruction::IntToPtr:
+ // We know the input is intptr_t, so this is foldable.
+ return true;
+ case Instruction::Add:
+ return true;
+ case Instruction::Mul:
+ case Instruction::Shl:
+ // Can only handle X*C and X << C.
+ return isa<ConstantInt>(I->getOperand(1));
+ case Instruction::GetElementPtr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// MatchOperationAddr - Given an instruction or constant expr, see if we can
+/// fold the operation into the addressing mode. If so, update the addressing
+/// mode and return true, otherwise return false without modifying AddrMode.
+bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+ unsigned Depth) {
+ // Avoid exponential behavior on extremely deep expression trees.
+ if (Depth >= 5) return false;
+
+ switch (Opcode) {
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ case Instruction::IntToPtr:
+ // This inttoptr is a no-op if the integer type is pointer sized.
+ if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
+ TLI.getPointerTy())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::BitCast:
+ // BitCast is always a noop, and we can handle it as long as it is
+ // int->int or pointer->pointer (we don't want int<->fp or something).
+ if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
+ AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
+ // Don't touch identity bitcasts. These were probably put here by LSR,
+ // and we don't want to mess around with them. Assume it knows what it
+ // is doing.
+ AddrInst->getOperand(0)->getType() != AddrInst->getType())
+ return MatchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::Add: {
+ // Check to see if we can merge in the RHS then the LHS. If so, we win.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+ if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+
+ // Restore the old addr mode info.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+
+ // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
+ MatchAddr(AddrInst->getOperand(1), Depth+1))
+ return true;
+
+ // Otherwise we definitely can't merge the ADD in.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ break;
+ }
+ //case Instruction::Or:
+ // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+ //break;
+ case Instruction::Mul:
+ case Instruction::Shl: {
+ // Can only handle X*C and X << C.
+ ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+ if (!RHS) return false;
+ int64_t Scale = RHS->getSExtValue();
+ if (Opcode == Instruction::Shl)
+ Scale = 1LL << Scale;
+
+ return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+ }
+ case Instruction::GetElementPtr: {
+ // Scan the GEP. We check whether it contains constant offsets and at most
+ // one variable offset.
+ int VariableOperand = -1;
+ unsigned VariableScale = 0;
+
+ int64_t ConstantOffset = 0;
+ const DataLayout *TD = TLI.getDataLayout();
+ gep_type_iterator GTI = gep_type_begin(AddrInst);
+ for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD->getStructLayout(STy);
+ unsigned Idx =
+ cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+ ConstantOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+ ConstantOffset += CI->getSExtValue()*TypeSize;
+ } else if (TypeSize) { // Scales of zero don't do anything.
+ // We only allow one variable index at the moment.
+ if (VariableOperand != -1)
+ return false;
+
+ // Remember the variable index.
+ VariableOperand = i;
+ VariableScale = TypeSize;
+ }
+ }
+ }
+
+ // A common case is for the GEP to only do a constant offset. In this case,
+ // just add it to the disp field and check validity.
+ if (VariableOperand == -1) {
+ AddrMode.BaseOffs += ConstantOffset;
+ if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+ // Check to see if we can fold the base pointer in too.
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+ }
+ AddrMode.BaseOffs -= ConstantOffset;
+ return false;
+ }
+
+ // Save the valid addressing mode in case we can't match.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // See if the scale and offset amount is valid for this target.
+ AddrMode.BaseOffs += ConstantOffset;
+
+ // Match the base operand of the GEP.
+ if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+ // If it couldn't be matched, just stuff the value in a register.
+ if (AddrMode.HasBaseReg) {
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ }
+
+ // Match the remaining variable portion of the GEP.
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+ Depth)) {
+ // If it couldn't be matched, try stuffing the base into a register
+ // instead of matching it, and retrying the match of the scale.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ if (AddrMode.HasBaseReg)
+ return false;
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ AddrMode.BaseOffs += ConstantOffset;
+ if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+ VariableScale, Depth)) {
+ // If even that didn't work, bail.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ }
+
+ return true;
+ }
+ }
+ return false;
+}
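+
+ // Illustrative GEP case (assumed IR): in
+ //   getelementptr [10 x i32]* %arr, i32 0, i32 %i
+ // the constant index adds offset 0 and %i is the single variable index with
+ // scale 4 (the i32 allocation size), so the match tries BaseReg=%arr,
+ // ScaledReg=%i, Scale=4.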
+
+/// MatchAddr - If we can, try to add the value of 'Addr' into the current
+/// addressing mode. If Addr can't be added to AddrMode this returns false and
+/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
+/// or intptr_t for the target.
+///
+bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+ // Fold in immediates if legal for the target.
+ AddrMode.BaseOffs += CI->getSExtValue();
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseOffs -= CI->getSExtValue();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+ // If this is a global variable, try to fold it into the addressing mode.
+ if (AddrMode.BaseGV == 0) {
+ AddrMode.BaseGV = GV;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.BaseGV = 0;
+ }
+ } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // Check to see if it is possible to fold this operation.
+ if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
+ // Okay, it's possible to fold this. Check to see if it is actually
+ // *profitable* to do so. We use a simple cost model to avoid increasing
+ // register pressure too much.
+ if (I->hasOneUse() ||
+ IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ AddrModeInsts.push_back(I);
+ return true;
+ }
+
+ // It isn't profitable to do this, roll back.
+ //cerr << "NOT FOLDING: " << *I;
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+ return true;
+ } else if (isa<ConstantPointerNull>(Addr)) {
+ // Null pointer gets folded without affecting the addressing mode.
+ return true;
+ }
+
+ // Worst case, the target should support [reg] addressing modes. :)
+ if (!AddrMode.HasBaseReg) {
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = Addr;
+ // Still check for legality in case the target supports [imm] but not [i+r].
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.HasBaseReg = false;
+ AddrMode.BaseReg = 0;
+ }
+
+ // If the base register is already taken, see if we can do [r+r].
+ if (AddrMode.Scale == 0) {
+ AddrMode.Scale = 1;
+ AddrMode.ScaledReg = Addr;
+ if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+ return true;
+ AddrMode.Scale = 0;
+ AddrMode.ScaledReg = 0;
+ }
+ // Couldn't match.
+ return false;
+}
+
+/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
+/// inline asm call are due to memory operands. If so, return true, otherwise
+/// return false.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+ const TargetLowering &TLI) {
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+ // If this asm operand is our Value*, and if it isn't an indirect memory
+ // operand, we can't fold it!
+ if (OpInfo.CallOperandVal == OpVal &&
+ (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+ !OpInfo.isIndirect))
+ return false;
+ }
+
+ return true;
+}
+
+/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
+/// memory use. If we find an obviously non-foldable instruction, return true.
+/// Add the ultimately found memory instructions to MemoryUses.
+static bool FindAllMemoryUses(Instruction *I,
+ SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
+ SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+ const TargetLowering &TLI) {
+ // If we already considered this instruction, we're done.
+ if (!ConsideredInsts.insert(I))
+ return false;
+
+ // If this is an obviously unfoldable instruction, bail out.
+ if (!MightBeFoldableInst(I))
+ return true;
+
+ // Loop over all the uses, recursively processing them.
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ unsigned opNo = UI.getOperandNo();
+ if (opNo == 0) return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(SI, opNo));
+ continue;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+ if (!IA) return true;
+
+ // If this is a memory operand, we're cool, otherwise bail out.
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+ return true;
+ continue;
+ }
+
+ if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
+ TLI))
+ return true;
+ }
+
+ return false;
+}
+
+/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
+/// the use site that we're folding it into. If so, there is no cost to
+/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
+/// that we know are live at the instruction already.
+bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+ Value *KnownLive2) {
+ // If Val is either of the known-live values, we know it is live!
+ if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+ return true;
+
+ // All values other than instructions and arguments (e.g. constants) are live.
+ if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+
+ // If Val is a constant-sized alloca in the entry block, it is live. This is
+ // true because it is just a reference to the stack/frame pointer, which is
+ // live for the whole function.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+ if (AI->isStaticAlloca())
+ return true;
+
+ // Check to see if this value is already used in the memory instruction's
+ // block. If so, it's already live into the block at the very least, so we
+ // can reasonably fold it.
+ return Val->isUsedInBasicBlock(MemoryInst->getParent());
+}
+
+/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
+/// mode of the machine to fold the specified instruction into a load or store
+/// that ultimately uses it. However, the specified instruction has multiple
+/// uses. Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
+///
+/// X = ...
+/// Y = X+1
+/// use(Y) -> nonload/store
+/// Z = Y+1
+/// load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
+/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case. This would make Y die earlier.
+bool AddressingModeMatcher::
+IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter) {
+ if (IgnoreProfitability) return true;
+
+ // AMBefore is the addressing mode before this instruction was folded into it,
+ // and AMAfter is the addressing mode after the instruction was folded. Get
+ // the set of registers referenced by AMAfter and subtract out those
+ // referenced by AMBefore: this is the set of values which folding in this
+ // address extends the lifetime of.
+ //
+ // Note that there are only two potential values being referenced here,
+ // BaseReg and ScaleReg (global addresses are always available, as are any
+ // folded immediates).
+ Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+ // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+ // lifetime wasn't extended by adding this instruction.
+ if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ BaseReg = 0;
+ if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ ScaledReg = 0;
+
+ // If folding this instruction (and its subexprs) didn't extend any live
+ // ranges, we're ok with it.
+ if (BaseReg == 0 && ScaledReg == 0)
+ return true;
+
+ // If all uses of this instruction are ultimately load/store/inlineasm's,
+ // check to see if their addressing modes will include this instruction. If
+ // so, we can fold it into all uses, so it doesn't matter if it has multiple
+ // uses.
+ SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallPtrSet<Instruction*, 16> ConsideredInsts;
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+ return false; // Has a non-memory, non-foldable use!
+
+ // Now that we know that all uses of this instruction are part of a chain of
+ // computation involving only operations that could theoretically be folded
+ // into a memory use, loop over each of these uses and see if they could
+ // *actually* fold the instruction.
+ SmallVector<Instruction*, 32> MatchedAddrModeInsts;
+ for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
+ Instruction *User = MemoryUses[i].first;
+ unsigned OpNo = MemoryUses[i].second;
+
+ // Get the access type of this use. If the use isn't a pointer, we don't
+ // know what it accesses.
+ Value *Address = User->getOperand(OpNo);
+ if (!Address->getType()->isPointerTy())
+ return false;
+ Type *AddressAccessTy =
+ cast<PointerType>(Address->getType())->getElementType();
+
+ // Do a match against the root of this address, ignoring profitability. This
+ // will tell us if the addressing mode for the memory operation will
+ // *actually* cover the shared instruction.
+ ExtAddrMode Result;
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+ MemoryInst, Result);
+ Matcher.IgnoreProfitability = true;
+ bool Success = Matcher.MatchAddr(Address, 0);
+ (void)Success; assert(Success && "Couldn't select *anything*?");
+
+ // If the match didn't cover I, then it won't be shared by it.
+ if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
+ I) == MatchedAddrModeInsts.end())
+ return false;
+
+ MatchedAddrModeInsts.clear();
+ }
+
+ return true;
+}
+
+} // end anonymous namespace
+
/// IsNonLocalValue - Return true if the specified values are defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
@@ -1319,9 +1958,6 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
if (CallInst *CI = dyn_cast<CallInst>(I))
return OptimizeCallInst(CI);
- if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
- return DupRetToEnableTailCallOpts(RI);
-
if (SelectInst *SI = dyn_cast<SelectInst>(I))
return OptimizeSelectInst(SI);
@@ -1339,6 +1975,8 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
while (CurInstIterator != BB.end())
MadeChange |= OptimizeInst(CurInstIterator++);
+ MadeChange |= DupRetToEnableTailCallOpts(&BB);
+
return MadeChange;
}
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 369720b3dcef..d5a96eceb993 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -20,14 +20,14 @@
#define DEBUG_TYPE "constprop"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Constant.h"
-#include "llvm/Instruction.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include <set>
using namespace llvm;
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 3ec6f3dcc31b..995782e1bc6b 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -13,15 +13,17 @@
#define DEBUG_TYPE "correlated-value-propagation"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumPhis, "Number of phis propagated");
@@ -97,12 +99,29 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
Value *Incoming = P->getIncomingValue(i);
if (isa<Constant>(Incoming)) continue;
- Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
- P->getIncomingBlock(i),
- BB);
- if (!C) continue;
+ Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB);
+
+ // Check whether the incoming value is a select with a constant that LVI
+ // tells us the incoming value can never be. In that case, replace the
+ // incoming value with the other value of the select. This often allows
+ // us to remove the select later.
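+ // Illustrative case (assumed IR): for "%s = select i1 %c, i32 %v, i32 0",
+ // if LVI proves that %s is never 0 along this edge, the PHI can take %v
+ // directly.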
+ if (!V) {
+ SelectInst *SI = dyn_cast<SelectInst>(Incoming);
+ if (!SI) continue;
+
+ Constant *C = dyn_cast<Constant>(SI->getFalseValue());
+ if (!C) continue;
+
+ if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
+ P->getIncomingBlock(i), BB) !=
+ LazyValueInfo::False)
+ continue;
+
+ DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n');
+ V = SI->getTrueValue();
+ }
- P->setIncomingValue(i, C);
+ P->setIncomingValue(i, V);
Changed = true;
}
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index a2e074fae896..e8a090af40c3 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -18,12 +18,12 @@
#define DEBUG_TYPE "dce"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Instruction.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 736cc05e043e..57432c7d71d8 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -17,25 +17,25 @@
#define DEBUG_TYPE "dse"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumFastStores, "Number of stores deleted");
@@ -376,10 +376,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval argument). If so, then it clearly overwrites any
// other store to the same object.
- const DataLayout &TD = *AA.getDataLayout();
+ const DataLayout *TD = AA.getDataLayout();
- const Value *UO1 = GetUnderlyingObject(P1, &TD),
- *UO2 = GetUnderlyingObject(P2, &TD);
+ const Value *UO1 = GetUnderlyingObject(P1, TD),
+ *UO2 = GetUnderlyingObject(P2, TD);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 101009dd64c7..3c08634bfe22 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -14,18 +14,18 @@
#define DEBUG_TYPE "early-cse"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/ScopedHashTable.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <deque>
using namespace llvm;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index f003e0669966..129af8d45d6f 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -17,11 +17,6 @@
#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
@@ -37,11 +32,16 @@
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/PatternMatch.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -849,8 +849,8 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
+ Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD);
if (StoreBase != LoadBase)
return -1;
@@ -945,7 +945,7 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
- GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &TD);
unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
unsigned Size = MemoryDependenceAnalysis::
@@ -1526,10 +1526,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
- bool isSinglePred = false;
bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
- isSinglePred = true;
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
return false;
@@ -1548,28 +1546,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
assert(TmpBB);
LoadBB = TmpBB;
- // FIXME: It is extremely unclear what this loop is doing, other than
- // artificially restricting loadpre.
- if (isSinglePred) {
- bool isHot = false;
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
- const AvailableValueInBlock &AV = ValuesPerBlock[i];
- if (AV.isSimpleValue())
- // "Hot" Instruction is in some loop (because it dominates its dep.
- // instruction).
- if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
- if (DT->dominates(LI, I)) {
- isHot = true;
- break;
- }
- }
-
- // We are interested only in "hot" instructions. We don't want to do any
- // mis-optimizations here.
- if (!isHot)
- return false;
- }
-
// Check to see how many predecessors have the loaded value fully
// available.
DenseMap<BasicBlock*, Value*> PredLoads;
@@ -1738,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
-static void patchReplacementInstruction(Value *Repl, Instruction *I) {
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
@@ -1780,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) {
}
}
-static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) {
- patchReplacementInstruction(Repl, I);
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
+ patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
}
@@ -1943,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) {
}
// Remove it!
- patchAndReplaceAllUsesWith(AvailableVal, L);
+ patchAndReplaceAllUsesWith(L, AvailableVal);
if (DepLI->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
@@ -2284,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) {
}
// Remove it!
- patchAndReplaceAllUsesWith(repl, I);
+ patchAndReplaceAllUsesWith(I, repl);
if (MD && repl->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
@@ -2371,8 +2347,8 @@ bool GVN::processBlock(BasicBlock *BB) {
E = InstrsToErase.end(); I != E; ++I) {
DEBUG(dbgs() << "GVN removed: " << **I << '\n');
if (MD) MD->removeInstruction(*I);
- (*I)->eraseFromParent();
DEBUG(verifyRemoved(*I));
+ (*I)->eraseFromParent();
}
InstrsToErase.clear();
@@ -2389,7 +2365,7 @@ bool GVN::processBlock(BasicBlock *BB) {
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
bool Changed = false;
- DenseMap<BasicBlock*, Value*> predMap;
+ SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
BasicBlock *CurrentBlock = *DI;
@@ -2445,19 +2421,22 @@ bool GVN::performPRE(Function &F) {
if (P == CurrentBlock) {
NumWithout = 2;
break;
- } else if (!DT->dominates(&F.getEntryBlock(), P)) {
+ } else if (!DT->isReachableFromEntry(P)) {
NumWithout = 2;
break;
}
Value* predV = findLeader(P, ValNo);
if (predV == 0) {
+ predMap.push_back(std::make_pair(static_cast<Value *>(0), P));
PREPred = P;
++NumWithout;
} else if (predV == CurInst) {
+ // CurInst dominates this predecessor.
NumWithout = 2;
+ break;
} else {
- predMap[P] = predV;
+ predMap.push_back(std::make_pair(predV, P));
++NumWith;
}
}
@@ -2504,15 +2483,14 @@ bool GVN::performPRE(Function &F) {
// the PRE predecessor. This is typically because of loads which
// are not value numbered precisely.
if (!success) {
- delete PREInstr;
DEBUG(verifyRemoved(PREInstr));
+ delete PREInstr;
continue;
}
PREInstr->insertBefore(PREPred->getTerminator());
PREInstr->setName(CurInst->getName() + ".pre");
PREInstr->setDebugLoc(CurInst->getDebugLoc());
- predMap[PREPred] = PREInstr;
VN.add(PREInstr, ValNo);
++NumGVNPRE;
@@ -2520,13 +2498,14 @@ bool GVN::performPRE(Function &F) {
addToLeaderTable(ValNo, PREInstr, PREPred);
// Create a PHI to make the value available in this block.
- pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
- PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE),
+ PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(),
CurInst->getName() + ".pre-phi",
CurrentBlock->begin());
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- BasicBlock *P = *PI;
- Phi->addIncoming(predMap[P], P);
+ for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
+ if (Value *V = predMap[i].first)
+ Phi->addIncoming(V, predMap[i].second);
+ else
+ Phi->addIncoming(PREInstr, PREPred);
}
VN.add(Phi, ValNo);
@@ -2551,8 +2530,8 @@ bool GVN::performPRE(Function &F) {
DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
if (MD) MD->removeInstruction(CurInst);
- CurInst->eraseFromParent();
DEBUG(verifyRemoved(CurInst));
+ CurInst->eraseFromParent();
Changed = true;
}
}
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 6301aad6106b..5d02c68a7a47 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -53,21 +53,28 @@
#define DEBUG_TYPE "global-merge"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Attributes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
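+
+ // Illustrative use of the flag above: passing -global-merge-on-const to a
+ // tool that runs this pass (llc, for instance) also merges constant globals.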
+
STATISTIC(NumMerged , "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
@@ -76,7 +83,24 @@ namespace {
const TargetLowering *TLI;
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
- Module &M, bool isConst) const;
+ Module &M, bool isConst, unsigned AddrSpace) const;
+
+ /// \brief Check whether the given variable has been identified as must-keep.
+ /// \pre setMustKeepGlobalVariables must have been called on the Module that
+ /// contains GV.
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variable marked as "used" or used in a landing pad
+ /// instruction for this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+ /// Collect every variable marked as "used".
+ void collectUsedGlobalVariables(Module &M);
+
+ /// Keep track of the GlobalVariables that must not be merged away.
+ SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
public:
static char ID; // Pass identification, replacement for typeid.
@@ -87,6 +111,7 @@ namespace {
virtual bool doInitialization(Module &M);
virtual bool runOnFunction(Function &F);
+ virtual bool doFinalization(Module &M);
const char *getPassName() const {
return "Merge internal globals";
@@ -118,7 +143,7 @@ INITIALIZE_PASS(GlobalMerge, "global-merge",
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
- Module &M, bool isConst) const {
+ Module &M, bool isConst, unsigned AddrSpace) const {
const DataLayout *TD = TLI->getDataLayout();
// FIXME: Infer the maximum possible offset depending on the actual users
@@ -150,7 +175,9 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
GlobalValue::InternalLinkage,
- MergedInit, "_MergedGlobals");
+ MergedInit, "_MergedGlobals",
+ 0, GlobalVariable::NotThreadLocal,
+ AddrSpace);
for (size_t k = i; k < j; ++k) {
Constant *Idx[2] = {
ConstantInt::get(Int32Ty, 0),
@@ -167,12 +194,51 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
return true;
}
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+ // Extract global variables from llvm.used array
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ collectUsedGlobalVariables(M);
+
+ for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+ ++IFn) {
+ for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+ IBB != IEndBB; ++IBB) {
+      // Follow the invoke link to find the landing pad instruction.
+ const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+ if (!II) continue;
+
+ const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+ // Look for globals in the clauses of the landing pad instruction
+ for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+ Idx != NumClauses; ++Idx)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+ ->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+}
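
The landing-pad scan above matters because exception-handling type matching
compares typeinfo globals by address. A hedged C++ sketch of code that pins
such a global (mayThrow is an assumed external function):

    struct E {};
    void mayThrow();
    int guarded() {
      try { mayThrow(); }
      catch (E &) { return 1; }  // landingpad clause references E's typeinfo,
      return 0;                  // so that global must not be merged away
    }
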
bool GlobalMerge::doInitialization(Module &M) {
- SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
+ DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
+ BSSGlobals;
const DataLayout *TD = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
+ setMustKeepGlobalVariables(M);
// Grab all non-const globals.
for (Module::global_iterator I = M.global_begin(),
@@ -181,6 +247,11 @@ bool GlobalMerge::doInitialization(Module &M) {
if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
continue;
+ PointerType *PT = dyn_cast<PointerType>(I->getType());
+ assert(PT && "Global variable is not a pointer!");
+
+ unsigned AddressSpace = PT->getAddressSpace();
+
// Ignore fancy-aligned globals for now.
unsigned Alignment = TD->getPreferredAlignment(I);
Type *Ty = I->getType()->getElementType();
@@ -192,27 +263,36 @@ bool GlobalMerge::doInitialization(Module &M) {
I->getName().startswith(".llvm."))
continue;
+ // Ignore all "required" globals:
+ if (isMustKeepGlobalVariable(I))
+ continue;
+
if (TD->getTypeAllocSize(Ty) < MaxOffset) {
if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
.isBSSLocal())
- BSSGlobals.push_back(I);
+ BSSGlobals[AddressSpace].push_back(I);
else if (I->isConstant())
- ConstGlobals.push_back(I);
+ ConstGlobals[AddressSpace].push_back(I);
else
- Globals.push_back(I);
+ Globals[AddressSpace].push_back(I);
}
}
- if (Globals.size() > 1)
- Changed |= doMerge(Globals, M, false);
- if (BSSGlobals.size() > 1)
- Changed |= doMerge(BSSGlobals, M, false);
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = Globals.begin(), E = Globals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, false, I->first);
+
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, false, I->first);
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
- // them in the future.
- // if (ConstGlobals.size() > 1)
- // Changed |= doMerge(ConstGlobals, M, true);
+ if (EnableGlobalMergeOnConst)
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, true, I->first);
return Changed;
}
@@ -221,6 +301,11 @@ bool GlobalMerge::runOnFunction(Function &F) {
return false;
}
+bool GlobalMerge::doFinalization(Module &M) {
+ MustKeepGlobalVariables.clear();
+ return false;
+}
+
Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
return new GlobalMerge(tli);
}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 310fd6147aa9..8e76c78f5ac3 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -26,28 +26,28 @@
#define DEBUG_TYPE "indvars"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumWidened , "Number of indvars widened");
@@ -535,6 +535,45 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
if (!SE->isLoopInvariant(ExitValue, L))
continue;
+      // Computing the value outside of the loop brings no benefit if:
+      //  - it is definitely used inside the loop in a way which cannot be
+      //    optimized away.
+      //  - no use outside of the loop can take advantage of hoisting the
+      //    computation out of the loop.
+      if (ExitValue->getSCEVType() >= scMulExpr) {
+ unsigned NumHardInternalUses = 0;
+ unsigned NumSoftExternalUses = 0;
+ unsigned NumUses = 0;
+ for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end();
+ IB!=IE && NumUses<=6 ; ++IB) {
+ Instruction *UseInstr = cast<Instruction>(*IB);
+ unsigned Opc = UseInstr->getOpcode();
+ NumUses++;
+ if (L->contains(UseInstr)) {
+ if (Opc == Instruction::Call || Opc == Instruction::Ret)
+ NumHardInternalUses++;
+ } else {
+ if (Opc == Instruction::PHI) {
+ // Do not count the Phi as a use. LCSSA may have inserted
+ // plenty of trivial ones.
+ NumUses--;
+ for (Value::use_iterator PB=UseInstr->use_begin(),
+ PE=UseInstr->use_end();
+ PB!=PE && NumUses<=6 ; ++PB, ++NumUses) {
+ unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
+ if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ continue;
+ }
+ if (Opc != Instruction::Call && Opc != Instruction::Ret)
+ NumSoftExternalUses++;
+ }
+ }
+ if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses)
+ continue;
+ }
+
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
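
A hedged C sketch of the case the new heuristic skips: the only "hard" use of
the exit value is a call inside the loop, and no user after the loop would
benefit from a re-expanded closed form (consume is an assumed external):

    void consume(long);
    void f(long n) {
      long s = 0;
      for (long i = 0; i < n; ++i) {
        s += i;
        consume(s);  // hard internal use: s is computed in the loop anyway
      }
      // no use of s after the loop; expanding n*(n-1)/2 here only adds code
    }
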
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index e7ffa09f1767..b61c5ba56e0c 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -13,28 +13,28 @@
#define DEBUG_TYPE "jump-threading"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumThreads, "Number of jumps threaded");
@@ -216,19 +216,24 @@ bool JumpThreading::runOnFunction(Function &F) {
}
/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
-/// thread across it.
-static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+/// thread across it. Stop scanning the block when passing the threshold.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
+ unsigned Threshold) {
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I = BB->getFirstNonPHI();
// FIXME: THREADING will delete values that are just used to compute the
// branch, so they shouldn't count against the duplication cost.
-
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
for (; !isa<TerminatorInst>(I); ++I) {
+
+ // Stop scanning the block if we've reached the threshold.
+ if (Size > Threshold)
+ return Size;
+
// Debugger intrinsics don't incur code size.
if (isa<DbgInfoIntrinsic>(I)) continue;
@@ -244,7 +249,11 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
// as having cost of 2 total, and if they are a vector intrinsic, we model
// them as having cost 1.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (!isa<IntrinsicInst>(CI))
+ if (CI->hasFnAttr(Attribute::NoDuplicate))
+ // Blocks with NoDuplicate are modelled as having infinite cost, so they
+ // are never duplicated.
+ return ~0U;
+ else if (!isa<IntrinsicInst>(CI))
Size += 3;
else if (!CI->getType()->isVectorTy())
Size += 1;
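
For reference, a hedged sketch of how a NoDuplicate call might reach this
cost model from source, assuming Clang's __attribute__((noduplicate))
spelling (barrier is a made-up function):

    __attribute__((noduplicate)) void barrier();
    void g(bool c1, bool c2) {
      if (c1) { /* ... */ }
      barrier();  // cost ~0U above: the containing block is never duplicated
      if (c2) { /* ... */ }
    }
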
@@ -1337,7 +1346,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
return false;
}
- unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, Threshold);
if (JumpThreadCost > Threshold) {
DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
@@ -1481,7 +1490,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
return false;
}
- unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, Threshold);
if (DuplicationCost > Threshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 4818437c243a..f94cd2a073ef 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -32,27 +32,28 @@
#define DEBUG_TYPE "licm"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
using namespace llvm;
@@ -90,6 +91,8 @@ namespace {
AU.addRequired<TargetLibraryInfo>();
}
+ using llvm::Pass::doFinalization;
+
bool doFinalization() {
assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
return false;
@@ -437,13 +440,12 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
}
// Only these instructions are hoistable/sinkable.
- bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) ||
- isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
- isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
- isa<ExtractElementInst>(I) ||
- isa<ShuffleVectorInst>(I));
- if (!HoistableKind)
- return false;
+ if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) &&
+ !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) &&
+ !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
+ !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) &&
+ !isa<InsertValueInst>(I))
+ return false;
return isSafeToExecuteUnconditionally(I);
}
@@ -663,16 +665,18 @@ namespace {
AliasSetTracker &AST;
DebugLoc DL;
int Alignment;
+ MDNode *TBAATag;
public:
LoopPromoter(Value *SP,
const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
SmallPtrSet<Value*, 4> &PMA,
SmallVectorImpl<BasicBlock*> &LEB,
SmallVectorImpl<Instruction*> &LIP,
- AliasSetTracker &ast, DebugLoc dl, int alignment)
+ AliasSetTracker &ast, DebugLoc dl, int alignment,
+ MDNode *TBAATag)
: LoadAndStorePromoter(Insts, S), SomePtr(SP),
PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP),
- AST(ast), DL(dl), Alignment(alignment) {}
+ AST(ast), DL(dl), Alignment(alignment), TBAATag(TBAATag) {}
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &) const {
@@ -696,6 +700,7 @@ namespace {
StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
+ if (TBAATag) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
}
}
@@ -749,10 +754,11 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
unsigned Alignment = 1;
+ MDNode *TBAATag = 0;
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
- // different sizes.
+ // different sizes. While we are at it, collect alignment and TBAA info.
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
Value *ASIV = ASI->getValue();
PointerMustAliases.insert(ASIV);
@@ -794,8 +800,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// instruction will be executed, update the alignment.
// Larger is better, with the exception of 0 being the best alignment.
unsigned InstAlignment = store->getAlignment();
- if ((InstAlignment > Alignment || InstAlignment == 0)
- && (Alignment != 0))
+ if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
if (isGuaranteedToExecute(*Use)) {
GuaranteedToExecute = true;
Alignment = InstAlignment;
@@ -807,6 +812,15 @@ void LICM::PromoteAliasSet(AliasSet &AS,
} else
return; // Not a load or store.
+ // Merge the TBAA tags.
+ if (LoopUses.empty()) {
+ // On the first load/store, just take its TBAA tag.
+ TBAATag = Use->getMetadata(LLVMContext::MD_tbaa);
+ } else if (TBAATag) {
+ TBAATag = MDNode::getMostGenericTBAA(TBAATag,
+ Use->getMetadata(LLVMContext::MD_tbaa));
+ }
+
LoopUses.push_back(Use);
}
}
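
A hedged C sketch of the promotion this TBAA plumbing serves: every access to
*p in the loop carries the same scalar tag, so the hoisted preheader load and
the sunk exit store keep the tag merged via MDNode::getMostGenericTBAA
instead of dropping aliasing information:

    int accumulate(int *p, int n) {
      int acc = 0;
      for (int i = 0; i < n; ++i) {
        *p += i;    // store promoted to a register across the loop
        acc += *p;  // load promoted likewise
      }
      return acc;   // one tagged load before the loop, one tagged store after
    }
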
@@ -839,7 +853,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, *CurAST, DL, Alignment);
+ InsertPts, *CurAST, DL, Alignment, TBAATag);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
@@ -848,6 +862,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
Preheader->getTerminator());
PreheaderLoad->setAlignment(Alignment);
PreheaderLoad->setDebugLoc(DL);
+ if (TBAATag) PreheaderLoad->setMetadata(LLVMContext::MD_tbaa, TBAATag);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
// Rewrite all the loads in the loop and remember all the definitions from
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 3771f5aa97b4..0b62050b17a0 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -16,11 +16,11 @@
#define DEBUG_TYPE "loop-delete"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Analysis/LoopPass.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
STATISTIC(NumDeleted, "Number of loops deleted");
@@ -34,13 +34,9 @@ namespace {
}
// Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop* L, LPPassManager& LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
- bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks,
- bool &Changed, BasicBlock *Preheader);
-
- virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
@@ -53,6 +49,12 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
}
+
+ private:
+ bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
+ bool &Changed, BasicBlock *Preheader);
+
};
}
@@ -67,18 +69,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
-Pass* llvm::createLoopDeletionPass() {
+Pass *llvm::createLoopDeletionPass() {
return new LoopDeletion();
}
-/// IsLoopDead - Determined if a loop is dead. This assumes that we've already
+/// isLoopDead - Determine if a loop is dead. This assumes that we've already
/// checked for unique exit and exiting blocks, and that the code is in LCSSA
/// form.
-bool LoopDeletion::IsLoopDead(Loop* L,
- SmallVector<BasicBlock*, 4>& exitingBlocks,
- SmallVector<BasicBlock*, 4>& exitBlocks,
+bool LoopDeletion::isLoopDead(Loop *L,
+ SmallVector<BasicBlock*, 4> &exitingBlocks,
+ SmallVector<BasicBlock*, 4> &exitBlocks,
bool &Changed, BasicBlock *Preheader) {
- BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock *exitBlock = exitBlocks[0];
// Make sure that all PHI entries coming from the loop are loop invariant.
// Because the code is in LCSSA form, any values used outside of the loop
@@ -86,19 +88,19 @@ bool LoopDeletion::IsLoopDead(Loop* L,
// sufficient to guarantee that no loop-variant values are used outside
// of the loop.
BasicBlock::iterator BI = exitBlock->begin();
- while (PHINode* P = dyn_cast<PHINode>(BI)) {
- Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
+ Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
// Make sure all exiting blocks produce the same incoming value for the exit
// block. If there are different incoming values for different exiting
// blocks, then it is impossible to statically determine which value should
// be used.
- for (unsigned i = 1; i < exitingBlocks.size(); ++i) {
+ for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) {
if (incoming != P->getIncomingValueForBlock(exitingBlocks[i]))
return false;
}
- if (Instruction* I = dyn_cast<Instruction>(incoming))
+ if (Instruction *I = dyn_cast<Instruction>(incoming))
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
@@ -127,10 +129,10 @@ bool LoopDeletion::IsLoopDead(Loop* L,
/// so could change the halting/non-halting nature of a program.
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
-bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
+bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// We can only remove the loop if there is a preheader that we can
// branch from after removing it.
- BasicBlock* preheader = L->getLoopPreheader();
+ BasicBlock *preheader = L->getLoopPreheader();
if (!preheader)
return false;
@@ -158,19 +160,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Finally, we have to check that the loop really is dead.
bool Changed = false;
- if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
+ if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader))
return Changed;
// Don't remove loops for which we can't solve the trip count.
// They could be infinite, in which case we'd be changing program behavior.
- ScalarEvolution& SE = getAnalysis<ScalarEvolution>();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
const SCEV *S = SE.getMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(S))
return Changed;
// Now that we know the removal is safe, remove the loop by changing the
// branch from the preheader to go to the single exit block.
- BasicBlock* exitBlock = exitBlocks[0];
+ BasicBlock *exitBlock = exitBlocks[0];
// Because we're deleting a large chunk of code at once, the sequence in which
// we remove things is very important to avoid invalidation issues. Don't
@@ -182,14 +184,14 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
SE.forgetLoop(L);
// Connect the preheader directly to the exit block.
- TerminatorInst* TI = preheader->getTerminator();
+ TerminatorInst *TI = preheader->getTerminator();
TI->replaceUsesOfWith(L->getHeader(), exitBlock);
// Rewrite phis in the exit block to get their inputs from
// the preheader instead of the exiting block.
- BasicBlock* exitingBlock = exitingBlocks[0];
+ BasicBlock *exitingBlock = exitingBlocks[0];
BasicBlock::iterator BI = exitBlock->begin();
- while (PHINode* P = dyn_cast<PHINode>(BI)) {
+ while (PHINode *P = dyn_cast<PHINode>(BI)) {
int j = P->getBasicBlockIndex(exitingBlock);
assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
P->setIncomingBlock(j, preheader);
@@ -200,7 +202,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
- DominatorTree& DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTree>();
SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
@@ -230,7 +232,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Finally, the blocks from loopinfo. This has to happen late because
// otherwise our loop iterators won't work.
- LoopInfo& loopInfo = getAnalysis<LoopInfo>();
+ LoopInfo &loopInfo = getAnalysis<LoopInfo>();
SmallPtrSet<BasicBlock*, 8> blocks;
blocks.insert(L->block_begin(), L->block_end());
for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(),
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a44e798f121b..8258719a0200 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -43,18 +43,19 @@
#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -63,16 +64,83 @@ STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
namespace {
+
+ class LoopIdiomRecognize;
+
+  /// This class defines some utility functions for loop idiom recognition.
+ class LIRUtil {
+ public:
+    /// Return true iff the block contains nothing but an unconditional
+    /// branch (aka goto instruction).
+ static bool isAlmostEmpty(BasicBlock *);
+
+ static BranchInst *getBranch(BasicBlock *BB) {
+ return dyn_cast<BranchInst>(BB->getTerminator());
+ }
+
+ /// Return the condition of the branch terminating the given basic block.
+    static Value *getBrCondition(BasicBlock *);
+
+    /// Derive the precondition block (i.e. the block that guards the loop
+ /// preheader) from the given preheader.
+ static BasicBlock *getPrecondBb(BasicBlock *PreHead);
+ };
+
+  /// This class recognizes the population-count idiom in a non-countable
+  /// loop. Currently it only recognizes this pattern:
+ /// \code
+ /// while(x) {cnt++; ...; x &= x - 1; ...}
+ /// \endcode
+ class NclPopcountRecognize {
+ LoopIdiomRecognize &LIR;
+ Loop *CurLoop;
+ BasicBlock *PreCondBB;
+
+ typedef IRBuilder<> IRBuilderTy;
+
+ public:
+ explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR);
+ bool recognize();
+
+ private:
+    /// Take a glimpse of the loop to see whether it is worth going ahead
+    /// with recognizing the idiom.
+ bool preliminaryScreen();
+
+    /// Check whether the given conditional branch compares a variable
+    /// against zero and, if the variable is non-zero, transfers control to
+    /// the loop entry. If the branch matches that behavior, the variable
+    /// involved in the comparison is returned. This function is used to
+    /// check whether the precondition and postcondition of the loop are in
+    /// the desired form.
+    Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const;
+
+    /// Return true iff the idiom is detected in the loop, in which case
+    /// 1) \p CntInst is set to the instruction counting the population bits,
+    /// 2) \p CntPhi is set to the corresponding phi node, and 3) \p Var is
+    /// set to the value whose population bits are being counted.
+    bool detectIdiom(Instruction *&CntInst, PHINode *&CntPhi,
+                     Value *&Var) const;
+
+ /// Insert ctpop intrinsic function and some obviously dead instructions.
+    void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var);
+
+ /// Create llvm.ctpop.* intrinsic function.
+ CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
+ };
+
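
For concreteness, a sketch of the idiom this class targets, written in C++
(the classic Kernighan bit-count loop), and the intended end result:

    int popcount(unsigned x) {
      int cnt = 0;
      while (x) {      // the precondition guards the loop: runs only if x != 0
        cnt++;
        x &= x - 1;    // clear the lowest set bit
      }
      return cnt;      // conceptually becomes: return __builtin_popcount(x);
    }
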
class LoopIdiomRecognize : public LoopPass {
Loop *CurLoop;
const DataLayout *TD;
DominatorTree *DT;
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
+ const TargetTransformInfo *TTI;
public:
static char ID;
explicit LoopIdiomRecognize() : LoopPass(ID) {
initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ TD = 0; DT = 0; SE = 0; TLI = 0; TTI = 0;
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -109,7 +177,34 @@ namespace {
AU.addPreserved<DominatorTree>();
AU.addRequired<DominatorTree>();
AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetTransformInfo>();
+ }
+
+ const DataLayout *getDataLayout() {
+      return TD ? TD : (TD = getAnalysisIfAvailable<DataLayout>());
+ }
+
+ DominatorTree *getDominatorTree() {
+ return DT ? DT : (DT=&getAnalysis<DominatorTree>());
+ }
+
+ ScalarEvolution *getScalarEvolution() {
+ return SE ? SE : (SE = &getAnalysis<ScalarEvolution>());
}
+
+ TargetLibraryInfo *getTargetLibraryInfo() {
+ return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>());
+ }
+
+ const TargetTransformInfo *getTargetTransformInfo() {
+ return TTI ? TTI : (TTI = &getAnalysis<TargetTransformInfo>());
+ }
+
+ Loop *getLoop() const { return CurLoop; }
+
+ private:
+ bool runOnNoncountableLoop();
+ bool runOnCountableLoop();
};
}
@@ -123,6 +218,7 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
false, false)
@@ -172,24 +268,393 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
deleteDeadInstruction(I, SE, TLI);
}
-bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
- CurLoop = L;
+//===----------------------------------------------------------------------===//
+//
+// Implementation of LIRUtil
+//
+//===----------------------------------------------------------------------===//
- // If the loop could not be converted to canonical form, it must have an
- // indirectbr in it, just give up.
- if (!L->getLoopPreheader())
+// This function returns true iff the given block contains nothing but a goto.
+// A typical use of this function is to check whether the preheader is
+// "almost" empty, so that the generated intrinsic function can be moved
+// across the preheader and placed at the end of the precondition block
+// without any concern about breaking data dependences.
+bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
+ if (BranchInst *Br = getBranch(BB)) {
+ return Br->isUnconditional() && BB->size() == 1;
+ }
+ return false;
+}
+
+Value *LIRUtil::getBrCondition(BasicBlock *BB) {
+ BranchInst *Br = getBranch(BB);
+ return Br ? Br->getCondition() : 0;
+}
+
+BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
+ if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
+ BranchInst *Br = getBranch(BB);
+ return Br && Br->isConditional() ? BB : 0;
+ }
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of NclPopcountRecognize
+//
+//===----------------------------------------------------------------------===//
+
+NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
+ LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) {
+}
+
+bool NclPopcountRecognize::preliminaryScreen() {
+ const TargetTransformInfo *TTI = LIR.getTargetTransformInfo();
+ if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
return false;
- // Disable loop idiom recognition if the function's name is a common idiom.
- StringRef Name = L->getHeader()->getParent()->getName();
- if (Name == "memset" || Name == "memcpy")
+  // Counting the population is usually done with a few arithmetic
+  // instructions. Such instructions can easily be "absorbed" by vacant slots
+  // in a non-compact loop. Therefore, recognizing the popcount idiom only
+  // makes sense in a compact loop.
+
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
return false;
- // The trip count of the loop must be analyzable.
- SE = &getAnalysis<ScalarEvolution>();
- if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+ BasicBlock *LoopBody = *(CurLoop->block_begin());
+ if (LoopBody->size() >= 20) {
+ // The loop is too big, bail out.
+ return false;
+ }
+
+ // It should have a preheader containing nothing but a goto instruction.
+ BasicBlock *PreHead = CurLoop->getLoopPreheader();
+ if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead))
+ return false;
+
+  // It should have a precondition block where the generated popcount
+  // intrinsic function will be inserted.
+ PreCondBB = LIRUtil::getPrecondBb(PreHead);
+ if (!PreCondBB)
+ return false;
+
+ return true;
+}
+
+Value *NclPopcountRecognize::matchCondition(BranchInst *Br,
+                                            BasicBlock *LoopEntry) const {
+ if (!Br || !Br->isConditional())
+ return 0;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
+ if (!Cond)
+ return 0;
+
+ ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
+ if (!CmpZero || !CmpZero->isZero())
+ return 0;
+
+ ICmpInst::Predicate Pred = Cond->getPredicate();
+ if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
+ (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
+ return Cond->getOperand(0);
+
+ return 0;
+}
+
+bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
+ PHINode *&CntPhi,
+ Value *&Var) const {
+  // The following code tries to detect this idiom:
+ //
+ // if (x0 != 0)
+ // goto loop-exit // the precondition of the loop
+ // cnt0 = init-val;
+ // do {
+ // x1 = phi (x0, x2);
+ // cnt1 = phi(cnt0, cnt2);
+ //
+ // cnt2 = cnt1 + 1;
+ // ...
+ // x2 = x1 & (x1 - 1);
+ // ...
+ // } while(x != 0);
+ //
+ // loop-exit:
+ //
+
+  // step 1: Check to see if the loop-back branch matches this pattern:
+ // "if (a!=0) goto loop-entry".
+ BasicBlock *LoopEntry;
+ Instruction *DefX2, *CountInst;
+ Value *VarX1, *VarX0;
+ PHINode *PhiX, *CountPhi;
+
+ DefX2 = CountInst = 0;
+ VarX1 = VarX0 = 0;
+ PhiX = CountPhi = 0;
+ LoopEntry = *(CurLoop->block_begin());
+
+ // step 1: Check if the loop-back branch is in desirable form.
+ {
+    if (Value *T = matchCondition(LIRUtil::getBranch(LoopEntry), LoopEntry))
+ DefX2 = dyn_cast<Instruction>(T);
+ else
+ return false;
+ }
+
+ // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
+ {
+ if (!DefX2 || DefX2->getOpcode() != Instruction::And)
+ return false;
+
+ BinaryOperator *SubOneOp;
+
+ if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
+ VarX1 = DefX2->getOperand(1);
+ else {
+ VarX1 = DefX2->getOperand(0);
+ SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
+ }
+ if (!SubOneOp)
+ return false;
+
+ Instruction *SubInst = cast<Instruction>(SubOneOp);
+ ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
+ if (!Dec ||
+ !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
+ (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) {
+ return false;
+ }
+ }
+
+ // step 3: Check the recurrence of variable X
+ {
+ PhiX = dyn_cast<PHINode>(VarX1);
+ if (!PhiX ||
+ (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
+ return false;
+ }
+ }
+
+  // step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1
+ {
+ CountInst = NULL;
+ for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
+ IterE = LoopEntry->end(); Iter != IterE; Iter++) {
+ Instruction *Inst = Iter;
+ if (Inst->getOpcode() != Instruction::Add)
+ continue;
+
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ if (!Inc || !Inc->isOne())
+ continue;
+
+ PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
+ if (!Phi || Phi->getParent() != LoopEntry)
+ continue;
+
+      // Check if the result of the instruction is live out of the loop.
+ bool LiveOutLoop = false;
+ for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
+ I != E; I++) {
+ if ((cast<Instruction>(*I))->getParent() != LoopEntry) {
+ LiveOutLoop = true; break;
+ }
+ }
+
+ if (LiveOutLoop) {
+ CountInst = Inst;
+ CountPhi = Phi;
+ break;
+ }
+ }
+
+ if (!CountInst)
+ return false;
+ }
+
+ // step 5: check if the precondition is in this form:
+ // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
+ {
+ BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
+    Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader());
+ if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
+ return false;
+
+ CntInst = CountInst;
+ CntPhi = CountPhi;
+ Var = T;
+ }
+
+ return true;
+}
+
+void NclPopcountRecognize::transform(Instruction *CntInst,
+ PHINode *CntPhi, Value *Var) {
+
+ ScalarEvolution *SE = LIR.getScalarEvolution();
+ TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo();
+ BasicBlock *PreHead = CurLoop->getLoopPreheader();
+ BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
+ const DebugLoc DL = CntInst->getDebugLoc();
+
+  // Assume that before the transformation, the loop looks like this:
+ // if (x) // the precondition
+ // do { cnt++; x &= x - 1; } while(x);
+
+ // Step 1: Insert the ctpop instruction at the end of the precondition block
+ IRBuilderTy Builder(PreCondBr);
+ Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
+ {
+ PopCnt = createPopcntIntrinsic(Builder, Var, DL);
+ NewCount = PopCntZext =
+ Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
+
+ if (NewCount != PopCnt)
+ (cast<Instruction>(NewCount))->setDebugLoc(DL);
+
+ // TripCnt is exactly the number of iterations the loop has
+ TripCnt = NewCount;
+
+    // If the population counter's initial value is not zero, insert Add Inst.
+ Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
+ ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+ if (!InitConst || !InitConst->isZero()) {
+ NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+ (cast<Instruction>(NewCount))->setDebugLoc(DL);
+ }
+ }
+
+  // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to
+  //   "if (NewCount == 0) loop-exit". Without this change, the intrinsic
+  //   call would be partially dead code, and downstream passes would drag
+  //   it back from the precondition block to the preheader.
+ {
+ ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
+
+ Value *Opnd0 = PopCntZext;
+ Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
+ if (PreCond->getOperand(0) != Var)
+ std::swap(Opnd0, Opnd1);
+
+ ICmpInst *NewPreCond =
+ cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
+ PreCond->replaceAllUsesWith(NewPreCond);
+
+ deleteDeadInstruction(PreCond, *SE, TLI);
+ }
+
+  // Step 3: Note that the population count is exactly the trip count of the
+  // loop in question, which enables us to convert the loop from a
+  // non-countable loop into a countable one. The benefit is twofold:
+  //
+  //  - If the loop only counts the population, the entire loop becomes dead
+  //    after the transformation. It is much easier to prove a countable loop
+  //    dead than to prove a non-countable one. (In some C dialects, an
+  //    infinite loop isn't dead even if it computes nothing useful. In
+  //    general, DCE needs to prove a non-countable loop finite before it can
+  //    safely delete it.)
+  //
+  //  - If the loop also performs something else, it remains alive.
+  //    Since it is transformed into countable form, it can be aggressively
+  //    optimized by optimizations which are in general not applicable
+  //    to non-countable loops.
+ //
+  // After this step, this loop (conceptually) would look like the following:
+  //   newcnt = __builtin_ctpop(x);
+  //   t = newcnt;
+  //   if (x)
+  //     do { cnt++; x &= x-1; t--; } while (t > 0);
+ BasicBlock *Body = *(CurLoop->block_begin());
+ {
+ BranchInst *LbBr = LIRUtil::getBranch(Body);
+ ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
+ Type *Ty = TripCnt->getType();
+
+ PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin());
+
+ Builder.SetInsertPoint(LbCond);
+ Value *Opnd1 = cast<Value>(TcPhi);
+ Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1));
+ Instruction *TcDec =
+ cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true));
+
+ TcPhi->addIncoming(TripCnt, PreHead);
+ TcPhi->addIncoming(TcDec, Body);
+
+ CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ?
+ CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
+ LbCond->setPredicate(Pred);
+ LbCond->setOperand(0, TcDec);
+ LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0)));
+ }
+
+ // Step 4: All the references to the original population counter outside
+ // the loop are replaced with the NewCount -- the value returned from
+ // __builtin_ctpop().
+ {
+ SmallVector<Value *, 4> CntUses;
+ for (Value::use_iterator I = CntInst->use_begin(), E = CntInst->use_end();
+ I != E; I++) {
+ if (cast<Instruction>(*I)->getParent() != Body)
+ CntUses.push_back(*I);
+ }
+ for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) {
+ (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
+ }
+ }
+
+  // Step 5: Forget the "non-computable" trip-count SCEV associated with the
+ // loop. The loop would otherwise not be deleted even if it becomes empty.
+ SE->forgetLoop(CurLoop);
+}
+
+CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder,
+ Value *Val, DebugLoc DL) {
+ Value *Ops[] = { Val };
+ Type *Tys[] = { Val->getType() };
+
+ Module *M = (*(CurLoop->block_begin()))->getParent()->getParent();
+ Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
+ CallInst *CI = IRBuilder.CreateCall(Func, Ops);
+ CI->setDebugLoc(DL);
+
+ return CI;
+}
+
+/// recognize - Detect the population count idiom in a non-countable loop.
+/// If detected, transform the relevant code into a call to the popcount
+/// intrinsic function and return true; otherwise, return false.
+bool NclPopcountRecognize::recognize() {
+
+ if (!LIR.getTargetTransformInfo())
+ return false;
+
+ LIR.getScalarEvolution();
+
+ if (!preliminaryScreen())
return false;
- const SCEV *BECount = SE->getBackedgeTakenCount(L);
+
+ Instruction *CntInst;
+ PHINode *CntPhi;
+ Value *Val;
+ if (!detectIdiom(CntInst, CntPhi, Val))
+ return false;
+
+ transform(CntInst, CntPhi, Val);
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of LoopIdiomRecognize
+//
+//===----------------------------------------------------------------------===//
+
+bool LoopIdiomRecognize::runOnCountableLoop() {
+ const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
if (isa<SCEVCouldNotCompute>(BECount)) return false;
// If this loop executes exactly one time, then it should be peeled, not
@@ -199,24 +664,29 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
// We require target data for now.
- TD = getAnalysisIfAvailable<DataLayout>();
- if (TD == 0) return false;
+ if (!getDataLayout())
+ return false;
+
+ // set DT
+ (void)getDominatorTree();
- DT = &getAnalysis<DominatorTree>();
LoopInfo &LI = getAnalysis<LoopInfo>();
TLI = &getAnalysis<TargetLibraryInfo>();
+ // set TLI
+ (void)getTargetLibraryInfo();
+
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
DEBUG(dbgs() << "loop-idiom Scanning: F["
- << L->getHeader()->getParent()->getName()
- << "] Loop %" << L->getHeader()->getName() << "\n");
+ << CurLoop->getHeader()->getParent()->getName()
+ << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
bool MadeChange = false;
// Scan all the blocks in the loop that are not in subloops.
- for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
- ++BI) {
+ for (Loop::block_iterator BI = CurLoop->block_begin(),
+ E = CurLoop->block_end(); BI != E; ++BI) {
// Ignore blocks in subloops.
if (LI.getLoopFor(*BI) != CurLoop)
continue;
@@ -226,6 +696,33 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
return MadeChange;
}
+bool LoopIdiomRecognize::runOnNoncountableLoop() {
+ NclPopcountRecognize Popcount(*this);
+ if (Popcount.recognize())
+ return true;
+
+ return false;
+}
+
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ CurLoop = L;
+
+ // If the loop could not be converted to canonical form, it must have an
+  // indirectbr in it; just give up.
+ if (!L->getLoopPreheader())
+ return false;
+
+ // Disable loop idiom recognition if the function's name is a common idiom.
+ StringRef Name = L->getHeader()->getParent()->getName();
+ if (Name == "memset" || Name == "memcpy")
+ return false;
+
+ SE = &getAnalysis<ScalarEvolution>();
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ return runOnCountableLoop();
+ return runOnNoncountableLoop();
+}
+
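
The countable-loop path keeps the pass's original job. A hedged C sketch of
the store idiom it turns into a library call:

    void zero(char *p, unsigned long n) {
      for (unsigned long i = 0; i != n; ++i)
        p[i] = 0;      // recognized and rewritten to: memset(p, 0, n)
      // an analogous load+store loop is rewritten to memcpy
    }
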
/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count. This block is known to be in the current
/// loop and not in any subloops.
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 558f62e6b439..a23860aad80e 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -12,17 +12,18 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-instsimplify"
-#include "llvm/Instructions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumSimplified, "Number of redundant instructions simplified");
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index abe07aa9d34d..e98ae953e532 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -13,20 +13,21 @@
#define DEBUG_TYPE "loop-rotate"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
#define MAX_HEADER_SIZE 16
@@ -51,6 +52,7 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -59,11 +61,13 @@ namespace {
private:
LoopInfo *LI;
+ const TargetTransformInfo *TTI;
};
}
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -75,6 +79,7 @@ Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
/// the loop is rotated at least once.
bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
+ TTI = &getAnalysis<TargetTransformInfo>();
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
@@ -274,10 +279,16 @@ bool LoopRotate::rotateLoop(Loop *L) {
if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
return false;
- // Check size of original header and reject loop if it is very big.
+ // Check size of original header and reject loop if it is very big or we can't
+ // duplicate blocks inside it.
{
CodeMetrics Metrics;
- Metrics.analyzeBasicBlock(OrigHeader);
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI);
+ if (Metrics.notDuplicatable) {
+ DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable"
+ << " instructions: "; L->dump());
+ return false;
+ }
if (Metrics.NumInsts > MAX_HEADER_SIZE)
return false;
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 958348d9faad..73e44d7edf5e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -37,8 +37,8 @@
//
// TODO: Handle multiple loops at a time.
//
-// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
-// instead of a GlobalValue?
+// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
+// of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
// smaller encoding (on x86 at least).
@@ -54,27 +54,27 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-reduce"
-#include "llvm/AddressingMode.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@@ -224,16 +224,24 @@ namespace {
/// computing satisfying a use. It may include broken-out immediates and scaled
/// registers.
struct Formula {
- /// AM - This is used to represent complex addressing, as well as other kinds
- /// of interesting uses.
- AddrMode AM;
+ /// Global base address used for complex addressing.
+ GlobalValue *BaseGV;
+
+ /// Base offset for complex addressing.
+ int64_t BaseOffset;
+
+ /// Whether any complex addressing has a base register.
+ bool HasBaseReg;
+
+ /// The scale of any complex addressing.
+ int64_t Scale;
/// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty, AM.HasBaseReg should be set to true.
- SmallVector<const SCEV *, 2> BaseRegs;
+  /// non-empty, HasBaseReg should be set to true.
+ SmallVector<const SCEV *, 4> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
- /// when AM.Scale is not zero.
+ /// when Scale is not zero.
const SCEV *ScaledReg;
   /// UnfoldedOffset - An additional constant offset which is added near the
@@ -241,7 +249,9 @@ struct Formula {
/// live in an add immediate field rather than a register.
int64_t UnfoldedOffset;
- Formula() : ScaledReg(0), UnfoldedOffset(0) {}
+ Formula()
+ : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
+ UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
@@ -327,13 +337,13 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
const SCEV *Sum = SE.getAddExpr(Good);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
- AM.HasBaseReg = true;
+ HasBaseReg = true;
}
if (!Bad.empty()) {
const SCEV *Sum = SE.getAddExpr(Bad);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
- AM.HasBaseReg = true;
+ HasBaseReg = true;
}
}
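
A hedged worked example of what the new discrete Formula fields describe,
assuming x86-style scaled addressing:

    extern int GV[64];
    int at(long i) { return GV[i + 2]; }
    // Address computed: GV + 8 + 4*i, i.e. roughly
    //   BaseGV = GV, BaseOffset = 8, HasBaseReg = false,
    //   Scale = 4 with ScaledReg holding the SCEV for i.
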
@@ -349,7 +359,7 @@ unsigned Formula::getNumRegs() const {
Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
- AM.BaseGV ? AM.BaseGV->getType() :
+ BaseGV ? BaseGV->getType() :
0;
}
@@ -382,29 +392,29 @@ bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
void Formula::print(raw_ostream &OS) const {
bool First = true;
- if (AM.BaseGV) {
+ if (BaseGV) {
if (!First) OS << " + "; else First = false;
- WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false);
+ WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
}
- if (AM.BaseOffs != 0) {
+ if (BaseOffset != 0) {
if (!First) OS << " + "; else First = false;
- OS << AM.BaseOffs;
+ OS << BaseOffset;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
E = BaseRegs.end(); I != E; ++I) {
if (!First) OS << " + "; else First = false;
OS << "reg(" << **I << ')';
}
- if (AM.HasBaseReg && BaseRegs.empty()) {
+ if (HasBaseReg && BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: HasBaseReg**";
- } else if (!AM.HasBaseReg && !BaseRegs.empty()) {
+ } else if (!HasBaseReg && !BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: !HasBaseReg**";
}
- if (AM.Scale != 0) {
+ if (Scale != 0) {
if (!First) OS << " + "; else First = false;
- OS << AM.Scale << "*reg(";
+ OS << Scale << "*reg(";
if (ScaledReg)
OS << *ScaledReg;
else
@@ -885,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
}
if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
- if (isLoser())
+ if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
}
@@ -927,8 +937,8 @@ void Cost::RateFormula(const Formula &F,
// Tally up the non-zero immediates.
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
- int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
- if (F.AM.BaseGV)
+ int64_t Offset = (uint64_t)*I + F.BaseOffset;
+ if (F.BaseGV)
ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
else if (Offset != 0)
@@ -1078,19 +1088,19 @@ namespace {
/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
struct UniquifierDenseMapInfo {
- static SmallVector<const SCEV *, 2> getEmptyKey() {
- SmallVector<const SCEV *, 2> V;
+ static SmallVector<const SCEV *, 4> getEmptyKey() {
+ SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-1));
return V;
}
- static SmallVector<const SCEV *, 2> getTombstoneKey() {
- SmallVector<const SCEV *, 2> V;
+ static SmallVector<const SCEV *, 4> getTombstoneKey() {
+ SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-2));
return V;
}
- static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+ static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
unsigned Result = 0;
for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
E = V.end(); I != E; ++I)
@@ -1098,8 +1108,8 @@ struct UniquifierDenseMapInfo {
return Result;
}
- static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
- const SmallVector<const SCEV *, 2> &RHS) {
+ static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
+ const SmallVector<const SCEV *, 4> &RHS) {
return LHS == RHS;
}
};
@@ -1110,7 +1120,7 @@ struct UniquifierDenseMapInfo {
/// the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
- DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+ DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
public:
/// KindType - An enum for a kind of use, indicating what types of
@@ -1169,7 +1179,7 @@ public:
 /// HasFormulaWithSameRegs - Test whether this use has a formula which has
 /// the same registers as the given formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
- SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
@@ -1179,7 +1189,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
- SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
@@ -1270,46 +1280,42 @@ void LSRUse::dump() const {
/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
/// be completely folded into the user instruction at isel time. This includes
/// address-mode folding and special icmp tricks.
-static bool isLegalUse(const AddrMode &AM,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
+ Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
- // If we have low-level target information, ask the target if it can
- // completely fold this address.
- if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+ return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
-    // Otherwise, just guess that reg+reg addressing is legal.
-    return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
// fold a GV into an ICmp.
- if (AM.BaseGV)
+ if (BaseGV)
return false;
// ICmp only has two operands; don't allow more than two non-trivial parts.
- if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
+ if (Scale != 0 && HasBaseReg && BaseOffset != 0)
return false;
// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
// putting the scaled register in the other operand of the icmp.
- if (AM.Scale != 0 && AM.Scale != -1)
+ if (Scale != 0 && Scale != -1)
return false;
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
- if (AM.BaseOffs != 0) {
- if (!TLI)
- return false;
+ if (BaseOffset != 0) {
// We have one of:
- // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
- // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+ // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
+ // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
-      // Offs is the ICmp immediate.
+      // BaseOffset is the ICmp immediate.
- int64_t Offs = AM.BaseOffs;
- if (AM.Scale == 0)
- Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
- return TLI->isLegalICmpImmediate(Offs);
+ if (Scale == 0)
+ // The cast does the right thing with INT64_MIN.
+ BaseOffset = -(uint64_t)BaseOffset;
+ return TTI.isLegalICmpImmediate(BaseOffset);
}
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
@@ -1317,92 +1323,87 @@ static bool isLegalUse(const AddrMode &AM,
case LSRUse::Basic:
// Only handle single-register values.
- return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
+ return !BaseGV && Scale == 0 && BaseOffset == 0;
case LSRUse::Special:
// Special case Basic to handle -1 scales.
- return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
+ return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
}
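
The ICmpZero case negates BaseOffset through uint64_t so that INT64_MIN never hits signed-overflow undefined behavior; the unsigned negation wraps it back onto itself. A standalone demonstration, assuming the usual two's-complement conversion back to int64_t:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t BaseOffset = INT64_MIN;
      // Plain -BaseOffset would overflow; the unsigned round-trip is
      // well-defined and yields INT64_MIN again.
      int64_t Negated = -(uint64_t)BaseOffset;
      assert(Negated == INT64_MIN);
      return 0;
    }
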
-static bool isLegalUse(AddrMode AM,
- int64_t MinOffset, int64_t MaxOffset,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) {
// Check for overflow.
- if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
+ if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
return false;
- AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
- if (isLegalUse(AM, Kind, AccessTy, TLI)) {
- AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
- // Check for overflow.
- if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
- (MaxOffset > 0))
- return false;
- AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
- return isLegalUse(AM, Kind, AccessTy, TLI);
- }
- return false;
+ MinOffset = (uint64_t)BaseOffset + MinOffset;
+ if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
+ (MaxOffset > 0))
+ return false;
+ MaxOffset = (uint64_t)BaseOffset + MaxOffset;
+
+ return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
+ Scale) &&
+ isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+}
+
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ const Formula &F) {
+ return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
+ F.BaseOffset, F.HasBaseReg, F.Scale);
}
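
Before querying legality at both ends of the offset range, the rewritten overload checks each endpoint addition for signed wraparound. The guard reduces to one predicate (AddsOverflow is a hypothetical name):

    #include <cstdint>

    // After an unsigned add, the signed sum must land on the same side
    // of Base as the sign of Delta; otherwise the addition wrapped.
    static bool AddsOverflow(int64_t Base, int64_t Delta) {
      int64_t Sum = (int64_t)((uint64_t)Base + (uint64_t)Delta);
      return (Sum > Base) != (Delta > 0);
    }
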
-static bool isAlwaysFoldable(int64_t BaseOffs,
- GlobalValue *BaseGV,
- bool HasBaseReg,
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI) {
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg) {
// Fast-path: zero is always foldable.
- if (BaseOffs == 0 && !BaseGV) return true;
+ if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- AddrMode AM;
- AM.BaseOffs = BaseOffs;
- AM.BaseGV = BaseGV;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+ int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
// Canonicalize a scale of 1 to a base register if the formula doesn't
// already have a base register.
- if (!AM.HasBaseReg && AM.Scale == 1) {
- AM.Scale = 0;
- AM.HasBaseReg = true;
+ if (!HasBaseReg && Scale == 1) {
+ Scale = 0;
+ HasBaseReg = true;
}
- return isLegalUse(AM, Kind, AccessTy, TLI);
+ return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
}
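
A scale of 1 with no base register describes the same address as a plain base register, so the conservative probe canonicalizes before asking the target; in miniature (AddrForm is a hypothetical stand-in for the formula fields):

    #include <cstdint>

    struct AddrForm {
      bool HasBaseReg;
      int64_t Scale;
    };

    // reg + 1*reg is just reg + reg: fold a unit scale into the base
    // register so the target sees only one form of the question.
    static void canonicalizeUnitScale(AddrForm &AF) {
      if (!AF.HasBaseReg && AF.Scale == 1) {
        AF.Scale = 0;
        AF.HasBaseReg = true;
      }
    }
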
-static bool isAlwaysFoldable(const SCEV *S,
- int64_t MinOffset, int64_t MaxOffset,
- bool HasBaseReg,
- LSRUse::KindType Kind, Type *AccessTy,
- const TargetLowering *TLI,
- ScalarEvolution &SE) {
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
+ ScalarEvolution &SE, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind,
+ Type *AccessTy, const SCEV *S, bool HasBaseReg) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- int64_t BaseOffs = ExtractImmediate(S, SE);
+ int64_t BaseOffset = ExtractImmediate(S, SE);
GlobalValue *BaseGV = ExtractSymbol(S, SE);
// If there's anything else involved, it's not foldable.
if (!S->isZero()) return false;
// Fast-path: zero is always foldable.
- if (BaseOffs == 0 && !BaseGV) return true;
+ if (BaseOffset == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
- AddrMode AM;
- AM.BaseOffs = BaseOffs;
- AM.BaseGV = BaseGV;
- AM.HasBaseReg = HasBaseReg;
- AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+ int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+ return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale);
}
namespace {
@@ -1502,7 +1503,7 @@ class LSRInstance {
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
- const TargetLowering *const TLI;
+ const TargetTransformInfo &TTI;
Loop *const L;
bool Changed;
@@ -1638,7 +1639,7 @@ class LSRInstance {
Pass *P);
public:
- LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+ LSRInstance(Loop *L, Pass *P);
bool getChanged() const { return Changed; }
@@ -1688,12 +1689,9 @@ void LSRInstance::OptimizeShadowIV() {
}
if (!DestTy) continue;
- if (TLI) {
- // If target does not support DestTy natively then do not apply
- // this transformation.
- EVT DVT = TLI->getValueType(DestTy);
- if (!TLI->isTypeLegal(DVT)) continue;
- }
+ // If target does not support DestTy natively then do not apply
+ // this transformation.
+ if (!TTI.isTypeLegal(DestTy)) continue;
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
@@ -1897,15 +1895,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
- if (isa<ConstantInt>(BO->getOperand(1)) &&
- cast<ConstantInt>(BO->getOperand(1))->isOne() &&
- SE.getSCEV(BO->getOperand(0)) == MaxRHS)
- NewRHS = BO->getOperand(0);
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
- if (isa<ConstantInt>(BO->getOperand(1)) &&
- cast<ConstantInt>(BO->getOperand(1))->isOne() &&
- SE.getSCEV(BO->getOperand(0)) == MaxRHS)
- NewRHS = BO->getOperand(0);
+ if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
+ if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
+ NewRHS = BO->getOperand(0);
if (!NewRHS)
return Cond;
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
@@ -2015,18 +2011,17 @@ LSRInstance::OptimizeLoopTermCond() {
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
- // Without TLI, assume that any stride might be valid, and so any
- // use might be shared.
- if (!TLI)
- goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
- AddrMode AM;
- AM.Scale = C->getSExtValue();
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ int64_t Scale = C->getSExtValue();
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ /*BaseOffset=*/ 0,
+ /*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
- AM.Scale = -AM.Scale;
- if (TLI->isLegalAddressingMode(AM, AccessTy))
+ Scale = -Scale;
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ /*BaseOffset=*/ 0,
+ /*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
}
}
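
Post-increment is declined when the stride folds into an addressing mode with either sign, since such an IV is likely shared with address computation. Sketched with a hypothetical stand-in for the TargetTransformInfo query:

    #include <cstdint>

    struct TargetQuery {
      // Assumed hook; a real target would consult its addressing modes.
      bool isLegalScale(int64_t Scale) const { return Scale >= -8 && Scale <= 8; }
    };

    // Probe both signs: a post-inc user may appear as reg + s*iv or
    // reg - s*iv once the increment is hoisted.
    static bool mayReuseScaledAddress(const TargetQuery &TQ, int64_t Stride) {
      return TQ.isLegalScale(Stride) || TQ.isLegalScale(-Stride);
    }
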
@@ -2096,13 +2091,13 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
return false;
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
- if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
- if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
- Kind, AccessTy, TLI))
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
@@ -2131,7 +2126,8 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
Offset = 0;
}
@@ -2199,10 +2195,10 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
// as OrigF.
if (F.BaseRegs == OrigF.BaseRegs &&
F.ScaledReg == OrigF.ScaledReg &&
- F.AM.BaseGV == OrigF.AM.BaseGV &&
- F.AM.Scale == OrigF.AM.Scale &&
+ F.BaseGV == OrigF.BaseGV &&
+ F.Scale == OrigF.Scale &&
F.UnfoldedOffset == OrigF.UnfoldedOffset) {
- if (F.AM.BaseOffs == 0)
+ if (F.BaseOffset == 0)
return &LU;
// This is the formula where all the registers and symbols matched;
// there aren't going to be any others. Since we declined it, we
@@ -2396,7 +2392,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
- ScalarEvolution &SE, const TargetLowering *TLI) {
+ ScalarEvolution &SE, const TargetTransformInfo &TTI) {
if (StressIVChain)
return true;
@@ -2539,6 +2535,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
// Add this IV user to the end of the chain.
IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
}
+ IVChain &Chain = IVChainVec[ChainIdx];
SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
// This chain's NearUsers become FarUsers.
@@ -2556,8 +2553,19 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
for (Value::use_iterator UseIter = IVOper->use_begin(),
UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
- if (!OtherUse || OtherUse == UserInst)
+ if (!OtherUse)
continue;
+ // Uses in the chain will no longer be uses if the chain is formed.
+ // Include the head of the chain in this iteration (not Chain.begin()).
+ IVChain::const_iterator IncIter = Chain.Incs.begin();
+ IVChain::const_iterator IncEnd = Chain.Incs.end();
+      for (; IncIter != IncEnd; ++IncIter) {
+ if (IncIter->UserInst == OtherUse)
+ break;
+ }
+ if (IncIter != IncEnd)
+ continue;
+
if (SE.isSCEVable(OtherUse->getType())
&& !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
&& IU.isIVUserOrOperand(OtherUse)) {
@@ -2654,7 +2662,7 @@ void LSRInstance::CollectChains() {
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
- ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
+ ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
continue;
// Preserve the chain at UsesIdx.
if (ChainIdx != UsersIdx)
@@ -2681,7 +2689,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
- Value *Operand, const TargetLowering *TLI) {
+ Value *Operand, const TargetTransformInfo &TTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
@@ -2690,8 +2698,9 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
return false;
int64_t IncOffset = IncConst->getValue()->getSExtValue();
- if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false,
- LSRUse::Address, getAccessType(UserInst), TLI))
+ if (!isAlwaysFoldable(TTI, LSRUse::Address,
+ getAccessType(UserInst), /*BaseGV=*/ 0,
+                        IncOffset, /*HasBaseReg=*/ false))
return false;
return true;
@@ -2705,6 +2714,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// by LSR.
const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
Value *IVSrc = 0;
@@ -2762,7 +2772,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// If an IV increment can't be folded, use it as the next IV value.
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
- TLI)) {
+ TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = 0;
@@ -2904,7 +2914,7 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
Formula F;
F.BaseRegs.push_back(S);
- F.AM.HasBaseReg = true;
+ F.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
@@ -3106,9 +3116,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
- if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
- Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, *J, Base.getNumRegs() > 1))
continue;
// Collect all operands except *J.
@@ -3120,9 +3129,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
if (InnerAddOps.size() == 1 &&
- isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
- Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, TLI, SE))
+ isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
@@ -3132,10 +3140,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (TLI && InnerSumSC &&
+ if (InnerSumSC &&
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue())) {
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue();
F.BaseRegs.erase(F.BaseRegs.begin() + i);
@@ -3144,9 +3152,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue()))
+ if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue();
else
@@ -3195,7 +3203,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
- if (Base.AM.BaseGV) return;
+ if (Base.BaseGV) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *G = Base.BaseRegs[i];
@@ -3203,9 +3211,8 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
if (G->isZero() || !GV)
continue;
Formula F = Base;
- F.AM.BaseGV = GV;
- if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ F.BaseGV = GV;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3228,9 +3235,9 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
E = Worklist.end(); I != E; ++I) {
Formula F = Base;
- F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
- if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
- LU.Kind, LU.AccessTy, TLI)) {
+ F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+ if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ LU.AccessTy, F)) {
// Add the offset to the base register.
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
@@ -3248,9 +3255,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
if (G->isZero() || Imm == 0)
continue;
Formula F = Base;
- F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
- if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3271,7 +3277,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
// Don't do this if there is more than one offset.
if (LU.MinOffset != LU.MaxOffset) return;
- assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
+ assert(!Base.BaseGV && "ICmpZero use is not legal!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
@@ -3279,10 +3285,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
int64_t Factor = *I;
// Check that the multiplication doesn't overflow.
- if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
+ if (Base.BaseOffset == INT64_MIN && Factor == -1)
continue;
- int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
- if (NewBaseOffs / Factor != Base.AM.BaseOffs)
+ int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
+ if (NewBaseOffset / Factor != Base.BaseOffset)
continue;
// Check that multiplying with the use offset doesn't overflow.
@@ -3294,14 +3300,14 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
continue;
Formula F = Base;
- F.AM.BaseOffs = NewBaseOffs;
+ F.BaseOffset = NewBaseOffset;
// Check that this scale is legal.
- if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+ if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
continue;
// Compensate for the use having MinOffset built into it.
- F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
+ F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
@@ -3342,23 +3348,23 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
- if (Base.AM.Scale != 0) return;
+ if (Base.Scale != 0) return;
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
- Base.AM.Scale = Factor;
- Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
+ Base.Scale = Factor;
+ Base.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
- if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ Base)) {
      // As a special case, handle out-of-loop Basic users specially.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
- isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LSRUse::Special, LU.AccessTy, TLI) &&
+ isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
+ LU.AccessTy, Base) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
@@ -3367,7 +3373,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// For an ICmpZero, negating a solitary base register won't lead to
// new solutions.
if (LU.Kind == LSRUse::ICmpZero &&
- !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV)
+ !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
continue;
// For each addrec base reg, apply the scale, if possible.
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
@@ -3391,11 +3397,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
- // This requires TargetLowering to tell us which truncates are free.
- if (!TLI) return;
-
// Don't bother truncating symbolic values.
- if (Base.AM.BaseGV) return;
+ if (Base.BaseGV) return;
// Determine the integer type for the base formula.
Type *DstTy = Base.getType();
@@ -3405,7 +3408,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
Type *SrcTy = *I;
- if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+ if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
@@ -3552,16 +3555,15 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
const Formula &F = LU.Formulae[L];
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
- int64_t Offs = (uint64_t)F.AM.BaseOffs +
- Imm * (uint64_t)F.AM.Scale;
+ int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
// Don't create 50 + reg(-50).
if (F.referencesReg(SE.getSCEV(
- ConstantInt::get(IntTy, -(uint64_t)Offs))))
+ ConstantInt::get(IntTy, -(uint64_t)Offset))))
continue;
Formula NewF = F;
- NewF.AM.BaseOffs = Offs;
- if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI))
+ NewF.BaseOffset = Offset;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ NewF))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
@@ -3570,9 +3572,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
if (C->getValue()->isNegative() !=
- (NewF.AM.BaseOffs < 0) &&
- (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
- .ule(abs64(NewF.AM.BaseOffs)))
+ (NewF.BaseOffset < 0) &&
+ (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
+ .ule(abs64(NewF.BaseOffset)))
continue;
// OK, looks good.
@@ -3584,11 +3586,10 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
if (BaseReg != OrigReg)
continue;
Formula NewF = F;
- NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
- if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI)) {
- if (!TLI ||
- !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
+ LU.Kind, LU.AccessTy, NewF)) {
+ if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
@@ -3602,11 +3603,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
J != JE; ++J)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
- if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
- abs64(NewF.AM.BaseOffs)) &&
+ if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
+ abs64(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
- NewF.AM.BaseOffs).countTrailingZeros() >=
- CountTrailingZeros_64(NewF.AM.BaseOffs))
+ NewF.BaseOffset).countTrailingZeros() >=
+ CountTrailingZeros_64(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
@@ -3667,7 +3668,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// Collect the best formula for each unique set of shared registers. This
// is reset for each use.
- typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+ typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
BestFormulaeTy;
BestFormulaeTy BestFormulae;
@@ -3702,7 +3703,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
dbgs() << "\n");
}
else {
- SmallVector<const SCEV *, 2> Key;
+ SmallVector<const SCEV *, 4> Key;
for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
@@ -3804,7 +3805,7 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
Formula NewF = F;
- NewF.AM.BaseOffs += C->getValue()->getSExtValue();
+ NewF.BaseOffset += C->getValue()->getSExtValue();
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
@@ -3817,9 +3818,9 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
}
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
- if (!F.AM.BaseGV) {
+ if (!F.BaseGV) {
Formula NewF = F;
- NewF.AM.BaseGV = GV;
+ NewF.BaseGV = GV;
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
@@ -3848,84 +3849,83 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
/// for expressions like A, A+1, A+2, etc., allocate a single register for
/// them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
- if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
- DEBUG(dbgs() << "The search space is too complex.\n");
+ if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+ return;
- DEBUG(dbgs() << "Narrowing the search space by assuming that uses "
- "separated by a constant offset will use the same "
- "registers.\n");
+ DEBUG(dbgs() << "The search space is too complex.\n"
+ "Narrowing the search space by assuming that uses separated "
+ "by a constant offset will use the same registers.\n");
- // This is especially useful for unrolled loops.
+ // This is especially useful for unrolled loops.
- for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
- LSRUse &LU = Uses[LUIdx];
- for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
- E = LU.Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
- if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
- if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
- if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
- /*HasBaseReg=*/false,
- LU.Kind, LU.AccessTy)) {
- DEBUG(dbgs() << " Deleting use "; LU.print(dbgs());
- dbgs() << '\n');
-
- LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
-
- // Update the relocs to reference the new use.
- for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
- LSRFixup &Fixup = *I;
- if (Fixup.LUIdx == LUIdx) {
- Fixup.LUIdx = LUThatHas - &Uses.front();
- Fixup.Offset += F.AM.BaseOffs;
- // Add the new offset to LUThatHas' offset list.
- if (LUThatHas->Offsets.back() != Fixup.Offset) {
- LUThatHas->Offsets.push_back(Fixup.Offset);
- if (Fixup.Offset > LUThatHas->MaxOffset)
- LUThatHas->MaxOffset = Fixup.Offset;
- if (Fixup.Offset < LUThatHas->MinOffset)
- LUThatHas->MinOffset = Fixup.Offset;
- }
- DEBUG(dbgs() << "New fixup has offset "
- << Fixup.Offset << '\n');
- }
- if (Fixup.LUIdx == NumUses-1)
- Fixup.LUIdx = LUIdx;
- }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.BaseOffset == 0 || F.Scale != 0)
+ continue;
- // Delete formulae from the new use which are no longer legal.
- bool Any = false;
- for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
- Formula &F = LUThatHas->Formulae[i];
- if (!isLegalUse(F.AM,
- LUThatHas->MinOffset, LUThatHas->MaxOffset,
- LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
- DEBUG(dbgs() << " Deleting "; F.print(dbgs());
- dbgs() << '\n');
- LUThatHas->DeleteFormula(F);
- --i;
- --e;
- Any = true;
- }
- }
- if (Any)
- LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+ LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
+ if (!LUThatHas)
+ continue;
- // Delete the old use.
- DeleteUse(LU, LUIdx);
- --LUIdx;
- --NumUses;
- break;
- }
+ if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
+ LU.Kind, LU.AccessTy))
+ continue;
+
+ DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
+
+ LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.BaseOffset;
+ // Add the new offset to LUThatHas' offset list.
+ if (LUThatHas->Offsets.back() != Fixup.Offset) {
+ LUThatHas->Offsets.push_back(Fixup.Offset);
+ if (Fixup.Offset > LUThatHas->MaxOffset)
+ LUThatHas->MaxOffset = Fixup.Offset;
+ if (Fixup.Offset < LUThatHas->MinOffset)
+ LUThatHas->MinOffset = Fixup.Offset;
}
+ DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
}
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
}
- }
- DEBUG(dbgs() << "After pre-selection:\n";
- print_uses(dbgs()));
+ // Delete formulae from the new use which are no longer legal.
+ bool Any = false;
+ for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
+ Formula &F = LUThatHas->Formulae[i];
+ if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
+ LUThatHas->Kind, LUThatHas->AccessTy, F)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LUThatHas->DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ }
+ }
+
+ if (Any)
+ LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+
+ // Delete the old use.
+ DeleteUse(LU, LUIdx);
+ --LUIdx;
+ --NumUses;
+ break;
+ }
}
+
+ DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
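
When a use is folded away, its fixups migrate to the surviving use, and that use's offset list and [MinOffset, MaxOffset] range grow to cover them. The bookkeeping in isolation (UseOffsets is hypothetical, not the LSRUse type):

    #include <cstdint>
    #include <vector>

    struct UseOffsets {
      std::vector<int64_t> Offsets;
      int64_t MinOffset = INT64_MAX;
      int64_t MaxOffset = INT64_MIN;

      void add(int64_t Off) {
        // Migrated fixups often repeat the previous offset, so skip
        // adjacent duplicates, as the code above does with back().
        if (Offsets.empty() || Offsets.back() != Off)
          Offsets.push_back(Off);
        if (Off > MaxOffset) MaxOffset = Off;
        if (Off < MinOffset) MinOffset = Off;
      }
    };
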
/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
@@ -4308,7 +4308,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
- if (F.AM.Scale != 0) {
+ if (F.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
// If we're expanding for a post-inc user, make the post-inc adjustment.
@@ -4321,7 +4321,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
- assert(F.AM.Scale == -1 &&
+ assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
} else {
@@ -4336,20 +4336,20 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
- SE.getConstant(ScaledS->getType(), F.AM.Scale));
+ SE.getConstant(ScaledS->getType(), F.Scale));
Ops.push_back(ScaledS);
}
}
// Expand the GV portion.
- if (F.AM.BaseGV) {
+ if (F.BaseGV) {
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+ Ops.push_back(SE.getUnknown(F.BaseGV));
}
// Flush the operand list to suppress SCEVExpander hoisting of both folded and
@@ -4361,7 +4361,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
// Expand the immediate portion.
- int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
+ int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
if (Offset != 0) {
if (LU.Kind == LSRUse::ICmpZero) {
// The other interesting way of "folding" with an ICmpZero is to use a
@@ -4402,9 +4402,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (LU.Kind == LSRUse::ICmpZero) {
ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
DeadInsts.push_back(CI->getOperand(1));
- assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
+ assert(!F.BaseGV && "ICmp does not support folding a global value and "
"a scale at the same time!");
- if (F.AM.Scale == -1) {
+ if (F.Scale == -1) {
if (ICmpScaledV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
@@ -4414,7 +4414,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
CI->setOperand(1, ICmpScaledV);
} else {
- assert(F.AM.Scale == 0 &&
+ assert(F.Scale == 0 &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
@@ -4589,13 +4589,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
-LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
- : IU(P->getAnalysis<IVUsers>()),
- SE(P->getAnalysis<ScalarEvolution>()),
- DT(P->getAnalysis<DominatorTree>()),
- LI(P->getAnalysis<LoopInfo>()),
- TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
-
+LSRInstance::LSRInstance(Loop *L, Pass *P)
+ : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
+ DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
+ TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
+ IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -4678,14 +4676,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
#ifndef NDEBUG
// Formulae should be legal.
- for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
- E = Uses.end(); I != E; ++I) {
- const LSRUse &LU = *I;
- for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
- JE = LU.Formulae.end(); J != JE; ++J)
- assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, TLI) &&
- "Illegal formula generated!");
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
+ I != E; ++I) {
+ const LSRUse &LU = *I;
+ for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+ JE = LU.Formulae.end();
+ J != JE; ++J)
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+ *J) && "Illegal formula generated!");
};
#endif
@@ -4757,13 +4755,9 @@ void LSRInstance::dump() const {
namespace {
class LoopStrengthReduce : public LoopPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
- const TargetLowering *const TLI;
-
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+ LoopStrengthReduce();
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -4775,6 +4769,7 @@ private:
char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
@@ -4784,14 +4779,13 @@ INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
-Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
- return new LoopStrengthReduce(TLI);
+Pass *llvm::createLoopStrengthReducePass() {
+ return new LoopStrengthReduce();
}
-LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(ID), TLI(tli) {
- initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
- }
+LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
+ initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
@@ -4810,24 +4804,27 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
+ AU.addRequired<TargetTransformInfo>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
// Run the main LSR transformation.
- Changed |= LSRInstance(TLI, L, this).getChanged();
+ Changed |= LSRInstance(L, this).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
- if (EnablePhiElim) {
+ if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
- unsigned numFolded = Rewriter.
- replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
+ unsigned numFolded =
+ Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
+ DeadInsts,
+ &getAnalysis<TargetTransformInfo>());
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
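
With the TargetLowering parameter removed, clients construct the pass with no target argument; the hooks now arrive through the required TargetTransformInfo analysis group. A minimal usage sketch against the legacy pass manager (addLSR is hypothetical driver code):

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"

    static void addLSR(llvm::PassManagerBase &PM) {
      // No TargetLowering pointer: LSR pulls TargetTransformInfo from
      // the registry via AU.addRequired<TargetTransformInfo>().
      PM.add(llvm::createLoopStrengthReducePass());
    }
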
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 0d781ac97725..80d060b926ea 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -13,16 +13,17 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-unroll"
-#include "llvm/IntrinsicInst.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/DataLayout.h"
#include <climits>
using namespace llvm;
@@ -90,6 +91,7 @@ namespace {
AU.addPreservedID(LCSSAID);
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
@@ -101,6 +103,7 @@ namespace {
char LoopUnroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -113,12 +116,14 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
- const DataLayout *TD) {
+ bool &NotDuplicatable,
+ const TargetTransformInfo &TTI) {
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I, TD);
+ Metrics.analyzeBasicBlock(*I, TTI);
NumCalls = Metrics.NumInlineCandidates;
+ NotDuplicatable = Metrics.notDuplicatable;
unsigned LoopSize = Metrics.NumInsts;
@@ -133,6 +138,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopInfo *LI = &getAnalysis<LoopInfo>();
ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
+ const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
@@ -145,8 +151,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// not user specified.
unsigned Threshold = CurrentThreshold;
if (!UserThreshold &&
- Header->getParent()->getFnAttributes().
- hasAttribute(Attributes::OptimizeForSize))
+ Header->getParent()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
Threshold = OptSizeUnrollThreshold;
// Find trip count and trip multiple if count is not available
@@ -179,10 +186,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Enforce the threshold.
if (Threshold != NoThreshold) {
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
unsigned NumInlineCandidates;
- unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD);
+ bool notDuplicatable;
+ unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates,
+ notDuplicatable, TTI);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ if (notDuplicatable) {
+        DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
+ << " instructions.\n");
+ return false;
+ }
if (NumInlineCandidates != 0) {
DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return false;
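
The unroller now rejects loops whose bodies contain non-duplicatable instructions before any size threshold applies; the gating order, reduced to a hypothetical helper over the same CodeMetrics fields:

    #include "llvm/Analysis/CodeMetrics.h"

    static bool mayUnroll(const llvm::CodeMetrics &Metrics, unsigned Threshold) {
      // Cloning is simply illegal for these bodies, so size is moot.
      if (Metrics.notDuplicatable)
        return false;
      // Inline candidates make the size estimate unreliable.
      if (Metrics.NumInlineCandidates != 0)
        return false;
      return Metrics.NumInsts <= Threshold;
    }
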
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 047b43eb84fc..0e8199f2fd5c 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -28,25 +28,26 @@
#define DEBUG_TYPE "loop-unswitch"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <map>
#include <set>
@@ -101,7 +102,7 @@ namespace {
    // Analyze the loop: check its size and determine whether it is
    // possible to unswitch it. Returns true if we can unswitch this loop.
- bool countLoop(const Loop* L);
+ bool countLoop(const Loop* L, const TargetTransformInfo &TTI);
// Clean all data related to given loop.
void forgetLoop(const Loop* L);
@@ -170,6 +171,7 @@ namespace {
AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTree>();
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
}
private:
@@ -221,7 +223,7 @@ namespace {
  // Analyze the loop: check its size and determine whether it is possible
  // to unswitch it. Returns true if we can unswitch this loop.
-bool LUAnalysisCache::countLoop(const Loop* L) {
+bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
std::pair<LoopPropsMapIt, bool> InsertRes =
LoopsProperties.insert(std::make_pair(L, LoopProperties()));
@@ -243,11 +245,18 @@ bool LUAnalysisCache::countLoop(const Loop* L) {
for (Loop::block_iterator I = L->block_begin(),
E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I);
+ Metrics.analyzeBasicBlock(*I, TTI);
Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
+
+ if (Metrics.notDuplicatable) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << L->getHeader()->getName() << ", contents cannot be "
+ << "duplicated!\n");
+ return false;
+ }
}
if (!Props.CanBeUnswitchedCount) {
@@ -327,6 +336,7 @@ void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop,
char LoopUnswitch::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -417,7 +427,7 @@ bool LoopUnswitch::processCurrentLoop() {
  // We may have reached the quota of branches for this loop. If so,
  // stop unswitching.
- if (!BranchesInfo.countLoop(currentLoop))
+ if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>()))
return false;
// Loop over all of the basic blocks in the loop. If we find an interior
@@ -639,7 +649,8 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// Do not do non-trivial unswitch while optimizing for size.
if (OptimizeForSize ||
- F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize))
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize))
return false;
UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
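
Both this hunk and the parallel one in LoopUnrollPass.cpp migrate the same query: function-level attributes are now addressed through AttributeSet::FunctionIndex rather than the old Attributes interface. The query in isolation (optimizingForSize is a hypothetical helper):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    static bool optimizingForSize(const llvm::Function &F) {
      // Function attributes live at the pseudo-index FunctionIndex.
      return F.getAttributes().hasAttribute(llvm::AttributeSet::FunctionIndex,
                                            llvm::Attribute::OptimizeForSize);
    }
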
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 7419a6543e7e..8ced4946c832 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -14,9 +14,9 @@
#define DEBUG_TYPE "loweratomic"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
using namespace llvm;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 517657cf526c..be0f0e8a25f6 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -14,20 +14,20 @@
#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
deleted file mode 100644
index dfdf50549da4..000000000000
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ /dev/null
@@ -1,4232 +0,0 @@
-//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines ObjC ARC optimizations. ARC stands for
-// Automatic Reference Counting and is a system for managing reference counts
-// for objects in Objective C.
-//
-// The optimizations performed include elimination of redundant, partially
-// redundant, and inconsequential reference count operations, elimination of
-// redundant weak pointer operations, pattern-matching and replacement of
-// low-level operations into higher-level operations, and numerous minor
-// simplifications.
-//
-// This file also defines a simple ARC-aware AliasAnalysis.
-//
-// WARNING: This file knows about certain library functions. It recognizes them
-// by name, and hardwires knowledge of their semantics.
-//
-// WARNING: This file knows about how certain Objective-C library functions are
-// used. Naive LLVM IR transformations which would otherwise be
-// behavior-preserving may break these assumptions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "objc-arc"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/DenseMap.h"
-using namespace llvm;
-
-// A handy option to enable/disable all optimizations in this file.
-static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true));
-
-//===----------------------------------------------------------------------===//
-// Misc. Utilities
-//===----------------------------------------------------------------------===//
-
-namespace {
- /// MapVector - An associative container with fast insertion-order
- /// (deterministic) iteration over its elements. Plus the special
- /// blot operation.
- template<class KeyT, class ValueT>
- class MapVector {
- /// Map - Map keys to indices in Vector.
- typedef DenseMap<KeyT, size_t> MapTy;
- MapTy Map;
-
- /// Vector - Keys and values.
- typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
- VectorTy Vector;
-
- public:
- typedef typename VectorTy::iterator iterator;
- typedef typename VectorTy::const_iterator const_iterator;
- iterator begin() { return Vector.begin(); }
- iterator end() { return Vector.end(); }
- const_iterator begin() const { return Vector.begin(); }
- const_iterator end() const { return Vector.end(); }
-
-#ifdef XDEBUG
- ~MapVector() {
- assert(Vector.size() >= Map.size()); // May differ due to blotting.
- for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
- I != E; ++I) {
- assert(I->second < Vector.size());
- assert(Vector[I->second].first == I->first);
- }
- for (typename VectorTy::const_iterator I = Vector.begin(),
- E = Vector.end(); I != E; ++I)
- assert(!I->first ||
- (Map.count(I->first) &&
- Map[I->first] == size_t(I - Vector.begin())));
- }
-#endif
-
- ValueT &operator[](const KeyT &Arg) {
- std::pair<typename MapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(Arg, size_t(0)));
- if (Pair.second) {
- size_t Num = Vector.size();
- Pair.first->second = Num;
- Vector.push_back(std::make_pair(Arg, ValueT()));
- return Vector[Num].second;
- }
- return Vector[Pair.first->second].second;
- }
-
- std::pair<iterator, bool>
- insert(const std::pair<KeyT, ValueT> &InsertPair) {
- std::pair<typename MapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(InsertPair.first, size_t(0)));
- if (Pair.second) {
- size_t Num = Vector.size();
- Pair.first->second = Num;
- Vector.push_back(InsertPair);
- return std::make_pair(Vector.begin() + Num, true);
- }
- return std::make_pair(Vector.begin() + Pair.first->second, false);
- }
-
- const_iterator find(const KeyT &Key) const {
- typename MapTy::const_iterator It = Map.find(Key);
- if (It == Map.end()) return Vector.end();
- return Vector.begin() + It->second;
- }
-
- /// blot - This is similar to erase, but instead of removing the element
- /// from the vector, it just zeros out the key in the vector. This leaves
- /// iterators intact, but clients must be prepared for zeroed-out keys when
- /// iterating.
- void blot(const KeyT &Key) {
- typename MapTy::iterator It = Map.find(Key);
- if (It == Map.end()) return;
- Vector[It->second].first = KeyT();
- Map.erase(It);
- }
-
- void clear() {
- Map.clear();
- Vector.clear();
- }
- };
-}
-
-//===----------------------------------------------------------------------===//
-// ARC Utilities.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/ADT/StringSwitch.h"
-
-namespace {
- /// InstructionClass - A simple classification for instructions.
- enum InstructionClass {
- IC_Retain, ///< objc_retain
- IC_RetainRV, ///< objc_retainAutoreleasedReturnValue
- IC_RetainBlock, ///< objc_retainBlock
- IC_Release, ///< objc_release
- IC_Autorelease, ///< objc_autorelease
- IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue
- IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
- IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop
- IC_NoopCast, ///< objc_retainedObject, etc.
- IC_FusedRetainAutorelease, ///< objc_retainAutorelease
- IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
- IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
- IC_StoreWeak, ///< objc_storeWeak (primitive)
- IC_InitWeak, ///< objc_initWeak (derived)
- IC_LoadWeak, ///< objc_loadWeak (derived)
- IC_MoveWeak, ///< objc_moveWeak (derived)
- IC_CopyWeak, ///< objc_copyWeak (derived)
- IC_DestroyWeak, ///< objc_destroyWeak (derived)
- IC_StoreStrong, ///< objc_storeStrong (derived)
- IC_CallOrUser, ///< could call objc_release and/or "use" pointers
- IC_Call, ///< could call objc_release
- IC_User, ///< could "use" a pointer
- IC_None ///< anything else
- };
-}
-
-/// IsPotentialUse - Test whether the given value is possible a
-/// reference-counted pointer.
-static bool IsPotentialUse(const Value *Op) {
- // Pointers to static or stack storage are not reference-counted pointers.
- if (isa<Constant>(Op) || isa<AllocaInst>(Op))
- return false;
- // Special arguments are not reference-counted.
- if (const Argument *Arg = dyn_cast<Argument>(Op))
- if (Arg->hasByValAttr() ||
- Arg->hasNestAttr() ||
- Arg->hasStructRetAttr())
- return false;
- // Only consider values with pointer types.
- // It seemes intuitive to exclude function pointer types as well, since
- // functions are never reference-counted, however clang occasionally
- // bitcasts reference-counted pointers to function-pointer type
- // temporarily.
- PointerType *Ty = dyn_cast<PointerType>(Op->getType());
- if (!Ty)
- return false;
- // Conservatively assume anything else is a potential use.
- return true;
-}
-
-/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind
-/// of construct CS is.
-static InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
- for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I)
- if (IsPotentialUse(*I))
- return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
-
- return CS.onlyReadsMemory() ? IC_None : IC_Call;
-}
-
-/// GetFunctionClass - Determine if F is one of the special known Functions.
-/// If it isn't, return IC_CallOrUser.
-static InstructionClass GetFunctionClass(const Function *F) {
- Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-
- // No arguments.
- if (AI == AE)
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
- .Default(IC_CallOrUser);
-
- // One argument.
- const Argument *A0 = AI++;
- if (AI == AE)
- // Argument is a pointer.
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_retain", IC_Retain)
- .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
- .Case("objc_retainBlock", IC_RetainBlock)
- .Case("objc_release", IC_Release)
- .Case("objc_autorelease", IC_Autorelease)
- .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
- .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
- .Case("objc_retainedObject", IC_NoopCast)
- .Case("objc_unretainedObject", IC_NoopCast)
- .Case("objc_unretainedPointer", IC_NoopCast)
- .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
- .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
- .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
- .Default(IC_CallOrUser);
-
- // Argument is i8**
- if (PointerType *Pte = dyn_cast<PointerType>(ETy))
- if (Pte->getElementType()->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
- .Case("objc_loadWeak", IC_LoadWeak)
- .Case("objc_destroyWeak", IC_DestroyWeak)
- .Default(IC_CallOrUser);
- }
-
- // Two arguments, first is i8**.
- const Argument *A1 = AI++;
- if (AI == AE)
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
- if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
- if (Pte->getElementType()->isIntegerTy(8))
- if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
- Type *ETy1 = PTy1->getElementType();
- // Second argument is i8*
- if (ETy1->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_storeWeak", IC_StoreWeak)
- .Case("objc_initWeak", IC_InitWeak)
- .Case("objc_storeStrong", IC_StoreStrong)
- .Default(IC_CallOrUser);
- // Second argument is i8**.
- if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
- if (Pte1->getElementType()->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_moveWeak", IC_MoveWeak)
- .Case("objc_copyWeak", IC_CopyWeak)
- .Default(IC_CallOrUser);
- }
-
- // Anything else.
- return IC_CallOrUser;
-}
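-
-// Illustrative declarations matched above (not from the original source;
-// the names are the usual runtime entry points):
-//   declare i8* @objc_retain(i8*)             ; 1 arg, i8*  -> IC_Retain
-//   declare i8* @objc_loadWeakRetained(i8**)  ; 1 arg, i8** -> IC_LoadWeakRetained
-//   declare i8* @objc_storeWeak(i8**, i8*)    ; i8**, i8*   -> IC_StoreWeak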
-
-/// GetInstructionClass - Determine what kind of construct V is.
-static InstructionClass GetInstructionClass(const Value *V) {
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
- // Any instruction other than a bitcast or gep with a pointer operand has a
- // use of an objc pointer. Bitcasts, GEPs, Selects, and PHIs transfer a
- // pointer to a subsequent use, rather than using it themselves, in this
- // sense. As a shortcut, several other opcodes are known to have no pointer
- // operands of interest. And ret is never followed by a release, so it's
- // not interesting to examine.
- switch (I->getOpcode()) {
- case Instruction::Call: {
- const CallInst *CI = cast<CallInst>(I);
- // Check for calls to special functions.
- if (const Function *F = CI->getCalledFunction()) {
- InstructionClass Class = GetFunctionClass(F);
- if (Class != IC_CallOrUser)
- return Class;
-
- // None of the intrinsic functions do objc_release. For intrinsics, the
- // only question is whether or not they may be users.
- switch (F->getIntrinsicID()) {
- case Intrinsic::returnaddress: case Intrinsic::frameaddress:
- case Intrinsic::stacksave: case Intrinsic::stackrestore:
- case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
- case Intrinsic::objectsize: case Intrinsic::prefetch:
- case Intrinsic::stackprotector:
- case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64:
- case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa:
- case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext:
- case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline:
- case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start: case Intrinsic::invariant_end:
- // Don't let dbg info affect our results.
- case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
- // Shortcut: some intrinsics obviously don't use ObjC pointers.
- return IC_None;
- default:
- break;
- }
- }
- return GetCallSiteClass(CI);
- }
- case Instruction::Invoke:
- return GetCallSiteClass(cast<InvokeInst>(I));
- case Instruction::BitCast:
- case Instruction::GetElementPtr:
- case Instruction::Select: case Instruction::PHI:
- case Instruction::Ret: case Instruction::Br:
- case Instruction::Switch: case Instruction::IndirectBr:
- case Instruction::Alloca: case Instruction::VAArg:
- case Instruction::Add: case Instruction::FAdd:
- case Instruction::Sub: case Instruction::FSub:
- case Instruction::Mul: case Instruction::FMul:
- case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
- case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
- case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
- case Instruction::And: case Instruction::Or: case Instruction::Xor:
- case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
- case Instruction::IntToPtr: case Instruction::FCmp:
- case Instruction::FPTrunc: case Instruction::FPExt:
- case Instruction::FPToUI: case Instruction::FPToSI:
- case Instruction::UIToFP: case Instruction::SIToFP:
- case Instruction::InsertElement: case Instruction::ExtractElement:
- case Instruction::ShuffleVector:
- case Instruction::ExtractValue:
- break;
- case Instruction::ICmp:
- // Comparing a pointer with null, or any other constant, isn't an
- // interesting use, because we don't care what the pointer points to, or
- // about the values of any other dynamic reference-counted pointers.
- if (IsPotentialUse(I->getOperand(1)))
- return IC_User;
- break;
- default:
- // For anything else, check all the operands.
- // Note that this includes both operands of a Store: while the first
- // operand isn't actually being dereferenced, it is being stored to
- // memory where we can no longer track who might read it and dereference
- // it, so we have to consider it potentially used.
- for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
- OI != OE; ++OI)
- if (IsPotentialUse(*OI))
- return IC_User;
- }
- }
-
- // Otherwise, it's totally inert for ARC purposes.
- return IC_None;
-}
-
- /// GetBasicInstructionClass - Determine what kind of construct V is. This is
- /// similar to GetInstructionClass except that it only detects objc runtime
- /// calls. This allows it to be faster.
-static InstructionClass GetBasicInstructionClass(const Value *V) {
- if (const CallInst *CI = dyn_cast<CallInst>(V)) {
- if (const Function *F = CI->getCalledFunction())
- return GetFunctionClass(F);
- // Otherwise, be conservative.
- return IC_CallOrUser;
- }
-
- // Otherwise, be conservative.
- return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
-}
-
-/// IsRetain - Test if the given class is objc_retain or
-/// equivalent.
-static bool IsRetain(InstructionClass Class) {
- return Class == IC_Retain ||
- Class == IC_RetainRV;
-}
-
-/// IsAutorelease - Test if the given class is objc_autorelease or
-/// equivalent.
-static bool IsAutorelease(InstructionClass Class) {
- return Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV;
-}
-
-/// IsForwarding - Test if the given class represents instructions which return
-/// their argument verbatim.
-static bool IsForwarding(InstructionClass Class) {
- // objc_retainBlock technically doesn't always return its argument
- // verbatim, but it doesn't matter for our purposes here.
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_RetainBlock ||
- Class == IC_NoopCast;
-}
-
-/// IsNoopOnNull - Test if the given class represents instructions which do
-/// nothing if passed a null pointer.
-static bool IsNoopOnNull(InstructionClass Class) {
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Release ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_RetainBlock;
-}
-
-/// IsAlwaysTail - Test if the given class represents instructions which are
-/// always safe to mark with the "tail" keyword.
-static bool IsAlwaysTail(InstructionClass Class) {
- // IC_RetainBlock may be given a stack argument.
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV;
-}
-
- /// IsNoThrow - Test if the given class represents instructions which are
- /// always safe to mark with the nounwind attribute.
-static bool IsNoThrow(InstructionClass Class) {
- // objc_retainBlock is not nounwind because it calls user copy constructors
- // which could theoretically throw.
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Release ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_AutoreleasepoolPush ||
- Class == IC_AutoreleasepoolPop;
-}
-
-/// EraseInstruction - Erase the given instruction. Many ObjC calls return their
-/// argument verbatim, so if it's such a call and the return value has users,
-/// replace them with the argument value.
-static void EraseInstruction(Instruction *CI) {
- Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
-
- bool Unused = CI->use_empty();
-
- if (!Unused) {
- // Replace the return value with the argument.
- assert(IsForwarding(GetBasicInstructionClass(CI)) &&
- "Can't delete non-forwarding instruction with users!");
- CI->replaceAllUsesWith(OldArg);
- }
-
- CI->eraseFromParent();
-
- if (Unused)
- RecursivelyDeleteTriviallyDeadInstructions(OldArg);
-}
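-
-// Illustrative before/after (not from the original source): erasing a
-// forwarding call whose result still has a user,
-//   %1 = call i8* @objc_retain(i8* %0)
-//   call void @use(i8* %1)
-// becomes
-//   call void @use(i8* %0)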
-
-/// GetUnderlyingObjCPtr - This is a wrapper around getUnderlyingObject which
-/// also knows how to look through objc_retain and objc_autorelease calls, which
-/// we know to return their argument verbatim.
-static const Value *GetUnderlyingObjCPtr(const Value *V) {
- for (;;) {
- V = GetUnderlyingObject(V);
- if (!IsForwarding(GetBasicInstructionClass(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
-
- return V;
-}
-
-/// StripPointerCastsAndObjCCalls - This is a wrapper around
-/// Value::stripPointerCasts which also knows how to look through objc_retain
-/// and objc_autorelease calls, which we know to return their argument verbatim.
-static const Value *StripPointerCastsAndObjCCalls(const Value *V) {
- for (;;) {
- V = V->stripPointerCasts();
- if (!IsForwarding(GetBasicInstructionClass(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
- return V;
-}
-
-/// StripPointerCastsAndObjCCalls - This is a wrapper around
-/// Value::stripPointerCasts which also knows how to look through objc_retain
-/// and objc_autorelease calls, which we know to return their argument verbatim.
-static Value *StripPointerCastsAndObjCCalls(Value *V) {
- for (;;) {
- V = V->stripPointerCasts();
- if (!IsForwarding(GetBasicInstructionClass(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
- return V;
-}
-
-/// GetObjCArg - Assuming the given instruction is one of the special calls such
-/// as objc_retain or objc_release, return the argument value, stripped of no-op
-/// casts and forwarding calls.
-static Value *GetObjCArg(Value *Inst) {
- return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
-}
-
- /// IsObjCIdentifiedObject - This is similar to AliasAnalysis'
- /// isObjCIdentifiedObject, except that it uses special knowledge of
- /// ObjC conventions.
-static bool IsObjCIdentifiedObject(const Value *V) {
- // Assume that call results and arguments have their own "provenance".
- // Constants (including GlobalVariables) and Allocas are never
- // reference-counted.
- if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
- isa<Argument>(V) || isa<Constant>(V) ||
- isa<AllocaInst>(V))
- return true;
-
- if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
- const Value *Pointer =
- StripPointerCastsAndObjCCalls(LI->getPointerOperand());
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
- // A constant pointer can't be pointing to an object on the heap. It may
- // be reference-counted, but it won't be deleted.
- if (GV->isConstant())
- return true;
- StringRef Name = GV->getName();
- // These special variables are known to hold values which are not
- // reference-counted pointers.
- if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
- Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
- Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
- Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
- Name.startswith("\01l_objc_msgSend_fixup_"))
- return true;
- }
- }
-
- return false;
-}
-
-/// FindSingleUseIdentifiedObject - This is similar to
-/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value
-/// with multiple uses.
-static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
- if (Arg->hasOneUse()) {
- if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
- return FindSingleUseIdentifiedObject(BC->getOperand(0));
- if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
- if (GEP->hasAllZeroIndices())
- return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
- if (IsForwarding(GetBasicInstructionClass(Arg)))
- return FindSingleUseIdentifiedObject(
- cast<CallInst>(Arg)->getArgOperand(0));
- if (!IsObjCIdentifiedObject(Arg))
- return 0;
- return Arg;
- }
-
- // If we found an identifiable object but it has multiple uses, and they
- // are all trivial uses, we can still consider this to be a single-use value.
- if (IsObjCIdentifiedObject(Arg)) {
- for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
- UI != UE; ++UI) {
- const User *U = *UI;
- if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
- return 0;
- }
-
- return Arg;
- }
-
- return 0;
-}
-
-/// ModuleHasARC - Test if the given module looks interesting to run ARC
-/// optimization on.
-static bool ModuleHasARC(const Module &M) {
- return
- M.getNamedValue("objc_retain") ||
- M.getNamedValue("objc_release") ||
- M.getNamedValue("objc_autorelease") ||
- M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
- M.getNamedValue("objc_retainBlock") ||
- M.getNamedValue("objc_autoreleaseReturnValue") ||
- M.getNamedValue("objc_autoreleasePoolPush") ||
- M.getNamedValue("objc_loadWeakRetained") ||
- M.getNamedValue("objc_loadWeak") ||
- M.getNamedValue("objc_destroyWeak") ||
- M.getNamedValue("objc_storeWeak") ||
- M.getNamedValue("objc_initWeak") ||
- M.getNamedValue("objc_moveWeak") ||
- M.getNamedValue("objc_copyWeak") ||
- M.getNamedValue("objc_retainedObject") ||
- M.getNamedValue("objc_unretainedObject") ||
- M.getNamedValue("objc_unretainedPointer");
-}
-
- /// DoesObjCBlockEscape - Test whether the given pointer, which is an
- /// Objective-C block pointer, "escapes". This differs from regular
- /// escape analysis in that a use as an argument to a call is not considered
- /// an escape.
-static bool DoesObjCBlockEscape(const Value *BlockPtr) {
- // Walk the def-use chains.
- SmallVector<const Value *, 4> Worklist;
- Worklist.push_back(BlockPtr);
- do {
- const Value *V = Worklist.pop_back_val();
- for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
- UI != UE; ++UI) {
- const User *UUser = *UI;
- // Special - Use by a call (callee or argument) is not considered
- // to be an escape.
- switch (GetBasicInstructionClass(UUser)) {
- case IC_StoreWeak:
- case IC_InitWeak:
- case IC_StoreStrong:
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- // These special functions make copies of their pointer arguments.
- return true;
- case IC_User:
- case IC_None:
- // Use by an instruction which copies the value is an escape if the
- // result is an escape.
- if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
- isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
- Worklist.push_back(UUser);
- continue;
- }
- // Use by a load is not an escape.
- if (isa<LoadInst>(UUser))
- continue;
- // Use by a store is not an escape if the use is the address.
- if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
- if (V != SI->getValueOperand())
- continue;
- break;
- default:
- // Regular calls and other stuff are not considered escapes.
- continue;
- }
- // Otherwise, conservatively assume an escape.
- return true;
- }
- } while (!Worklist.empty());
-
- // No escapes found.
- return false;
-}
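-
-// Illustrative classification (not from the original source): for a block
-// pointer %b,
-//   call void @take_block(i8* %b)   ; use as a call argument: not an escape
-//   store i8* %b, i8** %slot        ; %b is the stored value: an escape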
-
-//===----------------------------------------------------------------------===//
-// ARC AliasAnalysis.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Pass.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Passes.h"
-
-namespace {
- /// ObjCARCAliasAnalysis - This is a simple alias analysis
- /// implementation that uses knowledge of ARC constructs to answer queries.
- ///
- /// TODO: This class could be generalized to know about other ObjC-specific
- /// tricks, such as knowing that ivars in the non-fragile ABI are
- /// non-aliasing even though their offsets are dynamic.
- class ObjCARCAliasAnalysis : public ImmutablePass,
- public AliasAnalysis {
- public:
- static char ID; // Class identification, replacement for typeinfo
- ObjCARCAliasAnalysis() : ImmutablePass(ID) {
- initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
-
- private:
- virtual void initializePass() {
- InitializeAliasAnalysis(this);
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const void *PI) {
- if (PI == &AliasAnalysis::ID)
- return static_cast<AliasAnalysis *>(this);
- return this;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual AliasResult alias(const Location &LocA, const Location &LocB);
- virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
- virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
- virtual ModRefBehavior getModRefBehavior(const Function *F);
- virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc);
- virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2);
- };
-} // End of anonymous namespace
-
-// Register this pass...
-char ObjCARCAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
- "ObjC-ARC-Based Alias Analysis", false, true, false)
-
-ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
- return new ObjCARCAliasAnalysis();
-}
-
-void
-ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AliasAnalysis::getAnalysisUsage(AU);
-}
-
-AliasAnalysis::AliasResult
-ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
- if (!EnableARCOpts)
- return AliasAnalysis::alias(LocA, LocB);
-
- // First, strip off no-ops, including ObjC-specific no-ops, and try making a
- // precise alias query.
- const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
- const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
- AliasResult Result =
- AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag),
- Location(SB, LocB.Size, LocB.TBAATag));
- if (Result != MayAlias)
- return Result;
-
- // If that failed, climb to the underlying object, including climbing through
- // ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *UA = GetUnderlyingObjCPtr(SA);
- const Value *UB = GetUnderlyingObjCPtr(SB);
- if (UA != SA || UB != SB) {
- Result = AliasAnalysis::alias(Location(UA), Location(UB));
- // We can't use MustAlias or PartialAlias results here because
- // GetUnderlyingObjCPtr may return an offsetted pointer value.
- if (Result == NoAlias)
- return NoAlias;
- }
-
- // If that failed, fail. We don't need to chain here, since that's covered
- // by the earlier precise query.
- return MayAlias;
-}
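-
-// Illustrative two-phase query (not from the original source): given
-//   %s = call i8* @objc_retain(i8* %x)
-// a query on (%s, %y) is first retried as (%x, %y) after stripping; if
-// that is still MayAlias, the underlying objects are compared, where only
-// a NoAlias answer is trusted.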
-
-bool
-ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
- bool OrLocal) {
- if (!EnableARCOpts)
- return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
-
- // First, strip off no-ops, including ObjC-specific no-ops, and try making
- // a precise alias query.
- const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
- if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag),
- OrLocal))
- return true;
-
- // If that failed, climb to the underlying object, including climbing through
- // ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *U = GetUnderlyingObjCPtr(S);
- if (U != S)
- return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
-
- // If that failed, fail. We don't need to chain here, since that's covered
- // by the earlier precise query.
- return false;
-}
-
-AliasAnalysis::ModRefBehavior
-ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
- // We have nothing to do. Just chain to the next AliasAnalysis.
- return AliasAnalysis::getModRefBehavior(CS);
-}
-
-AliasAnalysis::ModRefBehavior
-ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
- if (!EnableARCOpts)
- return AliasAnalysis::getModRefBehavior(F);
-
- switch (GetFunctionClass(F)) {
- case IC_NoopCast:
- return DoesNotAccessMemory;
- default:
- break;
- }
-
- return AliasAnalysis::getModRefBehavior(F);
-}
-
-AliasAnalysis::ModRefResult
-ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
- if (!EnableARCOpts)
- return AliasAnalysis::getModRefInfo(CS, Loc);
-
- switch (GetBasicInstructionClass(CS.getInstruction())) {
- case IC_Retain:
- case IC_RetainRV:
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_NoopCast:
- case IC_AutoreleasepoolPush:
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV:
- // These functions don't access any memory visible to the compiler.
- // Note that this doesn't include objc_retainBlock, because it updates
- // pointers when it copies block data.
- return NoModRef;
- default:
- break;
- }
-
- return AliasAnalysis::getModRefInfo(CS, Loc);
-}
-
-AliasAnalysis::ModRefResult
-ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
- // TODO: Theoretically we could check for dependencies between objc_* calls
- // and OnlyAccessesArgumentPointees calls or other well-behaved calls.
- return AliasAnalysis::getModRefInfo(CS1, CS2);
-}
-
-//===----------------------------------------------------------------------===//
-// ARC expansion.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Transforms/Scalar.h"
-
-namespace {
- /// ObjCARCExpand - Early ARC transformations.
- class ObjCARCExpand : public FunctionPass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
-
- /// Run - A flag indicating whether this optimization pass should run.
- bool Run;
-
- public:
- static char ID;
- ObjCARCExpand() : FunctionPass(ID) {
- initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
- }
- };
-}
-
-char ObjCARCExpand::ID = 0;
-INITIALIZE_PASS(ObjCARCExpand,
- "objc-arc-expand", "ObjC ARC expansion", false, false)
-
-Pass *llvm::createObjCARCExpandPass() {
- return new ObjCARCExpand();
-}
-
-void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
-}
-
-bool ObjCARCExpand::doInitialization(Module &M) {
- Run = ModuleHasARC(M);
- return false;
-}
-
-bool ObjCARCExpand::runOnFunction(Function &F) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!Run)
- return false;
-
- bool Changed = false;
-
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
- Instruction *Inst = &*I;
-
- switch (GetBasicInstructionClass(Inst)) {
- case IC_Retain:
- case IC_RetainRV:
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV:
- // These calls return their argument verbatim, as a low-level
- // optimization. However, this makes high-level optimizations
- // harder. Undo any uses of this optimization that the front-end
- // emitted here. We'll redo them in the contract pass.
- Changed = true;
- Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
- break;
- default:
- break;
- }
- }
-
- return Changed;
-}
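-
-// Illustrative expansion (not from the original source):
-//   %y = call i8* @objc_retain(i8* %x)
-//   call void @use(i8* %y)
-// becomes
-//   %y = call i8* @objc_retain(i8* %x)
-//   call void @use(i8* %x)
-// making it visible to later passes that %x and %y are the same value.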
-
-//===----------------------------------------------------------------------===//
-// ARC autorelease pool elimination.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Constants.h"
-#include "llvm/ADT/STLExtras.h"
-
-namespace {
- /// ObjCARCAPElim - Autorelease pool elimination.
- class ObjCARCAPElim : public ModulePass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool runOnModule(Module &M);
-
- static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0);
- static bool OptimizeBB(BasicBlock *BB);
-
- public:
- static char ID;
- ObjCARCAPElim() : ModulePass(ID) {
- initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
- }
- };
-}
-
-char ObjCARCAPElim::ID = 0;
-INITIALIZE_PASS(ObjCARCAPElim,
- "objc-arc-apelim",
- "ObjC ARC autorelease pool elimination",
- false, false)
-
-Pass *llvm::createObjCARCAPElimPass() {
- return new ObjCARCAPElim();
-}
-
-void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
-}
-
-/// MayAutorelease - Interprocedurally determine if calls made by the
-/// given call site can possibly produce autoreleases.
-bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
- if (const Function *Callee = CS.getCalledFunction()) {
- if (Callee->isDeclaration() || Callee->mayBeOverridden())
- return true;
- for (Function::const_iterator I = Callee->begin(), E = Callee->end();
- I != E; ++I) {
- const BasicBlock *BB = I;
- for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
- J != F; ++J)
- if (ImmutableCallSite JCS = ImmutableCallSite(J))
- // This recursion depth limit is arbitrary. It's just large
- // enough to cover the known interesting testcases.
- if (Depth < 3 &&
- !JCS.onlyReadsMemory() &&
- MayAutorelease(JCS, Depth + 1))
- return true;
- }
- return false;
- }
-
- return true;
-}
-
-bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
- bool Changed = false;
-
- Instruction *Push = 0;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- Instruction *Inst = I++;
- switch (GetBasicInstructionClass(Inst)) {
- case IC_AutoreleasepoolPush:
- Push = Inst;
- break;
- case IC_AutoreleasepoolPop:
- // If this pop matches a push and nothing in between can autorelease,
- // zap the pair.
- if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
- Changed = true;
- Inst->eraseFromParent();
- Push->eraseFromParent();
- }
- Push = 0;
- break;
- case IC_CallOrUser:
- if (MayAutorelease(ImmutableCallSite(Inst)))
- Push = 0;
- break;
- default:
- break;
- }
- }
-
- return Changed;
-}
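-
-// Illustrative pair removal (not from the original source): a pair like
-//   %pool = call i8* @objc_autoreleasePoolPush()
-//   call void @objc_autoreleasePoolPop(i8* %pool)
-// with nothing in between that may autorelease is deleted outright.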
-
-bool ObjCARCAPElim::runOnModule(Module &M) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!ModuleHasARC(M))
- return false;
-
- // Find the llvm.global_ctors variable, as the first step in
- // identifying the global constructors. In theory, unnecessary autorelease
- // pools could occur anywhere, but in practice it's pretty rare. Global
- // ctors are a place where autorelease pools get inserted automatically,
- // so it's pretty common for them to be unnecessary, and it's pretty
- // profitable to eliminate them.
- GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
- if (!GV)
- return false;
-
- assert(GV->hasDefinitiveInitializer() &&
- "llvm.global_ctors is uncooperative!");
-
- bool Changed = false;
-
- // Dig the constructor functions out of GV's initializer.
- ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
- for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
- OI != OE; ++OI) {
- Value *Op = *OI;
- // llvm.global_ctors is an array of pairs where the second members
- // are constructor functions.
- Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
- // If the user used a constructor function with the wrong signature and
- // it got bitcasted or whatever, look the other way.
- if (!F)
- continue;
- // Only look at function definitions.
- if (F->isDeclaration())
- continue;
- // Only look at functions with one basic block.
- if (llvm::next(F->begin()) != F->end())
- continue;
- // Ok, a single-block constructor function definition. Try to optimize it.
- Changed |= OptimizeBB(F->begin());
- }
-
- return Changed;
-}
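-
-// Illustrative llvm.global_ctors shape assumed by the loop above (not
-// from the original source):
-//   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
-//     [{ i32, void ()* } { i32 65535, void ()* @init }]
-// Operand 1 of each element is the constructor function examined above.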
-
-//===----------------------------------------------------------------------===//
-// ARC optimization.
-//===----------------------------------------------------------------------===//
-
-// TODO: On code like this:
-//
-// objc_retain(%x)
-// stuff_that_cannot_release()
-// objc_autorelease(%x)
-// stuff_that_cannot_release()
-// objc_retain(%x)
-// stuff_that_cannot_release()
-// objc_autorelease(%x)
-//
-// The second retain and autorelease can be deleted.
-
-// TODO: It should be possible to delete
-// objc_autoreleasePoolPush and objc_autoreleasePoolPop
-// pairs if nothing is actually autoreleased between them. Also, autorelease
-// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
-// after inlining) can be turned into plain release calls.
-
-// TODO: Critical-edge splitting. If the optimal insertion point is
-// a critical edge, the current algorithm has to fail, because it doesn't
-// know how to split edges. It should be possible to make the optimizer
-// think in terms of edges, rather than blocks, and then split critical
-// edges on demand.
-
-// TODO: OptimizeSequences could be generalized to be interprocedural.
-
-// TODO: Recognize that a bunch of other objc runtime calls have
-// non-escaping arguments and non-releasing arguments, and may be
-// non-autoreleasing.
-
-// TODO: Sink autorelease calls as far as possible. Unfortunately we
-// usually can't sink them past other calls, which would be the main
-// case where it would be useful.
-
-// TODO: The pointer returned from objc_loadWeakRetained is retained.
-
-// TODO: Delete release+retain pairs (rare).
-
-#include "llvm/LLVMContext.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallPtrSet.h"
-
-STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
-STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
-STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
-STATISTIC(NumRets, "Number of return value forwarding "
-                   "retain+autoreleases eliminated");
-STATISTIC(NumRRs, "Number of retain+release paths eliminated");
-STATISTIC(NumPeeps, "Number of calls peephole-optimized");
-
-namespace {
- /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it
- /// uses many of the same techniques, except it uses special ObjC-specific
- /// reasoning about pointer relationships.
- class ProvenanceAnalysis {
- AliasAnalysis *AA;
-
- typedef std::pair<const Value *, const Value *> ValuePairTy;
- typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
- CachedResultsTy CachedResults;
-
- bool relatedCheck(const Value *A, const Value *B);
- bool relatedSelect(const SelectInst *A, const Value *B);
- bool relatedPHI(const PHINode *A, const Value *B);
-
- void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
- ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
-
- public:
- ProvenanceAnalysis() {}
-
- void setAA(AliasAnalysis *aa) { AA = aa; }
-
- AliasAnalysis *getAA() const { return AA; }
-
- bool related(const Value *A, const Value *B);
-
- void clear() {
- CachedResults.clear();
- }
- };
-}
-
-bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
- // If the values are Selects with the same condition, we can do a more precise
- // check: just check for relations between the values on corresponding arms.
- if (const SelectInst *SB = dyn_cast<SelectInst>(B))
- if (A->getCondition() == SB->getCondition())
- return related(A->getTrueValue(), SB->getTrueValue()) ||
- related(A->getFalseValue(), SB->getFalseValue());
-
- // Check both arms of the Select node individually.
- return related(A->getTrueValue(), B) ||
- related(A->getFalseValue(), B);
-}
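-
-// Illustrative same-condition case (not from the original source): for
-//   %a = select i1 %c, i8* %p, i8* %q
-//   %b = select i1 %c, i8* %r, i8* %s
-// only (%p, %r) and (%q, %s) are compared, since both selects take the
-// same arm at run time.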
-
-bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
- // If the values are PHIs in the same block, we can do a check that is both
- // more precise and more efficient: just check for relations between the
- // values on corresponding edges.
- if (const PHINode *PNB = dyn_cast<PHINode>(B))
- if (PNB->getParent() == A->getParent()) {
- for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
- if (related(A->getIncomingValue(i),
- PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
- return true;
- return false;
- }
-
- // Check each unique source of the PHI node against B.
- SmallPtrSet<const Value *, 4> UniqueSrc;
- for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
- const Value *PV1 = A->getIncomingValue(i);
- if (UniqueSrc.insert(PV1) && related(PV1, B))
- return true;
- }
-
- // All of the arms checked out.
- return false;
-}
-
-/// isStoredObjCPointer - Test if the value of P, or any value covered by its
-/// provenance, is ever stored within the function (not counting callees).
-static bool isStoredObjCPointer(const Value *P) {
- SmallPtrSet<const Value *, 8> Visited;
- SmallVector<const Value *, 8> Worklist;
- Worklist.push_back(P);
- Visited.insert(P);
- do {
- P = Worklist.pop_back_val();
- for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
- UI != UE; ++UI) {
- const User *Ur = *UI;
- if (isa<StoreInst>(Ur)) {
- if (UI.getOperandNo() == 0)
- // The pointer is stored.
- return true;
- // The pointer is the store's address; something is stored through it.
- continue;
- }
- if (isa<CallInst>(Ur))
- // The pointer is passed as an argument, ignore this.
- continue;
- if (isa<PtrToIntInst>(P))
- // Assume the worst.
- return true;
- if (Visited.insert(Ur))
- Worklist.push_back(Ur);
- }
- } while (!Worklist.empty());
-
- // Everything checked out.
- return false;
-}
-
-bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
- // Skip past provenance pass-throughs.
- A = GetUnderlyingObjCPtr(A);
- B = GetUnderlyingObjCPtr(B);
-
- // Quick check.
- if (A == B)
- return true;
-
- // Ask regular AliasAnalysis, for a first approximation.
- switch (AA->alias(A, B)) {
- case AliasAnalysis::NoAlias:
- return false;
- case AliasAnalysis::MustAlias:
- case AliasAnalysis::PartialAlias:
- return true;
- case AliasAnalysis::MayAlias:
- break;
- }
-
- bool AIsIdentified = IsObjCIdentifiedObject(A);
- bool BIsIdentified = IsObjCIdentifiedObject(B);
-
- // An ObjC-Identified object can't alias a load if it is never locally stored.
- if (AIsIdentified) {
- // Check for an obvious escape.
- if (isa<LoadInst>(B))
- return isStoredObjCPointer(A);
- if (BIsIdentified) {
- // Check for an obvious escape.
- if (isa<LoadInst>(A))
- return isStoredObjCPointer(B);
- // Both pointers are identified and escapes aren't an evident problem.
- return false;
- }
- } else if (BIsIdentified) {
- // Check for an obvious escape.
- if (isa<LoadInst>(A))
- return isStoredObjCPointer(B);
- }
-
- // Special handling for PHI and Select.
- if (const PHINode *PN = dyn_cast<PHINode>(A))
- return relatedPHI(PN, B);
- if (const PHINode *PN = dyn_cast<PHINode>(B))
- return relatedPHI(PN, A);
- if (const SelectInst *S = dyn_cast<SelectInst>(A))
- return relatedSelect(S, B);
- if (const SelectInst *S = dyn_cast<SelectInst>(B))
- return relatedSelect(S, A);
-
- // Conservative.
- return true;
-}
-
-bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
- // Begin by inserting a conservative value into the map. If the insertion
- // fails, we have the answer already. If it succeeds, leave it there until we
- // compute the real answer to guard against recursive queries.
- if (A > B) std::swap(A, B);
- std::pair<CachedResultsTy::iterator, bool> Pair =
- CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
- if (!Pair.second)
- return Pair.first->second;
-
- bool Result = relatedCheck(A, B);
- CachedResults[ValuePairTy(A, B)] = Result;
- return Result;
-}
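-
-// Illustrative recursion guard (not from the original source): if
-// relatedCheck on (A, B) ends up re-querying related(A, B), e.g. through
-// mutually referencing PHIs, the placeholder cache entry is found and the
-// conservative answer "true" is returned instead of recursing forever.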
-
-namespace {
- // Sequence - A sequence of states that a pointer may go through in which an
- // objc_retain and objc_release are actually needed.
- enum Sequence {
- S_None,
- S_Retain, ///< objc_retain(x)
- S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement
- S_Use, ///< any use of x
- S_Stop, ///< like S_Release, but code motion is stopped
- S_Release, ///< objc_release(x)
- S_MovableRelease ///< objc_release(x), !clang.imprecise_release
- };
-}
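-
-// Illustrative top-down progression for a pointer %x (not from the
-// original source):
-//   objc_retain(%x)    ; S_Retain
-//   foo(%x)            ; may decrement the ref count: S_CanRelease
-//   use(%x)            ; S_Use
-//   objc_release(%x)   ; completes the sequence; the retain/release pair
-//                      ; becomes a candidate for removal.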
-
-static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
- // The easy cases.
- if (A == B)
- return A;
- if (A == S_None || B == S_None)
- return S_None;
-
- if (A > B) std::swap(A, B);
- if (TopDown) {
- // Choose the side which is further along in the sequence.
- if ((A == S_Retain || A == S_CanRelease) &&
- (B == S_CanRelease || B == S_Use))
- return B;
- } else {
- // Choose the side which is further along in the sequence.
- if ((A == S_Use || A == S_CanRelease) &&
- (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
- return A;
- // If both sides are releases, choose the more conservative one.
- if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
- return A;
- if (A == S_Release && B == S_MovableRelease)
- return A;
- }
-
- return S_None;
-}
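-
-// Illustrative merges (not from the original source): top-down, S_Retain
-// merged with S_Use yields S_Use (the state further along); bottom-up,
-// S_MovableRelease merged with S_Release yields S_Release (the more
-// conservative release); anything merged with S_None yields S_None.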
-
-namespace {
- /// RRInfo - Unidirectional information about either a
- /// retain-decrement-use-release sequence or release-use-decrement-retain
- /// reverse sequence.
- struct RRInfo {
- /// KnownSafe - After an objc_retain, the reference count of the referenced
- /// object is known to be positive. Similarly, before an objc_release, the
- /// reference count of the referenced object is known to be positive. If
- /// there are retain-release pairs in code regions where the retain count
- /// is known to be positive, they can be eliminated, regardless of any side
- /// effects between them.
- ///
- /// Also, a retain+release pair nested within another retain+release
- /// pair all on the known same pointer value can be eliminated, regardless
- /// of any intervening side effects.
- ///
- /// KnownSafe is true when either of these conditions is satisfied.
- bool KnownSafe;
-
- /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
- /// opposed to objc_retain calls).
- bool IsRetainBlock;
-
- /// IsTailCallRelease - True if the objc_release calls are all marked
- /// with the "tail" keyword.
- bool IsTailCallRelease;
-
- /// ReleaseMetadata - If the Calls are objc_release calls and they all have
- /// a clang.imprecise_release tag, this is the metadata tag.
- MDNode *ReleaseMetadata;
-
- /// Calls - For a top-down sequence, the set of objc_retains or
- /// objc_retainBlocks. For bottom-up, the set of objc_releases.
- SmallPtrSet<Instruction *, 2> Calls;
-
- /// ReverseInsertPts - The set of optimal insert positions for
- /// moving calls in the opposite sequence.
- SmallPtrSet<Instruction *, 2> ReverseInsertPts;
-
- RRInfo() :
- KnownSafe(false), IsRetainBlock(false),
- IsTailCallRelease(false),
- ReleaseMetadata(0) {}
-
- void clear();
- };
-}
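-
-// Illustrative KnownSafe situation (not from the original source):
-//   objc_retain(%x)
-//     objc_retain(%x)    ; inner pair on the known same pointer
-//     ...                ; arbitrary side effects
-//     objc_release(%x)
-//   objc_release(%x)
-// The nested inner pair can be eliminated regardless of the intervening
-// side effects.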
-
-void RRInfo::clear() {
- KnownSafe = false;
- IsRetainBlock = false;
- IsTailCallRelease = false;
- ReleaseMetadata = 0;
- Calls.clear();
- ReverseInsertPts.clear();
-}
-
-namespace {
- /// PtrState - This class summarizes several per-pointer runtime properties
- /// which are propagated through the flow graph.
- class PtrState {
- /// KnownPositiveRefCount - True if the reference count is known to
- /// be incremented.
- bool KnownPositiveRefCount;
-
- /// Partial - True if we've seen an opportunity for partial RR elimination,
- /// such as pushing calls into a CFG triangle or into one side of a
- /// CFG diamond.
- bool Partial;
-
- /// Seq - The current position in the sequence.
- Sequence Seq : 8;
-
- public:
- /// RRI - Unidirectional information about the current sequence.
- /// TODO: Encapsulate this better.
- RRInfo RRI;
-
- PtrState() : KnownPositiveRefCount(false), Partial(false),
- Seq(S_None) {}
-
- void SetKnownPositiveRefCount() {
- KnownPositiveRefCount = true;
- }
-
- void ClearRefCount() {
- KnownPositiveRefCount = false;
- }
-
- bool IsKnownIncremented() const {
- return KnownPositiveRefCount;
- }
-
- void SetSeq(Sequence NewSeq) {
- Seq = NewSeq;
- }
-
- Sequence GetSeq() const {
- return Seq;
- }
-
- void ClearSequenceProgress() {
- ResetSequenceProgress(S_None);
- }
-
- void ResetSequenceProgress(Sequence NewSeq) {
- Seq = NewSeq;
- Partial = false;
- RRI.clear();
- }
-
- void Merge(const PtrState &Other, bool TopDown);
- };
-}
-
-void
-PtrState::Merge(const PtrState &Other, bool TopDown) {
- Seq = MergeSeqs(Seq, Other.Seq, TopDown);
- KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
-
- // We can't merge a plain objc_retain with an objc_retainBlock.
- if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
- Seq = S_None;
-
- // If we're not in a sequence (anymore), drop all associated state.
- if (Seq == S_None) {
- Partial = false;
- RRI.clear();
- } else if (Partial || Other.Partial) {
- // If we're doing a merge on a path that's previously seen a partial
- // merge, conservatively drop the sequence, to avoid doing partial
- // RR elimination. If the branch predicates for the two merges differ,
- // mixing them is unsafe.
- ClearSequenceProgress();
- } else {
- // Conservatively merge the ReleaseMetadata information.
- if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
- RRI.ReleaseMetadata = 0;
-
- RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
- RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
- Other.RRI.IsTailCallRelease;
- RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
-
- // Merge the insert point sets. If there are any differences,
- // that makes this a partial merge.
- Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size();
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- I = Other.RRI.ReverseInsertPts.begin(),
- E = Other.RRI.ReverseInsertPts.end(); I != E; ++I)
- Partial |= RRI.ReverseInsertPts.insert(*I);
- }
-}
-
-namespace {
- /// BBState - Per-BasicBlock state.
- class BBState {
- /// TopDownPathCount - The number of unique control paths from the entry
- /// which can reach this block.
- unsigned TopDownPathCount;
-
- /// BottomUpPathCount - The number of unique control paths to exits
- /// from this block.
- unsigned BottomUpPathCount;
-
- /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp.
- typedef MapVector<const Value *, PtrState> MapTy;
-
- /// PerPtrTopDown - The top-down traversal uses this to record information
- /// known about a pointer at the bottom of each block.
- MapTy PerPtrTopDown;
-
- /// PerPtrBottomUp - The bottom-up traversal uses this to record information
- /// known about a pointer at the top of each block.
- MapTy PerPtrBottomUp;
-
- /// Preds, Succs - Effective predecessors and successors of the current
- /// block (this ignores ignorable edges and ignored backedges).
- SmallVector<BasicBlock *, 2> Preds;
- SmallVector<BasicBlock *, 2> Succs;
-
- public:
- BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
-
- typedef MapTy::iterator ptr_iterator;
- typedef MapTy::const_iterator ptr_const_iterator;
-
- ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
- ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
- ptr_const_iterator top_down_ptr_begin() const {
- return PerPtrTopDown.begin();
- }
- ptr_const_iterator top_down_ptr_end() const {
- return PerPtrTopDown.end();
- }
-
- ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
- ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
- ptr_const_iterator bottom_up_ptr_begin() const {
- return PerPtrBottomUp.begin();
- }
- ptr_const_iterator bottom_up_ptr_end() const {
- return PerPtrBottomUp.end();
- }
-
- /// SetAsEntry - Mark this block as being an entry block, which has one
- /// path from the entry by definition.
- void SetAsEntry() { TopDownPathCount = 1; }
-
- /// SetAsExit - Mark this block as being an exit block, which has one
- /// path to an exit by definition.
- void SetAsExit() { BottomUpPathCount = 1; }
-
- PtrState &getPtrTopDownState(const Value *Arg) {
- return PerPtrTopDown[Arg];
- }
-
- PtrState &getPtrBottomUpState(const Value *Arg) {
- return PerPtrBottomUp[Arg];
- }
-
- void clearBottomUpPointers() {
- PerPtrBottomUp.clear();
- }
-
- void clearTopDownPointers() {
- PerPtrTopDown.clear();
- }
-
- void InitFromPred(const BBState &Other);
- void InitFromSucc(const BBState &Other);
- void MergePred(const BBState &Other);
- void MergeSucc(const BBState &Other);
-
- /// GetAllPathCount - Return the number of possible unique paths from an
- /// entry to an exit which pass through this block. This is only valid
- /// after both the top-down and bottom-up traversals are complete.
- unsigned GetAllPathCount() const {
- assert(TopDownPathCount != 0);
- assert(BottomUpPathCount != 0);
- return TopDownPathCount * BottomUpPathCount;
- }
-
- // Specialized CFG utilities.
- typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator;
- edge_iterator pred_begin() { return Preds.begin(); }
- edge_iterator pred_end() { return Preds.end(); }
- edge_iterator succ_begin() { return Succs.begin(); }
- edge_iterator succ_end() { return Succs.end(); }
-
- void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
- void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
-
- bool isExit() const { return Succs.empty(); }
- };
-}
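-
-// Illustrative path counts (not from the original source): in a CFG
-// diamond A -> {B, C} -> D, block D has TopDownPathCount == 2 and
-// BottomUpPathCount == 1, so GetAllPathCount() returns 2 unique
-// entry-to-exit paths through D.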
-
-void BBState::InitFromPred(const BBState &Other) {
- PerPtrTopDown = Other.PerPtrTopDown;
- TopDownPathCount = Other.TopDownPathCount;
-}
-
-void BBState::InitFromSucc(const BBState &Other) {
- PerPtrBottomUp = Other.PerPtrBottomUp;
- BottomUpPathCount = Other.BottomUpPathCount;
-}
-
-/// MergePred - The top-down traversal uses this to merge information about
-/// predecessors to form the initial state for a new block.
-void BBState::MergePred(const BBState &Other) {
- // Other.TopDownPathCount can be 0, in which case it is either dead or a
- // loop backedge. Loop backedges are special.
- TopDownPathCount += Other.TopDownPathCount;
-
- // Check for overflow. If we have overflow, fall back to conservative behavior.
- if (TopDownPathCount < Other.TopDownPathCount) {
- clearTopDownPointers();
- return;
- }
-
- // For each entry in the other set, if our set has an entry with the same key,
- // merge the entries. Otherwise, copy the entry and merge it with an empty
- // entry.
- for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
- ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
- std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
- Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
- /*TopDown=*/true);
- }
-
- // For each entry in our set, if the other set doesn't have an entry with the
- // same key, force it to merge with an empty entry.
- for (ptr_iterator MI = top_down_ptr_begin(),
- ME = top_down_ptr_end(); MI != ME; ++MI)
- if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
- MI->second.Merge(PtrState(), /*TopDown=*/true);
-}
-
-/// MergeSucc - The bottom-up traversal uses this to merge information about
-/// successors to form the initial state for a new block.
-void BBState::MergeSucc(const BBState &Other) {
- // Other.BottomUpPathCount can be 0, in which case it is either dead or a
- // loop backedge. Loop backedges are special.
- BottomUpPathCount += Other.BottomUpPathCount;
-
- // Check for overflow. If we have overflow, fall back to conservative behavior.
- if (BottomUpPathCount < Other.BottomUpPathCount) {
- clearBottomUpPointers();
- return;
- }
-
- // For each entry in the other set, if our set has an entry with the
- // same key, merge the entries. Otherwise, copy the entry and merge
- // it with an empty entry.
- for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
- ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
- std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
- Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
- /*TopDown=*/false);
- }
-
- // For each entry in our set, if the other set doesn't have an entry
- // with the same key, force it to merge with an empty entry.
- for (ptr_iterator MI = bottom_up_ptr_begin(),
- ME = bottom_up_ptr_end(); MI != ME; ++MI)
- if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
- MI->second.Merge(PtrState(), /*TopDown=*/false);
-}
-
-namespace {
- /// ObjCARCOpt - The main ARC optimization pass.
- class ObjCARCOpt : public FunctionPass {
- bool Changed;
- ProvenanceAnalysis PA;
-
- /// Run - A flag indicating whether this optimization pass should run.
- bool Run;
-
- /// RetainRVCallee, etc. - Declarations for ObjC runtime
- /// functions, for use in creating calls to them. These are initialized
- /// lazily to avoid cluttering up the Module with unused declarations.
- Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
- *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee;
-
- /// UsedInThisFunction - Flags which determine whether each of the
- /// interesting runtime functions is in fact used in the current function.
- unsigned UsedInThisFunction;
-
- /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release
- /// metadata.
- unsigned ImpreciseReleaseMDKind;
-
- /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape
- /// metadata.
- unsigned CopyOnEscapeMDKind;
-
- /// NoObjCARCExceptionsMDKind - The Metadata Kind for
- /// clang.arc.no_objc_arc_exceptions metadata.
- unsigned NoObjCARCExceptionsMDKind;
-
- Constant *getRetainRVCallee(Module *M);
- Constant *getAutoreleaseRVCallee(Module *M);
- Constant *getReleaseCallee(Module *M);
- Constant *getRetainCallee(Module *M);
- Constant *getRetainBlockCallee(Module *M);
- Constant *getAutoreleaseCallee(Module *M);
-
- bool IsRetainBlockOptimizable(const Instruction *Inst);
-
- void OptimizeRetainCall(Function &F, Instruction *Retain);
- bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
- void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
- void OptimizeIndividualCalls(Function &F);
-
- void CheckForCFGHazards(const BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- BBState &MyStates) const;
- bool VisitInstructionBottomUp(Instruction *Inst,
- BasicBlock *BB,
- MapVector<Value *, RRInfo> &Retains,
- BBState &MyStates);
- bool VisitBottomUp(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains);
- bool VisitInstructionTopDown(Instruction *Inst,
- DenseMap<Value *, RRInfo> &Releases,
- BBState &MyStates);
- bool VisitTopDown(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- DenseMap<Value *, RRInfo> &Releases);
- bool Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases);
-
- void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- Module *M);
-
- bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M);
-
- void OptimizeWeakCalls(Function &F);
-
- bool OptimizeSequences(Function &F);
-
- void OptimizeReturns(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
- virtual void releaseMemory();
-
- public:
- static char ID;
- ObjCARCOpt() : FunctionPass(ID) {
- initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
- }
- };
-}
-
-char ObjCARCOpt::ID = 0;
-INITIALIZE_PASS_BEGIN(ObjCARCOpt,
- "objc-arc", "ObjC ARC optimization", false, false)
-INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
-INITIALIZE_PASS_END(ObjCARCOpt,
- "objc-arc", "ObjC ARC optimization", false, false)
-
-Pass *llvm::createObjCARCOptPass() {
- return new ObjCARCOpt();
-}
-
-void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<ObjCARCAliasAnalysis>();
- AU.addRequired<AliasAnalysis>();
- // ARC optimization doesn't currently split critical edges.
- AU.setPreservesCFG();
-}
-
-bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
- // Without the magic metadata tag, we have to assume this might be an
- // objc_retainBlock call inserted to convert a block pointer to an id,
- // in which case it really is needed.
- if (!Inst->getMetadata(CopyOnEscapeMDKind))
- return false;
-
- // If the pointer "escapes" (not including being used in a call),
- // the copy may be needed.
- if (DoesObjCBlockEscape(Inst))
- return false;
-
- // Otherwise, it's not needed.
- return true;
-}
-
-Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
- if (!RetainRVCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes =
- AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::get(C, Attributes::NoUnwind));
- RetainRVCallee =
- M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
- Attributes);
- }
- return RetainRVCallee;
-}
-
-Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
- if (!AutoreleaseRVCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes =
- AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::get(C, Attributes::NoUnwind));
- AutoreleaseRVCallee =
- M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
- Attributes);
- }
- return AutoreleaseRVCallee;
-}
-
-Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
- if (!ReleaseCallee) {
- LLVMContext &C = M->getContext();
- Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttrListPtr Attributes =
- AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::get(C, Attributes::NoUnwind));
- ReleaseCallee =
- M->getOrInsertFunction(
- "objc_release",
- FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
- Attributes);
- }
- return ReleaseCallee;
-}
-
-Constant *ObjCARCOpt::getRetainCallee(Module *M) {
- if (!RetainCallee) {
- LLVMContext &C = M->getContext();
- Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttrListPtr Attributes =
- AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::get(C, Attributes::NoUnwind));
- RetainCallee =
- M->getOrInsertFunction(
- "objc_retain",
- FunctionType::get(Params[0], Params, /*isVarArg=*/false),
- Attributes);
- }
- return RetainCallee;
-}
-
-Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
- if (!RetainBlockCallee) {
- LLVMContext &C = M->getContext();
- Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- // objc_retainBlock is not nounwind because it calls user copy constructors
- // which could theoretically throw.
- RetainBlockCallee =
- M->getOrInsertFunction(
- "objc_retainBlock",
- FunctionType::get(Params[0], Params, /*isVarArg=*/false),
- AttrListPtr());
- }
- return RetainBlockCallee;
-}
-
-Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
- if (!AutoreleaseCallee) {
- LLVMContext &C = M->getContext();
- Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttrListPtr Attributes =
- AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::get(C, Attributes::NoUnwind));
- AutoreleaseCallee =
- M->getOrInsertFunction(
- "objc_autorelease",
- FunctionType::get(Params[0], Params, /*isVarArg=*/false),
- Attributes);
- }
- return AutoreleaseCallee;
-}
-
- /// IsPotentialUse - Test whether the given value is possibly a
- /// reference-counted pointer, including tests which utilize AliasAnalysis.
-static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) {
- // First make the rudimentary check.
- if (!IsPotentialUse(Op))
- return false;
-
- // Objects in constant memory are not reference-counted.
- if (AA.pointsToConstantMemory(Op))
- return false;
-
- // Pointers in constant memory are not pointing to reference-counted objects.
- if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
- if (AA.pointsToConstantMemory(LI->getPointerOperand()))
- return false;
-
- // Otherwise assume the worst.
- return true;
-}
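-
-// For example (an illustrative IR sketch), %p is rejected here because it is
-// loaded through a global marked constant, and pointers in constant memory
-// never point to reference-counted objects:
-//
-//   @g = external constant i8*
-//   %p = load i8** @g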
-
-/// CanAlterRefCount - Test whether the given instruction can result in a
-/// reference count modification (positive or negative) for the pointer's
-/// object.
-static bool
-CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
- ProvenanceAnalysis &PA, InstructionClass Class) {
- switch (Class) {
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_User:
- // These operations never directly modify a reference count.
- return false;
- default: break;
- }
-
- ImmutableCallSite CS = static_cast<const Value *>(Inst);
- assert(CS && "Only calls can alter reference counts!");
-
- // See if AliasAnalysis can help us with the call.
- AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
- if (AliasAnalysis::onlyReadsMemory(MRB))
- return false;
- if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
- for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I) {
- const Value *Op = *I;
- if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
- return true;
- }
- return false;
- }
-
- // Assume the worst.
- return true;
-}
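-
-// For example (an illustrative IR sketch; the readonly attribute on @strlen
-// is an assumption), a call which only reads memory can never alter a
-// reference count, so CanAlterRefCount returns false for it:
-//
-//   declare i64 @strlen(i8*) readonly
-//   %n = call i64 @strlen(i8* %s)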
-
-/// CanUse - Test whether the given instruction can "use" the given pointer's
-/// object in a way that requires the reference count to be positive.
-static bool
-CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
- InstructionClass Class) {
- // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers.
- if (Class == IC_Call)
- return false;
-
- // Consider various kinds of instructions whose pointer arguments are not
- // necessarily "uses".
- if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) {
- // Comparing a pointer with null, or any other constant, isn't really a use,
- // because we don't care what the pointer points to, or about the values
- // of any other dynamic reference-counted pointers.
- if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA()))
- return false;
- } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
- // For calls, just check the arguments (and not the callee operand).
- for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
- OE = CS.arg_end(); OI != OE; ++OI) {
- const Value *Op = *OI;
- if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
- return true;
- }
- return false;
- } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- // Special-case stores, because we don't care about the stored value, just
- // the store address.
- const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
- // If we can't tell what the underlying object was, assume there is a
- // dependence.
- return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr);
- }
-
- // Check each operand for a match.
- for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
- OI != OE; ++OI) {
- const Value *Op = *OI;
- if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
- return true;
- }
- return false;
-}
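-
-// For example (an illustrative IR sketch), comparing a pointer against null
-// is not a "use" of the object, so it never requires a positive reference
-// count:
-//
-//   %cmp = icmp eq i8* %x, null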
-
-/// CanInterruptRV - Test whether the given instruction can autorelease
-/// any pointer or cause an autoreleasepool pop.
-static bool
-CanInterruptRV(InstructionClass Class) {
- switch (Class) {
- case IC_AutoreleasepoolPop:
- case IC_CallOrUser:
- case IC_Call:
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV:
- return true;
- default:
- return false;
- }
-}
-
-namespace {
- /// DependenceKind - There are several kinds of dependence-like concepts in
- /// use here.
- enum DependenceKind {
- NeedsPositiveRetainCount,
- AutoreleasePoolBoundary,
- CanChangeRetainCount,
- RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
- RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
- RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue.
- };
-}
-
-/// Depends - Test if there can be dependencies on Inst through Arg. This
-/// function only tests dependencies relevant for removing pairs of calls.
-static bool
-Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
- ProvenanceAnalysis &PA) {
- // If we've reached the definition of Arg, stop.
- if (Inst == Arg)
- return true;
-
- switch (Flavor) {
- case NeedsPositiveRetainCount: {
- InstructionClass Class = GetInstructionClass(Inst);
- switch (Class) {
- case IC_AutoreleasepoolPop:
- case IC_AutoreleasepoolPush:
- case IC_None:
- return false;
- default:
- return CanUse(Inst, Arg, PA, Class);
- }
- }
-
- case AutoreleasePoolBoundary: {
- InstructionClass Class = GetInstructionClass(Inst);
- switch (Class) {
- case IC_AutoreleasepoolPop:
- case IC_AutoreleasepoolPush:
- // These mark the end and begin of an autorelease pool scope.
- return true;
- default:
- // Nothing else does this.
- return false;
- }
- }
-
- case CanChangeRetainCount: {
- InstructionClass Class = GetInstructionClass(Inst);
- switch (Class) {
- case IC_AutoreleasepoolPop:
- // Conservatively assume this can decrement any count.
- return true;
- case IC_AutoreleasepoolPush:
- case IC_None:
- return false;
- default:
- return CanAlterRefCount(Inst, Arg, PA, Class);
- }
- }
-
- case RetainAutoreleaseDep:
- switch (GetBasicInstructionClass(Inst)) {
- case IC_AutoreleasepoolPop:
- case IC_AutoreleasepoolPush:
- // Don't merge an objc_autorelease with an objc_retain inside a different
- // autoreleasepool scope.
- return true;
- case IC_Retain:
- case IC_RetainRV:
- // Check for a retain of the same pointer for merging.
- return GetObjCArg(Inst) == Arg;
- default:
- // Nothing else matters for objc_retainAutorelease formation.
- return false;
- }
-
- case RetainAutoreleaseRVDep: {
- InstructionClass Class = GetBasicInstructionClass(Inst);
- switch (Class) {
- case IC_Retain:
- case IC_RetainRV:
- // Check for a retain of the same pointer for merging.
- return GetObjCArg(Inst) == Arg;
- default:
- // Anything that can autorelease interrupts
- // retainAutoreleaseReturnValue formation.
- return CanInterruptRV(Class);
- }
- }
-
- case RetainRVDep:
- return CanInterruptRV(GetBasicInstructionClass(Inst));
- }
-
- llvm_unreachable("Invalid dependence flavor");
-}
-
-/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and
-/// find local and non-local dependencies on Arg.
-/// TODO: Cache results?
-static void
-FindDependencies(DependenceKind Flavor,
- const Value *Arg,
- BasicBlock *StartBB, Instruction *StartInst,
- SmallPtrSet<Instruction *, 4> &DependingInstructions,
- SmallPtrSet<const BasicBlock *, 4> &Visited,
- ProvenanceAnalysis &PA) {
- BasicBlock::iterator StartPos = StartInst;
-
- SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
- Worklist.push_back(std::make_pair(StartBB, StartPos));
- do {
- std::pair<BasicBlock *, BasicBlock::iterator> Pair =
- Worklist.pop_back_val();
- BasicBlock *LocalStartBB = Pair.first;
- BasicBlock::iterator LocalStartPos = Pair.second;
- BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
- for (;;) {
- if (LocalStartPos == StartBBBegin) {
- pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
- if (PI == PE)
- // If we've reached the function entry, produce a null dependence.
- DependingInstructions.insert(0);
- else
- // Add the predecessors to the worklist.
- do {
- BasicBlock *PredBB = *PI;
- if (Visited.insert(PredBB))
- Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
- } while (++PI != PE);
- break;
- }
-
- Instruction *Inst = --LocalStartPos;
- if (Depends(Flavor, Inst, Arg, PA)) {
- DependingInstructions.insert(Inst);
- break;
- }
- }
- } while (!Worklist.empty());
-
- // Determine whether the original StartBB post-dominates all of the blocks we
- // visited. If not, insert a sentinel indicating that most optimizations are
- // not safe.
- for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
- E = Visited.end(); I != E; ++I) {
- const BasicBlock *BB = *I;
- if (BB == StartBB)
- continue;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
- const BasicBlock *Succ = *SI;
- if (Succ != StartBB && !Visited.count(Succ)) {
- DependingInstructions.insert(reinterpret_cast<Instruction *>(-1));
- return;
- }
- }
- }
-}
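-
-// A typical caller looks like this (a sketch; Deps, Visited, and Expected are
-// hypothetical local names):
-//
-//   SmallPtrSet<Instruction *, 4> Deps;
-//   SmallPtrSet<const BasicBlock *, 4> Visited;
-//   FindDependencies(NeedsPositiveRetainCount, Arg, BB, Inst, Deps,
-//                    Visited, PA);
-//   if (Deps.size() == 1 && *Deps.begin() == Expected) { /* unique dep */ }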
-
-static bool isNullOrUndef(const Value *V) {
- return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
-}
-
-static bool isNoopInstruction(const Instruction *I) {
- return isa<BitCastInst>(I) ||
- (isa<GetElementPtrInst>(I) &&
- cast<GetElementPtrInst>(I)->hasAllZeroIndices());
-}
-
-/// OptimizeRetainCall - Turn objc_retain into
-/// objc_retainAutoreleasedReturnValue if the operand is a return value.
-void
-ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
- ImmutableCallSite CS(GetObjCArg(Retain));
- const Instruction *Call = CS.getInstruction();
- if (!Call) return;
- if (Call->getParent() != Retain->getParent()) return;
-
- // Check that the call is next to the retain.
- BasicBlock::const_iterator I = Call;
- ++I;
- while (isNoopInstruction(I)) ++I;
- if (&*I != Retain)
- return;
-
- // Turn it into an objc_retainAutoreleasedReturnValue.
- Changed = true;
- ++NumPeeps;
- cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
-}
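-
-// For example (an illustrative IR sketch, assuming @foo returns a retainable
-// object pointer), this rewrites:
-//
-//   %call = call i8* @foo()
-//   %0 = call i8* @objc_retain(i8* %call)
-//
-// into:
-//
-//   %call = call i8* @foo()
-//   %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call)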
-
-/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into
-/// objc_retain if the operand is not a return value. Or, if it can be paired
-/// with an objc_autoreleaseReturnValue, delete the pair and return true.
-bool
-ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
- // Check for the argument being from an immediately preceding call or invoke.
- const Value *Arg = GetObjCArg(RetainRV);
- ImmutableCallSite CS(Arg);
- if (const Instruction *Call = CS.getInstruction()) {
- if (Call->getParent() == RetainRV->getParent()) {
- BasicBlock::const_iterator I = Call;
- ++I;
- while (isNoopInstruction(I)) ++I;
- if (&*I == RetainRV)
- return false;
- } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- BasicBlock *RetainRVParent = RetainRV->getParent();
- if (II->getNormalDest() == RetainRVParent) {
- BasicBlock::const_iterator I = RetainRVParent->begin();
- while (isNoopInstruction(I)) ++I;
- if (&*I == RetainRV)
- return false;
- }
- }
- }
-
- // Check for being preceded by an objc_autoreleaseReturnValue on the same
- // pointer. In this case, we can delete the pair.
- BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
- if (I != Begin) {
- do --I; while (I != Begin && isNoopInstruction(I));
- if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
- GetObjCArg(I) == Arg) {
- Changed = true;
- ++NumPeeps;
- EraseInstruction(I);
- EraseInstruction(RetainRV);
- return true;
- }
- }
-
- // Turn it into a plain objc_retain.
- Changed = true;
- ++NumPeeps;
- cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
- return false;
-}
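-
-// For example (an illustrative IR sketch), a back-to-back pair on the same
-// pointer cancels out and both calls are erased:
-//
-//   %0 = call i8* @objc_autoreleaseReturnValue(i8* %p)
-//   %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %0)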
-
-/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into
-/// objc_autorelease if the result is not used as a return value.
-void
-ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
- // Check for a return of the pointer value.
- const Value *Ptr = GetObjCArg(AutoreleaseRV);
- SmallVector<const Value *, 2> Users;
- Users.push_back(Ptr);
- do {
- Ptr = Users.pop_back_val();
- for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
- UI != UE; ++UI) {
- const User *I = *UI;
- if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
- return;
- if (isa<BitCastInst>(I))
- Users.push_back(I);
- }
- } while (!Users.empty());
-
- Changed = true;
- ++NumPeeps;
- cast<CallInst>(AutoreleaseRV)->
- setCalledFunction(getAutoreleaseCallee(F.getParent()));
-}
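-
-// For example (an illustrative IR sketch), when the pointer is never returned
-// from the function, the ReturnValue form is downgraded:
-//
-//   %0 = call i8* @objc_autoreleaseReturnValue(i8* %p)
-//   store i8* %p, i8** %slot
-//
-// becomes:
-//
-//   %0 = call i8* @objc_autorelease(i8* %p)
-//   store i8* %p, i8** %slot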
-
-/// OptimizeIndividualCalls - Visit each call, one at a time, and make
-/// simplifications without doing any additional analysis.
-void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
- // Reset all the flags in preparation for recomputing them.
- UsedInThisFunction = 0;
-
- // Visit all objc_* calls in F.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
- InstructionClass Class = GetBasicInstructionClass(Inst);
-
- switch (Class) {
- default: break;
-
- // Delete no-op casts. These function calls have special semantics, but
- // the semantics are entirely implemented via lowering in the front-end,
- // so by the time they reach the optimizer, they are just no-op calls
- // which return their argument.
- //
- // There are gray areas here, as the ability to cast reference-counted
- // pointers to raw void* and back allows code to break ARC assumptions;
- // however, these are currently considered to be unimportant.
- case IC_NoopCast:
- Changed = true;
- ++NumNoops;
- EraseInstruction(Inst);
- continue;
-
- // If the pointer-to-weak-pointer is null, it's undefined behavior.
- case IC_StoreWeak:
- case IC_LoadWeak:
- case IC_LoadWeakRetained:
- case IC_InitWeak:
- case IC_DestroyWeak: {
- CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(0))) {
- Changed = true;
- Type *Ty = CI->getArgOperand(0)->getType();
- new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
- Constant::getNullValue(Ty),
- CI);
- CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
- CI->eraseFromParent();
- continue;
- }
- break;
- }
- case IC_CopyWeak:
- case IC_MoveWeak: {
- CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(0)) ||
- isNullOrUndef(CI->getArgOperand(1))) {
- Changed = true;
- Type *Ty = CI->getArgOperand(0)->getType();
- new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
- Constant::getNullValue(Ty),
- CI);
- CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
- CI->eraseFromParent();
- continue;
- }
- break;
- }
- case IC_Retain:
- OptimizeRetainCall(F, Inst);
- break;
- case IC_RetainRV:
- if (OptimizeRetainRVCall(F, Inst))
- continue;
- break;
- case IC_AutoreleaseRV:
- OptimizeAutoreleaseRVCall(F, Inst);
- break;
- }
-
- // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
- if (IsAutorelease(Class) && Inst->use_empty()) {
- CallInst *Call = cast<CallInst>(Inst);
- const Value *Arg = Call->getArgOperand(0);
- Arg = FindSingleUseIdentifiedObject(Arg);
- if (Arg) {
- Changed = true;
- ++NumAutoreleases;
-
- // Create the declaration lazily.
- LLVMContext &C = Inst->getContext();
- CallInst *NewCall =
- CallInst::Create(getReleaseCallee(F.getParent()),
- Call->getArgOperand(0), "", Call);
- NewCall->setMetadata(ImpreciseReleaseMDKind,
- MDNode::get(C, ArrayRef<Value *>()));
- EraseInstruction(Call);
- Inst = NewCall;
- Class = IC_Release;
- }
- }
-
- // For functions which can never be passed stack arguments, add
- // a tail keyword.
- if (IsAlwaysTail(Class)) {
- Changed = true;
- cast<CallInst>(Inst)->setTailCall();
- }
-
- // Set nounwind as needed.
- if (IsNoThrow(Class)) {
- Changed = true;
- cast<CallInst>(Inst)->setDoesNotThrow();
- }
-
- if (!IsNoopOnNull(Class)) {
- UsedInThisFunction |= 1 << Class;
- continue;
- }
-
- const Value *Arg = GetObjCArg(Inst);
-
- // ARC calls with null are no-ops. Delete them.
- if (isNullOrUndef(Arg)) {
- Changed = true;
- ++NumNoops;
- EraseInstruction(Inst);
- continue;
- }
-
- // Keep track of which of retain, release, autorelease, and retain_block
- // are actually present in this function.
- UsedInThisFunction |= 1 << Class;
-
- // If Arg is a PHI, and one or more incoming values to the
- // PHI are null, and the call is control-equivalent to the PHI, and there
- // are no relevant side effects between the PHI and the call, the call
- // could be pushed up to just those paths with non-null incoming values.
- // For now, don't bother splitting critical edges for this.
- SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
- Worklist.push_back(std::make_pair(Inst, Arg));
- do {
- std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
- Inst = Pair.first;
- Arg = Pair.second;
-
- const PHINode *PN = dyn_cast<PHINode>(Arg);
- if (!PN) continue;
-
- // Determine if the PHI has any null operands, or any incoming
- // critical edges.
- bool HasNull = false;
- bool HasCriticalEdges = false;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming =
- StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
- if (isNullOrUndef(Incoming))
- HasNull = true;
- else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
- .getNumSuccessors() != 1) {
- HasCriticalEdges = true;
- break;
- }
- }
- // If we have null operands and no critical edges, optimize.
- if (!HasCriticalEdges && HasNull) {
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
-
- // Check that there is nothing that cares about the reference
- // count between the call and the phi.
- switch (Class) {
- case IC_Retain:
- case IC_RetainBlock:
- // These can always be moved up.
- break;
- case IC_Release:
- // These can't be moved across things that care about the retain
- // count.
- FindDependencies(NeedsPositiveRetainCount, Arg,
- Inst->getParent(), Inst,
- DependingInstructions, Visited, PA);
- break;
- case IC_Autorelease:
- // These can't be moved across autorelease pool scope boundaries.
- FindDependencies(AutoreleasePoolBoundary, Arg,
- Inst->getParent(), Inst,
- DependingInstructions, Visited, PA);
- break;
- case IC_RetainRV:
- case IC_AutoreleaseRV:
- // Don't move these; the RV optimization depends on the autoreleaseRV
- // being tail called, and the retainRV being immediately after a call
- // (which might still happen if we get lucky with codegen layout, but
- // it's not worth taking the chance).
- continue;
- default:
- llvm_unreachable("Invalid dependence flavor");
- }
-
- if (DependingInstructions.size() == 1 &&
- *DependingInstructions.begin() == PN) {
- Changed = true;
- ++NumPartialNoops;
- // Clone the call into each predecessor that has a non-null value.
- CallInst *CInst = cast<CallInst>(Inst);
- Type *ParamTy = CInst->getArgOperand(0)->getType();
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming =
- StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
- if (!isNullOrUndef(Incoming)) {
- CallInst *Clone = cast<CallInst>(CInst->clone());
- Value *Op = PN->getIncomingValue(i);
- Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
- if (Op->getType() != ParamTy)
- Op = new BitCastInst(Op, ParamTy, "", InsertPos);
- Clone->setArgOperand(0, Op);
- Clone->insertBefore(InsertPos);
- Worklist.push_back(std::make_pair(Clone, Incoming));
- }
- }
- // Erase the original call.
- EraseInstruction(CInst);
- continue;
- }
- }
- } while (!Worklist.empty());
- }
-}
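-
-// Two of the simple cases above, as an illustrative IR sketch: a no-op cast
-// call and an ARC call on null are both deleted outright:
-//
-//   %0 = call i8* @objc_retainedObject(i8* %p)   ; no-op cast, erased
-//   call void @objc_release(i8* null)            ; no-op on null, erased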
-
-/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible
-/// control flow, or other CFG structures where moving code across the edge
-/// would result in it being executed more.
-void
-ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- BBState &MyStates) const {
- // If any top-down local use or possible decrement has a successor which is
- // earlier in the sequence, forget it.
- for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
- E = MyStates.top_down_ptr_end(); I != E; ++I)
- switch (I->second.GetSeq()) {
- default: break;
- case S_Use: {
- const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- bool SomeSuccHasSame = false;
- bool AllSuccsHaveSame = true;
- PtrState &S = I->second;
- succ_const_iterator SI(TI), SE(TI, false);
-
- // If the terminator is an invoke marked with the
- // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
- // ignored, for ARC purposes.
- if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
- --SE;
-
- for (; SI != SE; ++SI) {
- Sequence SuccSSeq = S_None;
- bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has pointer information for this successor, take
- // what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None:
- case S_CanRelease: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break;
- }
- continue;
- }
- case S_Use:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
- S.ClearSequenceProgress();
- break;
- }
- case S_CanRelease: {
- const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- bool SomeSuccHasSame = false;
- bool AllSuccsHaveSame = true;
- PtrState &S = I->second;
- succ_const_iterator SI(TI), SE(TI, false);
-
- // If the terminator is an invoke marked with the
- // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
- // ignored, for ARC purposes.
- if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
- --SE;
-
- for (; SI != SE; ++SI) {
- Sequence SuccSSeq = S_None;
- bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has pointer information for this successor, take
- // what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break;
- }
- continue;
- }
- case S_CanRelease:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- case S_Use:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
- S.ClearSequenceProgress();
- break;
- }
- }
-}
-
-bool
-ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
- BasicBlock *BB,
- MapVector<Value *, RRInfo> &Retains,
- BBState &MyStates) {
- bool NestingDetected = false;
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
-
- switch (Class) {
- case IC_Release: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
-
- // If we see two releases in a row on the same pointer, make a note;
- // we'll circle back to revisit it after we've hopefully eliminated the
- // second release, which may allow us to eliminate the first release too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
- NestingDetected = true;
-
- MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
- S.RRI.ReleaseMetadata = ReleaseMetadata;
- S.RRI.KnownSafe = S.IsKnownIncremented();
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
- S.RRI.Calls.insert(Inst);
-
- S.SetKnownPositiveRefCount();
- break;
- }
- case IC_RetainBlock:
- // An objc_retainBlock call with just a use may need to be kept,
- // because it may be copying a block from the stack to the heap.
- if (!IsRetainBlockOptimizable(Inst))
- break;
- // FALLTHROUGH
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
- S.SetKnownPositiveRefCount();
-
- switch (S.GetSeq()) {
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- case S_Use:
- S.RRI.ReverseInsertPts.clear();
- // FALL THROUGH
- case S_CanRelease:
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- Retains[Inst] = S.RRI;
- }
- S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- return NestingDetected;
- }
- case IC_AutoreleasepoolPop:
- // Conservatively, clear MyStates for all known pointers.
- MyStates.clearBottomUpPointers();
- return NestingDetected;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- return NestingDetected;
- default:
- break;
- }
-
- // Consider any other possible effects of this instruction on each
- // pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
- ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
- const Value *Ptr = MI->first;
- if (Ptr == Arg)
- continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.ClearRefCount();
- switch (Seq) {
- case S_Use:
- S.SetSeq(S_CanRelease);
- continue;
- case S_CanRelease:
- case S_Release:
- case S_MovableRelease:
- case S_Stop:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
-
- // Check for possible direct uses.
- switch (Seq) {
- case S_Release:
- case S_MovableRelease:
- if (CanUse(Inst, Ptr, PA, Class)) {
- assert(S.RRI.ReverseInsertPts.empty());
- // If this is an invoke instruction, we're scanning it as part of
- // one of its successor blocks, since we can't insert code after it
- // in its own block, and we don't want to split critical edges.
- if (isa<InvokeInst>(Inst))
- S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
- else
- S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
- S.SetSeq(S_Use);
- } else if (Seq == S_Release &&
- (Class == IC_User || Class == IC_CallOrUser)) {
- // Non-movable releases depend on any possible objc pointer use.
- S.SetSeq(S_Stop);
- assert(S.RRI.ReverseInsertPts.empty());
- // As above; handle invoke specially.
- if (isa<InvokeInst>(Inst))
- S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
- else
- S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
- }
- break;
- case S_Stop:
- if (CanUse(Inst, Ptr, PA, Class))
- S.SetSeq(S_Use);
- break;
- case S_CanRelease:
- case S_Use:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
-
- return NestingDetected;
-}
-
-bool
-ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains) {
- bool NestingDetected = false;
- BBState &MyStates = BBStates[BB];
-
- // Merge the states from each successor to compute the initial state
- // for the current block.
- BBState::edge_iterator SI(MyStates.succ_begin()),
- SE(MyStates.succ_end());
- if (SI != SE) {
- const BasicBlock *Succ = *SI;
- DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
- assert(I != BBStates.end());
- MyStates.InitFromSucc(I->second);
- ++SI;
- for (; SI != SE; ++SI) {
- Succ = *SI;
- I = BBStates.find(Succ);
- assert(I != BBStates.end());
- MyStates.MergeSucc(I->second);
- }
- }
-
- // Visit all the instructions, bottom-up.
- for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
- Instruction *Inst = llvm::prior(I);
-
- // Invoke instructions are visited as part of their successors (below).
- if (isa<InvokeInst>(Inst))
- continue;
-
- NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
- }
-
- // If there's a predecessor with an invoke, visit the invoke as if it were
- // part of this block, since we can't insert code after an invoke in its own
- // block, and we don't want to split critical edges.
- for (BBState::edge_iterator PI(MyStates.pred_begin()),
- PE(MyStates.pred_end()); PI != PE; ++PI) {
- BasicBlock *Pred = *PI;
- if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
- NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
- }
-
- return NestingDetected;
-}
-
-bool
-ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
- DenseMap<Value *, RRInfo> &Releases,
- BBState &MyStates) {
- bool NestingDetected = false;
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
-
- switch (Class) {
- case IC_RetainBlock:
- // An objc_retainBlock call with just a use may need to be kept,
- // because it may be copying a block from the stack to the heap.
- if (!IsRetainBlockOptimizable(Inst))
- break;
- // FALLTHROUGH
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
-
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- // If we see two retains in a row on the same pointer, make a note;
- // we'll circle back to revisit it after we've hopefully eliminated the
- // second retain, which may allow us to eliminate the first retain too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Retain)
- NestingDetected = true;
-
- S.ResetSequenceProgress(S_Retain);
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- S.RRI.KnownSafe = S.IsKnownIncremented();
- S.RRI.Calls.insert(Inst);
- }
-
- S.SetKnownPositiveRefCount();
-
- // A retain can be a potential use; proceed to the generic checking
- // code below.
- break;
- }
- case IC_Release: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.ClearRefCount();
-
- switch (S.GetSeq()) {
- case S_Retain:
- case S_CanRelease:
- S.RRI.ReverseInsertPts.clear();
- // FALL THROUGH
- case S_Use:
- S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
- Releases[Inst] = S.RRI;
- S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- break;
- }
- case IC_AutoreleasepoolPop:
- // Conservatively, clear MyStates for all known pointers.
- MyStates.clearTopDownPointers();
- return NestingDetected;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- return NestingDetected;
- default:
- break;
- }
-
- // Consider any other possible effects of this instruction on each
- // pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
- ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
- const Value *Ptr = MI->first;
- if (Ptr == Arg)
- continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.ClearRefCount();
- switch (Seq) {
- case S_Retain:
- S.SetSeq(S_CanRelease);
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
-
- // One call can't cause a transition from S_Retain to S_CanRelease
- // and S_CanRelease to S_Use. If we've made the first transition,
- // we're done.
- continue;
- case S_Use:
- case S_CanRelease:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- }
-
- // Check for possible direct uses.
- switch (Seq) {
- case S_CanRelease:
- if (CanUse(Inst, Ptr, PA, Class))
- S.SetSeq(S_Use);
- break;
- case S_Retain:
- case S_Use:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- }
-
- return NestingDetected;
-}
-
-bool
-ObjCARCOpt::VisitTopDown(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- DenseMap<Value *, RRInfo> &Releases) {
- bool NestingDetected = false;
- BBState &MyStates = BBStates[BB];
-
- // Merge the states from each predecessor to compute the initial state
- // for the current block.
- BBState::edge_iterator PI(MyStates.pred_begin()),
- PE(MyStates.pred_end());
- if (PI != PE) {
- const BasicBlock *Pred = *PI;
- DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- assert(I != BBStates.end());
- MyStates.InitFromPred(I->second);
- ++PI;
- for (; PI != PE; ++PI) {
- Pred = *PI;
- I = BBStates.find(Pred);
- assert(I != BBStates.end());
- MyStates.MergePred(I->second);
- }
- }
-
- // Visit all the instructions, top-down.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- Instruction *Inst = I;
- NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
- }
-
- CheckForCFGHazards(BB, BBStates, MyStates);
- return NestingDetected;
-}
-
-static void
-ComputePostOrders(Function &F,
- SmallVectorImpl<BasicBlock *> &PostOrder,
- SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
- unsigned NoObjCARCExceptionsMDKind,
- DenseMap<const BasicBlock *, BBState> &BBStates) {
- /// Visited - The visited set, for doing DFS walks.
- SmallPtrSet<BasicBlock *, 16> Visited;
-
- // Do DFS, computing the PostOrder.
- SmallPtrSet<BasicBlock *, 16> OnStack;
- SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
-
- // Functions always have exactly one entry block, and we don't have
- // any other block that we treat like an entry block.
- BasicBlock *EntryBB = &F.getEntryBlock();
- BBState &MyStates = BBStates[EntryBB];
- MyStates.SetAsEntry();
- TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back());
- SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
- Visited.insert(EntryBB);
- OnStack.insert(EntryBB);
- do {
- dfs_next_succ:
- BasicBlock *CurrBB = SuccStack.back().first;
- TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back());
- succ_iterator SE(TI, false);
-
- // If the terminator is an invoke marked with the
- // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
- // ignored, for ARC purposes.
- if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
- --SE;
-
- while (SuccStack.back().second != SE) {
- BasicBlock *SuccBB = *SuccStack.back().second++;
- if (Visited.insert(SuccBB)) {
- TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back());
- SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
- BBStates[CurrBB].addSucc(SuccBB);
- BBState &SuccStates = BBStates[SuccBB];
- SuccStates.addPred(CurrBB);
- OnStack.insert(SuccBB);
- goto dfs_next_succ;
- }
-
- if (!OnStack.count(SuccBB)) {
- BBStates[CurrBB].addSucc(SuccBB);
- BBStates[SuccBB].addPred(CurrBB);
- }
- }
- OnStack.erase(CurrBB);
- PostOrder.push_back(CurrBB);
- SuccStack.pop_back();
- } while (!SuccStack.empty());
-
- Visited.clear();
-
- // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
- // Functions may have many exits, and there are also blocks which we treat
- // as exits due to ignored edges.
- SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *ExitBB = I;
- BBState &MyStates = BBStates[ExitBB];
- if (!MyStates.isExit())
- continue;
-
- MyStates.SetAsExit();
-
- PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
- Visited.insert(ExitBB);
- while (!PredStack.empty()) {
- reverse_dfs_next_succ:
- BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
- while (PredStack.back().second != PE) {
- BasicBlock *BB = *PredStack.back().second++;
- if (Visited.insert(BB)) {
- PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
- goto reverse_dfs_next_succ;
- }
- }
- ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
- }
- }
-}
-
-// Visit - Visit the function both top-down and bottom-up.
-bool
-ObjCARCOpt::Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
-
- // Use reverse-postorder traversals, because we magically know that loops
- // will be well behaved, i.e. they won't repeatedly call retain on a single
- // pointer without doing a release. We can't use the ReversePostOrderTraversal
- // class here because we want the reverse-CFG postorder to consider each
- // function exit point, and we want to ignore selected cycle edges.
- SmallVector<BasicBlock *, 16> PostOrder;
- SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
- ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
- NoObjCARCExceptionsMDKind,
- BBStates);
-
- // Use reverse-postorder on the reverse CFG for bottom-up.
- bool BottomUpNestingDetected = false;
- for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
- ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
- I != E; ++I)
- BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
-
- // Use reverse-postorder for top-down.
- bool TopDownNestingDetected = false;
- for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
- PostOrder.rbegin(), E = PostOrder.rend();
- I != E; ++I)
- TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
-
- return TopDownNestingDetected && BottomUpNestingDetected;
-}
-
-/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove.
-void ObjCARCOpt::MoveCalls(Value *Arg,
- RRInfo &RetainsToMove,
- RRInfo &ReleasesToMove,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- Module *M) {
- Type *ArgTy = Arg->getType();
- Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
-
- // Insert the new retain and release calls.
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- PI = ReleasesToMove.ReverseInsertPts.begin(),
- PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
- Instruction *InsertPt = *PI;
- Value *MyArg = ArgTy == ParamTy ? Arg :
- new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call =
- CallInst::Create(RetainsToMove.IsRetainBlock ?
- getRetainBlockCallee(M) : getRetainCallee(M),
- MyArg, "", InsertPt);
- Call->setDoesNotThrow();
- if (RetainsToMove.IsRetainBlock)
- Call->setMetadata(CopyOnEscapeMDKind,
- MDNode::get(M->getContext(), ArrayRef<Value *>()));
- else
- Call->setTailCall();
- }
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- PI = RetainsToMove.ReverseInsertPts.begin(),
- PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
- Instruction *InsertPt = *PI;
- Value *MyArg = ArgTy == ParamTy ? Arg :
- new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
- "", InsertPt);
- // Attach a clang.imprecise_release metadata tag, if appropriate.
- if (MDNode *M = ReleasesToMove.ReleaseMetadata)
- Call->setMetadata(ImpreciseReleaseMDKind, M);
- Call->setDoesNotThrow();
- if (ReleasesToMove.IsTailCallRelease)
- Call->setTailCall();
- }
-
- // Delete the original retain and release calls.
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- AI = RetainsToMove.Calls.begin(),
- AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
- Instruction *OrigRetain = *AI;
- Retains.blot(OrigRetain);
- DeadInsts.push_back(OrigRetain);
- }
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- AI = ReleasesToMove.Calls.begin(),
- AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
- Instruction *OrigRelease = *AI;
- Releases.erase(OrigRelease);
- DeadInsts.push_back(OrigRelease);
- }
-}
-
-/// PerformCodePlacement - Identify pairings between the retains and releases,
-/// and delete and/or move them.
-bool
-ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
- &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M) {
- bool AnyPairsCompletelyEliminated = false;
- RRInfo RetainsToMove;
- RRInfo ReleasesToMove;
- SmallVector<Instruction *, 4> NewRetains;
- SmallVector<Instruction *, 4> NewReleases;
- SmallVector<Instruction *, 8> DeadInsts;
-
- // Visit each retain.
- for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
- E = Retains.end(); I != E; ++I) {
- Value *V = I->first;
- if (!V) continue; // blotted
-
- Instruction *Retain = cast<Instruction>(V);
- Value *Arg = GetObjCArg(Retain);
-
- // If the object being released is in static or stack storage, we know it's
- // not being managed by ObjC reference counting, so we can delete pairs
- // regardless of what possible decrements or uses lie between them.
- bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
-
- // A constant pointer can't be pointing to an object on the heap. It may
- // be reference-counted, but it won't be deleted.
- if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
- if (const GlobalVariable *GV =
- dyn_cast<GlobalVariable>(
- StripPointerCastsAndObjCCalls(LI->getPointerOperand())))
- if (GV->isConstant())
- KnownSafe = true;
-
- // If a pair happens in a region where it is known that the reference count
- // is already incremented, we can similarly ignore possible decrements.
- bool KnownSafeTD = true, KnownSafeBU = true;
-
- // Connect the dots between the top-down-collected RetainsToMove and
- // bottom-up-collected ReleasesToMove to form sets of related calls.
- // This is an iterative process so that we connect multiple releases
- // to multiple retains if needed.
- unsigned OldDelta = 0;
- unsigned NewDelta = 0;
- unsigned OldCount = 0;
- unsigned NewCount = 0;
- bool FirstRelease = true;
- bool FirstRetain = true;
- NewRetains.push_back(Retain);
- for (;;) {
- for (SmallVectorImpl<Instruction *>::const_iterator
- NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
- Instruction *NewRetain = *NI;
- MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
- assert(It != Retains.end());
- const RRInfo &NewRetainRRI = It->second;
- KnownSafeTD &= NewRetainRRI.KnownSafe;
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- LI = NewRetainRRI.Calls.begin(),
- LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
- Instruction *NewRetainRelease = *LI;
- DenseMap<Value *, RRInfo>::const_iterator Jt =
- Releases.find(NewRetainRelease);
- if (Jt == Releases.end())
- goto next_retain;
- const RRInfo &NewRetainReleaseRRI = Jt->second;
- assert(NewRetainReleaseRRI.Calls.count(NewRetain));
- if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
- OldDelta -=
- BBStates[NewRetainRelease->getParent()].GetAllPathCount();
-
- // Merge the ReleaseMetadata and IsTailCallRelease values.
- if (FirstRelease) {
- ReleasesToMove.ReleaseMetadata =
- NewRetainReleaseRRI.ReleaseMetadata;
- ReleasesToMove.IsTailCallRelease =
- NewRetainReleaseRRI.IsTailCallRelease;
- FirstRelease = false;
- } else {
- if (ReleasesToMove.ReleaseMetadata !=
- NewRetainReleaseRRI.ReleaseMetadata)
- ReleasesToMove.ReleaseMetadata = 0;
- if (ReleasesToMove.IsTailCallRelease !=
- NewRetainReleaseRRI.IsTailCallRelease)
- ReleasesToMove.IsTailCallRelease = false;
- }
-
- // Collect the optimal insertion points.
- if (!KnownSafe)
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
- RE = NewRetainReleaseRRI.ReverseInsertPts.end();
- RI != RE; ++RI) {
- Instruction *RIP = *RI;
- if (ReleasesToMove.ReverseInsertPts.insert(RIP))
- NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
- }
- NewReleases.push_back(NewRetainRelease);
- }
- }
- }
- NewRetains.clear();
- if (NewReleases.empty()) break;
-
- // Back the other way.
- for (SmallVectorImpl<Instruction *>::const_iterator
- NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
- Instruction *NewRelease = *NI;
- DenseMap<Value *, RRInfo>::const_iterator It =
- Releases.find(NewRelease);
- assert(It != Releases.end());
- const RRInfo &NewReleaseRRI = It->second;
- KnownSafeBU &= NewReleaseRRI.KnownSafe;
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- LI = NewReleaseRRI.Calls.begin(),
- LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
- Instruction *NewReleaseRetain = *LI;
- MapVector<Value *, RRInfo>::const_iterator Jt =
- Retains.find(NewReleaseRetain);
- if (Jt == Retains.end())
- goto next_retain;
- const RRInfo &NewReleaseRetainRRI = Jt->second;
- assert(NewReleaseRetainRRI.Calls.count(NewRelease));
- if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
- unsigned PathCount =
- BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
- OldDelta += PathCount;
- OldCount += PathCount;
-
- // Merge the IsRetainBlock values.
- if (FirstRetain) {
- RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
- FirstRetain = false;
- } else if (RetainsToMove.IsRetainBlock !=
- NewReleaseRetainRRI.IsRetainBlock)
- // It's not possible to merge the sequences if one uses
- // objc_retain and the other uses objc_retainBlock.
- goto next_retain;
-
- // Collect the optimal insertion points.
- if (!KnownSafe)
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
- RE = NewReleaseRetainRRI.ReverseInsertPts.end();
- RI != RE; ++RI) {
- Instruction *RIP = *RI;
- if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
- PathCount = BBStates[RIP->getParent()].GetAllPathCount();
- NewDelta += PathCount;
- NewCount += PathCount;
- }
- }
- NewRetains.push_back(NewReleaseRetain);
- }
- }
- }
- NewReleases.clear();
- if (NewRetains.empty()) break;
- }
-
- // If the pointer is known incremented or nested, we can safely delete the
- // pair regardless of what's between them.
- if (KnownSafeTD || KnownSafeBU) {
- RetainsToMove.ReverseInsertPts.clear();
- ReleasesToMove.ReverseInsertPts.clear();
- NewCount = 0;
- } else {
- // Determine whether the new insertion points we computed preserve the
- // balance of retain and release calls through the program.
- // TODO: If the fully aggressive solution isn't valid, try to find a
- // less aggressive solution which is.
- if (NewDelta != 0)
- goto next_retain;
- }
-
- // Determine whether the original call points are balanced in the retain and
- // release calls through the program. If not, conservatively don't touch
- // them.
- // TODO: It's theoretically possible to do code motion in this case, as
- // long as the existing imbalances are maintained.
- if (OldDelta != 0)
- goto next_retain;
-
- // Ok, everything checks out and we're all set. Let's move some code!
- Changed = true;
- assert(OldCount != 0 && "Unreachable code?");
- AnyPairsCompletelyEliminated = NewCount == 0;
- NumRRs += OldCount - NewCount;
- MoveCalls(Arg, RetainsToMove, ReleasesToMove,
- Retains, Releases, DeadInsts, M);
-
- next_retain:
- NewReleases.clear();
- NewRetains.clear();
- RetainsToMove.clear();
- ReleasesToMove.clear();
- }
-
- // Now that we're done moving everything, we can delete the newly dead
- // instructions, as we no longer need them as insert points.
- while (!DeadInsts.empty())
- EraseInstruction(DeadInsts.pop_back_val());
-
- return AnyPairsCompletelyEliminated;
-}
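-
-// In the simplest case (an illustrative IR sketch), a pair with nothing in
-// between that can use the object or decrement its reference count is
-// deleted entirely:
-//
-//   %0 = call i8* @objc_retain(i8* %p)
-//   %cmp = icmp eq i8* %p, null        ; neither a use nor a decrement
-//   call void @objc_release(i8* %p)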
-
-/// OptimizeWeakCalls - Weak pointer optimizations.
-void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
- // First, do memdep-style RLE and S2L optimizations. We can't use memdep
- // itself because it uses AliasAnalysis and we need to do provenance
- // queries instead.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
- InstructionClass Class = GetBasicInstructionClass(Inst);
- if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
- continue;
-
- // Delete objc_loadWeak calls with no users.
- if (Class == IC_LoadWeak && Inst->use_empty()) {
- Inst->eraseFromParent();
- continue;
- }
-
- // TODO: For now, just look for an earlier available version of this value
- // within the same block. Theoretically, we could do memdep-style non-local
- // analysis too, but that would want caching. A better approach would be to
- // use the technique that EarlyCSE uses.
- inst_iterator Current = llvm::prior(I);
- BasicBlock *CurrentBB = Current.getBasicBlockIterator();
- for (BasicBlock::iterator B = CurrentBB->begin(),
- J = Current.getInstructionIterator();
- J != B; --J) {
- Instruction *EarlierInst = &*llvm::prior(J);
- InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
- switch (EarlierClass) {
- case IC_LoadWeak:
- case IC_LoadWeakRetained: {
- // If this is loading from the same pointer, replace this load's value
- // with that one.
- CallInst *Call = cast<CallInst>(Inst);
- CallInst *EarlierCall = cast<CallInst>(EarlierInst);
- Value *Arg = Call->getArgOperand(0);
- Value *EarlierArg = EarlierCall->getArgOperand(0);
- switch (PA.getAA()->alias(Arg, EarlierArg)) {
- case AliasAnalysis::MustAlias:
- Changed = true;
- // If the load has a builtin retain, insert a plain retain for it.
- if (Class == IC_LoadWeakRetained) {
- CallInst *CI =
- CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
- "", Call);
- CI->setTailCall();
- }
- // Zap the fully redundant load.
- Call->replaceAllUsesWith(EarlierCall);
- Call->eraseFromParent();
- goto clobbered;
- case AliasAnalysis::MayAlias:
- case AliasAnalysis::PartialAlias:
- goto clobbered;
- case AliasAnalysis::NoAlias:
- break;
- }
- break;
- }
- case IC_StoreWeak:
- case IC_InitWeak: {
- // If this is storing to the same pointer and has the same size etc.,
- // replace this load's value with the stored value.
- CallInst *Call = cast<CallInst>(Inst);
- CallInst *EarlierCall = cast<CallInst>(EarlierInst);
- Value *Arg = Call->getArgOperand(0);
- Value *EarlierArg = EarlierCall->getArgOperand(0);
- switch (PA.getAA()->alias(Arg, EarlierArg)) {
- case AliasAnalysis::MustAlias:
- Changed = true;
- // If the load has a builtin retain, insert a plain retain for it.
- if (Class == IC_LoadWeakRetained) {
- CallInst *CI =
- CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
- "", Call);
- CI->setTailCall();
- }
- // Zap the fully redundant load.
- Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
- Call->eraseFromParent();
- goto clobbered;
- case AliasAnalysis::MayAlias:
- case AliasAnalysis::PartialAlias:
- goto clobbered;
- case AliasAnalysis::NoAlias:
- break;
- }
- break;
- }
- case IC_MoveWeak:
- case IC_CopyWeak:
- // TODO: Grab the copied value.
- goto clobbered;
- case IC_AutoreleasepoolPush:
- case IC_None:
- case IC_User:
- // Weak pointers are only modified through the weak entry points
- // (and arbitrary calls, which could call the weak entry points).
- break;
- default:
- // Anything else could modify the weak pointer.
- goto clobbered;
- }
- }
- clobbered:;
- }
-
- // Then, for each destroyWeak with an alloca operand, check to see if
- // the alloca and all its users can be zapped.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
- InstructionClass Class = GetBasicInstructionClass(Inst);
- if (Class != IC_DestroyWeak)
- continue;
-
- CallInst *Call = cast<CallInst>(Inst);
- Value *Arg = Call->getArgOperand(0);
- if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
- for (Value::use_iterator UI = Alloca->use_begin(),
- UE = Alloca->use_end(); UI != UE; ++UI) {
- const Instruction *UserInst = cast<Instruction>(*UI);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
- case IC_DestroyWeak:
- continue;
- default:
- goto done;
- }
- }
- Changed = true;
- for (Value::use_iterator UI = Alloca->use_begin(),
- UE = Alloca->use_end(); UI != UE; ) {
- CallInst *UserInst = cast<CallInst>(*UI++);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
- // These functions return their second argument.
- UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
- break;
- case IC_DestroyWeak:
- // No return value.
- break;
- default:
- llvm_unreachable("alloca really is used!");
- }
- UserInst->eraseFromParent();
- }
- Alloca->eraseFromParent();
- done:;
- }
- }
-}
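-
-// For example (an illustrative IR sketch), the second load below must-aliases
-// the first with no intervening clobber, so it is replaced outright:
-//
-//   %0 = call i8* @objc_loadWeak(i8** %w)
-//   %1 = call i8* @objc_loadWeak(i8** %w)   ; replaced with %0 and erased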
-
-/// OptimizeSequences - Identify program paths which execute sequences of
-/// retains and releases which can be eliminated.
-bool ObjCARCOpt::OptimizeSequences(Function &F) {
- /// Releases, Retains - These are used to store the results of the main flow
- /// analysis. These use Value* as the key instead of Instruction* so that the
- /// map stays valid when we get around to rewriting code and calls get
- /// replaced by arguments.
- DenseMap<Value *, RRInfo> Releases;
- MapVector<Value *, RRInfo> Retains;
-
- /// BBStates - This is used during the traversal of the function to track the
- /// states for each identified object at each block.
- DenseMap<const BasicBlock *, BBState> BBStates;
-
- // Analyze the CFG of the function, and all instructions.
- bool NestingDetected = Visit(F, BBStates, Retains, Releases);
-
- // Transform.
- return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
- NestingDetected;
-}
-
-/// OptimizeReturns - Look for this pattern:
-/// \code
-/// %call = call i8* @something(...)
-/// %2 = call i8* @objc_retain(i8* %call)
-/// %3 = call i8* @objc_autorelease(i8* %2)
-/// ret i8* %3
-/// \endcode
-/// And delete the retain and autorelease.
-///
-/// Otherwise if it's just this:
-/// \code
-/// %3 = call i8* @objc_autorelease(i8* %2)
-/// ret i8* %3
-/// \endcode
-/// convert the autorelease to autoreleaseRV.
-void ObjCARCOpt::OptimizeReturns(Function &F) {
- if (!F.getReturnType()->isPointerTy())
- return;
-
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- BasicBlock *BB = FI;
- ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
- if (!Ret) continue;
-
- const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
- FindDependencies(NeedsPositiveRetainCount, Arg,
- BB, Ret, DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
-
- {
- CallInst *Autorelease =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
- if (!Autorelease)
- goto next_block;
- InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
- if (!IsAutorelease(AutoreleaseClass))
- goto next_block;
- if (GetObjCArg(Autorelease) != Arg)
- goto next_block;
-
- DependingInstructions.clear();
- Visited.clear();
-
- // Check that there is nothing that can affect the reference
- // count between the autorelease and the retain.
- FindDependencies(CanChangeRetainCount, Arg,
- BB, Autorelease, DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
-
- {
- CallInst *Retain =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
-
- // Check that we found a retain with the same argument.
- if (!Retain ||
- !IsRetain(GetBasicInstructionClass(Retain)) ||
- GetObjCArg(Retain) != Arg)
- goto next_block;
-
- DependingInstructions.clear();
- Visited.clear();
-
- // Convert the autorelease to an autoreleaseRV, since it's
- // returning the value.
- if (AutoreleaseClass == IC_Autorelease) {
- Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
- AutoreleaseClass = IC_AutoreleaseRV;
- }
-
- // Check that there is nothing that can affect the reference
- // count between the retain and the call.
- // Note that Retain need not be in BB.
- FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
- DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
-
- {
- CallInst *Call =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
-
- // Check that the pointer is the return value of the call.
- if (!Call || Arg != Call)
- goto next_block;
-
- // Check that the call is a regular call.
- InstructionClass Class = GetBasicInstructionClass(Call);
- if (Class != IC_CallOrUser && Class != IC_Call)
- goto next_block;
-
- // If so, we can zap the retain and autorelease.
- Changed = true;
- ++NumRets;
- EraseInstruction(Retain);
- EraseInstruction(Autorelease);
- }
- }
- }
-
- next_block:
- DependingInstructions.clear();
- Visited.clear();
- }
-}
-
-bool ObjCARCOpt::doInitialization(Module &M) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- Run = ModuleHasARC(M);
- if (!Run)
- return false;
-
- // Identify the imprecise release metadata kind.
- ImpreciseReleaseMDKind =
- M.getContext().getMDKindID("clang.imprecise_release");
- CopyOnEscapeMDKind =
- M.getContext().getMDKindID("clang.arc.copy_on_escape");
- NoObjCARCExceptionsMDKind =
- M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
-
- // Intuitively, objc_retain and others are nocapture; however, in practice
- // they are not, because they return their argument value. And objc_release
- // calls finalizers, which can have arbitrary side effects.
-
- // These are initialized lazily.
- RetainRVCallee = 0;
- AutoreleaseRVCallee = 0;
- ReleaseCallee = 0;
- RetainCallee = 0;
- RetainBlockCallee = 0;
- AutoreleaseCallee = 0;
-
- return false;
-}
-
-bool ObjCARCOpt::runOnFunction(Function &F) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!Run)
- return false;
-
- Changed = false;
-
- PA.setAA(&getAnalysis<AliasAnalysis>());
-
- // This pass performs several distinct transformations. As a compile-time aid
- // when compiling code that isn't ObjC, skip these if the relevant ObjC
- // library functions aren't declared.
-
- // Preliminary optimizations. This also computes UsedInThisFunction.
- OptimizeIndividualCalls(F);
-
- // Optimizations for weak pointers.
- if (UsedInThisFunction & ((1 << IC_LoadWeak) |
- (1 << IC_LoadWeakRetained) |
- (1 << IC_StoreWeak) |
- (1 << IC_InitWeak) |
- (1 << IC_CopyWeak) |
- (1 << IC_MoveWeak) |
- (1 << IC_DestroyWeak)))
- OptimizeWeakCalls(F);
-
- // Optimizations for retain+release pairs.
- if (UsedInThisFunction & ((1 << IC_Retain) |
- (1 << IC_RetainRV) |
- (1 << IC_RetainBlock)))
- if (UsedInThisFunction & (1 << IC_Release))
- // Run OptimizeSequences until it either stops making changes or
- // no retain+release pair nesting is detected.
- while (OptimizeSequences(F)) {}
-
- // Optimizations if objc_autorelease is used.
- if (UsedInThisFunction & ((1 << IC_Autorelease) |
- (1 << IC_AutoreleaseRV)))
- OptimizeReturns(F);
-
- return Changed;
-}
-
-void ObjCARCOpt::releaseMemory() {
- PA.clear();
-}
-
-//===----------------------------------------------------------------------===//
-// ARC contraction.
-//===----------------------------------------------------------------------===//
-
-// TODO: ObjCARCContract could insert PHI nodes when uses aren't
-// dominated by single calls.
-
-#include "llvm/Operator.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Analysis/Dominators.h"
-
-STATISTIC(NumStoreStrongs, "Number of objc_storeStrong calls formed");
-
-namespace {
- /// ObjCARCContract - Late ARC optimizations. These change the IR in a way
- /// that makes it difficult for ObjCARCOpt to analyze, so this pass is run
- /// late.
- class ObjCARCContract : public FunctionPass {
- bool Changed;
- AliasAnalysis *AA;
- DominatorTree *DT;
- ProvenanceAnalysis PA;
-
- /// Run - A flag indicating whether this optimization pass should run.
- bool Run;
-
- /// StoreStrongCallee, etc. - Declarations for ObjC runtime
- /// functions, for use in creating calls to them. These are initialized
- /// lazily to avoid cluttering up the Module with unused declarations.
- Constant *StoreStrongCallee,
- *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee;
-
- /// RetainRVMarker - The inline asm string to insert between calls and
- /// RetainRV calls to make the optimization work on targets which need it.
- const MDString *RetainRVMarker;
-
- /// StoreStrongCalls - The set of inserted objc_storeStrong calls. If
- /// at the end of walking the function we have found no alloca
- /// instructions, these calls can be marked "tail".
- SmallPtrSet<CallInst *, 8> StoreStrongCalls;
-
- Constant *getStoreStrongCallee(Module *M);
- Constant *getRetainAutoreleaseCallee(Module *M);
- Constant *getRetainAutoreleaseRVCallee(Module *M);
-
- bool ContractAutorelease(Function &F, Instruction *Autorelease,
- InstructionClass Class,
- SmallPtrSet<Instruction *, 4>
- &DependingInstructions,
- SmallPtrSet<const BasicBlock *, 4>
- &Visited);
-
- void ContractRelease(Instruction *Release,
- inst_iterator &Iter);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
-
- public:
- static char ID;
- ObjCARCContract() : FunctionPass(ID) {
- initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
- }
- };
-}
-
-char ObjCARCContract::ID = 0;
-INITIALIZE_PASS_BEGIN(ObjCARCContract,
- "objc-arc-contract", "ObjC ARC contraction", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_END(ObjCARCContract,
- "objc-arc-contract", "ObjC ARC contraction", false, false)
-
-Pass *llvm::createObjCARCContractPass() {
- return new ObjCARCContract();
-}
-
-void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<DominatorTree>();
- AU.setPreservesCFG();
-}
-
-Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
- if (!StoreStrongCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
- Type *Params[] = { I8XX, I8X };
-
- AttrListPtr Attributes = AttrListPtr()
- .addAttr(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::get(C, Attributes::NoUnwind))
- .addAttr(M->getContext(), 1, Attributes::get(C, Attributes::NoCapture));
-
- StoreStrongCallee =
- M->getOrInsertFunction(
- "objc_storeStrong",
- FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
- Attributes);
- }
- return StoreStrongCallee;
-}
-
-Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
- if (!RetainAutoreleaseCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes =
- AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::get(C, Attributes::NoUnwind));
- RetainAutoreleaseCallee =
- M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
- }
- return RetainAutoreleaseCallee;
-}
-
-Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
- if (!RetainAutoreleaseRVCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttrListPtr Attributes =
- AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::get(C, Attributes::NoUnwind));
- RetainAutoreleaseRVCallee =
- M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
- Attributes);
- }
- return RetainAutoreleaseRVCallee;
-}
-
-/// ContractAutorelease - Merge an autorelease with a retain into a fused call.
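-/// For illustration, a sketch of the rewrite (value names hypothetical):
-/// \code
-/// %1 = call i8* @objc_retain(i8* %p)
-/// %2 = call i8* @objc_autorelease(i8* %1)
-/// \endcode
-/// Here the retain call is rewritten to @objc_retainAutorelease (or to
-/// @objc_retainAutoreleaseReturnValue for the autoreleaseRV form) and the
-/// autorelease call is erased.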
-bool
-ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
- InstructionClass Class,
- SmallPtrSet<Instruction *, 4>
- &DependingInstructions,
- SmallPtrSet<const BasicBlock *, 4>
- &Visited) {
- const Value *Arg = GetObjCArg(Autorelease);
-
- // Check that there are no instructions between the retain and the autorelease
- // (such as an autorelease_pop) which may change the count.
- CallInst *Retain = 0;
- if (Class == IC_AutoreleaseRV)
- FindDependencies(RetainAutoreleaseRVDep, Arg,
- Autorelease->getParent(), Autorelease,
- DependingInstructions, Visited, PA);
- else
- FindDependencies(RetainAutoreleaseDep, Arg,
- Autorelease->getParent(), Autorelease,
- DependingInstructions, Visited, PA);
-
- Visited.clear();
- if (DependingInstructions.size() != 1) {
- DependingInstructions.clear();
- return false;
- }
-
- Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
- DependingInstructions.clear();
-
- if (!Retain ||
- GetBasicInstructionClass(Retain) != IC_Retain ||
- GetObjCArg(Retain) != Arg)
- return false;
-
- Changed = true;
- ++NumPeeps;
-
- if (Class == IC_AutoreleaseRV)
- Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent()));
- else
- Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent()));
-
- EraseInstruction(Autorelease);
- return true;
-}
-
-/// ContractRelease - Attempt to merge an objc_release with a store, load, and
-/// objc_retain to form an objc_storeStrong. This can be a little tricky because
-/// the instructions don't always appear in order, and there may be unrelated
-/// intervening instructions.
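-/// For illustration, the rough shape of the pattern (value names
-/// hypothetical):
-/// \code
-/// %old = load i8** %ptr
-/// %new = call i8* @objc_retain(i8* %x)
-/// store i8* %new, i8** %ptr
-/// call void @objc_release(i8* %old)
-/// \endcode
-/// which is folded into a single call: objc_storeStrong(i8** %ptr, i8* %x).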
-void ObjCARCContract::ContractRelease(Instruction *Release,
- inst_iterator &Iter) {
- LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
- if (!Load || !Load->isSimple()) return;
-
- // For now, require everything to be in one basic block.
- BasicBlock *BB = Release->getParent();
- if (Load->getParent() != BB) return;
-
- // Walk down to find the store and the release, which may be in either order.
- BasicBlock::iterator I = Load, End = BB->end();
- ++I;
- AliasAnalysis::Location Loc = AA->getLocation(Load);
- StoreInst *Store = 0;
- bool SawRelease = false;
- for (; !Store || !SawRelease; ++I) {
- if (I == End)
- return;
-
- Instruction *Inst = I;
- if (Inst == Release) {
- SawRelease = true;
- continue;
- }
-
- InstructionClass Class = GetBasicInstructionClass(Inst);
-
- // Unrelated retains are harmless.
- if (IsRetain(Class))
- continue;
-
- if (Store) {
- // The store is the point where we're going to put the objc_storeStrong,
- // so make sure there are no uses after it.
- if (CanUse(Inst, Load, PA, Class))
- return;
- } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) {
- // We are moving the load down to the store, so check for anything
- // else which writes to the memory between the load and the store.
- Store = dyn_cast<StoreInst>(Inst);
- if (!Store || !Store->isSimple()) return;
- if (Store->getPointerOperand() != Loc.Ptr) return;
- }
- }
-
- Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
-
- // Walk up to find the retain.
- I = Store;
- BasicBlock::iterator Begin = BB->begin();
- while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
- --I;
- Instruction *Retain = I;
- if (GetBasicInstructionClass(Retain) != IC_Retain) return;
- if (GetObjCArg(Retain) != New) return;
-
- Changed = true;
- ++NumStoreStrongs;
-
- LLVMContext &C = Release->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
-
- Value *Args[] = { Load->getPointerOperand(), New };
- if (Args[0]->getType() != I8XX)
- Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
- if (Args[1]->getType() != I8X)
- Args[1] = new BitCastInst(Args[1], I8X, "", Store);
- CallInst *StoreStrong =
- CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
- Args, "", Store);
- StoreStrong->setDoesNotThrow();
- StoreStrong->setDebugLoc(Store->getDebugLoc());
-
- // We can't set the tail flag yet, because we haven't yet determined
- // whether there are any escaping allocas. Remember this call, so that
- // we can set the tail flag once we know it's safe.
- StoreStrongCalls.insert(StoreStrong);
-
- if (&*Iter == Store) ++Iter;
- Store->eraseFromParent();
- Release->eraseFromParent();
- EraseInstruction(Retain);
- if (Load->use_empty())
- Load->eraseFromParent();
-}
-
-bool ObjCARCContract::doInitialization(Module &M) {
- // If nothing in the Module uses ARC, don't do anything.
- Run = ModuleHasARC(M);
- if (!Run)
- return false;
-
- // These are initialized lazily.
- StoreStrongCallee = 0;
- RetainAutoreleaseCallee = 0;
- RetainAutoreleaseRVCallee = 0;
-
- // Initialize RetainRVMarker.
- RetainRVMarker = 0;
- if (NamedMDNode *NMD =
- M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
- if (NMD->getNumOperands() == 1) {
- const MDNode *N = NMD->getOperand(0);
- if (N->getNumOperands() == 1)
- if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
- RetainRVMarker = S;
- }
-
- return false;
-}
-
-bool ObjCARCContract::runOnFunction(Function &F) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!Run)
- return false;
-
- Changed = false;
- AA = &getAnalysis<AliasAnalysis>();
- DT = &getAnalysis<DominatorTree>();
-
- PA.setAA(&getAnalysis<AliasAnalysis>());
-
- // Track whether it's ok to mark objc_storeStrong calls with the "tail"
- // keyword. Be conservative if the function has variadic arguments.
- // Functions which "return twice" are also unsafe for the "tail" keyword,
- // because they call setjmp-like functions, which may need to return to
- // an earlier stack state.
- bool TailOkForStoreStrongs = !F.isVarArg() &&
- !F.callsFunctionThatReturnsTwice();
-
- // For ObjC library calls which return their argument, replace uses of the
- // argument with uses of the call return value, if it dominates the use. This
- // reduces register pressure.
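- // For illustration (names hypothetical): given
- // %1 = call i8* @objc_retain(i8* %p)
- // call void @use(i8* %p)
- // the use of %p can be rewritten to use %1 whenever %1 dominates it,
- // shortening the live range of %p.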
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
-
- // Only these library routines return their argument. In particular,
- // objc_retainBlock does not necessarily return its argument.
- InstructionClass Class = GetBasicInstructionClass(Inst);
- switch (Class) {
- case IC_Retain:
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV:
- break;
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
- continue;
- break;
- case IC_RetainRV: {
- // If we're compiling for a target which needs a special inline-asm
- // marker to do the retainAutoreleasedReturnValue optimization,
- // insert it now.
- if (!RetainRVMarker)
- break;
- BasicBlock::iterator BBI = Inst;
- BasicBlock *InstParent = Inst->getParent();
-
- // Step up to see if the call immediately precedes the RetainRV call.
- // If it's an invoke, we have to cross a block boundary. And we have
- // to carefully dodge no-op instructions.
- do {
- if (&*BBI == InstParent->begin()) {
- BasicBlock *Pred = InstParent->getSinglePredecessor();
- if (!Pred)
- goto decline_rv_optimization;
- BBI = Pred->getTerminator();
- break;
- }
- --BBI;
- } while (isNoopInstruction(BBI));
-
- if (&*BBI == GetObjCArg(Inst)) {
- Changed = true;
- InlineAsm *IA =
- InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
- /*isVarArg=*/false),
- RetainRVMarker->getString(),
- /*Constraints=*/"", /*hasSideEffects=*/true);
- CallInst::Create(IA, "", Inst);
- }
- decline_rv_optimization:
- break;
- }
- case IC_InitWeak: {
- // objc_initWeak(p, null) => *p = null
- CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(1))) {
- Value *Null =
- ConstantPointerNull::get(cast<PointerType>(CI->getType()));
- Changed = true;
- new StoreInst(Null, CI->getArgOperand(0), CI);
- CI->replaceAllUsesWith(Null);
- CI->eraseFromParent();
- }
- continue;
- }
- case IC_Release:
- ContractRelease(Inst, I);
- continue;
- case IC_User:
- // Be conservative if the function has any alloca instructions.
- // Technically we only care about escaping alloca instructions,
- // but this is sufficient to handle some interesting cases.
- if (isa<AllocaInst>(Inst))
- TailOkForStoreStrongs = false;
- continue;
- default:
- continue;
- }
-
- // Don't use GetObjCArg because we don't want to look through bitcasts
- // and such; to do the replacement, the argument must have type i8*.
- const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
- for (;;) {
- // If we're compiling bugpointed code, don't get in trouble.
- if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
- break;
- // Look through the uses of the pointer.
- for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
- UI != UE; ) {
- Use &U = UI.getUse();
- unsigned OperandNo = UI.getOperandNo();
- ++UI; // Increment UI now, because we may unlink its element.
-
- // If the call's return value dominates a use of the call's argument
- // value, rewrite the use to use the return value. We check for
- // reachability here because an unreachable call is considered to
- // trivially dominate itself, which would lead us to rewriting its
- // argument in terms of its return value, which would lead to
- // infinite loops in GetObjCArg.
- if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
- Changed = true;
- Instruction *Replacement = Inst;
- Type *UseTy = U.get()->getType();
- if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
- // For PHI nodes, insert the bitcast in the predecessor block.
- unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
- BasicBlock *BB = PHI->getIncomingBlock(ValNo);
- if (Replacement->getType() != UseTy)
- Replacement = new BitCastInst(Replacement, UseTy, "",
- &BB->back());
- // While we're here, rewrite all edges for this PHI, rather
- // than just one use at a time, to minimize the number of
- // bitcasts we emit.
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
- if (PHI->getIncomingBlock(i) == BB) {
- // Keep the UI iterator valid.
- if (&PHI->getOperandUse(
- PHINode::getOperandNumForIncomingValue(i)) ==
- &UI.getUse())
- ++UI;
- PHI->setIncomingValue(i, Replacement);
- }
- } else {
- if (Replacement->getType() != UseTy)
- Replacement = new BitCastInst(Replacement, UseTy, "",
- cast<Instruction>(U.getUser()));
- U.set(Replacement);
- }
- }
- }
-
- // If Arg is a no-op casted pointer, strip one level of casts and iterate.
- if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
- Arg = BI->getOperand(0);
- else if (isa<GEPOperator>(Arg) &&
- cast<GEPOperator>(Arg)->hasAllZeroIndices())
- Arg = cast<GEPOperator>(Arg)->getPointerOperand();
- else if (isa<GlobalAlias>(Arg) &&
- !cast<GlobalAlias>(Arg)->mayBeOverridden())
- Arg = cast<GlobalAlias>(Arg)->getAliasee();
- else
- break;
- }
- }
-
- // If this function has no escaping allocas or suspicious vararg usage,
- // objc_storeStrong calls can be marked with the "tail" keyword.
- if (TailOkForStoreStrongs)
- for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
- E = StoreStrongCalls.end(); I != E; ++I)
- (*I)->setTailCall();
- StoreStrongCalls.clear();
-
- return Changed;
-}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 7a4079784bb7..7ee40273347b 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -22,24 +22,24 @@
#define DEBUG_TYPE "reassociate"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@@ -110,6 +110,55 @@ namespace {
}
};
};
+
+ /// Utility class representing a non-constant Xor-operand. We classify
+ /// non-constant Xor-operands into two categories:
+ /// C1) The operand is in the form "X & C", where C is a constant and C != ~0
+ /// C2)
+ /// C2.1) The operand is in the form "X | C", where C is a non-zero
+ /// constant.
+ /// C2.2) Any operand E that doesn't fall into C1 or C2.1; we view such an
+ /// operand as "E | 0".
+ class XorOpnd {
+ public:
+ XorOpnd(Value *V);
+ const XorOpnd &operator=(const XorOpnd &That);
+
+ bool isInvalid() const { return SymbolicPart == 0; }
+ bool isOrExpr() const { return isOr; }
+ Value *getValue() const { return OrigVal; }
+ Value *getSymbolicPart() const { return SymbolicPart; }
+ unsigned getSymbolicRank() const { return SymbolicRank; }
+ const APInt &getConstPart() const { return ConstPart; }
+
+ void Invalidate() { SymbolicPart = OrigVal = 0; }
+ void setSymbolicRank(unsigned R) { SymbolicRank = R; }
+
+ // Sort the XorOpnd-Pointers in ascending order of symbolic-value-rank.
+ // The purpose is twofold:
+ // 1) Cluster together the operands sharing the same symbolic value.
+ // 2) An operand with a smaller symbolic-value-rank is permuted earlier,
+ // which could potentially shorten the critical path and expose more
+ // loop-invariants.
+ // Note that values' ranks are basically defined in RPO order (FIXME).
+ // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
+ // than Y, which is defined earlier than Z. Permuting "X | 1", "Y & 2"
+ // and "Z" into the order X-Y-Z is better than any other order.
+ class PtrSortFunctor {
+ ArrayRef<XorOpnd> A;
+
+ public:
+ PtrSortFunctor(ArrayRef<XorOpnd> Array) : A(Array) {}
+ bool operator()(unsigned LHSIndex, unsigned RHSIndex) {
+ return A[LHSIndex].getSymbolicRank() < A[RHSIndex].getSymbolicRank();
+ }
+ };
+ private:
+ Value *OrigVal;
+ Value *SymbolicPart;
+ APInt ConstPart;
+ unsigned SymbolicRank;
+ bool isOr;
+ };
}
namespace {
@@ -137,6 +186,11 @@ namespace {
Value *OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops);
Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ Value *OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
+ bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd,
+ Value *&Res);
+ bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
+ APInt &ConstOpnd, Value *&Res);
bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
SmallVectorImpl<Factor> &Factors);
Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder,
@@ -148,6 +202,42 @@ namespace {
};
}
+XorOpnd::XorOpnd(Value *V) {
+ assert(!isa<ConstantInt>(V) && "No ConstantInt");
+ OrigVal = V;
+ Instruction *I = dyn_cast<Instruction>(V);
+ SymbolicRank = 0;
+
+ if (I && (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And)) {
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+ if (isa<ConstantInt>(V0))
+ std::swap(V0, V1);
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) {
+ ConstPart = C->getValue();
+ SymbolicPart = V0;
+ isOr = (I->getOpcode() == Instruction::Or);
+ return;
+ }
+ }
+
+ // View the operand as "V | 0".
+ SymbolicPart = V;
+ ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth());
+ isOr = true;
+}
+
+const XorOpnd &XorOpnd::operator=(const XorOpnd &That) {
+ OrigVal = That.OrigVal;
+ SymbolicPart = That.SymbolicPart;
+ ConstPart = That.ConstPart;
+ SymbolicRank = That.SymbolicRank;
+ isOr = That.isOr;
+ return *this;
+}
+
char Reassociate::ID = 0;
INITIALIZE_PASS(Reassociate, "reassociate",
"Reassociate expressions", false, false)
@@ -423,10 +513,6 @@ static bool LinearizeExprTree(BinaryOperator *I,
assert(Instruction::isAssociative(Opcode) &&
Instruction::isCommutative(Opcode) &&
"Expected an associative and commutative operation!");
- // If we see an absorbing element then the entire expression must be equal to
- // it. For example, if this is a multiplication expression and zero occurs as
- // an operand somewhere in it then the result of the expression must be zero.
- Constant *Absorber = ConstantExpr::getBinOpAbsorber(Opcode, I->getType());
// Visit all operands of the expression, keeping track of their weight (the
// number of paths from the expression root to the operand, or if you like
@@ -474,13 +560,6 @@ static bool LinearizeExprTree(BinaryOperator *I,
DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
assert(!Op->use_empty() && "No uses, so how did we get to it?!");
- // If the expression contains an absorbing element then there is no need
- // to analyze it further: it must evaluate to the absorbing element.
- if (Op == Absorber && !Weight.isMinValue()) {
- Ops.push_back(std::make_pair(Absorber, APInt(Bitwidth, 1)));
- return MadeChange;
- }
-
// If this is a binary operation of the right kind with only one use then
// add its operands to the expression.
if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
@@ -1051,6 +1130,241 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
return 0;
}
+/// Helper function of CombineXorOpnd(). It creates a bitwise-and
+/// instruction with the given two operands, and returns the resulting
+/// instruction. There are two special cases: 1) if the constant operand is 0,
+/// it will return NULL; 2) if the constant is ~0, the symbolic operand will
+/// be returned.
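+///
+/// For example (illustrative): createAndInstr(I, X, 0) returns NULL,
+/// createAndInstr(I, X, ~0) returns X itself, and any other constant C
+/// yields a new "X & C" instruction named "and.ra".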
+static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
+ const APInt &ConstOpnd) {
+ if (ConstOpnd != 0) {
+ if (!ConstOpnd.isAllOnesValue()) {
+ LLVMContext &Ctx = Opnd->getType()->getContext();
+ Instruction *I;
+ I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd),
+ "and.ra", InsertBefore);
+ I->setDebugLoc(InsertBefore->getDebugLoc());
+ return I;
+ }
+ return Opnd;
+ }
+ return 0;
+}
+
+// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
+// into "R ^ C", where C would be 0, and R is a symbolic value.
+//
+// If the simplification succeeds, true is returned, and "R" and "C" are
+// returned via "Res" and "ConstOpnd", respectively; otherwise, false is
+// returned, and both "Res" and "ConstOpnd" remain unchanged.
+//
+bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
+ APInt &ConstOpnd, Value *&Res) {
+ // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
+ // = ((x | c1) ^ c1) ^ (c1 ^ c2)
+ // = (x & ~c1) ^ (c1 ^ c2)
+ // It is useful only when c1 == c2.
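+ // A worked instance (illustrative only), with x = 6 and c1 = c2 = 5:
+ // (6 | 5) ^ 5 = 7 ^ 5 = 2, and (6 & ~5) ^ (5 ^ 5) = 2 ^ 0 = 2.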
+ if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) {
+ if (!Opnd1->getValue()->hasOneUse())
+ return false;
+
+ const APInt &C1 = Opnd1->getConstPart();
+ if (C1 != ConstOpnd)
+ return false;
+
+ Value *X = Opnd1->getSymbolicPart();
+ Res = createAndInstr(I, X, ~C1);
+ // ConstOpnd was C2, now C1 ^ C2.
+ ConstOpnd ^= C1;
+
+ if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+ RedoInsts.insert(T);
+ return true;
+ }
+ return false;
+}
+
+
+// Helper function of OptimizeXor(). It tries to simplify
+// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
+// symbolic value.
+//
+// If the simplification succeeds, true is returned, and "R" and "C" are
+// returned via "Res" and "ConstOpnd", respectively (if the entire expression
+// is evaluated to a constant, Res is set to NULL); otherwise, false is
+// returned, and both "Res" and "ConstOpnd" remain unchanged.
+bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
+ APInt &ConstOpnd, Value *&Res) {
+ Value *X = Opnd1->getSymbolicPart();
+ if (X != Opnd2->getSymbolicPart())
+ return false;
+
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
+
+ // Count how many instructions will become dead. (At least "Opnd1 ^ Opnd2"
+ // will die.)
+ int DeadInstNum = 1;
+ if (Opnd1->getValue()->hasOneUse())
+ DeadInstNum++;
+ if (Opnd2->getValue()->hasOneUse())
+ DeadInstNum++;
+
+ // Xor-Rule 2:
+ // (x | c1) ^ (x & c2)
+ // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1
+ // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
+ // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
+ //
+ if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
+ if (Opnd2->isOrExpr())
+ std::swap(Opnd1, Opnd2);
+
+ APInt C3((~C1) ^ C2);
+
+ // Do not increase code size!
+ if (C3 != 0 && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (NewInstNum > DeadInstNum)
+ return false;
+ }
+
+ Res = createAndInstr(I, X, C3);
+ ConstOpnd ^= C1;
+
+ } else if (Opnd1->isOrExpr()) {
+ // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
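+ // A worked instance (illustrative only), with x = 2, c1 = 3, c2 = 5:
+ // (2 | 3) ^ (2 | 5) = 3 ^ 7 = 4, and with c3 = 3 ^ 5 = 6,
+ // (2 & 6) ^ 6 = 2 ^ 6 = 4.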
+ //
+ APInt C3 = C1 ^ C2;
+
+ // Do not increase code size
+ if (C3 != 0 && !C3.isAllOnesValue()) {
+ int NewInstNum = ConstOpnd != 0 ? 1 : 2;
+ if (NewInstNum > DeadInstNum)
+ return false;
+ }
+
+ Res = createAndInstr(I, X, C3);
+ ConstOpnd ^= C3;
+ } else {
+ // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
+ //
+ APInt C3 = C1 ^ C2;
+ Res = createAndInstr(I, X, C3);
+ }
+
+ // Put the original operands in the Redo list; hope they will be deleted
+ // as dead code.
+ if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue()))
+ RedoInsts.insert(T);
+ if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue()))
+ RedoInsts.insert(T);
+
+ return true;
+}
+
+/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
+/// to a single Value, it is returned, otherwise the Ops list is mutated as
+/// necessary.
+Value *Reassociate::OptimizeXor(Instruction *I,
+ SmallVectorImpl<ValueEntry> &Ops) {
+ if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
+ return V;
+
+ if (Ops.size() == 1)
+ return 0;
+
+ SmallVector<XorOpnd, 8> Opnds;
+ SmallVector<unsigned, 8> OpndIndices;
+ Type *Ty = Ops[0].Op->getType();
+ APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
+
+ // Step 1: Convert ValueEntry to XorOpnd
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ Value *V = Ops[i].Op;
+ if (!isa<ConstantInt>(V)) {
+ XorOpnd O(V);
+ O.setSymbolicRank(getRank(O.getSymbolicPart()));
+ Opnds.push_back(O);
+ OpndIndices.push_back(Opnds.size() - 1);
+ } else
+ ConstOpnd ^= cast<ConstantInt>(V)->getValue();
+ }
+
+ // Step 2: Sort the Xor-Operands in a way such that the operands containing
+ // the same symbolic value cluster together. For instance, the input operand
+ // sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
+ // ("x | 123", "x & 789", "y & 456").
+ std::sort(OpndIndices.begin(), OpndIndices.end(),
+ XorOpnd::PtrSortFunctor(Opnds));
+
+ // Step 3: Combine adjacent operands
+ XorOpnd *PrevOpnd = 0;
+ bool Changed = false;
+ for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
+ XorOpnd *CurrOpnd = &Opnds[OpndIndices[i]];
+ // The combined value
+ Value *CV;
+
+ // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
+ if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
+ Changed = true;
+ if (CV)
+ *CurrOpnd = XorOpnd(CV);
+ else {
+ CurrOpnd->Invalidate();
+ continue;
+ }
+ }
+
+ if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
+ PrevOpnd = CurrOpnd;
+ continue;
+ }
+
+ // Step 3.2: When previous and current operands share the same symbolic
+ // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd".
+ //
+ if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
+ // Remove previous operand
+ PrevOpnd->Invalidate();
+ if (CV) {
+ *CurrOpnd = XorOpnd(CV);
+ PrevOpnd = CurrOpnd;
+ } else {
+ CurrOpnd->Invalidate();
+ PrevOpnd = 0;
+ }
+ Changed = true;
+ }
+ }
+
+ // Step 4: Reassemble the Ops
+ if (Changed) {
+ Ops.clear();
+ for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
+ XorOpnd &O = Opnds[i];
+ if (O.isInvalid())
+ continue;
+ ValueEntry VE(getRank(O.getValue()), O.getValue());
+ Ops.push_back(VE);
+ }
+ if (ConstOpnd != 0) {
+ Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd);
+ ValueEntry VE(getRank(C), C);
+ Ops.push_back(VE);
+ }
+ int Sz = Ops.size();
+ if (Sz == 1)
+ return Ops.back().Op;
+ else if (Sz == 0) {
+ assert(ConstOpnd == 0);
+ return ConstantInt::get(Ty->getContext(), ConstOpnd);
+ }
+ }
+
+ return 0;
+}
+
/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
/// optimizes based on identities. If it can be reduced to a single Value, it
/// is returned, otherwise the Ops list is mutated as necessary.
@@ -1442,11 +1756,15 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
default: break;
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor:
if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
return Result;
break;
+ case Instruction::Xor:
+ if (Value *Result = OptimizeXor(I, Ops))
+ return Result;
+ break;
+
case Instruction::Add:
if (Value *Result = OptimizeAdd(I, Ops))
return Result;
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index ea1de63de70a..07f540a30127 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -18,15 +18,15 @@
#define DEBUG_TYPE "reg2mem"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Pass.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 686520e724c4..e30a2746b01e 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -19,26 +19,26 @@
#define DEBUG_TYPE "sccp"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@@ -271,13 +271,6 @@ public:
return I->second;
}
- /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
- DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
- StructValueState.find(std::make_pair(V, i));
- assert(I != StructValueState.end() && "V is not in valuemap!");
- return I->second;
- }*/
-
/// getTrackedRetVals - Get the inferred return value map.
///
const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
@@ -710,9 +703,6 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
markConstant(&PN, OperandVal); // Acquire operand value
}
-
-
-
void SCCPSolver::visitReturnInst(ReturnInst &I) {
if (I.getNumOperands() == 0) return; // ret void
@@ -1185,7 +1175,7 @@ void SCCPSolver::Solve() {
DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');
// "I" got into the work list because it either made the transition from
- // bottom to constant
+ // bottom to constant, or to overdefined.
//
// Anything on this worklist that is overdefined need not be visited
// since all of its users will have already been marked as overdefined
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 2d518f735be0..f6bb365216ff 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -25,44 +25,47 @@
#define DEBUG_TYPE "sroa"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
-STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
-STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
+STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
+STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions");
+STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found");
+STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses");
+STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
+STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
-STATISTIC(NumDeleted, "Number of instructions deleted");
-STATISTIC(NumVectorized, "Number of vectorized aggregates");
+STATISTIC(NumDeleted, "Number of instructions deleted");
+STATISTIC(NumVectorized, "Number of vectorized aggregates");
/// Hidden option to force the pass to not use DomTree and mem2reg, instead
/// forming SSA values through the SSAUpdater infrastructure.
@@ -70,112 +73,167 @@ static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
namespace {
-/// \brief Alloca partitioning representation.
-///
-/// This class represents a partitioning of an alloca into slices, and
-/// information about the nature of uses of each slice of the alloca. The goal
-/// is that this information is sufficient to decide if and how to split the
-/// alloca apart and replace slices with scalars. It is also intended that this
-/// structure can capture the relevant information needed both to decide about
-/// and to enact these transformations.
-class AllocaPartitioning {
+/// \brief A custom IRBuilder inserter which prefixes all names if they are
+/// preserved.
+template <bool preserveNames = true>
+class IRBuilderPrefixedInserter :
+ public IRBuilderDefaultInserter<preserveNames> {
+ std::string Prefix;
+
public:
- /// \brief A common base class for representing a half-open byte range.
- struct ByteRange {
- /// \brief The beginning offset of the range.
- uint64_t BeginOffset;
+ void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
- /// \brief The ending offset, not included in the range.
- uint64_t EndOffset;
+protected:
+ void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+ BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<preserveNames>::InsertHelper(
+ I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
+ }
+};
- ByteRange() : BeginOffset(), EndOffset() {}
- ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
- : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+// Specialization for not preserving the name is trivial.
+template <>
+class IRBuilderPrefixedInserter<false> :
+ public IRBuilderDefaultInserter<false> {
+public:
+ void SetNamePrefix(const Twine &P) {}
+};
- /// \brief Support for ordering ranges.
- ///
- /// This provides an ordering over ranges such that start offsets are
- /// always increasing, and within equal start offsets, the end offsets are
- /// decreasing. Thus the spanning range comes first in a cluster with the
- /// same start position.
- bool operator<(const ByteRange &RHS) const {
- if (BeginOffset < RHS.BeginOffset) return true;
- if (BeginOffset > RHS.BeginOffset) return false;
- if (EndOffset > RHS.EndOffset) return true;
- return false;
- }
+/// \brief Provide a typedef for IRBuilder that drops names in release builds.
+#ifndef NDEBUG
+typedef llvm::IRBuilder<true, ConstantFolder,
+ IRBuilderPrefixedInserter<true> > IRBuilderTy;
+#else
+typedef llvm::IRBuilder<false, ConstantFolder,
+ IRBuilderPrefixedInserter<false> > IRBuilderTy;
+#endif
+}
- /// \brief Support comparison with a single offset to allow binary searches.
- friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
- return LHS.BeginOffset < RHSOffset;
- }
+namespace {
+/// \brief A common base class for representing a half-open byte range.
+struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const ByteRange &RHS) {
- return LHSOffset < RHS.BeginOffset;
- }
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
- bool operator==(const ByteRange &RHS) const {
- return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
- }
- bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
- };
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
- /// \brief A partition of an alloca.
+ /// \brief Support for ordering ranges.
///
- /// This structure represents a contiguous partition of the alloca. These are
- /// formed by examining the uses of the alloca. During formation, they may
- /// overlap but once an AllocaPartitioning is built, the Partitions within it
- /// are all disjoint.
- struct Partition : public ByteRange {
- /// \brief Whether this partition is splittable into smaller partitions.
- ///
- /// We flag partitions as splittable when they are formed entirely due to
- /// accesses by trivially splittable operations such as memset and memcpy.
- bool IsSplittable;
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+ /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
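+ ///
+ /// For example (illustrative): the ranges [0,8), [0,4) and [2,6) sort as
+ /// [0,8) < [0,4) < [2,6), so the spanning range [0,8) leads its cluster.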
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
- /// \brief Test whether a partition has been marked as dead.
- bool isDead() const {
- if (BeginOffset == UINT64_MAX) {
- assert(EndOffset == UINT64_MAX);
- return true;
- }
- return false;
- }
+ /// \brief Support comparison with a single offset to allow binary searches.
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+ return LHS.BeginOffset < RHSOffset;
+ }
+
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+ const ByteRange &RHS) {
+ return LHSOffset < RHS.BeginOffset;
+ }
+
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+};
- /// \brief Kill a partition.
- /// This is accomplished by setting both its beginning and end offset to
- /// the maximum possible value.
- void kill() {
- assert(!isDead() && "He's Dead, Jim!");
- BeginOffset = EndOffset = UINT64_MAX;
+/// \brief A partition of an alloca.
+///
+/// This structure represents a contiguous partition of the alloca. These are
+/// formed by examining the uses of the alloca. During formation, they may
+/// overlap but once an AllocaPartitioning is built, the Partitions within it
+/// are all disjoint.
+struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
+ ///
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially splittable operations such as memset and memcpy.
+ bool IsSplittable;
+
+ /// \brief Test whether a partition has been marked as dead.
+ bool isDead() const {
+ if (BeginOffset == UINT64_MAX) {
+ assert(EndOffset == UINT64_MAX);
+ return true;
}
+ return false;
+ }
- Partition() : ByteRange(), IsSplittable() {}
- Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
- : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
- };
+ /// \brief Kill a partition.
+ /// This is accomplished by setting both its beginning and end offset to
+ /// the maximum possible value.
+ void kill() {
+ assert(!isDead() && "He's Dead, Jim!");
+ BeginOffset = EndOffset = UINT64_MAX;
+ }
+
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+};
+
+/// \brief A particular use of a partition of the alloca.
+///
+/// This structure is used to associate uses of a partition with it. They
+/// mark the range of bytes which are referenced by a particular instruction,
+/// and includes a handle to the user itself and the pointer value in use.
+/// The bounds of these uses are determined by intersecting the bounds of the
+/// memory use itself with a particular partition. As a consequence there is
+/// intentionally overlap between various uses of the same partition.
+class PartitionUse : public ByteRange {
+ /// \brief Combined storage for both the Use* and split state.
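+ /// (llvm::PointerIntPair packs the boolean into the spare low bits of the
+ /// Use pointer, so the split flag needs no extra storage.)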
+ PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
+
+public:
+ PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+ bool IsSplit)
+ : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
- /// \brief A particular use of a partition of the alloca.
+ /// \brief The use in question. Provides access to both user and used value.
///
- /// This structure is used to associate uses of a partition with it. They
- /// mark the range of bytes which are referenced by a particular instruction,
- /// and includes a handle to the user itself and the pointer value in use.
- /// The bounds of these uses are determined by intersecting the bounds of the
- /// memory use itself with a particular partition. As a consequence there is
- /// intentionally overlap between various uses of the same partition.
- struct PartitionUse : public ByteRange {
- /// \brief The use in question. Provides access to both user and used value.
- ///
- /// Note that this may be null if the partition use is *dead*, that is, it
- /// should be ignored.
- Use *U;
+ /// Note that this may be null if the partition use is *dead*, that is, it
+ /// should be ignored.
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
- PartitionUse() : ByteRange(), U() {}
- PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
- : ByteRange(BeginOffset, EndOffset), U(U) {}
- };
+ /// \brief Set the use for this partition use range.
+ void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
+
+ /// \brief Whether this use is split across multiple partitions.
+ bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
+};
+}
+namespace llvm {
+template <> struct isPodLike<Partition> : llvm::true_type {};
+template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+}
+
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
/// \brief Construct a partitioning of a particular alloca.
///
/// Construction does most of the work for partitioning the alloca. This
@@ -334,7 +392,7 @@ private:
class UseBuilder;
friend class AllocaPartitioning::UseBuilder;
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
#endif
@@ -404,106 +462,17 @@ private:
};
}
-template <typename DerivedT, typename RetT>
-class AllocaPartitioning::BuilderBase
- : public InstVisitor<DerivedT, RetT> {
-public:
- BuilderBase(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
- : TD(TD),
- AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())),
- P(P) {
- enqueueUsers(AI, 0);
- }
-
-protected:
- const DataLayout &TD;
- const uint64_t AllocSize;
- AllocaPartitioning &P;
-
- SmallPtrSet<Use *, 8> VisitedUses;
-
- struct OffsetUse {
- Use *U;
- int64_t Offset;
- };
- SmallVector<OffsetUse, 8> Queue;
-
- // The active offset and use while visiting.
- Use *U;
- int64_t Offset;
-
- void enqueueUsers(Instruction &I, int64_t UserOffset) {
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
- UI != UE; ++UI) {
- if (VisitedUses.insert(&UI.getUse())) {
- OffsetUse OU = { &UI.getUse(), UserOffset };
- Queue.push_back(OU);
- }
- }
- }
-
- bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) {
- GEPOffset = Offset;
- for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);
- GTI != GTE; ++GTI) {
- ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
- if (!OpC)
- return false;
- if (OpC->isZero())
- continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = TD.getStructLayout(STy);
- uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
- // Check that we can continue to model this GEP in a signed 64-bit offset.
- if (ElementOffset > INT64_MAX ||
- (GEPOffset >= 0 &&
- ((uint64_t)GEPOffset + ElementOffset) > INT64_MAX)) {
- DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding "
- << "what can be represented in an int64_t!\n"
- << " alloca: " << P.AI << "\n");
- return false;
- }
- if (GEPOffset < 0)
- GEPOffset = ElementOffset + (uint64_t)-GEPOffset;
- else
- GEPOffset += ElementOffset;
- continue;
- }
-
- APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits());
- Index *= APInt(Index.getBitWidth(),
- TD.getTypeAllocSize(GTI.getIndexedType()));
- Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset,
- /*isSigned*/true);
- // Check if the result can be stored in our int64_t offset.
- if (!Index.isSignedIntN(sizeof(GEPOffset) * 8)) {
- DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding "
- << "what can be represented in an int64_t!\n"
- << " alloca: " << P.AI << "\n");
- return false;
- }
-
- GEPOffset = Index.getSExtValue();
- }
- return true;
- }
+static Value *foldSelectInst(SelectInst &SI) {
+ // If the condition being selected on is a constant or the same value is
+ // being selected between, fold the select. Yes this does (rarely) happen
+ // early on.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
+ return SI.getOperand(1+CI->isZero());
+ if (SI.getOperand(1) == SI.getOperand(2))
+ return SI.getOperand(1);
- Value *foldSelectInst(SelectInst &SI) {
- // If the condition being selected on is a constant or the same value is
- // being selected between, fold the select. Yes this does (rarely) happen
- // early on.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
- return SI.getOperand(1+CI->isZero());
- if (SI.getOperand(1) == SI.getOperand(2)) {
- assert(*U == SI.getOperand(1));
- return SI.getOperand(1);
- }
- return 0;
- }
-};
+ return 0;
+}
/// \brief Builder for the alloca partitioning.
///
@@ -511,67 +480,45 @@ protected:
/// of an alloca and splitting the partitions for each load and store at each
/// offset.
class AllocaPartitioning::PartitionBuilder
- : public BuilderBase<PartitionBuilder, bool> {
- friend class InstVisitor<PartitionBuilder, bool>;
+ : public PtrUseVisitor<PartitionBuilder> {
+ friend class PtrUseVisitor<PartitionBuilder>;
+ friend class InstVisitor<PartitionBuilder>;
+ typedef PtrUseVisitor<PartitionBuilder> Base;
+
+ const uint64_t AllocSize;
+ AllocaPartitioning &P;
SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap;
public:
- PartitionBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
- : BuilderBase<PartitionBuilder, bool>(TD, AI, P) {}
-
- /// \brief Run the builder over the allocation.
- bool operator()() {
- // Note that we have to re-evaluate size on each trip through the loop as
- // the queue grows at the tail.
- for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {
- U = Queue[Idx].U;
- Offset = Queue[Idx].Offset;
- if (!visit(cast<Instruction>(U->getUser())))
- return false;
- }
- return true;
- }
+ PartitionBuilder(const DataLayout &DL, AllocaInst &AI, AllocaPartitioning &P)
+ : PtrUseVisitor<PartitionBuilder>(DL),
+ AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())),
+ P(P) {}
private:
- bool markAsEscaping(Instruction &I) {
- P.PointerEscapingInstr = &I;
- return false;
- }
-
- void insertUse(Instruction &I, int64_t Offset, uint64_t Size,
+ void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
bool IsSplittable = false) {
- // Completely skip uses which have a zero size or don't overlap the
- // allocation.
- if (Size == 0 ||
- (Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
- (Offset < 0 && (uint64_t)-Offset >= Size)) {
+ // Completely skip uses which have a zero size or start either before or
+ // past the end of the allocation.
+ if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) {
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
- << " which starts past the end of the " << AllocSize
- << " byte alloca:\n"
+ << " which has zero size or starts outside of the "
+ << AllocSize << " byte alloca:\n"
<< " alloca: " << P.AI << "\n"
<< " use: " << I << "\n");
return;
}
- // Clamp the start to the beginning of the allocation.
- if (Offset < 0) {
- DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
- << " to start at the beginning of the alloca:\n"
- << " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- Size -= (uint64_t)-Offset;
- Offset = 0;
- }
-
- uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size;
+ uint64_t BeginOffset = Offset.getZExtValue();
+ uint64_t EndOffset = BeginOffset + Size;
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
- // NOTE! This may appear superficially to be something we could ignore
- // entirely, but that is not so! There may be PHI-node uses where some
- // instructions are dead but not others. We can't completely ignore the
- // PHI node, and so have to record at least the information here.
+ // This may appear superficially to be something we could ignore entirely,
+ // but that is not so! There may be widened loads or PHI-node uses where
+ // some instructions are dead but not others. We can't completely ignore
+ // them, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -585,9 +532,41 @@ private:
P.Partitions.push_back(New);
}
- bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset,
- bool IsVolatile) {
- uint64_t Size = TD.getTypeStoreSize(Ty);
+ void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
+ uint64_t Size, bool IsVolatile) {
+ // We allow splitting of loads and stores where the type is an integer type
+ // and the access covers the entire alloca. This prevents us from splitting
+ // over-eagerly.
+ // FIXME: In the great blue eventually, we should eagerly split all integer
+ // loads and stores, and then have a separate step that merges adjacent
+ // alloca partitions into a single partition suitable for integer widening.
+ // Or we should skip the merge step and rely on GVN and other passes to
+ // merge adjacent loads and stores that survive mem2reg.
+ bool IsSplittable =
+ Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
+
+ insertUse(I, Offset, Size, IsSplittable);
+ }
+
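
The splittability rule above, restated over plain integers (a sketch with hypothetical inputs; the real check works on Type*, APInt, and the alloca size):

    #include <cstdint>

    static bool isSplittableAccess(bool IsIntegerTy, bool IsVolatile,
                                   uint64_t Offset, uint64_t Size,
                                   uint64_t AllocSize) {
      // Only non-volatile integer accesses starting at offset zero and
      // covering the whole alloca are candidates for later splitting.
      return IsIntegerTy && !IsVolatile && Offset == 0 && Size >= AllocSize;
    }
    // An i64 load at offset 0 of an 8-byte alloca is splittable; the same
    // load at offset 4, or a volatile one, is not.
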
+ void visitLoadInst(LoadInst &LI) {
+ assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
+ "All simple FCA loads should have been pre-split");
+
+ if (!IsOffsetKnown)
+ return PI.setAborted(&LI);
+
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
+ }
+
+ void visitStoreInst(StoreInst &SI) {
+ Value *ValOp = SI.getValueOperand();
+ if (ValOp == *U)
+ return PI.setEscapedAndAborted(&SI);
+ if (!IsOffsetKnown)
+ return PI.setAborted(&SI);
+
+ uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
// If this memory access can be shown to *statically* extend outside the
    // bounds of the allocation, its behavior is undefined, so simply
@@ -596,73 +575,52 @@ private:
// risk of overflow.
// FIXME: We should instead consider the pointer to have escaped if this
// function is being instrumented for addressing bugs or race conditions.
- if (Offset < 0 || (uint64_t)Offset >= AllocSize ||
- Size > (AllocSize - (uint64_t)Offset)) {
- DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
- << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
<< " which extends past the end of the " << AllocSize
<< " byte alloca:\n"
<< " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- return true;
+ << " use: " << SI << "\n");
+ return;
}
- // We allow splitting of loads and stores where the type is an integer type
- // and which cover the entire alloca. Such integer loads and stores
- // often require decomposition into fine grained loads and stores.
- bool IsSplittable = false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
-
- insertUse(I, Offset, Size, IsSplittable);
- return true;
- }
-
- bool visitBitCastInst(BitCastInst &BC) {
- enqueueUsers(BC, Offset);
- return true;
- }
-
- bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
- int64_t GEPOffset;
- if (!computeConstantGEPOffset(GEPI, GEPOffset))
- return markAsEscaping(GEPI);
-
- enqueueUsers(GEPI, GEPOffset);
- return true;
- }
-
- bool visitLoadInst(LoadInst &LI) {
- assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
- "All simple FCA loads should have been pre-split");
- return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
- }
-
- bool visitStoreInst(StoreInst &SI) {
- Value *ValOp = SI.getValueOperand();
- if (ValOp == *U)
- return markAsEscaping(SI);
-
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
- return handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+ handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
- bool visitMemSetInst(MemSetInst &II) {
+ void visitMemSetInst(MemSetInst &II) {
assert(II.getRawDest() == *U && "Pointer use is not the destination?");
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
- insertUse(II, Offset, Size, Length);
- return true;
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+      // Zero-length memset intrinsics, and ones starting at or past the end
+      // of the alloca, can be ignored entirely.
+ return;
+
+ if (!IsOffsetKnown)
+ return PI.setAborted(&II);
+
+ insertUse(II, Offset,
+ Length ? Length->getLimitedValue()
+ : AllocSize - Offset.getLimitedValue(),
+ (bool)Length);
}
- bool visitMemTransferInst(MemTransferInst &II) {
+ void visitMemTransferInst(MemTransferInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
- if (!Size)
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
      // Zero-length mem transfer intrinsics, and ones starting at or past
      // the end of the alloca, can be ignored entirely.
- return true;
+ return;
+
+ if (!IsOffsetKnown)
+ return PI.setAborted(&II);
+
+ uint64_t RawOffset = Offset.getLimitedValue();
+ uint64_t Size = Length ? Length->getLimitedValue()
+ : AllocSize - RawOffset;
MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
@@ -670,12 +628,12 @@ private:
Offsets.IsSplittable = Length;
if (*U == II.getRawDest()) {
- Offsets.DestBegin = Offset;
- Offsets.DestEnd = Offset + Size;
+ Offsets.DestBegin = RawOffset;
+ Offsets.DestEnd = RawOffset + Size;
}
if (*U == II.getRawSource()) {
- Offsets.SourceBegin = Offset;
- Offsets.SourceEnd = Offset + Size;
+ Offsets.SourceBegin = RawOffset;
+ Offsets.SourceEnd = RawOffset + Size;
}
// If we have set up end offsets for both the source and the destination,
@@ -688,7 +646,7 @@ private:
// In that case, we can completely elide the transfer.
if (!II.isVolatile() && Offsets.SourceBegin == Offsets.DestBegin) {
P.Partitions[PrevIdx].kill();
- return true;
+ return;
}
// Otherwise we have an offset transfer within the same alloca. We can't
@@ -701,7 +659,7 @@ private:
// For non-volatile transfers this is a no-op.
if (!II.isVolatile())
- return true;
+ return;
// Otherwise just suppress splitting.
Offsets.IsSplittable = false;
@@ -721,23 +679,25 @@ private:
"Already have intrinsic in map but haven't seen both ends");
(void)Inserted;
}
-
- return true;
}
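
The offset bookkeeping above exists so that a memcpy whose source and destination land at the same offset of the same alloca can be elided. A reduced sketch of that decision, using a hypothetical struct in place of MemTransferOffsets:

    #include <cstdint>

    struct TransferOffsets {
      uint64_t SourceBegin, DestBegin;
      bool Volatile;
    };

    // memcpy(p + k, p + k, n) within one alloca copies bytes onto
    // themselves; unless the transfer is volatile it can be dropped and
    // the matching partition killed.
    static bool isElidableIdentityTransfer(const TransferOffsets &O) {
      return !O.Volatile && O.SourceBegin == O.DestBegin;
    }
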
// Disable SRoA for any intrinsics except for lifetime invariants.
- // FIXME: What about debug instrinsics? This matches old behavior, but
+ // FIXME: What about debug intrinsics? This matches old behavior, but
// doesn't make sense.
- bool visitIntrinsicInst(IntrinsicInst &II) {
+ void visitIntrinsicInst(IntrinsicInst &II) {
+ if (!IsOffsetKnown)
+ return PI.setAborted(&II);
+
if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end) {
ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
- uint64_t Size = std::min(AllocSize - Offset, Length->getLimitedValue());
+ uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
+ Length->getLimitedValue());
insertUse(II, Offset, Size, true);
- return true;
+ return;
}
- return markAsEscaping(II);
+ Base::visitIntrinsicInst(II);
}
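
The size computation for lifetime markers clamps the marker's length to the bytes remaining past the use's offset; a one-function sketch with hypothetical numbers:

    #include <algorithm>
    #include <cstdint>

    static uint64_t lifetimeUseSize(uint64_t AllocSize, uint64_t Offset,
                                    uint64_t Length) {
      // A lifetime marker may name more bytes than remain past Offset.
      return std::min(AllocSize - Offset, Length);
    }
    // lifetimeUseSize(16, 8, 32) == 8: a 32-byte lifetime.start at offset 8
    // of a 16-byte alloca covers only its trailing 8 bytes.
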
Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
@@ -757,14 +717,14 @@ private:
llvm::tie(UsedI, I) = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- Size = std::max(Size, TD.getTypeStoreSize(LI->getType()));
+ Size = std::max(Size, DL.getTypeStoreSize(LI->getType()));
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Value *Op = SI->getOperand(0);
if (Op == UsedI)
return SI;
- Size = std::max(Size, TD.getTypeStoreSize(Op->getType()));
+ Size = std::max(Size, DL.getTypeStoreSize(Op->getType()));
continue;
}
@@ -785,54 +745,62 @@ private:
return 0;
}
- bool visitPHINode(PHINode &PN) {
+ void visitPHINode(PHINode &PN) {
+ if (PN.use_empty())
+ return;
+ if (!IsOffsetKnown)
+ return PI.setAborted(&PN);
+
// See if we already have computed info on this node.
std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN];
if (PHIInfo.first) {
PHIInfo.second = true;
insertUse(PN, Offset, PHIInfo.first);
- return true;
+ return;
}
// Check for an unsafe use of the PHI node.
- if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))
- return markAsEscaping(*EscapingI);
+ if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))
+ return PI.setAborted(UnsafeI);
insertUse(PN, Offset, PHIInfo.first);
- return true;
}
- bool visitSelectInst(SelectInst &SI) {
+ void visitSelectInst(SelectInst &SI) {
+ if (SI.use_empty())
+ return;
if (Value *Result = foldSelectInst(SI)) {
if (Result == *U)
// If the result of the constant fold will be the pointer, recurse
// through the select as if we had RAUW'ed it.
- enqueueUsers(SI, Offset);
+ enqueueUsers(SI);
- return true;
+ return;
}
+ if (!IsOffsetKnown)
+ return PI.setAborted(&SI);
// See if we already have computed info on this node.
std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI];
if (SelectInfo.first) {
SelectInfo.second = true;
insertUse(SI, Offset, SelectInfo.first);
- return true;
+ return;
}
    // Check for an unsafe use of the select.
- if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))
- return markAsEscaping(*EscapingI);
+ if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))
+ return PI.setAborted(UnsafeI);
insertUse(SI, Offset, SelectInfo.first);
- return true;
}
/// \brief Disable SROA entirely if there are unhandled users of the alloca.
- bool visitInstruction(Instruction &I) { return markAsEscaping(I); }
+ void visitInstruction(Instruction &I) {
+ PI.setAborted(&I);
+ }
};
-
/// \brief Use adder for the alloca partitioning.
///
/// This class adds the uses of an alloca to all of the partitions which they
@@ -851,26 +819,22 @@ private:
/// partition space is pre-sorted, and do a logarithmic search for the
/// partition needed, making the total visit a classical ((N + M) * log(N))
/// complexity operation.
-class AllocaPartitioning::UseBuilder : public BuilderBase<UseBuilder> {
+class AllocaPartitioning::UseBuilder : public PtrUseVisitor<UseBuilder> {
+ friend class PtrUseVisitor<UseBuilder>;
friend class InstVisitor<UseBuilder>;
+ typedef PtrUseVisitor<UseBuilder> Base;
+
+ const uint64_t AllocSize;
+ AllocaPartitioning &P;
/// \brief Set to de-duplicate dead instructions found in the use walk.
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public:
UseBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
- : BuilderBase<UseBuilder>(TD, AI, P) {}
-
- /// \brief Run the builder over the allocation.
- void operator()() {
- // Note that we have to re-evaluate size on each trip through the loop as
- // the queue grows at the tail.
- for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {
- U = Queue[Idx].U;
- Offset = Queue[Idx].Offset;
- this->visit(cast<Instruction>(U->getUser()));
- }
- }
+ : PtrUseVisitor<UseBuilder>(TD),
+ AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())),
+ P(P) {}
private:
void markAsDead(Instruction &I) {
@@ -878,20 +842,14 @@ private:
P.DeadUsers.push_back(&I);
}
- void insertUse(Instruction &User, int64_t Offset, uint64_t Size) {
+ void insertUse(Instruction &User, const APInt &Offset, uint64_t Size) {
// If the use has a zero size or extends outside of the allocation, record
// it as a dead use for elimination later.
- if (Size == 0 || (uint64_t)Offset >= AllocSize ||
- (Offset < 0 && (uint64_t)-Offset >= Size))
+ if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize))
return markAsDead(User);
- // Clamp the start to the beginning of the allocation.
- if (Offset < 0) {
- Size -= (uint64_t)-Offset;
- Offset = 0;
- }
-
- uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size;
+ uint64_t BeginOffset = Offset.getZExtValue();
+ uint64_t EndOffset = BeginOffset + Size;
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
@@ -900,13 +858,14 @@ private:
EndOffset = AllocSize;
// NB: This only works if we have zero overlapping partitions.
- iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
- if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
- B = llvm::prior(B);
- for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
- ++I) {
+ iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
+ if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
+ I = llvm::prior(I);
+ iterator E = P.end();
+ bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
+ for (; I != E && I->BeginOffset < EndOffset; ++I) {
PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
- std::min(I->EndOffset, EndOffset), U);
+ std::min(I->EndOffset, EndOffset), U, IsSplit);
P.use_push_back(I, NewPU);
if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
P.PHIOrSelectOpMap[U]
@@ -914,59 +873,63 @@ private:
}
}
- void handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) {
- uint64_t Size = TD.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
-  // bounds of the allocation, its behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse.
- if (Offset < 0 || (uint64_t)Offset >= AllocSize ||
- Size > (AllocSize - (uint64_t)Offset))
- return markAsDead(I);
-
- insertUse(I, Offset, Size);
- }
-
void visitBitCastInst(BitCastInst &BC) {
if (BC.use_empty())
return markAsDead(BC);
- enqueueUsers(BC, Offset);
+ return Base::visitBitCastInst(BC);
}
void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (GEPI.use_empty())
return markAsDead(GEPI);
- int64_t GEPOffset;
- if (!computeConstantGEPOffset(GEPI, GEPOffset))
- llvm_unreachable("Unable to compute constant offset for use");
-
- enqueueUsers(GEPI, GEPOffset);
+ return Base::visitGetElementPtrInst(GEPI);
}
void visitLoadInst(LoadInst &LI) {
- handleLoadOrStore(LI.getType(), LI, Offset);
+ assert(IsOffsetKnown);
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ insertUse(LI, Offset, Size);
}
void visitStoreInst(StoreInst &SI) {
- handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
+ assert(IsOffsetKnown);
+ uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+    // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size))
+ return markAsDead(SI);
+
+ insertUse(SI, Offset, Size);
}
void visitMemSetInst(MemSetInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
- insertUse(II, Offset, Size);
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+ return markAsDead(II);
+
+ assert(IsOffsetKnown);
+ insertUse(II, Offset, Length ? Length->getLimitedValue()
+ : AllocSize - Offset.getLimitedValue());
}
void visitMemTransferInst(MemTransferInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
- if (!Size)
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
return markAsDead(II);
- MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
+ assert(IsOffsetKnown);
+ uint64_t Size = Length ? Length->getLimitedValue()
+ : AllocSize - Offset.getLimitedValue();
+
+ const MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd &&
Offsets.DestBegin == Offsets.SourceBegin)
return markAsDead(II); // Skip identity transfers without side-effects.
@@ -975,34 +938,39 @@ private:
}
void visitIntrinsicInst(IntrinsicInst &II) {
+ assert(IsOffsetKnown);
assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end);
ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
- insertUse(II, Offset,
- std::min(AllocSize - Offset, Length->getLimitedValue()));
+ insertUse(II, Offset, std::min(Length->getLimitedValue(),
+ AllocSize - Offset.getLimitedValue()));
}
- void insertPHIOrSelect(Instruction &User, uint64_t Offset) {
+ void insertPHIOrSelect(Instruction &User, const APInt &Offset) {
uint64_t Size = P.PHIOrSelectSizes.lookup(&User).first;
// For PHI and select operands outside the alloca, we can't nuke the entire
// phi or select -- the other side might still be relevant, so we special
// case them here and use a separate structure to track the operands
// themselves which should be replaced with undef.
- if (Offset >= AllocSize) {
+ if ((Offset.isNegative() && Offset.uge(Size)) ||
+ (!Offset.isNegative() && Offset.uge(AllocSize))) {
P.DeadOperands.push_back(U);
return;
}
insertUse(User, Offset, Size);
}
+
void visitPHINode(PHINode &PN) {
if (PN.use_empty())
return markAsDead(PN);
+ assert(IsOffsetKnown);
insertPHIOrSelect(PN, Offset);
}
+
void visitSelectInst(SelectInst &SI) {
if (SI.use_empty())
return markAsDead(SI);
@@ -1011,7 +979,7 @@ private:
if (Result == *U)
// If the result of the constant fold will be the pointer, recurse
// through the select as if we had RAUW'ed it.
- enqueueUsers(SI, Offset);
+ enqueueUsers(SI);
else
// Otherwise the operand to the select is dead, and we can replace it
// with undef.
@@ -1020,6 +988,7 @@ private:
return;
}
+ assert(IsOffsetKnown);
insertPHIOrSelect(SI, Offset);
}
@@ -1126,13 +1095,20 @@ void AllocaPartitioning::splitAndMergePartitions() {
AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
:
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
PointerEscapingInstr(0) {
PartitionBuilder PB(TD, AI, *this);
- if (!PB())
+ PartitionBuilder::PtrInfo PtrI = PB.visitPtr(AI);
+ if (PtrI.isEscaped() || PtrI.isAborted()) {
+ // FIXME: We should sink the escape vs. abort info into the caller nicely,
+ // possibly by just storing the PtrInfo in the AllocaPartitioning.
+ PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
+ : PtrI.getAbortingInst();
+ assert(PointerEscapingInstr && "Did not track a bad instruction");
return;
+ }
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
@@ -1162,31 +1138,45 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
splitAndMergePartitions();
}
+ // Record how many partitions we end up with.
+ NumAllocaPartitions += Partitions.size();
+  MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(),
+                                              MaxPartitionsPerAlloca);
+
// Now build up the user lists for each of these disjoint partitions by
// re-walking the recursive users of the alloca.
Uses.resize(Partitions.size());
UseBuilder UB(TD, AI, *this);
- UB();
+ PtrI = UB.visitPtr(AI);
+ assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!");
+ assert(!PtrI.isAborted() && "Early aborted the visit of the pointer.");
+
+ unsigned NumUses = 0;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+ for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx)
+ NumUses += Uses[Idx].size();
+#endif
+ NumAllocaPartitionUses += NumUses;
+  MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses,
+                                                 MaxPartitionUsesPerAlloca);
}
Type *AllocaPartitioning::getCommonType(iterator I) const {
Type *Ty = 0;
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ Use *U = UI->getUse();
+ if (!U)
continue; // Skip dead uses.
- if (isa<IntrinsicInst>(*UI->U->getUser()))
+ if (isa<IntrinsicInst>(*U->getUser()))
continue;
if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
continue;
Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
UserTy = LI->getType();
- } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
+ else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
UserTy = SI->getValueOperand()->getType();
- } else {
+ else
return 0; // Bail if we have weird uses.
- }
if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
@@ -1222,13 +1212,13 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
- for (const_use_iterator UI = use_begin(I), UE = use_end(I);
- UI != UE; ++UI) {
- if (!UI->U)
+ for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
+ if (!UI->getUse())
continue; // Skip dead uses.
OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
- << "used by: " << *UI->U->getUser() << "\n";
- if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
+ << "used by: " << *UI->getUse()->getUser() << "\n";
+ if (MemTransferInst *II =
+ dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
bool IsDest;
if (!MTO.IsSplittable)
@@ -1251,8 +1241,7 @@ void AllocaPartitioning::print(raw_ostream &OS) const {
}
OS << "Partitioning of alloca: " << AI << "\n";
- unsigned Num = 0;
- for (const_iterator I = begin(), E = end(); I != E; ++I, ++Num) {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
print(OS, I);
printUsers(OS, I);
}
@@ -1323,18 +1312,18 @@ public:
for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
- Value *Arg = NULL;
+ Value *Arg = 0;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
      // may be zapped by an optimization pass in the future.
if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
Arg = dyn_cast<Argument>(ZExt->getOperand(0));
- if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
Arg = dyn_cast<Argument>(SExt->getOperand(0));
if (!Arg)
- Arg = SI->getOperand(0);
+ Arg = SI->getValueOperand();
} else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Arg = LI->getOperand(0);
+ Arg = LI->getPointerOperand();
} else {
continue;
}
@@ -1358,7 +1347,7 @@ namespace {
/// 1) It takes allocations of aggregates and analyzes the ways in which they
/// are used to try to split them into smaller allocations, ideally of
/// a single scalar data type. It will split up memcpy and memset accesses
-/// as necessary and try to isolate invidual scalar accesses.
+/// as necessary and try to isolate individual scalar accesses.
/// 2) It will transform accesses into forms which are suitable for SSA value
/// promotion. This can be replacing a memset with a scalar store of an
/// integer value, or it can involve speculating operations on a PHI or
@@ -1460,11 +1449,11 @@ public:
// may be grown during speculation. However, we never need to re-visit the
// new uses, and so we can use the initial size bound.
for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
- const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
- if (!PU.U)
+ const PartitionUse &PU = P.getUse(PI, Idx);
+ if (!PU.getUse())
continue; // Skip dead use.
- visit(cast<Instruction>(PU.U->getUser()));
+ visit(cast<Instruction>(PU.getUse()->getUser()));
}
}
@@ -1520,8 +1509,7 @@ private:
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
- for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
- ++Idx) {
+ for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
Value *InVal = PN.getIncomingValue(Idx);
@@ -1559,12 +1547,12 @@ private:
assert(!Loads.empty());
Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
- IRBuilder<> PHIBuilder(&PN);
+ IRBuilderTy PHIBuilder(&PN);
PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
// Get the TBAA tag and alignment to use from one of the loads. It doesn't
- // matter which one we get and if any differ, it doesn't matter.
+    // matter which one we get, nor whether any of them differ.
LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
unsigned Align = SomeLoad->getAlignment();
@@ -1582,7 +1570,7 @@ private:
TerminatorInst *TI = Pred->getTerminator();
Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
Value *InVal = PN.getIncomingValue(Idx);
- IRBuilder<> PredBuilder(TI);
+ IRBuilderTy PredBuilder(TI);
LoadInst *Load
= PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +
@@ -1609,8 +1597,8 @@ private:
// inside the load.
AllocaPartitioning::use_iterator UI
= P.findPartitionUseForPHIOrSelectOperand(InUse);
- assert(isa<PHINode>(*UI->U->getUser()));
- UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+ assert(isa<PHINode>(*UI->getUse()->getUser()));
+ UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
}
DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
}
@@ -1657,16 +1645,16 @@ private:
void visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
- IRBuilder<> IRB(&SI);
// If the select isn't safe to speculate, just use simple logic to emit it.
SmallVector<LoadInst *, 4> Loads;
if (!isSafeSelectToSpeculate(SI, Loads))
return;
+ IRBuilderTy IRB(&SI);
Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
AllocaPartitioning::iterator PIs[2];
- AllocaPartitioning::PartitionUse PUs[2];
+ PartitionUse PUs[2];
for (unsigned i = 0, e = 2; i != e; ++i) {
PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
if (PIs[i] != P.end()) {
@@ -1677,7 +1665,7 @@ private:
PUs[i] = *UI;
// Clear out the use here so that the offsets into the use list remain
// stable but this use is ignored when rewriting.
- UI->U = 0;
+ UI->setUse(0);
}
}
@@ -1709,8 +1697,8 @@ private:
for (unsigned i = 0, e = 2; i != e; ++i) {
if (PIs[i] != P.end()) {
Use *LoadUse = &Loads[i]->getOperandUse(0);
- assert(PUs[i].U->get() == LoadUse->get());
- PUs[i].U = LoadUse;
+ assert(PUs[i].getUse()->get() == LoadUse->get());
+ PUs[i].setUse(LoadUse);
P.use_push_back(PIs[i], PUs[i]);
}
}
@@ -1723,51 +1711,12 @@ private:
};
}
-/// \brief Accumulate the constant offsets in a GEP into a single APInt offset.
-///
-/// If the provided GEP is all-constant, the total byte offset formed by the
-/// GEP is computed and Offset is set to it. If the GEP has any non-constant
-/// operands, the function returns false and the value of Offset is unmodified.
-static bool accumulateGEPOffsets(const DataLayout &TD, GEPOperator &GEP,
- APInt &Offset) {
- APInt GEPOffset(Offset.getBitWidth(), 0);
- for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
- GTI != GTE; ++GTI) {
- ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
- if (!OpC)
- return false;
- if (OpC->isZero()) continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = TD.getStructLayout(STy);
- GEPOffset += APInt(Offset.getBitWidth(),
- SL->getElementOffset(ElementIdx));
- continue;
- }
-
- APInt TypeSize(Offset.getBitWidth(),
- TD.getTypeAllocSize(GTI.getIndexedType()));
- if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {
- assert((VTy->getScalarSizeInBits() % 8) == 0 &&
- "vector element size is not a multiple of 8, cannot GEP over it");
- TypeSize = VTy->getScalarSizeInBits() / 8;
- }
-
- GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize;
- }
- Offset = GEPOffset;
- return true;
-}
-
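
The helper removed above was superseded by GEPOperator::accumulateConstantOffset, which performs the same all-constant index walk. A sketch of the replacement call, mirroring its use in getAdjustedPtr below (the include paths are assumptions for this vintage of the tree):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Operator.h"

    // Fold a GEP's byte offset into Offset when every index is constant;
    // returns false (leaving Offset untouched) otherwise.
    static bool foldGEPOffset(const llvm::DataLayout &TD,
                              llvm::GEPOperator &GEP, llvm::APInt &Offset) {
      llvm::APInt GEPOffset(Offset.getBitWidth(), 0);
      if (!GEP.accumulateConstantOffset(TD, GEPOffset))
        return false;
      Offset += GEPOffset;
      return true;
    }
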
/// \brief Build a GEP out of a base pointer and indices.
///
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
-static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
+ SmallVectorImpl<Value *> &Indices) {
if (Indices.empty())
return BasePtr;
@@ -1776,7 +1725,7 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
+ return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1788,12 +1737,11 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
/// TargetTy. If we can't find one with the same type, we at least try to use
/// one with the same size. If none of that works, we just produce the GEP as
/// indicated by Indices to have the correct offset.
-static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD,
Value *BasePtr, Type *Ty, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
if (Ty == TargetTy)
- return buildGEP(IRB, BasePtr, Indices, Prefix);
+ return buildGEP(IRB, BasePtr, Indices);
// See if we can descend into a struct and locate a field with the correct
// type.
@@ -1820,20 +1768,19 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
if (ElementTy != TargetTy)
Indices.erase(Indices.end() - NumLayers, Indices.end());
- return buildGEP(IRB, BasePtr, Indices, Prefix);
+ return buildGEP(IRB, BasePtr, Indices);
}
/// \brief Recursively compute indices for a natural GEP.
///
/// This is the recursive step for getNaturalGEPWithOffset that walks down the
/// element types adding appropriate indices for the GEP.
-static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
Value *Ptr, Type *Ty, APInt &Offset,
Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
if (Offset == 0)
- return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);
+ return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices);
// We can't recurse through pointer types.
if (Ty->isPointerTy())
@@ -1843,7 +1790,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
// extremely poorly defined currently. The long-term goal is to remove GEPing
// over a vector from the IR completely.
if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
- unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();
+ unsigned ElementSizeInBits = TD.getTypeSizeInBits(VecTy->getScalarType());
if (ElementSizeInBits % 8)
return 0; // GEPs over non-multiple of 8 size vector elements are invalid.
APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
@@ -1853,7 +1800,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),
- Offset, TargetTy, Indices, Prefix);
+ Offset, TargetTy, Indices);
}
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -1866,7 +1813,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
StructType *STy = dyn_cast<StructType>(Ty);
@@ -1885,7 +1832,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Indices.push_back(IRB.getInt32(Index));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
/// \brief Get a natural GEP from a base pointer to a particular offset and
@@ -1898,10 +1845,9 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
/// Indices, and setting Ty to the result subtype.
///
/// If no natural GEP can be constructed, this function returns null.
-static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD,
Value *Ptr, APInt Offset, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
@@ -1920,7 +1866,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
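
A worked decomposition under a hypothetical type, to make the index peeling above concrete:

    // For %T = { i32, [4 x i16] } (the array starts at byte 4), byte
    // offset 10 decomposes as:
    //   10 lands in struct field 1, leaving 6 bytes   => index 1
    //   6 bytes over 2-byte i16 elements              => index 3
    // so the natural GEP is: getelementptr %T* %p, i64 0, i32 1, i64 3
    // (the leading i64 0 is the pointer index computed by the caller).
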
/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1935,12 +1881,11 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
/// The strategy for finding the more natural GEPs is to peel off layers of the
/// pointer, walking back through bit casts and GEPs, searching for a base
/// pointer from which we can compute a natural GEP with the desired
-/// properities. The algorithm tries to fold as many constant indices into
+/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
-static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
- Value *Ptr, APInt Offset, Type *PointerTy,
- const Twine &Prefix) {
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD,
+ Value *Ptr, APInt Offset, Type *PointerTy) {
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
@@ -1963,7 +1908,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
// First fold any existing GEPs into the offset.
while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
APInt GEPOffset(Offset.getBitWidth(), 0);
- if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))
+ if (!GEP->accumulateConstantOffset(TD, GEPOffset))
break;
Offset += GEPOffset;
Ptr = GEP->getPointerOperand();
@@ -1974,7 +1919,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
// See if we can perform a natural GEP here.
Indices.clear();
if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,
- Indices, Prefix)) {
+ Indices)) {
if (P->getType() == PointerTy) {
// Zap any offset pointer that we ended up computing in previous rounds.
if (OffsetPtr && OffsetPtr->use_empty())
@@ -2009,19 +1954,19 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
if (!OffsetPtr) {
if (!Int8Ptr) {
Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
- Prefix + ".raw_cast");
+ "raw_cast");
Int8PtrOffset = Offset;
}
OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
- Prefix + ".raw_idx");
+ "raw_idx");
}
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
if (Ptr->getType() != PointerTy)
- Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
+ Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
return Ptr;
}
@@ -2035,6 +1980,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
+ if (NewITy->getBitWidth() >= OldITy->getBitWidth())
+ return true;
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -2057,12 +2006,16 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
/// This will try various different casting techniques, such as bitcasts,
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
/// two types for viability with this routine.
-static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
Type *Ty) {
assert(canConvertValue(DL, V->getType(), Ty) &&
"Value not convertable to type");
if (V->getType() == Ty)
return V;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+ if (NewITy->getBitWidth() > OldITy->getBitWidth())
+ return IRB.CreateZExt(V, NewITy);
if (V->getType()->isIntegerTy() && Ty->isPointerTy())
return IRB.CreateIntToPtr(V, Ty);
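
With the new integer clause above, converting to a strictly wider integer type zero-extends instead of taking the bitcast path; a schematic of the behavior (hypothetical call, not the exact code):

    // convertValue(DL, IRB, V /* i16 */, Int32Ty)
    //   -> IRB.CreateZExt(V, Int32Ty)   // widening, matching the >= rule
    //                                   // added to canConvertValue above.
    // Same-width requests still take the bitcast/inttoptr/ptrtoint paths;
    // narrowing is still rejected, since canConvertValue compares bit
    // widths for the non-widening cases.
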
if (V->getType()->isPointerTy() && Ty->isIntegerTy())
@@ -2090,19 +2043,19 @@ static bool isVectorPromotionViable(const DataLayout &TD,
if (!Ty)
return false;
- uint64_t VecSize = TD.getTypeSizeInBits(Ty);
- uint64_t ElementSize = Ty->getScalarSizeInBits();
+ uint64_t ElementSize = TD.getTypeSizeInBits(Ty->getScalarType());
// While the definition of LLVM vectors is bitpacked, we don't support sizes
// that aren't byte sized.
if (ElementSize % 8)
return false;
- assert((VecSize % 8) == 0 && "vector size not a multiple of element size?");
- VecSize /= 8;
+ assert((TD.getTypeSizeInBits(Ty) % 8) == 0 &&
+ "vector size not a multiple of element size?");
ElementSize /= 8;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
@@ -2116,30 +2069,34 @@ static bool isVectorPromotionViable(const DataLayout &TD,
EndIndex > Ty->getNumElements())
return false;
- // FIXME: We should build shuffle vector instructions to handle
- // non-element-sized accesses.
- if ((EndOffset - BeginOffset) != ElementSize &&
- (EndOffset - BeginOffset) != VecSize)
- return false;
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ uint64_t NumElements = EndIndex - BeginIndex;
+ Type *PartitionTy
+ = (NumElements == 1) ? Ty->getElementType()
+ : VectorType::get(Ty->getElementType(), NumElements);
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
+ } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ if (!canConvertValue(TD, PartitionTy, LI->getType()))
+ return false;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
if (SI->isVolatile())
return false;
+ if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
+ return false;
} else {
return false;
}
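
Concretely, for a hypothetical <4 x i32> candidate type (ElementSize is 4 after the divide-by-8 above), a use covering bytes [4, 12) of the partition maps to elements [1, 3):

    #include <cstdint>

    // Index computation for a vector partition use (hypothetical values).
    static void partitionIndices(uint64_t Begin, uint64_t End,
                                 uint64_t ElementSize, uint64_t &BeginIndex,
                                 uint64_t &EndIndex) {
      BeginIndex = Begin / ElementSize;   //  4 / 4 == 1
      EndIndex = End / ElementSize;       // 12 / 4 == 3
    }
    // NumElements == 2, so PartitionTy is <2 x i32>, and loads or stores
    // over that range are viable iff they convert to or from <2 x i32>.
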
@@ -2178,13 +2135,14 @@ static bool isIntegerWideningViable(const DataLayout &TD,
uint64_t Size = TD.getTypeStoreSize(AllocaTy);
- // Check the uses to ensure the uses are (likely) promoteable integer uses.
+ // Check the uses to ensure the uses are (likely) promotable integer uses.
// Also ensure that the alloca has a covering load or store. We don't want
- // to widen the integer operotains only to fail to promote due to some other
+ // to widen the integer operations only to fail to promote due to some other
// unsplittable entry (which we may make splittable later).
bool WholeAllocaOp = false;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
@@ -2195,7 +2153,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelEnd > Size)
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (RelBegin == 0 && RelEnd == Size)
@@ -2210,7 +2168,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, AllocaTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
@@ -2226,16 +2184,16 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, ValueTy, AllocaTy))
return false;
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
- if (MI->isVolatile())
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
+ if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
@@ -2246,7 +2204,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
return WholeAllocaOp;
}
-static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
IntegerType *Ty, uint64_t Offset,
const Twine &Name) {
DEBUG(dbgs() << " start: " << *V << "\n");
@@ -2269,7 +2227,7 @@ static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
return V;
}
-static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
+static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
Value *V, uint64_t Offset, const Twine &Name) {
IntegerType *IntTy = cast<IntegerType>(Old->getType());
IntegerType *Ty = cast<IntegerType>(V->getType());
@@ -2300,6 +2258,84 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
return V;
}
+static Value *extractVector(IRBuilderTy &IRB, Value *V,
+ unsigned BeginIndex, unsigned EndIndex,
+ const Twine &Name) {
+ VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+
+ if (NumElements == VecTy->getNumElements())
+ return V;
+
+ if (NumElements == 1) {
+ V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
+ Name + ".extract");
+ DEBUG(dbgs() << " extract: " << *V << "\n");
+ return V;
+ }
+
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(NumElements);
+ for (unsigned i = BeginIndex; i != EndIndex; ++i)
+ Mask.push_back(IRB.getInt32(i));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ Name + ".extract");
+ DEBUG(dbgs() << " shuffle: " << *V << "\n");
+ return V;
+}
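
For example (hypothetical values), extracting elements [1, 3) of a <4 x float> with the routine above emits a single shufflevector:

    // %v.extract = shufflevector <4 x float> %v, <4 x float> undef,
    //                            <2 x i32> <i32 1, i32 2>
    // i.e. a <2 x float> holding lanes 1 and 2 of %v. Extracting a single
    // lane emits extractelement instead, and extracting all four lanes
    // returns %v unchanged.
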
+
+static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
+ unsigned BeginIndex, const Twine &Name) {
+ VectorType *VecTy = cast<VectorType>(Old->getType());
+ assert(VecTy && "Can only insert a vector into a vector");
+
+ VectorType *Ty = dyn_cast<VectorType>(V->getType());
+ if (!Ty) {
+ // Single element to insert.
+ V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
+ Name + ".insert");
+ DEBUG(dbgs() << " insert: " << *V << "\n");
+ return V;
+ }
+
+ assert(Ty->getNumElements() <= VecTy->getNumElements() &&
+ "Too many elements!");
+ if (Ty->getNumElements() == VecTy->getNumElements()) {
+ assert(V->getType() == VecTy && "Vector type mismatch");
+ return V;
+ }
+ unsigned EndIndex = BeginIndex + Ty->getNumElements();
+
+  // When inserting a smaller vector into the larger one, we first use a
+  // shuffle vector to widen it with undef elements, and then a second
+  // shuffle vector to select between the loaded vector and the incoming
+  // vector.
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(VecTy->getNumElements());
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i - BeginIndex));
+ else
+ Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ Name + ".expand");
+ DEBUG(dbgs() << " shuffle1: " << *V << "\n");
+
+ Mask.clear();
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i));
+ else
+ Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
+ V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask),
+                              Name + ".insert");
+ DEBUG(dbgs() << " shuffle2: " << *V << "\n");
+ return V;
+}
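
A worked example of the two shuffles (hypothetical values): inserting a <2 x i32> %v at BeginIndex 1 of a <4 x i32> %old.

    // Widening shuffle: place %v's lanes at positions [1, 3):
    //   %v.expand = shufflevector <2 x i32> %v, <2 x i32> undef,
    //       <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
    // Selecting shuffle: lanes [1, 3) from %v.expand, the rest from %old:
    //   %v.insert = shufflevector <4 x i32> %v.expand, <4 x i32> %old,
    //       <4 x i32> <i32 4, i32 1, i32 2, i32 7>
    // yielding <%old[0], %v[0], %v[1], %old[3]>.
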
+
namespace {
/// \brief Visitor to rewrite instructions using a partition of an alloca to
/// use a new alloca.
@@ -2321,7 +2357,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// If we are rewriting an alloca partition which can be written as pure
// vector operations, we stash extra information here. When VecTy is
- // non-null, we have some strict guarantees about the rewriten alloca:
+ // non-null, we have some strict guarantees about the rewritten alloca:
// - The new alloca is exactly the size of the vector type here.
// - The accesses all either map to the entire vector or to a single
// element.
@@ -2340,11 +2376,13 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
+ bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
- // The name prefix to use when rewriting instructions for this alloca.
- std::string NamePrefix;
+ // Utility IR builder, whose name prefix is setup for each visited use, and
+ // the insertion point is set to point to the user.
+ IRBuilderTy IRB;
public:
AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P,
@@ -2357,7 +2395,8 @@ public:
NewAllocaEndOffset(NewEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
VecTy(), ElementTy(), ElementSize(), IntTy(),
- BeginOffset(), EndOffset() {
+ BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(),
+ IRB(NewAI.getContext(), ConstantFolder()) {
}
/// \brief Visit the users of the alloca partition and rewrite them.
@@ -2369,9 +2408,9 @@ public:
++NumVectorized;
VecTy = cast<VectorType>(NewAI.getAllocatedType());
ElementTy = VecTy->getElementType();
- assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
+ assert((TD.getTypeSizeInBits(VecTy->getScalarType()) % 8) == 0 &&
"Only multiple-of-8 sized vector elements are viable");
- ElementSize = VecTy->getScalarSizeInBits() / 8;
+ ElementSize = TD.getTypeSizeInBits(VecTy->getScalarType()) / 8;
} else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(),
NewAllocaBeginOffset, P, I, E)) {
IntTy = Type::getIntNTy(NewAI.getContext(),
@@ -2379,14 +2418,21 @@ public:
}
bool CanSROA = true;
for (; I != E; ++I) {
- if (!I->U)
+ if (!I->getUse())
continue; // Skip dead uses.
BeginOffset = I->BeginOffset;
EndOffset = I->EndOffset;
- OldUse = I->U;
- OldPtr = cast<Instruction>(I->U->get());
- NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
- CanSROA &= visit(cast<Instruction>(I->U->getUser()));
+ IsSplit = I->isSplit();
+ OldUse = I->getUse();
+ OldPtr = cast<Instruction>(OldUse->get());
+
+ Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
+ IRB.SetInsertPoint(OldUserI);
+ IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
+ IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
+
+ CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
}
if (VecTy) {
assert(CanSROA);
@@ -2408,14 +2454,10 @@ private:
llvm_unreachable("No rewrite rule for this instruction!");
}
- Twine getName(const Twine &Suffix) {
- return NamePrefix + Suffix;
- }
-
- Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
+ Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) {
assert(BeginOffset >= NewAllocaBeginOffset);
APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
- return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
+ return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy);
}
/// \brief Compute suitable alignment to access an offset into the new alloca.
@@ -2450,13 +2492,13 @@ private:
return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset);
}
- ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) {
+ unsigned getIndex(uint64_t Offset) {
assert(VecTy && "Can only call getIndex when rewriting a vector");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
uint32_t Index = RelOffset / ElementSize;
assert(Index * ElementSize == RelOffset);
- return IRB.getInt32(Index);
+ return Index;
}
void deleteIfTriviallyDead(Value *V) {
@@ -2465,28 +2507,27 @@ private:
Pass.DeadInsts.insert(I);
}
- Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
+ Value *rewriteVectorizedLoadInst() {
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- if (LI.getType() == VecTy->getElementType() ||
- BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
- V = IRB.CreateExtractElement(V, getIndex(IRB, BeginOffset),
- getName(".extract"));
- }
- return V;
+ "load");
+ return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
}
- Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+ Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ "load");
V = convertValue(TD, IRB, V, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
if (Offset > 0 || EndOffset < NewAllocaEndOffset)
V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
- getName(".extract"));
+ "extract");
return V;
}
@@ -2494,58 +2535,39 @@ private:
DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- IRBuilder<> IRB(&LI);
uint64_t Size = EndOffset - BeginOffset;
- bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
-
-    // bounds of the original allocation, its behavior is undefined. Rather
- // bounds of the original allocation it's behavior is undefined. Rather
- // than trying to transform it, just replace it with undef.
- // FIXME: We should do something more clever for functions being
- // instrumented by asan.
- // FIXME: Eventually, once ASan and friends can flush out bugs here, this
- // should be transformed to a load of null making it unreachable.
- uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
- if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
- LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
- Pass.DeadInsts.insert(&LI);
- deleteIfTriviallyDead(OldOp);
- DEBUG(dbgs() << " to: undef!!\n");
- return true;
- }
- Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
- : LI.getType();
+ Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
- V = rewriteVectorizedLoadInst(IRB, LI, OldOp);
+ V = rewriteVectorizedLoadInst();
} else if (IntTy && LI.getType()->isIntegerTy()) {
- V = rewriteIntegerLoad(IRB, LI);
+ V = rewriteIntegerLoad(LI);
} else if (BeginOffset == NewAllocaBeginOffset &&
canConvertValue(TD, NewAllocaTy, LI.getType())) {
V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- LI.isVolatile(), getName(".load"));
+ LI.isVolatile(), "load");
} else {
Type *LTy = TargetTy->getPointerTo();
V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
getPartitionTypeAlign(TargetTy),
- LI.isVolatile(), getName(".load"));
+ LI.isVolatile(), "load");
IsPtrAdjusted = true;
}
V = convertValue(TD, IRB, V, TargetTy);
- if (IsSplitIntLoad) {
+ if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
+ assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+ "Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
- assert(LI.getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide loads can be split and recomposed");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
@@ -2555,7 +2577,7 @@ private:
Value *Placeholder
= new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
- getName(".insert"));
+ "insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
delete Placeholder;
@@ -2569,19 +2591,24 @@ private:
return !LI.isVolatile() && !IsPtrAdjusted;
}
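
The placeholder dance in the split-load path deserves a sketch: the wide value isn't fully reassembled until every split piece has been rewritten, so insertInteger needs a stand-in to merge into. A schematic restatement (not the exact code):

    // 1. V = insertInteger(TD, IRB, Placeholder, V, BeginOffset, ...):
    //    merge this piece's bits into a dummy load of LI's full type.
    // 2. LI.replaceAllUsesWith(V): users of the original wide load now
    //    see the partially assembled value.
    // 3. Placeholder->replaceAllUsesWith(&LI): the dummy inside the merge
    //    chain is redirected to LI, whose remaining bits later rewrites
    //    fill in.
    // 4. delete Placeholder: it was never inserted into a basic block.
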
- bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+ bool rewriteVectorizedStoreInst(Value *V,
StoreInst &SI, Value *OldOp) {
- if (V->getType() == ElementTy ||
- BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
- if (V->getType() != ElementTy)
- V = convertValue(TD, IRB, V, ElementTy);
- LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
- getName(".insert"));
- } else if (V->getType() != VecTy) {
- V = convertValue(TD, IRB, V, VecTy);
- }
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+ Type *PartitionTy
+ = (NumElements == 1) ? ElementTy
+ : VectorType::get(ElementTy, NumElements);
+ if (V->getType() != PartitionTy)
+ V = convertValue(TD, IRB, V, PartitionTy);
+
+ // Mix in the existing elements.
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "load");
+ V = insertVector(IRB, Old, V, BeginIndex, "vec");
+
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.insert(&SI);
@@ -2590,17 +2617,17 @@ private:
return true;
}
- bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
+ bool rewriteIntegerStore(Value *V, StoreInst &SI) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
- getName(".insert"));
+ "insert");
}
V = convertValue(TD, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
@@ -2614,7 +2641,6 @@ private:
DEBUG(dbgs() << " original: " << SI << "\n");
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
- IRBuilder<> IRB(&SI);
Value *V = SI.getValueOperand();
@@ -2627,23 +2653,21 @@ private:
uint64_t Size = EndOffset - BeginOffset;
if (Size < TD.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
+ assert(IsSplit && "A seemingly split store isn't splittable");
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
- assert(V->getType()->getIntegerBitWidth() ==
- TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
- getName(".extract"));
+ "extract");
}
if (VecTy)
- return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+ return rewriteVectorizedStoreInst(V, SI, OldOp);
if (IntTy && V->getType()->isIntegerTy())
- return rewriteIntegerStore(IRB, V, SI);
+ return rewriteIntegerStore(V, SI);
StoreInst *NewSI;
if (BeginOffset == NewAllocaBeginOffset &&
@@ -2665,9 +2689,42 @@ private:
return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
}
+ /// \brief Compute an integer value from splatting an i8 across the given
+ /// number of bytes.
+ ///
+ /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
+ /// call this routine.
+ /// FIXME: Heed the advice above.
+ ///
+ /// \param V The i8 value to splat.
+ /// \param Size The number of bytes in the output (assuming i8 is one byte)
+ Value *getIntegerSplat(Value *V, unsigned Size) {
+ assert(Size > 0 && "Expected a positive number of bytes.");
+ IntegerType *VTy = cast<IntegerType>(V->getType());
+ assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
+ if (Size == 1)
+ return V;
+
+ Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
+ V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"),
+ ConstantExpr::getUDiv(
+ Constant::getAllOnesValue(SplatIntTy),
+ ConstantExpr::getZExt(
+ Constant::getAllOnesValue(V->getType()),
+ SplatIntTy)),
+ "isplat");
+ return V;
+ }
+
+ /// \brief Compute a vector splat for a given element value.
+ Value *getVectorSplat(Value *V, unsigned NumElements) {
+ V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
+ DEBUG(dbgs() << " splat: " << *V << "\n");
+ return V;
+ }
+
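
For reference, the opaque ConstantExpr::getUDiv construction in getIntegerSplat above computes the multiplier 0x0101...01: allOnes(Size*8) / zext(allOnes(8)) replicates a byte across Size bytes when multiplied. A minimal standalone C++ check of the same arithmetic (the helper name and the 8-byte cap are illustrative only, not part of this patch):

#include <cassert>
#include <cstdint>

// Splat one byte across Size bytes: allOnes(Size*8) / 0xFF is 0x0101...01
// with Size bytes, so multiplying by it replicates the byte.
static uint64_t splatByte(uint8_t Byte, unsigned Size) {
  assert(Size >= 1 && Size <= 8 && "sketch only handles up to 8 bytes");
  if (Size == 1)
    return Byte;
  uint64_t AllOnes =
      (Size == 8) ? ~UINT64_C(0) : ((UINT64_C(1) << (Size * 8)) - 1);
  return uint64_t(Byte) * (AllOnes / 0xFF); // e.g. 0xAB * 0x01010101
}

int main() {
  assert(splatByte(0xAB, 4) == UINT64_C(0xABABABAB));
  assert(splatByte(0xAB, 8) == UINT64_C(0xABABABABABABABAB));
}
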
bool visitMemSetInst(MemSetInst &II) {
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getRawDest() == OldPtr);
// If the memset has a variable size, it cannot be split, just adjust the
@@ -2693,7 +2750,8 @@ private:
(BeginOffset != NewAllocaBeginOffset ||
EndOffset != NewAllocaEndOffset ||
!AllocaTy->isSingleValueType() ||
- !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
+ !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) ||
+ TD.getTypeSizeInBits(ScalarTy)%8 != 0)) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
CallInst *New
@@ -2709,53 +2767,61 @@ private:
// If we can represent this as a simple value, we have to build the actual
// value to store, which requires expanding the byte present in memset to
// a sensible representation for the alloca type. This is essentially
- // splatting the byte to a sufficiently wide integer, bitcasting to the
- // desired scalar type, and splatting it across any desired vector type.
- uint64_t Size = EndOffset - BeginOffset;
- Value *V = II.getValue();
- IntegerType *VTy = cast<IntegerType>(V->getType());
- Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
- if (Size*8 > VTy->getBitWidth())
- V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
- ConstantExpr::getUDiv(
- Constant::getAllOnesValue(SplatIntTy),
- ConstantExpr::getZExt(
- Constant::getAllOnesValue(V->getType()),
- SplatIntTy)),
- getName(".isplat"));
-
- // If this is an element-wide memset of a vectorizable alloca, insert it.
- if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset)) {
- if (V->getType() != ScalarTy)
- V = convertValue(TD, IRB, V, ScalarTy);
- StoreInst *Store = IRB.CreateAlignedStore(
- IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
- NewAI.getAlignment(),
- getName(".load")),
- V, getIndex(IRB, BeginOffset),
- getName(".insert")),
- &NewAI, NewAI.getAlignment());
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
- return true;
- }
+ // splatting the byte to a sufficiently wide integer, splatting it across
+ // any desired vector width, and bitcasting to the final type.
+ Value *V;
+
+ if (VecTy) {
+ // If this is a memset of a vectorized alloca, insert it.
+ assert(ElementTy == ScalarTy);
+
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+
+ Value *Splat =
+ getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8);
+ Splat = convertValue(TD, IRB, Splat, ElementTy);
+ if (NumElements > 1)
+ Splat = getVectorSplat(Splat, NumElements);
- // If this is a memset on an alloca where we can widen stores, insert the
- // set integer.
- if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset)) {
- assert(!II.isVolatile());
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
- Old = convertValue(TD, IRB, Old, IntTy);
- assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
- }
+ "oldload");
+ V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
+ } else if (IntTy) {
+ // If this is a memset on an alloca where we can widen stores, insert the
+ // set integer.
+ assert(!II.isVolatile());
+
+ uint64_t Size = EndOffset - BeginOffset;
+ V = getIntegerSplat(II.getValue(), Size);
+
+ if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaEndOffset)) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "oldload");
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, V, Offset, "insert");
+ } else {
+ assert(V->getType() == IntTy &&
+ "Wrong type for an alloca wide integer!");
+ }
+ V = convertValue(TD, IRB, V, AllocaTy);
+ } else {
+ // Established these invariants above.
+ assert(BeginOffset == NewAllocaBeginOffset);
+ assert(EndOffset == NewAllocaEndOffset);
+
+ V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8);
+ if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
+ V = getVectorSplat(V, AllocaVecTy->getNumElements());
- if (V->getType() != AllocaTy)
V = convertValue(TD, IRB, V, AllocaTy);
+ }
Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
II.isVolatile());
@@ -2769,7 +2835,6 @@ private:
// them into two categories: split intrinsics and unsplit intrinsics.
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
bool IsDest = II.getRawDest() == OldPtr;
@@ -2840,37 +2905,21 @@ private:
// Record this instruction for deletion.
Pass.DeadInsts.insert(&II);
- bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
- EndOffset == NewAllocaEndOffset;
- bool IsVectorElement = VecTy && !IsWholeAlloca;
- uint64_t Size = EndOffset - BeginOffset;
- IntegerType *SubIntTy
- = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
-
- Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
- : II.getRawDest()->getType();
- if (!EmitMemCpy) {
- if (IsVectorElement)
- OtherPtrTy = VecTy->getElementType()->getPointerTo();
- else if (IntTy && !IsWholeAlloca)
- OtherPtrTy = SubIntTy->getPointerTo();
- else
- OtherPtrTy = NewAI.getType();
- }
-
- // Compute the other pointer, folding as much as possible to produce
- // a single, simple GEP in most cases.
- Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
- OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
- getName("." + OtherPtr->getName()));
-
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after rewriting this instruction.
+ Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
if (AllocaInst *AI
= dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
Pass.Worklist.insert(AI);
if (EmitMemCpy) {
+ Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
+ : II.getRawDest()->getType();
+
+ // Compute the other pointer, folding as much as possible to produce
+ // a single, simple GEP in most cases.
+ OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
+
Value *OurPtr
= getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
: II.getRawSource()->getType());
@@ -2891,48 +2940,63 @@ private:
if (!Align)
Align = 1;
- Value *SrcPtr = OtherPtr;
+ bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset;
+ uint64_t Size = EndOffset - BeginOffset;
+ unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0;
+ unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0;
+ unsigned NumElements = EndIndex - BeginIndex;
+ IntegerType *SubIntTy
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
+
+ Type *OtherPtrTy = NewAI.getType();
+ if (VecTy && !IsWholeAlloca) {
+ if (NumElements == 1)
+ OtherPtrTy = VecTy->getElementType();
+ else
+ OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
+
+ OtherPtrTy = OtherPtrTy->getPointerTo();
+ } else if (IntTy && !IsWholeAlloca) {
+ OtherPtrTy = SubIntTy->getPointerTo();
+ }
+
+ Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
Value *DstPtr = &NewAI;
if (!IsDest)
std::swap(SrcPtr, DstPtr);
Value *Src;
- if (IsVectorElement && !IsDest) {
- // We have to extract rather than load.
- Src = IRB.CreateExtractElement(
- IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
- getIndex(IRB, BeginOffset),
- getName(".copyextract"));
+ if (VecTy && !IsWholeAlloca && !IsDest) {
+ Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "load");
+ Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ "load");
Src = convertValue(TD, IRB, Src, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract"));
+ Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract");
} else {
Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
- getName(".copyload"));
+ "copyload");
}
- if (IntTy && !IsWholeAlloca && IsDest) {
+ if (VecTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
+ Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
+ } else if (IntTy && !IsWholeAlloca && IsDest) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert"));
+ Src = insertInteger(TD, IRB, Old, Src, Offset, "insert");
Src = convertValue(TD, IRB, Src, NewAllocaTy);
}
- if (IsVectorElement && IsDest) {
- // We have to insert into a loaded copy before storing.
- Src = IRB.CreateInsertElement(
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
- Src, getIndex(IRB, BeginOffset),
- getName(".insert"));
- }
-
StoreInst *Store = cast<StoreInst>(
IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
(void)Store;
@@ -2944,7 +3008,6 @@ private:
assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end);
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getArgOperand(1) == OldPtr);
// Record this instruction for deletion.
@@ -2960,6 +3023,7 @@ private:
else
New = IRB.CreateLifetimeEnd(Ptr, Size);
+ (void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return true;
}
@@ -2971,7 +3035,9 @@ private:
// as local as possible to the PHI. To do that, we re-use the location of
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
- IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr));
+ IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr));
+ PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
// Replace the operands which were using the old pointer.
@@ -2984,7 +3050,6 @@ private:
bool visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
- IRBuilder<> IRB(&SI);
// Find the operand we need to rewrite here.
bool IsTrueVal = SI.getTrueValue() == OldPtr;
@@ -3059,7 +3124,7 @@ private:
class OpSplitter {
protected:
/// The builder used to form new instructions.
- IRBuilder<> IRB;
+ IRBuilderTy IRB;
/// The indices which to be used with insert- or extractvalue to select the
/// appropriate value within the aggregate.
SmallVector<unsigned, 4> Indices;
@@ -3136,9 +3201,8 @@ private:
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
- Value *Load = IRB.CreateLoad(IRB.CreateInBoundsGEP(Ptr, GEPIndices,
- Name + ".gep"),
- Name + ".load");
+ Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
+ Value *Load = IRB.CreateLoad(GEP, Name + ".load");
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
DEBUG(dbgs() << " to: " << *Load << "\n");
}
@@ -3272,12 +3336,13 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
Type *ElementTy = SeqTy->getElementType();
uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
- if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy))
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
if (NumSkippedElements >= ArrTy->getNumElements())
return 0;
- if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy))
+ } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
if (NumSkippedElements >= VecTy->getNumElements())
return 0;
+ }
Offset -= NumSkippedElements * ElementSize;
// First check if we need to recurse.
@@ -3375,7 +3440,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
UE = P.use_end(PI);
UI != UE && !IsLive; ++UI)
- if (UI->U)
+ if (UI->getUse())
IsLive = true;
if (!IsLive)
return false; // No live uses left of this partition.
@@ -3411,7 +3476,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
// Check for the case where we're going to rewrite to a new alloca of the
// exact same type as the original, and with the same access offsets. In that
// case, re-use the existing alloca, but still run through the rewriter to
- // performe phi and select speculation.
+ // perform phi and select speculation.
AllocaInst *NewAI;
if (AllocaTy == AI.getAllocatedType()) {
assert(PI->BeginOffset == 0 &&
@@ -3578,7 +3643,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
/// If there is a domtree available, we attempt to promote using the full power
/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
/// based on the SSAUpdater utilities. This function returns whether any
-/// promotion occured.
+/// promotion occurred.
bool SROA::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 39630fd027f0..8a9c7da113c1 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -13,14 +13,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm-c/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm-c/Initialization.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm-c/Transforms/Scalar.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
using namespace llvm;
@@ -50,11 +50,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerAtomicPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
- initializeObjCARCAliasAnalysisPass(Registry);
- initializeObjCARCAPElimPass(Registry);
- initializeObjCARCExpandPass(Registry);
- initializeObjCARCContractPass(Registry);
- initializeObjCARCOptPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
initializeSCCPPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index a46d09c32093..e590a374eac2 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -21,32 +21,32 @@
#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 9f24bb635e88..c243d34fd7db 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -23,19 +23,19 @@
#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Attributes.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
STATISTIC(NumSimpl, "Number of blocks simplified");
@@ -48,12 +48,19 @@ namespace {
}
virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfo>();
+ }
};
}
char CFGSimplifyPass::ID = 0;
-INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
- "Simplify the CFG", false, false)
+INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG",
+ false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG",
+ false, false)
// Public interface to the CFGSimplification pass
FunctionPass *llvm::createCFGSimplificationPass() {
@@ -111,13 +118,11 @@ static bool markAliveBlocks(BasicBlock *BB,
SmallVector<BasicBlock*, 128> Worklist;
Worklist.push_back(BB);
+ Reachable.insert(BB);
bool Changed = false;
do {
BB = Worklist.pop_back_val();
- if (!Reachable.insert(BB))
- continue;
-
// Do a quick scan of the basic block, turning any obviously unreachable
// instructions into LLVM unreachable insts. The instruction combining pass
// canonicalizes unreachable insts into stores to null or undef.
@@ -176,7 +181,8 @@ static bool markAliveBlocks(BasicBlock *BB,
Changed |= ConstantFoldTerminator(BB, true);
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- Worklist.push_back(*SI);
+ if (Reachable.insert(*SI))
+ Worklist.push_back(*SI);
} while (!Worklist.empty());
return Changed;
}
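
The hunks above flip the worklist discipline in markAliveBlocks: a block is inserted into Reachable when it is pushed, not when it is popped, so no block is ever queued twice. The same pattern in standalone C++ (a generic sketch with a hypothetical Node type, not LLVM API):

#include <unordered_set>
#include <vector>

struct Node { std::vector<Node *> Succs; };

static std::unordered_set<Node *> reachableFrom(Node *Entry) {
  std::unordered_set<Node *> Reachable;
  std::vector<Node *> Worklist;
  Worklist.push_back(Entry);
  Reachable.insert(Entry); // mark on push, mirroring the change above
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    for (Node *S : N->Succs)
      if (Reachable.insert(S).second) // true only the first time S is seen
        Worklist.push_back(S);
  }
  return Reachable;
}
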
@@ -294,8 +300,8 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
-static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD,
- const TargetTransformInfo *TTI) {
+static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
+ const DataLayout *TD) {
bool Changed = false;
bool LocalChange = true;
while (LocalChange) {
@@ -304,7 +310,7 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD,
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TD, TTI)) {
+ if (SimplifyCFG(BBIt++, TTI, TD)) {
LocalChange = true;
++NumSimpl;
}
@@ -318,12 +324,11 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD,
// simplify the CFG.
//
bool CFGSimplifyPass::runOnFunction(Function &F) {
+ const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
- const TargetTransformInfo *TTI =
- getAnalysisIfAvailable<TargetTransformInfo>();
bool EverChanged = removeUnreachableBlocksFromFn(F);
EverChanged |= mergeEmptyReturnBlocks(F);
- EverChanged |= iterativelySimplifyCFG(F, TD, TTI);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, TD);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -337,7 +342,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TD, TTI);
+ EverChanged = iterativelySimplifyCFG(F, TTI, TD);
EverChanged |= removeUnreachableBlocksFromFn(F);
} while (EverChanged);
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 17d07cdb2d4d..3514e6c2aadc 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -17,32 +17,24 @@
#define DEBUG_TYPE "simplify-libcalls"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host!
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
-STATISTIC(NumSimplified, "Number of library calls simplified");
-STATISTIC(NumAnnotated, "Number of attributes added to library functions");
-static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
- cl::init(false),
- cl::desc("Enable unsafe double to float "
- "shrinking for math lib calls"));
//===----------------------------------------------------------------------===//
// Optimizer Base Class
//===----------------------------------------------------------------------===//
@@ -87,677 +79,6 @@ public:
//===----------------------------------------------------------------------===//
-// Helper Functions
-//===----------------------------------------------------------------------===//
-
-static bool CallHasFloatingPointArgument(const CallInst *CI) {
- for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
- it != e; ++it) {
- if ((*it)->getType()->isFloatingPointTy())
- return true;
- }
- return false;
-}
-
-namespace {
-//===----------------------------------------------------------------------===//
-// Math Library Optimizations
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------===//
-// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
-
-struct UnaryDoubleFPOpt : public LibCallOptimization {
- bool CheckRetType;
- UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
- !FT->getParamType(0)->isDoubleTy())
- return 0;
-
- if (CheckRetType) {
- // Check if all the uses for function like 'sin' are converted to float.
- for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
- ++UseI) {
- FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
- if (Cast == 0 || !Cast->getType()->isFloatTy())
- return 0;
- }
- }
-
- // If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
- if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
- return 0;
-
- // floor((double)floatval) -> (double)floorf(floatval)
- Value *V = Cast->getOperand(0);
- V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
- return B.CreateFPExt(V, B.getDoubleTy());
- }
-};
-
-//===---------------------------------------===//
-// 'cos*' Optimizations
-struct CosOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- Value *Ret = NULL;
- if (UnsafeFPShrink && Callee->getName() == "cos" &&
- TLI->has(LibFunc::cosf)) {
- UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
- Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
- }
-
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 1 argument of FP type, which matches the
- // result type.
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
-
- // cos(-x) -> cos(x)
- Value *Op1 = CI->getArgOperand(0);
- if (BinaryOperator::isFNeg(Op1)) {
- BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
- return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
- }
- return Ret;
- }
-};
-
-//===---------------------------------------===//
-// 'pow*' Optimizations
-
-struct PowOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- Value *Ret = NULL;
- if (UnsafeFPShrink && Callee->getName() == "pow" &&
- TLI->has(LibFunc::powf)) {
- UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
- Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
- }
-
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 2 arguments of the same FP type, which match the
- // result type.
- if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
-
- Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
- if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
- return Op1C;
- if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
- return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
- }
-
- ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
- if (Op2C == 0) return Ret;
-
- if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
- return ConstantFP::get(CI->getType(), 1.0);
-
- if (Op2C->isExactlyValue(0.5)) {
- // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
- // This is faster than calling pow, and still handles negative zero
- // and negative infinity correctly.
- // TODO: In fast-math mode, this could be just sqrt(x).
- // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
- Value *Inf = ConstantFP::getInfinity(CI->getType());
- Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
- Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
- Callee->getAttributes());
- Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
- Callee->getAttributes());
- Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
- Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
- return Sel;
- }
-
- if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
- return Op1;
- if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
- return B.CreateFMul(Op1, Op1, "pow2");
- if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
- return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
- Op1, "powrecip");
- return 0;
- }
-};
-
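
The pow(x, 0.5) expansion in PowOpt above cannot be a plain sqrt because of two IEEE edge cases; a small numeric check of the select form (a C++ sketch of the transformation's semantics, not the pass itself):

#include <cassert>
#include <cmath>
#include <limits>

// pow(x, 0.5) must map -0.0 to +0.0 and -inf to +inf, but sqrt(-0.0) is
// -0.0 and sqrt(-inf) is NaN; hence the fabs and the -inf select.
int main() {
  const double Inf = std::numeric_limits<double>::infinity();
  auto PowHalf = [&](double X) {
    return X == -Inf ? Inf : std::fabs(std::sqrt(X));
  };
  assert(PowHalf(-0.0) == 0.0 && !std::signbit(PowHalf(-0.0)));
  assert(PowHalf(-Inf) == Inf);
  assert(PowHalf(4.0) == 2.0);
}
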
-//===---------------------------------------===//
-// 'exp2' Optimizations
-
-struct Exp2Opt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- Value *Ret = NULL;
- if (UnsafeFPShrink && Callee->getName() == "exp2" &&
- TLI->has(LibFunc::exp2)) {
- UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
- Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
- }
-
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 1 argument of FP type, which matches the
- // result type.
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
-
- Value *Op = CI->getArgOperand(0);
- // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
- // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
- Value *LdExpArg = 0;
- if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
- } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
- }
-
- if (LdExpArg) {
- const char *Name;
- if (Op->getType()->isFloatTy())
- Name = "ldexpf";
- else if (Op->getType()->isDoubleTy())
- Name = "ldexp";
- else
- Name = "ldexpl";
-
- Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
- if (!Op->getType()->isFloatTy())
- One = ConstantExpr::getFPExtend(One, Op->getType());
-
- Module *M = Caller->getParent();
- Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
- Op->getType(),
- B.getInt32Ty(), NULL);
- CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
- }
- return Ret;
- }
-};
-
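
Exp2Opt above relies on exp2 being exact for integral arguments: both sides produce 1.0 scaled by 2^n with no rounding. A quick standalone check (sketch only; assumes a libm whose exp2 is correctly rounded for these inputs, as glibc's is):

#include <cassert>
#include <cmath>

int main() {
  for (int N = -50; N <= 50; ++N)
    assert(std::exp2(static_cast<double>(N)) == std::ldexp(1.0, N));
}
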
-//===----------------------------------------------------------------------===//
-// Integer Optimizations
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------===//
-// 'ffs*' Optimizations
-
-struct FFSOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has a single integer argument and an i32 result.
- if (FT->getNumParams() != 1 ||
- !FT->getReturnType()->isIntegerTy(32) ||
- !FT->getParamType(0)->isIntegerTy())
- return 0;
-
- Value *Op = CI->getArgOperand(0);
-
- // Constant fold.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- if (CI->isZero()) // ffs(0) -> 0.
- return B.getInt32(0);
- // ffs(c) -> cttz(c)+1
- return B.getInt32(CI->getValue().countTrailingZeros() + 1);
- }
-
- // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
- Type *ArgType = Op->getType();
- Value *F = Intrinsic::getDeclaration(Callee->getParent(),
- Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
- V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
- V = B.CreateIntCast(V, B.getInt32Ty(), false);
-
- Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
- return B.CreateSelect(Cond, V, B.getInt32(0));
- }
-};
-
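
The FFSOpt lowering above is the classic identity ffs(x) == (x != 0 ? cttz(x) + 1 : 0); in standalone C++ using the __builtin_ctz intrinsic that llvm.cttz compiles to (a sketch covering the 32-bit case only):

#include <cassert>

static int ffsFast(unsigned X) {
  return X != 0 ? __builtin_ctz(X) + 1 : 0; // ffs is 1-based, ctz is 0-based
}

int main() {
  assert(ffsFast(0) == 0);
  assert(ffsFast(1) == 1);
  assert(ffsFast(8) == 4);            // lowest set bit is bit 3
  assert(ffsFast(0x80000000u) == 32);
}
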
-//===---------------------------------------===//
-// 'isdigit' Optimizations
-
-struct IsDigitOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(i32)
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- !FT->getParamType(0)->isIntegerTy(32))
- return 0;
-
- // isdigit(c) -> (c-'0') <u 10
- Value *Op = CI->getArgOperand(0);
- Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
- Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
- return B.CreateZExt(Op, CI->getType());
- }
-};
-
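
IsDigitOpt's unsigned-compare trick above works because values below '0' wrap around to huge unsigned numbers; a standalone check over a full range of inputs (illustrative sketch):

#include <cassert>

static bool isDigitFast(int C) {
  return static_cast<unsigned>(C - '0') < 10u; // (c-'0') <u 10
}

int main() {
  for (int C = -256; C < 256; ++C)
    assert(isDigitFast(C) == (C >= '0' && C <= '9'));
}
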
-//===---------------------------------------===//
-// 'isascii' Optimizations
-
-struct IsAsciiOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(i32)
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- !FT->getParamType(0)->isIntegerTy(32))
- return 0;
-
- // isascii(c) -> c <u 128
- Value *Op = CI->getArgOperand(0);
- Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
- return B.CreateZExt(Op, CI->getType());
- }
-};
-
-//===---------------------------------------===//
-// 'abs', 'labs', 'llabs' Optimizations
-
-struct AbsOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(integer) where the types agree.
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- FT->getParamType(0) != FT->getReturnType())
- return 0;
-
- // abs(x) -> x >s -1 ? x : -x
- Value *Op = CI->getArgOperand(0);
- Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
- "ispos");
- Value *Neg = B.CreateNeg(Op, "neg");
- return B.CreateSelect(Pos, Op, Neg);
- }
-};
-
-
-//===---------------------------------------===//
-// 'toascii' Optimizations
-
-struct ToAsciiOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- // We require i32(i32)
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isIntegerTy(32))
- return 0;
-
- // toascii(c) -> c & 0x7f
- return B.CreateAnd(CI->getArgOperand(0),
- ConstantInt::get(CI->getType(),0x7F));
- }
-};
-
-//===----------------------------------------------------------------------===//
-// Formatting and IO Optimizations
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------===//
-// 'printf' Optimizations
-
-struct PrintFOpt : public LibCallOptimization {
- Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
- IRBuilder<> &B) {
- // Check for a fixed format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
- return 0;
-
- // Empty format string -> noop.
- if (FormatStr.empty()) // Tolerate printf's declared void.
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), 0);
-
- // Do not do any of the following transformations if the printf return value
- // is used, in general the printf return value is not compatible with either
- // putchar() or puts().
- if (!CI->use_empty())
- return 0;
-
- // printf("x") -> putchar('x'), even for '%'.
- if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
- if (CI->use_empty() || !Res) return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
- }
-
- // printf("foo\n") --> puts("foo")
- if (FormatStr[FormatStr.size()-1] == '\n' &&
- FormatStr.find('%') == std::string::npos) { // no format characters.
- // Create a string literal with no \n on it. We expect the constant merge
- // pass to be run after this pass, to merge duplicate strings.
- FormatStr = FormatStr.drop_back();
- Value *GV = B.CreateGlobalString(FormatStr, "str");
- Value *NewCI = EmitPutS(GV, B, TD, TLI);
- return (CI->use_empty() || !NewCI) ?
- NewCI :
- ConstantInt::get(CI->getType(), FormatStr.size()+1);
- }
-
- // Optimize specific format strings.
- // printf("%c", chr) --> putchar(chr)
- if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
-
- if (CI->use_empty() || !Res) return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
- }
-
- // printf("%s\n", str) --> puts(str)
- if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isPointerTy()) {
- return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
- }
- return 0;
- }
-
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require one fixed pointer argument and an integer/void result.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
- !(FT->getReturnType()->isIntegerTy() ||
- FT->getReturnType()->isVoidTy()))
- return 0;
-
- if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
- return V;
- }
-
- // printf(format, ...) -> iprintf(format, ...) if no floating point
- // arguments.
- if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *IPrintFFn =
- M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(IPrintFFn);
- B.Insert(New);
- return New;
- }
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'sprintf' Optimizations
-
-struct SPrintFOpt : public LibCallOptimization {
- Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
- IRBuilder<> &B) {
- // Check for a fixed format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
- return 0;
-
- // If we just have a format string (nothing else crazy) transform it.
- if (CI->getNumArgOperands() == 2) {
- // Make sure there's no % in the constant array. We could try to handle
- // %% -> % in the future if we cared.
- for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
- if (FormatStr[i] == '%')
- return 0; // we found a format specifier, bail out.
-
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
- FormatStr.size() + 1), 1); // nul byte.
- return ConstantInt::get(CI->getType(), FormatStr.size());
- }
-
- // The remaining optimizations require the format string to be "%s" or "%c"
- // and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
- CI->getNumArgOperands() < 3)
- return 0;
-
- // Decode the second character of the format string.
- if (FormatStr[1] == 'c') {
- // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
- if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
- Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
- B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
- B.CreateStore(B.getInt8(0), Ptr);
-
- return ConstantInt::get(CI->getType(), 1);
- }
-
- if (FormatStr[1] == 's') {
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
- if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
-
- Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
- if (!Len)
- return 0;
- Value *IncLen = B.CreateAdd(Len,
- ConstantInt::get(Len->getType(), 1),
- "leninc");
- B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
-
- // The sprintf result is the unincremented number of bytes in the string.
- return B.CreateIntCast(Len, CI->getType(), false);
- }
- return 0;
- }
-
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require two fixed pointer arguments and an integer result.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
- if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
- return V;
- }
-
- // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
- // point arguments.
- if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *SIPrintFFn =
- M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(SIPrintFFn);
- B.Insert(New);
- return New;
- }
- return 0;
- }
-};
-
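
The "%c" case in SPrintFOpt above lowers to two byte stores plus the constant return value 1; its semantics in plain C++ (hypothetical helper name, sketch only):

#include <cassert>
#include <cstring>

static int sprintfCharLowered(char *Dst, int Chr) {
  Dst[0] = static_cast<char>(Chr); // the CreateTrunc + first store
  Dst[1] = '\0';                   // the GEP to dst+1 + nul store
  return 1;                        // characters written
}

int main() {
  char Buf[2];
  assert(sprintfCharLowered(Buf, 'x') == 1);
  assert(std::strcmp(Buf, "x") == 0);
}
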
-//===---------------------------------------===//
-// 'fwrite' Optimizations
-
-struct FWriteOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require a pointer, an integer, an integer, a pointer, returning integer.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isIntegerTy() ||
- !FT->getParamType(2)->isIntegerTy() ||
- !FT->getParamType(3)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
- // Get the element size and count.
- ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!SizeC || !CountC) return 0;
- uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
-
- // If this is writing zero records, remove the call (it's a noop).
- if (Bytes == 0)
- return ConstantInt::get(CI->getType(), 0);
-
- // If this is writing one byte, turn it into fputc.
- // This optimization is only valid if the return value is unused.
- if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
- }
-
- return 0;
- }
-};
-
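
FWriteOpt's two special cases above follow from the byte count size * count; sketched as a standalone wrapper (hypothetical name, not the pass itself):

#include <cstdio>

static void fwriteLowered(const char *S, std::size_t Size, std::size_t Count,
                          FILE *F) {
  std::size_t Bytes = Size * Count;
  if (Bytes == 0)
    return;              // fwrite(S, 0, N, F) writes nothing: drop the call
  if (Bytes == 1) {
    std::fputc(S[0], F); // fwrite(S, 1, 1, F) with an unused return value
    return;
  }
  std::fwrite(S, Size, Count, F); // general case: keep the call
}
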
-//===---------------------------------------===//
-// 'fputs' Optimizations
-
-struct FPutsOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- // Require two pointers. Also, we can't optimize if return value is used.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !CI->use_empty())
- return 0;
-
- // fputs(s,F) --> fwrite(s,1,strlen(s),F)
- uint64_t Len = GetStringLength(CI->getArgOperand(0));
- if (!Len) return 0;
- // Known to have no uses (see above).
- return EmitFWrite(CI->getArgOperand(0),
- ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getArgOperand(1), B, TD, TLI);
- }
-};
-
-//===---------------------------------------===//
-// 'fprintf' Optimizations
-
-struct FPrintFOpt : public LibCallOptimization {
- Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
- IRBuilder<> &B) {
- // All the optimizations depend on the format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
- return 0;
-
- // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
- if (CI->getNumArgOperands() == 2) {
- for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
- if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
- return 0; // We found a format specifier.
-
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- Value *NewCI = EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()),
- CI->getArgOperand(0), B, TD, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
- }
-
- // The remaining optimizations require the format string to be "%s" or "%c"
- // and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
- CI->getNumArgOperands() < 3)
- return 0;
-
- // Decode the second character of the format string.
- if (FormatStr[1] == 'c') {
- // fprintf(F, "%c", chr) --> fputc(chr, F)
- if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
- TD, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
- }
-
- if (FormatStr[1] == 's') {
- // fprintf(F, "%s", str) --> fputs(str, F)
- if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
- return 0;
- return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
- }
- return 0;
- }
-
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require two fixed pointer parameters and an integer result.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
- !FT->getParamType(1)->isPointerTy() ||
- !FT->getReturnType()->isIntegerTy())
- return 0;
-
- if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
- return V;
- }
-
- // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
- // floating point arguments.
- if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *FIPrintFFn =
- M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(FIPrintFFn);
- B.Insert(New);
- return New;
- }
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'puts' Optimizations
-
-struct PutsOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require one fixed pointer argument and an integer/void result.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
- !(FT->getReturnType()->isIntegerTy() ||
- FT->getReturnType()->isVoidTy()))
- return 0;
-
- // Check for a constant string.
- StringRef Str;
- if (!getConstantStringInfo(CI->getArgOperand(0), Str))
- return 0;
-
- if (Str.empty() && CI->use_empty()) {
- // puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
- if (CI->use_empty() || !Res) return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
- }
-
- return 0;
- }
-};
-
-} // end anonymous namespace.
-
-//===----------------------------------------------------------------------===//
// SimplifyLibCalls Pass Implementation
//===----------------------------------------------------------------------===//
@@ -768,22 +89,9 @@ namespace {
TargetLibraryInfo *TLI;
StringMap<LibCallOptimization*> Optimizations;
- // Math Library Optimizations
- CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
- UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP;
- // Integer Optimizations
- FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
- ToAsciiOpt ToAscii;
- // Formatting and IO Optimizations
- SPrintFOpt SPrintF; PrintFOpt PrintF;
- FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
- PutsOpt Puts;
-
- bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(ID), UnaryDoubleFP(false),
- UnsafeUnaryDoubleFP(true) {
+ SimplifyLibCalls() : FunctionPass(ID) {
initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
@@ -792,14 +100,6 @@ namespace {
void InitOptimizations();
bool runOnFunction(Function &F);
- void setDoesNotAccessMemory(Function &F);
- void setOnlyReadsMemory(Function &F);
- void setDoesNotThrow(Function &F);
- void setDoesNotCapture(Function &F, unsigned n);
- void setDoesNotAlias(Function &F, unsigned n);
- bool doInitialization(Module &M);
-
- void inferPrototypeAttributes(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfo>();
}
@@ -833,77 +133,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
/// Optimizations - Populate the Optimizations map with all the optimizations
/// we know.
void SimplifyLibCalls::InitOptimizations() {
- // Math Library Optimizations
- Optimizations["cosf"] = &Cos;
- Optimizations["cos"] = &Cos;
- Optimizations["cosl"] = &Cos;
- Optimizations["powf"] = &Pow;
- Optimizations["pow"] = &Pow;
- Optimizations["powl"] = &Pow;
- Optimizations["llvm.pow.f32"] = &Pow;
- Optimizations["llvm.pow.f64"] = &Pow;
- Optimizations["llvm.pow.f80"] = &Pow;
- Optimizations["llvm.pow.f128"] = &Pow;
- Optimizations["llvm.pow.ppcf128"] = &Pow;
- Optimizations["exp2l"] = &Exp2;
- Optimizations["exp2"] = &Exp2;
- Optimizations["exp2f"] = &Exp2;
- Optimizations["llvm.exp2.ppcf128"] = &Exp2;
- Optimizations["llvm.exp2.f128"] = &Exp2;
- Optimizations["llvm.exp2.f80"] = &Exp2;
- Optimizations["llvm.exp2.f64"] = &Exp2;
- Optimizations["llvm.exp2.f32"] = &Exp2;
-
- AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP);
- AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP);
- AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP);
- AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP);
- AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP);
- AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP);
- AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP);
-
- if(UnsafeFPShrink) {
- AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP);
- }
-
- // Integer Optimizations
- Optimizations["ffs"] = &FFS;
- Optimizations["ffsl"] = &FFS;
- Optimizations["ffsll"] = &FFS;
- Optimizations["abs"] = &Abs;
- Optimizations["labs"] = &Abs;
- Optimizations["llabs"] = &Abs;
- Optimizations["isdigit"] = &IsDigit;
- Optimizations["isascii"] = &IsAscii;
- Optimizations["toascii"] = &ToAscii;
-
- // Formatting and IO Optimizations
- Optimizations["sprintf"] = &SPrintF;
- Optimizations["printf"] = &PrintF;
- AddOpt(LibFunc::fwrite, &FWrite);
- AddOpt(LibFunc::fputs, &FPuts);
- Optimizations["fprintf"] = &FPrintF;
- Optimizations["puts"] = &Puts;
}
@@ -924,7 +153,7 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
// Ignore non-calls.
CallInst *CI = dyn_cast<CallInst>(I++);
- if (!CI) continue;
+ if (!CI || CI->hasFnAttr(Attribute::NoBuiltin)) continue;
// Ignore indirect calls and calls to non-external functions.
Function *Callee = CI->getCalledFunction();
@@ -951,7 +180,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
// Something changed!
Changed = true;
- ++NumSimplified;
// Inspect the instruction after the call (which was potentially just
// added) next.
@@ -968,697 +196,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
return Changed;
}
-// Utility methods for doInitialization.
-
-void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) {
- if (!F.doesNotAccessMemory()) {
- F.setDoesNotAccessMemory();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setOnlyReadsMemory(Function &F) {
- if (!F.onlyReadsMemory()) {
- F.setOnlyReadsMemory();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotThrow(Function &F) {
- if (!F.doesNotThrow()) {
- F.setDoesNotThrow();
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) {
- if (!F.doesNotCapture(n)) {
- F.setDoesNotCapture(n);
- ++NumAnnotated;
- Modified = true;
- }
-}
-void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
- if (!F.doesNotAlias(n)) {
- F.setDoesNotAlias(n);
- ++NumAnnotated;
- Modified = true;
- }
-}
-
-
-void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
- FunctionType *FTy = F.getFunctionType();
-
- StringRef Name = F.getName();
- switch (Name[0]) {
- case 's':
- if (Name == "strlen") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "strchr" ||
- Name == "strrchr") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isIntegerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- } else if (Name == "strcpy" ||
- Name == "stpcpy" ||
- Name == "strcat" ||
- Name == "strtol" ||
- Name == "strtod" ||
- Name == "strtof" ||
- Name == "strtoul" ||
- Name == "strtoll" ||
- Name == "strtold" ||
- Name == "strncat" ||
- Name == "strncpy" ||
- Name == "stpncpy" ||
- Name == "strtoull") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "strxfrm") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "strcmp" ||
- Name == "strspn" ||
- Name == "strncmp" ||
- Name == "strcspn" ||
- Name == "strcoll" ||
- Name == "strcasecmp" ||
- Name == "strncasecmp") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "strstr" ||
- Name == "strpbrk") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "strtok" ||
- Name == "strtok_r") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "scanf" ||
- Name == "setbuf" ||
- Name == "setvbuf") {
- if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "strdup" ||
- Name == "strndup") {
- if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "stat" ||
- Name == "sscanf" ||
- Name == "sprintf" ||
- Name == "statvfs") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "snprintf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- } else if (Name == "setitimer") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- setDoesNotCapture(F, 3);
- } else if (Name == "system") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "system" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- }
- break;
- case 'm':
- if (Name == "malloc") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "memcmp") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "memchr" ||
- Name == "memrchr") {
- if (FTy->getNumParams() != 3)
- return;
- setOnlyReadsMemory(F);
- setDoesNotThrow(F);
- } else if (Name == "modf" ||
- Name == "modff" ||
- Name == "modfl" ||
- Name == "memcpy" ||
- Name == "memccpy" ||
- Name == "memmove") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "memalign") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotAlias(F, 0);
- } else if (Name == "mkdir" ||
- Name == "mktime") {
- if (FTy->getNumParams() == 0 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'r':
- if (Name == "realloc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "read") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; "read" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- } else if (Name == "rmdir" ||
- Name == "rewind" ||
- Name == "remove" ||
- Name == "realpath") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "rename" ||
- Name == "readlink") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'w':
- if (Name == "write") {
- if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; "write" is a valid pthread cancellation point.
- setDoesNotCapture(F, 2);
- }
- break;
- case 'b':
- if (Name == "bcopy") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "bcmp") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "bzero") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'c':
- if (Name == "calloc") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "chmod" ||
- Name == "chown" ||
- Name == "ctermid" ||
- Name == "clearerr" ||
- Name == "closedir") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'a':
- if (Name == "atoi" ||
- Name == "atol" ||
- Name == "atof" ||
- Name == "atoll") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "access") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'f':
- if (Name == "fopen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "fdopen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 2);
- } else if (Name == "feof" ||
- Name == "free" ||
- Name == "fseek" ||
- Name == "ftell" ||
- Name == "fgetc" ||
- Name == "fseeko" ||
- Name == "ftello" ||
- Name == "fileno" ||
- Name == "fflush" ||
- Name == "fclose" ||
- Name == "fsetpos" ||
- Name == "flockfile" ||
- Name == "funlockfile" ||
- Name == "ftrylockfile") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "ferror") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setOnlyReadsMemory(F);
- } else if (Name == "fputc" ||
- Name == "fstat" ||
- Name == "frexp" ||
- Name == "frexpf" ||
- Name == "frexpl" ||
- Name == "fstatvfs") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "fgets") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 3);
- } else if (Name == "fread" ||
- Name == "fwrite") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(3)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 4);
- } else if (Name == "fputs" ||
- Name == "fscanf" ||
- Name == "fprintf" ||
- Name == "fgetpos") {
- if (FTy->getNumParams() < 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'g':
- if (Name == "getc" ||
- Name == "getlogin_r" ||
- Name == "getc_unlocked") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "getenv") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setOnlyReadsMemory(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "gets" ||
- Name == "getchar") {
- setDoesNotThrow(F);
- } else if (Name == "getitimer") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "getpwnam") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'u':
- if (Name == "ungetc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "uname" ||
- Name == "unlink" ||
- Name == "unsetenv") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "utime" ||
- Name == "utimes") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- }
- break;
- case 'p':
- if (Name == "putc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "puts" ||
- Name == "printf" ||
- Name == "perror") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "pread" ||
- Name == "pwrite") {
- if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
- return;
- // May throw; these are valid pthread cancellation points.
- setDoesNotCapture(F, 2);
- } else if (Name == "putchar") {
- setDoesNotThrow(F);
- } else if (Name == "popen") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "pclose") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'v':
- if (Name == "vscanf") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "vsscanf" ||
- Name == "vfscanf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "valloc") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "vprintf") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "vfprintf" ||
- Name == "vsprintf") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "vsnprintf") {
- if (FTy->getNumParams() != 4 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(2)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 3);
- }
- break;
- case 'o':
- if (Name == "open") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- } else if (Name == "opendir") {
- if (FTy->getNumParams() != 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- }
- break;
- case 't':
- if (Name == "tmpfile") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "times") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'h':
- if (Name == "htonl" ||
- Name == "htons") {
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- }
- break;
- case 'n':
- if (Name == "ntohl" ||
- Name == "ntohs") {
- setDoesNotThrow(F);
- setDoesNotAccessMemory(F);
- }
- break;
- case 'l':
- if (Name == "lstat") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "lchown") {
- if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- }
- break;
- case 'q':
- if (Name == "qsort") {
- if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
- return;
-      // May throw; qsort calls through a user-supplied function pointer.
- setDoesNotCapture(F, 4);
- }
- break;
- case '_':
- if (Name == "__strdup" ||
- Name == "__strndup") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- } else if (Name == "__strtok_r") {
- if (FTy->getNumParams() != 3 ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "_IO_getc") {
- if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "_IO_putc") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- }
- break;
- case 1:
- if (Name == "\1__isoc99_scanf") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "\1stat64" ||
- Name == "\1lstat64" ||
- Name == "\1statvfs64" ||
- Name == "\1__isoc99_sscanf") {
- if (FTy->getNumParams() < 1 ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1fopen64") {
- if (FTy->getNumParams() != 2 ||
- !FTy->getReturnType()->isPointerTy() ||
- !FTy->getParamType(0)->isPointerTy() ||
- !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- setDoesNotCapture(F, 1);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1fseeko64" ||
- Name == "\1ftello64") {
- if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 1);
- } else if (Name == "\1tmpfile64") {
- if (!FTy->getReturnType()->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotAlias(F, 0);
- } else if (Name == "\1fstat64" ||
- Name == "\1fstatvfs64") {
- if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
- return;
- setDoesNotThrow(F);
- setDoesNotCapture(F, 2);
- } else if (Name == "\1open64") {
- if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
- return;
- // May throw; "open" is a valid pthread cancellation point.
- setDoesNotCapture(F, 1);
- }
- break;
- }
-}
-
-/// doInitialization - Add attributes to well-known functions.
-///
-bool SimplifyLibCalls::doInitialization(Module &M) {
- Modified = false;
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- Function &F = *I;
- if (F.isDeclaration() && F.hasName())
- inferPrototypeAttributes(F);
- }
- return Modified;
-}
-
// TODO:
// Additional cases that we need to add to this file:
//
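
// A minimal sketch of the guard-and-set annotation pattern removed above,
// reduced to a single libc function. It assumes only the Function attribute
// API visible in this diff (doesNotThrow/setDoesNotThrow,
// onlyReadsMemory/setOnlyReadsMemory, doesNotCapture/setDoesNotCapture with
// 1-based argument indices); the helper name annotateStrlen is illustrative.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
using namespace llvm;

static bool annotateStrlen(Function &F) {
  FunctionType *FTy = F.getFunctionType();
  // Only annotate declarations that structurally match
  // size_t strlen(const char *).
  if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
    return false;
  bool Changed = false;
  if (!F.doesNotThrow())    { F.setDoesNotThrow();    Changed = true; }
  if (!F.onlyReadsMemory()) { F.setOnlyReadsMemory(); Changed = true; }
  if (!F.doesNotCapture(1)) { F.setDoesNotCapture(1); Changed = true; } // arg 1: the string
  return Changed;
}
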
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 34f1d6c6221a..d4595bb373e6 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -14,13 +14,13 @@
#define DEBUG_TYPE "sink"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 6557d630a943..2002e680d195 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -52,25 +52,26 @@
#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
STATISTIC(NumEliminated, "Number of tail calls removed");
@@ -79,11 +80,15 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
+ const TargetTransformInfo *TTI;
+
static char ID; // Pass identification, replacement for typeid
TailCallElim() : FunctionPass(ID) {
initializeTailCallElimPass(*PassRegistry::getPassRegistry());
}
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
virtual bool runOnFunction(Function &F);
private:
@@ -109,14 +114,21 @@ namespace {
}
char TailCallElim::ID = 0;
-INITIALIZE_PASS(TailCallElim, "tailcallelim",
- "Tail Call Elimination", false, false)
+INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false)
// Public interface to the TailCallElimination pass
FunctionPass *llvm::createTailCallEliminationPass() {
return new TailCallElim();
}
+void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfo>();
+}
+
/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
/// callees of this function. We only do very simple analysis right now, this
/// could be expanded in the future to use mod/ref information for particular
@@ -151,6 +163,7 @@ bool TailCallElim::runOnFunction(Function &F) {
// right, so don't even try to convert it...
if (F.getFunctionType()->isVarArg()) return false;
+ TTI = &getAnalysis<TargetTransformInfo>();
BasicBlock *OldEntry = 0;
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
@@ -391,7 +404,8 @@ TailCallElim::FindTRECandidate(Instruction *TI,
if (BB == &F->getEntryBlock() &&
FirstNonDbg(BB->front()) == CI &&
FirstNonDbg(llvm::next(BB->begin())) == TI &&
- callIsSmall(CI)) {
+ CI->getCalledFunction() &&
+ !TTI->isLoweredToCall(CI->getCalledFunction())) {
// A single-block function with just a call and a return. Check that
// the arguments match.
CallSite::arg_iterator I = CallSite(CI).arg_begin(),
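
// The hunk above shows the standard legacy-pass recipe for acquiring an
// analysis: declare it in getAnalysisUsage and fetch it in runOnFunction.
// A condensed sketch, using only the calls visible in this diff; the pass
// name MyTTIUser is purely illustrative:
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct MyTTIUser : public FunctionPass {
  static char ID;
  MyTTIUser() : FunctionPass(ID) {}

  // Tell the pass manager to schedule TargetTransformInfo before this pass.
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<TargetTransformInfo>();
  }

  virtual bool runOnFunction(Function &F) {
    const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
    // Queries of the kind used above, e.g. TTI.isLoweredToCall(...), go here.
    (void)TTI;
    return false;
  }
};
}
char MyTTIUser::ID = 0;
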
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
deleted file mode 100644
index 6815e411b421..000000000000
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ /dev/null
@@ -1,577 +0,0 @@
-//===- AddrModeMatcher.cpp - Addressing mode matching facility --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements target addressing mode matcher class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/AddrModeMatcher.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instruction.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/CallSite.h"
-
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-void ExtAddrMode::print(raw_ostream &OS) const {
- bool NeedPlus = false;
- OS << "[";
- if (BaseGV) {
- OS << (NeedPlus ? " + " : "")
- << "GV:";
- WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
- NeedPlus = true;
- }
-
- if (BaseOffs)
- OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
-
- if (BaseReg) {
- OS << (NeedPlus ? " + " : "")
- << "Base:";
- WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
- NeedPlus = true;
- }
- if (Scale) {
- OS << (NeedPlus ? " + " : "")
- << Scale << "*";
- WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
- NeedPlus = true;
- }
-
- OS << ']';
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ExtAddrMode::dump() const {
- print(dbgs());
- dbgs() << '\n';
-}
-#endif
-
-
-/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
-/// Return true and update AddrMode if this addr mode is legal for the target,
-/// false if not.
-bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
- unsigned Depth) {
- // If Scale is 1, then this is the same as adding ScaleReg to the addressing
- // mode. Just process that directly.
- if (Scale == 1)
- return MatchAddr(ScaleReg, Depth);
-
- // If the scale is 0, it takes nothing to add this.
- if (Scale == 0)
- return true;
-
- // If we already have a scale of this value, we can add to it, otherwise, we
- // need an available scale field.
- if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
- return false;
-
- ExtAddrMode TestAddrMode = AddrMode;
-
- // Add scale to turn X*4+X*3 -> X*7. This could also do things like
- // [A+B + A*7] -> [B+A*8].
- TestAddrMode.Scale += Scale;
- TestAddrMode.ScaledReg = ScaleReg;
-
- // If the new address isn't legal, bail out.
- if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
- return false;
-
- // It was legal, so commit it.
- AddrMode = TestAddrMode;
-
- // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
- // to see if ScaleReg is actually X+C. If so, we can turn this into adding
- // X*Scale + C*Scale to addr mode.
- ConstantInt *CI = 0; Value *AddLHS = 0;
- if (isa<Instruction>(ScaleReg) && // not a constant expr.
- match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
- TestAddrMode.ScaledReg = AddLHS;
- TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
-
- // If this addressing mode is legal, commit it and remember that we folded
- // this instruction.
- if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
- AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
- AddrMode = TestAddrMode;
- return true;
- }
- }
-
- // Otherwise, not (x+c)*scale, just return what we have.
- return true;
-}
-
-/// MightBeFoldableInst - This is a little filter, which returns true if an
-/// addressing computation involving I might be folded into a load/store
-/// accessing it. This doesn't need to be perfect, but needs to accept at least
-/// the set of instructions that MatchOperationAddr can.
-static bool MightBeFoldableInst(Instruction *I) {
- switch (I->getOpcode()) {
- case Instruction::BitCast:
- // Don't touch identity bitcasts.
- if (I->getType() == I->getOperand(0)->getType())
- return false;
- return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
- case Instruction::PtrToInt:
- // PtrToInt is always a noop, as we know that the int type is pointer sized.
- return true;
- case Instruction::IntToPtr:
- // We know the input is intptr_t, so this is foldable.
- return true;
- case Instruction::Add:
- return true;
- case Instruction::Mul:
- case Instruction::Shl:
- // Can only handle X*C and X << C.
- return isa<ConstantInt>(I->getOperand(1));
- case Instruction::GetElementPtr:
- return true;
- default:
- return false;
- }
-}
-
-
-/// MatchOperationAddr - Given an instruction or constant expr, see if we can
-/// fold the operation into the addressing mode. If so, update the addressing
-/// mode and return true, otherwise return false without modifying AddrMode.
-bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
- unsigned Depth) {
- // Avoid exponential behavior on extremely deep expression trees.
- if (Depth >= 5) return false;
-
- switch (Opcode) {
- case Instruction::PtrToInt:
- // PtrToInt is always a noop, as we know that the int type is pointer sized.
- return MatchAddr(AddrInst->getOperand(0), Depth);
- case Instruction::IntToPtr:
- // This inttoptr is a no-op if the integer type is pointer sized.
- if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
- TLI.getPointerTy())
- return MatchAddr(AddrInst->getOperand(0), Depth);
- return false;
- case Instruction::BitCast:
- // BitCast is always a noop, and we can handle it as long as it is
- // int->int or pointer->pointer (we don't want int<->fp or something).
- if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
- AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
- // Don't touch identity bitcasts. These were probably put here by LSR,
- // and we don't want to mess around with them. Assume it knows what it
- // is doing.
- AddrInst->getOperand(0)->getType() != AddrInst->getType())
- return MatchAddr(AddrInst->getOperand(0), Depth);
- return false;
- case Instruction::Add: {
- // Check to see if we can merge in the RHS then the LHS. If so, we win.
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
- if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
- MatchAddr(AddrInst->getOperand(0), Depth+1))
- return true;
-
- // Restore the old addr mode info.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
-
- // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
- MatchAddr(AddrInst->getOperand(1), Depth+1))
- return true;
-
- // Otherwise we definitely can't merge the ADD in.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- break;
- }
- //case Instruction::Or:
- // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
- //break;
- case Instruction::Mul:
- case Instruction::Shl: {
- // Can only handle X*C and X << C.
- ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
- if (!RHS) return false;
- int64_t Scale = RHS->getSExtValue();
- if (Opcode == Instruction::Shl)
- Scale = 1LL << Scale;
-
- return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
- }
- case Instruction::GetElementPtr: {
-    // Scan the GEP.  We check whether it contains constant offsets and at
-    // most one variable offset.
- int VariableOperand = -1;
- unsigned VariableScale = 0;
-
- int64_t ConstantOffset = 0;
- const DataLayout *TD = TLI.getDataLayout();
- gep_type_iterator GTI = gep_type_begin(AddrInst);
- for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- const StructLayout *SL = TD->getStructLayout(STy);
- unsigned Idx =
- cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
- ConstantOffset += SL->getElementOffset(Idx);
- } else {
- uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
- if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
- ConstantOffset += CI->getSExtValue()*TypeSize;
- } else if (TypeSize) { // Scales of zero don't do anything.
- // We only allow one variable index at the moment.
- if (VariableOperand != -1)
- return false;
-
- // Remember the variable index.
- VariableOperand = i;
- VariableScale = TypeSize;
- }
- }
- }
-
- // A common case is for the GEP to only do a constant offset. In this case,
- // just add it to the disp field and check validity.
- if (VariableOperand == -1) {
- AddrMode.BaseOffs += ConstantOffset;
- if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
- // Check to see if we can fold the base pointer in too.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1))
- return true;
- }
- AddrMode.BaseOffs -= ConstantOffset;
- return false;
- }
-
- // Save the valid addressing mode in case we can't match.
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
-
- // See if the scale and offset amount is valid for this target.
- AddrMode.BaseOffs += ConstantOffset;
-
- // Match the base operand of the GEP.
- if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
- // If it couldn't be matched, just stuff the value in a register.
- if (AddrMode.HasBaseReg) {
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- return false;
- }
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = AddrInst->getOperand(0);
- }
-
- // Match the remaining variable portion of the GEP.
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
- Depth)) {
- // If it couldn't be matched, try stuffing the base into a register
- // instead of matching it, and retrying the match of the scale.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- if (AddrMode.HasBaseReg)
- return false;
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = AddrInst->getOperand(0);
- AddrMode.BaseOffs += ConstantOffset;
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
- VariableScale, Depth)) {
- // If even that didn't work, bail.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- return false;
- }
- }
-
- return true;
- }
- }
- return false;
-}
-
-/// MatchAddr - If we can, try to add the value of 'Addr' into the current
-/// addressing mode. If Addr can't be added to AddrMode this returns false and
-/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
-/// or intptr_t for the target.
-///
-bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
- // Fold in immediates if legal for the target.
- AddrMode.BaseOffs += CI->getSExtValue();
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.BaseOffs -= CI->getSExtValue();
- } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
- // If this is a global variable, try to fold it into the addressing mode.
- if (AddrMode.BaseGV == 0) {
- AddrMode.BaseGV = GV;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.BaseGV = 0;
- }
- } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
-
- // Check to see if it is possible to fold this operation.
- if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
- // Okay, it's possible to fold this. Check to see if it is actually
- // *profitable* to do so. We use a simple cost model to avoid increasing
- // register pressure too much.
- if (I->hasOneUse() ||
- IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
- AddrModeInsts.push_back(I);
- return true;
- }
-
- // It isn't profitable to do this, roll back.
- //cerr << "NOT FOLDING: " << *I;
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- }
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
- if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
- return true;
- } else if (isa<ConstantPointerNull>(Addr)) {
- // Null pointer gets folded without affecting the addressing mode.
- return true;
- }
-
-  // Worst case, the target should support [reg] addressing modes. :)
- if (!AddrMode.HasBaseReg) {
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = Addr;
- // Still check for legality in case the target supports [imm] but not [i+r].
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.HasBaseReg = false;
- AddrMode.BaseReg = 0;
- }
-
- // If the base register is already taken, see if we can do [r+r].
- if (AddrMode.Scale == 0) {
- AddrMode.Scale = 1;
- AddrMode.ScaledReg = Addr;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.Scale = 0;
- AddrMode.ScaledReg = 0;
- }
- // Couldn't match.
- return false;
-}
-
-
-/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
-/// inline asm call are due to memory operands. If so, return true, otherwise
-/// return false.
-static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetLowering &TLI) {
- TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
- // Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue());
-
- // If this asm operand is our Value*, and if it isn't an indirect memory
- // operand, we can't fold it!
- if (OpInfo.CallOperandVal == OpVal &&
- (OpInfo.ConstraintType != TargetLowering::C_Memory ||
- !OpInfo.isIndirect))
- return false;
- }
-
- return true;
-}
-
-
-/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
-/// memory use. If we find an obviously non-foldable instruction, return true.
-/// Add the ultimately found memory instructions to MemoryUses.
-static bool FindAllMemoryUses(Instruction *I,
- SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
- SmallPtrSet<Instruction*, 16> &ConsideredInsts,
- const TargetLowering &TLI) {
- // If we already considered this instruction, we're done.
- if (!ConsideredInsts.insert(I))
- return false;
-
- // If this is an obviously unfoldable instruction, bail out.
- if (!MightBeFoldableInst(I))
- return true;
-
- // Loop over all the uses, recursively processing them.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI) {
- User *U = *UI;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- unsigned opNo = UI.getOperandNo();
- if (opNo == 0) return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(SI, opNo));
- continue;
- }
-
- if (CallInst *CI = dyn_cast<CallInst>(U)) {
- InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
- if (!IA) return true;
-
- // If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
- return true;
- continue;
- }
-
- if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
- TLI))
- return true;
- }
-
- return false;
-}
-
-
-/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
-/// the use site that we're folding it into. If so, there is no cost to
-/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
-/// that we know are live at the instruction already.
-bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
- Value *KnownLive2) {
- // If Val is either of the known-live values, we know it is live!
- if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
- return true;
-
- // All values other than instructions and arguments (e.g. constants) are live.
- if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
-
-  // If Val is a constant-sized alloca in the entry block, it is live: it is
-  // just a reference to the stack/frame pointer, which is live for the whole
-  // function.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
- if (AI->isStaticAlloca())
- return true;
-
- // Check to see if this value is already used in the memory instruction's
- // block. If so, it's already live into the block at the very least, so we
- // can reasonably fold it.
- return Val->isUsedInBasicBlock(MemoryInst->getParent());
-}
-
-
-
-/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
-/// mode of the machine to fold the specified instruction into a load or store
-/// that ultimately uses it. However, the specified instruction has multiple
-/// uses. Given this, it may actually increase register pressure to fold it
-/// into the load. For example, consider this code:
-///
-/// X = ...
-/// Y = X+1
-/// use(Y) -> nonload/store
-/// Z = Y+1
-/// load Z
-///
-/// In this case, Y has multiple uses, and can be folded into the load of Z
-/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
-/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
-/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
-/// number of computations either.
-///
-/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
-/// X was live across 'load Z' for other reasons, we actually *would* want to
-/// fold the addressing mode in the Z case. This would make Y die earlier.
-bool AddressingModeMatcher::
-IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
- ExtAddrMode &AMAfter) {
- if (IgnoreProfitability) return true;
-
- // AMBefore is the addressing mode before this instruction was folded into it,
- // and AMAfter is the addressing mode after the instruction was folded. Get
- // the set of registers referenced by AMAfter and subtract out those
- // referenced by AMBefore: this is the set of values which folding in this
- // address extends the lifetime of.
- //
- // Note that there are only two potential values being referenced here,
- // BaseReg and ScaleReg (global addresses are always available, as are any
- // folded immediates).
- Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
-
-  // If the BaseReg or ScaledReg was referenced by the previous addrmode, its
-  // lifetime wasn't extended by adding this instruction.
- if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
- BaseReg = 0;
- if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
- ScaledReg = 0;
-
-  // If folding this instruction (and its subexprs) didn't extend any live
-  // ranges, we're ok with it.
- if (BaseReg == 0 && ScaledReg == 0)
- return true;
-
- // If all uses of this instruction are ultimately load/store/inlineasm's,
- // check to see if their addressing modes will include this instruction. If
- // so, we can fold it into all uses, so it doesn't matter if it has multiple
- // uses.
- SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
- SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
- return false; // Has a non-memory, non-foldable use!
-
- // Now that we know that all uses of this instruction are part of a chain of
- // computation involving only operations that could theoretically be folded
- // into a memory use, loop over each of these uses and see if they could
- // *actually* fold the instruction.
- SmallVector<Instruction*, 32> MatchedAddrModeInsts;
- for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
- Instruction *User = MemoryUses[i].first;
- unsigned OpNo = MemoryUses[i].second;
-
- // Get the access type of this use. If the use isn't a pointer, we don't
- // know what it accesses.
- Value *Address = User->getOperand(OpNo);
- if (!Address->getType()->isPointerTy())
- return false;
- Type *AddressAccessTy =
- cast<PointerType>(Address->getType())->getElementType();
-
- // Do a match against the root of this address, ignoring profitability. This
- // will tell us if the addressing mode for the memory operation will
- // *actually* cover the shared instruction.
- ExtAddrMode Result;
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
- MemoryInst, Result);
- Matcher.IgnoreProfitability = true;
- bool Success = Matcher.MatchAddr(Address, 0);
- (void)Success; assert(Success && "Couldn't select *anything*?");
-
- // If the match didn't cover I, then it won't be shared by it.
- if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
- I) == MatchedAddrModeInsts.end())
- return false;
-
- MatchedAddrModeInsts.clear();
- }
-
- return true;
-}
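
// A worked illustration of the scale-merging step in MatchScaledValue above.
// For an address computed as X*4 + X*3, matching the second scaled value
// against an ExtAddrMode that already holds {ScaledReg = X, Scale = 4} folds
// both into a single scaled operand:
//
//   before:  [Base + 4*X], incoming (ScaleReg = X, Scale = 3)
//   after:   [Base + 7*X]  via TestAddrMode.Scale += Scale
//
// The merged mode is committed only if TLI.isLegalAddressingMode(TestAddrMode,
// AccessTy) accepts it for the access type; otherwise AddrMode is left
// untouched.
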
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 9fea11391a1d..ba99d2e662e4 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -13,20 +13,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Constant.h"
-#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ValueHandle.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@@ -37,12 +37,12 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
// Can delete self loop.
BB->getSinglePredecessor() == BB) && "Block is not dead!");
TerminatorInst *BBTerm = BB->getTerminator();
-
+
// Loop through all of our successors and make sure they know that one
// of their predecessors is going away.
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i)
BBTerm->getSuccessor(i)->removePredecessor(BB);
-
+
// Zap all the instructions in the block.
while (!BB->empty()) {
Instruction &I = BB->back();
@@ -55,7 +55,7 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
I.replaceAllUsesWith(UndefValue::get(I.getType()));
BB->getInstList().pop_back();
}
-
+
// Zap the block!
BB->eraseFromParent();
}
@@ -66,25 +66,25 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
/// when the block has exactly one predecessor.
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
if (!isa<PHINode>(BB->begin())) return;
-
+
AliasAnalysis *AA = 0;
MemoryDependenceAnalysis *MemDep = 0;
if (P) {
AA = P->getAnalysisIfAvailable<AliasAnalysis>();
MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
}
-
+
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
if (PN->getIncomingValue(0) != PN)
PN->replaceAllUsesWith(PN->getIncomingValue(0));
else
PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
-
+
if (MemDep)
MemDep->removeInstruction(PN); // Memdep updates AA itself.
else if (AA && isa<PointerType>(PN->getType()))
AA->deleteValue(PN);
-
+
PN->eraseFromParent();
}
}
@@ -115,7 +115,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
  // Don't merge away blocks whose address is taken.
if (BB->hasAddressTaken()) return false;
-
+
// Can't merge if there are multiple predecessors, or no predecessors.
BasicBlock *PredBB = BB->getUniquePredecessor();
if (!PredBB) return false;
@@ -124,7 +124,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
if (PredBB == BB) return false;
// Don't break invokes.
if (isa<InvokeInst>(PredBB->getTerminator())) return false;
-
+
succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
BasicBlock *OnlySucc = BB;
for (; SI != SE; ++SI)
@@ -132,7 +132,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
OnlySucc = 0; // There are multiple distinct successors!
break;
}
-
+
// Can't merge if there are multiple successors.
if (!OnlySucc) return false;
@@ -149,21 +149,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Begin by getting rid of unneeded PHIs.
if (isa<PHINode>(BB->front()))
FoldSingleEntryPHINodes(BB, P);
-
+
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
-
+
// Make all PHI nodes that referred to BB now refer to Pred as their
// source...
BB->replaceAllUsesWith(PredBB);
-
+
// Move all definitions in the successor to the predecessor...
PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
-
+
  // Inherit the predecessor's name if it exists.
if (!PredBB->hasName())
PredBB->takeName(BB);
-
+
// Finally, erase the old block and update dominator info.
if (P) {
if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
@@ -176,16 +176,16 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
DT->eraseNode(BB);
}
-
+
if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
LI->removeBlock(BB);
-
+
if (MemoryDependenceAnalysis *MD =
P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
MD->invalidateCachedPredecessors();
}
}
-
+
BB->eraseFromParent();
return true;
}
@@ -251,11 +251,11 @@ unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
}
}
-/// SplitEdge - Split the edge connecting specified block. Pass P must
-/// not be NULL.
+/// SplitEdge - Split the edge connecting specified block. Pass P must
+/// not be NULL.
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
-
+
// If this is a critical edge, let SplitCriticalEdge do it.
TerminatorInst *LatchTerm = BB->getTerminator();
if (SplitCriticalEdge(LatchTerm, SuccNum, P))
@@ -271,11 +271,11 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
SP = NULL;
return SplitBlock(Succ, Succ->begin(), P);
}
-
+
// Otherwise, if BB has a single successor, split it at the bottom of the
// block.
assert(BB->getTerminator()->getNumSuccessors() == 1 &&
- "Should have a single succ!");
+ "Should have a single succ!");
return SplitBlock(BB, BB->getTerminator(), P);
}
@@ -301,12 +301,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
if (DomTreeNode *OldNode = DT->getNode(Old)) {
std::vector<DomTreeNode *> Children;
for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
- I != E; ++I)
+ I != E; ++I)
Children.push_back(*I);
DomTreeNode *NewNode = DT->addNewBlock(New,Old);
for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
- E = Children.end(); I != E; ++I)
+ E = Children.end(); I != E; ++I)
DT->changeImmediateDominator(*I, NewNode);
}
}
@@ -424,7 +424,7 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
PHINode *NewPHI =
PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
if (AA) AA->copyValue(PN, NewPHI);
-
+
// Move all of the PHI values for 'Preds' to the new PHI.
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
Value *V = PN->removeIncomingValue(Preds[i], false);
@@ -451,16 +451,16 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
/// preserve LoopSimplify (because it's complicated to handle the case where one
/// of the edges being split is an exit of a loop with other exits).
///
-BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
ArrayRef<BasicBlock*> Preds,
const char *Suffix, Pass *P) {
// Create new basic block, insert right before the original block.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
BB->getParent(), BB);
-
+
// The new block unconditionally branches to the old block.
BranchInst *BI = BranchInst::Create(BB, NewBB);
-
+
// Move the edges from Preds to point to NewBB instead of BB.
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
// This is slightly more strict than necessary; the minimum requirement
@@ -497,13 +497,13 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
 /// block gets the remaining predecessors of OrigBB. The landingpad instruction
 /// in OrigBB is cloned into both of the new basic blocks. The new blocks are
 /// given the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs
 /// vector.
-///
+///
/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular,
/// it does not preserve LoopSimplify (because it's complicated to handle the
/// case where one of the edges being split is an exit of a loop with other
/// exits).
-///
+///
void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
ArrayRef<BasicBlock*> Preds,
const char *Suffix1, const char *Suffix2,
@@ -608,11 +608,11 @@ void llvm::FindFunctionBackedges(const Function &F,
const BasicBlock *BB = &F.getEntryBlock();
if (succ_begin(BB) == succ_end(BB))
return;
-
+
SmallPtrSet<const BasicBlock*, 8> Visited;
SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
SmallPtrSet<const BasicBlock*, 8> InStack;
-
+
Visited.insert(BB);
VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
InStack.insert(BB);
@@ -620,7 +620,7 @@ void llvm::FindFunctionBackedges(const Function &F,
std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
const BasicBlock *ParentBB = Top.first;
succ_const_iterator &I = Top.second;
-
+
bool FoundNew = false;
while (I != succ_end(ParentBB)) {
BB = *I++;
@@ -632,7 +632,7 @@ void llvm::FindFunctionBackedges(const Function &F,
if (InStack.count(BB))
Result.push_back(std::make_pair(ParentBB, BB));
}
-
+
if (FoundNew) {
      // Go down one level if there is an unvisited successor.
InStack.insert(BB);
@@ -641,7 +641,7 @@ void llvm::FindFunctionBackedges(const Function &F,
// Go up one level.
InStack.erase(VisitStack.pop_back_val().first);
}
- } while (!VisitStack.empty());
+ } while (!VisitStack.empty());
}
/// FoldReturnIntoUncondBranch - This method duplicates the specified return
@@ -655,7 +655,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// Clone the return and add it to the end of the predecessor.
Instruction *NewRet = RI->clone();
Pred->getInstList().push_back(NewRet);
-
+
// If the return instruction returns a value, and if the value was a
// PHI node in "BB", propagate the right value into the return.
for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
@@ -679,7 +679,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
}
}
}
-
+
// Update any PHI nodes in the returning block to realize that we no
// longer branch to them.
BB->removePredecessor(Pred);
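
// A minimal usage sketch for two of the utilities touched above, using the
// signatures visible in this hunk (SplitEdge(BB, Succ, P) and
// MergeBlockIntoPredecessor(BB, P)); P is the calling pass, through which
// DominatorTree/LoopInfo are kept up to date when available. The helper name
// splitThenFold is illustrative only.
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

static void splitThenFold(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
  // Split the BB->Succ edge; critical edges are routed to SplitCriticalEdge.
  BasicBlock *Mid = SplitEdge(BB, Succ, P);
  (void)Mid;
  // A block with a unique predecessor and no address taken can later be
  // folded back into that predecessor; returns false when merging is illegal.
  MergeBlockIntoPredecessor(Succ, P);
}
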
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 6b04e3d17b9b..8513772da2e8 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -17,17 +17,17 @@
#define DEBUG_TYPE "break-crit-edges"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Type.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
STATISTIC(NumBroken, "Number of blocks inserted");
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 74b2ee10e01d..6d13217df55d 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -12,17 +12,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
@@ -40,16 +38,16 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
- Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrLen = M->getOrInsertFunction("strlen",
- AttrListPtr::get(M->getContext(),
- AWI),
+ AttributeSet::get(M->getContext(),
+ AS),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
NULL);
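
// The mechanical pattern of this migration, condensed: per-index
// AttributeWithIndex entries wrapped in an AttrListPtr become AttributeSet
// values (parameter indices are 1-based; AttributeSet::FunctionIndex holds
// function-level attributes). A sketch of the new form only, assuming M, B,
// TD, and Context are in scope as in EmitStrLen above:
AttributeSet AS[2];
AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
                          Attribute::NoUnwind);
Constant *StrLenFn =
    M->getOrInsertFunction("strlen", AttributeSet::get(M->getContext(), AS),
                           TD->getIntPtrType(Context), B.getInt8PtrTy(), NULL);
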
@@ -69,16 +67,16 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
- Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrNLen = M->getOrInsertFunction("strnlen",
- AttrListPtr::get(M->getContext(),
- AWI),
+ AttributeSet::get(M->getContext(),
+ AS),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
@@ -99,16 +97,16 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AttributeWithIndex AWI =
- AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- ArrayRef<Attributes::AttrVal>(AVs, 2));
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AttributeSet AS =
+ AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
Constant *StrChr = M->getOrInsertFunction("strchr",
- AttrListPtr::get(M->getContext(),
- AWI),
+ AttributeSet::get(M->getContext(),
+ AS),
I8Ptr, I8Ptr, I32Ty, NULL);
CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
ConstantInt::get(I32Ty, C), "strchr");
@@ -125,17 +123,17 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
- Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *StrNCmp = M->getOrInsertFunction("strncmp",
- AttrListPtr::get(M->getContext(),
- AWI),
+ AttributeSet::get(M->getContext(),
+ AS),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -158,13 +156,13 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::NoUnwind);
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
Value *StrCpy = M->getOrInsertFunction(Name,
- AttrListPtr::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
I8Ptr, I8Ptr, I8Ptr, NULL);
CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
Name);
@@ -182,14 +180,14 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::NoUnwind);
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
Value *StrNCpy = M->getOrInsertFunction(Name,
- AttrListPtr::get(M->getContext(),
- AWI),
+ AttributeSet::get(M->getContext(),
+ AS),
I8Ptr, I8Ptr, I8Ptr,
Len->getType(), NULL);
CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
@@ -209,12 +207,12 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI;
- AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::NoUnwind);
+ AttributeSet AS;
+ AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
- AttrListPtr::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -237,13 +235,13 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI;
- Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AttributeSet AS;
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemChr = M->getOrInsertFunction("memchr",
- AttrListPtr::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt32Ty(),
@@ -265,16 +263,16 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
- Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
- AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- ArrayRef<Attributes::AttrVal>(AVs, 2));
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCmp = M->getOrInsertFunction("memcmp",
- AttrListPtr::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -293,7 +291,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
/// returns one value with the same type. If 'Op' is a long double, an 'l'
/// suffix is added to the name; if 'Op' is a float, an 'f' suffix is added.
Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
- const AttrListPtr &Attrs) {
+ const AttributeSet &Attrs) {
SmallString<20> NameBuffer;
if (!Op->getType()->isDoubleTy()) {
// If we need to add a suffix, copy into NameBuffer.
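// For illustration, a minimal sketch of the libm suffix rule described in the
// comment above (plain C++, independent of the LLVM API; floatFnName is a
// hypothetical helper):
#include <string>
static std::string floatFnName(const std::string &Base, bool IsFloat,
                               bool IsLongDouble) {
  // "sqrt" stays "sqrt" for double, becomes "sqrtf" for float and "sqrtl"
  // for long double, following the C math library naming convention.
  if (IsFloat)
    return Base + "f";
  if (IsLongDouble)
    return Base + "l";
  return Base;
}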
@@ -346,13 +344,13 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::NoUnwind);
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
Value *PutS = M->getOrInsertFunction("puts",
- AttrListPtr::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt32Ty(),
B.getInt8PtrTy(),
NULL);
@@ -370,14 +368,14 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::NoUnwind);
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
Constant *F;
if (File->getType()->isPointerTy())
F = M->getOrInsertFunction("fputc",
- AttrListPtr::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt32Ty(),
B.getInt32Ty(), File->getType(),
NULL);
@@ -403,16 +401,16 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
- AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::NoUnwind);
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
StringRef FPutsName = TLI->getName(LibFunc::fputs);
Constant *F;
if (File->getType()->isPointerTy())
F = M->getOrInsertFunction(FPutsName,
- AttrListPtr::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), NULL);
@@ -436,17 +434,17 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture);
- AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
- Attributes::NoUnwind);
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 4, Attribute::NoCapture);
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
if (File->getType()->isPointerTy())
F = M->getOrInsertFunction(FWriteName,
- AttrListPtr::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index bee2f7bcb6ea..1f517d038d19 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -16,11 +16,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "bypass-slow-division"
-#include "llvm/Instructions.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
using namespace llvm;
@@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F,
Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
// Compare operand values and branch
- Value *ZeroV = MainBuilder.getInt32(0);
+ Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
@@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F,
// Get bitwidth of div/rem instruction
IntegerType *T = cast<IntegerType>(J->getType());
- int bitwidth = T->getBitWidth();
+ unsigned int bitwidth = T->getBitWidth();
// Continue if bitwidth is not bypassed
DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 620209bccbc8..b71628bcb28e 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -1,5 +1,4 @@
add_llvm_library(LLVMTransformUtils
- AddrModeMatcher.cpp
BasicBlockUtils.cpp
BreakCriticalEdges.cpp
BuildLibCalls.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 7ba9f6d9d25d..63d7a1d52aa5 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -14,22 +14,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Constants.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/ADT/SmallVector.h"
#include <map>
using namespace llvm;
@@ -94,19 +94,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Some arguments were deleted with the VMap. Copy arguments one by one
for (Function::const_arg_iterator I = OldFunc->arg_begin(),
E = OldFunc->arg_end(); I != E; ++I)
- if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
- Anew->addAttr( OldFunc->getAttributes()
- .getParamAttributes(I->getArgNo() + 1));
+ if (Argument* Anew = dyn_cast<Argument>(VMap[I])) {
+ AttributeSet attrs = OldFunc->getAttributes()
+ .getParamAttributes(I->getArgNo() + 1);
+ if (attrs.getNumSlots() > 0)
+ Anew->addAttr(attrs);
+ }
NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttr(NewFunc->getContext(),
- AttrListPtr::ReturnIndex,
- OldFunc->getAttributes()
- .getRetAttributes()));
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::ReturnIndex,
+ OldFunc->getAttributes()));
NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttr(NewFunc->getContext(),
- AttrListPtr::FunctionIndex,
- OldFunc->getAttributes()
- .getFnAttributes()));
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::FunctionIndex,
+ OldFunc->getAttributes()));
}
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 1dac6b5b8bce..64df089e1b81 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -13,9 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Module.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Constant.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
@@ -38,10 +38,6 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
New->setTargetTriple(M->getTargetTriple());
New->setModuleInlineAsm(M->getModuleInlineAsm());
- // Copy all of the dependent libraries over.
- for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
- New->addLibrary(*I);
-
// Loop over all of the global variables, making corresponding globals in the
// new module. Here we add them to the VMap and to the new Module. We
// don't worry about attributes or initializers, they will come later.
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
index 9b099150a7af..8fa412a18b99 100644
--- a/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 281714f4c100..f7c659f2193b 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -14,25 +14,26 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CodeExtractor.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/Verifier.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <set>
using namespace llvm;
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 99b58301634a..db525cdc24d8 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -7,11 +7,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
using namespace llvm;
/// DemoteRegToStack - This function takes a virtual register computed by an
@@ -78,12 +79,21 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
InsertPt = &I;
++InsertPt;
} else {
- // We cannot demote invoke instructions to the stack if their normal edge
- // is critical.
InvokeInst &II = cast<InvokeInst>(I);
- assert(II.getNormalDest()->getSinglePredecessor() &&
- "Cannot demote invoke with a critical successor!");
- InsertPt = II.getNormalDest()->begin();
+ if (II.getNormalDest()->getSinglePredecessor())
+ InsertPt = II.getNormalDest()->getFirstInsertionPt();
+ else {
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical. Therefore, split the critical edge and insert the store
+ // in the newly created basic block.
+ unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest());
+ TerminatorInst *TI = &cast<TerminatorInst>(I);
+ assert(isCriticalEdge(TI, SuccNum) &&
+ "Expected a critical edge!");
+ BasicBlock *BB = SplitCriticalEdge(TI, SuccNum);
+ assert(BB && "Unable to split critical edge.");
+ InsertPt = BB->getFirstInsertionPt();
+ }
}
for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
@@ -124,7 +134,12 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
}
// Insert a load in place of the PHI and replace all uses.
- Value *V = new LoadInst(Slot, P->getName()+".reload", P);
+ BasicBlock::iterator InsertPt = P;
+
+ for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
+
+ Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt);
P->replaceAllUsesWith(V);
// Delete PHI.
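// For illustration, the "skip PHIs and landingpads" insertion-point scan used
// in both hunks above, reduced to a toy instruction list (plain C++; Kind and
// findInsertionPoint are hypothetical):
#include <cstddef>
#include <vector>
enum class Kind { Phi, LandingPad, Other };
static std::size_t findInsertionPoint(const std::vector<Kind> &Block) {
  // PHI nodes and the landingpad must stay grouped at the top of a block,
  // so the reload is inserted after the last of them.
  std::size_t I = 0;
  while (I < Block.size() &&
         (Block[I] == Kind::Phi || Block[I] == Kind::LandingPad))
    ++I;
  return I;
}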
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 009847f87bce..e9828d60cd55 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -13,21 +13,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Attributes.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/DataLayout.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -82,7 +82,8 @@ namespace {
/// a simple branch. When there is more than one predecessor, we need to
/// split the landing pad block after the landingpad instruction and jump
/// to there.
- void forwardResume(ResumeInst *RI);
+ void forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads);
/// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
/// destination block for the given basic block, using the values for the
@@ -140,8 +141,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
/// block. When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
-void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
+void InvokeInliningInfo::forwardResume(ResumeInst *RI,
+ SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
BasicBlock *Dest = getInnerResumeDest();
+ LandingPadInst *OuterLPad = getLandingPadInst();
BasicBlock *Src = RI->getParent();
BranchInst::Create(Dest, Src);
@@ -152,6 +155,16 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
RI->eraseFromParent();
+
+ // Append the clauses from the outer landing pad instruction to the inlined
+ // landing pad instructions.
+ for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+ E = InlinedLPads.end(); I != E; ++I) {
+ LandingPadInst *InlinedLPad = *I;
+ for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
+ OuterIdx != OuterNum; ++OuterIdx)
+ InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+ }
}
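// For illustration, the clause-merging step above reduced to plain containers
// (plain C++; Clause and appendOuterClauses are hypothetical stand-ins):
#include <cstddef>
#include <string>
#include <vector>
using Clause = std::string;
static void appendOuterClauses(std::vector<std::vector<Clause> > &InlinedLPads,
                               const std::vector<Clause> &OuterClauses) {
  // After inlining, each inlined landingpad must also cover whatever the
  // outer landingpad catches, so the outer clauses are appended to each one.
  for (std::size_t i = 0, e = InlinedLPads.size(); i != e; ++i)
    for (std::size_t j = 0, f = OuterClauses.size(); j != f; ++j)
      InlinedLPads[i].push_back(OuterClauses[j]);
}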
/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
@@ -229,19 +242,15 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// The inlined code is currently at the end of the function, scan from the
// start of the inlined code to its end, checking for stuff we need to
- // rewrite. If the code doesn't have calls or unwinds, we know there is
- // nothing to rewrite.
- if (!InlinedCodeInfo.ContainsCalls) {
- // Now that everything is happy, we have one final detail. The PHI nodes in
- // the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
- // PHI node) now.
- InvokeDest->removePredecessor(II->getParent());
- return;
- }
-
+ // rewrite.
InvokeInliningInfo Invoke(II);
-
+
+ // Get all of the inlined landing pad instructions.
+ SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
+ for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I)
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
+ InlinedLPads.insert(II->getLandingPadInst());
+
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
if (InlinedCodeInfo.ContainsCalls)
if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
@@ -250,13 +259,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
continue;
}
+ // Forward any resumes that are remaining here.
if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
- Invoke.forwardResume(RI);
+ Invoke.forwardResume(RI, InlinedLPads);
}
// Now that everything is happy, we have one final detail. The PHI nodes in
// the exception destination block still have entries due to the original
- // invoke instruction. Eliminate these entries (which might even delete the
+ // invoke instruction. Eliminate these entries (which might even delete the
// PHI node) now.
InvokeDest->removePredecessor(II->getParent());
}
@@ -668,10 +678,29 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (hasLifetimeMarkers(AI))
continue;
- builder.CreateLifetimeStart(AI);
+ // Try to determine the size of the allocation.
+ ConstantInt *AllocaSize = 0;
+ if (ConstantInt *AIArraySize =
+ dyn_cast<ConstantInt>(AI->getArraySize())) {
+ if (IFI.TD) {
+ Type *AllocaType = AI->getAllocatedType();
+ uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
+ uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
+ assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
+ // Check that array size doesn't saturate uint64_t and doesn't
+ // overflow when it's multiplied by type size.
+ if (AllocaArraySize != ~0ULL &&
+ UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
+ AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+ AllocaArraySize * AllocaTypeSize);
+ }
+ }
+ }
+
+ builder.CreateLifetimeStart(AI, AllocaSize);
for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
IRBuilder<> builder(Returns[ri]);
- builder.CreateLifetimeEnd(AI);
+ builder.CreateLifetimeEnd(AI, AllocaSize);
}
}
}
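// For illustration, the overflow guard above in isolation (plain C++;
// computeAllocaSize is a hypothetical stand-in for the inlined logic):
#include <cstdint>
static bool computeAllocaSize(uint64_t ArraySize, uint64_t TypeSize,
                              uint64_t &SizeOut) {
  // ~0ULL is the saturated value getLimitedValue() returns for oversized
  // array sizes; the division check rejects any product that would wrap.
  if (ArraySize == 0 || ArraySize == ~0ULL ||
      UINT64_MAX / ArraySize < TypeSize)
    return false;
  SizeOut = ArraySize * TypeSize;
  return true;
}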
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 45c15de9437f..a020bc7398f5 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -15,9 +15,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Function.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Type.h"
using namespace llvm;
namespace {
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 55227e2714e6..3cb8ded8506a 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -15,11 +15,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "integer-division"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IRBuilder.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
using namespace llvm;
@@ -418,3 +418,107 @@ bool llvm::expandDivision(BinaryOperator *Div) {
return true;
}
+
+/// Generate code to compute the remainder of two integers of bitwidth up to
+/// 32 bits. Uses the above routines and extends the inputs/truncates the
+/// outputs to operate in 32 bits; that is, these routines are good for targets
+/// that have no or very little support for smaller than 32 bit integer
+/// arithmetic.
+///
+/// @brief Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ Type *RemTy = Rem->getType();
+ if (RemTy->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+ if (RemTyBitWidth > 32)
+ llvm_unreachable("Div of bitwidth greater than 32 not supported");
+
+ if (RemTyBitWidth == 32)
+ return expandRemainder(Rem);
+
+ // If the bitwidth is smaller than 32, extend the inputs, truncate the output,
+ // and proceed with a 32-bit remainder computation.
+ IRBuilder<> Builder(Rem);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtRem;
+ Value *Trunc;
+ Type *Int32Ty = Builder.getInt32Ty();
+
+ if (Rem->getOpcode() == Instruction::SRem) {
+ ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty);
+ ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty);
+ ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+ Rem->replaceAllUsesWith(Trunc);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ return expandRemainder(cast<BinaryOperator>(ExtRem));
+}
+
+
+/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the
+/// above routines and extends the inputs/truncates the outputs to operate
+/// in 32 bits; that is, these routines are good for targets that have no
+/// or very little support for smaller than 32 bit integer arithmetic.
+///
+/// @brief Replace Div with emulation code.
+bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
+ assert((Div->getOpcode() == Instruction::SDiv ||
+ Div->getOpcode() == Instruction::UDiv) &&
+ "Trying to expand division from a non-division function");
+
+ Type *DivTy = Div->getType();
+ if (DivTy->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+ if (DivTyBitWidth > 32)
+ llvm_unreachable("Div of bitwidth greater than 32 not supported");
+
+ if (DivTyBitWidth == 32)
+ return expandDivision(Div);
+
+ // If the bitwidth is smaller than 32, extend the inputs, truncate the output,
+ // and proceed with 32-bit division.
+ IRBuilder<> Builder(Div);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtDiv;
+ Value *Trunc;
+ Type *Int32Ty = Builder.getInt32Ty();
+
+ if (Div->getOpcode() == Instruction::SDiv) {
+ ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty);
+ ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+ Div->replaceAllUsesWith(Trunc);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return expandDivision(cast<BinaryOperator>(ExtDiv));
+}
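// For illustration, the same extend/operate/truncate emulation in plain C++
// for a 16-bit signed remainder (rem16 is a hypothetical name):
#include <cstdint>
static int16_t rem16(int16_t A, int16_t B) {
  // Sign-extend both operands to 32 bits, take the remainder there, then
  // truncate back -- the same shape as the SExt/SRem/Trunc sequence above.
  int32_t Wide = static_cast<int32_t>(A) % static_cast<int32_t>(B);
  return static_cast<int16_t>(Wide);
}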
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 5e05c83c3566..2d1b166c2101 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -29,17 +29,17 @@
#define DEBUG_TYPE "lcssa"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Pass.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumLCSSA, "Number of live-out-of-loop variables");
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a954d82c05bf..be80d34d960f 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -13,33 +13,34 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Constants.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/MDBuilder.h"
-#include "llvm/Metadata.h"
-#include "llvm/Operator.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -604,7 +605,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
// possible to handle such cases, but difficult: it requires checking whether
// BB dominates Succ, which is non-trivial to calculate in the case where
// Succ has multiple predecessors. Also, it requires checking whether
- // constructing the necessary self-referential PHI node doesn't intoduce any
+ // constructing the necessary self-referential PHI node doesn't introduce any
// conflicts; this isn't too difficult, but the previous code for doing this
// was incorrect.
//
@@ -928,3 +929,73 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
return 0;
}
+
+bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+ DIBuilder &Builder) {
+ DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
+ if (!DDI)
+ return false;
+ DIVariable DIVar(DDI->getVariable());
+ if (!DIVar.Verify())
+ return false;
+
+ // Create a copy of the original DIDescriptor for the user variable, appending
+ // a "deref" operation to the list of address elements, as the new
+ // llvm.dbg.declare will take a value storing the address of the variable's
+ // memory, not the alloca itself.
+ Type *Int64Ty = Type::getInt64Ty(AI->getContext());
+ SmallVector<Value*, 4> NewDIVarAddress;
+ if (DIVar.hasComplexAddress()) {
+ for (unsigned i = 0, n = DIVar.getNumAddrElements(); i < n; ++i) {
+ NewDIVarAddress.push_back(
+ ConstantInt::get(Int64Ty, DIVar.getAddrElement(i)));
+ }
+ }
+ NewDIVarAddress.push_back(ConstantInt::get(Int64Ty, DIBuilder::OpDeref));
+ DIVariable NewDIVar = Builder.createComplexVariable(
+ DIVar.getTag(), DIVar.getContext(), DIVar.getName(),
+ DIVar.getFile(), DIVar.getLineNumber(), DIVar.getType(),
+ NewDIVarAddress, DIVar.getArgNumber());
+
+ // Insert llvm.dbg.declare in the same basic block as the original alloca,
+ // and remove old llvm.dbg.declare.
+ BasicBlock *BB = AI->getParent();
+ Builder.insertDeclare(NewAllocaAddress, NewDIVar, BB);
+ DDI->eraseFromParent();
+ return true;
+}
+
+bool llvm::removeUnreachableBlocks(Function &F) {
+ SmallPtrSet<BasicBlock*, 16> Reachable;
+ SmallVector<BasicBlock*, 128> Worklist;
+ Worklist.push_back(&F.getEntryBlock());
+ Reachable.insert(&F.getEntryBlock());
+ do {
+ BasicBlock *BB = Worklist.pop_back_val();
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (Reachable.insert(*SI))
+ Worklist.push_back(*SI);
+ } while (!Worklist.empty());
+
+ if (Reachable.size() == F.size())
+ return false;
+
+ assert(Reachable.size() < F.size());
+ for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) {
+ if (Reachable.count(I))
+ continue;
+
+ for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI)
+ if (Reachable.count(*SI))
+ (*SI)->removePredecessor(I);
+ I->dropAllReferences();
+ }
+
+ for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;)
+ if (!Reachable.count(I))
+ I = F.getBasicBlockList().erase(I);
+ else
+ ++I;
+
+ return true;
+}
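// For illustration, the same worklist reachability scan over a toy adjacency
// list (plain C++; reachableFrom is a hypothetical name):
#include <cstddef>
#include <vector>
static std::vector<bool> reachableFrom(
    const std::vector<std::vector<int> > &Succs, int Entry) {
  std::vector<bool> Reachable(Succs.size(), false);
  std::vector<int> Worklist;
  Worklist.push_back(Entry);
  Reachable[Entry] = true;
  // Flood-fill from the entry block, mirroring the SmallPtrSet/SmallVector
  // loop above; blocks never reached are the ones the pass erases.
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    for (std::size_t i = 0, e = Succs[BB].size(); i != e; ++i) {
      int S = Succs[BB][i];
      if (!Reachable[S]) {
        Reachable[S] = true;
        Worklist.push_back(S);
      }
    }
  }
  return Reachable;
}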
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 9d9e20166564..37819cc9c917 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -39,26 +39,26 @@
#define DEBUG_TYPE "loop-simplify"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 20237500c37f..cb581b3d13b9 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -18,12 +18,12 @@
#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/BasicBlock.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 67e17f4ca8e8..d801d5f2c2a4 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -23,12 +23,12 @@
#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/BasicBlock.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 02bdcda39194..4aee8ff51a4e 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -12,17 +12,17 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "lower-expect-intrinsic"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/MDBuilder.h"
-#include "llvm/Metadata.h"
-#include "llvm/Pass.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <vector>
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 930555424ded..9ec84d730e46 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -36,19 +36,19 @@
#define DEBUG_TYPE "lowerinvoke"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <csetjmp>
#include <set>
using namespace llvm;
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 1547439b5c6b..955b853533b0 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -14,16 +14,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Pass.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index f4ca81af6d87..61b3965d8f11 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -14,12 +14,12 @@
#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Instructions.h"
-#include "llvm/Function.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumPromoted, "Number of allocas promoted");
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 233bc12d3cfd..3716f586ff06 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -13,16 +13,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/TypeFinder.h"
#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/TypeFinder.h"
-
using namespace llvm;
namespace {
@@ -37,7 +36,7 @@ namespace {
next = seed;
}
- int rand(void) {
+ int rand() {
next = next * 1103515245 + 12345;
return (unsigned int)(next / 65536) % 32768;
}
@@ -73,13 +72,23 @@ namespace {
// Rename all aliases
for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
- AI != AE; ++AI)
- AI->setName("alias");
+ AI != AE; ++AI) {
+ StringRef Name = AI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+ AI->setName("alias");
+ }
+
// Rename all global variables
for (Module::global_iterator GI = M.global_begin(), GE = M.global_end();
- GI != GE; ++GI)
+ GI != GE; ++GI) {
+ StringRef Name = GI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
GI->setName("global");
+ }
// Rename all struct types
TypeFinder StructTypes;
@@ -96,6 +105,10 @@ namespace {
// Rename all functions
for (Module::iterator FI = M.begin(), FE = M.end();
FI != FE; ++FI) {
+ StringRef Name = FI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]);
runOnFunction(*FI);
}
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index dbcf3b2fe268..d090b487213b 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Module.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 558de9d12e6c..de335ec1a05c 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -27,26 +27,26 @@
#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Metadata.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <queue>
using namespace llvm;
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 72d4199a2a69..9d90fbe5654a 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -12,12 +12,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ssaupdater"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CFG.h"
@@ -25,7 +26,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index c767da624e19..681bf9c2b7a4 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -13,19 +13,6 @@
#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/MDBuilder.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -33,18 +20,31 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/NoFolder.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
-#include <set>
#include <map>
+#include <set>
using namespace llvm;
static cl::opt<unsigned>
@@ -82,8 +82,8 @@ namespace {
};
class SimplifyCFGOpt {
+ const TargetTransformInfo &TTI;
const DataLayout *const TD;
- const TargetTransformInfo *const TTI;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
@@ -103,8 +103,8 @@ class SimplifyCFGOpt {
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
public:
- SimplifyCFGOpt(const DataLayout *td, const TargetTransformInfo *tti)
- : TD(td), TTI(tti) {}
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD)
+ : TTI(TTI), TD(TD) {}
bool run(BasicBlock *BB);
};
}
@@ -858,7 +858,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
if (PredHasWeights) {
GetBranchWeights(PTI, Weights);
- // branch-weight metadata is inconsistant here.
+ // branch-weight metadata is inconsistent here.
if (Weights.size() != 1 + PredCases.size())
PredHasWeights = SuccHasWeights = false;
} else if (SuccHasWeights)
@@ -870,7 +870,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
SmallVector<uint64_t, 8> SuccWeights;
if (SuccHasWeights) {
GetBranchWeights(TI, SuccWeights);
- // branch-weight metadata is inconsistant here.
+ // branch-weight metadata is inconsistent here.
if (SuccWeights.size() != 1 + BBCases.size())
PredHasWeights = SuccHasWeights = false;
} else if (PredHasWeights)
@@ -967,8 +967,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
PTIHandled.begin(),
E = PTIHandled.end(); I != E; ++I) {
- if (PredHasWeights || SuccHasWeights)
- Weights.push_back(WeightsForHandled[*I]);
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[*I]);
PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
NewSuccessors.push_back(BBDefault);
}
@@ -1193,7 +1193,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
I != E; ++I) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
Value *BB1V = PN->getIncomingValueForBlock(BB1);
- Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN);
} else {
FirstNonPhiInBBEnd = &*I;
@@ -1202,7 +1202,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
}
if (!FirstNonPhiInBBEnd)
return false;
-
+
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. We scan backward for obviously identical
@@ -1332,149 +1332,180 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
return Changed;
}
-/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1
-/// and an BB2 and the only successor of BB1 is BB2, hoist simple code
-/// (for now, restricted to a single instruction that's side effect free) from
-/// the BB1 into the branch block to speculatively execute it.
+/// \brief Speculate a conditional basic block, flattening the CFG.
+///
+/// Note that this is a very risky transform currently. Speculating
+/// instructions like this is most often not desirable. Instead, there is an MI
+/// pass which can do it with full awareness of the resource constraints.
+/// However, some cases are "obvious" and we should do so directly. An example of
+/// this is speculating a single, reasonably cheap instruction.
+///
+/// There is only one distinct advantage to flattening the CFG at the IR level:
+/// it makes the very common but simplistic optimizations found in instcombine
+/// and the DAG combiner more powerful by removing CFG edges and modeling
+/// their effects with SSA value graphs that are easier to reason about.
///
-/// Turn
-/// BB:
-/// %t1 = icmp
-/// br i1 %t1, label %BB1, label %BB2
-/// BB1:
-/// %t3 = add %t2, c
+///
+/// An illustration of this transform is turning this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// %sub = sub %x, %y
/// br label %EndBB
-/// BB2:
-/// =>
-/// BB:
-/// %t1 = icmp
-/// %t4 = add %t2, c
-/// %t3 = select i1 %t1, %t2, %t3
-static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
- // Only speculatively execution a single instruction (not counting the
- // terminator) for now.
- Instruction *HInst = NULL;
- Instruction *Term = BB1->getTerminator();
- for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end();
+/// EndBB:
+/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
+/// ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// %sub = sub %x, %y
+/// %cond = select i1 %cmp, 0, %sub
+/// ...
+/// \endcode
+///
+/// \returns true if the conditional block is removed.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
+ // Be conservative for now. FP select instructions can often be expensive.
+ Value *BrCond = BI->getCondition();
+ if (isa<FCmpInst>(BrCond))
+ return false;
+
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
+
+ // If ThenBB is actually on the false edge of the conditional branch, remember
+ // to swap the select operands later.
+ bool Invert = false;
+ if (ThenBB != BI->getSuccessor(0)) {
+ assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
+ Invert = true;
+ }
+ assert(EndBB == BI->getSuccessor(!Invert) &&
+        "No edge from 'if' block to end block");
+
+ // Keep a count of how many times instructions are used within ThenBB when
+ // they are candidates for sinking into ThenBB. Specifically:
+ // - They are defined in BB, and
+ // - They have no side effects, and
+ // - All of their uses are in ThenBB.
+ SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
+
+ unsigned SpeculationCost = 0;
+ for (BasicBlock::iterator BBI = ThenBB->begin(),
+ BBE = llvm::prior(ThenBB->end());
BBI != BBE; ++BBI) {
Instruction *I = BBI;
// Skip debug info.
- if (isa<DbgInfoIntrinsic>(I)) continue;
- if (I == Term) break;
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
- if (HInst)
+ // Only speculatively execute a single instruction (not counting the
+ // terminator) for now.
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
return false;
- HInst = I;
- }
-
- BasicBlock *BIParent = BI->getParent();
- // Check the instruction to be hoisted, if there is one.
- if (HInst) {
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(HInst))
+ if (!isSafeToSpeculativelyExecute(I))
return false;
- if (ComputeSpeculationCost(HInst) > PHINodeFoldingThreshold)
+ if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
return false;
// Do not hoist the instruction if any of its operands are defined but not
// used in this BB. The transformation will prevent the operand from
// being sunk into the use block.
- for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ for (User::op_iterator i = I->op_begin(), e = I->op_end();
i != e; ++i) {
Instruction *OpI = dyn_cast<Instruction>(*i);
- if (OpI && OpI->getParent() == BIParent &&
- !OpI->mayHaveSideEffects() &&
- !OpI->isUsedInBasicBlock(BIParent))
- return false;
+ if (!OpI || OpI->getParent() != BB ||
+ OpI->mayHaveSideEffects())
+ continue; // Not a candidate for sinking.
+
+ ++SinkCandidateUseCounts[OpI];
}
}
- // Be conservative for now. FP select instruction can often be expensive.
- Value *BrCond = BI->getCondition();
- if (isa<FCmpInst>(BrCond))
- return false;
-
- // If BB1 is actually on the false edge of the conditional branch, remember
- // to swap the select operands later.
- bool Invert = false;
- if (BB1 != BI->getSuccessor(0)) {
- assert(BB1 == BI->getSuccessor(1) && "No edge from 'if' block?");
- Invert = true;
- }
+ // Consider any sink candidates which are only used in ThenBB as costs for
+ // speculation. Note, while we iterate over a DenseMap here, we are summing
+ // and so iteration order isn't significant.
+ for (SmallDenseMap<Instruction *, unsigned, 4>::iterator I =
+ SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end();
+ I != E; ++I)
+ if (I->first->getNumUses() == I->second) {
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
+ }
- // Collect interesting PHIs, and scan for hazards.
- SmallSetVector<std::pair<Value *, Value *>, 4> PHIs;
- BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
- for (BasicBlock::iterator I = BB2->begin();
+ // Check that the PHI nodes can be converted to selects.
+ bool HaveRewritablePHIs = false;
+ for (BasicBlock::iterator I = EndBB->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- Value *BB1V = PN->getIncomingValueForBlock(BB1);
- Value *BIParentV = PN->getIncomingValueForBlock(BIParent);
+ Value *OrigV = PN->getIncomingValueForBlock(BB);
+ Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
// Skip PHIs which are trivial.
- if (BB1V == BIParentV)
+ if (ThenV == OrigV)
continue;
- // Check for saftey.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BB1V)) {
- // An unfolded ConstantExpr could end up getting expanded into
- // Instructions. Don't speculate this and another instruction at
- // the same time.
- if (HInst)
- return false;
- if (!isSafeToSpeculativelyExecute(CE))
- return false;
- if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
- return false;
- }
+ HaveRewritablePHIs = true;
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(ThenV);
+ if (!CE)
+ continue; // Known safe and cheap.
- // Ok, we may insert a select for this PHI.
- PHIs.insert(std::make_pair(BB1V, BIParentV));
+ if (!isSafeToSpeculativelyExecute(CE))
+ return false;
+ if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
+ return false;
+
+ // Account for the cost of an unfolded ConstantExpr which could end up
+ // getting expanded into Instructions.
+ // FIXME: This doesn't account for how many operations are combined in the
+ // constant expression.
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
}
// If there are no PHIs to process, bail early. This helps ensure idempotence
// as well.
- if (PHIs.empty())
+ if (!HaveRewritablePHIs)
return false;
// If we get here, we can hoist the instruction and if-convert.
- DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";);
+ DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
- // Hoist the instruction.
- if (HInst)
- BIParent->getInstList().splice(BI, BB1->getInstList(), HInst);
+ // Hoist the instructions.
+ BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
+ llvm::prior(ThenBB->end()));
// Insert selects and rewrite the PHI operands.
IRBuilder<true, NoFolder> Builder(BI);
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
- Value *TrueV = PHIs[i].first;
- Value *FalseV = PHIs[i].second;
+ for (BasicBlock::iterator I = EndBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned OrigI = PN->getBasicBlockIndex(BB);
+ unsigned ThenI = PN->getBasicBlockIndex(ThenBB);
+ Value *OrigV = PN->getIncomingValue(OrigI);
+ Value *ThenV = PN->getIncomingValue(ThenI);
+
+ // Skip PHIs which are trivial.
+ if (OrigV == ThenV)
+ continue;
// Create a select whose true value is the speculatively executed value and
- // false value is the previously determined FalseV.
- SelectInst *SI;
+ // false value is the preexisting value. Swap them if the branch
+ // destinations were inverted.
+ Value *TrueV = ThenV, *FalseV = OrigV;
if (Invert)
- SI = cast<SelectInst>
- (Builder.CreateSelect(BrCond, FalseV, TrueV,
- FalseV->getName() + "." + TrueV->getName()));
- else
- SI = cast<SelectInst>
- (Builder.CreateSelect(BrCond, TrueV, FalseV,
- TrueV->getName() + "." + FalseV->getName()));
-
- // Make the PHI node use the select for all incoming values for "then" and
- // "if" blocks.
- for (BasicBlock::iterator I = BB2->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- unsigned BB1I = PN->getBasicBlockIndex(BB1);
- unsigned BIParentI = PN->getBasicBlockIndex(BIParent);
- Value *BB1V = PN->getIncomingValue(BB1I);
- Value *BIParentV = PN->getIncomingValue(BIParentI);
- if (TrueV == BB1V && FalseV == BIParentV) {
- PN->setIncomingValue(BB1I, SI);
- PN->setIncomingValue(BIParentI, SI);
- }
- }
+ std::swap(TrueV, FalseV);
+ Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV,
+ TrueV->getName() + "." + FalseV->getName());
+ PN->setIncomingValue(OrigI, V);
+ PN->setIncomingValue(ThenI, V);
}
++NumSpeculations;
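For context, the rewrite above is classic if-conversion: a side-effect-free 'then' block that only feeds PHI nodes is hoisted into its predecessor, and each PHI becomes a select on the branch condition. A minimal source-level sketch of the before/after shapes (function and variable names invented):

    // Before: a guarded computation reaching a PHI in the join block.
    int ifConvertBefore(int a, int b) {
      int r = b;
      if (a > 0)   // BB -> ThenBB -> EndBB
        r = b + 1; // the single speculatable instruction in ThenBB
      return r;    // PHI(b, b + 1) in EndBB
    }
    // After: the instruction runs unconditionally and a select picks a value.
    int ifConvertAfter(int a, int b) {
      int t = b + 1;          // hoisted: safe and cheap to speculate
      return (a > 0) ? t : b; // select replaces the PHI
    }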
@@ -2522,9 +2553,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
-static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
- const DataLayout *TD,
- IRBuilder<> &Builder) {
+static bool TryToSimplifyUncondBranchWithICmpInIt(
+ ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI,
+ const DataLayout *TD) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -2557,7 +2588,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -2573,7 +2604,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
@@ -2758,9 +2789,20 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
+ bool InvokeRequiresTableEntry = false;
+ bool Changed = false;
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
+
+ if (II->hasFnAttr(Attribute::UWTable)) {
+ // Don't remove an `invoke' instruction if the ABI requires an entry into
+ // the table.
+ InvokeRequiresTableEntry = true;
+ continue;
+ }
+
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
Call->takeName(II);
@@ -2780,11 +2822,14 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
// Finally, delete the invoke instruction!
II->eraseFromParent();
+ Changed = true;
}
- // The landingpad is now unreachable. Zap it.
- BB->eraseFromParent();
- return true;
+ if (!InvokeRequiresTableEntry)
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+
+ return Changed;
}
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
@@ -3382,7 +3427,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
ConstantInt *Offset,
const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
Constant *DefaultValue,
- const DataLayout *TD) {
+ const DataLayout *TD)
+ : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
assert(Values.size() && "Can't build lookup table without values!");
assert(TableSize >= Values.size() && "Can't fit values in table!");
@@ -3510,23 +3556,44 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
/// types of the results.
static bool ShouldBuildLookupTable(SwitchInst *SI,
uint64_t TableSize,
+ const TargetTransformInfo &TTI,
const DataLayout *TD,
const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
- // The table density should be at least 40%. This is the same criterion as for
- // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
- // FIXME: Find the best cut-off.
if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
return false; // TableSize overflowed, or mul below might overflow.
- if (SI->getNumCases() * 10 >= TableSize * 4)
- return true;
- // If each table would fit in a register, we should build it anyway.
+ bool AllTablesFitInRegister = true;
+ bool HasIllegalType = false;
for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
E = ResultTypes.end(); I != E; ++I) {
- if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second))
- return false;
+ Type *Ty = I->second;
+
+ // Saturate this flag to true.
+ HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
+
+ // Saturate this flag to false.
+ AllTablesFitInRegister = AllTablesFitInRegister &&
+ SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty);
+
+ // If both flags saturate, we're done. NOTE: This *only* works with
+ // saturating flags, and all flags have to saturate first due to the
+ // non-deterministic behavior of iterating over a dense map.
+ if (HasIllegalType && !AllTablesFitInRegister)
+ break;
}
- return true;
+
+ // If each table would fit in a register, we should build it anyway.
+ if (AllTablesFitInRegister)
+ return true;
+
+ // Don't build a table that doesn't fit in-register if it has illegal types.
+ if (HasIllegalType)
+ return false;
+
+ // The table density should be at least 40%. This is the same criterion as for
+ // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+ // FIXME: Find the best cut-off.
+ return SI->getNumCases() * 10 >= TableSize * 4;
}
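As a worked check of the 40% density cut-off, a short sketch with assumed case counts (helper name invented):

    #include <cassert>
    #include <cstdint>
    // Mirrors the final return above: NumCases * 10 >= TableSize * 4.
    static bool denseEnough(uint64_t numCases, uint64_t tableSize) {
      return numCases * 10 >= tableSize * 4; // density of at least 40%
    }
    int main() {
      assert(denseEnough(5, 10));  // 5 cases spanning 0..9: 50% dense, build it
      assert(!denseEnough(5, 20)); // same 5 cases spanning 0..19: 25%, reject
    }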
/// SwitchToLookupTable - If the switch is only used to initialize one or more
@@ -3534,13 +3601,12 @@ static bool ShouldBuildLookupTable(SwitchInst *SI,
/// replace the switch with lookup tables.
static bool SwitchToLookupTable(SwitchInst *SI,
IRBuilder<> &Builder,
- const DataLayout* TD,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo &TTI,
+ const DataLayout* TD) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
// Only build lookup table when we have a target that supports it.
- if (!TTI || !TTI->getScalarTargetTransformInfo() ||
- !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables())
+ if (!TTI.shouldBuildLookupTables())
return false;
// FIXME: If the switch is too sparse for a lookup table, perhaps we could
@@ -3607,7 +3673,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
- if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes))
+ if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes))
return false;
// Create the BB that does the lookups.
@@ -3672,12 +3738,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
// If the block only contains the switch, see if we can fold the block
// away into any preds.
@@ -3687,22 +3753,22 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
++BBI;
if (SI == &*BBI)
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
// Remove unreachable cases.
if (EliminateDeadSwitchCases(SI))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
if (ForwardSwitchConditionToPHI(SI))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
- if (SwitchToLookupTable(SI, Builder, TD, TTI))
- return SimplifyCFG(BB) | true;
+ if (SwitchToLookupTable(SI, Builder, TTI, TD))
+ return SimplifyCFG(BB, TTI, TD) | true;
return false;
}
@@ -3739,7 +3805,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
}
return Changed;
}
@@ -3763,7 +3829,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
if (I->isTerminator() &&
- TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
+ TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, TD))
return true;
}
@@ -3772,7 +3838,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
if (FoldBranchToCommonDest(BI))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
return false;
}
@@ -3787,7 +3853,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
@@ -3797,14 +3863,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
++I;
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
} else if (&*I == cast<Instruction>(BI->getCondition())){
++I;
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(I))
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
}
}
@@ -3816,7 +3882,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -3825,7 +3891,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
if (HoistThenElseCodeToIf(BI))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to successor #1.
@@ -3833,7 +3899,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
}
} else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
// If Successor #0 has multiple preds, we may be able to conditionally
@@ -3842,7 +3908,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
}
// If this is a branch on a phi node in the current block, thread control
@@ -3850,14 +3916,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, TD))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI))
- return SimplifyCFG(BB) | true;
+ return SimplifyCFG(BB, TTI, TD) | true;
return false;
}
@@ -3892,11 +3958,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
// Load from null is undefined.
if (LoadInst *LI = dyn_cast<LoadInst>(Use))
- return LI->getPointerAddressSpace() == 0;
+ if (!LI->isVolatile())
+ return LI->getPointerAddressSpace() == 0;
// Store to null is undefined.
if (StoreInst *SI = dyn_cast<StoreInst>(Use))
- return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
+ if (!SI->isVolatile())
+ return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
}
return false;
}
@@ -3998,7 +4066,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// eliminates unreachable basic blocks, and does other "peephole" optimization
/// of the CFG. It returns true if a modification was made.
///
-bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD,
- const TargetTransformInfo *TTI) {
- return SimplifyCFGOpt(TD, TTI).run(BB);
+bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
+ const DataLayout *TD) {
+ return SimplifyCFGOpt(TTI, TD).run(BB);
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 110f3808573e..41c207c3d5cb 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -15,18 +15,18 @@
#define DEBUG_TYPE "indvars"
-#include "llvm/Instructions.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 65353dc46037..f9687e4d5890 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -15,17 +15,17 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "instsimplify"
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index c3ea63852fed..c231704414fc 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -15,12 +15,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
-#include "llvm/DataLayout.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
@@ -48,6 +53,10 @@ public:
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
=0;
+ /// ignoreCallingConv - Returns false if this transformation could possibly
+ /// change the calling convention.
+ virtual bool ignoreCallingConv() { return false; }
+
Value *optimizeCall(CallInst *CI, const DataLayout *TD,
const TargetLibraryInfo *TLI,
const LibCallSimplifier *LCS, IRBuilder<> &B) {
@@ -59,7 +68,7 @@ public:
Context = &CI->getCalledFunction()->getContext();
// We never change the calling convention.
- if (CI->getCallingConv() != llvm::CallingConv::C)
+ if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C)
return NULL;
return callOptimizer(CI->getCalledFunction(), CI, B);
@@ -100,6 +109,15 @@ static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
return true;
}
+static bool callHasFloatingPointArgument(const CallInst *CI) {
+ for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
+ it != e; ++it) {
+ if ((*it)->getType()->isFloatingPointTy())
+ return true;
+ }
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Fortified Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -713,6 +731,7 @@ struct StrNCpyOpt : public LibCallOptimization {
};
struct StrLenOpt : public LibCallOptimization {
+ virtual bool ignoreCallingConv() { return true; }
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
@@ -781,8 +800,7 @@ struct StrToOpt : public LibCallOptimization {
if (isa<ConstantPointerNull>(EndPtr)) {
// With a null EndPtr, this function won't capture the main argument.
// It would be readonly too, except that it still may write to errno.
- CI->addAttribute(1, Attributes::get(Callee->getContext(),
- Attributes::NoCapture));
+ CI->addAttribute(1, Attribute::NoCapture);
}
return 0;
@@ -951,7 +969,14 @@ struct MemCmpOpt : public LibCallOptimization {
// Make sure we're not reading out-of-bounds memory.
if (Len > LHSStr.size() || Len > RHSStr.size())
return 0;
- uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ // Fold the memcmp and normalize the result. This way we get consistent
+ // results across multiple platforms.
+ uint64_t Ret = 0;
+ int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ if (Cmp < 0)
+ Ret = -1;
+ else if (Cmp > 0)
+ Ret = 1;
return ConstantInt::get(CI->getType(), Ret);
}
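The normalization matters because C guarantees only the sign of memcmp's result, not its magnitude, so folding the raw value would bake one libc's answer into the IR. A sketch of the folding rule (helper name invented):

    #include <cassert>
    #include <cstring>
    // Fold memcmp on known buffers to a sign-normalized constant, as above.
    static int foldedMemCmp(const char *l, const char *r, size_t n) {
      int c = std::memcmp(l, r, n);
      return (c < 0) ? -1 : (c > 0) ? 1 : 0; // always -1, 0, or 1
    }
    int main() {
      assert(foldedMemCmp("abc", "abd", 3) == -1);
      assert(foldedMemCmp("abc", "abc", 3) == 0);
    }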
@@ -1016,6 +1041,630 @@ struct MemSetOpt : public LibCallOptimization {
}
};
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+ bool CheckRetType;
+ UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+ !FT->getParamType(0)->isDoubleTy())
+ return 0;
+
+ if (CheckRetType) {
+ // Check if all the uses for function like 'sin' are converted to float.
+ for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
+ ++UseI) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
+ if (Cast == 0 || !Cast->getType()->isFloatTy())
+ return 0;
+ }
+ }
+
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
+ return 0;
+
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+ return B.CreateFPExt(V, B.getDoubleTy());
+ }
+};
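At the source level, the shrinking this struct performs is the identity below; it holds because flooring a float widened to double produces a value that is itself exactly representable as a float. A minimal sketch (function names invented):

    #include <cmath>
    // Before: widen, call the double-precision entry point, keep the double.
    double floorBefore(float f) { return std::floor((double)f); }
    // After: call the float-precision entry point and widen the result.
    double floorAfter(float f) { return (double)floorf(f); }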
+
+struct UnsafeFPLibCallOptimization : public LibCallOptimization {
+ bool UnsafeFPShrink;
+ UnsafeFPLibCallOptimization(bool UnsafeFPShrink) {
+ this->UnsafeFPShrink = UnsafeFPShrink;
+ }
+};
+
+struct CosOpt : public UnsafeFPLibCallOptimization {
+ CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "cos" &&
+ TLI->has(LibFunc::cosf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+ }
+
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ // cos(-x) -> cos(x)
+ Value *Op1 = CI->getArgOperand(0);
+ if (BinaryOperator::isFNeg(Op1)) {
+ BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
+ return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
+ }
+ return Ret;
+ }
+};
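The cos(-x) -> cos(x) fold relies only on cosine being an even function, so it is exact for every input, NaNs included. A one-line sketch (names invented):

    #include <cmath>
    double cosBefore(double x) { return std::cos(-x); } // fneg feeding cos
    double cosAfter(double x) { return std::cos(x); }   // fneg dropped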
+
+struct PowOpt : public UnsafeFPLibCallOptimization {
+ PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "pow" &&
+ TLI->has(LibFunc::powf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+ }
+
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
+ return Op1C;
+ if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
+ return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+ }
+
+ ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+ if (Op2C == 0) return Ret;
+
+ if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
+ return ConstantFP::get(CI->getType(), 1.0);
+
+ if (Op2C->isExactlyValue(0.5)) {
+ // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+ // This is faster than calling pow, and still handles negative zero
+ // and negative infinity correctly.
+ // TODO: In fast-math mode, this could be just sqrt(x).
+ // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+ Value *Inf = ConstantFP::getInfinity(CI->getType());
+ Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+ Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
+ Callee->getAttributes());
+ Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
+ Callee->getAttributes());
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
+ return Sel;
+ }
+
+ if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
+ return Op1;
+ if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
+ return B.CreateFMul(Op1, Op1, "pow2");
+ if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
+ Op1, "powrecip");
+ return 0;
+ }
+};
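The pow(x, 0.5) expansion is subtle: sqrt(-0.0) is -0.0 and sqrt(-inf) is NaN, while pow returns +0.0 and +inf for those inputs, which is why both the fabs and the -infinity select are emitted. A sketch of the expanded form (name invented):

    #include <cmath>
    // pow(x, 0.5) -> x == -inf ? +inf : fabs(sqrt(x))
    double expandedPowHalf(double x) {
      return (x == -INFINITY) ? INFINITY : std::fabs(std::sqrt(x));
    }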
+
+struct Exp2Opt : public UnsafeFPLibCallOptimization {
+ Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "exp2" &&
+ TLI->has(LibFunc::exp2)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+ }
+
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ Value *Op = CI->getArgOperand(0);
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
+ Value *LdExpArg = 0;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
+ }
+
+ if (LdExpArg) {
+ const char *Name;
+ if (Op->getType()->isFloatTy())
+ Name = "ldexpf";
+ else if (Op->getType()->isDoubleTy())
+ Name = "ldexp";
+ else
+ Name = "ldexpl";
+
+ Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
+
+ Module *M = Caller->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(),
+ B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+ }
+ return Ret;
+ }
+};
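The exp2 rewrite works because, for an integer exponent n, exp2(n) is exactly 1.0 * 2^n, which is what ldexp computes directly. A sketch assuming an in-range 32-bit exponent (name invented):

    #include <cmath>
    // exp2(sitofp(n)) -> ldexp(1.0, n)
    double expandedExp2(int n) { return std::ldexp(1.0, n); }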
+
+//===----------------------------------------------------------------------===//
+// Integer Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct FFSOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has one integer argument and an i32 result.
+ if (FT->getNumParams() != 1 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getParamType(0)->isIntegerTy())
+ return 0;
+
+ Value *Op = CI->getArgOperand(0);
+
+ // Constant fold.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ if (CI->isZero()) // ffs(0) -> 0.
+ return B.getInt32(0);
+ // ffs(c) -> cttz(c)+1
+ return B.getInt32(CI->getValue().countTrailingZeros() + 1);
+ }
+
+ // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+ Type *ArgType = Op->getType();
+ Value *F = Intrinsic::getDeclaration(Callee->getParent(),
+ Intrinsic::cttz, ArgType);
+ Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
+ V = B.CreateIntCast(V, B.getInt32Ty(), false);
+
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
+ return B.CreateSelect(Cond, V, B.getInt32(0));
+ }
+};
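The non-constant ffs lowering selects on x != 0 because ffs(0) must return 0, whereas cttz(0) + 1 would yield the bit width plus one. A sketch using a compiler builtin as a stand-in for llvm.cttz (helper name invented; the builtin is itself undefined at zero, hence the same guard):

    // ffs(x) -> x != 0 ? cttz(x) + 1 : 0
    static int loweredFFS(int x) {
      return x ? __builtin_ctz((unsigned)x) + 1 : 0;
    }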
+
+struct AbsOpt : public LibCallOptimization {
+ virtual bool ignoreCallingConv() { return true; }
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(integer) where the types agree.
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ FT->getParamType(0) != FT->getReturnType())
+ return 0;
+
+ // abs(x) -> x >s -1 ? x : -x
+ Value *Op = CI->getArgOperand(0);
+ Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
+ "ispos");
+ Value *Neg = B.CreateNeg(Op, "neg");
+ return B.CreateSelect(Pos, Op, Neg);
+ }
+};
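The abs lowering compares against -1 rather than 0 so that a single signed greater-than also covers x == 0 on the positive side. A sketch (name invented; as with abs itself, INT_MIN stays undefined):

    // abs(x) -> x >s -1 ? x : -x
    static int loweredAbs(int x) { return (x > -1) ? x : -x; }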
+
+struct IsDigitOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isdigit(c) -> (c-'0') <u 10
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
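The isdigit rewrite is the standard unsigned range-check trick: after subtracting '0', any c below '0' wraps around to a huge unsigned value, so one unsigned compare replaces two signed ones. A sketch (name invented):

    // isdigit(c) -> (c - '0') <u 10
    static int loweredIsDigit(int c) {
      return (unsigned)(c - '0') < 10u; // single unsigned compare
    }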
+
+struct IsAsciiOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isascii(c) -> c <u 128
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+struct ToAsciiOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require i32(i32)
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // toascii(c) -> c & 0x7f
+ return B.CreateAnd(CI->getArgOperand(0),
+ ConstantInt::get(CI->getType(),0x7F));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct PrintFOpt : public LibCallOptimization {
+ Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
+ return 0;
+
+ // Empty format string -> noop.
+ if (FormatStr.empty()) // Tolerate printf's declared void.
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), 0);
+
+ // Do not do any of the following transformations if the printf return value
+ // is used; in general the printf return value is not compatible with either
+ // putchar() or puts().
+ if (!CI->use_empty())
+ return 0;
+
+ // printf("x") -> putchar('x'), even for '%'.
+ if (FormatStr.size() == 1) {
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("foo\n") --> puts("foo")
+ if (FormatStr[FormatStr.size()-1] == '\n' &&
+ FormatStr.find('%') == std::string::npos) { // no format characters.
+ // Create a string literal with no \n on it. We expect the constant merge
+ // pass to be run after this pass, to merge duplicate strings.
+ FormatStr = FormatStr.drop_back();
+ Value *GV = B.CreateGlobalString(FormatStr, "str");
+ Value *NewCI = EmitPutS(GV, B, TD, TLI);
+ return (CI->use_empty() || !NewCI) ?
+ NewCI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ }
+
+ // Optimize specific format strings.
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy()) {
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
+
+ if (CI->use_empty() || !Res) return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("%s\n", str) --> puts(str)
+ if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy()) {
+ return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
+ }
+ return 0;
+ }
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // printf(format, ...) -> iprintf(format, ...) if no floating point
+ // arguments.
+ if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *IPrintFFn =
+ M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(IPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
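The use_empty checks above are the load-bearing part: printf returns the number of characters written, which neither putchar nor puts reproduces, so the rewrites fire only when the result is unused or can be recomputed from the constant string. A source-level sketch of the three rewrites (function names invented, results unused):

    #include <cstdio>
    void printfBefore(const char *s) {
      printf("x");       // one char, no '%'
      printf("foo\n");   // constant string ending in '\n', no '%'
      printf("%s\n", s); // "%s\n" with a string operand
    }
    void printfAfter(const char *s) {
      putchar('x'); // printf("x") -> putchar('x')
      puts("foo");  // printf("foo\n") -> puts("foo"), '\n' dropped
      puts(s);      // printf("%s\n", s) -> puts(s)
    }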
+
+struct SPrintFOpt : public LibCallOptimization {
+ Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return 0;
+
+ // If we just have a format string (nothing else crazy) transform it.
+ if (CI->getNumArgOperands() == 2) {
+ // Make sure there's no % in the constant array. We could try to handle
+ // %% -> % in the future if we cared.
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%')
+ return 0; // we found a format specifier, bail out.
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+ FormatStr.size() + 1), 1); // nul byte.
+ return ConstantInt::get(CI->getType(), FormatStr.size());
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
+ Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
+ B.CreateStore(V, Ptr);
+ Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
+
+ return ConstantInt::get(CI->getType(), 1);
+ }
+
+ if (FormatStr[1] == 's') {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
+
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
+ if (!Len)
+ return 0;
+ Value *IncLen = B.CreateAdd(Len,
+ ConstantInt::get(Len->getType(), 1),
+ "leninc");
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
+
+ // The sprintf result is the unincremented number of bytes in the string.
+ return B.CreateIntCast(Len, CI->getType(), false);
+ }
+ return 0;
+ }
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed pointer arguments and an integer result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+ // point arguments.
+ if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *SIPrintFFn =
+ M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
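Both sprintf rewrites replace the format-string interpreter with a plain memcpy: the literal case copies the terminating nul along with the characters, and the "%s" case appends it via strlen(s) + 1. A sketch (function names invented):

    #include <cstring>
    // sprintf(dst, "abc") -> memcpy(dst, "abc", 4); result is strlen("abc").
    int sprintfLiteralAfter(char *dst) {
      std::memcpy(dst, "abc", 4); // 3 chars plus the nul byte
      return 3;
    }
    // sprintf(dst, "%s", s) -> memcpy(dst, s, strlen(s) + 1).
    int sprintfPercentSAfter(char *dst, const char *s) {
      std::size_t len = std::strlen(s);
      std::memcpy(dst, s, len + 1);
      return (int)len; // the unincremented length, as noted above
    }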
+
+struct FPrintFOpt : public LibCallOptimization {
+ Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // All the optimizations depend on the format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ return 0;
+
+ // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+ if (CI->getNumArgOperands() == 2) {
+ for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+ if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
+ return 0; // We found a format specifier.
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ Value *NewCI = EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
+ }
+
+ // The remaining optimizations require the format string to be "%s" or "%c"
+ // and have an extra operand.
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
+ return 0;
+
+ // Decode the second character of the format string.
+ if (FormatStr[1] == 'c') {
+ // fprintf(F, "%c", chr) --> fputc(chr, F)
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
+ TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+ }
+
+ if (FormatStr[1] == 's') {
+ // fprintf(F, "%s", str) --> fputs(str, F)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+ return 0;
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
+ }
+ return 0;
+ }
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require two fixed pointer parameters and an integer result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+ // floating point arguments.
+ if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *FIPrintFFn =
+ M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(FIPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
+
+struct FWriteOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require a pointer, an integer, an integer, a pointer, returning integer.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ !FT->getParamType(3)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ // Get the element size and count.
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!SizeC || !CountC) return 0;
+ uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
+
+ // If this is writing zero records, remove the call (it's a noop).
+ if (Bytes == 0)
+ return ConstantInt::get(CI->getType(), 0);
+
+ // If this is writing one byte, turn it into fputc.
+ // This optimization is only valid if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+ }
+
+ return 0;
+ }
+};
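With a constant size and count, a one-byte fwrite whose result is unused degrades cleanly to fputc of the first byte. A sketch (name invented):

    #include <cstdio>
    // fwrite(s, 1, 1, f) -> fputc(s[0], f); valid only when the result is unused
    void fwriteAfter(const char *s, FILE *f) {
      std::fputc(s[0], f);
    }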
+
+struct FPutsOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // Require two pointers. Also, we can't optimize if the return value is used.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !CI->use_empty())
+ return 0;
+
+ // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+ uint64_t Len = GetStringLength(CI->getArgOperand(0));
+ if (!Len) return 0;
+ // Known to have no uses (see above).
+ return EmitFWrite(CI->getArgOperand(0),
+ ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+ CI->getArgOperand(1), B, TD, TLI);
+ }
+};
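The Len - 1 above is because GetStringLength counts the terminating nul while fwrite should emit only the characters. A sketch (name invented):

    #include <cstdio>
    // fputs("abc", f) -> fwrite("abc", 1, 3, f); the nul byte is not written
    void fputsAfter(FILE *f) {
      static const char s[] = "abc";
      std::fwrite(s, 1, sizeof(s) - 1, f);
    }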
+
+struct PutsOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ // Check for a constant string.
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+ return 0;
+
+ if (Str.empty() && CI->use_empty()) {
+ // puts("") -> putchar('\n')
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ return 0;
+ }
+};
+
} // End anonymous namespace.
namespace llvm {
@@ -1024,98 +1673,252 @@ class LibCallSimplifierImpl {
const DataLayout *TD;
const TargetLibraryInfo *TLI;
const LibCallSimplifier *LCS;
- StringMap<LibCallOptimization*> Optimizations;
-
- // Fortified library call optimizations.
- MemCpyChkOpt MemCpyChk;
- MemMoveChkOpt MemMoveChk;
- MemSetChkOpt MemSetChk;
- StrCpyChkOpt StrCpyChk;
- StpCpyChkOpt StpCpyChk;
- StrNCpyChkOpt StrNCpyChk;
-
- // String library call optimizations.
- StrCatOpt StrCat;
- StrNCatOpt StrNCat;
- StrChrOpt StrChr;
- StrRChrOpt StrRChr;
- StrCmpOpt StrCmp;
- StrNCmpOpt StrNCmp;
- StrCpyOpt StrCpy;
- StpCpyOpt StpCpy;
- StrNCpyOpt StrNCpy;
- StrLenOpt StrLen;
- StrPBrkOpt StrPBrk;
- StrToOpt StrTo;
- StrSpnOpt StrSpn;
- StrCSpnOpt StrCSpn;
- StrStrOpt StrStr;
-
- // Memory library call optimizations.
- MemCmpOpt MemCmp;
- MemCpyOpt MemCpy;
- MemMoveOpt MemMove;
- MemSetOpt MemSet;
-
- void initOptimizations();
- void addOpt(LibFunc::Func F, LibCallOptimization* Opt);
+ bool UnsafeFPShrink;
+
+ // Math library call optimizations.
+ CosOpt Cos;
+ PowOpt Pow;
+ Exp2Opt Exp2;
public:
LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI,
- const LibCallSimplifier *LCS) {
+ const LibCallSimplifier *LCS,
+ bool UnsafeFPShrink = false)
+ : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
this->TD = TD;
this->TLI = TLI;
this->LCS = LCS;
+ this->UnsafeFPShrink = UnsafeFPShrink;
}
Value *optimizeCall(CallInst *CI);
+ LibCallOptimization *lookupOptimization(CallInst *CI);
+ bool hasFloatVersion(StringRef FuncName);
};
-void LibCallSimplifierImpl::initOptimizations() {
- // Fortified library call optimizations.
- Optimizations["__memcpy_chk"] = &MemCpyChk;
- Optimizations["__memmove_chk"] = &MemMoveChk;
- Optimizations["__memset_chk"] = &MemSetChk;
- Optimizations["__strcpy_chk"] = &StrCpyChk;
- Optimizations["__stpcpy_chk"] = &StpCpyChk;
- Optimizations["__strncpy_chk"] = &StrNCpyChk;
- Optimizations["__stpncpy_chk"] = &StrNCpyChk;
-
- // String library call optimizations.
- addOpt(LibFunc::strcat, &StrCat);
- addOpt(LibFunc::strncat, &StrNCat);
- addOpt(LibFunc::strchr, &StrChr);
- addOpt(LibFunc::strrchr, &StrRChr);
- addOpt(LibFunc::strcmp, &StrCmp);
- addOpt(LibFunc::strncmp, &StrNCmp);
- addOpt(LibFunc::strcpy, &StrCpy);
- addOpt(LibFunc::stpcpy, &StpCpy);
- addOpt(LibFunc::strncpy, &StrNCpy);
- addOpt(LibFunc::strlen, &StrLen);
- addOpt(LibFunc::strpbrk, &StrPBrk);
- addOpt(LibFunc::strtol, &StrTo);
- addOpt(LibFunc::strtod, &StrTo);
- addOpt(LibFunc::strtof, &StrTo);
- addOpt(LibFunc::strtoul, &StrTo);
- addOpt(LibFunc::strtoll, &StrTo);
- addOpt(LibFunc::strtold, &StrTo);
- addOpt(LibFunc::strtoull, &StrTo);
- addOpt(LibFunc::strspn, &StrSpn);
- addOpt(LibFunc::strcspn, &StrCSpn);
- addOpt(LibFunc::strstr, &StrStr);
-
- // Memory library call optimizations.
- addOpt(LibFunc::memcmp, &MemCmp);
- addOpt(LibFunc::memcpy, &MemCpy);
- addOpt(LibFunc::memmove, &MemMove);
- addOpt(LibFunc::memset, &MemSet);
+bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) {
+ LibFunc::Func Func;
+ SmallString<20> FloatFuncName = FuncName;
+ FloatFuncName += 'f';
+ if (TLI->getLibFunc(FloatFuncName, Func))
+ return TLI->has(Func);
+ return false;
}
-Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
- if (Optimizations.empty())
- initOptimizations();
-
+// Fortified library call optimizations.
+static MemCpyChkOpt MemCpyChk;
+static MemMoveChkOpt MemMoveChk;
+static MemSetChkOpt MemSetChk;
+static StrCpyChkOpt StrCpyChk;
+static StpCpyChkOpt StpCpyChk;
+static StrNCpyChkOpt StrNCpyChk;
+
+// String library call optimizations.
+static StrCatOpt StrCat;
+static StrNCatOpt StrNCat;
+static StrChrOpt StrChr;
+static StrRChrOpt StrRChr;
+static StrCmpOpt StrCmp;
+static StrNCmpOpt StrNCmp;
+static StrCpyOpt StrCpy;
+static StpCpyOpt StpCpy;
+static StrNCpyOpt StrNCpy;
+static StrLenOpt StrLen;
+static StrPBrkOpt StrPBrk;
+static StrToOpt StrTo;
+static StrSpnOpt StrSpn;
+static StrCSpnOpt StrCSpn;
+static StrStrOpt StrStr;
+
+// Memory library call optimizations.
+static MemCmpOpt MemCmp;
+static MemCpyOpt MemCpy;
+static MemMoveOpt MemMove;
+static MemSetOpt MemSet;
+
+// Math library call optimizations.
+static UnaryDoubleFPOpt UnaryDoubleFP(false);
+static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+
+// Integer library call optimizations.
+static FFSOpt FFS;
+static AbsOpt Abs;
+static IsDigitOpt IsDigit;
+static IsAsciiOpt IsAscii;
+static ToAsciiOpt ToAscii;
+
+// Formatting and IO library call optimizations.
+static PrintFOpt PrintF;
+static SPrintFOpt SPrintF;
+static FPrintFOpt FPrintF;
+static FWriteOpt FWrite;
+static FPutsOpt FPuts;
+static PutsOpt Puts;
+
+LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
+ LibFunc::Func Func;
Function *Callee = CI->getCalledFunction();
- LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+ StringRef FuncName = Callee->getName();
+
+ // First, check for intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::pow:
+ return &Pow;
+ case Intrinsic::exp2:
+ return &Exp2;
+ default:
+ return 0;
+ }
+ }
+
+ // Then check for known library functions.
+ if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
+ switch (Func) {
+ case LibFunc::strcat:
+ return &StrCat;
+ case LibFunc::strncat:
+ return &StrNCat;
+ case LibFunc::strchr:
+ return &StrChr;
+ case LibFunc::strrchr:
+ return &StrRChr;
+ case LibFunc::strcmp:
+ return &StrCmp;
+ case LibFunc::strncmp:
+ return &StrNCmp;
+ case LibFunc::strcpy:
+ return &StrCpy;
+ case LibFunc::stpcpy:
+ return &StpCpy;
+ case LibFunc::strncpy:
+ return &StrNCpy;
+ case LibFunc::strlen:
+ return &StrLen;
+ case LibFunc::strpbrk:
+ return &StrPBrk;
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ return &StrTo;
+ case LibFunc::strspn:
+ return &StrSpn;
+ case LibFunc::strcspn:
+ return &StrCSpn;
+ case LibFunc::strstr:
+ return &StrStr;
+ case LibFunc::memcmp:
+ return &MemCmp;
+ case LibFunc::memcpy:
+ return &MemCpy;
+ case LibFunc::memmove:
+ return &MemMove;
+ case LibFunc::memset:
+ return &MemSet;
+ case LibFunc::cosf:
+ case LibFunc::cos:
+ case LibFunc::cosl:
+ return &Cos;
+ case LibFunc::powf:
+ case LibFunc::pow:
+ case LibFunc::powl:
+ return &Pow;
+ case LibFunc::exp2l:
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ return &Exp2;
+ case LibFunc::ffs:
+ case LibFunc::ffsl:
+ case LibFunc::ffsll:
+ return &FFS;
+ case LibFunc::abs:
+ case LibFunc::labs:
+ case LibFunc::llabs:
+ return &Abs;
+ case LibFunc::isdigit:
+ return &IsDigit;
+ case LibFunc::isascii:
+ return &IsAscii;
+ case LibFunc::toascii:
+ return &ToAscii;
+ case LibFunc::printf:
+ return &PrintF;
+ case LibFunc::sprintf:
+ return &SPrintF;
+ case LibFunc::fprintf:
+ return &FPrintF;
+ case LibFunc::fwrite:
+ return &FWrite;
+ case LibFunc::fputs:
+ return &FPuts;
+ case LibFunc::puts:
+ return &Puts;
+ case LibFunc::ceil:
+ case LibFunc::fabs:
+ case LibFunc::floor:
+ case LibFunc::rint:
+ case LibFunc::round:
+ case LibFunc::nearbyint:
+ case LibFunc::trunc:
+ if (hasFloatVersion(FuncName))
+ return &UnaryDoubleFP;
+ return 0;
+ case LibFunc::acos:
+ case LibFunc::acosh:
+ case LibFunc::asin:
+ case LibFunc::asinh:
+ case LibFunc::atan:
+ case LibFunc::atanh:
+ case LibFunc::cbrt:
+ case LibFunc::cosh:
+ case LibFunc::exp:
+ case LibFunc::exp10:
+ case LibFunc::expm1:
+ case LibFunc::log:
+ case LibFunc::log10:
+ case LibFunc::log1p:
+ case LibFunc::log2:
+ case LibFunc::logb:
+ case LibFunc::sin:
+ case LibFunc::sinh:
+ case LibFunc::sqrt:
+ case LibFunc::tan:
+ case LibFunc::tanh:
+ if (UnsafeFPShrink && hasFloatVersion(FuncName))
+ return &UnsafeUnaryDoubleFP;
+ return 0;
+ case LibFunc::memcpy_chk:
+ return &MemCpyChk;
+ default:
+ return 0;
+ }
+ }
+
+ // Finally check for fortified library calls.
+ if (FuncName.endswith("_chk")) {
+ if (FuncName == "__memmove_chk")
+ return &MemMoveChk;
+ else if (FuncName == "__memset_chk")
+ return &MemSetChk;
+ else if (FuncName == "__strcpy_chk")
+ return &StrCpyChk;
+ else if (FuncName == "__stpcpy_chk")
+ return &StpCpyChk;
+ else if (FuncName == "__strncpy_chk")
+ return &StrNCpyChk;
+ else if (FuncName == "__stpncpy_chk")
+ return &StrNCpyChk;
+ }
+
+ return 0;
+}
+
+Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
+ LibCallOptimization *LCO = lookupOptimization(CI);
if (LCO) {
IRBuilder<> Builder(CI);
return LCO->optimizeCall(CI, TD, TLI, LCS, Builder);
@@ -1123,14 +1926,10 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
return 0;
}
-void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) {
- if (TLI->has(F))
- Optimizations[TLI->getName(F)] = Opt;
-}
-
LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
- Impl = new LibCallSimplifierImpl(TD, TLI, this);
+ const TargetLibraryInfo *TLI,
+ bool UnsafeFPShrink) {
+ Impl = new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink);
}
LibCallSimplifier::~LibCallSimplifier() {
@@ -1138,6 +1937,7 @@ LibCallSimplifier::~LibCallSimplifier() {
}
Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+ if (CI->hasFnAttr(Attribute::NoBuiltin)) return 0;
return Impl->optimizeCall(CI);
}
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index b1cad06dffe9..560f58160753 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -15,12 +15,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Type.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
char UnifyFunctionExitNodes::ID = 0;
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index a30b09321b5e..b5941bdf2411 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,11 +13,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Metadata.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
using namespace llvm;
// Out of line method to get vtable etc for class.
@@ -63,14 +63,29 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Check all operands to see if any need to be remapped.
for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
Value *OP = MD->getOperand(i);
- if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue;
+ if (OP == 0) continue;
+ Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper);
+ // Use identity map if Mapped_Op is null and we can ignore missing
+ // entries.
+ if (Mapped_OP == OP ||
+ (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries)))
+ continue;
// Ok, at least one operand needs remapping.
SmallVector<Value*, 4> Elts;
Elts.reserve(MD->getNumOperands());
for (i = 0; i != e; ++i) {
Value *Op = MD->getOperand(i);
- Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0);
+ if (Op == 0)
+ Elts.push_back(0);
+ else {
+ Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper);
+ // Use identity map if Mapped_Op is null and we can ignore missing
+ // entries.
+ if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries))
+ Mapped_Op = Op;
+ Elts.push_back(Mapped_Op);
+ }
}
MDNode *NewMD = MDNode::get(V->getContext(), Elts);
Dummy->replaceAllUsesWith(NewMD);
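The remapping rule introduced above is simple: a null result from MapValue() means "no mapping", and with RF_IgnoreMissingEntries set that is resolved to the identity map rather than a null operand; only a genuinely changed operand forces construction of a new MDNode. A standalone sketch of the same decision, with ints standing in for Value* and -1 for a missing mapping (all names here are illustrative, not part of the patch):

  #include <cstdio>
  #include <map>

  enum RemapFlags { RF_None = 0, RF_IgnoreMissingEntries = 1 };

  // Returns the mapped value, or -1 when no mapping exists (the analogue
  // of MapValue returning null).
  static int mapValue(int V, const std::map<int, int> &VM) {
    std::map<int, int>::const_iterator I = VM.find(V);
    return I == VM.end() ? -1 : I->second;
  }

  static int remapOperand(int Op, const std::map<int, int> &VM,
                          unsigned Flags) {
    int Mapped = mapValue(Op, VM);
    // Fall back to the identity map when the entry is missing and the
    // caller asked us to tolerate that, mirroring the MDNode loop above.
    if (Mapped == -1 && (Flags & RF_IgnoreMissingEntries))
      return Op;
    return Mapped;
  }

  int main() {
    std::map<int, int> VM;
    VM[1] = 10;
    std::printf("%d %d\n",
                remapOperand(1, VM, RF_IgnoreMissingEntries),   // 10
                remapOperand(2, VM, RF_IgnoreMissingEntries));  // 2 (identity)
    return 0;
  }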
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index f7be3e312407..17900dabbefe 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -16,39 +16,38 @@
#define BBV_NAME "bb-vectorize"
#define DEBUG_TYPE BBV_NAME
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
+#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ValueHandle.h"
-#include "llvm/DataLayout.h"
-#include "llvm/TargetTransformInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
-#include <map>
using namespace llvm;
static cl::opt<bool>
@@ -89,6 +88,10 @@ MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden,
cl::desc("The maximum number of pairable instructions per group"));
static cl::opt<unsigned>
+MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden,
+ cl::desc("The maximum number of candidate instruction pairs per group"));
+
+static cl::opt<unsigned>
MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200),
cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"
" a full cycle check"));
@@ -199,9 +202,7 @@ namespace {
DT = &P->getAnalysis<DominatorTree>();
SE = &P->getAnalysis<ScalarEvolution>();
TD = P->getAnalysisIfAvailable<DataLayout>();
- TTI = IgnoreTargetInfo ? 0 :
- P->getAnalysisIfAvailable<TargetTransformInfo>();
- VTTI = TTI ? TTI->getVectorTargetTransformInfo() : 0;
+ TTI = IgnoreTargetInfo ? 0 : &P->getAnalysis<TargetTransformInfo>();
}
typedef std::pair<Value *, Value *> ValuePair;
@@ -209,18 +210,12 @@ namespace {
typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
typedef std::pair<VPPair, unsigned> VPPairWithType;
- typedef std::pair<std::multimap<Value *, Value *>::iterator,
- std::multimap<Value *, Value *>::iterator> VPIteratorPair;
- typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator,
- std::multimap<ValuePair, ValuePair>::iterator>
- VPPIteratorPair;
AliasAnalysis *AA;
DominatorTree *DT;
ScalarEvolution *SE;
DataLayout *TD;
- TargetTransformInfo *TTI;
- const VectorTargetTransformInfo *VTTI;
+ const TargetTransformInfo *TTI;
// FIXME: const correct?
@@ -228,7 +223,7 @@ namespace {
bool getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start,
- std::multimap<Value *, Value *> &CandidatePairs,
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
DenseSet<ValuePair> &FixedOrderPairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, bool NonPow2Len);
@@ -242,33 +237,36 @@ namespace {
PairConnectionSplat
};
- void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes);
+ void computeConnectedPairs(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes);
void buildDepMap(BasicBlock &BB,
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &PairableInstUsers);
-
- void choosePairs(std::multimap<Value *, Value *> &CandidatePairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *>& ChosenPairs);
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &PairableInstUsers);
+
+ void choosePairs(DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *>& ChosenPairs);
void fuseChosenPairs(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *>& ChosenPairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps);
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *>& ChosenPairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps);
bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
@@ -280,56 +278,63 @@ namespace {
bool trackUsesOfI(DenseSet<Value *> &Users,
AliasSetTracker &WriteSet, Instruction *I,
Instruction *J, bool UpdateUsers = true,
- std::multimap<Value *, Value *> *LoadMoveSet = 0);
+ DenseSet<ValuePair> *LoadMoveSetPairs = 0);
- void computePairsConnectedTo(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- ValuePair P);
+ void computePairsConnectedTo(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ ValuePair P);
bool pairsConflict(ValuePair P, ValuePair Q,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0);
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> >
+ *PairableInstUserMap = 0,
+ DenseSet<VPPair> *PairableInstUserPairSet = 0);
bool pairWillFormCycle(ValuePair P,
- std::multimap<ValuePair, ValuePair> &PairableInstUsers,
- DenseSet<ValuePair> &CurrentPairs);
-
- void pruneTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &Tree,
- DenseSet<ValuePair> &PrunedTree, ValuePair J,
- bool UseCycleCheck);
-
- void buildInitialTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &Tree, ValuePair J);
-
- void findBestTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
- int &BestEffSize, VPIteratorPair ChoiceRange,
- bool UseCycleCheck);
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers,
+ DenseSet<ValuePair> &CurrentPairs);
+
+ void pruneDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG,
+ DenseSet<ValuePair> &PrunedDAG, ValuePair J,
+ bool UseCycleCheck);
+
+ void buildInitialDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG, ValuePair J);
+
+ void findBestDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth,
+ int &BestEffSize, Value *II, std::vector<Value *>&JJ,
+ bool UseCycleCheck);
Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
Instruction *J, unsigned o);
@@ -361,20 +366,22 @@ namespace {
void collectPairLoadMoveSet(BasicBlock &BB,
DenseMap<Value *, Value *> &ChosenPairs,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *I);
void collectLoadMoveSet(BasicBlock &BB,
std::vector<Value *> &PairableInsts,
DenseMap<Value *, Value *> &ChosenPairs,
- std::multimap<Value *, Value *> &LoadMoveSet);
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs);
bool canMoveUsesOfIAfterJ(BasicBlock &BB,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *I, Instruction *J);
void moveUsesOfIAfterJ(BasicBlock &BB,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *&InsertionPt,
Instruction *I, Instruction *J);
@@ -387,7 +394,7 @@ namespace {
return false;
}
- DEBUG(if (VTTI) dbgs() << "BBV: using target information\n");
+ DEBUG(if (TTI) dbgs() << "BBV: using target information\n");
bool changed = false;
// Iterate a sufficient number of times to merge types of size 1 bit,
@@ -395,7 +402,7 @@ namespace {
// target vector register.
unsigned n = 1;
for (unsigned v = 2;
- (VTTI || v <= Config.VectorBits) &&
+ (TTI || v <= Config.VectorBits) &&
(!Config.MaxIter || n <= Config.MaxIter);
v *= 2, ++n) {
DEBUG(dbgs() << "BBV: fusing loop #" << n <<
@@ -426,9 +433,7 @@ namespace {
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
TD = getAnalysisIfAvailable<DataLayout>();
- TTI = IgnoreTargetInfo ? 0 :
- getAnalysisIfAvailable<TargetTransformInfo>();
- VTTI = TTI ? TTI->getVectorTargetTransformInfo() : 0;
+ TTI = IgnoreTargetInfo ? 0 : &getAnalysis<TargetTransformInfo>();
return vectorizeBB(BB);
}
@@ -438,6 +443,7 @@ namespace {
AU.addRequired<AliasAnalysis>();
AU.addRequired<DominatorTree>();
AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<DominatorTree>();
AU.addPreserved<ScalarEvolution>();
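The AU.addRequired<TargetTransformInfo>() line is what makes the unconditional &getAnalysis<TargetTransformInfo>() above legal: a required analysis is guaranteed to be scheduled before the pass runs, whereas the old getAnalysisIfAvailable<>() call could return null (hence the separate VTTI null checks this patch deletes). A sketch of the pairing, with pass boilerplate elided and the body illustrative:

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    BasicBlockPass::getAnalysisUsage(AU);
    AU.addRequired<TargetTransformInfo>();  // scheduled before this pass...
  }
  virtual bool runOnBasicBlock(BasicBlock &BB) {
    // ...so taking its address unconditionally cannot dangle. The only
    // remaining opt-out is the explicit IgnoreTargetInfo flag.
    const TargetTransformInfo *TTI =
        IgnoreTargetInfo ? 0 : &getAnalysis<TargetTransformInfo>();
    (void) TTI;
    return false;
  }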
@@ -467,18 +473,18 @@ namespace {
static inline void getInstructionTypes(Instruction *I,
Type *&T1, Type *&T2) {
- if (isa<StoreInst>(I)) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
// For stores, it is the value type, not the pointer type that matters
// because the value is what will come from a vector register.
- Value *IVal = cast<StoreInst>(I)->getValueOperand();
+ Value *IVal = SI->getValueOperand();
T1 = IVal->getType();
} else {
T1 = I->getType();
}
- if (I->isCast())
- T2 = cast<CastInst>(I)->getSrcTy();
+ if (CastInst *CI = dyn_cast<CastInst>(I))
+ T2 = CI->getSrcTy();
else
T2 = T1;
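Both rewrites above collapse an isa<>-check followed by a cast<> into a single dyn_cast<>, which tests and converts in one step. A standalone analogue using C++ RTTI (LLVM's dyn_cast<> plays the same role via LLVM-style RTTI, without the RTTI overhead; the types here are mock-ups, not LLVM's):

  #include <cstdio>

  struct Instruction { virtual ~Instruction() {} };
  struct StoreInst : Instruction { int ValueOperand; };

  static int typeOf(Instruction *I) {
    // One test-and-convert, instead of an isa<> check plus a cast<>:
    if (StoreInst *SI = dynamic_cast<StoreInst *>(I))
      return SI->ValueOperand;  // the "value type" path for stores
    return 0;                   // the generic path
  }

  int main() {
    StoreInst S; S.ValueOperand = 42;
    Instruction I;
    std::printf("%d %d\n", typeOf(&S), typeOf(&I)); // 42 0
    return 0;
  }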
@@ -504,7 +510,7 @@ namespace {
// InsertElement and ExtractElement have a depth factor of zero. This is
// for two reasons: First, they cannot be usefully fused. Second, because
// the pass generates a lot of these, they can confuse the simple metric
- // used to compare the trees in the next iteration. Thus, giving them a
+ // used to compare the dags in the next iteration. Thus, giving them a
// weight of zero allows the pass to essentially ignore them in
// subsequent iterations when looking for vectorization opportunities
// while still tracking dependency chains that flow through those
@@ -520,7 +526,7 @@ namespace {
return 1;
}
- // Returns the cost of the provided instruction using VTTI.
+ // Returns the cost of the provided instruction using TTI.
// This does not handle loads and stores.
unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) {
switch (Opcode) {
@@ -531,7 +537,7 @@ namespace {
// generate vector GEPs.
return 0;
case Instruction::Br:
- return VTTI->getCFInstrCost(Opcode);
+ return TTI->getCFInstrCost(Opcode);
case Instruction::PHI:
return 0;
case Instruction::Add:
@@ -552,11 +558,11 @@ namespace {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- return VTTI->getArithmeticInstrCost(Opcode, T1);
+ return TTI->getArithmeticInstrCost(Opcode, T1);
case Instruction::Select:
case Instruction::ICmp:
case Instruction::FCmp:
- return VTTI->getCmpSelInstrCost(Opcode, T1, T2);
+ return TTI->getCmpSelInstrCost(Opcode, T1, T2);
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
@@ -570,7 +576,7 @@ namespace {
case Instruction::FPTrunc:
case Instruction::BitCast:
case Instruction::ShuffleVector:
- return VTTI->getCastInstrCost(Opcode, T1, T2);
+ return TTI->getCastInstrCost(Opcode, T1, T2);
}
return 1;
@@ -642,7 +648,7 @@ namespace {
Function *F = I->getCalledFunction();
if (!F) return false;
- unsigned IID = F->getIntrinsicID();
+ Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
if (!IID) return false;
switch(IID) {
@@ -660,23 +666,11 @@ namespace {
case Intrinsic::pow:
return Config.VectorizeMath;
case Intrinsic::fma:
+ case Intrinsic::fmuladd:
return Config.VectorizeFMA;
}
}
- // Returns true if J is the second element in some pair referenced by
- // some multimap pair iterator pair.
- template <typename V>
- bool isSecondInIteratorPair(V J, std::pair<
- typename std::multimap<V, V>::iterator,
- typename std::multimap<V, V>::iterator> PairRange) {
- for (typename std::multimap<V, V>::iterator K = PairRange.first;
- K != PairRange.second; ++K)
- if (K->second == J) return true;
-
- return false;
- }
-
bool isPureIEChain(InsertElementInst *IE) {
InsertElementInst *IENext = IE;
do {
@@ -701,11 +695,12 @@ namespace {
DenseMap<Value *, Value *> AllChosenPairs;
DenseSet<ValuePair> AllFixedOrderPairs;
DenseMap<VPPair, unsigned> AllPairConnectionTypes;
- std::multimap<ValuePair, ValuePair> AllConnectedPairs, AllConnectedPairDeps;
+ DenseMap<ValuePair, std::vector<ValuePair> > AllConnectedPairs,
+ AllConnectedPairDeps;
do {
std::vector<Value *> PairableInsts;
- std::multimap<Value *, Value *> CandidatePairs;
+ DenseMap<Value *, std::vector<Value *> > CandidatePairs;
DenseSet<ValuePair> FixedOrderPairs;
DenseMap<ValuePair, int> CandidatePairCostSavings;
ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
@@ -714,6 +709,14 @@ namespace {
PairableInsts, NonPow2Len);
if (PairableInsts.empty()) continue;
+ // Build the candidate pair set for faster lookups.
+ DenseSet<ValuePair> CandidatePairsSet;
+ for (DenseMap<Value *, std::vector<Value *> >::iterator I =
+ CandidatePairs.begin(), E = CandidatePairs.end(); I != E; ++I)
+ for (std::vector<Value *>::iterator J = I->second.begin(),
+ JE = I->second.end(); J != JE; ++J)
+ CandidatePairsSet.insert(ValuePair(I->first, *J));
+
// Now we have a map of all of the pairable instructions and we need to
// select the best possible pairing. A good pairing is one such that the
// users of the pair are also paired. This defines a (directed) forest
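The loop added in this hunk builds a flat companion set alongside the map-of-vectors, so that later phases can answer "is (A, B) a candidate pair?" without scanning A's partner vector. A standalone sketch of the pattern with std containers in place of LLVM's DenseMap/DenseSet:

  #include <cstdio>
  #include <map>
  #include <set>
  #include <utility>
  #include <vector>

  typedef std::pair<int, int> ValuePair; // ints stand in for Value*

  int main() {
    // Primary structure: first member -> all of its candidate partners.
    std::map<int, std::vector<int> > CandidatePairs;
    CandidatePairs[1].push_back(2);
    CandidatePairs[1].push_back(3);

    // The mirror built in the hunk above: a flat pair set giving cheap
    // membership tests, so hot loops never scan the per-key vectors.
    std::set<ValuePair> CandidatePairsSet;
    for (std::map<int, std::vector<int> >::iterator I = CandidatePairs.begin(),
         E = CandidatePairs.end(); I != E; ++I)
      for (std::vector<int>::iterator J = I->second.begin(),
           JE = I->second.end(); J != JE; ++J)
        CandidatePairsSet.insert(ValuePair(I->first, *J));

    std::printf("%d %d\n",
                (int) CandidatePairsSet.count(ValuePair(1, 3)),   // 1
                (int) CandidatePairsSet.count(ValuePair(2, 1)));  // 0
    return 0;
  }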
@@ -723,30 +726,33 @@ namespace {
// Note that it only matters that both members of the second pair use some
// element of the first pair (to allow for splatting).
- std::multimap<ValuePair, ValuePair> ConnectedPairs, ConnectedPairDeps;
+ DenseMap<ValuePair, std::vector<ValuePair> > ConnectedPairs,
+ ConnectedPairDeps;
DenseMap<VPPair, unsigned> PairConnectionTypes;
- computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs,
- PairConnectionTypes);
+ computeConnectedPairs(CandidatePairs, CandidatePairsSet,
+ PairableInsts, ConnectedPairs, PairConnectionTypes);
if (ConnectedPairs.empty()) continue;
- for (std::multimap<ValuePair, ValuePair>::iterator
+ for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator
I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
- I != IE; ++I) {
- ConnectedPairDeps.insert(VPPair(I->second, I->first));
- }
+ I != IE; ++I)
+ for (std::vector<ValuePair>::iterator J = I->second.begin(),
+ JE = I->second.end(); J != JE; ++J)
+ ConnectedPairDeps[*J].push_back(I->first);
// Build the pairable-instruction dependency map
DenseSet<ValuePair> PairableInstUsers;
buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
// There is now a graph of the connected pairs. For each variable, pick
- // the pairing with the largest tree meeting the depth requirement on at
- // least one branch. Then select all pairings that are part of that tree
+ // the pairing with the largest dag meeting the depth requirement on at
+ // least one branch. Then select all pairings that are part of that dag
// and remove them from the list of available pairings and pairable
// variables.
DenseMap<Value *, Value *> ChosenPairs;
- choosePairs(CandidatePairs, CandidatePairCostSavings,
+ choosePairs(CandidatePairs, CandidatePairsSet,
+ CandidatePairCostSavings,
PairableInsts, FixedOrderPairs, PairConnectionTypes,
ConnectedPairs, ConnectedPairDeps,
PairableInstUsers, ChosenPairs);
@@ -780,14 +786,15 @@ namespace {
}
}
- for (std::multimap<ValuePair, ValuePair>::iterator
+ for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator
I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
- I != IE; ++I) {
- if (AllPairConnectionTypes.count(*I)) {
- AllConnectedPairs.insert(*I);
- AllConnectedPairDeps.insert(VPPair(I->second, I->first));
- }
- }
+ I != IE; ++I)
+ for (std::vector<ValuePair>::iterator J = I->second.begin(),
+ JE = I->second.end(); J != JE; ++J)
+ if (AllPairConnectionTypes.count(VPPair(I->first, *J))) {
+ AllConnectedPairs[I->first].push_back(*J);
+ AllConnectedPairDeps[*J].push_back(I->first);
+ }
} while (ShouldContinue);
if (AllChosenPairs.empty()) return false;
@@ -903,8 +910,8 @@ namespace {
T2->getScalarType()->isPointerTy()))
return false;
- if (!VTTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
- T2->getPrimitiveSizeInBits() >= Config.VectorBits))
+ if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
+ T2->getPrimitiveSizeInBits() >= Config.VectorBits))
return false;
return true;
@@ -913,7 +920,7 @@ namespace {
// This function returns true if the two provided instructions are compatible
// (meaning that they can be fused into a vector instruction). This assumes
// that I has already been determined to be vectorizable and that J is not
- // in the use tree of I.
+ // in the use dag of I.
bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
bool IsSimpleLoadStore, bool NonPow2Len,
int &CostSavings, int &FixedOrder) {
@@ -935,7 +942,7 @@ namespace {
unsigned MaxTypeBits = std::max(
IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(),
IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits());
- if (!VTTI && MaxTypeBits > Config.VectorBits)
+ if (!TTI && MaxTypeBits > Config.VectorBits)
return false;
// FIXME: handle addsub-type operations!
@@ -967,21 +974,26 @@ namespace {
return false;
}
- if (VTTI) {
- unsigned ICost = VTTI->getMemoryOpCost(I->getOpcode(), I->getType(),
- IAlignment, IAddressSpace);
- unsigned JCost = VTTI->getMemoryOpCost(J->getOpcode(), J->getType(),
- JAlignment, JAddressSpace);
- unsigned VCost = VTTI->getMemoryOpCost(I->getOpcode(), VType,
- BottomAlignment,
- IAddressSpace);
+ if (TTI) {
+ unsigned ICost = TTI->getMemoryOpCost(I->getOpcode(), aTypeI,
+ IAlignment, IAddressSpace);
+ unsigned JCost = TTI->getMemoryOpCost(J->getOpcode(), aTypeJ,
+ JAlignment, JAddressSpace);
+ unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType,
+ BottomAlignment,
+ IAddressSpace);
+
+ ICost += TTI->getAddressComputationCost(aTypeI);
+ JCost += TTI->getAddressComputationCost(aTypeJ);
+ VCost += TTI->getAddressComputationCost(VType);
+
if (VCost > ICost + JCost)
return false;
// We don't want to fuse to a type that will be split, even
// if the two input types will also be split and there is no other
// associated cost.
- unsigned VParts = VTTI->getNumberOfParts(VType);
+ unsigned VParts = TTI->getNumberOfParts(VType);
if (VParts > 1)
return false;
else if (!VParts && VCost == ICost + JCost)
@@ -992,11 +1004,17 @@ namespace {
} else {
return false;
}
- } else if (VTTI) {
+ } else if (TTI) {
unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2);
unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2);
Type *VT1 = getVecTypeForPair(IT1, JT1),
*VT2 = getVecTypeForPair(IT2, JT2);
+
+ // Note that this procedure is incorrect for insert and extract element
+ // instructions (because combining these often results in a shuffle),
+ // but this cost is ignored (because insert and extract element
+ // instructions are assigned a zero depth factor and are not really
+ // fused in general).
unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2);
if (VCost > ICost + JCost)
@@ -1005,8 +1023,8 @@ namespace {
// We don't want to fuse to a type that will be split, even
// if the two input types will also be split and there is no other
// associated cost.
- unsigned VParts1 = VTTI->getNumberOfParts(VT1),
- VParts2 = VTTI->getNumberOfParts(VT2);
+ unsigned VParts1 = TTI->getNumberOfParts(VT1),
+ VParts2 = TTI->getNumberOfParts(VT2);
if (VParts1 > 1 || VParts2 > 1)
return false;
else if ((!VParts1 || !VParts2) && VCost == ICost + JCost)
@@ -1019,14 +1037,67 @@ namespace {
// vectorized, the second arguments must be equal.
CallInst *CI = dyn_cast<CallInst>(I);
Function *FI;
- if (CI && (FI = CI->getCalledFunction()) &&
- FI->getIntrinsicID() == Intrinsic::powi) {
-
- Value *A1I = CI->getArgOperand(1),
- *A1J = cast<CallInst>(J)->getArgOperand(1);
- const SCEV *A1ISCEV = SE->getSCEV(A1I),
- *A1JSCEV = SE->getSCEV(A1J);
- return (A1ISCEV == A1JSCEV);
+ if (CI && (FI = CI->getCalledFunction())) {
+ Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID();
+ if (IID == Intrinsic::powi) {
+ Value *A1I = CI->getArgOperand(1),
+ *A1J = cast<CallInst>(J)->getArgOperand(1);
+ const SCEV *A1ISCEV = SE->getSCEV(A1I),
+ *A1JSCEV = SE->getSCEV(A1J);
+ return (A1ISCEV == A1JSCEV);
+ }
+
+ if (IID && TTI) {
+ SmallVector<Type*, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(CI->getArgOperand(i)->getType());
+ unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys);
+
+ Tys.clear();
+ CallInst *CJ = cast<CallInst>(J);
+ for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(CJ->getArgOperand(i)->getType());
+ unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys);
+
+ Tys.clear();
+ assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
+ "Intrinsic argument counts differ");
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ if (IID == Intrinsic::powi && i == 1)
+ Tys.push_back(CI->getArgOperand(i)->getType());
+ else
+ Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
+ CJ->getArgOperand(i)->getType()));
+ }
+
+ Type *RetTy = getVecTypeForPair(IT1, JT1);
+ unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys);
+
+ if (VCost > ICost + JCost)
+ return false;
+
+ // We don't want to fuse to a type that will be split, even
+ // if the two input types will also be split and there is no other
+ // associated cost.
+ unsigned RetParts = TTI->getNumberOfParts(RetTy);
+ if (RetParts > 1)
+ return false;
+ else if (!RetParts && VCost == ICost + JCost)
+ return false;
+
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ if (!Tys[i]->isVectorTy())
+ continue;
+
+ unsigned NumParts = TTI->getNumberOfParts(Tys[i]);
+ if (NumParts > 1)
+ return false;
+ else if (!NumParts && VCost == ICost + JCost)
+ return false;
+ }
+
+ CostSavings = ICost + JCost - VCost;
+ }
}
return true;
@@ -1040,7 +1111,7 @@ namespace {
// to contain any memory locations to which J writes. The function returns
// true if J uses I. By default, alias analysis is used to determine
// whether J reads from memory that overlaps with a location in WriteSet.
- // If LoadMoveSet is not null, then it is a previously-computed multimap
+ // If LoadMoveSet is not null, then it is a previously-computed map
// where the key is the memory-based user instruction and the value is
// the instruction to be compared with I. So, if LoadMoveSet is provided,
// then the alias analysis is not used. This is necessary because this
@@ -1050,7 +1121,7 @@ namespace {
bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users,
AliasSetTracker &WriteSet, Instruction *I,
Instruction *J, bool UpdateUsers,
- std::multimap<Value *, Value *> *LoadMoveSet) {
+ DenseSet<ValuePair> *LoadMoveSetPairs) {
bool UsesI = false;
// This instruction may already be marked as a user due, for example, to
@@ -1068,9 +1139,8 @@ namespace {
}
}
if (!UsesI && J->mayReadFromMemory()) {
- if (LoadMoveSet) {
- VPIteratorPair JPairRange = LoadMoveSet->equal_range(J);
- UsesI = isSecondInIteratorPair<Value*>(I, JPairRange);
+ if (LoadMoveSetPairs) {
+ UsesI = LoadMoveSetPairs->count(ValuePair(J, I));
} else {
for (AliasSetTracker::iterator W = WriteSet.begin(),
WE = WriteSet.end(); W != WE; ++W) {
@@ -1094,10 +1164,11 @@ namespace {
// basic block and collects all candidate pairs for vectorization.
bool BBVectorize::getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start,
- std::multimap<Value *, Value *> &CandidatePairs,
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
DenseSet<ValuePair> &FixedOrderPairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, bool NonPow2Len) {
+ size_t TotalPairs = 0;
BasicBlock::iterator E = BB.end();
if (Start == E) return false;
@@ -1143,8 +1214,9 @@ namespace {
PairableInsts.push_back(I);
}
- CandidatePairs.insert(ValuePair(I, J));
- if (VTTI)
+ CandidatePairs[I].push_back(J);
+ ++TotalPairs;
+ if (TTI)
CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J),
CostSavings));
@@ -1167,7 +1239,8 @@ namespace {
// If we have already found too many pairs, break here and this function
// will be called again starting after the last instruction selected
// during this invocation.
- if (PairableInsts.size() >= Config.MaxInsts) {
+ if (PairableInsts.size() >= Config.MaxInsts ||
+ TotalPairs >= Config.MaxPairs) {
ShouldContinue = true;
break;
}
@@ -1187,11 +1260,12 @@ namespace {
// it looks for pairs such that both members have an input which is an
// output of PI or PJ.
void BBVectorize::computePairsConnectedTo(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- ValuePair P) {
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ ValuePair P) {
StoreInst *SI, *SJ;
// For each possible pairing for this variable, look at the uses of
@@ -1209,8 +1283,6 @@ namespace {
continue;
}
- VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
-
// For each use of the first variable, look for uses of the second
// variable...
for (Value::use_iterator J = P.second->use_begin(),
@@ -1219,19 +1291,17 @@ namespace {
P.second == SJ->getPointerOperand())
continue;
- VPIteratorPair JPairRange = CandidatePairs.equal_range(*J);
-
// Look for <I, J>:
- if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+ if (CandidatePairsSet.count(ValuePair(*I, *J))) {
VPPair VP(P, ValuePair(*I, *J));
- ConnectedPairs.insert(VP);
+ ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect));
}
// Look for <J, I>:
- if (isSecondInIteratorPair<Value*>(*I, JPairRange)) {
+ if (CandidatePairsSet.count(ValuePair(*J, *I))) {
VPPair VP(P, ValuePair(*J, *I));
- ConnectedPairs.insert(VP);
+ ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap));
}
}
@@ -1244,9 +1314,9 @@ namespace {
P.first == SJ->getPointerOperand())
continue;
- if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+ if (CandidatePairsSet.count(ValuePair(*I, *J))) {
VPPair VP(P, ValuePair(*I, *J));
- ConnectedPairs.insert(VP);
+ ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
}
}
@@ -1263,16 +1333,14 @@ namespace {
P.second == SI->getPointerOperand())
continue;
- VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
-
for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) {
if ((SJ = dyn_cast<StoreInst>(*J)) &&
P.second == SJ->getPointerOperand())
continue;
- if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+ if (CandidatePairsSet.count(ValuePair(*I, *J))) {
VPPair VP(P, ValuePair(*I, *J));
- ConnectedPairs.insert(VP);
+ ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
}
}
@@ -1283,55 +1351,73 @@ namespace {
// connected if some output of the first pair forms an input to both members
// of the second pair.
void BBVectorize::computeConnectedPairs(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes) {
-
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes) {
for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
PE = PairableInsts.end(); PI != PE; ++PI) {
- VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI);
+ DenseMap<Value *, std::vector<Value *> >::iterator PP =
+ CandidatePairs.find(*PI);
+ if (PP == CandidatePairs.end())
+ continue;
- for (std::multimap<Value *, Value *>::iterator P = choiceRange.first;
- P != choiceRange.second; ++P)
- computePairsConnectedTo(CandidatePairs, PairableInsts,
- ConnectedPairs, PairConnectionTypes, *P);
+ for (std::vector<Value *>::iterator P = PP->second.begin(),
+ E = PP->second.end(); P != E; ++P)
+ computePairsConnectedTo(CandidatePairs, CandidatePairsSet,
+ PairableInsts, ConnectedPairs,
+ PairConnectionTypes, ValuePair(*PI, *P));
}
- DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size()
+ DEBUG(size_t TotalPairs = 0;
+ for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I =
+ ConnectedPairs.begin(), IE = ConnectedPairs.end(); I != IE; ++I)
+ TotalPairs += I->second.size();
+ dbgs() << "BBV: found " << TotalPairs
<< " pair connections.\n");
}
// This function builds a set of use tuples such that <A, B> is in the set
- // if B is in the use tree of A. If B is in the use tree of A, then B
+ // if B is in the use dag of A. If B is in the use dag of A, then B
// depends on the output of A.
void BBVectorize::buildDepMap(
BasicBlock &BB,
- std::multimap<Value *, Value *> &CandidatePairs,
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
std::vector<Value *> &PairableInsts,
DenseSet<ValuePair> &PairableInstUsers) {
DenseSet<Value *> IsInPair;
- for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(),
- E = CandidatePairs.end(); C != E; ++C) {
+ for (DenseMap<Value *, std::vector<Value *> >::iterator C =
+ CandidatePairs.begin(), E = CandidatePairs.end(); C != E; ++C) {
IsInPair.insert(C->first);
- IsInPair.insert(C->second);
+ IsInPair.insert(C->second.begin(), C->second.end());
}
- // Iterate through the basic block, recording all Users of each
+ // Iterate through the basic block, recording all users of each
// pairable instruction.
- BasicBlock::iterator E = BB.end();
+ BasicBlock::iterator E = BB.end(), EL =
+ BasicBlock::iterator(cast<Instruction>(PairableInsts.back()));
for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
if (IsInPair.find(I) == IsInPair.end()) continue;
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
- for (BasicBlock::iterator J = llvm::next(I); J != E; ++J)
+ for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) {
(void) trackUsesOfI(Users, WriteSet, I, J);
+ if (J == EL)
+ break;
+ }
+
for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end();
- U != E; ++U)
+ U != E; ++U) {
+ if (IsInPair.find(*U) == IsInPair.end()) continue;
PairableInstUsers.insert(ValuePair(I, *U));
+ }
+
+ if (I == EL)
+ break;
}
}
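buildDepMap gains two savings in this hunk: only users that are themselves in some candidate pair are recorded, and both scans stop at EL, the last pairable instruction, rather than running to the end of the block. A standalone sketch of that bounding, with indices standing in for block positions:

  #include <cstdio>

  int main() {
    const int BlockSize = 10;
    const int LastPairable = 6;   // the analogue of EL above
    unsigned Steps = 0;

    for (int I = 0; I < BlockSize; ++I) {
      for (int J = I + 1; J < BlockSize; ++J) {
        ++Steps;                  // trackUsesOfI(...) would run here
        if (J == LastPairable)
          break;                  // nothing past EL can belong to a pair
      }
      if (I == LastPairable)
        break;
    }
    std::printf("%u inner steps instead of %d\n", Steps,
                BlockSize * (BlockSize - 1) / 2);
    return 0;
  }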
@@ -1339,8 +1425,9 @@ namespace {
// input of pair Q is an output of pair P. If this is the case, then these
// two pairs cannot be simultaneously fused.
bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> *PairableInstUserMap) {
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > *PairableInstUserMap,
+ DenseSet<VPPair> *PairableInstUserPairSet) {
// Two pairs are in conflict if they are mutual Users of each other.
bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) ||
PairableInstUsers.count(ValuePair(P.first, Q.second)) ||
@@ -1353,17 +1440,14 @@ namespace {
if (PairableInstUserMap) {
// FIXME: The expensive part of the cycle check is not so much the cycle
// check itself but this edge insertion procedure. This needs some
- // profiling and probably a different data structure (same is true of
- // most uses of std::multimap).
+ // profiling and probably a different data structure.
if (PUsesQ) {
- VPPIteratorPair QPairRange = PairableInstUserMap->equal_range(Q);
- if (!isSecondInIteratorPair(P, QPairRange))
- PairableInstUserMap->insert(VPPair(Q, P));
+ if (PairableInstUserPairSet->insert(VPPair(Q, P)).second)
+ (*PairableInstUserMap)[Q].push_back(P);
}
if (QUsesP) {
- VPPIteratorPair PPairRange = PairableInstUserMap->equal_range(P);
- if (!isSecondInIteratorPair(Q, PPairRange))
- PairableInstUserMap->insert(VPPair(P, Q));
+ if (PairableInstUserPairSet->insert(VPPair(P, Q)).second)
+ (*PairableInstUserMap)[P].push_back(Q);
}
}
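With the multimap gone, edge deduplication moves from a linear range scan (the deleted isSecondInIteratorPair) to a companion set: insert().second is true only for a genuinely new edge, so each edge is appended to the adjacency vector exactly once. A standalone sketch of the idiom with std containers:

  #include <cstdio>
  #include <map>
  #include <set>
  #include <utility>
  #include <vector>

  typedef std::pair<int, int> Edge;

  int main() {
    std::map<int, std::vector<int> > UserMap; // adjacency vectors
    std::set<Edge> UserPairSet;               // membership guard

    const Edge Edges[] = { Edge(1, 2), Edge(1, 2), Edge(1, 3) }; // one dup
    for (unsigned I = 0; I != 3; ++I)
      // insert().second is true only for a genuinely new edge, so each
      // edge reaches the vector exactly once, as in the hunk above.
      if (UserPairSet.insert(Edges[I]).second)
        UserMap[Edges[I].first].push_back(Edges[I].second);

    std::printf("adjacency of 1 has %zu entries\n", UserMap[1].size());
    return 0;
  }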
@@ -1373,8 +1457,8 @@ namespace {
// This function walks the use graph of current pairs to see if, starting
// from P, the walk returns to P.
bool BBVectorize::pairWillFormCycle(ValuePair P,
- std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
- DenseSet<ValuePair> &CurrentPairs) {
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<ValuePair> &CurrentPairs) {
DEBUG(if (DebugCycleCheck)
dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> "
<< *P.second << "\n");
@@ -1391,36 +1475,41 @@ namespace {
DEBUG(if (DebugCycleCheck)
dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> "
<< *QTop.second << "\n");
- VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop);
- for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first;
- C != QPairRange.second; ++C) {
- if (C->second == P) {
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
+ PairableInstUserMap.find(QTop);
+ if (QQ == PairableInstUserMap.end())
+ continue;
+
+ for (std::vector<ValuePair>::iterator C = QQ->second.begin(),
+ CE = QQ->second.end(); C != CE; ++C) {
+ if (*C == P) {
DEBUG(dbgs()
<< "BBV: rejected to prevent non-trivial cycle formation: "
- << *C->first.first << " <-> " << *C->first.second << "\n");
+ << QTop.first << " <-> " << C->second << "\n");
return true;
}
- if (CurrentPairs.count(C->second) && !Visited.count(C->second))
- Q.push_back(C->second);
+ if (CurrentPairs.count(*C) && !Visited.count(*C))
+ Q.push_back(*C);
}
} while (!Q.empty());
return false;
}
- // This function builds the initial tree of connected pairs with the
+ // This function builds the initial dag of connected pairs with the
// pair J at the root.
- void BBVectorize::buildInitialTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &Tree, ValuePair J) {
- // Each of these pairs is viewed as the root node of a Tree. The Tree
+ void BBVectorize::buildInitialDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG, ValuePair J) {
+ // Each of these pairs is viewed as the root node of a DAG. The DAG
// is then walked (depth-first). As this happens, we keep track of
- // the pairs that compose the Tree and the maximum depth of the Tree.
+ // the pairs that compose the DAG and the maximum depth of the DAG.
SmallVector<ValuePairWithDepth, 32> Q;
// General depth-first post-order traversal:
Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
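The rewritten pairWillFormCycle walk above is a plain iterative DFS over the user graph: pop a pair, look up its adjacency vector in the map, and reject as soon as the walk returns to the starting pair. A standalone sketch with ints standing in for pairs (the CurrentPairs filter from the real code is omitted for brevity):

  #include <cstdio>
  #include <map>
  #include <set>
  #include <vector>

  // Starting from pair P, follow user edges; if the walk ever returns to
  // P, fusing P would close a cycle.
  static bool willFormCycle(int P, std::map<int, std::vector<int> > &UserMap) {
    std::set<int> Visited;
    std::vector<int> Q(1, P);
    do {
      int Top = Q.back();
      Q.pop_back();
      Visited.insert(Top);
      std::map<int, std::vector<int> >::iterator QQ = UserMap.find(Top);
      if (QQ == UserMap.end())
        continue;
      for (unsigned I = 0; I != QQ->second.size(); ++I) {
        int C = QQ->second[I];
        if (C == P)
          return true;            // walked back to the starting pair
        if (!Visited.count(C))
          Q.push_back(C);
      }
    } while (!Q.empty());
    return false;
  }

  int main() {
    std::map<int, std::vector<int> > UserMap;
    UserMap[1].push_back(2);
    UserMap[2].push_back(3);
    UserMap[3].push_back(1);      // closes the loop
    std::printf("%d %d\n", (int) willFormCycle(1, UserMap),   // 1
                           (int) willFormCycle(3, UserMap));  // 1
    return 0;
  }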
@@ -1430,69 +1519,65 @@ namespace {
// Push each child onto the queue:
bool MoreChildren = false;
size_t MaxChildDepth = QTop.second;
- VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first);
- for (std::multimap<ValuePair, ValuePair>::iterator k = qtRange.first;
- k != qtRange.second; ++k) {
- // Make sure that this child pair is still a candidate:
- bool IsStillCand = false;
- VPIteratorPair checkRange =
- CandidatePairs.equal_range(k->second.first);
- for (std::multimap<Value *, Value *>::iterator m = checkRange.first;
- m != checkRange.second; ++m) {
- if (m->second == k->second.second) {
- IsStillCand = true;
- break;
- }
- }
-
- if (IsStillCand) {
- DenseMap<ValuePair, size_t>::iterator C = Tree.find(k->second);
- if (C == Tree.end()) {
- size_t d = getDepthFactor(k->second.first);
- Q.push_back(ValuePairWithDepth(k->second, QTop.second+d));
- MoreChildren = true;
- } else {
- MaxChildDepth = std::max(MaxChildDepth, C->second);
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
+ ConnectedPairs.find(QTop.first);
+ if (QQ != ConnectedPairs.end())
+ for (std::vector<ValuePair>::iterator k = QQ->second.begin(),
+ ke = QQ->second.end(); k != ke; ++k) {
+ // Make sure that this child pair is still a candidate:
+ if (CandidatePairsSet.count(*k)) {
+ DenseMap<ValuePair, size_t>::iterator C = DAG.find(*k);
+ if (C == DAG.end()) {
+ size_t d = getDepthFactor(k->first);
+ Q.push_back(ValuePairWithDepth(*k, QTop.second+d));
+ MoreChildren = true;
+ } else {
+ MaxChildDepth = std::max(MaxChildDepth, C->second);
+ }
}
}
- }
if (!MoreChildren) {
- // Record the current pair as part of the Tree:
- Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
+ // Record the current pair as part of the DAG:
+ DAG.insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
Q.pop_back();
}
} while (!Q.empty());
}
- // Given some initial tree, prune it by removing conflicting pairs (pairs
+ // Given some initial dag, prune it by removing conflicting pairs (pairs
// that cannot be simultaneously chosen for vectorization).
- void BBVectorize::pruneTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &Tree,
- DenseSet<ValuePair> &PrunedTree, ValuePair J,
- bool UseCycleCheck) {
+ void BBVectorize::pruneDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG,
+ DenseSet<ValuePair> &PrunedDAG, ValuePair J,
+ bool UseCycleCheck) {
SmallVector<ValuePairWithDepth, 32> Q;
// General depth-first post-order traversal:
Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
do {
ValuePairWithDepth QTop = Q.pop_back_val();
- PrunedTree.insert(QTop.first);
+ PrunedDAG.insert(QTop.first);
// Visit each child, pruning as necessary...
SmallVector<ValuePairWithDepth, 8> BestChildren;
- VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first);
- for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first;
- K != QTopRange.second; ++K) {
- DenseMap<ValuePair, size_t>::iterator C = Tree.find(K->second);
- if (C == Tree.end()) continue;
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
+ ConnectedPairs.find(QTop.first);
+ if (QQ == ConnectedPairs.end())
+ continue;
+
+ for (std::vector<ValuePair>::iterator K = QQ->second.begin(),
+ KE = QQ->second.end(); K != KE; ++K) {
+ DenseMap<ValuePair, size_t>::iterator C = DAG.find(*K);
+ if (C == DAG.end()) continue;
- // This child is in the Tree, now we need to make sure it is the
+ // This child is in the DAG, now we need to make sure it is the
// best of any conflicting children. There could be multiple
// conflicting children, so first, determine if we're keeping
// this child, then delete conflicting children as necessary.
@@ -1506,7 +1591,7 @@ namespace {
// fusing (a,b) we have y .. a/b .. x where y is an input
// to a/b and x is an output to a/b: x and y can no longer
// be legally fused. To prevent this condition, we must
- // make sure that a child pair added to the Tree is not
+ // make sure that a child pair added to the DAG is not
// both an input and output of an already-selected pair.
// Pairing-induced dependencies can also form from more complicated
@@ -1525,7 +1610,8 @@ namespace {
C2->first.second == C->first.first ||
C2->first.second == C->first.second ||
pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
if (C2->second >= C->second) {
CanAdd = false;
break;
@@ -1537,15 +1623,16 @@ namespace {
if (!CanAdd) continue;
// Even worse, this child could conflict with another node already
- // selected for the Tree. If that is the case, ignore this child.
- for (DenseSet<ValuePair>::iterator T = PrunedTree.begin(),
- E2 = PrunedTree.end(); T != E2; ++T) {
+ // selected for the DAG. If that is the case, ignore this child.
+ for (DenseSet<ValuePair>::iterator T = PrunedDAG.begin(),
+ E2 = PrunedDAG.end(); T != E2; ++T) {
if (T->first == C->first.first ||
T->first == C->first.second ||
T->second == C->first.first ||
T->second == C->first.second ||
pairsConflict(*T, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
CanAdd = false;
break;
}
@@ -1562,7 +1649,8 @@ namespace {
C2->first.second == C->first.first ||
C2->first.second == C->first.second ||
pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
CanAdd = false;
break;
}
@@ -1577,7 +1665,8 @@ namespace {
ChosenPairs.begin(), E2 = ChosenPairs.end();
C2 != E2; ++C2) {
if (pairsConflict(*C2, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
CanAdd = false;
break;
}
@@ -1589,7 +1678,7 @@ namespace {
// To check for non-trivial cycles formed by the addition of the
// current pair we've formed a list of all relevant pairs, now use a
// graph walk to check for a cycle. We start from the current pair and
- // walk the use tree to see if we again reach the current pair. If we
+ // walk the use dag to see if we again reach the current pair. If we
// do, then the current pair is rejected.
// FIXME: It may be more efficient to use a topological-ordering
@@ -1626,34 +1715,40 @@ namespace {
} while (!Q.empty());
}
- // This function finds the best tree of mututally-compatible connected
+ // This function finds the best dag of mutually compatible connected
// pairs, given the choice of root pairs as an iterator range.
- void BBVectorize::findBestTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
- int &BestEffSize, VPIteratorPair ChoiceRange,
- bool UseCycleCheck) {
- for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first;
- J != ChoiceRange.second; ++J) {
+ void BBVectorize::findBestDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth,
+ int &BestEffSize, Value *II, std::vector<Value *>&JJ,
+ bool UseCycleCheck) {
+ for (std::vector<Value *>::iterator J = JJ.begin(), JE = JJ.end();
+ J != JE; ++J) {
+ ValuePair IJ(II, *J);
+ if (!CandidatePairsSet.count(IJ))
+ continue;
// Before going any further, make sure that this pair does not
// conflict with any already-selected pairs (see comment below
- // near the Tree pruning for more details).
+ // near the DAG pruning for more details).
DenseSet<ValuePair> ChosenPairSet;
bool DoesConflict = false;
for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(),
E = ChosenPairs.end(); C != E; ++C) {
- if (pairsConflict(*C, *J, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0)) {
+ if (pairsConflict(*C, IJ, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
DoesConflict = true;
break;
}
@@ -1663,40 +1758,42 @@ namespace {
if (DoesConflict) continue;
if (UseCycleCheck &&
- pairWillFormCycle(*J, PairableInstUserMap, ChosenPairSet))
+ pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet))
continue;
- DenseMap<ValuePair, size_t> Tree;
- buildInitialTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
- PairableInstUsers, ChosenPairs, Tree, *J);
+ DenseMap<ValuePair, size_t> DAG;
+ buildInitialDAGFor(CandidatePairs, CandidatePairsSet,
+ PairableInsts, ConnectedPairs,
+ PairableInstUsers, ChosenPairs, DAG, IJ);
// Because we'll keep the child with the largest depth, the largest
- // depth is still the same in the unpruned Tree.
- size_t MaxDepth = Tree.lookup(*J);
+ // depth is still the same in the unpruned DAG.
+ size_t MaxDepth = DAG.lookup(IJ);
- DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {"
- << *J->first << " <-> " << *J->second << "} of depth " <<
- MaxDepth << " and size " << Tree.size() << "\n");
+ DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {"
+ << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
+ MaxDepth << " and size " << DAG.size() << "\n");
- // At this point the Tree has been constructed, but, may contain
+ // At this point the DAG has been constructed, but may contain
// contradictory children (meaning that different children of
- // some tree node may be attempting to fuse the same instruction).
- // So now we walk the tree again, in the case of a conflict,
+ // some dag node may be attempting to fuse the same instruction).
+ // So now we walk the dag again, in the case of a conflict,
// keep only the child with the largest depth. To break a tie,
// favor the first child.
- DenseSet<ValuePair> PrunedTree;
- pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
- PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree,
- PrunedTree, *J, UseCycleCheck);
+ DenseSet<ValuePair> PrunedDAG;
+ pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairableInstUsers, PairableInstUserMap,
+ PairableInstUserPairSet,
+ ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck);
int EffSize = 0;
- if (VTTI) {
- DenseSet<Value *> PrunedTreeInstrs;
- for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
- E = PrunedTree.end(); S != E; ++S) {
- PrunedTreeInstrs.insert(S->first);
- PrunedTreeInstrs.insert(S->second);
+ if (TTI) {
+ DenseSet<Value *> PrunedDAGInstrs;
+ for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
+ E = PrunedDAG.end(); S != E; ++S) {
+ PrunedDAGInstrs.insert(S->first);
+ PrunedDAGInstrs.insert(S->second);
}
// The set of pairs that have already contributed to the total cost.
@@ -1709,8 +1806,8 @@ namespace {
// The node weights represent the cost savings associated with
// fusing the pair of instructions.
- for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
- E = PrunedTree.end(); S != E; ++S) {
+ for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
+ E = PrunedDAG.end(); S != E; ++S) {
if (!isa<ShuffleVectorInst>(S->first) &&
!isa<InsertElementInst>(S->first) &&
!isa<ExtractElementInst>(S->first))
@@ -1728,15 +1825,17 @@ namespace {
// The edge weights contribute in a negative sense: they represent
// the cost of shuffles.
- VPPIteratorPair IP = ConnectedPairDeps.equal_range(*S);
- if (IP.first != ConnectedPairDeps.end()) {
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator SS =
+ ConnectedPairDeps.find(*S);
+ if (SS != ConnectedPairDeps.end()) {
unsigned NumDepsDirect = 0, NumDepsSwap = 0;
- for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
- Q != IP.second; ++Q) {
- if (!PrunedTree.count(Q->second))
+ for (std::vector<ValuePair>::iterator T = SS->second.begin(),
+ TE = SS->second.end(); T != TE; ++T) {
+ VPPair Q(*S, *T);
+ if (!PrunedDAG.count(Q.second))
continue;
DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q->second, Q->first));
+ PairConnectionTypes.find(VPPair(Q.second, Q.first));
assert(R != PairConnectionTypes.end() &&
"Cannot find pair connection type");
if (R->second == PairConnectionDirect)
@@ -1752,24 +1851,35 @@ namespace {
((NumDepsSwap > NumDepsDirect) ||
FixedOrderPairs.count(ValuePair(S->second, S->first)));
- for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
- Q != IP.second; ++Q) {
- if (!PrunedTree.count(Q->second))
+ for (std::vector<ValuePair>::iterator T = SS->second.begin(),
+ TE = SS->second.end(); T != TE; ++T) {
+ VPPair Q(*S, *T);
+ if (!PrunedDAG.count(Q.second))
continue;
DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q->second, Q->first));
+ PairConnectionTypes.find(VPPair(Q.second, Q.first));
assert(R != PairConnectionTypes.end() &&
"Cannot find pair connection type");
- Type *Ty1 = Q->second.first->getType(),
- *Ty2 = Q->second.second->getType();
+ Type *Ty1 = Q.second.first->getType(),
+ *Ty2 = Q.second.second->getType();
Type *VTy = getVecTypeForPair(Ty1, Ty2);
if ((R->second == PairConnectionDirect && FlipOrder) ||
(R->second == PairConnectionSwap && !FlipOrder) ||
R->second == PairConnectionSplat) {
int ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
VTy, VTy);
+
+ if (VTy->getVectorNumElements() == 2) {
+ if (R->second == PairConnectionSplat)
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_Broadcast, VTy));
+ else
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_Reverse, VTy));
+ }
+
DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
- *Q->second.first << " <-> " << *Q->second.second <<
+ *Q.second.first << " <-> " << *Q.second.second <<
"} -> {" <<
*S->first << " <-> " << *S->second << "} = " <<
ESContrib << "\n");
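For two-element vectors the generic shuffle estimate can overstate the cost, so the hunk above clamps it with the specific shuffle kind the connection implies: a splat connection corresponds to SK_Broadcast, a reordering to SK_Reverse. Condensed (ConnType stands in for R->second; this is a fragment of the logic above, not standalone code):

  int ESContrib = (int) getInstrCost(Instruction::ShuffleVector, VTy, VTy);
  if (VTy->getVectorNumElements() == 2) {
    TargetTransformInfo::ShuffleKind Kind =
        ConnType == PairConnectionSplat ? TargetTransformInfo::SK_Broadcast
                                        : TargetTransformInfo::SK_Reverse;
    // Take the cheaper of the generic estimate and the targeted kind.
    ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(Kind, VTy));
  }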
@@ -1796,7 +1906,7 @@ namespace {
}
if (isa<ExtractElementInst>(*I))
continue;
- if (PrunedTreeInstrs.count(*I))
+ if (PrunedDAGInstrs.count(*I))
continue;
NeedsExtraction = true;
break;
@@ -1804,11 +1914,13 @@ namespace {
if (NeedsExtraction) {
int ESContrib;
- if (Ty1->isVectorTy())
+ if (Ty1->isVectorTy()) {
ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
Ty1, VTy);
- else
- ESContrib = (int) VTTI->getVectorInstrCost(
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_ExtractSubvector, VTy, 0, Ty1));
+ } else
+ ESContrib = (int) TTI->getVectorInstrCost(
Instruction::ExtractElement, VTy, 0);
DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
@@ -1826,7 +1938,7 @@ namespace {
}
if (isa<ExtractElementInst>(*I))
continue;
- if (PrunedTreeInstrs.count(*I))
+ if (PrunedDAGInstrs.count(*I))
continue;
NeedsExtraction = true;
break;
@@ -1834,11 +1946,14 @@ namespace {
if (NeedsExtraction) {
int ESContrib;
- if (Ty2->isVectorTy())
+ if (Ty2->isVectorTy()) {
ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
Ty2, VTy);
- else
- ESContrib = (int) VTTI->getVectorInstrCost(
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_ExtractSubvector, VTy,
+ Ty1->isVectorTy() ? Ty1->getVectorNumElements() : 1, Ty2));
+ } else
+ ESContrib = (int) TTI->getVectorInstrCost(
Instruction::ExtractElement, VTy, 1);
DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
*S->second << "} = " << ESContrib << "\n");
@@ -1865,7 +1980,7 @@ namespace {
ValuePair VPR = ValuePair(O2, O1);
// Internal edges are not handled here.
- if (PrunedTree.count(VP) || PrunedTree.count(VPR))
+ if (PrunedDAG.count(VP) || PrunedDAG.count(VPR))
continue;
Type *Ty1 = O1->getType(),
@@ -1913,22 +2028,26 @@ namespace {
} else if (IncomingPairs.count(VPR)) {
ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
VTy, VTy);
+
+ if (VTy->getVectorNumElements() == 2)
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_Reverse, VTy));
} else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) {
- ESContrib = (int) VTTI->getVectorInstrCost(
+ ESContrib = (int) TTI->getVectorInstrCost(
Instruction::InsertElement, VTy, 0);
- ESContrib += (int) VTTI->getVectorInstrCost(
+ ESContrib += (int) TTI->getVectorInstrCost(
Instruction::InsertElement, VTy, 1);
} else if (!Ty1->isVectorTy()) {
// O1 needs to be inserted into a vector of size O2, and then
// both need to be shuffled together.
- ESContrib = (int) VTTI->getVectorInstrCost(
+ ESContrib = (int) TTI->getVectorInstrCost(
Instruction::InsertElement, Ty2, 0);
ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
VTy, Ty2);
} else if (!Ty2->isVectorTy()) {
// O2 needs to be inserted into a vector of size O1, and then
// both need to be shuffled together.
- ESContrib = (int) VTTI->getVectorInstrCost(
+ ESContrib = (int) TTI->getVectorInstrCost(
Instruction::InsertElement, Ty1, 0);
ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
VTy, Ty1);
@@ -1955,27 +2074,27 @@ namespace {
if (!HasNontrivialInsts) {
DEBUG(if (DebugPairSelection) dbgs() <<
- "\tNo non-trivial instructions in tree;"
+ "\tNo non-trivial instructions in DAG;"
" override to zero effective size\n");
EffSize = 0;
}
} else {
- for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
- E = PrunedTree.end(); S != E; ++S)
+ for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
+ E = PrunedDAG.end(); S != E; ++S)
EffSize += (int) getDepthFactor(S->first);
}
DEBUG(if (DebugPairSelection)
- dbgs() << "BBV: found pruned Tree for pair {"
- << *J->first << " <-> " << *J->second << "} of depth " <<
- MaxDepth << " and size " << PrunedTree.size() <<
+ dbgs() << "BBV: found pruned DAG for pair {"
+ << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
+ MaxDepth << " and size " << PrunedDAG.size() <<
" (effective size: " << EffSize << ")\n");
- if (((VTTI && !UseChainDepthWithTI) ||
+ if (((TTI && !UseChainDepthWithTI) ||
MaxDepth >= Config.ReqChainDepth) &&
EffSize > 0 && EffSize > BestEffSize) {
BestMaxDepth = MaxDepth;
BestEffSize = EffSize;
- BestTree = PrunedTree;
+ BestDAG = PrunedDAG;
}
}
}
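
The acceptance test above keeps a pruned DAG only when it clears the required
chain depth (waived when the target cost model drives the decision) and
strictly beats the best effective size seen so far. A compact restatement
with illustrative names:

#include <cstddef>

struct DAGCandidate { std::size_t MaxDepth; int EffSize; };

bool isBetterDAG(const DAGCandidate &C, bool HaveTTI, bool UseChainDepthWithTI,
                 std::size_t ReqChainDepth, int BestEffSize) {
  bool DepthOK = (HaveTTI && !UseChainDepthWithTI) || C.MaxDepth >= ReqChainDepth;
  return DepthOK && C.EffSize > 0 && C.EffSize > BestEffSize;
}
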
@@ -1983,66 +2102,98 @@ namespace {
// Given the list of candidate pairs, this function selects those
// that will be fused into vector instructions.
void BBVectorize::choosePairs(
- std::multimap<Value *, Value *> &CandidatePairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *>& ChosenPairs) {
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *>& ChosenPairs) {
bool UseCycleCheck =
- CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
- std::multimap<ValuePair, ValuePair> PairableInstUserMap;
+ CandidatePairsSet.size() <= Config.MaxCandPairsForCycleCheck;
+
+ DenseMap<Value *, std::vector<Value *> > CandidatePairs2;
+ for (DenseSet<ValuePair>::iterator I = CandidatePairsSet.begin(),
+ E = CandidatePairsSet.end(); I != E; ++I) {
+ std::vector<Value *> &JJ = CandidatePairs2[I->second];
+ if (JJ.empty()) JJ.reserve(32);
+ JJ.push_back(I->first);
+ }
+
+ DenseMap<ValuePair, std::vector<ValuePair> > PairableInstUserMap;
+ DenseSet<VPPair> PairableInstUserPairSet;
for (std::vector<Value *>::iterator I = PairableInsts.begin(),
E = PairableInsts.end(); I != E; ++I) {
// The number of possible pairings for this variable:
- size_t NumChoices = CandidatePairs.count(*I);
+ size_t NumChoices = CandidatePairs.lookup(*I).size();
if (!NumChoices) continue;
- VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I);
+ std::vector<Value *> &JJ = CandidatePairs[*I];
- // The best pair to choose and its tree:
+ // The best pair to choose and its dag:
size_t BestMaxDepth = 0;
int BestEffSize = 0;
- DenseSet<ValuePair> BestTree;
- findBestTreeFor(CandidatePairs, CandidatePairCostSavings,
+ DenseSet<ValuePair> BestDAG;
+ findBestDAGFor(CandidatePairs, CandidatePairsSet,
+ CandidatePairCostSavings,
PairableInsts, FixedOrderPairs, PairConnectionTypes,
ConnectedPairs, ConnectedPairDeps,
- PairableInstUsers, PairableInstUserMap, ChosenPairs,
- BestTree, BestMaxDepth, BestEffSize, ChoiceRange,
+ PairableInstUsers, PairableInstUserMap,
+ PairableInstUserPairSet, ChosenPairs,
+ BestDAG, BestMaxDepth, BestEffSize, *I, JJ,
UseCycleCheck);
- // A tree has been chosen (or not) at this point. If no tree was
+ if (BestDAG.empty())
+ continue;
+
+ // A dag has been chosen (or not) at this point. If no dag was
// chosen, then this instruction, I, cannot be paired (and is no longer
// considered).
- DEBUG(if (BestTree.size() > 0)
- dbgs() << "BBV: selected pairs in the best tree for: "
- << *cast<Instruction>(*I) << "\n");
+ DEBUG(dbgs() << "BBV: selected pairs in the best DAG for: "
+ << *cast<Instruction>(*I) << "\n");
- for (DenseSet<ValuePair>::iterator S = BestTree.begin(),
- SE2 = BestTree.end(); S != SE2; ++S) {
- // Insert the members of this tree into the list of chosen pairs.
+ for (DenseSet<ValuePair>::iterator S = BestDAG.begin(),
+ SE2 = BestDAG.end(); S != SE2; ++S) {
+ // Insert the members of this dag into the list of chosen pairs.
ChosenPairs.insert(ValuePair(S->first, S->second));
DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " <<
*S->second << "\n");
- // Remove all candidate pairs that have values in the chosen tree.
- for (std::multimap<Value *, Value *>::iterator K =
- CandidatePairs.begin(); K != CandidatePairs.end();) {
- if (K->first == S->first || K->second == S->first ||
- K->second == S->second || K->first == S->second) {
- // Don't remove the actual pair chosen so that it can be used
- // in subsequent tree selections.
- if (!(K->first == S->first && K->second == S->second))
- CandidatePairs.erase(K++);
- else
- ++K;
- } else {
- ++K;
- }
+ // Remove all candidate pairs that have values in the chosen dag.
+ std::vector<Value *> &KK = CandidatePairs[S->first];
+ for (std::vector<Value *>::iterator K = KK.begin(), KE = KK.end();
+ K != KE; ++K) {
+ if (*K == S->second)
+ continue;
+
+ CandidatePairsSet.erase(ValuePair(S->first, *K));
+ }
+
+ std::vector<Value *> &LL = CandidatePairs2[S->second];
+ for (std::vector<Value *>::iterator L = LL.begin(), LE = LL.end();
+ L != LE; ++L) {
+ if (*L == S->first)
+ continue;
+
+ CandidatePairsSet.erase(ValuePair(*L, S->second));
+ }
+
+ std::vector<Value *> &MM = CandidatePairs[S->second];
+ for (std::vector<Value *>::iterator M = MM.begin(), ME = MM.end();
+ M != ME; ++M) {
+ assert(*M != S->first && "Flipped pair in candidate list?");
+ CandidatePairsSet.erase(ValuePair(S->second, *M));
+ }
+
+ std::vector<Value *> &NN = CandidatePairs2[S->first];
+ for (std::vector<Value *>::iterator N = NN.begin(), NE = NN.end();
+ N != NE; ++N) {
+ assert(*N != S->second && "Flipped pair in candidate list?");
+ CandidatePairsSet.erase(ValuePair(*N, S->first));
}
}
}
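
The erasure loops above replace the old multimap scan: pair membership now
lives in a flat set, so invalidating every candidate that shares a value with
the chosen pair is a handful of O(1) erases. A simplified model with standard
containers (the real code also walks a reversed index, CandidatePairs2, to
catch pairs keyed on the other element):

#include <map>
#include <set>
#include <utility>
#include <vector>

using Val = int;
using ValuePair = std::pair<Val, Val>;

void eraseOverlapping(std::map<Val, std::vector<Val>> &CandidatePairs,
                      std::set<ValuePair> &CandidatePairsSet,
                      const ValuePair &Chosen) {
  for (Val K : CandidatePairs[Chosen.first])
    if (K != Chosen.second)  // keep the chosen pair usable for later DAGs
      CandidatePairsSet.erase({Chosen.first, K});
  for (Val M : CandidatePairs[Chosen.second])
    CandidatePairsSet.erase({Chosen.second, M});
}
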
@@ -2550,7 +2701,7 @@ namespace {
continue;
} else if (isa<CallInst>(I)) {
Function *F = cast<CallInst>(I)->getCalledFunction();
- unsigned IID = F->getIntrinsicID();
+ Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
if (o == NumOperands-1) {
BasicBlock &BB = *I->getParent();
@@ -2559,8 +2710,7 @@ namespace {
Type *ArgTypeJ = J->getType();
Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- ReplacedOperands[o] = Intrinsic::getDeclaration(M,
- (Intrinsic::ID) IID, VArgType);
+ ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType);
continue;
} else if (IID == Intrinsic::powi && o == 1) {
// The second argument of powi is a single integer and we've already
@@ -2647,7 +2797,7 @@ namespace {
// Move all uses of the function I (including pairing-induced uses) after J.
bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *I, Instruction *J) {
// Skip to the first instruction past I.
BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
@@ -2655,18 +2805,18 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
for (; cast<Instruction>(L) != J; ++L)
- (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet);
+ (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs);
assert(cast<Instruction>(L) == J &&
"Tracking has not proceeded far enough to check for dependencies");
// If J is now in the use set of I, then trackUsesOfI will return true
// and we have a dependency cycle (and the fusing operation must abort).
- return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSet);
+ return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSetPairs);
}
// Move all uses of the function I (including pairing-induced uses) after J.
void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *&InsertionPt,
Instruction *I, Instruction *J) {
// Skip to the first instruction past I.
@@ -2675,7 +2825,7 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
for (; cast<Instruction>(L) != J;) {
- if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet)) {
+ if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) {
// Move this instruction
Instruction *InstToMove = L; ++L;
@@ -2695,7 +2845,8 @@ namespace {
// to be moved after J (the second instruction) when the pair is fused.
void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB,
DenseMap<Value *, Value *> &ChosenPairs,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *I) {
// Skip to the first instruction past I.
BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
@@ -2708,8 +2859,10 @@ namespace {
// could be before I if this is an inverted input.
for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) {
if (trackUsesOfI(Users, WriteSet, I, L)) {
- if (L->mayReadFromMemory())
- LoadMoveSet.insert(ValuePair(L, I));
+ if (L->mayReadFromMemory()) {
+ LoadMoveSet[L].push_back(I);
+ LoadMoveSetPairs.insert(ValuePair(L, I));
+ }
}
}
}
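
collectPairLoadMoveSet above now populates two views of one relation: a map
of vectors for ordered iteration and a pair set for constant-time membership
tests inside trackUsesOfI. A self-contained model of that bookkeeping:

#include <map>
#include <set>
#include <utility>
#include <vector>

using Val = const void *;
using ValuePair = std::pair<Val, Val>;

struct LoadMoveInfo {
  std::map<Val, std::vector<Val>> ByLoad;  // per-load list, for iteration
  std::set<ValuePair> Pairs;               // flat set, for O(1) queries

  void record(Val Load, Val I) {
    ByLoad[Load].push_back(I);
    Pairs.insert({Load, I});
  }
  bool contains(Val Load, Val I) const { return Pairs.count({Load, I}) != 0; }
};
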
@@ -2718,20 +2871,22 @@ namespace {
// are chosen for vectorization, we can end up in a situation where the
// aliasing analysis starts returning different query results as the
// process of fusing instruction pairs continues. Because the algorithm
- // relies on finding the same use trees here as were found earlier, we'll
+ // relies on finding the same use dags here as were found earlier, we'll
// need to precompute the necessary aliasing information here and then
// manually update it during the fusion process.
void BBVectorize::collectLoadMoveSet(BasicBlock &BB,
std::vector<Value *> &PairableInsts,
DenseMap<Value *, Value *> &ChosenPairs,
- std::multimap<Value *, Value *> &LoadMoveSet) {
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs) {
for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
PIE = PairableInsts.end(); PI != PIE; ++PI) {
DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
if (P == ChosenPairs.end()) continue;
Instruction *I = cast<Instruction>(P->first);
- collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, I);
+ collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet,
+ LoadMoveSetPairs, I);
}
}
@@ -2767,12 +2922,12 @@ namespace {
// because the vector instruction is inserted in the location of the pair's
// second member).
void BBVectorize::fuseChosenPairs(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps) {
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps) {
LLVMContext& Context = BB.getContext();
// During the vectorization process, the order of the pairs to be fused
@@ -2786,8 +2941,10 @@ namespace {
E = FlippedPairs.end(); P != E; ++P)
ChosenPairs.insert(*P);
- std::multimap<Value *, Value *> LoadMoveSet;
- collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
+ DenseMap<Value *, std::vector<Value *> > LoadMoveSet;
+ DenseSet<ValuePair> LoadMoveSetPairs;
+ collectLoadMoveSet(BB, PairableInsts, ChosenPairs,
+ LoadMoveSet, LoadMoveSetPairs);
DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
@@ -2819,7 +2976,7 @@ namespace {
ChosenPairs.erase(FP);
ChosenPairs.erase(P);
- if (!canMoveUsesOfIAfterJ(BB, LoadMoveSet, I, J)) {
+ if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) {
DEBUG(dbgs() << "BBV: fusion of: " << *I <<
" <-> " << *J <<
" aborted because of non-trivial dependency cycle\n");
@@ -2836,18 +2993,20 @@ namespace {
// of dependencies connected via swaps, and those directly connected,
// and flip the order if the number of swaps is greater.
bool OrigOrder = true;
- VPPIteratorPair IP = ConnectedPairDeps.equal_range(ValuePair(I, J));
- if (IP.first == ConnectedPairDeps.end()) {
- IP = ConnectedPairDeps.equal_range(ValuePair(J, I));
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator IJ =
+ ConnectedPairDeps.find(ValuePair(I, J));
+ if (IJ == ConnectedPairDeps.end()) {
+ IJ = ConnectedPairDeps.find(ValuePair(J, I));
OrigOrder = false;
}
- if (IP.first != ConnectedPairDeps.end()) {
+ if (IJ != ConnectedPairDeps.end()) {
unsigned NumDepsDirect = 0, NumDepsSwap = 0;
- for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
- Q != IP.second; ++Q) {
+ for (std::vector<ValuePair>::iterator T = IJ->second.begin(),
+ TE = IJ->second.end(); T != TE; ++T) {
+ VPPair Q(IJ->first, *T);
DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q->second, Q->first));
+ PairConnectionTypes.find(VPPair(Q.second, Q.first));
assert(R != PairConnectionTypes.end() &&
"Cannot find pair connection type");
if (R->second == PairConnectionDirect)
@@ -2873,17 +3032,20 @@ namespace {
// If the pair being fused uses the opposite order from that in the pair
// connection map, then we need to flip the types.
- VPPIteratorPair IP = ConnectedPairs.equal_range(ValuePair(H, L));
- for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
- Q != IP.second; ++Q) {
- DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(*Q);
- assert(R != PairConnectionTypes.end() &&
- "Cannot find pair connection type");
- if (R->second == PairConnectionDirect)
- R->second = PairConnectionSwap;
- else if (R->second == PairConnectionSwap)
- R->second = PairConnectionDirect;
- }
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator HL =
+ ConnectedPairs.find(ValuePair(H, L));
+ if (HL != ConnectedPairs.end())
+ for (std::vector<ValuePair>::iterator T = HL->second.begin(),
+ TE = HL->second.end(); T != TE; ++T) {
+ VPPair Q(HL->first, *T);
+ DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(Q);
+ assert(R != PairConnectionTypes.end() &&
+ "Cannot find pair connection type");
+ if (R->second == PairConnectionDirect)
+ R->second = PairConnectionSwap;
+ else if (R->second == PairConnectionSwap)
+ R->second = PairConnectionDirect;
+ }
bool LBeforeH = !FlipPairOrder;
unsigned NumOperands = I->getNumOperands();
@@ -2915,12 +3077,12 @@ namespace {
Instruction *K1 = 0, *K2 = 0;
replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
- // The use tree of the first original instruction must be moved to after
- // the location of the second instruction. The entire use tree of the
- // first instruction is disjoint from the input tree of the second
+ // The use dag of the first original instruction must be moved to after
+ // the location of the second instruction. The entire use dag of the
+ // first instruction is disjoint from the input dag of the second
// (by definition), and so commutes with it.
- moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J);
+ moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J);
if (!isa<StoreInst>(I)) {
L->replaceAllUsesWith(K1);
@@ -2937,17 +3099,23 @@ namespace {
// yet-to-be-fused pair. The loads in question are the keys of the map.
if (I->mayReadFromMemory()) {
std::vector<ValuePair> NewSetMembers;
- VPIteratorPair IPairRange = LoadMoveSet.equal_range(I);
- VPIteratorPair JPairRange = LoadMoveSet.equal_range(J);
- for (std::multimap<Value *, Value *>::iterator N = IPairRange.first;
- N != IPairRange.second; ++N)
- NewSetMembers.push_back(ValuePair(K, N->second));
- for (std::multimap<Value *, Value *>::iterator N = JPairRange.first;
- N != JPairRange.second; ++N)
- NewSetMembers.push_back(ValuePair(K, N->second));
+ DenseMap<Value *, std::vector<Value *> >::iterator II =
+ LoadMoveSet.find(I);
+ if (II != LoadMoveSet.end())
+ for (std::vector<Value *>::iterator N = II->second.begin(),
+ NE = II->second.end(); N != NE; ++N)
+ NewSetMembers.push_back(ValuePair(K, *N));
+ DenseMap<Value *, std::vector<Value *> >::iterator JJ =
+ LoadMoveSet.find(J);
+ if (JJ != LoadMoveSet.end())
+ for (std::vector<Value *>::iterator N = JJ->second.begin(),
+ NE = JJ->second.end(); N != NE; ++N)
+ NewSetMembers.push_back(ValuePair(K, *N));
for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(),
- AE = NewSetMembers.end(); A != AE; ++A)
- LoadMoveSet.insert(*A);
+ AE = NewSetMembers.end(); A != AE; ++A) {
+ LoadMoveSet[A->first].push_back(A->second);
+ LoadMoveSetPairs.insert(*A);
+ }
}
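
The update above re-keys pending load moves when two instructions fuse:
entries recorded for I and for J must reappear under the fused instruction K
so later queries still see them. A simplified model with standard containers:

#include <map>
#include <set>
#include <utility>
#include <vector>

using Val = const void *;
using ValuePair = std::pair<Val, Val>;

void rekeyAfterFusion(std::map<Val, std::vector<Val>> &LoadMoveSet,
                      std::set<ValuePair> &LoadMoveSetPairs,
                      Val I, Val J, Val K) {
  std::vector<ValuePair> NewMembers;
  auto II = LoadMoveSet.find(I);
  if (II != LoadMoveSet.end())
    for (Val X : II->second) NewMembers.push_back({K, X});
  auto JJ = LoadMoveSet.find(J);
  if (JJ != LoadMoveSet.end())
    for (Val X : JJ->second) NewMembers.push_back({K, X});
  for (const ValuePair &P : NewMembers) {
    LoadMoveSet[P.first].push_back(P.second);
    LoadMoveSetPairs.insert(P);
  }
}
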
// Before removing I, set the iterator to the next instruction.
@@ -2972,6 +3140,7 @@ char BBVectorize::ID = 0;
static const char bb_vectorize_name[] = "Basic-Block Vectorization";
INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
@@ -3006,6 +3175,7 @@ VectorizeConfig::VectorizeConfig() {
MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
SplatBreaksChain = ::SplatBreaksChain;
MaxInsts = ::MaxInsts;
+ MaxPairs = ::MaxPairs;
MaxIter = ::MaxIter;
Pow2LenOnly = ::Pow2LenOnly;
NoMemOpBoost = ::NoMemOpBoost;
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index a7ef248e6e3d..acf2b819b813 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9,10 +9,10 @@
//
// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
// and generates target-independent LLVM-IR. Legalization of the IR is done
-// in the codegen. However, the vectorizes uses (will use) the codegen
+// in the codegen. However, the vectorizer uses (will use) the codegen
// interfaces to generate IR that is likely to result in an optimal binary.
//
-// The loop vectorizer combines consecutive loop iteration into a single
+// The loop vectorizer combines consecutive loop iterations into a single
// 'wide' iteration. After this transformation the index is incremented
// by the SIMD vector width, and not by one.
//
@@ -20,67 +20,107 @@
// 1. The main loop pass that drives the different parts.
// 2. LoopVectorizationLegality - A unit that checks for the legality
// of the vectorization.
-// 3. SingleBlockLoopVectorizer - A unit that performs the actual
+// 3. InnerLoopVectorizer - A unit that performs the actual
// widening of instructions.
// 4. LoopVectorizationCostModel - A unit that checks for the profitability
// of vectorization. It decides on the optimal vector width, which
// can be one, if vectorization is not profitable.
+//
//===----------------------------------------------------------------------===//
//
// The reduction-variable vectorization is based on the paper:
// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
//
// Variable uniformity checks are inspired by:
-// Karrenberg, R. and Hack, S. Whole Function Vectorization.
+// Karrenberg, R. and Hack, S. Whole Function Vectorization.
//
// Other ideas/concepts are from:
// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
//
+// S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of
+// Vectorizing Compilers.
+//
//===----------------------------------------------------------------------===//
+
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Pass.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Value.h"
-#include "llvm/Function.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
+
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
+#include <map>
+
using namespace llvm;
static cl::opt<unsigned>
VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
- cl::desc("Set the default vectorization width. Zero is autoselect."));
+ cl::desc("Sets the SIMD width. Zero is autoselect."));
+
+static cl::opt<unsigned>
+VectorizationUnroll("force-vector-unroll", cl::init(0), cl::Hidden,
+ cl::desc("Sets the vectorization unroll count. "
+ "Zero is autoselect."));
+
+static cl::opt<bool>
+EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
+ cl::desc("Enable if-conversion during vectorization."));
/// We don't vectorize loops with a known constant trip count below this number.
-const unsigned TinyTripCountThreshold = 16;
+static cl::opt<unsigned>
+TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
+ cl::Hidden,
+ cl::desc("Don't vectorize loops with a constant "
+ "trip count that is smaller than this "
+ "value."));
+
+/// We don't unroll loops with a known constant trip count below this number.
+static const unsigned TinyTripCountUnrollThreshold = 128;
/// When performing a runtime memory check, do not check more than this
/// number of pointers. Notice that the check is quadratic!
-const unsigned RuntimeMemoryCheckThreshold = 2;
+static const unsigned RuntimeMemoryCheckThreshold = 4;
+
+/// We use a metadata with this name to indicate that a scalar loop was
+/// vectorized and that we don't need to re-vectorize it if we run into it
+/// again.
+static const char*
+AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized";
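
The options above follow LLVM's usual cl::opt idiom: a file-scope static that
is parsed from the command line and then reads like a plain variable at each
use site. A minimal sketch, assuming only the CommandLine header; the flag
name here is made up for illustration:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<unsigned>
ExampleTripCountThreshold("example-min-trip-count", cl::init(16), cl::Hidden,
                          cl::desc("Illustrative threshold; not a real flag."));

// The option converts implicitly to its underlying type at use sites.
static bool tripCountTooSmall(unsigned TC) {
  return TC > 0 && TC < ExampleTripCountThreshold;
}
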
namespace {
@@ -88,7 +128,7 @@ namespace {
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
-/// SingleBlockLoopVectorizer vectorizes loops which contain only one basic
+/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
/// scalars. This class also implements the following features:
@@ -97,36 +137,61 @@ class LoopVectorizationCostModel;
/// * It handles the code generation for reduction variables.
/// * Scalarization (implementation using scalars) of un-vectorizable
/// instructions.
-/// SingleBlockLoopVectorizer does not perform any vectorization-legality
+/// InnerLoopVectorizer does not perform any vectorization-legality
/// checks, and relies on the caller to check for the different legality
-/// aspects. The SingleBlockLoopVectorizer relies on the
+/// aspects. The InnerLoopVectorizer relies on the
/// LoopVectorizationLegality class to provide information about the induction
/// and reduction variables that were found to a given vectorization factor.
-class SingleBlockLoopVectorizer {
+class InnerLoopVectorizer {
public:
- /// Ctor.
- SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li,
- DominatorTree *dt, LPPassManager *Lpm,
- unsigned VecWidth):
- OrigLoop(Orig), SE(Se), LI(Li), DT(dt), LPM(Lpm), VF(VecWidth),
- Builder(Se->getContext()), Induction(0), OldInduction(0) { }
+ InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, DataLayout *DL,
+ const TargetLibraryInfo *TLI, unsigned VecWidth,
+ unsigned UnrollFactor)
+ : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
+ VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0),
+ OldInduction(0), WidenMap(UnrollFactor) {}
// Perform the actual loop widening (vectorization).
void vectorize(LoopVectorizationLegality *Legal) {
- ///Create a new empty loop. Unlink the old loop and connect the new one.
+ // Create a new empty loop. Unlink the old loop and connect the new one.
createEmptyLoop(Legal);
- /// Widen each instruction in the old loop to a new one in the new loop.
- /// Use the Legality module to find the induction and reduction variables.
+ // Widen each instruction in the old loop to a new one in the new loop.
+ // Use the Legality module to find the induction and reduction variables.
vectorizeLoop(Legal);
// Register the new loop and update the analysis passes.
updateAnalysis();
- }
+ }
private:
+ /// A small list of PHINodes.
+ typedef SmallVector<PHINode*, 4> PhiVector;
+ /// When we unroll loops we have multiple vector values for each scalar.
+ /// This data structure holds the unrolled and vectorized values that
+ /// originated from one scalar instruction.
+ typedef SmallVector<Value*, 2> VectorParts;
+
+ /// Add code that checks at runtime if the accessed arrays overlap.
+ /// Returns the comparator value or NULL if no check is needed.
+ Instruction *addRuntimeCheck(LoopVectorizationLegality *Legal,
+ Instruction *Loc);
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop(LoopVectorizationLegality *Legal);
/// Copy and widen the instructions from the old loop.
void vectorizeLoop(LoopVectorizationLegality *Legal);
+
+ /// A helper function that computes the predicate of the block BB, assuming
+ /// that the header block of the loop is set to True. It returns the *entry*
+ /// mask for the block BB.
+ VectorParts createBlockInMask(BasicBlock *BB);
+ /// A helper function that computes the predicate of the edge between SRC
+ /// and DST.
+ VectorParts createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
+
+ /// A helper function to vectorize a single BB within the innermost loop.
+ void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB,
+ PhiVector *PV);
+
/// Insert the new loop to the loop hierarchy and pass manager
/// and update the analysis passes.
void updateAnalysis();
@@ -135,6 +200,10 @@ private:
/// of scalars.
void scalarizeInstruction(Instruction *Instr);
+ /// Vectorize Load and Store instructions,
+ void vectorizeMemoryInstruction(Instruction *Instr,
+ LoopVectorizationLegality *Legal);
+
/// Create a broadcast instruction. This method generates a broadcast
/// instruction (shuffle) for loop invariant values and for the induction
/// value. If this is the induction variable then we extend it to N, N+1, ...
@@ -142,37 +211,82 @@ private:
/// element.
Value *getBroadcastInstrs(Value *V);
- /// This is a helper function used by getBroadcastInstrs. It adds 0, 1, 2 ..
- /// for each element in the vector. Starting from zero.
- Value *getConsecutiveVector(Value* Val);
+  /// This function adds 0, 1, 2, ... to each element of a vector, starting
+  /// at StartIdx. If Negate is set then negative offsets are added instead,
+  /// giving the sequence (StartIdx, StartIdx - 1, StartIdx - 2, ...).
+ Value *getConsecutiveVector(Value* Val, unsigned StartIdx, bool Negate);
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
/// When we widen (vectorize) values we place them in the map. If the values
/// are not within the map, they have to be loop invariant, so we simply
/// broadcast them into a vector.
- Value *getVectorValue(Value *V);
+ VectorParts &getVectorValue(Value *V);
+
+ /// Generate a shuffle sequence that will reverse the vector Vec.
+ Value *reverseVector(Value *Vec);
+
+ /// This is a helper class that holds the vectorizer state. It maps scalar
+ /// instructions to vector instructions. When the code is 'unrolled' then
+  /// a single scalar value is mapped to multiple vector parts. The parts
+ /// are stored in the VectorPart type.
+ struct ValueMap {
+ /// C'tor. UnrollFactor controls the number of vectors ('parts') that
+ /// are mapped.
+ ValueMap(unsigned UnrollFactor) : UF(UnrollFactor) {}
+
+ /// \return True if 'Key' is saved in the Value Map.
+ bool has(Value *Key) const { return MapStorage.count(Key); }
+
+ /// Initializes a new entry in the map. Sets all of the vector parts to the
+ /// save value in 'Val'.
+ /// \return A reference to a vector with splat values.
+ VectorParts &splat(Value *Key, Value *Val) {
+ VectorParts &Entry = MapStorage[Key];
+ Entry.assign(UF, Val);
+ return Entry;
+ }
- /// Get a uniform vector of constant integers. We use this to get
- /// vectors of ones and zeros for the reduction code.
- Constant* getUniformVector(unsigned Val, Type* ScalarTy);
+ ///\return A reference to the value that is stored at 'Key'.
+ VectorParts &get(Value *Key) {
+ VectorParts &Entry = MapStorage[Key];
+ if (Entry.empty())
+ Entry.resize(UF);
+ assert(Entry.size() == UF);
+ return Entry;
+ }
- typedef DenseMap<Value*, Value*> ValueMap;
+ private:
+ /// The unroll factor. Each entry in the map stores this number of vector
+ /// elements.
+ unsigned UF;
+
+ /// Map storage. We use std::map and not DenseMap because insertions to a
+    /// dense map invalidate its iterators.
+ std::map<Value *, VectorParts> MapStorage;
+ };
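
The ValueMap above is small but load-bearing: splat() seeds all UF unrolled
parts with one value, get() lazily sizes an entry, and std::map is used
because, unlike DenseMap, it never invalidates references on insertion. A
self-contained model of those semantics:

#include <cassert>
#include <map>
#include <vector>

struct MiniValueMap {
  explicit MiniValueMap(unsigned UnrollFactor) : UF(UnrollFactor) {}

  std::vector<int> &splat(int Key, int Val) {
    std::vector<int> &Entry = Storage[Key];
    Entry.assign(UF, Val);  // every unrolled part holds the same value
    return Entry;
  }
  std::vector<int> &get(int Key) {
    std::vector<int> &Entry = Storage[Key];
    if (Entry.empty()) Entry.resize(UF);  // lazily size a fresh entry
    assert(Entry.size() == UF);
    return Entry;
  }

private:
  unsigned UF;
  std::map<int, std::vector<int>> Storage;  // stable references on insert
};
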
/// The original loop.
Loop *OrigLoop;
- // Scev analysis to use.
+ /// Scev analysis to use.
ScalarEvolution *SE;
- // Loop Info.
+ /// Loop Info.
LoopInfo *LI;
- // Dominator Tree.
+ /// Dominator Tree.
DominatorTree *DT;
- // Loop Pass Manager;
- LPPassManager *LPM;
- // The vectorization factor to use.
+ /// Data Layout.
+ DataLayout *DL;
+ /// Target Library Info.
+ const TargetLibraryInfo *TLI;
+
+ /// The vectorization SIMD factor to use. Each vector will have this many
+ /// vector elements.
unsigned VF;
+ /// The vectorization unroll factor to use. Each scalar is vectorized to this
+ /// many different vector instructions.
+ unsigned UF;
- // The builder that we use
+ /// The builder that we use
IRBuilder<> Builder;
// --- Vectorization state ---
@@ -189,14 +303,14 @@ private:
BasicBlock *LoopVectorBody;
///The scalar loop body.
BasicBlock *LoopScalarBody;
- ///The first bypass block.
- BasicBlock *LoopBypassBlock;
+ /// A list of all bypass blocks. The first block is the entry of the loop.
+ SmallVector<BasicBlock *, 4> LoopBypassBlocks;
/// The new Induction variable which was added to the new block.
PHINode *Induction;
/// The induction variable of the old basic block.
PHINode *OldInduction;
- // Maps scalars to widened vectors.
+ /// Maps scalars to widened vectors.
ValueMap WidenMap;
};
@@ -207,36 +321,48 @@ private:
/// * Memory checks - The code in canVectorizeMemory checks if vectorization
/// will change the order of memory accesses in a way that will change the
/// correctness of the program.
-/// * Scalars checks - The code in canVectorizeBlock checks for a number
-/// of different conditions, such as the availability of a single induction
-/// variable, that all types are supported and vectorize-able, etc.
-/// This code reflects the capabilities of SingleBlockLoopVectorizer.
-/// This class is also used by SingleBlockLoopVectorizer for identifying
+/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
+/// checks for a number of different conditions, such as the availability of a
+/// single induction variable, that all types are supported and vectorize-able,
+/// etc. This code reflects the capabilities of InnerLoopVectorizer.
+/// This class is also used by InnerLoopVectorizer for identifying
/// induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
- LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl):
- TheLoop(Lp), SE(Se), DL(Dl), Induction(0) { }
+ LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
+ DominatorTree *DT, TargetTransformInfo* TTI,
+ AliasAnalysis *AA, TargetLibraryInfo *TLI)
+ : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
+ Induction(0) {}
- /// This represents the kinds of reductions that we support.
+ /// This enum represents the kinds of reductions that we support.
enum ReductionKind {
- NoReduction, /// Not a reduction.
- IntegerAdd, /// Sum of numbers.
- IntegerMult, /// Product of numbers.
- IntegerOr, /// Bitwise or logical OR of numbers.
- IntegerAnd, /// Bitwise or logical AND of numbers.
- IntegerXor /// Bitwise or logical XOR of numbers.
+ RK_NoReduction, ///< Not a reduction.
+ RK_IntegerAdd, ///< Sum of integers.
+ RK_IntegerMult, ///< Product of integers.
+ RK_IntegerOr, ///< Bitwise or logical OR of numbers.
+ RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
+ RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
+ RK_FloatAdd, ///< Sum of floats.
+ RK_FloatMult ///< Product of floats.
+ };
+
+ /// This enum represents the kinds of inductions that we support.
+ enum InductionKind {
+ IK_NoInduction, ///< Not an induction variable.
+ IK_IntInduction, ///< Integer induction variable. Step = 1.
+ IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1.
+ IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem).
+ IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem).
};
/// This POD struct holds information about reduction variables.
struct ReductionDescriptor {
- // Default C'tor
- ReductionDescriptor():
- StartValue(0), LoopExitInstr(0), Kind(NoReduction) {}
+ ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
+ Kind(RK_NoReduction) {}
- // C'tor.
- ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K):
- StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
+ ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K)
+ : StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
// The starting value of the reduction.
// It does not have to be zero!
@@ -250,52 +376,113 @@ public:
// This POD struct holds information about the memory runtime legality
// check that a group of pointers do not overlap.
struct RuntimePointerCheck {
+ RuntimePointerCheck() : Need(false) {}
+
+ /// Reset the state of the pointer runtime information.
+ void reset() {
+ Need = false;
+ Pointers.clear();
+ Starts.clear();
+ Ends.clear();
+ }
+
+ /// Insert a pointer and calculate the start and end SCEVs.
+ void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr);
+
/// This flag indicates if we need to add the runtime check.
bool Need;
/// Holds the pointers that we need to check.
SmallVector<Value*, 2> Pointers;
+ /// Holds the pointer value at the beginning of the loop.
+ SmallVector<const SCEV*, 2> Starts;
+ /// Holds the pointer value at the end of the loop.
+ SmallVector<const SCEV*, 2> Ends;
+ };
+
+ /// A POD for saving information about induction variables.
+ struct InductionInfo {
+ InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
+ InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
+ /// Start value.
+ Value *StartValue;
+ /// Induction kind.
+ InductionKind IK;
};
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
+ /// InductionList saves induction variables and maps them to the
+ /// induction descriptor.
+ typedef MapVector<PHINode*, InductionInfo> InductionList;
+
+ /// Alias(Multi)Map stores the values (GEPs or underlying objects and their
+ /// respective Store/Load instruction(s) to calculate aliasing.
+ typedef MapVector<Value*, Instruction* > AliasMap;
+ typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap;
+
/// Returns true if it is legal to vectorize this loop.
/// This does not mean that it is profitable to vectorize this
/// loop, only that it is legal to do so.
bool canVectorize();
/// Returns the Induction variable.
- PHINode *getInduction() {return Induction;}
+ PHINode *getInduction() { return Induction; }
/// Returns the reduction variables found in the loop.
ReductionList *getReductionVars() { return &Reductions; }
- /// Check if the pointer returned by this GEP is consecutive
- /// when the index is vectorized. This happens when the last
- /// index of the GEP is consecutive, like the induction variable.
+ /// Returns the induction variables found in the loop.
+ InductionList *getInductionVars() { return &Inductions; }
+
+ /// Returns True if V is an induction variable in this loop.
+ bool isInductionVariable(const Value *V);
+
+ /// Return true if the block BB needs to be predicated in order for the loop
+ /// to be vectorized.
+ bool blockNeedsPredication(BasicBlock *BB);
+
+ /// Check if this pointer is consecutive when vectorizing. This happens
+  /// when the last index of the GEP is the induction variable, or when the
+ /// pointer itself is an induction variable.
/// This check allows us to vectorize A[idx] into a wide load/store.
- bool isConsecutiveGep(Value *Ptr);
+ /// Returns:
+  /// 0 - Stride is unknown or non-consecutive.
+ /// 1 - Address is consecutive.
+ /// -1 - Address is consecutive, and decreasing.
+ int isConsecutivePtr(Value *Ptr);
/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V);
/// Returns true if this instruction will remain scalar after vectorization.
- bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
+ bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
/// Returns the information that we collected about runtime memory check.
- RuntimePointerCheck *getRuntimePointerCheck() {return &PtrRtCheck; }
+ RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
/// and we only need to check individual instructions.
- bool canVectorizeBlock(BasicBlock &BB);
+ bool canVectorizeInstrs();
/// When we vectorize loops we may change the order in which
/// we read and write from memory. This method checks if it is
  /// legal to vectorize the code, considering only memory constraints.
- /// Returns true if BB is vectorizable
- bool canVectorizeMemory(BasicBlock &BB);
+ /// Returns true if the loop is vectorizable
+ bool canVectorizeMemory();
+
+ /// Return true if we can vectorize this loop using the IF-conversion
+ /// transformation.
+ bool canVectorizeWithIfConvert();
+
+ /// Collect the variables that need to stay uniform after vectorization.
+ void collectLoopUniforms();
+
+ /// Return true if all of the instructions in the block can be speculatively
+ /// executed.
+ bool blockCanBePredicated(BasicBlock *BB);
/// Returns True, if 'Phi' is the kind of reduction variable for type
/// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
@@ -303,10 +490,19 @@ private:
/// Returns true if the instruction I can be a reduction variable of type
/// 'Kind'.
bool isReductionInstr(Instruction *I, ReductionKind Kind);
- /// Returns True, if 'Phi' is an induction variable.
- bool isInductionVariable(PHINode *Phi);
+  /// Returns the induction kind of Phi. This function may return IK_NoInduction
+ /// if the PHI is not an induction variable.
+ InductionKind isInductionVariable(PHINode *Phi);
  /// Return true if we can compute the address bounds of Ptr within the loop.
bool hasComputableBounds(Value *Ptr);
+  /// Return true if there is a chance that writes may be reordered.
+ bool hasPossibleGlobalWriteReorder(Value *Object,
+ Instruction *Inst,
+ AliasMultiMap &WriteObjects,
+ unsigned MaxByteWidth);
+ /// Return the AA location for a load or a store.
+ AliasAnalysis::Location getLoadStoreLocation(Instruction *Inst);
+
/// The loop that we evaluate.
Loop *TheLoop;
@@ -314,13 +510,27 @@ private:
ScalarEvolution *SE;
/// DataLayout analysis.
DataLayout *DL;
+ /// Dominators.
+ DominatorTree *DT;
+ /// Target Info.
+ TargetTransformInfo *TTI;
+ /// Alias Analysis.
+ AliasAnalysis *AA;
+ /// Target Library Info.
+ TargetLibraryInfo *TLI;
// --- vectorization state --- //
- /// Holds the induction variable.
+ /// Holds the integer induction variable. This is the counter of the
+ /// loop.
PHINode *Induction;
/// Holds the reduction variables.
ReductionList Reductions;
+ /// Holds all of the induction variables that we found in the loop.
+ /// Notice that inductions don't need to start at zero and that induction
+ /// variables can be pointers.
+ InductionList Inductions;
+
/// Allowed outside users. This holds the reduction
/// vars which can be accessed from outside the loop.
SmallPtrSet<Value*, 4> AllowedExit;
@@ -334,23 +544,57 @@ private:
/// LoopVectorizationCostModel - estimates the expected speedups due to
/// vectorization.
-/// In many cases vectorization is not profitable. This can happen because
-/// of a number of reasons. In this class we mainly attempt to predict
-/// the expected speedup/slowdowns due to the supported instruction set.
-/// We use the VectorTargetTransformInfo to query the different backends
-/// for the cost of different operations.
+/// In many cases vectorization is not profitable. This can happen because of
+/// a number of reasons. In this class we mainly attempt to predict the
+/// expected speedup/slowdowns due to the supported instruction set. We use the
+/// TargetTransformInfo to query the different backends for the cost of
+/// different operations.
class LoopVectorizationCostModel {
public:
- /// C'tor.
- LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
- LoopVectorizationLegality *Leg,
- const VectorTargetTransformInfo *Vtti):
- TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
+ LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
+ LoopVectorizationLegality *Legal,
+ const TargetTransformInfo &TTI,
+ DataLayout *DL, const TargetLibraryInfo *TLI)
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {}
+
+ /// Information about vectorization costs
+ struct VectorizationFactor {
+ unsigned Width; // Vector width with best cost
+ unsigned Cost; // Cost of the loop with that width
+ };
+ /// \return The most profitable vectorization factor and the cost of that VF.
+ /// This method checks every power of two up to VF. If UserVF is not ZERO
+ /// then this vectorization factor will be selected if vectorization is
+ /// possible.
+ VectorizationFactor selectVectorizationFactor(bool OptForSize,
+ unsigned UserVF);
+
+ /// \return The size (in bits) of the widest type in the code that
+ /// needs to be vectorized. We ignore values that remain scalar such as
+ /// 64 bit loop indices.
+ unsigned getWidestType();
+
+ /// \return The most profitable unroll factor.
+ /// If UserUF is non-zero then this method finds the best unroll-factor
+ /// based on register pressure and other parameters.
+ /// VF and LoopCost are the selected vectorization factor and the cost of the
+ /// selected VF.
+ unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF, unsigned VF,
+ unsigned LoopCost);
+
+ /// \brief A struct that represents some properties of the register usage
+ /// of a loop.
+ struct RegisterUsage {
+ /// Holds the number of loop invariant values that are used in the loop.
+ unsigned LoopInvariantRegs;
+ /// Holds the maximum number of concurrent live intervals in the loop.
+ unsigned MaxLocalUsers;
+ /// Holds the number of instructions in the loop.
+ unsigned NumInstructions;
+ };
- /// Returns the most profitable vectorization factor for the loop that is
- /// smaller or equal to the VF argument. This method checks every power
- /// of two up to VF.
- unsigned findBestVectorizationFactor(unsigned VF = 8);
+ /// \return information about the register usage of the loop.
+ RegisterUsage calculateRegisterUsage();
private:
/// Returns the expected execution cost. The unit of the cost does
@@ -368,21 +612,32 @@ private:
/// the scalar type.
static Type* ToVectorTy(Type *Scalar, unsigned VF);
+  /// Returns whether the instruction is a load or store and will be emitted
+ /// as a vector operation.
+ bool isConsecutiveLoadOrStore(Instruction *I);
+
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
ScalarEvolution *SE;
-
+ /// Loop Info analysis.
+ LoopInfo *LI;
/// Vectorization legality.
LoopVectorizationLegality *Legal;
/// Vector target information.
- const VectorTargetTransformInfo *VTTI;
+ const TargetTransformInfo &TTI;
+ /// Target data layout information.
+ DataLayout *DL;
+ /// Target Library Info.
+ const TargetLibraryInfo *TLI;
};
+/// The LoopVectorize Pass.
struct LoopVectorize : public LoopPass {
- static char ID; // Pass identification, replacement for typeid
+ /// Pass identification, replacement for typeid
+ static char ID;
- LoopVectorize() : LoopPass(ID) {
+ explicit LoopVectorize() : LoopPass(ID) {
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
@@ -391,6 +646,8 @@ struct LoopVectorize : public LoopPass {
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
+ AliasAnalysis *AA;
+ TargetLibraryInfo *TLI;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -400,45 +657,57 @@ struct LoopVectorize : public LoopPass {
SE = &getAnalysis<ScalarEvolution>();
DL = getAnalysisIfAvailable<DataLayout>();
LI = &getAnalysis<LoopInfo>();
- TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ TTI = &getAnalysis<TargetTransformInfo>();
DT = &getAnalysis<DominatorTree>();
+ AA = getAnalysisIfAvailable<AliasAnalysis>();
+ TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA, TLI);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
}
- // Select the preffered vectorization factor.
- unsigned VF = 1;
- if (VectorizationFactor == 0) {
- const VectorTargetTransformInfo *VTTI = 0;
- if (TTI)
- VTTI = TTI->getVectorTargetTransformInfo();
- // Use the cost model.
- LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
- VF = CM.findBestVectorizationFactor();
-
- if (VF == 1) {
- DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
- return false;
- }
+ // Use the cost model.
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI);
+
+ // Check the function attributes to find out if this function should be
+ // optimized for size.
+ Function *F = L->getHeader()->getParent();
+ Attribute::AttrKind SzAttr = Attribute::OptimizeForSize;
+ Attribute::AttrKind FlAttr = Attribute::NoImplicitFloat;
+ unsigned FnIndex = AttributeSet::FunctionIndex;
+ bool OptForSize = F->getAttributes().hasAttribute(FnIndex, SzAttr);
+ bool NoFloat = F->getAttributes().hasAttribute(FnIndex, FlAttr);
+
+ if (NoFloat) {
+ DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
+ "attribute is used.\n");
+ return false;
+ }
- } else {
- // Use the user command flag.
- VF = VectorizationFactor;
+ // Select the optimal vectorization factor.
+ LoopVectorizationCostModel::VectorizationFactor VF;
+ VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor);
+ // Select the unroll factor.
+ unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll,
+ VF.Width, VF.Cost);
+
+ if (VF.Width == 1) {
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+ return false;
}
- DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
- L->getHeader()->getParent()->getParent()->getModuleIdentifier()<<
- "\n");
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
+ F->getParent()->getModuleIdentifier()<<"\n");
+ DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n");
- // If we decided that it is *legal* to vectorizer the loop then do it.
- SingleBlockLoopVectorizer LB(L, SE, LI, DT, &LPM, VF);
+ // If we decided that it is *legal* to vectorize the loop then do it.
+ InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
LB.vectorize(&LVL);
DEBUG(verifyFunction(*L->getHeader()->getParent()));
@@ -449,52 +718,75 @@ struct LoopVectorize : public LoopPass {
LoopPass::getAnalysisUsage(AU);
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
+ AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetTransformInfo>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTree>();
}
};
-Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
- // Instructions that access the old induction variable
- // actually want to get the new one.
- if (V == OldInduction)
- V = Induction;
- // Create the types.
- LLVMContext &C = V->getContext();
- Type *VTy = VectorType::get(V->getType(), VF);
- Type *I32 = IntegerType::getInt32Ty(C);
- Constant *Zero = ConstantInt::get(I32, 0);
- Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32, VF));
- Value *UndefVal = UndefValue::get(VTy);
- // Insert the value into a new vector.
- Value *SingleElem = Builder.CreateInsertElement(UndefVal, V, Zero);
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation of LoopVectorizationLegality, InnerLoopVectorizer and
+// LoopVectorizationCostModel.
+//===----------------------------------------------------------------------===//
+
+void
+LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
+ Loop *Lp, Value *Ptr) {
+ const SCEV *Sc = SE->getSCEV(Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ assert(AR && "Invalid addrec expression");
+ const SCEV *Ex = SE->getExitCount(Lp, Lp->getLoopLatch());
+ const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+ Pointers.push_back(Ptr);
+ Starts.push_back(AR->getStart());
+ Ends.push_back(ScEnd);
+}
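
insert() above records, per pointer, SCEVs for its first and last addresses
in the loop; the emitted runtime check then tests every pair of recorded
ranges for overlap, which is why RuntimeMemoryCheckThreshold caps the pointer
count (the test is quadratic). A sketch of that overlap test, with plain
integers standing in for the SCEV-derived bounds:

#include <cstddef>
#include <vector>

struct PtrRange { std::size_t Start, End; };  // half-open [Start, End)

bool anyOverlap(const std::vector<PtrRange> &Ranges) {
  for (std::size_t i = 0; i < Ranges.size(); ++i)
    for (std::size_t j = i + 1; j < Ranges.size(); ++j)
      if (Ranges[i].Start < Ranges[j].End && Ranges[j].Start < Ranges[i].End)
        return true;  // unsafe to vectorize; fall back to the scalar loop
  return false;
}
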
+
+Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
+ // Save the current insertion location.
+ Instruction *Loc = Builder.GetInsertPoint();
+
+ // We need to place the broadcast of invariant variables outside the loop.
+ Instruction *Instr = dyn_cast<Instruction>(V);
+ bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
+ bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
+
+ // Place the code for broadcasting invariant variables in the new preheader.
+ if (Invariant)
+ Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
+
// Broadcast the scalar into all locations in the vector.
- Value *Shuf = Builder.CreateShuffleVector(SingleElem, UndefVal, Zeros,
- "broadcast");
- // We are accessing the induction variable. Make sure to promote the
- // index for each consecutive SIMD lane. This adds 0,1,2 ... to all lanes.
- if (V == Induction)
- return getConsecutiveVector(Shuf);
+ Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
+
+ // Restore the builder insertion point.
+ if (Invariant)
+ Builder.SetInsertPoint(Loc);
+
return Shuf;
}
-Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) {
+Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx,
+ bool Negate) {
assert(Val->getType()->isVectorTy() && "Must be a vector");
assert(Val->getType()->getScalarType()->isIntegerTy() &&
"Elem must be an integer");
// Create the types.
Type *ITy = Val->getType()->getScalarType();
VectorType *Ty = cast<VectorType>(Val->getType());
- unsigned VLen = Ty->getNumElements();
+ int VLen = Ty->getNumElements();
SmallVector<Constant*, 8> Indices;
// Create a vector of consecutive numbers from zero to VF.
- for (unsigned i = 0; i < VLen; ++i)
- Indices.push_back(ConstantInt::get(ITy, i));
+ for (int i = 0; i < VLen; ++i) {
+ int Idx = Negate ? (-i): i;
+ Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx));
+ }
// Add the consecutive indices to the vector value.
Constant *Cv = ConstantVector::get(Indices);
@@ -502,20 +794,58 @@ Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) {
return Builder.CreateAdd(Val, Cv, "induction");
}
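
A scalar model of getConsecutiveVector's contract, with a std::vector
standing in for the IR vector value:

#include <cstddef>
#include <vector>

std::vector<long> consecutiveVector(std::vector<long> Val, unsigned StartIdx,
                                    bool Negate) {
  for (std::size_t i = 0; i < Val.size(); ++i) {
    long Idx = Negate ? -static_cast<long>(i) : static_cast<long>(i);
    Val[i] += static_cast<long>(StartIdx) + Idx;
  }
  return Val;
}
// For VF = 4, StartIdx = 0, Negate = true this adds {0, -1, -2, -3}: the
// lane offsets used for reverse (negative-stride) accesses.
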
-bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) {
+int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
+ assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
+ // Make sure that the pointer does not point to structs.
+ if (cast<PointerType>(Ptr->getType())->getElementType()->isAggregateType())
+ return 0;
+
+ // If this value is a pointer induction variable we know it is consecutive.
+ PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
+ if (Phi && Inductions.count(Phi)) {
+ InductionInfo II = Inductions[Phi];
+ if (IK_PtrInduction == II.IK)
+ return 1;
+ else if (IK_ReversePtrInduction == II.IK)
+ return -1;
+ }
+
GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
if (!Gep)
- return false;
+ return 0;
unsigned NumOperands = Gep->getNumOperands();
Value *LastIndex = Gep->getOperand(NumOperands - 1);
+ Value *GpPtr = Gep->getPointerOperand();
+ // If this GEP value is a consecutive pointer induction variable and all of
+  // the indices are constant then we know it is consecutive.
+ Phi = dyn_cast<PHINode>(GpPtr);
+ if (Phi && Inductions.count(Phi)) {
+
+ // Make sure that the pointer does not point to structs.
+ PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
+ if (GepPtrType->getElementType()->isAggregateType())
+ return 0;
+
+ // Make sure that all of the index operands are loop invariant.
+ for (unsigned i = 1; i < NumOperands; ++i)
+ if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ return 0;
+
+ InductionInfo II = Inductions[Phi];
+ if (IK_PtrInduction == II.IK)
+ return 1;
+ else if (IK_ReversePtrInduction == II.IK)
+ return -1;
+ }
+
// Check that all of the gep indices are uniform except for the last.
for (unsigned i = 0; i < NumOperands - 1; ++i)
if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
- return false;
+ return 0;
- // We can emit wide load/stores only of the last index is the induction
+ // We can emit wide load/stores only if the last index is the induction
// variable.
const SCEV *Last = SE->getSCEV(LastIndex);
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
@@ -524,44 +854,153 @@ bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) {
// The memory is consecutive because the last index is consecutive
// and all other indices are loop invariant.
if (Step->isOne())
- return true;
+ return 1;
+ if (Step->isAllOnesValue())
+ return -1;
}
- return false;
+ return 0;
}
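// Hypothetical loops classified by the stride check above (illustrations,
// not test cases from this patch):
static void unitStride(int *A, int N) {
  for (int i = 0; i < N; ++i) A[i] = i;        // stride  1: wide access
}
static void reverseStride(int *A, int N) {
  for (int i = N - 1; i >= 0; --i) A[i] = i;   // stride -1: reversed access
}
static void nonUnitStride(int *A, int N) {
  for (int i = 0; i < N; ++i) A[2 * i] = i;    // stride  0: scalarized
}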
bool LoopVectorizationLegality::isUniform(Value *V) {
return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
}
-Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) {
+InnerLoopVectorizer::VectorParts&
+InnerLoopVectorizer::getVectorValue(Value *V) {
+ assert(V != Induction && "The new induction variable should not be used.");
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
- // If we saved a vectorized copy of V, use it.
- Value *&MapEntry = WidenMap[V];
- if (MapEntry)
- return MapEntry;
- // Broadcast V and save the value for future uses.
+ // If we have this scalar in the map, return it.
+ if (WidenMap.has(V))
+ return WidenMap.get(V);
+
+ // If this scalar is unknown, assume that it is a constant or that it is
+ // loop invariant. Broadcast V and save the value for future uses.
Value *B = getBroadcastInstrs(V);
- MapEntry = B;
- return B;
+ return WidenMap.splat(V, B);
}
-Constant*
-SingleBlockLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) {
- SmallVector<Constant*, 8> Indices;
- // Create a vector of consecutive numbers from zero to VF.
+Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
+ assert(Vec->getType()->isVectorTy() && "Invalid type");
+ SmallVector<Constant*, 8> ShuffleMask;
for (unsigned i = 0; i < VF; ++i)
- Indices.push_back(ConstantInt::get(ScalarTy, Val, true));
+ ShuffleMask.push_back(Builder.getInt32(VF - i - 1));
- // Add the consecutive indices to the vector value.
- return ConstantVector::get(Indices);
+ return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()),
+ ConstantVector::get(ShuffleMask),
+ "reverse");
}
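// Scalar model of the reverse shuffle above (a sketch): the mask for
// VF = 4 is <3, 2, 1, 0>, i.e. result lane i takes input lane VF - i - 1.
static void reverseLanes(const int In[4], int Out[4]) {
  for (unsigned i = 0; i < 4; ++i)
    Out[i] = In[4 - i - 1];
}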
-void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
+
+void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
+ LoopVectorizationLegality *Legal) {
+ // Attempt to issue a wide load.
+ LoadInst *LI = dyn_cast<LoadInst>(Instr);
+ StoreInst *SI = dyn_cast<StoreInst>(Instr);
+
+ assert((LI || SI) && "Invalid Load/Store instruction");
+
+ Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType();
+ Type *DataTy = VectorType::get(ScalarDataTy, VF);
+ Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
+ unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
+
+ // If the pointer is loop invariant or if it is non-consecutive,
+ // scalarize the load/store.
+ int Stride = Legal->isConsecutivePtr(Ptr);
+ bool Reverse = Stride < 0;
+ bool UniformLoad = LI && Legal->isUniform(Ptr);
+ if (Stride == 0 || UniformLoad)
+ return scalarizeInstruction(Instr);
+
+ Constant *Zero = Builder.getInt32(0);
+ VectorParts &Entry = WidenMap.get(Instr);
+
+ // Handle consecutive loads/stores.
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+ if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
+ Value *PtrOperand = Gep->getPointerOperand();
+ Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
+ FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(0, FirstBasePtr);
+ Gep2->setName("gep.indvar.base");
+ Ptr = Builder.Insert(Gep2);
+ } else if (Gep) {
+ assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()),
+ OrigLoop) && "Base ptr must be invariant");
+
+ // The last index does not have to be the induction variable. It can be
+ // consecutive and a function of the index, for example A[i + 1].
+ unsigned NumOperands = Gep->getNumOperands();
+
+ Value *LastGepOperand = Gep->getOperand(NumOperands - 1);
+ VectorParts &GEPParts = getVectorValue(LastGepOperand);
+ Value *LastIndex = GEPParts[0];
+ LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(NumOperands - 1, LastIndex);
+ Gep2->setName("gep.indvar.idx");
+ Ptr = Builder.Insert(Gep2);
+ } else {
+ // Use the induction element ptr.
+ assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+ VectorParts &PtrVal = getVectorValue(Ptr);
+ Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
+ }
+
+ // Handle Stores:
+ if (SI) {
+ assert(!Legal->isUniform(SI->getPointerOperand()) &&
+ "We do not allow storing to uniform addresses");
+
+ VectorParts &StoredVal = getVectorValue(SI->getValueOperand());
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ // Calculate the pointer for the specific unroll-part.
+ Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+
+ if (Reverse) {
+ // If we store to reverse consecutive memory locations then we need
+ // to reverse the order of elements in the stored value.
+ StoredVal[Part] = reverseVector(StoredVal[Part]);
+ // If the address is consecutive but reversed, then the
+ // wide store needs to start at the last vector element.
+ PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
+ PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ }
+
+ Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+ Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment);
+ }
+ }
+
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ // Calculate the pointer for the specific unroll-part.
+ Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+
+ if (Reverse) {
+ // If the address is consecutive but reversed, then the
+ // wide load needs to start at the last vector element.
+ PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
+ PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ }
+
+ Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+ Value *LI = Builder.CreateLoad(VecPtr, "wide.load");
+ cast<LoadInst>(LI)->setAlignment(Alignment);
+ Entry[Part] = Reverse ? reverseVector(LI) : LI;
+ }
+}
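// The reversed address math above, as plain offsets (a sketch): each unroll
// part starts at Ptr + (-Part * VF) + (1 - VF), so with VF = 4 and UF = 2
// the two wide accesses cover offsets [-3, 0] and [-7, -4].
static int reversePartStart(int Part, int VF) {
  return -Part * VF + (1 - VF);   // -3 for Part 0, -7 for Part 1 (VF = 4)
}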
+
+void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
// Holds vector parameters or scalars, in case of uniform vals.
- SmallVector<Value*, 8> Params;
+ SmallVector<VectorParts, 4> Params;
// Find all of the vectorized parameters.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
@@ -569,7 +1008,7 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
// If we are accessing the old induction variable, use the new one.
if (SrcOp == OldInduction) {
- Params.push_back(getBroadcastInstrs(Induction));
+ Params.push_back(getVectorValue(SrcOp));
continue;
}
@@ -578,13 +1017,15 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
// If the src is an instruction that appeared earlier in the basic block
// then it should already be vectorized.
- if (SrcInst && SrcInst->getParent() == Instr->getParent()) {
- assert(WidenMap.count(SrcInst) && "Source operand is unavailable");
+ if (SrcInst && OrigLoop->contains(SrcInst)) {
+ assert(WidenMap.has(SrcInst) && "Source operand is unavailable");
// The parameter is a vector value from earlier.
- Params.push_back(WidenMap[SrcInst]);
+ Params.push_back(WidenMap.get(SrcInst));
} else {
// The parameter is a scalar from outside the loop. Maybe even a constant.
- Params.push_back(SrcOp);
+ VectorParts Scalars;
+ Scalars.append(UF, SrcOp);
+ Params.push_back(Scalars);
}
}
@@ -593,112 +1034,185 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
 // Does this instruction return a value?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
- Value *VecResults = 0;
- // If we have a return value, create an empty vector. We place the scalarized
- // instructions in this vector.
- if (!IsVoidRetTy)
- VecResults = UndefValue::get(VectorType::get(Instr->getType(), VF));
+ Value *UndefVec = IsVoidRetTy ? 0 :
+ UndefValue::get(VectorType::get(Instr->getType(), VF));
+ // Create a new entry in the WidenMap and initialize it to Undef or Null.
+ VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
// For each scalar that we create:
- for (unsigned i = 0; i < VF; ++i) {
- Instruction *Cloned = Instr->clone();
- if (!IsVoidRetTy)
- Cloned->setName(Instr->getName() + ".cloned");
- // Replace the operands of the cloned instrucions with extracted scalars.
- for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
- Value *Op = Params[op];
- // Param is a vector. Need to extract the right lane.
- if (Op->getType()->isVectorTy())
- Op = Builder.CreateExtractElement(Op, Builder.getInt32(i));
- Cloned->setOperand(op, Op);
+ for (unsigned Width = 0; Width < VF; ++Width) {
+ // For each vector unroll 'part':
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Instruction *Cloned = Instr->clone();
+ if (!IsVoidRetTy)
+ Cloned->setName(Instr->getName() + ".cloned");
+ // Replace the operands of the cloned instructions with extracted scalars.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *Op = Params[op][Part];
+ // Param is a vector. Need to extract the right lane.
+ if (Op->getType()->isVectorTy())
+ Op = Builder.CreateExtractElement(Op, Builder.getInt32(Width));
+ Cloned->setOperand(op, Op);
+ }
+
+ // Place the cloned scalar in the new loop.
+ Builder.Insert(Cloned);
+
+ // If the original scalar returns a value we need to place it in a vector
+ // so that future users will be able to use it.
+ if (!IsVoidRetTy)
+ VecResults[Part] = Builder.CreateInsertElement(VecResults[Part], Cloned,
+ Builder.getInt32(Width));
+ }
+ }
+}
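// Emission order of the clones above (a sketch): the Width loop is outer,
// so for VF = 4 and UF = 2 clone number Width * UF + Part fills lane Width
// of unroll part Part.
static unsigned cloneNumber(unsigned Width, unsigned Part) {
  return Width * 2 + Part;   // UF = 2 assumed for the example
}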
+
+Instruction *
+InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
+ Instruction *Loc) {
+ LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
+ Legal->getRuntimePointerCheck();
+
+ if (!PtrRtCheck->Need)
+ return NULL;
+
+ Instruction *MemoryRuntimeCheck = 0;
+ unsigned NumPointers = PtrRtCheck->Pointers.size();
+ SmallVector<Value* , 2> Starts;
+ SmallVector<Value* , 2> Ends;
+
+ SCEVExpander Exp(*SE, "induction");
+
+ // Use this type for pointer arithmetic.
+ Type* PtrArithTy = Type::getInt8PtrTy(Loc->getContext(), 0);
+
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ Value *Ptr = PtrRtCheck->Pointers[i];
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, OrigLoop)) {
+ DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
+ *Ptr <<"\n");
+ Starts.push_back(Ptr);
+ Ends.push_back(Ptr);
+ } else {
+ DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
+
+ Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
+ Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
+ Starts.push_back(Start);
+ Ends.push_back(End);
}
+ }
+
+ IRBuilder<> ChkBuilder(Loc);
+
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i+1; j < NumPointers; ++j) {
+ Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy, "bc");
- // Place the cloned scalar in the new loop.
- Builder.Insert(Cloned);
+ Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ if (MemoryRuntimeCheck)
+ IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
+ "conflict.rdx");
- // If the original scalar returns a value we need to place it in a vector
- // so that future users will be able to use it.
- if (!IsVoidRetTy)
- VecResults = Builder.CreateInsertElement(VecResults, Cloned,
- Builder.getInt32(i));
+ MemoryRuntimeCheck = cast<Instruction>(IsConflict);
+ }
}
- if (!IsVoidRetTy)
- WidenMap[Instr] = VecResults;
+ return MemoryRuntimeCheck;
}
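// Scalar model of the pairwise conflict test emitted above (a sketch):
// two ranges may overlap iff each start is no greater than the other end;
// the per-pair answers are OR-ed into a single runtime predicate.
static bool mayConflict(unsigned long Start0, unsigned long End0,
                        unsigned long Start1, unsigned long End1) {
  return Start0 <= End1 && Start1 <= End0;   // bound0 && bound1
}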
void
-SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
+InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
/*
In this function we generate a new loop. The new loop will contain
the vectorized instructions while the old loop will continue to run the
scalar remainder.
- [ ] <-- vector loop bypass.
- / |
- / v
-| [ ] <-- vector pre header.
-| |
-| v
-| [ ] \
-| [ ]_| <-- vector loop.
-| |
- \ v
- >[ ] <--- middle-block.
- / |
- / v
-| [ ] <--- new preheader.
-| |
-| v
-| [ ] \
-| [ ]_| <-- old scalar loop to handle remainder.
- \ |
- \ v
- >[ ] <-- exit block.
+ [ ] <-- vector loop bypass (may consist of multiple blocks).
+ / |
+ / v
+ | [ ] <-- vector pre header.
+ | |
+ | v
+ | [ ] \
+ | [ ]_| <-- vector loop.
+ | |
+ \ v
+ >[ ] <--- middle-block.
+ / |
+ / v
+ | [ ] <--- new preheader.
+ | |
+ | v
+ | [ ] \
+ | [ ]_| <-- old scalar loop to handle remainder.
+ \ |
+ \ v
+ >[ ] <-- exit block.
...
*/
+ BasicBlock *OldBasicBlock = OrigLoop->getHeader();
+ BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
+ BasicBlock *ExitBlock = OrigLoop->getExitBlock();
+ assert(ExitBlock && "Must have an exit block");
+
+ // Mark the old scalar loop with metadata that tells us not to vectorize this
+ // loop again if we run into it.
+ MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>());
+ OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
+
+ // Some loops have a single integer induction variable, while other loops
+ // don't. One example is C++ iterators, which often have multiple pointer
+ // induction variables. The code below also supports the case where there
+ // is no single induction variable.
OldInduction = Legal->getInduction();
- assert(OldInduction && "We must have a single phi node.");
- Type *IdxTy = OldInduction->getType();
+ Type *IdxTy = OldInduction ? OldInduction->getType() :
+ DL->getIntPtrType(SE->getContext());
// Find the loop boundaries.
- const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+ const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch());
assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
// Get the total trip count from the count by adding 1.
ExitCount = SE->getAddExpr(ExitCount,
SE->getConstant(ExitCount->getType(), 1));
- // We may need to extend the index in case there is a type mismatch.
- // We know that the count starts at zero and does not overflow.
- // We are using Zext because it should be less expensive.
- if (ExitCount->getType() != IdxTy)
- ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy);
- // This is the original scalar-loop preheader.
- BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
- BasicBlock *ExitBlock = OrigLoop->getExitBlock();
- assert(ExitBlock && "Must have an exit block");
+ // Expand the trip count and place the new instructions in the preheader.
+ // Notice that the pre-header does not change, only the loop body.
+ SCEVExpander Exp(*SE, "induction");
+
+ // Count holds the overall loop count (N).
+ Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
+ BypassBlock->getTerminator());
- // The loop index does not have to start at Zero. It starts with this value.
- Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock);
+ // The loop index does not have to start at Zero. Find the original start
+ // value from the induction PHI node. If we don't have an induction variable
+ // then we know that it starts at zero.
+ Value *StartIdx = OldInduction ?
+ OldInduction->getIncomingValueForBlock(BypassBlock):
+ ConstantInt::get(IdxTy, 0);
- assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
assert(BypassBlock && "Invalid loop structure");
+ LoopBypassBlocks.push_back(BypassBlock);
+ // Split the single-block loop into the two-loop structure described above.
BasicBlock *VectorPH =
- BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph");
- BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(),
- "vector.body");
-
- BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(),
- "middle.block");
+ BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph");
+ BasicBlock *VecBody =
+ VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
+ BasicBlock *MiddleBlock =
+ VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
BasicBlock *ScalarPH =
- MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(),
- "scalar.preheader");
- // Find the induction variable.
- BasicBlock *OldBasicBlock = OrigLoop->getHeader();
+ MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
// Use this IR builder to create the loop instructions (Phi, Br, Cmp)
// inside the loop.
@@ -706,105 +1220,167 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Generate the induction variable.
Induction = Builder.CreatePHI(IdxTy, 2, "index");
- Constant *Step = ConstantInt::get(IdxTy, VF);
+ // The loop step is equal to the vectorization factor (num of SIMD elements)
+ // times the unroll factor (num of SIMD instructions).
+ Constant *Step = ConstantInt::get(IdxTy, VF * UF);
- // Expand the trip count and place the new instructions in the preheader.
- // Notice that the pre-header does not change, only the loop body.
- SCEVExpander Exp(*SE, "induction");
- Instruction *Loc = BypassBlock->getTerminator();
+ // This is the IR builder that we use to add all of the logic for bypassing
+ // the new vector loop.
+ IRBuilder<> BypassBuilder(BypassBlock->getTerminator());
- // Count holds the overall loop count (N).
- Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc);
+ // We may need to extend the index in case there is a type mismatch.
+ // We know that the count starts at zero and does not overflow.
+ if (Count->getType() != IdxTy) {
+ // The exit count can be of pointer type. Convert it to the correct
+ // integer type.
+ if (ExitCount->getType()->isPointerTy())
+ Count = BypassBuilder.CreatePointerCast(Count, IdxTy, "ptrcnt.to.int");
+ else
+ Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy, "cnt.cast");
+ }
// Add the start index to the loop count to get the new end index.
- Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc);
+ Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx, "end.idx");
// Now we need to generate the expression for N - (N % VF), which is
// the part that the vectorized body will execute.
- Constant *CIVF = ConstantInt::get(IdxTy, VF);
- Value *R = BinaryOperator::CreateURem(Count, CIVF, "n.mod.vf", Loc);
- Value *CountRoundDown = BinaryOperator::CreateSub(Count, R, "n.vec", Loc);
- Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx,
- "end.idx.rnd.down", Loc);
-
- // Now, compare the new count to zero. If it is zero, jump to the scalar part.
- Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
- IdxEndRoundDown,
- StartIdx,
- "cmp.zero", Loc);
+ Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf");
+ Value *CountRoundDown = BypassBuilder.CreateSub(Count, R, "n.vec");
+ Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx,
+ "end.idx.rnd.down");
+
+ // Now, compare the new count to zero. If it is zero, skip the vector loop
+ // and jump to the scalar loop.
+ Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx,
+ "cmp.zero");
+
+ BasicBlock *LastBypassBlock = BypassBlock;
+
+ // Generate the code that checks at runtime whether the arrays overlap. We
+ // put the checks into a separate block to make the common case of few
+ // elements faster.
+ Instruction *MemRuntimeCheck = addRuntimeCheck(Legal,
+ BypassBlock->getTerminator());
+ if (MemRuntimeCheck) {
+ // Create a new block containing the memory check.
+ BasicBlock *CheckBlock = BypassBlock->splitBasicBlock(MemRuntimeCheck,
+ "vector.memcheck");
+ LoopBypassBlocks.push_back(CheckBlock);
+
+ // Replace the branch into the memory check block with a conditional branch
+ // for the "few elements case".
+ Instruction *OldTerm = BypassBlock->getTerminator();
+ BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
+ OldTerm->eraseFromParent();
+
+ Cmp = MemRuntimeCheck;
+ LastBypassBlock = CheckBlock;
+ }
- LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
- Legal->getRuntimePointerCheck();
- Value *MemoryRuntimeCheck = 0;
- if (PtrRtCheck->Need) {
- unsigned NumPointers = PtrRtCheck->Pointers.size();
- SmallVector<Value* , 2> Starts;
- SmallVector<Value* , 2> Ends;
-
- // Use this type for pointer arithmetic.
- Type* PtrArithTy = PtrRtCheck->Pointers[0]->getType();
-
- for (unsigned i=0; i < NumPointers; ++i) {
- Value *Ptr = PtrRtCheck->Pointers[i];
- const SCEV *Sc = SE->getSCEV(Ptr);
-
- if (SE->isLoopInvariant(Sc, OrigLoop)) {
- DEBUG(dbgs() << "LV1: Adding RT check for a loop invariant ptr:" <<
- *Ptr <<"\n");
- Starts.push_back(Ptr);
- Ends.push_back(Ptr);
- } else {
- DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
- Value *Start = Exp.expandCodeFor(AR->getStart(), PtrArithTy, Loc);
- const SCEV *Ex = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
- const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
- assert(!isa<SCEVCouldNotCompute>(ScEnd) && "Invalid scev range.");
- Value *End = Exp.expandCodeFor(ScEnd, PtrArithTy, Loc);
- Starts.push_back(Start);
- Ends.push_back(End);
- }
- }
+ LastBypassBlock->getTerminator()->eraseFromParent();
+ BranchInst::Create(MiddleBlock, VectorPH, Cmp,
+ LastBypassBlock);
- for (unsigned i=0; i < NumPointers; ++i) {
- for (unsigned j=i+1; j < NumPointers; ++j) {
- Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
- Starts[0], Ends[1], "bound0", Loc);
- Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
- Starts[1], Ends[0], "bound1", Loc);
- Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
- "found.conflict", Loc);
- if (MemoryRuntimeCheck) {
- MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or,
- MemoryRuntimeCheck,
- IsConflict,
- "conflict.rdx", Loc);
- } else {
- MemoryRuntimeCheck = IsConflict;
- }
- }
+ // We are going to resume the execution of the scalar loop.
+ // Go over all of the induction variables that we found and fix the
+ // PHIs that are left in the scalar version of the loop.
+ // The starting values of PHI nodes depend on the counter of the last
+ // iteration in the vectorized loop.
+ // If we come from a bypass edge then we need to start from the original
+ // start value.
+
+ // This variable saves the new starting index for the scalar loop.
+ PHINode *ResumeIndex = 0;
+ LoopVectorizationLegality::InductionList::iterator I, E;
+ LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
+ for (I = List->begin(), E = List->end(); I != E; ++I) {
+ PHINode *OrigPhi = I->first;
+ LoopVectorizationLegality::InductionInfo II = I->second;
+ PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
+ MiddleBlock->getTerminator());
+ Value *EndValue = 0;
+ switch (II.IK) {
+ case LoopVectorizationLegality::IK_NoInduction:
+ llvm_unreachable("Unknown induction");
+ case LoopVectorizationLegality::IK_IntInduction: {
+ // Handle the integer induction counter:
+ assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
+ assert(OrigPhi == OldInduction && "Unknown integer PHI");
+ // We know what the end value is.
+ EndValue = IdxEndRoundDown;
+ // We also know which PHI node holds it.
+ ResumeIndex = ResumeVal;
+ break;
+ }
+ case LoopVectorizationLegality::IK_ReverseIntInduction: {
+ // Convert the CountRoundDown variable to the PHI size.
+ unsigned CRDSize = CountRoundDown->getType()->getScalarSizeInBits();
+ unsigned IISize = II.StartValue->getType()->getScalarSizeInBits();
+ Value *CRD = CountRoundDown;
+ if (CRDSize > IISize)
+ CRD = CastInst::Create(Instruction::Trunc, CountRoundDown,
+ II.StartValue->getType(), "tr.crd",
+ LoopBypassBlocks.back()->getTerminator());
+ else if (CRDSize < IISize)
+ CRD = CastInst::Create(Instruction::SExt, CountRoundDown,
+ II.StartValue->getType(),
+ "sext.crd",
+ LoopBypassBlocks.back()->getTerminator());
+ // Handle reverse integer induction counter:
+ EndValue =
+ BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
+ break;
+ }
+ case LoopVectorizationLegality::IK_PtrInduction: {
+ // For pointer induction variables, calculate the offset using
+ // the end index.
+ EndValue =
+ GetElementPtrInst::Create(II.StartValue, CountRoundDown, "ptr.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
+ break;
+ }
+ case LoopVectorizationLegality::IK_ReversePtrInduction: {
+ // The value at the end of the loop for the reverse pointer is calculated
+ // by creating a GEP with a negative index starting from the start value.
+ Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
+ Value *NegIdx = BinaryOperator::CreateSub(Zero, CountRoundDown,
+ "rev.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
+ EndValue = GetElementPtrInst::Create(II.StartValue, NegIdx,
+ "rev.ptr.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
+ break;
}
- }// end of need-runtime-check code.
+ }// end of case
- // If we are using memory runtime checks, include them in.
- if (MemoryRuntimeCheck) {
- Cmp = BinaryOperator::Create(Instruction::Or, Cmp, MemoryRuntimeCheck,
- "CntOrMem", Loc);
+ // The new PHI merges the original incoming value, in case of a bypass,
+ // or the value at the end of the vectorized loop.
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ ResumeVal->addIncoming(EndValue, VecBody);
+
+ // Fix the scalar body counter (PHI node).
+ unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
+ OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
}
- BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc);
- // Remove the old terminator.
- Loc->eraseFromParent();
+ // If we are generating a new induction variable then we also need to
+ // generate the code that calculates the exit value. This value is not
+ // simply the end of the counter because we may skip the vectorized body
+ // in case of a runtime check.
+ if (!OldInduction) {
+ assert(!ResumeIndex && "Unexpected resume value found");
+ ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val",
+ MiddleBlock->getTerminator());
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]);
+ ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
+ }
- // We are going to resume the execution of the scalar loop.
- // This PHI decides on what number to start. If we come from the
- // vector loop then we need to start with the end index minus the
- // index modulo VF. If we come from a bypass edge then we need to start
- // from the real start.
- PHINode* ResumeIndex = PHINode::Create(IdxTy, 2, "resume.idx",
- MiddleBlock->getTerminator());
- ResumeIndex->addIncoming(StartIdx, BypassBlock);
- ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
+ // Make sure that we found the index where the scalar loop needs to continue.
+ assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() &&
+ "Invalid resume Index");
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
@@ -828,26 +1404,27 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Now we have two terminators. Remove the old one from the block.
VecBody->getTerminator()->eraseFromParent();
- // Fix the scalar body iteration count.
- unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
- OldInduction->setIncomingValue(BlockIdx, ResumeIndex);
-
// Get ready to start creating new instructions into the vectorized body.
Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
- // Register the new loop.
+ // Create and register the new vector loop.
Loop* Lp = new Loop();
- LPM->insertLoop(Lp, OrigLoop->getParentLoop());
-
- Lp->addBasicBlockToLoop(VecBody, LI->getBase());
-
Loop *ParentLoop = OrigLoop->getParentLoop();
+
+ // Insert the new loop into the loop nest and register the new basic blocks.
if (ParentLoop) {
+ ParentLoop->addChildLoop(Lp);
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
+ ParentLoop->addBasicBlockToLoop(LoopBypassBlocks[I], LI->getBase());
ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
+ } else {
+ LI->addTopLevelLoop(Lp);
}
+ Lp->addBasicBlockToLoop(VecBody, LI->getBase());
+
// Save the state.
LoopVectorPreHeader = VectorPH;
LoopScalarPreHeader = ScalarPH;
@@ -855,32 +1432,164 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
LoopExitBlock = ExitBlock;
LoopVectorBody = VecBody;
LoopScalarBody = OldBasicBlock;
- LoopBypassBlock = BypassBlock;
}
/// This function returns the identity element (or neutral element) for
/// the operation K.
-static unsigned
-getReductionIdentity(LoopVectorizationLegality::ReductionKind K) {
+static Constant*
+getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) {
switch (K) {
- case LoopVectorizationLegality::IntegerXor:
- case LoopVectorizationLegality::IntegerAdd:
- case LoopVectorizationLegality::IntegerOr:
+ case LoopVectorizationLegality::RK_IntegerXor:
+ case LoopVectorizationLegality::RK_IntegerAdd:
+ case LoopVectorizationLegality::RK_IntegerOr:
// Adding, Xoring, Oring zero to a number does not change it.
- return 0;
- case LoopVectorizationLegality::IntegerMult:
+ return ConstantInt::get(Tp, 0);
+ case LoopVectorizationLegality::RK_IntegerMult:
// Multiplying a number by 1 does not change it.
- return 1;
- case LoopVectorizationLegality::IntegerAnd:
+ return ConstantInt::get(Tp, 1);
+ case LoopVectorizationLegality::RK_IntegerAnd:
// AND-ing a number with an all-1 value does not change it.
- return -1;
+ return ConstantInt::get(Tp, -1, true);
+ case LoopVectorizationLegality::RK_FloatMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantFP::get(Tp, 1.0L);
+ case LoopVectorizationLegality::RK_FloatAdd:
+ // Adding zero to a number does not change it.
+ return ConstantFP::get(Tp, 0.0L);
default:
llvm_unreachable("Unknown reduction kind");
}
}
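// The identities above in scalar form (a sketch): x+0, x|0, x^0, x*1 and
// x & ~0u all yield x, which is why a vector of these values is a neutral
// starting point for each reduction lane.
static bool checkIntIdentities(unsigned x) {
  return (x + 0) == x && (x | 0) == x && (x ^ 0) == x &&
         (x * 1) == x && (x & ~0u) == x;
}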
+static Intrinsic::ID
+getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
+ // If we have an intrinsic call, check if it is trivially vectorizable.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::log:
+ case Intrinsic::log10:
+ case Intrinsic::log2:
+ case Intrinsic::fabs:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::pow:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ return II->getIntrinsicID();
+ default:
+ return Intrinsic::not_intrinsic;
+ }
+ }
+
+ if (!TLI)
+ return Intrinsic::not_intrinsic;
+
+ LibFunc::Func Func;
+ Function *F = CI->getCalledFunction();
+ // We're going to make assumptions on the semantics of the functions, check
+ // that the target knows that it's available in this environment.
+ if (!F || !TLI->getLibFunc(F->getName(), Func))
+ return Intrinsic::not_intrinsic;
+
+ // Otherwise check if we have a call to a function that can be turned into a
+ // vector intrinsic.
+ switch (Func) {
+ default:
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ return Intrinsic::sin;
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ return Intrinsic::cos;
+ case LibFunc::exp:
+ case LibFunc::expf:
+ case LibFunc::expl:
+ return Intrinsic::exp;
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ return Intrinsic::exp2;
+ case LibFunc::log:
+ case LibFunc::logf:
+ case LibFunc::logl:
+ return Intrinsic::log;
+ case LibFunc::log10:
+ case LibFunc::log10f:
+ case LibFunc::log10l:
+ return Intrinsic::log10;
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ return Intrinsic::log2;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ return Intrinsic::fabs;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ return Intrinsic::floor;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ return Intrinsic::ceil;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ return Intrinsic::trunc;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ return Intrinsic::rint;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ return Intrinsic::nearbyint;
+ case LibFunc::pow:
+ case LibFunc::powf:
+ case LibFunc::powl:
+ return Intrinsic::pow;
+ }
+
+ return Intrinsic::not_intrinsic;
+}
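// Hypothetical caller of the mapping above (a sketch; isWidenableCall is
// an illustration, not a function from this patch): a call such as sinf,
// once TLI confirms it is available, is treated as Intrinsic::sin and can
// be widened like any other trivially vectorizable intrinsic.
static bool isWidenableCall(CallInst *CI, const TargetLibraryInfo *TLI) {
  return getIntrinsicIDForCall(CI, TLI) != Intrinsic::not_intrinsic;
}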
+
+/// This function translates the reduction kind to an LLVM binary operator.
+static Instruction::BinaryOps
+getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
+ switch (Kind) {
+ case LoopVectorizationLegality::RK_IntegerAdd:
+ return Instruction::Add;
+ case LoopVectorizationLegality::RK_IntegerMult:
+ return Instruction::Mul;
+ case LoopVectorizationLegality::RK_IntegerOr:
+ return Instruction::Or;
+ case LoopVectorizationLegality::RK_IntegerAnd:
+ return Instruction::And;
+ case LoopVectorizationLegality::RK_IntegerXor:
+ return Instruction::Xor;
+ case LoopVectorizationLegality::RK_FloatMult:
+ return Instruction::FMul;
+ case LoopVectorizationLegality::RK_FloatAdd:
+ return Instruction::FAdd;
+ default:
+ llvm_unreachable("Unknown reduction operation");
+ }
+}
+
void
-SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
+InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
//===------------------------------------------------===//
//
// Notice: any optimization or new instruction that go
@@ -888,208 +1597,29 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// the cost-model.
//
//===------------------------------------------------===//
- typedef SmallVector<PHINode*, 4> PhiVector;
- BasicBlock &BB = *OrigLoop->getHeader();
- Constant *Zero = ConstantInt::get(
- IntegerType::getInt32Ty(BB.getContext()), 0);
+ Constant *Zero = Builder.getInt32(0);
// In order to support reduction variables we need to be able to vectorize
// Phi nodes. Phi nodes have cycles, so we need to vectorize them in two
- // steages. First, we create a new vector PHI node with no incoming edges.
+ // stages. First, we create a new vector PHI node with no incoming edges.
// We use this value when we vectorize all of the instructions that use the
// PHI. Next, after all of the instructions in the block are complete we
// add the new incoming edges to the PHI. At this point all of the
// instructions in the basic block are vectorized, so we can use them to
// construct the PHI.
- PhiVector PHIsToFix;
+ PhiVector RdxPHIsToFix;
- // For each instruction in the old loop.
- for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
- Instruction *Inst = it;
+ // Scan the loop in a topological order to ensure that defs are vectorized
+ // before users.
+ LoopBlocksDFS DFS(OrigLoop);
+ DFS.perform(LI);
- switch (Inst->getOpcode()) {
- case Instruction::Br:
- // Nothing to do for PHIs and BR, since we already took care of the
- // loop control flow instructions.
- continue;
- case Instruction::PHI:{
- PHINode* P = cast<PHINode>(Inst);
- // Special handling for the induction var.
- if (OldInduction == Inst)
- continue;
- // This is phase one of vectorizing PHIs.
- // This has to be a reduction variable.
- assert(Legal->getReductionVars()->count(P) && "Not a Reduction");
- Type *VecTy = VectorType::get(Inst->getType(), VF);
- WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi");
- PHIsToFix.push_back(P);
- continue;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- // Just widen binops.
- BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
- Value *A = getVectorValue(Inst->getOperand(0));
- Value *B = getVectorValue(Inst->getOperand(1));
-
- // Use this vector value for all users of the original instruction.
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
- WidenMap[Inst] = V;
-
- // Update the NSW, NUW and Exact flags.
- BinaryOperator *VecOp = cast<BinaryOperator>(V);
- if (isa<OverflowingBinaryOperator>(BinOp)) {
- VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
- VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
- }
- if (isa<PossiblyExactOperator>(VecOp))
- VecOp->setIsExact(BinOp->isExact());
- break;
- }
- case Instruction::Select: {
- // Widen selects.
- // If the selector is loop invariant we can create a select
- // instruction with a scalar condition. Otherwise, use vector-select.
- Value *Cond = Inst->getOperand(0);
- bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop);
-
- // The condition can be loop invariant but still defined inside the
- // loop. This means that we can't just use the original 'cond' value.
- // We have to take the 'vectorized' value and pick the first lane.
- // Instcombine will make this a no-op.
- Cond = getVectorValue(Cond);
- if (InvariantCond)
- Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0));
-
- Value *Op0 = getVectorValue(Inst->getOperand(1));
- Value *Op1 = getVectorValue(Inst->getOperand(2));
- WidenMap[Inst] = Builder.CreateSelect(Cond, Op0, Op1);
- break;
- }
-
- case Instruction::ICmp:
- case Instruction::FCmp: {
- // Widen compares. Generate vector compares.
- bool FCmp = (Inst->getOpcode() == Instruction::FCmp);
- CmpInst *Cmp = dyn_cast<CmpInst>(Inst);
- Value *A = getVectorValue(Inst->getOperand(0));
- Value *B = getVectorValue(Inst->getOperand(1));
- if (FCmp)
- WidenMap[Inst] = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
- else
- WidenMap[Inst] = Builder.CreateICmp(Cmp->getPredicate(), A, B);
- break;
- }
+ // Vectorize all of the blocks in the original loop.
+ for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
+ be = DFS.endRPO(); bb != be; ++bb)
+ vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
- case Instruction::Store: {
- // Attempt to issue a wide store.
- StoreInst *SI = dyn_cast<StoreInst>(Inst);
- Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
- Value *Ptr = SI->getPointerOperand();
- unsigned Alignment = SI->getAlignment();
-
- assert(!Legal->isUniform(Ptr) &&
- "We do not allow storing to uniform addresses");
-
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-
- // This store does not use GEPs.
- if (!Legal->isConsecutiveGep(Gep)) {
- scalarizeInstruction(Inst);
- break;
- }
-
- // The last index does not have to be the induction. It can be
- // consecutive and be a function of the index. For example A[I+1];
- unsigned NumOperands = Gep->getNumOperands();
- Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
- LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
- // Create the new GEP with the new induction variable.
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setOperand(NumOperands - 1, LastIndex);
- Ptr = Builder.Insert(Gep2);
- Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
- Value *Val = getVectorValue(SI->getValueOperand());
- Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
- break;
- }
- case Instruction::Load: {
- // Attempt to issue a wide load.
- LoadInst *LI = dyn_cast<LoadInst>(Inst);
- Type *RetTy = VectorType::get(LI->getType(), VF);
- Value *Ptr = LI->getPointerOperand();
- unsigned Alignment = LI->getAlignment();
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-
- // If we don't have a gep, or that the pointer is loop invariant,
- // scalarize the load.
- if (!Gep || Legal->isUniform(Gep) || !Legal->isConsecutiveGep(Gep)) {
- scalarizeInstruction(Inst);
- break;
- }
-
- // The last index does not have to be the induction. It can be
- // consecutive and be a function of the index. For example A[I+1];
- unsigned NumOperands = Gep->getNumOperands();
- Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
- LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
- // Create the new GEP with the new induction variable.
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setOperand(NumOperands - 1, LastIndex);
- Ptr = Builder.Insert(Gep2);
- Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
- LI = Builder.CreateLoad(Ptr);
- LI->setAlignment(Alignment);
- // Use this vector value for all users of the load.
- WidenMap[Inst] = LI;
- break;
- }
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- /// Vectorize bitcasts.
- CastInst *CI = dyn_cast<CastInst>(Inst);
- Value *A = getVectorValue(Inst->getOperand(0));
- Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
- WidenMap[Inst] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
- break;
- }
-
- default:
- /// All other instructions are unsupported. Scalarize them.
- scalarizeInstruction(Inst);
- break;
- }// end of switch.
- }// end of for_each instr.
-
- // At this point every instruction in the original loop is widended to
+ // At this point every instruction in the original loop is widened to
// a vector form. We are almost done. Now, we need to fix the PHI nodes
// that we vectorized. The PHI nodes are currently empty because we did
// not want to introduce cycles. Notice that the remaining PHI nodes
@@ -1098,38 +1628,36 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 // Create the 'reduced' values for each of the reduction vars.
// The reduced values are the vector values that we scalarize and combine
// after the loop is finished.
- for (PhiVector::iterator it = PHIsToFix.begin(), e = PHIsToFix.end();
+ for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
it != e; ++it) {
PHINode *RdxPhi = *it;
- PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]);
assert(RdxPhi && "Unable to recover vectorized PHI");
// Find the reduction variable descriptor.
assert(Legal->getReductionVars()->count(RdxPhi) &&
"Unable to find the reduction variable");
LoopVectorizationLegality::ReductionDescriptor RdxDesc =
- (*Legal->getReductionVars())[RdxPhi];
+ (*Legal->getReductionVars())[RdxPhi];
// We need to generate a reduction vector from the incoming scalar.
 // To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopBypassBlock->getTerminator());
+ Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
// This is the vector-clone of the value that leaves the loop.
- Value *VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
- Type *VecTy = VectorExit->getType();
+ VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
+ Type *VecTy = VectorExit[0]->getType();
 // Find the reduction identity value: zero for addition, or and xor;
 // one for multiplication; -1 for and.
- Constant *Identity = getUniformVector(getReductionIdentity(RdxDesc.Kind),
- VecTy->getScalarType());
+ Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType());
+ Constant *Identity = ConstantVector::getSplat(VF, Iden);
// This vector is the Identity vector where the first element is the
// incoming scalar reduction.
Value *VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
-
+ RdxDesc.StartValue, Zero);
// Fix the vector-loop phi.
// We created the induction variable so we know that the
@@ -1138,10 +1666,17 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Reductions do not have to start at zero. They can start with
// any loop invariant values.
- VecRdxPhi->addIncoming(VectorStart, VecPreheader);
- unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
- Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx));
- VecRdxPhi->addIncoming(Val, LoopVectorBody);
+ VectorParts &VecRdxPhi = WidenMap.get(RdxPhi);
+ BasicBlock *Latch = OrigLoop->getLoopLatch();
+ Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch);
+ VectorParts &Val = getVectorValue(LoopVal);
+ for (unsigned part = 0; part < UF; ++part) {
+ // Make sure to add the reduction start value only to the
+ // first unroll part.
+ Value *StartVal = (part == 0) ? VectorStart : Identity;
+ cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader);
+ cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part], LoopVectorBody);
+ }
// Before each round, move the insertion point right between
// the PHIs and the values we are going to write.
@@ -1149,40 +1684,56 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// instructions.
Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
- // This PHINode contains the vectorized reduction variable, or
- // the initial value vector, if we bypass the vector loop.
- PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
- NewPhi->addIncoming(VectorStart, LoopBypassBlock);
- NewPhi->addIncoming(getVectorValue(RdxDesc.LoopExitInstr), LoopVectorBody);
-
- // Extract the first scalar.
- Value *Scalar0 =
- Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
- // Extract and reduce the remaining vector elements.
- for (unsigned i=1; i < VF; ++i) {
- Value *Scalar1 =
- Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
- switch (RdxDesc.Kind) {
- case LoopVectorizationLegality::IntegerAdd:
- Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
- break;
- case LoopVectorizationLegality::IntegerMult:
- Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
- break;
- case LoopVectorizationLegality::IntegerOr:
- Scalar0 = Builder.CreateOr(Scalar0, Scalar1);
- break;
- case LoopVectorizationLegality::IntegerAnd:
- Scalar0 = Builder.CreateAnd(Scalar0, Scalar1);
- break;
- case LoopVectorizationLegality::IntegerXor:
- Scalar0 = Builder.CreateXor(Scalar0, Scalar1);
- break;
- default:
- llvm_unreachable("Unknown reduction operation");
- }
+ VectorParts RdxParts;
+ for (unsigned part = 0; part < UF; ++part) {
+ // This PHINode contains the vectorized reduction variable, or
+ // the initial value vector, if we bypass the vector loop.
+ VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
+ PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
+ Value *StartVal = (part == 0) ? VectorStart : Identity;
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
+ NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody);
+ RdxParts.push_back(NewPhi);
+ }
+
+ // Reduce all of the unrolled parts into a single vector.
+ Value *ReducedPartRdx = RdxParts[0];
+ for (unsigned part = 1; part < UF; ++part) {
+ Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
+ ReducedPartRdx = Builder.CreateBinOp(Op, RdxParts[part], ReducedPartRdx,
+ "bin.rdx");
}
+ // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+ // and vector ops, reducing the set of values being computed by half each
+ // round.
+ assert(isPowerOf2_32(VF) &&
+ "Reduction emission only supported for pow2 vectors!");
+ Value *TmpVec = ReducedPartRdx;
+ SmallVector<Constant*, 32> ShuffleMask(VF, 0);
+ for (unsigned i = VF; i != 1; i >>= 1) {
+ // Move the upper half of the vector to the lower half.
+ for (unsigned j = 0; j != i/2; ++j)
+ ShuffleMask[j] = Builder.getInt32(i/2 + j);
+
+ // Fill the rest of the mask with undef.
+ std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
+ UndefValue::get(Builder.getInt32Ty()));
+
+ Value *Shuf =
+ Builder.CreateShuffleVector(TmpVec,
+ UndefValue::get(TmpVec->getType()),
+ ConstantVector::get(ShuffleMask),
+ "rdx.shuf");
+
+ Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
+ TmpVec = Builder.CreateBinOp(Op, TmpVec, Shuf, "bin.rdx");
+ }
+
+ // The result is in the first element of the vector.
+ Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
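  // Scalar model of the rounds above for VF = 4 and integer add (a sketch):
  //   round 1, mask <2, 3, u, u>:  V[0] += V[2]; V[1] += V[3];
  //   round 2, mask <1, u, u, u>:  V[0] += V[1];
  // leaving the full sum in lane 0, which the extract above reads.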
+
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
// We know that the loop is in LCSSA form. We need to update the
@@ -1207,24 +1758,378 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Fix the scalar loop reduction variable with the incoming reduction sum
// from the vector body and from the backedge value.
- int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block.
+ int IncomingEdgeBlockIdx =
+ (RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch());
+ assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
+ // Pick the other block.
+ int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
}// end of for each redux variable.
+
+ // The loop exit block may have single-value PHI nodes where the incoming
+ // value is 'undef'. While vectorizing we only handled real values that
+ // were defined inside the loop. Here we handle the 'undef' case.
+ // See PR14725.
+ for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
+ LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
+ PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
+ if (!LCSSAPhi) continue;
+ if (LCSSAPhi->getNumIncomingValues() == 1)
+ LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()),
+ LoopMiddleBlock);
+ }
+}
+
+InnerLoopVectorizer::VectorParts
+InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
+ assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) &&
+ "Invalid edge");
+
+ VectorParts SrcMask = createBlockInMask(Src);
+
+ // The terminator has to be a branch inst!
+ BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
+ assert(BI && "Unexpected terminator found");
+
+ if (BI->isConditional()) {
+ VectorParts EdgeMask = getVectorValue(BI->getCondition());
+
+ if (BI->getSuccessor(0) != Dst)
+ for (unsigned part = 0; part < UF; ++part)
+ EdgeMask[part] = Builder.CreateNot(EdgeMask[part]);
+
+ for (unsigned part = 0; part < UF; ++part)
+ EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]);
+ return EdgeMask;
+ }
+
+ return SrcMask;
+}
+
+InnerLoopVectorizer::VectorParts
+InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
+ assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
+
+ // Loop incoming mask is all-one.
+ if (OrigLoop->getHeader() == BB) {
+ Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1);
+ return getVectorValue(C);
+ }
+
+ // This is the block mask. We OR together the masks of all incoming edges, starting from zero.
+ Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0);
+ VectorParts BlockMask = getVectorValue(Zero);
+
+ // For each pred:
+ for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it) {
+ VectorParts EM = createEdgeMask(*it, BB);
+ for (unsigned part = 0; part < UF; ++part)
+ BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]);
+ }
+
+ return BlockMask;
+}
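// Scalar model of the masks above (a sketch): an edge mask is the source
// block's mask AND-ed with the branch condition (negated for the false
// successor), and a block mask ORs its incoming edge masks.
static bool edgeMask(bool SrcMask, bool Cond, bool TakenOnTrue) {
  return SrcMask && (TakenOnTrue ? Cond : !Cond);
}
static bool blockMask(bool Edge0, bool Edge1) {
  return Edge0 || Edge1;   // the loop header instead starts from all-ones
}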
+
+void
+InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
+ BasicBlock *BB, PhiVector *PV) {
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ VectorParts &Entry = WidenMap.get(it);
+ switch (it->getOpcode()) {
+ case Instruction::Br:
+ // Nothing to do for PHIs and BR, since we already took care of the
+ // loop control flow instructions.
+ continue;
+ case Instruction::PHI:{
+ PHINode* P = cast<PHINode>(it);
+ // Handle reduction variables:
+ if (Legal->getReductionVars()->count(P)) {
+ for (unsigned part = 0; part < UF; ++part) {
+ // This is phase one of vectorizing PHIs.
+ Type *VecTy = VectorType::get(it->getType(), VF);
+ Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
+ LoopVectorBody->getFirstInsertionPt());
+ }
+ PV->push_back(P);
+ continue;
+ }
+
+ // Check for PHI nodes that are lowered to vector selects.
+ if (P->getParent() != OrigLoop->getHeader()) {
+ // We know that all PHIs in non header blocks are converted into
+ // selects, so we don't have to worry about the insertion order and we
+ // can just use the builder.
+
+ // At this point we generate the predication tree. There may be
+ // duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+ VectorParts Cond = createEdgeMask(P->getIncomingBlock(0),
+ P->getParent());
+
+ for (unsigned part = 0; part < UF; ++part) {
+ VectorParts &In0 = getVectorValue(P->getIncomingValue(0));
+ VectorParts &In1 = getVectorValue(P->getIncomingValue(1));
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In1[part],
+ "predphi");
+ }
+ continue;
+ }
+
+ // This PHINode must be an induction variable.
+ // Make sure that we know about it.
+ assert(Legal->getInductionVars()->count(P) &&
+ "Not an induction variable");
+
+ LoopVectorizationLegality::InductionInfo II =
+ Legal->getInductionVars()->lookup(P);
+
+ switch (II.IK) {
+ case LoopVectorizationLegality::IK_NoInduction:
+ llvm_unreachable("Unknown induction");
+ case LoopVectorizationLegality::IK_IntInduction: {
+ assert(P == OldInduction && "Unexpected PHI");
+ Value *Broadcasted = getBroadcastInstrs(Induction);
+ // After broadcasting the induction variable we need to make the
+ // vector consecutive by adding 0, 1, 2 ...
+ for (unsigned part = 0; part < UF; ++part)
+ Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
+ continue;
+ }
+ case LoopVectorizationLegality::IK_ReverseIntInduction:
+ case LoopVectorizationLegality::IK_PtrInduction:
+ case LoopVectorizationLegality::IK_ReversePtrInduction:
+ // Handle reverse integer and pointer inductions.
+ Value *StartIdx = 0;
+ // If we have a single integer induction variable then use it.
+ // Otherwise, start counting at zero.
+ if (OldInduction) {
+ LoopVectorizationLegality::InductionInfo OldII =
+ Legal->getInductionVars()->lookup(OldInduction);
+ StartIdx = OldII.StartValue;
+ } else {
+ StartIdx = ConstantInt::get(Induction->getType(), 0);
+ }
+ // This is the normalized GEP that starts counting at zero.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
+ "normalized.idx");
+
+ // Handle the reverse integer induction variable case.
+ if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
+ IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
+ Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
+ "resize.norm.idx");
+ Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
+ "reverse.idx");
+
+ // This is a new value so do not hoist it out.
+ Value *Broadcasted = getBroadcastInstrs(ReverseInd);
+ // After broadcasting the induction variable we need to make the
+ // vector consecutive by adding ... -3, -2, -1, 0.
+ for (unsigned part = 0; part < UF; ++part)
+ Entry[part] = getConsecutiveVector(Broadcasted, -VF * part, true);
+ continue;
+ }
+
+ // Handle the pointer induction variable case.
+ assert(P->getType()->isPointerTy() && "Unexpected type.");
+
+ // Is this a reverse induction ptr or a consecutive induction ptr?
+ bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
+ II.IK);
+
+ // This is the vector of results. Notice that we don't generate
+ // vector geps because scalar geps result in better code.
+ for (unsigned part = 0; part < UF; ++part) {
+ Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+ for (unsigned int i = 0; i < VF; ++i) {
+ int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
+ Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+ Value *GlobalIdx;
+ if (!Reverse)
+ GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+ else
+ GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
+
+ Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
+ "next.gep");
+ VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+ Builder.getInt32(i),
+ "insert.gep");
+ }
+ Entry[part] = VecVal;
+ }
+ continue;
+ }
+
+ }// End of PHI.
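// A minimal standalone sketch (assumed VF = 4, one part; not from the
// patch) of the scalar-GEP expansion above. Forward lanes address
// Start[NormalizedIdx + i] ("gep.idx"); reverse lanes address
// Start[-i - NormalizedIdx] ("gep.ridx").
static void ptrInductionModel(double *Start, int NormalizedIdx, bool Reverse,
                              double *Lanes[4]) {
  for (int i = 0; i < 4; ++i) {
    int EltIndex = i * (Reverse ? -1 : 1);
    int GlobalIdx = Reverse ? EltIndex - NormalizedIdx
                            : NormalizedIdx + EltIndex;
    Lanes[i] = Start + GlobalIdx; // the scalar "next.gep" for this lane
  }
}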
+
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Just widen binops.
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it);
+ VectorParts &A = getVectorValue(it->getOperand(0));
+ VectorParts &B = getVectorValue(it->getOperand(1));
+
+ // Use this vector value for all users of the original instruction.
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
+
+ // Update the NSW, NUW and Exact flags. Notice: V can be an Undef.
+ BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V);
+ if (VecOp && isa<OverflowingBinaryOperator>(BinOp)) {
+ VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
+ VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
+ }
+ if (VecOp && isa<PossiblyExactOperator>(VecOp))
+ VecOp->setIsExact(BinOp->isExact());
+
+ Entry[Part] = V;
+ }
+ break;
+ }
+ case Instruction::Select: {
+ // Widen selects.
+ // If the selector is loop invariant we can create a select
+ // instruction with a scalar condition. Otherwise, use vector-select.
+ bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)),
+ OrigLoop);
+
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ VectorParts &Cond = getVectorValue(it->getOperand(0));
+ VectorParts &Op0 = getVectorValue(it->getOperand(1));
+ VectorParts &Op1 = getVectorValue(it->getOperand(2));
+ Value *ScalarCond = Builder.CreateExtractElement(Cond[0],
+ Builder.getInt32(0));
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Entry[Part] = Builder.CreateSelect(
+ InvariantCond ? ScalarCond : Cond[Part],
+ Op0[Part],
+ Op1[Part]);
+ }
+ break;
+ }
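// A minimal standalone sketch (assumed VF = 4; not from the patch) of the
// two select strategies above: a loop-invariant condition selects with
// lane 0 of its widened value, otherwise the select is lane-wise.
static void selectModel(bool InvariantCond, const bool Cond[4],
                        const int Op0[4], const int Op1[4], int Entry[4]) {
  bool ScalarCond = Cond[0]; // extract lane 0; instcombine folds this away
  for (int Lane = 0; Lane < 4; ++Lane)
    Entry[Lane] = (InvariantCond ? ScalarCond : Cond[Lane]) ? Op0[Lane]
                                                            : Op1[Lane];
}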
+
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Widen compares. Generate vector compares.
+ bool FCmp = (it->getOpcode() == Instruction::FCmp);
+ CmpInst *Cmp = dyn_cast<CmpInst>(it);
+ VectorParts &A = getVectorValue(it->getOperand(0));
+ VectorParts &B = getVectorValue(it->getOperand(1));
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *C = 0;
+ if (FCmp)
+ C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
+ else
+ C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
+ Entry[Part] = C;
+ }
+ break;
+ }
+
+ case Instruction::Store:
+ case Instruction::Load:
+ vectorizeMemoryInstruction(it, Legal);
+ break;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ CastInst *CI = dyn_cast<CastInst>(it);
+ /// Optimize the special case where the source is the induction
+ /// variable. Notice that we can only optimize the 'trunc' case
+ /// because: a. FP conversions lose precision, b. sext/zext may wrap,
+ /// c. other casts depend on pointer size.
+ if (CI->getOperand(0) == OldInduction &&
+ it->getOpcode() == Instruction::Trunc) {
+ Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
+ CI->getType());
+ Value *Broadcasted = getBroadcastInstrs(ScalarCast);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false);
+ break;
+ }
+ /// Vectorize casts.
+ Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+
+ VectorParts &A = getVectorValue(it->getOperand(0));
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
+ break;
+ }
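// A minimal standalone sketch (assumed i64 -> i16 trunc, VF = 4; not from
// the patch) of the special case above: truncate the scalar induction once
// and rebuild the consecutive vector in the narrow type, rather than
// widening the wide IV and truncating every lane.
static void truncIVModel(long long Induction, short Entry[4]) {
  short ScalarCast = static_cast<short>(Induction); // one scalar trunc
  for (int Lane = 0; Lane < 4; ++Lane)
    Entry[Lane] = static_cast<short>(ScalarCast + Lane); // broadcast + step
}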
+
+ case Instruction::Call: {
+ // Ignore dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(it))
+ break;
+
+ Module *M = BB->getParent()->getParent();
+ CallInst *CI = cast<CallInst>(it);
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Not an intrinsic call!");
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Value*, 4> Args;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
+ Args.push_back(Arg[Part]);
+ }
+ Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) };
+ Function *F = Intrinsic::getDeclaration(M, ID, Tys);
+ Entry[Part] = Builder.CreateCall(F, Args);
+ }
+ break;
+ }
+
+ default:
+ // All other instructions are unsupported. Scalarize them.
+ scalarizeInstruction(it);
+ break;
+ }// end of switch.
+ }// end of for_each instr.
}
-void SingleBlockLoopVectorizer::updateAnalysis() {
- // The original basic block.
+void InnerLoopVectorizer::updateAnalysis() {
+ // Forget the original basic block.
SE->forgetLoop(OrigLoop);
// Update the dominator tree information.
- assert(DT->properlyDominates(LoopBypassBlock, LoopExitBlock) &&
+ assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
"Entry does not dominate exit.");
- DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlock);
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
+ DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
+ DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader);
- DT->addNewBlock(LoopMiddleBlock, LoopBypassBlock);
+ DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front());
DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock);
DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
@@ -1232,45 +2137,94 @@ void SingleBlockLoopVectorizer::updateAnalysis() {
DEBUG(DT->verifyAnalysis());
}
-bool LoopVectorizationLegality::canVectorize() {
- if (!TheLoop->getLoopPreheader()) {
- assert(false && "No preheader!!");
- DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
- return false;
+bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
+ if (!EnableIfConversion)
+ return false;
+
+ assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
+ std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
+
+ // Collect the blocks that need predication.
+ for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
+ BasicBlock *BB = LoopBlocks[i];
+
+ // We don't support switch statements inside loops.
+ if (!isa<BranchInst>(BB->getTerminator()))
+ return false;
+
+ // We must have at most two predecessors because we need to convert
+ // all PHIs to selects.
+ unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
+ if (Preds > 2)
+ return false;
+
+ // We must be able to predicate all blocks that need to be predicated.
+ if (blockNeedsPredication(BB) && !blockCanBePredicated(BB))
+ return false;
}
- // We can only vectorize single basic block loops.
+ // We can if-convert this loop.
+ return true;
+}
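// An illustrative source-level loop (assumed example, not from the patch)
// that satisfies these rules: every block ends in a conditional branch,
// each join has at most two predecessors, and the predicated block only
// updates a register, so its PHI folds into a select.
static void saturate(int *A, int N) {
  for (int i = 0; i < N; ++i) {
    int V = A[i];
    if (V > 255)
      V = 255; // PHI at the join becomes: V = (V > 255) ? 255 : V
    A[i] = V;
  }
}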
+
+bool LoopVectorizationLegality::canVectorize() {
+ assert(TheLoop->getLoopPreheader() && "No preheader!!");
+
+ // We can only vectorize innermost loops.
+ if (TheLoop->getSubLoopsVector().size())
+ return false;
+
+ // We must have a single backedge.
+ if (TheLoop->getNumBackEdges() != 1)
+ return false;
+
+ // We must have a single exiting block.
+ if (!TheLoop->getExitingBlock())
+ return false;
+
unsigned NumBlocks = TheLoop->getNumBlocks();
- if (NumBlocks != 1) {
- DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
+
+ // Check if we can if-convert non-single-block loops.
+ if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
+ DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
return false;
}
// We need to have a loop header.
- BasicBlock *BB = TheLoop->getHeader();
- DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n");
+ BasicBlock *Latch = TheLoop->getLoopLatch();
+ DEBUG(dbgs() << "LV: Found a loop: " <<
+ TheLoop->getHeader()->getName() << "\n");
// ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
+ const SCEV *ExitCount = SE->getExitCount(TheLoop, Latch);
if (ExitCount == SE->getCouldNotCompute()) {
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
// Do not loop-vectorize loops with a tiny trip count.
- unsigned TC = SE->getSmallConstantTripCount(TheLoop, BB);
- if (TC > 0u && TC < TinyTripCountThreshold) {
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch);
+ if (TC > 0u && TC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
"This loop is not worth vectorizing.\n");
return false;
}
+ // Check if we can vectorize the instructions and CFG in this loop.
+ if (!canVectorizeInstrs()) {
+ DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
+ return false;
+ }
+
// Go over each instruction and look at memory deps.
- if (!canVectorizeBlock(*BB)) {
- DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
+ if (!canVectorizeMemory()) {
+ DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
return false;
}
+ // Collect all of the variables that remain uniform after vectorization.
+ collectLoopUniforms();
+
DEBUG(dbgs() << "LV: We can vectorize this loop" <<
(PtrRtCheck.Need ? " (with a runtime bound check)" : "")
<<"!\n");
@@ -1281,130 +2235,220 @@ bool LoopVectorizationLegality::canVectorize() {
return true;
}
-bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
- // Scan the instructions in the block and look for hazards.
- for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
- Instruction *I = it;
+bool LoopVectorizationLegality::canVectorizeInstrs() {
+ BasicBlock *PreHeader = TheLoop->getLoopPreheader();
+ BasicBlock *Header = TheLoop->getHeader();
- PHINode *Phi = dyn_cast<PHINode>(I);
- if (Phi) {
- // This should not happen because the loop should be normalized.
- if (Phi->getNumIncomingValues() != 2) {
- DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
- return false;
- }
- // We only look at integer phi nodes.
- if (!Phi->getType()->isIntegerTy()) {
- DEBUG(dbgs() << "LV: Found an non-int PHI.\n");
- return false;
- }
+ // If we marked the scalar loop as "already vectorized" then no need
+ // to vectorize it again.
+ if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) {
+ DEBUG(dbgs() << "LV: This loop was vectorized before\n");
+ return false;
+ }
+
+ // For each block in the loop.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
- if (isInductionVariable(Phi)) {
- if (Induction) {
- DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
+ // Scan the instructions in the block and look for hazards.
+ for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+ ++it) {
+
+ if (PHINode *Phi = dyn_cast<PHINode>(it)) {
+ // This should not happen because the loop should be normalized.
+ if (Phi->getNumIncomingValues() != 2) {
+ DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
}
- DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n");
- Induction = Phi;
- continue;
- }
- if (AddReductionVar(Phi, IntegerAdd)) {
- DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, IntegerMult)) {
- DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, IntegerOr)) {
- DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, IntegerAnd)) {
- DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
- continue;
- }
- if (AddReductionVar(Phi, IntegerXor)) {
- DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
- continue;
- }
- DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
- return false;
- }// end of PHI handling
+ // Check that this PHI type is allowed.
+ if (!Phi->getType()->isIntegerTy() &&
+ !Phi->getType()->isFloatingPointTy() &&
+ !Phi->getType()->isPointerTy()) {
+ DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
+ return false;
+ }
- // We still don't handle functions.
- CallInst *CI = dyn_cast<CallInst>(I);
- if (CI) {
- DEBUG(dbgs() << "LV: Found a call site.\n");
- return false;
- }
+ // If this PHINode is not in the header block, then we know that we
+ // can convert it to a select during if-conversion. No need to check if
+ // the PHIs in this block are induction or reduction variables.
+ if (*bb != Header)
+ continue;
- // We do not re-vectorize vectors.
- if (!VectorType::isValidElementType(I->getType()) &&
- !I->getType()->isVoidTy()) {
- DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
- return false;
- }
+ // This is the value coming from the preheader.
+ Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
+ // Check if this is an induction variable.
+ InductionKind IK = isInductionVariable(Phi);
+
+ if (IK_NoInduction != IK) {
+ // Int inductions are special because we only allow one IV.
+ if (IK == IK_IntInduction) {
+ if (Induction) {
+ DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
+ return false;
+ }
+ Induction = Phi;
+ }
+
+ DEBUG(dbgs() << "LV: Found an induction variable.\n");
+ Inductions[Phi] = InductionInfo(StartValue, IK);
+ continue;
+ }
- // Reduction instructions are allowed to have exit users.
- // All other instructions must not have external users.
- if (!AllowedExit.count(I))
- //Check that all of the users of the loop are inside the BB.
- for (Value::use_iterator it = I->use_begin(), e = I->use_end();
- it != e; ++it) {
- Instruction *U = cast<Instruction>(*it);
- // This user may be a reduction exit value.
- BasicBlock *Parent = U->getParent();
- if (Parent != &BB) {
- DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ if (AddReductionVar(Phi, RK_IntegerAdd)) {
+ DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMult)) {
+ DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_IntegerOr)) {
+ DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_IntegerAnd)) {
+ DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_IntegerXor)) {
+ DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_FloatMult)) {
+ DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, RK_FloatAdd)) {
+ DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+
+ DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
+ return false;
+ }// end of PHI handling
+
+ // We still don't handle functions. However, we can ignore dbg intrinsic
+ // calls and we do handle certain intrinsic and libm functions.
+ CallInst *CI = dyn_cast<CallInst>(it);
+ if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
+ DEBUG(dbgs() << "LV: Found a call site.\n");
+ return false;
+ }
+
+ // Check that the instruction return type is vectorizable.
+ if (!VectorType::isValidElementType(it->getType()) &&
+ !it->getType()->isVoidTy()) {
+ DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+ return false;
+ }
+
+ // Check that the stored type is vectorizable.
+ if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
+ Type *T = ST->getValueOperand()->getType();
+ if (!VectorType::isValidElementType(T))
return false;
+ }
+
+ // Reduction instructions are allowed to have exit users.
+ // All other instructions must not have external users.
+ if (!AllowedExit.count(it))
+ //Check that all of the users of the loop are inside the BB.
+ for (Value::use_iterator I = it->use_begin(), E = it->use_end();
+ I != E; ++I) {
+ Instruction *U = cast<Instruction>(*I);
+ // This user may be a reduction exit value.
+ if (!TheLoop->contains(U)) {
+ DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ return false;
+ }
}
- }
- } // next instr.
+ } // next instr.
+
+ }
if (!Induction) {
- DEBUG(dbgs() << "LV: Did not find an induction var.\n");
- return false;
+ DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
+ assert(getInductionVars()->size() && "No induction variables");
}
- // Don't vectorize if the memory dependencies do not allow vectorization.
- if (!canVectorizeMemory(BB))
- return false;
+ return true;
+}
+void LoopVectorizationLegality::collectLoopUniforms() {
// We now know that the loop is vectorizable!
// Collect variables that will remain uniform after vectorization.
std::vector<Value*> Worklist;
+ BasicBlock *Latch = TheLoop->getLoopLatch();
// Start with the conditional branch and walk up the block.
- Worklist.push_back(BB.getTerminator()->getOperand(0));
+ Worklist.push_back(Latch->getTerminator()->getOperand(0));
while (Worklist.size()) {
Instruction *I = dyn_cast<Instruction>(Worklist.back());
Worklist.pop_back();
- // Look at instructions inside this block.
- if (!I) continue;
- if (I->getParent() != &BB) continue;
+ // Look at instructions inside this loop.
// Stop when reaching PHI nodes.
- if (isa<PHINode>(I)) {
- assert(I == Induction && "Found a uniform PHI that is not the induction");
- break;
- }
+ // TODO: we need to follow values all over the loop, not only in this block.
+ if (!I || !TheLoop->contains(I) || isa<PHINode>(I))
+ continue;
// This is a known uniform.
Uniforms.insert(I);
// Insert all operands.
- for (int i=0, Op = I->getNumOperands(); i < Op; ++i) {
+ for (int i = 0, Op = I->getNumOperands(); i < Op; ++i) {
Worklist.push_back(I->getOperand(i));
}
}
+}
- return true;
+AliasAnalysis::Location
+LoopVectorizationLegality::getLoadStoreLocation(Instruction *Inst) {
+ if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+ return AA->getLocation(Store);
+ else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
+ return AA->getLocation(Load);
+
+ llvm_unreachable("Should be either load or store instruction");
}
-bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
+bool
+LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
+ Value *Object,
+ Instruction *Inst,
+ AliasMultiMap& WriteObjects,
+ unsigned MaxByteWidth) {
+
+ AliasAnalysis::Location ThisLoc = getLoadStoreLocation(Inst);
+
+ std::vector<Instruction*>::iterator
+ it = WriteObjects[Object].begin(),
+ end = WriteObjects[Object].end();
+
+ for (; it != end; ++it) {
+ Instruction* I = *it;
+ if (I == Inst)
+ continue;
+
+ AliasAnalysis::Location ThatLoc = getLoadStoreLocation(I);
+ if (AA->alias(ThisLoc.getWithNewSize(MaxByteWidth),
+ ThatLoc.getWithNewSize(MaxByteWidth)))
+ return true;
+ }
+ return false;
+}
+
+bool LoopVectorizationLegality::canVectorizeMemory() {
+
+ if (TheLoop->isAnnotatedParallel()) {
+ DEBUG(dbgs()
+ << "LV: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ return true;
+ }
+
typedef SmallVector<Value*, 16> ValueVector;
typedef SmallPtrSet<Value*, 16> ValueSet;
// Holds the Load and Store *instructions*.
@@ -1413,35 +2457,40 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
PtrRtCheck.Pointers.clear();
PtrRtCheck.Need = false;
- // Scan the BB and collect legal loads and stores.
- for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
- Instruction *I = it;
-
- // If this is a load, save it. If this instruction can read from memory
- // but is not a load, then we quit. Notice that we don't handle function
- // calls that read or write.
- if (I->mayReadFromMemory()) {
- LoadInst *Ld = dyn_cast<LoadInst>(I);
- if (!Ld) return false;
- if (!Ld->isSimple()) {
- DEBUG(dbgs() << "LV: Found a non-simple load.\n");
- return false;
+ // For each block.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+
+ // Scan the BB and collect legal loads and stores.
+ for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+ ++it) {
+
+ // If this is a load, save it. If this instruction can read from memory
+ // but is not a load, then we quit. Notice that we don't handle function
+ // calls that read or write.
+ if (it->mayReadFromMemory()) {
+ LoadInst *Ld = dyn_cast<LoadInst>(it);
+ if (!Ld) return false;
+ if (!Ld->isSimple()) {
+ DEBUG(dbgs() << "LV: Found a non-simple load.\n");
+ return false;
+ }
+ Loads.push_back(Ld);
+ continue;
}
- Loads.push_back(Ld);
- continue;
- }
- // Save store instructions. Abort if other instructions write to memory.
- if (I->mayWriteToMemory()) {
- StoreInst *St = dyn_cast<StoreInst>(I);
- if (!St) return false;
- if (!St->isSimple()) {
- DEBUG(dbgs() << "LV: Found a non-simple store.\n");
- return false;
+ // Save 'store' instructions. Abort if other instructions write to memory.
+ if (it->mayWriteToMemory()) {
+ StoreInst *St = dyn_cast<StoreInst>(it);
+ if (!St) return false;
+ if (!St->isSimple()) {
+ DEBUG(dbgs() << "LV: Found a non-simple store.\n");
+ return false;
+ }
+ Stores.push_back(St);
}
- Stores.push_back(St);
- }
- } // next instr.
+ } // next instr.
+ } // next block.
// Now we have two lists that hold the loads and the stores.
// Next, we find the pointers that they use.
@@ -1449,13 +2498,14 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
// Check if we see any stores. If there are no stores, then we don't
// care if the pointers are *restrict*.
if (!Stores.size()) {
- DEBUG(dbgs() << "LV: Found a read-only loop!\n");
- return true;
+ DEBUG(dbgs() << "LV: Found a read-only loop!\n");
+ return true;
}
- // Holds the read and read-write *pointers* that we find.
- ValueVector Reads;
- ValueVector ReadWrites;
+ // Holds the read and read-write *pointers* that we find. These maps hold
+ // unique values for pointers (so no need for multi-map).
+ AliasMap Reads;
+ AliasMap ReadWrites;
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
@@ -1466,8 +2516,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
ValueVector::iterator I, IE;
for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
- StoreInst *ST = dyn_cast<StoreInst>(*I);
- assert(ST && "Bad StoreInst");
+ StoreInst *ST = cast<StoreInst>(*I);
Value* Ptr = ST->getPointerOperand();
if (isUniform(Ptr)) {
@@ -1478,12 +2527,11 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
// If we did *not* see this pointer before, insert it to
// the read-write list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr))
- ReadWrites.push_back(Ptr);
+ ReadWrites.insert(std::make_pair(Ptr, ST));
}
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
- LoadInst *LD = dyn_cast<LoadInst>(*I);
- assert(LD && "Bad LoadInst");
+ LoadInst *LD = cast<LoadInst>(*I);
Value* Ptr = LD->getPointerOperand();
// If we did *not* see this pointer before, insert it to the
// read list. If we *did* see it before, then it is already in
@@ -1493,8 +2541,8 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
// If the address of i is unknown (for example A[B[i]]) then we may
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
- if (Seen.insert(Ptr) || !isConsecutiveGep(Ptr))
- Reads.push_back(Ptr);
+ if (Seen.insert(Ptr) || 0 == isConsecutivePtr(Ptr))
+ Reads.insert(std::make_pair(Ptr, LD));
}
// If we write (or read-write) to a single destination and there are no
@@ -1506,84 +2554,156 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- bool RT = true;
- for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
- if (hasComputableBounds(*I)) {
- PtrRtCheck.Pointers.push_back(*I);
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+ bool CanDoRT = true;
+ AliasMap::iterator MI, ME;
+ for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
+ Value *V = (*MI).first;
+ if (hasComputableBounds(V)) {
+ PtrRtCheck.insert(SE, TheLoop, V);
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
} else {
- RT = false;
+ CanDoRT = false;
break;
}
- for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
- if (hasComputableBounds(*I)) {
- PtrRtCheck.Pointers.push_back(*I);
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+ }
+ for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
+ Value *V = (*MI).first;
+ if (hasComputableBounds(V)) {
+ PtrRtCheck.insert(SE, TheLoop, V);
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
} else {
- RT = false;
+ CanDoRT = false;
break;
}
+ }
// Check that we did not collect too many pointers or find an
// unsizeable pointer.
- if (!RT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
- PtrRtCheck.Pointers.clear();
- RT = false;
+ if (!CanDoRT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
+ PtrRtCheck.reset();
+ CanDoRT = false;
}
- PtrRtCheck.Need = RT;
-
- if (RT) {
+ if (CanDoRT) {
DEBUG(dbgs() << "LV: We can perform a memory runtime check if needed.\n");
}
+ bool NeedRTCheck = false;
+
+ // Biggest vectorized access possible, vector width * unroll factor.
+ // TODO: We're being very pessimistic here, find a way to know the
+ // real access width before getting here.
+ unsigned MaxByteWidth = (TTI->getRegisterBitWidth(true) / 8) *
+ TTI->getMaximumUnrollFactor();
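  // For example, with assumed target numbers of 256-bit vector registers
  // and a maximum unroll factor of 2, the formula above gives
  // MaxByteWidth = (256 / 8) * 2 = 64 bytes per vectorized access.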
// Now that the pointers are in two lists (Reads and ReadWrites), we
// can check that there are no conflicts between each of the writes and
// between the writes to the reads.
- ValueSet WriteObjects;
+ // Note that WriteObjects duplicates the stores (indexed now by underlying
+ // objects) to avoid pointing to elements inside ReadWrites.
+ // TODO: Maybe create a new type where they can interact without duplication.
+ AliasMultiMap WriteObjects;
ValueVector TempObjects;
// Check that the read-writes do not conflict with other read-write
// pointers.
- for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) {
- GetUnderlyingObjects(*I, TempObjects, DL);
- for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
- it != e; ++it) {
- if (!isIdentifiedObject(*it)) {
- DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n");
- return RT;
+ bool AllWritesIdentified = true;
+ for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
+ Value *Val = (*MI).first;
+ Instruction *Inst = (*MI).second;
+
+ GetUnderlyingObjects(Val, TempObjects, DL);
+ for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
+ UI != UE; ++UI) {
+ if (!isIdentifiedObject(*UI)) {
+ DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **UI <<"\n");
+ NeedRTCheck = true;
+ AllWritesIdentified = false;
}
- if (!WriteObjects.insert(*it)) {
+
+ // Never seen it before, can't alias.
+ if (WriteObjects[*UI].empty()) {
+ DEBUG(dbgs() << "LV: Adding Underlying value:" << **UI <<"\n");
+ WriteObjects[*UI].push_back(Inst);
+ continue;
+ }
+ // Direct alias found.
+ if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+ << **UI <<"\n");
+ return false;
+ }
+ DEBUG(dbgs() << "LV: Found a conflicting global value:"
+ << **UI <<"\n");
+ DEBUG(dbgs() << "LV: While examining store:" << *Inst <<"\n");
+ DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
+
+ // If global alias, make sure they do alias.
+ if (hasPossibleGlobalWriteReorder(*UI,
+ Inst,
+ WriteObjects,
+ MaxByteWidth)) {
DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
- << **it <<"\n");
- return RT;
+ << **UI <<"\n");
+ return false;
}
+
+ // Didn't alias, insert into map for further reference.
+ WriteObjects[*UI].push_back(Inst);
}
TempObjects.clear();
}
/// Check that the reads don't conflict with the read-writes.
- for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) {
- GetUnderlyingObjects(*I, TempObjects, DL);
- for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
- it != e; ++it) {
- if (!isIdentifiedObject(*it)) {
- DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n");
- return RT;
+ for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
+ Value *Val = (*MI).first;
+ GetUnderlyingObjects(Val, TempObjects, DL);
+ for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
+ UI != UE; ++UI) {
+ // If all of the writes are identified then we don't care if the read
+ // pointer is identified or not.
+ if (!AllWritesIdentified && !isIdentifiedObject(*UI)) {
+ DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **UI <<"\n");
+ NeedRTCheck = true;
}
- if (WriteObjects.count(*it)) {
- DEBUG(dbgs() << "LV: Found a possible read/write reorder:"
- << **it <<"\n");
- return RT;
+
+ // Never seen it before, can't alias.
+ if (WriteObjects[*UI].empty())
+ continue;
+ // Direct alias found.
+ if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+ << **UI <<"\n");
+ return false;
+ }
+ DEBUG(dbgs() << "LV: Found a global value: "
+ << **UI <<"\n");
+ Instruction *Inst = (*MI).second;
+ DEBUG(dbgs() << "LV: While examining load:" << *Inst <<"\n");
+ DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
+
+ // If global alias, make sure they do alias.
+ if (hasPossibleGlobalWriteReorder(*UI,
+ Inst,
+ WriteObjects,
+ MaxByteWidth)) {
+ DEBUG(dbgs() << "LV: Found a possible read-write reorder:"
+ << **UI <<"\n");
+ return false;
}
}
TempObjects.clear();
}
- // It is safe to vectorize and we don't need any runtime checks.
- DEBUG(dbgs() << "LV: We don't need a runtime memory check.\n");
- PtrRtCheck.Pointers.clear();
- PtrRtCheck.Need = false;
+ PtrRtCheck.Need = NeedRTCheck;
+ if (NeedRTCheck && !CanDoRT) {
+ DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
+ "the array bounds.\n");
+ PtrRtCheck.reset();
+ return false;
+ }
+
+ DEBUG(dbgs() << "LV: We "<< (NeedRTCheck ? "" : "don't") <<
+ " need a runtime memory check.\n");
return true;
}
@@ -1592,38 +2712,43 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
if (Phi->getNumIncomingValues() != 2)
return false;
- // Find the possible incoming reduction variable.
- BasicBlock *BB = Phi->getParent();
- int SelfEdgeIdx = Phi->getBasicBlockIndex(BB);
- int InEdgeBlockIdx = (SelfEdgeIdx ? 0 : 1); // The other entry.
- Value *RdxStart = Phi->getIncomingValue(InEdgeBlockIdx);
+ // Reduction variables are only found in the loop header block.
+ if (Phi->getParent() != TheLoop->getHeader())
+ return false;
+
+ // Obtain the reduction start value from the value that comes from the loop
+ // preheader.
+ Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
// ExitInstruction is the single value which is used outside the loop.
// We only allow for a single reduction value to be used outside the loop.
// This includes users of the reduction and variables that form a cycle
// ending in the phi node.
Instruction *ExitInstruction = 0;
+ // Indicates that we found a binary operation in our scan.
+ bool FoundBinOp = false;
// Iter is our iterator. We start with the PHI node and scan for all of the
- // users of this instruction. All users must be instructions which can be
+ // users of this instruction. All users must be instructions that can be
// used as reduction variables (such as ADD). We may have a single
- // out-of-block user. They cycle must end with the original PHI.
- // Also, we can't have multiple block-local users.
+ // out-of-block user. The cycle must end with the original PHI.
Instruction *Iter = Phi;
while (true) {
- // Any reduction instr must be of one of the allowed kinds.
- if (!isReductionInstr(Iter, Kind))
+ // If the instruction has no users then this is a broken
+ // chain and can't be a reduction variable.
+ if (Iter->use_empty())
return false;
- // Did we found a user inside this block ?
+ // Did we find a user inside this loop already ?
bool FoundInBlockUser = false;
- // Did we reach the initial PHI node ?
+ // Did we reach the initial PHI node already ?
bool FoundStartPHI = false;
- // If the instruction has no users then this is a broken
- // chain and can't be a reduction variable.
- if (Iter->use_empty())
- return false;
+ // Is this a bin op ?
+ FoundBinOp |= !isa<PHINode>(Iter);
+
+ // Remember the current instruction.
+ Instruction *OldIter = Iter;
// For each of the *users* of iter.
for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
@@ -1634,75 +2759,171 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
FoundStartPHI = true;
continue;
}
+
// Check if we found the exit user.
BasicBlock *Parent = U->getParent();
- if (Parent != BB) {
- // We must have a single exit instruction.
+ if (!TheLoop->contains(Parent)) {
+ // Exit if you find multiple outside users.
if (ExitInstruction != 0)
return false;
ExitInstruction = Iter;
}
+
+ // We allow in-loop PHINodes which are not the original reduction PHI
+ // node. If this PHI is the only user of Iter (happens in IF w/ no ELSE
+ // structure) then don't skip this PHI.
+ if (isa<PHINode>(Iter) && isa<PHINode>(U) &&
+ U->getParent() != TheLoop->getHeader() &&
+ TheLoop->contains(U) &&
+ Iter->hasNUsesOrMore(2))
+ continue;
+
// We can't have multiple inside users.
if (FoundInBlockUser)
return false;
FoundInBlockUser = true;
+
+ // Any reduction instr must be of one of the allowed kinds.
+ if (!isReductionInstr(U, Kind))
+ return false;
+
+ // Reductions over instructions such as Div and Sub are only
+ // possible if the LHS is the reduction variable.
+ if (!U->isCommutative() && !isa<PHINode>(U) && U->getOperand(0) != Iter)
+ return false;
+
Iter = U;
}
+ // If all uses were skipped this can't be a reduction variable.
+ if (Iter == OldIter)
+ return false;
+
// We found a reduction var if we have reached the original
// phi node and we only have a single instruction with out-of-loop
// users.
- if (FoundStartPHI && ExitInstruction) {
- // This instruction is allowed to have out-of-loop users.
- AllowedExit.insert(ExitInstruction);
-
- // Save the description of this reduction variable.
- ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
- Reductions[Phi] = RD;
- return true;
- }
+ if (FoundStartPHI) {
+ // This instruction is allowed to have out-of-loop users.
+ AllowedExit.insert(ExitInstruction);
+
+ // Save the description of this reduction variable.
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
+ Reductions[Phi] = RD;
+ // We've ended the cycle. This is a reduction variable if we have an
+ // outside user and it has a binary op.
+ return FoundBinOp && ExitInstruction;
+ }
}
}
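// An illustrative source-level reduction (assumed example, not from the
// patch) of the shape this scan accepts: 'Sum' is the header PHI, the add
// is the single in-loop user forming the cycle, and the returned value is
// the one permitted out-of-loop user (the ExitInstruction).
static int sumReduction(const int *A, int N) {
  int Sum = 0;                // RdxStart, flowing in from the preheader
  for (int i = 0; i < N; ++i)
    Sum = Sum + A[i];         // RK_IntegerAdd link; cycle ends at the PHI
  return Sum;                 // single outside user of the exit value
}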
bool
LoopVectorizationLegality::isReductionInstr(Instruction *I,
ReductionKind Kind) {
- switch (I->getOpcode()) {
- default:
- return false;
- case Instruction::PHI:
- // possibly.
- return true;
- case Instruction::Add:
- case Instruction::Sub:
- return Kind == IntegerAdd;
- case Instruction::Mul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- return Kind == IntegerMult;
- case Instruction::And:
- return Kind == IntegerAnd;
- case Instruction::Or:
- return Kind == IntegerOr;
- case Instruction::Xor:
- return Kind == IntegerXor;
- }
+ bool FP = I->getType()->isFloatingPointTy();
+ bool FastMath = (FP && I->isCommutative() && I->isAssociative());
+
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::PHI:
+ if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
+ return false;
+ // possibly.
+ return true;
+ case Instruction::Sub:
+ case Instruction::Add:
+ return Kind == RK_IntegerAdd;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::Mul:
+ return Kind == RK_IntegerMult;
+ case Instruction::And:
+ return Kind == RK_IntegerAnd;
+ case Instruction::Or:
+ return Kind == RK_IntegerOr;
+ case Instruction::Xor:
+ return Kind == RK_IntegerXor;
+ case Instruction::FMul:
+ return Kind == RK_FloatMult && FastMath;
+ case Instruction::FAdd:
+ return Kind == RK_FloatAdd && FastMath;
+ }
}
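// An illustrative example (assumed, not from the patch): this float
// reduction is only recognized when its FP ops carry commutative and
// associative (fast-math) semantics, because vectorization reassociates
// the additions and would otherwise change the rounding.
static float sumReductionFP(const float *A, int N) {
  float Sum = 0.0f;
  for (int i = 0; i < N; ++i)
    Sum += A[i];              // RK_FloatAdd; needs fast-math style flags
  return Sum;
}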
-bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
- // Check that the PHI is consecutive and starts at zero.
+LoopVectorizationLegality::InductionKind
+LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
+ Type *PhiTy = Phi->getType();
+ // We only handle integer and pointer induction variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return IK_NoInduction;
+
+ // Check that the PHI is consecutive.
const SCEV *PhiScev = SE->getSCEV(Phi);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
if (!AR) {
DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
- return false;
+ return IK_NoInduction;
}
const SCEV *Step = AR->getStepRecurrence(*SE);
- if (!Step->isOne()) {
- DEBUG(dbgs() << "LV: PHI stride does not equal one.\n");
+ // Integer inductions need to have a stride of one.
+ if (PhiTy->isIntegerTy()) {
+ if (Step->isOne())
+ return IK_IntInduction;
+ if (Step->isAllOnesValue())
+ return IK_ReverseIntInduction;
+ return IK_NoInduction;
+ }
+
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C)
+ return IK_NoInduction;
+
+ assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+ uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
+ if (C->getValue()->equalsInt(Size))
+ return IK_PtrInduction;
+ else if (C->getValue()->equalsInt(0 - Size))
+ return IK_ReversePtrInduction;
+
+ return IK_NoInduction;
+}
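// Illustrative source-level instances (assumed example, not from the
// patch) of the induction kinds classified above; integer strides are in
// elements, pointer strides in bytes.
static void inductionKinds(int *P, const int *Q, int N) {
  for (int i = 0, j = N - 1; i < N; ++i, --j, ++P, --Q) {
    // i: step +1            -> IK_IntInduction
    // j: step -1            -> IK_ReverseIntInduction
    // P: step +sizeof(int)  -> IK_PtrInduction
    // Q: step -sizeof(int)  -> IK_ReversePtrInduction
    *P = *Q + i + j;
  }
}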
+
+bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
+ Value *In0 = const_cast<Value*>(V);
+ PHINode *PN = dyn_cast_or_null<PHINode>(In0);
+ if (!PN)
return false;
+
+ return Inductions.count(PN);
+}
+
+bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
+ assert(TheLoop->contains(BB) && "Unknown block used");
+
+ // Blocks that do not dominate the latch need predication.
+ BasicBlock* Latch = TheLoop->getLoopLatch();
+ return !DT->dominates(BB, Latch);
+}
+
+bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ // We don't predicate loads/stores at the moment.
+ if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
+ return false;
+
+ // The instructions below can trap.
+ switch (it->getOpcode()) {
+ default: continue;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ return false;
+ }
}
+
return true;
}
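// An illustrative block (assumed example, not from the patch) that this
// check rejects: the body both touches memory and contains an SDiv, either
// of which is unsafe to execute unconditionally for masked-off iterations.
static void guardedDiv(int *A, const int *B, int N) {
  for (int i = 0; i < N; ++i)
    if (B[i] != 0)
      A[i] = A[i] / B[i]; // load/store and SDiv inside a predicated block
}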
@@ -1715,11 +2936,64 @@ bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
return AR->isAffine();
}
-unsigned
-LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
- if (!VTTI) {
- DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
- return 1;
+LoopVectorizationCostModel::VectorizationFactor
+LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
+ unsigned UserVF) {
+ // Width 1 means no vectorization.
+ VectorizationFactor Factor = { 1U, 0U };
+ if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
+ DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
+ return Factor;
+ }
+
+ // Find the trip count.
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
+ DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n");
+
+ unsigned WidestType = getWidestType();
+ unsigned WidestRegister = TTI.getRegisterBitWidth(true);
+ unsigned MaxVectorSize = WidestRegister / WidestType;
+ DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
+ DEBUG(dbgs() << "LV: The Widest register is:" << WidestRegister << "bits.\n");
+
+ if (MaxVectorSize == 0) {
+ DEBUG(dbgs() << "LV: The target has no vector registers.\n");
+ MaxVectorSize = 1;
+ }
+
+ assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements"
+ " into one vector!");
+
+ unsigned VF = MaxVectorSize;
+
+ // If we optimize the program for size, avoid creating the tail loop.
+ if (OptForSize) {
+ // If we are unable to calculate the trip count then don't try to vectorize.
+ if (TC < 2) {
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+ return Factor;
+ }
+
+ // Find the maximum SIMD width that can fit within the trip count.
+ VF = TC % MaxVectorSize;
+
+ if (VF == 0)
+ VF = MaxVectorSize;
+
+ // If the trip count that we found modulo the vectorization factor is not
+ // zero then we require a tail.
+ if (VF < 2) {
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+ return Factor;
+ }
+ }
+
+ if (UserVF != 0) {
+ assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
+ DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n");
+
+ Factor.Width = UserVF;
+ return Factor;
}
float Cost = expectedCost(1);
@@ -1739,23 +3013,278 @@ LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
}
DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
- return Width;
+ Factor.Width = Width;
+ Factor.Cost = Width * Cost;
+ return Factor;
}
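// A worked instance of the selection above (assumed target numbers, not
// from the patch): 128-bit registers, widest loop type 32 bits, and a
// known trip count of 10 under OptForSize.
static unsigned selectVFExample() {
  unsigned WidestRegister = 128, WidestType = 32, TC = 10;
  unsigned MaxVectorSize = WidestRegister / WidestType; // 4 lanes
  unsigned VF = TC % MaxVectorSize;                     // 10 % 4 = 2
  if (VF == 0)
    VF = MaxVectorSize; // trip count divides evenly: no tail needed
  return VF;            // 2 >= 2, so vectorization may proceed in Os
}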
-unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
- // We can only estimate the cost of single basic block loops.
- assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop");
+unsigned LoopVectorizationCostModel::getWidestType() {
+ unsigned MaxWidth = 8;
+
+ // For each block.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+ BasicBlock *BB = *bb;
+
+ // For each instruction in the loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ Type *T = it->getType();
- BasicBlock *BB = TheLoop->getHeader();
+ // Only examine Loads, Stores and PHINodes.
+ if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
+ continue;
+
+ // Examine PHI nodes that are reduction variables.
+ if (PHINode *PN = dyn_cast<PHINode>(it))
+ if (!Legal->getReductionVars()->count(PN))
+ continue;
+
+ // Examine the stored values.
+ if (StoreInst *ST = dyn_cast<StoreInst>(it))
+ T = ST->getValueOperand()->getType();
+
+ // Ignore loaded pointer types and stored pointer types that are not
+ // consecutive. However, we do want to take consecutive stores/loads of
+ // pointer vectors into account.
+ if (T->isPointerTy() && !isConsecutiveLoadOrStore(it))
+ continue;
+
+ MaxWidth = std::max(MaxWidth,
+ (unsigned)DL->getTypeSizeInBits(T->getScalarType()));
+ }
+ }
+
+ return MaxWidth;
+}
+
+unsigned
+LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
+ unsigned UserUF,
+ unsigned VF,
+ unsigned LoopCost) {
+
+ // -- The unroll heuristics --
+ // We unroll the loop in order to expose ILP and reduce the loop overhead.
+ // There are many micro-architectural considerations that we can't predict
+ // at this level. For example frontend pressure (on decode or fetch) due to
+ // code size, or the number and capabilities of the execution ports.
+ //
+ // We use the following heuristics to select the unroll factor:
+ // 1. If the code has reductions then we unroll in order to break the
+ // cross-iteration dependency.
+ // 2. If the loop is really small then we unroll in order to reduce the loop
+ // overhead.
+ // 3. We don't unroll if we think that we will spill registers to memory due
+ // to the increased register pressure.
+
+ // Use the user preference, unless 'auto' is selected.
+ if (UserUF != 0)
+ return UserUF;
+
+ // When we optimize for size we don't unroll.
+ if (OptForSize)
+ return 1;
+
+ // Do not unroll loops with a relatively small trip count.
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop,
+ TheLoop->getLoopLatch());
+ if (TC > 1 && TC < TinyTripCountUnrollThreshold)
+ return 1;
+
+ unsigned TargetVectorRegisters = TTI.getNumberOfRegisters(true);
+ DEBUG(dbgs() << "LV: The target has " << TargetVectorRegisters <<
+ " vector registers\n");
+
+ LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage();
+ // We divide by these constants so assume that we have at least one
+ // instruction that uses at least one register.
+ R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
+ R.NumInstructions = std::max(R.NumInstructions, 1U);
+
+ // We calculate the unroll factor using the following formula.
+ // Subtract the number of loop invariants from the number of available
+ // registers. These registers are used by all of the unrolled instances.
+ // Next, divide the remaining registers by the number of registers that is
+ // required by the loop, in order to estimate how many parallel instances
+ // fit without causing spills.
+ unsigned UF = (TargetVectorRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers;
+
+ // Clamp the unroll factor ranges to reasonable factors.
+ unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
+
+ // If we did not calculate the cost for VF (because the user selected the VF)
+ // then we calculate the cost of VF here.
+ if (LoopCost == 0)
+ LoopCost = expectedCost(VF);
+
+ // Clamp the calculated UF to be between the 1 and the max unroll factor
+ // that the target allows.
+ if (UF > MaxUnrollSize)
+ UF = MaxUnrollSize;
+ else if (UF < 1)
+ UF = 1;
+
+ if (Legal->getReductionVars()->size()) {
+ DEBUG(dbgs() << "LV: Unrolling because of reductions. \n");
+ return UF;
+ }
+
+ // We want to unroll tiny loops in order to reduce the loop overhead.
+ // We assume that the cost overhead is 1 and we use the cost model
+ // to estimate the cost of the loop and unroll until the cost of the
+ // loop overhead is about 5% of the cost of the loop.
+ DEBUG(dbgs() << "LV: Loop cost is "<< LoopCost <<" \n");
+ if (LoopCost < 20) {
+ DEBUG(dbgs() << "LV: Unrolling to reduce branch cost. \n");
+ unsigned NewUF = 20/LoopCost + 1;
+ return std::min(NewUF, UF);
+ }
+
+ DEBUG(dbgs() << "LV: Not Unrolling. \n");
+ return 1;
+}
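#include <algorithm>

// A worked instance of the heuristic above (assumed numbers, not from the
// patch): 16 vector registers, 3 loop invariants, at most 5 values live at
// once, a target maximum unroll factor of 4, and a loop cost of 8.
static unsigned selectUFExample() {
  unsigned TargetVectorRegisters = 16, LoopInvariantRegs = 3;
  unsigned MaxLocalUsers = 5, MaxUnrollSize = 4, LoopCost = 8;
  unsigned UF = (TargetVectorRegisters - LoopInvariantRegs) / MaxLocalUsers;
  UF = std::min(std::max(UF, 1u), MaxUnrollSize);   // clamp: UF == 2
  if (LoopCost < 20)                                // tiny loop: unroll until
    UF = std::min(20 / LoopCost + 1, UF);           // overhead ~5%: min(3,2)
  return UF;                                        // == 2
}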
+
+LoopVectorizationCostModel::RegisterUsage
+LoopVectorizationCostModel::calculateRegisterUsage() {
+ // This function calculates the register usage by measuring the highest number
+ // of values that are alive at a single location. Obviously, this is a very
+ // rough estimation. We scan the loop in topological order and
+ // assign a number to each instruction. We use RPO to ensure that defs are
+ // met before their users. We assume that each instruction that has in-loop
+ // users starts an interval. We record every time that an in-loop value is
+ // used, so we have a list of the first and last occurrences of each
+ // instruction. Next, we transpose this data structure into a multi map that
+ // holds the list of intervals that *end* at a specific location. This multi
+ // map allows us to perform a linear search. We scan the instructions linearly
+ // and record each time that a new interval starts, by placing it in a set.
+ // If we find this value in the multi-map then we remove it from the set.
+ // The max register usage is the maximum size of the set.
+ // We also search for instructions that are defined outside the loop, but are
+ // used inside the loop. We need this number separately from the max-interval
+ // usage number because when we unroll, loop-invariant values do not take
+ // more registers.
+ LoopBlocksDFS DFS(TheLoop);
+ DFS.perform(LI);
+
+ RegisterUsage R;
+ R.NumInstructions = 0;
+
+ // Each 'key' in the map opens a new interval. The values
+ // of the map are the index of the 'last seen' usage of the
+ // instruction that is the key.
+ typedef DenseMap<Instruction*, unsigned> IntervalMap;
+ // Maps instruction to its index.
+ DenseMap<unsigned, Instruction*> IdxToInstr;
+ // Marks the end of each interval.
+ IntervalMap EndPoint;
+ // Saves the list of instruction indices that are used in the loop.
+ SmallSet<Instruction*, 8> Ends;
+ // Saves the list of values that are used in the loop but are
+ // defined outside the loop, such as arguments and constants.
+ SmallPtrSet<Value*, 8> LoopInvariants;
+
+ unsigned Index = 0;
+ for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
+ be = DFS.endRPO(); bb != be; ++bb) {
+ R.NumInstructions += (*bb)->size();
+ for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+ ++it) {
+ Instruction *I = it;
+ IdxToInstr[Index++] = I;
+
+ // Save the end location of each USE.
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value *U = I->getOperand(i);
+ Instruction *Instr = dyn_cast<Instruction>(U);
+
+ // Ignore non-instruction values such as arguments, constants, etc.
+ if (!Instr) continue;
+
+ // If this instruction is outside the loop then record it and continue.
+ if (!TheLoop->contains(Instr)) {
+ LoopInvariants.insert(Instr);
+ continue;
+ }
+
+ // Overwrite previous end points.
+ EndPoint[Instr] = Index;
+ Ends.insert(Instr);
+ }
+ }
+ }
+
+ // Saves the list of intervals that end with the index in 'key'.
+ typedef SmallVector<Instruction*, 2> InstrList;
+ DenseMap<unsigned, InstrList> TransposeEnds;
+
+ // Transpose the EndPoints to a list of values that end at each index.
+ for (IntervalMap::iterator it = EndPoint.begin(), e = EndPoint.end();
+ it != e; ++it)
+ TransposeEnds[it->second].push_back(it->first);
+
+ SmallSet<Instruction*, 8> OpenIntervals;
+ unsigned MaxUsage = 0;
+
+
+ DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
+ for (unsigned int i = 0; i < Index; ++i) {
+ Instruction *I = IdxToInstr[i];
+ // Ignore instructions that are never used within the loop.
+ if (!Ends.count(I)) continue;
+
+ // Remove all of the instructions that end at this location.
+ InstrList &List = TransposeEnds[i];
+ for (unsigned int j=0, e = List.size(); j < e; ++j)
+ OpenIntervals.erase(List[j]);
+
+ // Count the number of live intervals.
+ MaxUsage = std::max(MaxUsage, OpenIntervals.size());
+
+ DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " <<
+ OpenIntervals.size() <<"\n");
+
+ // Add the current instruction to the list of open intervals.
+ OpenIntervals.insert(I);
+ }
+
+ unsigned Invariant = LoopInvariants.size();
+ DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << " \n");
+ DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << " \n");
+ DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << " \n");
+
+ R.LoopInvariantRegs = Invariant;
+ R.MaxLocalUsers = MaxUsage;
+ return R;
+}
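#include <algorithm>
#include <cstddef>
#include <map>
#include <set>
#include <vector>

// A minimal standalone model of the interval scan above (assumed inputs,
// not from the patch): instructions are numbered in RPO, each def opens an
// interval, its last in-loop use closes it, and the answer is the largest
// number of simultaneously open intervals.
static std::size_t maxOpenIntervals(
    const std::map<unsigned, unsigned> &EndPoint, unsigned NumInstrs) {
  // Transpose: index -> list of intervals that end at that index.
  std::map<unsigned, std::vector<unsigned> > TransposeEnds;
  for (std::map<unsigned, unsigned>::const_iterator I = EndPoint.begin(),
       E = EndPoint.end(); I != E; ++I)
    TransposeEnds[I->second].push_back(I->first);

  std::set<unsigned> OpenIntervals;
  std::size_t MaxUsage = 0;
  for (unsigned i = 0; i < NumInstrs; ++i) {
    std::vector<unsigned> &List = TransposeEnds[i];
    for (std::size_t j = 0; j < List.size(); ++j)
      OpenIntervals.erase(List[j]);     // intervals ending at this index
    MaxUsage = std::max(MaxUsage, OpenIntervals.size());
    OpenIntervals.insert(i);            // instruction i opens its interval
  }
  return MaxUsage; // e.g. EndPoint {{0,3},{1,3},{2,3}}, 4 instrs -> 2
}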
+
+unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
unsigned Cost = 0;
- // For each instruction in the old loop.
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- Instruction *Inst = it;
- unsigned C = getInstructionCost(Inst, VF);
- Cost += C;
- DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF <<
- " For instruction: "<< *Inst << "\n");
+ // For each block.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+ unsigned BlockCost = 0;
+ BasicBlock *BB = *bb;
+
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ // Skip dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(it))
+ continue;
+
+ unsigned C = getInstructionCost(it, VF);
+ BlockCost += C;
+ DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " <<
+ VF << " For instruction: "<< *it << "\n");
+ }
+
+ // We assume that if-converted blocks have a 50% chance of being executed.
+ // When the code is scalar, some of the blocks are avoided due to CF.
+ // When the code is vectorized we execute all code paths.
+ if (Legal->blockNeedsPredication(*bb) && VF == 1)
+ BlockCost /= 2;
+
+ Cost += BlockCost;
}
return Cost;
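// A worked instance of the heuristic above (assumed numbers, not from the
// patch): a predicated block whose instructions cost 10 contributes 5 to
// the scalar (VF == 1) estimate but the full 10 to every vector estimate,
// since vectorized code executes both sides of the branch.
static unsigned blockCostExample(unsigned BlockCost, bool NeedsPredication,
                                 unsigned VF) {
  if (NeedsPredication && VF == 1)
    BlockCost /= 2; // scalar code is assumed to skip the block half the time
  return BlockCost; // blockCostExample(10, true, 1) == 5
}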
@@ -1763,8 +3292,6 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
unsigned
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
- assert(VTTI && "Invalid vector target transformation info");
-
// If we know that this instruction will remain uniform, check the cost of
// the scalar version.
if (Legal->isUniformAfterVectorization(I))
@@ -1773,147 +3300,173 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
Type *RetTy = I->getType();
Type *VectorTy = ToVectorTy(RetTy, VF);
-
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
- case Instruction::GetElementPtr:
- // We mark this instruction as zero-cost because scalar GEPs are usually
- // lowered to the intruction addressing mode. At the moment we don't
- // generate vector geps.
- return 0;
- case Instruction::Br: {
- return VTTI->getCFInstrCost(I->getOpcode());
- }
- case Instruction::PHI:
- return 0;
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy);
- }
- case Instruction::Select: {
- SelectInst *SI = cast<SelectInst>(I);
- const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
- bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
- Type *CondTy = SI->getCondition()->getType();
- if (ScalarCond)
- CondTy = VectorType::get(CondTy, VF);
-
- return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
- }
- case Instruction::ICmp:
- case Instruction::FCmp: {
- Type *ValTy = I->getOperand(0)->getType();
- VectorTy = ToVectorTy(ValTy, VF);
- return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy);
- }
- case Instruction::Store: {
- StoreInst *SI = cast<StoreInst>(I);
- Type *ValTy = SI->getValueOperand()->getType();
- VectorTy = ToVectorTy(ValTy, VF);
-
- if (VF == 1)
- return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
- SI->getAlignment(), SI->getPointerAddressSpace());
-
- // Scalarized stores.
- if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
- unsigned Cost = 0;
- unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
- ValTy);
- // The cost of extracting from the value vector.
- Cost += VF * (ExtCost);
- // The cost of the scalar stores.
- Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
- ValTy->getScalarType(),
- SI->getAlignment(),
- SI->getPointerAddressSpace());
- return Cost;
- }
-
- // Wide stores.
- return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(),
- SI->getPointerAddressSpace());
- }
- case Instruction::Load: {
- LoadInst *LI = cast<LoadInst>(I);
-
- if (VF == 1)
- return VTTI->getMemoryOpCost(I->getOpcode(), RetTy,
- LI->getAlignment(),
- LI->getPointerAddressSpace());
-
- // Scalarized loads.
- if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
- unsigned Cost = 0;
- unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
- // The cost of inserting the loaded value into the result vector.
- Cost += VF * (InCost);
- // The cost of the scalar stores.
- Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
- RetTy->getScalarType(),
- LI->getAlignment(),
- LI->getPointerAddressSpace());
- return Cost;
+ case Instruction::GetElementPtr:
+ // We mark this instruction as zero-cost because the cost of GEPs in
+ // vectorized code depends on whether the corresponding memory instruction
+ // is scalarized or not. Therefore, we handle GEPs with the memory
+ // instruction cost.
+ return 0;
+ case Instruction::Br: {
+ return TTI.getCFInstrCost(I->getOpcode());
+ }
+ case Instruction::PHI:
+ //TODO: IF-converted IFs become selects.
+ return 0;
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Certain instructions can be cheaper to vectorize if they have a constant
+ // second vector operand. One example of this are shifts on x86.
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_AnyValue;
+
+ if (isa<ConstantInt>(I->getOperand(1)))
+ Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+
+ return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK);
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
+ bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
+ Type *CondTy = SI->getCondition()->getType();
+ if (!ScalarCond)
+ CondTy = VectorType::get(CondTy, VF);
+
+ return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ Type *ValTy = I->getOperand(0)->getType();
+ VectorTy = ToVectorTy(ValTy, VF);
+ return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
+ }
+ case Instruction::Store:
+ case Instruction::Load: {
+ StoreInst *SI = dyn_cast<StoreInst>(I);
+ LoadInst *LI = dyn_cast<LoadInst>(I);
+ Type *ValTy = (SI ? SI->getValueOperand()->getType() :
+ LI->getType());
+ VectorTy = ToVectorTy(ValTy, VF);
+
+ unsigned Alignment = SI ? SI->getAlignment() : LI->getAlignment();
+ unsigned AS = SI ? SI->getPointerAddressSpace() :
+ LI->getPointerAddressSpace();
+ Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand();
+ // We add the cost of address computation here instead of with the gep
+ // instruction because only here do we know whether the operation is
+ // scalarized.
+ if (VF == 1)
+ return TTI.getAddressComputationCost(VectorTy) +
+ TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+
+ // Scalarized loads/stores.
+ int Stride = Legal->isConsecutivePtr(Ptr);
+ bool Reverse = Stride < 0;
+ if (0 == Stride) {
+ unsigned Cost = 0;
+ // The cost of extracting from the value vector and pointer vector.
+ Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
+ for (unsigned i = 0; i < VF; ++i) {
+ // The cost of extracting the pointer operand.
+ Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
+ // In case of STORE, the cost of ExtractElement from the vector.
+ // In case of LOAD, the cost of InsertElement into the returned
+ // vector.
+ Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement :
+ Instruction::InsertElement,
+ VectorTy, i);
}
- // Wide loads.
- return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
- LI->getPointerAddressSpace());
- }
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
- return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
+ // The cost of the scalar loads/stores.
+ Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType());
+ Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
+ Alignment, AS);
+ return Cost;
}
- default: {
- // We are scalarizing the instruction. Return the cost of the scalar
- // instruction, plus the cost of insert and extract into vector
- // elements, times the vector width.
- unsigned Cost = 0;
- bool IsVoid = RetTy->isVoidTy();
+ // Wide loads/stores.
+ unsigned Cost = TTI.getAddressComputationCost(VectorTy);
+ Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
- unsigned InsCost = (IsVoid ? 0 :
- VTTI->getInstrCost(Instruction::InsertElement,
- VectorTy));
-
- unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
- VectorTy);
+ if (Reverse)
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
+ VectorTy, 0);
+ return Cost;
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ // We optimize the truncation of induction variables: the cost of the
+ // truncation is the same as the scalar operation.
+ if (I->getOpcode() == Instruction::Trunc &&
+ Legal->isInductionVariable(I->getOperand(0)))
+ return TTI.getCastInstrCost(I->getOpcode(), I->getType(),
+ I->getOperand(0)->getType());
+
+ Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+ return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
+ }
+ case Instruction::Call: {
+ CallInst *CI = cast<CallInst>(I);
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Not an intrinsic call!");
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
+ SmallVector<Type*, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+ return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
+ }
+ default: {
+ // We are scalarizing the instruction. Return the cost of the scalar
+ // instruction, plus the cost of insert and extract into vector
+ // elements, times the vector width.
+ unsigned Cost = 0;
+
+ if (!RetTy->isVoidTy() && VF != 1) {
+ unsigned InsCost = TTI.getVectorInstrCost(Instruction::InsertElement,
+ VectorTy);
+ unsigned ExtCost = TTI.getVectorInstrCost(Instruction::ExtractElement,
+ VectorTy);
// The cost of inserting the results plus extracting each one of the
// operands.
Cost += VF * (InsCost + ExtCost * I->getNumOperands());
-
- // The cost of executing VF copies of the scalar instruction.
- Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy);
- return Cost;
}
+
+ // The cost of executing VF copies of the scalar instruction. This opcode
+ // is unknown. Assume that it is the same as 'mul'.
+ Cost += VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy);
+ return Cost;
+ }
} // end of switch.
}
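The switch above prices three situations: wide memory operations on consecutive pointers, scalarized (gather/scatter) operations priced per lane, and unknown opcodes that are conservatively scalarized. A minimal, self-contained sketch of that last formula, with hypothetical names and per-element costs assumed to come from TTI:

    // Sketch of the default-case scalarization cost (illustrative only).
    unsigned scalarizationCost(unsigned VF, unsigned InsCost, unsigned ExtCost,
                               unsigned NumOperands, unsigned ScalarOpCost,
                               bool HasResult) {
      unsigned Cost = 0;
      // Insert each of the VF scalar results into the result vector, and
      // extract every operand lane feeding a scalar copy.
      if (HasResult && VF != 1)
        Cost += VF * (InsCost + ExtCost * NumOperands);
      // Execute VF copies of the scalar instruction.
      Cost += VF * ScalarOpCost;
      return Cost;
    }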
@@ -1923,12 +3476,11 @@ Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
return VectorType::get(Scalar, VF);
}
-} // namespace
-
char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
@@ -1939,3 +3491,14 @@ namespace llvm {
}
}
+bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
+ // Check for a store.
+ if (StoreInst *ST = dyn_cast<StoreInst>(Inst))
+ return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0;
+
+ // Check for a load.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0;
+
+ return false;
+}
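isConsecutivePtr() returns a signed unit stride: positive for forward consecutive accesses, negative for reversed ones, zero otherwise. A hypothetical helper that spells out how the cost model above consumes that convention:

    enum AccessKind { AK_Scalarized, AK_WideReverse, AK_Wide };

    // Illustrative only: maps the stride convention to the pricing strategy.
    static AccessKind classifyAccess(int Stride) {
      if (Stride == 0)
        return AK_Scalarized;  // gather/scatter, priced per lane
      if (Stride < 0)
        return AK_WideReverse; // wide op plus an SK_Reverse shuffle
      return AK_Wide;          // a single wide load or store
    }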
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index d26973a7b380..19eefd2f87e0 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,4 +1,4 @@
-//===-- Vectorize.cpp -----------------------------------------------------===//
+ //===-- Vectorize.cpp -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,13 +13,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm-c/Transforms/Vectorize.h"
+#include "llvm/Transforms/Vectorize.h"
#include "llvm-c/Initialization.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm-c/Transforms/Vectorize.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
-#include "llvm/Transforms/Vectorize.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
using namespace llvm;
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
deleted file mode 100644
index b72c17f667fb..000000000000
--- a/lib/VMCore/AsmWriter.cpp
+++ /dev/null
@@ -1,2160 +0,0 @@
-//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This library implements the functionality defined in llvm/Assembly/Writer.h
-//
-// Note that these routines must be extremely tolerant of various errors in the
-// LLVM code, because they can be used for debugging transformations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Assembly/Writer.h"
-#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Assembly/AssemblyAnnotationWriter.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
-#include "llvm/Module.h"
-#include "llvm/TypeFinder.h"
-#include "llvm/ValueSymbolTable.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/FormattedStream.h"
-#include <algorithm>
-#include <cctype>
-using namespace llvm;
-
-// Make virtual table appear in this compilation unit.
-AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
-
-//===----------------------------------------------------------------------===//
-// Helper Functions
-//===----------------------------------------------------------------------===//
-
-static const Module *getModuleFromVal(const Value *V) {
- if (const Argument *MA = dyn_cast<Argument>(V))
- return MA->getParent() ? MA->getParent()->getParent() : 0;
-
- if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
- return BB->getParent() ? BB->getParent()->getParent() : 0;
-
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
- const Function *M = I->getParent() ? I->getParent()->getParent() : 0;
- return M ? M->getParent() : 0;
- }
-
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return GV->getParent();
- return 0;
-}
-
-static void PrintCallingConv(unsigned cc, raw_ostream &Out)
-{
- switch (cc) {
- case CallingConv::Fast: Out << "fastcc"; break;
- case CallingConv::Cold: Out << "coldcc"; break;
- case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
- case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
- case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
- case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
- case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
- case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
- case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc"; break;
- case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break;
- case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break;
- case CallingConv::PTX_Device: Out << "ptx_device"; break;
- default: Out << "cc" << cc; break;
- }
-}
-
-// PrintEscapedString - Print each character of the specified string, escaping
-// it if it is not printable or if it is an escape char.
-static void PrintEscapedString(StringRef Name, raw_ostream &Out) {
- for (unsigned i = 0, e = Name.size(); i != e; ++i) {
- unsigned char C = Name[i];
- if (isprint(C) && C != '\\' && C != '"')
- Out << C;
- else
- Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
- }
-}
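A worked example of the escaping rule, assuming hexdigit()'s default uppercase output:

    // Printable characters other than '"' and '\' pass through; everything
    // else becomes '\' plus two hex digits:
    //   PrintEscapedString("hi \"x\"\n", OS)   emits   hi \22x\22\0A
    //   PrintEscapedString("a\\b", OS)         emits   a\5Cb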
-
-enum PrefixType {
- GlobalPrefix,
- LabelPrefix,
- LocalPrefix,
- NoPrefix
-};
-
-/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
-/// prefixed with % (if the string only contains simple characters) or is
-/// surrounded with ""'s (if it has special chars in it). Print it out.
-static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
- assert(!Name.empty() && "Cannot get empty name!");
- switch (Prefix) {
- case NoPrefix: break;
- case GlobalPrefix: OS << '@'; break;
- case LabelPrefix: break;
- case LocalPrefix: OS << '%'; break;
- }
-
- // Scan the name to see if it needs quotes first.
- bool NeedsQuotes = isdigit(Name[0]);
- if (!NeedsQuotes) {
- for (unsigned i = 0, e = Name.size(); i != e; ++i) {
- // By making this unsigned, the value passed in to isalnum will always be
- // in the range 0-255. This is important when building with MSVC because
- // its implementation will assert. This situation can arise when dealing
- // with UTF-8 multibyte characters.
- unsigned char C = Name[i];
- if (!isalnum(C) && C != '-' && C != '.' && C != '_') {
- NeedsQuotes = true;
- break;
- }
- }
- }
-
- // If we didn't need any quotes, just write out the name in one blast.
- if (!NeedsQuotes) {
- OS << Name;
- return;
- }
-
- // Okay, we need quotes. Output the quotes and escape any scary characters as
- // needed.
- OS << '"';
- PrintEscapedString(Name, OS);
- OS << '"';
-}
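Worked examples of the quoting rules (names chosen for illustration):

    //   PrintLLVMName(OS, "tmp",  LocalPrefix)   ->  %tmp
    //   PrintLLVMName(OS, "2up",  LocalPrefix)   ->  %"2up"  (leading digit)
    //   PrintLLVMName(OS, "a b",  LocalPrefix)   ->  %"a b"  (space needs quotes)
    //   PrintLLVMName(OS, "main", GlobalPrefix)  ->  @main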
-
-/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
-/// prefixed with % (if the string only contains simple characters) or is
-/// surrounded with ""'s (if it has special chars in it). Print it out.
-static void PrintLLVMName(raw_ostream &OS, const Value *V) {
- PrintLLVMName(OS, V->getName(),
- isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
-}
-
-//===----------------------------------------------------------------------===//
-// TypePrinting Class: Type printing machinery
-//===----------------------------------------------------------------------===//
-
-/// TypePrinting - Type printing machinery.
-namespace {
-class TypePrinting {
- TypePrinting(const TypePrinting &) LLVM_DELETED_FUNCTION;
- void operator=(const TypePrinting&) LLVM_DELETED_FUNCTION;
-public:
-
- /// NamedTypes - The named types that are used by the current module.
- TypeFinder NamedTypes;
-
- /// NumberedTypes - The numbered types, along with their value.
- DenseMap<StructType*, unsigned> NumberedTypes;
-
-
- TypePrinting() {}
- ~TypePrinting() {}
-
- void incorporateTypes(const Module &M);
-
- void print(Type *Ty, raw_ostream &OS);
-
- void printStructBody(StructType *Ty, raw_ostream &OS);
-};
-} // end anonymous namespace.
-
-
-void TypePrinting::incorporateTypes(const Module &M) {
- NamedTypes.run(M, false);
-
- // The list of struct types we got back includes all the struct types, split
- // the unnamed ones out to a numbering and remove the anonymous structs.
- unsigned NextNumber = 0;
-
- std::vector<StructType*>::iterator NextToUse = NamedTypes.begin(), I, E;
- for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) {
- StructType *STy = *I;
-
- // Ignore anonymous types.
- if (STy->isLiteral())
- continue;
-
- if (STy->getName().empty())
- NumberedTypes[STy] = NextNumber++;
- else
- *NextToUse++ = STy;
- }
-
- NamedTypes.erase(NextToUse, NamedTypes.end());
-}
-
-
-/// CalcTypeName - Write the specified type to the specified raw_ostream, making
-/// use of type names or up references to shorten the type name where possible.
-void TypePrinting::print(Type *Ty, raw_ostream &OS) {
- switch (Ty->getTypeID()) {
- case Type::VoidTyID: OS << "void"; break;
- case Type::HalfTyID: OS << "half"; break;
- case Type::FloatTyID: OS << "float"; break;
- case Type::DoubleTyID: OS << "double"; break;
- case Type::X86_FP80TyID: OS << "x86_fp80"; break;
- case Type::FP128TyID: OS << "fp128"; break;
- case Type::PPC_FP128TyID: OS << "ppc_fp128"; break;
- case Type::LabelTyID: OS << "label"; break;
- case Type::MetadataTyID: OS << "metadata"; break;
- case Type::X86_MMXTyID: OS << "x86_mmx"; break;
- case Type::IntegerTyID:
- OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
- return;
-
- case Type::FunctionTyID: {
- FunctionType *FTy = cast<FunctionType>(Ty);
- print(FTy->getReturnType(), OS);
- OS << " (";
- for (FunctionType::param_iterator I = FTy->param_begin(),
- E = FTy->param_end(); I != E; ++I) {
- if (I != FTy->param_begin())
- OS << ", ";
- print(*I, OS);
- }
- if (FTy->isVarArg()) {
- if (FTy->getNumParams()) OS << ", ";
- OS << "...";
- }
- OS << ')';
- return;
- }
- case Type::StructTyID: {
- StructType *STy = cast<StructType>(Ty);
-
- if (STy->isLiteral())
- return printStructBody(STy, OS);
-
- if (!STy->getName().empty())
- return PrintLLVMName(OS, STy->getName(), LocalPrefix);
-
- DenseMap<StructType*, unsigned>::iterator I = NumberedTypes.find(STy);
- if (I != NumberedTypes.end())
- OS << '%' << I->second;
- else // Not enumerated, print the hex address.
- OS << "%\"type " << STy << '\"';
- return;
- }
- case Type::PointerTyID: {
- PointerType *PTy = cast<PointerType>(Ty);
- print(PTy->getElementType(), OS);
- if (unsigned AddressSpace = PTy->getAddressSpace())
- OS << " addrspace(" << AddressSpace << ')';
- OS << '*';
- return;
- }
- case Type::ArrayTyID: {
- ArrayType *ATy = cast<ArrayType>(Ty);
- OS << '[' << ATy->getNumElements() << " x ";
- print(ATy->getElementType(), OS);
- OS << ']';
- return;
- }
- case Type::VectorTyID: {
- VectorType *PTy = cast<VectorType>(Ty);
- OS << "<" << PTy->getNumElements() << " x ";
- print(PTy->getElementType(), OS);
- OS << '>';
- return;
- }
- default:
- OS << "<unrecognized-type>";
- return;
- }
-}
-
-void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
- if (STy->isOpaque()) {
- OS << "opaque";
- return;
- }
-
- if (STy->isPacked())
- OS << '<';
-
- if (STy->getNumElements() == 0) {
- OS << "{}";
- } else {
- StructType::element_iterator I = STy->element_begin();
- OS << "{ ";
- print(*I++, OS);
- for (StructType::element_iterator E = STy->element_end(); I != E; ++I) {
- OS << ", ";
- print(*I, OS);
- }
-
- OS << " }";
- }
- if (STy->isPacked())
- OS << '>';
-}
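For reference, the bodies this produces for a few assumed struct shapes:

    //   opaque struct              ->  opaque
    //   struct with no elements    ->  {}
    //   struct of i32 and i8*      ->  { i32, i8* }
    //   packed struct of i8, i32   ->  <{ i8, i32 }>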
-
-
-
-//===----------------------------------------------------------------------===//
-// SlotTracker Class: Enumerate slot numbers for unnamed values
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-/// This class provides computation of slot numbers for LLVM Assembly writing.
-///
-class SlotTracker {
-public:
- /// ValueMap - A mapping of Values to slot numbers.
- typedef DenseMap<const Value*, unsigned> ValueMap;
-
-private:
- /// TheModule - The module for which we are holding slot numbers.
- const Module* TheModule;
-
- /// TheFunction - The function for which we are holding slot numbers.
- const Function* TheFunction;
- bool FunctionProcessed;
-
- /// mMap - The slot map for the module level data.
- ValueMap mMap;
- unsigned mNext;
-
- /// fMap - The slot map for the function level data.
- ValueMap fMap;
- unsigned fNext;
-
- /// mdnMap - Map for MDNodes.
- DenseMap<const MDNode*, unsigned> mdnMap;
- unsigned mdnNext;
-public:
- /// Construct from a module
- explicit SlotTracker(const Module *M);
- /// Construct from a function, starting out in incorp state.
- explicit SlotTracker(const Function *F);
-
- /// Return the slot number of the specified value in its type
- /// plane. If something is not in the SlotTracker, return -1.
- int getLocalSlot(const Value *V);
- int getGlobalSlot(const GlobalValue *V);
- int getMetadataSlot(const MDNode *N);
-
- /// If you'd like to deal with a function instead of just a module, use
- /// this method to get its data into the SlotTracker.
- void incorporateFunction(const Function *F) {
- TheFunction = F;
- FunctionProcessed = false;
- }
-
- /// After calling incorporateFunction, use this method to remove the
- /// most recently incorporated function from the SlotTracker. This
- /// will reset the state of the machine back to just the module contents.
- void purgeFunction();
-
- /// MDNode map iterators.
- typedef DenseMap<const MDNode*, unsigned>::iterator mdn_iterator;
- mdn_iterator mdn_begin() { return mdnMap.begin(); }
- mdn_iterator mdn_end() { return mdnMap.end(); }
- unsigned mdn_size() const { return mdnMap.size(); }
- bool mdn_empty() const { return mdnMap.empty(); }
-
- /// This function does the actual initialization.
- inline void initialize();
-
- // Implementation Details
-private:
- /// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
- void CreateModuleSlot(const GlobalValue *V);
-
- /// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
- void CreateMetadataSlot(const MDNode *N);
-
- /// CreateFunctionSlot - Insert the specified Value* into the slot table.
- void CreateFunctionSlot(const Value *V);
-
- /// Add all of the module level global variables (and their initializers)
- /// and function declarations, but not the contents of those functions.
- void processModule();
-
- /// Add all of the functions arguments, basic blocks, and instructions.
- void processFunction();
-
- SlotTracker(const SlotTracker &) LLVM_DELETED_FUNCTION;
- void operator=(const SlotTracker &) LLVM_DELETED_FUNCTION;
-};
-
-} // end anonymous namespace
-
-
-static SlotTracker *createSlotTracker(const Value *V) {
- if (const Argument *FA = dyn_cast<Argument>(V))
- return new SlotTracker(FA->getParent());
-
- if (const Instruction *I = dyn_cast<Instruction>(V))
- if (I->getParent())
- return new SlotTracker(I->getParent()->getParent());
-
- if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
- return new SlotTracker(BB->getParent());
-
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return new SlotTracker(GV->getParent());
-
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
- return new SlotTracker(GA->getParent());
-
- if (const Function *Func = dyn_cast<Function>(V))
- return new SlotTracker(Func);
-
- if (const MDNode *MD = dyn_cast<MDNode>(V)) {
- if (!MD->isFunctionLocal())
- return new SlotTracker(MD->getFunction());
-
- return new SlotTracker((Function *)0);
- }
-
- return 0;
-}
-
-#if 0
-#define ST_DEBUG(X) dbgs() << X
-#else
-#define ST_DEBUG(X)
-#endif
-
-// Module level constructor. Causes the contents of the Module (sans functions)
-// to be added to the slot table.
-SlotTracker::SlotTracker(const Module *M)
- : TheModule(M), TheFunction(0), FunctionProcessed(false),
- mNext(0), fNext(0), mdnNext(0) {
-}
-
-// Function level constructor. Causes the contents of the Module and the one
-// function provided to be added to the slot table.
-SlotTracker::SlotTracker(const Function *F)
- : TheModule(F ? F->getParent() : 0), TheFunction(F), FunctionProcessed(false),
- mNext(0), fNext(0), mdnNext(0) {
-}
-
-inline void SlotTracker::initialize() {
- if (TheModule) {
- processModule();
- TheModule = 0; // Prevent re-processing next time we're called.
- }
-
- if (TheFunction && !FunctionProcessed)
- processFunction();
-}
-
-// Iterate through all the global variables, functions, and global
-// variable initializers and create slots for them.
-void SlotTracker::processModule() {
- ST_DEBUG("begin processModule!\n");
-
- // Add all of the unnamed global variables to the value table.
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- if (!I->hasName())
- CreateModuleSlot(I);
- }
-
- // Add metadata used by named metadata.
- for (Module::const_named_metadata_iterator
- I = TheModule->named_metadata_begin(),
- E = TheModule->named_metadata_end(); I != E; ++I) {
- const NamedMDNode *NMD = I;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- CreateMetadataSlot(NMD->getOperand(i));
- }
-
- // Add all the unnamed functions to the table.
- for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
- I != E; ++I)
- if (!I->hasName())
- CreateModuleSlot(I);
-
- ST_DEBUG("end processModule!\n");
-}
-
-// Process the arguments, basic blocks, and instructions of a function.
-void SlotTracker::processFunction() {
- ST_DEBUG("begin processFunction!\n");
- fNext = 0;
-
- // Add all the function arguments with no names.
- for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
- AE = TheFunction->arg_end(); AI != AE; ++AI)
- if (!AI->hasName())
- CreateFunctionSlot(AI);
-
- ST_DEBUG("Inserting Instructions:\n");
-
- SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
-
- // Add all of the basic blocks and instructions with no names.
- for (Function::const_iterator BB = TheFunction->begin(),
- E = TheFunction->end(); BB != E; ++BB) {
- if (!BB->hasName())
- CreateFunctionSlot(BB);
-
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
- ++I) {
- if (!I->getType()->isVoidTy() && !I->hasName())
- CreateFunctionSlot(I);
-
- // Intrinsics can directly use metadata. We allow direct calls to any
- // llvm.foo function here, because the target may not be linked into the
- // optimizer.
- if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (Function *F = CI->getCalledFunction())
- if (F->getName().startswith("llvm."))
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i)))
- CreateMetadataSlot(N);
- }
-
- // Process metadata attached with this instruction.
- I->getAllMetadata(MDForInst);
- for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
- CreateMetadataSlot(MDForInst[i].second);
- MDForInst.clear();
- }
- }
-
- FunctionProcessed = true;
-
- ST_DEBUG("end processFunction!\n");
-}
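The result is the familiar dense per-function numbering of unnamed values. A hypothetical example (the IR below is assumed, not taken from this file):

    //   define i32 @f(i32, i32) {   ; unnamed arguments take slots 0 and 1,
    //     %3 = add i32 %0, %1       ; the unnamed entry block takes slot 2,
    //     ret i32 %3                ; so the first instruction gets slot 3
    //   }
    //
    // Slots are assigned in program order and discarded by purgeFunction().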
-
-/// Clean up after incorporating a function. This is the only way to get out of
-/// the function incorporation state that affects get*Slot/Create*Slot. Function
-/// incorporation state is indicated by TheFunction != 0.
-void SlotTracker::purgeFunction() {
- ST_DEBUG("begin purgeFunction!\n");
- fMap.clear(); // Simply discard the function level map
- TheFunction = 0;
- FunctionProcessed = false;
- ST_DEBUG("end purgeFunction!\n");
-}
-
-/// getGlobalSlot - Get the slot number of a global value.
-int SlotTracker::getGlobalSlot(const GlobalValue *V) {
- // Check for uninitialized state and do lazy initialization.
- initialize();
-
- // Find the value in the module map
- ValueMap::iterator MI = mMap.find(V);
- return MI == mMap.end() ? -1 : (int)MI->second;
-}
-
-/// getMetadataSlot - Get the slot number of a MDNode.
-int SlotTracker::getMetadataSlot(const MDNode *N) {
- // Check for uninitialized state and do lazy initialization.
- initialize();
-
- // Find the MDNode in the module map
- mdn_iterator MI = mdnMap.find(N);
- return MI == mdnMap.end() ? -1 : (int)MI->second;
-}
-
-
-/// getLocalSlot - Get the slot number for a value that is local to a function.
-int SlotTracker::getLocalSlot(const Value *V) {
- assert(!isa<Constant>(V) && "Can't get a constant or global slot with this!");
-
- // Check for uninitialized state and do lazy initialization.
- initialize();
-
- ValueMap::iterator FI = fMap.find(V);
- return FI == fMap.end() ? -1 : (int)FI->second;
-}
-
-
-/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table.
-void SlotTracker::CreateModuleSlot(const GlobalValue *V) {
- assert(V && "Can't insert a null Value into SlotTracker!");
- assert(!V->getType()->isVoidTy() && "Doesn't need a slot!");
- assert(!V->hasName() && "Doesn't need a slot!");
-
- unsigned DestSlot = mNext++;
- mMap[V] = DestSlot;
-
- ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
- DestSlot << " [");
- // G = Global, F = Function, A = Alias, o = other
- ST_DEBUG((isa<GlobalVariable>(V) ? 'G' :
- (isa<Function>(V) ? 'F' :
- (isa<GlobalAlias>(V) ? 'A' : 'o'))) << "]\n");
-}
-
-/// CreateFunctionSlot - Create a new slot for the specified value if it has
-/// no name.
-void SlotTracker::CreateFunctionSlot(const Value *V) {
- assert(!V->getType()->isVoidTy() && !V->hasName() && "Doesn't need a slot!");
-
- unsigned DestSlot = fNext++;
- fMap[V] = DestSlot;
-
- // G = Global, F = Function, o = other
- ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" <<
- DestSlot << " [o]\n");
-}
-
-/// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
-void SlotTracker::CreateMetadataSlot(const MDNode *N) {
- assert(N && "Can't insert a null Value into SlotTracker!");
-
- // Don't insert if N is function-local metadata; these are always printed
- // inline.
- if (!N->isFunctionLocal()) {
- mdn_iterator I = mdnMap.find(N);
- if (I != mdnMap.end())
- return;
-
- unsigned DestSlot = mdnNext++;
- mdnMap[N] = DestSlot;
- }
-
- // Recursively add any MDNodes referenced by operands.
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- if (const MDNode *Op = dyn_cast_or_null<MDNode>(N->getOperand(i)))
- CreateMetadataSlot(Op);
-}
-
-//===----------------------------------------------------------------------===//
-// AsmWriter Implementation
-//===----------------------------------------------------------------------===//
-
-static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context);
-
-
-
-static const char *getPredicateText(unsigned predicate) {
- const char * pred = "unknown";
- switch (predicate) {
- case FCmpInst::FCMP_FALSE: pred = "false"; break;
- case FCmpInst::FCMP_OEQ: pred = "oeq"; break;
- case FCmpInst::FCMP_OGT: pred = "ogt"; break;
- case FCmpInst::FCMP_OGE: pred = "oge"; break;
- case FCmpInst::FCMP_OLT: pred = "olt"; break;
- case FCmpInst::FCMP_OLE: pred = "ole"; break;
- case FCmpInst::FCMP_ONE: pred = "one"; break;
- case FCmpInst::FCMP_ORD: pred = "ord"; break;
- case FCmpInst::FCMP_UNO: pred = "uno"; break;
- case FCmpInst::FCMP_UEQ: pred = "ueq"; break;
- case FCmpInst::FCMP_UGT: pred = "ugt"; break;
- case FCmpInst::FCMP_UGE: pred = "uge"; break;
- case FCmpInst::FCMP_ULT: pred = "ult"; break;
- case FCmpInst::FCMP_ULE: pred = "ule"; break;
- case FCmpInst::FCMP_UNE: pred = "une"; break;
- case FCmpInst::FCMP_TRUE: pred = "true"; break;
- case ICmpInst::ICMP_EQ: pred = "eq"; break;
- case ICmpInst::ICMP_NE: pred = "ne"; break;
- case ICmpInst::ICMP_SGT: pred = "sgt"; break;
- case ICmpInst::ICMP_SGE: pred = "sge"; break;
- case ICmpInst::ICMP_SLT: pred = "slt"; break;
- case ICmpInst::ICMP_SLE: pred = "sle"; break;
- case ICmpInst::ICMP_UGT: pred = "ugt"; break;
- case ICmpInst::ICMP_UGE: pred = "uge"; break;
- case ICmpInst::ICMP_ULT: pred = "ult"; break;
- case ICmpInst::ICMP_ULE: pred = "ule"; break;
- }
- return pred;
-}
-
-static void writeAtomicRMWOperation(raw_ostream &Out,
- AtomicRMWInst::BinOp Op) {
- switch (Op) {
- default: Out << " <unknown operation " << Op << ">"; break;
- case AtomicRMWInst::Xchg: Out << " xchg"; break;
- case AtomicRMWInst::Add: Out << " add"; break;
- case AtomicRMWInst::Sub: Out << " sub"; break;
- case AtomicRMWInst::And: Out << " and"; break;
- case AtomicRMWInst::Nand: Out << " nand"; break;
- case AtomicRMWInst::Or: Out << " or"; break;
- case AtomicRMWInst::Xor: Out << " xor"; break;
- case AtomicRMWInst::Max: Out << " max"; break;
- case AtomicRMWInst::Min: Out << " min"; break;
- case AtomicRMWInst::UMax: Out << " umax"; break;
- case AtomicRMWInst::UMin: Out << " umin"; break;
- }
-}
-
-static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
- if (const OverflowingBinaryOperator *OBO =
- dyn_cast<OverflowingBinaryOperator>(U)) {
- if (OBO->hasNoUnsignedWrap())
- Out << " nuw";
- if (OBO->hasNoSignedWrap())
- Out << " nsw";
- } else if (const PossiblyExactOperator *Div =
- dyn_cast<PossiblyExactOperator>(U)) {
- if (Div->isExact())
- Out << " exact";
- } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
- if (GEP->isInBounds())
- Out << " inbounds";
- }
-}
-
-static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
- TypePrinting &TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- if (CI->getType()->isIntegerTy(1)) {
- Out << (CI->getZExtValue() ? "true" : "false");
- return;
- }
- Out << CI->getValue();
- return;
- }
-
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle ||
- &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble) {
- // We would like to output the FP constant value in exponential notation,
- // but we cannot do this if doing so will lose precision. Check here to
- // make sure that we only output it in exponential format if we can parse
- // the value back and get the same value.
- //
- bool ignored;
- bool isHalf = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEhalf;
- bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
- bool isInf = CFP->getValueAPF().isInfinity();
- bool isNaN = CFP->getValueAPF().isNaN();
- if (!isHalf && !isInf && !isNaN) {
- double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
- CFP->getValueAPF().convertToFloat();
- SmallString<128> StrVal;
- raw_svector_ostream(StrVal) << Val;
-
- // Check to make sure that the stringized number is not some string like
- // "Inf" or NaN, that atof will accept, but the lexer will not. Check
- // that the string matches the "[-+]?[0-9]" regex.
- //
- if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
- ((StrVal[0] == '-' || StrVal[0] == '+') &&
- (StrVal[1] >= '0' && StrVal[1] <= '9'))) {
- // Reparse stringized version!
- if (APFloat(APFloat::IEEEdouble, StrVal).convertToDouble() == Val) {
- Out << StrVal.str();
- return;
- }
- }
- }
- // Otherwise we could not reparse it to exactly the same value, so we must
- // output the string in hexadecimal format! Note that loading and storing
- // floating point types changes the bits of NaNs on some hosts, notably
- // x86, so we must not use these types.
- assert(sizeof(double) == sizeof(uint64_t) &&
- "assuming that double is 64 bits!");
- char Buffer[40];
- APFloat apf = CFP->getValueAPF();
- // Halves and floats are represented in ASCII IR as double, convert.
- if (!isDouble)
- apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
- &ignored);
- Out << "0x" <<
- utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()),
- Buffer+40);
- return;
- }
-
- // Either half, or some form of long double.
- // These appear as a magic letter identifying the type, then a
- // fixed number of hex digits.
- Out << "0x";
- // Bit position, in the current word, of the next nibble to print.
- int shiftcount;
-
- if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) {
- Out << 'K';
- // api needed to prevent premature destruction
- APInt api = CFP->getValueAPF().bitcastToAPInt();
- const uint64_t* p = api.getRawData();
- uint64_t word = p[1];
- shiftcount = 12;
- int width = api.getBitWidth();
- for (int j=0; j<width; j+=4, shiftcount-=4) {
- unsigned int nibble = (word>>shiftcount) & 15;
- if (nibble < 10)
- Out << (unsigned char)(nibble + '0');
- else
- Out << (unsigned char)(nibble - 10 + 'A');
- if (shiftcount == 0 && j+4 < width) {
- word = *p;
- shiftcount = 64;
- if (width-j-4 < 64)
- shiftcount = width-j-4;
- }
- }
- return;
- } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) {
- shiftcount = 60;
- Out << 'L';
- } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) {
- shiftcount = 60;
- Out << 'M';
- } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf) {
- shiftcount = 12;
- Out << 'H';
- } else
- llvm_unreachable("Unsupported floating point type");
- // api needed to prevent premature destruction
- APInt api = CFP->getValueAPF().bitcastToAPInt();
- const uint64_t* p = api.getRawData();
- uint64_t word = *p;
- int width = api.getBitWidth();
- for (int j=0; j<width; j+=4, shiftcount-=4) {
- unsigned int nibble = (word>>shiftcount) & 15;
- if (nibble < 10)
- Out << (unsigned char)(nibble + '0');
- else
- Out << (unsigned char)(nibble - 10 + 'A');
- if (shiftcount == 0 && j+4 < width) {
- word = *(++p);
- shiftcount = 64;
- if (width-j-4 < 64)
- shiftcount = width-j-4;
- }
- }
- return;
- }
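The decimal-versus-hex decision above reduces to a round-trip test: print the value, check that the lexer could read the string back, then reparse it and require bit-exact equality. A condensed sketch using the same APFloat calls (simplified control flow, illustrative only):

    static bool printsExactlyInDecimal(double Val) {
      SmallString<128> StrVal;
      raw_svector_ostream(StrVal) << Val;
      // Only accept strings matching [-+]?[0-9]..., which the lexer can read.
      if (!((StrVal[0] >= '0' && StrVal[0] <= '9') ||
            ((StrVal[0] == '-' || StrVal[0] == '+') &&
             (StrVal[1] >= '0' && StrVal[1] <= '9'))))
        return false;
      // Require bit-exact equality after reparsing; otherwise hex is used.
      return APFloat(APFloat::IEEEdouble, StrVal).convertToDouble() == Val;
    }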
-
- if (isa<ConstantAggregateZero>(CV)) {
- Out << "zeroinitializer";
- return;
- }
-
- if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
- Out << "blockaddress(";
- WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
- Context);
- Out << ", ";
- WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine,
- Context);
- Out << ")";
- return;
- }
-
- if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
- Type *ETy = CA->getType()->getElementType();
- Out << '[';
- TypePrinter.print(ETy, Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(0),
- &TypePrinter, Machine,
- Context);
- for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
- Out << ", ";
- TypePrinter.print(ETy, Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
- Context);
- }
- Out << ']';
- return;
- }
-
- if (const ConstantDataArray *CA = dyn_cast<ConstantDataArray>(CV)) {
- // As a special case, print the array as a string if it is an array of
- // i8 with ConstantInt values.
- if (CA->isString()) {
- Out << "c\"";
- PrintEscapedString(CA->getAsString(), Out);
- Out << '"';
- return;
- }
-
- Type *ETy = CA->getType()->getElementType();
- Out << '[';
- TypePrinter.print(ETy, Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CA->getElementAsConstant(0),
- &TypePrinter, Machine,
- Context);
- for (unsigned i = 1, e = CA->getNumElements(); i != e; ++i) {
- Out << ", ";
- TypePrinter.print(ETy, Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CA->getElementAsConstant(i), &TypePrinter,
- Machine, Context);
- }
- Out << ']';
- return;
- }
-
-
- if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
- if (CS->getType()->isPacked())
- Out << '<';
- Out << '{';
- unsigned N = CS->getNumOperands();
- if (N) {
- Out << ' ';
- TypePrinter.print(CS->getOperand(0)->getType(), Out);
- Out << ' ';
-
- WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine,
- Context);
-
- for (unsigned i = 1; i < N; i++) {
- Out << ", ";
- TypePrinter.print(CS->getOperand(i)->getType(), Out);
- Out << ' ';
-
- WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine,
- Context);
- }
- Out << ' ';
- }
-
- Out << '}';
- if (CS->getType()->isPacked())
- Out << '>';
- return;
- }
-
- if (isa<ConstantVector>(CV) || isa<ConstantDataVector>(CV)) {
- Type *ETy = CV->getType()->getVectorElementType();
- Out << '<';
- TypePrinter.print(ETy, Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CV->getAggregateElement(0U), &TypePrinter,
- Machine, Context);
- for (unsigned i = 1, e = CV->getType()->getVectorNumElements(); i != e;++i){
- Out << ", ";
- TypePrinter.print(ETy, Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CV->getAggregateElement(i), &TypePrinter,
- Machine, Context);
- }
- Out << '>';
- return;
- }
-
- if (isa<ConstantPointerNull>(CV)) {
- Out << "null";
- return;
- }
-
- if (isa<UndefValue>(CV)) {
- Out << "undef";
- return;
- }
-
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
- Out << CE->getOpcodeName();
- WriteOptimizationInfo(Out, CE);
- if (CE->isCompare())
- Out << ' ' << getPredicateText(CE->getPredicate());
- Out << " (";
-
- for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
- TypePrinter.print((*OI)->getType(), Out);
- Out << ' ';
- WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
- if (OI+1 != CE->op_end())
- Out << ", ";
- }
-
- if (CE->hasIndices()) {
- ArrayRef<unsigned> Indices = CE->getIndices();
- for (unsigned i = 0, e = Indices.size(); i != e; ++i)
- Out << ", " << Indices[i];
- }
-
- if (CE->isCast()) {
- Out << " to ";
- TypePrinter.print(CE->getType(), Out);
- }
-
- Out << ')';
- return;
- }
-
- Out << "<placeholder or erroneous Constant>";
-}
-
-static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
- Out << "!{";
- for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
- const Value *V = Node->getOperand(mi);
- if (V == 0)
- Out << "null";
- else {
- TypePrinter->print(V->getType(), Out);
- Out << ' ';
- WriteAsOperandInternal(Out, Node->getOperand(mi),
- TypePrinter, Machine, Context);
- }
- if (mi + 1 != me)
- Out << ", ";
- }
-
- Out << "}";
-}
-
-
-/// WriteAsOperand - Write the name of the specified value out to the specified
-/// ostream. This can be useful when you just want to print int %reg126, not
-/// the whole instruction that generated it.
-///
-static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
- if (V->hasName()) {
- PrintLLVMName(Out, V);
- return;
- }
-
- const Constant *CV = dyn_cast<Constant>(V);
- if (CV && !isa<GlobalValue>(CV)) {
- assert(TypePrinter && "Constants require TypePrinting!");
- WriteConstantInternal(Out, CV, *TypePrinter, Machine, Context);
- return;
- }
-
- if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
- Out << "asm ";
- if (IA->hasSideEffects())
- Out << "sideeffect ";
- if (IA->isAlignStack())
- Out << "alignstack ";
- // We don't emit the AD_ATT dialect as it's the assumed default.
- if (IA->getDialect() == InlineAsm::AD_Intel)
- Out << "inteldialect ";
- Out << '"';
- PrintEscapedString(IA->getAsmString(), Out);
- Out << "\", \"";
- PrintEscapedString(IA->getConstraintString(), Out);
- Out << '"';
- return;
- }
-
- if (const MDNode *N = dyn_cast<MDNode>(V)) {
- if (N->isFunctionLocal()) {
- // Print metadata inline, not via slot reference number.
- WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine, Context);
- return;
- }
-
- if (!Machine) {
- if (N->isFunctionLocal())
- Machine = new SlotTracker(N->getFunction());
- else
- Machine = new SlotTracker(Context);
- }
- int Slot = Machine->getMetadataSlot(N);
- if (Slot == -1)
- Out << "<badref>";
- else
- Out << '!' << Slot;
- return;
- }
-
- if (const MDString *MDS = dyn_cast<MDString>(V)) {
- Out << "!\"";
- PrintEscapedString(MDS->getString(), Out);
- Out << '"';
- return;
- }
-
- if (V->getValueID() == Value::PseudoSourceValueVal ||
- V->getValueID() == Value::FixedStackPseudoSourceValueVal) {
- V->print(Out);
- return;
- }
-
- char Prefix = '%';
- int Slot;
- // If we have a SlotTracker, use it.
- if (Machine) {
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- Slot = Machine->getGlobalSlot(GV);
- Prefix = '@';
- } else {
- Slot = Machine->getLocalSlot(V);
-
- // If the local-slot lookup didn't succeed, we may be referring to a value
- // from a different function; translate it, as this can happen when taking
- // the address of blocks.
- if (Slot == -1)
- if ((Machine = createSlotTracker(V))) {
- Slot = Machine->getLocalSlot(V);
- delete Machine;
- }
- }
- } else if ((Machine = createSlotTracker(V))) {
- // Otherwise, create one to get the # and then destroy it.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- Slot = Machine->getGlobalSlot(GV);
- Prefix = '@';
- } else {
- Slot = Machine->getLocalSlot(V);
- }
- delete Machine;
- Machine = 0;
- } else {
- Slot = -1;
- }
-
- if (Slot != -1)
- Out << Prefix << Slot;
- else
- Out << "<badref>";
-}
-
-void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
- bool PrintType, const Module *Context) {
-
- // Fast path: Don't construct and populate a TypePrinting object if we
- // won't be needing any types printed.
- if (!PrintType &&
- ((!isa<Constant>(V) && !isa<MDNode>(V)) ||
- V->hasName() || isa<GlobalValue>(V))) {
- WriteAsOperandInternal(Out, V, 0, 0, Context);
- return;
- }
-
- if (Context == 0) Context = getModuleFromVal(V);
-
- TypePrinting TypePrinter;
- if (Context)
- TypePrinter.incorporateTypes(*Context);
- if (PrintType) {
- TypePrinter.print(V->getType(), Out);
- Out << ' ';
- }
-
- WriteAsOperandInternal(Out, V, &TypePrinter, 0, Context);
-}
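Typical use of this entry point (a usage sketch; V stands for any Value*):

    //   WriteAsOperand(errs(), V, /*PrintType=*/true);   // e.g. "i32 %tmp"
    //   WriteAsOperand(errs(), V, /*PrintType=*/false);  // e.g. "%tmp"
    //
    // Passing an explicit Module as Context lets the printer resolve numbered
    // struct types without walking the module again.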
-
-namespace {
-
-class AssemblyWriter {
- formatted_raw_ostream &Out;
- SlotTracker &Machine;
- const Module *TheModule;
- TypePrinting TypePrinter;
- AssemblyAnnotationWriter *AnnotationWriter;
-
-public:
- inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
- const Module *M,
- AssemblyAnnotationWriter *AAW)
- : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
- if (M)
- TypePrinter.incorporateTypes(*M);
- }
-
- void printMDNodeBody(const MDNode *MD);
- void printNamedMDNode(const NamedMDNode *NMD);
-
- void printModule(const Module *M);
-
- void writeOperand(const Value *Op, bool PrintType);
- void writeParamOperand(const Value *Operand, Attributes Attrs);
- void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
-
- void writeAllMDNodes();
-
- void printTypeIdentities();
- void printGlobal(const GlobalVariable *GV);
- void printAlias(const GlobalAlias *GV);
- void printFunction(const Function *F);
- void printArgument(const Argument *FA, Attributes Attrs);
- void printBasicBlock(const BasicBlock *BB);
- void printInstruction(const Instruction &I);
-
-private:
- // printInfoComment - Print a little comment after the instruction indicating
- // which slot it occupies.
- void printInfoComment(const Value &V);
-};
-} // end of anonymous namespace
-
-void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
- if (Operand == 0) {
- Out << "<null operand!>";
- return;
- }
- if (PrintType) {
- TypePrinter.print(Operand->getType(), Out);
- Out << ' ';
- }
- WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
-}
-
-void AssemblyWriter::writeAtomic(AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
- if (Ordering == NotAtomic)
- return;
-
- switch (SynchScope) {
- case SingleThread: Out << " singlethread"; break;
- case CrossThread: break;
- }
-
- switch (Ordering) {
- default: Out << " <bad ordering " << int(Ordering) << ">"; break;
- case Unordered: Out << " unordered"; break;
- case Monotonic: Out << " monotonic"; break;
- case Acquire: Out << " acquire"; break;
- case Release: Out << " release"; break;
- case AcquireRelease: Out << " acq_rel"; break;
- case SequentiallyConsistent: Out << " seq_cst"; break;
- }
-}
-
-void AssemblyWriter::writeParamOperand(const Value *Operand,
- Attributes Attrs) {
- if (Operand == 0) {
- Out << "<null operand!>";
- return;
- }
-
- // Print the type
- TypePrinter.print(Operand->getType(), Out);
- // Print parameter attributes list
- if (Attrs.hasAttributes())
- Out << ' ' << Attrs.getAsString();
- Out << ' ';
- // Print the operand
- WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
-}
-
-void AssemblyWriter::printModule(const Module *M) {
- if (!M->getModuleIdentifier().empty() &&
- // Don't print the ID if it will start a new line (which would
- // require a comment char before it).
- M->getModuleIdentifier().find('\n') == std::string::npos)
- Out << "; ModuleID = '" << M->getModuleIdentifier() << "'\n";
-
- if (!M->getDataLayout().empty())
- Out << "target datalayout = \"" << M->getDataLayout() << "\"\n";
- if (!M->getTargetTriple().empty())
- Out << "target triple = \"" << M->getTargetTriple() << "\"\n";
-
- if (!M->getModuleInlineAsm().empty()) {
- // Split the string into lines, to make it easier to read the .ll file.
- std::string Asm = M->getModuleInlineAsm();
- size_t CurPos = 0;
- size_t NewLine = Asm.find_first_of('\n', CurPos);
- Out << '\n';
- while (NewLine != std::string::npos) {
- // We found a newline, print the portion of the asm string from the
- // last newline up to this newline.
- Out << "module asm \"";
- PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
- Out);
- Out << "\"\n";
- CurPos = NewLine+1;
- NewLine = Asm.find_first_of('\n', CurPos);
- }
- std::string rest(Asm.begin()+CurPos, Asm.end());
- if (!rest.empty()) {
- Out << "module asm \"";
- PrintEscapedString(rest, Out);
- Out << "\"\n";
- }
- }
-
- // Loop over the dependent libraries and emit them.
- Module::lib_iterator LI = M->lib_begin();
- Module::lib_iterator LE = M->lib_end();
- if (LI != LE) {
- Out << '\n';
- Out << "deplibs = [ ";
- while (LI != LE) {
- Out << '"' << *LI << '"';
- ++LI;
- if (LI != LE)
- Out << ", ";
- }
- Out << " ]";
- }
-
- printTypeIdentities();
-
- // Output all globals.
- if (!M->global_empty()) Out << '\n';
- for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
- I != E; ++I) {
- printGlobal(I); Out << '\n';
- }
-
- // Output all aliases.
- if (!M->alias_empty()) Out << "\n";
- for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
- I != E; ++I)
- printAlias(I);
-
- // Output all of the functions.
- for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
- printFunction(I);
-
- // Output named metadata.
- if (!M->named_metadata_empty()) Out << '\n';
-
- for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
- E = M->named_metadata_end(); I != E; ++I)
- printNamedMDNode(I);
-
- // Output metadata.
- if (!Machine.mdn_empty()) {
- Out << '\n';
- writeAllMDNodes();
- }
-}
-
-void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
- Out << '!';
- StringRef Name = NMD->getName();
- if (Name.empty()) {
- Out << "<empty name> ";
- } else {
- if (isalpha(Name[0]) || Name[0] == '-' || Name[0] == '$' ||
- Name[0] == '.' || Name[0] == '_')
- Out << Name[0];
- else
- Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F);
- for (unsigned i = 1, e = Name.size(); i != e; ++i) {
- unsigned char C = Name[i];
- if (isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_')
- Out << C;
- else
- Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
- }
- }
- Out << " = !{";
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- if (i) Out << ", ";
- int Slot = Machine.getMetadataSlot(NMD->getOperand(i));
- if (Slot == -1)
- Out << "<badref>";
- else
- Out << '!' << Slot;
- }
- Out << "}\n";
-}
-
-
-static void PrintLinkage(GlobalValue::LinkageTypes LT,
- formatted_raw_ostream &Out) {
- switch (LT) {
- case GlobalValue::ExternalLinkage: break;
- case GlobalValue::PrivateLinkage: Out << "private "; break;
- case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break;
- case GlobalValue::LinkerPrivateWeakLinkage:
- Out << "linker_private_weak ";
- break;
- case GlobalValue::InternalLinkage: Out << "internal "; break;
- case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
- case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
- case GlobalValue::LinkOnceODRAutoHideLinkage:
- Out << "linkonce_odr_auto_hide ";
- break;
- case GlobalValue::WeakAnyLinkage: Out << "weak "; break;
- case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break;
- case GlobalValue::CommonLinkage: Out << "common "; break;
- case GlobalValue::AppendingLinkage: Out << "appending "; break;
- case GlobalValue::DLLImportLinkage: Out << "dllimport "; break;
- case GlobalValue::DLLExportLinkage: Out << "dllexport "; break;
- case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break;
- case GlobalValue::AvailableExternallyLinkage:
- Out << "available_externally ";
- break;
- }
-}
-
-
-static void PrintVisibility(GlobalValue::VisibilityTypes Vis,
- formatted_raw_ostream &Out) {
- switch (Vis) {
- case GlobalValue::DefaultVisibility: break;
- case GlobalValue::HiddenVisibility: Out << "hidden "; break;
- case GlobalValue::ProtectedVisibility: Out << "protected "; break;
- }
-}
-
-static void PrintThreadLocalModel(GlobalVariable::ThreadLocalMode TLM,
- formatted_raw_ostream &Out) {
- switch (TLM) {
- case GlobalVariable::NotThreadLocal:
- break;
- case GlobalVariable::GeneralDynamicTLSModel:
- Out << "thread_local ";
- break;
- case GlobalVariable::LocalDynamicTLSModel:
- Out << "thread_local(localdynamic) ";
- break;
- case GlobalVariable::InitialExecTLSModel:
- Out << "thread_local(initialexec) ";
- break;
- case GlobalVariable::LocalExecTLSModel:
- Out << "thread_local(localexec) ";
- break;
- }
-}
-
-void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
- if (GV->isMaterializable())
- Out << "; Materializable\n";
-
- WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine, GV->getParent());
- Out << " = ";
-
- if (!GV->hasInitializer() && GV->hasExternalLinkage())
- Out << "external ";
-
- PrintLinkage(GV->getLinkage(), Out);
- PrintVisibility(GV->getVisibility(), Out);
- PrintThreadLocalModel(GV->getThreadLocalMode(), Out);
-
- if (unsigned AddressSpace = GV->getType()->getAddressSpace())
- Out << "addrspace(" << AddressSpace << ") ";
- if (GV->hasUnnamedAddr()) Out << "unnamed_addr ";
- Out << (GV->isConstant() ? "constant " : "global ");
- TypePrinter.print(GV->getType()->getElementType(), Out);
-
- if (GV->hasInitializer()) {
- Out << ' ';
- writeOperand(GV->getInitializer(), false);
- }
-
- if (GV->hasSection()) {
- Out << ", section \"";
- PrintEscapedString(GV->getSection(), Out);
- Out << '"';
- }
- if (GV->getAlignment())
- Out << ", align " << GV->getAlignment();
-
- printInfoComment(*GV);
-}
-
-void AssemblyWriter::printAlias(const GlobalAlias *GA) {
- if (GA->isMaterializable())
- Out << "; Materializable\n";
-
- // Don't crash when dumping partially built GA
- if (!GA->hasName())
- Out << "<<nameless>> = ";
- else {
- PrintLLVMName(Out, GA);
- Out << " = ";
- }
- PrintVisibility(GA->getVisibility(), Out);
-
- Out << "alias ";
-
- PrintLinkage(GA->getLinkage(), Out);
-
- const Constant *Aliasee = GA->getAliasee();
-
- if (Aliasee == 0) {
- TypePrinter.print(GA->getType(), Out);
- Out << " <<NULL ALIASEE>>";
- } else {
- writeOperand(Aliasee, !isa<ConstantExpr>(Aliasee));
- }
-
- printInfoComment(*GA);
- Out << '\n';
-}
-
-void AssemblyWriter::printTypeIdentities() {
- if (TypePrinter.NumberedTypes.empty() &&
- TypePrinter.NamedTypes.empty())
- return;
-
- Out << '\n';
-
- // We know the number assigned to each type and that the assignment is
- // dense. Convert the map to an index table.
- std::vector<StructType*> NumberedTypes(TypePrinter.NumberedTypes.size());
- for (DenseMap<StructType*, unsigned>::iterator I =
- TypePrinter.NumberedTypes.begin(), E = TypePrinter.NumberedTypes.end();
- I != E; ++I) {
- assert(I->second < NumberedTypes.size() && "Didn't get a dense numbering?");
- NumberedTypes[I->second] = I->first;
- }
-
- // Emit all numbered types.
- for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) {
- Out << '%' << i << " = type ";
-
- // Make sure we print out at least one level of the type structure, so
- // that we do not get %2 = type %2
- TypePrinter.printStructBody(NumberedTypes[i], Out);
- Out << '\n';
- }
-
- for (unsigned i = 0, e = TypePrinter.NamedTypes.size(); i != e; ++i) {
- PrintLLVMName(Out, TypePrinter.NamedTypes[i]->getName(), LocalPrefix);
- Out << " = type ";
-
- // Make sure we print out at least one level of the type structure, so
- // that we do not get %FILE = type %FILE
- TypePrinter.printStructBody(TypePrinter.NamedTypes[i], Out);
- Out << '\n';
- }
-}
-
-/// printFunction - Print all aspects of a function.
-///
-void AssemblyWriter::printFunction(const Function *F) {
- // Print out the return type and name.
- Out << '\n';
-
- if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out);
-
- if (F->isMaterializable())
- Out << "; Materializable\n";
-
- if (F->isDeclaration())
- Out << "declare ";
- else
- Out << "define ";
-
- PrintLinkage(F->getLinkage(), Out);
- PrintVisibility(F->getVisibility(), Out);
-
- // Print the calling convention.
- if (F->getCallingConv() != CallingConv::C) {
- PrintCallingConv(F->getCallingConv(), Out);
- Out << " ";
- }
-
- FunctionType *FT = F->getFunctionType();
- const AttrListPtr &Attrs = F->getAttributes();
- Attributes RetAttrs = Attrs.getRetAttributes();
- if (RetAttrs.hasAttributes())
- Out << Attrs.getRetAttributes().getAsString() << ' ';
- TypePrinter.print(F->getReturnType(), Out);
- Out << ' ';
- WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
- Out << '(';
- Machine.incorporateFunction(F);
-
- // Loop over the arguments, printing them...
-
- unsigned Idx = 1;
- if (!F->isDeclaration()) {
- // If this isn't a declaration, print the argument names as well.
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I) {
- // Insert commas as we go... the first arg doesn't get a comma
- if (I != F->arg_begin()) Out << ", ";
- printArgument(I, Attrs.getParamAttributes(Idx));
- Idx++;
- }
- } else {
- // Otherwise, print the types from the function type.
- for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
- // Insert commas as we go... the first arg doesn't get a comma
- if (i) Out << ", ";
-
- // Output type...
- TypePrinter.print(FT->getParamType(i), Out);
-
- Attributes ArgAttrs = Attrs.getParamAttributes(i+1);
- if (ArgAttrs.hasAttributes())
- Out << ' ' << ArgAttrs.getAsString();
- }
- }
-
- // Finish printing arguments...
- if (FT->isVarArg()) {
- if (FT->getNumParams()) Out << ", ";
- Out << "..."; // Output varargs portion of signature!
- }
- Out << ')';
- if (F->hasUnnamedAddr())
- Out << " unnamed_addr";
- Attributes FnAttrs = Attrs.getFnAttributes();
- if (FnAttrs.hasAttributes())
- Out << ' ' << Attrs.getFnAttributes().getAsString();
- if (F->hasSection()) {
- Out << " section \"";
- PrintEscapedString(F->getSection(), Out);
- Out << '"';
- }
- if (F->getAlignment())
- Out << " align " << F->getAlignment();
- if (F->hasGC())
- Out << " gc \"" << F->getGC() << '"';
- if (F->isDeclaration()) {
- Out << '\n';
- } else {
- Out << " {";
- // Output all of the function's basic blocks.
- for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
- printBasicBlock(I);
-
- Out << "}\n";
- }
-
- Machine.purgeFunction();
-}
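-
-// Illustrative sketch of the headers this routine emits (attribute spellings
-// come from Attributes::getAsString): a declaration ends after its signature,
-// while a definition opens a body:
-//
-//   declare i32 @printf(i8*, ...)
-//   define zeroext i1 @flag(i32 %x) nounwind readonly align 16 {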
-
-/// printArgument - This member is called for every argument that is passed
-/// into the function. It simply prints out the argument.
-///
-void AssemblyWriter::printArgument(const Argument *Arg,
- Attributes Attrs) {
- // Output type...
- TypePrinter.print(Arg->getType(), Out);
-
- // Output parameter attributes list
- if (Attrs.hasAttributes())
- Out << ' ' << Attrs.getAsString();
-
- // Output name, if available...
- if (Arg->hasName()) {
- Out << ' ';
- PrintLLVMName(Out, Arg);
- }
-}
-
-/// printBasicBlock - This member is called for each basic block in a method.
-///
-void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
- if (BB->hasName()) { // Print out the label if it exists...
- Out << "\n";
- PrintLLVMName(Out, BB->getName(), LabelPrefix);
- Out << ':';
- } else if (!BB->use_empty()) { // Don't print the block # if it has no uses.
- Out << "\n; <label>:";
- int Slot = Machine.getLocalSlot(BB);
- if (Slot != -1)
- Out << Slot;
- else
- Out << "<badref>";
- }
-
- if (BB->getParent() == 0) {
- Out.PadToColumn(50);
- Out << "; Error: Block without parent!";
- } else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
- // Output predecessors for the block.
- Out.PadToColumn(50);
- Out << ";";
- const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-
- if (PI == PE) {
- Out << " No predecessors!";
- } else {
- Out << " preds = ";
- writeOperand(*PI, false);
- for (++PI; PI != PE; ++PI) {
- Out << ", ";
- writeOperand(*PI, false);
- }
- }
- }
-
- Out << "\n";
-
- if (AnnotationWriter) AnnotationWriter->emitBasicBlockStartAnnot(BB, Out);
-
- // Output all of the instructions in the basic block...
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- printInstruction(*I);
- Out << '\n';
- }
-
- if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
-}
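-
-// Illustrative sketch: a named block prints as "entry:", while an unnamed but
-// referenced block gets only a comment, padded to column 50 together with its
-// predecessor list:
-//
-//   ; <label>:5                                       ; preds = %entry, %4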
-
-/// printInfoComment - Print a little comment after the instruction indicating
-/// which slot it occupies.
-///
-void AssemblyWriter::printInfoComment(const Value &V) {
- if (AnnotationWriter) {
- AnnotationWriter->printInfoComment(V, Out);
- return;
- }
-}
-
-// This member is called for each Instruction in a function.
-void AssemblyWriter::printInstruction(const Instruction &I) {
- if (AnnotationWriter) AnnotationWriter->emitInstructionAnnot(&I, Out);
-
- // Print out indentation for an instruction.
- Out << " ";
-
- // Print out name if it exists...
- if (I.hasName()) {
- PrintLLVMName(Out, &I);
- Out << " = ";
- } else if (!I.getType()->isVoidTy()) {
- // Print out the def slot taken.
- int SlotNum = Machine.getLocalSlot(&I);
- if (SlotNum == -1)
- Out << "<badref> = ";
- else
- Out << '%' << SlotNum << " = ";
- }
-
- if (isa<CallInst>(I) && cast<CallInst>(I).isTailCall())
- Out << "tail ";
-
- // Print out the opcode...
- Out << I.getOpcodeName();
-
- // If this is an atomic load or store, print out the atomic marker.
- if ((isa<LoadInst>(I) && cast<LoadInst>(I).isAtomic()) ||
- (isa<StoreInst>(I) && cast<StoreInst>(I).isAtomic()))
- Out << " atomic";
-
- // If this is a volatile operation, print out the volatile marker.
- if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) ||
- (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) ||
- (isa<AtomicCmpXchgInst>(I) && cast<AtomicCmpXchgInst>(I).isVolatile()) ||
- (isa<AtomicRMWInst>(I) && cast<AtomicRMWInst>(I).isVolatile()))
- Out << " volatile";
-
- // Print out optimization information.
- WriteOptimizationInfo(Out, &I);
-
- // Print out the compare instruction predicates
- if (const CmpInst *CI = dyn_cast<CmpInst>(&I))
- Out << ' ' << getPredicateText(CI->getPredicate());
-
- // Print out the atomicrmw operation
- if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I))
- writeAtomicRMWOperation(Out, RMWI->getOperation());
-
- // Print out the type of the operands...
- const Value *Operand = I.getNumOperands() ? I.getOperand(0) : 0;
-
- // Special case conditional branches to swizzle the condition out to the front
- if (isa<BranchInst>(I) && cast<BranchInst>(I).isConditional()) {
- const BranchInst &BI(cast<BranchInst>(I));
- Out << ' ';
- writeOperand(BI.getCondition(), true);
- Out << ", ";
- writeOperand(BI.getSuccessor(0), true);
- Out << ", ";
- writeOperand(BI.getSuccessor(1), true);
-
- } else if (isa<SwitchInst>(I)) {
- const SwitchInst &SI(cast<SwitchInst>(I));
- // Special case switch instruction to get formatting nice and correct.
- Out << ' ';
- writeOperand(SI.getCondition(), true);
- Out << ", ";
- writeOperand(SI.getDefaultDest(), true);
- Out << " [";
- for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
- i != e; ++i) {
- Out << "\n ";
- writeOperand(i.getCaseValue(), true);
- Out << ", ";
- writeOperand(i.getCaseSuccessor(), true);
- }
- Out << "\n ]";
- } else if (isa<IndirectBrInst>(I)) {
- // Special case indirectbr instruction to get formatting nice and correct.
- Out << ' ';
- writeOperand(Operand, true);
- Out << ", [";
-
- for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
- if (i != 1)
- Out << ", ";
- writeOperand(I.getOperand(i), true);
- }
- Out << ']';
- } else if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
- Out << ' ';
- TypePrinter.print(I.getType(), Out);
- Out << ' ';
-
- for (unsigned op = 0, Eop = PN->getNumIncomingValues(); op < Eop; ++op) {
- if (op) Out << ", ";
- Out << "[ ";
- writeOperand(PN->getIncomingValue(op), false); Out << ", ";
- writeOperand(PN->getIncomingBlock(op), false); Out << " ]";
- }
- } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&I)) {
- Out << ' ';
- writeOperand(I.getOperand(0), true);
- for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
- Out << ", " << *i;
- } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&I)) {
- Out << ' ';
- writeOperand(I.getOperand(0), true); Out << ", ";
- writeOperand(I.getOperand(1), true);
- for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
- Out << ", " << *i;
- } else if (const LandingPadInst *LPI = dyn_cast<LandingPadInst>(&I)) {
- Out << ' ';
- TypePrinter.print(I.getType(), Out);
- Out << " personality ";
- writeOperand(I.getOperand(0), true); Out << '\n';
-
- if (LPI->isCleanup())
- Out << " cleanup";
-
- for (unsigned i = 0, e = LPI->getNumClauses(); i != e; ++i) {
- if (i != 0 || LPI->isCleanup()) Out << "\n";
- if (LPI->isCatch(i))
- Out << " catch ";
- else
- Out << " filter ";
-
- writeOperand(LPI->getClause(i), true);
- }
- } else if (isa<ReturnInst>(I) && !Operand) {
- Out << " void";
- } else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
- // Print the calling convention being used.
- if (CI->getCallingConv() != CallingConv::C) {
- Out << " ";
- PrintCallingConv(CI->getCallingConv(), Out);
- }
-
- Operand = CI->getCalledValue();
- PointerType *PTy = cast<PointerType>(Operand->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- Type *RetTy = FTy->getReturnType();
- const AttrListPtr &PAL = CI->getAttributes();
-
- if (PAL.getRetAttributes().hasAttributes())
- Out << ' ' << PAL.getRetAttributes().getAsString();
-
- // If possible, print out the short form of the call instruction. We can
- // only do this if the first argument is a pointer to a nonvararg function,
- // and if the return type is not a pointer to a function.
- //
- Out << ' ';
- if (!FTy->isVarArg() &&
- (!RetTy->isPointerTy() ||
- !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
- TypePrinter.print(RetTy, Out);
- Out << ' ';
- writeOperand(Operand, false);
- } else {
- writeOperand(Operand, true);
- }
- Out << '(';
- for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
- if (op > 0)
- Out << ", ";
- writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op + 1));
- }
- Out << ')';
- if (PAL.getFnAttributes().hasAttributes())
- Out << ' ' << PAL.getFnAttributes().getAsString();
- } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
- Operand = II->getCalledValue();
- PointerType *PTy = cast<PointerType>(Operand->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- Type *RetTy = FTy->getReturnType();
- const AttrListPtr &PAL = II->getAttributes();
-
- // Print the calling convention being used.
- if (II->getCallingConv() != CallingConv::C) {
- Out << " ";
- PrintCallingConv(II->getCallingConv(), Out);
- }
-
- if (PAL.getRetAttributes().hasAttributes())
- Out << ' ' << PAL.getRetAttributes().getAsString();
-
- // If possible, print out the short form of the invoke instruction. We can
- // only do this if the first argument is a pointer to a nonvararg function,
- // and if the return type is not a pointer to a function.
- //
- Out << ' ';
- if (!FTy->isVarArg() &&
- (!RetTy->isPointerTy() ||
- !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
- TypePrinter.print(RetTy, Out);
- Out << ' ';
- writeOperand(Operand, false);
- } else {
- writeOperand(Operand, true);
- }
- Out << '(';
- for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
- if (op)
- Out << ", ";
- writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op + 1));
- }
-
- Out << ')';
- if (PAL.getFnAttributes().hasAttributes())
- Out << ' ' << PAL.getFnAttributes().getAsString();
-
- Out << "\n to ";
- writeOperand(II->getNormalDest(), true);
- Out << " unwind ";
- writeOperand(II->getUnwindDest(), true);
-
- } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
- Out << ' ';
- TypePrinter.print(AI->getType()->getElementType(), Out);
- if (!AI->getArraySize() || AI->isArrayAllocation()) {
- Out << ", ";
- writeOperand(AI->getArraySize(), true);
- }
- if (AI->getAlignment()) {
- Out << ", align " << AI->getAlignment();
- }
- } else if (isa<CastInst>(I)) {
- if (Operand) {
- Out << ' ';
- writeOperand(Operand, true); // Work with broken code
- }
- Out << " to ";
- TypePrinter.print(I.getType(), Out);
- } else if (isa<VAArgInst>(I)) {
- if (Operand) {
- Out << ' ';
- writeOperand(Operand, true); // Work with broken code
- }
- Out << ", ";
- TypePrinter.print(I.getType(), Out);
- } else if (Operand) { // Print the normal way.
-
- // PrintAllTypes - Instructions whose operands all have the same type
- // omit the type from all but the first operand. If the instruction has
- // operands of differing types (for example br), then they are all printed.
- bool PrintAllTypes = false;
- Type *TheType = Operand->getType();
-
- // Select, Store and ShuffleVector always print all types.
- if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I)
- || isa<ReturnInst>(I)) {
- PrintAllTypes = true;
- } else {
- for (unsigned i = 1, E = I.getNumOperands(); i != E; ++i) {
- Operand = I.getOperand(i);
- // note that Operand shouldn't be null, but the test helps make dump()
- // more tolerant of malformed IR
- if (Operand && Operand->getType() != TheType) {
- PrintAllTypes = true; // We have differing types! Print them all!
- break;
- }
- }
- }
-
- if (!PrintAllTypes) {
- Out << ' ';
- TypePrinter.print(TheType, Out);
- }
-
- Out << ' ';
- for (unsigned i = 0, E = I.getNumOperands(); i != E; ++i) {
- if (i) Out << ", ";
- writeOperand(I.getOperand(i), PrintAllTypes);
- }
- }
-
- // Print atomic ordering/alignment for memory operations
- if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
- if (LI->isAtomic())
- writeAtomic(LI->getOrdering(), LI->getSynchScope());
- if (LI->getAlignment())
- Out << ", align " << LI->getAlignment();
- } else if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) {
- if (SI->isAtomic())
- writeAtomic(SI->getOrdering(), SI->getSynchScope());
- if (SI->getAlignment())
- Out << ", align " << SI->getAlignment();
- } else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&I)) {
- writeAtomic(CXI->getOrdering(), CXI->getSynchScope());
- } else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I)) {
- writeAtomic(RMWI->getOrdering(), RMWI->getSynchScope());
- } else if (const FenceInst *FI = dyn_cast<FenceInst>(&I)) {
- writeAtomic(FI->getOrdering(), FI->getSynchScope());
- }
-
- // Print Metadata info.
- SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD;
- I.getAllMetadata(InstMD);
- if (!InstMD.empty()) {
- SmallVector<StringRef, 8> MDNames;
- I.getType()->getContext().getMDKindNames(MDNames);
- for (unsigned i = 0, e = InstMD.size(); i != e; ++i) {
- unsigned Kind = InstMD[i].first;
- if (Kind < MDNames.size()) {
- Out << ", !" << MDNames[Kind];
- } else {
- Out << ", !<unknown kind #" << Kind << ">";
- }
- Out << ' ';
- WriteAsOperandInternal(Out, InstMD[i].second, &TypePrinter, &Machine,
- TheModule);
- }
- }
- printInfoComment(I);
-}
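-
-// Illustrative sketch of a complete line as assembled above: two spaces of
-// indentation, an optional "%name = " or "%slot = " prefix, the opcode and
-// operands, then any attached metadata, e.g.:
-//
-//   %sum = add nsw i32 %a, %b, !dbg !7
-//
-// Here "nsw" comes from WriteOptimizationInfo and "!dbg !7" from the
-// metadata loop at the end.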
-
-static void WriteMDNodeComment(const MDNode *Node,
- formatted_raw_ostream &Out) {
- if (Node->getNumOperands() < 1)
- return;
-
- Value *Op = Node->getOperand(0);
- if (!Op || !isa<ConstantInt>(Op) || cast<ConstantInt>(Op)->getBitWidth() < 32)
- return;
-
- DIDescriptor Desc(Node);
- if (Desc.getVersion() < LLVMDebugVersion11)
- return;
-
- unsigned Tag = Desc.getTag();
- Out.PadToColumn(50);
- if (dwarf::TagString(Tag)) {
- Out << "; ";
- Desc.print(Out);
- } else if (Tag == dwarf::DW_TAG_user_base) {
- Out << "; [ DW_TAG_user_base ]";
- }
-}
-
-void AssemblyWriter::writeAllMDNodes() {
- SmallVector<const MDNode *, 16> Nodes;
- Nodes.resize(Machine.mdn_size());
- for (SlotTracker::mdn_iterator I = Machine.mdn_begin(), E = Machine.mdn_end();
- I != E; ++I)
- Nodes[I->second] = cast<MDNode>(I->first);
-
- for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
- Out << '!' << i << " = metadata ";
- printMDNodeBody(Nodes[i]);
- }
-}
-
-void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
- WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
- WriteMDNodeComment(Node, Out);
- Out << "\n";
-}
-
-//===----------------------------------------------------------------------===//
-// External Interface declarations
-//===----------------------------------------------------------------------===//
-
-void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
- SlotTracker SlotTable(this);
- formatted_raw_ostream OS(ROS);
- AssemblyWriter W(OS, SlotTable, this, AAW);
- W.printModule(this);
-}
-
-void NamedMDNode::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
- SlotTracker SlotTable(getParent());
- formatted_raw_ostream OS(ROS);
- AssemblyWriter W(OS, SlotTable, getParent(), AAW);
- W.printNamedMDNode(this);
-}
-
-void Type::print(raw_ostream &OS) const {
- if (this == 0) {
- OS << "<null Type>";
- return;
- }
- TypePrinting TP;
- TP.print(const_cast<Type*>(this), OS);
-
- // If the type is a named struct type, print the body as well.
- if (StructType *STy = dyn_cast<StructType>(const_cast<Type*>(this)))
- if (!STy->isLiteral()) {
- OS << " = type ";
- TP.printStructBody(STy, OS);
- }
-}
-
-void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
- if (this == 0) {
- ROS << "printing a <null> value\n";
- return;
- }
- formatted_raw_ostream OS(ROS);
- if (const Instruction *I = dyn_cast<Instruction>(this)) {
- const Function *F = I->getParent() ? I->getParent()->getParent() : 0;
- SlotTracker SlotTable(F);
- AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), AAW);
- W.printInstruction(*I);
- } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
- SlotTracker SlotTable(BB->getParent());
- AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), AAW);
- W.printBasicBlock(BB);
- } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
- SlotTracker SlotTable(GV->getParent());
- AssemblyWriter W(OS, SlotTable, GV->getParent(), AAW);
- if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
- W.printGlobal(V);
- else if (const Function *F = dyn_cast<Function>(GV))
- W.printFunction(F);
- else
- W.printAlias(cast<GlobalAlias>(GV));
- } else if (const MDNode *N = dyn_cast<MDNode>(this)) {
- const Function *F = N->getFunction();
- SlotTracker SlotTable(F);
- AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
- W.printMDNodeBody(N);
- } else if (const Constant *C = dyn_cast<Constant>(this)) {
- TypePrinting TypePrinter;
- TypePrinter.print(C->getType(), OS);
- OS << ' ';
- WriteConstantInternal(OS, C, TypePrinter, 0, 0);
- } else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
- isa<Argument>(this)) {
- WriteAsOperand(OS, this, true, 0);
- } else {
- // Otherwise we don't know what it is. Call the virtual function to
- // allow a subclass to print itself.
- printCustom(OS);
- }
-}
-
-// Value::printCustom - subclasses should override this to implement printing.
-void Value::printCustom(raw_ostream &OS) const {
- llvm_unreachable("Unknown value to print out!");
-}
-
-// Value::dump - allow easy printing of Values from the debugger.
-void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
-
-// Type::dump - allow easy printing of Types from the debugger.
-void Type::dump() const { print(dbgs()); }
-
-// Module::dump() - Allow printing of Modules from the debugger.
-void Module::dump() const { print(dbgs(), 0); }
-
-// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
-void NamedMDNode::dump() const { print(dbgs(), 0); }
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
deleted file mode 100644
index f1268e6ef86b..000000000000
--- a/lib/VMCore/Attributes.cpp
+++ /dev/null
@@ -1,547 +0,0 @@
-//===-- Attributes.cpp - Implement AttributesList -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Attributes, AttributeImpl, AttrBuilder,
-// AttributeListImpl, and AttrListPtr classes.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Attributes.h"
-#include "AttributesImpl.h"
-#include "LLVMContextImpl.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/Support/Atomic.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Attributes Implementation
-//===----------------------------------------------------------------------===//
-
-Attributes Attributes::get(LLVMContext &Context, ArrayRef<AttrVal> Vals) {
- AttrBuilder B;
- for (ArrayRef<AttrVal>::iterator I = Vals.begin(), E = Vals.end();
- I != E; ++I)
- B.addAttribute(*I);
- return Attributes::get(Context, B);
-}
-
-Attributes Attributes::get(LLVMContext &Context, AttrBuilder &B) {
- // If there are no attributes, return an empty Attributes class.
- if (!B.hasAttributes())
- return Attributes();
-
- // Otherwise, build a key to look up the existing attributes.
- LLVMContextImpl *pImpl = Context.pImpl;
- FoldingSetNodeID ID;
- ID.AddInteger(B.Raw());
-
- void *InsertPoint;
- AttributesImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
-
- if (!PA) {
- // If we didn't find any existing attributes of the same shape then create a
- // new one and insert it.
- PA = new AttributesImpl(B.Raw());
- pImpl->AttrsSet.InsertNode(PA, InsertPoint);
- }
-
- // Return the AttributesList that we found or created.
- return Attributes(PA);
-}
-
-bool Attributes::hasAttribute(AttrVal Val) const {
- return Attrs && Attrs->hasAttribute(Val);
-}
-
-bool Attributes::hasAttributes() const {
- return Attrs && Attrs->hasAttributes();
-}
-
-bool Attributes::hasAttributes(const Attributes &A) const {
- return Attrs && Attrs->hasAttributes(A);
-}
-
-/// This returns the alignment field of an attribute as a byte alignment value.
-unsigned Attributes::getAlignment() const {
- if (!hasAttribute(Attributes::Alignment))
- return 0;
- return 1U << ((Attrs->getAlignment() >> 16) - 1);
-}
-
-/// This returns the stack alignment field of an attribute as a byte alignment
-/// value.
-unsigned Attributes::getStackAlignment() const {
- if (!hasAttribute(Attributes::StackAlignment))
- return 0;
- return 1U << ((Attrs->getStackAlignment() >> 26) - 1);
-}
-
-uint64_t Attributes::Raw() const {
- return Attrs ? Attrs->Raw() : 0;
-}
-
-Attributes Attributes::typeIncompatible(Type *Ty) {
- AttrBuilder Incompatible;
-
- if (!Ty->isIntegerTy())
- // Attributes that only apply to integers.
- Incompatible.addAttribute(Attributes::SExt)
- .addAttribute(Attributes::ZExt);
-
- if (!Ty->isPointerTy())
- // Attributes that only apply to pointers.
- Incompatible.addAttribute(Attributes::ByVal)
- .addAttribute(Attributes::Nest)
- .addAttribute(Attributes::NoAlias)
- .addAttribute(Attributes::NoCapture)
- .addAttribute(Attributes::StructRet);
-
- return Attributes::get(Ty->getContext(), Incompatible);
-}
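-
-// A minimal usage sketch for typeIncompatible; the helper name VerifyParam is
-// illustrative only (the Verifier performs a similar check):
-//
-//   static bool VerifyParam(Type *Ty, Attributes ParamAttrs) {
-//     // Reject e.g. zeroext on a pointer or byval on an integer.
-//     return !ParamAttrs.hasAttributes(Attributes::typeIncompatible(Ty));
-//   }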
-
-/// encodeLLVMAttributesForBitcode - This returns an integer containing an
-/// encoding of all the LLVM attributes found in the given attribute bitset.
-/// Any change to this encoding is a breaking change to bitcode compatibility.
-uint64_t Attributes::encodeLLVMAttributesForBitcode(Attributes Attrs) {
- // FIXME: It doesn't make sense to store the alignment information as an
- // expanded-out value; we should store it as a log2 value. However, we can't
- // just change that here without breaking bitcode compatibility. If this ever
- // becomes a problem in practice, we should introduce new tag numbers in the
- // bitcode file and have those tags use a more efficiently encoded alignment
- // field.
-
- // Store the alignment in the bitcode as a 16-bit raw value instead of a 5-bit
- // log2 encoded value. Shift the bits above the alignment up by 11 bits.
- uint64_t EncodedAttrs = Attrs.Raw() & 0xffff;
- if (Attrs.hasAttribute(Attributes::Alignment))
- EncodedAttrs |= Attrs.getAlignment() << 16;
- EncodedAttrs |= (Attrs.Raw() & (0xffffULL << 21)) << 11;
- return EncodedAttrs;
-}
-
-/// decodeLLVMAttributesForBitcode - This returns an attribute bitset containing
-/// the LLVM attributes that have been decoded from the given integer. This
-/// function must stay in sync with 'encodeLLVMAttributesForBitcode'.
-Attributes Attributes::decodeLLVMAttributesForBitcode(LLVMContext &C,
- uint64_t EncodedAttrs) {
- // The alignment is stored as a 16-bit raw value from bits 31--16. We shift
- // the bits above 31 down by 11 bits.
- unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16;
- assert((!Alignment || isPowerOf2_32(Alignment)) &&
- "Alignment must be a power of two.");
-
- AttrBuilder B(EncodedAttrs & 0xffff);
- if (Alignment)
- B.addAlignmentAttr(Alignment);
- B.addRawValue((EncodedAttrs & (0xffffULL << 32)) >> 11);
- return Attributes::get(C, B);
-}
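-
-// Worked example of the round trip above, assuming the getAttrMask layout
-// defined later in this file. Take a parameter with zeroext (bit 0),
-// nocapture (bit 21) and align 8 (field value Log2_32(8)+1 == 4 in bits
-// 16-20):
-//
-//   Raw()   == (1 << 0) | (4 << 16) | (1 << 21)
-//   encoded == (1 << 0)        // low 16 bits pass through unchanged
-//            | (8 << 16)       // alignment expanded to a raw byte count
-//            | (1ULL << 32)    // bit 21 shifted up by 11
-//
-// Decoding reads the byte count back out of bits 16-31 and shifts the high
-// bits back down by 11, reproducing the original Raw() value.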
-
-std::string Attributes::getAsString() const {
- std::string Result;
- if (hasAttribute(Attributes::ZExt))
- Result += "zeroext ";
- if (hasAttribute(Attributes::SExt))
- Result += "signext ";
- if (hasAttribute(Attributes::NoReturn))
- Result += "noreturn ";
- if (hasAttribute(Attributes::NoUnwind))
- Result += "nounwind ";
- if (hasAttribute(Attributes::UWTable))
- Result += "uwtable ";
- if (hasAttribute(Attributes::ReturnsTwice))
- Result += "returns_twice ";
- if (hasAttribute(Attributes::InReg))
- Result += "inreg ";
- if (hasAttribute(Attributes::NoAlias))
- Result += "noalias ";
- if (hasAttribute(Attributes::NoCapture))
- Result += "nocapture ";
- if (hasAttribute(Attributes::StructRet))
- Result += "sret ";
- if (hasAttribute(Attributes::ByVal))
- Result += "byval ";
- if (hasAttribute(Attributes::Nest))
- Result += "nest ";
- if (hasAttribute(Attributes::ReadNone))
- Result += "readnone ";
- if (hasAttribute(Attributes::ReadOnly))
- Result += "readonly ";
- if (hasAttribute(Attributes::OptimizeForSize))
- Result += "optsize ";
- if (hasAttribute(Attributes::NoInline))
- Result += "noinline ";
- if (hasAttribute(Attributes::InlineHint))
- Result += "inlinehint ";
- if (hasAttribute(Attributes::AlwaysInline))
- Result += "alwaysinline ";
- if (hasAttribute(Attributes::StackProtect))
- Result += "ssp ";
- if (hasAttribute(Attributes::StackProtectReq))
- Result += "sspreq ";
- if (hasAttribute(Attributes::NoRedZone))
- Result += "noredzone ";
- if (hasAttribute(Attributes::NoImplicitFloat))
- Result += "noimplicitfloat ";
- if (hasAttribute(Attributes::Naked))
- Result += "naked ";
- if (hasAttribute(Attributes::NonLazyBind))
- Result += "nonlazybind ";
- if (hasAttribute(Attributes::AddressSafety))
- Result += "address_safety ";
- if (hasAttribute(Attributes::MinSize))
- Result += "minsize ";
- if (hasAttribute(Attributes::StackAlignment)) {
- Result += "alignstack(";
- Result += utostr(getStackAlignment());
- Result += ") ";
- }
- if (hasAttribute(Attributes::Alignment)) {
- Result += "align ";
- Result += utostr(getAlignment());
- Result += " ";
- }
- // Trim the trailing space.
- assert(!Result.empty() && "Unknown attribute!");
- Result.erase(Result.end()-1);
- return Result;
-}
-
-//===----------------------------------------------------------------------===//
-// AttrBuilder Implementation
-//===----------------------------------------------------------------------===//
-
-AttrBuilder &AttrBuilder::addAttribute(Attributes::AttrVal Val) {
- Bits |= AttributesImpl::getAttrMask(Val);
- return *this;
-}
-
-AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
- Bits |= Val;
- return *this;
-}
-
-AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) {
- if (Align == 0) return *this;
- assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
- assert(Align <= 0x40000000 && "Alignment too large.");
- Bits |= (Log2_32(Align) + 1) << 16;
- return *this;
-}
-
-AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) {
- // Align == 0 means the default alignment; let the target decide how to align it.
- if (Align == 0) return *this;
- assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
- assert(Align <= 0x100 && "Alignment too large.");
- Bits |= (Log2_32(Align) + 1) << 26;
- return *this;
-}
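-
-// Worked example of the encoding used by the two methods above:
-// addAlignmentAttr(16) stores Log2_32(16) + 1 == 5 in bits 16-20, and
-// getAlignment() recovers 1U << (5 - 1) == 16; a field value of zero means
-// "no alignment attribute". Stack alignment works the same way in bits 26-28.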
-
-AttrBuilder &AttrBuilder::removeAttribute(Attributes::AttrVal Val) {
- Bits &= ~AttributesImpl::getAttrMask(Val);
- return *this;
-}
-
-AttrBuilder &AttrBuilder::addAttributes(const Attributes &A) {
- Bits |= A.Raw();
- return *this;
-}
-
-AttrBuilder &AttrBuilder::removeAttributes(const Attributes &A) {
- Bits &= ~A.Raw();
- return *this;
-}
-
-bool AttrBuilder::hasAttribute(Attributes::AttrVal A) const {
- return Bits & AttributesImpl::getAttrMask(A);
-}
-
-bool AttrBuilder::hasAttributes() const {
- return Bits != 0;
-}
-bool AttrBuilder::hasAttributes(const Attributes &A) const {
- return Bits & A.Raw();
-}
-bool AttrBuilder::hasAlignmentAttr() const {
- return Bits & AttributesImpl::getAttrMask(Attributes::Alignment);
-}
-
-uint64_t AttrBuilder::getAlignment() const {
- if (!hasAlignmentAttr())
- return 0;
- return 1U <<
- (((Bits & AttributesImpl::getAttrMask(Attributes::Alignment)) >> 16) - 1);
-}
-
-uint64_t AttrBuilder::getStackAlignment() const {
- // Note: check for the stack alignment attribute, not the plain alignment one.
- if (!hasAttribute(Attributes::StackAlignment))
- return 0;
- return 1U <<
- (((Bits & AttributesImpl::getAttrMask(Attributes::StackAlignment))>>26)-1);
-}
-
-//===----------------------------------------------------------------------===//
-// AttributeImpl Definition
-//===----------------------------------------------------------------------===//
-
-uint64_t AttributesImpl::getAttrMask(uint64_t Val) {
- switch (Val) {
- case Attributes::None: return 0;
- case Attributes::ZExt: return 1 << 0;
- case Attributes::SExt: return 1 << 1;
- case Attributes::NoReturn: return 1 << 2;
- case Attributes::InReg: return 1 << 3;
- case Attributes::StructRet: return 1 << 4;
- case Attributes::NoUnwind: return 1 << 5;
- case Attributes::NoAlias: return 1 << 6;
- case Attributes::ByVal: return 1 << 7;
- case Attributes::Nest: return 1 << 8;
- case Attributes::ReadNone: return 1 << 9;
- case Attributes::ReadOnly: return 1 << 10;
- case Attributes::NoInline: return 1 << 11;
- case Attributes::AlwaysInline: return 1 << 12;
- case Attributes::OptimizeForSize: return 1 << 13;
- case Attributes::StackProtect: return 1 << 14;
- case Attributes::StackProtectReq: return 1 << 15;
- case Attributes::Alignment: return 31 << 16;
- case Attributes::NoCapture: return 1 << 21;
- case Attributes::NoRedZone: return 1 << 22;
- case Attributes::NoImplicitFloat: return 1 << 23;
- case Attributes::Naked: return 1 << 24;
- case Attributes::InlineHint: return 1 << 25;
- case Attributes::StackAlignment: return 7 << 26;
- case Attributes::ReturnsTwice: return 1 << 29;
- case Attributes::UWTable: return 1 << 30;
- case Attributes::NonLazyBind: return 1U << 31;
- case Attributes::AddressSafety: return 1ULL << 32;
- case Attributes::MinSize: return 1ULL << 33;
- }
- llvm_unreachable("Unsupported attribute type");
-}
-
-bool AttributesImpl::hasAttribute(uint64_t A) const {
- return (Bits & getAttrMask(A)) != 0;
-}
-
-bool AttributesImpl::hasAttributes() const {
- return Bits != 0;
-}
-
-bool AttributesImpl::hasAttributes(const Attributes &A) const {
- return Bits & A.Raw(); // FIXME: Raw() won't work here in the future.
-}
-
-uint64_t AttributesImpl::getAlignment() const {
- return Bits & getAttrMask(Attributes::Alignment);
-}
-
-uint64_t AttributesImpl::getStackAlignment() const {
- return Bits & getAttrMask(Attributes::StackAlignment);
-}
-
-//===----------------------------------------------------------------------===//
-// AttributeListImpl Definition
-//===----------------------------------------------------------------------===//
-
-AttrListPtr AttrListPtr::get(LLVMContext &C,
- ArrayRef<AttributeWithIndex> Attrs) {
- // If there are no attributes then return a null AttributesList pointer.
- if (Attrs.empty())
- return AttrListPtr();
-
-#ifndef NDEBUG
- for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
- assert(Attrs[i].Attrs.hasAttributes() &&
- "Pointless attribute!");
- assert((!i || Attrs[i-1].Index < Attrs[i].Index) &&
- "Misordered AttributesList!");
- }
-#endif
-
- // Otherwise, build a key to look up the existing attributes.
- LLVMContextImpl *pImpl = C.pImpl;
- FoldingSetNodeID ID;
- AttributeListImpl::Profile(ID, Attrs);
-
- void *InsertPoint;
- AttributeListImpl *PA = pImpl->AttrsLists.FindNodeOrInsertPos(ID,
- InsertPoint);
-
- // If we didn't find any existing attributes of the same shape then
- // create a new one and insert it.
- if (!PA) {
- PA = new AttributeListImpl(Attrs);
- pImpl->AttrsLists.InsertNode(PA, InsertPoint);
- }
-
- // Return the AttributesList that we found or created.
- return AttrListPtr(PA);
-}
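-
-// A minimal construction sketch (Ctx is an LLVMContext; the indices follow
-// the convention documented below: 0 = return value, 1..N = parameters,
-// ~0U = the function itself). It assumes the AttributeWithIndex::get helper
-// declared in Attributes.h:
-//
-//   AttrBuilder B;
-//   B.addAttribute(Attributes::NoAlias);
-//   AttributeWithIndex AWI =
-//       AttributeWithIndex::get(1, Attributes::get(Ctx, B));
-//   AttrListPtr PAL = AttrListPtr::get(Ctx, AWI);  // uniqued as above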
-
-//===----------------------------------------------------------------------===//
-// AttrListPtr Method Implementations
-//===----------------------------------------------------------------------===//
-
-const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) {
- if (AttrList == RHS.AttrList) return *this;
-
- AttrList = RHS.AttrList;
- return *this;
-}
-
-/// getNumSlots - Return the number of slots used in this attribute list.
-/// This is the number of arguments that have an attribute set on them
-/// (including the function itself).
-unsigned AttrListPtr::getNumSlots() const {
- return AttrList ? AttrList->Attrs.size() : 0;
-}
-
-/// getSlot - Return the AttributeWithIndex at the specified slot. This
-/// holds a number plus a set of attributes.
-const AttributeWithIndex &AttrListPtr::getSlot(unsigned Slot) const {
- assert(AttrList && Slot < AttrList->Attrs.size() && "Slot # out of range!");
- return AttrList->Attrs[Slot];
-}
-
-/// getAttributes - The attributes for the specified index are returned.
-/// Attributes for the return value are denoted with Idx = 0; function
-/// attributes are denoted with Idx = ~0U.
-Attributes AttrListPtr::getAttributes(unsigned Idx) const {
- if (AttrList == 0) return Attributes();
-
- const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
- for (unsigned i = 0, e = Attrs.size(); i != e && Attrs[i].Index <= Idx; ++i)
- if (Attrs[i].Index == Idx)
- return Attrs[i].Attrs;
-
- return Attributes();
-}
-
-/// hasAttrSomewhere - Return true if the specified attribute is set for at
-/// least one parameter or for the return value.
-bool AttrListPtr::hasAttrSomewhere(Attributes::AttrVal Attr) const {
- if (AttrList == 0) return false;
-
- const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
- for (unsigned i = 0, e = Attrs.size(); i != e; ++i)
- if (Attrs[i].Attrs.hasAttribute(Attr))
- return true;
-
- return false;
-}
-
-unsigned AttrListPtr::getNumAttrs() const {
- return AttrList ? AttrList->Attrs.size() : 0;
-}
-
-Attributes &AttrListPtr::getAttributesAtIndex(unsigned i) const {
- assert(AttrList && "Trying to get an attribute from an empty list!");
- assert(i < AttrList->Attrs.size() && "Index out of range!");
- return AttrList->Attrs[i].Attrs;
-}
-
-AttrListPtr AttrListPtr::addAttr(LLVMContext &C, unsigned Idx,
- Attributes Attrs) const {
- Attributes OldAttrs = getAttributes(Idx);
-#ifndef NDEBUG
- // FIXME it is not obvious how this should work for alignment.
- // For now, say we can't change a known alignment.
- unsigned OldAlign = OldAttrs.getAlignment();
- unsigned NewAlign = Attrs.getAlignment();
- assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
- "Attempt to change alignment!");
-#endif
-
- AttrBuilder NewAttrs =
- AttrBuilder(OldAttrs).addAttributes(Attrs);
- if (NewAttrs == AttrBuilder(OldAttrs))
- return *this;
-
- SmallVector<AttributeWithIndex, 8> NewAttrList;
- if (AttrList == 0)
- NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
- else {
- const SmallVector<AttributeWithIndex, 4> &OldAttrList = AttrList->Attrs;
- unsigned i = 0, e = OldAttrList.size();
- // Copy attributes for arguments before this one.
- for (; i != e && OldAttrList[i].Index < Idx; ++i)
- NewAttrList.push_back(OldAttrList[i]);
-
- // If there are attributes already at this index, merge them in.
- if (i != e && OldAttrList[i].Index == Idx) {
- Attrs =
- Attributes::get(C, AttrBuilder(Attrs).
- addAttributes(OldAttrList[i].Attrs));
- ++i;
- }
-
- NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
-
- // Copy attributes for arguments after this one.
- NewAttrList.insert(NewAttrList.end(),
- OldAttrList.begin()+i, OldAttrList.end());
- }
-
- return get(C, NewAttrList);
-}
-
-AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx,
- Attributes Attrs) const {
-#ifndef NDEBUG
- // FIXME it is not obvious how this should work for alignment.
- // For now, say we can't pass in alignment, which no current use does.
- assert(!Attrs.hasAttribute(Attributes::Alignment) &&
- "Attempt to exclude alignment!");
-#endif
- if (AttrList == 0) return AttrListPtr();
-
- Attributes OldAttrs = getAttributes(Idx);
- AttrBuilder NewAttrs =
- AttrBuilder(OldAttrs).removeAttributes(Attrs);
- if (NewAttrs == AttrBuilder(OldAttrs))
- return *this;
-
- SmallVector<AttributeWithIndex, 8> NewAttrList;
- const SmallVector<AttributeWithIndex, 4> &OldAttrList = AttrList->Attrs;
- unsigned i = 0, e = OldAttrList.size();
-
- // Copy attributes for arguments before this one.
- for (; i != e && OldAttrList[i].Index < Idx; ++i)
- NewAttrList.push_back(OldAttrList[i]);
-
- // If there are attributes already at this index, merge them in.
- assert(OldAttrList[i].Index == Idx && "Attribute isn't set?");
- Attrs = Attributes::get(C, AttrBuilder(OldAttrList[i].Attrs).
- removeAttributes(Attrs));
- ++i;
- if (Attrs.hasAttributes()) // If any attributes left for this param, add them.
- NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
-
- // Copy attributes for arguments after this one.
- NewAttrList.insert(NewAttrList.end(),
- OldAttrList.begin()+i, OldAttrList.end());
-
- return get(C, NewAttrList);
-}
-
-void AttrListPtr::dump() const {
- dbgs() << "PAL[ ";
- for (unsigned i = 0; i < getNumSlots(); ++i) {
- const AttributeWithIndex &PAWI = getSlot(i);
- dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs.getAsString() << "} ";
- }
-
- dbgs() << "]\n";
-}
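-
-// Illustrative dump() output for a list with a nocapture first parameter and
-// nounwind function attributes (the function index ~0U prints as 4294967295):
-//
-//   PAL[ {1,nocapture} {4294967295,nounwind} ]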
diff --git a/lib/VMCore/AttributesImpl.h b/lib/VMCore/AttributesImpl.h
deleted file mode 100644
index 5c107e1ebba3..000000000000
--- a/lib/VMCore/AttributesImpl.h
+++ /dev/null
@@ -1,71 +0,0 @@
-//===-- AttributesImpl.h - Attributes Internals -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines various helper methods and classes used by LLVMContextImpl
-// for creating and managing attributes.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ATTRIBUTESIMPL_H
-#define LLVM_ATTRIBUTESIMPL_H
-
-#include "llvm/Attributes.h"
-#include "llvm/ADT/FoldingSet.h"
-
-namespace llvm {
-
-class AttributesImpl : public FoldingSetNode {
- uint64_t Bits; // FIXME: We will be expanding this.
-public:
- AttributesImpl(uint64_t bits) : Bits(bits) {}
-
- bool hasAttribute(uint64_t A) const;
-
- bool hasAttributes() const;
- bool hasAttributes(const Attributes &A) const;
-
- uint64_t getAlignment() const;
- uint64_t getStackAlignment() const;
-
- uint64_t Raw() const { return Bits; } // FIXME: Remove.
-
- static uint64_t getAttrMask(uint64_t Val);
-
- void Profile(FoldingSetNodeID &ID) const {
- Profile(ID, Bits);
- }
- static void Profile(FoldingSetNodeID &ID, uint64_t Bits) {
- ID.AddInteger(Bits);
- }
-};
-
-class AttributeListImpl : public FoldingSetNode {
- // AttributeListImpl is uniqued; these should not be publicly available.
- void operator=(const AttributeListImpl &) LLVM_DELETED_FUNCTION;
- AttributeListImpl(const AttributeListImpl &) LLVM_DELETED_FUNCTION;
-public:
- SmallVector<AttributeWithIndex, 4> Attrs;
-
- AttributeListImpl(ArrayRef<AttributeWithIndex> attrs)
- : Attrs(attrs.begin(), attrs.end()) {}
-
- void Profile(FoldingSetNodeID &ID) const {
- Profile(ID, Attrs);
- }
- static void Profile(FoldingSetNodeID &ID, ArrayRef<AttributeWithIndex> Attrs){
- for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
- ID.AddInteger(Attrs[i].Attrs.Raw());
- ID.AddInteger(Attrs[i].Index);
- }
- }
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
deleted file mode 100644
index 5fff460e8bc4..000000000000
--- a/lib/VMCore/AutoUpgrade.cpp
+++ /dev/null
@@ -1,393 +0,0 @@
-//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the auto-upgrade helper functions
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/AutoUpgrade.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instruction.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cstring>
-using namespace llvm;
-
-// Upgrade the declarations of the SSE4.1 functions whose arguments have
-// changed their type from v4f32 to v2i64.
-static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
- Function *&NewFn) {
- // Check whether this is an old version of the function, which received
- // v4f32 arguments.
- Type *Arg0Type = F->getFunctionType()->getParamType(0);
- if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
- return false;
-
- // Yes, it's old; replace it with the new version.
- F->setName(F->getName() + ".old");
- NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
- return true;
-}
-
-static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
- assert(F && "Illegal to upgrade a non-existent Function.");
-
- // Quickly eliminate it if it's not a candidate.
- StringRef Name = F->getName();
- if (Name.size() <= 8 || !Name.startswith("llvm."))
- return false;
- Name = Name.substr(5); // Strip off "llvm."
-
- switch (Name[0]) {
- default: break;
- case 'a': {
- if (Name.startswith("arm.neon.vclz")) {
- Type* args[2] = {
- F->arg_begin()->getType(),
- Type::getInt1Ty(F->getContext())
- };
- // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
- // the end of the name. Change name from llvm.arm.neon.vclz.* to
- // llvm.ctlz.*
- FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
- NewFn = Function::Create(fType, F->getLinkage(),
- "llvm.ctlz." + Name.substr(14), F->getParent());
- return true;
- }
- if (Name.startswith("arm.neon.vcnt")) {
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
- F->arg_begin()->getType());
- return true;
- }
- break;
- }
- case 'c': {
- if (Name.startswith("ctlz.") && F->arg_size() == 1) {
- F->setName(Name + ".old");
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
- F->arg_begin()->getType());
- return true;
- }
- if (Name.startswith("cttz.") && F->arg_size() == 1) {
- F->setName(Name + ".old");
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
- F->arg_begin()->getType());
- return true;
- }
- break;
- }
- case 'x': {
- if (Name.startswith("x86.sse2.pcmpeq.") ||
- Name.startswith("x86.sse2.pcmpgt.") ||
- Name.startswith("x86.avx2.pcmpeq.") ||
- Name.startswith("x86.avx2.pcmpgt.") ||
- Name.startswith("x86.avx.vpermil.") ||
- Name == "x86.avx.movnt.dq.256" ||
- Name == "x86.avx.movnt.pd.256" ||
- Name == "x86.avx.movnt.ps.256" ||
- (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
- NewFn = 0;
- return true;
- }
- // SSE4.1 ptest functions may have an old signature.
- if (Name.startswith("x86.sse41.ptest")) {
- if (Name == "x86.sse41.ptestc")
- return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
- if (Name == "x86.sse41.ptestz")
- return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
- if (Name == "x86.sse41.ptestnzc")
- return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
- }
- // frcz.ss/sd may need to have an argument dropped
- if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
- F->setName(Name + ".old");
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::x86_xop_vfrcz_ss);
- return true;
- }
- if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
- F->setName(Name + ".old");
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::x86_xop_vfrcz_sd);
- return true;
- }
- // Fix the FMA4 intrinsics to remove the 4
- if (Name.startswith("x86.fma4.")) {
- F->setName("llvm.x86.fma" + Name.substr(8));
- NewFn = F;
- return true;
- }
- break;
- }
- }
-
- // This may not belong here. This function is effectively being overloaded
- // to both detect an intrinsic which needs upgrading, and to provide the
- // upgraded form of the intrinsic. We should perhaps have two separate
- // functions for this.
- return false;
-}
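-
-// Illustrative examples of the renamings performed above (the vector and
-// scalar suffixes are whichever ones appear in the input module):
-//
-//   llvm.arm.neon.vclz.v4i32  ->  llvm.ctlz.v4i32   (gains an i1 argument)
-//   llvm.x86.fma4.vfmadd.ss   ->  llvm.x86.fma.vfmadd.ss
-//   llvm.x86.sse41.ptestc     ->  same name, arguments retyped v4f32 -> v2i64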
-
-bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
- NewFn = 0;
- bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
-
- // Upgrade intrinsic attributes. This does not change the function.
- if (NewFn)
- F = NewFn;
- if (unsigned id = F->getIntrinsicID())
- F->setAttributes(Intrinsic::getAttributes(F->getContext(),
- (Intrinsic::ID)id));
- return Upgraded;
-}
-
-bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
- // Nothing to do yet.
- return false;
-}
-
-// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to
-// the upgraded intrinsic. All argument and return casting must be provided
-// in order to integrate seamlessly with the existing context.
-void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
- Function *F = CI->getCalledFunction();
- LLVMContext &C = CI->getContext();
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- assert(F && "Intrinsic call is not direct?");
-
- if (!NewFn) {
- // Get the Function's name.
- StringRef Name = F->getName();
-
- Value *Rep;
- // Upgrade packed integer vector compare intrinsics to compare instructions.
- if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
- Name.startswith("llvm.x86.avx2.pcmpeq.")) {
- Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
- "pcmpeq");
- // need to sign extend since icmp returns vector of i1
- Rep = Builder.CreateSExt(Rep, CI->getType(), "");
- } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
- Name.startswith("llvm.x86.avx2.pcmpgt.")) {
- Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
- "pcmpgt");
- // need to sign extend since icmp returns vector of i1
- Rep = Builder.CreateSExt(Rep, CI->getType(), "");
- } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
- Name == "llvm.x86.avx.movnt.ps.256" ||
- Name == "llvm.x86.avx.movnt.pd.256") {
- Module *M = F->getParent();
- SmallVector<Value *, 1> Elts;
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- MDNode *Node = MDNode::get(C, Elts);
-
- Value *Arg0 = CI->getArgOperand(0);
- Value *Arg1 = CI->getArgOperand(1);
-
- // Convert the type of the pointer to a pointer to the stored type.
- Value *BC = Builder.CreateBitCast(Arg0,
- PointerType::getUnqual(Arg1->getType()),
- "cast");
- StoreInst *SI = Builder.CreateStore(Arg1, BC);
- SI->setMetadata(M->getMDKindID("nontemporal"), Node);
- SI->setAlignment(16);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- return;
- } else if (Name.startswith("llvm.x86.xop.vpcom")) {
- Intrinsic::ID intID;
- if (Name.endswith("ub"))
- intID = Intrinsic::x86_xop_vpcomub;
- else if (Name.endswith("uw"))
- intID = Intrinsic::x86_xop_vpcomuw;
- else if (Name.endswith("ud"))
- intID = Intrinsic::x86_xop_vpcomud;
- else if (Name.endswith("uq"))
- intID = Intrinsic::x86_xop_vpcomuq;
- else if (Name.endswith("b"))
- intID = Intrinsic::x86_xop_vpcomb;
- else if (Name.endswith("w"))
- intID = Intrinsic::x86_xop_vpcomw;
- else if (Name.endswith("d"))
- intID = Intrinsic::x86_xop_vpcomd;
- else if (Name.endswith("q"))
- intID = Intrinsic::x86_xop_vpcomq;
- else
- llvm_unreachable("Unknown suffix");
-
- Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
- unsigned Imm;
- if (Name.startswith("lt"))
- Imm = 0;
- else if (Name.startswith("le"))
- Imm = 1;
- else if (Name.startswith("gt"))
- Imm = 2;
- else if (Name.startswith("ge"))
- Imm = 3;
- else if (Name.startswith("eq"))
- Imm = 4;
- else if (Name.startswith("ne"))
- Imm = 5;
- else if (Name.startswith("true"))
- Imm = 6;
- else if (Name.startswith("false"))
- Imm = 7;
- else
- llvm_unreachable("Unknown condition");
-
- Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
- Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
- CI->getArgOperand(1), Builder.getInt8(Imm));
- } else {
- bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
- if (Name == "llvm.x86.avx.vpermil.pd.256")
- PD256 = true;
- else if (Name == "llvm.x86.avx.vpermil.pd")
- PD128 = true;
- else if (Name == "llvm.x86.avx.vpermil.ps.256")
- PS256 = true;
- else if (Name == "llvm.x86.avx.vpermil.ps")
- PS128 = true;
-
- if (PD256 || PD128 || PS256 || PS128) {
- Value *Op0 = CI->getArgOperand(0);
- unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- SmallVector<Constant*, 8> Idxs;
-
- if (PD128)
- for (unsigned i = 0; i != 2; ++i)
- Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
- else if (PD256)
- for (unsigned l = 0; l != 4; l+=2)
- for (unsigned i = 0; i != 2; ++i)
- Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
- else if (PS128)
- for (unsigned i = 0; i != 4; ++i)
- Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
- else if (PS256)
- for (unsigned l = 0; l != 8; l+=4)
- for (unsigned i = 0; i != 4; ++i)
- Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
- else
- llvm_unreachable("Unexpected function");
-
- Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
- } else {
- llvm_unreachable("Unknown function for CallInst upgrade.");
- }
- }
-
- CI->replaceAllUsesWith(Rep);
- CI->eraseFromParent();
- return;
- }
-
- std::string Name = CI->getName().str();
- CI->setName(Name + ".old");
-
- switch (NewFn->getIntrinsicID()) {
- default:
- llvm_unreachable("Unknown function for CallInst upgrade.");
-
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
- assert(CI->getNumArgOperands() == 1 &&
- "Mismatch between function args and call args");
- CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
- Builder.getFalse(), Name));
- CI->eraseFromParent();
- return;
-
- case Intrinsic::arm_neon_vclz: {
- // Change name from llvm.arm.neon.vclz.* to llvm.ctlz.*
- CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
- Builder.getFalse(),
- "llvm.ctlz." + Name.substr(14)));
- CI->eraseFromParent();
- return;
- }
- case Intrinsic::ctpop: {
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
- CI->eraseFromParent();
- return;
- }
-
- case Intrinsic::x86_xop_vfrcz_ss:
- case Intrinsic::x86_xop_vfrcz_sd:
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
- Name));
- CI->eraseFromParent();
- return;
-
- case Intrinsic::x86_sse41_ptestc:
- case Intrinsic::x86_sse41_ptestz:
- case Intrinsic::x86_sse41_ptestnzc: {
- // The arguments for these intrinsics used to be v4f32, and changed
- // to v2i64. This is purely a nop, since those are bitwise intrinsics.
- // So, the only thing required is a bitcast for both arguments.
- // First, check the arguments have the old type.
- Value *Arg0 = CI->getArgOperand(0);
- if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
- return;
-
- // Old intrinsic, add bitcasts
- Value *Arg1 = CI->getArgOperand(1);
-
- Value *BC0 =
- Builder.CreateBitCast(Arg0,
- VectorType::get(Type::getInt64Ty(C), 2),
- "cast");
- Value *BC1 =
- Builder.CreateBitCast(Arg1,
- VectorType::get(Type::getInt64Ty(C), 2),
- "cast");
-
- CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
- CI->replaceAllUsesWith(NewCall);
- CI->eraseFromParent();
- return;
- }
- }
-}
-
-// This tests each Function to determine if it needs upgrading. When we find
-// one we are interested in, we then upgrade all calls to reflect the new
-// function.
-void llvm::UpgradeCallsToIntrinsic(Function* F) {
- assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
-
- // Upgrade the function and check if it is a totally new function.
- Function *NewFn;
- if (UpgradeIntrinsicFunction(F, NewFn)) {
- if (NewFn != F) {
- // Replace all uses of the old function with the new one if necessary.
- for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
- UI != UE; ) {
- if (CallInst *CI = dyn_cast<CallInst>(*UI++))
- UpgradeIntrinsicCall(CI, NewFn);
- }
- // Remove old function, no longer used, from the module.
- F->eraseFromParent();
- }
- }
-}
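-
-// A minimal driver sketch (M is a Module). Incrementing the iterator before
-// the call matters, because UpgradeCallsToIntrinsic may erase F from the
-// module and would otherwise invalidate I:
-//
-//   for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
-//     Function *F = I++;
-//     UpgradeCallsToIntrinsic(F);
-//   }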
-
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
deleted file mode 100644
index d353b0adcff7..000000000000
--- a/lib/VMCore/BasicBlock.cpp
+++ /dev/null
@@ -1,371 +0,0 @@
-//===-- BasicBlock.cpp - Implement BasicBlock related methods -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the BasicBlock class for the VMCore library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/LeakDetector.h"
-#include "SymbolTableListTraitsImpl.h"
-#include <algorithm>
-using namespace llvm;
-
-ValueSymbolTable *BasicBlock::getValueSymbolTable() {
- if (Function *F = getParent())
- return &F->getValueSymbolTable();
- return 0;
-}
-
-LLVMContext &BasicBlock::getContext() const {
- return getType()->getContext();
-}
-
-// Explicit instantiation of SymbolTableListTraits since some of the methods
-// are not in the public header file...
-template class llvm::SymbolTableListTraits<Instruction, BasicBlock>;
-
-
-BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
- BasicBlock *InsertBefore)
- : Value(Type::getLabelTy(C), Value::BasicBlockVal), Parent(0) {
-
- // Make sure that we get added to a function
- LeakDetector::addGarbageObject(this);
-
- if (InsertBefore) {
- assert(NewParent &&
- "Cannot insert block before another block with no function!");
- NewParent->getBasicBlockList().insert(InsertBefore, this);
- } else if (NewParent) {
- NewParent->getBasicBlockList().push_back(this);
- }
-
- setName(Name);
-}
-
-
-BasicBlock::~BasicBlock() {
- // If the address of the block is taken and it is being deleted (e.g. because
- // it is dead), this means that there is either a dangling constant expr
- // hanging off the block, or an undefined use of the block (source code
- // expecting the address of a label to keep the block alive even though there
- // is no indirect branch). Handle these cases by zapping the BlockAddress
- // nodes. There are no other possible uses at this point.
- if (hasAddressTaken()) {
- assert(!use_empty() && "There should be at least one blockaddress!");
- Constant *Replacement =
- ConstantInt::get(llvm::Type::getInt32Ty(getContext()), 1);
- while (!use_empty()) {
- BlockAddress *BA = cast<BlockAddress>(use_back());
- BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
- BA->getType()));
- BA->destroyConstant();
- }
- }
-
- assert(getParent() == 0 && "BasicBlock still linked into the program!");
- dropAllReferences();
- InstList.clear();
-}
-
-void BasicBlock::setParent(Function *parent) {
- if (getParent())
- LeakDetector::addGarbageObject(this);
-
- // Set Parent=parent, updating instruction symtab entries as appropriate.
- InstList.setSymTabObject(&Parent, parent);
-
- if (getParent())
- LeakDetector::removeGarbageObject(this);
-}
-
-void BasicBlock::removeFromParent() {
- getParent()->getBasicBlockList().remove(this);
-}
-
-void BasicBlock::eraseFromParent() {
- getParent()->getBasicBlockList().erase(this);
-}
-
-/// moveBefore - Unlink this basic block from its current function and
-/// insert it into the function that MovePos lives in, right before MovePos.
-void BasicBlock::moveBefore(BasicBlock *MovePos) {
- MovePos->getParent()->getBasicBlockList().splice(MovePos,
- getParent()->getBasicBlockList(), this);
-}
-
-/// moveAfter - Unlink this basic block from its current function and
-/// insert it into the function that MovePos lives in, right after MovePos.
-void BasicBlock::moveAfter(BasicBlock *MovePos) {
- Function::iterator I = MovePos;
- MovePos->getParent()->getBasicBlockList().splice(++I,
- getParent()->getBasicBlockList(), this);
-}
-
-
-TerminatorInst *BasicBlock::getTerminator() {
- if (InstList.empty()) return 0;
- return dyn_cast<TerminatorInst>(&InstList.back());
-}
-
-const TerminatorInst *BasicBlock::getTerminator() const {
- if (InstList.empty()) return 0;
- return dyn_cast<TerminatorInst>(&InstList.back());
-}
-
-Instruction* BasicBlock::getFirstNonPHI() {
- BasicBlock::iterator i = begin();
- // All valid basic blocks should have a terminator,
- // which is not a PHINode. If we have an invalid basic
- // block we'll get an assertion failure when dereferencing
- // a past-the-end iterator.
- while (isa<PHINode>(i)) ++i;
- return &*i;
-}
-
-Instruction* BasicBlock::getFirstNonPHIOrDbg() {
- BasicBlock::iterator i = begin();
- // All valid basic blocks should have a terminator,
- // which is not a PHINode. If we have an invalid basic
- // block we'll get an assertion failure when dereferencing
- // a past-the-end iterator.
- while (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) ++i;
- return &*i;
-}
-
-Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() {
- // All valid basic blocks should have a terminator,
- // which is not a PHINode. If we have an invalid basic
- // block we'll get an assertion failure when dereferencing
- // a past-the-end iterator.
- BasicBlock::iterator i = begin();
- for (;; ++i) {
- if (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i))
- continue;
-
- const IntrinsicInst *II = dyn_cast<IntrinsicInst>(i);
- if (!II)
- break;
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
- break;
- }
- return &*i;
-}
-
-BasicBlock::iterator BasicBlock::getFirstInsertionPt() {
- iterator InsertPt = getFirstNonPHI();
- if (isa<LandingPadInst>(InsertPt)) ++InsertPt;
- return InsertPt;
-}
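-
-// Illustrative sketch (a hypothetical caller, not part of this file): a
-// transform that wants to insert code at the top of a block, after any PHI
-// nodes and a landingpad, would typically position an IRBuilder like this;
-// 'Builder' and 'BB' are assumed names.
-//
-//   IRBuilder<> Builder(BB->getContext());
-//   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
-//   Builder.CreateAlloca(Type::getInt32Ty(BB->getContext()), 0, "tmp");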
-
-void BasicBlock::dropAllReferences() {
- for(iterator I = begin(), E = end(); I != E; ++I)
- I->dropAllReferences();
-}
-
-/// getSinglePredecessor - If this basic block has a single predecessor block,
-/// return the block, otherwise return a null pointer.
-BasicBlock *BasicBlock::getSinglePredecessor() {
- pred_iterator PI = pred_begin(this), E = pred_end(this);
- if (PI == E) return 0; // No preds.
- BasicBlock *ThePred = *PI;
- ++PI;
- return (PI == E) ? ThePred : 0 /*multiple preds*/;
-}
-
-/// getUniquePredecessor - If this basic block has a unique predecessor block,
-/// return the block, otherwise return a null pointer.
-/// Note that unique predecessor doesn't mean single edge, there can be
-/// multiple edges from the unique predecessor to this block (for example
-/// a switch statement with multiple cases having the same destination).
-BasicBlock *BasicBlock::getUniquePredecessor() {
- pred_iterator PI = pred_begin(this), E = pred_end(this);
- if (PI == E) return 0; // No preds.
- BasicBlock *PredBB = *PI;
- ++PI;
- for (;PI != E; ++PI) {
- if (*PI != PredBB)
- return 0;
- // The same predecessor appears multiple times in the predecessor list.
- // This is OK.
- }
- return PredBB;
-}
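-
-// For illustration (hypothetical IR, not from this file): in the snippet
-// below %dest has two incoming edges, so getSinglePredecessor() returns
-// null, yet getUniquePredecessor() returns %entry because both edges leave
-// the same block:
-//
-//   entry:
-//     switch i32 %x, label %dest [ i32 0, label %dest ]
-//   dest:
-//     ...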
-
-/// removePredecessor - This method is used to notify a BasicBlock that the
-/// specified Predecessor of the block is no longer able to reach it. This is
-/// not used to update the Predecessor list itself; rather, it updates the
-/// PHI nodes that reside in the block. Note that this should be
-/// called while the predecessor still refers to this block.
-///
-void BasicBlock::removePredecessor(BasicBlock *Pred,
- bool DontDeleteUselessPHIs) {
- assert((hasNUsesOrMore(16)||// Reduce cost of this assertion for complex CFGs.
- find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) &&
- "removePredecessor: BB is not a predecessor!");
-
- if (InstList.empty()) return;
- PHINode *APN = dyn_cast<PHINode>(&front());
- if (!APN) return; // Quick exit.
-
- // If there are exactly two predecessors, then we want to nuke the PHI nodes
- // altogether. However, we cannot do this in a case like the following:
- //
- // Loop:
- // %x = phi [X, Loop]
- // %x2 = add %x, 1 ;; This would become %x2 = add %x2, 1
- // br Loop ;; %x2 does not dominate all uses
- //
- // This is because the PHI node input is actually taken from the predecessor
- // basic block. The only case in which this can happen is a self loop, so we
- // check for that case explicitly now.
- //
- unsigned max_idx = APN->getNumIncomingValues();
- assert(max_idx != 0 && "PHI Node in block with 0 predecessors!?!?!");
- if (max_idx == 2) {
- BasicBlock *Other = APN->getIncomingBlock(APN->getIncomingBlock(0) == Pred);
-
- // Disable PHI elimination!
- if (this == Other) max_idx = 3;
- }
-
- // <= Two predecessors BEFORE I remove one?
- if (max_idx <= 2 && !DontDeleteUselessPHIs) {
- // Yup, loop through and nuke the PHI nodes
- while (PHINode *PN = dyn_cast<PHINode>(&front())) {
- // Remove the predecessor first.
- PN->removeIncomingValue(Pred, !DontDeleteUselessPHIs);
-
- // If the PHI _HAD_ two uses, replace PHI node with its now *single* value
- if (max_idx == 2) {
- if (PN->getIncomingValue(0) != PN)
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- else
- // We are left with an infinite loop with no entries: kill the PHI.
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
- getInstList().pop_front(); // Remove the PHI node
- }
-
- // If the PHI node already only had one entry, it got deleted by
- // removeIncomingValue.
- }
- } else {
- // Okay, now we know that we need to remove predecessor #pred_idx from all
- // PHI nodes. Iterate over each PHI node fixing them up
- PHINode *PN;
- for (iterator II = begin(); (PN = dyn_cast<PHINode>(II)); ) {
- ++II;
- PN->removeIncomingValue(Pred, false);
- // If all incoming values to the Phi are the same, we can replace the Phi
- // with that value.
- Value* PNV = 0;
- if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue()))
- if (PNV != PN) {
- PN->replaceAllUsesWith(PNV);
- PN->eraseFromParent();
- }
- }
- }
-}
-
-
-/// splitBasicBlock - This splits a basic block into two at the specified
-/// instruction. Note that all instructions BEFORE the specified iterator stay
-/// as part of the original basic block, an unconditional branch is added to
-/// the new BB, and the rest of the instructions in the BB are moved to the new
-/// BB, including the old terminator. This invalidates the iterator.
-///
-/// Note that this only works on well formed basic blocks (must have a
-/// terminator), and 'I' must not be the end of instruction list (which would
-/// cause a degenerate basic block to be formed, having a terminator inside of
-/// the basic block).
-///
-BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
- assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!");
- assert(I != InstList.end() &&
- "Trying to get me to create degenerate basic block!");
-
- BasicBlock *InsertBefore = llvm::next(Function::iterator(this))
- .getNodePtrUnchecked();
- BasicBlock *New = BasicBlock::Create(getContext(), BBName,
- getParent(), InsertBefore);
-
- // Move all of the specified instructions from the original basic block into
- // the new basic block.
- New->getInstList().splice(New->end(), this->getInstList(), I, end());
-
- // Add a branch instruction to the newly formed basic block.
- BranchInst::Create(New, this);
-
- // Now we must loop through all of the successors of the New block (which
- // _were_ the successors of the 'this' block), and update any PHI nodes in
- // successors. If there were PHI nodes in the successors, then they need to
- // know that incoming branches will be from New, not from Old.
- //
- for (succ_iterator I = succ_begin(New), E = succ_end(New); I != E; ++I) {
- // Loop over any phi nodes in the basic block, updating the BB field of
- // incoming values...
- BasicBlock *Successor = *I;
- PHINode *PN;
- for (BasicBlock::iterator II = Successor->begin();
- (PN = dyn_cast<PHINode>(II)); ++II) {
- int IDX = PN->getBasicBlockIndex(this);
- while (IDX != -1) {
- PN->setIncomingBlock((unsigned)IDX, New);
- IDX = PN->getBasicBlockIndex(this);
- }
- }
- }
- return New;
-}
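-
-// Illustrative use (hypothetical names, a sketch rather than part of this
-// file): splitting a block at a call so new control flow can be inserted
-// between the halves:
-//
-//   BasicBlock::iterator I = SomeCall;       // iterator at the split point
-//   BasicBlock *Tail = BB->splitBasicBlock(I, "split.tail");
-//   // BB now ends in an unconditional branch to %split.tail; SomeCall and
-//   // everything after it, including the old terminator, now live in Tail.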
-
-void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
- TerminatorInst *TI = getTerminator();
- if (!TI)
- // Cope with being called on a BasicBlock that doesn't have a terminator
- // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
- return;
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *Succ = TI->getSuccessor(i);
- // N.B. Succ might not be a complete BasicBlock, so don't assume
- // that it ends with a non-phi instruction.
- for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) {
- PHINode *PN = dyn_cast<PHINode>(II);
- if (!PN)
- break;
- int i;
- while ((i = PN->getBasicBlockIndex(this)) >= 0)
- PN->setIncomingBlock(i, New);
- }
- }
-}
-
-/// isLandingPad - Return true if this basic block is a landing pad. I.e., it's
-/// the destination of the 'unwind' edge of an invoke instruction.
-bool BasicBlock::isLandingPad() const {
- return isa<LandingPadInst>(getFirstNonPHI());
-}
-
-/// getLandingPadInst() - Return the landingpad instruction associated with
-/// the landing pad.
-LandingPadInst *BasicBlock::getLandingPadInst() {
- return dyn_cast<LandingPadInst>(getFirstNonPHI());
-}
-const LandingPadInst *BasicBlock::getLandingPadInst() const {
- return dyn_cast<LandingPadInst>(getFirstNonPHI());
-}
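-
-// For illustration (hypothetical IR, not from this file): a landing pad is
-// the unwind destination of an invoke, and its first non-PHI instruction is
-// the landingpad instruction that the accessors above return:
-//
-//   invoke void @f() to label %cont unwind label %lpad
-//   ...
-//   lpad:
-//     %ex = landingpad { i8*, i32 }
-//           personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-//           cleanup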
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
deleted file mode 100644
index 06eab0e8f026..000000000000
--- a/lib/VMCore/CMakeLists.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-add_llvm_library(LLVMCore
- AsmWriter.cpp
- Attributes.cpp
- AutoUpgrade.cpp
- BasicBlock.cpp
- ConstantFold.cpp
- Constants.cpp
- Core.cpp
- DataLayout.cpp
- DebugInfo.cpp
- DebugLoc.cpp
- DIBuilder.cpp
- Dominators.cpp
- Function.cpp
- GCOV.cpp
- GVMaterializer.cpp
- Globals.cpp
- IRBuilder.cpp
- InlineAsm.cpp
- Instruction.cpp
- Instructions.cpp
- IntrinsicInst.cpp
- LLVMContext.cpp
- LLVMContextImpl.cpp
- LeakDetector.cpp
- Metadata.cpp
- Module.cpp
- Pass.cpp
- PassManager.cpp
- PassRegistry.cpp
- PrintModulePass.cpp
- Type.cpp
- TypeFinder.cpp
- TargetTransformInfo.cpp
- Use.cpp
- User.cpp
- Value.cpp
- ValueSymbolTable.cpp
- ValueTypes.cpp
- Verifier.cpp
- )
-
-# Workaround: It takes over 20 minutes to compile with msvc10.
-# FIXME: Suppressing optimizations on core libraries would not be a good thing.
-if( MSVC_VERSION LESS 1700 )
-set_property(
- SOURCE Function.cpp
- PROPERTY COMPILE_FLAGS "/Og-"
- )
-endif()
-
-add_dependencies(LLVMCore intrinsics_gen)
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
deleted file mode 100644
index fe3edac42e76..000000000000
--- a/lib/VMCore/ConstantFold.cpp
+++ /dev/null
@@ -1,2066 +0,0 @@
-//===- ConstantFold.cpp - LLVM constant folder ----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements folding of constants for LLVM. This implements the
-// (internal) ConstantFold.h interface, which is used by the
-// ConstantExpr::get* methods to automatically fold constants when possible.
-//
-// Constant folding is currently implemented in two pieces: the
-// pieces that don't need DataLayout, and the pieces that do. This is to avoid
-// a dependence in VMCore on Target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ConstantFold.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Operator.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MathExtras.h"
-#include <limits>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// ConstantFold*Instruction Implementations
-//===----------------------------------------------------------------------===//
-
-/// BitCastConstantVector - Convert the specified vector Constant node to the
-/// specified vector type. At this point, we know that the elements of the
-/// input vector constant are all simple integer or FP values.
-static Constant *BitCastConstantVector(Constant *CV, VectorType *DstTy) {
-
- if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy);
- if (CV->isNullValue()) return Constant::getNullValue(DstTy);
-
- // If this cast changes element count then we can't handle it here:
- // doing so requires endianness information. This should be handled by
- // Analysis/ConstantFolding.cpp
- unsigned NumElts = DstTy->getNumElements();
- if (NumElts != CV->getType()->getVectorNumElements())
- return 0;
-
- Type *DstEltTy = DstTy->getElementType();
-
- SmallVector<Constant*, 16> Result;
- Type *Ty = IntegerType::get(CV->getContext(), 32);
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *C =
- ConstantExpr::getExtractElement(CV, ConstantInt::get(Ty, i));
- C = ConstantExpr::getBitCast(C, DstEltTy);
- Result.push_back(C);
- }
-
- return ConstantVector::get(Result);
-}
-
-/// This function determines which opcode to use to fold two constant cast
-/// expressions together. It uses CastInst::isEliminableCastPair to determine
-/// the opcode. Consequently, it's just a wrapper around that function.
-/// @brief Determine if it is valid to fold a cast of a cast
-static unsigned
-foldConstantCastPair(
- unsigned opc, ///< opcode of the second cast constant expression
- ConstantExpr *Op, ///< the first cast constant expression
- Type *DstTy ///< destination type of the second cast
-) {
- assert(Op && Op->isCast() && "Can't fold cast of cast without a cast!");
- assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type");
- assert(CastInst::isCast(opc) && "Invalid cast opcode");
-
- // Get the types and opcodes for the two cast constant expressions.
- Type *SrcTy = Op->getOperand(0)->getType();
- Type *MidTy = Op->getType();
- Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode());
- Instruction::CastOps secondOp = Instruction::CastOps(opc);
-
- // Assume that pointers are never more than 64 bits wide.
- IntegerType *FakeIntPtrTy = Type::getInt64Ty(DstTy->getContext());
-
- // Let CastInst::isEliminableCastPair do the heavy lifting.
- return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy,
- FakeIntPtrTy, FakeIntPtrTy,
- FakeIntPtrTy);
-}
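-
-// For example (an illustrative sketch, not from this file): a pair of
-// pointer bitcasts on a global, such as
-//   bitcast (i8* bitcast (float* @g to i8*) to i32*)
-// is an eliminable pair; foldConstantCastPair returns the opcode of the
-// single replacement cast (BitCast, yielding bitcast (float* @g to i32*)),
-// or zero when the pair cannot be collapsed.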
-
-static Constant *FoldBitCast(Constant *V, Type *DestTy) {
- Type *SrcTy = V->getType();
- if (SrcTy == DestTy)
- return V; // no-op cast
-
- // Check to see if we are casting a pointer to an aggregate to a pointer to
- // the first element. If so, return the appropriate GEP instruction.
- if (PointerType *PTy = dyn_cast<PointerType>(V->getType()))
- if (PointerType *DPTy = dyn_cast<PointerType>(DestTy))
- if (PTy->getAddressSpace() == DPTy->getAddressSpace()
- && DPTy->getElementType()->isSized()) {
- SmallVector<Value*, 8> IdxList;
- Value *Zero =
- Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
- IdxList.push_back(Zero);
- Type *ElTy = PTy->getElementType();
- while (ElTy != DPTy->getElementType()) {
- if (StructType *STy = dyn_cast<StructType>(ElTy)) {
- if (STy->getNumElements() == 0) break;
- ElTy = STy->getElementType(0);
- IdxList.push_back(Zero);
- } else if (SequentialType *STy =
- dyn_cast<SequentialType>(ElTy)) {
- if (ElTy->isPointerTy()) break; // Can't index into pointers!
- ElTy = STy->getElementType();
- IdxList.push_back(Zero);
- } else {
- break;
- }
- }
-
- if (ElTy == DPTy->getElementType())
- // This GEP is inbounds because all indices are zero.
- return ConstantExpr::getInBoundsGetElementPtr(V, IdxList);
- }
-
- // Handle casts from one vector constant to another. We know that the src
- // and dest type have the same size (otherwise it's an illegal cast).
- if (VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
- if (VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
- assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() &&
- "Not cast between same sized vectors!");
- SrcTy = NULL; // Defensively clear the shadowing pointer; it is not used below.
- // First, check for null. Undef is already handled.
- if (isa<ConstantAggregateZero>(V))
- return Constant::getNullValue(DestTy);
-
- // Handle ConstantVector and ConstantDataVector.
- return BitCastConstantVector(V, DestPTy);
- }
-
- // Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts
- // This allows for other simplifications (although some of them
- // can only be handled by Analysis/ConstantFolding.cpp).
- if (isa<ConstantInt>(V) || isa<ConstantFP>(V))
- return ConstantExpr::getBitCast(ConstantVector::get(V), DestPTy);
- }
-
- // Finally, handle the remaining scalar cases. Note that the code below
- // does not handle every bitcast correctly; unhandled cases return null.
- if (isa<ConstantPointerNull>(V)) // ptr->ptr cast.
- return ConstantPointerNull::get(cast<PointerType>(DestTy));
-
- // Handle integral constant input.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- if (DestTy->isIntegerTy())
- // Integral -> Integral. This is a no-op because the bit widths must
- // be the same. Consequently, we just fold to V.
- return V;
-
- if (DestTy->isFloatingPointTy())
- return ConstantFP::get(DestTy->getContext(),
- APFloat(CI->getValue(),
- !DestTy->isPPC_FP128Ty()));
-
- // Otherwise, can't fold this (vector?)
- return 0;
- }
-
- // Handle ConstantFP input: FP -> Integral.
- if (ConstantFP *FP = dyn_cast<ConstantFP>(V))
- return ConstantInt::get(FP->getContext(),
- FP->getValueAPF().bitcastToAPInt());
-
- return 0;
-}
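-
-// Illustration of the pointer case above (hypothetical IR, not from this
-// file): casting a pointer to an aggregate into a pointer to its first
-// element becomes an inbounds GEP with all-zero indices:
-//   bitcast ({ i32, i8 }* @s to i32*)
-//     --> getelementptr inbounds ({ i32, i8 }* @s, i32 0, i32 0)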
-
-
-/// ExtractConstantBytes - V is an integer constant which only has a subset of
-/// its bytes used. The bytes used are indicated by ByteStart (which is the
-/// first byte used, counting from the least significant byte) and ByteSize,
-/// which is the number of bytes used.
-///
-/// This function analyzes the specified constant to see if the specified byte
-/// range can be returned as a simplified constant. If so, the constant is
-/// returned, otherwise null is returned.
-///
-static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
- unsigned ByteSize) {
- assert(C->getType()->isIntegerTy() &&
- (cast<IntegerType>(C->getType())->getBitWidth() & 7) == 0 &&
- "Non-byte sized integer input");
- unsigned CSize = cast<IntegerType>(C->getType())->getBitWidth()/8;
- assert(ByteSize && "Must be accessing some piece");
- assert(ByteStart+ByteSize <= CSize && "Extracting invalid piece from input");
- assert(ByteSize != CSize && "Should not extract everything");
-
- // Constant Integers are simple.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
- APInt V = CI->getValue();
- if (ByteStart)
- V = V.lshr(ByteStart*8);
- V = V.trunc(ByteSize*8);
- return ConstantInt::get(CI->getContext(), V);
- }
-
- // If the input is a constant expr, we might be able to recursively simplify.
- // If not, we definitely can't do anything.
- ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
- if (CE == 0) return 0;
-
- switch (CE->getOpcode()) {
- default: return 0;
- case Instruction::Or: {
- Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
- if (RHS == 0)
- return 0;
-
- // X | -1 -> -1.
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS))
- if (RHSC->isAllOnesValue())
- return RHSC;
-
- Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
- if (LHS == 0)
- return 0;
- return ConstantExpr::getOr(LHS, RHS);
- }
- case Instruction::And: {
- Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
- if (RHS == 0)
- return 0;
-
- // X & 0 -> 0.
- if (RHS->isNullValue())
- return RHS;
-
- Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
- if (LHS == 0)
- return 0;
- return ConstantExpr::getAnd(LHS, RHS);
- }
- case Instruction::LShr: {
- ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
- if (Amt == 0)
- return 0;
- unsigned ShAmt = Amt->getZExtValue();
- // Cannot analyze non-byte shifts.
- if ((ShAmt & 7) != 0)
- return 0;
- ShAmt >>= 3;
-
- // If the extract is known to be all zeros, return zero.
- if (ByteStart >= CSize-ShAmt)
- return Constant::getNullValue(IntegerType::get(CE->getContext(),
- ByteSize*8));
- // If the extract is known to be fully in the input, extract it.
- if (ByteStart+ByteSize+ShAmt <= CSize)
- return ExtractConstantBytes(CE->getOperand(0), ByteStart+ShAmt, ByteSize);
-
- // TODO: Handle the 'partially zero' case.
- return 0;
- }
-
- case Instruction::Shl: {
- ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
- if (Amt == 0)
- return 0;
- unsigned ShAmt = Amt->getZExtValue();
- // Cannot analyze non-byte shifts.
- if ((ShAmt & 7) != 0)
- return 0;
- ShAmt >>= 3;
-
- // If the extract is known to be all zeros, return zero.
- if (ByteStart+ByteSize <= ShAmt)
- return Constant::getNullValue(IntegerType::get(CE->getContext(),
- ByteSize*8));
- // If the extract is known to be fully in the input, extract it.
- if (ByteStart >= ShAmt)
- return ExtractConstantBytes(CE->getOperand(0), ByteStart-ShAmt, ByteSize);
-
- // TODO: Handle the 'partially zero' case.
- return 0;
- }
-
- case Instruction::ZExt: {
- unsigned SrcBitSize =
- cast<IntegerType>(CE->getOperand(0)->getType())->getBitWidth();
-
- // If extracting something that is completely zero, return 0.
- if (ByteStart*8 >= SrcBitSize)
- return Constant::getNullValue(IntegerType::get(CE->getContext(),
- ByteSize*8));
-
- // If exactly extracting the input, return it.
- if (ByteStart == 0 && ByteSize*8 == SrcBitSize)
- return CE->getOperand(0);
-
- // If extracting something completely contained in the input, and the input
- // is a multiple of 8 bits, recurse.
- if ((SrcBitSize&7) == 0 && (ByteStart+ByteSize)*8 <= SrcBitSize)
- return ExtractConstantBytes(CE->getOperand(0), ByteStart, ByteSize);
-
- // Otherwise, if extracting a subset of the input, which is not a multiple of
- // 8 bits, do a shift and trunc to get the bits.
- if ((ByteStart+ByteSize)*8 < SrcBitSize) {
- assert((SrcBitSize&7) && "Shouldn't get byte sized case here");
- Constant *Res = CE->getOperand(0);
- if (ByteStart)
- Res = ConstantExpr::getLShr(Res,
- ConstantInt::get(Res->getType(), ByteStart*8));
- return ConstantExpr::getTrunc(Res, IntegerType::get(C->getContext(),
- ByteSize*8));
- }
-
- // TODO: Handle the 'partially zero' case.
- return 0;
- }
- }
-}
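-
-// Worked example (illustrative only): extracting ByteStart=1, ByteSize=2
-// from the i32 constant 0x11223344 shifts the value right by 8 bits and
-// truncates to i16, yielding 0x2233; the ConstantExpr cases above push the
-// same reduction recursively through or/and/shift/zext expressions.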
-
-/// getFoldedSizeOf - Return a ConstantExpr with type DestTy for sizeof
-/// on Ty, with any known factors factored out. If Folded is false,
-/// return null if no factoring was possible, to avoid endlessly
-/// bouncing an unfoldable expression back into the top-level folder.
-///
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy,
- bool Folded) {
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
- Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
- return ConstantExpr::getNUWMul(E, N);
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- unsigned NumElems = STy->getNumElements();
- // An empty struct has size zero.
- if (NumElems == 0)
- return ConstantExpr::getNullValue(DestTy);
- // Check for a struct with all members having the same size.
- Constant *MemberSize =
- getFoldedSizeOf(STy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberSize !=
- getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame) {
- Constant *N = ConstantInt::get(DestTy, NumElems);
- return ConstantExpr::getNUWMul(MemberSize, N);
- }
- }
-
- // Pointer size doesn't depend on the pointee type, so canonicalize them
- // to an arbitrary pointee.
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
- if (!PTy->getElementType()->isIntegerTy(1))
- return
- getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1),
- PTy->getAddressSpace()),
- DestTy, true);
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return 0;
-
- // Base case: Get a regular sizeof expression.
- Constant *C = ConstantExpr::getSizeOf(Ty);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
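-
-// For instance (an illustrative sketch, not from this file): sizeof of an
-// array folds as
-//   sizeof([8 x i64]) --> mul nuw (sizeof(i64), 8)
-// exposing the known factor 8, and a homogeneous struct like
-// { i32, i32, i32 } similarly becomes mul nuw (sizeof(i32), 3); here
-// sizeof(T) abbreviates the GEP-off-null expression that
-// ConstantExpr::getSizeOf produces.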
-
-/// getFoldedAlignOf - Return a ConstantExpr with type DestTy for alignof
-/// on Ty, with any known factors factored out. If Folded is false,
-/// return null if no factoring was possible, to avoid endlessly
-/// bouncing an unfoldable expression back into the top-level folder.
-///
-static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy,
- bool Folded) {
- // The alignment of an array is equal to the alignment of the
- // array element. Note that this is not always true for vectors.
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *C = ConstantExpr::getAlignOf(ATy->getElementType());
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy,
- false),
- C, DestTy);
- return C;
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty)) {
- // Packed structs always have an alignment of 1.
- if (STy->isPacked())
- return ConstantInt::get(DestTy, 1);
-
- // Otherwise, struct alignment is the maximum alignment of any member.
- // Without target data, we can't compare much, but we can check to see
- // if all the members have the same alignment.
- unsigned NumElems = STy->getNumElements();
- // An empty struct has minimal alignment.
- if (NumElems == 0)
- return ConstantInt::get(DestTy, 1);
- // Check for a struct with all members having the same alignment.
- Constant *MemberAlign =
- getFoldedAlignOf(STy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame)
- return MemberAlign;
- }
-
- // Pointer alignment doesn't depend on the pointee type, so canonicalize them
- // to an arbitrary pointee.
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
- if (!PTy->getElementType()->isIntegerTy(1))
- return
- getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(),
- 1),
- PTy->getAddressSpace()),
- DestTy, true);
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return 0;
-
- // Base case: Get a regular alignof expression.
- Constant *C = ConstantExpr::getAlignOf(Ty);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
-
-/// getFoldedOffsetOf - Return a ConstantExpr with type DestTy for offsetof
-/// on Ty and FieldNo, with any known factors factored out. If Folded is false,
-/// return null if no factoring was possible, to avoid endlessly
-/// bouncing an unfoldable expression back into the top-level folder.
-///
-static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo,
- Type *DestTy,
- bool Folded) {
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
- DestTy, false),
- FieldNo, DestTy);
- Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
- return ConstantExpr::getNUWMul(E, N);
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- unsigned NumElems = STy->getNumElements();
- // An empty struct has no members.
- if (NumElems == 0)
- return 0;
- // Check for a struct with all members having the same size.
- Constant *MemberSize =
- getFoldedSizeOf(STy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberSize !=
- getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame) {
- Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo,
- false,
- DestTy,
- false),
- FieldNo, DestTy);
- return ConstantExpr::getNUWMul(MemberSize, N);
- }
- }
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return 0;
-
- // Base case: Get a regular offsetof expression.
- Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
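-
-// Example (illustrative, using the same sizeof shorthand as above): for a
-// homogeneous struct such as { i64, i64, i64 },
-//   offsetof(Ty, FieldNo) --> mul nuw (sizeof(i64), FieldNo)
-// since every member has the same size, the field number is simply scaled
-// by the shared member size.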
-
-Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
- Type *DestTy) {
- if (isa<UndefValue>(V)) {
- // zext(undef) = 0, because the top bits will be zero.
- // sext(undef) = 0, because the top bits will all be the same.
- // [us]itofp(undef) = 0, because the result value is bounded.
- if (opc == Instruction::ZExt || opc == Instruction::SExt ||
- opc == Instruction::UIToFP || opc == Instruction::SIToFP)
- return Constant::getNullValue(DestTy);
- return UndefValue::get(DestTy);
- }
-
- if (V->isNullValue() && !DestTy->isX86_MMXTy())
- return Constant::getNullValue(DestTy);
-
- // If the cast operand is a constant expression, there's a few things we can
- // do to try to simplify it.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->isCast()) {
- // Try hard to fold cast of cast because they are often eliminable.
- if (unsigned newOpc = foldConstantCastPair(opc, CE, DestTy))
- return ConstantExpr::getCast(newOpc, CE->getOperand(0), DestTy);
- } else if (CE->getOpcode() == Instruction::GetElementPtr) {
- // If all of the indexes in the GEP are null values, there is no pointer
- // adjustment going on. We might as well cast the source pointer.
- bool isAllNull = true;
- for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
- if (!CE->getOperand(i)->isNullValue()) {
- isAllNull = false;
- break;
- }
- if (isAllNull)
- // This is casting one pointer type to another, always BitCast
- return ConstantExpr::getPointerCast(CE->getOperand(0), DestTy);
- }
- }
-
- // If the cast operand is a constant vector, perform the cast by
- // operating on each element. In the case of bitcasts, the element
- // count may be mismatched; don't attempt to handle that here.
- if ((isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) &&
- DestTy->isVectorTy() &&
- DestTy->getVectorNumElements() == V->getType()->getVectorNumElements()) {
- SmallVector<Constant*, 16> res;
- VectorType *DestVecTy = cast<VectorType>(DestTy);
- Type *DstEltTy = DestVecTy->getElementType();
- Type *Ty = IntegerType::get(V->getContext(), 32);
- for (unsigned i = 0, e = V->getType()->getVectorNumElements(); i != e; ++i) {
- Constant *C =
- ConstantExpr::getExtractElement(V, ConstantInt::get(Ty, i));
- res.push_back(ConstantExpr::getCast(opc, C, DstEltTy));
- }
- return ConstantVector::get(res);
- }
-
- // We actually have to do a cast now. Perform the cast according to the
- // opcode specified.
- switch (opc) {
- default:
- llvm_unreachable("Failed to cast constant expression");
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
- bool ignored;
- APFloat Val = FPC->getValueAPF();
- Val.convert(DestTy->isHalfTy() ? APFloat::IEEEhalf :
- DestTy->isFloatTy() ? APFloat::IEEEsingle :
- DestTy->isDoubleTy() ? APFloat::IEEEdouble :
- DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended :
- DestTy->isFP128Ty() ? APFloat::IEEEquad :
- DestTy->isPPC_FP128Ty() ? APFloat::PPCDoubleDouble :
- APFloat::Bogus,
- APFloat::rmNearestTiesToEven, &ignored);
- return ConstantFP::get(V->getContext(), Val);
- }
- return 0; // Can't fold.
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
- const APFloat &V = FPC->getValueAPF();
- bool ignored;
- uint64_t x[2];
- uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
- (void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI,
- APFloat::rmTowardZero, &ignored);
- APInt Val(DestBitWidth, x);
- return ConstantInt::get(FPC->getContext(), Val);
- }
- return 0; // Can't fold.
- case Instruction::IntToPtr: // always treated as unsigned
- if (V->isNullValue()) // Is it an integral null value?
- return ConstantPointerNull::get(cast<PointerType>(DestTy));
- return 0; // Other pointer types cannot be cast
- case Instruction::PtrToInt: // always treated as unsigned
- // Is it a null pointer value?
- if (V->isNullValue())
- return ConstantInt::get(DestTy, 0);
- // If this is a sizeof-like expression, pull out multiplications by
- // known factors to expose them to subsequent folding. If it's an
- // alignof-like expression, factor out known factors.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- CE->getOperand(0)->isNullValue()) {
- Type *Ty =
- cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
- if (CE->getNumOperands() == 2) {
- // Handle a sizeof-like expression.
- Constant *Idx = CE->getOperand(1);
- bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne();
- if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) {
- Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true,
- DestTy, false),
- Idx, DestTy);
- return ConstantExpr::getMul(C, Idx);
- }
- } else if (CE->getNumOperands() == 3 &&
- CE->getOperand(1)->isNullValue()) {
- // Handle an alignof-like expression.
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2));
- if (CI->isOne() &&
- STy->getNumElements() == 2 &&
- STy->getElementType(0)->isIntegerTy(1)) {
- return getFoldedAlignOf(STy->getElementType(1), DestTy, false);
- }
- }
- // Handle an offsetof-like expression.
- if (Ty->isStructTy() || Ty->isArrayTy()) {
- if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
- DestTy, false))
- return C;
- }
- }
- }
- // Other pointer types cannot be cast
- return 0;
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- APInt api = CI->getValue();
- APFloat apf(APInt::getNullValue(DestTy->getPrimitiveSizeInBits()),
- !DestTy->isPPC_FP128Ty() /* isIEEE */);
- (void)apf.convertFromAPInt(api,
- opc==Instruction::SIToFP,
- APFloat::rmNearestTiesToEven);
- return ConstantFP::get(V->getContext(), apf);
- }
- return 0;
- case Instruction::ZExt:
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
- return ConstantInt::get(V->getContext(),
- CI->getValue().zext(BitWidth));
- }
- return 0;
- case Instruction::SExt:
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
- return ConstantInt::get(V->getContext(),
- CI->getValue().sext(BitWidth));
- }
- return 0;
- case Instruction::Trunc: {
- uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- return ConstantInt::get(V->getContext(),
- CI->getValue().trunc(DestBitWidth));
- }
-
- // The input must be a constantexpr. See if we can simplify this based on
- // the bytes we are demanding. Only do this if the source and dest are an
- // even multiple of a byte.
- if ((DestBitWidth & 7) == 0 &&
- (cast<IntegerType>(V->getType())->getBitWidth() & 7) == 0)
- if (Constant *Res = ExtractConstantBytes(V, 0, DestBitWidth / 8))
- return Res;
-
- return 0;
- }
- case Instruction::BitCast:
- return FoldBitCast(V, DestTy);
- }
-}
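-
-// The PtrToInt case above recognizes the classic GEP-off-null sizeof idiom.
-// For example (illustrative):
-//   ptrtoint ([4 x i16]* getelementptr ([4 x i16]* null, i32 1) to i64)
-// is rewritten so the known factor is exposed, yielding
-// mul nuw (sizeof(i16), 4) rather than one opaque expression; the
-// DataLayout-aware folder in Analysis can then finish the job.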
-
-Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
- Constant *V1, Constant *V2) {
- // Check for i1 and vector true/false conditions.
- if (Cond->isNullValue()) return V2;
- if (Cond->isAllOnesValue()) return V1;
-
- // If the condition is a vector constant, fold the result elementwise.
- if (ConstantVector *CondV = dyn_cast<ConstantVector>(Cond)) {
- SmallVector<Constant*, 16> Result;
- Type *Ty = IntegerType::get(CondV->getContext(), 32);
- for (unsigned i = 0, e = V1->getType()->getVectorNumElements(); i != e;++i){
- ConstantInt *Cond = dyn_cast<ConstantInt>(CondV->getOperand(i));
- if (Cond == 0) break;
-
- Constant *V = Cond->isNullValue() ? V2 : V1;
- Constant *Res = ConstantExpr::getExtractElement(V, ConstantInt::get(Ty, i));
- Result.push_back(Res);
- }
-
- // If we were able to build the vector, return it.
- if (Result.size() == V1->getType()->getVectorNumElements())
- return ConstantVector::get(Result);
- }
-
- if (isa<UndefValue>(Cond)) {
- if (isa<UndefValue>(V1)) return V1;
- return V2;
- }
- if (isa<UndefValue>(V1)) return V2;
- if (isa<UndefValue>(V2)) return V1;
- if (V1 == V2) return V1;
-
- if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) {
- if (TrueVal->getOpcode() == Instruction::Select)
- if (TrueVal->getOperand(0) == Cond)
- return ConstantExpr::getSelect(Cond, TrueVal->getOperand(1), V2);
- }
- if (ConstantExpr *FalseVal = dyn_cast<ConstantExpr>(V2)) {
- if (FalseVal->getOpcode() == Instruction::Select)
- if (FalseVal->getOperand(0) == Cond)
- return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2));
- }
-
- return 0;
-}
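-
-// Example (illustrative): with a vector condition each lane is folded
-// independently, so
-//   select <2 x i1> <i1 true, i1 false>,
-//          <2 x i8> <i8 1, i8 2>, <2 x i8> <i8 3, i8 4>
-// folds to <2 x i8> <i8 1, i8 4>.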
-
-Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
- Constant *Idx) {
- if (isa<UndefValue>(Val)) // ee(undef, x) -> undef
- return UndefValue::get(Val->getType()->getVectorElementType());
- if (Val->isNullValue()) // ee(zero, x) -> zero
- return Constant::getNullValue(Val->getType()->getVectorElementType());
- // ee({w,x,y,z}, undef) -> undef
- if (isa<UndefValue>(Idx))
- return UndefValue::get(Val->getType()->getVectorElementType());
-
- if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
- uint64_t Index = CIdx->getZExtValue();
- // ee({w,x,y,z}, wrong_value) -> undef
- if (Index >= Val->getType()->getVectorNumElements())
- return UndefValue::get(Val->getType()->getVectorElementType());
- return Val->getAggregateElement(Index);
- }
- return 0;
-}
-
-Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
- Constant *Elt,
- Constant *Idx) {
- ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
- if (!CIdx) return 0;
- const APInt &IdxVal = CIdx->getValue();
-
- SmallVector<Constant*, 16> Result;
- Type *Ty = IntegerType::get(Val->getContext(), 32);
- for (unsigned i = 0, e = Val->getType()->getVectorNumElements(); i != e; ++i){
- if (i == IdxVal) {
- Result.push_back(Elt);
- continue;
- }
-
- Constant *C =
- ConstantExpr::getExtractElement(Val, ConstantInt::get(Ty, i));
- Result.push_back(C);
- }
-
- return ConstantVector::get(Result);
-}
-
-Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
- Constant *V2,
- Constant *Mask) {
- unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
- Type *EltTy = V1->getType()->getVectorElementType();
-
- // Undefined shuffle mask -> undefined value.
- if (isa<UndefValue>(Mask))
- return UndefValue::get(VectorType::get(EltTy, MaskNumElts));
-
- // Don't break the bitcode reader hack.
- if (isa<ConstantExpr>(Mask)) return 0;
-
- unsigned SrcNumElts = V1->getType()->getVectorNumElements();
-
- // Loop over the shuffle mask, evaluating each element.
- SmallVector<Constant*, 32> Result;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Elt = ShuffleVectorInst::getMaskValue(Mask, i);
- if (Elt == -1) {
- Result.push_back(UndefValue::get(EltTy));
- continue;
- }
- Constant *InElt;
- if (unsigned(Elt) >= SrcNumElts*2)
- InElt = UndefValue::get(EltTy);
- else if (unsigned(Elt) >= SrcNumElts) {
- Type *Ty = IntegerType::get(V2->getContext(), 32);
- InElt =
- ConstantExpr::getExtractElement(V2,
- ConstantInt::get(Ty, Elt - SrcNumElts));
- } else {
- Type *Ty = IntegerType::get(V1->getContext(), 32);
- InElt = ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, Elt));
- }
- Result.push_back(InElt);
- }
-
- return ConstantVector::get(Result);
-}
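-
-// Example (illustrative): mask elements below SrcNumElts select from the
-// first operand and the rest from the second, so
-//   shufflevector <2 x i8> <i8 10, i8 20>, <2 x i8> <i8 30, i8 40>,
-//                 <2 x i32> <i32 0, i32 3>
-// folds to <2 x i8> <i8 10, i8 40>, while -1 (undef) mask lanes become
-// undef elements.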
-
-Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
- ArrayRef<unsigned> Idxs) {
- // Base case: no indices, so return the entire value.
- if (Idxs.empty())
- return Agg;
-
- if (Constant *C = Agg->getAggregateElement(Idxs[0]))
- return ConstantFoldExtractValueInstruction(C, Idxs.slice(1));
-
- return 0;
-}
-
-Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
- Constant *Val,
- ArrayRef<unsigned> Idxs) {
- // Base case: no indices, so replace the entire value.
- if (Idxs.empty())
- return Val;
-
- unsigned NumElts;
- if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
- NumElts = ST->getNumElements();
- else if (ArrayType *AT = dyn_cast<ArrayType>(Agg->getType()))
- NumElts = AT->getNumElements();
- else
- NumElts = Agg->getType()->getVectorNumElements(); // Must be a vector type.
-
- SmallVector<Constant*, 32> Result;
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *C = Agg->getAggregateElement(i);
- if (C == 0) return 0;
-
- if (Idxs[0] == i)
- C = ConstantFoldInsertValueInstruction(C, Val, Idxs.slice(1));
-
- Result.push_back(C);
- }
-
- if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
- return ConstantStruct::get(ST, Result);
- if (ArrayType *AT = dyn_cast<ArrayType>(Agg->getType()))
- return ConstantArray::get(AT, Result);
- return ConstantVector::get(Result);
-}
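-
-// Example (illustrative): the aggregate is rebuilt element by element, so
-//   insertvalue ({ i32, i32 } { i32 1, i32 2 }, i32 7, 1)
-// folds to { i32 1, i32 7 }.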
-
-
-Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
- Constant *C1, Constant *C2) {
- // Handle UndefValue up front.
- if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
- switch (Opcode) {
- case Instruction::Xor:
- if (isa<UndefValue>(C1) && isa<UndefValue>(C2))
- // Handle undef ^ undef -> 0 special case. This is a common
- // idiom (misuse).
- return Constant::getNullValue(C1->getType());
- // Fallthrough
- case Instruction::Add:
- case Instruction::Sub:
- return UndefValue::get(C1->getType());
- case Instruction::And:
- if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef & undef -> undef
- return C1;
- return Constant::getNullValue(C1->getType()); // undef & X -> 0
- case Instruction::Mul: {
- ConstantInt *CI;
- // X * undef -> undef if X is odd or undef
- if (((CI = dyn_cast<ConstantInt>(C1)) && CI->getValue()[0]) ||
- ((CI = dyn_cast<ConstantInt>(C2)) && CI->getValue()[0]) ||
- (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
- return UndefValue::get(C1->getType());
-
- // X * undef -> 0 otherwise
- return Constant::getNullValue(C1->getType());
- }
- case Instruction::UDiv:
- case Instruction::SDiv:
- // undef / 1 -> undef
- if (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv)
- if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2))
- if (CI2->isOne())
- return C1;
- // FALL THROUGH
- case Instruction::URem:
- case Instruction::SRem:
- if (!isa<UndefValue>(C2)) // undef / X -> 0
- return Constant::getNullValue(C1->getType());
- return C2; // X / undef -> undef
- case Instruction::Or: // X | undef -> -1
- if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef | undef -> undef
- return C1;
- return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0
- case Instruction::LShr:
- if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
- return C1; // undef lshr undef -> undef
- return Constant::getNullValue(C1->getType()); // X lshr undef -> 0
- // undef lshr X -> 0
- case Instruction::AShr:
- if (!isa<UndefValue>(C2)) // undef ashr X --> all ones
- return Constant::getAllOnesValue(C1->getType());
- else if (isa<UndefValue>(C1))
- return C1; // undef ashr undef -> undef
- else
- return C1; // X ashr undef --> X
- case Instruction::Shl:
- if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
- return C1; // undef shl undef -> undef
- // undef << X -> 0 or X << undef -> 0
- return Constant::getNullValue(C1->getType());
- }
- }
-
- // Handle simplifications when the RHS is a constant int.
- if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
- switch (Opcode) {
- case Instruction::Add:
- if (CI2->equalsInt(0)) return C1; // X + 0 == X
- break;
- case Instruction::Sub:
- if (CI2->equalsInt(0)) return C1; // X - 0 == X
- break;
- case Instruction::Mul:
- if (CI2->equalsInt(0)) return C2; // X * 0 == 0
- if (CI2->equalsInt(1))
- return C1; // X * 1 == X
- break;
- case Instruction::UDiv:
- case Instruction::SDiv:
- if (CI2->equalsInt(1))
- return C1; // X / 1 == X
- if (CI2->equalsInt(0))
- return UndefValue::get(CI2->getType()); // X / 0 == undef
- break;
- case Instruction::URem:
- case Instruction::SRem:
- if (CI2->equalsInt(1))
- return Constant::getNullValue(CI2->getType()); // X % 1 == 0
- if (CI2->equalsInt(0))
- return UndefValue::get(CI2->getType()); // X % 0 == undef
- break;
- case Instruction::And:
- if (CI2->isZero()) return C2; // X & 0 == 0
- if (CI2->isAllOnesValue())
- return C1; // X & -1 == X
-
- if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
- // (zext i32 to i64) & 4294967295 -> (zext i32 to i64)
- if (CE1->getOpcode() == Instruction::ZExt) {
- unsigned DstWidth = CI2->getType()->getBitWidth();
- unsigned SrcWidth =
- CE1->getOperand(0)->getType()->getPrimitiveSizeInBits();
- APInt PossiblySetBits(APInt::getLowBitsSet(DstWidth, SrcWidth));
- if ((PossiblySetBits & CI2->getValue()) == PossiblySetBits)
- return C1;
- }
-
- // If and'ing the address of a global with a constant, fold it.
- if (CE1->getOpcode() == Instruction::PtrToInt &&
- isa<GlobalValue>(CE1->getOperand(0))) {
- GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
-
- // Functions are at least 4-byte aligned.
- unsigned GVAlign = GV->getAlignment();
- if (isa<Function>(GV))
- GVAlign = std::max(GVAlign, 4U);
-
- if (GVAlign > 1) {
- unsigned DstWidth = CI2->getType()->getBitWidth();
- unsigned SrcWidth = std::min(DstWidth, Log2_32(GVAlign));
- APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth));
-
- // If checking bits we know are clear, return zero.
- if ((CI2->getValue() & BitsNotSet) == CI2->getValue())
- return Constant::getNullValue(CI2->getType());
- }
- }
- }
- break;
- case Instruction::Or:
- if (CI2->equalsInt(0)) return C1; // X | 0 == X
- if (CI2->isAllOnesValue())
- return C2; // X | -1 == -1
- break;
- case Instruction::Xor:
- if (CI2->equalsInt(0)) return C1; // X ^ 0 == X
-
- if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
- switch (CE1->getOpcode()) {
- default: break;
- case Instruction::ICmp:
- case Instruction::FCmp:
- // cmp pred ^ true -> cmp !pred
- assert(CI2->equalsInt(1));
- CmpInst::Predicate pred = (CmpInst::Predicate)CE1->getPredicate();
- pred = CmpInst::getInversePredicate(pred);
- return ConstantExpr::getCompare(pred, CE1->getOperand(0),
- CE1->getOperand(1));
- }
- }
- break;
- case Instruction::AShr:
- // ashr (zext C to Ty), C2 -> lshr (zext C, CSA), C2
- if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1))
- if (CE1->getOpcode() == Instruction::ZExt) // Top bits known zero.
- return ConstantExpr::getLShr(C1, C2);
- break;
- }
- } else if (isa<ConstantInt>(C1)) {
- // If C1 is a ConstantInt and C2 is not, swap the operands.
- if (Instruction::isCommutative(Opcode))
- return ConstantExpr::get(Opcode, C2, C1);
- }
-
- // At this point we know neither constant is an UndefValue.
- if (ConstantInt *CI1 = dyn_cast<ConstantInt>(C1)) {
- if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
- const APInt &C1V = CI1->getValue();
- const APInt &C2V = CI2->getValue();
- switch (Opcode) {
- default:
- break;
- case Instruction::Add:
- return ConstantInt::get(CI1->getContext(), C1V + C2V);
- case Instruction::Sub:
- return ConstantInt::get(CI1->getContext(), C1V - C2V);
- case Instruction::Mul:
- return ConstantInt::get(CI1->getContext(), C1V * C2V);
- case Instruction::UDiv:
- assert(!CI2->isNullValue() && "Div by zero handled above");
- return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V));
- case Instruction::SDiv:
- assert(!CI2->isNullValue() && "Div by zero handled above");
- if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
- return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef
- return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V));
- case Instruction::URem:
- assert(!CI2->isNullValue() && "Div by zero handled above");
- return ConstantInt::get(CI1->getContext(), C1V.urem(C2V));
- case Instruction::SRem:
- assert(!CI2->isNullValue() && "Div by zero handled above");
- if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
- return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef
- return ConstantInt::get(CI1->getContext(), C1V.srem(C2V));
- case Instruction::And:
- return ConstantInt::get(CI1->getContext(), C1V & C2V);
- case Instruction::Or:
- return ConstantInt::get(CI1->getContext(), C1V | C2V);
- case Instruction::Xor:
- return ConstantInt::get(CI1->getContext(), C1V ^ C2V);
- case Instruction::Shl: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
- case Instruction::LShr: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
- case Instruction::AShr: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
- }
- }
-
- switch (Opcode) {
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::Shl:
- if (CI1->equalsInt(0)) return C1; // 0 <op> X == 0 for these opcodes.
- break;
- default:
- break;
- }
- } else if (ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) {
- if (ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) {
- APFloat C1V = CFP1->getValueAPF();
- APFloat C2V = CFP2->getValueAPF();
- APFloat C3V = C1V; // copy for modification
- switch (Opcode) {
- default:
- break;
- case Instruction::FAdd:
- (void)C3V.add(C2V, APFloat::rmNearestTiesToEven);
- return ConstantFP::get(C1->getContext(), C3V);
- case Instruction::FSub:
- (void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven);
- return ConstantFP::get(C1->getContext(), C3V);
- case Instruction::FMul:
- (void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven);
- return ConstantFP::get(C1->getContext(), C3V);
- case Instruction::FDiv:
- (void)C3V.divide(C2V, APFloat::rmNearestTiesToEven);
- return ConstantFP::get(C1->getContext(), C3V);
- case Instruction::FRem:
- (void)C3V.mod(C2V, APFloat::rmNearestTiesToEven);
- return ConstantFP::get(C1->getContext(), C3V);
- }
- }
- } else if (VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
- // Perform elementwise folding.
- SmallVector<Constant*, 16> Result;
- Type *Ty = IntegerType::get(VTy->getContext(), 32);
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- Constant *LHS =
- ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i));
- Constant *RHS =
- ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i));
-
- Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
- }
-
- return ConstantVector::get(Result);
- }
-
- if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
- // There are many possible foldings we could do here. We should probably
- // at least fold add of a pointer with an integer into the appropriate
- // getelementptr. This will improve alias analysis a bit.
-
- // Given ((a + b) + c), if (b + c) folds to something interesting, return
- // (a + (b + c)).
- if (Instruction::isAssociative(Opcode) && CE1->getOpcode() == Opcode) {
- Constant *T = ConstantExpr::get(Opcode, CE1->getOperand(1), C2);
- if (!isa<ConstantExpr>(T) || cast<ConstantExpr>(T)->getOpcode() != Opcode)
- return ConstantExpr::get(Opcode, CE1->getOperand(0), T);
- }
- } else if (isa<ConstantExpr>(C2)) {
- // If C2 is a constant expr and C1 isn't, flop them around and fold the
- // other way if possible.
- if (Instruction::isCommutative(Opcode))
- return ConstantFoldBinaryInstruction(Opcode, C2, C1);
- }
-
- // i1 can be simplified in many cases.
- if (C1->getType()->isIntegerTy(1)) {
- switch (Opcode) {
- case Instruction::Add:
- case Instruction::Sub:
- return ConstantExpr::getXor(C1, C2);
- case Instruction::Mul:
- return ConstantExpr::getAnd(C1, C2);
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- // We can assume that C2 == 0. If it were one the result would be
- // undefined because the shift value is as large as the bitwidth.
- return C1;
- case Instruction::SDiv:
- case Instruction::UDiv:
- // We can assume that C2 == 1. If it were zero the result would be
- // undefined through division by zero.
- return C1;
- case Instruction::URem:
- case Instruction::SRem:
- // We can assume that C2 == 1. If it were zero the result would be
- // undefined through division by zero.
- return ConstantInt::getFalse(C1->getContext());
- default:
- break;
- }
- }
-
- // We don't know how to fold this.
- return 0;
-}
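-
-// Example (illustrative) of the i1 simplifications above: with i1 operands,
-// add and sub are both just xor (1 + 1 wraps to 0) and mul is and, so
-//   add i1 true, true
-// folds through xor to i1 false.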
-
-/// isMaybeZeroSizedType - This type may be zero sized if it's an array or
-/// structure of zero sized types (or an opaque struct, where we can't tell).
-/// The only leaf zero sized type is an empty structure.
-static bool isMaybeZeroSizedType(Type *Ty) {
- if (StructType *STy = dyn_cast<StructType>(Ty)) {
- if (STy->isOpaque()) return true; // Can't say.
-
- // If all of the elements have zero size, this does too.
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- if (!isMaybeZeroSizedType(STy->getElementType(i))) return false;
- return true;
-
- } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- return isMaybeZeroSizedType(ATy->getElementType());
- }
- return false;
-}
-
-/// IdxCompare - Compare the two constants as though they were getelementptr
-/// indices. This allows coercion of the types to be the same thing.
-///
-/// If the two constants are the "same" (after coersion), return 0. If the
-/// first is less than the second, return -1, if the second is less than the
-/// first, return 1. If the constants are not integral, return -2.
-///
-static int IdxCompare(Constant *C1, Constant *C2, Type *ElTy) {
- if (C1 == C2) return 0;
-
- // Ok, we found a different index. If they are not ConstantInt, we can't do
- // anything with them.
- if (!isa<ConstantInt>(C1) || !isa<ConstantInt>(C2))
- return -2; // don't know!
-
- // Ok, we have two differing integer indices. Sign extend them to be the same
- // type. 64 bits is always big enough, so we use i64.
- if (!C1->getType()->isIntegerTy(64))
- C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext()));
-
- if (!C2->getType()->isIntegerTy(64))
- C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext()));
-
- if (C1 == C2) return 0; // They are equal
-
- // If the type being indexed over is really just a zero sized type, there is
- // no pointer difference being made here.
- if (isMaybeZeroSizedType(ElTy))
- return -2; // dunno.
-
- // If they are really different, now that they are the same type, then we
- // found a difference!
- if (cast<ConstantInt>(C1)->getSExtValue() <
- cast<ConstantInt>(C2)->getSExtValue())
- return -1;
- else
- return 1;
-}
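-
-// Example (illustrative): comparing the indices i16 3 and i64 5 first sign
-// extends both to i64; since 3 < 5 the result is -1. If either index is not
-// a ConstantInt, or the indexed type may be zero sized, the answer is -2
-// ("unknown").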
-
-/// evaluateFCmpRelation - This function determines if there is anything we can
-/// decide about the two constants provided. This doesn't need to handle simple
-/// things like ConstantFP comparisons, but should instead handle ConstantExprs.
-/// If we can determine that the two constants have a particular relation to
-/// each other, we should return the corresponding FCmpInst predicate,
-/// otherwise return FCmpInst::BAD_FCMP_PREDICATE. This is used below in
-/// ConstantFoldCompareInstruction.
-///
-/// To simplify this code we canonicalize the relation so that the first
-/// operand is always the most "complex" of the two. We consider ConstantFP
-/// to be the simplest, and ConstantExprs to be the most complex.
-static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
- assert(V1->getType() == V2->getType() &&
- "Cannot compare values of different types!");
-
- // Handle degenerate case quickly
- if (V1 == V2) return FCmpInst::FCMP_OEQ;
-
- if (!isa<ConstantExpr>(V1)) {
- if (!isa<ConstantExpr>(V2)) {
- // We distilled this down to a simple case; use the standard constant
- // folder for these few cases.
- ConstantInt *R = 0;
- R = dyn_cast<ConstantInt>(
- ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2));
- if (R && !R->isZero())
- return FCmpInst::FCMP_OEQ;
- R = dyn_cast<ConstantInt>(
- ConstantExpr::getFCmp(FCmpInst::FCMP_OLT, V1, V2));
- if (R && !R->isZero())
- return FCmpInst::FCMP_OLT;
- R = dyn_cast<ConstantInt>(
- ConstantExpr::getFCmp(FCmpInst::FCMP_OGT, V1, V2));
- if (R && !R->isZero())
- return FCmpInst::FCMP_OGT;
-
- // Nothing more we can do
- return FCmpInst::BAD_FCMP_PREDICATE;
- }
-
- // If the first operand is simple and the second is a ConstantExpr, swap operands.
- FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(V2, V1);
- if (SwappedRelation != FCmpInst::BAD_FCMP_PREDICATE)
- return FCmpInst::getSwappedPredicate(SwappedRelation);
- } else {
- // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
- // constantexpr or a simple constant.
- ConstantExpr *CE1 = cast<ConstantExpr>(V1);
- switch (CE1->getOpcode()) {
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- // We might be able to do something with these but we don't right now.
- break;
- default:
- break;
- }
- }
- // There are MANY other foldings that we could perform here. They will
- // probably be added on demand, as they seem needed.
- return FCmpInst::BAD_FCMP_PREDICATE;
-}
-
-/// evaluateICmpRelation - This function determines if there is anything we can
-/// decide about the two constants provided. This doesn't need to handle simple
-/// things like integer comparisons, but should instead handle ConstantExprs
-/// and GlobalValues. If we can determine that the two constants have a
-/// particular relation to each other, we should return the corresponding ICmp
-/// predicate, otherwise return ICmpInst::BAD_ICMP_PREDICATE.
-///
-/// To simplify this code we canonicalize the relation so that the first
-/// operand is always the most "complex" of the two. We consider simple
-/// constants (like ConstantInt) to be the simplest, followed by
-/// GlobalValues, followed by ConstantExpr's (the most complex).
-///
-static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
- bool isSigned) {
- assert(V1->getType() == V2->getType() &&
- "Cannot compare different types of values!");
- if (V1 == V2) return ICmpInst::ICMP_EQ;
-
- if (!isa<ConstantExpr>(V1) && !isa<GlobalValue>(V1) &&
- !isa<BlockAddress>(V1)) {
- if (!isa<GlobalValue>(V2) && !isa<ConstantExpr>(V2) &&
- !isa<BlockAddress>(V2)) {
- // We distilled this down to a simple case, use the standard constant
- // folder.
- ConstantInt *R = 0;
- ICmpInst::Predicate pred = ICmpInst::ICMP_EQ;
- R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
- if (R && !R->isZero())
- return pred;
- pred = isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
- if (R && !R->isZero())
- return pred;
- pred = isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
- if (R && !R->isZero())
- return pred;
-
- // If we couldn't figure it out, bail.
- return ICmpInst::BAD_ICMP_PREDICATE;
- }
-
- // If the first operand is simple, swap operands.
- ICmpInst::Predicate SwappedRelation =
- evaluateICmpRelation(V2, V1, isSigned);
- if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
- return ICmpInst::getSwappedPredicate(SwappedRelation);
-
- } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V1)) {
- if (isa<ConstantExpr>(V2)) { // Swap as necessary.
- ICmpInst::Predicate SwappedRelation =
- evaluateICmpRelation(V2, V1, isSigned);
- if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
- return ICmpInst::getSwappedPredicate(SwappedRelation);
- return ICmpInst::BAD_ICMP_PREDICATE;
- }
-
- // Now we know that the RHS is a GlobalValue, BlockAddress or simple
- // constant (which, since the types must match, means that it's a
- // ConstantPointerNull).
- if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
- // Don't try to decide equality of aliases.
- if (!isa<GlobalAlias>(GV) && !isa<GlobalAlias>(GV2))
- if (!GV->hasExternalWeakLinkage() || !GV2->hasExternalWeakLinkage())
- return ICmpInst::ICMP_NE;
- } else if (isa<BlockAddress>(V2)) {
- return ICmpInst::ICMP_NE; // Globals never equal labels.
- } else {
- assert(isa<ConstantPointerNull>(V2) && "Canonicalization guarantee!");
- // GlobalVals can never be null unless they have external weak linkage.
- // We don't try to evaluate aliases here.
- if (!GV->hasExternalWeakLinkage() && !isa<GlobalAlias>(GV))
- return ICmpInst::ICMP_NE;
- }
- } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(V1)) {
- if (isa<ConstantExpr>(V2)) { // Swap as necessary.
- ICmpInst::Predicate SwappedRelation =
- evaluateICmpRelation(V2, V1, isSigned);
- if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE)
- return ICmpInst::getSwappedPredicate(SwappedRelation);
- return ICmpInst::BAD_ICMP_PREDICATE;
- }
-
- // Now we know that the RHS is a GlobalValue, BlockAddress or simple
- // constant (which, since the types must match, means that it is a
- // ConstantPointerNull).
- if (const BlockAddress *BA2 = dyn_cast<BlockAddress>(V2)) {
- // A block address in another function can't equal this one, but block
- // addresses in the current function might be equal if their blocks are
- // empty.
- if (BA2->getFunction() != BA->getFunction())
- return ICmpInst::ICMP_NE;
- } else {
- // Block addresses aren't null and don't equal the address of globals.
- assert((isa<ConstantPointerNull>(V2) || isa<GlobalValue>(V2)) &&
- "Canonicalization guarantee!");
- return ICmpInst::ICMP_NE;
- }
- } else {
- // Ok, the LHS is known to be a constantexpr. The RHS can be any of a
- // constantexpr, a global, block address, or a simple constant.
- ConstantExpr *CE1 = cast<ConstantExpr>(V1);
- Constant *CE1Op0 = CE1->getOperand(0);
-
- switch (CE1->getOpcode()) {
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- break; // We can't evaluate floating point casts or truncations.
-
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::BitCast:
- case Instruction::ZExt:
- case Instruction::SExt:
- // If the cast is not actually changing bits, and the second operand is a
- // null pointer, do the comparison with the pre-casted value.
- if (V2->isNullValue() &&
- (CE1->getType()->isPointerTy() || CE1->getType()->isIntegerTy())) {
- if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
- if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
- return evaluateICmpRelation(CE1Op0,
- Constant::getNullValue(CE1Op0->getType()),
- isSigned);
- }
- break;
-
- case Instruction::GetElementPtr:
- // Ok, since this is a getelementptr, we know that the constant has a
- // pointer type. Check the various cases.
- if (isa<ConstantPointerNull>(V2)) {
- // If we are comparing a GEP to a null pointer, check to see if the base
- // of the GEP equals the null pointer.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) {
- if (GV->hasExternalWeakLinkage())
- // Weak linkage GVals could be null or not. We're comparing that
- // to a null pointer, so it's greater-or-equal.
- return isSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
- else
- // If it's not weak linkage, the GVal must have a non-zero address,
- // so the result is greater-than.
- return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- } else if (isa<ConstantPointerNull>(CE1Op0)) {
- // If we are indexing from a null pointer, check to see if we have any
- // non-zero indices.
- for (unsigned i = 1, e = CE1->getNumOperands(); i != e; ++i)
- if (!CE1->getOperand(i)->isNullValue())
- // Offsetting from null, must not be equal.
- return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- // Only zero indexes from null, must still be zero.
- return ICmpInst::ICMP_EQ;
- }
- // Otherwise, we can't really say if the first operand is null or not.
- } else if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
- if (isa<ConstantPointerNull>(CE1Op0)) {
- if (GV2->hasExternalWeakLinkage())
- // Weak linkage GVals could be null or not. We're comparing it to
- // a null pointer, so it's less-or-equal.
- return isSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
- else
- // If it's not weak linkage, the GVal must have a non-zero address,
- // so the result is less-than.
- return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) {
- if (GV == GV2) {
- // If this is a getelementptr of the same global, then it must be
- // different. Because the types must match, the getelementptr can
- // have at most one index, and because we fold getelementptrs with
- // a single zero index, that index must be nonzero.
- assert(CE1->getNumOperands() == 2 &&
- !CE1->getOperand(1)->isNullValue() &&
- "Surprising getelementptr!");
- return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- } else {
- // If they are different globals, we don't know what the value is,
- // but they can't be equal.
- return ICmpInst::ICMP_NE;
- }
- }
- } else {
- ConstantExpr *CE2 = cast<ConstantExpr>(V2);
- Constant *CE2Op0 = CE2->getOperand(0);
-
- // There are MANY other foldings that we could perform here. They will
- // probably be added on demand, as they seem needed.
- switch (CE2->getOpcode()) {
- default: break;
- case Instruction::GetElementPtr:
- // By far the most common case to handle is when the base pointers are
- // obviously to the same or different globals.
- if (isa<GlobalValue>(CE1Op0) && isa<GlobalValue>(CE2Op0)) {
- if (CE1Op0 != CE2Op0) // Don't know relative ordering, but not equal
- return ICmpInst::ICMP_NE;
- // Ok, we know that both getelementptr instructions are based on the
- // same global. From this, we can precisely determine the relative
- // ordering of the resultant pointers.
- unsigned i = 1;
-
- // The logic below assumes that the result of the comparison
- // can be determined by finding the first index that differs.
- // This doesn't work if there is over-indexing in any
- // subsequent indices, so check for that case first.
- if (!CE1->isGEPWithNoNotionalOverIndexing() ||
- !CE2->isGEPWithNoNotionalOverIndexing())
- return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
-
- // Compare all of the operands the GEP's have in common.
- gep_type_iterator GTI = gep_type_begin(CE1);
- for (;i != CE1->getNumOperands() && i != CE2->getNumOperands();
- ++i, ++GTI)
- switch (IdxCompare(CE1->getOperand(i),
- CE2->getOperand(i), GTI.getIndexedType())) {
- case -1: return isSigned ? ICmpInst::ICMP_SLT:ICmpInst::ICMP_ULT;
- case 1: return isSigned ? ICmpInst::ICMP_SGT:ICmpInst::ICMP_UGT;
- case -2: return ICmpInst::BAD_ICMP_PREDICATE;
- }
-
- // Ok, we ran out of things they have in common. If any leftovers
- // are non-zero then we have a difference, otherwise we are equal.
- for (; i < CE1->getNumOperands(); ++i)
- if (!CE1->getOperand(i)->isNullValue()) {
- if (isa<ConstantInt>(CE1->getOperand(i)))
- return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- else
- return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
- }
-
- for (; i < CE2->getNumOperands(); ++i)
- if (!CE2->getOperand(i)->isNullValue()) {
- if (isa<ConstantInt>(CE2->getOperand(i)))
- return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- else
- return ICmpInst::BAD_ICMP_PREDICATE; // Might be equal.
- }
- return ICmpInst::ICMP_EQ;
- }
- }
- }
- default:
- break;
- }
- }
-
- return ICmpInst::BAD_ICMP_PREDICATE;
-}
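
A hedged illustration of the GEP-versus-GEP branch above, continuing the fixture from the first sketch (Ctx; GlobalVariable and Module come from llvm/GlobalVariable.h and llvm/Module.h in this tree). The names M, G, I32, Zero and One are introduced here and reused by the later snippets.

    // Sketch: two GEPs off the same global have a decidable ordering.
    Module M("gep_cmp", Ctx);
    Type *I32 = Type::getInt32Ty(Ctx);
    ArrayType *ATy = ArrayType::get(I32, 10);
    GlobalVariable *G =
        new GlobalVariable(M, ATy, /*isConstant=*/false,
                           GlobalValue::InternalLinkage,
                           ConstantAggregateZero::get(ATy), "g");
    Constant *Zero = ConstantInt::get(I32, 0);
    Constant *One  = ConstantInt::get(I32, 1);
    Constant *IdxA[] = { Zero, Zero };                      // &g[0]
    Constant *IdxB[] = { Zero, One  };                      // &g[1]
    Constant *P0 = ConstantExpr::getGetElementPtr(G, IdxA);
    Constant *P1 = ConstantExpr::getGetElementPtr(G, IdxB);
    // Same base, in-range indices differing in one position: per the
    // IdxCompare walk above, this should fold to i1 true.
    Constant *R = ConstantExpr::getICmp(ICmpInst::ICMP_ULT, P0, P1);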
-
-Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
- Constant *C1, Constant *C2) {
- Type *ResultTy;
- if (VectorType *VT = dyn_cast<VectorType>(C1->getType()))
- ResultTy = VectorType::get(Type::getInt1Ty(C1->getContext()),
- VT->getNumElements());
- else
- ResultTy = Type::getInt1Ty(C1->getContext());
-
- // Fold FCMP_FALSE/FCMP_TRUE unconditionally.
- if (pred == FCmpInst::FCMP_FALSE)
- return Constant::getNullValue(ResultTy);
-
- if (pred == FCmpInst::FCMP_TRUE)
- return Constant::getAllOnesValue(ResultTy);
-
- // Handle some degenerate cases first
- if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
- // For EQ and NE, we can always pick a value for the undef to make the
- // predicate pass or fail, so we can return undef.
- // Also, if both operands are undef, we can return undef.
- if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) ||
- (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
- return UndefValue::get(ResultTy);
- // Otherwise, pick the same value as the non-undef operand, and fold
- // it to true or false.
- return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
- }
-
- // icmp eq/ne(null,GV) -> false/true
- if (C1->isNullValue()) {
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(C2))
- // Don't try to evaluate aliases. External weak GV can be null.
- if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
- if (pred == ICmpInst::ICMP_EQ)
- return ConstantInt::getFalse(C1->getContext());
- else if (pred == ICmpInst::ICMP_NE)
- return ConstantInt::getTrue(C1->getContext());
- }
- // icmp eq/ne(GV,null) -> false/true
- } else if (C2->isNullValue()) {
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(C1))
- // Don't try to evaluate aliases. External weak GV can be null.
- if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) {
- if (pred == ICmpInst::ICMP_EQ)
- return ConstantInt::getFalse(C1->getContext());
- else if (pred == ICmpInst::ICMP_NE)
- return ConstantInt::getTrue(C1->getContext());
- }
- }
-
- // If the comparison is a comparison between two i1's, simplify it.
- if (C1->getType()->isIntegerTy(1)) {
- switch(pred) {
- case ICmpInst::ICMP_EQ:
- if (isa<ConstantInt>(C2))
- return ConstantExpr::getXor(C1, ConstantExpr::getNot(C2));
- return ConstantExpr::getXor(ConstantExpr::getNot(C1), C2);
- case ICmpInst::ICMP_NE:
- return ConstantExpr::getXor(C1, C2);
- default:
- break;
- }
- }
-
- if (isa<ConstantInt>(C1) && isa<ConstantInt>(C2)) {
- APInt V1 = cast<ConstantInt>(C1)->getValue();
- APInt V2 = cast<ConstantInt>(C2)->getValue();
- switch (pred) {
- default: llvm_unreachable("Invalid ICmp Predicate");
- case ICmpInst::ICMP_EQ: return ConstantInt::get(ResultTy, V1 == V2);
- case ICmpInst::ICMP_NE: return ConstantInt::get(ResultTy, V1 != V2);
- case ICmpInst::ICMP_SLT: return ConstantInt::get(ResultTy, V1.slt(V2));
- case ICmpInst::ICMP_SGT: return ConstantInt::get(ResultTy, V1.sgt(V2));
- case ICmpInst::ICMP_SLE: return ConstantInt::get(ResultTy, V1.sle(V2));
- case ICmpInst::ICMP_SGE: return ConstantInt::get(ResultTy, V1.sge(V2));
- case ICmpInst::ICMP_ULT: return ConstantInt::get(ResultTy, V1.ult(V2));
- case ICmpInst::ICMP_UGT: return ConstantInt::get(ResultTy, V1.ugt(V2));
- case ICmpInst::ICMP_ULE: return ConstantInt::get(ResultTy, V1.ule(V2));
- case ICmpInst::ICMP_UGE: return ConstantInt::get(ResultTy, V1.uge(V2));
- }
- } else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) {
- APFloat C1V = cast<ConstantFP>(C1)->getValueAPF();
- APFloat C2V = cast<ConstantFP>(C2)->getValueAPF();
- APFloat::cmpResult R = C1V.compare(C2V);
- switch (pred) {
- default: llvm_unreachable("Invalid FCmp Predicate");
- case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy);
- case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy);
- case FCmpInst::FCMP_UNO:
- return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered);
- case FCmpInst::FCMP_ORD:
- return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered);
- case FCmpInst::FCMP_UEQ:
- return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
- R==APFloat::cmpEqual);
- case FCmpInst::FCMP_OEQ:
- return ConstantInt::get(ResultTy, R==APFloat::cmpEqual);
- case FCmpInst::FCMP_UNE:
- return ConstantInt::get(ResultTy, R!=APFloat::cmpEqual);
- case FCmpInst::FCMP_ONE:
- return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
- R==APFloat::cmpGreaterThan);
- case FCmpInst::FCMP_ULT:
- return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
- R==APFloat::cmpLessThan);
- case FCmpInst::FCMP_OLT:
- return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan);
- case FCmpInst::FCMP_UGT:
- return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered ||
- R==APFloat::cmpGreaterThan);
- case FCmpInst::FCMP_OGT:
- return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan);
- case FCmpInst::FCMP_ULE:
- return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan);
- case FCmpInst::FCMP_OLE:
- return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan ||
- R==APFloat::cmpEqual);
- case FCmpInst::FCMP_UGE:
- return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan);
- case FCmpInst::FCMP_OGE:
- return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpEqual);
- }
- } else if (C1->getType()->isVectorTy()) {
- // If we can constant fold the comparison of each element, constant fold
- // the whole vector comparison.
- SmallVector<Constant*, 4> ResElts;
- Type *Ty = IntegerType::get(C1->getContext(), 32);
- // Compare the elements, producing an i1 result or constant expr.
- for (unsigned i = 0, e = C1->getType()->getVectorNumElements(); i != e;++i){
- Constant *C1E =
- ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i));
- Constant *C2E =
- ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i));
-
- ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E));
- }
-
- return ConstantVector::get(ResElts);
- }
-
- if (C1->getType()->isFloatingPointTy()) {
- int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
- switch (evaluateFCmpRelation(C1, C2)) {
- default: llvm_unreachable("Unknown relation!");
- case FCmpInst::FCMP_UNO:
- case FCmpInst::FCMP_ORD:
- case FCmpInst::FCMP_UEQ:
- case FCmpInst::FCMP_UNE:
- case FCmpInst::FCMP_ULT:
- case FCmpInst::FCMP_UGT:
- case FCmpInst::FCMP_ULE:
- case FCmpInst::FCMP_UGE:
- case FCmpInst::FCMP_TRUE:
- case FCmpInst::FCMP_FALSE:
- case FCmpInst::BAD_FCMP_PREDICATE:
- break; // Couldn't determine anything about these constants.
- case FCmpInst::FCMP_OEQ: // We know that C1 == C2
- Result = (pred == FCmpInst::FCMP_UEQ || pred == FCmpInst::FCMP_OEQ ||
- pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE ||
- pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
- break;
- case FCmpInst::FCMP_OLT: // We know that C1 < C2
- Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
- pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT ||
- pred == FCmpInst::FCMP_ULE || pred == FCmpInst::FCMP_OLE);
- break;
- case FCmpInst::FCMP_OGT: // We know that C1 > C2
- Result = (pred == FCmpInst::FCMP_UNE || pred == FCmpInst::FCMP_ONE ||
- pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT ||
- pred == FCmpInst::FCMP_UGE || pred == FCmpInst::FCMP_OGE);
- break;
- case FCmpInst::FCMP_OLE: // We know that C1 <= C2
- // We can only partially decide this relation.
- if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
- Result = 0;
- else if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
- Result = 1;
- break;
- case FCmpInst::FCMP_OGE: // We know that C1 >= C2
- // We can only partially decide this relation.
- if (pred == FCmpInst::FCMP_ULT || pred == FCmpInst::FCMP_OLT)
- Result = 0;
- else if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
- Result = 1;
- break;
- case FCmpInst::FCMP_ONE: // We know that C1 != C2
- // We can only partially decide this relation.
- if (pred == FCmpInst::FCMP_OEQ || pred == FCmpInst::FCMP_UEQ)
- Result = 0;
- else if (pred == FCmpInst::FCMP_ONE || pred == FCmpInst::FCMP_UNE)
- Result = 1;
- break;
- }
-
- // If we evaluated the result, return it now.
- if (Result != -1)
- return ConstantInt::get(ResultTy, Result);
-
- } else {
- // Evaluate the relation between the two constants, per the predicate.
- int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
- switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) {
- default: llvm_unreachable("Unknown relational!");
- case ICmpInst::BAD_ICMP_PREDICATE:
- break; // Couldn't determine anything about these constants.
- case ICmpInst::ICMP_EQ: // We know the constants are equal!
- // If we know the constants are equal, we can decide the result of this
- // computation precisely.
- Result = ICmpInst::isTrueWhenEqual((ICmpInst::Predicate)pred);
- break;
- case ICmpInst::ICMP_ULT:
- switch (pred) {
- case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULE:
- Result = 1; break;
- case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGE:
- Result = 0; break;
- }
- break;
- case ICmpInst::ICMP_SLT:
- switch (pred) {
- case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SLE:
- Result = 1; break;
- case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SGE:
- Result = 0; break;
- }
- break;
- case ICmpInst::ICMP_UGT:
- switch (pred) {
- case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGE:
- Result = 1; break;
- case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE:
- Result = 0; break;
- }
- break;
- case ICmpInst::ICMP_SGT:
- switch (pred) {
- case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_NE: case ICmpInst::ICMP_SGE:
- Result = 1; break;
- case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_SLE:
- Result = 0; break;
- }
- break;
- case ICmpInst::ICMP_ULE:
- if (pred == ICmpInst::ICMP_UGT) Result = 0;
- if (pred == ICmpInst::ICMP_ULT || pred == ICmpInst::ICMP_ULE) Result = 1;
- break;
- case ICmpInst::ICMP_SLE:
- if (pred == ICmpInst::ICMP_SGT) Result = 0;
- if (pred == ICmpInst::ICMP_SLT || pred == ICmpInst::ICMP_SLE) Result = 1;
- break;
- case ICmpInst::ICMP_UGE:
- if (pred == ICmpInst::ICMP_ULT) Result = 0;
- if (pred == ICmpInst::ICMP_UGT || pred == ICmpInst::ICMP_UGE) Result = 1;
- break;
- case ICmpInst::ICMP_SGE:
- if (pred == ICmpInst::ICMP_SLT) Result = 0;
- if (pred == ICmpInst::ICMP_SGT || pred == ICmpInst::ICMP_SGE) Result = 1;
- break;
- case ICmpInst::ICMP_NE:
- if (pred == ICmpInst::ICMP_EQ) Result = 0;
- if (pred == ICmpInst::ICMP_NE) Result = 1;
- break;
- }
-
- // If we evaluated the result, return it now.
- if (Result != -1)
- return ConstantInt::get(ResultTy, Result);
-
- // If the right hand side is a bitcast, try using its inverse to simplify
- // it by moving it to the left hand side. We can't do this if it would turn
- // a vector compare into a scalar compare or vice versa.
- if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
- Constant *CE2Op0 = CE2->getOperand(0);
- if (CE2->getOpcode() == Instruction::BitCast &&
- CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) {
- Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
- return ConstantExpr::getICmp(pred, Inverse, CE2Op0);
- }
- }
-
- // If the left hand side is an extension, try eliminating it.
- if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
- if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) ||
- (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){
- Constant *CE1Op0 = CE1->getOperand(0);
- Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType());
- if (CE1Inverse == CE1Op0) {
- // Check whether we can safely truncate the right hand side.
- Constant *C2Inverse = ConstantExpr::getTrunc(C2, CE1Op0->getType());
- if (ConstantExpr::getZExt(C2Inverse, C2->getType()) == C2) {
- return ConstantExpr::getICmp(pred, CE1Inverse, C2Inverse);
- }
- }
- }
- }
-
- if ((!isa<ConstantExpr>(C1) && isa<ConstantExpr>(C2)) ||
- (C1->isNullValue() && !C2->isNullValue())) {
- // If C2 is a constant expr and C1 isn't, flip them around and fold the
- // other way if possible.
- // Also, if C1 is null and C2 isn't, flip them around.
- pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred);
- return ConstantExpr::getICmp(pred, C2, C1);
- }
- }
- return 0;
-}
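
Continuing the same fixture, a few calls that land in different arms of ConstantFoldCompareInstruction; the results noted in the comments are what the rules above imply, stated as a sketch rather than a test.

    Constant *C42 = ConstantInt::get(I32, 42);
    Constant *C7  = ConstantInt::get(I32, 7);
    // Two ConstantInts hit the APInt fast path: folds to i1 true.
    Constant *T = ConstantExpr::getICmp(ICmpInst::ICMP_SGT, C42, C7);
    // Equality against undef can be made to go either way, so it folds
    // to undef per the degenerate-case handling above.
    Constant *U = ConstantExpr::getICmp(ICmpInst::ICMP_EQ,
                                        UndefValue::get(I32), C42);
    // For a non-equality predicate the undef is treated as equal to the
    // other operand: isTrueWhenEqual(SLT) is false, so this is i1 false.
    Constant *F = ConstantExpr::getICmp(ICmpInst::ICMP_SLT,
                                        UndefValue::get(I32), C42);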
-
-/// isInBoundsIndices - Test whether the given sequence of *normalized* indices
-/// is "inbounds".
-template<typename IndexTy>
-static bool isInBoundsIndices(ArrayRef<IndexTy> Idxs) {
- // No indices means nothing that could be out of bounds.
- if (Idxs.empty()) return true;
-
- // If the first index is zero, it's in bounds.
- if (cast<Constant>(Idxs[0])->isNullValue()) return true;
-
- // If the first index is one and all the rest are zero, it's in bounds,
- // by the one-past-the-end rule.
- if (!cast<ConstantInt>(Idxs[0])->isOne())
- return false;
- for (unsigned i = 1, e = Idxs.size(); i != e; ++i)
- if (!cast<Constant>(Idxs[i])->isNullValue())
- return false;
- return true;
-}
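
The second rule is the interesting one: a single index of one is the one-past-the-end address. Reusing G and One from the earlier sketch, the folder below can therefore tag such a GEP as inbounds; a sketch of the expected behavior:

    Constant *OneIdx[] = { One };
    // One past the end of @g: isInBoundsIndices accepts {1}, so the
    // fold below should hand back a "getelementptr inbounds" expression.
    Constant *PastEnd = ConstantExpr::getGetElementPtr(G, OneIdx);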
-
-template<typename IndexTy>
-static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
- bool inBounds,
- ArrayRef<IndexTy> Idxs) {
- if (Idxs.empty()) return C;
- Constant *Idx0 = cast<Constant>(Idxs[0]);
- if ((Idxs.size() == 1 && Idx0->isNullValue()))
- return C;
-
- if (isa<UndefValue>(C)) {
- PointerType *Ptr = cast<PointerType>(C->getType());
- Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
- assert(Ty != 0 && "Invalid indices for GEP!");
- return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
- }
-
- if (C->isNullValue()) {
- bool isNull = true;
- for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
- if (!cast<Constant>(Idxs[i])->isNullValue()) {
- isNull = false;
- break;
- }
- if (isNull) {
- PointerType *Ptr = cast<PointerType>(C->getType());
- Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
- assert(Ty != 0 && "Invalid indices for GEP!");
- return ConstantPointerNull::get(PointerType::get(Ty,
- Ptr->getAddressSpace()));
- }
- }
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- // Combine Indices - If the source pointer to this getelementptr instruction
- // is a getelementptr instruction, combine the indices of the two
- // getelementptr instructions into a single getelementptr.
- //
- if (CE->getOpcode() == Instruction::GetElementPtr) {
- Type *LastTy = 0;
- for (gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
- I != E; ++I)
- LastTy = *I;
-
- if ((LastTy && isa<SequentialType>(LastTy)) || Idx0->isNullValue()) {
- SmallVector<Value*, 16> NewIndices;
- NewIndices.reserve(Idxs.size() + CE->getNumOperands());
- for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
- NewIndices.push_back(CE->getOperand(i));
-
- // Add the last index of the source with the first index of the new GEP.
- // Make sure to handle the case when they are actually different types.
- Constant *Combined = CE->getOperand(CE->getNumOperands()-1);
- // Otherwise it must be an array.
- if (!Idx0->isNullValue()) {
- Type *IdxTy = Combined->getType();
- if (IdxTy != Idx0->getType()) {
- Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext());
- Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Int64Ty);
- Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, Int64Ty);
- Combined = ConstantExpr::get(Instruction::Add, C1, C2);
- } else {
- Combined =
- ConstantExpr::get(Instruction::Add, Idx0, Combined);
- }
- }
-
- NewIndices.push_back(Combined);
- NewIndices.append(Idxs.begin() + 1, Idxs.end());
- return
- ConstantExpr::getGetElementPtr(CE->getOperand(0), NewIndices,
- inBounds &&
- cast<GEPOperator>(CE)->isInBounds());
- }
- }
-
- // Implement folding of:
- // i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*),
- // i64 0, i64 0)
- // To: i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
- //
- if (CE->isCast() && Idxs.size() > 1 && Idx0->isNullValue()) {
- if (PointerType *SPT =
- dyn_cast<PointerType>(CE->getOperand(0)->getType()))
- if (ArrayType *SAT = dyn_cast<ArrayType>(SPT->getElementType()))
- if (ArrayType *CAT =
- dyn_cast<ArrayType>(cast<PointerType>(C->getType())->getElementType()))
- if (CAT->getElementType() == SAT->getElementType())
- return
- ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0),
- Idxs, inBounds);
- }
- }
-
- // Check to see if any array indices are not within the corresponding
- // notional array bounds. If so, try to determine if they can be factored
- // out into preceding dimensions.
- bool Unknown = false;
- SmallVector<Constant *, 8> NewIdxs;
- Type *Ty = C->getType();
- Type *Prev = 0;
- for (unsigned i = 0, e = Idxs.size(); i != e;
- Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty))
- if (ATy->getNumElements() <= INT64_MAX &&
- ATy->getNumElements() != 0 &&
- CI->getSExtValue() >= (int64_t)ATy->getNumElements()) {
- if (isa<SequentialType>(Prev)) {
- // It's out of range, but we can factor it into the prior
- // dimension.
- NewIdxs.resize(Idxs.size());
- ConstantInt *Factor = ConstantInt::get(CI->getType(),
- ATy->getNumElements());
- NewIdxs[i] = ConstantExpr::getSRem(CI, Factor);
-
- Constant *PrevIdx = cast<Constant>(Idxs[i-1]);
- Constant *Div = ConstantExpr::getSDiv(CI, Factor);
-
- // Before adding, extend both operands to i64 to avoid
- // overflow trouble.
- if (!PrevIdx->getType()->isIntegerTy(64))
- PrevIdx = ConstantExpr::getSExt(PrevIdx,
- Type::getInt64Ty(Div->getContext()));
- if (!Div->getType()->isIntegerTy(64))
- Div = ConstantExpr::getSExt(Div,
- Type::getInt64Ty(Div->getContext()));
-
- NewIdxs[i-1] = ConstantExpr::getAdd(PrevIdx, Div);
- } else {
- // It's out of range, but the prior dimension is a struct
- // so we can't do anything about it.
- Unknown = true;
- }
- }
- } else {
- // We don't know if it's in range or not.
- Unknown = true;
- }
- }
-
- // If we did any factoring, start over with the adjusted indices.
- if (!NewIdxs.empty()) {
- for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
- if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]);
- return ConstantExpr::getGetElementPtr(C, NewIdxs, inBounds);
- }
-
- // If all indices are known integers and normalized, we can do a simple
- // check for the "inbounds" property.
- if (!Unknown && !inBounds &&
- isa<GlobalVariable>(C) && isInBoundsIndices(Idxs))
- return ConstantExpr::getInBoundsGetElementPtr(C, Idxs);
-
- return 0;
-}
-
-Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
- bool inBounds,
- ArrayRef<Constant *> Idxs) {
- return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs);
-}
-
-Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
- bool inBounds,
- ArrayRef<Value *> Idxs) {
- return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs);
-}
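
A small illustration of the index-combining branch above, again reusing the fixture (G, I32, Zero): the trailing index of the inner GEP and the leading index of the outer one are added, so the nested expression collapses into a single GEP on @g.

    Constant *Three = ConstantInt::get(I32, 3);
    Constant *BaseIdx[]  = { Zero, Zero };
    Constant *ThreeIdx[] = { Three };
    Constant *Inner = ConstantExpr::getGetElementPtr(G, BaseIdx); // i32* to g[0]
    // Inner's last index (0) plus Outer's first index (3) should yield a
    // single getelementptr ([10 x i32]* @g, i32 0, i32 3).
    Constant *Outer = ConstantExpr::getGetElementPtr(Inner, ThreeIdx);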
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
deleted file mode 100644
index edd6a73b0867..000000000000
--- a/lib/VMCore/Constants.cpp
+++ /dev/null
@@ -1,2671 +0,0 @@
-//===-- Constants.cpp - Implement Constant nodes --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Constant* classes.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Constants.h"
-#include "LLVMContextImpl.h"
-#include "ConstantFold.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include <algorithm>
-#include <cstdarg>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Constant Class
-//===----------------------------------------------------------------------===//
-
-void Constant::anchor() { }
-
-bool Constant::isNegativeZeroValue() const {
- // Floating point values have an explicit -0.0 value.
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
- return CFP->isZero() && CFP->isNegative();
-
- // Otherwise, just use +0.0.
- return isNullValue();
-}
-
-bool Constant::isNullValue() const {
- // 0 is null.
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
- return CI->isZero();
-
- // +0.0 is null.
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
- return CFP->isZero() && !CFP->isNegative();
-
- // ConstantAggregateZero is null for aggregates; ConstantPointerNull for pointers.
- return isa<ConstantAggregateZero>(this) || isa<ConstantPointerNull>(this);
-}
-
-bool Constant::isAllOnesValue() const {
- // Check for -1 integers
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
- return CI->isMinusOne();
-
- // Check for FP which are bitcasted from -1 integers
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
- return CFP->getValueAPF().bitcastToAPInt().isAllOnesValue();
-
- // Check for constant vectors which are splats of -1 values.
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
- if (Constant *Splat = CV->getSplatValue())
- return Splat->isAllOnesValue();
-
- // Check for constant data vectors which are splats of -1 values.
- if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
- if (Constant *Splat = CV->getSplatValue())
- return Splat->isAllOnesValue();
-
- return false;
-}
-
- // Factory method to create a '0' constant of arbitrary type.
-Constant *Constant::getNullValue(Type *Ty) {
- switch (Ty->getTypeID()) {
- case Type::IntegerTyID:
- return ConstantInt::get(Ty, 0);
- case Type::HalfTyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEhalf));
- case Type::FloatTyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEsingle));
- case Type::DoubleTyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEdouble));
- case Type::X86_FP80TyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::x87DoubleExtended));
- case Type::FP128TyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEquad));
- case Type::PPC_FP128TyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat(APInt::getNullValue(128)));
- case Type::PointerTyID:
- return ConstantPointerNull::get(cast<PointerType>(Ty));
- case Type::StructTyID:
- case Type::ArrayTyID:
- case Type::VectorTyID:
- return ConstantAggregateZero::get(Ty);
- default:
- // Function, Label, or Opaque type?
- llvm_unreachable("Cannot create a null constant of that type!");
- }
-}
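
Usage is as mundane as it looks; a sketch with the same fixture:

    Constant *Z32 = Constant::getNullValue(I32);                          // i32 0
    Constant *ZD  = Constant::getNullValue(Type::getDoubleTy(Ctx));       // +0.0
    Constant *ZP  = Constant::getNullValue(PointerType::getUnqual(I32));  // null
    // Each is the canonical zero for its type, per the switch above.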
-
-Constant *Constant::getIntegerValue(Type *Ty, const APInt &V) {
- Type *ScalarTy = Ty->getScalarType();
-
- // Create the base integer constant.
- Constant *C = ConstantInt::get(Ty->getContext(), V);
-
- // Convert an integer to a pointer, if necessary.
- if (PointerType *PTy = dyn_cast<PointerType>(ScalarTy))
- C = ConstantExpr::getIntToPtr(C, PTy);
-
- // Broadcast a scalar to a vector, if necessary.
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- C = ConstantVector::getSplat(VTy->getNumElements(), C);
-
- return C;
-}
-
-Constant *Constant::getAllOnesValue(Type *Ty) {
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- return ConstantInt::get(Ty->getContext(),
- APInt::getAllOnesValue(ITy->getBitWidth()));
-
- if (Ty->isFloatingPointTy()) {
- APFloat FL = APFloat::getAllOnesValue(Ty->getPrimitiveSizeInBits(),
- !Ty->isPPC_FP128Ty());
- return ConstantFP::get(Ty->getContext(), FL);
- }
-
- VectorType *VTy = cast<VectorType>(Ty);
- return ConstantVector::getSplat(VTy->getNumElements(),
- getAllOnesValue(VTy->getElementType()));
-}
-
-/// getAggregateElement - For aggregates (struct/array/vector) return the
-/// constant that corresponds to the specified element if possible, or null if
-/// not. This can return null if the element index is a ConstantExpr, or if
-/// 'this' is a constant expr.
-Constant *Constant::getAggregateElement(unsigned Elt) const {
- if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(this))
- return Elt < CS->getNumOperands() ? CS->getOperand(Elt) : 0;
-
- if (const ConstantArray *CA = dyn_cast<ConstantArray>(this))
- return Elt < CA->getNumOperands() ? CA->getOperand(Elt) : 0;
-
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
- return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : 0;
-
- if (const ConstantAggregateZero *CAZ =dyn_cast<ConstantAggregateZero>(this))
- return CAZ->getElementValue(Elt);
-
- if (const UndefValue *UV = dyn_cast<UndefValue>(this))
- return UV->getElementValue(Elt);
-
- if (const ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(this))
- return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt) : 0;
- return 0;
-}
-
-Constant *Constant::getAggregateElement(Constant *Elt) const {
- assert(isa<IntegerType>(Elt->getType()) && "Index must be an integer");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Elt))
- return getAggregateElement(CI->getZExtValue());
- return 0;
-}
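
A sketch of both overloads against a ConstantDataArray, same fixture; note the out-of-range case returns the null sentinel rather than asserting.

    uint8_t Bytes[] = { 1, 2, 3 };
    Constant *Arr = ConstantDataArray::get(Ctx, Bytes); // [3 x i8]
    Constant *E1  = Arr->getAggregateElement(1);        // i8 2
    Constant *E9  = Arr->getAggregateElement(9);        // 0: index out of range
    Constant *EC  = Arr->getAggregateElement(ConstantInt::get(I32, 2)); // i8 3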
-
-
-void Constant::destroyConstantImpl() {
- // When a Constant is destroyed, there may be lingering
- // references to the constant by other constants in the constant pool. These
- // constants are implicitly dependent on the module that is being deleted,
- // but they don't know that. Because we only find out when the constant is
- // deleted, we must now notify all of our users (that should only be
- // Constants) that they are, in fact, invalid now and should be deleted.
- //
- while (!use_empty()) {
- Value *V = use_back();
-#ifndef NDEBUG // Only in builds with assertions enabled.
- if (!isa<Constant>(V)) {
- dbgs() << "While deleting: " << *this
- << "\n\nUse still stuck around after Def is destroyed: "
- << *V << "\n\n";
- }
-#endif
- assert(isa<Constant>(V) && "References remain to Constant being destroyed");
- cast<Constant>(V)->destroyConstant();
-
- // The constant should remove itself from our use list...
- assert((use_empty() || use_back() != V) && "Constant not removed!");
- }
-
- // The Value has no outstanding references; it is safe to delete it now.
- delete this;
-}
-
-/// canTrap - Return true if evaluation of this constant could trap. This is
-/// true for things like constant expressions that could divide by zero.
-bool Constant::canTrap() const {
- assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
- // The only thing that could possibly trap are constant exprs.
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(this);
- if (!CE) return false;
-
- // ConstantExpr traps if any operands can trap.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (CE->getOperand(i)->canTrap())
- return true;
-
- // Otherwise, only specific operations can trap.
- switch (CE->getOpcode()) {
- default:
- return false;
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- // Div and rem can trap if the RHS is not known to be non-zero.
- if (!isa<ConstantInt>(CE->getOperand(1)) ||CE->getOperand(1)->isNullValue())
- return true;
- return false;
- }
-}
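
For example, still with the same fixture: division by a zero constant is exactly the case this guards, and in this tree such an expression appears to be left unfolded.

    Constant *ByZero = ConstantExpr::getUDiv(C42, Constant::getNullValue(I32));
    // The folder declines to fold a divide by zero, so ByZero stays a
    // ConstantExpr, and ByZero->canTrap() should report true.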
-
-/// isThreadDependent - Return true if the value can vary between threads.
-bool Constant::isThreadDependent() const {
- SmallPtrSet<const Constant*, 64> Visited;
- SmallVector<const Constant*, 64> WorkList;
- WorkList.push_back(this);
- Visited.insert(this);
-
- while (!WorkList.empty()) {
- const Constant *C = WorkList.pop_back_val();
-
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (GV->isThreadLocal())
- return true;
- }
-
- for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) {
- const Constant *D = dyn_cast<Constant>(C->getOperand(I));
- if (!D)
- continue;
- if (Visited.insert(D))
- WorkList.push_back(D);
- }
- }
-
- return false;
-}
-
-/// isConstantUsed - Return true if the constant has users other than constant
-/// exprs and other dangling things.
-bool Constant::isConstantUsed() const {
- for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
- const Constant *UC = dyn_cast<Constant>(*UI);
- if (UC == 0 || isa<GlobalValue>(UC))
- return true;
-
- if (UC->isConstantUsed())
- return true;
- }
- return false;
-}
-
-
-
-/// getRelocationInfo - This method classifies the entry according to
-/// whether or not it may generate a relocation entry. This must be
-/// conservative, so if it might codegen to a relocatable entry, it should say
-/// so. The return values are:
-///
-/// NoRelocation: This constant pool entry is guaranteed to never have a
-/// relocation applied to it (because it holds a simple constant like
-/// '4').
-/// LocalRelocation: This entry has relocations, but the entries are
-/// guaranteed to be resolvable by the static linker, so the dynamic
-/// linker will never see them.
-/// GlobalRelocations: This entry may have arbitrary relocations.
-///
-/// FIXME: This really should not be in VMCore.
-Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
- if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
- return LocalRelocation; // Local to this file/library.
- return GlobalRelocations; // Global reference.
- }
-
- if (const BlockAddress *BA = dyn_cast<BlockAddress>(this))
- return BA->getFunction()->getRelocationInfo();
-
- // While raw uses of blockaddress need to be relocated, differences between
- // two of them don't when they are for labels in the same function. This is a
- // common idiom when creating a table for the indirect goto extension, so we
- // handle it efficiently here.
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(this))
- if (CE->getOpcode() == Instruction::Sub) {
- ConstantExpr *LHS = dyn_cast<ConstantExpr>(CE->getOperand(0));
- ConstantExpr *RHS = dyn_cast<ConstantExpr>(CE->getOperand(1));
- if (LHS && RHS &&
- LHS->getOpcode() == Instruction::PtrToInt &&
- RHS->getOpcode() == Instruction::PtrToInt &&
- isa<BlockAddress>(LHS->getOperand(0)) &&
- isa<BlockAddress>(RHS->getOperand(0)) &&
- cast<BlockAddress>(LHS->getOperand(0))->getFunction() ==
- cast<BlockAddress>(RHS->getOperand(0))->getFunction())
- return NoRelocation;
- }
-
- PossibleRelocationsTy Result = NoRelocation;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- Result = std::max(Result,
- cast<Constant>(getOperand(i))->getRelocationInfo());
-
- return Result;
-}
-
-/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
-/// it. This involves recursively eliminating any dead users of the
-/// constantexpr.
-static bool removeDeadUsersOfConstant(const Constant *C) {
- if (isa<GlobalValue>(C)) return false; // Cannot remove this
-
- while (!C->use_empty()) {
- const Constant *User = dyn_cast<Constant>(C->use_back());
- if (!User) return false; // Non-constant usage.
- if (!removeDeadUsersOfConstant(User))
- return false; // Constant wasn't dead
- }
-
- const_cast<Constant*>(C)->destroyConstant();
- return true;
-}
-
-
-/// removeDeadConstantUsers - If there are any dead constant users dangling
-/// off of this constant, remove them. This method is useful for clients
-/// that want to check to see if a global is unused, but don't want to deal
-/// with potentially dead constants hanging off of the globals.
-void Constant::removeDeadConstantUsers() const {
- Value::const_use_iterator I = use_begin(), E = use_end();
- Value::const_use_iterator LastNonDeadUser = E;
- while (I != E) {
- const Constant *User = dyn_cast<Constant>(*I);
- if (User == 0) {
- LastNonDeadUser = I;
- ++I;
- continue;
- }
-
- if (!removeDeadUsersOfConstant(User)) {
- // If the constant wasn't dead, remember that this was the last live use
- // and move on to the next constant.
- LastNonDeadUser = I;
- ++I;
- continue;
- }
-
- // If the constant was dead, then the iterator is invalidated.
- if (LastNonDeadUser == E) {
- I = use_begin();
- if (I == E) break;
- } else {
- I = LastNonDeadUser;
- ++I;
- }
- }
-}
-
-
-
-//===----------------------------------------------------------------------===//
-// ConstantInt
-//===----------------------------------------------------------------------===//
-
-void ConstantInt::anchor() { }
-
-ConstantInt::ConstantInt(IntegerType *Ty, const APInt& V)
- : Constant(Ty, ConstantIntVal, 0, 0), Val(V) {
- assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
-}
-
-ConstantInt *ConstantInt::getTrue(LLVMContext &Context) {
- LLVMContextImpl *pImpl = Context.pImpl;
- if (!pImpl->TheTrueVal)
- pImpl->TheTrueVal = ConstantInt::get(Type::getInt1Ty(Context), 1);
- return pImpl->TheTrueVal;
-}
-
-ConstantInt *ConstantInt::getFalse(LLVMContext &Context) {
- LLVMContextImpl *pImpl = Context.pImpl;
- if (!pImpl->TheFalseVal)
- pImpl->TheFalseVal = ConstantInt::get(Type::getInt1Ty(Context), 0);
- return pImpl->TheFalseVal;
-}
-
-Constant *ConstantInt::getTrue(Type *Ty) {
- VectorType *VTy = dyn_cast<VectorType>(Ty);
- if (!VTy) {
- assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1.");
- return ConstantInt::getTrue(Ty->getContext());
- }
- assert(VTy->getElementType()->isIntegerTy(1) &&
- "True must be vector of i1 or i1.");
- return ConstantVector::getSplat(VTy->getNumElements(),
- ConstantInt::getTrue(Ty->getContext()));
-}
-
-Constant *ConstantInt::getFalse(Type *Ty) {
- VectorType *VTy = dyn_cast<VectorType>(Ty);
- if (!VTy) {
- assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1.");
- return ConstantInt::getFalse(Ty->getContext());
- }
- assert(VTy->getElementType()->isIntegerTy(1) &&
- "False must be vector of i1 or i1.");
- return ConstantVector::getSplat(VTy->getNumElements(),
- ConstantInt::getFalse(Ty->getContext()));
-}
-
-
-// Get a ConstantInt from an APInt. Note that the value stored in the DenseMap
-// as the key is a DenseMapAPIntKeyInfo::KeyTy, which provides operator== and
-// operator!= so that the DenseMap never attempts to compare APInts of
-// different widths; doing so would violate an APInt class invariant and
-// trigger an assertion.
-ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
- // Get the corresponding integer type for the bit width of the value.
- IntegerType *ITy = IntegerType::get(Context, V.getBitWidth());
- // get an existing value or the insertion position
- DenseMapAPIntKeyInfo::KeyTy Key(V, ITy);
- ConstantInt *&Slot = Context.pImpl->IntConstants[Key];
- if (!Slot) Slot = new ConstantInt(ITy, V);
- return Slot;
-}
-
-Constant *ConstantInt::get(Type *Ty, uint64_t V, bool isSigned) {
- Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned);
-
- // For vectors, broadcast the value.
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::getSplat(VTy->getNumElements(), C);
-
- return C;
-}
-
-ConstantInt *ConstantInt::get(IntegerType *Ty, uint64_t V,
- bool isSigned) {
- return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
-}
-
-ConstantInt *ConstantInt::getSigned(IntegerType *Ty, int64_t V) {
- return get(Ty, V, true);
-}
-
-Constant *ConstantInt::getSigned(Type *Ty, int64_t V) {
- return get(Ty, V, true);
-}
-
-Constant *ConstantInt::get(Type *Ty, const APInt& V) {
- ConstantInt *C = get(Ty->getContext(), V);
- assert(C->getType() == Ty->getScalarType() &&
- "ConstantInt type doesn't match the type implied by its value!");
-
- // For vectors, broadcast the value.
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::getSplat(VTy->getNumElements(), C);
-
- return C;
-}
-
-ConstantInt *ConstantInt::get(IntegerType* Ty, StringRef Str,
- uint8_t radix) {
- return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
-}
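
A few of the accessors in use, as a sketch on the same fixture. Because (type, value) pairs are uniqued in the LLVMContext, pointer equality doubles as value equality:

    ConstantInt *Hex = ConstantInt::get(IntegerType::get(Ctx, 32), "ff", 16);
    Constant *MinusOne = ConstantInt::getSigned(I32, -1); // sign-extended i32 -1
    ConstantInt *True1 = ConstantInt::getTrue(Ctx);       // the cached i1 1
    // Uniquing: the same object comes back for the same (type, value).
    bool Same = (Hex == ConstantInt::get(IntegerType::get(Ctx, 32), 255));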
-
-//===----------------------------------------------------------------------===//
-// ConstantFP
-//===----------------------------------------------------------------------===//
-
-static const fltSemantics *TypeToFloatSemantics(Type *Ty) {
- if (Ty->isHalfTy())
- return &APFloat::IEEEhalf;
- if (Ty->isFloatTy())
- return &APFloat::IEEEsingle;
- if (Ty->isDoubleTy())
- return &APFloat::IEEEdouble;
- if (Ty->isX86_FP80Ty())
- return &APFloat::x87DoubleExtended;
- else if (Ty->isFP128Ty())
- return &APFloat::IEEEquad;
-
- assert(Ty->isPPC_FP128Ty() && "Unknown FP format");
- return &APFloat::PPCDoubleDouble;
-}
-
-void ConstantFP::anchor() { }
-
-/// get() - This returns a constant fp for the specified value in the
-/// specified type. This should only be used for simple constant values like
-/// 2.0/1.0 etc., that are known-valid both as double and as the target format.
-Constant *ConstantFP::get(Type *Ty, double V) {
- LLVMContext &Context = Ty->getContext();
-
- APFloat FV(V);
- bool ignored;
- FV.convert(*TypeToFloatSemantics(Ty->getScalarType()),
- APFloat::rmNearestTiesToEven, &ignored);
- Constant *C = get(Context, FV);
-
- // For vectors, broadcast the value.
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::getSplat(VTy->getNumElements(), C);
-
- return C;
-}
-
-
-Constant *ConstantFP::get(Type *Ty, StringRef Str) {
- LLVMContext &Context = Ty->getContext();
-
- APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
- Constant *C = get(Context, FV);
-
- // For vectors, broadcast the value.
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::getSplat(VTy->getNumElements(), C);
-
- return C;
-}
-
-
-ConstantFP *ConstantFP::getNegativeZero(Type *Ty) {
- LLVMContext &Context = Ty->getContext();
- APFloat apf = cast<ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
- apf.changeSign();
- return get(Context, apf);
-}
-
-
-Constant *ConstantFP::getZeroValueForNegation(Type *Ty) {
- Type *ScalarTy = Ty->getScalarType();
- if (ScalarTy->isFloatingPointTy()) {
- Constant *C = getNegativeZero(ScalarTy);
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::getSplat(VTy->getNumElements(), C);
- return C;
- }
-
- return Constant::getNullValue(Ty);
-}
-
-
-// ConstantFP accessors.
-ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
- DenseMapAPFloatKeyInfo::KeyTy Key(V);
-
- LLVMContextImpl* pImpl = Context.pImpl;
-
- ConstantFP *&Slot = pImpl->FPConstants[Key];
-
- if (!Slot) {
- Type *Ty;
- if (&V.getSemantics() == &APFloat::IEEEhalf)
- Ty = Type::getHalfTy(Context);
- else if (&V.getSemantics() == &APFloat::IEEEsingle)
- Ty = Type::getFloatTy(Context);
- else if (&V.getSemantics() == &APFloat::IEEEdouble)
- Ty = Type::getDoubleTy(Context);
- else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
- Ty = Type::getX86_FP80Ty(Context);
- else if (&V.getSemantics() == &APFloat::IEEEquad)
- Ty = Type::getFP128Ty(Context);
- else {
- assert(&V.getSemantics() == &APFloat::PPCDoubleDouble &&
- "Unknown FP format");
- Ty = Type::getPPC_FP128Ty(Context);
- }
- Slot = new ConstantFP(Ty, V);
- }
-
- return Slot;
-}
-
-ConstantFP *ConstantFP::getInfinity(Type *Ty, bool Negative) {
- const fltSemantics &Semantics = *TypeToFloatSemantics(Ty);
- return ConstantFP::get(Ty->getContext(),
- APFloat::getInf(Semantics, Negative));
-}
-
-ConstantFP::ConstantFP(Type *Ty, const APFloat& V)
- : Constant(Ty, ConstantFPVal, 0, 0), Val(V) {
- assert(&V.getSemantics() == TypeToFloatSemantics(Ty) &&
- "FP type Mismatch");
-}
-
-bool ConstantFP::isExactlyValue(const APFloat &V) const {
- return Val.bitwiseIsEqual(V);
-}
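
isExactlyValue is a bit-level check via APFloat::bitwiseIsEqual, so it distinguishes signed zeros; a sketch on the same fixture (APFloat comes via llvm/ADT/APFloat.h):

    ConstantFP *Half =
        cast<ConstantFP>(ConstantFP::get(Type::getFloatTy(Ctx), 0.5));
    bool SameBits = Half->isExactlyValue(APFloat(0.5f));   // true: identical bits
    ConstantFP *PZero =
        cast<ConstantFP>(Constant::getNullValue(Type::getFloatTy(Ctx)));
    bool ZeroCmp = PZero->isExactlyValue(APFloat(-0.0f));  // false: -0.0 != +0.0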
-
-//===----------------------------------------------------------------------===//
-// ConstantAggregateZero Implementation
-//===----------------------------------------------------------------------===//
-
-/// getSequentialElement - If this CAZ has array or vector type, return a zero
-/// with the right element type.
-Constant *ConstantAggregateZero::getSequentialElement() const {
- return Constant::getNullValue(getType()->getSequentialElementType());
-}
-
-/// getStructElement - If this CAZ has struct type, return a zero with the
-/// right element type for the specified element.
-Constant *ConstantAggregateZero::getStructElement(unsigned Elt) const {
- return Constant::getNullValue(getType()->getStructElementType(Elt));
-}
-
-/// getElementValue - Return a zero of the right type for the specified GEP
-/// index if we can, otherwise return null (e.g. if C is a ConstantExpr).
-Constant *ConstantAggregateZero::getElementValue(Constant *C) const {
- if (isa<SequentialType>(getType()))
- return getSequentialElement();
- return getStructElement(cast<ConstantInt>(C)->getZExtValue());
-}
-
-/// getElementValue - Return a zero of the right type for the specified GEP
-/// index.
-Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const {
- if (isa<SequentialType>(getType()))
- return getSequentialElement();
- return getStructElement(Idx);
-}
-
-
-//===----------------------------------------------------------------------===//
-// UndefValue Implementation
-//===----------------------------------------------------------------------===//
-
-/// getSequentialElement - If this undef has array or vector type, return an
-/// undef with the right element type.
-UndefValue *UndefValue::getSequentialElement() const {
- return UndefValue::get(getType()->getSequentialElementType());
-}
-
-/// getStructElement - If this undef has struct type, return an undef with the
-/// right element type for the specified element.
-UndefValue *UndefValue::getStructElement(unsigned Elt) const {
- return UndefValue::get(getType()->getStructElementType(Elt));
-}
-
-/// getElementValue - Return an undef of the right type for the specified GEP
-/// index if we can, otherwise return null (e.g. if C is a ConstantExpr).
-UndefValue *UndefValue::getElementValue(Constant *C) const {
- if (isa<SequentialType>(getType()))
- return getSequentialElement();
- return getStructElement(cast<ConstantInt>(C)->getZExtValue());
-}
-
-/// getElementValue - Return an undef of the right type for the specified GEP
-/// index.
-UndefValue *UndefValue::getElementValue(unsigned Idx) const {
- if (isa<SequentialType>(getType()))
- return getSequentialElement();
- return getStructElement(Idx);
-}
-
-
-
-//===----------------------------------------------------------------------===//
-// ConstantXXX Classes
-//===----------------------------------------------------------------------===//
-
-template <typename ItTy, typename EltTy>
-static bool rangeOnlyContains(ItTy Start, ItTy End, EltTy Elt) {
- for (; Start != End; ++Start)
- if (*Start != Elt)
- return false;
- return true;
-}
-
-ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
- : Constant(T, ConstantArrayVal,
- OperandTraits<ConstantArray>::op_end(this) - V.size(),
- V.size()) {
- assert(V.size() == T->getNumElements() &&
- "Invalid initializer vector for constant array");
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- assert(V[i]->getType() == T->getElementType() &&
- "Initializer for array element doesn't match array element type!");
- std::copy(V.begin(), V.end(), op_begin());
-}
-
-Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
- // Empty arrays are canonicalized to ConstantAggregateZero.
- if (V.empty())
- return ConstantAggregateZero::get(Ty);
-
- for (unsigned i = 0, e = V.size(); i != e; ++i) {
- assert(V[i]->getType() == Ty->getElementType() &&
- "Wrong type in array element initializer");
- }
- LLVMContextImpl *pImpl = Ty->getContext().pImpl;
-
- // If this is an all-zero array, return a ConstantAggregateZero object. If
- // all undef, return an UndefValue; if "all simple", return a
- // ConstantDataArray.
- Constant *C = V[0];
- if (isa<UndefValue>(C) && rangeOnlyContains(V.begin(), V.end(), C))
- return UndefValue::get(Ty);
-
- if (C->isNullValue() && rangeOnlyContains(V.begin(), V.end(), C))
- return ConstantAggregateZero::get(Ty);
-
- // Check to see if all of the elements are ConstantFP or ConstantInt and if
- // the element type is compatible with ConstantDataVector. If so, use it.
- if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
- // We speculatively build the elements here even if it turns out that there
- // is a constantexpr or something else weird in the array, since it is so
- // uncommon for that to happen.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
- if (CI->getType()->isIntegerTy(8)) {
- SmallVector<uint8_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(16)) {
- SmallVector<uint16_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(32)) {
- SmallVector<uint32_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(64)) {
- SmallVector<uint64_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
- }
- }
-
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
- if (CFP->getType()->isFloatTy()) {
- SmallVector<float, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(CFP->getValueAPF().convertToFloat());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
- } else if (CFP->getType()->isDoubleTy()) {
- SmallVector<double, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(CFP->getValueAPF().convertToDouble());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataArray::get(C->getContext(), Elts);
- }
- }
- }
-
- // Otherwise, we really do want to create a ConstantArray.
- return pImpl->ArrayConstants.getOrCreate(Ty, V);
-}
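
The canonicalizations above are observable from the outside: asking for a ConstantArray may hand back one of the compact forms instead. A sketch, same fixture:

    Type *I8 = Type::getInt8Ty(Ctx);
    ArrayType *A3 = ArrayType::get(I8, 3);
    Constant *Vals[] = { ConstantInt::get(I8, 1), ConstantInt::get(I8, 2),
                         ConstantInt::get(I8, 3) };
    Constant *CA = ConstantArray::get(A3, Vals);
    // Simple integer payloads take the compact path:
    // isa<ConstantDataArray>(CA) holds.
    Constant *Zs[] = { Constant::getNullValue(I8), Constant::getNullValue(I8),
                       Constant::getNullValue(I8) };
    Constant *CZ = ConstantArray::get(A3, Zs); // isa<ConstantAggregateZero>(CZ)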
-
-/// getTypeForElements - Return an anonymous struct type to use for a constant
-/// with the specified set of elements. The list must not be empty.
-StructType *ConstantStruct::getTypeForElements(LLVMContext &Context,
- ArrayRef<Constant*> V,
- bool Packed) {
- unsigned VecSize = V.size();
- SmallVector<Type*, 16> EltTypes(VecSize);
- for (unsigned i = 0; i != VecSize; ++i)
- EltTypes[i] = V[i]->getType();
-
- return StructType::get(Context, EltTypes, Packed);
-}
-
-
-StructType *ConstantStruct::getTypeForElements(ArrayRef<Constant*> V,
- bool Packed) {
- assert(!V.empty() &&
- "ConstantStruct::getTypeForElements cannot be called on empty list");
- return getTypeForElements(V[0]->getContext(), V, Packed);
-}
-
-
-ConstantStruct::ConstantStruct(StructType *T, ArrayRef<Constant *> V)
- : Constant(T, ConstantStructVal,
- OperandTraits<ConstantStruct>::op_end(this) - V.size(),
- V.size()) {
- assert(V.size() == T->getNumElements() &&
- "Invalid initializer vector for constant structure");
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- assert((T->isOpaque() || V[i]->getType() == T->getElementType(i)) &&
- "Initializer for struct element doesn't match struct element type!");
- std::copy(V.begin(), V.end(), op_begin());
-}
-
-// ConstantStruct accessors.
-Constant *ConstantStruct::get(StructType *ST, ArrayRef<Constant*> V) {
- assert((ST->isOpaque() || ST->getNumElements() == V.size()) &&
- "Incorrect # elements specified to ConstantStruct::get");
-
- // Create a ConstantAggregateZero value if all elements are zeros.
- bool isZero = true;
- bool isUndef = false;
-
- if (!V.empty()) {
- isUndef = isa<UndefValue>(V[0]);
- isZero = V[0]->isNullValue();
- if (isUndef || isZero) {
- for (unsigned i = 0, e = V.size(); i != e; ++i) {
- if (!V[i]->isNullValue())
- isZero = false;
- if (!isa<UndefValue>(V[i]))
- isUndef = false;
- }
- }
- }
- if (isZero)
- return ConstantAggregateZero::get(ST);
- if (isUndef)
- return UndefValue::get(ST);
-
- return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
-}
-
-Constant *ConstantStruct::get(StructType *T, ...) {
- va_list ap;
- SmallVector<Constant*, 8> Values;
- va_start(ap, T);
- while (Constant *Val = va_arg(ap, llvm::Constant*))
- Values.push_back(Val);
- va_end(ap);
- return get(T, Values);
-}
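-
-// Usage sketch (illustrative): the varargs accessor above stops at the first
-// null argument, so the list must be null-terminated. Assuming a hypothetical
-// struct type PairTy = {i32, i32} and an i32 type I32:
-//
-//   Constant *A = ConstantInt::get(I32, 1);
-//   Constant *B = ConstantInt::get(I32, 2);
-//   Constant *Pair = ConstantStruct::get(PairTy, A, B, NULL);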
-
-ConstantVector::ConstantVector(VectorType *T, ArrayRef<Constant *> V)
- : Constant(T, ConstantVectorVal,
- OperandTraits<ConstantVector>::op_end(this) - V.size(),
- V.size()) {
- for (size_t i = 0, e = V.size(); i != e; i++)
- assert(V[i]->getType() == T->getElementType() &&
- "Initializer for vector element doesn't match vector element type!");
- std::copy(V.begin(), V.end(), op_begin());
-}
-
-// ConstantVector accessors.
-Constant *ConstantVector::get(ArrayRef<Constant*> V) {
- assert(!V.empty() && "Vectors can't be empty");
- VectorType *T = VectorType::get(V.front()->getType(), V.size());
- LLVMContextImpl *pImpl = T->getContext().pImpl;
-
- // If this is an all-undef or all-zero vector, return a
- // ConstantAggregateZero or UndefValue.
- Constant *C = V[0];
- bool isZero = C->isNullValue();
- bool isUndef = isa<UndefValue>(C);
-
- if (isZero || isUndef) {
- for (unsigned i = 1, e = V.size(); i != e; ++i)
- if (V[i] != C) {
- isZero = isUndef = false;
- break;
- }
- }
-
- if (isZero)
- return ConstantAggregateZero::get(T);
- if (isUndef)
- return UndefValue::get(T);
-
- // Check to see if all of the elements are ConstantFP or ConstantInt and if
- // the element type is compatible with ConstantDataVector. If so, use it.
- if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
- // We speculatively build the elements here even if it turns out that there
- // is a constantexpr or something else weird in the array, since it is so
- // uncommon for that to happen.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
- if (CI->getType()->isIntegerTy(8)) {
- SmallVector<uint8_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(16)) {
- SmallVector<uint16_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(32)) {
- SmallVector<uint32_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
- } else if (CI->getType()->isIntegerTy(64)) {
- SmallVector<uint64_t, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
- Elts.push_back(CI->getZExtValue());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
- }
- }
-
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
- if (CFP->getType()->isFloatTy()) {
- SmallVector<float, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(CFP->getValueAPF().convertToFloat());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
- } else if (CFP->getType()->isDoubleTy()) {
- SmallVector<double, 16> Elts;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
- Elts.push_back(CFP->getValueAPF().convertToDouble());
- else
- break;
- if (Elts.size() == V.size())
- return ConstantDataVector::get(C->getContext(), Elts);
- }
- }
- }
-
- // Otherwise, the element type isn't compatible with ConstantDataVector, or
- // the operand list contains a ConstantExpr or something else strange.
- return pImpl->VectorConstants.getOrCreate(T, V);
-}
-
-Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) {
- // If this splat is compatible with ConstantDataVector, use it instead of
- // ConstantVector.
- if ((isa<ConstantFP>(V) || isa<ConstantInt>(V)) &&
- ConstantDataSequential::isElementTypeCompatible(V->getType()))
- return ConstantDataVector::getSplat(NumElts, V);
-
- SmallVector<Constant*, 32> Elts(NumElts, V);
- return get(Elts);
-}
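-
-// Usage sketch (illustrative, assuming an existing LLVMContext Ctx): the
-// splat accessor transparently prefers the packed ConstantDataVector form
-// when the element type allows it.
-//
-//   Constant *One = ConstantFP::get(Type::getFloatTy(Ctx), 1.0);
-//   Constant *Splat = ConstantVector::getSplat(4, One);
-//   // Splat is a ConstantDataVector, since float is CDS-compatible.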
-
-
-// Utility function for determining if a ConstantExpr is a CastOp or not. This
-// can't be inline because we don't want to #include Instruction.h into
-// Constant.h
-bool ConstantExpr::isCast() const {
- return Instruction::isCast(getOpcode());
-}
-
-bool ConstantExpr::isCompare() const {
- return getOpcode() == Instruction::ICmp || getOpcode() == Instruction::FCmp;
-}
-
-bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const {
- if (getOpcode() != Instruction::GetElementPtr) return false;
-
- gep_type_iterator GEPI = gep_type_begin(this), E = gep_type_end(this);
- User::const_op_iterator OI = llvm::next(this->op_begin());
-
- // Skip the first index, as it has no static limit.
- ++GEPI;
- ++OI;
-
- // The remaining indices must be compile-time known integers within the
- // bounds of the corresponding notional static array types.
- for (; GEPI != E; ++GEPI, ++OI) {
- ConstantInt *CI = dyn_cast<ConstantInt>(*OI);
- if (!CI) return false;
- if (ArrayType *ATy = dyn_cast<ArrayType>(*GEPI))
- if (CI->getValue().getActiveBits() > 64 ||
- CI->getZExtValue() >= ATy->getNumElements())
- return false;
- }
-
- // All the indices checked out.
- return true;
-}
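-
-// Example (illustrative): for a global @g of type [4 x i32]*, the constant
-// expression gep @g, i64 0, i64 3 has no notional over-indexing, while
-// gep @g, i64 0, i64 7 indexes past the notional [4 x i32] bound and makes
-// this predicate return false. The first index is always exempt.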
-
-bool ConstantExpr::hasIndices() const {
- return getOpcode() == Instruction::ExtractValue ||
- getOpcode() == Instruction::InsertValue;
-}
-
-ArrayRef<unsigned> ConstantExpr::getIndices() const {
- if (const ExtractValueConstantExpr *EVCE =
- dyn_cast<ExtractValueConstantExpr>(this))
- return EVCE->Indices;
-
- return cast<InsertValueConstantExpr>(this)->Indices;
-}
-
-unsigned ConstantExpr::getPredicate() const {
- assert(isCompare());
- return ((const CompareConstantExpr*)this)->predicate;
-}
-
-/// getWithOperandReplaced - Return a constant expression identical to this
-/// one, but with the specified operand set to the specified value.
-Constant *
-ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
- assert(Op->getType() == getOperand(OpNo)->getType() &&
- "Replacing operand with value of different type!");
- if (getOperand(OpNo) == Op)
- return const_cast<ConstantExpr*>(this);
-
- SmallVector<Constant*, 8> NewOps;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- NewOps.push_back(i == OpNo ? Op : getOperand(i));
-
- return getWithOperands(NewOps);
-}
-
-/// getWithOperands - This returns the current constant expression with the
-/// operands replaced with the specified values. The specified array must
-/// have the same number of operands as our current one.
-Constant *ConstantExpr::
-getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
- assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
- bool AnyChange = Ty != getType();
- for (unsigned i = 0; i != Ops.size(); ++i)
- AnyChange |= Ops[i] != getOperand(i);
-
- if (!AnyChange) // No operands changed, return self.
- return const_cast<ConstantExpr*>(this);
-
- switch (getOpcode()) {
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- return ConstantExpr::getCast(getOpcode(), Ops[0], Ty);
- case Instruction::Select:
- return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
- case Instruction::InsertElement:
- return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
- case Instruction::ExtractElement:
- return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
- case Instruction::InsertValue:
- return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices());
- case Instruction::ExtractValue:
- return ConstantExpr::getExtractValue(Ops[0], getIndices());
- case Instruction::ShuffleVector:
- return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
- case Instruction::GetElementPtr:
- return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1),
- cast<GEPOperator>(this)->isInBounds());
- case Instruction::ICmp:
- case Instruction::FCmp:
- return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
- default:
- assert(getNumOperands() == 2 && "Must be binary operator?");
- return ConstantExpr::get(getOpcode(), Ops[0], Ops[1], SubclassOptionalData);
- }
-}
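-
-// Usage sketch (illustrative): replacing the base pointer of a GEP constant
-// expression CE with a hypothetical NewBasePtr while keeping its indices.
-//
-//   SmallVector<Constant*, 8> Ops;
-//   for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
-//     Ops.push_back(CE->getOperand(i));
-//   Ops[0] = NewBasePtr;
-//   Constant *NewCE = CE->getWithOperands(Ops, CE->getType());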
-
-
-//===----------------------------------------------------------------------===//
-// isValueValidForType implementations
-
-bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) {
- unsigned NumBits = Ty->getIntegerBitWidth(); // asserts that Ty is an integer type
- if (Ty->isIntegerTy(1))
- return Val == 0 || Val == 1;
- if (NumBits >= 64)
- return true; // always true, has to fit in largest type
- uint64_t Max = (1ll << NumBits) - 1;
- return Val <= Max;
-}
-
-bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) {
- unsigned NumBits = Ty->getIntegerBitWidth();
- if (Ty->isIntegerTy(1))
- return Val == 0 || Val == 1 || Val == -1;
- if (NumBits >= 64)
- return true; // always true, has to fit in largest type
- int64_t Min = -(1ll << (NumBits-1));
- int64_t Max = (1ll << (NumBits-1)) - 1;
- return (Val >= Min && Val <= Max);
-}
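-
-// Worked example (illustrative): for i8, NumBits == 8, so the valid signed
-// range is Min = -(1 << 7) = -128 through Max = (1 << 7) - 1 = 127;
-// isValueValidForType(Int8Ty, int64_t(128)) is therefore false, while
-// isValueValidForType(Int8Ty, int64_t(-128)) is true.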
-
-bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) {
- // convert modifies in place, so make a copy.
- APFloat Val2 = APFloat(Val);
- bool losesInfo;
- switch (Ty->getTypeID()) {
- default:
- return false; // These can't be represented as floating point!
-
- // FIXME rounding mode needs to be more flexible
- case Type::HalfTyID: {
- if (&Val2.getSemantics() == &APFloat::IEEEhalf)
- return true;
- Val2.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &losesInfo);
- return !losesInfo;
- }
- case Type::FloatTyID: {
- if (&Val2.getSemantics() == &APFloat::IEEEsingle)
- return true;
- Val2.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo);
- return !losesInfo;
- }
- case Type::DoubleTyID: {
- if (&Val2.getSemantics() == &APFloat::IEEEhalf ||
- &Val2.getSemantics() == &APFloat::IEEEsingle ||
- &Val2.getSemantics() == &APFloat::IEEEdouble)
- return true;
- Val2.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
- return !losesInfo;
- }
- case Type::X86_FP80TyID:
- return &Val2.getSemantics() == &APFloat::IEEEhalf ||
- &Val2.getSemantics() == &APFloat::IEEEsingle ||
- &Val2.getSemantics() == &APFloat::IEEEdouble ||
- &Val2.getSemantics() == &APFloat::x87DoubleExtended;
- case Type::FP128TyID:
- return &Val2.getSemantics() == &APFloat::IEEEhalf ||
- &Val2.getSemantics() == &APFloat::IEEEsingle ||
- &Val2.getSemantics() == &APFloat::IEEEdouble ||
- &Val2.getSemantics() == &APFloat::IEEEquad;
- case Type::PPC_FP128TyID:
- return &Val2.getSemantics() == &APFloat::IEEEhalf ||
- &Val2.getSemantics() == &APFloat::IEEEsingle ||
- &Val2.getSemantics() == &APFloat::IEEEdouble ||
- &Val2.getSemantics() == &APFloat::PPCDoubleDouble;
- }
-}
-
-
-//===----------------------------------------------------------------------===//
-// Factory Function Implementation
-
-ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) {
- assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
- "Cannot create an aggregate zero of non-aggregate type!");
-
- ConstantAggregateZero *&Entry = Ty->getContext().pImpl->CAZConstants[Ty];
- if (Entry == 0)
- Entry = new ConstantAggregateZero(Ty);
-
- return Entry;
-}
-
-/// destroyConstant - Remove the constant from the constant table.
-///
-void ConstantAggregateZero::destroyConstant() {
- getContext().pImpl->CAZConstants.erase(getType());
- destroyConstantImpl();
-}
-
-/// destroyConstant - Remove the constant from the constant table...
-///
-void ConstantArray::destroyConstant() {
- getType()->getContext().pImpl->ArrayConstants.remove(this);
- destroyConstantImpl();
-}
-
-
-//---- ConstantStruct::get() implementation...
-//
-
-// destroyConstant - Remove the constant from the constant table...
-//
-void ConstantStruct::destroyConstant() {
- getType()->getContext().pImpl->StructConstants.remove(this);
- destroyConstantImpl();
-}
-
-// destroyConstant - Remove the constant from the constant table...
-//
-void ConstantVector::destroyConstant() {
- getType()->getContext().pImpl->VectorConstants.remove(this);
- destroyConstantImpl();
-}
-
-/// getSplatValue - If this is a splat constant, where all of the
-/// elements have the same value, return that value. Otherwise return null.
-Constant *ConstantVector::getSplatValue() const {
- // Check out first element.
- Constant *Elt = getOperand(0);
- // Then make sure all remaining elements point to the same value.
- for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
- if (getOperand(I) != Elt)
- return 0;
- return Elt;
-}
-
-//---- ConstantPointerNull::get() implementation.
-//
-
-ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) {
- ConstantPointerNull *&Entry = Ty->getContext().pImpl->CPNConstants[Ty];
- if (Entry == 0)
- Entry = new ConstantPointerNull(Ty);
-
- return Entry;
-}
-
-// destroyConstant - Remove the constant from the constant table...
-//
-void ConstantPointerNull::destroyConstant() {
- getContext().pImpl->CPNConstants.erase(getType());
- // Free the constant and any dangling references to it.
- destroyConstantImpl();
-}
-
-
-//---- UndefValue::get() implementation.
-//
-
-UndefValue *UndefValue::get(Type *Ty) {
- UndefValue *&Entry = Ty->getContext().pImpl->UVConstants[Ty];
- if (Entry == 0)
- Entry = new UndefValue(Ty);
-
- return Entry;
-}
-
-// destroyConstant - Remove the constant from the constant table.
-//
-void UndefValue::destroyConstant() {
- // Free the constant and any dangling references to it.
- getContext().pImpl->UVConstants.erase(getType());
- destroyConstantImpl();
-}
-
-//---- BlockAddress::get() implementation.
-//
-
-BlockAddress *BlockAddress::get(BasicBlock *BB) {
- assert(BB->getParent() != 0 && "Block must have a parent");
- return get(BB->getParent(), BB);
-}
-
-BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) {
- BlockAddress *&BA =
- F->getContext().pImpl->BlockAddresses[std::make_pair(F, BB)];
- if (BA == 0)
- BA = new BlockAddress(F, BB);
-
- assert(BA->getFunction() == F && "Basic block moved between functions");
- return BA;
-}
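-
-// Usage sketch (illustrative): BlockAddress constants back the IR construct
-// 'blockaddress(@fn, %bb)' consumed by indirectbr. Given a function F and
-// one of its basic blocks BB (both assumed to exist):
-//
-//   BlockAddress *BA = BlockAddress::get(F, BB);
-//   // BA has type i8* and is uniqued per (F, BB) pair.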
-
-BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
-: Constant(Type::getInt8PtrTy(F->getContext()), Value::BlockAddressVal,
- &Op<0>(), 2) {
- setOperand(0, F);
- setOperand(1, BB);
- BB->AdjustBlockAddressRefCount(1);
-}
-
-
-// destroyConstant - Remove the constant from the constant table.
-//
-void BlockAddress::destroyConstant() {
- getFunction()->getType()->getContext().pImpl
- ->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock()));
- getBasicBlock()->AdjustBlockAddressRefCount(-1);
- destroyConstantImpl();
-}
-
-void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
- // This could be replacing either the Basic Block or the Function. In either
- // case, we have to remove the map entry.
- Function *NewF = getFunction();
- BasicBlock *NewBB = getBasicBlock();
-
- if (U == &Op<0>())
- NewF = cast<Function>(To);
- else
- NewBB = cast<BasicBlock>(To);
-
- // See if the 'new' entry already exists, if not, just update this in place
- // and return early.
- BlockAddress *&NewBA =
- getContext().pImpl->BlockAddresses[std::make_pair(NewF, NewBB)];
- if (NewBA == 0) {
- getBasicBlock()->AdjustBlockAddressRefCount(-1);
-
- // Remove the old entry, this can't cause the map to rehash (just a
- // tombstone will get added).
- getContext().pImpl->BlockAddresses.erase(std::make_pair(getFunction(),
- getBasicBlock()));
- NewBA = this;
- setOperand(0, NewF);
- setOperand(1, NewBB);
- getBasicBlock()->AdjustBlockAddressRefCount(1);
- return;
- }
-
- // Otherwise, I do need to replace this with an existing value.
- assert(NewBA != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- replaceAllUsesWith(NewBA);
-
- destroyConstant();
-}
-
-//---- ConstantExpr::get() implementations.
-//
-
-/// This is a utility function to handle folding of casts and lookup of the
-/// cast in the ExprConstants map. It is used by the various get* methods below.
-static inline Constant *getFoldedCast(
- Instruction::CastOps opc, Constant *C, Type *Ty) {
- assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!");
- // Fold a few common cases
- if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty))
- return FC;
-
- LLVMContextImpl *pImpl = Ty->getContext().pImpl;
-
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> argVec(1, C);
- ExprMapKeyType Key(opc, argVec);
-
- return pImpl->ExprConstants.getOrCreate(Ty, Key);
-}
-
-Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) {
- Instruction::CastOps opc = Instruction::CastOps(oc);
- assert(Instruction::isCast(opc) && "opcode out of range");
- assert(C && Ty && "Null arguments to getCast");
- assert(CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!");
-
- switch (opc) {
- default:
- llvm_unreachable("Invalid cast opcode");
- case Instruction::Trunc: return getTrunc(C, Ty);
- case Instruction::ZExt: return getZExt(C, Ty);
- case Instruction::SExt: return getSExt(C, Ty);
- case Instruction::FPTrunc: return getFPTrunc(C, Ty);
- case Instruction::FPExt: return getFPExtend(C, Ty);
- case Instruction::UIToFP: return getUIToFP(C, Ty);
- case Instruction::SIToFP: return getSIToFP(C, Ty);
- case Instruction::FPToUI: return getFPToUI(C, Ty);
- case Instruction::FPToSI: return getFPToSI(C, Ty);
- case Instruction::PtrToInt: return getPtrToInt(C, Ty);
- case Instruction::IntToPtr: return getIntToPtr(C, Ty);
- case Instruction::BitCast: return getBitCast(C, Ty);
- }
-}
-
-Constant *ConstantExpr::getZExtOrBitCast(Constant *C, Type *Ty) {
- if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return getBitCast(C, Ty);
- return getZExt(C, Ty);
-}
-
-Constant *ConstantExpr::getSExtOrBitCast(Constant *C, Type *Ty) {
- if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return getBitCast(C, Ty);
- return getSExt(C, Ty);
-}
-
-Constant *ConstantExpr::getTruncOrBitCast(Constant *C, Type *Ty) {
- if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return getBitCast(C, Ty);
- return getTrunc(C, Ty);
-}
-
-Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) {
- assert(S->getType()->isPointerTy() && "Invalid cast");
- assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast");
-
- if (Ty->isIntegerTy())
- return getPtrToInt(S, Ty);
- return getBitCast(S, Ty);
-}
-
-Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty,
- bool isSigned) {
- assert(C->getType()->isIntOrIntVectorTy() &&
- Ty->isIntOrIntVectorTy() && "Invalid cast");
- unsigned SrcBits = C->getType()->getScalarSizeInBits();
- unsigned DstBits = Ty->getScalarSizeInBits();
- Instruction::CastOps opcode =
- (SrcBits == DstBits ? Instruction::BitCast :
- (SrcBits > DstBits ? Instruction::Trunc :
- (isSigned ? Instruction::SExt : Instruction::ZExt)));
- return getCast(opcode, C, Ty);
-}
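-
-// Example (illustrative) of the opcode selection above: casting an i8
-// constant to i32 yields SExt when isSigned is true and ZExt otherwise; an
-// i64 constant to i32 yields Trunc; and an i32 constant to i32 takes the
-// BitCast path, which folds back to the original constant.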
-
-Constant *ConstantExpr::getFPCast(Constant *C, Type *Ty) {
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
- "Invalid cast");
- unsigned SrcBits = C->getType()->getScalarSizeInBits();
- unsigned DstBits = Ty->getScalarSizeInBits();
- if (SrcBits == DstBits)
- return C; // Avoid a useless cast
- Instruction::CastOps opcode =
- (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt);
- return getCast(opcode, C, Ty);
-}
-
-Constant *ConstantExpr::getTrunc(Constant *C, Type *Ty) {
-#ifndef NDEBUG
- bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
- bool toVec = Ty->getTypeID() == Type::VectorTyID;
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer");
- assert(Ty->isIntOrIntVectorTy() && "Trunc produces only integral");
- assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
- "SrcTy must be larger than DestTy for Trunc!");
-
- return getFoldedCast(Instruction::Trunc, C, Ty);
-}
-
-Constant *ConstantExpr::getSExt(Constant *C, Type *Ty) {
-#ifndef NDEBUG
- bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
- bool toVec = Ty->getTypeID() == Type::VectorTyID;
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVectorTy() && "SExt operand must be integral");
- assert(Ty->isIntOrIntVectorTy() && "SExt produces only integer");
- assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
- "SrcTy must be smaller than DestTy for SExt!");
-
- return getFoldedCast(Instruction::SExt, C, Ty);
-}
-
-Constant *ConstantExpr::getZExt(Constant *C, Type *Ty) {
-#ifndef NDEBUG
- bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
- bool toVec = Ty->getTypeID() == Type::VectorTyID;
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVectorTy() && "ZEXt operand must be integral");
- assert(Ty->isIntOrIntVectorTy() && "ZExt produces only integer");
- assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
- "SrcTy must be smaller than DestTy for ZExt!");
-
- return getFoldedCast(Instruction::ZExt, C, Ty);
-}
-
-Constant *ConstantExpr::getFPTrunc(Constant *C, Type *Ty) {
-#ifndef NDEBUG
- bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
- bool toVec = Ty->getTypeID() == Type::VectorTyID;
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
- C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
- "This is an illegal floating point truncation!");
- return getFoldedCast(Instruction::FPTrunc, C, Ty);
-}
-
-Constant *ConstantExpr::getFPExtend(Constant *C, Type *Ty) {
-#ifndef NDEBUG
- bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
- bool toVec = Ty->getTypeID() == Type::VectorTyID;
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
- C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
- "This is an illegal floating point extension!");
- return getFoldedCast(Instruction::FPExt, C, Ty);
-}
-
-Constant *ConstantExpr::getUIToFP(Constant *C, Type *Ty) {
-#ifndef NDEBUG
- bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
- bool toVec = Ty->getTypeID() == Type::VectorTyID;
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
- "This is an illegal uint to floating point cast!");
- return getFoldedCast(Instruction::UIToFP, C, Ty);
-}
-
-Constant *ConstantExpr::getSIToFP(Constant *C, Type *Ty) {
-#ifndef NDEBUG
- bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
- bool toVec = Ty->getTypeID() == Type::VectorTyID;
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() &&
- "This is an illegal sint to floating point cast!");
- return getFoldedCast(Instruction::SIToFP, C, Ty);
-}
-
-Constant *ConstantExpr::getFPToUI(Constant *C, Type *Ty) {
-#ifndef NDEBUG
- bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
- bool toVec = Ty->getTypeID() == Type::VectorTyID;
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
- "This is an illegal floating point to uint cast!");
- return getFoldedCast(Instruction::FPToUI, C, Ty);
-}
-
-Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) {
-#ifndef NDEBUG
- bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
- bool toVec = Ty->getTypeID() == Type::VectorTyID;
-#endif
- assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() &&
- "This is an illegal floating point to sint cast!");
- return getFoldedCast(Instruction::FPToSI, C, Ty);
-}
-
-Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) {
- assert(C->getType()->getScalarType()->isPointerTy() &&
- "PtrToInt source must be pointer or pointer vector");
- assert(DstTy->getScalarType()->isIntegerTy() &&
- "PtrToInt destination must be integer or integer vector");
- assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
- if (isa<VectorType>(C->getType()))
- assert(C->getType()->getVectorNumElements()==DstTy->getVectorNumElements()&&
- "Invalid cast between a different number of vector elements");
- return getFoldedCast(Instruction::PtrToInt, C, DstTy);
-}
-
-Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) {
- assert(C->getType()->getScalarType()->isIntegerTy() &&
- "IntToPtr source must be integer or integer vector");
- assert(DstTy->getScalarType()->isPointerTy() &&
- "IntToPtr destination must be a pointer or pointer vector");
- assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
- if (isa<VectorType>(C->getType()))
- assert(C->getType()->getVectorNumElements()==DstTy->getVectorNumElements()&&
- "Invalid cast between a different number of vector elements");
- return getFoldedCast(Instruction::IntToPtr, C, DstTy);
-}
-
-Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy) {
- assert(CastInst::castIsValid(Instruction::BitCast, C, DstTy) &&
- "Invalid constantexpr bitcast!");
-
- // It is common to ask for a bitcast of a value to its own type, handle this
- // speedily.
- if (C->getType() == DstTy) return C;
-
- return getFoldedCast(Instruction::BitCast, C, DstTy);
-}
-
-Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
- unsigned Flags) {
- // Check the operands for consistency first.
- assert(Opcode >= Instruction::BinaryOpsBegin &&
- Opcode < Instruction::BinaryOpsEnd &&
- "Invalid opcode in binary constant expression");
- assert(C1->getType() == C2->getType() &&
- "Operand types in binary constant expression should match");
-
-#ifndef NDEBUG
- switch (Opcode) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVectorTy() &&
- "Tried to create an integer operation on a non-integer type!");
- break;
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isFPOrFPVectorTy() &&
- "Tried to create a floating-point operation on a "
- "non-floating-point type!");
- break;
- case Instruction::UDiv:
- case Instruction::SDiv:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVectorTy() &&
- "Tried to create an arithmetic operation on a non-arithmetic type!");
- break;
- case Instruction::FDiv:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isFPOrFPVectorTy() &&
- "Tried to create an arithmetic operation on a non-arithmetic type!");
- break;
- case Instruction::URem:
- case Instruction::SRem:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVectorTy() &&
- "Tried to create an arithmetic operation on a non-arithmetic type!");
- break;
- case Instruction::FRem:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isFPOrFPVectorTy() &&
- "Tried to create an arithmetic operation on a non-arithmetic type!");
- break;
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVectorTy() &&
- "Tried to create a logical operation on a non-integral type!");
- break;
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVectorTy() &&
- "Tried to create a shift operation on a non-integer type!");
- break;
- default:
- break;
- }
-#endif
-
- if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
- return FC; // Fold a few common cases.
-
- std::vector<Constant*> argVec(1, C1);
- argVec.push_back(C2);
- ExprMapKeyType Key(Opcode, argVec, 0, Flags);
-
- LLVMContextImpl *pImpl = C1->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
-}
-
-Constant *ConstantExpr::getSizeOf(Type* Ty) {
- // sizeof is implemented as: (i64) gep (Ty*)null, 1
- // Note that a non-inbounds gep is used, as null isn't within any object.
- Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
- Constant *GEP = getGetElementPtr(
- Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
- return getPtrToInt(GEP,
- Type::getInt64Ty(Ty->getContext()));
-}
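-
-// Illustrative expansion: for Ty == i32 this builds roughly the constant
-// expression
-//   ptrtoint (i32* getelementptr (i32* null, i32 1) to i64)
-// whose value becomes the allocation size of i32 once the target lowers
-// the GEP.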
-
-Constant *ConstantExpr::getAlignOf(Type* Ty) {
- // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
- // Note that a non-inbounds gep is used, as null isn't within any object.
- Type *AligningTy =
- StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL);
- Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo());
- Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
- Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
- Constant *Indices[2] = { Zero, One };
- Constant *GEP = getGetElementPtr(NullPtr, Indices);
- return getPtrToInt(GEP,
- Type::getInt64Ty(Ty->getContext()));
-}
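-
-// Illustrative expansion: for Ty == double the trick above takes the offset
-// of the second field of the anonymous struct {i1, double}, roughly
-//   ptrtoint ({i1, double}* getelementptr ({i1, double}* null, i64 0, i32 1) to i64)
-// which equals the ABI alignment of double after lowering.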
-
-Constant *ConstantExpr::getOffsetOf(StructType* STy, unsigned FieldNo) {
- return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()),
- FieldNo));
-}
-
-Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) {
- // offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo
- // Note that a non-inbounds gep is used, as null isn't within any object.
- Constant *GEPIdx[] = {
- ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0),
- FieldNo
- };
- Constant *GEP = getGetElementPtr(
- Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
- return getPtrToInt(GEP,
- Type::getInt64Ty(Ty->getContext()));
-}
-
-Constant *ConstantExpr::getCompare(unsigned short Predicate,
- Constant *C1, Constant *C2) {
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
-
- switch (Predicate) {
- default: llvm_unreachable("Invalid CmpInst predicate");
- case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT:
- case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE:
- case CmpInst::FCMP_ONE: case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO:
- case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
- case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
- case CmpInst::FCMP_TRUE:
- return getFCmp(Predicate, C1, C2);
-
- case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT:
- case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
- case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
- case CmpInst::ICMP_SLE:
- return getICmp(Predicate, C1, C2);
- }
-}
-
-Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) {
- assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands");
-
- if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
- return SC; // Fold common cases
-
- std::vector<Constant*> argVec(3, C);
- argVec[1] = V1;
- argVec[2] = V2;
- ExprMapKeyType Key(Instruction::Select, argVec);
-
- LLVMContextImpl *pImpl = C->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
-}
-
-Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
- bool InBounds) {
- if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs))
- return FC; // Fold a few common cases.
-
- // Get the result type of the getelementptr!
- Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs);
- assert(Ty && "GEP indices invalid!");
- unsigned AS = C->getType()->getPointerAddressSpace();
- Type *ReqTy = Ty->getPointerTo(AS);
-
- assert(C->getType()->isPointerTy() &&
- "Non-pointer type for constant GetElementPtr expression");
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec;
- ArgVec.reserve(1 + Idxs.size());
- ArgVec.push_back(C);
- for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
- ArgVec.push_back(cast<Constant>(Idxs[i]));
- const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
- InBounds ? GEPOperator::IsInBounds : 0);
-
- LLVMContextImpl *pImpl = C->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
-}
-
-Constant *
-ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
- assert(LHS->getType() == RHS->getType());
- assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE &&
- pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate");
-
- if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
- return FC; // Fold a few common cases...
-
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec;
- ArgVec.push_back(LHS);
- ArgVec.push_back(RHS);
- // Get the key type with both the opcode and predicate
- const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
-
- Type *ResultTy = Type::getInt1Ty(LHS->getContext());
- if (VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
- ResultTy = VectorType::get(ResultTy, VT->getNumElements());
-
- LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
-}
-
-Constant *
-ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
- assert(LHS->getType() == RHS->getType());
- assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate");
-
- if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS))
- return FC; // Fold a few common cases...
-
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec;
- ArgVec.push_back(LHS);
- ArgVec.push_back(RHS);
- // Get the key type with both the opcode and predicate
- const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
-
- Type *ResultTy = Type::getInt1Ty(LHS->getContext());
- if (VectorType *VT = dyn_cast<VectorType>(LHS->getType()))
- ResultTy = VectorType::get(ResultTy, VT->getNumElements());
-
- LLVMContextImpl *pImpl = LHS->getType()->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
-}
-
-Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
- assert(Val->getType()->isVectorTy() &&
- "Tried to create extractelement operation on non-vector type!");
- assert(Idx->getType()->isIntegerTy(32) &&
- "Extractelement index must be i32 type!");
-
- if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
- return FC; // Fold a few common cases.
-
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec(1, Val);
- ArgVec.push_back(Idx);
- const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
-
- LLVMContextImpl *pImpl = Val->getContext().pImpl;
- Type *ReqTy = Val->getType()->getVectorElementType();
- return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
-}
-
-Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
- Constant *Idx) {
- assert(Val->getType()->isVectorTy() &&
- "Tried to create insertelement operation on non-vector type!");
- assert(Elt->getType() == Val->getType()->getVectorElementType() &&
- "Insertelement types must match!");
- assert(Idx->getType()->isIntegerTy(32) &&
- "Insertelement index must be i32 type!");
-
- if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
- return FC; // Fold a few common cases.
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec(1, Val);
- ArgVec.push_back(Elt);
- ArgVec.push_back(Idx);
- const ExprMapKeyType Key(Instruction::InsertElement,ArgVec);
-
- LLVMContextImpl *pImpl = Val->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(Val->getType(), Key);
-}
-
-Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
- Constant *Mask) {
- assert(ShuffleVectorInst::isValidOperands(V1, V2, Mask) &&
- "Invalid shuffle vector constant expr operands!");
-
- if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
- return FC; // Fold a few common cases.
-
- unsigned NElts = Mask->getType()->getVectorNumElements();
- Type *EltTy = V1->getType()->getVectorElementType();
- Type *ShufTy = VectorType::get(EltTy, NElts);
-
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec(1, V1);
- ArgVec.push_back(V2);
- ArgVec.push_back(Mask);
- const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec);
-
- LLVMContextImpl *pImpl = ShufTy->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(ShufTy, Key);
-}
-
-Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
- ArrayRef<unsigned> Idxs) {
- assert(ExtractValueInst::getIndexedType(Agg->getType(),
- Idxs) == Val->getType() &&
- "insertvalue indices invalid!");
- assert(Agg->getType()->isFirstClassType() &&
- "Non-first-class type for constant insertvalue expression");
- Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs);
- assert(FC && "insertvalue constant expr couldn't be folded!");
- return FC;
-}
-
-Constant *ConstantExpr::getExtractValue(Constant *Agg,
- ArrayRef<unsigned> Idxs) {
- assert(Agg->getType()->isFirstClassType() &&
- "Tried to create extractelement operation on non-first-class type!");
-
- Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs);
- (void)ReqTy;
- assert(ReqTy && "extractvalue indices invalid!");
-
- assert(Agg->getType()->isFirstClassType() &&
- "Non-first-class type for constant extractvalue expression");
- Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs);
- assert(FC && "ExtractValue constant expr couldn't be folded!");
- return FC;
-}
-
-Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
- assert(C->getType()->isIntOrIntVectorTy() &&
- "Cannot NEG a nonintegral value!");
- return getSub(ConstantFP::getZeroValueForNegation(C->getType()),
- C, HasNUW, HasNSW);
-}
-
-Constant *ConstantExpr::getFNeg(Constant *C) {
- assert(C->getType()->isFPOrFPVectorTy() &&
- "Cannot FNEG a non-floating-point value!");
- return getFSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
-}
-
-Constant *ConstantExpr::getNot(Constant *C) {
- assert(C->getType()->isIntOrIntVectorTy() &&
- "Cannot NOT a nonintegral value!");
- return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType()));
-}
-
-Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2,
- bool HasNUW, bool HasNSW) {
- unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
- (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
- return get(Instruction::Add, C1, C2, Flags);
-}
-
-Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
- return get(Instruction::FAdd, C1, C2);
-}
-
-Constant *ConstantExpr::getSub(Constant *C1, Constant *C2,
- bool HasNUW, bool HasNSW) {
- unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
- (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
- return get(Instruction::Sub, C1, C2, Flags);
-}
-
-Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
- return get(Instruction::FSub, C1, C2);
-}
-
-Constant *ConstantExpr::getMul(Constant *C1, Constant *C2,
- bool HasNUW, bool HasNSW) {
- unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
- (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
- return get(Instruction::Mul, C1, C2, Flags);
-}
-
-Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
- return get(Instruction::FMul, C1, C2);
-}
-
-Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2, bool isExact) {
- return get(Instruction::UDiv, C1, C2,
- isExact ? PossiblyExactOperator::IsExact : 0);
-}
-
-Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2, bool isExact) {
- return get(Instruction::SDiv, C1, C2,
- isExact ? PossiblyExactOperator::IsExact : 0);
-}
-
-Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) {
- return get(Instruction::FDiv, C1, C2);
-}
-
-Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) {
- return get(Instruction::URem, C1, C2);
-}
-
-Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) {
- return get(Instruction::SRem, C1, C2);
-}
-
-Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) {
- return get(Instruction::FRem, C1, C2);
-}
-
-Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
- return get(Instruction::And, C1, C2);
-}
-
-Constant *ConstantExpr::getOr(Constant *C1, Constant *C2) {
- return get(Instruction::Or, C1, C2);
-}
-
-Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
- return get(Instruction::Xor, C1, C2);
-}
-
-Constant *ConstantExpr::getShl(Constant *C1, Constant *C2,
- bool HasNUW, bool HasNSW) {
- unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
- (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
- return get(Instruction::Shl, C1, C2, Flags);
-}
-
-Constant *ConstantExpr::getLShr(Constant *C1, Constant *C2, bool isExact) {
- return get(Instruction::LShr, C1, C2,
- isExact ? PossiblyExactOperator::IsExact : 0);
-}
-
-Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) {
- return get(Instruction::AShr, C1, C2,
- isExact ? PossiblyExactOperator::IsExact : 0);
-}
-
-/// getBinOpIdentity - Return the identity for the given binary operation,
-/// i.e. a constant C such that X op C = X and C op X = X for every X. It
-/// returns null if the operator doesn't have an identity.
-Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty) {
- switch (Opcode) {
- default:
- // Doesn't have an identity.
- return 0;
-
- case Instruction::Add:
- case Instruction::Or:
- case Instruction::Xor:
- return Constant::getNullValue(Ty);
-
- case Instruction::Mul:
- return ConstantInt::get(Ty, 1);
-
- case Instruction::And:
- return Constant::getAllOnesValue(Ty);
- }
-}
-
-/// getBinOpAbsorber - Return the absorbing element for the given binary
-/// operation, i.e. a constant C such that X op C = C and C op X = C for
-/// every X. For example, this returns zero for integer multiplication.
-/// It returns null if the operator doesn't have an absorbing element.
-Constant *ConstantExpr::getBinOpAbsorber(unsigned Opcode, Type *Ty) {
- switch (Opcode) {
- default:
- // Doesn't have an absorber.
- return 0;
-
- case Instruction::Or:
- return Constant::getAllOnesValue(Ty);
-
- case Instruction::And:
- case Instruction::Mul:
- return Constant::getNullValue(Ty);
- }
-}
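-
-// Examples (illustrative) for the two helpers above, taking Ty == i32:
-// getBinOpIdentity returns 0 for Add, 1 for Mul, and -1 (all ones) for And;
-// getBinOpAbsorber returns 0 for Mul and And, and -1 for Or. Both return
-// null for operators such as Sub, which has no two-sided identity or
-// absorbing element.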
-
-// destroyConstant - Remove the constant from the constant table...
-//
-void ConstantExpr::destroyConstant() {
- getType()->getContext().pImpl->ExprConstants.remove(this);
- destroyConstantImpl();
-}
-
-const char *ConstantExpr::getOpcodeName() const {
- return Instruction::getOpcodeName(getOpcode());
-}
-
-
-
-GetElementPtrConstantExpr::
-GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
- Type *DestTy)
- : ConstantExpr(DestTy, Instruction::GetElementPtr,
- OperandTraits<GetElementPtrConstantExpr>::op_end(this)
- - (IdxList.size()+1), IdxList.size()+1) {
- OperandList[0] = C;
- for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
- OperandList[i+1] = IdxList[i];
-}
-
-//===----------------------------------------------------------------------===//
-// ConstantData* implementations
-
-void ConstantDataArray::anchor() {}
-void ConstantDataVector::anchor() {}
-
-/// getElementType - Return the element type of the array/vector.
-Type *ConstantDataSequential::getElementType() const {
- return getType()->getElementType();
-}
-
-StringRef ConstantDataSequential::getRawDataValues() const {
- return StringRef(DataElements, getNumElements()*getElementByteSize());
-}
-
-/// isElementTypeCompatible - Return true if a ConstantDataSequential can be
-/// formed with a vector or array of the specified element type.
-/// ConstantDataArray only works with normal float and int types that are
-/// stored densely in memory, not with things like i42 or x86_f80.
-bool ConstantDataSequential::isElementTypeCompatible(const Type *Ty) {
- if (Ty->isFloatTy() || Ty->isDoubleTy()) return true;
- if (const IntegerType *IT = dyn_cast<IntegerType>(Ty)) {
- switch (IT->getBitWidth()) {
- case 8:
- case 16:
- case 32:
- case 64:
- return true;
- default: break;
- }
- }
- return false;
-}
-
-/// getNumElements - Return the number of elements in the array or vector.
-unsigned ConstantDataSequential::getNumElements() const {
- if (ArrayType *AT = dyn_cast<ArrayType>(getType()))
- return AT->getNumElements();
- return getType()->getVectorNumElements();
-}
-
-
-/// getElementByteSize - Return the size in bytes of the elements in the data.
-uint64_t ConstantDataSequential::getElementByteSize() const {
- return getElementType()->getPrimitiveSizeInBits()/8;
-}
-
-/// getElementPointer - Return the start of the specified element.
-const char *ConstantDataSequential::getElementPointer(unsigned Elt) const {
- assert(Elt < getNumElements() && "Invalid Elt");
- return DataElements+Elt*getElementByteSize();
-}
-
-
-/// isAllZeros - return true if the array is empty or all zeros.
-static bool isAllZeros(StringRef Arr) {
- for (StringRef::iterator I = Arr.begin(), E = Arr.end(); I != E; ++I)
- if (*I != 0)
- return false;
- return true;
-}
-
-/// getImpl - This is the underlying implementation of all of the
-/// ConstantDataSequential::get methods. They all thunk down to here, providing
-/// the correct element type. We take the bytes in as a StringRef because
-/// we *want* an underlying "char*" to avoid TBAA type punning violations.
-Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) {
- assert(isElementTypeCompatible(Ty->getSequentialElementType()));
- // If the elements are all zero or there are no elements, return a CAZ, which
- // is more dense and canonical.
- if (isAllZeros(Elements))
- return ConstantAggregateZero::get(Ty);
-
- // Do a lookup to see if we have already formed one of these.
- StringMap<ConstantDataSequential*>::MapEntryTy &Slot =
- Ty->getContext().pImpl->CDSConstants.GetOrCreateValue(Elements);
-
- // The bucket can point to a linked list of different CDS's that have the same
- // body but different types. For example, 0,0,0,1 could be a 4 element array
- // of i8, or a 1-element array of i32. They'll both end up in the same
- // StringMap bucket, linked up by their Next pointers. Walk the list.
- ConstantDataSequential **Entry = &Slot.getValue();
- for (ConstantDataSequential *Node = *Entry; Node != 0;
- Entry = &Node->Next, Node = *Entry)
- if (Node->getType() == Ty)
- return Node;
-
- // Okay, we didn't get a hit. Create a node of the right class, link it in,
- // and return it.
- if (isa<ArrayType>(Ty))
- return *Entry = new ConstantDataArray(Ty, Slot.getKeyData());
-
- assert(isa<VectorType>(Ty));
- return *Entry = new ConstantDataVector(Ty, Slot.getKeyData());
-}
-
-void ConstantDataSequential::destroyConstant() {
- // Remove the constant from the StringMap.
- StringMap<ConstantDataSequential*> &CDSConstants =
- getType()->getContext().pImpl->CDSConstants;
-
- StringMap<ConstantDataSequential*>::iterator Slot =
- CDSConstants.find(getRawDataValues());
-
- assert(Slot != CDSConstants.end() && "CDS not found in uniquing table");
-
- ConstantDataSequential **Entry = &Slot->getValue();
-
- // Remove the entry from the hash table.
- if ((*Entry)->Next == 0) {
- // If there is only one value in the bucket (common case) it must be this
- // entry, and removing the entry should remove the bucket completely.
- assert((*Entry) == this && "Hash mismatch in ConstantDataSequential");
- getContext().pImpl->CDSConstants.erase(Slot);
- } else {
- // Otherwise, there are multiple entries linked off the bucket, unlink the
- // node we care about but keep the bucket around.
- for (ConstantDataSequential *Node = *Entry; ;
- Entry = &Node->Next, Node = *Entry) {
- assert(Node && "Didn't find entry in its uniquing hash table!");
- // If we found our entry, unlink it from the list and we're done.
- if (Node == this) {
- *Entry = Node->Next;
- break;
- }
- }
- }
-
- // If we were part of a list, make sure that we don't delete the list that is
- // still owned by the uniquing map.
- Next = 0;
-
- // Finally, actually delete it.
- destroyConstantImpl();
-}
-
-/// get() constructors - Return a constant with array type with an element
-/// count and element type matching the ArrayRef passed in. Note that this
-/// can return a ConstantAggregateZero object.
-Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint8_t> Elts) {
- Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty);
-}
-Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
- Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty);
-}
-Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
- Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
-}
-Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
- Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
-}
-Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<float> Elts) {
- Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
-}
-Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<double> Elts) {
- Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
-}
-
-/// getString - This method constructs a CDS and initializes it with a text
-/// string. The default behavior (AddNull==true) causes a null terminator to
-/// be placed at the end of the array (increasing the length of the string by
- one more than the StringRef would normally indicate). Pass AddNull=false
-/// to disable this behavior.
-Constant *ConstantDataArray::getString(LLVMContext &Context,
- StringRef Str, bool AddNull) {
- if (!AddNull) {
- const uint8_t *Data = reinterpret_cast<const uint8_t *>(Str.data());
- return get(Context, ArrayRef<uint8_t>(const_cast<uint8_t *>(Data),
- Str.size()));
- }
-
- SmallVector<uint8_t, 64> ElementVals;
- ElementVals.append(Str.begin(), Str.end());
- ElementVals.push_back(0);
- return get(Context, ElementVals);
-}
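-
-// Usage sketch (illustrative, assuming an existing LLVMContext Ctx):
-// building a string constant with and without the implicit terminator.
-//
-//   Constant *CS = ConstantDataArray::getString(Ctx, "hi");
-//   // CS is [3 x i8] c"hi\00"
-//   Constant *Raw = ConstantDataArray::getString(Ctx, "hi", false);
-//   // Raw is [2 x i8] c"hi"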
-
-/// get() constructors - Return a constant with vector type with an element
-/// count and element type matching the ArrayRef passed in. Note that this
-/// can return a ConstantAggregateZero object.
-Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint8_t> Elts){
- Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty);
-}
-Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
- Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty);
-}
-Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
- Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
-}
-Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
- Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
-}
-Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<float> Elts) {
- Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty);
-}
-Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<double> Elts) {
- Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size());
- const char *Data = reinterpret_cast<const char *>(Elts.data());
- return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty);
-}
-
-Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) {
- assert(isElementTypeCompatible(V->getType()) &&
- "Element type not compatible with ConstantData");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->getType()->isIntegerTy(8)) {
- SmallVector<uint8_t, 16> Elts(NumElts, CI->getZExtValue());
- return get(V->getContext(), Elts);
- }
- if (CI->getType()->isIntegerTy(16)) {
- SmallVector<uint16_t, 16> Elts(NumElts, CI->getZExtValue());
- return get(V->getContext(), Elts);
- }
- if (CI->getType()->isIntegerTy(32)) {
- SmallVector<uint32_t, 16> Elts(NumElts, CI->getZExtValue());
- return get(V->getContext(), Elts);
- }
- assert(CI->getType()->isIntegerTy(64) && "Unsupported ConstantData type");
- SmallVector<uint64_t, 16> Elts(NumElts, CI->getZExtValue());
- return get(V->getContext(), Elts);
- }
-
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- if (CFP->getType()->isFloatTy()) {
- SmallVector<float, 16> Elts(NumElts, CFP->getValueAPF().convertToFloat());
- return get(V->getContext(), Elts);
- }
- if (CFP->getType()->isDoubleTy()) {
- SmallVector<double, 16> Elts(NumElts,
- CFP->getValueAPF().convertToDouble());
- return get(V->getContext(), Elts);
- }
- }
- return ConstantVector::getSplat(NumElts, V);
-}
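-
-// Usage sketch (illustrative, assuming an existing LLVMContext Ctx): a splat
-// of a simple int or float constant is stored densely; anything else falls
-// back to the generic ConstantVector form above.
-//
-//   Constant *C8 = ConstantInt::get(Type::getInt8Ty(Ctx), 5);
-//   Constant *V = ConstantDataVector::getSplat(16, C8); // packed <16 x i8>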
-
-
-/// getElementAsInteger - If this is a sequential container of integers (of
-/// any size), return the specified element in the low bits of a uint64_t.
-uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const {
- assert(isa<IntegerType>(getElementType()) &&
- "Accessor can only be used when element is an integer");
- const char *EltPtr = getElementPointer(Elt);
-
- // The data is stored in host byte order; make sure to cast back to the
- // right type to load with the right endianness.
- switch (getElementType()->getIntegerBitWidth()) {
- default: llvm_unreachable("Invalid bitwidth for CDS");
- case 8:
- return *reinterpret_cast<const uint8_t *>(EltPtr);
- case 16:
- return *reinterpret_cast<const uint16_t *>(EltPtr);
- case 32:
- return *reinterpret_cast<const uint32_t *>(EltPtr);
- case 64:
- return *reinterpret_cast<const uint64_t *>(EltPtr);
- }
-}
-
-/// getElementAsAPFloat - If this is a sequential container of floating point
-/// type, return the specified element as an APFloat.
-APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const {
- const char *EltPtr = getElementPointer(Elt);
-
- switch (getElementType()->getTypeID()) {
- default:
- llvm_unreachable("Accessor can only be used when element is float/double!");
- case Type::FloatTyID: {
- const float *FloatPtr = reinterpret_cast<const float *>(EltPtr);
- return APFloat(*FloatPtr);
- }
- case Type::DoubleTyID: {
- const double *DoublePtr = reinterpret_cast<const double *>(EltPtr);
- return APFloat(*DoublePtr);
- }
- }
-}
-
-/// getElementAsFloat - If this is a sequential container of floats, return
-/// the specified element as a float.
-float ConstantDataSequential::getElementAsFloat(unsigned Elt) const {
- assert(getElementType()->isFloatTy() &&
- "Accessor can only be used when element is a 'float'");
- const float *EltPtr = reinterpret_cast<const float *>(getElementPointer(Elt));
- return *EltPtr;
-}
-
-/// getElementAsDouble - If this is a sequential container of doubles, return
-/// the specified element as a double.
-double ConstantDataSequential::getElementAsDouble(unsigned Elt) const {
- assert(getElementType()->isDoubleTy() &&
- "Accessor can only be used when element is a 'double'");
- const double *EltPtr =
- reinterpret_cast<const double *>(getElementPointer(Elt));
- return *EltPtr;
-}
-
-/// getElementAsConstant - Return a Constant for a specified index's element.
-/// Note that this has to compute a new constant to return, so it isn't as
-/// efficient as getElementAsInteger/Float/Double.
-Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const {
- if (getElementType()->isFloatTy() || getElementType()->isDoubleTy())
- return ConstantFP::get(getContext(), getElementAsAPFloat(Elt));
-
- return ConstantInt::get(getElementType(), getElementAsInteger(Elt));
-}
-
-/// isString - This method returns true if this is an array of i8.
-bool ConstantDataSequential::isString() const {
- return isa<ArrayType>(getType()) && getElementType()->isIntegerTy(8);
-}
-
-/// isCString - This method returns true if the array "isString", ends with a
-/// nul byte, and does not contain any other nul bytes.
-bool ConstantDataSequential::isCString() const {
- if (!isString())
- return false;
-
- StringRef Str = getAsString();
-
- // The last value must be nul.
- if (Str.back() != 0) return false;
-
- // Other elements must be non-nul.
- return Str.drop_back().find(0) == StringRef::npos;
-}
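-// For example, the IR constant c"hi\00" is a [3 x i8] array whose last byte
-// is nul and whose other bytes are non-nul, so isCString() returns true;
-// c"a\00b" fails the interior-nul check and c"hi" fails the trailing-nul one.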
-
-/// getSplatValue - If this is a splat constant, meaning that all of the
-/// elements have the same value, return that value. Otherwise return NULL.
-Constant *ConstantDataVector::getSplatValue() const {
- const char *Base = getRawDataValues().data();
-
- // Compare elements 1 through N-1 to the 0th element.
- unsigned EltSize = getElementByteSize();
- for (unsigned i = 1, e = getNumElements(); i != e; ++i)
- if (memcmp(Base, Base+i*EltSize, EltSize))
- return 0;
-
- // If they're all the same, return the 0th one as a representative.
- return getElementAsConstant(0);
-}
-
-//===----------------------------------------------------------------------===//
-// replaceUsesOfWithOnConstant implementations
-
-/// replaceUsesOfWithOnConstant - Update this constant array to change uses of
-/// 'From' to be uses of 'To'. This must update the uniquing data structures
-/// etc.
-///
-/// Note that we intentionally replace all uses of From with To here. Consider
-/// a large array that uses 'From' 1000 times. By handling this case all here,
-/// ConstantArray::replaceUsesOfWithOnConstant is only invoked once, and that
-/// single invocation handles all 1000 uses. Handling them one at a time would
-/// work, but would be really slow because it would have to unique each updated
-/// array instance.
-///
-void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
- Use *U) {
- assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
- Constant *ToC = cast<Constant>(To);
-
- LLVMContextImpl *pImpl = getType()->getContext().pImpl;
-
- SmallVector<Constant*, 8> Values;
- LLVMContextImpl::ArrayConstantsTy::LookupKey Lookup;
- Lookup.first = cast<ArrayType>(getType());
- Values.reserve(getNumOperands()); // Build replacement array.
-
- // Fill Values with the modified operands of the constant array, and count
- // how many operands are updated along the way.
- unsigned NumUpdated = 0;
-
- // Keep track of whether all the values in the array are "ToC".
- bool AllSame = true;
- for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
- Constant *Val = cast<Constant>(O->get());
- if (Val == From) {
- Val = ToC;
- ++NumUpdated;
- }
- Values.push_back(Val);
- AllSame &= Val == ToC;
- }
-
- Constant *Replacement = 0;
- if (AllSame && ToC->isNullValue()) {
- Replacement = ConstantAggregateZero::get(getType());
- } else if (AllSame && isa<UndefValue>(ToC)) {
- Replacement = UndefValue::get(getType());
- } else {
- // Check to see if we have this array type already.
- Lookup.second = makeArrayRef(Values);
- LLVMContextImpl::ArrayConstantsTy::MapTy::iterator I =
- pImpl->ArrayConstants.find(Lookup);
-
- if (I != pImpl->ArrayConstants.map_end()) {
- Replacement = I->first;
- } else {
- // Okay, the new shape doesn't exist in the system yet. Instead of
- // creating a new constant array, inserting it, replaceallusesof'ing the
- // old with the new, then deleting the old... just update the current one
- // in place!
- pImpl->ArrayConstants.remove(this);
-
- // Update to the new value. Optimize for the case when we have a single
- // operand that we're changing, but handle bulk updates efficiently.
- if (NumUpdated == 1) {
- unsigned OperandToUpdate = U - OperandList;
- assert(getOperand(OperandToUpdate) == From &&
- "ReplaceAllUsesWith broken!");
- setOperand(OperandToUpdate, ToC);
- } else {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (getOperand(i) == From)
- setOperand(i, ToC);
- }
- pImpl->ArrayConstants.insert(this);
- return;
- }
- }
-
- // Otherwise, we need to replace this with an existing value.
- assert(Replacement != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- replaceAllUsesWith(Replacement);
-
- // Delete the old constant!
- destroyConstant();
-}
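-// Walkthrough (illustrative): replacing From in the array
-// [4 x i32] [From, From, From, From] with a null ToC reaches here once; the
-// loop sets NumUpdated = 4 and AllSame = true, so the whole array collapses
-// to a ConstantAggregateZero in one step instead of being re-uniqued once
-// per use of From.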
-
-void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
- Use *U) {
- assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
- Constant *ToC = cast<Constant>(To);
-
- unsigned OperandToUpdate = U-OperandList;
- assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
-
- SmallVector<Constant*, 8> Values;
- LLVMContextImpl::StructConstantsTy::LookupKey Lookup;
- Lookup.first = cast<StructType>(getType());
- Values.reserve(getNumOperands()); // Build replacement struct.
-
- // Fill Values with the modified operands of the constant struct, and
- // compute whether the result becomes an all-zeros or all-undef struct.
- bool isAllZeros = false;
- bool isAllUndef = false;
- if (ToC->isNullValue()) {
- isAllZeros = true;
- for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
- Constant *Val = cast<Constant>(O->get());
- Values.push_back(Val);
- if (isAllZeros) isAllZeros = Val->isNullValue();
- }
- } else if (isa<UndefValue>(ToC)) {
- isAllUndef = true;
- for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
- Constant *Val = cast<Constant>(O->get());
- Values.push_back(Val);
- if (isAllUndef) isAllUndef = isa<UndefValue>(Val);
- }
- } else {
- for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O)
- Values.push_back(cast<Constant>(O->get()));
- }
- Values[OperandToUpdate] = ToC;
-
- LLVMContextImpl *pImpl = getContext().pImpl;
-
- Constant *Replacement = 0;
- if (isAllZeros) {
- Replacement = ConstantAggregateZero::get(getType());
- } else if (isAllUndef) {
- Replacement = UndefValue::get(getType());
- } else {
- // Check to see if we have this struct type already.
- Lookup.second = makeArrayRef(Values);
- LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
- pImpl->StructConstants.find(Lookup);
-
- if (I != pImpl->StructConstants.map_end()) {
- Replacement = I->first;
- } else {
- // Okay, the new shape doesn't exist in the system yet. Instead of
- // creating a new constant struct, inserting it, replaceallusesof'ing the
- // old with the new, then deleting the old... just update the current one
- // in place!
- pImpl->StructConstants.remove(this);
-
- // Update to the new value.
- setOperand(OperandToUpdate, ToC);
- pImpl->StructConstants.insert(this);
- return;
- }
- }
-
- assert(Replacement != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- replaceAllUsesWith(Replacement);
-
- // Delete the old constant!
- destroyConstant();
-}
-
-void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
- Use *U) {
- assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
-
- SmallVector<Constant*, 8> Values;
- Values.reserve(getNumOperands()); // Build replacement array...
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- Constant *Val = getOperand(i);
- if (Val == From) Val = cast<Constant>(To);
- Values.push_back(Val);
- }
-
- Constant *Replacement = get(Values);
- assert(Replacement != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- replaceAllUsesWith(Replacement);
-
- // Delete the old constant!
- destroyConstant();
-}
-
-void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
- Use *U) {
- assert(isa<Constant>(ToV) && "Cannot make Constant refer to non-constant!");
- Constant *To = cast<Constant>(ToV);
-
- SmallVector<Constant*, 8> NewOps;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- Constant *Op = getOperand(i);
- NewOps.push_back(Op == From ? To : Op);
- }
-
- Constant *Replacement = getWithOperands(NewOps);
- assert(Replacement != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- replaceAllUsesWith(Replacement);
-
- // Delete the old constant!
- destroyConstant();
-}
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
deleted file mode 100644
index 996eb12d69ea..000000000000
--- a/lib/VMCore/ConstantsContext.h
+++ /dev/null
@@ -1,774 +0,0 @@
-//===-- ConstantsContext.h - Constants-related Context Internals ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines various helper methods and classes used by
-// LLVMContextImpl for creating and managing constants.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CONSTANTSCONTEXT_H
-#define LLVM_CONSTANTSCONTEXT_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Operator.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <map>
-
-namespace llvm {
-template<class ValType>
-struct ConstantTraits;
-
-/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
-/// behind the scenes to implement unary constant exprs.
-class UnaryConstantExpr : public ConstantExpr {
- virtual void anchor();
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
- UnaryConstantExpr(unsigned Opcode, Constant *C, Type *Ty)
- : ConstantExpr(Ty, Opcode, &Op<0>(), 1) {
- Op<0>() = C;
- }
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// BinaryConstantExpr - This class is private to Constants.cpp, and is used
-/// behind the scenes to implement binary constant exprs.
-class BinaryConstantExpr : public ConstantExpr {
- virtual void anchor();
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
- BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2,
- unsigned Flags)
- : ConstantExpr(C1->getType(), Opcode, &Op<0>(), 2) {
- Op<0>() = C1;
- Op<1>() = C2;
- SubclassOptionalData = Flags;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// SelectConstantExpr - This class is private to Constants.cpp, and is used
-/// behind the scenes to implement select constant exprs.
-class SelectConstantExpr : public ConstantExpr {
- virtual void anchor();
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
- SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3)
- : ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) {
- Op<0>() = C1;
- Op<1>() = C2;
- Op<2>() = C3;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// ExtractElementConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// extractelement constant exprs.
-class ExtractElementConstantExpr : public ConstantExpr {
- virtual void anchor();
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
- ExtractElementConstantExpr(Constant *C1, Constant *C2)
- : ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
- Instruction::ExtractElement, &Op<0>(), 2) {
- Op<0>() = C1;
- Op<1>() = C2;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// InsertElementConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// insertelement constant exprs.
-class InsertElementConstantExpr : public ConstantExpr {
- virtual void anchor();
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
- InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3)
- : ConstantExpr(C1->getType(), Instruction::InsertElement,
- &Op<0>(), 3) {
- Op<0>() = C1;
- Op<1>() = C2;
- Op<2>() = C3;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// ShuffleVectorConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// shufflevector constant exprs.
-class ShuffleVectorConstantExpr : public ConstantExpr {
- virtual void anchor();
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
- ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
- : ConstantExpr(VectorType::get(
- cast<VectorType>(C1->getType())->getElementType(),
- cast<VectorType>(C3->getType())->getNumElements()),
- Instruction::ShuffleVector,
- &Op<0>(), 3) {
- Op<0>() = C1;
- Op<1>() = C2;
- Op<2>() = C3;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// ExtractValueConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// extractvalue constant exprs.
-class ExtractValueConstantExpr : public ConstantExpr {
- virtual void anchor();
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
- ExtractValueConstantExpr(Constant *Agg,
- const SmallVector<unsigned, 4> &IdxList,
- Type *DestTy)
- : ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
- Indices(IdxList) {
- Op<0>() = Agg;
- }
-
- /// Indices - These identify which value to extract.
- const SmallVector<unsigned, 4> Indices;
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-/// InsertValueConstantExpr - This class is private to
-/// Constants.cpp, and is used behind the scenes to implement
-/// insertvalue constant exprs.
-class InsertValueConstantExpr : public ConstantExpr {
- virtual void anchor();
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
- InsertValueConstantExpr(Constant *Agg, Constant *Val,
- const SmallVector<unsigned, 4> &IdxList,
- Type *DestTy)
- : ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
- Indices(IdxList) {
- Op<0>() = Agg;
- Op<1>() = Val;
- }
-
- /// Indices - These identify the position for the insertion.
- const SmallVector<unsigned, 4> Indices;
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-
-/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
-/// used behind the scenes to implement getelementptr constant exprs.
-class GetElementPtrConstantExpr : public ConstantExpr {
- virtual void anchor();
- GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
- Type *DestTy);
-public:
- static GetElementPtrConstantExpr *Create(Constant *C,
- ArrayRef<Constant*> IdxList,
- Type *DestTy,
- unsigned Flags) {
- GetElementPtrConstantExpr *Result =
- new(IdxList.size() + 1) GetElementPtrConstantExpr(C, IdxList, DestTy);
- Result->SubclassOptionalData = Flags;
- return Result;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-// CompareConstantExpr - This class is private to Constants.cpp, and is used
-// behind the scenes to implement ICmp and FCmp constant expressions. This is
-// needed in order to store the predicate value for these instructions.
-class CompareConstantExpr : public ConstantExpr {
- virtual void anchor();
- void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
-public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
- unsigned short predicate;
- CompareConstantExpr(Type *ty, Instruction::OtherOps opc,
- unsigned short pred, Constant* LHS, Constant* RHS)
- : ConstantExpr(ty, opc, &Op<0>(), 2), predicate(pred) {
- Op<0>() = LHS;
- Op<1>() = RHS;
- }
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-
-template <>
-struct OperandTraits<UnaryConstantExpr> :
- public FixedNumOperandTraits<UnaryConstantExpr, 1> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryConstantExpr, Value)
-
-template <>
-struct OperandTraits<BinaryConstantExpr> :
- public FixedNumOperandTraits<BinaryConstantExpr, 2> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
-
-template <>
-struct OperandTraits<SelectConstantExpr> :
- public FixedNumOperandTraits<SelectConstantExpr, 3> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
-
-template <>
-struct OperandTraits<ExtractElementConstantExpr> :
- public FixedNumOperandTraits<ExtractElementConstantExpr, 2> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
-
-template <>
-struct OperandTraits<InsertElementConstantExpr> :
- public FixedNumOperandTraits<InsertElementConstantExpr, 3> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementConstantExpr, Value)
-
-template <>
-struct OperandTraits<ShuffleVectorConstantExpr> :
- public FixedNumOperandTraits<ShuffleVectorConstantExpr, 3> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
-
-template <>
-struct OperandTraits<ExtractValueConstantExpr> :
- public FixedNumOperandTraits<ExtractValueConstantExpr, 1> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
-
-template <>
-struct OperandTraits<InsertValueConstantExpr> :
- public FixedNumOperandTraits<InsertValueConstantExpr, 2> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
-
-template <>
-struct OperandTraits<GetElementPtrConstantExpr> :
- public VariadicOperandTraits<GetElementPtrConstantExpr, 1> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrConstantExpr, Value)
-
-
-template <>
-struct OperandTraits<CompareConstantExpr> :
- public FixedNumOperandTraits<CompareConstantExpr, 2> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
-
-struct ExprMapKeyType {
- ExprMapKeyType(unsigned opc,
- ArrayRef<Constant*> ops,
- unsigned short flags = 0,
- unsigned short optionalflags = 0,
- ArrayRef<unsigned> inds = ArrayRef<unsigned>())
- : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags),
- operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {}
- uint8_t opcode;
- uint8_t subclassoptionaldata;
- uint16_t subclassdata;
- std::vector<Constant*> operands;
- SmallVector<unsigned, 4> indices;
- bool operator==(const ExprMapKeyType& that) const {
- return this->opcode == that.opcode &&
- this->subclassdata == that.subclassdata &&
- this->subclassoptionaldata == that.subclassoptionaldata &&
- this->operands == that.operands &&
- this->indices == that.indices;
- }
- bool operator<(const ExprMapKeyType& that) const {
- if (this->opcode != that.opcode) return this->opcode < that.opcode;
- if (this->operands != that.operands) return this->operands < that.operands;
- if (this->subclassdata != that.subclassdata)
- return this->subclassdata < that.subclassdata;
- if (this->subclassoptionaldata != that.subclassoptionaldata)
- return this->subclassoptionaldata < that.subclassoptionaldata;
- if (this->indices != that.indices) return this->indices < that.indices;
- return false;
- }
-
- bool operator!=(const ExprMapKeyType& that) const {
- return !(*this == that);
- }
-};
-
-struct InlineAsmKeyType {
- InlineAsmKeyType(StringRef AsmString,
- StringRef Constraints, bool hasSideEffects,
- bool isAlignStack, InlineAsm::AsmDialect asmDialect)
- : asm_string(AsmString), constraints(Constraints),
- has_side_effects(hasSideEffects), is_align_stack(isAlignStack),
- asm_dialect(asmDialect) {}
- std::string asm_string;
- std::string constraints;
- bool has_side_effects;
- bool is_align_stack;
- InlineAsm::AsmDialect asm_dialect;
- bool operator==(const InlineAsmKeyType& that) const {
- return this->asm_string == that.asm_string &&
- this->constraints == that.constraints &&
- this->has_side_effects == that.has_side_effects &&
- this->is_align_stack == that.is_align_stack &&
- this->asm_dialect == that.asm_dialect;
- }
- bool operator<(const InlineAsmKeyType& that) const {
- if (this->asm_string != that.asm_string)
- return this->asm_string < that.asm_string;
- if (this->constraints != that.constraints)
- return this->constraints < that.constraints;
- if (this->has_side_effects != that.has_side_effects)
- return this->has_side_effects < that.has_side_effects;
- if (this->is_align_stack != that.is_align_stack)
- return this->is_align_stack < that.is_align_stack;
- if (this->asm_dialect != that.asm_dialect)
- return this->asm_dialect < that.asm_dialect;
- return false;
- }
-
- bool operator!=(const InlineAsmKeyType& that) const {
- return !(*this == that);
- }
-};
-
-// ConstantCreator - A class that is used to create constants by
-// ConstantUniqueMap*. It should be partially specialized if something
-// unusual needs to be done to interface with the ctor for a particular
-// constant. The number of operands for each ConstantCreator::create method
-// is determined by the ConstantTraits template.
-//
-template<typename T, typename Alloc>
-struct ConstantTraits< std::vector<T, Alloc> > {
- static unsigned uses(const std::vector<T, Alloc>& v) {
- return v.size();
- }
-};
-
-template<>
-struct ConstantTraits<Constant *> {
- static unsigned uses(Constant * const & v) {
- return 1;
- }
-};
-
-template<class ConstantClass, class TypeClass, class ValType>
-struct ConstantCreator {
- static ConstantClass *create(TypeClass *Ty, const ValType &V) {
- return new(ConstantTraits<ValType>::uses(V)) ConstantClass(Ty, V);
- }
-};
-
-template<class ConstantClass, class TypeClass>
-struct ConstantArrayCreator {
- static ConstantClass *create(TypeClass *Ty, ArrayRef<Constant*> V) {
- return new(V.size()) ConstantClass(Ty, V);
- }
-};
-
-template<class ConstantClass>
-struct ConstantKeyData {
- typedef void ValType;
- static ValType getValType(ConstantClass *C) {
- llvm_unreachable("Unknown Constant type!");
- }
-};
-
-template<>
-struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
- static ConstantExpr *create(Type *Ty, const ExprMapKeyType &V,
- unsigned short pred = 0) {
- if (Instruction::isCast(V.opcode))
- return new UnaryConstantExpr(V.opcode, V.operands[0], Ty);
- if ((V.opcode >= Instruction::BinaryOpsBegin &&
- V.opcode < Instruction::BinaryOpsEnd))
- return new BinaryConstantExpr(V.opcode, V.operands[0], V.operands[1],
- V.subclassoptionaldata);
- if (V.opcode == Instruction::Select)
- return new SelectConstantExpr(V.operands[0], V.operands[1],
- V.operands[2]);
- if (V.opcode == Instruction::ExtractElement)
- return new ExtractElementConstantExpr(V.operands[0], V.operands[1]);
- if (V.opcode == Instruction::InsertElement)
- return new InsertElementConstantExpr(V.operands[0], V.operands[1],
- V.operands[2]);
- if (V.opcode == Instruction::ShuffleVector)
- return new ShuffleVectorConstantExpr(V.operands[0], V.operands[1],
- V.operands[2]);
- if (V.opcode == Instruction::InsertValue)
- return new InsertValueConstantExpr(V.operands[0], V.operands[1],
- V.indices, Ty);
- if (V.opcode == Instruction::ExtractValue)
- return new ExtractValueConstantExpr(V.operands[0], V.indices, Ty);
- if (V.opcode == Instruction::GetElementPtr) {
- std::vector<Constant*> IdxList(V.operands.begin()+1, V.operands.end());
- return GetElementPtrConstantExpr::Create(V.operands[0], IdxList, Ty,
- V.subclassoptionaldata);
- }
-
- // The compare instructions are weird. We have to encode the predicate
- // value and it is combined with the instruction opcode by multiplying
- // the opcode by one hundred. We must decode this to get the predicate.
- if (V.opcode == Instruction::ICmp)
- return new CompareConstantExpr(Ty, Instruction::ICmp, V.subclassdata,
- V.operands[0], V.operands[1]);
- if (V.opcode == Instruction::FCmp)
- return new CompareConstantExpr(Ty, Instruction::FCmp, V.subclassdata,
- V.operands[0], V.operands[1]);
- llvm_unreachable("Invalid ConstantExpr!");
- }
-};
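-// Decode sketch for the encoding mentioned above (assuming the combined form
-// is Opcode * 100 + Predicate, as the comment states):
-//   unsigned Combined = ...; // encoded opcode field of a compare constexpr
-//   unsigned Opc = Combined / 100; // Instruction::ICmp or Instruction::FCmp
-//   unsigned Pred = Combined % 100; // the CmpInst predicate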
-
-template<>
-struct ConstantKeyData<ConstantExpr> {
- typedef ExprMapKeyType ValType;
- static ValType getValType(ConstantExpr *CE) {
- std::vector<Constant*> Operands;
- Operands.reserve(CE->getNumOperands());
- for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
- Operands.push_back(cast<Constant>(CE->getOperand(i)));
- return ExprMapKeyType(CE->getOpcode(), Operands,
- CE->isCompare() ? CE->getPredicate() : 0,
- CE->getRawSubclassOptionalData(),
- CE->hasIndices() ?
- CE->getIndices() : ArrayRef<unsigned>());
- }
-};
-
-template<>
-struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> {
- static InlineAsm *create(PointerType *Ty, const InlineAsmKeyType &Key) {
- return new InlineAsm(Ty, Key.asm_string, Key.constraints,
- Key.has_side_effects, Key.is_align_stack,
- Key.asm_dialect);
- }
-};
-
-template<>
-struct ConstantKeyData<InlineAsm> {
- typedef InlineAsmKeyType ValType;
- static ValType getValType(InlineAsm *Asm) {
- return InlineAsmKeyType(Asm->getAsmString(), Asm->getConstraintString(),
- Asm->hasSideEffects(), Asm->isAlignStack(),
- Asm->getDialect());
- }
-};
-
-template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
- bool HasLargeKey = false /*true for arrays and structs*/ >
-class ConstantUniqueMap {
-public:
- typedef std::pair<TypeClass*, ValType> MapKey;
- typedef std::map<MapKey, ConstantClass *> MapTy;
- typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy;
-private:
- /// Map - This is the main map from the element descriptor to the Constants.
- /// This is the primary way we avoid creating two of the same shape
- /// constant.
- MapTy Map;
-
- /// InverseMap - If "HasLargeKey" is true, this contains an inverse mapping
- /// from the constants to their element in Map. This is important for
- /// removal of constants from the array, which would otherwise have to scan
- /// through the map with very large keys.
- InverseMapTy InverseMap;
-
-public:
- typename MapTy::iterator map_begin() { return Map.begin(); }
- typename MapTy::iterator map_end() { return Map.end(); }
-
- void freeConstants() {
- for (typename MapTy::iterator I=Map.begin(), E=Map.end();
- I != E; ++I) {
- // Asserts that use_empty().
- delete I->second;
- }
- }
-
- /// InsertOrGetItem - Return an iterator for the specified element.
- /// If the element exists in the map, the returned iterator points to the
- /// entry and Exists is set to true. If not, the iterator points to the
- /// newly inserted entry, Exists is set to false, and the entry's
- /// I->second == 0 slot should be filled in by the caller.
- typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, ConstantClass *>
- &InsertVal,
- bool &Exists) {
- std::pair<typename MapTy::iterator, bool> IP = Map.insert(InsertVal);
- Exists = !IP.second;
- return IP.first;
- }
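- // Usage sketch (names illustrative): probe with a null-valued pair and
- // fill in the slot when Exists comes back false:
- //   bool Exists;
- //   std::pair<MapKey, ConstantClass*> IV(MapKey(Ty, V), 0);
- //   typename MapTy::iterator I = InsertOrGetItem(IV, Exists);
- //   if (!Exists) I->second = NewC; // NewC: the freshly created constant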
-
-private:
- typename MapTy::iterator FindExistingElement(ConstantClass *CP) {
- if (HasLargeKey) {
- typename InverseMapTy::iterator IMI = InverseMap.find(CP);
- assert(IMI != InverseMap.end() && IMI->second != Map.end() &&
- IMI->second->second == CP &&
- "InverseMap corrupt!");
- return IMI->second;
- }
-
- typename MapTy::iterator I =
- Map.find(MapKey(static_cast<TypeClass*>(CP->getType()),
- ConstantKeyData<ConstantClass>::getValType(CP)));
- if (I == Map.end() || I->second != CP) {
- // FIXME: This should not use a linear scan. If this gets to be a
- // performance problem, someone should look at this.
- for (I = Map.begin(); I != Map.end() && I->second != CP; ++I)
- /* empty */;
- }
- return I;
- }
-
- ConstantClass *Create(TypeClass *Ty, ValRefType V,
- typename MapTy::iterator I) {
- ConstantClass* Result =
- ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
-
- assert(Result->getType() == Ty && "Type specified is not correct!");
- I = Map.insert(I, std::make_pair(MapKey(Ty, V), Result));
-
- if (HasLargeKey) // Remember the reverse mapping if needed.
- InverseMap.insert(std::make_pair(Result, I));
-
- return Result;
- }
-public:
-
- /// getOrCreate - Return the specified constant from the map, creating it if
- /// necessary.
- ConstantClass *getOrCreate(TypeClass *Ty, ValRefType V) {
- MapKey Lookup(Ty, V);
- ConstantClass* Result = 0;
-
- typename MapTy::iterator I = Map.find(Lookup);
- // Is it in the map?
- if (I != Map.end())
- Result = I->second;
-
- if (!Result) {
- // If no preexisting value, create one now...
- Result = Create(Ty, V, I);
- }
-
- return Result;
- }
-
- void remove(ConstantClass *CP) {
- typename MapTy::iterator I = FindExistingElement(CP);
- assert(I != Map.end() && "Constant not found in constant table!");
- assert(I->second == CP && "Didn't find correct element?");
-
- if (HasLargeKey) // Remember the reverse mapping if needed.
- InverseMap.erase(CP);
-
- Map.erase(I);
- }
-
- /// MoveConstantToNewSlot - If we are about to change C to be the element
- /// specified by I, update our internal data structures to reflect this
- /// fact.
- void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
- // First, remove the old location of the specified constant in the map.
- typename MapTy::iterator OldI = FindExistingElement(C);
- assert(OldI != Map.end() && "Constant not found in constant table!");
- assert(OldI->second == C && "Didn't find correct element?");
-
- // Remove the old entry from the map.
- Map.erase(OldI);
-
- // Update the inverse map so that we know that this constant is now
- // located at descriptor I.
- if (HasLargeKey) {
- assert(I->second == C && "Bad inversemap entry!");
- InverseMap[C] = I;
- }
- }
-
- void dump() const {
- DEBUG(dbgs() << "ConstantsContext.h: ConstantUniqueMap\n");
- }
-};
-
-// Unique map for aggregate constants
-template<class TypeClass, class ConstantClass>
-class ConstantAggrUniqueMap {
-public:
- typedef ArrayRef<Constant*> Operands;
- typedef std::pair<TypeClass*, Operands> LookupKey;
-private:
- struct MapInfo {
- typedef DenseMapInfo<ConstantClass*> ConstantClassInfo;
- typedef DenseMapInfo<Constant*> ConstantInfo;
- typedef DenseMapInfo<TypeClass*> TypeClassInfo;
- static inline ConstantClass* getEmptyKey() {
- return ConstantClassInfo::getEmptyKey();
- }
- static inline ConstantClass* getTombstoneKey() {
- return ConstantClassInfo::getTombstoneKey();
- }
- static unsigned getHashValue(const ConstantClass *CP) {
- SmallVector<Constant*, 8> CPOperands;
- CPOperands.reserve(CP->getNumOperands());
- for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I)
- CPOperands.push_back(CP->getOperand(I));
- return getHashValue(LookupKey(CP->getType(), CPOperands));
- }
- static bool isEqual(const ConstantClass *LHS, const ConstantClass *RHS) {
- return LHS == RHS;
- }
- static unsigned getHashValue(const LookupKey &Val) {
- return hash_combine(Val.first, hash_combine_range(Val.second.begin(),
- Val.second.end()));
- }
- static bool isEqual(const LookupKey &LHS, const ConstantClass *RHS) {
- if (RHS == getEmptyKey() || RHS == getTombstoneKey())
- return false;
- if (LHS.first != RHS->getType()
- || LHS.second.size() != RHS->getNumOperands())
- return false;
- for (unsigned I = 0, E = RHS->getNumOperands(); I < E; ++I) {
- if (LHS.second[I] != RHS->getOperand(I))
- return false;
- }
- return true;
- }
- };
-public:
- typedef DenseMap<ConstantClass *, char, MapInfo> MapTy;
-
-private:
- /// Map - This is the main map from the element descriptor to the Constants.
- /// This is the primary way we avoid creating two of the same shape
- /// constant.
- MapTy Map;
-
-public:
- typename MapTy::iterator map_begin() { return Map.begin(); }
- typename MapTy::iterator map_end() { return Map.end(); }
-
- void freeConstants() {
- for (typename MapTy::iterator I=Map.begin(), E=Map.end();
- I != E; ++I) {
- // Asserts that use_empty().
- delete I->first;
- }
- }
-
-private:
- typename MapTy::iterator findExistingElement(ConstantClass *CP) {
- return Map.find(CP);
- }
-
- ConstantClass *Create(TypeClass *Ty, Operands V, typename MapTy::iterator I) {
- ConstantClass* Result =
- ConstantArrayCreator<ConstantClass,TypeClass>::create(Ty, V);
-
- assert(Result->getType() == Ty && "Type specified is not correct!");
- Map[Result] = '\0';
-
- return Result;
- }
-public:
-
- /// getOrCreate - Return the specified constant from the map, creating it if
- /// necessary.
- ConstantClass *getOrCreate(TypeClass *Ty, Operands V) {
- LookupKey Lookup(Ty, V);
- ConstantClass* Result = 0;
-
- typename MapTy::iterator I = Map.find_as(Lookup);
- // Is it in the map?
- if (I != Map.end())
- Result = I->first;
-
- if (!Result) {
- // If no preexisting value, create one now...
- Result = Create(Ty, V, I);
- }
-
- return Result;
- }
-
- /// Find the constant by lookup key.
- typename MapTy::iterator find(LookupKey Lookup) {
- return Map.find_as(Lookup);
- }
-
- /// Insert the constant into its proper slot.
- void insert(ConstantClass *CP) {
- Map[CP] = '\0';
- }
-
- /// Remove this constant from the map.
- void remove(ConstantClass *CP) {
- typename MapTy::iterator I = findExistingElement(CP);
- assert(I != Map.end() && "Constant not found in constant table!");
- assert(I->first == CP && "Didn't find correct element?");
- Map.erase(I);
- }
-
- void dump() const {
- DEBUG(dbgs() << "ConstantsContext.h: ConstantAggrUniqueMap\n");
- }
-};
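-// Consumption sketch (per the Constants.cpp hunk above): LLVMContextImpl's
-// ArrayConstantsTy/StructConstantsTy appear to be instances of this map, and
-// replaceUsesOfWithOnConstant probes them along the lines of
-//   LookupKey Lookup(cast<ArrayType>(Ty), makeArrayRef(Values));
-//   MapTy::iterator I = pImpl->ArrayConstants.find(Lookup);
-// before deciding whether to reuse an existing constant or mutate in place.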
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
deleted file mode 100644
index 847bc134ddb7..000000000000
--- a/lib/VMCore/Core.cpp
+++ /dev/null
@@ -1,2410 +0,0 @@
-//===-- Core.cpp ----------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the common infrastructure (including the C bindings)
-// for libLLVMCore.a, which implements the LLVM intermediate representation.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm-c/Core.h"
-#include "llvm/Attributes.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-#include <cassert>
-#include <cstdlib>
-#include <cstring>
-
-using namespace llvm;
-
-void llvm::initializeCore(PassRegistry &Registry) {
- initializeDominatorTreePass(Registry);
- initializePrintModulePassPass(Registry);
- initializePrintFunctionPassPass(Registry);
- initializeVerifierPass(Registry);
- initializePreVerifierPass(Registry);
-}
-
-void LLVMInitializeCore(LLVMPassRegistryRef R) {
- initializeCore(*unwrap(R));
-}
-
-/*===-- Error handling ----------------------------------------------------===*/
-
-void LLVMDisposeMessage(char *Message) {
- free(Message);
-}
-
-
-/*===-- Operations on contexts --------------------------------------------===*/
-
-LLVMContextRef LLVMContextCreate() {
- return wrap(new LLVMContext());
-}
-
-LLVMContextRef LLVMGetGlobalContext() {
- return wrap(&getGlobalContext());
-}
-
-void LLVMContextDispose(LLVMContextRef C) {
- delete unwrap(C);
-}
-
-unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name,
- unsigned SLen) {
- return unwrap(C)->getMDKindID(StringRef(Name, SLen));
-}
-
-unsigned LLVMGetMDKindID(const char* Name, unsigned SLen) {
- return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen);
-}
-
-
-/*===-- Operations on modules ---------------------------------------------===*/
-
-LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) {
- return wrap(new Module(ModuleID, getGlobalContext()));
-}
-
-LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID,
- LLVMContextRef C) {
- return wrap(new Module(ModuleID, *unwrap(C)));
-}
-
-void LLVMDisposeModule(LLVMModuleRef M) {
- delete unwrap(M);
-}
-
-/*--.. Data layout .........................................................--*/
-const char *LLVMGetDataLayout(LLVMModuleRef M) {
- return unwrap(M)->getDataLayout().c_str();
-}
-
-void LLVMSetDataLayout(LLVMModuleRef M, const char *DataLayoutStr) {
- unwrap(M)->setDataLayout(DataLayoutStr);
-}
-
-/*--.. Target triple .......................................................--*/
-const char *LLVMGetTarget(LLVMModuleRef M) {
- return unwrap(M)->getTargetTriple().c_str();
-}
-
-void LLVMSetTarget(LLVMModuleRef M, const char *Triple) {
- unwrap(M)->setTargetTriple(Triple);
-}
-
-void LLVMDumpModule(LLVMModuleRef M) {
- unwrap(M)->dump();
-}
-
-LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename,
- char **ErrorMessage) {
- std::string error;
- raw_fd_ostream dest(Filename, error);
- if (!error.empty()) {
- *ErrorMessage = strdup(error.c_str());
- return true;
- }
-
- unwrap(M)->print(dest, NULL);
-
- if (!error.empty()) {
- *ErrorMessage = strdup(error.c_str());
- return true;
- }
- dest.flush();
- return false;
-}
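-/* Usage sketch: on failure the caller owns the message and should release it
-   with LLVMDisposeMessage (defined above); "out.ll" is illustrative:
-     char *Err = 0;
-     if (LLVMPrintModuleToFile(M, "out.ll", &Err)) {
-       fprintf(stderr, "%s\n", Err);
-       LLVMDisposeMessage(Err);
-     }
-*/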
-
-/*--.. Operations on inline assembler ......................................--*/
-void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) {
- unwrap(M)->setModuleInlineAsm(StringRef(Asm));
-}
-
-
-/*--.. Operations on module contexts ......................................--*/
-LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) {
- return wrap(&unwrap(M)->getContext());
-}
-
-
-/*===-- Operations on types -----------------------------------------------===*/
-
-/*--.. Operations on all types (mostly) ....................................--*/
-
-LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
- switch (unwrap(Ty)->getTypeID()) {
- default: llvm_unreachable("Unhandled TypeID.");
- case Type::VoidTyID:
- return LLVMVoidTypeKind;
- case Type::HalfTyID:
- return LLVMHalfTypeKind;
- case Type::FloatTyID:
- return LLVMFloatTypeKind;
- case Type::DoubleTyID:
- return LLVMDoubleTypeKind;
- case Type::X86_FP80TyID:
- return LLVMX86_FP80TypeKind;
- case Type::FP128TyID:
- return LLVMFP128TypeKind;
- case Type::PPC_FP128TyID:
- return LLVMPPC_FP128TypeKind;
- case Type::LabelTyID:
- return LLVMLabelTypeKind;
- case Type::MetadataTyID:
- return LLVMMetadataTypeKind;
- case Type::IntegerTyID:
- return LLVMIntegerTypeKind;
- case Type::FunctionTyID:
- return LLVMFunctionTypeKind;
- case Type::StructTyID:
- return LLVMStructTypeKind;
- case Type::ArrayTyID:
- return LLVMArrayTypeKind;
- case Type::PointerTyID:
- return LLVMPointerTypeKind;
- case Type::VectorTyID:
- return LLVMVectorTypeKind;
- case Type::X86_MMXTyID:
- return LLVMX86_MMXTypeKind;
- }
-}
-
-LLVMBool LLVMTypeIsSized(LLVMTypeRef Ty) {
- return unwrap(Ty)->isSized();
-}
-
-LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) {
- return wrap(&unwrap(Ty)->getContext());
-}
-
-/*--.. Operations on integer types .........................................--*/
-
-LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getInt1Ty(*unwrap(C));
-}
-LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getInt8Ty(*unwrap(C));
-}
-LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getInt16Ty(*unwrap(C));
-}
-LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getInt32Ty(*unwrap(C));
-}
-LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getInt64Ty(*unwrap(C));
-}
-LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits) {
- return wrap(IntegerType::get(*unwrap(C), NumBits));
-}
-
-LLVMTypeRef LLVMInt1Type(void) {
- return LLVMInt1TypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMInt8Type(void) {
- return LLVMInt8TypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMInt16Type(void) {
- return LLVMInt16TypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMInt32Type(void) {
- return LLVMInt32TypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMInt64Type(void) {
- return LLVMInt64TypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMIntType(unsigned NumBits) {
- return LLVMIntTypeInContext(LLVMGetGlobalContext(), NumBits);
-}
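-/* Sketch: these two calls yield the same uniqued i32 type in the global
-   context:
-     LLVMTypeRef A = LLVMInt32Type();
-     LLVMTypeRef B = LLVMIntTypeInContext(LLVMGetGlobalContext(), 32);
-*/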
-
-unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy) {
- return unwrap<IntegerType>(IntegerTy)->getBitWidth();
-}
-
-/*--.. Operations on real types ............................................--*/
-
-LLVMTypeRef LLVMHalfTypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getHalfTy(*unwrap(C));
-}
-LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getFloatTy(*unwrap(C));
-}
-LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getDoubleTy(*unwrap(C));
-}
-LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getX86_FP80Ty(*unwrap(C));
-}
-LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getFP128Ty(*unwrap(C));
-}
-LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getPPC_FP128Ty(*unwrap(C));
-}
-LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) {
- return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C));
-}
-
-LLVMTypeRef LLVMHalfType(void) {
- return LLVMHalfTypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMFloatType(void) {
- return LLVMFloatTypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMDoubleType(void) {
- return LLVMDoubleTypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMX86FP80Type(void) {
- return LLVMX86FP80TypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMFP128Type(void) {
- return LLVMFP128TypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMPPCFP128Type(void) {
- return LLVMPPCFP128TypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMX86MMXType(void) {
- return LLVMX86MMXTypeInContext(LLVMGetGlobalContext());
-}
-
-/*--.. Operations on function types ........................................--*/
-
-LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
- LLVMTypeRef *ParamTypes, unsigned ParamCount,
- LLVMBool IsVarArg) {
- ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
- return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0));
-}
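-/* Sketch: building the function type i32 (i32, i32) via the C API:
-     LLVMTypeRef Params[] = { LLVMInt32Type(), LLVMInt32Type() };
-     LLVMTypeRef FnTy = LLVMFunctionType(LLVMInt32Type(), Params, 2, 0);
-   The final 0 is IsVarArg. */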
-
-LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) {
- return unwrap<FunctionType>(FunctionTy)->isVarArg();
-}
-
-LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy) {
- return wrap(unwrap<FunctionType>(FunctionTy)->getReturnType());
-}
-
-unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy) {
- return unwrap<FunctionType>(FunctionTy)->getNumParams();
-}
-
-void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) {
- FunctionType *Ty = unwrap<FunctionType>(FunctionTy);
- for (FunctionType::param_iterator I = Ty->param_begin(),
- E = Ty->param_end(); I != E; ++I)
- *Dest++ = wrap(*I);
-}
-
-/*--.. Operations on struct types ..........................................--*/
-
-LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
- unsigned ElementCount, LLVMBool Packed) {
- ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
- return wrap(StructType::get(*unwrap(C), Tys, Packed != 0));
-}
-
-LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
- unsigned ElementCount, LLVMBool Packed) {
- return LLVMStructTypeInContext(LLVMGetGlobalContext(), ElementTypes,
- ElementCount, Packed);
-}
-
-LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name) {
- return wrap(StructType::create(*unwrap(C), Name));
-}
-
-const char *LLVMGetStructName(LLVMTypeRef Ty) {
- StructType *Type = unwrap<StructType>(Ty);
- if (!Type->hasName())
- return 0;
- return Type->getName().data();
-}
-
-void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes,
- unsigned ElementCount, LLVMBool Packed) {
- ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
- unwrap<StructType>(StructTy)->setBody(Tys, Packed != 0);
-}
-
-unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) {
- return unwrap<StructType>(StructTy)->getNumElements();
-}
-
-void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) {
- StructType *Ty = unwrap<StructType>(StructTy);
- for (StructType::element_iterator I = Ty->element_begin(),
- E = Ty->element_end(); I != E; ++I)
- *Dest++ = wrap(*I);
-}
-
-LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
- return unwrap<StructType>(StructTy)->isPacked();
-}
-
-LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy) {
- return unwrap<StructType>(StructTy)->isOpaque();
-}
-
-LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
- return wrap(unwrap(M)->getTypeByName(Name));
-}
-
-/*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
-
-LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
- return wrap(ArrayType::get(unwrap(ElementType), ElementCount));
-}
-
-LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace) {
- return wrap(PointerType::get(unwrap(ElementType), AddressSpace));
-}
-
-LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) {
- return wrap(VectorType::get(unwrap(ElementType), ElementCount));
-}
-
-LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty) {
- return wrap(unwrap<SequentialType>(Ty)->getElementType());
-}
-
-unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) {
- return unwrap<ArrayType>(ArrayTy)->getNumElements();
-}
-
-unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy) {
- return unwrap<PointerType>(PointerTy)->getAddressSpace();
-}
-
-unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) {
- return unwrap<VectorType>(VectorTy)->getNumElements();
-}
-
-/*--.. Operations on other types ...........................................--*/
-
-LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C) {
- return wrap(Type::getVoidTy(*unwrap(C)));
-}
-LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C) {
- return wrap(Type::getLabelTy(*unwrap(C)));
-}
-
-LLVMTypeRef LLVMVoidType(void) {
- return LLVMVoidTypeInContext(LLVMGetGlobalContext());
-}
-LLVMTypeRef LLVMLabelType(void) {
- return LLVMLabelTypeInContext(LLVMGetGlobalContext());
-}
-
-/*===-- Operations on values ----------------------------------------------===*/
-
-/*--.. Operations on all values ............................................--*/
-
-LLVMTypeRef LLVMTypeOf(LLVMValueRef Val) {
- return wrap(unwrap(Val)->getType());
-}
-
-const char *LLVMGetValueName(LLVMValueRef Val) {
- return unwrap(Val)->getName().data();
-}
-
-void LLVMSetValueName(LLVMValueRef Val, const char *Name) {
- unwrap(Val)->setName(Name);
-}
-
-void LLVMDumpValue(LLVMValueRef Val) {
- unwrap(Val)->dump();
-}
-
-void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) {
- unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal));
-}
-
-int LLVMHasMetadata(LLVMValueRef Inst) {
- return unwrap<Instruction>(Inst)->hasMetadata();
-}
-
-LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
- return wrap(unwrap<Instruction>(Inst)->getMetadata(KindID));
-}
-
-void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
- unwrap<Instruction>(Inst)->setMetadata(KindID, MD? unwrap<MDNode>(MD) : NULL);
-}
-
-/*--.. Conversion functions ................................................--*/
-
-#define LLVM_DEFINE_VALUE_CAST(name) \
- LLVMValueRef LLVMIsA##name(LLVMValueRef Val) { \
- return wrap(static_cast<Value*>(dyn_cast_or_null<name>(unwrap(Val)))); \
- }
-
-LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST)
-
-/*--.. Operations on Uses ..................................................--*/
-LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) {
- Value *V = unwrap(Val);
- Value::use_iterator I = V->use_begin();
- if (I == V->use_end())
- return 0;
- return wrap(&(I.getUse()));
-}
-
-LLVMUseRef LLVMGetNextUse(LLVMUseRef U) {
- Use *Next = unwrap(U)->getNext();
- if (Next)
- return wrap(Next);
- return 0;
-}
-
-LLVMValueRef LLVMGetUser(LLVMUseRef U) {
- return wrap(unwrap(U)->getUser());
-}
-
-LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) {
- return wrap(unwrap(U)->get());
-}
-
-/*--.. Operations on Users .................................................--*/
-LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) {
- Value *V = unwrap(Val);
- if (MDNode *MD = dyn_cast<MDNode>(V))
- return wrap(MD->getOperand(Index));
- return wrap(cast<User>(V)->getOperand(Index));
-}
-
-void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) {
- unwrap<User>(Val)->setOperand(Index, unwrap(Op));
-}
-
-int LLVMGetNumOperands(LLVMValueRef Val) {
- Value *V = unwrap(Val);
- if (MDNode *MD = dyn_cast<MDNode>(V))
- return MD->getNumOperands();
- return cast<User>(V)->getNumOperands();
-}
-
-/*--.. Operations on constants of any type .................................--*/
-
-LLVMValueRef LLVMConstNull(LLVMTypeRef Ty) {
- return wrap(Constant::getNullValue(unwrap(Ty)));
-}
-
-LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty) {
- return wrap(Constant::getAllOnesValue(unwrap(Ty)));
-}
-
-LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty) {
- return wrap(UndefValue::get(unwrap(Ty)));
-}
-
-LLVMBool LLVMIsConstant(LLVMValueRef Val) {
- return isa<Constant>(unwrap(Val));
-}
-
-LLVMBool LLVMIsNull(LLVMValueRef Val) {
- if (Constant *C = dyn_cast<Constant>(unwrap(Val)))
- return C->isNullValue();
- return false;
-}
-
-LLVMBool LLVMIsUndef(LLVMValueRef Val) {
- return isa<UndefValue>(unwrap(Val));
-}
-
-LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
- return wrap(ConstantPointerNull::get(unwrap<PointerType>(Ty)));
-}
-
-/*--.. Operations on metadata nodes ........................................--*/
-
-LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
- unsigned SLen) {
- return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen)));
-}
-
-LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) {
- return LLVMMDStringInContext(LLVMGetGlobalContext(), Str, SLen);
-}
-
-LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
- unsigned Count) {
- return wrap(MDNode::get(*unwrap(C),
- makeArrayRef(unwrap<Value>(Vals, Count), Count)));
-}
-
-LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) {
- return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count);
-}
-
-const char *LLVMGetMDString(LLVMValueRef V, unsigned* Len) {
- if (const MDString *S = dyn_cast<MDString>(unwrap(V))) {
- *Len = S->getString().size();
- return S->getString().data();
- }
- *Len = 0;
- return 0;
-}
-
-unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V) {
- return cast<MDNode>(unwrap(V))->getNumOperands();
-}
-
-void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest) {
- const MDNode *N = cast<MDNode>(unwrap(V));
- const unsigned numOperands = N->getNumOperands();
- for (unsigned i = 0; i < numOperands; i++)
- Dest[i] = wrap(N->getOperand(i));
-}
-
-unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char *Name) {
- if (NamedMDNode *N = unwrap(M)->getNamedMetadata(Name))
- return N->getNumOperands();
- return 0;
-}
-
-void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char *Name,
- LLVMValueRef *Dest) {
- NamedMDNode *N = unwrap(M)->getNamedMetadata(Name);
- if (!N)
- return;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- Dest[i] = wrap(N->getOperand(i));
-}
-
-void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char *Name,
- LLVMValueRef Val) {
- NamedMDNode *N = unwrap(M)->getOrInsertNamedMetadata(Name);
- if (!N)
- return;
- MDNode *Op = Val ? unwrap<MDNode>(Val) : NULL;
- if (Op)
- N->addOperand(Op);
-}
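-/* Sketch (names illustrative): attaching one node to named metadata !foo:
-     LLVMValueRef Str = LLVMMDString("bar", 3);
-     LLVMValueRef Node = LLVMMDNode(&Str, 1);
-     LLVMAddNamedMetadataOperand(M, "foo", Node);
-*/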
-
-/*--.. Operations on scalar constants ......................................--*/
-
-LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
- LLVMBool SignExtend) {
- return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), N, SignExtend != 0));
-}
-
-LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy,
- unsigned NumWords,
- const uint64_t Words[]) {
- IntegerType *Ty = unwrap<IntegerType>(IntTy);
- return wrap(ConstantInt::get(Ty->getContext(),
- APInt(Ty->getBitWidth(),
- makeArrayRef(Words, NumWords))));
-}
-
-LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char Str[],
- uint8_t Radix) {
- return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str),
- Radix));
-}
-
-LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char Str[],
- unsigned SLen, uint8_t Radix) {
- return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str, SLen),
- Radix));
-}
-
-LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N) {
- return wrap(ConstantFP::get(unwrap(RealTy), N));
-}
-
-LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text) {
- return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Text)));
-}
-
-LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char Str[],
- unsigned SLen) {
- return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Str, SLen)));
-}
-
-unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal) {
- return unwrap<ConstantInt>(ConstantVal)->getZExtValue();
-}
-
-long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal) {
- return unwrap<ConstantInt>(ConstantVal)->getSExtValue();
-}
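-
-/* Round-trip sketch for the scalar-constant helpers, assuming an existing
- LLVMContextRef C:
-
- LLVMTypeRef I32 = LLVMInt32TypeInContext(C);
- LLVMValueRef V = LLVMConstInt(I32, 42, 0); // 0 = no sign extension
- long long S = LLVMConstIntGetSExtValue(V); // yields 42
-*/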
-
-/*--.. Operations on composite constants ...................................--*/
-
-LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
- unsigned Length,
- LLVMBool DontNullTerminate) {
- /* Inverted the sense of AddNull because ', 0)' is a
- better mnemonic for null termination than ', 1)'. */
- return wrap(ConstantDataArray::getString(*unwrap(C), StringRef(Str, Length),
- DontNullTerminate == 0));
-}
-LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
- LLVMValueRef *ConstantVals,
- unsigned Count, LLVMBool Packed) {
- Constant **Elements = unwrap<Constant>(ConstantVals, Count);
- return wrap(ConstantStruct::getAnon(*unwrap(C), makeArrayRef(Elements, Count),
- Packed != 0));
-}
-
-LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
- LLVMBool DontNullTerminate) {
- return LLVMConstStringInContext(LLVMGetGlobalContext(), Str, Length,
- DontNullTerminate);
-}
-LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
- LLVMValueRef *ConstantVals, unsigned Length) {
- ArrayRef<Constant*> V(unwrap<Constant>(ConstantVals, Length), Length);
- return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V));
-}
-LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
- LLVMBool Packed) {
- return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
- Packed);
-}
-
-LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
- LLVMValueRef *ConstantVals,
- unsigned Count) {
- Constant **Elements = unwrap<Constant>(ConstantVals, Count);
- StructType *Ty = cast<StructType>(unwrap(StructTy));
-
- return wrap(ConstantStruct::get(Ty, makeArrayRef(Elements, Count)));
-}
-
-LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
- return wrap(ConstantVector::get(makeArrayRef(
- unwrap<Constant>(ScalarConstantVals, Size), Size)));
-}
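-
-/* Composite-constant sketch, assuming an existing LLVMContextRef C; the
- literal values are illustrative:
-
- LLVMTypeRef I32 = LLVMInt32TypeInContext(C);
- LLVMValueRef Hello = LLVMConstStringInContext(C, "hi", 2, 0); // NUL added
- LLVMValueRef Two = LLVMConstInt(I32, 2, 0);
- LLVMValueRef Pair[] = { Two, Two };
- LLVMValueRef Arr = LLVMConstArray(I32, Pair, 2);
-*/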
-
-/*--.. Opcode mapping .....................................................--*/
-
-static LLVMOpcode map_to_llvmopcode(int opcode)
-{
- switch (opcode) {
- default: llvm_unreachable("Unhandled Opcode.");
-#define HANDLE_INST(num, opc, clas) case num: return LLVM##opc;
-#include "llvm/Instruction.def"
-#undef HANDLE_INST
- }
-}
-
-static int map_from_llvmopcode(LLVMOpcode code)
-{
- switch (code) {
-#define HANDLE_INST(num, opc, clas) case LLVM##opc: return num;
-#include "llvm/Instruction.def"
-#undef HANDLE_INST
- }
- llvm_unreachable("Unhandled Opcode.");
-}
-
-/*--.. Constant expressions ................................................--*/
-
-LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) {
- return map_to_llvmopcode(unwrap<ConstantExpr>(ConstantVal)->getOpcode());
-}
-
-LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty) {
- return wrap(ConstantExpr::getAlignOf(unwrap(Ty)));
-}
-
-LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty) {
- return wrap(ConstantExpr::getSizeOf(unwrap(Ty)));
-}
-
-LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getNeg(unwrap<Constant>(ConstantVal)));
-}
-
-LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getNSWNeg(unwrap<Constant>(ConstantVal)));
-}
-
-LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getNUWNeg(unwrap<Constant>(ConstantVal)));
-}
-
-LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getFNeg(unwrap<Constant>(ConstantVal)));
-}
-
-LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getNot(unwrap<Constant>(ConstantVal)));
-}
-
-LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getAdd(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant,
- LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNSWAdd(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant,
- LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNUWAdd(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFAdd(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getSub(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant,
- LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNSWSub(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant,
- LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNUWSub(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getMul(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant,
- LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNSWMul(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant,
- LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNUWMul(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFMul(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getUDiv(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant,
- LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getExactSDiv(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFDiv(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getURem(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getSRem(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFRem(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getOr(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getXor(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstICmp(LLVMIntPredicate Predicate,
- LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getICmp(Predicate,
- unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate,
- LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFCmp(Predicate,
- unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getShl(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getLShr(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getAShr(unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
-}
-
-LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
- LLVMValueRef *ConstantIndices, unsigned NumIndices) {
- ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
- NumIndices);
- return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
- IdxList));
-}
-
-LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
- LLVMValueRef *ConstantIndices,
- unsigned NumIndices) {
- Constant* Val = unwrap<Constant>(ConstantVal);
- ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
- NumIndices);
- return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, IdxList));
-}
-
-LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getTrunc(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getSExt(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getZExt(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPTrunc(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPExtend(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getUIToFP(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstSIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getSIToFP(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPToUI(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPToSI(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getPtrToInt(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getIntToPtr(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getBitCast(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal,
- LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getZExtOrBitCast(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal,
- LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getSExtOrBitCast(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal,
- LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getTruncOrBitCast(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
- LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getPointerCast(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
- LLVMBool isSigned) {
- return wrap(ConstantExpr::getIntegerCast(unwrap<Constant>(ConstantVal),
- unwrap(ToType), isSigned));
-}
-
-LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPCast(unwrap<Constant>(ConstantVal),
- unwrap(ToType)));
-}
-
-LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
- LLVMValueRef ConstantIfTrue,
- LLVMValueRef ConstantIfFalse) {
- return wrap(ConstantExpr::getSelect(unwrap<Constant>(ConstantCondition),
- unwrap<Constant>(ConstantIfTrue),
- unwrap<Constant>(ConstantIfFalse)));
-}
-
-LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
- LLVMValueRef IndexConstant) {
- return wrap(ConstantExpr::getExtractElement(unwrap<Constant>(VectorConstant),
- unwrap<Constant>(IndexConstant)));
-}
-
-LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
- LLVMValueRef ElementValueConstant,
- LLVMValueRef IndexConstant) {
- return wrap(ConstantExpr::getInsertElement(unwrap<Constant>(VectorConstant),
- unwrap<Constant>(ElementValueConstant),
- unwrap<Constant>(IndexConstant)));
-}
-
-LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
- LLVMValueRef VectorBConstant,
- LLVMValueRef MaskConstant) {
- return wrap(ConstantExpr::getShuffleVector(unwrap<Constant>(VectorAConstant),
- unwrap<Constant>(VectorBConstant),
- unwrap<Constant>(MaskConstant)));
-}
-
-LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
- unsigned NumIdx) {
- return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
- makeArrayRef(IdxList, NumIdx)));
-}
-
-LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
- LLVMValueRef ElementValueConstant,
- unsigned *IdxList, unsigned NumIdx) {
- return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
- unwrap<Constant>(ElementValueConstant),
- makeArrayRef(IdxList, NumIdx)));
-}
-
-LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
- const char *Constraints,
- LLVMBool HasSideEffects,
- LLVMBool IsAlignStack) {
- return wrap(InlineAsm::get(unwrap<FunctionType>(Ty), AsmString,
- Constraints, HasSideEffects, IsAlignStack));
-}
-
-LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) {
- return wrap(BlockAddress::get(unwrap<Function>(F), unwrap(BB)));
-}
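-
-/* Constant-expression sketch, assuming an existing LLVMContextRef C; the
- multiply below folds at construction time:
-
- LLVMTypeRef I64 = LLVMInt64TypeInContext(C);
- LLVMValueRef A = LLVMConstInt(I64, 6, 0);
- LLVMValueRef B = LLVMConstInt(I64, 7, 0);
- LLVMValueRef P = LLVMConstMul(A, B); // i64 42
-*/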
-
-/*--.. Operations on global variables, functions, and aliases (globals) ....--*/
-
-LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) {
- return wrap(unwrap<GlobalValue>(Global)->getParent());
-}
-
-LLVMBool LLVMIsDeclaration(LLVMValueRef Global) {
- return unwrap<GlobalValue>(Global)->isDeclaration();
-}
-
-LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
- switch (unwrap<GlobalValue>(Global)->getLinkage()) {
- case GlobalValue::ExternalLinkage:
- return LLVMExternalLinkage;
- case GlobalValue::AvailableExternallyLinkage:
- return LLVMAvailableExternallyLinkage;
- case GlobalValue::LinkOnceAnyLinkage:
- return LLVMLinkOnceAnyLinkage;
- case GlobalValue::LinkOnceODRLinkage:
- return LLVMLinkOnceODRLinkage;
- case GlobalValue::LinkOnceODRAutoHideLinkage:
- return LLVMLinkOnceODRAutoHideLinkage;
- case GlobalValue::WeakAnyLinkage:
- return LLVMWeakAnyLinkage;
- case GlobalValue::WeakODRLinkage:
- return LLVMWeakODRLinkage;
- case GlobalValue::AppendingLinkage:
- return LLVMAppendingLinkage;
- case GlobalValue::InternalLinkage:
- return LLVMInternalLinkage;
- case GlobalValue::PrivateLinkage:
- return LLVMPrivateLinkage;
- case GlobalValue::LinkerPrivateLinkage:
- return LLVMLinkerPrivateLinkage;
- case GlobalValue::LinkerPrivateWeakLinkage:
- return LLVMLinkerPrivateWeakLinkage;
- case GlobalValue::DLLImportLinkage:
- return LLVMDLLImportLinkage;
- case GlobalValue::DLLExportLinkage:
- return LLVMDLLExportLinkage;
- case GlobalValue::ExternalWeakLinkage:
- return LLVMExternalWeakLinkage;
- case GlobalValue::CommonLinkage:
- return LLVMCommonLinkage;
- }
-
- llvm_unreachable("Invalid GlobalValue linkage!");
-}
-
-void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
- GlobalValue *GV = unwrap<GlobalValue>(Global);
-
- switch (Linkage) {
- case LLVMExternalLinkage:
- GV->setLinkage(GlobalValue::ExternalLinkage);
- break;
- case LLVMAvailableExternallyLinkage:
- GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
- break;
- case LLVMLinkOnceAnyLinkage:
- GV->setLinkage(GlobalValue::LinkOnceAnyLinkage);
- break;
- case LLVMLinkOnceODRLinkage:
- GV->setLinkage(GlobalValue::LinkOnceODRLinkage);
- break;
- case LLVMLinkOnceODRAutoHideLinkage:
- GV->setLinkage(GlobalValue::LinkOnceODRAutoHideLinkage);
- break;
- case LLVMWeakAnyLinkage:
- GV->setLinkage(GlobalValue::WeakAnyLinkage);
- break;
- case LLVMWeakODRLinkage:
- GV->setLinkage(GlobalValue::WeakODRLinkage);
- break;
- case LLVMAppendingLinkage:
- GV->setLinkage(GlobalValue::AppendingLinkage);
- break;
- case LLVMInternalLinkage:
- GV->setLinkage(GlobalValue::InternalLinkage);
- break;
- case LLVMPrivateLinkage:
- GV->setLinkage(GlobalValue::PrivateLinkage);
- break;
- case LLVMLinkerPrivateLinkage:
- GV->setLinkage(GlobalValue::LinkerPrivateLinkage);
- break;
- case LLVMLinkerPrivateWeakLinkage:
- GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
- break;
- case LLVMDLLImportLinkage:
- GV->setLinkage(GlobalValue::DLLImportLinkage);
- break;
- case LLVMDLLExportLinkage:
- GV->setLinkage(GlobalValue::DLLExportLinkage);
- break;
- case LLVMExternalWeakLinkage:
- GV->setLinkage(GlobalValue::ExternalWeakLinkage);
- break;
- case LLVMGhostLinkage:
- DEBUG(errs()
- << "LLVMSetLinkage(): LLVMGhostLinkage is no longer supported.");
- break;
- case LLVMCommonLinkage:
- GV->setLinkage(GlobalValue::CommonLinkage);
- break;
- }
-}
-
-const char *LLVMGetSection(LLVMValueRef Global) {
- return unwrap<GlobalValue>(Global)->getSection().c_str();
-}
-
-void LLVMSetSection(LLVMValueRef Global, const char *Section) {
- unwrap<GlobalValue>(Global)->setSection(Section);
-}
-
-LLVMVisibility LLVMGetVisibility(LLVMValueRef Global) {
- return static_cast<LLVMVisibility>(
- unwrap<GlobalValue>(Global)->getVisibility());
-}
-
-void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz) {
- unwrap<GlobalValue>(Global)
- ->setVisibility(static_cast<GlobalValue::VisibilityTypes>(Viz));
-}
-
-unsigned LLVMGetAlignment(LLVMValueRef Global) {
- return unwrap<GlobalValue>(Global)->getAlignment();
-}
-
-void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes) {
- unwrap<GlobalValue>(Global)->setAlignment(Bytes);
-}
-
-/*--.. Operations on global variables ......................................--*/
-
-LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
- return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
- GlobalValue::ExternalLinkage, 0, Name));
-}
-
-LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty,
- const char *Name,
- unsigned AddressSpace) {
- return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
- GlobalValue::ExternalLinkage, 0, Name, 0,
- GlobalVariable::NotThreadLocal, AddressSpace));
-}
-
-LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) {
- return wrap(unwrap(M)->getNamedGlobal(Name));
-}
-
-LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) {
- Module *Mod = unwrap(M);
- Module::global_iterator I = Mod->global_begin();
- if (I == Mod->global_end())
- return 0;
- return wrap(I);
-}
-
-LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) {
- Module *Mod = unwrap(M);
- Module::global_iterator I = Mod->global_end();
- if (I == Mod->global_begin())
- return 0;
- return wrap(--I);
-}
-
-LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) {
- GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
- Module::global_iterator I = GV;
- if (++I == GV->getParent()->global_end())
- return 0;
- return wrap(I);
-}
-
-LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) {
- GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
- Module::global_iterator I = GV;
- if (I == GV->getParent()->global_begin())
- return 0;
- return wrap(--I);
-}
-
-void LLVMDeleteGlobal(LLVMValueRef GlobalVar) {
- unwrap<GlobalVariable>(GlobalVar)->eraseFromParent();
-}
-
-LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar) {
- GlobalVariable* GV = unwrap<GlobalVariable>(GlobalVar);
- if ( !GV->hasInitializer() )
- return 0;
- return wrap(GV->getInitializer());
-}
-
-void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal) {
- unwrap<GlobalVariable>(GlobalVar)
- ->setInitializer(unwrap<Constant>(ConstantVal));
-}
-
-LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar) {
- return unwrap<GlobalVariable>(GlobalVar)->isThreadLocal();
-}
-
-void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal) {
- unwrap<GlobalVariable>(GlobalVar)->setThreadLocal(IsThreadLocal != 0);
-}
-
-LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar) {
- return unwrap<GlobalVariable>(GlobalVar)->isConstant();
-}
-
-void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) {
- unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0);
-}
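-
-/* Sketch: defining an internal constant global, assuming an existing
- LLVMModuleRef M and LLVMContextRef C; the name "answer" is illustrative:
-
- LLVMTypeRef I32 = LLVMInt32TypeInContext(C);
- LLVMValueRef G = LLVMAddGlobal(M, I32, "answer");
- LLVMSetInitializer(G, LLVMConstInt(I32, 42, 0));
- LLVMSetGlobalConstant(G, 1);
- LLVMSetLinkage(G, LLVMInternalLinkage);
-*/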
-
-/*--.. Operations on aliases ...............................................--*/
-
-LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
- const char *Name) {
- return wrap(new GlobalAlias(unwrap(Ty), GlobalValue::ExternalLinkage, Name,
- unwrap<Constant>(Aliasee), unwrap (M)));
-}
-
-/*--.. Operations on functions .............................................--*/
-
-LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
- LLVMTypeRef FunctionTy) {
- return wrap(Function::Create(unwrap<FunctionType>(FunctionTy),
- GlobalValue::ExternalLinkage, Name, unwrap(M)));
-}
-
-LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name) {
- return wrap(unwrap(M)->getFunction(Name));
-}
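-
-/* Sketch: declaring "int f(int)", assuming an existing LLVMModuleRef M and
- LLVMContextRef C; the function name is illustrative:
-
- LLVMTypeRef I32 = LLVMInt32TypeInContext(C);
- LLVMTypeRef FT = LLVMFunctionType(I32, &I32, 1, 0); // 0 = not vararg
- LLVMValueRef F = LLVMAddFunction(M, "f", FT);
-*/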
-
-LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) {
- Module *Mod = unwrap(M);
- Module::iterator I = Mod->begin();
- if (I == Mod->end())
- return 0;
- return wrap(I);
-}
-
-LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) {
- Module *Mod = unwrap(M);
- Module::iterator I = Mod->end();
- if (I == Mod->begin())
- return 0;
- return wrap(--I);
-}
-
-LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) {
- Function *Func = unwrap<Function>(Fn);
- Module::iterator I = Func;
- if (++I == Func->getParent()->end())
- return 0;
- return wrap(I);
-}
-
-LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) {
- Function *Func = unwrap<Function>(Fn);
- Module::iterator I = Func;
- if (I == Func->getParent()->begin())
- return 0;
- return wrap(--I);
-}
-
-void LLVMDeleteFunction(LLVMValueRef Fn) {
- unwrap<Function>(Fn)->eraseFromParent();
-}
-
-unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) {
- if (Function *F = dyn_cast<Function>(unwrap(Fn)))
- return F->getIntrinsicID();
- return 0;
-}
-
-unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn) {
- return unwrap<Function>(Fn)->getCallingConv();
-}
-
-void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) {
- return unwrap<Function>(Fn)->setCallingConv(
- static_cast<CallingConv::ID>(CC));
-}
-
-const char *LLVMGetGC(LLVMValueRef Fn) {
- Function *F = unwrap<Function>(Fn);
- return F->hasGC() ? F->getGC() : 0;
-}
-
-void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
- Function *F = unwrap<Function>(Fn);
- if (GC)
- F->setGC(GC);
- else
- F->clearGC();
-}
-
-void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
- Function *Func = unwrap<Function>(Fn);
- const AttrListPtr PAL = Func->getAttributes();
- AttrBuilder B(PA);
- const AttrListPtr PALnew =
- PAL.addAttr(Func->getContext(), AttrListPtr::FunctionIndex,
- Attributes::get(Func->getContext(), B));
- Func->setAttributes(PALnew);
-}
-
-void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
- Function *Func = unwrap<Function>(Fn);
- const AttrListPtr PAL = Func->getAttributes();
- AttrBuilder B(PA);
- const AttrListPtr PALnew =
- PAL.removeAttr(Func->getContext(), AttrListPtr::FunctionIndex,
- Attributes::get(Func->getContext(), B));
- Func->setAttributes(PALnew);
-}
-
-LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn) {
- Function *Func = unwrap<Function>(Fn);
- const AttrListPtr PAL = Func->getAttributes();
- Attributes attr = PAL.getFnAttributes();
- return (LLVMAttribute)attr.Raw();
-}
-
-/*--.. Operations on parameters ............................................--*/
-
-unsigned LLVMCountParams(LLVMValueRef FnRef) {
- // This function is strictly redundant with
- // LLVMCountParamTypes(LLVMGetElementType(LLVMTypeOf(FnRef))).
- return unwrap<Function>(FnRef)->arg_size();
-}
-
-void LLVMGetParams(LLVMValueRef FnRef, LLVMValueRef *ParamRefs) {
- Function *Fn = unwrap<Function>(FnRef);
- for (Function::arg_iterator I = Fn->arg_begin(),
- E = Fn->arg_end(); I != E; I++)
- *ParamRefs++ = wrap(I);
-}
-
-LLVMValueRef LLVMGetParam(LLVMValueRef FnRef, unsigned index) {
- Function::arg_iterator AI = unwrap<Function>(FnRef)->arg_begin();
- while (index-- > 0)
- AI++;
- return wrap(AI);
-}
-
-LLVMValueRef LLVMGetParamParent(LLVMValueRef V) {
- return wrap(unwrap<Argument>(V)->getParent());
-}
-
-LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) {
- Function *Func = unwrap<Function>(Fn);
- Function::arg_iterator I = Func->arg_begin();
- if (I == Func->arg_end())
- return 0;
- return wrap(I);
-}
-
-LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) {
- Function *Func = unwrap<Function>(Fn);
- Function::arg_iterator I = Func->arg_end();
- if (I == Func->arg_begin())
- return 0;
- return wrap(--I);
-}
-
-LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) {
- Argument *A = unwrap<Argument>(Arg);
- Function::arg_iterator I = A;
- if (++I == A->getParent()->arg_end())
- return 0;
- return wrap(I);
-}
-
-LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
- Argument *A = unwrap<Argument>(Arg);
- Function::arg_iterator I = A;
- if (I == A->getParent()->arg_begin())
- return 0;
- return wrap(--I);
-}
-
-void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- Argument *A = unwrap<Argument>(Arg);
- AttrBuilder B(PA);
- A->addAttr(Attributes::get(A->getContext(), B));
-}
-
-void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- Argument *A = unwrap<Argument>(Arg);
- AttrBuilder B(PA);
- A->removeAttr(Attributes::get(A->getContext(), B));
-}
-
-LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
- Argument *A = unwrap<Argument>(Arg);
- Attributes attr = A->getParent()->getAttributes().getParamAttributes(
- A->getArgNo()+1);
- return (LLVMAttribute)attr.Raw();
-}
-
-void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
- AttrBuilder B;
- B.addAlignmentAttr(align);
- unwrap<Argument>(Arg)->addAttr(
- Attributes::get(unwrap<Argument>(Arg)->getContext(), B));
-}
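-
-/* Parameter-access sketch for an existing function F:
-
- unsigned NumParams = LLVMCountParams(F);
- LLVMValueRef First = LLVMGetFirstParam(F);
- LLVMValueRef P0 = LLVMGetParam(F, 0); // the same argument as First
-*/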
-
-/*--.. Operations on basic blocks ..........................................--*/
-
-LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) {
- return wrap(static_cast<Value*>(unwrap(BB)));
-}
-
-LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val) {
- return isa<BasicBlock>(unwrap(Val));
-}
-
-LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val) {
- return wrap(unwrap<BasicBlock>(Val));
-}
-
-LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB) {
- return wrap(unwrap(BB)->getParent());
-}
-
-LLVMValueRef LLVMGetBasicBlockTerminator(LLVMBasicBlockRef BB) {
- return wrap(unwrap(BB)->getTerminator());
-}
-
-unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) {
- return unwrap<Function>(FnRef)->size();
-}
-
-void LLVMGetBasicBlocks(LLVMValueRef FnRef, LLVMBasicBlockRef *BasicBlocksRefs){
- Function *Fn = unwrap<Function>(FnRef);
- for (Function::iterator I = Fn->begin(), E = Fn->end(); I != E; I++)
- *BasicBlocksRefs++ = wrap(I);
-}
-
-LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn) {
- return wrap(&unwrap<Function>(Fn)->getEntryBlock());
-}
-
-LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) {
- Function *Func = unwrap<Function>(Fn);
- Function::iterator I = Func->begin();
- if (I == Func->end())
- return 0;
- return wrap(I);
-}
-
-LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) {
- Function *Func = unwrap<Function>(Fn);
- Function::iterator I = Func->end();
- if (I == Func->begin())
- return 0;
- return wrap(--I);
-}
-
-LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) {
- BasicBlock *Block = unwrap(BB);
- Function::iterator I = Block;
- if (++I == Block->getParent()->end())
- return 0;
- return wrap(I);
-}
-
-LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) {
- BasicBlock *Block = unwrap(BB);
- Function::iterator I = Block;
- if (I == Block->getParent()->begin())
- return 0;
- return wrap(--I);
-}
-
-LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
- LLVMValueRef FnRef,
- const char *Name) {
- return wrap(BasicBlock::Create(*unwrap(C), Name, unwrap<Function>(FnRef)));
-}
-
-LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef FnRef, const char *Name) {
- return LLVMAppendBasicBlockInContext(LLVMGetGlobalContext(), FnRef, Name);
-}
-
-LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C,
- LLVMBasicBlockRef BBRef,
- const char *Name) {
- BasicBlock *BB = unwrap(BBRef);
- return wrap(BasicBlock::Create(*unwrap(C), Name, BB->getParent(), BB));
-}
-
-LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef BBRef,
- const char *Name) {
- return LLVMInsertBasicBlockInContext(LLVMGetGlobalContext(), BBRef, Name);
-}
-
-void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) {
- unwrap(BBRef)->eraseFromParent();
-}
-
-void LLVMRemoveBasicBlockFromParent(LLVMBasicBlockRef BBRef) {
- unwrap(BBRef)->removeFromParent();
-}
-
-void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
- unwrap(BB)->moveBefore(unwrap(MovePos));
-}
-
-void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
- unwrap(BB)->moveAfter(unwrap(MovePos));
-}
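-
-/* Basic-block sketch, assuming an existing LLVMContextRef C and function F;
- the block names are illustrative:
-
- LLVMBasicBlockRef Entry = LLVMAppendBasicBlockInContext(C, F, "entry");
- LLVMBasicBlockRef Exit = LLVMAppendBasicBlockInContext(C, F, "exit");
- LLVMMoveBasicBlockAfter(Exit, Entry);
-*/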
-
-/*--.. Operations on instructions ..........................................--*/
-
-LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst) {
- return wrap(unwrap<Instruction>(Inst)->getParent());
-}
-
-LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) {
- BasicBlock *Block = unwrap(BB);
- BasicBlock::iterator I = Block->begin();
- if (I == Block->end())
- return 0;
- return wrap(I);
-}
-
-LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) {
- BasicBlock *Block = unwrap(BB);
- BasicBlock::iterator I = Block->end();
- if (I == Block->begin())
- return 0;
- return wrap(--I);
-}
-
-LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) {
- Instruction *Instr = unwrap<Instruction>(Inst);
- BasicBlock::iterator I = Instr;
- if (++I == Instr->getParent()->end())
- return 0;
- return wrap(I);
-}
-
-LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) {
- Instruction *Instr = unwrap<Instruction>(Inst);
- BasicBlock::iterator I = Instr;
- if (I == Instr->getParent()->begin())
- return 0;
- return wrap(--I);
-}
-
-void LLVMInstructionEraseFromParent(LLVMValueRef Inst) {
- unwrap<Instruction>(Inst)->eraseFromParent();
-}
-
-LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst) {
- if (ICmpInst *I = dyn_cast<ICmpInst>(unwrap(Inst)))
- return (LLVMIntPredicate)I->getPredicate();
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(unwrap(Inst)))
- if (CE->getOpcode() == Instruction::ICmp)
- return (LLVMIntPredicate)CE->getPredicate();
- return (LLVMIntPredicate)0;
-}
-
-LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst) {
- if (Instruction *C = dyn_cast<Instruction>(unwrap(Inst)))
- return map_to_llvmopcode(C->getOpcode());
- return (LLVMOpcode)0;
-}
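-
-/* Inspection sketch: counting the instructions of an existing block BB:
-
- unsigned N = 0;
- LLVMValueRef I;
- for (I = LLVMGetFirstInstruction(BB); I; I = LLVMGetNextInstruction(I))
- ++N;
-*/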
-
-/*--.. Call and invoke instructions ........................................--*/
-
-unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
- Value *V = unwrap(Instr);
- if (CallInst *CI = dyn_cast<CallInst>(V))
- return CI->getCallingConv();
- if (InvokeInst *II = dyn_cast<InvokeInst>(V))
- return II->getCallingConv();
- llvm_unreachable("LLVMGetInstructionCallConv applies only to call and invoke!");
-}
-
-void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
- Value *V = unwrap(Instr);
- if (CallInst *CI = dyn_cast<CallInst>(V))
- return CI->setCallingConv(static_cast<CallingConv::ID>(CC));
- else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
- return II->setCallingConv(static_cast<CallingConv::ID>(CC));
- llvm_unreachable("LLVMSetInstructionCallConv applies only to call and invoke!");
-}
-
-void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
- LLVMAttribute PA) {
- CallSite Call = CallSite(unwrap<Instruction>(Instr));
- AttrBuilder B(PA);
- Call.setAttributes(
- Call.getAttributes().addAttr(Call->getContext(), index,
- Attributes::get(Call->getContext(), B)));
-}
-
-void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
- LLVMAttribute PA) {
- CallSite Call = CallSite(unwrap<Instruction>(Instr));
- AttrBuilder B(PA);
- Call.setAttributes(
- Call.getAttributes().removeAttr(Call->getContext(), index,
- Attributes::get(Call->getContext(), B)));
-}
-
-void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
- unsigned align) {
- CallSite Call = CallSite(unwrap<Instruction>(Instr));
- AttrBuilder B;
- B.addAlignmentAttr(align);
- Call.setAttributes(Call.getAttributes().addAttr(Call->getContext(), index,
- Attributes::get(Call->getContext(), B)));
-}
-
-/*--.. Operations on call instructions (only) ..............................--*/
-
-LLVMBool LLVMIsTailCall(LLVMValueRef Call) {
- return unwrap<CallInst>(Call)->isTailCall();
-}
-
-void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) {
- unwrap<CallInst>(Call)->setTailCall(isTailCall);
-}
-
-/*--.. Operations on switch instructions (only) ............................--*/
-
-LLVMBasicBlockRef LLVMGetSwitchDefaultDest(LLVMValueRef Switch) {
- return wrap(unwrap<SwitchInst>(Switch)->getDefaultDest());
-}
-
-/*--.. Operations on phi nodes .............................................--*/
-
-void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues,
- LLVMBasicBlockRef *IncomingBlocks, unsigned Count) {
- PHINode *PhiVal = unwrap<PHINode>(PhiNode);
- for (unsigned I = 0; I != Count; ++I)
- PhiVal->addIncoming(unwrap(IncomingValues[I]), unwrap(IncomingBlocks[I]));
-}
-
-unsigned LLVMCountIncoming(LLVMValueRef PhiNode) {
- return unwrap<PHINode>(PhiNode)->getNumIncomingValues();
-}
-
-LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index) {
- return wrap(unwrap<PHINode>(PhiNode)->getIncomingValue(Index));
-}
-
-LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index) {
- return wrap(unwrap<PHINode>(PhiNode)->getIncomingBlock(Index));
-}
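-
-/* Phi sketch, assuming a builder B positioned in a join block that has two
- predecessors BB1 and BB2 supplying values V1 and V2:
-
- LLVMValueRef Phi = LLVMBuildPhi(B, LLVMTypeOf(V1), "merge");
- LLVMValueRef Vals[] = { V1, V2 };
- LLVMBasicBlockRef Preds[] = { BB1, BB2 };
- LLVMAddIncoming(Phi, Vals, Preds, 2);
-*/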
-
-
-/*===-- Instruction builders ----------------------------------------------===*/
-
-LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C) {
- return wrap(new IRBuilder<>(*unwrap(C)));
-}
-
-LLVMBuilderRef LLVMCreateBuilder(void) {
- return LLVMCreateBuilderInContext(LLVMGetGlobalContext());
-}
-
-void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block,
- LLVMValueRef Instr) {
- BasicBlock *BB = unwrap(Block);
- Instruction *I = Instr ? unwrap<Instruction>(Instr) : (Instruction*)BB->end();
- unwrap(Builder)->SetInsertPoint(BB, I);
-}
-
-void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr) {
- Instruction *I = unwrap<Instruction>(Instr);
- unwrap(Builder)->SetInsertPoint(I->getParent(), I);
-}
-
-void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block) {
- BasicBlock *BB = unwrap(Block);
- unwrap(Builder)->SetInsertPoint(BB);
-}
-
-LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder) {
- return wrap(unwrap(Builder)->GetInsertBlock());
-}
-
-void LLVMClearInsertionPosition(LLVMBuilderRef Builder) {
- unwrap(Builder)->ClearInsertionPoint();
-}
-
-void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr) {
- unwrap(Builder)->Insert(unwrap<Instruction>(Instr));
-}
-
-void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr,
- const char *Name) {
- unwrap(Builder)->Insert(unwrap<Instruction>(Instr), Name);
-}
-
-void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
- delete unwrap(Builder);
-}
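-
-/* Builder lifecycle sketch, assuming an existing LLVMContextRef C and an
- entry block Entry of some function:
-
- LLVMBuilderRef B = LLVMCreateBuilderInContext(C);
- LLVMPositionBuilderAtEnd(B, Entry);
- LLVMBuildRetVoid(B);
- LLVMDisposeBuilder(B);
-*/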
-
-/*--.. Metadata builders ...................................................--*/
-
-void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
- MDNode *Loc = L ? unwrap<MDNode>(L) : NULL;
- unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc));
-}
-
-LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) {
- return wrap(unwrap(Builder)->getCurrentDebugLocation()
- .getAsMDNode(unwrap(Builder)->getContext()));
-}
-
-void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
- unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst));
-}
-
-
-/*--.. Instruction builders ................................................--*/
-
-LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef B) {
- return wrap(unwrap(B)->CreateRetVoid());
-}
-
-LLVMValueRef LLVMBuildRet(LLVMBuilderRef B, LLVMValueRef V) {
- return wrap(unwrap(B)->CreateRet(unwrap(V)));
-}
-
-LLVMValueRef LLVMBuildAggregateRet(LLVMBuilderRef B, LLVMValueRef *RetVals,
- unsigned N) {
- return wrap(unwrap(B)->CreateAggregateRet(unwrap(RetVals), N));
-}
-
-LLVMValueRef LLVMBuildBr(LLVMBuilderRef B, LLVMBasicBlockRef Dest) {
- return wrap(unwrap(B)->CreateBr(unwrap(Dest)));
-}
-
-LLVMValueRef LLVMBuildCondBr(LLVMBuilderRef B, LLVMValueRef If,
- LLVMBasicBlockRef Then, LLVMBasicBlockRef Else) {
- return wrap(unwrap(B)->CreateCondBr(unwrap(If), unwrap(Then), unwrap(Else)));
-}
-
-LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef B, LLVMValueRef V,
- LLVMBasicBlockRef Else, unsigned NumCases) {
- return wrap(unwrap(B)->CreateSwitch(unwrap(V), unwrap(Else), NumCases));
-}
-
-LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr,
- unsigned NumDests) {
- return wrap(unwrap(B)->CreateIndirectBr(unwrap(Addr), NumDests));
-}
-
-LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
- LLVMValueRef *Args, unsigned NumArgs,
- LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
- const char *Name) {
- return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch),
- makeArrayRef(unwrap(Args), NumArgs),
- Name));
-}
-
-LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty,
- LLVMValueRef PersFn, unsigned NumClauses,
- const char *Name) {
- return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty),
- cast<Function>(unwrap(PersFn)),
- NumClauses, Name));
-}
-
-LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn) {
- return wrap(unwrap(B)->CreateResume(unwrap(Exn)));
-}
-
-LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef B) {
- return wrap(unwrap(B)->CreateUnreachable());
-}
-
-void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal,
- LLVMBasicBlockRef Dest) {
- unwrap<SwitchInst>(Switch)->addCase(unwrap<ConstantInt>(OnVal), unwrap(Dest));
-}
-
-void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) {
- unwrap<IndirectBrInst>(IndirectBr)->addDestination(unwrap(Dest));
-}
-
-void LLVMAddClause(LLVMValueRef LandingPad, LLVMValueRef ClauseVal) {
- unwrap<LandingPadInst>(LandingPad)->addClause(
- cast<Constant>(unwrap(ClauseVal)));
-}
-
-void LLVMSetCleanup(LLVMValueRef LandingPad, LLVMBool Val) {
- unwrap<LandingPadInst>(LandingPad)->setCleanup(Val);
-}
-
-/*--.. Arithmetic ..........................................................--*/
-
-LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateAdd(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateNSWAdd(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateNUWAdd(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateFAdd(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateNSWSub(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateNUWSub(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildFSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateFSub(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateNSWMul(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateNUWMul(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildFMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateFMul(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateUDiv(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateSDiv(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef B, LLVMValueRef LHS,
- LLVMValueRef RHS, const char *Name) {
- return wrap(unwrap(B)->CreateExactSDiv(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateFDiv(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildURem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateURem(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildSRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateSRem(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildFRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateFRem(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildShl(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateShl(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildLShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateLShr(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildAShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateAShr(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildAnd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateAnd(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildOr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateOr(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateXor(unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op,
- LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(map_from_llvmopcode(Op)), unwrap(LHS),
- unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
- return wrap(unwrap(B)->CreateNeg(unwrap(V), Name));
-}
-
-LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V,
- const char *Name) {
- return wrap(unwrap(B)->CreateNSWNeg(unwrap(V), Name));
-}
-
-LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V,
- const char *Name) {
- return wrap(unwrap(B)->CreateNUWNeg(unwrap(V), Name));
-}
-
-LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
- return wrap(unwrap(B)->CreateFNeg(unwrap(V), Name));
-}
-
-LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
- return wrap(unwrap(B)->CreateNot(unwrap(V), Name));
-}
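-
-/* Arithmetic sketch, assuming a positioned builder B and two i32 values X
- and Y; the instruction names are illustrative:
-
- LLVMValueRef Sum = LLVMBuildNSWAdd(B, X, Y, "sum");
- LLVMValueRef Neg = LLVMBuildNeg(B, Sum, "neg");
-*/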
-
-/*--.. Memory ..............................................................--*/
-
-LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
- const char *Name) {
- Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
- Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
- AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
- Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
- ITy, unwrap(Ty), AllocSize,
- 0, 0, "");
- return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
-}
-
-LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
- LLVMValueRef Val, const char *Name) {
- Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
- Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
- AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
- Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
- ITy, unwrap(Ty), AllocSize,
- unwrap(Val), 0, "");
- return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
-}
-
-LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
- const char *Name) {
- return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), 0, Name));
-}
-
-LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
- LLVMValueRef Val, const char *Name) {
- return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), unwrap(Val), Name));
-}
-
-LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) {
- return wrap(unwrap(B)->Insert(
- CallInst::CreateFree(unwrap(PointerVal), unwrap(B)->GetInsertBlock())));
-}
-
-LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal,
- const char *Name) {
- return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name));
-}
-
-LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMValueRef PointerVal) {
- return wrap(unwrap(B)->CreateStore(unwrap(Val), unwrap(PointerVal)));
-}
-
-LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
- LLVMValueRef *Indices, unsigned NumIndices,
- const char *Name) {
- ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
- return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), IdxList, Name));
-}
-
-LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
- LLVMValueRef *Indices, unsigned NumIndices,
- const char *Name) {
- ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
- return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), IdxList, Name));
-}
-
-LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
- unsigned Idx, const char *Name) {
- return wrap(unwrap(B)->CreateStructGEP(unwrap(Pointer), Idx, Name));
-}
-
-LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
- const char *Name) {
- return wrap(unwrap(B)->CreateGlobalString(Str, Name));
-}
-
-LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
- const char *Name) {
- return wrap(unwrap(B)->CreateGlobalStringPtr(Str, Name));
-}
-
-LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) {
- Value *P = unwrap<Value>(MemAccessInst);
- if (LoadInst *LI = dyn_cast<LoadInst>(P))
- return LI->isVolatile();
- return cast<StoreInst>(P)->isVolatile();
-}
-
-void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) {
- Value *P = unwrap<Value>(MemAccessInst);
- if (LoadInst *LI = dyn_cast<LoadInst>(P))
- return LI->setVolatile(isVolatile);
- return cast<StoreInst>(P)->setVolatile(isVolatile);
-}
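-
-/* Stack-slot sketch, assuming a positioned builder B and an existing
- LLVMContextRef C:
-
- LLVMTypeRef I32 = LLVMInt32TypeInContext(C);
- LLVMValueRef Slot = LLVMBuildAlloca(B, I32, "tmp");
- LLVMBuildStore(B, LLVMConstInt(I32, 7, 0), Slot);
- LLVMValueRef Val = LLVMBuildLoad(B, Slot, "tmp.val");
-*/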
-
-/*--.. Casts ...............................................................--*/
-
-LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateTrunc(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildZExt(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateZExt(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildSExt(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateSExt(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildFPToUI(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateFPToUI(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildFPToSI(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateFPToSI(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildUIToFP(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateUIToFP(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildSIToFP(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateSIToFP(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildFPTrunc(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateFPTrunc(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildFPExt(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateFPExt(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildPtrToInt(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreatePtrToInt(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildIntToPtr(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateIntToPtr(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateBitCast(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateZExtOrBitCast(unwrap(Val), unwrap(DestTy),
- Name));
-}
-
-LLVMValueRef LLVMBuildSExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateSExtOrBitCast(unwrap(Val), unwrap(DestTy),
- Name));
-}
-
-LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateTruncOrBitCast(unwrap(Val), unwrap(DestTy),
- Name));
-}
-
-LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateCast(Instruction::CastOps(map_from_llvmopcode(Op)), unwrap(Val),
- unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name));
-}
-
-LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy),
- /*isSigned*/true, Name));
-}
-
-LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateFPCast(unwrap(Val), unwrap(DestTy), Name));
-}
-
-/*--.. Comparisons .........................................................--*/
-
-LLVMValueRef LLVMBuildICmp(LLVMBuilderRef B, LLVMIntPredicate Op,
- LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateICmp(static_cast<ICmpInst::Predicate>(Op),
- unwrap(LHS), unwrap(RHS), Name));
-}
-
-LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef B, LLVMRealPredicate Op,
- LLVMValueRef LHS, LLVMValueRef RHS,
- const char *Name) {
- return wrap(unwrap(B)->CreateFCmp(static_cast<FCmpInst::Predicate>(Op),
- unwrap(LHS), unwrap(RHS), Name));
-}
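-
-/* Comparison sketch, assuming a positioned builder B, two i32 values X and
- Y, and two destination blocks ThenBB and ElseBB:
-
- LLVMValueRef Eq = LLVMBuildICmp(B, LLVMIntEQ, X, Y, "eq");
- LLVMBuildCondBr(B, Eq, ThenBB, ElseBB);
-*/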
-
-/*--.. Miscellaneous instructions ..........................................--*/
-
-LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) {
- return wrap(unwrap(B)->CreatePHI(unwrap(Ty), 0, Name));
-}
-
-LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
- LLVMValueRef *Args, unsigned NumArgs,
- const char *Name) {
- return wrap(unwrap(B)->CreateCall(unwrap(Fn),
- makeArrayRef(unwrap(Args), NumArgs),
- Name));
-}
-
-LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If,
- LLVMValueRef Then, LLVMValueRef Else,
- const char *Name) {
- return wrap(unwrap(B)->CreateSelect(unwrap(If), unwrap(Then), unwrap(Else),
- Name));
-}
-
-LLVMValueRef LLVMBuildVAArg(LLVMBuilderRef B, LLVMValueRef List,
- LLVMTypeRef Ty, const char *Name) {
- return wrap(unwrap(B)->CreateVAArg(unwrap(List), unwrap(Ty), Name));
-}
-
-LLVMValueRef LLVMBuildExtractElement(LLVMBuilderRef B, LLVMValueRef VecVal,
- LLVMValueRef Index, const char *Name) {
- return wrap(unwrap(B)->CreateExtractElement(unwrap(VecVal), unwrap(Index),
- Name));
-}
-
-LLVMValueRef LLVMBuildInsertElement(LLVMBuilderRef B, LLVMValueRef VecVal,
- LLVMValueRef EltVal, LLVMValueRef Index,
- const char *Name) {
- return wrap(unwrap(B)->CreateInsertElement(unwrap(VecVal), unwrap(EltVal),
- unwrap(Index), Name));
-}
-
-LLVMValueRef LLVMBuildShuffleVector(LLVMBuilderRef B, LLVMValueRef V1,
- LLVMValueRef V2, LLVMValueRef Mask,
- const char *Name) {
- return wrap(unwrap(B)->CreateShuffleVector(unwrap(V1), unwrap(V2),
- unwrap(Mask), Name));
-}
-
-LLVMValueRef LLVMBuildExtractValue(LLVMBuilderRef B, LLVMValueRef AggVal,
- unsigned Index, const char *Name) {
- return wrap(unwrap(B)->CreateExtractValue(unwrap(AggVal), Index, Name));
-}
-
-LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef B, LLVMValueRef AggVal,
- LLVMValueRef EltVal, unsigned Index,
- const char *Name) {
- return wrap(unwrap(B)->CreateInsertValue(unwrap(AggVal), unwrap(EltVal),
- Index, Name));
-}
-
-LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef B, LLVMValueRef Val,
- const char *Name) {
- return wrap(unwrap(B)->CreateIsNull(unwrap(Val), Name));
-}
-
-LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef B, LLVMValueRef Val,
- const char *Name) {
- return wrap(unwrap(B)->CreateIsNotNull(unwrap(Val), Name));
-}
-
-LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
- LLVMValueRef RHS, const char *Name) {
- return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name));
-}
-
-
-/*===-- Module providers --------------------------------------------------===*/
-
-LLVMModuleProviderRef
-LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M) {
- return reinterpret_cast<LLVMModuleProviderRef>(M);
-}
-
-void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) {
- delete unwrap(MP);
-}
-
-
-/*===-- Memory buffers ----------------------------------------------------===*/
-
-LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
- const char *Path,
- LLVMMemoryBufferRef *OutMemBuf,
- char **OutMessage) {
-
- OwningPtr<MemoryBuffer> MB;
- error_code ec;
- if (!(ec = MemoryBuffer::getFile(Path, MB))) {
- *OutMemBuf = wrap(MB.take());
- return 0;
- }
-
- *OutMessage = strdup(ec.message().c_str());
- return 1;
-}
-
-LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
- char **OutMessage) {
- OwningPtr<MemoryBuffer> MB;
- error_code ec;
- if (!(ec = MemoryBuffer::getSTDIN(MB))) {
- *OutMemBuf = wrap(MB.take());
- return 0;
- }
-
- *OutMessage = strdup(ec.message().c_str());
- return 1;
-}
-
-void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
- delete unwrap(MemBuf);
-}
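-
-/* Memory-buffer sketch; the path is illustrative, and on failure the
- message is strdup()ed above, so callers should free() it:
-
- LLVMMemoryBufferRef Buf;
- char *Err = 0;
- if (!LLVMCreateMemoryBufferWithContentsOfFile("input.bc", &Buf, &Err))
- LLVMDisposeMemoryBuffer(Buf);
- else
- free(Err);
-*/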
-
-/*===-- Pass Registry -----------------------------------------------------===*/
-
-LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void) {
- return wrap(PassRegistry::getPassRegistry());
-}
-
-/*===-- Pass Manager ------------------------------------------------------===*/
-
-LLVMPassManagerRef LLVMCreatePassManager() {
- return wrap(new PassManager());
-}
-
-LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
- return wrap(new FunctionPassManager(unwrap(M)));
-}
-
-LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
- return LLVMCreateFunctionPassManagerForModule(
- reinterpret_cast<LLVMModuleRef>(P));
-}
-
-LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
- return unwrap<PassManager>(PM)->run(*unwrap(M));
-}
-
-LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
- return unwrap<FunctionPassManager>(FPM)->doInitialization();
-}
-
-LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
- return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
-}
-
-LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
- return unwrap<FunctionPassManager>(FPM)->doFinalization();
-}
-
-void LLVMDisposePassManager(LLVMPassManagerRef PM) {
- delete unwrap(PM);
-}
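-
-/* Editor's sketch (illustrative, not part of the original file): the intended
-   lifecycle of the function-pass-manager entry points above, for a module M;
-   the function iterators are assumed to come from llvm-c/Core.h:
-
-     LLVMPassManagerRef FPM = LLVMCreateFunctionPassManagerForModule(M);
-     LLVMInitializeFunctionPassManager(FPM);
-     for (LLVMValueRef F = LLVMGetFirstFunction(M); F;
-          F = LLVMGetNextFunction(F))
-       LLVMRunFunctionPassManager(FPM, F);
-     LLVMFinalizeFunctionPassManager(FPM);
-     LLVMDisposePassManager(FPM);
-*/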
diff --git a/lib/VMCore/DIBuilder.cpp b/lib/VMCore/DIBuilder.cpp
deleted file mode 100644
index 152b825523da..000000000000
--- a/lib/VMCore/DIBuilder.cpp
+++ /dev/null
@@ -1,1045 +0,0 @@
-//===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the DIBuilder.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DIBuilder.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
-
-using namespace llvm;
-using namespace llvm::dwarf;
-
-static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
- assert((Tag & LLVMDebugVersionMask) == 0 &&
- "Tag too large for debug encoding!");
- return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion);
-}
-
-DIBuilder::DIBuilder(Module &m)
- : M(m), VMContext(M.getContext()), TheCU(0), TempEnumTypes(0),
- TempRetainTypes(0), TempSubprograms(0), TempGVs(0), DeclareFn(0),
- ValueFn(0)
-{}
-
-/// finalize - Construct any deferred debug info descriptors.
-void DIBuilder::finalize() {
- DIArray Enums = getOrCreateArray(AllEnumTypes);
- DIType(TempEnumTypes).replaceAllUsesWith(Enums);
-
- DIArray RetainTypes = getOrCreateArray(AllRetainTypes);
- DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes);
-
- DIArray SPs = getOrCreateArray(AllSubprograms);
- DIType(TempSubprograms).replaceAllUsesWith(SPs);
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- SmallVector<Value *, 4> Variables;
- if (NamedMDNode *NMD = getFnSpecificMDNode(M, SP)) {
- for (unsigned ii = 0, ee = NMD->getNumOperands(); ii != ee; ++ii)
- Variables.push_back(NMD->getOperand(ii));
- NMD->eraseFromParent();
- }
- if (MDNode *Temp = SP.getVariablesNodes()) {
- DIArray AV = getOrCreateArray(Variables);
- DIType(Temp).replaceAllUsesWith(AV);
- }
- }
-
- DIArray GVs = getOrCreateArray(AllGVs);
- DIType(TempGVs).replaceAllUsesWith(GVs);
-}
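-
-// Editor's sketch (illustrative, not from the original source): the intended
-// lifecycle is to construct the builder, emit one compile unit, build
-// descriptors, and call finalize() exactly once so the temporary nodes above
-// are replaced with the completed lists. Filenames and producer are
-// hypothetical:
-//
-//   DIBuilder DIB(M);
-//   DIB.createCompileUnit(dwarf::DW_LANG_C99, "a.c", "/tmp", "clang",
-//                         /*isOptimized=*/false, /*Flags=*/"",
-//                         /*RunTimeVer=*/0);
-//   // ... createFile/createBasicType/createFunction/... as needed ...
-//   DIB.finalize();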
-
-/// getNonCompileUnitScope - If N is a compile unit, return NULL; otherwise
-/// return N.
-static MDNode *getNonCompileUnitScope(MDNode *N) {
- if (DIDescriptor(N).isCompileUnit())
- return NULL;
- return N;
-}
-
-/// createCompileUnit - A CompileUnit provides an anchor for all debugging
-/// information generated during this instance of compilation.
-void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
- StringRef Directory, StringRef Producer,
- bool isOptimized, StringRef Flags,
- unsigned RunTimeVer) {
- assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) ||
- (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
- "Invalid Language tag");
- assert(!Filename.empty() &&
- "Unable to create compile unit without filename");
- Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
- TempEnumTypes = MDNode::getTemporary(VMContext, TElts);
- Value *THElts[] = { TempEnumTypes };
- MDNode *EnumHolder = MDNode::get(VMContext, THElts);
-
- TempRetainTypes = MDNode::getTemporary(VMContext, TElts);
- Value *TRElts[] = { TempRetainTypes };
- MDNode *RetainHolder = MDNode::get(VMContext, TRElts);
-
- TempSubprograms = MDNode::getTemporary(VMContext, TElts);
- Value *TSElts[] = { TempSubprograms };
- MDNode *SPHolder = MDNode::get(VMContext, TSElts);
-
- TempGVs = MDNode::getTemporary(VMContext, TElts);
- Value *TVElts[] = { TempGVs };
- MDNode *GVHolder = MDNode::get(VMContext, TVElts);
-
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
- MDString::get(VMContext, Filename),
- MDString::get(VMContext, Directory),
- MDString::get(VMContext, Producer),
- // The isMain field is deprecated but is still emitted here.
- ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
- ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
- MDString::get(VMContext, Flags),
- ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer),
- EnumHolder,
- RetainHolder,
- SPHolder,
- GVHolder
- };
- TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
-
- // Create a named metadata node so that the CU is easy to find in a module.
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
- NMD->addOperand(TheCU);
-}
-
-/// createFile - Create a file descriptor to hold debugging information
-/// for a file.
-DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
- assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
- assert(!Filename.empty() && "Unable to create file without name");
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
- MDString::get(VMContext, Filename),
- MDString::get(VMContext, Directory),
- NULL // TheCU
- };
- return DIFile(MDNode::get(VMContext, Elts));
-}
-
-/// createEnumerator - Create a single enumerator value.
-DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
- assert(!Name.empty() && "Unable to create enumerator without name");
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
- MDString::get(VMContext, Name),
- ConstantInt::get(Type::getInt64Ty(VMContext), Val)
- };
- return DIEnumerator(MDNode::get(VMContext, Elts));
-}
-
-/// createNullPtrType - Create the C++0x 'nullptr' type.
-DIType DIBuilder::createNullPtrType(StringRef Name) {
- assert(!Name.empty() && "Unable to create type without name");
- // nullptr is encoded in DIBasicType format. Line number, filename,
- // size, alignment, offset and flags are always empty here.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type),
- NULL, //TheCU,
- MDString::get(VMContext, Name),
- NULL, // Filename
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- ConstantInt::get(Type::getInt32Ty(VMContext), 0) // Encoding
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createBasicType - Create debugging information entry for a basic
-/// type, e.g. 'char'.
-DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
- uint64_t AlignInBits,
- unsigned Encoding) {
- assert(!Name.empty() && "Unable to create type without name");
- // Basic types are encoded in DIBasicType format. Line number, filename,
- // offset and flags are always empty here.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
- NULL, //TheCU,
- MDString::get(VMContext, Name),
- NULL, // Filename
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
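-
-// Editor's sketch (illustrative): a C 'int' on a 32-bit-int target would be
-//
-//   DIType IntTy = DIB.createBasicType("int", 32, 32, dwarf::DW_ATE_signed);
-//
-// and a 64-bit pointer to it, via createPointerType below,
-//
-//   DIType IntPtrTy = DIB.createPointerType(IntTy, 64, 64, "");
-//
-// where the 32/64-bit sizes are target assumptions, not requirements.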
-
-/// createQualifiedType - Create debugging information entry for a qualified
-/// type, e.g. 'const int'.
-DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
- // Qualified types are encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, Tag),
- NULL, //TheCU,
- MDString::get(VMContext, StringRef()), // Empty name.
- NULL, // Filename
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- FromTy
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createPointerType - Create debugging information entry for a pointer.
-DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
- uint64_t AlignInBits, StringRef Name) {
- // Pointer types are encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
- NULL, //TheCU,
- MDString::get(VMContext, Name),
- NULL, // Filename
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- PointeeTy
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createReferenceType - Create debugging information entry for a reference
-/// type.
-DIType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
- assert(RTy.Verify() && "Unable to create reference type");
- // References are encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, Tag),
- NULL, // TheCU,
- NULL, // Name
- NULL, // Filename
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- RTy
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createTypedef - Create debugging information entry for a typedef.
-DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
- unsigned LineNo, DIDescriptor Context) {
- // typedefs are encoded in DIDerivedType format.
- assert(Ty.Verify() && "Invalid typedef type!");
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
- getNonCompileUnitScope(Context),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- Ty
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createFriend - Create debugging information entry for a 'friend'.
-DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
- // A 'friend' relationship is encoded in DIDerivedType format.
- assert(Ty.Verify() && "Invalid type!");
- assert(FriendTy.Verify() && "Invalid friend type!");
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_friend),
- Ty,
- NULL, // Name
- Ty.getFile(),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
- FriendTy
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createInheritance - Create debugging information entry to establish
-/// inheritance relationship between two types.
-DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
- uint64_t BaseOffset, unsigned Flags) {
- assert(Ty.Verify() && "Unable to create inheritance");
- // TAG_inheritance is encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
- Ty,
- NULL, // Name
- Ty.getFile(),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
- ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
- ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- BaseTy
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createMemberType - Create debugging information entry for a member.
-DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType Ty) {
- // TAG_member is encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_member),
- getNonCompileUnitScope(Scope),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- Ty
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createObjCIVar - Create debugging information entry for Objective-C
-/// instance variable.
-DIType DIBuilder::createObjCIVar(StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType Ty, StringRef PropertyName,
- StringRef GetterName, StringRef SetterName,
- unsigned PropertyAttributes) {
- // TAG_member is encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_member),
- getNonCompileUnitScope(File),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- Ty,
- MDString::get(VMContext, PropertyName),
- MDString::get(VMContext, GetterName),
- MDString::get(VMContext, SetterName),
- ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes)
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createObjCIVar - Create debugging information entry for Objective-C
-/// instance variable.
-DIType DIBuilder::createObjCIVar(StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType Ty, MDNode *PropertyNode) {
- // TAG_member is encoded in DIDerivedType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_member),
- getNonCompileUnitScope(File),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- Ty,
- PropertyNode
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createObjCProperty - Create debugging information entry for Objective-C
-/// property.
-DIObjCProperty DIBuilder::createObjCProperty(StringRef Name,
- DIFile File, unsigned LineNumber,
- StringRef GetterName,
- StringRef SetterName,
- unsigned PropertyAttributes,
- DIType Ty) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- MDString::get(VMContext, GetterName),
- MDString::get(VMContext, SetterName),
- ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes),
- Ty
- };
- return DIObjCProperty(MDNode::get(VMContext, Elts));
-}
-
-/// createTemplateTypeParameter - Create debugging information for template
-/// type parameter.
-DITemplateTypeParameter
-DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
- DIType Ty, MDNode *File, unsigned LineNo,
- unsigned ColumnNo) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
- getNonCompileUnitScope(Context),
- MDString::get(VMContext, Name),
- Ty,
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
- };
- return DITemplateTypeParameter(MDNode::get(VMContext, Elts));
-}
-
-/// createTemplateValueParameter - Create debugging information for template
-/// value parameter.
-DITemplateValueParameter
-DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
- DIType Ty, uint64_t Val,
- MDNode *File, unsigned LineNo,
- unsigned ColumnNo) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter),
- getNonCompileUnitScope(Context),
- MDString::get(VMContext, Name),
- Ty,
- ConstantInt::get(Type::getInt64Ty(VMContext), Val),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
- };
- return DITemplateValueParameter(MDNode::get(VMContext, Elts));
-}
-
-/// createClassType - Create debugging information entry for a class.
-DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType DerivedFrom, DIArray Elements,
- MDNode *VTableHolder,
- MDNode *TemplateParams) {
- // TAG_class_type is encoded in DICompositeType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
- getNonCompileUnitScope(Context),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- DerivedFrom,
- Elements,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- VTableHolder,
- TemplateParams
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createStructType - Create debugging information entry for a struct.
-DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- unsigned Flags, DIArray Elements,
- unsigned RunTimeLang) {
- // TAG_structure_type is encoded in DICompositeType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
- getNonCompileUnitScope(Context),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- NULL,
- Elements,
- ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createUnionType - Create debugging information entry for a union.
-DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
- DIFile File,
- unsigned LineNumber, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Flags,
- DIArray Elements, unsigned RunTimeLang) {
- // TAG_union_type is encoded in DICompositeType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
- getNonCompileUnitScope(Scope),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- NULL,
- Elements,
- ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
- Constant::getNullValue(Type::getInt32Ty(VMContext))
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createSubroutineType - Create subroutine type.
-DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
- // TAG_subroutine_type is encoded in DICompositeType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- MDString::get(VMContext, ""),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0),
- ConstantInt::get(Type::getInt64Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- NULL,
- ParameterTypes,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- Constant::getNullValue(Type::getInt32Ty(VMContext))
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createEnumerationType - Create debugging information entry for an
-/// enumeration.
-DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits,
- DIArray Elements,
- DIType ClassType) {
- // TAG_enumeration_type is encoded in DICompositeType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
- getNonCompileUnitScope(Scope),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ClassType,
- Elements,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- Constant::getNullValue(Type::getInt32Ty(VMContext))
- };
- MDNode *Node = MDNode::get(VMContext, Elts);
- AllEnumTypes.push_back(Node);
- return DIType(Node);
-}
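-
-// Editor's sketch (illustrative): an enumeration combines createEnumerator()
-// values with the array helper further below, e.g. for 'enum E { A, B }':
-//
-//   Value *Vals[] = { DIB.createEnumerator("A", 0),
-//                     DIB.createEnumerator("B", 1) };
-//   DIType ETy = DIB.createEnumerationType(Scope, "E", File, Line, 32, 32,
-//                                          DIB.getOrCreateArray(Vals),
-//                                          DIType());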
-
-/// createArrayType - Create debugging information entry for an array.
-DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
- DIType Ty, DIArray Subscripts) {
- // TAG_array_type is encoded in DICompositeType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
- NULL, //TheCU,
- MDString::get(VMContext, ""),
- NULL, //TheCU,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt64Ty(VMContext), Size),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- Ty,
- Subscripts,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- Constant::getNullValue(Type::getInt32Ty(VMContext))
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createVectorType - Create debugging information entry for a vector.
-DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
- DIType Ty, DIArray Subscripts) {
- // TAG_vector_type is encoded in DICompositeType format.
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_vector_type),
- NULL, //TheCU,
- MDString::get(VMContext, ""),
- NULL, //TheCU,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt64Ty(VMContext), Size),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- Ty,
- Subscripts,
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- Constant::getNullValue(Type::getInt32Ty(VMContext))
- };
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createArtificialType - Create a new DIType with "artificial" flag set.
-DIType DIBuilder::createArtificialType(DIType Ty) {
- if (Ty.isArtificial())
- return Ty;
-
- SmallVector<Value *, 9> Elts;
- MDNode *N = Ty;
- assert (N && "Unexpected input DIType!");
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- if (Value *V = N->getOperand(i))
- Elts.push_back(V);
- else
- Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
- }
-
- unsigned CurFlags = Ty.getFlags();
- CurFlags = CurFlags | DIType::FlagArtificial;
-
- // Flags are stored at this slot.
- Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
-
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// createObjectPointerType - Create a new DIType with "object pointer" flag set.
-DIType DIBuilder::createObjectPointerType(DIType Ty) {
- if (Ty.isObjectPointer())
- return Ty;
-
- SmallVector<Value *, 9> Elts;
- MDNode *N = Ty;
- assert (N && "Unexpected input DIType!");
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- if (Value *V = N->getOperand(i))
- Elts.push_back(V);
- else
- Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
- }
-
- unsigned CurFlags = Ty.getFlags();
- CurFlags = CurFlags | (DIType::FlagObjectPointer | DIType::FlagArtificial);
-
- // Flags are stored at this slot.
- Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
-
- return DIType(MDNode::get(VMContext, Elts));
-}
-
-/// retainType - Retain DIType in a module even if it is not referenced
-/// through debug info anchors.
-void DIBuilder::retainType(DIType T) {
- AllRetainTypes.push_back(T);
-}
-
-/// createUnspecifiedParameter - Create an unspecified parameter type
-/// descriptor for the subroutine type.
-DIDescriptor DIBuilder::createUnspecifiedParameter() {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
- };
- return DIDescriptor(MDNode::get(VMContext, Elts));
-}
-
-/// createTemporaryType - Create a temporary forward-declared type.
-DIType DIBuilder::createTemporaryType() {
- // Give the temporary MDNode a tag. It doesn't matter what tag we
- // use here as long as DIType accepts it.
- Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
- MDNode *Node = MDNode::getTemporary(VMContext, Elts);
- return DIType(Node);
-}
-
-/// createTemporaryType - Create a temporary forward-declared type.
-DIType DIBuilder::createTemporaryType(DIFile F) {
- // Give the temporary MDNode a tag. It doesn't matter what tag we
- // use here as long as DIType accepts it.
- Value *Elts[] = {
- GetTagConstant(VMContext, DW_TAG_base_type),
- TheCU,
- NULL,
- F
- };
- MDNode *Node = MDNode::getTemporary(VMContext, Elts);
- return DIType(Node);
-}
-
-/// createForwardDecl - Create a temporary forward-declared type that
-/// can be RAUW'd if the full type is seen.
-DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
- DIDescriptor Scope, DIFile F,
- unsigned Line, unsigned RuntimeLang,
- uint64_t SizeInBits,
- uint64_t AlignInBits) {
- // Create a temporary MDNode.
- Value *Elts[] = {
- GetTagConstant(VMContext, Tag),
- getNonCompileUnitScope(Scope),
- MDString::get(VMContext, Name),
- F,
- ConstantInt::get(Type::getInt32Ty(VMContext), Line),
- ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
- ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext),
- DIDescriptor::FlagFwdDecl),
- NULL,
- DIArray(),
- ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
- };
- MDNode *Node = MDNode::getTemporary(VMContext, Elts);
- return DIType(Node);
-}
-
-/// getOrCreateArray - Get a DIArray, create one if required.
-DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
- if (Elements.empty()) {
- Value *Null = Constant::getNullValue(Type::getInt32Ty(VMContext));
- return DIArray(MDNode::get(VMContext, Null));
- }
- return DIArray(MDNode::get(VMContext, Elements));
-}
-
-/// getOrCreateSubrange - Create a descriptor for a value range. This
-/// implicitly uniques the values returned.
-DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
- ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
- ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
- };
-
- return DISubrange(MDNode::get(VMContext, Elts));
-}
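-
-// Editor's sketch (illustrative): 'int a[10]' pairs a subrange with
-// createArrayType() above, reusing the 32-bit IntTy from earlier sketches:
-//
-//   Value *Sub[] = { DIB.getOrCreateSubrange(0, 9) };
-//   DIType ArrTy = DIB.createArrayType(/*Size=*/320, /*AlignInBits=*/32,
-//                                      IntTy, DIB.getOrCreateArray(Sub));
-//
-// (320 = 10 elements * 32 bits, under the same target assumptions.)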
-
-/// createGlobalVariable - Create a new descriptor for the specified global.
-DIGlobalVariable DIBuilder::
-createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
- DIType Ty, bool isLocalToUnit, Value *Val) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_variable),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- NULL, // TheCU,
- MDString::get(VMContext, Name),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, Name),
- F,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- Ty,
- ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
- ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition */
- Val
- };
- MDNode *Node = MDNode::get(VMContext, Elts);
- AllGVs.push_back(Node);
- return DIGlobalVariable(Node);
-}
-
-/// createStaticVariable - Create a new descriptor for the specified static
-/// variable.
-DIGlobalVariable DIBuilder::
-createStaticVariable(DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile F, unsigned LineNumber,
- DIType Ty, bool isLocalToUnit, Value *Val) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_variable),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- getNonCompileUnitScope(Context),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, LinkageName),
- F,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
- Ty,
- ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
- ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition */
- Val
- };
- MDNode *Node = MDNode::get(VMContext, Elts);
- AllGVs.push_back(Node);
- return DIGlobalVariable(Node);
-}
-
-/// createLocalVariable - Create a new descriptor for the specified local
-/// variable.
-DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
- StringRef Name, DIFile File,
- unsigned LineNo, DIType Ty,
- bool AlwaysPreserve, unsigned Flags,
- unsigned ArgNo) {
- Value *Elts[] = {
- GetTagConstant(VMContext, Tag),
- getNonCompileUnitScope(Scope),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
- Ty,
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- Constant::getNullValue(Type::getInt32Ty(VMContext))
- };
- MDNode *Node = MDNode::get(VMContext, Elts);
- if (AlwaysPreserve) {
- // The optimizer may remove local variables. If there is an interest
- // in preserving variable info in such a situation, stash it in a
- // named mdnode.
- DISubprogram Fn(getDISubprogram(Scope));
- NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn);
- FnLocals->addOperand(Node);
- }
- return DIVariable(Node);
-}
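-
-// Editor's sketch (illustrative): a local 'int x' at line 2 inside some
-// subprogram descriptor SP (see createFunction below), with the trailing
-// Flags/ArgNo parameters left at their defaults:
-//
-//   DIVariable DX = DIB.createLocalVariable(dwarf::DW_TAG_auto_variable, SP,
-//                                           "x", File, 2, IntTy,
-//                                           /*AlwaysPreserve=*/true);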
-
-/// createComplexVariable - Create a new descriptor for the specified variable
-/// which has a complex address expression for its address.
-DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
- StringRef Name, DIFile F,
- unsigned LineNo,
- DIType Ty, ArrayRef<Value *> Addr,
- unsigned ArgNo) {
- SmallVector<Value *, 15> Elts;
- Elts.push_back(GetTagConstant(VMContext, Tag));
- Elts.push_back(getNonCompileUnitScope(Scope));
- Elts.push_back(MDString::get(VMContext, Name));
- Elts.push_back(F);
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext),
- (LineNo | (ArgNo << 24))));
- Elts.push_back(Ty);
- Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
- Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
- Elts.append(Addr.begin(), Addr.end());
-
- return DIVariable(MDNode::get(VMContext, Elts));
-}
-
-/// createFunction - Create a new descriptor for the specified function.
-DISubprogram DIBuilder::createFunction(DIDescriptor Context,
- StringRef Name,
- StringRef LinkageName,
- DIFile File, unsigned LineNo,
- DIType Ty,
- bool isLocalToUnit, bool isDefinition,
- unsigned ScopeLine,
- unsigned Flags, bool isOptimized,
- Function *Fn,
- MDNode *TParams,
- MDNode *Decl) {
- Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
- MDNode *Temp = MDNode::getTemporary(VMContext, TElts);
- Value *TVElts[] = { Temp };
- MDNode *THolder = MDNode::get(VMContext, TVElts);
-
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- getNonCompileUnitScope(Context),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, LinkageName),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- Ty,
- ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
- ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- NULL,
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
- Fn,
- TParams,
- Decl,
- THolder,
- ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine)
- };
- MDNode *Node = MDNode::get(VMContext, Elts);
-
- // Track this mdnode so that finalize() can anchor it in the compile unit.
- AllSubprograms.push_back(Node);
- return DISubprogram(Node);
-}
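-
-// Editor's sketch (illustrative): a definition of 'int main()' on line 1,
-// where Fn is the IR Function being described and SubTy comes from
-// createSubroutineType(); the template/declaration parameters are left at
-// their defaults:
-//
-//   DISubprogram SP = DIB.createFunction(File, "main", "main", File,
-//                                        /*LineNo=*/1, SubTy,
-//                                        /*isLocalToUnit=*/false,
-//                                        /*isDefinition=*/true,
-//                                        /*ScopeLine=*/1, /*Flags=*/0,
-//                                        /*isOptimized=*/false, Fn);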
-
-/// createMethod - Create a new descriptor for the specified C++ method.
-DISubprogram DIBuilder::createMethod(DIDescriptor Context,
- StringRef Name,
- StringRef LinkageName,
- DIFile F,
- unsigned LineNo, DIType Ty,
- bool isLocalToUnit,
- bool isDefinition,
- unsigned VK, unsigned VIndex,
- MDNode *VTableHolder,
- unsigned Flags,
- bool isOptimized,
- Function *Fn,
- MDNode *TParam) {
- Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
- MDNode *Temp = MDNode::getTemporary(VMContext, TElts);
- Value *TVElts[] = { Temp };
- MDNode *THolder = MDNode::get(VMContext, TVElts);
-
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- getNonCompileUnitScope(Context),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, Name),
- MDString::get(VMContext, LinkageName),
- F,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- Ty,
- ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
- ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
- ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
- ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
- VTableHolder,
- ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
- Fn,
- TParam,
- Constant::getNullValue(Type::getInt32Ty(VMContext)),
- THolder,
- // FIXME: Do we want to use different scope/lines?
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
- };
- MDNode *Node = MDNode::get(VMContext, Elts);
- return DISubprogram(Node);
-}
-
-/// createNameSpace - This creates a new descriptor for a namespace
-/// with the specified parent scope.
-DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
- DIFile File, unsigned LineNo) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
- getNonCompileUnitScope(Scope),
- MDString::get(VMContext, Name),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
- };
- return DINameSpace(MDNode::get(VMContext, Elts));
-}
-
-/// createLexicalBlockFile - This creates a new MDNode that encapsulates
-/// an existing scope with a new filename.
-DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
- DIFile File) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
- Scope,
- File
- };
- return DILexicalBlockFile(MDNode::get(VMContext, Elts));
-}
-
-DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
- unsigned Line, unsigned Col) {
- // Defeat MDNode uniquing for lexical blocks by using a unique id.
- static unsigned int unique_id = 0;
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
- getNonCompileUnitScope(Scope),
- ConstantInt::get(Type::getInt32Ty(VMContext), Line),
- ConstantInt::get(Type::getInt32Ty(VMContext), Col),
- File,
- ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
- };
- return DILexicalBlock(MDNode::get(VMContext, Elts));
-}
-
-/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
- Instruction *InsertBefore) {
- assert(Storage && "no storage passed to dbg.declare");
- assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare");
- if (!DeclareFn)
- DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
-
- Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
- return CallInst::Create(DeclareFn, Args, "", InsertBefore);
-}
-
-/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
- BasicBlock *InsertAtEnd) {
- assert(Storage && "no storage passed to dbg.declare");
- assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare");
- if (!DeclareFn)
- DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
-
- Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
-
- // If this block already has a terminator then insert this intrinsic
- // before the terminator.
- if (TerminatorInst *T = InsertAtEnd->getTerminator())
- return CallInst::Create(DeclareFn, Args, "", T);
- else
- return CallInst::Create(DeclareFn, Args, "", InsertAtEnd);
-}
-
-/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
- DIVariable VarInfo,
- Instruction *InsertBefore) {
- assert(V && "no value passed to dbg.value");
- assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
- if (!ValueFn)
- ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
-
- Value *Args[] = { MDNode::get(V->getContext(), V),
- ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
- VarInfo };
- return CallInst::Create(ValueFn, Args, "", InsertBefore);
-}
-
-/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
- DIVariable VarInfo,
- BasicBlock *InsertAtEnd) {
- assert(V && "no value passed to dbg.value");
- assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
- if (!ValueFn)
- ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
-
- Value *Args[] = { MDNode::get(V->getContext(), V),
- ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
- VarInfo };
- return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
-}
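-
-// Editor's sketch (illustrative): the two intrinsics above pair with the
-// variable descriptors like so, given an alloca X for the variable, the DX
-// descriptor from the earlier sketch, a value V later assigned to X, and
-// some insertion-point instruction I:
-//
-//   DIB.insertDeclare(X, DX, I);               // where X lives
-//   DIB.insertDbgValueIntrinsic(V, 0, DX, I);  // what X's value is now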
diff --git a/lib/VMCore/DataLayout.cpp b/lib/VMCore/DataLayout.cpp
deleted file mode 100644
index 19cf0f5cd3e8..000000000000
--- a/lib/VMCore/DataLayout.cpp
+++ /dev/null
@@ -1,749 +0,0 @@
-//===-- DataLayout.cpp - Data size & alignment routines ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines layout properties related to datatype size/offset/alignment
-// information.
-//
-// This structure should be created once, filled in if the defaults are not
-// correct, and then passed around by const&. None of the member functions
-// require modification to the object.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DataLayout.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/ADT/DenseMap.h"
-#include <algorithm>
-#include <cstdlib>
-using namespace llvm;
-
-// Handle the pass registration machinery necessary to use DataLayout.
-
-// Register the DataLayout pass with the pass registry.
-INITIALIZE_PASS(DataLayout, "datalayout", "Data Layout", false, true)
-char DataLayout::ID = 0;
-
-//===----------------------------------------------------------------------===//
-// Support for StructLayout
-//===----------------------------------------------------------------------===//
-
-StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
- assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
- StructAlignment = 0;
- StructSize = 0;
- NumElements = ST->getNumElements();
-
- // Loop over each of the elements, placing them in memory.
- for (unsigned i = 0, e = NumElements; i != e; ++i) {
- Type *Ty = ST->getElementType(i);
- unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
-
- // Add padding if necessary to align the data element properly.
- if ((StructSize & (TyAlign-1)) != 0)
- StructSize = DataLayout::RoundUpAlignment(StructSize, TyAlign);
-
- // Keep track of maximum alignment constraint.
- StructAlignment = std::max(TyAlign, StructAlignment);
-
- MemberOffsets[i] = StructSize;
- StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
- }
-
- // Empty structures have alignment of 1 byte.
- if (StructAlignment == 0) StructAlignment = 1;
-
- // Add padding to the end of the struct so that it could be put in an array
- // and all array elements would be aligned correctly.
- if ((StructSize & (StructAlignment-1)) != 0)
- StructSize = DataLayout::RoundUpAlignment(StructSize, StructAlignment);
-}
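-
-// Editor's note (worked example under the default alignments in DataLayout):
-// for { i8, i32, i16 } the loop above assigns offsets 0, 4 and 8 (the i32
-// forces three bytes of padding), StructAlignment becomes 4, and the final
-// rounding grows StructSize from 10 to 12 so that consecutive array elements
-// stay correctly aligned.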
-
-
-/// getElementContainingOffset - Given a valid offset into the structure,
-/// return the index of the element that contains it.
-unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
- const uint64_t *SI =
- std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset);
- assert(SI != &MemberOffsets[0] && "Offset not in structure type!");
- --SI;
- assert(*SI <= Offset && "upper_bound didn't work");
- assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
- (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
- "Upper bound didn't work!");
-
- // Multiple fields can have the same offset if any of them are zero sized.
- // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
- // at the i32 element, because it is the last element at that offset. This is
- // the right one to return, because anything after it will have a higher
- // offset, implying that this element is non-empty.
- return SI-&MemberOffsets[0];
-}
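-
-// Editor's note (worked example, same { i8, i32, i16 } layout as above):
-// for Offset 6, upper_bound over the member offsets {0, 4, 8} points at 8;
-// stepping back yields offset 4, so the query returns index 1, the i32
-// that actually contains byte 6.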
-
-//===----------------------------------------------------------------------===//
-// LayoutAlignElem, LayoutAlign support
-//===----------------------------------------------------------------------===//
-
-LayoutAlignElem
-LayoutAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width) {
- assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
- LayoutAlignElem retval;
- retval.AlignType = align_type;
- retval.ABIAlign = abi_align;
- retval.PrefAlign = pref_align;
- retval.TypeBitWidth = bit_width;
- return retval;
-}
-
-bool
-LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const {
- return (AlignType == rhs.AlignType
- && ABIAlign == rhs.ABIAlign
- && PrefAlign == rhs.PrefAlign
- && TypeBitWidth == rhs.TypeBitWidth);
-}
-
-const LayoutAlignElem
-DataLayout::InvalidAlignmentElem =
- LayoutAlignElem::get((AlignTypeEnum) -1, 0, 0, 0);
-
-//===----------------------------------------------------------------------===//
-// PointerAlignElem, PointerAlign support
-//===----------------------------------------------------------------------===//
-
-PointerAlignElem
-PointerAlignElem::get(uint32_t addr_space, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width) {
- assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
- PointerAlignElem retval;
- retval.AddressSpace = addr_space;
- retval.ABIAlign = abi_align;
- retval.PrefAlign = pref_align;
- retval.TypeBitWidth = bit_width;
- return retval;
-}
-
-bool
-PointerAlignElem::operator==(const PointerAlignElem &rhs) const {
- return (ABIAlign == rhs.ABIAlign
- && AddressSpace == rhs.AddressSpace
- && PrefAlign == rhs.PrefAlign
- && TypeBitWidth == rhs.TypeBitWidth);
-}
-
-const PointerAlignElem
-DataLayout::InvalidPointerElem = PointerAlignElem::get(~0U, 0U, 0U, 0U);
-
-//===----------------------------------------------------------------------===//
-// DataLayout Class Implementation
-//===----------------------------------------------------------------------===//
-
-/// getInt - Parse an integer from R, ignoring any parse errors.
-static int getInt(StringRef R) {
- int Result = 0;
- R.getAsInteger(10, Result);
- return Result;
-}
-
-void DataLayout::init() {
- initializeDataLayoutPass(*PassRegistry::getPassRegistry());
-
- LayoutMap = 0;
- LittleEndian = false;
- StackNaturalAlign = 0;
-
- // Default alignments
- setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1
- setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8
- setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16
- setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32
- setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
- setAlignment(FLOAT_ALIGN, 2, 2, 16); // half
- setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
- setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
- setAlignment(FLOAT_ALIGN, 16, 16, 128); // ppcf128, quad, ...
- setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ...
- setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
- setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
- setPointerAlignment(0, 8, 8, 8);
-}
-
-std::string DataLayout::parseSpecifier(StringRef Desc, DataLayout *td) {
-
- if (td)
- td->init();
-
- while (!Desc.empty()) {
- std::pair<StringRef, StringRef> Split = Desc.split('-');
- StringRef Token = Split.first;
- Desc = Split.second;
-
- if (Token.empty())
- continue;
-
- Split = Token.split(':');
- StringRef Specifier = Split.first;
- Token = Split.second;
-
- assert(!Specifier.empty() && "Can't be empty here");
-
- switch (Specifier[0]) {
- case 'E':
- if (td)
- td->LittleEndian = false;
- break;
- case 'e':
- if (td)
- td->LittleEndian = true;
- break;
- case 'p': {
- int AddrSpace = 0;
- if (Specifier.size() > 1) {
- AddrSpace = getInt(Specifier.substr(1));
- if (AddrSpace < 0 || AddrSpace > (1 << 24))
- return "Invalid address space, must be a positive 24bit integer";
- }
- Split = Token.split(':');
- int PointerMemSizeBits = getInt(Split.first);
- if (PointerMemSizeBits < 0 || PointerMemSizeBits % 8 != 0)
- return "invalid pointer size, must be a positive 8-bit multiple";
-
- // Pointer ABI alignment.
- Split = Split.second.split(':');
- int PointerABIAlignBits = getInt(Split.first);
- if (PointerABIAlignBits < 0 || PointerABIAlignBits % 8 != 0) {
- return "invalid pointer ABI alignment, "
- "must be a positive 8-bit multiple";
- }
-
- // Pointer preferred alignment.
- Split = Split.second.split(':');
- int PointerPrefAlignBits = getInt(Split.first);
- if (PointerPrefAlignBits < 0 || PointerPrefAlignBits % 8 != 0) {
- return "invalid pointer preferred alignment, "
- "must be a positive 8-bit multiple";
- }
-
- if (PointerPrefAlignBits == 0)
- PointerPrefAlignBits = PointerABIAlignBits;
- if (td)
- td->setPointerAlignment(AddrSpace, PointerABIAlignBits/8,
- PointerPrefAlignBits/8, PointerMemSizeBits/8);
- break;
- }
- case 'i':
- case 'v':
- case 'f':
- case 'a':
- case 's': {
- AlignTypeEnum AlignType;
- char field = Specifier[0];
- switch (field) {
- default:
- case 'i': AlignType = INTEGER_ALIGN; break;
- case 'v': AlignType = VECTOR_ALIGN; break;
- case 'f': AlignType = FLOAT_ALIGN; break;
- case 'a': AlignType = AGGREGATE_ALIGN; break;
- case 's': AlignType = STACK_ALIGN; break;
- }
- int Size = getInt(Specifier.substr(1));
- if (Size < 0) {
- return std::string("invalid ") + field + "-size field, "
- "must be positive";
- }
-
- Split = Token.split(':');
- int ABIAlignBits = getInt(Split.first);
- if (ABIAlignBits < 0 || ABIAlignBits % 8 != 0) {
- return std::string("invalid ") + field +"-abi-alignment field, "
- "must be a positive 8-bit multiple";
- }
- unsigned ABIAlign = ABIAlignBits / 8;
-
- Split = Split.second.split(':');
-
- int PrefAlignBits = getInt(Split.first);
- if (PrefAlignBits < 0 || PrefAlignBits % 8 != 0) {
- return std::string("invalid ") + field +"-preferred-alignment field, "
- "must be a positive 8-bit multiple";
- }
- unsigned PrefAlign = PrefAlignBits / 8;
- if (PrefAlign == 0)
- PrefAlign = ABIAlign;
-
- if (td)
- td->setAlignment(AlignType, ABIAlign, PrefAlign, Size);
- break;
- }
- case 'n': // Native integer types.
- Specifier = Specifier.substr(1);
- do {
- int Width = getInt(Specifier);
- if (Width <= 0) {
- return std::string("invalid native integer size \'") +
- Specifier.str() + "\', must be a positive integer.";
- }
- if (td && Width != 0)
- td->LegalIntWidths.push_back(Width);
- Split = Token.split(':');
- Specifier = Split.first;
- Token = Split.second;
- } while (!Specifier.empty() || !Token.empty());
- break;
- case 'S': { // Stack natural alignment.
- int StackNaturalAlignBits = getInt(Specifier.substr(1));
- if (StackNaturalAlignBits < 0 || StackNaturalAlignBits % 8 != 0) {
- return "invalid natural stack alignment (S-field), "
- "must be a positive 8-bit multiple";
- }
- if (td)
- td->StackNaturalAlign = StackNaturalAlignBits / 8;
- break;
- }
- default:
- break;
- }
- }
-
- return "";
-}
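-
-// Editor's note (illustrative walk-through): a typical little-endian 64-bit
-// string such as "e-p:64:64:64-i32:32:32-n8:16:32:64-S128" is consumed by
-// the loop above as: 'e' (little endian), 'p' (64-bit pointers with 64-bit
-// ABI and preferred alignment), 'i32' (32-bit integers aligned to 32 bits),
-// 'n' (native integer widths 8/16/32/64) and 'S' (128-bit natural stack
-// alignment, stored as 16 bytes).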
-
-/// Default ctor.
-///
-/// @note This has to exist, because this is a pass, but it should never be
-/// used.
-DataLayout::DataLayout() : ImmutablePass(ID) {
- report_fatal_error("Bad DataLayout ctor used. "
- "Tool did not specify a DataLayout to use?");
-}
-
-DataLayout::DataLayout(const Module *M)
- : ImmutablePass(ID) {
- std::string errMsg = parseSpecifier(M->getDataLayout(), this);
- assert(errMsg == "" && "Module M has malformed data layout string.");
- (void)errMsg;
-}
-
-void
-DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width) {
- assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
- assert(pref_align < (1 << 16) && "Alignment doesn't fit in bitfield");
- assert(bit_width < (1 << 24) && "Bit width doesn't fit in bitfield");
- for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
- if (Alignments[i].AlignType == (unsigned)align_type &&
- Alignments[i].TypeBitWidth == bit_width) {
- // Update the abi, preferred alignments.
- Alignments[i].ABIAlign = abi_align;
- Alignments[i].PrefAlign = pref_align;
- return;
- }
- }
-
- Alignments.push_back(LayoutAlignElem::get(align_type, abi_align,
- pref_align, bit_width));
-}
-
-void
-DataLayout::setPointerAlignment(uint32_t addr_space, unsigned abi_align,
- unsigned pref_align, uint32_t bit_width) {
- assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
- DenseMap<unsigned,PointerAlignElem>::iterator val = Pointers.find(addr_space);
- if (val == Pointers.end()) {
- Pointers[addr_space] = PointerAlignElem::get(addr_space,
- abi_align, pref_align, bit_width);
- } else {
- val->second.ABIAlign = abi_align;
- val->second.PrefAlign = pref_align;
- val->second.TypeBitWidth = bit_width;
- }
-}
-
-/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
-/// preferred if ABIInfo = false) the layout wants for the specified datatype.
-unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
- uint32_t BitWidth, bool ABIInfo,
- Type *Ty) const {
- // Check to see if we have an exact match and remember the best match we see.
- int BestMatchIdx = -1;
- int LargestInt = -1;
- for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
- if (Alignments[i].AlignType == (unsigned)AlignType &&
- Alignments[i].TypeBitWidth == BitWidth)
- return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
-
- // The best match so far depends on what we're looking for.
- if (AlignType == INTEGER_ALIGN &&
- Alignments[i].AlignType == INTEGER_ALIGN) {
- // The "best match" for integers is the smallest size that is larger than
- // the BitWidth requested.
- if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
- Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
- BestMatchIdx = i;
- // However, if there isn't one that's larger, then we must use the
- // largest one we have (see below)
- if (LargestInt == -1 ||
- Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
- LargestInt = i;
- }
- }
-
- // Okay, we didn't find an exact solution. Fall back here depending on what
- // is being looked for.
- if (BestMatchIdx == -1) {
- // If we didn't find an integer alignment, fall back on the most conservative.
- if (AlignType == INTEGER_ALIGN) {
- BestMatchIdx = LargestInt;
- } else {
- assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
-
- // By default, use natural alignment for vector types. This is consistent
- // with what clang and llvm-gcc do.
- unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
- Align *= cast<VectorType>(Ty)->getNumElements();
- // If the alignment is not a power of 2, round up to the next power of 2.
- // This happens for non-power-of-2 length vectors.
- if (Align & (Align-1))
- Align = NextPowerOf2(Align);
- return Align;
- }
- }
-
- // Since we got a "best match" index, just return it.
- return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
- : Alignments[BestMatchIdx].PrefAlign;
-}
-
-namespace {
-
-class StructLayoutMap {
- typedef DenseMap<StructType*, StructLayout*> LayoutInfoTy;
- LayoutInfoTy LayoutInfo;
-
-public:
- virtual ~StructLayoutMap() {
- // Remove any layouts.
- for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end();
- I != E; ++I) {
- StructLayout *Value = I->second;
- Value->~StructLayout();
- free(Value);
- }
- }
-
- StructLayout *&operator[](StructType *STy) {
- return LayoutInfo[STy];
- }
-
- // for debugging...
- virtual void dump() const {}
-};
-
-} // end anonymous namespace
-
-DataLayout::~DataLayout() {
- delete static_cast<StructLayoutMap*>(LayoutMap);
-}
-
-const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
- if (!LayoutMap)
- LayoutMap = new StructLayoutMap();
-
- StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
- StructLayout *&SL = (*STM)[Ty];
- if (SL) return SL;
-
- // Otherwise, create the struct layout. Because it is variable length, we
- // malloc it, then use placement new.
- int NumElts = Ty->getNumElements();
- StructLayout *L =
- (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
-
- // Set SL before calling StructLayout's ctor. The ctor could cause other
- // entries to be added to the map, invalidating our reference.
- SL = L;
-
- new (L) StructLayout(Ty, *this);
-
- return L;
-}
-
-std::string DataLayout::getStringRepresentation() const {
- std::string Result;
- raw_string_ostream OS(Result);
-
- OS << (LittleEndian ? "e" : "E");
- SmallVector<unsigned, 8> addrSpaces;
- // Let's get all of the known address spaces and sort them
- // into increasing order so that we can emit the string
- // in a cleaner format.
- for (DenseMap<unsigned, PointerAlignElem>::const_iterator
- pib = Pointers.begin(), pie = Pointers.end();
- pib != pie; ++pib) {
- addrSpaces.push_back(pib->first);
- }
- std::sort(addrSpaces.begin(), addrSpaces.end());
- for (SmallVector<unsigned, 8>::iterator asb = addrSpaces.begin(),
- ase = addrSpaces.end(); asb != ase; ++asb) {
- const PointerAlignElem &PI = Pointers.find(*asb)->second;
- OS << "-p";
- if (PI.AddressSpace) {
- OS << PI.AddressSpace;
- }
- OS << ":" << PI.TypeBitWidth*8 << ':' << PI.ABIAlign*8
- << ':' << PI.PrefAlign*8;
- }
- OS << "-S" << StackNaturalAlign*8;
-
- for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
- const LayoutAlignElem &AI = Alignments[i];
- OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
- << AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
- }
-
- if (!LegalIntWidths.empty()) {
- OS << "-n" << (unsigned)LegalIntWidths[0];
-
- for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
- OS << ':' << (unsigned)LegalIntWidths[i];
- }
- return OS.str();
-}
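-
-// For a common 64-bit little-endian target, the string assembled above looks
-// roughly like the following (illustrative only and wrapped here for
-// readability; the exact entries and alignments are target-dependent):
-//
-//   e-p:64:64:64-S0-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64
-//     -f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64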
-
-
-uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
- assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
- switch (Ty->getTypeID()) {
- case Type::LabelTyID:
- return getPointerSizeInBits(0);
- case Type::PointerTyID: {
-    unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
- return getPointerSizeInBits(AS);
- }
- case Type::ArrayTyID: {
- ArrayType *ATy = cast<ArrayType>(Ty);
- return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
- }
- case Type::StructTyID:
- // Get the layout annotation... which is lazily created on demand.
- return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
- case Type::IntegerTyID:
- return cast<IntegerType>(Ty)->getBitWidth();
- case Type::VoidTyID:
- return 8;
- case Type::HalfTyID:
- return 16;
- case Type::FloatTyID:
- return 32;
- case Type::DoubleTyID:
- case Type::X86_MMXTyID:
- return 64;
- case Type::PPC_FP128TyID:
- case Type::FP128TyID:
- return 128;
- // In memory objects this is always aligned to a higher boundary, but
- // only 80 bits contain information.
- case Type::X86_FP80TyID:
- return 80;
- case Type::VectorTyID: {
- VectorType *VTy = cast<VectorType>(Ty);
- return VTy->getNumElements()*getTypeSizeInBits(VTy->getElementType());
- }
- default:
- llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type");
- }
-}
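-
-// Usage sketch: the bit size reported here can be smaller than the type's
-// in-memory footprint. Assuming a typical x86 data layout and an existing
-// DataLayout DL and LLVMContext Ctx:
-//
-//   uint64_t Bits  = DL.getTypeSizeInBits(Type::getX86_FP80Ty(Ctx)); // 80
-//   uint64_t Bytes = DL.getTypeAllocSize(Type::getX86_FP80Ty(Ctx));
-//   // Bytes is 12 or 16 depending on the ABI's x86_fp80 alignment.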
-
-/*!
- \param abi_or_pref Flag that determines which alignment is returned. true
- returns the ABI alignment, false returns the preferred alignment.
- \param Ty The underlying type for which alignment is determined.
-
- Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
- == false) for the requested type \a Ty.
- */
-unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
- int AlignType = -1;
-
- assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
- switch (Ty->getTypeID()) {
- // Early escape for the non-numeric types.
- case Type::LabelTyID:
- return (abi_or_pref
- ? getPointerABIAlignment(0)
- : getPointerPrefAlignment(0));
- case Type::PointerTyID: {
-    unsigned AS = cast<PointerType>(Ty)->getAddressSpace();
- return (abi_or_pref
- ? getPointerABIAlignment(AS)
- : getPointerPrefAlignment(AS));
- }
- case Type::ArrayTyID:
- return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref);
-
- case Type::StructTyID: {
- // Packed structure types always have an ABI alignment of one.
- if (cast<StructType>(Ty)->isPacked() && abi_or_pref)
- return 1;
-
- // Get the layout annotation... which is lazily created on demand.
- const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
- unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
- return std::max(Align, Layout->getAlignment());
- }
- case Type::IntegerTyID:
- case Type::VoidTyID:
- AlignType = INTEGER_ALIGN;
- break;
- case Type::HalfTyID:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- // PPC_FP128TyID and FP128TyID have different data contents, but the
- // same size and alignment, so they look the same here.
- case Type::PPC_FP128TyID:
- case Type::FP128TyID:
- case Type::X86_FP80TyID:
- AlignType = FLOAT_ALIGN;
- break;
- case Type::X86_MMXTyID:
- case Type::VectorTyID:
- AlignType = VECTOR_ALIGN;
- break;
- default:
- llvm_unreachable("Bad type for getAlignment!!!");
- }
-
- return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
- abi_or_pref, Ty);
-}
-
-unsigned DataLayout::getABITypeAlignment(Type *Ty) const {
- return getAlignment(Ty, true);
-}
-
-/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
-/// an integer type of the specified bitwidth.
-unsigned DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const {
- return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
-}
-
-
-unsigned DataLayout::getCallFrameTypeAlignment(Type *Ty) const {
- for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
- if (Alignments[i].AlignType == STACK_ALIGN)
- return Alignments[i].ABIAlign;
-
- return getABITypeAlignment(Ty);
-}
-
-unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const {
- return getAlignment(Ty, false);
-}
-
-unsigned DataLayout::getPreferredTypeAlignmentShift(Type *Ty) const {
- unsigned Align = getPrefTypeAlignment(Ty);
- assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
- return Log2_32(Align);
-}
-
-/// getIntPtrType - Return an integer type with size at least as big as that
-/// of a pointer in the given address space.
-IntegerType *DataLayout::getIntPtrType(LLVMContext &C,
- unsigned AddressSpace) const {
- return IntegerType::get(C, getPointerSizeInBits(AddressSpace));
-}
-
-/// getIntPtrType - Return an integer (vector of integer) type with size at
-/// least as big as that of a pointer of the given pointer (vector of pointer)
-/// type.
-Type *DataLayout::getIntPtrType(Type *Ty) const {
- assert(Ty->isPtrOrPtrVectorTy() &&
- "Expected a pointer or pointer vector type.");
- unsigned NumBits = getTypeSizeInBits(Ty->getScalarType());
- IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits);
- if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
- return VectorType::get(IntTy, VecTy->getNumElements());
- return IntTy;
-}
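-
-// Usage sketch, assuming 64-bit pointers in the relevant address space and
-// an existing DataLayout DL and LLVMContext Ctx:
-//
-//   Type *P = Type::getInt8PtrTy(Ctx); // i8*
-//   Type *V = VectorType::get(P, 4);   // <4 x i8*>
-//   DL.getIntPtrType(P);               // i64
-//   DL.getIntPtrType(V);               // <4 x i64>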
-
-uint64_t DataLayout::getIndexedOffset(Type *ptrTy,
- ArrayRef<Value *> Indices) const {
- Type *Ty = ptrTy;
- assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()");
- uint64_t Result = 0;
-
- generic_gep_type_iterator<Value* const*>
- TI = gep_type_begin(ptrTy, Indices);
- for (unsigned CurIDX = 0, EndIDX = Indices.size(); CurIDX != EndIDX;
- ++CurIDX, ++TI) {
- if (StructType *STy = dyn_cast<StructType>(*TI)) {
- assert(Indices[CurIDX]->getType() ==
- Type::getInt32Ty(ptrTy->getContext()) &&
- "Illegal struct idx");
- unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
-
- // Get structure layout information...
- const StructLayout *Layout = getStructLayout(STy);
-
- // Add in the offset, as calculated by the structure layout info...
- Result += Layout->getElementOffset(FieldNo);
-
- // Update Ty to refer to current element
- Ty = STy->getElementType(FieldNo);
- } else {
- // Update Ty to refer to current element
- Ty = cast<SequentialType>(Ty)->getElementType();
-
- // Get the array index and the size of each array element.
- if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue())
- Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty);
- }
- }
-
- return Result;
-}
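-
-// Worked example (offsets assume a layout where i32 is 4-byte aligned): for
-// %T = type { i8, i32 }, the GEP "getelementptr %T* %p, i32 1, i32 1" sums
-//
-//     1 * getTypeAllocSize(%T)               =  8  ; i32 padded to offset 4
-//   + getStructLayout(%T)->getElementOffset(1) = 4
-//                                              = 12 bytes from %p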
-
-/// getPreferredAlignment - Return the preferred alignment of the specified
-/// global. This includes an explicitly requested alignment (if the global
-/// has one).
-unsigned DataLayout::getPreferredAlignment(const GlobalVariable *GV) const {
- Type *ElemType = GV->getType()->getElementType();
- unsigned Alignment = getPrefTypeAlignment(ElemType);
- unsigned GVAlignment = GV->getAlignment();
- if (GVAlignment >= Alignment) {
- Alignment = GVAlignment;
- } else if (GVAlignment != 0) {
- Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
- }
-
- if (GV->hasInitializer() && GVAlignment == 0) {
- if (Alignment < 16) {
- // If the global is not external, see if it is large. If so, give it a
- // larger alignment.
- if (getTypeSizeInBits(ElemType) > 128)
- Alignment = 16; // 16-byte alignment.
- }
- }
- return Alignment;
-}
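-
-// Two sketches of the policy above (both layout-dependent): a global carrying
-// an explicit "align 4" is honored but never dropped below the type's ABI
-// alignment, while an unannotated global with an initializer larger than
-// 128 bits, say [64 x i32], is bumped to 16-byte alignment.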
-
-/// getPreferredAlignmentLog - Return the preferred alignment of the
-/// specified global, returned in log form. This includes an explicitly
-/// requested alignment (if the global has one).
-unsigned DataLayout::getPreferredAlignmentLog(const GlobalVariable *GV) const {
- return Log2_32(getPreferredAlignment(GV));
-}
diff --git a/lib/VMCore/DebugInfo.cpp b/lib/VMCore/DebugInfo.cpp
deleted file mode 100644
index 3029ce273434..000000000000
--- a/lib/VMCore/DebugInfo.cpp
+++ /dev/null
@@ -1,1178 +0,0 @@
-//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the helper classes used to build and interpret debug
-// information in LLVM IR form.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-using namespace llvm::dwarf;
-
-//===----------------------------------------------------------------------===//
-// DIDescriptor
-//===----------------------------------------------------------------------===//
-
-DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
-}
-
-DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
-}
-
-DIDescriptor::DIDescriptor(const DILexicalBlockFile F) : DbgNode(F.DbgNode) {
-}
-
-DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
-}
-
-DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) {
-}
-
-DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) {
-}
-
-StringRef
-DIDescriptor::getStringField(unsigned Elt) const {
- if (DbgNode == 0)
- return StringRef();
-
- if (Elt < DbgNode->getNumOperands())
- if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt)))
- return MDS->getString();
-
- return StringRef();
-}
-
-uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
- if (DbgNode == 0)
- return 0;
-
- if (Elt < DbgNode->getNumOperands())
- if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
- return CI->getZExtValue();
-
- return 0;
-}
-
-DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
- if (DbgNode == 0)
- return DIDescriptor();
-
- if (Elt < DbgNode->getNumOperands())
- return
- DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
- return DIDescriptor();
-}
-
-GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
- if (DbgNode == 0)
- return 0;
-
- if (Elt < DbgNode->getNumOperands())
- return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt));
- return 0;
-}
-
-Constant *DIDescriptor::getConstantField(unsigned Elt) const {
- if (DbgNode == 0)
- return 0;
-
- if (Elt < DbgNode->getNumOperands())
- return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
- return 0;
-}
-
-Function *DIDescriptor::getFunctionField(unsigned Elt) const {
- if (DbgNode == 0)
- return 0;
-
- if (Elt < DbgNode->getNumOperands())
- return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
- return 0;
-}
-
-void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) {
- if (DbgNode == 0)
- return;
-
- if (Elt < DbgNode->getNumOperands()) {
- MDNode *Node = const_cast<MDNode*>(DbgNode);
- Node->replaceOperandWith(Elt, F);
- }
-}
-
-unsigned DIVariable::getNumAddrElements() const {
- if (getVersion() <= LLVMDebugVersion8)
- return DbgNode->getNumOperands()-6;
- if (getVersion() == LLVMDebugVersion9)
- return DbgNode->getNumOperands()-7;
- return DbgNode->getNumOperands()-8;
-}
-
-/// getInlinedAt - If this variable is inlined then return the inlined-at
-/// location.
-MDNode *DIVariable::getInlinedAt() const {
- if (getVersion() <= LLVMDebugVersion9)
- return NULL;
- return dyn_cast_or_null<MDNode>(DbgNode->getOperand(7));
-}
-
-//===----------------------------------------------------------------------===//
-// Predicates
-//===----------------------------------------------------------------------===//
-
-/// isBasicType - Return true if the specified tag is legal for
-/// DIBasicType.
-bool DIDescriptor::isBasicType() const {
- if (!DbgNode) return false;
- switch (getTag()) {
- case dwarf::DW_TAG_base_type:
- case dwarf::DW_TAG_unspecified_type:
- return true;
- default:
- return false;
- }
-}
-
-/// isDerivedType - Return true if the specified tag is legal for DIDerivedType.
-bool DIDescriptor::isDerivedType() const {
- if (!DbgNode) return false;
- switch (getTag()) {
- case dwarf::DW_TAG_typedef:
- case dwarf::DW_TAG_pointer_type:
- case dwarf::DW_TAG_reference_type:
- case dwarf::DW_TAG_rvalue_reference_type:
- case dwarf::DW_TAG_const_type:
- case dwarf::DW_TAG_volatile_type:
- case dwarf::DW_TAG_restrict_type:
- case dwarf::DW_TAG_member:
- case dwarf::DW_TAG_inheritance:
- case dwarf::DW_TAG_friend:
- return true;
- default:
- // CompositeTypes are currently modelled as DerivedTypes.
- return isCompositeType();
- }
-}
-
-/// isCompositeType - Return true if the specified tag is legal for
-/// DICompositeType.
-bool DIDescriptor::isCompositeType() const {
- if (!DbgNode) return false;
- switch (getTag()) {
- case dwarf::DW_TAG_array_type:
- case dwarf::DW_TAG_structure_type:
- case dwarf::DW_TAG_union_type:
- case dwarf::DW_TAG_enumeration_type:
- case dwarf::DW_TAG_vector_type:
- case dwarf::DW_TAG_subroutine_type:
- case dwarf::DW_TAG_class_type:
- return true;
- default:
- return false;
- }
-}
-
-/// isVariable - Return true if the specified tag is legal for DIVariable.
-bool DIDescriptor::isVariable() const {
- if (!DbgNode) return false;
- switch (getTag()) {
- case dwarf::DW_TAG_auto_variable:
- case dwarf::DW_TAG_arg_variable:
- case dwarf::DW_TAG_return_variable:
- return true;
- default:
- return false;
- }
-}
-
-/// isType - Return true if the specified tag is legal for DIType.
-bool DIDescriptor::isType() const {
- return isBasicType() || isCompositeType() || isDerivedType();
-}
-
-/// isSubprogram - Return true if the specified tag is legal for
-/// DISubprogram.
-bool DIDescriptor::isSubprogram() const {
- return DbgNode && getTag() == dwarf::DW_TAG_subprogram;
-}
-
-/// isGlobalVariable - Return true if the specified tag is legal for
-/// DIGlobalVariable.
-bool DIDescriptor::isGlobalVariable() const {
- return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
- getTag() == dwarf::DW_TAG_constant);
-}
-
-/// isGlobal - Return true if the specified tag is legal for DIGlobal.
-bool DIDescriptor::isGlobal() const {
- return isGlobalVariable();
-}
-
-/// isUnspecifiedParameter - Return true if the specified tag is
-/// DW_TAG_unspecified_parameters.
-bool DIDescriptor::isUnspecifiedParameter() const {
- return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters;
-}
-
-/// isScope - Return true if the specified tag is one of the scope-related
-/// tags.
-bool DIDescriptor::isScope() const {
- if (!DbgNode) return false;
- switch (getTag()) {
- case dwarf::DW_TAG_compile_unit:
- case dwarf::DW_TAG_lexical_block:
- case dwarf::DW_TAG_subprogram:
- case dwarf::DW_TAG_namespace:
- return true;
- default:
- break;
- }
- return false;
-}
-
-/// isTemplateTypeParameter - Return true if the specified tag is
-/// DW_TAG_template_type_parameter.
-bool DIDescriptor::isTemplateTypeParameter() const {
- return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
-}
-
-/// isTemplateValueParameter - Return true if the specified tag is
-/// DW_TAG_template_value_parameter.
-bool DIDescriptor::isTemplateValueParameter() const {
- return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter;
-}
-
-/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
-bool DIDescriptor::isCompileUnit() const {
- return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
-}
-
-/// isFile - Return true if the specified tag is DW_TAG_file_type.
-bool DIDescriptor::isFile() const {
- return DbgNode && getTag() == dwarf::DW_TAG_file_type;
-}
-
-/// isNameSpace - Return true if the specified tag is DW_TAG_namespace.
-bool DIDescriptor::isNameSpace() const {
- return DbgNode && getTag() == dwarf::DW_TAG_namespace;
-}
-
-/// isLexicalBlockFile - Return true if the specified descriptor is a
-/// lexical block with an extra file.
-bool DIDescriptor::isLexicalBlockFile() const {
- return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
- (DbgNode->getNumOperands() == 3);
-}
-
-/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
-bool DIDescriptor::isLexicalBlock() const {
- return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
- (DbgNode->getNumOperands() > 3);
-}
-
-/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
-bool DIDescriptor::isSubrange() const {
- return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
-}
-
-/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
-bool DIDescriptor::isEnumerator() const {
- return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
-}
-
-/// isObjCProperty - Return true if the specified tag is
-/// DW_TAG_APPLE_property.
-bool DIDescriptor::isObjCProperty() const {
- return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
-}
-//===----------------------------------------------------------------------===//
-// Simple Descriptor Constructors and other Methods
-//===----------------------------------------------------------------------===//
-
-DIType::DIType(const MDNode *N) : DIScope(N) {
- if (!N) return;
- if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
- DbgNode = 0;
- }
-}
-
-unsigned DIArray::getNumElements() const {
- if (!DbgNode)
- return 0;
- return DbgNode->getNumOperands();
-}
-
-/// replaceAllUsesWith - Replace all uses of debug info referenced by
-/// this descriptor.
-void DIType::replaceAllUsesWith(DIDescriptor &D) {
- if (!DbgNode)
- return;
-
-  // Since we use a TrackingVH for the node, it's easy for clients to
-  // manufacture
- // legitimate situations where they want to replaceAllUsesWith() on something
- // which, due to uniquing, has merged with the source. We shield clients from
- // this detail by allowing a value to be replaced with replaceAllUsesWith()
- // itself.
- if (DbgNode != D) {
- MDNode *Node = const_cast<MDNode*>(DbgNode);
- const MDNode *DN = D;
- const Value *V = cast_or_null<Value>(DN);
- Node->replaceAllUsesWith(const_cast<Value*>(V));
- MDNode::deleteTemporary(Node);
- }
-}
-
-/// replaceAllUsesWith - Replace all uses of debug info referenced by
-/// this descriptor.
-void DIType::replaceAllUsesWith(MDNode *D) {
- if (!DbgNode)
- return;
-
-  // Since we use a TrackingVH for the node, it's easy for clients to
-  // manufacture
- // legitimate situations where they want to replaceAllUsesWith() on something
- // which, due to uniquing, has merged with the source. We shield clients from
- // this detail by allowing a value to be replaced with replaceAllUsesWith()
- // itself.
- if (DbgNode != D) {
- MDNode *Node = const_cast<MDNode*>(DbgNode);
- const MDNode *DN = D;
- const Value *V = cast_or_null<Value>(DN);
- Node->replaceAllUsesWith(const_cast<Value*>(V));
- MDNode::deleteTemporary(Node);
- }
-}
-
-/// isUnsignedDIType - Return true if type encoding is unsigned.
-bool DIType::isUnsignedDIType() {
- DIDerivedType DTy(DbgNode);
- if (DTy.Verify())
- return DTy.getTypeDerivedFrom().isUnsignedDIType();
-
- DIBasicType BTy(DbgNode);
- if (BTy.Verify()) {
- unsigned Encoding = BTy.getEncoding();
- if (Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char)
- return true;
- }
- return false;
-}
-
-/// Verify - Verify that a compile unit is well formed.
-bool DICompileUnit::Verify() const {
- if (!DbgNode)
- return false;
- StringRef N = getFilename();
- if (N.empty())
- return false;
-  // It is possible that the directory and producer strings are empty.
- return true;
-}
-
-/// Verify - Verify that an ObjC property is well formed.
-bool DIObjCProperty::Verify() const {
- if (!DbgNode)
- return false;
- unsigned Tag = getTag();
- if (Tag != dwarf::DW_TAG_APPLE_property) return false;
- DIType Ty = getType();
- if (!Ty.Verify()) return false;
-
- // Don't worry about the rest of the strings for now.
- return true;
-}
-
-/// Verify - Verify that a type descriptor is well formed.
-bool DIType::Verify() const {
- if (!DbgNode)
- return false;
- if (getContext() && !getContext().Verify())
- return false;
- unsigned Tag = getTag();
- if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
- Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
- Tag != dwarf::DW_TAG_reference_type &&
- Tag != dwarf::DW_TAG_rvalue_reference_type &&
- Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_vector_type &&
- Tag != dwarf::DW_TAG_array_type &&
- Tag != dwarf::DW_TAG_enumeration_type &&
- Tag != dwarf::DW_TAG_subroutine_type &&
- getFilename().empty())
- return false;
- return true;
-}
-
-/// Verify - Verify that a basic type descriptor is well formed.
-bool DIBasicType::Verify() const {
- return isBasicType();
-}
-
-/// Verify - Verify that a derived type descriptor is well formed.
-bool DIDerivedType::Verify() const {
- return isDerivedType();
-}
-
-/// Verify - Verify that a composite type descriptor is well formed.
-bool DICompositeType::Verify() const {
- if (!DbgNode)
- return false;
- if (getContext() && !getContext().Verify())
- return false;
-
- return true;
-}
-
-/// Verify - Verify that a subprogram descriptor is well formed.
-bool DISubprogram::Verify() const {
- if (!DbgNode)
- return false;
-
- if (getContext() && !getContext().Verify())
- return false;
-
- DICompositeType Ty = getType();
- if (!Ty.Verify())
- return false;
- return true;
-}
-
-/// Verify - Verify that a global variable descriptor is well formed.
-bool DIGlobalVariable::Verify() const {
- if (!DbgNode)
- return false;
-
- if (getDisplayName().empty())
- return false;
-
- if (getContext() && !getContext().Verify())
- return false;
-
- DIType Ty = getType();
- if (!Ty.Verify())
- return false;
-
- if (!getGlobal() && !getConstant())
- return false;
-
- return true;
-}
-
-/// Verify - Verify that a variable descriptor is well formed.
-bool DIVariable::Verify() const {
- if (!DbgNode)
- return false;
-
- if (getContext() && !getContext().Verify())
- return false;
-
- DIType Ty = getType();
- if (!Ty.Verify())
- return false;
-
- return true;
-}
-
-/// Verify - Verify that a location descriptor is well formed.
-bool DILocation::Verify() const {
- if (!DbgNode)
- return false;
-
- return DbgNode->getNumOperands() == 4;
-}
-
-/// Verify - Verify that a namespace descriptor is well formed.
-bool DINameSpace::Verify() const {
- if (!DbgNode)
- return false;
- if (getName().empty())
- return false;
- return true;
-}
-
-/// getOriginalTypeSize - If this type is derived from a base type then
-/// return the base type's size.
-uint64_t DIDerivedType::getOriginalTypeSize() const {
- unsigned Tag = getTag();
-
- if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
- Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
- Tag != dwarf::DW_TAG_restrict_type)
- return getSizeInBits();
-
- DIType BaseType = getTypeDerivedFrom();
-
- // If this type is not derived from any type then take conservative approach.
- if (!BaseType.isValid())
- return getSizeInBits();
-
- // If this is a derived type, go ahead and get the base type, unless it's a
- // reference then it's just the size of the field. Pointer types have no need
- // of this since they're a different type of qualification on the type.
- if (BaseType.getTag() == dwarf::DW_TAG_reference_type ||
- BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type)
- return getSizeInBits();
-
- if (BaseType.isDerivedType())
- return DIDerivedType(BaseType).getOriginalTypeSize();
-
- return BaseType.getSizeInBits();
-}
-
-/// getObjCProperty - Return property node, if this ivar is associated with one.
-MDNode *DIDerivedType::getObjCProperty() const {
- if (getVersion() <= LLVMDebugVersion11 || DbgNode->getNumOperands() <= 10)
- return NULL;
- return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10));
-}
-
-/// isInlinedFnArgument - Return true if this variable provides debugging
-/// information for an inlined function argument.
-bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
- assert(CurFn && "Invalid function");
- if (!getContext().isSubprogram())
- return false;
-  // This variable is not an inlined function argument if its scope
-  // does not describe the current function.
- return !DISubprogram(getContext()).describes(CurFn);
-}
-
-/// describes - Return true if this subprogram provides debugging
-/// information for the function F.
-bool DISubprogram::describes(const Function *F) {
- assert(F && "Invalid function");
- if (F == getFunction())
- return true;
- StringRef Name = getLinkageName();
- if (Name.empty())
- Name = getName();
- if (F->getName() == Name)
- return true;
- return false;
-}
-
-unsigned DISubprogram::isOptimized() const {
-  assert(DbgNode && "Invalid subprogram descriptor!");
- if (DbgNode->getNumOperands() == 16)
- return getUnsignedField(15);
- return 0;
-}
-
-MDNode *DISubprogram::getVariablesNodes() const {
- if (!DbgNode || DbgNode->getNumOperands() <= 19)
- return NULL;
- if (MDNode *Temp = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)))
- return dyn_cast_or_null<MDNode>(Temp->getOperand(0));
- return NULL;
-}
-
-DIArray DISubprogram::getVariables() const {
- if (!DbgNode || DbgNode->getNumOperands() <= 19)
- return DIArray();
- if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)))
- if (MDNode *A = dyn_cast_or_null<MDNode>(T->getOperand(0)))
- return DIArray(A);
- return DIArray();
-}
-
-StringRef DIScope::getFilename() const {
- if (!DbgNode)
- return StringRef();
- if (isLexicalBlockFile())
- return DILexicalBlockFile(DbgNode).getFilename();
- if (isLexicalBlock())
- return DILexicalBlock(DbgNode).getFilename();
- if (isSubprogram())
- return DISubprogram(DbgNode).getFilename();
- if (isCompileUnit())
- return DICompileUnit(DbgNode).getFilename();
- if (isNameSpace())
- return DINameSpace(DbgNode).getFilename();
- if (isType())
- return DIType(DbgNode).getFilename();
- if (isFile())
- return DIFile(DbgNode).getFilename();
- llvm_unreachable("Invalid DIScope!");
-}
-
-StringRef DIScope::getDirectory() const {
- if (!DbgNode)
- return StringRef();
- if (isLexicalBlockFile())
- return DILexicalBlockFile(DbgNode).getDirectory();
- if (isLexicalBlock())
- return DILexicalBlock(DbgNode).getDirectory();
- if (isSubprogram())
- return DISubprogram(DbgNode).getDirectory();
- if (isCompileUnit())
- return DICompileUnit(DbgNode).getDirectory();
- if (isNameSpace())
- return DINameSpace(DbgNode).getDirectory();
- if (isType())
- return DIType(DbgNode).getDirectory();
- if (isFile())
- return DIFile(DbgNode).getDirectory();
- llvm_unreachable("Invalid DIScope!");
-}
-
-DIArray DICompileUnit::getEnumTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
- return DIArray();
-
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
- if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
- return DIArray(A);
- return DIArray();
-}
-
-DIArray DICompileUnit::getRetainedTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
- return DIArray();
-
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
- if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
- return DIArray(A);
- return DIArray();
-}
-
-DIArray DICompileUnit::getSubprograms() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
- return DIArray();
-
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12)))
- if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
- return DIArray(A);
- return DIArray();
-}
-
-
-DIArray DICompileUnit::getGlobalVariables() const {
- if (!DbgNode || DbgNode->getNumOperands() < 14)
- return DIArray();
-
- if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13)))
- if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
- return DIArray(A);
- return DIArray();
-}
-
-/// fixupObjcLikeName - Replace the special characters used in typical
-/// Objective-C names with '.' in the given string.
-static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
- bool isObjCLike = false;
- for (size_t i = 0, e = Str.size(); i < e; ++i) {
- char C = Str[i];
- if (C == '[')
- isObjCLike = true;
-
- if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' ||
- C == '+' || C == '(' || C == ')'))
- Out.push_back('.');
- else
- Out.push_back(C);
- }
-}
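-
-// Example: "-[NSString stringWithFormat:]" becomes
-// "-.NSString.stringWithFormat.." since the first '[' switches the
-// rewriting on, after which '[', ']', ' ', ':', '+', '(' and ')' all map
-// to '.'.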
-
-/// getFnSpecificMDNode - Return a NamedMDNode, if available, that is
-/// suitable to hold function specific information.
-NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) {
- SmallString<32> Name = StringRef("llvm.dbg.lv.");
- StringRef FName = "fn";
- if (Fn.getFunction())
- FName = Fn.getFunction()->getName();
- else
- FName = Fn.getName();
- char One = '\1';
- if (FName.startswith(StringRef(&One, 1)))
- FName = FName.substr(1);
- fixupObjcLikeName(FName, Name);
- return M.getNamedMetadata(Name.str());
-}
-
-/// getOrInsertFnSpecificMDNode - Return a NamedMDNode that is suitable
-/// to hold function specific information.
-NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) {
- SmallString<32> Name = StringRef("llvm.dbg.lv.");
- StringRef FName = "fn";
- if (Fn.getFunction())
- FName = Fn.getFunction()->getName();
- else
- FName = Fn.getName();
- char One = '\1';
- if (FName.startswith(StringRef(&One, 1)))
- FName = FName.substr(1);
- fixupObjcLikeName(FName, Name);
-
- return M.getOrInsertNamedMetadata(Name.str());
-}
-
-/// createInlinedVariable - Create a new inlined variable based on current
-/// variable.
-/// @param DV Current Variable.
-/// @param InlinedScope Location at which the current variable is inlined.
-DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
- LLVMContext &VMContext) {
- SmallVector<Value *, 16> Elts;
- // Insert inlined scope as 7th element.
- for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
- i == 7 ? Elts.push_back(InlinedScope) :
- Elts.push_back(DV->getOperand(i));
- return DIVariable(MDNode::get(VMContext, Elts));
-}
-
-/// cleanseInlinedVariable - Remove inlined scope from the variable.
-DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
- SmallVector<Value *, 16> Elts;
-  // Replace the inlined scope (the 7th element) with a null value.
- for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
- i == 7 ?
- Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))):
- Elts.push_back(DV->getOperand(i));
- return DIVariable(MDNode::get(VMContext, Elts));
-}
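-
-// Round-trip sketch for the two helpers above (slot 7 is the inlined-at
-// operand in this layout):
-//
-//   DIVariable Inlined = createInlinedVariable(DV, InlinedScope, Ctx);
-//   DIVariable Plain   = cleanseInlinedVariable(Inlined, Ctx);
-//   // Plain holds a null i32 in slot 7 again; other operands match DV.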
-
-/// getDISubprogram - Find subprogram that is enclosing this scope.
-DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
- DIDescriptor D(Scope);
- if (D.isSubprogram())
- return DISubprogram(Scope);
-
- if (D.isLexicalBlockFile())
- return getDISubprogram(DILexicalBlockFile(Scope).getContext());
-
- if (D.isLexicalBlock())
- return getDISubprogram(DILexicalBlock(Scope).getContext());
-
- return DISubprogram();
-}
-
-/// getDICompositeType - Find underlying composite type.
-DICompositeType llvm::getDICompositeType(DIType T) {
- if (T.isCompositeType())
- return DICompositeType(T);
-
- if (T.isDerivedType())
- return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
-
- return DICompositeType();
-}
-
-/// isSubprogramContext - Return true if Context is either a subprogram
-/// or another context nested inside a subprogram.
-bool llvm::isSubprogramContext(const MDNode *Context) {
- if (!Context)
- return false;
- DIDescriptor D(Context);
- if (D.isSubprogram())
- return true;
- if (D.isType())
- return isSubprogramContext(DIType(Context).getContext());
- return false;
-}
-
-//===----------------------------------------------------------------------===//
-// DebugInfoFinder implementations.
-//===----------------------------------------------------------------------===//
-
-/// processModule - Process the entire module and collect debug info.
-void DebugInfoFinder::processModule(Module &M) {
- if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- DICompileUnit CU(CU_Nodes->getOperand(i));
- addCompileUnit(CU);
- if (CU.getVersion() > LLVMDebugVersion10) {
- DIArray GVs = CU.getGlobalVariables();
- for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
- DIGlobalVariable DIG(GVs.getElement(i));
- if (addGlobalVariable(DIG))
- processType(DIG.getType());
- }
- DIArray SPs = CU.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
- processSubprogram(DISubprogram(SPs.getElement(i)));
- DIArray EnumTypes = CU.getEnumTypes();
- for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
- processType(DIType(EnumTypes.getElement(i)));
- DIArray RetainedTypes = CU.getRetainedTypes();
- for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
- processType(DIType(RetainedTypes.getElement(i)));
- return;
- }
- }
- }
-
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
- for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
- ++BI) {
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
- processDeclare(DDI);
-
- DebugLoc Loc = BI->getDebugLoc();
- if (Loc.isUnknown())
- continue;
-
- LLVMContext &Ctx = BI->getContext();
- DIDescriptor Scope(Loc.getScope(Ctx));
-
- if (Scope.isCompileUnit())
- addCompileUnit(DICompileUnit(Scope));
- else if (Scope.isSubprogram())
- processSubprogram(DISubprogram(Scope));
- else if (Scope.isLexicalBlockFile()) {
- DILexicalBlockFile DBF = DILexicalBlockFile(Scope);
- processLexicalBlock(DILexicalBlock(DBF.getScope()));
- }
- else if (Scope.isLexicalBlock())
- processLexicalBlock(DILexicalBlock(Scope));
-
- if (MDNode *IA = Loc.getInlinedAt(Ctx))
- processLocation(DILocation(IA));
- }
-
- if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
- if (addGlobalVariable(DIG)) {
- if (DIG.getVersion() <= LLVMDebugVersion10)
- addCompileUnit(DIG.getCompileUnit());
- processType(DIG.getType());
- }
- }
- }
-
- if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- processSubprogram(DISubprogram(NMD->getOperand(i)));
-}
-
-/// processLocation - Process DILocation.
-void DebugInfoFinder::processLocation(DILocation Loc) {
- if (!Loc.Verify()) return;
- DIDescriptor S(Loc.getScope());
- if (S.isCompileUnit())
- addCompileUnit(DICompileUnit(S));
- else if (S.isSubprogram())
- processSubprogram(DISubprogram(S));
- else if (S.isLexicalBlock())
- processLexicalBlock(DILexicalBlock(S));
- else if (S.isLexicalBlockFile()) {
- DILexicalBlockFile DBF = DILexicalBlockFile(S);
- processLexicalBlock(DILexicalBlock(DBF.getScope()));
- }
- processLocation(Loc.getOrigLocation());
-}
-
-/// processType - Process DIType.
-void DebugInfoFinder::processType(DIType DT) {
- if (!addType(DT))
- return;
- if (DT.getVersion() <= LLVMDebugVersion10)
- addCompileUnit(DT.getCompileUnit());
- if (DT.isCompositeType()) {
- DICompositeType DCT(DT);
- processType(DCT.getTypeDerivedFrom());
- DIArray DA = DCT.getTypeArray();
- for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
- DIDescriptor D = DA.getElement(i);
- if (D.isType())
- processType(DIType(D));
- else if (D.isSubprogram())
- processSubprogram(DISubprogram(D));
- }
- } else if (DT.isDerivedType()) {
- DIDerivedType DDT(DT);
- processType(DDT.getTypeDerivedFrom());
- }
-}
-
-/// processLexicalBlock - Process DILexicalBlock.
-void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
- DIScope Context = LB.getContext();
- if (Context.isLexicalBlock())
- return processLexicalBlock(DILexicalBlock(Context));
- else if (Context.isLexicalBlockFile()) {
- DILexicalBlockFile DBF = DILexicalBlockFile(Context);
- return processLexicalBlock(DILexicalBlock(DBF.getScope()));
- }
- else
- return processSubprogram(DISubprogram(Context));
-}
-
-/// processSubprogram - Process DISubprogram.
-void DebugInfoFinder::processSubprogram(DISubprogram SP) {
- if (!addSubprogram(SP))
- return;
- if (SP.getVersion() <= LLVMDebugVersion10)
- addCompileUnit(SP.getCompileUnit());
- processType(SP.getType());
-}
-
-/// processDeclare - Process DbgDeclareInst.
-void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
- MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
- if (!N) return;
-
- DIDescriptor DV(N);
- if (!DV.isVariable())
- return;
-
- if (!NodesSeen.insert(DV))
- return;
- if (DIVariable(N).getVersion() <= LLVMDebugVersion10)
- addCompileUnit(DIVariable(N).getCompileUnit());
- processType(DIVariable(N).getType());
-}
-
-/// addType - Add type into Tys.
-bool DebugInfoFinder::addType(DIType DT) {
- if (!DT.isValid())
- return false;
-
- if (!NodesSeen.insert(DT))
- return false;
-
- TYs.push_back(DT);
- return true;
-}
-
-/// addCompileUnit - Add compile unit into CUs.
-bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
- if (!CU.Verify())
- return false;
-
- if (!NodesSeen.insert(CU))
- return false;
-
- CUs.push_back(CU);
- return true;
-}
-
-/// addGlobalVariable - Add global variable into GVs.
-bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
- if (!DIDescriptor(DIG).isGlobalVariable())
- return false;
-
- if (!NodesSeen.insert(DIG))
- return false;
-
- GVs.push_back(DIG);
- return true;
-}
-
-/// addSubprogram - Add subprogram into SPs.
-bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
- if (!DIDescriptor(SP).isSubprogram())
- return false;
-
- if (!NodesSeen.insert(SP))
- return false;
-
- SPs.push_back(SP);
- return true;
-}
-
-//===----------------------------------------------------------------------===//
-// DIDescriptor: dump routines for all descriptors.
-//===----------------------------------------------------------------------===//
-
-/// dump - Print descriptor to dbgs() with a newline.
-void DIDescriptor::dump() const {
- print(dbgs()); dbgs() << '\n';
-}
-
-/// print - Print descriptor.
-void DIDescriptor::print(raw_ostream &OS) const {
- if (!DbgNode) return;
-
- if (const char *Tag = dwarf::TagString(getTag()))
- OS << "[ " << Tag << " ]";
-
- if (this->isSubrange()) {
- DISubrange(DbgNode).printInternal(OS);
- } else if (this->isCompileUnit()) {
- DICompileUnit(DbgNode).printInternal(OS);
- } else if (this->isFile()) {
- DIFile(DbgNode).printInternal(OS);
- } else if (this->isEnumerator()) {
- DIEnumerator(DbgNode).printInternal(OS);
- } else if (this->isBasicType()) {
- DIType(DbgNode).printInternal(OS);
- } else if (this->isDerivedType()) {
- DIDerivedType(DbgNode).printInternal(OS);
- } else if (this->isCompositeType()) {
- DICompositeType(DbgNode).printInternal(OS);
- } else if (this->isSubprogram()) {
- DISubprogram(DbgNode).printInternal(OS);
- } else if (this->isGlobalVariable()) {
- DIGlobalVariable(DbgNode).printInternal(OS);
- } else if (this->isVariable()) {
- DIVariable(DbgNode).printInternal(OS);
- } else if (this->isObjCProperty()) {
- DIObjCProperty(DbgNode).printInternal(OS);
- } else if (this->isScope()) {
- DIScope(DbgNode).printInternal(OS);
- }
-}
-
-void DISubrange::printInternal(raw_ostream &OS) const {
- OS << " [" << getLo() << ", " << getHi() << ']';
-}
-
-void DIScope::printInternal(raw_ostream &OS) const {
- OS << " [" << getDirectory() << "/" << getFilename() << ']';
-}
-
-void DICompileUnit::printInternal(raw_ostream &OS) const {
- DIScope::printInternal(OS);
- if (unsigned Lang = getLanguage())
- OS << " [" << dwarf::LanguageString(Lang) << ']';
-}
-
-void DIEnumerator::printInternal(raw_ostream &OS) const {
- OS << " [" << getName() << " :: " << getEnumValue() << ']';
-}
-
-void DIType::printInternal(raw_ostream &OS) const {
- if (!DbgNode) return;
-
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << "]";
-
- // TODO: Print context?
-
- OS << " [line " << getLineNumber()
- << ", size " << getSizeInBits()
- << ", align " << getAlignInBits()
- << ", offset " << getOffsetInBits();
- if (isBasicType())
- if (const char *Enc =
- dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding()))
- OS << ", enc " << Enc;
- OS << "]";
-
- if (isPrivate())
- OS << " [private]";
- else if (isProtected())
- OS << " [protected]";
-
- if (isForwardDecl())
- OS << " [fwd]";
-}
-
-void DIDerivedType::printInternal(raw_ostream &OS) const {
- DIType::printInternal(OS);
- OS << " [from " << getTypeDerivedFrom().getName() << ']';
-}
-
-void DICompositeType::printInternal(raw_ostream &OS) const {
- DIType::printInternal(OS);
- DIArray A = getTypeArray();
- OS << " [" << A.getNumElements() << " elements]";
-}
-
-void DISubprogram::printInternal(raw_ostream &OS) const {
- // TODO : Print context
- OS << " [line " << getLineNumber() << ']';
-
- if (isLocalToUnit())
- OS << " [local]";
-
- if (isDefinition())
- OS << " [def]";
-
- if (getScopeLineNumber() != getLineNumber())
- OS << " [scope " << getScopeLineNumber() << "]";
-
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << ']';
-}
-
-void DIGlobalVariable::printInternal(raw_ostream &OS) const {
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << ']';
-
- OS << " [line " << getLineNumber() << ']';
-
- // TODO : Print context
-
- if (isLocalToUnit())
- OS << " [local]";
-
- if (isDefinition())
- OS << " [def]";
-}
-
-void DIVariable::printInternal(raw_ostream &OS) const {
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << ']';
-
- OS << " [line " << getLineNumber() << ']';
-}
-
-void DIObjCProperty::printInternal(raw_ostream &OS) const {
- StringRef Name = getObjCPropertyName();
- if (!Name.empty())
- OS << " [" << Name << ']';
-
- OS << " [line " << getLineNumber()
- << ", properties " << getUnsignedField(6) << ']';
-}
-
-static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
- const LLVMContext &Ctx) {
- if (!DL.isUnknown()) { // Print source line info.
- DIScope Scope(DL.getScope(Ctx));
- // Omit the directory, because it's likely to be long and uninteresting.
- if (Scope.Verify())
- CommentOS << Scope.getFilename();
- else
- CommentOS << "<unknown>";
- CommentOS << ':' << DL.getLine();
- if (DL.getCol() != 0)
- CommentOS << ':' << DL.getCol();
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
- if (!InlinedAtDL.isUnknown()) {
- CommentOS << " @[ ";
- printDebugLoc(InlinedAtDL, CommentOS, Ctx);
- CommentOS << " ]";
- }
- }
-}
-
-void DIVariable::printExtendedName(raw_ostream &OS) const {
- const LLVMContext &Ctx = DbgNode->getContext();
- StringRef Res = getName();
- if (!Res.empty())
- OS << Res << "," << getLineNumber();
- if (MDNode *InlinedAt = getInlinedAt()) {
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
- if (!InlinedAtDL.isUnknown()) {
- OS << " @[";
- printDebugLoc(InlinedAtDL, OS, Ctx);
- OS << "]";
- }
- }
-}
diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp
deleted file mode 100644
index c6a30536e69f..000000000000
--- a/lib/VMCore/DebugLoc.cpp
+++ /dev/null
@@ -1,315 +0,0 @@
-//===-- DebugLoc.cpp - Implement DebugLoc class ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/DebugLoc.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/ADT/DenseMapInfo.h"
-#include "LLVMContextImpl.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// DebugLoc Implementation
-//===----------------------------------------------------------------------===//
-
-MDNode *DebugLoc::getScope(const LLVMContext &Ctx) const {
- if (ScopeIdx == 0) return 0;
-
- if (ScopeIdx > 0) {
- // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
- // position specified.
- assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
- "Invalid ScopeIdx!");
- return Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
- }
-
- // Otherwise, the index is in the ScopeInlinedAtRecords array.
- assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
- "Invalid ScopeIdx");
- return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
-}
-
-MDNode *DebugLoc::getInlinedAt(const LLVMContext &Ctx) const {
- // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
- // position specified. Zero is invalid.
- if (ScopeIdx >= 0) return 0;
-
- // Otherwise, the index is in the ScopeInlinedAtRecords array.
- assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
- "Invalid ScopeIdx");
- return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
-}
-
-/// Return both the Scope and the InlinedAt values.
-void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
- const LLVMContext &Ctx) const {
- if (ScopeIdx == 0) {
- Scope = IA = 0;
- return;
- }
-
- if (ScopeIdx > 0) {
- // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
- // position specified.
- assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
- "Invalid ScopeIdx!");
- Scope = Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
- IA = 0;
- return;
- }
-
- // Otherwise, the index is in the ScopeInlinedAtRecords array.
- assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
- "Invalid ScopeIdx");
- Scope = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
- IA = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
-}
-
-
-DebugLoc DebugLoc::get(unsigned Line, unsigned Col,
- MDNode *Scope, MDNode *InlinedAt) {
- DebugLoc Result;
-
- // If no scope is available, this is an unknown location.
- if (Scope == 0) return Result;
-
- // Saturate line and col to "unknown".
- if (Col > 255) Col = 0;
- if (Line >= (1 << 24)) Line = 0;
- Result.LineCol = Line | (Col << 24);
-
- LLVMContext &Ctx = Scope->getContext();
-
- // If there is no inlined-at location, use the ScopeRecords array.
- if (InlinedAt == 0)
- Result.ScopeIdx = Ctx.pImpl->getOrAddScopeRecordIdxEntry(Scope, 0);
- else
- Result.ScopeIdx = Ctx.pImpl->getOrAddScopeInlinedAtIdxEntry(Scope,
- InlinedAt, 0);
-
- return Result;
-}
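-
-// The packing above in miniature: the low 24 bits carry the line, the high
-// 8 bits the column, and out-of-range values saturate to 0 ("unknown").
-// For line 7, column 3:
-//
-//   LineCol = 7 | (3 << 24);            // 0x03000007
-//   unsigned Line = LineCol & 0xFFFFFF; // 7
-//   unsigned Col  = LineCol >> 24;      // 3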
-
-/// getAsMDNode - This method converts the compressed DebugLoc node into a
-/// DILocation-compatible MDNode.
-MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const {
- if (isUnknown()) return 0;
-
- MDNode *Scope, *IA;
- getScopeAndInlinedAt(Scope, IA, Ctx);
- assert(Scope && "If scope is null, this should be isUnknown()");
-
- LLVMContext &Ctx2 = Scope->getContext();
- Type *Int32 = Type::getInt32Ty(Ctx2);
- Value *Elts[] = {
- ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()),
- Scope, IA
- };
- return MDNode::get(Ctx2, Elts);
-}
-
-/// getFromDILocation - Translate the DILocation quad into a DebugLoc.
-DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
- DILocation Loc(N);
- MDNode *Scope = Loc.getScope();
- if (Scope == 0) return DebugLoc();
- return get(Loc.getLineNumber(), Loc.getColumnNumber(), Scope,
- Loc.getOrigLocation());
-}
-
-/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc.
-DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) {
- DILexicalBlock LexBlock(N);
- MDNode *Scope = LexBlock.getContext();
- if (Scope == 0) return DebugLoc();
- return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope, NULL);
-}
-
-void DebugLoc::dump(const LLVMContext &Ctx) const {
-#ifndef NDEBUG
- if (!isUnknown()) {
- dbgs() << getLine();
- if (getCol() != 0)
- dbgs() << ',' << getCol();
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt(Ctx));
- if (!InlinedAtDL.isUnknown()) {
- dbgs() << " @ ";
- InlinedAtDL.dump(Ctx);
- } else
- dbgs() << "\n";
- }
-#endif
-}
-
-//===----------------------------------------------------------------------===//
-// DenseMap specialization
-//===----------------------------------------------------------------------===//
-
-unsigned DenseMapInfo<DebugLoc>::getHashValue(const DebugLoc &Key) {
- return static_cast<unsigned>(hash_combine(Key.LineCol, Key.ScopeIdx));
-}
-
-//===----------------------------------------------------------------------===//
-// LLVMContextImpl Implementation
-//===----------------------------------------------------------------------===//
-
-int LLVMContextImpl::getOrAddScopeRecordIdxEntry(MDNode *Scope,
- int ExistingIdx) {
- // If we already have an entry for this scope, return it.
- int &Idx = ScopeRecordIdx[Scope];
- if (Idx) return Idx;
-
- // If we don't have an entry, but ExistingIdx is specified, use it.
- if (ExistingIdx)
- return Idx = ExistingIdx;
-
- // Otherwise add a new entry.
-
-  // Start out ScopeRecords with a minimal reasonable size to avoid
-  // excessive reallocation early on.
- if (ScopeRecords.empty())
- ScopeRecords.reserve(128);
-
-  // Indices are biased by 1 so that 0 can mean "no entry".
- Idx = ScopeRecords.size()+1;
- ScopeRecords.push_back(DebugRecVH(Scope, this, Idx));
- return Idx;
-}
-
-int LLVMContextImpl::getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,
- int ExistingIdx) {
- // If we already have an entry, return it.
- int &Idx = ScopeInlinedAtIdx[std::make_pair(Scope, IA)];
- if (Idx) return Idx;
-
- // If we don't have an entry, but ExistingIdx is specified, use it.
- if (ExistingIdx)
- return Idx = ExistingIdx;
-
-  // Start out ScopeInlinedAtRecords with a minimal reasonable size to avoid
-  // excessive reallocation early on.
- if (ScopeInlinedAtRecords.empty())
- ScopeInlinedAtRecords.reserve(128);
-
-  // Indices are biased by 1 and negated so that 0 still means "no entry".
- Idx = -ScopeInlinedAtRecords.size()-1;
- ScopeInlinedAtRecords.push_back(std::make_pair(DebugRecVH(Scope, this, Idx),
- DebugRecVH(IA, this, Idx)));
- return Idx;
-}
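-
-// The index scheme shared by both tables, summarized: Idx == 0 means
-// "unknown"; a positive Idx names ScopeRecords[Idx-1]; a negative Idx names
-// ScopeInlinedAtRecords[-Idx-1]. So, for example, Idx == 3 decodes to
-// ScopeRecords[2] and Idx == -3 to ScopeInlinedAtRecords[2]. The bias by
-// one keeps live indices nonzero, so a default-constructed DenseMap slot
-// reads as "no entry yet".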
-
-
-//===----------------------------------------------------------------------===//
-// DebugRecVH Implementation
-//===----------------------------------------------------------------------===//
-
-/// deleted - The MDNode this is pointing to got deleted, so this pointer needs
-/// to drop to null and we need to remove our entry from the DenseMap.
-void DebugRecVH::deleted() {
- // If this is a non-canonical reference, just drop the value to null, we know
- // it doesn't have a map entry.
- if (Idx == 0) {
- setValPtr(0);
- return;
- }
-
- MDNode *Cur = get();
-
- // If the index is positive, it is an entry in ScopeRecords.
- if (Idx > 0) {
- assert(Ctx->ScopeRecordIdx[Cur] == Idx && "Mapping out of date!");
- Ctx->ScopeRecordIdx.erase(Cur);
- // Reset this VH to null and we're done.
- setValPtr(0);
- Idx = 0;
- return;
- }
-
- // Otherwise, it is an entry in ScopeInlinedAtRecords, we don't know if it
- // is the scope or the inlined-at record entry.
- assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
- std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
- assert((this == &Entry.first || this == &Entry.second) &&
- "Mapping out of date!");
-
- MDNode *OldScope = Entry.first.get();
- MDNode *OldInlinedAt = Entry.second.get();
- assert(OldScope != 0 && OldInlinedAt != 0 &&
- "Entry should be non-canonical if either val dropped to null");
-
- // Otherwise, we do have an entry in it, nuke it and we're done.
- assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&&
- "Mapping out of date");
- Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
-
- // Reset this VH to null. Drop both 'Idx' values to null to indicate that
- // we're in non-canonical form now.
- setValPtr(0);
- Entry.first.Idx = Entry.second.Idx = 0;
-}
-
-void DebugRecVH::allUsesReplacedWith(Value *NewVa) {
- // If being replaced with a non-mdnode value (e.g. undef) handle this as if
- // the mdnode got deleted.
- MDNode *NewVal = dyn_cast<MDNode>(NewVa);
- if (NewVal == 0) return deleted();
-
- // If this is a non-canonical reference, just change it, we know it already
- // doesn't have a map entry.
- if (Idx == 0) {
- setValPtr(NewVa);
- return;
- }
-
- MDNode *OldVal = get();
- assert(OldVal != NewVa && "Node replaced with self?");
-
- // If the index is positive, it is an entry in ScopeRecords.
- if (Idx > 0) {
- assert(Ctx->ScopeRecordIdx[OldVal] == Idx && "Mapping out of date!");
- Ctx->ScopeRecordIdx.erase(OldVal);
- setValPtr(NewVal);
-
- int NewEntry = Ctx->getOrAddScopeRecordIdxEntry(NewVal, Idx);
-
- // If NewVal already has an entry, this becomes a non-canonical reference,
- // just drop Idx to 0 to signify this.
- if (NewEntry != Idx)
- Idx = 0;
- return;
- }
-
- // Otherwise, it is an entry in ScopeInlinedAtRecords, we don't know if it
- // is the scope or the inlined-at record entry.
- assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
- std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
- assert((this == &Entry.first || this == &Entry.second) &&
- "Mapping out of date!");
-
- MDNode *OldScope = Entry.first.get();
- MDNode *OldInlinedAt = Entry.second.get();
- assert(OldScope != 0 && OldInlinedAt != 0 &&
- "Entry should be non-canonical if either val dropped to null");
-
- // Otherwise, we do have an entry in it, nuke it and we're done.
- assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&&
- "Mapping out of date");
- Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
-
- // Reset this VH to the new value.
- setValPtr(NewVal);
-
- int NewIdx = Ctx->getOrAddScopeInlinedAtIdxEntry(Entry.first.get(),
- Entry.second.get(), Idx);
- // If NewVal already has an entry, this becomes a non-canonical reference,
- // just drop Idx to 0 to signify this.
- if (NewIdx != Idx) {
- std::pair<DebugRecVH, DebugRecVH> &Entry=Ctx->ScopeInlinedAtRecords[-Idx-1];
- Entry.first.Idx = Entry.second.Idx = 0;
- }
-}
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
deleted file mode 100644
index 77b2403d87dd..000000000000
--- a/lib/VMCore/Dominators.cpp
+++ /dev/null
@@ -1,302 +0,0 @@
-//===- Dominators.cpp - Dominator Calculation -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements simple dominator construction algorithms for finding
-// forward dominators. Postdominators are available in libanalysis, but are not
-// included in libvmcore, because they are not needed. Forward dominators are
-// needed to support the Verifier pass.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/Instructions.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/CommandLine.h"
-#include <algorithm>
-using namespace llvm;
-
-// Always verify dominfo if expensive checking is enabled.
-#ifdef XDEBUG
-static bool VerifyDomInfo = true;
-#else
-static bool VerifyDomInfo = false;
-#endif
-static cl::opt<bool,true>
-VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
- cl::desc("Verify dominator info (time consuming)"));
-
-bool BasicBlockEdge::isSingleEdge() const {
- const TerminatorInst *TI = Start->getTerminator();
- unsigned NumEdgesToEnd = 0;
- for (unsigned int i = 0, n = TI->getNumSuccessors(); i < n; ++i) {
- if (TI->getSuccessor(i) == End)
- ++NumEdgesToEnd;
- if (NumEdgesToEnd >= 2)
- return false;
- }
- assert(NumEdgesToEnd == 1);
- return true;
-}
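-
-// Example: an unconditional branch contributes exactly one Start->End edge,
-// so isSingleEdge() returns true; for
-// "switch i32 %x, label %D [ i32 0, label %D ]" the edge to %D appears
-// twice, and it returns false.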
-
-//===----------------------------------------------------------------------===//
-// DominatorTree Implementation
-//===----------------------------------------------------------------------===//
-//
-// Provide public access to DominatorTree information. Implementation details
-// can be found in DominatorInternals.h.
-//
-//===----------------------------------------------------------------------===//
-
-TEMPLATE_INSTANTIATION(class llvm::DomTreeNodeBase<BasicBlock>);
-TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>);
-
-char DominatorTree::ID = 0;
-INITIALIZE_PASS(DominatorTree, "domtree",
- "Dominator Tree Construction", true, true)
-
-bool DominatorTree::runOnFunction(Function &F) {
- DT->recalculate(F);
- return false;
-}
-
-void DominatorTree::verifyAnalysis() const {
- if (!VerifyDomInfo) return;
-
- Function &F = *getRoot()->getParent();
-
- DominatorTree OtherDT;
- OtherDT.getBase().recalculate(F);
- if (compare(OtherDT)) {
- errs() << "DominatorTree is not up to date!\nComputed:\n";
- print(errs());
- errs() << "\nActual:\n";
- OtherDT.print(errs());
- abort();
- }
-}
-
-void DominatorTree::print(raw_ostream &OS, const Module *) const {
- DT->print(OS);
-}
-
-// dominates - Return true if Def dominates a use in User. This performs
-// the special checks necessary if Def and User are in the same basic block.
-// Note that Def doesn't dominate a use in Def itself!
-bool DominatorTree::dominates(const Instruction *Def,
- const Instruction *User) const {
- const BasicBlock *UseBB = User->getParent();
- const BasicBlock *DefBB = Def->getParent();
-
- // Any unreachable use is dominated, even if Def == User.
- if (!isReachableFromEntry(UseBB))
- return true;
-
- // Unreachable definitions don't dominate anything.
- if (!isReachableFromEntry(DefBB))
- return false;
-
- // An instruction doesn't dominate a use in itself.
- if (Def == User)
- return false;
-
- // The value defined by an invoke dominates an instruction only if
- // it dominates every instruction in UseBB.
- // A PHI is dominated only if the instruction dominates every possible use
- // in the UseBB.
- if (isa<InvokeInst>(Def) || isa<PHINode>(User))
- return dominates(Def, UseBB);
-
- if (DefBB != UseBB)
- return dominates(DefBB, UseBB);
-
- // Loop through the basic block until we find Def or User.
- BasicBlock::const_iterator I = DefBB->begin();
- for (; &*I != Def && &*I != User; ++I)
- /*empty*/;
-
- return &*I == Def;
-}
-
-// true if Def would dominate a use in any instruction in UseBB.
-// note that dominates(Def, Def->getParent()) is false.
-bool DominatorTree::dominates(const Instruction *Def,
- const BasicBlock *UseBB) const {
- const BasicBlock *DefBB = Def->getParent();
-
- // Any unreachable use is dominated, even if DefBB == UseBB.
- if (!isReachableFromEntry(UseBB))
- return true;
-
- // Unreachable definitions don't dominate anything.
- if (!isReachableFromEntry(DefBB))
- return false;
-
- if (DefBB == UseBB)
- return false;
-
- const InvokeInst *II = dyn_cast<InvokeInst>(Def);
- if (!II)
- return dominates(DefBB, UseBB);
-
- // Invoke results are only usable in the normal destination, not in the
- // exceptional destination.
- BasicBlock *NormalDest = II->getNormalDest();
- BasicBlockEdge E(DefBB, NormalDest);
- return dominates(E, UseBB);
-}
-
-bool DominatorTree::dominates(const BasicBlockEdge &BBE,
- const BasicBlock *UseBB) const {
- // Assert that we have a single edge. We could handle them by simply
- // returning false, but since isSingleEdge is linear on the number of
- // edges, the callers can normally handle them more efficiently.
- assert(BBE.isSingleEdge());
-
- // If the BB the edge ends in doesn't dominate the use BB, then the
- // edge also doesn't.
- const BasicBlock *Start = BBE.getStart();
- const BasicBlock *End = BBE.getEnd();
- if (!dominates(End, UseBB))
- return false;
-
- // Simple case: if the end BB has a single predecessor, the fact that it
- // dominates the use block implies that the edge also does.
- if (End->getSinglePredecessor())
- return true;
-
- // The normal edge from the invoke is critical. Conceptually, what we would
- // like to do is split it and check if the new block dominates the use.
- // With X being the new block, the graph would look like:
- //
- // DefBB
- // /\ . .
- // / \ . .
- // / \ . .
- // / \ | |
- // A X B C
- // | \ | /
- // . \|/
- // . NormalDest
- // .
- //
- // Given the definition of dominance, NormalDest is dominated by X iff X
- // dominates all of NormalDest's predecessors (X, B, C in the example). X
- // trivially dominates itself, so we only have to find if it dominates the
- // other predecessors. Since the only way out of X is via NormalDest, X can
- // only properly dominate a node if NormalDest dominates that node too.
- for (const_pred_iterator PI = pred_begin(End), E = pred_end(End);
- PI != E; ++PI) {
- const BasicBlock *BB = *PI;
- if (BB == Start)
- continue;
-
- if (!dominates(End, BB))
- return false;
- }
- return true;
-}
-
-bool DominatorTree::dominates(const BasicBlockEdge &BBE,
- const Use &U) const {
- // Assert that we have a single edge. We could handle them by simply
- // returning false, but since isSingleEdge is linear on the number of
- // edges, the callers can normally handle them more efficiently.
- assert(BBE.isSingleEdge());
-
- Instruction *UserInst = cast<Instruction>(U.getUser());
- // A PHI in the end of the edge is dominated by it.
- PHINode *PN = dyn_cast<PHINode>(UserInst);
- if (PN && PN->getParent() == BBE.getEnd() &&
- PN->getIncomingBlock(U) == BBE.getStart())
- return true;
-
- // Otherwise use the edge-dominates-block query, which
- // handles the crazy critical edge cases properly.
- const BasicBlock *UseBB;
- if (PN)
- UseBB = PN->getIncomingBlock(U);
- else
- UseBB = UserInst->getParent();
- return dominates(BBE, UseBB);
-}
-
-bool DominatorTree::dominates(const Instruction *Def,
- const Use &U) const {
- Instruction *UserInst = cast<Instruction>(U.getUser());
- const BasicBlock *DefBB = Def->getParent();
-
- // Determine the block in which the use happens. PHI nodes use
- // their operands on edges; simulate this by thinking of the use
- // happening at the end of the predecessor block.
- const BasicBlock *UseBB;
- if (PHINode *PN = dyn_cast<PHINode>(UserInst))
- UseBB = PN->getIncomingBlock(U);
- else
- UseBB = UserInst->getParent();
-
- // Any unreachable use is dominated, even if Def == User.
- if (!isReachableFromEntry(UseBB))
- return true;
-
- // Unreachable definitions don't dominate anything.
- if (!isReachableFromEntry(DefBB))
- return false;
-
- // Invoke instructions define their return values on the edges
- // to their normal successors, so we have to handle them specially.
- // Among other things, this means they don't dominate anything in
- // their own block, except possibly a phi, so we don't need to
- // walk the block in any case.
- if (const InvokeInst *II = dyn_cast<InvokeInst>(Def)) {
- BasicBlock *NormalDest = II->getNormalDest();
- BasicBlockEdge E(DefBB, NormalDest);
- return dominates(E, U);
- }
-
- // If the def and use are in different blocks, do a simple CFG dominator
- // tree query.
- if (DefBB != UseBB)
- return dominates(DefBB, UseBB);
-
- // Ok, def and use are in the same block. If the def is an invoke, it
- // doesn't dominate anything in the block. If it's a PHI, it dominates
- // everything in the block.
- if (isa<PHINode>(UserInst))
- return true;
-
- // Otherwise, just loop through the basic block until we find Def or User.
- BasicBlock::const_iterator I = DefBB->begin();
- for (; &*I != Def && &*I != UserInst; ++I)
- /*empty*/;
-
- return &*I != UserInst;
-}
-
-bool DominatorTree::isReachableFromEntry(const Use &U) const {
- Instruction *I = dyn_cast<Instruction>(U.getUser());
-
- // ConstantExprs aren't really reachable from the entry block, but they
- // don't need to be treated like unreachable code either.
- if (!I) return true;
-
- // PHI nodes use their operands on their incoming edges.
- if (PHINode *PN = dyn_cast<PHINode>(I))
- return isReachableFromEntry(PN->getIncomingBlock(U));
-
- // Everything else uses their operands in their own block.
- return isReachableFromEntry(I->getParent());
-}
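For reference, the dominance queries removed above were consumed from a FunctionPass via getAnalysis<DominatorTree>(). A minimal sketch against the deleted file's own headers (the pass name and the operand walk are illustrative only, and pass registration is omitted):

    #include "llvm/Analysis/Dominators.h"
    #include "llvm/Function.h"
    #include "llvm/Instruction.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct DomQuery : public FunctionPass {
      static char ID;
      DomQuery() : FunctionPass(ID) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<DominatorTree>();
        AU.setPreservesAll();
      }

      virtual bool runOnFunction(Function &F) {
        DominatorTree &DT = getAnalysis<DominatorTree>();
        // For each operand, ask whether its definition dominates the use;
        // this exercises the Use-based overload defined above.
        for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
          for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
            for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
              if (Instruction *Def = dyn_cast<Instruction>(I->getOperand(i)))
                (void)DT.dominates(Def, I->getOperandUse(i));
        return false;
      }
    };
    }
    char DomQuery::ID = 0;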
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
deleted file mode 100644
index 9c4f2d939952..000000000000
--- a/lib/VMCore/Function.cpp
+++ /dev/null
@@ -1,668 +0,0 @@
-//===-- Function.cpp - Implement the Global object classes ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Function class for the VMCore library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Module.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/LeakDetector.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/StringPool.h"
-#include "llvm/Support/RWMutex.h"
-#include "llvm/Support/Threading.h"
-#include "SymbolTableListTraitsImpl.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
-using namespace llvm;
-
-// Explicit instantiations of SymbolTableListTraits since some of the methods
-// are not in the public header file...
-template class llvm::SymbolTableListTraits<Argument, Function>;
-template class llvm::SymbolTableListTraits<BasicBlock, Function>;
-
-//===----------------------------------------------------------------------===//
-// Argument Implementation
-//===----------------------------------------------------------------------===//
-
-void Argument::anchor() { }
-
-Argument::Argument(Type *Ty, const Twine &Name, Function *Par)
- : Value(Ty, Value::ArgumentVal) {
- Parent = 0;
-
- // Make sure that we get added to a function
- LeakDetector::addGarbageObject(this);
-
- if (Par)
- Par->getArgumentList().push_back(this);
- setName(Name);
-}
-
-void Argument::setParent(Function *parent) {
- if (getParent())
- LeakDetector::addGarbageObject(this);
- Parent = parent;
- if (getParent())
- LeakDetector::removeGarbageObject(this);
-}
-
-/// getArgNo - Return the index of this formal argument in its containing
-/// function. For example in "void foo(int a, float b)" a is 0 and b is 1.
-unsigned Argument::getArgNo() const {
- const Function *F = getParent();
- assert(F && "Argument is not in a function");
-
- Function::const_arg_iterator AI = F->arg_begin();
- unsigned ArgIdx = 0;
- for (; &*AI != this; ++AI)
- ++ArgIdx;
-
- return ArgIdx;
-}
-
-/// hasByValAttr - Return true if this argument has the byval attribute on it
-/// in its containing function.
-bool Argument::hasByValAttr() const {
- if (!getType()->isPointerTy()) return false;
- return getParent()->getParamAttributes(getArgNo()+1).
- hasAttribute(Attributes::ByVal);
-}
-
-unsigned Argument::getParamAlignment() const {
- assert(getType()->isPointerTy() && "Only pointers have alignments");
- return getParent()->getParamAlignment(getArgNo()+1);
-}
-
-/// hasNestAttr - Return true if this argument has the nest attribute on
-/// it in its containing function.
-bool Argument::hasNestAttr() const {
- if (!getType()->isPointerTy()) return false;
- return getParent()->getParamAttributes(getArgNo()+1).
- hasAttribute(Attributes::Nest);
-}
-
-/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
-/// it in its containing function.
-bool Argument::hasNoAliasAttr() const {
- if (!getType()->isPointerTy()) return false;
- return getParent()->getParamAttributes(getArgNo()+1).
- hasAttribute(Attributes::NoAlias);
-}
-
-/// hasNoCaptureAttr - Return true if this argument has the nocapture attribute
-/// on it in its containing function.
-bool Argument::hasNoCaptureAttr() const {
- if (!getType()->isPointerTy()) return false;
- return getParent()->getParamAttributes(getArgNo()+1).
- hasAttribute(Attributes::NoCapture);
-}
-
-/// hasSRetAttr - Return true if this argument has the sret attribute on
-/// it in its containing function.
-bool Argument::hasStructRetAttr() const {
- if (!getType()->isPointerTy()) return false;
- if (this != getParent()->arg_begin())
- return false; // StructRet param must be first param
- return getParent()->getParamAttributes(1).
- hasAttribute(Attributes::StructRet);
-}
-
-/// addAttr - Add an Attribute to an argument
-void Argument::addAttr(Attributes attr) {
- getParent()->addAttribute(getArgNo() + 1, attr);
-}
-
-/// removeAttr - Remove an Attribute from an argument
-void Argument::removeAttr(Attributes attr) {
- getParent()->removeAttribute(getArgNo() + 1, attr);
-}
-
-
-//===----------------------------------------------------------------------===//
-// Helper Methods in Function
-//===----------------------------------------------------------------------===//
-
-LLVMContext &Function::getContext() const {
- return getType()->getContext();
-}
-
-FunctionType *Function::getFunctionType() const {
- return cast<FunctionType>(getType()->getElementType());
-}
-
-bool Function::isVarArg() const {
- return getFunctionType()->isVarArg();
-}
-
-Type *Function::getReturnType() const {
- return getFunctionType()->getReturnType();
-}
-
-void Function::removeFromParent() {
- getParent()->getFunctionList().remove(this);
-}
-
-void Function::eraseFromParent() {
- getParent()->getFunctionList().erase(this);
-}
-
-//===----------------------------------------------------------------------===//
-// Function Implementation
-//===----------------------------------------------------------------------===//
-
-Function::Function(FunctionType *Ty, LinkageTypes Linkage,
- const Twine &name, Module *ParentModule)
- : GlobalValue(PointerType::getUnqual(Ty),
- Value::FunctionVal, 0, 0, Linkage, name) {
- assert(FunctionType::isValidReturnType(getReturnType()) &&
- "invalid return type");
- SymTab = new ValueSymbolTable();
-
- // If the function has arguments, mark them as lazily built.
- if (Ty->getNumParams())
- setValueSubclassData(1); // Set the "has lazy arguments" bit.
-
-  // Make sure that we get added to a module
- LeakDetector::addGarbageObject(this);
-
- if (ParentModule)
- ParentModule->getFunctionList().push_back(this);
-
- // Ensure intrinsics have the right parameter attributes.
- if (unsigned IID = getIntrinsicID())
- setAttributes(Intrinsic::getAttributes(getContext(), Intrinsic::ID(IID)));
-}
-
-Function::~Function() {
- dropAllReferences(); // After this it is safe to delete instructions.
-
- // Delete all of the method arguments and unlink from symbol table...
- ArgumentList.clear();
- delete SymTab;
-
- // Remove the function from the on-the-side GC table.
- clearGC();
-}
-
-void Function::BuildLazyArguments() const {
- // Create the arguments vector, all arguments start out unnamed.
- FunctionType *FT = getFunctionType();
- for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
- assert(!FT->getParamType(i)->isVoidTy() &&
- "Cannot have void typed arguments!");
- ArgumentList.push_back(new Argument(FT->getParamType(i)));
- }
-
- // Clear the lazy arguments bit.
- unsigned SDC = getSubclassDataFromValue();
- const_cast<Function*>(this)->setValueSubclassData(SDC &= ~1);
-}
-
-size_t Function::arg_size() const {
- return getFunctionType()->getNumParams();
-}
-bool Function::arg_empty() const {
- return getFunctionType()->getNumParams() == 0;
-}
-
-void Function::setParent(Module *parent) {
- if (getParent())
- LeakDetector::addGarbageObject(this);
- Parent = parent;
- if (getParent())
- LeakDetector::removeGarbageObject(this);
-}
-
-// dropAllReferences() - This function causes all the subinstructions to "let
-// go" of all references that they are maintaining. This allows one to
-// 'delete' a whole class at a time, even though there may be circular
-// references... first all references are dropped, and all use counts go to
-// zero. Then everything is deleted for real. Note that no operations are
-// valid on an object that has "dropped all references", except operator
-// delete.
-//
-void Function::dropAllReferences() {
- for (iterator I = begin(), E = end(); I != E; ++I)
- I->dropAllReferences();
-
- // Delete all basic blocks. They are now unused, except possibly by
- // blockaddresses, but BasicBlock's destructor takes care of those.
- while (!BasicBlocks.empty())
- BasicBlocks.begin()->eraseFromParent();
-}
-
-void Function::addAttribute(unsigned i, Attributes attr) {
- AttrListPtr PAL = getAttributes();
- PAL = PAL.addAttr(getContext(), i, attr);
- setAttributes(PAL);
-}
-
-void Function::removeAttribute(unsigned i, Attributes attr) {
- AttrListPtr PAL = getAttributes();
- PAL = PAL.removeAttr(getContext(), i, attr);
- setAttributes(PAL);
-}
-
-// Maintain the GC name for each function in an on-the-side table. This saves
-// allocating an additional word in Function for programs which do not use GC
-// (i.e., most programs) at the cost of increased overhead for clients which do
-// use GC.
-static DenseMap<const Function*,PooledStringPtr> *GCNames;
-static StringPool *GCNamePool;
-static ManagedStatic<sys::SmartRWMutex<true> > GCLock;
-
-bool Function::hasGC() const {
- sys::SmartScopedReader<true> Reader(*GCLock);
- return GCNames && GCNames->count(this);
-}
-
-const char *Function::getGC() const {
- assert(hasGC() && "Function has no collector");
- sys::SmartScopedReader<true> Reader(*GCLock);
- return *(*GCNames)[this];
-}
-
-void Function::setGC(const char *Str) {
- sys::SmartScopedWriter<true> Writer(*GCLock);
- if (!GCNamePool)
- GCNamePool = new StringPool();
- if (!GCNames)
- GCNames = new DenseMap<const Function*,PooledStringPtr>();
- (*GCNames)[this] = GCNamePool->intern(Str);
-}
-
-void Function::clearGC() {
- sys::SmartScopedWriter<true> Writer(*GCLock);
- if (GCNames) {
- GCNames->erase(this);
- if (GCNames->empty()) {
- delete GCNames;
- GCNames = 0;
- if (GCNamePool->empty()) {
- delete GCNamePool;
- GCNamePool = 0;
- }
- }
- }
-}
-
-/// copyAttributesFrom - copy all additional attributes (those not needed to
-/// create a Function) from the Function Src to this one.
-void Function::copyAttributesFrom(const GlobalValue *Src) {
- assert(isa<Function>(Src) && "Expected a Function!");
- GlobalValue::copyAttributesFrom(Src);
- const Function *SrcF = cast<Function>(Src);
- setCallingConv(SrcF->getCallingConv());
- setAttributes(SrcF->getAttributes());
- if (SrcF->hasGC())
- setGC(SrcF->getGC());
- else
- clearGC();
-}
-
-/// getIntrinsicID - This method returns the ID number of the specified
-/// function, or Intrinsic::not_intrinsic if the function is not an
-/// intrinsic, or if the pointer is null. This value is always defined to be
-/// zero to allow easy checking for whether a function is intrinsic or not. The
-/// particular intrinsic functions which correspond to this value are defined in
-/// llvm/Intrinsics.h.
-///
-unsigned Function::getIntrinsicID() const {
- const ValueName *ValName = this->getValueName();
- if (!ValName)
- return 0;
- unsigned Len = ValName->getKeyLength();
- const char *Name = ValName->getKeyData();
-
- if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
- || Name[2] != 'v' || Name[3] != 'm')
- return 0; // All intrinsics start with 'llvm.'
-
-#define GET_FUNCTION_RECOGNIZER
-#include "llvm/Intrinsics.gen"
-#undef GET_FUNCTION_RECOGNIZER
- return 0;
-}
-
-std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
- assert(id < num_intrinsics && "Invalid intrinsic ID!");
- static const char * const Table[] = {
- "not_intrinsic",
-#define GET_INTRINSIC_NAME_TABLE
-#include "llvm/Intrinsics.gen"
-#undef GET_INTRINSIC_NAME_TABLE
- };
- if (Tys.empty())
- return Table[id];
- std::string Result(Table[id]);
- for (unsigned i = 0; i < Tys.size(); ++i) {
- if (PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) {
- Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) +
- EVT::getEVT(PTyp->getElementType()).getEVTString();
- }
- else if (Tys[i])
- Result += "." + EVT::getEVT(Tys[i]).getEVTString();
- }
- return Result;
-}
-
-
-/// IIT_Info - These are enumerators that describe the entries returned by the
-/// getIntrinsicInfoTableEntries function.
-///
-/// NOTE: This must be kept in sync with the copy in TblGen/IntrinsicEmitter!
-enum IIT_Info {
- // Common values should be encoded with 0-15.
- IIT_Done = 0,
- IIT_I1 = 1,
- IIT_I8 = 2,
- IIT_I16 = 3,
- IIT_I32 = 4,
- IIT_I64 = 5,
- IIT_F32 = 6,
- IIT_F64 = 7,
- IIT_V2 = 8,
- IIT_V4 = 9,
- IIT_V8 = 10,
- IIT_V16 = 11,
- IIT_V32 = 12,
- IIT_MMX = 13,
- IIT_PTR = 14,
- IIT_ARG = 15,
-
- // Values from 16+ are only encodable with the inefficient encoding.
- IIT_METADATA = 16,
- IIT_EMPTYSTRUCT = 17,
- IIT_STRUCT2 = 18,
- IIT_STRUCT3 = 19,
- IIT_STRUCT4 = 20,
- IIT_STRUCT5 = 21,
- IIT_EXTEND_VEC_ARG = 22,
- IIT_TRUNC_VEC_ARG = 23,
- IIT_ANYPTR = 24
-};
-
-
-static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
- SmallVectorImpl<Intrinsic::IITDescriptor> &OutputTable) {
- IIT_Info Info = IIT_Info(Infos[NextElt++]);
- unsigned StructElts = 2;
- using namespace Intrinsic;
-
- switch (Info) {
- case IIT_Done:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Void, 0));
- return;
- case IIT_MMX:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0));
- return;
- case IIT_METADATA:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Metadata, 0));
- return;
- case IIT_F32:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Float, 0));
- return;
- case IIT_F64:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Double, 0));
- return;
- case IIT_I1:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 1));
- return;
- case IIT_I8:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8));
- return;
- case IIT_I16:
-    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 16));
- return;
- case IIT_I32:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 32));
- return;
- case IIT_I64:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64));
- return;
- case IIT_V2:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2));
- DecodeIITType(NextElt, Infos, OutputTable);
- return;
- case IIT_V4:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 4));
- DecodeIITType(NextElt, Infos, OutputTable);
- return;
- case IIT_V8:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 8));
- DecodeIITType(NextElt, Infos, OutputTable);
- return;
- case IIT_V16:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 16));
- DecodeIITType(NextElt, Infos, OutputTable);
- return;
- case IIT_V32:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 32));
- DecodeIITType(NextElt, Infos, OutputTable);
- return;
- case IIT_PTR:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0));
- DecodeIITType(NextElt, Infos, OutputTable);
- return;
- case IIT_ANYPTR: { // [ANYPTR addrspace, subtype]
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer,
- Infos[NextElt++]));
- DecodeIITType(NextElt, Infos, OutputTable);
- return;
- }
- case IIT_ARG: {
- unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Argument, ArgInfo));
- return;
- }
- case IIT_EXTEND_VEC_ARG: {
- unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::ExtendVecArgument,
- ArgInfo));
- return;
- }
- case IIT_TRUNC_VEC_ARG: {
- unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::TruncVecArgument,
- ArgInfo));
- return;
- }
- case IIT_EMPTYSTRUCT:
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
- return;
- case IIT_STRUCT5: ++StructElts; // FALL THROUGH.
- case IIT_STRUCT4: ++StructElts; // FALL THROUGH.
- case IIT_STRUCT3: ++StructElts; // FALL THROUGH.
- case IIT_STRUCT2: {
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct,StructElts));
-
- for (unsigned i = 0; i != StructElts; ++i)
- DecodeIITType(NextElt, Infos, OutputTable);
- return;
- }
- }
- llvm_unreachable("unhandled");
-}
-
-
-#define GET_INTRINSIC_GENERATOR_GLOBAL
-#include "llvm/Intrinsics.gen"
-#undef GET_INTRINSIC_GENERATOR_GLOBAL
-
-void Intrinsic::getIntrinsicInfoTableEntries(ID id,
- SmallVectorImpl<IITDescriptor> &T){
- // Check to see if the intrinsic's type was expressible by the table.
- unsigned TableVal = IIT_Table[id-1];
-
- // Decode the TableVal into an array of IITValues.
- SmallVector<unsigned char, 8> IITValues;
- ArrayRef<unsigned char> IITEntries;
- unsigned NextElt = 0;
- if ((TableVal >> 31) != 0) {
- // This is an offset into the IIT_LongEncodingTable.
- IITEntries = IIT_LongEncodingTable;
-
- // Strip sentinel bit.
- NextElt = (TableVal << 1) >> 1;
- } else {
- // Decode the TableVal into an array of IITValues. If the entry was encoded
- // into a single word in the table itself, decode it now.
- do {
- IITValues.push_back(TableVal & 0xF);
- TableVal >>= 4;
- } while (TableVal);
-
- IITEntries = IITValues;
- NextElt = 0;
- }
-
- // Okay, decode the table into the output vector of IITDescriptors.
- DecodeIITType(NextElt, IITEntries, T);
- while (NextElt != IITEntries.size() && IITEntries[NextElt] != 0)
- DecodeIITType(NextElt, IITEntries, T);
-}
-
-
-static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
- ArrayRef<Type*> Tys, LLVMContext &Context) {
- using namespace Intrinsic;
- IITDescriptor D = Infos.front();
- Infos = Infos.slice(1);
-
- switch (D.Kind) {
- case IITDescriptor::Void: return Type::getVoidTy(Context);
- case IITDescriptor::MMX: return Type::getX86_MMXTy(Context);
- case IITDescriptor::Metadata: return Type::getMetadataTy(Context);
- case IITDescriptor::Float: return Type::getFloatTy(Context);
- case IITDescriptor::Double: return Type::getDoubleTy(Context);
-
- case IITDescriptor::Integer:
- return IntegerType::get(Context, D.Integer_Width);
- case IITDescriptor::Vector:
- return VectorType::get(DecodeFixedType(Infos, Tys, Context),D.Vector_Width);
- case IITDescriptor::Pointer:
- return PointerType::get(DecodeFixedType(Infos, Tys, Context),
- D.Pointer_AddressSpace);
- case IITDescriptor::Struct: {
- Type *Elts[5];
- assert(D.Struct_NumElements <= 5 && "Can't handle this yet");
- for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
- Elts[i] = DecodeFixedType(Infos, Tys, Context);
- return StructType::get(Context, ArrayRef<Type*>(Elts,D.Struct_NumElements));
- }
-
- case IITDescriptor::Argument:
- return Tys[D.getArgumentNumber()];
- case IITDescriptor::ExtendVecArgument:
- return VectorType::getExtendedElementVectorType(cast<VectorType>(
- Tys[D.getArgumentNumber()]));
-
- case IITDescriptor::TruncVecArgument:
- return VectorType::getTruncatedElementVectorType(cast<VectorType>(
- Tys[D.getArgumentNumber()]));
- }
- llvm_unreachable("unhandled");
-}
-
-
-
-FunctionType *Intrinsic::getType(LLVMContext &Context,
- ID id, ArrayRef<Type*> Tys) {
- SmallVector<IITDescriptor, 8> Table;
- getIntrinsicInfoTableEntries(id, Table);
-
- ArrayRef<IITDescriptor> TableRef = Table;
- Type *ResultTy = DecodeFixedType(TableRef, Tys, Context);
-
- SmallVector<Type*, 8> ArgTys;
- while (!TableRef.empty())
- ArgTys.push_back(DecodeFixedType(TableRef, Tys, Context));
-
- return FunctionType::get(ResultTy, ArgTys, false);
-}
-
-bool Intrinsic::isOverloaded(ID id) {
-#define GET_INTRINSIC_OVERLOAD_TABLE
-#include "llvm/Intrinsics.gen"
-#undef GET_INTRINSIC_OVERLOAD_TABLE
-}
-
-/// This defines the "Intrinsic::getAttributes(ID id)" method.
-#define GET_INTRINSIC_ATTRIBUTES
-#include "llvm/Intrinsics.gen"
-#undef GET_INTRINSIC_ATTRIBUTES
-
-Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
-  // There can never be multiple globals with the same name but different types,
- // because intrinsics must be a specific type.
- return
- cast<Function>(M->getOrInsertFunction(getName(id, Tys),
- getType(M->getContext(), id, Tys)));
-}
-
-// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
-#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-#include "llvm/Intrinsics.gen"
-#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-
-/// hasAddressTaken - returns true if there are any uses of this function
-/// other than direct calls or invokes to it.
-bool Function::hasAddressTaken(const User* *PutOffender) const {
- for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
- const User *U = *I;
- if (isa<BlockAddress>(U))
- continue;
- if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
- return PutOffender ? (*PutOffender = U, true) : true;
- ImmutableCallSite CS(cast<Instruction>(U));
- if (!CS.isCallee(I))
- return PutOffender ? (*PutOffender = U, true) : true;
- }
- return false;
-}
-
-bool Function::isDefTriviallyDead() const {
- // Check the linkage
- if (!hasLinkOnceLinkage() && !hasLocalLinkage() &&
- !hasAvailableExternallyLinkage())
- return false;
-
- // Check if the function is used by anything other than a blockaddress.
- for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I)
- if (!isa<BlockAddress>(*I))
- return false;
-
- return true;
-}
-
-/// callsFunctionThatReturnsTwice - Return true if the function has a call to
-/// setjmp or other function that gcc recognizes as "returning twice".
-bool Function::callsFunctionThatReturnsTwice() const {
- for (const_inst_iterator
- I = inst_begin(this), E = inst_end(this); I != E; ++I) {
- const CallInst* callInst = dyn_cast<CallInst>(&*I);
- if (!callInst)
- continue;
- if (callInst->canReturnTwice())
- return true;
- }
-
- return false;
-}
-
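As context for the Intrinsic helpers deleted above, a caller materializes an overloaded intrinsic declaration by naming the types that select the overload, which getName() then mangles into the symbol. A small sketch using the pre-move header paths (the chosen intrinsic and types are just an example):

    #include "llvm/DerivedTypes.h"
    #include "llvm/Intrinsics.h"
    #include "llvm/Module.h"
    using namespace llvm;

    // Declares llvm.memcpy.p0i8.p0i8.i64 in M: the two pointer types and
    // the size type are the overload parameters encoded into the name.
    static Function *getMemCpy64(Module &M) {
      LLVMContext &Ctx = M.getContext();
      Type *Tys[] = { Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx),
                      Type::getInt64Ty(Ctx) };
      return Intrinsic::getDeclaration(&M, Intrinsic::memcpy, Tys);
    }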
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
deleted file mode 100644
index c428b889c3ec..000000000000
--- a/lib/VMCore/Globals.cpp
+++ /dev/null
@@ -1,263 +0,0 @@
-//===-- Globals.cpp - Implement the GlobalValue & GlobalVariable class ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the GlobalValue & GlobalVariable classes for the VMCore
-// library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Constants.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LeakDetector.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// GlobalValue Class
-//===----------------------------------------------------------------------===//
-
-bool GlobalValue::isMaterializable() const {
- return getParent() && getParent()->isMaterializable(this);
-}
-bool GlobalValue::isDematerializable() const {
- return getParent() && getParent()->isDematerializable(this);
-}
-bool GlobalValue::Materialize(std::string *ErrInfo) {
- return getParent()->Materialize(this, ErrInfo);
-}
-void GlobalValue::Dematerialize() {
- getParent()->Dematerialize(this);
-}
-
-/// Override destroyConstant to make sure it doesn't get called on
-/// GlobalValues, because they shouldn't be treated like other constants.
-void GlobalValue::destroyConstant() {
- llvm_unreachable("You can't GV->destroyConstant()!");
-}
-
-/// copyAttributesFrom - copy all additional attributes (those not needed to
-/// create a GlobalValue) from the GlobalValue Src to this one.
-void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
- setAlignment(Src->getAlignment());
- setSection(Src->getSection());
- setVisibility(Src->getVisibility());
- setUnnamedAddr(Src->hasUnnamedAddr());
-}
-
-void GlobalValue::setAlignment(unsigned Align) {
- assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
- assert(Align <= MaximumAlignment &&
- "Alignment is greater than MaximumAlignment!");
- Alignment = Log2_32(Align) + 1;
- assert(getAlignment() == Align && "Alignment representation error!");
-}
-
-bool GlobalValue::isDeclaration() const {
- // Globals are definitions if they have an initializer.
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
- return GV->getNumOperands() == 0;
-
- // Functions are definitions if they have a body.
- if (const Function *F = dyn_cast<Function>(this))
- return F->empty();
-
- // Aliases are always definitions.
- assert(isa<GlobalAlias>(this));
- return false;
-}
-
-//===----------------------------------------------------------------------===//
-// GlobalVariable Implementation
-//===----------------------------------------------------------------------===//
-
-GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link,
- Constant *InitVal, const Twine &Name,
- ThreadLocalMode TLMode, unsigned AddressSpace)
- : GlobalValue(PointerType::get(Ty, AddressSpace),
- Value::GlobalVariableVal,
- OperandTraits<GlobalVariable>::op_begin(this),
- InitVal != 0, Link, Name),
- isConstantGlobal(constant), threadLocalMode(TLMode) {
- if (InitVal) {
- assert(InitVal->getType() == Ty &&
- "Initializer should be the same type as the GlobalVariable!");
- Op<0>() = InitVal;
- }
-
- LeakDetector::addGarbageObject(this);
-}
-
-GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
- LinkageTypes Link, Constant *InitVal,
- const Twine &Name,
- GlobalVariable *Before, ThreadLocalMode TLMode,
- unsigned AddressSpace)
- : GlobalValue(PointerType::get(Ty, AddressSpace),
- Value::GlobalVariableVal,
- OperandTraits<GlobalVariable>::op_begin(this),
- InitVal != 0, Link, Name),
- isConstantGlobal(constant), threadLocalMode(TLMode) {
- if (InitVal) {
- assert(InitVal->getType() == Ty &&
- "Initializer should be the same type as the GlobalVariable!");
- Op<0>() = InitVal;
- }
-
- LeakDetector::addGarbageObject(this);
-
- if (Before)
- Before->getParent()->getGlobalList().insert(Before, this);
- else
- M.getGlobalList().push_back(this);
-}
-
-void GlobalVariable::setParent(Module *parent) {
- if (getParent())
- LeakDetector::addGarbageObject(this);
- Parent = parent;
- if (getParent())
- LeakDetector::removeGarbageObject(this);
-}
-
-void GlobalVariable::removeFromParent() {
- getParent()->getGlobalList().remove(this);
-}
-
-void GlobalVariable::eraseFromParent() {
- getParent()->getGlobalList().erase(this);
-}
-
-void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To,
- Use *U) {
- // If you call this, then you better know this GVar has a constant
- // initializer worth replacing. Enforce that here.
- assert(getNumOperands() == 1 &&
- "Attempt to replace uses of Constants on a GVar with no initializer");
-
- // And, since you know it has an initializer, the From value better be
- // the initializer :)
- assert(getOperand(0) == From &&
- "Attempt to replace wrong constant initializer in GVar");
-
- // And, you better have a constant for the replacement value
- assert(isa<Constant>(To) &&
- "Attempt to replace GVar initializer with non-constant");
-
- // Okay, preconditions out of the way, replace the constant initializer.
- this->setOperand(0, cast<Constant>(To));
-}
-
-void GlobalVariable::setInitializer(Constant *InitVal) {
- if (InitVal == 0) {
- if (hasInitializer()) {
- Op<0>().set(0);
- NumOperands = 0;
- }
- } else {
- assert(InitVal->getType() == getType()->getElementType() &&
- "Initializer type must match GlobalVariable type");
- if (!hasInitializer())
- NumOperands = 1;
- Op<0>().set(InitVal);
- }
-}
-
-/// copyAttributesFrom - copy all additional attributes (those not needed to
-/// create a GlobalVariable) from the GlobalVariable Src to this one.
-void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
- assert(isa<GlobalVariable>(Src) && "Expected a GlobalVariable!");
- GlobalValue::copyAttributesFrom(Src);
- const GlobalVariable *SrcVar = cast<GlobalVariable>(Src);
- setThreadLocal(SrcVar->isThreadLocal());
-}
-
-
-//===----------------------------------------------------------------------===//
-// GlobalAlias Implementation
-//===----------------------------------------------------------------------===//
-
-GlobalAlias::GlobalAlias(Type *Ty, LinkageTypes Link,
- const Twine &Name, Constant* aliasee,
- Module *ParentModule)
- : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) {
- LeakDetector::addGarbageObject(this);
-
- if (aliasee)
- assert(aliasee->getType() == Ty && "Alias and aliasee types should match!");
- Op<0>() = aliasee;
-
- if (ParentModule)
- ParentModule->getAliasList().push_back(this);
-}
-
-void GlobalAlias::setParent(Module *parent) {
- if (getParent())
- LeakDetector::addGarbageObject(this);
- Parent = parent;
- if (getParent())
- LeakDetector::removeGarbageObject(this);
-}
-
-void GlobalAlias::removeFromParent() {
- getParent()->getAliasList().remove(this);
-}
-
-void GlobalAlias::eraseFromParent() {
- getParent()->getAliasList().erase(this);
-}
-
-void GlobalAlias::setAliasee(Constant *Aliasee) {
- assert((!Aliasee || Aliasee->getType() == getType()) &&
- "Alias and aliasee types should match!");
-
- setOperand(0, Aliasee);
-}
-
-const GlobalValue *GlobalAlias::getAliasedGlobal() const {
- const Constant *C = getAliasee();
- if (C == 0) return 0;
-
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return GV;
-
- const ConstantExpr *CE = cast<ConstantExpr>(C);
- assert((CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::GetElementPtr) &&
- "Unsupported aliasee");
-
- return cast<GlobalValue>(CE->getOperand(0));
-}
-
-const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
- SmallPtrSet<const GlobalValue*, 3> Visited;
-
- // Check if we need to stop early.
- if (stopOnWeak && mayBeOverridden())
- return this;
-
- const GlobalValue *GV = getAliasedGlobal();
- Visited.insert(GV);
-
- // Iterate over aliasing chain, stopping on weak alias if necessary.
- while (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) {
- if (stopOnWeak && GA->mayBeOverridden())
- break;
-
- GV = GA->getAliasedGlobal();
-
- if (!Visited.insert(GV))
- return 0;
- }
-
- return GV;
-}
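To illustrate the constructors whose definitions are removed above, creating a module-level constant looks roughly like this (identifier names are illustrative; the trailing constructor parameters take their defaults from the header):

    #include "llvm/Constants.h"
    #include "llvm/GlobalVariable.h"
    #include "llvm/Module.h"
    using namespace llvm;

    // Emits "@answer = internal constant i32 42" into M and links the new
    // global into M's global list, as the constructor above does.
    static GlobalVariable *makeAnswer(Module &M) {
      Type *I32 = Type::getInt32Ty(M.getContext());
      Constant *Init = ConstantInt::get(I32, 42);
      return new GlobalVariable(M, I32, /*constant=*/true,
                                GlobalValue::InternalLinkage, Init, "answer");
    }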
diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp
deleted file mode 100644
index 04f08fe28e00..000000000000
--- a/lib/VMCore/IRBuilder.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===---- IRBuilder.cpp - Builder for LLVM Instrs -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the IRBuilder class, which is used as a convenient way
-// to create LLVM instructions with a consistent and simplified interface.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-using namespace llvm;
-
-/// CreateGlobalString - Make a new global variable with an initializer that
-/// has an array-of-i8 type filled in with the nul-terminated string value
-/// specified. If Name is specified, it is the name of the global variable
-/// created.
-Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
- Constant *StrConstant = ConstantDataArray::getString(Context, Str);
- Module &M = *BB->getParent()->getParent();
- GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(),
- true, GlobalValue::PrivateLinkage,
- StrConstant);
- GV->setName(Name);
- GV->setUnnamedAddr(true);
- return GV;
-}
-
-Type *IRBuilderBase::getCurrentFunctionReturnType() const {
- assert(BB && BB->getParent() && "No current function!");
- return BB->getParent()->getReturnType();
-}
-
-Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
- PointerType *PT = cast<PointerType>(Ptr->getType());
- if (PT->getElementType()->isIntegerTy(8))
- return Ptr;
-
- // Otherwise, we need to insert a bitcast.
- PT = getInt8PtrTy(PT->getAddressSpace());
- BitCastInst *BCI = new BitCastInst(Ptr, PT, "");
- BB->getInstList().insert(InsertPt, BCI);
- SetInstDebugLocation(BCI);
- return BCI;
-}
-
-static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
- IRBuilderBase *Builder) {
- CallInst *CI = CallInst::Create(Callee, Ops, "");
- Builder->GetInsertBlock()->getInstList().insert(Builder->GetInsertPoint(),CI);
- Builder->SetInstDebugLocation(CI);
- return CI;
-}
-
-CallInst *IRBuilderBase::
-CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
- bool isVolatile, MDNode *TBAATag) {
- Ptr = getCastedInt8PtrValue(Ptr);
- Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) };
- Type *Tys[] = { Ptr->getType(), Size->getType() };
- Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
-
- CallInst *CI = createCallHelper(TheFn, Ops, this);
-
- // Set the TBAA info if present.
- if (TBAATag)
- CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
- return CI;
-}
-
-CallInst *IRBuilderBase::
-CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
- bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag) {
- Dst = getCastedInt8PtrValue(Dst);
- Src = getCastedInt8PtrValue(Src);
-
- Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
- Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
- Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
-
- CallInst *CI = createCallHelper(TheFn, Ops, this);
-
- // Set the TBAA info if present.
- if (TBAATag)
- CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
- // Set the TBAA Struct info if present.
- if (TBAAStructTag)
- CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag);
-
- return CI;
-}
-
-CallInst *IRBuilderBase::
-CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
- bool isVolatile, MDNode *TBAATag) {
- Dst = getCastedInt8PtrValue(Dst);
- Src = getCastedInt8PtrValue(Src);
-
- Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
- Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
- Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
-
- CallInst *CI = createCallHelper(TheFn, Ops, this);
-
- // Set the TBAA info if present.
- if (TBAATag)
- CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
- return CI;
-}
-
-CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
- assert(isa<PointerType>(Ptr->getType()) &&
- "lifetime.start only applies to pointers.");
- Ptr = getCastedInt8PtrValue(Ptr);
- if (!Size)
- Size = getInt64(-1);
- else
- assert(Size->getType() == getInt64Ty() &&
- "lifetime.start requires the size to be an i64");
- Value *Ops[] = { Size, Ptr };
- Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start);
- return createCallHelper(TheFn, Ops, this);
-}
-
-CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
- assert(isa<PointerType>(Ptr->getType()) &&
- "lifetime.end only applies to pointers.");
- Ptr = getCastedInt8PtrValue(Ptr);
- if (!Size)
- Size = getInt64(-1);
- else
- assert(Size->getType() == getInt64Ty() &&
- "lifetime.end requires the size to be an i64");
- Value *Ops[] = { Size, Ptr };
- Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end);
- return createCallHelper(TheFn, Ops, this);
-}
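The memory-intrinsic helpers deleted above are normally reached through an IRBuilder positioned at the desired insertion point; the builder performs the i8* bitcasts itself via getCastedInt8PtrValue(). A minimal sketch (the size and alignment are arbitrary example values; isVolatile defaults to false in the header):

    #include "llvm/IRBuilder.h"
    using namespace llvm;

    // Copy 16 bytes from Src to Dst with 4-byte alignment at B's current
    // insertion point; Dst and Src may be pointers of any element type.
    static void emitCopy(IRBuilder<> &B, Value *Dst, Value *Src) {
      B.CreateMemCpy(Dst, Src, B.getInt64(16), /*Align=*/4);
    }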
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
deleted file mode 100644
index 2e636aacfde8..000000000000
--- a/lib/VMCore/InlineAsm.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-//===-- InlineAsm.cpp - Implement the InlineAsm class ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the InlineAsm class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/InlineAsm.h"
-#include "ConstantsContext.h"
-#include "LLVMContextImpl.h"
-#include "llvm/DerivedTypes.h"
-#include <algorithm>
-#include <cctype>
-using namespace llvm;
-
-// Implement the first virtual method in this class in this file so the
-// InlineAsm vtable is emitted here.
-InlineAsm::~InlineAsm() {
-}
-
-
-InlineAsm *InlineAsm::get(FunctionType *Ty, StringRef AsmString,
- StringRef Constraints, bool hasSideEffects,
- bool isAlignStack, AsmDialect asmDialect) {
- InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack,
- asmDialect);
- LLVMContextImpl *pImpl = Ty->getContext().pImpl;
- return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key);
-}
-
-InlineAsm::InlineAsm(PointerType *Ty, const std::string &asmString,
- const std::string &constraints, bool hasSideEffects,
- bool isAlignStack, AsmDialect asmDialect)
- : Value(Ty, Value::InlineAsmVal),
- AsmString(asmString), Constraints(constraints),
- HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack),
- Dialect(asmDialect) {
-
- // Do various checks on the constraint string and type.
- assert(Verify(getFunctionType(), constraints) &&
- "Function type not legal for constraints!");
-}
-
-void InlineAsm::destroyConstant() {
- getType()->getContext().pImpl->InlineAsms.remove(this);
- delete this;
-}
-
-FunctionType *InlineAsm::getFunctionType() const {
- return cast<FunctionType>(getType()->getElementType());
-}
-
-/// Default constructor.
-InlineAsm::ConstraintInfo::ConstraintInfo() :
- Type(isInput), isEarlyClobber(false),
- MatchingInput(-1), isCommutative(false),
- isIndirect(false), isMultipleAlternative(false),
- currentAlternativeIndex(0) {
-}
-
-/// Copy constructor.
-InlineAsm::ConstraintInfo::ConstraintInfo(const ConstraintInfo &other) :
- Type(other.Type), isEarlyClobber(other.isEarlyClobber),
- MatchingInput(other.MatchingInput), isCommutative(other.isCommutative),
- isIndirect(other.isIndirect), Codes(other.Codes),
- isMultipleAlternative(other.isMultipleAlternative),
- multipleAlternatives(other.multipleAlternatives),
- currentAlternativeIndex(other.currentAlternativeIndex) {
-}
-
-/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
-/// fields in this structure. If the constraint string is not understood,
-/// return true, otherwise return false.
-bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
- InlineAsm::ConstraintInfoVector &ConstraintsSoFar) {
- StringRef::iterator I = Str.begin(), E = Str.end();
- unsigned multipleAlternativeCount = Str.count('|') + 1;
- unsigned multipleAlternativeIndex = 0;
- ConstraintCodeVector *pCodes = &Codes;
-
- // Initialize
- isMultipleAlternative = (multipleAlternativeCount > 1 ? true : false);
- if (isMultipleAlternative) {
- multipleAlternatives.resize(multipleAlternativeCount);
- pCodes = &multipleAlternatives[0].Codes;
- }
- Type = isInput;
- isEarlyClobber = false;
- MatchingInput = -1;
- isCommutative = false;
- isIndirect = false;
- currentAlternativeIndex = 0;
-
- // Parse prefixes.
- if (*I == '~') {
- Type = isClobber;
- ++I;
- } else if (*I == '=') {
- ++I;
- Type = isOutput;
- }
-
- if (*I == '*') {
- isIndirect = true;
- ++I;
- }
-
- if (I == E) return true; // Just a prefix, like "==" or "~".
-
- // Parse the modifiers.
- bool DoneWithModifiers = false;
- while (!DoneWithModifiers) {
- switch (*I) {
- default:
- DoneWithModifiers = true;
- break;
- case '&': // Early clobber.
- if (Type != isOutput || // Cannot early clobber anything but output.
- isEarlyClobber) // Reject &&&&&&
- return true;
- isEarlyClobber = true;
- break;
- case '%': // Commutative.
- if (Type == isClobber || // Cannot commute clobbers.
- isCommutative) // Reject %%%%%
- return true;
- isCommutative = true;
- break;
- case '#': // Comment.
- case '*': // Register preferencing.
- return true; // Not supported.
- }
-
- if (!DoneWithModifiers) {
- ++I;
- if (I == E) return true; // Just prefixes and modifiers!
- }
- }
-
- // Parse the various constraints.
- while (I != E) {
- if (*I == '{') { // Physical register reference.
- // Find the end of the register name.
- StringRef::iterator ConstraintEnd = std::find(I+1, E, '}');
- if (ConstraintEnd == E) return true; // "{foo"
- pCodes->push_back(std::string(I, ConstraintEnd+1));
- I = ConstraintEnd+1;
- } else if (isdigit(*I)) { // Matching Constraint
- // Maximal munch numbers.
- StringRef::iterator NumStart = I;
- while (I != E && isdigit(*I))
- ++I;
- pCodes->push_back(std::string(NumStart, I));
- unsigned N = atoi(pCodes->back().c_str());
- // Check that this is a valid matching constraint!
- if (N >= ConstraintsSoFar.size() || ConstraintsSoFar[N].Type != isOutput||
- Type != isInput)
- return true; // Invalid constraint number.
-
- // If Operand N already has a matching input, reject this. An output
- // can't be constrained to the same value as multiple inputs.
- if (isMultipleAlternative) {
- InlineAsm::SubConstraintInfo &scInfo =
- ConstraintsSoFar[N].multipleAlternatives[multipleAlternativeIndex];
- if (scInfo.MatchingInput != -1)
- return true;
- // Note that operand #n has a matching input.
- scInfo.MatchingInput = ConstraintsSoFar.size();
- } else {
- if (ConstraintsSoFar[N].hasMatchingInput())
- return true;
- // Note that operand #n has a matching input.
- ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
- }
- } else if (*I == '|') {
- multipleAlternativeIndex++;
- pCodes = &multipleAlternatives[multipleAlternativeIndex].Codes;
- ++I;
- } else if (*I == '^') {
- // Multi-letter constraint
- // FIXME: For now assuming these are 2-character constraints.
- pCodes->push_back(std::string(I+1, I+3));
- I += 3;
- } else {
- // Single letter constraint.
- pCodes->push_back(std::string(I, I+1));
- ++I;
- }
- }
-
- return false;
-}
-
-/// selectAlternative - Point this constraint to the alternative constraint
-/// indicated by the index.
-void InlineAsm::ConstraintInfo::selectAlternative(unsigned index) {
- if (index < multipleAlternatives.size()) {
- currentAlternativeIndex = index;
- InlineAsm::SubConstraintInfo &scInfo =
- multipleAlternatives[currentAlternativeIndex];
- MatchingInput = scInfo.MatchingInput;
- Codes = scInfo.Codes;
- }
-}
-
-InlineAsm::ConstraintInfoVector
-InlineAsm::ParseConstraints(StringRef Constraints) {
- ConstraintInfoVector Result;
-
- // Scan the constraints string.
- for (StringRef::iterator I = Constraints.begin(),
- E = Constraints.end(); I != E; ) {
- ConstraintInfo Info;
-
- // Find the end of this constraint.
- StringRef::iterator ConstraintEnd = std::find(I, E, ',');
-
- if (ConstraintEnd == I || // Empty constraint like ",,"
- Info.Parse(StringRef(I, ConstraintEnd-I), Result)) {
- Result.clear(); // Erroneous constraint?
- break;
- }
-
- Result.push_back(Info);
-
- // ConstraintEnd may be either the next comma or the end of the string. In
- // the former case, we skip the comma.
- I = ConstraintEnd;
- if (I != E) {
- ++I;
- if (I == E) { Result.clear(); break; } // don't allow "xyz,"
- }
- }
-
- return Result;
-}
-
-/// Verify - Verify that the specified constraint string is reasonable for the
-/// specified function type, and otherwise validate the constraint string.
-bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
- if (Ty->isVarArg()) return false;
-
- ConstraintInfoVector Constraints = ParseConstraints(ConstStr);
-
- // Error parsing constraints.
- if (Constraints.empty() && !ConstStr.empty()) return false;
-
- unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0;
- unsigned NumIndirect = 0;
-
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- switch (Constraints[i].Type) {
- case InlineAsm::isOutput:
- if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0)
- return false; // outputs before inputs and clobbers.
- if (!Constraints[i].isIndirect) {
- ++NumOutputs;
- break;
- }
- ++NumIndirect;
- // FALLTHROUGH for Indirect Outputs.
- case InlineAsm::isInput:
- if (NumClobbers) return false; // inputs before clobbers.
- ++NumInputs;
- break;
- case InlineAsm::isClobber:
- ++NumClobbers;
- break;
- }
- }
-
- switch (NumOutputs) {
- case 0:
- if (!Ty->getReturnType()->isVoidTy()) return false;
- break;
- case 1:
- if (Ty->getReturnType()->isStructTy()) return false;
- break;
- default:
- StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
- if (STy == 0 || STy->getNumElements() != NumOutputs)
- return false;
- break;
- }
-
- if (Ty->getNumParams() != NumInputs) return false;
- return true;
-}
-
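For the constraint machinery deleted above, a well-formed InlineAsm value pairs a function type with a constraint string that Verify() accepts: outputs first, then inputs, then clobbers, with the output count matching the return type. A hedged sketch (the asm text and registers are illustrative; isAlignStack and the dialect take their header defaults):

    #include "llvm/DerivedTypes.h"
    #include "llvm/InlineAsm.h"
    using namespace llvm;

    // One i32 output pinned to {ax}, no inputs, {dx} clobbered: Verify()
    // sees NumOutputs == 1 matching the non-struct i32 return type.
    static InlineAsm *makeRdtscLow(LLVMContext &Ctx) {
      FunctionType *FTy =
          FunctionType::get(Type::getInt32Ty(Ctx), /*isVarArg=*/false);
      return InlineAsm::get(FTy, "rdtsc", "={ax},~{dx}",
                            /*hasSideEffects=*/true);
    }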
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
deleted file mode 100644
index 66379a049312..000000000000
--- a/lib/VMCore/Instruction.cpp
+++ /dev/null
@@ -1,447 +0,0 @@
-//===-- Instruction.cpp - Implement the Instruction class -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Instruction class for the VMCore library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Instruction.h"
-#include "llvm/Type.h"
-#include "llvm/Instructions.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/LeakDetector.h"
-using namespace llvm;
-
-Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
- Instruction *InsertBefore)
- : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
- // Make sure that we get added to a basicblock
- LeakDetector::addGarbageObject(this);
-
- // If requested, insert this instruction into a basic block...
- if (InsertBefore) {
- assert(InsertBefore->getParent() &&
- "Instruction to insert before is not in a basic block!");
- InsertBefore->getParent()->getInstList().insert(InsertBefore, this);
- }
-}
-
-Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
- BasicBlock *InsertAtEnd)
- : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
- // Make sure that we get added to a basicblock
- LeakDetector::addGarbageObject(this);
-
- // append this instruction into the basic block
- assert(InsertAtEnd && "Basic block to append to may not be NULL!");
- InsertAtEnd->getInstList().push_back(this);
-}
-
-
-// Out of line virtual method, so the vtable, etc has a home.
-Instruction::~Instruction() {
- assert(Parent == 0 && "Instruction still linked in the program!");
- if (hasMetadataHashEntry())
- clearMetadataHashEntries();
-}
-
-
-void Instruction::setParent(BasicBlock *P) {
- if (getParent()) {
- if (!P) LeakDetector::addGarbageObject(this);
- } else {
- if (P) LeakDetector::removeGarbageObject(this);
- }
-
- Parent = P;
-}
-
-void Instruction::removeFromParent() {
- getParent()->getInstList().remove(this);
-}
-
-void Instruction::eraseFromParent() {
- getParent()->getInstList().erase(this);
-}
-
-/// insertBefore - Insert an unlinked instruction into a basic block
-/// immediately before the specified instruction.
-void Instruction::insertBefore(Instruction *InsertPos) {
- InsertPos->getParent()->getInstList().insert(InsertPos, this);
-}
-
-/// insertAfter - Insert an unlinked instruction into a basic block
-/// immediately after the specified instruction.
-void Instruction::insertAfter(Instruction *InsertPos) {
- InsertPos->getParent()->getInstList().insertAfter(InsertPos, this);
-}
-
-/// moveBefore - Unlink this instruction from its current basic block and
-/// insert it into the basic block that MovePos lives in, right before
-/// MovePos.
-void Instruction::moveBefore(Instruction *MovePos) {
- MovePos->getParent()->getInstList().splice(MovePos,getParent()->getInstList(),
- this);
-}
-
-
-const char *Instruction::getOpcodeName(unsigned OpCode) {
- switch (OpCode) {
- // Terminators
- case Ret: return "ret";
- case Br: return "br";
- case Switch: return "switch";
- case IndirectBr: return "indirectbr";
- case Invoke: return "invoke";
- case Resume: return "resume";
- case Unreachable: return "unreachable";
-
- // Standard binary operators...
- case Add: return "add";
- case FAdd: return "fadd";
- case Sub: return "sub";
- case FSub: return "fsub";
- case Mul: return "mul";
- case FMul: return "fmul";
- case UDiv: return "udiv";
- case SDiv: return "sdiv";
- case FDiv: return "fdiv";
- case URem: return "urem";
- case SRem: return "srem";
- case FRem: return "frem";
-
- // Logical operators...
- case And: return "and";
- case Or : return "or";
- case Xor: return "xor";
-
- // Memory instructions...
- case Alloca: return "alloca";
- case Load: return "load";
- case Store: return "store";
- case AtomicCmpXchg: return "cmpxchg";
- case AtomicRMW: return "atomicrmw";
- case Fence: return "fence";
- case GetElementPtr: return "getelementptr";
-
- // Convert instructions...
- case Trunc: return "trunc";
- case ZExt: return "zext";
- case SExt: return "sext";
- case FPTrunc: return "fptrunc";
- case FPExt: return "fpext";
- case FPToUI: return "fptoui";
- case FPToSI: return "fptosi";
- case UIToFP: return "uitofp";
- case SIToFP: return "sitofp";
- case IntToPtr: return "inttoptr";
- case PtrToInt: return "ptrtoint";
- case BitCast: return "bitcast";
-
- // Other instructions...
- case ICmp: return "icmp";
- case FCmp: return "fcmp";
- case PHI: return "phi";
- case Select: return "select";
- case Call: return "call";
- case Shl: return "shl";
- case LShr: return "lshr";
- case AShr: return "ashr";
- case VAArg: return "va_arg";
- case ExtractElement: return "extractelement";
- case InsertElement: return "insertelement";
- case ShuffleVector: return "shufflevector";
- case ExtractValue: return "extractvalue";
- case InsertValue: return "insertvalue";
- case LandingPad: return "landingpad";
-
- default: return "<Invalid operator> ";
- }
-}
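-
-// Illustrative sketch, not part of the original source: getOpcodeName is
-// typically used for diagnostics (assumes llvm/Support/raw_ostream.h).
-static void printOpcode(const Instruction &I) {
- errs() << "opcode " << I.getOpcode() << " is '"
- << Instruction::getOpcodeName(I.getOpcode()) << "'\n";
-}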
-
-/// isIdenticalTo - Return true if the specified instruction is exactly
-/// identical to the current one. This means that all operands match and any
-/// extra information (e.g. whether a load is volatile) agrees.
-bool Instruction::isIdenticalTo(const Instruction *I) const {
- return isIdenticalToWhenDefined(I) &&
- SubclassOptionalData == I->SubclassOptionalData;
-}
-
-/// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
-/// ignores the SubclassOptionalData flags, which specify conditions
-/// under which the instruction's result is undefined.
-bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
- if (getOpcode() != I->getOpcode() ||
- getNumOperands() != I->getNumOperands() ||
- getType() != I->getType())
- return false;
-
- // We have two instructions of identical opcode and #operands. Check to see
- // if all operands are the same.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (getOperand(i) != I->getOperand(i))
- return false;
-
- // Check special state that is a part of some instructions.
- if (const LoadInst *LI = dyn_cast<LoadInst>(this))
- return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
- LI->getAlignment() == cast<LoadInst>(I)->getAlignment() &&
- LI->getOrdering() == cast<LoadInst>(I)->getOrdering() &&
- LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope();
- if (const StoreInst *SI = dyn_cast<StoreInst>(this))
- return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
- SI->getAlignment() == cast<StoreInst>(I)->getAlignment() &&
- SI->getOrdering() == cast<StoreInst>(I)->getOrdering() &&
- SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope();
- if (const CmpInst *CI = dyn_cast<CmpInst>(this))
- return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
- if (const CallInst *CI = dyn_cast<CallInst>(this))
- return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
- CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
- CI->getAttributes() == cast<CallInst>(I)->getAttributes();
- if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
- return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
- CI->getAttributes() == cast<InvokeInst>(I)->getAttributes();
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
- return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
- return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
- if (const FenceInst *FI = dyn_cast<FenceInst>(this))
- return FI->getOrdering() == cast<FenceInst>(I)->getOrdering() &&
- FI->getSynchScope() == cast<FenceInst>(I)->getSynchScope();
- if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
- return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
- CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() &&
- CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
- if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
- return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
- RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() &&
- RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() &&
- RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope();
- if (const PHINode *thisPHI = dyn_cast<PHINode>(this)) {
- const PHINode *otherPHI = cast<PHINode>(I);
- for (unsigned i = 0, e = thisPHI->getNumOperands(); i != e; ++i) {
- if (thisPHI->getIncomingBlock(i) != otherPHI->getIncomingBlock(i))
- return false;
- }
- return true;
- }
- return true;
-}
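-
-// Illustrative sketch, not part of the original source: a minimal CSE-style
-// check. Instructions that are identical when defined and also agree on
-// SubclassOptionalData (e.g. nsw/nuw/exact) can share a single result.
-static bool canReuseResult(const Instruction *A, const Instruction *B) {
- // isIdenticalTo subsumes isIdenticalToWhenDefined plus the optional flags.
- return A != B && A->isIdenticalTo(B);
-}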
-
-// isSameOperationAs
-// This should be kept in sync with isEquivalentOperation in
-// lib/Transforms/IPO/MergeFunctions.cpp.
-bool Instruction::isSameOperationAs(const Instruction *I,
- unsigned flags) const {
- bool IgnoreAlignment = flags & CompareIgnoringAlignment;
- bool UseScalarTypes = flags & CompareUsingScalarTypes;
-
- if (getOpcode() != I->getOpcode() ||
- getNumOperands() != I->getNumOperands() ||
- (UseScalarTypes ?
- getType()->getScalarType() != I->getType()->getScalarType() :
- getType() != I->getType()))
- return false;
-
- // We have two instructions of identical opcode and #operands. Check to see
- // if all operands have the same type.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (UseScalarTypes ?
- getOperand(i)->getType()->getScalarType() !=
- I->getOperand(i)->getType()->getScalarType() :
- getOperand(i)->getType() != I->getOperand(i)->getType())
- return false;
-
- // Check special state that is a part of some instructions.
- if (const LoadInst *LI = dyn_cast<LoadInst>(this))
- return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
- (LI->getAlignment() == cast<LoadInst>(I)->getAlignment() ||
- IgnoreAlignment) &&
- LI->getOrdering() == cast<LoadInst>(I)->getOrdering() &&
- LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope();
- if (const StoreInst *SI = dyn_cast<StoreInst>(this))
- return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
- (SI->getAlignment() == cast<StoreInst>(I)->getAlignment() ||
- IgnoreAlignment) &&
- SI->getOrdering() == cast<StoreInst>(I)->getOrdering() &&
- SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope();
- if (const CmpInst *CI = dyn_cast<CmpInst>(this))
- return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
- if (const CallInst *CI = dyn_cast<CallInst>(this))
- return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
- CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
- CI->getAttributes() == cast<CallInst>(I)->getAttributes();
- if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
- return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
- CI->getAttributes() ==
- cast<InvokeInst>(I)->getAttributes();
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
- return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
- return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
- if (const FenceInst *FI = dyn_cast<FenceInst>(this))
- return FI->getOrdering() == cast<FenceInst>(I)->getOrdering() &&
- FI->getSynchScope() == cast<FenceInst>(I)->getSynchScope();
- if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
- return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
- CXI->getOrdering() == cast<AtomicCmpXchgInst>(I)->getOrdering() &&
- CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
- if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
- return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
- RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() &&
- RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() &&
- RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope();
-
- return true;
-}
-
-/// isUsedOutsideOfBlock - Return true if there are any uses of I outside of the
-/// specified block. Note that PHI nodes are considered to evaluate their
-/// operands in the corresponding predecessor block.
-bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
- for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
- // PHI nodes use values in the corresponding predecessor block. For other
- // instructions, just check to see whether the parent of the use matches up.
- const User *U = *UI;
- const PHINode *PN = dyn_cast<PHINode>(U);
- if (PN == 0) {
- if (cast<Instruction>(U)->getParent() != BB)
- return true;
- continue;
- }
-
- if (PN->getIncomingBlock(UI) != BB)
- return true;
- }
- return false;
-}
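-
-// Illustrative sketch, not part of the original source: a typical client,
-// deciding whether an instruction can be sunk within its block. If the value
-// escapes the block, sinking would require rewriting the outside uses.
-static bool isSinkCandidate(const Instruction *I) {
- return !I->mayWriteToMemory() && !I->isUsedOutsideOfBlock(I->getParent());
-}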
-
-/// mayReadFromMemory - Return true if this instruction may read memory.
-///
-bool Instruction::mayReadFromMemory() const {
- switch (getOpcode()) {
- default: return false;
- case Instruction::VAArg:
- case Instruction::Load:
- case Instruction::Fence: // FIXME: refine definition of mayReadFromMemory
- case Instruction::AtomicCmpXchg:
- case Instruction::AtomicRMW:
- return true;
- case Instruction::Call:
- return !cast<CallInst>(this)->doesNotAccessMemory();
- case Instruction::Invoke:
- return !cast<InvokeInst>(this)->doesNotAccessMemory();
- case Instruction::Store:
- return !cast<StoreInst>(this)->isUnordered();
- }
-}
-
-/// mayWriteToMemory - Return true if this instruction may modify memory.
-///
-bool Instruction::mayWriteToMemory() const {
- switch (getOpcode()) {
- default: return false;
- case Instruction::Fence: // FIXME: refine definition of mayWriteToMemory
- case Instruction::Store:
- case Instruction::VAArg:
- case Instruction::AtomicCmpXchg:
- case Instruction::AtomicRMW:
- return true;
- case Instruction::Call:
- return !cast<CallInst>(this)->onlyReadsMemory();
- case Instruction::Invoke:
- return !cast<InvokeInst>(this)->onlyReadsMemory();
- case Instruction::Load:
- return !cast<LoadInst>(this)->isUnordered();
- }
-}
-
-/// mayThrow - Return true if this instruction may throw an exception.
-///
-bool Instruction::mayThrow() const {
- if (const CallInst *CI = dyn_cast<CallInst>(this))
- return !CI->doesNotThrow();
- return isa<ResumeInst>(this);
-}
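-
-// Illustrative sketch, not part of the original source: the three queries
-// above combine into the usual conservative side-effect predicate. Real
-// clients (e.g. isSafeToSpeculativelyExecute) check considerably more.
-static bool hasObservableEffects(const Instruction *I) {
- return I->mayWriteToMemory() || I->mayThrow();
-}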
-
-/// isAssociative - Return true if the instruction is associative:
-///
-/// Associative operators satisfy: x op (y op z) === (x op y) op z
-///
-/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
-///
-bool Instruction::isAssociative(unsigned Opcode) {
- return Opcode == And || Opcode == Or || Opcode == Xor ||
- Opcode == Add || Opcode == Mul;
-}
-
-/// isCommutative - Return true if the instruction is commutative:
-///
-/// Commutative operators satisfy: (x op y) === (y op x)
-///
-/// In LLVM, these are the associative operators plus FAdd and FMul, which
-/// are commutative but not associative. (Equality comparisons are also
-/// commutative, but are handled separately by CmpInst.)
-///
-bool Instruction::isCommutative(unsigned op) {
- switch (op) {
- case Add:
- case FAdd:
- case Mul:
- case FMul:
- case And:
- case Or:
- case Xor:
- return true;
- default:
- return false;
- }
-}
-
-/// isIdempotent - Return true if the instruction is idempotent:
-///
-/// Idempotent operators satisfy: x op x === x
-///
-/// In LLVM, the And and Or operators are idempotent.
-///
-bool Instruction::isIdempotent(unsigned Opcode) {
- return Opcode == And || Opcode == Or;
-}
-
-/// isNilpotent - Return true if the instruction is nilpotent:
-///
-/// Nilpotent operators satisfy: x op x === Id,
-///
-/// where Id is the identity for the operator, i.e. a constant such that
-/// x op Id === x and Id op x === x for all x.
-///
-/// In LLVM, the Xor operator is nilpotent.
-///
-bool Instruction::isNilpotent(unsigned Opcode) {
- return Opcode == Xor;
-}
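-
-// Illustrative sketch, not part of the original source: how a peephole might
-// consume the algebraic predicates above when it sees "x op x" (assumes
-// llvm/IR/Constants.h and llvm/IR/Instructions.h).
-static Value *simplifySelfOp(BinaryOperator *BO) {
- if (BO->getOperand(0) != BO->getOperand(1))
- return 0;
- unsigned Opc = BO->getOpcode();
- if (Instruction::isIdempotent(Opc)) // x & x, x | x --> x
- return BO->getOperand(0);
- if (Instruction::isNilpotent(Opc)) // x ^ x --> 0
- return Constant::getNullValue(BO->getType());
- return 0;
-}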
-
-Instruction *Instruction::clone() const {
- Instruction *New = clone_impl();
- New->SubclassOptionalData = SubclassOptionalData;
- if (!hasMetadata())
- return New;
-
- // Otherwise, enumerate and copy over metadata from the old instruction to the
- // new one.
- SmallVector<std::pair<unsigned, MDNode*>, 4> TheMDs;
- getAllMetadataOtherThanDebugLoc(TheMDs);
- for (unsigned i = 0, e = TheMDs.size(); i != e; ++i)
- New->setMetadata(TheMDs[i].first, TheMDs[i].second);
-
- New->setDebugLoc(getDebugLoc());
- return New;
-}
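-
-// Illustrative sketch, not part of the original source: clone() returns an
-// unlinked copy (metadata and debug location included, name excluded), so a
-// caller must insert it and usually give it a fresh name.
-static Instruction *cloneBefore(Instruction *I, Instruction *InsertPos) {
- Instruction *C = I->clone();
- C->insertBefore(InsertPos);
- C->setName(I->getName() + ".clone");
- return C;
-}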
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
deleted file mode 100644
index 94bd2a15632d..000000000000
--- a/lib/VMCore/Instructions.cpp
+++ /dev/null
@@ -1,3544 +0,0 @@
-//===-- Instructions.cpp - Implement the LLVM instructions ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements all of the non-inline methods for the LLVM instruction
-// classes.
-//
-//===----------------------------------------------------------------------===//
-
-#include "LLVMContextImpl.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ConstantRange.h"
-#include "llvm/Support/MathExtras.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// CallSite Class
-//===----------------------------------------------------------------------===//
-
-User::op_iterator CallSite::getCallee() const {
- Instruction *II(getInstruction());
- return isCall()
- ? cast<CallInst>(II)->op_end() - 1 // Skip Callee
- : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Callee
-}
-
-//===----------------------------------------------------------------------===//
-// TerminatorInst Class
-//===----------------------------------------------------------------------===//
-
-// Out of line virtual method, so the vtable, etc has a home.
-TerminatorInst::~TerminatorInst() {
-}
-
-//===----------------------------------------------------------------------===//
-// UnaryInstruction Class
-//===----------------------------------------------------------------------===//
-
-// Out of line virtual method, so the vtable, etc has a home.
-UnaryInstruction::~UnaryInstruction() {
-}
-
-//===----------------------------------------------------------------------===//
-// SelectInst Class
-//===----------------------------------------------------------------------===//
-
-/// areInvalidOperands - Return a string if the specified operands are invalid
-/// for a select operation; otherwise return null.
-const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
- if (Op1->getType() != Op2->getType())
- return "both values to select must have same type";
-
- if (VectorType *VT = dyn_cast<VectorType>(Op0->getType())) {
- // Vector select.
- if (VT->getElementType() != Type::getInt1Ty(Op0->getContext()))
- return "vector select condition element type must be i1";
- VectorType *ET = dyn_cast<VectorType>(Op1->getType());
- if (ET == 0)
- return "selected values for vector select must be vectors";
- if (ET->getNumElements() != VT->getNumElements())
- return "vector select requires selected vectors to have "
- "the same vector length as select condition";
- } else if (Op0->getType() != Type::getInt1Ty(Op0->getContext())) {
- return "select condition must be i1 or <n x i1>";
- }
- return 0;
-}
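-
-// Illustrative sketch, not part of the original source: front ends can call
-// areInvalidOperands before building a select instead of asserting later.
-static SelectInst *createCheckedSelect(Value *C, Value *T, Value *F,
- Instruction *InsertPos) {
- if (const char *Err = SelectInst::areInvalidOperands(C, T, F)) {
- errs() << "invalid select: " << Err << "\n";
- return 0;
- }
- return SelectInst::Create(C, T, F, "sel", InsertPos);
-}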
-
-
-//===----------------------------------------------------------------------===//
-// PHINode Class
-//===----------------------------------------------------------------------===//
-
-PHINode::PHINode(const PHINode &PN)
- : Instruction(PN.getType(), Instruction::PHI,
- allocHungoffUses(PN.getNumOperands()), PN.getNumOperands()),
- ReservedSpace(PN.getNumOperands()) {
- std::copy(PN.op_begin(), PN.op_end(), op_begin());
- std::copy(PN.block_begin(), PN.block_end(), block_begin());
- SubclassOptionalData = PN.SubclassOptionalData;
-}
-
-PHINode::~PHINode() {
- dropHungoffUses();
-}
-
-Use *PHINode::allocHungoffUses(unsigned N) const {
- // Allocate the array of Uses of the incoming values, followed by a pointer
- // (with bottom bit set) to the User, followed by the array of pointers to
- // the incoming basic blocks.
- size_t size = N * sizeof(Use) + sizeof(Use::UserRef)
- + N * sizeof(BasicBlock*);
- Use *Begin = static_cast<Use*>(::operator new(size));
- Use *End = Begin + N;
- (void) new(End) Use::UserRef(const_cast<PHINode*>(this), 1);
- return Use::initTags(Begin, End);
-}
-
-// removeIncomingValue - Remove an incoming value. This is useful if a
-// predecessor basic block is deleted.
-Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
- Value *Removed = getIncomingValue(Idx);
-
- // Move everything after this operand down.
- //
- // FIXME: we could just swap with the end of the list, then erase. However,
- // clients might not expect this to happen. The code as it is thrashes the
- // use/def lists, which is kinda lame.
- std::copy(op_begin() + Idx + 1, op_end(), op_begin() + Idx);
- std::copy(block_begin() + Idx + 1, block_end(), block_begin() + Idx);
-
- // Nuke the last value.
- Op<-1>().set(0);
- --NumOperands;
-
- // If the PHI node is dead, because it has zero entries, nuke it now.
- if (getNumOperands() == 0 && DeletePHIIfEmpty) {
- // If anyone is using this PHI, make them use a dummy value instead...
- replaceAllUsesWith(UndefValue::get(getType()));
- eraseFromParent();
- }
- return Removed;
-}
-
-/// growOperands - This grows the operand list in response to a push_back
-/// style of operation, increasing the number of operands by roughly 1.5x.
-///
-void PHINode::growOperands() {
- unsigned e = getNumOperands();
- unsigned NumOps = e + e / 2;
- if (NumOps < 2) NumOps = 2; // 2 op PHI nodes are VERY common.
-
- Use *OldOps = op_begin();
- BasicBlock **OldBlocks = block_begin();
-
- ReservedSpace = NumOps;
- OperandList = allocHungoffUses(ReservedSpace);
-
- std::copy(OldOps, OldOps + e, op_begin());
- std::copy(OldBlocks, OldBlocks + e, block_begin());
-
- Use::zap(OldOps, OldOps + e, true);
-}
-
-/// hasConstantValue - If the specified PHI node always merges together the same
-/// value, return the value, otherwise return null.
-Value *PHINode::hasConstantValue() const {
- // Exploit the fact that phi nodes always have at least one entry.
- Value *ConstantValue = getIncomingValue(0);
- for (unsigned i = 1, e = getNumIncomingValues(); i != e; ++i)
- if (getIncomingValue(i) != ConstantValue && getIncomingValue(i) != this) {
- if (ConstantValue != this)
- return 0; // Incoming values not all the same.
- // The case where the first value is this PHI.
- ConstantValue = getIncomingValue(i);
- }
- if (ConstantValue == this)
- return UndefValue::get(getType());
- return ConstantValue;
-}
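-
-// Illustrative sketch, not part of the original source: the classic consumer
-// of hasConstantValue, folding a PHI whose incoming values all agree. Real
-// passes also verify that V is available (dominates) at the PHI.
-static bool foldTrivialPHI(PHINode *PN) {
- if (Value *V = PN->hasConstantValue()) {
- PN->replaceAllUsesWith(V);
- PN->eraseFromParent();
- return true;
- }
- return false;
-}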
-
-//===----------------------------------------------------------------------===//
-// LandingPadInst Implementation
-//===----------------------------------------------------------------------===//
-
-LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedValues, const Twine &NameStr,
- Instruction *InsertBefore)
- : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertBefore) {
- init(PersonalityFn, 1 + NumReservedValues, NameStr);
-}
-
-LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedValues, const Twine &NameStr,
- BasicBlock *InsertAtEnd)
- : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertAtEnd) {
- init(PersonalityFn, 1 + NumReservedValues, NameStr);
-}
-
-LandingPadInst::LandingPadInst(const LandingPadInst &LP)
- : Instruction(LP.getType(), Instruction::LandingPad,
- allocHungoffUses(LP.getNumOperands()), LP.getNumOperands()),
- ReservedSpace(LP.getNumOperands()) {
- Use *OL = OperandList, *InOL = LP.OperandList;
- for (unsigned I = 0, E = ReservedSpace; I != E; ++I)
- OL[I] = InOL[I];
-
- setCleanup(LP.isCleanup());
-}
-
-LandingPadInst::~LandingPadInst() {
- dropHungoffUses();
-}
-
-LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedClauses,
- const Twine &NameStr,
- Instruction *InsertBefore) {
- return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr,
- InsertBefore);
-}
-
-LandingPadInst *LandingPadInst::Create(Type *RetTy, Value *PersonalityFn,
- unsigned NumReservedClauses,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- return new LandingPadInst(RetTy, PersonalityFn, NumReservedClauses, NameStr,
- InsertAtEnd);
-}
-
-void LandingPadInst::init(Value *PersFn, unsigned NumReservedValues,
- const Twine &NameStr) {
- ReservedSpace = NumReservedValues;
- NumOperands = 1;
- OperandList = allocHungoffUses(ReservedSpace);
- OperandList[0] = PersFn;
- setName(NameStr);
- setCleanup(false);
-}
-
-/// growOperands - This grows the operand list in response to a push_back
-/// style of operation, roughly doubling the number of operands.
-void LandingPadInst::growOperands(unsigned Size) {
- unsigned e = getNumOperands();
- if (ReservedSpace >= e + Size) return;
- ReservedSpace = (e + Size / 2) * 2;
-
- Use *NewOps = allocHungoffUses(ReservedSpace);
- Use *OldOps = OperandList;
- for (unsigned i = 0; i != e; ++i)
- NewOps[i] = OldOps[i];
-
- OperandList = NewOps;
- Use::zap(OldOps, OldOps + e, true);
-}
-
-void LandingPadInst::addClause(Value *Val) {
- unsigned OpNo = getNumOperands();
- growOperands(1);
- assert(OpNo < ReservedSpace && "Growing didn't work!");
- ++NumOperands;
- OperandList[OpNo] = Val;
-}
-
-//===----------------------------------------------------------------------===//
-// CallInst Implementation
-//===----------------------------------------------------------------------===//
-
-CallInst::~CallInst() {
-}
-
-void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) {
- assert(NumOperands == Args.size() + 1 && "NumOperands not set up?");
- Op<-1>() = Func;
-
-#ifndef NDEBUG
- FunctionType *FTy =
- cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-
- assert((Args.size() == FTy->getNumParams() ||
- (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
- "Calling a function with bad signature!");
-
- for (unsigned i = 0; i != Args.size(); ++i)
- assert((i >= FTy->getNumParams() ||
- FTy->getParamType(i) == Args[i]->getType()) &&
- "Calling a function with a bad signature!");
-#endif
-
- std::copy(Args.begin(), Args.end(), op_begin());
- setName(NameStr);
-}
-
-void CallInst::init(Value *Func, const Twine &NameStr) {
- assert(NumOperands == 1 && "NumOperands not set up?");
- Op<-1>() = Func;
-
-#ifndef NDEBUG
- FunctionType *FTy =
- cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-
- assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
-#endif
-
- setName(NameStr);
-}
-
-CallInst::CallInst(Value *Func, const Twine &Name,
- Instruction *InsertBefore)
- : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
- ->getElementType())->getReturnType(),
- Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - 1,
- 1, InsertBefore) {
- init(Func, Name);
-}
-
-CallInst::CallInst(Value *Func, const Twine &Name,
- BasicBlock *InsertAtEnd)
- : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
- ->getElementType())->getReturnType(),
- Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - 1,
- 1, InsertAtEnd) {
- init(Func, Name);
-}
-
-CallInst::CallInst(const CallInst &CI)
- : Instruction(CI.getType(), Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - CI.getNumOperands(),
- CI.getNumOperands()) {
- setAttributes(CI.getAttributes());
- setTailCall(CI.isTailCall());
- setCallingConv(CI.getCallingConv());
-
- std::copy(CI.op_begin(), CI.op_end(), op_begin());
- SubclassOptionalData = CI.SubclassOptionalData;
-}
-
-void CallInst::addAttribute(unsigned i, Attributes attr) {
- AttrListPtr PAL = getAttributes();
- PAL = PAL.addAttr(getContext(), i, attr);
- setAttributes(PAL);
-}
-
-void CallInst::removeAttribute(unsigned i, Attributes attr) {
- AttrListPtr PAL = getAttributes();
- PAL = PAL.removeAttr(getContext(), i, attr);
- setAttributes(PAL);
-}
-
-bool CallInst::hasFnAttr(Attributes::AttrVal A) const {
- if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex)
- .hasAttribute(A))
- return true;
- if (const Function *F = getCalledFunction())
- return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A);
- return false;
-}
-
-bool CallInst::paramHasAttr(unsigned i, Attributes::AttrVal A) const {
- if (AttributeList.getParamAttributes(i).hasAttribute(A))
- return true;
- if (const Function *F = getCalledFunction())
- return F->getParamAttributes(i).hasAttribute(A);
- return false;
-}
-
-/// IsConstantOne - Return true only if val is the constant integer 1.
-static bool IsConstantOne(Value *val) {
- assert(val && "IsConstantOne does not work with NULL val");
- return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
-}
-
-static Instruction *createMalloc(Instruction *InsertBefore,
- BasicBlock *InsertAtEnd, Type *IntPtrTy,
- Type *AllocTy, Value *AllocSize,
- Value *ArraySize, Function *MallocF,
- const Twine &Name) {
- assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
- "createMalloc needs either InsertBefore or InsertAtEnd");
-
- // malloc(type) becomes:
- // bitcast (i8* malloc(typeSize)) to type*
- // malloc(type, arraySize) becomes:
- // bitcast (i8* malloc(typeSize*arraySize)) to type*
- if (!ArraySize)
- ArraySize = ConstantInt::get(IntPtrTy, 1);
- else if (ArraySize->getType() != IntPtrTy) {
- if (InsertBefore)
- ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
- "", InsertBefore);
- else
- ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
- "", InsertAtEnd);
- }
-
- if (!IsConstantOne(ArraySize)) {
- if (IsConstantOne(AllocSize)) {
- AllocSize = ArraySize; // Operand * 1 = Operand
- } else if (Constant *CO = dyn_cast<Constant>(ArraySize)) {
- Constant *Scale = ConstantExpr::getIntegerCast(CO, IntPtrTy,
- false /*ZExt*/);
- // Malloc arg is constant product of type size and array size
- AllocSize = ConstantExpr::getMul(Scale, cast<Constant>(AllocSize));
- } else {
- // Multiply type size by the array size...
- if (InsertBefore)
- AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
- "mallocsize", InsertBefore);
- else
- AllocSize = BinaryOperator::CreateMul(ArraySize, AllocSize,
- "mallocsize", InsertAtEnd);
- }
- }
-
- assert(AllocSize->getType() == IntPtrTy && "malloc arg is wrong size");
- // Create the call to Malloc.
- BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
- Module* M = BB->getParent()->getParent();
- Type *BPTy = Type::getInt8PtrTy(BB->getContext());
- Value *MallocFunc = MallocF;
- if (!MallocFunc)
- // prototype malloc as "void *malloc(size_t)"
- MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
- PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
- CallInst *MCall = NULL;
- Instruction *Result = NULL;
- if (InsertBefore) {
- MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall", InsertBefore);
- Result = MCall;
- if (Result->getType() != AllocPtrType)
- // Create a cast instruction to convert to the right type...
- Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore);
- } else {
- MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall");
- Result = MCall;
- if (Result->getType() != AllocPtrType) {
- InsertAtEnd->getInstList().push_back(MCall);
- // Create a cast instruction to convert to the right type...
- Result = new BitCastInst(MCall, AllocPtrType, Name);
- }
- }
- MCall->setTailCall();
- if (Function *F = dyn_cast<Function>(MallocFunc)) {
- MCall->setCallingConv(F->getCallingConv());
- if (!F->doesNotAlias(0)) F->setDoesNotAlias(0);
- }
- assert(!MCall->getType()->isVoidTy() && "Malloc has void return type");
-
- return Result;
-}
-
-/// CreateMalloc - Generate the IR for a call to malloc:
-/// 1. Compute the malloc call's argument as the specified type's size,
-/// possibly multiplied by the array size if the array size is not
-/// constant 1.
-/// 2. Call malloc with that argument.
-/// 3. Bitcast the result of the malloc call to the specified type.
-Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
- Type *IntPtrTy, Type *AllocTy,
- Value *AllocSize, Value *ArraySize,
- Function * MallocF,
- const Twine &Name) {
- return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
- ArraySize, MallocF, Name);
-}
-
-/// CreateMalloc - Generate the IR for a call to malloc:
-/// 1. Compute the malloc call's argument as the specified type's size,
-/// possibly multiplied by the array size if the array size is not
-/// constant 1.
-/// 2. Call malloc with that argument.
-/// 3. Bitcast the result of the malloc call to the specified type.
-/// Note: This function does not add the bitcast to the basic block; that is
-/// the
-/// responsibility of the caller.
-Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
- Type *IntPtrTy, Type *AllocTy,
- Value *AllocSize, Value *ArraySize,
- Function *MallocF, const Twine &Name) {
- return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
- ArraySize, MallocF, Name);
-}
-
-static Instruction* createFree(Value* Source, Instruction *InsertBefore,
- BasicBlock *InsertAtEnd) {
- assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
- "createFree needs either InsertBefore or InsertAtEnd");
- assert(Source->getType()->isPointerTy() &&
- "Can not free something of nonpointer type!");
-
- BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
- Module* M = BB->getParent()->getParent();
-
- Type *VoidTy = Type::getVoidTy(M->getContext());
- Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
- // prototype free as "void free(void*)"
- Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
- CallInst* Result = NULL;
- Value *PtrCast = Source;
- if (InsertBefore) {
- if (Source->getType() != IntPtrTy)
- PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertBefore);
- Result = CallInst::Create(FreeFunc, PtrCast, "", InsertBefore);
- } else {
- if (Source->getType() != IntPtrTy)
- PtrCast = new BitCastInst(Source, IntPtrTy, "", InsertAtEnd);
- Result = CallInst::Create(FreeFunc, PtrCast, "");
- }
- Result->setTailCall();
- if (Function *F = dyn_cast<Function>(FreeFunc))
- Result->setCallingConv(F->getCallingConv());
-
- return Result;
-}
-
-/// CreateFree - Generate the IR for a call to the builtin free function.
-Instruction * CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
- return createFree(Source, InsertBefore, NULL);
-}
-
-/// CreateFree - Generate the IR for a call to the builtin free function.
-/// Note: This function does not add the call to the basic block; that is the
-/// responsibility of the caller.
-Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
- Instruction* FreeCall = createFree(Source, NULL, InsertAtEnd);
- assert(FreeCall && "CreateFree did not create a CallInst");
- return FreeCall;
-}
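-
-// Illustrative sketch, not part of the original source: emitting a matched
-// malloc/free pair with the InsertAtEnd overloads. TypeSize must be the size
-// of T in bytes as an IntPtrTy constant; per the notes above, the returned
-// instructions are not appended automatically, so the caller does it.
-static void emitMallocFreePair(Type *T, Type *IntPtrTy, Value *TypeSize,
- BasicBlock *AllocBB, BasicBlock *FreeBB) {
- Instruction *Mem = CallInst::CreateMalloc(AllocBB, IntPtrTy, T, TypeSize,
- 0, 0, "obj");
- AllocBB->getInstList().push_back(Mem);
- Instruction *FreeCall = CallInst::CreateFree(Mem, FreeBB);
- FreeBB->getInstList().push_back(FreeCall);
-}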
-
-//===----------------------------------------------------------------------===//
-// InvokeInst Implementation
-//===----------------------------------------------------------------------===//
-
-void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, const Twine &NameStr) {
- assert(NumOperands == 3 + Args.size() && "NumOperands not set up?");
- Op<-3>() = Fn;
- Op<-2>() = IfNormal;
- Op<-1>() = IfException;
-
-#ifndef NDEBUG
- FunctionType *FTy =
- cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
-
- assert(((Args.size() == FTy->getNumParams()) ||
- (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
- "Invoking a function with bad signature");
-
- for (unsigned i = 0, e = Args.size(); i != e; i++)
- assert((i >= FTy->getNumParams() ||
- FTy->getParamType(i) == Args[i]->getType()) &&
- "Invoking a function with a bad signature!");
-#endif
-
- std::copy(Args.begin(), Args.end(), op_begin());
- setName(NameStr);
-}
-
-InvokeInst::InvokeInst(const InvokeInst &II)
- : TerminatorInst(II.getType(), Instruction::Invoke,
- OperandTraits<InvokeInst>::op_end(this)
- - II.getNumOperands(),
- II.getNumOperands()) {
- setAttributes(II.getAttributes());
- setCallingConv(II.getCallingConv());
- std::copy(II.op_begin(), II.op_end(), op_begin());
- SubclassOptionalData = II.SubclassOptionalData;
-}
-
-BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
- return getSuccessor(idx);
-}
-unsigned InvokeInst::getNumSuccessorsV() const {
- return getNumSuccessors();
-}
-void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
- return setSuccessor(idx, B);
-}
-
-bool InvokeInst::hasFnAttr(Attributes::AttrVal A) const {
- if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex).
- hasAttribute(A))
- return true;
- if (const Function *F = getCalledFunction())
- return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A);
- return false;
-}
-
-bool InvokeInst::paramHasAttr(unsigned i, Attributes::AttrVal A) const {
- if (AttributeList.getParamAttributes(i).hasAttribute(A))
- return true;
- if (const Function *F = getCalledFunction())
- return F->getParamAttributes(i).hasAttribute(A);
- return false;
-}
-
-void InvokeInst::addAttribute(unsigned i, Attributes attr) {
- AttrListPtr PAL = getAttributes();
- PAL = PAL.addAttr(getContext(), i, attr);
- setAttributes(PAL);
-}
-
-void InvokeInst::removeAttribute(unsigned i, Attributes attr) {
- AttrListPtr PAL = getAttributes();
- PAL = PAL.removeAttr(getContext(), i, attr);
- setAttributes(PAL);
-}
-
-LandingPadInst *InvokeInst::getLandingPadInst() const {
- return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI());
-}
-
-//===----------------------------------------------------------------------===//
-// ReturnInst Implementation
-//===----------------------------------------------------------------------===//
-
-ReturnInst::ReturnInst(const ReturnInst &RI)
- : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this) -
- RI.getNumOperands(),
- RI.getNumOperands()) {
- if (RI.getNumOperands())
- Op<0>() = RI.Op<0>();
- SubclassOptionalData = RI.SubclassOptionalData;
-}
-
-ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
- InsertBefore) {
- if (retVal)
- Op<0>() = retVal;
-}
-ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(C), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this) - !!retVal, !!retVal,
- InsertAtEnd) {
- if (retVal)
- Op<0>() = retVal;
-}
-ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Ret,
- OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {
-}
-
-unsigned ReturnInst::getNumSuccessorsV() const {
- return getNumSuccessors();
-}
-
-/// Out-of-line ReturnInst method, put here so the C++ compiler can choose to
-/// emit the vtable for the class in this translation unit.
-void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
- llvm_unreachable("ReturnInst has no successors!");
-}
-
-BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const {
- llvm_unreachable("ReturnInst has no successors!");
-}
-
-ReturnInst::~ReturnInst() {
-}
-
-//===----------------------------------------------------------------------===//
-// ResumeInst Implementation
-//===----------------------------------------------------------------------===//
-
-ResumeInst::ResumeInst(const ResumeInst &RI)
- : TerminatorInst(Type::getVoidTy(RI.getContext()), Instruction::Resume,
- OperandTraits<ResumeInst>::op_begin(this), 1) {
- Op<0>() = RI.Op<0>();
-}
-
-ResumeInst::ResumeInst(Value *Exn, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
- OperandTraits<ResumeInst>::op_begin(this), 1, InsertBefore) {
- Op<0>() = Exn;
-}
-
-ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Exn->getContext()), Instruction::Resume,
- OperandTraits<ResumeInst>::op_begin(this), 1, InsertAtEnd) {
- Op<0>() = Exn;
-}
-
-unsigned ResumeInst::getNumSuccessorsV() const {
- return getNumSuccessors();
-}
-
-void ResumeInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
- llvm_unreachable("ResumeInst has no successors!");
-}
-
-BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const {
- llvm_unreachable("ResumeInst has no successors!");
-}
-
-//===----------------------------------------------------------------------===//
-// UnreachableInst Implementation
-//===----------------------------------------------------------------------===//
-
-UnreachableInst::UnreachableInst(LLVMContext &Context,
- Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
- 0, 0, InsertBefore) {
-}
-UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
- 0, 0, InsertAtEnd) {
-}
-
-unsigned UnreachableInst::getNumSuccessorsV() const {
- return getNumSuccessors();
-}
-
-void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
- llvm_unreachable("UnreachableInst has no successors!");
-}
-
-BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
- llvm_unreachable("UnreachableInst has no successors!");
-}
-
-//===----------------------------------------------------------------------===//
-// BranchInst Implementation
-//===----------------------------------------------------------------------===//
-
-void BranchInst::AssertOK() {
- if (isConditional())
- assert(getCondition()->getType()->isIntegerTy(1) &&
- "May only branch on boolean predicates!");
-}
-
-BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 1,
- 1, InsertBefore) {
- assert(IfTrue != 0 && "Branch destination may not be null!");
- Op<-1>() = IfTrue;
-}
-BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
- Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 3,
- 3, InsertBefore) {
- Op<-1>() = IfTrue;
- Op<-2>() = IfFalse;
- Op<-3>() = Cond;
-#ifndef NDEBUG
- AssertOK();
-#endif
-}
-
-BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 1,
- 1, InsertAtEnd) {
- assert(IfTrue != 0 && "Branch destination may not be null!");
- Op<-1>() = IfTrue;
-}
-
-BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
- BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - 3,
- 3, InsertAtEnd) {
- Op<-1>() = IfTrue;
- Op<-2>() = IfFalse;
- Op<-3>() = Cond;
-#ifndef NDEBUG
- AssertOK();
-#endif
-}
-
-
-BranchInst::BranchInst(const BranchInst &BI) :
- TerminatorInst(Type::getVoidTy(BI.getContext()), Instruction::Br,
- OperandTraits<BranchInst>::op_end(this) - BI.getNumOperands(),
- BI.getNumOperands()) {
- Op<-1>() = BI.Op<-1>();
- if (BI.getNumOperands() != 1) {
- assert(BI.getNumOperands() == 3 && "BR can have 1 or 3 operands!");
- Op<-3>() = BI.Op<-3>();
- Op<-2>() = BI.Op<-2>();
- }
- SubclassOptionalData = BI.SubclassOptionalData;
-}
-
-void BranchInst::swapSuccessors() {
- assert(isConditional() &&
- "Cannot swap successors of an unconditional branch");
- Op<-1>().swap(Op<-2>());
-
- // Update profile metadata if present and it matches our structural
- // expectations.
- MDNode *ProfileData = getMetadata(LLVMContext::MD_prof);
- if (!ProfileData || ProfileData->getNumOperands() != 3)
- return;
-
- // The first operand is the metadata name; operands 1 and 2 are the branch
- // weights, which we fetch in swapped order to build the new node.
- Value *Ops[] = {
- ProfileData->getOperand(0),
- ProfileData->getOperand(2),
- ProfileData->getOperand(1)
- };
- setMetadata(LLVMContext::MD_prof,
- MDNode::get(ProfileData->getContext(), Ops));
-}
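-
-// Illustrative sketch, not part of the original source: inverting a
-// conditional branch. swapSuccessors keeps !prof metadata consistent, but
-// the caller must invert the condition itself.
-static void invertBranch(BranchInst *BI) {
- assert(BI->isConditional() && "nothing to invert");
- Value *NotCond = BinaryOperator::CreateNot(BI->getCondition(), "not", BI);
- BI->setCondition(NotCond);
- BI->swapSuccessors();
-}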
-
-BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
- return getSuccessor(idx);
-}
-unsigned BranchInst::getNumSuccessorsV() const {
- return getNumSuccessors();
-}
-void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
- setSuccessor(idx, B);
-}
-
-
-//===----------------------------------------------------------------------===//
-// AllocaInst Implementation
-//===----------------------------------------------------------------------===//
-
-static Value *getAISize(LLVMContext &Context, Value *Amt) {
- if (!Amt)
- Amt = ConstantInt::get(Type::getInt32Ty(Context), 1);
- else {
- assert(!isa<BasicBlock>(Amt) &&
- "Passed basic block into allocation size parameter! Use other ctor");
- assert(Amt->getType()->isIntegerTy() &&
- "Allocation array size is not an integer!");
- }
- return Amt;
-}
-
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
- const Twine &Name, Instruction *InsertBefore)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertBefore) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
-
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
- const Twine &Name, BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
-
-AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
- Instruction *InsertBefore)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), 0), InsertBefore) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
-
-AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
- BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), 0), InsertAtEnd) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
-
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
- const Twine &Name, Instruction *InsertBefore)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertBefore) {
- setAlignment(Align);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
-
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
- const Twine &Name, BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
- setAlignment(Align);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
-
-// Out of line virtual method, so the vtable, etc has a home.
-AllocaInst::~AllocaInst() {
-}
-
-void AllocaInst::setAlignment(unsigned Align) {
- assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
- assert(Align <= MaximumAlignment &&
- "Alignment is greater than MaximumAlignment!");
- setInstructionSubclassData(Log2_32(Align) + 1);
- assert(getAlignment() == Align && "Alignment representation error!");
-}
-
-bool AllocaInst::isArrayAllocation() const {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(0)))
- return !CI->isOne();
- return true;
-}
-
-Type *AllocaInst::getAllocatedType() const {
- return getType()->getElementType();
-}
-
-/// isStaticAlloca - Return true if this alloca is in the entry block of the
-/// function and has a constant size. If so, the code generator will fold it
-/// into the prolog/epilog code, so it is basically free.
-bool AllocaInst::isStaticAlloca() const {
- // Must be constant size.
- if (!isa<ConstantInt>(getArraySize())) return false;
-
- // Must be in the entry block.
- const BasicBlock *Parent = getParent();
- return Parent == &Parent->getParent()->front();
-}
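-
-// Illustrative sketch, not part of the original source: creating an alloca
-// that isStaticAlloca() reports as free, by giving it a constant size and
-// placing it at the top of the entry block (assumed non-empty, which holds
-// for any well-formed function since every block has a terminator).
-static AllocaInst *createEntryAlloca(Function *F, Type *Ty) {
- BasicBlock &Entry = F->getEntryBlock();
- return new AllocaInst(Ty, "tmp", &Entry.front());
-}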
-
-//===----------------------------------------------------------------------===//
-// LoadInst Implementation
-//===----------------------------------------------------------------------===//
-
-void LoadInst::AssertOK() {
- assert(getOperand(0)->getType()->isPointerTy() &&
- "Ptr must have pointer type.");
- assert(!(isAtomic() && getAlignment() == 0) &&
- "Alignment required for atomic load");
-}
-
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
- Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
- BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
- unsigned Align, Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
- unsigned Align, BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope,
- Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(Order, SynchScope);
- AssertOK();
- setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope,
- BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(Order, SynchScope);
- AssertOK();
- setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const char *Name, Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const char *Name, BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
- Instruction *InsertBef)
-: UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
-LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
- BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- if (Name && Name[0]) setName(Name);
-}
-
-void LoadInst::setAlignment(unsigned Align) {
- assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
- assert(Align <= MaximumAlignment &&
- "Alignment is greater than MaximumAlignment!");
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
- ((Log2_32(Align)+1)<<1));
- assert(getAlignment() == Align && "Alignment representation error!");
-}
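-
-// Note, not part of the original source: the alignment lives in a 5-bit
-// subclass-data field as Log2(Align)+1, with 0 meaning "no alignment".
-// For example, setAlignment(8) stores Log2_32(8)+1 == 4, and getAlignment()
-// decodes it back to 1 << (4-1) == 8.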
-
-//===----------------------------------------------------------------------===//
-// StoreInst Implementation
-//===----------------------------------------------------------------------===//
-
-void StoreInst::AssertOK() {
- assert(getOperand(0) && getOperand(1) && "Both operands must be non-null!");
- assert(getOperand(1)->getType()->isPointerTy() &&
- "Ptr must have pointer type!");
- assert(getOperand(0)->getType() ==
- cast<PointerType>(getOperand(1)->getType())->getElementType()
- && "Ptr must be a pointer to Val type!");
- assert(!(isAtomic() && getAlignment() == 0) &&
- "Alignment required for atomic load");
-}
-
-
-StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertBefore) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
-
-StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertAtEnd) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
-
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertBefore) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
-
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- unsigned Align, Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertBefore) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
-}
-
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope,
- Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertBefore) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(Order, SynchScope);
- AssertOK();
-}
-
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertAtEnd) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
-
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- unsigned Align, BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertAtEnd) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
-}
-
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- unsigned Align, AtomicOrdering Order,
- SynchronizationScope SynchScope,
- BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertAtEnd) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(Order, SynchScope);
- AssertOK();
-}
-
-void StoreInst::setAlignment(unsigned Align) {
- assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
- assert(Align <= MaximumAlignment &&
- "Alignment is greater than MaximumAlignment!");
- setInstructionSubclassData((getSubclassDataFromInstruction() & ~(31 << 1)) |
- ((Log2_32(Align)+1) << 1));
- assert(getAlignment() == Align && "Alignment representation error!");
-}
-
-//===----------------------------------------------------------------------===//
-// AtomicCmpXchgInst Implementation
-//===----------------------------------------------------------------------===//
-
-void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal,
- AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
- Op<0>() = Ptr;
- Op<1>() = Cmp;
- Op<2>() = NewVal;
- setOrdering(Ordering);
- setSynchScope(SynchScope);
-
- assert(getOperand(0) && getOperand(1) && getOperand(2) &&
- "All operands must be non-null!");
- assert(getOperand(0)->getType()->isPointerTy() &&
- "Ptr must have pointer type!");
- assert(getOperand(1)->getType() ==
- cast<PointerType>(getOperand(0)->getType())->getElementType()
- && "Ptr must be a pointer to Cmp type!");
- assert(getOperand(2)->getType() ==
- cast<PointerType>(getOperand(0)->getType())->getElementType()
- && "Ptr must be a pointer to NewVal type!");
- assert(Ordering != NotAtomic &&
- "AtomicCmpXchg instructions must be atomic!");
-}
-
-AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
- AtomicOrdering Ordering,
- SynchronizationScope SynchScope,
- Instruction *InsertBefore)
- : Instruction(Cmp->getType(), AtomicCmpXchg,
- OperandTraits<AtomicCmpXchgInst>::op_begin(this),
- OperandTraits<AtomicCmpXchgInst>::operands(this),
- InsertBefore) {
- Init(Ptr, Cmp, NewVal, Ordering, SynchScope);
-}
-
-AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
- AtomicOrdering Ordering,
- SynchronizationScope SynchScope,
- BasicBlock *InsertAtEnd)
- : Instruction(Cmp->getType(), AtomicCmpXchg,
- OperandTraits<AtomicCmpXchgInst>::op_begin(this),
- OperandTraits<AtomicCmpXchgInst>::operands(this),
- InsertAtEnd) {
- Init(Ptr, Cmp, NewVal, Ordering, SynchScope);
-}
-
-//===----------------------------------------------------------------------===//
-// AtomicRMWInst Implementation
-//===----------------------------------------------------------------------===//
-
-void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val,
- AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
- Op<0>() = Ptr;
- Op<1>() = Val;
- setOperation(Operation);
- setOrdering(Ordering);
- setSynchScope(SynchScope);
-
- assert(getOperand(0) && getOperand(1) &&
- "All operands must be non-null!");
- assert(getOperand(0)->getType()->isPointerTy() &&
- "Ptr must have pointer type!");
- assert(getOperand(1)->getType() ==
- cast<PointerType>(getOperand(0)->getType())->getElementType()
- && "Ptr must be a pointer to Val type!");
- assert(Ordering != NotAtomic &&
- "AtomicRMW instructions must be atomic!");
-}
-
-AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
- AtomicOrdering Ordering,
- SynchronizationScope SynchScope,
- Instruction *InsertBefore)
- : Instruction(Val->getType(), AtomicRMW,
- OperandTraits<AtomicRMWInst>::op_begin(this),
- OperandTraits<AtomicRMWInst>::operands(this),
- InsertBefore) {
- Init(Operation, Ptr, Val, Ordering, SynchScope);
-}
-
-AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
- AtomicOrdering Ordering,
- SynchronizationScope SynchScope,
- BasicBlock *InsertAtEnd)
- : Instruction(Val->getType(), AtomicRMW,
- OperandTraits<AtomicRMWInst>::op_begin(this),
- OperandTraits<AtomicRMWInst>::operands(this),
- InsertAtEnd) {
- Init(Operation, Ptr, Val, Ordering, SynchScope);
-}
-
-//===----------------------------------------------------------------------===//
-// FenceInst Implementation
-//===----------------------------------------------------------------------===//
-
-FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
- SynchronizationScope SynchScope,
- Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertBefore) {
- setOrdering(Ordering);
- setSynchScope(SynchScope);
-}
-
-FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
- SynchronizationScope SynchScope,
- BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertAtEnd) {
- setOrdering(Ordering);
- setSynchScope(SynchScope);
-}
-
-//===----------------------------------------------------------------------===//
-// GetElementPtrInst Implementation
-//===----------------------------------------------------------------------===//
-
-void GetElementPtrInst::init(Value *Ptr, ArrayRef<Value *> IdxList,
- const Twine &Name) {
- assert(NumOperands == 1 + IdxList.size() && "NumOperands not initialized?");
- OperandList[0] = Ptr;
- std::copy(IdxList.begin(), IdxList.end(), op_begin() + 1);
- setName(Name);
-}
-
-GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
- : Instruction(GEPI.getType(), GetElementPtr,
- OperandTraits<GetElementPtrInst>::op_end(this)
- - GEPI.getNumOperands(),
- GEPI.getNumOperands()) {
- std::copy(GEPI.op_begin(), GEPI.op_end(), op_begin());
- SubclassOptionalData = GEPI.SubclassOptionalData;
-}
-
-/// getIndexedType - Returns the type of the element that would be accessed with
-/// a gep instruction with the specified parameters.
-///
-/// The Idxs pointer should point to a contiguous piece of memory containing the
-/// indices, either as Value* or uint64_t.
-///
-/// A null type is returned if the indices are invalid for the specified
-/// pointer type.
-///
-template <typename IndexTy>
-static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
- if (Ptr->isVectorTy()) {
- assert(IdxList.size() == 1 &&
- "GEP with vector pointers must have a single index");
- PointerType *PTy = dyn_cast<PointerType>(
- cast<VectorType>(Ptr)->getElementType());
-    assert(PTy && "GEP with invalid vector pointer found");
- return PTy->getElementType();
- }
-
- PointerType *PTy = dyn_cast<PointerType>(Ptr);
- if (!PTy) return 0; // Type isn't a pointer type!
- Type *Agg = PTy->getElementType();
-
-  // Handle the special case of the empty index set, which is always valid.
- if (IdxList.empty())
- return Agg;
-
-  // If there is at least one index, the top level type must be sized;
-  // otherwise it cannot be 'stepped over'.
- if (!Agg->isSized())
- return 0;
-
- unsigned CurIdx = 1;
- for (; CurIdx != IdxList.size(); ++CurIdx) {
- CompositeType *CT = dyn_cast<CompositeType>(Agg);
- if (!CT || CT->isPointerTy()) return 0;
- IndexTy Index = IdxList[CurIdx];
- if (!CT->indexValid(Index)) return 0;
- Agg = CT->getTypeAtIndex(Index);
- }
- return CurIdx == IdxList.size() ? Agg : 0;
-}
-
-Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList) {
- return getIndexedTypeInternal(Ptr, IdxList);
-}
-
-Type *GetElementPtrInst::getIndexedType(Type *Ptr,
- ArrayRef<Constant *> IdxList) {
- return getIndexedTypeInternal(Ptr, IdxList);
-}
-
-Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) {
- return getIndexedTypeInternal(Ptr, IdxList);
-}
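-
-// For illustration, a hypothetical helper showing what getIndexedType
-// computes: for a pointer to {i32, [4 x i8]}, the index list (0, 1, 2)
-// steps over the pointer, selects the array field, and indexes into it,
-// yielding i8.
-static Type *exampleIndexedType(LLVMContext &Ctx) {
-  Type *I8 = Type::getInt8Ty(Ctx);
-  StructType *ST = StructType::get(Type::getInt32Ty(Ctx),
-                                   ArrayType::get(I8, 4), NULL);
-  uint64_t Idxs[] = { 0, 1, 2 };
-  return GetElementPtrInst::getIndexedType(ST->getPointerTo(), Idxs); // i8
-}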
-
-/// hasAllZeroIndices - Return true if all of the indices of this GEP are
-/// zeros. If so, the result pointer and the first operand have the same
-/// value, just potentially different types.
-bool GetElementPtrInst::hasAllZeroIndices() const {
- for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(i))) {
- if (!CI->isZero()) return false;
- } else {
- return false;
- }
- }
- return true;
-}
-
-/// hasAllConstantIndices - Return true if all of the indices of this GEP are
-/// constant integers. If so, the result pointer and the first operand have
-/// a constant offset between them.
-bool GetElementPtrInst::hasAllConstantIndices() const {
- for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
- if (!isa<ConstantInt>(getOperand(i)))
- return false;
- }
- return true;
-}
-
-void GetElementPtrInst::setIsInBounds(bool B) {
- cast<GEPOperator>(this)->setIsInBounds(B);
-}
-
-bool GetElementPtrInst::isInBounds() const {
- return cast<GEPOperator>(this)->isInBounds();
-}
-
-//===----------------------------------------------------------------------===//
-// ExtractElementInst Implementation
-//===----------------------------------------------------------------------===//
-
-ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
- const Twine &Name,
- Instruction *InsertBef)
- : Instruction(cast<VectorType>(Val->getType())->getElementType(),
- ExtractElement,
- OperandTraits<ExtractElementInst>::op_begin(this),
- 2, InsertBef) {
- assert(isValidOperands(Val, Index) &&
- "Invalid extractelement instruction operands!");
- Op<0>() = Val;
- Op<1>() = Index;
- setName(Name);
-}
-
-ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
- const Twine &Name,
- BasicBlock *InsertAE)
- : Instruction(cast<VectorType>(Val->getType())->getElementType(),
- ExtractElement,
- OperandTraits<ExtractElementInst>::op_begin(this),
- 2, InsertAE) {
- assert(isValidOperands(Val, Index) &&
- "Invalid extractelement instruction operands!");
-
- Op<0>() = Val;
- Op<1>() = Index;
- setName(Name);
-}
-
-
-bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
- if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32))
- return false;
- return true;
-}
-
-
-//===----------------------------------------------------------------------===//
-// InsertElementInst Implementation
-//===----------------------------------------------------------------------===//
-
-InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
- const Twine &Name,
- Instruction *InsertBef)
- : Instruction(Vec->getType(), InsertElement,
- OperandTraits<InsertElementInst>::op_begin(this),
- 3, InsertBef) {
- assert(isValidOperands(Vec, Elt, Index) &&
- "Invalid insertelement instruction operands!");
- Op<0>() = Vec;
- Op<1>() = Elt;
- Op<2>() = Index;
- setName(Name);
-}
-
-InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
- const Twine &Name,
- BasicBlock *InsertAE)
- : Instruction(Vec->getType(), InsertElement,
- OperandTraits<InsertElementInst>::op_begin(this),
- 3, InsertAE) {
- assert(isValidOperands(Vec, Elt, Index) &&
- "Invalid insertelement instruction operands!");
-
- Op<0>() = Vec;
- Op<1>() = Elt;
- Op<2>() = Index;
- setName(Name);
-}
-
-bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
- const Value *Index) {
- if (!Vec->getType()->isVectorTy())
- return false; // First operand of insertelement must be vector type.
-
- if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
-    return false; // Second operand of insertelement must be vector element type.
-
- if (!Index->getType()->isIntegerTy(32))
- return false; // Third operand of insertelement must be i32.
- return true;
-}
-
-
-//===----------------------------------------------------------------------===//
-// ShuffleVectorInst Implementation
-//===----------------------------------------------------------------------===//
-
-ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
- const Twine &Name,
- Instruction *InsertBefore)
-: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
- cast<VectorType>(Mask->getType())->getNumElements()),
- ShuffleVector,
- OperandTraits<ShuffleVectorInst>::op_begin(this),
- OperandTraits<ShuffleVectorInst>::operands(this),
- InsertBefore) {
- assert(isValidOperands(V1, V2, Mask) &&
- "Invalid shuffle vector instruction operands!");
- Op<0>() = V1;
- Op<1>() = V2;
- Op<2>() = Mask;
- setName(Name);
-}
-
-ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
- const Twine &Name,
- BasicBlock *InsertAtEnd)
-: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
- cast<VectorType>(Mask->getType())->getNumElements()),
- ShuffleVector,
- OperandTraits<ShuffleVectorInst>::op_begin(this),
- OperandTraits<ShuffleVectorInst>::operands(this),
- InsertAtEnd) {
- assert(isValidOperands(V1, V2, Mask) &&
- "Invalid shuffle vector instruction operands!");
-
- Op<0>() = V1;
- Op<1>() = V2;
- Op<2>() = Mask;
- setName(Name);
-}
-
-bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
- const Value *Mask) {
- // V1 and V2 must be vectors of the same type.
- if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
- return false;
-
- // Mask must be vector of i32.
- VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
- if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32))
- return false;
-
- // Check to see if Mask is valid.
- if (isa<UndefValue>(Mask) || isa<ConstantAggregateZero>(Mask))
- return true;
-
- if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) {
- unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
- for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
- if (CI->uge(V1Size*2))
- return false;
- } else if (!isa<UndefValue>(MV->getOperand(i))) {
- return false;
- }
- }
- return true;
- }
-
- if (const ConstantDataSequential *CDS =
- dyn_cast<ConstantDataSequential>(Mask)) {
- unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
- for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i)
- if (CDS->getElementAsInteger(i) >= V1Size*2)
- return false;
- return true;
- }
-
-  // The bitcode reader can create a placeholder for a forward reference
-  // used as the shuffle mask. When this occurs, the shuffle mask will
-  // fall into this case and fail. To avoid this error, do this bit of
-  // ugliness to allow such a mask to pass.
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Mask))
- if (CE->getOpcode() == Instruction::UserOp1)
- return true;
-
- return false;
-}
-
-/// getMaskValue - Return the index from the shuffle mask for the specified
-/// output result. This is either -1 if the element is undef or a number less
-/// than 2*numelements.
-int ShuffleVectorInst::getMaskValue(Constant *Mask, unsigned i) {
- assert(i < Mask->getType()->getVectorNumElements() && "Index out of range");
- if (ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(Mask))
- return CDS->getElementAsInteger(i);
- Constant *C = Mask->getAggregateElement(i);
- if (isa<UndefValue>(C))
- return -1;
- return cast<ConstantInt>(C)->getZExtValue();
-}
-
-/// getShuffleMask - Return the full mask for this instruction, where each
-/// element is the element number and undefs are returned as -1.
-void ShuffleVectorInst::getShuffleMask(Constant *Mask,
- SmallVectorImpl<int> &Result) {
- unsigned NumElts = Mask->getType()->getVectorNumElements();
-
- if (ConstantDataSequential *CDS=dyn_cast<ConstantDataSequential>(Mask)) {
- for (unsigned i = 0; i != NumElts; ++i)
- Result.push_back(CDS->getElementAsInteger(i));
- return;
- }
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *C = Mask->getAggregateElement(i);
- Result.push_back(isa<UndefValue>(C) ? -1 :
- cast<ConstantInt>(C)->getZExtValue());
- }
-}
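-
-// For illustration, a hypothetical helper that unpacks the mask
-// <4 x i32> <i32 0, i32 5, i32 undef, i32 2>: index 5 selects element 1
-// of the second input vector (5 = 4 + 1), and the undef lane comes back
-// as -1.
-static void exampleShuffleMask(LLVMContext &Ctx) {
-  Type *I32 = Type::getInt32Ty(Ctx);
-  Constant *Elts[] = { ConstantInt::get(I32, 0), ConstantInt::get(I32, 5),
-                       UndefValue::get(I32), ConstantInt::get(I32, 2) };
-  SmallVector<int, 4> Unpacked;
-  ShuffleVectorInst::getShuffleMask(ConstantVector::get(Elts), Unpacked);
-  // Unpacked now holds {0, 5, -1, 2}.
-}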
-
-
-//===----------------------------------------------------------------------===//
-// InsertValueInst Class
-//===----------------------------------------------------------------------===//
-
-void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
- const Twine &Name) {
- assert(NumOperands == 2 && "NumOperands not initialized?");
-
- // There's no fundamental reason why we require at least one index
- // (other than weirdness with &*IdxBegin being invalid; see
- // getelementptr's init routine for example). But there's no
- // present need to support it.
- assert(Idxs.size() > 0 && "InsertValueInst must have at least one index");
-
- assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs) ==
- Val->getType() && "Inserted value must match indexed type!");
- Op<0>() = Agg;
- Op<1>() = Val;
-
- Indices.append(Idxs.begin(), Idxs.end());
- setName(Name);
-}
-
-InsertValueInst::InsertValueInst(const InsertValueInst &IVI)
- : Instruction(IVI.getType(), InsertValue,
- OperandTraits<InsertValueInst>::op_begin(this), 2),
- Indices(IVI.Indices) {
- Op<0>() = IVI.getOperand(0);
- Op<1>() = IVI.getOperand(1);
- SubclassOptionalData = IVI.SubclassOptionalData;
-}
-
-//===----------------------------------------------------------------------===//
-// ExtractValueInst Class
-//===----------------------------------------------------------------------===//
-
-void ExtractValueInst::init(ArrayRef<unsigned> Idxs, const Twine &Name) {
- assert(NumOperands == 1 && "NumOperands not initialized?");
-
- // There's no fundamental reason why we require at least one index.
- // But there's no present need to support it.
- assert(Idxs.size() > 0 && "ExtractValueInst must have at least one index");
-
- Indices.append(Idxs.begin(), Idxs.end());
- setName(Name);
-}
-
-ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
- : UnaryInstruction(EVI.getType(), ExtractValue, EVI.getOperand(0)),
- Indices(EVI.Indices) {
- SubclassOptionalData = EVI.SubclassOptionalData;
-}
-
-// getIndexedType - Returns the type of the element that would be extracted
-// with an extractvalue instruction with the specified parameters.
-//
-// A null type is returned if the indices are invalid for the specified
-// aggregate type.
-//
-Type *ExtractValueInst::getIndexedType(Type *Agg,
- ArrayRef<unsigned> Idxs) {
- for (unsigned CurIdx = 0; CurIdx != Idxs.size(); ++CurIdx) {
- unsigned Index = Idxs[CurIdx];
- // We can't use CompositeType::indexValid(Index) here.
- // indexValid() always returns true for arrays because getelementptr allows
- // out-of-bounds indices. Since we don't allow those for extractvalue and
- // insertvalue we need to check array indexing manually.
- // Since the only other types we can index into are struct types it's just
- // as easy to check those manually as well.
- if (ArrayType *AT = dyn_cast<ArrayType>(Agg)) {
- if (Index >= AT->getNumElements())
- return 0;
- } else if (StructType *ST = dyn_cast<StructType>(Agg)) {
- if (Index >= ST->getNumElements())
- return 0;
- } else {
- // Not a valid type to index into.
- return 0;
- }
-
- Agg = cast<CompositeType>(Agg)->getTypeAtIndex(Index);
- }
- return const_cast<Type*>(Agg);
-}
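-
-// For illustration, a hypothetical helper: for the aggregate type
-// {i32, [2 x float]}, indices (1, 1) reach a float, while (1, 2) run off
-// the end of the two-element array and yield a null type.
-static bool exampleExtractValueIndexing(LLVMContext &Ctx) {
-  Type *F32 = Type::getFloatTy(Ctx);
-  StructType *ST = StructType::get(Type::getInt32Ty(Ctx),
-                                   ArrayType::get(F32, 2), NULL);
-  unsigned Good[] = { 1, 1 };
-  unsigned Bad[] = { 1, 2 };
-  return ExtractValueInst::getIndexedType(ST, Good) == F32 &&
-         ExtractValueInst::getIndexedType(ST, Bad) == 0;
-}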
-
-//===----------------------------------------------------------------------===//
-// BinaryOperator Class
-//===----------------------------------------------------------------------===//
-
-BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
- Type *Ty, const Twine &Name,
- Instruction *InsertBefore)
- : Instruction(Ty, iType,
- OperandTraits<BinaryOperator>::op_begin(this),
- OperandTraits<BinaryOperator>::operands(this),
- InsertBefore) {
- Op<0>() = S1;
- Op<1>() = S2;
- init(iType);
- setName(Name);
-}
-
-BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
- Type *Ty, const Twine &Name,
- BasicBlock *InsertAtEnd)
- : Instruction(Ty, iType,
- OperandTraits<BinaryOperator>::op_begin(this),
- OperandTraits<BinaryOperator>::operands(this),
- InsertAtEnd) {
- Op<0>() = S1;
- Op<1>() = S2;
- init(iType);
- setName(Name);
-}
-
-
-void BinaryOperator::init(BinaryOps iType) {
- Value *LHS = getOperand(0), *RHS = getOperand(1);
- (void)LHS; (void)RHS; // Silence warnings.
- assert(LHS->getType() == RHS->getType() &&
- "Binary operator operand types must match!");
-#ifndef NDEBUG
- switch (iType) {
- case Add: case Sub:
- case Mul:
- assert(getType() == LHS->getType() &&
- "Arithmetic operation should return same type as operands!");
- assert(getType()->isIntOrIntVectorTy() &&
- "Tried to create an integer operation on a non-integer type!");
- break;
- case FAdd: case FSub:
- case FMul:
- assert(getType() == LHS->getType() &&
- "Arithmetic operation should return same type as operands!");
- assert(getType()->isFPOrFPVectorTy() &&
- "Tried to create a floating-point operation on a "
- "non-floating-point type!");
- break;
- case UDiv:
- case SDiv:
- assert(getType() == LHS->getType() &&
- "Arithmetic operation should return same type as operands!");
- assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
- cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
- "Incorrect operand type (not integer) for S/UDIV");
- break;
- case FDiv:
- assert(getType() == LHS->getType() &&
- "Arithmetic operation should return same type as operands!");
- assert(getType()->isFPOrFPVectorTy() &&
- "Incorrect operand type (not floating point) for FDIV");
- break;
- case URem:
- case SRem:
- assert(getType() == LHS->getType() &&
- "Arithmetic operation should return same type as operands!");
- assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
- cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
- "Incorrect operand type (not integer) for S/UREM");
- break;
- case FRem:
- assert(getType() == LHS->getType() &&
- "Arithmetic operation should return same type as operands!");
- assert(getType()->isFPOrFPVectorTy() &&
- "Incorrect operand type (not floating point) for FREM");
- break;
- case Shl:
- case LShr:
- case AShr:
- assert(getType() == LHS->getType() &&
- "Shift operation should return same type as operands!");
- assert((getType()->isIntegerTy() ||
- (getType()->isVectorTy() &&
- cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
- "Tried to create a shift operation on a non-integral type!");
- break;
- case And: case Or:
- case Xor:
- assert(getType() == LHS->getType() &&
- "Logical operation should return same type as operands!");
- assert((getType()->isIntegerTy() ||
- (getType()->isVectorTy() &&
- cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
- "Tried to create a logical operation on a non-integral type!");
- break;
- default:
- break;
- }
-#endif
-}
-
-BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
- const Twine &Name,
- Instruction *InsertBefore) {
- assert(S1->getType() == S2->getType() &&
- "Cannot create binary operator with two operands of differing type!");
- return new BinaryOperator(Op, S1, S2, S1->getType(), Name, InsertBefore);
-}
-
-BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
- const Twine &Name,
- BasicBlock *InsertAtEnd) {
- BinaryOperator *Res = Create(Op, S1, S2, Name);
- InsertAtEnd->getInstList().push_back(Res);
- return Res;
-}
-
-BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
- Instruction *InsertBefore) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return new BinaryOperator(Instruction::Sub,
- zero, Op,
- Op->getType(), Name, InsertBefore);
-}
-
-BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
- BasicBlock *InsertAtEnd) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return new BinaryOperator(Instruction::Sub,
- zero, Op,
- Op->getType(), Name, InsertAtEnd);
-}
-
-BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
- Instruction *InsertBefore) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertBefore);
-}
-
-BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
- BasicBlock *InsertAtEnd) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertAtEnd);
-}
-
-BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
- Instruction *InsertBefore) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertBefore);
-}
-
-BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
- BasicBlock *InsertAtEnd) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertAtEnd);
-}
-
-BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
- Instruction *InsertBefore) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return new BinaryOperator(Instruction::FSub, zero, Op,
- Op->getType(), Name, InsertBefore);
-}
-
-BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
- BasicBlock *InsertAtEnd) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return new BinaryOperator(Instruction::FSub, zero, Op,
- Op->getType(), Name, InsertAtEnd);
-}
-
-BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
- Instruction *InsertBefore) {
- Constant *C = Constant::getAllOnesValue(Op->getType());
- return new BinaryOperator(Instruction::Xor, Op, C,
- Op->getType(), Name, InsertBefore);
-}
-
-BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
- BasicBlock *InsertAtEnd) {
- Constant *AllOnes = Constant::getAllOnesValue(Op->getType());
- return new BinaryOperator(Instruction::Xor, Op, AllOnes,
- Op->getType(), Name, InsertAtEnd);
-}
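-
-// For illustration, a hypothetical helper: as the factories above show,
-// negation is materialized as a subtraction from zero and bitwise not as
-// an xor with all-ones. X is assumed to be an integer-typed value and
-// InsertBefore an existing instruction.
-static void exampleNegNot(Value *X, Instruction *InsertBefore) {
-  BinaryOperator::CreateNeg(X, "neg", InsertBefore); // sub 0, X
-  BinaryOperator::CreateNot(X, "not", InsertBefore); // xor X, -1
-}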
-
-
-// isConstantAllOnes - Helper function for several functions below
-static inline bool isConstantAllOnes(const Value *V) {
- if (const Constant *C = dyn_cast<Constant>(V))
- return C->isAllOnesValue();
- return false;
-}
-
-bool BinaryOperator::isNeg(const Value *V) {
- if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
- if (Bop->getOpcode() == Instruction::Sub)
- if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0)))
- return C->isNegativeZeroValue();
- return false;
-}
-
-bool BinaryOperator::isFNeg(const Value *V) {
- if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
- if (Bop->getOpcode() == Instruction::FSub)
- if (Constant* C = dyn_cast<Constant>(Bop->getOperand(0)))
- return C->isNegativeZeroValue();
- return false;
-}
-
-bool BinaryOperator::isNot(const Value *V) {
- if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V))
- return (Bop->getOpcode() == Instruction::Xor &&
- (isConstantAllOnes(Bop->getOperand(1)) ||
- isConstantAllOnes(Bop->getOperand(0))));
- return false;
-}
-
-Value *BinaryOperator::getNegArgument(Value *BinOp) {
- return cast<BinaryOperator>(BinOp)->getOperand(1);
-}
-
-const Value *BinaryOperator::getNegArgument(const Value *BinOp) {
- return getNegArgument(const_cast<Value*>(BinOp));
-}
-
-Value *BinaryOperator::getFNegArgument(Value *BinOp) {
- return cast<BinaryOperator>(BinOp)->getOperand(1);
-}
-
-const Value *BinaryOperator::getFNegArgument(const Value *BinOp) {
- return getFNegArgument(const_cast<Value*>(BinOp));
-}
-
-Value *BinaryOperator::getNotArgument(Value *BinOp) {
- assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!");
- BinaryOperator *BO = cast<BinaryOperator>(BinOp);
- Value *Op0 = BO->getOperand(0);
- Value *Op1 = BO->getOperand(1);
- if (isConstantAllOnes(Op0)) return Op1;
-
- assert(isConstantAllOnes(Op1));
- return Op0;
-}
-
-const Value *BinaryOperator::getNotArgument(const Value *BinOp) {
- return getNotArgument(const_cast<Value*>(BinOp));
-}
-
-
-// swapOperands - Exchange the two operands of this instruction. This
-// method is safe to use on any binary instruction and does not modify
-// the semantics of the instruction. If the operation is not
-// commutative, nothing is changed and true is returned to signal
-// failure.
-//
-bool BinaryOperator::swapOperands() {
- if (!isCommutative())
- return true; // Can't commute operands
- Op<0>().swap(Op<1>());
- return false;
-}
-
-void BinaryOperator::setHasNoUnsignedWrap(bool b) {
- cast<OverflowingBinaryOperator>(this)->setHasNoUnsignedWrap(b);
-}
-
-void BinaryOperator::setHasNoSignedWrap(bool b) {
- cast<OverflowingBinaryOperator>(this)->setHasNoSignedWrap(b);
-}
-
-void BinaryOperator::setIsExact(bool b) {
- cast<PossiblyExactOperator>(this)->setIsExact(b);
-}
-
-bool BinaryOperator::hasNoUnsignedWrap() const {
- return cast<OverflowingBinaryOperator>(this)->hasNoUnsignedWrap();
-}
-
-bool BinaryOperator::hasNoSignedWrap() const {
- return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
-}
-
-bool BinaryOperator::isExact() const {
- return cast<PossiblyExactOperator>(this)->isExact();
-}
-
-//===----------------------------------------------------------------------===//
-// FPMathOperator Class
-//===----------------------------------------------------------------------===//
-
-/// getFPAccuracy - Get the maximum error permitted by this operation in ULPs.
-/// An accuracy of 0.0 means that the operation should be performed with the
-/// default precision.
-float FPMathOperator::getFPAccuracy() const {
- const MDNode *MD =
- cast<Instruction>(this)->getMetadata(LLVMContext::MD_fpmath);
- if (!MD)
- return 0.0;
- ConstantFP *Accuracy = cast<ConstantFP>(MD->getOperand(0));
- return Accuracy->getValueAPF().convertToFloat();
-}
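-
-// For illustration, a hypothetical helper that attaches
-// "!fpmath !{float 2.5}" to an instruction and reads it back. I is
-// assumed to be a floating-point arithmetic instruction such as fdiv.
-static float exampleFPAccuracy(Instruction *I) {
-  LLVMContext &Ctx = I->getContext();
-  Value *Acc = ConstantFP::get(Type::getFloatTy(Ctx), 2.5);
-  I->setMetadata(LLVMContext::MD_fpmath, MDNode::get(Ctx, Acc));
-  return cast<FPMathOperator>(I)->getFPAccuracy(); // 2.5
-}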
-
-
-//===----------------------------------------------------------------------===//
-// CastInst Class
-//===----------------------------------------------------------------------===//
-
-void CastInst::anchor() {}
-
-// Just determine if this cast only deals with integral->integral conversion.
-bool CastInst::isIntegerCast() const {
- switch (getOpcode()) {
- default: return false;
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::Trunc:
- return true;
- case Instruction::BitCast:
- return getOperand(0)->getType()->isIntegerTy() &&
- getType()->isIntegerTy();
- }
-}
-
-bool CastInst::isLosslessCast() const {
-  // Only BitCast can be lossless; exit fast if we're not BitCast.
- if (getOpcode() != Instruction::BitCast)
- return false;
-
- // Identity cast is always lossless
- Type* SrcTy = getOperand(0)->getType();
- Type* DstTy = getType();
- if (SrcTy == DstTy)
- return true;
-
- // Pointer to pointer is always lossless.
- if (SrcTy->isPointerTy())
- return DstTy->isPointerTy();
- return false; // Other types have no identity values
-}
-
-/// This function determines if the CastInst does not require any bits to be
-/// changed in order to effect the cast. Essentially, it identifies cases where
-/// no code gen is necessary for the cast, hence the name no-op cast. For
-/// example, the following are all no-op casts:
-/// # bitcast i32* %x to i8*
-/// # bitcast <2 x i32> %x to <4 x i16>
-/// # ptrtoint i32* %x to i32 ; on 32-bit platforms only
-/// @brief Determine if the described cast is a no-op.
-bool CastInst::isNoopCast(Instruction::CastOps Opcode,
- Type *SrcTy,
- Type *DestTy,
- Type *IntPtrTy) {
- switch (Opcode) {
- default: llvm_unreachable("Invalid CastOp");
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- return false; // These always modify bits
- case Instruction::BitCast:
- return true; // BitCast never modifies bits.
- case Instruction::PtrToInt:
- return IntPtrTy->getScalarSizeInBits() ==
- DestTy->getScalarSizeInBits();
- case Instruction::IntToPtr:
- return IntPtrTy->getScalarSizeInBits() ==
- SrcTy->getScalarSizeInBits();
- }
-}
-
-/// @brief Determine if a cast is a no-op.
-bool CastInst::isNoopCast(Type *IntPtrTy) const {
- return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
-}
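-
-// For illustration, a hypothetical helper: on a target whose pointers are
-// 32 bits wide (so IntPtrTy is i32), "ptrtoint i8* to i32" changes no
-// bits and is therefore a no-op cast.
-static bool exampleNoopCast(LLVMContext &Ctx) {
-  Type *I32 = Type::getInt32Ty(Ctx);
-  return CastInst::isNoopCast(Instruction::PtrToInt,
-                              Type::getInt8PtrTy(Ctx), I32, I32); // true
-}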
-
-/// This function determines if a pair of casts can be eliminated and what
-/// opcode should be used in the elimination. This assumes that there are two
-/// instructions like this:
-/// * %F = firstOpcode SrcTy %x to MidTy
-/// * %S = secondOpcode MidTy %F to DstTy
-/// The function returns a resultOpcode so these two casts can be replaced with:
-/// * %Replacement = resultOpcode %SrcTy %x to DstTy
-/// If no such cast is permitted, the function returns 0.
-unsigned CastInst::isEliminableCastPair(
- Instruction::CastOps firstOp, Instruction::CastOps secondOp,
- Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy,
- Type *DstIntPtrTy) {
- // Define the 144 possibilities for these two cast instructions. The values
- // in this matrix determine what to do in a given situation and select the
- // case in the switch below. The rows correspond to firstOp, the columns
- // correspond to secondOp. In looking at the table below, keep in mind
- // the following cast properties:
- //
- // Size Compare Source Destination
- // Operator Src ? Size Type Sign Type Sign
- // -------- ------------ ------------------- ---------------------
- // TRUNC > Integer Any Integral Any
- // ZEXT < Integral Unsigned Integer Any
- // SEXT < Integral Signed Integer Any
- // FPTOUI n/a FloatPt n/a Integral Unsigned
- // FPTOSI n/a FloatPt n/a Integral Signed
- // UITOFP n/a Integral Unsigned FloatPt n/a
- // SITOFP n/a Integral Signed FloatPt n/a
- // FPTRUNC > FloatPt n/a FloatPt n/a
- // FPEXT < FloatPt n/a FloatPt n/a
- // PTRTOINT n/a Pointer n/a Integral Unsigned
- // INTTOPTR n/a Integral Unsigned Pointer n/a
- // BITCAST = FirstClass n/a FirstClass n/a
- //
-  // NOTE: some transforms are safe, but we consider them to be unprofitable.
- // For example, we could merge "fptoui double to i32" + "zext i32 to i64",
- // into "fptoui double to i64", but this loses information about the range
- // of the produced value (we no longer know the top-part is all zeros).
- // Further this conversion is often much more expensive for typical hardware,
- // and causes issues when building libgcc. We disallow fptosi+sext for the
- // same reason.
- const unsigned numCastOps =
- Instruction::CastOpsEnd - Instruction::CastOpsBegin;
- static const uint8_t CastResults[numCastOps][numCastOps] = {
- // T F F U S F F P I B -+
- // R Z S P P I I T P 2 N T |
- // U E E 2 2 2 2 R E I T C +- secondOp
- // N X X U S F F N X N 2 V |
- // C T T I I P P C T T P T -+
- { 1, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // Trunc -+
- { 8, 1, 9,99,99, 2, 0,99,99,99, 2, 3 }, // ZExt |
- { 8, 0, 1,99,99, 0, 2,99,99,99, 0, 3 }, // SExt |
- { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToUI |
- { 0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToSI |
- { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // UIToFP +- firstOp
- { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // SIToFP |
- { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4 }, // FPTrunc |
- { 99,99,99, 2, 2,99,99,10, 2,99,99, 4 }, // FPExt |
- { 1, 0, 0,99,99, 0, 0,99,99,99, 7, 3 }, // PtrToInt |
- { 99,99,99,99,99,99,99,99,99,13,99,12 }, // IntToPtr |
- { 5, 5, 5, 6, 6, 5, 5, 6, 6,11, 5, 1 }, // BitCast -+
- };
-
-  // If either of the casts is a bitcast from scalar to vector, disallow the
-  // merging. However, a chain of bitcasts A->B->A is allowed.
- bool isFirstBitcast = (firstOp == Instruction::BitCast);
- bool isSecondBitcast = (secondOp == Instruction::BitCast);
- bool chainedBitcast = (SrcTy == DstTy && isFirstBitcast && isSecondBitcast);
-
- // Check if any of the bitcasts convert scalars<->vectors.
- if ((isFirstBitcast && isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
- (isSecondBitcast && isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
-    // Unless we are bitcasting to the original type, disallow optimizations.
- if (!chainedBitcast) return 0;
-
- int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
- [secondOp-Instruction::CastOpsBegin];
- switch (ElimCase) {
- case 0:
- // categorically disallowed
- return 0;
- case 1:
- // allowed, use first cast's opcode
- return firstOp;
- case 2:
- // allowed, use second cast's opcode
- return secondOp;
- case 3:
- // no-op cast in second op implies firstOp as long as the DestTy
- // is integer and we are not converting between a vector and a
-      // non-vector type.
- if (!SrcTy->isVectorTy() && DstTy->isIntegerTy())
- return firstOp;
- return 0;
- case 4:
- // no-op cast in second op implies firstOp as long as the DestTy
- // is floating point.
- if (DstTy->isFloatingPointTy())
- return firstOp;
- return 0;
- case 5:
- // no-op cast in first op implies secondOp as long as the SrcTy
- // is an integer.
- if (SrcTy->isIntegerTy())
- return secondOp;
- return 0;
- case 6:
- // no-op cast in first op implies secondOp as long as the SrcTy
- // is a floating point.
- if (SrcTy->isFloatingPointTy())
- return secondOp;
- return 0;
- case 7: {
- // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size
- if (!SrcIntPtrTy || DstIntPtrTy != SrcIntPtrTy)
- return 0;
- unsigned PtrSize = SrcIntPtrTy->getScalarSizeInBits();
- unsigned MidSize = MidTy->getScalarSizeInBits();
- if (MidSize >= PtrSize)
- return Instruction::BitCast;
- return 0;
- }
- case 8: {
- // ext, trunc -> bitcast, if the SrcTy and DstTy are same size
- // ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy)
- // ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy)
- unsigned SrcSize = SrcTy->getScalarSizeInBits();
- unsigned DstSize = DstTy->getScalarSizeInBits();
- if (SrcSize == DstSize)
- return Instruction::BitCast;
- else if (SrcSize < DstSize)
- return firstOp;
- return secondOp;
- }
- case 9: // zext, sext -> zext, because sext can't sign extend after zext
- return Instruction::ZExt;
- case 10:
-      // fpext followed by fptrunc is allowed if the resulting bit size is
-      // the same as the original, in which case it is just a bitcast.
- if (SrcTy == DstTy)
- return Instruction::BitCast;
- return 0; // If the types are not the same we can't eliminate it.
- case 11:
- // bitcast followed by ptrtoint is allowed as long as the bitcast
- // is a pointer to pointer cast.
- if (SrcTy->isPointerTy() && MidTy->isPointerTy())
- return secondOp;
- return 0;
- case 12:
-      // inttoptr, bitcast -> inttoptr if the bitcast is a ptr to ptr cast
- if (MidTy->isPointerTy() && DstTy->isPointerTy())
- return firstOp;
- return 0;
- case 13: {
- // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
- if (!MidIntPtrTy)
- return 0;
- unsigned PtrSize = MidIntPtrTy->getScalarSizeInBits();
- unsigned SrcSize = SrcTy->getScalarSizeInBits();
- unsigned DstSize = DstTy->getScalarSizeInBits();
- if (SrcSize <= PtrSize && SrcSize == DstSize)
- return Instruction::BitCast;
- return 0;
- }
- case 99:
- // cast combination can't happen (error in input). This is for all cases
- // where the MidTy is not the same for the two cast instructions.
- llvm_unreachable("Invalid Cast Combination");
- default:
- llvm_unreachable("Error in CastResults table!!!");
- }
-}
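-
-// For illustration, a hypothetical helper: "zext i16 to i32" followed by
-// "trunc i32 to i16" lands in case 8 of the table above; the source and
-// destination widths match, so the pair folds to a single BitCast. The
-// IntPtrTy arguments may be null because case 8 never consults them.
-static unsigned exampleEliminablePair(LLVMContext &Ctx) {
-  Type *I16 = Type::getInt16Ty(Ctx);
-  Type *I32 = Type::getInt32Ty(Ctx);
-  // Returns Instruction::BitCast.
-  return CastInst::isEliminableCastPair(Instruction::ZExt, Instruction::Trunc,
-                                        I16, I32, I16, 0, 0, 0);
-}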
-
-CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
- const Twine &Name, Instruction *InsertBefore) {
- assert(castIsValid(op, S, Ty) && "Invalid cast!");
- // Construct and return the appropriate CastInst subclass
- switch (op) {
- case Trunc: return new TruncInst (S, Ty, Name, InsertBefore);
- case ZExt: return new ZExtInst (S, Ty, Name, InsertBefore);
- case SExt: return new SExtInst (S, Ty, Name, InsertBefore);
- case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertBefore);
- case FPExt: return new FPExtInst (S, Ty, Name, InsertBefore);
- case UIToFP: return new UIToFPInst (S, Ty, Name, InsertBefore);
- case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore);
- case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore);
- case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore);
- case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore);
- case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
- case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore);
- default: llvm_unreachable("Invalid opcode provided");
- }
-}
-
-CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
- const Twine &Name, BasicBlock *InsertAtEnd) {
- assert(castIsValid(op, S, Ty) && "Invalid cast!");
- // Construct and return the appropriate CastInst subclass
- switch (op) {
- case Trunc: return new TruncInst (S, Ty, Name, InsertAtEnd);
- case ZExt: return new ZExtInst (S, Ty, Name, InsertAtEnd);
- case SExt: return new SExtInst (S, Ty, Name, InsertAtEnd);
- case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertAtEnd);
- case FPExt: return new FPExtInst (S, Ty, Name, InsertAtEnd);
- case UIToFP: return new UIToFPInst (S, Ty, Name, InsertAtEnd);
- case SIToFP: return new SIToFPInst (S, Ty, Name, InsertAtEnd);
- case FPToUI: return new FPToUIInst (S, Ty, Name, InsertAtEnd);
- case FPToSI: return new FPToSIInst (S, Ty, Name, InsertAtEnd);
- case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd);
- case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd);
- case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd);
- default: llvm_unreachable("Invalid opcode provided");
- }
-}
-
-CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
- const Twine &Name,
- Instruction *InsertBefore) {
- if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
- return Create(Instruction::ZExt, S, Ty, Name, InsertBefore);
-}
-
-CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
- const Twine &Name,
- BasicBlock *InsertAtEnd) {
- if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
- return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd);
-}
-
-CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
- const Twine &Name,
- Instruction *InsertBefore) {
- if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
- return Create(Instruction::SExt, S, Ty, Name, InsertBefore);
-}
-
-CastInst *CastInst::CreateSExtOrBitCast(Value *S, Type *Ty,
- const Twine &Name,
- BasicBlock *InsertAtEnd) {
- if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
- return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd);
-}
-
-CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
- const Twine &Name,
- Instruction *InsertBefore) {
- if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
- return Create(Instruction::Trunc, S, Ty, Name, InsertBefore);
-}
-
-CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty,
- const Twine &Name,
- BasicBlock *InsertAtEnd) {
- if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
- return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
- return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd);
-}
-
-CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
- const Twine &Name,
- BasicBlock *InsertAtEnd) {
- assert(S->getType()->isPointerTy() && "Invalid cast");
- assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
- "Invalid cast");
-
- if (Ty->isIntegerTy())
- return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd);
- return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
-}
-
-/// @brief Create a BitCast or a PtrToInt cast instruction
-CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty,
- const Twine &Name,
- Instruction *InsertBefore) {
- assert(S->getType()->isPointerTy() && "Invalid cast");
- assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
- "Invalid cast");
-
- if (Ty->isIntegerTy())
- return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore);
- return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
-}
-
-CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
- bool isSigned, const Twine &Name,
- Instruction *InsertBefore) {
- assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
- "Invalid integer cast");
- unsigned SrcBits = C->getType()->getScalarSizeInBits();
- unsigned DstBits = Ty->getScalarSizeInBits();
- Instruction::CastOps opcode =
- (SrcBits == DstBits ? Instruction::BitCast :
- (SrcBits > DstBits ? Instruction::Trunc :
- (isSigned ? Instruction::SExt : Instruction::ZExt)));
- return Create(opcode, C, Ty, Name, InsertBefore);
-}
-
-CastInst *CastInst::CreateIntegerCast(Value *C, Type *Ty,
- bool isSigned, const Twine &Name,
- BasicBlock *InsertAtEnd) {
- assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() &&
- "Invalid cast");
- unsigned SrcBits = C->getType()->getScalarSizeInBits();
- unsigned DstBits = Ty->getScalarSizeInBits();
- Instruction::CastOps opcode =
- (SrcBits == DstBits ? Instruction::BitCast :
- (SrcBits > DstBits ? Instruction::Trunc :
- (isSigned ? Instruction::SExt : Instruction::ZExt)));
- return Create(opcode, C, Ty, Name, InsertAtEnd);
-}
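-
-// For illustration, a hypothetical helper: widening selects SExt when
-// isSigned is true and ZExt otherwise; narrowing would select Trunc, and
-// equal widths a BitCast. V is assumed to have type i8.
-static CastInst *exampleIntegerCast(Value *V, Instruction *InsertBefore) {
-  Type *I32 = Type::getInt32Ty(V->getContext());
-  return CastInst::CreateIntegerCast(V, I32, /*isSigned=*/true, "widened",
-                                     InsertBefore); // sext i8 %V to i32
-}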
-
-CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
- const Twine &Name,
- Instruction *InsertBefore) {
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
- "Invalid cast");
- unsigned SrcBits = C->getType()->getScalarSizeInBits();
- unsigned DstBits = Ty->getScalarSizeInBits();
- Instruction::CastOps opcode =
- (SrcBits == DstBits ? Instruction::BitCast :
- (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
- return Create(opcode, C, Ty, Name, InsertBefore);
-}
-
-CastInst *CastInst::CreateFPCast(Value *C, Type *Ty,
- const Twine &Name,
- BasicBlock *InsertAtEnd) {
- assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() &&
- "Invalid cast");
- unsigned SrcBits = C->getType()->getScalarSizeInBits();
- unsigned DstBits = Ty->getScalarSizeInBits();
- Instruction::CastOps opcode =
- (SrcBits == DstBits ? Instruction::BitCast :
- (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
- return Create(opcode, C, Ty, Name, InsertAtEnd);
-}
-
-// Check whether it is valid to call getCastOpcode for these types.
-// This routine must be kept in sync with getCastOpcode.
-bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
- if (!SrcTy->isFirstClassType() || !DestTy->isFirstClassType())
- return false;
-
- if (SrcTy == DestTy)
- return true;
-
- if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
- if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
- if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
-        // An element-by-element cast. Valid if casting the elements is valid.
- SrcTy = SrcVecTy->getElementType();
- DestTy = DestVecTy->getElementType();
- }
-
- // Get the bit sizes, we'll need these
- unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
- unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
-
- // Run through the possibilities ...
- if (DestTy->isIntegerTy()) { // Casting to integral
- if (SrcTy->isIntegerTy()) { // Casting from integral
- return true;
- } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
- return true;
- } else if (SrcTy->isVectorTy()) { // Casting from vector
- return DestBits == SrcBits;
- } else { // Casting from something else
- return SrcTy->isPointerTy();
- }
- } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
- if (SrcTy->isIntegerTy()) { // Casting from integral
- return true;
- } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
- return true;
- } else if (SrcTy->isVectorTy()) { // Casting from vector
- return DestBits == SrcBits;
- } else { // Casting from something else
- return false;
- }
- } else if (DestTy->isVectorTy()) { // Casting to vector
- return DestBits == SrcBits;
- } else if (DestTy->isPointerTy()) { // Casting to pointer
- if (SrcTy->isPointerTy()) { // Casting from pointer
- return true;
- } else if (SrcTy->isIntegerTy()) { // Casting from integral
- return true;
- } else { // Casting from something else
- return false;
- }
- } else if (DestTy->isX86_MMXTy()) {
- if (SrcTy->isVectorTy()) {
- return DestBits == SrcBits; // 64-bit vector to MMX
- } else {
- return false;
- }
- } else { // Casting to something else
- return false;
- }
-}
-
-// Provide a way to get a "cast" where the cast opcode is inferred from the
-// types and size of the operand. This, basically, is a parallel of the
-// logic in the castIsValid function below. This axiom should hold:
-// castIsValid( getCastOpcode(Val, Ty), Val, Ty)
-// should not assert in castIsValid. In other words, this produces a "correct"
-// casting opcode for the arguments passed to it.
-// This routine must be kept in sync with isCastable.
-Instruction::CastOps
-CastInst::getCastOpcode(
- const Value *Src, bool SrcIsSigned, Type *DestTy, bool DestIsSigned) {
- Type *SrcTy = Src->getType();
-
- assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() &&
- "Only first class types are castable!");
-
- if (SrcTy == DestTy)
- return BitCast;
-
- if (VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy))
- if (VectorType *DestVecTy = dyn_cast<VectorType>(DestTy))
- if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) {
-        // An element-by-element cast. Find the appropriate opcode based on the
- // element types.
- SrcTy = SrcVecTy->getElementType();
- DestTy = DestVecTy->getElementType();
- }
-
- // Get the bit sizes, we'll need these
- unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr
- unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr
-
- // Run through the possibilities ...
- if (DestTy->isIntegerTy()) { // Casting to integral
- if (SrcTy->isIntegerTy()) { // Casting from integral
- if (DestBits < SrcBits)
- return Trunc; // int -> smaller int
-      else if (DestBits > SrcBits) {         // it's an extension
- if (SrcIsSigned)
- return SExt; // signed -> SEXT
- else
- return ZExt; // unsigned -> ZEXT
- } else {
- return BitCast; // Same size, No-op cast
- }
- } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
- if (DestIsSigned)
- return FPToSI; // FP -> sint
- else
- return FPToUI; // FP -> uint
- } else if (SrcTy->isVectorTy()) {
- assert(DestBits == SrcBits &&
- "Casting vector to integer of different width");
- return BitCast; // Same size, no-op cast
- } else {
- assert(SrcTy->isPointerTy() &&
- "Casting from a value that is not first-class type");
- return PtrToInt; // ptr -> int
- }
- } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
- if (SrcTy->isIntegerTy()) { // Casting from integral
- if (SrcIsSigned)
- return SIToFP; // sint -> FP
- else
- return UIToFP; // uint -> FP
- } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
- if (DestBits < SrcBits) {
- return FPTrunc; // FP -> smaller FP
- } else if (DestBits > SrcBits) {
- return FPExt; // FP -> larger FP
- } else {
- return BitCast; // same size, no-op cast
- }
- } else if (SrcTy->isVectorTy()) {
- assert(DestBits == SrcBits &&
- "Casting vector to floating point of different width");
- return BitCast; // same size, no-op cast
- }
- llvm_unreachable("Casting pointer or non-first class to float");
- } else if (DestTy->isVectorTy()) {
- assert(DestBits == SrcBits &&
- "Illegal cast to vector (wrong type or size)");
- return BitCast;
- } else if (DestTy->isPointerTy()) {
- if (SrcTy->isPointerTy()) {
- return BitCast; // ptr -> ptr
- } else if (SrcTy->isIntegerTy()) {
- return IntToPtr; // int -> ptr
- }
- llvm_unreachable("Casting pointer to other than pointer or int");
- } else if (DestTy->isX86_MMXTy()) {
- if (SrcTy->isVectorTy()) {
- assert(DestBits == SrcBits && "Casting vector of wrong width to X86_MMX");
- return BitCast; // 64-bit vector to MMX
- }
- llvm_unreachable("Illegal cast to X86_MMX");
- }
- llvm_unreachable("Casting to type that is not first-class");
-}
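-
-// For illustration, a hypothetical helper: casting a float value to i32
-// with a signed destination infers FPToSI, and by the invariant above the
-// result satisfies castIsValid for the same operands. V is assumed to
-// have type float.
-static Instruction::CastOps exampleCastOpcode(Value *V, LLVMContext &Ctx) {
-  return CastInst::getCastOpcode(V, /*SrcIsSigned=*/true,
-                                 Type::getInt32Ty(Ctx),
-                                 /*DestIsSigned=*/true); // FPToSI
-}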
-
-//===----------------------------------------------------------------------===//
-// CastInst SubClass Constructors
-//===----------------------------------------------------------------------===//
-
-/// Check that the construction parameters for a CastInst are correct. This
-/// could be broken out into the separate constructors but it is useful to have
-/// it in one place and to eliminate the redundant code for getting the sizes
-/// of the types involved.
-bool
-CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
-
- // Check for type sanity on the arguments
- Type *SrcTy = S->getType();
- if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() ||
- SrcTy->isAggregateType() || DstTy->isAggregateType())
- return false;
-
- // Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
- unsigned DstBitSize = DstTy->getScalarSizeInBits();
-
- // If these are vector types, get the lengths of the vectors (using zero for
- // scalar types means that checking that vector lengths match also checks that
- // scalars are not being converted to vectors or vectors to scalars).
- unsigned SrcLength = SrcTy->isVectorTy() ?
- cast<VectorType>(SrcTy)->getNumElements() : 0;
- unsigned DstLength = DstTy->isVectorTy() ?
- cast<VectorType>(DstTy)->getNumElements() : 0;
-
- // Switch on the opcode provided
- switch (op) {
- default: return false; // This is an input error
- case Instruction::Trunc:
- return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
- SrcLength == DstLength && SrcBitSize > DstBitSize;
- case Instruction::ZExt:
- return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
- SrcLength == DstLength && SrcBitSize < DstBitSize;
- case Instruction::SExt:
- return SrcTy->isIntOrIntVectorTy() && DstTy->isIntOrIntVectorTy() &&
- SrcLength == DstLength && SrcBitSize < DstBitSize;
- case Instruction::FPTrunc:
- return SrcTy->isFPOrFPVectorTy() && DstTy->isFPOrFPVectorTy() &&
- SrcLength == DstLength && SrcBitSize > DstBitSize;
- case Instruction::FPExt:
- return SrcTy->isFPOrFPVectorTy() && DstTy->isFPOrFPVectorTy() &&
- SrcLength == DstLength && SrcBitSize < DstBitSize;
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- return SrcTy->isIntOrIntVectorTy() && DstTy->isFPOrFPVectorTy() &&
- SrcLength == DstLength;
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() &&
- SrcLength == DstLength;
- case Instruction::PtrToInt:
- if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy))
- return false;
- if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
- if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
- return false;
- return SrcTy->getScalarType()->isPointerTy() &&
- DstTy->getScalarType()->isIntegerTy();
- case Instruction::IntToPtr:
- if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy))
- return false;
- if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
- if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
- return false;
- return SrcTy->getScalarType()->isIntegerTy() &&
- DstTy->getScalarType()->isPointerTy();
- case Instruction::BitCast:
- // BitCast implies a no-op cast of type only. No bits change.
- // However, you can't cast pointers to anything but pointers.
- if (SrcTy->isPointerTy() != DstTy->isPointerTy())
- return false;
-
- // Now we know we're not dealing with a pointer/non-pointer mismatch. In all
- // these cases, the cast is okay if the source and destination bit widths
- // are identical.
- return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits();
- }
-}
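-
-// For illustration, a hypothetical helper: a trunc must strictly narrow,
-// so for an i32 value V the cast i32 -> i16 is a valid trunc while
-// i32 -> i64 is not (widening requires zext or sext instead).
-static bool exampleCastValidity(Value *V, LLVMContext &Ctx) {
-  return CastInst::castIsValid(Instruction::Trunc, V,
-                               Type::getInt16Ty(Ctx)) &&
-         !CastInst::castIsValid(Instruction::Trunc, V,
-                                Type::getInt64Ty(Ctx));
-}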
-
-TruncInst::TruncInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, Trunc, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
-}
-
-TruncInst::TruncInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, Trunc, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal Trunc");
-}
-
-ZExtInst::ZExtInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, ZExt, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
-}
-
-ZExtInst::ZExtInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, ZExt, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal ZExt");
-}
-SExtInst::SExtInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, SExt, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
-}
-
-SExtInst::SExtInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, SExt, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal SExt");
-}
-
-FPTruncInst::FPTruncInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPTrunc, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
-}
-
-FPTruncInst::FPTruncInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPTrunc, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPTrunc");
-}
-
-FPExtInst::FPExtInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPExt, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
-}
-
-FPExtInst::FPExtInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPExt, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPExt");
-}
-
-UIToFPInst::UIToFPInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, UIToFP, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
-}
-
-UIToFPInst::UIToFPInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, UIToFP, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal UIToFP");
-}
-
-SIToFPInst::SIToFPInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, SIToFP, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
-}
-
-SIToFPInst::SIToFPInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, SIToFP, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal SIToFP");
-}
-
-FPToUIInst::FPToUIInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPToUI, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
-}
-
-FPToUIInst::FPToUIInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPToUI, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToUI");
-}
-
-FPToSIInst::FPToSIInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, FPToSI, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
-}
-
-FPToSIInst::FPToSIInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, FPToSI, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal FPToSI");
-}
-
-PtrToIntInst::PtrToIntInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, PtrToInt, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
-}
-
-PtrToIntInst::PtrToIntInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, PtrToInt, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal PtrToInt");
-}
-
-IntToPtrInst::IntToPtrInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, IntToPtr, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
-}
-
-IntToPtrInst::IntToPtrInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, IntToPtr, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal IntToPtr");
-}
-
-BitCastInst::BitCastInst(
- Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore
-) : CastInst(Ty, BitCast, S, Name, InsertBefore) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
-}
-
-BitCastInst::BitCastInst(
- Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd
-) : CastInst(Ty, BitCast, S, Name, InsertAtEnd) {
- assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast");
-}
-
-//===----------------------------------------------------------------------===//
-// CmpInst Classes
-//===----------------------------------------------------------------------===//
-
-void CmpInst::anchor() {}
-
-CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
- Value *LHS, Value *RHS, const Twine &Name,
- Instruction *InsertBefore)
- : Instruction(ty, op,
- OperandTraits<CmpInst>::op_begin(this),
- OperandTraits<CmpInst>::operands(this),
- InsertBefore) {
- Op<0>() = LHS;
- Op<1>() = RHS;
- setPredicate((Predicate)predicate);
- setName(Name);
-}
-
-CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate,
- Value *LHS, Value *RHS, const Twine &Name,
- BasicBlock *InsertAtEnd)
- : Instruction(ty, op,
- OperandTraits<CmpInst>::op_begin(this),
- OperandTraits<CmpInst>::operands(this),
- InsertAtEnd) {
- Op<0>() = LHS;
- Op<1>() = RHS;
- setPredicate((Predicate)predicate);
- setName(Name);
-}
-
-CmpInst *
-CmpInst::Create(OtherOps Op, unsigned short predicate,
- Value *S1, Value *S2,
- const Twine &Name, Instruction *InsertBefore) {
- if (Op == Instruction::ICmp) {
- if (InsertBefore)
- return new ICmpInst(InsertBefore, CmpInst::Predicate(predicate),
- S1, S2, Name);
- else
- return new ICmpInst(CmpInst::Predicate(predicate),
- S1, S2, Name);
- }
-
- if (InsertBefore)
- return new FCmpInst(InsertBefore, CmpInst::Predicate(predicate),
- S1, S2, Name);
- else
- return new FCmpInst(CmpInst::Predicate(predicate),
- S1, S2, Name);
-}
-
-CmpInst *
-CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2,
- const Twine &Name, BasicBlock *InsertAtEnd) {
- if (Op == Instruction::ICmp) {
- return new ICmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
- S1, S2, Name);
- }
- return new FCmpInst(*InsertAtEnd, CmpInst::Predicate(predicate),
- S1, S2, Name);
-}
-
-void CmpInst::swapOperands() {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
- IC->swapOperands();
- else
- cast<FCmpInst>(this)->swapOperands();
-}
-
-bool CmpInst::isCommutative() const {
- if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
- return IC->isCommutative();
- return cast<FCmpInst>(this)->isCommutative();
-}
-
-bool CmpInst::isEquality() const {
- if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
- return IC->isEquality();
- return cast<FCmpInst>(this)->isEquality();
-}
-
-
-CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
- switch (pred) {
- default: llvm_unreachable("Unknown cmp predicate!");
- case ICMP_EQ: return ICMP_NE;
- case ICMP_NE: return ICMP_EQ;
- case ICMP_UGT: return ICMP_ULE;
- case ICMP_ULT: return ICMP_UGE;
- case ICMP_UGE: return ICMP_ULT;
- case ICMP_ULE: return ICMP_UGT;
- case ICMP_SGT: return ICMP_SLE;
- case ICMP_SLT: return ICMP_SGE;
- case ICMP_SGE: return ICMP_SLT;
- case ICMP_SLE: return ICMP_SGT;
-
- case FCMP_OEQ: return FCMP_UNE;
- case FCMP_ONE: return FCMP_UEQ;
- case FCMP_OGT: return FCMP_ULE;
- case FCMP_OLT: return FCMP_UGE;
- case FCMP_OGE: return FCMP_ULT;
- case FCMP_OLE: return FCMP_UGT;
- case FCMP_UEQ: return FCMP_ONE;
- case FCMP_UNE: return FCMP_OEQ;
- case FCMP_UGT: return FCMP_OLE;
- case FCMP_ULT: return FCMP_OGE;
- case FCMP_UGE: return FCMP_OLT;
- case FCMP_ULE: return FCMP_OGT;
- case FCMP_ORD: return FCMP_UNO;
- case FCMP_UNO: return FCMP_ORD;
- case FCMP_TRUE: return FCMP_FALSE;
- case FCMP_FALSE: return FCMP_TRUE;
- }
-}
-
-ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
- switch (pred) {
- default: llvm_unreachable("Unknown icmp predicate!");
- case ICMP_EQ: case ICMP_NE:
- case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
- return pred;
- case ICMP_UGT: return ICMP_SGT;
- case ICMP_ULT: return ICMP_SLT;
- case ICMP_UGE: return ICMP_SGE;
- case ICMP_ULE: return ICMP_SLE;
- }
-}
-
-ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
- switch (pred) {
- default: llvm_unreachable("Unknown icmp predicate!");
- case ICMP_EQ: case ICMP_NE:
- case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
- return pred;
- case ICMP_SGT: return ICMP_UGT;
- case ICMP_SLT: return ICMP_ULT;
- case ICMP_SGE: return ICMP_UGE;
- case ICMP_SLE: return ICMP_ULE;
- }
-}
-
-/// Build the ConstantRange of all values that satisfy the given predicate
-/// when compared against C.
-ConstantRange
-ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
- APInt Lower(C);
- APInt Upper(C);
- uint32_t BitWidth = C.getBitWidth();
- switch (pred) {
- default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!");
- case ICmpInst::ICMP_EQ: Upper++; break;
- case ICmpInst::ICMP_NE: Lower++; break;
- case ICmpInst::ICMP_ULT:
- Lower = APInt::getMinValue(BitWidth);
- // Check for an empty-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/false);
- break;
- case ICmpInst::ICMP_SLT:
- Lower = APInt::getSignedMinValue(BitWidth);
- // Check for an empty-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/false);
- break;
- case ICmpInst::ICMP_UGT:
- Lower++; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
- // Check for an empty-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/false);
- break;
- case ICmpInst::ICMP_SGT:
- Lower++; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
- // Check for an empty-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/false);
- break;
- case ICmpInst::ICMP_ULE:
- Lower = APInt::getMinValue(BitWidth); Upper++;
- // Check for a full-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/true);
- break;
- case ICmpInst::ICMP_SLE:
- Lower = APInt::getSignedMinValue(BitWidth); Upper++;
- // Check for a full-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/true);
- break;
- case ICmpInst::ICMP_UGE:
- Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
- // Check for a full-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/true);
- break;
- case ICmpInst::ICMP_SGE:
- Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
- // Check for a full-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/true);
- break;
- }
- return ConstantRange(Lower, Upper);
-}
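
A minimal caller-side sketch of the routine above (not part of this patch), assuming the r178860-era headers this file itself uses: ICMP_ULT against 8 yields the half-open range [0, 8) on i32.

  #include "llvm/Instructions.h"
  #include "llvm/Support/ConstantRange.h"
  #include <cassert>
  using namespace llvm;

  void example() {
    // ICMP_ULT with C = 8 describes every i32 value x with (x u< 8).
    ConstantRange R = ICmpInst::makeConstantRange(ICmpInst::ICMP_ULT,
                                                  APInt(32, 8));
    assert(R.contains(APInt(32, 7)));   // 7 u< 8 holds
    assert(!R.contains(APInt(32, 8)));  // 8 u< 8 does not
  }
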
-
-CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
- switch (pred) {
- default: llvm_unreachable("Unknown cmp predicate!");
- case ICMP_EQ: case ICMP_NE:
- return pred;
- case ICMP_SGT: return ICMP_SLT;
- case ICMP_SLT: return ICMP_SGT;
- case ICMP_SGE: return ICMP_SLE;
- case ICMP_SLE: return ICMP_SGE;
- case ICMP_UGT: return ICMP_ULT;
- case ICMP_ULT: return ICMP_UGT;
- case ICMP_UGE: return ICMP_ULE;
- case ICMP_ULE: return ICMP_UGE;
-
- case FCMP_FALSE: case FCMP_TRUE:
- case FCMP_OEQ: case FCMP_ONE:
- case FCMP_UEQ: case FCMP_UNE:
- case FCMP_ORD: case FCMP_UNO:
- return pred;
- case FCMP_OGT: return FCMP_OLT;
- case FCMP_OLT: return FCMP_OGT;
- case FCMP_OGE: return FCMP_OLE;
- case FCMP_OLE: return FCMP_OGE;
- case FCMP_UGT: return FCMP_ULT;
- case FCMP_ULT: return FCMP_UGT;
- case FCMP_UGE: return FCMP_ULE;
- case FCMP_ULE: return FCMP_UGE;
- }
-}
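
Two invariants worth keeping in mind when reading the predicate tables above (a hedged sanity sketch, not part of the patch): swapping the predicate mirrors an operand exchange, while inverting negates the result.

  // (a slt b) == (b sgt a): swapped predicate + swapped operands agree.
  assert(CmpInst::getSwappedPredicate(CmpInst::ICMP_SLT) == CmpInst::ICMP_SGT);
  // !(a slt b) == (a sge b): the inverse predicate is the logical negation.
  assert(CmpInst::getInversePredicate(CmpInst::ICMP_SLT) == CmpInst::ICMP_SGE);
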
-
-bool CmpInst::isUnsigned(unsigned short predicate) {
- switch (predicate) {
- default: return false;
- case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return true;
- }
-}
-
-bool CmpInst::isSigned(unsigned short predicate) {
- switch (predicate) {
- default: return false;
- case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return true;
- }
-}
-
-bool CmpInst::isOrdered(unsigned short predicate) {
- switch (predicate) {
- default: return false;
- case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT:
- case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_OLE:
- case FCmpInst::FCMP_ORD: return true;
- }
-}
-
-bool CmpInst::isUnordered(unsigned short predicate) {
- switch (predicate) {
- default: return false;
- case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT:
- case FCmpInst::FCMP_ULT: case FCmpInst::FCMP_UGE: case FCmpInst::FCMP_ULE:
- case FCmpInst::FCMP_UNO: return true;
- }
-}
-
-bool CmpInst::isTrueWhenEqual(unsigned short predicate) {
- switch(predicate) {
- default: return false;
- case ICMP_EQ: case ICMP_UGE: case ICMP_ULE: case ICMP_SGE: case ICMP_SLE:
- case FCMP_TRUE: case FCMP_UEQ: case FCMP_UGE: case FCMP_ULE: return true;
- }
-}
-
-bool CmpInst::isFalseWhenEqual(unsigned short predicate) {
- switch(predicate) {
- case ICMP_NE: case ICMP_UGT: case ICMP_ULT: case ICMP_SGT: case ICMP_SLT:
- case FCMP_FALSE: case FCMP_ONE: case FCMP_OGT: case FCMP_OLT: return true;
- default: return false;
- }
-}
-
-
-//===----------------------------------------------------------------------===//
-// SwitchInst Implementation
-//===----------------------------------------------------------------------===//
-
-void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
- assert(Value && Default && NumReserved);
- ReservedSpace = NumReserved;
- NumOperands = 2;
- OperandList = allocHungoffUses(ReservedSpace);
-
- OperandList[0] = Value;
- OperandList[1] = Default;
-}
-
-/// SwitchInst ctor - Create a new switch instruction, specifying a value to
-/// switch on and a default destination. The number of additional cases can
-/// be specified here to make memory allocation more efficient. This
-/// constructor can also autoinsert before another instruction.
-SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
- Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
- 0, 0, InsertBefore) {
- init(Value, Default, 2+NumCases*2);
-}
-
-/// SwitchInst ctor - Create a new switch instruction, specifying a value to
-/// switch on and a default destination. The number of additional cases can
-/// be specified here to make memory allocation more efficient. This
-/// constructor also autoinserts at the end of the specified BasicBlock.
-SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
- BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
- 0, 0, InsertAtEnd) {
- init(Value, Default, 2+NumCases*2);
-}
-
-SwitchInst::SwitchInst(const SwitchInst &SI)
- : TerminatorInst(SI.getType(), Instruction::Switch, 0, 0) {
- init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands());
- NumOperands = SI.getNumOperands();
- Use *OL = OperandList, *InOL = SI.OperandList;
- for (unsigned i = 2, E = SI.getNumOperands(); i != E; i += 2) {
- OL[i] = InOL[i];
- OL[i+1] = InOL[i+1];
- }
- TheSubsets = SI.TheSubsets;
- SubclassOptionalData = SI.SubclassOptionalData;
-}
-
-SwitchInst::~SwitchInst() {
- dropHungoffUses();
-}
-
-
-/// addCase - Add an entry to the switch instruction...
-///
-void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
- IntegersSubsetToBB Mapping;
-
- // FIXME: Currently we work with ConstantInt-based cases.
- // So initialize the IntItem container directly from the ConstantInt.
- Mapping.add(IntItem::fromConstantInt(OnVal));
- IntegersSubset CaseRanges = Mapping.getCase();
- addCase(CaseRanges, Dest);
-}
-
-void SwitchInst::addCase(IntegersSubset& OnVal, BasicBlock *Dest) {
- unsigned NewCaseIdx = getNumCases();
- unsigned OpNo = NumOperands;
- if (OpNo+2 > ReservedSpace)
- growOperands(); // Get more space!
- // Initialize some new operands.
- assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
- NumOperands = OpNo+2;
-
- SubsetsIt TheSubsetsIt = TheSubsets.insert(TheSubsets.end(), OnVal);
-
- CaseIt Case(this, NewCaseIdx, TheSubsetsIt);
- Case.updateCaseValueOperand(OnVal);
- Case.setSuccessor(Dest);
-}
-
-/// removeCase - This method removes the specified case and its successor
-/// from the switch instruction.
-void SwitchInst::removeCase(CaseIt& i) {
- unsigned idx = i.getCaseIndex();
-
- assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
-
- unsigned NumOps = getNumOperands();
- Use *OL = OperandList;
-
- // Overwrite this case with the end of the list.
- if (2 + (idx + 1) * 2 != NumOps) {
- OL[2 + idx * 2] = OL[NumOps - 2];
- OL[2 + idx * 2 + 1] = OL[NumOps - 1];
- }
-
- // Nuke the last value.
- OL[NumOps-2].set(0);
- OL[NumOps-2+1].set(0);
-
- // Do the same with TheCases collection:
- if (i.SubsetIt != --TheSubsets.end()) {
- *i.SubsetIt = TheSubsets.back();
- TheSubsets.pop_back();
- } else {
- TheSubsets.pop_back();
- i.SubsetIt = TheSubsets.end();
- }
-
- NumOperands = NumOps-2;
-}
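
removeCase relies on the swap-with-last ("unordered erase") idiom: the doomed case is overwritten by the final one, keeping removal O(1) at the cost of case order. A standalone sketch of the same idiom, with hypothetical names:

  #include <cstddef>
  #include <utility>
  #include <vector>

  template <typename T>
  void unorderedErase(std::vector<T> &V, std::size_t Idx) {
    if (Idx + 1 != V.size())          // not already last: pull the tail down
      V[Idx] = std::move(V.back());
    V.pop_back();                     // drop the now-redundant final slot
  }
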
-
-/// growOperands - This grows the operand list in response to a push_back
-/// style of operation, tripling the number of operands.
-///
-void SwitchInst::growOperands() {
- unsigned e = getNumOperands();
- unsigned NumOps = e*3;
-
- ReservedSpace = NumOps;
- Use *NewOps = allocHungoffUses(NumOps);
- Use *OldOps = OperandList;
- for (unsigned i = 0; i != e; ++i) {
- NewOps[i] = OldOps[i];
- }
- OperandList = NewOps;
- Use::zap(OldOps, OldOps + e, true);
-}
-
-
-BasicBlock *SwitchInst::getSuccessorV(unsigned idx) const {
- return getSuccessor(idx);
-}
-unsigned SwitchInst::getNumSuccessorsV() const {
- return getNumSuccessors();
-}
-void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
- setSuccessor(idx, B);
-}
-
-//===----------------------------------------------------------------------===//
-// IndirectBrInst Implementation
-//===----------------------------------------------------------------------===//
-
-void IndirectBrInst::init(Value *Address, unsigned NumDests) {
- assert(Address && Address->getType()->isPointerTy() &&
- "Address of indirectbr must be a pointer");
- ReservedSpace = 1+NumDests;
- NumOperands = 1;
- OperandList = allocHungoffUses(ReservedSpace);
-
- OperandList[0] = Address;
-}
-
-
-/// growOperands - This grows the operand list in response to a push_back
-/// style of operation, doubling the number of operands.
-///
-void IndirectBrInst::growOperands() {
- unsigned e = getNumOperands();
- unsigned NumOps = e*2;
-
- ReservedSpace = NumOps;
- Use *NewOps = allocHungoffUses(NumOps);
- Use *OldOps = OperandList;
- for (unsigned i = 0; i != e; ++i)
- NewOps[i] = OldOps[i];
- OperandList = NewOps;
- Use::zap(OldOps, OldOps + e, true);
-}
-
-IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
- Instruction *InsertBefore)
-: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
- 0, 0, InsertBefore) {
- init(Address, NumCases);
-}
-
-IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
- BasicBlock *InsertAtEnd)
-: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
- 0, 0, InsertAtEnd) {
- init(Address, NumCases);
-}
-
-IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI)
- : TerminatorInst(Type::getVoidTy(IBI.getContext()), Instruction::IndirectBr,
- allocHungoffUses(IBI.getNumOperands()),
- IBI.getNumOperands()) {
- Use *OL = OperandList, *InOL = IBI.OperandList;
- for (unsigned i = 0, E = IBI.getNumOperands(); i != E; ++i)
- OL[i] = InOL[i];
- SubclassOptionalData = IBI.SubclassOptionalData;
-}
-
-IndirectBrInst::~IndirectBrInst() {
- dropHungoffUses();
-}
-
-/// addDestination - Add a destination.
-///
-void IndirectBrInst::addDestination(BasicBlock *DestBB) {
- unsigned OpNo = NumOperands;
- if (OpNo+1 > ReservedSpace)
- growOperands(); // Get more space!
- // Initialize some new operands.
- assert(OpNo < ReservedSpace && "Growing didn't work!");
- NumOperands = OpNo+1;
- OperandList[OpNo] = DestBB;
-}
-
-/// removeDestination - This method removes the specified successor from the
-/// indirectbr instruction.
-void IndirectBrInst::removeDestination(unsigned idx) {
- assert(idx < getNumOperands()-1 && "Successor index out of range!");
-
- unsigned NumOps = getNumOperands();
- Use *OL = OperandList;
-
- // Replace this value with the last one.
- OL[idx+1] = OL[NumOps-1];
-
- // Nuke the last value.
- OL[NumOps-1].set(0);
- NumOperands = NumOps-1;
-}
-
-BasicBlock *IndirectBrInst::getSuccessorV(unsigned idx) const {
- return getSuccessor(idx);
-}
-unsigned IndirectBrInst::getNumSuccessorsV() const {
- return getNumSuccessors();
-}
-void IndirectBrInst::setSuccessorV(unsigned idx, BasicBlock *B) {
- setSuccessor(idx, B);
-}
-
-//===----------------------------------------------------------------------===//
-// clone_impl() implementations
-//===----------------------------------------------------------------------===//
-
-// Define these methods here so vtables don't get emitted into every translation
-// unit that uses these classes.
-
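A sketch of the key-function idiom the comment above refers to, using a hypothetical class: defining at least one virtual member out-of-line gives the vtable a single home translation unit instead of a weak copy in every user.

  struct Shape {
    virtual ~Shape();                 // declared, but not defined, inline
    virtual double area() const = 0;
  };
  Shape::~Shape() {}                  // vtable and RTTI are emitted here only
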
-GetElementPtrInst *GetElementPtrInst::clone_impl() const {
- return new (getNumOperands()) GetElementPtrInst(*this);
-}
-
-BinaryOperator *BinaryOperator::clone_impl() const {
- return Create(getOpcode(), Op<0>(), Op<1>());
-}
-
-FCmpInst* FCmpInst::clone_impl() const {
- return new FCmpInst(getPredicate(), Op<0>(), Op<1>());
-}
-
-ICmpInst* ICmpInst::clone_impl() const {
- return new ICmpInst(getPredicate(), Op<0>(), Op<1>());
-}
-
-ExtractValueInst *ExtractValueInst::clone_impl() const {
- return new ExtractValueInst(*this);
-}
-
-InsertValueInst *InsertValueInst::clone_impl() const {
- return new InsertValueInst(*this);
-}
-
-AllocaInst *AllocaInst::clone_impl() const {
- return new AllocaInst(getAllocatedType(),
- (Value*)getOperand(0),
- getAlignment());
-}
-
-LoadInst *LoadInst::clone_impl() const {
- return new LoadInst(getOperand(0), Twine(), isVolatile(),
- getAlignment(), getOrdering(), getSynchScope());
-}
-
-StoreInst *StoreInst::clone_impl() const {
- return new StoreInst(getOperand(0), getOperand(1), isVolatile(),
- getAlignment(), getOrdering(), getSynchScope());
-
-}
-
-AtomicCmpXchgInst *AtomicCmpXchgInst::clone_impl() const {
- AtomicCmpXchgInst *Result =
- new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2),
- getOrdering(), getSynchScope());
- Result->setVolatile(isVolatile());
- return Result;
-}
-
-AtomicRMWInst *AtomicRMWInst::clone_impl() const {
- AtomicRMWInst *Result =
- new AtomicRMWInst(getOperation(),getOperand(0), getOperand(1),
- getOrdering(), getSynchScope());
- Result->setVolatile(isVolatile());
- return Result;
-}
-
-FenceInst *FenceInst::clone_impl() const {
- return new FenceInst(getContext(), getOrdering(), getSynchScope());
-}
-
-TruncInst *TruncInst::clone_impl() const {
- return new TruncInst(getOperand(0), getType());
-}
-
-ZExtInst *ZExtInst::clone_impl() const {
- return new ZExtInst(getOperand(0), getType());
-}
-
-SExtInst *SExtInst::clone_impl() const {
- return new SExtInst(getOperand(0), getType());
-}
-
-FPTruncInst *FPTruncInst::clone_impl() const {
- return new FPTruncInst(getOperand(0), getType());
-}
-
-FPExtInst *FPExtInst::clone_impl() const {
- return new FPExtInst(getOperand(0), getType());
-}
-
-UIToFPInst *UIToFPInst::clone_impl() const {
- return new UIToFPInst(getOperand(0), getType());
-}
-
-SIToFPInst *SIToFPInst::clone_impl() const {
- return new SIToFPInst(getOperand(0), getType());
-}
-
-FPToUIInst *FPToUIInst::clone_impl() const {
- return new FPToUIInst(getOperand(0), getType());
-}
-
-FPToSIInst *FPToSIInst::clone_impl() const {
- return new FPToSIInst(getOperand(0), getType());
-}
-
-PtrToIntInst *PtrToIntInst::clone_impl() const {
- return new PtrToIntInst(getOperand(0), getType());
-}
-
-IntToPtrInst *IntToPtrInst::clone_impl() const {
- return new IntToPtrInst(getOperand(0), getType());
-}
-
-BitCastInst *BitCastInst::clone_impl() const {
- return new BitCastInst(getOperand(0), getType());
-}
-
-CallInst *CallInst::clone_impl() const {
- return new(getNumOperands()) CallInst(*this);
-}
-
-SelectInst *SelectInst::clone_impl() const {
- return SelectInst::Create(getOperand(0), getOperand(1), getOperand(2));
-}
-
-VAArgInst *VAArgInst::clone_impl() const {
- return new VAArgInst(getOperand(0), getType());
-}
-
-ExtractElementInst *ExtractElementInst::clone_impl() const {
- return ExtractElementInst::Create(getOperand(0), getOperand(1));
-}
-
-InsertElementInst *InsertElementInst::clone_impl() const {
- return InsertElementInst::Create(getOperand(0), getOperand(1), getOperand(2));
-}
-
-ShuffleVectorInst *ShuffleVectorInst::clone_impl() const {
- return new ShuffleVectorInst(getOperand(0), getOperand(1), getOperand(2));
-}
-
-PHINode *PHINode::clone_impl() const {
- return new PHINode(*this);
-}
-
-LandingPadInst *LandingPadInst::clone_impl() const {
- return new LandingPadInst(*this);
-}
-
-ReturnInst *ReturnInst::clone_impl() const {
- return new(getNumOperands()) ReturnInst(*this);
-}
-
-BranchInst *BranchInst::clone_impl() const {
- return new(getNumOperands()) BranchInst(*this);
-}
-
-SwitchInst *SwitchInst::clone_impl() const {
- return new SwitchInst(*this);
-}
-
-IndirectBrInst *IndirectBrInst::clone_impl() const {
- return new IndirectBrInst(*this);
-}
-
-
-InvokeInst *InvokeInst::clone_impl() const {
- return new(getNumOperands()) InvokeInst(*this);
-}
-
-ResumeInst *ResumeInst::clone_impl() const {
- return new(1) ResumeInst(*this);
-}
-
-UnreachableInst *UnreachableInst::clone_impl() const {
- LLVMContext &Context = getContext();
- return new UnreachableInst(Context);
-}
diff --git a/lib/VMCore/IntrinsicInst.cpp b/lib/VMCore/IntrinsicInst.cpp
deleted file mode 100644
index ac8ec2086b18..000000000000
--- a/lib/VMCore/IntrinsicInst.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-//===-- IntrinsicInst.cpp - Intrinsic Instruction Wrappers -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements methods that make it really easy to deal with intrinsic
-// functions.
-//
-// All intrinsic function calls are instances of the call instruction, so these
-// are all subclasses of the CallInst class. Note that none of these classes
-// has state or virtual methods, which is an important part of this gross/neat
-// hack working.
-//
-// In some cases, arguments to intrinsics need to be generic and are defined as
-// a pointer to the empty struct type, { }*. To access the real item of
-// interest, the cast instruction needs to be stripped away.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Constants.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Metadata.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-/// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
-///
-
-static Value *CastOperand(Value *C) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (CE->isCast())
- return CE->getOperand(0);
- return NULL;
-}
-
-Value *DbgInfoIntrinsic::StripCast(Value *C) {
- if (Value *CO = CastOperand(C)) {
- C = StripCast(CO);
- } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (GV->hasInitializer())
- if (Value *CO = CastOperand(GV->getInitializer()))
- C = StripCast(CO);
- }
- return dyn_cast<GlobalVariable>(C);
-}
-
-//===----------------------------------------------------------------------===//
-/// DbgDeclareInst - This represents the llvm.dbg.declare instruction.
-///
-
-Value *DbgDeclareInst::getAddress() const {
- if (MDNode* MD = cast_or_null<MDNode>(getArgOperand(0)))
- return MD->getOperand(0);
- else
- return NULL;
-}
-
-//===----------------------------------------------------------------------===//
-/// DbgValueInst - This represents the llvm.dbg.value instruction.
-///
-
-const Value *DbgValueInst::getValue() const {
- return cast<MDNode>(getArgOperand(0))->getOperand(0);
-}
-
-Value *DbgValueInst::getValue() {
- return cast<MDNode>(getArgOperand(0))->getOperand(0);
-}
diff --git a/lib/VMCore/LLVMBuild.txt b/lib/VMCore/LLVMBuild.txt
deleted file mode 100644
index bca8b2c97e95..000000000000
--- a/lib/VMCore/LLVMBuild.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-;===- ./lib/VMCore/LLVMBuild.txt -------------------------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = Core
-parent = Libraries
-required_libraries = Support
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
deleted file mode 100644
index 2446ec996d04..000000000000
--- a/lib/VMCore/LLVMContext.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===-- LLVMContext.cpp - Implement LLVMContext -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements LLVMContext, as a wrapper around the opaque
-// class LLVMContextImpl.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/Constants.h"
-#include "llvm/Instruction.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/SourceMgr.h"
-#include "LLVMContextImpl.h"
-#include <cctype>
-using namespace llvm;
-
-static ManagedStatic<LLVMContext> GlobalContext;
-
-LLVMContext& llvm::getGlobalContext() {
- return *GlobalContext;
-}
-
-LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
- // Create the fixed metadata kinds. This is done in the same order as the
- // MD_* enum values so that they correspond.
-
- // Create the 'dbg' metadata kind.
- unsigned DbgID = getMDKindID("dbg");
- assert(DbgID == MD_dbg && "dbg kind id drifted"); (void)DbgID;
-
- // Create the 'tbaa' metadata kind.
- unsigned TBAAID = getMDKindID("tbaa");
- assert(TBAAID == MD_tbaa && "tbaa kind id drifted"); (void)TBAAID;
-
- // Create the 'prof' metadata kind.
- unsigned ProfID = getMDKindID("prof");
- assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID;
-
- // Create the 'fpmath' metadata kind.
- unsigned FPAccuracyID = getMDKindID("fpmath");
- assert(FPAccuracyID == MD_fpmath && "fpmath kind id drifted");
- (void)FPAccuracyID;
-
- // Create the 'range' metadata kind.
- unsigned RangeID = getMDKindID("range");
- assert(RangeID == MD_range && "range kind id drifted");
- (void)RangeID;
-
- // Create the 'tbaa.struct' metadata kind.
- unsigned TBAAStructID = getMDKindID("tbaa.struct");
- assert(TBAAStructID == MD_tbaa_struct && "tbaa.struct kind id drifted");
- (void)TBAAStructID;
-}
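
A minimal usage sketch (the kind name "my.domain" is purely illustrative): fixed kinds are addressed through the MD_* enum, while custom kinds are interned on first use and remain stable for the context's lifetime.

  LLVMContext Ctx;
  unsigned DbgKind = LLVMContext::MD_dbg;           // fixed kind, always 0
  unsigned MyKind  = Ctx.getMDKindID("my.domain");  // assigned on first use
  assert(Ctx.getMDKindID("my.domain") == MyKind);   // stable thereafter
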
-LLVMContext::~LLVMContext() { delete pImpl; }
-
-void LLVMContext::addModule(Module *M) {
- pImpl->OwnedModules.insert(M);
-}
-
-void LLVMContext::removeModule(Module *M) {
- pImpl->OwnedModules.erase(M);
-}
-
-//===----------------------------------------------------------------------===//
-// Recoverable Backend Errors
-//===----------------------------------------------------------------------===//
-
-void LLVMContext::
-setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler,
- void *DiagContext) {
- pImpl->InlineAsmDiagHandler = DiagHandler;
- pImpl->InlineAsmDiagContext = DiagContext;
-}
-
-/// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by
-/// setInlineAsmDiagnosticHandler.
-LLVMContext::InlineAsmDiagHandlerTy
-LLVMContext::getInlineAsmDiagnosticHandler() const {
- return pImpl->InlineAsmDiagHandler;
-}
-
-/// getInlineAsmDiagnosticContext - Return the diagnostic context set by
-/// setInlineAsmDiagnosticHandler.
-void *LLVMContext::getInlineAsmDiagnosticContext() const {
- return pImpl->InlineAsmDiagContext;
-}
-
-void LLVMContext::emitError(const Twine &ErrorStr) {
- emitError(0U, ErrorStr);
-}
-
-void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) {
- unsigned LocCookie = 0;
- if (const MDNode *SrcLoc = I->getMetadata("srcloc")) {
- if (SrcLoc->getNumOperands() != 0)
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(SrcLoc->getOperand(0)))
- LocCookie = CI->getZExtValue();
- }
- return emitError(LocCookie, ErrorStr);
-}
-
-void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) {
- // If there is no error handler installed, just print the error and exit.
- if (pImpl->InlineAsmDiagHandler == 0) {
- errs() << "error: " << ErrorStr << "\n";
- exit(1);
- }
-
- // If we do have an error handler, we can report the error and keep going.
- SMDiagnostic Diag("", SourceMgr::DK_Error, ErrorStr.str());
-
- pImpl->InlineAsmDiagHandler(Diag, pImpl->InlineAsmDiagContext, LocCookie);
-}
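
A hedged usage sketch (the handler and program names are hypothetical): installing a handler turns the hard exit(1) path above into a recoverable diagnostic.

  static void handleInlineAsmDiag(const SMDiagnostic &D, void *Context,
                                  unsigned LocCookie) {
    D.print("mytool", errs());        // report, but let compilation continue
  }

  // Ctx is an existing LLVMContext.
  Ctx.setInlineAsmDiagnosticHandler(handleInlineAsmDiag, 0);
  Ctx.emitError("invalid operand in inline asm");   // no longer fatal
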
-
-//===----------------------------------------------------------------------===//
-// Metadata Kind Uniquing
-//===----------------------------------------------------------------------===//
-
-#ifndef NDEBUG
-/// isValidName - Return true if Name is a valid custom metadata handler name.
-static bool isValidName(StringRef MDName) {
- if (MDName.empty())
- return false;
-
- if (!std::isalpha(MDName[0]))
- return false;
-
- for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E;
- ++I) {
- if (!std::isalnum(*I) && *I != '_' && *I != '-' && *I != '.')
- return false;
- }
- return true;
-}
-#endif
-
-/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
-unsigned LLVMContext::getMDKindID(StringRef Name) const {
- assert(isValidName(Name) && "Invalid MDNode name");
-
- // If this is new, assign it its ID.
- return
- pImpl->CustomMDKindNames.GetOrCreateValue(
- Name, pImpl->CustomMDKindNames.size()).second;
-}
-
-/// getMDKindNames - Populate the client-supplied SmallVector with the custom
-/// metadata kind name for each ID.
-void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
- Names.resize(pImpl->CustomMDKindNames.size());
- for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(),
- E = pImpl->CustomMDKindNames.end(); I != E; ++I)
- Names[I->second] = I->first();
-}
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
deleted file mode 100644
index d35d2844b89b..000000000000
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-//===-- LLVMContextImpl.cpp - Implement LLVMContextImpl -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the opaque LLVMContextImpl.
-//
-//===----------------------------------------------------------------------===//
-
-#include "LLVMContextImpl.h"
-#include "llvm/Attributes.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/STLExtras.h"
-#include <algorithm>
-using namespace llvm;
-
-LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
- : TheTrueVal(0), TheFalseVal(0),
- VoidTy(C, Type::VoidTyID),
- LabelTy(C, Type::LabelTyID),
- HalfTy(C, Type::HalfTyID),
- FloatTy(C, Type::FloatTyID),
- DoubleTy(C, Type::DoubleTyID),
- MetadataTy(C, Type::MetadataTyID),
- X86_FP80Ty(C, Type::X86_FP80TyID),
- FP128Ty(C, Type::FP128TyID),
- PPC_FP128Ty(C, Type::PPC_FP128TyID),
- X86_MMXTy(C, Type::X86_MMXTyID),
- Int1Ty(C, 1),
- Int8Ty(C, 8),
- Int16Ty(C, 16),
- Int32Ty(C, 32),
- Int64Ty(C, 64) {
- InlineAsmDiagHandler = 0;
- InlineAsmDiagContext = 0;
- NamedStructTypesUniqueID = 0;
-}
-
-namespace {
-struct DropReferences {
- // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second'
- // is a Constant*.
- template<typename PairT>
- void operator()(const PairT &P) {
- P.second->dropAllReferences();
- }
-};
-
-// Temporary - drops pair.first instead of second.
-struct DropFirst {
- // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second'
- // is a Constant*.
- template<typename PairT>
- void operator()(const PairT &P) {
- P.first->dropAllReferences();
- }
-};
-}
-
-LLVMContextImpl::~LLVMContextImpl() {
- // NOTE: We need to delete the contents of OwnedModules, but we have to
- // duplicate it into a temporary vector, because the destructor of Module
- // will try to remove itself from OwnedModules set. This would cause
- // iterator invalidation if we iterated on the set directly.
- std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
- DeleteContainerPointers(Modules);
-
- // Free the constants. This is important to do here to ensure that they are
- // freed before the LeakDetector is torn down.
- std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
- DropReferences());
- std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
- DropFirst());
- std::for_each(StructConstants.map_begin(), StructConstants.map_end(),
- DropFirst());
- std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(),
- DropFirst());
- ExprConstants.freeConstants();
- ArrayConstants.freeConstants();
- StructConstants.freeConstants();
- VectorConstants.freeConstants();
- DeleteContainerSeconds(CAZConstants);
- DeleteContainerSeconds(CPNConstants);
- DeleteContainerSeconds(UVConstants);
- InlineAsms.freeConstants();
- DeleteContainerSeconds(IntConstants);
- DeleteContainerSeconds(FPConstants);
-
- for (StringMap<ConstantDataSequential*>::iterator I = CDSConstants.begin(),
- E = CDSConstants.end(); I != E; ++I)
- delete I->second;
- CDSConstants.clear();
-
- // Destroy attributes.
- for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(),
- E = AttrsSet.end(); I != E; ) {
- FoldingSetIterator<AttributesImpl> Elem = I++;
- delete &*Elem;
- }
-
- // Destroy attribute lists.
- for (FoldingSetIterator<AttributeListImpl> I = AttrsLists.begin(),
- E = AttrsLists.end(); I != E; ) {
- FoldingSetIterator<AttributeListImpl> Elem = I++;
- delete &*Elem;
- }
-
- // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
- // and the NonUniquedMDNodes sets, so copy the values out first.
- SmallVector<MDNode*, 8> MDNodes;
- MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size());
- for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
- I != E; ++I)
- MDNodes.push_back(&*I);
- MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
- for (SmallVectorImpl<MDNode *>::iterator I = MDNodes.begin(),
- E = MDNodes.end(); I != E; ++I)
- (*I)->destroy();
- assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
- "Destroying all MDNodes didn't empty the Context's sets.");
-
- // Destroy MDStrings.
- DeleteContainerSeconds(MDStringCache);
-}
-
-// ConstantsContext anchors
-void UnaryConstantExpr::anchor() { }
-
-void BinaryConstantExpr::anchor() { }
-
-void SelectConstantExpr::anchor() { }
-
-void ExtractElementConstantExpr::anchor() { }
-
-void InsertElementConstantExpr::anchor() { }
-
-void ShuffleVectorConstantExpr::anchor() { }
-
-void ExtractValueConstantExpr::anchor() { }
-
-void InsertValueConstantExpr::anchor() { }
-
-void GetElementPtrConstantExpr::anchor() { }
-
-void CompareConstantExpr::anchor() { }
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
deleted file mode 100644
index 90cf424a3c92..000000000000
--- a/lib/VMCore/LLVMContextImpl.h
+++ /dev/null
@@ -1,369 +0,0 @@
-//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares LLVMContextImpl, the opaque implementation
-// of LLVMContext.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LLVMCONTEXT_IMPL_H
-#define LLVM_LLVMCONTEXT_IMPL_H
-
-#include "llvm/LLVMContext.h"
-#include "AttributesImpl.h"
-#include "ConstantsContext.h"
-#include "LeaksContext.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Metadata.h"
-#include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Hashing.h"
-#include <vector>
-
-namespace llvm {
-
-class ConstantInt;
-class ConstantFP;
-class LLVMContext;
-class Type;
-class Value;
-
-struct DenseMapAPIntKeyInfo {
- struct KeyTy {
- APInt val;
- Type* type;
- KeyTy(const APInt& V, Type* Ty) : val(V), type(Ty) {}
- KeyTy(const KeyTy& that) : val(that.val), type(that.type) {}
- bool operator==(const KeyTy& that) const {
- return type == that.type && this->val == that.val;
- }
- bool operator!=(const KeyTy& that) const {
- return !this->operator==(that);
- }
- friend hash_code hash_value(const KeyTy &Key) {
- return hash_combine(Key.type, Key.val);
- }
- };
- static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
- static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
- static unsigned getHashValue(const KeyTy &Key) {
- return static_cast<unsigned>(hash_value(Key));
- }
- static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
- return LHS == RHS;
- }
-};
-
-struct DenseMapAPFloatKeyInfo {
- struct KeyTy {
- APFloat val;
- KeyTy(const APFloat& V) : val(V){}
- KeyTy(const KeyTy& that) : val(that.val) {}
- bool operator==(const KeyTy& that) const {
- return this->val.bitwiseIsEqual(that.val);
- }
- bool operator!=(const KeyTy& that) const {
- return !this->operator==(that);
- }
- friend hash_code hash_value(const KeyTy &Key) {
- return hash_combine(Key.val);
- }
- };
- static inline KeyTy getEmptyKey() {
- return KeyTy(APFloat(APFloat::Bogus,1));
- }
- static inline KeyTy getTombstoneKey() {
- return KeyTy(APFloat(APFloat::Bogus,2));
- }
- static unsigned getHashValue(const KeyTy &Key) {
- return static_cast<unsigned>(hash_value(Key));
- }
- static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
- return LHS == RHS;
- }
-};
-
-struct AnonStructTypeKeyInfo {
- struct KeyTy {
- ArrayRef<Type*> ETypes;
- bool isPacked;
- KeyTy(const ArrayRef<Type*>& E, bool P) :
- ETypes(E), isPacked(P) {}
- KeyTy(const KeyTy& that) :
- ETypes(that.ETypes), isPacked(that.isPacked) {}
- KeyTy(const StructType* ST) :
- ETypes(ArrayRef<Type*>(ST->element_begin(), ST->element_end())),
- isPacked(ST->isPacked()) {}
- bool operator==(const KeyTy& that) const {
- if (isPacked != that.isPacked)
- return false;
- if (ETypes != that.ETypes)
- return false;
- return true;
- }
- bool operator!=(const KeyTy& that) const {
- return !this->operator==(that);
- }
- };
- static inline StructType* getEmptyKey() {
- return DenseMapInfo<StructType*>::getEmptyKey();
- }
- static inline StructType* getTombstoneKey() {
- return DenseMapInfo<StructType*>::getTombstoneKey();
- }
- static unsigned getHashValue(const KeyTy& Key) {
- return hash_combine(hash_combine_range(Key.ETypes.begin(),
- Key.ETypes.end()),
- Key.isPacked);
- }
- static unsigned getHashValue(const StructType *ST) {
- return getHashValue(KeyTy(ST));
- }
- static bool isEqual(const KeyTy& LHS, const StructType *RHS) {
- if (RHS == getEmptyKey() || RHS == getTombstoneKey())
- return false;
- return LHS == KeyTy(RHS);
- }
- static bool isEqual(const StructType *LHS, const StructType *RHS) {
- return LHS == RHS;
- }
-};
-
-struct FunctionTypeKeyInfo {
- struct KeyTy {
- const Type *ReturnType;
- ArrayRef<Type*> Params;
- bool isVarArg;
- KeyTy(const Type* R, const ArrayRef<Type*>& P, bool V) :
- ReturnType(R), Params(P), isVarArg(V) {}
- KeyTy(const KeyTy& that) :
- ReturnType(that.ReturnType),
- Params(that.Params),
- isVarArg(that.isVarArg) {}
- KeyTy(const FunctionType* FT) :
- ReturnType(FT->getReturnType()),
- Params(ArrayRef<Type*>(FT->param_begin(), FT->param_end())),
- isVarArg(FT->isVarArg()) {}
- bool operator==(const KeyTy& that) const {
- if (ReturnType != that.ReturnType)
- return false;
- if (isVarArg != that.isVarArg)
- return false;
- if (Params != that.Params)
- return false;
- return true;
- }
- bool operator!=(const KeyTy& that) const {
- return !this->operator==(that);
- }
- };
- static inline FunctionType* getEmptyKey() {
- return DenseMapInfo<FunctionType*>::getEmptyKey();
- }
- static inline FunctionType* getTombstoneKey() {
- return DenseMapInfo<FunctionType*>::getTombstoneKey();
- }
- static unsigned getHashValue(const KeyTy& Key) {
- return hash_combine(Key.ReturnType,
- hash_combine_range(Key.Params.begin(),
- Key.Params.end()),
- Key.isVarArg);
- }
- static unsigned getHashValue(const FunctionType *FT) {
- return getHashValue(KeyTy(FT));
- }
- static bool isEqual(const KeyTy& LHS, const FunctionType *RHS) {
- if (RHS == getEmptyKey() || RHS == getTombstoneKey())
- return false;
- return LHS == KeyTy(RHS);
- }
- static bool isEqual(const FunctionType *LHS, const FunctionType *RHS) {
- return LHS == RHS;
- }
-};
-
-// Provide a FoldingSetTrait::Equals specialization for MDNode that can use a
-// shortcut to avoid comparing all operands.
-template<> struct FoldingSetTrait<MDNode> : DefaultFoldingSetTrait<MDNode> {
- static bool Equals(const MDNode &X, const FoldingSetNodeID &ID,
- unsigned IDHash, FoldingSetNodeID &TempID) {
- assert(!X.isNotUniqued() && "Non-uniqued MDNode in FoldingSet?");
- // First, check if the cached hashes match. If they don't we can skip the
- // expensive operand walk.
- if (X.Hash != IDHash)
- return false;
-
- // If they match we have to compare the operands.
- X.Profile(TempID);
- return TempID == ID;
- }
- static unsigned ComputeHash(const MDNode &X, FoldingSetNodeID &) {
- return X.Hash; // Return cached hash.
- }
-};
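
The trait above is a cached-hash cheap-reject scheme: differing hashes prove inequality without walking operands, and the expensive structural comparison runs only when hashes coincide. A generic sketch with a hypothetical node type:

  struct Node {
    unsigned Hash;                             // kept in sync with contents
    bool deepEquals(const Node &Other) const;  // expensive structural walk
  };

  bool equals(const Node &A, const Node &B) {
    if (A.Hash != B.Hash)
      return false;          // cheap reject: unequal hashes cannot match
    return A.deepEquals(B);  // hashes agree: confirm structurally
  }
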
-
-/// DebugRecVH - This is a CallbackVH used to keep the Scope -> index maps
-/// up to date as MDNodes mutate. This class is implemented in DebugLoc.cpp.
-class DebugRecVH : public CallbackVH {
- /// Ctx - This is the LLVM Context being referenced.
- LLVMContextImpl *Ctx;
-
- /// Idx - The index into either ScopeRecordIdx or ScopeInlinedAtRecords that
- /// this reference lives in. If this is zero, then it represents a
- /// non-canonical entry that has no DenseMap value. This can happen due to
- /// RAUW.
- int Idx;
-public:
- DebugRecVH(MDNode *n, LLVMContextImpl *ctx, int idx)
- : CallbackVH(n), Ctx(ctx), Idx(idx) {}
-
- MDNode *get() const {
- return cast_or_null<MDNode>(getValPtr());
- }
-
- virtual void deleted();
- virtual void allUsesReplacedWith(Value *VNew);
-};
-
-class LLVMContextImpl {
-public:
- /// OwnedModules - The set of modules instantiated in this context, and which
- /// will be automatically deleted if this context is deleted.
- SmallPtrSet<Module*, 4> OwnedModules;
-
- LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler;
- void *InlineAsmDiagContext;
-
- typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
- DenseMapAPIntKeyInfo> IntMapTy;
- IntMapTy IntConstants;
-
- typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*,
- DenseMapAPFloatKeyInfo> FPMapTy;
- FPMapTy FPConstants;
-
- FoldingSet<AttributesImpl> AttrsSet;
- FoldingSet<AttributeListImpl> AttrsLists;
-
- StringMap<Value*> MDStringCache;
-
- FoldingSet<MDNode> MDNodeSet;
-
- // MDNodes may be uniqued or not uniqued. When they're not uniqued, they
- // aren't in the MDNodeSet, but they're still shared between objects, so no
- // one object can destroy them. This set allows us to at least destroy them
- // on Context destruction.
- SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
-
- DenseMap<Type*, ConstantAggregateZero*> CAZConstants;
-
- typedef ConstantAggrUniqueMap<ArrayType, ConstantArray> ArrayConstantsTy;
- ArrayConstantsTy ArrayConstants;
-
- typedef ConstantAggrUniqueMap<StructType, ConstantStruct> StructConstantsTy;
- StructConstantsTy StructConstants;
-
- typedef ConstantAggrUniqueMap<VectorType, ConstantVector> VectorConstantsTy;
- VectorConstantsTy VectorConstants;
-
- DenseMap<PointerType*, ConstantPointerNull*> CPNConstants;
-
- DenseMap<Type*, UndefValue*> UVConstants;
-
- StringMap<ConstantDataSequential*> CDSConstants;
-
-
- DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
- ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr>
- ExprConstants;
-
- ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&, PointerType,
- InlineAsm> InlineAsms;
-
- ConstantInt *TheTrueVal;
- ConstantInt *TheFalseVal;
-
- LeakDetectorImpl<Value> LLVMObjects;
-
- // Basic type instances.
- Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy;
- Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
- IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty;
-
-
- /// TypeAllocator - All dynamically allocated types are allocated from this.
- /// They live until the context is torn down.
- BumpPtrAllocator TypeAllocator;
-
- DenseMap<unsigned, IntegerType*> IntegerTypes;
-
- typedef DenseMap<FunctionType*, bool, FunctionTypeKeyInfo> FunctionTypeMap;
- FunctionTypeMap FunctionTypes;
- typedef DenseMap<StructType*, bool, AnonStructTypeKeyInfo> StructTypeMap;
- StructTypeMap AnonStructTypes;
- StringMap<StructType*> NamedStructTypes;
- unsigned NamedStructTypesUniqueID;
-
- DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
- DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
- DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
- DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
-
-
- /// ValueHandles - This map keeps track of all of the value handles that are
- /// watching a Value*. The Value::HasValueHandle bit is used to know
- /// whether or not a value has an entry in this map.
- typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
- ValueHandlesTy ValueHandles;
-
- /// CustomMDKindNames - Map to hold the metadata string to ID mapping.
- StringMap<unsigned> CustomMDKindNames;
-
- typedef std::pair<unsigned, TrackingVH<MDNode> > MDPairTy;
- typedef SmallVector<MDPairTy, 2> MDMapTy;
-
- /// MetadataStore - Collection of per-instruction metadata used in this
- /// context.
- DenseMap<const Instruction *, MDMapTy> MetadataStore;
-
- /// ScopeRecordIdx - This is the index in ScopeRecords for an MDNode scope
- /// entry with no "inlined at" element.
- DenseMap<MDNode*, int> ScopeRecordIdx;
-
- /// ScopeRecords - These are the actual mdnodes (in a value handle) for an
- /// index. The ValueHandle ensures that ScopeRecordIdx stays up to date if
- /// the MDNode is RAUW'd.
- std::vector<DebugRecVH> ScopeRecords;
-
- /// ScopeInlinedAtIdx - This is the index in ScopeInlinedAtRecords for a
- /// scope/inlined-at pair.
- DenseMap<std::pair<MDNode*, MDNode*>, int> ScopeInlinedAtIdx;
-
- /// ScopeInlinedAtRecords - These are the actual mdnodes (in value handles)
- /// for an index. The ValueHandle ensures that ScopeInlinedAtIdx stays up
- /// to date.
- std::vector<std::pair<DebugRecVH, DebugRecVH> > ScopeInlinedAtRecords;
-
- int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx);
- int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx);
-
- LLVMContextImpl(LLVMContext &C);
- ~LLVMContextImpl();
-};
-
-}
-
-#endif
diff --git a/lib/VMCore/LeakDetector.cpp b/lib/VMCore/LeakDetector.cpp
deleted file mode 100644
index f6651e93e273..000000000000
--- a/lib/VMCore/LeakDetector.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-//===-- LeakDetector.cpp - Implement LeakDetector interface ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the LeakDetector class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "LLVMContextImpl.h"
-#include "llvm/Support/LeakDetector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/Support/Threading.h"
-#include "llvm/Value.h"
-using namespace llvm;
-
-static ManagedStatic<sys::SmartMutex<true> > ObjectsLock;
-static ManagedStatic<LeakDetectorImpl<void> > Objects;
-
-static void clearGarbage(LLVMContext &Context) {
- Objects->clear();
- Context.pImpl->LLVMObjects.clear();
-}
-
-void LeakDetector::addGarbageObjectImpl(void *Object) {
- sys::SmartScopedLock<true> Lock(*ObjectsLock);
- Objects->addGarbage(Object);
-}
-
-void LeakDetector::addGarbageObjectImpl(const Value *Object) {
- LLVMContextImpl *pImpl = Object->getContext().pImpl;
- pImpl->LLVMObjects.addGarbage(Object);
-}
-
-void LeakDetector::removeGarbageObjectImpl(void *Object) {
- sys::SmartScopedLock<true> Lock(*ObjectsLock);
- Objects->removeGarbage(Object);
-}
-
-void LeakDetector::removeGarbageObjectImpl(const Value *Object) {
- LLVMContextImpl *pImpl = Object->getContext().pImpl;
- pImpl->LLVMObjects.removeGarbage(Object);
-}
-
-void LeakDetector::checkForGarbageImpl(LLVMContext &Context,
- const std::string &Message) {
- LLVMContextImpl *pImpl = Context.pImpl;
- sys::SmartScopedLock<true> Lock(*ObjectsLock);
-
- Objects->setName("GENERIC");
- pImpl->LLVMObjects.setName("LLVM");
-
- // use non-short-circuit version so that both checks are performed
- if (Objects->hasGarbage(Message) |
- pImpl->LLVMObjects.hasGarbage(Message))
- errs() << "\nThis is probably because you removed an object, but didn't "
- << "delete it. Please check your code for memory leaks.\n";
-
- // Clear out results so we don't get duplicate warnings on
- // next call...
- clearGarbage(Context);
-}
diff --git a/lib/VMCore/LeaksContext.h b/lib/VMCore/LeaksContext.h
deleted file mode 100644
index b9e59d46b7ad..000000000000
--- a/lib/VMCore/LeaksContext.h
+++ /dev/null
@@ -1,92 +0,0 @@
-//===- LeaksContext.h - LeakDetector Implementation ------------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines various helper methods and classes used by
-// LLVMContextImpl for leak detection.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Value.h"
-#include "llvm/ADT/SmallPtrSet.h"
-
-namespace llvm {
-
-template <class T>
-struct PrinterTrait {
- static void print(const T* P) { errs() << P; }
-};
-
-template<>
-struct PrinterTrait<Value> {
- static void print(const Value* P) { errs() << *P; }
-};
-
-template <typename T>
-struct LeakDetectorImpl {
- explicit LeakDetectorImpl(const char* const name = "") :
- Cache(0), Name(name) { }
-
- void clear() {
- Cache = 0;
- Ts.clear();
- }
-
- void setName(const char* n) {
- Name = n;
- }
-
- // Because the most common usage pattern, by far, is to add a garbage
- // object and then remove it immediately, we optimize this case. When an
- // object is added, it is not inserted into the set right away; it is
- // stored in the Cache member instead. If it is removed before anything
- // else is added, no set search need be performed. (A standalone sketch of
- // this idiom follows the end of this file's hunk.)
- void addGarbage(const T* o) {
- assert(Ts.count(o) == 0 && "Object already in set!");
- if (Cache) {
- assert(Cache != o && "Object already in set!");
- Ts.insert(Cache);
- }
- Cache = o;
- }
-
- void removeGarbage(const T* o) {
- if (o == Cache)
- Cache = 0; // Cache hit
- else
- Ts.erase(o);
- }
-
- bool hasGarbage(const std::string& Message) {
- addGarbage(0); // Flush the Cache
-
- assert(Cache == 0 && "No value should be cached anymore!");
-
- if (!Ts.empty()) {
- errs() << "Leaked " << Name << " objects found: " << Message << ":\n";
- for (typename SmallPtrSet<const T*, 8>::iterator I = Ts.begin(),
- E = Ts.end(); I != E; ++I) {
- errs() << '\t';
- PrinterTrait<T>::print(*I);
- errs() << '\n';
- }
- errs() << '\n';
-
- return true;
- }
-
- return false;
- }
-
-private:
- SmallPtrSet<const T*, 8> Ts;
- const T* Cache;
- const char* Name;
-};
-
-}
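
As promised above, a standalone model of LeakDetectorImpl's one-slot cache (types and names hypothetical): the newest pointer parks in Cache, so an add immediately followed by a remove never touches the set at all.

  #include <unordered_set>

  struct PtrTracker {
    std::unordered_set<const void *> Set;
    const void *Cache = nullptr;

    void add(const void *P) {
      if (Cache)
        Set.insert(Cache);   // demote the previous resident into the set
      Cache = P;
    }
    void remove(const void *P) {
      if (P == Cache)
        Cache = nullptr;     // cache hit: no set lookup at all
      else
        Set.erase(P);
    }
  };
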
diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile
deleted file mode 100644
index 8b9865152e24..000000000000
--- a/lib/VMCore/Makefile
+++ /dev/null
@@ -1,33 +0,0 @@
-##===- lib/VMCore/Makefile ---------------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../..
-LIBRARYNAME = LLVMCore
-BUILD_ARCHIVE = 1
-
-BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/Intrinsics.gen
-
-include $(LEVEL)/Makefile.common
-
-GENFILE:=$(PROJ_OBJ_ROOT)/include/llvm/Intrinsics.gen
-
-INTRINSICTD := $(PROJ_SRC_ROOT)/include/llvm/Intrinsics.td
-INTRINSICTDS := $(wildcard $(PROJ_SRC_ROOT)/include/llvm/Intrinsics*.td)
-
-$(ObjDir)/Intrinsics.gen.tmp: $(ObjDir)/.dir $(INTRINSICTDS) $(LLVM_TBLGEN)
- $(Echo) Building Intrinsics.gen.tmp from Intrinsics.td
- $(Verb) $(LLVMTableGen) $(call SYSPATH, $(INTRINSICTD)) -o $(call SYSPATH, $@) -gen-intrinsic
-
-$(GENFILE): $(ObjDir)/Intrinsics.gen.tmp
- $(Verb) $(CMP) -s $@ $< || ( $(CP) $< $@ && \
- $(EchoCmd) Updated Intrinsics.gen because Intrinsics.gen.tmp \
- changed significantly. )
-
-install-local:: $(GENFILE)
- $(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen
- $(Verb) $(DataInstall) $(GENFILE) $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
deleted file mode 100644
index 95e5a8b2f969..000000000000
--- a/lib/VMCore/Metadata.cpp
+++ /dev/null
@@ -1,744 +0,0 @@
-//===-- Metadata.cpp - Implement Metadata classes -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Metadata classes.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Metadata.h"
-#include "LLVMContextImpl.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Instruction.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
-#include "SymbolTableListTraitsImpl.h"
-#include "llvm/Support/ConstantRange.h"
-#include "llvm/Support/LeakDetector.h"
-#include "llvm/Support/ValueHandle.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// MDString implementation.
-//
-
-void MDString::anchor() { }
-
-MDString::MDString(LLVMContext &C)
- : Value(Type::getMetadataTy(C), Value::MDStringVal) {}
-
-MDString *MDString::get(LLVMContext &Context, StringRef Str) {
- LLVMContextImpl *pImpl = Context.pImpl;
- StringMapEntry<Value*> &Entry =
- pImpl->MDStringCache.GetOrCreateValue(Str);
- Value *&S = Entry.getValue();
- if (!S) S = new MDString(Context);
- S->setValueName(&Entry);
- return cast<MDString>(S);
-}
-
-//===----------------------------------------------------------------------===//
-// MDNodeOperand implementation.
-//
-
-// Use CallbackVH to hold MDNode operands.
-namespace llvm {
-class MDNodeOperand : public CallbackVH {
- MDNode *getParent() {
- MDNodeOperand *Cur = this;
-
- while (Cur->getValPtrInt() != 1)
- --Cur;
-
- assert(Cur->getValPtrInt() == 1 &&
- "Couldn't find the beginning of the operand list!");
- return reinterpret_cast<MDNode*>(Cur) - 1;
- }
-
-public:
- MDNodeOperand(Value *V) : CallbackVH(V) {}
- ~MDNodeOperand() {}
-
- void set(Value *V) {
- unsigned IsFirst = this->getValPtrInt();
- this->setValPtr(V);
- this->setAsFirstOperand(IsFirst);
- }
-
- /// setAsFirstOperand - Accessor method to mark the operand as the first in
- /// the list.
- void setAsFirstOperand(unsigned V) { this->setValPtrInt(V); }
-
- virtual void deleted();
- virtual void allUsesReplacedWith(Value *NV);
-};
-} // end namespace llvm.
-
-
-void MDNodeOperand::deleted() {
- getParent()->replaceOperand(this, 0);
-}
-
-void MDNodeOperand::allUsesReplacedWith(Value *NV) {
- getParent()->replaceOperand(this, NV);
-}
-
-//===----------------------------------------------------------------------===//
-// MDNode implementation.
-//
-
-/// getOperandPtr - Helper function to get the MDNodeOperands co-allocated
-/// at the end of the MDNode.
-static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
- // Use <= instead of < to permit a one-past-the-end address.
- assert(Op <= N->getNumOperands() && "Invalid operand number");
- return reinterpret_cast<MDNodeOperand*>(N + 1) + Op;
-}
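
The arithmetic above works because node and operands share one allocation, with the operand array placed directly behind the node header. A standalone sketch of that co-allocation layout, with hypothetical Header/Elem types:

  #include <cstdlib>
  #include <new>

  struct Header { unsigned NumElems; };
  struct Elem   { void *Ptr; };

  Header *allocateWithTrailingElems(unsigned N) {
    // One block holds the header plus N trailing elements.
    void *Mem = std::malloc(sizeof(Header) + N * sizeof(Elem));
    Header *H = new (Mem) Header{N};
    Elem *Elems = reinterpret_cast<Elem *>(H + 1);  // first trailing element
    for (unsigned I = 0; I != N; ++I)
      new (Elems + I) Elem();                       // placement-construct each
    return H;
  }
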
-
-void MDNode::replaceOperandWith(unsigned i, Value *Val) {
- MDNodeOperand *Op = getOperandPtr(this, i);
- replaceOperand(Op, Val);
-}
-
-MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal)
-: Value(Type::getMetadataTy(C), Value::MDNodeVal) {
- NumOperands = Vals.size();
-
- if (isFunctionLocal)
- setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit);
-
- // Initialize the operand list, which is co-allocated at the end of the node.
- unsigned i = 0;
- for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
- Op != E; ++Op, ++i) {
- new (Op) MDNodeOperand(Vals[i]);
-
- // Mark the first MDNodeOperand as being the first in the list of operands.
- if (i == 0)
- Op->setAsFirstOperand(1);
- }
-}
-
-/// ~MDNode - Destroy MDNode.
-MDNode::~MDNode() {
- assert((getSubclassDataFromValue() & DestroyFlag) != 0 &&
- "Not being destroyed through destroy()?");
- LLVMContextImpl *pImpl = getType()->getContext().pImpl;
- if (isNotUniqued()) {
- pImpl->NonUniquedMDNodes.erase(this);
- } else {
- pImpl->MDNodeSet.RemoveNode(this);
- }
-
- // Destroy the operands.
- for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
- Op != E; ++Op)
- Op->~MDNodeOperand();
-}
-
-static const Function *getFunctionForValue(Value *V) {
- if (!V) return NULL;
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- BasicBlock *BB = I->getParent();
- return BB ? BB->getParent() : 0;
- }
- if (Argument *A = dyn_cast<Argument>(V))
- return A->getParent();
- if (BasicBlock *BB = dyn_cast<BasicBlock>(V))
- return BB->getParent();
- if (MDNode *MD = dyn_cast<MDNode>(V))
- return MD->getFunction();
- return NULL;
-}
-
-#ifndef NDEBUG
-static const Function *assertLocalFunction(const MDNode *N) {
- if (!N->isFunctionLocal()) return 0;
-
- // FIXME: This does not handle cyclic function local metadata.
- const Function *F = 0, *NewF = 0;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- if (Value *V = N->getOperand(i)) {
- if (MDNode *MD = dyn_cast<MDNode>(V))
- NewF = assertLocalFunction(MD);
- else
- NewF = getFunctionForValue(V);
- }
- if (F == 0)
- F = NewF;
- else
- assert((NewF == 0 || F == NewF) &&"inconsistent function-local metadata");
- }
- return F;
-}
-#endif
-
-// getFunction - If this metadata is function-local and recursively has a
-// function-local operand, return the first such operand's parent function.
-// Otherwise, return null. getFunction() should not be used for performance-
-// critical code because it recursively visits all the MDNode's operands.
-const Function *MDNode::getFunction() const {
-#ifndef NDEBUG
- return assertLocalFunction(this);
-#else
- if (!isFunctionLocal()) return NULL;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (const Function *F = getFunctionForValue(getOperand(i)))
- return F;
- return NULL;
-#endif
-}
-
-// destroy - Delete this node. Only when there are no uses.
-void MDNode::destroy() {
- setValueSubclassData(getSubclassDataFromValue() | DestroyFlag);
- // Placement delete, then free the memory.
- this->~MDNode();
- free(this);
-}
-
-/// isFunctionLocalValue - Return true if this is a value that would require a
-/// function-local MDNode.
-static bool isFunctionLocalValue(Value *V) {
- return isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) ||
- (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal());
-}
-
-MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
- FunctionLocalness FL, bool Insert) {
- LLVMContextImpl *pImpl = Context.pImpl;
-
- // Add all the operand pointers. Note that we don't have to add the
- // isFunctionLocal bit because that's implied by the operands.
- // Note that if the operands are later nulled out, the node will be
- // removed from the uniquing map.
- FoldingSetNodeID ID;
- for (unsigned i = 0; i != Vals.size(); ++i)
- ID.AddPointer(Vals[i]);
-
- void *InsertPoint;
- MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
-
- if (N || !Insert)
- return N;
-
- bool isFunctionLocal = false;
- switch (FL) {
- case FL_Unknown:
- for (unsigned i = 0; i != Vals.size(); ++i) {
- Value *V = Vals[i];
- if (!V) continue;
- if (isFunctionLocalValue(V)) {
- isFunctionLocal = true;
- break;
- }
- }
- break;
- case FL_No:
- isFunctionLocal = false;
- break;
- case FL_Yes:
- isFunctionLocal = true;
- break;
- }
-
- // Co-allocate space for the node and operands together, then placement-new.
- void *Ptr = malloc(sizeof(MDNode) + Vals.size() * sizeof(MDNodeOperand));
- N = new (Ptr) MDNode(Context, Vals, isFunctionLocal);
-
- // Cache the operand hash.
- N->Hash = ID.ComputeHash();
-
- // InsertPoint will have been set by the FindNodeOrInsertPos call.
- pImpl->MDNodeSet.InsertNode(N, InsertPoint);
-
- return N;
-}
-
-MDNode *MDNode::get(LLVMContext &Context, ArrayRef<Value*> Vals) {
- return getMDNode(Context, Vals, FL_Unknown);
-}
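-
-// Usage sketch (illustrative, not from the original file): because get()
-// funnels through the FoldingSet above, identical operand lists yield the
-// same node. Given some existing LLVMContext &Ctx:
-//
-//   Value *Ops[] = { MDString::get(Ctx, "key") };
-//   MDNode *N1 = MDNode::get(Ctx, Ops);
-//   MDNode *N2 = MDNode::get(Ctx, Ops);
-//   assert(N1 == N2 && "uniqued to a single MDNode");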
-
-MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context,
- ArrayRef<Value*> Vals,
- bool isFunctionLocal) {
- return getMDNode(Context, Vals, isFunctionLocal ? FL_Yes : FL_No);
-}
-
-MDNode *MDNode::getIfExists(LLVMContext &Context, ArrayRef<Value*> Vals) {
- return getMDNode(Context, Vals, FL_Unknown, false);
-}
-
-MDNode *MDNode::getTemporary(LLVMContext &Context, ArrayRef<Value*> Vals) {
- MDNode *N =
- (MDNode *)malloc(sizeof(MDNode) + Vals.size() * sizeof(MDNodeOperand));
- N = new (N) MDNode(Context, Vals, FL_No);
- N->setValueSubclassData(N->getSubclassDataFromValue() |
- NotUniquedBit);
- LeakDetector::addGarbageObject(N);
- return N;
-}
-
-void MDNode::deleteTemporary(MDNode *N) {
- assert(N->use_empty() && "Temporary MDNode has uses!");
- assert(!N->getContext().pImpl->MDNodeSet.RemoveNode(N) &&
- "Deleting a non-temporary uniqued node!");
- assert(!N->getContext().pImpl->NonUniquedMDNodes.erase(N) &&
- "Deleting a non-temporary non-uniqued node!");
- assert((N->getSubclassDataFromValue() & NotUniquedBit) &&
- "Temporary MDNode does not have NotUniquedBit set!");
- assert((N->getSubclassDataFromValue() & DestroyFlag) == 0 &&
- "Temporary MDNode has DestroyFlag set!");
- LeakDetector::removeGarbageObject(N);
- N->destroy();
-}
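-
-// Usage sketch (illustrative, not from the original file): the
-// forward-reference pattern these two entry points support. A temporary node
-// stands in for metadata whose operands are not known yet; Real below is a
-// hypothetical finished node:
-//
-//   MDNode *Temp = MDNode::getTemporary(Ctx, ArrayRef<Value*>());
-//   // ... build the real node, possibly with references through Temp ...
-//   Temp->replaceAllUsesWith(Real);
-//   MDNode::deleteTemporary(Temp);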
-
-/// getOperand - Return specified operand.
-Value *MDNode::getOperand(unsigned i) const {
- return *getOperandPtr(const_cast<MDNode*>(this), i);
-}
-
-void MDNode::Profile(FoldingSetNodeID &ID) const {
- // Add all the operand pointers. Note that we don't have to add the
- // isFunctionLocal bit because that's implied by the operands.
- // Note that if the operands are later nulled out, the node will be
- // removed from the uniquing map.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- ID.AddPointer(getOperand(i));
-}
-
-void MDNode::setIsNotUniqued() {
- setValueSubclassData(getSubclassDataFromValue() | NotUniquedBit);
- LLVMContextImpl *pImpl = getType()->getContext().pImpl;
- pImpl->NonUniquedMDNodes.insert(this);
-}
-
-// Replace value from this node's operand list.
-void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
- Value *From = *Op;
-
- // It is possible that someone did GV->RAUW(inst), replacing a global variable
- // with an instruction or some other function-local object. If this is a
- // non-function-local MDNode, it can't point to a function-local object.
- // Handle this case by implicitly dropping the MDNode reference to null.
- // Likewise if the MDNode is function-local but for a different function.
- if (To && isFunctionLocalValue(To)) {
- if (!isFunctionLocal())
- To = 0;
- else {
- const Function *F = getFunction();
- const Function *FV = getFunctionForValue(To);
- // Metadata can be function-local without having an associated function.
- // So only consider functions to have changed if non-null.
- if (F && FV && F != FV)
- To = 0;
- }
- }
-
- if (From == To)
- return;
-
- // Update the operand.
- Op->set(To);
-
- // If this node is already not being uniqued (because one of the operands
- // already went to null), then there is nothing else to do here.
- if (isNotUniqued()) return;
-
- LLVMContextImpl *pImpl = getType()->getContext().pImpl;
-
- // Remove "this" from the context map. FoldingSet doesn't have to reprofile
- // this node to remove it, so we don't care what state the operands are in.
- pImpl->MDNodeSet.RemoveNode(this);
-
- // If we are dropping an argument to null, we choose to not unique the MDNode
- // anymore. This commonly occurs during destruction, and uniquing these
- // brings little reuse. Also, this means we don't need to include
- // isFunctionLocal bits in FoldingSetNodeIDs for MDNodes.
- if (To == 0) {
- setIsNotUniqued();
- return;
- }
-
- // Now that the node is out of the folding set, get ready to reinsert it.
- // First, check to see if another node with the same operands already exists
- // in the set. If so, then this node is redundant.
- FoldingSetNodeID ID;
- Profile(ID);
- void *InsertPoint;
- if (MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)) {
- replaceAllUsesWith(N);
- destroy();
- return;
- }
-
- // Cache the operand hash.
- Hash = ID.ComputeHash();
- // InsertPoint will have been set by the FindNodeOrInsertPos call.
- pImpl->MDNodeSet.InsertNode(this, InsertPoint);
-
- // If this MDValue was previously function-local but no longer is, clear
- // its function-local flag.
- if (isFunctionLocal() && !isFunctionLocalValue(To)) {
- bool isStillFunctionLocal = false;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- Value *V = getOperand(i);
- if (!V) continue;
- if (isFunctionLocalValue(V)) {
- isStillFunctionLocal = true;
- break;
- }
- }
- if (!isStillFunctionLocal)
- setValueSubclassData(getSubclassDataFromValue() & ~FunctionLocalBit);
- }
-}
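-
-// Worked consequence (illustrative): if !A = !{!X} and !B = !{!Y} and X is
-// RAUW'd to Y, the FindNodeOrInsertPos probe above discovers !B, so !A
-// forwards all of its uses to !B and destroys itself instead of leaving a
-// duplicate node in the set.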
-
-MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
- if (!A || !B)
- return NULL;
-
- if (A == B)
- return A;
-
- SmallVector<MDNode *, 4> PathA;
- MDNode *T = A;
- while (T) {
- PathA.push_back(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
- }
-
- SmallVector<MDNode *, 4> PathB;
- T = B;
- while (T) {
- PathB.push_back(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
- }
-
- int IA = PathA.size() - 1;
- int IB = PathB.size() - 1;
-
- MDNode *Ret = 0;
- while (IA >= 0 && IB >= 0) {
- if (PathA[IA] == PathB[IB])
- Ret = PathA[IA];
- else
- break;
- --IA;
- --IB;
- }
- return Ret;
-}
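-
-// Worked example (illustrative): in a TBAA tree where !"int" and !"float"
-// both name !"omnipotent char" as their parent (operand 1), PathA and PathB
-// share only that root suffix, so the backward walk above returns the
-// !"omnipotent char" node; identical tags short-circuit via the A == B check.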
-
-MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
- if (!A || !B)
- return NULL;
-
- APFloat AVal = cast<ConstantFP>(A->getOperand(0))->getValueAPF();
- APFloat BVal = cast<ConstantFP>(B->getOperand(0))->getValueAPF();
- if (AVal.compare(BVal) == APFloat::cmpLessThan)
- return A;
- return B;
-}
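-
-// Worked example (illustrative): fpmath metadata carries the maximum
-// permitted error as operand 0. Merging !{float 2.5} with !{float 4.0} keeps
-// the stricter bound and returns the 2.5 node, which is conservatively
-// correct for both original instructions.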
-
-static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
- return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
-}
-
-static bool canBeMerged(const ConstantRange &A, const ConstantRange &B) {
- return !A.intersectWith(B).isEmptySet() || isContiguous(A, B);
-}
-
-static bool tryMergeRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low,
- ConstantInt *High) {
- ConstantRange NewRange(Low->getValue(), High->getValue());
- unsigned Size = EndPoints.size();
- APInt LB = cast<ConstantInt>(EndPoints[Size - 2])->getValue();
- APInt LE = cast<ConstantInt>(EndPoints[Size - 1])->getValue();
- ConstantRange LastRange(LB, LE);
- if (canBeMerged(NewRange, LastRange)) {
- ConstantRange Union = LastRange.unionWith(NewRange);
- Type *Ty = High->getType();
- EndPoints[Size - 2] = ConstantInt::get(Ty, Union.getLower());
- EndPoints[Size - 1] = ConstantInt::get(Ty, Union.getUpper());
- return true;
- }
- return false;
-}
-
-static void addRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low,
- ConstantInt *High) {
- if (!EndPoints.empty())
- if (tryMergeRange(EndPoints, Low, High))
- return;
-
- EndPoints.push_back(Low);
- EndPoints.push_back(High);
-}
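-
-// Worked example (illustrative): with EndPoints holding [0, 10) and a new
-// range [10, 20), the two ranges touch at 10, so isContiguous holds and
-// tryMergeRange rewrites the tail in place to the union [0, 20) rather than
-// letting addRange append a second interval.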
-
-MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
- // Given two ranges, we want to compute the union of the ranges. This
- // is slightly complicated by having to combine the intervals and merge
- // the ones that overlap.
-
- if (!A || !B)
- return NULL;
-
- if (A == B)
- return A;
-
- // First, walk both lists in order of the lower boundary of each interval.
- // At each step, try to merge the new interval with the last one we added.
- SmallVector<Value*, 4> EndPoints;
- int AI = 0;
- int BI = 0;
- int AN = A->getNumOperands() / 2;
- int BN = B->getNumOperands() / 2;
- while (AI < AN && BI < BN) {
- ConstantInt *ALow = cast<ConstantInt>(A->getOperand(2 * AI));
- ConstantInt *BLow = cast<ConstantInt>(B->getOperand(2 * BI));
-
- if (ALow->getValue().slt(BLow->getValue())) {
- addRange(EndPoints, ALow, cast<ConstantInt>(A->getOperand(2 * AI + 1)));
- ++AI;
- } else {
- addRange(EndPoints, BLow, cast<ConstantInt>(B->getOperand(2 * BI + 1)));
- ++BI;
- }
- }
- while (AI < AN) {
- addRange(EndPoints, cast<ConstantInt>(A->getOperand(2 * AI)),
- cast<ConstantInt>(A->getOperand(2 * AI + 1)));
- ++AI;
- }
- while (BI < BN) {
- addRange(EndPoints, cast<ConstantInt>(B->getOperand(2 * BI)),
- cast<ConstantInt>(B->getOperand(2 * BI + 1)));
- ++BI;
- }
-
- // If we have more than 2 ranges (4 endpoints) we have to try to merge
- // the last and first ones.
- unsigned Size = EndPoints.size();
- if (Size > 4) {
- ConstantInt *FB = cast<ConstantInt>(EndPoints[0]);
- ConstantInt *FE = cast<ConstantInt>(EndPoints[1]);
- if (tryMergeRange(EndPoints, FB, FE)) {
- for (unsigned i = 0; i < Size - 2; ++i) {
- EndPoints[i] = EndPoints[i + 2];
- }
- EndPoints.resize(Size - 2);
- }
- }
-
- // If in the end we have a single range, it is possible that it is now the
- // full range. Just drop the metadata in that case.
- if (EndPoints.size() == 2) {
- ConstantRange Range(cast<ConstantInt>(EndPoints[0])->getValue(),
- cast<ConstantInt>(EndPoints[1])->getValue());
- if (Range.isFullSet())
- return NULL;
- }
-
- return MDNode::get(A->getContext(), EndPoints);
-}
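-
-// Worked example (illustrative): A = !{i8 0, i8 2, i8 5, i8 7} and
-// B = !{i8 1, i8 6} merge, via the sorted walk above, into the single
-// interval [0, 7), emitted as !{i8 0, i8 7}; had the union covered every
-// i8 value, the full-set check would have returned null instead.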
-
-//===----------------------------------------------------------------------===//
-// NamedMDNode implementation.
-//
-
-static SmallVector<TrackingVH<MDNode>, 4> &getNMDOps(void *Operands) {
- return *(SmallVector<TrackingVH<MDNode>, 4>*)Operands;
-}
-
-NamedMDNode::NamedMDNode(const Twine &N)
- : Name(N.str()), Parent(0),
- Operands(new SmallVector<TrackingVH<MDNode>, 4>()) {
-}
-
-NamedMDNode::~NamedMDNode() {
- dropAllReferences();
- delete &getNMDOps(Operands);
-}
-
-/// getNumOperands - Return number of NamedMDNode operands.
-unsigned NamedMDNode::getNumOperands() const {
- return (unsigned)getNMDOps(Operands).size();
-}
-
-/// getOperand - Return specified operand.
-MDNode *NamedMDNode::getOperand(unsigned i) const {
- assert(i < getNumOperands() && "Invalid Operand number!");
- return dyn_cast<MDNode>(&*getNMDOps(Operands)[i]);
-}
-
-/// addOperand - Add metadata Operand.
-void NamedMDNode::addOperand(MDNode *M) {
- assert(!M->isFunctionLocal() &&
- "NamedMDNode operands must not be function-local!");
- getNMDOps(Operands).push_back(TrackingVH<MDNode>(M));
-}
-
-/// eraseFromParent - Drop all references and remove the node from parent
-/// module.
-void NamedMDNode::eraseFromParent() {
- getParent()->eraseNamedMetadata(this);
-}
-
-/// dropAllReferences - Remove all uses and clear node vector.
-void NamedMDNode::dropAllReferences() {
- getNMDOps(Operands).clear();
-}
-
-/// getName - Return a constant reference to this named metadata's name.
-StringRef NamedMDNode::getName() const {
- return StringRef(Name);
-}
-
-//===----------------------------------------------------------------------===//
-// Instruction Metadata method implementations.
-//
-
-void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
- if (Node == 0 && !hasMetadata()) return;
- setMetadata(getContext().getMDKindID(Kind), Node);
-}
-
-MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
- return getMetadataImpl(getContext().getMDKindID(Kind));
-}
-
-/// setMetadata - Set the metadata of the specified kind to the specified
-/// node. This updates/replaces metadata if already present, or removes it if
-/// Node is null.
-void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
- if (Node == 0 && !hasMetadata()) return;
-
- // Handle 'dbg' as a special case since it is not stored in the hash table.
- if (KindID == LLVMContext::MD_dbg) {
- DbgLoc = DebugLoc::getFromDILocation(Node);
- return;
- }
-
- // Handle the case when we're adding/updating metadata on an instruction.
- if (Node) {
- LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
- assert(!Info.empty() == hasMetadataHashEntry() &&
- "HasMetadata bit is wonked");
- if (Info.empty()) {
- setHasMetadataHashEntry(true);
- } else {
- // Handle replacement of an existing value.
- for (unsigned i = 0, e = Info.size(); i != e; ++i)
- if (Info[i].first == KindID) {
- Info[i].second = Node;
- return;
- }
- }
-
- // No replacement, just add it to the list.
- Info.push_back(std::make_pair(KindID, Node));
- return;
- }
-
- // Otherwise, we're removing metadata from an instruction.
- assert((hasMetadataHashEntry() ==
- getContext().pImpl->MetadataStore.count(this)) &&
- "HasMetadata bit out of date!");
- if (!hasMetadataHashEntry())
- return; // Nothing to remove!
- LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
-
- // Common case is removing the only entry.
- if (Info.size() == 1 && Info[0].first == KindID) {
- getContext().pImpl->MetadataStore.erase(this);
- setHasMetadataHashEntry(false);
- return;
- }
-
- // Handle removal of an existing value.
- for (unsigned i = 0, e = Info.size(); i != e; ++i)
- if (Info[i].first == KindID) {
- Info[i] = Info.back();
- Info.pop_back();
- assert(!Info.empty() && "Removing last entry should be handled above");
- return;
- }
- // Otherwise, removing an entry that doesn't exist on the instruction.
-}
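-
-// Usage sketch (illustrative, not from the original file; "some.kind" is a
-// hypothetical metadata kind name):
-//
-//   I->setMetadata("some.kind", N);  // attach or replace, via the KindID path
-//   I->setMetadata("some.kind", 0);  // detach; may clear the hash entry
-//
-// A null node on an instruction with no metadata is a cheap no-op thanks to
-// the early return above.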
-
-MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
- // Handle 'dbg' as a special case since it is not stored in the hash table.
- if (KindID == LLVMContext::MD_dbg)
- return DbgLoc.getAsMDNode(getContext());
-
- if (!hasMetadataHashEntry()) return 0;
-
- LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
- assert(!Info.empty() && "bit out of sync with hash table");
-
- for (LLVMContextImpl::MDMapTy::iterator I = Info.begin(), E = Info.end();
- I != E; ++I)
- if (I->first == KindID)
- return I->second;
- return 0;
-}
-
-void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,
- MDNode*> > &Result) const {
- Result.clear();
-
- // Handle 'dbg' as a special case since it is not stored in the hash table.
- if (!DbgLoc.isUnknown()) {
- Result.push_back(std::make_pair((unsigned)LLVMContext::MD_dbg,
- DbgLoc.getAsMDNode(getContext())));
- if (!hasMetadataHashEntry()) return;
- }
-
- assert(hasMetadataHashEntry() &&
- getContext().pImpl->MetadataStore.count(this) &&
- "Shouldn't have called this");
- const LLVMContextImpl::MDMapTy &Info =
- getContext().pImpl->MetadataStore.find(this)->second;
- assert(!Info.empty() && "Shouldn't have called this");
-
- Result.append(Info.begin(), Info.end());
-
- // Sort the resulting array so it is stable.
- if (Result.size() > 1)
- array_pod_sort(Result.begin(), Result.end());
-}
-
-void Instruction::
-getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
- MDNode*> > &Result) const {
- Result.clear();
- assert(hasMetadataHashEntry() &&
- getContext().pImpl->MetadataStore.count(this) &&
- "Shouldn't have called this");
- const LLVMContextImpl::MDMapTy &Info =
- getContext().pImpl->MetadataStore.find(this)->second;
- assert(!Info.empty() && "Shouldn't have called this");
- Result.append(Info.begin(), Info.end());
-
- // Sort the resulting array so it is stable.
- if (Result.size() > 1)
- array_pod_sort(Result.begin(), Result.end());
-}
-
-/// clearMetadataHashEntries - Clear all hashtable-based metadata from
-/// this instruction.
-void Instruction::clearMetadataHashEntries() {
- assert(hasMetadataHashEntry() && "Caller should check");
- getContext().pImpl->MetadataStore.erase(this);
- setHasMetadataHashEntry(false);
-}
-
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
deleted file mode 100644
index 5b5176b3c70b..000000000000
--- a/lib/VMCore/Module.cpp
+++ /dev/null
@@ -1,469 +0,0 @@
-//===-- Module.cpp - Implement the Module class ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Module class for the VMCore library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Module.h"
-#include "llvm/InstrTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GVMaterializer.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/LeakDetector.h"
-#include "SymbolTableListTraitsImpl.h"
-#include <algorithm>
-#include <cstdarg>
-#include <cstdlib>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Methods to implement the globals and functions lists.
-//
-
-// Explicit instantiations of SymbolTableListTraits since some of the methods
-// are not in the public header file.
-template class llvm::SymbolTableListTraits<Function, Module>;
-template class llvm::SymbolTableListTraits<GlobalVariable, Module>;
-template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
-
-//===----------------------------------------------------------------------===//
-// Primitive Module methods.
-//
-
-Module::Module(StringRef MID, LLVMContext& C)
- : Context(C), Materializer(NULL), ModuleID(MID) {
- ValSymTab = new ValueSymbolTable();
- NamedMDSymTab = new StringMap<NamedMDNode *>();
- Context.addModule(this);
-}
-
-Module::~Module() {
- Context.removeModule(this);
- dropAllReferences();
- GlobalList.clear();
- FunctionList.clear();
- AliasList.clear();
- LibraryList.clear();
- NamedMDList.clear();
- delete ValSymTab;
- delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
-}
-
-/// Target endian information.
-Module::Endianness Module::getEndianness() const {
- StringRef temp = DataLayout;
- Module::Endianness ret = AnyEndianness;
-
- while (!temp.empty()) {
- std::pair<StringRef, StringRef> P = getToken(temp, "-");
-
- StringRef token = P.first;
- temp = P.second;
-
- if (token[0] == 'e') {
- ret = LittleEndian;
- } else if (token[0] == 'E') {
- ret = BigEndian;
- }
- }
-
- return ret;
-}
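-
-// Worked example (illustrative): for a data layout string such as
-// "e-p:64:64:64-i32:32:32" the loop above tokenizes on '-', sees the leading
-// "e" token, and answers LittleEndian; an "E" token would mean BigEndian,
-// and a string with neither leaves the result at AnyEndianness.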
-
-/// Target Pointer Size information.
-Module::PointerSize Module::getPointerSize() const {
- StringRef temp = DataLayout;
- Module::PointerSize ret = AnyPointerSize;
-
- while (!temp.empty()) {
- std::pair<StringRef, StringRef> TmpP = getToken(temp, "-");
- temp = TmpP.second;
- TmpP = getToken(TmpP.first, ":");
- StringRef token = TmpP.second, signalToken = TmpP.first;
-
- if (signalToken[0] == 'p') {
- int size = 0;
- getToken(token, ":").first.getAsInteger(10, size);
- if (size == 32)
- ret = Pointer32;
- else if (size == 64)
- ret = Pointer64;
- }
- }
-
- return ret;
-}
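-
-// Worked example (illustrative): in "e-p:64:64:64-i32:32:32" the "p" entry
-// splits on ':' into signalToken "p" and token "64:64:64"; its first field
-// parses as 64, so the function answers Pointer64.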
-
-/// getNamedValue - Return the first global value in the module with
-/// the specified name, of arbitrary type. This method returns null
-/// if a global with the specified name is not found.
-GlobalValue *Module::getNamedValue(StringRef Name) const {
- return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
-}
-
-/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
-/// This ID is uniqued across modules in the current LLVMContext.
-unsigned Module::getMDKindID(StringRef Name) const {
- return Context.getMDKindID(Name);
-}
-
-/// getMDKindNames - Populate the client-supplied SmallVector with the names
-/// of custom metadata IDs registered in this LLVMContext. ID #0 is not used,
-/// so it is filled in as an empty string.
-void Module::getMDKindNames(SmallVectorImpl<StringRef> &Result) const {
- return Context.getMDKindNames(Result);
-}
-
-
-//===----------------------------------------------------------------------===//
-// Methods for easy access to the functions in the module.
-//
-
-// getOrInsertFunction - Look up the specified function in the module symbol
-// table. If it does not exist, add a prototype for the function and return
-// it. This is nice because it allows most passes to get away with not handling
-// the symbol table directly for this common task.
-//
-Constant *Module::getOrInsertFunction(StringRef Name,
- FunctionType *Ty,
- AttrListPtr AttributeList) {
- // See if we have a definition for the specified function already.
- GlobalValue *F = getNamedValue(Name);
- if (F == 0) {
- // Nope, add it
- Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
- if (!New->isIntrinsic()) // Intrinsics get attrs set on construction
- New->setAttributes(AttributeList);
- FunctionList.push_back(New);
- return New; // Return the new prototype.
- }
-
- // Okay, the function exists. Does it have externally visible linkage?
- if (F->hasLocalLinkage()) {
- // Clear the function's name.
- F->setName("");
- // Retry, now there won't be a conflict.
- Constant *NewF = getOrInsertFunction(Name, Ty);
- F->setName(Name);
- return NewF;
- }
-
- // If the function exists but has the wrong type, return a bitcast to the
- // right type.
- if (F->getType() != PointerType::getUnqual(Ty))
- return ConstantExpr::getBitCast(F, PointerType::getUnqual(Ty));
-
- // Otherwise, we just found the existing function or a prototype.
- return F;
-}
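-
-// Usage sketch (illustrative, not from the original file; "__my_hook" is a
-// hypothetical name). Given some Module &M:
-//
-//   FunctionType *FTy =
-//       FunctionType::get(Type::getVoidTy(M.getContext()), false);
-//   Constant *Hook = M.getOrInsertFunction("__my_hook", FTy);
-//
-// Hook is the existing function, a fresh prototype, or a bitcast of a
-// same-named value of a different type, per the three cases above.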
-
-Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
- FunctionType *Ty,
- AttrListPtr AttributeList) {
- // See if we have a definition for the specified function already.
- GlobalValue *F = getNamedValue(Name);
- if (F == 0) {
- // Nope, add it
- Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
- New->setAttributes(AttributeList);
- FunctionList.push_back(New);
- return New; // Return the new prototype.
- }
-
- // Otherwise, we just found the existing function or a prototype.
- return F;
-}
-
-Constant *Module::getOrInsertFunction(StringRef Name,
- FunctionType *Ty) {
- return getOrInsertFunction(Name, Ty, AttrListPtr());
-}
-
-// getOrInsertFunction - Look up the specified function in the module symbol
-// table. If it does not exist, add a prototype for the function and return it.
-// This version of the method takes a null terminated list of function
-// arguments, which makes it easier for clients to use.
-//
-Constant *Module::getOrInsertFunction(StringRef Name,
- AttrListPtr AttributeList,
- Type *RetTy, ...) {
- va_list Args;
- va_start(Args, RetTy);
-
- // Build the list of argument types...
- std::vector<Type*> ArgTys;
- while (Type *ArgTy = va_arg(Args, Type*))
- ArgTys.push_back(ArgTy);
-
- va_end(Args);
-
- // Build the function type and chain to the other getOrInsertFunction...
- return getOrInsertFunction(Name,
- FunctionType::get(RetTy, ArgTys, false),
- AttributeList);
-}
-
-Constant *Module::getOrInsertFunction(StringRef Name,
- Type *RetTy, ...) {
- va_list Args;
- va_start(Args, RetTy);
-
- // Build the list of argument types...
- std::vector<Type*> ArgTys;
- while (Type *ArgTy = va_arg(Args, Type*))
- ArgTys.push_back(ArgTy);
-
- va_end(Args);
-
- // Build the function type and chain to the other getOrInsertFunction...
- return getOrInsertFunction(Name,
- FunctionType::get(RetTy, ArgTys, false),
- AttrListPtr());
-}
-
-// getFunction - Look up the specified function in the module symbol table.
-// If it does not exist, return null.
-//
-Function *Module::getFunction(StringRef Name) const {
- return dyn_cast_or_null<Function>(getNamedValue(Name));
-}
-
-//===----------------------------------------------------------------------===//
-// Methods for easy access to the global variables in the module.
-//
-
-/// getGlobalVariable - Look up the specified global variable in the module
-/// symbol table. If it does not exist, return null. The type argument
-/// should be the underlying type of the global, i.e., it should not have
-/// the top-level PointerType, which represents the address of the global.
-/// If AllowLocal is set to true, this function will also return globals
-/// that have local linkage. By default, such globals are not returned.
-///
-GlobalVariable *Module::getGlobalVariable(StringRef Name,
- bool AllowLocal) const {
- if (GlobalVariable *Result =
- dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
- if (AllowLocal || !Result->hasLocalLinkage())
- return Result;
- return 0;
-}
-
-/// getOrInsertGlobal - Look up the specified global in the module symbol table.
-/// 1. If it does not exist, add a declaration of the global and return it.
-/// 2. Else, the global exists but has the wrong type: return the global
-/// with a constantexpr cast to the right type.
-/// 3. Finally, if the existing global is the correct declaration, return the
-/// existing global.
-Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
- // See if we have a definition for the specified global already.
- GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
- if (GV == 0) {
- // Nope, add it
- GlobalVariable *New =
- new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage,
- 0, Name);
- return New; // Return the new declaration.
- }
-
- // If the variable exists but has the wrong type, return a bitcast to the
- // right type.
- if (GV->getType() != PointerType::getUnqual(Ty))
- return ConstantExpr::getBitCast(GV, PointerType::getUnqual(Ty));
-
- // Otherwise, we just found the existing global or a declaration.
- return GV;
-}
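-
-// Usage sketch (illustrative; "__my_counter" is a hypothetical name):
-//
-//   Constant *G =
-//       M.getOrInsertGlobal("__my_counter", Type::getInt32Ty(M.getContext()));
-//
-// G is either an i32 global named __my_counter (fresh or preexisting) or a
-// bitcast of a differently typed global with that name, per the cases above.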
-
-//===----------------------------------------------------------------------===//
-// Methods for easy access to the global aliases in the module.
-//
-
-// getNamedAlias - Look up the specified alias in the module symbol table.
-// If it does not exist, return null.
-//
-GlobalAlias *Module::getNamedAlias(StringRef Name) const {
- return dyn_cast_or_null<GlobalAlias>(getNamedValue(Name));
-}
-
-/// getNamedMetadata - Return the first NamedMDNode in the module with the
-/// specified name. This method returns null if a NamedMDNode with the
-/// specified name is not found.
-NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
- SmallString<256> NameData;
- StringRef NameRef = Name.toStringRef(NameData);
- return static_cast<StringMap<NamedMDNode*> *>(NamedMDSymTab)->lookup(NameRef);
-}
-
-/// getOrInsertNamedMetadata - Return the first named MDNode in the module
-/// with the specified name. This method returns a new NamedMDNode if a
-/// NamedMDNode with the specified name is not found.
-NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
- NamedMDNode *&NMD =
- (*static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab))[Name];
- if (!NMD) {
- NMD = new NamedMDNode(Name);
- NMD->setParent(this);
- NamedMDList.push_back(NMD);
- }
- return NMD;
-}
-
-/// eraseNamedMetadata - Remove the given NamedMDNode from this module and
-/// delete it.
-void Module::eraseNamedMetadata(NamedMDNode *NMD) {
- static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
- NamedMDList.erase(NMD);
-}
-
-/// getModuleFlagsMetadata - Returns the module flags in the provided vector.
-void Module::
-getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const {
- const NamedMDNode *ModFlags = getModuleFlagsMetadata();
- if (!ModFlags) return;
-
- for (unsigned i = 0, e = ModFlags->getNumOperands(); i != e; ++i) {
- MDNode *Flag = ModFlags->getOperand(i);
- ConstantInt *Behavior = cast<ConstantInt>(Flag->getOperand(0));
- MDString *Key = cast<MDString>(Flag->getOperand(1));
- Value *Val = Flag->getOperand(2);
- Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()),
- Key, Val));
- }
-}
-
-/// getModuleFlagsMetadata - Returns the NamedMDNode in the module that
-/// represents module-level flags. This method returns null if there are no
-/// module-level flags.
-NamedMDNode *Module::getModuleFlagsMetadata() const {
- return getNamedMetadata("llvm.module.flags");
-}
-
-/// getOrInsertModuleFlagsMetadata - Returns the NamedMDNode in the module that
-/// represents module-level flags. If module-level flags aren't found, it
-/// creates the named metadata that contains them.
-NamedMDNode *Module::getOrInsertModuleFlagsMetadata() {
- return getOrInsertNamedMetadata("llvm.module.flags");
-}
-
-/// addModuleFlag - Add a module-level flag to the module-level flags
-/// metadata. It will create the module-level flags named metadata if it doesn't
-/// already exist.
-void Module::addModuleFlag(ModFlagBehavior Behavior, StringRef Key,
- Value *Val) {
- Type *Int32Ty = Type::getInt32Ty(Context);
- Value *Ops[3] = {
- ConstantInt::get(Int32Ty, Behavior), MDString::get(Context, Key), Val
- };
- getOrInsertModuleFlagsMetadata()->addOperand(MDNode::get(Context, Ops));
-}
-
-void Module::addModuleFlag(ModFlagBehavior Behavior, StringRef Key,
- uint32_t Val) {
- Type *Int32Ty = Type::getInt32Ty(Context);
- addModuleFlag(Behavior, Key, ConstantInt::get(Int32Ty, Val));
-}
-
-void Module::addModuleFlag(MDNode *Node) {
- assert(Node->getNumOperands() == 3 &&
- "Invalid number of operands for module flag!");
- assert(isa<ConstantInt>(Node->getOperand(0)) &&
- isa<MDString>(Node->getOperand(1)) &&
- "Invalid operand types for module flag!");
- getOrInsertModuleFlagsMetadata()->addOperand(Node);
-}
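-
-// Usage sketch (illustrative; the key below is hypothetical, and Error is
-// assumed to be the ModFlagBehavior enumerator with value 1):
-//
-//   M.addModuleFlag(Module::Error, "my-flag", 1);
-//
-// appends the triple !{i32 1, !"my-flag", i32 1} to !llvm.module.flags,
-// where the leading i32 encodes the behavior.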
-
-//===----------------------------------------------------------------------===//
-// Methods to control the materialization of GlobalValues in the Module.
-//
-void Module::setMaterializer(GVMaterializer *GVM) {
- assert(!Materializer &&
- "Module already has a GVMaterializer. Call MaterializeAllPermanently"
- " to clear it out before setting another one.");
- Materializer.reset(GVM);
-}
-
-bool Module::isMaterializable(const GlobalValue *GV) const {
- if (Materializer)
- return Materializer->isMaterializable(GV);
- return false;
-}
-
-bool Module::isDematerializable(const GlobalValue *GV) const {
- if (Materializer)
- return Materializer->isDematerializable(GV);
- return false;
-}
-
-bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) {
- if (Materializer)
- return Materializer->Materialize(GV, ErrInfo);
- return false;
-}
-
-void Module::Dematerialize(GlobalValue *GV) {
- if (Materializer)
- return Materializer->Dematerialize(GV);
-}
-
-bool Module::MaterializeAll(std::string *ErrInfo) {
- if (!Materializer)
- return false;
- return Materializer->MaterializeModule(this, ErrInfo);
-}
-
-bool Module::MaterializeAllPermanently(std::string *ErrInfo) {
- if (MaterializeAll(ErrInfo))
- return true;
- Materializer.reset();
- return false;
-}
-
-//===----------------------------------------------------------------------===//
-// Other module related stuff.
-//
-
-
-// dropAllReferences() - This function causes all the subelements to "let go"
-// of all references that they are maintaining. This allows one to 'delete' a
-// whole module at a time, even though there may be circular references... first
-// all references are dropped, and all use counts go to zero. Then everything
-// is deleted for real. Note that no operations are valid on an object that
-// has "dropped all references", except operator delete.
-//
-void Module::dropAllReferences() {
- for (Module::iterator I = begin(), E = end(); I != E; ++I)
- I->dropAllReferences();
-
- for (Module::global_iterator I = global_begin(), E = global_end(); I != E; ++I)
- I->dropAllReferences();
-
- for (Module::alias_iterator I = alias_begin(), E = alias_end(); I != E; ++I)
- I->dropAllReferences();
-}
-
-void Module::addLibrary(StringRef Lib) {
- for (Module::lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I)
- if (*I == Lib)
- return;
- LibraryList.push_back(Lib);
-}
-
-void Module::removeLibrary(StringRef Lib) {
- LibraryListType::iterator I = LibraryList.begin();
- LibraryListType::iterator E = LibraryList.end();
- for (;I != E; ++I)
- if (*I == Lib) {
- LibraryList.erase(I);
- return;
- }
-}
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
deleted file mode 100644
index 994a7ffceea5..000000000000
--- a/lib/VMCore/Pass.cpp
+++ /dev/null
@@ -1,297 +0,0 @@
-//===- Pass.cpp - LLVM Pass Infrastructure Implementation -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the LLVM Pass infrastructure. It is primarily
-// responsible for ensuring that passes are executed and batched together
-// optimally.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
-#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/PassNameParser.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Pass Implementation
-//
-
-// Force out-of-line virtual method.
-Pass::~Pass() {
- delete Resolver;
-}
-
-// Force out-of-line virtual method.
-ModulePass::~ModulePass() { }
-
-Pass *ModulePass::createPrinterPass(raw_ostream &O,
- const std::string &Banner) const {
- return createPrintModulePass(&O, false, Banner);
-}
-
-PassManagerType ModulePass::getPotentialPassManagerType() const {
- return PMT_ModulePassManager;
-}
-
-bool Pass::mustPreserveAnalysisID(char &AID) const {
- return Resolver->getAnalysisIfAvailable(&AID, true) != 0;
-}
-
-// dumpPassStructure - Implement the -debug-pass=Structure option
-void Pass::dumpPassStructure(unsigned Offset) {
- dbgs().indent(Offset*2) << getPassName() << "\n";
-}
-
-/// getPassName - Return a nice clean name for a pass. This is usually
-/// implemented in terms of the name that is registered by one of the
-/// Registration templates, but can be overloaded directly.
-///
-const char *Pass::getPassName() const {
- AnalysisID AID = getPassID();
- const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(AID);
- if (PI)
- return PI->getPassName();
- return "Unnamed pass: implement Pass::getPassName()";
-}
-
-void Pass::preparePassManager(PMStack &) {
- // By default, don't do anything.
-}
-
-PassManagerType Pass::getPotentialPassManagerType() const {
- // Default implementation.
- return PMT_Unknown;
-}
-
-void Pass::getAnalysisUsage(AnalysisUsage &) const {
- // By default, no analysis results are used, all are invalidated.
-}
-
-void Pass::releaseMemory() {
- // By default, don't do anything.
-}
-
-void Pass::verifyAnalysis() const {
- // By default, don't do anything.
-}
-
-void *Pass::getAdjustedAnalysisPointer(AnalysisID AID) {
- return this;
-}
-
-ImmutablePass *Pass::getAsImmutablePass() {
- return 0;
-}
-
-PMDataManager *Pass::getAsPMDataManager() {
- return 0;
-}
-
-void Pass::setResolver(AnalysisResolver *AR) {
- assert(!Resolver && "Resolver is already set");
- Resolver = AR;
-}
-
-// print - Print out the internal state of the pass. This is called by Analyze
-// to print out the contents of an analysis. Otherwise it is not necessary to
-// implement this method.
-//
-void Pass::print(raw_ostream &O,const Module*) const {
- O << "Pass::print not implemented for pass: '" << getPassName() << "'!\n";
-}
-
-// dump - call print(cerr);
-void Pass::dump() const {
- print(dbgs(), 0);
-}
-
-//===----------------------------------------------------------------------===//
-// ImmutablePass Implementation
-//
-// Force out-of-line virtual method.
-ImmutablePass::~ImmutablePass() { }
-
-void ImmutablePass::initializePass() {
- // By default, don't do anything.
-}
-
-//===----------------------------------------------------------------------===//
-// FunctionPass Implementation
-//
-
-Pass *FunctionPass::createPrinterPass(raw_ostream &O,
- const std::string &Banner) const {
- return createPrintFunctionPass(Banner, &O);
-}
-
-bool FunctionPass::doInitialization(Module &) {
- // By default, don't do anything.
- return false;
-}
-
-bool FunctionPass::doFinalization(Module &) {
- // By default, don't do anything.
- return false;
-}
-
-PassManagerType FunctionPass::getPotentialPassManagerType() const {
- return PMT_FunctionPassManager;
-}
-
-//===----------------------------------------------------------------------===//
-// BasicBlockPass Implementation
-//
-
-Pass *BasicBlockPass::createPrinterPass(raw_ostream &O,
- const std::string &Banner) const {
- llvm_unreachable("BasicBlockPass printing unsupported.");
-}
-
-bool BasicBlockPass::doInitialization(Module &) {
- // By default, don't do anything.
- return false;
-}
-
-bool BasicBlockPass::doInitialization(Function &) {
- // By default, don't do anything.
- return false;
-}
-
-bool BasicBlockPass::doFinalization(Function &) {
- // By default, don't do anything.
- return false;
-}
-
-bool BasicBlockPass::doFinalization(Module &) {
- // By default, don't do anything.
- return false;
-}
-
-PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
- return PMT_BasicBlockPassManager;
-}
-
-const PassInfo *Pass::lookupPassInfo(const void *TI) {
- return PassRegistry::getPassRegistry()->getPassInfo(TI);
-}
-
-const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
- return PassRegistry::getPassRegistry()->getPassInfo(Arg);
-}
-
-Pass *Pass::createPass(AnalysisID ID) {
- const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(ID);
- if (!PI)
- return NULL;
- return PI->createPass();
-}
-
-Pass *PassInfo::createPass() const {
- assert((!isAnalysisGroup() || NormalCtor) &&
- "No default implementation found for analysis group!");
- assert(NormalCtor &&
- "Cannot call createPass on PassInfo without default ctor!");
- return NormalCtor();
-}
-
-//===----------------------------------------------------------------------===//
-// Analysis Group Implementation Code
-//===----------------------------------------------------------------------===//
-
-// RegisterAGBase implementation
-//
-RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID,
- const void *PassID, bool isDefault)
- : PassInfo(Name, InterfaceID) {
- PassRegistry::getPassRegistry()->registerAnalysisGroup(InterfaceID, PassID,
- *this, isDefault);
-}
-
-//===----------------------------------------------------------------------===//
-// PassRegistrationListener implementation
-//
-
-// PassRegistrationListener ctor - Add the current object to the list of
-// PassRegistrationListeners...
-PassRegistrationListener::PassRegistrationListener() {
- PassRegistry::getPassRegistry()->addRegistrationListener(this);
-}
-
-// dtor - Remove object from list of listeners...
-PassRegistrationListener::~PassRegistrationListener() {
- PassRegistry::getPassRegistry()->removeRegistrationListener(this);
-}
-
-// enumeratePasses - Iterate over the registered passes, calling the
-// passEnumerate callback on each PassInfo object.
-//
-void PassRegistrationListener::enumeratePasses() {
- PassRegistry::getPassRegistry()->enumerateWith(this);
-}
-
-PassNameParser::~PassNameParser() {}
-
-//===----------------------------------------------------------------------===//
-// AnalysisUsage Class Implementation
-//
-
-namespace {
- struct GetCFGOnlyPasses : public PassRegistrationListener {
- typedef AnalysisUsage::VectorType VectorType;
- VectorType &CFGOnlyList;
- GetCFGOnlyPasses(VectorType &L) : CFGOnlyList(L) {}
-
- void passEnumerate(const PassInfo *P) {
- if (P->isCFGOnlyPass())
- CFGOnlyList.push_back(P->getTypeInfo());
- }
- };
-}
-
-// setPreservesCFG - This function should be called by the pass, iff it does
-// not:
-//
-// 1. Add or remove basic blocks from the function
-// 2. Modify terminator instructions in any way.
-//
-// This function annotates the AnalysisUsage info object to say that analyses
-// that only depend on the CFG are preserved by this pass.
-//
-void AnalysisUsage::setPreservesCFG() {
- // Since this transformation doesn't modify the CFG, it preserves all analyses
- // that only depend on the CFG (like dominators, loop info, etc...)
- GetCFGOnlyPasses(Preserved).enumeratePasses();
-}
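-
-// Usage sketch (illustrative, not from the original file): a transform that
-// only rewrites instructions in place advertises this from its
-// getAnalysisUsage override:
-//
-//   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-//     AU.setPreservesCFG();
-//   }
-//
-// so CFG-only analyses such as dominators and loop info stay valid.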
-
-AnalysisUsage &AnalysisUsage::addPreserved(StringRef Arg) {
- const PassInfo *PI = Pass::lookupPassInfo(Arg);
- // If the pass exists, preserve it. Otherwise silently do nothing.
- if (PI) Preserved.push_back(PI->getTypeInfo());
- return *this;
-}
-
-AnalysisUsage &AnalysisUsage::addRequiredID(const void *ID) {
- Required.push_back(ID);
- return *this;
-}
-
-AnalysisUsage &AnalysisUsage::addRequiredID(char &ID) {
- Required.push_back(&ID);
- return *this;
-}
-
-AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(char &ID) {
- Required.push_back(&ID);
- RequiredTransitive.push_back(&ID);
- return *this;
-}
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
deleted file mode 100644
index 53f11499e4b9..000000000000
--- a/lib/VMCore/PassManager.cpp
+++ /dev/null
@@ -1,1861 +0,0 @@
-//===- PassManager.cpp - LLVM Pass Infrastructure Implementation ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the LLVM Pass Manager infrastructure.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "llvm/PassManagers.h"
-#include "llvm/PassManager.h"
-#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Timer.h"
-#include "llvm/Module.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/PassNameParser.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Mutex.h"
-#include <algorithm>
-#include <map>
-using namespace llvm;
-
-// See PassManagers.h for Pass Manager infrastructure overview.
-
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-// Pass debugging information. Often it is useful to find out what pass is
-// running when a crash occurs in a utility. When this library is compiled with
-// debugging on, a command line option (--debug-pass) is enabled that causes the
-// pass name to be printed before it executes.
-//
-
-// Different debug levels that can be enabled...
-enum PassDebugLevel {
- None, Arguments, Structure, Executions, Details
-};
-
-static cl::opt<enum PassDebugLevel>
-PassDebugging("debug-pass", cl::Hidden,
- cl::desc("Print PassManager debugging information"),
- cl::values(
- clEnumVal(None , "disable debug output"),
- clEnumVal(Arguments , "print pass arguments to pass to 'opt'"),
- clEnumVal(Structure , "print pass structure before run()"),
- clEnumVal(Executions, "print pass name before it is executed"),
- clEnumVal(Details , "print pass details when it is executed"),
- clEnumValEnd));
-
-typedef llvm::cl::list<const llvm::PassInfo *, bool, PassNameParser>
-PassOptionList;
-
-// Print IR out before/after specified passes.
-static PassOptionList
-PrintBefore("print-before",
- llvm::cl::desc("Print IR before specified passes"),
- cl::Hidden);
-
-static PassOptionList
-PrintAfter("print-after",
- llvm::cl::desc("Print IR after specified passes"),
- cl::Hidden);
-
-static cl::opt<bool>
-PrintBeforeAll("print-before-all",
- llvm::cl::desc("Print IR before each pass"),
- cl::init(false));
-static cl::opt<bool>
-PrintAfterAll("print-after-all",
- llvm::cl::desc("Print IR after each pass"),
- cl::init(false));
-
-/// This is a helper to determine whether to print IR before or
-/// after a pass.
-static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI,
- PassOptionList &PassesToPrint) {
- for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
- const llvm::PassInfo *PassInf = PassesToPrint[i];
- if (PassInf && PassInf->getPassArgument() == PI->getPassArgument())
- return true;
- }
- return false;
-}
-
-/// This is a utility to check whether a pass should have IR dumped
-/// before it.
-static bool ShouldPrintBeforePass(const PassInfo *PI) {
- return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PI, PrintBefore);
-}
-
-/// This is a utility to check whether a pass should have IR dumped
-/// after it.
-static bool ShouldPrintAfterPass(const PassInfo *PI) {
- return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter);
-}
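-
-// Usage sketch (illustrative): these helpers back the flags declared above,
-// e.g.
-//
-//   opt -print-before=instcombine -print-after-all foo.bc
-//
-// dumps IR once before instcombine and again after every pass.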
-
-} // End of llvm namespace
-
-/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
-/// or higher is specified.
-bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
- return PassDebugging >= Executions;
-}
-
-
-void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
- if (V == 0 && M == 0)
- OS << "Releasing pass '";
- else
- OS << "Running pass '";
-
- OS << P->getPassName() << "'";
-
- if (M) {
- OS << " on module '" << M->getModuleIdentifier() << "'.\n";
- return;
- }
- if (V == 0) {
- OS << '\n';
- return;
- }
-
- OS << " on ";
- if (isa<Function>(V))
- OS << "function";
- else if (isa<BasicBlock>(V))
- OS << "basic block";
- else
- OS << "value";
-
- OS << " '";
- WriteAsOperand(OS, V, /*PrintTy=*/false, M);
- OS << "'\n";
-}
-
-
-namespace {
-
-//===----------------------------------------------------------------------===//
-// BBPassManager
-//
-/// BBPassManager manages BasicBlockPasses. It batches all the
-/// passes together and sequences them to process one basic block before
-/// processing the next basic block.
-class BBPassManager : public PMDataManager, public FunctionPass {
-
-public:
- static char ID;
- explicit BBPassManager()
- : PMDataManager(), FunctionPass(ID) {}
-
- /// Execute all of the passes scheduled for execution. Keep track of
- /// whether any of the passes modifies the function, and if so, return true.
- bool runOnFunction(Function &F);
-
- /// Pass Manager itself does not invalidate any analysis info.
- void getAnalysisUsage(AnalysisUsage &Info) const {
- Info.setPreservesAll();
- }
-
- bool doInitialization(Module &M);
- bool doInitialization(Function &F);
- bool doFinalization(Module &M);
- bool doFinalization(Function &F);
-
- virtual PMDataManager *getAsPMDataManager() { return this; }
- virtual Pass *getAsPass() { return this; }
-
- virtual const char *getPassName() const {
- return "BasicBlock Pass Manager";
- }
-
- // Print passes managed by this manager
- void dumpPassStructure(unsigned Offset) {
- llvm::dbgs().indent(Offset*2) << "BasicBlockPass Manager\n";
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- BP->dumpPassStructure(Offset + 1);
- dumpLastUses(BP, Offset+1);
- }
- }
-
- BasicBlockPass *getContainedPass(unsigned N) {
- assert(N < PassVector.size() && "Pass number out of range!");
- BasicBlockPass *BP = static_cast<BasicBlockPass *>(PassVector[N]);
- return BP;
- }
-
- virtual PassManagerType getPassManagerType() const {
- return PMT_BasicBlockPassManager;
- }
-};
-
-char BBPassManager::ID = 0;
-}
-
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-// FunctionPassManagerImpl
-//
-/// FunctionPassManagerImpl manages FPPassManagers
-class FunctionPassManagerImpl : public Pass,
- public PMDataManager,
- public PMTopLevelManager {
- virtual void anchor();
-private:
- bool wasRun;
-public:
- static char ID;
- explicit FunctionPassManagerImpl() :
- Pass(PT_PassManager, ID), PMDataManager(),
- PMTopLevelManager(new FPPassManager()), wasRun(false) {}
-
- /// add - Add a pass to the queue of passes to run. This passes ownership of
- /// the Pass to the PassManager. When the PassManager is destroyed, the pass
- /// will be destroyed as well, so there is no need to delete the pass. This
- /// implies that all passes MUST be allocated with 'new'.
- void add(Pass *P) {
- schedulePass(P);
- }
-
- /// createPrinterPass - Get a function printer pass.
- Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
- return createPrintFunctionPass(Banner, &O);
- }
-
- // Prepare for running an on-the-fly pass, freeing memory if needed
- // from a previous run.
- void releaseMemoryOnTheFly();
-
- /// run - Execute all of the passes scheduled for execution. Keep track of
- /// whether any of the passes modifies the module, and if so, return true.
- bool run(Function &F);
-
- /// doInitialization - Run all of the initializers for the function passes.
- ///
- bool doInitialization(Module &M);
-
- /// doFinalization - Run all of the finalizers for the function passes.
- ///
- bool doFinalization(Module &M);
-
-
- virtual PMDataManager *getAsPMDataManager() { return this; }
- virtual Pass *getAsPass() { return this; }
- virtual PassManagerType getTopLevelPassManagerType() {
- return PMT_FunctionPassManager;
- }
-
- /// Pass Manager itself does not invalidate any analysis info.
- void getAnalysisUsage(AnalysisUsage &Info) const {
- Info.setPreservesAll();
- }
-
- FPPassManager *getContainedManager(unsigned N) {
- assert(N < PassManagers.size() && "Pass number out of range!");
- FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]);
- return FP;
- }
-};
-
-void FunctionPassManagerImpl::anchor() {}
-
-char FunctionPassManagerImpl::ID = 0;
-
-//===----------------------------------------------------------------------===//
-// MPPassManager
-//
-/// MPPassManager manages ModulePasses and function pass managers.
-/// It batches all Module passes and function pass managers together and
-/// sequences them to process one module.
-class MPPassManager : public Pass, public PMDataManager {
-public:
- static char ID;
- explicit MPPassManager() :
- Pass(PT_PassManager, ID), PMDataManager() { }
-
- // Delete on the fly managers.
- virtual ~MPPassManager() {
- for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
- I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
- I != E; ++I) {
- FunctionPassManagerImpl *FPP = I->second;
- delete FPP;
- }
- }
-
- /// createPrinterPass - Get a module printer pass.
- Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
- return createPrintModulePass(&O, false, Banner);
- }
-
- /// run - Execute all of the passes scheduled for execution. Keep track of
- /// whether any of the passes modifies the module, and if so, return true.
- bool runOnModule(Module &M);
-
- /// Pass Manager itself does not invalidate any analysis info.
- void getAnalysisUsage(AnalysisUsage &Info) const {
- Info.setPreservesAll();
- }
-
- /// Add RequiredPass into list of lower level passes required by pass P.
- /// RequiredPass is run on the fly by Pass Manager when P requests it
- /// through getAnalysis interface.
- virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
-
- /// Return function pass corresponding to PassInfo PI, that is
- /// required by module pass MP. Instantiate analysis pass, by using
- /// its runOnFunction() for function F.
- virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F);
-
- virtual const char *getPassName() const {
- return "Module Pass Manager";
- }
-
- virtual PMDataManager *getAsPMDataManager() { return this; }
- virtual Pass *getAsPass() { return this; }
-
- // Print passes managed by this manager
- void dumpPassStructure(unsigned Offset) {
- llvm::dbgs().indent(Offset*2) << "ModulePass Manager\n";
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- ModulePass *MP = getContainedPass(Index);
- MP->dumpPassStructure(Offset + 1);
- std::map<Pass *, FunctionPassManagerImpl *>::const_iterator I =
- OnTheFlyManagers.find(MP);
- if (I != OnTheFlyManagers.end())
- I->second->dumpPassStructure(Offset + 2);
- dumpLastUses(MP, Offset+1);
- }
- }
-
- ModulePass *getContainedPass(unsigned N) {
- assert(N < PassVector.size() && "Pass number out of range!");
- return static_cast<ModulePass *>(PassVector[N]);
- }
-
- virtual PassManagerType getPassManagerType() const {
- return PMT_ModulePassManager;
- }
-
- private:
- /// Collection of on the fly FPPassManagers. These managers manage
- /// function passes that are required by module passes.
- std::map<Pass *, FunctionPassManagerImpl *> OnTheFlyManagers;
-};
-
-char MPPassManager::ID = 0;
-//===----------------------------------------------------------------------===//
-// PassManagerImpl
-//
-
-/// PassManagerImpl manages MPPassManagers
-class PassManagerImpl : public Pass,
- public PMDataManager,
- public PMTopLevelManager {
- virtual void anchor();
-
-public:
- static char ID;
- explicit PassManagerImpl() :
- Pass(PT_PassManager, ID), PMDataManager(),
- PMTopLevelManager(new MPPassManager()) {}
-
- /// add - Add a pass to the queue of passes to run. This passes ownership of
- /// the Pass to the PassManager. When the PassManager is destroyed, the pass
- /// will be destroyed as well, so there is no need to delete the pass. This
- /// implies that all passes MUST be allocated with 'new'.
- void add(Pass *P) {
- schedulePass(P);
- }
-
- /// createPrinterPass - Get a module printer pass.
- Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
- return createPrintModulePass(&O, false, Banner);
- }
-
- /// run - Execute all of the passes scheduled for execution. Keep track of
- /// whether any of the passes modifies the module, and if so, return true.
- bool run(Module &M);
-
- /// Pass Manager itself does not invalidate any analysis info.
- void getAnalysisUsage(AnalysisUsage &Info) const {
- Info.setPreservesAll();
- }
-
- virtual PMDataManager *getAsPMDataManager() { return this; }
- virtual Pass *getAsPass() { return this; }
- virtual PassManagerType getTopLevelPassManagerType() {
- return PMT_ModulePassManager;
- }
-
- MPPassManager *getContainedManager(unsigned N) {
- assert(N < PassManagers.size() && "Pass number out of range!");
- MPPassManager *MP = static_cast<MPPassManager *>(PassManagers[N]);
- return MP;
- }
-};
-
-void PassManagerImpl::anchor() {}
-
-char PassManagerImpl::ID = 0;
-} // End of llvm namespace
-
-namespace {
-
-//===----------------------------------------------------------------------===//
-/// TimingInfo Class - This class is used to calculate information about the
-/// amount of time each pass takes to execute. This only happens when
-/// -time-passes is enabled on the command line.
-///
-
-static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
-
-class TimingInfo {
- DenseMap<Pass*, Timer*> TimingData;
- TimerGroup TG;
-public:
- // Use the 'createTheTimeInfo' member to get this.
- TimingInfo() : TG("... Pass execution timing report ...") {}
-
- // Destructor - print out the timing report as the timers are destroyed.
- ~TimingInfo() {
- // Delete all of the timers, which accumulate their info into the
- // TimerGroup.
- for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(),
- E = TimingData.end(); I != E; ++I)
- delete I->second;
- // TimerGroup is deleted next, printing the report.
- }
-
- // createTheTimeInfo - This method either initializes the TheTimeInfo pointer
- // to a non-null value (if the -time-passes option is enabled) or it leaves it
- // null. It may be called multiple times.
- static void createTheTimeInfo();
-
- /// getPassTimer - Return the timer for the specified pass if it exists.
- Timer *getPassTimer(Pass *P) {
- if (P->getAsPMDataManager())
- return 0;
-
- sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
- Timer *&T = TimingData[P];
- if (T == 0)
- T = new Timer(P->getPassName(), TG);
- return T;
- }
-};
-
-} // End of anon namespace
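-
-// Editorial sketch (not part of the original file): how pass execution
-// sites use this class. getPassTimer (defined near the end of this file)
-// returns null when -time-passes is off, and TimeRegion treats a null
-// Timer as a no-op, so the wrapper costs nothing in the common case.
-#if 0
-static void runTimed(FunctionPass *FP, Function &F) {
- TimeRegion PassTimer(getPassTimer(FP)); // starts here, stops at scope exit
- FP->runOnFunction(F);
-}
-#endif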
-
-static TimingInfo *TheTimeInfo;
-
-//===----------------------------------------------------------------------===//
-// PMTopLevelManager implementation
-
-/// Initialize top level manager. Create first pass manager.
-PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
- PMDM->setTopLevelManager(this);
- addPassManager(PMDM);
- activeStack.push(PMDM);
-}
-
-/// Set pass P as the last user of the given analysis passes.
-void
-PMTopLevelManager::setLastUser(ArrayRef<Pass*> AnalysisPasses, Pass *P) {
- unsigned PDepth = 0;
- if (P->getResolver())
- PDepth = P->getResolver()->getPMDataManager().getDepth();
-
- for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(),
- E = AnalysisPasses.end(); I != E; ++I) {
- Pass *AP = *I;
- LastUser[AP] = P;
-
- if (P == AP)
- continue;
-
- // Update the last users of passes that are transitively required by AP.
- AnalysisUsage *AnUsage = findAnalysisUsage(AP);
- const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
- SmallVector<Pass *, 12> LastUses;
- SmallVector<Pass *, 12> LastPMUses;
- for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
- E = IDs.end(); I != E; ++I) {
- Pass *AnalysisPass = findAnalysisPass(*I);
- assert(AnalysisPass && "Expected analysis pass to exist.");
- AnalysisResolver *AR = AnalysisPass->getResolver();
- assert(AR && "Expected analysis resolver to exist.");
- unsigned APDepth = AR->getPMDataManager().getDepth();
-
- if (PDepth == APDepth)
- LastUses.push_back(AnalysisPass);
- else if (PDepth > APDepth)
- LastPMUses.push_back(AnalysisPass);
- }
-
- setLastUser(LastUses, P);
-
- // If this pass has a corresponding pass manager, push higher level
- // analysis to this pass manager.
- if (P->getResolver())
- setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass());
-
-
- // If AP is the last user of other passes then make P last user of
- // such passes.
- for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(),
- LUE = LastUser.end(); LUI != LUE; ++LUI) {
- if (LUI->second == AP)
- // DenseMap iterator is not invalidated here because
- // this is just updating existing entries.
- LastUser[LUI->first] = P;
- }
- }
-}
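-
-// Editorial worked example (not in the original file) for the LastUser
-// bookkeeping above: if AP was previously recorded as the last user of
-// some pass X, and P now becomes the last user of AP, then P inherits X:
-// before: LastUser[X] == AP, LastUser[AP] == AP
-// after: LastUser[X] == P, LastUser[AP] == P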
-
-/// Collect passes whose last user is P
-void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
- Pass *P) {
- DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
- InversedLastUser.find(P);
- if (DMI == InversedLastUser.end())
- return;
-
- SmallPtrSet<Pass *, 8> &LU = DMI->second;
- for (SmallPtrSet<Pass *, 8>::iterator I = LU.begin(),
- E = LU.end(); I != E; ++I) {
- LastUses.push_back(*I);
- }
-
-}
-
-AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
- AnalysisUsage *AnUsage = NULL;
- DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
- if (DMI != AnUsageMap.end())
- AnUsage = DMI->second;
- else {
- AnUsage = new AnalysisUsage();
- P->getAnalysisUsage(*AnUsage);
- AnUsageMap[P] = AnUsage;
- }
- return AnUsage;
-}
-
-/// Schedule pass P for execution. Make sure that passes required by
-/// P are run before P is run. Update analysis info maintained by
-/// the manager. Remove dead passes. This is a recursive function.
-void PMTopLevelManager::schedulePass(Pass *P) {
-
- // TODO: Allocate a function manager for this pass; otherwise the required
- // set may be inserted into the previous function manager.
-
- // Give pass a chance to prepare the stage.
- P->preparePassManager(activeStack);
-
- // If P is an analysis pass and it is available then do not
- // generate the analysis again. Stale analysis info should not be
- // available at this point.
- const PassInfo *PI =
- PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
- if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
- delete P;
- return;
- }
-
- AnalysisUsage *AnUsage = findAnalysisUsage(P);
-
- bool checkAnalysis = true;
- while (checkAnalysis) {
- checkAnalysis = false;
-
- const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
- for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
- E = RequiredSet.end(); I != E; ++I) {
-
- Pass *AnalysisPass = findAnalysisPass(*I);
- if (!AnalysisPass) {
- const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
-
- if (PI == NULL) {
- // The required pass is not in the global PassRegistry
- dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n";
- dbgs() << "Verify if there is a pass dependency cycle." << "\n";
- dbgs() << "Required Passes:" << "\n";
- for (AnalysisUsage::VectorType::const_iterator I2 = RequiredSet.begin(),
- E = RequiredSet.end(); I2 != E && I2 != I; ++I2) {
- Pass *AnalysisPass2 = findAnalysisPass(*I2);
- if (AnalysisPass2) {
- dbgs() << "\t" << AnalysisPass2->getPassName() << "\n";
- }
- else {
- dbgs() << "\t" << "Error: Required pass not found! Possible causes:" << "\n";
- dbgs() << "\t\t" << "- Pass misconfiguration (e.g.: missing macros)" << "\n";
- dbgs() << "\t\t" << "- Corruption of the global PassRegistry" << "\n";
- }
- }
- }
-
- assert(PI && "Expected required passes to be initialized");
- AnalysisPass = PI->createPass();
- if (P->getPotentialPassManagerType () ==
- AnalysisPass->getPotentialPassManagerType())
- // Schedule analysis pass that is managed by the same pass manager.
- schedulePass(AnalysisPass);
- else if (P->getPotentialPassManagerType () >
- AnalysisPass->getPotentialPassManagerType()) {
- // Schedule analysis pass that is managed by a new manager.
- schedulePass(AnalysisPass);
- // Recheck analysis passes to ensure that required analyses that
- // are already checked are still available.
- checkAnalysis = true;
- }
- else
- // Do not schedule this analysis. Lower level analysis
- // passes are run on the fly.
- delete AnalysisPass;
- }
- }
- }
-
- // Now all required passes are available.
- if (ImmutablePass *IP = P->getAsImmutablePass()) {
- // P is an immutable pass and it will be managed by this
- // top level manager. Set up the analysis resolver to connect them.
- PMDataManager *DM = getAsPMDataManager();
- AnalysisResolver *AR = new AnalysisResolver(*DM);
- P->setResolver(AR);
- DM->initializeAnalysisImpl(P);
- addImmutablePass(IP);
- DM->recordAvailableAnalysis(IP);
- return;
- }
-
- if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) {
- Pass *PP = P->createPrinterPass(
- dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***");
- PP->assignPassManager(activeStack, getTopLevelPassManagerType());
- }
-
- // Add the requested pass to the best available pass manager.
- P->assignPassManager(activeStack, getTopLevelPassManagerType());
-
- if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) {
- Pass *PP = P->createPrinterPass(
- dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***");
- PP->assignPassManager(activeStack, getTopLevelPassManagerType());
- }
-}
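-
-// Editorial sketch (not part of the original file): the visible effect of
-// the scheduling above. Adding a transform pass also schedules the
-// analyses it requires; e.g. LICM requires LoopInfo and DominatorTree:
-#if 0
-static void scheduleExample(Module &M) {
- PassManager PM;
- PM.add(createLICMPass()); // domtree and loops get scheduled first
- PM.run(M);
-}
-#endif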
-
-/// Find the pass that implements Analysis AID. Search immutable
-/// passes and all pass managers. If desired pass is not found
-/// then return NULL.
-Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
-
- // Check pass managers
- for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
- E = PassManagers.end(); I != E; ++I)
- if (Pass *P = (*I)->findAnalysisPass(AID, false))
- return P;
-
- // Check indirect pass managers
- for (SmallVectorImpl<PMDataManager *>::iterator
- I = IndirectPassManagers.begin(),
- E = IndirectPassManagers.end(); I != E; ++I)
- if (Pass *P = (*I)->findAnalysisPass(AID, false))
- return P;
-
- // Check the immutable passes. Iterate in reverse order so that we find
- // the most recently registered passes first.
- for (SmallVector<ImmutablePass *, 8>::reverse_iterator I =
- ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) {
- AnalysisID PI = (*I)->getPassID();
- if (PI == AID)
- return *I;
-
- // If the pass is not found, check the interfaces implemented by the immutable pass
- const PassInfo *PassInf =
- PassRegistry::getPassRegistry()->getPassInfo(PI);
- assert(PassInf && "Expected all immutable passes to be initialized");
- const std::vector<const PassInfo*> &ImmPI =
- PassInf->getInterfacesImplemented();
- for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
- EE = ImmPI.end(); II != EE; ++II) {
- if ((*II)->getTypeInfo() == AID)
- return *I;
- }
- }
-
- return 0;
-}
-
-// Print passes managed by this top level manager.
-void PMTopLevelManager::dumpPasses() const {
-
- if (PassDebugging < Structure)
- return;
-
- // Print out the immutable passes
- for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) {
- ImmutablePasses[i]->dumpPassStructure(0);
- }
-
- // Every class that derives from PMDataManager also derives from Pass
- // (sometimes indirectly), but there's no inheritance relationship
- // between PMDataManager and Pass, so we have to use getAsPass() to get
- // from a PMDataManager* to a Pass*.
- for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
- E = PassManagers.end(); I != E; ++I)
- (*I)->getAsPass()->dumpPassStructure(1);
-}
-
-void PMTopLevelManager::dumpArguments() const {
-
- if (PassDebugging < Arguments)
- return;
-
- dbgs() << "Pass Arguments: ";
- for (SmallVector<ImmutablePass *, 8>::const_iterator I =
- ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
- if (const PassInfo *PI =
- PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) {
- assert(PI && "Expected all immutable passes to be initialized");
- if (!PI->isAnalysisGroup())
- dbgs() << " -" << PI->getPassArgument();
- }
- for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
- E = PassManagers.end(); I != E; ++I)
- (*I)->dumpPassArguments();
- dbgs() << "\n";
-}
-
-void PMTopLevelManager::initializeAllAnalysisInfo() {
- for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
- E = PassManagers.end(); I != E; ++I)
- (*I)->initializeAnalysisInfo();
-
- // Initialize the indirect pass managers
- for (SmallVectorImpl<PMDataManager *>::iterator
- I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
- I != E; ++I)
- (*I)->initializeAnalysisInfo();
-
- for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
- DME = LastUser.end(); DMI != DME; ++DMI) {
- DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
- InversedLastUser.find(DMI->second);
- if (InvDMI != InversedLastUser.end()) {
- SmallPtrSet<Pass *, 8> &L = InvDMI->second;
- L.insert(DMI->first);
- } else {
- SmallPtrSet<Pass *, 8> L; L.insert(DMI->first);
- InversedLastUser[DMI->second] = L;
- }
- }
-}
-
-/// Destructor
-PMTopLevelManager::~PMTopLevelManager() {
- for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
- E = PassManagers.end(); I != E; ++I)
- delete *I;
-
- for (SmallVectorImpl<ImmutablePass *>::iterator
- I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
- delete *I;
-
- for (DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.begin(),
- DME = AnUsageMap.end(); DMI != DME; ++DMI)
- delete DMI->second;
-}
-
-//===----------------------------------------------------------------------===//
-// PMDataManager implementation
-
-/// Augment AvailableAnalysis by adding analyses made available by pass P.
-void PMDataManager::recordAvailableAnalysis(Pass *P) {
- AnalysisID PI = P->getPassID();
-
- AvailableAnalysis[PI] = P;
-
- assert(!AvailableAnalysis.empty());
-
- // This pass is the current implementation of all of the interfaces it
- // implements as well.
- const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
- if (PInf == 0) return;
- const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
- for (unsigned i = 0, e = II.size(); i != e; ++i)
- AvailableAnalysis[II[i]->getTypeInfo()] = P;
-}
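-
-// Editorial note (not in the original file): the interface recording above
-// is what makes analysis groups work. For example, when basicaa runs it is
-// recorded both under its own ID and under the AliasAnalysis group ID, so
-// a later getAnalysis<AliasAnalysis>() can resolve to it.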
-
-// Return true if P preserves high level analysis used by other
-// passes managed by this manager
-bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
- AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
- if (AnUsage->getPreservesAll())
- return true;
-
- const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
- for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(),
- E = HigherLevelAnalysis.end(); I != E; ++I) {
- Pass *P1 = *I;
- if (P1->getAsImmutablePass() == 0 &&
- std::find(PreservedSet.begin(), PreservedSet.end(),
- P1->getPassID()) ==
- PreservedSet.end())
- return false;
- }
-
- return true;
-}
-
-/// verifyPreservedAnalysis -- Verify analysis preserved by pass P.
-void PMDataManager::verifyPreservedAnalysis(Pass *P) {
- // Don't do this unless assertions are enabled.
-#ifdef NDEBUG
- return;
-#endif
- AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
- const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
-
- // Verify preserved analysis
- for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(),
- E = PreservedSet.end(); I != E; ++I) {
- AnalysisID AID = *I;
- if (Pass *AP = findAnalysisPass(AID, true)) {
- TimeRegion PassTimer(getPassTimer(AP));
- AP->verifyAnalysis();
- }
- }
-}
-
-/// Remove Analysis not preserved by Pass P
-void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
- AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
- if (AnUsage->getPreservesAll())
- return;
-
- const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
- for (std::map<AnalysisID, Pass*>::iterator I = AvailableAnalysis.begin(),
- E = AvailableAnalysis.end(); I != E; ) {
- std::map<AnalysisID, Pass*>::iterator Info = I++;
- if (Info->second->getAsImmutablePass() == 0 &&
- std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
- PreservedSet.end()) {
- // Remove this analysis
- if (PassDebugging >= Details) {
- Pass *S = Info->second;
- dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
- dbgs() << S->getPassName() << "'\n";
- }
- AvailableAnalysis.erase(Info);
- }
- }
-
- // Check inherited analysis also. If P is not preserving analysis
- // provided by parent manager then remove it here.
- for (unsigned Index = 0; Index < PMT_Last; ++Index) {
-
- if (!InheritedAnalysis[Index])
- continue;
-
- for (std::map<AnalysisID, Pass*>::iterator
- I = InheritedAnalysis[Index]->begin(),
- E = InheritedAnalysis[Index]->end(); I != E; ) {
- std::map<AnalysisID, Pass *>::iterator Info = I++;
- if (Info->second->getAsImmutablePass() == 0 &&
- std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
- PreservedSet.end()) {
- // Remove this analysis
- if (PassDebugging >= Details) {
- Pass *S = Info->second;
- dbgs() << " -- '" << P->getPassName() << "' is not preserving '";
- dbgs() << S->getPassName() << "'\n";
- }
- InheritedAnalysis[Index]->erase(Info);
- }
- }
- }
-}
-
-/// Remove analysis passes that are not used any longer
-void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
- enum PassDebuggingString DBG_STR) {
-
- SmallVector<Pass *, 12> DeadPasses;
-
- // If this is an on-the-fly manager then it does not have a TPM.
- if (!TPM)
- return;
-
- TPM->collectLastUses(DeadPasses, P);
-
- if (PassDebugging >= Details && !DeadPasses.empty()) {
- dbgs() << " -*- '" << P->getPassName();
- dbgs() << "' is the last user of following pass instances.";
- dbgs() << " Free these instances\n";
- }
-
- for (SmallVectorImpl<Pass *>::iterator I = DeadPasses.begin(),
- E = DeadPasses.end(); I != E; ++I)
- freePass(*I, Msg, DBG_STR);
-}
-
-void PMDataManager::freePass(Pass *P, StringRef Msg,
- enum PassDebuggingString DBG_STR) {
- dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg);
-
- {
- // If the pass crashes releasing memory, remember this.
- PassManagerPrettyStackEntry X(P);
- TimeRegion PassTimer(getPassTimer(P));
-
- P->releaseMemory();
- }
-
- AnalysisID PI = P->getPassID();
- if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
- // Remove the pass itself (if it is not already removed).
- AvailableAnalysis.erase(PI);
-
- // Remove all interfaces this pass implements, for which it is also
- // listed as the available implementation.
- const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
- for (unsigned i = 0, e = II.size(); i != e; ++i) {
- std::map<AnalysisID, Pass*>::iterator Pos =
- AvailableAnalysis.find(II[i]->getTypeInfo());
- if (Pos != AvailableAnalysis.end() && Pos->second == P)
- AvailableAnalysis.erase(Pos);
- }
- }
-}
-
-/// Add pass P into the PassVector. Update
-/// AvailableAnalysis appropriately if ProcessAnalysis is true.
-void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
- // This manager is going to manage pass P. Set up analysis resolver
- // to connect them.
- AnalysisResolver *AR = new AnalysisResolver(*this);
- P->setResolver(AR);
-
- // If a FunctionPass F is the last user of ModulePass info M
- // then the F's manager, not F, records itself as a last user of M.
- SmallVector<Pass *, 12> TransferLastUses;
-
- if (!ProcessAnalysis) {
- // Add pass
- PassVector.push_back(P);
- return;
- }
-
- // At the moment, this pass is the last user of all required passes.
- SmallVector<Pass *, 12> LastUses;
- SmallVector<Pass *, 8> RequiredPasses;
- SmallVector<AnalysisID, 8> ReqAnalysisNotAvailable;
-
- unsigned PDepth = this->getDepth();
-
- collectRequiredAnalysis(RequiredPasses,
- ReqAnalysisNotAvailable, P);
- for (SmallVectorImpl<Pass *>::iterator I = RequiredPasses.begin(),
- E = RequiredPasses.end(); I != E; ++I) {
- Pass *PRequired = *I;
- unsigned RDepth = 0;
-
- assert(PRequired->getResolver() && "Analysis Resolver is not set");
- PMDataManager &DM = PRequired->getResolver()->getPMDataManager();
- RDepth = DM.getDepth();
-
- if (PDepth == RDepth)
- LastUses.push_back(PRequired);
- else if (PDepth > RDepth) {
- // Let the parent claim responsibility for the last use
- TransferLastUses.push_back(PRequired);
- // Keep track of higher level analysis used by this manager.
- HigherLevelAnalysis.push_back(PRequired);
- } else
- llvm_unreachable("Unable to accommodate Required Pass");
- }
-
- // Set P as P's last user until someone starts using P.
- // However, if P is a Pass Manager then it does not need
- // to record its last user.
- if (P->getAsPMDataManager() == 0)
- LastUses.push_back(P);
- TPM->setLastUser(LastUses, P);
-
- if (!TransferLastUses.empty()) {
- Pass *My_PM = getAsPass();
- TPM->setLastUser(TransferLastUses, My_PM);
- TransferLastUses.clear();
- }
-
- // Now, take care of required analyses that are not available.
- for (SmallVectorImpl<AnalysisID>::iterator
- I = ReqAnalysisNotAvailable.begin(),
- E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
- const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
- Pass *AnalysisPass = PI->createPass();
- this->addLowerLevelRequiredPass(P, AnalysisPass);
- }
-
- // Take a note of analysis required and made available by this pass.
- // Remove the analysis not preserved by this pass
- removeNotPreservedAnalysis(P);
- recordAvailableAnalysis(P);
-
- // Add pass
- PassVector.push_back(P);
-}
-
-
-/// Populate RP with the analysis passes that are required by
-/// pass P and are available. Populate RP_NotAvail with the analysis
-/// passes that are required by pass P but are not available.
-void PMDataManager::collectRequiredAnalysis(SmallVectorImpl<Pass *> &RP,
- SmallVectorImpl<AnalysisID> &RP_NotAvail,
- Pass *P) {
- AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
- const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
- for (AnalysisUsage::VectorType::const_iterator
- I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
- if (Pass *AnalysisPass = findAnalysisPass(*I, true))
- RP.push_back(AnalysisPass);
- else
- RP_NotAvail.push_back(*I);
- }
-
- const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
- for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
- E = IDs.end(); I != E; ++I) {
- if (Pass *AnalysisPass = findAnalysisPass(*I, true))
- RP.push_back(AnalysisPass);
- else
- RP_NotAvail.push_back(*I);
- }
-}
-
-// All Required analyses should be available to the pass as it runs! Here
-// we fill in the AnalysisImpls member of the pass so that it can
-// successfully use the getAnalysis() method to retrieve the
-// implementations it needs.
-//
-void PMDataManager::initializeAnalysisImpl(Pass *P) {
- AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
-
- for (AnalysisUsage::VectorType::const_iterator
- I = AnUsage->getRequiredSet().begin(),
- E = AnUsage->getRequiredSet().end(); I != E; ++I) {
- Pass *Impl = findAnalysisPass(*I, true);
- if (Impl == 0)
- // This may be an analysis pass that is initialized on the fly.
- // If that is not the case then it will assert when it is used.
- continue;
- AnalysisResolver *AR = P->getResolver();
- assert(AR && "Analysis Resolver is not set");
- AR->addAnalysisImplsPair(*I, Impl);
- }
-}
-
-/// Find the pass that implements Analysis AID. If desired pass is not found
-/// then return NULL.
-Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
-
- // Check if the AvailableAnalysis map has an entry for AID.
- std::map<AnalysisID, Pass*>::const_iterator I = AvailableAnalysis.find(AID);
-
- if (I != AvailableAnalysis.end())
- return I->second;
-
- // Search Parents through TopLevelManager
- if (SearchParent)
- return TPM->findAnalysisPass(AID);
-
- return NULL;
-}
-
-// Print the list of passes whose last user is P.
-void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
-
- SmallVector<Pass *, 12> LUses;
-
- // If this is an on-the-fly manager then it does not have a TPM.
- if (!TPM)
- return;
-
- TPM->collectLastUses(LUses, P);
-
- for (SmallVectorImpl<Pass *>::iterator I = LUses.begin(),
- E = LUses.end(); I != E; ++I) {
- llvm::dbgs() << "--" << std::string(Offset*2, ' ');
- (*I)->dumpPassStructure(0);
- }
-}
-
-void PMDataManager::dumpPassArguments() const {
- for (SmallVectorImpl<Pass *>::const_iterator I = PassVector.begin(),
- E = PassVector.end(); I != E; ++I) {
- if (PMDataManager *PMD = (*I)->getAsPMDataManager())
- PMD->dumpPassArguments();
- else
- if (const PassInfo *PI =
- PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
- if (!PI->isAnalysisGroup())
- dbgs() << " -" << PI->getPassArgument();
- }
-}
-
-void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
- enum PassDebuggingString S2,
- StringRef Msg) {
- if (PassDebugging < Executions)
- return;
- dbgs() << (void*)this << std::string(getDepth()*2+1, ' ');
- switch (S1) {
- case EXECUTION_MSG:
- dbgs() << "Executing Pass '" << P->getPassName();
- break;
- case MODIFICATION_MSG:
- dbgs() << "Made Modification '" << P->getPassName();
- break;
- case FREEING_MSG:
- dbgs() << " Freeing Pass '" << P->getPassName();
- break;
- default:
- break;
- }
- switch (S2) {
- case ON_BASICBLOCK_MSG:
- dbgs() << "' on BasicBlock '" << Msg << "'...\n";
- break;
- case ON_FUNCTION_MSG:
- dbgs() << "' on Function '" << Msg << "'...\n";
- break;
- case ON_MODULE_MSG:
- dbgs() << "' on Module '" << Msg << "'...\n";
- break;
- case ON_REGION_MSG:
- dbgs() << "' on Region '" << Msg << "'...\n";
- break;
- case ON_LOOP_MSG:
- dbgs() << "' on Loop '" << Msg << "'...\n";
- break;
- case ON_CG_MSG:
- dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n";
- break;
- default:
- break;
- }
-}
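-
-// Editorial note (not in the original file): with -debug-pass=Executions
-// the code above emits lines of the form (the address and pass name are
-// representative only):
-// 0x15d2f10 Executing Pass 'Dead Code Elimination' on Function 'main'...
-// 0x15d2f10 Made Modification 'Dead Code Elimination' on Function 'main'...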
-
-void PMDataManager::dumpRequiredSet(const Pass *P) const {
- if (PassDebugging < Details)
- return;
-
- AnalysisUsage analysisUsage;
- P->getAnalysisUsage(analysisUsage);
- dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet());
-}
-
-void PMDataManager::dumpPreservedSet(const Pass *P) const {
- if (PassDebugging < Details)
- return;
-
- AnalysisUsage analysisUsage;
- P->getAnalysisUsage(analysisUsage);
- dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
-}
-
-void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
- const AnalysisUsage::VectorType &Set) const {
- assert(PassDebugging >= Details);
- if (Set.empty())
- return;
- dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
- for (unsigned i = 0; i != Set.size(); ++i) {
- if (i) dbgs() << ',';
- const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
- if (!PInf) {
- // Some preserved passes, such as AliasAnalysis, may not be initialized by
- // all drivers.
- dbgs() << " Uninitialized Pass";
- continue;
- }
- dbgs() << ' ' << PInf->getPassName();
- }
- dbgs() << '\n';
-}
-
-/// Add RequiredPass into the list of lower level passes required by pass P.
-/// RequiredPass is run on the fly by the Pass Manager when P requests it
-/// through the getAnalysis interface.
-/// This should be handled by a specific pass manager.
-void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
- if (TPM) {
- TPM->dumpArguments();
- TPM->dumpPasses();
- }
-
- // A module level pass may require function level analysis info
- // (e.g. dominator info). The pass manager uses an on-the-fly function
- // pass manager to provide this on demand. In that case, in pass manager
- // terminology, the module level pass is requiring lower level analysis
- // info managed by a lower level pass manager.
-
- // When the pass manager is not able to order the required analysis info,
- // it checks whether any lower level manager will be able to provide this
- // analysis info on demand.
-#ifndef NDEBUG
- dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
- dbgs() << "' required by '" << P->getPassName() << "'\n";
-#endif
- llvm_unreachable("Unable to schedule pass");
-}
-
-Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
- llvm_unreachable("Unable to find on the fly pass");
-}
-
-// Destructor
-PMDataManager::~PMDataManager() {
- for (SmallVectorImpl<Pass *>::iterator I = PassVector.begin(),
- E = PassVector.end(); I != E; ++I)
- delete *I;
-}
-
-//===----------------------------------------------------------------------===//
-// NOTE: Is this the right place to define this method?
-// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist.
-Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
- return PM.findAnalysisPass(ID, dir);
-}
-
-Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
- Function &F) {
- return PM.getOnTheFlyPass(P, AnalysisPI, F);
-}
-
-//===----------------------------------------------------------------------===//
-// BBPassManager implementation
-
-/// Execute all of the passes scheduled for execution by invoking the
-/// runOnBasicBlock method. Keep track of whether any of the passes modifies
-/// the function, and if so, return true.
-bool BBPassManager::runOnFunction(Function &F) {
- if (F.isDeclaration())
- return false;
-
- bool Changed = doInitialization(F);
-
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- bool LocalChanged = false;
-
- dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
- dumpRequiredSet(BP);
-
- initializeAnalysisImpl(BP);
-
- {
- // If the pass crashes, remember this.
- PassManagerPrettyStackEntry X(BP, *I);
- TimeRegion PassTimer(getPassTimer(BP));
-
- LocalChanged |= BP->runOnBasicBlock(*I);
- }
-
- Changed |= LocalChanged;
- if (LocalChanged)
- dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
- I->getName());
- dumpPreservedSet(BP);
-
- verifyPreservedAnalysis(BP);
- removeNotPreservedAnalysis(BP);
- recordAvailableAnalysis(BP);
- removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG);
- }
-
- return doFinalization(F) || Changed;
-}
-
-// Implement doInitialization and doFinalization
-bool BBPassManager::doInitialization(Module &M) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
- Changed |= getContainedPass(Index)->doInitialization(M);
-
- return Changed;
-}
-
-bool BBPassManager::doFinalization(Module &M) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
- Changed |= getContainedPass(Index)->doFinalization(M);
-
- return Changed;
-}
-
-bool BBPassManager::doInitialization(Function &F) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- Changed |= BP->doInitialization(F);
- }
-
- return Changed;
-}
-
-bool BBPassManager::doFinalization(Function &F) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- BasicBlockPass *BP = getContainedPass(Index);
- Changed |= BP->doFinalization(F);
- }
-
- return Changed;
-}
-
-
-//===----------------------------------------------------------------------===//
-// FunctionPassManager implementation
-
-/// Create new Function pass manager
-FunctionPassManager::FunctionPassManager(Module *m) : M(m) {
- FPM = new FunctionPassManagerImpl();
- // FPM is the top level manager.
- FPM->setTopLevelManager(FPM);
-
- AnalysisResolver *AR = new AnalysisResolver(*FPM);
- FPM->setResolver(AR);
-}
-
-FunctionPassManager::~FunctionPassManager() {
- delete FPM;
-}
-
-/// add - Add a pass to the queue of passes to run. This passes
-/// ownership of the Pass to the PassManager. When the
-/// PassManager is destroyed, the pass will be destroyed as well, so
-/// there is no need to delete the pass. (TODO: delete passes.)
-/// This implies that all passes MUST be allocated with 'new'.
-void FunctionPassManager::add(Pass *P) {
- FPM->add(P);
-}
-
-/// run - Execute all of the passes scheduled for execution. Keep
-/// track of whether any of the passes modifies the function, and if
-/// so, return true.
-///
-bool FunctionPassManager::run(Function &F) {
- if (F.isMaterializable()) {
- std::string errstr;
- if (F.Materialize(&errstr))
- report_fatal_error("Error reading bitcode file: " + Twine(errstr));
- }
- return FPM->run(F);
-}
-
-
-/// doInitialization - Run all of the initializers for the function passes.
-///
-bool FunctionPassManager::doInitialization() {
- return FPM->doInitialization(*M);
-}
-
-/// doFinalization - Run all of the finalizers for the function passes.
-///
-bool FunctionPassManager::doFinalization() {
- return FPM->doFinalization(*M);
-}
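-
-// Editorial sketch (not part of the original file): typical client usage
-// of this API, mirroring the JIT tutorials of this era. The pass creators
-// are the standard ones from llvm/Transforms/Scalar.h; the particular
-// combination is illustrative only.
-#if 0
-static bool optimizeFunctions(Module *M) {
- FunctionPassManager FPM(M);
- FPM.add(createInstructionCombiningPass());
- FPM.add(createReassociatePass());
- FPM.add(createCFGSimplificationPass());
- bool Changed = FPM.doInitialization();
- for (Module::iterator F = M->begin(), E = M->end(); F != E; ++F)
-  Changed |= FPM.run(*F);
- Changed |= FPM.doFinalization();
- return Changed;
-}
-#endif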
-
-//===----------------------------------------------------------------------===//
-// FunctionPassManagerImpl implementation
-//
-bool FunctionPassManagerImpl::doInitialization(Module &M) {
- bool Changed = false;
-
- dumpArguments();
- dumpPasses();
-
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
- Changed |= getContainedManager(Index)->doInitialization(M);
-
- return Changed;
-}
-
-bool FunctionPassManagerImpl::doFinalization(Module &M) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
- Changed |= getContainedManager(Index)->doFinalization(M);
-
- return Changed;
-}
-
-/// cleanup - After running all passes, clean up pass manager cache.
-void FPPassManager::cleanup() {
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- FunctionPass *FP = getContainedPass(Index);
- AnalysisResolver *AR = FP->getResolver();
- assert(AR && "Analysis Resolver is not set");
- AR->clearAnalysisImpls();
- }
-}
-
-void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
- if (!wasRun)
- return;
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
- FPPassManager *FPPM = getContainedManager(Index);
- for (unsigned PIndex = 0; PIndex < FPPM->getNumContainedPasses(); ++PIndex) {
- FPPM->getContainedPass(PIndex)->releaseMemory();
- }
- }
- wasRun = false;
-}
-
-// Execute all the passes managed by this top level manager.
-// Return true if any function is modified by a pass.
-bool FunctionPassManagerImpl::run(Function &F) {
- bool Changed = false;
- TimingInfo::createTheTimeInfo();
-
- initializeAllAnalysisInfo();
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
- Changed |= getContainedManager(Index)->runOnFunction(F);
-
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
- getContainedManager(Index)->cleanup();
-
- wasRun = true;
- return Changed;
-}
-
-//===----------------------------------------------------------------------===//
-// FPPassManager implementation
-
-char FPPassManager::ID = 0;
-/// Print passes managed by this manager
-void FPPassManager::dumpPassStructure(unsigned Offset) {
- dbgs().indent(Offset*2) << "FunctionPass Manager\n";
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- FunctionPass *FP = getContainedPass(Index);
- FP->dumpPassStructure(Offset + 1);
- dumpLastUses(FP, Offset+1);
- }
-}
-
-
-/// Execute all of the passes scheduled for execution by invoking the
-/// runOnFunction method. Keep track of whether any of the passes modifies
-/// the function, and if so, return true.
-bool FPPassManager::runOnFunction(Function &F) {
- if (F.isDeclaration())
- return false;
-
- bool Changed = false;
-
- // Collect inherited analysis from Module level pass manager.
- populateInheritedAnalysis(TPM->activeStack);
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- FunctionPass *FP = getContainedPass(Index);
- bool LocalChanged = false;
-
- dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
- dumpRequiredSet(FP);
-
- initializeAnalysisImpl(FP);
-
- {
- PassManagerPrettyStackEntry X(FP, F);
- TimeRegion PassTimer(getPassTimer(FP));
-
- LocalChanged |= FP->runOnFunction(F);
- }
-
- Changed |= LocalChanged;
- if (LocalChanged)
- dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
- dumpPreservedSet(FP);
-
- verifyPreservedAnalysis(FP);
- removeNotPreservedAnalysis(FP);
- recordAvailableAnalysis(FP);
- removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
- }
- return Changed;
-}
-
-bool FPPassManager::runOnModule(Module &M) {
- bool Changed = doInitialization(M);
-
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- Changed |= runOnFunction(*I);
-
- return doFinalization(M) || Changed;
-}
-
-bool FPPassManager::doInitialization(Module &M) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
- Changed |= getContainedPass(Index)->doInitialization(M);
-
- return Changed;
-}
-
-bool FPPassManager::doFinalization(Module &M) {
- bool Changed = false;
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
- Changed |= getContainedPass(Index)->doFinalization(M);
-
- return Changed;
-}
-
-//===----------------------------------------------------------------------===//
-// MPPassManager implementation
-
-/// Execute all of the passes scheduled for execution by invoking the
-/// runOnModule method. Keep track of whether any of the passes modifies
-/// the module, and if so, return true.
-bool
-MPPassManager::runOnModule(Module &M) {
- bool Changed = false;
-
- // Initialize on-the-fly passes
- for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
- I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
- I != E; ++I) {
- FunctionPassManagerImpl *FPP = I->second;
- Changed |= FPP->doInitialization(M);
- }
-
- for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- ModulePass *MP = getContainedPass(Index);
- bool LocalChanged = false;
-
- dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
- dumpRequiredSet(MP);
-
- initializeAnalysisImpl(MP);
-
- {
- PassManagerPrettyStackEntry X(MP, M);
- TimeRegion PassTimer(getPassTimer(MP));
-
- LocalChanged |= MP->runOnModule(M);
- }
-
- Changed |= LocalChanged;
- if (LocalChanged)
- dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
- M.getModuleIdentifier());
- dumpPreservedSet(MP);
-
- verifyPreservedAnalysis(MP);
- removeNotPreservedAnalysis(MP);
- recordAvailableAnalysis(MP);
- removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
- }
-
- // Finalize on-the-fly passes
- for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
- I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
- I != E; ++I) {
- FunctionPassManagerImpl *FPP = I->second;
- // We don't know when an on-the-fly pass will run for the last time,
- // so we need to releaseMemory / finalize here.
- FPP->releaseMemoryOnTheFly();
- Changed |= FPP->doFinalization(M);
- }
- return Changed;
-}
-
-/// Add RequiredPass into the list of lower level passes required by pass P.
-/// RequiredPass is run on the fly by the Pass Manager when P requests it
-/// through the getAnalysis interface.
-void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
- assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
- "Unable to handle Pass that requires lower level Analysis pass");
- assert((P->getPotentialPassManagerType() <
- RequiredPass->getPotentialPassManagerType()) &&
- "Unable to handle Pass that requires lower level Analysis pass");
-
- FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
- if (!FPP) {
- FPP = new FunctionPassManagerImpl();
- // FPP is the top level manager.
- FPP->setTopLevelManager(FPP);
-
- OnTheFlyManagers[P] = FPP;
- }
- FPP->add(RequiredPass);
-
- // Register P as the last user of RequiredPass.
- if (RequiredPass) {
- SmallVector<Pass *, 1> LU;
- LU.push_back(RequiredPass);
- FPP->setLastUser(LU, P);
- }
-}
-
-/// Return the function pass corresponding to PassInfo PI that is
-/// required by module pass MP. Instantiate the analysis pass on the fly
-/// and run it on function F via its runOnFunction().
-Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
- FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
- assert(FPP && "Unable to find on the fly pass");
-
- FPP->releaseMemoryOnTheFly();
- FPP->run(F);
- return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI);
-}
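-
-// Editorial sketch (not part of the original file): the client-side view
-// of the on-the-fly machinery. A module pass that declares
-// AU.addRequired<DominatorTree>() may query the analysis per function;
-// the call reaches getOnTheFlyPass via the AnalysisResolver. MyModulePass
-// is hypothetical.
-#if 0
-bool MyModulePass::runOnModule(Module &M) {
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
-  if (!F->isDeclaration()) {
-   DominatorTree &DT = getAnalysis<DominatorTree>(*F); // served on the fly
-   (void)DT;
-  }
- return false;
-}
-#endif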
-
-
-//===----------------------------------------------------------------------===//
-// PassManagerImpl implementation
-//
-/// run - Execute all of the passes scheduled for execution. Keep track of
-/// whether any of the passes modifies the module, and if so, return true.
-bool PassManagerImpl::run(Module &M) {
- bool Changed = false;
- TimingInfo::createTheTimeInfo();
-
- dumpArguments();
- dumpPasses();
-
- initializeAllAnalysisInfo();
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
- Changed |= getContainedManager(Index)->runOnModule(M);
- return Changed;
-}
-
-//===----------------------------------------------------------------------===//
-// PassManager implementation
-
-/// Create new pass manager
-PassManager::PassManager() {
- PM = new PassManagerImpl();
- // PM is the top level manager
- PM->setTopLevelManager(PM);
-}
-
-PassManager::~PassManager() {
- delete PM;
-}
-
-/// add - Add a pass to the queue of passes to run. This passes ownership of
-/// the Pass to the PassManager. When the PassManager is destroyed, the pass
-/// will be destroyed as well, so there is no need to delete the pass. This
-/// implies that all passes MUST be allocated with 'new'.
-void PassManager::add(Pass *P) {
- PM->add(P);
-}
-
-/// run - Execute all of the passes scheduled for execution. Keep track of
-/// whether any of the passes modifies the module, and if so, return true.
-bool PassManager::run(Module &M) {
- return PM->run(M);
-}
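-
-// Editorial sketch (not part of the original file): a whole-module
-// pipeline driving the code above; the pass selection is illustrative.
-#if 0
-static void runModulePipeline(Module &M) {
- PassManager PM;
- PM.add(createGlobalDCEPass());
- PM.add(createPrintModulePass(&errs(), false, "; after globaldce\n"));
- PM.run(M); // returns true if any pass modified M
-}
-#endif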
-
-//===----------------------------------------------------------------------===//
-// TimingInfo Class - This class is used to calculate information about the
-// amount of time each pass takes to execute. This only happens when
-// -time-passes is enabled on the command line.
-//
-bool llvm::TimePassesIsEnabled = false;
-static cl::opt<bool,true>
-EnableTiming("time-passes", cl::location(TimePassesIsEnabled),
- cl::desc("Time each pass, printing elapsed time for each on exit"));
-
-// createTheTimeInfo - This method either initializes the TheTimeInfo pointer to
-// a non-null value (if the -time-passes option is enabled) or it leaves it
-// null. It may be called multiple times.
-void TimingInfo::createTheTimeInfo() {
- if (!TimePassesIsEnabled || TheTimeInfo) return;
-
- // Constructed the first time this is called, iff -time-passes is enabled.
- // This guarantees that the object will be constructed before static globals,
- // thus it will be destroyed before them.
- static ManagedStatic<TimingInfo> TTI;
- TheTimeInfo = &*TTI;
-}
-
-/// Return the timer for pass P if timing is enabled; otherwise return null.
-Timer *llvm::getPassTimer(Pass *P) {
- if (TheTimeInfo)
- return TheTimeInfo->getPassTimer(P);
- return 0;
-}
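-
-// Editorial note (not in the original file): the usual way to exercise
-// this code is the opt driver, e.g.:
-// $ opt -time-passes -instcombine in.bc -o out.bc
-// which prints the '... Pass execution timing report ...' table on exit.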
-
-//===----------------------------------------------------------------------===//
-// PMStack implementation
-//
-
-// Pop Pass Manager from the stack and clear its analysis info.
-void PMStack::pop() {
-
- PMDataManager *Top = this->top();
- Top->initializeAnalysisInfo();
-
- S.pop_back();
-}
-
-// Push PM on the stack and set its top level manager.
-void PMStack::push(PMDataManager *PM) {
- assert(PM && "Unable to push. Pass Manager expected");
- assert(PM->getDepth()==0 && "Pass Manager depth set too early");
-
- if (!this->empty()) {
- assert(PM->getPassManagerType() > this->top()->getPassManagerType()
- && "pushing bad pass manager to PMStack");
- PMTopLevelManager *TPM = this->top()->getTopLevelManager();
-
- assert(TPM && "Unable to find top level manager");
- TPM->addIndirectPassManager(PM);
- PM->setTopLevelManager(TPM);
- PM->setDepth(this->top()->getDepth()+1);
- }
- else {
- assert((PM->getPassManagerType() == PMT_ModulePassManager
- || PM->getPassManagerType() == PMT_FunctionPassManager)
- && "pushing bad pass manager to PMStack");
- PM->setDepth(1);
- }
-
- S.push_back(PM);
-}
-
-// Dump content of the pass manager stack.
-void PMStack::dump() const {
- for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
- E = S.end(); I != E; ++I)
- dbgs() << (*I)->getAsPass()->getPassName() << ' ';
-
- if (!S.empty())
- dbgs() << '\n';
-}
-
-/// Find appropriate Module Pass Manager in the PM Stack and
-/// add self into that manager.
-void ModulePass::assignPassManager(PMStack &PMS,
- PassManagerType PreferredType) {
- // Find Module Pass Manager
- while (!PMS.empty()) {
- PassManagerType TopPMType = PMS.top()->getPassManagerType();
- if (TopPMType == PreferredType)
- break; // We found the desired pass manager
- else if (TopPMType > PMT_ModulePassManager)
- PMS.pop(); // Pop children pass managers
- else
- break;
- }
- assert(!PMS.empty() && "Unable to find appropriate Pass Manager");
- PMS.top()->add(this);
-}
-
-/// Find appropriate Function Pass Manager or Call Graph Pass Manager
-/// in the PM Stack and add self into that manager.
-void FunctionPass::assignPassManager(PMStack &PMS,
- PassManagerType PreferredType) {
-
- // Find Function Pass Manager
- while (!PMS.empty()) {
- if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
- PMS.pop();
- else
- break;
- }
-
- // Create new Function Pass Manager if needed.
- FPPassManager *FPP;
- if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) {
- FPP = (FPPassManager *)PMS.top();
- } else {
- assert(!PMS.empty() && "Unable to create Function Pass Manager");
- PMDataManager *PMD = PMS.top();
-
- // [1] Create new Function Pass Manager
- FPP = new FPPassManager();
- FPP->populateInheritedAnalysis(PMS);
-
- // [2] Set up new manager's top level manager
- PMTopLevelManager *TPM = PMD->getTopLevelManager();
- TPM->addIndirectPassManager(FPP);
-
- // [3] Assign manager to manage this new manager. This may create
- // and push new managers into PMS
- FPP->assignPassManager(PMS, PMD->getPassManagerType());
-
- // [4] Push new manager into PMS
- PMS.push(FPP);
- }
-
- // Assign FPP as the manager of this pass.
- FPP->add(this);
-}
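-
-// Editorial note (not in the original file): the nesting created above is
-// what -debug-pass=Structure prints; adding one FunctionPass to a module
-// level PassManager dumps roughly:
-// ModulePass Manager
-//  FunctionPass Manager
-//   <the function pass name>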
-
-/// Find appropriate Basic Block Pass Manager in the PM Stack
-/// and add self into that manager.
-void BasicBlockPass::assignPassManager(PMStack &PMS,
- PassManagerType PreferredType) {
- BBPassManager *BBP;
-
- // The Basic Block Pass Manager is a leaf pass manager. It does not handle
- // any other pass manager.
- if (!PMS.empty() &&
- PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
- BBP = (BBPassManager *)PMS.top();
- } else {
- // If the leaf manager is not a Basic Block Pass Manager then create a
- // new Basic Block Pass Manager.
- assert(!PMS.empty() && "Unable to create BasicBlock Pass Manager");
- PMDataManager *PMD = PMS.top();
-
- // [1] Create new Basic Block Manager
- BBP = new BBPassManager();
-
- // [2] Set up new manager's top level manager
- // Basic Block Pass Manager does not live by itself
- PMTopLevelManager *TPM = PMD->getTopLevelManager();
- TPM->addIndirectPassManager(BBP);
-
- // [3] Assign manager to manage this new manager. This may create
- // and push new managers into PMS
- BBP->assignPassManager(PMS, PreferredType);
-
- // [4] Push new manager into PMS
- PMS.push(BBP);
- }
-
- // Assign BBP as the manager of this pass.
- BBP->add(this);
-}
-
-PassManagerBase::~PassManagerBase() {}
diff --git a/lib/VMCore/PassRegistry.cpp b/lib/VMCore/PassRegistry.cpp
deleted file mode 100644
index 2df65572c592..000000000000
--- a/lib/VMCore/PassRegistry.cpp
+++ /dev/null
@@ -1,209 +0,0 @@
-//===- PassRegistry.cpp - Pass Registration Implementation ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PassRegistry, with which passes are registered on
-// initialization, and supports the PassManager in dependency resolution.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/PassRegistry.h"
-#include "llvm/PassSupport.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Function.h"
-#include <vector>
-
-using namespace llvm;
-
-// FIXME: We use ManagedStatic to erase the pass registrar on shutdown.
-// Unfortunately, passes are registered with static ctors, and having
-// llvm_shutdown clear this map prevents successful resurrection after
-// llvm_shutdown is run. Ideally we should find a solution so that we don't
-// leak the map, AND can still resurrect after shutdown.
-static ManagedStatic<PassRegistry> PassRegistryObj;
-PassRegistry *PassRegistry::getPassRegistry() {
- return &*PassRegistryObj;
-}
-
-static ManagedStatic<sys::SmartMutex<true> > Lock;
-
-//===----------------------------------------------------------------------===//
-// PassRegistryImpl
-//
-
-namespace {
-struct PassRegistryImpl {
- /// PassInfoMap - Keep track of the PassInfo object for each registered pass.
- typedef DenseMap<const void*, const PassInfo*> MapType;
- MapType PassInfoMap;
-
- typedef StringMap<const PassInfo*> StringMapType;
- StringMapType PassInfoStringMap;
-
- /// AnalysisGroupInfo - Keep track of information for each analysis group.
- struct AnalysisGroupInfo {
- SmallPtrSet<const PassInfo *, 8> Implementations;
- };
- DenseMap<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap;
-
- std::vector<const PassInfo*> ToFree;
- std::vector<PassRegistrationListener*> Listeners;
-};
-} // end anonymous namespace
-
-void *PassRegistry::getImpl() const {
- if (!pImpl)
- pImpl = new PassRegistryImpl();
- return pImpl;
-}
-
-//===----------------------------------------------------------------------===//
-// Accessors
-//
-
-PassRegistry::~PassRegistry() {
- sys::SmartScopedLock<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(pImpl);
-
- for (std::vector<const PassInfo*>::iterator I = Impl->ToFree.begin(),
- E = Impl->ToFree.end(); I != E; ++I)
- delete *I;
-
- delete Impl;
- pImpl = 0;
-}
-
-const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
- sys::SmartScopedLock<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI);
- return I != Impl->PassInfoMap.end() ? I->second : 0;
-}
-
-const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
- sys::SmartScopedLock<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- PassRegistryImpl::StringMapType::const_iterator
- I = Impl->PassInfoStringMap.find(Arg);
- return I != Impl->PassInfoStringMap.end() ? I->second : 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Pass Registration mechanism
-//
-
-void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
- sys::SmartScopedLock<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- bool Inserted =
- Impl->PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
- assert(Inserted && "Pass registered multiple times!");
- (void)Inserted;
- Impl->PassInfoStringMap[PI.getPassArgument()] = &PI;
-
- // Notify any listeners.
- for (std::vector<PassRegistrationListener*>::iterator
- I = Impl->Listeners.begin(), E = Impl->Listeners.end(); I != E; ++I)
- (*I)->passRegistered(&PI);
-
- if (ShouldFree) Impl->ToFree.push_back(&PI);
-}
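-
-// Editorial sketch (not in the original file): how entries get into this
-// registry. In-tree passes use the INITIALIZE_PASS macro and plugin passes
-// the RegisterPass template; 'Hello' is a hypothetical pass class.
-#if 0
-INITIALIZE_PASS(Hello, "hello", "Hello World Pass",
-    false /*Only looks at CFG*/, false /*Analysis pass*/)
-static RegisterPass<Hello> X("hello", "Hello World Pass");
-#endif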
-
-void PassRegistry::unregisterPass(const PassInfo &PI) {
- sys::SmartScopedLock<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- PassRegistryImpl::MapType::iterator I =
- Impl->PassInfoMap.find(PI.getTypeInfo());
- assert(I != Impl->PassInfoMap.end() && "Pass registered but not in map!");
-
- // Remove pass from the map.
- Impl->PassInfoMap.erase(I);
- Impl->PassInfoStringMap.erase(PI.getPassArgument());
-}
-
-void PassRegistry::enumerateWith(PassRegistrationListener *L) {
- sys::SmartScopedLock<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- for (PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.begin(),
- E = Impl->PassInfoMap.end(); I != E; ++I)
- L->passEnumerate(I->second);
-}
-
-
-/// Analysis Group Mechanisms.
-void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
- const void *PassID,
- PassInfo& Registeree,
- bool isDefault,
- bool ShouldFree) {
- PassInfo *InterfaceInfo = const_cast<PassInfo*>(getPassInfo(InterfaceID));
- if (InterfaceInfo == 0) {
- // First reference to Interface, register it now.
- registerPass(Registeree);
- InterfaceInfo = &Registeree;
- }
- assert(Registeree.isAnalysisGroup() &&
- "Trying to join an analysis group that is a normal pass!");
-
- if (PassID) {
- PassInfo *ImplementationInfo = const_cast<PassInfo*>(getPassInfo(PassID));
- assert(ImplementationInfo &&
- "Must register pass before adding to AnalysisGroup!");
-
- sys::SmartScopedLock<true> Guard(*Lock);
-
- // Make sure we keep track of the fact that the implementation implements
- // the interface.
- ImplementationInfo->addInterfaceImplemented(InterfaceInfo);
-
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- PassRegistryImpl::AnalysisGroupInfo &AGI =
- Impl->AnalysisGroupInfoMap[InterfaceInfo];
- assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
- "Cannot add a pass to the same analysis group more than once!");
- AGI.Implementations.insert(ImplementationInfo);
- if (isDefault) {
- assert(InterfaceInfo->getNormalCtor() == 0 &&
- "Default implementation for analysis group already specified!");
- assert(ImplementationInfo->getNormalCtor() &&
- "Cannot specify pass as default if it does not have a default ctor");
- InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
- }
- }
-
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- if (ShouldFree) Impl->ToFree.push_back(&Registeree);
-}
-
-void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
- sys::SmartScopedLock<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- Impl->Listeners.push_back(L);
-}
-
-void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
- sys::SmartScopedLock<true> Guard(*Lock);
-
- // NOTE: This is necessary, because removeRegistrationListener() can be called
- // as part of the llvm_shutdown sequence. Since we have no control over the
- // order of that sequence, we need to gracefully handle the case where the
- // PassRegistry is destructed before the object that triggers this call.
- if (!pImpl) return;
-
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- std::vector<PassRegistrationListener*>::iterator I =
- std::find(Impl->Listeners.begin(), Impl->Listeners.end(), L);
- assert(I != Impl->Listeners.end() &&
- "PassRegistrationListener not registered!");
- Impl->Listeners.erase(I);
-}
diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp
deleted file mode 100644
index 1f1fbc91bc31..000000000000
--- a/lib/VMCore/PrintModulePass.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-//===--- VMCore/PrintModulePass.cpp - Module/Function Printer -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// PrintModulePass and PrintFunctionPass implementations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Assembly/PrintModulePass.h"
-
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-
- class PrintModulePass : public ModulePass {
- std::string Banner;
- raw_ostream *Out; // raw_ostream to print on
- bool DeleteStream; // Delete the ostream in our dtor?
- public:
- static char ID;
- PrintModulePass() : ModulePass(ID), Out(&dbgs()),
- DeleteStream(false) {}
- PrintModulePass(const std::string &B, raw_ostream *o, bool DS)
- : ModulePass(ID), Banner(B), Out(o), DeleteStream(DS) {}
-
- ~PrintModulePass() {
- if (DeleteStream) delete Out;
- }
-
- bool runOnModule(Module &M) {
- (*Out) << Banner << M;
- return false;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
- };
-
- class PrintFunctionPass : public FunctionPass {
- std::string Banner; // String to print before each function
- raw_ostream *Out; // raw_ostream to print on
- bool DeleteStream; // Delete the ostream in our dtor?
- public:
- static char ID;
- PrintFunctionPass() : FunctionPass(ID), Banner(""), Out(&dbgs()),
- DeleteStream(false) {}
- PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS)
- : FunctionPass(ID), Banner(B), Out(o), DeleteStream(DS) {}
-
- ~PrintFunctionPass() {
- if (DeleteStream) delete Out;
- }
-
- // runOnFunction - This pass just prints a banner followed by the
- // function as it's processed.
- //
- bool runOnFunction(Function &F) {
- (*Out) << Banner << static_cast<Value&>(F);
- return false;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
- };
-}
-
-char PrintModulePass::ID = 0;
-INITIALIZE_PASS(PrintModulePass, "print-module",
- "Print module to stderr", false, false)
-char PrintFunctionPass::ID = 0;
-INITIALIZE_PASS(PrintFunctionPass, "print-function",
- "Print function to stderr", false, false)
-
-/// createPrintModulePass - Create and return a pass that writes the
-/// module to the specified raw_ostream.
-ModulePass *llvm::createPrintModulePass(llvm::raw_ostream *OS,
- bool DeleteStream,
- const std::string &Banner) {
- return new PrintModulePass(Banner, OS, DeleteStream);
-}
-
-/// createPrintFunctionPass - Create and return a pass that prints
-/// functions to the specified raw_ostream as they are processed.
-FunctionPass *llvm::createPrintFunctionPass(const std::string &Banner,
- llvm::raw_ostream *OS,
- bool DeleteStream) {
- return new PrintFunctionPass(Banner, OS, DeleteStream);
-}
-
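A usage sketch of the factory functions above (the dumpModule driver is hypothetical, not part of the import): a pass manager run that prints the module to stderr while leaving ownership of the stream with the caller:

    #include "llvm/Assembly/PrintModulePass.h"
    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Support/raw_ostream.h"

    static void dumpModule(llvm::Module &M) {
      llvm::PassManager PM;
      // DeleteStream=false: errs() is a global stream the pass must not free.
      PM.add(llvm::createPrintModulePass(&llvm::errs(), false,
                                         "; module dump\n"));
      PM.run(M);
    }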
diff --git a/lib/VMCore/SymbolTableListTraitsImpl.h b/lib/VMCore/SymbolTableListTraitsImpl.h
deleted file mode 100644
index 72687bb5e0b2..000000000000
--- a/lib/VMCore/SymbolTableListTraitsImpl.h
+++ /dev/null
@@ -1,118 +0,0 @@
-//===-- llvm/SymbolTableListTraitsImpl.h - Implementation ------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the stickier parts of the SymbolTableListTraits class,
-// and is explicitly instantiated where needed to avoid defining all this code
-// in a widely used header.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
-#define LLVM_SYMBOLTABLELISTTRAITS_IMPL_H
-
-#include "llvm/SymbolTableListTraits.h"
-#include "llvm/ValueSymbolTable.h"
-
-namespace llvm {
-
-/// setSymTabObject - This is called when (e.g.) the parent of a basic block
-/// changes. This requires us to remove all the instruction symtab entries from
-/// the current function and reinsert them into the new function.
-template<typename ValueSubClass, typename ItemParentClass>
-template<typename TPtr>
-void SymbolTableListTraits<ValueSubClass,ItemParentClass>
-::setSymTabObject(TPtr *Dest, TPtr Src) {
- // Get the old symtab and value list before doing the assignment.
- ValueSymbolTable *OldST = TraitsClass::getSymTab(getListOwner());
-
- // Do it.
- *Dest = Src;
-
- // Get the new SymTab object.
- ValueSymbolTable *NewST = TraitsClass::getSymTab(getListOwner());
-
- // If there is nothing to do, quick exit.
- if (OldST == NewST) return;
-
- // Move all the elements from the old symtab to the new one.
- iplist<ValueSubClass> &ItemList = TraitsClass::getList(getListOwner());
- if (ItemList.empty()) return;
-
- if (OldST) {
- // Remove all entries from the previous symtab.
- for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
- I != ItemList.end(); ++I)
- if (I->hasName())
- OldST->removeValueName(I->getValueName());
- }
-
- if (NewST) {
- // Add all of the items to the new symtab.
- for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
- I != ItemList.end(); ++I)
- if (I->hasName())
- NewST->reinsertValue(I);
- }
-
-}
-
-template<typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass,ItemParentClass>
-::addNodeToList(ValueSubClass *V) {
- assert(V->getParent() == 0 && "Value already in a container!!");
- ItemParentClass *Owner = getListOwner();
- V->setParent(Owner);
- if (V->hasName())
- if (ValueSymbolTable *ST = TraitsClass::getSymTab(Owner))
- ST->reinsertValue(V);
-}
-
-template<typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass,ItemParentClass>
-::removeNodeFromList(ValueSubClass *V) {
- V->setParent(0);
- if (V->hasName())
- if (ValueSymbolTable *ST = TraitsClass::getSymTab(getListOwner()))
- ST->removeValueName(V->getValueName());
-}
-
-template<typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass,ItemParentClass>
-::transferNodesFromList(ilist_traits<ValueSubClass> &L2,
- ilist_iterator<ValueSubClass> first,
- ilist_iterator<ValueSubClass> last) {
- // We only have to do work here if transferring instructions between BBs
- ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
- if (NewIP == OldIP) return; // No work to do at all...
-
- // We only have to update symbol table entries if we are transferring the
- // instructions to a different symtab object...
- ValueSymbolTable *NewST = TraitsClass::getSymTab(NewIP);
- ValueSymbolTable *OldST = TraitsClass::getSymTab(OldIP);
- if (NewST != OldST) {
- for (; first != last; ++first) {
- ValueSubClass &V = *first;
- bool HasName = V.hasName();
- if (OldST && HasName)
- OldST->removeValueName(V.getValueName());
- V.setParent(NewIP);
- if (NewST && HasName)
- NewST->reinsertValue(&V);
- }
- } else {
- // Just transferring between blocks in the same function, simply update the
- // parent fields in the instructions...
- for (; first != last; ++first)
- first->setParent(NewIP);
- }
-}
-
-} // End llvm namespace
-
-#endif
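These hooks are what make cross-function splicing safe. A sketch (F, G, and the block iterator BB are assumed): moving a block re-parents it, which reaches setSymTabObject() above and migrates every named instruction's symbol-table entry:

    // Move BB from F to the end of G. Named instructions are removed from
    // F's value symbol table and reinserted into G's by the traits above.
    G->getBasicBlockList().splice(G->getBasicBlockList().end(),
                                  F->getBasicBlockList(), BB);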
diff --git a/lib/VMCore/TargetTransformInfo.cpp b/lib/VMCore/TargetTransformInfo.cpp
deleted file mode 100644
index e91c29c45699..000000000000
--- a/lib/VMCore/TargetTransformInfo.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-//===- llvm/VMCore/TargetTransformInfo.cpp ----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TargetTransformInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-/// Default ctor.
-///
-/// @note This has to exist, because this is a pass, but it should never be
-/// used.
-TargetTransformInfo::TargetTransformInfo() : ImmutablePass(ID) {
- /// You are seeing this error because your pass required the TTI
- /// using a call to "getAnalysis<TargetTransformInfo>()", and you did
- /// not initialize a machine target which can provide the TTI.
- /// You should use "getAnalysisIfAvailable<TargetTransformInfo>()" instead.
- report_fatal_error("Bad TargetTransformInfo ctor used. "
- "Tool did not specify a TargetTransformInfo to use?");
-}
-
-INITIALIZE_PASS(TargetTransformInfo, "targettransforminfo",
- "Target Transform Info", false, true)
-char TargetTransformInfo::ID = 0;
-
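The fatal error above encodes the intended usage; a hedged sketch of a pass body that follows it (MyPass is a hypothetical FunctionPass, and the cost query is illustrative):

    bool MyPass::runOnFunction(Function &F) {
      // Requiring TTI via getAnalysis<TargetTransformInfo>() would end up
      // default-constructing this stub when no target registered a real
      // TTI; the IfAvailable form degrades gracefully to a null pointer.
      if (const TargetTransformInfo *TTI =
              getAnalysisIfAvailable<TargetTransformInfo>()) {
        (void)TTI; // e.g. consult target-specific cost queries here
      }
      return false;
    }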
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
deleted file mode 100644
index 1656ab2cab3a..000000000000
--- a/lib/VMCore/Type.cpp
+++ /dev/null
@@ -1,762 +0,0 @@
-//===-- Type.cpp - Implement the Type class -------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Type class for the VMCore library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "LLVMContextImpl.h"
-#include "llvm/Module.h"
-#include <algorithm>
-#include <cstdarg>
-#include "llvm/ADT/SmallString.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Type Class Implementation
-//===----------------------------------------------------------------------===//
-
-Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
- switch (IDNumber) {
- case VoidTyID : return getVoidTy(C);
- case HalfTyID : return getHalfTy(C);
- case FloatTyID : return getFloatTy(C);
- case DoubleTyID : return getDoubleTy(C);
- case X86_FP80TyID : return getX86_FP80Ty(C);
- case FP128TyID : return getFP128Ty(C);
- case PPC_FP128TyID : return getPPC_FP128Ty(C);
- case LabelTyID : return getLabelTy(C);
- case MetadataTyID : return getMetadataTy(C);
- case X86_MMXTyID : return getX86_MMXTy(C);
- default:
- return 0;
- }
-}
-
-/// getScalarType - If this is a vector type, return the element type,
-/// otherwise return this.
-Type *Type::getScalarType() {
- if (VectorType *VTy = dyn_cast<VectorType>(this))
- return VTy->getElementType();
- return this;
-}
-
-const Type *Type::getScalarType() const {
- if (const VectorType *VTy = dyn_cast<VectorType>(this))
- return VTy->getElementType();
- return this;
-}
-
-/// isIntegerTy - Return true if this is an IntegerType of the specified width.
-bool Type::isIntegerTy(unsigned Bitwidth) const {
- return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
-}
-
-// canLosslesslyBitCastTo - Return true if this type can be converted to
-// 'Ty' without any reinterpretation of bits. For example, i8* to i32*.
-//
-bool Type::canLosslesslyBitCastTo(Type *Ty) const {
- // Identity cast means no change so return true
- if (this == Ty)
- return true;
-
- // They are not convertible unless they are at least first class types
- if (!this->isFirstClassType() || !Ty->isFirstClassType())
- return false;
-
- // Vector -> Vector conversions are always lossless if the two vector types
- // have the same size, otherwise not. Also, 64-bit vector types can be
- // converted to x86mmx.
- if (const VectorType *thisPTy = dyn_cast<VectorType>(this)) {
- if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
- return thisPTy->getBitWidth() == thatPTy->getBitWidth();
- if (Ty->getTypeID() == Type::X86_MMXTyID &&
- thisPTy->getBitWidth() == 64)
- return true;
- }
-
- if (this->getTypeID() == Type::X86_MMXTyID)
- if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
- if (thatPTy->getBitWidth() == 64)
- return true;
-
- // At this point we have only various mismatches of the first class types
- // remaining and ptr->ptr. Just select the lossless conversions. Everything
- // else is not lossless.
- if (this->isPointerTy())
- return Ty->isPointerTy();
- return false; // Other types have no identity values
-}
-
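A few concrete outcomes of the rules above (a sketch; Ctx is an assumed LLVMContext):

    VectorType *V2i32 = VectorType::get(Type::getInt32Ty(Ctx), 2); // 64 bits
    bool A = V2i32->canLosslesslyBitCastTo(Type::getX86_MMXTy(Ctx));   // true
    bool B = Type::getInt8PtrTy(Ctx)
                 ->canLosslesslyBitCastTo(Type::getInt32PtrTy(Ctx));   // true
    bool C = Type::getInt32Ty(Ctx)
                 ->canLosslesslyBitCastTo(Type::getFloatTy(Ctx));      // false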
-bool Type::isEmptyTy() const {
- const ArrayType *ATy = dyn_cast<ArrayType>(this);
- if (ATy) {
- unsigned NumElements = ATy->getNumElements();
- return NumElements == 0 || ATy->getElementType()->isEmptyTy();
- }
-
- const StructType *STy = dyn_cast<StructType>(this);
- if (STy) {
- unsigned NumElements = STy->getNumElements();
- for (unsigned i = 0; i < NumElements; ++i)
- if (!STy->getElementType(i)->isEmptyTy())
- return false;
- return true;
- }
-
- return false;
-}
-
-unsigned Type::getPrimitiveSizeInBits() const {
- switch (getTypeID()) {
- case Type::HalfTyID: return 16;
- case Type::FloatTyID: return 32;
- case Type::DoubleTyID: return 64;
- case Type::X86_FP80TyID: return 80;
- case Type::FP128TyID: return 128;
- case Type::PPC_FP128TyID: return 128;
- case Type::X86_MMXTyID: return 64;
- case Type::IntegerTyID: return cast<IntegerType>(this)->getBitWidth();
- case Type::VectorTyID: return cast<VectorType>(this)->getBitWidth();
- default: return 0;
- }
-}
-
-/// getScalarSizeInBits - If this is a vector type, return the
-/// getPrimitiveSizeInBits value for the element type. Otherwise return the
-/// getPrimitiveSizeInBits value for this type.
-unsigned Type::getScalarSizeInBits() {
- return getScalarType()->getPrimitiveSizeInBits();
-}
-
-/// getFPMantissaWidth - Return the width of the mantissa of this type. This
-/// is only valid on floating point types. If the FP type does not
-/// have a stable mantissa (e.g. ppc long double), this method returns -1.
-int Type::getFPMantissaWidth() const {
- if (const VectorType *VTy = dyn_cast<VectorType>(this))
- return VTy->getElementType()->getFPMantissaWidth();
- assert(isFloatingPointTy() && "Not a floating point type!");
- if (getTypeID() == HalfTyID) return 11;
- if (getTypeID() == FloatTyID) return 24;
- if (getTypeID() == DoubleTyID) return 53;
- if (getTypeID() == X86_FP80TyID) return 64;
- if (getTypeID() == FP128TyID) return 113;
- assert(getTypeID() == PPC_FP128TyID && "unknown fp type");
- return -1;
-}
-
-/// isSizedDerivedType - Derived types like structures and arrays are sized
-/// iff all of the members of the type are sized as well. Since asking for
-/// their size is relatively uncommon, move this operation out of line.
-bool Type::isSizedDerivedType() const {
- if (this->isIntegerTy())
- return true;
-
- if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
- return ATy->getElementType()->isSized();
-
- if (const VectorType *VTy = dyn_cast<VectorType>(this))
- return VTy->getElementType()->isSized();
-
- if (!this->isStructTy())
- return false;
-
- return cast<StructType>(this)->isSized();
-}
-
-//===----------------------------------------------------------------------===//
-// Subclass Helper Methods
-//===----------------------------------------------------------------------===//
-
-unsigned Type::getIntegerBitWidth() const {
- return cast<IntegerType>(this)->getBitWidth();
-}
-
-bool Type::isFunctionVarArg() const {
- return cast<FunctionType>(this)->isVarArg();
-}
-
-Type *Type::getFunctionParamType(unsigned i) const {
- return cast<FunctionType>(this)->getParamType(i);
-}
-
-unsigned Type::getFunctionNumParams() const {
- return cast<FunctionType>(this)->getNumParams();
-}
-
-StringRef Type::getStructName() const {
- return cast<StructType>(this)->getName();
-}
-
-unsigned Type::getStructNumElements() const {
- return cast<StructType>(this)->getNumElements();
-}
-
-Type *Type::getStructElementType(unsigned N) const {
- return cast<StructType>(this)->getElementType(N);
-}
-
-Type *Type::getSequentialElementType() const {
- return cast<SequentialType>(this)->getElementType();
-}
-
-uint64_t Type::getArrayNumElements() const {
- return cast<ArrayType>(this)->getNumElements();
-}
-
-unsigned Type::getVectorNumElements() const {
- return cast<VectorType>(this)->getNumElements();
-}
-
-unsigned Type::getPointerAddressSpace() const {
- return cast<PointerType>(getScalarType())->getAddressSpace();
-}
-
-
-//===----------------------------------------------------------------------===//
-// Primitive 'Type' data
-//===----------------------------------------------------------------------===//
-
-Type *Type::getVoidTy(LLVMContext &C) { return &C.pImpl->VoidTy; }
-Type *Type::getLabelTy(LLVMContext &C) { return &C.pImpl->LabelTy; }
-Type *Type::getHalfTy(LLVMContext &C) { return &C.pImpl->HalfTy; }
-Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; }
-Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; }
-Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; }
-Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; }
-Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; }
-Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; }
-Type *Type::getX86_MMXTy(LLVMContext &C) { return &C.pImpl->X86_MMXTy; }
-
-IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; }
-IntegerType *Type::getInt8Ty(LLVMContext &C) { return &C.pImpl->Int8Ty; }
-IntegerType *Type::getInt16Ty(LLVMContext &C) { return &C.pImpl->Int16Ty; }
-IntegerType *Type::getInt32Ty(LLVMContext &C) { return &C.pImpl->Int32Ty; }
-IntegerType *Type::getInt64Ty(LLVMContext &C) { return &C.pImpl->Int64Ty; }
-
-IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
- return IntegerType::get(C, N);
-}
-
-PointerType *Type::getHalfPtrTy(LLVMContext &C, unsigned AS) {
- return getHalfTy(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
- return getFloatTy(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) {
- return getDoubleTy(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) {
- return getX86_FP80Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) {
- return getFP128Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
- return getPPC_FP128Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
- return getX86_MMXTy(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
- return getIntNTy(C, N)->getPointerTo(AS);
-}
-
-PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
- return getInt1Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) {
- return getInt8Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) {
- return getInt16Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) {
- return getInt32Ty(C)->getPointerTo(AS);
-}
-
-PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
- return getInt64Ty(C)->getPointerTo(AS);
-}
-
-
-//===----------------------------------------------------------------------===//
-// IntegerType Implementation
-//===----------------------------------------------------------------------===//
-
-IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
- assert(NumBits >= MIN_INT_BITS && "bitwidth too small");
- assert(NumBits <= MAX_INT_BITS && "bitwidth too large");
-
- // Check for the built-in integer types
- switch (NumBits) {
- case 1: return cast<IntegerType>(Type::getInt1Ty(C));
- case 8: return cast<IntegerType>(Type::getInt8Ty(C));
- case 16: return cast<IntegerType>(Type::getInt16Ty(C));
- case 32: return cast<IntegerType>(Type::getInt32Ty(C));
- case 64: return cast<IntegerType>(Type::getInt64Ty(C));
- default:
- break;
- }
-
- IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits];
-
- if (Entry == 0)
- Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits);
-
- return Entry;
-}
-
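Since non-built-in widths are interned in pImpl->IntegerTypes, equal requests return pointer-equal objects (sketch; Ctx assumed):

    IntegerType *A = IntegerType::get(Ctx, 37);
    IntegerType *B = Type::getIntNTy(Ctx, 37);
    assert(A == B && "same width in the same context is the same object");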
-bool IntegerType::isPowerOf2ByteWidth() const {
- unsigned BitWidth = getBitWidth();
- return (BitWidth > 7) && isPowerOf2_32(BitWidth);
-}
-
-APInt IntegerType::getMask() const {
- return APInt::getAllOnesValue(getBitWidth());
-}
-
-//===----------------------------------------------------------------------===//
-// FunctionType Implementation
-//===----------------------------------------------------------------------===//
-
-FunctionType::FunctionType(Type *Result, ArrayRef<Type*> Params,
- bool IsVarArgs)
- : Type(Result->getContext(), FunctionTyID) {
- Type **SubTys = reinterpret_cast<Type**>(this+1);
- assert(isValidReturnType(Result) && "invalid return type for function");
- setSubclassData(IsVarArgs);
-
- SubTys[0] = const_cast<Type*>(Result);
-
- for (unsigned i = 0, e = Params.size(); i != e; ++i) {
- assert(isValidArgumentType(Params[i]) &&
- "Not a valid type for function argument!");
- SubTys[i+1] = Params[i];
- }
-
- ContainedTys = SubTys;
- NumContainedTys = Params.size() + 1; // + 1 for result type
-}
-
-// FunctionType::get - The factory function for the FunctionType class.
-FunctionType *FunctionType::get(Type *ReturnType,
- ArrayRef<Type*> Params, bool isVarArg) {
- LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
- FunctionTypeKeyInfo::KeyTy Key(ReturnType, Params, isVarArg);
- LLVMContextImpl::FunctionTypeMap::iterator I =
- pImpl->FunctionTypes.find_as(Key);
- FunctionType *FT;
-
- if (I == pImpl->FunctionTypes.end()) {
- FT = (FunctionType*) pImpl->TypeAllocator.
- Allocate(sizeof(FunctionType) + sizeof(Type*) * (Params.size() + 1),
- AlignOf<FunctionType>::Alignment);
- new (FT) FunctionType(ReturnType, Params, isVarArg);
- pImpl->FunctionTypes[FT] = true;
- } else {
- FT = I->first;
- }
-
- return FT;
-}
-
-FunctionType *FunctionType::get(Type *Result, bool isVarArg) {
- return get(Result, ArrayRef<Type *>(), isVarArg);
-}
-
-/// isValidReturnType - Return true if the specified type is valid as a return
-/// type.
-bool FunctionType::isValidReturnType(Type *RetTy) {
- return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
- !RetTy->isMetadataTy();
-}
-
-/// isValidArgumentType - Return true if the specified type is valid as an
-/// argument type.
-bool FunctionType::isValidArgumentType(Type *ArgTy) {
- return ArgTy->isFirstClassType();
-}
-
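Building i32 (i8*, ...) with the factory above (sketch; Ctx assumed); structurally identical signatures come back uniqued from pImpl->FunctionTypes:

    Type *Params[] = { Type::getInt8PtrTy(Ctx) };
    FunctionType *FT =
        FunctionType::get(Type::getInt32Ty(Ctx), Params, /*isVarArg=*/true);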
-//===----------------------------------------------------------------------===//
-// StructType Implementation
-//===----------------------------------------------------------------------===//
-
-// Primitive Constructors.
-
-StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
- bool isPacked) {
- LLVMContextImpl *pImpl = Context.pImpl;
- AnonStructTypeKeyInfo::KeyTy Key(ETypes, isPacked);
- LLVMContextImpl::StructTypeMap::iterator I =
- pImpl->AnonStructTypes.find_as(Key);
- StructType *ST;
-
- if (I == pImpl->AnonStructTypes.end()) {
- // Value not found. Create a new type!
- ST = new (Context.pImpl->TypeAllocator) StructType(Context);
- ST->setSubclassData(SCDB_IsLiteral); // Literal struct.
- ST->setBody(ETypes, isPacked);
- Context.pImpl->AnonStructTypes[ST] = true;
- } else {
- ST = I->first;
- }
-
- return ST;
-}
-
-void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
- assert(isOpaque() && "Struct body already set!");
-
- setSubclassData(getSubclassData() | SCDB_HasBody);
- if (isPacked)
- setSubclassData(getSubclassData() | SCDB_Packed);
-
- unsigned NumElements = Elements.size();
- Type **Elts = getContext().pImpl->TypeAllocator.Allocate<Type*>(NumElements);
- memcpy(Elts, Elements.data(), sizeof(Elements[0]) * NumElements);
-
- ContainedTys = Elts;
- NumContainedTys = NumElements;
-}
-
-void StructType::setName(StringRef Name) {
- if (Name == getName()) return;
-
- StringMap<StructType *> &SymbolTable = getContext().pImpl->NamedStructTypes;
- typedef StringMap<StructType *>::MapEntryTy EntryTy;
-
- // If this struct already had a name, remove its symbol table entry. Don't
- // delete the data yet because it may be part of the new name.
- if (SymbolTableEntry)
- SymbolTable.remove((EntryTy *)SymbolTableEntry);
-
- // If this is just removing the name, we're done.
- if (Name.empty()) {
- if (SymbolTableEntry) {
- // Delete the old string data.
- ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator());
- SymbolTableEntry = 0;
- }
- return;
- }
-
- // Look up the entry for the name.
- EntryTy *Entry = &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name);
-
- // While we have a name collision, try a random rename.
- if (Entry->getValue()) {
- SmallString<64> TempStr(Name);
- TempStr.push_back('.');
- raw_svector_ostream TmpStream(TempStr);
- unsigned NameSize = Name.size();
-
- do {
- TempStr.resize(NameSize + 1);
- TmpStream.resync();
- TmpStream << getContext().pImpl->NamedStructTypesUniqueID++;
-
- Entry = &getContext().pImpl->
- NamedStructTypes.GetOrCreateValue(TmpStream.str());
- } while (Entry->getValue());
- }
-
- // Okay, we found an entry that isn't used. It's us!
- Entry->setValue(this);
-
- // Delete the old string data.
- if (SymbolTableEntry)
- ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator());
- SymbolTableEntry = Entry;
-}
-
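One observable consequence of the collision loop above (sketch; Ctx assumed): asking for a name that is already taken silently yields a suffixed variant rather than failing:

    StructType *A = StructType::create(Ctx, "pair");
    StructType *B = StructType::create(Ctx, "pair"); // name collision
    // B->getName() now reads like "pair.0"; A keeps "pair".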
-//===----------------------------------------------------------------------===//
-// StructType Helper functions.
-
-StructType *StructType::create(LLVMContext &Context, StringRef Name) {
- StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
- if (!Name.empty())
- ST->setName(Name);
- return ST;
-}
-
-StructType *StructType::get(LLVMContext &Context, bool isPacked) {
- return get(Context, llvm::ArrayRef<Type*>(), isPacked);
-}
-
-StructType *StructType::get(Type *type, ...) {
- assert(type != 0 && "Cannot create a struct type with no elements using this overload");
- LLVMContext &Ctx = type->getContext();
- va_list ap;
- SmallVector<llvm::Type*, 8> StructFields;
- va_start(ap, type);
- while (type) {
- StructFields.push_back(type);
- type = va_arg(ap, llvm::Type*);
- }
- return llvm::StructType::get(Ctx, StructFields);
-}
-
-StructType *StructType::create(LLVMContext &Context, ArrayRef<Type*> Elements,
- StringRef Name, bool isPacked) {
- StructType *ST = create(Context, Name);
- ST->setBody(Elements, isPacked);
- return ST;
-}
-
-StructType *StructType::create(LLVMContext &Context, ArrayRef<Type*> Elements) {
- return create(Context, Elements, StringRef());
-}
-
-StructType *StructType::create(LLVMContext &Context) {
- return create(Context, StringRef());
-}
-
-StructType *StructType::create(ArrayRef<Type*> Elements, StringRef Name,
- bool isPacked) {
- assert(!Elements.empty() &&
- "This method may not be invoked with an empty list");
- return create(Elements[0]->getContext(), Elements, Name, isPacked);
-}
-
-StructType *StructType::create(ArrayRef<Type*> Elements) {
- assert(!Elements.empty() &&
- "This method may not be invoked with an empty list");
- return create(Elements[0]->getContext(), Elements, StringRef());
-}
-
-StructType *StructType::create(StringRef Name, Type *type, ...) {
- assert(type != 0 && "Cannot create a struct type with no elements using this overload");
- LLVMContext &Ctx = type->getContext();
- va_list ap;
- SmallVector<llvm::Type*, 8> StructFields;
- va_start(ap, type);
- while (type) {
- StructFields.push_back(type);
- type = va_arg(ap, llvm::Type*);
- }
- return llvm::StructType::create(Ctx, StructFields, Name);
-}
-
-bool StructType::isSized() const {
- if ((getSubclassData() & SCDB_IsSized) != 0)
- return true;
- if (isOpaque())
- return false;
-
- // Okay, our struct is sized if all of the elements are, but if one of the
- // elements is opaque, the struct isn't sized *yet*, but may become sized in
- // the future, so just bail out without caching.
- for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
- if (!(*I)->isSized())
- return false;
-
- // Here we cheat a bit and cast away const-ness. The goal is to memoize when
- // we find a sized type, as types can only move from opaque to sized, not the
- // other way.
- const_cast<StructType*>(this)->setSubclassData(
- getSubclassData() | SCDB_IsSized);
- return true;
-}
-
-StringRef StructType::getName() const {
- assert(!isLiteral() && "Literal structs never have names");
- if (SymbolTableEntry == 0) return StringRef();
-
- return ((StringMapEntry<StructType*> *)SymbolTableEntry)->getKey();
-}
-
-void StructType::setBody(Type *type, ...) {
- assert(type != 0 && "Cannot set a struct body with no elements using this overload");
- va_list ap;
- SmallVector<llvm::Type*, 8> StructFields;
- va_start(ap, type);
- while (type) {
- StructFields.push_back(type);
- type = va_arg(ap, llvm::Type*);
- }
- setBody(StructFields);
-}
-
-bool StructType::isValidElementType(Type *ElemTy) {
- return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
- !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
-}
-
-/// isLayoutIdentical - Return true if this is layout identical to the
-/// specified struct.
-bool StructType::isLayoutIdentical(StructType *Other) const {
- if (this == Other) return true;
-
- if (isPacked() != Other->isPacked() ||
- getNumElements() != Other->getNumElements())
- return false;
-
- return std::equal(element_begin(), element_end(), Other->element_begin());
-}
-
-/// getTypeByName - Return the type with the specified name, or null if there
-/// is none by that name.
-StructType *Module::getTypeByName(StringRef Name) const {
- StringMap<StructType*>::iterator I =
- getContext().pImpl->NamedStructTypes.find(Name);
- if (I != getContext().pImpl->NamedStructTypes.end())
- return I->second;
- return 0;
-}
-
-
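Lookup sketch against a hypothetical module M; the name must match exactly as registered:

    if (StructType *ST = M.getTypeByName("struct.widget"))
      ST->dump(); // found; print it for inspection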
-//===----------------------------------------------------------------------===//
-// CompositeType Implementation
-//===----------------------------------------------------------------------===//
-
-Type *CompositeType::getTypeAtIndex(const Value *V) {
- if (StructType *STy = dyn_cast<StructType>(this)) {
- unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
- assert(indexValid(Idx) && "Invalid structure index!");
- return STy->getElementType(Idx);
- }
-
- return cast<SequentialType>(this)->getElementType();
-}
-Type *CompositeType::getTypeAtIndex(unsigned Idx) {
- if (StructType *STy = dyn_cast<StructType>(this)) {
- assert(indexValid(Idx) && "Invalid structure index!");
- return STy->getElementType(Idx);
- }
-
- return cast<SequentialType>(this)->getElementType();
-}
-bool CompositeType::indexValid(const Value *V) const {
- if (const StructType *STy = dyn_cast<StructType>(this)) {
- // Structure indexes require 32-bit integer constants.
- if (V->getType()->isIntegerTy(32))
- if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
- return CU->getZExtValue() < STy->getNumElements();
- return false;
- }
-
- // Sequential types can be indexed by any integer.
- return V->getType()->isIntegerTy();
-}
-
-bool CompositeType::indexValid(unsigned Idx) const {
- if (const StructType *STy = dyn_cast<StructType>(this))
- return Idx < STy->getNumElements();
- // Sequential types can be indexed by any integer.
- return true;
-}
-
-
-//===----------------------------------------------------------------------===//
-// ArrayType Implementation
-//===----------------------------------------------------------------------===//
-
-ArrayType::ArrayType(Type *ElType, uint64_t NumEl)
- : SequentialType(ArrayTyID, ElType) {
- NumElements = NumEl;
-}
-
-ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) {
- Type *ElementType = const_cast<Type*>(elementType);
- assert(isValidElementType(ElementType) && "Invalid type for array element!");
-
- LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
- ArrayType *&Entry =
- pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
-
- if (Entry == 0)
- Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements);
- return Entry;
-}
-
-bool ArrayType::isValidElementType(Type *ElemTy) {
- return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
- !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
-}
-
-//===----------------------------------------------------------------------===//
-// VectorType Implementation
-//===----------------------------------------------------------------------===//
-
-VectorType::VectorType(Type *ElType, unsigned NumEl)
- : SequentialType(VectorTyID, ElType) {
- NumElements = NumEl;
-}
-
-VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
- Type *ElementType = const_cast<Type*>(elementType);
- assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
- assert(isValidElementType(ElementType) &&
- "Elements of a VectorType must be a primitive type");
-
- LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
- VectorType *&Entry = ElementType->getContext().pImpl
- ->VectorTypes[std::make_pair(ElementType, NumElements)];
-
- if (Entry == 0)
- Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements);
- return Entry;
-}
-
-bool VectorType::isValidElementType(Type *ElemTy) {
- if (PointerType *PTy = dyn_cast<PointerType>(ElemTy))
- ElemTy = PTy->getElementType();
- return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy();
-}
-
-//===----------------------------------------------------------------------===//
-// PointerType Implementation
-//===----------------------------------------------------------------------===//
-
-PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
- assert(EltTy && "Can't get a pointer to <null> type!");
- assert(isValidElementType(EltTy) && "Invalid type for pointer element!");
-
- LLVMContextImpl *CImpl = EltTy->getContext().pImpl;
-
- // Since AddressSpace #0 is the common case, we special case it.
- PointerType *&Entry = AddressSpace == 0 ? CImpl->PointerTypes[EltTy]
- : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
-
- if (Entry == 0)
- Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace);
- return Entry;
-}
-
-
-PointerType::PointerType(Type *E, unsigned AddrSpace)
- : SequentialType(PointerTyID, E) {
-#ifndef NDEBUG
- const unsigned oldNCT = NumContainedTys;
-#endif
- setSubclassData(AddrSpace);
- // Check for miscompile. PR11652.
- assert(oldNCT == NumContainedTys && "bitfield written out of bounds?");
-}
-
-PointerType *Type::getPointerTo(unsigned addrs) {
- return PointerType::get(this, addrs);
-}
-
-bool PointerType::isValidElementType(Type *ElemTy) {
- return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
- !ElemTy->isMetadataTy();
-}
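Pointer types are interned per (pointee, address space) pair, with getPointerTo() as the convenience route (sketch; Ctx assumed):

    Type *I8 = Type::getInt8Ty(Ctx);
    PointerType *P0 = I8->getPointerTo(0); // i8* in addrspace(0)
    PointerType *P1 = I8->getPointerTo(1); // i8 addrspace(1)*
    assert(P0 != P1 && "different address spaces are different types");
    assert(P0 == Type::getInt8PtrTy(Ctx) && "addrspace(0) uses the fast map");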
diff --git a/lib/VMCore/TypeFinder.cpp b/lib/VMCore/TypeFinder.cpp
deleted file mode 100644
index 4de649fb3f4c..000000000000
--- a/lib/VMCore/TypeFinder.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-//===-- TypeFinder.cpp - Implement the TypeFinder class -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TypeFinder class for the VMCore library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TypeFinder.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/SmallVector.h"
-using namespace llvm;
-
-void TypeFinder::run(const Module &M, bool onlyNamed) {
- OnlyNamed = onlyNamed;
-
- // Get types from global variables.
- for (Module::const_global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I) {
- incorporateType(I->getType());
- if (I->hasInitializer())
- incorporateValue(I->getInitializer());
- }
-
- // Get types from aliases.
- for (Module::const_alias_iterator I = M.alias_begin(),
- E = M.alias_end(); I != E; ++I) {
- incorporateType(I->getType());
- if (const Value *Aliasee = I->getAliasee())
- incorporateValue(Aliasee);
- }
-
- // Get types from functions.
- SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
- for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
- incorporateType(FI->getType());
-
- // First incorporate the arguments.
- for (Function::const_arg_iterator AI = FI->arg_begin(),
- AE = FI->arg_end(); AI != AE; ++AI)
- incorporateValue(AI);
-
- for (Function::const_iterator BB = FI->begin(), E = FI->end();
- BB != E;++BB)
- for (BasicBlock::const_iterator II = BB->begin(),
- E = BB->end(); II != E; ++II) {
- const Instruction &I = *II;
-
- // Incorporate the type of the instruction.
- incorporateType(I.getType());
-
- // Incorporate non-instruction operand types. (We are incorporating all
- // instructions with this loop.)
- for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
- OI != OE; ++OI)
- if (!isa<Instruction>(OI))
- incorporateValue(*OI);
-
- // Incorporate types hiding in metadata.
- I.getAllMetadataOtherThanDebugLoc(MDForInst);
- for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
- incorporateMDNode(MDForInst[i].second);
-
- MDForInst.clear();
- }
- }
-
- for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
- E = M.named_metadata_end(); I != E; ++I) {
- const NamedMDNode *NMD = I;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- incorporateMDNode(NMD->getOperand(i));
- }
-}
-
-void TypeFinder::clear() {
- VisitedConstants.clear();
- VisitedTypes.clear();
- StructTypes.clear();
-}
-
-/// incorporateType - This method adds the type to the list of used structures
-/// if it's not in there already.
-void TypeFinder::incorporateType(Type *Ty) {
- // Check to see if we've already visited this type.
- if (!VisitedTypes.insert(Ty).second)
- return;
-
- // If this is a structure or opaque type, add a name for the type.
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!OnlyNamed || STy->hasName())
- StructTypes.push_back(STy);
-
- // Recursively walk all contained types.
- for (Type::subtype_iterator I = Ty->subtype_begin(),
- E = Ty->subtype_end(); I != E; ++I)
- incorporateType(*I);
-}
-
-/// incorporateValue - This method is used to walk operand lists finding types
-/// hiding in constant expressions and other operands that won't be walked in
-/// other ways. GlobalValues, basic blocks, instructions, and inst operands are
-/// all explicitly enumerated.
-void TypeFinder::incorporateValue(const Value *V) {
- if (const MDNode *M = dyn_cast<MDNode>(V))
- return incorporateMDNode(M);
-
- if (!isa<Constant>(V) || isa<GlobalValue>(V)) return;
-
- // Already visited?
- if (!VisitedConstants.insert(V).second)
- return;
-
- // Check this type.
- incorporateType(V->getType());
-
- // If this is an instruction, we incorporate it separately.
- if (isa<Instruction>(V))
- return;
-
- // Look in operands for types.
- const User *U = cast<User>(V);
- for (Constant::const_op_iterator I = U->op_begin(),
- E = U->op_end(); I != E;++I)
- incorporateValue(*I);
-}
-
-/// incorporateMDNode - This method is used to walk the operands of an MDNode to
-/// find types hiding within.
-void TypeFinder::incorporateMDNode(const MDNode *V) {
- // Already visited?
- if (!VisitedConstants.insert(V).second)
- return;
-
- // Look in operands for types.
- for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
- if (Value *Op = V->getOperand(i))
- incorporateValue(Op);
-}
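Typical traversal with the class above (sketch; M is an assumed Module, and errs() from raw_ostream.h is used for output), restricted to named structs:

    TypeFinder Finder;
    Finder.run(M, /*onlyNamed=*/true);
    for (TypeFinder::iterator I = Finder.begin(), E = Finder.end();
         I != E; ++I)
      errs() << "found: " << (*I)->getName() << "\n";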
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
deleted file mode 100644
index 0128adc3f776..000000000000
--- a/lib/VMCore/Use.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-//===-- Use.cpp - Implement the Use class ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the algorithm for finding the User of a Use.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Value.h"
-#include <new>
-
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-// Use swap Implementation
-//===----------------------------------------------------------------------===//
-
-void Use::swap(Use &RHS) {
- Value *V1(Val);
- Value *V2(RHS.Val);
- if (V1 != V2) {
- if (V1) {
- removeFromList();
- }
-
- if (V2) {
- RHS.removeFromList();
- Val = V2;
- V2->addUse(*this);
- } else {
- Val = 0;
- }
-
- if (V1) {
- RHS.Val = V1;
- V1->addUse(RHS);
- } else {
- RHS.Val = 0;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Use getImpliedUser Implementation
-//===----------------------------------------------------------------------===//
-
-const Use *Use::getImpliedUser() const {
- const Use *Current = this;
-
- while (true) {
- unsigned Tag = (Current++)->Prev.getInt();
- switch (Tag) {
- case zeroDigitTag:
- case oneDigitTag:
- continue;
-
- case stopTag: {
- ++Current;
- ptrdiff_t Offset = 1;
- while (true) {
- unsigned Tag = Current->Prev.getInt();
- switch (Tag) {
- case zeroDigitTag:
- case oneDigitTag:
- ++Current;
- Offset = (Offset << 1) + Tag;
- continue;
- default:
- return Current + Offset;
- }
- }
- }
-
- case fullStopTag:
- return Current;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Use initTags Implementation
-//===----------------------------------------------------------------------===//
-
-Use *Use::initTags(Use * const Start, Use *Stop) {
- ptrdiff_t Done = 0;
- while (Done < 20) {
- if (Start == Stop--)
- return Start;
- static const PrevPtrTag tags[20] = { fullStopTag, oneDigitTag, stopTag,
- oneDigitTag, oneDigitTag, stopTag,
- zeroDigitTag, oneDigitTag, oneDigitTag,
- stopTag, zeroDigitTag, oneDigitTag,
- zeroDigitTag, oneDigitTag, stopTag,
- oneDigitTag, oneDigitTag, oneDigitTag,
- oneDigitTag, stopTag
- };
- new(Stop) Use(tags[Done++]);
- }
-
- ptrdiff_t Count = Done;
- while (Start != Stop) {
- --Stop;
- if (!Count) {
- new(Stop) Use(stopTag);
- ++Done;
- Count = Done;
- } else {
- new(Stop) Use(PrevPtrTag(Count & 1));
- Count >>= 1;
- ++Done;
- }
- }
-
- return Start;
-}
-
-//===----------------------------------------------------------------------===//
-// Use zap Implementation
-//===----------------------------------------------------------------------===//
-
-void Use::zap(Use *Start, const Use *Stop, bool del) {
- while (Start != Stop)
- (--Stop)->~Use();
- if (del)
- ::operator delete(Start);
-}
-
-//===----------------------------------------------------------------------===//
-// Use getUser Implementation
-//===----------------------------------------------------------------------===//
-
-User *Use::getUser() const {
- const Use *End = getImpliedUser();
- const UserRef *ref = reinterpret_cast<const UserRef*>(End);
- return ref->getInt()
- ? ref->getPointer()
- : (User*)End;
-}
-
-} // End llvm namespace
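The payoff of the waymarking above: a Use reaches its User without storing a back-pointer per Use, which is what makes plain use-list iteration work (sketch; V is an assumed Value*):

    for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
         UI != E; ++UI) {
      User *U = *UI; // operator* resolves through Use::getUser()
      (void)U;
    }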
diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp
deleted file mode 100644
index e847ce6ee5cd..000000000000
--- a/lib/VMCore/User.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===-- User.cpp - Implement the User class -------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Constant.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/User.h"
-#include "llvm/Operator.h"
-
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-// User Class
-//===----------------------------------------------------------------------===//
-
-void User::anchor() {}
-
-// replaceUsesOfWith - Replaces all references to the "From" definition with
-// references to the "To" definition.
-//
-void User::replaceUsesOfWith(Value *From, Value *To) {
- if (From == To) return; // Replacing a value with itself is a no-op.
-
- assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
- "Cannot call User::replaceUsesOfWith on a constant!");
-
- for (unsigned i = 0, E = getNumOperands(); i != E; ++i)
- if (getOperand(i) == From) { // Is this operand pointing to oldval?
- // The side effects of this setOperand call include linking to
- // "To", adding "this" to the uses list of To, and
- // most importantly, removing "this" from the use list of "From".
- setOperand(i, To); // Fix it now...
- }
-}
-
-//===----------------------------------------------------------------------===//
-// User allocHungoffUses Implementation
-//===----------------------------------------------------------------------===//
-
-Use *User::allocHungoffUses(unsigned N) const {
- // Allocate the array of Uses, followed by a pointer (with bottom bit set) to
- // the User.
- size_t size = N * sizeof(Use) + sizeof(Use::UserRef);
- Use *Begin = static_cast<Use*>(::operator new(size));
- Use *End = Begin + N;
- (void) new(End) Use::UserRef(const_cast<User*>(this), 1);
- return Use::initTags(Begin, End);
-}
-
-//===----------------------------------------------------------------------===//
-// User operator new Implementations
-//===----------------------------------------------------------------------===//
-
-void *User::operator new(size_t s, unsigned Us) {
- void *Storage = ::operator new(s + sizeof(Use) * Us);
- Use *Start = static_cast<Use*>(Storage);
- Use *End = Start + Us;
- User *Obj = reinterpret_cast<User*>(End);
- Obj->OperandList = Start;
- Obj->NumOperands = Us;
- Use::initTags(Start, End);
- return Obj;
-}
-
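The layout produced by this operator new (an illustration, not code from the import): the co-allocated uses sit immediately before the User object, so OperandList needs no separate allocation:

    [ Use #0 ][ Use #1 ] ... [ Use #Us-1 ][ User object ... ]
    ^-- Start (OperandList)               ^-- Obj, the pointer returned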
-//===----------------------------------------------------------------------===//
-// User operator delete Implementation
-//===----------------------------------------------------------------------===//
-
-void User::operator delete(void *Usr) {
- User *Start = static_cast<User*>(Usr);
- Use *Storage = static_cast<Use*>(Usr) - Start->NumOperands;
- // If there were hung-off uses, they will have been freed already and
- // NumOperands reset to 0, so here we just free the User itself.
- ::operator delete(Storage);
-}
-
-//===----------------------------------------------------------------------===//
-// Operator Class
-//===----------------------------------------------------------------------===//
-
-Operator::~Operator() {
- llvm_unreachable("should never destroy an Operator");
-}
-
-} // End llvm namespace
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
deleted file mode 100644
index 8d0720dc1223..000000000000
--- a/lib/VMCore/Value.cpp
+++ /dev/null
@@ -1,694 +0,0 @@
-//===-- Value.cpp - Implement the Value class -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Value, ValueHandle, and User classes.
-//
-//===----------------------------------------------------------------------===//
-
-#include "LLVMContextImpl.h"
-#include "llvm/Constant.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/InstrTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Operator.h"
-#include "llvm/Module.h"
-#include "llvm/ValueSymbolTable.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LeakDetector.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/DenseMap.h"
-#include <algorithm>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Value Class
-//===----------------------------------------------------------------------===//
-
-static inline Type *checkType(Type *Ty) {
- assert(Ty && "Value defined with a null type: Error!");
- return const_cast<Type*>(Ty);
-}
-
-Value::Value(Type *ty, unsigned scid)
- : SubclassID(scid), HasValueHandle(0),
- SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)),
- UseList(0), Name(0) {
- // FIXME: Why isn't this in the subclass gunk??
- if (isa<CallInst>(this) || isa<InvokeInst>(this))
- assert((VTy->isFirstClassType() || VTy->isVoidTy() || VTy->isStructTy()) &&
- "invalid CallInst type!");
- else if (!isa<Constant>(this) && !isa<BasicBlock>(this))
- assert((VTy->isFirstClassType() || VTy->isVoidTy()) &&
- "Cannot create non-first-class values except for constants!");
-}
-
-Value::~Value() {
- // Notify all ValueHandles (if present) that this value is going away.
- if (HasValueHandle)
- ValueHandleBase::ValueIsDeleted(this);
-
-#ifndef NDEBUG // Only in -g mode...
- // Check to make sure that there are no uses of this value that are still
- // around when the value is destroyed. If there are, then we have a dangling
- // reference and something is wrong. This code is here to print out what is
- // still being referenced. The value in question should be printed as
- // a <badref>
- //
- if (!use_empty()) {
- dbgs() << "While deleting: " << *VTy << " %" << getName() << "\n";
- for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
- dbgs() << "Use still stuck around after Def is destroyed:"
- << **I << "\n";
- }
-#endif
- assert(use_empty() && "Uses remain when a value is destroyed!");
-
- // If this value is named, destroy the name. This should not be in a symtab
- // at this point.
- if (Name && SubclassID != MDStringVal)
- Name->Destroy();
-
- // There should be no uses of this object anymore, remove it.
- LeakDetector::removeGarbageObject(this);
-}
-
-/// hasNUses - Return true if this Value has exactly N users.
-///
-bool Value::hasNUses(unsigned N) const {
- const_use_iterator UI = use_begin(), E = use_end();
-
- for (; N; --N, ++UI)
- if (UI == E) return false; // Too few.
- return UI == E;
-}
-
-/// hasNUsesOrMore - Return true if this value has N users or more. This is
-/// logically equivalent to getNumUses() >= N.
-///
-bool Value::hasNUsesOrMore(unsigned N) const {
- const_use_iterator UI = use_begin(), E = use_end();
-
- for (; N; --N, ++UI)
- if (UI == E) return false; // Too few.
-
- return true;
-}
-
-/// isUsedInBasicBlock - Return true if this value is used in the specified
-/// basic block.
-bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
- // Start by scanning over the instructions looking for a use before we start
- // the expensive use iteration.
- unsigned MaxBlockSize = 3;
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (std::find(I->op_begin(), I->op_end(), this) != I->op_end())
- return true;
- // Check before decrementing so that breaking out leaves MaxBlockSize == 0
- // and we fall through to the use-list scan below instead of wrapping.
- if (MaxBlockSize == 0) // If the block is larger, fall back to use_iterator.
- break;
- --MaxBlockSize;
- }
-
- if (MaxBlockSize != 0) // We scanned the entire block and found no use.
- return false;
-
- for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
- const Instruction *User = dyn_cast<Instruction>(*I);
- if (User && User->getParent() == BB)
- return true;
- }
- return false;
-}
-
-
-/// getNumUses - This method computes the number of uses of this Value. This
-/// is a linear time operation. Use hasOneUse or hasNUses to check for specific
-/// values.
-unsigned Value::getNumUses() const {
- return (unsigned)std::distance(use_begin(), use_end());
-}
-
-static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
- ST = 0;
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- if (BasicBlock *P = I->getParent())
- if (Function *PP = P->getParent())
- ST = &PP->getValueSymbolTable();
- } else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
- if (Function *P = BB->getParent())
- ST = &P->getValueSymbolTable();
- } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- if (Module *P = GV->getParent())
- ST = &P->getValueSymbolTable();
- } else if (Argument *A = dyn_cast<Argument>(V)) {
- if (Function *P = A->getParent())
- ST = &P->getValueSymbolTable();
- } else if (isa<MDString>(V))
- return true;
- else {
- assert(isa<Constant>(V) && "Unknown value type!");
- return true; // no name is settable for this.
- }
- return false;
-}
-
-StringRef Value::getName() const {
- // Make sure the empty string is still a C string. For historical reasons,
- // some clients want to call .data() on the result and expect it to be null
- // terminated.
- if (!Name) return StringRef("", 0);
- return Name->getKey();
-}
-
-void Value::setName(const Twine &NewName) {
- assert(SubclassID != MDStringVal &&
- "Cannot set the name of MDString with this method!");
-
- // Fast path for common IRBuilder case of setName("") when there is no name.
- if (NewName.isTriviallyEmpty() && !hasName())
- return;
-
- SmallString<256> NameData;
- StringRef NameRef = NewName.toStringRef(NameData);
-
- // Name isn't changing?
- if (getName() == NameRef)
- return;
-
- assert(!getType()->isVoidTy() && "Cannot assign a name to void values!");
-
- // Get the symbol table to update for this object.
- ValueSymbolTable *ST;
- if (getSymTab(this, ST))
- return; // Cannot set a name on this value (e.g. constant).
-
- if (!ST) { // No symbol table to update? Just do the change.
- if (NameRef.empty()) {
- // Free the name for this value.
- Name->Destroy();
- Name = 0;
- return;
- }
-
- if (Name)
- Name->Destroy();
-
- // NOTE: Could optimize for the case where the name is shrinking, to avoid
- // deallocating and then reallocating.
-
- // Create the new name.
- Name = ValueName::Create(NameRef.begin(), NameRef.end());
- Name->setValue(this);
- return;
- }
-
- // NOTE: Could optimize for the case where the name is shrinking, to avoid
- // deallocating and then reallocating.
- if (hasName()) {
- // Remove old name.
- ST->removeValueName(Name);
- Name->Destroy();
- Name = 0;
-
- if (NameRef.empty())
- return;
- }
-
- // Name is changing to something new.
- Name = ST->createValueName(NameRef, this);
-}
-
-
-/// takeName - transfer the name from V to this value, setting V's name to
-/// empty. It is an error to call V->takeName(V).
-void Value::takeName(Value *V) {
- assert(SubclassID != MDStringVal && "Cannot take the name of an MDString!");
-
- ValueSymbolTable *ST = 0;
- // If this value has a name, drop it.
- if (hasName()) {
- // Get the symtab this is in.
- if (getSymTab(this, ST)) {
- // We can't set a name on this value, but we need to clear V's name if
- // it has one.
- if (V->hasName()) V->setName("");
- return; // Cannot set a name on this value (e.g. constant).
- }
-
- // Remove old name.
- if (ST)
- ST->removeValueName(Name);
- Name->Destroy();
- Name = 0;
- }
-
- // Now we know that this has no name.
-
- // If V has no name either, we're done.
- if (!V->hasName()) return;
-
- // Get this's symtab if we didn't before.
- if (!ST) {
- if (getSymTab(this, ST)) {
- // Clear V's name.
- V->setName("");
- return; // Cannot set a name on this value (e.g. constant).
- }
- }
-
- // Get V's ST; this should always succeed, because V has a name.
- ValueSymbolTable *VST;
- bool Failure = getSymTab(V, VST);
- assert(!Failure && "V has a name, so it should have a ST!"); (void)Failure;
-
- // If these values are both in the same symtab, we can do this very fast.
- // This works even if both values have no symtab yet.
- if (ST == VST) {
- // Take the name!
- Name = V->Name;
- V->Name = 0;
- Name->setValue(this);
- return;
- }
-
- // Otherwise, things are slightly more complex. Remove V's name from VST and
- // then reinsert it into ST.
-
- if (VST)
- VST->removeValueName(V->Name);
- Name = V->Name;
- V->Name = 0;
- Name->setValue(this);
-
- if (ST)
- ST->reinsertValue(this);
-}
-
-
-void Value::replaceAllUsesWith(Value *New) {
- assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
- assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
- assert(New->getType() == getType() &&
- "replaceAllUses of value with new value of different type!");
-
- // Notify all ValueHandles (if present) that this value is going away.
- if (HasValueHandle)
- ValueHandleBase::ValueIsRAUWd(this, New);
-
- while (!use_empty()) {
- Use &U = *UseList;
- // Must handle Constants specially, we cannot call replaceUsesOfWith on a
- // constant because they are uniqued.
- if (Constant *C = dyn_cast<Constant>(U.getUser())) {
- if (!isa<GlobalValue>(C)) {
- C->replaceUsesOfWithOnConstant(this, New, &U);
- continue;
- }
- }
-
- U.set(New);
- }
-
- if (BasicBlock *BB = dyn_cast<BasicBlock>(this))
- BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
-}
-
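The canonical replace-and-erase idiom built on this (sketch; OldInst and NewInst are assumed instructions of identical type):

    NewInst->takeName(OldInst);           // inherit the name, if any
    OldInst->replaceAllUsesWith(NewInst); // rewrites every Use; notifies handles
    OldInst->eraseFromParent();           // safe now: no uses remain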
-namespace {
-// Various metrics for how much to strip off of pointers.
-enum PointerStripKind {
- PSK_ZeroIndices,
- PSK_InBoundsConstantIndices,
- PSK_InBounds
-};
-
-template <PointerStripKind StripKind>
-static Value *stripPointerCastsAndOffsets(Value *V) {
- if (!V->getType()->isPointerTy())
- return V;
-
- // Even though we don't look through PHI nodes, we could be called on an
- // instruction in an unreachable block, which may be on a cycle.
- SmallPtrSet<Value *, 4> Visited;
-
- Visited.insert(V);
- do {
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- switch (StripKind) {
- case PSK_ZeroIndices:
- if (!GEP->hasAllZeroIndices())
- return V;
- break;
- case PSK_InBoundsConstantIndices:
- if (!GEP->hasAllConstantIndices())
- return V;
- // fallthrough
- case PSK_InBounds:
- if (!GEP->isInBounds())
- return V;
- break;
- }
- V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast) {
- V = cast<Operator>(V)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->mayBeOverridden())
- return V;
- V = GA->getAliasee();
- } else {
- return V;
- }
- assert(V->getType()->isPointerTy() && "Unexpected operand type!");
- } while (Visited.insert(V));
-
- return V;
-}
-} // namespace
-
-Value *Value::stripPointerCasts() {
- return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
-}
-
-Value *Value::stripInBoundsConstantOffsets() {
- return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
-}
-
-Value *Value::stripInBoundsOffsets() {
- return stripPointerCastsAndOffsets<PSK_InBounds>(this);
-}
-
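Typical consumption of the strippers (sketch; Ptr is an assumed Value* of pointer type): peel bitcasts, zero-index GEPs, and non-overridable aliases to reach the underlying object:

    Value *Underlying = Ptr->stripPointerCasts();
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Underlying)) {
      (void)GV; // Ptr was just cast/zero-GEP dressing over GV
    }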
-/// isDereferenceablePointer - Test if this value is always a pointer to
-/// allocated and suitably aligned memory for a simple load or store.
-static bool isDereferenceablePointer(const Value *V,
- SmallPtrSet<const Value *, 32> &Visited) {
- // Note that it is not safe to speculate into a malloc'd region because
- // malloc may return null.
- // It's also not always safe to follow a bitcast, for example:
- // bitcast i8* (alloca i8) to i32*
- // would result in a 4-byte load from a 1-byte alloca. Some cases could
- // be handled using DataLayout to check sizes and alignments though.
-
- // These are obviously ok.
- if (isa<AllocaInst>(V)) return true;
-
- // Global variables which can't collapse to null are ok.
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return !GV->hasExternalWeakLinkage();
-
- // byval arguments are ok.
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValAttr();
-
- // For GEPs, determine if the indexing lands within the allocated object.
- if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- // Conservatively require that the base pointer be fully dereferenceable.
- if (!Visited.insert(GEP->getOperand(0)))
- return false;
- if (!isDereferenceablePointer(GEP->getOperand(0), Visited))
- return false;
- // Check the indices.
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::const_op_iterator I = GEP->op_begin()+1,
- E = GEP->op_end(); I != E; ++I) {
- Value *Index = *I;
- Type *Ty = *GTI++;
- // Struct indices can't be out of bounds.
- if (isa<StructType>(Ty))
- continue;
- ConstantInt *CI = dyn_cast<ConstantInt>(Index);
- if (!CI)
- return false;
- // Zero is always ok.
- if (CI->isZero())
- continue;
- // Check to see that it's within the bounds of an array.
- ArrayType *ATy = dyn_cast<ArrayType>(Ty);
- if (!ATy)
- return false;
- if (CI->getValue().getActiveBits() > 64)
- return false;
- if (CI->getZExtValue() >= ATy->getNumElements())
- return false;
- }
- // Indices check out; this is dereferenceable.
- return true;
- }
-
- // If we don't know, assume the worst.
- return false;
-}
-
-/// isDereferenceablePointer - Test if this value is always a pointer to
-/// allocated and suitably aligned memory for a simple load or store.
-bool Value::isDereferenceablePointer() const {
- SmallPtrSet<const Value *, 32> Visited;
- return ::isDereferenceablePointer(this, Visited);
-}
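
As a hedged sketch of the intended client, speculation code can use this query as its safety gate before hoisting a load above a branch (the helper name is hypothetical):

    #include "llvm/Value.h"
    using namespace llvm;

    // Hypothetical guard: a malloc'd or external-weak pointer may be null,
    // so only known-dereferenceable pointers are safe to load speculatively.
    static bool canSpeculateLoadFrom(const Value *Ptr) {
      return Ptr->isDereferenceablePointer();
    }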
-
-/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
-/// return the value in the PHI node corresponding to PredBB. If not, return
-/// this value. This is useful if you want to know the value something has in a
-/// predecessor block.
-Value *Value::DoPHITranslation(const BasicBlock *CurBB,
- const BasicBlock *PredBB) {
- PHINode *PN = dyn_cast<PHINode>(this);
- if (PN && PN->getParent() == CurBB)
- return PN->getIncomingValueForBlock(PredBB);
- return this;
-}
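
For example (sketch): if %v is 'phi i32 [ %a, %p1 ], [ %b, %p2 ]' in block %bb, then translating %v across the %p1 edge yields %a, and any non-PHI value translates to itself:

    #include "llvm/BasicBlock.h"
    using namespace llvm;

    // Sketch: the value V held on entry to BB from predecessor Pred.
    static Value *valueInPred(Value *V, const BasicBlock *BB,
                              const BasicBlock *Pred) {
      return V->DoPHITranslation(BB, Pred);
    }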
-
-LLVMContext &Value::getContext() const { return VTy->getContext(); }
-
-//===----------------------------------------------------------------------===//
-// ValueHandleBase Class
-//===----------------------------------------------------------------------===//
-
-/// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
-/// List is known to point into the existing use list.
-void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
- assert(List && "Handle list is null?");
-
- // Splice ourselves into the list.
- Next = *List;
- *List = this;
- setPrevPtr(List);
- if (Next) {
- Next->setPrevPtr(&Next);
- assert(VP.getPointer() == Next->VP.getPointer() && "Added to wrong list?");
- }
-}
-
-void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) {
- assert(List && "Must insert after existing node");
-
- Next = List->Next;
- setPrevPtr(&List->Next);
- List->Next = this;
- if (Next)
- Next->setPrevPtr(&Next);
-}
-
-/// AddToUseList - Add this ValueHandle to the use list for VP.
-void ValueHandleBase::AddToUseList() {
- assert(VP.getPointer() && "Null pointer doesn't have a use list!");
-
- LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl;
-
- if (VP.getPointer()->HasValueHandle) {
- // If this value already has a ValueHandle, then it must be in the
- // ValueHandles map already.
- ValueHandleBase *&Entry = pImpl->ValueHandles[VP.getPointer()];
- assert(Entry != 0 && "Value doesn't have any handles?");
- AddToExistingUseList(&Entry);
- return;
- }
-
- // Ok, it doesn't have any handles yet, so we must insert it into the
- // DenseMap. However, doing this insertion could cause the DenseMap to
- // reallocate itself, which would invalidate all of the PrevP pointers that
- // point into the old table. Handle this by checking for reallocation and
- // updating the stale pointers only if needed.
- DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
- const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
-
- ValueHandleBase *&Entry = Handles[VP.getPointer()];
- assert(Entry == 0 && "Value really did already have handles?");
- AddToExistingUseList(&Entry);
- VP.getPointer()->HasValueHandle = true;
-
- // If reallocation didn't happen or if this was the first insertion, don't
- // walk the table.
- if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
- Handles.size() == 1) {
- return;
- }
-
- // Okay, reallocation did happen. Fix the Prev Pointers.
- for (DenseMap<Value*, ValueHandleBase*>::iterator I = Handles.begin(),
- E = Handles.end(); I != E; ++I) {
- assert(I->second && I->first == I->second->VP.getPointer() &&
- "List invariant broken!");
- I->second->setPrevPtr(&I->second);
- }
-}
-
-/// RemoveFromUseList - Remove this ValueHandle from its current use list.
-void ValueHandleBase::RemoveFromUseList() {
- assert(VP.getPointer() && VP.getPointer()->HasValueHandle &&
- "Pointer doesn't have a use list!");
-
- // Unlink this from its use list.
- ValueHandleBase **PrevPtr = getPrevPtr();
- assert(*PrevPtr == this && "List invariant broken");
-
- *PrevPtr = Next;
- if (Next) {
- assert(Next->getPrevPtr() == &Next && "List invariant broken");
- Next->setPrevPtr(PrevPtr);
- return;
- }
-
- // If the Next pointer was null, then it is possible that this was the last
- // ValueHandle watching VP. If so, delete its entry from the ValueHandles
- // map.
- LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl;
- DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
- if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
- Handles.erase(VP.getPointer());
- VP.getPointer()->HasValueHandle = false;
- }
-}
-
-
-void ValueHandleBase::ValueIsDeleted(Value *V) {
- assert(V->HasValueHandle && "Should only be called if ValueHandles present");
-
- // Get the linked list base, which is guaranteed to exist since the
- // HasValueHandle flag is set.
- LLVMContextImpl *pImpl = V->getContext().pImpl;
- ValueHandleBase *Entry = pImpl->ValueHandles[V];
- assert(Entry && "Value bit set but no entries exist");
-
- // We use a local ValueHandleBase as an iterator so that ValueHandles can add
- // and remove themselves from the list without breaking our iteration. This
- // is not really an AssertingVH; we just have to give ValueHandleBase a kind.
- // Note that we deliberately do not support the case where dropping a value
- // handle results in a new value handle being permanently added to the list
- // (as might occur in theory for CallbackVH's): the new value handle will not
- // be processed and the checking code will mete out righteous punishment if
- // the handle is still present once we have finished processing all the other
- // value handles (it is fine to momentarily add then remove a value handle).
- for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
- Iterator.RemoveFromUseList();
- Iterator.AddToExistingUseListAfter(Entry);
- assert(Entry->Next == &Iterator && "Loop invariant broken.");
-
- switch (Entry->getKind()) {
- case Assert:
- break;
- case Tracking:
- // Mark that this value has been deleted by setting it to an invalid Value
- // pointer.
- Entry->operator=(DenseMapInfo<Value *>::getTombstoneKey());
- break;
- case Weak:
- // Weak just goes to null, which will unlink it from the list.
- Entry->operator=(0);
- break;
- case Callback:
- // Forward to the subclass's implementation.
- static_cast<CallbackVH*>(Entry)->deleted();
- break;
- }
- }
-
- // All callbacks, weak references, and assertingVHs should be dropped by now.
- if (V->HasValueHandle) {
-#ifndef NDEBUG // Only in +Asserts mode...
- dbgs() << "While deleting: " << *V->getType() << " %" << V->getName()
- << "\n";
- if (pImpl->ValueHandles[V]->getKind() == Assert)
- llvm_unreachable("An asserting value handle still pointed to this"
- " value!");
-
-#endif
- llvm_unreachable("All references to V were not removed?");
- }
-}
-
-
-void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
- assert(Old->HasValueHandle &&"Should only be called if ValueHandles present");
- assert(Old != New && "Changing value into itself!");
-
- // Get the linked list base, which is guaranteed to exist since the
- // HasValueHandle flag is set.
- LLVMContextImpl *pImpl = Old->getContext().pImpl;
- ValueHandleBase *Entry = pImpl->ValueHandles[Old];
-
- assert(Entry && "Value bit set but no entries exist");
-
- // We use a local ValueHandleBase as an iterator so that
- // ValueHandles can add and remove themselves from the list without
- // breaking our iteration. This is not really an AssertingVH; we
- // just have to give ValueHandleBase some kind.
- for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
- Iterator.RemoveFromUseList();
- Iterator.AddToExistingUseListAfter(Entry);
- assert(Entry->Next == &Iterator && "Loop invariant broken.");
-
- switch (Entry->getKind()) {
- case Assert:
- // Asserting handle does not follow RAUW implicitly.
- break;
- case Tracking:
- // Tracking goes to new value like a WeakVH. Note that this may make it
- // something incompatible with its templated type. We don't want to have a
- // virtual (or inline) interface to handle this though, so instead we make
- // the TrackingVH accessors guarantee that a client never sees this value.
-
- // FALLTHROUGH
- case Weak:
- // Weak goes to the new value, which will unlink it from Old's list.
- Entry->operator=(New);
- break;
- case Callback:
- // Forward to the subclass's implementation.
- static_cast<CallbackVH*>(Entry)->allUsesReplacedWith(New);
- break;
- }
- }
-
-#ifndef NDEBUG
- // If any new tracking or weak value handles were added while processing the
- // list, then complain about it now.
- if (Old->HasValueHandle)
- for (Entry = pImpl->ValueHandles[Old]; Entry; Entry = Entry->Next)
- switch (Entry->getKind()) {
- case Tracking:
- case Weak:
- dbgs() << "After RAUW from " << *Old->getType() << " %"
- << Old->getName() << " to " << *New->getType() << " %"
- << New->getName() << "\n";
- llvm_unreachable("A tracking or weak value handle still pointed to the"
- " old value!\n");
- default:
- break;
- }
-#endif
-}
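
A minimal sketch of the observable difference between the kinds dispatched above, assuming the pre-llvm/IR ValueHandle.h path used by this tree:

    #include "llvm/Support/ValueHandle.h"
    using namespace llvm;

    // Sketch: a WeakVH follows RAUW to the new value, while the Assert kind
    // above deliberately stays on the old value.
    static void demoWeakVH(Value *Old, Value *New) {
      WeakVH WH(Old);
      Old->replaceAllUsesWith(New); // runs ValueIsRAUWd over Old's handle list
      // WH now refers to New; the Weak case above re-seated the handle.
    }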
-
-// Default implementation for CallbackVH.
-void CallbackVH::allUsesReplacedWith(Value *) {}
-
-void CallbackVH::deleted() {
- setValPtr(NULL);
-}
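
These two virtuals are the whole CallbackVH customization surface. A minimal subclass sketch (the class name and the cache it mentions are hypothetical; pre-llvm/IR header path assumed):

    #include "llvm/Support/ValueHandle.h"
    using namespace llvm;

    // Hypothetical: a handle that evicts a cache entry when its key dies.
    class CacheVH : public CallbackVH {
    public:
      explicit CacheVH(Value *V) : CallbackVH(V) {}
      virtual void deleted() {
        // ...evict the cache entry keyed on this value here...
        CallbackVH::deleted(); // then null the handle, like the default above
      }
      virtual void allUsesReplacedWith(Value *New) {
        // ...re-key the cache entry to New here...
      }
    };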
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
deleted file mode 100644
index f1c970361a50..000000000000
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-//===-- ValueSymbolTable.cpp - Implement the ValueSymbolTable class -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ValueSymbolTable class for the VMCore library.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "valuesymtab"
-#include "llvm/GlobalValue.h"
-#include "llvm/Type.h"
-#include "llvm/ValueSymbolTable.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-// Class destructor
-ValueSymbolTable::~ValueSymbolTable() {
-#ifndef NDEBUG // Only do this in +Asserts mode...
- for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI)
- dbgs() << "Value still in symbol table! Type = '"
- << *VI->getValue()->getType() << "' Name = '"
- << VI->getKeyData() << "'\n";
- assert(vmap.empty() && "Values remain in symbol table!");
-#endif
-}
-
-// Reinsert an already-named value into the symbol table, renaming on conflict.
-//
-void ValueSymbolTable::reinsertValue(Value* V) {
- assert(V->hasName() && "Can't insert nameless Value into symbol table");
-
- // Try inserting the name, assuming it won't conflict.
- if (vmap.insert(V->Name)) {
- //DEBUG(dbgs() << " Inserted value: " << V->Name << ": " << *V << "\n");
- return;
- }
-
- // Otherwise, there is a naming conflict. Rename this value.
- SmallString<256> UniqueName(V->getName().begin(), V->getName().end());
-
- // The name is already in use; free it so we can allocate a new name.
- V->Name->Destroy();
-
- unsigned BaseSize = UniqueName.size();
- while (1) {
- // Trim any suffix off and append the next number.
- UniqueName.resize(BaseSize);
- raw_svector_ostream(UniqueName) << ++LastUnique;
-
- // Try inserting the vmap entry with this suffix.
- ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
- if (NewName.getValue() == 0) {
- // Newly inserted name. Success!
- NewName.setValue(V);
- V->Name = &NewName;
- //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
- return;
- }
- }
-}
-
-void ValueSymbolTable::removeValueName(ValueName *V) {
- //DEBUG(dbgs() << " Removing Value: " << V->getKeyData() << "\n");
- // Remove the value from the symbol table.
- vmap.remove(V);
-}
-
-/// createValueName - This method attempts to create a value name and insert
-/// it into the symbol table with the specified name. If it conflicts, it
-/// auto-renames the name and returns that instead.
-ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
- // In the common case, the name is not already in the symbol table.
- ValueName &Entry = vmap.GetOrCreateValue(Name);
- if (Entry.getValue() == 0) {
- Entry.setValue(V);
- //DEBUG(dbgs() << " Inserted value: " << Entry.getKeyData() << ": "
- // << *V << "\n");
- return &Entry;
- }
-
- // Otherwise, there is a naming conflict. Rename this value.
- SmallString<256> UniqueName(Name.begin(), Name.end());
-
- while (1) {
- // Trim any suffix off and append the next number.
- UniqueName.resize(Name.size());
- raw_svector_ostream(UniqueName) << ++LastUnique;
-
- // Try inserting the vmap entry with this suffix.
- ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
- if (NewName.getValue() == 0) {
- // Newly inserted name. Success!
- NewName.setValue(V);
- //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
- return &NewName;
- }
- }
-}
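
In effect (sketch): callers may pick colliding names freely, and the symbol table resolves the clash with a ++LastUnique suffix, so the exact number depends on the counter, not on the collision count. This assumes both values live in the same symbol table (e.g. the same function):

    #include "llvm/Value.h"
    #include "llvm/ADT/Twine.h"
    using namespace llvm;

    // Sketch: the second setName collides and is auto-renamed here.
    static void nameBoth(Value *A, Value *B) {
      A->setName("tmp"); // stays "tmp"
      B->setName("tmp"); // becomes "tmp<N>" for some unique N
    }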
-
-
-// dump - print out the symbol table
-//
-void ValueSymbolTable::dump() const {
- //DEBUG(dbgs() << "ValueSymbolTable:\n");
- for (const_iterator I = begin(), E = end(); I != E; ++I) {
- //DEBUG(dbgs() << " '" << I->getKeyData() << "' = ");
- I->getValue()->dump();
- //DEBUG(dbgs() << "\n");
- }
-}
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
deleted file mode 100644
index 2ee9f0f4c99f..000000000000
--- a/lib/VMCore/ValueTypes.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements methods in the CodeGen/ValueTypes.h header.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Support/ErrorHandling.h"
-using namespace llvm;
-
-EVT EVT::changeExtendedVectorElementTypeToInteger() const {
- LLVMContext &Context = LLVMTy->getContext();
- EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits());
- return getVectorVT(Context, IntTy, getVectorNumElements());
-}
-
-EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) {
- EVT VT;
- VT.LLVMTy = IntegerType::get(Context, BitWidth);
- assert(VT.isExtended() && "Type is not extended!");
- return VT;
-}
-
-EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT,
- unsigned NumElements) {
- EVT ResultVT;
- ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), NumElements);
- assert(ResultVT.isExtended() && "Type is not extended!");
- return ResultVT;
-}
-
-bool EVT::isExtendedFloatingPoint() const {
- assert(isExtended() && "Type is not extended!");
- return LLVMTy->isFPOrFPVectorTy();
-}
-
-bool EVT::isExtendedInteger() const {
- assert(isExtended() && "Type is not extended!");
- return LLVMTy->isIntOrIntVectorTy();
-}
-
-bool EVT::isExtendedVector() const {
- assert(isExtended() && "Type is not extended!");
- return LLVMTy->isVectorTy();
-}
-
-bool EVT::isExtended16BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 16;
-}
-
-bool EVT::isExtended32BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 32;
-}
-
-bool EVT::isExtended64BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 64;
-}
-
-bool EVT::isExtended128BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 128;
-}
-
-bool EVT::isExtended256BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 256;
-}
-
-bool EVT::isExtended512BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 512;
-}
-
-bool EVT::isExtended1024BitVector() const {
- return isExtendedVector() && getExtendedSizeInBits() == 1024;
-}
-
-EVT EVT::getExtendedVectorElementType() const {
- assert(isExtended() && "Type is not extended!");
- return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
-}
-
-unsigned EVT::getExtendedVectorNumElements() const {
- assert(isExtended() && "Type is not extended!");
- return cast<VectorType>(LLVMTy)->getNumElements();
-}
-
-unsigned EVT::getExtendedSizeInBits() const {
- assert(isExtended() && "Type is not extended!");
- if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
- return ITy->getBitWidth();
- if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
- return VTy->getBitWidth();
- llvm_unreachable("Unrecognized extended type!");
-}
-
-/// getEVTString - This function returns the value type as a string, e.g. "i32".
-std::string EVT::getEVTString() const {
- switch (V.SimpleTy) {
- default:
- if (isVector())
- return "v" + utostr(getVectorNumElements()) +
- getVectorElementType().getEVTString();
- if (isInteger())
- return "i" + utostr(getSizeInBits());
- llvm_unreachable("Invalid EVT!");
- case MVT::i1: return "i1";
- case MVT::i8: return "i8";
- case MVT::i16: return "i16";
- case MVT::i32: return "i32";
- case MVT::i64: return "i64";
- case MVT::i128: return "i128";
- case MVT::f16: return "f16";
- case MVT::f32: return "f32";
- case MVT::f64: return "f64";
- case MVT::f80: return "f80";
- case MVT::f128: return "f128";
- case MVT::ppcf128: return "ppcf128";
- case MVT::isVoid: return "isVoid";
- case MVT::Other: return "ch";
- case MVT::Glue: return "glue";
- case MVT::x86mmx: return "x86mmx";
- case MVT::v2i1: return "v2i1";
- case MVT::v4i1: return "v4i1";
- case MVT::v8i1: return "v8i1";
- case MVT::v16i1: return "v16i1";
- case MVT::v2i8: return "v2i8";
- case MVT::v4i8: return "v4i8";
- case MVT::v8i8: return "v8i8";
- case MVT::v16i8: return "v16i8";
- case MVT::v32i8: return "v32i8";
- case MVT::v1i16: return "v1i16";
- case MVT::v2i16: return "v2i16";
- case MVT::v4i16: return "v4i16";
- case MVT::v8i16: return "v8i16";
- case MVT::v16i16: return "v16i16";
- case MVT::v1i32: return "v1i32";
- case MVT::v2i32: return "v2i32";
- case MVT::v4i32: return "v4i32";
- case MVT::v8i32: return "v8i32";
- case MVT::v16i32: return "v16i32";
- case MVT::v1i64: return "v1i64";
- case MVT::v2i64: return "v2i64";
- case MVT::v4i64: return "v4i64";
- case MVT::v8i64: return "v8i64";
- case MVT::v16i64: return "v16i64";
- case MVT::v2f32: return "v2f32";
- case MVT::v2f16: return "v2f16";
- case MVT::v4f32: return "v4f32";
- case MVT::v8f32: return "v8f32";
- case MVT::v2f64: return "v2f64";
- case MVT::v4f64: return "v4f64";
- case MVT::Metadata:return "Metadata";
- case MVT::Untyped: return "Untyped";
- }
-}
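
A quick sketch of the lookup: simple types come straight from the table above, while non-simple types are composed in the default case ("v" + N + element name, or "i" + bits):

    #include "llvm/CodeGen/ValueTypes.h"
    #include <string>
    using namespace llvm;

    // Sketch: stringifying a simple EVT.
    static std::string sampleEVTName() {
      return EVT(MVT::v4f32).getEVTString(); // "v4f32"
    }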
-
-/// getTypeForEVT - This method returns an LLVM type corresponding to the
-/// specified EVT. For integer types, this returns an unsigned type. Note
-/// that this will abort for types that cannot be represented.
-Type *EVT::getTypeForEVT(LLVMContext &Context) const {
- switch (V.SimpleTy) {
- default:
- assert(isExtended() && "Type is not extended!");
- return LLVMTy;
- case MVT::isVoid: return Type::getVoidTy(Context);
- case MVT::i1: return Type::getInt1Ty(Context);
- case MVT::i8: return Type::getInt8Ty(Context);
- case MVT::i16: return Type::getInt16Ty(Context);
- case MVT::i32: return Type::getInt32Ty(Context);
- case MVT::i64: return Type::getInt64Ty(Context);
- case MVT::i128: return IntegerType::get(Context, 128);
- case MVT::f16: return Type::getHalfTy(Context);
- case MVT::f32: return Type::getFloatTy(Context);
- case MVT::f64: return Type::getDoubleTy(Context);
- case MVT::f80: return Type::getX86_FP80Ty(Context);
- case MVT::f128: return Type::getFP128Ty(Context);
- case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
- case MVT::x86mmx: return Type::getX86_MMXTy(Context);
- case MVT::v2i1: return VectorType::get(Type::getInt1Ty(Context), 2);
- case MVT::v4i1: return VectorType::get(Type::getInt1Ty(Context), 4);
- case MVT::v8i1: return VectorType::get(Type::getInt1Ty(Context), 8);
- case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16);
- case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
- case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
- case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
- case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16);
- case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32);
- case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1);
- case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
- case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
- case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
- case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
- case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1);
- case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
- case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
- case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
- case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16);
- case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
- case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
- case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
- case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
- case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
- case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
- case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
- case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
- case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
- case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
- case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
- case MVT::Metadata: return Type::getMetadataTy(Context);
- }
-}
-
-/// getEVT - Return the value type corresponding to the specified type. This
-/// returns all pointers as MVT::iPTR. If HandleUnknown is true, unknown types
-/// are returned as Other, otherwise they are invalid.
-EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
- switch (Ty->getTypeID()) {
- default:
- if (HandleUnknown) return MVT(MVT::Other);
- llvm_unreachable("Unknown type!");
- case Type::VoidTyID:
- return MVT::isVoid;
- case Type::IntegerTyID:
- return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
- case Type::HalfTyID: return MVT(MVT::f16);
- case Type::FloatTyID: return MVT(MVT::f32);
- case Type::DoubleTyID: return MVT(MVT::f64);
- case Type::X86_FP80TyID: return MVT(MVT::f80);
- case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
- case Type::FP128TyID: return MVT(MVT::f128);
- case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
- case Type::PointerTyID: return MVT(MVT::iPTR);
- case Type::VectorTyID: {
- VectorType *VTy = cast<VectorType>(Ty);
- return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
- VTy->getNumElements());
- }
- }
-}
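
A minimal round-trip sketch using getEVT together with getTypeForEVT above (pre-llvm/IR header paths, matching this file's own includes):

    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    // Sketch: <4 x float> maps to the simple type MVT::v4f32 and back.
    static EVT evtForFloat4(LLVMContext &Ctx) {
      Type *Ty = VectorType::get(Type::getFloatTy(Ctx), 4);
      return EVT::getEVT(Ty); // getTypeForEVT(Ctx) on the result gives Ty again
    }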
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
deleted file mode 100644
index eb40b09d29f7..000000000000
--- a/lib/VMCore/Verifier.cpp
+++ /dev/null
@@ -1,1997 +0,0 @@
-//===-- Verifier.cpp - Implement the Module Verifier ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the function verifier interface, which can be used for some
-// sanity checking of input to the system.
-//
-// Note that this does not provide full `Java style' security and verification;
-// instead it just tries to ensure that code is well-formed.
-//
-// * Both of a binary operator's parameters are of the same type
-// * Verify that the indices of mem access instructions match other operands
-// * Verify that arithmetic and other things are only performed on first-class
-// types. Verify that shifts & logicals only happen on integrals, for example.
-// * All of the constants in a switch statement are of the correct type
-// * The code is in valid SSA form
-// * It should be illegal to put a label into any other type (like a structure)
-// or to return one. [except constant arrays!]
-// * Only phi nodes can be self referential: 'add i32 %0, %0 ; <int>:0' is bad
-// * PHI nodes must have an entry for each predecessor, with no extras.
-// * PHI nodes must be the first thing in a basic block, all grouped together
-// * PHI nodes must have at least one entry
-// * All basic blocks should only end with terminator insts, not contain them
-// * The entry node to a function must not have predecessors
-// * All Instructions must be embedded into a basic block
-// * Functions cannot take a void-typed parameter
-// * Verify that a function's argument list agrees with its declared type.
-// * It is illegal to specify a name for a void value.
-// * It is illegal to have an internal global value with no initializer
-// * It is illegal to have a ret instruction that returns a value that does not
-// agree with the function return value type.
-// * Function call argument types match the function prototype
-// * A landing pad is defined by a landingpad instruction, and can be jumped to
-// only by the unwind edge of an invoke instruction.
-// * A landingpad instruction must be the first non-PHI instruction in the
-// block.
-// * All landingpad instructions within the same function must use the same
-// personality function.
-// * All other things that are tested by asserts spread about the code...
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/ConstantRange.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cstdarg>
-using namespace llvm;
-
-namespace { // Anonymous namespace for class
- struct PreVerifier : public FunctionPass {
- static char ID; // Pass ID, replacement for typeid
-
- PreVerifier() : FunctionPass(ID) {
- initializePreVerifierPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- // Check that the prerequisites for successful DominatorTree construction
- // are satisfied.
- bool runOnFunction(Function &F) {
- bool Broken = false;
-
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- if (I->empty() || !I->back().isTerminator()) {
- dbgs() << "Basic Block in function '" << F.getName()
- << "' does not have terminator!\n";
- WriteAsOperand(dbgs(), I, true);
- dbgs() << "\n";
- Broken = true;
- }
- }
-
- if (Broken)
- report_fatal_error("Broken module, no Basic Block terminator!");
-
- return false;
- }
- };
-}
-
-char PreVerifier::ID = 0;
-INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification",
- false, false)
-static char &PreVerifyID = PreVerifier::ID;
-
-namespace {
- struct Verifier : public FunctionPass, public InstVisitor<Verifier> {
- static char ID; // Pass ID, replacement for typeid
- bool Broken; // Is this module found to be broken?
- VerifierFailureAction action; // What to do if verification fails.
- Module *Mod; // Module we are verifying right now
- LLVMContext *Context; // Context within which we are verifying
- DominatorTree *DT; // Dominator Tree; caution: can be null!
-
- std::string Messages;
- raw_string_ostream MessagesStr;
-
- /// InstsInThisBlock - when verifying a basic block, keep track of all of the
- /// instructions we have seen so far. This allows us to do efficient
- /// dominance checks for the case when an instruction has an operand that is
- /// an instruction in the same block.
- SmallPtrSet<Instruction*, 16> InstsInThisBlock;
-
- /// MDNodes - keep track of the metadata nodes that have been checked
- /// already.
- SmallPtrSet<MDNode *, 32> MDNodes;
-
- /// PersonalityFn - The personality function referenced by the
- /// LandingPadInsts. All LandingPadInsts within the same function must use
- /// the same personality function.
- const Value *PersonalityFn;
-
- Verifier()
- : FunctionPass(ID), Broken(false),
- action(AbortProcessAction), Mod(0), Context(0), DT(0),
- MessagesStr(Messages), PersonalityFn(0) {
- initializeVerifierPass(*PassRegistry::getPassRegistry());
- }
- explicit Verifier(VerifierFailureAction ctn)
- : FunctionPass(ID), Broken(false), action(ctn), Mod(0),
- Context(0), DT(0), MessagesStr(Messages), PersonalityFn(0) {
- initializeVerifierPass(*PassRegistry::getPassRegistry());
- }
-
- bool doInitialization(Module &M) {
- Mod = &M;
- Context = &M.getContext();
-
- // We must abort before returning to the pass manager, or else the
- // pass manager may try to run other passes on the broken module.
- return abortIfBroken();
- }
-
- bool runOnFunction(Function &F) {
- // Get dominator information if we are being run by PassManager
- DT = &getAnalysis<DominatorTree>();
-
- Mod = F.getParent();
- if (!Context) Context = &F.getContext();
-
- visit(F);
- InstsInThisBlock.clear();
- PersonalityFn = 0;
-
- // We must abort before returning to the pass manager, or else the
- // pass manager may try to run other passes on the broken module.
- return abortIfBroken();
- }
-
- bool doFinalization(Module &M) {
- // Scan through, checking the linkage of all of the external functions now...
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- visitGlobalValue(*I);
-
- // Check to make sure function prototypes are okay.
- if (I->isDeclaration()) visitFunction(*I);
- }
-
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- visitGlobalVariable(*I);
-
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I)
- visitGlobalAlias(*I);
-
- for (Module::named_metadata_iterator I = M.named_metadata_begin(),
- E = M.named_metadata_end(); I != E; ++I)
- visitNamedMDNode(*I);
-
- // If the module is broken, abort at this time.
- return abortIfBroken();
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequiredID(PreVerifyID);
- AU.addRequired<DominatorTree>();
- }
-
- /// abortIfBroken - If the module is broken and we are supposed to abort on
- /// this condition, do so.
- ///
- bool abortIfBroken() {
- if (!Broken) return false;
- MessagesStr << "Broken module found, ";
- switch (action) {
- case AbortProcessAction:
- MessagesStr << "compilation aborted!\n";
- dbgs() << MessagesStr.str();
- // Clients should choose a different reaction if aborting is not desired
- abort();
- case PrintMessageAction:
- MessagesStr << "verification continues.\n";
- dbgs() << MessagesStr.str();
- return false;
- case ReturnStatusAction:
- MessagesStr << "compilation terminated.\n";
- return true;
- }
- llvm_unreachable("Invalid action");
- }
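
The three actions correspond to the public entry point declared in llvm/Analysis/Verifier.h (included by this file); a hedged sketch of a non-aborting check:

    #include "llvm/Analysis/Verifier.h"
    #include "llvm/Module.h"
    #include <string>
    using namespace llvm;

    // Sketch: ReturnStatusAction takes the "compilation terminated" branch
    // above instead of abort(); the result is true when the module is broken.
    static bool moduleIsBroken(const Module &M, std::string &Err) {
      return verifyModule(M, ReturnStatusAction, &Err);
    }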
-
-
- // Verification methods...
- void visitGlobalValue(GlobalValue &GV);
- void visitGlobalVariable(GlobalVariable &GV);
- void visitGlobalAlias(GlobalAlias &GA);
- void visitNamedMDNode(NamedMDNode &NMD);
- void visitMDNode(MDNode &MD, Function *F);
- void visitFunction(Function &F);
- void visitBasicBlock(BasicBlock &BB);
- using InstVisitor<Verifier>::visit;
-
- void visit(Instruction &I);
-
- void visitTruncInst(TruncInst &I);
- void visitZExtInst(ZExtInst &I);
- void visitSExtInst(SExtInst &I);
- void visitFPTruncInst(FPTruncInst &I);
- void visitFPExtInst(FPExtInst &I);
- void visitFPToUIInst(FPToUIInst &I);
- void visitFPToSIInst(FPToSIInst &I);
- void visitUIToFPInst(UIToFPInst &I);
- void visitSIToFPInst(SIToFPInst &I);
- void visitIntToPtrInst(IntToPtrInst &I);
- void visitPtrToIntInst(PtrToIntInst &I);
- void visitBitCastInst(BitCastInst &I);
- void visitPHINode(PHINode &PN);
- void visitBinaryOperator(BinaryOperator &B);
- void visitICmpInst(ICmpInst &IC);
- void visitFCmpInst(FCmpInst &FC);
- void visitExtractElementInst(ExtractElementInst &EI);
- void visitInsertElementInst(InsertElementInst &EI);
- void visitShuffleVectorInst(ShuffleVectorInst &EI);
- void visitVAArgInst(VAArgInst &VAA) { visitInstruction(VAA); }
- void visitCallInst(CallInst &CI);
- void visitInvokeInst(InvokeInst &II);
- void visitGetElementPtrInst(GetElementPtrInst &GEP);
- void visitLoadInst(LoadInst &LI);
- void visitStoreInst(StoreInst &SI);
- void verifyDominatesUse(Instruction &I, unsigned i);
- void visitInstruction(Instruction &I);
- void visitTerminatorInst(TerminatorInst &I);
- void visitBranchInst(BranchInst &BI);
- void visitReturnInst(ReturnInst &RI);
- void visitSwitchInst(SwitchInst &SI);
- void visitIndirectBrInst(IndirectBrInst &BI);
- void visitSelectInst(SelectInst &SI);
- void visitUserOp1(Instruction &I);
- void visitUserOp2(Instruction &I) { visitUserOp1(I); }
- void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
- void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
- void visitAtomicRMWInst(AtomicRMWInst &RMWI);
- void visitFenceInst(FenceInst &FI);
- void visitAllocaInst(AllocaInst &AI);
- void visitExtractValueInst(ExtractValueInst &EVI);
- void visitInsertValueInst(InsertValueInst &IVI);
- void visitLandingPadInst(LandingPadInst &LPI);
-
- void VerifyCallSite(CallSite CS);
- bool PerformTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty,
- int VT, unsigned ArgNo, std::string &Suffix);
- bool VerifyIntrinsicType(Type *Ty,
- ArrayRef<Intrinsic::IITDescriptor> &Infos,
- SmallVectorImpl<Type*> &ArgTys);
- void VerifyParameterAttrs(Attributes Attrs, Type *Ty,
- bool isReturnValue, const Value *V);
- void VerifyFunctionAttrs(FunctionType *FT, const AttrListPtr &Attrs,
- const Value *V);
-
- void WriteValue(const Value *V) {
- if (!V) return;
- if (isa<Instruction>(V)) {
- MessagesStr << *V << '\n';
- } else {
- WriteAsOperand(MessagesStr, V, true, Mod);
- MessagesStr << '\n';
- }
- }
-
- void WriteType(Type *T) {
- if (!T) return;
- MessagesStr << ' ' << *T;
- }
-
-
- // CheckFailed - A check failed, so print out the message describing the
- // failure. This provides a nice place to put a breakpoint if you want
- // to see why something is not correct.
- void CheckFailed(const Twine &Message,
- const Value *V1 = 0, const Value *V2 = 0,
- const Value *V3 = 0, const Value *V4 = 0) {
- MessagesStr << Message.str() << "\n";
- WriteValue(V1);
- WriteValue(V2);
- WriteValue(V3);
- WriteValue(V4);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, const Value *V1,
- Type *T2, const Value *V3 = 0) {
- MessagesStr << Message.str() << "\n";
- WriteValue(V1);
- WriteType(T2);
- WriteValue(V3);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, Type *T1,
- Type *T2 = 0, Type *T3 = 0) {
- MessagesStr << Message.str() << "\n";
- WriteType(T1);
- WriteType(T2);
- WriteType(T3);
- Broken = true;
- }
- };
-} // End anonymous namespace
-
-char Verifier::ID = 0;
-INITIALIZE_PASS_BEGIN(Verifier, "verify", "Module Verifier", false, false)
-INITIALIZE_PASS_DEPENDENCY(PreVerifier)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_END(Verifier, "verify", "Module Verifier", false, false)
-
-// Assert - We know that cond should be true; if not, print an error message.
-#define Assert(C, M) \
- do { if (!(C)) { CheckFailed(M); return; } } while (0)
-#define Assert1(C, M, V1) \
- do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
-#define Assert2(C, M, V1, V2) \
- do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
-#define Assert3(C, M, V1, V2, V3) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
-#define Assert4(C, M, V1, V2, V3, V4) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
-
-void Verifier::visit(Instruction &I) {
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- Assert1(I.getOperand(i) != 0, "Operand is null", &I);
- InstVisitor<Verifier>::visit(I);
-}
-
-
-void Verifier::visitGlobalValue(GlobalValue &GV) {
- Assert1(!GV.isDeclaration() ||
- GV.isMaterializable() ||
- GV.hasExternalLinkage() ||
- GV.hasDLLImportLinkage() ||
- GV.hasExternalWeakLinkage() ||
- (isa<GlobalAlias>(GV) &&
- (GV.hasLocalLinkage() || GV.hasWeakLinkage())),
- "Global is external, but doesn't have external or dllimport or weak linkage!",
- &GV);
-
- Assert1(!GV.hasDLLImportLinkage() || GV.isDeclaration(),
- "Global is marked as dllimport, but not external", &GV);
-
- Assert1(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
- "Only global variables can have appending linkage!", &GV);
-
- if (GV.hasAppendingLinkage()) {
- GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV);
- Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
- "Only global arrays can have appending linkage!", GVar);
- }
-
- Assert1(!GV.hasLinkOnceODRAutoHideLinkage() || GV.hasDefaultVisibility(),
- "linkonce_odr_auto_hide can only have default visibility!",
- &GV);
-}
-
-void Verifier::visitGlobalVariable(GlobalVariable &GV) {
- if (GV.hasInitializer()) {
- Assert1(GV.getInitializer()->getType() == GV.getType()->getElementType(),
- "Global variable initializer type does not match global "
- "variable type!", &GV);
-
- // If the global has common linkage, it must have a zero initializer and
- // cannot be constant.
- if (GV.hasCommonLinkage()) {
- Assert1(GV.getInitializer()->isNullValue(),
- "'common' global must have a zero initializer!", &GV);
- Assert1(!GV.isConstant(), "'common' global may not be marked constant!",
- &GV);
- }
- } else {
- Assert1(GV.hasExternalLinkage() || GV.hasDLLImportLinkage() ||
- GV.hasExternalWeakLinkage(),
- "invalid linkage type for global declaration", &GV);
- }
-
- if (GV.hasName() && (GV.getName() == "llvm.global_ctors" ||
- GV.getName() == "llvm.global_dtors")) {
- Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
- "invalid linkage for intrinsic global variable", &GV);
- // Don't worry about emitting an error for it not being an array;
- // visitGlobalValue will complain about appending to a non-array.
- if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getType())) {
- StructType *STy = dyn_cast<StructType>(ATy->getElementType());
- PointerType *FuncPtrTy =
- FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
- Assert1(STy && STy->getNumElements() == 2 &&
- STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
- STy->getTypeAtIndex(1) == FuncPtrTy,
- "wrong type for intrinsic global variable", &GV);
- }
- }
-
- visitGlobalValue(GV);
-}
-
-void Verifier::visitGlobalAlias(GlobalAlias &GA) {
- Assert1(!GA.getName().empty(),
- "Alias name cannot be empty!", &GA);
- Assert1(GA.hasExternalLinkage() || GA.hasLocalLinkage() ||
- GA.hasWeakLinkage(),
- "Alias should have external or external weak linkage!", &GA);
- Assert1(GA.getAliasee(),
- "Aliasee cannot be NULL!", &GA);
- Assert1(GA.getType() == GA.getAliasee()->getType(),
- "Alias and aliasee types should match!", &GA);
- Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA);
-
- if (!isa<GlobalValue>(GA.getAliasee())) {
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(GA.getAliasee());
- Assert1(CE &&
- (CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::GetElementPtr) &&
- isa<GlobalValue>(CE->getOperand(0)),
- "Aliasee should be either GlobalValue or bitcast of GlobalValue",
- &GA);
- }
-
- const GlobalValue* Aliasee = GA.resolveAliasedGlobal(/*stopOnWeak*/ false);
- Assert1(Aliasee,
- "Aliasing chain should end with function or global variable", &GA);
-
- visitGlobalValue(GA);
-}
-
-void Verifier::visitNamedMDNode(NamedMDNode &NMD) {
- for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) {
- MDNode *MD = NMD.getOperand(i);
- if (!MD)
- continue;
-
- Assert1(!MD->isFunctionLocal(),
- "Named metadata operand cannot be function local!", MD);
- visitMDNode(*MD, 0);
- }
-}
-
-void Verifier::visitMDNode(MDNode &MD, Function *F) {
- // Only visit each node once. Metadata can be mutually recursive, so this
- // avoids infinite recursion here, as well as being an optimization.
- if (!MDNodes.insert(&MD))
- return;
-
- for (unsigned i = 0, e = MD.getNumOperands(); i != e; ++i) {
- Value *Op = MD.getOperand(i);
- if (!Op)
- continue;
- if (isa<Constant>(Op) || isa<MDString>(Op))
- continue;
- if (MDNode *N = dyn_cast<MDNode>(Op)) {
- Assert2(MD.isFunctionLocal() || !N->isFunctionLocal(),
- "Global metadata operand cannot be function local!", &MD, N);
- visitMDNode(*N, F);
- continue;
- }
- Assert2(MD.isFunctionLocal(), "Invalid operand for global metadata!", &MD, Op);
-
- // If this was an instruction, bb, or argument, verify that it is in the
- // function that we expect.
- Function *ActualF = 0;
- if (Instruction *I = dyn_cast<Instruction>(Op))
- ActualF = I->getParent()->getParent();
- else if (BasicBlock *BB = dyn_cast<BasicBlock>(Op))
- ActualF = BB->getParent();
- else if (Argument *A = dyn_cast<Argument>(Op))
- ActualF = A->getParent();
- assert(ActualF && "Unimplemented function local metadata case!");
-
- Assert2(ActualF == F, "function-local metadata used in wrong function",
- &MD, Op);
- }
-}
-
-// VerifyParameterAttrs - Check the given attributes for an argument or return
-// value of the specified type. The value V is printed in error messages.
-void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty,
- bool isReturnValue, const Value *V) {
- if (!Attrs.hasAttributes())
- return;
-
- Assert1(!Attrs.hasFunctionOnlyAttrs(),
- "Some attributes in '" + Attrs.getAsString() +
- "' only apply to functions!", V);
-
- if (isReturnValue)
- Assert1(!Attrs.hasParameterOnlyAttrs(),
- "Attributes 'byval', 'nest', 'sret', and 'nocapture' "
- "do not apply to return values!", V);
-
- // Check for mutually incompatible attributes.
- Assert1(!((Attrs.hasAttribute(Attributes::ByVal) &&
- Attrs.hasAttribute(Attributes::Nest)) ||
- (Attrs.hasAttribute(Attributes::ByVal) &&
- Attrs.hasAttribute(Attributes::StructRet)) ||
- (Attrs.hasAttribute(Attributes::Nest) &&
- Attrs.hasAttribute(Attributes::StructRet))), "Attributes "
- "'byval, nest, and sret' are incompatible!", V);
-
- Assert1(!((Attrs.hasAttribute(Attributes::ByVal) &&
- Attrs.hasAttribute(Attributes::Nest)) ||
- (Attrs.hasAttribute(Attributes::ByVal) &&
- Attrs.hasAttribute(Attributes::InReg)) ||
- (Attrs.hasAttribute(Attributes::Nest) &&
- Attrs.hasAttribute(Attributes::InReg))), "Attributes "
- "'byval, nest, and inreg' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Attributes::ZExt) &&
- Attrs.hasAttribute(Attributes::SExt)), "Attributes "
- "'zeroext and signext' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Attributes::ReadNone) &&
- Attrs.hasAttribute(Attributes::ReadOnly)), "Attributes "
- "'readnone and readonly' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Attributes::NoInline) &&
- Attrs.hasAttribute(Attributes::AlwaysInline)), "Attributes "
- "'noinline and alwaysinline' are incompatible!", V);
-
- Assert1(!AttrBuilder(Attrs).
- hasAttributes(Attributes::typeIncompatible(Ty)),
- "Wrong types for attribute: " +
- Attributes::typeIncompatible(Ty).getAsString(), V);
-
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
- Assert1(!Attrs.hasAttribute(Attributes::ByVal) ||
- PTy->getElementType()->isSized(),
- "Attribute 'byval' does not support unsized types!", V);
- else
- Assert1(!Attrs.hasAttribute(Attributes::ByVal),
- "Attribute 'byval' only applies to parameters with pointer type!",
- V);
-}
-
-// VerifyFunctionAttrs - Check parameter attributes against a function type.
-// The value V is printed in error messages.
-void Verifier::VerifyFunctionAttrs(FunctionType *FT,
- const AttrListPtr &Attrs,
- const Value *V) {
- if (Attrs.isEmpty())
- return;
-
- bool SawNest = false;
-
- for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
- const AttributeWithIndex &Attr = Attrs.getSlot(i);
-
- Type *Ty;
- if (Attr.Index == 0)
- Ty = FT->getReturnType();
- else if (Attr.Index-1 < FT->getNumParams())
- Ty = FT->getParamType(Attr.Index-1);
- else
- break; // VarArgs attributes, verified elsewhere.
-
- VerifyParameterAttrs(Attr.Attrs, Ty, Attr.Index == 0, V);
-
- if (Attr.Attrs.hasAttribute(Attributes::Nest)) {
- Assert1(!SawNest, "More than one parameter has attribute nest!", V);
- SawNest = true;
- }
-
- if (Attr.Attrs.hasAttribute(Attributes::StructRet))
- Assert1(Attr.Index == 1, "Attribute sret not on first parameter!", V);
- }
-
- Attributes FAttrs = Attrs.getFnAttributes();
- AttrBuilder NotFn(FAttrs);
- NotFn.removeFunctionOnlyAttrs();
- Assert1(!NotFn.hasAttributes(), "Attributes '" +
- Attributes::get(V->getContext(), NotFn).getAsString() +
- "' do not apply to the function!", V);
-
- // Check for mutually incompatible attributes.
- Assert1(!((FAttrs.hasAttribute(Attributes::ByVal) &&
- FAttrs.hasAttribute(Attributes::Nest)) ||
- (FAttrs.hasAttribute(Attributes::ByVal) &&
- FAttrs.hasAttribute(Attributes::StructRet)) ||
- (FAttrs.hasAttribute(Attributes::Nest) &&
- FAttrs.hasAttribute(Attributes::StructRet))), "Attributes "
- "'byval, nest, and sret' are incompatible!", V);
-
- Assert1(!((FAttrs.hasAttribute(Attributes::ByVal) &&
- FAttrs.hasAttribute(Attributes::Nest)) ||
- (FAttrs.hasAttribute(Attributes::ByVal) &&
- FAttrs.hasAttribute(Attributes::InReg)) ||
- (FAttrs.hasAttribute(Attributes::Nest) &&
- FAttrs.hasAttribute(Attributes::InReg))), "Attributes "
- "'byval, nest, and inreg' are incompatible!", V);
-
- Assert1(!(FAttrs.hasAttribute(Attributes::ZExt) &&
- FAttrs.hasAttribute(Attributes::SExt)), "Attributes "
- "'zeroext and signext' are incompatible!", V);
-
- Assert1(!(FAttrs.hasAttribute(Attributes::ReadNone) &&
- FAttrs.hasAttribute(Attributes::ReadOnly)), "Attributes "
- "'readnone and readonly' are incompatible!", V);
-
- Assert1(!(FAttrs.hasAttribute(Attributes::NoInline) &&
- FAttrs.hasAttribute(Attributes::AlwaysInline)), "Attributes "
- "'noinline and alwaysinline' are incompatible!", V);
-}
-
-static bool VerifyAttributeCount(const AttrListPtr &Attrs, unsigned Params) {
- if (Attrs.isEmpty())
- return true;
-
- unsigned LastSlot = Attrs.getNumSlots() - 1;
- unsigned LastIndex = Attrs.getSlot(LastSlot).Index;
- if (LastIndex <= Params
- || (LastIndex == (unsigned)~0
- && (LastSlot == 0 || Attrs.getSlot(LastSlot - 1).Index <= Params)))
- return true;
-
- return false;
-}
-
-// visitFunction - Verify that a function is ok.
-//
-void Verifier::visitFunction(Function &F) {
- // Check function arguments.
- FunctionType *FT = F.getFunctionType();
- unsigned NumArgs = F.arg_size();
-
- Assert1(Context == &F.getContext(),
- "Function context does not match Module context!", &F);
-
- Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
- Assert2(FT->getNumParams() == NumArgs,
- "# formal arguments must match # of arguments for function type!",
- &F, FT);
- Assert1(F.getReturnType()->isFirstClassType() ||
- F.getReturnType()->isVoidTy() ||
- F.getReturnType()->isStructTy(),
- "Functions cannot return aggregate values!", &F);
-
- Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
- "Invalid struct return type!", &F);
-
- const AttrListPtr &Attrs = F.getAttributes();
-
- Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
- "Attributes after last parameter!", &F);
-
- // Check function attributes.
- VerifyFunctionAttrs(FT, Attrs, &F);
-
- // Check that this function meets the restrictions on this calling convention.
- switch (F.getCallingConv()) {
- default:
- break;
- case CallingConv::C:
- break;
- case CallingConv::Fast:
- case CallingConv::Cold:
- case CallingConv::X86_FastCall:
- case CallingConv::X86_ThisCall:
- case CallingConv::Intel_OCL_BI:
- case CallingConv::PTX_Kernel:
- case CallingConv::PTX_Device:
- Assert1(!F.isVarArg(),
- "Varargs functions must have C calling conventions!", &F);
- break;
- }
-
- bool isLLVMdotName = F.getName().size() >= 5 &&
- F.getName().substr(0, 5) == "llvm.";
-
- // Check that the argument values match the function type for this function...
- unsigned i = 0;
- for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
- I != E; ++I, ++i) {
- Assert2(I->getType() == FT->getParamType(i),
- "Argument value does not match function argument type!",
- I, FT->getParamType(i));
- Assert1(I->getType()->isFirstClassType(),
- "Function arguments must have first-class types!", I);
- if (!isLLVMdotName)
- Assert2(!I->getType()->isMetadataTy(),
- "Function takes metadata but isn't an intrinsic", I, &F);
- }
-
- if (F.isMaterializable()) {
- // Function has a body somewhere we can't see.
- } else if (F.isDeclaration()) {
- Assert1(F.hasExternalLinkage() || F.hasDLLImportLinkage() ||
- F.hasExternalWeakLinkage(),
- "invalid linkage type for function declaration", &F);
- } else {
- // Verify that this function (which has a body) is not named "llvm.*". It
- // is not legal to define intrinsics.
- Assert1(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F);
-
- // Check the entry node
- BasicBlock *Entry = &F.getEntryBlock();
- Assert1(pred_begin(Entry) == pred_end(Entry),
- "Entry block to function must not have predecessors!", Entry);
-
- // The address of the entry block cannot be taken, unless it is dead.
- if (Entry->hasAddressTaken()) {
- Assert1(!BlockAddress::get(Entry)->isConstantUsed(),
- "blockaddress may not be used with the entry block!", Entry);
- }
- }
-
- // If this function is actually an intrinsic, verify that it is only used in
- // direct call/invokes, never having its "address taken".
- if (F.getIntrinsicID()) {
- const User *U;
- if (F.hasAddressTaken(&U))
- Assert1(0, "Invalid user of intrinsic instruction!", U);
- }
-}
-
-// visitBasicBlock - Verify that a basic block is well formed...
-//
-void Verifier::visitBasicBlock(BasicBlock &BB) {
- InstsInThisBlock.clear();
-
- // Ensure that basic blocks have terminators!
- Assert1(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
-
- // Check constraints that this basic block imposes on all of the PHI nodes in
- // it.
- if (isa<PHINode>(BB.front())) {
- SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
- SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
- std::sort(Preds.begin(), Preds.end());
- PHINode *PN;
- for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
- // Ensure that PHI nodes have at least one entry!
- Assert1(PN->getNumIncomingValues() != 0,
- "PHI nodes must have at least one entry. If the block is dead, "
- "the PHI should be removed!", PN);
- Assert1(PN->getNumIncomingValues() == Preds.size(),
- "PHINode should have one entry for each predecessor of its "
- "parent basic block!", PN);
-
- // Get and sort all incoming values in the PHI node...
- Values.clear();
- Values.reserve(PN->getNumIncomingValues());
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Values.push_back(std::make_pair(PN->getIncomingBlock(i),
- PN->getIncomingValue(i)));
- std::sort(Values.begin(), Values.end());
-
- for (unsigned i = 0, e = Values.size(); i != e; ++i) {
- // Check to make sure that if there is more than one entry for a
- // particular basic block in this PHI node, the incoming values are
- // all identical.
- //
- Assert4(i == 0 || Values[i].first != Values[i-1].first ||
- Values[i].second == Values[i-1].second,
- "PHI node has multiple entries for the same basic block with "
- "different incoming values!", PN, Values[i].first,
- Values[i].second, Values[i-1].second);
-
- // Check to make sure that the predecessors and PHI node entries are
- // matched up.
- Assert3(Values[i].first == Preds[i],
- "PHI node entries do not match predecessors!", PN,
- Values[i].first, Preds[i]);
- }
- }
- }
-}
-
-void Verifier::visitTerminatorInst(TerminatorInst &I) {
- // Ensure that terminators only exist at the end of the basic block.
- Assert1(&I == I.getParent()->getTerminator(),
- "Terminator found in the middle of a basic block!", I.getParent());
- visitInstruction(I);
-}
-
-void Verifier::visitBranchInst(BranchInst &BI) {
- if (BI.isConditional()) {
- Assert2(BI.getCondition()->getType()->isIntegerTy(1),
- "Branch condition is not 'i1' type!", &BI, BI.getCondition());
- }
- visitTerminatorInst(BI);
-}
-
-void Verifier::visitReturnInst(ReturnInst &RI) {
- Function *F = RI.getParent()->getParent();
- unsigned N = RI.getNumOperands();
- if (F->getReturnType()->isVoidTy())
- Assert2(N == 0,
- "Found return instr that returns non-void in Function of void "
- "return type!", &RI, F->getReturnType());
- else
- Assert2(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
- "Function return type does not match operand "
- "type of return inst!", &RI, F->getReturnType());
-
- // Check to make sure that the return value has the necessary properties for
- // terminators...
- visitTerminatorInst(RI);
-}
-
-void Verifier::visitSwitchInst(SwitchInst &SI) {
- // Check to make sure that all of the constants in the switch instruction
- // have the same type as the switched-on value.
- Type *SwitchTy = SI.getCondition()->getType();
- IntegerType *IntTy = cast<IntegerType>(SwitchTy);
- IntegersSubsetToBB Mapping;
- std::map<IntegersSubset::Range, unsigned> RangeSetMap;
- for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
- IntegersSubset CaseRanges = i.getCaseValueEx();
- for (unsigned ri = 0, rie = CaseRanges.getNumItems(); ri < rie; ++ri) {
- IntegersSubset::Range r = CaseRanges.getItem(ri);
- Assert1(((const APInt&)r.getLow()).getBitWidth() == IntTy->getBitWidth(),
- "Switch constants must all be same type as switch value!", &SI);
- Assert1(((const APInt&)r.getHigh()).getBitWidth() == IntTy->getBitWidth(),
- "Switch constants must all be same type as switch value!", &SI);
- Mapping.add(r);
- RangeSetMap[r] = i.getCaseIndex();
- }
- }
-
- IntegersSubsetToBB::RangeIterator errItem;
- if (!Mapping.verify(errItem)) {
- unsigned CaseIndex = RangeSetMap[errItem->first];
- SwitchInst::CaseIt i(&SI, CaseIndex);
- Assert2(false, "Duplicate integer as switch case", &SI, i.getCaseValueEx());
- }
-
- visitTerminatorInst(SI);
-}
-
-void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
- Assert1(BI.getAddress()->getType()->isPointerTy(),
- "Indirectbr operand must have pointer type!", &BI);
- for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
- Assert1(BI.getDestination(i)->getType()->isLabelTy(),
- "Indirectbr destinations must all have pointer type!", &BI);
-
- visitTerminatorInst(BI);
-}
-
-void Verifier::visitSelectInst(SelectInst &SI) {
- Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
- SI.getOperand(2)),
- "Invalid operands for select instruction!", &SI);
-
- Assert1(SI.getTrueValue()->getType() == SI.getType(),
- "Select values must have same type as select instruction!", &SI);
- visitInstruction(SI);
-}
-
-/// visitUserOp1 - User-defined operators shouldn't live beyond the lifetime of
-/// a pass; if any exist, it's an error.
-///
-void Verifier::visitUserOp1(Instruction &I) {
- Assert1(0, "User-defined operators should not live outside of a pass!", &I);
-}
-
-void Verifier::visitTruncInst(TruncInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
-
- // Get the size of the types in bits; we'll need this later
- unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
- unsigned DestBitSize = DestTy->getScalarSizeInBits();
-
- Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "trunc source and destination must both be a vector or neither", &I);
- Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I);
-
- visitInstruction(I);
-}
-
-void Verifier::visitZExtInst(ZExtInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
-
- // Get the size of the types in bits, we'll need this later
- Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "zext source and destination must both be a vector or neither", &I);
- unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
- unsigned DestBitSize = DestTy->getScalarSizeInBits();
-
- Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
-
- visitInstruction(I);
-}
-
-void Verifier::visitSExtInst(SExtInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
-
- // Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
- unsigned DestBitSize = DestTy->getScalarSizeInBits();
-
- Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "sext source and destination must both be a vector or neither", &I);
- Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I);
-
- visitInstruction(I);
-}
-
-void Verifier::visitFPTruncInst(FPTruncInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
- // Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
- unsigned DestBitSize = DestTy->getScalarSizeInBits();
-
- Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "fptrunc source and destination must both be a vector or neither",&I);
- Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I);
-
- visitInstruction(I);
-}
-
-void Verifier::visitFPExtInst(FPExtInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
-
- // Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
- unsigned DestBitSize = DestTy->getScalarSizeInBits();
-
- Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "fpext source and destination must both be a vector or neither", &I);
- Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I);
-
- visitInstruction(I);
-}
-
-void Verifier::visitUIToFPInst(UIToFPInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
-
- bool SrcVec = SrcTy->isVectorTy();
- bool DstVec = DestTy->isVectorTy();
-
- Assert1(SrcVec == DstVec,
- "UIToFP source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isIntOrIntVectorTy(),
- "UIToFP source must be integer or integer vector", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),
- "UIToFP result must be FP or FP vector", &I);
-
- if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "UIToFP source and dest vector length mismatch", &I);
-
- visitInstruction(I);
-}
-
-void Verifier::visitSIToFPInst(SIToFPInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
-
- bool SrcVec = SrcTy->isVectorTy();
- bool DstVec = DestTy->isVectorTy();
-
- Assert1(SrcVec == DstVec,
- "SIToFP source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isIntOrIntVectorTy(),
- "SIToFP source must be integer or integer vector", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),
- "SIToFP result must be FP or FP vector", &I);
-
- if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "SIToFP source and dest vector length mismatch", &I);
-
- visitInstruction(I);
-}
-
-void Verifier::visitFPToUIInst(FPToUIInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
-
- bool SrcVec = SrcTy->isVectorTy();
- bool DstVec = DestTy->isVectorTy();
-
- Assert1(SrcVec == DstVec,
- "FPToUI source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector",
- &I);
- Assert1(DestTy->isIntOrIntVectorTy(),
- "FPToUI result must be integer or integer vector", &I);
-
- if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "FPToUI source and dest vector length mismatch", &I);
-
- visitInstruction(I);
-}
-
-void Verifier::visitFPToSIInst(FPToSIInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
-
- bool SrcVec = SrcTy->isVectorTy();
- bool DstVec = DestTy->isVectorTy();
-
- Assert1(SrcVec == DstVec,
- "FPToSI source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isFPOrFPVectorTy(),
- "FPToSI source must be FP or FP vector", &I);
- Assert1(DestTy->isIntOrIntVectorTy(),
- "FPToSI result must be integer or integer vector", &I);
-
- if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "FPToSI source and dest vector length mismatch", &I);
-
- visitInstruction(I);
-}
-
-void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
-
- Assert1(SrcTy->getScalarType()->isPointerTy(),
- "PtrToInt source must be pointer", &I);
- Assert1(DestTy->getScalarType()->isIntegerTy(),
- "PtrToInt result must be integral", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "PtrToInt type mismatch", &I);
-
- if (SrcTy->isVectorTy()) {
- VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
- VectorType *VDest = dyn_cast<VectorType>(DestTy);
- Assert1(VSrc->getNumElements() == VDest->getNumElements(),
- "PtrToInt Vector width mismatch", &I);
- }
-
- visitInstruction(I);
-}
-
-void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
-
- Assert1(SrcTy->getScalarType()->isIntegerTy(),
- "IntToPtr source must be an integral", &I);
- Assert1(DestTy->getScalarType()->isPointerTy(),
- "IntToPtr result must be a pointer",&I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "IntToPtr type mismatch", &I);
- if (SrcTy->isVectorTy()) {
- VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
- VectorType *VDest = dyn_cast<VectorType>(DestTy);
- Assert1(VSrc->getNumElements() == VDest->getNumElements(),
- "IntToPtr Vector width mismatch", &I);
- }
- visitInstruction(I);
-}
-
-void Verifier::visitBitCastInst(BitCastInst &I) {
- // Get the source and destination types
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DestTy = I.getType();
-
- // Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
-
- // BitCast implies a no-op cast of type only. No bits change.
- // However, you can't cast pointers to anything but pointers.
- Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(),
- "Bitcast requires both operands to be pointer or neither", &I);
- Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I);
-
- // Disallow aggregates.
- Assert1(!SrcTy->isAggregateType(),
- "Bitcast operand must not be aggregate", &I);
- Assert1(!DestTy->isAggregateType(),
- "Bitcast type must not be aggregate", &I);
-
- visitInstruction(I);
-}
-
-/// visitPHINode - Ensure that a PHI node is well formed.
-///
-void Verifier::visitPHINode(PHINode &PN) {
- // Ensure that the PHI nodes are all grouped together at the top of the block.
- // This can be tested by checking whether the instruction before this is
- // either nonexistent (because this is begin()) or is a PHI node. If not,
- // then there is some other instruction before a PHI.
- Assert2(&PN == &PN.getParent()->front() ||
- isa<PHINode>(--BasicBlock::iterator(&PN)),
- "PHI nodes not grouped at top of basic block!",
- &PN, PN.getParent());
-
- // Check that all of the values of the PHI node have the same type as the
- // result, and that the incoming blocks are really basic blocks.
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
- "PHI node operands are not the same type as the result!", &PN);
- }
-
- // All other PHI node constraints are checked in the visitBasicBlock method.
-
- visitInstruction(PN);
-}
-
-void Verifier::VerifyCallSite(CallSite CS) {
- Instruction *I = CS.getInstruction();
-
- Assert1(CS.getCalledValue()->getType()->isPointerTy(),
- "Called function must be a pointer!", I);
- PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
-
- Assert1(FPTy->getElementType()->isFunctionTy(),
- "Called function is not pointer to function type!", I);
- FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
-
- // Verify that the correct number of arguments are being passed
- if (FTy->isVarArg())
- Assert1(CS.arg_size() >= FTy->getNumParams(),
- "Called function requires more parameters than were provided!",I);
- else
- Assert1(CS.arg_size() == FTy->getNumParams(),
- "Incorrect number of arguments passed to called function!", I);
-
- // Verify that all arguments to the call match the function type.
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i),
- "Call parameter type does not match function signature!",
- CS.getArgument(i), FTy->getParamType(i), I);
-
- const AttrListPtr &Attrs = CS.getAttributes();
-
- Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
- "Attributes after last parameter!", I);
-
- // Verify call attributes.
- VerifyFunctionAttrs(FTy, Attrs, I);
-
- if (FTy->isVarArg())
- // Check attributes on the varargs part.
- for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) {
- Attributes Attr = Attrs.getParamAttributes(Idx);
-
- VerifyParameterAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I);
-
- Assert1(!Attr.hasIncompatibleWithVarArgsAttrs(),
- "Attribute 'sret' cannot be used for vararg call arguments!", I);
- }
-
- // Verify that there's no metadata unless it's a direct call to an intrinsic.
- if (CS.getCalledFunction() == 0 ||
- !CS.getCalledFunction()->getName().startswith("llvm.")) {
- for (FunctionType::param_iterator PI = FTy->param_begin(),
- PE = FTy->param_end(); PI != PE; ++PI)
- Assert1(!(*PI)->isMetadataTy(),
- "Function has metadata parameter but isn't an intrinsic", I);
- }
-
- visitInstruction(*I);
-}
-
-void Verifier::visitCallInst(CallInst &CI) {
- VerifyCallSite(&CI);
-
- if (Function *F = CI.getCalledFunction())
- if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
- visitIntrinsicFunctionCall(ID, CI);
-}
-
-void Verifier::visitInvokeInst(InvokeInst &II) {
- VerifyCallSite(&II);
-
- // Verify that there is a landingpad instruction as the first non-PHI
- // instruction of the 'unwind' destination.
- Assert1(II.getUnwindDest()->isLandingPad(),
- "The unwind destination does not have a landingpad instruction!",&II);
-
- visitTerminatorInst(II);
-}
-
-/// visitBinaryOperator - Check that both arguments to the binary operator are
-/// of the same type!
-///
-void Verifier::visitBinaryOperator(BinaryOperator &B) {
- Assert1(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
- "Both operands to a binary operator are not of the same type!", &B);
-
- switch (B.getOpcode()) {
- // Check that integer arithmetic operators are only used with
- // integral operands.
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::SRem:
- case Instruction::URem:
- Assert1(B.getType()->isIntOrIntVectorTy(),
- "Integer arithmetic operators only work with integral types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Integer arithmetic operators must have same type "
- "for operands and result!", &B);
- break;
- // Check that floating-point arithmetic operators are only used with
- // floating-point operands.
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- Assert1(B.getType()->isFPOrFPVectorTy(),
- "Floating-point arithmetic operators only work with "
- "floating-point types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Floating-point arithmetic operators must have same type "
- "for operands and result!", &B);
- break;
- // Check that logical operators are only used with integral operands.
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- Assert1(B.getType()->isIntOrIntVectorTy(),
- "Logical operators only work with integral types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Logical operators must have same type for operands and result!",
- &B);
- break;
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- Assert1(B.getType()->isIntOrIntVectorTy(),
- "Shifts only work with integral types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Shift return type must be same as operands!", &B);
- break;
- default:
- llvm_unreachable("Unknown BinaryOperator opcode!");
- }
-
- visitInstruction(B);
-}
-
-void Verifier::visitICmpInst(ICmpInst &IC) {
- // Check that the operands are the same type
- Type *Op0Ty = IC.getOperand(0)->getType();
- Type *Op1Ty = IC.getOperand(1)->getType();
- Assert1(Op0Ty == Op1Ty,
- "Both operands to ICmp instruction are not of the same type!", &IC);
- // Check that the operands are the right type
- Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
- "Invalid operand types for ICmp instruction", &IC);
- // Check that the predicate is valid.
- Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
- IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
- "Invalid predicate in ICmp instruction!", &IC);
-
- visitInstruction(IC);
-}
-
-void Verifier::visitFCmpInst(FCmpInst &FC) {
- // Check that the operands are the same type
- Type *Op0Ty = FC.getOperand(0)->getType();
- Type *Op1Ty = FC.getOperand(1)->getType();
- Assert1(Op0Ty == Op1Ty,
- "Both operands to FCmp instruction are not of the same type!", &FC);
- // Check that the operands are the right type
- Assert1(Op0Ty->isFPOrFPVectorTy(),
- "Invalid operand types for FCmp instruction", &FC);
- // Check that the predicate is valid.
- Assert1(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
- FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
- "Invalid predicate in FCmp instruction!", &FC);
-
- visitInstruction(FC);
-}
-
-void Verifier::visitExtractElementInst(ExtractElementInst &EI) {
- Assert1(ExtractElementInst::isValidOperands(EI.getOperand(0),
- EI.getOperand(1)),
- "Invalid extractelement operands!", &EI);
- visitInstruction(EI);
-}
-
-void Verifier::visitInsertElementInst(InsertElementInst &IE) {
- Assert1(InsertElementInst::isValidOperands(IE.getOperand(0),
- IE.getOperand(1),
- IE.getOperand(2)),
- "Invalid insertelement operands!", &IE);
- visitInstruction(IE);
-}
-
-void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
- Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
- SV.getOperand(2)),
- "Invalid shufflevector operands!", &SV);
- visitInstruction(SV);
-}
-
-void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
- Type *TargetTy = GEP.getPointerOperandType()->getScalarType();
-
- Assert1(isa<PointerType>(TargetTy),
- "GEP base pointer is not a vector or a vector of pointers", &GEP);
- Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(),
- "GEP into unsized type!", &GEP);
-
- SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
- Type *ElTy =
- GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
- Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
-
- if (GEP.getPointerOperandType()->isPointerTy()) {
- // Validate GEPs with scalar indices.
- Assert2(GEP.getType()->isPointerTy() &&
- cast<PointerType>(GEP.getType())->getElementType() == ElTy,
- "GEP is not of right type for indices!", &GEP, ElTy);
- } else {
- // Validate GEPs with a vector index.
- Assert1(Idxs.size() == 1, "Invalid number of indices!", &GEP);
- Value *Index = Idxs[0];
- Type *IndexTy = Index->getType();
- Assert1(IndexTy->isVectorTy(),
- "Vector GEP must have vector indices!", &GEP);
- Assert1(GEP.getType()->isVectorTy(),
- "Vector GEP must return a vector value", &GEP);
- Type *ElemPtr = cast<VectorType>(GEP.getType())->getElementType();
- Assert1(ElemPtr->isPointerTy(),
- "Vector GEP pointer operand is not a pointer!", &GEP);
- unsigned IndexWidth = cast<VectorType>(IndexTy)->getNumElements();
- unsigned GepWidth = cast<VectorType>(GEP.getType())->getNumElements();
- Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
- Assert1(ElTy == cast<PointerType>(ElemPtr)->getElementType(),
- "Vector GEP type does not match pointer type!", &GEP);
- }
- visitInstruction(GEP);
-}
-
-static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
- return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
-}
-
-void Verifier::visitLoadInst(LoadInst &LI) {
- PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
- Assert1(PTy, "Load operand must be a pointer.", &LI);
- Type *ElTy = PTy->getElementType();
- Assert2(ElTy == LI.getType(),
- "Load result type does not match pointer operand type!", &LI, ElTy);
- if (LI.isAtomic()) {
- Assert1(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease,
- "Load cannot have Release ordering", &LI);
- Assert1(LI.getAlignment() != 0,
- "Atomic load must specify explicit alignment", &LI);
- if (!ElTy->isPointerTy()) {
- Assert2(ElTy->isIntegerTy(),
- "atomic store operand must have integer type!",
- &LI, ElTy);
- unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "atomic store operand must be power-of-two byte-sized integer",
- &LI, ElTy);
- }
- } else {
- Assert1(LI.getSynchScope() == CrossThread,
- "Non-atomic load cannot have SynchronizationScope specified", &LI);
- }
-
- if (MDNode *Range = LI.getMetadata(LLVMContext::MD_range)) {
- unsigned NumOperands = Range->getNumOperands();
- Assert1(NumOperands % 2 == 0, "Unfinished range!", Range);
- unsigned NumRanges = NumOperands / 2;
- Assert1(NumRanges >= 1, "It should have at least one range!", Range);
-
- ConstantRange LastRange(1); // Dummy initial value
- for (unsigned i = 0; i < NumRanges; ++i) {
- ConstantInt *Low = dyn_cast<ConstantInt>(Range->getOperand(2*i));
- Assert1(Low, "The lower limit must be an integer!", Low);
- ConstantInt *High = dyn_cast<ConstantInt>(Range->getOperand(2*i + 1));
- Assert1(High, "The upper limit must be an integer!", High);
- Assert1(High->getType() == Low->getType() &&
- High->getType() == ElTy, "Range types must match load type!",
- &LI);
-
- APInt HighV = High->getValue();
- APInt LowV = Low->getValue();
- ConstantRange CurRange(LowV, HighV);
- Assert1(!CurRange.isEmptySet() && !CurRange.isFullSet(),
- "Range must not be empty!", Range);
- if (i != 0) {
- Assert1(CurRange.intersectWith(LastRange).isEmptySet(),
- "Intervals are overlapping", Range);
- Assert1(LowV.sgt(LastRange.getLower()), "Intervals are not in order",
- Range);
- Assert1(!isContiguous(CurRange, LastRange), "Intervals are contiguous",
- Range);
- }
- LastRange = ConstantRange(LowV, HighV);
- }
- if (NumRanges > 2) {
- APInt FirstLow =
- dyn_cast<ConstantInt>(Range->getOperand(0))->getValue();
- APInt FirstHigh =
- dyn_cast<ConstantInt>(Range->getOperand(1))->getValue();
- ConstantRange FirstRange(FirstLow, FirstHigh);
- Assert1(FirstRange.intersectWith(LastRange).isEmptySet(),
- "Intervals are overlapping", Range);
- Assert1(!isContiguous(FirstRange, LastRange), "Intervals are contiguous",
- Range);
- }
-
- }
-
- visitInstruction(LI);
-}
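For reference, the interval rules enforced above (disjoint, in signed order, and never mergeable into one range) can be summarized in a small standalone sketch; this is not part of the import, and the helper name is illustrative:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/Support/ConstantRange.h"
using namespace llvm;

// Two consecutive !range intervals are acceptable iff they are disjoint,
// in signed order, and not contiguous (a contiguous pair should have been
// merged into a single interval by the producer).
static bool adjacentRangesOK(const ConstantRange &Prev,
                             const ConstantRange &Cur) {
  bool Contiguous = Prev.getUpper() == Cur.getLower() ||
                    Prev.getLower() == Cur.getUpper(); // same as isContiguous()
  return Cur.intersectWith(Prev).isEmptySet() &&
         Cur.getLower().sgt(Prev.getLower()) &&
         !Contiguous;
}
```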
-
-void Verifier::visitStoreInst(StoreInst &SI) {
- PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
- Assert1(PTy, "Store operand must be a pointer.", &SI);
- Type *ElTy = PTy->getElementType();
- Assert2(ElTy == SI.getOperand(0)->getType(),
- "Stored value type does not match pointer operand type!",
- &SI, ElTy);
- if (SI.isAtomic()) {
- Assert1(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease,
- "Store cannot have Acquire ordering", &SI);
- Assert1(SI.getAlignment() != 0,
- "Atomic store must specify explicit alignment", &SI);
- if (!ElTy->isPointerTy()) {
- Assert2(ElTy->isIntegerTy(),
- "atomic store operand must have integer type!",
- &SI, ElTy);
- unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "atomic store operand must be power-of-two byte-sized integer",
- &SI, ElTy);
- }
- } else {
- Assert1(SI.getSynchScope() == CrossThread,
- "Non-atomic store cannot have SynchronizationScope specified", &SI);
- }
- visitInstruction(SI);
-}
-
-void Verifier::visitAllocaInst(AllocaInst &AI) {
- PointerType *PTy = AI.getType();
- Assert1(PTy->getAddressSpace() == 0,
- "Allocation instruction pointer not in the generic address space!",
- &AI);
- Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type",
- &AI);
- Assert1(AI.getArraySize()->getType()->isIntegerTy(),
- "Alloca array size must have integer type", &AI);
- visitInstruction(AI);
-}
-
-void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
- Assert1(CXI.getOrdering() != NotAtomic,
- "cmpxchg instructions must be atomic.", &CXI);
- Assert1(CXI.getOrdering() != Unordered,
- "cmpxchg instructions cannot be unordered.", &CXI);
- PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType());
- Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI);
- Type *ElTy = PTy->getElementType();
- Assert2(ElTy->isIntegerTy(),
- "cmpxchg operand must have integer type!",
- &CXI, ElTy);
- unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "cmpxchg operand must be power-of-two byte-sized integer",
- &CXI, ElTy);
- Assert2(ElTy == CXI.getOperand(1)->getType(),
- "Expected value type does not match pointer operand type!",
- &CXI, ElTy);
- Assert2(ElTy == CXI.getOperand(2)->getType(),
- "Stored value type does not match pointer operand type!",
- &CXI, ElTy);
- visitInstruction(CXI);
-}
-
-void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
- Assert1(RMWI.getOrdering() != NotAtomic,
- "atomicrmw instructions must be atomic.", &RMWI);
- Assert1(RMWI.getOrdering() != Unordered,
- "atomicrmw instructions cannot be unordered.", &RMWI);
- PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
- Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
- Type *ElTy = PTy->getElementType();
- Assert2(ElTy->isIntegerTy(),
- "atomicrmw operand must have integer type!",
- &RMWI, ElTy);
- unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "atomicrmw operand must be power-of-two byte-sized integer",
- &RMWI, ElTy);
- Assert2(ElTy == RMWI.getOperand(1)->getType(),
- "Argument value type does not match pointer operand type!",
- &RMWI, ElTy);
- Assert1(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() &&
- RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP,
- "Invalid binary operation!", &RMWI);
- visitInstruction(RMWI);
-}
-
-void Verifier::visitFenceInst(FenceInst &FI) {
- const AtomicOrdering Ordering = FI.getOrdering();
- Assert1(Ordering == Acquire || Ordering == Release ||
- Ordering == AcquireRelease || Ordering == SequentiallyConsistent,
- "fence instructions may only have "
- "acquire, release, acq_rel, or seq_cst ordering.", &FI);
- visitInstruction(FI);
-}
-
-void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
- Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
- EVI.getIndices()) ==
- EVI.getType(),
- "Invalid ExtractValueInst operands!", &EVI);
-
- visitInstruction(EVI);
-}
-
-void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
- Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
- IVI.getIndices()) ==
- IVI.getOperand(1)->getType(),
- "Invalid InsertValueInst operands!", &IVI);
-
- visitInstruction(IVI);
-}
-
-void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
- BasicBlock *BB = LPI.getParent();
-
- // The landingpad instruction is ill-formed if it doesn't have any clauses and
- // isn't a cleanup.
- Assert1(LPI.getNumClauses() > 0 || LPI.isCleanup(),
- "LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
-
- // The landingpad instruction defines its parent as a landing pad block. The
- // landing pad block may be branched to only by the unwind edge of an invoke.
- for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
- const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator());
- Assert1(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
- "Block containing LandingPadInst must be jumped to "
- "only by the unwind edge of an invoke.", &LPI);
- }
-
- // The landingpad instruction must be the first non-PHI instruction in the
- // block.
- Assert1(LPI.getParent()->getLandingPadInst() == &LPI,
- "LandingPadInst not the first non-PHI instruction in the block.",
- &LPI);
-
- // The personality functions for all landingpad instructions within the same
- // function should match.
- if (PersonalityFn)
- Assert1(LPI.getPersonalityFn() == PersonalityFn,
- "Personality function doesn't match others in function", &LPI);
- PersonalityFn = LPI.getPersonalityFn();
-
- // All operands must be constants.
- Assert1(isa<Constant>(PersonalityFn), "Personality function is not constant!",
- &LPI);
- for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) {
- Value *Clause = LPI.getClause(i);
- Assert1(isa<Constant>(Clause), "Clause is not constant!", &LPI);
- if (LPI.isCatch(i)) {
- Assert1(isa<PointerType>(Clause->getType()),
- "Catch operand does not have pointer type!", &LPI);
- } else {
- Assert1(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI);
- Assert1(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause),
- "Filter operand is not an array of constants!", &LPI);
- }
- }
-
- visitInstruction(LPI);
-}
-
-void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
- Instruction *Op = cast<Instruction>(I.getOperand(i));
- // If we have an invalid invoke, don't try to compute the dominance.
- // We already reject it in the invoke specific checks and the dominance
- // computation doesn't handle multiple edges.
- if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
- if (II->getNormalDest() == II->getUnwindDest())
- return;
- }
-
- const Use &U = I.getOperandUse(i);
- Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, U),
- "Instruction does not dominate all uses!", Op, &I);
-}
-
-/// visitInstruction - Verify that an instruction is well formed.
-///
-void Verifier::visitInstruction(Instruction &I) {
- BasicBlock *BB = I.getParent();
- Assert1(BB, "Instruction not embedded in basic block!", &I);
-
- if (!isa<PHINode>(I)) { // Check that non-phi nodes are not self referential
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
- UI != UE; ++UI)
- Assert1(*UI != (User*)&I || !DT->isReachableFromEntry(BB),
- "Only PHI nodes may reference their own value!", &I);
- }
-
- // Check that void typed values don't have names
- Assert1(!I.getType()->isVoidTy() || !I.hasName(),
- "Instruction has a name, but provides a void value!", &I);
-
- // Check that the return value of the instruction is either void or a legal
- // value type.
- Assert1(I.getType()->isVoidTy() ||
- I.getType()->isFirstClassType(),
- "Instruction returns a non-scalar type!", &I);
-
- // Check that the instruction doesn't produce metadata. Calls are already
- // checked against the callee type.
- Assert1(!I.getType()->isMetadataTy() ||
- isa<CallInst>(I) || isa<InvokeInst>(I),
- "Invalid use of metadata!", &I);
-
- // Check that all uses of the instruction, if they are instructions
- // themselves, actually have parent basic blocks. If the use is not an
- // instruction, it is an error!
- for (User::use_iterator UI = I.use_begin(), UE = I.use_end();
- UI != UE; ++UI) {
- if (Instruction *Used = dyn_cast<Instruction>(*UI))
- Assert2(Used->getParent() != 0, "Instruction referencing instruction not"
- " embedded in a basic block!", &I, Used);
- else {
- CheckFailed("Use of instruction is not an instruction!", *UI);
- return;
- }
- }
-
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- Assert1(I.getOperand(i) != 0, "Instruction has null operand!", &I);
-
- // Check to make sure that only first-class-values are operands to
- // instructions.
- if (!I.getOperand(i)->getType()->isFirstClassType()) {
- Assert1(0, "Instruction operands must be first-class values!", &I);
- }
-
- if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
- // Check to make sure that the "address of" an intrinsic function is never
- // taken.
- Assert1(!F->isIntrinsic() || i == (isa<CallInst>(I) ? e-1 : 0),
- "Cannot take the address of an intrinsic!", &I);
- Assert1(!F->isIntrinsic() || isa<CallInst>(I) ||
- F->getIntrinsicID() == Intrinsic::donothing,
- "Cannot invoke an intrinsinc other than donothing", &I);
- Assert1(F->getParent() == Mod, "Referencing function in another module!",
- &I);
- } else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
- Assert1(OpBB->getParent() == BB->getParent(),
- "Referring to a basic block in another function!", &I);
- } else if (Argument *OpArg = dyn_cast<Argument>(I.getOperand(i))) {
- Assert1(OpArg->getParent() == BB->getParent(),
- "Referring to an argument in another function!", &I);
- } else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
- Assert1(GV->getParent() == Mod, "Referencing global in another module!",
- &I);
- } else if (isa<Instruction>(I.getOperand(i))) {
- verifyDominatesUse(I, i);
- } else if (isa<InlineAsm>(I.getOperand(i))) {
- Assert1((i + 1 == e && isa<CallInst>(I)) ||
- (i + 3 == e && isa<InvokeInst>(I)),
- "Cannot take the address of an inline asm!", &I);
- }
- }
-
- if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpmath)) {
- Assert1(I.getType()->isFPOrFPVectorTy(),
- "fpmath requires a floating point result!", &I);
- Assert1(MD->getNumOperands() == 1, "fpmath takes one operand!", &I);
- Value *Op0 = MD->getOperand(0);
- if (ConstantFP *CFP0 = dyn_cast_or_null<ConstantFP>(Op0)) {
- APFloat Accuracy = CFP0->getValueAPF();
- Assert1(Accuracy.isNormal() && !Accuracy.isNegative(),
- "fpmath accuracy not a positive number!", &I);
- } else {
- Assert1(false, "invalid fpmath accuracy!", &I);
- }
- }
-
- MDNode *MD = I.getMetadata(LLVMContext::MD_range);
- Assert1(!MD || isa<LoadInst>(I), "Ranges are only for loads!", &I);
-
- InstsInThisBlock.insert(&I);
-}
-
-/// VerifyIntrinsicType - Verify that the specified type (which comes from an
-/// intrinsic argument or return value) matches the type constraints specified
-/// by the .td file (e.g. an "any integer" argument really is an integer).
-///
-/// This returns true on error but does not print a message.
-bool Verifier::VerifyIntrinsicType(Type *Ty,
- ArrayRef<Intrinsic::IITDescriptor> &Infos,
- SmallVectorImpl<Type*> &ArgTys) {
- using namespace Intrinsic;
-
- // If we ran out of descriptors, there are too many arguments.
- if (Infos.empty()) return true;
- IITDescriptor D = Infos.front();
- Infos = Infos.slice(1);
-
- switch (D.Kind) {
- case IITDescriptor::Void: return !Ty->isVoidTy();
- case IITDescriptor::MMX: return !Ty->isX86_MMXTy();
- case IITDescriptor::Metadata: return !Ty->isMetadataTy();
- case IITDescriptor::Float: return !Ty->isFloatTy();
- case IITDescriptor::Double: return !Ty->isDoubleTy();
- case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width);
- case IITDescriptor::Vector: {
- VectorType *VT = dyn_cast<VectorType>(Ty);
- return VT == 0 || VT->getNumElements() != D.Vector_Width ||
- VerifyIntrinsicType(VT->getElementType(), Infos, ArgTys);
- }
- case IITDescriptor::Pointer: {
- PointerType *PT = dyn_cast<PointerType>(Ty);
- return PT == 0 || PT->getAddressSpace() != D.Pointer_AddressSpace ||
- VerifyIntrinsicType(PT->getElementType(), Infos, ArgTys);
- }
-
- case IITDescriptor::Struct: {
- StructType *ST = dyn_cast<StructType>(Ty);
- if (ST == 0 || ST->getNumElements() != D.Struct_NumElements)
- return true;
-
- for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
- if (VerifyIntrinsicType(ST->getElementType(i), Infos, ArgTys))
- return true;
- return false;
- }
-
- case IITDescriptor::Argument:
- // Two cases here - If this is the second occurrence of an argument, verify
- // that the later instance matches the previous instance.
- if (D.getArgumentNumber() < ArgTys.size())
- return Ty != ArgTys[D.getArgumentNumber()];
-
- // Otherwise, if this is the first instance of an argument, record it and
- // verify the "Any" kind.
- assert(D.getArgumentNumber() == ArgTys.size() && "Table consistency error");
- ArgTys.push_back(Ty);
-
- switch (D.getArgumentKind()) {
- case IITDescriptor::AK_AnyInteger: return !Ty->isIntOrIntVectorTy();
- case IITDescriptor::AK_AnyFloat: return !Ty->isFPOrFPVectorTy();
- case IITDescriptor::AK_AnyVector: return !isa<VectorType>(Ty);
- case IITDescriptor::AK_AnyPointer: return !isa<PointerType>(Ty);
- }
- llvm_unreachable("all argument kinds not covered");
-
- case IITDescriptor::ExtendVecArgument:
- // This may only be used when referring to a previous vector argument.
- return D.getArgumentNumber() >= ArgTys.size() ||
- !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
- VectorType::getExtendedElementVectorType(
- cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
-
- case IITDescriptor::TruncVecArgument:
- // This may only be used when referring to a previous vector argument.
- return D.getArgumentNumber() >= ArgTys.size() ||
- !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
- VectorType::getTruncatedElementVectorType(
- cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
- }
- llvm_unreachable("unhandled");
-}
-
-/// visitIntrinsicFunctionCall - Allow intrinsics to be verified in different
-/// ways.
-///
-void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
- Function *IF = CI.getCalledFunction();
- Assert1(IF->isDeclaration(), "Intrinsic functions should never be defined!",
- IF);
-
- // Verify that the intrinsic prototype lines up with what the .td files
- // describe.
- FunctionType *IFTy = IF->getFunctionType();
- Assert1(!IFTy->isVarArg(), "Intrinsic prototypes are not varargs", IF);
-
- SmallVector<Intrinsic::IITDescriptor, 8> Table;
- getIntrinsicInfoTableEntries(ID, Table);
- ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
-
- SmallVector<Type *, 4> ArgTys;
- Assert1(!VerifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys),
- "Intrinsic has incorrect return type!", IF);
- for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i)
- Assert1(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys),
- "Intrinsic has incorrect argument type!", IF);
- Assert1(TableRef.empty(), "Intrinsic has too few arguments!", IF);
-
- // Now that we have the intrinsic ID and the actual argument types (and we
- // know they are legal for the intrinsic!) get the intrinsic name through the
- // usual means. This allows us to verify the mangling of argument types into
- // the name.
- Assert1(Intrinsic::getName(ID, ArgTys) == IF->getName(),
- "Intrinsic name not mangled correctly for type arguments!", IF);
-
- // If the intrinsic takes MDNode arguments, verify that they are either
- // global or local to *this* function.
- for (unsigned i = 0, e = CI.getNumArgOperands(); i != e; ++i)
- if (MDNode *MD = dyn_cast<MDNode>(CI.getArgOperand(i)))
- visitMDNode(*MD, CI.getParent()->getParent());
-
- switch (ID) {
- default:
- break;
- case Intrinsic::ctlz: // llvm.ctlz
- case Intrinsic::cttz: // llvm.cttz
- Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
- "is_zero_undef argument of bit counting intrinsics must be a "
- "constant int", &CI);
- break;
- case Intrinsic::dbg_declare: { // llvm.dbg.declare
- Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
- "invalid llvm.dbg.declare intrinsic call 1", &CI);
- MDNode *MD = cast<MDNode>(CI.getArgOperand(0));
- Assert1(MD->getNumOperands() == 1,
- "invalid llvm.dbg.declare intrinsic call 2", &CI);
- } break;
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset:
- Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
- "alignment argument of memory intrinsics must be a constant int",
- &CI);
- Assert1(isa<ConstantInt>(CI.getArgOperand(4)),
- "isvolatile argument of memory intrinsics must be a constant int",
- &CI);
- break;
- case Intrinsic::gcroot:
- case Intrinsic::gcwrite:
- case Intrinsic::gcread:
- if (ID == Intrinsic::gcroot) {
- AllocaInst *AI =
- dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
- Assert1(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
- Assert1(isa<Constant>(CI.getArgOperand(1)),
- "llvm.gcroot parameter #2 must be a constant.", &CI);
- if (!AI->getType()->getElementType()->isPointerTy()) {
- Assert1(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
- "llvm.gcroot parameter #1 must either be a pointer alloca, "
- "or argument #2 must be a non-null constant.", &CI);
- }
- }
-
- Assert1(CI.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", &CI);
- break;
- case Intrinsic::init_trampoline:
- Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
- "llvm.init_trampoline parameter #2 must resolve to a function.",
- &CI);
- break;
- case Intrinsic::prefetch:
- Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
- isa<ConstantInt>(CI.getArgOperand(2)) &&
- cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
- cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
- "invalid arguments to llvm.prefetch",
- &CI);
- break;
- case Intrinsic::stackprotector:
- Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
- "llvm.stackprotector parameter #2 must resolve to an alloca.",
- &CI);
- break;
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start:
- Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
- "size argument of memory use markers must be a constant integer",
- &CI);
- break;
- case Intrinsic::invariant_end:
- Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
- "llvm.invariant.end parameter #2 must be a constant integer", &CI);
- break;
- }
-}
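The mangling check above compares the declared name against the one Intrinsic::getName produces from the deduced overload types. A minimal sketch, not part of this import (header paths follow this revision's tree layout; the function name is illustrative):

```cpp
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include <string>

// For llvm.memcpy overloaded on (i8*, i8*, i64), the mangled name is
// expected to be "llvm.memcpy.p0i8.p0i8.i64".
std::string memcpyName(llvm::LLVMContext &C) {
  llvm::Type *I8Ptr = llvm::Type::getInt8PtrTy(C);
  llvm::Type *ArgTys[] = { I8Ptr, I8Ptr, llvm::Type::getInt64Ty(C) };
  return llvm::Intrinsic::getName(llvm::Intrinsic::memcpy, ArgTys);
}
```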
-
-//===----------------------------------------------------------------------===//
-// Implement the public interfaces to this file...
-//===----------------------------------------------------------------------===//
-
-FunctionPass *llvm::createVerifierPass(VerifierFailureAction action) {
- return new Verifier(action);
-}
-
-
-/// verifyFunction - Check a function for errors, printing messages on stderr.
-/// Return true if the function is corrupt.
-///
-bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) {
- Function &F = const_cast<Function&>(f);
- assert(!F.isDeclaration() && "Cannot verify external functions");
-
- FunctionPassManager FPM(F.getParent());
- Verifier *V = new Verifier(action);
- FPM.add(V);
- FPM.run(F);
- return V->Broken;
-}
-
-/// verifyModule - Check a module for errors, printing messages on stderr.
-/// Return true if the module is corrupt.
-///
-bool llvm::verifyModule(const Module &M, VerifierFailureAction action,
- std::string *ErrorInfo) {
- PassManager PM;
- Verifier *V = new Verifier(action);
- PM.add(V);
- PM.run(const_cast<Module&>(M));
-
- if (ErrorInfo && V->Broken)
- *ErrorInfo = V->MessagesStr.str();
- return V->Broken;
-}
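For callers of these entry points, a minimal usage sketch (not part of this import; the header path matches this revision, and the helper name is illustrative):

```cpp
#include "llvm/Analysis/Verifier.h"
#include "llvm/IR/Module.h"
#include <string>

// Returns true if M is well formed. With ReturnStatusAction the verifier
// neither aborts nor prints; Msg receives its diagnostics on failure.
bool moduleIsValid(const llvm::Module &M, std::string &Msg) {
  return !llvm::verifyModule(M, llvm::ReturnStatusAction, &Msg);
}
```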
diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt
index 36751cd31dac..c19bb679d186 100644
--- a/projects/CMakeLists.txt
+++ b/projects/CMakeLists.txt
@@ -13,7 +13,10 @@ endforeach(entry)
# Also add in the compiler-rt tree if present and we have a sufficiently
# recent version of CMake.
-if(${CMAKE_VERSION} VERSION_GREATER 2.8.7 AND
- ${LLVM_BUILD_RUNTIME})
- add_llvm_external_project(compiler-rt)
+if(${LLVM_BUILD_RUNTIME})
+ if(${CMAKE_VERSION} VERSION_GREATER 2.8.7)
+ add_llvm_external_project(compiler-rt)
+ else()
+ message(WARNING "Can't build compiler-rt, CMake 2.8.8 required!")
+ endif()
endif()
diff --git a/projects/sample/Makefile.llvm.config.in b/projects/sample/Makefile.llvm.config.in
index 9a85b3df6318..c7df998b26d3 100644
--- a/projects/sample/Makefile.llvm.config.in
+++ b/projects/sample/Makefile.llvm.config.in
@@ -184,6 +184,12 @@ RDYNAMIC := @RDYNAMIC@
#ENABLE_LIBCPP = 0
ENABLE_LIBCPP = @ENABLE_LIBCPP@
+# When ENABLE_CXX11 is enabled, LLVM is built in C++11 mode by default.
+ENABLE_CXX11 = @ENABLE_CXX11@
+
+# When ENABLE_WERROR is enabled, we'll pass -Werror on the command line
+ENABLE_WERROR = @ENABLE_WERROR@
+
# When ENABLE_OPTIMIZED is enabled, LLVM code is optimized and output is put
# into the "Release" directories. Otherwise, LLVM code is not optimized and
# output is put in the "Debug" directories.
diff --git a/projects/sample/Makefile.llvm.rules b/projects/sample/Makefile.llvm.rules
index 7ed1c1b4ed6b..30f54c45e20c 100644
--- a/projects/sample/Makefile.llvm.rules
+++ b/projects/sample/Makefile.llvm.rules
@@ -250,6 +250,15 @@ ifeq ($(ENABLE_LIBCPP),1)
LD.Flags += -stdlib=libc++
endif
+ifeq ($(ENABLE_CXX11),1)
+ CXX.Flags += -std=c++11
+endif
+
+ifeq ($(ENABLE_WERROR),1)
+ CXX.Flags += -Werror
+ C.Flags += -Werror
+endif
+
ifeq ($(ENABLE_PROFILING),1)
BuildMode := $(BuildMode)+Profile
CXX.Flags := $(filter-out -fomit-frame-pointer,$(CXX.Flags)) -pg -g
@@ -492,16 +501,24 @@ ifeq ($(HOST_OS),Darwin)
LoadableModuleOptions := -Wl,-flat_namespace -Wl,-undefined,suppress
SharedLinkOptions := -dynamiclib
- ifneq ($(ARCH),ARM)
- SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION)
+ ifdef DEPLOYMENT_TARGET
+ SharedLinkOptions += $(DEPLOYMENT_TARGET)
+ else
+ ifneq ($(ARCH),ARM)
+ SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION)
+ endif
endif
else
SharedLinkOptions=-shared
endif
ifeq ($(TARGET_OS),Darwin)
- ifneq ($(ARCH),ARM)
- TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION)
+ ifdef DEPLOYMENT_TARGET
+ TargetCommonOpts += $(DEPLOYMENT_TARGET)
+ else
+ ifneq ($(ARCH),ARM)
+ TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION)
+ endif
endif
endif
@@ -736,7 +753,7 @@ ObjectsBC := $(BaseNameSources:%=$(ObjDir)/%.bc)
#----------------------------------------------------------
ifeq (-mingw32,$(findstring -mingw32,$(BUILD_TRIPLE)))
- ECHOPATH := $(Verb)python -u -c "import sys;print ' '.join(sys.argv[1:])"
+ ECHOPATH := $(Verb)$(PYTHON) -u -c "import sys;print ' '.join(sys.argv[1:])"
else
ECHOPATH := $(Verb)$(ECHO)
endif
diff --git a/projects/sample/autoconf/config.sub b/projects/sample/autoconf/config.sub
index 9942491533e8..9d22c1e52edd 100755
--- a/projects/sample/autoconf/config.sub
+++ b/projects/sample/autoconf/config.sub
@@ -251,7 +251,8 @@ case $basic_machine in
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
| am33_2.0 \
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
- | be32 | be64 \
+ | be32 | be64 \
+ | aarch64 \
| bfin \
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx \
@@ -359,6 +360,7 @@ case $basic_machine in
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
+ | aarch64-* \
| avr-* | avr32-* \
| be32-* | be64-* \
| bfin-* | bs2000-* \
diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac
index 8012c23412db..283bc12bb342 100644
--- a/projects/sample/autoconf/configure.ac
+++ b/projects/sample/autoconf/configure.ac
@@ -304,6 +304,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
+ aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
@@ -380,6 +381,18 @@ case "$enableval" in
*) AC_MSG_ERROR([Invalid setting for --enable-libcpp. Use "yes" or "no"]) ;;
esac
+dnl --enable-cxx11 : check whether or not to use -std=c++11 on the command line
+AC_ARG_ENABLE(cxx11,
+ AS_HELP_STRING([--enable-cxx11],
+ [Use c++11 if available (default is NO)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_CXX11,[1]) ;;
+ no) AC_SUBST(ENABLE_CXX11,[0]) ;;
+ default) AC_SUBST(ENABLE_CXX11,[0]);;
+ *) AC_MSG_ERROR([Invalid setting for --enable-cxx11. Use "yes" or "no"]) ;;
+esac
+
dnl --enable-optimized : check whether they want to do an optimized build:
AC_ARG_ENABLE(optimized, AS_HELP_STRING(
--enable-optimized,[Compile with optimizations enabled (default is NO)]),,enableval=$optimize)
@@ -407,6 +420,16 @@ else
AC_SUBST(DISABLE_ASSERTIONS,[[DISABLE_ASSERTIONS=1]])
fi
+dnl --enable-werror : check whether we want Werror on by default
+AC_ARG_ENABLE(werror,AS_HELP_STRING(
+ --enable-werror,[Compile with -Werror enabled (default is NO)]),, enableval="no")
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_WERROR,[1]) ;;
+ no) AC_SUBST(ENABLE_WERROR,[0]) ;;
+ default) AC_SUBST(ENABLE_WERROR,[0]);;
+ *) AC_MSG_ERROR([Invalid setting for --enable-werror. Use "yes" or "no"]) ;;
+esac
+
dnl --enable-expensive-checks : check whether they want to turn on expensive debug checks:
AC_ARG_ENABLE(expensive-checks,AS_HELP_STRING(
--enable-expensive-checks,[Compile with expensive debug checks enabled (default is NO)]),, enableval="no")
@@ -452,6 +475,7 @@ else
PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ AArch64) AC_SUBST(TARGET_HAS_JIT,0) ;;
Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
@@ -574,7 +598,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -582,8 +606,8 @@ case "$enableval" in
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
- spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
@@ -596,9 +620,9 @@ case "$enableval" in
Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
- CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
diff --git a/projects/sample/configure b/projects/sample/configure
index cfbb6c69224a..a8fc4bff7e27 100755
--- a/projects/sample/configure
+++ b/projects/sample/configure
@@ -683,9 +683,11 @@ BUILD_EXEEXT
BUILD_CXX
CVSBUILD
ENABLE_LIBCPP
+ENABLE_CXX11
ENABLE_OPTIMIZED
ENABLE_PROFILING
DISABLE_ASSERTIONS
+ENABLE_WERROR
ENABLE_EXPENSIVE_CHECKS
EXPENSIVE_CHECKS
DEBUG_RUNTIME
@@ -1375,10 +1377,12 @@ Optional Features:
--enable-FEATURE[=ARG] include FEATURE [ARG=yes]
--enable-polly Use polly if available (default is YES)
--enable-libcpp Use libc++ if available (default is NO)
+ --enable-cxx11 Use c++11 if available (default is NO)
--enable-optimized Compile with optimizations enabled (default is NO)
--enable-profiling Compile with profiling enabled (default is NO)
--enable-assertions Compile with assertion checks enabled (default is
YES)
+ --enable-werror Compile with -Werror enabled (default is NO)
--enable-expensive-checks
Compile with expensive debug checks enabled (default
is NO)
@@ -3840,6 +3844,7 @@ else
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
+ aarch64*-*) llvm_cv_target_arch="AArch64" ;;
mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
@@ -4942,6 +4947,25 @@ echo "$as_me: error: Invalid setting for --enable-libcpp. Use \"yes\" or \"no\""
{ (exit 1); exit 1; }; } ;;
esac
+# Check whether --enable-cxx11 was given.
+if test "${enable_cxx11+set}" = set; then
+ enableval=$enable_cxx11;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_CXX11=1
+ ;;
+ no) ENABLE_CXX11=0
+ ;;
+ default) ENABLE_CXX11=0
+;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-cxx11. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-cxx11. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
# Check whether --enable-optimized was given.
if test "${enable_optimized+set}" = set; then
enableval=$enable_optimized;
@@ -4987,6 +5011,25 @@ else
fi
+# Check whether --enable-werror was given.
+if test "${enable_werror+set}" = set; then
+ enableval=$enable_werror;
+else
+ enableval="no"
+fi
+
+case "$enableval" in
+ yes) ENABLE_WERROR=1
+ ;;
+ no) ENABLE_WERROR=0
+ ;;
+ default) ENABLE_WERROR=0
+;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-werror. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-werror. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
# Check whether --enable-expensive-checks was given.
if test "${enable_expensive_checks+set}" = set; then
enableval=$enable_expensive_checks;
@@ -5059,6 +5102,8 @@ else
;;
ARM) TARGET_HAS_JIT=1
;;
+ AArch64) TARGET_HAS_JIT=0
+ ;;
Mips) TARGET_HAS_JIT=1
;;
XCore) TARGET_HAS_JIT=0
@@ -5255,7 +5300,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -5263,8 +5308,8 @@ case "$enableval" in
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
- spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
@@ -5277,9 +5322,9 @@ case "$enableval" in
Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
- CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
@@ -10308,7 +10353,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10311 "configure"
+#line 10356 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -21641,9 +21686,11 @@ BUILD_EXEEXT!$BUILD_EXEEXT$ac_delim
BUILD_CXX!$BUILD_CXX$ac_delim
CVSBUILD!$CVSBUILD$ac_delim
ENABLE_LIBCPP!$ENABLE_LIBCPP$ac_delim
+ENABLE_CXX11!$ENABLE_CXX11$ac_delim
ENABLE_OPTIMIZED!$ENABLE_OPTIMIZED$ac_delim
ENABLE_PROFILING!$ENABLE_PROFILING$ac_delim
DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim
+ENABLE_WERROR!$ENABLE_WERROR$ac_delim
ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim
EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim
DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
@@ -21653,8 +21700,6 @@ TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
ENABLE_DOCS!$ENABLE_DOCS$ac_delim
ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
ENABLE_THREADS!$ENABLE_THREADS$ac_delim
-ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
-ENABLE_PIC!$ENABLE_PIC$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -21696,6 +21741,8 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
+ENABLE_PIC!$ENABLE_PIC$ac_delim
ENABLE_SHARED!$ENABLE_SHARED$ac_delim
ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim
ENABLE_TIMESTAMPS!$ENABLE_TIMESTAMPS$ac_delim
@@ -21787,7 +21834,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 89; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 91; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/projects/sample/tools/sample/main.c b/projects/sample/tools/sample/main.c
index 2880265f8450..ec0c3df605e9 100644
--- a/projects/sample/tools/sample/main.c
+++ b/projects/sample/tools/sample/main.c
@@ -1,10 +1,8 @@
+#include "sample.h"
#include <stdio.h>
#include <stdlib.h>
-
#include <unistd.h>
-#include "sample.h"
-
int
main (int argc, char ** argv)
{
diff --git a/runtime/libprofile/CMakeLists.txt b/runtime/libprofile/CMakeLists.txt
index 8609715b33f0..9044f768e17f 100644
--- a/runtime/libprofile/CMakeLists.txt
+++ b/runtime/libprofile/CMakeLists.txt
@@ -1,7 +1,6 @@
set(SOURCES
BasicBlockTracing.c
CommonProfiling.c
- GCDAProfiling.c
PathProfiling.c
EdgeProfiling.c
OptimalEdgeProfiling.c
diff --git a/runtime/libprofile/GCDAProfiling.c b/runtime/libprofile/GCDAProfiling.c
deleted file mode 100644
index f2dc4f79881f..000000000000
--- a/runtime/libprofile/GCDAProfiling.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*===- GCDAProfiling.c - Support library for GCDA file emission -----------===*\
-|*
-|* The LLVM Compiler Infrastructure
-|*
-|* This file is distributed under the University of Illinois Open Source
-|* License. See LICENSE.TXT for details.
-|*
-|*===----------------------------------------------------------------------===*|
-|*
-|* This file implements the call back routines for the gcov profiling
-|* instrumentation pass. Link against this library when running code through
-|* the -insert-gcov-profiling LLVM pass.
-|*
-|* We emit files in a corrupt version of GCOV's "gcda" file format. These files
-|* are only close enough that LCOV will happily parse them. Anything that lcov
-|* ignores is missing.
-|*
-|* TODO: gcov is multi-process safe by having each exit open the existing file
-|* and append to it. We'd like to achieve that and be thread-safe too.
-|*
-\*===----------------------------------------------------------------------===*/
-
-#include "llvm/Support/DataTypes.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#ifdef _WIN32
-#include <direct.h>
-#endif
-
-/* #define DEBUG_GCDAPROFILING */
-
-/*
- * --- GCOV file format I/O primitives ---
- */
-
-static FILE *output_file = NULL;
-
-static void write_int32(uint32_t i) {
- fwrite(&i, 4, 1, output_file);
-}
-
-static void write_int64(uint64_t i) {
- uint32_t lo = i >> 0;
- uint32_t hi = i >> 32;
- write_int32(lo);
- write_int32(hi);
-}
-
-static uint32_t length_of_string(const char *s) {
- return (strlen(s) / 4) + 1;
-}
-
-static void write_string(const char *s) {
- uint32_t len = length_of_string(s);
- write_int32(len);
- fwrite(s, strlen(s), 1, output_file);
- fwrite("\0\0\0\0", 4 - (strlen(s) % 4), 1, output_file);
-}
-
-static char *mangle_filename(const char *orig_filename) {
- /* TODO: handle GCOV_PREFIX_STRIP */
- const char *prefix;
- char *filename = 0;
-
- prefix = getenv("GCOV_PREFIX");
-
- if (!prefix)
- return strdup(orig_filename);
-
- filename = malloc(strlen(prefix) + 1 + strlen(orig_filename) + 1);
- strcpy(filename, prefix);
- strcat(filename, "/");
- strcat(filename, orig_filename);
- return filename;
-}
-
-static void recursive_mkdir(const char *filename) {
- char *pathname;
- int i, e;
-
- for (i = 1, e = strlen(filename); i != e; ++i) {
- if (filename[i] != '/') continue;
- pathname = malloc(i + 1);
- strncpy(pathname, filename, i);
- pathname[i] = '\0';
-#ifdef _WIN32
- _mkdir(pathname);
-#else
- mkdir(pathname, 0750); /* some of these will fail, ignore it. */
-#endif
- free(pathname);
- }
-}
-
-/*
- * --- LLVM line counter API ---
- */
-
-/* A file in this case is a translation unit. Each .o file built with line
- * profiling enabled will emit to a different file. Only one file may be
- * started at a time.
- */
-void llvm_gcda_start_file(const char *orig_filename) {
- char *filename;
- filename = mangle_filename(orig_filename);
- recursive_mkdir(filename);
- output_file = fopen(filename, "w+b");
-
- if (!output_file) {
- const char *cptr = strrchr(orig_filename, '/');
- output_file = fopen(cptr ? cptr + 1 : orig_filename, "w+b");
-
- if (!output_file) {
- fprintf(stderr, "LLVM profiling runtime: cannot open '%s': ",
- cptr ? cptr + 1 : orig_filename);
- perror("");
- free(filename);
- return;
- }
- }
-
- /* gcda file, version 404*, stamp LLVM. */
-#ifdef __APPLE__
- fwrite("adcg*204MVLL", 12, 1, output_file);
-#else
- fwrite("adcg*404MVLL", 12, 1, output_file);
-#endif
-
-#ifdef DEBUG_GCDAPROFILING
- printf("llvmgcda: [%s]\n", orig_filename);
-#endif
-
- free(filename);
-}
-
-/* Given an array of pointers to counters (counters), increment the n-th one,
- * where we're also given a pointer to n (predecessor).
- */
-void llvm_gcda_increment_indirect_counter(uint32_t *predecessor,
- uint64_t **counters) {
- uint64_t *counter;
- uint32_t pred;
-
- pred = *predecessor;
- if (pred == 0xffffffff)
- return;
- counter = counters[pred];
-
- /* Don't crash if the pred# is out of sync. This can happen due to threads,
- or because of a TODO in GCOVProfiling.cpp buildEdgeLookupTable(). */
- if (counter)
- ++*counter;
-#ifdef DEBUG_GCDAPROFILING
- else
- printf("llvmgcda: increment_indirect_counter counters=%x, pred=%u\n",
- state_table_row, *predecessor);
-#endif
-}
-
-void llvm_gcda_emit_function(uint32_t ident, const char *function_name) {
-#ifdef DEBUG_GCDAPROFILING
- printf("llvmgcda: function id=%x\n", ident);
-#endif
- if (!output_file) return;
-
- /* function tag */
- fwrite("\0\0\0\1", 4, 1, output_file);
- write_int32(3 + 1 + length_of_string(function_name));
- write_int32(ident);
- write_int32(0);
- write_int32(0);
- write_string(function_name);
-}
-
-void llvm_gcda_emit_arcs(uint32_t num_counters, uint64_t *counters) {
- uint32_t i;
-
- /* Counter #1 (arcs) tag */
- if (!output_file) return;
- fwrite("\0\0\xa1\1", 4, 1, output_file);
- write_int32(num_counters * 2);
- for (i = 0; i < num_counters; ++i)
- write_int64(counters[i]);
-
-#ifdef DEBUG_GCDAPROFILING
- printf("llvmgcda: %u arcs\n", num_counters);
- for (i = 0; i < num_counters; ++i)
- printf("llvmgcda: %llu\n", (unsigned long long)counters[i]);
-#endif
-}
-
-void llvm_gcda_end_file() {
- /* Write out EOF record. */
- if (!output_file) return;
- fwrite("\0\0\0\0\0\0\0\0", 8, 1, output_file);
- fclose(output_file);
- output_file = NULL;
-
-#ifdef DEBUG_GCDAPROFILING
- printf("llvmgcda: -----\n");
-#endif
-}
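
A minimal standalone sketch (illustration only, not part of the removed file or of this patch) of the word-padded string framing the deleted write_string() used: a length word holding strlen/4 + 1 four-byte words, the raw bytes, then one to four NUL bytes of padding up to the word boundary.

/* Illustration only: the GCOV string-record layout written by the removed
 * write_string() above. */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void) {
    const char *samples[] = { "a", "abcd", "abcde" };
    for (int i = 0; i < 3; ++i) {
        const char *s = samples[i];
        uint32_t len_words = (uint32_t)(strlen(s) / 4) + 1; /* length word */
        size_t pad = 4 - (strlen(s) % 4);                   /* 1..4 NUL bytes */
        printf("\"%s\": length word = %u, %zu data + %zu pad = %zu payload bytes\n",
               s, len_words, strlen(s), pad, strlen(s) + pad);
    }
    return 0;
}

Because the length always rounds up to at least one extra byte, the payload ends in at least one NUL, so a reader can treat it as a C string.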
diff --git a/runtime/libprofile/Makefile b/runtime/libprofile/Makefile
index 6e9225382a9e..6c3701b26871 100644
--- a/runtime/libprofile/Makefile
+++ b/runtime/libprofile/Makefile
@@ -16,8 +16,6 @@ endif
LIBRARYNAME = profile_rt
LINK_LIBS_IN_SHARED = 1
SHARED_LIBRARY = 1
-EXTRA_DIST = libprofile.exports
-EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/libprofile.exports
# Build and install this archive.
BUILD_ARCHIVE = 1
@@ -50,7 +48,7 @@ ifeq ($(HOST_OS),Darwin)
endif
# If we're doing an Apple-style build, add the LTO object path.
- ifeq ($(RC_BUILDIT),YES)
+ ifeq ($(RC_XBS),YES)
TempFile := $(shell mkdir -p ${OBJROOT}/dSYMs ; mktemp ${OBJROOT}/dSYMs/profile_rt-lto.XXXXXX)
LLVMLibsOptions := $(LLVMLibsOptions) \
-Wl,-object_path_lto -Wl,$(TempFile)
diff --git a/runtime/libprofile/libprofile.exports b/runtime/libprofile/libprofile.exports
deleted file mode 100644
index 2f25be692047..000000000000
--- a/runtime/libprofile/libprofile.exports
+++ /dev/null
@@ -1,12 +0,0 @@
-llvm_start_edge_profiling
-llvm_start_opt_edge_profiling
-llvm_start_path_profiling
-llvm_start_basic_block_tracing
-llvm_trace_basic_block
-llvm_increment_path_count
-llvm_decrement_path_count
-llvm_gcda_start_file
-llvm_gcda_increment_indirect_counter
-llvm_gcda_emit_function
-llvm_gcda_emit_arcs
-llvm_gcda_end_file
diff --git a/test/Analysis/BasicAA/intrinsics.ll b/test/Analysis/BasicAA/intrinsics.ll
index 59725cfded05..c1cf587204cf 100644
--- a/test/Analysis/BasicAA/intrinsics.ll
+++ b/test/Analysis/BasicAA/intrinsics.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
; CHECK: define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[ATTR:#[0-9]+]]
; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
; CHECK-NEXT: %c = add <8 x i16> %a, %a
define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) {
@@ -22,7 +22,7 @@ entry:
; CHECK: define <8 x i16> @test1(i8* %p, <8 x i16> %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: %q = getelementptr i8* %p, i64 16
-; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[ATTR]]
; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
; CHECK-NEXT: %c = add <8 x i16> %a, %a
define <8 x i16> @test1(i8* %p, <8 x i16> %y) {
@@ -37,3 +37,6 @@ entry:
declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+
+; CHECK: attributes #0 = { nounwind readonly }
+; CHECK: attributes [[ATTR]] = { nounwind }
diff --git a/test/Analysis/BasicAA/invariant_load.ll b/test/Analysis/BasicAA/invariant_load.ll
new file mode 100644
index 000000000000..cd6ddb92d210
--- /dev/null
+++ b/test/Analysis/BasicAA/invariant_load.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
+
+; The input *.ll was obtained by manually adding "invariant.load" metadata to
+; the two loads. With "invariant.load" metadata, the second load is redundant.
+;
+; int foo(int *p, char *q) {
+; *q = (char)*p;
+; return *p + 1;
+; }
+
+define i32 @foo(i32* nocapture %p, i8* nocapture %q) {
+entry:
+ %0 = load i32* %p, align 4, !tbaa !0, !invariant.load !3
+ %conv = trunc i32 %0 to i8
+ store i8 %conv, i8* %q, align 1, !tbaa !1
+ %1 = load i32* %p, align 4, !tbaa !0, !invariant.load !3
+ %add = add nsw i32 %1, 1
+ ret i32 %add
+
+; CHECK: foo
+; CHECK: %0 = load i32* %p
+; CHECK: store i8 %conv, i8* %q,
+; CHECK: %add = add nsw i32 %0, 1
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{}
diff --git a/test/Analysis/BasicAA/phi-spec-order.ll b/test/Analysis/BasicAA/phi-spec-order.ll
new file mode 100644
index 000000000000..27d47bcd5bf3
--- /dev/null
+++ b/test/Analysis/BasicAA/phi-spec-order.ll
@@ -0,0 +1,71 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+@X = external global [16000 x double], align 32
+@Y = external global [16000 x double], align 32
+
+define signext i32 @s000() nounwind {
+entry:
+ br label %for.cond2.preheader
+
+for.cond2.preheader: ; preds = %for.end, %entry
+ %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ]
+ br label %for.body4
+
+for.body4: ; preds = %for.body4, %for.cond2.preheader
+ %lsr.iv4 = phi [16000 x double]* [ %i11, %for.body4 ], [ bitcast (double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 8)
+ to [16000 x double]*), %for.cond2.preheader ]
+ %lsr.iv1 = phi [16000 x double]* [ %i10, %for.body4 ], [ @X, %for.cond2.preheader ]
+
+; CHECK: NoAlias:{{[ \t]+}}[16000 x double]* %lsr.iv1, [16000 x double]* %lsr.iv4
+
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body4 ], [ 16000, %for.cond2.preheader ]
+ %lsr.iv46 = bitcast [16000 x double]* %lsr.iv4 to <4 x double>*
+ %lsr.iv12 = bitcast [16000 x double]* %lsr.iv1 to <4 x double>*
+ %scevgep11 = getelementptr <4 x double>* %lsr.iv46, i64 -2
+ %i6 = load <4 x double>* %scevgep11, align 32, !tbaa !0
+ %add = fadd <4 x double> %i6, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ store <4 x double> %add, <4 x double>* %lsr.iv12, align 32, !tbaa !0
+ %scevgep10 = getelementptr <4 x double>* %lsr.iv46, i64 -1
+ %i7 = load <4 x double>* %scevgep10, align 32, !tbaa !0
+ %add.4 = fadd <4 x double> %i7, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %scevgep9 = getelementptr <4 x double>* %lsr.iv12, i64 1
+ store <4 x double> %add.4, <4 x double>* %scevgep9, align 32, !tbaa !0
+ %i8 = load <4 x double>* %lsr.iv46, align 32, !tbaa !0
+ %add.8 = fadd <4 x double> %i8, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %scevgep8 = getelementptr <4 x double>* %lsr.iv12, i64 2
+ store <4 x double> %add.8, <4 x double>* %scevgep8, align 32, !tbaa !0
+ %scevgep7 = getelementptr <4 x double>* %lsr.iv46, i64 1
+ %i9 = load <4 x double>* %scevgep7, align 32, !tbaa !0
+ %add.12 = fadd <4 x double> %i9, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %scevgep3 = getelementptr <4 x double>* %lsr.iv12, i64 3
+ store <4 x double> %add.12, <4 x double>* %scevgep3, align 32, !tbaa !0
+
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep11, <4 x double>* %scevgep7
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep10, <4 x double>* %scevgep7
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep7, <4 x double>* %scevgep9
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep11, <4 x double>* %scevgep3
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep10, <4 x double>* %scevgep3
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep3, <4 x double>* %scevgep9
+
+ %lsr.iv.next = add i32 %lsr.iv, -16
+ %scevgep = getelementptr [16000 x double]* %lsr.iv1, i64 0, i64 16
+ %i10 = bitcast double* %scevgep to [16000 x double]*
+ %scevgep5 = getelementptr [16000 x double]* %lsr.iv4, i64 0, i64 16
+ %i11 = bitcast double* %scevgep5 to [16000 x double]*
+ %exitcond.15 = icmp eq i32 %lsr.iv.next, 0
+ br i1 %exitcond.15, label %for.end, label %for.body4
+
+for.end: ; preds = %for.body4
+ %inc9 = add nsw i32 %nl.018, 1
+ %exitcond = icmp eq i32 %inc9, 400000
+ br i1 %exitcond, label %for.end10, label %for.cond2.preheader
+
+for.end10: ; preds = %for.end
+ ret i32 0
+}
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Analysis/BasicAA/phi-speculation.ll b/test/Analysis/BasicAA/phi-speculation.ll
index 21c65929862f..5e1e118d9855 100644
--- a/test/Analysis/BasicAA/phi-speculation.ll
+++ b/test/Analysis/BasicAA/phi-speculation.ll
@@ -4,9 +4,9 @@ target datalayout =
; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
; ptr_phi and ptr2_phi do not alias.
+; CHECK: test_noalias_1
; CHECK: NoAlias: i32* %ptr2_phi, i32* %ptr_phi
-
-define i32 @test_noalias(i32* %ptr2, i32 %count, i32* %coeff) {
+define i32 @test_noalias_1(i32* %ptr2, i32 %count, i32* %coeff) {
entry:
%ptr = getelementptr inbounds i32* %ptr2, i64 1
br label %while.body
@@ -31,3 +31,64 @@ while.body:
the_exit:
ret i32 %add
}
+
+; CHECK: test_noalias_2
+; CHECK: NoAlias: i32* %ptr_outer_phi, i32* %ptr_outer_phi2
+; CHECK: NoAlias: i32* %ptr2_phi, i32* %ptr_phi
+define i32 @test_noalias_2(i32* %ptr2, i32 %count, i32* %coeff) {
+entry:
+ %ptr = getelementptr inbounds i32* %ptr2, i64 1
+ br label %outer.while.header
+
+outer.while.header:
+ %ptr_outer_phi = phi i32* [%ptr_inc_outer, %outer.while.backedge], [ %ptr, %entry]
+ %ptr_outer_phi2 = phi i32* [%ptr2_inc_outer, %outer.while.backedge], [ %ptr2, %entry]
+ %num.outer = phi i32 [ %count, %entry ], [ %dec.outer, %outer.while.backedge ]
+ br label %while.body
+
+while.body:
+ %num = phi i32 [ %count, %outer.while.header ], [ %dec, %while.body ]
+ %ptr_phi = phi i32* [ %ptr_outer_phi, %outer.while.header ], [ %ptr_inc, %while.body ]
+ %ptr2_phi = phi i32* [ %ptr_outer_phi2, %outer.while.header ], [ %ptr2_inc, %while.body ]
+ %result.09 = phi i32 [ 0 , %outer.while.header ], [ %add, %while.body ]
+ %dec = add nsw i32 %num, -1
+ %0 = load i32* %ptr_phi, align 4
+ store i32 %0, i32* %ptr2_phi, align 4
+ %1 = load i32* %coeff, align 4
+ %2 = load i32* %ptr_phi, align 4
+ %mul = mul nsw i32 %1, %2
+ %add = add nsw i32 %mul, %result.09
+ %tobool = icmp eq i32 %dec, 0
+ %ptr_inc = getelementptr inbounds i32* %ptr_phi, i64 1
+ %ptr2_inc = getelementptr inbounds i32* %ptr2_phi, i64 1
+ br i1 %tobool, label %outer.while.backedge, label %while.body
+
+outer.while.backedge:
+ %ptr_inc_outer = getelementptr inbounds i32* %ptr_phi, i64 1
+ %ptr2_inc_outer = getelementptr inbounds i32* %ptr2_phi, i64 1
+ %dec.outer = add nsw i32 %num.outer, -1
+ %br.cond = icmp eq i32 %dec.outer, 0
+ br i1 %br.cond, label %the_exit, label %outer.while.header
+
+the_exit:
+ ret i32 %add
+}
+
+; CHECK: test_noalias_3
+; CHECK: MayAlias: i8* %ptr2_phi, i8* %ptr_phi
+define i32 @test_noalias_3(i8* noalias %x, i8* noalias %y, i8* noalias %z,
+ i32 %count) {
+entry:
+ br label %while.body
+
+while.body:
+ %num = phi i32 [ %count, %entry ], [ %dec, %while.body ]
+ %ptr_phi = phi i8* [ %x, %entry ], [ %z, %while.body ]
+ %ptr2_phi = phi i8* [ %y, %entry ], [ %ptr_phi, %while.body ]
+ %dec = add nsw i32 %num, -1
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %the_exit, label %while.body
+
+the_exit:
+ ret i32 1
+}
diff --git a/test/Analysis/BasicAA/pure-const-dce.ll b/test/Analysis/BasicAA/pure-const-dce.ll
index 266e607b21a4..e48992860a60 100644
--- a/test/Analysis/BasicAA/pure-const-dce.ll
+++ b/test/Analysis/BasicAA/pure-const-dce.ll
@@ -4,11 +4,11 @@
; CHECK: @test
; CHECK: entry
-; CHECK: %tmp0 = call i32 @TestConst(i32 5) readnone
-; CHECK-NEXT: %tmp1 = call i32 @TestPure(i32 6) readonly
+; CHECK: %tmp0 = call i32 @TestConst(i32 5) [[READNONE:#[0-9]+]]
+; CHECK-NEXT: %tmp1 = call i32 @TestPure(i32 6) [[READONLY:#[0-9]+]]
; CHECK-NEXT: %tmp2 = call i32 @TestNone(i32 7)
; CHECK-NEXT: store i32 1, i32* @g
-; CHECK-NEXT: %tmp5 = call i32 @TestPure(i32 6) readonly
+; CHECK-NEXT: %tmp5 = call i32 @TestPure(i32 6) [[READONLY]]
; CHECK-NEXT: %tmp7 = call i32 @TestNone(i32 7)
; CHECK-NEXT: %tmp8 = call i32 @TestNone(i32 7)
; CHECK-NEXT: %sum0 = add i32 %tmp0, %tmp1
@@ -49,3 +49,6 @@ declare i32 @TestConst(i32) readnone
declare i32 @TestPure(i32) readonly
declare i32 @TestNone(i32)
+
+; CHECK: attributes [[READNONE]] = { readnone }
+; CHECK: attributes [[READONLY]] = { readonly }
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
new file mode 100644
index 000000000000..ba9d84cf3e23
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -0,0 +1,547 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+define i32 @casts() {
+
+ ; -- scalars --
+ ; CHECK: cost of 1 {{.*}} sext
+ %r0 = sext i1 undef to i8
+ ; CHECK: cost of 1 {{.*}} zext
+ %r1 = zext i1 undef to i8
+ ; CHECK: cost of 1 {{.*}} sext
+ %r2 = sext i1 undef to i16
+ ; CHECK: cost of 1 {{.*}} zext
+ %r3 = zext i1 undef to i16
+ ; CHECK: cost of 1 {{.*}} sext
+ %r4 = sext i1 undef to i32
+ ; CHECK: cost of 1 {{.*}} zext
+ %r5 = zext i1 undef to i32
+ ; CHECK: cost of 1 {{.*}} sext
+ %r6 = sext i1 undef to i64
+ ; CHECK: cost of 1 {{.*}} zext
+ %r7 = zext i1 undef to i64
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r8 = trunc i8 undef to i1
+ ; CHECK: cost of 1 {{.*}} sext
+ %r9 = sext i8 undef to i16
+ ; CHECK: cost of 1 {{.*}} zext
+ %r10 = zext i8 undef to i16
+ ; CHECK: cost of 1 {{.*}} sext
+ %r11 = sext i8 undef to i32
+ ; CHECK: cost of 1 {{.*}} zext
+ %r12 = zext i8 undef to i32
+ ; CHECK: cost of 1 {{.*}} sext
+ %r13 = sext i8 undef to i64
+ ; CHECK: cost of 1 {{.*}} zext
+ %r14 = zext i8 undef to i64
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r15 = trunc i16 undef to i1
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r16 = trunc i16 undef to i8
+ ; CHECK: cost of 1 {{.*}} sext
+ %r17 = sext i16 undef to i32
+ ; CHECK: cost of 1 {{.*}} zext
+ %r18 = zext i16 undef to i32
+ ; CHECK: cost of 2 {{.*}} sext
+ %r19 = sext i16 undef to i64
+ ; CHECK: cost of 1 {{.*}} zext
+ %r20 = zext i16 undef to i64
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r21 = trunc i32 undef to i1
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r22 = trunc i32 undef to i8
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r23 = trunc i32 undef to i16
+ ; CHECK: cost of 1 {{.*}} sext
+ %r24 = sext i32 undef to i64
+ ; CHECK: cost of 1 {{.*}} zext
+ %r25 = zext i32 undef to i64
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r26 = trunc i64 undef to i1
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r27 = trunc i64 undef to i8
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r28 = trunc i64 undef to i16
+ ; CHECK: cost of 0 {{.*}} trunc
+ %r29 = trunc i64 undef to i32
+
+ ; -- floating point conversions --
+  ; Moves between scalar and NEON registers (a sketch follows this file's diff).
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r30 = fptoui float undef to i1
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r31 = fptosi float undef to i1
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r32 = fptoui float undef to i8
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r33 = fptosi float undef to i8
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r34 = fptoui float undef to i16
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r35 = fptosi float undef to i16
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r36 = fptoui float undef to i32
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r37 = fptosi float undef to i32
+ ; CHECK: cost of 10 {{.*}} fptoui
+ %r38 = fptoui float undef to i64
+ ; CHECK: cost of 10 {{.*}} fptosi
+ %r39 = fptosi float undef to i64
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r40 = fptoui double undef to i1
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r41 = fptosi double undef to i1
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r42 = fptoui double undef to i8
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r43 = fptosi double undef to i8
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r44 = fptoui double undef to i16
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r45 = fptosi double undef to i16
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r46 = fptoui double undef to i32
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r47 = fptosi double undef to i32
+ ; Function call
+ ; CHECK: cost of 10 {{.*}} fptoui
+ %r48 = fptoui double undef to i64
+ ; CHECK: cost of 10 {{.*}} fptosi
+ %r49 = fptosi double undef to i64
+
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r50 = sitofp i1 undef to float
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r51 = uitofp i1 undef to float
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r52 = sitofp i1 undef to double
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r53 = uitofp i1 undef to double
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r54 = sitofp i8 undef to float
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r55 = uitofp i8 undef to float
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r56 = sitofp i8 undef to double
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r57 = uitofp i8 undef to double
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r58 = sitofp i16 undef to float
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r59 = uitofp i16 undef to float
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r60 = sitofp i16 undef to double
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r61 = uitofp i16 undef to double
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r62 = sitofp i32 undef to float
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r63 = uitofp i32 undef to float
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r64 = sitofp i32 undef to double
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r65 = uitofp i32 undef to double
+ ; Function call
+ ; CHECK: cost of 10 {{.*}} sitofp
+ %r66 = sitofp i64 undef to float
+ ; CHECK: cost of 10 {{.*}} uitofp
+ %r67 = uitofp i64 undef to float
+ ; CHECK: cost of 10 {{.*}} sitofp
+ %r68 = sitofp i64 undef to double
+ ; CHECK: cost of 10 {{.*}} uitofp
+ %r69 = uitofp i64 undef to double
+
+ ; CHECK: cost of 3 {{.*}} sext
+ %r70 = sext <8 x i8> undef to <8 x i32>
+ ; CHECK: cost of 6 {{.*}} sext
+ %r71 = sext <16 x i8> undef to <16 x i32>
+ ; CHECK: cost of 3 {{.*}} zext
+ %r72 = zext <8 x i8> undef to <8 x i32>
+ ; CHECK: cost of 6 {{.*}} zext
+ %r73 = zext <16 x i8> undef to <16 x i32>
+
+ ; CHECK: cost of 7 {{.*}} sext
+ %rext_0 = sext <8 x i8> undef to <8 x i64>
+ ; CHECK: cost of 7 {{.*}} zext
+ %rext_1 = zext <8 x i8> undef to <8 x i64>
+ ; CHECK: cost of 6 {{.*}} sext
+ %rext_2 = sext <8 x i16> undef to <8 x i64>
+ ; CHECK: cost of 6 {{.*}} zext
+ %rext_3 = zext <8 x i16> undef to <8 x i64>
+ ; CHECK: cost of 3 {{.*}} sext
+ %rext_4 = sext <4 x i16> undef to <4 x i64>
+ ; CHECK: cost of 3 {{.*}} zext
+ %rext_5 = zext <4 x i16> undef to <4 x i64>
+
+  ; Cost of vector casts that have to be lowered through the stack.
+ ; CHECK: cost of 19 {{.*}} trunc
+ %r74 = trunc <8 x i32> undef to <8 x i8>
+ ; CHECK: cost of 38 {{.*}} trunc
+ %r75 = trunc <16 x i32> undef to <16 x i8>
+
+ ; Floating point truncation costs.
+ ; CHECK: cost of 1 {{.*}} fptrunc double
+ %r80 = fptrunc double undef to float
+ ; CHECK: cost of 2 {{.*}} fptrunc <2 x double
+ %r81 = fptrunc <2 x double> undef to <2 x float>
+ ; CHECK: cost of 4 {{.*}} fptrunc <4 x double
+ %r82 = fptrunc <4 x double> undef to <4 x float>
+ ; CHECK: cost of 8 {{.*}} fptrunc <8 x double
+ %r83 = fptrunc <8 x double> undef to <8 x float>
+ ; CHECK: cost of 16 {{.*}} fptrunc <16 x double
+ %r84 = fptrunc <16 x double> undef to <16 x float>
+
+ ; Floating point extension costs.
+ ; CHECK: cost of 1 {{.*}} fpext float
+ %r85 = fpext float undef to double
+ ; CHECK: cost of 2 {{.*}} fpext <2 x float
+ %r86 = fpext <2 x float> undef to <2 x double>
+ ; CHECK: cost of 4 {{.*}} fpext <4 x float
+ %r87 = fpext <4 x float> undef to <4 x double>
+ ; CHECK: cost of 8 {{.*}} fpext <8 x float
+ %r88 = fpext <8 x float> undef to <8 x double>
+ ; CHECK: cost of 16 {{.*}} fpext <16 x float
+ %r89 = fpext <16 x float> undef to <16 x double>
+
+ ;; Floating point to integer vector casts.
+ ; CHECK: cost of 1 {{.*}} fptoui
+ %r90 = fptoui <2 x float> undef to <2 x i1>
+ ; CHECK: cost of 1 {{.*}} fptosi
+ %r91 = fptosi <2 x float> undef to <2 x i1>
+ ; CHECK: cost of 1 {{.*}} fptoui
+ %r92 = fptoui <2 x float> undef to <2 x i8>
+ ; CHECK: cost of 1 {{.*}} fptosi
+ %r93 = fptosi <2 x float> undef to <2 x i8>
+ ; CHECK: cost of 1 {{.*}} fptoui
+ %r94 = fptoui <2 x float> undef to <2 x i16>
+ ; CHECK: cost of 1 {{.*}} fptosi
+ %r95 = fptosi <2 x float> undef to <2 x i16>
+ ; CHECK: cost of 1 {{.*}} fptoui
+ %r96 = fptoui <2 x float> undef to <2 x i32>
+ ; CHECK: cost of 1 {{.*}} fptosi
+ %r97 = fptosi <2 x float> undef to <2 x i32>
+ ; CHECK: cost of 24 {{.*}} fptoui
+ %r98 = fptoui <2 x float> undef to <2 x i64>
+ ; CHECK: cost of 24 {{.*}} fptosi
+ %r99 = fptosi <2 x float> undef to <2 x i64>
+
+ ; CHECK: cost of 8 {{.*}} fptoui
+ %r100 = fptoui <2 x double> undef to <2 x i1>
+ ; CHECK: cost of 8 {{.*}} fptosi
+ %r101 = fptosi <2 x double> undef to <2 x i1>
+ ; CHECK: cost of 8 {{.*}} fptoui
+ %r102 = fptoui <2 x double> undef to <2 x i8>
+ ; CHECK: cost of 8 {{.*}} fptosi
+ %r103 = fptosi <2 x double> undef to <2 x i8>
+ ; CHECK: cost of 8 {{.*}} fptoui
+ %r104 = fptoui <2 x double> undef to <2 x i16>
+ ; CHECK: cost of 8 {{.*}} fptosi
+ %r105 = fptosi <2 x double> undef to <2 x i16>
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r106 = fptoui <2 x double> undef to <2 x i32>
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r107 = fptosi <2 x double> undef to <2 x i32>
+ ; CHECK: cost of 24 {{.*}} fptoui
+ %r108 = fptoui <2 x double> undef to <2 x i64>
+ ; CHECK: cost of 24 {{.*}} fptosi
+ %r109 = fptosi <2 x double> undef to <2 x i64>
+
+ ; CHECK: cost of 16 {{.*}} fptoui
+ %r110 = fptoui <4 x float> undef to <4 x i1>
+ ; CHECK: cost of 16 {{.*}} fptosi
+ %r111 = fptosi <4 x float> undef to <4 x i1>
+ ; CHECK: cost of 3 {{.*}} fptoui
+ %r112 = fptoui <4 x float> undef to <4 x i8>
+ ; CHECK: cost of 3 {{.*}} fptosi
+ %r113 = fptosi <4 x float> undef to <4 x i8>
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r114 = fptoui <4 x float> undef to <4 x i16>
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r115 = fptosi <4 x float> undef to <4 x i16>
+ ; CHECK: cost of 1 {{.*}} fptoui
+ %r116 = fptoui <4 x float> undef to <4 x i32>
+ ; CHECK: cost of 1 {{.*}} fptosi
+ %r117 = fptosi <4 x float> undef to <4 x i32>
+ ; CHECK: cost of 48 {{.*}} fptoui
+ %r118 = fptoui <4 x float> undef to <4 x i64>
+ ; CHECK: cost of 48 {{.*}} fptosi
+ %r119 = fptosi <4 x float> undef to <4 x i64>
+
+ ; CHECK: cost of 16 {{.*}} fptoui
+ %r120 = fptoui <4 x double> undef to <4 x i1>
+ ; CHECK: cost of 16 {{.*}} fptosi
+ %r121 = fptosi <4 x double> undef to <4 x i1>
+ ; CHECK: cost of 16 {{.*}} fptoui
+ %r122 = fptoui <4 x double> undef to <4 x i8>
+ ; CHECK: cost of 16 {{.*}} fptosi
+ %r123 = fptosi <4 x double> undef to <4 x i8>
+ ; CHECK: cost of 16 {{.*}} fptoui
+ %r124 = fptoui <4 x double> undef to <4 x i16>
+ ; CHECK: cost of 16 {{.*}} fptosi
+ %r125 = fptosi <4 x double> undef to <4 x i16>
+ ; CHECK: cost of 16 {{.*}} fptoui
+ %r126 = fptoui <4 x double> undef to <4 x i32>
+ ; CHECK: cost of 16 {{.*}} fptosi
+ %r127 = fptosi <4 x double> undef to <4 x i32>
+ ; CHECK: cost of 48 {{.*}} fptoui
+ %r128 = fptoui <4 x double> undef to <4 x i64>
+ ; CHECK: cost of 48 {{.*}} fptosi
+ %r129 = fptosi <4 x double> undef to <4 x i64>
+
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r130 = fptoui <8 x float> undef to <8 x i1>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r131 = fptosi <8 x float> undef to <8 x i1>
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r132 = fptoui <8 x float> undef to <8 x i8>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r133 = fptosi <8 x float> undef to <8 x i8>
+ ; CHECK: cost of 4 {{.*}} fptoui
+ %r134 = fptoui <8 x float> undef to <8 x i16>
+ ; CHECK: cost of 4 {{.*}} fptosi
+ %r135 = fptosi <8 x float> undef to <8 x i16>
+ ; CHECK: cost of 2 {{.*}} fptoui
+ %r136 = fptoui <8 x float> undef to <8 x i32>
+ ; CHECK: cost of 2 {{.*}} fptosi
+ %r137 = fptosi <8 x float> undef to <8 x i32>
+ ; CHECK: cost of 96 {{.*}} fptoui
+ %r138 = fptoui <8 x float> undef to <8 x i64>
+ ; CHECK: cost of 96 {{.*}} fptosi
+ %r139 = fptosi <8 x float> undef to <8 x i64>
+
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r140 = fptoui <8 x double> undef to <8 x i1>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r141 = fptosi <8 x double> undef to <8 x i1>
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r142 = fptoui <8 x double> undef to <8 x i8>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r143 = fptosi <8 x double> undef to <8 x i8>
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r144 = fptoui <8 x double> undef to <8 x i16>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r145 = fptosi <8 x double> undef to <8 x i16>
+ ; CHECK: cost of 32 {{.*}} fptoui
+ %r146 = fptoui <8 x double> undef to <8 x i32>
+ ; CHECK: cost of 32 {{.*}} fptosi
+ %r147 = fptosi <8 x double> undef to <8 x i32>
+ ; CHECK: cost of 96 {{.*}} fptoui
+ %r148 = fptoui <8 x double> undef to <8 x i64>
+ ; CHECK: cost of 96 {{.*}} fptosi
+ %r149 = fptosi <8 x double> undef to <8 x i64>
+
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r150 = fptoui <16 x float> undef to <16 x i1>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r151 = fptosi <16 x float> undef to <16 x i1>
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r152 = fptoui <16 x float> undef to <16 x i8>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r153 = fptosi <16 x float> undef to <16 x i8>
+ ; CHECK: cost of 8 {{.*}} fptoui
+ %r154 = fptoui <16 x float> undef to <16 x i16>
+ ; CHECK: cost of 8 {{.*}} fptosi
+ %r155 = fptosi <16 x float> undef to <16 x i16>
+ ; CHECK: cost of 4 {{.*}} fptoui
+ %r156 = fptoui <16 x float> undef to <16 x i32>
+ ; CHECK: cost of 4 {{.*}} fptosi
+ %r157 = fptosi <16 x float> undef to <16 x i32>
+ ; CHECK: cost of 192 {{.*}} fptoui
+ %r158 = fptoui <16 x float> undef to <16 x i64>
+ ; CHECK: cost of 192 {{.*}} fptosi
+ %r159 = fptosi <16 x float> undef to <16 x i64>
+
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r160 = fptoui <16 x double> undef to <16 x i1>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r161 = fptosi <16 x double> undef to <16 x i1>
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r162 = fptoui <16 x double> undef to <16 x i8>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r163 = fptosi <16 x double> undef to <16 x i8>
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r164 = fptoui <16 x double> undef to <16 x i16>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r165 = fptosi <16 x double> undef to <16 x i16>
+ ; CHECK: cost of 64 {{.*}} fptoui
+ %r166 = fptoui <16 x double> undef to <16 x i32>
+ ; CHECK: cost of 64 {{.*}} fptosi
+ %r167 = fptosi <16 x double> undef to <16 x i32>
+ ; CHECK: cost of 192 {{.*}} fptoui
+ %r168 = fptoui <16 x double> undef to <16 x i64>
+ ; CHECK: cost of 192 {{.*}} fptosi
+ %r169 = fptosi <16 x double> undef to <16 x i64>
+
+ ; CHECK: cost of 8 {{.*}} uitofp
+ %r170 = uitofp <2 x i1> undef to <2 x float>
+ ; CHECK: cost of 8 {{.*}} sitofp
+ %r171 = sitofp <2 x i1> undef to <2 x float>
+ ; CHECK: cost of 3 {{.*}} uitofp
+ %r172 = uitofp <2 x i8> undef to <2 x float>
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %r173 = sitofp <2 x i8> undef to <2 x float>
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r174 = uitofp <2 x i16> undef to <2 x float>
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r175 = sitofp <2 x i16> undef to <2 x float>
+ ; CHECK: cost of 1 {{.*}} uitofp
+ %r176 = uitofp <2 x i32> undef to <2 x float>
+ ; CHECK: cost of 1 {{.*}} sitofp
+ %r177 = sitofp <2 x i32> undef to <2 x float>
+ ; CHECK: cost of 24 {{.*}} uitofp
+ %r178 = uitofp <2 x i64> undef to <2 x float>
+ ; CHECK: cost of 24 {{.*}} sitofp
+ %r179 = sitofp <2 x i64> undef to <2 x float>
+
+ ; CHECK: cost of 8 {{.*}} uitofp
+ %r180 = uitofp <2 x i1> undef to <2 x double>
+ ; CHECK: cost of 8 {{.*}} sitofp
+ %r181 = sitofp <2 x i1> undef to <2 x double>
+ ; CHECK: cost of 4 {{.*}} uitofp
+ %r182 = uitofp <2 x i8> undef to <2 x double>
+ ; CHECK: cost of 4 {{.*}} sitofp
+ %r183 = sitofp <2 x i8> undef to <2 x double>
+ ; CHECK: cost of 3 {{.*}} uitofp
+ %r184 = uitofp <2 x i16> undef to <2 x double>
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %r185 = sitofp <2 x i16> undef to <2 x double>
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r186 = uitofp <2 x i32> undef to <2 x double>
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r187 = sitofp <2 x i32> undef to <2 x double>
+ ; CHECK: cost of 24 {{.*}} uitofp
+ %r188 = uitofp <2 x i64> undef to <2 x double>
+ ; CHECK: cost of 24 {{.*}} sitofp
+ %r189 = sitofp <2 x i64> undef to <2 x double>
+
+ ; CHECK: cost of 3 {{.*}} uitofp
+ %r190 = uitofp <4 x i1> undef to <4 x float>
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %r191 = sitofp <4 x i1> undef to <4 x float>
+ ; CHECK: cost of 3 {{.*}} uitofp
+ %r192 = uitofp <4 x i8> undef to <4 x float>
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %r193 = sitofp <4 x i8> undef to <4 x float>
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r194 = uitofp <4 x i16> undef to <4 x float>
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r195 = sitofp <4 x i16> undef to <4 x float>
+ ; CHECK: cost of 1 {{.*}} uitofp
+ %r196 = uitofp <4 x i32> undef to <4 x float>
+ ; CHECK: cost of 1 {{.*}} sitofp
+ %r197 = sitofp <4 x i32> undef to <4 x float>
+ ; CHECK: cost of 48 {{.*}} uitofp
+ %r198 = uitofp <4 x i64> undef to <4 x float>
+ ; CHECK: cost of 48 {{.*}} sitofp
+ %r199 = sitofp <4 x i64> undef to <4 x float>
+
+ ; CHECK: cost of 16 {{.*}} uitofp
+ %r200 = uitofp <4 x i1> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} sitofp
+ %r201 = sitofp <4 x i1> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} uitofp
+ %r202 = uitofp <4 x i8> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} sitofp
+ %r203 = sitofp <4 x i8> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} uitofp
+ %r204 = uitofp <4 x i16> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} sitofp
+ %r205 = sitofp <4 x i16> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} uitofp
+ %r206 = uitofp <4 x i32> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} sitofp
+ %r207 = sitofp <4 x i32> undef to <4 x double>
+ ; CHECK: cost of 48 {{.*}} uitofp
+ %r208 = uitofp <4 x i64> undef to <4 x double>
+ ; CHECK: cost of 48 {{.*}} sitofp
+ %r209 = sitofp <4 x i64> undef to <4 x double>
+
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r210 = uitofp <8 x i1> undef to <8 x float>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r211 = sitofp <8 x i1> undef to <8 x float>
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r212 = uitofp <8 x i8> undef to <8 x float>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r213 = sitofp <8 x i8> undef to <8 x float>
+ ; CHECK: cost of 4 {{.*}} uitofp
+ %r214 = uitofp <8 x i16> undef to <8 x float>
+ ; CHECK: cost of 4 {{.*}} sitofp
+ %r215 = sitofp <8 x i16> undef to <8 x float>
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r216 = uitofp <8 x i32> undef to <8 x float>
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r217 = sitofp <8 x i32> undef to <8 x float>
+ ; CHECK: cost of 96 {{.*}} uitofp
+ %r218 = uitofp <8 x i64> undef to <8 x float>
+ ; CHECK: cost of 96 {{.*}} sitofp
+ %r219 = sitofp <8 x i64> undef to <8 x float>
+
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r220 = uitofp <8 x i1> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r221 = sitofp <8 x i1> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r222 = uitofp <8 x i8> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r223 = sitofp <8 x i8> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r224 = uitofp <8 x i16> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r225 = sitofp <8 x i16> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r226 = uitofp <8 x i16> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r227 = sitofp <8 x i16> undef to <8 x double>
+ ; CHECK: cost of 96 {{.*}} uitofp
+ %r228 = uitofp <8 x i64> undef to <8 x double>
+ ; CHECK: cost of 96 {{.*}} sitofp
+ %r229 = sitofp <8 x i64> undef to <8 x double>
+
+ ; CHECK: cost of 64 {{.*}} uitofp
+ %r230 = uitofp <16 x i1> undef to <16 x float>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r231 = sitofp <16 x i1> undef to <16 x float>
+ ; CHECK: cost of 64 {{.*}} uitofp
+ %r232 = uitofp <16 x i8> undef to <16 x float>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r233 = sitofp <16 x i8> undef to <16 x float>
+ ; CHECK: cost of 8 {{.*}} uitofp
+ %r234 = uitofp <16 x i16> undef to <16 x float>
+ ; CHECK: cost of 8 {{.*}} sitofp
+ %r235 = sitofp <16 x i16> undef to <16 x float>
+ ; CHECK: cost of 4 {{.*}} uitofp
+ %r236 = uitofp <16 x i32> undef to <16 x float>
+ ; CHECK: cost of 4 {{.*}} sitofp
+ %r237 = sitofp <16 x i32> undef to <16 x float>
+ ; CHECK: cost of 192 {{.*}} uitofp
+ %r238 = uitofp <16 x i64> undef to <16 x float>
+ ; CHECK: cost of 192 {{.*}} sitofp
+ %r239 = sitofp <16 x i64> undef to <16 x float>
+
+ ; CHECK: cost of 64 {{.*}} uitofp
+ %r240 = uitofp <16 x i1> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r241 = sitofp <16 x i1> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} uitofp
+ %r242 = uitofp <16 x i8> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r243 = sitofp <16 x i8> undef to <16 x double>
+  ; CHECK: cost of 64 {{.*}} uitofp
+ %r244 = uitofp <16 x i16> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r245 = sitofp <16 x i16> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} uitofp
+ %r246 = uitofp <16 x i16> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r247 = sitofp <16 x i16> undef to <16 x double>
+ ; CHECK: cost of 192 {{.*}} uitofp
+ %r248 = uitofp <16 x i64> undef to <16 x double>
+ ; CHECK: cost of 192 {{.*}} sitofp
+ %r249 = sitofp <16 x i64> undef to <16 x double>
+
+  ; CHECK: cost of 0 {{.*}} ret
+ ret i32 undef
+}
+
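
A note on the scalar conversion costs above (an interpretation, not text from the patch): the flat cost of 2 for float/double-to-integer casts matches the usual VFP lowering, where the conversion runs in the FP register file and a second instruction moves the result to a core register. A minimal C sketch, with the assumed ARMv7 lowering in comments:

/* Illustration only, not part of the patch: why scalar fptosi is modeled as
 * cost 2 above. Compilers typically emit a convert in the FP/NEON register
 * file followed by a move to a core register:
 *
 *   vcvt.s32.f32  s0, s0   @ convert float -> i32, still in an s-register
 *   vmov          r0, s0   @ move the result into a core register
 */
int float_to_int(float f) { return (int)f; }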
diff --git a/test/Analysis/CostModel/ARM/gep.ll b/test/Analysis/CostModel/ARM/gep.ll
new file mode 100644
index 000000000000..a63b87d2ad11
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/gep.ll
@@ -0,0 +1,43 @@
+; RUN: opt -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+define void @test_geps() {
+  ; Cost of scalar integer geps should be one. We can't always expect them to be
+  ; folded into the addressing mode (a sketch follows this file's diff).
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8*
+ %a0 = getelementptr inbounds i8* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16*
+ %a1 = getelementptr inbounds i16* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32*
+ %a2 = getelementptr inbounds i32* undef, i32 0
+
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64*
+ %a3 = getelementptr inbounds i64* undef, i32 0
+
+ ; Cost of scalar floating point geps should be one. We cannot fold the address
+ ; computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float*
+ %a4 = getelementptr inbounds float* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double*
+ %a5 = getelementptr inbounds double* undef, i32 0
+
+
+ ; Cost of vector geps should be one. We cannot fold the address computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
+ %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
+ %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
+ %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
+ %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>*
+ %a11 = getelementptr inbounds <4 x float>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>*
+ %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+
+
+ ret void
+}
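
A sketch of why geps are modeled as cost 1 here (illustration only, not part of the patch; the function names are made up): a gep feeding a memory access can usually fold into the addressing mode, but a gep whose address is used directly has to materialize as an add.

/* Illustration only. On ARMv7 the first function can typically fold the
 * address computation into the load (ldr r0, [r0, r1, lsl #2]), while the
 * second needs an explicit add (add r0, r0, r1, lsl #2). */
int load_elt(int *p, int i) {
    return p[i];      /* gep folds into the load's addressing mode */
}

int *addr_of_elt(int *p, int i) {
    return &p[i];     /* gep must become a real add */
}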
diff --git a/test/Analysis/CostModel/ARM/insertelement.ll b/test/Analysis/CostModel/ARM/insertelement.ll
new file mode 100644
index 000000000000..f951b08f9baa
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/insertelement.ll
@@ -0,0 +1,46 @@
+; RUN: opt -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+; Multiple insertelements from loads into D subregisters are expensive on Swift
+; due to renaming constraints.
+%T_i8v = type <8 x i8>
+%T_i8 = type i8
+; CHECK: insertelement_i8
+define void @insertelement_i8(%T_i8* %saddr,
+ %T_i8v* %vaddr) {
+ %v0 = load %T_i8v* %vaddr
+ %v1 = load %T_i8* %saddr
+;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8>
+ %v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1
+ store %T_i8v %v2, %T_i8v* %vaddr
+ ret void
+}
+
+
+%T_i16v = type <4 x i16>
+%T_i16 = type i16
+; CHECK: insertelement_i16
+define void @insertelement_i16(%T_i16* %saddr,
+ %T_i16v* %vaddr) {
+ %v0 = load %T_i16v* %vaddr
+ %v1 = load %T_i16* %saddr
+;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16>
+ %v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1
+ store %T_i16v %v2, %T_i16v* %vaddr
+ ret void
+}
+
+%T_i32v = type <2 x i32>
+%T_i32 = type i32
+; CHECK: insertelement_i32
+define void @insertelement_i32(%T_i32* %saddr,
+ %T_i32v* %vaddr) {
+ %v0 = load %T_i32v* %vaddr
+ %v1 = load %T_i32* %saddr
+;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32>
+ %v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1
+ store %T_i32v %v2, %T_i32v* %vaddr
+ ret void
+}
diff --git a/test/Analysis/CostModel/ARM/lit.local.cfg b/test/Analysis/CostModel/ARM/lit.local.cfg
new file mode 100644
index 000000000000..cb77b09ef4ad
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'ARM' not in targets:
+ config.unsupported = True
+
diff --git a/test/Analysis/CostModel/ARM/select.ll b/test/Analysis/CostModel/ARM/select.ll
new file mode 100644
index 000000000000..34ed1eefdaf4
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/select.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+; CHECK: casts
+define void @casts() {
+ ; Scalar values
+ ; CHECK: cost of 1 {{.*}} select
+ %v1 = select i1 undef, i8 undef, i8 undef
+ ; CHECK: cost of 1 {{.*}} select
+ %v2 = select i1 undef, i16 undef, i16 undef
+ ; CHECK: cost of 1 {{.*}} select
+ %v3 = select i1 undef, i32 undef, i32 undef
+ ; CHECK: cost of 2 {{.*}} select
+ %v4 = select i1 undef, i64 undef, i64 undef
+ ; CHECK: cost of 1 {{.*}} select
+ %v5 = select i1 undef, float undef, float undef
+ ; CHECK: cost of 1 {{.*}} select
+ %v6 = select i1 undef, double undef, double undef
+
+ ; Vector values
+ ; CHECK: cost of 1 {{.*}} select
+ %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
+ ; CHECK: cost of 1 {{.*}} select
+ %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
+ ; CHECK: cost of 1 {{.*}} select
+ %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
+ ; CHECK: cost of 1 {{.*}} select
+ %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
+
+ ; CHECK: cost of 1 {{.*}} select
+ %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
+ ; CHECK: cost of 1 {{.*}} select
+ %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
+ ; CHECK: cost of 1 {{.*}} select
+ %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
+ ; CHECK: cost of 40 {{.*}} select
+ %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+
+ ; CHECK: cost of 1 {{.*}} select
+ %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
+ ; CHECK: cost of 1 {{.*}} select
+ %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
+ ; CHECK: cost of 41 {{.*}} select
+ %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+ ; CHECK: cost of 82 {{.*}} select
+ %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+
+ ; CHECK: cost of 1 {{.*}} select
+ %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+ ; CHECK: cost of 19 {{.*}} select
+ %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+ ; CHECK: cost of 50 {{.*}} select
+ %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+ ; CHECK: cost of 100 {{.*}} select
+ %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+
+ ; CHECK: cost of 1 {{.*}} select
+ %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+ ; CHECK: cost of 1 {{.*}} select
+ %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+
+ ; CHECK: cost of 1 {{.*}} select
+ %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+
+ ret void
+}
diff --git a/test/Analysis/CostModel/ARM/shuffle.ll b/test/Analysis/CostModel/ARM/shuffle.ll
new file mode 100644
index 000000000000..c92d66880464
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/shuffle.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+; CHECK: shuffle
+define void @shuffle() {
+
+
+ ;; Reverse shuffles should be lowered to vrev and possibly a vext (for
+  ;; quadwords). A sketch of this lowering follows this file's diff.
+
+ ; Vector values
+ ; CHECK: cost of 1 {{.*}} shuffle
+ %v7 = shufflevector <2 x i8> undef, <2 x i8>undef, <2 x i32> <i32 1, i32 0>
+ ; CHECK: cost of 1 {{.*}} shuffle
+ %v8 = shufflevector <4 x i8> undef, <4 x i8>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ; CHECK: cost of 1 {{.*}} shuffle
+ %v9 = shufflevector <8 x i8> undef, <8 x i8>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ; CHECK: cost of 2 {{.*}} shuffle
+ %v10 = shufflevector <16 x i8> undef, <16 x i8>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; CHECK: cost of 1 {{.*}} shuffle
+ %v11 = shufflevector <2 x i16> undef, <2 x i16>undef, <2 x i32> <i32 1, i32 0>
+ ; CHECK: cost of 1 {{.*}} shuffle
+ %v12 = shufflevector <4 x i16> undef, <4 x i16>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ; CHECK: cost of 2 {{.*}} shuffle
+ %v13 = shufflevector <8 x i16> undef, <8 x i16>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+ ; CHECK: cost of 1 {{.*}} shuffle
+ %v14 = shufflevector <2 x i32> undef, <2 x i32>undef, <2 x i32> <i32 1, i32 0>
+ ; CHECK: cost of 2 {{.*}} shuffle
+ %v15 = shufflevector <4 x i32> undef, <4 x i32>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+
+ ; CHECK: cost of 1 {{.*}} shuffle
+ %v16 = shufflevector <2 x float> undef, <2 x float>undef, <2 x i32> <i32 1, i32 0>
+ ; CHECK: cost of 2 {{.*}} shuffle
+ %v17 = shufflevector <4 x float> undef, <4 x float>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+
+ ret void
+}
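
A sketch of the vrev/vext lowering referenced in the comment above (illustration only, not part of the patch): reversing a 64-bit vector is a single VREV64, while a 128-bit vector needs a VREV64 within each half plus a VEXT to swap the halves, matching the costs of 1 and 2 above.

/* Illustration only: NEON intrinsics for the reverse shuffles costed above. */
#include <arm_neon.h>

static int32x2_t rev_v2i32(int32x2_t v) {
    return vrev64_s32(v);            /* one VREV64.32 -- cost 1 */
}

static int32x4_t rev_v4i32(int32x4_t v) {
    int32x4_t r = vrev64q_s32(v);    /* reverse within each 64-bit half */
    return vextq_s32(r, r, 2);       /* swap the halves (VEXT) -- cost 2 total */
}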
diff --git a/test/Analysis/CostModel/PowerPC/insert_extract.ll b/test/Analysis/CostModel/PowerPC/insert_extract.ll
new file mode 100644
index 000000000000..f51963d56fde
--- /dev/null
+++ b/test/Analysis/CostModel/PowerPC/insert_extract.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @insert(i32 %arg) {
+ ; CHECK: cost of 13 {{.*}} insertelement
+ %x = insertelement <4 x i32> undef, i32 %arg, i32 0
+ ret i32 undef
+}
+
+define i32 @extract(<4 x i32> %arg) {
+ ; CHECK: cost of 13 {{.*}} extractelement
+ %x = extractelement <4 x i32> %arg, i32 0
+ ret i32 %x
+}
+
diff --git a/test/Analysis/CostModel/PowerPC/lit.local.cfg b/test/Analysis/CostModel/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..4019eca0bb88
--- /dev/null
+++ b/test/Analysis/CostModel/PowerPC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'PowerPC' not in targets:
+ config.unsupported = True
+
diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll
new file mode 100644
index 000000000000..c77cce955abf
--- /dev/null
+++ b/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @stores(i32 %arg) {
+
+ ; CHECK: cost of 1 {{.*}} store
+ store i8 undef, i8* undef, align 4
+ ; CHECK: cost of 1 {{.*}} store
+ store i16 undef, i16* undef, align 4
+ ; CHECK: cost of 1 {{.*}} store
+ store i32 undef, i32* undef, align 4
+ ; CHECK: cost of 2 {{.*}} store
+ store i64 undef, i64* undef, align 4
+ ; CHECK: cost of 4 {{.*}} store
+ store i128 undef, i128* undef, align 4
+
+ ret i32 undef
+}
+define i32 @loads(i32 %arg) {
+ ; CHECK: cost of 1 {{.*}} load
+ load i8* undef, align 4
+ ; CHECK: cost of 1 {{.*}} load
+ load i16* undef, align 4
+ ; CHECK: cost of 1 {{.*}} load
+ load i32* undef, align 4
+ ; CHECK: cost of 2 {{.*}} load
+ load i64* undef, align 4
+ ; CHECK: cost of 4 {{.*}} load
+ load i128* undef, align 4
+
+ ret i32 undef
+}
+
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll
index 37cca8d54067..85b442533f41 100644
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -1,4 +1,6 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=SSE3
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -14,7 +16,7 @@ define i32 @add(i32 %arg) {
%D = add <4 x i64> undef, undef
;CHECK: cost of 8 {{.*}} add
%E = add <8 x i64> undef, undef
- ;CHECK: cost of 1 {{.*}} ret
+ ;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}
@@ -28,11 +30,41 @@ define i32 @xor(i32 %arg) {
%C = xor <2 x i64> undef, undef
;CHECK: cost of 1 {{.*}} xor
%D = xor <4 x i64> undef, undef
- ;CHECK: cost of 1 {{.*}} ret
+ ;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}
+; CHECK: mul
+define void @mul() {
+ ; A <2 x i32> gets expanded to a <2 x i64> vector.
+ ; A <2 x i64> vector multiply is implemented using
+  ; 3 PMULUDQ, 2 PADDQ and 4 shifts (a sketch follows this file's diff).
+ ;CHECK: cost of 9 {{.*}} mul
+ %A0 = mul <2 x i32> undef, undef
+ ;CHECK: cost of 9 {{.*}} mul
+ %A1 = mul <2 x i64> undef, undef
+ ;CHECK: cost of 18 {{.*}} mul
+ %A2 = mul <4 x i64> undef, undef
+ ret void
+}
+
+; SSE3: sse3mull
+define void @sse3mull() {
+ ; SSE3: cost of 6 {{.*}} mul
+ %A0 = mul <4 x i32> undef, undef
+ ret void
+ ; SSE3: avx2mull
+}
+
+; AVX2: avx2mull
+define void @avx2mull() {
+ ; AVX2: cost of 9 {{.*}} mul
+ %A0 = mul <4 x i64> undef, undef
+ ret void
+ ; AVX2: fmul
+}
+; CHECK: fmul
define i32 @fmul(i32 %arg) {
;CHECK: cost of 1 {{.*}} fmul
%A = fmul <4 x float> undef, undef
@@ -40,3 +72,57 @@ define i32 @fmul(i32 %arg) {
%B = fmul <8 x float> undef, undef
ret i32 undef
}
+
+; AVX: shift
+; AVX2: shift
+define void @shift() {
+ ; AVX: cost of 2 {{.*}} shl
+ ; AVX2: cost of 1 {{.*}} shl
+ %A0 = shl <4 x i32> undef, undef
+ ; AVX: cost of 2 {{.*}} shl
+ ; AVX2: cost of 1 {{.*}} shl
+ %A1 = shl <2 x i64> undef, undef
+
+ ; AVX: cost of 2 {{.*}} lshr
+ ; AVX2: cost of 1 {{.*}} lshr
+ %B0 = lshr <4 x i32> undef, undef
+ ; AVX: cost of 2 {{.*}} lshr
+ ; AVX2: cost of 1 {{.*}} lshr
+ %B1 = lshr <2 x i64> undef, undef
+
+ ; AVX: cost of 2 {{.*}} ashr
+ ; AVX2: cost of 1 {{.*}} ashr
+ %C0 = ashr <4 x i32> undef, undef
+ ; AVX: cost of 6 {{.*}} ashr
+ ; AVX2: cost of 20 {{.*}} ashr
+ %C1 = ashr <2 x i64> undef, undef
+
+ ret void
+}
+
+; AVX: avx2shift
+; AVX2: avx2shift
+define void @avx2shift() {
+ ; AVX: cost of 2 {{.*}} shl
+ ; AVX2: cost of 1 {{.*}} shl
+ %A0 = shl <8 x i32> undef, undef
+ ; AVX: cost of 2 {{.*}} shl
+ ; AVX2: cost of 1 {{.*}} shl
+ %A1 = shl <4 x i64> undef, undef
+
+ ; AVX: cost of 2 {{.*}} lshr
+ ; AVX2: cost of 1 {{.*}} lshr
+ %B0 = lshr <8 x i32> undef, undef
+ ; AVX: cost of 2 {{.*}} lshr
+ ; AVX2: cost of 1 {{.*}} lshr
+ %B1 = lshr <4 x i64> undef, undef
+
+ ; AVX: cost of 2 {{.*}} ashr
+ ; AVX2: cost of 1 {{.*}} ashr
+ %C0 = ashr <8 x i32> undef, undef
+ ; AVX: cost of 12 {{.*}} ashr
+ ; AVX2: cost of 40 {{.*}} ashr
+ %C1 = ashr <4 x i64> undef, undef
+
+ ret void
+}
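
The mul() comment above can be made concrete with an SSE2 intrinsics sketch (illustration only, not part of the patch): a <2 x i64> multiply decomposes into exactly 3 PMULUDQ, 2 PADDQ and 4 shifts, which is where the modeled cost of 9 comes from.

/* Illustration only: 64-bit lane-wise multiply out of 32-bit multiplies.
 * a*b mod 2^64 = a_lo*b_lo + ((a_hi*b_lo + a_lo*b_hi) << 32). */
#include <emmintrin.h>

static __m128i mul_v2i64_sse2(__m128i a, __m128i b) {
    __m128i a_hi = _mm_srli_epi64(a, 32);         /* shift 1 */
    __m128i b_hi = _mm_srli_epi64(b, 32);         /* shift 2 */
    __m128i lo   = _mm_mul_epu32(a, b);           /* PMULUDQ 1: a_lo*b_lo */
    __m128i m1   = _mm_mul_epu32(a_hi, b);        /* PMULUDQ 2: a_hi*b_lo */
    __m128i m2   = _mm_mul_epu32(a, b_hi);        /* PMULUDQ 3: a_lo*b_hi */
    m1 = _mm_slli_epi64(m1, 32);                  /* shift 3 */
    m2 = _mm_slli_epi64(m2, 32);                  /* shift 4 */
    return _mm_add_epi64(_mm_add_epi64(lo, m1),   /* PADDQ 1 */
                         m2);                     /* PADDQ 2 */
}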
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index 75c97a781e7f..b69b3bf6304c 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -28,7 +28,7 @@ define i32 @add(i32 %arg) {
;CHECK: cost of 0 {{.*}} trunc
%H = trunc i32 undef to i1
- ;CHECK: cost of 1 {{.*}} ret
+ ;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}
@@ -44,6 +44,10 @@ define i32 @zext_sext(<8 x i1> %in) {
%B = zext <8 x i16> undef to <8 x i32>
;CHECK: cost of 1 {{.*}} sext
%C = sext <4 x i32> undef to <4 x i64>
+ ;CHECK: cost of 6 {{.*}} sext
+ %C1 = sext <4 x i8> undef to <4 x i64>
+ ;CHECK: cost of 6 {{.*}} sext
+ %C2 = sext <4 x i16> undef to <4 x i64>
;CHECK: cost of 1 {{.*}} zext
%D = zext <4 x i32> undef to <4 x i64>
@@ -59,7 +63,7 @@ define i32 @zext_sext(<8 x i1> %in) {
ret i32 undef
}
-define i32 @masks(<8 x i1> %in) {
+define i32 @masks8(<8 x i1> %in) {
;CHECK: cost of 6 {{.*}} zext
%Z = zext <8 x i1> %in to <8 x i32>
;CHECK: cost of 9 {{.*}} sext
@@ -67,3 +71,84 @@ define i32 @masks(<8 x i1> %in) {
ret i32 undef
}
+define i32 @masks4(<4 x i1> %in) {
+ ;CHECK: cost of 8 {{.*}} sext
+ %S = sext <4 x i1> %in to <4 x i64>
+ ret i32 undef
+}
+
+define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %A1 = sitofp <4 x i1> %a to <4 x float>
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %A2 = sitofp <4 x i1> %a to <4 x double>
+
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %B1 = sitofp <4 x i8> %b to <4 x float>
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %B2 = sitofp <4 x i8> %b to <4 x double>
+
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %C1 = sitofp <4 x i16> %c to <4 x float>
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %C2 = sitofp <4 x i16> %c to <4 x double>
+
+ ; CHECK: cost of 1 {{.*}} sitofp
+ %D1 = sitofp <4 x i32> %d to <4 x float>
+ ; CHECK: cost of 1 {{.*}} sitofp
+ %D2 = sitofp <4 x i32> %d to <4 x double>
+ ret void
+}
+
+define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
+ ; CHECK: cost of 8 {{.*}} sitofp
+ %A1 = sitofp <8 x i1> %a to <8 x float>
+
+ ; CHECK: cost of 8 {{.*}} sitofp
+ %B1 = sitofp <8 x i8> %b to <8 x float>
+
+ ; CHECK: cost of 5 {{.*}} sitofp
+ %C1 = sitofp <8 x i16> %c to <8 x float>
+
+ ; CHECK: cost of 1 {{.*}} sitofp
+ %D1 = sitofp <8 x i32> %d to <8 x float>
+ ret void
+}
+
+define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
+ ; CHECK: cost of 7 {{.*}} uitofp
+ %A1 = uitofp <4 x i1> %a to <4 x float>
+ ; CHECK: cost of 7 {{.*}} uitofp
+ %A2 = uitofp <4 x i1> %a to <4 x double>
+
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %B1 = uitofp <4 x i8> %b to <4 x float>
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %B2 = uitofp <4 x i8> %b to <4 x double>
+
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %C1 = uitofp <4 x i16> %c to <4 x float>
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %C2 = uitofp <4 x i16> %c to <4 x double>
+
+ ; CHECK: cost of 6 {{.*}} uitofp
+ %D1 = uitofp <4 x i32> %d to <4 x float>
+ ; CHECK: cost of 6 {{.*}} uitofp
+ %D2 = uitofp <4 x i32> %d to <4 x double>
+ ret void
+}
+
+define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
+ ; CHECK: cost of 6 {{.*}} uitofp
+ %A1 = uitofp <8 x i1> %a to <8 x float>
+
+ ; CHECK: cost of 5 {{.*}} uitofp
+ %B1 = uitofp <8 x i8> %b to <8 x float>
+
+ ; CHECK: cost of 5 {{.*}} uitofp
+ %C1 = uitofp <8 x i16> %c to <8 x float>
+
+ ; CHECK: cost of 9 {{.*}} uitofp
+ %D1 = uitofp <8 x i32> %d to <8 x float>
+ ret void
+}
diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll
index f868bd18b54f..713b3742e920 100644
--- a/test/Analysis/CostModel/X86/cmp.ll
+++ b/test/Analysis/CostModel/X86/cmp.ll
@@ -1,41 +1,55 @@
-; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix=AVX1 %s
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck --check-prefix=AVX2 %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
define i32 @cmp(i32 %arg) {
; -- floats --
- ;CHECK: cost of 1 {{.*}} fcmp
+ ;AVX1: cost of 1 {{.*}} fcmp
+ ;AVX2: cost of 1 {{.*}} fcmp
%A = fcmp olt <2 x float> undef, undef
- ;CHECK: cost of 1 {{.*}} fcmp
+ ;AVX1: cost of 1 {{.*}} fcmp
+ ;AVX2: cost of 1 {{.*}} fcmp
%B = fcmp olt <4 x float> undef, undef
- ;CHECK: cost of 1 {{.*}} fcmp
+ ;AVX1: cost of 1 {{.*}} fcmp
+ ;AVX2: cost of 1 {{.*}} fcmp
%C = fcmp olt <8 x float> undef, undef
- ;CHECK: cost of 1 {{.*}} fcmp
+ ;AVX1: cost of 1 {{.*}} fcmp
+ ;AVX2: cost of 1 {{.*}} fcmp
%D = fcmp olt <2 x double> undef, undef
- ;CHECK: cost of 1 {{.*}} fcmp
+ ;AVX1: cost of 1 {{.*}} fcmp
+ ;AVX2: cost of 1 {{.*}} fcmp
%E = fcmp olt <4 x double> undef, undef
; -- integers --
- ;CHECK: cost of 1 {{.*}} icmp
+ ;AVX1: cost of 1 {{.*}} icmp
+ ;AVX2: cost of 1 {{.*}} icmp
%F = icmp eq <16 x i8> undef, undef
- ;CHECK: cost of 1 {{.*}} icmp
+ ;AVX1: cost of 1 {{.*}} icmp
+ ;AVX2: cost of 1 {{.*}} icmp
%G = icmp eq <8 x i16> undef, undef
- ;CHECK: cost of 1 {{.*}} icmp
+ ;AVX1: cost of 1 {{.*}} icmp
+ ;AVX2: cost of 1 {{.*}} icmp
%H = icmp eq <4 x i32> undef, undef
- ;CHECK: cost of 1 {{.*}} icmp
+ ;AVX1: cost of 1 {{.*}} icmp
+ ;AVX2: cost of 1 {{.*}} icmp
%I = icmp eq <2 x i64> undef, undef
- ;CHECK: cost of 4 {{.*}} icmp
+ ;AVX1: cost of 4 {{.*}} icmp
+ ;AVX2: cost of 1 {{.*}} icmp
%J = icmp eq <4 x i64> undef, undef
- ;CHECK: cost of 4 {{.*}} icmp
+ ;AVX1: cost of 4 {{.*}} icmp
+ ;AVX2: cost of 1 {{.*}} icmp
%K = icmp eq <8 x i32> undef, undef
- ;CHECK: cost of 4 {{.*}} icmp
+ ;AVX1: cost of 4 {{.*}} icmp
+ ;AVX2: cost of 1 {{.*}} icmp
%L = icmp eq <16 x i16> undef, undef
- ;CHECK: cost of 4 {{.*}} icmp
+ ;AVX1: cost of 4 {{.*}} icmp
+ ;AVX2: cost of 1 {{.*}} icmp
%M = icmp eq <32 x i8> undef, undef
- ;CHECK: cost of 1 {{.*}} ret
+ ;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}
diff --git a/test/Analysis/CostModel/X86/gep.ll b/test/Analysis/CostModel/X86/gep.ll
new file mode 100644
index 000000000000..877184a3eaa8
--- /dev/null
+++ b/test/Analysis/CostModel/X86/gep.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+
+define void @test_geps() {
+  ; The cost should be zero. We expect each GEP to be folded into
+  ; the instruction's addressing mode.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8*
+ %a0 = getelementptr inbounds i8* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16*
+ %a1 = getelementptr inbounds i16* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32*
+ %a2 = getelementptr inbounds i32* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64*
+ %a3 = getelementptr inbounds i64* undef, i32 0
+
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float*
+ %a4 = getelementptr inbounds float* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double*
+ %a5 = getelementptr inbounds double* undef, i32 0
+
+ ; Vector geps should also have zero cost.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
+ %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
+ %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
+ %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
+ %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>*
+ %a11 = getelementptr inbounds <4 x float>* undef, i32 0
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>*
+ %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+
+
+ ret void
+}
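
The zero costs here model GEP folding: a getelementptr whose only use is a
memory instruction emits no code of its own, since the pointer arithmetic is
absorbed into the user's addressing mode. A minimal sketch in the same
typed-pointer IR, assuming a load user (not part of the patch):

  %p = getelementptr inbounds i32* %base, i32 0 ; folded into the load below
  %v = load i32* %p, align 4                    ; one mov with a (%base) operand
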
diff --git a/test/Analysis/CostModel/X86/i32.ll b/test/Analysis/CostModel/X86/i32.ll
index 4015e0b1eef4..c2dce762a091 100644
--- a/test/Analysis/CostModel/X86/i32.ll
+++ b/test/Analysis/CostModel/X86/i32.ll
@@ -1,8 +1,6 @@
; RUN: opt < %s -cost-model -analyze -mtriple=i386 -mcpu=corei7-avx | FileCheck %s
-
-;CHECK: cost of 2 {{.*}} add
-;CHECK: cost of 1 {{.*}} ret
+;CHECK: cost of 0 {{.*}} ret
define i32 @no_info(i32 %arg) {
%e = add i64 undef, undef
ret i32 undef
diff --git a/test/Analysis/CostModel/X86/intrinsic-cost.ll b/test/Analysis/CostModel/X86/intrinsic-cost.ll
new file mode 100644
index 000000000000..e235a36222a7
--- /dev/null
+++ b/test/Analysis/CostModel/X86/intrinsic-cost.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck %s -check-prefix=CORE2
+; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=corei7 -cost-model -analyze < %s | FileCheck %s -check-prefix=COREI7
+
+; If the SSE4.1 roundps instruction is available, this is cheap to lower;
+; otherwise it'll be scalarized into calls, which are expensive.
+define void @test1(float* nocapture %f) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds float* %f, i64 %index
+ %1 = bitcast float* %0 to <4 x float>*
+ %wide.load = load <4 x float>* %1, align 4
+ %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+ store <4 x float> %2, <4 x float>* %1, align 4
+ %index.next = add i64 %index, 4
+ %3 = icmp eq i64 %index.next, 1024
+ br i1 %3, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; CORE2: Printing analysis 'Cost Model Analysis' for function 'test1':
+; CORE2: Cost Model: Found an estimated cost of 400 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+
+; COREI7: Printing analysis 'Cost Model Analysis' for function 'test1':
+; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
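
Without roundps, the <4 x float> ceil above cannot be selected as a single
instruction and is scalarized, one call per lane, which is what the core2
cost of 400 reflects. A sketch of the assumed per-lane expansion (first lane
only):

  %e0 = extractelement <4 x float> %wide.load, i32 0
  %c0 = call float @llvm.ceil.f32(float %e0)
  %r0 = insertelement <4 x float> undef, float %c0, i32 0
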
diff --git a/test/Analysis/CostModel/X86/load_store.ll b/test/Analysis/CostModel/X86/load_store.ll
new file mode 100644
index 000000000000..4195b1d879a1
--- /dev/null
+++ b/test/Analysis/CostModel/X86/load_store.ll
@@ -0,0 +1,64 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @stores(i32 %arg) {
+
+ ;CHECK: cost of 1 {{.*}} store
+ store i8 undef, i8* undef, align 4
+ ;CHECK: cost of 1 {{.*}} store
+ store i16 undef, i16* undef, align 4
+ ;CHECK: cost of 1 {{.*}} store
+ store i32 undef, i32* undef, align 4
+ ;CHECK: cost of 1 {{.*}} store
+ store i64 undef, i64* undef, align 4
+ ;CHECK: cost of 2 {{.*}} store
+ store i128 undef, i128* undef, align 4
+
+ ;CHECK: cost of 1 {{.*}} store
+ store <4 x i16> undef, <4 x i16>* undef, align 4
+ ;CHECK: cost of 1 {{.*}} store
+ store <4 x i32> undef, <4 x i32>* undef, align 4
+ ;CHECK: cost of 2 {{.*}} store
+ store <4 x i64> undef, <4 x i64>* undef, align 4
+
+ ;CHECK: cost of 1 {{.*}} store
+ store <8 x i16> undef, <8 x i16>* undef, align 4
+ ;CHECK: cost of 2 {{.*}} store
+ store <8 x i32> undef, <8 x i32>* undef, align 4
+ ;CHECK: cost of 4 {{.*}} store
+ store <8 x i64> undef, <8 x i64>* undef, align 4
+
+ ret i32 undef
+}
+define i32 @loads(i32 %arg) {
+ ;CHECK: cost of 1 {{.*}} load
+ load i8* undef, align 4
+ ;CHECK: cost of 1 {{.*}} load
+ load i16* undef, align 4
+ ;CHECK: cost of 1 {{.*}} load
+ load i32* undef, align 4
+ ;CHECK: cost of 1 {{.*}} load
+ load i64* undef, align 4
+ ;CHECK: cost of 2 {{.*}} load
+ load i128* undef, align 4
+
+ ;CHECK: cost of 1 {{.*}} load
+ load <2 x i32>* undef, align 4
+ ;CHECK: cost of 1 {{.*}} load
+ load <4 x i32>* undef, align 4
+ ;CHECK: cost of 2 {{.*}} load
+ load <8 x i32>* undef, align 4
+
+
+ ;CHECK: cost of 1 {{.*}} load
+ load <2 x i64>* undef, align 4
+ ;CHECK: cost of 2 {{.*}} load
+ load <4 x i64>* undef, align 4
+ ;CHECK: cost of 4 {{.*}} load
+ load <8 x i64>* undef, align 4
+
+ ret i32 undef
+}
+
diff --git a/test/Analysis/CostModel/X86/testshiftashr.ll b/test/Analysis/CostModel/X86/testshiftashr.ll
new file mode 100644
index 000000000000..f35eea87164c
--- /dev/null
+++ b/test/Analysis/CostModel/X86/testshiftashr.ll
@@ -0,0 +1,531 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+
+%shifttype = type <2 x i16>
+define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
+entry:
+ ; SSE2: shift2i16
+ ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2-CODEGEN: shift2i16
+ ; SSE2-CODEGEN: sarq %cl
+
+ %0 = ashr %shifttype %a , %b
+ ret %shifttype %0
+}
+
+%shifttype4i16 = type <4 x i16>
+define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
+entry:
+ ; SSE2: shift4i16
+ ; SSE2: cost of 40 {{.*}} ashr
+ ; SSE2-CODEGEN: shift4i16
+ ; SSE2-CODEGEN: sarl %cl
+
+ %0 = ashr %shifttype4i16 %a , %b
+ ret %shifttype4i16 %0
+}
+
+%shifttype8i16 = type <8 x i16>
+define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
+entry:
+ ; SSE2: shift8i16
+ ; SSE2: cost of 80 {{.*}} ashr
+ ; SSE2-CODEGEN: shift8i16
+ ; SSE2-CODEGEN: sarw %cl
+
+ %0 = ashr %shifttype8i16 %a , %b
+ ret %shifttype8i16 %0
+}
+
+%shifttype16i16 = type <16 x i16>
+define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
+entry:
+ ; SSE2: shift16i16
+ ; SSE2: cost of 160 {{.*}} ashr
+ ; SSE2-CODEGEN: shift16i16
+ ; SSE2-CODEGEN: sarw %cl
+
+ %0 = ashr %shifttype16i16 %a , %b
+ ret %shifttype16i16 %0
+}
+
+%shifttype32i16 = type <32 x i16>
+define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
+entry:
+ ; SSE2: shift32i16
+ ; SSE2: cost of 320 {{.*}} ashr
+ ; SSE2-CODEGEN: shift32i16
+ ; SSE2-CODEGEN: sarw %cl
+
+ %0 = ashr %shifttype32i16 %a , %b
+ ret %shifttype32i16 %0
+}
+
+%shifttype2i32 = type <2 x i32>
+define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
+entry:
+ ; SSE2: shift2i32
+ ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2-CODEGEN: shift2i32
+ ; SSE2-CODEGEN: sarq %cl
+
+ %0 = ashr %shifttype2i32 %a , %b
+ ret %shifttype2i32 %0
+}
+
+%shifttype4i32 = type <4 x i32>
+define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
+entry:
+ ; SSE2: shift4i32
+ ; SSE2: cost of 40 {{.*}} ashr
+ ; SSE2-CODEGEN: shift4i32
+ ; SSE2-CODEGEN: sarl %cl
+
+ %0 = ashr %shifttype4i32 %a , %b
+ ret %shifttype4i32 %0
+}
+
+%shifttype8i32 = type <8 x i32>
+define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
+entry:
+ ; SSE2: shift8i32
+ ; SSE2: cost of 80 {{.*}} ashr
+ ; SSE2-CODEGEN: shift8i32
+ ; SSE2-CODEGEN: sarl %cl
+
+ %0 = ashr %shifttype8i32 %a , %b
+ ret %shifttype8i32 %0
+}
+
+%shifttype16i32 = type <16 x i32>
+define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
+entry:
+ ; SSE2: shift16i32
+ ; SSE2: cost of 160 {{.*}} ashr
+ ; SSE2-CODEGEN: shift16i32
+ ; SSE2-CODEGEN: sarl %cl
+
+ %0 = ashr %shifttype16i32 %a , %b
+ ret %shifttype16i32 %0
+}
+
+%shifttype32i32 = type <32 x i32>
+define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
+entry:
+ ; SSE2: shift32i32
+ ; SSE2: cost of 256 {{.*}} ashr
+ ; SSE2-CODEGEN: shift32i32
+ ; SSE2-CODEGEN: sarl %cl
+
+ %0 = ashr %shifttype32i32 %a , %b
+ ret %shifttype32i32 %0
+}
+
+%shifttype2i64 = type <2 x i64>
+define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
+entry:
+ ; SSE2: shift2i64
+ ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2-CODEGEN: shift2i64
+ ; SSE2-CODEGEN: sarq %cl
+
+ %0 = ashr %shifttype2i64 %a , %b
+ ret %shifttype2i64 %0
+}
+
+%shifttype4i64 = type <4 x i64>
+define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
+entry:
+ ; SSE2: shift4i64
+ ; SSE2: cost of 40 {{.*}} ashr
+ ; SSE2-CODEGEN: shift4i64
+ ; SSE2-CODEGEN: sarq %cl
+
+ %0 = ashr %shifttype4i64 %a , %b
+ ret %shifttype4i64 %0
+}
+
+%shifttype8i64 = type <8 x i64>
+define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
+entry:
+ ; SSE2: shift8i64
+ ; SSE2: cost of 80 {{.*}} ashr
+ ; SSE2-CODEGEN: shift8i64
+ ; SSE2-CODEGEN: sarq %cl
+
+ %0 = ashr %shifttype8i64 %a , %b
+ ret %shifttype8i64 %0
+}
+
+%shifttype16i64 = type <16 x i64>
+define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
+entry:
+ ; SSE2: shift16i64
+ ; SSE2: cost of 160 {{.*}} ashr
+ ; SSE2-CODEGEN: shift16i64
+ ; SSE2-CODEGEN: sarq %cl
+
+ %0 = ashr %shifttype16i64 %a , %b
+ ret %shifttype16i64 %0
+}
+
+%shifttype32i64 = type <32 x i64>
+define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
+entry:
+ ; SSE2: shift32i64
+ ; SSE2: cost of 256 {{.*}} ashr
+ ; SSE2-CODEGEN: shift32i64
+ ; SSE2-CODEGEN: sarq %cl
+
+ %0 = ashr %shifttype32i64 %a , %b
+ ret %shifttype32i64 %0
+}
+
+%shifttype2i8 = type <2 x i8>
+define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
+entry:
+ ; SSE2: shift2i8
+ ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2-CODEGEN: shift2i8
+ ; SSE2-CODEGEN: sarq %cl
+
+ %0 = ashr %shifttype2i8 %a , %b
+ ret %shifttype2i8 %0
+}
+
+%shifttype4i8 = type <4 x i8>
+define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
+entry:
+ ; SSE2: shift4i8
+ ; SSE2: cost of 40 {{.*}} ashr
+ ; SSE2-CODEGEN: shift4i8
+ ; SSE2-CODEGEN: sarl %cl
+
+ %0 = ashr %shifttype4i8 %a , %b
+ ret %shifttype4i8 %0
+}
+
+%shifttype8i8 = type <8 x i8>
+define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
+entry:
+ ; SSE2: shift8i8
+ ; SSE2: cost of 80 {{.*}} ashr
+ ; SSE2-CODEGEN: shift8i8
+ ; SSE2-CODEGEN: sarw %cl
+
+ %0 = ashr %shifttype8i8 %a , %b
+ ret %shifttype8i8 %0
+}
+
+%shifttype16i8 = type <16 x i8>
+define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
+entry:
+ ; SSE2: shift16i8
+ ; SSE2: cost of 160 {{.*}} ashr
+ ; SSE2-CODEGEN: shift16i8
+ ; SSE2-CODEGEN: sarb %cl
+
+ %0 = ashr %shifttype16i8 %a , %b
+ ret %shifttype16i8 %0
+}
+
+%shifttype32i8 = type <32 x i8>
+define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
+entry:
+ ; SSE2: shift32i8
+ ; SSE2: cost of 320 {{.*}} ashr
+ ; SSE2-CODEGEN: shift32i8
+ ; SSE2-CODEGEN: sarb %cl
+
+ %0 = ashr %shifttype32i8 %a , %b
+ ret %shifttype32i8 %0
+}
+
+; Test shift by a constant vector.
+
+%shifttypec = type <2 x i16>
+define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
+entry:
+ ; SSE2: shift2i16const
+ ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2-CODEGEN: shift2i16const
+ ; SSE2-CODEGEN: sarq $
+
+ %0 = ashr %shifttypec %a , <i16 3, i16 3>
+ ret %shifttypec %0
+}
+
+%shifttypec4i16 = type <4 x i16>
+define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
+entry:
+ ; SSE2: shift4i16const
+ ; SSE2: cost of 1 {{.*}} ashr
+ ; SSE2-CODEGEN: shift4i16const
+ ; SSE2-CODEGEN: psrad $3
+
+ %0 = ashr %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec4i16 %0
+}
+
+%shifttypec8i16 = type <8 x i16>
+define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
+entry:
+ ; SSE2: shift8i16const
+ ; SSE2: cost of 1 {{.*}} ashr
+ ; SSE2-CODEGEN: shift8i16const
+ ; SSE2-CODEGEN: psraw $3
+
+ %0 = ashr %shifttypec8i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec8i16 %0
+}
+
+%shifttypec16i16 = type <16 x i16>
+define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
+ %shifttypec16i16 %b) {
+entry:
+ ; SSE2: shift16i16const
+ ; SSE2: cost of 2 {{.*}} ashr
+ ; SSE2-CODEGEN: shift16i16const
+ ; SSE2-CODEGEN: psraw $3
+
+ %0 = ashr %shifttypec16i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec16i16 %0
+}
+
+%shifttypec32i16 = type <32 x i16>
+define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
+ %shifttypec32i16 %b) {
+entry:
+ ; SSE2: shift32i16const
+ ; SSE2: cost of 4 {{.*}} ashr
+ ; SSE2-CODEGEN: shift32i16const
+ ; SSE2-CODEGEN: psraw $3
+
+ %0 = ashr %shifttypec32i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec32i16 %0
+}
+
+%shifttypec2i32 = type <2 x i32>
+define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
+entry:
+ ; SSE2: shift2i32c
+ ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2-CODEGEN: shift2i32c
+ ; SSE2-CODEGEN: sarq $3
+
+ %0 = ashr %shifttypec2i32 %a , <i32 3, i32 3>
+ ret %shifttypec2i32 %0
+}
+
+%shifttypec4i32 = type <4 x i32>
+define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
+entry:
+ ; SSE2: shift4i32c
+ ; SSE2: cost of 1 {{.*}} ashr
+ ; SSE2-CODEGEN: shift4i32c
+ ; SSE2-CODEGEN: psrad $3
+
+ %0 = ashr %shifttypec4i32 %a , <i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec4i32 %0
+}
+
+%shifttypec8i32 = type <8 x i32>
+define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
+entry:
+ ; SSE2: shift8i32c
+ ; SSE2: cost of 2 {{.*}} ashr
+ ; SSE2-CODEGEN: shift8i32c
+ ; SSE2-CODEGEN: psrad $3
+
+ %0 = ashr %shifttypec8i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec8i32 %0
+}
+
+%shifttypec16i32 = type <16 x i32>
+define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
+entry:
+ ; SSE2: shift16i32c
+ ; SSE2: cost of 4 {{.*}} ashr
+ ; SSE2-CODEGEN: shift16i32c
+ ; SSE2-CODEGEN: psrad $3
+
+ %0 = ashr %shifttypec16i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec16i32 %0
+}
+
+%shifttypec32i32 = type <32 x i32>
+define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
+entry:
+ ; SSE2: shift32i32c
+  ; getTypeConversion fails here and promotes this to an i64.
+ ; SSE2: cost of 256 {{.*}} ashr
+ ; SSE2-CODEGEN: shift32i32c
+ ; SSE2-CODEGEN: psrad $3
+ %0 = ashr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec32i32 %0
+}
+
+%shifttypec2i64 = type <2 x i64>
+define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
+entry:
+ ; SSE2: shift2i64c
+ ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2-CODEGEN: shift2i64c
+ ; SSE2-CODEGEN: sarq $3
+
+ %0 = ashr %shifttypec2i64 %a , <i64 3, i64 3>
+ ret %shifttypec2i64 %0
+}
+
+%shifttypec4i64 = type <4 x i64>
+define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
+entry:
+ ; SSE2: shift4i64c
+ ; SSE2: cost of 40 {{.*}} ashr
+ ; SSE2-CODEGEN: shift4i64c
+ ; SSE2-CODEGEN: sarq $3
+
+ %0 = ashr %shifttypec4i64 %a , <i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec4i64 %0
+}
+
+%shifttypec8i64 = type <8 x i64>
+define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
+entry:
+ ; SSE2: shift8i64c
+ ; SSE2: cost of 80 {{.*}} ashr
+ ; SSE2-CODEGEN: shift8i64c
+ ; SSE2-CODEGEN: sarq $3
+
+ %0 = ashr %shifttypec8i64 %a , <i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec8i64 %0
+}
+
+%shifttypec16i64 = type <16 x i64>
+define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
+entry:
+ ; SSE2: shift16i64c
+ ; SSE2: cost of 160 {{.*}} ashr
+ ; SSE2-CODEGEN: shift16i64c
+ ; SSE2-CODEGEN: sarq $3
+
+ %0 = ashr %shifttypec16i64 %a , <i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec16i64 %0
+}
+
+%shifttypec32i64 = type <32 x i64>
+define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
+entry:
+ ; SSE2: shift32i64c
+ ; SSE2: cost of 256 {{.*}} ashr
+ ; SSE2-CODEGEN: shift32i64c
+ ; SSE2-CODEGEN: sarq $3
+
+ %0 = ashr %shifttypec32i64 %a ,<i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec32i64 %0
+}
+
+%shifttypec2i8 = type <2 x i8>
+define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
+entry:
+ ; SSE2: shift2i8c
+ ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2-CODEGEN: shift2i8c
+ ; SSE2-CODEGEN: sarq $3
+
+ %0 = ashr %shifttypec2i8 %a , <i8 3, i8 3>
+ ret %shifttypec2i8 %0
+}
+
+%shifttypec4i8 = type <4 x i8>
+define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
+entry:
+ ; SSE2: shift4i8c
+ ; SSE2: cost of 1 {{.*}} ashr
+ ; SSE2-CODEGEN: shift4i8c
+ ; SSE2-CODEGEN: psrad $3
+
+ %0 = ashr %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec4i8 %0
+}
+
+%shifttypec8i8 = type <8 x i8>
+define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
+entry:
+ ; SSE2: shift8i8c
+ ; SSE2: cost of 1 {{.*}} ashr
+ ; SSE2-CODEGEN: shift8i8c
+ ; SSE2-CODEGEN: psraw $3
+
+ %0 = ashr %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec8i8 %0
+}
+
+%shifttypec16i8 = type <16 x i8>
+define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
+entry:
+ ; SSE2: shift16i8c
+ ; SSE2: cost of 4 {{.*}} ashr
+ ; SSE2-CODEGEN: shift16i8c
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = ashr %shifttypec16i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec16i8 %0
+}
+
+%shifttypec32i8 = type <32 x i8>
+define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
+entry:
+ ; SSE2: shift32i8c
+ ; SSE2: cost of 8 {{.*}} ashr
+ ; SSE2-CODEGEN: shift32i8c
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = ashr %shifttypec32i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec32i8 %0
+}
+
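
SSE2 has no per-lane variable arithmetic shift, so the non-constant ashr
cases above are scalarized: each lane is extracted, shifted with a scalar
sar (count in %cl), and reinserted, which is why the costs grow with the
lane count. A sketch of the assumed expansion for one <2 x i16> lane:

  %a0 = extractelement <2 x i16> %a, i32 0
  %b0 = extractelement <2 x i16> %b, i32 0
  %s0 = ashr i16 %a0, %b0
  %r0 = insertelement <2 x i16> undef, i16 %s0, i32 0

Constant splat shifts can instead use a single psraw/psrad with an immediate;
64-bit lanes stay expensive because SSE2 provides no psraq.
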
diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll
new file mode 100644
index 000000000000..8d6ef3874208
--- /dev/null
+++ b/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -0,0 +1,530 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+
+%shifttype = type <2 x i16>
+define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
+entry:
+ ; SSE2: shift2i16
+ ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i16
+ ; SSE2-CODEGEN: shrq %cl
+
+ %0 = lshr %shifttype %a , %b
+ ret %shifttype %0
+}
+
+%shifttype4i16 = type <4 x i16>
+define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
+entry:
+ ; SSE2: shift4i16
+ ; SSE2: cost of 40 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i16
+ ; SSE2-CODEGEN: shrl %cl
+
+ %0 = lshr %shifttype4i16 %a , %b
+ ret %shifttype4i16 %0
+}
+
+%shifttype8i16 = type <8 x i16>
+define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
+entry:
+ ; SSE2: shift8i16
+ ; SSE2: cost of 80 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i16
+ ; SSE2-CODEGEN: shrl %cl
+
+ %0 = lshr %shifttype8i16 %a , %b
+ ret %shifttype8i16 %0
+}
+
+%shifttype16i16 = type <16 x i16>
+define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
+entry:
+ ; SSE2: shift16i16
+ ; SSE2: cost of 160 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i16
+ ; SSE2-CODEGEN: shrl %cl
+
+ %0 = lshr %shifttype16i16 %a , %b
+ ret %shifttype16i16 %0
+}
+
+%shifttype32i16 = type <32 x i16>
+define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
+entry:
+ ; SSE2: shift32i16
+ ; SSE2: cost of 320 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i16
+ ; SSE2-CODEGEN: shrl %cl
+
+ %0 = lshr %shifttype32i16 %a , %b
+ ret %shifttype32i16 %0
+}
+
+%shifttype2i32 = type <2 x i32>
+define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
+entry:
+ ; SSE2: shift2i32
+ ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i32
+ ; SSE2-CODEGEN: shrq %cl
+
+ %0 = lshr %shifttype2i32 %a , %b
+ ret %shifttype2i32 %0
+}
+
+%shifttype4i32 = type <4 x i32>
+define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
+entry:
+ ; SSE2: shift4i32
+ ; SSE2: cost of 40 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i32
+ ; SSE2-CODEGEN: shrl %cl
+
+ %0 = lshr %shifttype4i32 %a , %b
+ ret %shifttype4i32 %0
+}
+
+%shifttype8i32 = type <8 x i32>
+define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
+entry:
+ ; SSE2: shift8i32
+ ; SSE2: cost of 80 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i32
+ ; SSE2-CODEGEN: shrl %cl
+
+ %0 = lshr %shifttype8i32 %a , %b
+ ret %shifttype8i32 %0
+}
+
+%shifttype16i32 = type <16 x i32>
+define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
+entry:
+ ; SSE2: shift16i32
+ ; SSE2: cost of 160 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i32
+ ; SSE2-CODEGEN: shrl %cl
+
+ %0 = lshr %shifttype16i32 %a , %b
+ ret %shifttype16i32 %0
+}
+
+%shifttype32i32 = type <32 x i32>
+define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
+entry:
+ ; SSE2: shift32i32
+ ; SSE2: cost of 256 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i32
+ ; SSE2-CODEGEN: shrl %cl
+
+ %0 = lshr %shifttype32i32 %a , %b
+ ret %shifttype32i32 %0
+}
+
+%shifttype2i64 = type <2 x i64>
+define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
+entry:
+ ; SSE2: shift2i64
+ ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i64
+ ; SSE2-CODEGEN: shrq %cl
+
+ %0 = lshr %shifttype2i64 %a , %b
+ ret %shifttype2i64 %0
+}
+
+%shifttype4i64 = type <4 x i64>
+define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
+entry:
+ ; SSE2: shift4i64
+ ; SSE2: cost of 40 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i64
+ ; SSE2-CODEGEN: shrq %cl
+
+ %0 = lshr %shifttype4i64 %a , %b
+ ret %shifttype4i64 %0
+}
+
+%shifttype8i64 = type <8 x i64>
+define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
+entry:
+ ; SSE2: shift8i64
+ ; SSE2: cost of 80 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i64
+ ; SSE2-CODEGEN: shrq %cl
+
+ %0 = lshr %shifttype8i64 %a , %b
+ ret %shifttype8i64 %0
+}
+
+%shifttype16i64 = type <16 x i64>
+define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
+entry:
+ ; SSE2: shift16i64
+ ; SSE2: cost of 160 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i64
+ ; SSE2-CODEGEN: shrq %cl
+
+ %0 = lshr %shifttype16i64 %a , %b
+ ret %shifttype16i64 %0
+}
+
+%shifttype32i64 = type <32 x i64>
+define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
+entry:
+ ; SSE2: shift32i64
+ ; SSE2: cost of 256 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i64
+ ; SSE2-CODEGEN: shrq %cl
+
+ %0 = lshr %shifttype32i64 %a , %b
+ ret %shifttype32i64 %0
+}
+
+%shifttype2i8 = type <2 x i8>
+define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
+entry:
+ ; SSE2: shift2i8
+ ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i8
+ ; SSE2-CODEGEN: shrq %cl
+
+ %0 = lshr %shifttype2i8 %a , %b
+ ret %shifttype2i8 %0
+}
+
+%shifttype4i8 = type <4 x i8>
+define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
+entry:
+ ; SSE2: shift4i8
+ ; SSE2: cost of 40 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i8
+ ; SSE2-CODEGEN: shrl %cl
+
+ %0 = lshr %shifttype4i8 %a , %b
+ ret %shifttype4i8 %0
+}
+
+%shifttype8i8 = type <8 x i8>
+define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
+entry:
+ ; SSE2: shift8i8
+ ; SSE2: cost of 80 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i8
+ ; SSE2-CODEGEN: shrl %cl
+
+ %0 = lshr %shifttype8i8 %a , %b
+ ret %shifttype8i8 %0
+}
+
+%shifttype16i8 = type <16 x i8>
+define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
+entry:
+ ; SSE2: shift16i8
+ ; SSE2: cost of 160 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i8
+ ; SSE2-CODEGEN: shrb %cl
+
+ %0 = lshr %shifttype16i8 %a , %b
+ ret %shifttype16i8 %0
+}
+
+%shifttype32i8 = type <32 x i8>
+define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
+entry:
+ ; SSE2: shift32i8
+ ; SSE2: cost of 320 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i8
+ ; SSE2-CODEGEN: shrb %cl
+
+ %0 = lshr %shifttype32i8 %a , %b
+ ret %shifttype32i8 %0
+}
+
+; Test shift by a constant vector.
+
+%shifttypec = type <2 x i16>
+define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
+entry:
+ ; SSE2: shift2i16const
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i16const
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec %a , <i16 3, i16 3>
+ ret %shifttypec %0
+}
+
+%shifttypec4i16 = type <4 x i16>
+define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
+entry:
+ ; SSE2: shift4i16const
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i16const
+ ; SSE2-CODEGEN: psrld $3
+
+ %0 = lshr %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec4i16 %0
+}
+
+%shifttypec8i16 = type <8 x i16>
+define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
+entry:
+ ; SSE2: shift8i16const
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i16const
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec8i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec8i16 %0
+}
+
+%shifttypec16i16 = type <16 x i16>
+define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
+ %shifttypec16i16 %b) {
+entry:
+ ; SSE2: shift16i16const
+ ; SSE2: cost of 2 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i16const
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec16i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec16i16 %0
+}
+
+%shifttypec32i16 = type <32 x i16>
+define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
+ %shifttypec32i16 %b) {
+entry:
+ ; SSE2: shift32i16const
+ ; SSE2: cost of 4 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i16const
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec32i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec32i16 %0
+}
+
+%shifttypec2i32 = type <2 x i32>
+define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
+entry:
+ ; SSE2: shift2i32c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i32c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec2i32 %a , <i32 3, i32 3>
+ ret %shifttypec2i32 %0
+}
+
+%shifttypec4i32 = type <4 x i32>
+define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
+entry:
+ ; SSE2: shift4i32c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i32c
+ ; SSE2-CODEGEN: psrld $3
+
+ %0 = lshr %shifttypec4i32 %a , <i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec4i32 %0
+}
+
+%shifttypec8i32 = type <8 x i32>
+define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
+entry:
+ ; SSE2: shift8i32c
+ ; SSE2: cost of 2 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i32c
+ ; SSE2-CODEGEN: psrld $3
+
+ %0 = lshr %shifttypec8i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec8i32 %0
+}
+
+%shifttypec16i32 = type <16 x i32>
+define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
+entry:
+ ; SSE2: shift16i32c
+ ; SSE2: cost of 4 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i32c
+ ; SSE2-CODEGEN: psrld $3
+
+ %0 = lshr %shifttypec16i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec16i32 %0
+}
+
+%shifttypec32i32 = type <32 x i32>
+define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
+entry:
+ ; SSE2: shift32i32c
+  ; getTypeConversion fails here and promotes this to an i64.
+ ; SSE2: cost of 256 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i32c
+ ; SSE2-CODEGEN: psrld $3
+ %0 = lshr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec32i32 %0
+}
+
+%shifttypec2i64 = type <2 x i64>
+define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
+entry:
+ ; SSE2: shift2i64c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i64c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec2i64 %a , <i64 3, i64 3>
+ ret %shifttypec2i64 %0
+}
+
+%shifttypec4i64 = type <4 x i64>
+define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
+entry:
+ ; SSE2: shift4i64c
+ ; SSE2: cost of 2 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i64c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec4i64 %a , <i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec4i64 %0
+}
+
+%shifttypec8i64 = type <8 x i64>
+define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
+entry:
+ ; SSE2: shift8i64c
+ ; SSE2: cost of 4 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i64c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec8i64 %a , <i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec8i64 %0
+}
+
+%shifttypec16i64 = type <16 x i64>
+define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
+entry:
+ ; SSE2: shift16i64c
+ ; SSE2: cost of 8 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i64c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec16i64 %a , <i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec16i64 %0
+}
+
+%shifttypec32i64 = type <32 x i64>
+define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
+entry:
+ ; SSE2: shift32i64c
+ ; SSE2: cost of 256 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i64c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec32i64 %a ,<i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec32i64 %0
+}
+
+%shifttypec2i8 = type <2 x i8>
+define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
+entry:
+ ; SSE2: shift2i8c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift2i8c
+ ; SSE2-CODEGEN: psrlq $3
+
+ %0 = lshr %shifttypec2i8 %a , <i8 3, i8 3>
+ ret %shifttypec2i8 %0
+}
+
+%shifttypec4i8 = type <4 x i8>
+define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
+entry:
+ ; SSE2: shift4i8c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift4i8c
+ ; SSE2-CODEGEN: psrld $3
+
+ %0 = lshr %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec4i8 %0
+}
+
+%shifttypec8i8 = type <8 x i8>
+define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
+entry:
+ ; SSE2: shift8i8c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift8i8c
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec8i8 %0
+}
+
+%shifttypec16i8 = type <16 x i8>
+define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
+entry:
+ ; SSE2: shift16i8c
+ ; SSE2: cost of 1 {{.*}} lshr
+ ; SSE2-CODEGEN: shift16i8c
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec16i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec16i8 %0
+}
+
+%shifttypec32i8 = type <32 x i8>
+define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
+entry:
+ ; SSE2: shift32i8c
+ ; SSE2: cost of 2 {{.*}} lshr
+ ; SSE2-CODEGEN: shift32i8c
+ ; SSE2-CODEGEN: psrlw $3
+
+ %0 = lshr %shifttypec32i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec32i8 %0
+}
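
Note the contrast with the ashr file: because SSE2 does provide psrlq, even
the 64-bit constant-splat cases stay cheap here (cost 1 for <2 x i64> versus
20 for the arithmetic shift). An illustrative pair, assuming a splat-by-3:

  %l = lshr <2 x i64> %x, <i64 3, i64 3> ; one psrlq $3, cost 1
  %r = ashr <2 x i64> %x, <i64 3, i64 3> ; no psraq on SSE2; expanded, cost 20
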
diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll
new file mode 100644
index 000000000000..f45a69879210
--- /dev/null
+++ b/test/Analysis/CostModel/X86/testshiftshl.ll
@@ -0,0 +1,530 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+
+%shifttype = type <2 x i16>
+define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
+entry:
+ ; SSE2: shift2i16
+ ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2-CODEGEN: shift2i16
+ ; SSE2-CODEGEN: shlq %cl
+
+ %0 = shl %shifttype %a , %b
+ ret %shifttype %0
+}
+
+%shifttype4i16 = type <4 x i16>
+define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
+entry:
+ ; SSE2: shift4i16
+ ; SSE2: cost of 10 {{.*}} shl
+ ; SSE2-CODEGEN: shift4i16
+ ; SSE2-CODEGEN: pmuludq
+
+ %0 = shl %shifttype4i16 %a , %b
+ ret %shifttype4i16 %0
+}
+
+%shifttype8i16 = type <8 x i16>
+define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
+entry:
+ ; SSE2: shift8i16
+ ; SSE2: cost of 80 {{.*}} shl
+ ; SSE2-CODEGEN: shift8i16
+ ; SSE2-CODEGEN: shll %cl
+
+ %0 = shl %shifttype8i16 %a , %b
+ ret %shifttype8i16 %0
+}
+
+%shifttype16i16 = type <16 x i16>
+define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
+entry:
+ ; SSE2: shift16i16
+ ; SSE2: cost of 160 {{.*}} shl
+ ; SSE2-CODEGEN: shift16i16
+ ; SSE2-CODEGEN: shll %cl
+
+ %0 = shl %shifttype16i16 %a , %b
+ ret %shifttype16i16 %0
+}
+
+%shifttype32i16 = type <32 x i16>
+define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
+entry:
+ ; SSE2: shift32i16
+ ; SSE2: cost of 320 {{.*}} shl
+ ; SSE2-CODEGEN: shift32i16
+ ; SSE2-CODEGEN: shll %cl
+
+ %0 = shl %shifttype32i16 %a , %b
+ ret %shifttype32i16 %0
+}
+
+%shifttype2i32 = type <2 x i32>
+define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
+entry:
+ ; SSE2: shift2i32
+ ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2-CODEGEN: shift2i32
+ ; SSE2-CODEGEN: shlq %cl
+
+ %0 = shl %shifttype2i32 %a , %b
+ ret %shifttype2i32 %0
+}
+
+%shifttype4i32 = type <4 x i32>
+define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
+entry:
+ ; SSE2: shift4i32
+ ; SSE2: cost of 10 {{.*}} shl
+ ; SSE2-CODEGEN: shift4i32
+ ; SSE2-CODEGEN: pmuludq
+
+ %0 = shl %shifttype4i32 %a , %b
+ ret %shifttype4i32 %0
+}
+
+%shifttype8i32 = type <8 x i32>
+define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
+entry:
+ ; SSE2: shift8i32
+ ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2-CODEGEN: shift8i32
+ ; SSE2-CODEGEN: pmuludq
+
+ %0 = shl %shifttype8i32 %a , %b
+ ret %shifttype8i32 %0
+}
+
+%shifttype16i32 = type <16 x i32>
+define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
+entry:
+ ; SSE2: shift16i32
+ ; SSE2: cost of 40 {{.*}} shl
+ ; SSE2-CODEGEN: shift16i32
+ ; SSE2-CODEGEN: pmuludq
+
+ %0 = shl %shifttype16i32 %a , %b
+ ret %shifttype16i32 %0
+}
+
+%shifttype32i32 = type <32 x i32>
+define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
+entry:
+ ; SSE2: shift32i32
+ ; SSE2: cost of 256 {{.*}} shl
+ ; SSE2-CODEGEN: shift32i32
+ ; SSE2-CODEGEN: pmuludq
+
+ %0 = shl %shifttype32i32 %a , %b
+ ret %shifttype32i32 %0
+}
+
+%shifttype2i64 = type <2 x i64>
+define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
+entry:
+ ; SSE2: shift2i64
+ ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2-CODEGEN: shift2i64
+ ; SSE2-CODEGEN: shlq %cl
+
+ %0 = shl %shifttype2i64 %a , %b
+ ret %shifttype2i64 %0
+}
+
+%shifttype4i64 = type <4 x i64>
+define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
+entry:
+ ; SSE2: shift4i64
+ ; SSE2: cost of 40 {{.*}} shl
+ ; SSE2-CODEGEN: shift4i64
+ ; SSE2-CODEGEN: shlq %cl
+
+ %0 = shl %shifttype4i64 %a , %b
+ ret %shifttype4i64 %0
+}
+
+%shifttype8i64 = type <8 x i64>
+define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
+entry:
+ ; SSE2: shift8i64
+ ; SSE2: cost of 80 {{.*}} shl
+ ; SSE2-CODEGEN: shift8i64
+ ; SSE2-CODEGEN: shlq %cl
+
+ %0 = shl %shifttype8i64 %a , %b
+ ret %shifttype8i64 %0
+}
+
+%shifttype16i64 = type <16 x i64>
+define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
+entry:
+ ; SSE2: shift16i64
+ ; SSE2: cost of 160 {{.*}} shl
+ ; SSE2-CODEGEN: shift16i64
+ ; SSE2-CODEGEN: shlq %cl
+
+ %0 = shl %shifttype16i64 %a , %b
+ ret %shifttype16i64 %0
+}
+
+%shifttype32i64 = type <32 x i64>
+define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
+entry:
+ ; SSE2: shift32i64
+ ; SSE2: cost of 256 {{.*}} shl
+ ; SSE2-CODEGEN: shift32i64
+ ; SSE2-CODEGEN: shlq %cl
+
+ %0 = shl %shifttype32i64 %a , %b
+ ret %shifttype32i64 %0
+}
+
+%shifttype2i8 = type <2 x i8>
+define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
+entry:
+ ; SSE2: shift2i8
+ ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2-CODEGEN: shift2i8
+ ; SSE2-CODEGEN: shlq %cl
+
+ %0 = shl %shifttype2i8 %a , %b
+ ret %shifttype2i8 %0
+}
+
+%shifttype4i8 = type <4 x i8>
+define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
+entry:
+ ; SSE2: shift4i8
+ ; SSE2: cost of 10 {{.*}} shl
+ ; SSE2-CODEGEN: shift4i8
+ ; SSE2-CODEGEN: pmuludq
+
+ %0 = shl %shifttype4i8 %a , %b
+ ret %shifttype4i8 %0
+}
+
+%shifttype8i8 = type <8 x i8>
+define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
+entry:
+ ; SSE2: shift8i8
+ ; SSE2: cost of 80 {{.*}} shl
+ ; SSE2-CODEGEN: shift8i8
+ ; SSE2-CODEGEN: shll
+
+ %0 = shl %shifttype8i8 %a , %b
+ ret %shifttype8i8 %0
+}
+
+%shifttype16i8 = type <16 x i8>
+define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
+entry:
+ ; SSE2: shift16i8
+ ; SSE2: cost of 30 {{.*}} shl
+ ; SSE2-CODEGEN: shift16i8
+ ; SSE2-CODEGEN: cmpeqb
+
+ %0 = shl %shifttype16i8 %a , %b
+ ret %shifttype16i8 %0
+}
+
+%shifttype32i8 = type <32 x i8>
+define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
+entry:
+ ; SSE2: shift32i8
+ ; SSE2: cost of 60 {{.*}} shl
+ ; SSE2-CODEGEN: shift32i8
+ ; SSE2-CODEGEN: cmpeqb
+
+ %0 = shl %shifttype32i8 %a , %b
+ ret %shifttype32i8 %0
+}
+
+; Test shift by a constant vector.
+
+%shifttypec = type <2 x i16>
+define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
+entry:
+ ; SSE2: shift2i16const
+ ; SSE2: cost of 1 {{.*}} shl
+ ; SSE2-CODEGEN: shift2i16const
+ ; SSE2-CODEGEN: psllq $3
+
+ %0 = shl %shifttypec %a , <i16 3, i16 3>
+ ret %shifttypec %0
+}
+
+%shifttypec4i16 = type <4 x i16>
+define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
+entry:
+ ; SSE2: shift4i16const
+ ; SSE2: cost of 1 {{.*}} shl
+ ; SSE2-CODEGEN: shift4i16const
+ ; SSE2-CODEGEN: pslld $3
+
+ %0 = shl %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec4i16 %0
+}
+
+%shifttypec8i16 = type <8 x i16>
+define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
+entry:
+ ; SSE2: shift8i16const
+ ; SSE2: cost of 1 {{.*}} shl
+ ; SSE2-CODEGEN: shift8i16const
+ ; SSE2-CODEGEN: psllw $3
+
+ %0 = shl %shifttypec8i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec8i16 %0
+}
+
+%shifttypec16i16 = type <16 x i16>
+define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
+ %shifttypec16i16 %b) {
+entry:
+ ; SSE2: shift16i16const
+ ; SSE2: cost of 2 {{.*}} shl
+ ; SSE2-CODEGEN: shift16i16const
+ ; SSE2-CODEGEN: psllw $3
+
+ %0 = shl %shifttypec16i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec16i16 %0
+}
+
+%shifttypec32i16 = type <32 x i16>
+define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
+ %shifttypec32i16 %b) {
+entry:
+ ; SSE2: shift32i16const
+ ; SSE2: cost of 4 {{.*}} shl
+ ; SSE2-CODEGEN: shift32i16const
+ ; SSE2-CODEGEN: psllw $3
+
+ %0 = shl %shifttypec32i16 %a , <i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3,
+ i16 3, i16 3, i16 3, i16 3>
+ ret %shifttypec32i16 %0
+}
+
+%shifttypec2i32 = type <2 x i32>
+define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
+entry:
+ ; SSE2: shift2i32c
+ ; SSE2: cost of 1 {{.*}} shl
+ ; SSE2-CODEGEN: shift2i32c
+ ; SSE2-CODEGEN: psllq $3
+
+ %0 = shl %shifttypec2i32 %a , <i32 3, i32 3>
+ ret %shifttypec2i32 %0
+}
+
+%shifttypec4i32 = type <4 x i32>
+define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
+entry:
+ ; SSE2: shift4i32c
+ ; SSE2: cost of 1 {{.*}} shl
+ ; SSE2-CODEGEN: shift4i32c
+ ; SSE2-CODEGEN: pslld $3
+
+ %0 = shl %shifttypec4i32 %a , <i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec4i32 %0
+}
+
+%shifttypec8i32 = type <8 x i32>
+define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
+entry:
+ ; SSE2: shift8i32c
+ ; SSE2: cost of 2 {{.*}} shl
+ ; SSE2-CODEGEN: shift8i32c
+ ; SSE2-CODEGEN: pslld $3
+
+ %0 = shl %shifttypec8i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec8i32 %0
+}
+
+%shifttypec16i32 = type <16 x i32>
+define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
+entry:
+ ; SSE2: shift16i32c
+ ; SSE2: cost of 4 {{.*}} shl
+ ; SSE2-CODEGEN: shift16i32c
+ ; SSE2-CODEGEN: pslld $3
+
+ %0 = shl %shifttypec16i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec16i32 %0
+}
+
+%shifttypec32i32 = type <32 x i32>
+define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
+entry:
+ ; SSE2: shift32i32c
+  ; getTypeConversion fails here and promotes this to an i64.
+ ; SSE2: cost of 256 {{.*}} shl
+ ; SSE2-CODEGEN: shift32i32c
+ ; SSE2-CODEGEN: pslld $3
+ %0 = shl %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3,
+ i32 3, i32 3, i32 3, i32 3>
+ ret %shifttypec32i32 %0
+}
+
+%shifttypec2i64 = type <2 x i64>
+define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
+entry:
+ ; SSE2: shift2i64c
+ ; SSE2: cost of 1 {{.*}} shl
+ ; SSE2-CODEGEN: shift2i64c
+ ; SSE2-CODEGEN: psllq $3
+
+ %0 = shl %shifttypec2i64 %a , <i64 3, i64 3>
+ ret %shifttypec2i64 %0
+}
+
+%shifttypec4i64 = type <4 x i64>
+define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
+entry:
+ ; SSE2: shift4i64c
+ ; SSE2: cost of 2 {{.*}} shl
+ ; SSE2-CODEGEN: shift4i64c
+ ; SSE2-CODEGEN: psllq $3
+
+ %0 = shl %shifttypec4i64 %a , <i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec4i64 %0
+}
+
+%shifttypec8i64 = type <8 x i64>
+define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
+entry:
+ ; SSE2: shift8i64c
+ ; SSE2: cost of 4 {{.*}} shl
+ ; SSE2-CODEGEN: shift8i64c
+ ; SSE2-CODEGEN: psllq $3
+
+ %0 = shl %shifttypec8i64 %a , <i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec8i64 %0
+}
+
+%shifttypec16i64 = type <16 x i64>
+define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
+entry:
+ ; SSE2: shift16i64c
+ ; SSE2: cost of 8 {{.*}} shl
+ ; SSE2-CODEGEN: shift16i64c
+ ; SSE2-CODEGEN: psllq $3
+
+ %0 = shl %shifttypec16i64 %a , <i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec16i64 %0
+}
+
+%shifttypec32i64 = type <32 x i64>
+define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
+entry:
+ ; SSE2: shift32i64c
+ ; SSE2: cost of 256 {{.*}} shl
+ ; SSE2-CODEGEN: shift32i64c
+ ; SSE2-CODEGEN: psllq $3
+
+ %0 = shl %shifttypec32i64 %a ,<i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3,
+ i64 3, i64 3, i64 3, i64 3>
+ ret %shifttypec32i64 %0
+}
+
+%shifttypec2i8 = type <2 x i8>
+define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
+entry:
+ ; SSE2: shift2i8c
+ ; SSE2: cost of 1 {{.*}} shl
+ ; SSE2-CODEGEN: shift2i8c
+ ; SSE2-CODEGEN: psllq $3
+
+ %0 = shl %shifttypec2i8 %a , <i8 3, i8 3>
+ ret %shifttypec2i8 %0
+}
+
+%shifttypec4i8 = type <4 x i8>
+define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
+entry:
+ ; SSE2: shift4i8c
+ ; SSE2: cost of 1 {{.*}} shl
+ ; SSE2-CODEGEN: shift4i8c
+ ; SSE2-CODEGEN: pslld $3
+
+ %0 = shl %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec4i8 %0
+}
+
+%shifttypec8i8 = type <8 x i8>
+define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
+entry:
+ ; SSE2: shift8i8c
+ ; SSE2: cost of 1 {{.*}} shl
+ ; SSE2-CODEGEN: shift8i8c
+ ; SSE2-CODEGEN: psllw $3
+
+ %0 = shl %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec8i8 %0
+}
+
+%shifttypec16i8 = type <16 x i8>
+define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
+entry:
+ ; SSE2: shift16i8c
+ ; SSE2: cost of 1 {{.*}} shl
+ ; SSE2-CODEGEN: shift16i8c
+ ; SSE2-CODEGEN: psllw $3
+
+ %0 = shl %shifttypec16i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec16i8 %0
+}
+
+%shifttypec32i8 = type <32 x i8>
+define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
+entry:
+ ; SSE2: shift32i8c
+ ; SSE2: cost of 2 {{.*}} shl
+ ; SSE2-CODEGEN: shift32i8c
+ ; SSE2-CODEGEN: psllw $3
+
+ %0 = shl %shifttypec32i8 %a , <i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3,
+ i8 3, i8 3, i8 3, i8 3>
+ ret %shifttypec32i8 %0
+}
diff --git a/test/Analysis/CostModel/X86/tiny.ll b/test/Analysis/CostModel/X86/tiny.ll
index cc7b443a7dfc..0dafdadb5b15 100644
--- a/test/Analysis/CostModel/X86/tiny.ll
+++ b/test/Analysis/CostModel/X86/tiny.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.8.0"
;CHECK: cost of 1 {{.*}} add
-;CHECK: cost of 1 {{.*}} ret
+;CHECK: cost of 0 {{.*}} ret
define i32 @no_info(i32 %arg) {
%e = add i32 %arg, %arg
ret i32 %e
diff --git a/test/Analysis/CostModel/X86/vectorized-loop.ll b/test/Analysis/CostModel/X86/vectorized-loop.ll
index 7919a9ca9a64..25b11145c661 100644
--- a/test/Analysis/CostModel/X86/vectorized-loop.ll
+++ b/test/Analysis/CostModel/X86/vectorized-loop.ll
@@ -28,20 +28,21 @@ vector.body: ; preds = %for.body.lr.ph, %ve
%4 = getelementptr inbounds i32* %B, i64 %3
;CHECK: cost of 0 {{.*}} bitcast
%5 = bitcast i32* %4 to <8 x i32>*
- ;CHECK: cost of 1 {{.*}} load
+ ;CHECK: cost of 2 {{.*}} load
%6 = load <8 x i32>* %5, align 4
;CHECK: cost of 4 {{.*}} mul
%7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
%8 = getelementptr inbounds i32* %A, i64 %index
%9 = bitcast i32* %8 to <8 x i32>*
+ ;CHECK: cost of 2 {{.*}} load
%10 = load <8 x i32>* %9, align 4
;CHECK: cost of 4 {{.*}} add
%11 = add nsw <8 x i32> %10, %7
- ;CHECK: cost of 1 {{.*}} store
+ ;CHECK: cost of 2 {{.*}} store
store <8 x i32> %11, <8 x i32>* %9, align 4
%index.next = add i64 %index, 8
%12 = icmp eq i64 %index.next, %end.idx.rnd.down
- ;CHECK: cost of 1 {{.*}} br
+ ;CHECK: cost of 0 {{.*}} br
br i1 %12, label %middle.block, label %vector.body
middle.block: ; preds = %vector.body, %for.body.lr.ph
@@ -65,11 +66,11 @@ for.body: ; preds = %middle.block, %for.
;CHECK: cost of 0 {{.*}} trunc
%16 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %16, %end
- ;CHECK: cost of 1 {{.*}} br
+ ;CHECK: cost of 0 {{.*}} br
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %middle.block, %for.body, %entry
- ;CHECK: cost of 1 {{.*}} ret
+ ;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}
diff --git a/test/Analysis/CostModel/no_info.ll b/test/Analysis/CostModel/no_info.ll
index d20d56b79a7f..f3f165b1b52a 100644
--- a/test/Analysis/CostModel/no_info.ll
+++ b/test/Analysis/CostModel/no_info.ll
@@ -1,11 +1,8 @@
; RUN: opt < %s -cost-model -analyze | FileCheck %s
; The cost model does not have any target information so it can't make a decision.
-; Notice that OPT does not read the triple information from the module itself, only through the command line.
-; This info ignored:
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
+; -- No triple in this module --
;CHECK: Unknown cost {{.*}} add
;CHECK: Unknown cost {{.*}} ret
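
Giving the cost model a target, either a triple in the module or -mtriple on
the command line, turns these unknown costs into concrete ones; an
illustrative invocation in the style of the RUN lines used elsewhere in this
patch:

  opt < no_info.ll -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx
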
diff --git a/test/Analysis/DependenceAnalysis/Banerjee.ll b/test/Analysis/DependenceAnalysis/Banerjee.ll
index 8865ee94016f..003ee03ab0be 100644
--- a/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -7,13 +7,20 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 1; i <= 10; i++)
;; for (long int j = 1; j <= 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j - 1];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j - 1];
define void @banerjee0(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [<= <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 1, %entry ], [ %inc8, %for.inc7 ]
@@ -31,7 +38,6 @@ for.body3: ; preds = %for.cond1.preheader
%sub = add nsw i64 %add5, -1
%arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
%0 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [<= <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -51,14 +57,21 @@ for.end9: ; preds = %for.inc7
;; for (long int i = 1; i <= n; i++)
;; for (long int j = 1; j <= m; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j - 1];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j - 1];
define void @banerjee1(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
%cmp4 = icmp sgt i64 %n, 0
br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end9
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - flow [* <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *]!
+
for.cond1.preheader.preheader: ; preds = %entry
%0 = add i64 %n, 1
br label %for.cond1.preheader
@@ -85,7 +98,6 @@ for.body3: ; preds = %for.body3.preheader
%sub = add nsw i64 %add5, -1
%arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
%2 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [* <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.12, i64 1
store i64 %2, i64* %B.addr.12, align 8
%inc = add nsw i64 %j.03, 1
@@ -119,6 +131,13 @@ define void @banerjee2(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -136,7 +155,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %add5, 100
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%0 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -156,13 +174,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j + 99];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j + 99];
define void @banerjee3(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [> >]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -180,7 +205,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %add5, 99
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%0 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - flow [> >]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -200,13 +224,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j - 100];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j - 100];
define void @banerjee4(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -224,7 +255,6 @@ for.body3: ; preds = %for.cond1.preheader
%sub = add nsw i64 %add5, -100
%arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
%0 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -244,13 +274,20 @@ for.end9: ; preds = %for.inc7
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j - 99];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j - 99];
define void @banerjee5(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [< <]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -268,7 +305,6 @@ for.body3: ; preds = %for.cond1.preheader
%sub = add nsw i64 %add5, -99
%arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
%0 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [< <]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -288,13 +324,20 @@ for.end9: ; preds = %for.inc7
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j + 9];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j + 9];
define void @banerjee6(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [=> <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -312,7 +355,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %add5, 9
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%0 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - flow [=> <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -332,13 +374,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j + 10];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j + 10];
define void @banerjee7(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [> <=]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -356,7 +405,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %add5, 10
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%0 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - flow [> <=]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -376,13 +424,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j + 11];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j + 11];
define void @banerjee8(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [> <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -400,7 +455,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %add5, 11
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%0 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - flow [> <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -420,13 +474,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 20; i++)
;; for (long int j = 0; j < 20; j++) {
-;; A[30*i + 500*j] = ...
-;; ... = A[i - 500*j + 11];
+;; A[30*i + 500*j] = 0;
+;; *B++ = A[i - 500*j + 11];
define void @banerjee9(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - flow [<= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -445,7 +506,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %sub, 11
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%1 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - flow [<= =|<]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %1, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -465,13 +525,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 20; i++)
;; for (long int j = 0; j < 20; j++) {
-;; A[i + 500*j] = ...
-;; ... = A[i - 500*j + 11];
+;; A[i + 500*j] = 0;
+;; *B++ = A[i - 500*j + 11];
define void @banerjee10(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [<> =]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -489,7 +556,6 @@ for.body3: ; preds = %for.cond1.preheader
%add5 = add nsw i64 %sub, 11
%arrayidx6 = getelementptr inbounds i64* %A, i64 %add5
%1 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [<> =]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %1, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -509,13 +575,20 @@ for.end9: ; preds = %for.inc7
;; for (long int i = 0; i < 20; i++)
;; for (long int j = 0; j < 20; j++) {
-;; A[300*i + j] = ...
-;; ... = A[250*i - j + 11];
+;; A[300*i + j] = 0;
+;; *B++ = A[250*i - j + 11];
define void @banerjee11(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [<= <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -533,7 +606,6 @@ for.body3: ; preds = %for.cond1.preheader
%add5 = add nsw i64 %sub, 11
%arrayidx6 = getelementptr inbounds i64* %A, i64 %add5
%0 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [<= <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -553,13 +625,20 @@ for.end9: ; preds = %for.inc7
;; for (long int i = 0; i < 20; i++)
;; for (long int j = 0; j < 20; j++) {
-;; A[100*i + j] = ...
-;; ... = A[100*i - j + 11];
+;; A[100*i + j] = 0;
+;; *B++ = A[100*i - j + 11];
define void @banerjee12(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [= <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -577,7 +656,6 @@ for.body3: ; preds = %for.cond1.preheader
%add5 = add nsw i64 %sub, 11
%arrayidx6 = getelementptr inbounds i64* %A, i64 %add5
%0 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [= <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
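
Note: the Banerjee.ll hunks above hoist the expected "da analyze" lines to the top of each function and make the ";;" pseudo-code concrete ("... = ..." becomes "A[...] = 0; *B++ = A[...]"). As a rough standalone rendering of the pattern @banerjee0 exercises — a sketch only, not part of the patch; the array sizes and the final printf are illustrative:

    /* banerjee0's access pattern: the store A[10*i + j] and the load
     * A[10*i + j - 1] can hit the same element in a later iteration,
     * which is the loop-carried flow the test now CHECKs up front. */
    #include <stdio.h>

    int main(void) {
        long A[200] = {0};
        long B[100], *p = B;
        for (long i = 1; i <= 10; i++)
            for (long j = 1; j <= 10; j++) {
                A[10 * i + j] = 0;          /* write element 10i+j  */
                *p++ = A[10 * i + j - 1];   /* read element 10i+j-1 */
            }
        printf("%ld\n", B[1]);              /* keep B observable */
        return 0;
    }
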
diff --git a/test/Analysis/DependenceAnalysis/Coupled.ll b/test/Analysis/DependenceAnalysis/Coupled.ll
index 60163fe7c2d0..8c77849ae847 100644
--- a/test/Analysis/DependenceAnalysis/Coupled.ll
+++ b/test/Analysis/DependenceAnalysis/Coupled.ll
@@ -5,15 +5,22 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; for (long int i = 0; i < 50; i++)
-;; A[i][i] = ...
-;; ... = A[i + 10][i + 9]
+;; for (long int i = 0; i < 50; i++) {
+;; A[i][i] = i;
+;; *B++ = A[i + 10][i + 9];
define void @couple0([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -23,27 +30,33 @@ for.body: ; preds = %for.body, %entry
%add2 = add nsw i64 %i.02, 10
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %add2, i64 %add
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[i][i] = ...
-;; ... = A[i + 9][i + 9]
+;; for (long int i = 0; i < 50; i++) {
+;; A[i][i] = i;
+;; *B++ = A[i + 9][i + 9];
define void @couple1([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [-9]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -53,27 +66,33 @@ for.body: ; preds = %for.body, %entry
%add2 = add nsw i64 %i.02, 9
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %add2, i64 %add
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - consistent flow [-9]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[3*i - 6][3*i - 6] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[3*i - 6][3*i - 6] = i;
+;; *B++ = A[i][i];
define void @couple2([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -85,27 +104,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx3, align 4
%arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[3*i - 6][3*i - 5] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[3*i - 6][3*i - 5] = i;
+;; *B++ = A[i][i];
define void @couple3([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -117,27 +142,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx3, align 4
%arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[3*i - 6][3*i - n] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[3*i - 6][3*i - n] = i;
+;; *B++ = A[i][i];
define void @couple4([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -150,27 +181,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx4, align 4
%arrayidx6 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx6, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[3*i - n + 1][3*i - n] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[3*i - n + 1][3*i - n] = i;
+;; *B++ = A[i][i];
define void @couple5([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -185,27 +222,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[i][3*i - 6] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[i][3*i - 6] = i;
+;; *B++ = A[i][i];
define void @couple6([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -215,27 +258,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx1, align 4
%arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - flow [=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[i][3*i - 5] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[i][3*i - 5] = i;
+;; *B++ = A[i][i];
define void @couple7([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -245,27 +294,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx1, align 4
%arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i <= 15; i++)
-;; A[3*i - 18][3 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i <= 15; i++) {
+;; A[3*i - 18][3 - i] = i;
+;; *B++ = A[i][i];
define void @couple8([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -276,27 +331,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 16
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 16
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i <= 15; i++)
-;; A[3*i - 18][2 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i <= 15; i++) {
+;; A[3*i - 18][2 - i] = i;
+;; *B++ = A[i][i];
define void @couple9([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -307,27 +368,34 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 16
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 16
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i <= 15; i++)
-;; A[3*i - 18][6 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i <= 15; i++) {
+;; A[3*i - 18][6 - i] = i;
+;; *B++ = A[i][i];
define void @couple10([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [>] splitable!
+; CHECK: da analyze - split level = 1, iteration = 3!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -338,28 +406,34 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - flow [>] splitable!
-; CHECK: da analyze - split level = 1, iteration = 3!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 16
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 16
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i <= 15; i++)
-;; A[3*i - 18][18 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i <= 15; i++) {
+;; A[3*i - 18][18 - i] = i;
+;; *B++ = A[i][i];
define void @couple11([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [=|<] splitable!
+; CHECK: da analyze - split level = 1, iteration = 9!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -370,28 +444,34 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - flow [=|<] splitable!
-; CHECK: da analyze - split level = 1, iteration = 9!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 16
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 16
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i <= 12; i++)
-;; A[3*i - 18][22 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i <= 12; i++) {
+;; A[3*i - 18][22 - i] = i;
+;; *B++ = A[i][i];
define void @couple12([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [<] splitable!
+; CHECK: da analyze - split level = 1, iteration = 11!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -402,28 +482,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - flow [<] splitable!
-; CHECK: da analyze - split level = 1, iteration = 11!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 13
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 13
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 12; i++)
-;; A[3*i - 18][22 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 12; i++) {
+;; A[3*i - 18][22 - i] = i;
+;; *B++ = A[i][i];
define void @couple13([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -434,27 +519,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 12
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 12
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-
-;; for (long int i = 0; i < 100; i++)
-;; A[3*i - 18][18 - i][i] = ...
-;; ... = A[i][i][i]
+;; for (long int i = 0; i < 100; i++) {
+;; A[3*i - 18][18 - i][i] = i;
+;; *B++ = A[i][i][i];
define void @couple14([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [=|<] splitable!
+; CHECK: da analyze - split level = 1, iteration = 9!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -465,28 +556,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx3, align 4
%arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.02, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx6, align 4
-; CHECK: da analyze - flow [=|<] splitable!
-; CHECK: da analyze - split level = 1, iteration = 9!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 100
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 100; i++)
-;; A[3*i - 18][22 - i][i] = ...
-;; ... = A[i][i][i]
+;; for (long int i = 0; i < 100; i++) {
+;; A[3*i - 18][22 - i][i] = i;
+;; *B++ = A[i][i][i];
define void @couple15([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -497,12 +593,11 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx3, align 4
%arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.02, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx6, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 100
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
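
Note: the Coupled.ll hunks above additionally rewrite each loop exit from "icmp slt"/"%cmp" to the canonical "icmp ne"/"%exitcond" form. For the coupled-subscript case @couple1, a sketch of the C the ";;" comments describe — sizes and the printf are illustrative, not taken from the test:

    /* couple1's coupled subscripts: both dimensions of A use the same
     * induction variable, so the store A[i][i] and the load
     * A[i+9][i+9] name the same element at a constant offset of nine
     * iterations -- the "consistent flow [-9]!" result the test expects. */
    #include <stdio.h>

    int main(void) {
        static int A[100][100];              /* zero-initialized */
        int B[50], *p = B;
        for (long i = 0; i < 50; i++) {
            A[i][i] = (int)i;
            *p++ = A[i + 9][i + 9];          /* written 9 iterations later */
        }
        printf("%d\n", B[0]);                /* A[9][9] not yet written: 0 */
        return 0;
    }
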
diff --git a/test/Analysis/DependenceAnalysis/ExactRDIV.ll b/test/Analysis/DependenceAnalysis/ExactRDIV.ll
index aa5d254a0ce2..81f55161c0df 100644
--- a/test/Analysis/DependenceAnalysis/ExactRDIV.ll
+++ b/test/Analysis/DependenceAnalysis/ExactRDIV.ll
@@ -6,15 +6,22 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 0; i < 10; i++)
-;; A[4*i + 10] = ...
+;; A[4*i + 10] = i;
;; for (long int j = 0; j < 10; j++)
-;; ... = A[2*j + 1];
+;; *B++ = A[2*j + 1];
define void @rdiv0(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = shl nsw i64 %i.03, 2
@@ -22,22 +29,24 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond5 = icmp ne i64 %inc, 10
+ br i1 %exitcond5, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%mul5 = shl nsw i64 %j.02, 1
%add64 = or i64 %mul5, 1
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add64
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc9 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc9, 10
- br i1 %cmp2, label %for.body4, label %for.end10
+ %exitcond = icmp ne i64 %inc9, 10
+ br i1 %exitcond, label %for.body4, label %for.end10
for.end10: ; preds = %for.body4
ret void
@@ -45,15 +54,22 @@ for.end10: ; preds = %for.body4
;; for (long int i = 0; i < 5; i++)
-;; A[11*i - 45] = ...
+;; A[11*i - 45] = i;
;; for (long int j = 0; j < 10; j++)
-;; ... = A[j];
+;; *B++ = A[j];
define void @rdiv1(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, 11
@@ -61,20 +77,22 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %sub
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 5
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 10
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 10
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -82,15 +100,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i <= 5; i++)
-;; A[11*i - 45] = ...
+;; A[11*i - 45] = i;
;; for (long int j = 0; j < 10; j++)
-;; ... = A[j];
+;; *B++ = A[j];
define void @rdiv2(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, 11
@@ -98,20 +123,22 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %sub
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 6
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 10
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 10
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -119,15 +146,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i < 5; i++)
-;; A[11*i - 45] = ...
+;; A[11*i - 45] = i;
;; for (long int j = 0; j <= 10; j++)
-;; ... = A[j];
+;; *B++ = A[j];
define void @rdiv3(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, 11
@@ -135,20 +169,22 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %sub
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 5
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 11
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 11
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -156,15 +192,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i <= 5; i++)
-;; A[11*i - 45] = ...
+;; A[11*i - 45] = i;
;; for (long int j = 0; j <= 10; j++)
-;; ... = A[j];
+;; *B++ = A[j];
define void @rdiv4(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, 11
@@ -172,20 +215,22 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %sub
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 6
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - flow!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 11
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 11
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -193,15 +238,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i < 5; i++)
-;; A[-11*i + 45] = ...
+;; A[-11*i + 45] = i;
;; for (long int j = 0; j < 10; j++)
-;; ... = A[-j];
+;; *B++ = A[-j];
define void @rdiv5(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -11
@@ -209,21 +261,23 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 5
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
+
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%sub = sub nsw i64 0, %j.02
%arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 10
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 10
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -231,15 +285,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i <= 5; i++)
-;; A[-11*i + 45] = ...
+;; A[-11*i + 45] = i;
;; for (long int j = 0; j < 10; j++)
-;; ... = A[-j];
+;; *B++ = A[-j];
define void @rdiv6(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -11
@@ -247,21 +308,23 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 6
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%sub = sub nsw i64 0, %j.02
%arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 10
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 10
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -269,15 +332,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i < 5; i++)
-;; A[-11*i + 45] = ...
+;; A[-11*i + 45] = i;
;; for (long int j = 0; j <= 10; j++)
-;; ... = A[-j];
+;; *B++ = A[-j];
define void @rdiv7(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -11
@@ -285,21 +355,23 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 5
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
+
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%sub = sub nsw i64 0, %j.02
%arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 11
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 11
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -307,15 +379,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i <= 5; i++)
-;; A[-11*i + 45] = ...
+;; A[-11*i + 45] = i;
;; for (long int j = 0; j <= 10; j++)
-;; ... = A[-j];
+;; *B++ = A[-j];
define void @rdiv8(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -11
@@ -323,21 +402,23 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 6
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%sub = sub nsw i64 0, %j.02
%arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - flow!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 11
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 11
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -345,20 +426,27 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i < 5; i++)
-;; for (long int j = 0; j < 10; j++)
-;; A[11*i - j] = ...
-;; ... = A[45];
+;; for (long int j = 0; j < 10; j++) {
+;; A[11*i - j] = i;
+;; *B++ = A[45];
define void @rdiv9(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc5, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc5
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -368,38 +456,46 @@ for.body3: ; preds = %for.body3, %for.con
store i32 %conv, i32* %arrayidx, align 4
%arrayidx4 = getelementptr inbounds i32* %A, i64 45
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 10
- br i1 %cmp2, label %for.body3, label %for.inc5
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body3, label %for.inc5
for.inc5: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 10
%inc6 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc6, 5
- br i1 %cmp, label %for.cond1.preheader, label %for.end7
+ %exitcond5 = icmp ne i64 %inc6, 5
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end7
for.end7: ; preds = %for.inc5
ret void
}
-;; for (long int i = 0; i < 5; i++)
-;; for (long int j = 0; j <= 10; j++)
-;; A[11*i - j] = ...
-;; ... = A[45];
+
+;; for (long int i = 0; i <= 5; i++)
+;; for (long int j = 0; j < 10; j++) {
+;; A[11*i - j] = i;
+;; *B++ = A[45];
define void @rdiv10(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc5, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc5
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -409,38 +505,45 @@ for.body3: ; preds = %for.body3, %for.con
store i32 %conv, i32* %arrayidx, align 4
%arrayidx4 = getelementptr inbounds i32* %A, i64 45
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 10
- br i1 %cmp2, label %for.body3, label %for.inc5
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body3, label %for.inc5
for.inc5: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 10
%inc6 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc6, 6
- br i1 %cmp, label %for.cond1.preheader, label %for.end7
+ %exitcond5 = icmp ne i64 %inc6, 6
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end7
for.end7: ; preds = %for.inc5
ret void
}
-;; for (long int i = 0; i <= 5; i++)
-;; for (long int j = 0; j <= 10; j++)
-;; A[11*i - j] = ...
-;; ... = A[45];
+;; for (long int i = 0; i < 5; i++)
+;; for (long int j = 0; j <= 10; j++) {
+;; A[11*i - j] = i;
+;; *B++ = A[45];
define void @rdiv11(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc5, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc5
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -450,38 +553,45 @@ for.body3: ; preds = %for.body3, %for.con
store i32 %conv, i32* %arrayidx, align 4
%arrayidx4 = getelementptr inbounds i32* %A, i64 45
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 11
- br i1 %cmp2, label %for.body3, label %for.inc5
+ %exitcond = icmp ne i64 %inc, 11
+ br i1 %exitcond, label %for.body3, label %for.inc5
for.inc5: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 11
%inc6 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc6, 5
- br i1 %cmp, label %for.cond1.preheader, label %for.end7
+ %exitcond5 = icmp ne i64 %inc6, 5
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end7
for.end7: ; preds = %for.inc5
ret void
}
-;; for (long int i = 0; i < 5; i++)
-;; for (long int j = 0; j < 10; j++)
-;; A[11*i - j] = ...
-;; ... = A[45];
+;; for (long int i = 0; i <= 5; i++)
+;; for (long int j = 0; j <= 10; j++) {
+;; A[11*i - j] = i;
+;; *B++ = A[45];
define void @rdiv12(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc5, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc5
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -491,17 +601,17 @@ for.body3: ; preds = %for.body3, %for.con
store i32 %conv, i32* %arrayidx, align 4
%arrayidx4 = getelementptr inbounds i32* %A, i64 45
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - flow [* *|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 11
- br i1 %cmp2, label %for.body3, label %for.inc5
+ %exitcond = icmp ne i64 %inc, 11
+ br i1 %exitcond, label %for.body3, label %for.inc5
for.inc5: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 11
%inc6 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc6, 6
- br i1 %cmp, label %for.cond1.preheader, label %for.end7
+ %exitcond5 = icmp ne i64 %inc6, 6
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end7
for.end7: ; preds = %for.inc5
ret void
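
The rdiv11/rdiv12 pair above differs only in the outer trip count. With i < 5 the store index 11*i - j peaks at 11*4 - 0 = 44, so A[45] is never written and the analysis reports none; with i <= 5 the iteration (i, j) = (5, 10) stores exactly to A[45], which is what produces the flow [* *|<] result. A minimal standalone C sketch (hypothetical, not part of the test suite) that enumerates the iteration space confirms the bound:

/* Count how many (i, j) iterations store to A[45] under each bound,
   mirroring the ;; comments of rdiv11 and rdiv12. */
#include <stdio.h>

static int hits(long imax_inclusive) {
    int n = 0;
    for (long i = 0; i <= imax_inclusive; i++)
        for (long j = 0; j <= 10; j++)
            if (11 * i - j == 45)   /* the store index in rdiv11/rdiv12 */
                n++;
    return n;
}

int main(void) {
    printf("i < 5  -> %d writes to A[45]\n", hits(4));  /* 0 => "none" */
    printf("i <= 5 -> %d writes to A[45]\n", hits(5));  /* 1 => "flow" */
    return 0;
}
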
diff --git a/test/Analysis/DependenceAnalysis/ExactSIV.ll b/test/Analysis/DependenceAnalysis/ExactSIV.ll
index 71e050246291..586bbe5096d6 100644
--- a/test/Analysis/DependenceAnalysis/ExactSIV.ll
+++ b/test/Analysis/DependenceAnalysis/ExactSIV.ll
@@ -6,14 +6,21 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long unsigned i = 0; i < 10; i++) {
-;; A[i + 10] = ...
-;; ... = A[2*i + 1];
+;; A[i + 10] = i;
+;; *B++ = A[2*i + 1];
define void @exact0(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [<=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -24,12 +31,11 @@ for.body: ; preds = %for.body, %entry
%add13 = or i64 %mul, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %add13
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [<=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -37,14 +43,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 10; i++) {
-;; A[4*i + 10] = ...
-;; ... = A[2*i + 1];
+;; A[4*i + 10] = i;
+;; *B++ = A[2*i + 1];
define void @exact1(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -56,12 +69,11 @@ for.body: ; preds = %for.body, %entry
%add23 = or i64 %mul1, 1
%arrayidx3 = getelementptr inbounds i32* %A, i64 %add23
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -69,14 +81,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 10; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact2(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -86,12 +105,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -99,14 +117,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 10; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact3(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -116,12 +141,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [>]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 11
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 11
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -129,14 +153,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 12; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact4(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -146,12 +177,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [>]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 12
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 12
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -159,14 +189,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 12; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact5(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [=>|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -176,12 +213,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [=>|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 13
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 13
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -189,14 +225,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 18; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact6(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [=>|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -206,12 +249,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [=>|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 18
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 18
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -219,14 +261,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 18; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact7(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -236,12 +285,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 19
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 19
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -249,14 +297,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 10; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact8(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -266,12 +321,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -279,14 +333,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 10; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact9(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -296,12 +357,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [>]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 11
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 11
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -309,14 +369,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 12; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact10(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -326,12 +393,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [>]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 12
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 12
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -339,14 +405,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 12; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact11(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [=>|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -356,12 +429,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [=>|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 13
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 13
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -369,14 +441,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 18; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact12(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [=>|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -386,12 +465,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [=>|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 18
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 18
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -399,14 +477,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 18; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact13(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -416,12 +501,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 19
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 19
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
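
The exact* functions above all store A[6*i] (or A[-6*i]) and read A[i + 60] (or A[-i - 60]) while varying only the trip count, so the exact-SIV test reduces to whether 6*iw == ir + 60 has a solution with both iterations in bounds. A brute-force C sketch (hypothetical helper, shown over the exact2/exact3 bounds) makes it clear why one reports none and the other flow [>]:

/* A flow dependence needs iterations (iw, ir) with 6*iw == ir + 60. */
#include <stdio.h>

static void scan(long ub_exclusive) {
    int found = 0;
    for (long iw = 0; iw < ub_exclusive; iw++)      /* writes A[6*iw]  */
        for (long ir = 0; ir < ub_exclusive; ir++)  /* reads  A[ir+60] */
            if (6 * iw == ir + 60) {
                printf("  overlap at iw=%ld, ir=%ld\n", iw, ir);
                found = 1;
            }
    if (!found)
        printf("  no overlap -> da reports none\n");
}

int main(void) {
    printf("i < 10  (exact2):\n"); scan(10);  /* 6*9 = 54 < 60: none   */
    printf("i <= 10 (exact3):\n"); scan(11);  /* iw=10, ir=0: flow [>] */
    return 0;
}
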
diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll
index 94c93a8a0dd4..a42212464f86 100644
--- a/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/test/Analysis/DependenceAnalysis/GCD.ll
@@ -6,14 +6,21 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[2*i - 4*j] = ...
-;; ... = A[6*i + 8*j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[2*i - 4*j] = i;
+;; *B++ = A[6*i + 8*j];
define void @gcd0(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - flow [=> *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -33,7 +40,6 @@ for.body3: ; preds = %for.cond1.preheader
%add = add nsw i64 %mul5, %mul6
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - flow [=> *|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -52,14 +58,21 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[2*i - 4*j] = ...
-;; ... = A[6*i + 8*j + 1];
+;; for (long int j = 0; j < 100; j++) {
+;; A[2*i - 4*j] = i;
+;; *B++ = A[6*i + 8*j + 1];
define void @gcd1(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc9
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
@@ -80,7 +93,6 @@ for.body3: ; preds = %for.cond1.preheader
%add7 = or i64 %add, 1
%arrayidx8 = getelementptr inbounds i32* %A, i64 %add7
%0 = load i32* %arrayidx8, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -99,14 +111,21 @@ for.end11: ; preds = %for.inc9
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[2*i - 4*j + 1] = ...
-;; ... = A[6*i + 8*j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[2*i - 4*j + 1] = i;
+;; *B++ = A[6*i + 8*j];
define void @gcd2(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc9
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
@@ -127,7 +146,6 @@ for.body3: ; preds = %for.cond1.preheader
%add7 = add nsw i64 %mul5, %mul6
%arrayidx8 = getelementptr inbounds i32* %A, i64 %add7
%0 = load i32* %arrayidx8, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -146,14 +164,21 @@ for.end11: ; preds = %for.inc9
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[i + 2*j] = ...
-;; ... = A[i + 2*j - 1];
+;; for (long int j = 0; j < 100; j++) {
+;; A[i + 2*j] = i;
+;; *B++ = A[i + 2*j - 1];
define void @gcd3(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - flow [<> *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -172,7 +197,6 @@ for.body3: ; preds = %for.cond1.preheader
%sub = add nsw i64 %add5, -1
%arrayidx6 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx6, align 4
-; CHECK: da analyze - flow [<> *]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -190,16 +214,22 @@ for.end9: ; preds = %for.inc7
}
-;; void gcd4(int *A, int *B, long int M, long int N) {
-;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++) {
-;; A[5*i + 10*j*M + 9*M*N] = i;
-;; *B++ = A[15*i + 20*j*M - 21*N*M + 4];
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++) {
+;; A[5*i + 10*j*M + 9*M*N] = i;
+;; *B++ = A[15*i + 20*j*M - 21*N*M + 4];
define void @gcd4(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc17
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ]
@@ -228,7 +258,6 @@ for.body3: ; preds = %for.cond1.preheader
%add15 = add nsw i64 %sub, 4
%arrayidx16 = getelementptr inbounds i32* %A, i64 %add15
%0 = load i32* %arrayidx16, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -246,16 +275,22 @@ for.end19: ; preds = %for.inc17
}
-;; void gcd5(int *A, int *B, long int M, long int N) {
-;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++) {
-;; A[5*i + 10*j*M + 9*M*N] = i;
-;; *B++ = A[15*i + 20*j*M - 21*N*M + 5];
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++) {
+;; A[5*i + 10*j*M + 9*M*N] = i;
+;; *B++ = A[15*i + 20*j*M - 21*N*M + 5];
define void @gcd5(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - flow [<> *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.cond1.preheader: ; preds = %entry, %for.inc17
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ]
@@ -284,7 +319,6 @@ for.body3: ; preds = %for.cond1.preheader
%add15 = add nsw i64 %sub, 5
%arrayidx16 = getelementptr inbounds i32* %A, i64 %add15
%0 = load i32* %arrayidx16, align 4
-; CHECK: da analyze - flow [<> *]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -302,17 +336,23 @@ for.end19: ; preds = %for.inc17
}
-;; void gcd6(long int n, int A[][n], int *B) {
-;; for (long int i = 0; i < n; i++)
-;; for (long int j = 0; j < n; j++) {
-;; A[2*i][4*j] = i;
-;; *B++ = A[8*i][6*j + 1];
+;; for (long int i = 0; i < n; i++)
+;; for (long int j = 0; j < n; j++) {
+;; A[2*i][4*j] = i;
+;; *B++ = A[8*i][6*j + 1];
define void @gcd6(i64 %n, i32* %A, i32* %B) nounwind uwtable ssp {
entry:
%cmp4 = icmp sgt i64 %n, 0
br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end12
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *]!
+
for.cond1.preheader.preheader: ; preds = %entry
br label %for.cond1.preheader
@@ -342,7 +382,6 @@ for.body3: ; preds = %for.body3.preheader
%arrayidx8.sum = add i64 %1, %add7
%arrayidx9 = getelementptr inbounds i32* %A, i64 %arrayidx8.sum
%2 = load i32* %arrayidx9, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
store i32 %2, i32* %B.addr.12, align 4
%inc = add nsw i64 %j.03, 1
@@ -367,11 +406,10 @@ for.end12: ; preds = %for.end12.loopexit,
}
-;; void gcd7(int n, int A[][n], int *B) {
-;; for (int i = 0; i < n; i++)
-;; for (int j = 0; j < n; j++) {
-;; A[2*i][4*j] = i;
-;; *B++ = A[8*i][6*j + 1];
+;; for (int i = 0; i < n; i++)
+;; for (int j = 0; j < n; j++) {
+;; A[2*i][4*j] = i;
+;; *B++ = A[8*i][6*j + 1];
define void @gcd7(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
entry:
@@ -379,6 +417,13 @@ entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - flow [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *]!
+
for.cond1.preheader.preheader: ; preds = %entry
br label %for.cond1.preheader
@@ -419,7 +464,6 @@ for.body3: ; preds = %for.body3.preheader
%arrayidx11.sum = add i64 %10, %idxprom8
%arrayidx12 = getelementptr inbounds i32* %A, i64 %arrayidx11.sum
%11 = load i32* %arrayidx12, align 4
-; CHECK: da analyze - flow [* *|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
store i32 %11, i32* %B.addr.12, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -446,17 +490,23 @@ for.end15: ; preds = %for.end15.loopexit,
}
-;; void gcd8(int n, int *A, int *B) {
-;; for (int i = 0; i < n; i++)
-;; for (int j = 0; j < n; j++) {
-;; A[n*2*i + 4*j] = i;
-;; *B++ = A[n*8*i + 6*j + 1];
+;; for (int i = 0; i < n; i++)
+;; for (int j = 0; j < n; j++) {
+;; A[n*2*i + 4*j] = i;
+;; *B++ = A[n*8*i + 6*j + 1];
define void @gcd8(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *]!
+
for.cond1.preheader.preheader: ; preds = %entry
br label %for.cond1.preheader
@@ -492,7 +542,6 @@ for.body3: ; preds = %for.body3.preheader
%idxprom11 = sext i32 %add10 to i64
%arrayidx12 = getelementptr inbounds i32* %A, i64 %idxprom11
%5 = load i32* %arrayidx12, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
store i32 %5, i32* %B.addr.12, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -518,11 +567,10 @@ for.end15: ; preds = %for.end15.loopexit,
}
-;; void gcd9(unsigned n, int A[][n], int *B) {
-;; for (unsigned i = 0; i < n; i++)
-;; for (unsigned j = 0; j < n; j++) {
-;; A[2*i][4*j] = i;
-;; *B++ = A[8*i][6*j + 1];
+;; for (unsigned i = 0; i < n; i++)
+;; for (unsigned j = 0; j < n; j++) {
+;; A[2*i][4*j] = i;
+;; *B++ = A[8*i][6*j + 1];
define void @gcd9(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
entry:
@@ -530,6 +578,13 @@ entry:
%cmp4 = icmp eq i32 %n, 0
br i1 %cmp4, label %for.end15, label %for.cond1.preheader.preheader
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - flow [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *]!
+
for.cond1.preheader.preheader: ; preds = %entry
br label %for.cond1.preheader
@@ -570,7 +625,6 @@ for.body3: ; preds = %for.body3.preheader
%arrayidx11.sum = add i64 %10, %idxprom8
%arrayidx12 = getelementptr inbounds i32* %A, i64 %arrayidx11.sum
%11 = load i32* %arrayidx12, align 4
-; CHECK: da analyze - flow [* *|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
store i32 %11, i32* %B.addr.12, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
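
The gcd* tests exercise the GCD feasibility check: the subscripts 2*i - 4*j and 6*i' + 8*j' + c can only reference the same element when gcd(2, 4, 6, 8) divides c, which is why gcd0 (c = 0) reports a flow dependence while gcd1 (c = 1) reports none. A short C sketch of that divisibility check (the gcd helper is an assumed illustration, not taken from the test suite):

#include <stdio.h>

static long gcd(long a, long b) { return b ? gcd(b, a % b) : a; }

int main(void) {
    long g = gcd(gcd(2, 4), gcd(6, 8));            /* g == 2 */
    printf("c = 0: %s\n", 0 % g == 0 ? "may depend   (gcd0: flow)"
                                     : "independent");
    printf("c = 1: %s\n", 1 % g == 0 ? "may depend"
                                     : "independent  (gcd1: none)");
    return 0;
}
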
diff --git a/test/Analysis/DependenceAnalysis/Preliminary.ll b/test/Analysis/DependenceAnalysis/Preliminary.ll
index 3ef63fd5592f..f36b85a5951e 100644
--- a/test/Analysis/DependenceAnalysis/Preliminary.ll
+++ b/test/Analysis/DependenceAnalysis/Preliminary.ll
@@ -1,111 +1,147 @@
-; RUN: opt < %s -analyze -basicaa -indvars -da | FileCheck %s
-
-; This series of tests is more interesting when debugging is enabled.
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
; ModuleID = 'Preliminary.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.6.0"
-;; may alias
-;; int p0(int n, int *A, int *B) {
+;;int p0(int n, int *A, int *B) {
;; A[0] = n;
;; return B[1];
define i32 @p0(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
entry:
store i32 %n, i32* %A, align 4
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
%arrayidx1 = getelementptr inbounds i32* %B, i64 1
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - confused!
ret i32 %0
}
-;; no alias
-;; int p1(int n, int *restrict A, int *restrict B) {
+;;int p1(int n, int *restrict A, int *restrict B) {
;; A[0] = n;
;; return B[1];
define i32 @p1(i32 %n, i32* noalias %A, i32* noalias %B) nounwind uwtable ssp {
entry:
store i32 %n, i32* %A, align 4
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+
%arrayidx1 = getelementptr inbounds i32* %B, i64 1
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
ret i32 %0
}
-;; check loop nesting levels
-;; for (long int i = 0; i < n; i++)
-;; for (long int j = 0; j < n; j++)
-;; for (long int k = 0; k < n; k++)
-;; A[i][j][k] = ...
-;; for (long int k = 0; k < n; k++)
-;; ... = A[i + 3][j + 2][k + 1];
+
+;; for (long int i = 0; i < n; i++) {
+;; for (long int j = 0; j < n; j++) {
+;; for (long int k = 0; k < n; k++) {
+;; A[i][j][k] = i;
+;; }
+;; for (long int k = 0; k < n; k++) {
+;; *B++ = A[i + 3][j + 2][k + 1];
define void @p2(i64 %n, [100 x [100 x i64]]* %A, i64* %B) nounwind uwtable ssp {
entry:
%cmp10 = icmp sgt i64 %n, 0
- br i1 %cmp10, label %for.cond1.preheader, label %for.end26
+ br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [-3 -2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* * *]!
-for.cond1.preheader: ; preds = %for.inc24, %entry
- %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %entry ]
- %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %entry ]
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
+ %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %for.cond1.preheader.preheader ]
+ %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
%cmp26 = icmp sgt i64 %n, 0
- br i1 %cmp26, label %for.cond4.preheader, label %for.inc24
+ br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
+
+for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
+ br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc21, %for.cond1.preheader
- %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond1.preheader ]
- %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond1.preheader ]
+for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
+ %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond4.preheader.preheader ]
+ %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
%cmp51 = icmp sgt i64 %n, 0
- br i1 %cmp51, label %for.body6, label %for.cond10.loopexit
+ br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
-for.body6: ; preds = %for.body6, %for.cond4.preheader
- %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.cond4.preheader ]
+for.body6.preheader: ; preds = %for.cond4.preheader
+ br label %for.body6
+
+for.body6: ; preds = %for.body6.preheader, %for.body6
+ %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]]* %A, i64 %i.011, i64 %j.07, i64 %k.02
store i64 %i.011, i64* %arrayidx8, align 8
%inc = add nsw i64 %k.02, 1
- %cmp5 = icmp slt i64 %inc, %n
- br i1 %cmp5, label %for.body6, label %for.cond10.loopexit
+ %exitcond13 = icmp ne i64 %inc, %n
+ br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
+
+for.cond10.loopexit.loopexit: ; preds = %for.body6
+ br label %for.cond10.loopexit
-for.cond10.loopexit: ; preds = %for.body6, %for.cond4.preheader
+for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
%cmp113 = icmp sgt i64 %n, 0
- br i1 %cmp113, label %for.body12, label %for.inc21
+ br i1 %cmp113, label %for.body12.preheader, label %for.inc21
-for.body12: ; preds = %for.body12, %for.cond10.loopexit
- %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.cond10.loopexit ]
- %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.cond10.loopexit ]
+for.body12.preheader: ; preds = %for.cond10.loopexit
+ br label %for.body12
+
+for.body12: ; preds = %for.body12.preheader, %for.body12
+ %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
+ %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.body12.preheader ]
%add = add nsw i64 %k9.05, 1
%add13 = add nsw i64 %j.07, 2
%add14 = add nsw i64 %i.011, 3
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add
%0 = load i64* %arrayidx17, align 8
-; CHECK: da analyze - flow [-3 -2]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.24, i64 1
store i64 %0, i64* %B.addr.24, align 8
%inc19 = add nsw i64 %k9.05, 1
- %cmp11 = icmp slt i64 %inc19, %n
- br i1 %cmp11, label %for.body12, label %for.inc21
+ %exitcond = icmp ne i64 %inc19, %n
+ br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
-for.inc21: ; preds = %for.body12, %for.cond10.loopexit
- %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %incdec.ptr, %for.body12 ]
+for.inc21.loopexit: ; preds = %for.body12
+ %scevgep = getelementptr i64* %B.addr.18, i64 %n
+ br label %for.inc21
+
+for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
+ %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %scevgep, %for.inc21.loopexit ]
%inc22 = add nsw i64 %j.07, 1
- %cmp2 = icmp slt i64 %inc22, %n
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc24
+ %exitcond14 = icmp ne i64 %inc22, %n
+ br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
+
+for.inc24.loopexit: ; preds = %for.inc21
+ %B.addr.2.lcssa.lcssa = phi i64* [ %B.addr.2.lcssa, %for.inc21 ]
+ br label %for.inc24
-for.inc24: ; preds = %for.inc21, %for.cond1.preheader
- %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa, %for.inc21 ]
+for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc24.loopexit ]
%inc25 = add nsw i64 %i.011, 1
- %cmp = icmp slt i64 %inc25, %n
- br i1 %cmp, label %for.cond1.preheader, label %for.end26
+ %exitcond15 = icmp ne i64 %inc25, %n
+ br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
+
+for.end26.loopexit: ; preds = %for.inc24
+ br label %for.end26
-for.end26: ; preds = %for.inc24, %entry
+for.end26: ; preds = %for.end26.loopexit, %entry
ret void
}
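
In p2 the first k-loop stores A[i][j][k] and the second k-loop reads A[i + 3][j + 2][k + 1], so a read in outer iteration (i, j) touches an element stored in iteration (i + 3, j + 2); taking the difference on the two common loop levels, load iteration minus store iteration, gives the flow [-3 -2] vector checked above. A toy C sketch of that arithmetic (illustrative only):

#include <stdio.h>

int main(void) {
    long li = 7, lj = 7;            /* any read iteration (i, j)       */
    long si = li + 3, sj = lj + 2;  /* iteration storing the element
                                       this read touches               */
    printf("distance = [%ld %ld]\n", li - si, lj - sj);  /* [-3 -2]    */
    return 0;
}
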
-;; classify subscripts
;; for (long int i = 0; i < n; i++)
;; for (long int j = 0; j < n; j++)
;; for (long int k = 0; k < n; k++)
@@ -118,83 +154,127 @@ for.end26: ; preds = %for.inc24, %entry
;; for (long int s = 0; s < n; s++)
;; for (long int u = 0; u < n; u++)
;; for (long int t = 0; t < n; t++) {
-;; A[i - 3] [j] [2] [k-1] [2*l + 1] [m] [p + q] [r + s] = ...
-;; ... = A[i + 3] [2] [u] [1-k] [3*l - 1] [o] [1 + n] [t + 2];
+;; A[i - 3] [j] [2] [k-1] [2*l + 1] [m] [p + q] [r + s] = i;
+;; *B++ = A[i + 3] [2] [u] [1-k] [3*l - 1] [o] [1 + n] [t + 2];
define void @p3(i64 %n, [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]]* %A, i64* %B) nounwind uwtable ssp {
entry:
%cmp44 = icmp sgt i64 %n, 0
- br i1 %cmp44, label %for.cond1.preheader, label %for.end90
+ br i1 %cmp44, label %for.cond1.preheader.preheader, label %for.end90
+
+; CHECK: da analyze - output [0 0 0 0 0 S * * * * S S]!
+; CHECK: da analyze - flow [-6 * * => * * * * * * * *] splitable!
+; CHECK: da analyze - split level = 3, iteration = 1!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 S 0 0 S 0 S S S S 0 0]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* * * * * * * * * * * *]!
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc88, %entry
- %B.addr.046 = phi i64* [ %B.addr.1.lcssa, %for.inc88 ], [ %B, %entry ]
- %i.045 = phi i64 [ %inc89, %for.inc88 ], [ 0, %entry ]
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc88
+ %B.addr.046 = phi i64* [ %B.addr.1.lcssa, %for.inc88 ], [ %B, %for.cond1.preheader.preheader ]
+ %i.045 = phi i64 [ %inc89, %for.inc88 ], [ 0, %for.cond1.preheader.preheader ]
%cmp240 = icmp sgt i64 %n, 0
- br i1 %cmp240, label %for.cond4.preheader, label %for.inc88
+ br i1 %cmp240, label %for.cond4.preheader.preheader, label %for.inc88
-for.cond4.preheader: ; preds = %for.inc85, %for.cond1.preheader
- %B.addr.142 = phi i64* [ %B.addr.2.lcssa, %for.inc85 ], [ %B.addr.046, %for.cond1.preheader ]
- %j.041 = phi i64 [ %inc86, %for.inc85 ], [ 0, %for.cond1.preheader ]
+for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc85
+ %B.addr.142 = phi i64* [ %B.addr.2.lcssa, %for.inc85 ], [ %B.addr.046, %for.cond4.preheader.preheader ]
+ %j.041 = phi i64 [ %inc86, %for.inc85 ], [ 0, %for.cond4.preheader.preheader ]
%cmp536 = icmp sgt i64 %n, 0
- br i1 %cmp536, label %for.cond7.preheader, label %for.inc85
+ br i1 %cmp536, label %for.cond7.preheader.preheader, label %for.inc85
+
+for.cond7.preheader.preheader: ; preds = %for.cond4.preheader
+ br label %for.cond7.preheader
-for.cond7.preheader: ; preds = %for.inc82, %for.cond4.preheader
- %B.addr.238 = phi i64* [ %B.addr.3.lcssa, %for.inc82 ], [ %B.addr.142, %for.cond4.preheader ]
- %k.037 = phi i64 [ %inc83, %for.inc82 ], [ 0, %for.cond4.preheader ]
+for.cond7.preheader: ; preds = %for.cond7.preheader.preheader, %for.inc82
+ %B.addr.238 = phi i64* [ %B.addr.3.lcssa, %for.inc82 ], [ %B.addr.142, %for.cond7.preheader.preheader ]
+ %k.037 = phi i64 [ %inc83, %for.inc82 ], [ 0, %for.cond7.preheader.preheader ]
%cmp832 = icmp sgt i64 %n, 0
- br i1 %cmp832, label %for.cond10.preheader, label %for.inc82
+ br i1 %cmp832, label %for.cond10.preheader.preheader, label %for.inc82
+
+for.cond10.preheader.preheader: ; preds = %for.cond7.preheader
+ br label %for.cond10.preheader
-for.cond10.preheader: ; preds = %for.inc79, %for.cond7.preheader
- %B.addr.334 = phi i64* [ %B.addr.4.lcssa, %for.inc79 ], [ %B.addr.238, %for.cond7.preheader ]
- %l.033 = phi i64 [ %inc80, %for.inc79 ], [ 0, %for.cond7.preheader ]
+for.cond10.preheader: ; preds = %for.cond10.preheader.preheader, %for.inc79
+ %B.addr.334 = phi i64* [ %B.addr.4.lcssa, %for.inc79 ], [ %B.addr.238, %for.cond10.preheader.preheader ]
+ %l.033 = phi i64 [ %inc80, %for.inc79 ], [ 0, %for.cond10.preheader.preheader ]
%cmp1128 = icmp sgt i64 %n, 0
- br i1 %cmp1128, label %for.cond13.preheader, label %for.inc79
+ br i1 %cmp1128, label %for.cond13.preheader.preheader, label %for.inc79
-for.cond13.preheader: ; preds = %for.inc76, %for.cond10.preheader
- %B.addr.430 = phi i64* [ %B.addr.5.lcssa, %for.inc76 ], [ %B.addr.334, %for.cond10.preheader ]
- %m.029 = phi i64 [ %inc77, %for.inc76 ], [ 0, %for.cond10.preheader ]
+for.cond13.preheader.preheader: ; preds = %for.cond10.preheader
+ br label %for.cond13.preheader
+
+for.cond13.preheader: ; preds = %for.cond13.preheader.preheader, %for.inc76
+ %B.addr.430 = phi i64* [ %B.addr.5.lcssa, %for.inc76 ], [ %B.addr.334, %for.cond13.preheader.preheader ]
+ %m.029 = phi i64 [ %inc77, %for.inc76 ], [ 0, %for.cond13.preheader.preheader ]
%cmp1424 = icmp sgt i64 %n, 0
- br i1 %cmp1424, label %for.cond16.preheader, label %for.inc76
+ br i1 %cmp1424, label %for.cond16.preheader.preheader, label %for.inc76
+
+for.cond16.preheader.preheader: ; preds = %for.cond13.preheader
+ br label %for.cond16.preheader
-for.cond16.preheader: ; preds = %for.inc73, %for.cond13.preheader
- %B.addr.526 = phi i64* [ %B.addr.6.lcssa, %for.inc73 ], [ %B.addr.430, %for.cond13.preheader ]
- %o.025 = phi i64 [ %inc74, %for.inc73 ], [ 0, %for.cond13.preheader ]
+for.cond16.preheader: ; preds = %for.cond16.preheader.preheader, %for.inc73
+ %B.addr.526 = phi i64* [ %B.addr.6.lcssa, %for.inc73 ], [ %B.addr.430, %for.cond16.preheader.preheader ]
+ %o.025 = phi i64 [ %inc74, %for.inc73 ], [ 0, %for.cond16.preheader.preheader ]
%cmp1720 = icmp sgt i64 %n, 0
- br i1 %cmp1720, label %for.cond19.preheader, label %for.inc73
+ br i1 %cmp1720, label %for.cond19.preheader.preheader, label %for.inc73
-for.cond19.preheader: ; preds = %for.inc70, %for.cond16.preheader
- %B.addr.622 = phi i64* [ %B.addr.7.lcssa, %for.inc70 ], [ %B.addr.526, %for.cond16.preheader ]
- %p.021 = phi i64 [ %inc71, %for.inc70 ], [ 0, %for.cond16.preheader ]
+for.cond19.preheader.preheader: ; preds = %for.cond16.preheader
+ br label %for.cond19.preheader
+
+for.cond19.preheader: ; preds = %for.cond19.preheader.preheader, %for.inc70
+ %B.addr.622 = phi i64* [ %B.addr.7.lcssa, %for.inc70 ], [ %B.addr.526, %for.cond19.preheader.preheader ]
+ %p.021 = phi i64 [ %inc71, %for.inc70 ], [ 0, %for.cond19.preheader.preheader ]
%cmp2016 = icmp sgt i64 %n, 0
- br i1 %cmp2016, label %for.cond22.preheader, label %for.inc70
+ br i1 %cmp2016, label %for.cond22.preheader.preheader, label %for.inc70
+
+for.cond22.preheader.preheader: ; preds = %for.cond19.preheader
+ br label %for.cond22.preheader
-for.cond22.preheader: ; preds = %for.inc67, %for.cond19.preheader
- %B.addr.718 = phi i64* [ %B.addr.8.lcssa, %for.inc67 ], [ %B.addr.622, %for.cond19.preheader ]
- %q.017 = phi i64 [ %inc68, %for.inc67 ], [ 0, %for.cond19.preheader ]
+for.cond22.preheader: ; preds = %for.cond22.preheader.preheader, %for.inc67
+ %B.addr.718 = phi i64* [ %B.addr.8.lcssa, %for.inc67 ], [ %B.addr.622, %for.cond22.preheader.preheader ]
+ %q.017 = phi i64 [ %inc68, %for.inc67 ], [ 0, %for.cond22.preheader.preheader ]
%cmp2312 = icmp sgt i64 %n, 0
- br i1 %cmp2312, label %for.cond25.preheader, label %for.inc67
+ br i1 %cmp2312, label %for.cond25.preheader.preheader, label %for.inc67
-for.cond25.preheader: ; preds = %for.inc64, %for.cond22.preheader
- %B.addr.814 = phi i64* [ %B.addr.9.lcssa, %for.inc64 ], [ %B.addr.718, %for.cond22.preheader ]
- %r.013 = phi i64 [ %inc65, %for.inc64 ], [ 0, %for.cond22.preheader ]
+for.cond25.preheader.preheader: ; preds = %for.cond22.preheader
+ br label %for.cond25.preheader
+
+for.cond25.preheader: ; preds = %for.cond25.preheader.preheader, %for.inc64
+ %B.addr.814 = phi i64* [ %B.addr.9.lcssa, %for.inc64 ], [ %B.addr.718, %for.cond25.preheader.preheader ]
+ %r.013 = phi i64 [ %inc65, %for.inc64 ], [ 0, %for.cond25.preheader.preheader ]
%cmp268 = icmp sgt i64 %n, 0
- br i1 %cmp268, label %for.cond28.preheader, label %for.inc64
+ br i1 %cmp268, label %for.cond28.preheader.preheader, label %for.inc64
+
+for.cond28.preheader.preheader: ; preds = %for.cond25.preheader
+ br label %for.cond28.preheader
-for.cond28.preheader: ; preds = %for.inc61, %for.cond25.preheader
- %B.addr.910 = phi i64* [ %B.addr.10.lcssa, %for.inc61 ], [ %B.addr.814, %for.cond25.preheader ]
- %s.09 = phi i64 [ %inc62, %for.inc61 ], [ 0, %for.cond25.preheader ]
+for.cond28.preheader: ; preds = %for.cond28.preheader.preheader, %for.inc61
+ %B.addr.910 = phi i64* [ %B.addr.10.lcssa, %for.inc61 ], [ %B.addr.814, %for.cond28.preheader.preheader ]
+ %s.09 = phi i64 [ %inc62, %for.inc61 ], [ 0, %for.cond28.preheader.preheader ]
%cmp294 = icmp sgt i64 %n, 0
- br i1 %cmp294, label %for.cond31.preheader, label %for.inc61
+ br i1 %cmp294, label %for.cond31.preheader.preheader, label %for.inc61
-for.cond31.preheader: ; preds = %for.inc58, %for.cond28.preheader
- %u.06 = phi i64 [ %inc59, %for.inc58 ], [ 0, %for.cond28.preheader ]
- %B.addr.105 = phi i64* [ %B.addr.11.lcssa, %for.inc58 ], [ %B.addr.910, %for.cond28.preheader ]
+for.cond31.preheader.preheader: ; preds = %for.cond28.preheader
+ br label %for.cond31.preheader
+
+for.cond31.preheader: ; preds = %for.cond31.preheader.preheader, %for.inc58
+ %u.06 = phi i64 [ %inc59, %for.inc58 ], [ 0, %for.cond31.preheader.preheader ]
+ %B.addr.105 = phi i64* [ %B.addr.11.lcssa, %for.inc58 ], [ %B.addr.910, %for.cond31.preheader.preheader ]
%cmp321 = icmp sgt i64 %n, 0
- br i1 %cmp321, label %for.body33, label %for.inc58
+ br i1 %cmp321, label %for.body33.preheader, label %for.inc58
+
+for.body33.preheader: ; preds = %for.cond31.preheader
+ br label %for.body33
-for.body33: ; preds = %for.body33, %for.cond31.preheader
- %t.03 = phi i64 [ %inc, %for.body33 ], [ 0, %for.cond31.preheader ]
- %B.addr.112 = phi i64* [ %incdec.ptr, %for.body33 ], [ %B.addr.105, %for.cond31.preheader ]
+for.body33: ; preds = %for.body33.preheader, %for.body33
+ %t.03 = phi i64 [ %inc, %for.body33 ], [ 0, %for.body33.preheader ]
+ %B.addr.112 = phi i64* [ %incdec.ptr, %for.body33 ], [ %B.addr.105, %for.body33.preheader ]
%add = add nsw i64 %r.013, %s.09
%add34 = add nsw i64 %p.021, %q.017
%mul = shl nsw i64 %l.033, 1
@@ -211,99 +291,153 @@ for.body33: ; preds = %for.body33, %for.co
%add49 = add nsw i64 %i.045, 3
%arrayidx57 = getelementptr inbounds [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]]* %A, i64 %add49, i64 2, i64 %u.06, i64 %sub48, i64 %sub47, i64 %o.025, i64 %add45, i64 %add44
%0 = load i64* %arrayidx57, align 8
-; CHECK: da analyze - flow [-6 * * => * * * * * * * *] splitable!
-; CHECK: da analyze - split level = 3, iteration = 1!
%incdec.ptr = getelementptr inbounds i64* %B.addr.112, i64 1
store i64 %0, i64* %B.addr.112, align 8
%inc = add nsw i64 %t.03, 1
- %cmp32 = icmp slt i64 %inc, %n
- br i1 %cmp32, label %for.body33, label %for.inc58
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body33, label %for.inc58.loopexit
+
+for.inc58.loopexit: ; preds = %for.body33
+ %scevgep = getelementptr i64* %B.addr.105, i64 %n
+ br label %for.inc58
-for.inc58: ; preds = %for.body33, %for.cond31.preheader
- %B.addr.11.lcssa = phi i64* [ %B.addr.105, %for.cond31.preheader ], [ %incdec.ptr, %for.body33 ]
+for.inc58: ; preds = %for.inc58.loopexit, %for.cond31.preheader
+ %B.addr.11.lcssa = phi i64* [ %B.addr.105, %for.cond31.preheader ], [ %scevgep, %for.inc58.loopexit ]
%inc59 = add nsw i64 %u.06, 1
- %cmp29 = icmp slt i64 %inc59, %n
- br i1 %cmp29, label %for.cond31.preheader, label %for.inc61
+ %exitcond48 = icmp ne i64 %inc59, %n
+ br i1 %exitcond48, label %for.cond31.preheader, label %for.inc61.loopexit
-for.inc61: ; preds = %for.inc58, %for.cond28.preheader
- %B.addr.10.lcssa = phi i64* [ %B.addr.910, %for.cond28.preheader ], [ %B.addr.11.lcssa, %for.inc58 ]
+for.inc61.loopexit: ; preds = %for.inc58
+ %B.addr.11.lcssa.lcssa = phi i64* [ %B.addr.11.lcssa, %for.inc58 ]
+ br label %for.inc61
+
+for.inc61: ; preds = %for.inc61.loopexit, %for.cond28.preheader
+ %B.addr.10.lcssa = phi i64* [ %B.addr.910, %for.cond28.preheader ], [ %B.addr.11.lcssa.lcssa, %for.inc61.loopexit ]
%inc62 = add nsw i64 %s.09, 1
- %cmp26 = icmp slt i64 %inc62, %n
- br i1 %cmp26, label %for.cond28.preheader, label %for.inc64
+ %exitcond49 = icmp ne i64 %inc62, %n
+ br i1 %exitcond49, label %for.cond28.preheader, label %for.inc64.loopexit
+
+for.inc64.loopexit: ; preds = %for.inc61
+ %B.addr.10.lcssa.lcssa = phi i64* [ %B.addr.10.lcssa, %for.inc61 ]
+ br label %for.inc64
-for.inc64: ; preds = %for.inc61, %for.cond25.preheader
- %B.addr.9.lcssa = phi i64* [ %B.addr.814, %for.cond25.preheader ], [ %B.addr.10.lcssa, %for.inc61 ]
+for.inc64: ; preds = %for.inc64.loopexit, %for.cond25.preheader
+ %B.addr.9.lcssa = phi i64* [ %B.addr.814, %for.cond25.preheader ], [ %B.addr.10.lcssa.lcssa, %for.inc64.loopexit ]
%inc65 = add nsw i64 %r.013, 1
- %cmp23 = icmp slt i64 %inc65, %n
- br i1 %cmp23, label %for.cond25.preheader, label %for.inc67
+ %exitcond50 = icmp ne i64 %inc65, %n
+ br i1 %exitcond50, label %for.cond25.preheader, label %for.inc67.loopexit
-for.inc67: ; preds = %for.inc64, %for.cond22.preheader
- %B.addr.8.lcssa = phi i64* [ %B.addr.718, %for.cond22.preheader ], [ %B.addr.9.lcssa, %for.inc64 ]
+for.inc67.loopexit: ; preds = %for.inc64
+ %B.addr.9.lcssa.lcssa = phi i64* [ %B.addr.9.lcssa, %for.inc64 ]
+ br label %for.inc67
+
+for.inc67: ; preds = %for.inc67.loopexit, %for.cond22.preheader
+ %B.addr.8.lcssa = phi i64* [ %B.addr.718, %for.cond22.preheader ], [ %B.addr.9.lcssa.lcssa, %for.inc67.loopexit ]
%inc68 = add nsw i64 %q.017, 1
- %cmp20 = icmp slt i64 %inc68, %n
- br i1 %cmp20, label %for.cond22.preheader, label %for.inc70
+ %exitcond51 = icmp ne i64 %inc68, %n
+ br i1 %exitcond51, label %for.cond22.preheader, label %for.inc70.loopexit
+
+for.inc70.loopexit: ; preds = %for.inc67
+ %B.addr.8.lcssa.lcssa = phi i64* [ %B.addr.8.lcssa, %for.inc67 ]
+ br label %for.inc70
-for.inc70: ; preds = %for.inc67, %for.cond19.preheader
- %B.addr.7.lcssa = phi i64* [ %B.addr.622, %for.cond19.preheader ], [ %B.addr.8.lcssa, %for.inc67 ]
+for.inc70: ; preds = %for.inc70.loopexit, %for.cond19.preheader
+ %B.addr.7.lcssa = phi i64* [ %B.addr.622, %for.cond19.preheader ], [ %B.addr.8.lcssa.lcssa, %for.inc70.loopexit ]
%inc71 = add nsw i64 %p.021, 1
- %cmp17 = icmp slt i64 %inc71, %n
- br i1 %cmp17, label %for.cond19.preheader, label %for.inc73
+ %exitcond52 = icmp ne i64 %inc71, %n
+ br i1 %exitcond52, label %for.cond19.preheader, label %for.inc73.loopexit
-for.inc73: ; preds = %for.inc70, %for.cond16.preheader
- %B.addr.6.lcssa = phi i64* [ %B.addr.526, %for.cond16.preheader ], [ %B.addr.7.lcssa, %for.inc70 ]
+for.inc73.loopexit: ; preds = %for.inc70
+ %B.addr.7.lcssa.lcssa = phi i64* [ %B.addr.7.lcssa, %for.inc70 ]
+ br label %for.inc73
+
+for.inc73: ; preds = %for.inc73.loopexit, %for.cond16.preheader
+ %B.addr.6.lcssa = phi i64* [ %B.addr.526, %for.cond16.preheader ], [ %B.addr.7.lcssa.lcssa, %for.inc73.loopexit ]
%inc74 = add nsw i64 %o.025, 1
- %cmp14 = icmp slt i64 %inc74, %n
- br i1 %cmp14, label %for.cond16.preheader, label %for.inc76
+ %exitcond53 = icmp ne i64 %inc74, %n
+ br i1 %exitcond53, label %for.cond16.preheader, label %for.inc76.loopexit
+
+for.inc76.loopexit: ; preds = %for.inc73
+ %B.addr.6.lcssa.lcssa = phi i64* [ %B.addr.6.lcssa, %for.inc73 ]
+ br label %for.inc76
-for.inc76: ; preds = %for.inc73, %for.cond13.preheader
- %B.addr.5.lcssa = phi i64* [ %B.addr.430, %for.cond13.preheader ], [ %B.addr.6.lcssa, %for.inc73 ]
+for.inc76: ; preds = %for.inc76.loopexit, %for.cond13.preheader
+ %B.addr.5.lcssa = phi i64* [ %B.addr.430, %for.cond13.preheader ], [ %B.addr.6.lcssa.lcssa, %for.inc76.loopexit ]
%inc77 = add nsw i64 %m.029, 1
- %cmp11 = icmp slt i64 %inc77, %n
- br i1 %cmp11, label %for.cond13.preheader, label %for.inc79
+ %exitcond54 = icmp ne i64 %inc77, %n
+ br i1 %exitcond54, label %for.cond13.preheader, label %for.inc79.loopexit
+
+for.inc79.loopexit: ; preds = %for.inc76
+ %B.addr.5.lcssa.lcssa = phi i64* [ %B.addr.5.lcssa, %for.inc76 ]
+ br label %for.inc79
-for.inc79: ; preds = %for.inc76, %for.cond10.preheader
- %B.addr.4.lcssa = phi i64* [ %B.addr.334, %for.cond10.preheader ], [ %B.addr.5.lcssa, %for.inc76 ]
+for.inc79: ; preds = %for.inc79.loopexit, %for.cond10.preheader
+ %B.addr.4.lcssa = phi i64* [ %B.addr.334, %for.cond10.preheader ], [ %B.addr.5.lcssa.lcssa, %for.inc79.loopexit ]
%inc80 = add nsw i64 %l.033, 1
- %cmp8 = icmp slt i64 %inc80, %n
- br i1 %cmp8, label %for.cond10.preheader, label %for.inc82
+ %exitcond55 = icmp ne i64 %inc80, %n
+ br i1 %exitcond55, label %for.cond10.preheader, label %for.inc82.loopexit
-for.inc82: ; preds = %for.inc79, %for.cond7.preheader
- %B.addr.3.lcssa = phi i64* [ %B.addr.238, %for.cond7.preheader ], [ %B.addr.4.lcssa, %for.inc79 ]
+for.inc82.loopexit: ; preds = %for.inc79
+ %B.addr.4.lcssa.lcssa = phi i64* [ %B.addr.4.lcssa, %for.inc79 ]
+ br label %for.inc82
+
+for.inc82: ; preds = %for.inc82.loopexit, %for.cond7.preheader
+ %B.addr.3.lcssa = phi i64* [ %B.addr.238, %for.cond7.preheader ], [ %B.addr.4.lcssa.lcssa, %for.inc82.loopexit ]
%inc83 = add nsw i64 %k.037, 1
- %cmp5 = icmp slt i64 %inc83, %n
- br i1 %cmp5, label %for.cond7.preheader, label %for.inc85
+ %exitcond56 = icmp ne i64 %inc83, %n
+ br i1 %exitcond56, label %for.cond7.preheader, label %for.inc85.loopexit
+
+for.inc85.loopexit: ; preds = %for.inc82
+ %B.addr.3.lcssa.lcssa = phi i64* [ %B.addr.3.lcssa, %for.inc82 ]
+ br label %for.inc85
-for.inc85: ; preds = %for.inc82, %for.cond4.preheader
- %B.addr.2.lcssa = phi i64* [ %B.addr.142, %for.cond4.preheader ], [ %B.addr.3.lcssa, %for.inc82 ]
+for.inc85: ; preds = %for.inc85.loopexit, %for.cond4.preheader
+ %B.addr.2.lcssa = phi i64* [ %B.addr.142, %for.cond4.preheader ], [ %B.addr.3.lcssa.lcssa, %for.inc85.loopexit ]
%inc86 = add nsw i64 %j.041, 1
- %cmp2 = icmp slt i64 %inc86, %n
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc88
+ %exitcond57 = icmp ne i64 %inc86, %n
+ br i1 %exitcond57, label %for.cond4.preheader, label %for.inc88.loopexit
-for.inc88: ; preds = %for.inc85, %for.cond1.preheader
- %B.addr.1.lcssa = phi i64* [ %B.addr.046, %for.cond1.preheader ], [ %B.addr.2.lcssa, %for.inc85 ]
+for.inc88.loopexit: ; preds = %for.inc85
+ %B.addr.2.lcssa.lcssa = phi i64* [ %B.addr.2.lcssa, %for.inc85 ]
+ br label %for.inc88
+
+for.inc88: ; preds = %for.inc88.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i64* [ %B.addr.046, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc88.loopexit ]
%inc89 = add nsw i64 %i.045, 1
- %cmp = icmp slt i64 %inc89, %n
- br i1 %cmp, label %for.cond1.preheader, label %for.end90
+ %exitcond58 = icmp ne i64 %inc89, %n
+ br i1 %exitcond58, label %for.cond1.preheader, label %for.end90.loopexit
+
+for.end90.loopexit: ; preds = %for.inc88
+ br label %for.end90
-for.end90: ; preds = %for.inc88, %entry
+for.end90: ; preds = %for.end90.loopexit, %entry
ret void
}
-;; cleanup around chars, shorts, ints
-;;void p4(int *A, int *B, long int n)
-;; for (char i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;;void p4(int *A, int *B, long int n) {
+;; for (char i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @p4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp sgt i64 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i8 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - output [*]!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i8 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv2 = sext i8 %i.03 to i32
%conv3 = sext i8 %i.03 to i64
%add = add i64 %conv3, 2
@@ -312,32 +446,44 @@ for.body: ; preds = %for.body, %entry
%idxprom4 = sext i8 %i.03 to i64
%arrayidx5 = getelementptr inbounds i32* %A, i64 %idxprom4
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i8 %i.03, 1
%conv = sext i8 %inc to i64
%cmp = icmp slt i64 %conv, %n
- br i1 %cmp, label %for.body, label %for.end
+ br i1 %cmp, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;;void p5(int *A, int *B, long int n)
-;; for (short i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;;void p5(int *A, int *B, long int n) {
+;; for (short i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @p5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp sgt i64 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i16 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - output [*]!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i16 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv2 = sext i16 %i.03 to i32
%conv3 = sext i16 %i.03 to i64
%add = add i64 %conv3, 2
@@ -346,124 +492,208 @@ for.body: ; preds = %for.body, %entry
%idxprom4 = sext i16 %i.03 to i64
%arrayidx5 = getelementptr inbounds i32* %A, i64 %idxprom4
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i16 %i.03, 1
%conv = sext i16 %inc to i64
%cmp = icmp slt i64 %conv, %n
- br i1 %cmp, label %for.body, label %for.end
+ br i1 %cmp, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;;void p6(int *A, int *B, long int n)
-;; for (int i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;;void p6(int *A, int *B, long int n) {
+;; for (int i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @p6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp sgt i64 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
- %add = add nsw i32 %i.03, 2
- %idxprom = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
- store i32 %i.03, i32* %arrayidx, align 4
- %idxprom2 = sext i32 %i.03 to i64
- %arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2
- %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - none!
; CHECK: da analyze - consistent flow [2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
+ %0 = add nsw i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %0
+ %1 = trunc i64 %indvars.iv to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
- store i32 %0, i32* %B.addr.02, align 4
- %inc = add nsw i32 %i.03, 1
- %conv = sext i32 %inc to i64
- %cmp = icmp slt i64 %conv, %n
- br i1 %cmp, label %for.body, label %for.end
+ store i32 %2, i32* %B.addr.02, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
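
With the induction variable widened to i64 (the %indvars.iv phi in @p6 above), the analysis can pin the distance exactly: the value stored to A[i + 2] in iteration i is the value loaded from A[i'] in iteration i' = i + 2, so the flow distance is (i + 2) - i = 2 and the report is `consistent flow [2]`. In @p4 and @p5 the i8/i16 counters are re-sign-extended on every trip, so no constant distance can be proven and only the weaker `flow [*|<]` is reported. A stripped-down sketch of the distance-2 pattern (hypothetical name, assumes %n >= 1; illustrative only, not part of the test file):

define void @distance2_sketch(i32* %A, i64 %n) nounwind {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  %add = add nsw i64 %i, 2
  %st.addr = getelementptr inbounds i32* %A, i64 %add   ; writes A[i + 2]
  store i32 0, i32* %st.addr, align 4
  %ld.addr = getelementptr inbounds i32* %A, i64 %i     ; reads A[i], stored two trips earlier
  %0 = load i32* %ld.addr, align 4
  %i.next = add nsw i64 %i, 1
  %exitcond = icmp ne i64 %i.next, %n
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}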
-;;void p7(unsigned *A, unsigned *B, char n)
-;; A[n] = ...
-;; ... = A[n + 1];
+;;void p7(unsigned *A, unsigned *B, char n) {
+;; A[n] = 0;
+;; *B = A[n + 1];
define void @p7(i32* %A, i32* %B, i8 signext %n) nounwind uwtable ssp {
entry:
%idxprom = sext i8 %n to i64
%arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
store i32 0, i32* %arrayidx, align 4
%conv = sext i8 %n to i64
%add = add i64 %conv, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
store i32 %0, i32* %B, align 4
ret void
}
-
-;;void p8(unsigned *A, unsigned *B, short n)
-;; A[n] = ...
-;; ... = A[n + 1];
+;;void p8(unsigned *A, unsigned *B, short n) {
+;; A[n] = 0;
+;; *B = A[n + 1];
define void @p8(i32* %A, i32* %B, i16 signext %n) nounwind uwtable ssp {
entry:
%idxprom = sext i16 %n to i64
%arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
%conv = sext i16 %n to i64
%add = add i64 %conv, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
store i32 %0, i32* %B, align 4
ret void
}
-;;void p9(unsigned *A, unsigned *B, int n)
-;; A[n] = ...
-;; ... = A[n + 1];
+;;void p9(unsigned *A, unsigned *B, int n) {
+;; A[n] = 0;
+;; *B = A[n + 1];
define void @p9(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
%add = add nsw i32 %n, 1
%idxprom1 = sext i32 %add to i64
%arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
store i32 %0, i32* %B, align 4
ret void
}
-;;void p10(unsigned *A, unsigned *B, unsigned n)
-;; A[n] = ...
-;; ... = A[n + 1];
+;;void p10(unsigned *A, unsigned *B, unsigned n) {
+;; A[n] = 0;
+;; *B = A[n + 1];
define void @p10(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
%idxprom = zext i32 %n to i64
%arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
%add = add i32 %n, 1
%idxprom1 = zext i32 %add to i64
%arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
store i32 %0, i32* %B, align 4
ret void
}
+
+
+;;typedef struct { int v; } S;
+;;
+;;void f(S *s, unsigned size) {
+;; S *i = s, *e = s + size - 1;
+;; while (i != e) {
+;; *i = *(i + 1);
+;; ++i;
+
+%struct.S = type { i32 }
+
+define void @f(%struct.S* %s, i32 %size) nounwind uwtable ssp {
+entry:
+ %idx.ext = zext i32 %size to i64
+ %add.ptr.sum = add i64 %idx.ext, -1
+ %add.ptr1 = getelementptr inbounds %struct.S* %s, i64 %add.ptr.sum
+ %cmp1 = icmp eq i64 %add.ptr.sum, 0
+ br i1 %cmp1, label %while.end, label %while.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent anti [1]!
+; CHECK: da analyze - none!
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %i.02 = phi %struct.S* [ %incdec.ptr, %while.body ], [ %s, %while.body.preheader ]
+ %0 = getelementptr inbounds %struct.S* %i.02, i64 1, i32 0
+ %1 = load i32* %0, align 4
+ %2 = getelementptr inbounds %struct.S* %i.02, i64 0, i32 0
+ store i32 %1, i32* %2, align 4
+ %incdec.ptr = getelementptr inbounds %struct.S* %i.02, i64 1
+ %cmp = icmp eq %struct.S* %incdec.ptr, %add.ptr1
+ br i1 %cmp, label %while.end.loopexit, label %while.body
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ ret void
+}
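+
+; The "consistent anti [1]" above captures the *i = *(i + 1) copy directly:
+; the element loaded through i + 1 in iteration k is the element stored
+; through i in iteration k + 1, so the read precedes the conflicting write by
+; (k + 1) - k = 1 trip, a write-after-read (anti) dependence of distance 1.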
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Analysis/DependenceAnalysis/Propagating.ll b/test/Analysis/DependenceAnalysis/Propagating.ll
index 076348c68dc8..f9034ede9d0b 100644
--- a/test/Analysis/DependenceAnalysis/Propagating.ll
+++ b/test/Analysis/DependenceAnalysis/Propagating.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
+;; for (long int j = 0; j < 100; j++) {
;; A[i + 1][i + j] = i;
;; *B++ = A[i][i + j];
@@ -14,12 +14,19 @@ define void @prop0([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc9, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc9 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [1 -1]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc9
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -30,17 +37,17 @@ for.body3: ; preds = %for.body3, %for.con
%add6 = add nsw i64 %i.03, %j.02
%arrayidx8 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add6
%0 = load i32* %arrayidx8, align 4
-; CHECK: da analyze - consistent flow [1 -1]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc9
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc9
for.inc9: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc10 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc10, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end11
+ %exitcond5 = icmp ne i64 %inc10, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end11
for.end11: ; preds = %for.inc9
ret void
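
In these Propagating.ll hunks the outer-loop phis stop reusing the innermost `%incdec.ptr` and instead take a `%scevgep` computed once per outer iteration. The step is simply the inner trip count times the per-trip stride: the inner loop bumps B by one i32 on each of its 100 trips, so `getelementptr i32* %B.addr.04, i64 100` reaches the same address, and @prop1 below uses 100 * 100 = 10000 one level further out. The Separability.ll hunks later in the patch follow suit with 50, 50^2 = 2500, and 50^3 = 125000 for their four-deep nests.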
@@ -49,25 +56,32 @@ for.end11: ; preds = %for.inc9
;; for (long int i = 0; i < 100; i++)
;; for (long int j = 0; j < 100; j++)
-;; for (long int k = 0; k < 100; k++)
-;; A[j - i][i + 1][j + k] = ...
-;; ... = A[j - i][i][j + k];
+;; for (long int k = 0; k < 100; k++) {
+;; A[j - i][i + 1][j + k] = i;
+;; *B++ = A[j - i][i][j + k];
define void @prop1([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc18, %entry
- %B.addr.06 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc18 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [1 1 -1]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc18
+ %B.addr.06 = phi i32* [ %B, %entry ], [ %scevgep7, %for.inc18 ]
%i.05 = phi i64 [ 0, %entry ], [ %inc19, %for.inc18 ]
br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc15, %for.cond1.preheader
- %B.addr.14 = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %incdec.ptr, %for.inc15 ]
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc15
+ %B.addr.14 = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %scevgep, %for.inc15 ]
%j.03 = phi i64 [ 0, %for.cond1.preheader ], [ %inc16, %for.inc15 ]
br label %for.body6
-for.body6: ; preds = %for.body6, %for.cond4.preheader
+for.body6: ; preds = %for.cond4.preheader, %for.body6
%k.02 = phi i64 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
%B.addr.21 = phi i32* [ %B.addr.14, %for.cond4.preheader ], [ %incdec.ptr, %for.body6 ]
%conv = trunc i64 %i.05 to i32
@@ -80,22 +94,23 @@ for.body6: ; preds = %for.body6, %for.con
%sub11 = sub nsw i64 %j.03, %i.05
%arrayidx14 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub11, i64 %i.05, i64 %add10
%0 = load i32* %arrayidx14, align 4
-; CHECK: da analyze - consistent flow [1 1 -1]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.21, i64 1
store i32 %0, i32* %B.addr.21, align 4
%inc = add nsw i64 %k.02, 1
- %cmp5 = icmp slt i64 %inc, 100
- br i1 %cmp5, label %for.body6, label %for.inc15
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body6, label %for.inc15
for.inc15: ; preds = %for.body6
+ %scevgep = getelementptr i32* %B.addr.14, i64 100
%inc16 = add nsw i64 %j.03, 1
- %cmp2 = icmp slt i64 %inc16, 100
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc18
+ %exitcond8 = icmp ne i64 %inc16, 100
+ br i1 %exitcond8, label %for.cond4.preheader, label %for.inc18
for.inc18: ; preds = %for.inc15
+ %scevgep7 = getelementptr i32* %B.addr.06, i64 10000
%inc19 = add nsw i64 %i.05, 1
- %cmp = icmp slt i64 %inc19, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end20
+ %exitcond9 = icmp ne i64 %inc19, 100
+ br i1 %exitcond9, label %for.cond1.preheader, label %for.end20
for.end20: ; preds = %for.inc18
ret void
@@ -103,20 +118,27 @@ for.end20: ; preds = %for.inc18
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[i - 1][2*i] = ...
-;; ... = A[i][i + j + 110];
+;; for (long int j = 0; j < 100; j++) {
+;; A[i - 1][2*i] = i;
+;; *B++ = A[i][i + j + 110];
define void @prop2([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc8, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc8 ]
+; CHECK: da analyze - consistent output [0 S]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -128,17 +150,17 @@ for.body3: ; preds = %for.body3, %for.con
%add5 = add nsw i64 %add, 110
%arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add5
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc8
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc8
for.inc8: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc9 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc9, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end10
+ %exitcond5 = icmp ne i64 %inc9, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end10
for.end10: ; preds = %for.inc8
ret void
@@ -146,20 +168,27 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[i][2*j + i] = ...
-;; ... = A[i][2*j - i + 5];
+;; for (long int j = 0; j < 100; j++) {
+;; A[i][2*j + i] = i;
+;; *B++ = A[i][2*j - i + 5];
define void @prop3([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc9, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc9 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc9
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -172,39 +201,45 @@ for.body3: ; preds = %for.body3, %for.con
%add6 = add nsw i64 %sub, 5
%arrayidx8 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add6
%0 = load i32* %arrayidx8, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc9
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc9
for.inc9: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc10 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc10, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end11
+ %exitcond5 = icmp ne i64 %inc10, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end11
for.end11: ; preds = %for.inc9
ret void
}
-;; propagate Distance
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[i + 2][2*i + j + 1] = ...
-;; ... = A[i][2*i + j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[i + 2][2*i + j + 1] = i;
+;; *B++ = A[i][2*i + j];
define void @prop4([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc11, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc11 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [2 -3]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc11
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc11 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc12, %for.inc11 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -218,39 +253,46 @@ for.body3: ; preds = %for.body3, %for.con
%add8 = add nsw i64 %mul7, %j.02
%arrayidx10 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add8
%0 = load i32* %arrayidx10, align 4
-; CHECK: da analyze - consistent flow [2 -3]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc11
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc11
for.inc11: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc12 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc12, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end13
+ %exitcond5 = icmp ne i64 %inc12, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end13
for.end13: ; preds = %for.inc11
ret void
}
-;; propagate Point
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[3*i - 18][22 - i][2*i + j] = ...
-;; ... = A[i][i][3*i + j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[3*i - 18][22 - i][2*i + j] = i;
+;; *B++ = A[i][i][3*i + j];
define void @prop5([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc13, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc13 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [< -16] splitable!
+; CHECK: da analyze - split level = 1, iteration = 11!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc13
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc13 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc14, %for.inc13 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -265,40 +307,45 @@ for.body3: ; preds = %for.body3, %for.con
%add9 = add nsw i64 %mul8, %j.02
%arrayidx12 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.03, i64 %i.03, i64 %add9
%0 = load i32* %arrayidx12, align 4
-; CHECK: da analyze - flow [< -16] splitable!
-; CHECK: da analyze - split level = 1, iteration = 11!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc13
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc13
for.inc13: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc14 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc14, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end15
+ %exitcond5 = icmp ne i64 %inc14, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end15
for.end15: ; preds = %for.inc13
ret void
}
-;; propagate Line
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[i + 1][4*i + j + 2] = ...
-;; ... = A[2*i][8*i + j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[i + 1][4*i + j + 2] = i;
+;; *B++ = A[2*i][8*i + j];
define void @prop6([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc12, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc12 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [=> -2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc12
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc12 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc13, %for.inc12 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -313,17 +360,17 @@ for.body3: ; preds = %for.body3, %for.con
%mul9 = shl nsw i64 %i.03, 1
%arrayidx11 = getelementptr inbounds [100 x i32]* %A, i64 %mul9, i64 %add8
%0 = load i32* %arrayidx11, align 4
-; CHECK: da analyze - flow [=> -2]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc12
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc12
for.inc12: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc13 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc13, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end14
+ %exitcond5 = icmp ne i64 %inc13, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end14
for.end14: ; preds = %for.inc12
ret void
@@ -331,20 +378,28 @@ for.end14: ; preds = %for.inc12
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[2*i + 4][-5*i + j + 2] = ...
-;; ... = A[-2*i + 20][5*i + j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[2*i + 4][-5*i + j + 2] = i;
+;; *B++ = A[-2*i + 20][5*i + j];
define void @prop7([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc14, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc14 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [* -38] splitable!
+; CHECK: da analyze - split level = 1, iteration = 4!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc14
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc14 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc15, %for.inc14 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -361,18 +416,17 @@ for.body3: ; preds = %for.body3, %for.con
%add11 = add nsw i64 %mul10, 20
%arrayidx13 = getelementptr inbounds [100 x i32]* %A, i64 %add11, i64 %add9
%0 = load i32* %arrayidx13, align 4
-; CHECK: da analyze - flow [* -38] splitable!
-; CHECK: da analyze - split level = 1, iteration = 4!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc14
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc14
for.inc14: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc15 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc15, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end16
+ %exitcond5 = icmp ne i64 %inc15, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end16
for.end16: ; preds = %for.inc14
ret void
@@ -380,20 +434,27 @@ for.end16: ; preds = %for.inc14
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[4][j + 2] = ...
-;; ... = A[-2*i + 4][5*i + j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[4][j + 2] = i;
+;; *B++ = A[-2*i + 4][5*i + j];
define void @prop8([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc10, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc10 ]
+; CHECK: da analyze - consistent output [S 0]!
+; CHECK: da analyze - flow [p<= 2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc10
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc10 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc11, %for.inc10 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -406,17 +467,17 @@ for.body3: ; preds = %for.body3, %for.con
%add7 = add nsw i64 %mul6, 4
%arrayidx9 = getelementptr inbounds [100 x i32]* %A, i64 %add7, i64 %add5
%0 = load i32* %arrayidx9, align 4
-; CHECK: da analyze - flow [p<= 2]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc10
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc10
for.inc10: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc11 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc11, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end12
+ %exitcond5 = icmp ne i64 %inc11, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end12
for.end12: ; preds = %for.inc10
ret void
@@ -424,20 +485,27 @@ for.end12: ; preds = %for.inc10
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[2*i + 4][5*i + j + 2] = ...
-;; ... = A[4][j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[2*i + 4][5*i + j + 2] = i;
+;; *B++ = A[4][j];
define void @prop9([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc10, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc10 ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [p<= 2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S 0]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc10
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc10 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc11, %for.inc10 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -450,17 +518,17 @@ for.body3: ; preds = %for.body3, %for.con
store i32 %conv, i32* %arrayidx7, align 4
%arrayidx9 = getelementptr inbounds [100 x i32]* %A, i64 4, i64 %j.02
%0 = load i32* %arrayidx9, align 4
-; CHECK: da analyze - flow [p<= 2]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc10
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc10
for.inc10: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc11 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc11, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end12
+ %exitcond5 = icmp ne i64 %inc11, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end12
for.end12: ; preds = %for.inc10
ret void
diff --git a/test/Analysis/DependenceAnalysis/Separability.ll b/test/Analysis/DependenceAnalysis/Separability.ll
index d42d3cdb39e5..3dcaaec2ae85 100644
--- a/test/Analysis/DependenceAnalysis/Separability.ll
+++ b/test/Analysis/DependenceAnalysis/Separability.ll
@@ -8,30 +8,37 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 0; i < 50; i++)
;; for (long int j = 0; j < 50; j++)
;; for (long int k = 0; k < 50; k++)
-;; for (long int l = 0; l < 50; l++)
-;; A[n][i][j + k] = ...
-;; ... = A[10][i + 10][2*j - l];
+;; for (long int l = 0; l < 50; l++) {
+;; A[n][i][j + k] = i;
+;; *B++ = A[10][i + 10][2*j - l];
define void @sep0([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc22, %entry
- %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc22 ]
+; CHECK: da analyze - output [0 * * S]!
+; CHECK: da analyze - flow [-10 * * *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [0 * S *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc22
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc22 ]
%i.07 = phi i64 [ 0, %entry ], [ %inc23, %for.inc22 ]
br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc19, %for.cond1.preheader
- %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc19 ]
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc19
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc19 ]
%j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc20, %for.inc19 ]
br label %for.cond7.preheader
-for.cond7.preheader: ; preds = %for.inc16, %for.cond4.preheader
- %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc16 ]
+for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc16
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc16 ]
%k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc17, %for.inc16 ]
br label %for.body9
-for.body9: ; preds = %for.body9, %for.cond7.preheader
+for.body9: ; preds = %for.cond7.preheader, %for.body9
%l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
%B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
%conv = trunc i64 %i.07 to i32
@@ -44,27 +51,29 @@ for.body9: ; preds = %for.body9, %for.con
%add12 = add nsw i64 %i.07, 10
%arrayidx15 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 10, i64 %add12, i64 %sub
%0 = load i32* %arrayidx15, align 4
-; CHECK: da analyze - flow [-10 * * *]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
store i32 %0, i32* %B.addr.31, align 4
%inc = add nsw i64 %l.02, 1
- %cmp8 = icmp slt i64 %inc, 50
- br i1 %cmp8, label %for.body9, label %for.inc16
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body9, label %for.inc16
for.inc16: ; preds = %for.body9
+ %scevgep = getelementptr i32* %B.addr.24, i64 50
%inc17 = add nsw i64 %k.03, 1
- %cmp5 = icmp slt i64 %inc17, 50
- br i1 %cmp5, label %for.cond7.preheader, label %for.inc19
+ %exitcond10 = icmp ne i64 %inc17, 50
+ br i1 %exitcond10, label %for.cond7.preheader, label %for.inc19
for.inc19: ; preds = %for.inc16
+ %scevgep9 = getelementptr i32* %B.addr.16, i64 2500
%inc20 = add nsw i64 %j.05, 1
- %cmp2 = icmp slt i64 %inc20, 50
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc22
+ %exitcond12 = icmp ne i64 %inc20, 50
+ br i1 %exitcond12, label %for.cond4.preheader, label %for.inc22
for.inc22: ; preds = %for.inc19
+ %scevgep11 = getelementptr i32* %B.addr.08, i64 125000
%inc23 = add nsw i64 %i.07, 1
- %cmp = icmp slt i64 %inc23, 50
- br i1 %cmp, label %for.cond1.preheader, label %for.end24
+ %exitcond13 = icmp ne i64 %inc23, 50
+ br i1 %exitcond13, label %for.cond1.preheader, label %for.end24
for.end24: ; preds = %for.inc22
ret void
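
Reading the four-entry vectors in Separability.ll (outermost loop first): a constant entry is a proven distance at that level, `<`/`=`/`>` a known direction, `*` an unknown one, and `S` marks a level the subscripts never mention (scalar). So `output [0 * * S]!` above says the store can only collide with itself within the same i iteration, nothing is known about j and k, and l is irrelevant to the address.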
@@ -74,30 +83,37 @@ for.end24: ; preds = %for.inc22
;; for (long int i = 0; i < 50; i++)
;; for (long int j = 0; j < 50; j++)
;; for (long int k = 0; k < 50; k++)
-;; for (long int l = 0; l < 50; l++)
-;; A[i][i][j + k] = ...
-;; ... = A[10][i + 10][2*j - l];
+;; for (long int l = 0; l < 50; l++) {
+;; A[i][i][j + k] = i;
+;; *B++ = A[10][i + 10][2*j - l];
define void @sep1([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc22, %entry
- %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc22 ]
+; CHECK: da analyze - output [0 * * S]!
+; CHECK: da analyze - flow [> * * *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [0 * S *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc22
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc22 ]
%i.07 = phi i64 [ 0, %entry ], [ %inc23, %for.inc22 ]
br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc19, %for.cond1.preheader
- %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc19 ]
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc19
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc19 ]
%j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc20, %for.inc19 ]
br label %for.cond7.preheader
-for.cond7.preheader: ; preds = %for.inc16, %for.cond4.preheader
- %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc16 ]
+for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc16
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc16 ]
%k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc17, %for.inc16 ]
br label %for.body9
-for.body9: ; preds = %for.body9, %for.cond7.preheader
+for.body9: ; preds = %for.cond7.preheader, %for.body9
%l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
%B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
%conv = trunc i64 %i.07 to i32
@@ -109,27 +125,29 @@ for.body9: ; preds = %for.body9, %for.con
%add12 = add nsw i64 %i.07, 10
%arrayidx15 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 10, i64 %add12, i64 %sub
%0 = load i32* %arrayidx15, align 4
-; CHECK: da analyze - flow [> * * *]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
store i32 %0, i32* %B.addr.31, align 4
%inc = add nsw i64 %l.02, 1
- %cmp8 = icmp slt i64 %inc, 50
- br i1 %cmp8, label %for.body9, label %for.inc16
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body9, label %for.inc16
for.inc16: ; preds = %for.body9
+ %scevgep = getelementptr i32* %B.addr.24, i64 50
%inc17 = add nsw i64 %k.03, 1
- %cmp5 = icmp slt i64 %inc17, 50
- br i1 %cmp5, label %for.cond7.preheader, label %for.inc19
+ %exitcond10 = icmp ne i64 %inc17, 50
+ br i1 %exitcond10, label %for.cond7.preheader, label %for.inc19
for.inc19: ; preds = %for.inc16
+ %scevgep9 = getelementptr i32* %B.addr.16, i64 2500
%inc20 = add nsw i64 %j.05, 1
- %cmp2 = icmp slt i64 %inc20, 50
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc22
+ %exitcond12 = icmp ne i64 %inc20, 50
+ br i1 %exitcond12, label %for.cond4.preheader, label %for.inc22
for.inc22: ; preds = %for.inc19
+ %scevgep11 = getelementptr i32* %B.addr.08, i64 125000
%inc23 = add nsw i64 %i.07, 1
- %cmp = icmp slt i64 %inc23, 50
- br i1 %cmp, label %for.cond1.preheader, label %for.end24
+ %exitcond13 = icmp ne i64 %inc23, 50
+ br i1 %exitcond13, label %for.cond1.preheader, label %for.end24
for.end24: ; preds = %for.inc22
ret void
@@ -139,30 +157,37 @@ for.end24: ; preds = %for.inc22
;; for (long int i = 0; i < 50; i++)
;; for (long int j = 0; j < 50; j++)
;; for (long int k = 0; k < 50; k++)
-;; for (long int l = 0; l < 50; l++)
-;; A[i][i][i + k][l] = ...
-;; ... = A[10][i + 10][j + k][l + 10];
+;; for (long int l = 0; l < 50; l++) {
+;; A[i][i][i + k][l] = i;
+;; *B++ = A[10][i + 10][j + k][l + 10];
define void @sep2([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc26, %entry
- %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc26 ]
+; CHECK: da analyze - consistent output [0 S 0 0]!
+; CHECK: da analyze - flow [> * * -10]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [0 * * 0]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc26
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc26 ]
%i.07 = phi i64 [ 0, %entry ], [ %inc27, %for.inc26 ]
br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc23, %for.cond1.preheader
- %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc23 ]
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc23
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc23 ]
%j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc24, %for.inc23 ]
br label %for.cond7.preheader
-for.cond7.preheader: ; preds = %for.inc20, %for.cond4.preheader
- %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc20 ]
+for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc20
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc20 ]
%k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc21, %for.inc20 ]
br label %for.body9
-for.body9: ; preds = %for.body9, %for.cond7.preheader
+for.body9: ; preds = %for.cond7.preheader, %for.body9
%l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
%B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
%conv = trunc i64 %i.07 to i32
@@ -174,27 +199,29 @@ for.body9: ; preds = %for.body9, %for.con
%add15 = add nsw i64 %i.07, 10
%arrayidx19 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 10, i64 %add15, i64 %add14, i64 %add13
%0 = load i32* %arrayidx19, align 4
-; CHECK: da analyze - flow [> * * -10]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
store i32 %0, i32* %B.addr.31, align 4
%inc = add nsw i64 %l.02, 1
- %cmp8 = icmp slt i64 %inc, 50
- br i1 %cmp8, label %for.body9, label %for.inc20
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body9, label %for.inc20
for.inc20: ; preds = %for.body9
+ %scevgep = getelementptr i32* %B.addr.24, i64 50
%inc21 = add nsw i64 %k.03, 1
- %cmp5 = icmp slt i64 %inc21, 50
- br i1 %cmp5, label %for.cond7.preheader, label %for.inc23
+ %exitcond10 = icmp ne i64 %inc21, 50
+ br i1 %exitcond10, label %for.cond7.preheader, label %for.inc23
for.inc23: ; preds = %for.inc20
+ %scevgep9 = getelementptr i32* %B.addr.16, i64 2500
%inc24 = add nsw i64 %j.05, 1
- %cmp2 = icmp slt i64 %inc24, 50
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc26
+ %exitcond12 = icmp ne i64 %inc24, 50
+ br i1 %exitcond12, label %for.cond4.preheader, label %for.inc26
for.inc26: ; preds = %for.inc23
+ %scevgep11 = getelementptr i32* %B.addr.08, i64 125000
%inc27 = add nsw i64 %i.07, 1
- %cmp = icmp slt i64 %inc27, 50
- br i1 %cmp, label %for.cond1.preheader, label %for.end28
+ %exitcond13 = icmp ne i64 %inc27, 50
+ br i1 %exitcond13, label %for.cond1.preheader, label %for.end28
for.end28: ; preds = %for.inc26
ret void
@@ -204,30 +231,37 @@ for.end28: ; preds = %for.inc26
;; for (long int i = 0; i < 50; i++)
;; for (long int j = 0; j < 50; j++)
;; for (long int k = 0; k < 50; k++)
-;; for (long int l = 0; l < 50; l++)
-;; A[i][i][i + k][l + k] = ...
-;; ... = A[10][i + 10][j + k][l + 10];
+;; for (long int l = 0; l < 50; l++) {
+;; A[i][i][i + k][l + k] = i;
+;; *B++ = A[10][i + 10][j + k][l + 10];
define void @sep3([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc27, %entry
- %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc27 ]
+; CHECK: da analyze - consistent output [0 S 0 0]!
+; CHECK: da analyze - flow [> * * *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [0 * * 0]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.cond1.preheader: ; preds = %entry, %for.inc27
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc27 ]
%i.07 = phi i64 [ 0, %entry ], [ %inc28, %for.inc27 ]
br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc24, %for.cond1.preheader
- %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc24 ]
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc24
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc24 ]
%j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc25, %for.inc24 ]
br label %for.cond7.preheader
-for.cond7.preheader: ; preds = %for.inc21, %for.cond4.preheader
- %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc21 ]
+for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc21
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc21 ]
%k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc22, %for.inc21 ]
br label %for.body9
-for.body9: ; preds = %for.body9, %for.cond7.preheader
+for.body9: ; preds = %for.cond7.preheader, %for.body9
%l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
%B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
%conv = trunc i64 %i.07 to i32
@@ -240,27 +274,29 @@ for.body9: ; preds = %for.body9, %for.con
%add16 = add nsw i64 %i.07, 10
%arrayidx20 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 10, i64 %add16, i64 %add15, i64 %add14
%0 = load i32* %arrayidx20, align 4
-; CHECK: da analyze - flow [> * * *]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
store i32 %0, i32* %B.addr.31, align 4
%inc = add nsw i64 %l.02, 1
- %cmp8 = icmp slt i64 %inc, 50
- br i1 %cmp8, label %for.body9, label %for.inc21
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body9, label %for.inc21
for.inc21: ; preds = %for.body9
+ %scevgep = getelementptr i32* %B.addr.24, i64 50
%inc22 = add nsw i64 %k.03, 1
- %cmp5 = icmp slt i64 %inc22, 50
- br i1 %cmp5, label %for.cond7.preheader, label %for.inc24
+ %exitcond10 = icmp ne i64 %inc22, 50
+ br i1 %exitcond10, label %for.cond7.preheader, label %for.inc24
for.inc24: ; preds = %for.inc21
+ %scevgep9 = getelementptr i32* %B.addr.16, i64 2500
%inc25 = add nsw i64 %j.05, 1
- %cmp2 = icmp slt i64 %inc25, 50
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc27
+ %exitcond12 = icmp ne i64 %inc25, 50
+ br i1 %exitcond12, label %for.cond4.preheader, label %for.inc27
for.inc27: ; preds = %for.inc24
+ %scevgep11 = getelementptr i32* %B.addr.08, i64 125000
%inc28 = add nsw i64 %i.07, 1
- %cmp = icmp slt i64 %inc28, 50
- br i1 %cmp, label %for.cond1.preheader, label %for.end29
+ %exitcond13 = icmp ne i64 %inc28, 50
+ br i1 %exitcond13, label %for.cond1.preheader, label %for.end29
for.end29: ; preds = %for.inc27
ret void
diff --git a/test/Analysis/DependenceAnalysis/StrongSIV.ll b/test/Analysis/DependenceAnalysis/StrongSIV.ll
index be336c3580ce..f499e84d4844 100644
--- a/test/Analysis/DependenceAnalysis/StrongSIV.ll
+++ b/test/Analysis/DependenceAnalysis/StrongSIV.ll
@@ -1,143 +1,196 @@
-; RUN: opt < %s -analyze -basicaa -indvars -da | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
; ModuleID = 'StrongSIV.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.6.0"
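
Note the RUN-line change at the top of StrongSIV.ll: `-indvars` is no longer scheduled before `-da`. The widening that pass used to perform at test time is now baked into the IR itself (the i64 %indvars.iv phis added below), so the dependence analysis sees exactly the code that is checked in, and the test can be reproduced by hand with `opt < StrongSIV.ll -analyze -basicaa -da`.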
-;; for (int i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;; for (int i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @strong0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp sgt i64 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
-
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
- %add = add nsw i32 %i.03, 2
- %idxprom = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
- store i32 %i.03, i32* %arrayidx, align 4
- %idxprom2 = sext i32 %i.03 to i64
- %arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2
- %0 = load i32* %arrayidx3, align 4
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+; CHECK: da analyze - none!
; CHECK: da analyze - consistent flow [2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
+ %0 = add nsw i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %0
+ %1 = trunc i64 %indvars.iv to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
- store i32 %0, i32* %B.addr.02, align 4
- %inc = add nsw i32 %i.03, 1
- %conv = sext i32 %inc to i64
- %cmp = icmp slt i64 %conv, %n
- br i1 %cmp, label %for.body, label %for.end
+ store i32 %2, i32* %B.addr.02, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;; for (long int i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @strong1(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
- %conv = sext i32 %n to i64
%cmp1 = icmp sgt i32 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ %0 = sext i32 %n to i64
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv2 = trunc i64 %i.03 to i32
%add = add nsw i64 %i.03, 2
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv2, i32* %arrayidx, align 4
%arrayidx3 = getelementptr inbounds i32* %A, i64 %i.03
- %0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - consistent flow [2]!
+ %1 = load i32* %arrayidx3, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
- store i32 %0, i32* %B.addr.02, align 4
+ store i32 %1, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, %conv
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %0
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @strong2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%add = add i64 %i.03, 2
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 %i.03
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - consistent flow [2]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (int i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;; for (int i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @strong3(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp sgt i32 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
-
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
- %add = add nsw i32 %i.03, 2
- %idxprom = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
- store i32 %i.03, i32* %arrayidx, align 4
- %idxprom1 = sext i32 %i.03 to i64
- %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1
- %0 = load i32* %arrayidx2, align 4
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+; CHECK: da analyze - none!
; CHECK: da analyze - consistent flow [2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
+ %0 = add nsw i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %0
+ %1 = trunc i64 %indvars.iv to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %2 = load i32* %arrayidx2, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
- store i32 %0, i32* %B.addr.02, align 4
- %inc = add nsw i32 %i.03, 1
- %cmp = icmp slt i32 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ store i32 %2, i32* %B.addr.02, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < 19; i++)
-;; A[i + 19] = ...
-;; ... = A[i];
+;; for (long unsigned i = 0; i < 19; i++) {
+;; A[i + 19] = i;
+;; *B++ = A[i];
define void @strong4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -146,27 +199,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 19
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 19
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 20; i++)
-;; A[i + 19] = ...
-;; ... = A[i];
+;; for (long unsigned i = 0; i < 20; i++) {
+;; A[i + 19] = i;
+;; *B++ = A[i];
define void @strong5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [19]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -175,27 +234,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - consistent flow [19]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 20
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 20; i++)
-;; A[2*i + 6] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 20; i++) {
+;; A[2*i + 6] = i;
+;; *B++ = A[2*i];
define void @strong6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [3]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -206,27 +271,33 @@ for.body: ; preds = %for.body, %entry
%mul1 = shl i64 %i.02, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %mul1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - consistent flow [3]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 20
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
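A one-line check for @strong6 (editorial, under the same reconstruction as above):

    The conflict equation is \( 2 i_w + 6 = 2 i_r \), i.e.
    \( i_r - i_w = 3 \): every read hits a cell written exactly three
    iterations earlier, hence "consistent flow [3]".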
-;; for (long unsigned i = 0; i < 20; i++)
-;; A[2*i + 7] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 20; i++) {
+;; A[2*i + 7] = i;
+;; *B++ = A[2*i];
define void @strong7(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -237,27 +308,33 @@ for.body: ; preds = %for.body, %entry
%mul1 = shl i64 %i.02, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %mul1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 20
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
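@strong7 differs from @strong6 only in the offset, and that alone kills the dependence; a quick editorial derivation:

    \( 2 i_w + 7 = 2 i_r \) has no integer solution -- the left-hand
    side is odd and the right-hand side even -- so no iteration pair
    can touch the same cell and the result is "none".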
-;; for (long unsigned i = 0; i < 20; i++)
-;; A[i + n] = ...
-;; ... = A[i];
+;; for (long unsigned i = 0; i < 20; i++) {
+;; A[i + n] = i;
+;; *B++ = A[i];
define void @strong8(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [%n|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -266,30 +343,39 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - consistent flow [%n|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 20
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
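In @strong8 the offset is the symbolic %n, and the reported distance stays symbolic; a sketch of the reasoning (the reading of the "|<" suffix is an inference from these tests, not something documented in the diff):

    From \( i_w + n = i_r \) the dependence distance is \( n \) itself,
    so -da prints "consistent flow [%n|<]": a symbolic distance of %n,
    with the "<" apparently recording that whenever the dependence
    exists at all, the read follows the write.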
-;; for (long unsigned i = 0; i < n; i++)
-;; A[i + n] = ...
-;; ... = A[i + 2*n];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[i + n] = i;
+;; *B++ = A[i + 2*n];
define void @strong9(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%add = add i64 %i.03, %n
%arrayidx = getelementptr inbounds i32* %A, i64 %add
@@ -298,27 +384,36 @@ for.body: ; preds = %for.body, %entry
%add1 = add i64 %i.03, %mul
%arrayidx2 = getelementptr inbounds i32* %A, i64 %add1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < 1000; i++)
-;; A[n*i + 5] = ...
-;; ... = A[n*i + 5];
+;; for (long unsigned i = 0; i < 1000; i++) {
+;; A[n*i + 5] = i;
+;; *B++ = A[n*i + 5];
define void @strong10(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -330,12 +425,11 @@ for.body: ; preds = %for.body, %entry
%add2 = add i64 %mul1, 5
%arrayidx3 = getelementptr inbounds i32* %A, i64 %add2
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - consistent flow [0|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 1000
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 1000
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
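Before the next file, note why every rewritten function now carries a block of six CHECK lines instead of one or two inline ones: the lines were hoisted to the top of each function, one per pair of memory references. A sketch of the shared shape (an assumed reconstruction, not code from the commit):

    /* Each test has exactly three memory references, and -da prints one
     * verdict per unordered pair of them, self-pairs included:
     * 3 + C(3,2) = 6 lines per function. */
    void shape(int *A, int *B, long n) {
      for (long i = 0; i < n; i++) {
        A[i + 2] = (int)i;  /* (1) store to A                  */
        *B++ = A[i];        /* (2) load from A, (3) store to B */
      }
    }
    /* The pair order appears to be (1,1) (1,2) (1,3) (2,2) (2,3) (3,3);
     * the A-vs-B pairs print "confused!" because the B reference walks
     * a pointer and offers no affine subscript to analyze. */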
diff --git a/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
index 2a1b4e7e971d..81e61892d8e8 100644
--- a/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
+++ b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
@@ -6,65 +6,99 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 0; i < n1; i++)
-;; A[2*i + n1] = ...
+;; A[2*i + n1] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[3*j + 3*n1];
+;; *B++ = A[3*j + 3*n1];
define void @symbolicrdiv0(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond1.preheader, label %for.body
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
-for.cond1.preheader: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond1.preheader.loopexit: ; preds = %for.body
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.loopexit, %entry
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.end11, label %for.body4
+ br i1 %cmp21, label %for.end11, label %for.body4.preheader
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body4.preheader: ; preds = %for.cond1.preheader
+ br label %for.body4
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%mul = shl nsw i64 %i.05, 1
%add = add i64 %mul, %n1
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond1.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond1.preheader.loopexit
-for.body4: ; preds = %for.body4, %for.cond1.preheader
- %j.03 = phi i64 [ %inc10, %for.body4 ], [ 0, %for.cond1.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.03 = phi i64 [ %inc10, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%mul56 = add i64 %j.03, %n1
%add7 = mul i64 %mul56, 3
%arrayidx8 = getelementptr inbounds i32* %A, i64 %add7
%0 = load i32* %arrayidx8, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc10 = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc10, %n2
- br i1 %cmp2, label %for.body4, label %for.end11
+ %exitcond7 = icmp ne i64 %inc10, %n2
+ br i1 %exitcond7, label %for.body4, label %for.end11.loopexit
-for.end11: ; preds = %for.body4, %for.cond1.preheader
+for.end11.loopexit: ; preds = %for.body4
+ br label %for.end11
+
+for.end11: ; preds = %for.end11.loopexit, %for.cond1.preheader
ret void
}
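The "none" between the two A references in @symbolicrdiv0 follows from a symbolic range comparison; an editorial derivation:

    The writes reach \( 2i + n_1 \) with \( 0 \le i < n_1 \), so at most
    \( 3 n_1 - 2 \); the reads start at \( 3j + 3 n_1 \ge 3 n_1 \).  The
    two ranges are disjoint for every \( n_1 \), which is what the
    symbolic RDIV test proves, hence "none".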
;; for (long int i = 0; i < n1; i++)
-;; A[2*i + 5*n2] = ...
+;; A[2*i + 5*n2] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[3*j + 2*n2];
+;; *B++ = A[3*j + 2*n2];
define void @symbolicrdiv1(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond2.preheader, label %for.body
+ br i1 %cmp4, label %for.cond2.preheader, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond2.preheader.loopexit: ; preds = %for.body
+ br label %for.cond2.preheader
-for.cond2.preheader: ; preds = %for.body, %entry
+for.cond2.preheader: ; preds = %for.cond2.preheader.loopexit, %entry
%cmp31 = icmp eq i64 %n2, 0
- br i1 %cmp31, label %for.end12, label %for.body5
+ br i1 %cmp31, label %for.end12, label %for.body5.preheader
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body5.preheader: ; preds = %for.cond2.preheader
+ br label %for.body5
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%mul = shl nsw i64 %i.05, 1
%mul1 = mul i64 %n2, 5
@@ -72,220 +106,307 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond2.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond2.preheader.loopexit
-for.body5: ; preds = %for.body5, %for.cond2.preheader
- %j.03 = phi i64 [ %inc11, %for.body5 ], [ 0, %for.cond2.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body5 ], [ %B, %for.cond2.preheader ]
+for.body5: ; preds = %for.body5.preheader, %for.body5
+ %j.03 = phi i64 [ %inc11, %for.body5 ], [ 0, %for.body5.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body5 ], [ %B, %for.body5.preheader ]
%mul6 = mul nsw i64 %j.03, 3
%mul7 = shl i64 %n2, 1
%add8 = add i64 %mul6, %mul7
%arrayidx9 = getelementptr inbounds i32* %A, i64 %add8
%0 = load i32* %arrayidx9, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc11 = add nsw i64 %j.03, 1
- %cmp3 = icmp ult i64 %inc11, %n2
- br i1 %cmp3, label %for.body5, label %for.end12
+ %exitcond6 = icmp ne i64 %inc11, %n2
+ br i1 %exitcond6, label %for.body5, label %for.end12.loopexit
+
+for.end12.loopexit: ; preds = %for.body5
+ br label %for.end12
-for.end12: ; preds = %for.body5, %for.cond2.preheader
+for.end12: ; preds = %for.end12.loopexit, %for.cond2.preheader
ret void
}
;; for (long int i = 0; i < n1; i++)
-;; A[2*i - n2] = ...
+;; A[2*i - n2] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[-j + 2*n1];
+;; *B++ = A[-j + 2*n1];
define void @symbolicrdiv2(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond1.preheader, label %for.body
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.cond1.preheader: ; preds = %for.body, %entry
+for.cond1.preheader.loopexit: ; preds = %for.body
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.loopexit, %entry
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.end10, label %for.body4
+ br i1 %cmp21, label %for.end10, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.cond1.preheader
+ br label %for.body4
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%mul = shl nsw i64 %i.05, 1
%sub = sub i64 %mul, %n2
%arrayidx = getelementptr inbounds i32* %A, i64 %sub
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond1.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond1.preheader.loopexit
-for.body4: ; preds = %for.body4, %for.cond1.preheader
- %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%mul6 = shl i64 %n1, 1
%add = sub i64 %mul6, %j.03
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc9 = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc9, %n2
- br i1 %cmp2, label %for.body4, label %for.end10
+ %exitcond6 = icmp ne i64 %inc9, %n2
+ br i1 %exitcond6, label %for.body4, label %for.end10.loopexit
+
+for.end10.loopexit: ; preds = %for.body4
+ br label %for.end10
-for.end10: ; preds = %for.body4, %for.cond1.preheader
+for.end10: ; preds = %for.end10.loopexit, %for.cond1.preheader
ret void
}
;; for (long int i = 0; i < n1; i++)
-;; A[-i + n2] = ...
+;; A[-i + n2] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[j - n1];
+;; *B++ = A[j - n1];
define void @symbolicrdiv3(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond1.preheader, label %for.body
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
-for.cond1.preheader: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond1.preheader.loopexit: ; preds = %for.body
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.loopexit, %entry
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.end9, label %for.body4
+ br i1 %cmp21, label %for.end9, label %for.body4.preheader
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body4.preheader: ; preds = %for.cond1.preheader
+ br label %for.body4
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%add = sub i64 %n2, %i.05
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond1.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond1.preheader.loopexit
-for.body4: ; preds = %for.body4, %for.cond1.preheader
- %j.03 = phi i64 [ %inc8, %for.body4 ], [ 0, %for.cond1.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.03 = phi i64 [ %inc8, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%sub5 = sub i64 %j.03, %n1
%arrayidx6 = getelementptr inbounds i32* %A, i64 %sub5
%0 = load i32* %arrayidx6, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc8 = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc8, %n2
- br i1 %cmp2, label %for.body4, label %for.end9
+ %exitcond6 = icmp ne i64 %inc8, %n2
+ br i1 %exitcond6, label %for.body4, label %for.end9.loopexit
-for.end9: ; preds = %for.body4, %for.cond1.preheader
+for.end9.loopexit: ; preds = %for.body4
+ br label %for.end9
+
+for.end9: ; preds = %for.end9.loopexit, %for.cond1.preheader
ret void
}
;; for (long int i = 0; i < n1; i++)
-;; A[-i + 2*n1] = ...
+;; A[-i + 2*n1] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[-j + n1];
+;; *B++ = A[-j + n1];
define void @symbolicrdiv4(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond1.preheader, label %for.body
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
-for.cond1.preheader: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond1.preheader.loopexit: ; preds = %for.body
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.loopexit, %entry
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.end10, label %for.body4
+ br i1 %cmp21, label %for.end10, label %for.body4.preheader
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body4.preheader: ; preds = %for.cond1.preheader
+ br label %for.body4
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%mul = shl i64 %n1, 1
%add = sub i64 %mul, %i.05
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond1.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond1.preheader.loopexit
-for.body4: ; preds = %for.body4, %for.cond1.preheader
- %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%add6 = sub i64 %n1, %j.03
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add6
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc9 = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc9, %n2
- br i1 %cmp2, label %for.body4, label %for.end10
+ %exitcond6 = icmp ne i64 %inc9, %n2
+ br i1 %exitcond6, label %for.body4, label %for.end10.loopexit
-for.end10: ; preds = %for.body4, %for.cond1.preheader
+for.end10.loopexit: ; preds = %for.body4
+ br label %for.end10
+
+for.end10: ; preds = %for.end10.loopexit, %for.cond1.preheader
ret void
}
;; for (long int i = 0; i < n1; i++)
-;; A[-i + n2] = ...
+;; A[-i + n2] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[-j + 2*n2];
+;; *B++ = A[-j + 2*n2];
define void @symbolicrdiv5(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond1.preheader, label %for.body
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
-for.cond1.preheader: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond1.preheader.loopexit: ; preds = %for.body
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.loopexit, %entry
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.end10, label %for.body4
+ br i1 %cmp21, label %for.end10, label %for.body4.preheader
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body4.preheader: ; preds = %for.cond1.preheader
+ br label %for.body4
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%add = sub i64 %n2, %i.05
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond1.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond1.preheader.loopexit
-for.body4: ; preds = %for.body4, %for.cond1.preheader
- %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%mul = shl i64 %n2, 1
%add6 = sub i64 %mul, %j.03
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add6
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc9 = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc9, %n2
- br i1 %cmp2, label %for.body4, label %for.end10
+ %exitcond6 = icmp ne i64 %inc9, %n2
+ br i1 %exitcond6, label %for.body4, label %for.end10.loopexit
-for.end10: ; preds = %for.body4, %for.cond1.preheader
+for.end10.loopexit: ; preds = %for.body4
+ br label %for.end10
+
+for.end10: ; preds = %for.end10.loopexit, %for.cond1.preheader
ret void
}
;; for (long int i = 0; i < n1; i++)
-;; for (long int j = 0; j < n2; j++)
-;; A[j -i + n2] = ...
-;; ... = A[2*n2];
+;; for (long int j = 0; j < n2; j++) {
+;; A[j - i + n2] = i;
+;; *B++ = A[2*n2];
define void @symbolicrdiv6(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.end7, label %for.cond1.preheader
+ br i1 %cmp4, label %for.end7, label %for.cond1.preheader.preheader
+
+; CHECK: da analyze - output [* *]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *]!
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc5, %entry
- %B.addr.06 = phi i32* [ %B.addr.1.lcssa, %for.inc5 ], [ %B, %entry ]
- %i.05 = phi i64 [ %inc6, %for.inc5 ], [ 0, %entry ]
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc5
+ %B.addr.06 = phi i32* [ %B.addr.1.lcssa, %for.inc5 ], [ %B, %for.cond1.preheader.preheader ]
+ %i.05 = phi i64 [ %inc6, %for.inc5 ], [ 0, %for.cond1.preheader.preheader ]
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.inc5, label %for.body3
+ br i1 %cmp21, label %for.inc5, label %for.body3.preheader
-for.body3: ; preds = %for.body3, %for.cond1.preheader
- %j.03 = phi i64 [ %inc, %for.body3 ], [ 0, %for.cond1.preheader ]
- %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.06, %for.cond1.preheader ]
+for.body3.preheader: ; preds = %for.cond1.preheader
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.preheader, %for.body3
+ %j.03 = phi i64 [ %inc, %for.body3 ], [ 0, %for.body3.preheader ]
+ %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.06, %for.body3.preheader ]
%conv = trunc i64 %i.05 to i32
%sub = sub nsw i64 %j.03, %i.05
%add = add i64 %sub, %n2
@@ -294,19 +415,25 @@ for.body3: ; preds = %for.body3, %for.con
%mul = shl i64 %n2, 1
%arrayidx4 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
store i32 %0, i32* %B.addr.12, align 4
%inc = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc, %n2
- br i1 %cmp2, label %for.body3, label %for.inc5
+ %exitcond = icmp ne i64 %inc, %n2
+ br i1 %exitcond, label %for.body3, label %for.inc5.loopexit
-for.inc5: ; preds = %for.body3, %for.cond1.preheader
- %B.addr.1.lcssa = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+for.inc5.loopexit: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.06, i64 %n2
+ br label %for.inc5
+
+for.inc5: ; preds = %for.inc5.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %scevgep, %for.inc5.loopexit ]
%inc6 = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc6, %n1
- br i1 %cmp, label %for.cond1.preheader, label %for.end7
+ %exitcond7 = icmp ne i64 %inc6, %n1
+ br i1 %exitcond7, label %for.cond1.preheader, label %for.end7.loopexit
+
+for.end7.loopexit: ; preds = %for.inc5
+ br label %for.end7
-for.end7: ; preds = %for.inc5, %entry
+for.end7: ; preds = %for.end7.loopexit, %entry
ret void
}
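@symbolicrdiv6 is the one test in this file with interesting self-pair results, and its ;; header shows why; a reconstructed sketch (an assumption, not commit code):

    /* The load reads the single cell A[2*n2] in every iteration of both
     * loops, so load-vs-load is "consistent input [S S]" -- "S" marks a
     * subscript that is invariant (scalar) at that loop level.  The
     * store index j - i + n2 repeats for many (i, j) pairs, so
     * store-vs-store is an output dependence with unknown directions,
     * printed "output [* *]". */
    void symbolicrdiv6(int *A, int *B, long n1, long n2) {
      for (long i = 0; i < n1; i++)
        for (long j = 0; j < n2; j++) {
          A[j - i + n2] = (int)i;
          *B++ = A[2 * n2];
        }
    }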
diff --git a/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index ee2343fa51e9..297096ce135d 100644
--- a/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -5,18 +5,28 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; for (long int i = 0; i < n; i++)
-;; A[2*i + n] = ...
-;; ... = A[3*i + 3*n];
+;; for (long int i = 0; i < n; i++) {
+;; A[2*i + n] = i;
+;; *B++ = A[3*i + 3*n];
define void @symbolicsiv0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = shl nsw i64 %i.03, 1
%add = add i64 %mul, %n
@@ -26,30 +36,42 @@ for.body: ; preds = %for.body, %entry
%add3 = mul i64 %mul14, 3
%arrayidx4 = getelementptr inbounds i32* %A, i64 %add3
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[2*i + 5*n] = ...
-;; ... = A[3*i + 2*n];
+;; for (long int i = 0; i < n; i++) {
+;; A[2*i + 5*n] = i;
+;; *B++ = A[3*i + 2*n];
define void @symbolicsiv1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = shl nsw i64 %i.03, 1
%mul1 = mul i64 %n, 5
@@ -61,30 +83,42 @@ for.body: ; preds = %for.body, %entry
%add4 = add i64 %mul2, %mul3
%arrayidx5 = getelementptr inbounds i32* %A, i64 %add4
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[2*i - n] = ...
-;; ... = A[-i + 2*n];
+;; for (long int i = 0; i < n; i++) {
+;; A[2*i - n] = i;
+;; *B++ = A[-i + 2*n];
define void @symbolicsiv2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = shl nsw i64 %i.03, 1
%sub = sub i64 %mul, %n
@@ -94,30 +128,42 @@ for.body: ; preds = %for.body, %entry
%add = sub i64 %mul2, %i.03
%arrayidx3 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[-2*i + n + 1] = ...
-;; ... = A[i - 2*n];
+;; for (long int i = 0; i < n; i++) {
+;; A[-2*i + n + 1] = i;
+;; *B++ = A[i - 2*n];
define void @symbolicsiv3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -2
%add = add i64 %mul, %n
@@ -128,30 +174,42 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 %i.03, %mul2
%arrayidx3 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[-2*i + 3*n] = ...
-;; ... = A[-i + n];
+;; for (long int i = 0; i < n; i++) {
+;; A[-2*i + 3*n] = i;
+;; *B++ = A[-i + n];
define void @symbolicsiv4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -2
%mul1 = mul i64 %n, 3
@@ -161,30 +219,42 @@ for.body: ; preds = %for.body, %entry
%add2 = sub i64 %n, %i.03
%arrayidx3 = getelementptr inbounds i32* %A, i64 %add2
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[-2*i - 2*n] = ...
-;; ... = A[-i - n];
+;; for (long int i = 0; i < n; i++) {
+;; A[-2*i - 2*n] = i;
+;; *B++ = A[-i - n];
define void @symbolicsiv5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -2
%mul1 = shl i64 %n, 1
@@ -195,32 +265,44 @@ for.body: ; preds = %for.body, %entry
%sub3 = sub i64 %sub2, %n
%arrayidx4 = getelementptr inbounds i32* %A, i64 %sub3
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
;; why doesn't the SCEV package understand that n >= 0?
-;;void weaktest(int *A, int *B, long unsigned n)
-;; for (long unsigned i = 0; i < n; i++)
-;; A[i + n + 1] = ...
-;; ... = A[-i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[i + n + 1] = i;
+;; *B++ = A[-i];
define void @weaktest(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [*|<] splitable!
+; CHECK: da analyze - split level = 1, iteration = ((0 smax (-1 + (-1 * %n))) /u 2)!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%add = add i64 %i.03, %n
%add1 = add i64 %add, 1
@@ -229,29 +311,36 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 0, %i.03
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [*|<] splitable!
-; CHECK: da analyze - split level = 1, iteration = ((0 smax (-1 + (-1 * %n))) /u 2)!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
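The splitable flow in @weaktest, and the grumble in the ;; comment above it, both come down to the sign of n; an editorial derivation of the printed split iteration:

    The conflict equation is \( i_w + n + 1 = -i_r \), i.e.
    \( i_w + i_r = -(n + 1) \).  For unsigned \( n \ge 0 \) this has no
    solution in non-negative iterations -- which is why the comment asks
    why SCEV does not know \( n \ge 0 \); with that fact the dependence
    would be disproved.  Lacking it, the weak-crossing test reports a
    splitable dependence at the crossing point \( i = -(n+1)/2 \), and
    the printed ((0 smax (-1 + (-1 * %n))) /u 2) is exactly that value,
    clamped at 0.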
-;; void symbolicsiv6(int *A, int *B, long unsigned n, long unsigned N, long unsigned M) {
-;; for (long int i = 0; i < n; i++) {
-;; A[4*N*i + M] = i;
-;; *B++ = A[4*N*i + 3*M + 1];
+;; for (long int i = 0; i < n; i++) {
+;; A[4*N*i + M] = i;
+;; *B++ = A[4*N*i + 3*M + 1];
define void @symbolicsiv6(i32* %A, i32* %B, i64 %n, i64 %N, i64 %M) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
br i1 %cmp1, label %for.end, label %for.body.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.body.preheader: ; preds = %entry
br label %for.body
@@ -272,7 +361,6 @@ for.body: ; preds = %for.body.preheader,
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add6
%0 = load i32* %arrayidx7, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
-; CHECK: da analyze - none!
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
%exitcond = icmp ne i64 %inc, %n
@@ -286,16 +374,22 @@ for.end: ; preds = %for.end.loopexit, %
}
-;; void symbolicsiv7(int *A, int *B, long unsigned n, long unsigned N, long unsigned M) {
-;; for (long int i = 0; i < n; i++) {
-;; A[2*N*i + M] = i;
-;; *B++ = A[2*N*i - 3*M + 2];
+;; for (long int i = 0; i < n; i++) {
+;; A[2*N*i + M] = i;
+;; *B++ = A[2*N*i - 3*M + 2];
define void @symbolicsiv7(i32* %A, i32* %B, i64 %n, i64 %N, i64 %M) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
br i1 %cmp1, label %for.end, label %for.body.preheader
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [<>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
for.body.preheader: ; preds = %entry
br label %for.body
@@ -316,7 +410,6 @@ for.body: ; preds = %for.body.preheader,
%arrayidx6 = getelementptr inbounds i32* %A, i64 %add5
%1 = load i32* %arrayidx6, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
-; CHECK: da analyze - flow [<>]!
store i32 %1, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
%exitcond = icmp ne i64 %inc, %n
diff --git a/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll b/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
index 343e8f49bf9e..8b2e43f3d868 100644
--- a/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
+++ b/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
@@ -5,18 +5,28 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; for (long unsigned i = 0; i < n; i++)
-;; A[1 + n*i] = ...
-;; ... = A[1 - n*i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[1 + n*i] = i;
+;; *B++ = A[1 - n*i];
define void @weakcrossing0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul i64 %i.03, %n
%add = add i64 %mul, 1
@@ -26,30 +36,43 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 1, %mul1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [0|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[n + i] = ...
-;; ... = A[1 + n - i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[n + i] = i;
+;; *B++ = A[1 + n - i];
define void @weakcrossing1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [<>] splitable!
+; CHECK: da analyze - split level = 1, iteration = 0!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%add = add i64 %i.03, %n
%arrayidx = getelementptr inbounds i32* %A, i64 %add
@@ -58,28 +81,36 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 %add1, %i.03
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [<>] splitable!
-; CHECK: da analyze - split level = 1, iteration = 0!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
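The split iteration printed for @weakcrossing1 is easy to verify (editorial derivation):

    \( n + i_w = 1 + n - i_r \) gives \( i_w + i_r = 1 \): the two
    accesses cross between iterations 0 and 1, so the dependence is
    splitable at iteration 0 -- "split level = 1, iteration = 0" above.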
-;; for (long unsigned i = 0; i < 3; i++)
-;; A[i] = ...
-;; ... = A[6 - i];
+;; for (long unsigned i = 0; i < 3; i++) {
+;; A[i] = i;
+;; *B++ = A[6 - i];
define void @weakcrossing2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -88,27 +119,33 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 6, %i.02
%arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 3
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 3
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 4; i++)
-;; A[i] = ...
-;; ... = A[6 - i];
+;; for (long unsigned i = 0; i < 4; i++) {
+;; A[i] = i;
+;; *B++ = A[6 - i];
define void @weakcrossing3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -117,27 +154,33 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 6, %i.02
%arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [0|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 4
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 4
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 10; i++)
-;; A[i] = ...
-;; ... = A[-6 - i];
+;; for (long unsigned i = 0; i < 10; i++) {
+;; A[i] = i;
+;; *B++ = A[-6 - i];
define void @weakcrossing4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -146,30 +189,39 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 -6, %i.02
%arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[3*i] = ...
-;; ... = A[5 - 3*i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[3*i] = i;
+;; *B++ = A[5 - 3*i];
define void @weakcrossing5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul i64 %i.03, 3
%arrayidx = getelementptr inbounds i32* %A, i64 %mul
@@ -178,27 +230,37 @@ for.body: ; preds = %for.body, %entry
%sub = add i64 %0, 5
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
%1 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %1, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < 4; i++)
-;; A[i] = ...
-;; ... = A[5 - i];
+;; for (long unsigned i = 0; i < 4; i++) {
+;; A[i] = i;
+;; *B++ = A[5 - i];
define void @weakcrossing6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [<>] splitable!
+; CHECK: da analyze - split level = 1, iteration = 2!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -207,13 +269,11 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 5, %i.02
%arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [<>] splitable!
-; CHECK: da analyze - split level = 1, iteration = 2!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 4
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 4
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
diff --git a/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll b/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
index a59871602b6c..bc85e6c8b690 100644
--- a/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
+++ b/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
@@ -5,15 +5,22 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; for (long unsigned i = 0; i < 30; i++)
-;; A[2*i + 10] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < 30; i++) {
+;; A[2*i + 10] = i;
+;; *B++ = A[10];
define void @weakzerodst0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [p<=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -23,30 +30,39 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [p<=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 30
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 30
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
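The weak-zero-SIV results in this file all hinge on where the single conflicting iteration lands; an editorial summary covering @weakzerodst0 here and @weakzerodst3 and @weakzerodst4 below:

    For @weakzerodst0 the read index is the constant 10, so
    \( 2i + 10 = 10 \) forces \( i = 0 \): only the first iteration
    writes the cell that every iteration reads, and the "p" in [p<=|<]
    marks the dependence as removable by peeling that iteration.  In
    @weakzerodst3, \( 2i = 10 \) gives \( i = 5 \), the last of 6
    iterations, hence [=>p|<]; in @weakzerodst4 the same \( i = 5 \)
    falls strictly inside the 7-iteration loop, no peel helps, and the
    direction degrades to [*|<].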
-;; for (long unsigned i = 0; i < n; i++)
-;; A[n*i + 10] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[n*i + 10] = i;
+;; *B++ = A[10];
define void @weakzerodst1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [p<=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul i64 %i.03, %n
%add = add i64 %mul, 10
@@ -54,27 +70,36 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [p<=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < 5; i++)
-;; A[2*i] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < 5; i++) {
+;; A[2*i] = i;
+;; *B++ = A[10];
define void @weakzerodst2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -83,27 +108,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 5
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 6; i++)
-;; A[2*i] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < 6; i++) {
+;; A[2*i] = i;
+;; *B++ = A[10];
define void @weakzerodst3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [=>p|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -112,27 +143,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [=>p|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 6
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 7; i++)
-;; A[2*i] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < 7; i++) {
+;; A[2*i] = i;
+;; *B++ = A[10];
define void @weakzerodst4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -141,27 +178,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 7
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 7
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 7; i++)
-;; A[2*i] = ...
-;; ... = A[-10];
+;; for (long unsigned i = 0; i < 7; i++) {
+;; A[2*i] = i;
+;; *B++ = A[-10];
define void @weakzerodst5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -170,43 +213,54 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 -10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 7
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 7
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[3*i] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[3*i] = i;
+;; *B++ = A[10];
define void @weakzerodst6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul i64 %i.03, 3
%arrayidx = getelementptr inbounds i32* %A, i64 %mul
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
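The six CHECK lines hoisted to the top of each function above appear to correspond to the analyzer printing one verdict per ordered pair of the three memory accesses (store to A, load from A, store through B), self-pairs included, which is why the pairs involving the possibly-aliasing B pointer come back as confused. The weak-zero SIV test itself solves a*i + c1 == c2 for the single candidate iteration i = (c2 - c1)/a and reports a dependence only when that i is an integer within the loop bounds. A hypothetical sketch of the pattern, not part of this suite (name and constants invented):

;;  for (long unsigned i = 0; i < 16; i++) {
;;    A[4*i + 8] = i;
;;    ... = A[8];
;;  4*i + 8 == 8 only at i == 0, so a flow dependence peeled from the
;;  first iteration (a vector of the [p<=|<] form) would be expected.
define void @weakzero_sketch(i32* %A) nounwind uwtable ssp {
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %conv = trunc i64 %i.0 to i32
  %mul = shl i64 %i.0, 2
  %add = add i64 %mul, 8
  %arrayidx = getelementptr inbounds i32* %A, i64 %add
  store i32 %conv, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32* %A, i64 8
  %0 = load i32* %arrayidx1, align 4
  %inc = add i64 %i.0, 1
  %exitcond = icmp ne i64 %inc, 16
  br i1 %exitcond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}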
diff --git a/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll b/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
index fd4f46269546..2b3b2d00ecac 100644
--- a/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
+++ b/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
@@ -5,15 +5,22 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; for (long unsigned i = 0; i < 30; i++)
-;; A[10] = ...
-;; ... = A[2*i + 10];
+;; for (long unsigned i = 0; i < 30; i++) {
+;; A[10] = i;
+;; *B++ = A[2*i + 10];
define void @weakzerosrc0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [S]!
+; CHECK: da analyze - flow [p<=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -23,30 +30,39 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %mul, 10
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [p<=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 30
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 30
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[10] = ...
-;; ... = A[n*i + 10];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[10] = i;
+;; *B++ = A[n*i + 10];
define void @weakzerosrc1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [S]!
+; CHECK: da analyze - flow [p<=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%arrayidx = getelementptr inbounds i32* %A, i64 10
store i32 %conv, i32* %arrayidx, align 4
@@ -54,27 +70,36 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %mul, 10
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [p<=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < 5; i++)
-;; A[10] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 5; i++) {
+;; A[10] = i;
+;; *B++ = A[2*i];
define void @weakzerosrc2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [S]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -83,27 +108,33 @@ for.body: ; preds = %for.body, %entry
%mul = shl i64 %i.02, 1
%arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 5
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 6; i++)
-;; A[10] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 6; i++) {
+;; A[10] = i;
+;; *B++ = A[2*i];
define void @weakzerosrc3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [S]!
+; CHECK: da analyze - flow [=>p|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -112,27 +143,33 @@ for.body: ; preds = %for.body, %entry
%mul = shl i64 %i.02, 1
%arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [=>p|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 6
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 7; i++)
-;; A[10] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 7; i++) {
+;; A[10] = i;
+;; *B++ = A[2*i];
define void @weakzerosrc4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [S]!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -141,27 +178,33 @@ for.body: ; preds = %for.body, %entry
%mul = shl i64 %i.02, 1
%arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 7
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 7
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 7; i++)
-;; A[-10] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 7; i++) {
+;; A[-10] = i;
+;; *B++ = A[2*i];
define void @weakzerosrc5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [S]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -170,43 +213,54 @@ for.body: ; preds = %for.body, %entry
%mul = shl i64 %i.02, 1
%arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 7
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 7
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[10] = ...
-;; ... = A[3*i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[10] = i;
+;; *B++ = A[3*i];
define void @weakzerosrc6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [S]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%arrayidx = getelementptr inbounds i32* %A, i64 10
store i32 %conv, i32* %arrayidx, align 4
%mul = mul i64 %i.03, 3
%arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
diff --git a/test/Analysis/DependenceAnalysis/ZIV.ll b/test/Analysis/DependenceAnalysis/ZIV.ll
index 42b2389df268..5463c63ba3fb 100644
--- a/test/Analysis/DependenceAnalysis/ZIV.ll
+++ b/test/Analysis/DependenceAnalysis/ZIV.ll
@@ -5,49 +5,70 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; A[n + 1] = ...
-;; ... = A[1 + n];
+;; A[n + 1] = 0;
+;; *B = A[1 + n];
define void @z0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%add = add i64 %n, 1
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
%add1 = add i64 %n, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %add1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - consistent flow!
store i32 %0, i32* %B, align 4
ret void
}
-;; A[n] = ...
-;; ... = A[n + 1];
+;; A[n] = 0;
+;; *B = A[n + 1];
define void @z1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%arrayidx = getelementptr inbounds i32* %A, i64 %n
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
%add = add i64 %n, 1
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
store i32 %0, i32* %B, align 4
ret void
}
-;; A[n] = ...
-;; ... = A[m];
+;; A[n] = 0;
+;; *B = A[m];
define void @z2(i32* %A, i32* %B, i64 %n, i64 %m) nounwind uwtable ssp {
entry:
%arrayidx = getelementptr inbounds i32* %A, i64 %n
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
%arrayidx1 = getelementptr inbounds i32* %A, i64 %m
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow!
store i32 %0, i32* %B, align 4
ret void
}
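The three ZIV (zero induction variable) cases above have no loop around the accesses, so dependence testing reduces to comparing two loop-invariant subscripts: n + 1 versus 1 + n is provably equal (consistent flow), n versus n + 1 is provably unequal (none), and n versus m is undecidable, so a dependence is conservatively assumed (flow). A hypothetical fourth case in the same shape, not part of the file (name invented):

;;  A[n + 2] = 0;
;;  *B = A[n];
;;  n + 2 and n can never be equal, so no dependence would be expected.
define void @z_sketch(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
  %add = add i64 %n, 2
  %arrayidx = getelementptr inbounds i32* %A, i64 %add
  store i32 0, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32* %A, i64 %n
  %0 = load i32* %arrayidx1, align 4
  store i32 %0, i32* %B, align 4
  ret void
}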
diff --git a/test/Analysis/Dominators/invoke.ll b/test/Analysis/Dominators/invoke.ll
index f935750c987e..da0b2461656c 100644
--- a/test/Analysis/Dominators/invoke.ll
+++ b/test/Analysis/Dominators/invoke.ll
@@ -1,4 +1,4 @@
-; RUN: opt -verify -disable-output %s
+; RUN: opt -verify -disable-output < %s
; This tests that we handle unreachable blocks correctly
define void @f() {
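The one-character RUN-line change here recurs throughout this import: reading the module from stdin with < %s instead of passing %s as a positional argument keeps the module identifier, and any diagnostics that embed it, independent of the absolute path of the test file. That is the usual motivation for the idiom, though the commit itself doesn't state it. The resulting minimal shape of such a test:

; RUN: opt -verify -disable-output < %s
define void @g() {
entry:
  ret void
}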
diff --git a/test/Analysis/Profiling/lit.local.cfg b/test/Analysis/Profiling/lit.local.cfg
index 19eebc0ac7ac..444b7dc27410 100644
--- a/test/Analysis/Profiling/lit.local.cfg
+++ b/test/Analysis/Profiling/lit.local.cfg
@@ -1 +1,16 @@
config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+ if not config.parent:
+ return config
+ return getRoot(config.parent)
+
+root = getRoot(config)
+
+# Most profiling tests rely on a JIT being present to gather their data; AArch64
+# doesn't have any JIT at present, so they will fail when run there.
+if root.host_arch in ['AArch64']:
+ config.unsupported = True
+
+if 'hexagon' in root.target_triple:
+ config.unsupported = True

diff --git a/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll b/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll
index 218b4375f70c..0dfa0bf9cd8d 100644
--- a/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll
+++ b/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll
@@ -1,4 +1,4 @@
-; RUN: opt -regions %s
+; RUN: opt -regions < %s
define i32 @main() nounwind {
entry:
br label %for.cond
diff --git a/test/Analysis/RegionInfo/block_sort.ll b/test/Analysis/RegionInfo/block_sort.ll
index ac77ab36e6f5..d7ef79cf6b55 100644
--- a/test/Analysis/RegionInfo/block_sort.ll
+++ b/test/Analysis/RegionInfo/block_sort.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats -analyze < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/cond_loop.ll b/test/Analysis/RegionInfo/cond_loop.ll
index 1145ffdba039..0da4e5dca3d3 100644
--- a/test/Analysis/RegionInfo/cond_loop.ll
+++ b/test/Analysis/RegionInfo/cond_loop.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/condition_complicated.ll b/test/Analysis/RegionInfo/condition_complicated.ll
index 6b398800db9c..53f13c10eaf3 100644
--- a/test/Analysis/RegionInfo/condition_complicated.ll
+++ b/test/Analysis/RegionInfo/condition_complicated.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/condition_complicated_2.ll b/test/Analysis/RegionInfo/condition_complicated_2.ll
index f551108d6083..fd04afc20dca 100644
--- a/test/Analysis/RegionInfo/condition_complicated_2.ll
+++ b/test/Analysis/RegionInfo/condition_complicated_2.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/condition_forward_edge.ll b/test/Analysis/RegionInfo/condition_forward_edge.ll
index 5e4d9d2f8b23..88c45c2e6efd 100644
--- a/test/Analysis/RegionInfo/condition_forward_edge.ll
+++ b/test/Analysis/RegionInfo/condition_forward_edge.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/condition_same_exit.ll b/test/Analysis/RegionInfo/condition_same_exit.ll
index e48413a4c2dd..bfb0df84b44d 100644
--- a/test/Analysis/RegionInfo/condition_same_exit.ll
+++ b/test/Analysis/RegionInfo/condition_same_exit.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/condition_simple.ll b/test/Analysis/RegionInfo/condition_simple.ll
index 00d9ed24e17d..3f93a6ecd358 100644
--- a/test/Analysis/RegionInfo/condition_simple.ll
+++ b/test/Analysis/RegionInfo/condition_simple.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/exit_in_condition.ll b/test/Analysis/RegionInfo/exit_in_condition.ll
index b84abecc1649..ac409ec1bb25 100644
--- a/test/Analysis/RegionInfo/exit_in_condition.ll
+++ b/test/Analysis/RegionInfo/exit_in_condition.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/infinite_loop.ll b/test/Analysis/RegionInfo/infinite_loop.ll
index 8e588286a58a..61abef8ff7a9 100644
--- a/test/Analysis/RegionInfo/infinite_loop.ll
+++ b/test/Analysis/RegionInfo/infinite_loop.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
diff --git a/test/Analysis/RegionInfo/infinite_loop_2.ll b/test/Analysis/RegionInfo/infinite_loop_2.ll
index a8227e340c5e..56e83cfdebb9 100644
--- a/test/Analysis/RegionInfo/infinite_loop_2.ll
+++ b/test/Analysis/RegionInfo/infinite_loop_2.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/infinite_loop_3.ll b/test/Analysis/RegionInfo/infinite_loop_3.ll
index b09c9c1e5919..4538f0f78587 100644
--- a/test/Analysis/RegionInfo/infinite_loop_3.ll
+++ b/test/Analysis/RegionInfo/infinite_loop_3.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
diff --git a/test/Analysis/RegionInfo/infinite_loop_4.ll b/test/Analysis/RegionInfo/infinite_loop_4.ll
index 681c305ce971..4ac9068f0dd8 100644
--- a/test/Analysis/RegionInfo/infinite_loop_4.ll
+++ b/test/Analysis/RegionInfo/infinite_loop_4.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/loop_with_condition.ll b/test/Analysis/RegionInfo/loop_with_condition.ll
index 08d2ba8e35a9..4c1c8654ca4f 100644
--- a/test/Analysis/RegionInfo/loop_with_condition.ll
+++ b/test/Analysis/RegionInfo/loop_with_condition.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
diff --git a/test/Analysis/RegionInfo/loops_1.ll b/test/Analysis/RegionInfo/loops_1.ll
index 6449949df843..9efe619ad9d7 100644
--- a/test/Analysis/RegionInfo/loops_1.ll
+++ b/test/Analysis/RegionInfo/loops_1.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/loops_2.ll b/test/Analysis/RegionInfo/loops_2.ll
index dc4a1adffbac..ca7eca75affd 100644
--- a/test/Analysis/RegionInfo/loops_2.ll
+++ b/test/Analysis/RegionInfo/loops_2.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/mix_1.ll b/test/Analysis/RegionInfo/mix_1.ll
index 1474e033e574..55001c7f435c 100644
--- a/test/Analysis/RegionInfo/mix_1.ll
+++ b/test/Analysis/RegionInfo/mix_1.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
diff --git a/test/Analysis/RegionInfo/nested_loops.ll b/test/Analysis/RegionInfo/nested_loops.ll
index a3707a19872f..3e73b3a328f6 100644
--- a/test/Analysis/RegionInfo/nested_loops.ll
+++ b/test/Analysis/RegionInfo/nested_loops.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
diff --git a/test/Analysis/RegionInfo/next.ll b/test/Analysis/RegionInfo/next.ll
index 890b4f23001e..b22bbcc2b6d5 100644
--- a/test/Analysis/RegionInfo/next.ll
+++ b/test/Analysis/RegionInfo/next.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/paper.ll b/test/Analysis/RegionInfo/paper.ll
index 96c87e0559b4..0398d2baa225 100644
--- a/test/Analysis/RegionInfo/paper.ll
+++ b/test/Analysis/RegionInfo/paper.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
diff --git a/test/Analysis/RegionInfo/two_loops_same_header.ll b/test/Analysis/RegionInfo/two_loops_same_header.ll
index e75661e8905b..25713420a3b7 100644
--- a/test/Analysis/RegionInfo/two_loops_same_header.ll
+++ b/test/Analysis/RegionInfo/two_loops_same_header.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -regions -analyze < %s | FileCheck %s
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s
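The REQUIRES: asserts lines added across these RegionInfo tests gate them on builds with assertions enabled: the -stats machinery their RUN lines depend on is compiled out of release (NDEBUG) builds, so lit now marks the tests unsupported there instead of letting them fail. The shape of such a test, as a sketch; the STAT string below is illustrative rather than copied from the pass:

; REQUIRES: asserts
; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
; STAT: 1 region
define void @h() {
entry:
  ret void
}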
diff --git a/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll b/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll
index aba0ce74678f..5a0239810418 100644
--- a/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll
+++ b/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll
@@ -1,8 +1,10 @@
-; RUN: opt -indvars -scalar-evolution -analyze %s
+; RUN: opt -indvars -scalar-evolution -analyze < %s | FileCheck %s
; This test checks if the SCEV analysis is printed out at all.
; It failed once as the RequiredTransitive option was not implemented
; correctly.
+; CHECK: Classifying expressions for: @main
+
define i32 @main() nounwind {
entry:
br label %for.cond
diff --git a/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll b/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll
index 9f17e27577c2..49e944dcd266 100644
--- a/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll
+++ b/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll
@@ -1,4 +1,4 @@
-; RUN: opt -indvars %s
+; RUN: opt -indvars < %s
; PR9424: Attempt to use a SCEVCouldNotCompute object!
; The inner loop computes the Step and Start of the outer loop.
; Call that Vexit. The outer End value is max(2,Vexit), because
diff --git a/test/Analysis/ScalarEvolution/fold.ll b/test/Analysis/ScalarEvolution/fold.ll
index 4e2adf187e8b..57006dd9bb42 100644
--- a/test/Analysis/ScalarEvolution/fold.ll
+++ b/test/Analysis/ScalarEvolution/fold.ll
@@ -1,4 +1,4 @@
-; RUN: opt -analyze -scalar-evolution %s -S | FileCheck %s
+; RUN: opt -analyze -scalar-evolution -S < %s | FileCheck %s
define i16 @test1(i8 %x) {
%A = zext i8 %x to i12
diff --git a/test/Analysis/ScalarEvolution/scev-invalid.ll b/test/Analysis/ScalarEvolution/scev-invalid.ll
new file mode 100644
index 000000000000..aac0d319ae84
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/scev-invalid.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -S -indvars -loop-unroll | FileCheck %s
+;
+; PR15570: SEGV: SCEV back-edge info invalid after dead code removal.
+;
+; Indvars creates a SCEV expression for the loop's back edge taken
+; count, then determines that the comparison is always true and
+; removes it.
+;
+; When loop-unroll asks for the expression, it contains a NULL
+; SCEVUnknown (as a CallbackVH).
+;
+; forgetMemoizedResults should invalidate the backedge taken count expression.
+
+; CHECK: @test
+; CHECK-NOT: phi
+; CHECK-NOT: icmp
+; CHECK: ret void
+define void @test() {
+entry:
+ %xor1 = xor i32 0, 1
+ br label %b17
+
+b17:
+ br i1 undef, label %b22, label %b18
+
+b18:
+ %phi1 = phi i32 [ %add1, %b18 ], [ %xor1, %b17 ]
+ %add1 = add nsw i32 %phi1, -1
+ %cmp1 = icmp sgt i32 %add1, 0
+ br i1 %cmp1, label %b18, label %b22
+
+b22:
+ ret void
+}
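The failure mode deserves one more sentence: ScalarEvolution memoizes backedge-taken counts, and those cached expressions hold CallbackVH references to IR values. If a pass deletes such a value without the cache being purged, the next client to ask (loop-unroll, in PR15570) dereferences a dangling SCEVUnknown. A reduced hypothetical shape of the trigger, separate from the test above (names invented, and whether indvars folds this exact compare is an assumption):

define void @trigger_sketch() {
entry:
  br label %loop

loop:
  %iv = phi i32 [ 3, %entry ], [ %dec, %loop ]
  %dec = add nsw i32 %iv, -1
  ; indvars can compute this loop's trip count, prove the compare, and
  ; delete it; forgetMemoizedResults must then drop the cached
  ; backedge-taken count that referenced the deleted value.
  %cmp = icmp sgt i32 %dec, 0
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}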
diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index 1ac59278e7ea..c6cc26a24106 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -9,13 +9,13 @@
; invalid, as it's possible that this only happens after optimization on a
; code path which isn't ever executed.
-; CHECK: define void @test0_yes(i32* nocapture %p) nounwind readnone {
+; CHECK: define void @test0_yes(i32* nocapture %p) #0 {
define void @test0_yes(i32* %p) nounwind {
store i32 0, i32* %p, !tbaa !1
ret void
}
-; CHECK: define void @test0_no(i32* nocapture %p) nounwind {
+; CHECK: define void @test0_no(i32* nocapture %p) #1 {
define void @test0_no(i32* %p) nounwind {
store i32 0, i32* %p, !tbaa !2
ret void
@@ -24,13 +24,13 @@ define void @test0_no(i32* %p) nounwind {
; Add the readonly attribute, since there's just a call to a function which
; TBAA says doesn't modify any memory.
-; CHECK: define void @test1_yes(i32* nocapture %p) nounwind readonly {
+; CHECK: define void @test1_yes(i32* nocapture %p) #2 {
define void @test1_yes(i32* %p) nounwind {
call void @callee(i32* %p), !tbaa !1
ret void
}
-; CHECK: define void @test1_no(i32* %p) nounwind {
+; CHECK: define void @test1_no(i32* %p) #1 {
define void @test1_no(i32* %p) nounwind {
call void @callee(i32* %p), !tbaa !2
ret void
@@ -43,13 +43,13 @@ define void @test1_no(i32* %p) nounwind {
; This is unusual, since the function is memcpy, but as above, this
; isn't necessarily invalid.
-; CHECK: define void @test2_yes(i8* nocapture %p, i8* nocapture %q, i64 %n) nounwind readnone {
+; CHECK: define void @test2_yes(i8* nocapture %p, i8* nocapture %q, i64 %n) #0 {
define void @test2_yes(i8* %p, i8* %q, i64 %n) nounwind {
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !1
ret void
}
-; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture %q, i64 %n) nounwind {
+; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture %q, i64 %n) #1 {
define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind {
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !2
ret void
@@ -57,13 +57,13 @@ define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind {
; Similar to the others, va_arg only accesses memory through its operand.
-; CHECK: define i32 @test3_yes(i8* nocapture %p) nounwind readnone {
+; CHECK: define i32 @test3_yes(i8* nocapture %p) #0 {
define i32 @test3_yes(i8* %p) nounwind {
%t = va_arg i8* %p, i32, !tbaa !1
ret i32 %t
}
-; CHECK: define i32 @test3_no(i8* nocapture %p) nounwind {
+; CHECK: define i32 @test3_no(i8* nocapture %p) #1 {
define i32 @test3_no(i8* %p) nounwind {
%t = va_arg i8* %p, i32, !tbaa !2
ret i32 %t
@@ -72,6 +72,10 @@ define i32 @test3_no(i8* %p) nounwind {
declare void @callee(i32* %p) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes #1 = { nounwind }
+; CHECK: attributes #2 = { nounwind readonly }
+
; Root note.
!0 = metadata !{ }
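What lets FunctionAttrs mark these functions readnone or readonly is the TBAA tag on the access rather than the operation itself: in the scalar TBAA format of this era a node is a string name, a parent node, and an optional integer flag that, when 1, marks the tag as pointing to constant memory, which the analysis may treat as never visibly modified. A hedged sketch of the two kinds of tags; the names are invented and the exact integer type of the flag is an assumption:

!0 = metadata !{ }                                         ; TBAA root
!1 = metadata !{metadata !"const mem", metadata !0, i1 1}  ; points to constant memory
!2 = metadata !{metadata !"plain mem", metadata !0}        ; ordinary mutable memory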
diff --git a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
index 8f080e2108bd..6f1c22da3ac5 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
; CHECK: define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[NUW:#[0-9]+]]
; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
; CHECK-NEXT: %c = add <8 x i16> %a, %a
define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) {
@@ -22,6 +22,9 @@ entry:
declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+; CHECK: attributes #0 = { nounwind readonly }
+; CHECK: attributes [[NUW]] = { nounwind }
+
!0 = metadata !{metadata !"tbaa root", null}
!1 = metadata !{metadata !"A", metadata !0}
!2 = metadata !{metadata !"B", metadata !0}
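The [[NUW:#[0-9]+]] pattern above is FileCheck variable capture: the first occurrence binds NUW to whichever attribute-group number the printer assigned, and the later CHECK requires that same group to be defined as { nounwind }, so the test survives renumbering. A minimal sketch of the idiom (function names invented):

; RUN: llvm-as < %s | llvm-dis | FileCheck %s
; CHECK: call void @callee_sketch() [[NUW:#[0-9]+]]
; CHECK: attributes [[NUW]] = { nounwind }
define void @caller_sketch() {
entry:
  call void @callee_sketch() nounwind
  ret void
}

declare void @callee_sketch()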
diff --git a/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
new file mode 100644
index 000000000000..f1edb4482cf1
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
@@ -0,0 +1,104 @@
+; RUN: opt < %s -tbaa -basicaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+; Generated with "clang -cc1 -disable-llvm-optzns -O1 -emit-llvm"
+; #include <new>
+; struct Foo { long i; };
+; struct Bar { void *p; };
+; long foo(int n) {
+; Foo *f = new Foo;
+; f->i = 1;
+; for (int i=0; i<n; ++i) {
+; Bar *b = new (f) Bar;
+; b->p = 0;
+; f = new (f) Foo;
+; f->i = i;
+; }
+; return f->i;
+; }
+
+; Basic AA says MayAlias, TBAA says NoAlias
+; CHECK: MayAlias: i64* %i5, i8** %p
+; CHECK: NoAlias: store i64 %conv, i64* %i5, align 8, !tbaa !4 <-> store i8* null, i8** %p, align 8, !tbaa !3
+
+%struct.Foo = type { i64 }
+%struct.Bar = type { i8* }
+
+define i64 @_Z3fooi(i32 %n) #0 {
+entry:
+ %n.addr = alloca i32, align 4
+ %f = alloca %struct.Foo*, align 8
+ %i1 = alloca i32, align 4
+ %b = alloca %struct.Bar*, align 8
+ store i32 %n, i32* %n.addr, align 4, !tbaa !0
+ %call = call noalias i8* @_Znwm(i64 8)
+ %0 = bitcast i8* %call to %struct.Foo*
+ store %struct.Foo* %0, %struct.Foo** %f, align 8, !tbaa !3
+ %1 = load %struct.Foo** %f, align 8, !tbaa !3
+ %i = getelementptr inbounds %struct.Foo* %1, i32 0, i32 0
+ store i64 1, i64* %i, align 8, !tbaa !4
+ store i32 0, i32* %i1, align 4, !tbaa !0
+ br label %for.cond
+
+for.cond:
+ %2 = load i32* %i1, align 4, !tbaa !0
+ %3 = load i32* %n.addr, align 4, !tbaa !0
+ %cmp = icmp slt i32 %2, %3
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %4 = load %struct.Foo** %f, align 8, !tbaa !3
+ %5 = bitcast %struct.Foo* %4 to i8*
+ %new.isnull = icmp eq i8* %5, null
+ br i1 %new.isnull, label %new.cont, label %new.notnull
+
+new.notnull:
+ %6 = bitcast i8* %5 to %struct.Bar*
+ br label %new.cont
+
+new.cont:
+ %7 = phi %struct.Bar* [ %6, %new.notnull ], [ null, %for.body ]
+ store %struct.Bar* %7, %struct.Bar** %b, align 8, !tbaa !3
+ %8 = load %struct.Bar** %b, align 8, !tbaa !3
+ %p = getelementptr inbounds %struct.Bar* %8, i32 0, i32 0
+ store i8* null, i8** %p, align 8, !tbaa !3
+ %9 = load %struct.Foo** %f, align 8, !tbaa !3
+ %10 = bitcast %struct.Foo* %9 to i8*
+ %new.isnull2 = icmp eq i8* %10, null
+ br i1 %new.isnull2, label %new.cont4, label %new.notnull3
+
+new.notnull3:
+ %11 = bitcast i8* %10 to %struct.Foo*
+ br label %new.cont4
+
+new.cont4:
+ %12 = phi %struct.Foo* [ %11, %new.notnull3 ], [ null, %new.cont ]
+ store %struct.Foo* %12, %struct.Foo** %f, align 8, !tbaa !3
+ %13 = load i32* %i1, align 4, !tbaa !0
+ %conv = sext i32 %13 to i64
+ %14 = load %struct.Foo** %f, align 8, !tbaa !3
+ %i5 = getelementptr inbounds %struct.Foo* %14, i32 0, i32 0
+ store i64 %conv, i64* %i5, align 8, !tbaa !4
+ br label %for.inc
+
+for.inc:
+ %15 = load i32* %i1, align 4, !tbaa !0
+ %inc = add nsw i32 %15, 1
+ store i32 %inc, i32* %i1, align 4, !tbaa !0
+ br label %for.cond
+
+for.end:
+ %16 = load %struct.Foo** %f, align 8, !tbaa !3
+ %i6 = getelementptr inbounds %struct.Foo* %16, i32 0, i32 0
+ %17 = load i64* %i6, align 8, !tbaa !4
+ ret i64 %17
+}
+
+declare noalias i8* @_Znwm(i64)
+
+attributes #0 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
+!4 = metadata !{metadata !"long", metadata !1}
diff --git a/test/Assembler/2008-09-02-FunctionNotes.ll b/test/Assembler/2008-09-02-FunctionNotes.ll
index 761c91e864c4..11a0411ef79f 100644
--- a/test/Assembler/2008-09-02-FunctionNotes.ll
+++ b/test/Assembler/2008-09-02-FunctionNotes.ll
@@ -1,14 +1,21 @@
; Test function attributes
-; RUN: llvm-as < %s | llvm-dis | grep inline | count 2
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; CHECK: define void @fn1() #0
define void @fn1() alwaysinline {
ret void
}
+; CHECK: define void @fn2() #1
define void @fn2() noinline {
ret void
}
+; CHECK: define void @fn3()
+; CHECK-NOT: define void @fn3() #{{.*}}
define void @fn3() {
ret void
}
+
+; CHECK: attributes #0 = { alwaysinline }
+; CHECK: attributes #1 = { noinline }
diff --git a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
index b2256b10a8da..df70149a33f6 100644
--- a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
+++ b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
@@ -22,4 +22,11 @@ define i32 @main() nounwind readonly {
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-!0 = metadata !{i32 459008, metadata !0, metadata !0, metadata !0, i32 38, metadata !0} ; [ DW_TAG_auto_variable ]
+!7 = metadata !{metadata !1}
+!6 = metadata !{i32 786449, i32 0, i32 12, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !"clang version 3.0 (trunk 131941)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !7, null, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786688, metadata !1, metadata !"c", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 ()* @main, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786468, metadata !6, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/Assembler/ConstantExprNoFold.ll b/test/Assembler/ConstantExprNoFold.ll
new file mode 100644
index 000000000000..83e8909b5ebd
--- /dev/null
+++ b/test/Assembler/ConstantExprNoFold.ll
@@ -0,0 +1,23 @@
+; This test checks to make sure that constant exprs don't fold in some simple
+; situations
+
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Even give it a datalayout, to tempt folding as much as possible.
+target datalayout = "p:32:32"
+
+@A = global i64 0
+@B = global i64 0
+
+; Don't fold this. @A might really be allocated next to @B, in which case the
+; icmp should return true. It's not valid to *dereference* @B through a pointer
+; based on @A, but icmp isn't a dereference.
+
+; CHECK: @C = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* @B)
+@C = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* @B)
+
+; Don't fold this completely away either. In theory this could be simplified
+; to only use a gep on one side of the icmp though.
+
+; CHECK: @D = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* getelementptr inbounds (i64* @B, i64 2))
+@D = global i1 icmp eq (i64* getelementptr inbounds (i64* @A, i64 1), i64* getelementptr inbounds (i64* @B, i64 2))
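For contrast, comparisons the folder can decide without any layout assumption do still fold; a global compared against itself is the simplest case. A hypothetical companion check, not part of the file (@A as declared above):

; CHECK: @E = global i1 true
@E = global i1 icmp eq (i64* @A, i64* @A)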
diff --git a/test/Assembler/externally-initialized.ll b/test/Assembler/externally-initialized.ll
new file mode 100644
index 000000000000..4be6e629a1d0
--- /dev/null
+++ b/test/Assembler/externally-initialized.ll
@@ -0,0 +1,5 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: @G = externally_initialized global i32 0
+
+@G = externally_initialized global i32 0
diff --git a/test/Assembler/fast-math-flags.ll b/test/Assembler/fast-math-flags.ll
new file mode 100644
index 000000000000..3a116c507f48
--- /dev/null
+++ b/test/Assembler/fast-math-flags.ll
@@ -0,0 +1,142 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: opt -S < %s | FileCheck %s
+
+@addr = external global i64
+@select = external global i1
+@vec = external global <3 x float>
+@arr = external global [3 x float]
+
+define float @none(float %x, float %y) {
+entry:
+; CHECK: %vec = load <3 x float>* @vec
+ %vec = load <3 x float>* @vec
+; CHECK: %select = load i1* @select
+ %select = load i1* @select
+; CHECK: %arr = load [3 x float]* @arr
+ %arr = load [3 x float]* @arr
+
+; CHECK: %a = fadd float %x, %y
+ %a = fadd float %x, %y
+; CHECK: %a_vec = fadd <3 x float> %vec, %vec
+ %a_vec = fadd <3 x float> %vec, %vec
+; CHECK: %b = fsub float %x, %y
+ %b = fsub float %x, %y
+; CHECK: %b_vec = fsub <3 x float> %vec, %vec
+ %b_vec = fsub <3 x float> %vec, %vec
+; CHECK: %c = fmul float %x, %y
+ %c = fmul float %x, %y
+; CHECK: %c_vec = fmul <3 x float> %vec, %vec
+ %c_vec = fmul <3 x float> %vec, %vec
+; CHECK: %d = fdiv float %x, %y
+ %d = fdiv float %x, %y
+; CHECK: %d_vec = fdiv <3 x float> %vec, %vec
+ %d_vec = fdiv <3 x float> %vec, %vec
+; CHECK: %e = frem float %x, %y
+ %e = frem float %x, %y
+; CHECK: %e_vec = frem <3 x float> %vec, %vec
+ %e_vec = frem <3 x float> %vec, %vec
+; CHECK: ret float %e
+ ret float %e
+}
+
+; CHECK: no_nan
+define float @no_nan(float %x, float %y) {
+entry:
+; CHECK: %vec = load <3 x float>* @vec
+ %vec = load <3 x float>* @vec
+; CHECK: %select = load i1* @select
+ %select = load i1* @select
+; CHECK: %arr = load [3 x float]* @arr
+ %arr = load [3 x float]* @arr
+
+; CHECK: %a = fadd nnan float %x, %y
+ %a = fadd nnan float %x, %y
+; CHECK: %a_vec = fadd nnan <3 x float> %vec, %vec
+ %a_vec = fadd nnan <3 x float> %vec, %vec
+; CHECK: %b = fsub nnan float %x, %y
+ %b = fsub nnan float %x, %y
+; CHECK: %b_vec = fsub nnan <3 x float> %vec, %vec
+ %b_vec = fsub nnan <3 x float> %vec, %vec
+; CHECK: %c = fmul nnan float %x, %y
+ %c = fmul nnan float %x, %y
+; CHECK: %c_vec = fmul nnan <3 x float> %vec, %vec
+ %c_vec = fmul nnan <3 x float> %vec, %vec
+; CHECK: %d = fdiv nnan float %x, %y
+ %d = fdiv nnan float %x, %y
+; CHECK: %d_vec = fdiv nnan <3 x float> %vec, %vec
+ %d_vec = fdiv nnan <3 x float> %vec, %vec
+; CHECK: %e = frem nnan float %x, %y
+ %e = frem nnan float %x, %y
+; CHECK: %e_vec = frem nnan <3 x float> %vec, %vec
+ %e_vec = frem nnan <3 x float> %vec, %vec
+; CHECK: ret float %e
+ ret float %e
+}
+
+; CHECK: no_nan_inf
+define float @no_nan_inf(float %x, float %y) {
+entry:
+; CHECK: %vec = load <3 x float>* @vec
+ %vec = load <3 x float>* @vec
+; CHECK: %select = load i1* @select
+ %select = load i1* @select
+; CHECK: %arr = load [3 x float]* @arr
+ %arr = load [3 x float]* @arr
+
+; CHECK: %a = fadd nnan ninf float %x, %y
+ %a = fadd ninf nnan float %x, %y
+; CHECK: %a_vec = fadd nnan <3 x float> %vec, %vec
+ %a_vec = fadd nnan <3 x float> %vec, %vec
+; CHECK: %b = fsub nnan float %x, %y
+ %b = fsub nnan float %x, %y
+; CHECK: %b_vec = fsub nnan ninf <3 x float> %vec, %vec
+ %b_vec = fsub ninf nnan <3 x float> %vec, %vec
+; CHECK: %c = fmul nnan float %x, %y
+ %c = fmul nnan float %x, %y
+; CHECK: %c_vec = fmul nnan <3 x float> %vec, %vec
+ %c_vec = fmul nnan <3 x float> %vec, %vec
+; CHECK: %d = fdiv nnan ninf float %x, %y
+ %d = fdiv ninf nnan float %x, %y
+; CHECK: %d_vec = fdiv nnan <3 x float> %vec, %vec
+ %d_vec = fdiv nnan <3 x float> %vec, %vec
+; CHECK: %e = frem nnan float %x, %y
+ %e = frem nnan float %x, %y
+; CHECK: %e_vec = frem nnan ninf <3 x float> %vec, %vec
+ %e_vec = frem ninf nnan <3 x float> %vec, %vec
+; CHECK: ret float %e
+ ret float %e
+}
+
+; CHECK: mixed_flags
+define float @mixed_flags(float %x, float %y) {
+entry:
+; CHECK: %vec = load <3 x float>* @vec
+ %vec = load <3 x float>* @vec
+; CHECK: %select = load i1* @select
+ %select = load i1* @select
+; CHECK: %arr = load [3 x float]* @arr
+ %arr = load [3 x float]* @arr
+
+; CHECK: %a = fadd nnan ninf float %x, %y
+ %a = fadd ninf nnan float %x, %y
+; CHECK: %a_vec = fadd nnan <3 x float> %vec, %vec
+ %a_vec = fadd nnan <3 x float> %vec, %vec
+; CHECK: %b = fsub fast float %x, %y
+ %b = fsub nnan nsz fast float %x, %y
+; CHECK: %b_vec = fsub nnan <3 x float> %vec, %vec
+ %b_vec = fsub nnan <3 x float> %vec, %vec
+; CHECK: %c = fmul fast float %x, %y
+ %c = fmul nsz fast arcp float %x, %y
+; CHECK: %c_vec = fmul nsz <3 x float> %vec, %vec
+ %c_vec = fmul nsz <3 x float> %vec, %vec
+; CHECK: %d = fdiv nnan ninf arcp float %x, %y
+ %d = fdiv arcp ninf nnan float %x, %y
+; CHECK: %d_vec = fdiv fast <3 x float> %vec, %vec
+ %d_vec = fdiv fast nnan arcp <3 x float> %vec, %vec
+; CHECK: %e = frem nnan nsz float %x, %y
+ %e = frem nnan nsz float %x, %y
+; CHECK: %e_vec = frem nnan <3 x float> %vec, %vec
+ %e_vec = frem nnan <3 x float> %vec, %vec
+; CHECK: ret float %e
+ ret float %e
+}
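The mixed_flags checks above also pin down the printer's canonicalization: flags come out in a fixed order (nnan ninf nsz arcp), and once all four are present they collapse to the single fast keyword, which is why fsub nnan nsz fast round-trips as plain fsub fast. A minimal round-trip sketch (function name invented):

; RUN: llvm-as < %s | llvm-dis | FileCheck %s
; CHECK: %t = fadd fast float %x, %y
define float @all_flags(float %x, float %y) {
entry:
  %t = fadd fast float %x, %y
  ret float %t
}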
diff --git a/test/Assembler/getelementptr.ll b/test/Assembler/getelementptr.ll
index ce6866d54417..af03fca6d2c1 100644
--- a/test/Assembler/getelementptr.ll
+++ b/test/Assembler/getelementptr.ll
@@ -7,12 +7,12 @@
@C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 3, i64 2, i64 0, i64 0, i64 7523)
; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5)
-;; Verify that i16 indices work.
+; Verify that i16 indices work.
@x = external global {i32, i32}
@y = global i32* getelementptr ({ i32, i32 }* @x, i16 42, i32 0)
; CHECK: @y = global i32* getelementptr ({ i32, i32 }* @x, i16 42, i32 0)
-; see if i92 indices work too.
+; See if i92 indices work too.
define i32 *@test({i32, i32}* %t, i92 %n) {
; CHECK: @test
; CHECK: %B = getelementptr { i32, i32 }* %t, i92 %n, i32 0
@@ -20,3 +20,18 @@ define i32 *@test({i32, i32}* %t, i92 %n) {
ret i32* %B
}
+; Verify that constant expression vector GEPs work.
+
+@z = global <2 x i32*> getelementptr (<2 x [3 x {i32, i32}]*> zeroinitializer, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 1, i32 1>)
+
+; Verify that struct GEP works with a vector of pointers.
+define <2 x i32*> @test7(<2 x {i32, i32}*> %a) {
+ %w = getelementptr <2 x {i32, i32}*> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer
+ ret <2 x i32*> %w
+}
+
+; Verify that array GEP works with a vector of pointers.
+define <2 x i8*> @test8(<2 x [2 x i8]*> %a) {
+ %w = getelementptr <2 x [2 x i8]*> %a, <2 x i32> <i32 0, i32 0>, <2 x i8> <i8 0, i8 1>
+ ret <2 x i8*> %w
+}
diff --git a/test/Assembler/getelementptr_vec_idx1.ll b/test/Assembler/getelementptr_vec_idx1.ll
new file mode 100644
index 000000000000..d2479f44048a
--- /dev/null
+++ b/test/Assembler/getelementptr_vec_idx1.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: FileCheck %s < %t
+; Test that a vector index is only used with a vector pointer.
+
+; CHECK: getelementptr index type missmatch
+
+define i32 @test(i32* %a) {
+ %w = getelementptr i32* %a, <2 x i32> <i32 5, i32 9>
+ ret i32 %w
+}
diff --git a/test/Assembler/getelementptr_vec_idx2.ll b/test/Assembler/getelementptr_vec_idx2.ll
new file mode 100644
index 000000000000..8b71ce3095b1
--- /dev/null
+++ b/test/Assembler/getelementptr_vec_idx2.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: FileCheck %s < %t
+; Test that a vector pointer is only used with a vector index.
+
+; CHECK: getelementptr index type missmatch
+
+define <2 x i32> @test(<2 x i32*> %a) {
+ %w = getelementptr <2 x i32*> %a, i32 2
+ ret <2 x i32> %w
+}
diff --git a/test/Assembler/getelementptr_vec_idx3.ll b/test/Assembler/getelementptr_vec_idx3.ll
new file mode 100644
index 000000000000..1f6c29b3ccc2
--- /dev/null
+++ b/test/Assembler/getelementptr_vec_idx3.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: FileCheck %s < %t
+; Test that vector indices have the same number of elements as the pointer.
+
+; CHECK: getelementptr index type missmatch
+
+define <4 x i32> @test(<4 x i32>* %a) {
+ %w = getelementptr <4 x i32>* %a, <2 x i32> <i32 5, i32 9>
+ ret i32 %w
+}
diff --git a/test/Assembler/getelementptr_vec_struct.ll b/test/Assembler/getelementptr_vec_struct.ll
new file mode 100644
index 000000000000..ec66836bac19
--- /dev/null
+++ b/test/Assembler/getelementptr_vec_struct.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: FileCheck %s < %t
+; Test that a vector struct index with non-equal elements is rejected.
+
+; CHECK: invalid getelementptr indices
+
+define <2 x i32*> @test7(<2 x {i32, i32}*> %a) {
+ %w = getelementptr <2 x {i32, i32}*> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32*> %w
+}
diff --git a/test/Assembler/unnamed-addr.ll b/test/Assembler/unnamed-addr.ll
index 3c94ca213081..35b3b39ce48f 100644
--- a/test/Assembler/unnamed-addr.ll
+++ b/test/Assembler/unnamed-addr.ll
@@ -15,4 +15,6 @@ declare i32 @zed(%struct.foobar*, %struct.foobar*)
; CHECK: @bar.d = internal unnamed_addr constant %struct.foobar zeroinitializer, align 4
; CHECK: @foo.d = internal constant %struct.foobar zeroinitializer, align 4
-; CHECK: define i32 @main() unnamed_addr nounwind ssp {
+; CHECK: define i32 @main() unnamed_addr #0 {
+
+; CHECK: attributes #0 = { nounwind ssp }
diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml
index 61be4b770358..b49bab9ab17b 100644
--- a/test/Bindings/Ocaml/vmcore.ml
+++ b/test/Bindings/Ocaml/vmcore.ml
@@ -860,7 +860,8 @@ let test_builder () =
group "function attribute";
begin
ignore (add_function_attr fn Attribute.UWTable);
- (* RUN: grep "X7.*uwtable" < %t.ll
+ (* RUN: grep "X7.*#0" < %t.ll
+ * RUN: grep "attributes #0 = .*uwtable.*" < %t.ll
*)
insist ([Attribute.UWTable] = function_attr fn);
end;
diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll
index 502e96728230..6c46e94012a5 100644
--- a/test/Bitcode/attributes.ll
+++ b/test/Bitcode/attributes.ll
@@ -14,7 +14,7 @@ define void @f2(i8 signext)
}
define void @f3() noreturn
-; CHECK: define void @f3() noreturn
+; CHECK: define void @f3() #0
{
ret void;
}
@@ -32,7 +32,7 @@ define void @f5(i8* sret)
}
define void @f6() nounwind
-; CHECK: define void @f6() nounwind
+; CHECK: define void @f6() #1
{
ret void;
}
@@ -56,43 +56,43 @@ define void @f9(i8* nest)
}
define void @f10() readnone
-; CHECK: define void @f10() readnone
+; CHECK: define void @f10() #2
{
ret void;
}
define void @f11() readonly
-; CHECK: define void @f11() readonly
+; CHECK: define void @f11() #3
{
ret void;
}
define void @f12() noinline
-; CHECK: define void @f12() noinline
+; CHECK: define void @f12() #4
{
ret void;
}
define void @f13() alwaysinline
-; CHECK: define void @f13() alwaysinline
+; CHECK: define void @f13() #5
{
ret void;
}
define void @f14() optsize
-; CHECK: define void @f14() optsize
+; CHECK: define void @f14() #6
{
ret void;
}
define void @f15() ssp
-; CHECK: define void @f15() ssp
+; CHECK: define void @f15() #7
{
ret void;
}
define void @f16() sspreq
-; CHECK: define void @f16() sspreq
+; CHECK: define void @f16() #8
{
ret void;
}
@@ -110,55 +110,93 @@ define void @f18(i8* nocapture)
}
define void @f19() noredzone
-; CHECK: define void @f19() noredzone
+; CHECK: define void @f19() #9
{
ret void;
}
define void @f20() noimplicitfloat
-; CHECK: define void @f20() noimplicitfloat
+; CHECK: define void @f20() #10
{
ret void;
}
define void @f21() naked
-; CHECK: define void @f21() naked
+; CHECK: define void @f21() #11
{
ret void;
}
define void @f22() inlinehint
-; CHECK: define void @f22() inlinehint
+; CHECK: define void @f22() #12
{
ret void;
}
define void @f23() alignstack(4)
-; CHECK: define void @f23() alignstack(4)
+; CHECK: define void @f23() #13
{
ret void;
}
define void @f24() returns_twice
-; CHECK: define void @f24() returns_twice
+; CHECK: define void @f24() #14
{
ret void;
}
define void @f25() uwtable
-; CHECK: define void @f25() uwtable
+; CHECK: define void @f25() #15
{
ret void;
}
define void @f26() nonlazybind
-; CHECK: define void @f26() nonlazybind
+; CHECK: define void @f26() #16
{
ret void;
}
-define void @f27() address_safety
-; CHECK: define void @f27() address_safety
+define void @f27() sanitize_address
+; CHECK: define void @f27() #17
{
ret void;
}
+define void @f28() sanitize_thread
+; CHECK: define void @f28() #18
+{
+ ret void;
+}
+define void @f29() sanitize_memory
+; CHECK: define void @f29() #19
+{
+ ret void;
+}
+
+define void @f30() "cpu"="cortex-a8"
+; CHECK: define void @f30() #20
+{
+ ret void;
+}
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes #1 = { nounwind }
+; CHECK: attributes #2 = { readnone }
+; CHECK: attributes #3 = { readonly }
+; CHECK: attributes #4 = { noinline }
+; CHECK: attributes #5 = { alwaysinline }
+; CHECK: attributes #6 = { optsize }
+; CHECK: attributes #7 = { ssp }
+; CHECK: attributes #8 = { sspreq }
+; CHECK: attributes #9 = { noredzone }
+; CHECK: attributes #10 = { noimplicitfloat }
+; CHECK: attributes #11 = { naked }
+; CHECK: attributes #12 = { inlinehint }
+; CHECK: attributes #13 = { alignstack=4 }
+; CHECK: attributes #14 = { returns_twice }
+; CHECK: attributes #15 = { uwtable }
+; CHECK: attributes #16 = { nonlazybind }
+; CHECK: attributes #17 = { sanitize_address }
+; CHECK: attributes #18 = { sanitize_thread }
+; CHECK: attributes #19 = { sanitize_memory }
+; CHECK: attributes #20 = { "cpu"="cortex-a8" }
diff --git a/test/Bitcode/ptest-new.ll b/test/Bitcode/ptest-new.ll
index 276fb7ab6a13..735cc9c1cc44 100644
--- a/test/Bitcode/ptest-new.ll
+++ b/test/Bitcode/ptest-new.ll
@@ -13,10 +13,13 @@ entry:
ret i32 %add2
}
-; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
-; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
-; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
+; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) #1
+; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) #1
+; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) #1
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
diff --git a/test/Bitcode/ptest-old.ll b/test/Bitcode/ptest-old.ll
index fc6ed8ef7b67..fbe962fae51a 100644
--- a/test/Bitcode/ptest-old.ll
+++ b/test/Bitcode/ptest-old.ll
@@ -13,10 +13,13 @@ entry:
ret i32 %add2
}
-; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
-; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
-; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
+; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) #1
+; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) #1
+; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) #1
declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index e10a532341e6..728213f6130a 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -12,11 +12,8 @@ if(NOT LLVM_BUILD_TOOLS)
set(EXCLUDE_FROM_ALL ON)
endif()
-add_lit_testsuite(check-llvm "Running the LLVM regression tests"
- ${CMAKE_CURRENT_BINARY_DIR}
- PARAMS llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
- llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
- DEPENDS UnitTests
+# Set the depends list as a variable so that it can grow conditionally.
+set(LLVM_TEST_DEPENDS UnitTests
BugpointPasses LLVMHello
llc lli llvm-ar llvm-as
llvm-bcanalyzer llvm-diff
@@ -27,10 +24,23 @@ add_lit_testsuite(check-llvm "Running the LLVM regression tests"
llvm-nm
llvm-objdump
llvm-readobj
+ llvm-rtdyld
+ llvm-symbolizer
macho-dump opt
profile_rt-shared
FileCheck count not
- yaml2obj
+ yaml2obj obj2yaml)
+
+# If Intel JIT events are supported, depend on a tool that tests the listener.
+if( LLVM_USE_INTEL_JITEVENTS )
+ set(LLVM_TEST_DEPENDS ${LLVM_TEST_DEPENDS} llvm-jitlistener)
+endif( LLVM_USE_INTEL_JITEVENTS )
+
+add_lit_testsuite(check-llvm "Running the LLVM regression tests"
+ ${CMAKE_CURRENT_BINARY_DIR}
+ PARAMS llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+ llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
+ DEPENDS ${LLVM_TEST_DEPENDS}
)
set_target_properties(check-llvm PROPERTIES FOLDER "Tests")
diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll
new file mode 100644
index 000000000000..7cb373232a2c
--- /dev/null
+++ b/test/CodeGen/AArch64/adc.ll
@@ -0,0 +1,54 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
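+; There is no native i128 arithmetic: these operations should lower to 64-bit
+; adds/adcs (and subs/sbcs) carry chains.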
+define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
+; CHECK: test_simple:
+
+ %valadd = add i128 %a, %b
+; CHECK: adds [[ADDLO:x[0-9]+]], x0, x2
+; CHECK-NEXT: adcs [[ADDHI:x[0-9]+]], x1, x3
+
+ %valsub = sub i128 %valadd, %c
+; CHECK: subs x0, [[ADDLO]], x4
+; CHECK: sbcs x1, [[ADDHI]], x5
+
+ ret i128 %valsub
+; CHECK: ret
+}
+
+define i128 @test_imm(i128 %a) {
+; CHECK: test_imm:
+
+ %val = add i128 %a, 12
+; CHECK: adds x0, x0, #12
+; CHECK: adcs x1, x1, {{x[0-9]|xzr}}
+
+ ret i128 %val
+; CHECK: ret
+}
+
+define i128 @test_shifted(i128 %a, i128 %b) {
+; CHECK: test_shifted:
+
+ %rhs = shl i128 %b, 45
+
+ %val = add i128 %a, %rhs
+; CHECK: adds x0, x0, x2, lsl #45
+; CHECK: adcs x1, x1, {{x[0-9]}}
+
+ ret i128 %val
+; CHECK: ret
+}
+
+define i128 @test_extended(i128 %a, i16 %b) {
+; CHECK: test_extended:
+
+ %ext = sext i16 %b to i128
+ %rhs = shl i128 %ext, 3
+
+ %val = add i128 %a, %rhs
+; CHECK: adds x0, x0, w2, sxth #3
+; CHECK: adcs x1, x1, {{x[0-9]}}
+
+ ret i128 %val
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/addsub-shifted.ll b/test/CodeGen/AArch64/addsub-shifted.ll
new file mode 100644
index 000000000000..f2c74f6952b0
--- /dev/null
+++ b/test/CodeGen/AArch64/addsub-shifted.ll
@@ -0,0 +1,295 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_lsl_arith:
+
+ %rhs1 = load volatile i32* @var32
+ %shift1 = shl i32 %rhs1, 18
+ %val1 = add i32 %lhs32, %shift1
+ store volatile i32 %val1, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #18
+
+ %rhs2 = load volatile i32* @var32
+ %shift2 = shl i32 %rhs2, 31
+ %val2 = add i32 %shift2, %lhs32
+ store volatile i32 %val2, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+
+ %rhs3 = load volatile i32* @var32
+ %shift3 = shl i32 %rhs3, 5
+ %val3 = sub i32 %lhs32, %shift3
+ store volatile i32 %val3, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #5
+
+; Subtraction is not commutative!
+ %rhs4 = load volatile i32* @var32
+ %shift4 = shl i32 %rhs4, 19
+ %val4 = sub i32 %shift4, %lhs32
+ store volatile i32 %val4, i32* @var32
+; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #19
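+; (The shifted-register forms only allow the shift on the second source
+; operand, so "shift - lhs" cannot fold the lsl into the sub.)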
+
+ %lhs4a = load volatile i32* @var32
+ %shift4a = shl i32 %lhs4a, 15
+ %val4a = sub i32 0, %shift4a
+ store volatile i32 %val4a, i32* @var32
+; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsl #15
+
+ %rhs5 = load volatile i64* @var64
+ %shift5 = shl i64 %rhs5, 18
+ %val5 = add i64 %lhs64, %shift5
+ store volatile i64 %val5, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #18
+
+ %rhs6 = load volatile i64* @var64
+ %shift6 = shl i64 %rhs6, 31
+ %val6 = add i64 %shift6, %lhs64
+ store volatile i64 %val6, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #31
+
+ %rhs7 = load volatile i64* @var64
+ %shift7 = shl i64 %rhs7, 5
+ %val7 = sub i64 %lhs64, %shift7
+ store volatile i64 %val7, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #5
+
+; Subtraction is not commutative!
+ %rhs8 = load volatile i64* @var64
+ %shift8 = shl i64 %rhs8, 19
+ %val8 = sub i64 %shift8, %lhs64
+ store volatile i64 %val8, i64* @var64
+; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #19
+
+ %lhs8a = load volatile i64* @var64
+ %shift8a = shl i64 %lhs8a, 60
+ %val8a = sub i64 0, %shift8a
+ store volatile i64 %val8a, i64* @var64
+; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsl #60
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_lsr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_lsr_arith:
+
+ %shift1 = lshr i32 %rhs32, 18
+ %val1 = add i32 %lhs32, %shift1
+ store volatile i32 %val1, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #18
+
+ %shift2 = lshr i32 %rhs32, 31
+ %val2 = add i32 %shift2, %lhs32
+ store volatile i32 %val2, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #31
+
+ %shift3 = lshr i32 %rhs32, 5
+ %val3 = sub i32 %lhs32, %shift3
+ store volatile i32 %val3, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #5
+
+; Subtraction is not commutative!
+ %shift4 = lshr i32 %rhs32, 19
+ %val4 = sub i32 %shift4, %lhs32
+ store volatile i32 %val4, i32* @var32
+; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #19
+
+ %shift4a = lshr i32 %lhs32, 15
+ %val4a = sub i32 0, %shift4a
+ store volatile i32 %val4a, i32* @var32
+; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsr #15
+
+ %shift5 = lshr i64 %rhs64, 18
+ %val5 = add i64 %lhs64, %shift5
+ store volatile i64 %val5, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #18
+
+ %shift6 = lshr i64 %rhs64, 31
+ %val6 = add i64 %shift6, %lhs64
+ store volatile i64 %val6, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #31
+
+ %shift7 = lshr i64 %rhs64, 5
+ %val7 = sub i64 %lhs64, %shift7
+ store volatile i64 %val7, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #5
+
+; Subtraction is not commutative!
+ %shift8 = lshr i64 %rhs64, 19
+ %val8 = sub i64 %shift8, %lhs64
+ store volatile i64 %val8, i64* @var64
+; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #19
+
+ %shift8a = lshr i64 %lhs64, 45
+ %val8a = sub i64 0, %shift8a
+ store volatile i64 %val8a, i64* @var64
+; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsr #45
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_asr_arith:
+
+ %shift1 = ashr i32 %rhs32, 18
+ %val1 = add i32 %lhs32, %shift1
+ store volatile i32 %val1, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #18
+
+ %shift2 = ashr i32 %rhs32, 31
+ %val2 = add i32 %shift2, %lhs32
+ store volatile i32 %val2, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #31
+
+ %shift3 = ashr i32 %rhs32, 5
+ %val3 = sub i32 %lhs32, %shift3
+ store volatile i32 %val3, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #5
+
+; Subtraction is not commutative!
+ %shift4 = ashr i32 %rhs32, 19
+ %val4 = sub i32 %shift4, %lhs32
+ store volatile i32 %val4, i32* @var32
+; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #19
+
+ %shift4a = ashr i32 %lhs32, 15
+ %val4a = sub i32 0, %shift4a
+ store volatile i32 %val4a, i32* @var32
+; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, asr #15
+
+ %shift5 = ashr i64 %rhs64, 18
+ %val5 = add i64 %lhs64, %shift5
+ store volatile i64 %val5, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #18
+
+ %shift6 = ashr i64 %rhs64, 31
+ %val6 = add i64 %shift6, %lhs64
+ store volatile i64 %val6, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #31
+
+ %shift7 = ashr i64 %rhs64, 5
+ %val7 = sub i64 %lhs64, %shift7
+ store volatile i64 %val7, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #5
+
+; Subtraction is not commutative!
+ %shift8 = ashr i64 %rhs64, 19
+ %val8 = sub i64 %shift8, %lhs64
+ store volatile i64 %val8, i64* @var64
+; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #19
+
+ %shift8a = ashr i64 %lhs64, 45
+ %val8a = sub i64 0, %shift8a
+ store volatile i64 %val8a, i64* @var64
+; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, asr #45
+
+ ret void
+; CHECK: ret
+}
+
+define i32 @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_cmp:
+
+ %shift1 = shl i32 %rhs32, 13
+ %tst1 = icmp uge i32 %lhs32, %shift1
+ br i1 %tst1, label %t2, label %end
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsl #13
+
+t2:
+ %shift2 = lshr i32 %rhs32, 20
+ %tst2 = icmp ne i32 %lhs32, %shift2
+ br i1 %tst2, label %t3, label %end
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsr #20
+
+t3:
+ %shift3 = ashr i32 %rhs32, 9
+ %tst3 = icmp ne i32 %lhs32, %shift3
+ br i1 %tst3, label %t4, label %end
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, asr #9
+
+t4:
+ %shift4 = shl i64 %rhs64, 43
+ %tst4 = icmp uge i64 %lhs64, %shift4
+ br i1 %tst4, label %t5, label %end
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsl #43
+
+t5:
+ %shift5 = lshr i64 %rhs64, 20
+ %tst5 = icmp ne i64 %lhs64, %shift5
+ br i1 %tst5, label %t6, label %end
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsr #20
+
+t6:
+ %shift6 = ashr i64 %rhs64, 59
+ %tst6 = icmp ne i64 %lhs64, %shift6
+ br i1 %tst6, label %t7, label %end
+; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, asr #59
+
+t7:
+ ret i32 1
+end:
+
+ ret i32 0
+; CHECK: ret
+}
+
+define i32 @test_cmn(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+; CHECK: test_cmn:
+
+ %shift1 = shl i32 %rhs32, 13
+ %val1 = sub i32 0, %shift1
+ %tst1 = icmp uge i32 %lhs32, %val1
+ br i1 %tst1, label %t2, label %end
+ ; Important that this isn't lowered to a cmn instruction because if %rhs32 ==
+ ; 0 then the results will differ.
+; CHECK: sub [[RHS:w[0-9]+]], wzr, {{w[0-9]+}}, lsl #13
+; CHECK: cmp {{w[0-9]+}}, [[RHS]]
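+ ; (cmn sets flags as for an addition; adding zero clears the carry flag while
+ ; subtracting zero sets it, so an unsigned comparison would give the wrong
+ ; answer for a zero operand.)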
+
+t2:
+ %shift2 = lshr i32 %rhs32, 20
+ %val2 = sub i32 0, %shift2
+ %tst2 = icmp ne i32 %lhs32, %val2
+ br i1 %tst2, label %t3, label %end
+; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, lsr #20
+
+t3:
+ %shift3 = ashr i32 %rhs32, 9
+ %val3 = sub i32 0, %shift3
+ %tst3 = icmp eq i32 %lhs32, %val3
+ br i1 %tst3, label %t4, label %end
+; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, asr #9
+
+t4:
+ %shift4 = shl i64 %rhs64, 43
+ %val4 = sub i64 0, %shift4
+ %tst4 = icmp slt i64 %lhs64, %val4
+ br i1 %tst4, label %t5, label %end
+ ; Again, it's important that cmn isn't used here in case %rhs64 == 0.
+; CHECK: sub [[RHS:x[0-9]+]], xzr, {{x[0-9]+}}, lsl #43
+; CHECK: cmp {{x[0-9]+}}, [[RHS]]
+
+t5:
+ %shift5 = lshr i64 %rhs64, 20
+ %val5 = sub i64 0, %shift5
+ %tst5 = icmp ne i64 %lhs64, %val5
+ br i1 %tst5, label %t6, label %end
+; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, lsr #20
+
+t6:
+ %shift6 = ashr i64 %rhs64, 59
+ %val6 = sub i64 0, %shift6
+ %tst6 = icmp ne i64 %lhs64, %val6
+ br i1 %tst6, label %t7, label %end
+; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, asr #59
+
+t7:
+ ret i32 1
+end:
+
+ ret i32 0
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll
new file mode 100644
index 000000000000..5148807163c9
--- /dev/null
+++ b/test/CodeGen/AArch64/addsub.ll
@@ -0,0 +1,127 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+; Note that this should be refactored (for efficiency if nothing else)
+; when the PCS is implemented so we don't have to worry about the
+; loads and stores.
+
+@var_i32 = global i32 42
+@var_i64 = global i64 0
+
+; Add pure 12-bit immediates:
+define void @add_small() {
+; CHECK: add_small:
+
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #4095
+ %val32 = load i32* @var_i32
+ %newval32 = add i32 %val32, 4095
+ store i32 %newval32, i32* @var_i32
+
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #52
+ %val64 = load i64* @var_i64
+ %newval64 = add i64 %val64, 52
+ store i64 %newval64, i64* @var_i64
+
+ ret void
+}
+
+; Add 12-bit immediates, shifted left by 12 bits
+define void @add_med() {
+; CHECK: add_med:
+
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
+ %val32 = load i32* @var_i32
+ %newval32 = add i32 %val32, 14610432 ; =0xdef000
+ store i32 %newval32, i32* @var_i32
+
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12
+ %val64 = load i64* @var_i64
+ %newval64 = add i64 %val64, 16773120 ; =0xfff000
+ store i64 %newval64, i64* @var_i64
+
+ ret void
+}
+
+; Subtract 12-bit immediates
+define void @sub_small() {
+; CHECK: sub_small:
+
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4095
+ %val32 = load i32* @var_i32
+ %newval32 = sub i32 %val32, 4095
+ store i32 %newval32, i32* @var_i32
+
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #52
+ %val64 = load i64* @var_i64
+ %newval64 = sub i64 %val64, 52
+ store i64 %newval64, i64* @var_i64
+
+ ret void
+}
+
+; Subtract 12-bit immediates, shifted left by 12 bits
+define void @sub_med() {
+; CHECK: sub_med:
+
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12
+ %val32 = load i32* @var_i32
+ %newval32 = sub i32 %val32, 14610432 ; =0xdef000
+ store i32 %newval32, i32* @var_i32
+
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12
+ %val64 = load i64* @var_i64
+ %newval64 = sub i64 %val64, 16773120 ; =0xfff000
+ store i64 %newval64, i64* @var_i64
+
+ ret void
+}
+
+define void @testing() {
+; CHECK: testing:
+ %val = load i32* @var_i32
+
+; CHECK: cmp {{w[0-9]+}}, #4095
+; CHECK: b.ne .LBB4_6
+ %cmp_pos_small = icmp ne i32 %val, 4095
+ br i1 %cmp_pos_small, label %ret, label %test2
+
+test2:
+; CHECK: cmp {{w[0-9]+}}, #3567, lsl #12
+; CHECK: b.lo .LBB4_6
+ %newval2 = add i32 %val, 1
+ store i32 %newval2, i32* @var_i32
+ %cmp_pos_big = icmp ult i32 %val, 14610432
+ br i1 %cmp_pos_big, label %ret, label %test3
+
+test3:
+; CHECK: cmp {{w[0-9]+}}, #123
+; CHECK: b.lt .LBB4_6
+ %newval3 = add i32 %val, 2
+ store i32 %newval3, i32* @var_i32
+ %cmp_pos_slt = icmp slt i32 %val, 123
+ br i1 %cmp_pos_slt, label %ret, label %test4
+
+test4:
+; CHECK: cmp {{w[0-9]+}}, #321
+; CHECK: b.gt .LBB4_6
+ %newval4 = add i32 %val, 3
+ store i32 %newval4, i32* @var_i32
+ %cmp_pos_sgt = icmp sgt i32 %val, 321
+ br i1 %cmp_pos_sgt, label %ret, label %test5
+
+test5:
+; CHECK: cmn {{w[0-9]+}}, #444
+; CHECK: b.gt .LBB4_6
+ %newval5 = add i32 %val, 4
+ store i32 %newval5, i32* @var_i32
+  %cmp_neg_sgt = icmp sgt i32 %val, -444
+  br i1 %cmp_neg_sgt, label %ret, label %test6
+
+test6:
+ %newval6 = add i32 %val, 5
+ store i32 %newval6, i32* @var_i32
+ ret void
+
+ret:
+ ret void
+}
+; TODO: adds/subs
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll
new file mode 100644
index 000000000000..2dd16626ea9f
--- /dev/null
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -0,0 +1,189 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @addsub_i8rhs() {
+; CHECK: addsub_i8rhs:
+ %val8_tmp = load i8* @var8
+ %lhs32 = load i32* @var32
+ %lhs64 = load i64* @var64
+
+ ; Need this to prevent extension upon load and give a vanilla i8 operand.
+ %val8 = add i8 %val8_tmp, 123
+
+
+; Zero-extending to 32-bits
+ %rhs32_zext = zext i8 %val8 to i32
+ %res32_zext = add i32 %lhs32, %rhs32_zext
+ store volatile i32 %res32_zext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
+
+ %rhs32_zext_shift = shl i32 %rhs32_zext, 3
+ %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
+ store volatile i32 %res32_zext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
+
+
+; Zero-extending to 64-bits
+ %rhs64_zext = zext i8 %val8 to i64
+ %res64_zext = add i64 %lhs64, %rhs64_zext
+ store volatile i64 %res64_zext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
+
+ %rhs64_zext_shift = shl i64 %rhs64_zext, 1
+ %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
+ store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
+
+; Sign-extending to 32-bits
+ %rhs32_sext = sext i8 %val8 to i32
+ %res32_sext = add i32 %lhs32, %rhs32_sext
+ store volatile i32 %res32_sext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb
+
+ %rhs32_sext_shift = shl i32 %rhs32_sext, 1
+ %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
+ store volatile i32 %res32_sext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1
+
+; Sign-extending to 64-bits
+ %rhs64_sext = sext i8 %val8 to i64
+ %res64_sext = add i64 %lhs64, %rhs64_sext
+ store volatile i64 %res64_sext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb
+
+ %rhs64_sext_shift = shl i64 %rhs64_sext, 4
+ %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
+ store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4
+
+
+; CMP variants
+ %tst = icmp slt i32 %lhs32, %rhs32_zext
+ br i1 %tst, label %end, label %test2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtb
+
+test2:
+ %cmp_sext = sext i8 %val8 to i64
+ %tst2 = icmp eq i64 %lhs64, %cmp_sext
+ br i1 %tst2, label %other, label %end
+; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxtb
+
+other:
+ store volatile i32 %lhs32, i32* @var32
+ ret void
+
+end:
+ ret void
+}
+
+define void @addsub_i16rhs() {
+; CHECK: addsub_i16rhs:
+ %val16_tmp = load i16* @var16
+ %lhs32 = load i32* @var32
+ %lhs64 = load i64* @var64
+
+ ; Need this to prevent extension upon load and give a vanilla i16 operand.
+ %val16 = add i16 %val16_tmp, 123
+
+
+; Zero-extending to 32-bits
+ %rhs32_zext = zext i16 %val16 to i32
+ %res32_zext = add i32 %lhs32, %rhs32_zext
+ store volatile i32 %res32_zext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
+
+ %rhs32_zext_shift = shl i32 %rhs32_zext, 3
+ %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
+ store volatile i32 %res32_zext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
+
+
+; Zero-extending to 64-bits
+ %rhs64_zext = zext i16 %val16 to i64
+ %res64_zext = add i64 %lhs64, %rhs64_zext
+ store volatile i64 %res64_zext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
+
+ %rhs64_zext_shift = shl i64 %rhs64_zext, 1
+ %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
+ store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
+
+; Sign-extending to 32-bits
+ %rhs32_sext = sext i16 %val16 to i32
+ %res32_sext = add i32 %lhs32, %rhs32_sext
+ store volatile i32 %res32_sext, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth
+
+ %rhs32_sext_shift = shl i32 %rhs32_sext, 1
+ %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
+ store volatile i32 %res32_sext_shift, i32* @var32
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1
+
+; Sign-extending to 64-bits
+ %rhs64_sext = sext i16 %val16 to i64
+ %res64_sext = add i64 %lhs64, %rhs64_sext
+ store volatile i64 %res64_sext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth
+
+ %rhs64_sext_shift = shl i64 %rhs64_sext, 4
+ %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
+ store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4
+
+
+; CMP variants
+ %tst = icmp slt i32 %lhs32, %rhs32_zext
+ br i1 %tst, label %end, label %test2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxth
+
+test2:
+ %cmp_sext = sext i16 %val16 to i64
+ %tst2 = icmp eq i64 %lhs64, %cmp_sext
+ br i1 %tst2, label %other, label %end
+; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxth
+
+other:
+ store volatile i32 %lhs32, i32* @var32
+ ret void
+
+end:
+ ret void
+}
+
+; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
+; example), but the remaining instructions are probably not idiomatic
+; in the face of "add/sub (shifted register)" so I don't intend to.
+define void @addsub_i32rhs() {
+; CHECK: addsub_i32rhs:
+ %val32_tmp = load i32* @var32
+ %lhs64 = load i64* @var64
+
+ %val32 = add i32 %val32_tmp, 123
+
+ %rhs64_zext = zext i32 %val32 to i64
+ %res64_zext = add i64 %lhs64, %rhs64_zext
+ store volatile i64 %res64_zext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
+
+ %rhs64_zext_shift = shl i64 %rhs64_zext, 2
+ %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
+ store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
+
+ %rhs64_sext = sext i32 %val32 to i64
+ %res64_sext = add i64 %lhs64, %rhs64_sext
+ store volatile i64 %res64_sext, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
+
+ %rhs64_sext_shift = shl i64 %rhs64_sext, 2
+ %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
+ store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
+
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/adrp-relocation.ll b/test/CodeGen/AArch64/adrp-relocation.ll
new file mode 100644
index 000000000000..c33b442624a5
--- /dev/null
+++ b/test/CodeGen/AArch64/adrp-relocation.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -filetype=obj < %s | elf-dump | FileCheck %s
+
+define i64 @testfn() nounwind {
+entry:
+ ret i64 0
+}
+
+define i64 @foo() nounwind {
+entry:
+ %bar = alloca i64 ()*, align 8
+ store i64 ()* @testfn, i64 ()** %bar, align 8
+ %call = call i64 @testfn()
+ ret i64 %call
+}
+
+; The above should produce an ADRP/ADD pair to calculate the address of
+; testfn. The important point is that LLVM shouldn't think it can deal with the
+; relocation on the ADRP itself (even though it knows everything about the
+; relative offsets of testfn and foo) because its value depends on where this
+; object file's .text section gets relocated in memory.
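+; (0x113 is R_AARCH64_ADR_PREL_PG_HI21 and 0x115 is R_AARCH64_ADD_ABS_LO12_NC
+; in the AArch64 ELF ABI.)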
+
+; CHECK: .rela.text
+
+; CHECK: # Relocation 0
+; CHECK-NEXT: (('r_offset', 0x0000000000000010)
+; CHECK-NEXT: ('r_sym', 0x00000007)
+; CHECK-NEXT: ('r_type', 0x00000113)
+; CHECK-NEXT: ('r_addend', 0x0000000000000000)
+; CHECK-NEXT: ),
+; CHECK-NEXT: Relocation 1
+; CHECK-NEXT: (('r_offset', 0x0000000000000014)
+; CHECK-NEXT: ('r_sym', 0x00000007)
+; CHECK-NEXT: ('r_type', 0x00000115)
+; CHECK-NEXT: ('r_addend', 0x0000000000000000)
+; CHECK-NEXT: ),
diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll
new file mode 100644
index 000000000000..c62edf6503c6
--- /dev/null
+++ b/test/CodeGen/AArch64/alloca.ll
@@ -0,0 +1,134 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+declare void @use_addr(i8*)
+
+define void @test_simple_alloca(i64 %n) {
+; CHECK: test_simple_alloca:
+
+ %buf = alloca i8, i64 %n
+ ; Make sure we align the stack change to 16 bytes:
+; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15
+; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0
+
+ ; Make sure we change SP. It would be surprising if anything but x0 were used
+ ; for the final sp, but it could be, provided the result was then moved into x0.
+; CHECK: mov [[TMP:x[0-9]+]], sp
+; CHECK: sub x0, [[TMP]], [[SPDELTA]]
+; CHECK: mov sp, x0
+
+ call void @use_addr(i8* %buf)
+; CHECK: bl use_addr
+
+ ret void
+ ; Make sure epilogue restores sp from fp
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: add sp, sp, #32
+; CHECK: ret
+}
+
+declare void @use_addr_loc(i8*, i64*)
+
+define i64 @test_alloca_with_local(i64 %n) {
+; CHECK: test_alloca_with_local:
+; CHECK: sub sp, sp, #32
+; CHECK: stp x29, x30, [sp, #16]
+
+ %loc = alloca i64
+ %buf = alloca i8, i64 %n
+ ; Make sure we align the stack change to 16 bytes:
+; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15
+; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0
+
+ ; Make sure we change SP. It would be surprising if anything but x0 were used
+ ; for the final sp, but it could be, provided the result was then moved into x0.
+; CHECK: mov [[TMP:x[0-9]+]], sp
+; CHECK: sub x0, [[TMP]], [[SPDELTA]]
+; CHECK: mov sp, x0
+
+ ; Obviously suboptimal code here, but we need it to get &local into x1
+; CHECK: sub [[TMP:x[0-9]+]], x29, [[LOC_FROM_FP:#[0-9]+]]
+; CHECK: add x1, [[TMP]], #0
+
+ call void @use_addr_loc(i8* %buf, i64* %loc)
+; CHECK: bl use_addr_loc
+
+ %val = load i64* %loc
+; CHECK: sub x[[TMP:[0-9]+]], x29, [[LOC_FROM_FP]]
+; CHECK: ldr x0, [x[[TMP]]]
+
+ ret i64 %val
+ ; Make sure epilogue restores sp from fp
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: add sp, sp, #32
+; CHECK: ret
+}
+
+define void @test_variadic_alloca(i64 %n, ...) {
+; CHECK: test_variadic_alloca:
+
+; CHECK: sub sp, sp, #208
+; CHECK: stp x29, x30, [sp, #192]
+; CHECK: add x29, sp, #192
+; CHECK: sub [[TMP:x[0-9]+]], x29, #192
+; CHECK: add x8, [[TMP]], #0
+; CHECK: str q7, [x8, #112]
+; [...]
+; CHECK: str q1, [x8, #16]
+
+ %addr = alloca i8, i64 %n
+
+ call void @use_addr(i8* %addr)
+; CHECK: bl use_addr
+
+ ret void
+; CHECK: sub sp, x29, #192
+; CHECK: ldp x29, x30, [sp, #192]
+; CHECK: add sp, sp, #208
+}
+
+define void @test_alloca_large_frame(i64 %n) {
+; CHECK: test_alloca_large_frame:
+
+; CHECK: sub sp, sp, #496
+; CHECK: stp x29, x30, [sp, #480]
+; CHECK: add x29, sp, #480
+; CHECK: sub sp, sp, #48
+; CHECK: sub sp, sp, #1953, lsl #12
+
+ %addr1 = alloca i8, i64 %n
+ %addr2 = alloca i64, i64 1000000
+
+ call void @use_addr_loc(i8* %addr1, i64* %addr2)
+
+ ret void
+; CHECK: sub sp, x29, #480
+; CHECK: ldp x29, x30, [sp, #480]
+; CHECK: add sp, sp, #496
+}
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+
+define void @test_scoped_alloca(i64 %n) {
+; CHECK: test_scoped_alloca
+; CHECK: sub sp, sp, #32
+
+ %sp = call i8* @llvm.stacksave()
+; CHECK: mov [[SAVED_SP:x[0-9]+]], sp
+
+ %addr = alloca i8, i64 %n
+; CHECK: and [[SPDELTA:x[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0
+; CHECK: mov [[OLDSP:x[0-9]+]], sp
+; CHECK: sub [[NEWSP:x[0-9]+]], [[OLDSP]], [[SPDELTA]]
+; CHECK: mov sp, [[NEWSP]]
+
+ call void @use_addr(i8* %addr)
+; CHECK: bl use_addr
+
+ call void @llvm.stackrestore(i8* %sp)
+; CHECK: mov sp, [[SAVED_SP]]
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll
new file mode 100644
index 000000000000..e10bbb0f8691
--- /dev/null
+++ b/test/CodeGen/AArch64/analyze-branch.ll
@@ -0,0 +1,231 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test checks that LLVM can do basic stripping and reapplying of branches
+; to basic blocks.
+
+declare void @test_true()
+declare void @test_false()
+
+; !0 corresponds to a branch being taken, !1 to one not being taken.
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
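+
+; The hot successor should become the fallthrough block: with !0 the
+; conditional branch should target the false block, and with !1 the true one.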
+
+define void @test_Bcc_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_Bcc_fallthrough_taken:
+ %tst = icmp eq i32 %in, 42
+ br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: cmp {{w[0-9]+}}, #42
+
+; CHECK: b.ne [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_Bcc_fallthrough_nottaken(i32 %in) nounwind {
+; CHECK: test_Bcc_fallthrough_nottaken:
+ %tst = icmp eq i32 %in, 42
+ br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: cmp {{w[0-9]+}}, #42
+
+; CHECK: b.eq [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_CBZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_CBZ_fallthrough_taken:
+ %tst = icmp eq i32 %in, 0
+ br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: cbnz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_CBZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_CBZ_fallthrough_nottaken:
+ %tst = icmp eq i64 %in, 0
+ br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: cbz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_CBNZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_CBNZ_fallthrough_taken:
+ %tst = icmp ne i32 %in, 0
+ br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: cbz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_CBNZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_CBNZ_fallthrough_nottaken:
+ %tst = icmp ne i64 %in, 0
+ br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: cbnz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_TBZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_TBZ_fallthrough_taken:
+ %bit = and i32 %in, 32768
+ %tst = icmp eq i32 %bit, 0
+ br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: tbnz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_TBZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_TBZ_fallthrough_nottaken:
+ %bit = and i64 %in, 32768
+ %tst = icmp eq i64 %bit, 0
+ br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: tbz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+
+define void @test_TBNZ_fallthrough_taken(i32 %in) nounwind {
+; CHECK: test_TBNZ_fallthrough_taken:
+ %bit = and i32 %in, 32768
+ %tst = icmp ne i32 %bit, 0
+ br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: tbz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: bl test_false
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_TBNZ_fallthrough_nottaken(i64 %in) nounwind {
+; CHECK: test_TBNZ_fallthrough_nottaken:
+ %bit = and i64 %in, 32768
+ %tst = icmp ne i64 %bit, 0
+ br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: tbnz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: // BB#
+; CHECK-NEXT: bl test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: bl test_true
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
new file mode 100644
index 000000000000..3c03e47147b0
--- /dev/null
+++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define i32 @foo(i32* %var, i1 %cond) {
+; CHECK: foo:
+ br i1 %cond, label %atomic_ver, label %simple_ver
+simple_ver:
+ %oldval = load i32* %var
+ %newval = add nsw i32 %oldval, -1
+ store i32 %newval, i32* %var
+ br label %somewhere
+atomic_ver:
+ %val = atomicrmw add i32* %var, i32 -1 seq_cst
+ br label %somewhere
+; CHECK: dmb
+; CHECK: ldxr
+; CHECK: dmb
+  ; The key point here is that the second dmb isn't immediately followed by the
+  ; simple_ver basic block, which is what LLVM tried to arrange while DMB was
+  ; marked with isBarrier. For now, look for something that looks like "somewhere".
+; CHECK-NEXT: mov
+somewhere:
+  %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver ]
+ ret i32 %combined
+}
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
new file mode 100644
index 000000000000..f3c16171cc83
--- /dev/null
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -0,0 +1,1055 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_add_i8:
+ %old = atomicrmw add i8* @var8, i8 %offset seq_cst
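+ ; A seq_cst atomicrmw expands to a load-exclusive/store-exclusive loop
+ ; bracketed by dmb ish barriers; cbnz retries until the store-exclusive
+ ; succeeds.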
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_add_i16:
+ %old = atomicrmw add i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_add_i32:
+ %old = atomicrmw add i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_add_i64:
+ %old = atomicrmw add i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i8:
+ %old = atomicrmw sub i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i16:
+ %old = atomicrmw sub i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i32:
+ %old = atomicrmw sub i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_sub_i64:
+ %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_and_i8:
+ %old = atomicrmw and i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_and_i16:
+ %old = atomicrmw and i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_and_i32:
+ %old = atomicrmw and i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_and_i64:
+ %old = atomicrmw and i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_or_i8:
+ %old = atomicrmw or i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_or_i16:
+ %old = atomicrmw or i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_or_i32:
+ %old = atomicrmw or i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_or_i64:
+ %old = atomicrmw or i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i8:
+ %old = atomicrmw xor i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i16:
+ %old = atomicrmw xor i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i32:
+ %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_xor_i64:
+ %old = atomicrmw xor i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i8:
+ %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i16:
+ %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i32:
+ %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_xchg_i64:
+ %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+
+define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_min_i8:
+ %old = atomicrmw min i8* @var8, i8 %offset seq_cst
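+ ; For the sub-word operations the compare must extend the loaded value
+ ; (sxtb/sxth here, uxtb/uxth in the unsigned variants) so the comparison is
+ ; done at full register width.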
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_min_i16:
+ %old = atomicrmw min i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_min_i32:
+ %old = atomicrmw min i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_min_i64:
+ %old = atomicrmw min i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_max_i8:
+ %old = atomicrmw max i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_max_i16:
+ %old = atomicrmw max i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], sxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_max_i32:
+ %old = atomicrmw max i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_max_i64:
+ %old = atomicrmw max i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i8:
+ %old = atomicrmw umin i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i16:
+ %old = atomicrmw umin i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i32:
+ %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_umin_i64:
+ %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i8:
+ %old = atomicrmw umax i8* @var8, i8 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i16:
+ %old = atomicrmw umax i16* @var16, i16 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]], uxth
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i32:
+ %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w0, w[[OLD]]
+; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
+; CHECK: test_atomic_load_umax_i64:
+ %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp x0, x[[OLD]]
+; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo
+; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
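+; Summary note (added for reference, not from the original test): in the
+; min/max expansions above, the csel condition picks which value survives.
+; Given "cmp w0, w[OLD]; csel NEW, w[OLD], w0, cond", the loaded value is
+; kept when the condition holds: gt for min, lt for max, hi for umin and lo
+; for umax, with sxtb/sxth or uxtb/uxth extensions on the compare for the
+; sub-word widths.
+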
+define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i8:
+ %old = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+ ; As above, w1 is a reasonable guess.
+; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i16:
+ %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+ ; As above, w1 is a reasonable guess.
+; CHECK: stxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i32:
+ %old = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; w0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+ ; As above, w1 is a reasonable guess.
+; CHECK: stxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
+; CHECK: test_atomic_cmpxchg_i64:
+ %old = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst
+; CHECK: dmb ish
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
+
+; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+ ; x0 below is a reasonable guess but could change: it certainly comes into
+ ; the function there.
+; CHECK-NEXT: cmp x[[OLD]], x0
+; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
+ ; As above, x1 is a reasonable guess.
+; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: dmb ish
+
+; CHECK: mov x0, x[[OLD]]
+ ret i64 %old
+}
+
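+; The cmpxchg expansions above all share one shape: a leading dmb ish, an
+; ldxr/stxr retry loop with an early b.ne exit when the comparison fails,
+; and a trailing dmb ish. As an illustrative sketch (a hypothetical extra
+; case, deliberately left without CHECK lines), a cmpxchg through a pointer
+; argument should expand the same way, just without the adrp/add address
+; materialisation:
+define i32 @cmpxchg_sketch(i32 %wanted, i32 %new, i32* %p) nounwind {
+  %old = cmpxchg i32* %p, i32 %wanted, i32 %new seq_cst
+  ret i32 %old
+}
+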
+define i8 @test_atomic_load_monotonic_i8() nounwind {
+; CHECK: test_atomic_load_monotonic_i8:
+ %val = load atomic i8* @var8 monotonic, align 1
+; CHECK-NOT: dmb
+; CHECK: adrp x[[HIADDR:[0-9]+]], var8
+; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8]
+; CHECK-NOT: dmb
+
+ ret i8 %val
+}
+
+define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
+; CHECK: test_atomic_load_monotonic_regoff_i8:
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i8*
+
+ %val = load atomic i8* %addr monotonic, align 1
+; CHECK-NOT: dmb
+; CHECK: ldrb w0, [x0, x1]
+; CHECK-NOT: dmb
+
+ ret i8 %val
+}
+
+define i8 @test_atomic_load_acquire_i8() nounwind {
+; CHECK: test_atomic_load_acquire_i8:
+ %val = load atomic i8* @var8 acquire, align 1
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
+
+; CHECK: ldarb w0, [x[[ADDR]]]
+ ret i8 %val
+}
+
+define i8 @test_atomic_load_seq_cst_i8() nounwind {
+; CHECK: test_atomic_load_seq_cst_i8:
+ %val = load atomic i8* @var8 seq_cst, align 1
+; CHECK: adrp x[[HIADDR:[0-9]+]], var8
+; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8]
+; CHECK: dmb ish
+ ret i8 %val
+}
+
+define i16 @test_atomic_load_monotonic_i16() nounwind {
+; CHECK: test_atomic_load_monotonic_i16:
+ %val = load atomic i16* @var16 monotonic, align 2
+; CHECK-NOT: dmb
+; CHECK: adrp x[[HIADDR:[0-9]+]], var16
+; CHECK: ldrh w0, [x[[HIADDR]], #:lo12:var16]
+; CHECK-NOT: dmb
+
+ ret i16 %val
+}
+
+define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind {
+; CHECK: test_atomic_load_monotonic_regoff_i32:
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i32*
+
+ %val = load atomic i32* %addr monotonic, align 4
+; CHECK-NOT: dmb
+; CHECK: ldr w0, [x0, x1]
+; CHECK-NOT: dmb
+
+ ret i32 %val
+}
+
+define i64 @test_atomic_load_seq_cst_i64() nounwind {
+; CHECK: test_atomic_load_seq_cst_i64:
+ %val = load atomic i64* @var64 seq_cst, align 8
+; CHECK: adrp x[[HIADDR:[0-9]+]], var64
+; CHECK: ldr x0, [x[[HIADDR]], #:lo12:var64]
+; CHECK: dmb ish
+ ret i64 %val
+}
+
+define void @test_atomic_store_monotonic_i8(i8 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_i8:
+ store atomic i8 %val, i8* @var8 monotonic, align 1
+; CHECK: adrp x[[HIADDR:[0-9]+]], var8
+; CHECK: strb w0, [x[[HIADDR]], #:lo12:var8]
+
+ ret void
+}
+
+define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_regoff_i8:
+
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i8*
+
+ store atomic i8 %val, i8* %addr monotonic, align 1
+; CHECK: strb w2, [x0, x1]
+
+ ret void
+}
+define void @test_atomic_store_release_i8(i8 %val) nounwind {
+; CHECK: test_atomic_store_release_i8:
+ store atomic i8 %val, i8* @var8 release, align 1
+; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK: stlrb w0, [x[[ADDR]]]
+
+ ret void
+}
+
+define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
+; CHECK: test_atomic_store_seq_cst_i8:
+ store atomic i8 %val, i8* @var8 seq_cst, align 1
+; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK: stlrb w0, [x[[ADDR]]]
+; CHECK: dmb ish
+
+ ret void
+}
+
+define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_i16:
+ store atomic i16 %val, i16* @var16 monotonic, align 2
+; CHECK: adrp x[[HIADDR:[0-9]+]], var16
+; CHECK: strh w0, [x[[HIADDR]], #:lo12:var16]
+
+ ret void
+}
+
+define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind {
+; CHECK: test_atomic_store_monotonic_regoff_i32:
+
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i32*
+
+ store atomic i32 %val, i32* %addr monotonic, align 4
+; CHECK: str w2, [x0, x1]
+
+ ret void
+}
+
+define void @test_atomic_store_release_i64(i64 %val) nounwind {
+; CHECK: test_atomic_store_release_i64:
+ store atomic i64 %val, i64* @var64 release, align 8
+; CHECK: adrp [[HIADDR:x[0-9]+]], var64
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64
+; CHECK: stlr x0, [x[[ADDR]]]
+
+ ret void
+}
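+
+; Summary note (added for reference, not from the original test): for atomic
+; loads and stores the checks above expect monotonic to use plain ldr/str
+; forms with no dmb, acquire loads to use ldar*, release stores to use
+; stlr*, and seq_cst to add a trailing dmb ish on top of the plain or stlr
+; form.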
diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll
new file mode 100644
index 000000000000..da94041c95ff
--- /dev/null
+++ b/test/CodeGen/AArch64/basic-pic.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -filetype=obj %s -o -| llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s
+
+@var = global i32 0
+
+; CHECK-ELF: RELOCATION RECORDS FOR [.text]
+
+define i32 @get_globalvar() {
+; CHECK: get_globalvar:
+
+ %val = load i32* @var
+; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
+; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], #:got_lo12:var]
+; CHECK: ldr w0, [x[[GOTLOC]]]
+
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var
+ ret i32 %val
+}
+
+define i32* @get_globalvaraddr() {
+; CHECK: get_globalvaraddr:
+
+ %val = load i32* @var
+; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
+; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:var]
+
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var
+ ret i32* @var
+}
+
+@hiddenvar = hidden global i32 0
+
+define i32 @get_hiddenvar() {
+; CHECK: get_hiddenvar:
+
+ %val = load i32* @hiddenvar
+; CHECK: adrp x[[HI:[0-9]+]], hiddenvar
+; CHECK: ldr w0, [x[[HI]], #:lo12:hiddenvar]
+
+; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
+; CHECK-ELF: R_AARCH64_LDST32_ABS_LO12_NC hiddenvar
+ ret i32 %val
+}
+
+define i32* @get_hiddenvaraddr() {
+; CHECK: get_hiddenvaraddr:
+
+ %val = load i32* @hiddenvar
+; CHECK: adrp [[HI:x[0-9]+]], hiddenvar
+; CHECK: add x0, [[HI]], #:lo12:hiddenvar
+
+; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar
+; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC hiddenvar
+ ret i32* @hiddenvar
+}
+
+define void()* @get_func() {
+; CHECK: get_func:
+
+ ret void()* bitcast(void()*()* @get_func to void()*)
+; CHECK: adrp x[[GOTHI:[0-9]+]], :got:get_func
+; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:get_func]
+
+ ; It is particularly important that the ADRP gets a relocation: LLVM tends
+ ; to think it can relax it because it knows where get_func is. It can't!
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE get_func
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC get_func
+}
\ No newline at end of file
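+
+; An extra illustrative case (hypothetical, not part of the original test,
+; and without CHECK lines): a global that is only declared must also be
+; reached through the GOT under PIC, since its definition may live in
+; another DSO. The expected pattern would mirror get_globalvar's
+; adrp/ldr :got: sequence.
+@extvar = external global i32
+
+define i32 @get_externvar() {
+  %val = load i32* @extvar
+  ret i32 %val
+}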
diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll
new file mode 100644
index 000000000000..d1191f6aaa8a
--- /dev/null
+++ b/test/CodeGen/AArch64/bitfield-insert-0.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -disassemble - | FileCheck %s
+
+; The encoding of lsb -> immr in the code-generated bitfield instructions was
+; wrong at one point, in the edge case where lsb = 0. Just make sure.
+
+define void @test_bfi0(i32* %existing, i32* %new) {
+; CHECK: bfxil {{w[0-9]+}}, {{w[0-9]+}}, #0, #18
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 4294705152 ; 0xfffc_0000
+
+ %newval = load volatile i32* %new
+ %newval_masked = and i32 %newval, 262143 ; = 0x0003_ffff
+
+ %combined = or i32 %newval_masked, %oldval_keep
+ store volatile i32 %combined, i32* %existing
+
+ ret void
+}
\ No newline at end of file
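+
+; For reference (an added note, not from the original test): in the A64
+; aliases, "bfi Wd, Wn, #lsb, #width" encodes immr = (32 - lsb) mod 32 and
+; imms = width - 1, while "bfxil" encodes immr = lsb and
+; imms = lsb + width - 1. The modulo is what makes lsb = 0 the edge case:
+; it must wrap to immr = 0 rather than 32.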
diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll
new file mode 100644
index 000000000000..3e871b9a6d27
--- /dev/null
+++ b/test/CodeGen/AArch64/bitfield-insert.ll
@@ -0,0 +1,193 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; First, a simple example from Clang. The registers could plausibly be
+; different, but probably won't be.
+
+%struct.foo = type { i8, [2 x i8], i8 }
+
+define [1 x i64] @from_clang([1 x i64] %f.coerce, i32 %n) nounwind readnone {
+; CHECK: from_clang:
+; CHECK: bfi w0, w1, #3, #4
+; CHECK-NEXT: ret
+
+entry:
+ %f.coerce.fca.0.extract = extractvalue [1 x i64] %f.coerce, 0
+ %tmp.sroa.0.0.extract.trunc = trunc i64 %f.coerce.fca.0.extract to i32
+ %bf.value = shl i32 %n, 3
+ %0 = and i32 %bf.value, 120
+ %f.sroa.0.0.insert.ext.masked = and i32 %tmp.sroa.0.0.extract.trunc, 135
+ %1 = or i32 %f.sroa.0.0.insert.ext.masked, %0
+ %f.sroa.0.0.extract.trunc = zext i32 %1 to i64
+ %tmp1.sroa.1.1.insert.insert = and i64 %f.coerce.fca.0.extract, 4294967040
+ %tmp1.sroa.0.0.insert.insert = or i64 %f.sroa.0.0.extract.trunc, %tmp1.sroa.1.1.insert.insert
+ %.fca.0.insert = insertvalue [1 x i64] undef, i64 %tmp1.sroa.0.0.insert.insert, 0
+ ret [1 x i64] %.fca.0.insert
+}
+
+define void @test_whole32(i32* %existing, i32* %new) {
+; CHECK: test_whole32:
+; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #5
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff
+
+ %newval = load volatile i32* %new
+ %newval_shifted = shl i32 %newval, 26
+ %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000
+
+ %combined = or i32 %oldval_keep, %newval_masked
+ store volatile i32 %combined, i32* %existing
+
+ ret void
+}
+
+define void @test_whole64(i64* %existing, i64* %new) {
+; CHECK: test_whole64:
+; CHECK: bfi {{x[0-9]+}}, {{x[0-9]+}}, #26, #14
+; CHECK-NOT: and
+; CHECK: ret
+
+ %oldval = load volatile i64* %existing
+ %oldval_keep = and i64 %oldval, 18446742974265032703 ; = 0xffffff0003ffffffL
+
+ %newval = load volatile i64* %new
+ %newval_shifted = shl i64 %newval, 26
+ %newval_masked = and i64 %newval_shifted, 1099444518912 ; = 0xfffc000000
+
+ %combined = or i64 %oldval_keep, %newval_masked
+ store volatile i64 %combined, i64* %existing
+
+ ret void
+}
+
+define void @test_whole32_from64(i64* %existing, i64* %new) {
+; CHECK: test_whole32_from64:
+; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #{{0|16}}, #16
+; CHECK-NOT: and
+; CHECK: ret
+
+ %oldval = load volatile i64* %existing
+ %oldval_keep = and i64 %oldval, 4294901760 ; = 0xffff0000
+
+ %newval = load volatile i64* %new
+ %newval_masked = and i64 %newval, 65535 ; = 0xffff
+
+ %combined = or i64 %oldval_keep, %newval_masked
+ store volatile i64 %combined, i64* %existing
+
+ ret void
+}
+
+define void @test_32bit_masked(i32 *%existing, i32 *%new) {
+; CHECK: test_32bit_masked:
+; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4
+; CHECK: and {{w[0-9]+}}, [[INSERT]], #0xff
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 135 ; = 0x87
+
+ %newval = load volatile i32* %new
+ %newval_shifted = shl i32 %newval, 3
+ %newval_masked = and i32 %newval_shifted, 120 ; = 0x78
+
+ %combined = or i32 %oldval_keep, %newval_masked
+ store volatile i32 %combined, i32* %existing
+
+ ret void
+}
+
+define void @test_64bit_masked(i64 *%existing, i64 *%new) {
+; CHECK: test_64bit_masked:
+; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8
+; CHECK: and {{x[0-9]+}}, [[INSERT]], #0xffff00000000
+
+ %oldval = load volatile i64* %existing
+ %oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000
+
+ %newval = load volatile i64* %new
+ %newval_shifted = shl i64 %newval, 40
+ %newval_masked = and i64 %newval_shifted, 280375465082880 ; = 0xff00_0000_0000
+
+ %combined = or i64 %newval_masked, %oldval_keep
+ store volatile i64 %combined, i64* %existing
+
+ ret void
+}
+
+; The mask is too complicated for a literal ANDwwi, so make sure other avenues are tried.
+define void @test_32bit_complexmask(i32 *%existing, i32 *%new) {
+; CHECK: test_32bit_complexmask:
+; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 647 ; = 0x287
+
+ %newval = load volatile i32* %new
+ %newval_shifted = shl i32 %newval, 3
+  %newval_masked = and i32 %newval_shifted, 120 ; = 0x78
+
+ %combined = or i32 %oldval_keep, %newval_masked
+ store volatile i32 %combined, i32* %existing
+
+ ret void
+}
+
+; Neither mask is a contiguous set of 1s, so BFI can't be used.
+define void @test_32bit_badmask(i32 *%existing, i32 *%new) {
+; CHECK: test_32bit_badmask:
+; CHECK-NOT: bfi
+; CHECK: ret
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 135 ; = 0x87
+
+ %newval = load volatile i32* %new
+ %newval_shifted = shl i32 %newval, 3
+ %newval_masked = and i32 %newval_shifted, 632 ; = 0x278
+
+ %combined = or i32 %oldval_keep, %newval_masked
+ store volatile i32 %combined, i32* %existing
+
+ ret void
+}
+
+; Ditto
+define void @test_64bit_badmask(i64 *%existing, i64 *%new) {
+; CHECK: test_64bit_badmask:
+; CHECK-NOT: bfi
+; CHECK: ret
+
+ %oldval = load volatile i64* %existing
+ %oldval_keep = and i64 %oldval, 135 ; = 0x87
+
+ %newval = load volatile i64* %new
+ %newval_shifted = shl i64 %newval, 3
+  %newval_masked = and i64 %newval_shifted, 664 ; = 0x298
+
+ %combined = or i64 %oldval_keep, %newval_masked
+ store volatile i64 %combined, i64* %existing
+
+ ret void
+}
+
+; Bitfield insert where there's a left-over shr needed at the beginning
+; (e.g. result of str.bf1 = str.bf2)
+define void @test_32bit_with_shr(i32* %existing, i32* %new) {
+; CHECK: test_32bit_with_shr:
+
+ %oldval = load volatile i32* %existing
+ %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff
+
+ %newval = load i32* %new
+ %newval_shifted = shl i32 %newval, 12
+ %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000
+
+ %combined = or i32 %oldval_keep, %newval_masked
+ store volatile i32 %combined, i32* %existing
+; CHECK: lsr [[BIT:w[0-9]+]], {{w[0-9]+}}, #14
+; CHECK: bfi {{w[0-9]+}}, [[BIT]], #26, #5
+
+ ret void
+}
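+
+; Why the lsr #14 above (an added note, not from the original test): after
+; the shl by 12, the mask 0x7c000000 keeps bits 26-30 of the result, which
+; correspond to bits 14-18 of %newval. The backend therefore has to shift
+; those bits down to bit 0 (lsr #14) before re-inserting them at lsb 26
+; with bfi.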
+
diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll
new file mode 100644
index 000000000000..36d337ef05ef
--- /dev/null
+++ b/test/CodeGen/AArch64/bitfield.ll
@@ -0,0 +1,218 @@
+
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_extendb(i8 %var) {
+; CHECK: test_extendb:
+
+ %sxt32 = sext i8 %var to i32
+ store volatile i32 %sxt32, i32* @var32
+; CHECK: sxtb {{w[0-9]+}}, {{w[0-9]+}}
+
+ %sxt64 = sext i8 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
+
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
+ %uxt32 = zext i8 %var to i32
+ store volatile i32 %uxt32, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff
+
+ %uxt64 = zext i8 %var to i64
+ store volatile i64 %uxt64, i64* @var64
+; CHECK: uxtb {{x[0-9]+}}, {{w[0-9]+}}
+ ret void
+}
+
+define void @test_extendh(i16 %var) {
+; CHECK: test_extendh:
+
+ %sxt32 = sext i16 %var to i32
+ store volatile i32 %sxt32, i32* @var32
+; CHECK: sxth {{w[0-9]+}}, {{w[0-9]+}}
+
+ %sxt64 = sext i16 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
+
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
+ %uxt32 = zext i16 %var to i32
+ store volatile i32 %uxt32, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff
+
+ %uxt64 = zext i16 %var to i64
+ store volatile i64 %uxt64, i64* @var64
+; CHECK: uxth {{x[0-9]+}}, {{w[0-9]+}}
+ ret void
+}
+
+define void @test_extendw(i32 %var) {
+; CHECK: test_extendw:
+
+ %sxt64 = sext i32 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}}
+
+ %uxt64 = zext i32 %var to i64
+ store volatile i64 %uxt64, i64* @var64
+; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #0, #32
+ ret void
+}
+
+define void @test_shifts(i32 %val32, i64 %val64) {
+; CHECK: test_shifts:
+
+ %shift1 = ashr i32 %val32, 31
+ store volatile i32 %shift1, i32* @var32
+; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, #31
+
+ %shift2 = lshr i32 %val32, 8
+ store volatile i32 %shift2, i32* @var32
+; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #8
+
+ %shift3 = shl i32 %val32, 1
+ store volatile i32 %shift3, i32* @var32
+; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #1
+
+ %shift4 = ashr i64 %val64, 31
+ store volatile i64 %shift4, i64* @var64
+; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #31
+
+ %shift5 = lshr i64 %val64, 8
+ store volatile i64 %shift5, i64* @var64
+; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #8
+
+ %shift6 = shl i64 %val64, 63
+ store volatile i64 %shift6, i64* @var64
+; CHECK: lsl {{x[0-9]+}}, {{x[0-9]+}}, #63
+
+ %shift7 = ashr i64 %val64, 63
+ store volatile i64 %shift7, i64* @var64
+; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #63
+
+ %shift8 = lshr i64 %val64, 63
+ store volatile i64 %shift8, i64* @var64
+; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #63
+
+ %shift9 = lshr i32 %val32, 31
+ store volatile i32 %shift9, i32* @var32
+; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #31
+
+ %shift10 = shl i32 %val32, 31
+ store volatile i32 %shift10, i32* @var32
+; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #31
+
+ ret void
+}
+
+; LLVM can produce in-register extensions taking place entirely with
+; 64-bit registers too.
+define void @test_sext_inreg_64(i64 %in) {
+; CHECK: test_sext_inreg_64:
+
+; i1 doesn't have an official alias, but crops up and is handled by
+; the bitfield ops.
+ %trunc_i1 = trunc i64 %in to i1
+ %sext_i1 = sext i1 %trunc_i1 to i64
+ store volatile i64 %sext_i1, i64* @var64
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1
+
+ %trunc_i8 = trunc i64 %in to i8
+ %sext_i8 = sext i8 %trunc_i8 to i64
+ store volatile i64 %sext_i8, i64* @var64
+; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
+
+ %trunc_i16 = trunc i64 %in to i16
+ %sext_i16 = sext i16 %trunc_i16 to i64
+ store volatile i64 %sext_i16, i64* @var64
+; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
+
+ %trunc_i32 = trunc i64 %in to i32
+ %sext_i32 = sext i32 %trunc_i32 to i64
+ store volatile i64 %sext_i32, i64* @var64
+; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}}
+ ret void
+}
+
+; These instructions don't actually select to official bitfield
+; operations, but it's important that we select them somehow:
+define void @test_zext_inreg_64(i64 %in) {
+; CHECK: test_zext_inreg_64:
+
+ %trunc_i8 = trunc i64 %in to i8
+ %zext_i8 = zext i8 %trunc_i8 to i64
+ store volatile i64 %zext_i8, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff
+
+ %trunc_i16 = trunc i64 %in to i16
+ %zext_i16 = zext i16 %trunc_i16 to i64
+ store volatile i64 %zext_i16, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff
+
+ %trunc_i32 = trunc i64 %in to i32
+ %zext_i32 = zext i32 %trunc_i32 to i64
+ store volatile i64 %zext_i32, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffffffff
+
+ ret void
+}
+
+define i64 @test_sext_inreg_from_32(i32 %in) {
+; CHECK: test_sext_inreg_from_32:
+
+ %small = trunc i32 %in to i1
+ %ext = sext i1 %small to i64
+
+ ; Different registers are, of course, possible, though suboptimal. This is
+ ; making sure that a 64-bit "(sext_inreg (anyext GPR32), i1)" uses the
+ ; 64-bit sbfx rather than just a 32-bit one.
+; CHECK: sbfx x0, x0, #0, #1
+ ret i64 %ext
+}
+
+
+define i32 @test_ubfx32(i32* %addr) {
+; CHECK: test_ubfx32:
+; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #23, #3
+
+ %fields = load i32* %addr
+ %shifted = lshr i32 %fields, 23
+ %masked = and i32 %shifted, 7
+ ret i32 %masked
+}
+
+define i64 @test_ubfx64(i64* %addr) {
+; CHECK: test_ubfx64:
+; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #25, #10
+
+ %fields = load i64* %addr
+ %shifted = lshr i64 %fields, 25
+ %masked = and i64 %shifted, 1023
+ ret i64 %masked
+}
+
+define i32 @test_sbfx32(i32* %addr) {
+; CHECK: test_sbfx32:
+; CHECK: sbfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #3
+
+ %fields = load i32* %addr
+ %shifted = shl i32 %fields, 23
+ %extended = ashr i32 %shifted, 29
+ ret i32 %extended
+}
+
+define i64 @test_sbfx64(i64* %addr) {
+; CHECK: test_sbfx64:
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #63
+
+ %fields = load i64* %addr
+ %shifted = shl i64 %fields, 1
+ %extended = ashr i64 %shifted, 1
+ ret i64 %extended
+}
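+
+; Added note (not from the original test): the extractions above reduce to
+; two patterns. "(x >> lsb) & ((1 << width) - 1)" selects ubfx #lsb, #width,
+; and "(x << (32 - lsb - width)) >>arith (32 - width)" selects
+; sbfx #lsb, #width. A hypothetical extra instance, expected to select
+; ubfx #8, #4 (no CHECK lines):
+define i32 @ubfx_sketch(i32 %in) {
+  %shifted = lshr i32 %in, 8
+  %masked = and i32 %shifted, 15
+  ret i32 %masked
+}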
diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll
new file mode 100644
index 000000000000..3d0a5cf96bcd
--- /dev/null
+++ b/test/CodeGen/AArch64/blockaddress.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@addr = global i8* null
+
+define void @test_blockaddress() {
+; CHECK: test_blockaddress:
+ store volatile i8* blockaddress(@test_blockaddress, %block), i8** @addr
+ %val = load volatile i8** @addr
+ indirectbr i8* %val, [label %block]
+; CHECK: adrp [[DEST_HI:x[0-9]+]], [[DEST_LBL:.Ltmp[0-9]+]]
+; CHECK: add [[DEST:x[0-9]+]], [[DEST_HI]], #:lo12:[[DEST_LBL]]
+; CHECK: str [[DEST]],
+; CHECK: ldr [[NEWDEST:x[0-9]+]]
+; CHECK: br [[NEWDEST]]
+
+block:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/bool-loads.ll b/test/CodeGen/AArch64/bool-loads.ll
new file mode 100644
index 000000000000..5c7640bc4218
--- /dev/null
+++ b/test/CodeGen/AArch64/bool-loads.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+@var = global i1 0
+
+define i32 @test_sextloadi32() {
+; CHECK: test_sextloadi32
+
+ %val = load i1* @var
+ %ret = sext i1 %val to i32
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1
+
+ ret i32 %ret
+; CHECK: ret
+}
+
+define i64 @test_sextloadi64() {
+; CHECK: test_sextloadi64
+
+ %val = load i1* @var
+ %ret = sext i1 %val to i64
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1
+
+ ret i64 %ret
+; CHECK: ret
+}
+
+define i32 @test_zextloadi32() {
+; CHECK: test_zextloadi32
+
+; It's not actually necessary that "ret" is next, but as far as LLVM is
+; concerned only 0 or 1 should be loadable, so no extension is necessary.
+ %val = load i1* @var
+ %ret = zext i1 %val to i32
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+
+ ret i32 %ret
+; CHECK-NEXT: ret
+}
+
+define i64 @test_zextloadi64() {
+; CHECK: test_zextloadi64
+
+; It's not actually necessary that "ret" is next, but as far as LLVM is
+; concerned only 0 or 1 should be loadable, so no extension is necessary.
+ %val = load i1* @var
+ %ret = zext i1 %val to i64
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var]
+
+ ret i64 %ret
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll
new file mode 100644
index 000000000000..38ed4734e1b4
--- /dev/null
+++ b/test/CodeGen/AArch64/breg.ll
@@ -0,0 +1,17 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@stored_label = global i8* null
+
+define void @foo() {
+; CHECK: foo:
+ %lab = load i8** @stored_label
+ indirectbr i8* %lab, [label %otherlab, label %retlab]
+; CHECK: adrp {{x[0-9]+}}, stored_label
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:stored_label]
+; CHECK: br {{x[0-9]+}}
+
+otherlab:
+ ret void
+retlab:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll
new file mode 100644
index 000000000000..c66aa5bfc510
--- /dev/null
+++ b/test/CodeGen/AArch64/callee-save.ll
@@ -0,0 +1,86 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var = global float 0.0
+
+define void @foo() {
+; CHECK: foo:
+
+; CHECK: stp d14, d15, [sp
+; CHECK: stp d12, d13, [sp
+; CHECK: stp d10, d11, [sp
+; CHECK: stp d8, d9, [sp
+
+ ; Create lots of live variables to exhaust the supply of
+ ; caller-saved registers
+ %val1 = load volatile float* @var
+ %val2 = load volatile float* @var
+ %val3 = load volatile float* @var
+ %val4 = load volatile float* @var
+ %val5 = load volatile float* @var
+ %val6 = load volatile float* @var
+ %val7 = load volatile float* @var
+ %val8 = load volatile float* @var
+ %val9 = load volatile float* @var
+ %val10 = load volatile float* @var
+ %val11 = load volatile float* @var
+ %val12 = load volatile float* @var
+ %val13 = load volatile float* @var
+ %val14 = load volatile float* @var
+ %val15 = load volatile float* @var
+ %val16 = load volatile float* @var
+ %val17 = load volatile float* @var
+ %val18 = load volatile float* @var
+ %val19 = load volatile float* @var
+ %val20 = load volatile float* @var
+ %val21 = load volatile float* @var
+ %val22 = load volatile float* @var
+ %val23 = load volatile float* @var
+ %val24 = load volatile float* @var
+ %val25 = load volatile float* @var
+ %val26 = load volatile float* @var
+ %val27 = load volatile float* @var
+ %val28 = load volatile float* @var
+ %val29 = load volatile float* @var
+ %val30 = load volatile float* @var
+ %val31 = load volatile float* @var
+ %val32 = load volatile float* @var
+
+ store volatile float %val1, float* @var
+ store volatile float %val2, float* @var
+ store volatile float %val3, float* @var
+ store volatile float %val4, float* @var
+ store volatile float %val5, float* @var
+ store volatile float %val6, float* @var
+ store volatile float %val7, float* @var
+ store volatile float %val8, float* @var
+ store volatile float %val9, float* @var
+ store volatile float %val10, float* @var
+ store volatile float %val11, float* @var
+ store volatile float %val12, float* @var
+ store volatile float %val13, float* @var
+ store volatile float %val14, float* @var
+ store volatile float %val15, float* @var
+ store volatile float %val16, float* @var
+ store volatile float %val17, float* @var
+ store volatile float %val18, float* @var
+ store volatile float %val19, float* @var
+ store volatile float %val20, float* @var
+ store volatile float %val21, float* @var
+ store volatile float %val22, float* @var
+ store volatile float %val23, float* @var
+ store volatile float %val24, float* @var
+ store volatile float %val25, float* @var
+ store volatile float %val26, float* @var
+ store volatile float %val27, float* @var
+ store volatile float %val28, float* @var
+ store volatile float %val29, float* @var
+ store volatile float %val30, float* @var
+ store volatile float %val31, float* @var
+ store volatile float %val32, float* @var
+
+; CHECK: ldp d8, d9, [sp
+; CHECK: ldp d10, d11, [sp
+; CHECK: ldp d12, d13, [sp
+; CHECK: ldp d14, d15, [sp
+ ret void
+}
diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll
new file mode 100644
index 000000000000..4213110497d3
--- /dev/null
+++ b/test/CodeGen/AArch64/compare-branch.ll
@@ -0,0 +1,38 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @foo() {
+; CHECK: foo:
+
+ %val1 = load volatile i32* @var32
+ %tst1 = icmp eq i32 %val1, 0
+ br i1 %tst1, label %end, label %test2
+; CHECK: cbz {{w[0-9]+}}, .LBB
+
+test2:
+ %val2 = load volatile i32* @var32
+ %tst2 = icmp ne i32 %val2, 0
+ br i1 %tst2, label %end, label %test3
+; CHECK: cbnz {{w[0-9]+}}, .LBB
+
+test3:
+ %val3 = load volatile i64* @var64
+ %tst3 = icmp eq i64 %val3, 0
+ br i1 %tst3, label %end, label %test4
+; CHECK: cbz {{x[0-9]+}}, .LBB
+
+test4:
+ %val4 = load volatile i64* @var64
+ %tst4 = icmp ne i64 %val4, 0
+ br i1 %tst4, label %end, label %test5
+; CHECK: cbnz {{x[0-9]+}}, .LBB
+
+test5:
+ store volatile i64 %val4, i64* @var64
+ ret void
+
+end:
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll
new file mode 100644
index 000000000000..3051cf53fdf8
--- /dev/null
+++ b/test/CodeGen/AArch64/cond-sel.ll
@@ -0,0 +1,213 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csel:
+
+ %tst1 = icmp ugt i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, i32 42, i32 52
+ store i32 %val1, i32* @var32
+; CHECK: movz [[W52:w[0-9]+]], #52
+; CHECK: movz [[W42:w[0-9]+]], #42
+; CHECK: csel {{w[0-9]+}}, [[W42]], [[W52]], hi
+
+ %rhs64 = sext i32 %rhs32 to i64
+ %tst2 = icmp sle i64 %lhs64, %rhs64
+ %val2 = select i1 %tst2, i64 %lhs64, i64 %rhs64
+ store i64 %val2, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], [[RHS:w[0-9]+]], sxtw
+; CHECK: sxtw [[EXT_RHS:x[0-9]+]], [[RHS]]
+; CHECK: csel {{x[0-9]+}}, [[LHS]], [[EXT_RHS]], le
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %rhs64) {
+; CHECK: test_floatcsel:
+
+ %tst1 = fcmp one float %lhs32, %rhs32
+; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}
+ %val1 = select i1 %tst1, i32 42, i32 52
+ store i32 %val1, i32* @var32
+; CHECK: movz [[W52:w[0-9]+]], #52
+; CHECK: movz [[W42:w[0-9]+]], #42
+; CHECK: csel [[MAYBETRUE:w[0-9]+]], [[W42]], [[W52]], mi
+; CHECK: csel {{w[0-9]+}}, [[W42]], [[MAYBETRUE]], gt
+
+
+ %tst2 = fcmp ueq double %lhs64, %rhs64
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+ %val2 = select i1 %tst2, i64 9, i64 15
+ store i64 %val2, i64* @var64
+; CHECK: movz [[CONST15:x[0-9]+]], #15
+; CHECK: movz [[CONST9:x[0-9]+]], #9
+; CHECK: csel [[MAYBETRUE:x[0-9]+]], [[CONST9]], [[CONST15]], eq
+; CHECK: csel {{x[0-9]+}}, [[CONST9]], [[MAYBETRUE]], vs
+
+ ret void
+; CHECK: ret
+}
+
+
+define void @test_csinc(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csinc:
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %tst1 = icmp ugt i32 %lhs32, %rhs32
+ %inc1 = add i32 %rhs32, 1
+ %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32
+ store volatile i32 %val1, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+; CHECK: csinc {{w[0-9]+}}, [[LHS]], [[RHS]], ls
+
+ %rhs2 = add i32 %rhs32, 42
+ %tst2 = icmp sle i32 %lhs32, %rhs2
+ %inc2 = add i32 %rhs32, 1
+ %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2
+ store volatile i32 %val2, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}}
+; CHECK: csinc {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %rhs3 = sext i32 %rhs32 to i64
+ %tst3 = icmp ugt i64 %lhs64, %rhs3
+ %inc3 = add i64 %rhs3, 1
+ %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64
+ store volatile i64 %val3, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinc {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls
+
+ %rhs4 = zext i32 %rhs32 to i64
+ %tst4 = icmp sle i64 %lhs64, %rhs4
+ %inc4 = add i64 %rhs4, 1
+ %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4
+ store volatile i64 %val4, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinc {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_csinv(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csinv:
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %tst1 = icmp ugt i32 %lhs32, %rhs32
+ %inc1 = xor i32 -1, %rhs32
+ %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32
+ store volatile i32 %val1, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+; CHECK: csinv {{w[0-9]+}}, [[LHS]], [[RHS]], ls
+
+ %rhs2 = add i32 %rhs32, 42
+ %tst2 = icmp sle i32 %lhs32, %rhs2
+ %inc2 = xor i32 -1, %rhs32
+ %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2
+ store volatile i32 %val2, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}}
+; CHECK: csinv {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %rhs3 = sext i32 %rhs32 to i64
+ %tst3 = icmp ugt i64 %lhs64, %rhs3
+ %inc3 = xor i64 -1, %rhs3
+ %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64
+ store volatile i64 %val3, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinv {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls
+
+ %rhs4 = zext i32 %rhs32 to i64
+ %tst4 = icmp sle i64 %lhs64, %rhs4
+ %inc4 = xor i64 -1, %rhs4
+ %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4
+ store volatile i64 %val4, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csinv {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_csneg(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csneg:
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %tst1 = icmp ugt i32 %lhs32, %rhs32
+ %inc1 = sub i32 0, %rhs32
+ %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32
+ store volatile i32 %val1, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+; CHECK: csneg {{w[0-9]+}}, [[LHS]], [[RHS]], ls
+
+ %rhs2 = add i32 %rhs32, 42
+ %tst2 = icmp sle i32 %lhs32, %rhs2
+ %inc2 = sub i32 0, %rhs32
+ %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2
+ store volatile i32 %val2, i32* @var32
+; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}}
+; CHECK: csneg {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le
+
+; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls).
+ %rhs3 = sext i32 %rhs32 to i64
+ %tst3 = icmp ugt i64 %lhs64, %rhs3
+ %inc3 = sub i64 0, %rhs3
+ %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64
+ store volatile i64 %val3, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csneg {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls
+
+ %rhs4 = zext i32 %rhs32 to i64
+ %tst4 = icmp sle i64 %lhs64, %rhs4
+ %inc4 = sub i64 0, %rhs4
+ %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4
+ store volatile i64 %val4, i64* @var64
+; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}}
+; CHECK: csneg {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_cset(i32 %lhs, i32 %rhs, i64 %lhs64) {
+; CHECK: test_cset:
+
+; N.b. the code is not optimal here (a 32-bit csinc would be better) but the
+; incoming DAG is too complex.
+ %tst1 = icmp eq i32 %lhs, %rhs
+ %val1 = zext i1 %tst1 to i32
+ store i32 %val1, i32* @var32
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: csinc {{w[0-9]+}}, wzr, wzr, ne
+
+ %rhs64 = sext i32 %rhs to i64
+ %tst2 = icmp ule i64 %lhs64, %rhs64
+ %val2 = zext i1 %tst2 to i64
+ store i64 %val2, i64* @var64
+; CHECK: csinc {{w[0-9]+}}, wzr, wzr, hi
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_csetm(i32 %lhs, i32 %rhs, i64 %lhs64) {
+; CHECK: test_csetm:
+
+ %tst1 = icmp eq i32 %lhs, %rhs
+ %val1 = sext i1 %tst1 to i32
+ store i32 %val1, i32* @var32
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: csinv {{w[0-9]+}}, wzr, wzr, ne
+
+ %rhs64 = sext i32 %rhs to i64
+ %tst2 = icmp ule i64 %lhs64, %rhs64
+ %val2 = sext i1 %tst2 to i64
+ store i64 %val2, i64* @var64
+; CHECK: csinv {{x[0-9]+}}, xzr, xzr, hi
+
+ ret void
+; CHECK: ret
+}
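+
+; Added note (not from the original test): the csinc/csinv forms with wzr/xzr
+; above are the expansions of the cset and csetm aliases. "cset Wd, cond" is
+; "csinc Wd, wzr, wzr, invert(cond)" and "csetm Wd, cond" is
+; "csinv Wd, wzr, wzr, invert(cond)", which is why the checked conditions
+; (ne, hi) are the inverses of the selects' eq and ule.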
diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll
new file mode 100644
index 000000000000..f5d57593bfad
--- /dev/null
+++ b/test/CodeGen/AArch64/directcond.ll
@@ -0,0 +1,84 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) {
+; CHECK: test_select_i32:
+ %val = select i1 %bit, i32 %a, i32 %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: csel w0, w1, w2, ne
+
+ ret i32 %val
+}
+
+define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) {
+; CHECK: test_select_i64:
+ %val = select i1 %bit, i64 %a, i64 %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: csel x0, x1, x2, ne
+
+ ret i64 %val
+}
+
+define float @test_select_float(i1 %bit, float %a, float %b) {
+; CHECK: test_select_float:
+ %val = select i1 %bit, float %a, float %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+
+ ret float %val
+}
+
+define double @test_select_double(i1 %bit, double %a, double %b) {
+; CHECK: test_select_double:
+ %val = select i1 %bit, double %a, double %b
+; CHECK: movz [[ONE:w[0-9]+]], #1
+; CHECK: tst w0, [[ONE]]
+; CHECK-NEXT: fcsel d0, d0, d1, ne
+
+ ret double %val
+}
+
+define i32 @test_brcond(i1 %bit) {
+; CHECK: test_brcond:
+ br i1 %bit, label %true, label %false
+; CHECK: tbz {{w[0-9]+}}, #0, .LBB
+
+true:
+ ret i32 0
+false:
+ ret i32 42
+}
+
+define i1 @test_setcc_float(float %lhs, float %rhs) {
+; CHECK: test_setcc_float
+ %val = fcmp oeq float %lhs, %rhs
+; CHECK: fcmp s0, s1
+; CHECK: csinc w0, wzr, wzr, ne
+ ret i1 %val
+}
+
+define i1 @test_setcc_double(double %lhs, double %rhs) {
+; CHECK: test_setcc_double
+ %val = fcmp oeq double %lhs, %rhs
+; CHECK: fcmp d0, d1
+; CHECK: csinc w0, wzr, wzr, ne
+ ret i1 %val
+}
+
+define i1 @test_setcc_i32(i32 %lhs, i32 %rhs) {
+; CHECK: test_setcc_i32
+ %val = icmp ugt i32 %lhs, %rhs
+; CHECK: cmp w0, w1
+; CHECK: csinc w0, wzr, wzr, ls
+ ret i1 %val
+}
+
+define i1 @test_setcc_i64(i64 %lhs, i64 %rhs) {
+; CHECK: test_setcc_i64
+ %val = icmp ne i64 %lhs, %rhs
+; CHECK: cmp x0, x1
+; CHECK: csinc w0, wzr, wzr, eq
+ ret i1 %val
+}
diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll
new file mode 100644
index 000000000000..c40d3933b44b
--- /dev/null
+++ b/test/CodeGen/AArch64/dp-3source.ll
@@ -0,0 +1,163 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) {
+; CHECK: test_madd32:
+ %mid = mul i32 %val1, %val2
+ %res = add i32 %val0, %mid
+; CHECK: madd {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i32 %res
+}
+
+define i64 @test_madd64(i64 %val0, i64 %val1, i64 %val2) {
+; CHECK: test_madd64:
+ %mid = mul i64 %val1, %val2
+ %res = add i64 %val0, %mid
+; CHECK: madd {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i32 @test_msub32(i32 %val0, i32 %val1, i32 %val2) {
+; CHECK: test_msub32:
+ %mid = mul i32 %val1, %val2
+ %res = sub i32 %val0, %mid
+; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i32 %res
+}
+
+define i64 @test_msub64(i64 %val0, i64 %val1, i64 %val2) {
+; CHECK: test_msub64:
+ %mid = mul i64 %val1, %val2
+ %res = sub i64 %val0, %mid
+; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_smaddl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_smaddl:
+ %ext1 = sext i32 %val1 to i64
+ %ext2 = sext i32 %val2 to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = add i64 %acc, %prod
+; CHECK: smaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_smsubl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_smsubl:
+ %ext1 = sext i32 %val1 to i64
+ %ext2 = sext i32 %val2 to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = sub i64 %acc, %prod
+; CHECK: smsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_umaddl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_umaddl:
+ %ext1 = zext i32 %val1 to i64
+ %ext2 = zext i32 %val2 to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = add i64 %acc, %prod
+; CHECK: umaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_umsubl(i64 %acc, i32 %val1, i32 %val2) {
+; CHECK: test_umsubl:
+ %ext1 = zext i32 %val1 to i64
+ %ext2 = zext i32 %val2 to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = sub i64 %acc, %prod
+; CHECK: umsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_smulh(i64 %lhs, i64 %rhs) {
+; CHECK: test_smulh:
+ %ext1 = sext i64 %lhs to i128
+ %ext2 = sext i64 %rhs to i128
+ %res = mul i128 %ext1, %ext2
+ %high = lshr i128 %res, 64
+ %val = trunc i128 %high to i64
+; CHECK: smulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %val
+}
+
+define i64 @test_umulh(i64 %lhs, i64 %rhs) {
+; CHECK: test_umulh:
+ %ext1 = zext i64 %lhs to i128
+ %ext2 = zext i64 %rhs to i128
+ %res = mul i128 %ext1, %ext2
+ %high = lshr i128 %res, 64
+ %val = trunc i128 %high to i64
+; CHECK: umulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %val
+}
+
+define i32 @test_mul32(i32 %lhs, i32 %rhs) {
+; CHECK: test_mul32:
+ %res = mul i32 %lhs, %rhs
+; CHECK: mul {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i32 %res
+}
+
+define i64 @test_mul64(i64 %lhs, i64 %rhs) {
+; CHECK: test_mul64:
+ %res = mul i64 %lhs, %rhs
+; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i32 @test_mneg32(i32 %lhs, i32 %rhs) {
+; CHECK: test_mneg32:
+ %prod = mul i32 %lhs, %rhs
+ %res = sub i32 0, %prod
+; CHECK: mneg {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i32 %res
+}
+
+define i64 @test_mneg64(i64 %lhs, i64 %rhs) {
+; CHECK: test_mneg64:
+ %prod = mul i64 %lhs, %rhs
+ %res = sub i64 0, %prod
+; CHECK: mneg {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_smull(i32 %lhs, i32 %rhs) {
+; CHECK: test_smull:
+ %ext1 = sext i32 %lhs to i64
+ %ext2 = sext i32 %rhs to i64
+ %res = mul i64 %ext1, %ext2
+; CHECK: smull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_umull(i32 %lhs, i32 %rhs) {
+; CHECK: test_umull:
+ %ext1 = zext i32 %lhs to i64
+ %ext2 = zext i32 %rhs to i64
+ %res = mul i64 %ext1, %ext2
+; CHECK: umull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_smnegl(i32 %lhs, i32 %rhs) {
+; CHECK: test_smnegl:
+ %ext1 = sext i32 %lhs to i64
+ %ext2 = sext i32 %rhs to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = sub i64 0, %prod
+; CHECK: smnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i64 %res
+}
+
+define i64 @test_umnegl(i32 %lhs, i32 %rhs) {
+; CHECK: test_umnegl:
+ %ext1 = zext i32 %lhs to i64
+ %ext2 = zext i32 %rhs to i64
+ %prod = mul i64 %ext1, %ext2
+ %res = sub i64 0, %prod
+; CHECK: umnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ret i64 %res
+}
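+
+; Added note (not from the original test): several of these mnemonics are
+; alias forms of the three-source instructions: mul is madd with the zero
+; register as addend, mneg is msub with wzr/xzr, smull/umull are
+; smaddl/umaddl with xzr, and smnegl/umnegl are smsubl/umsubl with xzr.
+; smulh/umulh are matched from the widen-to-i128, multiply, then
+; shift-right-by-64 pattern above.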
diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll
new file mode 100644
index 000000000000..83aa8b4f6631
--- /dev/null
+++ b/test/CodeGen/AArch64/dp1.ll
@@ -0,0 +1,152 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @rev_i32() {
+; CHECK: rev_i32:
+ %val0_tmp = load i32* @var32
+ %val1_tmp = call i32 @llvm.bswap.i32(i32 %val0_tmp)
+; CHECK: rev {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val1_tmp, i32* @var32
+ ret void
+}
+
+define void @rev_i64() {
+; CHECK: rev_i64:
+ %val0_tmp = load i64* @var64
+ %val1_tmp = call i64 @llvm.bswap.i64(i64 %val0_tmp)
+; CHECK: rev {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val1_tmp, i64* @var64
+ ret void
+}
+
+define void @rev32_i64() {
+; CHECK: rev32_i64:
+ %val0_tmp = load i64* @var64
+ %val1_tmp = shl i64 %val0_tmp, 32
+ %val5_tmp = sub i64 64, 32
+ %val2_tmp = lshr i64 %val0_tmp, %val5_tmp
+ %val3_tmp = or i64 %val1_tmp, %val2_tmp
+ %val4_tmp = call i64 @llvm.bswap.i64(i64 %val3_tmp)
+; CHECK: rev32 {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
+
+define void @rev16_i32() {
+; CHECK: rev16_i32:
+ %val0_tmp = load i32* @var32
+ %val1_tmp = shl i32 %val0_tmp, 16
+ %val2_tmp = lshr i32 %val0_tmp, 16
+ %val3_tmp = or i32 %val1_tmp, %val2_tmp
+ %val4_tmp = call i32 @llvm.bswap.i32(i32 %val3_tmp)
+; CHECK: rev16 {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @clz_zerodef_i32() {
+; CHECK: clz_zerodef_i32:
+ %val0_tmp = load i32* @var32
+ %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 0)
+; CHECK: clz {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @clz_zerodef_i64() {
+; CHECK: clz_zerodef_i64:
+ %val0_tmp = load i64* @var64
+ %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 0)
+; CHECK: clz {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
+
+define void @clz_zeroundef_i32() {
+; CHECK: clz_zeroundef_i32:
+ %val0_tmp = load i32* @var32
+ %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 1)
+; CHECK: clz {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @clz_zeroundef_i64() {
+; CHECK: clz_zeroundef_i64:
+ %val0_tmp = load i64* @var64
+ %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 1)
+; CHECK: clz {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
+
+define void @cttz_zerodef_i32() {
+; CHECK: cttz_zerodef_i32:
+ %val0_tmp = load i32* @var32
+ %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 0)
+; CHECK: rbit [[REVERSED:w[0-9]+]], {{w[0-9]+}}
+; CHECK: clz {{w[0-9]+}}, [[REVERSED]]
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @cttz_zerodef_i64() {
+; CHECK: cttz_zerodef_i64:
+ %val0_tmp = load i64* @var64
+ %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 0)
+; CHECK: rbit [[REVERSED:x[0-9]+]], {{x[0-9]+}}
+; CHECK: clz {{x[0-9]+}}, [[REVERSED]]
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
+
+define void @cttz_zeroundef_i32() {
+; CHECK: cttz_zeroundef_i32:
+ %val0_tmp = load i32* @var32
+ %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 1)
+; CHECK: rbit [[REVERSED:w[0-9]+]], {{w[0-9]+}}
+; CHECK: clz {{w[0-9]+}}, [[REVERSED]]
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @cttz_zeroundef_i64() {
+; CHECK: cttz_zeroundef_i64:
+ %val0_tmp = load i64* @var64
+ %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 1)
+; CHECK: rbit [[REVERSED:x[0-9]+]], {{x[0-9]+}}
+; CHECK: clz {{x[0-9]+}}, [[REVERSED]]
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
+
+; These two are just compilation tests really: the operation's set to Expand in
+; ISelLowering.
+define void @ctpop_i32() {
+; CHECK: ctpop_i32:
+ %val0_tmp = load i32* @var32
+ %val4_tmp = call i32 @llvm.ctpop.i32(i32 %val0_tmp)
+ store volatile i32 %val4_tmp, i32* @var32
+ ret void
+}
+
+define void @ctpop_i64() {
+; CHECK: ctpop_i64:
+ %val0_tmp = load i64* @var64
+ %val4_tmp = call i64 @llvm.ctpop.i64(i64 %val0_tmp)
+ store volatile i64 %val4_tmp, i64* @var64
+ ret void
+}
+
+
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare i32 @llvm.ctlz.i32 (i32, i1)
+declare i64 @llvm.ctlz.i64 (i64, i1)
+declare i32 @llvm.cttz.i32 (i32, i1)
+declare i64 @llvm.cttz.i64 (i64, i1)
+declare i32 @llvm.ctpop.i32 (i32)
+declare i64 @llvm.ctpop.i64 (i64)
+
diff --git a/test/CodeGen/AArch64/dp2.ll b/test/CodeGen/AArch64/dp2.ll
new file mode 100644
index 000000000000..4c740f6b8623
--- /dev/null
+++ b/test/CodeGen/AArch64/dp2.ll
@@ -0,0 +1,169 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32_0 = global i32 0
+@var32_1 = global i32 0
+@var64_0 = global i64 0
+@var64_1 = global i64 0
+
+define void @rorv_i64() {
+; CHECK: rorv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val2_tmp = sub i64 64, %val1_tmp
+ %val3_tmp = shl i64 %val0_tmp, %val2_tmp
+ %val4_tmp = lshr i64 %val0_tmp, %val1_tmp
+ %val5_tmp = or i64 %val3_tmp, %val4_tmp
+; CHECK: ror {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val5_tmp, i64* @var64_0
+ ret void
+}
+
+define void @asrv_i64() {
+; CHECK: asrv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val4_tmp = ashr i64 %val0_tmp, %val1_tmp
+; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64_1
+ ret void
+}
+
+define void @lsrv_i64() {
+; CHECK: lsrv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val4_tmp = lshr i64 %val0_tmp, %val1_tmp
+; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64_0
+ ret void
+}
+
+define void @lslv_i64() {
+; CHECK: lslv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val4_tmp = shl i64 %val0_tmp, %val1_tmp
+; CHECK: lsl {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64_1
+ ret void
+}
+
+define void @udiv_i64() {
+; CHECK: udiv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val4_tmp = udiv i64 %val0_tmp, %val1_tmp
+; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64_0
+ ret void
+}
+
+define void @sdiv_i64() {
+; CHECK: sdiv_i64:
+ %val0_tmp = load i64* @var64_0
+ %val1_tmp = load i64* @var64_1
+ %val4_tmp = sdiv i64 %val0_tmp, %val1_tmp
+; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %val4_tmp, i64* @var64_1
+ ret void
+}
+
+
+define void @lsrv_i32() {
+; CHECK: lsrv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val1_tmp = load i32* @var32_1
+ %val2_tmp = add i32 1, %val1_tmp
+ %val4_tmp = lshr i32 %val0_tmp, %val2_tmp
+; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32_0
+ ret void
+}
+
+define void @lslv_i32() {
+; CHECK: lslv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val1_tmp = load i32* @var32_1
+ %val2_tmp = add i32 1, %val1_tmp
+ %val4_tmp = shl i32 %val0_tmp, %val2_tmp
+; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32_1
+ ret void
+}
+
+define void @rorv_i32() {
+; CHECK: rorv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val6_tmp = load i32* @var32_1
+ %val1_tmp = add i32 1, %val6_tmp
+ %val2_tmp = sub i32 32, %val1_tmp
+ %val3_tmp = shl i32 %val0_tmp, %val2_tmp
+ %val4_tmp = lshr i32 %val0_tmp, %val1_tmp
+ %val5_tmp = or i32 %val3_tmp, %val4_tmp
+; CHECK: ror {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val5_tmp, i32* @var32_0
+ ret void
+}
+
+define void @asrv_i32() {
+; CHECK: asrv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val1_tmp = load i32* @var32_1
+ %val2_tmp = add i32 1, %val1_tmp
+ %val4_tmp = ashr i32 %val0_tmp, %val2_tmp
+; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32_1
+ ret void
+}
+
+define void @sdiv_i32() {
+; CHECK: sdiv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val1_tmp = load i32* @var32_1
+ %val4_tmp = sdiv i32 %val0_tmp, %val1_tmp
+; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32_1
+ ret void
+}
+
+define void @udiv_i32() {
+; CHECK: udiv_i32:
+ %val0_tmp = load i32* @var32_0
+ %val1_tmp = load i32* @var32_1
+ %val4_tmp = udiv i32 %val0_tmp, %val1_tmp
+; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %val4_tmp, i32* @var32_0
+ ret void
+}
+
+; The point of this test is that we may not actually see (shl GPR32:$Val, (zext GPR32:$Val2))
+; in the DAG (the RHS may be natively 64-bit), but we should still use the lsl instructions.
+define i32 @test_lsl32() {
+; CHECK: test_lsl32:
+
+ %val = load i32* @var32_0
+ %ret = shl i32 1, %val
+; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+ ret i32 %ret
+}
+
+define i32 @test_lsr32() {
+; CHECK: test_lsr32:
+
+ %val = load i32* @var32_0
+ %ret = lshr i32 1, %val
+; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+ ret i32 %ret
+}
+
+define i32 @test_asr32(i32 %in) {
+; CHECK: test_asr32:
+
+ %val = load i32* @var32_0
+ %ret = ashr i32 %in, %val
+; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+
+ ret i32 %ret
+}
diff --git a/test/CodeGen/AArch64/elf-extern.ll b/test/CodeGen/AArch64/elf-extern.ll
new file mode 100644
index 000000000000..ee89d8d94ba4
--- /dev/null
+++ b/test/CodeGen/AArch64/elf-extern.ll
@@ -0,0 +1,21 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s
+
+; External symbols are a different concept from global variables, but they
+; should still get relocations (and matching symbol table entries) when used.
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+
+define i32 @check_extern() {
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0)
+ ret i32 0
+}
+
+; CHECK: .rela.text
+; CHECK: ('r_sym', 0x00000009)
+; CHECK-NEXT: ('r_type', 0x0000011b)
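+; (r_type 0x11b is R_AARCH64_CALL26, the relocation used for the bl to memcpy.)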
+
+; CHECK: .symtab
+; CHECK: Symbol 9
+; CHECK-NEXT: memcpy
+
+
diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll
new file mode 100644
index 000000000000..3d3d8676818a
--- /dev/null
+++ b/test/CodeGen/AArch64/extern-weak.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -o - < %s | FileCheck %s
+
+declare extern_weak i32 @var()
+
+define i32()* @foo() {
+; The usual ADRP/ADD pair can't be used for a weak reference because it must
+; evaluate to 0 if the symbol is undefined. We use a litpool entry.
+ ret i32()* @var
+; CHECK: .LCPI0_0:
+; CHECK-NEXT: .xword var
+
+; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:.LCPI0_0]
+
+}
+
+
+@arr_var = extern_weak global [10 x i32]
+
+define i32* @bar() {
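+; Element 5 of a [10 x i32] array is 20 bytes past the base, hence the #20 below.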
+ %addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5
+; CHECK: .LCPI1_0:
+; CHECK-NEXT: .xword arr_var
+
+; CHECK: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI1_0]
+; CHECK: add x0, [[BASE]], #20
+ ret i32* %addr
+}
+
+@defined_weak_var = internal unnamed_addr global i32 0
+
+define i32* @wibble() {
+ ret i32* @defined_weak_var
+; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var
+; CHECK: add x0, [[BASE]], #:lo12:defined_weak_var
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/extract.ll b/test/CodeGen/AArch64/extract.ll
new file mode 100644
index 000000000000..06267816a4e1
--- /dev/null
+++ b/test/CodeGen/AArch64/extract.ll
@@ -0,0 +1,57 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i64 @ror_i64(i64 %in) {
+; CHECK: ror_i64:
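+; A rotate by a constant becomes EXTR with both source operands the same:
+; ror xD, xN, #45 is extr xD, xN, xN, #45.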
+ %left = shl i64 %in, 19
+ %right = lshr i64 %in, 45
+ %val5 = or i64 %left, %right
+; CHECK: extr {{x[0-9]+}}, x0, x0, #45
+ ret i64 %val5
+}
+
+define i32 @ror_i32(i32 %in) {
+; CHECK: ror_i32:
+ %left = shl i32 %in, 9
+ %right = lshr i32 %in, 23
+ %val5 = or i32 %left, %right
+; CHECK: extr {{w[0-9]+}}, w0, w0, #23
+ ret i32 %val5
+}
+
+define i32 @extr_i32(i32 %lhs, i32 %rhs) {
+; CHECK: extr_i32:
+ %left = shl i32 %lhs, 6
+ %right = lshr i32 %rhs, 26
+ %val = or i32 %left, %right
+ ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
+ ; something other than w0 and w1.
+; CHECK: extr {{w[0-9]+}}, w0, w1, #26
+
+ ret i32 %val
+}
+
+define i64 @extr_i64(i64 %lhs, i64 %rhs) {
+; CHECK: extr_i64:
+ %right = lshr i64 %rhs, 40
+ %left = shl i64 %lhs, 24
+ %val = or i64 %right, %left
+ ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use
+ ; something other than x0 and x1.
+; CHECK: extr {{x[0-9]+}}, x0, x1, #40
+
+ ret i64 %val
+}
+
+; Regression test: a bad experimental pattern crept into git that optimised
+; this pattern to a single EXTR.
+define i32 @extr_regress(i32 %a, i32 %b) {
+; CHECK: extr_regress:
+
+ %sh1 = shl i32 %a, 14
+ %sh2 = lshr i32 %b, 14
+ %val = or i32 %sh2, %sh1
+; CHECK-NOT: extr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, #{{[0-9]+}}
+
+ ret i32 %val
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll
new file mode 100644
index 000000000000..e40aa3033bde
--- /dev/null
+++ b/test/CodeGen/AArch64/fastcc-reserved.ll
@@ -0,0 +1,58 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
+
+; This test is designed to be run in the situation where the
+; call-frame is not reserved (hence disable-fp-elim), but where
+; callee-pop can occur (hence tailcallopt).
+
+declare fastcc void @will_pop([8 x i32], i32 %val)
+
+define fastcc void @foo(i32 %in) {
+; CHECK: foo:
+
+ %addr = alloca i8, i32 %in
+
+; Normal frame setup stuff:
+; CHECK: sub sp, sp,
+; CHECK: stp x29, x30
+
+; Reserve space for call-frame:
+; CHECK: sub sp, sp, #16
+
+ call fastcc void @will_pop([8 x i32] undef, i32 42)
+; CHECK: bl will_pop
+
+; Since @will_pop is fastcc with tailcallopt, it will put the stack
+; back where it needs to be, so we shouldn't duplicate that:
+; CHECK-NOT: sub sp, sp, #16
+; CHECK-NOT: add sp, sp,
+
+; CHECK: ldp x29, x30
+; CHECK: add sp, sp,
+ ret void
+}
+
+declare void @wont_pop([8 x i32], i32 %val)
+
+define void @foo1(i32 %in) {
+; CHECK: foo1:
+
+ %addr = alloca i8, i32 %in
+; Normal frame setup again
+; CHECK: sub sp, sp,
+; CHECK: stp x29, x30
+
+; Reserve space for call-frame
+; CHECK: sub sp, sp, #16
+
+ call void @wont_pop([8 x i32] undef, i32 42)
+; CHECK: bl wont_pop
+
+; This time we *do* need to unreserve the call-frame
+; CHECK: add sp, sp, #16
+
+; Check for the epilogue (primarily to make sure the sp adjustment spotted
+; above wasn't part of it).
+; CHECK: ldp x29, x30
+; CHECK: add sp, sp,
+ ret void
+}
diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll
new file mode 100644
index 000000000000..41cde94edc1c
--- /dev/null
+++ b/test/CodeGen/AArch64/fastcc.ll
@@ -0,0 +1,123 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+; Without tailcallopt, fastcc still means the caller cleans up the
+; stack, so try to make sure this is respected.
+
+define fastcc void @func_stack0() {
+; CHECK: func_stack0:
+; CHECK: sub sp, sp, #48
+
+; CHECK-TAIL: func_stack0:
+; CHECK-TAIL: sub sp, sp, #48
+
+
+ call fastcc void @func_stack8([8 x i32] undef, i32 42)
+; CHECK: bl func_stack8
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack8
+; CHECK-TAIL: sub sp, sp, #16
+
+
+ call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
+; CHECK: bl func_stack32
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack32
+; CHECK-TAIL: sub sp, sp, #32
+
+
+ call fastcc void @func_stack0()
+; CHECK: bl func_stack0
+; CHECK-NOT: sub sp, sp
+
+; CHECK-TAIL: bl func_stack0
+; CHECK-TAIL-NOT: sub sp, sp
+
+ ret void
+; CHECK: add sp, sp, #48
+; CHECK-NEXT: ret
+
+; CHECK-TAIL: add sp, sp, #48
+; CHECK-TAIL-NEXT: ret
+
+}
+
+define fastcc void @func_stack8([8 x i32], i32 %stacked) {
+; CHECK: func_stack8:
+; CHECK: sub sp, sp, #48
+
+; CHECK-TAIL: func_stack8:
+; CHECK-TAIL: sub sp, sp, #48
+
+
+ call fastcc void @func_stack8([8 x i32] undef, i32 42)
+; CHECK: bl func_stack8
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack8
+; CHECK-TAIL: sub sp, sp, #16
+
+
+ call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
+; CHECK: bl func_stack32
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack32
+; CHECK-TAIL: sub sp, sp, #32
+
+
+ call fastcc void @func_stack0()
+; CHECK: bl func_stack0
+; CHECK-NOT: sub sp, sp
+
+; CHECK-TAIL: bl func_stack0
+; CHECK-TAIL-NOT: sub sp, sp
+
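+; With tailcallopt the callee also pops its own 16 bytes of incoming stack
+; arguments, hence the epilogue below restores 48 + 16 bytes.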
+ ret void
+; CHECK: add sp, sp, #48
+; CHECK-NEXT: ret
+
+; CHECK-TAIL: add sp, sp, #64
+; CHECK-TAIL-NEXT: ret
+}
+
+define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
+; CHECK: func_stack32:
+; CHECK: sub sp, sp, #48
+
+; CHECK-TAIL: func_stack32:
+; CHECK-TAIL: sub sp, sp, #48
+
+
+ call fastcc void @func_stack8([8 x i32] undef, i32 42)
+; CHECK: bl func_stack8
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack8
+; CHECK-TAIL: sub sp, sp, #16
+
+
+ call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
+; CHECK: bl func_stack32
+; CHECK-NOT: sub sp, sp,
+
+; CHECK-TAIL: bl func_stack32
+; CHECK-TAIL: sub sp, sp, #32
+
+
+ call fastcc void @func_stack0()
+; CHECK: bl func_stack0
+; CHECK-NOT: sub sp, sp
+
+; CHECK-TAIL: bl func_stack0
+; CHECK-TAIL-NOT: sub sp, sp
+
+ ret void
+; CHECK: add sp, sp, #48
+; CHECK-NEXT: ret
+
+; CHECK-TAIL: add sp, sp, #80
+; CHECK-TAIL-NEXT: ret
+}
diff --git a/test/CodeGen/AArch64/fcmp.ll b/test/CodeGen/AArch64/fcmp.ll
new file mode 100644
index 000000000000..ad4a903c9b25
--- /dev/null
+++ b/test/CodeGen/AArch64/fcmp.ll
@@ -0,0 +1,81 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+declare void @bar(i32)
+
+define void @test_float(float %a, float %b) {
+; CHECK: test_float:
+
+ %tst1 = fcmp oeq float %a, %b
+ br i1 %tst1, label %end, label %t2
+; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: b.eq .L
+
+t2:
+ %tst2 = fcmp une float %b, 0.0
+ br i1 %tst2, label %t3, label %end
+; CHECK: fcmp {{s[0-9]+}}, #0.0
+; CHECK: b.eq .L
+
+
+t3:
+; This test can't be implemented with just one A64 conditional
+; branch. LLVM converts "ordered and not equal" to "unordered or
+; equal" before instruction selection, which is what we currently
+; test. Obviously, other sequences are valid.
+ %tst3 = fcmp one float %a, %b
+ br i1 %tst3, label %t4, label %end
+; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b.vs .[[T4]]
+t4:
+ %tst4 = fcmp uge float %a, -0.0
+ br i1 %tst4, label %t5, label %end
+; CHECK-NOT: fcmp {{s[0-9]+}}, #0.0
+; CHECK: b.mi .LBB
+
+t5:
+ call void @bar(i32 0)
+ ret void
+end:
+ ret void
+
+}
+
+define void @test_double(double %a, double %b) {
+; CHECK: test_double:
+
+ %tst1 = fcmp oeq double %a, %b
+ br i1 %tst1, label %end, label %t2
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: b.eq .L
+
+t2:
+ %tst2 = fcmp une double %b, 0.0
+ br i1 %tst2, label %t3, label %end
+; CHECK: fcmp {{d[0-9]+}}, #0.0
+; CHECK: b.eq .L
+
+
+t3:
+; This test can't be implemented with just one A64 conditional
+; branch. LLVM converts "ordered and not equal" to "unordered or
+; equal" before instruction selection, which is what we currently
+; test. Obviously, other sequences are valid.
+ %tst3 = fcmp one double %a, %b
+ br i1 %tst3, label %t4, label %end
+; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b.vs .[[T4]]
+t4:
+ %tst4 = fcmp uge double %a, -0.0
+ br i1 %tst4, label %t5, label %end
+; CHECK-NOT: fcmp {{d[0-9]+}}, #0.0
+; CHECK: b.mi .LBB
+
+t5:
+ call void @bar(i32 0)
+ ret void
+end:
+ ret void
+
+}
diff --git a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll
new file mode 100644
index 000000000000..0f7b95b2a48f
--- /dev/null
+++ b/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -0,0 +1,191 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_fcvtzs(float %flt, double %dbl) {
+; CHECK: test_fcvtzs:
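+; An fmul by 2^N feeding fptosi is a fixed-point conversion with N fractional
+; bits, which should fold into the fcvtzs immediate.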
+
+ %fix1 = fmul float %flt, 128.0
+ %cvt1 = fptosi float %fix1 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #7
+ store volatile i32 %cvt1, i32* @var32
+
+ %fix2 = fmul float %flt, 4294967296.0
+ %cvt2 = fptosi float %fix2 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #32
+ store volatile i32 %cvt2, i32* @var32
+
+ %fix3 = fmul float %flt, 128.0
+ %cvt3 = fptosi float %fix3 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #7
+ store volatile i64 %cvt3, i64* @var64
+
+ %fix4 = fmul float %flt, 18446744073709551616.0
+ %cvt4 = fptosi float %fix4 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #64
+ store volatile i64 %cvt4, i64* @var64
+
+ %fix5 = fmul double %dbl, 128.0
+ %cvt5 = fptosi double %fix5 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #7
+ store volatile i32 %cvt5, i32* @var32
+
+ %fix6 = fmul double %dbl, 4294967296.0
+ %cvt6 = fptosi double %fix6 to i32
+; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #32
+ store volatile i32 %cvt6, i32* @var32
+
+ %fix7 = fmul double %dbl, 128.0
+ %cvt7 = fptosi double %fix7 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #7
+ store volatile i64 %cvt7, i64* @var64
+
+ %fix8 = fmul double %dbl, 18446744073709551616.0
+ %cvt8 = fptosi double %fix8 to i64
+; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #64
+ store volatile i64 %cvt8, i64* @var64
+
+ ret void
+}
+
+define void @test_fcvtzu(float %flt, double %dbl) {
+; CHECK: test_fcvtzu:
+
+ %fix1 = fmul float %flt, 128.0
+ %cvt1 = fptoui float %fix1 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #7
+ store volatile i32 %cvt1, i32* @var32
+
+ %fix2 = fmul float %flt, 4294967296.0
+ %cvt2 = fptoui float %fix2 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #32
+ store volatile i32 %cvt2, i32* @var32
+
+ %fix3 = fmul float %flt, 128.0
+ %cvt3 = fptoui float %fix3 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #7
+ store volatile i64 %cvt3, i64* @var64
+
+ %fix4 = fmul float %flt, 18446744073709551616.0
+ %cvt4 = fptoui float %fix4 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #64
+ store volatile i64 %cvt4, i64* @var64
+
+ %fix5 = fmul double %dbl, 128.0
+ %cvt5 = fptoui double %fix5 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #7
+ store volatile i32 %cvt5, i32* @var32
+
+ %fix6 = fmul double %dbl, 4294967296.0
+ %cvt6 = fptoui double %fix6 to i32
+; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #32
+ store volatile i32 %cvt6, i32* @var32
+
+ %fix7 = fmul double %dbl, 128.0
+ %cvt7 = fptoui double %fix7 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #7
+ store volatile i64 %cvt7, i64* @var64
+
+ %fix8 = fmul double %dbl, 18446744073709551616.0
+ %cvt8 = fptoui double %fix8 to i64
+; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #64
+ store volatile i64 %cvt8, i64* @var64
+
+ ret void
+}
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @test_scvtf(i32 %int, i64 %long) {
+; CHECK: test_scvtf:
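+; Conversely, sitofp followed by an fdiv by 2^N should fold into the
+; fractional-bits immediate of scvtf.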
+
+ %cvt1 = sitofp i32 %int to float
+ %fix1 = fdiv float %cvt1, 128.0
+; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #7
+ store volatile float %fix1, float* @varfloat
+
+ %cvt2 = sitofp i32 %int to float
+ %fix2 = fdiv float %cvt2, 4294967296.0
+; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #32
+ store volatile float %fix2, float* @varfloat
+
+ %cvt3 = sitofp i64 %long to float
+ %fix3 = fdiv float %cvt3, 128.0
+; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #7
+ store volatile float %fix3, float* @varfloat
+
+ %cvt4 = sitofp i64 %long to float
+ %fix4 = fdiv float %cvt4, 18446744073709551616.0
+; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #64
+ store volatile float %fix4, float* @varfloat
+
+ %cvt5 = sitofp i32 %int to double
+ %fix5 = fdiv double %cvt5, 128.0
+; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #7
+ store volatile double %fix5, double* @vardouble
+
+ %cvt6 = sitofp i32 %int to double
+ %fix6 = fdiv double %cvt6, 4294967296.0
+; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #32
+ store volatile double %fix6, double* @vardouble
+
+ %cvt7 = sitofp i64 %long to double
+ %fix7 = fdiv double %cvt7, 128.0
+; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #7
+ store volatile double %fix7, double* @vardouble
+
+ %cvt8 = sitofp i64 %long to double
+ %fix8 = fdiv double %cvt8, 18446744073709551616.0
+; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #64
+ store volatile double %fix8, double* @vardouble
+
+ ret void
+}
+
+define void @test_ucvtf(i32 %int, i64 %long) {
+; CHECK: test_ucvtf:
+
+ %cvt1 = uitofp i32 %int to float
+ %fix1 = fdiv float %cvt1, 128.0
+; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #7
+ store volatile float %fix1, float* @varfloat
+
+ %cvt2 = uitofp i32 %int to float
+ %fix2 = fdiv float %cvt2, 4294967296.0
+; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #32
+ store volatile float %fix2, float* @varfloat
+
+ %cvt3 = uitofp i64 %long to float
+ %fix3 = fdiv float %cvt3, 128.0
+; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #7
+ store volatile float %fix3, float* @varfloat
+
+ %cvt4 = uitofp i64 %long to float
+ %fix4 = fdiv float %cvt4, 18446744073709551616.0
+; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #64
+ store volatile float %fix4, float* @varfloat
+
+ %cvt5 = uitofp i32 %int to double
+ %fix5 = fdiv double %cvt5, 128.0
+; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #7
+ store volatile double %fix5, double* @vardouble
+
+ %cvt6 = uitofp i32 %int to double
+ %fix6 = fdiv double %cvt6, 4294967296.0
+; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #32
+ store volatile double %fix6, double* @vardouble
+
+ %cvt7 = uitofp i64 %long to double
+ %fix7 = fdiv double %cvt7, 128.0
+; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #7
+ store volatile double %fix7, double* @vardouble
+
+ %cvt8 = uitofp i64 %long to double
+ %fix8 = fdiv double %cvt8, 18446744073709551616.0
+; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #64
+ store volatile double %fix8, double* @vardouble
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll
new file mode 100644
index 000000000000..c771d683a99c
--- /dev/null
+++ b/test/CodeGen/AArch64/fcvt-int.ll
@@ -0,0 +1,151 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i32 @test_floattoi32(float %in) {
+; CHECK: test_floattoi32:
+
+ %signed = fptosi float %in to i32
+ %unsigned = fptoui float %in to i32
+; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{s[0-9]+}}
+; CHECK: fcvtzs [[SIG:w[0-9]+]], {{s[0-9]+}}
+
+ %res = sub i32 %signed, %unsigned
+; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]]
+
+ ret i32 %res
+; CHECK: ret
+}
+
+define i32 @test_doubletoi32(double %in) {
+; CHECK: test_doubletoi32:
+
+ %signed = fptosi double %in to i32
+ %unsigned = fptoui double %in to i32
+; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{d[0-9]+}}
+; CHECK: fcvtzs [[SIG:w[0-9]+]], {{d[0-9]+}}
+
+ %res = sub i32 %signed, %unsigned
+; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]]
+
+ ret i32 %res
+; CHECK: ret
+}
+
+define i64 @test_floattoi64(float %in) {
+; CHECK: test_floattoi64:
+
+ %signed = fptosi float %in to i64
+ %unsigned = fptoui float %in to i64
+; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{s[0-9]+}}
+; CHECK: fcvtzs [[SIG:x[0-9]+]], {{s[0-9]+}}
+
+ %res = sub i64 %signed, %unsigned
+; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]]
+
+ ret i64 %res
+; CHECK: ret
+}
+
+define i64 @test_doubletoi64(double %in) {
+; CHECK: test_doubletoi64:
+
+ %signed = fptosi double %in to i64
+ %unsigned = fptoui double %in to i64
+; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{d[0-9]+}}
+; CHECK: fcvtzs [[SIG:x[0-9]+]], {{d[0-9]+}}
+
+ %res = sub i64 %signed, %unsigned
+; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]]
+
+ ret i64 %res
+; CHECK: ret
+}
+
+define float @test_i32tofloat(i32 %in) {
+; CHECK: test_i32tofloat:
+
+ %signed = sitofp i32 %in to float
+ %unsigned = uitofp i32 %in to float
+; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{w[0-9]+}}
+; CHECK: scvtf [[SIG:s[0-9]+]], {{w[0-9]+}}
+
+ %res = fsub float %signed, %unsigned
+; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]]
+ ret float %res
+; CHECK: ret
+}
+
+define double @test_i32todouble(i32 %in) {
+; CHECK: test_i32todouble:
+
+ %signed = sitofp i32 %in to double
+ %unsigned = uitofp i32 %in to double
+; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{w[0-9]+}}
+; CHECK: scvtf [[SIG:d[0-9]+]], {{w[0-9]+}}
+
+ %res = fsub double %signed, %unsigned
+; CHECK: fsub {{d[0-9]+}}, [[SIG]], [[UNSIG]]
+ ret double %res
+; CHECK: ret
+}
+
+define float @test_i64tofloat(i64 %in) {
+; CHECK: test_i64tofloat:
+
+ %signed = sitofp i64 %in to float
+ %unsigned = uitofp i64 %in to float
+; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{x[0-9]+}}
+; CHECK: scvtf [[SIG:s[0-9]+]], {{x[0-9]+}}
+
+ %res = fsub float %signed, %unsigned
+; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]]
+ ret float %res
+; CHECK: ret
+}
+
+define double @test_i64todouble(i64 %in) {
+; CHECK: test_i64todouble:
+
+ %signed = sitofp i64 %in to double
+ %unsigned = uitofp i64 %in to double
+; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{x[0-9]+}}
+; CHECK: scvtf [[SIG:d[0-9]+]], {{x[0-9]+}}
+
+ %res = fsub double %signed, %unsigned
+; CHECK: fsub {{d[0-9]+}}, [[SIG]], [[UNSIG]]
+ ret double %res
+; CHECK: ret
+}
+
+define i32 @test_bitcastfloattoi32(float %in) {
+; CHECK: test_bitcastfloattoi32:
+
+ %res = bitcast float %in to i32
+; CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
+ ret i32 %res
+}
+
+define i64 @test_bitcastdoubletoi64(double %in) {
+; CHECK: test_bitcastdoubletoi64:
+
+ %res = bitcast double %in to i64
+; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
+ ret i64 %res
+}
+
+define float @test_bitcasti32tofloat(i32 %in) {
+; CHECK: test_bitcasti32tofloat:
+
+ %res = bitcast i32 %in to float
+; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
+ ret float %res
+
+}
+
+define double @test_bitcasti64todouble(i64 %in) {
+; CHECK: test_bitcasti64todouble:
+
+ %res = bitcast i64 %in to double
+; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
+ ret double %res
+
+}
diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll
new file mode 100644
index 000000000000..940c146f0a9f
--- /dev/null
+++ b/test/CodeGen/AArch64/flags-multiuse.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+; LLVM should be able to cope with multiple uses of the same flag-setting
+; instruction at different points of a routine, either by rematerializing the
+; compare or by saving and restoring the flag register.
+
+declare void @bar()
+
+@var = global i32 0
+
+define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) {
+; CHECK: test_multiflag:
+
+ %test = icmp ne i32 %n, %m
+; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]]
+
+ %val = zext i1 %test to i32
+; CHECK: csinc {{[xw][0-9]+}}, {{xzr|wzr}}, {{xzr|wzr}}, eq
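+; (csinc Wd, wzr, wzr, eq gives 0 when the flags say equal and 1 otherwise,
+; i.e. the zext of the ne comparison.)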
+
+ store i32 %val, i32* @var
+
+ call void @bar()
+; CHECK: bl bar
+
+ ; Currently, the comparison is emitted again. An MSR/MRS pair would also be
+ ; acceptable, but assuming the call preserves NZCV is not.
+ br i1 %test, label %iftrue, label %iffalse
+; CHECK: cmp [[LHS]], [[RHS]]
+; CHECK: b.eq
+
+iftrue:
+ ret i32 42
+iffalse:
+ ret i32 0
+}
diff --git a/test/CodeGen/AArch64/floatdp_1source.ll b/test/CodeGen/AArch64/floatdp_1source.ll
new file mode 100644
index 000000000000..c94ba9b57b5a
--- /dev/null
+++ b/test/CodeGen/AArch64/floatdp_1source.ll
@@ -0,0 +1,138 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@varhalf = global half 0.0
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+declare float @fabsf(float) readonly
+declare double @fabs(double) readonly
+
+declare float @llvm.sqrt.f32(float %Val)
+declare double @llvm.sqrt.f64(double %Val)
+
+declare float @ceilf(float) readonly
+declare double @ceil(double) readonly
+
+declare float @floorf(float) readonly
+declare double @floor(double) readonly
+
+declare float @truncf(float) readonly
+declare double @trunc(double) readonly
+
+declare float @rintf(float) readonly
+declare double @rint(double) readonly
+
+declare float @nearbyintf(float) readonly
+declare double @nearbyint(double) readonly
+
+define void @simple_float() {
+; CHECK: simple_float:
+ %val1 = load volatile float* @varfloat
+
+ %valabs = call float @fabsf(float %val1)
+ store volatile float %valabs, float* @varfloat
+; CHECK: fabs {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valneg = fsub float -0.0, %val1
+ store volatile float %valneg, float* @varfloat
+; CHECK: fneg {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valsqrt = call float @llvm.sqrt.f32(float %val1)
+ store volatile float %valsqrt, float* @varfloat
+; CHECK: fsqrt {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valceil = call float @ceilf(float %val1)
+ store volatile float %valceil, float* @varfloat
+; CHECK: frintp {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valfloor = call float @floorf(float %val1)
+ store volatile float %valfloor, float* @varfloat
+; CHECK: frintm {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valtrunc = call float @truncf(float %val1)
+ store volatile float %valtrunc, float* @varfloat
+; CHECK: frintz {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valrint = call float @rintf(float %val1)
+ store volatile float %valrint, float* @varfloat
+; CHECK: frintx {{s[0-9]+}}, {{s[0-9]+}}
+
+ %valnearbyint = call float @nearbyintf(float %val1)
+ store volatile float %valnearbyint, float* @varfloat
+; CHECK: frinti {{s[0-9]+}}, {{s[0-9]+}}
+
+ ret void
+}
+
+define void @simple_double() {
+; CHECK: simple_double:
+ %val1 = load volatile double* @vardouble
+
+ %valabs = call double @fabs(double %val1)
+ store volatile double %valabs, double* @vardouble
+; CHECK: fabs {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valneg = fsub double -0.0, %val1
+ store volatile double %valneg, double* @vardouble
+; CHECK: fneg {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valsqrt = call double @llvm.sqrt.f64(double %val1)
+ store volatile double %valsqrt, double* @vardouble
+; CHECK: fsqrt {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valceil = call double @ceil(double %val1)
+ store volatile double %valceil, double* @vardouble
+; CHECK: frintp {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valfloor = call double @floor(double %val1)
+ store volatile double %valfloor, double* @vardouble
+; CHECK: frintm {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valtrunc = call double @trunc(double %val1)
+ store volatile double %valtrunc, double* @vardouble
+; CHECK: frintz {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valrint = call double @rint(double %val1)
+ store volatile double %valrint, double* @vardouble
+; CHECK: frintx {{d[0-9]+}}, {{d[0-9]+}}
+
+ %valnearbyint = call double @nearbyint(double %val1)
+ store volatile double %valnearbyint, double* @vardouble
+; CHECK: frinti {{d[0-9]+}}, {{d[0-9]+}}
+
+ ret void
+}
+
+define void @converts() {
+; CHECK: converts:
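+; All six half/float/double conversions should use the single fcvt mnemonic,
+; distinguished only by register class.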
+
+ %val16 = load volatile half* @varhalf
+ %val32 = load volatile float* @varfloat
+ %val64 = load volatile double* @vardouble
+
+ %val16to32 = fpext half %val16 to float
+ store volatile float %val16to32, float* @varfloat
+; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}}
+
+ %val16to64 = fpext half %val16 to double
+ store volatile double %val16to64, double* @vardouble
+; CHECK: fcvt {{d[0-9]+}}, {{h[0-9]+}}
+
+ %val32to16 = fptrunc float %val32 to half
+ store volatile half %val32to16, half* @varhalf
+; CHECK: fcvt {{h[0-9]+}}, {{s[0-9]+}}
+
+ %val32to64 = fpext float %val32 to double
+ store volatile double %val32to64, double* @vardouble
+; CHECK: fcvt {{d[0-9]+}}, {{s[0-9]+}}
+
+ %val64to16 = fptrunc double %val64 to half
+ store volatile half %val64to16, half* @varhalf
+; CHECK: fcvt {{h[0-9]+}}, {{d[0-9]+}}
+
+ %val64to32 = fptrunc double %val64 to float
+ store volatile float %val64to32, float* @varfloat
+; CHECK: fcvt {{s[0-9]+}}, {{d[0-9]+}}
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll
new file mode 100644
index 000000000000..b2256b342acf
--- /dev/null
+++ b/test/CodeGen/AArch64/floatdp_2source.ll
@@ -0,0 +1,60 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @testfloat() {
+; CHECK: testfloat:
+ %val1 = load float* @varfloat
+
+ %val2 = fadd float %val1, %val1
+; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+ %val3 = fmul float %val2, %val1
+; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+ %val4 = fdiv float %val3, %val1
+; CHECK: fdiv {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+ %val5 = fsub float %val4, %val2
+; CHECK: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+ store volatile float %val5, float* @varfloat
+
+; These will be enabled with the implementation of floating-point litpool entries.
+ %val6 = fmul float %val1, %val2
+ %val7 = fsub float -0.0, %val6
+; CHECK: fnmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+
+ store volatile float %val7, float* @varfloat
+
+ ret void
+}
+
+define void @testdouble() {
+; CHECK: testdouble:
+ %val1 = load double* @vardouble
+
+ %val2 = fadd double %val1, %val1
+; CHECK: fadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ %val3 = fmul double %val2, %val1
+; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ %val4 = fdiv double %val3, %val1
+; CHECK: fdiv {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ %val5 = fsub double %val4, %val2
+; CHECK: fsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ store volatile double %val5, double* @vardouble
+
+; These will be enabled with the implementation of floating-point litpool entries.
+ %val6 = fmul double %val1, %val2
+ %val7 = fsub double -0.0, %val6
+; CHECK: fnmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+
+ store volatile double %val7, double* @vardouble
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll
new file mode 100644
index 000000000000..56e8f16f9b36
--- /dev/null
+++ b/test/CodeGen/AArch64/fp-cond-sel.ll
@@ -0,0 +1,26 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) {
+; CHECK: test_csel:
+
+ %tst1 = icmp ugt i32 %lhs32, %rhs32
+ %val1 = select i1 %tst1, float 0.0, float 1.0
+ store float %val1, float* @varfloat
+; CHECK: ldr [[FLT0:s[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI
+; CHECK: fmov [[FLT1:s[0-9]+]], #1.0
+; CHECK: fcsel {{s[0-9]+}}, [[FLT0]], [[FLT1]], hi
+
+ %rhs64 = sext i32 %rhs32 to i64
+ %tst2 = icmp sle i64 %lhs64, %rhs64
+ %val2 = select i1 %tst2, double 1.0, double 0.0
+ store double %val2, double* @vardouble
+; CHECK: ldr [[FLT0:d[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI
+; CHECK: fmov [[FLT1:d[0-9]+]], #1.0
+; CHECK: fcsel {{d[0-9]+}}, [[FLT1]], [[FLT0]], le
+
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll
new file mode 100644
index 000000000000..39db9be15771
--- /dev/null
+++ b/test/CodeGen/AArch64/fp-dp3.ll
@@ -0,0 +1,102 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s
+
+declare float @llvm.fma.f32(float, float, float)
+declare double @llvm.fma.f64(double, double, double)
+
+define float @test_fmadd(float %a, float %b, float %c) {
+; CHECK: test_fmadd:
+ %val = call float @llvm.fma.f32(float %a, float %b, float %c)
+; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %val
+}
+
+define float @test_fmsub(float %a, float %b, float %c) {
+; CHECK: test_fmsub:
+ %nega = fsub float -0.0, %a
+ %val = call float @llvm.fma.f32(float %nega, float %b, float %c)
+; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %val
+}
+
+define float @test_fnmadd(float %a, float %b, float %c) {
+; CHECK: test_fnmadd:
+ %negc = fsub float -0.0, %c
+ %val = call float @llvm.fma.f32(float %a, float %b, float %negc)
+; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %val
+}
+
+define float @test_fnmsub(float %a, float %b, float %c) {
+; CHECK: test_fnmsub:
+ %nega = fsub float -0.0, %a
+ %negc = fsub float -0.0, %c
+ %val = call float @llvm.fma.f32(float %nega, float %b, float %negc)
+; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %val
+}
+
+define double @testd_fmadd(double %a, double %b, double %c) {
+; CHECK: testd_fmadd:
+ %val = call double @llvm.fma.f64(double %a, double %b, double %c)
+; CHECK: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret double %val
+}
+
+define double @testd_fmsub(double %a, double %b, double %c) {
+; CHECK: testd_fmsub:
+ %nega = fsub double -0.0, %a
+ %val = call double @llvm.fma.f64(double %nega, double %b, double %c)
+; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret double %val
+}
+
+define double @testd_fnmadd(double %a, double %b, double %c) {
+; CHECK: testd_fnmadd:
+ %negc = fsub double -0.0, %c
+ %val = call double @llvm.fma.f64(double %a, double %b, double %negc)
+; CHECK: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret double %val
+}
+
+define double @testd_fnmsub(double %a, double %b, double %c) {
+; CHECK: testd_fnmsub:
+ %nega = fsub double -0.0, %a
+ %negc = fsub double -0.0, %c
+ %val = call double @llvm.fma.f64(double %nega, double %b, double %negc)
+; CHECK: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret double %val
+}
+
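+; With -fp-contract=fast (see the RUN line), a separate fmul/fadd pair should
+; also be contracted into the fused forms: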
+define float @test_fmadd_unfused(float %a, float %b, float %c) {
+; CHECK: test_fmadd_unfused:
+ %prod = fmul float %b, %c
+ %sum = fadd float %a, %prod
+; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %sum
+}
+
+define float @test_fmsub_unfused(float %a, float %b, float %c) {
+; CHECK: test_fmsub_unfused:
+ %prod = fmul float %b, %c
+ %diff = fsub float %a, %prod
+; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %diff
+}
+
+define float @test_fnmadd_unfused(float %a, float %b, float %c) {
+; CHECK: test_fnmadd_unfused:
+ %nega = fsub float -0.0, %a
+ %prod = fmul float %b, %c
+ %sum = fadd float %nega, %prod
+; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %sum
+}
+
+define float @test_fnmsub_unfused(float %a, float %b, float %c) {
+; CHECK: test_fnmsub_unfused:
+ %nega = fsub float -0.0, %a
+ %prod = fmul float %b, %c
+ %diff = fsub float %nega, %prod
+; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret float %diff
+}
diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll
new file mode 100644
index 000000000000..b5bdcf4f37b4
--- /dev/null
+++ b/test/CodeGen/AArch64/fp128-folding.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+declare void @bar(i8*, i8*, i32*)
+
+; SelectionDAG used to try to fold some fp128 operations using the ppc_fp128
+; type, which is not supported.
+
+define fp128 @test_folding() {
+; CHECK: test_folding:
+ %l = alloca i32
+ store i32 42, i32* %l
+ %val = load i32* %l
+ %fpval = sitofp i32 %val to fp128
+ ; If the value is loaded from a constant pool into an fp128, it's been folded
+ ; successfully.
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI
+ ret fp128 %fpval
+} \ No newline at end of file
diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll
new file mode 100644
index 000000000000..258d34b8f81f
--- /dev/null
+++ b/test/CodeGen/AArch64/fp128.ll
@@ -0,0 +1,280 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@lhs = global fp128 zeroinitializer
+@rhs = global fp128 zeroinitializer
+
+define fp128 @test_add() {
+; CHECK: test_add:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+ %val = fadd fp128 %lhs, %rhs
+; CHECK: bl __addtf3
+ ret fp128 %val
+}
+
+define fp128 @test_sub() {
+; CHECK: test_sub:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+ %val = fsub fp128 %lhs, %rhs
+; CHECK: bl __subtf3
+ ret fp128 %val
+}
+
+define fp128 @test_mul() {
+; CHECK: test_mul:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+ %val = fmul fp128 %lhs, %rhs
+; CHECK: bl __multf3
+ ret fp128 %val
+}
+
+define fp128 @test_div() {
+; CHECK: test_div:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+ %val = fdiv fp128 %lhs, %rhs
+; CHECK: bl __divtf3
+ ret fp128 %val
+}
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_fptosi() {
+; CHECK: test_fptosi:
+ %val = load fp128* @lhs
+
+ %val32 = fptosi fp128 %val to i32
+ store i32 %val32, i32* @var32
+; CHECK: bl __fixtfsi
+
+ %val64 = fptosi fp128 %val to i64
+ store i64 %val64, i64* @var64
+; CHECK: bl __fixtfdi
+
+ ret void
+}
+
+define void @test_fptoui() {
+; CHECK: test_fptoui:
+ %val = load fp128* @lhs
+
+ %val32 = fptoui fp128 %val to i32
+ store i32 %val32, i32* @var32
+; CHECK: bl __fixunstfsi
+
+ %val64 = fptoui fp128 %val to i64
+ store i64 %val64, i64* @var64
+; CHECK: bl __fixunstfdi
+
+ ret void
+}
+
+define void @test_sitofp() {
+; CHECK: test_sitofp:
+
+ %src32 = load i32* @var32
+ %val32 = sitofp i32 %src32 to fp128
+ store volatile fp128 %val32, fp128* @lhs
+; CHECK: bl __floatsitf
+
+ %src64 = load i64* @var64
+ %val64 = sitofp i64 %src64 to fp128
+ store volatile fp128 %val64, fp128* @lhs
+; CHECK: bl __floatditf
+
+ ret void
+}
+
+define void @test_uitofp() {
+; CHECK: test_uitofp:
+
+ %src32 = load i32* @var32
+ %val32 = uitofp i32 %src32 to fp128
+ store volatile fp128 %val32, fp128* @lhs
+; CHECK: bl __floatunsitf
+
+ %src64 = load i64* @var64
+ %val64 = uitofp i64 %src64 to fp128
+ store volatile fp128 %val64, fp128* @lhs
+; CHECK: bl __floatunditf
+
+ ret void
+}
+
+define i1 @test_setcc1() {
+; CHECK: test_setcc1:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+; Technically, everything after the call to __letf2 is redundant, but we'll let
+; LLVM have its fun for now.
+ %val = fcmp ole fp128 %lhs, %rhs
+; CHECK: bl __letf2
+; CHECK: cmp w0, #0
+; CHECK: csinc w0, wzr, wzr, gt
+
+ ret i1 %val
+; CHECK: ret
+}
+
+define i1 @test_setcc2() {
+; CHECK: test_setcc2:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+; Technically, a single call to __letf2 would suffice here (ugt == !ole), but
+; we'll let LLVM have its fun for now.
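+; ugt means "unordered or greater than", hence the separate __unordtf2 and
+; __gttf2 results ORed together below.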
+ %val = fcmp ugt fp128 %lhs, %rhs
+; CHECK: bl __unordtf2
+; CHECK: mov x[[UNORDERED:[0-9]+]], x0
+
+; CHECK: bl __gttf2
+; CHECK: cmp w0, #0
+; CHECK: csinc [[GT:w[0-9]+]], wzr, wzr, le
+; CHECK: cmp w[[UNORDERED]], #0
+; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq
+; CHECK: orr w0, [[UNORDERED]], [[GT]]
+
+ ret i1 %val
+; CHECK: ret
+}
+
+define i32 @test_br_cc() {
+; CHECK: test_br_cc:
+
+ %lhs = load fp128* @lhs
+ %rhs = load fp128* @rhs
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs]
+; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs]
+
+ ; olt == !uge, which is what LLVM unfortunately "optimizes" this to.
+ %cond = fcmp olt fp128 %lhs, %rhs
+; CHECK: bl __unordtf2
+; CHECK: mov x[[UNORDERED:[0-9]+]], x0
+
+; CHECK: bl __getf2
+; CHECK: cmp w0, #0
+
+; CHECK: csinc [[OGE:w[0-9]+]], wzr, wzr, lt
+; CHECK: cmp w[[UNORDERED]], #0
+; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq
+; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]]
+; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]]
+ br i1 %cond, label %iftrue, label %iffalse
+
+iftrue:
+ ret i32 42
+; CHECK-NEXT: BB#
+; CHECK-NEXT: movz x0, #42
+; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]]
+
+iffalse:
+ ret i32 29
+; CHECK: [[RET29]]:
+; CHECK-NEXT: movz x0, #29
+; CHECK-NEXT: [[REALRET]]:
+; CHECK: ret
+}
+
+define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
+; CHECK: test_select:
+
+ %val = select i1 %cond, fp128 %lhs, fp128 %rhs
+ store fp128 %val, fp128* @lhs
+; CHECK: cmp w0, #0
+; CHECK: str q1, [sp]
+; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: BB#
+; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: [[IFFALSE]]:
+; CHECK-NEXT: ldr q0, [sp]
+; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
+ ret void
+; CHECK: ret
+}
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @test_round() {
+; CHECK: test_round:
+
+ %val = load fp128* @lhs
+
+ %float = fptrunc fp128 %val to float
+ store float %float, float* @varfloat
+; CHECK: bl __trunctfsf2
+; CHECK: str s0, [{{x[0-9]+}}, #:lo12:varfloat]
+
+ %double = fptrunc fp128 %val to double
+ store double %double, double* @vardouble
+; CHECK: bl __trunctfdf2
+; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble]
+
+ ret void
+}
+
+define void @test_extend() {
+; CHECK: test_extend:
+
+ %val = load fp128* @lhs
+
+ %float = load float* @varfloat
+ %fromfloat = fpext float %float to fp128
+ store volatile fp128 %fromfloat, fp128* @lhs
+; CHECK: bl __extendsftf2
+; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
+
+ %double = load double* @vardouble
+ %fromdouble = fpext double %double to fp128
+ store volatile fp128 %fromdouble, fp128* @lhs
+; CHECK: bl __extenddftf2
+; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs]
+
+ ret void
+; CHECK: ret
+}
+
+define fp128 @test_neg(fp128 %in) {
+; CHECK: [[MINUS0:.LCPI[0-9]+_0]]:
+; Make sure the weird hex constant below *is* -0.0
+; CHECK-NEXT: fp128 -0
+
+; CHECK: test_neg:
+
+ ; This could in principle be optimized to fneg, which we can't select; make
+ ; sure that doesn't happen.
+ %ret = fsub fp128 0xL00000000000000008000000000000000, %in
+; CHECK: str q0, [sp, #-16]
+; CHECK-NEXT: ldr q1, [sp], #16
+; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:[[MINUS0]]]
+; CHECK: bl __subtf3
+
+ ret fp128 %ret
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll
new file mode 100644
index 000000000000..fd28aeef9291
--- /dev/null
+++ b/test/CodeGen/AArch64/fpimm.ll
@@ -0,0 +1,34 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@varf32 = global float 0.0
+@varf64 = global double 0.0
+
+define void @check_float() {
+; CHECK: check_float:
+
+ %val = load float* @varf32
+ %newval1 = fadd float %val, 8.5
+ store volatile float %newval1, float* @varf32
+; CHECK: fmov {{s[0-9]+}}, #8.5
+
+ %newval2 = fadd float %val, 128.0
+ store volatile float %newval2, float* @varf32
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI0_0
+
+ ret void
+}
+
+define void @check_double() {
+; CHECK: check_double:
+
+ %val = load double* @varf64
+ %newval1 = fadd double %val, 8.5
+ store volatile double %newval1, double* @varf64
+; CHECK: fmov {{d[0-9]+}}, #8.5
+
+ %newval2 = fadd double %val, 128.0
+ store volatile double %newval2, double* @varf64
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI1_0
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll
new file mode 100644
index 000000000000..78fde6a3c33a
--- /dev/null
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@@ -0,0 +1,193 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+%myStruct = type { i64 , i8, i32 }
+
+@var8 = global i8 0
+@var32 = global i32 0
+@var64 = global i64 0
+@var128 = global i128 0
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+@varstruct = global %myStruct zeroinitializer
+
+define void @take_i8s(i8 %val1, i8 %val2) {
+; CHECK: take_i8s:
+ store i8 %val2, i8* @var8
+ ; Not using w1 may be technically allowed, but it would indicate a
+ ; problem in itself.
+; CHECK: strb w1, [{{x[0-9]+}}, #:lo12:var8]
+ ret void
+}
+
+define void @add_floats(float %val1, float %val2) {
+; CHECK: add_floats:
+ %newval = fadd float %val1, %val2
+; CHECK: fadd [[ADDRES:s[0-9]+]], s0, s1
+ store float %newval, float* @varfloat
+; CHECK: str [[ADDRES]], [{{x[0-9]+}}, #:lo12:varfloat]
+ ret void
+}
+
+; byval pointers should be allocated to the stack and copied as if
+; with memcpy.
+define void @take_struct(%myStruct* byval %structval) {
+; CHECK: take_struct:
+ %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2
+ %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0
+
+ %val0 = load i32* %addr0
+ ; Some weird move means x0 is used for one access
+; CHECK: ldr [[REG32:w[0-9]+]], [{{x[0-9]+|sp}}, #12]
+ store i32 %val0, i32* @var32
+; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32]
+
+ %val1 = load i64* %addr1
+; CHECK: ldr [[REG64:x[0-9]+]], [{{x[0-9]+|sp}}]
+ store i64 %val1, i64* @var64
+; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64]
+
+ ret void
+}
+
+; %structval should be at sp + 16
+define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %structval) {
+; CHECK: check_byval_align:
+
+ %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2
+ %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0
+
+ %val0 = load i32* %addr0
+ ; Some weird move means x0 is used for one access
+; CHECK: add x[[STRUCTVAL_ADDR:[0-9]+]], sp, #16
+; CHECK: ldr [[REG32:w[0-9]+]], [x[[STRUCTVAL_ADDR]], #12]
+ store i32 %val0, i32* @var32
+; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32]
+
+ %val1 = load i64* %addr1
+; CHECK: ldr [[REG64:x[0-9]+]], [sp, #16]
+ store i64 %val1, i64* @var64
+; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64]
+
+ ret void
+}
+
+define i32 @return_int() {
+; CHECK: return_int:
+ %val = load i32* @var32
+ ret i32 %val
+; CHECK: ldr w0, [{{x[0-9]+}}, #:lo12:var32]
+ ; Make sure epilogue follows
+; CHECK-NEXT: ret
+}
+
+define double @return_double() {
+; CHECK: return_double:
+ ret double 3.14
+; CHECK: ldr d0, [{{x[0-9]+}}, #:lo12:.LCPI
+}
+
+; This is the kind of IR clang will produce for returning a struct
+; small enough to go into registers. Not all that pretty, but it
+; works.
+define [2 x i64] @return_struct() {
+; CHECK: return_struct:
+ %addr = bitcast %myStruct* @varstruct to [2 x i64]*
+ %val = load [2 x i64]* %addr
+ ret [2 x i64] %val
+; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:varstruct]
+ ; Odd register regex below disallows x0, which we want to be live now.
+; CHECK: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, #:lo12:varstruct
+; CHECK-NEXT: ldr x1, [{{x[1-9][0-9]*}}, #8]
+ ; Make sure epilogue immediately follows
+; CHECK-NEXT: ret
+}
+
+; Large structs are passed by reference (storage allocated by the caller to
+; preserve value semantics) in x8. Strictly this only applies to structs
+; larger than 16 bytes, but C semantics can still be provided if LLVM does it
+; to %myStruct too, so this is the simplest check.
+define void @return_large_struct(%myStruct* sret %retval) {
+; CHECK: return_large_struct:
+ %addr0 = getelementptr %myStruct* %retval, i64 0, i32 0
+ %addr1 = getelementptr %myStruct* %retval, i64 0, i32 1
+ %addr2 = getelementptr %myStruct* %retval, i64 0, i32 2
+
+ store i64 42, i64* %addr0
+ store i8 2, i8* %addr1
+ store i32 9, i32* %addr2
+; CHECK: str {{x[0-9]+}}, [x8]
+; CHECK: strb {{w[0-9]+}}, [x8, #8]
+; CHECK: str {{w[0-9]+}}, [x8, #12]
+
+ ret void
+}
+
+; This struct is just too far along to go into registers: only x7 is
+; available, but it needs two. Also make sure that %stacked doesn't
+; sneak into x7 behind it.
+define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var45,
+ i32* %var6, %myStruct* byval %struct, i32* byval %stacked,
+ double %notstacked) {
+; CHECK: struct_on_stack:
+ %addr = getelementptr %myStruct* %struct, i64 0, i32 0
+ %val64 = load i64* %addr
+ store i64 %val64, i64* @var64
+ ; Currently there is nothing on the local stack, so the struct should be at sp.
+; CHECK: ldr [[VAL64:x[0-9]+]], [sp]
+; CHECK: str [[VAL64]], [{{x[0-9]+}}, #:lo12:var64]
+
+ store double %notstacked, double* @vardouble
+; CHECK-NOT: ldr d0
+; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble
+
+ %retval = load i32* %stacked
+ ret i32 %retval
+; CHECK: ldr w0, [sp, #16]
+}
+
+define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
+ float %var4, float %var5, float %var6, float %var7,
+ float %var8) {
+; CHECK: stacked_fpu:
+ store float %var8, float* @varfloat
+ ; Beware as above: the offset would be different on big-endian
+ ; machines if the first ldr were changed to use s-registers.
+; CHECK: ldr d[[VALFLOAT:[0-9]+]], [sp]
+; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, #:lo12:varfloat]
+
+ ret void
+}
+
+; 128-bit integer types should be passed in xEVEN, xODD rather than
+; the reverse. In this case x2 and x3. Nothing should use x1.
+define i32 @check_i128_regalign(i32 %val0, i128 %val1, i32 %val2) {
+; CHECK: check_i128_regalign
+ store i128 %val1, i128* @var128
+; CHECK: str x2, [{{x[0-9]+}}, #:lo12:var128]
+; CHECK: str x3, [{{x[0-9]+}}, #8]
+
+ ret i32 %val2
+; CHECK: mov x0, x4
+}
+
+define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
+ i32 %val4, i32 %val5, i32 %val6, i32 %val7,
+ i32 %stack1, i128 %stack2) {
+; CHECK: check_i128_stackalign
+ store i128 %stack2, i128* @var128
+ ; Nothing local on the stack in current codegen, so the first stack argument is at sp+16.
+; CHECK: ldr {{x[0-9]+}}, [sp, #16]
+ ; The important point is that the second dword is addressed at sp+24.
+; CHECK: add [[REG:x[0-9]+]], sp, #16
+; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG]], #8]
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+
+define i32 @test_extern() {
+; CHECK: test_extern:
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0)
+; CHECK: bl memcpy
+ ret i32 0
+}
diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll
new file mode 100644
index 000000000000..13b689c40886
--- /dev/null
+++ b/test/CodeGen/AArch64/func-calls.ll
@@ -0,0 +1,140 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+%myStruct = type { i64 , i8, i32 }
+
+@var8 = global i8 0
+@var8_2 = global i8 0
+@var32 = global i32 0
+@var64 = global i64 0
+@var128 = global i128 0
+@varfloat = global float 0.0
+@varfloat_2 = global float 0.0
+@vardouble = global double 0.0
+@varstruct = global %myStruct zeroinitializer
+@varsmallstruct = global [2 x i64] zeroinitializer
+
+declare void @take_i8s(i8 %val1, i8 %val2)
+declare void @take_floats(float %val1, float %val2)
+
+define void @simple_args() {
+; CHECK: simple_args:
+ %char1 = load i8* @var8
+ %char2 = load i8* @var8_2
+ call void @take_i8s(i8 %char1, i8 %char2)
+; CHECK: ldrb w0, [{{x[0-9]+}}, #:lo12:var8]
+; CHECK: ldrb w1, [{{x[0-9]+}}, #:lo12:var8_2]
+; CHECK: bl take_i8s
+
+ %float1 = load float* @varfloat
+ %float2 = load float* @varfloat_2
+ call void @take_floats(float %float1, float %float2)
+; CHECK: ldr s1, [{{x[0-9]+}}, #:lo12:varfloat_2]
+; CHECK: ldr s0, [{{x[0-9]+}}, #:lo12:varfloat]
+; CHECK: bl take_floats
+
+ ret void
+}
+
+declare i32 @return_int()
+declare double @return_double()
+declare [2 x i64] @return_smallstruct()
+declare void @return_large_struct(%myStruct* sret %retval)
+
+define void @simple_rets() {
+; CHECK: simple_rets:
+
+ %int = call i32 @return_int()
+ store i32 %int, i32* @var32
+; CHECK: bl return_int
+; CHECK: str w0, [{{x[0-9]+}}, #:lo12:var32]
+
+ %dbl = call double @return_double()
+ store double %dbl, double* @vardouble
+; CHECK: bl return_double
+; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble]
+
+ %arr = call [2 x i64] @return_smallstruct()
+ store [2 x i64] %arr, [2 x i64]* @varsmallstruct
+; CHECK: bl return_smallstruct
+; CHECK: str x1, [{{x[0-9]+}}, #8]
+; CHECK: str x0, [{{x[0-9]+}}, #:lo12:varsmallstruct]
+
+ call void @return_large_struct(%myStruct* sret @varstruct)
+; CHECK: add x8, {{x[0-9]+}}, #:lo12:varstruct
+; CHECK: bl return_large_struct
+
+ ret void
+}
+
+
+declare i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var45,
+ i32* %var6, %myStruct* byval %struct, i32 %stacked,
+ double %notstacked)
+declare void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
+ float %var4, float %var5, float %var6, float %var7,
+ float %var8)
+
+define void @check_stack_args() {
+ call i32 @struct_on_stack(i8 0, i16 12, i32 42, i64 99, i128 1,
+ i32* @var32, %myStruct* byval @varstruct,
+ i32 999, double 1.0)
+ ; Want to check that the final double is passed in registers and
+ ; that varstruct is passed on the stack. Rather dependent on how a
+ ; memcpy gets created, but the following works for now.
+; CHECK: mov x0, sp
+; CHECK: str {{w[0-9]+}}, [x0]
+; CHECK: str {{w[0-9]+}}, [x0, #12]
+; CHECK: fmov d0,
+; CHECK: bl struct_on_stack
+
+ call void @stacked_fpu(float -1.0, double 1.0, float 4.0, float 2.0,
+ float -2.0, float -8.0, float 16.0, float 1.0,
+ float 64.0)
+; CHECK: ldr s[[STACKEDREG:[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI
+; CHECK: mov x0, sp
+; CHECK: str d[[STACKEDREG]], [x0]
+; CHECK: bl stacked_fpu
+ ret void
+}
+
+
+declare void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
+ i32 %val4, i32 %val5, i32 %val6, i32 %val7,
+ i32 %stack1, i128 %stack2)
+
+declare void @check_i128_regalign(i32 %val0, i128 %val1)
+
+
+define void @check_i128_align() {
+; CHECK: check_i128_align:
+ %val = load i128* @var128
+ call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
+ i32 4, i32 5, i32 6, i32 7,
+ i32 42, i128 %val)
+; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var128]
+; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
+; CHECK: mov x[[SPREG:[0-9]+]], sp
+; CHECK: str [[I128HI]], [x[[SPREG]], #24]
+; CHECK: str [[I128LO]], [x[[SPREG]], #16]
+; CHECK: bl check_i128_stackalign
+
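+; An i128 argument must start at an even-numbered register, so i128 42 goes
+; in x2/x3 and x1 is left unused: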
+ call void @check_i128_regalign(i32 0, i128 42)
+; CHECK-NOT: mov x1
+; CHECK: movz x2, #42
+; CHECK: mov x3, xzr
+; CHECK: bl check_i128_regalign
+
+ ret void
+}
+
+@fptr = global void()* null
+
+define void @check_indirect_call() {
+; CHECK: check_indirect_call:
+ %func = load void()** @fptr
+ call void %func()
+; CHECK: ldr [[FPTR:x[0-9]+]], [{{x[0-9]+}}, #:lo12:fptr]
+; CHECK: blr [[FPTR]]
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll
new file mode 100644
index 000000000000..8ed6e551cdeb
--- /dev/null
+++ b/test/CodeGen/AArch64/global-alignment.ll
@@ -0,0 +1,69 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@var32 = global [3 x i32] zeroinitializer
+@var64 = global [3 x i64] zeroinitializer
+@var32_align64 = global [3 x i32] zeroinitializer, align 8
+
+define i64 @test_align32() {
+; CHECK: test_align32:
+ %addr = bitcast [3 x i32]* @var32 to i64*
+
+ ; Since @var32 is only guaranteed to be aligned to 32 bits, it's invalid to
+ ; emit an "LDR x0, [x0, #:lo12:var32]" instruction to implement this load.
+ %val = load i64* %addr
+; CHECK: adrp [[HIBITS:x[0-9]+]], var32
+; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:var32
+; CHECK: ldr x0, [x[[ADDR]]]
+
+ ret i64 %val
+}
+
+define i64 @test_align64() {
+; CHECK: test_align64:
+ %addr = bitcast [3 x i64]* @var64 to i64*
+
+ ; However, var64 *is* properly aligned and emitting an adrp/add/ldr would be
+ ; inefficient.
+ %val = load i64* %addr
+; CHECK: adrp x[[HIBITS:[0-9]+]], var64
+; CHECK-NOT: add x[[HIBITS]]
+; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var64]
+
+ ret i64 %val
+}
+
+define i64 @test_var32_align64() {
+; CHECK: test_var32_align64:
+ %addr = bitcast [3 x i32]* @var32_align64 to i64*
+
+ ; @var32_align64, by contrast, has been explicitly given 64-bit alignment, so
+ ; folding the load into the #:lo12: offset of a single ldr is valid here.
+ %val = load i64* %addr
+; CHECK: adrp x[[HIBITS:[0-9]+]], var32_align64
+; CHECK-NOT: add x[[HIBITS]]
+; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var32_align64]
+
+ ret i64 %val
+}
+
+@yet_another_var = external global {i32, i32}
+
+define i64 @test_yet_another_var() {
+; CHECK: test_yet_another_var:
+
+ ; @yet_another_var has a preferred alignment of 8, but that's not enough if
+ ; we're going to be linking against other things. Its ABI alignment is only 4
+ ; so we can't fold the load.
+ %val = load i64* bitcast({i32, i32}* @yet_another_var to i64*)
+; CHECK: adrp [[HIBITS:x[0-9]+]], yet_another_var
+; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:yet_another_var
+; CHECK: ldr x0, [x[[ADDR]]]
+ ret i64 %val
+}
+
+define i64()* @test_functions() {
+; CHECK: test_functions:
+ ret i64()* @test_yet_another_var
+; CHECK: adrp [[HIBITS:x[0-9]+]], test_yet_another_var
+; CHECK: add x0, [[HIBITS]], #:lo12:test_yet_another_var
+}
diff --git a/test/CodeGen/AArch64/got-abuse.ll b/test/CodeGen/AArch64/got-abuse.ll
new file mode 100644
index 000000000000..c474e5845a64
--- /dev/null
+++ b/test/CodeGen/AArch64/got-abuse.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s
+
+; LLVM gives well-defined semantics to this horrible construct (though C says
+; it's undefined). Regardless, we shouldn't crash. The important feature here is
+; that in general the only way to access a GOT symbol is via a 64-bit
+; load. Neither of these alternatives has the ELF relocations required to
+; support it:
+; + ldr wD, [xN, #:got_lo12:func]
+; + add xD, xN, #:got_lo12:func
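+;
+; For contrast, the supported sequence (the one the checks below expect) is:
+;   adrp xN, :got:func
+;   ldr  xD, [xN, #:got_lo12:func]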
+
+declare void @consume(i32)
+declare void @func()
+
+define void @foo() nounwind {
+; CHECK: foo:
+entry:
+ call void @consume(i32 ptrtoint (void ()* @func to i32))
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:func
+; CHECK: ldr {{x[0-9]+}}, [x[[ADDRHI]], #:got_lo12:func]
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/i128-align.ll b/test/CodeGen/AArch64/i128-align.ll
new file mode 100644
index 000000000000..f019ea0a6706
--- /dev/null
+++ b/test/CodeGen/AArch64/i128-align.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+%struct = type { i32, i128, i8 }
+
+@var = global %struct zeroinitializer
+
+define i64 @check_size() {
+; CHECK: check_size:
+ %starti = ptrtoint %struct* @var to i64
+
+ %endp = getelementptr %struct* @var, i64 1
+ %endi = ptrtoint %struct* %endp to i64
+
+ %diff = sub i64 %endi, %starti
+ ret i64 %diff
+; CHECK: movz x0, #48
+}
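+
+; Layout sketch behind the constant above, assuming i128 is 16-byte
+; aligned: i32 at offset 0, padding to 16, i128 at 16-31, i8 at 32, and
+; tail padding to the 16-byte struct alignment gives sizeof = 48; the
+; same layout puts field 1 at offset 16 for check_field below.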
+
+define i64 @check_field() {
+; CHECK: check_field:
+ %starti = ptrtoint %struct* @var to i64
+
+ %endp = getelementptr %struct* @var, i64 0, i32 1
+ %endi = ptrtoint i128* %endp to i64
+
+ %diff = sub i64 %endi, %starti
+ ret i64 %diff
+; CHECK: movz x0, #16
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll
new file mode 100644
index 000000000000..446151b8ffac
--- /dev/null
+++ b/test/CodeGen/AArch64/illegal-float-ops.ll
@@ -0,0 +1,221 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+@varfp128 = global fp128 zeroinitializer
+
+declare float @llvm.cos.f32(float)
+declare double @llvm.cos.f64(double)
+declare fp128 @llvm.cos.f128(fp128)
+
+define void @test_cos(float %float, double %double, fp128 %fp128) {
+; CHECK: test_cos:
+
+ %cosfloat = call float @llvm.cos.f32(float %float)
+ store float %cosfloat, float* @varfloat
+; CHECK: bl cosf
+
+ %cosdouble = call double @llvm.cos.f64(double %double)
+ store double %cosdouble, double* @vardouble
+; CHECK: bl cos
+
+ %cosfp128 = call fp128 @llvm.cos.f128(fp128 %fp128)
+ store fp128 %cosfp128, fp128* @varfp128
+; CHECK: bl cosl
+
+ ret void
+}
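+
+; The same naming pattern holds for every libcall below: f32 lowers to
+; the "f"-suffixed libm routine, f64 to the unsuffixed one, and fp128 to
+; the "l"-suffixed one.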
+
+declare float @llvm.exp.f32(float)
+declare double @llvm.exp.f64(double)
+declare fp128 @llvm.exp.f128(fp128)
+
+define void @test_exp(float %float, double %double, fp128 %fp128) {
+; CHECK: test_exp:
+
+ %expfloat = call float @llvm.exp.f32(float %float)
+ store float %expfloat, float* @varfloat
+; CHECK: bl expf
+
+ %expdouble = call double @llvm.exp.f64(double %double)
+ store double %expdouble, double* @vardouble
+; CHECK: bl exp
+
+ %expfp128 = call fp128 @llvm.exp.f128(fp128 %fp128)
+ store fp128 %expfp128, fp128* @varfp128
+; CHECK: bl expl
+
+ ret void
+}
+
+declare float @llvm.exp2.f32(float)
+declare double @llvm.exp2.f64(double)
+declare fp128 @llvm.exp2.f128(fp128)
+
+define void @test_exp2(float %float, double %double, fp128 %fp128) {
+; CHECK: test_exp2:
+
+ %exp2float = call float @llvm.exp2.f32(float %float)
+ store float %exp2float, float* @varfloat
+; CHECK: bl exp2f
+
+ %exp2double = call double @llvm.exp2.f64(double %double)
+ store double %exp2double, double* @vardouble
+; CHECK: bl exp2
+
+ %exp2fp128 = call fp128 @llvm.exp2.f128(fp128 %fp128)
+ store fp128 %exp2fp128, fp128* @varfp128
+; CHECK: bl exp2l
+ ret void
+
+}
+
+declare float @llvm.log.f32(float)
+declare double @llvm.log.f64(double)
+declare fp128 @llvm.log.f128(fp128)
+
+define void @test_log(float %float, double %double, fp128 %fp128) {
+; CHECK: test_log:
+
+ %logfloat = call float @llvm.log.f32(float %float)
+ store float %logfloat, float* @varfloat
+; CHECK: bl logf
+
+ %logdouble = call double @llvm.log.f64(double %double)
+ store double %logdouble, double* @vardouble
+; CHECK: bl log
+
+ %logfp128 = call fp128 @llvm.log.f128(fp128 %fp128)
+ store fp128 %logfp128, fp128* @varfp128
+; CHECK: bl logl
+
+ ret void
+}
+
+declare float @llvm.log2.f32(float)
+declare double @llvm.log2.f64(double)
+declare fp128 @llvm.log2.f128(fp128)
+
+define void @test_log2(float %float, double %double, fp128 %fp128) {
+; CHECK: test_log2:
+
+ %log2float = call float @llvm.log2.f32(float %float)
+ store float %log2float, float* @varfloat
+; CHECK: bl log2f
+
+ %log2double = call double @llvm.log2.f64(double %double)
+ store double %log2double, double* @vardouble
+; CHECK: bl log2
+
+ %log2fp128 = call fp128 @llvm.log2.f128(fp128 %fp128)
+ store fp128 %log2fp128, fp128* @varfp128
+; CHECK: bl log2l
+ ret void
+
+}
+
+declare float @llvm.log10.f32(float)
+declare double @llvm.log10.f64(double)
+declare fp128 @llvm.log10.f128(fp128)
+
+define void @test_log10(float %float, double %double, fp128 %fp128) {
+; CHECK: test_log10:
+
+ %log10float = call float @llvm.log10.f32(float %float)
+ store float %log10float, float* @varfloat
+; CHECK: bl log10f
+
+ %log10double = call double @llvm.log10.f64(double %double)
+ store double %log10double, double* @vardouble
+; CHECK: bl log10
+
+ %log10fp128 = call fp128 @llvm.log10.f128(fp128 %fp128)
+ store fp128 %log10fp128, fp128* @varfp128
+; CHECK: bl log10l
+
+ ret void
+}
+
+declare float @llvm.sin.f32(float)
+declare double @llvm.sin.f64(double)
+declare fp128 @llvm.sin.f128(fp128)
+
+define void @test_sin(float %float, double %double, fp128 %fp128) {
+; CHECK: test_sin:
+
+ %sinfloat = call float @llvm.sin.f32(float %float)
+ store float %sinfloat, float* @varfloat
+; CHECK: bl sinf
+
+ %sindouble = call double @llvm.sin.f64(double %double)
+ store double %sindouble, double* @vardouble
+; CHECK: bl sin
+
+ %sinfp128 = call fp128 @llvm.sin.f128(fp128 %fp128)
+ store fp128 %sinfp128, fp128* @varfp128
+; CHECK: bl sinl
+ ret void
+
+}
+
+declare float @llvm.pow.f32(float, float)
+declare double @llvm.pow.f64(double, double)
+declare fp128 @llvm.pow.f128(fp128, fp128)
+
+define void @test_pow(float %float, double %double, fp128 %fp128) {
+; CHECK: test_pow:
+
+ %powfloat = call float @llvm.pow.f32(float %float, float %float)
+ store float %powfloat, float* @varfloat
+; CHECK: bl powf
+
+ %powdouble = call double @llvm.pow.f64(double %double, double %double)
+ store double %powdouble, double* @vardouble
+; CHECK: bl pow
+
+ %powfp128 = call fp128 @llvm.pow.f128(fp128 %fp128, fp128 %fp128)
+ store fp128 %powfp128, fp128* @varfp128
+; CHECK: bl powl
+
+ ret void
+}
+
+declare float @llvm.powi.f32(float, i32)
+declare double @llvm.powi.f64(double, i32)
+declare fp128 @llvm.powi.f128(fp128, i32)
+
+define void @test_powi(float %float, double %double, i32 %exponent, fp128 %fp128) {
+; CHECK: test_powi:
+
+ %powifloat = call float @llvm.powi.f32(float %float, i32 %exponent)
+ store float %powifloat, float* @varfloat
+; CHECK: bl __powisf2
+
+ %powidouble = call double @llvm.powi.f64(double %double, i32 %exponent)
+ store double %powidouble, double* @vardouble
+; CHECK: bl __powidf2
+
+ %powifp128 = call fp128 @llvm.powi.f128(fp128 %fp128, i32 %exponent)
+ store fp128 %powifp128, fp128* @varfp128
+; CHECK: bl __powitf2
+ ret void
+
+}
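+
+; powi has no libm counterpart, so it lowers to the compiler-rt helpers
+; (__powisf2, __powidf2, __powitf2) checked above instead of following
+; the libm suffix pattern.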
+
+define void @test_frem(float %float, double %double, fp128 %fp128) {
+; CHECK: test_frem:
+
+ %fremfloat = frem float %float, %float
+ store float %fremfloat, float* @varfloat
+; CHECK: bl fmodf
+
+ %fremdouble = frem double %double, %double
+ store double %fremdouble, double* @vardouble
+; CHECK: bl fmod
+
+ %fremfp128 = frem fp128 %fp128, %fp128
+ store fp128 %fremfp128, fp128* @varfp128
+; CHECK: bl fmodl
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll
new file mode 100644
index 000000000000..d80be8f3a639
--- /dev/null
+++ b/test/CodeGen/AArch64/init-array.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s
+
+define internal void @_GLOBAL__I_a() section ".text.startup" {
+ ret void
+}
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+; CHECK: .section .init_array
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
new file mode 100644
index 000000000000..c39c57f05822
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+
+define void @foo() {
+ ; Out of range immediate for I.
+ call void asm sideeffect "add x0, x0, $0", "I"(i32 4096)
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
new file mode 100644
index 000000000000..47c5f98bf009
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+
+define void @foo() {
+  ; 13 (0b1101) is not encodable as a 32-bit logical immediate.
+ call void asm sideeffect "and w0, w0, $0", "K"(i32 13)
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
new file mode 100644
index 000000000000..7a5b99e23b3d
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+
+define void @foo() {
+  ; 4294967296 needs more than 32 bits, so it can't satisfy the 32-bit constraint K.
+ call void asm sideeffect "and w0, w0, $0", "K"(i64 4294967296)
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
new file mode 100644
index 000000000000..4f0039865a35
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s
+
+define void @foo() {
+  ; 13 (0b1101) is not encodable as a 64-bit logical immediate.
+ call void asm sideeffect "and x0, x0, $0", "L"(i32 13)
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-constraints.ll b/test/CodeGen/AArch64/inline-asm-constraints.ll
new file mode 100644
index 000000000000..c232f3208cfa
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-constraints.ll
@@ -0,0 +1,117 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+define i64 @test_inline_constraint_r(i64 %base, i32 %offset) {
+; CHECK: test_inline_constraint_r:
+ %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 %base, i32 %offset)
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
+ ret i64 %val
+}
+
+define i16 @test_small_reg(i16 %lhs, i16 %rhs) {
+; CHECK: test_small_reg:
+ %val = call i16 asm sideeffect "add $0, $1, $2, sxth", "=r,r,r"(i16 %lhs, i16 %rhs)
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth
+ ret i16 %val
+}
+
+define i64 @test_inline_constraint_r_imm(i64 %base, i32 %offset) {
+; CHECK: test_inline_constraint_r_imm:
+ %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 4, i32 12)
+; CHECK: movz [[FOUR:x[0-9]+]], #4
+; CHECK: movz [[TWELVE:w[0-9]+]], #12
+; CHECK: add {{x[0-9]+}}, [[FOUR]], [[TWELVE]], sxtw
+ ret i64 %val
+}
+
+; The "m" constraint is permitted to have a base/offset form, but we don't
+; generate that currently.
+define i32 @test_inline_constraint_m(i32 *%ptr) {
+; CHECK: test_inline_constraint_m:
+ %val = call i32 asm "ldr $0, $1", "=r,m"(i32 *%ptr)
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
+ ret i32 %val
+}
+
+@arr = global [8 x i32] zeroinitializer
+
+; Q should *never* have base/offset form even if given the chance.
+define i32 @test_inline_constraint_Q(i32 *%ptr) {
+; CHECK: test_inline_constraint_Q:
+ %val = call i32 asm "ldr $0, $1", "=r,Q"(i32* getelementptr([8 x i32]* @arr, i32 0, i32 1))
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
+ ret i32 %val
+}
+
+@dump = global fp128 zeroinitializer
+
+define void @test_inline_constraint_I() {
+; CHECK: test_inline_constraint_I:
+ call void asm sideeffect "add x0, x0, $0", "I"(i32 0)
+ call void asm sideeffect "add x0, x0, $0", "I"(i64 4095)
+; CHECK: add x0, x0, #0
+; CHECK: add x0, x0, #4095
+
+ ret void
+}
+
+; Skip J because it's useless
+
+define void @test_inline_constraint_K() {
+; CHECK: test_inline_constraint_K:
+ call void asm sideeffect "and w0, w0, $0", "K"(i32 2863311530) ; = 0xaaaaaaaa
+ call void asm sideeffect "and w0, w0, $0", "K"(i32 65535)
+; CHECK: and w0, w0, #-1431655766
+; CHECK: and w0, w0, #65535
+
+ ret void
+}
+
+define void @test_inline_constraint_L() {
+; CHECK: test_inline_constraint_L:
+  call void asm sideeffect "and x0, x0, $0", "L"(i64 4294967296) ; = 0x100000000
+ call void asm sideeffect "and x0, x0, $0", "L"(i64 65535)
+; CHECK: and x0, x0, #4294967296
+; CHECK: and x0, x0, #65535
+
+ ret void
+}
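+
+; A hedged summary of the two constraints above: K accepts values
+; encodable as 32-bit logical immediates (for "and w0, ..."), while L
+; accepts 64-bit logical immediates (for "and x0, ..."), which is why
+; 0x100000000 is acceptable for L but rejected for K.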
+
+; Skip M and N because we don't support MOV pseudo-instructions yet.
+
+@var = global i32 0
+
+define void @test_inline_constraint_S() {
+; CHECK: test_inline_constraint_S:
+ call void asm sideeffect "adrp x0, $0", "S"(i32* @var)
+ call void asm sideeffect "adrp x0, ${0:A}", "S"(i32* @var)
+ call void asm sideeffect "add x0, x0, ${0:L}", "S"(i32* @var)
+; CHECK: adrp x0, var
+; CHECK: adrp x0, var
+; CHECK: add x0, x0, #:lo12:var
+ ret void
+}
+
+define i32 @test_inline_constraint_S_label(i1 %in) {
+; CHECK: test_inline_constraint_S_label:
+ call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc))
+; CHECK: adr x0, .Ltmp{{[0-9]+}}
+ br i1 %in, label %loc, label %loc2
+loc:
+ ret i32 0
+loc2:
+ ret i32 42
+}
+
+define void @test_inline_constraint_Y() {
+; CHECK: test_inline_constraint_Y:
+ call void asm sideeffect "fcmp s0, $0", "Y"(float 0.0)
+; CHECK: fcmp s0, #0.0
+ ret void
+}
+
+define void @test_inline_constraint_Z() {
+; CHECK: test_inline_constraint_Z:
+ call void asm sideeffect "cmp w0, $0", "Z"(i32 0)
+; CHECK: cmp w0, #0
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/inline-asm-modifiers.ll b/test/CodeGen/AArch64/inline-asm-modifiers.ll
new file mode 100644
index 000000000000..3b55945561eb
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-modifiers.ll
@@ -0,0 +1,125 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s
+
+@var_simple = hidden global i32 0
+@var_got = global i32 0
+@var_tlsgd = thread_local global i32 0
+@var_tlsld = thread_local(localdynamic) global i32 0
+@var_tlsie = thread_local(initialexec) global i32 0
+@var_tlsle = thread_local(localexec) global i32 0
+
+define void @test_inline_modifier_L() nounwind {
+; CHECK: test_inline_modifier_L:
+ call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_simple)
+ call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_got)
+ call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsgd)
+ call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsld)
+ call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_tlsie)
+ call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsle)
+; CHECK: add x0, x0, #:lo12:var_simple
+; CHECK: ldr x0, [x0, #:got_lo12:var_got]
+; CHECK: add x0, x0, #:tlsdesc_lo12:var_tlsgd
+; CHECK: add x0, x0, #:dtprel_lo12:var_tlsld
+; CHECK: ldr x0, [x0, #:gottprel_lo12:var_tlsie]
+; CHECK: add x0, x0, #:tprel_lo12:var_tlsle
+
+; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC var_simple
+; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var_got
+; CHECK-ELF: R_AARCH64_TLSDESC_ADD_LO12_NC var_tlsgd
+; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_LO12 var_tlsld
+; CHECK-ELF: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC var_tlsie
+; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_LO12 var_tlsle
+
+ ret void
+}
+
+define void @test_inline_modifier_G() nounwind {
+; CHECK: test_inline_modifier_G:
+ call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsld)
+ call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsle)
+; CHECK: add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12
+; CHECK: add x0, x0, #:tprel_hi12:var_tlsle, lsl #12
+
+; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_HI12 var_tlsld
+; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_HI12 var_tlsle
+
+ ret void
+}
+
+define void @test_inline_modifier_A() nounwind {
+; CHECK: test_inline_modifier_A:
+ call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_simple)
+ call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_got)
+ call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsgd)
+ call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsie)
+ ; N.b. All tprel and dtprel relocs are modified: lo12 or granules.
+; CHECK: adrp x0, var_simple
+; CHECK: adrp x0, :got:var_got
+; CHECK: adrp x0, :tlsdesc:var_tlsgd
+; CHECK: adrp x0, :gottprel:var_tlsie
+
+; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 var_simple
+; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var_got
+; CHECK-ELF: R_AARCH64_TLSDESC_ADR_PAGE var_tlsgd
+; CHECK-ELF: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 var_tlsie
+
+ ret void
+}
+
+define void @test_inline_modifier_wx(i32 %small, i64 %big) nounwind {
+; CHECK: test_inline_modifier_wx:
+ call i32 asm sideeffect "add $0, $0, $0", "=r,0"(i32 %small)
+ call i32 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i32 %small)
+ call i32 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i32 %small)
+; CHECK: //APP
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+
+ call i64 asm sideeffect "add $0, $0, $0", "=r,0"(i64 %big)
+ call i64 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i64 %big)
+ call i64 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i64 %big)
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+
+ call i32 asm sideeffect "add ${0:w}, ${1:w}, ${1:w}", "=r,r"(i32 0)
+ call i32 asm sideeffect "add ${0:x}, ${1:x}, ${1:x}", "=r,r"(i32 0)
+; CHECK: add {{w[0-9]+}}, wzr, wzr
+; CHECK: add {{x[0-9]+}}, xzr, xzr
+ ret void
+}
+
+define void @test_inline_modifier_bhsdq() nounwind {
+; CHECK: test_inline_modifier_bhsdq:
+ call float asm sideeffect "ldr ${0:b}, [sp]", "=w"()
+ call float asm sideeffect "ldr ${0:h}, [sp]", "=w"()
+ call float asm sideeffect "ldr ${0:s}, [sp]", "=w"()
+ call float asm sideeffect "ldr ${0:d}, [sp]", "=w"()
+ call float asm sideeffect "ldr ${0:q}, [sp]", "=w"()
+; CHECK: ldr b0, [sp]
+; CHECK: ldr h0, [sp]
+; CHECK: ldr s0, [sp]
+; CHECK: ldr d0, [sp]
+; CHECK: ldr q0, [sp]
+
+ call double asm sideeffect "ldr ${0:b}, [sp]", "=w"()
+ call double asm sideeffect "ldr ${0:h}, [sp]", "=w"()
+ call double asm sideeffect "ldr ${0:s}, [sp]", "=w"()
+ call double asm sideeffect "ldr ${0:d}, [sp]", "=w"()
+ call double asm sideeffect "ldr ${0:q}, [sp]", "=w"()
+; CHECK: ldr b0, [sp]
+; CHECK: ldr h0, [sp]
+; CHECK: ldr s0, [sp]
+; CHECK: ldr d0, [sp]
+; CHECK: ldr q0, [sp]
+ ret void
+}
+
+define void @test_inline_modifier_c() nounwind {
+; CHECK: test_inline_modifier_c:
+ call void asm sideeffect "adr x0, ${0:c}", "i"(i32 3)
+; CHECK: adr x0, 3
+
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll
new file mode 100644
index 000000000000..dcf9f4ed455c
--- /dev/null
+++ b/test/CodeGen/AArch64/jump-table.ll
@@ -0,0 +1,56 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s -check-prefix=CHECK-ELF
+
+define i32 @test_jumptable(i32 %in) {
+; CHECK: test_jumptable
+
+ switch i32 %in, label %def [
+ i32 0, label %lbl1
+ i32 1, label %lbl2
+ i32 2, label %lbl3
+ i32 4, label %lbl4
+ ]
+; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0
+; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], #:lo12:.LJTI0_0
+; CHECK: ldr [[DEST:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #3]
+; CHECK: br [[DEST]]
+
+def:
+ ret i32 0
+
+lbl1:
+ ret i32 1
+
+lbl2:
+ ret i32 2
+
+lbl3:
+ ret i32 4
+
+lbl4:
+ ret i32 8
+
+}
+
+; CHECK: .rodata
+
+; CHECK: .LJTI0_0:
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
+; CHECK-NEXT: .xword
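+
+; Five .xword entries cover switch values 0-4 (the missing case 3 is
+; routed to the default), and each entry is 8 bytes, which is why the
+; load above scales the index with "lsl #3".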
+
+; ELF tests:
+
+; First make sure we get a page/lo12 pair in .text to pick up the jump-table
+; CHECK-ELF: .rela.text
+; CHECK-ELF: ('r_sym', 0x00000008)
+; CHECK-ELF-NEXT: ('r_type', 0x00000113)
+; CHECK-ELF: ('r_sym', 0x00000008)
+; CHECK-ELF-NEXT: ('r_type', 0x00000115)
+
+; Also check the targets in .rodata are relocated
+; CHECK-ELF: .rela.rodata
+; CHECK-ELF: ('r_sym', 0x00000005)
+; CHECK-ELF-NEXT: ('r_type', 0x00000101)
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/large-frame.ll b/test/CodeGen/AArch64/large-frame.ll
new file mode 100644
index 000000000000..2b2e1295c4f6
--- /dev/null
+++ b/test/CodeGen/AArch64/large-frame.ll
@@ -0,0 +1,114 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+declare void @use_addr(i8*)
+
+@addr = global i8* null
+
+define void @test_bigframe() {
+; CHECK: test_bigframe:
+
+ %var1 = alloca i8, i32 20000000
+ %var2 = alloca i8, i32 16
+ %var3 = alloca i8, i32 20000000
+; CHECK: sub sp, sp, #496
+; CHECK: str x30, [sp, #488]
+  ; Total adjustment is 39999536
+; CHECK: movz [[SUBCONST:x[0-9]+]], #22576
+; CHECK: movk [[SUBCONST]], #610, lsl #16
+; CHECK: sub sp, sp, [[SUBCONST]]
+
+ ; Total offset is 20000024
+; CHECK: movz [[VAR1OFFSET:x[0-9]+]], #11544
+; CHECK: movk [[VAR1OFFSET]], #305, lsl #16
+; CHECK: add {{x[0-9]+}}, sp, [[VAR1OFFSET]]
+ store volatile i8* %var1, i8** @addr
+
+ %var1plus2 = getelementptr i8* %var1, i32 2
+ store volatile i8* %var1plus2, i8** @addr
+
+; CHECK: movz [[VAR2OFFSET:x[0-9]+]], #11528
+; CHECK: movk [[VAR2OFFSET]], #305, lsl #16
+; CHECK: add {{x[0-9]+}}, sp, [[VAR2OFFSET]]
+ store volatile i8* %var2, i8** @addr
+
+ %var2plus2 = getelementptr i8* %var2, i32 2
+ store volatile i8* %var2plus2, i8** @addr
+
+ store volatile i8* %var3, i8** @addr
+
+ %var3plus2 = getelementptr i8* %var3, i32 2
+ store volatile i8* %var3plus2, i8** @addr
+
+; CHECK: movz [[ADDCONST:x[0-9]+]], #22576
+; CHECK: movk [[ADDCONST]], #610, lsl #16
+; CHECK: add sp, sp, [[ADDCONST]]
+ ret void
+}
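+
+; Worked arithmetic for the constants above (a sketch; exact offsets can
+; shift as frame lowering evolves):
+;   SUBCONST:   610 * 65536 + 22576 = 39999536 (the total adjustment)
+;   VAR1OFFSET: 305 * 65536 + 11544 = 20000024 (%var1 from the new sp)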
+
+define void @test_mediumframe() {
+; CHECK: test_mediumframe:
+ %var1 = alloca i8, i32 1000000
+ %var2 = alloca i8, i32 16
+ %var3 = alloca i8, i32 1000000
+; CHECK: sub sp, sp, #496
+; CHECK: str x30, [sp, #488]
+; CHECK: sub sp, sp, #688
+; CHECK-NEXT: sub sp, sp, #488, lsl #12
+
+ store volatile i8* %var1, i8** @addr
+; CHECK: add [[VAR1ADDR:x[0-9]+]], sp, #600
+; CHECK: add [[VAR1ADDR]], [[VAR1ADDR]], #244, lsl #12
+
+ %var1plus2 = getelementptr i8* %var1, i32 2
+ store volatile i8* %var1plus2, i8** @addr
+; CHECK: add [[VAR1PLUS2:x[0-9]+]], {{x[0-9]+}}, #2
+
+ store volatile i8* %var2, i8** @addr
+; CHECK: add [[VAR2ADDR:x[0-9]+]], sp, #584
+; CHECK: add [[VAR2ADDR]], [[VAR2ADDR]], #244, lsl #12
+
+ %var2plus2 = getelementptr i8* %var2, i32 2
+ store volatile i8* %var2plus2, i8** @addr
+; CHECK: add [[VAR2PLUS2:x[0-9]+]], {{x[0-9]+}}, #2
+
+ store volatile i8* %var3, i8** @addr
+
+ %var3plus2 = getelementptr i8* %var3, i32 2
+ store volatile i8* %var3plus2, i8** @addr
+
+; CHECK: add sp, sp, #688
+; CHECK: add sp, sp, #488, lsl #12
+; CHECK: ldr x30, [sp, #488]
+; CHECK: add sp, sp, #496
+ ret void
+}
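+
+; Same idea for the medium frame: 488 << 12 = 1998848, plus 688, gives a
+; 1999536-byte adjustment, small enough to be done with two plain
+; sub/add immediates (12 bits, optionally shifted) instead of movz/movk.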
+
+
+@bigspace = global [8 x i64] zeroinitializer
+
+; If temporary registers are allocated for adjustment, they should *not* clobber
+; argument registers.
+define void @test_tempallocation([8 x i64] %val) nounwind {
+; CHECK: test_tempallocation:
+ %var = alloca i8, i32 1000000
+; CHECK: sub sp, sp,
+
+; Make sure the prologue is reasonably efficient
+; CHECK-NEXT: stp x29, x30, [sp,
+; CHECK-NEXT: stp x25, x26, [sp,
+; CHECK-NEXT: stp x23, x24, [sp,
+; CHECK-NEXT: stp x21, x22, [sp,
+; CHECK-NEXT: stp x19, x20, [sp,
+
+; Make sure we don't trash an argument register
+; CHECK-NOT: movz {{x[0-7],}}
+; CHECK: sub sp, sp,
+
+; CHECK-NOT: movz {{x[0-7],}}
+
+; CHECK: bl use_addr
+ call void @use_addr(i8* %var)
+
+ store [8 x i64] %val, [8 x i64]* @bigspace
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll
new file mode 100644
index 000000000000..45935129fd7e
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-regoffset.ll
@@ -0,0 +1,333 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+
+@var_float = global float 0.0
+@var_double = global double 0.0
+
+define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_8bit:
+
+ %addr8_sxtw = getelementptr i8* %base, i32 %off32
+ %val8_sxtw = load volatile i8* %addr8_sxtw
+ %val32_signed = sext i8 %val8_sxtw to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %addr_lsl = getelementptr i8* %base, i64 %off64
+ %val8_lsl = load volatile i8* %addr_lsl
+ %val32_unsigned = zext i8 %val8_lsl to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %addrint_uxtw = ptrtoint i8* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i8*
+ %val8_uxtw = load volatile i8* %addr_uxtw
+ %newval8 = add i8 %val8_uxtw, 1
+ store volatile i8 %newval8, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ ret void
+}
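+
+; The pattern exercised throughout this file: register-offset addressing
+; takes either a 64-bit index (plain or "lsl #n") or a 32-bit index
+; extended with sxtw/uxtw, with the shift amount matching the access
+; size (#1/#2/#3/#4 for 16/32/64/128-bit accesses).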
+
+
+define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_16bit:
+
+ %addr8_sxtwN = getelementptr i16* %base, i32 %off32
+ %val8_sxtwN = load volatile i16* %addr8_sxtwN
+ %val32_signed = sext i16 %val8_sxtwN to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #1]
+
+ %addr_lslN = getelementptr i16* %base, i64 %off64
+ %val8_lslN = load volatile i16* %addr_lslN
+ %val32_unsigned = zext i16 %val8_lslN to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #1]
+
+ %addrint_uxtw = ptrtoint i16* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i16*
+ %val8_uxtw = load volatile i16* %addr_uxtw
+ %newval8 = add i16 %val8_uxtw, 1
+ store volatile i16 %newval8, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint i16* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i16*
+ %val16_sxtw = load volatile i16* %addr_sxtw
+ %val64_signed = sext i16 %val16_sxtw to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+ %base_lsl = ptrtoint i16* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to i16*
+ %val16_lsl = load volatile i16* %addr_lsl
+ %val64_unsigned = zext i16 %val16_lsl to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint i16* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 1
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16*
+ %val32 = load volatile i32* @var_32bit
+ %val16_trunc32 = trunc i32 %val32 to i16
+ store volatile i16 %val16_trunc32, i16* %addr_uxtwN
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #1]
+ ret void
+}
+
+define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_32bit:
+
+ %addr_sxtwN = getelementptr i32* %base, i32 %off32
+ %val_sxtwN = load volatile i32* %addr_sxtwN
+ store volatile i32 %val_sxtwN, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2]
+
+ %addr_lslN = getelementptr i32* %base, i64 %off64
+ %val_lslN = load volatile i32* %addr_lslN
+ store volatile i32 %val_lslN, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2]
+
+ %addrint_uxtw = ptrtoint i32* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i32*
+ %val_uxtw = load volatile i32* %addr_uxtw
+ %newval8 = add i32 %val_uxtw, 1
+ store volatile i32 %newval8, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+
+ %base_sxtw = ptrtoint i32* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i32*
+ %val16_sxtw = load volatile i32* %addr_sxtw
+ %val64_signed = sext i32 %val16_sxtw to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+ %base_lsl = ptrtoint i32* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to i32*
+ %val16_lsl = load volatile i32* %addr_lsl
+ %val64_unsigned = zext i32 %val16_lsl to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint i32* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 2
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32*
+ %val32 = load volatile i32* @var_32bit
+ store volatile i32 %val32, i32* %addr_uxtwN
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
+ ret void
+}
+
+define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_64bit:
+
+ %addr_sxtwN = getelementptr i64* %base, i32 %off32
+ %val_sxtwN = load volatile i64* %addr_sxtwN
+ store volatile i64 %val_sxtwN, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3]
+
+ %addr_lslN = getelementptr i64* %base, i64 %off64
+ %val_lslN = load volatile i64* %addr_lslN
+ store volatile i64 %val_lslN, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3]
+
+ %addrint_uxtw = ptrtoint i64* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i64*
+ %val8_uxtw = load volatile i64* %addr_uxtw
+ %newval8 = add i64 %val8_uxtw, 1
+ store volatile i64 %newval8, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint i64* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i64*
+ %val64_sxtw = load volatile i64* %addr_sxtw
+ store volatile i64 %val64_sxtw, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %base_lsl = ptrtoint i64* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to i64*
+ %val64_lsl = load volatile i64* %addr_lsl
+ store volatile i64 %val64_lsl, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint i64* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 3
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64*
+ %val64 = load volatile i64* @var_64bit
+ store volatile i64 %val64, i64* %addr_uxtwN
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
+ ret void
+}
+
+define void @ldst_float(float* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_float:
+
+ %addr_sxtwN = getelementptr float* %base, i32 %off32
+ %val_sxtwN = load volatile float* %addr_sxtwN
+ store volatile float %val_sxtwN, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2]
+
+ %addr_lslN = getelementptr float* %base, i64 %off64
+ %val_lslN = load volatile float* %addr_lslN
+ store volatile float %val_lslN, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2]
+
+ %addrint_uxtw = ptrtoint float* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to float*
+ %val_uxtw = load volatile float* %addr_uxtw
+ store volatile float %val_uxtw, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint float* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to float*
+ %val64_sxtw = load volatile float* %addr_sxtw
+ store volatile float %val64_sxtw, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %base_lsl = ptrtoint float* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to float*
+ %val64_lsl = load volatile float* %addr_lsl
+ store volatile float %val64_lsl, float* @var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint float* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 2
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to float*
+ %val64 = load volatile float* @var_float
+ store volatile float %val64, float* %addr_uxtwN
+; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
+ ret void
+}
+
+define void @ldst_double(double* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_double:
+
+ %addr_sxtwN = getelementptr double* %base, i32 %off32
+ %val_sxtwN = load volatile double* %addr_sxtwN
+ store volatile double %val_sxtwN, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3]
+
+ %addr_lslN = getelementptr double* %base, i64 %off64
+ %val_lslN = load volatile double* %addr_lslN
+ store volatile double %val_lslN, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3]
+
+ %addrint_uxtw = ptrtoint double* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to double*
+ %val_uxtw = load volatile double* %addr_uxtw
+ store volatile double %val_uxtw, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint double* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to double*
+ %val64_sxtw = load volatile double* %addr_sxtw
+ store volatile double %val64_sxtw, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %base_lsl = ptrtoint double* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to double*
+ %val64_lsl = load volatile double* %addr_lsl
+ store volatile double %val64_lsl, double* @var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint double* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 3
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to double*
+ %val64 = load volatile double* @var_double
+ store volatile double %val64, double* %addr_uxtwN
+; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
+ ret void
+}
+
+
+define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) {
+; CHECK: ldst_128bit:
+
+ %addr_sxtwN = getelementptr fp128* %base, i32 %off32
+ %val_sxtwN = load volatile fp128* %addr_sxtwN
+ store volatile fp128 %val_sxtwN, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4]
+
+ %addr_lslN = getelementptr fp128* %base, i64 %off64
+ %val_lslN = load volatile fp128* %addr_lslN
+ store volatile fp128 %val_lslN, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #4]
+
+ %addrint_uxtw = ptrtoint fp128* %base to i64
+ %offset_uxtw = zext i32 %off32 to i64
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to fp128*
+ %val_uxtw = load volatile fp128* %addr_uxtw
+ store volatile fp128 %val_uxtw, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint fp128* %base to i64
+ %offset_sxtw = sext i32 %off32 to i64
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to fp128*
+ %val64_sxtw = load volatile fp128* %addr_sxtw
+ store volatile fp128 %val64_sxtw, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %base_lsl = ptrtoint fp128* %base to i64
+ %addrint_lsl = add i64 %base_lsl, %off64
+ %addr_lsl = inttoptr i64 %addrint_lsl to fp128*
+ %val64_lsl = load volatile fp128* %addr_lsl
+ store volatile fp128 %val64_lsl, fp128* %base
+; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
+
+ %base_uxtwN = ptrtoint fp128* %base to i64
+ %offset_uxtwN = zext i32 %off32 to i64
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 4
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to fp128*
+ %val64 = load volatile fp128* %base
+ store volatile fp128 %val64, fp128* %addr_uxtwN
+; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #4]
+ ret void
+}
diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll
new file mode 100644
index 000000000000..78a3c83c3dd8
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-unscaledimm.ll
@@ -0,0 +1,218 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+
+@var_float = global float 0.0
+@var_double = global double 0.0
+
+@varptr = global i8* null
+
+define void @ldst_8bit() {
+; CHECK: ldst_8bit:
+
+; No architectural support for loads into 8-bit registers: we promote i8
+; during lowering.
+ %addr_8bit = load i8** @varptr
+
+; match a sign-extending load 8-bit -> 32-bit
+ %addr_sext32 = getelementptr i8* %addr_8bit, i64 -256
+ %val8_sext32 = load volatile i8* %addr_sext32
+ %val32_signed = sext i8 %val8_sext32 to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldursb {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; match a zero-extending load volatile 8-bit -> 32-bit
+ %addr_zext32 = getelementptr i8* %addr_8bit, i64 -12
+ %val8_zext32 = load volatile i8* %addr_zext32
+ %val32_unsigned = zext i8 %val8_zext32 to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-12]
+
+; match an any-extending load volatile 8-bit -> 32-bit
+ %addr_anyext = getelementptr i8* %addr_8bit, i64 -1
+ %val8_anyext = load volatile i8* %addr_anyext
+ %newval8 = add i8 %val8_anyext, 1
+ store volatile i8 %newval8, i8* @var_8bit
+; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+; match a sign-extending load volatile 8-bit -> 64-bit
+ %addr_sext64 = getelementptr i8* %addr_8bit, i64 -5
+ %val8_sext64 = load volatile i8* %addr_sext64
+ %val64_signed = sext i8 %val8_sext64 to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldursb {{x[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+; match a zero-extending load volatile 8-bit -> 64-bit.
+; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
+; of x0, so it's identical to loading into 32 bits.
+ %addr_zext64 = getelementptr i8* %addr_8bit, i64 -9
+ %val8_zext64 = load volatile i8* %addr_zext64
+ %val64_unsigned = zext i8 %val8_zext64 to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-9]
+
+; truncating store volatile 32-bits to 8-bits
+ %addr_trunc32 = getelementptr i8* %addr_8bit, i64 -256
+ %val32 = load volatile i32* @var_32bit
+ %val8_trunc32 = trunc i32 %val32 to i8
+ store volatile i8 %val8_trunc32, i8* %addr_trunc32
+; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; truncating store volatile 64-bits to 8-bits
+ %addr_trunc64 = getelementptr i8* %addr_8bit, i64 -1
+ %val64 = load volatile i64* @var_64bit
+ %val8_trunc64 = trunc i64 %val64 to i8
+ store volatile i8 %val8_trunc64, i8* %addr_trunc64
+; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+ ret void
+}
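+
+; Every ldur/stur in this file exercises the unscaled 9-bit signed
+; immediate form, hence offsets drawn from its [-256, 255] range,
+; including both extremes.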
+
+define void @ldst_16bit() {
+; CHECK: ldst_16bit:
+
+; No architectural support for loads into 16-bit registers: we promote i16
+; during lowering.
+ %addr_8bit = load i8** @varptr
+
+; match a sign-extending load 16-bit -> 32-bit
+ %addr8_sext32 = getelementptr i8* %addr_8bit, i64 -256
+ %addr_sext32 = bitcast i8* %addr8_sext32 to i16*
+ %val16_sext32 = load volatile i16* %addr_sext32
+ %val32_signed = sext i16 %val16_sext32 to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldursh {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; match a zero-extending load volatile 16-bit -> 32-bit, with an offset that
+; would be unaligned.
+ %addr8_zext32 = getelementptr i8* %addr_8bit, i64 15
+ %addr_zext32 = bitcast i8* %addr8_zext32 to i16*
+ %val16_zext32 = load volatile i16* %addr_zext32
+ %val32_unsigned = zext i16 %val16_zext32 to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #15]
+
+; match an any-extending load volatile 16-bit -> 32-bit
+ %addr8_anyext = getelementptr i8* %addr_8bit, i64 -1
+ %addr_anyext = bitcast i8* %addr8_anyext to i16*
+ %val16_anyext = load volatile i16* %addr_anyext
+ %newval16 = add i16 %val16_anyext, 1
+ store volatile i16 %newval16, i16* @var_16bit
+; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+; match a sign-extending load volatile 16-bit -> 64-bit
+ %addr8_sext64 = getelementptr i8* %addr_8bit, i64 -5
+ %addr_sext64 = bitcast i8* %addr8_sext64 to i16*
+ %val16_sext64 = load volatile i16* %addr_sext64
+ %val64_signed = sext i16 %val16_sext64 to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldursh {{x[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+; match a zero-extending load volatile 16-bit -> 64-bit.
+; This uses the fact that ldrh w0, [x0] will zero out the high 32 bits
+; of x0, so it's identical to loading into 32 bits.
+ %addr8_zext64 = getelementptr i8* %addr_8bit, i64 9
+ %addr_zext64 = bitcast i8* %addr8_zext64 to i16*
+ %val16_zext64 = load volatile i16* %addr_zext64
+ %val64_unsigned = zext i16 %val16_zext64 to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #9]
+
+; truncating store volatile 32-bits to 16-bits
+ %addr8_trunc32 = getelementptr i8* %addr_8bit, i64 -256
+ %addr_trunc32 = bitcast i8* %addr8_trunc32 to i16*
+ %val32 = load volatile i32* @var_32bit
+ %val16_trunc32 = trunc i32 %val32 to i16
+ store volatile i16 %val16_trunc32, i16* %addr_trunc32
+; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+
+; truncating store volatile 64-bits to 16-bits
+ %addr8_trunc64 = getelementptr i8* %addr_8bit, i64 -1
+ %addr_trunc64 = bitcast i8* %addr8_trunc64 to i16*
+ %val64 = load volatile i64* @var_64bit
+ %val16_trunc64 = trunc i64 %val64 to i16
+ store volatile i16 %val16_trunc64, i16* %addr_trunc64
+; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
+
+ ret void
+}
+
+define void @ldst_32bit() {
+; CHECK: ldst_32bit:
+
+ %addr_8bit = load i8** @varptr
+
+; Straight 32-bit load/store
+ %addr32_8_noext = getelementptr i8* %addr_8bit, i64 1
+ %addr32_noext = bitcast i8* %addr32_8_noext to i32*
+ %val32_noext = load volatile i32* %addr32_noext
+ store volatile i32 %val32_noext, i32* %addr32_noext
+; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #1]
+; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #1]
+
+; Zero-extension to 64-bits
+ %addr32_8_zext = getelementptr i8* %addr_8bit, i64 -256
+ %addr32_zext = bitcast i8* %addr32_8_zext to i32*
+ %val32_zext = load volatile i32* %addr32_zext
+ %val64_unsigned = zext i32 %val32_zext to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Sign-extension to 64-bits
+ %addr32_8_sext = getelementptr i8* %addr_8bit, i64 -12
+ %addr32_sext = bitcast i8* %addr32_8_sext to i32*
+ %val32_sext = load volatile i32* %addr32_sext
+ %val64_signed = sext i32 %val32_sext to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldursw {{x[0-9]+}}, [{{x[0-9]+}}, #-12]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Truncation from 64-bits
+ %addr64_8_trunc = getelementptr i8* %addr_8bit, i64 255
+ %addr64_trunc = bitcast i8* %addr64_8_trunc to i64*
+ %addr32_8_trunc = getelementptr i8* %addr_8bit, i64 -20
+ %addr32_trunc = bitcast i8* %addr32_8_trunc to i32*
+
+ %val64_trunc = load volatile i64* %addr64_trunc
+ %val32_trunc = trunc i64 %val64_trunc to i32
+ store volatile i32 %val32_trunc, i32* %addr32_trunc
+; CHECK: ldur {{x[0-9]+}}, [{{x[0-9]+}}, #255]
+; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #-20]
+
+ ret void
+}
+
+define void @ldst_float() {
+; CHECK: ldst_float:
+
+ %addr_8bit = load i8** @varptr
+ %addrfp_8 = getelementptr i8* %addr_8bit, i64 -5
+ %addrfp = bitcast i8* %addrfp_8 to float*
+
+ %valfp = load volatile float* %addrfp
+; CHECK: ldur {{s[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+ store volatile float %valfp, float* %addrfp
+; CHECK: stur {{s[0-9]+}}, [{{x[0-9]+}}, #-5]
+
+ ret void
+}
+
+define void @ldst_double() {
+; CHECK: ldst_double:
+
+ %addr_8bit = load i8** @varptr
+ %addrfp_8 = getelementptr i8* %addr_8bit, i64 4
+ %addrfp = bitcast i8* %addrfp_8 to double*
+
+ %valfp = load volatile double* %addrfp
+; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #4]
+
+ store volatile double %valfp, double* %addrfp
+; CHECK: stur {{d[0-9]+}}, [{{x[0-9]+}}, #4]
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll
new file mode 100644
index 000000000000..1e7540d9be0a
--- /dev/null
+++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll
@@ -0,0 +1,251 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var_8bit = global i8 0
+@var_16bit = global i16 0
+@var_32bit = global i32 0
+@var_64bit = global i64 0
+
+@var_float = global float 0.0
+@var_double = global double 0.0
+
+define void @ldst_8bit() {
+; CHECK: ldst_8bit:
+
+; No architectural support for loads into 8-bit registers: we promote i8
+; during lowering.
+
+; match a sign-extending load 8-bit -> 32-bit
+ %val8_sext32 = load volatile i8* @var_8bit
+ %val32_signed = sext i8 %val8_sext32 to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: adrp {{x[0-9]+}}, var_8bit
+; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match a zero-extending load volatile 8-bit -> 32-bit
+ %val8_zext32 = load volatile i8* @var_8bit
+ %val32_unsigned = zext i8 %val8_zext32 to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match an any-extending load volatile 8-bit -> 32-bit
+ %val8_anyext = load volatile i8* @var_8bit
+ %newval8 = add i8 %val8_anyext, 1
+ store volatile i8 %newval8, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match a sign-extending load volatile 8-bit -> 64-bit
+ %val8_sext64 = load volatile i8* @var_8bit
+ %val64_signed = sext i8 %val8_sext64 to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsb {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; match a zero-extending load volatile 8-bit -> 64-bit.
+; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
+; of x0, so it's identical to loading into 32 bits.
+ %val8_zext64 = load volatile i8* @var_8bit
+ %val64_unsigned = zext i8 %val8_zext64 to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; truncating store volatile 32-bits to 8-bits
+ %val32 = load volatile i32* @var_32bit
+ %val8_trunc32 = trunc i32 %val32 to i8
+ store volatile i8 %val8_trunc32, i8* @var_8bit
+; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+; truncating store volatile 64-bits to 8-bits
+ %val64 = load volatile i64* @var_64bit
+ %val8_trunc64 = trunc i64 %val64 to i8
+ store volatile i8 %val8_trunc64, i8* @var_8bit
+; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit]
+
+ ret void
+}
+
+define void @ldst_16bit() {
+; CHECK: ldst_16bit:
+
+; No architectural support for loads into 16-bit registers: we promote i16
+; during lowering.
+
+; match a sign-extending load volatile 16-bit -> 32-bit
+ %val16_sext32 = load volatile i16* @var_16bit
+ %val32_signed = sext i16 %val16_sext32 to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: adrp {{x[0-9]+}}, var_16bit
+; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match a zero-extending load volatile 16-bit -> 32-bit
+ %val16_zext32 = load volatile i16* @var_16bit
+ %val32_unsigned = zext i16 %val16_zext32 to i32
+ store volatile i32 %val32_unsigned, i32* @var_32bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match an any-extending load volatile 16-bit -> 32-bit
+ %val16_anyext = load volatile i16* @var_16bit
+ %newval16 = add i16 %val16_anyext, 1
+ store volatile i16 %newval16, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match a sign-extending load volatile 16-bit -> 64-bit
+ %val16_sext64 = load volatile i16* @var_16bit
+ %val64_signed = sext i16 %val16_sext64 to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; match a zero-extending load volatile 16-bit -> 64-bit.
+; This uses the fact that ldrh w0, [x0] will zero out the high 32 bits
+; of x0, so it's identical to loading into 32 bits.
+ %val16_zext64 = load volatile i16* @var_16bit
+ %val64_unsigned = zext i16 %val16_zext64 to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; truncating store volatile 32-bits to 16-bits
+ %val32 = load volatile i32* @var_32bit
+ %val16_trunc32 = trunc i32 %val32 to i16
+ store volatile i16 %val16_trunc32, i16* @var_16bit
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+; truncating store volatile 64-bits to 16-bits
+ %val64 = load volatile i64* @var_64bit
+ %val16_trunc64 = trunc i64 %val64 to i16
+ store volatile i16 %val16_trunc64, i16* @var_16bit
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit]
+
+ ret void
+}
+
+define void @ldst_32bit() {
+; CHECK: ldst_32bit:
+
+; Straight 32-bit load/store
+ %val32_noext = load volatile i32* @var_32bit
+ store volatile i32 %val32_noext, i32* @var_32bit
+; CHECK: adrp {{x[0-9]+}}, var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+
+; Zero-extension to 64-bits
+ %val32_zext = load volatile i32* @var_32bit
+ %val64_unsigned = zext i32 %val32_zext to i64
+ store volatile i64 %val64_unsigned, i64* @var_64bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Sign-extension to 64-bits
+ %val32_sext = load volatile i32* @var_32bit
+ %val64_signed = sext i32 %val32_sext to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+
+; Truncation from 64-bits
+ %val64_trunc = load volatile i64* @var_64bit
+ %val32_trunc = trunc i64 %val64_trunc to i32
+ store volatile i32 %val32_trunc, i32* @var_32bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit]
+
+ ret void
+}
+
+@arr8 = global i8* null
+@arr16 = global i16* null
+@arr32 = global i32* null
+@arr64 = global i64* null
+
+; Now check that our selection copes with accesses more complex than a
+; single symbol. Permitted offsets should be folded into the loads and
+; stores. Since all forms use the same Operand it's only necessary to
+; check the various access sizes involved.
+
+define void @ldst_complex_offsets() {
+; CHECK: ldst_complex_offsets
+ %arr8_addr = load volatile i8** @arr8
+; CHECK: adrp {{x[0-9]+}}, arr8
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr8]
+
+ %arr8_sub1_addr = getelementptr i8* %arr8_addr, i64 1
+ %arr8_sub1 = load volatile i8* %arr8_sub1_addr
+ store volatile i8 %arr8_sub1, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #1]
+
+ %arr8_sub4095_addr = getelementptr i8* %arr8_addr, i64 4095
+ %arr8_sub4095 = load volatile i8* %arr8_sub4095_addr
+ store volatile i8 %arr8_sub4095, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #4095]
+
+
+ %arr16_addr = load volatile i16** @arr16
+; CHECK: adrp {{x[0-9]+}}, arr16
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr16]
+
+ %arr16_sub1_addr = getelementptr i16* %arr16_addr, i64 1
+ %arr16_sub1 = load volatile i16* %arr16_sub1_addr
+ store volatile i16 %arr16_sub1, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #2]
+
+ %arr16_sub4095_addr = getelementptr i16* %arr16_addr, i64 4095
+ %arr16_sub4095 = load volatile i16* %arr16_sub4095_addr
+ store volatile i16 %arr16_sub4095, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #8190]
+
+
+ %arr32_addr = load volatile i32** @arr32
+; CHECK: adrp {{x[0-9]+}}, arr32
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr32]
+
+ %arr32_sub1_addr = getelementptr i32* %arr32_addr, i64 1
+ %arr32_sub1 = load volatile i32* %arr32_sub1_addr
+ store volatile i32 %arr32_sub1, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #4]
+
+ %arr32_sub4095_addr = getelementptr i32* %arr32_addr, i64 4095
+ %arr32_sub4095 = load volatile i32* %arr32_sub4095_addr
+ store volatile i32 %arr32_sub4095, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #16380]
+
+
+ %arr64_addr = load volatile i64** @arr64
+; CHECK: adrp {{x[0-9]+}}, arr64
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr64]
+
+ %arr64_sub1_addr = getelementptr i64* %arr64_addr, i64 1
+ %arr64_sub1 = load volatile i64* %arr64_sub1_addr
+ store volatile i64 %arr64_sub1, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+
+ %arr64_sub4095_addr = getelementptr i64* %arr64_addr, i64 4095
+ %arr64_sub4095 = load volatile i64* %arr64_sub4095_addr
+ store volatile i64 %arr64_sub4095, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #32760]
+
+ ret void
+}
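+
+; The folded offsets above are instances of the scaled unsigned 12-bit
+; immediate: the raw field tops out at 4095, which after scaling by the
+; access size yields the byte offsets 4095, 8190, 16380 and 32760
+; checked for the four widths.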
+
+define void @ldst_float() {
+; CHECK: ldst_float:
+
+ %valfp = load volatile float* @var_float
+; CHECK: adrp {{x[0-9]+}}, var_float
+; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float]
+
+ store volatile float %valfp, float* @var_float
+; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float]
+
+ ret void
+}
+
+define void @ldst_double() {
+; CHECK: ldst_double:
+
+ %valfp = load volatile double* @var_double
+; CHECK: adrp {{x[0-9]+}}, var_double
+; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double]
+
+ store volatile double %valfp, double* @var_double
+; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double]
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..c5ce2411ed48
--- /dev/null
+++ b/test/CodeGen/AArch64/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
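+# Skip these tests unless the AArch64 backend was configured in.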
+targets = set(config.root.targets_to_build.split())
+if 'AArch64' not in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/AArch64/literal_pools.ll b/test/CodeGen/AArch64/literal_pools.ll
new file mode 100644
index 000000000000..e09084148fdf
--- /dev/null
+++ b/test/CodeGen/AArch64/literal_pools.ll
@@ -0,0 +1,55 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @foo() {
+; CHECK: foo:
+ %val32 = load i32* @var32
+ %val64 = load i64* @var64
+
+ %val32_lit32 = and i32 %val32, 123456785
+ store volatile i32 %val32_lit32, i32* @var32
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
+; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+
+ %val64_lit32 = and i64 %val64, 305402420
+ store volatile i64 %val64_lit32, i64* @var64
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
+; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+
+ %val64_lit32signed = and i64 %val64, -12345678
+ store volatile i64 %val64_lit32signed, i64* @var64
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
+; CHECK: ldrsw {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+
+ %val64_lit64 = and i64 %val64, 1234567898765432
+ store volatile i64 %val64_lit64, i64* @var64
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
+; CHECK: ldr {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+
+ ret void
+}
+
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+
+define void @floating_lits() {
+; CHECK: floating_lits:
+
+ %floatval = load float* @varfloat
+ %newfloat = fadd float %floatval, 128.0
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]]
+; CHECK: ldr {{s[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+; CHECK: fadd
+ store float %newfloat, float* @varfloat
+
+ %doubleval = load double* @vardouble
+ %newdouble = fadd double %doubleval, 129.0
+; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]]
+; CHECK: ldr {{d[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
+; CHECK: fadd
+ store double %newdouble, double* @vardouble
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll
new file mode 100644
index 000000000000..5cbf5a37ec54
--- /dev/null
+++ b/test/CodeGen/AArch64/local_vars.ll
@@ -0,0 +1,57 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP %s
+
+; Make sure a reasonably sane prologue and epilogue are
+; generated. This test is not robust in the face of frame handling
+; evolving, but still has value for unrelated changes, I
+; believe.
+;
+; In particular, it will fail when ldp/stp are used for frame setup,
+; when FP-elim is implemented, and when addressing from FP is
+; implemented.
+
+@var = global i64 0
+@local_addr = global i64* null
+
+declare void @foo()
+
+define void @trivial_func() nounwind {
+; CHECK: trivial_func: // @trivial_func
+; CHECK-NEXT: // BB#0
+; CHECK-NEXT: ret
+
+ ret void
+}
+
+define void @trivial_fp_func() {
+; CHECK-WITHFP: trivial_fp_func:
+
+; CHECK-WITHFP: sub sp, sp, #16
+; CHECK-WITHFP: stp x29, x30, [sp]
+; CHECK-WITHFP-NEXT: mov x29, sp
+
+; Don't really care, but it would be a Bad Thing if this came after the epilogue.
+; CHECK: bl foo
+ call void @foo()
+ ret void
+
+; CHECK-WITHFP: ldp x29, x30, [sp]
+; CHECK-WITHFP: add sp, sp, #16
+
+; CHECK-WITHFP: ret
+}
+
+define void @stack_local() {
+ %local_var = alloca i64
+; CHECK: stack_local:
+; CHECK: sub sp, sp, #16
+
+ %val = load i64* @var
+ store i64 %val, i64* %local_var
+; CHECK: str {{x[0-9]+}}, [sp, #{{[0-9]+}}]
+
+ store i64* %local_var, i64** @local_addr
+; CHECK: add {{x[0-9]+}}, sp, #{{[0-9]+}}
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/logical-imm.ll b/test/CodeGen/AArch64/logical-imm.ll
new file mode 100644
index 000000000000..5f3f4da0cdad
--- /dev/null
+++ b/test/CodeGen/AArch64/logical-imm.ll
@@ -0,0 +1,84 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_and(i32 %in32, i64 %in64) {
+; CHECK: test_and:
+
+ %val0 = and i32 %in32, 2863311530
+ store volatile i32 %val0, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa
+
+ %val1 = and i32 %in32, 4293984240
+ store volatile i32 %val1, i32* @var32
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0
+
+ %val2 = and i64 %in64, 9331882296111890817
+ store volatile i64 %val2, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181
+
+ %val3 = and i64 %in64, 18429855317404942275
+ store volatile i64 %val3, i64* @var64
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3
+
+ ret void
+}
+
+define void @test_orr(i32 %in32, i64 %in64) {
+; CHECK: test_orr:
+
+ %val0 = or i32 %in32, 2863311530
+ store volatile i32 %val0, i32* @var32
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa
+
+ %val1 = or i32 %in32, 4293984240
+ store volatile i32 %val1, i32* @var32
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0
+
+ %val2 = or i64 %in64, 9331882296111890817
+ store volatile i64 %val2, i64* @var64
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181
+
+ %val3 = or i64 %in64, 18429855317404942275
+ store volatile i64 %val3, i64* @var64
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3
+
+ ret void
+}
+
+define void @test_eor(i32 %in32, i64 %in64) {
+; CHECK: test_eor:
+
+ %val0 = xor i32 %in32, 2863311530
+ store volatile i32 %val0, i32* @var32
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa
+
+ %val1 = xor i32 %in32, 4293984240
+ store volatile i32 %val1, i32* @var32
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0
+
+ %val2 = xor i64 %in64, 9331882296111890817
+ store volatile i64 %val2, i64* @var64
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181
+
+ %val3 = xor i64 %in64, 18429855317404942275
+ store volatile i64 %val3, i64* @var64
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3
+
+ ret void
+}
+
+define void @test_mov(i32 %in32, i64 %in64) {
+; CHECK: test_mov:
+ %val0 = add i32 %in32, 2863311530
+ store i32 %val0, i32* @var32
+; CHECK: orr {{w[0-9]+}}, wzr, #0xaaaaaaaa
+
+ %val1 = add i64 %in64, 11068046444225730969
+ store i64 %val1, i64* @var64
+; CHECK: orr {{x[0-9]+}}, xzr, #0x9999999999999999
+
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll
new file mode 100644
index 000000000000..bbbfcc1b9118
--- /dev/null
+++ b/test/CodeGen/AArch64/logical_shifted_reg.ll
@@ -0,0 +1,224 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
+
+@var1_32 = global i32 0
+@var2_32 = global i32 0
+
+@var1_64 = global i64 0
+@var2_64 = global i64 0
+
+define void @logical_32bit() {
+; CHECK: logical_32bit:
+ %val1 = load i32* @var1_32
+ %val2 = load i32* @var2_32
+
+ ; First check basic and/bic/or/orn/eor/eon patterns with no shift
+ %neg_val2 = xor i32 -1, %val2
+
+ %and_noshift = and i32 %val1, %val2
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %and_noshift, i32* @var1_32
+ %bic_noshift = and i32 %neg_val2, %val1
+; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %bic_noshift, i32* @var1_32
+
+ %or_noshift = or i32 %val1, %val2
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %or_noshift, i32* @var1_32
+ %orn_noshift = or i32 %neg_val2, %val1
+; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %orn_noshift, i32* @var1_32
+
+ %xor_noshift = xor i32 %val1, %val2
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %xor_noshift, i32* @var1_32
+ %xorn_noshift = xor i32 %neg_val2, %val1
+; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ store volatile i32 %xorn_noshift, i32* @var1_32
+
+ ; Check the maximum shift on each
+ %operand_lsl31 = shl i32 %val2, 31
+ %neg_operand_lsl31 = xor i32 -1, %operand_lsl31
+
+ %and_lsl31 = and i32 %val1, %operand_lsl31
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %and_lsl31, i32* @var1_32
+ %bic_lsl31 = and i32 %val1, %neg_operand_lsl31
+; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %bic_lsl31, i32* @var1_32
+
+ %or_lsl31 = or i32 %val1, %operand_lsl31
+; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %or_lsl31, i32* @var1_32
+ %orn_lsl31 = or i32 %val1, %neg_operand_lsl31
+; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %orn_lsl31, i32* @var1_32
+
+ %xor_lsl31 = xor i32 %val1, %operand_lsl31
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %xor_lsl31, i32* @var1_32
+ %xorn_lsl31 = xor i32 %val1, %neg_operand_lsl31
+; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
+ store volatile i32 %xorn_lsl31, i32* @var1_32
+
+ ; Check other shifts on a subset
+ %operand_asr10 = ashr i32 %val2, 10
+ %neg_operand_asr10 = xor i32 -1, %operand_asr10
+
+ %bic_asr10 = and i32 %val1, %neg_operand_asr10
+; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #10
+ store volatile i32 %bic_asr10, i32* @var1_32
+ %xor_asr10 = xor i32 %val1, %operand_asr10
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #10
+ store volatile i32 %xor_asr10, i32* @var1_32
+
+ %operand_lsr1 = lshr i32 %val2, 1
+ %neg_operand_lsr1 = xor i32 -1, %operand_lsr1
+
+ %orn_lsr1 = or i32 %val1, %neg_operand_lsr1
+; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #1
+ store volatile i32 %orn_lsr1, i32* @var1_32
+ %xor_lsr1 = xor i32 %val1, %operand_lsr1
+; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #1
+ store volatile i32 %xor_lsr1, i32* @var1_32
+
+ %operand_ror20_big = shl i32 %val2, 12
+ %operand_ror20_small = lshr i32 %val2, 20
+ %operand_ror20 = or i32 %operand_ror20_big, %operand_ror20_small
+ %neg_operand_ror20 = xor i32 -1, %operand_ror20
+
+ %xorn_ror20 = xor i32 %val1, %neg_operand_ror20
+; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ror #20
+ store volatile i32 %xorn_ror20, i32* @var1_32
+ %and_ror20 = and i32 %val1, %operand_ror20
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ror #20
+ store volatile i32 %and_ror20, i32* @var1_32
+
+ ret void
+}
+
+define void @logical_64bit() {
+; CHECK: logical_64bit:
+ %val1 = load i64* @var1_64
+ %val2 = load i64* @var2_64
+
+ ; First check basic and/bic/or/orn/eor/eon patterns with no shift
+ %neg_val2 = xor i64 -1, %val2
+
+ %and_noshift = and i64 %val1, %val2
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %and_noshift, i64* @var1_64
+ %bic_noshift = and i64 %neg_val2, %val1
+; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %bic_noshift, i64* @var1_64
+
+ %or_noshift = or i64 %val1, %val2
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %or_noshift, i64* @var1_64
+ %orn_noshift = or i64 %neg_val2, %val1
+; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %orn_noshift, i64* @var1_64
+
+ %xor_noshift = xor i64 %val1, %val2
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %xor_noshift, i64* @var1_64
+ %xorn_noshift = xor i64 %neg_val2, %val1
+; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+ store volatile i64 %xorn_noshift, i64* @var1_64
+
+ ; Check the maximum shift on each
+ %operand_lsl63 = shl i64 %val2, 63
+ %neg_operand_lsl63 = xor i64 -1, %operand_lsl63
+
+ %and_lsl63 = and i64 %val1, %operand_lsl63
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %and_lsl63, i64* @var1_64
+ %bic_lsl63 = and i64 %val1, %neg_operand_lsl63
+; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %bic_lsl63, i64* @var1_64
+
+ %or_lsl63 = or i64 %val1, %operand_lsl63
+; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %or_lsl63, i64* @var1_64
+ %orn_lsl63 = or i64 %val1, %neg_operand_lsl63
+; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %orn_lsl63, i64* @var1_64
+
+ %xor_lsl63 = xor i64 %val1, %operand_lsl63
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %xor_lsl63, i64* @var1_64
+ %xorn_lsl63 = xor i64 %val1, %neg_operand_lsl63
+; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+ store volatile i64 %xorn_lsl63, i64* @var1_64
+
+ ; Check other shifts on a subset
+ %operand_asr10 = ashr i64 %val2, 10
+ %neg_operand_asr10 = xor i64 -1, %operand_asr10
+
+ %bic_asr10 = and i64 %val1, %neg_operand_asr10
+; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #10
+ store volatile i64 %bic_asr10, i64* @var1_64
+ %xor_asr10 = xor i64 %val1, %operand_asr10
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #10
+ store volatile i64 %xor_asr10, i64* @var1_64
+
+ %operand_lsr1 = lshr i64 %val2, 1
+ %neg_operand_lsr1 = xor i64 -1, %operand_lsr1
+
+ %orn_lsr1 = or i64 %val1, %neg_operand_lsr1
+; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #1
+ store volatile i64 %orn_lsr1, i64* @var1_64
+ %xor_lsr1 = xor i64 %val1, %operand_lsr1
+; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #1
+ store volatile i64 %xor_lsr1, i64* @var1_64
+
+ ; Construct a rotate-right from a bunch of other logical
+ ; operations. DAGCombiner should ensure we form the ROTR during
+ ; selection.
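+ ; (Sanity check of the arithmetic: (x << 44) | (x lshr 20) on an i64 is
+ ; exactly a rotate right by 20, since 44 + 20 == 64.)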
+ %operand_ror20_big = shl i64 %val2, 44
+ %operand_ror20_small = lshr i64 %val2, 20
+ %operand_ror20 = or i64 %operand_ror20_big, %operand_ror20_small
+ %neg_operand_ror20 = xor i64 -1, %operand_ror20
+
+ %xorn_ror20 = xor i64 %val1, %neg_operand_ror20
+; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ror #20
+ store volatile i64 %xorn_ror20, i64* @var1_64
+ %and_ror20 = and i64 %val1, %operand_ror20
+; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ror #20
+ store volatile i64 %and_ror20, i64* @var1_64
+
+ ret void
+}
+
+define void @flag_setting() {
+; CHECK: flag_setting:
+ %val1 = load i64* @var1_64
+ %val2 = load i64* @var2_64
+
+; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: b.gt .L
+ %simple_and = and i64 %val1, %val2
+ %tst1 = icmp sgt i64 %simple_and, 0
+ br i1 %tst1, label %ret, label %test2
+
+test2:
+; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, lsl #63
+; CHECK: b.lt .L
+ %shifted_op = shl i64 %val2, 63
+ %shifted_and = and i64 %val1, %shifted_op
+ %tst2 = icmp slt i64 %shifted_and, 0
+ br i1 %tst2, label %ret, label %test3
+
+test3:
+; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, asr #12
+; CHECK: b.gt .L
+ %asr_op = ashr i64 %val2, 12
+ %asr_and = and i64 %asr_op, %val1
+ %tst3 = icmp sgt i64 %asr_and, 0
+ br i1 %tst3, label %ret, label %other_exit
+
+other_exit:
+ store volatile i64 %val1, i64* @var1_64
+ ret void
+ret:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.s b/test/CodeGen/AArch64/logical_shifted_reg.s
new file mode 100644
index 000000000000..89aea580119b
--- /dev/null
+++ b/test/CodeGen/AArch64/logical_shifted_reg.s
@@ -0,0 +1,208 @@
+ .file "/home/timnor01/a64-trunk/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll"
+ .text
+ .globl logical_32bit
+ .type logical_32bit,@function
+logical_32bit: // @logical_32bit
+ .cfi_startproc
+// BB#0:
+ adrp x0, var1_32
+ ldr w1, [x0, #:lo12:var1_32]
+ adrp x0, var2_32
+ ldr w2, [x0, #:lo12:var2_32]
+ and w3, w1, w2
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ bic w3, w1, w2
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ orr w3, w1, w2
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ orn w3, w1, w2
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eor w3, w1, w2
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eon w3, w2, w1
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ and w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ bic w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ orr w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ orn w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eor w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eon w3, w1, w2, lsl #31
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ bic w3, w1, w2, asr #10
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eor w3, w1, w2, asr #10
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ orn w3, w1, w2, lsr #1
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eor w3, w1, w2, lsr #1
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ eon w3, w1, w2, ror #20
+ adrp x0, var1_32
+ str w3, [x0, #:lo12:var1_32]
+ and w1, w1, w2, ror #20
+ adrp x0, var1_32
+ str w1, [x0, #:lo12:var1_32]
+ ret
+.Ltmp0:
+ .size logical_32bit, .Ltmp0-logical_32bit
+ .cfi_endproc
+
+ .globl logical_64bit
+ .type logical_64bit,@function
+logical_64bit: // @logical_64bit
+ .cfi_startproc
+// BB#0:
+ adrp x0, var1_64
+ ldr x0, [x0, #:lo12:var1_64]
+ adrp x1, var2_64
+ ldr x1, [x1, #:lo12:var2_64]
+ and x2, x0, x1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ bic x2, x0, x1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ orr x2, x0, x1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ orn x2, x0, x1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eor x2, x0, x1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eon x2, x1, x0
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ and x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ bic x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ orr x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ orn x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eor x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eon x2, x0, x1, lsl #63
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ bic x2, x0, x1, asr #10
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eor x2, x0, x1, asr #10
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ orn x2, x0, x1, lsr #1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eor x2, x0, x1, lsr #1
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ eon x2, x0, x1, ror #20
+ adrp x3, var1_64
+ str x2, [x3, #:lo12:var1_64]
+ and x0, x0, x1, ror #20
+ adrp x1, var1_64
+ str x0, [x1, #:lo12:var1_64]
+ ret
+.Ltmp1:
+ .size logical_64bit, .Ltmp1-logical_64bit
+ .cfi_endproc
+
+ .globl flag_setting
+ .type flag_setting,@function
+flag_setting: // @flag_setting
+ .cfi_startproc
+// BB#0:
+ sub sp, sp, #16
+ adrp x0, var1_64
+ ldr x0, [x0, #:lo12:var1_64]
+ adrp x1, var2_64
+ ldr x1, [x1, #:lo12:var2_64]
+ tst x0, x1
+ str x0, [sp, #8] // 8-byte Folded Spill
+ str x1, [sp] // 8-byte Folded Spill
+ b.gt .LBB2_4
+ b .LBB2_1
+.LBB2_1: // %test2
+ ldr x0, [sp, #8] // 8-byte Folded Reload
+ ldr x1, [sp] // 8-byte Folded Reload
+ tst x0, x1, lsl #63
+ b.lt .LBB2_4
+ b .LBB2_2
+.LBB2_2: // %test3
+ ldr x0, [sp, #8] // 8-byte Folded Reload
+ ldr x1, [sp] // 8-byte Folded Reload
+ tst x0, x1, asr #12
+ b.gt .LBB2_4
+ b .LBB2_3
+.LBB2_3: // %other_exit
+ adrp x0, var1_64
+ ldr x1, [sp, #8] // 8-byte Folded Reload
+ str x1, [x0, #:lo12:var1_64]
+ add sp, sp, #16
+ ret
+.LBB2_4: // %ret
+ add sp, sp, #16
+ ret
+.Ltmp2:
+ .size flag_setting, .Ltmp2-flag_setting
+ .cfi_endproc
+
+ .type var1_32,@object // @var1_32
+ .bss
+ .globl var1_32
+ .align 2
+var1_32:
+ .word 0 // 0x0
+ .size var1_32, 4
+
+ .type var2_32,@object // @var2_32
+ .globl var2_32
+ .align 2
+var2_32:
+ .word 0 // 0x0
+ .size var2_32, 4
+
+ .type var1_64,@object // @var1_64
+ .globl var1_64
+ .align 3
+var1_64:
+ .xword 0 // 0x0
+ .size var1_64, 8
+
+ .type var2_64,@object // @var2_64
+ .globl var2_64
+ .align 3
+var2_64:
+ .xword 0 // 0x0
+ .size var2_64, 8
+
+
diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll
new file mode 100644
index 000000000000..b8a5fb932202
--- /dev/null
+++ b/test/CodeGen/AArch64/movw-consts.ll
@@ -0,0 +1,124 @@
+; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+define i64 @test0() {
+; CHECK: test0:
+; Not produced by move wide instructions, but good to make sure we can return 0 anyway:
+; CHECK: mov x0, xzr
+ ret i64 0
+}
+
+define i64 @test1() {
+; CHECK: test1:
+; CHECK: movz x0, #1
+ ret i64 1
+}
+
+define i64 @test2() {
+; CHECK: test2:
+; CHECK: movz x0, #65535
+ ret i64 65535
+}
+
+define i64 @test3() {
+; CHECK: test3:
+; CHECK: movz x0, #1, lsl #16
+ ret i64 65536
+}
+
+define i64 @test4() {
+; CHECK: test4:
+; CHECK: movz x0, #65535, lsl #16
+ ret i64 4294901760
+}
+
+define i64 @test5() {
+; CHECK: test5:
+; CHECK: movz x0, #1, lsl #32
+ ret i64 4294967296
+}
+
+define i64 @test6() {
+; CHECK: test6:
+; CHECK: movz x0, #65535, lsl #32
+ ret i64 281470681743360
+}
+
+define i64 @test7() {
+; CHECK: test7:
+; CHECK: movz x0, #1, lsl #48
+ ret i64 281474976710656
+}
+
+; A 32-bit MOVN can generate some 64-bit patterns that a 64-bit one
+; couldn't. Useful even for i64
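+; (Working through the example below, if my hex is right: 60875 is 0xedcb,
+; so "movn w0, #60875" produces 0xffff1234, which zero-extends to the i64
+; 4294906420 being returned.)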
+define i64 @test8() {
+; CHECK: test8:
+; CHECK: movn w0, #60875
+ ret i64 4294906420
+}
+
+define i64 @test9() {
+; CHECK: test9:
+; CHECK: movn x0, #0
+ ret i64 -1
+}
+
+define i64 @test10() {
+; CHECK: test10:
+; CHECK: movn x0, #60875, lsl #16
+ ret i64 18446744069720047615
+}
+
+; For reasonably legitimate reasons returning an i32 results in the
+; selection of an i64 constant, so we need a different idiom to test that selection.
+@var32 = global i32 0
+
+define void @test11() {
+; CHECK: test11:
+; CHECK: mov {{w[0-9]+}}, wzr
+ store i32 0, i32* @var32
+ ret void
+}
+
+define void @test12() {
+; CHECK: test12:
+; CHECK: movz {{w[0-9]+}}, #1
+ store i32 1, i32* @var32
+ ret void
+}
+
+define void @test13() {
+; CHECK: test13:
+; CHECK: movz {{w[0-9]+}}, #65535
+ store i32 65535, i32* @var32
+ ret void
+}
+
+define void @test14() {
+; CHECK: test14:
+; CHECK: movz {{w[0-9]+}}, #1, lsl #16
+ store i32 65536, i32* @var32
+ ret void
+}
+
+define void @test15() {
+; CHECK: test15:
+; CHECK: movz {{w[0-9]+}}, #65535, lsl #16
+ store i32 4294901760, i32* @var32
+ ret void
+}
+
+define void @test16() {
+; CHECK: test16:
+; CHECK: movn {{w[0-9]+}}, #0
+ store i32 -1, i32* @var32
+ ret void
+}
+
+define i64 @test17() {
+; CHECK: test17:
+
+ ; Mustn't MOVN w0 here.
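+ ; (If my arithmetic is right: -3 is 0xfffffffffffffffd, but "movn w0, #2"
+ ; would give 0x00000000fffffffd after zero-extension; hence the x-register.)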
+; CHECK: movn x0, #2
+ ret i64 -3
+}
diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll
new file mode 100644
index 000000000000..77bf691cbcbd
--- /dev/null
+++ b/test/CodeGen/AArch64/pic-eh-stubs.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
+
+; Make sure exception-handling PIC code can be linked correctly. An alternative
+; to the sequence described below would have .gcc_except_table itself writable
+; and not use the indirection, but this isn't what LLVM does right now.
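+; (The upshot, as I read the sequence below: .gcc_except_table stays
+; read-only and refers pc-relatively to .L_ZTIi.DW.stub, and only that stub
+; word, placed in writable .data.rel, needs a dynamic relocation to _ZTIi.)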
+
+ ; There should be a read-only .gcc_except_table section...
+; CHECK: .section .gcc_except_table,"a"
+
+ ; ... referring indirectly to stubs for its typeinfo ...
+; CHECK: // @TType Encoding = indirect pcrel sdata8
+ ; ... one of which is "int"'s typeinfo
+; CHECK: .Ltmp9:
+; CHECK-NEXT: .xword .L_ZTIi.DW.stub-.Ltmp9
+
+ ; .. and which is properly defined (in a writable section for the dynamic loader) later.
+; CHECK: .section .data.rel,"aw"
+; CHECK: .L_ZTIi.DW.stub:
+; CHECK-NEXT: .xword _ZTIi
+
+@_ZTIi = external constant i8*
+
+define i32 @_Z3barv() {
+entry:
+ invoke void @_Z3foov()
+ to label %return unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %1 = extractvalue { i8*, i32 } %0, 1
+ %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+ %matches = icmp eq i32 %1, %2
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %lpad
+ %3 = extractvalue { i8*, i32 } %0, 0
+ %4 = tail call i8* @__cxa_begin_catch(i8* %3) nounwind
+ %5 = bitcast i8* %4 to i32*
+ %exn.scalar = load i32* %5, align 4
+ tail call void @__cxa_end_catch() nounwind
+ br label %return
+
+return: ; preds = %entry, %catch
+ %retval.0 = phi i32 [ %exn.scalar, %catch ], [ 42, %entry ]
+ ret i32 %retval.0
+
+eh.resume: ; preds = %lpad
+ resume { i8*, i32 } %0
+}
+
+declare void @_Z3foov()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/regress-bitcast-formals.ll b/test/CodeGen/AArch64/regress-bitcast-formals.ll
new file mode 100644
index 000000000000..28dc9a7e2515
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-bitcast-formals.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+; CallingConv.td requires a bitcast for vector arguments. Make sure we're
+; actually capable of that (the test was omitted from LowerFormalArguments).
+
+define void @test_bitcast_lower(<2 x i32> %a) {
+; CHECK: test_bitcast_lower:
+
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/regress-f128csel-flags.ll
new file mode 100644
index 000000000000..b35185ccd6f3
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-f128csel-flags.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+; We used to not mark NZCV as being used in the continuation basic-block
+; when lowering a 128-bit "select" to branches. This meant a subsequent use
+; of the same flags gave an internal fault here.
+
+declare void @foo(fp128)
+
+define double @test_f128csel_flags(i32 %lhs, fp128 %a, fp128 %b) nounwind {
+; CHECK: test_f128csel_flags
+
+ %tst = icmp ne i32 %lhs, 42
+ %val = select i1 %tst, fp128 %a, fp128 %b
+; CHECK: cmp w0, #42
+; CHECK: b.eq .LBB0
+
+ call void @foo(fp128 %val)
+ %retval = select i1 %tst, double 4.0, double 5.0
+
+ ; It's also reasonably important that the actual fcsel comes before the
+ ; function call since bl may corrupt NZCV. We were doing the right thing anyway,
+ ; but we may as well test it while we're here.
+; CHECK: fcsel {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, ne
+; CHECK: bl foo
+
+ ret double %retval
+}
diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll
new file mode 100644
index 000000000000..8d5485cae4c8
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-tail-livereg.ll
@@ -0,0 +1,19 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+@var = global void()* zeroinitializer
+
+declare void @bar()
+
+define void @foo() {
+; CHECK: foo:
+ %func = load void()** @var
+
+ ; Calling a function encourages @foo to use a callee-saved register,
+ ; which makes it a natural choice for the tail call itself. But we don't
+ ; want that: the final "br xN" has to use a temporary or argument
+ ; register.
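+ ; (Presumably because callee-saved registers are restored in the epilogue
+ ; before the branch, so the function pointer would be clobbered; the CHECK
+ ; regex below therefore excludes the callee-saved range.)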
+ call void @bar()
+
+ tail call void %func()
+; CHECK: br {{x([0-79]|1[0-8])}}
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll
new file mode 100644
index 000000000000..e54552fd8edf
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -0,0 +1,36 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; When generating DAG selection tables, TableGen used to only flag an
+; instruction as needing a chain on its own account if it had a built-in pattern
+; which used the chain. This meant that the AArch64 load/stores weren't
+; recognised and so both loads from %locvar below were coalesced into a single
+; LS8_LDR instruction (same operands other than the non-existent chain) and the
+; increment was lost at return.
+
+; This was obviously a Bad Thing.
+
+declare void @bar(i8*)
+
+define i64 @test_chains() {
+; CHECK: test_chains:
+
+ %locvar = alloca i8
+
+ call void @bar(i8* %locvar)
+; CHECK: bl bar
+
+ %inc.1 = load i8* %locvar
+ %inc.2 = zext i8 %inc.1 to i64
+ %inc.3 = add i64 %inc.2, 1
+ %inc.4 = trunc i64 %inc.3 to i8
+ store i8 %inc.4, i8* %locvar
+; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR:#[0-9]+]]]
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #1
+; CHECK: strb {{w[0-9]+}}, [sp, [[LOCADDR]]]
+; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR]]]
+
+ %ret.1 = load i8* %locvar
+ %ret.2 = zext i8 %ret.1 to i64
+ ret i64 %ret.2
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
new file mode 100644
index 000000000000..980e2ffef901
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
+@var = global i32 0
+
+declare void @bar()
+
+define void @test_w29_reserved() {
+; CHECK: test_w29_reserved:
+; CHECK: add x29, sp, #{{[0-9]+}}
+
+ %val1 = load volatile i32* @var
+ %val2 = load volatile i32* @var
+ %val3 = load volatile i32* @var
+ %val4 = load volatile i32* @var
+ %val5 = load volatile i32* @var
+ %val6 = load volatile i32* @var
+ %val7 = load volatile i32* @var
+ %val8 = load volatile i32* @var
+ %val9 = load volatile i32* @var
+
+; CHECK-NOT: ldr w29,
+
+ ; Call to prevent fp-elim that occurs regardless in leaf functions.
+ call void @bar()
+
+ store volatile i32 %val1, i32* @var
+ store volatile i32 %val2, i32* @var
+ store volatile i32 %val3, i32* @var
+ store volatile i32 %val4, i32* @var
+ store volatile i32 %val5, i32* @var
+ store volatile i32 %val6, i32* @var
+ store volatile i32 %val7, i32* @var
+ store volatile i32 %val8, i32* @var
+ store volatile i32 %val9, i32* @var
+
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/regress-wzr-allocatable.ll b/test/CodeGen/AArch64/regress-wzr-allocatable.ll
new file mode 100644
index 000000000000..764d2bc44f0d
--- /dev/null
+++ b/test/CodeGen/AArch64/regress-wzr-allocatable.ll
@@ -0,0 +1,41 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0
+
+; When WZR wasn't marked as reserved, this function tried to allocate
+; it at O0 and then generated an internal fault (mostly incidentally)
+; when it discovered that it was already in use for a multiplication.
+
+; I'm not really convinced this is a good test since it could easily
+; stop testing what it does now with no-one any the wiser. However, I
+; can't think of a better way to force the allocator to use WZR
+; specifically.
+
+define void @test() nounwind {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ br i1 undef, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ br label %for.cond6
+
+for.cond6: ; preds = %for.body9, %for.end
+ br i1 undef, label %for.body9, label %while.cond30
+
+for.body9: ; preds = %for.cond6
+ store i16 0, i16* undef, align 2
+ %0 = load i32* undef, align 4
+ %1 = load i32* undef, align 4
+ %mul15 = mul i32 %0, %1
+ %add16 = add i32 %mul15, 32768
+ %div = udiv i32 %add16, 65535
+ %add17 = add i32 %div, 1
+ store i32 %add17, i32* undef, align 4
+ br label %for.cond6
+
+while.cond30: ; preds = %for.cond6
+ ret void
+}
diff --git a/test/CodeGen/AArch64/setcc-takes-i32.ll b/test/CodeGen/AArch64/setcc-takes-i32.ll
new file mode 100644
index 000000000000..d2eb77ab1b54
--- /dev/null
+++ b/test/CodeGen/AArch64/setcc-takes-i32.ll
@@ -0,0 +1,22 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; Most important point here is that the promotion of the i1 works
+; correctly. Previously LLVM thought that i64 was the appropriate SetCC output,
+; which meant it proceded in two steps and produced an i64 -> i64 any_ext which
+; couldn't be selected and faulted.
+
+; It was expecting the smallest legal promotion of i1 to be the preferred SetCC
+; type, so we'll satisfy it (this actually arguably gives better code anyway,
+; with flag-manipulation operations allowed to use W-registers).
+
+declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64)
+
+define i64 @test_select(i64 %lhs, i64 %rhs) {
+; CHECK: test_select:
+
+ %res = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %lhs, i64 %rhs)
+ %flag = extractvalue {i64, i1} %res, 1
+ %retval = select i1 %flag, i64 %lhs, i64 %rhs
+ ret i64 %retval
+; CHECK: ret
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll
new file mode 100644
index 000000000000..a1ec618b03ba
--- /dev/null
+++ b/test/CodeGen/AArch64/sibling-call.ll
@@ -0,0 +1,97 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+declare void @callee_stack0()
+declare void @callee_stack8([8 x i32], i64)
+declare void @callee_stack16([8 x i32], i64, i64)
+
+define void @caller_to0_from0() nounwind {
+; CHECK: caller_to0_from0:
+; CHECK-NEXT: // BB
+ tail call void @callee_stack0()
+ ret void
+; CHECK-NEXT: b callee_stack0
+}
+
+define void @caller_to0_from8([8 x i32], i64) nounwind {
+; CHECK: caller_to0_from8:
+; CHECK-NEXT: // BB
+
+ tail call void @callee_stack0()
+ ret void
+; CHECK-NEXT: b callee_stack0
+}
+
+define void @caller_to8_from0() {
+; CHECK: caller_to8_from0:
+
+; Caller isn't going to clean up any extra stack we allocate, so it
+; can't be a tail call.
+ tail call void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: bl callee_stack8
+}
+
+define void @caller_to8_from8([8 x i32], i64 %a) {
+; CHECK: caller_to8_from8:
+; CHECK-NOT: sub sp, sp,
+
+; This should reuse our stack area for the 42
+ tail call void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: str {{x[0-9]+}}, [sp]
+; CHECK-NEXT: b callee_stack8
+}
+
+define void @caller_to16_from8([8 x i32], i64 %a) {
+; CHECK: caller_to16_from8:
+
+; Shouldn't be a tail call: we can't use SP+8 because our caller might
+; have something there. This may sound obvious, but the implementation does
+; some funky aligning.
+ tail call void @callee_stack16([8 x i32] undef, i64 undef, i64 undef)
+; CHECK: bl callee_stack16
+ ret void
+}
+
+define void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
+; CHECK: caller_to8_from24:
+; CHECK-NOT: sub sp, sp
+
+; Reuse our area, putting "42" at incoming sp
+ tail call void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: str {{x[0-9]+}}, [sp]
+; CHECK-NEXT: b callee_stack8
+}
+
+define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
+; CHECK: caller_to16_from16:
+; CHECK-NOT: sub sp, sp,
+
+; Here we want to make sure that both loads happen before the stores:
+; otherwise either %a or %b will be wrongly clobbered.
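+; (The call swaps %a and %b within the same stack slots, so a load issued
+; after one of the stores would pick up the swapped value.)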
+ tail call void @callee_stack16([8 x i32] undef, i64 %b, i64 %a)
+ ret void
+
+; CHECK: ldr x0,
+; CHECK: ldr x1,
+; CHECK: str x1,
+; CHECK: str x0,
+
+; CHECK-NOT: add sp, sp,
+; CHECK: b callee_stack16
+}
+
+@func = global void(i32)* null
+
+define void @indirect_tail() {
+; CHECK: indirect_tail:
+; CHECK-NOT: sub sp, sp
+
+ %fptr = load void(i32)** @func
+ tail call void %fptr(i32 42)
+ ret void
+; CHECK: movz w0, #42
+; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, #:lo12:func]
+; CHECK: br [[FPTR]]
+}
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/sincos-expansion.ll b/test/CodeGen/AArch64/sincos-expansion.ll
new file mode 100644
index 000000000000..c7a392b78c24
--- /dev/null
+++ b/test/CodeGen/AArch64/sincos-expansion.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+define float @test_sincos_f32(float %f) {
+ %sin = call float @sinf(float %f) readnone
+ %cos = call float @cosf(float %f) readnone
+; CHECK: bl cosf
+; CHECK: bl sinf
+ %val = fadd float %sin, %cos
+ ret float %val
+}
+
+define double @test_sincos_f64(double %f) {
+ %sin = call double @sin(double %f) readnone
+ %cos = call double @cos(double %f) readnone
+ %val = fadd double %sin, %cos
+; CHECK: bl cos
+; CHECK: bl sin
+ ret double %val
+}
+
+define fp128 @test_sincos_f128(fp128 %f) {
+ %sin = call fp128 @sinl(fp128 %f) readnone
+ %cos = call fp128 @cosl(fp128 %f) readnone
+ %val = fadd fp128 %sin, %cos
+; CHECK: bl cosl
+; CHECK: bl sinl
+ ret fp128 %val
+}
+
+declare float @sinf(float) readonly
+declare double @sin(double) readonly
+declare fp128 @sinl(fp128) readonly
+declare float @cosf(float) readonly
+declare double @cos(double) readonly
+declare fp128 @cosl(fp128) readonly
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll
new file mode 100644
index 000000000000..f323b151ad1e
--- /dev/null
+++ b/test/CodeGen/AArch64/tail-call.ll
@@ -0,0 +1,94 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
+
+declare fastcc void @callee_stack0()
+declare fastcc void @callee_stack8([8 x i32], i64)
+declare fastcc void @callee_stack16([8 x i32], i64, i64)
+
+define fastcc void @caller_to0_from0() nounwind {
+; CHECK: caller_to0_from0:
+; CHECK-NEXT: // BB
+ tail call fastcc void @callee_stack0()
+ ret void
+; CHECK-NEXT: b callee_stack0
+}
+
+define fastcc void @caller_to0_from8([8 x i32], i64) {
+; CHECK: caller_to0_from8:
+
+ tail call fastcc void @callee_stack0()
+ ret void
+; CHECK: add sp, sp, #16
+; CHECK-NEXT: b callee_stack0
+}
+
+define fastcc void @caller_to8_from0() {
+; CHECK: caller_to8_from0:
+; CHECK: sub sp, sp, #32
+
+; Key point is that the "42" should go #16 below incoming stack
+; pointer (we didn't have arg space to reuse).
+ tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: str {{x[0-9]+}}, [sp, #16]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: b callee_stack8
+}
+
+define fastcc void @caller_to8_from8([8 x i32], i64 %a) {
+; CHECK: caller_to8_from8:
+; CHECK: sub sp, sp, #16
+
+; Key point is that the "42" should go where "%a" is: at SP on entry.
+ tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: str {{x[0-9]+}}, [sp, #16]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: b callee_stack8
+}
+
+define fastcc void @caller_to16_from8([8 x i32], i64 %a) {
+; CHECK: caller_to16_from8:
+; CHECK: sub sp, sp, #16
+
+; Important point is that the call reuses the "dead" argument space
+; above %a on the stack. If it tries to go below incoming-SP then the
+; callee will not deallocate the space, even in fastcc.
+ tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2)
+; CHECK: str {{x[0-9]+}}, [sp, #24]
+; CHECK: str {{x[0-9]+}}, [sp, #16]
+; CHECK: add sp, sp, #16
+; CHECK: b callee_stack16
+ ret void
+}
+
+
+define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) {
+; CHECK: caller_to8_from24:
+; CHECK: sub sp, sp, #16
+
+; Key point is that the "42" should go #16 above SP on entry.
+ tail call fastcc void @callee_stack8([8 x i32] undef, i64 42)
+ ret void
+; CHECK: str {{x[0-9]+}}, [sp, #32]
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: b callee_stack8
+}
+
+
+define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
+; CHECK: caller_to16_from16:
+; CHECK: sub sp, sp, #16
+
+; Here we want to make sure that both loads happen before the stores:
+; otherwise either %a or %b will be wrongly clobbered.
+ tail call fastcc void @callee_stack16([8 x i32] undef, i64 %b, i64 %a)
+ ret void
+
+; CHECK: ldr x0,
+; CHECK: ldr x1,
+; CHECK: str x1,
+; CHECK: str x0,
+
+; CHECK: add sp, sp, #16
+; CHECK: b callee_stack16
+}
diff --git a/test/CodeGen/AArch64/tls-dynamic-together.ll b/test/CodeGen/AArch64/tls-dynamic-together.ll
new file mode 100644
index 000000000000..bad2298c8a65
--- /dev/null
+++ b/test/CodeGen/AArch64/tls-dynamic-together.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O0 -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
+
+; If the .tlsdesccall and blr parts are emitted completely separately (even with
+; glue) then LLVM will separate them quite happily (with a spill at O0, hence
+; the option). This is definitely wrong, so we make sure they are emitted
+; together.
+
+@general_dynamic_var = external thread_local global i32
+
+define i32 @test_generaldynamic() {
+; CHECK: test_generaldynamic:
+
+ %val = load i32* @general_dynamic_var
+ ret i32 %val
+
+; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: blr {{x[0-9]+}}
+}
diff --git a/test/CodeGen/AArch64/tls-dynamics.ll b/test/CodeGen/AArch64/tls-dynamics.ll
new file mode 100644
index 000000000000..cdfd11783c23
--- /dev/null
+++ b/test/CodeGen/AArch64/tls-dynamics.ll
@@ -0,0 +1,121 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+
+@general_dynamic_var = external thread_local global i32
+
+define i32 @test_generaldynamic() {
+; CHECK: test_generaldynamic:
+
+ %val = load i32* @general_dynamic_var
+ ret i32 %val
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
+; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
+; CHECK: ldr w0, [x[[TP]], x0]
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+define i32* @test_generaldynamic_addr() {
+; CHECK: test_generaldynamic_addr:
+
+ ret i32* @general_dynamic_var
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var]
+; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
+; CHECK: add x0, [[TP]], x0
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+@local_dynamic_var = external thread_local(localdynamic) global i32
+
+define i32 @test_localdynamic() {
+; CHECK: test_localdynamic:
+
+ %val = load i32* @local_dynamic_var
+ ret i32 %val
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
+; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
+
+; CHECK: ldr w0, [x0, [[DTP_OFFSET]]]
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+define i32* @test_localdynamic_addr() {
+; CHECK: test_localdynamic_addr:
+
+ ret i32* @local_dynamic_var
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
+; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
+
+; CHECK: add x0, x0, [[DTP_OFFSET]]
+
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+}
+
+; The entire point of the local-dynamic access model is to have a single call to
+; the expensive resolver. Make sure we achieve that goal.
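+; (Concretely: one _TLS_MODULE_BASE_ tlsdesc sequence below, then each
+; variable is reached via its own dtprel offset from that shared base.)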
+
+@local_dynamic_var2 = external thread_local(localdynamic) global i32
+
+define i32 @test_localdynamic_deduplicate() {
+; CHECK: test_localdynamic_deduplicate:
+
+ %val = load i32* @local_dynamic_var
+ %val2 = load i32* @local_dynamic_var2
+
+ %sum = add i32 %val, %val2
+ ret i32 %sum
+
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: blr [[CALLEE]]
+
+; CHECK-NOT: _TLS_MODULE_BASE_
+
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/tls-execs.ll b/test/CodeGen/AArch64/tls-execs.ll
new file mode 100644
index 000000000000..a66588422793
--- /dev/null
+++ b/test/CodeGen/AArch64/tls-execs.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+
+@initial_exec_var = external thread_local(initialexec) global i32
+
+define i32 @test_initial_exec() {
+; CHECK: test_initial_exec:
+ %val = load i32* @initial_exec_var
+
+; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
+; CHECK: ldr x[[TP_OFFSET:[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var]
+; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
+; CHECK: ldr w0, [x[[TP]], x[[TP_OFFSET]]]
+
+; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
+
+ ret i32 %val
+}
+
+define i32* @test_initial_exec_addr() {
+; CHECK: test_initial_exec_addr:
+ ret i32* @initial_exec_var
+
+; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
+; CHECK: ldr [[TP_OFFSET:x[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var]
+; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
+; CHECK: add x0, [[TP]], [[TP_OFFSET]]
+
+; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
+
+}
+
+@local_exec_var = thread_local(initialexec) global i32 0
+
+define i32 @test_local_exec() {
+; CHECK: test_local_exec:
+ %val = load i32* @local_exec_var
+
+; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
+; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
+; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0
+; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]]
+
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+
+ ret i32 %val
+}
+
+define i32* @test_local_exec_addr() {
+; CHECK: test_local_exec_addr:
+ ret i32* @local_exec_var
+
+; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
+; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
+; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0
+; CHECK: add x0, [[TP]], [[TP_OFFSET]]
+
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
+; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+}
diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll
new file mode 100644
index 000000000000..65c1fda49e2d
--- /dev/null
+++ b/test/CodeGen/AArch64/tst-br.ll
@@ -0,0 +1,48 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+; We've got the usual issues with LLVM reordering blocks here. The
+; tests are correct for the current order, but who knows when that
+; will change. Beware!
+@var32 = global i32 0
+@var64 = global i64 0
+
+define i32 @test_tbz() {
+; CHECK: test_tbz:
+
+ %val = load i32* @var32
+ %val64 = load i64* @var64
+
+ %tbit0 = and i32 %val, 32768
+ %tst0 = icmp ne i32 %tbit0, 0
+ br i1 %tst0, label %test1, label %end1
+; CHECK: tbz {{w[0-9]+}}, #15, [[LBL_end1:.LBB0_[0-9]+]]
+
+test1:
+ %tbit1 = and i32 %val, 4096
+ %tst1 = icmp ne i32 %tbit1, 0
+ br i1 %tst1, label %test2, label %end1
+; CHECK: tbz {{w[0-9]+}}, #12, [[LBL_end1]]
+
+test2:
+ %tbit2 = and i64 %val64, 32768
+ %tst2 = icmp ne i64 %tbit2, 0
+ br i1 %tst2, label %test3, label %end1
+; CHECK: tbz {{x[0-9]+}}, #15, [[LBL_end1]]
+
+test3:
+ %tbit3 = and i64 %val64, 4096
+ %tst3 = icmp ne i64 %tbit3, 0
+ br i1 %tst3, label %end2, label %end1
+; CHECK: tbz {{x[0-9]+}}, #12, [[LBL_end1]]
+
+end2:
+; CHECK: movz x0, #1
+; CHECK-NEXT: ret
+ ret i32 1
+
+end1:
+; CHECK: [[LBL_end1]]:
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: ret
+ ret i32 0
+}
diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll
new file mode 100644
index 000000000000..c5d319eb112b
--- /dev/null
+++ b/test/CodeGen/AArch64/variadic.ll
@@ -0,0 +1,144 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+%va_list = type {i8*, i8*, i8*, i32, i32}
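+; (Fields per the AAPCS64 va_list, for reference: __stack, __gr_top and
+; __vr_top at byte offsets 0/8/16, then __gr_offs and __vr_offs at 24/28,
+; matching the offsets stored to in the checks below.)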
+
+@var = global %va_list zeroinitializer
+
+declare void @llvm.va_start(i8*)
+
+define void @test_simple(i32 %n, ...) {
+; CHECK: test_simple:
+; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
+; CHECK: mov x[[FPRBASE:[0-9]+]], sp
+; CHECK: str q7, [x[[FPRBASE]], #112]
+; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
+; CHECK: str x7, [x[[GPRBASE]], #48]
+
+; Omit the middle ones
+
+; CHECK: str q0, [sp]
+; CHECK: str x1, [sp, #[[GPRFROMSP]]]
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK: movn [[VR_OFFS:w[0-9]+]], #127
+; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+; CHECK: movn [[GR_OFFS:w[0-9]+]], #55
+; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
+; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128
+; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56
+; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
+; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+ ret void
+}
+
+define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
+; CHECK: test_fewargs:
+; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
+; CHECK: mov x[[FPRBASE:[0-9]+]], sp
+; CHECK: str q7, [x[[FPRBASE]], #96]
+; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]]
+; CHECK: str x7, [x[[GPRBASE]], #32]
+
+; Omit the middle ones
+
+; CHECK: str q1, [sp]
+; CHECK: str x3, [sp, #[[GPRFROMSP]]]
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK: movn [[VR_OFFS:w[0-9]+]], #111
+; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+; CHECK: movn [[GR_OFFS:w[0-9]+]], #39
+; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
+; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #112
+; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #40
+; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
+; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]]
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+ ret void
+}
+
+define void @test_nospare([8 x i64], [8 x float], ...) {
+; CHECK: test_nospare:
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+; CHECK-NOT: sub sp, sp
+; CHECK: mov [[STACK:x[0-9]+]], sp
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+ ret void
+}
+
+; If there are non-variadic arguments on the stack (here two i64s) then the
+; __stack field should point just past them.
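+; (Back-of-envelope, assuming the usual AAPCS64 split: 8 of the 10 i64s go
+; in x0-x7, leaving 2 on the incoming stack, so __stack is entry-SP + 16;
+; after the "sub sp, sp, #80" prologue that is sp + 96, as checked below.)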
+define void @test_offsetstack([10 x i64], [3 x float], ...) {
+; CHECK: test_offsetstack:
+; CHECK: sub sp, sp, #80
+; CHECK: mov x[[FPRBASE:[0-9]+]], sp
+; CHECK: str q7, [x[[FPRBASE]], #64]
+
+; CHECK-NOT: str x{{[0-9]+}},
+; Omit the middle ones
+
+; CHECK: str q3, [sp]
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_start(i8* %addr)
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+; CHECK: movn [[VR_OFFS:w[0-9]+]], #79
+; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+; CHECK: str wzr, [x[[VA_LIST]], #24]
+; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #80
+; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
+; CHECK: add [[STACK:x[0-9]+]], sp, #96
+; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var]
+
+ ret void
+}
+
+declare void @llvm.va_end(i8*)
+
+define void @test_va_end() nounwind {
+; CHECK: test_va_end:
+; CHECK-NEXT: BB#0
+
+ %addr = bitcast %va_list* @var to i8*
+ call void @llvm.va_end(i8* %addr)
+
+ ret void
+; CHECK-NEXT: ret
+}
+
+declare void @llvm.va_copy(i8* %dest, i8* %src)
+
+@second_list = global %va_list zeroinitializer
+
+define void @test_va_copy() {
+; CHECK: test_va_copy:
+ %srcaddr = bitcast %va_list* @var to i8*
+ %dstaddr = bitcast %va_list* @second_list to i8*
+ call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr)
+
+; Check beginning and end again:
+
+; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
+; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list]
+
+; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
+; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
+
+; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
+; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24]
+
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll
new file mode 100644
index 000000000000..fef0437ae7f3
--- /dev/null
+++ b/test/CodeGen/AArch64/zero-reg.ll
@@ -0,0 +1,31 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @test_zr() {
+; CHECK: test_zr:
+
+ store i32 0, i32* @var32
+; CHECK: str wzr, [{{x[0-9]+}}, #:lo12:var32]
+ store i64 0, i64* @var64
+; CHECK: str xzr, [{{x[0-9]+}}, #:lo12:var64]
+
+ ret void
+; CHECK: ret
+}
+
+define void @test_sp(i32 %val) {
+; CHECK: test_sp:
+
+; Important correctness point here is that LLVM doesn't try to use xzr
+; as an addressing register: "str w0, [xzr]" is not a valid A64
+; instruction (0b11111 in the Rn field would mean "sp").
+ %addr = getelementptr i32* null, i64 0
+ store i32 %val, i32* %addr
+; CHECK: mov x[[NULL:[0-9]+]], xzr
+; CHECK: str {{w[0-9]+}}, [x[[NULL]]]
+
+ ret void
+; CHECK: ret
+}
\ No newline at end of file
diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
index a63cdd46e2d8..4783f3707690 100644
--- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll
+++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
; RUN: -mattr=+v6 | grep r9
; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
diff --git a/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 642268992062..000000000000
--- a/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=arm -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index 6aeaa26cebd1..91a9903f3852 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -74,51 +74,54 @@ return: ; preds = %entry
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0, !9, !16, !17, !20}
+!llvm.dbg.cu = !{!3}
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524307, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
-!2 = metadata !{i32 524329, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 4, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
+!2 = metadata !{i32 786473, metadata !48} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
!4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
-!5 = metadata !{i32 524301, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 524301, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
-!8 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 786445, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
+!8 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
!11 = metadata !{null, metadata !12, metadata !13}
-!12 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
-!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ]
!15 = metadata !{null, metadata !12}
-!16 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ]
-!18 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ]
+!18 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ]
!19 = metadata !{metadata !13, metadata !13, metadata !1}
-!20 = metadata !{i32 524334, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!21 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!20 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!21 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ]
!22 = metadata !{metadata !13}
-!23 = metadata !{i32 524545, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!24 = metadata !{i32 16, i32 0, metadata !17, null}
-!25 = metadata !{i32 524545, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26} ; [ DW_TAG_arg_variable ]
-!26 = metadata !{i32 524304, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
+!25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!26 = metadata !{i32 786448, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
!27 = metadata !{i32 17, i32 0, metadata !28, null}
-!28 = metadata !{i32 524299, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786443, metadata !2, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
!29 = metadata !{i32 18, i32 0, metadata !28, null}
!30 = metadata !{i32 20, i32 0, metadata !28, null}
-!31 = metadata !{i32 524545, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32} ; [ DW_TAG_arg_variable ]
-!32 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ]
-!33 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
+!31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!32 = metadata !{i32 786470, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ]
+!33 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
!34 = metadata !{i32 11, i32 0, metadata !16, null}
!35 = metadata !{i32 11, i32 0, metadata !36, null}
-!36 = metadata !{i32 524299, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
-!37 = metadata !{i32 524299, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
-!38 = metadata !{i32 524544, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1} ; [ DW_TAG_auto_variable ]
-!39 = metadata !{i32 524299, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ]
-!40 = metadata !{i32 524299, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ]
+!36 = metadata !{i32 786443, metadata !2, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
+!37 = metadata !{i32 786443, metadata !2, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!39 = metadata !{i32 786443, metadata !2, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 786443, metadata !2, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
!41 = metadata !{i32 24, i32 0, metadata !39, null}
!42 = metadata !{i32 25, i32 0, metadata !39, null}
!43 = metadata !{i32 26, i32 0, metadata !39, null}
-!44 = metadata !{i32 524544, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13} ; [ DW_TAG_auto_variable ]
+!44 = metadata !{i32 786688, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
!45 = metadata !{i32 27, i32 0, metadata !39, null}
+!46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20}
+!47 = metadata !{i32 0}
+!48 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"}
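The renumbered leading constants in hunks like this one follow the DIDescriptor convention of storing (LLVMDebugVersion << 16) + DW_TAG in the first field; decoding a few of the values above as a sanity check (assuming the version-12 encoding this release uses):

; 786478 = (12 << 16) + 46 -> DW_TAG_subprogram (0x2e) under debug-info version 12
; 524334 = ( 8 << 16) + 46 -> the same tag under the old version-8 encoding
; 786443 = (12 << 16) + 11 -> DW_TAG_lexical_block (0x0b)
; 786473 = (12 << 16) + 41 -> DW_TAG_file_type (0x29)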
diff --git a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
index 8b164c5d91f8..94a05412f5d4 100644
--- a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
+++ b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
@@ -23,7 +23,7 @@ entry:
; OBJ: Relocation 0
; OBJ-NEXT: 'r_offset', 0x00000004
-; OBJ-NEXT: 'r_sym', 0x000007
+; OBJ-NEXT: 'r_sym', 0x000009
; OBJ-NEXT: 'r_type', 0x2b
; OBJ: Relocation 1
@@ -33,7 +33,7 @@ entry:
; OBJ: # Relocation 2
; OBJ-NEXT: 'r_offset', 0x0000000c
-; OBJ-NEXT: 'r_sym', 0x000008
+; OBJ-NEXT: 'r_sym', 0x00000a
; OBJ-NEXT: 'r_type', 0x1c
}
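Both updated indices move by the same amount: 0x000007 -> 0x000009 and 0x000008 -> 0x00000a, i.e. +2 each, consistent with two additional symbols now appearing earlier in the symbol table rather than with any change to the relocations themselves.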
diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
index 5cfbb4f944f7..1272a257931d 100644
--- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
+++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -10,7 +10,8 @@
@STRIDE = internal global i32 8
; ASM: .type array00,%object @ @array00
-; ASM-NEXT: .lcomm array00,80
+; ASM-NEXT: .local array00
+; ASM-NEXT: .comm array00,80,1
; ASM-NEXT: .type _MergedGlobals,%object @ @_MergedGlobals
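The replacement makes explicit what .lcomm bundled together: .local gives array00 local linkage, and the trailing operand of .comm array00,80,1 spells out the alignment (1 byte) alongside the 80-byte size.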
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index ca88eedcea60..1d1b89a34f9a 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -75,44 +75,38 @@ entry:
ret i8 %0, !dbg !39
}
-!llvm.dbg.sp = !{!0, !6, !7, !8, !9}
-!llvm.dbg.lv.get1 = !{!10, !11}
-!llvm.dbg.gv = !{!13, !14, !15, !16, !17}
-!llvm.dbg.lv.get2 = !{!18, !19}
-!llvm.dbg.lv.get3 = !{!21, !22}
-!llvm.dbg.lv.get4 = !{!24, !25}
-!llvm.dbg.lv.get5 = !{!27, !28}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"foo.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5, metadata !5}
-!5 = metadata !{i32 589860, metadata !1, metadata !"_Bool", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 4, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 590080, metadata !12, metadata !"b", metadata !1, i32 4, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!12 = metadata !{i32 589835, metadata !0, i32 4, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x1", metadata !"x1", metadata !"", metadata !1, i32 3, metadata !5, i1 true, i1 true, i8* @x1} ; [ DW_TAG_variable ]
-!14 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x2", metadata !"x2", metadata !"", metadata !1, i32 6, metadata !5, i1 true, i1 true, i8* @x2} ; [ DW_TAG_variable ]
-!15 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x3", metadata !"x3", metadata !"", metadata !1, i32 9, metadata !5, i1 true, i1 true, i8* @x3} ; [ DW_TAG_variable ]
-!16 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x4", metadata !"x4", metadata !"", metadata !1, i32 12, metadata !5, i1 true, i1 true, i8* @x4} ; [ DW_TAG_variable ]
-!17 = metadata !{i32 589876, i32 0, metadata !1, metadata !"x5", metadata !"x5", metadata !"", metadata !1, i32 15, metadata !5, i1 false, i1 true, i8* @x5} ; [ DW_TAG_variable ]
-!18 = metadata !{i32 590081, metadata !6, metadata !"a", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 590080, metadata !20, metadata !"b", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!20 = metadata !{i32 589835, metadata !6, i32 7, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
-!21 = metadata !{i32 590081, metadata !7, metadata !"a", metadata !1, i32 10, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 590080, metadata !23, metadata !"b", metadata !1, i32 10, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!23 = metadata !{i32 589835, metadata !7, i32 10, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
-!24 = metadata !{i32 590081, metadata !8, metadata !"a", metadata !1, i32 13, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!25 = metadata !{i32 590080, metadata !26, metadata !"b", metadata !1, i32 13, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 589835, metadata !8, i32 13, i32 0, metadata !1, i32 3} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 590081, metadata !9, metadata !"a", metadata !1, i32 16, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!28 = metadata !{i32 590080, metadata !29, metadata !"b", metadata !1, i32 16, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 589835, metadata !9, i32 16, i32 0, metadata !1, i32 4} ; [ DW_TAG_lexical_block ]
+!5 = metadata !{i32 786468, metadata !1, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2, null, null, metadata !43, i32 7} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786478, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3, null, null, metadata !44, i32 10} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 786478, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4, null, null, metadata !45, i32 13} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5, null, null, metadata !46, i32 16} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!12 = metadata !{i32 786443, metadata !0, i32 4, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x1", metadata !"x1", metadata !"", metadata !1, i32 3, metadata !5, i1 true, i1 true, i8* @x1, null} ; [ DW_TAG_variable ]
+!14 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x2", metadata !"x2", metadata !"", metadata !1, i32 6, metadata !5, i1 true, i1 true, i8* @x2, null} ; [ DW_TAG_variable ]
+!15 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x3", metadata !"x3", metadata !"", metadata !1, i32 9, metadata !5, i1 true, i1 true, i8* @x3, null} ; [ DW_TAG_variable ]
+!16 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x4", metadata !"x4", metadata !"", metadata !1, i32 12, metadata !5, i1 true, i1 true, i8* @x4, null} ; [ DW_TAG_variable ]
+!17 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x5", metadata !"x5", metadata !"", metadata !1, i32 15, metadata !5, i1 false, i1 true, i8* @x5, null} ; [ DW_TAG_variable ]
+!18 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !1, i32 7, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 786688, metadata !20, metadata !"b", metadata !1, i32 7, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!20 = metadata !{i32 786443, metadata !6, i32 7, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!21 = metadata !{i32 786689, metadata !7, metadata !"a", metadata !1, i32 10, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!22 = metadata !{i32 786688, metadata !23, metadata !"b", metadata !1, i32 10, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!23 = metadata !{i32 786443, metadata !7, i32 10, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{i32 786689, metadata !8, metadata !"a", metadata !1, i32 13, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!25 = metadata !{i32 786688, metadata !26, metadata !"b", metadata !1, i32 13, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!26 = metadata !{i32 786443, metadata !8, i32 13, i32 0, metadata !1, i32 3} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 786689, metadata !9, metadata !"a", metadata !1, i32 16, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!28 = metadata !{i32 786688, metadata !29, metadata !"b", metadata !1, i32 16, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 786443, metadata !9, i32 16, i32 0, metadata !1, i32 4} ; [ DW_TAG_lexical_block ]
!30 = metadata !{i32 4, i32 0, metadata !0, null}
!31 = metadata !{i32 4, i32 0, metadata !12, null}
!32 = metadata !{i32 7, i32 0, metadata !6, null}
@@ -123,3 +117,11 @@ entry:
!37 = metadata !{i32 13, i32 0, metadata !26, null}
!38 = metadata !{i32 16, i32 0, metadata !9, null}
!39 = metadata !{i32 16, i32 0, metadata !29, null}
+!40 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8, metadata !9}
+!41 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16, metadata !17}
+!42 = metadata !{metadata !10, metadata !11}
+!43 = metadata !{metadata !18, metadata !19}
+!44 = metadata !{metadata !21, metadata !22}
+!45 = metadata !{metadata !24, metadata !25}
+!46 = metadata !{metadata !27, metadata !28}
+!47 = metadata !{metadata !"foo.c", metadata !"/tmp/"}
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index f2b0c5d7d090..266609b8ce69 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -8,7 +8,7 @@
; DW_OP_constu
; offset
-;CHECK: .long Lset33
+;CHECK: .long Lset8
;CHECK-NEXT: @ DW_AT_type
;CHECK-NEXT: @ DW_AT_decl_file
;CHECK-NEXT: @ DW_AT_decl_line
@@ -73,44 +73,37 @@ define i32 @get5(i32 %a) nounwind optsize ssp {
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1, !6, !7, !8, !9}
-!llvm.dbg.lv.get1 = !{!10, !11}
-!llvm.dbg.lv.get2 = !{!13, !14}
-!llvm.dbg.lv.get3 = !{!16, !17}
-!llvm.dbg.lv.get4 = !{!19, !20}
-!llvm.dbg.gv = !{!22, !23, !24, !25, !26}
-!llvm.dbg.lv.get5 = !{!27, !28}
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"ss3.c", metadata !"/private/tmp", metadata !"clang", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"ss3.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !2, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get2", metadata !"get2", metadata !"", metadata !2, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get2, null, null} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get3", metadata !"get3", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get3, null, null} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get4", metadata !"get4", metadata !"", metadata !2, i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get4, null, null} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 589870, i32 0, metadata !2, metadata !"get5", metadata !"get5", metadata !"", metadata !2, i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get5, null, null} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 590081, metadata !1, metadata !"a", metadata !2, i32 16777221, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 590080, metadata !12, metadata !"b", metadata !2, i32 5, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!12 = metadata !{i32 589835, metadata !1, i32 5, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 590081, metadata !6, metadata !"a", metadata !2, i32 16777224, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!14 = metadata !{i32 590080, metadata !15, metadata !"b", metadata !2, i32 8, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 589835, metadata !6, i32 8, i32 17, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 590081, metadata !7, metadata !"a", metadata !2, i32 16777227, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 590080, metadata !18, metadata !"b", metadata !2, i32 11, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 589835, metadata !7, i32 11, i32 19, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 590081, metadata !8, metadata !"a", metadata !2, i32 16777230, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!20 = metadata !{i32 590080, metadata !21, metadata !"b", metadata !2, i32 14, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!21 = metadata !{i32 589835, metadata !8, i32 14, i32 19, metadata !2, i32 3} ; [ DW_TAG_lexical_block ]
-!22 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x5", metadata !"x5", metadata !"", metadata !2, i32 16, metadata !5, i32 0, i32 1, i32* @x5} ; [ DW_TAG_variable ]
-!23 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x4", metadata !"x4", metadata !"", metadata !2, i32 13, metadata !5, i32 1, i32 1, i32* @x4} ; [ DW_TAG_variable ]
-!24 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x3", metadata !"x3", metadata !"", metadata !2, i32 10, metadata !5, i32 1, i32 1, i32* @x3} ; [ DW_TAG_variable ]
-!25 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x2", metadata !"x2", metadata !"", metadata !2, i32 7, metadata !5, i32 1, i32 1, i32* @x2} ; [ DW_TAG_variable ]
-!26 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x1", metadata !"x1", metadata !"", metadata !2, i32 4, metadata !5, i32 1, i32 1, i32* @x1} ; [ DW_TAG_variable ]
-!27 = metadata !{i32 590081, metadata !9, metadata !"a", metadata !2, i32 16777233, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!28 = metadata !{i32 590080, metadata !29, metadata !"b", metadata !2, i32 17, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 589835, metadata !9, i32 17, i32 19, metadata !2, i32 4} ; [ DW_TAG_lexical_block ]
+!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !2, metadata !"get2", metadata !"get2", metadata !"", metadata !2, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get2, null, null, metadata !43, i32 8} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786478, metadata !2, metadata !"get3", metadata !"get3", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get3, null, null, metadata !44, i32 11} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 786478, metadata !2, metadata !"get4", metadata !"get4", metadata !"", metadata !2, i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get4, null, null, metadata !45, i32 14} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786478, metadata !2, metadata !"get5", metadata !"get5", metadata !"", metadata !2, i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get5, null, null, metadata !46, i32 17} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777221, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !2, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!12 = metadata !{i32 786443, metadata !1, i32 5, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !2, i32 16777224, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!14 = metadata !{i32 786688, metadata !15, metadata !"b", metadata !2, i32 8, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 786443, metadata !6, i32 8, i32 17, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786689, metadata !7, metadata !"a", metadata !2, i32 16777227, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 786688, metadata !18, metadata !"b", metadata !2, i32 11, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 786443, metadata !7, i32 11, i32 19, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786689, metadata !8, metadata !"a", metadata !2, i32 16777230, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!20 = metadata !{i32 786688, metadata !21, metadata !"b", metadata !2, i32 14, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!21 = metadata !{i32 786443, metadata !8, i32 14, i32 19, metadata !2, i32 3} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x5", metadata !"x5", metadata !"", metadata !2, i32 16, metadata !5, i32 0, i32 1, i32* @x5, null} ; [ DW_TAG_variable ]
+!23 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x4", metadata !"x4", metadata !"", metadata !2, i32 13, metadata !5, i32 1, i32 1, i32* @x4, null} ; [ DW_TAG_variable ]
+!24 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x3", metadata !"x3", metadata !"", metadata !2, i32 10, metadata !5, i32 1, i32 1, i32* @x3, null} ; [ DW_TAG_variable ]
+!25 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x2", metadata !"x2", metadata !"", metadata !2, i32 7, metadata !5, i32 1, i32 1, i32* @x2, null} ; [ DW_TAG_variable ]
+!26 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x1", metadata !"x1", metadata !"", metadata !2, i32 4, metadata !5, i32 1, i32 1, i32* @x1, null} ; [ DW_TAG_variable ]
+!27 = metadata !{i32 786689, metadata !9, metadata !"a", metadata !2, i32 16777233, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!28 = metadata !{i32 786688, metadata !29, metadata !"b", metadata !2, i32 17, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 786443, metadata !9, i32 17, i32 19, metadata !2, i32 4} ; [ DW_TAG_lexical_block ]
!30 = metadata !{i32 5, i32 16, metadata !1, null}
!31 = metadata !{i32 5, i32 32, metadata !12, null}
!32 = metadata !{i32 8, i32 14, metadata !6, null}
@@ -121,3 +114,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!37 = metadata !{i32 14, i32 32, metadata !21, null}
!38 = metadata !{i32 17, i32 16, metadata !9, null}
!39 = metadata !{i32 17, i32 32, metadata !29, null}
+!40 = metadata !{metadata !1, metadata !6, metadata !7, metadata !8, metadata !9}
+!41 = metadata !{metadata !22, metadata !23, metadata !24, metadata !25, metadata !26}
+!42 = metadata !{metadata !10, metadata !11}
+!43 = metadata !{metadata !13, metadata !14}
+!44 = metadata !{metadata !16, metadata !17}
+!45 = metadata !{metadata !19, metadata !20}
+!46 = metadata !{metadata !27, metadata !28}
+!47 = metadata !{metadata !"ss3.c", metadata !"/private/tmp"}
diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
index 6e0ef9619657..f563eeef0180 100644
--- a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
+++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
@@ -1,13 +1,5 @@
; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
-; Should trigger a NEON store.
-; CHECK: vstr
-define void @f_0_12(i8* nocapture %c) nounwind optsize {
-entry:
- call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
- ret void
-}
-
; Trigger multiple NEON stores.
; CHECK: vst1.64
; CHECK-NEXT: vst1.64
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index f9ede7401a3c..0d0d03b23e86 100644
--- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -8,7 +8,7 @@ define void @test_sqrt(<4 x float>* %X) nounwind {
; CHECK: movw r1, :lower16:{{.*}}
; CHECK: movt r1, :upper16:{{.*}}
-; CHECK: vld1.64 {{.*}}, [r1, :128]
+; CHECK: vld1.64 {{.*}}, [r1:128]
; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
@@ -252,7 +252,7 @@ define void @test_powi(<4 x float>* %X) nounwind {
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
; CHECK: movt [[reg0]], :upper16:{{.*}}
-; CHECK: vld1.64 {{.*}}, :128
+; CHECK: vld1.64 {{.*}}:128
; CHECK: vmul.f32 {{.*}}
; CHECK: vst1.64
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
index b21bb006e327..1b21f7571d8e 100644
--- a/test/CodeGen/ARM/2011-12-14-machine-sink.ll
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
; Radar 10266272
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
diff --git a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
index b05ec6367ee4..ca0964a05933 100644
--- a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
+++ b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
@@ -13,6 +13,7 @@
; CHECK-NOT: ch SU
; CHECK: ch SU(2): Latency=1
; CHECK-NOT: ch SU
+; CHECK: Successors:
; CHECK: ** List Scheduling
; CHECK: SU(2){{.*}}STR{{.*}}
; CHECK-NOT: ch SU
@@ -22,6 +23,7 @@
; CHECK-NOT: ch SU
; CHECK: ch SU(2): Latency=1
; CHECK-NOT: ch SU
+; CHECK: Successors:
define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind {
entry:
store volatile i32 65540, i32* %p1, align 4, !tbaa !0
diff --git a/test/CodeGen/ARM/2012-08-09-neon-extload.ll b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
index b55f1cae7fe6..764c58f2e159 100644
--- a/test/CodeGen/ARM/2012-08-09-neon-extload.ll
+++ b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
@@ -18,7 +18,7 @@ define void @test_v2i8tov2i32() {
%i32val = sext <2 x i8> %i8val to <2 x i32>
store <2 x i32> %i32val, <2 x i32>* @var_v2i32
-; CHECK: vld1.16 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :16]
+; CHECK: vld1.16 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:16]
; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
@@ -32,7 +32,7 @@ define void @test_v2i8tov2i64() {
%i64val = sext <2 x i8> %i8val to <2 x i64>
store <2 x i64> %i64val, <2 x i64>* @var_v2i64
-; CHECK: vld1.16 {d{{[0-9]+}}[0]}, [{{r[0-9]+}}, :16]
+; CHECK: vld1.16 {d{{[0-9]+}}[0]}, [{{r[0-9]+}}:16]
; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
; CHECK: vmovl.s32 {{q[0-9]+}}, {{d[0-9]+}}
@@ -50,7 +50,7 @@ define void @test_v4i8tov4i16() {
%i16val = sext <4 x i8> %i8val to <4 x i16>
store <4 x i16> %i16val, <4 x i16>* @var_v4i16
-; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32]
; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
; CHECK-NOT: vmovl.s16
@@ -65,7 +65,7 @@ define void @test_v4i8tov4i32() {
%i16val = sext <4 x i8> %i8val to <4 x i32>
store <4 x i32> %i16val, <4 x i32>* @var_v4i32
-; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32]
; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
@@ -79,7 +79,7 @@ define void @test_v2i16tov2i32() {
%i32val = sext <2 x i16> %i16val to <2 x i32>
store <2 x i32> %i32val, <2 x i32>* @var_v2i32
-; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32]
; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]]
; CHECK-NOT: vmovl
@@ -94,7 +94,7 @@ define void @test_v2i16tov2i64() {
%i64val = sext <2 x i16> %i16val to <2 x i64>
store <2 x i64> %i64val, <2 x i64>* @var_v2i64
-; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}:32]
; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]]
; CHECK: vmovl.s32 {{q[0-9]+}}, d[[LOAD]]
diff --git a/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
new file mode 100644
index 000000000000..2f55204aa407
--- /dev/null
+++ b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
@@ -0,0 +1,150 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+; PR12281
+; Test generation of code for the vmull instruction when multiplying 128-bit
+; vectors that were created by sign-extending smaller vector types.
+;
+; The vmull operation requires 64-bit vector operands, so the original vectors
+; must be widened to 64 bits before vmull can be used. This previously failed
+; with an assertion because the <4 x i8> vector was too small for vmull.
+; (A minimal legal instance of the pattern appears after this file.)
+
+; Vector x Constant
+; v4i8
+;
+define void @sextload_v4i8_c(<4 x i8>* %v) nounwind {
+;CHECK: sextload_v4i8_c:
+entry:
+ %0 = load <4 x i8>* %v, align 8
+ %v0 = sext <4 x i8> %0 to <4 x i32>
+;CHECK: vmull
+ %v1 = mul <4 x i32> %v0, <i32 3, i32 3, i32 3, i32 3>
+ store <4 x i32> %v1, <4 x i32>* undef, align 8
+ ret void;
+}
+
+; v2i8
+;
+define void @sextload_v2i8_c(<2 x i8>* %v) nounwind {
+;CHECK: sextload_v2i8_c:
+entry:
+ %0 = load <2 x i8>* %v, align 8
+ %v0 = sext <2 x i8> %0 to <2 x i64>
+;CHECK: vmull
+ %v1 = mul <2 x i64> %v0, <i64 3, i64 3>
+ store <2 x i64> %v1, <2 x i64>* undef, align 8
+ ret void;
+}
+
+; v2i16
+;
+define void @sextload_v2i16_c(<2 x i16>* %v) nounwind {
+;CHECK: sextload_v2i16_c:
+entry:
+ %0 = load <2 x i16>* %v, align 8
+ %v0 = sext <2 x i16> %0 to <2 x i64>
+;CHECK: vmull
+ %v1 = mul <2 x i64> %v0, <i64 3, i64 3>
+ store <2 x i64> %v1, <2 x i64>* undef, align 8
+ ret void;
+}
+
+
+; Vector x Vector
+; v4i8
+;
+define void @sextload_v4i8_v(<4 x i8>* %v, <4 x i8>* %p) nounwind {
+;CHECK: sextload_v4i8_v:
+entry:
+ %0 = load <4 x i8>* %v, align 8
+ %v0 = sext <4 x i8> %0 to <4 x i32>
+
+ %1 = load <4 x i8>* %p, align 8
+ %v2 = sext <4 x i8> %1 to <4 x i32>
+;CHECK: vmull
+ %v1 = mul <4 x i32> %v0, %v2
+ store <4 x i32> %v1, <4 x i32>* undef, align 8
+ ret void;
+}
+
+; v2i8
+;
+define void @sextload_v2i8_v(<2 x i8>* %v, <2 x i8>* %p) nounwind {
+;CHECK: sextload_v2i8_v:
+entry:
+ %0 = load <2 x i8>* %v, align 8
+ %v0 = sext <2 x i8> %0 to <2 x i64>
+
+ %1 = load <2 x i8>* %p, align 8
+ %v2 = sext <2 x i8> %1 to <2 x i64>
+;CHECK: vmull
+ %v1 = mul <2 x i64> %v0, %v2
+ store <2 x i64> %v1, <2 x i64>* undef, align 8
+ ret void;
+}
+
+; v2i16
+;
+define void @sextload_v2i16_v(<2 x i16>* %v, <2 x i16>* %p) nounwind {
+;CHECK: sextload_v2i16_v:
+entry:
+ %0 = load <2 x i16>* %v, align 8
+ %v0 = sext <2 x i16> %0 to <2 x i64>
+
+ %1 = load <2 x i16>* %p, align 8
+ %v2 = sext <2 x i16> %1 to <2 x i64>
+;CHECK: vmull
+ %v1 = mul <2 x i64> %v0, %v2
+ store <2 x i64> %v1, <2 x i64>* undef, align 8
+ ret void;
+}
+
+
+; Vector(small) x Vector(big)
+; v4i8 x v4i16
+;
+define void @sextload_v4i8_vs(<4 x i8>* %v, <4 x i16>* %p) nounwind {
+;CHECK: sextload_v4i8_vs:
+entry:
+ %0 = load <4 x i8>* %v, align 8
+ %v0 = sext <4 x i8> %0 to <4 x i32>
+
+ %1 = load <4 x i16>* %p, align 8
+ %v2 = sext <4 x i16> %1 to <4 x i32>
+;CHECK: vmull
+ %v1 = mul <4 x i32> %v0, %v2
+ store <4 x i32> %v1, <4 x i32>* undef, align 8
+ ret void;
+}
+
+; v2i8
+; v2i8 x v2i16
+define void @sextload_v2i8_vs(<2 x i8>* %v, <2 x i16>* %p) nounwind {
+;CHECK: sextload_v2i8_vs:
+entry:
+ %0 = load <2 x i8>* %v, align 8
+ %v0 = sext <2 x i8> %0 to <2 x i64>
+
+ %1 = load <2 x i16>* %p, align 8
+ %v2 = sext <2 x i16> %1 to <2 x i64>
+;CHECK: vmull
+ %v1 = mul <2 x i64> %v0, %v2
+ store <2 x i64> %v1, <2 x i64>* undef, align 8
+ ret void;
+}
+
+; v2i16
+; v2i16 x v2i32
+define void @sextload_v2i16_vs(<2 x i16>* %v, <2 x i32>* %p) nounwind {
+;CHECK: sextload_v2i16_vs:
+entry:
+ %0 = load <2 x i16>* %v, align 8
+ %v0 = sext <2 x i16> %0 to <2 x i64>
+
+ %1 = load <2 x i32>* %p, align 8
+ %v2 = sext <2 x i32> %1 to <2 x i64>
+;CHECK: vmull
+ %v1 = mul <2 x i64> %v0, %v2
+ store <2 x i64> %v1, <2 x i64>* undef, align 8
+ ret void;
+}
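A minimal legal instance of the vmull pattern, as a sketch under the same -march=arm -mattr=+neon run line (here the operands are already 64-bit vectors, so no widening is needed and isel can emit a single vmull.s8):

define <8 x i16> @vmull_s8_sketch(<8 x i8> %a, <8 x i8> %b) nounwind {
entry:
  %sa = sext <8 x i8> %a to <8 x i16>
  %sb = sext <8 x i8> %b to <8 x i16>
  ; both multiplicands are sign extensions of 64-bit vectors, which is
  ; exactly the form the vmull selection pattern matches
  %m = mul <8 x i16> %sa, %sb
  ret <8 x i16> %m
}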
diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
index b5f6d311cb9c..b0644d17431d 100644
--- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
+++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -19,7 +19,7 @@ entry:
; CHECK: bfc [[REG]], #0, #3
%0 = va_arg i8** %g, double
call void @llvm.va_end(i8* %g1)
-
+
ret void
}
diff --git a/test/CodeGen/ARM/2012-11-14-subs_carry.ll b/test/CodeGen/ARM/2012-11-14-subs_carry.ll
new file mode 100644
index 000000000000..38700f3a8d10
--- /dev/null
+++ b/test/CodeGen/ARM/2012-11-14-subs_carry.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+
+;CHECK: foo
+;CHECK: adds
+;CHECK-NEXT: adc
+;CHECK-NEXT: bx
+
+;rdar://12028498
+
+define i32 @foo() nounwind ssp {
+entry:
+ %tmp2 = zext i32 3 to i64
+ br label %bug_block
+
+bug_block:
+ %tmp410 = and i64 1031, 1647010
+ %tmp411 = and i64 %tmp2, -211
+ %tmp412 = shl i64 %tmp410, %tmp2
+ %tmp413 = shl i64 %tmp411, %tmp2
+ %tmp415 = and i64 %tmp413, 1
+ %tmp420 = xor i64 0, %tmp415
+ %tmp421 = and i64 %tmp412, %tmp415
+ %tmp422 = shl i64 %tmp421, 1
+ br label %finish
+
+finish:
+ %tmp423 = lshr i64 %tmp422, 32
+ %tmp424 = trunc i64 %tmp423 to i32
+ ret i32 %tmp424
+}
+
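The adds/adc pairing checked above relies on flag-setting arithmetic feeding a carry consumer; the simplest place the same pairing appears is plain 64-bit addition, sketched here under the same thumbv7 triple:

define i64 @add64_sketch(i64 %a, i64 %b) nounwind {
entry:
  ; on a 32-bit ARM target this single add splits into adds (low words,
  ; setting the carry flag) followed by adc (high words, consuming it)
  %s = add i64 %a, %b
  ret i64 %s
}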
diff --git a/test/CodeGen/ARM/2013-01-21-PR14992.ll b/test/CodeGen/ARM/2013-01-21-PR14992.ll
new file mode 100644
index 000000000000..38b9e0e8f086
--- /dev/null
+++ b/test/CodeGen/ARM/2013-01-21-PR14992.ll
@@ -0,0 +1,28 @@
+;PR14992 - TableGen incorrectly converts the ARM tLDMIA_UPD pseudo to tLDMIA
+;RUN: llc -mtriple=thumbv7 < %s | FileCheck -check-prefix=EXPECTED %s
+;RUN: llc -mtriple=thumbv7 < %s | FileCheck %s
+
+;EXPECTED: foo:
+;CHECK: foo:
+define i32 @foo(i32* %a) nounwind optsize {
+entry:
+ %0 = load i32* %a, align 4, !tbaa !0
+ %arrayidx1 = getelementptr inbounds i32* %a, i32 1
+ %1 = load i32* %arrayidx1, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 2
+ %2 = load i32* %arrayidx2, align 4, !tbaa !0
+ %add.ptr = getelementptr inbounds i32* %a, i32 3
+;Make sure we do not have a duplicated register in the front of the reg list
+;EXPECTED: ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], {{r[0-9]+}},
+;CHECK-NOT: ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], [[REG]],
+ tail call void @bar(i32* %add.ptr) nounwind optsize
+ %add = add nsw i32 %1, %0
+ %add3 = add nsw i32 %add, %2
+ ret i32 %add3
+}
+
+declare void @bar(i32*) optsize
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
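Schematically, the failure mode the CHECK-NOT above guards against looks like this (hypothetical register numbers):

; ldm r3!, {r0, r1, r2}   ; expected writeback load-multiple
; ldm r3!, {r0, r0, r1}   ; PR14992 failure mode: first register duplicated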
diff --git a/test/CodeGen/ARM/2013-02-27-expand-vfma.ll b/test/CodeGen/ARM/2013-02-27-expand-vfma.ll
new file mode 100644
index 000000000000..0e3bf2371061
--- /dev/null
+++ b/test/CodeGen/ARM/2013-02-27-expand-vfma.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7s-apple-darwin | FileCheck %s -check-prefix=CHECK-VFP4
+
+define <4 x float> @muladd(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind {
+; CHECK: muladd:
+; CHECK: fmaf
+; CHECK: fmaf
+; CHECK: fmaf
+; CHECK: fmaf
+; CHECK-NOT: fmaf
+
+; CHECK-VFP4: vfma.f32
+ %tmp = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a) #2
+ ret <4 x float> %tmp
+}
+
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
+
+define <2 x float> @muladd2(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind {
+; CHECK: muladd2:
+; CHECK: fmaf
+; CHECK: fmaf
+; CHECK-NOT: fmaf
+
+; CHECK-VFP4: vfma.f32
+ %tmp = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a) #2
+ ret <2 x float> %tmp
+}
+
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
+
diff --git a/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll b/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll
new file mode 100644
index 000000000000..2561686c1f83
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp | FileCheck %s
+; This test case was provided by Jiangning Liu.
+;CHECK-NOT: vldmia
+
+define void @sample_test(<8 x i64> * %secondSource, <8 x i64> * %source, <8 x i64> * %dest) nounwind {
+entry:
+ %s0 = load <8 x i64> * %source, align 64
+ %s1 = load <8 x i64> * %secondSource, align 64
+ %s2 = bitcast <8 x i64> %s0 to i512
+ %data.i.i.48.extract.shift = lshr i512 %s2, 384
+ %data.i.i.48.extract.trunc = trunc i512 %data.i.i.48.extract.shift to i64
+ %arrayidx64 = getelementptr inbounds <8 x i64> * %source, i32 6
+ %s120 = load <8 x i64> * %arrayidx64, align 64
+ %arrayidx67 = getelementptr inbounds <8 x i64> * %secondSource, i32 6
+ %s121 = load <8 x i64> * %arrayidx67, align 64
+ %s122 = bitcast <8 x i64> %s120 to i512
+ %data.i.i677.48.extract.shift = lshr i512 %s122, 384
+ %data.i.i677.48.extract.trunc = trunc i512 %data.i.i677.48.extract.shift to i64
+ %s123 = insertelement <8 x i64> undef, i64 %data.i.i677.48.extract.trunc, i32 0
+ %data.i.i677.32.extract.shift = lshr i512 %s122, 256
+ %data.i.i677.32.extract.trunc = trunc i512 %data.i.i677.32.extract.shift to i64
+ %s124 = insertelement <8 x i64> %s123, i64 %data.i.i677.32.extract.trunc, i32 1
+ %data.i.i677.16.extract.shift = lshr i512 %s122, 128
+ %data.i.i677.16.extract.trunc = trunc i512 %data.i.i677.16.extract.shift to i64
+ %s125 = insertelement <8 x i64> %s124, i64 %data.i.i677.16.extract.trunc, i32 2
+ %data.i.i677.56.extract.shift = lshr i512 %s122, 448
+ %data.i.i677.56.extract.trunc = trunc i512 %data.i.i677.56.extract.shift to i64
+ %s126 = insertelement <8 x i64> %s125, i64 %data.i.i677.56.extract.trunc, i32 3
+ %data.i.i677.24.extract.shift = lshr i512 %s122, 192
+ %data.i.i677.24.extract.trunc = trunc i512 %data.i.i677.24.extract.shift to i64
+ %s127 = insertelement <8 x i64> %s126, i64 %data.i.i677.24.extract.trunc, i32 4
+ %s128 = insertelement <8 x i64> %s127, i64 %data.i.i677.32.extract.trunc, i32 5
+ %s129 = insertelement <8 x i64> %s128, i64 %data.i.i677.16.extract.trunc, i32 6
+ %s130 = insertelement <8 x i64> %s129, i64 %data.i.i677.56.extract.trunc, i32 7
+ %s131 = bitcast <8 x i64> %s121 to i512
+ %data.i1.i676.48.extract.shift = lshr i512 %s131, 384
+ %data.i1.i676.48.extract.trunc = trunc i512 %data.i1.i676.48.extract.shift to i64
+ %s132 = insertelement <8 x i64> undef, i64 %data.i1.i676.48.extract.trunc, i32 0
+ %data.i1.i676.32.extract.shift = lshr i512 %s131, 256
+ %data.i1.i676.32.extract.trunc = trunc i512 %data.i1.i676.32.extract.shift to i64
+ %s133 = insertelement <8 x i64> %s132, i64 %data.i1.i676.32.extract.trunc, i32 1
+ %data.i1.i676.16.extract.shift = lshr i512 %s131, 128
+ %data.i1.i676.16.extract.trunc = trunc i512 %data.i1.i676.16.extract.shift to i64
+ %s134 = insertelement <8 x i64> %s133, i64 %data.i1.i676.16.extract.trunc, i32 2
+ %data.i1.i676.56.extract.shift = lshr i512 %s131, 448
+ %data.i1.i676.56.extract.trunc = trunc i512 %data.i1.i676.56.extract.shift to i64
+ %s135 = insertelement <8 x i64> %s134, i64 %data.i1.i676.56.extract.trunc, i32 3
+ %data.i1.i676.24.extract.shift = lshr i512 %s131, 192
+ %data.i1.i676.24.extract.trunc = trunc i512 %data.i1.i676.24.extract.shift to i64
+ %s136 = insertelement <8 x i64> %s135, i64 %data.i1.i676.24.extract.trunc, i32 4
+ %s137 = insertelement <8 x i64> %s136, i64 %data.i1.i676.32.extract.trunc, i32 5
+ %s138 = insertelement <8 x i64> %s137, i64 %data.i1.i676.16.extract.trunc, i32 6
+ %s139 = insertelement <8 x i64> %s138, i64 %data.i1.i676.56.extract.trunc, i32 7
+ %vecinit28.i.i699 = shufflevector <8 x i64> %s139, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef>
+ %vecinit35.i.i700 = shufflevector <8 x i64> %vecinit28.i.i699, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef>
+ %vecinit42.i.i701 = shufflevector <8 x i64> %vecinit35.i.i700, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef>
+ %vecinit49.i.i702 = shufflevector <8 x i64> %vecinit42.i.i701, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
+ %arrayidx72 = getelementptr inbounds <8 x i64> * %dest, i32 6
+ store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64
+ %arrayidx75 = getelementptr inbounds <8 x i64> * %source, i32 7
+ %s140 = load <8 x i64> * %arrayidx75, align 64
+ %arrayidx78 = getelementptr inbounds <8 x i64> * %secondSource, i32 7
+ %s141 = load <8 x i64> * %arrayidx78, align 64
+ %s142 = bitcast <8 x i64> %s140 to i512
+ %data.i.i650.32.extract.shift = lshr i512 %s142, 256
+ %data.i.i650.32.extract.trunc = trunc i512 %data.i.i650.32.extract.shift to i64
+ %s143 = insertelement <8 x i64> undef, i64 %data.i.i650.32.extract.trunc, i32 0
+ %s144 = insertelement <8 x i64> %s143, i64 %data.i.i650.32.extract.trunc, i32 1
+ %data.i.i650.16.extract.shift = lshr i512 %s142, 128
+ %data.i.i650.16.extract.trunc = trunc i512 %data.i.i650.16.extract.shift to i64
+ %s145 = insertelement <8 x i64> %s144, i64 %data.i.i650.16.extract.trunc, i32 2
+ %data.i.i650.8.extract.shift = lshr i512 %s142, 64
+ %data.i.i650.8.extract.trunc = trunc i512 %data.i.i650.8.extract.shift to i64
+ %s146 = insertelement <8 x i64> %s145, i64 %data.i.i650.8.extract.trunc, i32 3
+ %s147 = insertelement <8 x i64> %s146, i64 %data.i.i650.8.extract.trunc, i32 4
+ %data.i.i650.48.extract.shift = lshr i512 %s142, 384
+ %data.i.i650.48.extract.trunc = trunc i512 %data.i.i650.48.extract.shift to i64
+ %s148 = insertelement <8 x i64> %s147, i64 %data.i.i650.48.extract.trunc, i32 5
+ %s149 = insertelement <8 x i64> %s148, i64 %data.i.i650.16.extract.trunc, i32 6
+ %data.i.i650.0.extract.trunc = trunc i512 %s142 to i64
+ %s150 = insertelement <8 x i64> %s149, i64 %data.i.i650.0.extract.trunc, i32 7
+ %s151 = bitcast <8 x i64> %s141 to i512
+ %data.i1.i649.32.extract.shift = lshr i512 %s151, 256
+ %data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64
+ %s152 = insertelement <8 x i64> undef, i64 %data.i1.i649.32.extract.trunc, i32 0
+ %s153 = insertelement <8 x i64> %s152, i64 %data.i1.i649.32.extract.trunc, i32 1
+ %data.i1.i649.16.extract.shift = lshr i512 %s151, 128
+ %data.i1.i649.16.extract.trunc = trunc i512 %data.i1.i649.16.extract.shift to i64
+ %s154 = insertelement <8 x i64> %s153, i64 %data.i1.i649.16.extract.trunc, i32 2
+ %data.i1.i649.8.extract.shift = lshr i512 %s151, 64
+ %data.i1.i649.8.extract.trunc = trunc i512 %data.i1.i649.8.extract.shift to i64
+ %s155 = insertelement <8 x i64> %s154, i64 %data.i1.i649.8.extract.trunc, i32 3
+ %s156 = insertelement <8 x i64> %s155, i64 %data.i1.i649.8.extract.trunc, i32 4
+ %data.i1.i649.48.extract.shift = lshr i512 %s151, 384
+ %data.i1.i649.48.extract.trunc = trunc i512 %data.i1.i649.48.extract.shift to i64
+ %s157 = insertelement <8 x i64> %s156, i64 %data.i1.i649.48.extract.trunc, i32 5
+ %s158 = insertelement <8 x i64> %s157, i64 %data.i1.i649.16.extract.trunc, i32 6
+ %data.i1.i649.0.extract.trunc = trunc i512 %s151 to i64
+ %s159 = insertelement <8 x i64> %s158, i64 %data.i1.i649.0.extract.trunc, i32 7
+ %vecinit7.i.i669 = shufflevector <8 x i64> %s159, <8 x i64> %s150, <8 x i32> <i32 0, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %vecinit14.i.i670 = shufflevector <8 x i64> %vecinit7.i.i669, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 10, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %vecinit21.i.i671 = shufflevector <8 x i64> %vecinit14.i.i670, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
+ %vecinit28.i.i672 = shufflevector <8 x i64> %vecinit21.i.i671, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef>
+ %vecinit35.i.i673 = shufflevector <8 x i64> %vecinit28.i.i672, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef>
+ %vecinit42.i.i674 = shufflevector <8 x i64> %vecinit35.i.i673, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef>
+ %vecinit49.i.i675 = shufflevector <8 x i64> %vecinit42.i.i674, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
+ %arrayidx83 = getelementptr inbounds <8 x i64> * %dest, i32 7
+ store <8 x i64> %vecinit49.i.i675, <8 x i64> * %arrayidx83, align 64
+ ret void
+}
diff --git a/test/CodeGen/ARM/DbgValueOtherTargets.test b/test/CodeGen/ARM/DbgValueOtherTargets.test
new file mode 100644
index 000000000000..bf90891de0a7
--- /dev/null
+++ b/test/CodeGen/ARM/DbgValueOtherTargets.test
@@ -0,0 +1 @@
+RUN: llc -O0 -march=arm -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/ARM/MergeConsecutiveStores.ll b/test/CodeGen/ARM/MergeConsecutiveStores.ll
new file mode 100644
index 000000000000..06c87e986a83
--- /dev/null
+++ b/test/CodeGen/ARM/MergeConsecutiveStores.ll
@@ -0,0 +1,98 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+
+; Make sure that we merge the consecutive load/store sequence below and use a
+; halfword (16-bit) copy instead of byte copies (see the sketch after this file).
+; CHECK: MergeLoadStoreBaseIndexOffset
+; CHECK: ldrh [[REG:r[0-9]+]], [{{.*}}]
+; CHECK: strh [[REG]], [r1], #2
+define void @MergeLoadStoreBaseIndexOffset(i32* %a, i8* %b, i8* %c, i32 %n) {
+ br label %1
+
+; <label>:1
+ %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]
+ %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
+ %.0 = phi i32* [ %a, %0 ], [ %2, %1 ]
+ %2 = getelementptr inbounds i32* %.0, i32 1
+ %3 = load i32* %.0, align 1
+ %4 = getelementptr inbounds i8* %c, i32 %3
+ %5 = load i8* %4, align 1
+ %6 = add i32 %3, 1
+ %7 = getelementptr inbounds i8* %c, i32 %6
+ %8 = load i8* %7, align 1
+ store i8 %5, i8* %.08, align 1
+ %9 = getelementptr inbounds i8* %.08, i32 1
+ store i8 %8, i8* %9, align 1
+ %10 = getelementptr inbounds i8* %.08, i32 2
+ %11 = add nsw i32 %.09, -1
+ %12 = icmp eq i32 %11, 0
+ br i1 %12, label %13, label %1
+
+; <label>:13
+ ret void
+}
+
+; Make sure that we merge the consecutive load/store sequence below and use a
+; halfword (16-bit) copy instead of byte copies, even when there are
+; intermediate sign extensions.
+; CHECK: MergeLoadStoreBaseIndexOffsetSext
+; CHECK: ldrh [[REG:r[0-9]+]], [{{.*}}]
+; CHECK: strh [[REG]], [r1], #2
+define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
+ br label %1
+
+; <label>:1
+ %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
+ %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
+ %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
+ %2 = getelementptr inbounds i8* %.0, i32 1
+ %3 = load i8* %.0, align 1
+ %4 = sext i8 %3 to i32
+ %5 = getelementptr inbounds i8* %c, i32 %4
+ %6 = load i8* %5, align 1
+ %7 = add i32 %4, 1
+ %8 = getelementptr inbounds i8* %c, i32 %7
+ %9 = load i8* %8, align 1
+ store i8 %6, i8* %.08, align 1
+ %10 = getelementptr inbounds i8* %.08, i32 1
+ store i8 %9, i8* %10, align 1
+ %11 = getelementptr inbounds i8* %.08, i32 2
+ %12 = add nsw i32 %.09, -1
+ %13 = icmp eq i32 %12, 0
+ br i1 %13, label %14, label %1
+
+; <label>:14
+ ret void
+}
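+;
+; Here the +1 is computed in i32 after the sext, so the two indices always
+; differ by exactly one and the byte accesses stay provably adjacent.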
+
+; However, we can only ignore the sign extensions when they appear on all of
+; the memory address computations, which is not the case here.
+; CHECK: loadStoreBaseIndexOffsetSextNoSex
+; CHECK-NOT: ldrh [[REG:r[0-9]+]], [{{.*}}]
+; CHECK-NOT: strh [[REG]], [r1], #2
+define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
+ br label %1
+
+; <label>:1
+ %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
+ %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
+ %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
+ %2 = getelementptr inbounds i8* %.0, i32 1
+ %3 = load i8* %.0, align 1
+ %4 = sext i8 %3 to i32
+ %5 = getelementptr inbounds i8* %c, i32 %4
+ %6 = load i8* %5, align 1
+ %7 = add i8 %3, 1
+ %wrap.4 = sext i8 %7 to i32
+ %8 = getelementptr inbounds i8* %c, i32 %wrap.4
+ %9 = load i8* %8, align 1
+ store i8 %6, i8* %.08, align 1
+ %10 = getelementptr inbounds i8* %.08, i32 1
+ store i8 %9, i8* %10, align 1
+ %11 = getelementptr inbounds i8* %.08, i32 2
+ %12 = add nsw i32 %.09, -1
+ %13 = icmp eq i32 %12, 0
+ br i1 %13, label %14, label %1
+
+; <label>:14
+ ret void
+}
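+;
+; A concrete wrap case (illustrative values): if the loaded byte %3 is 127,
+; then %4 = 127, but %7 = add i8 127, 1 wraps to -128 and %wrap.4 = -128, so
+; the two addresses are %c+127 and %c-128. They are not adjacent, and merging
+; them into one 16-bit access would be wrong.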
diff --git a/test/CodeGen/ARM/PR15053.ll b/test/CodeGen/ARM/PR15053.ll
new file mode 100644
index 000000000000..706a90efe3a8
--- /dev/null
+++ b/test/CodeGen/ARM/PR15053.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=armv7 < %s
+; PR15053
+
+declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind
+declare { i32, i32 } @llvm.arm.ldrexd(i8*) nounwind readonly
+
+define void @foo() {
+entry:
+ %0 = tail call { i32, i32 } @llvm.arm.ldrexd(i8* undef) nounwind
+ %1 = extractvalue { i32, i32 } %0, 0
+ %2 = tail call i32 @llvm.arm.strexd(i32 %1, i32 undef, i8* undef) nounwind
+ ret void
+}
diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll
new file mode 100644
index 000000000000..a52468e5be9e
--- /dev/null
+++ b/test/CodeGen/ARM/a15-SD-dep.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -disable-a15-sd-optimization -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-DISABLED %s
+; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-ENABLED %s
+
+; CHECK-ENABLED: t1:
+; CHECK-DISABLED: t1:
+define <2 x float> @t1(float %f) {
+ ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
+ ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
+ %i1 = insertelement <2 x float> undef, float %f, i32 1
+ %i2 = fadd <2 x float> %i1, %i1
+ ret <2 x float> %i2
+}
+
+; CHECK-ENABLED: t2:
+; CHECK-DISABLED: t2:
+define <4 x float> @t2(float %g, float %f) {
+ ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d0[0]
+ ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
+ %i1 = insertelement <4 x float> undef, float %f, i32 1
+ %i2 = fadd <4 x float> %i1, %i1
+ ret <4 x float> %i2
+}
+
+; CHECK-ENABLED: t3:
+; CHECK-DISABLED: t3:
+define arm_aapcs_vfpcc <2 x float> @t3(float %f) {
+ ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
+ ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0]
+ %i1 = insertelement <2 x float> undef, float %f, i32 1
+ %i2 = fadd <2 x float> %i1, %i1
+ ret <2 x float> %i2
+}
+
+; CHECK-ENABLED: t4:
+; CHECK-DISABLED: t4:
+define <2 x float> @t4(float %f) {
+ ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0]
+ ; CHECK-DISABLED-NOT: vdup
+ %i1 = insertelement <2 x float> undef, float %f, i32 1
+ br label %b
+
+ ; Block %b has an S-reg as live-in.
+b:
+ %i2 = fadd <2 x float> %i1, %i1
+ ret <2 x float> %i2
+}
+
+; CHECK-ENABLED: t5:
+; CHECK-DISABLED: t5:
+define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) {
+ ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
+ ; CHECK-ENABLED: vadd.f32
+ ; CHECK-ENABLED-NEXT: bx lr
+ ; CHECK-DISABLED-NOT: vdup
+ %i1 = insertelement <4 x float> %q, float %f, i32 1
+ %i2 = fadd <4 x float> %i1, %i1
+ ret <4 x float> %i2
+}
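+;
+; Background for these checks: on Cortex-A15, a NEON (D/Q-register)
+; instruction that reads a register recently written as an S register incurs
+; a stall; the optimization being tested breaks that partial dependency by
+; duplicating the scalar across the full register with vdup.32.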
diff --git a/test/CodeGen/ARM/a15-partial-update.ll b/test/CodeGen/ARM/a15-partial-update.ll
new file mode 100644
index 000000000000..6306790d15f0
--- /dev/null
+++ b/test/CodeGen/ARM/a15-partial-update.ll
@@ -0,0 +1,38 @@
+; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK: t1:
+define <2 x float> @t1(float* %A, <2 x float> %B) {
+; The generated code for this test uses a vld1.32 instruction
+; to write lane 1 of a D register containing the value of
+; <2 x float> %B. Since the D register is already defined, it
+; would be incorrect to fully write it (with a vmov.f64) before
+; the vld1.32 instruction. The test checks that no vmov.f64 was
+; generated.
+
+; CHECK-NOT: vmov.{{.*}} d{{[0-9]+}},
+ %tmp2 = load float* %A, align 4
+ %tmp3 = insertelement <2 x float> %B, float %tmp2, i32 1
+ ret <2 x float> %tmp3
+}
+
+; CHECK: t2:
+define void @t2(<4 x i8> *%in, <4 x i8> *%out, i32 %n) {
+entry:
+ br label %loop
+loop:
+; The code generated by this test uses a vld1.32 instruction.
+; We check that a dependency-breaking vmov* instruction was
+; generated.
+
+; CHECK: vmov.{{.*}} d{{[0-9]+}},
+ %oldcount = phi i32 [0, %entry], [%newcount, %loop]
+ %newcount = add i32 %oldcount, 1
+ %p1 = getelementptr <4 x i8> *%in, i32 %newcount
+ %p2 = getelementptr <4 x i8> *%out, i32 %newcount
+ %tmp1 = load <4 x i8> *%p1, align 4
+ store <4 x i8> %tmp1, <4 x i8> *%p2
+ %cmp = icmp eq i32 %newcount, %n
+ br i1 %cmp, label %loop, label %ret
+ret:
+ ret void
+}
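+;
+; Taken together: in t1 the D register already holds live data (%B), so a
+; full write would be incorrect and no vmov may appear; in t2 the lane load
+; would otherwise read-modify-write a stale register on every iteration, so
+; a dependency-breaking full-register vmov is expected.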
diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll
index 6da90897b94b..748d25804447 100644
--- a/test/CodeGen/ARM/addrmode.ll
+++ b/test/CodeGen/ARM/addrmode.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=arm -stats 2>&1 | grep asm-printer | grep 4
define i32 @t1(i32 %a) {
diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll
new file mode 100644
index 000000000000..273041dee34e
--- /dev/null
+++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -O0 -realign-stack=0 | FileCheck %s -check-prefix=NO-REALIGN
+; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s
+
+; rdar://12713765
+; When realign-stack is set to false, make sure we are not creating stack
+; objects that are assumed to be 64-byte aligned.
+@T3_retval = common global <16 x float> zeroinitializer, align 16
+
+define void @test(<16 x float>* noalias sret %agg.result) nounwind ssp {
+entry:
+; CHECK: test
+; CHECK: bic sp, sp, #63
+; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
+; CHECK: vst1.64
+; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
+; CHECK: vst1.64
+; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
+; CHECK: vst1.64
+; CHECK: vst1.64
+; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
+; CHECK: vst1.64
+; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
+; CHECK: vst1.64
+; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
+; CHECK: vst1.64
+; CHECK: vst1.64
+; NO-REALIGN: test
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
+; NO-REALIGN: vst1.64
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
+; NO-REALIGN: vst1.64
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
+; NO-REALIGN: vst1.64
+; NO-REALIGN: vst1.64
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
+; NO-REALIGN: vst1.64
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
+; NO-REALIGN: vst1.64
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
+; NO-REALIGN: vst1.64
+; NO-REALIGN: vst1.64
+ %retval = alloca <16 x float>, align 16
+ %0 = load <16 x float>* @T3_retval, align 16
+ store <16 x float> %0, <16 x float>* %retval
+ %1 = load <16 x float>* %retval
+ store <16 x float> %1, <16 x float>* %agg.result, align 16
+ ret void
+}
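+;
+; Note the addressing idioms above: the realigned version may form offsets
+; with orr only because the preceding bic has zeroed the low six bits of the
+; base; the non-realigned version cannot assume that and must use add.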
diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll
index 5e12d8e03555..c74701663459 100644
--- a/test/CodeGen/ARM/arm-modifier.ll
+++ b/test/CodeGen/ARM/arm-modifier.ll
@@ -61,8 +61,7 @@ ret void
define i64 @f4(i64* %val) nounwind {
entry:
;CHECK: f4
- ;CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r0]
- ;CHECK: mov r0, [[REG1]]
+ ;CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
%0 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [$1]", "=&r,r,*Qo"(i64* %val, i64* %val) nounwind
ret i64 %0
}
diff --git a/test/CodeGen/ARM/arm-ttype-target2.ll b/test/CodeGen/ARM/arm-ttype-target2.ll
new file mode 100644
index 000000000000..8b5087f89c04
--- /dev/null
+++ b/test/CodeGen/ARM/arm-ttype-target2.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabi -arm-enable-ehabi -arm-enable-ehabi-descriptors < %s | FileCheck %s
+
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTS3Foo = linkonce_odr constant [5 x i8] c"3Foo\00"
+@_ZTI3Foo = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([5 x i8]* @_ZTS3Foo, i32 0, i32 0) }
+
+define i32 @main() {
+entry:
+ invoke void @_Z3foov()
+ to label %return unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast ({ i8*, i8* }* @_ZTI3Foo to i8*)
+ %1 = extractvalue { i8*, i32 } %0, 1
+ %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI3Foo to i8*)) nounwind
+; CHECK: _ZTI3Foo(target2)
+
+ %matches = icmp eq i32 %1, %2
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %lpad
+ %3 = extractvalue { i8*, i32 } %0, 0
+ %4 = tail call i8* @__cxa_begin_catch(i8* %3) nounwind
+ tail call void @__cxa_end_catch()
+ br label %return
+
+return: ; preds = %entry, %catch
+ %retval.0 = phi i32 [ 1, %catch ], [ 0, %entry ]
+ ret i32 %retval.0
+
+eh.resume: ; preds = %lpad
+ resume { i8*, i32 } %0
+}
+
+declare void @_Z3foov()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
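+
+; The (target2) operand modifier requests an R_ARM_TARGET2 relocation for the
+; typeinfo reference; ARM EHABI leaves its meaning platform-defined (on Linux
+; it is typically resolved GOT-relative) rather than a plain absolute word.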
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index e9609ac0f9ef..f2c7305ff33a 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -1,98 +1,176 @@
; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB
define i64 @test1(i64* %ptr, i64 %val) {
-; CHECK: test1
+; CHECK: test1:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: adds r0, r2
-; CHECK: adc r1, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: adds [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK: adc [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
+
+; CHECK-THUMB: test1:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: adds.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: adc.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
%r = atomicrmw add i64* %ptr, i64 %val seq_cst
ret i64 %r
}
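+
+; In ARM mode, ldrexd and strexd require an even/odd consecutive register
+; pair, hence the (r[0-9]?[02468]) / (r[0-9]?[13579]) captures above; Thumb2
+; has no such pairing restriction, so the CHECK-THUMB patterns accept any
+; register.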
define i64 @test2(i64* %ptr, i64 %val) {
-; CHECK: test2
+; CHECK: test2:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: subs r0, r2
-; CHECK: sbc r1, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: subs [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK: sbc [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
+
+; CHECK-THUMB: test2:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: sbc.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
%r = atomicrmw sub i64* %ptr, i64 %val seq_cst
ret i64 %r
}
define i64 @test3(i64* %ptr, i64 %val) {
-; CHECK: test3
+; CHECK: test3:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: and r0, r2
-; CHECK: and r1, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
+
+; CHECK-THUMB: test3:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
%r = atomicrmw and i64* %ptr, i64 %val seq_cst
ret i64 %r
}
define i64 @test4(i64* %ptr, i64 %val) {
-; CHECK: test4
+; CHECK: test4:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: orr r0, r2
-; CHECK: orr r1, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
+
+; CHECK-THUMB: test4:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
%r = atomicrmw or i64* %ptr, i64 %val seq_cst
ret i64 %r
}
define i64 @test5(i64* %ptr, i64 %val) {
-; CHECK: test5
+; CHECK: test5:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: eor r0, r2
-; CHECK: eor r1, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
+
+; CHECK-THUMB: test5:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
%r = atomicrmw xor i64* %ptr, i64 %val seq_cst
ret i64 %r
}
define i64 @test6(i64* %ptr, i64 %val) {
-; CHECK: test6
+; CHECK: test6:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
+
+; CHECK-THUMB: test6:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
%r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
ret i64 %r
}
define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
-; CHECK: test7
+; CHECK: test7:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: cmp r2
-; CHECK: cmpeq r3
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: cmp [[REG1]]
+; CHECK: cmpeq [[REG2]]
; CHECK: bne
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
+
+; CHECK-THUMB: test7:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: cmp [[REG1]]
+; CHECK-THUMB: it eq
+; CHECK-THUMB: cmpeq [[REG2]]
+; CHECK-THUMB: bne
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
%r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst
ret i64 %r
}
@@ -100,15 +178,27 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; Compiles down to cmpxchg
; FIXME: Should compile to a single ldrexd
define i64 @test8(i64* %ptr) {
-; CHECK: test8
-; CHECK: ldrexd r2, r3
-; CHECK: cmp r2
-; CHECK: cmpeq r3
+; CHECK: test8:
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: cmp [[REG1]]
+; CHECK: cmpeq [[REG2]]
; CHECK: bne
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
+
+; CHECK-THUMB: test8:
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: cmp [[REG1]]
+; CHECK-THUMB: it eq
+; CHECK-THUMB: cmpeq [[REG2]]
+; CHECK-THUMB: bne
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
%r = load atomic i64* %ptr seq_cst, align 8
ret i64 %r
}
@@ -116,13 +206,131 @@ define i64 @test8(i64* %ptr) {
; Compiles down to atomicrmw xchg; there really isn't any more efficient
; way to write it.
define void @test9(i64* %ptr, i64 %val) {
-; CHECK: test9
+; CHECK: test9:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
+
+; CHECK-THUMB: test9:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
store atomic i64 %val, i64* %ptr seq_cst, align 8
ret void
}
+
+define i64 @test10(i64* %ptr, i64 %val) {
+; CHECK: test10:
+; CHECK: dmb ish
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
+; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
+; CHECK: blt
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: cmp
+; CHECK: bne
+; CHECK: dmb ish
+
+; CHECK-THUMB: test10:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: blt
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
+ %r = atomicrmw min i64* %ptr, i64 %val seq_cst
+ ret i64 %r
+}
+
+define i64 @test11(i64* %ptr, i64 %val) {
+; CHECK: test11:
+; CHECK: dmb ish
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
+; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
+; CHECK: blo
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: cmp
+; CHECK: bne
+; CHECK: dmb ish
+
+; CHECK-THUMB: test11:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: blo
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
+ %r = atomicrmw umin i64* %ptr, i64 %val seq_cst
+ ret i64 %r
+}
+
+define i64 @test12(i64* %ptr, i64 %val) {
+; CHECK: test12:
+; CHECK: dmb ish
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
+; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
+; CHECK: bge
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: cmp
+; CHECK: bne
+; CHECK: dmb ish
+
+; CHECK-THUMB: test12:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: bge
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
+ %r = atomicrmw max i64* %ptr, i64 %val seq_cst
+ ret i64 %r
+}
+
+define i64 @test13(i64* %ptr, i64 %val) {
+; CHECK: test13:
+; CHECK: dmb ish
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
+; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
+; CHECK: bhs
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: cmp
+; CHECK: bne
+; CHECK: dmb ish
+
+; CHECK-THUMB: test13:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: bhs
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+ %r = atomicrmw umax i64* %ptr, i64 %val seq_cst
+ ret i64 %r
+}
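+
+; The branch condition encodes the comparison for each operation: blt (signed
+; less-than) for min, blo (unsigned lower) for umin, bge (signed
+; greater-or-equal) for max, and bhs (unsigned higher-or-same) for umax, each
+; gating the subs/sbcs 64-bit compare-and-store loop.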
+
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index 96e83dd88e92..c5d00a0f8a4c 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -49,3 +49,68 @@ while.body:
while.end:
ret void
}
+
+; Allow partial CPSR dependency when code size is the priority.
+; rdar://12878928
+define void @t3(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind minsize {
+entry:
+; CHECK: t3:
+ %tobool7 = icmp eq i32* %ptr2, null
+ br i1 %tobool7, label %while.end, label %while.body
+
+while.body:
+; CHECK: while.body
+; CHECK: mul r{{[0-9]+}}
+; CHECK: muls
+ %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
+ %ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
+ %0 = load i32* %ptr1.addr.09, align 4
+ %arrayidx1 = getelementptr inbounds i32* %ptr1.addr.09, i32 1
+ %1 = load i32* %arrayidx1, align 4
+ %arrayidx3 = getelementptr inbounds i32* %ptr1.addr.09, i32 2
+ %2 = load i32* %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds i32* %ptr1.addr.09, i32 3
+ %3 = load i32* %arrayidx4, align 4
+ %add.ptr = getelementptr inbounds i32* %ptr1.addr.09, i32 4
+ %mul = mul i32 %1, %0
+ %mul5 = mul i32 %mul, %2
+ %mul6 = mul i32 %mul5, %3
+ store i32 %mul6, i32* %ptr2.addr.08, align 4
+ %incdec.ptr = getelementptr inbounds i32* %ptr2.addr.08, i32 -1
+ %tobool = icmp eq i32* %incdec.ptr, null
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+ ret void
+}
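+
+; Under minsize, the later multiplies may use the narrow 16-bit muls encoding
+; even though its CPSR write creates a partial dependency; the checks expect
+; the first multiply as a flag-preserving 32-bit mul and a later one as muls.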
+
+; Avoid producing tMOVi8 after a high-latency flag-setting operation.
+; <rdar://problem/13468102>
+define void @t4(i32* nocapture %p, double* nocapture %q) {
+entry:
+; CHECK: t4
+; CHECK: vmrs APSR_nzcv, fpscr
+; CHECK: if.then
+; CHECK-NOT: movs
+ %0 = load double* %q, align 4
+ %cmp = fcmp olt double %0, 1.000000e+01
+ %incdec.ptr1 = getelementptr inbounds i32* %p, i32 1
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ store i32 7, i32* %p, align 4
+ %incdec.ptr2 = getelementptr inbounds i32* %p, i32 2
+ store i32 8, i32* %incdec.ptr1, align 4
+ store i32 9, i32* %incdec.ptr2, align 4
+ br label %if.end
+
+if.else:
+ store i32 3, i32* %p, align 4
+ %incdec.ptr5 = getelementptr inbounds i32* %p, i32 2
+ store i32 5, i32* %incdec.ptr1, align 4
+ store i32 6, i32* %incdec.ptr5, align 4
+ br label %if.end
+
+if.end:
+ ret void
+}
diff --git a/test/CodeGen/ARM/bfx.ll b/test/CodeGen/ARM/bfx.ll
index 519c1353a387..394da9e157ff 100644
--- a/test/CodeGen/ARM/bfx.ll
+++ b/test/CodeGen/ARM/bfx.ll
@@ -26,3 +26,28 @@ define i32 @ubfx2(i32 %a) {
ret i32 %t2
}
+; rdar://12870177
+define i32 @ubfx_opt(i32* nocapture %ctx, i32 %x) nounwind readonly ssp {
+entry:
+; CHECK: ubfx_opt
+; CHECK: lsr [[REG1:(lr|r[0-9]+)]], r1, #24
+; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG1]], lsl #2]
+; CHECK: ubfx [[REG2:(lr|r[0-9]+)]], r1, #16, #8
+; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG2]], lsl #2]
+; CHECK: ubfx [[REG3:(lr|r[0-9]+)]], r1, #8, #8
+; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG3]], lsl #2]
+ %and = lshr i32 %x, 8
+ %shr = and i32 %and, 255
+ %and1 = lshr i32 %x, 16
+ %shr2 = and i32 %and1, 255
+ %shr4 = lshr i32 %x, 24
+ %arrayidx = getelementptr inbounds i32* %ctx, i32 %shr4
+ %0 = load i32* %arrayidx, align 4
+ %arrayidx5 = getelementptr inbounds i32* %ctx, i32 %shr2
+ %1 = load i32* %arrayidx5, align 4
+ %add = add i32 %1, %0
+ %arrayidx6 = getelementptr inbounds i32* %ctx, i32 %shr
+ %2 = load i32* %arrayidx6, align 4
+ %add7 = add i32 %add, %2
+ ret i32 %add7
+}
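+
+; Each lshr+and pair above is a contiguous bit-field extract: for example,
+; (%x >> 16) & 255 selects bits [23:16], which is exactly
+; ubfx rN, r1, #16, #8 (rN being whatever destination register is chosen).
+; The top byte needs no mask, so a plain lsr #24 suffices for %shr4.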
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index 94edff5c0be5..58fbbda0f6bd 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -103,7 +103,6 @@ define i32 @t8(i32 %x) nounwind ssp {
entry:
; CHECKT2D: t8:
; CHECKT2D-NOT: push
-; CHECKT2D-NOT
%and = and i32 %x, 1
%tobool = icmp eq i32 %and, 0
br i1 %tobool, label %if.end, label %if.then
diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll
index 00b16888f389..5ec7f74a605f 100644
--- a/test/CodeGen/ARM/call_nolink.ll
+++ b/test/CodeGen/ARM/call_nolink.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: not grep "bx lr"
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s
%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
@r = external global [14 x i32] ; <[14 x i32]*> [#uses=4]
@@ -8,6 +7,8 @@
@numi = external global i32 ; <i32*> [#uses=1]
@counter = external global [2 x i32] ; <[2 x i32]*> [#uses=1]
+; CHECK: main_bb_2E_i_bb205_2E_i_2E_i_bb115_2E_i_2E_i:
+; CHECK-NOT: bx lr
define void @main_bb_2E_i_bb205_2E_i_2E_i_bb115_2E_i_2E_i() {
newFuncRoot:
@@ -50,3 +51,12 @@ bb115.i.i: ; preds = %bb115.i.i.bb115.i.i_crit_edge, %newFuncRoot
icmp slt i32 %tmp166.i.i, %tmp168.i.i ; <i1>:0 [#uses=1]
br i1 %0, label %bb115.i.i.bb115.i.i_crit_edge, label %bb115.i.i.bb170.i.i_crit_edge.exitStub
}
+
+define void @PR15520(void ()* %fn) {
+ call void %fn()
+ ret void
+
+; CHECK: PR15520:
+; CHECK: mov lr, pc
+; CHECK: mov pc, r0
+}
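+
+; The mov lr, pc / mov pc, r0 pair emulates an indirect call on a target
+; without blx: reading pc yields the address eight bytes ahead (two ARM
+; instructions), so lr ends up holding the address of the instruction that
+; follows the mov pc, i.e. the return address.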
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
index 3ba947579a3a..e7bd5f41bb4b 100644
--- a/test/CodeGen/ARM/coalesce-subregs.ll
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -147,7 +147,7 @@ if.end: ; preds = %entry, %if.then
; CHECK: vmov.f32 {{.*}}, #1.0
; CHECK-NOT: vmov
; CHECK-NOT: vorr
-; CHECK: %if.end
+; CHECK: bx
; We may leave the last insertelement in the if.end block.
; It is inserting the %add value into a dead lane, but %add causes interference
; in the entry block, and we don't do dead lane checks across basic blocks.
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll
index 7316452cd617..769ba55eb9eb 100644
--- a/test/CodeGen/ARM/commute-movcc.ll
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
-; RUN: llc -mtriple=armv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s
; LLVM IR optimizers canonicalize icmp+select this way.
; Make sure that TwoAddressInstructionPass can commute the corresponding
diff --git a/test/CodeGen/ARM/crash.ll b/test/CodeGen/ARM/crash.ll
index 0f6f33e0448e..4e3e2010b07a 100644
--- a/test/CodeGen/ARM/crash.ll
+++ b/test/CodeGen/ARM/crash.ll
@@ -69,3 +69,26 @@ bb:
store <4 x float> %tmp154, <4 x float>* undef, align 16
ret void
}
+
+; <rdar://problem/12721258>
+%A = type { %B }
+%B = type { i32 }
+
+define void @_Z3Foov() ssp {
+entry:
+ br i1 true, label %exit, label %false
+
+false:
+ invoke void undef(%A* undef)
+ to label %exit unwind label %lpad
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
+ unreachable
+
+exit:
+ ret void
+}
+
+declare i32 @__gxx_personality_sj0(...)
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index a7b44e6fe709..33c8e9daae69 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -30,29 +30,27 @@ declare void @foobar(i64, i64)
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1}
-!llvm.dbg.lv.foo = !{!5, !13, !14, !17, !18, !19}
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772", metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{null}
-!5 = metadata !{i32 590081, metadata !1, metadata !"this", metadata !2, i32 16777227, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!6 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 589843, metadata !0, metadata !"tag_s", metadata !2, i32 5, i64 96, i64 32, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!5 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !2, i32 16777227, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!6 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 786451, metadata !32, metadata !0, metadata !"tag_s", i32 5, i64 96, i64 32, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!8 = metadata !{metadata !9, metadata !11, metadata !12}
-!9 = metadata !{i32 589837, metadata !7, metadata !"x", metadata !2, i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 589837, metadata !7, metadata !"y", metadata !2, i32 7, i64 32, i64 32, i64 32, i32 0, metadata !10} ; [ DW_TAG_member ]
-!12 = metadata !{i32 589837, metadata !7, metadata !"z", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ]
-!13 = metadata !{i32 590081, metadata !1, metadata !"c", metadata !2, i32 33554443, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!14 = metadata !{i32 590081, metadata !1, metadata !"x", metadata !2, i32 50331659, metadata !15, i32 0} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 589846, metadata !0, metadata !"UInt64", metadata !2, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ]
-!16 = metadata !{i32 589860, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 590081, metadata !1, metadata !"y", metadata !2, i32 67108875, metadata !15, i32 0} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 590081, metadata !1, metadata !"ptr1", metadata !2, i32 83886091, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 590081, metadata !1, metadata !"ptr2", metadata !2, i32 100663307, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"x", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"y", i32 7, i64 32, i64 32, i64 32, i32 0, metadata !10} ; [ DW_TAG_member ]
+!12 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"z", i32 8, i64 32, i64 32, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ]
+!13 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 33554443, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!14 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 50331659, metadata !15, i32 0, null} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 786454, metadata !32, metadata !0, metadata !"UInt64", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ]
+!16 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 67108875, metadata !15, i32 0, null} ; [ DW_TAG_arg_variable ]
+!18 = metadata !{i32 786689, metadata !1, metadata !"ptr1", metadata !2, i32 83886091, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 786689, metadata !1, metadata !"ptr2", metadata !2, i32 100663307, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
!20 = metadata !{i32 11, i32 24, metadata !1, null}
!21 = metadata !{i32 11, i32 44, metadata !1, null}
!22 = metadata !{i32 11, i32 54, metadata !1, null}
@@ -60,6 +58,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!24 = metadata !{i32 11, i32 81, metadata !1, null}
!25 = metadata !{i32 11, i32 101, metadata !1, null}
!26 = metadata !{i32 12, i32 3, metadata !27, null}
-!27 = metadata !{i32 589835, metadata !1, i32 11, i32 107, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 786443, metadata !1, i32 11, i32 107, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
!28 = metadata !{i32 13, i32 5, metadata !27, null}
!29 = metadata !{i32 14, i32 1, metadata !27, null}
+!30 = metadata !{metadata !1}
+!31 = metadata !{metadata !5, metadata !13, metadata !14, metadata !17, metadata !18, metadata !19}
+!32 = metadata !{metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772"}
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 0ad0a15ca3d9..d0bfecc5af41 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -93,153 +93,166 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
}
!llvm.dbg.cu = !{!0}
-!llvm.dbg.enum = !{!1, !1, !5, !5, !9, !14, !19, !19, !14, !14, !14, !19, !19, !19}
-!llvm.dbg.sp = !{!23}
-!0 = metadata !{i32 589841, i32 0, i32 16, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !"Apple clang version 2.1", i1 true, i1 false, metadata !"", i32 2} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589828, metadata !0, metadata !"", metadata !2, i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
-!2 = metadata !{i32 589865, metadata !"header.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, i32 16, metadata !40, metadata !"Apple clang version 2.1", i1 false, metadata !"", i32 2, metadata !147, null, metadata !148, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"", i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!2 = metadata !{i32 786473, metadata !160} ; [ DW_TAG_file_type ]
!3 = metadata !{metadata !4}
-!4 = metadata !{i32 589864, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ]
-!5 = metadata !{i32 589828, metadata !0, metadata !"Mode", metadata !6, i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
-!6 = metadata !{i32 589865, metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 786472, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ]
+!5 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"Mode", i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!6 = metadata !{i32 786473, metadata !161} ; [ DW_TAG_file_type ]
!7 = metadata !{metadata !8}
-!8 = metadata !{i32 589864, metadata !"One", i64 0} ; [ DW_TAG_enumerator ]
-!9 = metadata !{i32 589828, metadata !0, metadata !"", metadata !10, i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
-!10 = metadata !{i32 589865, metadata !"header3.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!8 = metadata !{i32 786472, metadata !"One", i64 0} ; [ DW_TAG_enumerator ]
+!9 = metadata !{i32 786433, metadata !149, metadata !0, metadata !"", i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!10 = metadata !{i32 786473, metadata !149} ; [ DW_TAG_file_type ]
!11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 589864, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ]
-!13 = metadata !{i32 589864, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ]
-!14 = metadata !{i32 589828, metadata !0, metadata !"", metadata !15, i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
-!15 = metadata !{i32 589865, metadata !"Private.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!12 = metadata !{i32 786472, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ]
+!13 = metadata !{i32 786472, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ]
+!14 = metadata !{i32 786433, metadata !150, metadata !0, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!15 = metadata !{i32 786473, metadata !150} ; [ DW_TAG_file_type ]
!16 = metadata !{metadata !17, metadata !18}
-!17 = metadata !{i32 589864, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ]
-!18 = metadata !{i32 589864, metadata !"Double", i64 1} ; [ DW_TAG_enumerator ]
-!19 = metadata !{i32 589828, metadata !0, metadata !"", metadata !20, i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
-!20 = metadata !{i32 589865, metadata !"header4.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!17 = metadata !{i32 786472, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ]
+!18 = metadata !{i32 786472, metadata !"Double", i64 1} ; [ DW_TAG_enumerator ]
+!19 = metadata !{i32 786433, metadata !151, metadata !0, metadata !"", i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!20 = metadata !{i32 786473, metadata !151} ; [ DW_TAG_file_type ]
!21 = metadata !{metadata !22}
-!22 = metadata !{i32 589864, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ]
-!23 = metadata !{i32 589870, i32 0, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null} ; [ DW_TAG_subprogram ]
-!24 = metadata !{i32 589865, metadata !"MyLibrary.m", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
-!25 = metadata !{i32 589845, metadata !24, metadata !"", metadata !24, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!22 = metadata !{i32 786472, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ]
+!23 = metadata !{i32 786478, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null, i32 609} ; [ DW_TAG_subprogram ]
+!24 = metadata !{i32 786473, metadata !152} ; [ DW_TAG_file_type ]
+!25 = metadata !{i32 786453, metadata !152, metadata !24, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!26 = metadata !{null}
-!27 = metadata !{i32 590081, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 64} ; [ DW_TAG_arg_variable ]
-!28 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ]
-!29 = metadata !{i32 589843, metadata !24, metadata !"__block_literal_14", metadata !24, i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!27 = metadata !{i32 786689, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 64, null} ; [ DW_TAG_arg_variable ]
+!28 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ]
+!29 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"__block_literal_14", i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!30 = metadata !{metadata !31, metadata !33, metadata !35, metadata !36, metadata !37, metadata !48, metadata !89, metadata !124}
-!31 = metadata !{i32 589837, metadata !24, metadata !"__isa", metadata !24, i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
-!32 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!33 = metadata !{i32 589837, metadata !24, metadata !"__flags", metadata !24, i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ]
-!34 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!35 = metadata !{i32 589837, metadata !24, metadata !"__reserved", metadata !24, i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
-!36 = metadata !{i32 589837, metadata !24, metadata !"__FuncPtr", metadata !24, i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ]
-!37 = metadata !{i32 589837, metadata !24, metadata !"__descriptor", metadata !24, i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ]
-!38 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ]
-!39 = metadata !{i32 589843, metadata !0, metadata !"__block_descriptor_withcopydispose", metadata !40, i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!40 = metadata !{i32 589865, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!31 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
+!32 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!33 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__flags", i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ]
+!34 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!35 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__reserved", i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
+!36 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__FuncPtr", i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ]
+!37 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__descriptor", i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ]
+!38 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ]
+!39 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"__block_descriptor_withcopydispose", i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!40 = metadata !{i32 786473, metadata !153} ; [ DW_TAG_file_type ]
!41 = metadata !{metadata !42, metadata !44, metadata !45, metadata !47}
-!42 = metadata !{i32 589837, metadata !40, metadata !"reserved", metadata !40, i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ]
-!43 = metadata !{i32 589860, metadata !0, metadata !"long unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!44 = metadata !{i32 589837, metadata !40, metadata !"Size", metadata !40, i32 307, i64 32, i64 32, i64 32, i32 0, metadata !43} ; [ DW_TAG_member ]
-!45 = metadata !{i32 589837, metadata !40, metadata !"CopyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 64, i32 0, metadata !46} ; [ DW_TAG_member ]
-!46 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
-!47 = metadata !{i32 589837, metadata !40, metadata !"DestroyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ]
-!48 = metadata !{i32 589837, metadata !24, metadata !"mydata", metadata !24, i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ]
-!49 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ]
-!50 = metadata !{i32 589843, metadata !24, metadata !"", metadata !24, i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!42 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"reserved", i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ]
+!43 = metadata !{i32 786468, null, metadata !0, metadata !"long unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!44 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"Size", i32 307, i64 32, i64 32, i64 32, i32 0, metadata !43} ; [ DW_TAG_member ]
+!45 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"CopyFuncPtr", i32 307, i64 32, i64 32, i64 64, i32 0, metadata !46} ; [ DW_TAG_member ]
+!46 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
+!47 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"DestroyFuncPtr", i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ]
+!48 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ]
+!49 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ]
+!50 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"", i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!51 = metadata !{metadata !52, metadata !53, metadata !54, metadata !55, metadata !56, metadata !57, metadata !58}
-!52 = metadata !{i32 589837, metadata !24, metadata !"__isa", metadata !24, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
-!53 = metadata !{i32 589837, metadata !24, metadata !"__forwarding", metadata !24, i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ]
-!54 = metadata !{i32 589837, metadata !24, metadata !"__flags", metadata !24, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
-!55 = metadata !{i32 589837, metadata !24, metadata !"__size", metadata !24, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ]
-!56 = metadata !{i32 589837, metadata !24, metadata !"__copy_helper", metadata !24, i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ]
-!57 = metadata !{i32 589837, metadata !24, metadata !"__destroy_helper", metadata !24, i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ]
-!58 = metadata !{i32 589837, metadata !24, metadata !"mydata", metadata !24, i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ]
-!59 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ]
-!60 = metadata !{i32 589843, metadata !24, metadata !"UIMydata", metadata !61, i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ]
-!61 = metadata !{i32 589865, metadata !"header11.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!52 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
+!53 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__forwarding", i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ]
+!54 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
+!55 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__size", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ]
+!56 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__copy_helper", i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ]
+!57 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__destroy_helper", i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ]
+!58 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ]
+!59 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ]
+!60 = metadata !{i32 786451, metadata !154, metadata !24, metadata !"UIMydata", i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!61 = metadata !{i32 786473, metadata !154} ; [ DW_TAG_file_type ]
!62 = metadata !{metadata !63, metadata !71, metadata !75, metadata !79}
-!63 = metadata !{i32 589852, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
-!64 = metadata !{i32 589843, metadata !40, metadata !"NSO", metadata !65, i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ]
-!65 = metadata !{i32 589865, metadata !"NSO.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!63 = metadata !{i32 786460, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
+!64 = metadata !{i32 786451, metadata !155, metadata !40, metadata !"NSO", i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!65 = metadata !{i32 786473, metadata !155} ; [ DW_TAG_file_type ]
!66 = metadata !{metadata !67}
-!67 = metadata !{i32 589837, metadata !65, metadata !"isa", metadata !65, i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!68 = metadata !{i32 589846, metadata !0, metadata !"Class", metadata !40, i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ]
-!69 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ]
-!70 = metadata !{i32 589843, metadata !0, metadata !"objc_class", metadata !40, i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!71 = metadata !{i32 589837, metadata !61, metadata !"_mydataRef", metadata !61, i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!72 = metadata !{i32 589846, metadata !0, metadata !"CFTypeRef", metadata !24, i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ]
-!73 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ]
-!74 = metadata !{i32 589862, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ]
-!75 = metadata !{i32 589837, metadata !61, metadata !"_scale", metadata !61, i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!76 = metadata !{i32 589846, metadata !0, metadata !"Float", metadata !77, i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ]
-!77 = metadata !{i32 589865, metadata !"header12.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
-!78 = metadata !{i32 589860, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!79 = metadata !{i32 589837, metadata !61, metadata !"_mydataFlags", metadata !61, i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!80 = metadata !{i32 589843, metadata !0, metadata !"", metadata !61, i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!67 = metadata !{i32 786445, metadata !155, metadata !65, metadata !"isa", i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!68 = metadata !{i32 786454, metadata !0, metadata !"Class", metadata !40, i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ]
+!69 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ]
+!70 = metadata !{i32 786451, metadata !40, metadata !0, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!71 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataRef", i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!72 = metadata !{i32 786454, metadata !0, metadata !"CFTypeRef", metadata !24, i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ]
+!73 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ]
+!74 = metadata !{i32 786470, null, metadata !0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ]
+!75 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_scale", i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!76 = metadata !{i32 786454, metadata !0, metadata !"Float", metadata !77, i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ]
+!77 = metadata !{i32 786473, metadata !156} ; [ DW_TAG_file_type ]
+!78 = metadata !{i32 786468, null, metadata !0, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!79 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataFlags", i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!80 = metadata !{i32 786451, metadata !154, metadata !0, metadata !"", i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!81 = metadata !{metadata !82, metadata !84, metadata !85, metadata !86, metadata !87, metadata !88}
-!82 = metadata !{i32 589837, metadata !61, metadata !"named", metadata !61, i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ]
-!83 = metadata !{i32 589860, metadata !0, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!84 = metadata !{i32 589837, metadata !61, metadata !"mydataO", metadata !61, i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ]
-!85 = metadata !{i32 589837, metadata !61, metadata !"cached", metadata !61, i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ]
-!86 = metadata !{i32 589837, metadata !61, metadata !"hasBeenCached", metadata !61, i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ]
-!87 = metadata !{i32 589837, metadata !61, metadata !"hasPattern", metadata !61, i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ]
-!88 = metadata !{i32 589837, metadata !61, metadata !"isCIMydata", metadata !61, i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ]
-!89 = metadata !{i32 589837, metadata !24, metadata !"self", metadata !24, i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ]
-!90 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ]
-!91 = metadata !{i32 589843, metadata !40, metadata !"MyWork", metadata !24, i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!82 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"named", i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ]
+!83 = metadata !{i32 786468, null, metadata !0, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!84 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"mydataO", i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ]
+!85 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"cached", i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ]
+!86 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"hasBeenCached", i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ]
+!87 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"hasPattern", i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ]
+!88 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"isCIMydata", i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ]
+!89 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"self", i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ]
+!90 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ]
+!91 = metadata !{i32 786451, metadata !152, metadata !40, metadata !"MyWork", i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ]
!92 = metadata !{metadata !93, metadata !98, metadata !101, metadata !107, metadata !123}
-!93 = metadata !{i32 589852, metadata !91, null, metadata !24, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ]
-!94 = metadata !{i32 589843, metadata !40, metadata !"twork", metadata !95, i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ]
-!95 = metadata !{i32 589865, metadata !"header13.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!93 = metadata !{i32 786460, metadata !91, null, metadata !24, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ]
+!94 = metadata !{i32 786451, metadata !157, metadata !40, metadata !"twork", i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!95 = metadata !{i32 786473, metadata !157} ; [ DW_TAG_file_type ]
!96 = metadata !{metadata !97}
-!97 = metadata !{i32 589852, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
-!98 = metadata !{i32 589837, metadata !24, metadata !"_itemID", metadata !24, i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!99 = metadata !{i32 589846, metadata !0, metadata !"uint64_t", metadata !40, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ]
-!100 = metadata !{i32 589860, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!101 = metadata !{i32 589837, metadata !24, metadata !"_library", metadata !24, i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!102 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ]
-!103 = metadata !{i32 589843, metadata !40, metadata !"MyLibrary2", metadata !104, i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ]
-!104 = metadata !{i32 589865, metadata !"header14.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!97 = metadata !{i32 786460, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
+!98 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_itemID", i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!99 = metadata !{i32 786454, metadata !0, metadata !"uint64_t", metadata !40, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ]
+!100 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!101 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_library", i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!102 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ]
+!103 = metadata !{i32 786451, metadata !158, metadata !40, metadata !"MyLibrary2", i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!104 = metadata !{i32 786473, metadata !158} ; [ DW_TAG_file_type ]
!105 = metadata !{metadata !106}
-!106 = metadata !{i32 589852, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
-!107 = metadata !{i32 589837, metadata !24, metadata !"_bounds", metadata !24, i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!108 = metadata !{i32 589846, metadata !0, metadata !"CR", metadata !40, i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ]
-!109 = metadata !{i32 589843, metadata !0, metadata !"CR", metadata !77, i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!106 = metadata !{i32 786460, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
+!107 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_bounds", i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!108 = metadata !{i32 786454, metadata !0, metadata !"CR", metadata !40, i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ]
+!109 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CR", i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!110 = metadata !{metadata !111, metadata !117}
-!111 = metadata !{i32 589837, metadata !77, metadata !"origin", metadata !77, i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ]
-!112 = metadata !{i32 589846, metadata !0, metadata !"CP", metadata !77, i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ]
-!113 = metadata !{i32 589843, metadata !0, metadata !"CP", metadata !77, i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!111 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"origin", i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ]
+!112 = metadata !{i32 786454, metadata !0, metadata !"CP", metadata !77, i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ]
+!113 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CP", i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!114 = metadata !{metadata !115, metadata !116}
-!115 = metadata !{i32 589837, metadata !77, metadata !"x", metadata !77, i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
-!116 = metadata !{i32 589837, metadata !77, metadata !"y", metadata !77, i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
-!117 = metadata !{i32 589837, metadata !77, metadata !"size", metadata !77, i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ]
-!118 = metadata !{i32 589846, metadata !0, metadata !"Size", metadata !77, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ]
-!119 = metadata !{i32 589843, metadata !0, metadata !"Size", metadata !77, i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!115 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"x", i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
+!116 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"y", i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
+!117 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"size", i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ]
+!118 = metadata !{i32 786454, metadata !0, metadata !"Size", metadata !77, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ]
+!119 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"Size", i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!120 = metadata !{metadata !121, metadata !122}
-!121 = metadata !{i32 589837, metadata !77, metadata !"width", metadata !77, i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
-!122 = metadata !{i32 589837, metadata !77, metadata !"height", metadata !77, i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
-!123 = metadata !{i32 589837, metadata !24, metadata !"_data", metadata !24, i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!124 = metadata !{i32 589837, metadata !24, metadata !"semi", metadata !24, i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ]
-!125 = metadata !{i32 589846, metadata !0, metadata !"d_t", metadata !24, i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ]
-!126 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ]
-!127 = metadata !{i32 589843, metadata !0, metadata !"my_struct", metadata !128, i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!128 = metadata !{i32 589865, metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!121 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"width", i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
+!122 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"height", i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
+!123 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_data", i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!124 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"semi", i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ]
+!125 = metadata !{i32 786454, metadata !0, metadata !"d_t", metadata !24, i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ]
+!126 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ]
+!127 = metadata !{i32 786451, metadata !159, metadata !0, metadata !"my_struct", i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!128 = metadata !{i32 786473, metadata !159} ; [ DW_TAG_file_type ]
!129 = metadata !{i32 609, i32 144, metadata !23, null}
-!130 = metadata !{i32 590081, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0} ; [ DW_TAG_arg_variable ]
+!130 = metadata !{i32 786689, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0, null} ; [ DW_TAG_arg_variable ]
!131 = metadata !{i32 609, i32 155, metadata !23, null}
-!132 = metadata !{i32 590081, metadata !23, metadata !"bounds", metadata !24, i32 50332257, metadata !108, i32 0} ; [ DW_TAG_arg_variable ]
+!132 = metadata !{i32 786689, metadata !23, metadata !"bounds", metadata !24, i32 50332257, metadata !108, i32 0, null} ; [ DW_TAG_arg_variable ]
!133 = metadata !{i32 609, i32 175, metadata !23, null}
-!134 = metadata !{i32 590081, metadata !23, metadata !"data", metadata !24, i32 67109473, metadata !108, i32 0} ; [ DW_TAG_arg_variable ]
+!134 = metadata !{i32 786689, metadata !23, metadata !"data", metadata !24, i32 67109473, metadata !108, i32 0, null} ; [ DW_TAG_arg_variable ]
!135 = metadata !{i32 609, i32 190, metadata !23, null}
-!136 = metadata !{i32 590080, metadata !23, metadata !"mydata", metadata !24, i32 604, metadata !50, i32 0, i64 1, i64 20, i64 2, i64 1, i64 4, i64 2, i64 1, i64 24} ; [ DW_TAG_auto_variable ]
+!136 = metadata !{i32 786688, metadata !23, metadata !"mydata", metadata !24, i32 604, metadata !50, i32 0, null, i64 1, i64 20, i64 2, i64 1, i64 4, i64 2, i64 1, i64 24} ; [ DW_TAG_auto_variable ]
!137 = metadata !{i32 604, i32 49, metadata !23, null}
-!138 = metadata !{i32 590080, metadata !23, metadata !"self", metadata !40, i32 604, metadata !90, i32 0, i64 1, i64 24} ; [ DW_TAG_auto_variable ]
-!139 = metadata !{i32 590080, metadata !23, metadata !"semi", metadata !24, i32 607, metadata !125, i32 0, i64 1, i64 28} ; [ DW_TAG_auto_variable ]
+!138 = metadata !{i32 786688, metadata !23, metadata !"self", metadata !40, i32 604, metadata !90, i32 0, null, i64 1, i64 24} ; [ DW_TAG_auto_variable ]
+!139 = metadata !{i32 786688, metadata !23, metadata !"semi", metadata !24, i32 607, metadata !125, i32 0, null, i64 1, i64 28} ; [ DW_TAG_auto_variable ]
!140 = metadata !{i32 607, i32 30, metadata !23, null}
!141 = metadata !{i32 610, i32 17, metadata !142, null}
-!142 = metadata !{i32 589835, metadata !23, i32 609, i32 200, metadata !24, i32 94} ; [ DW_TAG_lexical_block ]
+!142 = metadata !{i32 786443, metadata !23, i32 609, i32 200, metadata !24, i32 94} ; [ DW_TAG_lexical_block ]
!143 = metadata !{i32 611, i32 17, metadata !142, null}
!144 = metadata !{i32 612, i32 17, metadata !142, null}
!145 = metadata !{i32 613, i32 17, metadata !142, null}
!146 = metadata !{i32 615, i32 13, metadata !142, null}
+!147 = metadata !{metadata !1, metadata !1, metadata !5, metadata !5, metadata !9, metadata !14, metadata !19, metadata !19, metadata !14, metadata !14, metadata !14, metadata !19, metadata !19, metadata !19}
+!148 = metadata !{metadata !23}
+!149 = metadata !{metadata !"header3.h", metadata !"/Volumes/Sandbox/llvm"}
+!150 = metadata !{metadata !"Private.h", metadata !"/Volumes/Sandbox/llvm"}
+!151 = metadata !{metadata !"header4.h", metadata !"/Volumes/Sandbox/llvm"}
+!152 = metadata !{metadata !"MyLibrary.m", metadata !"/Volumes/Sandbox/llvm"}
+!153 = metadata !{metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm"}
+!154 = metadata !{metadata !"header11.h", metadata !"/Volumes/Sandbox/llvm"}
+!155 = metadata !{metadata !"NSO.h", metadata !"/Volumes/Sandbox/llvm"}
+!156 = metadata !{metadata !"header12.h", metadata !"/Volumes/Sandbox/llvm"}
+!157 = metadata !{metadata !"header13.h", metadata !"/Volumes/Sandbox/llvm"}
+!158 = metadata !{metadata !"header14.h", metadata !"/Volumes/Sandbox/llvm"}
+!159 = metadata !{metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm"}
+!160 = metadata !{metadata !"header.h", metadata !"/Volumes/Sandbox/llvm"}
+!161 = metadata !{metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm"}
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 4f4ff8e81707..95e6cf2554a0 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
target triple = "thumbv7-apple-macosx10.6.7"
;CHECK: vadd.f32 q4, q8, q8
-;CHECK-NEXT: Ltmp1
+;CHECK-NEXT: LBB0_1
;CHECK:@DEBUG_VALUE: x <- Q4+0
;CHECK-NEXT:@DEBUG_VALUE: y <- Q4+0
@@ -38,58 +38,59 @@ declare i32 @printf(i8* nocapture, ...) nounwind
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0, !10, !14}
-!llvm.dbg.lv.test0001 = !{!18}
-!llvm.dbg.lv.main = !{!19, !20, !24, !26, !27, !28, !29}
-!llvm.dbg.lv.printFV = !{!30}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589846, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
-!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 786691, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
+!7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
!8 = metadata !{metadata !9}
-!9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ]
-!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ]
+!10 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null, null, metadata !52, i32 0} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!12 = metadata !{metadata !13}
-!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 589870, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
-!15 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!16 = metadata !{i32 589845, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786478, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 0} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ]
+!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!17 = metadata !{null}
-!18 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0} ; [ DW_TAG_arg_variable ]
-!20 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0} ; [ DW_TAG_arg_variable ]
-!21 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
-!22 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!24 = metadata !{i32 590080, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
-!25 = metadata !{i32 589835, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ]
-!26 = metadata !{i32 590080, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
-!27 = metadata !{i32 590080, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!28 = metadata !{i32 590080, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 590080, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!30 = metadata !{i32 590081, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
-!32 = metadata !{i32 589846, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
-!33 = metadata !{i32 589847, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
+!18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
+!20 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ]
+!21 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!24 = metadata !{i32 786688, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
+!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 786688, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
+!27 = metadata !{i32 786688, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!28 = metadata !{i32 786688, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 786688, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ]
+!31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
+!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
!34 = metadata !{metadata !35, metadata !37}
-!35 = metadata !{i32 589837, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
-!36 = metadata !{i32 589846, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!37 = metadata !{i32 589837, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
-!38 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
+!36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!37 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"A", i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
+!38 = metadata !{i32 786433, null, metadata !2, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
!39 = metadata !{i32 79, i32 7, metadata !40, null}
-!40 = metadata !{i32 589835, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ]
-!41 = metadata !{i32 589835, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ]
-!42 = metadata !{i32 589835, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ]
-!43 = metadata !{i32 589835, metadata !25, i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 786443, metadata !1, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ]
+!41 = metadata !{i32 786443, metadata !1, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ]
+!42 = metadata !{i32 786443, metadata !1, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ]
+!43 = metadata !{i32 786443, metadata !1, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ]
!44 = metadata !{i32 75, i32 5, metadata !42, null}
!45 = metadata !{i32 42, i32 2, metadata !46, metadata !48}
-!46 = metadata !{i32 589835, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ]
-!47 = metadata !{i32 589835, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ]
+!46 = metadata !{i32 786443, metadata !15, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ]
+!47 = metadata !{i32 786443, metadata !15, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ]
!48 = metadata !{i32 95, i32 3, metadata !25, null}
!49 = metadata !{i32 99, i32 3, metadata !25, null}
+!50 = metadata !{metadata !0, metadata !10, metadata !14}
+!51 = metadata !{metadata !18}
+!52 = metadata !{metadata !19, metadata !20, metadata !24, metadata !26, metadata !27, metadata !28, metadata !29}
+!53 = metadata !{metadata !30}
+!54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"}
+!55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"}
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index 325eea00c8d6..e3e4d068932e 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -56,44 +56,41 @@ entry:
declare i32 @puts(i8* nocapture) nounwind
-!llvm.dbg.sp = !{!0, !9, !10}
-!llvm.dbg.lv.printer = !{!16, !17, !18}
-!llvm.dbg.lv.inlineprinter = !{!19, !20, !21}
-!llvm.dbg.lv.main = !{!22, !23, !24}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"/tmp/a.c", metadata !"/tmp", metadata !"(LLVM build 00)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, null, null, metadata !42, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8}
-!5 = metadata !{i32 589860, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 589860, metadata !1, metadata !"double", metadata !1, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 589860, metadata !1, metadata !"unsigned char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 786468, metadata !1, metadata !"double", metadata !1, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786468, metadata !1, metadata !"unsigned char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter, null, null, metadata !44, i32 5} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !45, i32 18} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
!12 = metadata !{metadata !5, metadata !5, metadata !13}
-!13 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ]
-!15 = metadata !{i32 589860, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!16 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 11, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 11, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 11, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 590081, metadata !9, metadata !"ptr", metadata !1, i32 4, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!20 = metadata !{i32 590081, metadata !9, metadata !"val", metadata !1, i32 4, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
-!21 = metadata !{i32 590081, metadata !9, metadata !"c", metadata !1, i32 4, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0} ; [ DW_TAG_arg_variable ]
-!24 = metadata !{i32 590080, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0} ; [ DW_TAG_auto_variable ]
-!25 = metadata !{i32 589835, metadata !10, i32 18, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 786468, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!16 = metadata !{i32 786689, metadata !0, metadata !"ptr", metadata !1, i32 11, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 786689, metadata !0, metadata !"val", metadata !1, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!18 = metadata !{i32 786689, metadata !0, metadata !"c", metadata !1, i32 11, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 786689, metadata !9, metadata !"ptr", metadata !1, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!20 = metadata !{i32 786689, metadata !9, metadata !"val", metadata !1, i32 4, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!21 = metadata !{i32 786689, metadata !9, metadata !"c", metadata !1, i32 4, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
+!22 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
+!24 = metadata !{i32 786688, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
+!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 18, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
!26 = metadata !{i32 4, i32 0, metadata !9, null}
!27 = metadata !{i32 6, i32 0, metadata !28, null}
-!28 = metadata !{i32 589835, metadata !9, i32 5, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786443, metadata !1, metadata !9, i32 5, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
!29 = metadata !{i32 7, i32 0, metadata !28, null}
!30 = metadata !{i32 11, i32 0, metadata !0, null}
!31 = metadata !{i32 13, i32 0, metadata !32, null}
-!32 = metadata !{i32 589835, metadata !0, i32 12, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!32 = metadata !{i32 786443, metadata !1, metadata !0, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
!33 = metadata !{i32 14, i32 0, metadata !32, null}
!34 = metadata !{i32 17, i32 0, metadata !10, null}
!35 = metadata !{i32 19, i32 0, metadata !25, null}
@@ -103,3 +100,8 @@ declare i32 @puts(i8* nocapture) nounwind
!39 = metadata !{i32 6, i32 0, metadata !28, metadata !37}
!40 = metadata !{i32 22, i32 0, metadata !25, null}
!41 = metadata !{i32 23, i32 0, metadata !25, null}
+!42 = metadata !{metadata !0, metadata !9, metadata !10}
+!43 = metadata !{metadata !16, metadata !17, metadata !18}
+!44 = metadata !{metadata !19, metadata !20, metadata !21}
+!45 = metadata !{metadata !22, metadata !23, metadata !24}
+!46 = metadata !{metadata !"a.c", metadata !"/tmp/"}
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index 97c9c66c58aa..038c2296cdbe 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -35,58 +35,61 @@ declare i32 @printf(i8* nocapture, ...) nounwind
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0, !10, !14}
-!llvm.dbg.lv.test0001 = !{!18}
-!llvm.dbg.lv.main = !{!19, !20, !24, !26, !27, !28, !29}
-!llvm.dbg.lv.printFV = !{!30}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589846, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
-!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 786691, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
+!7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
!8 = metadata !{metadata !9}
-!9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ]
-!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ]
+!10 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !52, i32 59} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!12 = metadata !{metadata !13}
-!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 589870, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
-!15 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!16 = metadata !{i32 589845, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786478, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 41} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ]
+!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!17 = metadata !{null}
-!18 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0} ; [ DW_TAG_arg_variable ]
-!20 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0} ; [ DW_TAG_arg_variable ]
-!21 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
-!22 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!24 = metadata !{i32 590080, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
-!25 = metadata !{i32 589835, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ]
-!26 = metadata !{i32 590080, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
-!27 = metadata !{i32 590080, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!28 = metadata !{i32 590080, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 590080, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!30 = metadata !{i32 590081, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
-!32 = metadata !{i32 589846, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
-!33 = metadata !{i32 589847, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
+!18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
+!20 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ]
+!21 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!24 = metadata !{i32 786688, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
+!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 786688, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
+!27 = metadata !{i32 786688, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!28 = metadata !{i32 786688, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 786688, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ]
+!31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
+!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
!34 = metadata !{metadata !35, metadata !37}
-!35 = metadata !{i32 589837, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
-!36 = metadata !{i32 589846, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!37 = metadata !{i32 589837, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
-!38 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
+!36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
+!37 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"A", i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
+!38 = metadata !{i32 786433, null, metadata !2, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
!39 = metadata !{i32 79, i32 7, metadata !40, null}
-!40 = metadata !{i32 589835, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ]
-!41 = metadata !{i32 589835, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ]
-!42 = metadata !{i32 589835, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ]
-!43 = metadata !{i32 589835, metadata !25, i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 786443, metadata !1, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ]
+!41 = metadata !{i32 786443, metadata !1, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ]
+!42 = metadata !{i32 786443, metadata !1, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ]
+!43 = metadata !{i32 786443, metadata !1, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ]
!44 = metadata !{i32 75, i32 5, metadata !42, null}
!45 = metadata !{i32 42, i32 2, metadata !46, metadata !48}
-!46 = metadata !{i32 589835, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ]
-!47 = metadata !{i32 589835, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ]
+!46 = metadata !{i32 786443, metadata !15, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ]
+!47 = metadata !{i32 786443, metadata !15, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ]
!48 = metadata !{i32 95, i32 3, metadata !25, null}
!49 = metadata !{i32 99, i32 3, metadata !25, null}
+!50 = metadata !{metadata !0, metadata !10, metadata !14}
+!51 = metadata !{metadata !18}
+!52 = metadata !{metadata !19, metadata !20, metadata !24, metadata !26, metadata !27, metadata !28, metadata !29}
+!53 = metadata !{metadata !30}
+!54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"}
+!55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"}
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index db41143fb3b1..f3af0b93c69c 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -61,46 +61,43 @@ declare i32 @puts(i8* nocapture) nounwind optsize
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0, !6, !7}
-!llvm.dbg.lv.inlineprinter = !{!8, !10, !12}
-!llvm.dbg.lv.printer = !{!14, !15, !16}
-!llvm.dbg.lv.main = !{!17, !18, !22}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 50331652, metadata !13, i32 0} ; [ DW_TAG_arg_variable ]
-!13 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 590081, metadata !6, metadata !"ptr", metadata !1, i32 16777227, metadata !9, i32 0} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 590081, metadata !6, metadata !"val", metadata !1, i32 33554443, metadata !11, i32 0} ; [ DW_TAG_arg_variable ]
-!16 = metadata !{i32 590081, metadata !6, metadata !"c", metadata !1, i32 50331659, metadata !13, i32 0} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 590081, metadata !7, metadata !"argc", metadata !1, i32 16777233, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 590081, metadata !7, metadata !"argv", metadata !1, i32 33554449, metadata !19, i32 0} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
-!20 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
-!21 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!22 = metadata !{i32 590080, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0} ; [ DW_TAG_auto_variable ]
-!23 = metadata !{i32 589835, metadata !7, i32 18, i32 1, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
+!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null, null, metadata !49, i32 12} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !50, i32 18} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 786689, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 786689, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0, null} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 786468, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 786689, metadata !0, metadata !"c", metadata !1, i32 50331652, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 786468, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !6, metadata !"ptr", metadata !1, i32 16777227, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 786689, metadata !6, metadata !"val", metadata !1, i32 33554443, metadata !11, i32 0, null} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 786689, metadata !6, metadata !"c", metadata !1, i32 50331659, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 786689, metadata !7, metadata !"argc", metadata !1, i32 16777233, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!18 = metadata !{i32 786689, metadata !7, metadata !"argv", metadata !1, i32 33554449, metadata !19, i32 0, null} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
+!20 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
+!21 = metadata !{i32 786468, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!22 = metadata !{i32 786688, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0, null} ; [ DW_TAG_auto_variable ]
+!23 = metadata !{i32 786443, metadata !1, metadata !7, i32 18, i32 1, i32 2} ; [ DW_TAG_lexical_block ]
!24 = metadata !{i32 4, i32 22, metadata !0, null}
!25 = metadata !{i32 4, i32 33, metadata !0, null}
!26 = metadata !{i32 4, i32 52, metadata !0, null}
!27 = metadata !{i32 6, i32 3, metadata !28, null}
-!28 = metadata !{i32 589835, metadata !0, i32 5, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786443, metadata !1, metadata !0, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
!29 = metadata !{i32 7, i32 3, metadata !28, null}
!30 = metadata !{i32 11, i32 42, metadata !6, null}
!31 = metadata !{i32 11, i32 53, metadata !6, null}
!32 = metadata !{i32 11, i32 72, metadata !6, null}
!33 = metadata !{i32 13, i32 3, metadata !34, null}
-!34 = metadata !{i32 589835, metadata !6, i32 12, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!34 = metadata !{i32 786443, metadata !1, metadata !6, i32 12, i32 1, i32 1} ; [ DW_TAG_lexical_block ]
!35 = metadata !{i32 14, i32 3, metadata !34, null}
!36 = metadata !{i32 17, i32 15, metadata !7, null}
!37 = metadata !{i32 17, i32 28, metadata !7, null}
@@ -113,3 +110,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!44 = metadata !{i32 6, i32 3, metadata !28, metadata !40}
!45 = metadata !{i32 22, i32 3, metadata !23, null}
!46 = metadata !{i32 23, i32 1, metadata !23, null}
+!47 = metadata !{metadata !0, metadata !6, metadata !7}
+!48 = metadata !{metadata !8, metadata !10, metadata !12}
+!49 = metadata !{metadata !14, metadata !15, metadata !16}
+!50 = metadata !{metadata !17, metadata !18, metadata !22}
+!51 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index ae7af0afad50..ae02a245b432 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -40,22 +40,23 @@ declare float @_Z2f3f(float) optsize
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1}
-!llvm.dbg.lv._Z3foov = !{!5, !8}
-!0 = metadata !{i32 589841, i32 0, i32 4, metadata !"k.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 130845)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"k.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{null}
-!5 = metadata !{i32 590080, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0} ; [ DW_TAG_auto_variable ]
-!6 = metadata !{i32 589835, metadata !1, i32 5, i32 12, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 589860, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 590080, metadata !9, metadata !"y", metadata !2, i32 8, metadata !7, i32 0} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 589835, metadata !10, i32 7, i32 25, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 589835, metadata !6, i32 7, i32 3, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
+!5 = metadata !{i32 786688, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
+!6 = metadata !{i32 786443, metadata !2, metadata !1, i32 5, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 786468, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786688, metadata !9, metadata !"y", metadata !2, i32 8, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 786443, metadata !2, metadata !10, i32 7, i32 25, i32 2} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786443, metadata !2, metadata !6, i32 7, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
!11 = metadata !{i32 6, i32 18, metadata !6, null}
!12 = metadata !{i32 7, i32 3, metadata !6, null}
!13 = metadata !{i32 8, i32 20, metadata !9, null}
!14 = metadata !{i32 7, i32 20, metadata !10, null}
!15 = metadata !{i32 10, i32 1, metadata !6, null}
+!16 = metadata !{metadata !1}
+!17 = metadata !{metadata !5, metadata !8}
+!18 = metadata !{metadata !"k.cc", metadata !"/private/tmp"}
diff --git a/test/CodeGen/ARM/domain-conv-vmovs.ll b/test/CodeGen/ARM/domain-conv-vmovs.ll
index a5c41144584c..b5586cc99fc1 100644
--- a/test/CodeGen/ARM/domain-conv-vmovs.ll
+++ b/test/CodeGen/ARM/domain-conv-vmovs.ll
@@ -78,7 +78,7 @@ define float @test_ineligible(float, float %in) {
; use-def chains would be messed up. Primarily a compile-test (we used to
; hit an internal fault).
call void @bar()
-; CHECL: bl bar
+; CHECK: bl bar
; CHECK: vext.32
; CHECK: vext.32
ret float %val
@@ -98,3 +98,23 @@ define i32 @test_vmovs_no_sreg(i32 %in) {
ret i32 %resi
}
+
+
+; The point of this test is:
+; + Make sure s1 is live before the BL
+; + Make sure s1 is clobbered by the BL
+; + Convince LLVM to emit a VMOV to S0
+; + Convince LLVM to domain-convert this.
+
+; When all of those are satisfied, LLVM should *not* mark s1 as an implicit-use
+; because it's dead.
+
+declare float @clobbers_s1(float, float)
+
+define <2 x float> @test_clobbers_recognised(<2 x float> %invec, float %val) {
+ %elt = call float @clobbers_s1(float %val, float %val)
+
+ %vec = insertelement <2 x float> %invec, float %elt, i32 0
+ %res = fadd <2 x float> %vec, %vec
+ ret <2 x float> %res
+}
diff --git a/test/CodeGen/ARM/eh-dispcont.ll b/test/CodeGen/ARM/eh-dispcont.ll
new file mode 100644
index 000000000000..935965bbdf8b
--- /dev/null
+++ b/test/CodeGen/ARM/eh-dispcont.ll
@@ -0,0 +1,89 @@
+; RUN: llc -mtriple armv7-apple-ios -relocation-model=pic -o - %s | FileCheck %s -check-prefix=ARM-PIC
+; RUN: llc -mtriple armv7-apple-ios -relocation-model=static -o - %s | FileCheck %s -check-prefix=ARM-NOPIC
+; RUN: llc -mtriple armv7-apple-ios -relocation-model=dynamic-no-pic -o - %s | FileCheck %s -check-prefix=ARM-NOPIC
+; RUN: llc -mtriple thumbv6-apple-ios -relocation-model=pic -o - %s | FileCheck %s -check-prefix=THUMB1-PIC
+; RUN: llc -mtriple thumbv6-apple-ios -relocation-model=static -o - %s | FileCheck %s -check-prefix=THUMB1-NOPIC
+; RUN: llc -mtriple thumbv6-apple-ios -relocation-model=dynamic-no-pic -o - %s | FileCheck %s -check-prefix=THUMB1-NOPIC
+
+@_ZTIi = external constant i8*
+
+define i32 @main() #0 {
+entry:
+ %exception = tail call i8* @__cxa_allocate_exception(i32 4) #1
+ %0 = bitcast i8* %exception to i32*
+ store i32 1, i32* %0, align 4
+ invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #2
+ to label %unreachable unwind label %lpad
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
+ %2 = extractvalue { i8*, i32 } %1, 0
+ %3 = tail call i8* @__cxa_begin_catch(i8* %2) #1
+ tail call void @__cxa_end_catch()
+ ret i32 0
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+declare i8* @__cxa_allocate_exception(i32)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+declare i32 @__gxx_personality_sj0(...)
+
+attributes #0 = { ssp }
+attributes #1 = { nounwind }
+attributes #2 = { noreturn }
+
+; ARM-PIC: cxa_throw
+; ARM-PIC: trap
+; ARM-PIC: adr [[REG1:r[0-9]+]], [[LJTI:.*]]
+; ARM-PIC: ldr [[REG0:r[0-9]+]], [r{{[0-9]+}}, [[REG1]]]
+; ARM-PIC: add pc, [[REG0]], [[REG1]]
+; ARM-PIC: [[LJTI]]
+; ARM-PIC: .data_region jt32
+; ARM-PIC: .long [[LABEL:LBB0_[0-9]]]-[[LJTI]]
+; ARM-PIC: .end_data_region
+; ARM-PIC: [[LABEL]]
+
+; ARM-NOPIC: cxa_throw
+; ARM-NOPIC: trap
+; ARM-NOPIC: adr [[REG1:r[0-9]+]], [[LJTI:.*]]
+; ARM-NOPIC: ldr [[REG0:r[0-9]+]], [r{{[0-9]+}}, [[REG1]]]
+; ARM-NOPIC: mov pc, [[REG0]]
+; ARM-NOPIC: [[LJTI]]
+; ARM-NOPIC: .data_region jt32
+; ARM-NOPIC: .long [[LABEL:LBB0_[0-9]]]
+; ARM-NOPIC: .end_data_region
+; ARM-NOPIC: [[LABEL]]
+
+; THUMB1-PIC: cxa_throw
+; THUMB1-PIC: trap
+; THUMB1-PIC: adr [[REG0:r[0-9]+]], [[LJTI:.*]]
+; THUMB1-PIC: adds [[REG1:r[0-9]+]], [[REG1]], [[REG0]]
+; THUMB1-PIC: ldr [[REG1]]
+; THUMB1-PIC: adds [[REG0]], [[REG1]], [[REG0]]
+; THUMB1-PIC: mov pc, [[REG0]]
+; THUMB1-PIC: [[LJTI]]
+; THUMB1-PIC: .data_region jt32
+; THUMB1-PIC: .long [[LABEL:LBB0_[0-9]]]-[[LJTI]]
+; THUMB1-PIC: .end_data_region
+; THUMB1-PIC: [[LABEL]]
+
+; THUMB1-NOPIC: cxa_throw
+; THUMB1-NOPIC: trap
+; THUMB1-NOPIC: adr [[REG1:r[0-9]+]], [[LJTI:.*]]
+; THUMB1-NOPIC: adds [[REG0:r[0-9]+]], [[REG0]], [[REG1]]
+; THUMB1-NOPIC: ldr [[REG0]]
+; THUMB1-NOPIC: mov pc, [[REG0]]
+; THUMB1-NOPIC: [[LJTI]]
+; THUMB1-NOPIC: .data_region jt32
+; THUMB1-NOPIC: .long [[LABEL:LBB0_[0-9]]]+1
+; THUMB1-NOPIC: .end_data_region
+; THUMB1-NOPIC: [[LABEL]]
diff --git a/test/CodeGen/ARM/ehabi-filters.ll b/test/CodeGen/ARM/ehabi-filters.ll
new file mode 100644
index 000000000000..c42839d9fe3d
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-filters.ll
@@ -0,0 +1,77 @@
+; RUN: llc -arm-enable-ehabi -arm-enable-ehabi-descriptors < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+@_ZTIi = external constant i8*
+
+declare void @_Z3foov() noreturn
+
+declare i8* @__cxa_allocate_exception(i32)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare void @__cxa_call_unexpected(i8*)
+
+define i32 @main() {
+; CHECK: main:
+entry:
+ %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind
+ %0 = bitcast i8* %exception.i to i32*
+ store i32 42, i32* %0, align 4, !tbaa !0
+ invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn
+ to label %unreachable.i unwind label %lpad.i
+
+lpad.i: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ filter [1 x i8*] [i8* bitcast (i8** @_ZTIi to i8*)]
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK: .long _ZTIi(target2) @ TypeInfo 1
+; CHECK: .long _ZTIi(target2) @ FilterInfo -1
+ %2 = extractvalue { i8*, i32 } %1, 1
+ %ehspec.fails.i = icmp slt i32 %2, 0
+ br i1 %ehspec.fails.i, label %ehspec.unexpected.i, label %lpad.body
+
+ehspec.unexpected.i: ; preds = %lpad.i
+ %3 = extractvalue { i8*, i32 } %1, 0
+ invoke void @__cxa_call_unexpected(i8* %3) noreturn
+ to label %.noexc unwind label %lpad
+
+.noexc: ; preds = %ehspec.unexpected.i
+ unreachable
+
+unreachable.i: ; preds = %entry
+ unreachable
+
+lpad: ; preds = %ehspec.unexpected.i
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ br label %lpad.body
+
+lpad.body: ; preds = %lpad.i, %lpad
+ %eh.lpad-body = phi { i8*, i32 } [ %4, %lpad ], [ %1, %lpad.i ]
+ %5 = extractvalue { i8*, i32 } %eh.lpad-body, 1
+ %6 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+ %matches = icmp eq i32 %5, %6
+ br i1 %matches, label %try.cont, label %eh.resume
+
+try.cont: ; preds = %lpad.body
+ %7 = extractvalue { i8*, i32 } %eh.lpad-body, 0
+ %8 = tail call i8* @__cxa_begin_catch(i8* %7) nounwind
+ tail call void @__cxa_end_catch() nounwind
+ ret i32 0
+
+eh.resume: ; preds = %lpad.body
+ resume { i8*, i32 } %eh.lpad-body
+}
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/ehabi-mc-cantunwind.ll b/test/CodeGen/ARM/ehabi-mc-cantunwind.ll
new file mode 100644
index 000000000000..698d76e56580
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-cantunwind.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN: -filetype=obj -o - %s \
+; RUN: | llvm-objdump -s - \
+; RUN: | FileCheck %s
+
+define void @test() nounwind {
+entry:
+ ret void
+}
+
+; CHECK: section .text
+; CHECK: section .ARM.exidx
+; CHECK-NEXT: 0000 00000000 01000000
diff --git a/test/CodeGen/ARM/ehabi-mc-section-group.ll b/test/CodeGen/ARM/ehabi-mc-section-group.ll
new file mode 100644
index 000000000000..5e4b5096c494
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-section-group.ll
@@ -0,0 +1,79 @@
+; Test the section group of a function with linkonce_odr linkage.
+
+; The instantiation of a C++ function template comes with linkonce_odr,
+; which indicates that the linker may remove duplicated instantiations.
+; However, to make this work, the sections must be grouped properly:
+; .text, .ARM.extab, and .ARM.exidx should be grouped together.
+
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN: -filetype=obj -o - %s \
+; RUN: | elf-dump --dump-section-data \
+; RUN: | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv4t--linux-gnueabi"
+
+define void @_Z11instantiatev() {
+entry:
+ tail call void @_Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_(i32 1, i32 2, i32 3, i32 4, i32 5, double 1.000000e-01, double 2.000000e-01, double 3.000000e-01, double 4.000000e-01, double 5.000000e-01)
+ ret void
+}
+
+define linkonce_odr void @_Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) {
+entry:
+ invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5)
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
+ invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5)
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %lpad
+ tail call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont: ; preds = %entry, %invoke.cont2
+ ret void
+
+lpad1: ; preds = %lpad
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ invoke void @__cxa_end_catch()
+ to label %eh.resume unwind label %terminate.lpad
+
+eh.resume: ; preds = %lpad1
+ resume { i8*, i32 } %3
+
+terminate.lpad: ; preds = %lpad1
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ tail call void @_ZSt9terminatev() noreturn nounwind
+ unreachable
+}
+
+declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @_Z5printddddd(double, double, double, double, double)
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+; CHECK: # Section 1
+; CHECK-NEXT: (('sh_name', 0x0000002f) # '.group'
+; CHECK: ('_section_data', '01000000 0a000000 0c000000 0e000000')
+; CHECK: # Section 10
+; CHECK-NEXT: (('sh_name', 0x000000e1) # '.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
+; CHECK: # Section 12
+; CHECK-NEXT: (('sh_name', 0x000000d7) # '.ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
+; CHECK: # Section 14
+; CHECK-NEXT: (('sh_name', 0x00000065) # '.ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
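
To make the grouping concrete, here is a minimal sketch, assuming a made-up
instantiation @_Z3fooIiEvT_ (illustrative only, not part of the imported
test), of the input shape and the object layout the group should produce:

; A linkonce_odr definition, as a C++ template instantiation produces.
define linkonce_odr void @_Z3fooIiEvT_(i32 %x) {
entry:
  ret void
}
; Expected layout, roughly: one .group section whose members are
;   .text._Z3fooIiEvT_
;   .ARM.extab.text._Z3fooIiEvT_   (when an unwind table is emitted)
;   .ARM.exidx.text._Z3fooIiEvT_
; so the linker keeps or discards the whole instantiation as a unit.
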
diff --git a/test/CodeGen/ARM/ehabi-mc-section.ll b/test/CodeGen/ARM/ehabi-mc-section.ll
new file mode 100644
index 000000000000..fc51b240ff3d
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-section.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN: -filetype=obj -o - %s \
+; RUN: | llvm-objdump -s - \
+; RUN: | FileCheck %s
+
+define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) section ".test_section" {
+entry:
+ invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5)
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
+ invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5)
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %lpad
+ tail call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont: ; preds = %entry, %invoke.cont2
+ ret void
+
+lpad1: ; preds = %lpad
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ invoke void @__cxa_end_catch()
+ to label %eh.resume unwind label %terminate.lpad
+
+eh.resume: ; preds = %lpad1
+ resume { i8*, i32 } %3
+
+terminate.lpad: ; preds = %lpad1
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ tail call void @_ZSt9terminatev() noreturn nounwind
+ unreachable
+}
+
+declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @_Z5printddddd(double, double, double, double, double)
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+; CHECK: section .test_section
+; CHECK: section .ARM.extab.test_section
+; CHECK-NEXT: 0000 00000000 b0b0b000
+; CHECK: section .ARM.exidx.test_section
+; CHECK-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/ehabi-mc-sh_link.ll b/test/CodeGen/ARM/ehabi-mc-sh_link.ll
new file mode 100644
index 000000000000..f90e5f384c1e
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-sh_link.ll
@@ -0,0 +1,47 @@
+; Test the sh_link in Elf32_Shdr.
+
+; The .ARM.exidx section should be linked with the corresponding text
+; section: the sh_link field in Elf32_Shdr should be filled with the
+; section index of the text section.
+
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN: -filetype=obj -o - %s \
+; RUN: | elf-dump --dump-section-data \
+; RUN: | FileCheck %s
+
+define void @test1() nounwind {
+entry:
+ ret void
+}
+
+define void @test2() nounwind section ".test_section" {
+entry:
+ ret void
+}
+
+; CHECK: # Section 1
+; CHECK-NEXT: (('sh_name', 0x00000010) # '.text'
+
+; CHECK: (('sh_name', 0x00000005) # '.ARM.exidx'
+; CHECK-NEXT: ('sh_type', 0x70000001)
+; CHECK-NEXT: ('sh_flags', 0x00000082)
+; CHECK-NEXT: ('sh_addr', 0x00000000)
+; CHECK-NEXT: ('sh_offset', 0x0000005c)
+; CHECK-NEXT: ('sh_size', 0x00000008)
+; CHECK-NEXT: ('sh_link', 0x00000001)
+; CHECK-NEXT: ('sh_info', 0x00000000)
+; CHECK-NEXT: ('sh_addralign', 0x00000004)
+
+; CHECK: # Section 7
+; CHECK-NEXT: (('sh_name', 0x00000039) # '.test_section'
+
+; CHECK: (('sh_name', 0x0000002f) # '.ARM.exidx.test_section'
+; CHECK-NEXT: ('sh_type', 0x70000001)
+; CHECK-NEXT: ('sh_flags', 0x00000082)
+; CHECK-NEXT: ('sh_addr', 0x00000000)
+; CHECK-NEXT: ('sh_offset', 0x00000068)
+; CHECK-NEXT: ('sh_size', 0x00000008)
+; CHECK-NEXT: ('sh_link', 0x00000007)
+; CHECK-NEXT: ('sh_info', 0x00000000)
+; CHECK-NEXT: ('sh_addralign', 0x00000004)
diff --git a/test/CodeGen/ARM/ehabi-mc.ll b/test/CodeGen/ARM/ehabi-mc.ll
new file mode 100644
index 000000000000..0dc2ef7838f0
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN: -filetype=obj -o - %s \
+; RUN: | llvm-objdump -s - \
+; RUN: | FileCheck %s
+
+define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) {
+entry:
+ invoke void @_Z5printiiiii(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5)
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
+ invoke void @_Z5printddddd(double %v1, double %v2, double %v3, double %v4, double %v5)
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %lpad
+ tail call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont: ; preds = %entry, %invoke.cont2
+ ret void
+
+lpad1: ; preds = %lpad
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ invoke void @__cxa_end_catch()
+ to label %eh.resume unwind label %terminate.lpad
+
+eh.resume: ; preds = %lpad1
+ resume { i8*, i32 } %3
+
+terminate.lpad: ; preds = %lpad1
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ tail call void @_ZSt9terminatev() noreturn nounwind
+ unreachable
+}
+
+declare void @_Z5printiiiii(i32, i32, i32, i32, i32)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @_Z5printddddd(double, double, double, double, double)
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+; CHECK: section .text
+; CHECK: section .ARM.extab
+; CHECK-NEXT: 0000 00000000 b0b0b000
+; CHECK: section .ARM.exidx
+; CHECK-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/ehabi-no-landingpad.ll b/test/CodeGen/ARM/ehabi-no-landingpad.ll
new file mode 100644
index 000000000000..ac0dff421a6f
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-no-landingpad.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=armv7-unknown-linux-gnueabi \
+; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-unknown-linux-gnueabi"
+
+define void @_Z4testv() {
+; CHECK: _Z4testv
+; CHECK: .fnstart
+; CHECK: .size
+; CHECK-NOT: .handlerdata
+; CHECK: .fnend
+entry:
+ call void @_Z15throw_exceptionv()
+ ret void
+}
+
+declare void @_Z15throw_exceptionv()
diff --git a/test/CodeGen/ARM/elf-lcomm-align.ll b/test/CodeGen/ARM/elf-lcomm-align.ll
index 46792990e593..a98b3c06f5e2 100644
--- a/test/CodeGen/ARM/elf-lcomm-align.ll
+++ b/test/CodeGen/ARM/elf-lcomm-align.ll
@@ -4,8 +4,9 @@
@c = internal global i8 0, align 1
@x = internal global i32 0, align 4
-; CHECK: .lcomm c,1
-; .lcomm doesn't support alignment.
+; .lcomm doesn't support alignment, so we always use .local/.comm.
+; CHECK: .local c
+; CHECK-NEXT: .comm c,1,1
; CHECK: .local x
; CHECK-NEXT: .comm x,4,4
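
For reference, the directive-level difference the new checks encode, as a
minimal sketch assuming GNU assembler syntax: .lcomm takes only a size,
while the ELF .comm directive takes a third, alignment operand.

@buf = internal global i32 0, align 4
; Old emission:   .lcomm buf,4          (the alignment is lost)
; New emission:   .local buf
;                 .comm  buf,4,4        (size 4, alignment 4)
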
diff --git a/test/CodeGen/ARM/extload-knownzero.ll b/test/CodeGen/ARM/extload-knownzero.ll
new file mode 100644
index 000000000000..8fd6b6bd777a
--- /dev/null
+++ b/test/CodeGen/ARM/extload-knownzero.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; rdar://12771555
+
+define void @foo(i16* %ptr, i32 %a) nounwind {
+entry:
+; CHECK: foo:
+ %tmp1 = icmp ult i32 %a, 100
+ br i1 %tmp1, label %bb1, label %bb2
+bb1:
+; CHECK: ldrh
+ %tmp2 = load i16* %ptr, align 2
+ br label %bb2
+bb2:
+; CHECK-NOT: uxth
+; CHECK: cmp
+ %tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ]
+ %cmp = icmp ult i16 %tmp3, 24
+ br i1 %cmp, label %bb3, label %exit
+bb3:
+ call void @bar() nounwind
+ br label %exit
+exit:
+ ret void
+}
+
+declare void @bar()
diff --git a/test/CodeGen/ARM/fabs-neon.ll b/test/CodeGen/ARM/fabs-neon.ll
new file mode 100644
index 000000000000..614117ff7bca
--- /dev/null
+++ b/test/CodeGen/ARM/fabs-neon.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=armv7-eabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s
+
+; CHECK: test:
+; CHECK: vabs.f32 q0, q0
+define <4 x float> @test(<4 x float> %a) {
+ %foo = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ ret <4 x float> %foo
+}
+declare <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+
+; CHECK: test2:
+; CHECK: vabs.f32 d0, d0
+define <2 x float> @test2(<2 x float> %a) {
+ %foo = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ ret <2 x float> %foo
+}
+declare <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index 46c2f1c65fe5..c3e00ce47019 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -14,12 +14,12 @@ entry:
declare float @fabsf(float)
; VFP2: test:
-; VFP2: vabs.f32 s2, s2
+; VFP2: vabs.f32 s
; NFP1: test:
-; NFP1: vabs.f32 d1, d1
+; NFP1: vabs.f32 d
; NFP0: test:
-; NFP0: vabs.f32 s2, s2
+; NFP0: vabs.f32 s
; CORTEXA8: test:
; CORTEXA8: vadd.f32 [[D1:d[0-9]+]]
diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll
index 48ef5ed88fb0..c7e2f5d094b8 100644
--- a/test/CodeGen/ARM/fadds.ll
+++ b/test/CodeGen/ARM/fadds.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
define float @test(float %a, float %b) {
@@ -18,6 +20,8 @@ entry:
; NFP0: vadd.f32 s
; CORTEXA8: test:
-; CORTEXA8: vadd.f32 d
+; CORTEXA8: vadd.f32 s
+; CORTEXA8U: test:
+; CORTEXA8U: vadd.f32 d
; CORTEXA9: test:
-; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}}
+; CORTEXA9: vadd.f32 s
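
The RUN-line changes here (and in fmuls, fnegs, fnmscs, fp_convert, and
fsubs below) follow one rule: NEON single-precision arithmetic flushes
denormals, so the d-register form is only acceptable under unsafe-fp-math
or on Darwin, which permits it; strict arm-eabi runs expect the VFP
s-register form. A minimal sketch of the two expectations, assuming the
same llc invocations as the test:

; llc -mtriple=arm-eabi -mcpu=cortex-a8                          -> vadd.f32 s...
; llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math  -> vadd.f32 d...
; llc -mtriple=arm-darwin -mcpu=cortex-a8                        -> vadd.f32 d...
define float @fadd_sketch(float %a, float %b) {
entry:
  %sum = fadd float %a, %b
  ret float %sum
}
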
diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
index dbb634df0a1e..60bc6a62f5d3 100644
--- a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
+++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] }
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
index 7c532d5fba38..4e6efd248997 100644
--- a/test/CodeGen/ARM/fast-isel-br-const.ll
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
index 14721a4d8024..b6f201728c2b 100644
--- a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
+++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Fast-isel can't handle non-double multi-reg retvals.
; This test just checks that we don't hit the assert in FinishCall.
diff --git a/test/CodeGen/ARM/fast-isel-crash.ll b/test/CodeGen/ARM/fast-isel-crash.ll
index 370c70f174fd..8fb4b66b7dd4 100644
--- a/test/CodeGen/ARM/fast-isel-crash.ll
+++ b/test/CodeGen/ARM/fast-isel-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin
%union.anon = type { <16 x i32> }
diff --git a/test/CodeGen/ARM/fast-isel-crash2.ll b/test/CodeGen/ARM/fast-isel-crash2.ll
index aa0629928846..f245168a8e30 100644
--- a/test/CodeGen/ARM/fast-isel-crash2.ll
+++ b/test/CodeGen/ARM/fast-isel-crash2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin
; rdar://9515076
; (Make sure this doesn't crash.)
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
index 7e147c7b4d7d..3a943d854b4a 100644
--- a/test/CodeGen/ARM/fast-isel-deadcode.ll
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; The target-specific selector can't properly handle the double because it
; isn't being passed via a register, so the materialized arguments become dead code.
diff --git a/test/CodeGen/ARM/fast-isel-fold.ll b/test/CodeGen/ARM/fast-isel-fold.ll
index 61bd18504c5c..7a65295f01b6 100644
--- a/test/CodeGen/ARM/fast-isel-fold.ll
+++ b/test/CodeGen/ARM/fast-isel-fold.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
@a = global i8 1, align 1
@b = global i16 2, align 2
diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll
index 8f7b2943b56d..c256e73ab98c 100644
--- a/test/CodeGen/ARM/fast-isel-frameaddr.ll
+++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2
define i8* @frameaddr_index0() nounwind {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll
index 8764bef7dab9..8357ed5c549c 100644
--- a/test/CodeGen/ARM/fast-isel-icmp.ll
+++ b/test/CodeGen/ARM/fast-isel-icmp.ll
@@ -1,6 +1,21 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+define i32 @icmp_i16_signed(i16 %a, i16 %b) nounwind {
+entry:
+; ARM: icmp_i16_signed
+; ARM: sxth r0, r0
+; ARM: sxth r1, r1
+; ARM: cmp r0, r1
+; THUMB: icmp_i16_signed
+; THUMB: sxth r0, r0
+; THUMB: sxth r1, r1
+; THUMB: cmp r0, r1
+ %cmp = icmp slt i16 %a, %b
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
entry:
; ARM: icmp_i16_unsigned
@@ -31,6 +46,21 @@ entry:
ret i32 %conv2
}
+define i32 @icmp_i8_unsigned(i8 %a, i8 %b) nounwind {
+entry:
+; ARM: icmp_i8_unsigned
+; ARM: uxtb r0, r0
+; ARM: uxtb r1, r1
+; ARM: cmp r0, r1
+; THUMB: icmp_i8_unsigned
+; THUMB: uxtb r0, r0
+; THUMB: uxtb r1, r1
+; THUMB: cmp r0, r1
+ %cmp = icmp ugt i8 %a, %b
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
define i32 @icmp_i1_unsigned(i1 %a, i1 %b) nounwind {
entry:
; ARM: icmp_i1_unsigned
diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll
index be8035ec794d..ebc0e8426d55 100644
--- a/test/CodeGen/ARM/fast-isel-indirectbr.ll
+++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define void @t1(i8* %x) {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index b73fceff6cd0..48105dd3893b 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -35,7 +35,7 @@ define void @t1() nounwind ssp {
; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
; THUMB-LONG: ldr r3, [r3]
; THUMB-LONG: blx r3
- call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 4, i1 false)
ret void
}
@@ -73,7 +73,7 @@ define void @t2() nounwind ssp {
; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
; THUMB-LONG: ldr r3, [r3]
; THUMB-LONG: blx r3
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 4, i1 false)
ret void
}
@@ -125,6 +125,7 @@ define void @t4() nounwind ssp {
; ARM: ldrh r1, [r0, #24]
; ARM: strh r1, [r0, #12]
; ARM: bx lr
+; THUMB: t4
; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
; THUMB: ldr r0, [r0]
@@ -135,8 +136,117 @@ define void @t4() nounwind ssp {
; THUMB: ldrh r1, [r0, #24]
; THUMB: strh r1, [r0, #12]
; THUMB: bx lr
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 4, i1 false)
ret void
}
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @t5() nounwind ssp {
+; ARM: t5
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: ldrh r1, [r0, #16]
+; ARM: strh r1, [r0, #4]
+; ARM: ldrh r1, [r0, #18]
+; ARM: strh r1, [r0, #6]
+; ARM: ldrh r1, [r0, #20]
+; ARM: strh r1, [r0, #8]
+; ARM: ldrh r1, [r0, #22]
+; ARM: strh r1, [r0, #10]
+; ARM: ldrh r1, [r0, #24]
+; ARM: strh r1, [r0, #12]
+; ARM: bx lr
+; THUMB: t5
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: ldrh r1, [r0, #16]
+; THUMB: strh r1, [r0, #4]
+; THUMB: ldrh r1, [r0, #18]
+; THUMB: strh r1, [r0, #6]
+; THUMB: ldrh r1, [r0, #20]
+; THUMB: strh r1, [r0, #8]
+; THUMB: ldrh r1, [r0, #22]
+; THUMB: strh r1, [r0, #10]
+; THUMB: ldrh r1, [r0, #24]
+; THUMB: strh r1, [r0, #12]
+; THUMB: bx lr
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 2, i1 false)
+ ret void
+}
+
+define void @t6() nounwind ssp {
+; ARM: t6
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: ldrb r1, [r0, #16]
+; ARM: strb r1, [r0, #4]
+; ARM: ldrb r1, [r0, #17]
+; ARM: strb r1, [r0, #5]
+; ARM: ldrb r1, [r0, #18]
+; ARM: strb r1, [r0, #6]
+; ARM: ldrb r1, [r0, #19]
+; ARM: strb r1, [r0, #7]
+; ARM: ldrb r1, [r0, #20]
+; ARM: strb r1, [r0, #8]
+; ARM: ldrb r1, [r0, #21]
+; ARM: strb r1, [r0, #9]
+; ARM: ldrb r1, [r0, #22]
+; ARM: strb r1, [r0, #10]
+; ARM: ldrb r1, [r0, #23]
+; ARM: strb r1, [r0, #11]
+; ARM: ldrb r1, [r0, #24]
+; ARM: strb r1, [r0, #12]
+; ARM: ldrb r1, [r0, #25]
+; ARM: strb r1, [r0, #13]
+; ARM: bx lr
+; THUMB: t6
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: ldrb r1, [r0, #16]
+; THUMB: strb r1, [r0, #4]
+; THUMB: ldrb r1, [r0, #17]
+; THUMB: strb r1, [r0, #5]
+; THUMB: ldrb r1, [r0, #18]
+; THUMB: strb r1, [r0, #6]
+; THUMB: ldrb r1, [r0, #19]
+; THUMB: strb r1, [r0, #7]
+; THUMB: ldrb r1, [r0, #20]
+; THUMB: strb r1, [r0, #8]
+; THUMB: ldrb r1, [r0, #21]
+; THUMB: strb r1, [r0, #9]
+; THUMB: ldrb r1, [r0, #22]
+; THUMB: strb r1, [r0, #10]
+; THUMB: ldrb r1, [r0, #23]
+; THUMB: strb r1, [r0, #11]
+; THUMB: ldrb r1, [r0, #24]
+; THUMB: strb r1, [r0, #12]
+; THUMB: ldrb r1, [r0, #25]
+; THUMB: strb r1, [r0, #13]
+; THUMB: bx lr
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+ ret void
+}
+
+; rdar://13202135
+define void @t7() nounwind ssp {
+; Just make sure this doesn't assert when we have an odd length and an alignment of 2.
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 3, i32 2, i1 false)
+ ret void
+}
+
+define i32 @t8(i32 %x) nounwind {
+entry:
+; ARM: t8
+; ARM-NOT: FastISel missed call: %expval = call i32 @llvm.expect.i32(i32 %x, i32 1)
+; THUMB: t8
+; THUMB-NOT: FastISel missed call: %expval = call i32 @llvm.expect.i32(i32 %x, i32 1)
+ %expval = call i32 @llvm.expect.i32(i32 %x, i32 1)
+ ret i32 %expval
+}
+
+declare i32 @llvm.expect.i32(i32, i32) nounwind readnone
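
The constant changed from 1 to 4 in t1/t2/t4 above is the alignment
operand: in this era of the IR, @llvm.memcpy carries (dest, src, length,
alignment, isvolatile), and ARM fast-isel uses the alignment to choose
byte, halfword, or word load/store pairs when inlining small copies. A
minimal standalone sketch (the function name @copy10 is hypothetical):

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind

define void @copy10(i8* %dst, i8* %src) nounwind {
entry:
  ; length 10, alignment 2: eligible for inlining as ldrh/strh pairs,
  ; matching what the t5 checks above expect.
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 10, i32 2, i1 false)
  ret void
}
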
diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
index e8cc2b238dff..0b5267ddc973 100644
--- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
+++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
@@ -103,7 +103,7 @@ entry:
; ARM: t11
%add.ptr = getelementptr inbounds i16* %a, i64 8
store i16 0, i16* %add.ptr, align 2
-; ARM strh r{{[1-9]}}, [r0, #16]
+; ARM: strh r{{[1-9]}}, [r0, #16]
ret void
}
diff --git a/test/CodeGen/ARM/fast-isel-pred.ll b/test/CodeGen/ARM/fast-isel-pred.ll
index 8de54ad5332b..27731def1f57 100644
--- a/test/CodeGen/ARM/fast-isel-pred.ll
+++ b/test/CodeGen/ARM/fast-isel-pred.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -mtriple=armv7-apple-darwin < %s
+; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-apple-darwin < %s
define i32 @main() nounwind ssp {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index e50c3a4954e1..563880dab0a9 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s
+; RUN: llc -O0 -verify-machineinstrs -optimize-regalloc -regalloc=basic < %s
; This isn't exactly a useful set of command-line options, but check that it
; doesn't crash. (It was crashing because a register was getting redefined.)
diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll
index a86e3251f715..e8759a7fc4ce 100644
--- a/test/CodeGen/ARM/fast-isel-static.ll
+++ b/test/CodeGen/ARM/fast-isel-static.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static | FileCheck -check-prefix=NORM %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static | FileCheck -check-prefix=NORM %s
define void @myadd(float* %sum, float* %addend) nounwind {
entry:
diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll
index 8fab00213585..8f13f395e078 100644
--- a/test/CodeGen/ARM/fdivs.ll
+++ b/test/CodeGen/ARM/fdivs.ll
@@ -10,14 +10,14 @@ entry:
}
; VFP2: test:
-; VFP2: vdiv.f32 s0, s2, s0
+; VFP2: vdiv.f32 s{{.}}, s{{.}}, s{{.}}
; NFP1: test:
-; NFP1: vdiv.f32 s0, s2, s0
+; NFP1: vdiv.f32 s{{.}}, s{{.}}, s{{.}}
; NFP0: test:
-; NFP0: vdiv.f32 s0, s2, s0
+; NFP0: vdiv.f32 s{{.}}, s{{.}}, s{{.}}
; CORTEXA8: test:
-; CORTEXA8: vdiv.f32 s0, s2, s0
+; CORTEXA8: vdiv.f32 s{{.}}, s{{.}}, s{{.}}
; CORTEXA9: test:
; CORTEXA9: vdiv.f32 s{{.}}, s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index 1566a9272db1..f5245c946398 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
define float @test(float %a, float %b) {
@@ -18,9 +20,11 @@ entry:
; NFP0: vmul.f32 s
; CORTEXA8: test:
-; CORTEXA8: vmul.f32 d
+; CORTEXA8: vmul.f32 s
+; CORTEXA8U: test:
+; CORTEXA8U: vmul.f32 d
; CORTEXA9: test:
-; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}}
+; CORTEXA9: vmul.f32 s
; VFP2: test2
define float @test2(float %a) nounwind {
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index 418b59803d30..d84690ba4e4b 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9
define float @test1(float* %a) {
@@ -22,7 +24,10 @@ entry:
; NFP0: vneg.f32 s{{.*}}, s{{.*}}
; CORTEXA8: test1:
-; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
+; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
+
+; CORTEXA8U: test1:
+; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
; CORTEXA9: test1:
; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
@@ -46,7 +51,10 @@ entry:
; NFP0: vneg.f32 s{{.*}}, s{{.*}}
; CORTEXA8: test2:
-; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}}
+; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
+
+; CORTEXA8U: test2:
+; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
; CORTEXA9: test2:
; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 6081712829a2..c30806173428 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -1,7 +1,9 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=A8U
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8U
define float @t1(float %acc, float %a, float %b) nounwind {
entry:
@@ -11,9 +13,13 @@ entry:
; NEON: t1:
; NEON: vnmla.f32
+; A8U: t1:
+; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
+; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+
; A8: t1:
; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
-; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
%0 = fmul float %a, %b
%1 = fsub float -0.0, %0
%2 = fsub float %1, %acc
@@ -28,9 +34,13 @@ entry:
; NEON: t2:
; NEON: vnmla.f32
+; A8U: t2:
+; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
+; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+
; A8: t2:
; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
-; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
%0 = fmul float %a, %b
%1 = fmul float -1.0, %0
%2 = fsub float %1, %acc
@@ -45,9 +55,13 @@ entry:
; NEON: t3:
; NEON: vnmla.f64
+; A8U: t3:
+; A8U: vnmul.f64 d
+; A8U: vsub.f64 d
+
; A8: t3:
-; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
-; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
+; A8: vnmul.f64 d
+; A8: vsub.f64 d
%0 = fmul double %a, %b
%1 = fsub double -0.0, %0
%2 = fsub double %1, %acc
@@ -62,9 +76,13 @@ entry:
; NEON: t4:
; NEON: vnmla.f64
+; A8U: t4:
+; A8U: vnmul.f64 d
+; A8U: vsub.f64 d
+
; A8: t4:
-; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
-; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
+; A8: vnmul.f64 d
+; A8: vsub.f64 d
%0 = fmul double %a, %b
%1 = fmul double -1.0, %0
%2 = fsub double %1, %acc
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 44298b9c5d8d..3c47eb580ff1 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=VFP2
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NEON
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2
define i32 @test1(float %a, float %b) {
diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll
index f039e74c8ee6..617b01881a2e 100644
--- a/test/CodeGen/ARM/fsubs.ll
+++ b/test/CodeGen/ARM/fsubs.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NFP1U
+; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1U
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0
define float @test(float %a, float %b) {
@@ -9,5 +11,6 @@ entry:
}
; VFP2: vsub.f32 s
-; NFP1: vsub.f32 d
+; NFP1U: vsub.f32 d
+; NFP1: vsub.f32 s
; NFP0: vsub.f32 s
diff --git a/test/CodeGen/ARM/global-merge-addrspace.ll b/test/CodeGen/ARM/global-merge-addrspace.ll
new file mode 100644
index 000000000000..0efa690bde28
--- /dev/null
+++ b/test/CodeGen/ARM/global-merge-addrspace.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; Test the GlobalMerge pass. Check that the pass does not crash when using
+; multiple address spaces.
+
+; CHECK: _MergedGlobals:
+@g1 = internal addrspace(1) global i32 1
+@g2 = internal addrspace(1) global i32 2
+
+
+; CHECK: _MergedGlobals1:
+@g3 = internal addrspace(2) global i32 3
+@g4 = internal addrspace(2) global i32 4
diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll
index 1732df3fa5ef..f88e92796196 100644
--- a/test/CodeGen/ARM/global-merge.ll
+++ b/test/CodeGen/ARM/global-merge.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-darwin -global-merge-on-const=true | FileCheck %s
; Test the ARMGlobalMerge pass. Use -march=thumb because it has a small
; value for the maximum offset (127).
@@ -6,6 +6,52 @@
; CHECK: g0:
@g0 = internal global [32 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2 ]
+; Global variables marked with the "used" attribute must be kept.
+; CHECK: g8
+@g8 = internal global i32 0
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @g8 to i8*)], section "llvm.metadata"
+
+; A global used in a landingpad instruction must be kept.
+; CHECK: ZTIi
+@_ZTIi = internal global i8* null
+
+define i32 @_Z9exceptioni(i32 %arg) {
+bb:
+ %tmp = invoke i32 @_Z14throwSomethingi(i32 %arg)
+ to label %bb9 unwind label %bb1
+
+bb1: ; preds = %bb
+ %tmp2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %tmp3 = extractvalue { i8*, i32 } %tmp2, 1
+ %tmp4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+ %tmp5 = icmp eq i32 %tmp3, %tmp4
+ br i1 %tmp5, label %bb6, label %bb10
+
+bb6: ; preds = %bb1
+ %tmp7 = extractvalue { i8*, i32 } %tmp2, 0
+ %tmp8 = tail call i8* @__cxa_begin_catch(i8* %tmp7)
+ tail call void @__cxa_end_catch()
+ br label %bb9
+
+bb9: ; preds = %bb6, %bb
+ %res.0 = phi i32 [ 0, %bb6 ], [ %tmp, %bb ]
+ ret i32 %res.0
+
+bb10: ; preds = %bb1
+ resume { i8*, i32 } %tmp2
+}
+
+declare i32 @_Z14throwSomethingi(i32)
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
; CHECK: _MergedGlobals:
@g1 = internal global i32 1
@g2 = internal global i32 2
@@ -21,3 +67,8 @@
; CHECK: _MergedGlobals2
@g4 = internal global i32 0
@g5 = internal global i32 0
+
+; Global variables that are constant can be merged together
+; CHECK: _MergedGlobals3
+@g6 = internal constant [12 x i32] zeroinitializer, align 4
+@g7 = internal constant [12 x i32] zeroinitializer, align 4
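
For context on the @llvm.used idiom the new checks rely on, a minimal
standalone sketch of the convention: appending a global's address to
@llvm.used marks it as referenced for the whole toolchain, so GlobalMerge
must keep its symbol rather than fold it into _MergedGlobals.

@keep = internal global i32 0
; The appending linkage and the "llvm.metadata" section are part of the
; idiom; the i8* bitcast gives the array a uniform element type.
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @keep to i8*)], section "llvm.metadata"
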
diff --git a/test/CodeGen/ARM/indirect-reg-input.ll b/test/CodeGen/ARM/indirect-reg-input.ll
new file mode 100644
index 000000000000..86728fa61934
--- /dev/null
+++ b/test/CodeGen/ARM/indirect-reg-input.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 2>&1 | FileCheck %s
+
+; Check for error message:
+; CHECK: error: inline asm not supported yet: don't know how to handle tied indirect register inputs
+
+%struct.my_stack = type { %struct.myjmp_buf }
+%struct.myjmp_buf = type { [6 x i32] }
+
+define void @switch_to_stack(%struct.my_stack* %stack) nounwind {
+entry:
+ %regs = getelementptr inbounds %struct.my_stack* %stack, i32 0, i32 0
+ tail call void asm "\0A", "=*r,*0"(%struct.myjmp_buf* %regs, %struct.myjmp_buf* %regs)
+ ret void
+}
diff --git a/test/CodeGen/ARM/inlineasm-64bit.ll b/test/CodeGen/ARM/inlineasm-64bit.ll
new file mode 100644
index 000000000000..be5eb8157317
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm-64bit.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -O3 -mtriple=arm-linux-gnueabi | FileCheck %s
+
+; Check that 64-bit operands are passed in the correct register pairs.
+define void @i64_write(i64* %p, i64 %val) nounwind {
+; CHECK: i64_write:
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd [[REG1]], {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
+ %1 = tail call i64 asm sideeffect "1: ldrexd $0, ${0:H}, [$2]\0A strexd $0, $3, ${3:H}, [$2]\0A teq $0, #0\0A bne 1b", "=&r,=*Qo,r,r,~{cc}"(i64* %p, i64* %p, i64 %val) nounwind
+ ret void
+}
+
+; Check that register allocation can reuse the registers.
+define void @multi_writes(i64* %p, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind {
+entry:
+; CHECK: multi_writes:
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+
+ tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %p, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind
+ %incdec.ptr = getelementptr inbounds i64* %p, i32 1
+ tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %incdec.ptr, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind
+ tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %incdec.ptr, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind
+ ret void
+}
+
+
+; Check that callee-saved registers used by inline asm are saved and restored.
+define void @foo(i64* %p, i64 %i) nounwind {
+; CHECK: foo:
+; CHECK: push {{{r[4-9]|r10|r11}}
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
+; CHECK: strexd [[REG1]], {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
+; CHECK: pop {{{r[4-9]|r10|r11}}
+ %1 = tail call { i64, i64 } asm sideeffect "@ atomic64_set\0A1: ldrexd $0, ${0:H}, [$3]\0Aldrexd $1, ${1:H}, [$3]\0A strexd $0, $4, ${4:H}, [$3]\0A teq $0, #0\0A bne 1b", "=&r,=&r,=*Qo,r,r,~{cc}"(i64* %p, i64* %p, i64 %i) nounwind
+ ret void
+}
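A note on the ${N:H} operand modifier used throughout this file: on 32-bit ARM an i64 "r" operand occupies an even/odd register pair, $N prints the even (low) register and ${N:H} the odd (high) one. A standalone sketch under that assumption (hypothetical function, not part of the test):

define i64 @pair_move(i64 %v) nounwind {
  ; Copy a 64-bit value through a GPR pair: $1/$0 are the low halves,
  ; ${1:H}/${0:H} the corresponding high halves.
  %r = tail call i64 asm "mov $0, $1\0A\09mov ${0:H}, ${1:H}", "=&r,r"(i64 %v) nounwind
  ret i64 %r
}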
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index 2fcc45f4af9c..390a44e375b9 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -30,7 +30,7 @@ entry:
define hidden void @conv4_8_E() nounwind {
entry:
-%asmtmp31 = call %0 asm "vld1.u8 {$0}, [$1, :128]!\0A", "=w,=r,1"(<16 x i8>* undef) nounwind
+%asmtmp31 = call %0 asm "vld1.u8 {$0}, [$1:128]!\0A", "=w,=r,1"(<16 x i8>* undef) nounwind
unreachable
}
diff --git a/test/CodeGen/ARM/invoke-donothing-assert.ll b/test/CodeGen/ARM/invoke-donothing-assert.ll
new file mode 100644
index 000000000000..0b607f7edf38
--- /dev/null
+++ b/test/CodeGen/ARM/invoke-donothing-assert.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
+; This testcase makes sure we can handle invoke @llvm.donothing without
+; assertion failure.
+; <rdar://problem/13228754> & <rdar://problem/13316637>
+
+; CHECK: .globl _foo
+define void @foo() {
+invoke.cont:
+ invoke void @callA()
+ to label %invoke.cont25 unwind label %lpad2
+invoke.cont25:
+ invoke void @llvm.donothing()
+ to label %invoke.cont27 unwind label %lpad15
+
+invoke.cont27:
+ invoke void @callB()
+ to label %invoke.cont75 unwind label %lpad15
+
+invoke.cont75:
+ ret void
+
+lpad2:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ br label %eh.resume
+
+lpad15:
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ br label %eh.resume
+
+eh.resume:
+ resume { i8*, i32 } zeroinitializer
+}
+
+; CHECK: .globl _bar
+define linkonce_odr void @bar(i32* %a) {
+if.end.i.i.i:
+ invoke void @llvm.donothing()
+ to label %call.i.i.i.noexc unwind label %eh.resume
+
+call.i.i.i.noexc:
+ br i1 false, label %cleanup, label %new.notnull.i.i
+
+new.notnull.i.i:
+ br label %cleanup
+
+cleanup:
+ %0 = load i32* %a, align 4
+ %inc294 = add nsw i32 %0, 4
+ store i32 %inc294, i32* %a, align 4
+ br i1 false, label %_ZN3lol5ArrayIivvvvvvvED1Ev.exit, label %delete.notnull.i.i.i1409
+
+delete.notnull.i.i.i1409:
+ br label %_ZN3lol5ArrayIivvvvvvvED1Ev.exit
+
+_ZN3lol5ArrayIivvvvvvvED1Ev.exit:
+ ret void
+
+eh.resume:
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %2 = extractvalue { i8*, i32 } %1, 0
+ %3 = extractvalue { i8*, i32 } %1, 1
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %2, 0
+ %lpad.val395 = insertvalue { i8*, i32 } %lpad.val, i32 %3, 1
+ resume { i8*, i32 } %lpad.val395
+}
+
+declare void @callA()
+declare void @callB()
+declare void @llvm.donothing() nounwind readnone
+declare i32 @__gxx_personality_sj0(...)
diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg
index cb77b09ef4ad..4d75f581a1d2 100644
--- a/test/CodeGen/ARM/lit.local.cfg
+++ b/test/CodeGen/ARM/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
targets = set(config.root.targets_to_build.split())
if not 'ARM' in targets:
diff --git a/test/CodeGen/ARM/lsr-icmp-imm.ll b/test/CodeGen/ARM/lsr-icmp-imm.ll
index 5283f5747d96..248c4bd1beea 100644
--- a/test/CodeGen/ARM/lsr-icmp-imm.ll
+++ b/test/CodeGen/ARM/lsr-icmp-imm.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
-; RUN: llc -mtriple=armv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s
; LSR should compare against the post-incremented induction variable.
; In this case, the immediate value is -2 which requires a cmn instruction.
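For readers unfamiliar with cmn: ARM cannot encode a negative compare immediate, so comparing against -2 becomes cmn rN, #2 (add-and-set-flags). A minimal loop with the same property (hypothetical, not part of the test):

define i32 @until_minus_two(i32 %n) nounwind {
entry:
  br label %loop
loop:
  %i = phi i32 [ %n, %entry ], [ %i.next, %loop ]
  %i.next = add nsw i32 %i, -2
  %done = icmp eq i32 %i.next, -2    ; expected to lower via cmn, not cmp
  br i1 %done, label %exit, label %loop
exit:
  ret i32 %i.next
}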
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index 3ac7d77d6f79..03abd762a261 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -45,3 +45,35 @@ for.cond1.preheader: ; preds = %entry
}
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+; rdar://12462006
+define i8* @f3(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
+entry:
+; CHECK: f3:
+; CHECK-NOT: sub
+; CHECK: cmp
+; CHECK: blt
+%0 = load i32* %offset, align 4
+%cmp = icmp slt i32 %0, %size
+%s = sub nsw i32 %0, %size
+%size2 = sub nsw i32 %size, 0
+br i1 %cmp, label %return, label %if.end
+
+if.end:
+; We are checking CSE between %sub here and %s in the entry block.
+%sub = sub nsw i32 %0, %size2
+%s2 = sub nsw i32 %s, %size
+%s3 = sub nsw i32 %sub, %s2
+; CHECK: sub [[R1:r[0-9]+]], [[R2:r[0-9]+]], r2
+; CHECK: sub [[R3:r[0-9]+]], [[R1]], r2
+; CHECK: sub [[R4:r[0-9]+]], [[R1]], [[R3]]
+; CHECK-NOT: sub
+; CHECK: str
+store i32 %s3, i32* %offset, align 4
+%add.ptr = getelementptr inbounds i8* %base, i32 %sub
+br label %return
+
+return:
+%retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
+ret i8* %retval.0
+}
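Restating what the CHECK lines enforce, as an assumed lowering: the entry block keeps only the flag-setting compare, and the one surviving subtraction sits in if.end where its value is actually used.

;   entry:   cmp r?, r2        @ flags for %cmp; no sub on the early path
;            blt <return>
;   if.end:  sub r?, r?, r2    @ single CSE'd subtraction (%s == %sub)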
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index dc772827f270..d846e5cb268b 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,18 +1,115 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-post-ra | FileCheck %s
-
-; CHECK: ldrd
-; CHECK: strd
-; CHECK: ldrb
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
@src = external global %struct.x
@dst = external global %struct.x
-define i32 @t() {
+@.str1 = private unnamed_addr constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 1
+@.str2 = private unnamed_addr constant [36 x i8] c"DHRYSTONE PROGRAM, SOME STRING BLAH\00", align 1
+@.str3 = private unnamed_addr constant [24 x i8] c"DHRYSTONE PROGRAM, SOME\00", align 1
+@.str4 = private unnamed_addr constant [18 x i8] c"DHRYSTONE PROGR \00", align 1
+@.str5 = private unnamed_addr constant [7 x i8] c"DHRYST\00", align 1
+@.str6 = private unnamed_addr constant [14 x i8] c"/tmp/rmXXXXXX\00", align 1
+@spool.splbuf = internal global [512 x i8] zeroinitializer, align 16
+
+define i32 @t0() {
entry:
+; CHECK: t0:
+; CHECK: vldr [[REG1:d[0-9]+]],
+; CHECK: vstr [[REG1]],
call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
ret i32 0
}
+define void @t1(i8* nocapture %C) nounwind {
+entry:
+; CHECK: t1:
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+; CHECK: adds r0, #15
+; CHECK: adds r1, #15
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
+ ret void
+}
+
+define void @t2(i8* nocapture %C) nounwind {
+entry:
+; CHECK: t2:
+; CHECK: ldr [[REG2:r[0-9]+]], [r1, #32]
+; CHECK: str [[REG2]], [r0, #32]
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+; CHECK: adds r0, #16
+; CHECK: adds r1, #16
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
+ ret void
+}
+
+define void @t3(i8* nocapture %C) nounwind {
+entry:
+; CHECK: t3:
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+; CHECK: adds r0, #16
+; CHECK: adds r1, #16
+; CHECK: vld1.8 {d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}}, [r0]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
+ ret void
+}
+
+define void @t4(i8* nocapture %C) nounwind {
+entry:
+; CHECK: t4:
+; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
+; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
+ ret void
+}
+
+define void @t5(i8* nocapture %C) nounwind {
+entry:
+; CHECK: t5:
+; CHECK: movs [[REG5:r[0-9]+]], #0
+; CHECK: strb [[REG5]], [r0, #6]
+; CHECK: movw [[REG6:r[0-9]+]], #21587
+; CHECK: strh [[REG6]], [r0, #4]
+; CHECK: ldr [[REG7:r[0-9]+]],
+; CHECK: str [[REG7]]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
+ ret void
+}
+
+define void @t6() nounwind {
+entry:
+; CHECK: t6:
+; CHECK: vld1.8 {[[REG8:d[0-9]+]]}, [r0]
+; CHECK: vstr [[REG8]], [r1]
+; CHECK: adds r1, #6
+; CHECK: adds r0, #6
+; CHECK: vld1.8
+; CHECK: vst1.16
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
+ ret void
+}
+
+%struct.Foo = type { i32, i32, i32, i32 }
+
+define void @t7(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
+entry:
+; CHECK: t7:
+; CHECK: vld1.32
+; CHECK: vst1.32
+ %0 = bitcast %struct.Foo* %a to i8*
+ %1 = bitcast %struct.Foo* %b to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
+ ret void
+}
+
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
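A worked decoding of the @t5 constants (arithmetic only; register names are illustrative, the CHECKs above pin the actual sequence): the 7-byte copy of "DHRYST\00" splits into word + halfword + byte stores.

;   str  rA, [r0]        @ bytes 0-3: "DHRY"
;   movw rB, #21587      @ 21587 = 0x5453 = 'T'(0x54) << 8 | 'S'(0x53)
;   strh rB, [r0, #4]    @ bytes 4-5: "ST" (little-endian)
;   movs rC, #0
;   strb rC, [r0, #6]    @ byte 6: trailing NUL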
diff --git a/test/CodeGen/ARM/memset-inline.ll b/test/CodeGen/ARM/memset-inline.ll
new file mode 100644
index 000000000000..ee8c36433885
--- /dev/null
+++ b/test/CodeGen/ARM/memset-inline.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s
+
+define void @t1(i8* nocapture %c) nounwind optsize {
+entry:
+; CHECK: t1:
+; CHECK: movs r1, #0
+; CHECK: str r1, [r0]
+; CHECK: str r1, [r0, #4]
+; CHECK: str r1, [r0, #8]
+ call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
+ ret void
+}
+
+define void @t2() nounwind ssp {
+entry:
+; CHECK: t2:
+; CHECK: add.w r1, r0, #10
+; CHECK: vmov.i32 {{q[0-9]+}}, #0x0
+; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+ %buf = alloca [26 x i8], align 1
+ %0 = getelementptr inbounds [26 x i8]* %buf, i32 0, i32 0
+ call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
+ call void @something(i8* %0) nounwind
+ ret void
+}
+
+declare void @something(i8*) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/neon-spfp.ll b/test/CodeGen/ARM/neon-spfp.ll
new file mode 100644
index 000000000000..c00f0d17c9f5
--- /dev/null
+++ b/test/CodeGen/ARM/neon-spfp.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=CHECK-LINUXA5
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-LINUXA8
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 | FileCheck %s -check-prefix=CHECK-LINUXA9
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=CHECK-LINUXA15
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=CHECK-LINUXSWIFT
+
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA5
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA8
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA9
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFEA15
+; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=CHECK-UNSAFESWIFT
+
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=CHECK-DARWINA5
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-DARWINA8
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=CHECK-DARWINA9
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | FileCheck %s -check-prefix=CHECK-DARWINA15
+; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=CHECK-DARWINSWIFT
+
+; This test makes sure we do not lower single-precision FP ops to the NEON
+; (d-register) VMUL.f32, since NEON is not fully IEEE 754 compliant, unless
+; unsafe-math is selected.
+
+@.str = private unnamed_addr constant [12 x i8] c"S317\09%.5g \0A\00", align 1
+
+; CHECK-LINUXA5: main:
+; CHECK-LINUXA8: main:
+; CHECK-LINUXA9: main:
+; CHECK-LINUXA15: main:
+; CHECK-LINUXSWIFT: main:
+; CHECK-UNSAFEA5: main:
+; CHECK-UNSAFEA8: main:
+; CHECK-UNSAFEA9: main:
+; CHECK-UNSAFEA15: main:
+; CHECK-UNSAFESWIFT: main:
+; CHECK-DARWINA5: main:
+; CHECK-DARWINA8: main:
+; CHECK-DARWINA9: main:
+; CHECK-DARWINA15: main:
+; CHECK-DARWINSWIFT: main:
+define i32 @main() {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %q.03 = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+ %mul = fmul float %q.03, 0x3FEFAE1480000000
+; CHECK-LINUXA5: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA8: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA9: vmul.f32 s{{[0-9]*}}
+; CHECK-LINUXA15: vmul.f32 s{{[0-9]*}}
+; Swift is *always* unsafe
+; CHECK-LINUXSWIFT: vmul.f32 d{{[0-9]*}}
+
+; CHECK-UNSAFEA5: vmul.f32 d{{[0-9]*}}
+; CHECK-UNSAFEA8: vmul.f32 d{{[0-9]*}}
+; A9 and A15 don't need this
+; CHECK-UNSAFEA9: vmul.f32 s{{[0-9]*}}
+; CHECK-UNSAFEA15: vmul.f32 s{{[0-9]*}}
+; CHECK-UNSAFESWIFT: vmul.f32 d{{[0-9]*}}
+
+; CHECK-DARWINA5: vmul.f32 d{{[0-9]*}}
+; CHECK-DARWINA8: vmul.f32 d{{[0-9]*}}
+; CHECK-DARWINA9: vmul.f32 s{{[0-9]*}}
+; CHECK-DARWINA15: vmul.f32 s{{[0-9]*}}
+; CHECK-DARWINSWIFT: vmul.f32 d{{[0-9]*}}
+ %conv = fpext float %mul to double
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %conv) #1
+ %inc = add nsw i32 %i.04, 1
+ %exitcond = icmp eq i32 %inc, 16000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...)
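To make the register-file distinction concrete, here is the assumed selection for the fmul above (comment sketch, mirroring the CHECK lines):

;   vmul.f32 s0, s0, s1   @ VFP form: IEEE 754 compliant (Linux default)
;   vmul.f32 d0, d0, d1   @ NEON form: flushes denormals; chosen only with
;                         @ unsafe-fp-math, or on Swift and Darwin A5/A8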
diff --git a/test/CodeGen/ARM/neon_cmp.ll b/test/CodeGen/ARM/neon_cmp.ll
new file mode 100644
index 000000000000..046b5da22899
--- /dev/null
+++ b/test/CodeGen/ARM/neon_cmp.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+; bug 15283
+; radar://13191881
+; CHECK: vfcmp
+define void @vfcmp(<2 x double>* %a, <2 x double>* %b) {
+ %wide.load = load <2 x double>* %a, align 4
+ %wide.load2 = load <2 x double>* %b, align 4
+; CHECK-NOT: vdup.32
+; CHECK-NOT: vmovn.i64
+ %v1 = fcmp olt <2 x double> %wide.load, %wide.load2
+ %v2 = zext <2 x i1> %v1 to <2 x i32>
+ %v3 = sitofp <2 x i32> %v2 to <2 x double>
+ store <2 x double> %v3, <2 x double>* %b, align 4
+ ret void
+}
diff --git a/test/CodeGen/ARM/neon_fpconv.ll b/test/CodeGen/ARM/neon_fpconv.ll
new file mode 100644
index 000000000000..149f4c777003
--- /dev/null
+++ b/test/CodeGen/ARM/neon_fpconv.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+; PR12540: ARM backend lowering of FP_ROUND v2f64 to v2f32.
+define <2 x float> @vtrunc(<2 x double> %a) {
+; CHECK: vcvt.f32.f64 [[S0:s[0-9]+]], [[D0:d[0-9]+]]
+; CHECK: vcvt.f32.f64 [[S1:s[0-9]+]], [[D1:d[0-9]+]]
+ %vt = fptrunc <2 x double> %a to <2 x float>
+ ret <2 x float> %vt
+}
+
+define <2 x double> @vextend(<2 x float> %a) {
+; CHECK: vcvt.f64.f32 [[D0:d[0-9]+]], [[S0:s[0-9]+]]
+; CHECK: vcvt.f64.f32 [[D1:d[0-9]+]], [[S1:s[0-9]+]]
+ %ve = fpext <2 x float> %a to <2 x double>
+ ret <2 x double> %ve
+}
+
+; We used to generate vmovs between scalar and VFP/NEON registers.
+; CHECK: vsitofp_double
+define void @vsitofp_double(<2 x i32>* %loadaddr,
+ <2 x double>* %storeaddr) {
+ %v0 = load <2 x i32>* %loadaddr
+; CHECK: vldr
+; CHECK-NEXT: vcvt.f64.s32
+; CHECK-NEXT: vcvt.f64.s32
+; CHECK-NEXT: vst
+ %r = sitofp <2 x i32> %v0 to <2 x double>
+ store <2 x double> %r, <2 x double>* %storeaddr
+ ret void
+}
+; CHECK: vuitofp_double
+define void @vuitofp_double(<2 x i32>* %loadaddr,
+ <2 x double>* %storeaddr) {
+ %v0 = load <2 x i32>* %loadaddr
+; CHECK: vldr
+; CHECK-NEXT: vcvt.f64.u32
+; CHECK-NEXT: vcvt.f64.u32
+; CHECK-NEXT: vst
+ %r = uitofp <2 x i32> %v0 to <2 x double>
+ store <2 x double> %r, <2 x double>* %storeaddr
+ ret void
+}
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 497619ed746a..25a670b09778 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -7,10 +7,10 @@
; CHECK: vadd.i64 q
; CHECK: vst1.64
; SWIFT: t1
-; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
-; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
; SWIFT: vadd.i64 q
-; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
%0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
@@ -28,8 +28,8 @@ entry:
; CHECK: vmov r0, r1, d
; CHECK: vmov r2, r3, d
; SWIFT: t2
-; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
-; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+, :128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
+; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
; SWIFT: vsub.i64 q
; SWIFT: vmov r0, r1, d
; SWIFT: vmov r2, r3, d
diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll
index d301c6a4ca90..0a7c8b2b6aae 100644
--- a/test/CodeGen/ARM/neon_minmax.ll
+++ b/test/CodeGen/ARM/neon_minmax.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s
define float @fmin_ole(float %x) nounwind {
;CHECK: fmin_ole:
diff --git a/test/CodeGen/ARM/popcnt.ll b/test/CodeGen/ARM/popcnt.ll
new file mode 100644
index 000000000000..0b9c9467c206
--- /dev/null
+++ b/test/CodeGen/ARM/popcnt.ll
@@ -0,0 +1,191 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; Implement ctpop with vcnt
+
+define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
+;CHECK: vcnt8:
+;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
+;CHECK: vcntQ8:
+;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <4 x i16> @vcnt16(<4 x i16>* %A) nounwind {
+; CHECK: vcnt16:
+; CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vrev16.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vuzp.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <8 x i16> @vcntQ16(<8 x i16>* %A) nounwind {
+; CHECK: vcntQ16:
+; CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vrev16.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vuzp.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <2 x i32> @vcnt32(<2 x i32>* %A) nounwind {
+; CHECK: vcnt32:
+; CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vrev16.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vuzp.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
+; CHECK: vrev32.16 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vuzp.16 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vcntQ32(<4 x i32>* %A) nounwind {
+; CHECK: vcntQ32:
+; CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vrev16.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vuzp.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
+; CHECK: vrev32.16 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vuzp.16 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
+
+define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
+;CHECK: vclz8:
+;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
+;CHECK: vclz16:
+;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
+;CHECK: vclz32:
+;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
+;CHECK: vclzQ8:
+;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
+;CHECK: vclzQ16:
+;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
+;CHECK: vclzQ32:
+;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
+declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
+
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
+
+define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
+;CHECK: vclss8:
+;CHECK: vcls.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
+;CHECK: vclss16:
+;CHECK: vcls.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
+;CHECK: vclss32:
+;CHECK: vcls.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vclsQs8:
+;CHECK: vcls.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vclsQs16:
+;CHECK: vcls.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vclsQs32:
+;CHECK: vcls.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
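The ctpop CHECK lines encode a widening ladder; restated for the <2 x i32> case as an assumed expansion:

;   vcnt.8    d0, d0       @ popcount of every byte
;   vrev16.8  d1, d0       @ swap the bytes of each halfword
;   vadd.i8   d0, d0, d1   @ each byte now holds its halfword's count
;   vuzp.8 + vmovl.u8      @ deinterleave and widen the sums to 16 bits
;   vrev32.16 + vuzp.16 + vmovl.u16   @ repeat one level up, to 32 bits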
diff --git a/test/CodeGen/ARM/reg_asc_order.ll b/test/CodeGen/ARM/reg_asc_order.ll
deleted file mode 100644
index d1d0ee5f3e7b..000000000000
--- a/test/CodeGen/ARM/reg_asc_order.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
-; Check that memcpy gets lowered to ldm/stm, at least in this very simple case.
-
-%struct.Foo = type { i32, i32, i32, i32 }
-
-define void @_Z10CopyStructP3FooS0_(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
-entry:
-;CHECK: ldm
-;CHECK: stm
- %0 = bitcast %struct.Foo* %a to i8*
- %1 = bitcast %struct.Foo* %b to i8*
- tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 6d6586e4f283..fd2083cf9f41 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -242,8 +242,8 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
; CHECK: vldr
; CHECK-NOT: vmov d{{.*}}, d16
; CHECK: vmov.i32 d17
-; CHECK-NEXT: vst1.64 {d16, d17}, [r0, :128]
-; CHECK-NEXT: vst1.64 {d16, d17}, [r0, :128]
+; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
+; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
%3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
%4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
store <4 x float> %4, <4 x float>* undef, align 16
diff --git a/test/CodeGen/ARM/ret_sret_vector.ll b/test/CodeGen/ARM/ret_sret_vector.ll
new file mode 100644
index 000000000000..9bb3519555e8
--- /dev/null
+++ b/test/CodeGen/ARM/ret_sret_vector.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+define <4 x double> @PR14337(<4 x double> %a, <4 x double> %b) {
+ %foo = fadd <4 x double> %a, %b
+ ret <4 x double> %foo
+; CHECK: PR14337:
+; CHECK: vst1.64
+; CHECK: vst1.64
+}
diff --git a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
new file mode 100644
index 000000000000..d8241d0dc380
--- /dev/null
+++ b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -O1 -mtriple thumbv7-apple-ios6
+; Just make sure nothing assumes that the normal destination edge of an invoke
+; is never a critical edge. Previously, this code would assert.
+
+%struct.__CFString = type opaque
+
+declare void @bar(%struct.__CFString*, %struct.__CFString*)
+
+define noalias i8* @foo(i8* nocapture %inRefURL) noreturn ssp {
+entry:
+ %call = tail call %struct.__CFString* @bar3()
+ %call2 = invoke i8* @bar2()
+ to label %for.cond unwind label %lpad
+
+for.cond: ; preds = %entry, %for.cond
+ invoke void @bar(%struct.__CFString* undef, %struct.__CFString* null)
+ to label %for.cond unwind label %lpad5
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = extractvalue { i8*, i32 } %0, 1
+ br label %ehcleanup
+
+lpad5: ; preds = %for.cond
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %4 = extractvalue { i8*, i32 } %3, 0
+ %5 = extractvalue { i8*, i32 } %3, 1
+ invoke void @release(i8* %call2)
+ to label %ehcleanup unwind label %terminate.lpad.i.i16
+
+terminate.lpad.i.i16: ; preds = %lpad5
+ %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
+ tail call void @terminatev() noreturn nounwind
+ unreachable
+
+ehcleanup: ; preds = %lpad5, %lpad
+ %exn.slot.0 = phi i8* [ %1, %lpad ], [ %4, %lpad5 ]
+ %ehselector.slot.0 = phi i32 [ %2, %lpad ], [ %5, %lpad5 ]
+ %7 = bitcast %struct.__CFString* %call to i8*
+ invoke void @release(i8* %7)
+ to label %_ZN5SmartIPK10__CFStringED1Ev.exit unwind label %terminate.lpad.i.i
+
+terminate.lpad.i.i: ; preds = %ehcleanup
+ %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
+ tail call void @terminatev() noreturn nounwind
+ unreachable
+
+_ZN5SmartIPK10__CFStringED1Ev.exit: ; preds = %ehcleanup
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
+ %lpad.val12 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
+ resume { i8*, i32 } %lpad.val12
+}
+
+declare %struct.__CFString* @bar3()
+
+declare i8* @bar2()
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare void @release(i8*)
+
+declare void @terminatev()
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index 057ea11389ac..e93cdbc10a46 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
; CHECK: bic {{.*}}, #15
-; CHECK: vst1.64 {{.*}}sp, :128
-; CHECK: vld1.64 {{.*}}sp, :128
+; CHECK: vst1.64 {{.*}}sp:128
+; CHECK: vld1.64 {{.*}}sp:128
entry:
%aligned_vec = alloca <4 x float>, align 16
%"alloca point" = bitcast i32 0 to i32
diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll
index 455bfce0f2e5..1bc0315354cb 100644
--- a/test/CodeGen/ARM/subreg-remat.ll
+++ b/test/CodeGen/ARM/subreg-remat.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7-apple-ios"
;
; CHECK: f1
; CHECK: vmov d0, r0, r0
-; CHECK: vldr s0, LCPI
+; CHECK: vldr s1, LCPI
; The vector must be spilled:
; CHECK: vstr d0,
; CHECK: asm clobber d0
@@ -20,8 +20,8 @@ target triple = "thumbv7-apple-ios"
; CHECK: vldr [[D16:d[0-9]+]],
; CHECK: vstr [[D16]], [r1]
define void @f1(float %x, <2 x float>* %p) {
- %v1 = insertelement <2 x float> undef, float %x, i32 1
- %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 0
+ %v1 = insertelement <2 x float> undef, float %x, i32 0
+ %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 1
%y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind
store <2 x float> %v2, <2 x float>* %p, align 8
ret void
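Reading the @f1 change together with the CHECK update from s0 to s1: %x now lands in lane 0 (provided by the vmov), and the pi constant is rematerialized into the odd subregister, so spilling d0 covers both lanes. Expected sequence, per the CHECKs (assumed interpretation):

;   vmov d0, r0, r0   @ %x into lane 0 (s0)
;   vldr s1, LCPI...  @ constant rematerialized into the high lane of d0
;   vstr d0, [...]    @ whole vector spilled as one d register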
diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll
index 21865f8e4aed..a4e3c3c0efa9 100644
--- a/test/CodeGen/ARM/trap.ll
+++ b/test/CodeGen/ARM/trap.ll
@@ -1,5 +1,23 @@
; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=INSTR
; RUN: llc < %s -mtriple=arm-apple-darwin -trap-func=_trap | FileCheck %s -check-prefix=FUNC
+; RUN: llc -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7-unknown-nacl - \
+; RUN: | FileCheck %s -check-prefix=ENCODING-NACL
+; RUN: llc -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 -mattr=+nacl-trap - \
+; RUN: | FileCheck %s -check-prefix=ENCODING-NACL
+; RUN: llc -mtriple=armv7 -mattr=+nacl-trap -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 -mattr=+nacl-trap - \
+; RUN: | FileCheck %s -check-prefix=ENCODING-NACL
+; RUN: llc -fast-isel -mtriple=armv7-unknown-nacl -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7-unknown-nacl - \
+; RUN: | FileCheck %s -check-prefix=ENCODING-NACL
+; RUN: llc -mtriple=armv7 -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 - \
+; RUN: | FileCheck %s -check-prefix=ENCODING-ALL
+; RUN: llc -fast-isel -mtriple=armv7 -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 - \
+; RUN: | FileCheck %s -check-prefix=ENCODING-ALL
; rdar://7961298
; rdar://9249183
@@ -10,6 +28,11 @@ entry:
; FUNC: t:
; FUNC: bl __trap
+
+; ENCODING-NACL: f0 de fe e7
+
+; ENCODING-ALL: fe de ff e7
+
call void @llvm.trap()
unreachable
}
@@ -21,6 +44,11 @@ entry:
; FUNC: t2:
; FUNC: bl __trap
+
+; ENCODING-NACL: f0 de fe e7
+
+; ENCODING-ALL: fe de ff e7
+
call void @llvm.debugtrap()
unreachable
}
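For reference, the ENCODING byte dumps are little-endian words; decoding them (a reading of the patterns above, not an independent claim):

;   fe de ff e7  ->  0xE7FFDEFE   default llvm.trap / llvm.debugtrap encoding
;   f0 de fe e7  ->  0xE7FEDEF0   trap selected by +nacl-trap or a *-nacl triple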
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index c078f493094b..e67b4788a37d 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -156,3 +156,175 @@ define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
+
+; We currently estimate the cost of sext/zext/trunc v8(v16)i32 <-> v8(v16)i8
+; instructions as expensive. If lowering is improved, the cost model needs
+; to change.
+; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
+%T0_5 = type <8 x i8>
+%T1_5 = type <8 x i32>
+; CHECK: func_cvt5:
+define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
+; CHECK: vmovl.s8
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+ %v0 = load %T0_5* %loadaddr
+; COST: func_cvt5
+; COST: cost of 3 {{.*}} sext
+ %r = sext %T0_5 %v0 to %T1_5
+ store %T1_5 %r, %T1_5* %storeaddr
+ ret void
+}
+;; We currently estimate the cost of this instruction as expensive. If lowering
+;; is improved, the cost needs to change.
+%TA0_5 = type <8 x i8>
+%TA1_5 = type <8 x i32>
+; CHECK: func_cvt1:
+define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) {
+; CHECK: vmovl.u8
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+ %v0 = load %TA0_5* %loadaddr
+; COST: func_cvt1
+; COST: cost of 3 {{.*}} zext
+ %r = zext %TA0_5 %v0 to %TA1_5
+ store %TA1_5 %r, %TA1_5* %storeaddr
+ ret void
+}
+;; We currently estimate the cost of this instruction as expensive. If lowering
+;; is improved, the cost needs to change.
+%T0_51 = type <8 x i32>
+%T1_51 = type <8 x i8>
+; CHECK: func_cvt51:
+define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) {
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+ %v0 = load %T0_51* %loadaddr
+; COST: func_cvt51
+; COST: cost of 19 {{.*}} trunc
+ %r = trunc %T0_51 %v0 to %T1_51
+ store %T1_51 %r, %T1_51* %storeaddr
+ ret void
+}
+;; We currently estimate the cost of this instruction as expensive. If lowering
+;; is improved, the cost needs to change.
+%TT0_5 = type <16 x i8>
+%TT1_5 = type <16 x i32>
+; CHECK: func_cvt52:
+define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+ %v0 = load %TT0_5* %loadaddr
+; COST: func_cvt52
+; COST: cost of 6 {{.*}} sext
+ %r = sext %TT0_5 %v0 to %TT1_5
+ store %TT1_5 %r, %TT1_5* %storeaddr
+ ret void
+}
+;; We currently estimate the cost of this instruction as expensive. If lowering
+;; is improved, the cost needs to change.
+%TTA0_5 = type <16 x i8>
+%TTA1_5 = type <16 x i32>
+; CHECK: func_cvt12:
+define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) {
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+ %v0 = load %TTA0_5* %loadaddr
+; COST: func_cvt12
+; COST: cost of 6 {{.*}} zext
+ %r = zext %TTA0_5 %v0 to %TTA1_5
+ store %TTA1_5 %r, %TTA1_5* %storeaddr
+ ret void
+}
+;; We currently estimate the cost of this instruction as expensive. If lowering
+;; is improved, the cost needs to change.
+%TT0_51 = type <16 x i32>
+%TT1_51 = type <16 x i8>
+; CHECK: func_cvt512:
+define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+ %v0 = load %TT0_51* %loadaddr
+; COST: func_cvt512
+; COST: cost of 38 {{.*}} trunc
+ %r = trunc %TT0_51 %v0 to %TT1_51
+ store %TT1_51 %r, %TT1_51* %storeaddr
+ ret void
+}
+
+; CHECK: sext_v4i16_v4i64:
+define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+ %v0 = load <4 x i16>* %loadaddr
+; COST: sext_v4i16_v4i64
+; COST: cost of 3 {{.*}} sext
+ %r = sext <4 x i16> %v0 to <4 x i64>
+ store <4 x i64> %r, <4 x i64>* %storeaddr
+ ret void
+}
+
+; CHECK: zext_v4i16_v4i64:
+define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+ %v0 = load <4 x i16>* %loadaddr
+; COST: zext_v4i16_v4i64
+; COST: cost of 3 {{.*}} zext
+ %r = zext <4 x i16> %v0 to <4 x i64>
+ store <4 x i64> %r, <4 x i64>* %storeaddr
+ ret void
+}
+
+; CHECK: sext_v8i16_v8i64:
+define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+ %v0 = load <8 x i16>* %loadaddr
+; COST: sext_v8i16_v8i64
+; COST: cost of 6 {{.*}} sext
+ %r = sext <8 x i16> %v0 to <8 x i64>
+ store <8 x i64> %r, <8 x i64>* %storeaddr
+ ret void
+}
+
+; CHECK: zext_v8i16_v8i64:
+define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+ %v0 = load <8 x i16>* %loadaddr
+; COST: zext_v8i16_v8i64
+; COST: cost of 6 {{.*}} zext
+ %r = zext <8 x i16> %v0 to <8 x i64>
+ store <8 x i64> %r, <8 x i64>* %storeaddr
+ ret void
+}
+
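Shell form of the COST run line above, for anyone reproducing the numbers (the output wording is an assumption based on the "cost of N" substrings the test matches):

;   opt < vcvt.ll -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 \
;       -march=arm -mcpu=cortex-a8
;   => Cost Model: Found an estimated cost of 3 for instruction: %r = sext ...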
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
index a38a0feae042..42964deb0b5e 100644
--- a/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -133,3 +133,30 @@ define i16 @foldBuildVectors() {
%3 = extractelement <8 x i16> %2, i32 0
ret i16 %3
}
+
+; Test that we generate vrev and vext for reverse shuffles of v8i16
+; vectors.
+; CHECK: reverse_v8i16
+define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
+ %v0 = load <8 x i16>* %loadaddr
+ ; CHECK: vrev64.16
+ ; CHECK: vext.16
+ %v1 = shufflevector <8 x i16> %v0, <8 x i16> undef,
+ <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ store <8 x i16> %v1, <8 x i16>* %storeaddr
+ ret void
+}
+
+; Test that we generate vrev and vext for reverse shuffles of v16i8
+; vectors.
+; CHECK: reverse_v16i8
+define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
+ %v0 = load <16 x i8>* %loadaddr
+ ; CHECK: vrev64.8
+ ; CHECK: vext.8
+ %v1 = shufflevector <16 x i8> %v0, <16 x i8> undef,
+ <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8,
+ i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ store <16 x i8> %v1, <16 x i8>* %storeaddr
+ ret void
+}
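The reverse shuffle is not a single NEON operation; per the CHECK lines the assumed decomposition for v8i16 is:

;   vrev64.16 q8, q8          @ reverse within each d half: 3 2 1 0 7 6 5 4
;   vext.16   q8, q8, q8, #4  @ rotate by four lanes:       7 6 5 4 3 2 1 0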
diff --git a/test/CodeGen/ARM/vfloatintrinsics.ll b/test/CodeGen/ARM/vfloatintrinsics.ll
new file mode 100644
index 000000000000..6f53b2ccd96c
--- /dev/null
+++ b/test/CodeGen/ARM/vfloatintrinsics.ll
@@ -0,0 +1,377 @@
+; RUN: llc -mcpu=swift -march=arm < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.1.0"
+
+;;; Float vectors
+
+%v2f32 = type <2 x float>
+; CHECK: test_v2f32.sqrt:
+define %v2f32 @test_v2f32.sqrt(%v2f32 %a) {
+ ; CHECK: sqrt
+ %1 = call %v2f32 @llvm.sqrt.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.powi:
+define %v2f32 @test_v2f32.powi(%v2f32 %a, i32 %b) {
+ ; CHECK: pow
+ %1 = call %v2f32 @llvm.powi.v2f32(%v2f32 %a, i32 %b)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.sin:
+define %v2f32 @test_v2f32.sin(%v2f32 %a) {
+ ; CHECK: sin
+ %1 = call %v2f32 @llvm.sin.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.cos:
+define %v2f32 @test_v2f32.cos(%v2f32 %a) {
+ ; CHECK: cos
+ %1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.pow:
+define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) {
+ ; CHECK: pow
+ %1 = call %v2f32 @llvm.pow.v2f32(%v2f32 %a, %v2f32 %b)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.exp:
+define %v2f32 @test_v2f32.exp(%v2f32 %a) {
+ ; CHECK: exp
+ %1 = call %v2f32 @llvm.exp.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.exp2:
+define %v2f32 @test_v2f32.exp2(%v2f32 %a) {
+ ; CHECK: exp
+ %1 = call %v2f32 @llvm.exp2.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.log:
+define %v2f32 @test_v2f32.log(%v2f32 %a) {
+ ; CHECK: log
+ %1 = call %v2f32 @llvm.log.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.log10:
+define %v2f32 @test_v2f32.log10(%v2f32 %a) {
+ ; CHECK: log
+ %1 = call %v2f32 @llvm.log10.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.log2:
+define %v2f32 @test_v2f32.log2(%v2f32 %a) {
+ ; CHECK: log
+ %1 = call %v2f32 @llvm.log2.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.fma:
+define %v2f32 @test_v2f32.fma(%v2f32 %a, %v2f32 %b, %v2f32 %c) {
+ ; CHECK: fma
+ %1 = call %v2f32 @llvm.fma.v2f32(%v2f32 %a, %v2f32 %b, %v2f32 %c)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.fabs:
+define %v2f32 @test_v2f32.fabs(%v2f32 %a) {
+ ; CHECK: fabs
+ %1 = call %v2f32 @llvm.fabs.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.floor:
+define %v2f32 @test_v2f32.floor(%v2f32 %a) {
+ ; CHECK: floor
+ %1 = call %v2f32 @llvm.floor.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.fceil:
+define %v2f32 @test_v2f32.fceil(%v2f32 %a) {
+ ; CHECK: ceil
+ %1 = call %v2f32 @llvm.fceil.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.trunc:
+define %v2f32 @test_v2f32.trunc(%v2f32 %a) {
+ ; CHECK: trunc
+ %1 = call %v2f32 @llvm.trunc.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.rint:
+define %v2f32 @test_v2f32.rint(%v2f32 %a) {
+ ; CHECK: rint
+ %1 = call %v2f32 @llvm.rint.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+; CHECK: test_v2f32.nearbyint:
+define %v2f32 @test_v2f32.nearbyint(%v2f32 %a) {
+ ; CHECK: nearbyint
+ %1 = call %v2f32 @llvm.nearbyint.v2f32(%v2f32 %a)
+ ret %v2f32 %1
+}
+
+declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0
+declare %v2f32 @llvm.powi.v2f32(%v2f32, i32) #0
+declare %v2f32 @llvm.sin.v2f32(%v2f32) #0
+declare %v2f32 @llvm.cos.v2f32(%v2f32) #0
+declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0
+declare %v2f32 @llvm.exp.v2f32(%v2f32) #0
+declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0
+declare %v2f32 @llvm.log.v2f32(%v2f32) #0
+declare %v2f32 @llvm.log10.v2f32(%v2f32) #0
+declare %v2f32 @llvm.log2.v2f32(%v2f32) #0
+declare %v2f32 @llvm.fma.v2f32(%v2f32, %v2f32, %v2f32) #0
+declare %v2f32 @llvm.fabs.v2f32(%v2f32) #0
+declare %v2f32 @llvm.floor.v2f32(%v2f32) #0
+declare %v2f32 @llvm.fceil.v2f32(%v2f32) #0
+declare %v2f32 @llvm.trunc.v2f32(%v2f32) #0
+declare %v2f32 @llvm.rint.v2f32(%v2f32) #0
+declare %v2f32 @llvm.nearbyint.v2f32(%v2f32) #0
+
+;;;
+
+%v4f32 = type <4 x float>
+; CHECK: test_v4f32.sqrt:
+define %v4f32 @test_v4f32.sqrt(%v4f32 %a) {
+ ; CHECK: sqrt
+ %1 = call %v4f32 @llvm.sqrt.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.powi:
+define %v4f32 @test_v4f32.powi(%v4f32 %a, i32 %b) {
+ ; CHECK: pow
+ %1 = call %v4f32 @llvm.powi.v4f32(%v4f32 %a, i32 %b)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.sin:
+define %v4f32 @test_v4f32.sin(%v4f32 %a) {
+ ; CHECK: sin
+ %1 = call %v4f32 @llvm.sin.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.cos:
+define %v4f32 @test_v4f32.cos(%v4f32 %a) {
+ ; CHECK: cos
+ %1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.pow:
+define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) {
+ ; CHECK: pow
+ %1 = call %v4f32 @llvm.pow.v4f32(%v4f32 %a, %v4f32 %b)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.exp:
+define %v4f32 @test_v4f32.exp(%v4f32 %a) {
+ ; CHECK: exp
+ %1 = call %v4f32 @llvm.exp.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.exp2:
+define %v4f32 @test_v4f32.exp2(%v4f32 %a) {
+ ; CHECK: exp
+ %1 = call %v4f32 @llvm.exp2.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.log:
+define %v4f32 @test_v4f32.log(%v4f32 %a) {
+ ; CHECK: log
+ %1 = call %v4f32 @llvm.log.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.log10:
+define %v4f32 @test_v4f32.log10(%v4f32 %a) {
+ ; CHECK: log
+ %1 = call %v4f32 @llvm.log10.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.log2:
+define %v4f32 @test_v4f32.log2(%v4f32 %a) {
+ ; CHECK: log
+ %1 = call %v4f32 @llvm.log2.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.fma:
+define %v4f32 @test_v4f32.fma(%v4f32 %a, %v4f32 %b, %v4f32 %c) {
+ ; CHECK: fma
+ %1 = call %v4f32 @llvm.fma.v4f32(%v4f32 %a, %v4f32 %b, %v4f32 %c)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.fabs:
+define %v4f32 @test_v4f32.fabs(%v4f32 %a) {
+ ; CHECK: fabs
+ %1 = call %v4f32 @llvm.fabs.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.floor:
+define %v4f32 @test_v4f32.floor(%v4f32 %a) {
+ ; CHECK: floor
+ %1 = call %v4f32 @llvm.floor.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.fceil:
+define %v4f32 @test_v4f32.fceil(%v4f32 %a) {
+ ; CHECK: ceil
+ %1 = call %v4f32 @llvm.fceil.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.trunc:
+define %v4f32 @test_v4f32.trunc(%v4f32 %a) {
+ ; CHECK: trunc
+ %1 = call %v4f32 @llvm.trunc.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.rint:
+define %v4f32 @test_v4f32.rint(%v4f32 %a) {
+ ; CHECK: rint
+ %1 = call %v4f32 @llvm.rint.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+; CHECK: test_v4f32.nearbyint:
+define %v4f32 @test_v4f32.nearbyint(%v4f32 %a) {
+ ; CHECK: nearbyint
+ %1 = call %v4f32 @llvm.nearbyint.v4f32(%v4f32 %a)
+ ret %v4f32 %1
+}
+
+declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0
+declare %v4f32 @llvm.powi.v4f32(%v4f32, i32) #0
+declare %v4f32 @llvm.sin.v4f32(%v4f32) #0
+declare %v4f32 @llvm.cos.v4f32(%v4f32) #0
+declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0
+declare %v4f32 @llvm.exp.v4f32(%v4f32) #0
+declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0
+declare %v4f32 @llvm.log.v4f32(%v4f32) #0
+declare %v4f32 @llvm.log10.v4f32(%v4f32) #0
+declare %v4f32 @llvm.log2.v4f32(%v4f32) #0
+declare %v4f32 @llvm.fma.v4f32(%v4f32, %v4f32, %v4f32) #0
+declare %v4f32 @llvm.fabs.v4f32(%v4f32) #0
+declare %v4f32 @llvm.floor.v4f32(%v4f32) #0
+declare %v4f32 @llvm.fceil.v4f32(%v4f32) #0
+declare %v4f32 @llvm.trunc.v4f32(%v4f32) #0
+declare %v4f32 @llvm.rint.v4f32(%v4f32) #0
+declare %v4f32 @llvm.nearbyint.v4f32(%v4f32) #0
+
+;;; Double vector
+
+%v2f64 = type <2 x double>
+; CHECK: test_v2f64.sqrt:
+define %v2f64 @test_v2f64.sqrt(%v2f64 %a) {
+ ; CHECK: sqrt
+ %1 = call %v2f64 @llvm.sqrt.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.powi:
+define %v2f64 @test_v2f64.powi(%v2f64 %a, i32 %b) {
+ ; CHECK: pow
+ %1 = call %v2f64 @llvm.powi.v2f64(%v2f64 %a, i32 %b)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.sin:
+define %v2f64 @test_v2f64.sin(%v2f64 %a) {
+ ; CHECK: sin
+ %1 = call %v2f64 @llvm.sin.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.cos:
+define %v2f64 @test_v2f64.cos(%v2f64 %a) {
+ ; CHECK: cos
+ %1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.pow:
+define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) {
+ ; CHECK: pow
+ %1 = call %v2f64 @llvm.pow.v2f64(%v2f64 %a, %v2f64 %b)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.exp:
+define %v2f64 @test_v2f64.exp(%v2f64 %a) {
+ ; CHECK: exp
+ %1 = call %v2f64 @llvm.exp.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.exp2:
+define %v2f64 @test_v2f64.exp2(%v2f64 %a) {
+ ; CHECK: exp
+ %1 = call %v2f64 @llvm.exp2.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.log:
+define %v2f64 @test_v2f64.log(%v2f64 %a) {
+ ; CHECK: log
+ %1 = call %v2f64 @llvm.log.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.log10:
+define %v2f64 @test_v2f64.log10(%v2f64 %a) {
+ ; CHECK: log
+ %1 = call %v2f64 @llvm.log10.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.log2:
+define %v2f64 @test_v2f64.log2(%v2f64 %a) {
+ ; CHECK: log
+ %1 = call %v2f64 @llvm.log2.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.fma:
+define %v2f64 @test_v2f64.fma(%v2f64 %a, %v2f64 %b, %v2f64 %c) {
+ ; CHECK: fma
+ %1 = call %v2f64 @llvm.fma.v2f64(%v2f64 %a, %v2f64 %b, %v2f64 %c)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.fabs:
+define %v2f64 @test_v2f64.fabs(%v2f64 %a) {
+ ; CHECK: fabs
+ %1 = call %v2f64 @llvm.fabs.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.floor:
+define %v2f64 @test_v2f64.floor(%v2f64 %a) {
+ ; CHECK: floor
+ %1 = call %v2f64 @llvm.floor.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.fceil:
+define %v2f64 @test_v2f64.fceil(%v2f64 %a) {
+ ; CHECK: ceil
+ %1 = call %v2f64 @llvm.fceil.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.trunc:
+define %v2f64 @test_v2f64.trunc(%v2f64 %a) {
+ ; CHECK: trunc
+ %1 = call %v2f64 @llvm.trunc.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.rint:
+define %v2f64 @test_v2f64.rint(%v2f64 %a) {
+ ; CHECK: rint
+ %1 = call %v2f64 @llvm.rint.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+; CHECK: test_v2f64.nearbyint:
+define %v2f64 @test_v2f64.nearbyint(%v2f64 %a) {
+ ; CHECK: nearbyint
+ %1 = call %v2f64 @llvm.nearbyint.v2f64(%v2f64 %a)
+ ret %v2f64 %1
+}
+
+declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0
+declare %v2f64 @llvm.powi.v2f64(%v2f64, i32) #0
+declare %v2f64 @llvm.sin.v2f64(%v2f64) #0
+declare %v2f64 @llvm.cos.v2f64(%v2f64) #0
+declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0
+declare %v2f64 @llvm.exp.v2f64(%v2f64) #0
+declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0
+declare %v2f64 @llvm.log.v2f64(%v2f64) #0
+declare %v2f64 @llvm.log10.v2f64(%v2f64) #0
+declare %v2f64 @llvm.log2.v2f64(%v2f64) #0
+declare %v2f64 @llvm.fma.v2f64(%v2f64, %v2f64, %v2f64) #0
+declare %v2f64 @llvm.fabs.v2f64(%v2f64) #0
+declare %v2f64 @llvm.floor.v2f64(%v2f64) #0
+declare %v2f64 @llvm.fceil.v2f64(%v2f64) #0
+declare %v2f64 @llvm.trunc.v2f64(%v2f64) #0
+declare %v2f64 @llvm.rint.v2f64(%v2f64) #0
+declare %v2f64 @llvm.nearbyint.v2f64(%v2f64) #0
+
+attributes #0 = { nounwind readonly }
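Most of these vector math intrinsics have no NEON instruction, so the CHECK substrings above largely match scalarized libcalls; e.g. for test_v2f32.sin the assumed expansion is:

;   bl _sinf   @ lane 0
;   bl _sinf   @ lane 1
; (sqrt/fabs may instead select VFP instructions such as vsqrt.f32/vabs.f32;
;  the test only greps for a substring, so either form passes)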
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index e524395c501a..994f05dacb84 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -4,7 +4,7 @@
define <8 x i8> @vld1i8(i8* %A) nounwind {
;CHECK: vld1i8:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vld1.8 {d16}, [r0, :64]
+;CHECK: vld1.8 {d16}, [r0:64]
%tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16)
ret <8 x i8> %tmp1
}
@@ -68,7 +68,7 @@ define <1 x i64> @vld1i64(i64* %A) nounwind {
define <16 x i8> @vld1Qi8(i8* %A) nounwind {
;CHECK: vld1Qi8:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vld1.8 {d16, d17}, [r0, :64]
+;CHECK: vld1.8 {d16, d17}, [r0:64]
%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
ret <16 x i8> %tmp1
}
@@ -76,7 +76,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {
;Check for a post-increment updating load.
define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
;CHECK: vld1Qi8_update:
-;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}, :64]!
+;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]!
%A = load i8** %ptr
%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
%tmp2 = getelementptr i8* %A, i32 16
@@ -87,7 +87,7 @@ define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
define <8 x i16> @vld1Qi16(i16* %A) nounwind {
;CHECK: vld1Qi16:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vld1.16 {d16, d17}, [r0, :128]
+;CHECK: vld1.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32)
ret <8 x i16> %tmp1
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 29b379465db5..caa016e929d8 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -14,7 +14,7 @@
define <8 x i8> @vld2i8(i8* %A) nounwind {
;CHECK: vld2i8:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vld2.8 {d16, d17}, [r0, :64]
+;CHECK: vld2.8 {d16, d17}, [r0:64]
%tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
@@ -25,7 +25,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
define <4 x i16> @vld2i16(i16* %A) nounwind {
;CHECK: vld2i16:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vld2.16 {d16, d17}, [r0, :128]
+;CHECK: vld2.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
@@ -74,7 +74,7 @@ define <2 x float> @vld2f_update(float** %ptr) nounwind {
define <1 x i64> @vld2i64(i64* %A) nounwind {
;CHECK: vld2i64:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vld1.64 {d16, d17}, [r0, :128]
+;CHECK: vld1.64 {d16, d17}, [r0:128]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
@@ -86,7 +86,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind {
define <16 x i8> @vld2Qi8(i8* %A) nounwind {
;CHECK: vld2Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64]
+;CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64]
%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
@@ -97,7 +97,7 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind {
;Check for a post-increment updating load with register increment.
define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK: vld2Qi8_update:
-;CHECK: vld2.8 {d16, d17, d18, d19}, [r2, :128], r1
+;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1
%A = load i8** %ptr
%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
%tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
@@ -111,7 +111,7 @@ define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
define <8 x i16> @vld2Qi16(i16* %A) nounwind {
;CHECK: vld2Qi16:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128]
+;CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
@@ -123,7 +123,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {
define <4 x i32> @vld2Qi32(i32* %A) nounwind {
;CHECK: vld2Qi32:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64)
%tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index b495319830b0..ad63e1f716b2 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -15,7 +15,7 @@
define <8 x i8> @vld3i8(i8* %A) nounwind {
;CHECK: vld3i8:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vld3.8 {d16, d17, d18}, [r0, :64]
+;CHECK: vld3.8 {d16, d17, d18}, [r0:64]
%tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32)
%tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
@@ -74,7 +74,7 @@ define <2 x float> @vld3f(float* %A) nounwind {
define <1 x i64> @vld3i64(i64* %A) nounwind {
;CHECK: vld3i64:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vld1.64 {d16, d17, d18}, [r0, :64]
+;CHECK: vld1.64 {d16, d17, d18}, [r0:64]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
@@ -86,8 +86,8 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
define <16 x i8> @vld3Qi8(i8* %A) nounwind {
;CHECK: vld3Qi8:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vld3.8 {d16, d18, d20}, [r0, :64]!
-;CHECK: vld3.8 {d17, d19, d21}, [r0, :64]
+;CHECK: vld3.8 {d16, d18, d20}, [r0:64]!
+;CHECK: vld3.8 {d17, d19, d21}, [r0:64]
%tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32)
%tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index 59a73db3187e..9ee5fe46eea2 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -14,7 +14,7 @@
define <8 x i8> @vld4i8(i8* %A) nounwind {
;CHECK: vld4i8:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64]
+;CHECK: vld4.8 {d16, d17, d18, d19}, [r0:64]
%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
@@ -25,7 +25,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {
;Check for a post-increment updating load with register increment.
define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK: vld4i8_update:
-;CHECK: vld4.8 {d16, d17, d18, d19}, [r2, :128], r1
+;CHECK: vld4.8 {d16, d17, d18, d19}, [r2:128], r1
%A = load i8** %ptr
%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
@@ -39,7 +39,7 @@ define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
define <4 x i16> @vld4i16(i16* %A) nounwind {
;CHECK: vld4i16:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128]
+;CHECK: vld4.16 {d16, d17, d18, d19}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
@@ -51,7 +51,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
define <2 x i32> @vld4i32(i32* %A) nounwind {
;CHECK: vld4i32:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vld4.32 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
@@ -74,7 +74,7 @@ define <2 x float> @vld4f(float* %A) nounwind {
define <1 x i64> @vld4i64(i64* %A) nounwind {
;CHECK: vld4i64:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld1.64 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vld1.64 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
%tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
@@ -86,8 +86,8 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
define <16 x i8> @vld4Qi8(i8* %A) nounwind {
;CHECK: vld4Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]!
-;CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256]
+;CHECK: vld4.8 {d16, d18, d20, d22}, [r0:256]!
+;CHECK: vld4.8 {d17, d19, d21, d23}, [r0:256]
%tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64)
%tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
@@ -111,8 +111,8 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind {
;Check for a post-increment updating load.
define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
;CHECK: vld4Qi16_update:
-;CHECK: vld4.16 {d16, d18, d20, d22}, [r1, :64]!
-;CHECK: vld4.16 {d17, d19, d21, d23}, [r1, :64]!
+;CHECK: vld4.16 {d16, d18, d20, d22}, [r1:64]!
+;CHECK: vld4.16 {d17, d19, d21, d23}, [r1:64]!
%A = load i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8)
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index c69473f87f98..7c7319c090ba 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -13,7 +13,7 @@ define <8 x i8> @vld1dupi8(i8* %A) nounwind {
define <4 x i16> @vld1dupi16(i16* %A) nounwind {
;CHECK: vld1dupi16:
;Check the alignment value. Max for this instruction is 16 bits:
-;CHECK: vld1.16 {d16[]}, [r0, :16]
+;CHECK: vld1.16 {d16[]}, [r0:16]
%tmp1 = load i16* %A, align 8
%tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -23,7 +23,7 @@ define <4 x i16> @vld1dupi16(i16* %A) nounwind {
define <2 x i32> @vld1dupi32(i32* %A) nounwind {
;CHECK: vld1dupi32:
;Check the alignment value. Max for this instruction is 32 bits:
-;CHECK: vld1.32 {d16[]}, [r0, :32]
+;CHECK: vld1.32 {d16[]}, [r0:32]
%tmp1 = load i32* %A, align 8
%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -32,7 +32,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
define <2 x float> @vld1dupf(float* %A) nounwind {
;CHECK: vld1dupf:
-;CHECK: vld1.32 {d16[]}, [r0, :32]
+;CHECK: vld1.32 {d16[]}, [r0:32]
%tmp0 = load float* %A
%tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
@@ -51,7 +51,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
define <4 x float> @vld1dupQf(float* %A) nounwind {
;CHECK: vld1dupQf:
-;CHECK: vld1.32 {d16[], d17[]}, [r0, :32]
+;CHECK: vld1.32 {d16[], d17[]}, [r0:32]
%tmp0 = load float* %A
%tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
@@ -109,7 +109,7 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
define <2 x i32> @vld2dupi32(i8* %A) nounwind {
;CHECK: vld2dupi32:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vld2.32 {d16[], d17[]}, [r0, :64]
+;CHECK: vld2.32 {d16[], d17[]}, [r0:64]
%tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
%tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -194,7 +194,7 @@ define <2 x i32> @vld4dupi32(i8* %A) nounwind {
;CHECK: vld4dupi32:
;Check the alignment value. An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
-;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0, :64]
+;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0:64]
%tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 7bd0cbda02b1..f35fa92f5dc7 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -14,7 +14,7 @@ define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld1lanei16:
;Check the alignment value. Max for this instruction is 16 bits:
-;CHECK: vld1.16 {d16[2]}, [r0, :16]
+;CHECK: vld1.16 {d16[2]}, [r0:16]
%tmp1 = load <4 x i16>* %B
%tmp2 = load i16* %A, align 8
%tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
@@ -24,7 +24,7 @@ define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld1lanei32:
;Check the alignment value. Max for this instruction is 32 bits:
-;CHECK: vld1.32 {d16[1]}, [r0, :32]
+;CHECK: vld1.32 {d16[1]}, [r0:32]
%tmp1 = load <2 x i32>* %B
%tmp2 = load i32* %A, align 8
%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
@@ -34,7 +34,7 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld1lanei32a32:
;Check the alignment value. Legal values are none or :32.
-;CHECK: vld1.32 {d16[1]}, [r0, :32]
+;CHECK: vld1.32 {d16[1]}, [r0:32]
%tmp1 = load <2 x i32>* %B
%tmp2 = load i32* %A, align 4
%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
@@ -43,7 +43,7 @@ define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld1lanef:
-;CHECK: vld1.32 {d16[1]}, [r0, :32]
+;CHECK: vld1.32 {d16[1]}, [r0:32]
%tmp1 = load <2 x float>* %B
%tmp2 = load float* %A, align 4
%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
@@ -61,7 +61,7 @@ define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld1laneQi16:
-;CHECK: vld1.16 {d17[1]}, [r0, :16]
+;CHECK: vld1.16 {d17[1]}, [r0:16]
%tmp1 = load <8 x i16>* %B
%tmp2 = load i16* %A, align 8
%tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
@@ -70,7 +70,7 @@ define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld1laneQi32:
-;CHECK: vld1.32 {d17[1]}, [r0, :32]
+;CHECK: vld1.32 {d17[1]}, [r0:32]
%tmp1 = load <4 x i32>* %B
%tmp2 = load i32* %A, align 8
%tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
@@ -79,7 +79,7 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld1laneQf:
-;CHECK: vld1.32 {d16[0]}, [r0, :32]
+;CHECK: vld1.32 {d16[0]}, [r0:32]
%tmp1 = load <4 x float>* %B
%tmp2 = load float* %A
%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
@@ -98,7 +98,7 @@ define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
;Check the alignment value. Max for this instruction is 16 bits:
-;CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
+;CHECK: vld2.8 {d16[1], d17[1]}, [r0:16]
%tmp1 = load <8 x i8>* %B
%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
@@ -110,7 +110,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2lanei16:
;Check the alignment value. Max for this instruction is 32 bits:
-;CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
+;CHECK: vld2.16 {d16[1], d17[1]}, [r0:32]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
%tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
@@ -176,7 +176,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2laneQi32:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}, :64]
+;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}:64]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
%tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
@@ -354,7 +354,7 @@ declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x flo
define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
;Check the alignment value. Max for this instruction is 32 bits:
-;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}, :32]
+;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}:32]
%tmp1 = load <8 x i8>* %B
%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
@@ -370,7 +370,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;Check for a post-increment updating load.
define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8_update:
-;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :32]!
+;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:32]!
%A = load i8** %ptr
%tmp1 = load <8 x i8>* %B
%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
@@ -408,7 +408,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4lanei32:
;Check the alignment value. An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
-;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}, :64]
+;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:64]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
@@ -441,7 +441,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4laneQi16:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}, :64]
+;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}:64]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
diff --git a/test/CodeGen/ARM/vselect_imax.ll b/test/CodeGen/ARM/vselect_imax.ll
index f5994046de4b..7e79d6c68c2b 100644
--- a/test/CodeGen/ARM/vselect_imax.ll
+++ b/test/CodeGen/ARM/vselect_imax.ll
@@ -10,3 +10,114 @@ define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
ret void
}
+; We adjusted the cost model of the following selects. When we improve code
+; lowering, we also need to adjust the cost.
+%T0_10 = type <16 x i16>
+%T1_10 = type <16 x i1>
+; CHECK: func_blend10:
+define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
+ %T1_10* %blend, %T0_10* %storeaddr) {
+ %v0 = load %T0_10* %loadaddr
+ %v1 = load %T0_10* %loadaddr2
+ %c = icmp slt %T0_10 %v0, %v1
+; CHECK: vst1
+; CHECK: vst1
+; CHECK: vst1
+; CHECK: vst1
+; CHECK: vld
+; COST: func_blend10
+; COST: cost of 40 {{.*}} select
+ %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
+ store %T0_10 %r, %T0_10* %storeaddr
+ ret void
+}
+%T0_14 = type <8 x i32>
+%T1_14 = type <8 x i1>
+; CHECK: func_blend14:
+define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
+ %T1_14* %blend, %T0_14* %storeaddr) {
+ %v0 = load %T0_14* %loadaddr
+ %v1 = load %T0_14* %loadaddr2
+ %c = icmp slt %T0_14 %v0, %v1
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; COST: func_blend14
+; COST: cost of 41 {{.*}} select
+ %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
+ store %T0_14 %r, %T0_14* %storeaddr
+ ret void
+}
+%T0_15 = type <16 x i32>
+%T1_15 = type <16 x i1>
+; CHECK: func_blend15:
+define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
+ %T1_15* %blend, %T0_15* %storeaddr) {
+ %v0 = load %T0_15* %loadaddr
+ %v1 = load %T0_15* %loadaddr2
+ %c = icmp slt %T0_15 %v0, %v1
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; COST: func_blend15
+; COST: cost of 82 {{.*}} select
+ %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
+ store %T0_15 %r, %T0_15* %storeaddr
+ ret void
+}
+%T0_18 = type <4 x i64>
+%T1_18 = type <4 x i1>
+; CHECK: func_blend18:
+define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
+ %T1_18* %blend, %T0_18* %storeaddr) {
+ %v0 = load %T0_18* %loadaddr
+ %v1 = load %T0_18* %loadaddr2
+ %c = icmp slt %T0_18 %v0, %v1
+; CHECK: strh
+; CHECK: strh
+; CHECK: strh
+; CHECK: strh
+; COST: func_blend18
+; COST: cost of 19 {{.*}} select
+ %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
+ store %T0_18 %r, %T0_18* %storeaddr
+ ret void
+}
+%T0_19 = type <8 x i64>
+%T1_19 = type <8 x i1>
+; CHECK: func_blend19:
+define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
+ %T1_19* %blend, %T0_19* %storeaddr) {
+ %v0 = load %T0_19* %loadaddr
+ %v1 = load %T0_19* %loadaddr2
+ %c = icmp slt %T0_19 %v0, %v1
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; COST: func_blend19
+; COST: cost of 50 {{.*}} select
+ %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
+ store %T0_19 %r, %T0_19* %storeaddr
+ ret void
+}
+%T0_20 = type <16 x i64>
+%T1_20 = type <16 x i1>
+; CHECK: func_blend20:
+define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
+ %T1_20* %blend, %T0_20* %storeaddr) {
+ %v0 = load %T0_20* %loadaddr
+ %v1 = load %T0_20* %loadaddr2
+ %c = icmp slt %T0_20 %v0, %v1
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; CHECK: strb
+; COST: func_blend20
+; COST: cost of 100 {{.*}} select
+ %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
+ store %T0_20 %r, %T0_20* %storeaddr
+ ret void
+}
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index 364d44b7116f..e1f3e8890724 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -3,7 +3,7 @@
define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst1i8:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vst1.8 {d16}, [r0, :64]
+;CHECK: vst1.8 {d16}, [r0:64]
%tmp1 = load <8 x i8>* %B
call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
ret void
@@ -61,7 +61,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst1Qi8:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vst1.8 {d16, d17}, [r0, :64]
+;CHECK: vst1.8 {d16, d17}, [r0:64]
%tmp1 = load <16 x i8>* %B
call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
ret void
@@ -70,7 +70,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst1Qi16:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vst1.16 {d16, d17}, [r0, :128]
+;CHECK: vst1.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
@@ -80,7 +80,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
;Check for a post-increment updating store with register increment.
define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
;CHECK: vst1Qi16_update:
-;CHECK: vst1.16 {d16, d17}, [r1, :64], r2
+;CHECK: vst1.16 {d16, d17}, [r1:64], r2
%A = load i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index fb05a20f6695..a31f8635fe3b 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -3,7 +3,7 @@
define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst2i8:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vst2.8 {d16, d17}, [r0, :64]
+;CHECK: vst2.8 {d16, d17}, [r0:64]
%tmp1 = load <8 x i8>* %B
call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
ret void
@@ -24,7 +24,7 @@ define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2i16:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vst2.16 {d16, d17}, [r0, :128]
+;CHECK: vst2.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32)
@@ -52,7 +52,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst2i64:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vst1.64 {d16, d17}, [r0, :128]
+;CHECK: vst1.64 {d16, d17}, [r0:128]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32)
@@ -62,7 +62,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
;Check for a post-increment updating store.
define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
;CHECK: vst2i64_update:
-;CHECK: vst1.64 {d16, d17}, [r1, :64]!
+;CHECK: vst1.64 {d16, d17}, [r1:64]!
%A = load i64** %ptr
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
@@ -75,7 +75,7 @@ define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst2Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64]
+;CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64]
%tmp1 = load <16 x i8>* %B
call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
ret void
@@ -84,7 +84,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2Qi16:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128]
+;CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
@@ -94,7 +94,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2Qi32:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index f117ab205d41..281bb730feb7 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -4,7 +4,7 @@ define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst3i8:
;Check the alignment value. Max for this instruction is 64 bits:
;This test runs at -O0 so do not check for specific register numbers.
-;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
%tmp1 = load <8 x i8>* %B
call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
ret void
@@ -54,7 +54,7 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst3i64:
;Check the alignment value. Max for this instruction is 64 bits:
;This test runs at -O0 so do not check for specific register numbers.
-;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
+;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
@@ -65,8 +65,8 @@ define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst3Qi8:
;Check the alignment value. Max for this instruction is 64 bits:
;This test runs at -O0 so do not check for specific register numbers.
-;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]!
-;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]!
+;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
%tmp1 = load <16 x i8>* %B
call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32)
ret void
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index e94acb66bf2e..7dedb2fafee2 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -3,7 +3,7 @@
define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst4i8:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64]
+;CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64]
%tmp1 = load <8 x i8>* %B
call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
ret void
@@ -12,7 +12,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
;Check for a post-increment updating store with register increment.
define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
;CHECK: vst4i8_update:
-;CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :128], r2
+;CHECK: vst4.8 {d16, d17, d18, d19}, [r1:128], r2
%A = load i8** %ptr
%tmp1 = load <8 x i8>* %B
call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
@@ -24,7 +24,7 @@ define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4i16:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128]
+;CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)
@@ -34,7 +34,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4i32:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vst4.32 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vst4.32 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)
@@ -53,7 +53,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind {
define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst4i64:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vst1.64 {d16, d17, d18, d19}, [r0, :256]
+;CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)
@@ -63,8 +63,8 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst4Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
-;CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]!
-;CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256]
+;CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]!
+;CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256]
%tmp1 = load <16 x i8>* %B
call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64)
ret void
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 758b355736d0..67f251f70689 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -26,7 +26,7 @@ define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst1lanei16:
;Check the alignment value. Max for this instruction is 16 bits:
-;CHECK: vst1.16 {d16[2]}, [r0, :16]
+;CHECK: vst1.16 {d16[2]}, [r0:16]
%tmp1 = load <4 x i16>* %B
%tmp2 = extractelement <4 x i16> %tmp1, i32 2
store i16 %tmp2, i16* %A, align 8
@@ -36,7 +36,7 @@ define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst1lanei32:
;Check the alignment value. Max for this instruction is 32 bits:
-;CHECK: vst1.32 {d16[1]}, [r0, :32]
+;CHECK: vst1.32 {d16[1]}, [r0:32]
%tmp1 = load <2 x i32>* %B
%tmp2 = extractelement <2 x i32> %tmp1, i32 1
store i32 %tmp2, i32* %A, align 8
@@ -45,7 +45,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst1lanef:
-;CHECK: vst1.32 {d16[1]}, [r0, :32]
+;CHECK: vst1.32 {d16[1]}, [r0:32]
%tmp1 = load <2 x float>* %B
%tmp2 = extractelement <2 x float> %tmp1, i32 1
store float %tmp2, float* %A
@@ -64,7 +64,7 @@ define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst1laneQi16:
-;CHECK: vst1.16 {d17[1]}, [r0, :16]
+;CHECK: vst1.16 {d17[1]}, [r0:16]
%tmp1 = load <8 x i16>* %B
%tmp2 = extractelement <8 x i16> %tmp1, i32 5
store i16 %tmp2, i16* %A, align 8
@@ -74,7 +74,7 @@ define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst1laneQi32:
; // Can use scalar load. No need to use vectors.
-; // CHE-CK: vst1.32 {d17[1]}, [r0, :32]
+; // CHE-CK: vst1.32 {d17[1]}, [r0:32]
%tmp1 = load <4 x i32>* %B
%tmp2 = extractelement <4 x i32> %tmp1, i32 3
store i32 %tmp2, i32* %A, align 8
@@ -85,7 +85,7 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
;CHECK: vst1laneQi32_update:
; // Can use scalar load. No need to use vectors.
-; // CHE-CK: vst1.32 {d17[1]}, [r1, :32]!
+; // CHE-CK: vst1.32 {d17[1]}, [r1:32]!
%A = load i32** %ptr
%tmp1 = load <4 x i32>* %B
%tmp2 = extractelement <4 x i32> %tmp1, i32 3
@@ -108,7 +108,7 @@ define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst2lanei8:
;Check the alignment value. Max for this instruction is 16 bits:
-;CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16]
+;CHECK: vst2.8 {d16[1], d17[1]}, [r0:16]
%tmp1 = load <8 x i8>* %B
call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
ret void
@@ -117,7 +117,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2lanei16:
;Check the alignment value. Max for this instruction is 32 bits:
-;CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32]
+;CHECK: vst2.16 {d16[1], d17[1]}, [r0:32]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
@@ -168,7 +168,7 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2laneQi32:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64]
+;CHECK: vst2.32 {d17[0], d19[0]}, [r0:64]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
@@ -283,7 +283,7 @@ declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x f
define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst4lanei8:
;Check the alignment value. Max for this instruction is 32 bits:
-;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
%tmp1 = load <8 x i8>* %B
call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
ret void
@@ -292,7 +292,7 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;Check for a post-increment updating store.
define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK: vst4lanei8_update:
-;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
%A = load i8** %ptr
%tmp1 = load <8 x i8>* %B
call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
@@ -313,7 +313,7 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4lanei32:
;Check the alignment value. Max for this instruction is 128 bits:
-;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
@@ -332,7 +332,7 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4laneQi16:
;Check the alignment value. Max for this instruction is 64 bits:
-;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
+;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll
index 679e3f434733..1efbc73650d8 100644
--- a/test/CodeGen/ARM/widen-vmovs.ll
+++ b/test/CodeGen/ARM/widen-vmovs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-code-place | FileCheck %s
+; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-block-placement | FileCheck %s
target triple = "thumbv7-apple-ios"
; The 1.0e+10 constant is loaded from the constant pool and kept in a register.
diff --git a/test/CodeGen/ARM/zextload_demandedbits.ll b/test/CodeGen/ARM/zextload_demandedbits.ll
new file mode 100644
index 000000000000..3d3269cae236
--- /dev/null
+++ b/test/CodeGen/ARM/zextload_demandedbits.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=arm -mtriple="thumbv7-apple-ios3.0.0" | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+
+%struct.eggs = type { %struct.spam, i16 }
+%struct.spam = type { [3 x i32] }
+%struct.barney = type { [2 x i32], [2 x i32] }
+
+; Make sure that the sext op does not get lost due to ComputeMaskedBits.
+; CHECK: quux
+; CHECK: lsl
+; CHECK: asr
+; CHECK: bl
+; CHECK: pop
+define void @quux(%struct.eggs* %arg) {
+bb:
+ %tmp1 = getelementptr inbounds %struct.eggs* %arg, i32 0, i32 1
+ %0 = load i16* %tmp1, align 2
+ %tobool = icmp eq i16 %0, 0
+ br i1 %tobool, label %bb16, label %bb3
+
+bb3: ; preds = %bb
+ %tmp4 = bitcast i16* %tmp1 to i8*
+ %tmp5 = ptrtoint i16* %tmp1 to i32
+ %tmp6 = shl i32 %tmp5, 20
+ %tmp7 = ashr exact i32 %tmp6, 20
+ %tmp14 = getelementptr inbounds %struct.barney* undef, i32 %tmp7
+ %tmp15 = tail call i32 @widget(%struct.barney* %tmp14, i8* %tmp4, i32 %tmp7)
+ br label %bb16
+
+bb16: ; preds = %bb3, %bb
+ ret void
+}
+
+declare i32 @widget(%struct.barney*, i8*, i32)
diff --git a/test/CodeGen/CPP/2007-06-16-Funcname.ll b/test/CodeGen/CPP/2007-06-16-Funcname.ll
index 16e9798481fe..71fea12d9c2c 100644
--- a/test/CodeGen/CPP/2007-06-16-Funcname.ll
+++ b/test/CodeGen/CPP/2007-06-16-Funcname.ll
@@ -5,4 +5,3 @@ define void @foo() {
ret void
}
-
diff --git a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
deleted file mode 100644
index 35422311c574..000000000000
--- a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc < %s -march=cellspu -o - | grep brz
-; PR3274
-
-target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
-target triple = "spu"
- %struct.anon = type { i64 }
- %struct.fp_number_type = type { i32, i32, i32, [4 x i8], %struct.anon }
-
-define double @__floatunsidf(i32 %arg_a) nounwind {
-entry:
- %in = alloca %struct.fp_number_type, align 16
- %0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1
- store i32 0, i32* %0, align 4
- %1 = icmp eq i32 %arg_a, 0
- %2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0
- br i1 %1, label %bb, label %bb1
-
-bb: ; preds = %entry
- store i32 2, i32* %2, align 8
- br label %bb7
-
-bb1: ; preds = %entry
- ret double 0.0
-
-bb7: ; preds = %bb5, %bb1, %bb
- ret double 1.0
-}
-
-; declare i32 @llvm.ctlz.i32(i32) nounwind readnone
-
-declare double @__pack_d(%struct.fp_number_type*)
diff --git a/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 401399face9a..000000000000
--- a/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
deleted file mode 100644
index 4203e91068d0..000000000000
--- a/test/CodeGen/CellSPU/and_ops.ll
+++ /dev/null
@@ -1,282 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep and %t1.s | count 234
-; RUN: grep andc %t1.s | count 85
-; RUN: grep andi %t1.s | count 37
-; RUN: grep andhi %t1.s | count 30
-; RUN: grep andbi %t1.s | count 4
-
-; CellSPU legalization is over-sensitive to Legalize's traversal order.
-; XFAIL: *
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; AND instruction generation:
-define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = and <4 x i32> %arg1, %arg2
- ret <4 x i32> %A
-}
-
-define <4 x i32> @and_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = and <4 x i32> %arg2, %arg1
- ret <4 x i32> %A
-}
-
-define <8 x i16> @and_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = and <8 x i16> %arg1, %arg2
- ret <8 x i16> %A
-}
-
-define <8 x i16> @and_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = and <8 x i16> %arg2, %arg1
- ret <8 x i16> %A
-}
-
-define <16 x i8> @and_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = and <16 x i8> %arg2, %arg1
- ret <16 x i8> %A
-}
-
-define <16 x i8> @and_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = and <16 x i8> %arg1, %arg2
- ret <16 x i8> %A
-}
-
-define i32 @and_i32_1(i32 %arg1, i32 %arg2) {
- %A = and i32 %arg2, %arg1
- ret i32 %A
-}
-
-define i32 @and_i32_2(i32 %arg1, i32 %arg2) {
- %A = and i32 %arg1, %arg2
- ret i32 %A
-}
-
-define i16 @and_i16_1(i16 %arg1, i16 %arg2) {
- %A = and i16 %arg2, %arg1
- ret i16 %A
-}
-
-define i16 @and_i16_2(i16 %arg1, i16 %arg2) {
- %A = and i16 %arg1, %arg2
- ret i16 %A
-}
-
-define i8 @and_i8_1(i8 %arg1, i8 %arg2) {
- %A = and i8 %arg2, %arg1
- ret i8 %A
-}
-
-define i8 @and_i8_2(i8 %arg1, i8 %arg2) {
- %A = and i8 %arg1, %arg2
- ret i8 %A
-}
-
-; ANDC instruction generation:
-define <4 x i32> @andc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %arg1, %A
- ret <4 x i32> %B
-}
-
-define <4 x i32> @andc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %arg2, %A
- ret <4 x i32> %B
-}
-
-define <4 x i32> @andc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %A, %arg2
- ret <4 x i32> %B
-}
-
-define <8 x i16> @andc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %arg1, %A
- ret <8 x i16> %B
-}
-
-define <8 x i16> @andc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %arg2, %A
- ret <8 x i16> %B
-}
-
-define <16 x i8> @andc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %arg2, %A
- ret <16 x i8> %B
-}
-
-define <16 x i8> @andc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %arg1, %A
- ret <16 x i8> %B
-}
-
-define <16 x i8> @andc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %A, %arg1
- ret <16 x i8> %B
-}
-
-define i32 @andc_i32_1(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg2, -1
- %B = and i32 %A, %arg1
- ret i32 %B
-}
-
-define i32 @andc_i32_2(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg1, -1
- %B = and i32 %A, %arg2
- ret i32 %B
-}
-
-define i32 @andc_i32_3(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg2, -1
- %B = and i32 %arg1, %A
- ret i32 %B
-}
-
-define i16 @andc_i16_1(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg2, -1
- %B = and i16 %A, %arg1
- ret i16 %B
-}
-
-define i16 @andc_i16_2(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg1, -1
- %B = and i16 %A, %arg2
- ret i16 %B
-}
-
-define i16 @andc_i16_3(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg2, -1
- %B = and i16 %arg1, %A
- ret i16 %B
-}
-
-define i8 @andc_i8_1(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg2, -1
- %B = and i8 %A, %arg1
- ret i8 %B
-}
-
-define i8 @andc_i8_2(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg1, -1
- %B = and i8 %A, %arg2
- ret i8 %B
-}
-
-define i8 @andc_i8_3(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg2, -1
- %B = and i8 %arg1, %A
- ret i8 %B
-}
-
-; ANDI instruction generation (i32 data type):
-define <4 x i32> @andi_v4i32_1(<4 x i32> %in) {
- %tmp2 = and <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @andi_v4i32_2(<4 x i32> %in) {
- %tmp2 = and <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @andi_v4i32_3(<4 x i32> %in) {
- %tmp2 = and <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @andi_v4i32_4(<4 x i32> %in) {
- %tmp2 = and <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 >
- ret <4 x i32> %tmp2
-}
-
-define zeroext i32 @andi_u32(i32 zeroext %in) {
- %tmp37 = and i32 %in, 37
- ret i32 %tmp37
-}
-
-define signext i32 @andi_i32(i32 signext %in) {
- %tmp38 = and i32 %in, 37
- ret i32 %tmp38
-}
-
-define i32 @andi_i32_1(i32 %in) {
- %tmp37 = and i32 %in, 37
- ret i32 %tmp37
-}
-
-; ANDHI instruction generation (i16 data type):
-define <8 x i16> @andhi_v8i16_1(<8 x i16> %in) {
- %tmp2 = and <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511,
- i16 511, i16 511, i16 511, i16 511 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @andhi_v8i16_2(<8 x i16> %in) {
- %tmp2 = and <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510,
- i16 510, i16 510, i16 510, i16 510 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @andhi_v8i16_3(<8 x i16> %in) {
- %tmp2 = and <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @andhi_v8i16_4(<8 x i16> %in) {
- %tmp2 = and <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512,
- i16 -512, i16 -512, i16 -512, i16 -512 >
- ret <8 x i16> %tmp2
-}
-
-define zeroext i16 @andhi_u16(i16 zeroext %in) {
- %tmp37 = and i16 %in, 37 ; <i16> [#uses=1]
- ret i16 %tmp37
-}
-
-define signext i16 @andhi_i16(i16 signext %in) {
- %tmp38 = and i16 %in, 37 ; <i16> [#uses=1]
- ret i16 %tmp38
-}
-
-; i8 data type (s/b ANDBI if 8-bit registers were supported):
-define <16 x i8> @and_v16i8(<16 x i8> %in) {
- ; ANDBI generated for vector types
- %tmp2 = and <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
- i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
- i8 42, i8 42, i8 42, i8 42 >
- ret <16 x i8> %tmp2
-}
-
-define zeroext i8 @and_u8(i8 zeroext %in) {
- ; ANDBI generated:
- %tmp37 = and i8 %in, 37
- ret i8 %tmp37
-}
-
-define signext i8 @and_sext8(i8 signext %in) {
- ; ANDBI generated
- %tmp38 = and i8 %in, 37
- ret i8 %tmp38
-}
-
-define i8 @and_i8(i8 %in) {
- ; ANDBI generated
- %tmp38 = and i8 %in, 205
- ret i8 %tmp38
-}
diff --git a/test/CodeGen/CellSPU/arg_ret.ll b/test/CodeGen/CellSPU/arg_ret.ll
deleted file mode 100644
index 7410b724d6fc..000000000000
--- a/test/CodeGen/CellSPU/arg_ret.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; Test parameter passing and return values
-;RUN: llc --march=cellspu %s -o - | FileCheck %s
-
-; this fits into registers r3-r74
-%paramstruct = type { i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
- i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
- i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
- i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
- i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
- i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32}
-define ccc i32 @test_regs( %paramstruct %prm )
-{
-;CHECK: lr $3, $74
-;CHECK: bi $lr
- %1 = extractvalue %paramstruct %prm, 71
- ret i32 %1
-}
-
-define ccc i32 @test_regs_and_stack( %paramstruct %prm, i32 %stackprm )
-{
-;CHECK-NOT: a $3, $74, $75
- %1 = extractvalue %paramstruct %prm, 71
- %2 = add i32 %1, %stackprm
- ret i32 %2
-}
-
-define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm )
-{
-;CHECK: lqd {{\$[0-9]+}}, 80($sp)
-;CHECK-NOT: ori {{\$[0-9]+, \$[0-9]+, 0}}
-;CHECK: lr $3, $4
- ret %paramstruct %prm
-}
-
diff --git a/test/CodeGen/CellSPU/bigstack.ll b/test/CodeGen/CellSPU/bigstack.ll
deleted file mode 100644
index 63293e2aecb1..000000000000
--- a/test/CodeGen/CellSPU/bigstack.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep lqx %t1.s | count 3
-; RUN: grep il %t1.s | grep -v file | count 5
-; RUN: grep stqx %t1.s | count 1
-
-define i32 @bigstack() nounwind {
-entry:
- %avar = alloca i32
- %big_data = alloca [2048 x i32]
- store i32 3840, i32* %avar, align 4
- br label %return
-
-return:
- %retval = load i32* %avar
- ret i32 %retval
-}
-
diff --git a/test/CodeGen/CellSPU/bss.ll b/test/CodeGen/CellSPU/bss.ll
deleted file mode 100644
index 327800d09cbf..000000000000
--- a/test/CodeGen/CellSPU/bss.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-@bssVar = global i32 zeroinitializer
-; CHECK: .section .bss
-; CHECK-NEXT: .globl
-
-@localVar= internal global i32 zeroinitializer
-; CHECK-NOT: .lcomm
-; CHECK: .local
-; CHECK-NEXT: .comm
-
diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll
deleted file mode 100644
index 11cf770145ba..000000000000
--- a/test/CodeGen/CellSPU/call.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i32 @main() {
-entry:
- %a = call i32 @stub_1(i32 1, float 0x400921FA00000000)
- call void @extern_stub_1(i32 %a, i32 4)
- ret i32 %a
-}
-
-declare void @extern_stub_1(i32, i32)
-
-define i32 @stub_1(i32 %x, float %y) {
- ; CHECK: il $3, 0
- ; CHECK: bi $lr
-entry:
- ret i32 0
-}
-
-; vararg call: ensure that all caller-saved registers are spilled to the
-; stack:
-define i32 @stub_2(...) {
-entry:
- ret i32 0
-}
-
-; check that struct is passed in r3->
-; assert this by changing the second field in the struct
-%0 = type { i32, i32, i32 }
-declare %0 @callee()
-define %0 @test_structret()
-{
-;CHECK: stqd $lr, 16($sp)
-;CHECK: stqd $sp, -48($sp)
-;CHECK: ai $sp, $sp, -48
-;CHECK: brasl $lr, callee
- %rv = call %0 @callee()
-;CHECK: ai $4, $4, 1
-;CHECK: lqd $lr, 64($sp)
-;CHECK: ai $sp, $sp, 48
-;CHECK: bi $lr
- %oldval = extractvalue %0 %rv, 1
- %newval = add i32 %oldval,1
- %newrv = insertvalue %0 %rv, i32 %newval, 1
- ret %0 %newrv
-}
-
diff --git a/test/CodeGen/CellSPU/crash.ll b/test/CodeGen/CellSPU/crash.ll
deleted file mode 100644
index cc2ab71db3b3..000000000000
--- a/test/CodeGen/CellSPU/crash.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc %s -march=cellspu -o -
-declare i8 @return_i8()
-declare i16 @return_i16()
-define void @testfunc() {
- %rv1 = call i8 @return_i8()
- %rv2 = call i16 @return_i16()
- ret void
-}
\ No newline at end of file
diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll
deleted file mode 100644
index e1a6cd829260..000000000000
--- a/test/CodeGen/CellSPU/ctpop.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep cntb %t1.s | count 3
-; RUN: grep andi %t1.s | count 3
-; RUN: grep rotmi %t1.s | count 2
-; RUN: grep rothmi %t1.s | count 1
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-declare i8 @llvm.ctpop.i8(i8)
-declare i16 @llvm.ctpop.i16(i16)
-declare i32 @llvm.ctpop.i32(i32)
-
-define i32 @test_i8(i8 %X) {
- call i8 @llvm.ctpop.i8(i8 %X)
- %Y = zext i8 %1 to i32
- ret i32 %Y
-}
-
-define i32 @test_i16(i16 %X) {
- call i16 @llvm.ctpop.i16(i16 %X)
- %Y = zext i16 %1 to i32
- ret i32 %Y
-}
-
-define i32 @test_i32(i32 %X) {
- call i32 @llvm.ctpop.i32(i32 %X)
- %Y = bitcast i32 %1 to i32
- ret i32 %Y
-}
-
diff --git a/test/CodeGen/CellSPU/div_ops.ll b/test/CodeGen/CellSPU/div_ops.ll
deleted file mode 100644
index 0c93d83ca76d..000000000000
--- a/test/CodeGen/CellSPU/div_ops.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc --march=cellspu %s -o - | FileCheck %s
-
-; signed division rounds towards zero, rotma don't.
-define i32 @sdivide (i32 %val )
-{
-; CHECK: rotmai
-; CHECK: rotmi
-; CHECK: a
-; CHECK: rotmai
-; CHECK: bi $lr
- %rv = sdiv i32 %val, 4
- ret i32 %rv
-}
-
-define i32 @udivide (i32 %val )
-{
-; CHECK: rotmi
-; CHECK: bi $lr
- %rv = udiv i32 %val, 4
- ret i32 %rv
-}
-
diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll
deleted file mode 100644
index 66bff3eb7835..000000000000
--- a/test/CodeGen/CellSPU/dp_farith.ll
+++ /dev/null
@@ -1,102 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep dfa %t1.s | count 2
-; RUN: grep dfs %t1.s | count 2
-; RUN: grep dfm %t1.s | count 6
-; RUN: grep dfma %t1.s | count 2
-; RUN: grep dfms %t1.s | count 2
-; RUN: grep dfnms %t1.s | count 4
-;
-; This file includes double precision floating point arithmetic instructions
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define double @fadd(double %arg1, double %arg2) {
- %A = fadd double %arg1, %arg2
- ret double %A
-}
-
-define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) {
- %A = fadd <2 x double> %arg1, %arg2
- ret <2 x double> %A
-}
-
-define double @fsub(double %arg1, double %arg2) {
- %A = fsub double %arg1, %arg2
- ret double %A
-}
-
-define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) {
- %A = fsub <2 x double> %arg1, %arg2
- ret <2 x double> %A
-}
-
-define double @fmul(double %arg1, double %arg2) {
- %A = fmul double %arg1, %arg2
- ret double %A
-}
-
-define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) {
- %A = fmul <2 x double> %arg1, %arg2
- ret <2 x double> %A
-}
-
-define double @fma(double %arg1, double %arg2, double %arg3) {
- %A = fmul double %arg1, %arg2
- %B = fadd double %A, %arg3
- ret double %B
-}
-
-define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = fmul <2 x double> %arg1, %arg2
- %B = fadd <2 x double> %A, %arg3
- ret <2 x double> %B
-}
-
-define double @fms(double %arg1, double %arg2, double %arg3) {
- %A = fmul double %arg1, %arg2
- %B = fsub double %A, %arg3
- ret double %B
-}
-
-define <2 x double> @fms_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = fmul <2 x double> %arg1, %arg2
- %B = fsub <2 x double> %A, %arg3
- ret <2 x double> %B
-}
-
-; - (a * b - c)
-define double @d_fnms_1(double %arg1, double %arg2, double %arg3) {
- %A = fmul double %arg1, %arg2
- %B = fsub double %A, %arg3
- %C = fsub double -0.000000e+00, %B ; <double> [#uses=1]
- ret double %C
-}
-
-; Another way of getting fnms
-; - ( a * b ) + c => c - (a * b)
-define double @d_fnms_2(double %arg1, double %arg2, double %arg3) {
- %A = fmul double %arg1, %arg2
- %B = fsub double %arg3, %A
- ret double %B
-}
-
-; FNMS: - (a * b - c) => c - (a * b)
-define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = fmul <2 x double> %arg1, %arg2
- %B = fsub <2 x double> %arg3, %A
- ret <2 x double> %B
-}
-
-; Another way to get fnms using a constant vector
-; - ( a * b - c)
-define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = fmul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1]
- %B = fsub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1]
- %C = fsub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B
- ret <2 x double> %C
-}
-
-;define double @fdiv_1(double %arg1, double %arg2) {
-; %A = fdiv double %arg1, %arg2 ; <double> [#uses=1]
-; ret double %A
-;}
diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll
deleted file mode 100644
index 79676814f282..000000000000
--- a/test/CodeGen/CellSPU/eqv.ll
+++ /dev/null
@@ -1,152 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep eqv %t1.s | count 18
-; RUN: grep xshw %t1.s | count 6
-; RUN: grep xsbh %t1.s | count 3
-; RUN: grep andi %t1.s | count 3
-
-; Test the 'eqv' instruction, whose boolean expression is:
-; (a & b) | (~a & ~b), which simplifies to
-; (a & b) | ~(a | b)
-; Alternatively, a ^ ~b, which the compiler will also match.
-
-; ModuleID = 'eqv.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = and <4 x i32> %arg1, %arg2
- %B = or <4 x i32> %arg1, %arg2
- %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %C = or <4 x i32> %A, %Bnot
- ret <4 x i32> %C
-}
-
-define <4 x i32> @equiv_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
- %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
- %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
- %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %C
-}
-
-define <4 x i32> @equiv_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) {
- %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
- %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
- %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
- %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %C
-}
-
-define <4 x i32> @equiv_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2) {
- %arg2not = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %C = xor <4 x i32> %arg1, %arg2not
- ret <4 x i32> %C
-}
-
-define i32 @equiv_i32_1(i32 %arg1, i32 %arg2) {
- %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
- %B = or i32 %arg1, %arg2 ; <i32> [#uses=1]
- %Bnot = xor i32 %B, -1 ; <i32> [#uses=1]
- %C = or i32 %A, %Bnot ; <i32> [#uses=1]
- ret i32 %C
-}
-
-define i32 @equiv_i32_2(i32 %arg1, i32 %arg2) {
- %B = or i32 %arg1, %arg2 ; <i32> [#uses=1]
- %Bnot = xor i32 %B, -1 ; <i32> [#uses=1]
- %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
- %C = or i32 %A, %Bnot ; <i32> [#uses=1]
- ret i32 %C
-}
-
-define i32 @equiv_i32_3(i32 %arg1, i32 %arg2) {
- %B = or i32 %arg1, %arg2 ; <i32> [#uses=1]
- %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
- %Bnot = xor i32 %B, -1 ; <i32> [#uses=1]
- %C = or i32 %A, %Bnot ; <i32> [#uses=1]
- ret i32 %C
-}
-
-define i32 @equiv_i32_4(i32 %arg1, i32 %arg2) {
- %arg2not = xor i32 %arg2, -1
- %C = xor i32 %arg1, %arg2not
- ret i32 %C
-}
-
-define i32 @equiv_i32_5(i32 %arg1, i32 %arg2) {
- %arg1not = xor i32 %arg1, -1
- %C = xor i32 %arg2, %arg1not
- ret i32 %C
-}
-
-define signext i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) {
- %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
- %B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
- %Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
- %C = or i16 %A, %Bnot ; <i16> [#uses=1]
- ret i16 %C
-}
-
-define signext i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) {
- %B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
- %Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
- %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
- %C = or i16 %A, %Bnot ; <i16> [#uses=1]
- ret i16 %C
-}
-
-define signext i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) {
- %B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
- %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
- %Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
- %C = or i16 %A, %Bnot ; <i16> [#uses=1]
- ret i16 %C
-}
-
-define signext i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) {
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
-
-define signext i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) {
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
-
-define signext i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) {
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
-
-define zeroext i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) {
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
-
-define zeroext i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) {
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
-
-define zeroext i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) {
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll
deleted file mode 100644
index 0ac971c58c5b..000000000000
--- a/test/CodeGen/CellSPU/extract_elt.ll
+++ /dev/null
@@ -1,277 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep shufb %t1.s | count 39
-; RUN: grep ilhu %t1.s | count 27
-; RUN: grep iohl %t1.s | count 27
-; RUN: grep lqa %t1.s | count 10
-; RUN: grep shlqby %t1.s | count 12
-; RUN: grep 515 %t1.s | count 1
-; RUN: grep 1029 %t1.s | count 2
-; RUN: grep 1543 %t1.s | count 2
-; RUN: grep 2057 %t1.s | count 2
-; RUN: grep 2571 %t1.s | count 2
-; RUN: grep 3085 %t1.s | count 2
-; RUN: grep 3599 %t1.s | count 2
-; RUN: grep 32768 %t1.s | count 1
-; RUN: grep 32769 %t1.s | count 1
-; RUN: grep 32770 %t1.s | count 1
-; RUN: grep 32771 %t1.s | count 1
-; RUN: grep 32772 %t1.s | count 1
-; RUN: grep 32773 %t1.s | count 1
-; RUN: grep 32774 %t1.s | count 1
-; RUN: grep 32775 %t1.s | count 1
-; RUN: grep 32776 %t1.s | count 1
-; RUN: grep 32777 %t1.s | count 1
-; RUN: grep 32778 %t1.s | count 1
-; RUN: grep 32779 %t1.s | count 1
-; RUN: grep 32780 %t1.s | count 1
-; RUN: grep 32781 %t1.s | count 1
-; RUN: grep 32782 %t1.s | count 1
-; RUN: grep 32783 %t1.s | count 1
-; RUN: grep 32896 %t1.s | count 24
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i32 @i32_extract_0(<4 x i32> %v) {
-entry:
- %a = extractelement <4 x i32> %v, i32 0
- ret i32 %a
-}
-
-define i32 @i32_extract_1(<4 x i32> %v) {
-entry:
- %a = extractelement <4 x i32> %v, i32 1
- ret i32 %a
-}
-
-define i32 @i32_extract_2(<4 x i32> %v) {
-entry:
- %a = extractelement <4 x i32> %v, i32 2
- ret i32 %a
-}
-
-define i32 @i32_extract_3(<4 x i32> %v) {
-entry:
- %a = extractelement <4 x i32> %v, i32 3
- ret i32 %a
-}
-
-define i16 @i16_extract_0(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 0
- ret i16 %a
-}
-
-define i16 @i16_extract_1(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 1
- ret i16 %a
-}
-
-define i16 @i16_extract_2(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 2
- ret i16 %a
-}
-
-define i16 @i16_extract_3(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 3
- ret i16 %a
-}
-
-define i16 @i16_extract_4(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 4
- ret i16 %a
-}
-
-define i16 @i16_extract_5(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 5
- ret i16 %a
-}
-
-define i16 @i16_extract_6(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 6
- ret i16 %a
-}
-
-define i16 @i16_extract_7(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 7
- ret i16 %a
-}
-
-define i8 @i8_extract_0(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 0
- ret i8 %a
-}
-
-define i8 @i8_extract_1(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 1
- ret i8 %a
-}
-
-define i8 @i8_extract_2(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 2
- ret i8 %a
-}
-
-define i8 @i8_extract_3(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 3
- ret i8 %a
-}
-
-define i8 @i8_extract_4(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 4
- ret i8 %a
-}
-
-define i8 @i8_extract_5(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 5
- ret i8 %a
-}
-
-define i8 @i8_extract_6(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 6
- ret i8 %a
-}
-
-define i8 @i8_extract_7(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 7
- ret i8 %a
-}
-
-define i8 @i8_extract_8(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 8
- ret i8 %a
-}
-
-define i8 @i8_extract_9(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 9
- ret i8 %a
-}
-
-define i8 @i8_extract_10(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 10
- ret i8 %a
-}
-
-define i8 @i8_extract_11(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 11
- ret i8 %a
-}
-
-define i8 @i8_extract_12(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 12
- ret i8 %a
-}
-
-define i8 @i8_extract_13(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 13
- ret i8 %a
-}
-
-define i8 @i8_extract_14(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 14
- ret i8 %a
-}
-
-define i8 @i8_extract_15(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 15
- ret i8 %a
-}
-
-;;--------------------------------------------------------------------------
-;; extract element, variable index:
-;;--------------------------------------------------------------------------
-
-define i8 @extract_varadic_i8(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <16 x i8> < i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i32 %i
- ret i8 %0
-}
-
-define i8 @extract_varadic_i8_1(<16 x i8> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <16 x i8> %v, i32 %i
- ret i8 %0
-}
-
-define i16 @extract_varadic_i16(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <8 x i16> < i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i32 %i
- ret i16 %0
-}
-
-define i16 @extract_varadic_i16_1(<8 x i16> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <8 x i16> %v, i32 %i
- ret i16 %0
-}
-
-define i32 @extract_varadic_i32(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <4 x i32> < i32 0, i32 1, i32 2, i32 3>, i32 %i
- ret i32 %0
-}
-
-define i32 @extract_varadic_i32_1(<4 x i32> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <4 x i32> %v, i32 %i
- ret i32 %0
-}
-
-define float @extract_varadic_f32(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <4 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >, i32 %i
- ret float %0
-}
-
-define float @extract_varadic_f32_1(<4 x float> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <4 x float> %v, i32 %i
- ret float %0
-}
-
-define i64 @extract_varadic_i64(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <2 x i64> < i64 0, i64 1>, i32 %i
- ret i64 %0
-}
-
-define i64 @extract_varadic_i64_1(<2 x i64> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <2 x i64> %v, i32 %i
- ret i64 %0
-}
-
-define double @extract_varadic_f64(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <2 x double> < double 1.000000e+00, double 2.000000e+00>, i32 %i
- ret double %0
-}
-
-define double @extract_varadic_f64_1(<2 x double> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <2 x double> %v, i32 %i
- ret double %0
-}
diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll
deleted file mode 100644
index f6b028dbb88a..000000000000
--- a/test/CodeGen/CellSPU/fcmp32.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llc --mtriple=cellspu-unknown-elf %s -o - | FileCheck %s
-
-; Exercise the floating point comparison operators for f32:
-
-declare double @fabs(double)
-declare float @fabsf(float)
-
-define i1 @fcmp_eq(float %arg1, float %arg2) {
-; CHECK: fceq
-; CHECK: bi $lr
- %A = fcmp oeq float %arg1, %arg2
- ret i1 %A
-}
-
-define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
-; CHECK: fcmeq
-; CHECK: bi $lr
- %1 = call float @fabsf(float %arg1) readnone
- %2 = call float @fabsf(float %arg2) readnone
- %3 = fcmp oeq float %1, %2
- ret i1 %3
-}
-
-define i1 @test_ogt(float %a, float %b) {
-; CHECK: fcgt
-; CHECK: bi $lr
- %cmp = fcmp ogt float %a, %b
- ret i1 %cmp
-}
-
-define i1 @test_ugt(float %a, float %b) {
-; CHECK: fcgt
-; CHECK: bi $lr
- %cmp = fcmp ugt float %a, %b
- ret i1 %cmp
-}
diff --git a/test/CodeGen/CellSPU/fcmp64.ll b/test/CodeGen/CellSPU/fcmp64.ll
deleted file mode 100644
index 2b61fa6d2dc2..000000000000
--- a/test/CodeGen/CellSPU/fcmp64.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-
-define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind {
-entry:
- %A = fcmp oeq double %arg1, %arg2
- ret i1 %A
-}
diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll
deleted file mode 100644
index 9921626b79cb..000000000000
--- a/test/CodeGen/CellSPU/fdiv.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep frest %t1.s | count 2
-; RUN: grep -w fi %t1.s | count 2
-; RUN: grep -w fm %t1.s | count 2
-; RUN: grep fma %t1.s | count 2
-; RUN: grep fnms %t1.s | count 4
-; RUN: grep cgti %t1.s | count 2
-; RUN: grep selb %t1.s | count 2
-;
-; This file includes standard floating-point arithmetic instructions.
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define float @fdiv32(float %arg1, float %arg2) {
- %A = fdiv float %arg1, %arg2
- ret float %A
-}
-
-define <4 x float> @fdiv_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
- %A = fdiv <4 x float> %arg1, %arg2
- ret <4 x float> %A
-}
diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll
deleted file mode 100644
index 6e01906dae69..000000000000
--- a/test/CodeGen/CellSPU/fneg-fabs.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep 32768 %t1.s | count 2
-; RUN: grep xor %t1.s | count 4
-; RUN: grep and %t1.s | count 2
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define double @fneg_dp(double %X) {
- %Y = fsub double -0.000000e+00, %X
- ret double %Y
-}
-
-define <2 x double> @fneg_dp_vec(<2 x double> %X) {
- %Y = fsub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X
- ret <2 x double> %Y
-}
-
-define float @fneg_sp(float %X) {
- %Y = fsub float -0.000000e+00, %X
- ret float %Y
-}
-
-define <4 x float> @fneg_sp_vec(<4 x float> %X) {
- %Y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00,
- float -0.000000e+00, float -0.000000e+00>, %X
- ret <4 x float> %Y
-}
-
-declare double @fabs(double)
-
-declare float @fabsf(float)
-
-define double @fabs_dp(double %X) {
- %Y = call double @fabs( double %X ) readnone
- ret double %Y
-}
-
-define float @fabs_sp(float %X) {
- %Y = call float @fabsf( float %X ) readnone
- ret float %Y
-}
diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll
deleted file mode 100644
index 3553cbbf7b5c..000000000000
--- a/test/CodeGen/CellSPU/i64ops.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep xswd %t1.s | count 3
-; RUN: grep xsbh %t1.s | count 1
-; RUN: grep xshw %t1.s | count 2
-; RUN: grep shufb %t1.s | count 7
-; RUN: grep cg %t1.s | count 4
-; RUN: grep addx %t1.s | count 4
-; RUN: grep fsmbi %t1.s | count 3
-; RUN: grep il %t1.s | count 2
-; RUN: grep mpy %t1.s | count 10
-; RUN: grep mpyh %t1.s | count 6
-; RUN: grep mpyhhu %t1.s | count 2
-; RUN: grep mpyu %t1.s | count 4
-
-; ModuleID = 'stores.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i64 @sext_i64_i8(i8 %a) nounwind {
- %1 = sext i8 %a to i64
- ret i64 %1
-}
-
-define i64 @sext_i64_i16(i16 %a) nounwind {
- %1 = sext i16 %a to i64
- ret i64 %1
-}
-
-define i64 @sext_i64_i32(i32 %a) nounwind {
- %1 = sext i32 %a to i64
- ret i64 %1
-}
-
-define i64 @zext_i64_i8(i8 %a) nounwind {
- %1 = zext i8 %a to i64
- ret i64 %1
-}
-
-define i64 @zext_i64_i16(i16 %a) nounwind {
- %1 = zext i16 %a to i64
- ret i64 %1
-}
-
-define i64 @zext_i64_i32(i32 %a) nounwind {
- %1 = zext i32 %a to i64
- ret i64 %1
-}
-
-define i64 @add_i64(i64 %a, i64 %b) nounwind {
- %1 = add i64 %a, %b
- ret i64 %1
-}
-
-define i64 @mul_i64(i64 %a, i64 %b) nounwind {
- %1 = mul i64 %a, %b
- ret i64 %1
-}
diff --git a/test/CodeGen/CellSPU/i8ops.ll b/test/CodeGen/CellSPU/i8ops.ll
deleted file mode 100644
index 57a2aa894725..000000000000
--- a/test/CodeGen/CellSPU/i8ops.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-
-; ModuleID = 'i8ops.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i8 @add_i8(i8 %a, i8 %b) nounwind {
- %1 = add i8 %a, %b
- ret i8 %1
-}
-
-define i8 @add_i8_imm(i8 %a, i8 %b) nounwind {
- %1 = add i8 %a, 15
- ret i8 %1
-}
-
-define i8 @sub_i8(i8 %a, i8 %b) nounwind {
- %1 = sub i8 %a, %b
- ret i8 %1
-}
-
-define i8 @sub_i8_imm(i8 %a, i8 %b) nounwind {
- %1 = sub i8 %a, 15
- ret i8 %1
-}
diff --git a/test/CodeGen/CellSPU/icmp16.ll b/test/CodeGen/CellSPU/icmp16.ll
deleted file mode 100644
index 853ae1db160f..000000000000
--- a/test/CodeGen/CellSPU/icmp16.ll
+++ /dev/null
@@ -1,574 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
-; $3 = %arg1, $4 = %val1, $5 = %val2
-;
-; For "positive" comparisons:
-; selb $3, $6, $5, <i1>
-; selb $3, $5, $4, <i1>
-;
-; For "negative" comparisons, i.e., those where the result of the comparison
-; must be inverted (setne, for example):
-; selb $3, $5, $6, <i1>
-; selb $3, $4, $5, <i1>
-
-; i16 integer comparisons:
-define i16 @icmp_eq_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_select_i16:
-; CHECK: ceqh
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp eq i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_eq_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_setcc_i16:
-; CHECK: ilhu
-; CHECK: ceqh
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp eq i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_eq_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_immed01_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i16 %arg1, 511
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_eq_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_immed02_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i16 %arg1, -512
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_eq_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_immed03_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i16 %arg1, -1
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_eq_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_immed04_i16:
-; CHECK: ilh
-; CHECK: ceqh
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i16 %arg1, 32768
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ne_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_select_i16:
-; CHECK: ceqh
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ne i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_ne_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_setcc_i16:
-; CHECK: ceqh
-; CHECK: ilhu
-; CHECK: xorhi
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ne i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_ne_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_immed01_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i16 %arg1, 511
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ne_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_immed02_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i16 %arg1, -512
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ne_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_immed03_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i16 %arg1, -1
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ne_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_immed04_i16:
-; CHECK: ilh
-; CHECK: ceqh
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i16 %arg1, 32768
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ugt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_select_i16:
-; CHECK: clgth
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ugt i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_ugt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_setcc_i16:
-; CHECK: ilhu
-; CHECK: clgth
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ugt i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_ugt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_immed01_i16:
-; CHECK: clgthi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i16 %arg1, 500
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ugt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_immed02_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ugt i16 %arg1, 0
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ugt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_immed03_i16:
-; CHECK: clgthi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i16 %arg1, 65024
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ugt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_immed04_i16:
-; CHECK: ilh
-; CHECK: clgth
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i16 %arg1, 32768
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_uge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_uge_select_i16:
-; CHECK: ceqh
-; CHECK: clgth
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp uge i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_uge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_uge_setcc_i16:
-; CHECK: ceqh
-; CHECK: clgth
-; CHECK: ilhu
-; CHECK: or
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp uge i16 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp uge i16 %arg1, <immed> can always be transformed into
-;; icmp ugt i16 %arg1, <immed>-1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i16 @icmp_ult_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_select_i16:
-; CHECK: ceqh
-; CHECK: clgth
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ult i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_ult_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_setcc_i16:
-; CHECK: ceqh
-; CHECK: clgth
-; CHECK: ilhu
-; CHECK: nor
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ult i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_ult_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_immed01_i16:
-; CHECK: ceqhi
-; CHECK: clgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i16 %arg1, 511
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ult_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_immed02_i16:
-; CHECK: ceqhi
-; CHECK: clgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i16 %arg1, 65534
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ult_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_immed03_i16:
-; CHECK: ceqhi
-; CHECK: clgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i16 %arg1, 65024
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ult_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_immed04_i16:
-; CHECK: ilh
-; CHECK: ceqh
-; CHECK: clgth
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i16 %arg1, 32769
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ule_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ule_select_i16:
-; CHECK: clgth
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ule i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_ule_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ule_setcc_i16:
-; CHECK: clgth
-; CHECK: ilhu
-; CHECK: xorhi
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ule i16 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp ule i16 %arg1, <immed> can always be transformed into
-;; icmp ult i16 %arg1, <immed>+1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i16 @icmp_sgt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_select_i16:
-; CHECK: cgth
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sgt i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_sgt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_setcc_i16:
-; CHECK: ilhu
-; CHECK: cgth
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp sgt i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_sgt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_immed01_i16:
-; CHECK: cgthi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i16 %arg1, 511
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_sgt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_immed02_i16:
-; CHECK: cgthi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i16 %arg1, -1
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_sgt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_immed03_i16:
-; CHECK: cgthi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i16 %arg1, -512
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_sgt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_immed04_i16:
-; CHECK: ilh
-; CHECK: ceqh
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp sgt i16 %arg1, 32768
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_sge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sge_select_i16:
-; CHECK: ceqh
-; CHECK: cgth
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sge i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_sge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sge_setcc_i16:
-; CHECK: ceqh
-; CHECK: cgth
-; CHECK: ilhu
-; CHECK: or
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp sge i16 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sge i16 %arg1, <immed> can always be transformed into
-;; icmp sgt i16 %arg1, <immed>-1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i16 @icmp_slt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_select_i16:
-; CHECK: ceqh
-; CHECK: cgth
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp slt i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_slt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_setcc_i16:
-; CHECK: ceqh
-; CHECK: cgth
-; CHECK: ilhu
-; CHECK: nor
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp slt i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_slt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_immed01_i16:
-; CHECK: ceqhi
-; CHECK: cgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i16 %arg1, 511
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_slt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_immed02_i16:
-; CHECK: ceqhi
-; CHECK: cgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i16 %arg1, -512
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_slt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_immed03_i16:
-; CHECK: ceqhi
-; CHECK: cgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i16 %arg1, -1
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_slt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_immed04_i16:
-; CHECK: lr
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp slt i16 %arg1, 32768
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_sle_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sle_select_i16:
-; CHECK: cgth
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp sle i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_sle_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sle_setcc_i16:
-; CHECK: cgth
-; CHECK: ilhu
-; CHECK: xorhi
-; CHECK: iohl
-; CHECK: bi
-
-entry:
- %A = icmp sle i16 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sle i16 %arg1, <immed> can always be transformed into
-;; icmp slt i16 %arg1, <immed>+1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
diff --git a/test/CodeGen/CellSPU/icmp32.ll b/test/CodeGen/CellSPU/icmp32.ll
deleted file mode 100644
index 1794f4cd7b66..000000000000
--- a/test/CodeGen/CellSPU/icmp32.ll
+++ /dev/null
@@ -1,575 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
-; $3 = %arg1, $4 = %val1, $5 = %val2
-;
-; For "positive" comparisons:
-; selb $3, $6, $5, <i1>
-; selb $3, $5, $4, <i1>
-;
-; For "negative" comparisons, i.e., those where the result of the comparison
-; must be inverted (setne, for example):
-; selb $3, $5, $6, <i1>
-; selb $3, $4, $5, <i1>
-
-; i32 integer comparisons:
-define i32 @icmp_eq_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_select_i32:
-; CHECK: ceq
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp eq i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_eq_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_setcc_i32:
-; CHECK: ilhu
-; CHECK: ceq
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp eq i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_eq_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_immed01_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_eq_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_immed02_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i32 %arg1, -512
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_eq_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_immed03_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i32 %arg1, -1
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_eq_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_immed04_i32:
-; CHECK: ila
-; CHECK: ceq
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ne_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_select_i32:
-; CHECK: ceq
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ne i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_ne_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_setcc_i32:
-; CHECK: ceq
-; CHECK: ilhu
-; CHECK: xori
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ne i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_ne_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_immed01_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ne_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_immed02_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i32 %arg1, -512
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ne_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_immed03_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i32 %arg1, -1
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ne_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_immed04_i32:
-; CHECK: ila
-; CHECK: ceq
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ugt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_select_i32:
-; CHECK: clgt
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ugt i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_ugt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_setcc_i32:
-; CHECK: ilhu
-; CHECK: clgt
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ugt i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_ugt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_immed01_i32:
-; CHECK: clgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ugt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_immed02_i32:
-; CHECK: clgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i32 %arg1, 4294966784
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ugt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_immed03_i32:
-; CHECK: clgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i32 %arg1, 4294967293
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ugt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_immed04_i32:
-; CHECK: ila
-; CHECK: clgt
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_uge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_uge_select_i32:
-; CHECK: ceq
-; CHECK: clgt
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp uge i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_uge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_uge_setcc_i32:
-; CHECK: ceq
-; CHECK: clgt
-; CHECK: ilhu
-; CHECK: or
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp uge i32 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp uge i32 %arg1, <immed> can always be transformed into
-;; icmp ugt i32 %arg1, <immed>-1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i32 @icmp_ult_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_select_i32:
-; CHECK: ceq
-; CHECK: clgt
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ult i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_ult_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_setcc_i32:
-; CHECK: ceq
-; CHECK: clgt
-; CHECK: ilhu
-; CHECK: nor
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ult i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_ult_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_immed01_i32:
-; CHECK: ceqi
-; CHECK: clgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ult_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_immed02_i32:
-; CHECK: ceqi
-; CHECK: clgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i32 %arg1, 4294966784
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ult_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_immed03_i32:
-; CHECK: ceqi
-; CHECK: clgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i32 %arg1, 4294967293
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ult_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_immed04_i32:
-; CHECK: rotmi
-; CHECK: ceqi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ule_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ule_select_i32:
-; CHECK: clgt
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ule i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_ule_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ule_setcc_i32:
-; CHECK: clgt
-; CHECK: ilhu
-; CHECK: xori
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ule i32 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp ule i32 %arg1, <immed> can always be transformed into
-;; icmp ult i32 %arg1, <immed>+1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i32 @icmp_sgt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_select_i32:
-; CHECK: cgt
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sgt i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_sgt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_setcc_i32:
-; CHECK: ilhu
-; CHECK: cgt
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp sgt i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_sgt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_immed01_i32:
-; CHECK: cgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_sgt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_immed02_i32:
-; CHECK: cgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i32 %arg1, 4294966784
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_sgt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_immed03_i32:
-; CHECK: cgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i32 %arg1, 4294967293
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_sgt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_immed04_i32:
-; CHECK: ila
-; CHECK: cgt
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_sge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sge_select_i32:
-; CHECK: ceq
-; CHECK: cgt
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sge i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_sge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sge_setcc_i32:
-; CHECK: ceq
-; CHECK: cgt
-; CHECK: ilhu
-; CHECK: or
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp sge i32 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sge i32 %arg1, <immed> can always be transformed into
-;; icmp sgt i32 %arg1, <immed>-1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i32 @icmp_slt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_select_i32:
-; CHECK: ceq
-; CHECK: cgt
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp slt i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_slt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_setcc_i32:
-; CHECK: ceq
-; CHECK: cgt
-; CHECK: ilhu
-; CHECK: nor
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp slt i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_slt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_immed01_i32:
-; CHECK: ceqi
-; CHECK: cgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_slt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_immed02_i32:
-; CHECK: ceqi
-; CHECK: cgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i32 %arg1, -512
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_slt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_immed03_i32:
-; CHECK: ceqi
-; CHECK: cgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i32 %arg1, -1
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_slt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_immed04_i32:
-; CHECK: ila
-; CHECK: ceq
-; CHECK: cgt
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_sle_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sle_select_i32:
-; CHECK: cgt
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp sle i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_sle_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sle_setcc_i32:
-; CHECK: cgt
-; CHECK: ilhu
-; CHECK: xori
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp sle i32 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sle i32 %arg1, <immed> can always be transformed into
-;; icmp slt i32 %arg1, <immed>+1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
diff --git a/test/CodeGen/CellSPU/icmp64.ll b/test/CodeGen/CellSPU/icmp64.ll
deleted file mode 100644
index 9dd2cdc0dea9..000000000000
--- a/test/CodeGen/CellSPU/icmp64.ll
+++ /dev/null
@@ -1,146 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ceq %t1.s | count 20
-; RUN: grep cgti %t1.s | count 12
-; RUN: grep cgt %t1.s | count 16
-; RUN: grep clgt %t1.s | count 12
-; RUN: grep gb %t1.s | count 12
-; RUN: grep fsm %t1.s | count 10
-; RUN: grep xori %t1.s | count 5
-; RUN: grep selb %t1.s | count 18
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
-; $3 = %arg1, $4 = %val1, $5 = %val2
-;
-; i64 integer comparisons:
-define i64 @icmp_eq_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp eq i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_eq_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp eq i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_ne_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ne i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_ne_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ne i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_ugt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ugt i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_ugt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ugt i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_uge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp uge i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_uge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp uge i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_ult_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ult i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_ult_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ult i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_ule_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ule i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_ule_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ule i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_sgt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sgt i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_sgt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sgt i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_sge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sge i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_sge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sge i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_slt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp slt i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_slt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp slt i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_sle_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sle i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_sle_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sle i64 %arg1, %arg2
- ret i1 %A
-}
diff --git a/test/CodeGen/CellSPU/icmp8.ll b/test/CodeGen/CellSPU/icmp8.ll
deleted file mode 100644
index 1db641e5a853..000000000000
--- a/test/CodeGen/CellSPU/icmp8.ll
+++ /dev/null
@@ -1,446 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
-; $3 = %arg1, $4 = %val1, $5 = %val2
-;
-; For "positive" comparisons:
-; selb $3, $6, $5, <i1>
-; selb $3, $5, $4, <i1>
-;
-; For "negative" comparisons, i.e., those where the result of the comparison
-; must be inverted (setne, for example):
-; selb $3, $5, $6, <i1>
-; selb $3, $4, $5, <i1>
-
-; i8 integer comparisons:
-define i8 @icmp_eq_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_eq_select_i8:
-; CHECK: ceqb
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp eq i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_eq_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_eq_setcc_i8:
-; CHECK: ceqb
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp eq i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_eq_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_eq_immed01_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i8 %arg1, 127
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_eq_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_eq_immed02_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i8 %arg1, -128
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_eq_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_eq_immed03_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i8 %arg1, -1
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ne_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ne_select_i8:
-; CHECK: ceqb
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ne i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_ne_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ne_setcc_i8:
-; CHECK: ceqb
-; CHECK: xorbi
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp ne i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_ne_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ne_immed01_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i8 %arg1, 127
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ne_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ne_immed02_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i8 %arg1, -128
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ne_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ne_immed03_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i8 %arg1, -1
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ugt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ugt_select_i8:
-; CHECK: clgtb
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ugt i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_ugt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ugt_setcc_i8:
-; CHECK: clgtb
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp ugt i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_ugt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ugt_immed01_i8:
-; CHECK: clgtbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i8 %arg1, 126
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_uge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_uge_select_i8:
-; CHECK: ceqb
-; CHECK: clgtb
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp uge i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_uge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_uge_setcc_i8:
-; CHECK: ceqb
-; CHECK: clgtb
-; CHECK: or
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp uge i8 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp uge i8 %arg1, <immed> can always be transformed into
-;; icmp ugt i8 %arg1, <immed>-1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i8 @icmp_ult_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ult_select_i8:
-; CHECK: ceqb
-; CHECK: clgtb
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ult i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_ult_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ult_setcc_i8:
-; CHECK: ceqb
-; CHECK: clgtb
-; CHECK: nor
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp ult i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_ult_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ult_immed01_i8:
-; CHECK: ceqbi
-; CHECK: clgtbi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i8 %arg1, 253
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ult_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ult_immed02_i8:
-; CHECK: ceqbi
-; CHECK: clgtbi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i8 %arg1, 129
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ule_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ule_select_i8:
-; CHECK: clgtb
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ule i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_ule_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ule_setcc_i8:
-; CHECK: clgtb
-; CHECK: xorbi
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp ule i8 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp ule i8 %arg1, <immed> can always be transformed into
-;; icmp ult i8 %arg1, <immed>+1
-;;
-;; Consequently, even though patterns exist to match this form, it's
-;; unlikely they'll ever be generated.
-
-define i8 @icmp_sgt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sgt_select_i8:
-; CHECK: cgtb
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sgt i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_sgt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sgt_setcc_i8:
-; CHECK: cgtb
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp sgt i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_sgt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sgt_immed01_i8:
-; CHECK: cgtbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i8 %arg1, 96
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_sgt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sgt_immed02_i8:
-; CHECK: cgtbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i8 %arg1, -1
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_sgt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sgt_immed03_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp sgt i8 %arg1, -128
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_sge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sge_select_i8:
-; CHECK: ceqb
-; CHECK: cgtb
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sge i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_sge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sge_setcc_i8:
-; CHECK: ceqb
-; CHECK: cgtb
-; CHECK: or
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp sge i8 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sge i8 %arg1, <immed> can always be transformed into
-;; icmp sgt i8 %arg1, <immed>-1
-;;
-;; Consequently, even though patterns exist to match this form, it's
-;; unlikely they'll ever be generated.
-
-define i8 @icmp_slt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_slt_select_i8:
-; CHECK: ceqb
-; CHECK: cgtb
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp slt i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_slt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_slt_setcc_i8:
-; CHECK: ceqb
-; CHECK: cgtb
-; CHECK: nor
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp slt i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_slt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_slt_immed01_i8:
-; CHECK: ceqbi
-; CHECK: cgtbi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i8 %arg1, 96
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_slt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_slt_immed02_i8:
-; CHECK: ceqbi
-; CHECK: cgtbi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i8 %arg1, -120
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_slt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_slt_immed03_i8:
-; CHECK: ceqbi
-; CHECK: cgtbi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i8 %arg1, -1
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_sle_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sle_select_i8:
-; CHECK: cgtb
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp sle i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_sle_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sle_setcc_i8:
-; CHECK: cgtb
-; CHECK: xorbi
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp sle i8 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sle i8 %arg1, <immed> can always be transformed into
-;; icmp slt i8 %arg1, <immed>+1
-;;
-;; Consequently, even though patterns exist to match this form, it's
-;; unlikely they'll ever be generated.
-
diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll
deleted file mode 100644
index 077d07169e45..000000000000
--- a/test/CodeGen/CellSPU/immed16.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep "ilh" %t1.s | count 11
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i16 @test_1() {
- %x = alloca i16, align 16
- store i16 419, i16* %x ;; ILH via pattern
- ret i16 0
-}
-
-define i16 @test_2() {
- %x = alloca i16, align 16
- store i16 1023, i16* %x ;; ILH via pattern
- ret i16 0
-}
-
-define i16 @test_3() {
- %x = alloca i16, align 16
- store i16 -1023, i16* %x ;; ILH via pattern
- ret i16 0
-}
-
-define i16 @test_4() {
- %x = alloca i16, align 16
- store i16 32767, i16* %x ;; ILH via pattern
- ret i16 0
-}
-
-define i16 @test_5() {
- %x = alloca i16, align 16
- store i16 -32768, i16* %x ;; ILH via pattern
- ret i16 0
-}
-
-define i16 @test_6() {
- ret i16 0
-}
-
-
diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll
deleted file mode 100644
index 8e48f0b52c17..000000000000
--- a/test/CodeGen/CellSPU/immed32.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ilhu %t1.s | count 9
-; RUN: grep iohl %t1.s | count 7
-; RUN: grep -w il %t1.s | count 3
-; RUN: grep 16429 %t1.s | count 1
-; RUN: grep 63572 %t1.s | count 1
-; RUN: grep 128 %t1.s | count 1
-; RUN: grep 32639 %t1.s | count 1
-; RUN: grep 65535 %t1.s | count 1
-; RUN: grep 16457 %t1.s | count 1
-; RUN: grep 4059 %t1.s | count 1
-; RUN: grep 49077 %t1.s | count 1
-; RUN: grep 1267 %t1.s | count 2
-; RUN: grep 16309 %t1.s | count 1
-; RUN: cat %t1.s | FileCheck %s
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i32 @test_1() {
- ret i32 4784128 ;; ILHU via pattern (0x49000)
-}
-
-define i32 @test_2() {
- ret i32 5308431 ;; ILHU/IOHL via pattern (0x5100f)
-}
-
-define i32 @test_3() {
- ret i32 511 ;; IL via pattern
-}
-
-define i32 @test_4() {
- ret i32 -512 ;; IL via pattern
-}
-
-define i32 @test_5()
-{
-;CHECK: test_5:
-;CHECK-NOT: ila $3, 40000
-;CHECK: ilhu
-;CHECK: iohl
-;CHECK: bi $lr
- ret i32 400000
-}
-
-;; double float floatval
-;; 0x4005bf0a80000000 0x402d|f854 2.718282
-define float @float_const_1() {
- ret float 0x4005BF0A80000000 ;; ILHU/IOHL
-}
-
-;; double float floatval
-;; 0x3810000000000000 0x0080|0000 0.000000
-define float @float_const_2() {
- ret float 0x3810000000000000 ;; IL 128
-}
-
-;; double float floatval
-;; 0x47efffffe0000000 0x7f7f|ffff 3.402823e+38 (FLT_MAX)
-define float @float_const_3() {
- ret float 0x47EFFFFFE0000000 ;; ILHU/IOHL via pattern
-}
-
-;; double float floatval
-;; 0x400921fb60000000 0x4049|0fdb 3.141593
-define float @float_const_4() {
- ret float 0x400921FB60000000 ;; ILHU/IOHL via pattern
-}
-
-;; double float floatval
-;; 0xbff6a09e60000000 0xbfb5|04f3 -1.414214
-define float @float_const_5() {
- ret float 0xBFF6A09E60000000 ;; ILHU/IOHL via pattern
-}
-
-;; double float floatval
-;; 0x3ff6a09e60000000 0x3fb5|04f3 1.414214
-define float @float_const_6() {
- ret float 0x3FF6A09E60000000 ;; ILHU/IOHL via pattern
-}
-
-define float @float_const_7() {
- ret float 0.000000e+00 ;; IL 0 via pattern
-}
diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll
deleted file mode 100644
index fd483651756e..000000000000
--- a/test/CodeGen/CellSPU/immed64.ll
+++ /dev/null
@@ -1,95 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep lqa %t1.s | count 13
-; RUN: grep ilhu %t1.s | count 15
-; RUN: grep ila %t1.s | count 1
-; RUN: grep -w il %t1.s | count 6
-; RUN: grep shufb %t1.s | count 13
-; RUN: grep 65520 %t1.s | count 1
-; RUN: grep 43981 %t1.s | count 1
-; RUN: grep 13702 %t1.s | count 1
-; RUN: grep 28225 %t1.s | count 1
-; RUN: grep 30720 %t1.s | count 1
-; RUN: grep 3233857728 %t1.s | count 8
-; RUN: grep 2155905152 %t1.s | count 6
-; RUN: grep 66051 %t1.s | count 7
-; RUN: grep 471670303 %t1.s | count 11
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202)
-; 18446744073709551591 => 0x ffffffff ffffffe7 (-25)
-; 18446744073708516742 => 0x ffffffff fff03586 (-1034874)
-; 5308431 => 0x 00000000 0051000F
-; 9223372038704560128 => 0x 80000000 6e417800
-
-define i64 @i64_const_1() {
- ret i64 1311768467750121234 ;; Constant pool spill
-}
-
-define i64 @i64_const_2() {
- ret i64 18446744073709551591 ;; IL/SHUFB
-}
-
-define i64 @i64_const_3() {
- ret i64 18446744073708516742 ;; ILHU/IOHL/SHUFB
-}
-
-define i64 @i64_const_4() {
- ret i64 5308431 ;; ILHU/IOHL/SHUFB
-}
-
-define i64 @i64_const_5() {
- ret i64 511 ;; IL/SHUFB
-}
-
-define i64 @i64_const_6() {
- ret i64 -512 ;; IL/SHUFB
-}
-
-define i64 @i64_const_7() {
- ret i64 9223372038704560128 ;; ILHU/IOHL/SHUFB
-}
-
-define i64 @i64_const_8() {
- ret i64 0 ;; IL
-}
-
-define i64 @i64_const_9() {
- ret i64 -1 ;; IL
-}
-
-define i64 @i64_const_10() {
- ret i64 281470681808895 ;; IL 65535
-}
-
-; 0x4005bf0a8b145769 ->
-; (ILHU 0x4005 [16389]/IOHL 0xbf0a [48906])
-; (ILHU 0x8b14 [35604]/IOHL 0x5769 [22377])
-define double @f64_const_1() {
- ret double 0x4005bf0a8b145769 ;; ILHU/IOHL via pattern
-}
-
-define double @f64_const_2() {
- ret double 0x0010000000000000
-}
-
-define double @f64_const_3() {
- ret double 0x7fefffffffffffff
-}
-
-define double @f64_const_4() {
- ret double 0x400921fb54442d18
-}
-
-define double @f64_const_5() {
- ret double 0xbff6a09e667f3bcd ;; ILHU/IOHL via pattern
-}
-
-define double @f64_const_6() {
- ret double 0x3ff6a09e667f3bcd
-}
-
-define double @f64_const_7() {
- ret double 0.000000e+00
-}
diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll
deleted file mode 100644
index 984c017c96d1..000000000000
--- a/test/CodeGen/CellSPU/int2fp.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep csflt %t1.s | count 5
-; RUN: grep cuflt %t1.s | count 1
-; RUN: grep xshw %t1.s | count 2
-; RUN: grep xsbh %t1.s | count 1
-; RUN: grep and %t1.s | count 2
-; RUN: grep andi %t1.s | count 1
-; RUN: grep ila %t1.s | count 1
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define float @sitofp_i32(i32 %arg1) {
- %A = sitofp i32 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
-
-define float @uitofp_u32(i32 %arg1) {
- %A = uitofp i32 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
-
-define float @sitofp_i16(i16 %arg1) {
- %A = sitofp i16 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
-
-define float @uitofp_i16(i16 %arg1) {
- %A = uitofp i16 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
-
-define float @sitofp_i8(i8 %arg1) {
- %A = sitofp i8 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
-
-define float @uitofp_i8(i8 %arg1) {
- %A = uitofp i8 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll
deleted file mode 100644
index b0f6a6247e41..000000000000
--- a/test/CodeGen/CellSPU/intrinsics_branch.ll
+++ /dev/null
@@ -1,150 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ceq %t1.s | count 30
-; RUN: grep ceqb %t1.s | count 10
-; RUN: grep ceqhi %t1.s | count 5
-; RUN: grep ceqi %t1.s | count 5
-; RUN: grep cgt %t1.s | count 30
-; RUN: grep cgtb %t1.s | count 10
-; RUN: grep cgthi %t1.s | count 5
-; RUN: grep cgti %t1.s | count 5
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
-
-declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>)
-declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>)
-declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8)
-
-declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>)
-declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>)
-declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8)
-
-declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>)
-declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>)
-declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8)
-
-
-
-define <4 x i32> @test(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) {
- call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) {
- call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) {
- call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
-
-define <4 x i32> @ceqitest(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @ceqhitest(<8 x i16> %A) {
- call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @ceqbitest(<16 x i8> %A) {
- call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
-
-define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) {
- call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) {
- call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) {
- call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
-
-define <4 x i32> @cgtitest(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @cgthitest(<8 x i16> %A) {
- call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @cgtbitest(<16 x i8> %A) {
- call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
-
-define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) {
- call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) {
- call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) {
- call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
-
-define <4 x i32> @clgtitest(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @clgthitest(<8 x i16> %A) {
- call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @clgtbitest(<16 x i8> %A) {
- call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll b/test/CodeGen/CellSPU/intrinsics_float.ll
deleted file mode 100644
index 81373470d069..000000000000
--- a/test/CodeGen/CellSPU/intrinsics_float.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep fa %t1.s | count 5
-; RUN: grep fs %t1.s | count 5
-; RUN: grep fm %t1.s | count 15
-; RUN: grep fceq %t1.s | count 5
-; RUN: grep fcmeq %t1.s | count 5
-; RUN: grep fcgt %t1.s | count 5
-; RUN: grep fcmgt %t1.s | count 5
-; RUN: grep fma %t1.s | count 5
-; RUN: grep fnms %t1.s | count 5
-; RUN: grep fms %t1.s | count 5
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
-
-declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>)
-
-declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>)
-
-declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>)
-
-define <4 x i32> @test(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
- call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
- call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
- call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll
deleted file mode 100644
index a29ee4c2405d..000000000000
--- a/test/CodeGen/CellSPU/intrinsics_logical.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep and %t1.s | count 20
-; RUN: grep andc %t1.s | count 5
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8)
-
-declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8)
-
-declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8)
-
-declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) {
- call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) {
- call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <4 x i32> @anditest(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @andhitest(<8 x i16> %A) {
- call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
diff --git a/test/CodeGen/CellSPU/jumptable.ll b/test/CodeGen/CellSPU/jumptable.ll
deleted file mode 100644
index 66c2fdeb51fd..000000000000
--- a/test/CodeGen/CellSPU/jumptable.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-;RUN: llc --march=cellspu -disable-cgp-branch-opts %s -o - | FileCheck %s
-; This is to check that emitting jumptables doesn't crash llc
-define i32 @test(i32 %param) {
-entry:
-;CHECK: ai {{\$.}}, $3, -1
-;CHECK: clgti {{\$., \$.}}, 3
-;CHECK: brnz {{\$.}},.LBB0_
- switch i32 %param, label %bb2 [
- i32 1, label %bb1
- i32 2, label %bb2
- i32 3, label %bb3
- i32 4, label %bb2
- ]
-;CHECK-NOT: # BB#2
-bb1:
- ret i32 1
-bb2:
- ret i32 2
-bb3:
- ret i32 %param
-}
diff --git a/test/CodeGen/CellSPU/lit.local.cfg b/test/CodeGen/CellSPU/lit.local.cfg
deleted file mode 100644
index ea00867701b2..000000000000
--- a/test/CodeGen/CellSPU/lit.local.cfg
+++ /dev/null
@@ -1,6 +0,0 @@
-config.suffixes = ['.ll', '.c', '.cpp']
-
-targets = set(config.root.targets_to_build.split())
-if not 'CellSPU' in targets:
- config.unsupported = True
-
diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll
deleted file mode 100644
index 4771752f5f4c..000000000000
--- a/test/CodeGen/CellSPU/loads.ll
+++ /dev/null
@@ -1,59 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-; ModuleID = 'loads.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly {
-entry:
- %tmp1 = load <4 x float>* %a
- ret <4 x float> %tmp1
-; CHECK: lqd $3, 0($3)
-}
-
-define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly {
-entry:
- %arrayidx = getelementptr <4 x float>* %a, i32 1
- %tmp1 = load <4 x float>* %arrayidx
- ret <4 x float> %tmp1
-; CHECK: lqd $3, 16($3)
-}
-
-
-declare <4 x i32>* @getv4f32ptr()
-define <4 x i32> @func() {
- ;CHECK: brasl
- ; we need some instruction to move the result to safety;
- ; which one (lr, stqd...) depends on the regalloc
- ;CHECK: {{.*}}
- ;CHECK: brasl
- %rv1 = call <4 x i32>* @getv4f32ptr()
- %rv2 = call <4 x i32>* @getv4f32ptr()
- %rv3 = load <4 x i32>* %rv1
- ret <4 x i32> %rv3
-}
-
-define <4 x float> @load_undef(){
- ; CHECK: lqd $3, 0($3)
- %val = load <4 x float>* undef
- ret <4 x float> %val
-}
-
-;check that 'misaligned' loads that may span two memory chunks
-;are lowered as two loads. Don't check for the bit manipulation, as that
-;might change with improved algorithms or scheduling
-define i32 @load_misaligned( i32* %ptr ){
-;CHECK: load_misaligned
-;CHECK: lqd
-;CHECK: lqd
-;CHECK: bi $lr
- %rv = load i32* %ptr, align 2
- ret i32 %rv
-}
-
-define <4 x i32> @load_null_vec( ) {
-;CHECK: lqa
-;CHECK: bi $lr
- %rv = load <4 x i32>* null
- ret <4 x i32> %rv
-}
diff --git a/test/CodeGen/CellSPU/mul-with-overflow.ll b/test/CodeGen/CellSPU/mul-with-overflow.ll
deleted file mode 100644
index c04e69e3e193..000000000000
--- a/test/CodeGen/CellSPU/mul-with-overflow.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=cellspu
-
-declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
-define zeroext i1 @a(i16 %x) nounwind {
- %res = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %x, i16 3)
- %obil = extractvalue {i16, i1} %res, 1
- ret i1 %obil
-}
-
-declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
-define zeroext i1 @b(i16 %x) nounwind {
- %res = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %x, i16 3)
- %obil = extractvalue {i16, i1} %res, 1
- ret i1 %obil
-}
diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll
deleted file mode 100644
index 1e28fc7a918d..000000000000
--- a/test/CodeGen/CellSPU/mul_ops.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep mpy %t1.s | count 44
-; RUN: grep mpyu %t1.s | count 4
-; RUN: grep mpyh %t1.s | count 10
-; RUN: grep mpyhh %t1.s | count 2
-; RUN: grep rotma %t1.s | count 12
-; RUN: grep rotmahi %t1.s | count 4
-; RUN: grep and %t1.s | count 2
-; RUN: grep selb %t1.s | count 6
-; RUN: grep fsmbi %t1.s | count 4
-; RUN: grep shli %t1.s | count 4
-; RUN: grep shlhi %t1.s | count 4
-; RUN: grep ila %t1.s | count 2
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; 32-bit multiply instruction generation:
-define <4 x i32> @mpy_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
-entry:
- %A = mul <4 x i32> %arg1, %arg2
- ret <4 x i32> %A
-}
-
-define <4 x i32> @mpy_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
-entry:
- %A = mul <4 x i32> %arg2, %arg1
- ret <4 x i32> %A
-}
-
-define <8 x i16> @mpy_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
-entry:
- %A = mul <8 x i16> %arg1, %arg2
- ret <8 x i16> %A
-}
-
-define <8 x i16> @mpy_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
-entry:
- %A = mul <8 x i16> %arg2, %arg1
- ret <8 x i16> %A
-}
-
-define <16 x i8> @mul_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
-entry:
- %A = mul <16 x i8> %arg2, %arg1
- ret <16 x i8> %A
-}
-
-define <16 x i8> @mul_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
-entry:
- %A = mul <16 x i8> %arg1, %arg2
- ret <16 x i8> %A
-}
-
-define i32 @mul_i32_1(i32 %arg1, i32 %arg2) {
-entry:
- %A = mul i32 %arg2, %arg1
- ret i32 %A
-}
-
-define i32 @mul_i32_2(i32 %arg1, i32 %arg2) {
-entry:
- %A = mul i32 %arg1, %arg2
- ret i32 %A
-}
-
-define i16 @mul_i16_1(i16 %arg1, i16 %arg2) {
-entry:
- %A = mul i16 %arg2, %arg1
- ret i16 %A
-}
-
-define i16 @mul_i16_2(i16 %arg1, i16 %arg2) {
-entry:
- %A = mul i16 %arg1, %arg2
- ret i16 %A
-}
-
-define i8 @mul_i8_1(i8 %arg1, i8 %arg2) {
-entry:
- %A = mul i8 %arg2, %arg1
- ret i8 %A
-}
-
-define i8 @mul_i8_2(i8 %arg1, i8 %arg2) {
-entry:
- %A = mul i8 %arg1, %arg2
- ret i8 %A
-}
diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll
deleted file mode 100644
index 57ac709c5414..000000000000
--- a/test/CodeGen/CellSPU/nand.ll
+++ /dev/null
@@ -1,125 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep nand %t1.s | count 90
-; RUN: grep and %t1.s | count 94
-; RUN: grep xsbh %t1.s | count 2
-; RUN: grep xshw %t1.s | count 4
-
-; CellSPU legalization is over-sensitive to Legalize's traversal order.
-; XFAIL: *
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1]
- %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 >
- ret <4 x i32> %B
-}
-
-define <4 x i32> @nand_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
- %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 >
- ret <4 x i32> %B
-}
-
-define <8 x i16> @nand_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = and <8 x i16> %arg2, %arg1 ; <<8 x i16>> [#uses=1]
- %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- ret <8 x i16> %B
-}
-
-define <8 x i16> @nand_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = and <8 x i16> %arg1, %arg2 ; <<8 x i16>> [#uses=1]
- %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- ret <8 x i16> %B
-}
-
-define <16 x i8> @nand_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = and <16 x i8> %arg2, %arg1 ; <<16 x i8>> [#uses=1]
- %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- ret <16 x i8> %B
-}
-
-define <16 x i8> @nand_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = and <16 x i8> %arg1, %arg2 ; <<16 x i8>> [#uses=1]
- %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- ret <16 x i8> %B
-}
-
-define i32 @nand_i32_1(i32 %arg1, i32 %arg2) {
- %A = and i32 %arg2, %arg1 ; <i32> [#uses=1]
- %B = xor i32 %A, -1 ; <i32> [#uses=1]
- ret i32 %B
-}
-
-define i32 @nand_i32_2(i32 %arg1, i32 %arg2) {
- %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
- %B = xor i32 %A, -1 ; <i32> [#uses=1]
- ret i32 %B
-}
-
-define signext i16 @nand_i16_1(i16 signext %arg1, i16 signext %arg2) {
- %A = and i16 %arg2, %arg1 ; <i16> [#uses=1]
- %B = xor i16 %A, -1 ; <i16> [#uses=1]
- ret i16 %B
-}
-
-define signext i16 @nand_i16_2(i16 signext %arg1, i16 signext %arg2) {
- %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
- %B = xor i16 %A, -1 ; <i16> [#uses=1]
- ret i16 %B
-}
-
-define zeroext i16 @nand_i16u_1(i16 zeroext %arg1, i16 zeroext %arg2) {
- %A = and i16 %arg2, %arg1 ; <i16> [#uses=1]
- %B = xor i16 %A, -1 ; <i16> [#uses=1]
- ret i16 %B
-}
-
-define zeroext i16 @nand_i16u_2(i16 zeroext %arg1, i16 zeroext %arg2) {
- %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
- %B = xor i16 %A, -1 ; <i16> [#uses=1]
- ret i16 %B
-}
-
-define zeroext i8 @nand_i8u_1(i8 zeroext %arg1, i8 zeroext %arg2) {
- %A = and i8 %arg2, %arg1 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
-
-define zeroext i8 @nand_i8u_2(i8 zeroext %arg1, i8 zeroext %arg2) {
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
-
-define signext i8 @nand_i8_1(i8 signext %arg1, i8 signext %arg2) {
- %A = and i8 %arg2, %arg1 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
-
-define signext i8 @nand_i8_2(i8 signext %arg1, i8 signext %arg2) {
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
-
-define i8 @nand_i8_3(i8 %arg1, i8 %arg2) {
- %A = and i8 %arg2, %arg1 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
-
-define i8 @nand_i8_4(i8 %arg1, i8 %arg2) {
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll
deleted file mode 100644
index f329266a3c23..000000000000
--- a/test/CodeGen/CellSPU/or_ops.ll
+++ /dev/null
@@ -1,278 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep and %t1.s | count 2
-; RUN: grep orc %t1.s | count 85
-; RUN: grep ori %t1.s | count 34
-; RUN: grep orhi %t1.s | count 30
-; RUN: grep orbi %t1.s | count 15
-; RUN: FileCheck %s < %t1.s
-
-; CellSPU legalization is over-sensitive to Legalize's traversal order.
-; XFAIL: *
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; OR instruction generation:
-define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = or <4 x i32> %arg1, %arg2
- ret <4 x i32> %A
-}
-
-define <4 x i32> @or_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = or <4 x i32> %arg2, %arg1
- ret <4 x i32> %A
-}
-
-define <8 x i16> @or_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = or <8 x i16> %arg1, %arg2
- ret <8 x i16> %A
-}
-
-define <8 x i16> @or_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = or <8 x i16> %arg2, %arg1
- ret <8 x i16> %A
-}
-
-define <16 x i8> @or_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = or <16 x i8> %arg2, %arg1
- ret <16 x i8> %A
-}
-
-define <16 x i8> @or_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = or <16 x i8> %arg1, %arg2
- ret <16 x i8> %A
-}
-
-define i32 @or_i32_1(i32 %arg1, i32 %arg2) {
- %A = or i32 %arg2, %arg1
- ret i32 %A
-}
-
-define i32 @or_i32_2(i32 %arg1, i32 %arg2) {
- %A = or i32 %arg1, %arg2
- ret i32 %A
-}
-
-define i16 @or_i16_1(i16 %arg1, i16 %arg2) {
- %A = or i16 %arg2, %arg1
- ret i16 %A
-}
-
-define i16 @or_i16_2(i16 %arg1, i16 %arg2) {
- %A = or i16 %arg1, %arg2
- ret i16 %A
-}
-
-define i8 @or_i8_1(i8 %arg1, i8 %arg2) {
- %A = or i8 %arg2, %arg1
- ret i8 %A
-}
-
-define i8 @or_i8_2(i8 %arg1, i8 %arg2) {
- %A = or i8 %arg1, %arg2
- ret i8 %A
-}
-
-; ORC instruction generation:
-define <4 x i32> @orc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = or <4 x i32> %arg1, %A
- ret <4 x i32> %B
-}
-
-define <4 x i32> @orc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = or <4 x i32> %arg2, %A
- ret <4 x i32> %B
-}
-
-define <4 x i32> @orc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = or <4 x i32> %A, %arg2
- ret <4 x i32> %B
-}
-
-define <8 x i16> @orc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = or <8 x i16> %arg1, %A
- ret <8 x i16> %B
-}
-
-define <8 x i16> @orc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = or <8 x i16> %arg2, %A
- ret <8 x i16> %B
-}
-
-define <16 x i8> @orc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = or <16 x i8> %arg2, %A
- ret <16 x i8> %B
-}
-
-define <16 x i8> @orc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = or <16 x i8> %arg1, %A
- ret <16 x i8> %B
-}
-
-define <16 x i8> @orc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = or <16 x i8> %A, %arg1
- ret <16 x i8> %B
-}
-
-define i32 @orc_i32_1(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg2, -1
- %B = or i32 %A, %arg1
- ret i32 %B
-}
-
-define i32 @orc_i32_2(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg1, -1
- %B = or i32 %A, %arg2
- ret i32 %B
-}
-
-define i32 @orc_i32_3(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg2, -1
- %B = or i32 %arg1, %A
- ret i32 %B
-}
-
-define i16 @orc_i16_1(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg2, -1
- %B = or i16 %A, %arg1
- ret i16 %B
-}
-
-define i16 @orc_i16_2(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg1, -1
- %B = or i16 %A, %arg2
- ret i16 %B
-}
-
-define i16 @orc_i16_3(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg2, -1
- %B = or i16 %arg1, %A
- ret i16 %B
-}
-
-define i8 @orc_i8_1(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg2, -1
- %B = or i8 %A, %arg1
- ret i8 %B
-}
-
-define i8 @orc_i8_2(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg1, -1
- %B = or i8 %A, %arg2
- ret i8 %B
-}
-
-define i8 @orc_i8_3(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg2, -1
- %B = or i8 %arg1, %A
- ret i8 %B
-}
-
-; ORI instruction generation (i32 data type):
-define <4 x i32> @ori_v4i32_1(<4 x i32> %in) {
- %tmp2 = or <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @ori_v4i32_2(<4 x i32> %in) {
- %tmp2 = or <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @ori_v4i32_3(<4 x i32> %in) {
- %tmp2 = or <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @ori_v4i32_4(<4 x i32> %in) {
- %tmp2 = or <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 >
- ret <4 x i32> %tmp2
-}
-
-define zeroext i32 @ori_u32(i32 zeroext %in) {
- %tmp37 = or i32 %in, 37 ; <i32> [#uses=1]
- ret i32 %tmp37
-}
-
-define signext i32 @ori_i32(i32 signext %in) {
- %tmp38 = or i32 %in, 37 ; <i32> [#uses=1]
- ret i32 %tmp38
-}
-
-define i32 @ori_i32_600(i32 %in) {
- ;600 does not fit into 'ori' immediate field
- ;CHECK: ori_i32_600
- ;CHECK: il
- ;CHECK: ori
- %tmp = or i32 %in, 600
- ret i32 %tmp
-}
-
-; ORHI instruction generation (i16 data type):
-define <8 x i16> @orhi_v8i16_1(<8 x i16> %in) {
- %tmp2 = or <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511,
- i16 511, i16 511, i16 511, i16 511 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @orhi_v8i16_2(<8 x i16> %in) {
- %tmp2 = or <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510,
- i16 510, i16 510, i16 510, i16 510 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @orhi_v8i16_3(<8 x i16> %in) {
- %tmp2 = or <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @orhi_v8i16_4(<8 x i16> %in) {
- %tmp2 = or <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512,
- i16 -512, i16 -512, i16 -512, i16 -512 >
- ret <8 x i16> %tmp2
-}
-
-define zeroext i16 @orhi_u16(i16 zeroext %in) {
- %tmp37 = or i16 %in, 37 ; <i16> [#uses=1]
- ret i16 %tmp37
-}
-
-define signext i16 @orhi_i16(i16 signext %in) {
- %tmp38 = or i16 %in, 37 ; <i16> [#uses=1]
- ret i16 %tmp38
-}
-
-; ORBI instruction generation (i8 data type):
-define <16 x i8> @orbi_v16i8(<16 x i8> %in) {
- %tmp2 = or <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
- i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
- i8 42, i8 42, i8 42, i8 42 >
- ret <16 x i8> %tmp2
-}
-
-define zeroext i8 @orbi_u8(i8 zeroext %in) {
- %tmp37 = or i8 %in, 37 ; <i8> [#uses=1]
- ret i8 %tmp37
-}
-
-define signext i8 @orbi_i8(i8 signext %in) {
- %tmp38 = or i8 %in, 37 ; <i8> [#uses=1]
- ret i8 %tmp38
-}
diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll
deleted file mode 100644
index 1d933adac939..000000000000
--- a/test/CodeGen/CellSPU/private.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; Test to make sure that 'private' linkage is used correctly.
-;
-; RUN: llc < %s -march=cellspu > %t
-; RUN: grep .Lfoo: %t
-; RUN: grep brsl.*\.Lfoo %t
-; RUN: grep .Lbaz: %t
-; RUN: grep ila.*\.Lbaz %t
-
-define private void @foo() {
- ret void
-}
-
-@baz = private global i32 4
-
-define i32 @bar() {
- call void @foo()
- %1 = load i32* @baz, align 4
- ret i32 %1
-}
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
deleted file mode 100644
index 977093527609..000000000000
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ /dev/null
@@ -1,172 +0,0 @@
-; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep rot %t1.s | count 86
-; RUN: grep roth %t1.s | count 8
-; RUN: grep roti.*5 %t1.s | count 1
-; RUN: grep roti.*27 %t1.s | count 1
-; RUN: grep rothi.*5 %t1.s | count 2
-; RUN: grep rothi.*11 %t1.s | count 1
-; RUN: grep rothi.*,.3 %t1.s | count 1
-; RUN: grep andhi %t1.s | count 4
-; RUN: grep shlhi %t1.s | count 4
-; RUN: cat %t1.s | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; Vector rotates are not currently supported in gcc or llvm assembly. These are
-; not tested.
-
-; 32-bit rotates:
-define i32 @rotl32_1a(i32 %arg1, i8 %arg2) {
- %tmp1 = zext i8 %arg2 to i32 ; <i32> [#uses=1]
- %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1]
- %arg22 = sub i8 32, %arg2 ; <i8> [#uses=1]
- %tmp2 = zext i8 %arg22 to i32 ; <i32> [#uses=1]
- %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotl32_1b(i32 %arg1, i16 %arg2) {
- %tmp1 = zext i16 %arg2 to i32 ; <i32> [#uses=1]
- %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1]
- %arg22 = sub i16 32, %arg2 ; <i16> [#uses=1]
- %tmp2 = zext i16 %arg22 to i32 ; <i32> [#uses=1]
- %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotl32_2(i32 %arg1, i32 %arg2) {
- %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1]
- %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1]
- %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotl32_3(i32 %arg1, i32 %arg2) {
- %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1]
- %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1]
- %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotl32_4(i32 %arg1, i32 %arg2) {
- %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1]
- %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1]
- %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotr32_1(i32 %A, i8 %Amt) {
- %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1]
- %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1]
- %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1]
- %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1]
- %C = shl i32 %A, %tmp2 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotr32_2(i32 %A, i8 %Amt) {
- %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1]
- %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1]
- %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1]
- %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1]
- %C = shl i32 %A, %tmp2 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-; Rotate left with immediate
-define i32 @rotli32(i32 %A) {
- %B = shl i32 %A, 5 ; <i32> [#uses=1]
- %C = lshr i32 %A, 27 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-; Rotate right with immediate
-define i32 @rotri32(i32 %A) {
- %B = lshr i32 %A, 5 ; <i32> [#uses=1]
- %C = shl i32 %A, 27 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-; 16-bit rotates:
-define i16 @rotr16_1(i16 %arg1, i8 %arg) {
- %tmp1 = zext i8 %arg to i16 ; <i16> [#uses=1]
- %B = lshr i16 %arg1, %tmp1 ; <i16> [#uses=1]
- %arg2 = sub i8 16, %arg ; <i8> [#uses=1]
- %tmp2 = zext i8 %arg2 to i16 ; <i16> [#uses=1]
- %C = shl i16 %arg1, %tmp2 ; <i16> [#uses=1]
- %D = or i16 %B, %C ; <i16> [#uses=1]
- ret i16 %D
-}
-
-define i16 @rotr16_2(i16 %arg1, i16 %arg) {
- %B = lshr i16 %arg1, %arg ; <i16> [#uses=1]
- %tmp1 = sub i16 16, %arg ; <i16> [#uses=1]
- %C = shl i16 %arg1, %tmp1 ; <i16> [#uses=1]
- %D = or i16 %B, %C ; <i16> [#uses=1]
- ret i16 %D
-}
-
-define i16 @rotli16(i16 %A) {
- %B = shl i16 %A, 5 ; <i16> [#uses=1]
- %C = lshr i16 %A, 11 ; <i16> [#uses=1]
- %D = or i16 %B, %C ; <i16> [#uses=1]
- ret i16 %D
-}
-
-define i16 @rotri16(i16 %A) {
- %B = lshr i16 %A, 5 ; <i16> [#uses=1]
- %C = shl i16 %A, 11 ; <i16> [#uses=1]
- %D = or i16 %B, %C ; <i16> [#uses=1]
- ret i16 %D
-}
-
-define i8 @rotl8(i8 %A, i8 %Amt) {
- %B = shl i8 %A, %Amt ; <i8> [#uses=1]
- %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1]
- %C = lshr i8 %A, %Amt2 ; <i8> [#uses=1]
- %D = or i8 %B, %C ; <i8> [#uses=1]
- ret i8 %D
-}
-
-define i8 @rotr8(i8 %A, i8 %Amt) {
- %B = lshr i8 %A, %Amt ; <i8> [#uses=1]
- %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1]
- %C = shl i8 %A, %Amt2 ; <i8> [#uses=1]
- %D = or i8 %B, %C ; <i8> [#uses=1]
- ret i8 %D
-}
-
-define i8 @rotli8(i8 %A) {
- %B = shl i8 %A, 5 ; <i8> [#uses=1]
- %C = lshr i8 %A, 3 ; <i8> [#uses=1]
- %D = or i8 %B, %C ; <i8> [#uses=1]
- ret i8 %D
-}
-
-define i8 @rotri8(i8 %A) {
- %B = lshr i8 %A, 5 ; <i8> [#uses=1]
- %C = shl i8 %A, 3 ; <i8> [#uses=1]
- %D = or i8 %B, %C ; <i8> [#uses=1]
- ret i8 %D
-}
-
-define <2 x float> @test1(<4 x float> %param )
-{
-; CHECK: test1
-; CHECK: shufb
- %el = extractelement <4 x float> %param, i32 1
- %vec1 = insertelement <1 x float> undef, float %el, i32 0
- %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0>
-; CHECK: bi $lr
- ret <2 x float> %rv
-}
diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll
deleted file mode 100644
index 65e0aa6fa0b0..000000000000
--- a/test/CodeGen/CellSPU/select_bits.ll
+++ /dev/null
@@ -1,572 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep selb %t1.s | count 56
-
-; CellSPU legalization is over-sensitive to Legalize's traversal order.
-; XFAIL: *
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; v2i64
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define <2 x i64> @selectbits_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %C = and <2 x i64> %rC, %rB
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %A, %rA
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define <2 x i64> @selectbits_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %C = and <2 x i64> %rB, %rC
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %A, %rA
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define <2 x i64> @selectbits_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %A, %rA
- %C = and <2 x i64> %rB, %rC
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define <2 x i64> @selectbits_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %A, %rA
- %C = and <2 x i64> %rC, %rB
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define <2 x i64> @selectbits_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %C = and <2 x i64> %rC, %rB
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %rA, %A
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define <2 x i64> @selectbits_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %C = and <2 x i64> %rB, %rC
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %rA, %A
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define <2 x i64> @selectbits_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %rA, %A
- %C = and <2 x i64> %rB, %rC
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define <2 x i64> @selectbits_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %rA, %A
- %C = and <2 x i64> %rC, %rB
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; v4i32
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define <4 x i32> @selectbits_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %C = and <4 x i32> %rC, %rB
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %A, %rA
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define <4 x i32> @selectbits_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %C = and <4 x i32> %rB, %rC
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %A, %rA
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define <4 x i32> @selectbits_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %A, %rA
- %C = and <4 x i32> %rB, %rC
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define <4 x i32> @selectbits_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
- %B = and <4 x i32> %A, %rA
- %C = and <4 x i32> %rC, %rB
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define <4 x i32> @selectbits_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %C = and <4 x i32> %rC, %rB
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
- %B = and <4 x i32> %rA, %A
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define <4 x i32> @selectbits_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %C = and <4 x i32> %rB, %rC
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
- %B = and <4 x i32> %rA, %A
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define <4 x i32> @selectbits_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
- %B = and <4 x i32> %rA, %A
- %C = and <4 x i32> %rB, %rC
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define <4 x i32> @selectbits_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
- %B = and <4 x i32> %rA, %A
- %C = and <4 x i32> %rC, %rB
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; v8i16
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define <8 x i16> @selectbits_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %C = and <8 x i16> %rC, %rB
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %A, %rA
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define <8 x i16> @selectbits_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %C = and <8 x i16> %rB, %rC
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %A, %rA
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define <8 x i16> @selectbits_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %A, %rA
- %C = and <8 x i16> %rB, %rC
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define <8 x i16> @selectbits_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %A, %rA
- %C = and <8 x i16> %rC, %rB
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define <8 x i16> @selectbits_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %C = and <8 x i16> %rC, %rB
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %rA, %A
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define <8 x i16> @selectbits_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %C = and <8 x i16> %rB, %rC
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %rA, %A
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define <8 x i16> @selectbits_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %rA, %A
- %C = and <8 x i16> %rB, %rC
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define <8 x i16> @selectbits_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %rA, %A
- %C = and <8 x i16> %rC, %rB
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; v16i8
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define <16 x i8> @selectbits_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %C = and <16 x i8> %rC, %rB
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %A, %rA
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define <16 x i8> @selectbits_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %C = and <16 x i8> %rB, %rC
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %A, %rA
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define <16 x i8> @selectbits_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %A, %rA
- %C = and <16 x i8> %rB, %rC
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define <16 x i8> @selectbits_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %A, %rA
- %C = and <16 x i8> %rC, %rB
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define <16 x i8> @selectbits_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %C = and <16 x i8> %rC, %rB
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %rA, %A
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define <16 x i8> @selectbits_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %C = and <16 x i8> %rB, %rC
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %rA, %A
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define <16 x i8> @selectbits_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %rA, %A
- %C = and <16 x i8> %rB, %rC
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define <16 x i8> @selectbits_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %rA, %A
- %C = and <16 x i8> %rC, %rB
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; i32
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define i32 @selectbits_i32_01(i32 %rA, i32 %rB, i32 %rC) {
- %C = and i32 %rC, %rB
- %A = xor i32 %rC, -1
- %B = and i32 %A, %rA
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define i32 @selectbits_i32_02(i32 %rA, i32 %rB, i32 %rC) {
- %C = and i32 %rB, %rC
- %A = xor i32 %rC, -1
- %B = and i32 %A, %rA
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define i32 @selectbits_i32_03(i32 %rA, i32 %rB, i32 %rC) {
- %A = xor i32 %rC, -1
- %B = and i32 %A, %rA
- %C = and i32 %rB, %rC
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define i32 @selectbits_i32_04(i32 %rA, i32 %rB, i32 %rC) {
- %A = xor i32 %rC, -1
- %B = and i32 %A, %rA
- %C = and i32 %rC, %rB
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define i32 @selectbits_i32_05(i32 %rA, i32 %rB, i32 %rC) {
- %C = and i32 %rC, %rB
- %A = xor i32 %rC, -1
- %B = and i32 %rA, %A
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define i32 @selectbits_i32_06(i32 %rA, i32 %rB, i32 %rC) {
- %C = and i32 %rB, %rC
- %A = xor i32 %rC, -1
- %B = and i32 %rA, %A
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define i32 @selectbits_i32_07(i32 %rA, i32 %rB, i32 %rC) {
- %A = xor i32 %rC, -1
- %B = and i32 %rA, %A
- %C = and i32 %rB, %rC
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define i32 @selectbits_i32_08(i32 %rA, i32 %rB, i32 %rC) {
- %A = xor i32 %rC, -1
- %B = and i32 %rA, %A
- %C = and i32 %rC, %rB
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; i16
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define i16 @selectbits_i16_01(i16 %rA, i16 %rB, i16 %rC) {
- %C = and i16 %rC, %rB
- %A = xor i16 %rC, -1
- %B = and i16 %A, %rA
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define i16 @selectbits_i16_02(i16 %rA, i16 %rB, i16 %rC) {
- %C = and i16 %rB, %rC
- %A = xor i16 %rC, -1
- %B = and i16 %A, %rA
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define i16 @selectbits_i16_03(i16 %rA, i16 %rB, i16 %rC) {
- %A = xor i16 %rC, -1
- %B = and i16 %A, %rA
- %C = and i16 %rB, %rC
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define i16 @selectbits_i16_04(i16 %rA, i16 %rB, i16 %rC) {
- %A = xor i16 %rC, -1
- %B = and i16 %A, %rA
- %C = and i16 %rC, %rB
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define i16 @selectbits_i16_05(i16 %rA, i16 %rB, i16 %rC) {
- %C = and i16 %rC, %rB
- %A = xor i16 %rC, -1
- %B = and i16 %rA, %A
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define i16 @selectbits_i16_06(i16 %rA, i16 %rB, i16 %rC) {
- %C = and i16 %rB, %rC
- %A = xor i16 %rC, -1
- %B = and i16 %rA, %A
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define i16 @selectbits_i16_07(i16 %rA, i16 %rB, i16 %rC) {
- %A = xor i16 %rC, -1
- %B = and i16 %rA, %A
- %C = and i16 %rB, %rC
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define i16 @selectbits_i16_08(i16 %rA, i16 %rB, i16 %rC) {
- %A = xor i16 %rC, -1
- %B = and i16 %rA, %A
- %C = and i16 %rC, %rB
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; i8
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define i8 @selectbits_i8_01(i8 %rA, i8 %rB, i8 %rC) {
- %C = and i8 %rC, %rB
- %A = xor i8 %rC, -1
- %B = and i8 %A, %rA
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define i8 @selectbits_i8_02(i8 %rA, i8 %rB, i8 %rC) {
- %C = and i8 %rB, %rC
- %A = xor i8 %rC, -1
- %B = and i8 %A, %rA
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define i8 @selectbits_i8_03(i8 %rA, i8 %rB, i8 %rC) {
- %A = xor i8 %rC, -1
- %B = and i8 %A, %rA
- %C = and i8 %rB, %rC
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define i8 @selectbits_i8_04(i8 %rA, i8 %rB, i8 %rC) {
- %A = xor i8 %rC, -1
- %B = and i8 %A, %rA
- %C = and i8 %rC, %rB
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define i8 @selectbits_i8_05(i8 %rA, i8 %rB, i8 %rC) {
- %C = and i8 %rC, %rB
- %A = xor i8 %rC, -1
- %B = and i8 %rA, %A
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define i8 @selectbits_i8_06(i8 %rA, i8 %rB, i8 %rC) {
- %C = and i8 %rB, %rC
- %A = xor i8 %rC, -1
- %B = and i8 %rA, %A
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define i8 @selectbits_i8_07(i8 %rA, i8 %rB, i8 %rC) {
- %A = xor i8 %rC, -1
- %B = and i8 %rA, %A
- %C = and i8 %rB, %rC
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define i8 @selectbits_i8_08(i8 %rA, i8 %rB, i8 %rC) {
- %A = xor i8 %rC, -1
- %B = and i8 %rA, %A
- %C = and i8 %rC, %rB
- %D = or i8 %C, %B
- ret i8 %D
-}
diff --git a/test/CodeGen/CellSPU/sext128.ll b/test/CodeGen/CellSPU/sext128.ll
deleted file mode 100644
index 6ae9aa51202f..000000000000
--- a/test/CodeGen/CellSPU/sext128.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-; ModuleID = 'sext128.bc'
-target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:128:128-v128:128:128-a0:0:128-s0:128:128"
-target triple = "spu"
-
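; Editor's note (not in the original file): the literals matched below appear
; to be words of the shufb control mask -- 269488144 == 0x10101010,
; 66051 == 0x00010203, 67438087 == 0x04050607 -- i.e. the sign word is
; presumably replicated across the high result bytes, followed by the source
; bytes themselves.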
-define i128 @sext_i64_i128(i64 %a) {
-entry:
- %0 = sext i64 %a to i128
- ret i128 %0
-; CHECK: long 269488144
-; CHECK: long 269488144
-; CHECK: long 66051
-; CHECK: long 67438087
-; CHECK-NOT: rotqmbyi
-; CHECK: lqa
-; CHECK: rotmai
-; CHECK: shufb
-}
-
-define i128 @sext_i32_i128(i32 %a) {
-entry:
- %0 = sext i32 %a to i128
- ret i128 %0
-; CHECK: long 269488144
-; CHECK: long 269488144
-; CHECK: long 269488144
-; CHECK: long 66051
-; CHECK-NOT: rotqmbyi
-; CHECK: lqa
-; CHECK: rotmai
-; CHECK: shufb
-}
-
-define i128 @sext_i32_i128a(float %a) {
-entry:
- %0 = call i32 @myfunc(float %a)
- %1 = sext i32 %0 to i128
- ret i128 %1
-; CHECK: long 269488144
-; CHECK: long 269488144
-; CHECK: long 269488144
-; CHECK: long 66051
-; CHECK-NOT: rotqmbyi
-; CHECK: lqa
-; CHECK: rotmai
-; CHECK: shufb
-}
-
-declare i32 @myfunc(float)
-
-define i128 @func1(i8 %u) {
-entry:
-; CHECK: xsbh
-; CHECK: xshw
-; CHECK: rotmai
-; CHECK: shufb
-; CHECK: bi $lr
- %0 = sext i8 %u to i128
- ret i128 %0
-}
-
-define i128 @func2(i16 %u) {
-entry:
-; CHECK: xshw
-; CHECK: rotmai
-; CHECK: shufb
-; CHECK: bi $lr
- %0 = sext i16 %u to i128
- ret i128 %0
-}
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
deleted file mode 100644
index 1ccc356dcf5a..000000000000
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ /dev/null
@@ -1,348 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep "shlh " %t1.s | count 10
-; RUN: grep "shlhi " %t1.s | count 3
-; RUN: grep "shl " %t1.s | count 10
-; RUN: grep "shli " %t1.s | count 3
-; RUN: grep "xshw " %t1.s | count 5
-; RUN: grep "and " %t1.s | count 15
-; RUN: grep "andi " %t1.s | count 4
-; RUN: grep "rotmi " %t1.s | count 4
-; RUN: grep "rotqmbyi " %t1.s | count 1
-; RUN: grep "rotqmbii " %t1.s | count 2
-; RUN: grep "rotqmby " %t1.s | count 1
-; RUN: grep "rotqmbi " %t1.s | count 2
-; RUN: grep "rotqbyi " %t1.s | count 1
-; RUN: grep "rotqbii " %t1.s | count 2
-; RUN: grep "rotqbybi " %t1.s | count 1
-; RUN: grep "sfi " %t1.s | count 6
-; RUN: cat %t1.s | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; Shift left i16 via register; note that the second operand to shl is promoted
-; to a 32-bit type (a sketch of the promoted form follows these functions):
-
-define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) {
- %A = shl i16 %arg1, %arg2
- ret i16 %A
-}
-
-define i16 @shlh_i16_2(i16 %arg1, i16 %arg2) {
- %A = shl i16 %arg2, %arg1
- ret i16 %A
-}
-
-define signext i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) {
- %A = shl i16 %arg1, %arg2
- ret i16 %A
-}
-
-define signext i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) {
- %A = shl i16 %arg2, %arg1
- ret i16 %A
-}
-
-define zeroext i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) {
- %A = shl i16 %arg1, %arg2
- ret i16 %A
-}
-
-define zeroext i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) {
- %A = shl i16 %arg2, %arg1
- ret i16 %A
-}
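; Editor's sketch (not part of the original test): one way to write out the
; promotion described above by hand -- the i16 value and shift amount are
; widened to i32, shifted at 32 bits, and the result truncated back:
define i16 @shlh_i16_promoted_sketch(i16 %arg1, i16 %arg2) {
  %val = zext i16 %arg1 to i32
  %amt = zext i16 %arg2 to i32
  %shl = shl i32 %val, %amt
  %res = trunc i32 %shl to i16
  ret i16 %res
}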
-
-; Shift left i16 with immediate:
-define i16 @shlhi_i16_1(i16 %arg1) {
- %A = shl i16 %arg1, 12
- ret i16 %A
-}
-
-; Should not generate anything other than the return: arg1 << 0 = arg1
-define i16 @shlhi_i16_2(i16 %arg1) {
- %A = shl i16 %arg1, 0
- ret i16 %A
-}
-
-define i16 @shlhi_i16_3(i16 %arg1) {
- %A = shl i16 16383, %arg1
- ret i16 %A
-}
-
-; Should generate 0, since 0 << arg1 = 0
-define i16 @shlhi_i16_4(i16 %arg1) {
- %A = shl i16 0, %arg1
- ret i16 %A
-}
-
-define signext i16 @shlhi_i16_5(i16 signext %arg1) {
- %A = shl i16 %arg1, 12
- ret i16 %A
-}
-
-; Should not generate anything other than the return: arg1 << 0 = arg1
-define signext i16 @shlhi_i16_6(i16 signext %arg1) {
- %A = shl i16 %arg1, 0
- ret i16 %A
-}
-
-define signext i16 @shlhi_i16_7(i16 signext %arg1) {
- %A = shl i16 16383, %arg1
- ret i16 %A
-}
-
-; Should generate 0, since 0 << arg1 = 0
-define signext i16 @shlhi_i16_8(i16 signext %arg1) {
- %A = shl i16 0, %arg1
- ret i16 %A
-}
-
-define zeroext i16 @shlhi_i16_9(i16 zeroext %arg1) {
- %A = shl i16 %arg1, 12
- ret i16 %A
-}
-
-; Should not generate anything other than the return: arg1 << 0 = arg1
-define zeroext i16 @shlhi_i16_10(i16 zeroext %arg1) {
- %A = shl i16 %arg1, 0
- ret i16 %A
-}
-
-define zeroext i16 @shlhi_i16_11(i16 zeroext %arg1) {
- %A = shl i16 16383, %arg1
- ret i16 %A
-}
-
-; Should generate 0, since 0 << arg1 = 0
-define zeroext i16 @shlhi_i16_12(i16 zeroext %arg1) {
- %A = shl i16 0, %arg1
- ret i16 %A
-}
-
-; Shift left i32 via register; the second operand to shl is already a 32-bit
-; type here:
-
-define i32 @shl_i32_1(i32 %arg1, i32 %arg2) {
- %A = shl i32 %arg1, %arg2
- ret i32 %A
-}
-
-define i32 @shl_i32_2(i32 %arg1, i32 %arg2) {
- %A = shl i32 %arg2, %arg1
- ret i32 %A
-}
-
-define signext i32 @shl_i32_3(i32 signext %arg1, i32 signext %arg2) {
- %A = shl i32 %arg1, %arg2
- ret i32 %A
-}
-
-define signext i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) {
- %A = shl i32 %arg2, %arg1
- ret i32 %A
-}
-
-define zeroext i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) {
- %A = shl i32 %arg1, %arg2
- ret i32 %A
-}
-
-define zeroext i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) {
- %A = shl i32 %arg2, %arg1
- ret i32 %A
-}
-
-; Shift left i32 with immediate:
-define i32 @shli_i32_1(i32 %arg1) {
- %A = shl i32 %arg1, 12
- ret i32 %A
-}
-
-; Should not generate anything other than the return: arg1 << 0 = arg1
-define i32 @shli_i32_2(i32 %arg1) {
- %A = shl i32 %arg1, 0
- ret i32 %A
-}
-
-define i32 @shli_i32_3(i32 %arg1) {
- %A = shl i32 16383, %arg1
- ret i32 %A
-}
-
-; Should generate 0, since 0 << arg1 = 0
-define i32 @shli_i32_4(i32 %arg1) {
- %A = shl i32 0, %arg1
- ret i32 %A
-}
-
-define signext i32 @shli_i32_5(i32 signext %arg1) {
- %A = shl i32 %arg1, 12
- ret i32 %A
-}
-
-; Should not generate anything other than the return: arg1 << 0 = arg1
-define signext i32 @shli_i32_6(i32 signext %arg1) {
- %A = shl i32 %arg1, 0
- ret i32 %A
-}
-
-define signext i32 @shli_i32_7(i32 signext %arg1) {
- %A = shl i32 16383, %arg1
- ret i32 %A
-}
-
-; Should generate 0, since 0 << arg1 = 0
-define signext i32 @shli_i32_8(i32 signext %arg1) {
- %A = shl i32 0, %arg1
- ret i32 %A
-}
-
-define zeroext i32 @shli_i32_9(i32 zeroext %arg1) {
- %A = shl i32 %arg1, 12
- ret i32 %A
-}
-
-; Should not generate anything other than the return: arg1 << 0 = arg1
-define zeroext i32 @shli_i32_10(i32 zeroext %arg1) {
- %A = shl i32 %arg1, 0
- ret i32 %A
-}
-
-define zeroext i32 @shli_i32_11(i32 zeroext %arg1) {
- %A = shl i32 16383, %arg1
- ret i32 %A
-}
-
-; Should generate 0, since 0 << arg1 = 0
-define zeroext i32 @shli_i32_12(i32 zeroext %arg1) {
- %A = shl i32 0, %arg1
- ret i32 %A
-}
-
-;; i64 shift left
-
-define i64 @shl_i64_1(i64 %arg1) {
- %A = shl i64 %arg1, 9
- ret i64 %A
-}
-
-define i64 @shl_i64_2(i64 %arg1) {
- %A = shl i64 %arg1, 3
- ret i64 %A
-}
-
-define i64 @shl_i64_3(i64 %arg1, i32 %shift) {
- %1 = zext i32 %shift to i64
- %2 = shl i64 %arg1, %1
- ret i64 %2
-}
-
-;; i64 shift right logical (zeros are shifted in from the left, into the high bits)
-
-define i64 @lshr_i64_1(i64 %arg1) {
- %1 = lshr i64 %arg1, 9
- ret i64 %1
-}
-
-define i64 @lshr_i64_2(i64 %arg1) {
- %1 = lshr i64 %arg1, 3
- ret i64 %1
-}
-
-define i64 @lshr_i64_3(i64 %arg1, i32 %shift) {
- %1 = zext i32 %shift to i64
- %2 = lshr i64 %arg1, %1
- ret i64 %2
-}
-
-;; i64 shift right arithmetic (the sign bit is replicated into the high bits;
-;; a sketch contrasting the two right shifts follows these functions)
-
-define i64 @ashr_i64_1(i64 %arg) {
- %1 = ashr i64 %arg, 9
- ret i64 %1
-}
-
-define i64 @ashr_i64_2(i64 %arg) {
- %1 = ashr i64 %arg, 3
- ret i64 %1
-}
-
-define i64 @ashr_i64_3(i64 %arg1, i32 %shift) {
- %1 = zext i32 %shift to i64
- %2 = ashr i64 %arg1, %1
- ret i64 %2
-}
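; Editor's sketch (not part of the original test): the two right shifts differ
; only in what fills the vacated high bits -- lshr shifts in zeros, ashr
; replicates the sign bit -- so they diverge exactly when the input is
; negative:
define i64 @shift_kind_sketch() {
  %l = lshr i64 -16, 2   ; 0x3ffffffffffffffc (zero-filled)
  %a = ashr i64 -16, 2   ; -4, i.e. 0xfffffffffffffffc (sign-filled)
  %d = xor i64 %l, %a    ; nonzero precisely because the fills differ here
  ret i64 %d
}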
-
-define i32 @hi32_i64(i64 %arg) {
- %1 = lshr i64 %arg, 32
- %2 = trunc i64 %1 to i32
- ret i32 %2
-}
-
-; An additional i128 shift test
-define i128 @test_lshr_i128( i128 %val ) {
- ;CHECK: test_lshr_i128
- ;CHECK: sfi
- ;CHECK: rotqmbi
- ;CHECK: rotqmbybi
- ;CHECK: bi $lr
- %rv = lshr i128 %val, 64
- ret i128 %rv
-}
-
-;Vector shifts
-define <2 x i32> @shl_v2i32(<2 x i32> %val, <2 x i32> %sh) {
-;CHECK: shl
-;CHECK: bi $lr
- %rv = shl <2 x i32> %val, %sh
- ret <2 x i32> %rv
-}
-
-define <4 x i32> @shl_v4i32(<4 x i32> %val, <4 x i32> %sh) {
-;CHECK: shl
-;CHECK: bi $lr
- %rv = shl <4 x i32> %val, %sh
- ret <4 x i32> %rv
-}
-
-define <8 x i16> @shl_v8i16(<8 x i16> %val, <8 x i16> %sh) {
-;CHECK: shlh
-;CHECK: bi $lr
- %rv = shl <8 x i16> %val, %sh
- ret <8 x i16> %rv
-}
-
-define <4 x i32> @lshr_v4i32(<4 x i32> %val, <4 x i32> %sh) {
-;CHECK: rotm
-;CHECK: bi $lr
- %rv = lshr <4 x i32> %val, %sh
- ret <4 x i32> %rv
-}
-
-define <8 x i16> @lshr_v8i16(<8 x i16> %val, <8 x i16> %sh) {
-;CHECK: sfhi
-;CHECK: rothm
-;CHECK: bi $lr
- %rv = lshr <8 x i16> %val, %sh
- ret <8 x i16> %rv
-}
-
-define <4 x i32> @ashr_v4i32(<4 x i32> %val, <4 x i32> %sh) {
-;CHECK: rotma
-;CHECK: bi $lr
- %rv = ashr <4 x i32> %val, %sh
- ret <4 x i32> %rv
-}
-
-define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) {
-;CHECK: sfhi
-;CHECK: rotmah
-;CHECK: bi $lr
- %rv = ashr <8 x i16> %val, %sh
- ret <8 x i16> %rv
-}
-
-define <2 x i64> @special_const() {
- ret <2 x i64> <i64 4294967295, i64 4294967295>
-}
diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll
deleted file mode 100644
index 973586bf6cf2..000000000000
--- a/test/CodeGen/CellSPU/shuffles.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; RUN: llc -O1 --march=cellspu < %s | FileCheck %s
-
-;CHECK: shuffle
-define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) {
- ; CHECK: cwd {{\$.}}, 0($sp)
- ; CHECK: shufb {{\$., \$4, \$3, \$.}}
- %val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32> <i32 4,i32 1,i32 2,i32 3>
- ret <4 x float> %val
-}
-
-;CHECK: splat
-define <4 x float> @splat(float %param1) {
- ; CHECK: lqa
- ; CHECK: shufb $3
- ; CHECK: bi
- %vec = insertelement <1 x float> undef, float %param1, i32 0
- %val= shufflevector <1 x float> %vec, <1 x float> undef, <4 x i32> <i32 0,i32 0,i32 0,i32 0>
- ret <4 x float> %val
-}
-
-;CHECK: test_insert
-define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
- %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0
-;CHECK: lqa $6,
-;CHECK: shufb $4, $4, $5, $6
- %sl2_17 = insertelement <2 x float> %sl2_17_tmp1, float %val2, i32 1
-
-;CHECK: cdd $5, 0($3)
-;CHECK: lqd $6, 0($3)
-;CHECK: shufb $4, $4, $6, $5
-;CHECK: stqd $4, 0($3)
-;CHECK: bi $lr
- store <2 x float> %sl2_17, <2 x float>* %ptr
- ret void
-}
-
-;CHECK: test_insert_1
-define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) {
-;CHECK: cwd $5, 4($sp)
-;CHECK: shufb $3, $4, $3, $5
-;CHECK: bi $lr
- %rv = insertelement <4 x float> %vparam, float %eltparam, i32 1
- ret <4 x float> %rv
-}
-
-;CHECK: test_v2i32
-define <2 x i32> @test_v2i32(<4 x i32>%vec)
-{
-;CHECK: rotqbyi $3, $3, 4
-;CHECK: bi $lr
- %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, <2 x i32><i32 1,i32 2>
- ret <2 x i32> %rv
-}
-
-define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec)
-{
- %rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
- <4 x i32> <i32 2,i32 3,i32 0, i32 1>
- ret <4 x i32> %rv
-}
-
-;CHECK: test_v4i32_rot4
-define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec)
-{
- %rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
- <4 x i32> <i32 1,i32 2,i32 3, i32 0>
- ret <4 x i32> %rv
-}
-
diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll
deleted file mode 100644
index 80bf47ccf5d9..000000000000
--- a/test/CodeGen/CellSPU/sp_farith.ll
+++ /dev/null
@@ -1,90 +0,0 @@
-; RUN: llc < %s -march=cellspu -enable-unsafe-fp-math > %t1.s
-; RUN: grep fa %t1.s | count 2
-; RUN: grep fs %t1.s | count 2
-; RUN: grep fm %t1.s | count 6
-; RUN: grep fma %t1.s | count 2
-; RUN: grep fms %t1.s | count 2
-; RUN: grep fnms %t1.s | count 3
-;
-; This file tests the standard floating-point arithmetic instructions.
-; NOTE: fdiv is tested separately, since it is a compound operation.
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define float @fp_add(float %arg1, float %arg2) {
- %A = fadd float %arg1, %arg2 ; <float> [#uses=1]
- ret float %A
-}
-
-define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) {
- %A = fadd <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- ret <4 x float> %A
-}
-
-define float @fp_sub(float %arg1, float %arg2) {
- %A = fsub float %arg1, %arg2 ; <float> [#uses=1]
- ret float %A
-}
-
-define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) {
- %A = fsub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- ret <4 x float> %A
-}
-
-define float @fp_mul(float %arg1, float %arg2) {
- %A = fmul float %arg1, %arg2 ; <float> [#uses=1]
- ret float %A
-}
-
-define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) {
- %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- ret <4 x float> %A
-}
-
-define float @fp_mul_add(float %arg1, float %arg2, float %arg3) {
- %A = fmul float %arg1, %arg2 ; <float> [#uses=1]
- %B = fadd float %A, %arg3 ; <float> [#uses=1]
- ret float %B
-}
-
-define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
- %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- %B = fadd <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1]
- ret <4 x float> %B
-}
-
-define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) {
- %A = fmul float %arg1, %arg2 ; <float> [#uses=1]
- %B = fsub float %A, %arg3 ; <float> [#uses=1]
- ret float %B
-}
-
-define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
- %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- %B = fsub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1]
- ret <4 x float> %B
-}
-
-; Test the straightforward way of getting fnms
-; c - a * b
-define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) {
- %A = fmul float %arg1, %arg2
- %B = fsub float %arg3, %A
- ret float %B
-}
-
-; Test another way of getting fnms
-; -(a * b - c) = c - a * b
-define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) {
- %A = fmul float %arg1, %arg2
- %B = fsub float %A, %arg3
- %C = fsub float -0.0, %B
- ret float %C
-}
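; Editor's sketch (not part of the original test): under the exact-arithmetic
; assumption that -enable-unsafe-fp-math grants, the two formulations above
; agree, c - a*b == -(a*b - c), so the difference below folds to zero:
define float @fnms_identity_sketch(float %a, float %b, float %c) {
  %m  = fmul float %a, %b
  %s1 = fsub float %c, %m     ; c - a*b
  %t  = fsub float %m, %c     ; a*b - c
  %s2 = fsub float -0.0, %t   ; -(a*b - c)
  %z  = fsub float %s1, %s2
  ret float %z
}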
-
-define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
- %A = fmul <4 x float> %arg1, %arg2
- %B = fsub <4 x float> %A, %arg3
- %D = fsub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B
- ret <4 x float> %D
-}
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
deleted file mode 100644
index 43f8776a3d46..000000000000
--- a/test/CodeGen/CellSPU/stores.ll
+++ /dev/null
@@ -1,181 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep 'stqd.*0($3)' %t1.s | count 4
-; RUN: grep 'stqd.*16($3)' %t1.s | count 4
-; RUN: grep 16256 %t1.s | count 2
-; RUN: grep 16384 %t1.s | count 1
-; RUN: grep 771 %t1.s | count 4
-; RUN: grep 515 %t1.s | count 2
-; RUN: grep 1799 %t1.s | count 2
-; RUN: grep 1543 %t1.s | count 5
-; RUN: grep 1029 %t1.s | count 3
-; RUN: grep 'shli.*, 4' %t1.s | count 4
-; RUN: grep stqx %t1.s | count 4
-; RUN: grep ilhu %t1.s | count 11
-; RUN: grep iohl %t1.s | count 8
-; RUN: grep shufb %t1.s | count 15
-; RUN: grep frds %t1.s | count 1
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-; ModuleID = 'stores.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define void @store_v16i8_1(<16 x i8>* %a) nounwind {
-entry:
- store <16 x i8> < i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1 >, <16 x i8>* %a
- ret void
-}
-
-define void @store_v16i8_2(<16 x i8>* %a) nounwind {
-entry:
- %arrayidx = getelementptr <16 x i8>* %a, i32 1
- store <16 x i8> < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >, <16 x i8>* %arrayidx
- ret void
-}
-
-define void @store_v16i8_3(<16 x i8>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <16 x i8>* %a, i32 %i
- store <16 x i8> < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >, <16 x i8>* %arrayidx
- ret void
-}
-
-define void @store_v8i16_1(<8 x i16>* %a) nounwind {
-entry:
- store <8 x i16> < i16 1, i16 2, i16 1, i16 1, i16 1, i16 2, i16 1, i16 1 >, <8 x i16>* %a
- ret void
-}
-
-define void @store_v8i16_2(<8 x i16>* %a) nounwind {
-entry:
- %arrayidx = getelementptr <8 x i16>* %a, i16 1
- store <8 x i16> < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2 >, <8 x i16>* %arrayidx
- ret void
-}
-
-define void @store_v8i16_3(<8 x i16>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <8 x i16>* %a, i32 %i
- store <8 x i16> < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >, <8 x i16>* %arrayidx
- ret void
-}
-
-define void @store_v4i32_1(<4 x i32>* %a) nounwind {
-entry:
- store <4 x i32> < i32 1, i32 2, i32 1, i32 1 >, <4 x i32>* %a
- ret void
-}
-
-define void @store_v4i32_2(<4 x i32>* %a) nounwind {
-entry:
- %arrayidx = getelementptr <4 x i32>* %a, i32 1
- store <4 x i32> < i32 2, i32 2, i32 2, i32 2 >, <4 x i32>* %arrayidx
- ret void
-}
-
-define void @store_v4i32_3(<4 x i32>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <4 x i32>* %a, i32 %i
- store <4 x i32> < i32 1, i32 1, i32 1, i32 1 >, <4 x i32>* %arrayidx
- ret void
-}
-
-define void @store_v4f32_1(<4 x float>* %a) nounwind {
-entry:
- store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %a
- ret void
-}
-
-define void @store_v4f32_2(<4 x float>* %a) nounwind {
-entry:
- %arrayidx = getelementptr <4 x float>* %a, i32 1
- store <4 x float> < float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00 >, <4 x float>* %arrayidx
- ret void
-}
-
-define void @store_v4f32_3(<4 x float>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <4 x float>* %a, i32 %i
- store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx
- ret void
-}
-
-; Test truncating stores:
-
-define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind {
-entry:
- %conv = trunc i16 %val to i8
- store i8 %conv, i8* %dest
- ret i8 %conv
-}
-
-define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind {
-entry:
- %conv = trunc i32 %val to i8
- store i8 %conv, i8* %dest
- ret i8 %conv
-}
-
-define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind {
-entry:
- %conv = trunc i32 %val to i16
- store i16 %conv, i16* %dest
- ret i16 %conv
-}
-
-define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind {
-entry:
- %conv = trunc i64 %val to i8
- store i8 %conv, i8* %dest
- ret i8 %conv
-}
-
-define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind {
-entry:
- %conv = trunc i64 %val to i16
- store i16 %conv, i16* %dest
- ret i16 %conv
-}
-
-define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind {
-entry:
- %conv = trunc i64 %val to i32
- store i32 %conv, i32* %dest
- ret i32 %conv
-}
-
-define float @tstore_f64_f32(double %val, float* %dest) nounwind {
-entry:
- %conv = fptrunc double %val to float
- store float %conv, float* %dest
- ret float %conv
-}
-
-; Check stores that might span two 16-byte memory blocks
-define void @store_misaligned( i32 %val, i32* %ptr) {
-;CHECK: store_misaligned
-;CHECK: lqd
-;CHECK: lqd
-;CHECK: stqd
-;CHECK: stqd
-;CHECK: bi $lr
- store i32 %val, i32*%ptr, align 2
- ret void
-}
-
-define void @store_v8( <8 x float> %val, <8 x float>* %ptr )
-{
-;CHECK: stq
-;CHECK: stq
-;CHECK: bi $lr
- store <8 x float> %val, <8 x float>* %ptr
- ret void
-}
-
-define void @store_null_vec( <4 x i32> %val ) {
-; FIXME - for some reason this is compiled into an il+stqd, not a sta.
-;CHECK: stqd
-;CHECK: bi $lr
- store <4 x i32> %val, <4 x i32>* null
- ret void
-}
diff --git a/test/CodeGen/CellSPU/storestruct.ll b/test/CodeGen/CellSPU/storestruct.ll
deleted file mode 100644
index 47185e829661..000000000000
--- a/test/CodeGen/CellSPU/storestruct.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-%0 = type {i32, i32}
-@buffer = global [ 72 x %0 ] zeroinitializer
-
-define void @test() {
-; Check that there is no illegal "a rt, ra, imm" instruction
-; CHECK-NOT: a {{\$., \$., 5..}}
-; CHECK: a {{\$., \$., \$.}}
- store %0 {i32 1, i32 2} ,
- %0* getelementptr ([72 x %0]* @buffer, i32 0, i32 71)
- ret void
-}
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
deleted file mode 100644
index 8c3275080c69..000000000000
--- a/test/CodeGen/CellSPU/struct_1.ll
+++ /dev/null
@@ -1,147 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
-; RUN: grep lqa %t1.s | count 5
-; RUN: grep lqd %t1.s | count 11
-; RUN: grep rotqbyi %t1.s | count 7
-; RUN: grep xshw %t1.s | count 1
-; RUN: grep andi %t1.s | count 5
-; RUN: grep cbd %t1.s | count 3
-; RUN: grep chd %t1.s | count 1
-; RUN: grep cwd %t1.s | count 3
-; RUN: grep shufb %t1.s | count 7
-; RUN: grep stqd %t1.s | count 7
-; RUN: grep iohl %t2.s | count 16
-; RUN: grep ilhu %t2.s | count 16
-; RUN: grep lqd %t2.s | count 16
-; RUN: grep rotqbyi %t2.s | count 7
-; RUN: grep xshw %t2.s | count 1
-; RUN: grep andi %t2.s | count 5
-; RUN: grep cbd %t2.s | count 3
-; RUN: grep chd %t2.s | count 1
-; RUN: grep cwd %t2.s | count 3
-; RUN: grep shufb %t2.s | count 7
-; RUN: grep stqd %t2.s | count 7
-
-; CellSPU legalization is over-sensitive to Legalize's traversal order.
-; XFAIL: *
-
-; ModuleID = 'struct_1.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; struct hackstate {
-; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3)
-; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3)
-; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3)
-; int i1; // offset 4 (rotate left by 4 bytes to byte 0)
-; short s1; // offset 8 (rotate left by 6 bytes to byte 2)
-; int i2; // offset 12 [ignored]
-; unsigned char c4; // offset 16 [ignored]
-; unsigned char c5; // offset 17 [ignored]
-; unsigned char c6; // offset 18 (rotate left by 14 bytes to byte 3)
-; unsigned char c7; // offset 19 (no rotate, in preferred slot)
-; int i3; // offset 20 [ignored]
-; int i4; // offset 24 [ignored]
-; int i5; // offset 28 [ignored]
-; int i6; // offset 32 (no rotate, in preferred slot)
-; }
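; Editor's note (not in the original file): on SPU a scalar is kept in its
; type's "preferred slot" within the first word of a 128-bit register (byte 3
; for chars, bytes 2-3 for shorts, bytes 0-3 for ints), so fields that do not
; already sit in that slot must be rotated into place, as annotated above.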
-%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 }
-
-; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
-@state = global %struct.hackstate zeroinitializer, align 16
-
-define zeroext i8 @get_hackstate_c1() nounwind {
-entry:
- %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
- ret i8 %tmp2
-}
-
-define zeroext i8 @get_hackstate_c2() nounwind {
-entry:
- %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
- ret i8 %tmp2
-}
-
-define zeroext i8 @get_hackstate_c3() nounwind {
-entry:
- %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
- ret i8 %tmp2
-}
-
-define i32 @get_hackstate_i1() nounwind {
-entry:
- %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
- ret i32 %tmp2
-}
-
-define signext i16 @get_hackstate_s1() nounwind {
-entry:
- %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
- ret i16 %tmp2
-}
-
-define zeroext i8 @get_hackstate_c6() nounwind {
-entry:
- %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 8), align 16
- ret i8 %tmp2
-}
-
-define zeroext i8 @get_hackstate_c7() nounwind {
-entry:
- %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16
- ret i8 %tmp2
-}
-
-define i32 @get_hackstate_i3() nounwind {
-entry:
- %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16
- ret i32 %tmp2
-}
-
-define i32 @get_hackstate_i6() nounwind {
-entry:
- %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16
- ret i32 %tmp2
-}
-
-define void @set_hackstate_c1(i8 zeroext %c) nounwind {
-entry:
- store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
- ret void
-}
-
-define void @set_hackstate_c2(i8 zeroext %c) nounwind {
-entry:
- store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
- ret void
-}
-
-define void @set_hackstate_c3(i8 zeroext %c) nounwind {
-entry:
- store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
- ret void
-}
-
-define void @set_hackstate_i1(i32 %i) nounwind {
-entry:
- store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
- ret void
-}
-
-define void @set_hackstate_s1(i16 signext %s) nounwind {
-entry:
- store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
- ret void
-}
-
-define void @set_hackstate_i3(i32 %i) nounwind {
-entry:
- store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16
- ret void
-}
-
-define void @set_hackstate_i6(i32 %i) nounwind {
-entry:
- store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16
- ret void
-}
diff --git a/test/CodeGen/CellSPU/sub_ops.ll b/test/CodeGen/CellSPU/sub_ops.ll
deleted file mode 100644
index f0c40d37ce9d..000000000000
--- a/test/CodeGen/CellSPU/sub_ops.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-define i32 @subword( i32 %param1, i32 %param2) {
-; Check operand ordering: sf computes rt = rb - ra, so ret = param1 - param2
-; CHECK-NOT: sf $3, $3, $4
-; CHECK: sf $3, $4, $3
- %1 = sub i32 %param1, %param2
- ret i32 %1
-}
-
-define i16 @subhword( i16 %param1, i16 %param2) {
-; Check operand ordering: sfh computes rt = rb - ra, so ret = param1 - param2
-; CHECK-NOT: sfh $3, $3, $4
-; CHECK: sfh $3, $4, $3
- %1 = sub i16 %param1, %param2
- ret i16 %1
-}
-
-define float @subfloat( float %param1, float %param2) {
-; Check operand ordering: fs computes rt = ra - rb, so ret = param1 - param2
-; (note: this is the reverse of the integer sf instruction)
-; CHECK-NOT: fs $3, $4, $3
-; CHECK: fs $3, $3, $4
- %1 = fsub float %param1, %param2
- ret float %1
-}
diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll
deleted file mode 100644
index e4c8fb49a32c..000000000000
--- a/test/CodeGen/CellSPU/trunc.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep shufb %t1.s | count 19
-; RUN: grep "ilhu.*1799" %t1.s | count 1
-; RUN: grep "ilhu.*771" %t1.s | count 2
-; RUN: grep "ilhu.*1543" %t1.s | count 1
-; RUN: grep "ilhu.*1029" %t1.s | count 1
-; RUN: grep "ilhu.*515" %t1.s | count 1
-; RUN: grep "ilhu.*3855" %t1.s | count 1
-; RUN: grep "ilhu.*3599" %t1.s | count 1
-; RUN: grep "ilhu.*3085" %t1.s | count 1
-; RUN: grep "iohl.*3855" %t1.s | count 1
-; RUN: grep "iohl.*3599" %t1.s | count 2
-; RUN: grep "iohl.*1543" %t1.s | count 2
-; RUN: grep "iohl.*771" %t1.s | count 2
-; RUN: grep "iohl.*515" %t1.s | count 1
-; RUN: grep "iohl.*1799" %t1.s | count 1
-; RUN: grep lqa %t1.s | count 1
-; RUN: grep cbd %t1.s | count 4
-; RUN: grep chd %t1.s | count 3
-; RUN: grep cwd %t1.s | count 1
-; RUN: grep cdd %t1.s | count 1
-
-; ModuleID = 'trunc.bc'
-target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
-target triple = "spu"
-
-define <16 x i8> @trunc_i128_i8(i128 %u, <16 x i8> %v) {
-entry:
- %0 = trunc i128 %u to i8
- %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 15
- ret <16 x i8> %tmp1
-}
-
-define <8 x i16> @trunc_i128_i16(i128 %u, <8 x i16> %v) {
-entry:
- %0 = trunc i128 %u to i16
- %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 8
- ret <8 x i16> %tmp1
-}
-
-define <4 x i32> @trunc_i128_i32(i128 %u, <4 x i32> %v) {
-entry:
- %0 = trunc i128 %u to i32
- %tmp1 = insertelement <4 x i32> %v, i32 %0, i32 2
- ret <4 x i32> %tmp1
-}
-
-define <2 x i64> @trunc_i128_i64(i128 %u, <2 x i64> %v) {
-entry:
- %0 = trunc i128 %u to i64
- %tmp1 = insertelement <2 x i64> %v, i64 %0, i32 1
- ret <2 x i64> %tmp1
-}
-
-define <16 x i8> @trunc_i64_i8(i64 %u, <16 x i8> %v) {
-entry:
- %0 = trunc i64 %u to i8
- %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 10
- ret <16 x i8> %tmp1
-}
-
-define <8 x i16> @trunc_i64_i16(i64 %u, <8 x i16> %v) {
-entry:
- %0 = trunc i64 %u to i16
- %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 6
- ret <8 x i16> %tmp1
-}
-
-define i32 @trunc_i64_i32(i64 %u) {
-entry:
- %0 = trunc i64 %u to i32
- ret i32 %0
-}
-
-define <16 x i8> @trunc_i32_i8(i32 %u, <16 x i8> %v) {
-entry:
- %0 = trunc i32 %u to i8
- %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 7
- ret <16 x i8> %tmp1
-}
-
-define <8 x i16> @trunc_i32_i16(i32 %u, <8 x i16> %v) {
-entry:
- %0 = trunc i32 %u to i16
- %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 3
- ret <8 x i16> %tmp1
-}
-
-define <16 x i8> @trunc_i16_i8(i16 %u, <16 x i8> %v) {
-entry:
- %0 = trunc i16 %u to i8
- %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 5
- ret <16 x i8> %tmp1
-}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/README.txt b/test/CodeGen/CellSPU/useful-harnesses/README.txt
deleted file mode 100644
index d87b3989e4f7..000000000000
--- a/test/CodeGen/CellSPU/useful-harnesses/README.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-This directory contains code that is not part of the DejaGNU test suite but
-is generally useful as a set of standalone test harnesses.
-
-vecoperations.c: Various vector operation sanity checks, e.g., shuffles,
- 8-bit vector add and multiply.
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i32operations.c b/test/CodeGen/CellSPU/useful-harnesses/i32operations.c
deleted file mode 100644
index 12fc30bf65d7..000000000000
--- a/test/CodeGen/CellSPU/useful-harnesses/i32operations.c
+++ /dev/null
@@ -1,69 +0,0 @@
-#include <stdio.h>
-
-typedef unsigned int uint32_t;
-typedef int int32_t;
-
-const char *boolstring(int val) {
- return val ? "true" : "false";
-}
-
-int i32_eq(int32_t a, int32_t b) {
- return (a == b);
-}
-
-int i32_neq(int32_t a, int32_t b) {
- return (a != b);
-}
-
-int32_t i32_eq_select(int32_t a, int32_t b, int32_t c, int32_t d) {
- return ((a == b) ? c : d);
-}
-
-int32_t i32_neq_select(int32_t a, int32_t b, int32_t c, int32_t d) {
- return ((a != b) ? c : d);
-}
-
-struct pred_s {
- const char *name;
- int (*predfunc)(int32_t, int32_t);
- int (*selfunc)(int32_t, int32_t, int32_t, int32_t);
-};
-
-struct pred_s preds[] = {
- { "eq", i32_eq, i32_eq_select },
- { "neq", i32_neq, i32_neq_select }
-};
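/* Editor's note (not part of the original file): each preds[] entry pairs a
   predicate with its select-based counterpart, so the loop in main() checks
   both the raw comparison result and the (cond ? c : d) selection for the
   same operand sets. */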
-
-int main(void) {
- int i;
- int32_t a = 1234567890;
- int32_t b = 345678901;
- int32_t c = 1234500000;
- int32_t d = 10001;
- int32_t e = 10000;
-
- printf("a = %12d (0x%08x)\n", a, a);
- printf("b = %12d (0x%08x)\n", b, b);
- printf("c = %12d (0x%08x)\n", c, c);
- printf("d = %12d (0x%08x)\n", d, d);
- printf("e = %12d (0x%08x)\n", e, e);
- printf("----------------------------------------\n");
-
- for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) {
- printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a)));
- printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a)));
- printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b)));
- printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c)));
- printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e)));
- printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e)));
-
- printf("a %s a ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d));
- printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c));
- printf("a %s b ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d));
- printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d));
-
- printf("----------------------------------------\n");
- }
-
- return 0;
-}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
deleted file mode 100644
index b613bd872e28..000000000000
--- a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
+++ /dev/null
@@ -1,673 +0,0 @@
-#include <stdio.h>
-#include "i64operations.h"
-
-int64_t tval_a = 1234567890003LL;
-int64_t tval_b = 2345678901235LL;
-int64_t tval_c = 1234567890001LL;
-int64_t tval_d = 10001LL;
-int64_t tval_e = 10000LL;
-uint64_t tval_f = 0xffffff0750135eb9;
-int64_t tval_g = -1;
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-int
-i64_eq(int64_t a, int64_t b)
-{
- return (a == b);
-}
-
-int
-i64_neq(int64_t a, int64_t b)
-{
- return (a != b);
-}
-
-int
-i64_gt(int64_t a, int64_t b)
-{
- return (a > b);
-}
-
-int
-i64_le(int64_t a, int64_t b)
-{
- return (a <= b);
-}
-
-int
-i64_ge(int64_t a, int64_t b) {
- return (a >= b);
-}
-
-int
-i64_lt(int64_t a, int64_t b) {
- return (a < b);
-}
-
-int
-i64_uge(uint64_t a, uint64_t b)
-{
- return (a >= b);
-}
-
-int
-i64_ult(uint64_t a, uint64_t b)
-{
- return (a < b);
-}
-
-int
-i64_ugt(uint64_t a, uint64_t b)
-{
- return (a > b);
-}
-
-int
-i64_ule(uint64_t a, uint64_t b)
-{
- return (a <= b);
-}
-
-int64_t
-i64_eq_select(int64_t a, int64_t b, int64_t c, int64_t d)
-{
- return ((a == b) ? c : d);
-}
-
-int64_t
-i64_neq_select(int64_t a, int64_t b, int64_t c, int64_t d)
-{
- return ((a != b) ? c : d);
-}
-
-int64_t
-i64_gt_select(int64_t a, int64_t b, int64_t c, int64_t d) {
- return ((a > b) ? c : d);
-}
-
-int64_t
-i64_le_select(int64_t a, int64_t b, int64_t c, int64_t d) {
- return ((a <= b) ? c : d);
-}
-
-int64_t
-i64_ge_select(int64_t a, int64_t b, int64_t c, int64_t d) {
- return ((a >= b) ? c : d);
-}
-
-int64_t
-i64_lt_select(int64_t a, int64_t b, int64_t c, int64_t d) {
- return ((a < b) ? c : d);
-}
-
-uint64_t
-i64_ugt_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
-{
- return ((a > b) ? c : d);
-}
-
-uint64_t
-i64_ule_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
-{
- return ((a <= b) ? c : d);
-}
-
-uint64_t
-i64_uge_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
- return ((a >= b) ? c : d);
-}
-
-uint64_t
-i64_ult_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
- return ((a < b) ? c : d);
-}
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-struct harness_int64_pred int64_tests_eq[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}
-};
-
-struct harness_int64_pred int64_tests_neq[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}
-};
-
-struct harness_int64_pred int64_tests_sgt[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}
-};
-
-struct harness_int64_pred int64_tests_sle[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}
-};
-
-struct harness_int64_pred int64_tests_sge[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}
-};
-
-struct harness_int64_pred int64_tests_slt[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}
-};
-
-struct int64_pred_s int64_preds[] = {
- {"eq", i64_eq, i64_eq_select,
- int64_tests_eq, ARR_SIZE(int64_tests_eq)},
- {"neq", i64_neq, i64_neq_select,
- int64_tests_neq, ARR_SIZE(int64_tests_neq)},
- {"gt", i64_gt, i64_gt_select,
- int64_tests_sgt, ARR_SIZE(int64_tests_sgt)},
- {"le", i64_le, i64_le_select,
- int64_tests_sle, ARR_SIZE(int64_tests_sle)},
- {"ge", i64_ge, i64_ge_select,
- int64_tests_sge, ARR_SIZE(int64_tests_sge)},
- {"lt", i64_lt, i64_lt_select,
- int64_tests_slt, ARR_SIZE(int64_tests_slt)}
-};
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-struct harness_uint64_pred uint64_tests_ugt[] = {
- {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d },
- {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c },
- {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c },
- {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d }
-};
-
-struct harness_uint64_pred uint64_tests_ule[] = {
- {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}
-};
-
-struct harness_uint64_pred uint64_tests_uge[] = {
- {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}
-};
-
-struct harness_uint64_pred uint64_tests_ult[] = {
- {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}
-};
-
-struct uint64_pred_s uint64_preds[] = {
- {"ugt", i64_ugt, i64_ugt_select,
- uint64_tests_ugt, ARR_SIZE(uint64_tests_ugt)},
- {"ule", i64_ule, i64_ule_select,
- uint64_tests_ule, ARR_SIZE(uint64_tests_ule)},
- {"uge", i64_uge, i64_uge_select,
- uint64_tests_uge, ARR_SIZE(uint64_tests_uge)},
- {"ult", i64_ult, i64_ult_select,
- uint64_tests_ult, ARR_SIZE(uint64_tests_ult)}
-};
-
-int
-compare_expect_int64(const struct int64_pred_s * pred)
-{
- int j, failed = 0;
-
- for (j = 0; j < pred->n_tests; ++j) {
- int pred_result;
-
- pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs);
-
- if (pred_result != pred->tests[j].expected) {
- char str[64];
-
- sprintf(str, pred->tests[j].fmt_string, pred->name);
- printf("%s: returned value is %d, expecting %d\n", str,
- pred_result, pred->tests[j].expected);
- printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs,
- *pred->tests[j].lhs);
- printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs,
- *pred->tests[j].rhs);
- ++failed;
- } else {
- int64_t selresult;
-
- selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs,
- *pred->tests[j].select_a,
- *pred->tests[j].select_b);
-
- if (selresult != *pred->tests[j].select_expected) {
- char str[64];
-
- sprintf(str, pred->tests[j].fmt_string, pred->name);
- printf("%s select: returned value is %d, expecting %d\n", str,
- pred_result, pred->tests[j].expected);
- printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs,
- *pred->tests[j].lhs);
- printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs,
- *pred->tests[j].rhs);
- printf(" true = %19lld (0x%016llx)\n", *pred->tests[j].select_a,
- *pred->tests[j].select_a);
- printf(" false = %19lld (0x%016llx)\n", *pred->tests[j].select_b,
- *pred->tests[j].select_b);
- ++failed;
- }
- }
- }
-
- printf(" %d tests performed, should be %d.\n", j, pred->n_tests);
-
- return failed;
-}
-
-int
-compare_expect_uint64(const struct uint64_pred_s * pred)
-{
- int j, failed = 0;
-
- for (j = 0; j < pred->n_tests; ++j) {
- int pred_result;
-
- pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs);
- if (pred_result != pred->tests[j].expected) {
- char str[64];
-
- sprintf(str, pred->tests[j].fmt_string, pred->name);
- printf("%s: returned value is %d, expecting %d\n", str,
- pred_result, pred->tests[j].expected);
- printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs,
- *pred->tests[j].lhs);
- printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs,
- *pred->tests[j].rhs);
- ++failed;
- } else {
- uint64_t selresult;
-
- selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs,
- *pred->tests[j].select_a,
- *pred->tests[j].select_b);
- if (selresult != *pred->tests[j].select_expected) {
- char str[64];
-
- sprintf(str, pred->tests[j].fmt_string, pred->name);
- printf("%s select: returned value is %d, expecting %d\n", str,
- pred_result, pred->tests[j].expected);
- printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs,
- *pred->tests[j].lhs);
- printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs,
- *pred->tests[j].rhs);
- printf(" true = %19llu (0x%016llx)\n", *pred->tests[j].select_a,
- *pred->tests[j].select_a);
- printf(" false = %19llu (0x%016llx)\n", *pred->tests[j].select_b,
- *pred->tests[j].select_b);
- ++failed;
- }
- }
- }
-
- printf(" %d tests performed, should be %d.\n", j, pred->n_tests);
-
- return failed;
-}
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-int
-test_i64_sext_i32(int in, int64_t expected) {
- int64_t result = (int64_t) in;
-
- if (result != expected) {
- printf("i64_sext_i32(%d) returns %lld, expecting %lld\n", in, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_sext_i16(short in, int64_t expected) {
- int64_t result = (int64_t) in;
-
- if (result != expected) {
- printf("i64_sext_i16(%hd) returns %lld, expecting %lld\n", in, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_sext_i8(signed char in, int64_t expected) {
- int64_t result = (int64_t) in;
-
- if (result != expected) {
- printf("i64_sext_i8(%d) returns %lld, expecting %lld\n", in, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_zext_i32(unsigned int in, uint64_t expected) {
- uint64_t result = (uint64_t) in;
-
- if (result != expected) {
- printf("i64_zext_i32(%u) returns %llu, expecting %llu\n", in, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_zext_i16(unsigned short in, uint64_t expected) {
- uint64_t result = (uint64_t) in;
-
- if (result != expected) {
- printf("i64_zext_i16(%hu) returns %llu, expecting %llu\n", in, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_zext_i8(unsigned char in, uint64_t expected) {
- uint64_t result = (uint64_t) in;
-
- if (result != expected) {
- printf("i64_zext_i8(%u) returns %llu, expecting %llu\n", in, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-int64_t
-i64_shl_const(int64_t a) {
- return a << 10;
-}
-
-int64_t
-i64_shl(int64_t a, int amt) {
- return a << amt;
-}
-
-uint64_t
-u64_shl_const(uint64_t a) {
- return a << 10;
-}
-
-uint64_t
-u64_shl(uint64_t a, int amt) {
- return a << amt;
-}
-
-int64_t
-i64_srl_const(int64_t a) {
- return a >> 10;
-}
-
-int64_t
-i64_srl(int64_t a, int amt) {
- return a >> amt;
-}
-
-uint64_t
-u64_srl_const(uint64_t a) {
- return a >> 10;
-}
-
-uint64_t
-u64_srl(uint64_t a, int amt) {
- return a >> amt;
-}
-
-int64_t
-i64_sra_const(int64_t a) {
- return a >> 10;
-}
-
-int64_t
-i64_sra(int64_t a, int amt) {
- return a >> amt;
-}
-
-uint64_t
-u64_sra_const(uint64_t a) {
- return a >> 10;
-}
-
-uint64_t
-u64_sra(uint64_t a, int amt) {
- return a >> amt;
-}
-
-int
-test_u64_constant_shift(const char *func_name, uint64_t (*func)(uint64_t), uint64_t a, uint64_t expected) {
- uint64_t result = (*func)(a);
-
- if (result != expected) {
- printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_constant_shift(const char *func_name, int64_t (*func)(int64_t), int64_t a, int64_t expected) {
- int64_t result = (*func)(a);
-
- if (result != expected) {
- printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_u64_variable_shift(const char *func_name, uint64_t (*func)(uint64_t, int), uint64_t a, unsigned int b, uint64_t expected) {
- uint64_t result = (*func)(a, b);
-
- if (result != expected) {
- printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), int64_t a, unsigned int b, int64_t expected) {
- int64_t result = (*func)(a, b);
-
- if (result != expected) {
- printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-int64_t i64_mul(int64_t a, int64_t b) {
- return a * b;
-}
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-int
-main(void)
-{
- int i, j, failed = 0;
- const char *something_failed = " %d tests failed.\n";
- const char *all_tests_passed = " All tests passed.\n";
-
- printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a);
- printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b);
- printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c);
- printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d);
- printf("tval_e = %20lld (0x%016llx)\n", tval_e, tval_e);
- printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f);
- printf("tval_g = %20llu (0x%016llx)\n", tval_g, tval_g);
- printf("----------------------------------------\n");
-
- for (i = 0; i < ARR_SIZE(int64_preds); ++i) {
- printf("%s series:\n", int64_preds[i].name);
- if ((failed = compare_expect_int64(int64_preds + i)) > 0) {
- printf(something_failed, failed);
- } else {
- printf(all_tests_passed);
- }
-
- printf("----------------------------------------\n");
- }
-
- for (i = 0; i < ARR_SIZE(uint64_preds); ++i) {
- printf("%s series:\n", uint64_preds[i].name);
- if ((failed = compare_expect_uint64(uint64_preds + i)) > 0) {
- printf(something_failed, failed);
- } else {
- printf(all_tests_passed);
- }
-
- printf("----------------------------------------\n");
- }
-
- /*----------------------------------------------------------------------*/
-
- puts("signed/zero-extend tests:");
-
- failed = 0;
- failed += test_i64_sext_i32(-1, -1LL);
- failed += test_i64_sext_i32(10, 10LL);
- failed += test_i64_sext_i32(0x7fffffff, 0x7fffffffLL);
- failed += test_i64_sext_i16(-1, -1LL);
- failed += test_i64_sext_i16(10, 10LL);
- failed += test_i64_sext_i16(0x7fff, 0x7fffLL);
- failed += test_i64_sext_i8(-1, -1LL);
- failed += test_i64_sext_i8(10, 10LL);
- failed += test_i64_sext_i8(0x7f, 0x7fLL);
-
- failed += test_i64_zext_i32(0xffffffff, 0x00000000ffffffffLLU);
- failed += test_i64_zext_i32(0x01234567, 0x0000000001234567LLU);
- failed += test_i64_zext_i16(0xffff, 0x000000000000ffffLLU);
- failed += test_i64_zext_i16(0x569a, 0x000000000000569aLLU);
- failed += test_i64_zext_i8(0xff, 0x00000000000000ffLLU);
- failed += test_i64_zext_i8(0xa0, 0x00000000000000a0LLU);
-
- if (failed > 0) {
- printf(" %d tests failed.\n", failed);
- } else {
- printf(" All tests passed.\n");
- }
-
- printf("----------------------------------------\n");
-
- failed = 0;
- puts("signed left/right shift tests:");
- failed += test_i64_constant_shift("i64_shl_const", i64_shl_const, tval_a, 0x00047dc7ec114c00LL);
- failed += test_i64_variable_shift("i64_shl", i64_shl, tval_a, 10, 0x00047dc7ec114c00LL);
- failed += test_i64_constant_shift("i64_srl_const", i64_srl_const, tval_a, 0x0000000047dc7ec1LL);
- failed += test_i64_variable_shift("i64_srl", i64_srl, tval_a, 10, 0x0000000047dc7ec1LL);
- failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_a, 0x0000000047dc7ec1LL);
- failed += test_i64_variable_shift("i64_sra", i64_sra, tval_a, 10, 0x0000000047dc7ec1LL);
-
- if (failed > 0) {
-    printf("    %d tests failed.\n", failed);
- } else {
- printf(" All tests passed.\n");
- }
-
- printf("----------------------------------------\n");
-
- failed = 0;
- puts("unsigned left/right shift tests:");
- failed += test_u64_constant_shift("u64_shl_const", u64_shl_const, tval_f, 0xfffc1d404d7ae400LL);
- failed += test_u64_variable_shift("u64_shl", u64_shl, tval_f, 10, 0xfffc1d404d7ae400LL);
- failed += test_u64_constant_shift("u64_srl_const", u64_srl_const, tval_f, 0x003fffffc1d404d7LL);
- failed += test_u64_variable_shift("u64_srl", u64_srl, tval_f, 10, 0x003fffffc1d404d7LL);
- failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_f, 0xffffffffc1d404d7LL);
- failed += test_i64_variable_shift("i64_sra", i64_sra, tval_f, 10, 0xffffffffc1d404d7LL);
- failed += test_u64_constant_shift("u64_sra_const", u64_sra_const, tval_f, 0x003fffffc1d404d7LL);
- failed += test_u64_variable_shift("u64_sra", u64_sra, tval_f, 10, 0x003fffffc1d404d7LL);
-
- if (failed > 0) {
-    printf("    %d tests failed.\n", failed);
- } else {
- printf(" All tests passed.\n");
- }
-
- printf("----------------------------------------\n");
-
- int64_t result;
-
- result = i64_mul(tval_g, tval_g);
- printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result);
- result = i64_mul(tval_d, tval_e);
- printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result);
- /* 0xba7a664f13077c9 */
- result = i64_mul(tval_a, tval_b);
- printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result);
-
- printf("----------------------------------------\n");
-
- return 0;
-}
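The harness pattern above generalizes directly: wrap each 64-bit operation in a plain C function, drive it through a function pointer so the lowering is actually exercised, and compare against a hand-computed constant. A minimal self-contained sketch of the same pattern, using a made-up input rather than the tval_* constants defined earlier:

#include <stdio.h>
#include <stdint.h>

static uint64_t u64_shl(uint64_t a, int b) { return a << b; }
static uint64_t u64_srl(uint64_t a, int b) { return a >> b; }

static int check_u64(const char *name, uint64_t (*fn)(uint64_t, int),
                     uint64_t a, int b, uint64_t expected) {
  uint64_t result = fn(a, b);
  if (result != expected) {
    printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n",
           name, (unsigned long long)a, b,
           (unsigned long long)result, (unsigned long long)expected);
    return 1;
  }
  return 0;
}

int main(void) {
  int failed = 0;
  /* hypothetical input value; expected results computed by hand */
  failed += check_u64("u64_shl", u64_shl, 0x1122334455667788ULL, 8,
                      0x2233445566778800ULL);
  failed += check_u64("u64_srl", u64_srl, 0x1122334455667788ULL, 8,
                      0x0011223344556677ULL);
  printf(failed ? "    %d tests failed.\n" : "    All tests passed.\n", failed);
  return failed;
}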
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.h b/test/CodeGen/CellSPU/useful-harnesses/i64operations.h
deleted file mode 100644
index 7a02794cd7e0..000000000000
--- a/test/CodeGen/CellSPU/useful-harnesses/i64operations.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#define TRUE_VAL (!0)
-#define FALSE_VAL 0
-#define ARR_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
-
-typedef unsigned long long int uint64_t;
-typedef long long int int64_t;
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-struct harness_int64_pred {
- const char *fmt_string;
- int64_t *lhs;
- int64_t *rhs;
- int64_t *select_a;
- int64_t *select_b;
- int expected;
- int64_t *select_expected;
-};
-
-struct harness_uint64_pred {
- const char *fmt_string;
- uint64_t *lhs;
- uint64_t *rhs;
- uint64_t *select_a;
- uint64_t *select_b;
- int expected;
- uint64_t *select_expected;
-};
-
-struct int64_pred_s {
- const char *name;
- int (*predfunc) (int64_t, int64_t);
- int64_t (*selfunc) (int64_t, int64_t, int64_t, int64_t);
- struct harness_int64_pred *tests;
- int n_tests;
-};
-
-struct uint64_pred_s {
- const char *name;
- int (*predfunc) (uint64_t, uint64_t);
- uint64_t (*selfunc) (uint64_t, uint64_t, uint64_t, uint64_t);
- struct harness_uint64_pred *tests;
- int n_tests;
-};
diff --git a/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c b/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c
deleted file mode 100644
index c4c86e37635d..000000000000
--- a/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c
+++ /dev/null
@@ -1,179 +0,0 @@
-#include <stdio.h>
-
-typedef unsigned char v16i8 __attribute__((ext_vector_type(16)));
-typedef short v8i16 __attribute__((ext_vector_type(8)));
-typedef int v4i32 __attribute__((ext_vector_type(4)));
-typedef float v4f32 __attribute__((ext_vector_type(4)));
-typedef long long v2i64 __attribute__((ext_vector_type(2)));
-typedef double v2f64 __attribute__((ext_vector_type(2)));
-
-void print_v16i8(const char *str, const v16i8 v) {
- union {
- unsigned char elts[16];
- v16i8 vec;
- } tv;
- tv.vec = v;
- printf("%s = { %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, "
- "%hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, "
- "%hhu, %hhu }\n",
- str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5],
- tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11],
- tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]);
-}
-
-void print_v16i8_hex(const char *str, const v16i8 v) {
- union {
- unsigned char elts[16];
- v16i8 vec;
- } tv;
- tv.vec = v;
- printf("%s = { 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, "
- "0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, "
- "0x%02hhx, 0x%02hhx }\n",
- str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5],
- tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11],
- tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]);
-}
-
-void print_v8i16_hex(const char *str, v8i16 v) {
- union {
- short elts[8];
- v8i16 vec;
- } tv;
- tv.vec = v;
- printf("%s = { 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, "
- "0x%04hx, 0x%04hx, 0x%04hx }\n",
- str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4],
- tv.elts[5], tv.elts[6], tv.elts[7]);
-}
-
-void print_v4i32(const char *str, v4i32 v) {
- printf("%s = { %d, %d, %d, %d }\n", str, v.x, v.y, v.z, v.w);
-}
-
-void print_v4f32(const char *str, v4f32 v) {
- printf("%s = { %f, %f, %f, %f }\n", str, v.x, v.y, v.z, v.w);
-}
-
-void print_v2i64(const char *str, v2i64 v) {
- printf("%s = { %lld, %lld }\n", str, v.x, v.y);
-}
-
-void print_v2f64(const char *str, v2f64 v) {
- printf("%s = { %g, %g }\n", str, v.x, v.y);
-}
-
-/*----------------------------------------------------------------------*/
-
-v16i8 v16i8_mpy(v16i8 v1, v16i8 v2) {
- return v1 * v2;
-}
-
-v16i8 v16i8_add(v16i8 v1, v16i8 v2) {
- return v1 + v2;
-}
-
-v4i32 v4i32_shuffle_1(v4i32 a) {
- v4i32 c2 = a.yzwx;
- return c2;
-}
-
-v4i32 v4i32_shuffle_2(v4i32 a) {
- v4i32 c2 = a.zwxy;
- return c2;
-}
-
-v4i32 v4i32_shuffle_3(v4i32 a) {
- v4i32 c2 = a.wxyz;
- return c2;
-}
-
-v4i32 v4i32_shuffle_4(v4i32 a) {
- v4i32 c2 = a.xyzw;
- return c2;
-}
-
-v4i32 v4i32_shuffle_5(v4i32 a) {
- v4i32 c2 = a.xwzy;
- return c2;
-}
-
-v4f32 v4f32_shuffle_1(v4f32 a) {
- v4f32 c2 = a.yzwx;
- return c2;
-}
-
-v4f32 v4f32_shuffle_2(v4f32 a) {
- v4f32 c2 = a.zwxy;
- return c2;
-}
-
-v4f32 v4f32_shuffle_3(v4f32 a) {
- v4f32 c2 = a.wxyz;
- return c2;
-}
-
-v4f32 v4f32_shuffle_4(v4f32 a) {
- v4f32 c2 = a.xyzw;
- return c2;
-}
-
-v4f32 v4f32_shuffle_5(v4f32 a) {
- v4f32 c2 = a.xwzy;
- return c2;
-}
-
-v2i64 v2i64_shuffle(v2i64 a) {
- v2i64 c2 = a.yx;
- return c2;
-}
-
-v2f64 v2f64_shuffle(v2f64 a) {
- v2f64 c2 = a.yx;
- return c2;
-}
-
-int main(void) {
- v16i8 v00 = { 0xf4, 0xad, 0x01, 0xe9, 0x51, 0x78, 0xc1, 0x8a,
- 0x94, 0x7c, 0x49, 0x6c, 0x21, 0x32, 0xb2, 0x04 };
- v16i8 va0 = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
- 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10 };
- v16i8 va1 = { 0x11, 0x83, 0x4b, 0x63, 0xff, 0x90, 0x32, 0xe5,
- 0x5a, 0xaa, 0x20, 0x01, 0x0d, 0x15, 0x77, 0x05 };
- v8i16 v01 = { 0x1a87, 0x0a14, 0x5014, 0xfff0,
- 0xe194, 0x0184, 0x801e, 0x5940 };
- v4i32 v1 = { 1, 2, 3, 4 };
- v4f32 v2 = { 1.0, 2.0, 3.0, 4.0 };
- v2i64 v3 = { 691043ll, 910301513ll };
- v2f64 v4 = { 5.8e56, 9.103e-62 };
-
- puts("---- vector tests start ----");
-
- print_v16i8_hex("v00 ", v00);
- print_v16i8_hex("va0 ", va0);
- print_v16i8_hex("va1 ", va1);
- print_v16i8_hex("va0 x va1 ", v16i8_mpy(va0, va1));
- print_v16i8_hex("va0 + va1 ", v16i8_add(va0, va1));
- print_v8i16_hex("v01 ", v01);
-
- print_v4i32("v4i32_shuffle_1(1, 2, 3, 4)", v4i32_shuffle_1(v1));
- print_v4i32("v4i32_shuffle_2(1, 2, 3, 4)", v4i32_shuffle_2(v1));
- print_v4i32("v4i32_shuffle_3(1, 2, 3, 4)", v4i32_shuffle_3(v1));
- print_v4i32("v4i32_shuffle_4(1, 2, 3, 4)", v4i32_shuffle_4(v1));
- print_v4i32("v4i32_shuffle_5(1, 2, 3, 4)", v4i32_shuffle_5(v1));
-
- print_v4f32("v4f32_shuffle_1(1, 2, 3, 4)", v4f32_shuffle_1(v2));
- print_v4f32("v4f32_shuffle_2(1, 2, 3, 4)", v4f32_shuffle_2(v2));
- print_v4f32("v4f32_shuffle_3(1, 2, 3, 4)", v4f32_shuffle_3(v2));
- print_v4f32("v4f32_shuffle_4(1, 2, 3, 4)", v4f32_shuffle_4(v2));
- print_v4f32("v4f32_shuffle_5(1, 2, 3, 4)", v4f32_shuffle_5(v2));
-
- print_v2i64("v3 ", v3);
- print_v2i64("v2i64_shuffle ", v2i64_shuffle(v3));
- print_v2f64("v4 ", v4);
- print_v2f64("v2f64_shuffle ", v2f64_shuffle(v4));
-
- puts("---- vector tests end ----");
-
- return 0;
-}
diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll
deleted file mode 100644
index 09e15ffbc75d..000000000000
--- a/test/CodeGen/CellSPU/v2f32.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-;RUN: llc --march=cellspu %s -o - | FileCheck %s
-%vec = type <2 x float>
-
-define %vec @test_ret(%vec %param)
-{
-;CHECK: bi $lr
- ret %vec %param
-}
-
-define %vec @test_add(%vec %param)
-{
-;CHECK: fa {{\$.}}, $3, $3
- %1 = fadd %vec %param, %param
-;CHECK: bi $lr
- ret %vec %1
-}
-
-define %vec @test_sub(%vec %param)
-{
-;CHECK: fs {{\$.}}, $3, $3
- %1 = fsub %vec %param, %param
-
-;CHECK: bi $lr
- ret %vec %1
-}
-
-define %vec @test_mul(%vec %param)
-{
-;CHECK: fm {{\$.}}, $3, $3
- %1 = fmul %vec %param, %param
-
-;CHECK: bi $lr
- ret %vec %1
-}
-
-; CHECK: test_splat:
-define %vec @test_splat(float %param ) {
-;CHECK: lqa
-;CHECK: shufb
- %sv = insertelement <1 x float> undef, float %param, i32 0
- %rv = shufflevector <1 x float> %sv, <1 x float> undef, <2 x i32> zeroinitializer
-;CHECK: bi $lr
- ret %vec %rv
-}
-
-define void @test_store(%vec %val, %vec* %ptr){
-; CHECK: test_store:
-;CHECK: stqd
- store %vec zeroinitializer, %vec* null
-
-;CHECK: stqd $3, 0(${{.*}})
-;CHECK: bi $lr
- store %vec %val, %vec* %ptr
- ret void
-}
-
-; CHECK: test_insert:
-define %vec @test_insert(){
-;CHECK: cwd
-;CHECK: shufb $3
- %rv = insertelement %vec undef, float 0.0e+00, i32 undef
-;CHECK: bi $lr
- ret %vec %rv
-}
-
-; CHECK: test_unaligned_store:
-
-define void @test_unaligned_store() {
-;CHECK: cdd
-;CHECK: shufb
-;CHECK: stqd
- %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1]
- %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; <float*> [#uses=1]
- %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1]
- store <2 x float> zeroinitializer, <2 x float>* %vptr
- ret void
-}
-
diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll
deleted file mode 100644
index 9c5b89613df9..000000000000
--- a/test/CodeGen/CellSPU/v2i32.ll
+++ /dev/null
@@ -1,61 +0,0 @@
-;RUN: llc --march=cellspu %s -o - | FileCheck %s
-%vec = type <2 x i32>
-
-define %vec @test_ret(%vec %param)
-{
-;CHECK: bi $lr
- ret %vec %param
-}
-
-define %vec @test_add(%vec %param)
-{
-;CHECK: shufb
-;CHECK: addx
- %1 = add %vec %param, %param
-;CHECK: bi $lr
- ret %vec %1
-}
-
-define %vec @test_sub(%vec %param)
-{
- %1 = sub %vec %param, <i32 1, i32 1>
-;CHECK: bi $lr
- ret %vec %1
-}
-
-define %vec @test_mul(%vec %param)
-{
- %1 = mul %vec %param, %param
-;CHECK: bi $lr
- ret %vec %1
-}
-
-define <2 x i32> @test_splat(i32 %param ) {
-;see svn log for why this is here...
-;CHECK-NOT: or $3, $3, $3
-;CHECK: lqa
-;CHECK: shufb
- %sv = insertelement <1 x i32> undef, i32 %param, i32 0
- %rv = shufflevector <1 x i32> %sv, <1 x i32> undef, <2 x i32> zeroinitializer
-;CHECK: bi $lr
- ret <2 x i32> %rv
-}
-
-define i32 @test_extract() {
-;CHECK: shufb $3
- %rv = extractelement <2 x i32> zeroinitializer, i32 undef ; <i32> [#uses=1]
-;CHECK: bi $lr
- ret i32 %rv
-}
-
-define void @test_store( %vec %val, %vec* %ptr)
-{
- store %vec %val, %vec* %ptr
- ret void
-}
-
-define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr)
-{
- %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1
- ret <2 x i32>* %rv
-}
diff --git a/test/CodeGen/CellSPU/vec_const.ll b/test/CodeGen/CellSPU/vec_const.ll
deleted file mode 100644
index 24c05c684084..000000000000
--- a/test/CodeGen/CellSPU/vec_const.ll
+++ /dev/null
@@ -1,154 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
-; RUN: grep -w il %t1.s | count 3
-; RUN: grep ilhu %t1.s | count 8
-; RUN: grep -w ilh %t1.s | count 5
-; RUN: grep iohl %t1.s | count 7
-; RUN: grep lqa %t1.s | count 6
-; RUN: grep 24672 %t1.s | count 2
-; RUN: grep 16429 %t1.s | count 1
-; RUN: grep 63572 %t1.s | count 1
-; RUN: grep 4660 %t1.s | count 1
-; RUN: grep 22136 %t1.s | count 1
-; RUN: grep 43981 %t1.s | count 1
-; RUN: grep 61202 %t1.s | count 1
-; RUN: grep 16393 %t1.s | count 1
-; RUN: grep 8699 %t1.s | count 1
-; RUN: grep 21572 %t1.s | count 1
-; RUN: grep 11544 %t1.s | count 1
-; RUN: grep 1311768467750121234 %t1.s | count 1
-; RUN: grep lqd %t2.s | count 6
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
-target triple = "spu-unknown-elf"
-
-; Vector constant load tests:
-
-; IL <reg>, 2
-define <4 x i32> @v4i32_constvec() {
- ret <4 x i32> < i32 2, i32 2, i32 2, i32 2 >
-}
-
-; Spill to constant pool
-define <4 x i32> @v4i32_constpool() {
- ret <4 x i32> < i32 2, i32 1, i32 1, i32 2 >
-}
-
-; Max negative range for IL
-define <4 x i32> @v4i32_constvec_2() {
- ret <4 x i32> < i32 -32768, i32 -32768, i32 -32768, i32 -32768 >
-}
-
-; ILHU <reg>, 73 (0x49)
-; 4784128 = 0x490000
-define <4 x i32> @v4i32_constvec_3() {
- ret <4 x i32> < i32 4784128, i32 4784128,
- i32 4784128, i32 4784128 >
-}
-
-; ILHU <reg>, 61 (0x3d)
-; IOHL <reg>, 15395 (0x3c23)
-define <4 x i32> @v4i32_constvec_4() {
- ret <4 x i32> < i32 4013091, i32 4013091,
- i32 4013091, i32 4013091 >
-}
-
-; ILHU <reg>, 0x5050 (20560)
-; IOHL <reg>, 0x5050 (20560)
-; Tests for whether we expand the size of the bit pattern properly, because
-; this could be interpreted as an i8 pattern (0x50)
-define <4 x i32> @v4i32_constvec_5() {
- ret <4 x i32> < i32 1347440720, i32 1347440720,
- i32 1347440720, i32 1347440720 >
-}
-
-; ILH
-define <8 x i16> @v8i16_constvec_1() {
- ret <8 x i16> < i16 32767, i16 32767, i16 32767, i16 32767,
- i16 32767, i16 32767, i16 32767, i16 32767 >
-}
-
-; ILH
-define <8 x i16> @v8i16_constvec_2() {
- ret <8 x i16> < i16 511, i16 511, i16 511, i16 511, i16 511,
- i16 511, i16 511, i16 511 >
-}
-
-; ILH
-define <8 x i16> @v8i16_constvec_3() {
- ret <8 x i16> < i16 -512, i16 -512, i16 -512, i16 -512, i16 -512,
- i16 -512, i16 -512, i16 -512 >
-}
-
-; ILH <reg>, 24672 (0x6060)
-; Tests whether we expand the size of the bit pattern properly, because
-; this could be interpreted as an i8 pattern (0x60)
-define <8 x i16> @v8i16_constvec_4() {
- ret <8 x i16> < i16 24672, i16 24672, i16 24672, i16 24672, i16 24672,
- i16 24672, i16 24672, i16 24672 >
-}
-
-; ILH <reg>, 24672 (0x6060)
-; Tests whether we expand the size of the bit pattern properly, because
-; this is an i8 pattern but has to be expanded out to i16 to load it
-; properly into the vector register.
-define <16 x i8> @v16i8_constvec_1() {
- ret <16 x i8> < i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96,
- i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96 >
-}
-
-define <4 x float> @v4f32_constvec_1() {
-entry:
- ret <4 x float> < float 0x4005BF0A80000000,
- float 0x4005BF0A80000000,
- float 0x4005BF0A80000000,
- float 0x4005BF0A80000000 >
-}
-
-define <4 x float> @v4f32_constvec_2() {
-entry:
- ret <4 x float> < float 0.000000e+00,
- float 0.000000e+00,
- float 0.000000e+00,
- float 0.000000e+00 >
-}
-
-
-define <4 x float> @v4f32_constvec_3() {
-entry:
- ret <4 x float> < float 0x4005BF0A80000000,
- float 0x3810000000000000,
- float 0x47EFFFFFE0000000,
- float 0x400921FB60000000 >
-}
-
-; 1311768467750121234 => 0x 12345678 abcdef12
-; HI32_hi: 4660
-; HI32_lo: 22136
-; LO32_hi: 43981
-; LO32_lo: 61202
-define <2 x i64> @i64_constvec_1() {
-entry:
- ret <2 x i64> < i64 1311768467750121234,
- i64 1311768467750121234 >
-}
-
-define <2 x i64> @i64_constvec_2() {
-entry:
- ret <2 x i64> < i64 1, i64 1311768467750121234 >
-}
-
-define <2 x double> @f64_constvec_1() {
-entry:
- ret <2 x double> < double 0x400921fb54442d18,
- double 0xbff6a09e667f3bcd >
-}
-
-; 0x400921fb 54442d18 ->
-; (ILHU 0x4009 [16393]/IOHL 0x21fb [ 8699])
-; (ILHU 0x5444 [21572]/IOHL 0x2d18 [11544])
-define <2 x double> @f64_constvec_2() {
-entry:
- ret <2 x double> < double 0x400921fb54442d18,
- double 0x400921fb54442d18 >
-}
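The ILHU/IOHL arithmetic in the comments above is easy to verify mechanically. A standalone C check, assuming only that ILHU materializes the upper 16 bits of each 32-bit word and IOHL ORs in the lower 16:

#include <stdio.h>
#include <stdint.h>

static void breakdown32(uint32_t w) {
  /* ILHU immediate = upper halfword, IOHL immediate = lower halfword */
  printf("0x%08x -> ILHU %u / IOHL %u\n", w, w >> 16, w & 0xffff);
}

int main(void) {
  breakdown32(1347440720u);            /* 0x50505050: ILHU 20560 / IOHL 20560 */
  breakdown32(4013091u);               /* 0x003d3c23: ILHU 61    / IOHL 15395 */
  uint64_t k = 1311768467750121234ULL; /* 0x12345678abcdef12 */
  breakdown32((uint32_t)(k >> 32));    /* HI32: ILHU 4660  / IOHL 22136 */
  breakdown32((uint32_t)k);            /* LO32: ILHU 43981 / IOHL 61202 */
  return 0;
}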
diff --git a/test/CodeGen/CellSPU/vecinsert.ll b/test/CodeGen/CellSPU/vecinsert.ll
deleted file mode 100644
index 8dcab1d84c9c..000000000000
--- a/test/CodeGen/CellSPU/vecinsert.ll
+++ /dev/null
@@ -1,131 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep cbd %t1.s | count 5
-; RUN: grep chd %t1.s | count 5
-; RUN: grep cwd %t1.s | count 11
-; RUN: grep -w il %t1.s | count 5
-; RUN: grep -w ilh %t1.s | count 6
-; RUN: grep iohl %t1.s | count 1
-; RUN: grep ilhu %t1.s | count 4
-; RUN: grep shufb %t1.s | count 27
-; RUN: grep 17219 %t1.s | count 1
-; RUN: grep 22598 %t1.s | count 1
-; RUN: grep -- -39 %t1.s | count 1
-; RUN: grep 24 %t1.s | count 1
-; RUN: grep 1159 %t1.s | count 1
-; RUN: FileCheck %s < %t1.s
-
-; ModuleID = 'vecinsert.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
-target triple = "spu-unknown-elf"
-
-; 67 -> 0x43; as an 8-bit vector constant load this becomes 0x4343 (17219)
-define <16 x i8> @test_v16i8(<16 x i8> %P, i8 %x) {
-entry:
- %tmp1 = insertelement <16 x i8> %P, i8 %x, i32 10
- %tmp1.1 = insertelement <16 x i8> %tmp1, i8 67, i32 7
- %tmp1.2 = insertelement <16 x i8> %tmp1.1, i8 %x, i32 15
- ret <16 x i8> %tmp1.2
-}
-
-; 22598 -> 0x5846
-define <8 x i16> @test_v8i16(<8 x i16> %P, i16 %x) {
-entry:
- %tmp1 = insertelement <8 x i16> %P, i16 %x, i32 5
- %tmp1.1 = insertelement <8 x i16> %tmp1, i16 22598, i32 7
- %tmp1.2 = insertelement <8 x i16> %tmp1.1, i16 %x, i32 2
- ret <8 x i16> %tmp1.2
-}
-
-; 1574023 -> 0x180487 (ILHU 24/IOHL 1159)
-define <4 x i32> @test_v4i32_1(<4 x i32> %P, i32 %x) {
-entry:
- %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2
- %tmp1.1 = insertelement <4 x i32> %tmp1, i32 1574023, i32 1
- %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3
- ret <4 x i32> %tmp1.2
-}
-
-; Should generate IL for the load
-define <4 x i32> @test_v4i32_2(<4 x i32> %P, i32 %x) {
-entry:
- %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2
- %tmp1.1 = insertelement <4 x i32> %tmp1, i32 -39, i32 1
- %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3
- ret <4 x i32> %tmp1.2
-}
-
-define void @variable_v16i8_1(<16 x i8>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <16 x i8>* %a, i32 %i
- %tmp2 = load <16 x i8>* %arrayidx
- %tmp3 = insertelement <16 x i8> %tmp2, i8 1, i32 1
- %tmp8 = insertelement <16 x i8> %tmp3, i8 2, i32 11
- store <16 x i8> %tmp8, <16 x i8>* %arrayidx
- ret void
-}
-
-define void @variable_v8i16_1(<8 x i16>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <8 x i16>* %a, i32 %i
- %tmp2 = load <8 x i16>* %arrayidx
- %tmp3 = insertelement <8 x i16> %tmp2, i16 1, i32 1
- %tmp8 = insertelement <8 x i16> %tmp3, i16 2, i32 6
- store <8 x i16> %tmp8, <8 x i16>* %arrayidx
- ret void
-}
-
-define void @variable_v4i32_1(<4 x i32>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <4 x i32>* %a, i32 %i
- %tmp2 = load <4 x i32>* %arrayidx
- %tmp3 = insertelement <4 x i32> %tmp2, i32 1, i32 1
- %tmp8 = insertelement <4 x i32> %tmp3, i32 2, i32 2
- store <4 x i32> %tmp8, <4 x i32>* %arrayidx
- ret void
-}
-
-define void @variable_v4f32_1(<4 x float>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <4 x float>* %a, i32 %i
- %tmp2 = load <4 x float>* %arrayidx
- %tmp3 = insertelement <4 x float> %tmp2, float 1.000000e+00, i32 1
- %tmp8 = insertelement <4 x float> %tmp3, float 2.000000e+00, i32 2
- store <4 x float> %tmp8, <4 x float>* %arrayidx
- ret void
-}
-
-define void @variable_v2i64_1(<2 x i64>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <2 x i64>* %a, i32 %i
- %tmp2 = load <2 x i64>* %arrayidx
- %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 0
- store <2 x i64> %tmp3, <2 x i64>* %arrayidx
- ret void
-}
-
-define void @variable_v2i64_2(<2 x i64>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <2 x i64>* %a, i32 %i
- %tmp2 = load <2 x i64>* %arrayidx
- %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 1
- store <2 x i64> %tmp3, <2 x i64>* %arrayidx
- ret void
-}
-
-define void @variable_v2f64_1(<2 x double>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <2 x double>* %a, i32 %i
- %tmp2 = load <2 x double>* %arrayidx
- %tmp3 = insertelement <2 x double> %tmp2, double 1.000000e+00, i32 1
- store <2 x double> %tmp3, <2 x double>* %arrayidx
- ret void
-}
-
-define <4 x i32> @undef_v4i32( i32 %param ) {
- ;CHECK: cwd
- ;CHECK: lqa
- ;CHECK: shufb
- %val = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %param, i32 undef
- ret <4 x i32> %val
-}
-
diff --git a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
index da1aeb556a39..7ffb734c713a 100644
--- a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
+++ b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s
; PR1133
+; XFAIL: hexagon
define void @test(i32* %X) nounwind {
entry:
%tmp1 = getelementptr i32* %X, i32 10 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll b/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll
deleted file mode 100644
index 6591c64d871e..000000000000
--- a/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc -mcpu=corei7 < %s
-; We don't care about the output, just that it doesn't crash
-
-define <1 x i1> @buildvec_promote() {
- %cmp = icmp ule <1 x i32> undef, undef
- %sel = select i1 undef, <1 x i1> undef, <1 x i1> %cmp
- ret <1 x i1> %sel
-}
diff --git a/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll b/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll
new file mode 100644
index 000000000000..a1aed0e3a4b6
--- /dev/null
+++ b/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s
+
+define internal i1 @f(float %s) {
+entry:
+ %c = fcmp ogt float %s, 0x41EFFFFFE0000000
+ ret i1 %c
+}
diff --git a/test/CodeGen/Generic/dag-combine-crash.ll b/test/CodeGen/Generic/dag-combine-crash.ll
new file mode 100644
index 000000000000..a7810b5c05e2
--- /dev/null
+++ b/test/CodeGen/Generic/dag-combine-crash.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s
+
+define void @main() {
+if.end:
+ br label %block.i.i
+
+block.i.i:
+ %tmpbb = load i8* undef
+ %tmp54 = zext i8 %tmpbb to i64
+ %tmp59 = and i64 %tmp54, 8
+ %tmp60 = add i64 %tmp59, 3691045929300498764
+ %tmp62 = sub i64 %tmp60, 3456506383779105993
+ %tmp63 = xor i64 1050774804270620004, %tmp62
+ %tmp65 = xor i64 %tmp62, 234539545521392771
+ %tmp67 = or i64 %tmp65, %tmp63
+ %tmp71 = xor i64 %tmp67, 6781485823212740913
+ %tmp72 = trunc i64 %tmp71 to i32
+ %tmp74 = lshr i32 2, %tmp72
+ store i32 %tmp74, i32* undef
+ br label %block.i.i
+}
diff --git a/test/CodeGen/Generic/inline-asm-mem-clobber.ll b/test/CodeGen/Generic/inline-asm-mem-clobber.ll
new file mode 100644
index 000000000000..e523d031dc65
--- /dev/null
+++ b/test/CodeGen/Generic/inline-asm-mem-clobber.ll
@@ -0,0 +1,21 @@
+; RUN: llc -O2 < %s | FileCheck %s
+
+@G = common global i32 0, align 4
+
+define i32 @foo(i8* %p) nounwind uwtable {
+entry:
+ %p.addr = alloca i8*, align 8
+ %rv = alloca i32, align 4
+ store i8* %p, i8** %p.addr, align 8
+ store i32 0, i32* @G, align 4
+ %0 = load i8** %p.addr, align 8
+; CHECK: blah
+ %1 = call i32 asm "blah", "=r,r,~{memory}"(i8* %0) nounwind
+; CHECK: @G
+ store i32 %1, i32* %rv, align 4
+ %2 = load i32* %rv, align 4
+ %3 = load i32* @G, align 4
+ %add = add nsw i32 %2, %3
+ ret i32 %add
+}
+
diff --git a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
index e709080bfc5a..a135c625fccc 100644
--- a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
+++ b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s
-
+; XFAIL: hexagon
declare { i64, double } @wild()
define void @foo(i64* %p, double* %q) nounwind {
diff --git a/test/CodeGen/Generic/select-cc.ll b/test/CodeGen/Generic/select-cc.ll
index b653e2a46dcf..7510f701b147 100644
--- a/test/CodeGen/Generic/select-cc.ll
+++ b/test/CodeGen/Generic/select-cc.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s
; PR2504
-
+; XFAIL: hexagon
define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind {
%x.lo = extractelement <2 x double> %x, i32 0 ; <double> [#uses=1]
%x.lo.ge = fcmp oge double %x.lo, 0.000000e+00 ; <i1> [#uses=1]
diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll
index a0f9a02d4cbb..bc7c7d00a11c 100644
--- a/test/CodeGen/Generic/vector.ll
+++ b/test/CodeGen/Generic/vector.ll
@@ -1,6 +1,6 @@
; Test that vectors are scalarized/lowered correctly.
; RUN: llc < %s
-
+; XFAIL: hexagon
%d8 = type <8 x double>
%f1 = type <1 x float>
@@ -152,3 +152,8 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) {
store %i4 %R, %i4* %P
ret void
}
+
+define <2 x i32*> @vector_gep(<2 x [3 x {i32, i32}]*> %a) {
+ %w = getelementptr <2 x [3 x {i32, i32}]*> %a, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32*> %w
+}
diff --git a/test/CodeGen/Hexagon/absaddr-store.ll b/test/CodeGen/Hexagon/absaddr-store.ll
new file mode 100644
index 000000000000..5c2554df8aeb
--- /dev/null
+++ b/test/CodeGen/Hexagon/absaddr-store.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate store instructions with absolute addressing mode.
+
+@a = external global i32
+@b = external global i8
+@c = external global i16
+@d = external global i64
+
+define zeroext i8 @absStoreByte() nounwind {
+; CHECK: memb(##b){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+ %0 = load i8* @b, align 1
+ %conv = zext i8 %0 to i32
+ %mul = mul nsw i32 100, %conv
+ %conv1 = trunc i32 %mul to i8
+ store i8 %conv1, i8* @b, align 1
+ ret i8 %conv1
+}
+
+define signext i16 @absStoreHalf() nounwind {
+; CHECK: memh(##c){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+ %0 = load i16* @c, align 2
+ %conv = sext i16 %0 to i32
+ %mul = mul nsw i32 100, %conv
+ %conv1 = trunc i32 %mul to i16
+ store i16 %conv1, i16* @c, align 2
+ ret i16 %conv1
+}
+
+define i32 @absStoreWord() nounwind {
+; CHECK: memw(##a){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+ %0 = load i32* @a, align 4
+ %mul = mul nsw i32 100, %0
+ store i32 %mul, i32* @a, align 4
+ ret i32 %mul
+}
+
+define void @absStoreDouble() nounwind {
+; CHECK: memd(##d){{ *}}={{ *}}r{{[0-9]+}}:{{[0-9]+}}
+entry:
+ store i64 100, i64* @d, align 8
+ ret void
+}
+
diff --git a/test/CodeGen/Hexagon/adde.ll b/test/CodeGen/Hexagon/adde.ll
new file mode 100644
index 000000000000..9cee3e215d62
--- /dev/null
+++ b/test/CodeGen/Hexagon/adde.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+:[0-9]+}} = #0
+; CHECK: r{{[0-9]+:[0-9]+}} = #1
+; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+
+
+define void @check_adde_addc (i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+ %tmp1 = zext i64 %AL to i128
+ %tmp23 = zext i64 %AH to i128
+ %tmp4 = shl i128 %tmp23, 64
+ %tmp5 = or i128 %tmp4, %tmp1
+ %tmp67 = zext i64 %BL to i128
+ %tmp89 = zext i64 %BH to i128
+ %tmp11 = shl i128 %tmp89, 64
+ %tmp12 = or i128 %tmp11, %tmp67
+ %tmp15 = add i128 %tmp12, %tmp5
+ %tmp1617 = trunc i128 %tmp15 to i64
+ store i64 %tmp1617, i64* %RL
+ %tmp21 = lshr i128 %tmp15, 64
+ %tmp2122 = trunc i128 %tmp21 to i64
+ store i64 %tmp2122, i64* %RH
+ ret void
+}
\ No newline at end of file
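What the CHECK lines above encode is the standard double-word add-with-carry expansion. A C sketch of the same computation, where the carry out of the low half is recovered with an unsigned compare (roughly the role played by cmp.gtu and mux above):

#include <stdio.h>
#include <stdint.h>

static void add128(uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh,
                   uint64_t *rl, uint64_t *rh) {
  uint64_t lo = al + bl;
  uint64_t carry = lo < al;  /* unsigned wraparound implies a carry out */
  *rl = lo;
  *rh = ah + bh + carry;
}

int main(void) {
  uint64_t rl, rh;
  add128(0xffffffffffffffffULL, 0, 1, 0, &rl, &rh);
  printf("high=0x%016llx low=0x%016llx\n",
         (unsigned long long)rh, (unsigned long long)rl); /* high=...01 low=...00 */
  return 0;
}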
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index 8a6efb620ec0..f8c9e44c831d 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,12 +1,11 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s
-; CHECK: r[[T0:[0-9]+]] = #7
-; CHECK: memw(r29 + #0) = r[[T0]]
-; CHECK: r5 = #6
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched -disable-hexagon-misched < %s | FileCheck %s
+; CHECK: memw(r29{{ *}}+{{ *}}#0){{ *}}={{ *}}#7
; CHECK: r0 = #1
; CHECK: r1 = #2
; CHECK: r2 = #3
; CHECK: r3 = #4
; CHECK: r4 = #5
+; CHECK: r5 = #6
define void @foo() nounwind {
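The CHECK lines encode the Hexagon calling convention: the first six word-sized arguments travel in r0-r5, and a seventh spills to the stack. A hypothetical reconstruction of the C source behind this test (the real callee and the body of @foo are outside this hunk; the name bar is my own):

/* hypothetical reconstruction, for illustration only */
extern void bar(int, int, int, int, int, int, int);

void foo(void) {
  bar(1, 2, 3, 4, 5, 6, 7); /* r0..r5 = #1..#6; #7 goes to memw(r29+#0) */
}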
diff --git a/test/CodeGen/Hexagon/ashift-left-right.ll b/test/CodeGen/Hexagon/ashift-left-right.ll
new file mode 100644
index 000000000000..7c41bc7bbf3b
--- /dev/null
+++ b/test/CodeGen/Hexagon/ashift-left-right.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+define i32 @foo(i32 %a, i32 %b) nounwind readnone {
+; CHECK: lsl
+; CHECK: aslh
+entry:
+ %shl1 = shl i32 16, %a
+ %shl2 = shl i32 %b, 16
+ %ret = mul i32 %shl1, %shl2
+ ret i32 %ret
+}
+
+define i32 @bar(i32 %a, i32 %b) nounwind readnone {
+; CHECK: asrh
+; CHECK: lsr
+entry:
+ %shl1 = ashr i32 16, %a
+ %shl2 = ashr i32 %b, 16
+ %ret = mul i32 %shl1, %shl2
+ ret i32 %ret
+}
diff --git a/test/CodeGen/Hexagon/block-addr.ll b/test/CodeGen/Hexagon/block-addr.ll
new file mode 100644
index 000000000000..54a12bf48448
--- /dev/null
+++ b/test/CodeGen/Hexagon/block-addr.ll
@@ -0,0 +1,64 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}} = CONST32(#.LJTI{{[0-9]+_[0-9]+}})
+; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}+r{{[0-9]+<<#[0-9]+}})
+; CHECK: jumpr r{{[0-9]+}}
+
+define void @main() #0 {
+entry:
+ %ret = alloca i32, align 4
+ br label %while.body
+
+while.body:
+ %ret.0.load17 = load volatile i32* %ret, align 4
+ switch i32 %ret.0.load17, label %label6 [
+ i32 0, label %label0
+ i32 1, label %label1
+ i32 2, label %label2
+ i32 3, label %label3
+ i32 4, label %label4
+ i32 5, label %label5
+ ]
+
+label0:
+ %ret.0.load18 = load volatile i32* %ret, align 4
+ %inc = add nsw i32 %ret.0.load18, 1
+ store volatile i32 %inc, i32* %ret, align 4
+ br label %while.body
+
+label1:
+ %ret.0.load19 = load volatile i32* %ret, align 4
+ %inc2 = add nsw i32 %ret.0.load19, 1
+ store volatile i32 %inc2, i32* %ret, align 4
+ br label %while.body
+
+label2:
+ %ret.0.load20 = load volatile i32* %ret, align 4
+ %inc4 = add nsw i32 %ret.0.load20, 1
+ store volatile i32 %inc4, i32* %ret, align 4
+ br label %while.body
+
+label3:
+ %ret.0.load21 = load volatile i32* %ret, align 4
+ %inc6 = add nsw i32 %ret.0.load21, 1
+ store volatile i32 %inc6, i32* %ret, align 4
+ br label %while.body
+
+label4:
+ %ret.0.load22 = load volatile i32* %ret, align 4
+ %inc8 = add nsw i32 %ret.0.load22, 1
+ store volatile i32 %inc8, i32* %ret, align 4
+ br label %while.body
+
+label5:
+ %ret.0.load23 = load volatile i32* %ret, align 4
+ %inc10 = add nsw i32 %ret.0.load23, 1
+ store volatile i32 %inc10, i32* %ret, align 4
+ br label %while.body
+
+label6:
+ store volatile i32 0, i32* %ret, align 4
+ br label %while.body
+}
+
+attributes #0 = { noreturn nounwind "target-cpu"="hexagonv4" }
diff --git a/test/CodeGen/Hexagon/cext-check.ll b/test/CodeGen/Hexagon/cext-check.ll
new file mode 100644
index 000000000000..7c4b19e5a402
--- /dev/null
+++ b/test/CodeGen/Hexagon/cext-check.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate constant-extended instructions only when necessary.
+
+define i32 @cext_test1(i32* %a) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+##8000)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300000)
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+##4092)
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300)
+entry:
+ %0 = load i32* %a, align 4
+ %tobool = icmp ne i32 %0, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+ %arrayidx1 = getelementptr inbounds i32* %a, i32 2000
+ %1 = load i32* %arrayidx1, align 4
+ %add = add nsw i32 %1, 300000
+ br label %return
+
+if.end:
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 1023
+ %2 = load i32* %arrayidx2, align 4
+ %add3 = add nsw i32 %2, 300
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ %add, %if.then ], [ %add3, %if.end ]
+ ret i32 %retval.0
+}
+
+define i32 @cext_test2(i8* %a) nounwind {
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+##1023)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300000)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+##1024)
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##6000)
+entry:
+ %tobool = icmp ne i8* %a, null
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+ %arrayidx = getelementptr inbounds i8* %a, i32 1023
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 300000
+ br label %return
+
+if.end:
+ %arrayidx1 = getelementptr inbounds i8* %a, i32 1024
+ %1 = load i8* %arrayidx1, align 1
+ %conv2 = zext i8 %1 to i32
+ %add3 = add nsw i32 %conv2, 6000
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ %add, %if.then ], [ %add3, %if.end ]
+ ret i32 %retval.0
+}
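In C terms the two paths of cext_test1 differ only in whether the byte offset of an indexed load fits the unextended offset field. A hand-written sketch mirroring the offsets used in the IR (function names are my own):

/* a[1023]: byte offset 4092, fits the base memw offset field;
   a[2000]: byte offset 8000, needs a ##constant extender */
int load_near(const int *a) { return a[1023]; }
int load_far(const int *a)  { return a[2000]; }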
diff --git a/test/CodeGen/Hexagon/cext-valid-packet1.ll b/test/CodeGen/Hexagon/cext-valid-packet1.ll
new file mode 100644
index 000000000000..a479d37e4ae5
--- /dev/null
+++ b/test/CodeGen/Hexagon/cext-valid-packet1.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check that the packetizer generates valid packets with constant
+; extended instructions.
+; CHECK: {
+; CHECK-NEXT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}, ##{{[0-9]+}})
+; CHECK-NEXT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}, ##{{[0-9]+}})
+; CHECK-NEXT: }
+
+define i32 @check-packet1(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+ %add = add nsw i32 %a, 200000
+ %add1 = add nsw i32 %b, 200001
+ %add2 = add nsw i32 %c, 200002
+ %cmp = icmp sgt i32 %add, %add1
+ %b.addr.0 = select i1 %cmp, i32 %add1, i32 %add2
+ ret i32 %b.addr.0
+}
diff --git a/test/CodeGen/Hexagon/cext-valid-packet2.ll b/test/CodeGen/Hexagon/cext-valid-packet2.ll
new file mode 100644
index 000000000000..2788a6b1c865
--- /dev/null
+++ b/test/CodeGen/Hexagon/cext-valid-packet2.ll
@@ -0,0 +1,43 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that the packetizer generates valid packets with constant
+; extended add and base+offset store instructions.
+
+; CHECK: {
+; CHECK-NEXT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}, ##{{[0-9]+}})
+; CHECK-NEXT: memw(r{{[0-9]+}}+{{ *}}##{{[0-9]+}}){{ *}}={{ *}}r{{[0-9]+}}.new
+; CHECK-NEXT: }
+
+define i32 @test(i32* nocapture %a, i32* nocapture %b, i32 %c) nounwind {
+entry:
+ %add = add nsw i32 %c, 200002
+ %0 = load i32* %a, align 4
+ %add1 = add nsw i32 %0, 200000
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 3000
+ store i32 %add1, i32* %arrayidx2, align 4
+ %1 = load i32* %b, align 4
+ %add4 = add nsw i32 %1, 200001
+ %arrayidx5 = getelementptr inbounds i32* %a, i32 1
+ store i32 %add4, i32* %arrayidx5, align 4
+ %arrayidx7 = getelementptr inbounds i32* %b, i32 1
+ %2 = load i32* %arrayidx7, align 4
+ %cmp = icmp sgt i32 %add4, %2
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %arrayidx8 = getelementptr inbounds i32* %a, i32 2
+ %3 = load i32* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds i32* %b, i32 2000
+ %4 = load i32* %arrayidx9, align 4
+ %sub = sub nsw i32 %3, %4
+ %arrayidx10 = getelementptr inbounds i32* %a, i32 4000
+ store i32 %sub, i32* %arrayidx10, align 4
+ br label %if.end
+
+if.else: ; preds = %entry
+ %arrayidx11 = getelementptr inbounds i32* %b, i32 3200
+ store i32 %add, i32* %arrayidx11, align 4
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret i32 %add
+}
diff --git a/test/CodeGen/Hexagon/cmp-to-genreg.ll b/test/CodeGen/Hexagon/cmp-to-genreg.ll
new file mode 100644
index 000000000000..97cf51ce1a2b
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp-to-genreg.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate compare-to-general-register instructions.
+
+define i32 @compare1(i32 %a) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}cmp.eq(r{{[0-9]+}},{{ *}}#120)
+entry:
+ %cmp = icmp eq i32 %a, 120
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @compare2(i32 %a) nounwind readnone {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}#120)
+entry:
+ %cmp = icmp ne i32 %a, 120
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @compare3(i32 %a, i32 %b) nounwind readnone {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+ %cmp = icmp eq i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @compare4(i32 %a, i32 %b) nounwind readnone {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+ %cmp = icmp ne i32 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
diff --git a/test/CodeGen/Hexagon/cmp-to-predreg.ll b/test/CodeGen/Hexagon/cmp-to-predreg.ll
new file mode 100644
index 000000000000..d430b901866d
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp-to-predreg.ll
@@ -0,0 +1,43 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate compare-to-predicate-register instructions.
+
+define i32 @compare1(i32 %a, i32 %b) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+ %cmp = icmp ne i32 %a, %b
+ %add = add nsw i32 %a, %b
+ %sub = sub nsw i32 %a, %b
+ %add.sub = select i1 %cmp, i32 %add, i32 %sub
+ ret i32 %add.sub
+}
+
+define i32 @compare2(i32 %a) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}#10)
+entry:
+ %cmp = icmp ne i32 %a, 10
+ %add = add nsw i32 %a, 10
+ %sub = sub nsw i32 %a, 10
+ %add.sub = select i1 %cmp, i32 %add, i32 %sub
+ ret i32 %add.sub
+}
+
+define i32 @compare3(i32 %a, i32 %b) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}cmp.gt(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+ %cmp = icmp sgt i32 %a, %b
+ %sub = sub nsw i32 %a, %b
+ %add = add nsw i32 %a, %b
+ %sub.add = select i1 %cmp, i32 %sub, i32 %add
+ ret i32 %sub.add
+}
+
+define i32 @compare4(i32 %a) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}cmp.gt(r{{[0-9]+}},{{ *}}#10)
+entry:
+ %cmp = icmp sgt i32 %a, 10
+ %sub = sub nsw i32 %a, 10
+ %add = add nsw i32 %a, 10
+ %sub.add = select i1 %cmp, i32 %sub, i32 %add
+ ret i32 %sub.add
+}
+
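Each function here is the IR for a compare feeding a select. A plain C sketch of the first one, showing the source shape that produces a predicate-register compare plus two conditionally selected arithmetic results:

/* shape of compare1 above: p = !cmp.eq(a, b), then the select picks
   between the add and the sub under that predicate */
int add_or_sub(int a, int b) {
  return (a != b) ? (a + b) : (a - b);
}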
diff --git a/test/CodeGen/Hexagon/cmp_pred.ll b/test/CodeGen/Hexagon/cmp_pred.ll
new file mode 100644
index 000000000000..37db3b499f63
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp_pred.ll
@@ -0,0 +1,115 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Generate various compare instructions followed by if (p0) .. if (!p0)...
+target triple = "hexagon"
+
+define i32 @Func_3Ugt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ugt i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3Uge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp uge i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3Ult(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ult i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3Ule(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ule i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3Ueq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3Une(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3UneC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ne i32 %Enum_Par_Val, 122
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3gt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK: mux
+ %cmp = icmp sgt i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3ge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp sge i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3lt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp slt i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3le(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp sle i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3eq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3ne(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3neC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ne i32 %Enum_Par_Val, 122
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
diff --git a/test/CodeGen/Hexagon/cmp_pred_reg.ll b/test/CodeGen/Hexagon/cmp_pred_reg.ll
new file mode 100644
index 000000000000..37db3b499f63
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp_pred_reg.ll
@@ -0,0 +1,115 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Generate various compare instructions followed by if (p0) .. if (!p0)...
+target triple = "hexagon"
+
+define i32 @Func_3Ugt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ugt i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3Uge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp uge i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3Ult(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ult i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3Ule(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ule i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3Ueq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3Une(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3UneC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ne i32 %Enum_Par_Val, 122
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3gt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK: mux
+ %cmp = icmp sgt i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3ge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp sge i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3lt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp slt i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3le(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp sle i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3eq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3ne(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3neC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %cmp = icmp ne i32 %Enum_Par_Val, 122
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
diff --git a/test/CodeGen/Hexagon/cmpb_pred.ll b/test/CodeGen/Hexagon/cmpb_pred.ll
new file mode 100644
index 000000000000..1e6144701fee
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmpb_pred.ll
@@ -0,0 +1,92 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Generate various cmpb instructions followed by if (p0) .. if (!p0)...
+target triple = "hexagon"
+
+@Enum_global = external global i8
+
+define i32 @Func_3(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %conv = and i32 %0, 255
+ %cmp = icmp eq i32 %conv, 2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3b(i32) nounwind readonly {
+entry:
+; CHECK-NOT: mux
+ %1 = load i8* @Enum_global, align 1, !tbaa !0
+ %2 = trunc i32 %0 to i8
+ %cmp = icmp ne i8 %1, %2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3c(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %conv = and i32 %0, 255
+ %cmp = icmp eq i32 %conv, 2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3d(i32) nounwind readonly {
+entry:
+; CHECK-NOT: mux
+ %1 = load i8* @Enum_global, align 1, !tbaa !0
+ %2 = trunc i32 %0 to i8
+ %cmp = icmp eq i8 %1, %2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3e(i32) nounwind readonly {
+entry:
+; CHECK-NOT: mux
+ %1 = load i8* @Enum_global, align 1, !tbaa !0
+ %2 = trunc i32 %0 to i8
+ %cmp = icmp eq i8 %1, %2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3f(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %conv = and i32 %0, 255
+ %cmp = icmp ugt i32 %conv, 2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3g(i32) nounwind readnone {
+entry:
+; CHECK: mux
+ %conv = and i32 %0, 255
+ %cmp = icmp ult i32 %conv, 3
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3h(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %conv = and i32 %0, 254
+ %cmp = icmp ult i32 %conv, 2
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+define i32 @Func_3i(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+ %conv = and i32 %0, 254
+ %cmp = icmp ugt i32 %conv, 1
+ %selv = zext i1 %cmp to i32
+ ret i32 %selv
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/combine_ir.ll b/test/CodeGen/Hexagon/combine_ir.ll
new file mode 100644
index 000000000000..921ce9928e6d
--- /dev/null
+++ b/test/CodeGen/Hexagon/combine_ir.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: word
+; CHECK: combine(#0
+
+define void @word(i32* nocapture %a) nounwind {
+entry:
+ %0 = load i32* %a, align 4, !tbaa !0
+ %1 = zext i32 %0 to i64
+ %add.ptr = getelementptr inbounds i32* %a, i32 1
+ %2 = load i32* %add.ptr, align 4, !tbaa !0
+ %3 = zext i32 %2 to i64
+ %4 = shl nuw i64 %3, 32
+ %ins = or i64 %4, %1
+ tail call void @bar(i64 %ins) nounwind
+ ret void
+}
+
+declare void @bar(i64)
+
+; CHECK: halfword
+; CHECK: combine(#0
+
+define void @halfword(i16* nocapture %a) nounwind {
+entry:
+ %0 = load i16* %a, align 2, !tbaa !3
+ %1 = zext i16 %0 to i64
+ %add.ptr = getelementptr inbounds i16* %a, i32 1
+ %2 = load i16* %add.ptr, align 2, !tbaa !3
+ %3 = zext i16 %2 to i64
+ %4 = shl nuw nsw i64 %3, 16
+ %ins = or i64 %4, %1
+ tail call void @bar(i64 %ins) nounwind
+ ret void
+}
+
+; CHECK: byte
+; CHECK: combine(#0
+
+define void @byte(i8* nocapture %a) nounwind {
+entry:
+ %0 = load i8* %a, align 1, !tbaa !1
+ %1 = zext i8 %0 to i64
+ %add.ptr = getelementptr inbounds i8* %a, i32 1
+ %2 = load i8* %add.ptr, align 1, !tbaa !1
+ %3 = zext i8 %2 to i64
+ %4 = shl nuw nsw i64 %3, 8
+ %ins = or i64 %4, %1
+ tail call void @bar(i64 %ins) nounwind
+ ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll b/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll
new file mode 100644
index 000000000000..e942f8d0c5dd
--- /dev/null
+++ b/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} |= lsr(r{{[0-9]+}}:{{[0-9]+}}, #4)
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} &= lsr(r{{[0-9]+}}:{{[0-9]+}}, #2)
+; CHECK: r{{[0-9]+}} += lsr(r{{[0-9]+}}, #4)
+
+define i32 @foo(i64 %a, i32 %b) nounwind {
+entry:
+ %tmp0 = tail call i64 @llvm.ctlz.i64( i64 %a, i1 true )
+ %tmp1 = tail call i64 @llvm.cttz.i64( i64 %a, i1 true )
+ %tmp2 = tail call i32 @llvm.ctlz.i32( i32 %b, i1 true )
+ %tmp3 = tail call i32 @llvm.cttz.i32( i32 %b, i1 true )
+ %tmp4 = tail call i64 @llvm.ctpop.i64( i64 %a )
+ %tmp5 = tail call i32 @llvm.ctpop.i32( i32 %b )
+
+
+ %tmp6 = trunc i64 %tmp0 to i32
+ %tmp7 = trunc i64 %tmp1 to i32
+ %tmp8 = trunc i64 %tmp4 to i32
+ %tmp9 = add i32 %tmp6, %tmp7
+ %tmp10 = add i32 %tmp9, %tmp8
+ %tmp11 = add i32 %tmp10, %tmp2
+ %tmp12 = add i32 %tmp11, %tmp3
+ %tmp13 = add i32 %tmp12, %tmp5
+
+ ret i32 %tmp13
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
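These intrinsics are what the standard GCC/Clang bit-counting builtins lower to, so the test can be cross-checked against plain C. A quick runnable example for the 32-bit cases (the i1 true flag in the IR matches the builtins' undefined-at-zero behavior):

#include <stdio.h>

int main(void) {
  unsigned x = 0x00f0u;
  /* __builtin_clz/ctz/popcount lower to llvm.ctlz/cttz/ctpop */
  printf("clz=%d ctz=%d popcount=%d\n",
         __builtin_clz(x), __builtin_ctz(x), __builtin_popcount(x));
  /* prints: clz=24 ctz=4 popcount=4 (x must be nonzero for clz/ctz) */
  return 0;
}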
diff --git a/test/CodeGen/Hexagon/dualstore.ll b/test/CodeGen/Hexagon/dualstore.ll
index 9b27dda52c1d..f7d7e8bbe75d 100644
--- a/test/CodeGen/Hexagon/dualstore.ll
+++ b/test/CodeGen/Hexagon/dualstore.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s
; Check that we generate dual stores in one packet in V4
-; CHECK: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}}
-; CHECK-NEXT: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}}
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}##500000
+; CHECK-NEXT: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}##100000
; CHECK-NEXT: }
@Reg = global i32 0, align 4
diff --git a/test/CodeGen/Hexagon/gp-plus-offset-load.ll b/test/CodeGen/Hexagon/gp-plus-offset-load.ll
new file mode 100644
index 000000000000..a1b80a65f82a
--- /dev/null
+++ b/test/CodeGen/Hexagon/gp-plus-offset-load.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate load instructions with global + offset.
+
+%struct.struc = type { i8, i8, i16, i32 }
+
+@foo = common global %struct.struc zeroinitializer, align 4
+
+define void @loadWord(i32 %val1, i32 %val2, i32* nocapture %ival) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(##foo{{ *}}+{{ *}}4)
+entry:
+ %cmp = icmp sgt i32 %val1, %val2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %0 = load i32* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 3), align 4
+ store i32 %0, i32* %ival, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @loadByte(i32 %val1, i32 %val2, i8* nocapture %ival) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(##foo{{ *}}+{{ *}}1)
+entry:
+ %cmp = icmp sgt i32 %val1, %val2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %0 = load i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1
+ store i8 %0, i8* %ival, align 1
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @loadHWord(i32 %val1, i32 %val2, i16* %ival) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(##foo{{ *}}+{{ *}}2)
+entry:
+ %cmp = icmp sgt i32 %val1, %val2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %0 = load i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2
+ store i16 %0, i16* %ival, align 2
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/gp-plus-offset-store.ll b/test/CodeGen/Hexagon/gp-plus-offset-store.ll
new file mode 100644
index 000000000000..c782b30920ea
--- /dev/null
+++ b/test/CodeGen/Hexagon/gp-plus-offset-store.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate store instructions with global + offset.
+
+%struct.struc = type { i8, i8, i16, i32 }
+
+@foo = common global %struct.struc zeroinitializer, align 4
+
+define void @storeByte(i32 %val1, i32 %val2, i8 zeroext %ival) nounwind {
+; CHECK: memb(##foo{{ *}}+{{ *}}1){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+ %cmp = icmp sgt i32 %val1, %val2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i8 %ival, i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @storeHW(i32 %val1, i32 %val2, i16 signext %ival) nounwind {
+; CHECK: memh(##foo{{ *}}+{{ *}}2){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+ %cmp = icmp sgt i32 %val1, %val2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i16 %ival, i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
diff --git a/test/CodeGen/Hexagon/gp-rel.ll b/test/CodeGen/Hexagon/gp-rel.ll
new file mode 100644
index 000000000000..561869e8ef35
--- /dev/null
+++ b/test/CodeGen/Hexagon/gp-rel.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that gp-relative instructions are being generated.
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+
+define i32 @foo(i32 %p) #0 {
+entry:
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#a)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#b)
+; CHECK: if{{ *}}(p{{[0-3]}}) memw(##c){{ *}}={{ *}}r{{[0-9]+}}
+ %0 = load i32* @a, align 4
+ %1 = load i32* @b, align 4
+ %add = add nsw i32 %1, %0
+ %cmp = icmp eq i32 %0, %1
+ br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+
+entry.if.end_crit_edge:
+ %.pre = load i32* @c, align 4
+ br label %if.end
+
+if.then:
+ %add1 = add nsw i32 %add, %0
+ store i32 %add1, i32* @c, align 4
+ br label %if.end
+
+if.end:
+ %2 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add1, %if.then ]
+ %cmp2 = icmp eq i32 %add, %2
+ %sel1 = select i1 %cmp2, i32 %2, i32 %1
+ ret i32 %sel1
+}
diff --git a/test/CodeGen/Hexagon/hwloop-cleanup.ll b/test/CodeGen/Hexagon/hwloop-cleanup.ll
new file mode 100644
index 000000000000..6456ebff16d3
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-cleanup.ll
@@ -0,0 +1,86 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we remove the compare and induction variable instructions
+; after generating hardware loops.
+; Bug 6685.
+
+; CHECK: loop0
+; CHECK-NOT: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#-1)
+; CHECK-NOT: cmp.eq
+; CHECK: endloop0
+
+define i32 @test1(i32* nocapture %b, i32 %n) nounwind readonly {
+entry:
+ %cmp1 = icmp sgt i32 %n, 0
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %sum.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %b, %for.body.preheader ]
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %0 = load i32* %arrayidx.phi, align 4
+ %add = add nsw i32 %0, %sum.03
+ %inc = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, %n
+ %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ]
+ ret i32 %sum.0.lcssa
+}
+
+; This test checks that the initial loop count value is removed.
+; CHECK-NOT: ={{.}}#40
+; CHECK: loop0
+; CHECK-NOT: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#-1)
+; CHECK-NOT: cmp.eq
+; CHECK: endloop0
+
+define i32 @test2(i32* nocapture %b) nounwind readonly {
+entry:
+ br label %for.body
+
+for.body:
+ %sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = load i32* %arrayidx.phi, align 4
+ %add = add nsw i32 %0, %sum.02
+ %inc = add nsw i32 %i.01, 1
+ %exitcond = icmp eq i32 %inc, 40
+ %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret i32 %add
+}
+
+; This test checks that we don't remove the induction variable since it's used.
+; CHECK: loop0
+; CHECK: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#1)
+; CHECK-NOT: cmp.eq
+; CHECK: endloop0
+define i32 @test3(i32* nocapture %b) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ store i32 %i.01, i32* %arrayidx.phi, align 4
+ %inc = add nsw i32 %i.01, 1
+ %exitcond = icmp eq i32 %inc, 40
+ %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret i32 0
+}
+
+
diff --git a/test/CodeGen/Hexagon/hwloop-const.ll b/test/CodeGen/Hexagon/hwloop-const.ll
new file mode 100644
index 000000000000..a621c58c63ed
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-const.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O2 < %s | FileCheck %s
+; ModuleID = 'hwloop-const.c'
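+; A single loop with a constant trip count (25000) and two stores per
+; iteration; it should still be converted to a hardware loop (endloop).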
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon-unknown-linux-gnu"
+
+@b = common global [25000 x i32] zeroinitializer, align 8
+@a = common global [25000 x i32] zeroinitializer, align 8
+@c = common global [25000 x i32] zeroinitializer, align 8
+
+define i32 @hwloop_bug() nounwind {
+entry:
+ br label %for.body
+
+; CHECK: endloop
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [25000 x i32]* @b, i32 0, i32 %i.02
+ store i32 %i.02, i32* %arrayidx, align 4, !tbaa !0
+ %arrayidx1 = getelementptr inbounds [25000 x i32]* @a, i32 0, i32 %i.02
+ store i32 %i.02, i32* %arrayidx1, align 4, !tbaa !0
+ %inc = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, 25000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll
new file mode 100644
index 000000000000..c2e8153b7dff
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-dbg.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -march=hexagon -mcpu=hexagonv4 -O2 -disable-lsr | FileCheck %s
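+; Check that debug intrinsics and metadata in the loop body do not block
+; hardware-loop generation: the loop should still become a loop0/endloop0
+; pair with no leftover induction-variable add.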
+; ModuleID = 'hwloop-dbg.o'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define void @foo(i32* nocapture %a, i32* nocapture %b) nounwind {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !17
+ tail call void @llvm.dbg.value(metadata !{i32* %b}, i64 0, metadata !14), !dbg !18
+ tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15), !dbg !19
+ br label %for.body, !dbg !19
+
+for.body: ; preds = %for.body, %entry
+; CHECK: loop0(
+; CHECK-NOT: add({{r[0-9]*}}, #
+; CHECK: endloop0
+ %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %b.addr.01 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.body ]
+ %incdec.ptr = getelementptr inbounds i32* %b.addr.01, i32 1, !dbg !21
+ tail call void @llvm.dbg.value(metadata !{i32* %incdec.ptr}, i64 0, metadata !14), !dbg !21
+ %0 = load i32* %b.addr.01, align 4, !dbg !21, !tbaa !23
+ store i32 %0, i32* %arrayidx.phi, align 4, !dbg !21, !tbaa !23
+ %inc = add nsw i32 %i.02, 1, !dbg !26
+ tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !15), !dbg !26
+ %exitcond = icmp eq i32 %inc, 10, !dbg !19
+ %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+ br i1 %exitcond, label %for.end, label %for.body, !dbg !19
+
+for.end: ; preds = %for.body
+ ret void, !dbg !27
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", metadata !"QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*)* @foo, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!6 = metadata !{i32 786473, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null, metadata !9, metadata !9}
+!9 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
+!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = metadata !{metadata !12}
+!12 = metadata !{metadata !13, metadata !14, metadata !15}
+!13 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
+!14 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554433, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 1]
+!15 = metadata !{i32 786688, metadata !16, metadata !"i", metadata !6, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2]
+!16 = metadata !{i32 786443, metadata !5, i32 1, i32 26, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!17 = metadata !{i32 1, i32 15, metadata !5, null}
+!18 = metadata !{i32 1, i32 23, metadata !5, null}
+!19 = metadata !{i32 3, i32 8, metadata !20, null}
+!20 = metadata !{i32 786443, metadata !16, i32 3, i32 3, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!21 = metadata !{i32 4, i32 5, metadata !22, null}
+!22 = metadata !{i32 786443, metadata !20, i32 3, i32 28, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!23 = metadata !{metadata !"int", metadata !24}
+!24 = metadata !{metadata !"omnipotent char", metadata !25}
+!25 = metadata !{metadata !"Simple C/C++ TBAA"}
+!26 = metadata !{i32 3, i32 23, metadata !20, null}
+!27 = metadata !{i32 6, i32 1, metadata !16, null}
diff --git a/test/CodeGen/Hexagon/hwloop-le.ll b/test/CodeGen/Hexagon/hwloop-le.ll
new file mode 100644
index 000000000000..9c8cec7c2a1b
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-le.ll
@@ -0,0 +1,438 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
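+; Matrix of countable loops with a signed "<=" exit test: strides 1, 2, 4,
+; 8 and 16 crossed with immediate/register start and bound combinations
+; (ir = immediate start vs. register bound, ri = register start vs.
+; immediate bound, rr = both in registers).  Each body is roughly the C
+; pattern "for (i = start; i <= bound; i += stride) p[i]++", and every
+; variant should become a loop0 hardware loop.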
+
+
+; CHECK: test_pos1_ir_sle
+; CHECK: loop0
+; a < b
+define void @test_pos1_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 28395, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 1
+ %cmp = icmp sle i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos2_ir_sle
+; CHECK: loop0
+; a < b
+define void @test_pos2_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 9073, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 2
+ %cmp = icmp sle i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos4_ir_sle
+; CHECK: loop0
+; a < b
+define void @test_pos4_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 21956, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 4
+ %cmp = icmp sle i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos8_ir_sle
+; CHECK: loop0
+; a < b
+define void @test_pos8_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 16782, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 8
+ %cmp = icmp sle i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos16_ir_sle
+; CHECK: loop0
+; a < b
+define void @test_pos16_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 19097, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 16
+ %cmp = icmp sle i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos1_ri_sle
+; CHECK: loop0
+; a < b
+define void @test_pos1_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 %a, 14040
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 1
+ %cmp = icmp sle i32 %inc, 14040
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos2_ri_sle
+; CHECK: loop0
+; a < b
+define void @test_pos2_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 %a, 13710
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 2
+ %cmp = icmp sle i32 %inc, 13710
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos4_ri_sle
+; CHECK: loop0
+; a < b
+define void @test_pos4_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 %a, 9920
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 4
+ %cmp = icmp sle i32 %inc, 9920
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos8_ri_sle
+; CHECK: loop0
+; a < b
+define void @test_pos8_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 %a, 18924
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 8
+ %cmp = icmp sle i32 %inc, 18924
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos16_ri_sle
+; CHECK: loop0
+; a < b
+define void @test_pos16_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 %a, 11812
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 16
+ %cmp = icmp sle i32 %inc, 11812
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos1_rr_sle
+; CHECK: loop0
+; a < b
+define void @test_pos1_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 1
+ %cmp = icmp sle i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos2_rr_sle
+; CHECK: loop0
+; a < b
+define void @test_pos2_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 2
+ %cmp = icmp sle i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos4_rr_sle
+; CHECK: loop0
+; a < b
+define void @test_pos4_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 4
+ %cmp = icmp sle i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos8_rr_sle
+; CHECK: loop0
+; a < b
+define void @test_pos8_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 8
+ %cmp = icmp sle i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos16_rr_sle
+; CHECK: loop0
+; a < b
+define void @test_pos16_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp sle i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 16
+ %cmp = icmp sle i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
diff --git a/test/CodeGen/Hexagon/hwloop-lt.ll b/test/CodeGen/Hexagon/hwloop-lt.ll
new file mode 100644
index 000000000000..7e43733da2a6
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-lt.ll
@@ -0,0 +1,438 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
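+; Same matrix as hwloop-le.ll, but the exit test is a signed "<": roughly
+; "for (i = start; i < bound; i += stride) p[i]++" for strides 1, 2, 4, 8
+; and 16 with ir/ri/rr start/bound combinations.  Each loop should become
+; a loop0 hardware loop.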
+
+
+; CHECK: test_pos1_ir_slt
+; CHECK: loop0
+; a < b
+define void @test_pos1_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 8531, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 1
+ %cmp = icmp slt i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos2_ir_slt
+; CHECK: loop0
+; a < b
+define void @test_pos2_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 9152, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 2
+ %cmp = icmp slt i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos4_ir_slt
+; CHECK: loop0
+; a < b
+define void @test_pos4_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 18851, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 4
+ %cmp = icmp slt i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos8_ir_slt
+; CHECK: loop0
+; a < b
+define void @test_pos8_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 25466, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 8
+ %cmp = icmp slt i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos16_ir_slt
+; CHECK: loop0
+; a < b
+define void @test_pos16_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 9295, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 16
+ %cmp = icmp slt i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos1_ri_slt
+; CHECK: loop0
+; a < b
+define void @test_pos1_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, 31236
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 1
+ %cmp = icmp slt i32 %inc, 31236
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos2_ri_slt
+; CHECK: loop0
+; a < b
+define void @test_pos2_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, 22653
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 2
+ %cmp = icmp slt i32 %inc, 22653
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos4_ri_slt
+; CHECK: loop0
+; a < b
+define void @test_pos4_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, 1431
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 4
+ %cmp = icmp slt i32 %inc, 1431
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos8_ri_slt
+; CHECK: loop0
+; a < b
+define void @test_pos8_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, 22403
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 8
+ %cmp = icmp slt i32 %inc, 22403
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos16_ri_slt
+; CHECK: loop0
+; a < b
+define void @test_pos16_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, 21715
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 16
+ %cmp = icmp slt i32 %inc, 21715
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos1_rr_slt
+; CHECK: loop0
+; a < b
+define void @test_pos1_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 1
+ %cmp = icmp slt i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos2_rr_slt
+; CHECK: loop0
+; a < b
+define void @test_pos2_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 2
+ %cmp = icmp slt i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos4_rr_slt
+; CHECK: loop0
+; a < b
+define void @test_pos4_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 4
+ %cmp = icmp slt i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos8_rr_slt
+; CHECK: loop0
+; a < b
+define void @test_pos8_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 8
+ %cmp = icmp slt i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos16_rr_slt
+; CHECK: loop0
+; a < b
+define void @test_pos16_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 16
+ %cmp = icmp slt i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
diff --git a/test/CodeGen/Hexagon/hwloop-lt1.ll b/test/CodeGen/Hexagon/hwloop-lt1.ll
new file mode 100644
index 000000000000..cf5874011ee0
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-lt1.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate a hardware loop instruction.
+; CHECK: endloop0
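+;
+; The body below is a Polly-style four-way unrolling of roughly this C
+; loop (illustrative sketch):
+;
+;   for (int i = 0; i < 400; i++)
+;     A[i] = 123;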
+
+@A = common global [400 x i8] zeroinitializer, align 8
+@B = common global [400 x i8] zeroinitializer, align 8
+@C = common global [400 x i8] zeroinitializer, align 8
+
+define void @run() nounwind {
+entry:
+ br label %polly.loop_body
+
+polly.loop_after: ; preds = %polly.loop_body
+ ret void
+
+polly.loop_body: ; preds = %entry, %polly.loop_body
+ %polly.loopiv16 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
+ %polly.next_loopiv = add i32 %polly.loopiv16, 4
+ %p_vector_iv14 = or i32 %polly.loopiv16, 1
+ %p_vector_iv3 = add i32 %p_vector_iv14, 1
+ %p_vector_iv415 = or i32 %polly.loopiv16, 3
+ %p_arrayidx = getelementptr [400 x i8]* @A, i32 0, i32 %polly.loopiv16
+ %p_arrayidx5 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv14
+ %p_arrayidx6 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv3
+ %p_arrayidx7 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv415
+ store i8 123, i8* %p_arrayidx, align 1
+ store i8 123, i8* %p_arrayidx5, align 1
+ store i8 123, i8* %p_arrayidx6, align 1
+ store i8 123, i8* %p_arrayidx7, align 1
+ %0 = icmp slt i32 %polly.next_loopiv, 400
+ br i1 %0, label %polly.loop_body, label %polly.loop_after
+}
diff --git a/test/CodeGen/Hexagon/hwloop-ne.ll b/test/CodeGen/Hexagon/hwloop-ne.ll
new file mode 100644
index 000000000000..bceef2a16955
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-ne.ll
@@ -0,0 +1,438 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
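+; Same matrix as hwloop-lt.ll, but the loop exits on "!=" rather than "<":
+; roughly "for (i = start; i != bound; i += stride) p[i]++" for strides 1,
+; 2, 4, 8 and 16 with ir/ri/rr start/bound combinations.  Each loop should
+; still get a computable trip count and become a loop0 hardware loop.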
+
+
+; CHECK: test_pos1_ir_ne
+; CHECK: loop0
+; a < b
+define void @test_pos1_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 32623, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 1
+ %cmp = icmp ne i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos2_ir_ne
+; CHECK: loop0
+; a < b
+define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 29554, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 2
+ %cmp = icmp ne i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos4_ir_ne
+; CHECK: loop0
+; a < b
+define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 15692, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 4
+ %cmp = icmp ne i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos8_ir_ne
+; CHECK: loop0
+; a < b
+define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 10449, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 8
+ %cmp = icmp ne i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos16_ir_ne
+; CHECK: loop0
+; a < b
+define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 32087, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 16
+ %cmp = icmp ne i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos1_ri_ne
+; CHECK: loop0
+; a < b
+define void @test_pos1_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, 3472
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 1
+ %cmp = icmp ne i32 %inc, 3472
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos2_ri_ne
+; CHECK: loop0
+; a < b
+define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, 8730
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 2
+ %cmp = icmp ne i32 %inc, 8730
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos4_ri_ne
+; CHECK: loop0
+; a < b
+define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, 1493
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 4
+ %cmp = icmp ne i32 %inc, 1493
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos8_ri_ne
+; CHECK: loop0
+; a < b
+define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, 1706
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 8
+ %cmp = icmp ne i32 %inc, 1706
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos16_ri_ne
+; CHECK: loop0
+; a < b
+define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, 1886
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 16
+ %cmp = icmp ne i32 %inc, 1886
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos1_rr_ne
+; CHECK: loop0
+; a < b
+define void @test_pos1_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 1
+ %cmp = icmp ne i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos2_rr_ne
+; CHECK: loop0
+; a < b
+define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 2
+ %cmp = icmp ne i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos4_rr_ne
+; CHECK: loop0
+; a < b
+define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 4
+ %cmp = icmp ne i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos8_rr_ne
+; CHECK: loop0
+; a < b
+define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 8
+ %cmp = icmp ne i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
+; CHECK: test_pos16_rr_ne
+; CHECK: loop0
+; a < b
+define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+ %cmp3 = icmp slt i32 %a, %b
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+ %0 = load i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 1
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.04, 16
+ %cmp = icmp ne i32 %inc, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+
diff --git a/test/CodeGen/Hexagon/i16_VarArg.ll b/test/CodeGen/Hexagon/i16_VarArg.ll
new file mode 100644
index 000000000000..eb44c2905c9d
--- /dev/null
+++ b/test/CodeGen/Hexagon/i16_VarArg.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}}
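+; With the default CPU, double-precision compares lower to __hexagon_*
+; runtime helper calls; this test then feeds an i16-widened compare result
+; to printf through varargs.  i1_VarArg.ll and i8_VarArg.ll are the i1 and
+; i8 variants of the same test.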
+
+@a_str = internal constant [8 x i8] c"a = %f\0A\00"
+@b_str = internal constant [8 x i8] c"b = %f\0A\00"
+@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
+@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
+@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
+@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
+@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
+@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
+@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
+@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
+@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
+@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
+@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
+@A = global double 2.000000e+00
+@B = global double 5.000000e+00
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+ %a = load double* @A
+ %b = load double* @B
+ %lt_r = fcmp olt double %a, %b
+ %le_r = fcmp ole double %a, %b
+ %gt_r = fcmp ogt double %a, %b
+ %ge_r = fcmp oge double %a, %b
+ %eq_r = fcmp oeq double %a, %b
+ %ne_r = fcmp une double %a, %b
+ %val1 = zext i1 %lt_r to i16
+ %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0
+ %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0
+ %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0
+ %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0
+ %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0
+ %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0
+ call i32 (i8*, ...)* @printf( i8* %lt_s, i16 %val1 )
+ ret i32 0
+}
\ No newline at end of file
diff --git a/test/CodeGen/Hexagon/i1_VarArg.ll b/test/CodeGen/Hexagon/i1_VarArg.ll
new file mode 100644
index 000000000000..7dbfb25cd2b7
--- /dev/null
+++ b/test/CodeGen/Hexagon/i1_VarArg.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: call __hexagon_{{[_A-Za-z0-9]+}}
+
+@a_str = internal constant [8 x i8] c"a = %f\0A\00"
+@b_str = internal constant [8 x i8] c"b = %f\0A\00"
+@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
+@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
+@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
+@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
+@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
+@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
+@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
+@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
+@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
+@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
+@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
+@A = global double 2.000000e+00
+@B = global double 5.000000e+00
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+ %a = load double* @A
+ %b = load double* @B
+ %lt_r = fcmp olt double %a, %b
+ %le_r = fcmp ole double %a, %b
+ %gt_r = fcmp ogt double %a, %b
+ %ge_r = fcmp oge double %a, %b
+ %eq_r = fcmp oeq double %a, %b
+ %ne_r = fcmp une double %a, %b
+ %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0
+ %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0
+ %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0
+ %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0
+ %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0
+ %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0
+ call i32 (i8*, ...)* @printf( i8* %lt_s, i1 %lt_r )
+ call i32 (i8*, ...)* @printf( i8* %le_s, i1 %le_r )
+ call i32 (i8*, ...)* @printf( i8* %gt_s, i1 %gt_r )
+ call i32 (i8*, ...)* @printf( i8* %ge_s, i1 %ge_r )
+ call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r )
+ call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r )
+ ret i32 0
+}
\ No newline at end of file
diff --git a/test/CodeGen/Hexagon/i8_VarArg.ll b/test/CodeGen/Hexagon/i8_VarArg.ll
new file mode 100644
index 000000000000..687b178824ce
--- /dev/null
+++ b/test/CodeGen/Hexagon/i8_VarArg.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}}
+
+@a_str = internal constant [8 x i8] c"a = %f\0A\00"
+@b_str = internal constant [8 x i8] c"b = %f\0A\00"
+@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
+@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
+@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
+@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
+@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
+@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
+@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
+@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
+@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
+@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
+@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
+@A = global double 2.000000e+00
+@B = global double 5.000000e+00
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+ %a = load double* @A
+ %b = load double* @B
+ %lt_r = fcmp olt double %a, %b
+ %le_r = fcmp ole double %a, %b
+ %gt_r = fcmp ogt double %a, %b
+ %ge_r = fcmp oge double %a, %b
+ %eq_r = fcmp oeq double %a, %b
+ %ne_r = fcmp une double %a, %b
+ %val1 = zext i1 %lt_r to i8
+ %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0
+ %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0
+ %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0
+ %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0
+ %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0
+ %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0
+ call i32 (i8*, ...)* @printf( i8* %lt_s, i8 %val1 )
+ ret i32 0
+}
\ No newline at end of file
diff --git a/test/CodeGen/Hexagon/idxload-with-zero-offset.ll b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
new file mode 100644
index 000000000000..ca6df88a5529
--- /dev/null
+++ b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
@@ -0,0 +1,70 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate load instructions with the (base + register offset << #0) addressing mode.
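+;
+; Each function below indexes off a shifted value, e.g. roughly
+; (illustrative C):
+;
+;   int load_w(int *a, int n) { return a[n << 4]; }
+;
+; and should select the indexed form memw(Rbase + Roffset<<#0) rather than
+; materializing the address with a separate add.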
+
+; load word
+
+define i32 @load_w(i32* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+ %tmp = shl i32 %n, 4
+ %scevgep9 = getelementptr i32* %a, i32 %tmp
+ %val = load i32* %scevgep9, align 4
+ ret i32 %val
+}
+
+; load unsigned half word
+
+define i16 @load_uh(i16* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memuh(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+ %tmp = shl i32 %n, 4
+ %scevgep9 = getelementptr i16* %a, i32 %tmp
+ %val = load i16* %scevgep9, align 2
+ ret i16 %val
+}
+
+; load signed half word
+
+define i32 @load_h(i16* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+ %tmp = shl i32 %n, 4
+ %scevgep9 = getelementptr i16* %a, i32 %tmp
+ %val = load i16* %scevgep9, align 2
+ %conv = sext i16 %val to i32
+ ret i32 %conv
+}
+
+; load unsigned byte
+
+define i8 @load_ub(i8* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+ %tmp = shl i32 %n, 4
+ %scevgep9 = getelementptr i8* %a, i32 %tmp
+ %val = load i8* %scevgep9, align 1
+ ret i8 %val
+}
+
+; load signed byte
+
+define i32 @foo_2(i8* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+ %tmp = shl i32 %n, 4
+ %scevgep9 = getelementptr i8* %a, i32 %tmp
+ %val = load i8* %scevgep9, align 1
+ %conv = sext i8 %val to i32
+ ret i32 %conv
+}
+
+; load doubleword
+
+define i64 @load_d(i64* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}:{{[0-9]+}}{{ *}}={{ *}}memd(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+ %tmp = shl i32 %n, 4
+ %scevgep9 = getelementptr i64* %a, i32 %tmp
+ %val = load i64* %scevgep9, align 8
+ ret i64 %val
+}
diff --git a/test/CodeGen/Hexagon/indirect-br.ll b/test/CodeGen/Hexagon/indirect-br.ll
new file mode 100644
index 000000000000..919e50189160
--- /dev/null
+++ b/test/CodeGen/Hexagon/indirect-br.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
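+; Check that indirectbr lowers to a register-indirect jump.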
+; CHECK: jumpr r{{[0-9]+}}
+
+define i32 @check_indirect_br(i8* %target) nounwind {
+entry:
+ indirectbr i8* %target, [label %test_label]
+
+test_label:
+ br label %ret
+
+ret:
+ ret i32 -1
+}
\ No newline at end of file
diff --git a/test/CodeGen/Hexagon/memops.ll b/test/CodeGen/Hexagon/memops.ll
new file mode 100644
index 000000000000..5498848d8560
--- /dev/null
+++ b/test/CodeGen/Hexagon/memops.ll
@@ -0,0 +1,1369 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Generate MemOps for V4 and above.
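+;
+; A memop folds a load/modify/store of the same address into a single
+; instruction.  The first test below is roughly this C (illustrative):
+;
+;   void memop_unsigned_char_add5(unsigned char *p) { *p += 5; }
+;
+; which should emit a single "memb(Rs+#0) += #5" instead of separate load,
+; add and store instructions.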
+
+define void @memop_unsigned_char_add5(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %0 = load i8* %p, align 1, !tbaa !0
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 5
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_add(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv = zext i8 %x to i32
+ %0 = load i8* %p, align 1, !tbaa !0
+ %conv1 = zext i8 %0 to i32
+ %add = add nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %add to i8
+ store i8 %conv2, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_sub(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv = zext i8 %x to i32
+ %0 = load i8* %p, align 1, !tbaa !0
+ %conv1 = zext i8 %0 to i32
+ %sub = sub nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %sub to i8
+ store i8 %conv2, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_or(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %0 = load i8* %p, align 1, !tbaa !0
+ %or3 = or i8 %0, %x
+ store i8 %or3, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_and(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %0 = load i8* %p, align 1, !tbaa !0
+ %and3 = and i8 %0, %x
+ store i8 %and3, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_clrbit(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %0 = load i8* %p, align 1, !tbaa !0
+ %conv = zext i8 %0 to i32
+ %and = and i32 %conv, 223
+ %conv1 = trunc i32 %and to i8
+ store i8 %conv1, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_setbit(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %0 = load i8* %p, align 1, !tbaa !0
+ %conv = zext i8 %0 to i32
+ %or = or i32 %conv, 128
+ %conv1 = trunc i32 %or to i8
+ store i8 %conv1, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_add5_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 5
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_add_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv = zext i8 %x to i32
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv1 = zext i8 %0 to i32
+ %add = add nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %add to i8
+ store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_sub_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv = zext i8 %x to i32
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv1 = zext i8 %0 to i32
+ %sub = sub nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %sub to i8
+ store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_or_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %or3 = or i8 %0, %x
+ store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_and_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %and3 = and i8 %0, %x
+ store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv = zext i8 %0 to i32
+ %and = and i32 %conv, 223
+ %conv1 = trunc i32 %and to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_setbit_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv = zext i8 %0 to i32
+ %or = or i32 %conv, 128
+ %conv1 = trunc i32 %or to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_add5_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv = zext i8 %0 to i32
+ %add = add nsw i32 %conv, 5
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_add_index5(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv = zext i8 %x to i32
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv1 = zext i8 %0 to i32
+ %add = add nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %add to i8
+ store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_sub_index5(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv = zext i8 %x to i32
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv1 = zext i8 %0 to i32
+ %sub = sub nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %sub to i8
+ store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_or_index5(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %or3 = or i8 %0, %x
+ store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_and_index5(i8* nocapture %p, i8 zeroext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %and3 = and i8 %0, %x
+ store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_clrbit_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv = zext i8 %0 to i32
+ %and = and i32 %conv, 223
+ %conv1 = trunc i32 %and to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_char_setbit_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv = zext i8 %0 to i32
+ %or = or i32 %conv, 128
+ %conv1 = trunc i32 %or to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_add5(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %0 = load i8* %p, align 1, !tbaa !0
+ %conv2 = zext i8 %0 to i32
+ %add = add nsw i32 %conv2, 5
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_add(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv4 = zext i8 %x to i32
+ %0 = load i8* %p, align 1, !tbaa !0
+ %conv13 = zext i8 %0 to i32
+ %add = add nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %add to i8
+ store i8 %conv2, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_sub(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv4 = zext i8 %x to i32
+ %0 = load i8* %p, align 1, !tbaa !0
+ %conv13 = zext i8 %0 to i32
+ %sub = sub nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %sub to i8
+ store i8 %conv2, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_or(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %0 = load i8* %p, align 1, !tbaa !0
+ %or3 = or i8 %0, %x
+ store i8 %or3, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_and(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %0 = load i8* %p, align 1, !tbaa !0
+ %and3 = and i8 %0, %x
+ store i8 %and3, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_clrbit(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %0 = load i8* %p, align 1, !tbaa !0
+ %conv2 = zext i8 %0 to i32
+ %and = and i32 %conv2, 223
+ %conv1 = trunc i32 %and to i8
+ store i8 %conv1, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_setbit(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %0 = load i8* %p, align 1, !tbaa !0
+ %conv2 = zext i8 %0 to i32
+ %or = or i32 %conv2, 128
+ %conv1 = trunc i32 %or to i8
+ store i8 %conv1, i8* %p, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_add5_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv2 = zext i8 %0 to i32
+ %add = add nsw i32 %conv2, 5
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_add_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv4 = zext i8 %x to i32
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv13 = zext i8 %0 to i32
+ %add = add nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %add to i8
+ store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_sub_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv4 = zext i8 %x to i32
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv13 = zext i8 %0 to i32
+ %sub = sub nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %sub to i8
+ store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_or_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %or3 = or i8 %0, %x
+ store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_and_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %and3 = and i8 %0, %x
+ store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv2 = zext i8 %0 to i32
+ %and = and i32 %conv2, 223
+ %conv1 = trunc i32 %and to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_setbit_index(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i8* %p, i32 %i
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv2 = zext i8 %0 to i32
+ %or = or i32 %conv2, 128
+ %conv1 = trunc i32 %or to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_add5_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv2 = zext i8 %0 to i32
+ %add = add nsw i32 %conv2, 5
+ %conv1 = trunc i32 %add to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_add_index5(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv4 = zext i8 %x to i32
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv13 = zext i8 %0 to i32
+ %add = add nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %add to i8
+ store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_sub_index5(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv4 = zext i8 %x to i32
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv13 = zext i8 %0 to i32
+ %sub = sub nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %sub to i8
+ store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_or_index5(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %or3 = or i8 %0, %x
+ store i8 %or3, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_and_index5(i8* nocapture %p, i8 signext %x) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %and3 = and i8 %0, %x
+ store i8 %and3, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_clrbit_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv2 = zext i8 %0 to i32
+ %and = and i32 %conv2, 223
+ %conv1 = trunc i32 %and to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_signed_char_setbit_index5(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i8* %p, i32 5
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv2 = zext i8 %0 to i32
+ %or = or i32 %conv2, 128
+ %conv1 = trunc i32 %or to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @memop_unsigned_short_add5(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %0 = load i16* %p, align 2, !tbaa !2
+ %conv = zext i16 %0 to i32
+ %add = add nsw i32 %conv, 5
+ %conv1 = trunc i32 %add to i16
+ store i16 %conv1, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_add(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv = zext i16 %x to i32
+ %0 = load i16* %p, align 2, !tbaa !2
+ %conv1 = zext i16 %0 to i32
+ %add = add nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %add to i16
+ store i16 %conv2, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_sub(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv = zext i16 %x to i32
+ %0 = load i16* %p, align 2, !tbaa !2
+ %conv1 = zext i16 %0 to i32
+ %sub = sub nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %sub to i16
+ store i16 %conv2, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_or(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %0 = load i16* %p, align 2, !tbaa !2
+ %or3 = or i16 %0, %x
+ store i16 %or3, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_and(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %0 = load i16* %p, align 2, !tbaa !2
+ %and3 = and i16 %0, %x
+ store i16 %and3, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_clrbit(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %0 = load i16* %p, align 2, !tbaa !2
+ %conv = zext i16 %0 to i32
+ %and = and i32 %conv, 65503
+ %conv1 = trunc i32 %and to i16
+ store i16 %conv1, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_setbit(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %0 = load i16* %p, align 2, !tbaa !2
+ %conv = zext i16 %0 to i32
+ %or = or i32 %conv, 128
+ %conv1 = trunc i32 %or to i16
+ store i16 %conv1, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_add5_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv = zext i16 %0 to i32
+ %add = add nsw i32 %conv, 5
+ %conv1 = trunc i32 %add to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_add_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv = zext i16 %x to i32
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv1 = zext i16 %0 to i32
+ %add = add nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %add to i16
+ store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_sub_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv = zext i16 %x to i32
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv1 = zext i16 %0 to i32
+ %sub = sub nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %sub to i16
+ store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_or_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %or3 = or i16 %0, %x
+ store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_and_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %and3 = and i16 %0, %x
+ store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv = zext i16 %0 to i32
+ %and = and i32 %conv, 65503
+ %conv1 = trunc i32 %and to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_setbit_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv = zext i16 %0 to i32
+ %or = or i32 %conv, 128
+ %conv1 = trunc i32 %or to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_add5_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv = zext i16 %0 to i32
+ %add = add nsw i32 %conv, 5
+ %conv1 = trunc i32 %add to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_add_index5(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv = zext i16 %x to i32
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv1 = zext i16 %0 to i32
+ %add = add nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %add to i16
+ store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_sub_index5(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv = zext i16 %x to i32
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv1 = zext i16 %0 to i32
+ %sub = sub nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %sub to i16
+ store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_or_index5(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %or3 = or i16 %0, %x
+ store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_and_index5(i16* nocapture %p, i16 zeroext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %and3 = and i16 %0, %x
+ store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_clrbit_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv = zext i16 %0 to i32
+ %and = and i32 %conv, 65503
+ %conv1 = trunc i32 %and to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_unsigned_short_setbit_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv = zext i16 %0 to i32
+ %or = or i32 %conv, 128
+ %conv1 = trunc i32 %or to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_add5(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %0 = load i16* %p, align 2, !tbaa !2
+ %conv2 = zext i16 %0 to i32
+ %add = add nsw i32 %conv2, 5
+ %conv1 = trunc i32 %add to i16
+ store i16 %conv1, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_add(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv4 = zext i16 %x to i32
+ %0 = load i16* %p, align 2, !tbaa !2
+ %conv13 = zext i16 %0 to i32
+ %add = add nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %add to i16
+ store i16 %conv2, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_sub(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv4 = zext i16 %x to i32
+ %0 = load i16* %p, align 2, !tbaa !2
+ %conv13 = zext i16 %0 to i32
+ %sub = sub nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %sub to i16
+ store i16 %conv2, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_or(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %0 = load i16* %p, align 2, !tbaa !2
+ %or3 = or i16 %0, %x
+ store i16 %or3, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_and(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %0 = load i16* %p, align 2, !tbaa !2
+ %and3 = and i16 %0, %x
+ store i16 %and3, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_clrbit(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %0 = load i16* %p, align 2, !tbaa !2
+ %conv2 = zext i16 %0 to i32
+ %and = and i32 %conv2, 65503
+ %conv1 = trunc i32 %and to i16
+ store i16 %conv1, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_setbit(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %0 = load i16* %p, align 2, !tbaa !2
+ %conv2 = zext i16 %0 to i32
+ %or = or i32 %conv2, 128
+ %conv1 = trunc i32 %or to i16
+ store i16 %conv1, i16* %p, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_add5_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv2 = zext i16 %0 to i32
+ %add = add nsw i32 %conv2, 5
+ %conv1 = trunc i32 %add to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_add_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv4 = zext i16 %x to i32
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv13 = zext i16 %0 to i32
+ %add = add nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %add to i16
+ store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_sub_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv4 = zext i16 %x to i32
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv13 = zext i16 %0 to i32
+ %sub = sub nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %sub to i16
+ store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_or_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %or3 = or i16 %0, %x
+ store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_and_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %and3 = and i16 %0, %x
+ store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv2 = zext i16 %0 to i32
+ %and = and i32 %conv2, 65503
+ %conv1 = trunc i32 %and to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_setbit_index(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i16* %p, i32 %i
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv2 = zext i16 %0 to i32
+ %or = or i32 %conv2, 128
+ %conv1 = trunc i32 %or to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_add5_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv2 = zext i16 %0 to i32
+ %add = add nsw i32 %conv2, 5
+ %conv1 = trunc i32 %add to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_add_index5(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}}
+ %conv4 = zext i16 %x to i32
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv13 = zext i16 %0 to i32
+ %add = add nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %add to i16
+ store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_sub_index5(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}}
+ %conv4 = zext i16 %x to i32
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv13 = zext i16 %0 to i32
+ %sub = sub nsw i32 %conv13, %conv4
+ %conv2 = trunc i32 %sub to i16
+ store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_or_index5(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %or3 = or i16 %0, %x
+ store i16 %or3, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_and_index5(i16* nocapture %p, i16 signext %x) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %and3 = and i16 %0, %x
+ store i16 %and3, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_clrbit_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv2 = zext i16 %0 to i32
+ %and = and i32 %conv2, 65503
+ %conv1 = trunc i32 %and to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_short_setbit_index5(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i16* %p, i32 5
+ %0 = load i16* %add.ptr, align 2, !tbaa !2
+ %conv2 = zext i16 %0 to i32
+ %or = or i32 %conv2, 128
+ %conv1 = trunc i32 %or to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2
+ ret void
+}
+
+define void @memop_signed_int_add5(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %0 = load i32* %p, align 4, !tbaa !3
+ %add = add i32 %0, 5
+ store i32 %add, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_add(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %0 = load i32* %p, align 4, !tbaa !3
+ %add = add i32 %0, %x
+ store i32 %add, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_sub(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %0 = load i32* %p, align 4, !tbaa !3
+ %sub = sub i32 %0, %x
+ store i32 %sub, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_or(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %0 = load i32* %p, align 4, !tbaa !3
+ %or = or i32 %0, %x
+ store i32 %or, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_and(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %0 = load i32* %p, align 4, !tbaa !3
+ %and = and i32 %0, %x
+ store i32 %and, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_clrbit(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %0 = load i32* %p, align 4, !tbaa !3
+ %and = and i32 %0, -33
+ store i32 %and, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_setbit(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %0 = load i32* %p, align 4, !tbaa !3
+ %or = or i32 %0, 128
+ store i32 %or, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_add5_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %add = add i32 %0, 5
+ store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %add = add i32 %0, %x
+ store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %sub = sub i32 %0, %x
+ store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %or = or i32 %0, %x
+ store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %and = and i32 %0, %x
+ store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %and = and i32 %0, -33
+ store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_setbit_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %or = or i32 %0, 128
+ store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_add5_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %add = add i32 %0, 5
+ store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_add_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %add = add i32 %0, %x
+ store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_sub_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %sub = sub i32 %0, %x
+ store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_or_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %or = or i32 %0, %x
+ store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_and_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %and = and i32 %0, %x
+ store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_clrbit_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %and = and i32 %0, -33
+ store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_signed_int_setbit_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %or = or i32 %0, 128
+ store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_add5(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %0 = load i32* %p, align 4, !tbaa !3
+ %add = add nsw i32 %0, 5
+ store i32 %add, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_add(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %0 = load i32* %p, align 4, !tbaa !3
+ %add = add nsw i32 %0, %x
+ store i32 %add, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_sub(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %0 = load i32* %p, align 4, !tbaa !3
+ %sub = sub nsw i32 %0, %x
+ store i32 %sub, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_or(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %0 = load i32* %p, align 4, !tbaa !3
+ %or = or i32 %0, %x
+ store i32 %or, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_and(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %0 = load i32* %p, align 4, !tbaa !3
+ %and = and i32 %0, %x
+ store i32 %and, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_clrbit(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %0 = load i32* %p, align 4, !tbaa !3
+ %and = and i32 %0, -33
+ store i32 %and, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_setbit(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %0 = load i32* %p, align 4, !tbaa !3
+ %or = or i32 %0, 128
+ store i32 %or, i32* %p, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_add5_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %add = add nsw i32 %0, 5
+ store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %add = add nsw i32 %0, %x
+ store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %sub = sub nsw i32 %0, %x
+ store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %or = or i32 %0, %x
+ store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %and = and i32 %0, %x
+ store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %and = and i32 %0, -33
+ store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_setbit_index(i32* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i32* %p, i32 %i
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %or = or i32 %0, 128
+ store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_add5_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %add = add nsw i32 %0, 5
+ store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_add_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %add = add nsw i32 %0, %x
+ store i32 %add, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_sub_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %sub = sub nsw i32 %0, %x
+ store i32 %sub, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_or_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %or = or i32 %0, %x
+ store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_and_index5(i32* nocapture %p, i32 %x) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}}
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %and = and i32 %0, %x
+ store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_clrbit_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %and = and i32 %0, -33
+ store i32 %and, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+define void @memop_unsigned_int_setbit_index5(i32* nocapture %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
+ %add.ptr = getelementptr inbounds i32* %p, i32 5
+ %0 = load i32* %add.ptr, align 4, !tbaa !3
+ %or = or i32 %0, 128
+ store i32 %or, i32* %add.ptr, align 4, !tbaa !3
+ ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
+!2 = metadata !{metadata !"short", metadata !0}
+!3 = metadata !{metadata !"int", metadata !0}
diff --git a/test/CodeGen/Hexagon/memops1.ll b/test/CodeGen/Hexagon/memops1.ll
new file mode 100644
index 000000000000..2babdc848ddc
--- /dev/null
+++ b/test/CodeGen/Hexagon/memops1.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Generate MemOps for V4 and above.
+
+
+define void @f(i32* %p) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#40){{ *}}-={{ *}}#1
+ %p.addr = alloca i32*, align 4
+ store i32* %p, i32** %p.addr, align 4
+ %0 = load i32** %p.addr, align 4
+ %add.ptr = getelementptr inbounds i32* %0, i32 10
+ %1 = load i32* %add.ptr, align 4
+ %sub = sub nsw i32 %1, 1
+ store i32 %sub, i32* %add.ptr, align 4
+ ret void
+}
+
+define void @g(i32* %p, i32 %i) nounwind {
+entry:
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#40){{ *}}-={{ *}}#1
+ %p.addr = alloca i32*, align 4
+ %i.addr = alloca i32, align 4
+ store i32* %p, i32** %p.addr, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32** %p.addr, align 4
+ %1 = load i32* %i.addr, align 4
+ %add.ptr = getelementptr inbounds i32* %0, i32 %1
+ %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 10
+ %2 = load i32* %add.ptr1, align 4
+ %sub = sub nsw i32 %2, 1
+ store i32 %sub, i32* %add.ptr1, align 4
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/memops2.ll b/test/CodeGen/Hexagon/memops2.ll
new file mode 100644
index 000000000000..b1b25445c029
--- /dev/null
+++ b/test/CodeGen/Hexagon/memops2.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Generate MemOps for V4 and above.
+
+
+define void @f(i16* nocapture %p) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1
+ %add.ptr = getelementptr inbounds i16* %p, i32 10
+ %0 = load i16* %add.ptr, align 2, !tbaa !0
+ %conv2 = zext i16 %0 to i32
+ %sub = add nsw i32 %conv2, 65535
+ %conv1 = trunc i32 %sub to i16
+ store i16 %conv1, i16* %add.ptr, align 2, !tbaa !0
+ ret void
+}
+
+define void @g(i16* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1
+ %add.ptr.sum = add i32 %i, 10
+ %add.ptr1 = getelementptr inbounds i16* %p, i32 %add.ptr.sum
+ %0 = load i16* %add.ptr1, align 2, !tbaa !0
+ %conv3 = zext i16 %0 to i32
+ %sub = add nsw i32 %conv3, 65535
+ %conv2 = trunc i32 %sub to i16
+ store i16 %conv2, i16* %add.ptr1, align 2, !tbaa !0
+ ret void
+}
+
+!0 = metadata !{metadata !"short", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/memops3.ll b/test/CodeGen/Hexagon/memops3.ll
new file mode 100644
index 000000000000..5b8bd6c87bfb
--- /dev/null
+++ b/test/CodeGen/Hexagon/memops3.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Generate MemOps for V4 and above.
+
+
+define void @f(i8* nocapture %p) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1
+ %add.ptr = getelementptr inbounds i8* %p, i32 10
+ %0 = load i8* %add.ptr, align 1, !tbaa !0
+ %conv = zext i8 %0 to i32
+ %sub = add nsw i32 %conv, 255
+ %conv1 = trunc i32 %sub to i8
+ store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+ ret void
+}
+
+define void @g(i8* nocapture %p, i32 %i) nounwind {
+entry:
+; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1
+ %add.ptr.sum = add i32 %i, 10
+ %add.ptr1 = getelementptr inbounds i8* %p, i32 %add.ptr.sum
+ %0 = load i8* %add.ptr1, align 1, !tbaa !0
+ %conv = zext i8 %0 to i32
+ %sub = add nsw i32 %conv, 255
+ %conv2 = trunc i32 %sub to i8
+ store i8 %conv2, i8* %add.ptr1, align 1, !tbaa !0
+ ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/misaligned-access.ll b/test/CodeGen/Hexagon/misaligned-access.ll
new file mode 100644
index 000000000000..4dafb44cc3ef
--- /dev/null
+++ b/test/CodeGen/Hexagon/misaligned-access.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s
+; Check that the misaligned load doesn't cause the compiler to assert.
+
+declare i32 @_hi(i64) #1
+@temp1 = common global i32 0, align 4
+
+define i32 @CSDRSEARCH_executeSearchManager() #0 {
+entry:
+ %temp = alloca i32, align 4
+ %0 = load i32* @temp1, align 4
+ store i32 %0, i32* %temp, align 4
+ %1 = bitcast i32* %temp to i64*
+ %2 = load i64* %1, align 8
+ %call = call i32 @_hi(i64 %2)
+ ret i32 %call
+}
diff --git a/test/CodeGen/Hexagon/postinc-load.ll b/test/CodeGen/Hexagon/postinc-load.ll
new file mode 100644
index 000000000000..855a347d74f5
--- /dev/null
+++ b/test/CodeGen/Hexagon/postinc-load.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check that post-increment load instructions are being generated.
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}++{{ *}}#4{{ *}})
+
+define i32 @sum(i32* nocapture %a, i16* nocapture %b, i32 %n) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ]
+ %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
+ %arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
+ %sum.03 = phi i32 [ 0, %entry ], [ %add2, %for.body ]
+ %0 = load i32* %arrayidx.phi, align 4
+ %1 = load i16* %arrayidx1.phi, align 2
+ %conv = sext i16 %1 to i32
+ %add = add i32 %0, %sum.03
+ %add2 = add i32 %add, %conv
+ %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+ %arrayidx1.inc = getelementptr i16* %arrayidx1.phi, i32 1
+ %lsr.iv.next = add i32 %lsr.iv, -1
+ %exitcond = icmp eq i32 %lsr.iv.next, 0
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret i32 %add2
+}
+
diff --git a/test/CodeGen/Hexagon/postinc-store.ll b/test/CodeGen/Hexagon/postinc-store.ll
new file mode 100644
index 000000000000..99a3a58ad39c
--- /dev/null
+++ b/test/CodeGen/Hexagon/postinc-store.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check that post-increment store instructions are being generated.
+; CHECK: memw(r{{[0-9]+}}{{ *}}++{{ *}}#4{{ *}}){{ *}}={{ *}}r{{[0-9]+}}
+
+define i32 @sum(i32* nocapture %a, i16* nocapture %b, i32 %n) nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ]
+ %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
+ %arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
+ %0 = load i32* %arrayidx.phi, align 4
+ %1 = load i16* %arrayidx1.phi, align 2
+ %conv = sext i16 %1 to i32
+ %factor = mul i32 %0, 2
+ %add3 = add i32 %factor, %conv
+ store i32 %add3, i32* %arrayidx.phi, align 4
+
+ %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+ %arrayidx1.inc = getelementptr i16* %arrayidx1.phi, i32 1
+ %lsr.iv.next = add i32 %lsr.iv, -1
+ %exitcond = icmp eq i32 %lsr.iv.next, 0
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/pred-absolute-store.ll b/test/CodeGen/Hexagon/pred-absolute-store.ll
new file mode 100644
index 000000000000..b1b09f414a54
--- /dev/null
+++ b/test/CodeGen/Hexagon/pred-absolute-store.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we are able to predicate instructions with absolute
+; addressing mode.
+
+; CHECK: if{{ *}}(p{{[0-3]+}}){{ *}}memw(##gvar){{ *}}={{ *}}r{{[0-9]+}}
+
+@gvar = external global i32
+define i32 @test2(i32 %a, i32 %b) nounwind {
+entry:
+ %cmp = icmp eq i32 %a, %b
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i32 %a, i32* @gvar, align 4
+ br label %if.end
+
+if.end:
+ ret i32 %b
+}
diff --git a/test/CodeGen/Hexagon/predicate-copy.ll b/test/CodeGen/Hexagon/predicate-copy.ll
new file mode 100644
index 000000000000..552b68794195
--- /dev/null
+++ b/test/CodeGen/Hexagon/predicate-copy.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}} = p{{[0-9]+}}
+define i1 @foo() {
+entry:
+ ret i1 false
+}
+
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
index e488f33c3d16..f91300b5067e 100644
--- a/test/CodeGen/Hexagon/struct_args.ll
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: r{{[0-9]}}:{{[0-9]}} = combine(r{{[0-9]}}, r{{[0-9]}})
+; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}})
; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
%struct.small = type { i32, i32 }
diff --git a/test/CodeGen/Hexagon/sube.ll b/test/CodeGen/Hexagon/sube.ll
new file mode 100644
index 000000000000..84172e957d04
--- /dev/null
+++ b/test/CodeGen/Hexagon/sube.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+:[0-9]+}} = #0
+; CHECK: r{{[0-9]+:[0-9]+}} = #1
+; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}})
+
+define void @check_sube_subc(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+ %tmp1 = zext i64 %AL to i128
+ %tmp23 = zext i64 %AH to i128
+ %tmp4 = shl i128 %tmp23, 64
+ %tmp5 = or i128 %tmp4, %tmp1
+ %tmp67 = zext i64 %BL to i128
+ %tmp89 = zext i64 %BH to i128
+ %tmp11 = shl i128 %tmp89, 64
+ %tmp12 = or i128 %tmp11, %tmp67
+ %tmp15 = sub i128 %tmp5, %tmp12
+ %tmp1617 = trunc i128 %tmp15 to i64
+ store i64 %tmp1617, i64* %RL
+ %tmp21 = lshr i128 %tmp15, 64
+ %tmp2122 = trunc i128 %tmp21 to i64
+ store i64 %tmp2122, i64* %RH
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/Hexagon/validate-offset.ll b/test/CodeGen/Hexagon/validate-offset.ll
new file mode 100644
index 000000000000..9e7d0aa07832
--- /dev/null
+++ b/test/CodeGen/Hexagon/validate-offset.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s -O0
+
+; This is a regression test which makes sure that the offset check
+; is available for the STRiw_indexed instruction. This is required
+; by the 'Hexagon Expand Predicate Spill Code' pass.
+
+define i32 @f(i32 %a, i32 %b) nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %a.addr = alloca i32, align 4
+ %b.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 %b, i32* %b.addr, align 4
+ %0 = load i32* %a.addr, align 4
+ %1 = load i32* %b.addr, align 4
+ %cmp = icmp sgt i32 %0, %1
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %2 = load i32* %a.addr, align 4
+ %3 = load i32* %b.addr, align 4
+ %add = add nsw i32 %2, %3
+ store i32 %add, i32* %retval
+ br label %return
+
+if.else:
+ %4 = load i32* %a.addr, align 4
+ %5 = load i32* %b.addr, align 4
+ %sub = sub nsw i32 %4, %5
+ store i32 %sub, i32* %retval
+ br label %return
+
+return:
+ %6 = load i32* %retval
+ ret i32 %6
+}
diff --git a/test/CodeGen/Hexagon/zextloadi1.ll b/test/CodeGen/Hexagon/zextloadi1.ll
new file mode 100644
index 000000000000..cb6e6fdf84a5
--- /dev/null
+++ b/test/CodeGen/Hexagon/zextloadi1.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}} = ##i129_l+16
+; CHECK: r{{[0-9]+}} = ##i129_s+16
+; CHECK: memd(##i129_s) = r{{[0-9]+:[0-9]+}}
+; CHECK: r{{[0-9]+}} = ##i65_l+8
+; CHECK: r{{[0-9]+}} = ##i65_s+8
+; CHECK: memd(##i65_s) = r{{[0-9]+:[0-9]+}}
+
+@i65_l = external global i65
+@i65_s = external global i65
+@i129_l = external global i129
+@i129_s = external global i129
+
+define void @i129_ls() nounwind {
+ %tmp = load i129* @i129_l
+ store i129 %tmp, i129* @i129_s
+ ret void
+}
+
+define void @i65_ls() nounwind {
+ %tmp = load i65* @i65_l
+ store i65 %tmp, i65* @i65_s
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/Inputs/DbgValueOtherTargets.ll b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
new file mode 100644
index 000000000000..d5162b964a08
--- /dev/null
+++ b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
@@ -0,0 +1,28 @@
+; Check that DEBUG_VALUE comments come through on a variety of targets.
+
+define i32 @main() nounwind ssp {
+entry:
+; CHECK: DEBUG_VALUE
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+ ret i32 0, !dbg !10
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!2}
+
+!0 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 120996)", i1 false, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 0}
+!7 = metadata !{i32 786688, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 786443, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 3, i32 11, metadata !8, null}
+!10 = metadata !{i32 4, i32 2, metadata !8, null}
+!11 = metadata !{metadata !0}
+!12 = metadata !{metadata !"/tmp/x.c", metadata !"/Users/manav"}
diff --git a/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index d8970eac9007..000000000000
--- a/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=mblaze -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/MBlaze/DbgValueOtherTargets.test b/test/CodeGen/MBlaze/DbgValueOtherTargets.test
new file mode 100644
index 000000000000..8b850f51105b
--- /dev/null
+++ b/test/CodeGen/MBlaze/DbgValueOtherTargets.test
@@ -0,0 +1 @@
+RUN: llc -O0 -march=mblaze -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/MBlaze/lit.local.cfg b/test/CodeGen/MBlaze/lit.local.cfg
index e236200d7572..ff4928de4b9c 100644
--- a/test/CodeGen/MBlaze/lit.local.cfg
+++ b/test/CodeGen/MBlaze/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
targets = set(config.root.targets_to_build.split())
if not 'MBlaze' in targets:
diff --git a/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 9d549da8a93a..000000000000
--- a/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=msp430 -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/MSP430/DbgValueOtherTargets.test b/test/CodeGen/MSP430/DbgValueOtherTargets.test
new file mode 100644
index 000000000000..7adfbcafa35b
--- /dev/null
+++ b/test/CodeGen/MSP430/DbgValueOtherTargets.test
@@ -0,0 +1 @@
+RUN: llc -O0 -march=msp430 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/MSP430/byval.ll b/test/CodeGen/MSP430/byval.ll
new file mode 100644
index 000000000000..9dda0a097b56
--- /dev/null
+++ b/test/CodeGen/MSP430/byval.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
+target triple = "msp430---elf"
+
+%struct.Foo = type { i16, i16, i16 }
+@foo = global %struct.Foo { i16 1, i16 2, i16 3 }, align 2
+
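+; The byval argument makes the caller copy %struct.Foo onto the stack; the
+; CHECK lines below verify the word-by-word copy in @caller and the load of
+; the second field in @callee.
+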
+define i16 @callee(%struct.Foo* byval %f) nounwind {
+entry:
+; CHECK: callee:
+; CHECK: mov.w 2(r1), r15
+ %0 = getelementptr inbounds %struct.Foo* %f, i32 0, i32 0
+ %1 = load i16* %0, align 2
+ ret i16 %1
+}
+
+define void @caller() nounwind {
+entry:
+; CHECK: caller:
+; CHECK: mov.w &foo+4, 4(r1)
+; CHECK-NEXT: mov.w &foo+2, 2(r1)
+; CHECK-NEXT: mov.w &foo, 0(r1)
+ %call = call i16 @callee(%struct.Foo* byval @foo)
+ ret void
+}
diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg
index 972732ebad30..0ca9fc9c6912 100644
--- a/test/CodeGen/MSP430/lit.local.cfg
+++ b/test/CodeGen/MSP430/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
targets = set(config.root.targets_to_build.split())
if not 'MSP430' in targets:
diff --git a/test/CodeGen/MSP430/vararg.ll b/test/CodeGen/MSP430/vararg.ll
new file mode 100644
index 000000000000..603d3ec6b686
--- /dev/null
+++ b/test/CodeGen/MSP430/vararg.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
+target triple = "msp430---elf"
+
+declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_end(i8*) nounwind
+declare void @llvm.va_copy(i8*, i8*) nounwind
+
+define void @va_start(i16 %a, ...) nounwind {
+entry:
+; CHECK: va_start:
+; CHECK: sub.w #2, r1
+ %vl = alloca i8*, align 2
+ %vl1 = bitcast i8** %vl to i8*
+; CHECK-NEXT: mov.w r1, [[REG:r[0-9]+]]
+; CHECK-NEXT: add.w #6, [[REG]]
+; CHECK-NEXT: mov.w [[REG]], 0(r1)
+ call void @llvm.va_start(i8* %vl1)
+ call void @llvm.va_end(i8* %vl1)
+ ret void
+}
+
+define i16 @va_arg(i8* %vl) nounwind {
+entry:
+; CHECK: va_arg:
+ %vl.addr = alloca i8*, align 2
+; CHECK: mov.w r15, 0(r1)
+ store i8* %vl, i8** %vl.addr, align 2
+; CHECK: mov.w r15, [[REG:r[0-9]+]]
+; CHECK-NEXT: add.w #2, [[REG]]
+; CHECK-NEXT: mov.w [[REG]], 0(r1)
+ %0 = va_arg i8** %vl.addr, i16
+; CHECK-NEXT: mov.w 0(r15), r15
+ ret i16 %0
+}
+
+define void @va_copy(i8* %vl) nounwind {
+entry:
+; CHECK: va_copy:
+ %vl.addr = alloca i8*, align 2
+ %vl2 = alloca i8*, align 2
+; CHECK: mov.w r15, 2(r1)
+ store i8* %vl, i8** %vl.addr, align 2
+ %0 = bitcast i8** %vl2 to i8*
+ %1 = bitcast i8** %vl.addr to i8*
+; CHECK-NEXT: mov.w r15, 0(r1)
+ call void @llvm.va_copy(i8* %0, i8* %1)
+ ret void
+}
diff --git a/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 994e19af4f87..000000000000
--- a/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=mips -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 261fe9db1732..38d7b7e25592 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -1,6 +1,11 @@
-; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s -check-prefix=STATIC-O32
-; RUN: llc < %s -march=mips -relocation-model=pic | FileCheck %s -check-prefix=PIC-O32
-; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc < %s -march=mips -relocation-model=static | \
+; RUN: FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc < %s -march=mips -relocation-model=pic | \
+; RUN: FileCheck %s -check-prefix=PIC-O32
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 | \
+; RUN: FileCheck %s -check-prefix=N64
+; RUN: llc < %s -march=mips64 -relocation-model=static -mcpu=mips64 | \
+; RUN: FileCheck %s -check-prefix=N64
define i32 @main() nounwind readnone {
entry:
@@ -17,12 +22,12 @@ entry:
; PIC-O32: lw $[[R4:[0-9]+]], %lo($JTI0_0)($[[R2]])
; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
; PIC-O32: jr $[[R5]]
-; PIC-N64: dsll $[[R0:[0-9]+]], ${{[0-9]+}}, 3
-; PIC-N64: ld $[[R1:[0-9]+]], %got_page($JTI0_0)
-; PIC-N64: daddu $[[R2:[0-9]+]], $[[R0:[0-9]+]], $[[R1]]
-; PIC-N64: ld $[[R4:[0-9]+]], %got_ofst($JTI0_0)($[[R2]])
-; PIC-N64: daddu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
-; PIC-N64: jr $[[R5]]
+; N64: dsll $[[R0:[0-9]+]], ${{[0-9]+}}, 3
+; N64: ld $[[R1:[0-9]+]], %got_page($JTI0_0)
+; N64: daddu $[[R2:[0-9]+]], $[[R0:[0-9]+]], $[[R1]]
+; N64: ld $[[R4:[0-9]+]], %got_ofst($JTI0_0)($[[R2]])
+; N64: daddu $[[R5:[0-9]+]], $[[R4:[0-9]+]]
+; N64: jr $[[R5]]
switch i32 %0, label %bb4 [
i32 0, label %bb5
i32 1, label %bb1
@@ -58,10 +63,10 @@ bb5: ; preds = %entry
; PIC-O32: .gpword
; PIC-O32: .gpword
; PIC-O32: .gpword
-; PIC-N64: .align 3
-; PIC-N64: $JTI0_0:
-; PIC-N64: .gpdword
-; PIC-N64: .gpdword
-; PIC-N64: .gpdword
-; PIC-N64: .gpdword
+; N64: .align 3
+; N64: $JTI0_0:
+; N64: .gpdword
+; N64: .gpdword
+; N64: .gpdword
+; N64: .gpdword
diff --git a/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll b/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll
new file mode 100644
index 000000000000..9d4daee696db
--- /dev/null
+++ b/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 < %s
+
+@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1
+
+define void @t(i8* %ptr) {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %ptr, i8* getelementptr inbounds ([7 x i8]* @.str, i64 0, i64 0), i64 7, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/Mips/DbgValueOtherTargets.test b/test/CodeGen/Mips/DbgValueOtherTargets.test
new file mode 100644
index 000000000000..da20e7ef5224
--- /dev/null
+++ b/test/CodeGen/Mips/DbgValueOtherTargets.test
@@ -0,0 +1 @@
+RUN: llc -O0 -march=mips -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/Mips/addi.ll b/test/CodeGen/Mips/addi.ll
new file mode 100644
index 000000000000..8f70a469c44f
--- /dev/null
+++ b/test/CodeGen/Mips/addi.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 6, align 4
+@j = global i32 12, align 4
+@k = global i32 15, align 4
+@l = global i32 20, align 4
+@.str = private unnamed_addr constant [13 x i8] c"%i %i %i %i\0A\00", align 1
+
+define void @foo() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %add = add nsw i32 %0, 5
+ store i32 %add, i32* @i, align 4
+ %1 = load i32* @j, align 4
+ %sub = sub nsw i32 %1, 5
+ store i32 %sub, i32* @j, align 4
+ %2 = load i32* @k, align 4
+ %add1 = add nsw i32 %2, 10000
+ store i32 %add1, i32* @k, align 4
+ %3 = load i32* @l, align 4
+ %sub2 = sub nsw i32 %3, 10000
+ store i32 %sub2, i32* @l, align 4
+; 16: addiu ${{[0-9]+}}, 5 # 16 bit inst
+; 16: addiu ${{[0-9]+}}, -5 # 16 bit inst
+; 16: addiu ${{[0-9]+}}, 10000
+; 16: addiu ${{[0-9]+}}, -10000
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/addressing-mode.ll b/test/CodeGen/Mips/addressing-mode.ll
new file mode 100644
index 000000000000..ea76dde82dc3
--- /dev/null
+++ b/test/CodeGen/Mips/addressing-mode.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+@g0 = common global i32 0, align 4
+@g1 = common global i32 0, align 4
+
+; Check that LSR doesn't choose a solution with a formula "reg + 4*reg".
+;
+; CHECK: $BB0_2:
+; CHECK-NOT: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
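+;
+; (That is, the loop is expected to advance a pointer by 4*m per iteration
+; rather than rescale j with a shift inside the loop body.)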
+
+define i32 @f0(i32 %n, i32 %m, [256 x i32]* nocapture %a, [256 x i32]* nocapture %b) nounwind readonly {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %s.022 = phi i32 [ 0, %entry ], [ %add7, %for.inc9 ]
+ %i.021 = phi i32 [ 0, %entry ], [ %add10, %for.inc9 ]
+ br label %for.body3
+
+for.body3:
+ %s.120 = phi i32 [ %s.022, %for.cond1.preheader ], [ %add7, %for.body3 ]
+ %j.019 = phi i32 [ 0, %for.cond1.preheader ], [ %add8, %for.body3 ]
+ %arrayidx4 = getelementptr inbounds [256 x i32]* %a, i32 %i.021, i32 %j.019
+ %0 = load i32* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds [256 x i32]* %b, i32 %i.021, i32 %j.019
+ %1 = load i32* %arrayidx6, align 4
+ %add = add i32 %0, %s.120
+ %add7 = add i32 %add, %1
+ %add8 = add nsw i32 %j.019, %m
+ %cmp2 = icmp slt i32 %add8, 64
+ br i1 %cmp2, label %for.body3, label %for.inc9
+
+for.inc9:
+ %add10 = add nsw i32 %i.021, %n
+ %cmp = icmp slt i32 %add10, 64
+ br i1 %cmp, label %for.cond1.preheader, label %for.end11
+
+for.end11:
+ ret i32 %add7
+}
+
diff --git a/test/CodeGen/Mips/align16.ll b/test/CodeGen/Mips/align16.ll
new file mode 100644
index 000000000000..99139abbe848
--- /dev/null
+++ b/test/CodeGen/Mips/align16.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 25, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @p(i32* %i) nounwind {
+entry:
+ ret void
+}
+
+
+define void @foo() nounwind {
+entry:
+ %y = alloca [512 x i32], align 4
+ %x = alloca i32, align 8
+ %zz = alloca i32, align 4
+ %z = alloca i32, align 4
+ %0 = load i32* @i, align 4
+ %arrayidx = getelementptr inbounds [512 x i32]* %y, i32 0, i32 10
+ store i32 %0, i32* %arrayidx, align 4
+ %1 = load i32* @i, align 4
+ store i32 %1, i32* %x, align 8
+ call void @p(i32* %x)
+ %arrayidx1 = getelementptr inbounds [512 x i32]* %y, i32 0, i32 10
+ call void @p(i32* %arrayidx1)
+ ret void
+}
+; 16: save $ra, $s0, $s1, 2040
+; 16: addiu $sp, -48 # 16 bit inst
+; 16: addiu $sp, 48 # 16 bit inst
+; 16: restore $ra, $s0, $s1, 2040 \ No newline at end of file
diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll
index 29f43c8afa18..d79ea9193d28 100644
--- a/test/CodeGen/Mips/alloca.ll
+++ b/test/CodeGen/Mips/alloca.ll
@@ -3,11 +3,11 @@
define i32 @twoalloca(i32 %size) nounwind {
entry:
; CHECK: subu $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]]
-; CHECK: addu $sp, $zero, $[[T0]]
+; CHECK: move $sp, $[[T0]]
; CHECK: subu $[[T2:[0-9]+]], $sp, $[[SZ]]
-; CHECK: addu $sp, $zero, $[[T2]]
-; CHECK: addu $4, $zero, $[[T0]]
-; CHECK: addu $4, $zero, $[[T2]]
+; CHECK: move $sp, $[[T2]]
+; CHECK: move $4, $[[T0]]
+; CHECK: move $4, $[[T2]]
%tmp1 = alloca i8, i32 %size, align 4
%add.ptr = getelementptr inbounds i8* %tmp1, i32 5
store i8 97, i8* %add.ptr, align 1
@@ -29,7 +29,7 @@ define i32 @alloca2(i32 %size) nounwind {
entry:
; CHECK: alloca2
; CHECK: subu $[[T0:[0-9]+]], $sp
-; CHECK: addu $sp, $zero, $[[T0]]
+; CHECK: move $sp, $[[T0]]
%tmp1 = alloca i8, i32 %size, align 4
%0 = bitcast i8* %tmp1 to i32*
diff --git a/test/CodeGen/Mips/alloca16.ll b/test/CodeGen/Mips/alloca16.ll
index 731edae43cbb..5ae9a847917b 100644
--- a/test/CodeGen/Mips/alloca16.ll
+++ b/test/CodeGen/Mips/alloca16.ll
@@ -68,8 +68,8 @@ entry:
%21 = load i32** %ip, align 4
%arrayidx6 = getelementptr inbounds i32* %21, i32 %20
%22 = load i32* %arrayidx6, align 4
-; 16: save 16
+; 16: addiu $sp, -16
call void @temp(i32 %22)
-; 16: restore 16
+; 16: addiu $sp, 16
ret void
}
diff --git a/test/CodeGen/Mips/br-jmp.ll b/test/CodeGen/Mips/br-jmp.ll
index 1b5513ab394d..9ca8d159614f 100644
--- a/test/CodeGen/Mips/br-jmp.ll
+++ b/test/CodeGen/Mips/br-jmp.ll
@@ -1,5 +1,7 @@
; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC16
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
define void @count(i32 %x, i32 %y, i32 %z) noreturn nounwind readnone {
entry:
@@ -11,3 +13,6 @@ bosco: ; preds = %bosco, %entry
; CHECK-PIC: b $BB0_1
; CHECK-STATIC: j $BB0_1
+; CHECK-PIC16: b $BB0_1
+; CHECK-STATIC16: b $BB0_1
+
diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll
index 2fdb736dc886..2deb037c9c39 100644
--- a/test/CodeGen/Mips/brdelayslot.ll
+++ b/test/CodeGen/Mips/brdelayslot.ll
@@ -1,5 +1,12 @@
; RUN: llc -march=mipsel -O0 < %s | FileCheck %s -check-prefix=None
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=Default
+; RUN: llc -march=mipsel -O1 -relocation-model=static < %s | \
+; RUN: FileCheck %s -check-prefix=STATICO1
+; RUN: llc -march=mipsel -disable-mips-df-forward-search=false \
+; RUN: -relocation-model=static < %s | FileCheck %s -check-prefix=FORWARD
+; RUN: llc -march=mipsel -disable-mips-df-backward-search \
+; RUN: -disable-mips-df-succbb-search=false < %s | \
+; RUN: FileCheck %s -check-prefix=SUCCBB
define void @foo1() nounwind {
entry:
@@ -35,3 +42,137 @@ entry:
declare void @foo4(double)
+@g2 = external global i32
+@g1 = external global i32
+@g3 = external global i32
+
+; Check that the branch delay slot can be filled with an instruction that has
+; operand $1.
+;
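+; ($1 is the assembler-temporary register $at on MIPS; the test presumably
+; guards against the filler rejecting instructions that use it.)
+;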
+; Default: foo5:
+; Default-NOT: nop
+
+define void @foo5(i32 %a) nounwind {
+entry:
+ %0 = load i32* @g2, align 4
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+ %1 = load i32* @g1, align 4
+ %add = add nsw i32 %1, %0
+ store i32 %add, i32* @g1, align 4
+ br label %if.end
+
+if.else:
+ %2 = load i32* @g3, align 4
+ %sub = sub nsw i32 %2, %0
+ store i32 %sub, i32* @g3, align 4
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+; Check that the delay slot filler can place a mov.s or mov.d in the delay slot.
+;
+; Default: foo6:
+; Default-NOT: nop
+; Default: .end foo6
+
+define void @foo6(float %a0, double %a1) nounwind {
+entry:
+ tail call void @foo7(double %a1, float %a0) nounwind
+ ret void
+}
+
+declare void @foo7(double, float)
+
+; Check that a store can move past other memory instructions.
+;
+; STATICO1: foo8:
+; STATICO1: jalr ${{[0-9]+}}
+; STATICO1-NEXT: sw ${{[0-9]+}}, %lo(g1)
+
+@foo9 = common global void ()* null, align 4
+
+define i32 @foo8(i32 %a) nounwind {
+entry:
+ store i32 %a, i32* @g1, align 4
+ %0 = load void ()** @foo9, align 4
+ tail call void %0() nounwind
+ %1 = load i32* @g1, align 4
+ %add = add nsw i32 %1, %a
+ ret i32 %add
+}
+
+; Test searchForward. Check that the second jal's slot is filled with another
+; instruction in the same block.
+;
+; FORWARD: foo10:
+; FORWARD: jal foo11
+; FORWARD: jal foo11
+; FORWARD-NOT: nop
+; FORWARD: end foo10
+
+define void @foo10() nounwind {
+entry:
+ tail call void @foo11() nounwind
+ tail call void @foo11() nounwind
+ store i32 0, i32* @g1, align 4
+ tail call void @foo11() nounwind
+ store i32 0, i32* @g1, align 4
+ ret void
+}
+
+declare void @foo11()
+
+; Check that delay slots of branches in both the entry block and loop body are
+; filled.
+;
+; SUCCBB: succbbs_loop1:
+; SUCCBB: bne ${{[0-9]+}}, $zero, $BB
+; SUCCBB-NEXT: addiu
+; SUCCBB: bne ${{[0-9]+}}, $zero, $BB
+; SUCCBB-NEXT: addiu
+
+define i32 @succbbs_loop1(i32* nocapture %a, i32 %n) {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %s.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i32 %i.05
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %s.06
+ %inc = add nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %s.0.lcssa
+}
+
+; Check that the first branch has its slot filled.
+;
+; SUCCBB: succbbs_br1:
+; SUCCBB: beq ${{[0-9]+}}, $zero, $BB
+; SUCCBB-NEXT: lw $25, %call16(foo100)
+
+define void @succbbs_br1(i32 %a) {
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @foo100() #1
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+declare void @foo100()
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 3af899a4e258..81925a4953ce 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -59,3 +59,140 @@ entry:
ret i64 %cond
}
+; slti and conditional move.
+;
+; Check that the pattern
+;   (select (setgt a, N), t, f)
+; turns into
+;   (movz t, (setlt a, N + 1), f)
+; if N + 1 fits in a 16-bit immediate.
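+;
+; For example, @slti0 below selects on (a > 32766): it compiles to
+; slti $r, $a, 32767 followed by a movz that picks 3 when $r is zero.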
+
+; O32: slti0:
+; O32: slti $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
+; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+
+define i32 @slti0(i32 %a) {
+entry:
+ %cmp = icmp sgt i32 %a, 32766
+ %cond = select i1 %cmp, i32 3, i32 4
+ ret i32 %cond
+}
+
+; O32: slti1:
+; O32: slt ${{[0-9]+}}
+
+define i32 @slti1(i32 %a) {
+entry:
+ %cmp = icmp sgt i32 %a, 32767
+ %cond = select i1 %cmp, i32 3, i32 4
+ ret i32 %cond
+}
+
+; O32: slti2:
+; O32: slti $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
+; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+
+define i32 @slti2(i32 %a) {
+entry:
+ %cmp = icmp sgt i32 %a, -32769
+ %cond = select i1 %cmp, i32 3, i32 4
+ ret i32 %cond
+}
+
+; O32: slti3:
+; O32: slt ${{[0-9]+}}
+
+define i32 @slti3(i32 %a) {
+entry:
+ %cmp = icmp sgt i32 %a, -32770
+ %cond = select i1 %cmp, i32 3, i32 4
+ ret i32 %cond
+}
+
+; 64-bit patterns.
+
+; N64: slti64_0:
+; N64: slti $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
+; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+
+define i64 @slti64_0(i64 %a) {
+entry:
+ %cmp = icmp sgt i64 %a, 32766
+ %conv = select i1 %cmp, i64 3, i64 4
+ ret i64 %conv
+}
+
+; N64: slti64_1:
+; N64: slt ${{[0-9]+}}
+
+define i64 @slti64_1(i64 %a) {
+entry:
+ %cmp = icmp sgt i64 %a, 32767
+ %conv = select i1 %cmp, i64 3, i64 4
+ ret i64 %conv
+}
+
+; N64: slti64_2:
+; N64: slti $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
+; N64: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+
+define i64 @slti64_2(i64 %a) {
+entry:
+ %cmp = icmp sgt i64 %a, -32769
+ %conv = select i1 %cmp, i64 3, i64 4
+ ret i64 %conv
+}
+
+; N64: slti64_3:
+; N64: slt ${{[0-9]+}}
+
+define i64 @slti64_3(i64 %a) {
+entry:
+ %cmp = icmp sgt i64 %a, -32770
+ %conv = select i1 %cmp, i64 3, i64 4
+ ret i64 %conv
+}
+
+; sltiu instructions.
+
+; O32: sltiu0:
+; O32: sltiu $[[R0:[0-9]+]], ${{[0-9]+}}, 32767
+; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+
+define i32 @sltiu0(i32 %a) {
+entry:
+ %cmp = icmp ugt i32 %a, 32766
+ %cond = select i1 %cmp, i32 3, i32 4
+ ret i32 %cond
+}
+
+; O32: sltiu1:
+; O32: sltu ${{[0-9]+}}
+
+define i32 @sltiu1(i32 %a) {
+entry:
+ %cmp = icmp ugt i32 %a, 32767
+ %cond = select i1 %cmp, i32 3, i32 4
+ ret i32 %cond
+}
+
+; O32: sltiu2:
+; O32: sltiu $[[R0:[0-9]+]], ${{[0-9]+}}, -32768
+; O32: movz ${{[0-9]+}}, ${{[0-9]+}}, $[[R0]]
+
+define i32 @sltiu2(i32 %a) {
+entry:
+ %cmp = icmp ugt i32 %a, -32769
+ %cond = select i1 %cmp, i32 3, i32 4
+ ret i32 %cond
+}
+
+; O32: sltiu3:
+; O32: sltu ${{[0-9]+}}
+
+define i32 @sltiu3(i32 %a) {
+entry:
+ %cmp = icmp ugt i32 %a, -32770
+ %cond = select i1 %cmp, i32 3, i32 4
+ ret i32 %cond
+}
diff --git a/test/CodeGen/Mips/dsp-patterns.ll b/test/CodeGen/Mips/dsp-patterns.ll
new file mode 100644
index 000000000000..0752f69c3e9e
--- /dev/null
+++ b/test/CodeGen/Mips/dsp-patterns.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s
+
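+; lbux, lhx and lwx are the indexed (register + register) loads from the MIPS
+; DSP ASE; the tests check that base-plus-index addressing selects them.
+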
+; CHECK: test_lbux:
+; CHECK: lbux ${{[0-9]+}}
+
+define zeroext i8 @test_lbux(i8* nocapture %b, i32 %i) {
+entry:
+ %add.ptr = getelementptr inbounds i8* %b, i32 %i
+ %0 = load i8* %add.ptr, align 1
+ ret i8 %0
+}
+
+; CHECK: test_lhx:
+; CHECK: lhx ${{[0-9]+}}
+
+define signext i16 @test_lhx(i16* nocapture %b, i32 %i) {
+entry:
+ %add.ptr = getelementptr inbounds i16* %b, i32 %i
+ %0 = load i16* %add.ptr, align 2
+ ret i16 %0
+}
+
+; CHECK: test_lwx:
+; CHECK: lwx ${{[0-9]+}}
+
+define i32 @test_lwx(i32* nocapture %b, i32 %i) {
+entry:
+ %add.ptr = getelementptr inbounds i32* %b, i32 %i
+ %0 = load i32* %add.ptr, align 4
+ ret i32 %0
+}
diff --git a/test/CodeGen/Mips/eh-return32.ll b/test/CodeGen/Mips/eh-return32.ll
new file mode 100644
index 000000000000..c3003b34b162
--- /dev/null
+++ b/test/CodeGen/Mips/eh-return32.ll
@@ -0,0 +1,85 @@
+; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s
+
+declare void @llvm.eh.return.i32(i32, i8*)
+declare void @foo(...)
+
+define i8* @f1(i32 %offset, i8* %handler) {
+entry:
+ call void (...)* @foo()
+ call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
+ unreachable
+
+; CHECK: f1
+; CHECK: addiu $sp, $sp, -[[spoffset:[0-9]+]]
+
+; check that $a0-$a3 are saved on stack.
+; CHECK: sw $4, [[offset0:[0-9]+]]($sp)
+; CHECK: sw $5, [[offset1:[0-9]+]]($sp)
+; CHECK: sw $6, [[offset2:[0-9]+]]($sp)
+; CHECK: sw $7, [[offset3:[0-9]+]]($sp)
+
+; check that .cfi_offset directives are emitted for $a0-$a3.
+; CHECK: .cfi_offset 4,
+; CHECK: .cfi_offset 5,
+; CHECK: .cfi_offset 6,
+; CHECK: .cfi_offset 7,
+
+; check that stack adjustment and handler are put in $v1 and $v0.
+; CHECK: move $[[R0:[a-z0-9]+]], $5
+; CHECK: move $[[R1:[a-z0-9]+]], $4
+; CHECK: move $3, $[[R1]]
+; CHECK: move $2, $[[R0]]
+
+; check that $a0-$a3 are restored from stack.
+; CHECK: lw $4, [[offset0]]($sp)
+; CHECK: lw $5, [[offset1]]($sp)
+; CHECK: lw $6, [[offset2]]($sp)
+; CHECK: lw $7, [[offset3]]($sp)
+
+; check that stack is adjusted by $v1 and that code returns to address in $v0
+; also check that $25 contains handler value
+; CHECK: addiu $sp, $sp, [[spoffset]]
+; CHECK: move $25, $2
+; CHECK: move $ra, $2
+; CHECK: jr $ra
+; CHECK: addu $sp, $sp, $3
+}
+
+define i8* @f2(i32 %offset, i8* %handler) {
+entry:
+ call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
+ unreachable
+
+; CHECK: f2
+; CHECK: addiu $sp, $sp, -[[spoffset:[0-9]+]]
+
+; check that $a0-$a3 are saved on stack.
+; CHECK: sw $4, [[offset0:[0-9]+]]($sp)
+; CHECK: sw $5, [[offset1:[0-9]+]]($sp)
+; CHECK: sw $6, [[offset2:[0-9]+]]($sp)
+; CHECK: sw $7, [[offset3:[0-9]+]]($sp)
+
+; check that .cfi_offset directives are emitted for $a0-$a3.
+; CHECK: .cfi_offset 4,
+; CHECK: .cfi_offset 5,
+; CHECK: .cfi_offset 6,
+; CHECK: .cfi_offset 7,
+
+; check that stack adjustment and handler are put in $v1 and $v0.
+; CHECK: move $3, $4
+; CHECK: move $2, $5
+
+; check that $a0-$a3 are restored from stack.
+; CHECK: lw $4, [[offset0]]($sp)
+; CHECK: lw $5, [[offset1]]($sp)
+; CHECK: lw $6, [[offset2]]($sp)
+; CHECK: lw $7, [[offset3]]($sp)
+
+; check that stack is adjusted by $v1 and that code returns to address in $v0
+; also check that $25 contains handler value
+; CHECK: addiu $sp, $sp, [[spoffset]]
+; CHECK: move $25, $2
+; CHECK: move $ra, $2
+; CHECK: jr $ra
+; CHECK: addu $sp, $sp, $3
+}
diff --git a/test/CodeGen/Mips/eh-return64.ll b/test/CodeGen/Mips/eh-return64.ll
new file mode 100644
index 000000000000..373a9a114453
--- /dev/null
+++ b/test/CodeGen/Mips/eh-return64.ll
@@ -0,0 +1,87 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+declare void @llvm.eh.return.i64(i64, i8*)
+declare void @foo(...)
+
+define void @f1(i64 %offset, i8* %handler) {
+entry:
+ call void (...)* @foo()
+ call void @llvm.eh.return.i64(i64 %offset, i8* %handler)
+ unreachable
+
+; CHECK: f1
+; CHECK: daddiu $sp, $sp, -[[spoffset:[0-9]+]]
+
+; check that $a0-$a3 are saved on stack.
+; CHECK: sd $4, [[offset0:[0-9]+]]($sp)
+; CHECK: sd $5, [[offset1:[0-9]+]]($sp)
+; CHECK: sd $6, [[offset2:[0-9]+]]($sp)
+; CHECK: sd $7, [[offset3:[0-9]+]]($sp)
+
+; check that .cfi_offset directives are emitted for $a0-$a3.
+; CHECK: .cfi_offset 4,
+; CHECK: .cfi_offset 5,
+; CHECK: .cfi_offset 6,
+; CHECK: .cfi_offset 7,
+
+; check that stack adjustment and handler are put in $v1 and $v0.
+; CHECK: move $[[R0:[a-z0-9]+]], $5
+; CHECK: move $[[R1:[a-z0-9]+]], $4
+; CHECK: move $3, $[[R1]]
+; CHECK: move $2, $[[R0]]
+
+; check that $a0-$a3 are restored from stack.
+; CHECK: ld $4, [[offset0]]($sp)
+; CHECK: ld $5, [[offset1]]($sp)
+; CHECK: ld $6, [[offset2]]($sp)
+; CHECK: ld $7, [[offset3]]($sp)
+
+; check that stack is adjusted by $v1 and that code returns to address in $v0
+; also check that $25 contains handler value
+; CHECK: daddiu $sp, $sp, [[spoffset]]
+; CHECK: move $25, $2
+; CHECK: move $ra, $2
+; CHECK: jr $ra
+; CHECK: daddu $sp, $sp, $3
+
+}
+
+define void @f2(i64 %offset, i8* %handler) {
+entry:
+ call void @llvm.eh.return.i64(i64 %offset, i8* %handler)
+ unreachable
+
+; CHECK: f2
+; CHECK: daddiu $sp, $sp, -[[spoffset:[0-9]+]]
+
+; check that $a0-$a3 are saved on stack.
+; CHECK: sd $4, [[offset0:[0-9]+]]($sp)
+; CHECK: sd $5, [[offset1:[0-9]+]]($sp)
+; CHECK: sd $6, [[offset2:[0-9]+]]($sp)
+; CHECK: sd $7, [[offset3:[0-9]+]]($sp)
+
+; check that .cfi_offset directives are emitted for $a0-$a3.
+; CHECK: .cfi_offset 4,
+; CHECK: .cfi_offset 5,
+; CHECK: .cfi_offset 6,
+; CHECK: .cfi_offset 7,
+
+; check that stack adjustment and handler are put in $v1 and $v0.
+; CHECK: move $3, $4
+; CHECK: move $2, $5
+
+; check that $a0-$a3 are restored from stack.
+; CHECK: ld $4, [[offset0]]($sp)
+; CHECK: ld $5, [[offset1]]($sp)
+; CHECK: ld $6, [[offset2]]($sp)
+; CHECK: ld $7, [[offset3]]($sp)
+
+; check that stack is adjusted by $v1 and that code returns to address in $v0
+; also check that $25 contains handler value
+; CHECK: daddiu $sp, $sp, [[spoffset]]
+; CHECK: move $25, $2
+; CHECK: move $ra, $2
+; CHECK: jr $ra
+; CHECK: daddu $sp, $sp, $3
+
+}
diff --git a/test/CodeGen/Mips/ex2.ll b/test/CodeGen/Mips/ex2.ll
new file mode 100644
index 000000000000..67d19e4b84ca
--- /dev/null
+++ b/test/CodeGen/Mips/ex2.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+@_ZTIPKc = external constant i8*
+
+define i32 @main() {
+; 16: main:
+; 16: .cfi_startproc
+; 16: save $ra, $s0, $s1, 32
+; 16: .cfi_offset 17, -8
+; 16: .cfi_offset 16, -12
+; 16: .cfi_offset 31, -4
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %exception = call i8* @__cxa_allocate_exception(i32 4) nounwind
+ %0 = bitcast i8* %exception to i8**
+ store i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8** %0
+ call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIPKc to i8*), i8* null) noreturn
+ unreachable
+
+return: ; No predecessors!
+ %1 = load i32* %retval
+ ret i32 %1
+}
+
+declare i8* @__cxa_allocate_exception(i32)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
diff --git a/test/CodeGen/Mips/fp16static.ll b/test/CodeGen/Mips/fp16static.ll
new file mode 100644
index 000000000000..240ec75a36b6
--- /dev/null
+++ b/test/CodeGen/Mips/fp16static.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
+
+@x = common global float 0.000000e+00, align 4
+
+define void @foo() nounwind {
+entry:
+ %0 = load float* @x, align 4
+ %1 = load float* @x, align 4
+ %mul = fmul float %0, %1
+ store float %mul, float* @x, align 4
+; CHECK-STATIC16: jal __mips16_mulsf3
+ ret void
+}
diff --git a/test/CodeGen/Mips/frame-address.ll b/test/CodeGen/Mips/frame-address.ll
index 9df1808fde53..92946d9ffd68 100644
--- a/test/CodeGen/Mips/frame-address.ll
+++ b/test/CodeGen/Mips/frame-address.ll
@@ -7,6 +7,6 @@ entry:
%0 = call i8* @llvm.frameaddress(i32 0)
ret i8* %0
-; CHECK: addu $fp, $sp, $zero
-; CHECK: addu $2, $zero, $fp
+; CHECK: move $fp, $sp
+; CHECK: move $2, $fp
}
diff --git a/test/CodeGen/Mips/gpreg-lazy-binding.ll b/test/CodeGen/Mips/gpreg-lazy-binding.ll
new file mode 100644
index 000000000000..88e596b3bb0d
--- /dev/null
+++ b/test/CodeGen/Mips/gpreg-lazy-binding.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mipsel -disable-mips-delay-filler < %s | FileCheck %s
+
+@g = external global i32
+
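+; Check that a $gp move precedes the lazily-bound call to @externalFunc but
+; is not repeated before the direct call to @internalFunc.
+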
+; CHECK: move $gp
+; CHECK: jalr $25
+; CHECK: nop
+; CHECK-NOT: move $gp
+; CHECK: jalr $25
+
+define void @f0() nounwind {
+entry:
+ tail call void @externalFunc() nounwind
+ tail call fastcc void @internalFunc()
+ ret void
+}
+
+declare void @externalFunc()
+
+define internal fastcc void @internalFunc() nounwind noinline {
+entry:
+ %0 = load i32* @g, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @g, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll
index aee58b650e7a..56ee60785f46 100644
--- a/test/CodeGen/Mips/helloworld.ll
+++ b/test/CodeGen/Mips/helloworld.ll
@@ -1,9 +1,11 @@
; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1
; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2
; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST1
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST2
;
-; re-enable this when mips16's jalr is fixed.
-; DISABLED: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR
+; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR32
@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1
@@ -15,7 +17,15 @@ entry:
; SR: .set mips16 # @main
-; SR: save $ra, [[FS:[0-9]+]]
+; SR32: .set nomips16
+; SR32: .ent main
+; SR-NOT: .set noreorder
+; SR-NOT: .set nomacro
+; SR-NOT: .set noat
+; SR32: .set noreorder
+; SR32: .set nomacro
+; SR32: .set noat
+; SR: save $ra, $s0, $s1, [[FS:[0-9]+]]
; PE: li $[[T1:[0-9]+]], %hi(_gp_disp)
; PE: addiu $[[T2:[0-9]+]], $pc, %lo(_gp_disp)
; PE: sll $[[T3:[0-9]+]], $[[T1]], 16
@@ -25,10 +35,23 @@ entry:
; C2: move $25, ${{[0-9]+}}
; C1: move $gp, ${{[0-9]+}}
; C1: jalrc ${{[0-9]+}}
-; SR: restore $ra, [[FS]]
+; SR: restore $ra, $s0, $s1, [[FS]]
; PE: li $2, 0
; PE: jrc $ra
+; ST1: li ${{[0-9]+}}, %hi($.str)
+; ST1: sll ${{[0-9]+}}, ${{[0-9]+}}, 16
+; ST1: addiu ${{[0-9]+}}, %lo($.str)
+; ST2: li ${{[0-9]+}}, %hi($.str)
+; ST2: jal printf
}
+; SR-NOT: .set at
+; SR-NOT: .set macro
+; SR-NOT: .set reorder
+; SR32: .set at
+; SR32: .set macro
+; SR32: .set reorder
+; SR: .end main
+; SR32: .end main
declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/hf16_1.ll b/test/CodeGen/Mips/hf16_1.ll
new file mode 100644
index 000000000000..c7454ee0a8dd
--- /dev/null
+++ b/test/CodeGen/Mips/hf16_1.ll
@@ -0,0 +1,256 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float -O3 < %s | FileCheck %s -check-prefix=1
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float -O3 < %s | FileCheck %s -check-prefix=2
+
+
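+; With -mips16-hard-float, calls that involve floating-point arguments or
+; results go through __mips16_call_stub_* helpers. Prefix 1 checks the stub
+; lookup and prefix 2 the direct %call16 reference for each signature; the
+; numeric suffix is assumed to encode the argument signature.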
+@x = common global float 0.000000e+00, align 4
+@xd = common global double 0.000000e+00, align 8
+@y = common global float 0.000000e+00, align 4
+@yd = common global double 0.000000e+00, align 8
+@xy = common global { float, float } zeroinitializer, align 4
+@xyd = common global { double, double } zeroinitializer, align 8
+
+define void @foo() nounwind {
+entry:
+ %0 = load float* @x, align 4
+ call void @v_sf(float %0)
+ %1 = load double* @xd, align 8
+ call void @v_df(double %1)
+ %2 = load float* @x, align 4
+ %3 = load float* @y, align 4
+ call void @v_sf_sf(float %2, float %3)
+ %4 = load double* @xd, align 8
+ %5 = load float* @x, align 4
+ call void @v_df_sf(double %4, float %5)
+ %6 = load double* @xd, align 8
+ %7 = load double* @yd, align 8
+ call void @v_df_df(double %6, double %7)
+ %call = call float @sf_v()
+ %8 = load float* @x, align 4
+ %call1 = call float @sf_sf(float %8)
+ %9 = load double* @xd, align 8
+ %call2 = call float @sf_df(double %9)
+ %10 = load float* @x, align 4
+ %11 = load float* @y, align 4
+ %call3 = call float @sf_sf_sf(float %10, float %11)
+ %12 = load double* @xd, align 8
+ %13 = load float* @x, align 4
+ %call4 = call float @sf_df_sf(double %12, float %13)
+ %14 = load double* @xd, align 8
+ %15 = load double* @yd, align 8
+ %call5 = call float @sf_df_df(double %14, double %15)
+ %call6 = call double @df_v()
+ %16 = load float* @x, align 4
+ %call7 = call double @df_sf(float %16)
+ %17 = load double* @xd, align 8
+ %call8 = call double @df_df(double %17)
+ %18 = load float* @x, align 4
+ %19 = load float* @y, align 4
+ %call9 = call double @df_sf_sf(float %18, float %19)
+ %20 = load double* @xd, align 8
+ %21 = load float* @x, align 4
+ %call10 = call double @df_df_sf(double %20, float %21)
+ %22 = load double* @xd, align 8
+ %23 = load double* @yd, align 8
+ %call11 = call double @df_df_df(double %22, double %23)
+ %call12 = call { float, float } @sc_v()
+ %24 = extractvalue { float, float } %call12, 0
+ %25 = extractvalue { float, float } %call12, 1
+ %26 = load float* @x, align 4
+ %call13 = call { float, float } @sc_sf(float %26)
+ %27 = extractvalue { float, float } %call13, 0
+ %28 = extractvalue { float, float } %call13, 1
+ %29 = load double* @xd, align 8
+ %call14 = call { float, float } @sc_df(double %29)
+ %30 = extractvalue { float, float } %call14, 0
+ %31 = extractvalue { float, float } %call14, 1
+ %32 = load float* @x, align 4
+ %33 = load float* @y, align 4
+ %call15 = call { float, float } @sc_sf_sf(float %32, float %33)
+ %34 = extractvalue { float, float } %call15, 0
+ %35 = extractvalue { float, float } %call15, 1
+ %36 = load double* @xd, align 8
+ %37 = load float* @x, align 4
+ %call16 = call { float, float } @sc_df_sf(double %36, float %37)
+ %38 = extractvalue { float, float } %call16, 0
+ %39 = extractvalue { float, float } %call16, 1
+ %40 = load double* @xd, align 8
+ %41 = load double* @yd, align 8
+ %call17 = call { float, float } @sc_df_df(double %40, double %41)
+ %42 = extractvalue { float, float } %call17, 0
+ %43 = extractvalue { float, float } %call17, 1
+ %call18 = call { double, double } @dc_v()
+ %44 = extractvalue { double, double } %call18, 0
+ %45 = extractvalue { double, double } %call18, 1
+ %46 = load float* @x, align 4
+ %call19 = call { double, double } @dc_sf(float %46)
+ %47 = extractvalue { double, double } %call19, 0
+ %48 = extractvalue { double, double } %call19, 1
+ %49 = load double* @xd, align 8
+ %call20 = call { double, double } @dc_df(double %49)
+ %50 = extractvalue { double, double } %call20, 0
+ %51 = extractvalue { double, double } %call20, 1
+ %52 = load float* @x, align 4
+ %53 = load float* @y, align 4
+ %call21 = call { double, double } @dc_sf_sf(float %52, float %53)
+ %54 = extractvalue { double, double } %call21, 0
+ %55 = extractvalue { double, double } %call21, 1
+ %56 = load double* @xd, align 8
+ %57 = load float* @x, align 4
+ %call22 = call { double, double } @dc_df_sf(double %56, float %57)
+ %58 = extractvalue { double, double } %call22, 0
+ %59 = extractvalue { double, double } %call22, 1
+ %60 = load double* @xd, align 8
+ %61 = load double* @yd, align 8
+ %call23 = call { double, double } @dc_df_df(double %60, double %61)
+ %62 = extractvalue { double, double } %call23, 0
+ %63 = extractvalue { double, double } %call23, 1
+ ret void
+}
+
+declare void @v_sf(float)
+
+declare void @v_df(double)
+
+declare void @v_sf_sf(float, float)
+
+declare void @v_df_sf(double, float)
+
+declare void @v_df_df(double, double)
+
+declare float @sf_v()
+
+declare float @sf_sf(float)
+
+declare float @sf_df(double)
+
+declare float @sf_sf_sf(float, float)
+
+declare float @sf_df_sf(double, float)
+
+declare float @sf_df_df(double, double)
+
+declare double @df_v()
+
+declare double @df_sf(float)
+
+declare double @df_df(double)
+
+declare double @df_sf_sf(float, float)
+
+declare double @df_df_sf(double, float)
+
+declare double @df_df_df(double, double)
+
+declare { float, float } @sc_v()
+
+declare { float, float } @sc_sf(float)
+
+declare { float, float } @sc_df(double)
+
+declare { float, float } @sc_sf_sf(float, float)
+
+declare { float, float } @sc_df_sf(double, float)
+
+declare { float, float } @sc_df_df(double, double)
+
+declare { double, double } @dc_v()
+
+declare { double, double } @dc_sf(float)
+
+declare { double, double } @dc_df(double)
+
+declare { double, double } @dc_sf_sf(float, float)
+
+declare { double, double } @dc_df_sf(double, float)
+
+declare { double, double } @dc_df_df(double, double)
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_1)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(v_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_2)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(v_df)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_5)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(v_sf_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_6)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(v_df_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_10)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(v_df_df)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_0)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sf_v)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sf_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_2)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sf_df)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_5)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sf_sf_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_6)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sf_df_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_10)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sf_df_df)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_0)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(df_v)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_1)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(df_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(df_df)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_5)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(df_sf_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_6)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(df_df_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_10)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(df_df_df)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_0)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sc_v)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_1)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sc_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_2)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sc_df)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_5)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sc_sf_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_6)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sc_df_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_sc_10)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(sc_df_df)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_0)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(dc_v)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_1)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(dc_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_2)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(dc_df)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_5)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(dc_sf_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_6)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(dc_df_sf)(${{[0-9]+}})
+
+; 1: lw ${{[0-9]+}}, %got(__mips16_call_stub_dc_10)(${{[0-9]+}})
+; 2: lw ${{[0-9]+}}, %call16(dc_df_df)(${{[0-9]+}})
+
+
+
diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll
index 8b1f71b69f19..704014cba010 100644
--- a/test/CodeGen/Mips/i64arg.ll
+++ b/test/CodeGen/Mips/i64arg.ll
@@ -2,8 +2,8 @@
define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind {
entry:
-; CHECK: addu $[[R1:[0-9]+]], $zero, $5
-; CHECK: addu $[[R0:[0-9]+]], $zero, $4
+; CHECK: move $[[R1:[0-9]+]], $5
+; CHECK: move $[[R0:[0-9]+]], $4
; CHECK: ori $6, ${{[0-9]+}}, 3855
; CHECK: ori $7, ${{[0-9]+}}, 22136
; CHECK: lw $25, %call16(ff1)
@@ -12,16 +12,16 @@ entry:
; CHECK: lw $25, %call16(ff2)
; CHECK: lw $[[R2:[0-9]+]], 80($sp)
; CHECK: lw $[[R3:[0-9]+]], 84($sp)
-; CHECK: addu $4, $zero, $[[R2]]
-; CHECK: addu $5, $zero, $[[R3]]
+; CHECK: move $4, $[[R2]]
+; CHECK: move $5, $[[R3]]
; CHECK: jalr $25
tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind
%sub = add nsw i32 %i, -1
+; CHECK: lw $25, %call16(ff3)
; CHECK: sw $[[R1]], 28($sp)
; CHECK: sw $[[R0]], 24($sp)
-; CHECK: lw $25, %call16(ff3)
-; CHECK: addu $6, $zero, $[[R2]]
-; CHECK: addu $7, $zero, $[[R3]]
+; CHECK: move $6, $[[R2]]
+; CHECK: move $7, $[[R3]]
; CHECK: jalr $25
tail call void @ff3(i32 %i, i64 %ll, i32 %sub, i64 %ll1) nounwind
ret void
diff --git a/test/CodeGen/Mips/inlineasm_constraint.ll b/test/CodeGen/Mips/inlineasm_constraint.ll
index 5adec3bb29ea..8d30f45d84e3 100644
--- a/test/CodeGen/Mips/inlineasm_constraint.ll
+++ b/test/CodeGen/Mips/inlineasm_constraint.ll
@@ -51,5 +51,14 @@ entry:
; CHECK: #NO_APP
tail call i32 asm sideeffect "addi $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind
+; Now the R constraint, which takes the address of c.
+ %c = alloca i32, align 4
+ store i32 -4469539, i32* %c, align 4
+ %8 = call i32 asm sideeffect "lwl $0, 1 + $1\0A\09lwr $0, 2 + $1\0A\09", "=r,*R"(i32* %c) #1
+; CHECK: #APP
+; CHECK: lwl ${{[0-9]+}}, 1 + 0(${{[0-9]+}})
+; CHECK: lwr ${{[0-9]+}}, 2 + 0(${{[0-9]+}})
+; CHECK: #NO_APP
+
ret i32 0
}
diff --git a/test/CodeGen/Mips/jtstat.ll b/test/CodeGen/Mips/jtstat.ll
new file mode 100644
index 000000000000..01afc080c2ed
--- /dev/null
+++ b/test/CodeGen/Mips/jtstat.ll
@@ -0,0 +1,71 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
+
+@s = global i8 115, align 1
+@c = common global i8 0, align 1
+@.str = private unnamed_addr constant [5 x i8] c"%c \0A\00", align 1
+
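+; Check that, with static relocation on mips16, the jump-table address is
+; built from li %hi($JTI...) and lw %lo($JTI...), and that the table entries
+; are emitted as .4byte basic-block addresses (see the CHECK lines at the
+; end of this file).
+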
+define void @test(i32 %i) nounwind {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32* %i.addr, align 4
+ switch i32 %0, label %sw.epilog [
+ i32 115, label %sw.bb
+ i32 105, label %sw.bb1
+ i32 100, label %sw.bb2
+ i32 108, label %sw.bb3
+ i32 99, label %sw.bb4
+ i32 68, label %sw.bb5
+ i32 81, label %sw.bb6
+ i32 76, label %sw.bb7
+ ]
+
+sw.bb: ; preds = %entry
+ store i8 115, i8* @c, align 1
+ br label %sw.epilog
+
+sw.bb1: ; preds = %entry
+ store i8 105, i8* @c, align 1
+ br label %sw.epilog
+
+sw.bb2: ; preds = %entry
+ store i8 100, i8* @c, align 1
+ br label %sw.epilog
+
+sw.bb3: ; preds = %entry
+ store i8 108, i8* @c, align 1
+ br label %sw.epilog
+
+sw.bb4: ; preds = %entry
+ store i8 99, i8* @c, align 1
+ br label %sw.epilog
+
+sw.bb5: ; preds = %entry
+ store i8 68, i8* @c, align 1
+ br label %sw.epilog
+
+sw.bb6: ; preds = %entry
+ store i8 81, i8* @c, align 1
+ br label %sw.epilog
+
+sw.bb7: ; preds = %entry
+ store i8 76, i8* @c, align 1
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %sw.bb7, %sw.bb6, %sw.bb5, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
+ ret void
+}
+
+; CHECK-STATIC16: li ${{[0-9]+}}, %hi($JTI{{[0-9]+}}_{{[0-9]+}})
+; CHECK-STATIC16: lw ${{[0-9]+}}, %lo($JTI{{[0-9]+}}_{{[0-9]+}})(${{[0-9]+}})
+; CHECK-STATIC16: $JTI{{[0-9]+}}_{{[0-9]+}}:
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
+; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}})
diff --git a/test/CodeGen/Mips/largefr1.ll b/test/CodeGen/Mips/largefr1.ll
new file mode 100644
index 000000000000..0fe89f71d9f3
--- /dev/null
+++ b/test/CodeGen/Mips/largefr1.ll
@@ -0,0 +1,61 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=1
+
+@i = common global i32 0, align 4
+@j = common global i32 0, align 4
+@.str = private unnamed_addr constant [8 x i8] c"%i %i \0A\00", align 1
+
+define void @foo(i32* %p, i32 %i, i32 %j) nounwind {
+entry:
+ %p.addr = alloca i32*, align 4
+ %i.addr = alloca i32, align 4
+ %j.addr = alloca i32, align 4
+ store i32* %p, i32** %p.addr, align 4
+ store i32 %i, i32* %i.addr, align 4
+ store i32 %j, i32* %j.addr, align 4
+ %0 = load i32* %j.addr, align 4
+ %1 = load i32** %p.addr, align 4
+ %2 = load i32* %i.addr, align 4
+ %add.ptr = getelementptr inbounds i32* %1, i32 %2
+ store i32 %0, i32* %add.ptr, align 4
+ ret void
+}
+
+define i32 @main() nounwind {
+entry:
+; 1: main:
+; 1: 1: .word -797992
+; 1: li ${{[0-9]+}}, 12
+; 1: sll ${{[0-9]+}}, ${{[0-9]+}}, 16
+; 1: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 2: move $sp, ${{[0-9]+}}
+; 2: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 1: li ${{[0-9]+}}, 6
+; 1: sll ${{[0-9]+}}, ${{[0-9]+}}, 16
+; 1: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 2: move $sp, ${{[0-9]+}}
+; 2: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; 1: addiu ${{[0-9]+}}, ${{[0-9]+}}, 6800
+; 1: li ${{[0-9]+}}, 1
+; 1: sll ${{[0-9]+}}, ${{[0-9]+}}, 16
+; 2: li ${{[0-9]+}}, 34463
+ %retval = alloca i32, align 4
+ %one = alloca [100000 x i32], align 4
+ %two = alloca [100000 x i32], align 4
+ store i32 0, i32* %retval
+ %arrayidx = getelementptr inbounds [100000 x i32]* %one, i32 0, i32 0
+ call void @foo(i32* %arrayidx, i32 50, i32 9999)
+ %arrayidx1 = getelementptr inbounds [100000 x i32]* %two, i32 0, i32 0
+ call void @foo(i32* %arrayidx1, i32 99999, i32 5555)
+ %arrayidx2 = getelementptr inbounds [100000 x i32]* %one, i32 0, i32 50
+ %0 = load i32* %arrayidx2, align 4
+ store i32 %0, i32* @i, align 4
+ %arrayidx3 = getelementptr inbounds [100000 x i32]* %two, i32 0, i32 99999
+ %1 = load i32* %arrayidx3, align 4
+ store i32 %1, i32* @j, align 4
+ %2 = load i32* @i, align 4
+ %3 = load i32* @j, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3)
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg
index 0587d3243e6b..e157c540b538 100644
--- a/test/CodeGen/Mips/lit.local.cfg
+++ b/test/CodeGen/Mips/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
targets = set(config.root.targets_to_build.split())
if not 'Mips' in targets:
diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll
index 0aeabb30e289..0dbb2c27b8f9 100644
--- a/test/CodeGen/Mips/madd-msub.ll
+++ b/test/CodeGen/Mips/madd-msub.ll
@@ -1,6 +1,9 @@
-; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=32
+; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s -check-prefix=DSP
+; RUN: llc -march=mips -mcpu=mips16 < %s
-; CHECK: madd
+; 32: madd ${{[0-9]+}}
+; DSP: madd $ac
define i64 @madd1(i32 %a, i32 %b, i32 %c) nounwind readnone {
entry:
%conv = sext i32 %a to i64
@@ -11,7 +14,8 @@ entry:
ret i64 %add
}
-; CHECK: maddu
+; 32: maddu ${{[0-9]+}}
+; DSP: maddu $ac
define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone {
entry:
%conv = zext i32 %a to i64
@@ -22,7 +26,8 @@ entry:
ret i64 %add
}
-; CHECK: madd
+; 32: madd ${{[0-9]+}}
+; DSP: madd $ac
define i64 @madd3(i32 %a, i32 %b, i64 %c) nounwind readnone {
entry:
%conv = sext i32 %a to i64
@@ -32,7 +37,8 @@ entry:
ret i64 %add
}
-; CHECK: msub
+; 32: msub ${{[0-9]+}}
+; DSP: msub $ac
define i64 @msub1(i32 %a, i32 %b, i32 %c) nounwind readnone {
entry:
%conv = sext i32 %c to i64
@@ -43,7 +49,8 @@ entry:
ret i64 %sub
}
-; CHECK: msubu
+; 32: msubu ${{[0-9]+}}
+; DSP: msubu $ac
define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone {
entry:
%conv = zext i32 %c to i64
@@ -54,7 +61,8 @@ entry:
ret i64 %sub
}
-; CHECK: msub
+; 32: msub ${{[0-9]+}}
+; DSP: msub $ac
define i64 @msub3(i32 %a, i32 %b, i64 %c) nounwind readnone {
entry:
%conv = sext i32 %a to i64
diff --git a/test/CodeGen/Mips/mips16ex.ll b/test/CodeGen/Mips/mips16ex.ll
new file mode 100644
index 000000000000..ecb30b5c63b8
--- /dev/null
+++ b/test/CodeGen/Mips/mips16ex.ll
@@ -0,0 +1,87 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+;16: $eh_func_begin0=.
+@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1
+@_ZTIi = external constant i8*
+@.str1 = private unnamed_addr constant [15 x i8] c"exception %i \0A\00", align 1
+
+define i32 @main() {
+entry:
+ %retval = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %e = alloca i32, align 4
+ store i32 0, i32* %retval
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0))
+ %exception = call i8* @__cxa_allocate_exception(i32 4) nounwind
+ %0 = bitcast i8* %exception to i32*
+ store i32 20, i32* %0
+ invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn
+ to label %unreachable unwind label %lpad
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %2 = extractvalue { i8*, i32 } %1, 0
+ store i8* %2, i8** %exn.slot
+ %3 = extractvalue { i8*, i32 } %1, 1
+ store i32 %3, i32* %ehselector.slot
+ br label %catch.dispatch
+
+catch.dispatch: ; preds = %lpad
+ %sel = load i32* %ehselector.slot
+ %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+ %matches = icmp eq i32 %sel, %4
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %catch.dispatch
+ %exn = load i8** %exn.slot
+ %5 = call i8* @__cxa_begin_catch(i8* %exn) nounwind
+ %6 = bitcast i8* %5 to i32*
+ %exn.scalar = load i32* %6
+ store i32 %exn.scalar, i32* %e, align 4
+ %7 = load i32* %e, align 4
+ %call2 = invoke i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str1, i32 0, i32 0), i32 %7)
+ to label %invoke.cont unwind label %lpad1
+
+invoke.cont: ; preds = %catch
+ call void @__cxa_end_catch() nounwind
+ br label %try.cont
+
+try.cont: ; preds = %invoke.cont
+ ret i32 0
+
+lpad1: ; preds = %catch
+ %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ %9 = extractvalue { i8*, i32 } %8, 0
+ store i8* %9, i8** %exn.slot
+ %10 = extractvalue { i8*, i32 } %8, 1
+ store i32 %10, i32* %ehselector.slot
+ call void @__cxa_end_catch() nounwind
+ br label %eh.resume
+
+eh.resume: ; preds = %lpad1, %catch.dispatch
+ %exn3 = load i8** %exn.slot
+ %sel4 = load i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0
+ %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1
+ resume { i8*, i32 } %lpad.val5
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+declare i32 @printf(i8*, ...)
+
+declare i8* @__cxa_allocate_exception(i32)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
diff --git a/test/CodeGen/Mips/mips16fpe.ll b/test/CodeGen/Mips/mips16fpe.ll
new file mode 100644
index 000000000000..433543607967
--- /dev/null
+++ b/test/CodeGen/Mips/mips16fpe.ll
@@ -0,0 +1,381 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 -soft-float -mips16-hard-float < %s | FileCheck %s -check-prefix=16hf
+
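+; With -mips16-hard-float, each FP arithmetic, conversion, and comparison below
+; should be lowered to a call to the matching __mips16_* helper, whose address
+; is loaded through a %call16 GOT entry, as the per-function checks verify.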
+@x = global float 5.000000e+00, align 4
+@y = global float 1.500000e+01, align 4
+@xd = global double 6.000000e+00, align 8
+@yd = global double 1.800000e+01, align 8
+@two = global i32 2, align 4
+@addsf3_result = common global float 0.000000e+00, align 4
+@adddf3_result = common global double 0.000000e+00, align 8
+@subsf3_result = common global float 0.000000e+00, align 4
+@subdf3_result = common global double 0.000000e+00, align 8
+@mulsf3_result = common global float 0.000000e+00, align 4
+@muldf3_result = common global double 0.000000e+00, align 8
+@divsf3_result = common global float 0.000000e+00, align 4
+@divdf3_result = common global double 0.000000e+00, align 8
+@extendsfdf2_result = common global double 0.000000e+00, align 8
+@xd2 = global double 0x40147E6B74B4CF6A, align 8
+@truncdfsf2_result = common global float 0.000000e+00, align 4
+@fix_truncsfsi_result = common global i32 0, align 4
+@fix_truncdfsi_result = common global i32 0, align 4
+@si = global i32 -9, align 4
+@ui = global i32 9, align 4
+@floatsisf_result = common global float 0.000000e+00, align 4
+@floatsidf_result = common global double 0.000000e+00, align 8
+@floatunsisf_result = common global float 0.000000e+00, align 4
+@floatunsidf_result = common global double 0.000000e+00, align 8
+@xx = global float 5.000000e+00, align 4
+@eqsf2_result = common global i32 0, align 4
+@xxd = global double 6.000000e+00, align 8
+@eqdf2_result = common global i32 0, align 4
+@nesf2_result = common global i32 0, align 4
+@nedf2_result = common global i32 0, align 4
+@gesf2_result = common global i32 0, align 4
+@gedf2_result = common global i32 0, align 4
+@ltsf2_result = common global i32 0, align 4
+@ltdf2_result = common global i32 0, align 4
+@lesf2_result = common global i32 0, align 4
+@ledf2_result = common global i32 0, align 4
+@gtsf2_result = common global i32 0, align 4
+@gtdf2_result = common global i32 0, align 4
+
+define void @test_addsf3() nounwind {
+entry:
+;16hf: test_addsf3:
+ %0 = load float* @x, align 4
+ %1 = load float* @y, align 4
+ %add = fadd float %0, %1
+ store float %add, float* @addsf3_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_addsf3)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_adddf3() nounwind {
+entry:
+;16hf: test_adddf3:
+ %0 = load double* @xd, align 8
+ %1 = load double* @yd, align 8
+ %add = fadd double %0, %1
+ store double %add, double* @adddf3_result, align 8
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_adddf3)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_subsf3() nounwind {
+entry:
+;16hf: test_subsf3:
+ %0 = load float* @x, align 4
+ %1 = load float* @y, align 4
+ %sub = fsub float %0, %1
+ store float %sub, float* @subsf3_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_subsf3)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_subdf3() nounwind {
+entry:
+;16hf: test_subdf3:
+ %0 = load double* @xd, align 8
+ %1 = load double* @yd, align 8
+ %sub = fsub double %0, %1
+ store double %sub, double* @subdf3_result, align 8
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_subdf3)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_mulsf3() nounwind {
+entry:
+;16hf: test_mulsf3:
+ %0 = load float* @x, align 4
+ %1 = load float* @y, align 4
+ %mul = fmul float %0, %1
+ store float %mul, float* @mulsf3_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_mulsf3)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_muldf3() nounwind {
+entry:
+;16hf: test_muldf3:
+ %0 = load double* @xd, align 8
+ %1 = load double* @yd, align 8
+ %mul = fmul double %0, %1
+ store double %mul, double* @muldf3_result, align 8
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_muldf3)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_divsf3() nounwind {
+entry:
+;16hf: test_divsf3:
+ %0 = load float* @y, align 4
+ %1 = load float* @x, align 4
+ %div = fdiv float %0, %1
+ store float %div, float* @divsf3_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_divsf3)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_divdf3() nounwind {
+entry:
+;16hf: test_divdf3:
+ %0 = load double* @yd, align 8
+ %mul = fmul double %0, 2.000000e+00
+ %1 = load double* @xd, align 8
+ %div = fdiv double %mul, %1
+ store double %div, double* @divdf3_result, align 8
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_divdf3)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_extendsfdf2() nounwind {
+entry:
+;16hf: test_extendsfdf2:
+ %0 = load float* @x, align 4
+ %conv = fpext float %0 to double
+ store double %conv, double* @extendsfdf2_result, align 8
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_extendsfdf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_truncdfsf2() nounwind {
+entry:
+;16hf: test_truncdfsf2:
+ %0 = load double* @xd2, align 8
+ %conv = fptrunc double %0 to float
+ store float %conv, float* @truncdfsf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_truncdfsf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_fix_truncsfsi() nounwind {
+entry:
+;16hf: test_fix_truncsfsi:
+ %0 = load float* @x, align 4
+ %conv = fptosi float %0 to i32
+ store i32 %conv, i32* @fix_truncsfsi_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_fix_truncsfsi)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_fix_truncdfsi() nounwind {
+entry:
+;16hf: test_fix_truncdfsi:
+ %0 = load double* @xd, align 8
+ %conv = fptosi double %0 to i32
+ store i32 %conv, i32* @fix_truncdfsi_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_fix_truncdfsi)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_floatsisf() nounwind {
+entry:
+;16hf: test_floatsisf:
+ %0 = load i32* @si, align 4
+ %conv = sitofp i32 %0 to float
+ store float %conv, float* @floatsisf_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatsisf)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_floatsidf() nounwind {
+entry:
+;16hf: test_floatsidf:
+ %0 = load i32* @si, align 4
+ %conv = sitofp i32 %0 to double
+ store double %conv, double* @floatsidf_result, align 8
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatsidf)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_floatunsisf() nounwind {
+entry:
+;16hf: test_floatunsisf:
+ %0 = load i32* @ui, align 4
+ %conv = uitofp i32 %0 to float
+ store float %conv, float* @floatunsisf_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatunsisf)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_floatunsidf() nounwind {
+entry:
+;16hf: test_floatunsidf:
+ %0 = load i32* @ui, align 4
+ %conv = uitofp i32 %0 to double
+ store double %conv, double* @floatunsidf_result, align 8
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatunsidf)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_eqsf2() nounwind {
+entry:
+;16hf: test_eqsf2:
+ %0 = load float* @x, align 4
+ %1 = load float* @xx, align 4
+ %cmp = fcmp oeq float %0, %1
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, i32* @eqsf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_eqsf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_eqdf2() nounwind {
+entry:
+;16hf: test_eqdf2:
+ %0 = load double* @xd, align 8
+ %1 = load double* @xxd, align 8
+ %cmp = fcmp oeq double %0, %1
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, i32* @eqdf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_eqdf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_nesf2() nounwind {
+entry:
+;16hf: test_nesf2:
+ %0 = load float* @x, align 4
+ %1 = load float* @y, align 4
+ %cmp = fcmp une float %0, %1
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, i32* @nesf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_nesf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_nedf2() nounwind {
+entry:
+;16hf: test_nedf2:
+ %0 = load double* @xd, align 8
+ %1 = load double* @yd, align 8
+ %cmp = fcmp une double %0, %1
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, i32* @nedf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_nedf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_gesf2() nounwind {
+entry:
+;16hf: test_gesf2:
+ %0 = load float* @x, align 4
+ %1 = load float* @xx, align 4
+ %cmp = fcmp oge float %0, %1
+ %2 = load float* @y, align 4
+ %cmp1 = fcmp oge float %2, %0
+ %and3 = and i1 %cmp, %cmp1
+ %and = zext i1 %and3 to i32
+ store i32 %and, i32* @gesf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_gesf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_gedf2() nounwind {
+entry:
+;16hf: test_gedf2:
+ %0 = load double* @xd, align 8
+ %1 = load double* @xxd, align 8
+ %cmp = fcmp oge double %0, %1
+ %2 = load double* @yd, align 8
+ %cmp1 = fcmp oge double %2, %0
+ %and3 = and i1 %cmp, %cmp1
+ %and = zext i1 %and3 to i32
+ store i32 %and, i32* @gedf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_gedf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_ltsf2() nounwind {
+entry:
+;16hf: test_ltsf2:
+ %0 = load float* @x, align 4
+ %1 = load float* @xx, align 4
+ %lnot = fcmp uge float %0, %1
+ %2 = load float* @y, align 4
+ %cmp1 = fcmp olt float %0, %2
+ %and2 = and i1 %lnot, %cmp1
+ %and = zext i1 %and2 to i32
+ store i32 %and, i32* @ltsf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_unordsf2)(${{[0-9]+}})
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltsf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_ltdf2() nounwind {
+entry:
+;16hf: test_ltdf2:
+ %0 = load double* @xd, align 8
+ %1 = load double* @xxd, align 8
+ %lnot = fcmp uge double %0, %1
+ %2 = load double* @yd, align 8
+ %cmp1 = fcmp olt double %0, %2
+ %and2 = and i1 %lnot, %cmp1
+ %and = zext i1 %and2 to i32
+ store i32 %and, i32* @ltdf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_unorddf2)(${{[0-9]+}})
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltdf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_lesf2() nounwind {
+entry:
+;16hf: test_lesf2:
+ %0 = load float* @x, align 4
+ %1 = load float* @xx, align 4
+ %cmp = fcmp ole float %0, %1
+ %2 = load float* @y, align 4
+ %cmp1 = fcmp ole float %0, %2
+ %and3 = and i1 %cmp, %cmp1
+ %and = zext i1 %and3 to i32
+ store i32 %and, i32* @lesf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_lesf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_ledf2() nounwind {
+entry:
+;16hf: test_ledf2:
+ %0 = load double* @xd, align 8
+ %1 = load double* @xxd, align 8
+ %cmp = fcmp ole double %0, %1
+ %2 = load double* @yd, align 8
+ %cmp1 = fcmp ole double %0, %2
+ %and3 = and i1 %cmp, %cmp1
+ %and = zext i1 %and3 to i32
+ store i32 %and, i32* @ledf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_ledf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_gtsf2() nounwind {
+entry:
+;16hf: test_gtsf2:
+ %0 = load float* @x, align 4
+ %1 = load float* @xx, align 4
+ %lnot = fcmp ule float %0, %1
+ %2 = load float* @y, align 4
+ %cmp1 = fcmp ogt float %2, %0
+ %and2 = and i1 %lnot, %cmp1
+ %and = zext i1 %and2 to i32
+ store i32 %and, i32* @gtsf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_gtsf2)(${{[0-9]+}})
+ ret void
+}
+
+define void @test_gtdf2() nounwind {
+entry:
+;16hf: test_gtdf2:
+ %0 = load double* @xd, align 8
+ %1 = load double* @xxd, align 8
+ %lnot = fcmp ule double %0, %1
+ %2 = load double* @yd, align 8
+ %cmp1 = fcmp ogt double %2, %0
+ %and2 = and i1 %lnot, %cmp1
+ %and = zext i1 %and2 to i32
+ store i32 %and, i32* @gtdf2_result, align 4
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_gtdf2)(${{[0-9]+}})
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/mips64-f128-call.ll b/test/CodeGen/Mips/mips64-f128-call.ll
new file mode 100644
index 000000000000..455e540e5df1
--- /dev/null
+++ b/test/CodeGen/Mips/mips64-f128-call.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 < %s | FileCheck %s
+
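+; fp128 arguments occupy the register pair $f12/$f13, and fp128 results come
+; back in $f0/$f2, which is what the checks below verify.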
+@gld0 = external global fp128
+@gld1 = external global fp128
+
+; CHECK: foo0
+; CHECK: sdc1 $f13, 8(${{[0-9]+}})
+; CHECK: sdc1 $f12, 0(${{[0-9]+}})
+
+define void @foo0(fp128 %a0) {
+entry:
+ store fp128 %a0, fp128* @gld0, align 16
+ ret void
+}
+
+; CHECK: foo1
+; CHECK: ldc1 $f13, 8(${{[0-9]+}})
+; CHECK: ldc1 $f12, 0(${{[0-9]+}})
+
+define void @foo1() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ tail call void @foo2(fp128 %0)
+ ret void
+}
+
+declare void @foo2(fp128)
+
+; CHECK: foo3
+; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld0)
+; CHECK: sdc1 $f2, 8($[[R0]])
+; CHECK: sdc1 $f0, 0($[[R0]])
+; CHECK: ld $[[R1:[0-9]+]], %got_disp(gld1)
+; CHECK: ldc1 $f0, 0($[[R1]])
+; CHECK: ldc1 $f2, 8($[[R1]])
+
+define fp128 @foo3() {
+entry:
+ %call = tail call fp128 @foo4()
+ store fp128 %call, fp128* @gld0, align 16
+ %0 = load fp128* @gld1, align 16
+ ret fp128 %0
+}
+
+declare fp128 @foo4()
diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll
new file mode 100644
index 000000000000..5892cab4f8ea
--- /dev/null
+++ b/test/CodeGen/Mips/mips64-f128.ll
@@ -0,0 +1,646 @@
+; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64 -soft-float -O1 \
+; RUN: -disable-mips-delay-filler < %s | FileCheck %s
+
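+; With -soft-float, fp128 arithmetic, conversions, and comparisons are expanded
+; to libcalls (__addtf3, __fixtfsi, ...); each test checks that the callee's
+; address is loaded into $25 via a %call16 entry.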
+@gld0 = external global fp128
+@gld1 = external global fp128
+@gld2 = external global fp128
+@gf1 = external global float
+@gd1 = external global double
+
+; CHECK: addLD:
+; CHECK: ld $25, %call16(__addtf3)
+
+define fp128 @addLD() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %1 = load fp128* @gld1, align 16
+ %add = fadd fp128 %0, %1
+ ret fp128 %add
+}
+
+; CHECK: subLD:
+; CHECK: ld $25, %call16(__subtf3)
+
+define fp128 @subLD() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %1 = load fp128* @gld1, align 16
+ %sub = fsub fp128 %0, %1
+ ret fp128 %sub
+}
+
+; CHECK: mulLD:
+; CHECK: ld $25, %call16(__multf3)
+
+define fp128 @mulLD() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %1 = load fp128* @gld1, align 16
+ %mul = fmul fp128 %0, %1
+ ret fp128 %mul
+}
+
+; CHECK: divLD:
+; CHECK: ld $25, %call16(__divtf3)
+
+define fp128 @divLD() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %1 = load fp128* @gld1, align 16
+ %div = fdiv fp128 %0, %1
+ ret fp128 %div
+}
+
+; CHECK: conv_LD_char:
+; CHECK: ld $25, %call16(__floatsitf)
+
+define fp128 @conv_LD_char(i8 signext %a) {
+entry:
+ %conv = sitofp i8 %a to fp128
+ ret fp128 %conv
+}
+
+; CHECK: conv_LD_short:
+; CHECK: ld $25, %call16(__floatsitf)
+
+define fp128 @conv_LD_short(i16 signext %a) {
+entry:
+ %conv = sitofp i16 %a to fp128
+ ret fp128 %conv
+}
+
+; CHECK: conv_LD_int:
+; CHECK: ld $25, %call16(__floatsitf)
+
+define fp128 @conv_LD_int(i32 %a) {
+entry:
+ %conv = sitofp i32 %a to fp128
+ ret fp128 %conv
+}
+
+; CHECK: conv_LD_LL:
+; CHECK: ld $25, %call16(__floatditf)
+
+define fp128 @conv_LD_LL(i64 %a) {
+entry:
+ %conv = sitofp i64 %a to fp128
+ ret fp128 %conv
+}
+
+; CHECK: conv_LD_UChar:
+; CHECK: ld $25, %call16(__floatunsitf)
+
+define fp128 @conv_LD_UChar(i8 zeroext %a) {
+entry:
+ %conv = uitofp i8 %a to fp128
+ ret fp128 %conv
+}
+
+; CHECK: conv_LD_UShort:
+; CHECK: ld $25, %call16(__floatunsitf)
+
+define fp128 @conv_LD_UShort(i16 zeroext %a) {
+entry:
+ %conv = uitofp i16 %a to fp128
+ ret fp128 %conv
+}
+
+; CHECK: conv_LD_UInt:
+; CHECK: ld $25, %call16(__floatunsitf)
+
+define fp128 @conv_LD_UInt(i32 %a) {
+entry:
+ %conv = uitofp i32 %a to fp128
+ ret fp128 %conv
+}
+
+; CHECK: conv_LD_ULL:
+; CHECK: ld $25, %call16(__floatunditf)
+
+define fp128 @conv_LD_ULL(i64 %a) {
+entry:
+ %conv = uitofp i64 %a to fp128
+ ret fp128 %conv
+}
+
+; CHECK: conv_char_LD:
+; CHECK: ld $25, %call16(__fixtfsi)
+
+define signext i8 @conv_char_LD(fp128 %a) {
+entry:
+ %conv = fptosi fp128 %a to i8
+ ret i8 %conv
+}
+
+; CHECK: conv_short_LD:
+; CHECK: ld $25, %call16(__fixtfsi)
+
+define signext i16 @conv_short_LD(fp128 %a) {
+entry:
+ %conv = fptosi fp128 %a to i16
+ ret i16 %conv
+}
+
+; CHECK: conv_int_LD:
+; CHECK: ld $25, %call16(__fixtfsi)
+
+define i32 @conv_int_LD(fp128 %a) {
+entry:
+ %conv = fptosi fp128 %a to i32
+ ret i32 %conv
+}
+
+; CHECK: conv_LL_LD:
+; CHECK: ld $25, %call16(__fixtfdi)
+
+define i64 @conv_LL_LD(fp128 %a) {
+entry:
+ %conv = fptosi fp128 %a to i64
+ ret i64 %conv
+}
+
+; CHECK: conv_UChar_LD:
+; CHECK: ld $25, %call16(__fixtfsi)
+
+define zeroext i8 @conv_UChar_LD(fp128 %a) {
+entry:
+ %conv = fptoui fp128 %a to i8
+ ret i8 %conv
+}
+
+; CHECK: conv_UShort_LD:
+; CHECK: ld $25, %call16(__fixtfsi)
+
+define zeroext i16 @conv_UShort_LD(fp128 %a) {
+entry:
+ %conv = fptoui fp128 %a to i16
+ ret i16 %conv
+}
+
+; CHECK: conv_UInt_LD:
+; CHECK: ld $25, %call16(__fixunstfsi)
+
+define i32 @conv_UInt_LD(fp128 %a) {
+entry:
+ %conv = fptoui fp128 %a to i32
+ ret i32 %conv
+}
+
+; CHECK: conv_ULL_LD:
+; CHECK: ld $25, %call16(__fixunstfdi)
+
+define i64 @conv_ULL_LD(fp128 %a) {
+entry:
+ %conv = fptoui fp128 %a to i64
+ ret i64 %conv
+}
+
+; CHECK: conv_LD_float:
+; CHECK: ld $25, %call16(__extendsftf2)
+
+define fp128 @conv_LD_float(float %a) {
+entry:
+ %conv = fpext float %a to fp128
+ ret fp128 %conv
+}
+
+; CHECK: conv_LD_double:
+; CHECK: ld $25, %call16(__extenddftf2)
+
+define fp128 @conv_LD_double(double %a) {
+entry:
+ %conv = fpext double %a to fp128
+ ret fp128 %conv
+}
+
+; CHECK: conv_float_LD:
+; CHECK: ld $25, %call16(__trunctfsf2)
+
+define float @conv_float_LD(fp128 %a) {
+entry:
+ %conv = fptrunc fp128 %a to float
+ ret float %conv
+}
+
+; CHECK: conv_double_LD:
+; CHECK: ld $25, %call16(__trunctfdf2)
+
+define double @conv_double_LD(fp128 %a) {
+entry:
+ %conv = fptrunc fp128 %a to double
+ ret double %conv
+}
+
+; CHECK: libcall1_fabsl:
+; CHECK: ld $[[R0:[0-9]+]], 8($[[R4:[0-9]+]])
+; CHECK: daddiu $[[R1:[0-9]+]], $zero, 1
+; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 63
+; CHECK: daddiu $[[R3:[0-9]+]], $[[R2]], -1
+; CHECK: and $4, $[[R0]], $[[R3]]
+; CHECK: ld $2, 0($[[R4]])
+
+define fp128 @libcall1_fabsl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @fabsl(fp128 %0) nounwind readnone
+ ret fp128 %call
+}
+
+declare fp128 @fabsl(fp128) #1
+
+; CHECK: libcall1_ceill:
+; CHECK: ld $25, %call16(ceill)
+
+define fp128 @libcall1_ceill() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @ceill(fp128 %0) nounwind readnone
+ ret fp128 %call
+}
+
+declare fp128 @ceill(fp128) #1
+
+; CHECK: libcall1_sinl:
+; CHECK: ld $25, %call16(sinl)
+
+define fp128 @libcall1_sinl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @sinl(fp128 %0) nounwind
+ ret fp128 %call
+}
+
+declare fp128 @sinl(fp128) #2
+
+; CHECK: libcall1_cosl:
+; CHECK: ld $25, %call16(cosl)
+
+define fp128 @libcall1_cosl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @cosl(fp128 %0) nounwind
+ ret fp128 %call
+}
+
+declare fp128 @cosl(fp128) #2
+
+; CHECK: libcall1_expl:
+; CHECK: ld $25, %call16(expl)
+
+define fp128 @libcall1_expl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @expl(fp128 %0) nounwind
+ ret fp128 %call
+}
+
+declare fp128 @expl(fp128) #2
+
+; CHECK: libcall1_exp2l:
+; CHECK: ld $25, %call16(exp2l)
+
+define fp128 @libcall1_exp2l() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @exp2l(fp128 %0) nounwind
+ ret fp128 %call
+}
+
+declare fp128 @exp2l(fp128) #2
+
+; CHECK: libcall1_logl:
+; CHECK: ld $25, %call16(logl)
+
+define fp128 @libcall1_logl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @logl(fp128 %0) nounwind
+ ret fp128 %call
+}
+
+declare fp128 @logl(fp128) #2
+
+; CHECK: libcall1_log2l:
+; CHECK: ld $25, %call16(log2l)
+
+define fp128 @libcall1_log2l() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @log2l(fp128 %0) nounwind
+ ret fp128 %call
+}
+
+declare fp128 @log2l(fp128) #2
+
+; CHECK: libcall1_log10l:
+; CHECK: ld $25, %call16(log10l)
+
+define fp128 @libcall1_log10l() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @log10l(fp128 %0) nounwind
+ ret fp128 %call
+}
+
+declare fp128 @log10l(fp128) #2
+
+; CHECK: libcall1_nearbyintl:
+; CHECK: ld $25, %call16(nearbyintl)
+
+define fp128 @libcall1_nearbyintl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @nearbyintl(fp128 %0) nounwind readnone
+ ret fp128 %call
+}
+
+declare fp128 @nearbyintl(fp128) #1
+
+; CHECK: libcall1_floorl:
+; CHECK: ld $25, %call16(floorl)
+
+define fp128 @libcall1_floorl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @floorl(fp128 %0) nounwind readnone
+ ret fp128 %call
+}
+
+declare fp128 @floorl(fp128) #1
+
+; CHECK: libcall1_sqrtl:
+; CHECK: ld $25, %call16(sqrtl)
+
+define fp128 @libcall1_sqrtl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @sqrtl(fp128 %0) nounwind
+ ret fp128 %call
+}
+
+declare fp128 @sqrtl(fp128) #2
+
+; CHECK: libcall1_rintl:
+; CHECK: ld $25, %call16(rintl)
+
+define fp128 @libcall1_rintl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %call = tail call fp128 @rintl(fp128 %0) nounwind readnone
+ ret fp128 %call
+}
+
+declare fp128 @rintl(fp128) #1
+
+; CHECK: libcall_powil:
+; CHECK: ld $25, %call16(__powitf2)
+
+define fp128 @libcall_powil(fp128 %a, i32 %b) {
+entry:
+ %0 = tail call fp128 @llvm.powi.f128(fp128 %a, i32 %b)
+ ret fp128 %0
+}
+
+declare fp128 @llvm.powi.f128(fp128, i32) #3
+
+; CHECK: libcall2_copysignl:
+; CHECK: daddiu $[[R2:[0-9]+]], $zero, 1
+; CHECK: dsll $[[R3:[0-9]+]], $[[R2]], 63
+; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1)
+; CHECK: ld $[[R1:[0-9]+]], 8($[[R0]])
+; CHECK: and $[[R4:[0-9]+]], $[[R1]], $[[R3]]
+; CHECK: ld $[[R5:[0-9]+]], %got_disp(gld0)
+; CHECK: ld $[[R6:[0-9]+]], 8($[[R5]])
+; CHECK: daddiu $[[R7:[0-9]+]], $[[R3]], -1
+; CHECK: and $[[R8:[0-9]+]], $[[R6]], $[[R7]]
+; CHECK: or $4, $[[R8]], $[[R4]]
+; CHECK: ld $2, 0($[[R5]])
+
+define fp128 @libcall2_copysignl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %1 = load fp128* @gld1, align 16
+ %call = tail call fp128 @copysignl(fp128 %0, fp128 %1) nounwind readnone
+ ret fp128 %call
+}
+
+declare fp128 @copysignl(fp128, fp128) #1
+
+; CHECK: libcall2_powl:
+; CHECK: ld $25, %call16(powl)
+
+define fp128 @libcall2_powl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %1 = load fp128* @gld1, align 16
+ %call = tail call fp128 @powl(fp128 %0, fp128 %1) nounwind
+ ret fp128 %call
+}
+
+declare fp128 @powl(fp128, fp128) #2
+
+; CHECK: libcall2_fmodl:
+; CHECK: ld $25, %call16(fmodl)
+
+define fp128 @libcall2_fmodl() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %1 = load fp128* @gld1, align 16
+ %call = tail call fp128 @fmodl(fp128 %0, fp128 %1) nounwind
+ ret fp128 %call
+}
+
+declare fp128 @fmodl(fp128, fp128) #2
+
+; CHECK: libcall3_fmal:
+; CHECK: ld $25, %call16(fmal)
+
+define fp128 @libcall3_fmal() {
+entry:
+ %0 = load fp128* @gld0, align 16
+ %1 = load fp128* @gld2, align 16
+ %2 = load fp128* @gld1, align 16
+ %3 = tail call fp128 @llvm.fma.f128(fp128 %0, fp128 %2, fp128 %1)
+ ret fp128 %3
+}
+
+declare fp128 @llvm.fma.f128(fp128, fp128, fp128) #4
+
+; CHECK: cmp_lt:
+; CHECK: ld $25, %call16(__lttf2)
+
+define i32 @cmp_lt(fp128 %a, fp128 %b) {
+entry:
+ %cmp = fcmp olt fp128 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; CHECK: cmp_le:
+; CHECK: ld $25, %call16(__letf2)
+
+define i32 @cmp_le(fp128 %a, fp128 %b) {
+entry:
+ %cmp = fcmp ole fp128 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; CHECK: cmp_gt:
+; CHECK: ld $25, %call16(__gttf2)
+
+define i32 @cmp_gt(fp128 %a, fp128 %b) {
+entry:
+ %cmp = fcmp ogt fp128 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; CHECK: cmp_ge:
+; CHECK: ld $25, %call16(__getf2)
+
+define i32 @cmp_ge(fp128 %a, fp128 %b) {
+entry:
+ %cmp = fcmp oge fp128 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; CHECK: cmp_eq:
+; CHECK: ld $25, %call16(__eqtf2)
+
+define i32 @cmp_eq(fp128 %a, fp128 %b) {
+entry:
+ %cmp = fcmp oeq fp128 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; CHECK: cmp_ne:
+; CHECK: ld $25, %call16(__netf2)
+
+define i32 @cmp_ne(fp128 %a, fp128 %b) {
+entry:
+ %cmp = fcmp une fp128 %a, %b
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; CHECK: load_LD_LD:
+; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1)
+; CHECK: ld $2, 0($[[R0]])
+; CHECK: ld $4, 8($[[R0]])
+
+define fp128 @load_LD_LD() {
+entry:
+ %0 = load fp128* @gld1, align 16
+ ret fp128 %0
+}
+
+; CHECK: load_LD_float:
+; CHECK: ld $[[R0:[0-9]+]], %got_disp(gf1)
+; CHECK: lw $4, 0($[[R0]])
+; CHECK: ld $25, %call16(__extendsftf2)
+; CHECK: jalr $25
+
+define fp128 @load_LD_float() {
+entry:
+ %0 = load float* @gf1, align 4
+ %conv = fpext float %0 to fp128
+ ret fp128 %conv
+}
+
+; CHECK: load_LD_double:
+; CHECK: ld $[[R0:[0-9]+]], %got_disp(gd1)
+; CHECK: ld $4, 0($[[R0]])
+; CHECK: ld $25, %call16(__extenddftf2)
+; CHECK: jalr $25
+
+define fp128 @load_LD_double() {
+entry:
+ %0 = load double* @gd1, align 8
+ %conv = fpext double %0 to fp128
+ ret fp128 %conv
+}
+
+; CHECK: store_LD_LD:
+; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1)
+; CHECK: ld $[[R1:[0-9]+]], 0($[[R0]])
+; CHECK: ld $[[R2:[0-9]+]], 8($[[R0]])
+; CHECK: ld $[[R3:[0-9]+]], %got_disp(gld0)
+; CHECK: sd $[[R2]], 8($[[R3]])
+; CHECK: sd $[[R1]], 0($[[R3]])
+
+define void @store_LD_LD() {
+entry:
+ %0 = load fp128* @gld1, align 16
+ store fp128 %0, fp128* @gld0, align 16
+ ret void
+}
+
+; CHECK: store_LD_float:
+; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1)
+; CHECK: ld $4, 0($[[R0]])
+; CHECK: ld $5, 8($[[R0]])
+; CHECK: ld $25, %call16(__trunctfsf2)
+; CHECK: jalr $25
+; CHECK: ld $[[R1:[0-9]+]], %got_disp(gf1)
+; CHECK: sw $2, 0($[[R1]])
+
+define void @store_LD_float() {
+entry:
+ %0 = load fp128* @gld1, align 16
+ %conv = fptrunc fp128 %0 to float
+ store float %conv, float* @gf1, align 4
+ ret void
+}
+
+; CHECK: store_LD_double:
+; CHECK: ld $[[R0:[0-9]+]], %got_disp(gld1)
+; CHECK: ld $4, 0($[[R0]])
+; CHECK: ld $5, 8($[[R0]])
+; CHECK: ld $25, %call16(__trunctfdf2)
+; CHECK: jalr $25
+; CHECK: ld $[[R1:[0-9]+]], %got_disp(gd1)
+; CHECK: sd $2, 0($[[R1]])
+
+define void @store_LD_double() {
+entry:
+ %0 = load fp128* @gld1, align 16
+ %conv = fptrunc fp128 %0 to double
+ store double %conv, double* @gd1, align 8
+ ret void
+}
+
+; CHECK: select_LD:
+; CHECK: movn $8, $6, $4
+; CHECK: movn $9, $7, $4
+; CHECK: move $2, $8
+; CHECK: move $4, $9
+
+define fp128 @select_LD(i32 %a, i64, fp128 %b, fp128 %c) {
+entry:
+ %tobool = icmp ne i32 %a, 0
+ %cond = select i1 %tobool, fp128 %b, fp128 %c
+ ret fp128 %cond
+}
+
+; CHECK: selectCC_LD:
+; CHECK: move $[[R0:[0-9]+]], $11
+; CHECK: move $[[R1:[0-9]+]], $10
+; CHECK: move $[[R2:[0-9]+]], $9
+; CHECK: move $[[R3:[0-9]+]], $8
+; CHECK: ld $25, %call16(__gttf2)($gp)
+; CHECK: jalr $25
+; CHECK: slti $1, $2, 1
+; CHECK: movz $[[R1]], $[[R3]], $1
+; CHECK: movz $[[R0]], $[[R2]], $1
+; CHECK: move $2, $[[R1]]
+; CHECK: move $4, $[[R0]]
+
+define fp128 @selectCC_LD(fp128 %a, fp128 %b, fp128 %c, fp128 %d) {
+entry:
+ %cmp = fcmp ogt fp128 %a, %b
+ %cond = select i1 %cmp, fp128 %c, fp128 %d
+ ret fp128 %cond
+}
diff --git a/test/CodeGen/Mips/mips64-libcall.ll b/test/CodeGen/Mips/mips64-libcall.ll
new file mode 100644
index 000000000000..d54598be70d8
--- /dev/null
+++ b/test/CodeGen/Mips/mips64-libcall.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -O3 < %s |\
+; RUN: FileCheck %s -check-prefix=HARD
+; RUN: llc -march=mips64el -mcpu=mips64r2 -soft-float < %s |\
+; RUN: FileCheck %s -check-prefix=SOFT
+
+; Check that %add is not passed in an integer register.
+;
+; HARD: callfloor:
+; HARD-NOT: dmfc1 $4
+
+define double @callfloor(double %d) nounwind readnone {
+entry:
+ %add = fadd double %d, 1.000000e+00
+ %call = tail call double @floor(double %add) nounwind readnone
+ ret double %call
+}
+
+declare double @floor(double) nounwind readnone
+
+; Check call16.
+;
+; SOFT: f64add:
+; SOFT: ld $25, %call16(__adddf3)
+
+define double @f64add(double %a, double %b) {
+entry:
+ %add = fadd double %a, %b
+ ret double %add
+}
diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll
index e26b0223b447..e01609f3b1e4 100644
--- a/test/CodeGen/Mips/mips64-sret.ll
+++ b/test/CodeGen/Mips/mips64-sret.ll
@@ -6,7 +6,7 @@
define void @f(%struct.S* noalias sret %agg.result) nounwind {
entry:
-; CHECK: daddu $2, $zero, $4
+; CHECK: move $2, $4
%0 = bitcast %struct.S* %agg.result to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.S* @g to i8*), i64 32, i32 4, i1 false)
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
index 5558ba6e10f4..0a8f85f4825d 100644
--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -12,20 +12,20 @@ define void @f1() nounwind {
entry:
; CHECK: lw $[[R1:[0-9]+]], %got(f1.s1)
; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1)
+; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]])
+; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]])
+; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]])
+; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]])
; CHECK: lw $[[R6:[0-9]+]], 28($[[R0]])
; CHECK: sw $[[R6]], 36($sp)
-; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]])
; CHECK: sw $[[R5]], 32($sp)
-; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]])
; CHECK: sw $[[R4]], 28($sp)
-; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]])
; CHECK: sw $[[R3]], 24($sp)
-; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]])
; CHECK: sw $[[R7]], 20($sp)
; CHECK: lw $[[R2:[0-9]+]], 8($[[R0]])
; CHECK: sw $[[R2]], 16($sp)
-; CHECK: lw $7, 4($[[R0]])
; CHECK: lw $6, %lo(f1.s1)($[[R1]])
+; CHECK: lw $7, 4($[[R0]])
%agg.tmp10 = alloca %struct.S3, align 4
call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind
call void @callee2(%struct.S2* byval @f1.s2) nounwind
diff --git a/test/CodeGen/Mips/return-vector-float4.ll b/test/CodeGen/Mips/return-vector-float4.ll
deleted file mode 100644
index ae10f123e4d2..000000000000
--- a/test/CodeGen/Mips/return-vector-float4.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc -march=mipsel -mattr=+android < %s | FileCheck %s
-
-define <4 x float> @retvec4() nounwind readnone {
-entry:
-; CHECK: lwc1 $f0
-; CHECK: lwc1 $f2
-; CHECK: lwc1 $f1
-; CHECK: lwc1 $f3
-
- ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-}
-
diff --git a/test/CodeGen/Mips/return_address.ll b/test/CodeGen/Mips/return_address.ll
index e1c9241984ca..34b72baa6d25 100644
--- a/test/CodeGen/Mips/return_address.ll
+++ b/test/CodeGen/Mips/return_address.ll
@@ -5,7 +5,7 @@ entry:
%0 = call i8* @llvm.returnaddress(i32 0)
ret i8* %0

-; CHECK: addu $2, $zero, $ra
+; CHECK: move $2, $ra
}

define i8* @f2() nounwind {
@@ -14,9 +14,9 @@ entry:
%0 = call i8* @llvm.returnaddress(i32 0)
ret i8* %0

-; CHECK: addu $[[R0:[0-9]+]], $zero, $ra
+; CHECK: move $[[R0:[0-9]+]], $ra
; CHECK: jal
-; CHECK: addu $2, $zero, $[[R0]]
+; CHECK: move $2, $[[R0]]
}

declare i8* @llvm.returnaddress(i32) nounwind readnone
diff --git a/test/CodeGen/Mips/selTBteqzCmpi.ll b/test/CodeGen/Mips/selTBteqzCmpi.ll
new file mode 100644
index 000000000000..9cb8227f9d2b
--- /dev/null
+++ b/test/CodeGen/Mips/selTBteqzCmpi.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
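+; A select on an equality compare against an immediate should lower to cmpi
+; followed by bteqz on MIPS16, as the checks at the end of the file verify.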
+@i = global i32 1, align 4
+@j = global i32 2, align 4
+@a = global i32 5, align 4
+@.str = private unnamed_addr constant [8 x i8] c"%i = 2\0A\00", align 1
+@k = common global i32 0, align 4
+
+define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp eq i32 %0, 10
+ %1 = load i32* @i, align 4
+ %2 = load i32* @j, align 4
+ %cond = select i1 %cmp, i32 %1, i32 %2
+ store i32 %cond, i32* @i, align 4
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+
+; 16: cmpi ${{[0-9]+}}, 10
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
+
diff --git a/test/CodeGen/Mips/selTBtnezCmpi.ll b/test/CodeGen/Mips/selTBtnezCmpi.ll
new file mode 100644
index 000000000000..bd334f59d33b
--- /dev/null
+++ b/test/CodeGen/Mips/selTBtnezCmpi.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 1, align 4
+@j = global i32 2, align 4
+@a = global i32 5, align 4
+@.str = private unnamed_addr constant [8 x i8] c"%i = 1\0A\00", align 1
+@k = common global i32 0, align 4
+
+define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp ne i32 %0, 10
+ %1 = load i32* @i, align 4
+ %2 = load i32* @j, align 4
+ %cond = select i1 %cmp, i32 %1, i32 %2
+ store i32 %cond, i32* @i, align 4
+ ret void
+}
+
+; 16: cmpi ${{[0-9]+}}, 10
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+
diff --git a/test/CodeGen/Mips/selTBtnezSlti.ll b/test/CodeGen/Mips/selTBtnezSlti.ll
new file mode 100644
index 000000000000..593f6f274eb3
--- /dev/null
+++ b/test/CodeGen/Mips/selTBtnezSlti.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 1, align 4
+@j = global i32 2, align 4
+@a = global i32 5, align 4
+@.str = private unnamed_addr constant [9 x i8] c"%i = 2 \0A\00", align 1
+@k = common global i32 0, align 4
+
+define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp slt i32 %0, 10
+ %1 = load i32* @j, align 4
+ %2 = load i32* @i, align 4
+ %cond = select i1 %cmp, i32 %1, i32 %2
+ store i32 %cond, i32* @i, align 4
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16: slti ${{[0-9]+}}, 10
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+
diff --git a/test/CodeGen/Mips/seleq.ll b/test/CodeGen/Mips/seleq.ll
new file mode 100644
index 000000000000..190baad0b1db
--- /dev/null
+++ b/test/CodeGen/Mips/seleq.ll
@@ -0,0 +1,95 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
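+; Each of the four blocks below selects between @t and @f on an integer
+; equality compare; MIPS16 should emit cmp followed by btnez for each one,
+; as the checks at the end of the file verify.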
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 10, align 4
+@c = global i32 1, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+
+define void @calc_seleq() nounwind "target-cpu"="mips32" "target-features"="+o32,+mips32" {
+entry:
+ %0 = load i32* @a, align 4
+ %1 = load i32* @b, align 4
+ %cmp = icmp eq i32 %0, %1
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ %2 = load i32* @f, align 4
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ %3 = load i32* @t, align 4
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
+ store i32 %cond, i32* @z1, align 4
+ %4 = load i32* @b, align 4
+ %5 = load i32* @a, align 4
+ %cmp1 = icmp eq i32 %4, %5
+ br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2: ; preds = %cond.end
+ %6 = load i32* @f, align 4
+ br label %cond.end4
+
+cond.false3: ; preds = %cond.end
+ %7 = load i32* @t, align 4
+ br label %cond.end4
+
+cond.end4: ; preds = %cond.false3, %cond.true2
+ %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
+ store i32 %cond5, i32* @z2, align 4
+ %8 = load i32* @c, align 4
+ %9 = load i32* @a, align 4
+ %cmp6 = icmp eq i32 %8, %9
+ br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7: ; preds = %cond.end4
+ %10 = load i32* @t, align 4
+ br label %cond.end9
+
+cond.false8: ; preds = %cond.end4
+ %11 = load i32* @f, align 4
+ br label %cond.end9
+
+cond.end9: ; preds = %cond.false8, %cond.true7
+ %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
+ store i32 %cond10, i32* @z3, align 4
+ %12 = load i32* @a, align 4
+ %13 = load i32* @c, align 4
+ %cmp11 = icmp eq i32 %12, %13
+ br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12: ; preds = %cond.end9
+ %14 = load i32* @t, align 4
+ br label %cond.end14
+
+cond.false13: ; preds = %cond.end9
+ %15 = load i32* @f, align 4
+ br label %cond.end14
+
+cond.end14: ; preds = %cond.false13, %cond.true12
+ %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ]
+ store i32 %cond15, i32* @z4, align 4
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips32" "target-features"="+o32,+mips32" }
+
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
diff --git a/test/CodeGen/Mips/seleqk.ll b/test/CodeGen/Mips/seleqk.ll
new file mode 100644
index 000000000000..3ca622d5d8fe
--- /dev/null
+++ b/test/CodeGen/Mips/seleqk.ll
@@ -0,0 +1,91 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 1000, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_seleqk() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp eq i32 %0, 1
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ %1 = load i32* @t, align 4
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ %2 = load i32* @f, align 4
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
+ store i32 %cond, i32* @z1, align 4
+ %3 = load i32* @a, align 4
+ %cmp1 = icmp eq i32 %3, 1000
+ br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2: ; preds = %cond.end
+ %4 = load i32* @f, align 4
+ br label %cond.end4
+
+cond.false3: ; preds = %cond.end
+ %5 = load i32* @t, align 4
+ br label %cond.end4
+
+cond.end4: ; preds = %cond.false3, %cond.true2
+ %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
+ store i32 %cond5, i32* @z2, align 4
+ %6 = load i32* @b, align 4
+ %cmp6 = icmp eq i32 %6, 3
+ br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7: ; preds = %cond.end4
+ %7 = load i32* @f, align 4
+ br label %cond.end9
+
+cond.false8: ; preds = %cond.end4
+ %8 = load i32* @t, align 4
+ br label %cond.end9
+
+cond.end9: ; preds = %cond.false8, %cond.true7
+ %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
+ store i32 %cond10, i32* @z3, align 4
+ %9 = load i32* @b, align 4
+ %cmp11 = icmp eq i32 %9, 1000
+ br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12: ; preds = %cond.end9
+ %10 = load i32* @t, align 4
+ br label %cond.end14
+
+cond.false13: ; preds = %cond.end9
+ %11 = load i32* @f, align 4
+ br label %cond.end14
+
+cond.end14: ; preds = %cond.false13, %cond.true12
+ %cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ]
+ store i32 %cond15, i32* @z4, align 4
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16: cmpi ${{[0-9]+}}, 1 # 16 bit inst
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmpi ${{[0-9]+}}, 1000
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmpi ${{[0-9]+}}, 3 # 16 bit inst
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmpi ${{[0-9]+}}, 1000
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
diff --git a/test/CodeGen/Mips/selgek.ll b/test/CodeGen/Mips/selgek.ll
new file mode 100644
index 000000000000..8ab4046e92cb
--- /dev/null
+++ b/test/CodeGen/Mips/selgek.ll
@@ -0,0 +1,94 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 2, align 4
+@b = global i32 1000, align 4
+@c = global i32 2, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp sge i32 %0, 1000
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ %1 = load i32* @f, align 4
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ %2 = load i32* @t, align 4
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
+ store i32 %cond, i32* @z1, align 4
+ %3 = load i32* @b, align 4
+ %cmp1 = icmp sge i32 %3, 1
+ br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2: ; preds = %cond.end
+ %4 = load i32* @t, align 4
+ br label %cond.end4
+
+cond.false3: ; preds = %cond.end
+ %5 = load i32* @f, align 4
+ br label %cond.end4
+
+cond.end4: ; preds = %cond.false3, %cond.true2
+ %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
+ store i32 %cond5, i32* @z2, align 4
+ %6 = load i32* @c, align 4
+ %cmp6 = icmp sge i32 %6, 2
+ br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7: ; preds = %cond.end4
+ %7 = load i32* @t, align 4
+ br label %cond.end9
+
+cond.false8: ; preds = %cond.end4
+ %8 = load i32* @f, align 4
+ br label %cond.end9
+
+cond.end9: ; preds = %cond.false8, %cond.true7
+ %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
+ store i32 %cond10, i32* @z3, align 4
+ %9 = load i32* @a, align 4
+ %cmp11 = icmp sge i32 %9, 2
+ br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12: ; preds = %cond.end9
+ %10 = load i32* @t, align 4
+ br label %cond.end14
+
+cond.false13: ; preds = %cond.end9
+ %11 = load i32* @f, align 4
+ br label %cond.end14
+
+cond.end14: ; preds = %cond.false13, %cond.true12
+ %cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ]
+ store i32 %cond15, i32* @z4, align 4
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16: slti ${{[0-9]+}}, 1000
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slti ${{[0-9]+}}, 1 # 16 bit inst
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slti ${{[0-9]+}}, 2 # 16 bit inst
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slti ${{[0-9]+}}, 2 # 16 bit inst
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+
diff --git a/test/CodeGen/Mips/selgt.ll b/test/CodeGen/Mips/selgt.ll
new file mode 100644
index 000000000000..67b9b498709b
--- /dev/null
+++ b/test/CodeGen/Mips/selgt.ll
@@ -0,0 +1,98 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 10, align 4
+@c = global i32 1, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [9 x i8] c"%i = %i\0A\00", align 1
+
+define i32 @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+ %retval = alloca i32, align 4
+ %0 = load i32* @a, align 4
+ %1 = load i32* @b, align 4
+ %cmp = icmp sgt i32 %0, %1
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ %2 = load i32* @f, align 4
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ %3 = load i32* @t, align 4
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
+ store i32 %cond, i32* @z1, align 4
+ %4 = load i32* @b, align 4
+ %5 = load i32* @a, align 4
+ %cmp1 = icmp sgt i32 %4, %5
+ br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2: ; preds = %cond.end
+ %6 = load i32* @t, align 4
+ br label %cond.end4
+
+cond.false3: ; preds = %cond.end
+ %7 = load i32* @f, align 4
+ br label %cond.end4
+
+cond.end4: ; preds = %cond.false3, %cond.true2
+ %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
+ store i32 %cond5, i32* @z2, align 4
+ %8 = load i32* @c, align 4
+ %9 = load i32* @a, align 4
+ %cmp6 = icmp sgt i32 %8, %9
+ br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7: ; preds = %cond.end4
+ %10 = load i32* @f, align 4
+ br label %cond.end9
+
+cond.false8: ; preds = %cond.end4
+ %11 = load i32* @t, align 4
+ br label %cond.end9
+
+cond.end9: ; preds = %cond.false8, %cond.true7
+ %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
+ store i32 %cond10, i32* @z3, align 4
+ %12 = load i32* @a, align 4
+ %13 = load i32* @c, align 4
+ %cmp11 = icmp sgt i32 %12, %13
+ br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12: ; preds = %cond.end9
+ %14 = load i32* @f, align 4
+ br label %cond.end14
+
+cond.false13: ; preds = %cond.end9
+ %15 = load i32* @t, align 4
+ br label %cond.end14
+
+cond.end14: ; preds = %cond.false13, %cond.true12
+ %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ]
+ store i32 %cond15, i32* @z4, align 4
+ %16 = load i32* %retval
+ ret i32 %16
+}
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
diff --git a/test/CodeGen/Mips/selle.ll b/test/CodeGen/Mips/selle.ll
new file mode 100644
index 000000000000..b27df45e6739
--- /dev/null
+++ b/test/CodeGen/Mips/selle.ll
@@ -0,0 +1,96 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 10, align 4
+@c = global i32 1, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+ %0 = load i32* @a, align 4
+ %1 = load i32* @b, align 4
+ %cmp = icmp sle i32 %0, %1
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ %2 = load i32* @t, align 4
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ %3 = load i32* @f, align 4
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
+ store i32 %cond, i32* @z1, align 4
+ %4 = load i32* @b, align 4
+ %5 = load i32* @a, align 4
+ %cmp1 = icmp sle i32 %4, %5
+ br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2: ; preds = %cond.end
+ %6 = load i32* @f, align 4
+ br label %cond.end4
+
+cond.false3: ; preds = %cond.end
+ %7 = load i32* @t, align 4
+ br label %cond.end4
+
+cond.end4: ; preds = %cond.false3, %cond.true2
+ %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
+ store i32 %cond5, i32* @z2, align 4
+ %8 = load i32* @c, align 4
+ %9 = load i32* @a, align 4
+ %cmp6 = icmp sle i32 %8, %9
+ br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7: ; preds = %cond.end4
+ %10 = load i32* @t, align 4
+ br label %cond.end9
+
+cond.false8: ; preds = %cond.end4
+ %11 = load i32* @f, align 4
+ br label %cond.end9
+
+cond.end9: ; preds = %cond.false8, %cond.true7
+ %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
+ store i32 %cond10, i32* @z3, align 4
+ %12 = load i32* @a, align 4
+ %13 = load i32* @c, align 4
+ %cmp11 = icmp sle i32 %12, %13
+ br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12: ; preds = %cond.end9
+ %14 = load i32* @t, align 4
+ br label %cond.end14
+
+cond.false13: ; preds = %cond.end9
+ %15 = load i32* @f, align 4
+ br label %cond.end14
+
+cond.end14: ; preds = %cond.false13, %cond.true12
+ %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ]
+ store i32 %cond15, i32* @z4, align 4
+ ret void
+}
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
diff --git a/test/CodeGen/Mips/selltk.ll b/test/CodeGen/Mips/selltk.ll
new file mode 100644
index 000000000000..1471b892c92a
--- /dev/null
+++ b/test/CodeGen/Mips/selltk.ll
@@ -0,0 +1,93 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 2, align 4
+@b = global i32 1000, align 4
+@c = global i32 2, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_selltk() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp slt i32 %0, 1000
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ %1 = load i32* @t, align 4
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ %2 = load i32* @f, align 4
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
+ store i32 %cond, i32* @z1, align 4
+ %3 = load i32* @b, align 4
+ %cmp1 = icmp slt i32 %3, 2
+ br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2: ; preds = %cond.end
+ %4 = load i32* @f, align 4
+ br label %cond.end4
+
+cond.false3: ; preds = %cond.end
+ %5 = load i32* @t, align 4
+ br label %cond.end4
+
+cond.end4: ; preds = %cond.false3, %cond.true2
+ %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
+ store i32 %cond5, i32* @z2, align 4
+ %6 = load i32* @c, align 4
+ %cmp6 = icmp sgt i32 %6, 2
+ br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7: ; preds = %cond.end4
+ %7 = load i32* @f, align 4
+ br label %cond.end9
+
+cond.false8: ; preds = %cond.end4
+ %8 = load i32* @t, align 4
+ br label %cond.end9
+
+cond.end9: ; preds = %cond.false8, %cond.true7
+ %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
+ store i32 %cond10, i32* @z3, align 4
+ %9 = load i32* @a, align 4
+ %cmp11 = icmp sgt i32 %9, 2
+ br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12: ; preds = %cond.end9
+ %10 = load i32* @f, align 4
+ br label %cond.end14
+
+cond.false13: ; preds = %cond.end9
+ %11 = load i32* @t, align 4
+ br label %cond.end14
+
+cond.end14: ; preds = %cond.false13, %cond.true12
+ %cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ]
+ store i32 %cond15, i32* @z4, align 4
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slti ${{[0-9]+}}, 3 # 16 bit inst
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: slti ${{[0-9]+}}, 3 # 16 bit inst
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
+
diff --git a/test/CodeGen/Mips/selne.ll b/test/CodeGen/Mips/selne.ll
new file mode 100644
index 000000000000..e3d82b8cf5d0
--- /dev/null
+++ b/test/CodeGen/Mips/selne.ll
@@ -0,0 +1,97 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 10, align 4
+@c = global i32 1, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_seleq() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+ %0 = load i32* @a, align 4
+ %1 = load i32* @b, align 4
+ %cmp = icmp ne i32 %0, %1
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ %2 = load i32* @f, align 4
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ %3 = load i32* @t, align 4
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
+ store i32 %cond, i32* @z1, align 4
+ %4 = load i32* @b, align 4
+ %5 = load i32* @a, align 4
+ %cmp1 = icmp ne i32 %4, %5
+ br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2: ; preds = %cond.end
+ %6 = load i32* @f, align 4
+ br label %cond.end4
+
+cond.false3: ; preds = %cond.end
+ %7 = load i32* @t, align 4
+ br label %cond.end4
+
+cond.end4: ; preds = %cond.false3, %cond.true2
+ %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
+ store i32 %cond5, i32* @z2, align 4
+ %8 = load i32* @c, align 4
+ %9 = load i32* @a, align 4
+ %cmp6 = icmp ne i32 %8, %9
+ br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7: ; preds = %cond.end4
+ %10 = load i32* @t, align 4
+ br label %cond.end9
+
+cond.false8: ; preds = %cond.end4
+ %11 = load i32* @f, align 4
+ br label %cond.end9
+
+cond.end9: ; preds = %cond.false8, %cond.true7
+ %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
+ store i32 %cond10, i32* @z3, align 4
+ %12 = load i32* @a, align 4
+ %13 = load i32* @c, align 4
+ %cmp11 = icmp ne i32 %12, %13
+ br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12: ; preds = %cond.end9
+ %14 = load i32* @t, align 4
+ br label %cond.end14
+
+cond.false13: ; preds = %cond.end9
+ %15 = load i32* @f, align 4
+ br label %cond.end14
+
+cond.end14: ; preds = %cond.false13, %cond.true12
+ %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ]
+ store i32 %cond15, i32* @z4, align 4
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
diff --git a/test/CodeGen/Mips/selnek.ll b/test/CodeGen/Mips/selnek.ll
new file mode 100644
index 000000000000..26015523106d
--- /dev/null
+++ b/test/CodeGen/Mips/selnek.ll
@@ -0,0 +1,107 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+
+@t = global i32 10, align 4
+@f = global i32 199, align 4
+@a = global i32 1, align 4
+@b = global i32 1000, align 4
+@z1 = common global i32 0, align 4
+@z2 = common global i32 0, align 4
+@z3 = common global i32 0, align 4
+@z4 = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+ %0 = load i32* @a, align 4
+ %cmp = icmp ne i32 %0, 1
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ %1 = load i32* @f, align 4
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ %2 = load i32* @t, align 4
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
+ store i32 %cond, i32* @z1, align 4
+ %3 = load i32* @a, align 4
+ %cmp1 = icmp ne i32 %3, 1000
+ br i1 %cmp1, label %cond.true2, label %cond.false3
+
+cond.true2: ; preds = %cond.end
+ %4 = load i32* @t, align 4
+ br label %cond.end4
+
+cond.false3: ; preds = %cond.end
+ %5 = load i32* @f, align 4
+ br label %cond.end4
+
+cond.end4: ; preds = %cond.false3, %cond.true2
+ %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
+ store i32 %cond5, i32* @z2, align 4
+ %6 = load i32* @b, align 4
+ %cmp6 = icmp ne i32 %6, 3
+ br i1 %cmp6, label %cond.true7, label %cond.false8
+
+cond.true7: ; preds = %cond.end4
+ %7 = load i32* @t, align 4
+ br label %cond.end9
+
+cond.false8: ; preds = %cond.end4
+ %8 = load i32* @f, align 4
+ br label %cond.end9
+
+cond.end9: ; preds = %cond.false8, %cond.true7
+ %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
+ store i32 %cond10, i32* @z3, align 4
+ %9 = load i32* @b, align 4
+ %cmp11 = icmp ne i32 %9, 1000
+ br i1 %cmp11, label %cond.true12, label %cond.false13
+
+cond.true12: ; preds = %cond.end9
+ %10 = load i32* @f, align 4
+ br label %cond.end14
+
+cond.false13: ; preds = %cond.end9
+ %11 = load i32* @t, align 4
+ br label %cond.end14
+
+cond.end14: ; preds = %cond.false13, %cond.true12
+ %cond15 = phi i32 [ %10, %cond.true12 ], [ %11, %cond.false13 ]
+ store i32 %cond15, i32* @z4, align 4
+ ret void
+}
+
+define i32 @main() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
+entry:
+ call void @calc_z() "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %0 = load i32* @z1, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %0) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %1 = load i32* @z2, align 4
+ %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %2 = load i32* @z3, align 4
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %3 = load i32* @z4, align 4
+ %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %3) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+
+attributes #0 = { nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+attributes #1 = { "target-cpu"="mips16" "target-features"="+mips16,+o32" }
+
+; 16: cmpi ${{[0-9]+}}, 1 # 16 bit inst
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmpi ${{[0-9]+}}, 1000
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmpi ${{[0-9]+}}, 3 # 16 bit inst
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
+
+; 16: cmpi ${{[0-9]+}}, 1000
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
\ No newline at end of file
diff --git a/test/CodeGen/Mips/selpat.ll b/test/CodeGen/Mips/selpat.ll
index cda0c96ef4be..8eda8de45e08 100644
--- a/test/CodeGen/Mips/selpat.ll
+++ b/test/CodeGen/Mips/selpat.ll
@@ -20,7 +20,7 @@ entry:
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
-; 16: bteqz .+4
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
store i32 %cond, i32* @z2, align 4
%4 = load i32* @c, align 4
@@ -41,7 +41,7 @@ entry:
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
; 16: cmpi ${{[0-9]+}}, 1
-; 16: bteqz .+4
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%cmp1 = icmp eq i32 %0, 10
%cond5 = select i1 %cmp1, i32 %2, i32 %1
@@ -51,7 +51,7 @@ entry:
%cond10 = select i1 %cmp6, i32 %2, i32 %1
store i32 %cond10, i32* @z3, align 4
; 16: cmpi ${{[0-9]+}}, 10
-; 16: bteqz .+4
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%cmp11 = icmp eq i32 %3, 10
%cond15 = select i1 %cmp11, i32 %1, i32 %2
@@ -67,7 +67,7 @@ entry:
%2 = load i32* @f, align 4
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
-; 16: beqz ${{[0-9]+}}, .+4
+; 16: beqz ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%3 = load i32* @b, align 4
%cmp1 = icmp eq i32 %3, 0
@@ -91,7 +91,7 @@ entry:
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
-; 16: bteqz .+4
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%cmp1 = icmp sge i32 %1, %0
%cond5 = select i1 %cmp1, i32 %3, i32 %2
@@ -112,7 +112,7 @@ entry:
%1 = load i32* @b, align 4
%cmp = icmp sgt i32 %0, %1
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
-; 16: btnez .+4
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%2 = load i32* @f, align 4
%3 = load i32* @t, align 4
@@ -141,7 +141,7 @@ entry:
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
-; 16: bteqz .+4
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%cmp1 = icmp sle i32 %1, %0
%cond5 = select i1 %cmp1, i32 %3, i32 %2
@@ -165,7 +165,7 @@ entry:
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
; 16: slti ${{[0-9]+}}, {{[0-9]+}}
-; 16: btnez .+4
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%3 = load i32* @b, align 4
%cmp1 = icmp slt i32 %3, 2
@@ -192,7 +192,7 @@ entry:
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
-; 16: btnez .+4
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
store i32 %cond, i32* @z2, align 4
%4 = load i32* @c, align 4
@@ -212,7 +212,7 @@ entry:
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
; 16: cmpi ${{[0-9]+}}, 1
-; 16: btnez .+4
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%cmp1 = icmp ne i32 %0, 10
%cond5 = select i1 %cmp1, i32 %2, i32 %1
@@ -222,7 +222,7 @@ entry:
%cond10 = select i1 %cmp6, i32 %2, i32 %1
store i32 %cond10, i32* @z3, align 4
; 16: cmpi ${{[0-9]+}}, 10
-; 16: btnez .+4
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%cmp11 = icmp ne i32 %3, 10
%cond15 = select i1 %cmp11, i32 %1, i32 %2
@@ -238,7 +238,7 @@ entry:
%2 = load i32* @t, align 4
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
-; 16: bnez ${{[0-9]+}}, .+4
+; 16: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%3 = load i32* @b, align 4
%cmp1 = icmp ne i32 %3, 0
@@ -260,7 +260,7 @@ entry:
%2 = load i32* @t, align 4
%cond = select i1 %tobool, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
-; 16: bnez ${{[0-9]+}}, .+4
+; 16: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%3 = load i32* @b, align 4
%tobool1 = icmp ne i32 %3, 0
@@ -284,7 +284,7 @@ entry:
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
-; 16: bteqz .+4
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%cmp1 = icmp uge i32 %1, %0
%cond5 = select i1 %cmp1, i32 %3, i32 %2
@@ -309,7 +309,7 @@ entry:
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
-; 16: btnez .+4
+; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%cmp1 = icmp ugt i32 %1, %0
%cond5 = select i1 %cmp1, i32 %3, i32 %2
@@ -334,7 +334,7 @@ entry:
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
-; 16: bteqz .+4
+; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
%cmp1 = icmp ule i32 %1, %0
%cond5 = select i1 %cmp1, i32 %3, i32 %2
diff --git a/test/CodeGen/Mips/seteq.ll b/test/CodeGen/Mips/seteq.ll
index da840c83a2b4..5fadf78d57a0 100644
--- a/test/CodeGen/Mips/seteq.ll
+++ b/test/CodeGen/Mips/seteq.ll
@@ -15,7 +15,7 @@ entry:
store i32 %conv, i32* @r1, align 4
; 16: xor $[[REGISTER:[0-9A-Ba-b_]+]], ${{[0-9]+}}
; 16: sltiu $[[REGISTER:[0-9A-Ba-b_]+]], 1
-; 16: move ${{[0-9]+}}, $t8
+; 16: move ${{[0-9]+}}, $24
ret void
}
diff --git a/test/CodeGen/Mips/seteqz.ll b/test/CodeGen/Mips/seteqz.ll
index d445be6aedb0..80dc3120a6a1 100644
--- a/test/CodeGen/Mips/seteqz.ll
+++ b/test/CodeGen/Mips/seteqz.ll
@@ -12,13 +12,13 @@ entry:
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: sltiu ${{[0-9]+}}, 1
-; 16: move ${{[0-9]+}}, $t8
+; 16: move ${{[0-9]+}}, $24
%1 = load i32* @j, align 4
%cmp1 = icmp eq i32 %1, 99
%conv2 = zext i1 %cmp1 to i32
store i32 %conv2, i32* @r2, align 4
; 16: xor $[[REGISTER:[0-9A-Ba-b_]+]], ${{[0-9]+}}
; 16: sltiu $[[REGISTER:[0-9A-Ba-b_]+]], 1
-; 16: move ${{[0-9]+}}, $t8
+; 16: move ${{[0-9]+}}, $24
ret void
}
diff --git a/test/CodeGen/Mips/setge.ll b/test/CodeGen/Mips/setge.ll
index 94b499bc31e9..8869eb8fc547 100644
--- a/test/CodeGen/Mips/setge.ll
+++ b/test/CodeGen/Mips/setge.ll
@@ -17,7 +17,7 @@ entry:
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
-; 16: move $[[REGISTER:[0-9]+]], $t8
+; 16: move $[[REGISTER:[0-9]+]], $24
; 16: xor $[[REGISTER]], ${{[0-9]+}}
%2 = load i32* @m, align 4
%cmp1 = icmp sge i32 %0, %2
diff --git a/test/CodeGen/Mips/setgek.ll b/test/CodeGen/Mips/setgek.ll
index b6bae09bcb5b..18a0fcf62130 100644
--- a/test/CodeGen/Mips/setgek.ll
+++ b/test/CodeGen/Mips/setgek.ll
@@ -12,7 +12,7 @@ entry:
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: slti ${{[0-9]+}}, -32768
-; 16: move ${{[0-9]+}}, $t8
+; 16: move ${{[0-9]+}}, $24
; 16: xor ${{[0-9]+}}, ${{[0-9]+}}
ret void
}
diff --git a/test/CodeGen/Mips/setle.ll b/test/CodeGen/Mips/setle.ll
index f36fb4392d76..2df6774c1fad 100644
--- a/test/CodeGen/Mips/setle.ll
+++ b/test/CodeGen/Mips/setle.ll
@@ -16,7 +16,7 @@ entry:
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
-; 16: move $[[REGISTER:[0-9]+]], $t8
+; 16: move $[[REGISTER:[0-9]+]], $24
; 16: xor $[[REGISTER]], ${{[0-9]+}}
%2 = load i32* @m, align 4
%cmp1 = icmp sle i32 %2, %1
diff --git a/test/CodeGen/Mips/setlt.ll b/test/CodeGen/Mips/setlt.ll
index 435be8e2334a..3dac74bf2e01 100644
--- a/test/CodeGen/Mips/setlt.ll
+++ b/test/CodeGen/Mips/setlt.ll
@@ -16,6 +16,6 @@ entry:
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
-; 16: move ${{[0-9]+}}, $t8
+; 16: move ${{[0-9]+}}, $24
ret void
}
diff --git a/test/CodeGen/Mips/setltk.ll b/test/CodeGen/Mips/setltk.ll
index c0b610e37784..ecebc7e578e1 100644
--- a/test/CodeGen/Mips/setltk.ll
+++ b/test/CodeGen/Mips/setltk.ll
@@ -15,6 +15,6 @@ entry:
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: slti $[[REGISTER:[0-9]+]], 10
-; 16: move $[[REGISTER]], $t8
+; 16: move $[[REGISTER]], $24
ret void
}
diff --git a/test/CodeGen/Mips/setne.ll b/test/CodeGen/Mips/setne.ll
index 6460c83c7b0b..9e66901e32b5 100644
--- a/test/CodeGen/Mips/setne.ll
+++ b/test/CodeGen/Mips/setne.ll
@@ -15,6 +15,6 @@ entry:
store i32 %conv, i32* @r1, align 4
; 16: xor $[[REGISTER:[0-9]+]], ${{[0-9]+}}
; 16: sltu ${{[0-9]+}}, $[[REGISTER]]
-; 16: move ${{[0-9]+}}, $t8
+; 16: move ${{[0-9]+}}, $24
ret void
}
diff --git a/test/CodeGen/Mips/setuge.ll b/test/CodeGen/Mips/setuge.ll
index ac72b66e9fb0..1c9b5bbe8114 100644
--- a/test/CodeGen/Mips/setuge.ll
+++ b/test/CodeGen/Mips/setuge.ll
@@ -16,7 +16,7 @@ entry:
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
-; 16: move $[[REGISTER:[0-9]+]], $t8
+; 16: move $[[REGISTER:[0-9]+]], $24
; 16: xor $[[REGISTER]], ${{[0-9]+}}
%2 = load i32* @m, align 4
%cmp1 = icmp uge i32 %0, %2
diff --git a/test/CodeGen/Mips/setugt.ll b/test/CodeGen/Mips/setugt.ll
index 328f0e3be34a..f10b47ae7178 100644
--- a/test/CodeGen/Mips/setugt.ll
+++ b/test/CodeGen/Mips/setugt.ll
@@ -16,6 +16,6 @@ entry:
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
-; 16: move ${{[0-9]+}}, $t8
+; 16: move ${{[0-9]+}}, $24
ret void
}
diff --git a/test/CodeGen/Mips/setule.ll b/test/CodeGen/Mips/setule.ll
index 792f2ae0fa29..a6d6bf064052 100644
--- a/test/CodeGen/Mips/setule.ll
+++ b/test/CodeGen/Mips/setule.ll
@@ -16,7 +16,7 @@ entry:
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
-; 16: move $[[REGISTER:[0-9]+]], $t8
+; 16: move $[[REGISTER:[0-9]+]], $24
; 16: xor $[[REGISTER]], ${{[0-9]+}}
%2 = load i32* @m, align 4
%cmp1 = icmp ule i32 %2, %1
diff --git a/test/CodeGen/Mips/setult.ll b/test/CodeGen/Mips/setult.ll
index 56d2e8daa3e0..00ee437a2ffe 100644
--- a/test/CodeGen/Mips/setult.ll
+++ b/test/CodeGen/Mips/setult.ll
@@ -16,6 +16,6 @@ entry:
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
-; 16: move ${{[0-9]+}}, $t8
+; 16: move ${{[0-9]+}}, $24
ret void
}
diff --git a/test/CodeGen/Mips/setultk.ll b/test/CodeGen/Mips/setultk.ll
index 75b270ed8428..eb9edbaad7f8 100644
--- a/test/CodeGen/Mips/setultk.ll
+++ b/test/CodeGen/Mips/setultk.ll
@@ -14,7 +14,7 @@ entry:
%cmp = icmp ult i32 %0, 10
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
-; 16: sltiu $[[REGISTER:[0-9]+]], 10
-; 16: move $[[REGISTER]], $t8
+; 16: sltiu ${{[0-9]+}}, 10 # 16 bit inst
+; 16: move ${{[0-9]+}}, $24
ret void
}
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index 72d30dc36912..b86d25e5e5e8 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -21,9 +21,9 @@ entry:
; PIC: jalr $25
; PIC: lw $2, 0($2)
-; STATIC: rdhwr $3, $29
; STATIC: lui $[[R0:[0-9]+]], %tprel_hi(t1)
; STATIC: addiu $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1)
+; STATIC: rdhwr $3, $29
; STATIC: addu $[[R2:[0-9]+]], $3, $[[R1]]
; STATIC: lw $2, 0($[[R2]])
}
diff --git a/test/CodeGen/Mips/vector-setcc.ll b/test/CodeGen/Mips/vector-setcc.ll
new file mode 100644
index 000000000000..aeff4918c8bb
--- /dev/null
+++ b/test/CodeGen/Mips/vector-setcc.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=mipsel < %s
+
+@a = common global <4 x i32> zeroinitializer, align 16
+@b = common global <4 x i32> zeroinitializer, align 16
+@g0 = common global <4 x i32> zeroinitializer, align 16
+
+define void @foo0() nounwind {
+entry:
+ %0 = load <4 x i32>* @a, align 16
+ %1 = load <4 x i32>* @b, align 16
+ %cmp = icmp slt <4 x i32> %0, %1
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ store <4 x i32> %sext, <4 x i32>* @g0, align 16
+ ret void
+}
+
diff --git a/test/CodeGen/NVPTX/annotations.ll b/test/CodeGen/NVPTX/annotations.ll
index d93f688ef1fd..39d52d382663 100644
--- a/test/CodeGen/NVPTX/annotations.ll
+++ b/test/CodeGen/NVPTX/annotations.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
diff --git a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll b/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
deleted file mode 100644
index 73c77f56bc9c..000000000000
--- a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
-
-;; These tests should run for all targets
-
-;;===-- Basic instruction selection tests ---------------------------------===;;
-
-
-;;; f64
-
-define double @fadd_f64(double %a, double %b) {
-; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
-; CHECK: ret
- %ret = fadd double %a, %b
- ret double %ret
-}
-
-define double @fsub_f64(double %a, double %b) {
-; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
-; CHECK: ret
- %ret = fsub double %a, %b
- ret double %ret
-}
-
-define double @fmul_f64(double %a, double %b) {
-; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
-; CHECK: ret
- %ret = fmul double %a, %b
- ret double %ret
-}
-
-define double @fdiv_f64(double %a, double %b) {
-; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
-; CHECK: ret
- %ret = fdiv double %a, %b
- ret double %ret
-}
-
-;; PTX does not have a floating-point rem instruction
-
-
-;;; f32
-
-define float @fadd_f32(float %a, float %b) {
-; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret
- %ret = fadd float %a, %b
- ret float %ret
-}
-
-define float @fsub_f32(float %a, float %b) {
-; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret
- %ret = fsub float %a, %b
- ret float %ret
-}
-
-define float @fmul_f32(float %a, float %b) {
-; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret
- %ret = fmul float %a, %b
- ret float %ret
-}
-
-define float @fdiv_f32(float %a, float %b) {
-; CHECK: div.full.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
-; CHECK: ret
- %ret = fdiv float %a, %b
- ret float %ret
-}
-
-;; PTX does not have a floating-point rem instruction
diff --git a/test/CodeGen/NVPTX/arithmetic-int.ll b/test/CodeGen/NVPTX/arithmetic-int.ll
index 529f84900afd..8d73b7e6c4c6 100644
--- a/test/CodeGen/NVPTX/arithmetic-int.ll
+++ b/test/CodeGen/NVPTX/arithmetic-int.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
diff --git a/test/CodeGen/NVPTX/calling-conv.ll b/test/CodeGen/NVPTX/calling-conv.ll
index 968203e5f70e..190a1462adbc 100644
--- a/test/CodeGen/NVPTX/calling-conv.ll
+++ b/test/CodeGen/NVPTX/calling-conv.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll
index 12fc7548212c..16af0a336ddc 100644
--- a/test/CodeGen/NVPTX/compare-int.ll
+++ b/test/CodeGen/NVPTX/compare-int.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
diff --git a/test/CodeGen/NVPTX/convert-fp.ll b/test/CodeGen/NVPTX/convert-fp.ll
index 21c84379b062..1882121fa724 100644
--- a/test/CodeGen/NVPTX/convert-fp.ll
+++ b/test/CodeGen/NVPTX/convert-fp.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
diff --git a/test/CodeGen/NVPTX/convert-int-sm10.ll b/test/CodeGen/NVPTX/convert-int-sm10.ll
deleted file mode 100644
index 20716f982e3b..000000000000
--- a/test/CodeGen/NVPTX/convert-int-sm10.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
-
-
-; i16
-
-define i16 @cvt_i16_i32(i32 %x) {
-; CHECK: cvt.u16.u32 %rs{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: ret
- %a = trunc i32 %x to i16
- ret i16 %a
-}
-
-define i16 @cvt_i16_i64(i64 %x) {
-; CHECK: cvt.u16.u64 %rs{{[0-9]+}}, %rl{{[0-9]+}}
-; CHECK: ret
- %a = trunc i64 %x to i16
- ret i16 %a
-}
-
-
-
-; i32
-
-define i32 @cvt_i32_i16(i16 %x) {
-; CHECK: cvt.u32.u16 %r{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: ret
- %a = zext i16 %x to i32
- ret i32 %a
-}
-
-define i32 @cvt_i32_i64(i64 %x) {
-; CHECK: cvt.u32.u64 %r{{[0-9]+}}, %rl{{[0-9]+}}
-; CHECK: ret
- %a = trunc i64 %x to i32
- ret i32 %a
-}
-
-
-
-; i64
-
-define i64 @cvt_i64_i16(i16 %x) {
-; CHECK: cvt.u64.u16 %rl{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: ret
- %a = zext i16 %x to i64
- ret i64 %a
-}
-
-define i64 @cvt_i64_i32(i32 %x) {
-; CHECK: cvt.u64.u32 %rl{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: ret
- %a = zext i32 %x to i64
- ret i64 %a
-}
diff --git a/test/CodeGen/NVPTX/intrin-nocapture.ll b/test/CodeGen/NVPTX/intrin-nocapture.ll
new file mode 100644
index 000000000000..55781bb15a0b
--- /dev/null
+++ b/test/CodeGen/NVPTX/intrin-nocapture.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+; Address space intrinsics were erroneously marked NoCapture, leading to bad
+; optimizations (such as the store below being eliminated as dead code). This
+; test makes sure we don't regress.
+
+declare void @foo(i32 addrspace(1)*)
+
+declare i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32*)
+
+; CHECK: @bar
+define void @bar() {
+ %t1 = alloca i32
+; CHECK: call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1)
+; CHECK-NEXT: store i32 10, i32* %t1
+ %t2 = call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1)
+ store i32 10, i32* %t1
+ call void @foo(i32 addrspace(1)* %t2)
+ ret void
+}
+
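As a hedged aside on why nocapture was wrong for these intrinsics (illustrative declarations, not part of this import): nocapture promises the callee does not retain the pointer, but an address-space conversion returns an aliasing pointer, which is itself a capture.

; erroneous: lets the optimizer assume nothing aliases %t1 through the result,
; so the store of 10 above could be deleted as dead
declare i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* nocapture)
; corrected form, matching the declaration used in this test:
declare i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32*)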
diff --git a/test/CodeGen/NVPTX/intrinsic-old.ll b/test/CodeGen/NVPTX/intrinsic-old.ll
index 1c9879c4178b..53a28f333798 100644
--- a/test/CodeGen/NVPTX/intrinsic-old.ll
+++ b/test/CodeGen/NVPTX/intrinsic-old.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll
index afab60ca96a8..8b0357be87cb 100644
--- a/test/CodeGen/NVPTX/intrinsics.ll
+++ b/test/CodeGen/NVPTX/intrinsics.ll
@@ -1,5 +1,3 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll
index d1f5093df223..3265868d3c52 100644
--- a/test/CodeGen/NVPTX/ld-addrspace.ll
+++ b/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll
new file mode 100644
index 000000000000..0d02194651e3
--- /dev/null
+++ b/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0
+; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1
+
+@str = private addrspace(4) unnamed_addr constant [8 x i8] c"USE_MUL\00"
+
+declare i32 @__nvvm_reflect(i8*)
+declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
+
+define float @foo(float %a, float %b) {
+; USE_MUL_0: define float @foo
+; USE_MUL_0-NOT: call i32 @__nvvm_reflect
+; USE_MUL_1: define float @foo
+; USE_MUL_1-NOT: call i32 @__nvvm_reflect
+ %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
+ %cmp = icmp ugt i32 %reflect, 0
+ br i1 %cmp, label %use_mul, label %use_add
+
+use_mul:
+; USE_MUL_1: fmul float %a, %b
+; USE_MUL_0-NOT: fadd float %a, %b
+ %ret1 = fmul float %a, %b
+ br label %exit
+
+use_add:
+; USE_MUL_0: fadd float %a, %b
+; USE_MUL_1-NOT: fmul float %a, %b
+ %ret2 = fadd float %a, %b
+ br label %exit
+
+exit:
+ %ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
+ ret float %ret
+}
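A hedged sketch of the expected result for USE_MUL=0, per the RUN and CHECK lines above: -nvvm-reflect replaces the reflect call with the constant 0, the compare folds to false, and -O2 then deletes the dead multiply arm.

; approximate IR after -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2:
define float @foo(float %a, float %b) {
  %ret2 = fadd float %a, %b
  ret float %ret2
}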
diff --git a/test/CodeGen/NVPTX/sched1.ll b/test/CodeGen/NVPTX/sched1.ll
new file mode 100644
index 000000000000..03ab635e73b9
--- /dev/null
+++ b/test/CodeGen/NVPTX/sched1.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; Ensure source scheduling is working
+
+define void @foo(i32* %a) {
+; CHECK: .func foo
+; CHECK: ld.u32
+; CHECK-NEXT: ld.u32
+; CHECK-NEXT: ld.u32
+; CHECK-NEXT: ld.u32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+ %ptr0 = getelementptr i32* %a, i32 0
+ %val0 = load i32* %ptr0
+ %ptr1 = getelementptr i32* %a, i32 1
+ %val1 = load i32* %ptr1
+ %ptr2 = getelementptr i32* %a, i32 2
+ %val2 = load i32* %ptr2
+ %ptr3 = getelementptr i32* %a, i32 3
+ %val3 = load i32* %ptr3
+
+ %t0 = add i32 %val0, %val1
+ %t1 = add i32 %t0, %val2
+ %t2 = add i32 %t1, %val3
+
+ store i32 %t2, i32* %a
+
+ ret void
+}
+
diff --git a/test/CodeGen/NVPTX/sched2.ll b/test/CodeGen/NVPTX/sched2.ll
new file mode 100644
index 000000000000..71a9a4963faf
--- /dev/null
+++ b/test/CodeGen/NVPTX/sched2.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+define void @foo(<2 x i32>* %a) {
+; CHECK: .func foo
+; CHECK: ld.v2.u32
+; CHECK-NEXT: ld.v2.u32
+; CHECK-NEXT: ld.v2.u32
+; CHECK-NEXT: ld.v2.u32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+; CHECK-NEXT: add.s32
+ %ptr0 = getelementptr <2 x i32>* %a, i32 0
+ %val0 = load <2 x i32>* %ptr0
+ %ptr1 = getelementptr <2 x i32>* %a, i32 1
+ %val1 = load <2 x i32>* %ptr1
+ %ptr2 = getelementptr <2 x i32>* %a, i32 2
+ %val2 = load <2 x i32>* %ptr2
+ %ptr3 = getelementptr <2 x i32>* %a, i32 3
+ %val3 = load <2 x i32>* %ptr3
+
+ %t0 = add <2 x i32> %val0, %val1
+ %t1 = add <2 x i32> %t0, %val2
+ %t2 = add <2 x i32> %t1, %val3
+
+ store <2 x i32> %t2, <2 x i32>* %a
+
+ ret void
+}
+
diff --git a/test/CodeGen/NVPTX/sm-version-10.ll b/test/CodeGen/NVPTX/sm-version-10.ll
deleted file mode 100644
index 9324a3780986..000000000000
--- a/test/CodeGen/NVPTX/sm-version-10.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s
-
-
-; CHECK: .target sm_10
-
diff --git a/test/CodeGen/NVPTX/sm-version-11.ll b/test/CodeGen/NVPTX/sm-version-11.ll
deleted file mode 100644
index 9033a4eba5e4..000000000000
--- a/test/CodeGen/NVPTX/sm-version-11.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_11 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_11 | FileCheck %s
-
-
-; CHECK: .target sm_11
-
diff --git a/test/CodeGen/NVPTX/sm-version-12.ll b/test/CodeGen/NVPTX/sm-version-12.ll
deleted file mode 100644
index d8ee85c9010e..000000000000
--- a/test/CodeGen/NVPTX/sm-version-12.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_12 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_12 | FileCheck %s
-
-
-; CHECK: .target sm_12
-
diff --git a/test/CodeGen/NVPTX/sm-version-13.ll b/test/CodeGen/NVPTX/sm-version-13.ll
deleted file mode 100644
index ad67d642ce30..000000000000
--- a/test/CodeGen/NVPTX/sm-version-13.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_13 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_13 | FileCheck %s
-
-
-; CHECK: .target sm_13
-
diff --git a/test/CodeGen/NVPTX/st-addrspace.ll b/test/CodeGen/NVPTX/st-addrspace.ll
index 54e04ae6106d..0b26d802df84 100644
--- a/test/CodeGen/NVPTX/st-addrspace.ll
+++ b/test/CodeGen/NVPTX/st-addrspace.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
diff --git a/test/CodeGen/NVPTX/tuple-literal.ll b/test/CodeGen/NVPTX/tuple-literal.ll
new file mode 100644
index 000000000000..2b1f2c4b6680
--- /dev/null
+++ b/test/CodeGen/NVPTX/tuple-literal.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20
+
+define ptx_device void @test_function({i8, i8}*) {
+ ret void
+}
diff --git a/test/CodeGen/NVPTX/vector-args.ll b/test/CodeGen/NVPTX/vector-args.ll
new file mode 100644
index 000000000000..80deae46935a
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-args.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+
+define float @foo(<2 x float> %a) {
+; CHECK: .func (.param .b32 func_retval0) foo
+; CHECK: .param .align 8 .b8 foo_param_0[8]
+; CHECK: ld.param.f32 %f{{[0-9]+}}
+; CHECK: ld.param.f32 %f{{[0-9]+}}
+ %t1 = fmul <2 x float> %a, %a
+ %t2 = extractelement <2 x float> %t1, i32 0
+ %t3 = extractelement <2 x float> %t1, i32 1
+ %t4 = fadd float %t2, %t3
+ ret float %t4
+}
+
+
+define float @bar(<4 x float> %a) {
+; CHECK: .func (.param .b32 func_retval0) bar
+; CHECK: .param .align 16 .b8 bar_param_0[16]
+; CHECK: ld.param.f32 %f{{[0-9]+}}
+; CHECK: ld.param.f32 %f{{[0-9]+}}
+ %t1 = fmul <4 x float> %a, %a
+ %t2 = extractelement <4 x float> %t1, i32 0
+ %t3 = extractelement <4 x float> %t1, i32 1
+ %t4 = fadd float %t2, %t3
+ ret float %t4
+}
diff --git a/test/CodeGen/NVPTX/vector-compare.ll b/test/CodeGen/NVPTX/vector-compare.ll
new file mode 100644
index 000000000000..218049995233
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-compare.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20
+
+; This test makes sure that the results of vector compares are properly
+; scalarized. If codegen fails here, it means the type legalizer incorrectly
+; tried to promote <2 x i1> to <2 x i8> and instruction selection then failed.
+
+define void @foo(<2 x i32>* %a, <2 x i32>* %b, i32* %r1, i32* %r2) {
+ %aval = load <2 x i32>* %a
+ %bval = load <2 x i32>* %b
+ %res = icmp slt <2 x i32> %aval, %bval
+ %t1 = extractelement <2 x i1> %res, i32 0
+ %t2 = extractelement <2 x i1> %res, i32 1
+ %t1a = zext i1 %t1 to i32
+ %t2a = zext i1 %t2 to i32
+ store i32 %t1a, i32* %r1
+ store i32 %t2a, i32* %r2
+ ret void
+}
diff --git a/test/CodeGen/NVPTX/vector-loads.ll b/test/CodeGen/NVPTX/vector-loads.ll
new file mode 100644
index 000000000000..58882bf16668
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-loads.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; Even though general vector types are not supported in PTX, we can still
+; optimize loads/stores with pseudo-vector instructions of the form:
+;
+; ld.v2.f32 {%f0, %f1}, [%r0]
+;
+; which will load two floats at once into scalar registers.
+
+define void @foo(<2 x float>* %a) {
+; CHECK: .func foo
+; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ %t1 = load <2 x float>* %a
+ %t2 = fmul <2 x float> %t1, %t1
+ store <2 x float> %t2, <2 x float>* %a
+ ret void
+}
+
+define void @foo2(<4 x float>* %a) {
+; CHECK: .func foo2
+; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ %t1 = load <4 x float>* %a
+ %t2 = fmul <4 x float> %t1, %t1
+ store <4 x float> %t2, <4 x float>* %a
+ ret void
+}
+
+define void @foo3(<8 x float>* %a) {
+; CHECK: .func foo3
+; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ %t1 = load <8 x float>* %a
+ %t2 = fmul <8 x float> %t1, %t1
+ store <8 x float> %t2, <8 x float>* %a
+ ret void
+}
+
+
+
+define void @foo4(<2 x i32>* %a) {
+; CHECK: .func foo4
+; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ %t1 = load <2 x i32>* %a
+ %t2 = mul <2 x i32> %t1, %t1
+ store <2 x i32> %t2, <2 x i32>* %a
+ ret void
+}
+
+define void @foo5(<4 x i32>* %a) {
+; CHECK: .func foo5
+; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ %t1 = load <4 x i32>* %a
+ %t2 = mul <4 x i32> %t1, %t1
+ store <4 x i32> %t2, <4 x i32>* %a
+ ret void
+}
+
+define void @foo6(<8 x i32>* %a) {
+; CHECK: .func foo6
+; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ %t1 = load <8 x i32>* %a
+ %t2 = mul <8 x i32> %t1, %t1
+ store <8 x i32> %t2, <8 x i32>* %a
+ ret void
+}
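To make the comment at the top of this file concrete, a hedged sketch of the scalar lowering that the checked pseudo-vector form replaces (register names illustrative):

; scalar lowering: one load per element
;   ld.f32 %f0, [%r0];
;   ld.f32 %f1, [%r0+4];
; pseudo-vector lowering checked above: one wide load into two scalar registers
;   ld.v2.f32 {%f0, %f1}, [%r0];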
diff --git a/test/CodeGen/NVPTX/vector-select.ll b/test/CodeGen/NVPTX/vector-select.ll
new file mode 100644
index 000000000000..11893df10329
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-select.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20
+
+; This test makes sure that vector selects are scalarized by the type legalizer.
+; If not, type legalization will fail.
+
+define void @foo(<2 x i32> addrspace(1)* %def_a, <2 x i32> addrspace(1)* %def_b, <2 x i32> addrspace(1)* %def_c) {
+entry:
+ %tmp4 = load <2 x i32> addrspace(1)* %def_a
+ %tmp6 = load <2 x i32> addrspace(1)* %def_c
+ %tmp8 = load <2 x i32> addrspace(1)* %def_b
+ %0 = icmp sge <2 x i32> %tmp4, zeroinitializer
+ %cond = select <2 x i1> %0, <2 x i32> %tmp6, <2 x i32> %tmp8
+ store <2 x i32> %cond, <2 x i32> addrspace(1)* %def_c
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
index 82ef2b82cbe6..b6feb5abbc3f 100644
--- a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
+++ b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 | grep dst | count 4
+; RUN: llc < %s -march=ppc64 -mattr=+altivec | grep dst | count 4
define hidden void @_Z4borkPc(i8* %image) {
entry:
diff --git a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
index 8802b97d2a6a..00a402e0e487 100644
--- a/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
+++ b/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm
; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm
+; XFAIL: *
define <4 x i32> @test() nounwind {
ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32 4293066722>
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
index 84aa40c4b52a..91253daae396 100644
--- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
+++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -10,8 +10,8 @@ target triple = "powerpc-apple-darwin10.0"
define void @foo(i32 %y) nounwind ssp {
entry:
; CHECK: foo
-; CHECK: add r3
-; CHECK: 0(r3)
+; CHECK: add r2
+; CHECK: 0(r2)
%y_addr = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %y, i32* %y_addr
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index 974a99a52cb5..097611a7619c 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -2,21 +2,21 @@
; ModuleID = 'hh.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
target triple = "powerpc-apple-darwin9.6"
-; This formerly used R0 for both the stack address and CR.
define void @foo() nounwind {
entry:
-;CHECK: mfcr r2
-;CHECK: lis r3, 1
-;CHECK: rlwinm r2, r2, 8, 0, 31
-;CHECK: ori r3, r3, 34524
-;CHECK: stwx r2, r1, r3
-; Make sure that the register scavenger returns the same temporary register.
-;CHECK: mfcr r2
-;CHECK: lis r3, 1
-;CHECK: rlwinm r2, r2, 12, 0, 31
-;CHECK: ori r3, r3, 34520
-;CHECK: stwx r2, r1, r3
+; Note that part of what is being checked here is proper register reuse.
+; CHECK: mfcr [[T1:r[0-9]+]] ; cr2
+; CHECK: lis [[T2:r[0-9]+]], 1
+; CHECK: addi r3, r1, 72
+; CHECK: rlwinm [[T1]], [[T1]], 8, 0, 31
+; CHECK: ori [[T2]], [[T2]], 34540
+; CHECK: stwx [[T1]], r1, [[T2]]
+; CHECK: lis [[T3:r[0-9]+]], 1
+; CHECK: mfcr [[T4:r[0-9]+]] ; cr3
+; CHECK: ori [[T3]], [[T3]], 34536
+; CHECK: rlwinm [[T4]], [[T4]], 12, 0, 31
+; CHECK: stwx [[T4]], r1, [[T3]]
%x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1]
@@ -25,11 +25,16 @@ entry:
br label %return
return: ; preds = %entry
-;CHECK: lis r3, 1
-;CHECK: ori r3, r3, 34524
-;CHECK: lwzx r2, r1, r3
-;CHECK: rlwinm r2, r2, 24, 0, 31
-;CHECK: mtcrf 32, r2
+; CHECK: lis [[T1:r[0-9]+]], 1
+; CHECK: ori [[T1]], [[T1]], 34536
+; CHECK: lwzx [[T1]], r1, [[T1]]
+; CHECK: rlwinm [[T1]], [[T1]], 20, 0, 31
+; CHECK: mtcrf 16, [[T1]]
+; CHECK: lis [[T1]], 1
+; CHECK: ori [[T1]], [[T1]], 34540
+; CHECK: lwzx [[T1]], r1, [[T1]]
+; CHECK: rlwinm [[T1]], [[T1]], 24, 0, 31
+; CHECK: mtcrf 32, [[T1]]
ret void
}
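A hedged note on the constants checked above: the 100000-byte alloca pushes the CR save slots beyond the 16-bit displacement reach of stw/lwz, so the offset must be materialized in a register and used via the indexed stwx/lwzx forms:

;   lis  rT, 1           ; rT = 1 << 16        = 65536
;   ori  rT, rT, 34540   ; rT = 65536 + 34540  = 100076
;   stwx rS, r1, rT      ; store at r1 + 100076, past the 100000-byte buffer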
diff --git a/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 4a850984a909..000000000000
--- a/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=ppc32 -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
index 72ae9d6c73b3..0dbc2d0180ff 100644
--- a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
+++ b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
@@ -18,8 +18,8 @@ entry:
; CHECK: _g:
; CHECK: mflr r0
; CHECK: stw r0, 8(r1)
-; CHECK: lwz r3, 0(r1)
-; CHECK: lwz r3, 8(r3)
+; CHECK: lwz r2, 0(r1)
+; CHECK: lwz r3, 8(r2)
%0 = tail call i8* @llvm.returnaddress(i32 1) ; <i8*> [#uses=1]
ret i8* %0
}
diff --git a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
index bf3d577a3677..d1a3c9f46b57 100644
--- a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
+++ b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
@@ -7,7 +7,7 @@ define i32 @main() nounwind {
entry:
; Make sure we're generating references using the red zone
; CHECK: main:
-; CHECK: stw r3, -12(r1)
+; CHECK: stw r2, -12(r1)
%retval = alloca i32
%0 = alloca i32
%"alloca point" = bitcast i32 0 to i32
diff --git a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
index 9d2e390c1c97..5bff58f2bbf5 100644
--- a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
+++ b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -code-model=small < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll b/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll
new file mode 100644
index 000000000000..35e3fdd26e72
--- /dev/null
+++ b/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=powerpc64-bgq-linux -enable-misched < %s | FileCheck %s
+;
+; PR14315: misched should not move the physreg copy of %t below the calls.
+
+@.str89 = external unnamed_addr constant [6 x i8], align 1
+
+declare void @init() nounwind
+
+declare void @clock() nounwind
+
+; CHECK: %entry
+; CHECK: fmr 31, 1
+; CHECK: bl init
+define void @s332(double %t) nounwind {
+entry:
+ tail call void @init()
+ tail call void @clock() nounwind
+ br label %for.cond2
+
+for.cond2: ; preds = %for.body4, %entry
+ %i.0 = phi i32 [ %inc, %for.body4 ], [ 0, %entry ]
+ %cmp3 = icmp slt i32 undef, 16000
+ br i1 %cmp3, label %for.body4, label %L20
+
+for.body4: ; preds = %for.cond2
+ %cmp5 = fcmp ogt double undef, %t
+ %inc = add nsw i32 %i.0, 1
+ br i1 %cmp5, label %L20, label %for.cond2
+
+L20: ; preds = %for.body4, %for.cond2
+ %index.0 = phi i32 [ -2, %for.cond2 ], [ %i.0, %for.body4 ]
+ unreachable
+}
diff --git a/test/CodeGen/PowerPC/DbgValueOtherTargets.test b/test/CodeGen/PowerPC/DbgValueOtherTargets.test
new file mode 100644
index 000000000000..9702934f7e68
--- /dev/null
+++ b/test/CodeGen/PowerPC/DbgValueOtherTargets.test
@@ -0,0 +1 @@
+RUN: llc -O0 -march=ppc32 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index 6f985c819fb6..e8765deab05d 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
-; RUN: grep "stw r4, 32751"
+; RUN: grep "stw r3, 32751"
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN: grep "stw r4, 32751"
+; RUN: grep "stw r3, 32751"
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN: grep "std r4, 9024"
+; RUN: grep "std r3, 9024"
define void @test() nounwind {
store i32 0, i32* inttoptr (i64 48725999 to i32*)
diff --git a/test/CodeGen/PowerPC/a2q-stackalign.ll b/test/CodeGen/PowerPC/a2q-stackalign.ll
new file mode 100644
index 000000000000..00c329119376
--- /dev/null
+++ b/test/CodeGen/PowerPC/a2q-stackalign.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck -check-prefix=CHECK-A2Q %s
+; RUN: llc < %s -march=ppc64 -mtriple=powerpc64-bgq-linux -mcpu=a2 | FileCheck -check-prefix=CHECK-BGQ %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare i32 @bar(i8* %a) nounwind;
+define i32 @foo() nounwind {
+ %p = alloca i8, i8 115
+ store i8 0, i8* %p
+ %r = call i32 @bar(i8* %p)
+ ret i32 %r
+}
+
+; Without QPX, the allocated stack frame is 240 bytes, but with QPX
+; (because we require 32-byte alignment), it is 256 bytes.
+; CHECK-A2: @foo
+; CHECK-A2: stdu 1, -240(1)
+; CHECK-A2Q: @foo
+; CHECK-A2Q: stdu 1, -256(1)
+; CHECK-BGQ: @foo
+; CHECK-BGQ: stdu 1, -256(1)
+
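A hedged walk through the checked frame sizes: the frame (115-byte alloca plus linkage and save areas) must be rounded up to a multiple of the stack alignment, so raising the alignment from 16 to 32 bytes grows it from 240 to 256.

; 16-byte stack alignment (no QPX): frame = 240 = 15 * 16
; 32-byte stack alignment (QPX):    240 is not a multiple of 32, so it is
;                                   rounded up to frame = 256 = 8 * 32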
diff --git a/test/CodeGen/PowerPC/a2q.ll b/test/CodeGen/PowerPC/a2q.ll
new file mode 100644
index 000000000000..b26480f08b39
--- /dev/null
+++ b/test/CodeGen/PowerPC/a2q.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2 -mattr=+qpx | FileCheck %s
+
+define void @foo() {
+entry:
+ ret void
+}
+
+; CHECK: @foo
+
diff --git a/test/CodeGen/PowerPC/allocate-r0.ll b/test/CodeGen/PowerPC/allocate-r0.ll
new file mode 100644
index 000000000000..1cf4cec07695
--- /dev/null
+++ b/test/CodeGen/PowerPC/allocate-r0.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @foo(i64 %a) nounwind {
+entry:
+ call void asm sideeffect "", "~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12}"() nounwind
+ br label %return
+
+; CHECK: @foo
+; Because r0 is allocatable, we can use it to hold r3 without spilling.
+; CHECK: mr 0, 3
+; CHECK: mr 3, 0
+
+return: ; preds = %entry
+ ret i64 %a
+}
+
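A hedged reminder of why r0 was historically reserved (illustrative instructions, not from this test): in PowerPC addressing, an RA operand of r0 encodes the literal value 0 rather than the register's contents, so r0 is hazardous only as a base register, not as a plain data register.

;   lwz 3, 8(0)     ; RA = 0 is the literal constant 0, not r0's value
;   mr  0, 3        ; as plain data, r0 holds r3's value normally (as checked above)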
diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll
new file mode 100644
index 000000000000..52587e2c0b87
--- /dev/null
+++ b/test/CodeGen/PowerPC/anon_aggr.ll
@@ -0,0 +1,99 @@
+; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+
+; Test case for PR 14779: anonymous aggregates are not handled correctly.
+; The bug is triggered by passing a byval structure after an anonymous
+; aggregate.
+
+%tarray = type { i64, i8* }
+
+define i8* @func1({ i64, i8* } %array, i8* %ptr) {
+entry:
+ %array_ptr = extractvalue {i64, i8* } %array, 1
+ %cond = icmp eq i8* %array_ptr, %ptr
+ br i1 %cond, label %equal, label %unequal
+equal:
+ ret i8* %array_ptr
+unequal:
+ ret i8* %ptr
+}
+
+; CHECK: func1:
+; CHECK: cmpld {{[0-9]+}}, 4, 5
+; CHECK: std 4, -[[OFFSET1:[0-9]+]]
+; CHECK: std 5, -[[OFFSET2:[0-9]+]]
+; CHECK: ld 3, -[[OFFSET1]](1)
+; CHECK: ld 3, -[[OFFSET2]](1)
+
+
+define i8* @func2({ i64, i8* } %array1, %tarray* byval %array2) {
+entry:
+ %array1_ptr = extractvalue {i64, i8* } %array1, 1
+ %tmp = getelementptr inbounds %tarray* %array2, i32 0, i32 1
+ %array2_ptr = load i8** %tmp
+ %cond = icmp eq i8* %array1_ptr, %array2_ptr
+ br i1 %cond, label %equal, label %unequal
+equal:
+ ret i8* %array1_ptr
+unequal:
+ ret i8* %array2_ptr
+}
+
+; CHECK: func2:
+; CHECK: addi [[REG1:[0-9]+]], 1, 64
+; CHECK: ld [[REG2:[0-9]+]], 8([[REG1]])
+; CHECK: cmpld {{[0-9]+}}, 4, [[REG2]]
+; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]]
+; CHECK: std 4, -[[OFFSET2:[0-9]+]]
+; CHECK: ld 3, -[[OFFSET2]](1)
+; CHECK: ld 3, -[[OFFSET1]](1)
+
+define i8* @func3({ i64, i8* }* byval %array1, %tarray* byval %array2) {
+entry:
+ %tmp1 = getelementptr inbounds { i64, i8* }* %array1, i32 0, i32 1
+ %array1_ptr = load i8** %tmp1
+ %tmp2 = getelementptr inbounds %tarray* %array2, i32 0, i32 1
+ %array2_ptr = load i8** %tmp2
+ %cond = icmp eq i8* %array1_ptr, %array2_ptr
+ br i1 %cond, label %equal, label %unequal
+equal:
+ ret i8* %array1_ptr
+unequal:
+ ret i8* %array2_ptr
+}
+
+; CHECK: func3:
+; CHECK: addi [[REG1:[0-9]+]], 1, 64
+; CHECK: addi [[REG2:[0-9]+]], 1, 48
+; CHECK: ld [[REG3:[0-9]+]], 8([[REG1]])
+; CHECK: ld [[REG4:[0-9]+]], 8([[REG2]])
+; CHECK: cmpld {{[0-9]+}}, [[REG4]], [[REG3]]
+; CHECK: std [[REG3]], -[[OFFSET1:[0-9]+]](1)
+; CHECK: std [[REG4]], -[[OFFSET2:[0-9]+]](1)
+; CHECK: ld 3, -[[OFFSET2]](1)
+; CHECK: ld 3, -[[OFFSET1]](1)
+
+define i8* @func4(i64 %p1, i64 %p2, i64 %p3, i64 %p4,
+ i64 %p5, i64 %p6, i64 %p7, i64 %p8,
+ { i64, i8* } %array1, %tarray* byval %array2) {
+entry:
+ %array1_ptr = extractvalue {i64, i8* } %array1, 1
+ %tmp = getelementptr inbounds %tarray* %array2, i32 0, i32 1
+ %array2_ptr = load i8** %tmp
+ %cond = icmp eq i8* %array1_ptr, %array2_ptr
+ br i1 %cond, label %equal, label %unequal
+equal:
+ ret i8* %array1_ptr
+unequal:
+ ret i8* %array2_ptr
+}
+
+; CHECK: func4:
+; CHECK: addi [[REG1:[0-9]+]], 1, 128
+; CHECK: ld [[REG2:[0-9]+]], 120(1)
+; CHECK: ld [[REG3:[0-9]+]], 8([[REG1]])
+; CHECK: cmpld {{[0-9]+}}, [[REG2]], [[REG3]]
+; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]](1)
+; CHECK: std [[REG3]], -[[OFFSET2:[0-9]+]](1)
+; CHECK: ld 3, -[[OFFSET1]](1)
+; CHECK: ld 3, -[[OFFSET2]](1)
+
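A hedged reading of the func2 checks under the PPC64 ELF ABI (offsets as checked above, commentary mine): the anonymous aggregate travels in GPRs while the byval struct gets a caller-made copy in the parameter save area, so its pointer field must be reloaded from the stack.

;   %array1 ({ i64, i8* })   -> fields arrive in GPRs (r3 and r4)
;   %array2 (%tarray* byval) -> copy at 64(r1); the addi/ld pair above
;                               fetches its i8* field at offset 8 of that copy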
diff --git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll
new file mode 100644
index 000000000000..d04a6c98ee19
--- /dev/null
+++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; This tests that the intersection subclass relationship between GPRC and
+; GPRC_NOR0 is handled correctly. When it was not, this test would assert.
+
+@gen_random.last = external unnamed_addr global i64, align 8
+@.str = external unnamed_addr constant [4 x i8], align 1
+
+declare double @gen_random(double) #0
+
+declare void @benchmark_heapsort(i32 signext, double* nocapture) #0
+
+define signext i32 @main(i32 signext %argc, i8** nocapture %argv) #0 {
+entry:
+ br i1 undef, label %cond.true, label %cond.end
+
+cond.true: ; preds = %entry
+ br label %cond.end
+
+cond.end: ; preds = %cond.true, %entry
+ %cond = phi i32 [ 0, %cond.true ], [ 8000000, %entry ]
+ %add = add i32 %cond, 1
+ %conv = sext i32 %add to i64
+ %mul = shl nsw i64 %conv, 3
+ %call1 = tail call noalias i8* @malloc(i64 %mul) #1
+ br i1 undef, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %cond.end
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %add
+ br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %cond.end
+ ret i32 0
+}
+
+declare noalias i8* @malloc(i64) #0
+
+declare signext i32 @printf(i8* nocapture, ...) #0
+
+declare void @free(i8* nocapture) #0
+
+declare i64 @strtol(i8*, i8** nocapture, i32 signext) #0
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index cbfa4094fb4e..838db20ddd1b 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=ppc32 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 | FileCheck %s
define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
; CHECK: exchange_and_add:
-; CHECK: lwarx
+; CHECK: lwarx {{r[0-9]+}}, 0, {{r[0-9]+}}
%tmp = atomicrmw add i32* %mem, i32 %val monotonic
-; CHECK: stwcx.
+; CHECK: stwcx. {{r[0-9]+}}, 0, {{r[0-9]+}}
ret i32 %tmp
}
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index a427379a8b6d..40b4a2eea976 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -24,3 +24,23 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind {
; CHECK: stdcx.
ret i64 %tmp
}
+
+define void @atomic_store(i64* %mem, i64 %val) nounwind {
+entry:
+; CHECK: @atomic_store
+ store atomic i64 %val, i64* %mem release, align 64
+; CHECK: ldarx
+; CHECK: stdcx.
+ ret void
+}
+
+define i64 @atomic_load(i64* %mem) nounwind {
+entry:
+; CHECK: @atomic_load
+ %tmp = load atomic i64* %mem acquire, align 64
+; CHECK: ldarx
+; CHECK: stdcx.
+; CHECK: stdcx.
+ ret i64 %tmp
+}
+
diff --git a/test/CodeGen/PowerPC/available-externally.ll b/test/CodeGen/PowerPC/available-externally.ll
index fdead7dd8b34..abed0de80b88 100644
--- a/test/CodeGen/PowerPC/available-externally.ll
+++ b/test/CodeGen/PowerPC/available-externally.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -relocation-model=static | FileCheck %s -check-prefix=STATIC
; RUN: llc < %s -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -relocation-model=pic -mtriple=powerpc64-apple-darwin8 | FileCheck %s -check-prefix=PIC64
; RUN: llc < %s -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
+; RUN: llc < %s -relocation-model=dynamic-no-pic -mtriple=powerpc64-apple-darwin8 | FileCheck %s -check-prefix=DYNAMIC64
; PR4482
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "powerpc-apple-darwin8"
@@ -16,10 +18,18 @@ entry:
; PIC: bl L_exact_log2$stub
; PIC: blr
+; PIC64: _foo:
+; PIC64: bl L_exact_log2$stub
+; PIC64: blr
+
; DYNAMIC: _foo:
; DYNAMIC: bl L_exact_log2$stub
; DYNAMIC: blr
+; DYNAMIC64: _foo:
+; DYNAMIC64: bl L_exact_log2$stub
+; DYNAMIC64: blr
+
%A = call i32 @exact_log2(i64 %x) nounwind
ret i32 %A
}
@@ -34,13 +44,13 @@ entry:
; PIC: L_exact_log2$stub:
; PIC: .indirect_symbol _exact_log2
; PIC: mflr r0
-; PIC: bcl 20,31,L_exact_log2$stub$tmp
+; PIC: bcl 20, 31, L_exact_log2$stub$tmp
; PIC: L_exact_log2$stub$tmp:
; PIC: mflr r11
-; PIC: addis r11,r11,ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
+; PIC: addis r11, r11, ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
; PIC: mtlr r0
-; PIC: lwzu r12,lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
+; PIC: lwzu r12, lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
; PIC: mtctr r12
; PIC: bctr
@@ -51,12 +61,32 @@ entry:
; PIC: .subsections_via_symbols
+; PIC64: .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+; PIC64: L_exact_log2$stub:
+; PIC64: .indirect_symbol _exact_log2
+; PIC64: mflr r0
+; PIC64: bcl 20, 31, L_exact_log2$stub$tmp
+
+; PIC64: L_exact_log2$stub$tmp:
+; PIC64: mflr r11
+; PIC64: addis r11, r11, ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
+; PIC64: mtlr r0
+; PIC64: ldu r12, lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
+; PIC64: mtctr r12
+; PIC64: bctr
+
+; PIC64: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; PIC64: L_exact_log2$lazy_ptr:
+; PIC64: .indirect_symbol _exact_log2
+; PIC64: .quad dyld_stub_binding_helper
+
+; PIC64: .subsections_via_symbols
; DYNAMIC: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
; DYNAMIC: L_exact_log2$stub:
; DYNAMIC: .indirect_symbol _exact_log2
-; DYNAMIC: lis r11,ha16(L_exact_log2$lazy_ptr)
-; DYNAMIC: lwzu r12,lo16(L_exact_log2$lazy_ptr)(r11)
+; DYNAMIC: lis r11, ha16(L_exact_log2$lazy_ptr)
+; DYNAMIC: lwzu r12, lo16(L_exact_log2$lazy_ptr)(r11)
; DYNAMIC: mtctr r12
; DYNAMIC: bctr
@@ -65,7 +95,15 @@ entry:
; DYNAMIC: .indirect_symbol _exact_log2
; DYNAMIC: .long dyld_stub_binding_helper
-
-
-
-
+; DYNAMIC64: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
+; DYNAMIC64: L_exact_log2$stub:
+; DYNAMIC64: .indirect_symbol _exact_log2
+; DYNAMIC64: lis r11, ha16(L_exact_log2$lazy_ptr)
+; DYNAMIC64: ldu r12, lo16(L_exact_log2$lazy_ptr)(r11)
+; DYNAMIC64: mtctr r12
+; DYNAMIC64: bctr
+
+; DYNAMIC64: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; DYNAMIC64: L_exact_log2$lazy_ptr:
+; DYNAMIC64: .indirect_symbol _exact_log2
+; DYNAMIC64: .quad dyld_stub_binding_helper
diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll
index 4f6bfc729913..53bbc52167c4 100644
--- a/test/CodeGen/PowerPC/bswap-load-store.ll
+++ b/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=ppc32 -mcpu=ppc32 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=ppc64 -mcpu=ppc64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=PWR7
+; RUN: llc < %s -march=ppc32 -mcpu=pwr7 | FileCheck %s -check-prefix=X32
define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
@@ -34,18 +36,47 @@ define i16 @LHBRX(i8* %ptr, i32 %off) {
ret i16 %tmp6
}
+define void @STDBRX(i64 %i, i8* %ptr, i64 %off) {
+ %tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1]
+ %tmp1.upgrd.1 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1]
+ %tmp13 = tail call i64 @llvm.bswap.i64( i64 %i ) ; <i64> [#uses=1]
+ store i64 %tmp13, i64* %tmp1.upgrd.1
+ ret void
+}
+
+define i64 @LDBRX(i8* %ptr, i64 %off) {
+ %tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1]
+ %tmp1.upgrd.2 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1]
+ %tmp = load i64* %tmp1.upgrd.2 ; <i64> [#uses=1]
+ %tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp ) ; <i64> [#uses=1]
+ ret i64 %tmp14
+}
+
declare i32 @llvm.bswap.i32(i32)
declare i16 @llvm.bswap.i16(i16)
+declare i64 @llvm.bswap.i64(i64)
+
; X32: stwbrx
; X32: lwbrx
; X32: sthbrx
; X32: lhbrx
+; X32-NOT: ldbrx
+; X32-NOT: stdbrx
; X64: stwbrx
; X64: lwbrx
; X64: sthbrx
; X64: lhbrx
+; X64-NOT: ldbrx
+; X64-NOT: stdbrx
+
+; PWR7: stwbrx
+; PWR7: lwbrx
+; PWR7: sthbrx
+; PWR7: lhbrx
+; PWR7: stdbrx
+; PWR7: ldbrx
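+
+; lwbrx/stwbrx and lhbrx/sthbrx are the byte-reversed word and halfword
+; load/store forms, available on every CPU tested here. Their doubleword
+; counterparts ldbrx/stdbrx were introduced with POWER7 (Power ISA 2.06),
+; so they must appear in the PWR7 run and are forbidden in the others.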
diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
index 0454c584bcfe..e155a35c4da0 100644
--- a/test/CodeGen/PowerPC/buildvec_canonicalize.ll
+++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -1,10 +1,4 @@
-; There should be exactly one vxor here.
-; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
-; RUN: grep vxor | count 1
-
-; There should be exactly one vsplti here.
-; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
-; RUN: grep vsplti | count 1
+; RUN: llc < %s -march=ppc32 -mattr=+altivec --enable-unsafe-fp-math | FileCheck %s
define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
%tmp = load <4 x float>* %P3 ; <<4 x float>> [#uses=1]
@@ -15,10 +9,16 @@ define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
store <4 x i32> zeroinitializer, <4 x i32>* %P2
ret void
}
+; The fmul will spill a vspltisw to create a -0.0 vector used as the addend
+; to vmaddfp (so that it is IEEE compliant with respect to zero sign
+; propagation).
+; CHECK: @VXOR
+; CHECK: vsplti
+; CHECK: vxor
define void @VSPLTI(<4 x i32>* %P2, <8 x i16>* %P3) {
store <4 x i32> bitcast (<16 x i8> < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > to <4 x i32>), <4 x i32>* %P2
store <8 x i16> < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >, <8 x i16>* %P3
ret void
}
-
+; CHECK: @VSPLTI
+; CHECK: vsplti
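+
+; vspltisw splats a 5-bit signed immediate across the vector, so both
+; functions can form their constants without a constant-pool load: @VSPLTI
+; splats -1 directly for the all-ones stores, and the -0.0 addend in @VXOR
+; is likewise derived from a splatted immediate.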
diff --git a/test/CodeGen/PowerPC/complex-return.ll b/test/CodeGen/PowerPC/complex-return.ll
new file mode 100644
index 000000000000..f12152ff0fca
--- /dev/null
+++ b/test/CodeGen/PowerPC/complex-return.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define { ppc_fp128, ppc_fp128 } @foo() nounwind {
+entry:
+ %retval = alloca { ppc_fp128, ppc_fp128 }, align 16
+ %x = alloca { ppc_fp128, ppc_fp128 }, align 16
+ %real = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0
+ %imag = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1
+ store ppc_fp128 0xM400C0000000000000000000000000000, ppc_fp128* %real
+ store ppc_fp128 0xMC00547AE147AE1483CA47AE147AE147A, ppc_fp128* %imag
+ %x.realp = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0
+ %x.real = load ppc_fp128* %x.realp
+ %x.imagp = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1
+ %x.imag = load ppc_fp128* %x.imagp
+ %real1 = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 0
+ %imag2 = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 1
+ store ppc_fp128 %x.real, ppc_fp128* %real1
+ store ppc_fp128 %x.imag, ppc_fp128* %imag2
+ %0 = load { ppc_fp128, ppc_fp128 }* %retval
+ ret { ppc_fp128, ppc_fp128 } %0
+}
+
+; CHECK: foo:
+; CHECK: lfd 3
+; CHECK: lfd 4
+; CHECK: lfd 2
+; CHECK: lfd 1
+
+define { float, float } @oof() nounwind {
+entry:
+ %retval = alloca { float, float }, align 4
+ %x = alloca { float, float }, align 4
+ %real = getelementptr inbounds { float, float }* %x, i32 0, i32 0
+ %imag = getelementptr inbounds { float, float }* %x, i32 0, i32 1
+ store float 3.500000e+00, float* %real
+ store float 0xC00547AE20000000, float* %imag
+ %x.realp = getelementptr inbounds { float, float }* %x, i32 0, i32 0
+ %x.real = load float* %x.realp
+ %x.imagp = getelementptr inbounds { float, float }* %x, i32 0, i32 1
+ %x.imag = load float* %x.imagp
+ %real1 = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
+ %imag2 = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
+ store float %x.real, float* %real1
+ store float %x.imag, float* %imag2
+ %0 = load { float, float }* %retval
+ ret { float, float } %0
+}
+
+; CHECK: oof:
+; CHECK: lfs 2
+; CHECK: lfs 1
+
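+; These first-class aggregate returns come back in consecutive FP registers:
+; {float, float} in f1/f2 (the two lfs loads), and {ppc_fp128, ppc_fp128},
+; where each ppc_fp128 is a double-double pair, in f1 through f4 (the four
+; lfd loads).
+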
diff --git a/test/CodeGen/PowerPC/cr-spills.ll b/test/CodeGen/PowerPC/cr-spills.ll
new file mode 100644
index 000000000000..d6df7a237668
--- /dev/null
+++ b/test/CodeGen/PowerPC/cr-spills.ll
@@ -0,0 +1,409 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; This test case triggers several functions related to CR spilling, both in
+; frame lowering and in handling CR register pressure. When the register kill
+; flags were not being set correctly, this would cause the register scavenger to
+; assert.
+
+@SetupFastFullPelSearch.orig_pels = external unnamed_addr global [768 x i16], align 2
+@weight_luma = external global i32
+@offset_luma = external global i32
+@wp_luma_round = external global i32, align 4
+@luma_log_weight_denom = external global i32, align 4
+
+define void @SetupFastFullPelSearch() #0 {
+entry:
+ %mul10 = mul nsw i32 undef, undef
+ br i1 undef, label %land.end, label %land.lhs.true
+
+land.lhs.true: ; preds = %entry
+ switch i32 0, label %land.end [
+ i32 0, label %land.rhs
+ i32 3, label %land.rhs
+ ]
+
+land.rhs: ; preds = %land.lhs.true, %land.lhs.true
+ %tobool21 = icmp ne i32 undef, 0
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %land.lhs.true, %entry
+ %0 = phi i1 [ %tobool21, %land.rhs ], [ false, %land.lhs.true ], [ false, %entry ]
+ %cond = load i32** undef, align 8
+ br i1 undef, label %if.then95, label %for.body.lr.ph
+
+if.then95: ; preds = %land.end
+ %cmp.i4.i1427 = icmp slt i32 undef, undef
+ br label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %if.then95, %land.end
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ br i1 undef, label %for.body, label %for.body252
+
+for.body252: ; preds = %for.inc997, %for.body
+ %shl263 = add i32 undef, 80
+ br i1 %0, label %for.cond286.preheader, label %for.cond713.preheader
+
+for.cond286.preheader: ; preds = %for.body252
+ br label %for.cond290.preheader
+
+for.cond290.preheader: ; preds = %for.end520, %for.cond286.preheader
+ %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ]
+ %1 = load i32* undef, align 4, !tbaa !0
+ %2 = load i32* @weight_luma, align 4, !tbaa !0
+ %3 = load i32* @wp_luma_round, align 4, !tbaa !0
+ %4 = load i32* @luma_log_weight_denom, align 4, !tbaa !0
+ %5 = load i32* @offset_luma, align 4, !tbaa !0
+ %incdec.ptr502.sum = add i64 undef, 16
+ br label %for.body293
+
+for.body293: ; preds = %for.body293, %for.cond290.preheader
+ %srcptr.41591 = phi i16* [ %srcptr.31595, %for.cond290.preheader ], [ undef, %for.body293 ]
+ %refptr.11590 = phi i16* [ undef, %for.cond290.preheader ], [ %add.ptr517, %for.body293 ]
+ %LineSadBlk0.01588 = phi i32 [ 0, %for.cond290.preheader ], [ %add346, %for.body293 ]
+ %LineSadBlk1.01587 = phi i32 [ 0, %for.cond290.preheader ], [ %add402, %for.body293 ]
+ %LineSadBlk3.01586 = phi i32 [ 0, %for.cond290.preheader ], [ %add514, %for.body293 ]
+ %LineSadBlk2.01585 = phi i32 [ 0, %for.cond290.preheader ], [ %add458, %for.body293 ]
+ %6 = load i16* %refptr.11590, align 2, !tbaa !3
+ %conv294 = zext i16 %6 to i32
+ %mul295 = mul nsw i32 %conv294, %2
+ %add296 = add nsw i32 %mul295, %3
+ %shr = ashr i32 %add296, %4
+ %add297 = add nsw i32 %shr, %5
+ %cmp.i.i1513 = icmp sgt i32 %add297, 0
+ %cond.i.i1514 = select i1 %cmp.i.i1513, i32 %add297, i32 0
+ %cmp.i4.i1515 = icmp slt i32 %cond.i.i1514, %1
+ %cond.i5.i1516 = select i1 %cmp.i4.i1515, i32 %cond.i.i1514, i32 %1
+ %7 = load i16* %srcptr.41591, align 2, !tbaa !3
+ %conv300 = zext i16 %7 to i32
+ %sub301 = sub nsw i32 %cond.i5.i1516, %conv300
+ %idxprom302 = sext i32 %sub301 to i64
+ %arrayidx303 = getelementptr inbounds i32* %cond, i64 %idxprom302
+ %8 = load i32* %arrayidx303, align 4, !tbaa !0
+ %add304 = add nsw i32 %8, %LineSadBlk0.01588
+ %9 = load i32* undef, align 4, !tbaa !0
+ %add318 = add nsw i32 %add304, %9
+ %10 = load i16* undef, align 2, !tbaa !3
+ %conv321 = zext i16 %10 to i32
+ %mul322 = mul nsw i32 %conv321, %2
+ %add323 = add nsw i32 %mul322, %3
+ %shr324 = ashr i32 %add323, %4
+ %add325 = add nsw i32 %shr324, %5
+ %cmp.i.i1505 = icmp sgt i32 %add325, 0
+ %cond.i.i1506 = select i1 %cmp.i.i1505, i32 %add325, i32 0
+ %cmp.i4.i1507 = icmp slt i32 %cond.i.i1506, %1
+ %cond.i5.i1508 = select i1 %cmp.i4.i1507, i32 %cond.i.i1506, i32 %1
+ %sub329 = sub nsw i32 %cond.i5.i1508, 0
+ %idxprom330 = sext i32 %sub329 to i64
+ %arrayidx331 = getelementptr inbounds i32* %cond, i64 %idxprom330
+ %11 = load i32* %arrayidx331, align 4, !tbaa !0
+ %add332 = add nsw i32 %add318, %11
+ %cmp.i.i1501 = icmp sgt i32 undef, 0
+ %cond.i.i1502 = select i1 %cmp.i.i1501, i32 undef, i32 0
+ %cmp.i4.i1503 = icmp slt i32 %cond.i.i1502, %1
+ %cond.i5.i1504 = select i1 %cmp.i4.i1503, i32 %cond.i.i1502, i32 %1
+ %incdec.ptr341 = getelementptr inbounds i16* %srcptr.41591, i64 4
+ %12 = load i16* null, align 2, !tbaa !3
+ %conv342 = zext i16 %12 to i32
+ %sub343 = sub nsw i32 %cond.i5.i1504, %conv342
+ %idxprom344 = sext i32 %sub343 to i64
+ %arrayidx345 = getelementptr inbounds i32* %cond, i64 %idxprom344
+ %13 = load i32* %arrayidx345, align 4, !tbaa !0
+ %add346 = add nsw i32 %add332, %13
+ %incdec.ptr348 = getelementptr inbounds i16* %refptr.11590, i64 5
+ %14 = load i16* null, align 2, !tbaa !3
+ %conv349 = zext i16 %14 to i32
+ %mul350 = mul nsw i32 %conv349, %2
+ %add351 = add nsw i32 %mul350, %3
+ %shr352 = ashr i32 %add351, %4
+ %add353 = add nsw i32 %shr352, %5
+ %cmp.i.i1497 = icmp sgt i32 %add353, 0
+ %cond.i.i1498 = select i1 %cmp.i.i1497, i32 %add353, i32 0
+ %cmp.i4.i1499 = icmp slt i32 %cond.i.i1498, %1
+ %cond.i5.i1500 = select i1 %cmp.i4.i1499, i32 %cond.i.i1498, i32 %1
+ %incdec.ptr355 = getelementptr inbounds i16* %srcptr.41591, i64 5
+ %15 = load i16* %incdec.ptr341, align 2, !tbaa !3
+ %conv356 = zext i16 %15 to i32
+ %sub357 = sub nsw i32 %cond.i5.i1500, %conv356
+ %idxprom358 = sext i32 %sub357 to i64
+ %arrayidx359 = getelementptr inbounds i32* %cond, i64 %idxprom358
+ %16 = load i32* %arrayidx359, align 4, !tbaa !0
+ %add360 = add nsw i32 %16, %LineSadBlk1.01587
+ %incdec.ptr362 = getelementptr inbounds i16* %refptr.11590, i64 6
+ %17 = load i16* %incdec.ptr348, align 2, !tbaa !3
+ %conv363 = zext i16 %17 to i32
+ %mul364 = mul nsw i32 %conv363, %2
+ %add365 = add nsw i32 %mul364, %3
+ %shr366 = ashr i32 %add365, %4
+ %add367 = add nsw i32 %shr366, %5
+ %cmp.i.i1493 = icmp sgt i32 %add367, 0
+ %cond.i.i1494 = select i1 %cmp.i.i1493, i32 %add367, i32 0
+ %cmp.i4.i1495 = icmp slt i32 %cond.i.i1494, %1
+ %cond.i5.i1496 = select i1 %cmp.i4.i1495, i32 %cond.i.i1494, i32 %1
+ %incdec.ptr369 = getelementptr inbounds i16* %srcptr.41591, i64 6
+ %18 = load i16* %incdec.ptr355, align 2, !tbaa !3
+ %conv370 = zext i16 %18 to i32
+ %sub371 = sub nsw i32 %cond.i5.i1496, %conv370
+ %idxprom372 = sext i32 %sub371 to i64
+ %arrayidx373 = getelementptr inbounds i32* %cond, i64 %idxprom372
+ %19 = load i32* %arrayidx373, align 4, !tbaa !0
+ %add374 = add nsw i32 %add360, %19
+ %incdec.ptr376 = getelementptr inbounds i16* %refptr.11590, i64 7
+ %20 = load i16* %incdec.ptr362, align 2, !tbaa !3
+ %conv377 = zext i16 %20 to i32
+ %mul378 = mul nsw i32 %conv377, %2
+ %add379 = add nsw i32 %mul378, %3
+ %shr380 = ashr i32 %add379, %4
+ %add381 = add nsw i32 %shr380, %5
+ %cmp.i.i1489 = icmp sgt i32 %add381, 0
+ %cond.i.i1490 = select i1 %cmp.i.i1489, i32 %add381, i32 0
+ %cmp.i4.i1491 = icmp slt i32 %cond.i.i1490, %1
+ %cond.i5.i1492 = select i1 %cmp.i4.i1491, i32 %cond.i.i1490, i32 %1
+ %incdec.ptr383 = getelementptr inbounds i16* %srcptr.41591, i64 7
+ %21 = load i16* %incdec.ptr369, align 2, !tbaa !3
+ %conv384 = zext i16 %21 to i32
+ %sub385 = sub nsw i32 %cond.i5.i1492, %conv384
+ %idxprom386 = sext i32 %sub385 to i64
+ %arrayidx387 = getelementptr inbounds i32* %cond, i64 %idxprom386
+ %22 = load i32* %arrayidx387, align 4, !tbaa !0
+ %add388 = add nsw i32 %add374, %22
+ %23 = load i16* %incdec.ptr376, align 2, !tbaa !3
+ %conv391 = zext i16 %23 to i32
+ %mul392 = mul nsw i32 %conv391, %2
+ %add395 = add nsw i32 0, %5
+ %cmp.i.i1485 = icmp sgt i32 %add395, 0
+ %cond.i.i1486 = select i1 %cmp.i.i1485, i32 %add395, i32 0
+ %cmp.i4.i1487 = icmp slt i32 %cond.i.i1486, %1
+ %cond.i5.i1488 = select i1 %cmp.i4.i1487, i32 %cond.i.i1486, i32 %1
+ %incdec.ptr397 = getelementptr inbounds i16* %srcptr.41591, i64 8
+ %24 = load i16* %incdec.ptr383, align 2, !tbaa !3
+ %conv398 = zext i16 %24 to i32
+ %sub399 = sub nsw i32 %cond.i5.i1488, %conv398
+ %idxprom400 = sext i32 %sub399 to i64
+ %arrayidx401 = getelementptr inbounds i32* %cond, i64 %idxprom400
+ %25 = load i32* %arrayidx401, align 4, !tbaa !0
+ %add402 = add nsw i32 %add388, %25
+ %incdec.ptr404 = getelementptr inbounds i16* %refptr.11590, i64 9
+ %cmp.i4.i1483 = icmp slt i32 undef, %1
+ %cond.i5.i1484 = select i1 %cmp.i4.i1483, i32 undef, i32 %1
+ %26 = load i16* %incdec.ptr397, align 2, !tbaa !3
+ %conv412 = zext i16 %26 to i32
+ %sub413 = sub nsw i32 %cond.i5.i1484, %conv412
+ %idxprom414 = sext i32 %sub413 to i64
+ %arrayidx415 = getelementptr inbounds i32* %cond, i64 %idxprom414
+ %27 = load i32* %arrayidx415, align 4, !tbaa !0
+ %add416 = add nsw i32 %27, %LineSadBlk2.01585
+ %incdec.ptr418 = getelementptr inbounds i16* %refptr.11590, i64 10
+ %28 = load i16* %incdec.ptr404, align 2, !tbaa !3
+ %conv419 = zext i16 %28 to i32
+ %mul420 = mul nsw i32 %conv419, %2
+ %add421 = add nsw i32 %mul420, %3
+ %shr422 = ashr i32 %add421, %4
+ %add423 = add nsw i32 %shr422, %5
+ %cmp.i.i1477 = icmp sgt i32 %add423, 0
+ %cond.i.i1478 = select i1 %cmp.i.i1477, i32 %add423, i32 0
+ %cmp.i4.i1479 = icmp slt i32 %cond.i.i1478, %1
+ %cond.i5.i1480 = select i1 %cmp.i4.i1479, i32 %cond.i.i1478, i32 %1
+ %incdec.ptr425 = getelementptr inbounds i16* %srcptr.41591, i64 10
+ %sub427 = sub nsw i32 %cond.i5.i1480, 0
+ %idxprom428 = sext i32 %sub427 to i64
+ %arrayidx429 = getelementptr inbounds i32* %cond, i64 %idxprom428
+ %29 = load i32* %arrayidx429, align 4, !tbaa !0
+ %add430 = add nsw i32 %add416, %29
+ %incdec.ptr432 = getelementptr inbounds i16* %refptr.11590, i64 11
+ %30 = load i16* %incdec.ptr418, align 2, !tbaa !3
+ %conv433 = zext i16 %30 to i32
+ %mul434 = mul nsw i32 %conv433, %2
+ %add435 = add nsw i32 %mul434, %3
+ %shr436 = ashr i32 %add435, %4
+ %add437 = add nsw i32 %shr436, %5
+ %cmp.i.i1473 = icmp sgt i32 %add437, 0
+ %cond.i.i1474 = select i1 %cmp.i.i1473, i32 %add437, i32 0
+ %cmp.i4.i1475 = icmp slt i32 %cond.i.i1474, %1
+ %cond.i5.i1476 = select i1 %cmp.i4.i1475, i32 %cond.i.i1474, i32 %1
+ %31 = load i16* %incdec.ptr425, align 2, !tbaa !3
+ %conv440 = zext i16 %31 to i32
+ %sub441 = sub nsw i32 %cond.i5.i1476, %conv440
+ %idxprom442 = sext i32 %sub441 to i64
+ %arrayidx443 = getelementptr inbounds i32* %cond, i64 %idxprom442
+ %32 = load i32* %arrayidx443, align 4, !tbaa !0
+ %add444 = add nsw i32 %add430, %32
+ %incdec.ptr446 = getelementptr inbounds i16* %refptr.11590, i64 12
+ %33 = load i16* %incdec.ptr432, align 2, !tbaa !3
+ %conv447 = zext i16 %33 to i32
+ %mul448 = mul nsw i32 %conv447, %2
+ %add449 = add nsw i32 %mul448, %3
+ %shr450 = ashr i32 %add449, %4
+ %add451 = add nsw i32 %shr450, %5
+ %cmp.i.i1469 = icmp sgt i32 %add451, 0
+ %cond.i.i1470 = select i1 %cmp.i.i1469, i32 %add451, i32 0
+ %cmp.i4.i1471 = icmp slt i32 %cond.i.i1470, %1
+ %cond.i5.i1472 = select i1 %cmp.i4.i1471, i32 %cond.i.i1470, i32 %1
+ %incdec.ptr453 = getelementptr inbounds i16* %srcptr.41591, i64 12
+ %34 = load i16* undef, align 2, !tbaa !3
+ %conv454 = zext i16 %34 to i32
+ %sub455 = sub nsw i32 %cond.i5.i1472, %conv454
+ %idxprom456 = sext i32 %sub455 to i64
+ %arrayidx457 = getelementptr inbounds i32* %cond, i64 %idxprom456
+ %35 = load i32* %arrayidx457, align 4, !tbaa !0
+ %add458 = add nsw i32 %add444, %35
+ %incdec.ptr460 = getelementptr inbounds i16* %refptr.11590, i64 13
+ %36 = load i16* %incdec.ptr446, align 2, !tbaa !3
+ %conv461 = zext i16 %36 to i32
+ %mul462 = mul nsw i32 %conv461, %2
+ %add463 = add nsw i32 %mul462, %3
+ %shr464 = ashr i32 %add463, %4
+ %add465 = add nsw i32 %shr464, %5
+ %cmp.i.i1465 = icmp sgt i32 %add465, 0
+ %cond.i.i1466 = select i1 %cmp.i.i1465, i32 %add465, i32 0
+ %cmp.i4.i1467 = icmp slt i32 %cond.i.i1466, %1
+ %cond.i5.i1468 = select i1 %cmp.i4.i1467, i32 %cond.i.i1466, i32 %1
+ %incdec.ptr467 = getelementptr inbounds i16* %srcptr.41591, i64 13
+ %37 = load i16* %incdec.ptr453, align 2, !tbaa !3
+ %conv468 = zext i16 %37 to i32
+ %sub469 = sub nsw i32 %cond.i5.i1468, %conv468
+ %idxprom470 = sext i32 %sub469 to i64
+ %arrayidx471 = getelementptr inbounds i32* %cond, i64 %idxprom470
+ %38 = load i32* %arrayidx471, align 4, !tbaa !0
+ %add472 = add nsw i32 %38, %LineSadBlk3.01586
+ %incdec.ptr474 = getelementptr inbounds i16* %refptr.11590, i64 14
+ %add477 = add nsw i32 0, %3
+ %shr478 = ashr i32 %add477, %4
+ %add479 = add nsw i32 %shr478, %5
+ %cmp.i.i1461 = icmp sgt i32 %add479, 0
+ %cond.i.i1462 = select i1 %cmp.i.i1461, i32 %add479, i32 0
+ %cmp.i4.i1463 = icmp slt i32 %cond.i.i1462, %1
+ %cond.i5.i1464 = select i1 %cmp.i4.i1463, i32 %cond.i.i1462, i32 %1
+ %incdec.ptr481 = getelementptr inbounds i16* %srcptr.41591, i64 14
+ %39 = load i16* %incdec.ptr467, align 2, !tbaa !3
+ %conv482 = zext i16 %39 to i32
+ %sub483 = sub nsw i32 %cond.i5.i1464, %conv482
+ %idxprom484 = sext i32 %sub483 to i64
+ %arrayidx485 = getelementptr inbounds i32* %cond, i64 %idxprom484
+ %40 = load i32* %arrayidx485, align 4, !tbaa !0
+ %add486 = add nsw i32 %add472, %40
+ %incdec.ptr488 = getelementptr inbounds i16* %refptr.11590, i64 15
+ %41 = load i16* %incdec.ptr474, align 2, !tbaa !3
+ %conv489 = zext i16 %41 to i32
+ %mul490 = mul nsw i32 %conv489, %2
+ %add491 = add nsw i32 %mul490, %3
+ %shr492 = ashr i32 %add491, %4
+ %add493 = add nsw i32 %shr492, %5
+ %cmp.i.i1457 = icmp sgt i32 %add493, 0
+ %cond.i.i1458 = select i1 %cmp.i.i1457, i32 %add493, i32 0
+ %cmp.i4.i1459 = icmp slt i32 %cond.i.i1458, %1
+ %cond.i5.i1460 = select i1 %cmp.i4.i1459, i32 %cond.i.i1458, i32 %1
+ %incdec.ptr495 = getelementptr inbounds i16* %srcptr.41591, i64 15
+ %42 = load i16* %incdec.ptr481, align 2, !tbaa !3
+ %conv496 = zext i16 %42 to i32
+ %sub497 = sub nsw i32 %cond.i5.i1460, %conv496
+ %idxprom498 = sext i32 %sub497 to i64
+ %arrayidx499 = getelementptr inbounds i32* %cond, i64 %idxprom498
+ %43 = load i32* %arrayidx499, align 4, !tbaa !0
+ %add500 = add nsw i32 %add486, %43
+ %44 = load i16* %incdec.ptr488, align 2, !tbaa !3
+ %conv503 = zext i16 %44 to i32
+ %mul504 = mul nsw i32 %conv503, %2
+ %add505 = add nsw i32 %mul504, %3
+ %shr506 = ashr i32 %add505, %4
+ %add507 = add nsw i32 %shr506, %5
+ %cmp.i.i1453 = icmp sgt i32 %add507, 0
+ %cond.i.i1454 = select i1 %cmp.i.i1453, i32 %add507, i32 0
+ %cmp.i4.i1455 = icmp slt i32 %cond.i.i1454, %1
+ %cond.i5.i1456 = select i1 %cmp.i4.i1455, i32 %cond.i.i1454, i32 %1
+ %45 = load i16* %incdec.ptr495, align 2, !tbaa !3
+ %conv510 = zext i16 %45 to i32
+ %sub511 = sub nsw i32 %cond.i5.i1456, %conv510
+ %idxprom512 = sext i32 %sub511 to i64
+ %arrayidx513 = getelementptr inbounds i32* %cond, i64 %idxprom512
+ %46 = load i32* %arrayidx513, align 4, !tbaa !0
+ %add514 = add nsw i32 %add500, %46
+ %add.ptr517 = getelementptr inbounds i16* %refptr.11590, i64 %incdec.ptr502.sum
+ %exitcond1692 = icmp eq i32 undef, 4
+ br i1 %exitcond1692, label %for.end520, label %for.body293
+
+for.end520: ; preds = %for.body293
+ store i32 %add346, i32* undef, align 4, !tbaa !0
+ store i32 %add402, i32* undef, align 4, !tbaa !0
+ store i32 %add458, i32* undef, align 4, !tbaa !0
+ store i32 %add514, i32* null, align 4, !tbaa !0
+ br i1 undef, label %for.end543, label %for.cond290.preheader
+
+for.end543: ; preds = %for.end520
+ br i1 undef, label %for.inc997, label %for.body549
+
+for.body549: ; preds = %for.inc701, %for.end543
+ %call554 = call i16* null(i16**** null, i32 signext undef, i32 signext %shl263) #1
+ br label %for.cond559.preheader
+
+for.cond559.preheader: ; preds = %for.cond559.preheader, %for.body549
+ br i1 undef, label %for.inc701, label %for.cond559.preheader
+
+for.inc701: ; preds = %for.cond559.preheader
+ br i1 undef, label %for.inc997, label %for.body549
+
+for.cond713.preheader: ; preds = %for.end850, %for.body252
+ br label %for.body716
+
+for.body716: ; preds = %for.body716, %for.cond713.preheader
+ br i1 undef, label %for.end850, label %for.body716
+
+for.end850: ; preds = %for.body716
+ br i1 undef, label %for.end873, label %for.cond713.preheader
+
+for.end873: ; preds = %for.end850
+ br i1 undef, label %for.inc997, label %for.body879
+
+for.body879: ; preds = %for.inc992, %for.end873
+ br label %for.cond889.preheader
+
+for.cond889.preheader: ; preds = %for.end964, %for.body879
+ br i1 undef, label %for.cond894.preheader.lr.ph, label %for.end964
+
+for.cond894.preheader.lr.ph: ; preds = %for.cond889.preheader
+ br label %for.body898.lr.ph.us
+
+for.end957.us: ; preds = %for.body946.us
+ br i1 undef, label %for.body898.lr.ph.us, label %for.end964
+
+for.body946.us: ; preds = %for.body930.us, %for.body946.us
+ br i1 false, label %for.body946.us, label %for.end957.us
+
+for.body930.us: ; preds = %for.body914.us, %for.body930.us
+ br i1 undef, label %for.body930.us, label %for.body946.us
+
+for.body914.us: ; preds = %for.body898.us, %for.body914.us
+ br i1 undef, label %for.body914.us, label %for.body930.us
+
+for.body898.us: ; preds = %for.body898.lr.ph.us, %for.body898.us
+ br i1 undef, label %for.body898.us, label %for.body914.us
+
+for.body898.lr.ph.us: ; preds = %for.end957.us, %for.cond894.preheader.lr.ph
+ br label %for.body898.us
+
+for.end964: ; preds = %for.end957.us, %for.cond889.preheader
+ %inc990 = add nsw i32 undef, 1
+ br i1 false, label %for.inc992, label %for.cond889.preheader
+
+for.inc992: ; preds = %for.end964
+ br i1 false, label %for.inc997, label %for.body879
+
+for.inc997: ; preds = %for.inc992, %for.end873, %for.inc701, %for.end543
+ %cmp250 = icmp slt i32 undef, %mul10
+ br i1 %cmp250, label %for.body252, label %for.end999
+
+for.end999: ; preds = %for.inc997
+ ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/PowerPC/ctr-cleanup.ll b/test/CodeGen/PowerPC/ctr-cleanup.ll
new file mode 100644
index 000000000000..04e4ffb0d48d
--- /dev/null
+++ b/test/CodeGen/PowerPC/ctr-cleanup.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @main() #0 {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 5
+ br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main
+; CHECK: li {{[0-9]+}}, 4
+; CHECK-NOT: li {{[0-9]+}}, 4
+; CHECK: bdnz
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
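+
+; Countable loops are lowered to use the CTR register: the trip count (4) is
+; materialized with a single li and moved into CTR, and bdnz then decrements
+; CTR and branches while it is nonzero. The CHECK-NOT line above ensures the
+; trip-count immediate is not redundantly materialized a second time.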
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index 1d365d47a877..3757fa3e2f29 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -1,10 +1,12 @@
; Make sure this testcase does not use ctpop
-; RUN: llc < %s -march=ppc32 | grep -i cntlzw
+; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
declare i32 @llvm.cttz.i32(i32, i1)
define i32 @bar(i32 %x) {
entry:
+; CHECK: @bar
+; CHECK: cntlzw
%tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true ) ; <i32> [#uses=1]
ret i32 %tmp.1
}
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
index e161cb05686f..21e36618c5c1 100644
--- a/test/CodeGen/PowerPC/dbg.ll
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -16,12 +16,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"dbg.c", metadata !"/src", metadata !"clang version 3.1", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9, metadata !9, metadata !10}
diff --git a/test/CodeGen/PowerPC/dcbt-sched.ll b/test/CodeGen/PowerPC/dcbt-sched.ll
new file mode 100644
index 000000000000..dfa1b75bd7db
--- /dev/null
+++ b/test/CodeGen/PowerPC/dcbt-sched.ll
@@ -0,0 +1,22 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc -mcpu=a2 -enable-misched -enable-aa-sched-mi < %s | FileCheck %s
+
+define i8 @test1(i8* noalias %a, i8* noalias %b, i8* noalias %c) nounwind {
+entry:
+ %q = load i8* %b
+ call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1)
+ %r = load i8* %c
+ %s = add i8 %q, %r
+ ret i8 %s
+}
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+
+; Test that we've moved the second load to before the dcbt to better
+; hide its latency.
+; CHECK: @test1
+; CHECK: lbz
+; CHECK: lbz
+; CHECK: dcbt
+
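+; dcbt is the data-cache-block-touch prefetch hint that @llvm.prefetch lowers
+; to here; because the pointers are noalias, AA-aware machine scheduling is
+; free to hoist the second byte load above the prefetch, which is exactly the
+; ordering the CHECK lines require.
+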
diff --git a/test/CodeGen/PowerPC/float-asmprint.ll b/test/CodeGen/PowerPC/float-asmprint.ll
new file mode 100644
index 000000000000..c9dc02862aac
--- /dev/null
+++ b/test/CodeGen/PowerPC/float-asmprint.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=powerpc64-none-linux < %s | FileCheck %s
+
+; Check that all current floating-point types are correctly emitted to assembly
+; on a big-endian target. x86_fp80 can't actually print for unrelated reasons,
+; but that's not really a problem.
+
+@var128 = global fp128 0xL00000000000000008000000000000000, align 16
+@varppc128 = global ppc_fp128 0xM80000000000000000000000000000000, align 16
+@var64 = global double -0.0, align 8
+@var32 = global float -0.0, align 4
+@var16 = global half -0.0, align 2
+
+; CHECK: var128:
+; CHECK-NEXT: .quad -9223372036854775808 # fp128 -0
+; CHECK-NEXT: .quad 0
+; CHECK-NEXT: .size
+
+; CHECK: varppc128:
+; CHECK-NEXT: .quad -9223372036854775808 # ppc_fp128 -0
+; CHECK-NEXT: .quad 0
+; CHECK-NEXT: .size
+
+; CHECK: var64:
+; CHECK-NEXT: .quad -9223372036854775808 # double -0
+; CHECK-NEXT: .size
+
+; CHECK: var32:
+; CHECK-NEXT: .long 2147483648 # float -0
+; CHECK-NEXT: .size
+
+; CHECK: var16:
+; CHECK-NEXT: .short 32768 # half -0
+; CHECK-NEXT: .size
+
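+; The expected integers are the raw IEEE bit patterns of -0.0, which has only
+; the sign bit set: 0x8000000000000000 for double (printed as the signed
+; value -9223372036854775808), 0x80000000 for float (printed unsigned as
+; 2147483648), and 0x8000 for half (32768). For fp128 and ppc_fp128 the sign
+; bit lands in the first doubleword on this big-endian target, followed by a
+; zero doubleword.
+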
diff --git a/test/CodeGen/PowerPC/float-to-int.ll b/test/CodeGen/PowerPC/float-to-int.ll
new file mode 100644
index 000000000000..39cd4f929f8d
--- /dev/null
+++ b/test/CodeGen/PowerPC/float-to-int.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @foo(float %a) nounwind {
+ %x = fptosi float %a to i64
+ ret i64 %x
+
+; CHECK: @foo
+; CHECK: fctidz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo2(double %a) nounwind {
+ %x = fptosi double %a to i64
+ ret i64 %x
+
+; CHECK: @foo2
+; CHECK: fctidz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo3(float %a) nounwind {
+ %x = fptoui float %a to i64
+ ret i64 %x
+
+; CHECK: @foo3
+; CHECK: fctiduz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i64 @foo4(double %a) nounwind {
+ %x = fptoui double %a to i64
+ ret i64 %x
+
+; CHECK: @foo4
+; CHECK: fctiduz [[REG:[0-9]+]], 1
+; CHECK: stfd [[REG]],
+; CHECK: ld 3,
+; CHECK: blr
+}
+
+define i32 @goo(float %a) nounwind {
+ %x = fptosi float %a to i32
+ ret i32 %x
+
+; CHECK: @goo
+; CHECK: fctiwz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo2(double %a) nounwind {
+ %x = fptosi double %a to i32
+ ret i32 %x
+
+; CHECK: @goo2
+; CHECK: fctiwz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo3(float %a) nounwind {
+ %x = fptoui float %a to i32
+ ret i32 %x
+
+; CHECK: @goo3
+; CHECK: fctiwuz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
+define i32 @goo4(double %a) nounwind {
+ %x = fptoui double %a to i32
+ ret i32 %x
+
+; CHECK: @goo4
+; CHECK: fctiwuz [[REG:[0-9]+]], 1
+; CHECK: stfiwx [[REG]],
+; CHECK: lwz 3,
+; CHECK: blr
+}
+
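+; fctidz/fctiduz truncate to signed/unsigned i64 and fctiwz/fctiwuz to i32;
+; the unsigned forms are Power ISA 2.06 additions, hence the -mcpu=a2 CHECK
+; run (the g5 run only verifies we do not crash). With no direct FPR-to-GPR
+; move on these cores, the result is bounced through memory: stfd + ld for
+; 64-bit results and stfiwx + lwz for 32-bit ones.
+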
diff --git a/test/CodeGen/PowerPC/frame-size.ll b/test/CodeGen/PowerPC/frame-size.ll
new file mode 100644
index 000000000000..0e569a4602c3
--- /dev/null
+++ b/test/CodeGen/PowerPC/frame-size.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define i64 @foo() nounwind {
+entry:
+ %x = alloca [32568 x i8]
+ %"alloca point" = bitcast i32 0 to i32
+ %x1 = bitcast [32568 x i8]* %x to i8*
+
+; Check that the register-scavenger (RS) spill slot has been allocated
+; (because the frame-size estimate will fail the small-frame-size check and
+; the function has spills).
+; CHECK: @foo
+; CHECK: stdu 1, -32768(1)
+
+ %s1 = call i64 @bar(i8* %x1) nounwind
+ %s2 = call i64 @bar(i8* %x1) nounwind
+ %s3 = call i64 @bar(i8* %x1) nounwind
+ %s4 = call i64 @bar(i8* %x1) nounwind
+ %s5 = call i64 @bar(i8* %x1) nounwind
+ %s6 = call i64 @bar(i8* %x1) nounwind
+ %s7 = call i64 @bar(i8* %x1) nounwind
+ %s8 = call i64 @bar(i8* %x1) nounwind
+ %r = call i64 @can(i64 %s1, i64 %s2, i64 %s3, i64 %s4, i64 %s5, i64 %s6, i64 %s7, i64 %s8) nounwind
+ br label %return
+
+return:
+ ret i64 %r
+}
+
+declare i64 @bar(i8*)
+declare i64 @can(i64, i64, i64, i64, i64, i64, i64, i64)
+
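+; The stdu both stores the back chain and updates r1 by the entire frame size
+; in one instruction; checking the exact -32768 displacement pins down a
+; frame layout large enough to include the scavenger spill slot described
+; above.
+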
diff --git a/test/CodeGen/PowerPC/frameaddr.ll b/test/CodeGen/PowerPC/frameaddr.ll
new file mode 100644
index 000000000000..eabd4a68aa83
--- /dev/null
+++ b/test/CodeGen/PowerPC/frameaddr.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare void @llvm.eh.sjlj.longjmp(i8*) #1
+
+define i8* @main() #0 {
+entry:
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ ret i8* %0
+
+; CHECK: @main
+; CHECK: mr 3, 1
+}
+
+define i8* @foo() #3 { ; naked
+entry:
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ ret i8* %0
+
+; CHECK: @foo
+; CHECK: mr 3, 1
+}
+
+define i8* @bar() #0 {
+entry:
+ %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1]
+ %x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1]
+ call void @use(i8* %x1) nounwind
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ ret i8* %0
+
+; Note that if we start eliminating non-leaf frame pointers by default, this
+; will need to be updated.
+; CHECK: @bar
+; CHECK: mr 3, 31
+}
+
+declare void @use(i8*)
+
+declare i8* @llvm.frameaddress(i32) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind naked "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
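+; r1 is the stack pointer, so llvm.frameaddress(0) is just "mr 3, 1" whenever
+; no separate frame pointer is maintained (including the naked @foo). @bar is
+; a non-leaf function with a large frame, so a frame pointer is kept in r31
+; and the result is "mr 3, 31" instead.
+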
diff --git a/test/CodeGen/PowerPC/i32-to-float.ll b/test/CodeGen/PowerPC/i32-to-float.ll
new file mode 100644
index 000000000000..2707d0352de1
--- /dev/null
+++ b/test/CodeGen/PowerPC/i32-to-float.ll
@@ -0,0 +1,82 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr6 | FileCheck -check-prefix=CHECK-PWR6 %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @foo(i32 %a) nounwind {
+entry:
+ %x = sitofp i32 %a to float
+ ret float %x
+
+; CHECK: @foo
+; CHECK: extsw [[REG:[0-9]+]], 3
+; CHECK: std [[REG]],
+; CHECK: lfd [[REG2:[0-9]+]],
+; CHECK: fcfid [[REG3:[0-9]+]], [[REG2]]
+; CHECK: frsp 1, [[REG3]]
+; CHECK: blr
+
+; CHECK-PWR6: @foo
+; CHECK-PWR6: stw 3,
+; CHECK-PWR6: lfiwax [[REG:[0-9]+]],
+; CHECK-PWR6: fcfid [[REG2:[0-9]+]], [[REG]]
+; CHECK-PWR6: frsp 1, [[REG2]]
+; CHECK-PWR6: blr
+
+; CHECK-A2: @foo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfids 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define double @goo(i32 %a) nounwind {
+entry:
+ %x = sitofp i32 %a to double
+ ret double %x
+
+; CHECK: @goo
+; CHECK: extsw [[REG:[0-9]+]], 3
+; CHECK: std [[REG]],
+; CHECK: lfd [[REG2:[0-9]+]],
+; CHECK: fcfid 1, [[REG2]]
+; CHECK: blr
+
+; CHECK-PWR6: @goo
+; CHECK-PWR6: stw 3,
+; CHECK-PWR6: lfiwax [[REG:[0-9]+]],
+; CHECK-PWR6: fcfid 1, [[REG]]
+; CHECK-PWR6: blr
+
+; CHECK-A2: @goo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfid 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define float @foou(i32 %a) nounwind {
+entry:
+ %x = uitofp i32 %a to float
+ ret float %x
+
+; CHECK-A2: @foou
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwzx [[REG:[0-9]+]],
+; CHECK-A2: fcfidus 1, [[REG]]
+; CHECK-A2: blr
+}
+
+define double @goou(i32 %a) nounwind {
+entry:
+ %x = uitofp i32 %a to double
+ ret double %x
+
+; CHECK-A2: @goou
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwzx [[REG:[0-9]+]],
+; CHECK-A2: fcfidu 1, [[REG]]
+; CHECK-A2: blr
+}
+
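+; The three runs show the i32-to-FP sequence improving with the ISA level:
+; g5 must sign-extend into a GPR and bounce through memory (extsw/std/lfd)
+; before fcfid; pwr6 can load the word sign-extended straight into an FPR
+; with lfiwax (ISA 2.05); and a2 adds lfiwzx for the unsigned cases plus the
+; single-precision/unsigned converts fcfids, fcfidu, and fcfidus (ISA 2.06).
+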
diff --git a/test/CodeGen/PowerPC/i64-to-float.ll b/test/CodeGen/PowerPC/i64-to-float.ll
new file mode 100644
index 000000000000..b81d109e7f45
--- /dev/null
+++ b/test/CodeGen/PowerPC/i64-to-float.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @foo(i64 %a) nounwind {
+entry:
+ %x = sitofp i64 %a to float
+ ret float %x
+
+; CHECK: @foo
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfids 1, [[REG]]
+; CHECK: blr
+}
+
+define double @goo(i64 %a) nounwind {
+entry:
+ %x = sitofp i64 %a to double
+ ret double %x
+
+; CHECK: @goo
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfid 1, [[REG]]
+; CHECK: blr
+}
+
+define float @foou(i64 %a) nounwind {
+entry:
+ %x = uitofp i64 %a to float
+ ret float %x
+
+; CHECK: @foou
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfidus 1, [[REG]]
+; CHECK: blr
+}
+
+define double @goou(i64 %a) nounwind {
+entry:
+ %x = uitofp i64 %a to double
+ ret double %x
+
+; CHECK: @goou
+; CHECK: std 3,
+; CHECK: lfd [[REG:[0-9]+]],
+; CHECK: fcfidu 1, [[REG]]
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll
index 5a0c072c9c52..d2a3239ab865 100644
--- a/test/CodeGen/PowerPC/i64_fp_round.ll
+++ b/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -12,16 +12,16 @@ entry:
; Note that only parts of the sequence are checked for here, to allow
; for minor code generation differences.
-; CHECK: sradi [[REGISTER:[0-9]+]], 3, 53
-; CHECK: addi [[REGISTER:[0-9]+]], [[REGISTER]], 1
-; CHECK: cmpldi 0, [[REGISTER]], 1
-; CHECK: isel [[REGISTER:[0-9]+]], {{[0-9]+}}, 3, 1
-; CHECK: std [[REGISTER]], -{{[0-9]+}}(1)
+; CHECK: sradi [[REG1:[0-9]+]], 3, 53
+; CHECK: addi [[REG2:[0-9]+]], [[REG1]], 1
+; CHECK: cmpldi 0, [[REG2]], 1
+; CHECK: isel [[REG3:[0-9]+]], {{[0-9]+}}, 3, 1
+; CHECK: std [[REG3]], -{{[0-9]+}}(1)
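+
+; Repeatedly writing [[REGISTER:[0-9]+]] rebinds the variable on each match
+; (same-line uses still see the previous binding), which made the old
+; patterns easy to misread; the distinct REG1/REG2/REG3 captures express the
+; intended dataflow unambiguously.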
; Also check that with -enable-unsafe-fp-math we do not get that extra
; code sequence. Simply verify that there is no "isel" present.
-; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
+; RUN: llc -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
; UNSAFE-NOT: isel
diff --git a/test/CodeGen/PowerPC/iabs.ll b/test/CodeGen/PowerPC/iabs.ll
index 7d089bbd653c..f683238de268 100644
--- a/test/CodeGen/PowerPC/iabs.ll
+++ b/test/CodeGen/PowerPC/iabs.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=ppc32 -stats 2>&1 | \
; RUN: grep "4 .*Number of machine instrs printed"
diff --git a/test/CodeGen/PowerPC/in-asm-f64-reg.ll b/test/CodeGen/PowerPC/in-asm-f64-reg.ll
new file mode 100644
index 000000000000..1321dfce2027
--- /dev/null
+++ b/test/CodeGen/PowerPC/in-asm-f64-reg.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+
+define void @f() {
+; CHECK: @f
+
+entry:
+ %0 = tail call double* asm sideeffect "qvstfdux $2,$0,$1", "=b,{r7},{f11},0,~{memory}"(i32 64, double undef, double* undef)
+ ret void
+
+; CHECK: qvstfdux 11,{{[0-9]+}},7
+}
diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll
index 62aa7cf929f8..a10c5ddb36fb 100644
--- a/test/CodeGen/PowerPC/jaggedstructs.ll
+++ b/test/CodeGen/PowerPC/jaggedstructs.ll
@@ -23,22 +23,22 @@ entry:
; CHECK: std 4, 200(1)
; CHECK: std 3, 192(1)
; CHECK: lbz {{[0-9]+}}, 199(1)
-; CHECK: stb {{[0-9]+}}, 55(1)
; CHECK: lhz {{[0-9]+}}, 197(1)
+; CHECK: stb {{[0-9]+}}, 55(1)
; CHECK: sth {{[0-9]+}}, 53(1)
; CHECK: lbz {{[0-9]+}}, 207(1)
-; CHECK: stb {{[0-9]+}}, 63(1)
; CHECK: lwz {{[0-9]+}}, 203(1)
+; CHECK: stb {{[0-9]+}}, 63(1)
; CHECK: stw {{[0-9]+}}, 59(1)
; CHECK: lhz {{[0-9]+}}, 214(1)
-; CHECK: sth {{[0-9]+}}, 70(1)
; CHECK: lwz {{[0-9]+}}, 210(1)
+; CHECK: sth {{[0-9]+}}, 70(1)
; CHECK: stw {{[0-9]+}}, 66(1)
; CHECK: lbz {{[0-9]+}}, 223(1)
-; CHECK: stb {{[0-9]+}}, 79(1)
; CHECK: lhz {{[0-9]+}}, 221(1)
-; CHECK: sth {{[0-9]+}}, 77(1)
; CHECK: lwz {{[0-9]+}}, 217(1)
+; CHECK: stb {{[0-9]+}}, 79(1)
+; CHECK: sth {{[0-9]+}}, 77(1)
; CHECK: stw {{[0-9]+}}, 73(1)
; CHECK: ld 6, 72(1)
; CHECK: ld 5, 64(1)
diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll
index 12f1d1f130d8..98951306fd8e 100644
--- a/test/CodeGen/PowerPC/lbzux.ll
+++ b/test/CodeGen/PowerPC/lbzux.ll
@@ -1,6 +1,6 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
entry:
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
index 4019eca0bb88..aaa31d93d5f2 100644
--- a/test/CodeGen/PowerPC/lit.local.cfg
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
targets = set(config.root.targets_to_build.split())
if not 'PowerPC' in targets:
diff --git a/test/CodeGen/PowerPC/load-shift-combine.ll b/test/CodeGen/PowerPC/load-shift-combine.ll
new file mode 100644
index 000000000000..a5d1224864a6
--- /dev/null
+++ b/test/CodeGen/PowerPC/load-shift-combine.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s
+
+; This used to cause a crash. A standard load is converted to a pre-increment
+; load. Later the pre-increment load is combined with a subsequent SRL to
+; produce a smaller load. This transform incorrectly created a standard load
+; and propagated its value into the uses of both values produced by the
+; pre-increment load. The result was a crash when attempting to process an
+; add with a token-chain operand.
+
+%struct.Info = type { i32, i32, i8*, i8*, i8*, [32 x i8*], i64, [32 x i64], i64, i64, i64, [32 x i64] }
+%struct.S1847 = type { [12 x i8], [4 x i8], [8 x i8], [4 x i8], [8 x i8], [2 x i8], i8, [4 x i64], i8, [3 x i8], [4 x i8], i8, i16, [4 x %struct.anon.76], i16, i8, i8* }
+%struct.anon.76 = type { i32 }
+@info = common global %struct.Info zeroinitializer, align 8
+@fails = common global i32 0, align 4
+@a1847 = external global [5 x %struct.S1847]
+define void @test1847() nounwind {
+entry:
+ %j = alloca i32, align 4
+ %0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
+ %1 = load i32* @fails, align 4
+ %bf.load1 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+ %bf.clear2 = and i96 %bf.load1, 302231454903657293676543
+ %bf.set3 = or i96 %bf.clear2, -38383394772764476296921088
+ store i96 %bf.set3, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+ %2 = load i32* %j, align 4
+ %3 = load i32* %j, align 4
+ %inc11 = add nsw i32 %3, 1
+ store i32 %inc11, i32* %j, align 4
+ %bf.load15 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+ %bf.clear16 = and i96 %bf.load15, -18446744069414584321
+ %bf.set17 = or i96 %bf.clear16, 18446743532543672320
+ store i96 %bf.set17, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/mcm-1.ll b/test/CodeGen/PowerPC/mcm-1.ll
new file mode 100644
index 000000000000..a57fb9dd98d0
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-1.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading and storing an external variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ei = external global i32
+
+define signext i32 @test_external() nounwind {
+entry:
+ %0 = load i32* @ei, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @ei, align 4
+ ret i32 %0
+}
+
+; CHECK: test_external:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
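+
+; Medium and large code models address globals through the TOC anchored in
+; r2: addis forms the high adjusted 16 bits of the 32-bit offset to the .LC
+; entry (@toc@ha), the ld with @toc@l adds the low 16 bits while loading the
+; variable's address out of the TOC, and the lwz/stw then dereference that
+; address. The .tc directive is what emits the TOC entry itself.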
diff --git a/test/CodeGen/PowerPC/mcm-10.ll b/test/CodeGen/PowerPC/mcm-10.ll
new file mode 100644
index 000000000000..4bec3e16fa04
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-10.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading and storing a static variable scoped to a function.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+ %0 = load i32* @test_fn_static.si, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @test_fn_static.si, align 4
+ ret i32 %0
+}
+
+; CHECK: test_fn_static:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: .type [[VAR]],@object
+; CHECK: .local [[VAR]]
+; CHECK: .comm [[VAR]],4,4
diff --git a/test/CodeGen/PowerPC/mcm-11.ll b/test/CodeGen/PowerPC/mcm-11.ll
new file mode 100644
index 000000000000..f2bc4c9cb72c
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-11.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading and storing a file-scope static variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+ %0 = load i32* @gi, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @gi, align 4
+ ret i32 %0
+}
+
+; CHECK: test_file_static:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+; CHECK: .type [[VAR]],@object
+; CHECK: .data
+; CHECK: .globl [[VAR]]
+; CHECK: [[VAR]]:
+; CHECK: .long 5
diff --git a/test/CodeGen/PowerPC/mcm-12.ll b/test/CodeGen/PowerPC/mcm-12.ll
new file mode 100644
index 000000000000..911305d4355f
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-12.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading a value from the constant pool (TOC-relative).
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @test_double_const() nounwind {
+entry:
+ ret double 0x3F4FD4920B498CF0
+}
+
+; CHECK: [[VAR:[a-z0-9A-Z_.]+]]:
+; CHECK: .quad 4562098671269285104
+; CHECK: test_double_const:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
diff --git a/test/CodeGen/PowerPC/mcm-2.ll b/test/CodeGen/PowerPC/mcm-2.ll
new file mode 100644
index 000000000000..f0dff4c5a39c
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-2.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s
+
+; Test correct code generation for medium and large code model
+; for loading and storing a static variable scoped to a function.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+ %0 = load i32* @test_fn_static.si, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @test_fn_static.si, align 4
+ ret i32 %0
+}
+
+; MEDIUM: test_fn_static:
+; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM: lwz {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: stw {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: .type [[VAR]],@object
+; MEDIUM: .local [[VAR]]
+; MEDIUM: .comm [[VAR]],4,4
+
+; LARGE: test_fn_static:
+; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; LARGE: lwz {{[0-9]+}}, 0([[REG2]])
+; LARGE: stw {{[0-9]+}}, 0([[REG2]])
+; LARGE: .type [[VAR]],@object
+; LARGE: .local [[VAR]]
+; LARGE: .comm [[VAR]],4,4
+
diff --git a/test/CodeGen/PowerPC/mcm-3.ll b/test/CodeGen/PowerPC/mcm-3.ll
new file mode 100644
index 000000000000..b7905503f458
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-3.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s
+
+; Test correct code generation for medium and large code model
+; for loading and storing a file-scope static variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+ %0 = load i32* @gi, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @gi, align 4
+ ret i32 %0
+}
+
+; MEDIUM: test_file_static:
+; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM: lwz {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: stw {{[0-9]+}}, 0([[REG2]])
+; MEDIUM: .type [[VAR]],@object
+; MEDIUM: .data
+; MEDIUM: .globl [[VAR]]
+; MEDIUM: [[VAR]]:
+; MEDIUM: .long 5
+
+; LARGE: test_file_static:
+; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; LARGE: lwz {{[0-9]+}}, 0([[REG2]])
+; LARGE: stw {{[0-9]+}}, 0([[REG2]])
+; LARGE: .type [[VAR]],@object
+; LARGE: .data
+; LARGE: .globl [[VAR]]
+; LARGE: [[VAR]]:
+; LARGE: .long 5
+
diff --git a/test/CodeGen/PowerPC/mcm-4.ll b/test/CodeGen/PowerPC/mcm-4.ll
new file mode 100644
index 000000000000..47c60c936038
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-4.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck -check-prefix=LARGE %s
+
+; Test correct code generation for medium and large code model
+; for loading a value from the constant pool (TOC-relative).
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @test_double_const() nounwind {
+entry:
+ ret double 0x3F4FD4920B498CF0
+}
+
+; MEDIUM: [[VAR:[a-z0-9A-Z_.]+]]:
+; MEDIUM: .quad 4562098671269285104
+; MEDIUM: test_double_const:
+; MEDIUM: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM: lfd {{[0-9]+}}, 0([[REG2]])
+
+; LARGE: [[VAR:[a-z0-9A-Z_.]+]]:
+; LARGE: .quad 4562098671269285104
+; LARGE: test_double_const:
+; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; LARGE: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]])
+; LARGE: lfd {{[0-9]+}}, 0([[REG2]])
diff --git a/test/CodeGen/PowerPC/mcm-5.ll b/test/CodeGen/PowerPC/mcm-5.ll
new file mode 100644
index 000000000000..1be27b7e8cc0
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-5.ll
@@ -0,0 +1,60 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading the address of a jump table from the TOC.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define signext i32 @test_jump_table(i32 signext %i) nounwind {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32* %i.addr, align 4
+ switch i32 %0, label %sw.default [
+ i32 3, label %sw.bb
+ i32 4, label %sw.bb1
+ i32 5, label %sw.bb2
+ i32 6, label %sw.bb3
+ ]
+
+sw.default: ; preds = %entry
+ br label %sw.epilog
+
+sw.bb: ; preds = %entry
+ %1 = load i32* %i.addr, align 4
+ %mul = mul nsw i32 %1, 7
+ store i32 %mul, i32* %i.addr, align 4
+ br label %sw.bb1
+
+sw.bb1: ; preds = %entry, %sw.bb
+ %2 = load i32* %i.addr, align 4
+ %dec = add nsw i32 %2, -1
+ store i32 %dec, i32* %i.addr, align 4
+ br label %sw.bb2
+
+sw.bb2: ; preds = %entry, %sw.bb1
+ %3 = load i32* %i.addr, align 4
+ %add = add nsw i32 %3, 3
+ store i32 %add, i32* %i.addr, align 4
+ br label %sw.bb3
+
+sw.bb3: ; preds = %entry, %sw.bb2
+ %4 = load i32* %i.addr, align 4
+ %shl = shl i32 %4, 1
+ store i32 %shl, i32* %i.addr, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.bb3, %sw.default
+ %5 = load i32* %i.addr, align 4
+ ret i32 %5
+}
+
+; CHECK: test_jump_table:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: ldx {{[0-9]+}}, {{[0-9]+}}, [[REG2]]
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-6.ll b/test/CodeGen/PowerPC/mcm-6.ll
new file mode 100644
index 000000000000..35efaaa5628f
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-6.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading and storing a tentatively defined variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ti = common global i32 0, align 4
+
+define signext i32 @test_tentative() nounwind {
+entry:
+ %0 = load i32* @ti, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @ti, align 4
+ ret i32 %0
+}
+
+; CHECK: test_tentative:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc [[VAR:[a-z0-9A-Z_.]+]][TC],{{[a-z0-9A-Z_.]+}}
+; CHECK: .comm [[VAR]],4,4
diff --git a/test/CodeGen/PowerPC/mcm-7.ll b/test/CodeGen/PowerPC/mcm-7.ll
new file mode 100644
index 000000000000..0dd39ee4109d
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-7.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading a function address.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i8* @test_fnaddr() nounwind {
+entry:
+ %func = alloca i32 (i32)*, align 8
+ store i32 (i32)* @foo, i32 (i32)** %func, align 8
+ %0 = load i32 (i32)** %func, align 8
+ %1 = bitcast i32 (i32)* %0 to i8*
+ ret i8* %1
+}
+
+declare signext i32 @foo(i32 signext)
+
+; CHECK: test_fnaddr:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-8.ll b/test/CodeGen/PowerPC/mcm-8.ll
new file mode 100644
index 000000000000..3ece786d6447
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-8.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading a variable with available-externally linkage.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@x = available_externally constant [13 x i8] c"St9bad_alloc\00"
+
+define signext i8 @test_avext() nounwind {
+entry:
+ %0 = getelementptr inbounds [13 x i8]* @x, i32 0, i32 0
+ %1 = load i8* %0, align 1
+ ret i8 %1
+}
+
+; CHECK: test_avext:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lbz {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-9.ll b/test/CodeGen/PowerPC/mcm-9.ll
new file mode 100644
index 000000000000..f366f45cc863
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-9.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large < %s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading and storing an aliased external variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ei = external global i32
+@a = alias i32* @ei
+
+define signext i32 @test_external() nounwind {
+entry:
+ %0 = load i32* @a, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @a, align 4
+ ret i32 %0
+}
+
+; CHECK: test_external:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-default.ll b/test/CodeGen/PowerPC/mcm-default.ll
new file mode 100644
index 000000000000..19de2536aec3
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-default.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+; Test that we generate code for the medium code model by default.
+; Use an external variable reference as an example.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ei = external global i32
+
+define signext i32 @test_external() nounwind {
+entry:
+ %0 = load i32* @ei, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @ei, align 4
+ ret i32 %0
+}
+
+; CHECK: test_external:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll
new file mode 100644
index 000000000000..2dd1718ba75a
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-obj-2.ll
@@ -0,0 +1,77 @@
+; RUN: llc -O1 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; FIXME: When asm-parse is available, this could become an assembly test.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+ %0 = load i32* @test_fn_static.si, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @test_fn_static.si, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing function-scoped variable si.
+;
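+; (On ppc64 ELF, R_PPC64_TOC16_HA is relocation type 50 = 0x32 and
+; R_PPC64_TOC16_LO is type 48 = 0x30, which the r_type values below encode.)
+;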
+; CHECK: Relocation 0
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM2:[0-9]+]]
+; CHECK-NEXT: 'r_type', 0x00000032
+; CHECK: Relocation 1
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM2]]
+; CHECK-NEXT: 'r_type', 0x00000030
+; CHECK: Relocation 2
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM2]]
+; CHECK-NEXT: 'r_type', 0x00000030
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+ %0 = load i32* @gi, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @gi, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing file-scope variable gi.
+;
+; CHECK: Relocation 3
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM3:[0-9]+]]
+; CHECK-NEXT: 'r_type', 0x00000032
+; CHECK: Relocation 4
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM3]]
+; CHECK-NEXT: 'r_type', 0x00000030
+; CHECK: Relocation 5
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM3]]
+; CHECK-NEXT: 'r_type', 0x00000030
+
+define double @test_double_const() nounwind {
+entry:
+ ret double 0x3F4FD4920B498CF0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing a constant.
+;
+; CHECK: Relocation 6
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM4:[0-9]+]]
+; CHECK-NEXT: 'r_type', 0x00000032
+; CHECK: Relocation 7
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM4]]
+; CHECK-NEXT: 'r_type', 0x00000030
+
diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll
new file mode 100644
index 000000000000..117c3b334346
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-obj.ll
@@ -0,0 +1,268 @@
+; RUN: llc -O0 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=LARGE %s
+
+; FIXME: When asm-parse is available, this could become an assembly test.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ei = external global i32
+
+define signext i32 @test_external() nounwind {
+entry:
+ %0 = load i32* @ei, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @ei, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing external variable ei.
+;
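+; (As above, type 50 = 0x32 is R_PPC64_TOC16_HA; type 64 = 0x40 is
+; R_PPC64_TOC16_LO_DS, the DS-form variant required because ld takes a
+; 4-byte-aligned displacement.)
+;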
+; MEDIUM: '.rela.text'
+; MEDIUM: Relocation 0
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM1:[0-9]+]]
+; MEDIUM-NEXT: 'r_type', 0x00000032
+; MEDIUM: Relocation 1
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM1]]
+; MEDIUM-NEXT: 'r_type', 0x00000040
+;
+; LARGE: '.rela.text'
+; LARGE: Relocation 0
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM1:[0-9]+]]
+; LARGE-NEXT: 'r_type', 0x00000032
+; LARGE: Relocation 1
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM1]]
+; LARGE-NEXT: 'r_type', 0x00000040
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+ %0 = load i32* @test_fn_static.si, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @test_fn_static.si, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing function-scoped variable si.
+;
+; MEDIUM: Relocation 2
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM2:[0-9]+]]
+; MEDIUM-NEXT: 'r_type', 0x00000032
+; MEDIUM: Relocation 3
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM2]]
+; MEDIUM-NEXT: 'r_type', 0x00000030
+;
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing function-scoped variable si.
+;
+; LARGE: Relocation 2
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM2:[0-9]+]]
+; LARGE-NEXT: 'r_type', 0x00000032
+; LARGE: Relocation 3
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM2]]
+; LARGE-NEXT: 'r_type', 0x00000040
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+ %0 = load i32* @gi, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @gi, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing file-scope variable gi.
+;
+; MEDIUM: Relocation 4
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM3:[0-9]+]]
+; MEDIUM-NEXT: 'r_type', 0x00000032
+; MEDIUM: Relocation 5
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM3]]
+; MEDIUM-NEXT: 'r_type', 0x00000030
+;
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing file-scope variable gi.
+;
+; LARGE: Relocation 4
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM3:[0-9]+]]
+; LARGE-NEXT: 'r_type', 0x00000032
+; LARGE: Relocation 5
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM3]]
+; LARGE-NEXT: 'r_type', 0x00000040
+
+define double @test_double_const() nounwind {
+entry:
+ ret double 0x3F4FD4920B498CF0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing a constant.
+;
+; MEDIUM: Relocation 6
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM4:[0-9]+]]
+; MEDIUM-NEXT: 'r_type', 0x00000032
+; MEDIUM: Relocation 7
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM4]]
+; MEDIUM-NEXT: 'r_type', 0x00000030
+;
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing a constant.
+;
+; LARGE: Relocation 6
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM4:[0-9]+]]
+; LARGE-NEXT: 'r_type', 0x00000032
+; LARGE: Relocation 7
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM4]]
+; LARGE-NEXT: 'r_type', 0x00000040
+
+define signext i32 @test_jump_table(i32 signext %i) nounwind {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32* %i.addr, align 4
+ switch i32 %0, label %sw.default [
+ i32 3, label %sw.bb
+ i32 4, label %sw.bb1
+ i32 5, label %sw.bb2
+ i32 6, label %sw.bb3
+ ]
+
+sw.default: ; preds = %entry
+ br label %sw.epilog
+
+sw.bb: ; preds = %entry
+ %1 = load i32* %i.addr, align 4
+ %mul = mul nsw i32 %1, 7
+ store i32 %mul, i32* %i.addr, align 4
+ br label %sw.bb1
+
+sw.bb1: ; preds = %entry, %sw.bb
+ %2 = load i32* %i.addr, align 4
+ %dec = add nsw i32 %2, -1
+ store i32 %dec, i32* %i.addr, align 4
+ br label %sw.bb2
+
+sw.bb2: ; preds = %entry, %sw.bb1
+ %3 = load i32* %i.addr, align 4
+ %add = add nsw i32 %3, 3
+ store i32 %add, i32* %i.addr, align 4
+ br label %sw.bb3
+
+sw.bb3: ; preds = %entry, %sw.bb2
+ %4 = load i32* %i.addr, align 4
+ %shl = shl i32 %4, 1
+ store i32 %shl, i32* %i.addr, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.bb3, %sw.default
+ %5 = load i32* %i.addr, align 4
+ ret i32 %5
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing a jump table address.
+;
+; MEDIUM: Relocation 8
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM5:[0-9]+]]
+; MEDIUM-NEXT: 'r_type', 0x00000032
+; MEDIUM: Relocation 9
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM5]]
+; MEDIUM-NEXT: 'r_type', 0x00000040
+;
+; LARGE: Relocation 8
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM5:[0-9]+]]
+; LARGE-NEXT: 'r_type', 0x00000032
+; LARGE: Relocation 9
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM5]]
+; LARGE-NEXT: 'r_type', 0x00000040
+
+@ti = common global i32 0, align 4
+
+define signext i32 @test_tentative() nounwind {
+entry:
+ %0 = load i32* @ti, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @ti, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing tentatively defined variable ti.
+;
+; MEDIUM: Relocation 10
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM6:[0-9]+]]
+; MEDIUM-NEXT: 'r_type', 0x00000032
+; MEDIUM: Relocation 11
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM6]]
+; MEDIUM-NEXT: 'r_type', 0x00000040
+;
+; LARGE: Relocation 10
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM6:[0-9]+]]
+; LARGE-NEXT: 'r_type', 0x00000032
+; LARGE: Relocation 11
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM6]]
+; LARGE-NEXT: 'r_type', 0x00000040
+
+define i8* @test_fnaddr() nounwind {
+entry:
+ %func = alloca i32 (i32)*, align 8
+ store i32 (i32)* @foo, i32 (i32)** %func, align 8
+ %0 = load i32 (i32)** %func, align 8
+ %1 = bitcast i32 (i32)* %0 to i8*
+ ret i8* %1
+}
+
+declare signext i32 @foo(i32 signext)
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing function address foo.
+;
+; MEDIUM: Relocation 12
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM7:[0-9]+]]
+; MEDIUM-NEXT: 'r_type', 0x00000032
+; MEDIUM: Relocation 13
+; MEDIUM-NEXT: 'r_offset'
+; MEDIUM-NEXT: 'r_sym', 0x[[SYM7]]
+; MEDIUM-NEXT: 'r_type', 0x00000040
+;
+; LARGE: Relocation 12
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM7:[0-9]+]]
+; LARGE-NEXT: 'r_type', 0x00000032
+; LARGE: Relocation 13
+; LARGE-NEXT: 'r_offset'
+; LARGE-NEXT: 'r_sym', 0x[[SYM7]]
+; LARGE-NEXT: 'r_type', 0x00000040
+
diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll
index 39af11a3d54c..fcf53da67fc2 100644
--- a/test/CodeGen/PowerPC/mem_update.ll
+++ b/test/CodeGen/PowerPC/mem_update.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=ppc32 | \
; RUN: not grep addi
-; RUN: llc < %s -march=ppc64 | \
+; RUN: llc -code-model=small < %s -march=ppc64 | \
; RUN: not grep addi
@Glob = global i64 4
diff --git a/test/CodeGen/PowerPC/misched-inorder-latency.ll b/test/CodeGen/PowerPC/misched-inorder-latency.ll
new file mode 100644
index 000000000000..8fae7ad4d1df
--- /dev/null
+++ b/test/CodeGen/PowerPC/misched-inorder-latency.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -enable-misched -pre-RA-sched=source -scheditins=false \
+; RUN: -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
+;
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; %val1 is a load live out of %entry. It should be hoisted
+; above the add.
+; CHECK: testload:
+; CHECK: %entry
+; CHECK: lwz
+; CHECK: addi
+; CHECK: bne
+; CHECK: %true
+define i32 @testload(i32 *%ptr, i32 %sumin) {
+entry:
+ %sum1 = add i32 %sumin, 1
+ %val1 = load i32* %ptr
+ %p = icmp eq i32 %sumin, 0
+ br i1 %p, label %true, label %end
+true:
+ %sum2 = add i32 %sum1, 1
+ %ptr2 = getelementptr i32* %ptr, i32 1
+ %val = load i32* %ptr2
+ %val2 = add i32 %val1, %val
+ br label %end
+end:
+ %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ]
+ %summerge = phi i32 [ %sum1, %entry], [ %sum2, %true ]
+ %sumout = add i32 %valmerge, %summerge
+ ret i32 %sumout
+}
+
+; The prefetch gets a default latency of 3 cycles and should be hoisted
+; above the add.
+;
+; CHECK: testprefetch:
+; CHECK: %entry
+; CHECK: dcbt
+; CHECK: addi
+; CHECK: blr
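+;
+; In the llvm.prefetch call below, the operands request a read (rw = 0) of
+; the data cache (cache type = 1) with maximal temporal locality (3).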
+define i32 @testprefetch(i8 *%ptr, i32 %i) {
+entry:
+ %val1 = add i32 %i, 1
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
+ %p = icmp eq i32 %i, 0
+ br i1 %p, label %true, label %end
+true:
+ %val2 = add i32 %val1, 1
+ br label %end
+end:
+ %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ]
+ ret i32 %valmerge
+}
+declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/negctr.ll b/test/CodeGen/PowerPC/negctr.ll
new file mode 100644
index 000000000000..2f6995c65dd8
--- /dev/null
+++ b/test/CodeGen/PowerPC/negctr.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -mcpu=a2 -disable-lsr | FileCheck -check-prefix=NOLSR %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @main() #0 {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 0
+ br i1 %exitcond, label %for.end, label %for.body
+
+; FIXME: We currently can't form the 32-bit unsigned trip count necessary here!
+; CHECK: @main
+; CHECK-NOT: bdnz
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @main1() #0 {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 0
+ br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main1
+; CHECK: li [[REG:[0-9]+]], -1
+; CHECK: mtctr [[REG]]
+; CHECK: bdnz
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @main2() #0 {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, -100000
+ br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: @main2
+; CHECK: lis [[REG:[0-9]+]], -2
+; CHECK: ori [[REG2:[0-9]+]], [[REG]], 31071
+; CHECK: mtctr [[REG2]]
+; CHECK: bdnz
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+define void @main3() #0 {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 127984, %entry ]
+ %indvars.iv.next = add i64 %indvars.iv, -16
+ %exitcond = icmp eq i64 %indvars.iv.next, -16
+ br i1 %exitcond, label %for.end, label %for.body
+
+; NOLSR: @main3
+; NOLSR: li [[REG:[0-9]+]], 8000
+; NOLSR: mtctr [[REG]]
+; NOLSR: bdnz
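+;
+; The induction variable steps from 127984 down by 16 and exits when it
+; reaches -16, so (127984 - (-16)) / 16 = 8000 iterations go into CTR.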
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/popcnt.ll b/test/CodeGen/PowerPC/popcnt.ll
new file mode 100644
index 000000000000..b304d72aede2
--- /dev/null
+++ b/test/CodeGen/PowerPC/popcnt.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=ppc64 -mattr=+popcntd < %s | FileCheck %s
+
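+; For i8 and i16 the operand is first masked to its low 8 or 16 bits with
+; rlwinm so that popcntw counts only the intended value.
+;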
+define i8 @cnt8(i8 %x) nounwind readnone {
+ %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+ ret i8 %cnt
+; CHECK: @cnt8
+; CHECK: rlwinm
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+ %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+ ret i16 %cnt
+; CHECK: @cnt16
+; CHECK: rlwinm
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+ %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+ ret i32 %cnt
+; CHECK: @cnt32
+; CHECK: popcntw
+; CHECK: blr
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+ %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+ ret i64 %cnt
+; CHECK: @cnt64
+; CHECK: popcntd
+; CHECK: blr
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/PowerPC/ppc64-toc.ll b/test/CodeGen/PowerPC/ppc64-toc.ll
index a29bdcb25031..7f30ef883e9a 100644
--- a/test/CodeGen/PowerPC/ppc64-toc.ll
+++ b/test/CodeGen/PowerPC/ppc64-toc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -code-model=small < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/pr15031.ll b/test/CodeGen/PowerPC/pr15031.ll
new file mode 100644
index 000000000000..5ccf941a1f16
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15031.ll
@@ -0,0 +1,370 @@
+; RUN: llc -mcpu=pwr7 -O3 < %s | FileCheck %s
+
+; Test case derived from bug report 15031. The code in the post-RA
+; scheduler that breaks critical anti-dependencies failed to check
+; whether an instruction had more than one definition and whether any
+; additional definitions interfered with the choice of a new register.
+; As a result, this test originally caused the following to be
+; generated:
+;
+; lbzu 3, 1(3)
+;
+; which is illegal, since it requires register 3 to both receive the
+; loaded value and receive the updated address. With the fix to bug
+; 15031, a different register is chosen to receive the loaded value.
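+;
+; (The load-with-update forms are invalid when RA = RT: RA is written back
+; with the incremented address while RT receives the loaded value, so the
+; two results cannot share register 3.)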
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%"class.llvm::MachineMemOperand" = type { %"struct.llvm::MachinePointerInfo", i64, i32, %"class.llvm::MDNode"*, %"class.llvm::MDNode"* }
+%"struct.llvm::MachinePointerInfo" = type { %"class.llvm::Value"*, i64 }
+%"class.llvm::Value" = type { i32 (...)**, i8, i8, i16, %"class.llvm::Type"*, %"class.llvm::Use"*, %"class.llvm::StringMapEntry"* }
+%"class.llvm::Type" = type { %"class.llvm::LLVMContext"*, i32, i32, %"class.llvm::Type"** }
+%"class.llvm::LLVMContext" = type { %"class.llvm::LLVMContextImpl"* }
+%"class.llvm::LLVMContextImpl" = type opaque
+%"class.llvm::Use" = type { %"class.llvm::Value"*, %"class.llvm::Use"*, %"class.llvm::PointerIntPair" }
+%"class.llvm::PointerIntPair" = type { i64 }
+%"class.llvm::StringMapEntry" = type opaque
+%"class.llvm::MDNode" = type { %"class.llvm::Value", %"class.llvm::FoldingSetImpl::Node", i32, i32 }
+%"class.llvm::FoldingSetImpl::Node" = type { i8* }
+%"class.llvm::MachineInstr" = type { %"class.llvm::ilist_node", %"class.llvm::MCInstrDesc"*, %"class.llvm::MachineBasicBlock"*, %"class.llvm::MachineOperand"*, i32, %"class.llvm::ArrayRecycler<llvm::MachineOperand, 8>::Capacity", i8, i8, i8, %"class.llvm::MachineMemOperand"**, %"class.llvm::DebugLoc" }
+%"class.llvm::ilist_node" = type { %"class.llvm::ilist_half_node", %"class.llvm::MachineInstr"* }
+%"class.llvm::ilist_half_node" = type { %"class.llvm::MachineInstr"* }
+%"class.llvm::MCInstrDesc" = type { i16, i16, i16, i16, i16, i32, i64, i16*, i16*, %"class.llvm::MCOperandInfo"* }
+%"class.llvm::MCOperandInfo" = type { i16, i8, i8, i32 }
+%"class.llvm::MachineBasicBlock" = type { %"class.llvm::ilist_node.0", %"struct.llvm::ilist", %"class.llvm::BasicBlock"*, i32, %"class.llvm::MachineFunction"*, %"class.std::vector.163", %"class.std::vector.163", %"class.std::vector.123", %"class.std::vector.123", i32, i8, i8 }
+%"class.llvm::ilist_node.0" = type { %"class.llvm::ilist_half_node.1", %"class.llvm::MachineBasicBlock"* }
+%"class.llvm::ilist_half_node.1" = type { %"class.llvm::MachineBasicBlock"* }
+%"struct.llvm::ilist" = type { %"class.llvm::iplist" }
+%"class.llvm::iplist" = type { %"struct.llvm::ilist_traits", %"class.llvm::MachineInstr"* }
+%"struct.llvm::ilist_traits" = type { %"class.llvm::ilist_half_node", %"class.llvm::MachineBasicBlock"* }
+%"class.llvm::BasicBlock" = type { %"class.llvm::Value", %"class.llvm::ilist_node.2", %"class.llvm::iplist.4", %"class.llvm::Function"* }
+%"class.llvm::ilist_node.2" = type { %"class.llvm::ilist_half_node.3", %"class.llvm::BasicBlock"* }
+%"class.llvm::ilist_half_node.3" = type { %"class.llvm::BasicBlock"* }
+%"class.llvm::iplist.4" = type { %"struct.llvm::ilist_traits.5", %"class.llvm::Instruction"* }
+%"struct.llvm::ilist_traits.5" = type { %"class.llvm::ilist_half_node.10" }
+%"class.llvm::ilist_half_node.10" = type { %"class.llvm::Instruction"* }
+%"class.llvm::Instruction" = type { %"class.llvm::User", %"class.llvm::ilist_node.193", %"class.llvm::BasicBlock"*, %"class.llvm::DebugLoc" }
+%"class.llvm::User" = type { %"class.llvm::Value", %"class.llvm::Use"*, i32 }
+%"class.llvm::ilist_node.193" = type { %"class.llvm::ilist_half_node.10", %"class.llvm::Instruction"* }
+%"class.llvm::DebugLoc" = type { i32, i32 }
+%"class.llvm::Function" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.27", %"class.llvm::iplist.47", %"class.llvm::iplist.54", %"class.llvm::ValueSymbolTable"*, %"class.llvm::AttributeSet" }
+%"class.llvm::GlobalValue" = type { [52 x i8], [4 x i8], %"class.llvm::Module"*, %"class.std::basic_string" }
+%"class.llvm::Module" = type { %"class.llvm::LLVMContext"*, %"class.llvm::iplist.11", %"class.llvm::iplist.20", %"class.llvm::iplist.29", %"struct.llvm::ilist.38", %"class.std::basic_string", %"class.llvm::ValueSymbolTable"*, %"class.llvm::OwningPtr", %"class.std::basic_string", %"class.std::basic_string", %"class.std::basic_string", i8* }
+%"class.llvm::iplist.11" = type { %"struct.llvm::ilist_traits.12", %"class.llvm::GlobalVariable"* }
+%"struct.llvm::ilist_traits.12" = type { %"class.llvm::ilist_node.18" }
+%"class.llvm::ilist_node.18" = type { %"class.llvm::ilist_half_node.19", %"class.llvm::GlobalVariable"* }
+%"class.llvm::ilist_half_node.19" = type { %"class.llvm::GlobalVariable"* }
+%"class.llvm::GlobalVariable" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.18", i8 }
+%"class.llvm::iplist.20" = type { %"struct.llvm::ilist_traits.21", %"class.llvm::Function"* }
+%"struct.llvm::ilist_traits.21" = type { %"class.llvm::ilist_node.27" }
+%"class.llvm::ilist_node.27" = type { %"class.llvm::ilist_half_node.28", %"class.llvm::Function"* }
+%"class.llvm::ilist_half_node.28" = type { %"class.llvm::Function"* }
+%"class.llvm::iplist.29" = type { %"struct.llvm::ilist_traits.30", %"class.llvm::GlobalAlias"* }
+%"struct.llvm::ilist_traits.30" = type { %"class.llvm::ilist_node.36" }
+%"class.llvm::ilist_node.36" = type { %"class.llvm::ilist_half_node.37", %"class.llvm::GlobalAlias"* }
+%"class.llvm::ilist_half_node.37" = type { %"class.llvm::GlobalAlias"* }
+%"class.llvm::GlobalAlias" = type { %"class.llvm::GlobalValue", %"class.llvm::ilist_node.36" }
+%"struct.llvm::ilist.38" = type { %"class.llvm::iplist.39" }
+%"class.llvm::iplist.39" = type { %"struct.llvm::ilist_traits.40", %"class.llvm::NamedMDNode"* }
+%"struct.llvm::ilist_traits.40" = type { %"class.llvm::ilist_node.45" }
+%"class.llvm::ilist_node.45" = type { %"class.llvm::ilist_half_node.46", %"class.llvm::NamedMDNode"* }
+%"class.llvm::ilist_half_node.46" = type { %"class.llvm::NamedMDNode"* }
+%"class.llvm::NamedMDNode" = type { %"class.llvm::ilist_node.45", %"class.std::basic_string", %"class.llvm::Module"*, i8* }
+%"class.std::basic_string" = type { %"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" }
+%"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* }
+%"class.llvm::ValueSymbolTable" = type opaque
+%"class.llvm::OwningPtr" = type { %"class.llvm::GVMaterializer"* }
+%"class.llvm::GVMaterializer" = type opaque
+%"class.llvm::iplist.47" = type { %"struct.llvm::ilist_traits.48", %"class.llvm::BasicBlock"* }
+%"struct.llvm::ilist_traits.48" = type { %"class.llvm::ilist_half_node.3" }
+%"class.llvm::iplist.54" = type { %"struct.llvm::ilist_traits.55", %"class.llvm::Argument"* }
+%"struct.llvm::ilist_traits.55" = type { %"class.llvm::ilist_half_node.61" }
+%"class.llvm::ilist_half_node.61" = type { %"class.llvm::Argument"* }
+%"class.llvm::Argument" = type { %"class.llvm::Value", %"class.llvm::ilist_node.192", %"class.llvm::Function"* }
+%"class.llvm::ilist_node.192" = type { %"class.llvm::ilist_half_node.61", %"class.llvm::Argument"* }
+%"class.llvm::AttributeSet" = type { %"class.llvm::AttributeSetImpl"* }
+%"class.llvm::AttributeSetImpl" = type opaque
+%"class.llvm::MachineFunction" = type { %"class.llvm::Function"*, %"class.llvm::TargetMachine"*, %"class.llvm::MCContext"*, %"class.llvm::MachineModuleInfo"*, %"class.llvm::GCModuleInfo"*, %"class.llvm::MachineRegisterInfo"*, %"struct.llvm::MachineFunctionInfo"*, %"class.llvm::MachineFrameInfo"*, %"class.llvm::MachineConstantPool"*, %"class.llvm::MachineJumpTableInfo"*, %"class.std::vector.163", %"class.llvm::BumpPtrAllocator", %"class.llvm::Recycler", %"class.llvm::ArrayRecycler", %"class.llvm::Recycler.180", %"struct.llvm::ilist.181", i32, i32, i8 }
+%"class.llvm::TargetMachine" = type { i32 (...)**, %"class.llvm::Target"*, %"class.std::basic_string", %"class.std::basic_string", %"class.std::basic_string", %"class.llvm::MCCodeGenInfo"*, %"class.llvm::MCAsmInfo"*, i8, %"class.llvm::TargetOptions" }
+%"class.llvm::Target" = type opaque
+%"class.llvm::MCCodeGenInfo" = type opaque
+%"class.llvm::MCAsmInfo" = type opaque
+%"class.llvm::TargetOptions" = type { [2 x i8], i32, i8, i32, i8, %"class.std::basic_string", i32, i32 }
+%"class.llvm::MCContext" = type { %"class.llvm::SourceMgr"*, %"class.llvm::MCAsmInfo"*, %"class.llvm::MCRegisterInfo"*, %"class.llvm::MCObjectFileInfo"*, %"class.llvm::BumpPtrAllocator", %"class.llvm::StringMap", %"class.llvm::StringMap.62", i32, %"class.llvm::DenseMap.63", i8*, %"class.llvm::raw_ostream"*, i8, %"class.std::basic_string", %"class.std::basic_string", %"class.std::vector", %"class.std::vector.70", %"class.llvm::MCDwarfLoc", i8, i8, i32, %"class.llvm::MCSection"*, %"class.llvm::MCSymbol"*, %"class.llvm::MCSymbol"*, %"class.std::vector.75", %"class.llvm::StringRef", %"class.llvm::StringRef", i8, %"class.llvm::DenseMap.80", %"class.std::vector.84", i8*, i8*, i8*, i8 }
+%"class.llvm::SourceMgr" = type opaque
+%"class.llvm::MCRegisterInfo" = type { %"struct.llvm::MCRegisterDesc"*, i32, i32, i32, %"class.llvm::MCRegisterClass"*, i32, i32, [2 x i16]*, i16*, i8*, i16*, i32, i16*, i32, i32, i32, i32, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair"*, %"class.llvm::DenseMap" }
+%"struct.llvm::MCRegisterDesc" = type { i32, i32, i32, i32, i32, i32 }
+%"class.llvm::MCRegisterClass" = type { i8*, i16*, i8*, i16, i16, i16, i16, i16, i8, i8 }
+%"struct.llvm::MCRegisterInfo::DwarfLLVMRegPair" = type { i32, i32 }
+%"class.llvm::DenseMap" = type { %"struct.std::pair"*, i32, i32, i32 }
+%"struct.std::pair" = type { i32, i32 }
+%"class.llvm::MCObjectFileInfo" = type opaque
+%"class.llvm::BumpPtrAllocator" = type { i64, i64, %"class.llvm::SlabAllocator"*, %"class.llvm::MemSlab"*, i8*, i8*, i64 }
+%"class.llvm::SlabAllocator" = type { i32 (...)** }
+%"class.llvm::MemSlab" = type { i64, %"class.llvm::MemSlab"* }
+%"class.llvm::StringMap" = type { %"class.llvm::StringMapImpl", %"class.llvm::BumpPtrAllocator"* }
+%"class.llvm::StringMapImpl" = type { %"class.llvm::StringMapEntryBase"**, i32, i32, i32, i32 }
+%"class.llvm::StringMapEntryBase" = type { i32 }
+%"class.llvm::StringMap.62" = type { %"class.llvm::StringMapImpl", %"class.llvm::BumpPtrAllocator"* }
+%"class.llvm::DenseMap.63" = type { %"struct.std::pair.66"*, i32, i32, i32 }
+%"struct.std::pair.66" = type opaque
+%"class.llvm::raw_ostream" = type { i32 (...)**, i8*, i8*, i8*, i32 }
+%"class.std::vector" = type { %"struct.std::_Vector_base" }
+%"struct.std::_Vector_base" = type { %"struct.std::_Vector_base<llvm::MCDwarfFile *, std::allocator<llvm::MCDwarfFile *> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MCDwarfFile *, std::allocator<llvm::MCDwarfFile *> >::_Vector_impl" = type { %"class.llvm::MCDwarfFile"**, %"class.llvm::MCDwarfFile"**, %"class.llvm::MCDwarfFile"** }
+%"class.llvm::MCDwarfFile" = type { %"class.llvm::StringRef", i32 }
+%"class.llvm::StringRef" = type { i8*, i64 }
+%"class.std::vector.70" = type { %"struct.std::_Vector_base.71" }
+%"struct.std::_Vector_base.71" = type { %"struct.std::_Vector_base<llvm::StringRef, std::allocator<llvm::StringRef> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::StringRef, std::allocator<llvm::StringRef> >::_Vector_impl" = type { %"class.llvm::StringRef"*, %"class.llvm::StringRef"*, %"class.llvm::StringRef"* }
+%"class.llvm::MCDwarfLoc" = type { i32, i32, i32, i32, i32, i32 }
+%"class.llvm::MCSection" = type opaque
+%"class.llvm::MCSymbol" = type { %"class.llvm::StringRef", %"class.llvm::MCSection"*, %"class.llvm::MCExpr"*, i8 }
+%"class.llvm::MCExpr" = type opaque
+%"class.std::vector.75" = type { %"struct.std::_Vector_base.76" }
+%"struct.std::_Vector_base.76" = type { %"struct.std::_Vector_base<const llvm::MCGenDwarfLabelEntry *, std::allocator<const llvm::MCGenDwarfLabelEntry *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::MCGenDwarfLabelEntry *, std::allocator<const llvm::MCGenDwarfLabelEntry *> >::_Vector_impl" = type { %"class.llvm::MCGenDwarfLabelEntry"**, %"class.llvm::MCGenDwarfLabelEntry"**, %"class.llvm::MCGenDwarfLabelEntry"** }
+%"class.llvm::MCGenDwarfLabelEntry" = type { %"class.llvm::StringRef", i32, i32, %"class.llvm::MCSymbol"* }
+%"class.llvm::DenseMap.80" = type { %"struct.std::pair.83"*, i32, i32, i32 }
+%"struct.std::pair.83" = type { %"class.llvm::MCSection"*, %"class.llvm::MCLineSection"* }
+%"class.llvm::MCLineSection" = type { %"class.std::vector.215" }
+%"class.std::vector.215" = type { %"struct.std::_Vector_base.216" }
+%"struct.std::_Vector_base.216" = type { %"struct.std::_Vector_base<llvm::MCLineEntry, std::allocator<llvm::MCLineEntry> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MCLineEntry, std::allocator<llvm::MCLineEntry> >::_Vector_impl" = type { %"class.llvm::MCLineEntry"*, %"class.llvm::MCLineEntry"*, %"class.llvm::MCLineEntry"* }
+%"class.llvm::MCLineEntry" = type { %"class.llvm::MCDwarfLoc", %"class.llvm::MCSymbol"* }
+%"class.std::vector.84" = type { %"struct.std::_Vector_base.85" }
+%"struct.std::_Vector_base.85" = type { %"struct.std::_Vector_base<const llvm::MCSection *, std::allocator<const llvm::MCSection *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::MCSection *, std::allocator<const llvm::MCSection *> >::_Vector_impl" = type { %"class.llvm::MCSection"**, %"class.llvm::MCSection"**, %"class.llvm::MCSection"** }
+%"class.llvm::MachineModuleInfo" = type { %"class.llvm::ImmutablePass", %"class.llvm::MCContext", %"class.llvm::Module"*, %"class.llvm::MachineModuleInfoImpl"*, %"class.std::vector.95", i32, %"class.std::vector.100", %"class.llvm::DenseMap.110", %"class.llvm::DenseMap.114", i32, %"class.std::vector.118", %"class.std::vector.123", %"class.std::vector.123", %"class.std::vector.128", %"class.llvm::SmallPtrSet", %"class.llvm::MMIAddrLabelMap"*, i8, i8, i8, i8, %"class.llvm::SmallVector.133" }
+%"class.llvm::ImmutablePass" = type { %"class.llvm::ModulePass" }
+%"class.llvm::ModulePass" = type { %"class.llvm::Pass" }
+%"class.llvm::Pass" = type { i32 (...)**, %"class.llvm::AnalysisResolver"*, i8*, i32 }
+%"class.llvm::AnalysisResolver" = type { %"class.std::vector.89", %"class.llvm::PMDataManager"* }
+%"class.std::vector.89" = type { %"struct.std::_Vector_base.90" }
+%"struct.std::_Vector_base.90" = type { %"struct.std::_Vector_base<std::pair<const void *, llvm::Pass *>, std::allocator<std::pair<const void *, llvm::Pass *> > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::pair<const void *, llvm::Pass *>, std::allocator<std::pair<const void *, llvm::Pass *> > >::_Vector_impl" = type { %"struct.std::pair.94"*, %"struct.std::pair.94"*, %"struct.std::pair.94"* }
+%"struct.std::pair.94" = type { i8*, %"class.llvm::Pass"* }
+%"class.llvm::PMDataManager" = type opaque
+%"class.llvm::MachineModuleInfoImpl" = type { i32 (...)** }
+%"class.std::vector.95" = type { %"struct.std::_Vector_base.96" }
+%"struct.std::_Vector_base.96" = type { %"struct.std::_Vector_base<llvm::MachineMove, std::allocator<llvm::MachineMove> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MachineMove, std::allocator<llvm::MachineMove> >::_Vector_impl" = type { %"class.llvm::MachineMove"*, %"class.llvm::MachineMove"*, %"class.llvm::MachineMove"* }
+%"class.llvm::MachineMove" = type { %"class.llvm::MCSymbol"*, %"class.llvm::MachineLocation", %"class.llvm::MachineLocation" }
+%"class.llvm::MachineLocation" = type { i8, i32, i32 }
+%"class.std::vector.100" = type { %"struct.std::_Vector_base.101" }
+%"struct.std::_Vector_base.101" = type { %"struct.std::_Vector_base<llvm::LandingPadInfo, std::allocator<llvm::LandingPadInfo> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::LandingPadInfo, std::allocator<llvm::LandingPadInfo> >::_Vector_impl" = type { %"struct.llvm::LandingPadInfo"*, %"struct.llvm::LandingPadInfo"*, %"struct.llvm::LandingPadInfo"* }
+%"struct.llvm::LandingPadInfo" = type { %"class.llvm::MachineBasicBlock"*, %"class.llvm::SmallVector", %"class.llvm::SmallVector", %"class.llvm::MCSymbol"*, %"class.llvm::Function"*, %"class.std::vector.105" }
+%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl", %"struct.llvm::SmallVectorStorage" }
+%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" }
+%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" }
+%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion" }
+%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8* }
+%"struct.llvm::AlignedCharArrayUnion" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::AlignedCharArray" = type { [8 x i8] }
+%"struct.llvm::SmallVectorStorage" = type { i8 }
+%"class.std::vector.105" = type { %"struct.std::_Vector_base.106" }
+%"struct.std::_Vector_base.106" = type { %"struct.std::_Vector_base<int, std::allocator<int> >::_Vector_impl" }
+%"struct.std::_Vector_base<int, std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
+%"class.llvm::DenseMap.110" = type { %"struct.std::pair.113"*, i32, i32, i32 }
+%"struct.std::pair.113" = type { %"class.llvm::MCSymbol"*, %"class.llvm::SmallVector.206" }
+%"class.llvm::SmallVector.206" = type { [28 x i8], %"struct.llvm::SmallVectorStorage.207" }
+%"struct.llvm::SmallVectorStorage.207" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.198"] }
+%"struct.llvm::AlignedCharArrayUnion.198" = type { %"struct.llvm::AlignedCharArray.199" }
+%"struct.llvm::AlignedCharArray.199" = type { [4 x i8] }
+%"class.llvm::DenseMap.114" = type { %"struct.std::pair.117"*, i32, i32, i32 }
+%"struct.std::pair.117" = type { %"class.llvm::MCSymbol"*, i32 }
+%"class.std::vector.118" = type { %"struct.std::_Vector_base.119" }
+%"struct.std::_Vector_base.119" = type { %"struct.std::_Vector_base<const llvm::GlobalVariable *, std::allocator<const llvm::GlobalVariable *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::GlobalVariable *, std::allocator<const llvm::GlobalVariable *> >::_Vector_impl" = type { %"class.llvm::GlobalVariable"**, %"class.llvm::GlobalVariable"**, %"class.llvm::GlobalVariable"** }
+%"class.std::vector.123" = type { %"struct.std::_Vector_base.124" }
+%"struct.std::_Vector_base.124" = type { %"struct.std::_Vector_base<unsigned int, std::allocator<unsigned int> >::_Vector_impl" }
+%"struct.std::_Vector_base<unsigned int, std::allocator<unsigned int> >::_Vector_impl" = type { i32*, i32*, i32* }
+%"class.std::vector.128" = type { %"struct.std::_Vector_base.129" }
+%"struct.std::_Vector_base.129" = type { %"struct.std::_Vector_base<const llvm::Function *, std::allocator<const llvm::Function *> >::_Vector_impl" }
+%"struct.std::_Vector_base<const llvm::Function *, std::allocator<const llvm::Function *> >::_Vector_impl" = type { %"class.llvm::Function"**, %"class.llvm::Function"**, %"class.llvm::Function"** }
+%"class.llvm::SmallPtrSet" = type { %"class.llvm::SmallPtrSetImpl", [33 x i8*] }
+%"class.llvm::SmallPtrSetImpl" = type { i8**, i8**, i32, i32, i32 }
+%"class.llvm::MMIAddrLabelMap" = type opaque
+%"class.llvm::SmallVector.133" = type { %"class.llvm::SmallVectorImpl.134", %"struct.llvm::SmallVectorStorage.139" }
+%"class.llvm::SmallVectorImpl.134" = type { %"class.llvm::SmallVectorTemplateBase.135" }
+%"class.llvm::SmallVectorTemplateBase.135" = type { %"class.llvm::SmallVectorTemplateCommon.136" }
+%"class.llvm::SmallVectorTemplateCommon.136" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.137" }
+%"struct.llvm::AlignedCharArrayUnion.137" = type { %"struct.llvm::AlignedCharArray.138" }
+%"struct.llvm::AlignedCharArray.138" = type { [40 x i8] }
+%"struct.llvm::SmallVectorStorage.139" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.137"] }
+%"class.llvm::GCModuleInfo" = type opaque
+%"class.llvm::MachineRegisterInfo" = type { %"class.llvm::TargetRegisterInfo"*, i8, i8, %"class.llvm::IndexedMap", %"class.llvm::IndexedMap.146", %"class.llvm::MachineOperand"**, %"class.llvm::BitVector", %"class.llvm::BitVector", %"class.llvm::BitVector", %"class.std::vector.147", %"class.std::vector.123" }
+%"class.llvm::TargetRegisterInfo" = type { i32 (...)**, %"class.llvm::MCRegisterInfo", %"struct.llvm::TargetRegisterInfoDesc"*, i8**, i32*, %"class.llvm::TargetRegisterClass"**, %"class.llvm::TargetRegisterClass"** }
+%"struct.llvm::TargetRegisterInfoDesc" = type { i32, i8 }
+%"class.llvm::TargetRegisterClass" = type { %"class.llvm::MCRegisterClass"*, i32*, i32*, i16*, %"class.llvm::TargetRegisterClass"**, void (%"class.llvm::ArrayRef"*, %"class.llvm::MachineFunction"*)* }
+%"class.llvm::ArrayRef" = type { i16*, i64 }
+%"class.llvm::IndexedMap" = type { %"class.std::vector.140", %"struct.std::pair.145", %"struct.llvm::VirtReg2IndexFunctor" }
+%"class.std::vector.140" = type { %"struct.std::_Vector_base.141" }
+%"struct.std::_Vector_base.141" = type { %"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *>, std::allocator<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *> > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *>, std::allocator<std::pair<const llvm::TargetRegisterClass *, llvm::MachineOperand *> > >::_Vector_impl" = type { %"struct.std::pair.145"*, %"struct.std::pair.145"*, %"struct.std::pair.145"* }
+%"struct.std::pair.145" = type { %"class.llvm::TargetRegisterClass"*, %"class.llvm::MachineOperand"* }
+%"class.llvm::MachineOperand" = type { i8, [3 x i8], %union.anon, %"class.llvm::MachineInstr"*, %union.anon.188 }
+%union.anon = type { i32 }
+%union.anon.188 = type { %struct.anon }
+%struct.anon = type { %"class.llvm::MachineOperand"*, %"class.llvm::MachineOperand"* }
+%"struct.llvm::VirtReg2IndexFunctor" = type { i8 }
+%"class.llvm::IndexedMap.146" = type { %"class.std::vector.147", %"struct.std::pair.152", %"struct.llvm::VirtReg2IndexFunctor" }
+%"class.std::vector.147" = type { %"struct.std::_Vector_base.148" }
+%"struct.std::_Vector_base.148" = type { %"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>, std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::pair<unsigned int, unsigned int>, std::allocator<std::pair<unsigned int, unsigned int> > >::_Vector_impl" = type { %"struct.std::pair.152"*, %"struct.std::pair.152"*, %"struct.std::pair.152"* }
+%"struct.std::pair.152" = type { i32, i32 }
+%"class.llvm::BitVector" = type { i64*, i32, i32 }
+%"struct.llvm::MachineFunctionInfo" = type { i32 (...)** }
+%"class.llvm::MachineFrameInfo" = type opaque
+%"class.llvm::MachineConstantPool" = type { %"class.llvm::DataLayout"*, i32, %"class.std::vector.153", %"class.llvm::DenseSet" }
+%"class.llvm::DataLayout" = type opaque
+%"class.std::vector.153" = type { %"struct.std::_Vector_base.154" }
+%"struct.std::_Vector_base.154" = type { %"struct.std::_Vector_base<llvm::MachineConstantPoolEntry, std::allocator<llvm::MachineConstantPoolEntry> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MachineConstantPoolEntry, std::allocator<llvm::MachineConstantPoolEntry> >::_Vector_impl" = type { %"class.llvm::MachineConstantPoolEntry"*, %"class.llvm::MachineConstantPoolEntry"*, %"class.llvm::MachineConstantPoolEntry"* }
+%"class.llvm::MachineConstantPoolEntry" = type { %union.anon.158, i32 }
+%union.anon.158 = type { %"class.llvm::Constant"* }
+%"class.llvm::Constant" = type { %"class.llvm::User" }
+%"class.llvm::DenseSet" = type { %"class.llvm::DenseMap.159" }
+%"class.llvm::DenseMap.159" = type { %"struct.std::pair.162"*, i32, i32, i32 }
+%"struct.std::pair.162" = type { %"class.llvm::MachineConstantPoolValue"*, i8 }
+%"class.llvm::MachineConstantPoolValue" = type { i32 (...)**, %"class.llvm::Type"* }
+%"class.llvm::MachineJumpTableInfo" = type opaque
+%"class.std::vector.163" = type { %"struct.std::_Vector_base.164" }
+%"struct.std::_Vector_base.164" = type { %"struct.std::_Vector_base<llvm::MachineBasicBlock *, std::allocator<llvm::MachineBasicBlock *> >::_Vector_impl" }
+%"struct.std::_Vector_base<llvm::MachineBasicBlock *, std::allocator<llvm::MachineBasicBlock *> >::_Vector_impl" = type { %"class.llvm::MachineBasicBlock"**, %"class.llvm::MachineBasicBlock"**, %"class.llvm::MachineBasicBlock"** }
+%"class.llvm::Recycler" = type { %"class.llvm::iplist.168" }
+%"class.llvm::iplist.168" = type { %"struct.llvm::ilist_traits.169", %"struct.llvm::RecyclerStruct"* }
+%"struct.llvm::ilist_traits.169" = type { %"struct.llvm::RecyclerStruct" }
+%"struct.llvm::RecyclerStruct" = type { %"struct.llvm::RecyclerStruct"*, %"struct.llvm::RecyclerStruct"* }
+%"class.llvm::ArrayRecycler" = type { %"class.llvm::SmallVector.174" }
+%"class.llvm::SmallVector.174" = type { %"class.llvm::SmallVectorImpl.175", %"struct.llvm::SmallVectorStorage.179" }
+%"class.llvm::SmallVectorImpl.175" = type { %"class.llvm::SmallVectorTemplateBase.176" }
+%"class.llvm::SmallVectorTemplateBase.176" = type { %"class.llvm::SmallVectorTemplateCommon.177" }
+%"class.llvm::SmallVectorTemplateCommon.177" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.178" }
+%"struct.llvm::AlignedCharArrayUnion.178" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::SmallVectorStorage.179" = type { [7 x %"struct.llvm::AlignedCharArrayUnion.178"] }
+%"class.llvm::Recycler.180" = type { %"class.llvm::iplist.168" }
+%"struct.llvm::ilist.181" = type { %"class.llvm::iplist.182" }
+%"class.llvm::iplist.182" = type { %"struct.llvm::ilist_traits.183", %"class.llvm::MachineBasicBlock"* }
+%"struct.llvm::ilist_traits.183" = type { %"class.llvm::ilist_half_node.1" }
+%"class.llvm::ArrayRecycler<llvm::MachineOperand, 8>::Capacity" = type { i8 }
+%"class.llvm::ConstantInt" = type { %"class.llvm::Constant", %"class.llvm::APInt" }
+%"class.llvm::APInt" = type { i32, %union.anon.189 }
+%union.anon.189 = type { i64 }
+%"class.llvm::ConstantFP" = type { %"class.llvm::Constant", %"class.llvm::APFloat" }
+%"class.llvm::APFloat" = type { %"struct.llvm::fltSemantics"*, %"union.llvm::APFloat::Significand", i16, i8 }
+%"struct.llvm::fltSemantics" = type opaque
+%"union.llvm::APFloat::Significand" = type { i64 }
+%"class.llvm::BlockAddress" = type { %"class.llvm::Constant" }
+%"class.llvm::hash_code" = type { i64 }
+%"struct.llvm::hashing::detail::hash_combine_recursive_helper" = type { [64 x i8], %"struct.llvm::hashing::detail::hash_state", i64 }
+%"struct.llvm::hashing::detail::hash_state" = type { i64, i64, i64, i64, i64, i64, i64, i64 }
+%"class.llvm::PrintReg" = type { %"class.llvm::TargetRegisterInfo"*, i32, i32 }
+%"class.llvm::PseudoSourceValue" = type { %"class.llvm::Value" }
+%"class.llvm::FoldingSetNodeID" = type { %"class.llvm::SmallVector.194" }
+%"class.llvm::SmallVector.194" = type { [28 x i8], %"struct.llvm::SmallVectorStorage.200" }
+%"struct.llvm::SmallVectorStorage.200" = type { [31 x %"struct.llvm::AlignedCharArrayUnion.198"] }
+%"struct.llvm::ArrayRecycler<llvm::MachineOperand, 8>::FreeList" = type { %"struct.llvm::ArrayRecycler<llvm::MachineOperand, 8>::FreeList"* }
+%"class.llvm::ilist_iterator.202" = type { %"class.llvm::MachineInstr"* }
+%"class.llvm::TargetInstrInfo" = type { i32 (...)**, [28 x i8], i32, i32 }
+%"struct.std::pair.203" = type { i8, i8 }
+%"class.llvm::SmallVectorImpl.195" = type { %"class.llvm::SmallVectorTemplateBase.196" }
+%"class.llvm::SmallVectorTemplateBase.196" = type { %"class.llvm::SmallVectorTemplateCommon.197" }
+%"class.llvm::SmallVectorTemplateCommon.197" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.198" }
+%"class.llvm::AliasAnalysis" = type { i32 (...)**, %"class.llvm::DataLayout"*, %"class.llvm::TargetLibraryInfo"*, %"class.llvm::AliasAnalysis"* }
+%"class.llvm::TargetLibraryInfo" = type opaque
+%"struct.llvm::AliasAnalysis::Location" = type { %"class.llvm::Value"*, i64, %"class.llvm::MDNode"* }
+%"class.llvm::DIVariable" = type { %"class.llvm::DIDescriptor" }
+%"class.llvm::DIDescriptor" = type { %"class.llvm::MDNode"* }
+%"class.llvm::DIScope" = type { %"class.llvm::DIDescriptor" }
+%"class.llvm::ArrayRef.208" = type { i32*, i64 }
+%"class.llvm::SmallVector.209" = type { %"class.llvm::SmallVectorImpl.210", %"struct.llvm::SmallVectorStorage.214" }
+%"class.llvm::SmallVectorImpl.210" = type { %"class.llvm::SmallVectorTemplateBase.211" }
+%"class.llvm::SmallVectorTemplateBase.211" = type { %"class.llvm::SmallVectorTemplateCommon.212" }
+%"class.llvm::SmallVectorTemplateCommon.212" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.213" }
+%"struct.llvm::AlignedCharArrayUnion.213" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::SmallVectorStorage.214" = type { [7 x %"struct.llvm::AlignedCharArrayUnion.213"] }
+%"class.llvm::Twine" = type { %"union.llvm::Twine::Child", %"union.llvm::Twine::Child", i8, i8 }
+%"union.llvm::Twine::Child" = type { %"class.llvm::Twine"* }
+%"struct.std::random_access_iterator_tag" = type { i8 }
+
+declare void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"*, %"class.llvm::MachineOperand"*)
+
+declare void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"*, %"class.llvm::MachineOperand"*)
+
+declare zeroext i32 @_ZNK4llvm14MCRegisterInfo9getSubRegEjj(%"class.llvm::MCRegisterInfo"*, i32 zeroext, i32 zeroext)
+
+define void @_ZN4llvm14MachineOperand12substPhysRegEjRKNS_18TargetRegisterInfoE(%"class.llvm::MachineOperand"* %this, i32 zeroext %Reg, %"class.llvm::TargetRegisterInfo"* %TRI) align 2 {
+entry:
+ %SubReg_TargetFlags.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 1
+ %0 = bitcast [3 x i8]* %SubReg_TargetFlags.i to i24*
+ %bf.load.i = load i24* %0, align 1
+ %bf.lshr.i = lshr i24 %bf.load.i, 12
+ %tobool = icmp eq i24 %bf.lshr.i, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %bf.cast.i = zext i24 %bf.lshr.i to i32
+ %add.ptr = getelementptr inbounds %"class.llvm::TargetRegisterInfo"* %TRI, i64 0, i32 1
+ %call3 = tail call zeroext i32 @_ZNK4llvm14MCRegisterInfo9getSubRegEjj(%"class.llvm::MCRegisterInfo"* %add.ptr, i32 zeroext %Reg, i32 zeroext %bf.cast.i)
+ %bf.load.i10 = load i24* %0, align 1
+ %bf.clear.i = and i24 %bf.load.i10, 4095
+ store i24 %bf.clear.i, i24* %0, align 1
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %Reg.addr.0 = phi i32 [ %call3, %if.then ], [ %Reg, %entry ]
+ %RegNo.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 2, i32 0
+ %1 = load i32* %RegNo.i.i, align 4, !tbaa !0
+ %cmp.i = icmp eq i32 %1, %Reg.addr.0
+ br i1 %cmp.i, label %_ZN4llvm14MachineOperand6setRegEj.exit, label %if.end.i
+
+if.end.i: ; preds = %if.end
+ %ParentMI.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 3
+ %2 = load %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8, !tbaa !3
+ %tobool.i = icmp eq %"class.llvm::MachineInstr"* %2, null
+ br i1 %tobool.i, label %if.end13.i, label %if.then3.i
+
+if.then3.i: ; preds = %if.end.i
+ %Parent.i.i = getelementptr inbounds %"class.llvm::MachineInstr"* %2, i64 0, i32 2
+ %3 = load %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 8, !tbaa !3
+ %tobool5.i = icmp eq %"class.llvm::MachineBasicBlock"* %3, null
+ br i1 %tobool5.i, label %if.end13.i, label %if.then6.i
+
+if.then6.i: ; preds = %if.then3.i
+ %xParent.i.i = getelementptr inbounds %"class.llvm::MachineBasicBlock"* %3, i64 0, i32 4
+ %4 = load %"class.llvm::MachineFunction"** %xParent.i.i, align 8, !tbaa !3
+ %tobool8.i = icmp eq %"class.llvm::MachineFunction"* %4, null
+ br i1 %tobool8.i, label %if.end13.i, label %if.then9.i
+
+if.then9.i: ; preds = %if.then6.i
+ %RegInfo.i.i = getelementptr inbounds %"class.llvm::MachineFunction"* %4, i64 0, i32 5
+ %5 = load %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8, !tbaa !3
+ tail call void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
+ store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0
+ tail call void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
+ br label %_ZN4llvm14MachineOperand6setRegEj.exit
+
+if.end13.i: ; preds = %if.then6.i, %if.then3.i, %if.end.i
+ store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4, !tbaa !0
+ br label %_ZN4llvm14MachineOperand6setRegEj.exit
+
+_ZN4llvm14MachineOperand6setRegEj.exit: ; preds = %if.end, %if.then9.i, %if.end13.i
+ ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
+!4 = metadata !{metadata !"vtable pointer", metadata !2}
+!5 = metadata !{metadata !"long", metadata !1}
+!6 = metadata !{i64 0, i64 8, metadata !3, i64 8, i64 8, metadata !5}
+!7 = metadata !{metadata !"short", metadata !1}
+!8 = metadata !{i64 0, i64 1, metadata !1, i64 1, i64 4, metadata !0, i64 2, i64 1, metadata !1, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 3, i64 1, metadata !9, i64 4, i64 4, metadata !0, i64 4, i64 4, metadata !0, i64 8, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !5, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 8, metadata !3, i64 16, i64 4, metadata !0, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 16, i64 8, metadata !3, i64 24, i64 4, metadata !0}
+!9 = metadata !{metadata !"bool", metadata !1}
+!10 = metadata !{i8 0, i8 2}
+
+; CHECK-NOT: lbzu 3, 1(3)
diff --git a/test/CodeGen/PowerPC/pr15359.ll b/test/CodeGen/PowerPC/pr15359.ll
new file mode 100644
index 000000000000..12fa3e5ffbdd
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15359.ll
@@ -0,0 +1,20 @@
+; RUN: llc -O0 -mcpu=pwr7 -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+target datalayout = "E-p:64:64:64-S0-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@nextIdx = external thread_local global i32
+
+define fastcc void @func() nounwind {
+entry:
+ store i32 42, i32* @nextIdx
+ ret void
+}
+
+; Verify that nextIdx has symbol type TLS.
+;
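+; (STT_TLS has the numeric value 6, matched as 'st_type', 0x6 below.)
+;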
+; CHECK: '.symtab'
+; CHECK: 'nextIdx'
+; CHECK: 'st_type', 0x6
+
diff --git a/test/CodeGen/PowerPC/pr15630.ll b/test/CodeGen/PowerPC/pr15630.ll
new file mode 100644
index 000000000000..c5ba8a4d4f04
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15630.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define weak_odr void @_D4core6atomic49__T11atomicStoreVE4core6atomic11MemoryOrder3ThThZ11atomicStoreFNaNbKOhhZv(i8* %val_arg, i8 zeroext %newval_arg) {
+entry:
+ %newval = alloca i8
+ %ordering = alloca i32, align 4
+ store i8 %newval_arg, i8* %newval
+ %tmp = load i8* %newval
+ store atomic volatile i8 %tmp, i8* %val_arg seq_cst, align 1
+ ret void
+}
+
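+; The byte-sized seq_cst store is expanded into a word-sized
+; load-reserved/store-conditional sequence, hence the stwcx. below.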
+; CHECK: stwcx.
diff --git a/test/CodeGen/PowerPC/pr15632.ll b/test/CodeGen/PowerPC/pr15632.ll
new file mode 100644
index 000000000000..3ea83468b6d7
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr15632.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare void @other(ppc_fp128 %tmp70)
+
+define void @bug() {
+entry:
+ %tmp70 = frem ppc_fp128 0xM00000000000000000000000000000000, undef
+ call void @other(ppc_fp128 %tmp70)
+ unreachable
+}
+
+; CHECK: bl fmodl
diff --git a/test/CodeGen/PowerPC/pwr3-6x.ll b/test/CodeGen/PowerPC/pwr3-6x.ll
new file mode 100644
index 000000000000..a9cfe412fd84
--- /dev/null
+++ b/test/CodeGen/PowerPC/pwr3-6x.ll
@@ -0,0 +1,14 @@
+; Test basic support for some older processors.
+
+; RUN: llc < %s -march=ppc64 -mcpu=pwr3 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=pwr4 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=pwr5 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=pwr5x | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=pwr6x | FileCheck %s
+
+define void @foo() {
+entry:
+ ret void
+}
+
+; CHECK: @foo
diff --git a/test/CodeGen/PowerPC/quadint-return.ll b/test/CodeGen/PowerPC/quadint-return.ll
new file mode 100644
index 000000000000..03499915e78e
--- /dev/null
+++ b/test/CodeGen/PowerPC/quadint-return.ll
@@ -0,0 +1,19 @@
+; REQUIRES: asserts
+; RUN: llc -O0 -debug -o - < %s 2>&1 | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i128 @foo() nounwind {
+entry:
+ %x = alloca i128, align 16
+ store i128 27, i128* %x, align 16
+ %0 = load i128* %x, align 16
+ ret i128 %0
+}
+
+; CHECK: ********** Function: foo
+; CHECK: ********** FAST REGISTER ALLOCATION **********
+; CHECK: %X3<def> = COPY %vreg
+; CHECK-NEXT: %X4<def> = COPY %vreg
+; CHECK-NEXT: BLR
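+
+; Under the 64-bit PowerPC ELF ABI an i128 is returned in the register
+; pair r3:r4 (high-order doubleword in r3), which is what the two COPYs
+; above verify. Rough C analogue: __int128 foo(void) { return 27; }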
diff --git a/test/CodeGen/PowerPC/r31.ll b/test/CodeGen/PowerPC/r31.ll
new file mode 100644
index 000000000000..7ce12f600b41
--- /dev/null
+++ b/test/CodeGen/PowerPC/r31.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define i64 @foo(i64 %a) nounwind {
+entry:
+ call void asm sideeffect "", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30}"() nounwind
+ br label %return
+
+; CHECK: @foo
+; CHECK: mr 31, 3
+
+return: ; preds = %entry
+ ret i64 %a
+}
+
diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll
new file mode 100644
index 000000000000..89705faa46e9
--- /dev/null
+++ b/test/CodeGen/PowerPC/recipest.ll
@@ -0,0 +1,226 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck -check-prefix=CHECK-SAFE %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare double @llvm.sqrt.f64(double)
+declare float @llvm.sqrt.f32(float)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+
+define double @foo(double %a, double %b) nounwind {
+entry:
+ %x = call double @llvm.sqrt.f64(double %b)
+ %r = fdiv double %a, %x
+ ret double %r
+
+; CHECK: @foo
+; CHECK: frsqrte
+; CHECK: fnmsub
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: blr
+
+; CHECK-SAFE: @foo
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
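+
+; With -enable-unsafe-fp-math, a/sqrt(b) is computed from the frsqrte
+; reciprocal-square-root estimate, refined by Newton-Raphson steps (the
+; fnmsub/fmadd/fmul chains checked above) and a final multiply by a.
+; Without it, the exact fsqrt/fdiv sequence is kept, as CHECK-SAFE verifies.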
+
+define double @foof(double %a, float %b) nounwind {
+entry:
+ %x = call float @llvm.sqrt.f32(float %b)
+ %y = fpext float %x to double
+ %r = fdiv double %a, %y
+ ret double %r
+
+; CHECK: @foof
+; CHECK: frsqrtes
+; CHECK: fnmsubs
+; CHECK: fmuls
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: fmul
+; CHECK: blr
+
+; CHECK-SAFE: @foof
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define float @food(float %a, double %b) nounwind {
+entry:
+ %x = call double @llvm.sqrt.f64(double %b)
+ %y = fptrunc double %x to float
+ %r = fdiv float %a, %y
+ ret float %r
+
+; CHECK: @food
+; CHECK: frsqrte
+; CHECK: fnmsub
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: frsp
+; CHECK: fmuls
+; CHECK: blr
+
+; CHECK-SAFE: @food
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define float @goo(float %a, float %b) nounwind {
+entry:
+ %x = call float @llvm.sqrt.f32(float %b)
+ %r = fdiv float %a, %x
+ ret float %r
+
+; CHECK: @goo
+; CHECK: frsqrtes
+; CHECK: fnmsubs
+; CHECK: fmuls
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: fmuls
+; CHECK: blr
+
+; CHECK-SAFE: @goo
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+ %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+ %r = fdiv <4 x float> %a, %x
+ ret <4 x float> %r
+
+; CHECK: @hoo
+; CHECK: vrsqrtefp
+
+; CHECK-SAFE: @hoo
+; CHECK-SAFE-NOT: vrsqrtefp
+; CHECK-SAFE: blr
+}
+
+define double @foo2(double %a, double %b) nounwind {
+entry:
+ %r = fdiv double %a, %b
+ ret double %r
+
+; CHECK: @foo2
+; CHECK: fre
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: blr
+
+; CHECK-SAFE: @foo2
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define float @goo2(float %a, float %b) nounwind {
+entry:
+ %r = fdiv float %a, %b
+ ret float %r
+
+; CHECK: @goo2
+; CHECK: fres
+; CHECK: fnmsubs
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: blr
+
+; CHECK-SAFE: @goo2
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+ %r = fdiv <4 x float> %a, %b
+ ret <4 x float> %r
+
+; CHECK: @hoo2
+; CHECK: vrefp
+
+; CHECK-SAFE: @hoo2
+; CHECK-SAFE-NOT: vrefp
+; CHECK-SAFE: blr
+}
+
+define double @foo3(double %a) nounwind {
+entry:
+ %r = call double @llvm.sqrt.f64(double %a)
+ ret double %r
+
+; CHECK: @foo3
+; CHECK: frsqrte
+; CHECK: fnmsub
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fre
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: blr
+
+; CHECK-SAFE: @foo3
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: blr
+}
+
+define float @goo3(float %a) nounwind {
+entry:
+ %r = call float @llvm.sqrt.f32(float %a)
+ ret float %r
+
+; CHECK: @goo3
+; CHECK: frsqrtes
+; CHECK: fnmsubs
+; CHECK: fmuls
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: fres
+; CHECK: fnmsubs
+; CHECK: fmadds
+; CHECK: blr
+
+; CHECK-SAFE: @goo3
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo3(<4 x float> %a) nounwind {
+entry:
+ %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+ ret <4 x float> %r
+
+; CHECK: @hoo3
+; CHECK: vrsqrtefp
+; CHECK: vrefp
+
+; CHECK-SAFE: @hoo3
+; CHECK-SAFE-NOT: vrsqrtefp
+; CHECK-SAFE: blr
+}
+
diff --git a/test/CodeGen/PowerPC/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll
index 7efdbe9634fe..31b6d4aa03bc 100644
--- a/test/CodeGen/PowerPC/rlwimi3.ll
+++ b/test/CodeGen/PowerPC/rlwimi3.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=ppc32 -stats 2>&1 | \
; RUN: grep "Number of machine instrs printed" | grep 12
diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll
new file mode 100644
index 000000000000..b210a6bda8bf
--- /dev/null
+++ b/test/CodeGen/PowerPC/rounding-ops.ll
@@ -0,0 +1,145 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @test1(float %x) nounwind {
+ %call = tail call float @floorf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK: test1:
+; CHECK: frim 1, 1
+
+; CHECK-FM: test1:
+; CHECK-FM: frim 1, 1
+}
+
+declare float @floorf(float) nounwind readnone
+
+define double @test2(double %x) nounwind {
+ %call = tail call double @floor(double %x) nounwind readnone
+ ret double %call
+
+; CHECK: test2:
+; CHECK: frim 1, 1
+
+; CHECK-FM: test2:
+; CHECK-FM: frim 1, 1
+}
+
+declare double @floor(double) nounwind readnone
+
+define float @test3(float %x) nounwind {
+ %call = tail call float @nearbyintf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK: test3:
+; CHECK-NOT: frin
+
+; CHECK-FM: test3:
+; CHECK-FM: frin 1, 1
+}
+
+declare float @nearbyintf(float) nounwind readnone
+
+define double @test4(double %x) nounwind {
+ %call = tail call double @nearbyint(double %x) nounwind readnone
+ ret double %call
+
+; CHECK: test4:
+; CHECK-NOT: frin
+
+; CHECK-FM: test4:
+; CHECK-FM: frin 1, 1
+}
+
+declare double @nearbyint(double) nounwind readnone
+
+define float @test5(float %x) nounwind {
+ %call = tail call float @ceilf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK: test5:
+; CHECK: frip 1, 1
+
+; CHECK-FM: test5:
+; CHECK-FM: frip 1, 1
+}
+
+declare float @ceilf(float) nounwind readnone
+
+define double @test6(double %x) nounwind {
+ %call = tail call double @ceil(double %x) nounwind readnone
+ ret double %call
+
+; CHECK: test6:
+; CHECK: frip 1, 1
+
+; CHECK-FM: test6:
+; CHECK-FM: frip 1, 1
+}
+
+declare double @ceil(double) nounwind readnone
+
+define float @test9(float %x) nounwind {
+ %call = tail call float @truncf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK: test9:
+; CHECK: friz 1, 1
+
+; CHECK-FM: test9:
+; CHECK-FM: friz 1, 1
+}
+
+declare float @truncf(float) nounwind readnone
+
+define double @test10(double %x) nounwind {
+ %call = tail call double @trunc(double %x) nounwind readnone
+ ret double %call
+
+; CHECK: test10:
+; CHECK: friz 1, 1
+
+; CHECK-FM: test10:
+; CHECK-FM: friz 1, 1
+}
+
+declare double @trunc(double) nounwind readnone
+
+define float @test11(float %x) nounwind {
+ %call = tail call float @rintf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK: test11:
+; CHECK-NOT: frin
+
+; CHECK-FM: test11:
+; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
+; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
+; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
+; CHECK-FM: mtfsb1 6
+; CHECK-FM: .LBB[[BB]]_2:
+; CHECK-FM: blr
+}
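+
+; Unlike nearbyint, rint must raise the inexact exception when rounding
+; changes the value. The -enable-unsafe-fp-math expansion checked above
+; therefore compares the frin result with the input and, when they differ,
+; raises inexact by setting FPSCR bit 6 (XX) with mtfsb1.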
+
+declare float @rintf(float) nounwind readnone
+
+define double @test12(double %x) nounwind {
+ %call = tail call double @rint(double %x) nounwind readnone
+ ret double %call
+
+; CHECK: test12:
+; CHECK-NOT: frin
+
+; CHECK-FM: test12:
+; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]]
+; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]]
+; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2
+; CHECK-FM: mtfsb1 6
+; CHECK-FM: .LBB[[BB]]_2:
+; CHECK-FM: blr
+}
+
+declare double @rint(double) nounwind readnone
+
diff --git a/test/CodeGen/PowerPC/s000-alias-misched.ll b/test/CodeGen/PowerPC/s000-alias-misched.ll
new file mode 100644
index 000000000000..d03ee8738eea
--- /dev/null
+++ b/test/CodeGen/PowerPC/s000-alias-misched.ll
@@ -0,0 +1,101 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+; RUN: llc < %s -enable-misched -march=ppc64 -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -enable-misched -enable-aa-sched-mi -march=ppc64 -mcpu=a2 | FileCheck %s
+
+@aa = external global [256 x [256 x double]], align 32
+@bb = external global [256 x [256 x double]], align 32
+@cc = external global [256 x [256 x double]], align 32
+@.str1 = external hidden unnamed_addr constant [6 x i8], align 1
+@X = external global [16000 x double], align 32
+@Y = external global [16000 x double], align 32
+@Z = external global [16000 x double], align 32
+@U = external global [16000 x double], align 32
+@V = external global [16000 x double], align 32
+@.str137 = external hidden unnamed_addr constant [14 x i8], align 1
+
+declare void @check(i32 signext) nounwind
+
+declare signext i32 @printf(i8* nocapture, ...) nounwind
+
+declare signext i32 @init(i8*) nounwind
+
+define signext i32 @s000() nounwind {
+entry:
+ %call = tail call signext i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0))
+ %call1 = tail call i64 @clock() nounwind
+ br label %for.cond2.preheader
+
+; CHECK: @s000
+
+for.cond2.preheader: ; preds = %for.end, %entry
+ %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ]
+ br label %for.body4
+
+for.body4: ; preds = %for.body4, %for.cond2.preheader
+ %indvars.iv = phi i64 [ 0, %for.cond2.preheader ], [ %indvars.iv.next.15, %for.body4 ]
+ %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv
+ %arrayidx6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv
+ %0 = bitcast double* %arrayidx to <1 x double>*
+ %1 = load <1 x double>* %0, align 32, !tbaa !0
+ %add = fadd <1 x double> %1, <double 1.000000e+00>
+ %2 = bitcast double* %arrayidx6 to <1 x double>*
+ store <1 x double> %add, <1 x double>* %2, align 32, !tbaa !0
+ %indvars.iv.next.322 = or i64 %indvars.iv, 4
+ %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.322
+ %arrayidx6.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.322
+ %3 = bitcast double* %arrayidx.4 to <1 x double>*
+ %4 = load <1 x double>* %3, align 32, !tbaa !0
+ %add.4 = fadd <1 x double> %4, <double 1.000000e+00>
+ %5 = bitcast double* %arrayidx6.4 to <1 x double>*
+ store <1 x double> %add.4, <1 x double>* %5, align 32, !tbaa !0
+ %indvars.iv.next.726 = or i64 %indvars.iv, 8
+ %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.726
+ %arrayidx6.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.726
+ %6 = bitcast double* %arrayidx.8 to <1 x double>*
+ %7 = load <1 x double>* %6, align 32, !tbaa !0
+ %add.8 = fadd <1 x double> %7, <double 1.000000e+00>
+ %8 = bitcast double* %arrayidx6.8 to <1 x double>*
+ store <1 x double> %add.8, <1 x double>* %8, align 32, !tbaa !0
+ %indvars.iv.next.1130 = or i64 %indvars.iv, 12
+ %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1130
+ %arrayidx6.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1130
+ %9 = bitcast double* %arrayidx.12 to <1 x double>*
+ %10 = load <1 x double>* %9, align 32, !tbaa !0
+ %add.12 = fadd <1 x double> %10, <double 1.000000e+00>
+ %11 = bitcast double* %arrayidx6.12 to <1 x double>*
+ store <1 x double> %add.12, <1 x double>* %11, align 32, !tbaa !0
+ %indvars.iv.next.15 = add i64 %indvars.iv, 16
+ %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
+ %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000
+ br i1 %exitcond.15, label %for.end, label %for.body4
+
+; All of the loads should come before all of the stores.
+; CHECK: mtctr
+; CHECK: stfd
+; CHECK-NOT: lfd
+; CHECK: bdnz
+
+for.end: ; preds = %for.body4
+ %call7 = tail call signext i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind
+ %inc9 = add nsw i32 %nl.018, 1
+ %exitcond = icmp eq i32 %inc9, 400000
+ br i1 %exitcond, label %for.end10, label %for.cond2.preheader
+
+for.end10: ; preds = %for.end
+ %call11 = tail call i64 @clock() nounwind
+ %sub = sub nsw i64 %call11, %call1
+ %conv = sitofp i64 %sub to double
+ %div = fdiv double %conv, 1.000000e+06
+ %call12 = tail call signext i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str137, i64 0, i64 0), double %div) nounwind
+ tail call void @check(i32 signext 1)
+ ret i32 0
+}
+
+declare i64 @clock() nounwind
+
+declare signext i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double)
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/sdag-ppcf128.ll b/test/CodeGen/PowerPC/sdag-ppcf128.ll
new file mode 100644
index 000000000000..535ece6d3dfe
--- /dev/null
+++ b/test/CodeGen/PowerPC/sdag-ppcf128.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+;
+; PR14751: Unsupported type in SelectionDAG::getConstantFP()
+
+define fastcc void @_D3std4math4sqrtFNaNbNfcZc() {
+entry:
+ br i1 undef, label %if, label %else
+; CHECK: cmplwi 0, 3, 0
+if: ; preds = %entry
+ store { ppc_fp128, ppc_fp128 } zeroinitializer, { ppc_fp128, ppc_fp128 }* undef
+ ret void
+
+else: ; preds = %entry
+ unreachable
+}
diff --git a/test/CodeGen/PowerPC/sjlj.ll b/test/CodeGen/PowerPC/sjlj.ll
new file mode 100644
index 000000000000..7ea35dafc3fa
--- /dev/null
+++ b/test/CodeGen/PowerPC/sjlj.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-NOAV %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.__jmp_buf_tag = type { [64 x i64], i32, %struct.__sigset_t, [8 x i8] }
+%struct.__sigset_t = type { [16 x i64] }
+
+@env_sigill = internal global [1 x %struct.__jmp_buf_tag] zeroinitializer, align 16
+
+define void @foo() #0 {
+entry:
+ call void @llvm.eh.sjlj.longjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*))
+ unreachable
+
+; CHECK: @foo
+; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
+; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l
+; CHECK: ld 31, 0([[REG]])
+; CHECK: ld [[REG2:[0-9]+]], 8([[REG]])
+; CHECK: ld 1, 16([[REG]])
+; CHECK: mtctr [[REG2]]
+; CHECK: ld 2, 24([[REG]])
+; CHECK: bctr
+
+return: ; No predecessors!
+ ret void
+}
+
+declare void @llvm.eh.sjlj.longjmp(i8*) #1
+
+define signext i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ store i8* %0, i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**)
+ %1 = call i8* @llvm.stacksave()
+ store i8* %1, i8** getelementptr (i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**), i32 2)
+ %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*))
+ %tobool = icmp ne i32 %2, 0
+ br i1 %tobool, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+if.else: ; preds = %entry
+ call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.else
+ store i32 0, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %3 = load i32* %retval
+ ret i32 %3
+
+; FIXME: We should be saving VRSAVE on Darwin, but we're not!
+
+; CHECK: @main
+; CHECK: std
+; Make sure that we're not saving VRSAVE on non-Darwin:
+; CHECK-NOT: mfspr
+; CHECK: stfd
+; CHECK: stvx
+
+; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
+; CHECK: std 31, env_sigill@toc@l([[REG]])
+; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l
+; CHECK: std [[REG]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill
+; CHECK: std 1, 16([[REG]])
+; CHECK: std 2, 24([[REG]])
+; CHECK: bcl 20, 31, .LBB1_1
+; CHECK: li 3, 1
+; CHECK: #EH_SjLj_Setup .LBB1_1
+; CHECK: b .LBB1_2
+
+; CHECK: .LBB1_1:
+; CHECK: mflr [[REGL:[0-9]+]]
+; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload
+; CHECK: std [[REGL]], 8([[REG2]])
+; CHECK: li 3, 0
+
+; CHECK: .LBB1_2:
+
+; CHECK: lfd
+; CHECK: lvx
+; CHECK: ld
+; CHECK: blr
+
+; CHECK-NOAV: @main
+; CHECK-NOAV-NOT: stvx
+; CHECK-NOAV: bcl
+; CHECK-NOAV: mflr
+; CHECK-NOAV: bl foo
+; CHECK-NOAV-NOT: lvx
+; CHECK-NOAV: blr
+}
+
+declare i8* @llvm.frameaddress(i32) #2
+
+declare i8* @llvm.stacksave() #3
+
+declare i32 @llvm.eh.sjlj.setjmp(i8*) #3
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/stdux-constuse.ll b/test/CodeGen/PowerPC/stdux-constuse.ll
new file mode 100644
index 000000000000..e62d438014ee
--- /dev/null
+++ b/test/CodeGen/PowerPC/stdux-constuse.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mcpu=a2 -disable-lsr < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test1(i64 %add, i64* %ptr) nounwind {
+entry:
+ %p1 = getelementptr i64* %ptr, i64 144115188075855
+ br label %for.cond2.preheader
+
+for.cond2.preheader:
+ %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ]
+ br label %for.body4
+
+for.body4:
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body4 ], [ 16000, %for.cond2.preheader ]
+ %i0 = phi i64* [ %p1, %for.cond2.preheader ], [ %i6, %for.body4 ]
+ %i6 = getelementptr i64* %i0, i64 400000
+ %i7 = getelementptr i64* %i6, i64 300000
+ %i8 = getelementptr i64* %i6, i64 200000
+ %i9 = getelementptr i64* %i6, i64 100000
+ store i64 %add, i64* %i6, align 32
+ store i64 %add, i64* %i7, align 32
+ store i64 %add, i64* %i8, align 32
+ store i64 %add, i64* %i9, align 32
+ %lsr.iv.next = add i32 %lsr.iv, -16
+ %exitcond.15 = icmp eq i32 %lsr.iv.next, 0
+ br i1 %exitcond.15, label %for.end, label %for.body4
+
+; Make sure that we generate the most compact form of this loop, with no
+; unnecessary address moves.
+; CHECK: @test1
+; CHECK: mtctr
+; CHECK: stdux
+; CHECK-NEXT: stdx
+; CHECK-NEXT: stdx
+; CHECK-NEXT: stdx
+; CHECK-NEXT: bdnz
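+;
+; The update-form stdux advances the shared base register once per
+; iteration, and the other three stores reuse it with indexed stdx and
+; loop-invariant offsets, so the body needs no extra address arithmetic.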
+
+for.end:
+ %inc9 = add nsw i32 %nl.018, 1
+ %exitcond = icmp eq i32 %inc9, 400000
+ br i1 %exitcond, label %for.end10, label %for.cond2.preheader
+
+for.end10:
+ ret i32 0
+}
+
diff --git a/test/CodeGen/PowerPC/stfiwx-2.ll b/test/CodeGen/PowerPC/stfiwx-2.ll
index c49b25cc2303..7786fc17eacb 100644
--- a/test/CodeGen/PowerPC/stfiwx-2.ll
+++ b/test/CodeGen/PowerPC/stfiwx-2.ll
@@ -1,11 +1,14 @@
-; This cannot be a stfiwx
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep stb
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep stfiwx
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=g5 | FileCheck %s
define void @test(float %F, i8* %P) {
%I = fptosi float %F to i32
%X = trunc i32 %I to i8
store i8 %X, i8* %P
ret void
+; CHECK: fctiwz 0, 1
+; CHECK: stfiwx 0, 0, 4
+; CHECK: lwz 4, 12(1)
+; CHECK: stb 4, 0(3)
+; CHECK: blr
}
diff --git a/test/CodeGen/PowerPC/store-update.ll b/test/CodeGen/PowerPC/store-update.ll
new file mode 100644
index 000000000000..538ed24fbc46
--- /dev/null
+++ b/test/CodeGen/PowerPC/store-update.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i8* @stbu(i8* %base, i8 zeroext %val) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i8* %base, i64 16
+ store i8 %val, i8* %arrayidx, align 1
+ ret i8* %arrayidx
+}
+; CHECK: @stbu
+; CHECK: %entry
+; CHECK-NEXT: stbu
+; CHECK-NEXT: blr
+
+define i8* @stbux(i8* %base, i8 zeroext %val, i64 %offset) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i8* %base, i64 %offset
+ store i8 %val, i8* %arrayidx, align 1
+ ret i8* %arrayidx
+}
+; CHECK: @stbux
+; CHECK: %entry
+; CHECK-NEXT: stbux
+; CHECK-NEXT: blr
+
+define i16* @sthu(i16* %base, i16 zeroext %val) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i16* %base, i64 16
+ store i16 %val, i16* %arrayidx, align 2
+ ret i16* %arrayidx
+}
+; CHECK: @sthu
+; CHECK: %entry
+; CHECK-NEXT: sthu
+; CHECK-NEXT: blr
+
+define i16* @sthux(i16* %base, i16 zeroext %val, i64 %offset) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i16* %base, i64 %offset
+ store i16 %val, i16* %arrayidx, align 2
+ ret i16* %arrayidx
+}
+; CHECK: @sthux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: sthux
+; CHECK-NEXT: blr
+
+define i32* @stwu(i32* %base, i32 zeroext %val) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i32* %base, i64 16
+ store i32 %val, i32* %arrayidx, align 4
+ ret i32* %arrayidx
+}
+; CHECK: @stwu
+; CHECK: %entry
+; CHECK-NEXT: stwu
+; CHECK-NEXT: blr
+
+define i32* @stwux(i32* %base, i32 zeroext %val, i64 %offset) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i32* %base, i64 %offset
+ store i32 %val, i32* %arrayidx, align 4
+ ret i32* %arrayidx
+}
+; CHECK: @stwux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stwux
+; CHECK-NEXT: blr
+
+define i8* @stbu8(i8* %base, i64 %val) nounwind {
+entry:
+ %conv = trunc i64 %val to i8
+ %arrayidx = getelementptr inbounds i8* %base, i64 16
+ store i8 %conv, i8* %arrayidx, align 1
+ ret i8* %arrayidx
+}
+; CHECK: @stbu8
+; CHECK: %entry
+; CHECK-NEXT: stbu
+; CHECK-NEXT: blr
+
+define i8* @stbux8(i8* %base, i64 %val, i64 %offset) nounwind {
+entry:
+ %conv = trunc i64 %val to i8
+ %arrayidx = getelementptr inbounds i8* %base, i64 %offset
+ store i8 %conv, i8* %arrayidx, align 1
+ ret i8* %arrayidx
+}
+; CHECK: @stbux8
+; CHECK: %entry
+; CHECK-NEXT: stbux
+; CHECK-NEXT: blr
+
+define i16* @sthu8(i16* %base, i64 %val) nounwind {
+entry:
+ %conv = trunc i64 %val to i16
+ %arrayidx = getelementptr inbounds i16* %base, i64 16
+ store i16 %conv, i16* %arrayidx, align 2
+ ret i16* %arrayidx
+}
+; CHECK: @sthu8
+; CHECK: %entry
+; CHECK-NEXT: sthu
+; CHECK-NEXT: blr
+
+define i16* @sthux8(i16* %base, i64 %val, i64 %offset) nounwind {
+entry:
+ %conv = trunc i64 %val to i16
+ %arrayidx = getelementptr inbounds i16* %base, i64 %offset
+ store i16 %conv, i16* %arrayidx, align 2
+ ret i16* %arrayidx
+}
+; CHECK: @sthux8
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: sthux
+; CHECK-NEXT: blr
+
+define i32* @stwu8(i32* %base, i64 %val) nounwind {
+entry:
+ %conv = trunc i64 %val to i32
+ %arrayidx = getelementptr inbounds i32* %base, i64 16
+ store i32 %conv, i32* %arrayidx, align 4
+ ret i32* %arrayidx
+}
+; CHECK: @stwu8
+; CHECK: %entry
+; CHECK-NEXT: stwu
+; CHECK-NEXT: blr
+
+define i32* @stwux8(i32* %base, i64 %val, i64 %offset) nounwind {
+entry:
+ %conv = trunc i64 %val to i32
+ %arrayidx = getelementptr inbounds i32* %base, i64 %offset
+ store i32 %conv, i32* %arrayidx, align 4
+ ret i32* %arrayidx
+}
+; CHECK: @stwux8
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stwux
+; CHECK-NEXT: blr
+
+define i64* @stdu(i64* %base, i64 %val) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i64* %base, i64 16
+ store i64 %val, i64* %arrayidx, align 8
+ ret i64* %arrayidx
+}
+; CHECK: @stdu
+; CHECK: %entry
+; CHECK-NEXT: stdu
+; CHECK-NEXT: blr
+
+define i64* @stdux(i64* %base, i64 %val, i64 %offset) nounwind {
+entry:
+ %arrayidx = getelementptr inbounds i64* %base, i64 %offset
+ store i64 %val, i64* %arrayidx, align 8
+ ret i64* %arrayidx
+}
+; CHECK: @stdux
+; CHECK: %entry
+; CHECK-NEXT: sldi
+; CHECK-NEXT: stdux
+; CHECK-NEXT: blr
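+;
+; Each function above returns the incremented pointer, which maps directly
+; onto the update-form stores: stbu/sthu/stwu/stdu (and their indexed -x
+; variants) write the new effective address back into the base register.
+; Rough C analogue of @stbu:
+;   char *stbu(char *base, unsigned char v) { base += 16; *base = v; return base; }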
+
diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll
index 884d3a89d15a..2a17e740ea01 100644
--- a/test/CodeGen/PowerPC/structsinmem.ll
+++ b/test/CodeGen/PowerPC/structsinmem.ll
@@ -1,9 +1,5 @@
; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
-; FIXME: The code generation for packed structs is very poor because the
-; PowerPC target wrongly rejects all unaligned loads. This test case will
-; need to be revised when that is fixed.
-
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -118,8 +114,8 @@ entry:
ret i32 %add13
; CHECK: lha {{[0-9]+}}, 126(1)
-; CHECK: lbz {{[0-9]+}}, 119(1)
; CHECK: lha {{[0-9]+}}, 132(1)
+; CHECK: lbz {{[0-9]+}}, 119(1)
; CHECK: lwz {{[0-9]+}}, 140(1)
; CHECK: lwz {{[0-9]+}}, 144(1)
; CHECK: lwz {{[0-9]+}}, 152(1)
@@ -209,19 +205,11 @@ entry:
%add13 = add nsw i32 %add11, %6
ret i32 %add13
-; CHECK: lbz {{[0-9]+}}, 149(1)
-; CHECK: lbz {{[0-9]+}}, 150(1)
-; CHECK: lbz {{[0-9]+}}, 147(1)
-; CHECK: lbz {{[0-9]+}}, 148(1)
-; CHECK: lbz {{[0-9]+}}, 133(1)
-; CHECK: lbz {{[0-9]+}}, 134(1)
; CHECK: lha {{[0-9]+}}, 126(1)
+; CHECK: lha {{[0-9]+}}, 133(1)
; CHECK: lbz {{[0-9]+}}, 119(1)
; CHECK: lwz {{[0-9]+}}, 140(1)
-; CHECK: lhz {{[0-9]+}}, 154(1)
-; CHECK: lhz {{[0-9]+}}, 156(1)
-; CHECK: lbz {{[0-9]+}}, 163(1)
-; CHECK: lbz {{[0-9]+}}, 164(1)
-; CHECK: lbz {{[0-9]+}}, 161(1)
-; CHECK: lbz {{[0-9]+}}, 162(1)
+; CHECK: lwz {{[0-9]+}}, 147(1)
+; CHECK: lwz {{[0-9]+}}, 154(1)
+; CHECK: lwz {{[0-9]+}}, 161(1)
}
diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll
index ef706af95d65..54de6060d0f0 100644
--- a/test/CodeGen/PowerPC/structsinregs.ll
+++ b/test/CodeGen/PowerPC/structsinregs.ll
@@ -1,9 +1,5 @@
; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
-; FIXME: The code generation for packed structs is very poor because the
-; PowerPC target wrongly rejects all unaligned loads. This test case will
-; need to be revised when that is fixed.
-
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -63,13 +59,13 @@ entry:
%call = call i32 @callee1(%struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7)
ret i32 %call
-; CHECK: ld 9, 128(31)
-; CHECK: ld 8, 136(31)
-; CHECK: ld 7, 144(31)
-; CHECK: lwz 6, 152(31)
-; CHECK: lwz 5, 160(31)
-; CHECK: lhz 4, 168(31)
-; CHECK: lbz 3, 176(31)
+; CHECK: ld 9, 112(31)
+; CHECK: ld 8, 120(31)
+; CHECK: ld 7, 128(31)
+; CHECK: lwz 6, 136(31)
+; CHECK: lwz 5, 144(31)
+; CHECK: lhz 4, 152(31)
+; CHECK: lbz 3, 160(31)
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
@@ -109,8 +105,8 @@ entry:
; CHECK: sth 4, 62(1)
; CHECK: stb 3, 55(1)
; CHECK: lha {{[0-9]+}}, 62(1)
-; CHECK: lbz {{[0-9]+}}, 55(1)
; CHECK: lha {{[0-9]+}}, 68(1)
+; CHECK: lbz {{[0-9]+}}, 55(1)
; CHECK: lwz {{[0-9]+}}, 76(1)
; CHECK: lwz {{[0-9]+}}, 80(1)
; CHECK: lwz {{[0-9]+}}, 88(1)
@@ -155,10 +151,10 @@ entry:
; CHECK: ld 9, 96(1)
; CHECK: ld 8, 88(1)
; CHECK: ld 7, 80(1)
-; CHECK: lwz 6, 152(31)
+; CHECK: lwz 6, 136(31)
; CHECK: ld 5, 64(1)
-; CHECK: lhz 4, 168(31)
-; CHECK: lbz 3, 176(31)
+; CHECK: lhz 4, 152(31)
+; CHECK: lbz 3, 160(31)
}
define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind {
@@ -195,19 +191,11 @@ entry:
; CHECK: std 5, 64(1)
; CHECK: sth 4, 62(1)
; CHECK: stb 3, 55(1)
-; CHECK: lbz {{[0-9]+}}, 85(1)
-; CHECK: lbz {{[0-9]+}}, 86(1)
-; CHECK: lbz {{[0-9]+}}, 83(1)
-; CHECK: lbz {{[0-9]+}}, 84(1)
-; CHECK: lbz {{[0-9]+}}, 69(1)
-; CHECK: lbz {{[0-9]+}}, 70(1)
; CHECK: lha {{[0-9]+}}, 62(1)
+; CHECK: lha {{[0-9]+}}, 69(1)
; CHECK: lbz {{[0-9]+}}, 55(1)
; CHECK: lwz {{[0-9]+}}, 76(1)
-; CHECK: lhz {{[0-9]+}}, 90(1)
-; CHECK: lhz {{[0-9]+}}, 92(1)
-; CHECK: lbz {{[0-9]+}}, 99(1)
-; CHECK: lbz {{[0-9]+}}, 100(1)
-; CHECK: lbz {{[0-9]+}}, 97(1)
-; CHECK: lbz {{[0-9]+}}, 98(1)
+; CHECK: lwz {{[0-9]+}}, 83(1)
+; CHECK: lwz {{[0-9]+}}, 90(1)
+; CHECK: lwz {{[0-9]+}}, 97(1)
}
diff --git a/test/CodeGen/PowerPC/stubs.ll b/test/CodeGen/PowerPC/stubs.ll
index 4889263b4c4e..cfcc50b7a876 100644
--- a/test/CodeGen/PowerPC/stubs.ll
+++ b/test/CodeGen/PowerPC/stubs.ll
@@ -10,8 +10,8 @@ entry:
; CHECK: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
; CHECK: ___floatditf$stub:
; CHECK: .indirect_symbol ___floatditf
-; CHECK: lis r11,ha16(___floatditf$lazy_ptr)
-; CHECK: lwzu r12,lo16(___floatditf$lazy_ptr)(r11)
+; CHECK: lis r11, ha16(___floatditf$lazy_ptr)
+; CHECK: lwzu r12, lo16(___floatditf$lazy_ptr)(r11)
; CHECK: mtctr r12
; CHECK: bctr
; CHECK: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
diff --git a/test/CodeGen/PowerPC/stwu8.ll b/test/CodeGen/PowerPC/stwu8.ll
index 897bfc6d6caa..e0bd04345439 100644
--- a/test/CodeGen/PowerPC/stwu8.ll
+++ b/test/CodeGen/PowerPC/stwu8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/svr4-redzone.ll b/test/CodeGen/PowerPC/svr4-redzone.ll
new file mode 100644
index 000000000000..91ff5797389b
--- /dev/null
+++ b/test/CodeGen/PowerPC/svr4-redzone.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple="powerpc-unknown-linux-gnu" < %s | FileCheck %s --check-prefix=PPC32
+; RUN: llc -mtriple="powerpc64-unknown-linux-gnu" < %s | FileCheck %s --check-prefix=PPC64
+; PR15332
+
+define void @regalloc() nounwind {
+entry:
+ %0 = add i32 1, 2
+ ret void
+}
+; PPC32: regalloc:
+; PPC32-NOT: stwu 1, -{{[0-9]+}}(1)
+; PPC32: blr
+
+; PPC64: regalloc:
+; PPC64-NOT: stdu 1, -{{[0-9]+}}(1)
+; PPC64: blr
+
+define void @smallstack() nounwind {
+entry:
+ %0 = alloca i8, i32 4
+ ret void
+}
+; PPC32: smallstack:
+; PPC32: stwu 1, -16(1)
+
+; PPC64: smallstack:
+; PPC64-NOT: stdu 1, -{{[0-9]+}}(1)
+; PPC64: blr
+
+define void @bigstack() nounwind {
+entry:
+ %0 = alloca i8, i32 230
+ ret void
+}
+; PPC32: bigstack:
+; PPC32: stwu 1, -240(1)
+
+; PPC64: bigstack:
+; PPC64: stdu 1, -352(1)
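+
+; PPC32 SVR4 has no red zone, so any alloca forces a stack adjustment.
+; PPC64 SVR4 provides a 288-byte red zone below the stack pointer: the
+; 4-byte alloca fits within it and needs no frame, while the 230-byte
+; alloca plus the ABI linkage area does not.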
diff --git a/test/CodeGen/PowerPC/tls-2.ll b/test/CodeGen/PowerPC/tls-2.ll
new file mode 100644
index 000000000000..20d8fe46ea17
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-2.ll
@@ -0,0 +1,15 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd10.0"
+; RUN: llc -O1 < %s -march=ppc64 | FileCheck %s
+
+@a = thread_local global i32 0, align 4
+
+;CHECK: localexec:
+define i32 @localexec() nounwind {
+entry:
+;CHECK: addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;CHECK-NEXT: li [[REG2:[0-9]+]], 42
+;CHECK-NEXT: stw [[REG2]], a@tprel@l([[REG1]])
+ store i32 42, i32* @a, align 4
+ ret i32 0
+}
diff --git a/test/CodeGen/PowerPC/tls-gd-obj.ll b/test/CodeGen/PowerPC/tls-gd-obj.ll
new file mode 100644
index 000000000000..00b537d5325b
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-gd-obj.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; Test correct relocation generation for thread-local storage using
+; the general dynamic model and integrated assembly.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @a, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_GOT_TLSGD16_HA, R_PPC64_GOT_TLSGD16_LO,
+; and R_PPC64_TLSGD for accessing external variable a, and R_PPC64_REL24
+; for the call to __tls_get_addr.
+;
+; CHECK: '.rela.text'
+; CHECK: Relocation 0
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1:[0-9a-f]+]]
+; CHECK-NEXT: 'r_type', 0x00000052
+; CHECK: Relocation 1
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
+; CHECK-NEXT: 'r_type', 0x00000050
+; CHECK: Relocation 2
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
+; CHECK-NEXT: 'r_type', 0x0000006b
+; CHECK: Relocation 3
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x{{[0-9a-f]+}}
+; CHECK-NEXT: 'r_type', 0x0000000a
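+;
+; (0x52 = R_PPC64_GOT_TLSGD16_HA, 0x50 = R_PPC64_GOT_TLSGD16_LO,
+; 0x6b = R_PPC64_TLSGD, 0x0a = R_PPC64_REL24.)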
+
diff --git a/test/CodeGen/PowerPC/tls-gd.ll b/test/CodeGen/PowerPC/tls-gd.ll
new file mode 100644
index 000000000000..5f0ef9a050da
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-gd.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck %s
+
+; Test correct assembly code generation for thread-local storage using
+; the general dynamic model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @a, align 4
+ ret i32 %0
+}
+
+; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsgd@ha
+; CHECK-NEXT: addi 3, [[REG]], a@got@tlsgd@l
+; CHECK: bl __tls_get_addr(a@tlsgd)
+; CHECK-NEXT: nop
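+;
+; The addis/addi pair materializes the address of a's GOT entry (its
+; tls_index structure); __tls_get_addr then returns a's address in r3,
+; and the following nop leaves room for the linker (TOC restore or TLS
+; relaxation).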
+
diff --git a/test/CodeGen/PowerPC/tls-ie-obj.ll b/test/CodeGen/PowerPC/tls-ie-obj.ll
new file mode 100644
index 000000000000..3600cc52ba54
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ie-obj.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mcpu=pwr7 -O0 -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; Test correct relocation generation for thread-local storage
+; using the initial-exec model and integrated assembly.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = external thread_local global i32
+
+define signext i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @a, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_GOT_TPREL16_HA, R_PPC64_GOT_TPREL16_LO_DS,
+; and R_PPC64_TLS for accessing external variable a.
+;
+; CHECK: '.rela.text'
+; CHECK: Relocation 0
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1:[0-9a-f]+]]
+; CHECK-NEXT: 'r_type', 0x0000005a
+; CHECK: Relocation 1
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
+; CHECK-NEXT: 'r_type', 0x00000058
+; CHECK: Relocation 2
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
+; CHECK-NEXT: 'r_type', 0x00000043
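+;
+; (0x5a = R_PPC64_GOT_TPREL16_HA, 0x58 = R_PPC64_GOT_TPREL16_LO_DS,
+; 0x43 = R_PPC64_TLS.)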
+
diff --git a/test/CodeGen/PowerPC/tls-ie.ll b/test/CodeGen/PowerPC/tls-ie.ll
new file mode 100644
index 000000000000..c5cfba7b3f7a
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ie.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s
+
+; Test correct assembly code generation for thread-local storage
+; using the initial-exec model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = external thread_local global i32
+
+define signext i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @a, align 4
+ ret i32 %0
+}
+
+; CHECK: addis [[REG1:[0-9]+]], 2, a@got@tprel@ha
+; CHECK: ld [[REG2:[0-9]+]], a@got@tprel@l([[REG1]])
+; CHECK: add {{[0-9]+}}, [[REG2]], a@tls
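+;
+; In the initial-exec model the ld fetches a's thread-pointer-relative
+; offset from the GOT, and the final add combines it with the thread
+; pointer; the a@tls operand pins that add for linker TLS relaxation.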
+
diff --git a/test/CodeGen/PowerPC/tls-ld-2.ll b/test/CodeGen/PowerPC/tls-ld-2.ll
new file mode 100644
index 000000000000..4399b330ea47
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ld-2.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck %s
+
+; Test peephole optimization for thread-local storage using the
+; local dynamic model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = hidden thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @a, align 4
+ ret i32 %0
+}
+
+; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
+; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l
+; CHECK: bl __tls_get_addr(a@tlsld)
+; CHECK-NEXT: nop
+; CHECK: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; CHECK-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]])
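+;
+; Same local-dynamic sequence as tls-ld.ll, except that at -O1 the
+; peephole folds the a@dtprel@l offset directly into the lwa's
+; displacement instead of materializing it with a separate addi.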
diff --git a/test/CodeGen/PowerPC/tls-ld-obj.ll b/test/CodeGen/PowerPC/tls-ld-obj.ll
new file mode 100644
index 000000000000..c521ae405f46
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ld-obj.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; Test correct relocation generation for thread-local storage using
+; the local dynamic model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = hidden thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @a, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_GOT_TLSLD16_HA, R_PPC64_GOT_TLSLD16_LO,
+; R_PPC64_TLSLD, R_PPC64_DTPREL16_HA, and R_PPC64_DTPREL16_LO for
+; accessing the thread-local variable a, and R_PPC64_REL24 for the call
+; to __tls_get_addr.
+;
+; CHECK: '.rela.text'
+; CHECK: Relocation 0
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1:[0-9a-f]+]]
+; CHECK-NEXT: 'r_type', 0x00000056
+; CHECK: Relocation 1
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
+; CHECK-NEXT: 'r_type', 0x00000054
+; CHECK: Relocation 2
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
+; CHECK-NEXT: 'r_type', 0x0000006c
+; CHECK: Relocation 3
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x{{[0-9a-f]+}}
+; CHECK-NEXT: 'r_type', 0x0000000a
+; CHECK: Relocation 4
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
+; CHECK-NEXT: 'r_type', 0x0000004d
+; CHECK: Relocation 5
+; CHECK-NEXT: 'r_offset'
+; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
+; CHECK-NEXT: 'r_type', 0x0000004b
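+;
+; (0x56 = R_PPC64_GOT_TLSLD16_HA, 0x54 = R_PPC64_GOT_TLSLD16_LO,
+; 0x6c = R_PPC64_TLSLD, 0x0a = R_PPC64_REL24, 0x4d = R_PPC64_DTPREL16_HA,
+; 0x4b = R_PPC64_DTPREL16_LO.)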
+
diff --git a/test/CodeGen/PowerPC/tls-ld.ll b/test/CodeGen/PowerPC/tls-ld.ll
new file mode 100644
index 000000000000..db02a56f6a22
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ld.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck %s
+
+; Test correct assembly code generation for thread-local storage using
+; the local dynamic model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = hidden thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @a, align 4
+ ret i32 %0
+}
+
+; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
+; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l
+; CHECK: bl __tls_get_addr(a@tlsld)
+; CHECK-NEXT: nop
+; CHECK: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; CHECK-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l
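+;
+; In the local-dynamic model a single __tls_get_addr call yields the
+; module's TLS base address; a's address is then formed by adding its
+; DTP-relative offset with the a@dtprel@ha / a@dtprel@l pair.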
diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll
index 713893bf5862..2daa60ab37f2 100644
--- a/test/CodeGen/PowerPC/tls.ll
+++ b/test/CodeGen/PowerPC/tls.ll
@@ -1,16 +1,21 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-freebsd10.0"
-; RUN: llc < %s -march=ppc64 | FileCheck %s
+; RUN: llc -O0 < %s -march=ppc64 | FileCheck -check-prefix=OPT0 %s
+; RUN: llc -O1 < %s -march=ppc64 | FileCheck -check-prefix=OPT1 %s
@a = thread_local global i32 0, align 4
-;CHECK: localexec:
+;OPT0: localexec:
+;OPT1: localexec:
define i32 @localexec() nounwind {
entry:
-;CHECK: addis [[REG1:[0-9]+]], 13, a@tprel@ha
-;CHECK-NEXT: li [[REG2:[0-9]+]], 42
-;CHECK-NEXT: addi [[REG1]], [[REG1]], a@tprel@l
-;CHECK-NEXT: stw [[REG2]], 0([[REG1]])
+;OPT0: addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;OPT0-NEXT: li [[REG2:[0-9]+]], 42
+;OPT0-NEXT: addi [[REG1]], [[REG1]], a@tprel@l
+;OPT0: stw [[REG2]], 0([[REG1]])
+;OPT1: addis [[REG1:[0-9]+]], 13, a@tprel@ha
+;OPT1-NEXT: li [[REG2:[0-9]+]], 42
+;OPT1-NEXT: stw [[REG2]], a@tprel@l([[REG1]])
store i32 42, i32* @a, align 4
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/unal4-std.ll b/test/CodeGen/PowerPC/unal4-std.ll
new file mode 100644
index 000000000000..169bd787c0c1
--- /dev/null
+++ b/test/CodeGen/PowerPC/unal4-std.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define fastcc void @copy_to_conceal() #0 {
+entry:
+ br i1 undef, label %if.then, label %if.end210
+
+if.then: ; preds = %entry
+ br label %vector.body.i
+
+vector.body.i: ; preds = %vector.body.i, %if.then
+ %index.i = phi i64 [ 0, %vector.body.i ], [ 0, %if.then ]
+ store <8 x i16> zeroinitializer, <8 x i16>* undef, align 2
+ br label %vector.body.i
+
+if.end210: ; preds = %entry
+ ret void
+
+; This will generate two align-1 i64 stores. Make sure that they are
+; indexed stores and not in r+i form (which requires the offset to be
+; a multiple of 4).
+; CHECK: @copy_to_conceal
+; CHECK: stdx {{[0-9]+}}, 0,
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/unaligned.ll b/test/CodeGen/PowerPC/unaligned.ll
new file mode 100644
index 000000000000..d05080338f33
--- /dev/null
+++ b/test/CodeGen/PowerPC/unaligned.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define void @foo1(i16* %p, i16* %r) nounwind {
+entry:
+ %v = load i16* %p, align 1
+ store i16 %v, i16* %r, align 1
+ ret void
+
+; CHECK: @foo1
+; CHECK: lhz
+; CHECK: sth
+}
+
+define void @foo2(i32* %p, i32* %r) nounwind {
+entry:
+ %v = load i32* %p, align 1
+ store i32 %v, i32* %r, align 1
+ ret void
+
+; CHECK: @foo2
+; CHECK: lwz
+; CHECK: stw
+}
+
+define void @foo3(i64* %p, i64* %r) nounwind {
+entry:
+ %v = load i64* %p, align 1
+ store i64 %v, i64* %r, align 1
+ ret void
+
+; CHECK: @foo3
+; CHECK: ld
+; CHECK: std
+}
+
+define void @foo4(float* %p, float* %r) nounwind {
+entry:
+ %v = load float* %p, align 1
+ store float %v, float* %r, align 1
+ ret void
+
+; CHECK: @foo4
+; CHECK: lfs
+; CHECK: stfs
+}
+
+define void @foo5(double* %p, double* %r) nounwind {
+entry:
+ %v = load double* %p, align 1
+ store double %v, double* %r, align 1
+ ret void
+
+; CHECK: @foo5
+; CHECK: lfd
+; CHECK: stfd
+}
+
+define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
+entry:
+ %v = load <4 x float>* %p, align 1
+ store <4 x float> %v, <4 x float>* %r, align 1
+ ret void
+
+; These loads and stores are legalized into aligned loads and stores
+; using aligned stack slots.
+; CHECK: @foo6
+; CHECK: ld
+; CHECK: ld
+; CHECK: std
+; CHECK: std
+}
+
diff --git a/test/CodeGen/PowerPC/vaddsplat.ll b/test/CodeGen/PowerPC/vaddsplat.ll
new file mode 100644
index 000000000000..e65148aff03a
--- /dev/null
+++ b/test/CodeGen/PowerPC/vaddsplat.ll
@@ -0,0 +1,149 @@
+; RUN: llc -O0 -mcpu=pwr7 < %s | FileCheck %s
+
+; Test optimizations of build_vector for 6-bit immediates.
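+; vspltis{b,h,w} only encodes a 5-bit signed immediate (-16..15): an even
+; splat such as 18 is built as 9 + 9 (one splat plus a vector add), and an
+; odd splat such as 27 as 11 - (-16) (two splats plus a vector subtract).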
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%v4i32 = type <4 x i32>
+%v8i16 = type <8 x i16>
+%v16i8 = type <16 x i8>
+
+define void @test_v4i32_pos_even(%v4i32* %P, %v4i32* %S) {
+ %p = load %v4i32* %P
+ %r = add %v4i32 %p, < i32 18, i32 18, i32 18, i32 18 >
+ store %v4i32 %r, %v4i32* %S
+ ret void
+}
+
+; CHECK: test_v4i32_pos_even:
+; CHECK: vspltisw [[REG1:[0-9]+]], 9
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v4i32_neg_even(%v4i32* %P, %v4i32* %S) {
+ %p = load %v4i32* %P
+ %r = add %v4i32 %p, < i32 -28, i32 -28, i32 -28, i32 -28 >
+ store %v4i32 %r, %v4i32* %S
+ ret void
+}
+
+; CHECK: test_v4i32_neg_even:
+; CHECK: vspltisw [[REG1:[0-9]+]], -14
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v8i16_pos_even(%v8i16* %P, %v8i16* %S) {
+ %p = load %v8i16* %P
+ %r = add %v8i16 %p, < i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30 >
+ store %v8i16 %r, %v8i16* %S
+ ret void
+}
+
+; CHECK: test_v8i16_pos_even:
+; CHECK: vspltish [[REG1:[0-9]+]], 15
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v8i16_neg_even(%v8i16* %P, %v8i16* %S) {
+ %p = load %v8i16* %P
+ %r = add %v8i16 %p, < i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32 >
+ store %v8i16 %r, %v8i16* %S
+ ret void
+}
+
+; CHECK: test_v8i16_neg_even:
+; CHECK: vspltish [[REG1:[0-9]+]], -16
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v16i8_pos_even(%v16i8* %P, %v16i8* %S) {
+ %p = load %v16i8* %P
+ %r = add %v16i8 %p, < i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16 >
+ store %v16i8 %r, %v16i8* %S
+ ret void
+}
+
+; CHECK: test_v16i8_pos_even:
+; CHECK: vspltisb [[REG1:[0-9]+]], 8
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v16i8_neg_even(%v16i8* %P, %v16i8* %S) {
+ %p = load %v16i8* %P
+ %r = add %v16i8 %p, < i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18 >
+ store %v16i8 %r, %v16i8* %S
+ ret void
+}
+
+; CHECK: test_v16i8_neg_even:
+; CHECK: vspltisb [[REG1:[0-9]+]], -9
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
+
+define void @test_v4i32_pos_odd(%v4i32* %P, %v4i32* %S) {
+ %p = load %v4i32* %P
+ %r = add %v4i32 %p, < i32 27, i32 27, i32 27, i32 27 >
+ store %v4i32 %r, %v4i32* %S
+ ret void
+}
+
+; CHECK: test_v4i32_pos_odd:
+; CHECK: vspltisw [[REG2:[0-9]+]], -16
+; CHECK: vspltisw [[REG1:[0-9]+]], 11
+; CHECK: vsubuwm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v4i32_neg_odd(%v4i32* %P, %v4i32* %S) {
+ %p = load %v4i32* %P
+ %r = add %v4i32 %p, < i32 -27, i32 -27, i32 -27, i32 -27 >
+ store %v4i32 %r, %v4i32* %S
+ ret void
+}
+
+; CHECK: test_v4i32_neg_odd:
+; CHECK: vspltisw [[REG2:[0-9]+]], -16
+; CHECK: vspltisw [[REG1:[0-9]+]], -11
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v8i16_pos_odd(%v8i16* %P, %v8i16* %S) {
+ %p = load %v8i16* %P
+ %r = add %v8i16 %p, < i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31 >
+ store %v8i16 %r, %v8i16* %S
+ ret void
+}
+
+; CHECK: test_v8i16_pos_odd:
+; CHECK: vspltish [[REG2:[0-9]+]], -16
+; CHECK: vspltish [[REG1:[0-9]+]], 15
+; CHECK: vsubuhm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v8i16_neg_odd(%v8i16* %P, %v8i16* %S) {
+ %p = load %v8i16* %P
+ %r = add %v8i16 %p, < i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31 >
+ store %v8i16 %r, %v8i16* %S
+ ret void
+}
+
+; CHECK: test_v8i16_neg_odd:
+; CHECK: vspltish [[REG2:[0-9]+]], -16
+; CHECK: vspltish [[REG1:[0-9]+]], -15
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v16i8_pos_odd(%v16i8* %P, %v16i8* %S) {
+ %p = load %v16i8* %P
+ %r = add %v16i8 %p, < i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17 >
+ store %v16i8 %r, %v16i8* %S
+ ret void
+}
+
+; CHECK: test_v16i8_pos_odd:
+; CHECK: vspltisb [[REG2:[0-9]+]], -16
+; CHECK: vspltisb [[REG1:[0-9]+]], 1
+; CHECK: vsububm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v16i8_neg_odd(%v16i8* %P, %v16i8* %S) {
+ %p = load %v16i8* %P
+ %r = add %v16i8 %p, < i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17 >
+ store %v16i8 %r, %v16i8* %S
+ ret void
+}
+
+; CHECK: test_v16i8_neg_odd:
+; CHECK: vspltisb [[REG2:[0-9]+]], -16
+; CHECK: vspltisb [[REG1:[0-9]+]], -1
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG2]]
+
diff --git a/test/CodeGen/PowerPC/varargs.ll b/test/CodeGen/PowerPC/varargs.ll
index 1769be957ac4..90f0480d6ad2 100644
--- a/test/CodeGen/PowerPC/varargs.ll
+++ b/test/CodeGen/PowerPC/varargs.ll
@@ -8,15 +8,16 @@ define i8* @test1(i8** %foo) nounwind {
}
; P32: test1:
-; P32: lwz r4, 0(r3)
-; P32: addi r5, r4, 4
-; P32: stw r5, 0(r3)
-; P32: lwz r3, 0(r4)
-; P32: blr
+; P32: lwz r2, 0(r3)
+; P32: addi r4, r2, 4
+; P32: stw r4, 0(r3)
+; P32: lwz r3, 0(r2)
+; P32: blr
; P64: test1:
-; P64: ld r4, 0(r3)
-; P64: addi r5, r4, 8
-; P64: std r5, 0(r3)
-; P64: ld r3, 0(r4)
-; P64: blr
+; P64: ld r2, 0(r3)
+; P64: addi r4, r2, 8
+; P64: std r4, 0(r3)
+; P64: ld r3, 0(r2)
+; P64: blr
+
diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll
index 3180f464d125..eb41667610cd 100644
--- a/test/CodeGen/PowerPC/vec_cmp.ll
+++ b/test/CodeGen/PowerPC/vec_cmp.ll
@@ -54,7 +54,7 @@ entry:
}
; CHECK: v16si8_cmp_ne:
; CHECK: vcmpequb [[RET:[0-9]+]], 2, 3
-; CHECK-NOR: vnor 2, [[RET]], [[RET]]
+; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <16 x i8> @v16si8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll
index 399f19f8d2e2..e4799e50e6ad 100644
--- a/test/CodeGen/PowerPC/vec_constants.ll
+++ b/test/CodeGen/PowerPC/vec_constants.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI
+; RUN: llc -O0 -mcpu=pwr7 < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
%tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]
@@ -13,32 +16,71 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
%tmp13 = bitcast <4 x i32> %tmp12 to <4 x float> ; <<4 x float>> [#uses=1]
store <4 x float> %tmp13, <4 x float>* %P3
ret void
+
+; CHECK: test1:
+; CHECK-NOT: CPI
}
define <4 x i32> @test_30() nounwind {
ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 >
+
+; CHECK: test_30:
+; CHECK: vspltisw
+; CHECK-NEXT: vadduwm
+; CHECK-NEXT: blr
}
define <4 x i32> @test_29() nounwind {
ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 >
+
+; CHECK: test_29:
+; CHECK: vspltisw
+; CHECK-NEXT: vspltisw
+; CHECK-NEXT: vsubuwm
+; CHECK-NEXT: blr
}
define <8 x i16> @test_n30() nounwind {
ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 >
+
+; CHECK: test_n30:
+; CHECK: vspltish
+; CHECK-NEXT: vadduhm
+; CHECK-NEXT: blr
}
define <16 x i8> @test_n104() nounwind {
ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 >
+
+; CHECK: test_n104:
+; CHECK: vspltisb
+; CHECK-NEXT: vslb
+; CHECK-NEXT: blr
}
define <4 x i32> @test_vsldoi() nounwind {
ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 >
+
+; CHECK: test_vsldoi:
+; CHECK: vspltisw
+; CHECK-NEXT: vsldoi
+; CHECK-NEXT: blr
}
define <8 x i16> @test_vsldoi_65023() nounwind {
ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 >
+
+; CHECK: test_vsldoi_65023:
+; CHECK: vspltish
+; CHECK-NEXT: vsldoi
+; CHECK-NEXT: blr
}
define <4 x i32> @test_rol() nounwind {
ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 >
+
+; CHECK: test_rol:
+; CHECK: vspltisw
+; CHECK-NEXT: vrlw
+; CHECK-NEXT: blr
}
diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll
index 201c15b9c735..998645d90da6 100644
--- a/test/CodeGen/PowerPC/vec_extload.ll
+++ b/test/CodeGen/PowerPC/vec_extload.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+; RUN: llc -mcpu=pwr6 -mattr=+altivec -code-model=small < %s | FileCheck %s
; Check vector extend load expansion with altivec enabled.
@@ -15,55 +15,9 @@ define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) {
ret <16 x i8> %c
}
; CHECK: v16si8_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lbz
-; CHECK: stb
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vslb
+; CHECK: vsrab
+; CHECK: blr
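+; (The vslb/vsrab pair is the classic in-register sign-extension idiom:
+; shift left to the top of the element, then arithmetic-shift back down.)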
; The zero extend uses more clever logic: a vector splat
; and a logical AND to set the higher bits to 0.
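; Roughly (an illustrative sketch, not IR from this test):
;   %mask = <16 x i8> <i8 15, i8 15, ...>  ; mask created with a splat
;   %z    = and <16 x i8> %a, %mask        ; AND clears the high bits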
@@ -83,31 +37,9 @@ define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) {
ret <8 x i16> %c
}
; CHECK: v8si16_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lhz
-; CHECK: sth
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vslh
+; CHECK: vsrah
+; CHECK: blr
; Same as v8si16_sext_in_reg, but instead of creating the mask
; with a splat, it is loaded from memory.
@@ -129,19 +61,9 @@ define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) {
ret <4 x i32> %c
}
; CHECK: v4si32_sext_in_reg:
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: lha
-; CHECK: stw
-; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vslw
+; CHECK: vsraw
+; CHECK: blr
; Same as v8si16_sext_in_reg.
define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
index 80f4de4a1728..53bc75dd1078 100644
--- a/test/CodeGen/PowerPC/vec_mul.ll
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep mullw
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vmsumuhm
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec | FileCheck %s
define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
%tmp = load <4 x i32>* %X ; <<4 x i32>> [#uses=1]
@@ -7,6 +6,9 @@ define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
%tmp3 = mul <4 x i32> %tmp, %tmp2 ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp3
}
+; CHECK: test_v4i32:
+; CHECK: vmsumuhm
+; CHECK-NOT: mullw
define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
%tmp = load <8 x i16>* %X ; <<8 x i16>> [#uses=1]
@@ -14,6 +16,9 @@ define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
%tmp3 = mul <8 x i16> %tmp, %tmp2 ; <<8 x i16>> [#uses=1]
ret <8 x i16> %tmp3
}
+; CHECK: test_v8i16:
+; CHECK: vmladduhm
+; CHECK-NOT: mullw
define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
%tmp = load <16 x i8>* %X ; <<16 x i8>> [#uses=1]
@@ -21,3 +26,21 @@ define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
%tmp3 = mul <16 x i8> %tmp, %tmp2 ; <<16 x i8>> [#uses=1]
ret <16 x i8> %tmp3
}
+; CHECK: test_v16i8:
+; CHECK: vmuloub
+; CHECK: vmuleub
+; CHECK-NOT: mullw
+
+define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
+ %tmp = load <4 x float>* %X
+ %tmp2 = load <4 x float>* %Y
+ %tmp3 = fmul <4 x float> %tmp, %tmp2
+ ret <4 x float> %tmp3
+}
+; Check the creation of a negative zero float vector by creating a vector of
+; all bits set and shifting it 31 bits to the left, resulting in a vector of
+; 4 x 0x80000000 (-0.0 as float).
+; CHECK: test_float:
+; CHECK: vspltisw [[ZNEG:[0-9]+]], -1
+; CHECK: vslw {{[0-9]+}}, [[ZNEG]], [[ZNEG]]
+; CHECK: vmaddfp
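+; (Worked out: vspltisw with immediate -1 splats 0xFFFFFFFF into each word;
+; vslw then shifts each word left by the low 5 bits of the matching word of
+; the same register, i.e. by 31, giving 0xFFFFFFFF << 31 = 0x80000000.)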
diff --git a/test/CodeGen/PowerPC/vec_rounding.ll b/test/CodeGen/PowerPC/vec_rounding.ll
new file mode 100644
index 000000000000..7c55638620a9
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_rounding.ll
@@ -0,0 +1,172 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+
+; Check lowering of the vector floor/ceil/trunc/nearbyint intrinsics:
+; double vectors are scalarized to frim/frip/friz (or a nearbyint libcall),
+; while float vectors use the Altivec vrfim/vrfip/vrfiz/vrfin instructions.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
+define <2 x double> @floor_v2f64(<2 x double> %p)
+{
+ %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+; CHECK: floor_v2f64:
+; CHECK: frim
+; CHECK: frim
+
+declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
+define <4 x double> @floor_v4f64(<4 x double> %p)
+{
+ %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+; CHECK: floor_v4f64:
+; CHECK: frim
+; CHECK: frim
+; CHECK: frim
+; CHECK: frim
+
+declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+define <2 x double> @ceil_v2f64(<2 x double> %p)
+{
+ %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+; CHECK: ceil_v2f64:
+; CHECK: frip
+; CHECK: frip
+
+declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+define <4 x double> @ceil_v4f64(<4 x double> %p)
+{
+ %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+; CHECK: ceil_v4f64:
+; CHECK: frip
+; CHECK: frip
+; CHECK: frip
+; CHECK: frip
+
+declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+define <2 x double> @trunc_v2f64(<2 x double> %p)
+{
+ %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+; CHECK: trunc_v2f64:
+; CHECK: friz
+; CHECK: friz
+
+declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+define <4 x double> @trunc_v4f64(<4 x double> %p)
+{
+ %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+; CHECK: trunc_v4f64:
+; CHECK: friz
+; CHECK: friz
+; CHECK: friz
+; CHECK: friz
+
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+define <2 x double> @nearbyint_v2f64(<2 x double> %p)
+{
+ %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+; CHECK: nearbyint_v2f64:
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+define <4 x double> @nearbyint_v4f64(<4 x double> %p)
+{
+ %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+; CHECK: nearbyint_v4f64:
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
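+; (nearbyint on doubles is expanded to libcalls rather than frin, presumably
+; because frin's fixed round-to-nearest behavior does not match nearbyint's
+; current-rounding-mode semantics.)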
+
+
+declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
+define <4 x float> @floor_v4f32(<4 x float> %p)
+{
+ %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+; CHECK: floor_v4f32:
+; CHECK: vrfim
+
+declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+define <8 x float> @floor_v8f32(<8 x float> %p)
+{
+ %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+; CHECK: floor_v8f32:
+; CHECK: vrfim
+; CHECK: vrfim
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+define <4 x float> @ceil_v4f32(<4 x float> %p)
+{
+ %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+; CHECK: ceil_v4f32:
+; CHECK: vrfip
+
+declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+define <8 x float> @ceil_v8f32(<8 x float> %p)
+{
+ %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+; CHECK: ceil_v8f32:
+; CHECK: vrfip
+; CHECK: vrfip
+
+declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+define <4 x float> @trunc_v4f32(<4 x float> %p)
+{
+ %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+; CHECK: trunc_v4f32:
+; CHECK: vrfiz
+
+declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+define <8 x float> @trunc_v8f32(<8 x float> %p)
+{
+ %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+; CHECK: trunc_v8f32:
+; CHECK: vrfiz
+; CHECK: vrfiz
+
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+define <4 x float> @nearbyint_v4f32(<4 x float> %p)
+{
+ %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+; CHECK: nearbyint_v4f32:
+; CHECK: vrfin
+
+declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+define <8 x float> @nearbyint_v8f32(<8 x float> %p)
+{
+ %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+; CHECK: nearbyint_v8f32:
+; CHECK: vrfin
+; CHECK: vrfin
diff --git a/test/CodeGen/PowerPC/vec_select.ll b/test/CodeGen/PowerPC/vec_select.ll
new file mode 100644
index 000000000000..4ad0acca0067
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_select.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -mtriple=powerpc64-linux-gnu -mattr=+altivec | FileCheck %s
+
+; CHECK: vsel_float
+define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
+ %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
+ ret <4 x float> %vsel
+}
diff --git a/test/CodeGen/PowerPC/vrsave-spill.ll b/test/CodeGen/PowerPC/vrsave-spill.ll
new file mode 100644
index 000000000000..c73206d8fc86
--- /dev/null
+++ b/test/CodeGen/PowerPC/vrsave-spill.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=powerpc64-apple-darwin -mcpu=g5 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-apple-darwin"
+
+define <4 x float> @foo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+ %c = fadd <4 x float> %a, %b
+ %d = fmul <4 x float> %c, %a
+ call void asm sideeffect "", "~{VRsave}"() nounwind
+ br label %return
+
+; CHECK: @foo
+; CHECK: mfspr r{{[0-9]+}}, 256
+; CHECK: mtspr 256, r{{[0-9]+}}
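+; (SPR 256 is VRSAVE; clobbering it in the inline asm above is what should
+; force the mfspr/mtspr save and restore pair being checked for.)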
+
+return: ; preds = %entry
+ ret <4 x float> %d
+}
+
diff --git a/test/CodeGen/PowerPC/vrspill.ll b/test/CodeGen/PowerPC/vrspill.ll
index 7641017c434e..9fb3d03477c9 100644
--- a/test/CodeGen/PowerPC/vrspill.ll
+++ b/test/CodeGen/PowerPC/vrspill.ll
@@ -13,7 +13,7 @@ entry:
ret void
}
-; CHECK: stvx 2, 0, 0
-; CHECK: lvx 2, 0, 0
+; CHECK: stvx 2, 1,
+; CHECK: lvx 2, 1,
declare void @foo(i32*)
diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll
new file mode 100644
index 000000000000..114f9e74474f
--- /dev/null
+++ b/test/CodeGen/R600/128bit-kernel-args.ll
@@ -0,0 +1,18 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @v4i32_kernel_arg
+; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
+
+define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
+entry:
+ store <4 x i32> %in, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @v4f32_kernel_arg
+; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40
+define void @v4f32_kernel_arg(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+entry:
+ store <4 x float> %in, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/add.v4i32.ll b/test/CodeGen/R600/add.v4i32.ll
new file mode 100644
index 000000000000..ac4a87417bde
--- /dev/null
+++ b/test/CodeGen/R600/add.v4i32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
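+; (The <4 x i32> add is split into one ADD_INT per lane, hence the four
+; checks above.)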
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32> addrspace(1) * %b_ptr
+ %result = add <4 x i32> %a, %b
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/alu-split.ll b/test/CodeGen/R600/alu-split.ll
new file mode 100644
index 000000000000..afefcd9f78b0
--- /dev/null
+++ b/test/CodeGen/R600/alu-split.ll
@@ -0,0 +1,850 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: ALU
+;CHECK: ALU
+;CHECK: ALU
+;CHECK-NOT: ALU
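+
+; This shader is long enough that its ALU instructions cannot fit into a
+; single ALU clause, so the output is expected to contain exactly three ALU
+; clauses and no more.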
+
+define void @main() #0 {
+main_body:
+ %0 = call float @llvm.R600.load.input(i32 4)
+ %1 = call float @llvm.R600.load.input(i32 5)
+ %2 = call float @llvm.R600.load.input(i32 6)
+ %3 = call float @llvm.R600.load.input(i32 7)
+ %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %5 = extractelement <4 x float> %4, i32 0
+ %6 = fcmp une float 0x4016F2B020000000, %5
+ %7 = select i1 %6, float 1.000000e+00, float 0.000000e+00
+ %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %9 = extractelement <4 x float> %8, i32 1
+ %10 = fcmp une float 0x401FDCC640000000, %9
+ %11 = select i1 %10, float 1.000000e+00, float 0.000000e+00
+ %12 = fsub float -0.000000e+00, %7
+ %13 = fptosi float %12 to i32
+ %14 = fsub float -0.000000e+00, %11
+ %15 = fptosi float %14 to i32
+ %16 = bitcast i32 %13 to float
+ %17 = bitcast i32 %15 to float
+ %18 = bitcast float %16 to i32
+ %19 = bitcast float %17 to i32
+ %20 = or i32 %18, %19
+ %21 = bitcast i32 %20 to float
+ %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17)
+ %23 = extractelement <4 x float> %22, i32 0
+ %24 = fcmp une float 0xC00574BC60000000, %23
+ %25 = select i1 %24, float 1.000000e+00, float 0.000000e+00
+ %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 17)
+ %27 = extractelement <4 x float> %26, i32 1
+ %28 = fcmp une float 0x40210068E0000000, %27
+ %29 = select i1 %28, float 1.000000e+00, float 0.000000e+00
+ %30 = fsub float -0.000000e+00, %25
+ %31 = fptosi float %30 to i32
+ %32 = fsub float -0.000000e+00, %29
+ %33 = fptosi float %32 to i32
+ %34 = bitcast i32 %31 to float
+ %35 = bitcast i32 %33 to float
+ %36 = bitcast float %34 to i32
+ %37 = bitcast float %35 to i32
+ %38 = or i32 %36, %37
+ %39 = bitcast i32 %38 to float
+ %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18)
+ %41 = extractelement <4 x float> %40, i32 0
+ %42 = fcmp une float 0xBFC9A6B500000000, %41
+ %43 = select i1 %42, float 1.000000e+00, float 0.000000e+00
+ %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 18)
+ %45 = extractelement <4 x float> %44, i32 1
+ %46 = fcmp une float 0xC0119BDA60000000, %45
+ %47 = select i1 %46, float 1.000000e+00, float 0.000000e+00
+ %48 = fsub float -0.000000e+00, %43
+ %49 = fptosi float %48 to i32
+ %50 = fsub float -0.000000e+00, %47
+ %51 = fptosi float %50 to i32
+ %52 = bitcast i32 %49 to float
+ %53 = bitcast i32 %51 to float
+ %54 = bitcast float %52 to i32
+ %55 = bitcast float %53 to i32
+ %56 = or i32 %54, %55
+ %57 = bitcast i32 %56 to float
+ %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19)
+ %59 = extractelement <4 x float> %58, i32 0
+ %60 = fcmp une float 0xC02085D640000000, %59
+ %61 = select i1 %60, float 1.000000e+00, float 0.000000e+00
+ %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 19)
+ %63 = extractelement <4 x float> %62, i32 1
+ %64 = fcmp une float 0xBFD7C1BDA0000000, %63
+ %65 = select i1 %64, float 1.000000e+00, float 0.000000e+00
+ %66 = fsub float -0.000000e+00, %61
+ %67 = fptosi float %66 to i32
+ %68 = fsub float -0.000000e+00, %65
+ %69 = fptosi float %68 to i32
+ %70 = bitcast i32 %67 to float
+ %71 = bitcast i32 %69 to float
+ %72 = bitcast float %70 to i32
+ %73 = bitcast float %71 to i32
+ %74 = or i32 %72, %73
+ %75 = bitcast i32 %74 to float
+ %76 = insertelement <4 x float> undef, float %21, i32 0
+ %77 = insertelement <4 x float> %76, float %39, i32 1
+ %78 = insertelement <4 x float> %77, float %57, i32 2
+ %79 = insertelement <4 x float> %78, float %75, i32 3
+ %80 = insertelement <4 x float> undef, float %21, i32 0
+ %81 = insertelement <4 x float> %80, float %39, i32 1
+ %82 = insertelement <4 x float> %81, float %57, i32 2
+ %83 = insertelement <4 x float> %82, float %75, i32 3
+ %84 = call float @llvm.AMDGPU.dp4(<4 x float> %79, <4 x float> %83)
+ %85 = bitcast float %84 to i32
+ %86 = icmp ne i32 %85, 0
+ %87 = sext i1 %86 to i32
+ %88 = bitcast i32 %87 to float
+ %89 = bitcast float %88 to i32
+ %90 = xor i32 %89, -1
+ %91 = bitcast i32 %90 to float
+ %92 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20)
+ %93 = extractelement <4 x float> %92, i32 0
+ %94 = fcmp une float 0x401FDCC640000000, %93
+ %95 = select i1 %94, float 1.000000e+00, float 0.000000e+00
+ %96 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 20)
+ %97 = extractelement <4 x float> %96, i32 1
+ %98 = fcmp une float 0xC00574BC60000000, %97
+ %99 = select i1 %98, float 1.000000e+00, float 0.000000e+00
+ %100 = fsub float -0.000000e+00, %95
+ %101 = fptosi float %100 to i32
+ %102 = fsub float -0.000000e+00, %99
+ %103 = fptosi float %102 to i32
+ %104 = bitcast i32 %101 to float
+ %105 = bitcast i32 %103 to float
+ %106 = bitcast float %104 to i32
+ %107 = bitcast float %105 to i32
+ %108 = or i32 %106, %107
+ %109 = bitcast i32 %108 to float
+ %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21)
+ %111 = extractelement <4 x float> %110, i32 0
+ %112 = fcmp une float 0x40210068E0000000, %111
+ %113 = select i1 %112, float 1.000000e+00, float 0.000000e+00
+ %114 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 21)
+ %115 = extractelement <4 x float> %114, i32 1
+ %116 = fcmp une float 0xBFC9A6B500000000, %115
+ %117 = select i1 %116, float 1.000000e+00, float 0.000000e+00
+ %118 = fsub float -0.000000e+00, %113
+ %119 = fptosi float %118 to i32
+ %120 = fsub float -0.000000e+00, %117
+ %121 = fptosi float %120 to i32
+ %122 = bitcast i32 %119 to float
+ %123 = bitcast i32 %121 to float
+ %124 = bitcast float %122 to i32
+ %125 = bitcast float %123 to i32
+ %126 = or i32 %124, %125
+ %127 = bitcast i32 %126 to float
+ %128 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22)
+ %129 = extractelement <4 x float> %128, i32 0
+ %130 = fcmp une float 0xC0119BDA60000000, %129
+ %131 = select i1 %130, float 1.000000e+00, float 0.000000e+00
+ %132 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 22)
+ %133 = extractelement <4 x float> %132, i32 1
+ %134 = fcmp une float 0xC02085D640000000, %133
+ %135 = select i1 %134, float 1.000000e+00, float 0.000000e+00
+ %136 = fsub float -0.000000e+00, %131
+ %137 = fptosi float %136 to i32
+ %138 = fsub float -0.000000e+00, %135
+ %139 = fptosi float %138 to i32
+ %140 = bitcast i32 %137 to float
+ %141 = bitcast i32 %139 to float
+ %142 = bitcast float %140 to i32
+ %143 = bitcast float %141 to i32
+ %144 = or i32 %142, %143
+ %145 = bitcast i32 %144 to float
+ %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+ %147 = extractelement <4 x float> %146, i32 0
+ %148 = fcmp une float 0xBFD7C1BDA0000000, %147
+ %149 = select i1 %148, float 1.000000e+00, float 0.000000e+00
+ %150 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+ %151 = extractelement <4 x float> %150, i32 1
+ %152 = fcmp une float 0x401E1D7DC0000000, %151
+ %153 = select i1 %152, float 1.000000e+00, float 0.000000e+00
+ %154 = fsub float -0.000000e+00, %149
+ %155 = fptosi float %154 to i32
+ %156 = fsub float -0.000000e+00, %153
+ %157 = fptosi float %156 to i32
+ %158 = bitcast i32 %155 to float
+ %159 = bitcast i32 %157 to float
+ %160 = bitcast float %158 to i32
+ %161 = bitcast float %159 to i32
+ %162 = or i32 %160, %161
+ %163 = bitcast i32 %162 to float
+ %164 = insertelement <4 x float> undef, float %109, i32 0
+ %165 = insertelement <4 x float> %164, float %127, i32 1
+ %166 = insertelement <4 x float> %165, float %145, i32 2
+ %167 = insertelement <4 x float> %166, float %163, i32 3
+ %168 = insertelement <4 x float> undef, float %109, i32 0
+ %169 = insertelement <4 x float> %168, float %127, i32 1
+ %170 = insertelement <4 x float> %169, float %145, i32 2
+ %171 = insertelement <4 x float> %170, float %163, i32 3
+ %172 = call float @llvm.AMDGPU.dp4(<4 x float> %167, <4 x float> %171)
+ %173 = bitcast float %172 to i32
+ %174 = icmp ne i32 %173, 0
+ %175 = sext i1 %174 to i32
+ %176 = bitcast i32 %175 to float
+ %177 = bitcast float %176 to i32
+ %178 = xor i32 %177, -1
+ %179 = bitcast i32 %178 to float
+ %180 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %181 = extractelement <4 x float> %180, i32 0
+ %182 = fcmp une float 0x401FDCC640000000, %181
+ %183 = select i1 %182, float 1.000000e+00, float 0.000000e+00
+ %184 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %185 = extractelement <4 x float> %184, i32 1
+ %186 = fcmp une float 0xC00574BC60000000, %185
+ %187 = select i1 %186, float 1.000000e+00, float 0.000000e+00
+ %188 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %189 = extractelement <4 x float> %188, i32 2
+ %190 = fcmp une float 0x40210068E0000000, %189
+ %191 = select i1 %190, float 1.000000e+00, float 0.000000e+00
+ %192 = fsub float -0.000000e+00, %183
+ %193 = fptosi float %192 to i32
+ %194 = fsub float -0.000000e+00, %187
+ %195 = fptosi float %194 to i32
+ %196 = fsub float -0.000000e+00, %191
+ %197 = fptosi float %196 to i32
+ %198 = bitcast i32 %193 to float
+ %199 = bitcast i32 %195 to float
+ %200 = bitcast i32 %197 to float
+ %201 = bitcast float %199 to i32
+ %202 = bitcast float %200 to i32
+ %203 = or i32 %201, %202
+ %204 = bitcast i32 %203 to float
+ %205 = bitcast float %198 to i32
+ %206 = bitcast float %204 to i32
+ %207 = or i32 %205, %206
+ %208 = bitcast i32 %207 to float
+ %209 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %210 = extractelement <4 x float> %209, i32 0
+ %211 = fcmp une float 0xBFC9A6B500000000, %210
+ %212 = select i1 %211, float 1.000000e+00, float 0.000000e+00
+ %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %214 = extractelement <4 x float> %213, i32 1
+ %215 = fcmp une float 0xC0119BDA60000000, %214
+ %216 = select i1 %215, float 1.000000e+00, float 0.000000e+00
+ %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %218 = extractelement <4 x float> %217, i32 2
+ %219 = fcmp une float 0xC02085D640000000, %218
+ %220 = select i1 %219, float 1.000000e+00, float 0.000000e+00
+ %221 = fsub float -0.000000e+00, %212
+ %222 = fptosi float %221 to i32
+ %223 = fsub float -0.000000e+00, %216
+ %224 = fptosi float %223 to i32
+ %225 = fsub float -0.000000e+00, %220
+ %226 = fptosi float %225 to i32
+ %227 = bitcast i32 %222 to float
+ %228 = bitcast i32 %224 to float
+ %229 = bitcast i32 %226 to float
+ %230 = bitcast float %228 to i32
+ %231 = bitcast float %229 to i32
+ %232 = or i32 %230, %231
+ %233 = bitcast i32 %232 to float
+ %234 = bitcast float %227 to i32
+ %235 = bitcast float %233 to i32
+ %236 = or i32 %234, %235
+ %237 = bitcast i32 %236 to float
+ %238 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %239 = extractelement <4 x float> %238, i32 0
+ %240 = fcmp une float 0xBFD7C1BDA0000000, %239
+ %241 = select i1 %240, float 1.000000e+00, float 0.000000e+00
+ %242 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %243 = extractelement <4 x float> %242, i32 1
+ %244 = fcmp une float 0x401E1D7DC0000000, %243
+ %245 = select i1 %244, float 1.000000e+00, float 0.000000e+00
+ %246 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %247 = extractelement <4 x float> %246, i32 2
+ %248 = fcmp une float 0xC019893740000000, %247
+ %249 = select i1 %248, float 1.000000e+00, float 0.000000e+00
+ %250 = fsub float -0.000000e+00, %241
+ %251 = fptosi float %250 to i32
+ %252 = fsub float -0.000000e+00, %245
+ %253 = fptosi float %252 to i32
+ %254 = fsub float -0.000000e+00, %249
+ %255 = fptosi float %254 to i32
+ %256 = bitcast i32 %251 to float
+ %257 = bitcast i32 %253 to float
+ %258 = bitcast i32 %255 to float
+ %259 = bitcast float %257 to i32
+ %260 = bitcast float %258 to i32
+ %261 = or i32 %259, %260
+ %262 = bitcast i32 %261 to float
+ %263 = bitcast float %256 to i32
+ %264 = bitcast float %262 to i32
+ %265 = or i32 %263, %264
+ %266 = bitcast i32 %265 to float
+ %267 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %268 = extractelement <4 x float> %267, i32 0
+ %269 = fcmp une float 0x40220F0D80000000, %268
+ %270 = select i1 %269, float 1.000000e+00, float 0.000000e+00
+ %271 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %272 = extractelement <4 x float> %271, i32 1
+ %273 = fcmp une float 0xC018E2EB20000000, %272
+ %274 = select i1 %273, float 1.000000e+00, float 0.000000e+00
+ %275 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %276 = extractelement <4 x float> %275, i32 2
+ %277 = fcmp une float 0xBFEA8DB8C0000000, %276
+ %278 = select i1 %277, float 1.000000e+00, float 0.000000e+00
+ %279 = fsub float -0.000000e+00, %270
+ %280 = fptosi float %279 to i32
+ %281 = fsub float -0.000000e+00, %274
+ %282 = fptosi float %281 to i32
+ %283 = fsub float -0.000000e+00, %278
+ %284 = fptosi float %283 to i32
+ %285 = bitcast i32 %280 to float
+ %286 = bitcast i32 %282 to float
+ %287 = bitcast i32 %284 to float
+ %288 = bitcast float %286 to i32
+ %289 = bitcast float %287 to i32
+ %290 = or i32 %288, %289
+ %291 = bitcast i32 %290 to float
+ %292 = bitcast float %285 to i32
+ %293 = bitcast float %291 to i32
+ %294 = or i32 %292, %293
+ %295 = bitcast i32 %294 to float
+ %296 = insertelement <4 x float> undef, float %208, i32 0
+ %297 = insertelement <4 x float> %296, float %237, i32 1
+ %298 = insertelement <4 x float> %297, float %266, i32 2
+ %299 = insertelement <4 x float> %298, float %295, i32 3
+ %300 = insertelement <4 x float> undef, float %208, i32 0
+ %301 = insertelement <4 x float> %300, float %237, i32 1
+ %302 = insertelement <4 x float> %301, float %266, i32 2
+ %303 = insertelement <4 x float> %302, float %295, i32 3
+ %304 = call float @llvm.AMDGPU.dp4(<4 x float> %299, <4 x float> %303)
+ %305 = bitcast float %304 to i32
+ %306 = icmp ne i32 %305, 0
+ %307 = sext i1 %306 to i32
+ %308 = bitcast i32 %307 to float
+ %309 = bitcast float %308 to i32
+ %310 = xor i32 %309, -1
+ %311 = bitcast i32 %310 to float
+ %312 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+ %313 = extractelement <4 x float> %312, i32 0
+ %314 = fcmp une float 0xC00574BC60000000, %313
+ %315 = select i1 %314, float 1.000000e+00, float 0.000000e+00
+ %316 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+ %317 = extractelement <4 x float> %316, i32 1
+ %318 = fcmp une float 0x40210068E0000000, %317
+ %319 = select i1 %318, float 1.000000e+00, float 0.000000e+00
+ %320 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+ %321 = extractelement <4 x float> %320, i32 2
+ %322 = fcmp une float 0xBFC9A6B500000000, %321
+ %323 = select i1 %322, float 1.000000e+00, float 0.000000e+00
+ %324 = fsub float -0.000000e+00, %315
+ %325 = fptosi float %324 to i32
+ %326 = fsub float -0.000000e+00, %319
+ %327 = fptosi float %326 to i32
+ %328 = fsub float -0.000000e+00, %323
+ %329 = fptosi float %328 to i32
+ %330 = bitcast i32 %325 to float
+ %331 = bitcast i32 %327 to float
+ %332 = bitcast i32 %329 to float
+ %333 = bitcast float %331 to i32
+ %334 = bitcast float %332 to i32
+ %335 = or i32 %333, %334
+ %336 = bitcast i32 %335 to float
+ %337 = bitcast float %330 to i32
+ %338 = bitcast float %336 to i32
+ %339 = or i32 %337, %338
+ %340 = bitcast i32 %339 to float
+ %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+ %342 = extractelement <4 x float> %341, i32 0
+ %343 = fcmp une float 0xC0119BDA60000000, %342
+ %344 = select i1 %343, float 1.000000e+00, float 0.000000e+00
+ %345 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+ %346 = extractelement <4 x float> %345, i32 1
+ %347 = fcmp une float 0xC02085D640000000, %346
+ %348 = select i1 %347, float 1.000000e+00, float 0.000000e+00
+ %349 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+ %350 = extractelement <4 x float> %349, i32 2
+ %351 = fcmp une float 0xBFD7C1BDA0000000, %350
+ %352 = select i1 %351, float 1.000000e+00, float 0.000000e+00
+ %353 = fsub float -0.000000e+00, %344
+ %354 = fptosi float %353 to i32
+ %355 = fsub float -0.000000e+00, %348
+ %356 = fptosi float %355 to i32
+ %357 = fsub float -0.000000e+00, %352
+ %358 = fptosi float %357 to i32
+ %359 = bitcast i32 %354 to float
+ %360 = bitcast i32 %356 to float
+ %361 = bitcast i32 %358 to float
+ %362 = bitcast float %360 to i32
+ %363 = bitcast float %361 to i32
+ %364 = or i32 %362, %363
+ %365 = bitcast i32 %364 to float
+ %366 = bitcast float %359 to i32
+ %367 = bitcast float %365 to i32
+ %368 = or i32 %366, %367
+ %369 = bitcast i32 %368 to float
+ %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %371 = extractelement <4 x float> %370, i32 0
+ %372 = fcmp une float 0x401E1D7DC0000000, %371
+ %373 = select i1 %372, float 1.000000e+00, float 0.000000e+00
+ %374 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %375 = extractelement <4 x float> %374, i32 1
+ %376 = fcmp une float 0xC019893740000000, %375
+ %377 = select i1 %376, float 1.000000e+00, float 0.000000e+00
+ %378 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %379 = extractelement <4 x float> %378, i32 2
+ %380 = fcmp une float 0x40220F0D80000000, %379
+ %381 = select i1 %380, float 1.000000e+00, float 0.000000e+00
+ %382 = fsub float -0.000000e+00, %373
+ %383 = fptosi float %382 to i32
+ %384 = fsub float -0.000000e+00, %377
+ %385 = fptosi float %384 to i32
+ %386 = fsub float -0.000000e+00, %381
+ %387 = fptosi float %386 to i32
+ %388 = bitcast i32 %383 to float
+ %389 = bitcast i32 %385 to float
+ %390 = bitcast i32 %387 to float
+ %391 = bitcast float %389 to i32
+ %392 = bitcast float %390 to i32
+ %393 = or i32 %391, %392
+ %394 = bitcast i32 %393 to float
+ %395 = bitcast float %388 to i32
+ %396 = bitcast float %394 to i32
+ %397 = or i32 %395, %396
+ %398 = bitcast i32 %397 to float
+ %399 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+ %400 = extractelement <4 x float> %399, i32 0
+ %401 = fcmp une float 0xC018E2EB20000000, %400
+ %402 = select i1 %401, float 1.000000e+00, float 0.000000e+00
+ %403 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+ %404 = extractelement <4 x float> %403, i32 1
+ %405 = fcmp une float 0xBFEA8DB8C0000000, %404
+ %406 = select i1 %405, float 1.000000e+00, float 0.000000e+00
+ %407 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+ %408 = extractelement <4 x float> %407, i32 2
+ %409 = fcmp une float 0x4015236E20000000, %408
+ %410 = select i1 %409, float 1.000000e+00, float 0.000000e+00
+ %411 = fsub float -0.000000e+00, %402
+ %412 = fptosi float %411 to i32
+ %413 = fsub float -0.000000e+00, %406
+ %414 = fptosi float %413 to i32
+ %415 = fsub float -0.000000e+00, %410
+ %416 = fptosi float %415 to i32
+ %417 = bitcast i32 %412 to float
+ %418 = bitcast i32 %414 to float
+ %419 = bitcast i32 %416 to float
+ %420 = bitcast float %418 to i32
+ %421 = bitcast float %419 to i32
+ %422 = or i32 %420, %421
+ %423 = bitcast i32 %422 to float
+ %424 = bitcast float %417 to i32
+ %425 = bitcast float %423 to i32
+ %426 = or i32 %424, %425
+ %427 = bitcast i32 %426 to float
+ %428 = insertelement <4 x float> undef, float %340, i32 0
+ %429 = insertelement <4 x float> %428, float %369, i32 1
+ %430 = insertelement <4 x float> %429, float %398, i32 2
+ %431 = insertelement <4 x float> %430, float %427, i32 3
+ %432 = insertelement <4 x float> undef, float %340, i32 0
+ %433 = insertelement <4 x float> %432, float %369, i32 1
+ %434 = insertelement <4 x float> %433, float %398, i32 2
+ %435 = insertelement <4 x float> %434, float %427, i32 3
+ %436 = call float @llvm.AMDGPU.dp4(<4 x float> %431, <4 x float> %435)
+ %437 = bitcast float %436 to i32
+ %438 = icmp ne i32 %437, 0
+ %439 = sext i1 %438 to i32
+ %440 = bitcast i32 %439 to float
+ %441 = bitcast float %440 to i32
+ %442 = xor i32 %441, -1
+ %443 = bitcast i32 %442 to float
+ %444 = load <4 x float> addrspace(8)* null
+ %445 = extractelement <4 x float> %444, i32 0
+ %446 = fcmp une float 0xC00574BC60000000, %445
+ %447 = select i1 %446, float 1.000000e+00, float 0.000000e+00
+ %448 = load <4 x float> addrspace(8)* null
+ %449 = extractelement <4 x float> %448, i32 1
+ %450 = fcmp une float 0x40210068E0000000, %449
+ %451 = select i1 %450, float 1.000000e+00, float 0.000000e+00
+ %452 = load <4 x float> addrspace(8)* null
+ %453 = extractelement <4 x float> %452, i32 2
+ %454 = fcmp une float 0xBFC9A6B500000000, %453
+ %455 = select i1 %454, float 1.000000e+00, float 0.000000e+00
+ %456 = load <4 x float> addrspace(8)* null
+ %457 = extractelement <4 x float> %456, i32 3
+ %458 = fcmp une float 0xC0119BDA60000000, %457
+ %459 = select i1 %458, float 1.000000e+00, float 0.000000e+00
+ %460 = fsub float -0.000000e+00, %447
+ %461 = fptosi float %460 to i32
+ %462 = fsub float -0.000000e+00, %451
+ %463 = fptosi float %462 to i32
+ %464 = fsub float -0.000000e+00, %455
+ %465 = fptosi float %464 to i32
+ %466 = fsub float -0.000000e+00, %459
+ %467 = fptosi float %466 to i32
+ %468 = bitcast i32 %461 to float
+ %469 = bitcast i32 %463 to float
+ %470 = bitcast i32 %465 to float
+ %471 = bitcast i32 %467 to float
+ %472 = bitcast float %468 to i32
+ %473 = bitcast float %469 to i32
+ %474 = or i32 %472, %473
+ %475 = bitcast i32 %474 to float
+ %476 = bitcast float %470 to i32
+ %477 = bitcast float %471 to i32
+ %478 = or i32 %476, %477
+ %479 = bitcast i32 %478 to float
+ %480 = bitcast float %475 to i32
+ %481 = bitcast float %479 to i32
+ %482 = or i32 %480, %481
+ %483 = bitcast i32 %482 to float
+ %484 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %485 = extractelement <4 x float> %484, i32 0
+ %486 = fcmp une float 0xC02085D640000000, %485
+ %487 = select i1 %486, float 1.000000e+00, float 0.000000e+00
+ %488 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %489 = extractelement <4 x float> %488, i32 1
+ %490 = fcmp une float 0xBFD7C1BDA0000000, %489
+ %491 = select i1 %490, float 1.000000e+00, float 0.000000e+00
+ %492 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %493 = extractelement <4 x float> %492, i32 2
+ %494 = fcmp une float 0x401E1D7DC0000000, %493
+ %495 = select i1 %494, float 1.000000e+00, float 0.000000e+00
+ %496 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %497 = extractelement <4 x float> %496, i32 3
+ %498 = fcmp une float 0xC019893740000000, %497
+ %499 = select i1 %498, float 1.000000e+00, float 0.000000e+00
+ %500 = fsub float -0.000000e+00, %487
+ %501 = fptosi float %500 to i32
+ %502 = fsub float -0.000000e+00, %491
+ %503 = fptosi float %502 to i32
+ %504 = fsub float -0.000000e+00, %495
+ %505 = fptosi float %504 to i32
+ %506 = fsub float -0.000000e+00, %499
+ %507 = fptosi float %506 to i32
+ %508 = bitcast i32 %501 to float
+ %509 = bitcast i32 %503 to float
+ %510 = bitcast i32 %505 to float
+ %511 = bitcast i32 %507 to float
+ %512 = bitcast float %508 to i32
+ %513 = bitcast float %509 to i32
+ %514 = or i32 %512, %513
+ %515 = bitcast i32 %514 to float
+ %516 = bitcast float %510 to i32
+ %517 = bitcast float %511 to i32
+ %518 = or i32 %516, %517
+ %519 = bitcast i32 %518 to float
+ %520 = bitcast float %515 to i32
+ %521 = bitcast float %519 to i32
+ %522 = or i32 %520, %521
+ %523 = bitcast i32 %522 to float
+ %524 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %525 = extractelement <4 x float> %524, i32 0
+ %526 = fcmp une float 0x40220F0D80000000, %525
+ %527 = select i1 %526, float 1.000000e+00, float 0.000000e+00
+ %528 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %529 = extractelement <4 x float> %528, i32 1
+ %530 = fcmp une float 0xC018E2EB20000000, %529
+ %531 = select i1 %530, float 1.000000e+00, float 0.000000e+00
+ %532 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %533 = extractelement <4 x float> %532, i32 2
+ %534 = fcmp une float 0xBFEA8DB8C0000000, %533
+ %535 = select i1 %534, float 1.000000e+00, float 0.000000e+00
+ %536 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %537 = extractelement <4 x float> %536, i32 3
+ %538 = fcmp une float 0x4015236E20000000, %537
+ %539 = select i1 %538, float 1.000000e+00, float 0.000000e+00
+ %540 = fsub float -0.000000e+00, %527
+ %541 = fptosi float %540 to i32
+ %542 = fsub float -0.000000e+00, %531
+ %543 = fptosi float %542 to i32
+ %544 = fsub float -0.000000e+00, %535
+ %545 = fptosi float %544 to i32
+ %546 = fsub float -0.000000e+00, %539
+ %547 = fptosi float %546 to i32
+ %548 = bitcast i32 %541 to float
+ %549 = bitcast i32 %543 to float
+ %550 = bitcast i32 %545 to float
+ %551 = bitcast i32 %547 to float
+ %552 = bitcast float %548 to i32
+ %553 = bitcast float %549 to i32
+ %554 = or i32 %552, %553
+ %555 = bitcast i32 %554 to float
+ %556 = bitcast float %550 to i32
+ %557 = bitcast float %551 to i32
+ %558 = or i32 %556, %557
+ %559 = bitcast i32 %558 to float
+ %560 = bitcast float %555 to i32
+ %561 = bitcast float %559 to i32
+ %562 = or i32 %560, %561
+ %563 = bitcast i32 %562 to float
+ %564 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %565 = extractelement <4 x float> %564, i32 0
+ %566 = fcmp une float 0x4016ED5D00000000, %565
+ %567 = select i1 %566, float 1.000000e+00, float 0.000000e+00
+ %568 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %569 = extractelement <4 x float> %568, i32 1
+ %570 = fcmp une float 0x402332FEC0000000, %569
+ %571 = select i1 %570, float 1.000000e+00, float 0.000000e+00
+ %572 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %573 = extractelement <4 x float> %572, i32 2
+ %574 = fcmp une float 0xC01484B5E0000000, %573
+ %575 = select i1 %574, float 1.000000e+00, float 0.000000e+00
+ %576 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %577 = extractelement <4 x float> %576, i32 3
+ %578 = fcmp une float 0x400179A6C0000000, %577
+ %579 = select i1 %578, float 1.000000e+00, float 0.000000e+00
+ %580 = fsub float -0.000000e+00, %567
+ %581 = fptosi float %580 to i32
+ %582 = fsub float -0.000000e+00, %571
+ %583 = fptosi float %582 to i32
+ %584 = fsub float -0.000000e+00, %575
+ %585 = fptosi float %584 to i32
+ %586 = fsub float -0.000000e+00, %579
+ %587 = fptosi float %586 to i32
+ %588 = bitcast i32 %581 to float
+ %589 = bitcast i32 %583 to float
+ %590 = bitcast i32 %585 to float
+ %591 = bitcast i32 %587 to float
+ %592 = bitcast float %588 to i32
+ %593 = bitcast float %589 to i32
+ %594 = or i32 %592, %593
+ %595 = bitcast i32 %594 to float
+ %596 = bitcast float %590 to i32
+ %597 = bitcast float %591 to i32
+ %598 = or i32 %596, %597
+ %599 = bitcast i32 %598 to float
+ %600 = bitcast float %595 to i32
+ %601 = bitcast float %599 to i32
+ %602 = or i32 %600, %601
+ %603 = bitcast i32 %602 to float
+ %604 = insertelement <4 x float> undef, float %483, i32 0
+ %605 = insertelement <4 x float> %604, float %523, i32 1
+ %606 = insertelement <4 x float> %605, float %563, i32 2
+ %607 = insertelement <4 x float> %606, float %603, i32 3
+ %608 = insertelement <4 x float> undef, float %483, i32 0
+ %609 = insertelement <4 x float> %608, float %523, i32 1
+ %610 = insertelement <4 x float> %609, float %563, i32 2
+ %611 = insertelement <4 x float> %610, float %603, i32 3
+ %612 = call float @llvm.AMDGPU.dp4(<4 x float> %607, <4 x float> %611)
+ %613 = bitcast float %612 to i32
+ %614 = icmp ne i32 %613, 0
+ %615 = sext i1 %614 to i32
+ %616 = bitcast i32 %615 to float
+ %617 = bitcast float %616 to i32
+ %618 = xor i32 %617, -1
+ %619 = bitcast i32 %618 to float
+ %620 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %621 = extractelement <4 x float> %620, i32 0
+ %622 = fcmp une float 0x40210068E0000000, %621
+ %623 = select i1 %622, float 1.000000e+00, float 0.000000e+00
+ %624 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %625 = extractelement <4 x float> %624, i32 1
+ %626 = fcmp une float 0xBFC9A6B500000000, %625
+ %627 = select i1 %626, float 1.000000e+00, float 0.000000e+00
+ %628 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %629 = extractelement <4 x float> %628, i32 2
+ %630 = fcmp une float 0xC0119BDA60000000, %629
+ %631 = select i1 %630, float 1.000000e+00, float 0.000000e+00
+ %632 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %633 = extractelement <4 x float> %632, i32 3
+ %634 = fcmp une float 0xC02085D640000000, %633
+ %635 = select i1 %634, float 1.000000e+00, float 0.000000e+00
+ %636 = fsub float -0.000000e+00, %623
+ %637 = fptosi float %636 to i32
+ %638 = fsub float -0.000000e+00, %627
+ %639 = fptosi float %638 to i32
+ %640 = fsub float -0.000000e+00, %631
+ %641 = fptosi float %640 to i32
+ %642 = fsub float -0.000000e+00, %635
+ %643 = fptosi float %642 to i32
+ %644 = bitcast i32 %637 to float
+ %645 = bitcast i32 %639 to float
+ %646 = bitcast i32 %641 to float
+ %647 = bitcast i32 %643 to float
+ %648 = bitcast float %644 to i32
+ %649 = bitcast float %645 to i32
+ %650 = or i32 %648, %649
+ %651 = bitcast i32 %650 to float
+ %652 = bitcast float %646 to i32
+ %653 = bitcast float %647 to i32
+ %654 = or i32 %652, %653
+ %655 = bitcast i32 %654 to float
+ %656 = bitcast float %651 to i32
+ %657 = bitcast float %655 to i32
+ %658 = or i32 %656, %657
+ %659 = bitcast i32 %658 to float
+ %660 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %661 = extractelement <4 x float> %660, i32 0
+ %662 = fcmp une float 0xBFD7C1BDA0000000, %661
+ %663 = select i1 %662, float 1.000000e+00, float 0.000000e+00
+ %664 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %665 = extractelement <4 x float> %664, i32 1
+ %666 = fcmp une float 0x401E1D7DC0000000, %665
+ %667 = select i1 %666, float 1.000000e+00, float 0.000000e+00
+ %668 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %669 = extractelement <4 x float> %668, i32 2
+ %670 = fcmp une float 0xC019893740000000, %669
+ %671 = select i1 %670, float 1.000000e+00, float 0.000000e+00
+ %672 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %673 = extractelement <4 x float> %672, i32 3
+ %674 = fcmp une float 0x40220F0D80000000, %673
+ %675 = select i1 %674, float 1.000000e+00, float 0.000000e+00
+ %676 = fsub float -0.000000e+00, %663
+ %677 = fptosi float %676 to i32
+ %678 = fsub float -0.000000e+00, %667
+ %679 = fptosi float %678 to i32
+ %680 = fsub float -0.000000e+00, %671
+ %681 = fptosi float %680 to i32
+ %682 = fsub float -0.000000e+00, %675
+ %683 = fptosi float %682 to i32
+ %684 = bitcast i32 %677 to float
+ %685 = bitcast i32 %679 to float
+ %686 = bitcast i32 %681 to float
+ %687 = bitcast i32 %683 to float
+ %688 = bitcast float %684 to i32
+ %689 = bitcast float %685 to i32
+ %690 = or i32 %688, %689
+ %691 = bitcast i32 %690 to float
+ %692 = bitcast float %686 to i32
+ %693 = bitcast float %687 to i32
+ %694 = or i32 %692, %693
+ %695 = bitcast i32 %694 to float
+ %696 = bitcast float %691 to i32
+ %697 = bitcast float %695 to i32
+ %698 = or i32 %696, %697
+ %699 = bitcast i32 %698 to float
+ %700 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %701 = extractelement <4 x float> %700, i32 0
+ %702 = fcmp une float 0xC018E2EB20000000, %701
+ %703 = select i1 %702, float 1.000000e+00, float 0.000000e+00
+ %704 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %705 = extractelement <4 x float> %704, i32 1
+ %706 = fcmp une float 0xBFEA8DB8C0000000, %705
+ %707 = select i1 %706, float 1.000000e+00, float 0.000000e+00
+ %708 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %709 = extractelement <4 x float> %708, i32 2
+ %710 = fcmp une float 0x4015236E20000000, %709
+ %711 = select i1 %710, float 1.000000e+00, float 0.000000e+00
+ %712 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %713 = extractelement <4 x float> %712, i32 3
+ %714 = fcmp une float 0x4016ED5D00000000, %713
+ %715 = select i1 %714, float 1.000000e+00, float 0.000000e+00
+ %716 = fsub float -0.000000e+00, %703
+ %717 = fptosi float %716 to i32
+ %718 = fsub float -0.000000e+00, %707
+ %719 = fptosi float %718 to i32
+ %720 = fsub float -0.000000e+00, %711
+ %721 = fptosi float %720 to i32
+ %722 = fsub float -0.000000e+00, %715
+ %723 = fptosi float %722 to i32
+ %724 = bitcast i32 %717 to float
+ %725 = bitcast i32 %719 to float
+ %726 = bitcast i32 %721 to float
+ %727 = bitcast i32 %723 to float
+ %728 = bitcast float %724 to i32
+ %729 = bitcast float %725 to i32
+ %730 = or i32 %728, %729
+ %731 = bitcast i32 %730 to float
+ %732 = bitcast float %726 to i32
+ %733 = bitcast float %727 to i32
+ %734 = or i32 %732, %733
+ %735 = bitcast i32 %734 to float
+ %736 = bitcast float %731 to i32
+ %737 = bitcast float %735 to i32
+ %738 = or i32 %736, %737
+ %739 = bitcast i32 %738 to float
+ %740 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %741 = extractelement <4 x float> %740, i32 0
+ %742 = fcmp une float 0x402332FEC0000000, %741
+ %743 = select i1 %742, float 1.000000e+00, float 0.000000e+00
+ %744 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %745 = extractelement <4 x float> %744, i32 1
+ %746 = fcmp une float 0xC01484B5E0000000, %745
+ %747 = select i1 %746, float 1.000000e+00, float 0.000000e+00
+ %748 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %749 = extractelement <4 x float> %748, i32 2
+ %750 = fcmp une float 0x400179A6C0000000, %749
+ %751 = select i1 %750, float 1.000000e+00, float 0.000000e+00
+ %752 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %753 = extractelement <4 x float> %752, i32 3
+ %754 = fcmp une float 0xBFEE752540000000, %753
+ %755 = select i1 %754, float 1.000000e+00, float 0.000000e+00
+ %756 = fsub float -0.000000e+00, %743
+ %757 = fptosi float %756 to i32
+ %758 = fsub float -0.000000e+00, %747
+ %759 = fptosi float %758 to i32
+ %760 = fsub float -0.000000e+00, %751
+ %761 = fptosi float %760 to i32
+ %762 = fsub float -0.000000e+00, %755
+ %763 = fptosi float %762 to i32
+ %764 = bitcast i32 %757 to float
+ %765 = bitcast i32 %759 to float
+ %766 = bitcast i32 %761 to float
+ %767 = bitcast i32 %763 to float
+ %768 = bitcast float %764 to i32
+ %769 = bitcast float %765 to i32
+ %770 = or i32 %768, %769
+ %771 = bitcast i32 %770 to float
+ %772 = bitcast float %766 to i32
+ %773 = bitcast float %767 to i32
+ %774 = or i32 %772, %773
+ %775 = bitcast i32 %774 to float
+ %776 = bitcast float %771 to i32
+ %777 = bitcast float %775 to i32
+ %778 = or i32 %776, %777
+ %779 = bitcast i32 %778 to float
+ %780 = insertelement <4 x float> undef, float %659, i32 0
+ %781 = insertelement <4 x float> %780, float %699, i32 1
+ %782 = insertelement <4 x float> %781, float %739, i32 2
+ %783 = insertelement <4 x float> %782, float %779, i32 3
+ %784 = insertelement <4 x float> undef, float %659, i32 0
+ %785 = insertelement <4 x float> %784, float %699, i32 1
+ %786 = insertelement <4 x float> %785, float %739, i32 2
+ %787 = insertelement <4 x float> %786, float %779, i32 3
+ %788 = call float @llvm.AMDGPU.dp4(<4 x float> %783, <4 x float> %787)
+ %789 = bitcast float %788 to i32
+ %790 = icmp ne i32 %789, 0
+ %791 = sext i1 %790 to i32
+ %792 = bitcast i32 %791 to float
+ %793 = bitcast float %792 to i32
+ %794 = xor i32 %793, -1
+ %795 = bitcast i32 %794 to float
+ %796 = bitcast float %91 to i32
+ %797 = bitcast float %179 to i32
+ %798 = and i32 %796, %797
+ %799 = bitcast i32 %798 to float
+ %800 = bitcast float %311 to i32
+ %801 = bitcast float %443 to i32
+ %802 = and i32 %800, %801
+ %803 = bitcast i32 %802 to float
+ %804 = bitcast float %799 to i32
+ %805 = bitcast float %803 to i32
+ %806 = and i32 %804, %805
+ %807 = bitcast i32 %806 to float
+ %808 = bitcast float %619 to i32
+ %809 = bitcast float %795 to i32
+ %810 = and i32 %808, %809
+ %811 = bitcast i32 %810 to float
+ %812 = bitcast float %807 to i32
+ %813 = bitcast float %811 to i32
+ %814 = and i32 %812, %813
+ %815 = bitcast i32 %814 to float
+ %816 = bitcast float %815 to i32
+ %817 = icmp ne i32 %816, 0
+ %. = select i1 %817, float 1.000000e+00, float 0.000000e+00
+ %.32 = select i1 %817, float 0.000000e+00, float 1.000000e+00
+ %818 = insertelement <4 x float> undef, float %0, i32 0
+ %819 = insertelement <4 x float> %818, float %1, i32 1
+ %820 = insertelement <4 x float> %819, float %2, i32 2
+ %821 = insertelement <4 x float> %820, float %3, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %821, i32 60, i32 1)
+ %822 = insertelement <4 x float> undef, float %.32, i32 0
+ %823 = insertelement <4 x float> %822, float %., i32 1
+ %824 = insertelement <4 x float> %823, float 0.000000e+00, i32 2
+ %825 = insertelement <4 x float> %824, float 1.000000e+00, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %825, i32 0, i32 2)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) #1
+
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { readnone }
diff --git a/test/CodeGen/R600/and.v4i32.ll b/test/CodeGen/R600/and.v4i32.ll
new file mode 100644
index 000000000000..662085e2d673
--- /dev/null
+++ b/test/CodeGen/R600/and.v4i32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32> addrspace(1) * %b_ptr
+ %result = and <4 x i32> %a, %b
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
new file mode 100644
index 000000000000..fd958b365961
--- /dev/null
+++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -0,0 +1,36 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This test is for a bug in
+; DAGCombiner::reduceBuildVecConvertToConvertBuildVec() where
+; the wrong type was being passed to
+; TargetLowering::getOperationAction() when checking the legality of
+; ISD::UINT_TO_FP and ISD::SINT_TO_FP opcodes.
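+; (That combine rewrites a build_vector of sint_to_fp/uint_to_fp nodes into
+; a single conversion of a build_vector, so the legality query must use the
+; integer vector type rather than the floating-point result type.)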
+
+
+; CHECK: @sint
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %sint = load i32 addrspace(1) * %in
+ %conv = sitofp i32 %sint to float
+ %0 = insertelement <4 x float> undef, float %conv, i32 0
+ %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
+ store <4 x float> %splat, <4 x float> addrspace(1)* %out
+ ret void
+}
+
+;CHECK: @uint
+;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+ %ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %uint = load i32 addrspace(1) * %in
+ %conv = uitofp i32 %uint to float
+ %0 = insertelement <4 x float> undef, float %conv, i32 0
+ %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
+ store <4 x float> %splat, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll
new file mode 100644
index 000000000000..09baee7a1dcd
--- /dev/null
+++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; PRED_SET* instructions must be tied to any instruction that uses their
+; result. This tests that there are no instructions between the PRED_SET*
+; and the PREDICATE_BREAK in this loop.
+
+; CHECK: @loop_ge
+; CHECK: LOOP_START_DX10
+; CHECK: PRED_SET
+; CHECK-NEXT: JUMP
+; CHECK-NEXT: LOOP_BREAK
+define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind {
+entry:
+ %cmp5 = icmp sgt i32 %iterations, 0
+ br i1 %cmp5, label %for.body, label %for.end
+
+for.body: ; preds = %for.body, %entry
+ %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ]
+ %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %i.07 = add nsw i32 %i.07.in, -1
+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06
+ store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4
+ %add = add nsw i32 %ai.06, 1
+ %exitcond = icmp eq i32 %add, %iterations
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
new file mode 100644
index 000000000000..0407533eaa5f
--- /dev/null
+++ b/test/CodeGen/R600/fabs.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MOV T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @fabs(float %r0)
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @fabs(float) readnone
diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll
new file mode 100644
index 000000000000..d7d1b6572c41
--- /dev/null
+++ b/test/CodeGen/R600/fadd.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = fadd float %r0, %r1
+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
diff --git a/test/CodeGen/R600/fadd.v4f32.ll b/test/CodeGen/R600/fadd.v4f32.ll
new file mode 100644
index 000000000000..85dbfd52cbb3
--- /dev/null
+++ b/test/CodeGen/R600/fadd.v4f32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float> addrspace(1)* %in
+ %b = load <4 x float> addrspace(1)* %b_ptr
+ %result = fadd <4 x float> %a, %b
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll
new file mode 100644
index 000000000000..a94cfb5cf2fe
--- /dev/null
+++ b/test/CodeGen/R600/fcmp-cnd.ll
@@ -0,0 +1,14 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;Not checking arguments 2 and 3 to CNDE, because they may be either
+;registers or literal.x, depending on what the optimizer does.
+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+ %0 = load float addrspace(1)* %in
+ %cmp = fcmp oeq float %0, 0.000000e+00
+ %value = select i1 %cmp, i32 2, i32 3
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll
new file mode 100644
index 000000000000..55aba0d72d39
--- /dev/null
+++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This test checks for a bug in R600TargetLowering::LowerSELECT_CC where the
+; chance to optimize the fcmp + select instructions to SET* was missed
+; because the operands of the fcmp and the select had different types.
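+;
+; The -1/0 select values matter here: the DX10-style SET* instructions
+; return all-ones (-1) for true and 0 for false, so the compare and select
+; should fold into a single SET*_DX10.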
+
+; CHECK: SET{{[A-Z]+}}_DX10
+
+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+ %0 = load float addrspace(1)* %in
+ %cmp = fcmp oeq float %0, 0.000000e+00
+ %value = select i1 %cmp, i32 -1, i32 0
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll
new file mode 100644
index 000000000000..37f621d23958
--- /dev/null
+++ b/test/CodeGen/R600/fcmp.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @fcmp_sext
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+ %0 = load float addrspace(1)* %in
+ %arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1
+ %1 = load float addrspace(1)* %arrayidx1
+ %cmp = fcmp oeq float %0, %1
+ %sext = sext i1 %cmp to i32
+ store i32 %sext, i32 addrspace(1)* %out
+ ret void
+}
+
+; This test checks that a setcc node with f32 operands is lowered to a
+; SET*_DX10 instruction. Previously we were lowering this to:
+; SET* + FP_TO_SINT
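+;
+; The plain SET* variants return 0.0/1.0 as floats, which is why the old
+; lowering needed a trailing FP_TO_SINT; the _DX10 forms produce the 0/-1
+; integer pattern directly.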
+
+; CHECK: @fcmp_br
+; CHECK: SET{{[N]*}}E_DX10 T{{[0-9]+\.[XYZW], [a-zA-Z0-9, .]+}}(5.0
+
+define void @fcmp_br(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp oeq float %in, 5.0
+ br i1 %0, label %IF, label %ENDIF
+
+IF:
+ %1 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %1
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll
new file mode 100644
index 000000000000..79e677f541f5
--- /dev/null
+++ b/test/CodeGen/R600/fdiv.v4f32.ll
@@ -0,0 +1,19 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float> addrspace(1)* %in
+ %b = load <4 x float> addrspace(1)* %b_ptr
+ %result = fdiv <4 x float> %a, %b
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll
new file mode 100644
index 000000000000..845330f28419
--- /dev/null
+++ b/test/CodeGen/R600/floor.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: FLOOR T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @floor(float %r0)
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @floor(float) readonly
diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll
new file mode 100644
index 000000000000..a3d4d0ff0db7
--- /dev/null
+++ b/test/CodeGen/R600/fmad.ll
@@ -0,0 +1,19 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MULADD_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = call float @llvm.R600.load.input(i32 2)
+ %r3 = fmul float %r0, %r1
+ %r4 = fadd float %r3, %r2
+ call void @llvm.AMDGPU.store.output(float %r4, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @fabs(float) readnone
diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll
new file mode 100644
index 000000000000..3708f0b9eed2
--- /dev/null
+++ b/test/CodeGen/R600/fmax.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MAX T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = fcmp uge float %r0, %r1
+ %r3 = select i1 %r2, float %r0, float %r1
+ call void @llvm.AMDGPU.store.output(float %r3, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll
new file mode 100644
index 000000000000..19d59ab3061e
--- /dev/null
+++ b/test/CodeGen/R600/fmin.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = fcmp uge float %r0, %r1
+ %r3 = select i1 %r2, float %r1, float %r0
+ call void @llvm.AMDGPU.store.output(float %r3, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll
new file mode 100644
index 000000000000..eb1d523c0bb4
--- /dev/null
+++ b/test/CodeGen/R600/fmul.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = fmul float %r0, %r1
+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
diff --git a/test/CodeGen/R600/fmul.v4f32.ll b/test/CodeGen/R600/fmul.v4f32.ll
new file mode 100644
index 000000000000..6d44a0c5c782
--- /dev/null
+++ b/test/CodeGen/R600/fmul.v4f32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float> addrspace(1)* %in
+ %b = load <4 x float> addrspace(1)* %b_ptr
+ %result = fmul <4 x float> %a, %b
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll
new file mode 100644
index 000000000000..591aa52676a4
--- /dev/null
+++ b/test/CodeGen/R600/fsub.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = fsub float %r0, %r1
+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
diff --git a/test/CodeGen/R600/fsub.v4f32.ll b/test/CodeGen/R600/fsub.v4f32.ll
new file mode 100644
index 000000000000..612a57e4b609
--- /dev/null
+++ b/test/CodeGen/R600/fsub.v4f32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float> addrspace(1)* %in
+ %b = load <4 x float> addrspace(1)* %b_ptr
+ %result = fsub <4 x float> %a, %b
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/i8_to_double_to_float.ll b/test/CodeGen/R600/i8_to_double_to_float.ll
new file mode 100644
index 000000000000..39f33227fa4b
--- /dev/null
+++ b/test/CodeGen/R600/i8_to_double_to_float.ll
@@ -0,0 +1,11 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
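+;
+; Every i8 value converts exactly to both float and double, so the
+; i8 -> double -> float chain can presumably be collapsed into a single
+; UINT_TO_FLT without changing any results.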
+
+define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) {
+ %1 = load i8 addrspace(1)* %in
+ %2 = uitofp i8 %1 to double
+ %3 = fptrunc double %2 to float
+ store float %3, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
new file mode 100644
index 000000000000..71705a64f50e
--- /dev/null
+++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
@@ -0,0 +1,18 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;Test that a select with reversed True/False values is correctly lowered
+;to a SETNE_INT. There should only be one SETNE_INT instruction.
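+;
+;A select of the form (select (icmp eq a, b), 0, -1) is equivalent to
+;(sext (icmp ne a, b)), which is why a single SETNE_INT suffices.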
+
+;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK-NOT: SETNE_INT
+
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+ %0 = load i32 addrspace(1)* %in
+ %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
+ %1 = load i32 addrspace(1)* %arrayidx1
+ %cmp = icmp eq i32 %0, %1
+ %value = select i1 %cmp, i32 0, i32 -1
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/jump_address.ll b/test/CodeGen/R600/jump_address.ll
new file mode 100644
index 000000000000..cd35bffb1304
--- /dev/null
+++ b/test/CodeGen/R600/jump_address.ll
@@ -0,0 +1,50 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: JUMP @4
+
+define void @main() #0 {
+main_body:
+ %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = bitcast float %1 to i32
+ %3 = icmp eq i32 %2, 0
+ %4 = sext i1 %3 to i32
+ %5 = bitcast i32 %4 to float
+ %6 = bitcast float %5 to i32
+ %7 = icmp ne i32 %6, 0
+ br i1 %7, label %ENDIF, label %ELSE
+
+ELSE: ; preds = %main_body
+ %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %9 = extractelement <4 x float> %8, i32 0
+ %10 = bitcast float %9 to i32
+ %11 = icmp eq i32 %10, 1
+ %12 = sext i1 %11 to i32
+ %13 = bitcast i32 %12 to float
+ %14 = bitcast float %13 to i32
+ %15 = icmp ne i32 %14, 0
+ br i1 %15, label %IF13, label %ENDIF
+
+ENDIF: ; preds = %IF13, %ELSE, %main_body
+ %temp.0 = phi float [ 0xFFF8000000000000, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+ %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ]
+ %temp2.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+ %temp3.0 = phi float [ 5.000000e-01, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+ %16 = insertelement <4 x float> undef, float %temp.0, i32 0
+ %17 = insertelement <4 x float> %16, float %temp1.0, i32 1
+ %18 = insertelement <4 x float> %17, float %temp2.0, i32 2
+ %19 = insertelement <4 x float> %18, float %temp3.0, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
+ ret void
+
+IF13: ; preds = %ELSE
+ %20 = load <4 x float> addrspace(8)* null
+ %21 = extractelement <4 x float> %20, i32 0
+ %22 = fsub float -0.000000e+00, %21
+ %23 = fadd float 0xFFF8000000000000, %22
+ br label %ENDIF
+}
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
new file mode 100644
index 000000000000..3d70e4bd54aa
--- /dev/null
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -0,0 +1,100 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @main1
+; CHECK: MOV T{{[0-9]+\.[XYZW], KC0}}
+define void @main1() {
+main_body:
+ %0 = load <4 x float> addrspace(8)* null
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %3 = extractelement <4 x float> %2, i32 0
+ %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %5 = extractelement <4 x float> %4, i32 0
+ %6 = fcmp ult float %1, 0.000000e+00
+ %7 = select i1 %6, float %3, float %5
+ %8 = load <4 x float> addrspace(8)* null
+ %9 = extractelement <4 x float> %8, i32 1
+ %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %11 = extractelement <4 x float> %10, i32 1
+ %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %13 = extractelement <4 x float> %12, i32 1
+ %14 = fcmp ult float %9, 0.000000e+00
+ %15 = select i1 %14, float %11, float %13
+ %16 = load <4 x float> addrspace(8)* null
+ %17 = extractelement <4 x float> %16, i32 2
+ %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %19 = extractelement <4 x float> %18, i32 2
+ %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %21 = extractelement <4 x float> %20, i32 2
+ %22 = fcmp ult float %17, 0.000000e+00
+ %23 = select i1 %22, float %19, float %21
+ %24 = load <4 x float> addrspace(8)* null
+ %25 = extractelement <4 x float> %24, i32 3
+ %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %27 = extractelement <4 x float> %26, i32 3
+ %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %29 = extractelement <4 x float> %28, i32 3
+ %30 = fcmp ult float %25, 0.000000e+00
+ %31 = select i1 %30, float %27, float %29
+ %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
+ %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
+ %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00)
+ %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00)
+ %36 = insertelement <4 x float> undef, float %32, i32 0
+ %37 = insertelement <4 x float> %36, float %33, i32 1
+ %38 = insertelement <4 x float> %37, float %34, i32 2
+ %39 = insertelement <4 x float> %38, float %35, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
+ ret void
+}
+
+; CHECK: @main2
+; CHECK-NOT: MOV
+define void @main2() {
+main_body:
+ %0 = load <4 x float> addrspace(8)* null
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %3 = extractelement <4 x float> %2, i32 0
+ %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %5 = extractelement <4 x float> %4, i32 1
+ %6 = fcmp ult float %1, 0.000000e+00
+ %7 = select i1 %6, float %3, float %5
+ %8 = load <4 x float> addrspace(8)* null
+ %9 = extractelement <4 x float> %8, i32 1
+ %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %11 = extractelement <4 x float> %10, i32 0
+ %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %13 = extractelement <4 x float> %12, i32 1
+ %14 = fcmp ult float %9, 0.000000e+00
+ %15 = select i1 %14, float %11, float %13
+ %16 = load <4 x float> addrspace(8)* null
+ %17 = extractelement <4 x float> %16, i32 2
+ %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %19 = extractelement <4 x float> %18, i32 3
+ %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %21 = extractelement <4 x float> %20, i32 2
+ %22 = fcmp ult float %17, 0.000000e+00
+ %23 = select i1 %22, float %19, float %21
+ %24 = load <4 x float> addrspace(8)* null
+ %25 = extractelement <4 x float> %24, i32 3
+ %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %27 = extractelement <4 x float> %26, i32 3
+ %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %29 = extractelement <4 x float> %28, i32 2
+ %30 = fcmp ult float %25, 0.000000e+00
+ %31 = select i1 %30, float %27, float %29
+ %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
+ %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
+ %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00)
+ %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00)
+ %36 = insertelement <4 x float> undef, float %32, i32 0
+ %37 = insertelement <4 x float> %36, float %33, i32 1
+ %38 = insertelement <4 x float> %37, float %34, i32 2
+ %39 = insertelement <4 x float> %38, float %35, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
+ ret void
+}
+
+declare float @llvm.AMDIL.clamp.(float, float, float) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
diff --git a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
new file mode 100644
index 000000000000..1aae7f9f91f4
--- /dev/null
+++ b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This tests a bug where LegalizeDAG was not checking the target's
+; BooleanContents value and was always using 1 for true when expanding
+; setcc to select_cc.
+;
+; This bug caused the icmp IR instruction to be expanded to two machine
+; instructions, when only one was needed.
+;
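+; (For R600 the boolean content is presumably zero-or-negative-one, so the
+; expanded select_cc can reuse the setcc result directly instead of
+; materializing a separate 0/1 value.)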
+
+; CHECK: @setcc_expand
+; CHECK: SET
+; CHECK-NOT: CND
+define void @setcc_expand(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp eq i32 %in, 5
+ br i1 %0, label %IF, label %ENDIF
+IF:
+ %1 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %1
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/lit.local.cfg b/test/CodeGen/R600/lit.local.cfg
new file mode 100644
index 000000000000..36ee493e5945
--- /dev/null
+++ b/test/CodeGen/R600/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+ if not config.parent:
+ return config
+ return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if 'R600' not in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
new file mode 100644
index 000000000000..e69f64e0e142
--- /dev/null
+++ b/test/CodeGen/R600/literals.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Test using an integer literal constant.
+; Generated ASM should be:
+; ADD_INT REG literal.x, 5
+; or
+; ADD_INT literal.x REG, 5
+
+; CHECK: @i32_literal
+; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5
+define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = add i32 5, %in
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; Test using a float literal constant.
+; Generated ASM should be:
+; ADD REG literal.x, 5.0
+; or
+; ADD literal.x REG, 5.0
+
+; CHECK: @float_literal
+; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0
+define void @float_literal(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fadd float 5.0, %in
+ store float %0, float addrspace(1)* %out
+ ret void
+}
+
diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll
new file mode 100644
index 000000000000..693eb27457c2
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll
@@ -0,0 +1,17 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = call float @llvm.AMDGPU.mul(float %r0, float %r1)
+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @llvm.AMDGPU.mul(float, float) readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
new file mode 100644
index 000000000000..74331fa26934
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
@@ -0,0 +1,42 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 1
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 2
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 3
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 4
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 5
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 6
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 7
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 8
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 9
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 10
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 11
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 12
+;CHECK: TEX_SAMPLE_C T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 13
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 14
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 15
+;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 16
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %addr = load <4 x float> addrspace(1)* %in
+ %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %addr, i32 0, i32 0, i32 1)
+ %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res1, i32 0, i32 0, i32 2)
+ %res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res2, i32 0, i32 0, i32 3)
+ %res4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res3, i32 0, i32 0, i32 4)
+ %res5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res4, i32 0, i32 0, i32 5)
+ %res6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res5, i32 0, i32 0, i32 6)
+ %res7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res6, i32 0, i32 0, i32 7)
+ %res8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res7, i32 0, i32 0, i32 8)
+ %res9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res8, i32 0, i32 0, i32 9)
+ %res10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res9, i32 0, i32 0, i32 10)
+ %res11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res10, i32 0, i32 0, i32 11)
+ %res12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res11, i32 0, i32 0, i32 12)
+ %res13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res12, i32 0, i32 0, i32 13)
+ %res14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res13, i32 0, i32 0, i32 14)
+ %res15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res14, i32 0, i32 0, i32 15)
+ %res16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res15, i32 0, i32 0, i32 16)
+ store <4 x float> %res16, <4 x float> addrspace(1)* %out
+ ret void
+}
+
+declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
new file mode 100644
index 000000000000..fac957f7eeec
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: TRUNC T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.AMDGPU.trunc(float %r0)
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @llvm.AMDGPU.trunc(float) readnone
diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
new file mode 100644
index 000000000000..bf0cdaa2fa3a
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
@@ -0,0 +1,21 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: S_MOV_B32
+;CHECK-NEXT: V_INTERP_MOV_F32
+
+define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
+main_body:
+ %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
+ %5 = call i32 @llvm.SI.packf16(float %4, float %4)
+ %6 = bitcast i32 %5 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6)
+ ret void
+}
+
+declare void @llvm.AMDGPU.shader.type(i32)
+
+declare float @llvm.SI.fs.constant(i32, i32, i32) readnone
+
+declare i32 @llvm.SI.packf16(float, float) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll
new file mode 100644
index 000000000000..c724395b98c2
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -0,0 +1,106 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE_C
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE
+
+define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+ %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
+ %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1
+ %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2
+ %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3
+ %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0
+ %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1
+ %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2
+ %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3
+ %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0
+ %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1
+ %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2
+ %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3
+ %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0
+ %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
+ %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
+ %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
+ %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v1,
+ <8 x i32> undef, <4 x i32> undef, i32 1)
+ %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v2,
+ <8 x i32> undef, <4 x i32> undef, i32 2)
+ %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v3,
+ <8 x i32> undef, <4 x i32> undef, i32 3)
+ %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v4,
+ <8 x i32> undef, <4 x i32> undef, i32 4)
+ %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v5,
+ <8 x i32> undef, <4 x i32> undef, i32 5)
+ %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v6,
+ <8 x i32> undef, <4 x i32> undef, i32 6)
+ %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v7,
+ <8 x i32> undef, <4 x i32> undef, i32 7)
+ %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v8,
+ <8 x i32> undef, <4 x i32> undef, i32 8)
+ %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v9,
+ <8 x i32> undef, <4 x i32> undef, i32 9)
+ %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v10,
+ <8 x i32> undef, <4 x i32> undef, i32 10)
+ %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v11,
+ <8 x i32> undef, <4 x i32> undef, i32 11)
+ %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v12,
+ <8 x i32> undef, <4 x i32> undef, i32 12)
+ %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v13,
+ <8 x i32> undef, <4 x i32> undef, i32 13)
+ %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v14,
+ <8 x i32> undef, <4 x i32> undef, i32 14)
+ %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v15,
+ <8 x i32> undef, <4 x i32> undef, i32 15)
+ %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v16,
+ <8 x i32> undef, <4 x i32> undef, i32 16)
+ %e1 = extractelement <4 x float> %res1, i32 0
+ %e2 = extractelement <4 x float> %res2, i32 0
+ %e3 = extractelement <4 x float> %res3, i32 0
+ %e4 = extractelement <4 x float> %res4, i32 0
+ %e5 = extractelement <4 x float> %res5, i32 0
+ %e6 = extractelement <4 x float> %res6, i32 0
+ %e7 = extractelement <4 x float> %res7, i32 0
+ %e8 = extractelement <4 x float> %res8, i32 0
+ %e9 = extractelement <4 x float> %res9, i32 0
+ %e10 = extractelement <4 x float> %res10, i32 0
+ %e11 = extractelement <4 x float> %res11, i32 0
+ %e12 = extractelement <4 x float> %res12, i32 0
+ %e13 = extractelement <4 x float> %res13, i32 0
+ %e14 = extractelement <4 x float> %res14, i32 0
+ %e15 = extractelement <4 x float> %res15, i32 0
+ %e16 = extractelement <4 x float> %res16, i32 0
+ %s1 = fadd float %e1, %e2
+ %s2 = fadd float %s1, %e3
+ %s3 = fadd float %s2, %e4
+ %s4 = fadd float %s3, %e5
+ %s5 = fadd float %s4, %e6
+ %s6 = fadd float %s5, %e7
+ %s7 = fadd float %s6, %e8
+ %s8 = fadd float %s7, %e9
+ %s9 = fadd float %s8, %e10
+ %s10 = fadd float %s9, %e11
+ %s11 = fadd float %s10, %e12
+ %s12 = fadd float %s11, %e13
+ %s13 = fadd float %s12, %e14
+ %s14 = fadd float %s13, %e15
+ %s15 = fadd float %s14, %e16
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15)
+ ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
new file mode 100644
index 000000000000..dc120bfb00c2
--- /dev/null
+++ b/test/CodeGen/R600/llvm.cos.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: COS T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.cos.f32(float %r0)
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.cos.f32(float) readnone
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll
new file mode 100644
index 000000000000..b4ce9f429f16
--- /dev/null
+++ b/test/CodeGen/R600/llvm.pow.ll
@@ -0,0 +1,19 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
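+;
+; pow is expanded with the identity pow(x, y) = exp2(y * log2(x)), which
+; maps onto the LOG_IEEE / MUL / EXP_IEEE sequence checked above.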
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.R600.load.input(i32 1)
+ %r2 = call float @llvm.pow.f32(float %r0, float %r1)
+ call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @llvm.pow.f32(float, float) readonly
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
new file mode 100644
index 000000000000..5cd6998c9370
--- /dev/null
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: SIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = call float @llvm.sin.f32(float %r0)
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.sin.f32(float) readnone
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
diff --git a/test/CodeGen/R600/load.constant_addrspace.f32.ll b/test/CodeGen/R600/load.constant_addrspace.f32.ll
new file mode 100644
index 000000000000..93627283bb94
--- /dev/null
+++ b/test/CodeGen/R600/load.constant_addrspace.f32.ll
@@ -0,0 +1,9 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @test(float addrspace(1)* %out, float addrspace(2)* %in) {
+ %1 = load float addrspace(2)* %in
+ store float %1, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/load.i8.ll b/test/CodeGen/R600/load.i8.ll
new file mode 100644
index 000000000000..b070dcd52049
--- /dev/null
+++ b/test/CodeGen/R600/load.i8.ll
@@ -0,0 +1,10 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @test(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
+ %1 = load i8 addrspace(1)* %in
+ %2 = zext i8 %1 to i32
+ store i32 %2, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll
new file mode 100644
index 000000000000..423adb9da900
--- /dev/null
+++ b/test/CodeGen/R600/lshl.ll
@@ -0,0 +1,14 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0
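+;
+; A multiply by two is strength-reduced to a left shift; LSHLREV takes the
+; shift amount as its first source operand, hence the "1, VGPR0" order.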
+
+define void @test(i32 %p) {
+ %i = mul i32 %p, 2
+ %r = bitcast i32 %i to float
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+ ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll
new file mode 100644
index 000000000000..551eac1d76bf
--- /dev/null
+++ b/test/CodeGen/R600/lshr.ll
@@ -0,0 +1,14 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
+
+define void @test(i32 %p) {
+ %i = udiv i32 %p, 2
+ %r = bitcast i32 %i to float
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+ ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll
new file mode 100644
index 000000000000..28744e00c3cf
--- /dev/null
+++ b/test/CodeGen/R600/mulhu.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: V_MOV_B32_e32 VGPR1, -1431655765
+;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0
+;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
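+;
+; -1431655765 is 0xAAAAAAAB = (2^33 + 1) / 3, so taking the high 32 bits of
+; the 64-bit product and shifting right by one computes floor(x / 3) without
+; an actual division (the classic magic-number trick).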
+
+define void @test(i32 %p) {
+ %i = udiv i32 %p, 3
+ %r = bitcast i32 %i to float
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+ ret void
+}
+
+declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll
new file mode 100644
index 000000000000..eb8b052b6f72
--- /dev/null
+++ b/test/CodeGen/R600/predicates.ll
@@ -0,0 +1,104 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; These tests make sure the compiler is optimizing branches using predicates
+; when it is legal to do so.
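+;
+; Roughly: a small if or if/else body can be flattened into predicated ALU
+; instructions (the Pred_sel operands below), while nested control flow
+; still needs the ALU_PUSH_BEFORE / JUMP / POP stack machinery around the
+; inner predicated region.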
+
+; CHECK: @simple_if
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+define void @simple_if(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sgt i32 %in, 0
+ br i1 %0, label %IF, label %ENDIF
+
+IF:
+ %1 = shl i32 %in, 1
+ br label %ENDIF
+
+ENDIF:
+ %2 = phi i32 [ %in, %entry ], [ %1, %IF ]
+ store i32 %2, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @simple_if_else
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+define void @simple_if_else(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sgt i32 %in, 0
+ br i1 %0, label %IF, label %ELSE
+
+IF:
+ %1 = shl i32 %in, 1
+ br label %ENDIF
+
+ELSE:
+ %2 = lshr i32 %in, 1
+ br label %ENDIF
+
+ENDIF:
+ %3 = phi i32 [ %1, %IF ], [ %2, %ELSE ]
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @nested_if
+; CHECK: ALU_PUSH_BEFORE
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
+; CHECK: JUMP
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: POP
+define void @nested_if(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sgt i32 %in, 0
+ br i1 %0, label %IF0, label %ENDIF
+
+IF0:
+ %1 = add i32 %in, 10
+ %2 = icmp sgt i32 %1, 0
+ br i1 %2, label %IF1, label %ENDIF
+
+IF1:
+ %3 = shl i32 %1, 1
+ br label %ENDIF
+
+ENDIF:
+ %4 = phi i32 [%in, %entry], [%1, %IF0], [%3, %IF1]
+ store i32 %4, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @nested_if_else
+; CHECK: ALU_PUSH_BEFORE
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
+; CHECK: JUMP
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: POP
+define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sgt i32 %in, 0
+ br i1 %0, label %IF0, label %ENDIF
+
+IF0:
+ %1 = add i32 %in, 10
+ %2 = icmp sgt i32 %1, 0
+ br i1 %2, label %IF1, label %ELSE1
+
+IF1:
+ %3 = shl i32 %1, 1
+ br label %ENDIF
+
+ELSE1:
+ %4 = lshr i32 %in, 1
+ br label %ENDIF
+
+ENDIF:
+ %5 = phi i32 [%in, %entry], [%3, %IF1], [%4, %ELSE1]
+ store i32 %5, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll
new file mode 100644
index 000000000000..6838c1ae3662
--- /dev/null
+++ b/test/CodeGen/R600/reciprocal.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+ %r0 = call float @llvm.R600.load.input(i32 0)
+ %r1 = fdiv float 1.0, %r0
+ call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @llvm.AMDGPU.rcp(float) readnone
diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
new file mode 100644
index 000000000000..ba9620c40a49
--- /dev/null
+++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
@@ -0,0 +1,83 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = call float @llvm.R600.interp.input(i32 0, i32 0)
+ %1 = call float @llvm.R600.interp.input(i32 1, i32 0)
+ %2 = call float @llvm.R600.interp.input(i32 2, i32 0)
+ %3 = call float @llvm.R600.interp.input(i32 3, i32 0)
+ %4 = fcmp ult float %1, 0.000000e+00
+ %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
+ %6 = fsub float -0.000000e+00, %5
+ %7 = fptosi float %6 to i32
+ %8 = bitcast i32 %7 to float
+ %9 = fcmp ult float %0, 5.700000e+01
+ %10 = select i1 %9, float 1.000000e+00, float 0.000000e+00
+ %11 = fsub float -0.000000e+00, %10
+ %12 = fptosi float %11 to i32
+ %13 = bitcast i32 %12 to float
+ %14 = bitcast float %8 to i32
+ %15 = bitcast float %13 to i32
+ %16 = and i32 %14, %15
+ %17 = bitcast i32 %16 to float
+ %18 = bitcast float %17 to i32
+ %19 = icmp ne i32 %18, 0
+ %20 = fcmp ult float %0, 0.000000e+00
+ %21 = select i1 %20, float 1.000000e+00, float 0.000000e+00
+ %22 = fsub float -0.000000e+00, %21
+ %23 = fptosi float %22 to i32
+ %24 = bitcast i32 %23 to float
+ %25 = bitcast float %24 to i32
+ %26 = icmp ne i32 %25, 0
+ br i1 %19, label %IF, label %ELSE
+
+IF: ; preds = %main_body
+ %. = select i1 %26, float 0.000000e+00, float 1.000000e+00
+ %.18 = select i1 %26, float 1.000000e+00, float 0.000000e+00
+ br label %ENDIF
+
+ELSE: ; preds = %main_body
+ br i1 %26, label %ENDIF, label %ELSE17
+
+ENDIF: ; preds = %ELSE17, %ELSE, %IF
+ %temp1.0 = phi float [ %., %IF ], [ %48, %ELSE17 ], [ 0.000000e+00, %ELSE ]
+ %temp2.0 = phi float [ 0.000000e+00, %IF ], [ %49, %ELSE17 ], [ 1.000000e+00, %ELSE ]
+ %temp.0 = phi float [ %.18, %IF ], [ %47, %ELSE17 ], [ 0.000000e+00, %ELSE ]
+ %27 = call float @llvm.AMDIL.clamp.(float %temp.0, float 0.000000e+00, float 1.000000e+00)
+ %28 = call float @llvm.AMDIL.clamp.(float %temp1.0, float 0.000000e+00, float 1.000000e+00)
+ %29 = call float @llvm.AMDIL.clamp.(float %temp2.0, float 0.000000e+00, float 1.000000e+00)
+ %30 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
+ %31 = insertelement <4 x float> undef, float %27, i32 0
+ %32 = insertelement <4 x float> %31, float %28, i32 1
+ %33 = insertelement <4 x float> %32, float %29, i32 2
+ %34 = insertelement <4 x float> %33, float %30, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %34, i32 0, i32 0)
+ ret void
+
+ELSE17: ; preds = %ELSE
+ %35 = fadd float 0.000000e+00, 0x3FC99999A0000000
+ %36 = fadd float 0.000000e+00, 0x3FC99999A0000000
+ %37 = fadd float 0.000000e+00, 0x3FC99999A0000000
+ %38 = fadd float %35, 0x3FC99999A0000000
+ %39 = fadd float %36, 0x3FC99999A0000000
+ %40 = fadd float %37, 0x3FC99999A0000000
+ %41 = fadd float %38, 0x3FC99999A0000000
+ %42 = fadd float %39, 0x3FC99999A0000000
+ %43 = fadd float %40, 0x3FC99999A0000000
+ %44 = fadd float %41, 0x3FC99999A0000000
+ %45 = fadd float %42, 0x3FC99999A0000000
+ %46 = fadd float %43, 0x3FC99999A0000000
+ %47 = fadd float %44, 0x3FC99999A0000000
+ %48 = fadd float %45, 0x3FC99999A0000000
+ %49 = fadd float %46, 0x3FC99999A0000000
+ br label %ENDIF
+}
+
+declare float @llvm.R600.interp.input(i32, i32) #0
+
+declare float @llvm.AMDIL.clamp.(float, float, float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/R600/schedule-fs-loop-nested.ll
new file mode 100644
index 000000000000..5e875c49ab51
--- /dev/null
+++ b/test/CodeGen/R600/schedule-fs-loop-nested.ll
@@ -0,0 +1,88 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(9)* null
+ %1 = extractelement <4 x float> %0, i32 3
+ %2 = fptosi float %1 to i32
+ %3 = bitcast i32 %2 to float
+ %4 = bitcast float %3 to i32
+ %5 = sdiv i32 %4, 4
+ %6 = bitcast i32 %5 to float
+ %7 = bitcast float %6 to i32
+ %8 = mul i32 %7, 4
+ %9 = bitcast i32 %8 to float
+ %10 = bitcast float %9 to i32
+ %11 = sub i32 0, %10
+ %12 = bitcast i32 %11 to float
+ %13 = bitcast float %3 to i32
+ %14 = bitcast float %12 to i32
+ %15 = add i32 %13, %14
+ %16 = bitcast i32 %15 to float
+ %17 = load <4 x float> addrspace(9)* null
+ %18 = extractelement <4 x float> %17, i32 0
+ %19 = load <4 x float> addrspace(9)* null
+ %20 = extractelement <4 x float> %19, i32 1
+ %21 = load <4 x float> addrspace(9)* null
+ %22 = extractelement <4 x float> %21, i32 2
+ br label %LOOP
+
+LOOP: ; preds = %IF31, %main_body
+ %temp12.0 = phi float [ 0.000000e+00, %main_body ], [ %47, %IF31 ]
+ %temp6.0 = phi float [ %22, %main_body ], [ %temp6.1, %IF31 ]
+ %temp5.0 = phi float [ %20, %main_body ], [ %temp5.1, %IF31 ]
+ %temp4.0 = phi float [ %18, %main_body ], [ %temp4.1, %IF31 ]
+ %23 = bitcast float %temp12.0 to i32
+ %24 = bitcast float %6 to i32
+ %25 = icmp sge i32 %23, %24
+ %26 = sext i1 %25 to i32
+ %27 = bitcast i32 %26 to float
+ %28 = bitcast float %27 to i32
+ %29 = icmp ne i32 %28, 0
+ br i1 %29, label %IF, label %LOOP29
+
+IF: ; preds = %LOOP
+ %30 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00)
+ %31 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00)
+ %32 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00)
+ %33 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
+ %34 = insertelement <4 x float> undef, float %30, i32 0
+ %35 = insertelement <4 x float> %34, float %31, i32 1
+ %36 = insertelement <4 x float> %35, float %32, i32 2
+ %37 = insertelement <4 x float> %36, float %33, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0)
+ ret void
+
+LOOP29: ; preds = %LOOP, %ENDIF30
+ %temp6.1 = phi float [ %temp4.1, %ENDIF30 ], [ %temp6.0, %LOOP ]
+ %temp5.1 = phi float [ %temp6.1, %ENDIF30 ], [ %temp5.0, %LOOP ]
+ %temp4.1 = phi float [ %temp5.1, %ENDIF30 ], [ %temp4.0, %LOOP ]
+ %temp20.0 = phi float [ %50, %ENDIF30 ], [ 0.000000e+00, %LOOP ]
+ %38 = bitcast float %temp20.0 to i32
+ %39 = bitcast float %16 to i32
+ %40 = icmp sge i32 %38, %39
+ %41 = sext i1 %40 to i32
+ %42 = bitcast i32 %41 to float
+ %43 = bitcast float %42 to i32
+ %44 = icmp ne i32 %43, 0
+ br i1 %44, label %IF31, label %ENDIF30
+
+IF31: ; preds = %LOOP29
+ %45 = bitcast float %temp12.0 to i32
+ %46 = add i32 %45, 1
+ %47 = bitcast i32 %46 to float
+ br label %LOOP
+
+ENDIF30: ; preds = %LOOP29
+ %48 = bitcast float %temp20.0 to i32
+ %49 = add i32 %48, 1
+ %50 = bitcast i32 %49 to float
+ br label %LOOP29
+}
+
+declare float @llvm.AMDIL.clamp.(float, float, float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/R600/schedule-fs-loop.ll
new file mode 100644
index 000000000000..d142cacd4335
--- /dev/null
+++ b/test/CodeGen/R600/schedule-fs-loop.ll
@@ -0,0 +1,55 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(9)* null
+ %1 = extractelement <4 x float> %0, i32 3
+ %2 = fptosi float %1 to i32
+ %3 = bitcast i32 %2 to float
+ %4 = load <4 x float> addrspace(9)* null
+ %5 = extractelement <4 x float> %4, i32 0
+ %6 = load <4 x float> addrspace(9)* null
+ %7 = extractelement <4 x float> %6, i32 1
+ %8 = load <4 x float> addrspace(9)* null
+ %9 = extractelement <4 x float> %8, i32 2
+ br label %LOOP
+
+LOOP: ; preds = %ENDIF, %main_body
+ %temp4.0 = phi float [ %5, %main_body ], [ %temp5.0, %ENDIF ]
+ %temp5.0 = phi float [ %7, %main_body ], [ %temp6.0, %ENDIF ]
+ %temp6.0 = phi float [ %9, %main_body ], [ %temp4.0, %ENDIF ]
+ %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %27, %ENDIF ]
+ %10 = bitcast float %temp8.0 to i32
+ %11 = bitcast float %3 to i32
+ %12 = icmp sge i32 %10, %11
+ %13 = sext i1 %12 to i32
+ %14 = bitcast i32 %13 to float
+ %15 = bitcast float %14 to i32
+ %16 = icmp ne i32 %15, 0
+ br i1 %16, label %IF, label %ENDIF
+
+IF: ; preds = %LOOP
+ %17 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00)
+ %18 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00)
+ %19 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00)
+ %20 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
+ %21 = insertelement <4 x float> undef, float %17, i32 0
+ %22 = insertelement <4 x float> %21, float %18, i32 1
+ %23 = insertelement <4 x float> %22, float %19, i32 2
+ %24 = insertelement <4 x float> %23, float %20, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0)
+ ret void
+
+ENDIF: ; preds = %LOOP
+ %25 = bitcast float %temp8.0 to i32
+ %26 = add i32 %25, 1
+ %27 = bitcast i32 %26 to float
+ br label %LOOP
+}
+
+declare float @llvm.AMDIL.clamp.(float, float, float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/R600/schedule-if-2.ll
new file mode 100644
index 000000000000..6afd6772926b
--- /dev/null
+++ b/test/CodeGen/R600/schedule-if-2.ll
@@ -0,0 +1,94 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = fadd float 1.000000e+03, %1
+ %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %4 = extractelement <4 x float> %3, i32 0
+ %5 = bitcast float %4 to i32
+ %6 = icmp eq i32 %5, 0
+ %7 = sext i1 %6 to i32
+ %8 = bitcast i32 %7 to float
+ %9 = bitcast float %8 to i32
+ %10 = icmp ne i32 %9, 0
+ br i1 %10, label %IF, label %ELSE
+
+IF: ; preds = %main_body
+ %11 = call float @fabs(float %2)
+ %12 = fcmp ueq float %11, 0x7FF0000000000000
+ %13 = select i1 %12, float 1.000000e+00, float 0.000000e+00
+ %14 = fsub float -0.000000e+00, %13
+ %15 = fptosi float %14 to i32
+ %16 = bitcast i32 %15 to float
+ %17 = bitcast float %16 to i32
+ %18 = icmp ne i32 %17, 0
+ %. = select i1 %18, float 0x36A0000000000000, float 0.000000e+00
+ %19 = fcmp une float %2, %2
+ %20 = select i1 %19, float 1.000000e+00, float 0.000000e+00
+ %21 = fsub float -0.000000e+00, %20
+ %22 = fptosi float %21 to i32
+ %23 = bitcast i32 %22 to float
+ %24 = bitcast float %23 to i32
+ %25 = icmp ne i32 %24, 0
+ %temp8.0 = select i1 %25, float 0x36A0000000000000, float 0.000000e+00
+ %26 = bitcast float %. to i32
+ %27 = sitofp i32 %26 to float
+ %28 = bitcast float %temp8.0 to i32
+ %29 = sitofp i32 %28 to float
+ %30 = fcmp ugt float %2, 0.000000e+00
+ %31 = select i1 %30, float 1.000000e+00, float %2
+ %32 = fcmp uge float %31, 0.000000e+00
+ %33 = select i1 %32, float %31, float -1.000000e+00
+ %34 = fadd float %33, 1.000000e+00
+ %35 = fmul float %34, 5.000000e-01
+ br label %ENDIF
+
+ELSE: ; preds = %main_body
+ %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %37 = extractelement <4 x float> %36, i32 0
+ %38 = bitcast float %37 to i32
+ %39 = icmp eq i32 %38, 1
+ %40 = sext i1 %39 to i32
+ %41 = bitcast i32 %40 to float
+ %42 = bitcast float %41 to i32
+ %43 = icmp ne i32 %42, 0
+ br i1 %43, label %IF23, label %ENDIF
+
+ENDIF: ; preds = %IF23, %ELSE, %IF
+ %temp4.0 = phi float [ %2, %IF ], [ %56, %IF23 ], [ 0.000000e+00, %ELSE ]
+ %temp5.0 = phi float [ %27, %IF ], [ %60, %IF23 ], [ 0.000000e+00, %ELSE ]
+ %temp6.0 = phi float [ %29, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ]
+ %temp7.0 = phi float [ %35, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ]
+ %44 = insertelement <4 x float> undef, float %temp4.0, i32 0
+ %45 = insertelement <4 x float> %44, float %temp5.0, i32 1
+ %46 = insertelement <4 x float> %45, float %temp6.0, i32 2
+ %47 = insertelement <4 x float> %46, float %temp7.0, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %47, i32 0, i32 0)
+ ret void
+
+IF23: ; preds = %ELSE
+ %48 = fcmp ult float 0.000000e+00, %2
+ %49 = select i1 %48, float 1.000000e+00, float 0.000000e+00
+ %50 = fsub float -0.000000e+00, %49
+ %51 = fptosi float %50 to i32
+ %52 = bitcast i32 %51 to float
+ %53 = bitcast float %52 to i32
+ %54 = icmp ne i32 %53, 0
+ %.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00
+ %55 = bitcast float %.28 to i32
+ %56 = sitofp i32 %55 to float
+ %57 = load <4 x float> addrspace(8)* null
+ %58 = extractelement <4 x float> %57, i32 0
+ %59 = fsub float -0.000000e+00, %58
+ %60 = fadd float %2, %59
+ br label %ENDIF
+}
+
+declare float @fabs(float) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readonly }
diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/R600/schedule-if.ll
new file mode 100644
index 000000000000..347d92fd6a0e
--- /dev/null
+++ b/test/CodeGen/R600/schedule-if.ll
@@ -0,0 +1,46 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %1 = extractelement <4 x float> %0, i32 0
+ %2 = bitcast float %1 to i32
+ %3 = icmp eq i32 %2, 0
+ %4 = sext i1 %3 to i32
+ %5 = bitcast i32 %4 to float
+ %6 = bitcast float %5 to i32
+ %7 = icmp ne i32 %6, 0
+ br i1 %7, label %ENDIF, label %ELSE
+
+ELSE: ; preds = %main_body
+ %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %9 = extractelement <4 x float> %8, i32 0
+ %10 = bitcast float %9 to i32
+ %11 = icmp eq i32 %10, 1
+ %12 = sext i1 %11 to i32
+ %13 = bitcast i32 %12 to float
+ %14 = bitcast float %13 to i32
+ %15 = icmp ne i32 %14, 0
+ br i1 %15, label %IF13, label %ENDIF
+
+ENDIF: ; preds = %IF13, %ELSE, %main_body
+ %temp.0 = phi float [ 1.000000e+03, %main_body ], [ 1.000000e+00, %IF13 ], [ 0.000000e+00, %ELSE ]
+ %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ]
+ %temp3.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
+ %16 = insertelement <4 x float> undef, float %temp.0, i32 0
+ %17 = insertelement <4 x float> %16, float %temp1.0, i32 1
+ %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 2
+ %19 = insertelement <4 x float> %18, float %temp3.0, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
+ ret void
+
+IF13: ; preds = %ELSE
+ %20 = load <4 x float> addrspace(8)* null
+ %21 = extractelement <4 x float> %20, i32 0
+ %22 = fsub float -0.000000e+00, %21
+ %23 = fadd float 1.000000e+03, %22
+ br label %ENDIF
+}
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
new file mode 100644
index 000000000000..44b7c2f68002
--- /dev/null
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
@@ -0,0 +1,134 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
+;REQUIRES: asserts
+
+define void @main() {
+main_body:
+ %0 = call float @llvm.R600.load.input(i32 4)
+ %1 = call float @llvm.R600.load.input(i32 5)
+ %2 = call float @llvm.R600.load.input(i32 6)
+ %3 = call float @llvm.R600.load.input(i32 7)
+ %4 = fcmp ult float %0, 0.000000e+00
+ %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
+ %6 = fsub float -0.000000e+00, %5
+ %7 = fptosi float %6 to i32
+ %8 = bitcast i32 %7 to float
+ %9 = bitcast float %8 to i32
+ %10 = icmp ne i32 %9, 0
+ br i1 %10, label %LOOP, label %ENDIF
+
+ENDIF: ; preds = %ENDIF16, %LOOP, %main_body
+ %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp.1, %LOOP ], [ %temp.1, %ENDIF16 ]
+ %temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ]
+ %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ]
+ %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ]
+ %11 = load <4 x float> addrspace(9)* null
+ %12 = extractelement <4 x float> %11, i32 0
+ %13 = fmul float %12, %0
+ %14 = load <4 x float> addrspace(9)* null
+ %15 = extractelement <4 x float> %14, i32 1
+ %16 = fmul float %15, %0
+ %17 = load <4 x float> addrspace(9)* null
+ %18 = extractelement <4 x float> %17, i32 2
+ %19 = fmul float %18, %0
+ %20 = load <4 x float> addrspace(9)* null
+ %21 = extractelement <4 x float> %20, i32 3
+ %22 = fmul float %21, %0
+ %23 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %24 = extractelement <4 x float> %23, i32 0
+ %25 = fmul float %24, %1
+ %26 = fadd float %25, %13
+ %27 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %28 = extractelement <4 x float> %27, i32 1
+ %29 = fmul float %28, %1
+ %30 = fadd float %29, %16
+ %31 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %32 = extractelement <4 x float> %31, i32 2
+ %33 = fmul float %32, %1
+ %34 = fadd float %33, %19
+ %35 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %36 = extractelement <4 x float> %35, i32 3
+ %37 = fmul float %36, %1
+ %38 = fadd float %37, %22
+ %39 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %40 = extractelement <4 x float> %39, i32 0
+ %41 = fmul float %40, %2
+ %42 = fadd float %41, %26
+ %43 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %44 = extractelement <4 x float> %43, i32 1
+ %45 = fmul float %44, %2
+ %46 = fadd float %45, %30
+ %47 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %48 = extractelement <4 x float> %47, i32 2
+ %49 = fmul float %48, %2
+ %50 = fadd float %49, %34
+ %51 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %52 = extractelement <4 x float> %51, i32 3
+ %53 = fmul float %52, %2
+ %54 = fadd float %53, %38
+ %55 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %56 = extractelement <4 x float> %55, i32 0
+ %57 = fmul float %56, %3
+ %58 = fadd float %57, %42
+ %59 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %60 = extractelement <4 x float> %59, i32 1
+ %61 = fmul float %60, %3
+ %62 = fadd float %61, %46
+ %63 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %64 = extractelement <4 x float> %63, i32 2
+ %65 = fmul float %64, %3
+ %66 = fadd float %65, %50
+ %67 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %68 = extractelement <4 x float> %67, i32 3
+ %69 = fmul float %68, %3
+ %70 = fadd float %69, %54
+ %71 = insertelement <4 x float> undef, float %58, i32 0
+ %72 = insertelement <4 x float> %71, float %62, i32 1
+ %73 = insertelement <4 x float> %72, float %66, i32 2
+ %74 = insertelement <4 x float> %73, float %70, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %74, i32 60, i32 1)
+ %75 = insertelement <4 x float> undef, float %temp.0, i32 0
+ %76 = insertelement <4 x float> %75, float %temp1.0, i32 1
+ %77 = insertelement <4 x float> %76, float %temp2.0, i32 2
+ %78 = insertelement <4 x float> %77, float %temp3.0, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %78, i32 0, i32 2)
+ ret void
+
+LOOP: ; preds = %main_body, %ENDIF19
+ %temp.1 = phi float [ %93, %ENDIF19 ], [ 0.000000e+00, %main_body ]
+ %temp1.1 = phi float [ %94, %ENDIF19 ], [ 1.000000e+00, %main_body ]
+ %temp2.1 = phi float [ %95, %ENDIF19 ], [ 0.000000e+00, %main_body ]
+ %temp3.1 = phi float [ %96, %ENDIF19 ], [ 0.000000e+00, %main_body ]
+ %temp4.0 = phi float [ %97, %ENDIF19 ], [ -2.000000e+00, %main_body ]
+ %79 = fcmp uge float %temp4.0, %0
+ %80 = select i1 %79, float 1.000000e+00, float 0.000000e+00
+ %81 = fsub float -0.000000e+00, %80
+ %82 = fptosi float %81 to i32
+ %83 = bitcast i32 %82 to float
+ %84 = bitcast float %83 to i32
+ %85 = icmp ne i32 %84, 0
+ br i1 %85, label %ENDIF, label %ENDIF16
+
+ENDIF16: ; preds = %LOOP
+ %86 = fcmp une float %2, %temp4.0
+ %87 = select i1 %86, float 1.000000e+00, float 0.000000e+00
+ %88 = fsub float -0.000000e+00, %87
+ %89 = fptosi float %88 to i32
+ %90 = bitcast i32 %89 to float
+ %91 = bitcast float %90 to i32
+ %92 = icmp ne i32 %91, 0
+ br i1 %92, label %ENDIF, label %ENDIF19
+
+ENDIF19: ; preds = %ENDIF16
+ %93 = fadd float %temp.1, 1.000000e+00
+ %94 = fadd float %temp1.1, 0.000000e+00
+ %95 = fadd float %temp2.1, 0.000000e+00
+ %96 = fadd float %temp3.1, 0.000000e+00
+ %97 = fadd float %temp4.0, 1.000000e+00
+ br label %LOOP
+}
+
+declare float @llvm.R600.load.input(i32) #0
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll
new file mode 100644
index 000000000000..3556facfbab3
--- /dev/null
+++ b/test/CodeGen/R600/sdiv.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; The code generated by sdiv is long and complex and may frequently change.
+; The goal of this test is to make sure the ISel doesn't fail.
+;
+; This program was previously failing to compile when one of the selectcc
+; opcodes generated by the sdiv lowering was being legalized and optimized to:
+; selectcc Remainder -1, 0, -1, SETGT
+; This was fixed by adding an additional pattern in R600Instructions.td to
+; match this pattern with a CNDGE_INT.
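+; (informally, CNDGE_INT dst, a, b, c computes dst = (a >= 0) ? b : c,
+; which covers the selectcc form shown above)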
+
+; CHECK: RETURN
+
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+ %num = load i32 addrspace(1) * %in
+ %den = load i32 addrspace(1) * %den_ptr
+ %result = sdiv i32 %num, %den
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll
new file mode 100644
index 000000000000..359ca1e6f8ce
--- /dev/null
+++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Note: additional optimizations may cause this SGT to be replaced with a
+; CND* instruction.
+; CHECK: SETGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}}
+; Test a selectcc with i32 LHS/RHS and float True/False
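+; (the "sge 0" compare is rewritten as "sgt -1", which is why the CHECK
+; line above expects a -1 literal)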
+
+define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+ %0 = load i32 addrspace(1)* %in
+ %1 = icmp sge i32 %0, 0
+ %2 = select i1 %1, float 1.0, float 0.0
+ store float %2, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
new file mode 100644
index 000000000000..02d935390423
--- /dev/null
+++ b/test/CodeGen/R600/selectcc-opt.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @test_a
+; CHECK-NOT: CND
+; CHECK: SET{{[NEQGTL]+}}_DX10
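+; (the fcmp/select/fsub/fptosi/bitcast chain below is the usual way a
+; shader frontend materializes a boolean as an all-ones integer; SET*_DX10
+; already produces -1/0, so no CND* should be needed)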
+
+define void @test_a(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 0.000000e+00
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ %4 = bitcast i32 %3 to float
+ %5 = bitcast float %4 to i32
+ %6 = icmp ne i32 %5, 0
+ br i1 %6, label %IF, label %ENDIF
+
+IF:
+ %7 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %7
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
+
+; Same as test_a, but the branch labels are swapped to produce the inverse
+; cc for the icmp instruction.
+
+; CHECK: @test_b
+; CHECK: SET{{[GTEQN]+}}_DX10
+; CHECK-NEXT: PRED_
+define void @test_b(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 0.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ %4 = bitcast i32 %3 to float
+ %5 = bitcast float %4 to i32
+ %6 = icmp ne i32 %5, 0
+ br i1 %6, label %ENDIF, label %IF
+
+IF:
+ %7 = getelementptr i32 addrspace(1)* %out, i32 1
+ store i32 0, i32 addrspace(1)* %7
+ br label %ENDIF
+
+ENDIF:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
+
+; Test a CND*_INT instruction with float true/false values
+; CHECK: @test_c
+; CHECK: CND{{[GTE]+}}_INT
+define void @test_c(float addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sgt i32 %in, 0
+ %1 = select i1 %0, float 2.0, float 3.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc_cnde.ll
new file mode 100644
index 000000000000..f0a0f512ba15
--- /dev/null
+++ b/test/CodeGen/R600/selectcc_cnde.ll
@@ -0,0 +1,11 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK-NOT: SETE
+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}}
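+;(the compare is expected to fold into the conditional move, hence the
+;CHECK-NOT on SETE above)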
+define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
+ %1 = load float addrspace(1)* %in
+ %2 = fcmp oeq float %1, 0.0
+ %3 = select i1 %2, float 1.0, float 2.0
+ store float %3, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc_cnde_int.ll
new file mode 100644
index 000000000000..b38078e26db6
--- /dev/null
+++ b/test/CodeGen/R600/selectcc_cnde_int.ll
@@ -0,0 +1,11 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK-NOT: SETE_INT
+;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}}
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %1 = load i32 addrspace(1)* %in
+ %2 = icmp eq i32 %1, 0
+ %3 = select i1 %2, i32 1, i32 2
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
new file mode 100644
index 000000000000..54febcf0e68e
--- /dev/null
+++ b/test/CodeGen/R600/set-dx10.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; These tests check that floating-point comparisons whose results are used
+; by a select to store integer true (-1) and false (0) values are lowered
+; to one of the SET*DX10 instructions.
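+; The ule/ult cases are handled by swapping the operands and using
+; SETGE/SETGT with the literal first, as the CHECK lines below show.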
+
+; CHECK: @fcmp_une_select_fptosi
+; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp une float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_une_select_i32
+; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp une float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ueq_select_fptosi
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ueq float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ueq_select_i32
+; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ueq float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ugt_select_fptosi
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ugt float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ugt_select_i32
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ugt float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_uge_select_fptosi
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp uge float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_uge_select_i32
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp uge float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ule_select_fptosi
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ule float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ule_select_i32
+; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ule float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ult_select_fptosi
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 5.0
+ %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
+ %2 = fsub float -0.000000e+00, %1
+ %3 = fptosi float %2 to i32
+ store i32 %3, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fcmp_ult_select_i32
+; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 5.0
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/setcc.v4i32.ll b/test/CodeGen/R600/setcc.v4i32.ll
new file mode 100644
index 000000000000..0752f2e63dbf
--- /dev/null
+++ b/test/CodeGen/R600/setcc.v4i32.ll
@@ -0,0 +1,12 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+;CHECK: SETE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32> addrspace(1) * %b_ptr
+ %result = icmp eq <4 x i32> %a, %b
+ %sext = sext <4 x i1> %result to <4 x i32>
+ store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
new file mode 100644
index 000000000000..5ab4b87d570c
--- /dev/null
+++ b/test/CodeGen/R600/seto.ll
@@ -0,0 +1,13 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
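+;(an ordered compare of %p with itself is true exactly when %p is not NaN)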
+
+define void @main(float %p) {
+main_body:
+ %c = fcmp oeq float %p, %p
+ %r = select i1 %c, float 1.000000e+00, float 0.000000e+00
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
+ ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
new file mode 100644
index 000000000000..320835576d41
--- /dev/null
+++ b/test/CodeGen/R600/setuo.ll
@@ -0,0 +1,13 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
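+;(an unordered compare of %p with itself is true exactly when %p is NaN)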
+
+define void @main(float %p) {
+main_body:
+ %c = fcmp une float %p, %p
+ %r = select i1 %c, float 1.000000e+00, float 0.000000e+00
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r)
+ ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/short-args.ll b/test/CodeGen/R600/short-args.ll
new file mode 100644
index 000000000000..b69e327bf6df
--- /dev/null
+++ b/test/CodeGen/R600/short-args.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @i8_arg
+; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
+entry:
+ %0 = zext i8 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK: @i8_zext_arg
+; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
+entry:
+ %0 = zext i8 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK: @i16_arg
+; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
+entry:
+ %0 = zext i16 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK: @i16_zext_arg
+; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
+entry:
+ %0 = zext i16 %in to i32
+ store i32 %0, i32 addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/store.v4f32.ll b/test/CodeGen/R600/store.v4f32.ll
new file mode 100644
index 000000000000..8b0d24445971
--- /dev/null
+++ b/test/CodeGen/R600/store.v4f32.ll
@@ -0,0 +1,9 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
+
+define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %1 = load <4 x float> addrspace(1) * %in
+ store <4 x float> %1, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/store.v4i32.ll b/test/CodeGen/R600/store.v4i32.ll
new file mode 100644
index 000000000000..a659815ddeba
--- /dev/null
+++ b/test/CodeGen/R600/store.v4i32.ll
@@ -0,0 +1,9 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %1 = load <4 x i32> addrspace(1) * %in
+ store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/udiv.v4i32.ll b/test/CodeGen/R600/udiv.v4i32.ll
new file mode 100644
index 000000000000..47657a6be75e
--- /dev/null
+++ b/test/CodeGen/R600/udiv.v4i32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;The code generated by udiv is long and complex and may frequently change.
+;The goal of this test is to make sure the ISel doesn't fail when it gets
+;a v4i32 udiv
+;CHECK: RETURN
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32> addrspace(1) * %b_ptr
+ %result = udiv <4 x i32> %a, %b
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll
new file mode 100644
index 000000000000..b48c59151831
--- /dev/null
+++ b/test/CodeGen/R600/unsupported-cc.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; These tests are for condition codes that are not supported by the hardware
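+; (they are legalized by swapping the compare operands and, for the
+; "or equal" forms, bumping the constant by one, as the CHECK lines show)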
+
+; CHECK: @slt
+; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45)
+define void @slt(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp slt i32 %in, 5
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @ult_i32
+; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45)
+define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp ult i32 %in, 5
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @ult_float
+; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @ult_float(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ult float %in, 5.0
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @olt
+; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @olt(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp olt float %in, 5.0
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @sle
+; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45)
+define void @sle(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp sle i32 %in, 5
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @ule_i32
+; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45)
+define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+ %0 = icmp ule i32 %in, 5
+ %1 = select i1 %0, i32 -1, i32 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @ule_float
+; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @ule_float(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ule float %in, 5.0
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @ole
+; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+define void @ole(float addrspace(1)* %out, float %in) {
+entry:
+ %0 = fcmp ole float %in, 5.0
+ %1 = select i1 %0, float 1.0, float 0.0
+ store float %1, float addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/urem.v4i32.ll b/test/CodeGen/R600/urem.v4i32.ll
new file mode 100644
index 000000000000..2e7388caa6ce
--- /dev/null
+++ b/test/CodeGen/R600/urem.v4i32.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;The code generated by urem is long and complex and may frequently change.
+;The goal of this test is to make sure the ISel doesn't fail when it gets
+;a v4i32 urem
+;CHECK: RETURN
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32> addrspace(1) * %b_ptr
+ %result = urem <4 x i32> %a, %b
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/vec4-expand.ll b/test/CodeGen/R600/vec4-expand.ll
new file mode 100644
index 000000000000..8f62bc692908
--- /dev/null
+++ b/test/CodeGen/R600/vec4-expand.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @fp_to_sint
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
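+; (the <4 x float> source is converted one component at a time, hence the
+; four FLT_TO_INT instructions; the same applies to the conversions below)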
+
+define void @fp_to_sint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %value = load <4 x float> addrspace(1) * %in
+ %result = fptosi <4 x float> %value to <4 x i32>
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @fp_to_uint
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fp_to_uint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+ %value = load <4 x float> addrspace(1) * %in
+ %result = fptoui <4 x float> %value to <4 x i32>
+ store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @sint_to_fp
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @sint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %value = load <4 x i32> addrspace(1) * %in
+ %result = sitofp <4 x i32> %value to <4 x float>
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
+
+; CHECK: @uint_to_fp
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @uint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+ %value = load <4 x i32> addrspace(1) * %in
+ %result = uitofp <4 x i32> %value to <4 x float>
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/SI/sanity.ll b/test/CodeGen/SI/sanity.ll
new file mode 100644
index 000000000000..62cdcf5eca28
--- /dev/null
+++ b/test/CodeGen/SI/sanity.ll
@@ -0,0 +1,37 @@
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+; CHECK: S_ENDPGM
+
+define void @main() {
+main_body:
+ call void @llvm.AMDGPU.shader.type(i32 1)
+ %0 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*)
+ %1 = getelementptr <4 x i32> addrspace(2)* %0, i32 0
+ %2 = load <4 x i32> addrspace(2)* %1
+ %3 = call i32 @llvm.SI.vs.load.buffer.index()
+ %4 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %2, i32 0, i32 %3)
+ %5 = extractelement <4 x float> %4, i32 0
+ %6 = extractelement <4 x float> %4, i32 1
+ %7 = extractelement <4 x float> %4, i32 2
+ %8 = extractelement <4 x float> %4, i32 3
+ %9 = load <4 x i32> addrspace(2)* addrspace(8)* inttoptr (i32 6 to <4 x i32> addrspace(2)* addrspace(8)*)
+ %10 = getelementptr <4 x i32> addrspace(2)* %9, i32 1
+ %11 = load <4 x i32> addrspace(2)* %10
+ %12 = call i32 @llvm.SI.vs.load.buffer.index()
+ %13 = call <4 x float> @llvm.SI.vs.load.input(<4 x i32> %11, i32 0, i32 %12)
+ %14 = extractelement <4 x float> %13, i32 0
+ %15 = extractelement <4 x float> %13, i32 1
+ %16 = extractelement <4 x float> %13, i32 2
+ %17 = extractelement <4 x float> %13, i32 3
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %14, float %15, float %16, float %17)
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %5, float %6, float %7, float %8)
+ ret void
+}
+
+declare void @llvm.AMDGPU.shader.type(i32)
+
+declare i32 @llvm.SI.vs.load.buffer.index() readnone
+
+declare <4 x float> @llvm.SI.vs.load.input(<4 x i32>, i32, i32)
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 3b644986f2e4..000000000000
--- a/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=sparc -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/SPARC/64bit.ll b/test/CodeGen/SPARC/64bit.ll
new file mode 100644
index 000000000000..0d4e191c9509
--- /dev/null
+++ b/test/CodeGen/SPARC/64bit.ll
@@ -0,0 +1,146 @@
+; RUN: llc < %s -march=sparcv9 | FileCheck %s
+
+; CHECK: ret2:
+; CHECK: or %g0, %i1, %i0
+define i64 @ret2(i64 %a, i64 %b) {
+ ret i64 %b
+}
+
+; CHECK: shl_imm
+; CHECK: sllx %i0, 7, %i0
+define i64 @shl_imm(i64 %a) {
+ %x = shl i64 %a, 7
+ ret i64 %x
+}
+
+; CHECK: sra_reg
+; CHECK: srax %i0, %i1, %i0
+define i64 @sra_reg(i64 %a, i64 %b) {
+ %x = ashr i64 %a, %b
+ ret i64 %x
+}
+
+; Immediate materialization. Many of these patterns could actually be merged
+; into the restore instruction:
+;
+; restore %g0, %g0, %o0
+;
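+; For context: sethi places its immediate in bits 31:10, so "sethi 4"
+; materializes 4 << 10 = 4096 and an or/xor supplies the low bits
+; (e.g. 4097 = sethi 4 + or 1), as the tests below expect.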
+; CHECK: ret_imm0
+; CHECK: or %g0, %g0, %i0
+define i64 @ret_imm0() {
+ ret i64 0
+}
+
+; CHECK: ret_simm13
+; CHECK: or %g0, -4096, %i0
+define i64 @ret_simm13() {
+ ret i64 -4096
+}
+
+; CHECK: ret_sethi
+; CHECK: sethi 4, %i0
+; CHECK-NOT: or
+; CHECK: restore
+define i64 @ret_sethi() {
+ ret i64 4096
+}
+
+; CHECK: ret_sethi_or
+; CHECK: sethi 4, [[R:%[goli][0-7]]]
+; CHECK: or [[R]], 1, %i0
+define i64 @ret_sethi_or() {
+ ret i64 4097
+}
+
+; CHECK: ret_nimm33
+; CHECK: sethi 4, [[R:%[goli][0-7]]]
+; CHECK: xor [[R]], -4, %i0
+define i64 @ret_nimm33() {
+ ret i64 -4100
+}
+
+; CHECK: ret_bigimm
+; CHECK: sethi
+; CHECK: sethi
+define i64 @ret_bigimm() {
+ ret i64 6800754272627607872
+}
+
+; CHECK: reg_reg_alu
+; CHECK: add %i0, %i1, [[R0:%[goli][0-7]]]
+; CHECK: sub [[R0]], %i2, [[R1:%[goli][0-7]]]
+; CHECK: andn [[R1]], %i0, %i0
+define i64 @reg_reg_alu(i64 %x, i64 %y, i64 %z) {
+ %a = add i64 %x, %y
+ %b = sub i64 %a, %z
+ %c = xor i64 %x, -1
+ %d = and i64 %b, %c
+ ret i64 %d
+}
+
+; CHECK: reg_imm_alu
+; CHECK: add %i0, -5, [[R0:%[goli][0-7]]]
+; CHECK: xor [[R0]], 2, %i0
+define i64 @reg_imm_alu(i64 %x, i64 %y, i64 %z) {
+ %a = add i64 %x, -5
+ %b = xor i64 %a, 2
+ ret i64 %b
+}
+
+; CHECK: loads
+; CHECK: ldx [%i0]
+; CHECK: stx %
+; CHECK: ld [%i1]
+; CHECK: st %
+; CHECK: ldsw [%i2]
+; CHECK: stx %
+; CHECK: ldsh [%i3]
+; CHECK: sth %
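+; (ldx is the 64-bit load; ld zero-extends a word, while ldsw and ldsh
+; sign-extend a word and a halfword, matching the zext/sext IR below)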
+define i64 @loads(i64* %p, i32* %q, i32* %r, i16* %s) {
+ %a = load i64* %p
+ %ai = add i64 1, %a
+ store i64 %ai, i64* %p
+ %b = load i32* %q
+ %b2 = zext i32 %b to i64
+ %bi = trunc i64 %ai to i32
+ store i32 %bi, i32* %q
+ %c = load i32* %r
+ %c2 = sext i32 %c to i64
+ store i64 %ai, i64* %p
+ %d = load i16* %s
+ %d2 = sext i16 %d to i64
+ %di = trunc i64 %ai to i16
+ store i16 %di, i16* %s
+
+ %x1 = add i64 %a, %b2
+ %x2 = add i64 %c2, %d2
+ %x3 = add i64 %x1, %x2
+ ret i64 %x3
+}
+
+; CHECK: stores
+; CHECK: ldx [%i0+8], [[R:%[goli][0-7]]]
+; CHECK: stx [[R]], [%i0+16]
+; CHECK: st [[R]], [%i1+-8]
+; CHECK: sth [[R]], [%i2+40]
+; CHECK: stb [[R]], [%i3+-20]
+define void @stores(i64* %p, i32* %q, i16* %r, i8* %s) {
+ %p1 = getelementptr i64* %p, i64 1
+ %p2 = getelementptr i64* %p, i64 2
+ %pv = load i64* %p1
+ store i64 %pv, i64* %p2
+
+ %q2 = getelementptr i32* %q, i32 -2
+ %qv = trunc i64 %pv to i32
+ store i32 %qv, i32* %q2
+
+ %r2 = getelementptr i16* %r, i16 20
+ %rv = trunc i64 %pv to i16
+ store i16 %rv, i16* %r2
+
+ %s2 = getelementptr i8* %s, i8 -20
+ %sv = trunc i64 %pv to i8
+ store i8 %sv, i8* %s2
+
+ ret void
+}
diff --git a/test/CodeGen/SPARC/64cond.ll b/test/CodeGen/SPARC/64cond.ll
new file mode 100644
index 000000000000..6e66a262a4f2
--- /dev/null
+++ b/test/CodeGen/SPARC/64cond.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=sparcv9 | FileCheck %s
+; Testing 64-bit conditionals.
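+; The %xcc field holds the condition codes of 64-bit operations, so the
+; branches and conditional moves below are checked for their %xcc forms.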
+
+; CHECK: cmpri
+; CHECK: subcc %i1, 1
+; CHECK: bpe %xcc,
+define void @cmpri(i64* %p, i64 %x) {
+entry:
+ %tobool = icmp eq i64 %x, 1
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+ store i64 %x, i64* %p, align 8
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+; CHECK: cmprr
+; CHECK: subcc %i1, %i2
+; CHECK: bpgu %xcc,
+define void @cmprr(i64* %p, i64 %x, i64 %y) {
+entry:
+ %tobool = icmp ugt i64 %x, %y
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+ store i64 %x, i64* %p, align 8
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+; CHECK: selecti32_xcc
+; CHECK: subcc %i0, %i1
+; CHECK: movg %xcc, %i2, %i3
+; CHECK: or %g0, %i3, %i0
+define i32 @selecti32_xcc(i64 %x, i64 %y, i32 %a, i32 %b) {
+entry:
+ %tobool = icmp sgt i64 %x, %y
+ %rv = select i1 %tobool, i32 %a, i32 %b
+ ret i32 %rv
+}
+
+; CHECK: selecti64_xcc
+; CHECK: subcc %i0, %i1
+; CHECK: movg %xcc, %i2, %i3
+; CHECK: or %g0, %i3, %i0
+define i64 @selecti64_xcc(i64 %x, i64 %y, i64 %a, i64 %b) {
+entry:
+ %tobool = icmp sgt i64 %x, %y
+ %rv = select i1 %tobool, i64 %a, i64 %b
+ ret i64 %rv
+}
diff --git a/test/CodeGen/SPARC/DbgValueOtherTargets.test b/test/CodeGen/SPARC/DbgValueOtherTargets.test
new file mode 100644
index 000000000000..a669bf848d65
--- /dev/null
+++ b/test/CodeGen/SPARC/DbgValueOtherTargets.test
@@ -0,0 +1 @@
+RUN: llc -O0 -march=sparc -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/SPARC/ctpop.ll b/test/CodeGen/SPARC/ctpop.ll
index e56f4947b52a..916a41496e2a 100644
--- a/test/CodeGen/SPARC/ctpop.ll
+++ b/test/CodeGen/SPARC/ctpop.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=sparc -mattr=-v9 | not grep popc
-; RUN: llc < %s -march=sparcv9 -mattr=v9 | grep popc
+; RUN: llc < %s -march=sparc -mattr=+v9 | grep popc
declare i32 @llvm.ctpop.i32(i32)
diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg
index 786fee9e6610..6f30a8797967 100644
--- a/test/CodeGen/SPARC/lit.local.cfg
+++ b/test/CodeGen/SPARC/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
targets = set(config.root.targets_to_build.split())
if not 'Sparc' in targets:
diff --git a/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index b9039774d42e..000000000000
--- a/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=thumb -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index 9f5a677ed356..d6b649569173 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -47,8 +47,8 @@ declare double @sqrt(double) nounwind readonly
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!0 = metadata !{i32 46, i32 0, metadata !1, null}
-!1 = metadata !{i32 524299, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 524299, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
+!1 = metadata !{i32 524299, metadata !4, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 524299, metadata !4, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", metadata !4, i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
!4 = metadata !{i32 524329, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ]
!5 = metadata !{i32 524305, i32 0, i32 4, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
@@ -61,7 +61,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!12 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null} ; [ DW_TAG_array_type ]
!13 = metadata !{i32 524324, metadata !4, metadata !"double", metadata !4, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
!14 = metadata !{metadata !15}
-!15 = metadata !{i32 524321, i64 0, i64 2} ; [ DW_TAG_subrange_type ]
+!15 = metadata !{i32 524321, i64 0, i64 3} ; [ DW_TAG_subrange_type ]
!16 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
!17 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ]
!18 = metadata !{null, metadata !19, metadata !20}
@@ -140,8 +140,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!91 = metadata !{i32 524544, metadata !1, metadata !"vx", metadata !4, i32 46, metadata !13} ; [ DW_TAG_auto_variable ]
!92 = metadata !{i32 48, i32 0, metadata !1, null}
!93 = metadata !{i32 218, i32 0, metadata !94, metadata !96}
-!94 = metadata !{i32 524299, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
-!95 = metadata !{i32 524299, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
+!94 = metadata !{i32 524299, metadata !4, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
+!95 = metadata !{i32 524299, metadata !4, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
!96 = metadata !{i32 51, i32 0, metadata !1, null}
!97 = metadata !{i32 227, i32 0, metadata !94, metadata !96}
!98 = metadata !{i32 52, i32 0, metadata !1, null}
diff --git a/test/CodeGen/Thumb/DbgValueOtherTargets.test b/test/CodeGen/Thumb/DbgValueOtherTargets.test
new file mode 100644
index 000000000000..afb18a43be47
--- /dev/null
+++ b/test/CodeGen/Thumb/DbgValueOtherTargets.test
@@ -0,0 +1 @@
+RUN: llc -O0 -march=thumb -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/Thumb/iabs.ll b/test/CodeGen/Thumb/iabs.ll
index 2e77660c45c1..76224bc5348c 100644
--- a/test/CodeGen/Thumb/iabs.ll
+++ b/test/CodeGen/Thumb/iabs.ll
@@ -1,22 +1,20 @@
-; RUN: llc < %s -march=thumb -stats 2>&1 | \
-; RUN: grep "4 .*Number of machine instrs printed"
-
-;; Integer absolute value, should produce something as good as:
-;; Thumb:
-;; movs r0, r0
-;; bpl
-;; rsb r0, r0, #0 (with opitmization, bpl + rsb is if-converted into rsbmi)
-;; bx lr
+; RUN: llc < %s -mtriple=thumb-unknown-unknown -filetype=obj -o %t.o
+; RUN: llvm-objdump -disassemble -arch=thumb %t.o | FileCheck %s
define i32 @test(i32 %a) {
%tmp1neg = sub i32 0, %a
%b = icmp sgt i32 %a, -1
%abs = select i1 %b, i32 %a, i32 %tmp1neg
ret i32 %abs
-; CHECK: movs r0, r0
-; CHECK: bpl
-; CHECK: rsb r0, r0, #0
-; CHECK: bx lr
-}
+; This test just checks that 4 instructions were emitted.
+
+; CHECK: {{text}}
+; CHECK: 0:
+; CHECK-NEXT: 2:
+; CHECK-NEXT: 4:
+; CHECK-NEXT: 6:
+
+; CHECK-NOT: 8:
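+; (each 16-bit Thumb instruction advances the disassembly offset by 2, so
+; offsets 0/2/4/6 with no offset 8 mean exactly four instructions)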
+}
diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg
index cb77b09ef4ad..4d75f581a1d2 100644
--- a/test/CodeGen/Thumb/lit.local.cfg
+++ b/test/CodeGen/Thumb/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
targets = set(config.root.targets_to_build.split())
if not 'ARM' in targets:
diff --git a/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll b/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll
new file mode 100644
index 000000000000..3f6407a0a3c0
--- /dev/null
+++ b/test/CodeGen/Thumb/stack-coloring-without-frame-ptr.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=thumb -mcpu=arm1022e
+
+%iterator = type { i8**, i8**, i8**, i8*** }
+%insert_iterator = type { %deque*, %iterator }
+%deque = type { %iterator, %iterator, i8***, i32 }
+
+define i32 @test_thumbv5e_fp_elim() nounwind optsize {
+entry:
+ %var1 = alloca %iterator, align 4
+ %var2 = alloca %insert_iterator, align 4
+ %var3 = alloca %deque, align 4
+
+ %0 = bitcast %deque* %var3 to i8*
+ %1 = bitcast %iterator* %var1 to i8*
+ call void @llvm.lifetime.start(i64 16, i8* %1) nounwind
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %0, i32 16, i32 4, i1 false)
+ call void @llvm.lifetime.end(i64 16, i8* %1) nounwind
+
+ %2 = bitcast %insert_iterator* %var2 to i8*
+ call void @llvm.lifetime.start(i64 20, i8* %2) nounwind
+
+ ret i32 0
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
new file mode 100644
index 000000000000..502b138f65c8
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios6.0.0 -verify-machineinstrs
+
+; Check to make sure the tail-call return at the end doesn't use a
+; callee-saved register. Register hinting from t2LDRDri was getting this
+; wrong. The intervening call will force allocation to try a high register
+; first, so the hint will attempt to fire, but must be rejected due to
+; not being in the allocation order for the tcGPR register class.
+; The machine instruction verifier will make sure that it all actually
+; worked out the way it's supposed to.
+
+%"myclass" = type { %struct.foo }
+%struct.foo = type { i32, [40 x i8] }
+
+define hidden void @func(i8* %Data) nounwind ssp {
+ %1 = getelementptr inbounds i8* %Data, i32 12
+ %2 = bitcast i8* %1 to %"myclass"*
+ tail call void @abc(%"myclass"* %2) nounwind
+ tail call void @def(%"myclass"* %2) nounwind
+ %3 = getelementptr inbounds i8* %Data, i32 8
+ %4 = bitcast i8* %3 to i8**
+ %5 = load i8** %4, align 4, !tbaa !0
+ tail call void @ghi(i8* %5) nounwind
+ %6 = bitcast i8* %Data to void (i8*)**
+ %7 = load void (i8*)** %6, align 4, !tbaa !0
+ %8 = getelementptr inbounds i8* %Data, i32 4
+ %9 = bitcast i8* %8 to i8**
+ %10 = load i8** %9, align 4, !tbaa !0
+ %11 = icmp eq i8* %Data, null
+ br i1 %11, label %14, label %12
+
+; <label>:12 ; preds = %0
+ %13 = tail call %"myclass"* @jkl(%"myclass"* %2) nounwind
+ tail call void @mno(i8* %Data) nounwind
+ br label %14
+
+; <label>:14 ; preds = %12, %0
+ tail call void %7(i8* %10) nounwind
+ ret void
+}
+
+declare void @mno(i8*)
+
+declare void @def(%"myclass"*)
+
+declare void @abc(%"myclass"*)
+
+declare void @ghi(i8*)
+
+declare %"myclass"* @jkl(%"myclass"*) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
new file mode 100644
index 000000000000..937ecc0d6679
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+
+define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind {
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128]
+; CHECK: add.w r[[ADDR:[0-9]+]], r[[SOURCE]], {{r[0-9]+}}, lsl #2
+; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[ADDR]]:32]
+; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0]
+ %val = extractelement <4 x i32> %phitmp, i32 %lane
+ %r1 = insertelement <4 x i32> undef, i32 %val, i32 1
+ %r2 = insertelement <4 x i32> %r1, i32 %val, i32 2
+ %r3 = insertelement <4 x i32> %r2, i32 %val, i32 3
+ store <4 x i32> %r3, <4 x i32>* %p, align 4
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll
new file mode 100644
index 000000000000..203815fadc9c
--- /dev/null
+++ b/test/CodeGen/Thumb2/2013-03-06-vector-sext-operand-scalarize.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+
+; Testing that these don't crash/assert. The loop vectorizer can end up
+; with odd constructs like this. The code actually generated is incidental.
+define <1 x i64> @test_zext(i32 %a) nounwind {
+; CHECK: test_zext:
+ %Cmp = icmp uge i32 %a, 42
+ %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0
+ %Se = zext <1 x i1> %vec to <1 x i64>
+ ret <1 x i64> %Se
+}
+
+define <1 x i64> @test_sext(i32 %a) nounwind {
+; CHECK: test_sext:
+ %Cmp = icmp uge i32 %a, 42
+ %vec = insertelement <1 x i1> zeroinitializer, i1 %Cmp, i32 0
+ %Se = sext <1 x i1> %vec to <1 x i64>
+ ret <1 x i64> %Se
+}
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
index c98ca8098583..3a2803f91f16 100644
--- a/test/CodeGen/Thumb2/aligned-spill.ll
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -26,8 +26,8 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
-; NEON: vst1.64 {d12, d13, d14, d15}, [r4, :128]
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
+; NEON: vst1.64 {d12, d13, d14, d15}, [r4:128]
; Stack pointer adjustment for the stack frame contents.
; This could legally happen before the spills.
; Since the spill slot is only 8 bytes, technically it would be fine to only
@@ -36,8 +36,8 @@ entry:
; NEON: sub sp, #16
; The epilog is free to use another scratch register than r4.
; NEON: add r[[R4:[0-9]+]], sp, #16
-; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]], :128]!
-; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]], :128]
+; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]]:128]!
+; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]]:128]
; The stack pointer restore must happen after the reloads.
; NEON: mov sp,
; NEON: pop
@@ -57,8 +57,8 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
-; NEON: vst1.64 {d12, d13}, [r4, :128]
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
+; NEON: vst1.64 {d12, d13}, [r4:128]
; NEON: vstr d14, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9, d10, d11},
@@ -84,7 +84,7 @@ entry:
; NEON: bic r4, r4, #15
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
-; NEON: vst1.64 {d8, d9}, [r4, :128]
+; NEON: vst1.64 {d8, d9}, [r4:128]
; NEON: vstr d10, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9},
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
index b7df2fbf546c..f6cea72caecd 100644
--- a/test/CodeGen/Thumb2/cortex-fp.ll
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -7,7 +7,7 @@ define float @foo(float %a, float %b) {
entry:
; CHECK: foo
; CORTEXM3: blx ___mulsf3
-; CORTEXM4: vmul.f32 s0, s2, s0
+; CORTEXM4: vmul.f32 s
; CORTEXA8: vmul.f32 d
%0 = fmul float %a, %b
ret float %0
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index cb4d08058f41..6ce0b82b94d7 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs -O0
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"
@@ -76,3 +77,11 @@ entry:
store i32 %num, i32* %p2, align 4
ret void
}
+
+; Check RAFast handling of inline assembly with many dense clobbers.
+; The large tuple aliases of the vector registers can cause problems.
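+; The asm below returns a double in a NEON register while clobbering d0
+; and q1-q15, leaving the fast allocator very little to work with.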
+define void @rdar13249625(double* nocapture %p) nounwind {
+ %1 = tail call double asm sideeffect "@ $0", "=w,~{d0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15}"() nounwind
+ store double %1, double* %p, align 4
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index 2178eecb43e4..bce847471beb 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
-; RUN: grep "ldr.*\[.*\]," | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @test(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b ; <i32> [#uses=2]
@@ -9,4 +8,5 @@ define i32 @test(i32 %a, i32 %b, i32 %c) {
%tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1]
ret i32 %tmp5
}
+; CHECK: ldr r{{.*}}, [{{.*}}],
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index ac059bdaf05d..a8134e630821 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -15,7 +15,7 @@ entry:
; CHECK: t1:
; CHECK: mla r0, r2, r0, r1
; CHECK: add.w r0, r0, r0, lsl #3
-; CHECL: add.w r0, r3, r0, lsl #2
+; CHECK: add.w r0, r3, r0, lsl #2
%mul = mul i32 %n, %i
%add = add i32 %mul, %j
%0 = ptrtoint %struct.CMPoint* %thePoints to i32
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index 98854a1205f8..05dd90cfbfed 100644
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,24 +1,27 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s --check-prefix=A8
+; RUN: llc < %s -march=thumb -mcpu=swift | FileCheck %s --check-prefix=SWIFT
+
+; rdar://12892707
define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsl
-; CHECK: add.w r0, r0, r1, lsl #16
+; A8: t2ADDrs_lsl
+; A8: add.w r0, r0, r1, lsl #16
%A = shl i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
}
define i32 @t2ADDrs_lsr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsr
-; CHECK: add.w r0, r0, r1, lsr #16
+; A8: t2ADDrs_lsr
+; A8: add.w r0, r0, r1, lsr #16
%A = lshr i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
}
define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_asr
-; CHECK: add.w r0, r0, r1, asr #16
+; A8: t2ADDrs_asr
+; A8: add.w r0, r0, r1, asr #16
%A = ashr i32 %Y, 16
%B = add i32 %X, %A
ret i32 %B
@@ -26,8 +29,8 @@ define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
; i32 ror(n) = (x >> n) | (x << (32 - n))
define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_ror
-; CHECK: add.w r0, r0, r1, ror #16
+; A8: t2ADDrs_ror
+; A8: add.w r0, r0, r1, ror #16
%A = lshr i32 %Y, 16
%B = shl i32 %Y, 16
%C = or i32 %B, %A
@@ -36,13 +39,66 @@ define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
}
define i32 @t2ADDrs_noRegShift(i32 %X, i32 %Y, i8 %sh) {
-; CHECK: t2ADDrs_noRegShift
-; CHECK: uxtb r2, r2
-; CHECK: lsls r1, r2
-; CHECK: add r0, r1
+; A8: t2ADDrs_noRegShift
+; A8: uxtb r2, r2
+; A8: lsls r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift
+; SWIFT-NOT: lsls
+; SWIFT: lsl.w
+ %shift.upgrd.1 = zext i8 %sh to i32
+ %A = shl i32 %Y, %shift.upgrd.1
+ %B = add i32 %X, %A
+ ret i32 %B
+}
+
+define i32 @t2ADDrs_noRegShift2(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift2
+; A8: uxtb r2, r2
+; A8: lsrs r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift2
+; SWIFT-NOT: lsrs
+; SWIFT: lsr.w
+ %shift.upgrd.1 = zext i8 %sh to i32
+ %A = lshr i32 %Y, %shift.upgrd.1
+ %B = add i32 %X, %A
+ ret i32 %B
+}
+
+define i32 @t2ADDrs_noRegShift3(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift3
+; A8: uxtb r2, r2
+; A8: asrs r1, r2
+; A8: add r0, r1
+
+; SWIFT: t2ADDrs_noRegShift3
+; SWIFT-NOT: asrs
+; SWIFT: asr.w
+ %shift.upgrd.1 = zext i8 %sh to i32
+ %A = ashr i32 %Y, %shift.upgrd.1
+ %B = add i32 %X, %A
+ ret i32 %B
+}
+
+define i32 @t2ADDrs_optsize(i32 %X, i32 %Y, i8 %sh) optsize {
+; SWIFT: t2ADDrs_optsize
+; SWIFT-NOT: lsl.w
+; SWIFT: lsls
%shift.upgrd.1 = zext i8 %sh to i32
%A = shl i32 %Y, %shift.upgrd.1
%B = add i32 %X, %A
ret i32 %B
}
+define i32 @t2ADDrs_minsize(i32 %X, i32 %Y, i8 %sh) minsize {
+; SWIFT: t2ADDrs_minsize
+; SWIFT-NOT: lsr.w
+; SWIFT: lsrs
+ %shift.upgrd.1 = zext i8 %sh to i32
+ %A = lshr i32 %Y, %shift.upgrd.1
+ %B = add i32 %X, %A
+ ret i32 %B
+}
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index d9a0617f5a46..5bff268e2c3e 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
; CHECK: bic r4, r4, #15
-; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
-; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
+; CHECK: vst1.64 {{.*}}[{{.*}}:128]
+; CHECK: vld1.64 {{.*}}[{{.*}}:128]
entry:
%aligned_vec = alloca <4 x float>, align 16
%"alloca point" = bitcast i32 0 to i32
diff --git a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
index 0af2445d7fba..2e4cb1fe7eda 100644
--- a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
+++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; The old instruction selector used to load all arguments to a call up in
; registers, then start pushing them all onto the stack. This is bad news as
; it makes a ton of annoying overlapping live ranges. This code should not
diff --git a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
index 1a3d74918d1a..7673124d5dda 100644
--- a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
+++ b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -stats 2>&1 | \
; RUN: grep asm-printer | grep 7
diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
index 5cba3efeefb8..faa3e21a934d 100644
--- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
+++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | \
; RUN: not grep "Number of register spills"
; END.
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
index 1c75f93915a7..0afddd8f876f 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \
; RUN: grep asm-printer | grep 14
;
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
index 95eefa1e7196..222b7a0b41fd 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched2.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -stats 2>&1 | \
; RUN: grep asm-printer | grep 13
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 37c510786a5e..6912351d7b7e 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats -realign-stack=0 2>&1 | \
; RUN: grep "asm-printer" | grep 35
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index a1b973d7ccfa..363a6008a00d 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16
; PR1909
diff --git a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
index 19a73543c65e..fc38135032c2 100644
--- a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
+++ b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movups | count 2
+; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s
define void @a(<4 x float>* %x) nounwind {
entry:
@@ -8,4 +8,10 @@ entry:
ret void
}
+; CHECK: a:
+; CHECK: movups
+; CHECK: movups
+; CHECK-NOT: movups
+; CHECK: ret
+
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>)
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
index d423bfc389df..496779c468f4 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -1,10 +1,15 @@
; Check that eh_return & unwind_init were properly lowered
-; RUN: llc < %s | grep %rbp | count 7
-; RUN: llc < %s | grep %rcx | count 3
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
+; CHECK: test
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: popq %rbp
+; CHECK: movq %rcx, %rsp
+; CHECK: ret # eh_return, addr: %rcx
define i8* @test(i64 %a, i8* %b) {
entry:
call void @llvm.eh.unwind.init()
@@ -15,3 +20,36 @@ entry:
declare void @llvm.eh.return.i64(i64, i8*)
declare void @llvm.eh.unwind.init()
+
+@b = common global i32 0, align 4
+@a = common global i32 0, align 4
+
+; PR14750
+; This function contains a normal return as well as eh_return.
+; CHECK: _Unwind_Resume_or_Rethrow
+define i32 @_Unwind_Resume_or_Rethrow() nounwind uwtable ssp {
+entry:
+ %0 = load i32* @b, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ ret i32 0
+
+if.end: ; preds = %entry
+ %call = tail call i32 (...)* @_Unwind_ForcedUnwind_Phase2() nounwind
+ store i32 %call, i32* @a, align 4
+ %tobool1 = icmp eq i32 %call, 0
+ br i1 %tobool1, label %cond.end, label %cond.true
+
+cond.true: ; preds = %if.end
+ tail call void @abort() noreturn nounwind
+ unreachable
+
+cond.end: ; preds = %if.end
+ tail call void @llvm.eh.return.i64(i64 0, i8* null)
+ unreachable
+}
+
+declare i32 @_Unwind_ForcedUnwind_Phase2(...)
+declare void @abort() noreturn
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index b2cf34cd2033..0310a5dcb565 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats 2>&1 | FileCheck %s
; Now this test spills one register. But a reload in the loop is cheaper than
; the divsd so it's a win.
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
deleted file mode 100644
index a57f7166cadc..000000000000
--- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; Linux doesn't support stack realignment for functions with allocas (PR2888).
-; Until it does, we shouldn't use movaps to access the stack. On targets with
-; sufficiently aligned stack (e.g. darwin) we should.
-; PR8969 - make 32-bit linux have a 16-byte aligned stack
-; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | grep movaps | count 2
-; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
-
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-
-define void @foo(i32 %t) nounwind {
- %tmp1210 = alloca i8, i32 32, align 4
- call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false)
- %x = alloca i8, i32 %t
- call void @dummy(i8* %x)
- ret void
-}
-
-declare void @dummy(i8*)
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
index 0dca14d064eb..890fd0f067cf 100644
--- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
+++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -78,7 +78,7 @@ declare void @llvm.stackrestore(i8*) nounwind
!9 = metadata !{i32 458767, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
!10 = metadata !{i32 458753, metadata !2, metadata !"", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !5, metadata !11, i32 0, null} ; [ DW_TAG_array_type ]
!11 = metadata !{metadata !12}
-!12 = metadata !{i32 458785, i64 0, i64 0} ; [ DW_TAG_subrange_type ]
+!12 = metadata !{i32 458785, i64 0, i64 1} ; [ DW_TAG_subrange_type ]
!13 = metadata !{i32 3, i32 0, metadata !14, null}
!14 = metadata !{i32 458763, metadata !1, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
!15 = metadata !{i32 4, i32 0, metadata !14, null}
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
index 9cbf35094061..9ea34e27a17e 100644
--- a/test/CodeGen/X86/2009-02-25-CommuteBug.ll
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | not grep commuted
; rdar://6608609
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index d50fe6f73a00..68a9fafb6de8 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm"
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s
; rdar://6627786
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index d934ec9a88f8..351a1722a231 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t
; RUN: not grep spill %t
; RUN: not grep "%rsp" %t
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
index ad18a0c5b94d..0607eda271af 100644
--- a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | grep "Number of modref unfolded"
; XFAIL: *
; 69408 removed the opportunity for this optimization to work
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
index 94075e78a28a..c2d9d84d4c5a 100644
--- a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -6,15 +6,16 @@
define void @t(i32 %count) ssp nounwind {
entry:
; CHECK: t:
-; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
-; CHECK: movups L_str(%rip), %xmm0
+; CHECK: movups L_str+12(%rip), %xmm0
+; CHECK: movups L_str(%rip), %xmm1
%tmp0 = alloca [60 x i8], align 1
%tmp1 = getelementptr inbounds [60 x i8]* %tmp0, i64 0, i64 0
br label %bb1
bb1:
; CHECK: LBB0_1:
-; CHECK: movaps %xmm0, (%rsp)
+; CHECK: movups %xmm0, 12(%rsp)
+; CHECK: movaps %xmm1, (%rsp)
%tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1, i1 false)
%tmp3 = add i32 %tmp2, 1
diff --git a/test/CodeGen/X86/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll
index 85ee091c3478..7dba332b1bec 100644
--- a/test/CodeGen/X86/2010-01-18-DbgValue.ll
+++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -28,21 +28,25 @@ return: ; preds = %entry
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-!0 = metadata !{i32 524545, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"b2.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"b2.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!llvm.dbg.cu = !{!3}
+
+!0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
!5 = metadata !{metadata !6, metadata !7}
-!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 524307, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
+!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
!8 = metadata !{metadata !9, metadata !14}
-!9 = metadata !{i32 524301, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 524307, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
!11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 524301, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!13 = metadata !{i32 524301, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
-!14 = metadata !{i32 524301, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
+!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
!15 = metadata !{i32 11, i32 0, metadata !1, null}
!16 = metadata !{i32 12, i32 0, metadata !17, null}
-!17 = metadata !{i32 524299, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{i32 786443, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{metadata !1}
+!19 = metadata !{metadata !"b2.c", metadata !"/tmp/"}
diff --git a/test/CodeGen/X86/2010-01-19-OptExtBug.ll b/test/CodeGen/X86/2010-01-19-OptExtBug.ll
index eb4a5c04a2ae..ec24e73c34ac 100644
--- a/test/CodeGen/X86/2010-01-19-OptExtBug.ll
+++ b/test/CodeGen/X86/2010-01-19-OptExtBug.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats 2>&1 | not grep ext-opt
define fastcc i8* @S_scan_str(i8* %start, i32 %keep_quoted, i32 %keep_delims) nounwind ssp {
diff --git a/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 42f19b3ad86a..000000000000
--- a/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=x86 -asm-verbose < %s | FileCheck %s
-; RUN: llc -O0 -march=x86-64 -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 2fceab6f091f..8ab93fcb978f 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -198,27 +198,27 @@ declare float @copysignf(float, float) nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.lv = !{!0, !11, !12, !13, !14, !16, !17, !18}
+!llvm.dbg.cu = !{!3}
-!0 = metadata !{i32 524545, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !45} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !44, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !45, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
!5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9}
-!6 = metadata !{i32 524310, metadata !7, metadata !"SCtype", metadata !7, i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
-!7 = metadata !{i32 524329, metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ]
-!8 = metadata !{i32 524324, metadata !2, metadata !"complex float", metadata !2, i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 524310, metadata !7, metadata !"SFtype", metadata !7, i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
-!10 = metadata !{i32 524324, metadata !2, metadata !"float", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 524545, metadata !1, metadata !"c", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
-!13 = metadata !{i32 524545, metadata !1, metadata !"d", metadata !2, i32 1921, metadata !9} ; [ DW_TAG_arg_variable ]
-!14 = metadata !{i32 524544, metadata !15, metadata !"denom", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 524299, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 524544, metadata !15, metadata !"ratio", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
-!17 = metadata !{i32 524544, metadata !15, metadata !"x", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 524544, metadata !15, metadata !"y", metadata !2, i32 1923, metadata !9} ; [ DW_TAG_auto_variable ]
+!6 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SCtype", i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
+!7 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ]
+!8 = metadata !{i32 786468, metadata !45, metadata !2, metadata !"complex float", i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SFtype", i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!10 = metadata !{i32 786468, metadata !45, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 786689, metadata !1, metadata !"b", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 786689, metadata !1, metadata !"d", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
+!14 = metadata !{i32 786688, metadata !15, metadata !"denom", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 786443, metadata !2, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786688, metadata !15, metadata !"ratio", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
+!17 = metadata !{i32 786688, metadata !15, metadata !"x", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
!19 = metadata !{i32 1929, i32 0, metadata !15, null}
!20 = metadata !{i32 1931, i32 0, metadata !15, null}
!21 = metadata !{i32 1932, i32 0, metadata !15, null}
@@ -243,3 +243,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!40 = metadata !{i32 1964, i32 0, metadata !15, null}
!41 = metadata !{i32 1965, i32 0, metadata !15, null}
!42 = metadata !{i32 1969, i32 0, metadata !15, null}
+!43 = metadata !{metadata !0, metadata !11, metadata !12, metadata !13, metadata !14, metadata !16, metadata !17, metadata !18}
+!44 = metadata !{metadata !1}
+!45 = metadata !{metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
+!46 = metadata !{metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 7909d2736b9c..6519ca063a7c 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -21,40 +21,45 @@ declare void @foo(i32) nounwind optsize noinline ssp
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.gv = !{!0}
-!llvm.dbg.lv = !{!4, !8, !18, !25, !26}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 524340, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null} ; [ DW_TAG_variable ]
-!1 = metadata !{i32 524329, metadata !"foo.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!4 = metadata !{i32 524545, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3} ; [ DW_TAG_arg_variable ]
-!5 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786484, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null} ; [ DW_TAG_variable ]
+!1 = metadata !{i32 786473, metadata !36} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !36, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !32, metadata !31, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!4 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
!7 = metadata !{null, metadata !3}
-!8 = metadata !{i32 524545, metadata !9, metadata !"myvar", metadata !1, i32 17, metadata !13} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{i32 786689, metadata !9, metadata !"myvar", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar, null, null, metadata !34, i32 17} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
!11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!13 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 524307, metadata !1, metadata !"a", metadata !1, i32 2, i64 128, i64 64, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ]
+!12 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 786451, metadata !1, metadata !"a", metadata !1, i32 2, i64 128, i64 64, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ]
!15 = metadata !{metadata !16, metadata !17}
-!16 = metadata !{i32 524301, metadata !14, metadata !"c", metadata !1, i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ]
-!17 = metadata !{i32 524301, metadata !14, metadata !"d", metadata !1, i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ]
-!18 = metadata !{i32 524545, metadata !19, metadata !"argc", metadata !1, i32 22, metadata !3} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 524334, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
-!20 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 786445, metadata !14, metadata !"c", metadata !1, i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ]
+!17 = metadata !{i32 786445, metadata !14, metadata !"d", metadata !1, i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ]
+!18 = metadata !{i32 786689, metadata !19, metadata !"argc", metadata !1, i32 22, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !35, i32 22} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null} ; [ DW_TAG_subroutine_type ]
!21 = metadata !{metadata !3, metadata !3, metadata !22}
-!22 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ]
-!24 = metadata !{i32 524324, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!25 = metadata !{i32 524545, metadata !19, metadata !"argv", metadata !1, i32 22, metadata !22} ; [ DW_TAG_arg_variable ]
-!26 = metadata !{i32 524544, metadata !27, metadata !"e", metadata !1, i32 23, metadata !14} ; [ DW_TAG_auto_variable ]
-!27 = metadata !{i32 524299, metadata !19, i32 22, i32 0} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ]
+!24 = metadata !{i32 786468, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!25 = metadata !{i32 786689, metadata !19, metadata !"argv", metadata !1, i32 22, metadata !22, i32 0, null} ; [ DW_TAG_arg_variable ]
+!26 = metadata !{i32 786688, metadata !27, metadata !"e", metadata !1, i32 23, metadata !14, i32 0, null} ; [ DW_TAG_auto_variable ]
+!27 = metadata !{i32 786443, metadata !36, metadata !19, i32 22, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
!28 = metadata !{i32 18, i32 0, metadata !29, null}
-!29 = metadata !{i32 524299, metadata !9, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
+!29 = metadata !{i32 786443, metadata !36, metadata !9, i32 17, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
!30 = metadata !{i32 19, i32 0, metadata !29, null}
+!31 = metadata !{metadata !0}
+!32 = metadata !{metadata !5, metadata !9, metadata !19}
+!33 = metadata !{metadata !4}
+!34 = metadata !{metadata !8}
+!35 = metadata !{metadata !18, metadata !25, metadata !26}
+!36 = metadata !{metadata !"foo.c", metadata !"/tmp/"}
; The variable bar:myvar changes registers after the first movq.
; It is clobbered by popq %rbx
@@ -79,4 +84,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
; CHECK-NEXT: .short Lset{{.*}}
; CHECK-NEXT: Ltmp{{.*}}:
; CHECK-NEXT: .byte 83
-; CHECK-NEXT: Ltmp{{.*}}:
\ No newline at end of file
+; CHECK-NEXT: Ltmp{{.*}}:
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index 1a0da3177a22..4ea3bf077841 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -22,23 +22,27 @@ entry:
ret i32 %1, !dbg !13
}
-!llvm.dbg.lv = !{!0, !7}
+!llvm.dbg.cu = !{!3}
-!0 = metadata !{i32 524545, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"f.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"f.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
!5 = metadata !{metadata !6, metadata !6}
-!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 524545, metadata !8, metadata !"x", metadata !2, i32 6, metadata !6} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 524334, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !2, i32 6, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 786478, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @bar, null, null, metadata !16, i32 6} ; [ DW_TAG_subprogram ]
!9 = metadata !{i32 3, i32 0, metadata !10, null}
-!10 = metadata !{i32 524299, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
!11 = metadata !{i32 1}
!12 = metadata !{i32 3, i32 0, metadata !10, metadata !13}
!13 = metadata !{i32 7, i32 0, metadata !14, null}
-!14 = metadata !{i32 524299, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 786443, metadata !2, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{metadata !0}
+!16 = metadata !{metadata !7}
+!17 = metadata !{metadata !1, metadata !8}
+!18 = metadata !{metadata !"f.c", metadata !"/tmp"}
;CHECK: DEBUG_VALUE: bar:x <- E
;CHECK: Ltmp
diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
index a9c03ee563d8..b764b0b34597 100644
--- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
+++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -21,34 +21,35 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28}
-!0 = metadata !{i32 524545, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", metadata !3, i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524307, metadata !3, metadata !"foo", metadata !3, i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_structure_type ]
-!3 = metadata !{i32 524329, metadata !"foo.cp", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 524305, i32 0, i32 4, metadata !"foo.cp", metadata !"/tmp/", metadata !"4.2.1 LLVM build", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !3, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786451, metadata !3, metadata !"foo", metadata !3, i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_structure_type ]
+!3 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cp", metadata !"/tmp/", metadata !"4.2.1 LLVM build", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
!5 = metadata !{metadata !6, metadata !1, metadata !8}
-!6 = metadata !{i32 524301, metadata !2, metadata !"y", metadata !3, i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ]
-!7 = metadata !{i32 524324, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 524334, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", metadata !3, i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!6 = metadata !{i32 786445, metadata !2, metadata !"y", metadata !3, i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ]
+!7 = metadata !{i32 786468, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786478, metadata !3, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_subroutine_type ]
!10 = metadata !{metadata !7, metadata !11, metadata !7}
-!11 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 524326, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !13} ; [ DW_TAG_const_type ]
-!13 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 524545, metadata !1, metadata !"x", metadata !3, i32 11, metadata !7} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 524545, metadata !8, metadata !"this", metadata !3, i32 15, metadata !12} ; [ DW_TAG_arg_variable ]
-!16 = metadata !{i32 524545, metadata !8, metadata !"x", metadata !3, i32 15, metadata !7} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 524545, metadata !18, metadata !"argc", metadata !3, i32 19, metadata !7} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 524334, i32 0, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true} ; [ DW_TAG_subprogram ]
-!19 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!11 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 786470, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !13} ; [ DW_TAG_const_type ]
+!13 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !3, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 786689, metadata !8, metadata !"this", metadata !3, i32 15, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !3, i32 15, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 786689, metadata !18, metadata !"argc", metadata !3, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!18 = metadata !{i32 786478, metadata !3, metadata !3, metadata !"main", metadata !"main", metadata !"main", i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, null, i32 19} ; [ DW_TAG_subprogram ]
+!19 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ]
!20 = metadata !{metadata !7, metadata !7, metadata !21}
-!21 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
-!22 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 524324, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!24 = metadata !{i32 524545, metadata !18, metadata !"argv", metadata !3, i32 19, metadata !21} ; [ DW_TAG_arg_variable ]
-!25 = metadata !{i32 524544, metadata !26, metadata !"a", metadata !3, i32 20, metadata !2} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 524299, metadata !27, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 524299, metadata !18, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
-!28 = metadata !{i32 524544, metadata !26, metadata !"b", metadata !3, i32 21, metadata !7} ; [ DW_TAG_auto_variable ]
+!21 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!23 = metadata !{i32 786468, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!24 = metadata !{i32 786689, metadata !18, metadata !"argv", metadata !3, i32 19, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ]
+!25 = metadata !{i32 786688, metadata !26, metadata !"a", metadata !3, i32 20, metadata !2, i32 0, null} ; [ DW_TAG_auto_variable ]
+!26 = metadata !{i32 786443, metadata !27, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 786443, metadata !18, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786688, metadata !26, metadata !"b", metadata !3, i32 21, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
!29 = metadata !{i32 16, i32 0, metadata !30, null}
-!30 = metadata !{i32 524299, metadata !8, i32 15, i32 0} ; [ DW_TAG_lexical_block ]
+!30 = metadata !{i32 786443, metadata !8, i32 15, i32 0} ; [ DW_TAG_lexical_block ]
+!31 = metadata !{metadata !"foo.cp", metadata !"/tmp/"}
diff --git a/test/CodeGen/X86/2010-07-06-DbgCrash.ll b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
index edd6015b0d28..208e93e098e6 100644
--- a/test/CodeGen/X86/2010-07-06-DbgCrash.ll
+++ b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
@@ -16,7 +16,7 @@
!103 = metadata !{i32 524299, metadata !97, i32 73, i32 0} ; [ DW_TAG_lexical_block ]
!104 = metadata !{i32 524289, metadata !38, metadata !"", metadata !38, i32 0, i64 85312, i64 64, i64 0, i32 0, metadata !46, metadata !105, i32 0, null} ; [ DW_TAG_array_type ]
!105 = metadata !{metadata !106}
-!106 = metadata !{i32 524321, i64 0, i64 1332} ; [ DW_TAG_subrange_type ]
+!106 = metadata !{i32 524321, i64 0, i64 1333} ; [ DW_TAG_subrange_type ]
!107 = metadata !{i32 73, i32 0, metadata !103, null}
define i32 @main() nounwind ssp {
diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll
index ba36fe7c12fd..aaa562a439d5 100644
--- a/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -74,51 +74,52 @@ return: ; preds = %entry
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0, !9, !16, !17, !20}
+!llvm.dbg.cu = !{!3}
+!46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20}
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524307, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
-!2 = metadata !{i32 524329, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 4, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
+!2 = metadata !{i32 786473, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !46, null, metadata !""} ; [ DW_TAG_compile_unit ]
!4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
-!5 = metadata !{i32 524301, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 524301, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
-!8 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 786445, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
+!8 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 12} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
!11 = metadata !{null, metadata !12, metadata !13}
-!12 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
-!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ]
!15 = metadata !{null, metadata !12}
-!16 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ]
-!18 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!16 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 16} ; [ DW_TAG_subprogram ]
+!18 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ]
!19 = metadata !{metadata !13, metadata !13, metadata !1}
-!20 = metadata !{i32 524334, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!21 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!20 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 23} ; [ DW_TAG_subprogram ]
+!21 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ]
!22 = metadata !{metadata !13}
-!23 = metadata !{i32 524545, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
!24 = metadata !{i32 16, i32 0, metadata !17, null}
-!25 = metadata !{i32 524545, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26} ; [ DW_TAG_arg_variable ]
-!26 = metadata !{i32 524304, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
+!25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, null} ; [ DW_TAG_arg_variable ]
+!26 = metadata !{i32 786448, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
!27 = metadata !{i32 17, i32 0, metadata !28, null}
-!28 = metadata !{i32 524299, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786443, metadata !2, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
!29 = metadata !{i32 18, i32 0, metadata !28, null}
!30 = metadata !{i32 20, i32 0, metadata !28, null}
-!31 = metadata !{i32 524545, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32} ; [ DW_TAG_arg_variable ]
-!32 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ]
-!33 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
+!31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, null} ; [ DW_TAG_arg_variable ]
+!32 = metadata !{i32 786470, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ]
+!33 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
!34 = metadata !{i32 11, i32 0, metadata !16, null}
!35 = metadata !{i32 11, i32 0, metadata !36, null}
-!36 = metadata !{i32 524299, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
-!37 = metadata !{i32 524299, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
-!38 = metadata !{i32 524544, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1} ; [ DW_TAG_auto_variable ]
-!39 = metadata !{i32 524299, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ]
-!40 = metadata !{i32 524299, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ]
+!36 = metadata !{i32 786443, metadata !2, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
+!37 = metadata !{i32 786443, metadata !2, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, null} ; [ DW_TAG_auto_variable ]
+!39 = metadata !{i32 786443, metadata !2, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 786443, metadata !2, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
!41 = metadata !{i32 24, i32 0, metadata !39, null}
!42 = metadata !{i32 25, i32 0, metadata !39, null}
!43 = metadata !{i32 26, i32 0, metadata !39, null}
-!44 = metadata !{i32 524544, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13} ; [ DW_TAG_auto_variable ]
+!44 = metadata !{i32 786688, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
!45 = metadata !{i32 27, i32 0, metadata !39, null}
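The renumbering in this hunk, and throughout the debug-info test updates that follow, is mechanical: in this era of LLVM the first field of every debug node encodes DW_TAG | (LLVMDebugVersion << 16), and these tests move from debug info version 8 or 9 up to version 12 (0xC0000). Some nodes also shuffle their field layout, as the hunks show, but the tag arithmetic can be checked directly against the constants; a comment-only sketch:

; tag field = DW_TAG | (LLVMDebugVersion << 16)
;   524334 = (8  << 16) + 0x2e  -> version 8,  DW_TAG_subprogram
;   589870 = (9  << 16) + 0x2e  -> version 9,  DW_TAG_subprogram
;   786478 = (12 << 16) + 0x2e  -> version 12, DW_TAG_subprogram
;   786443 = (12 << 16) + 0x0b  -> version 12, DW_TAG_lexical_block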
diff --git a/test/CodeGen/X86/2010-08-10-DbgConstant.ll b/test/CodeGen/X86/2010-08-10-DbgConstant.ll
deleted file mode 100644
index b3cc35d723f7..000000000000
--- a/test/CodeGen/X86/2010-08-10-DbgConstant.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc -mtriple=i686-linux -O0 < %s | FileCheck %s
-; CHECK: DW_TAG_constant
-; CHECK-NEXT: .long .Lstring3 #{{#?}} DW_AT_name
-
-define void @foo() nounwind ssp {
-entry:
- call void @bar(i32 201), !dbg !8
- ret void, !dbg !8
-}
-
-declare void @bar(i32)
-
-!llvm.dbg.sp = !{!0}
-!llvm.dbg.gv = !{!5}
-
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !"clang 2.8", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{null}
-!5 = metadata !{i32 524327, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201} ; [ DW_TAG_constant ]
-!6 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ]
-!7 = metadata !{i32 524324, metadata !1, metadata !"unsigned int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 3, i32 14, metadata !9, null}
-!9 = metadata !{i32 524299, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
index bed8c8a77b9a..de0d216e266f 100644
--- a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
+++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -12,18 +12,21 @@ entry:
ret i32 21, !dbg !10
}
-!llvm.dbg.sp = !{!0, !6}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !"", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"bug.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114084)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !15, i32 12, metadata !"clang version 2.9 (trunk 114084)", i1 false, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, metadata !13, null} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524334, i32 0, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 524329, metadata !"bug.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
!8 = metadata !{i32 53, i32 13, metadata !9, null}
-!9 = metadata !{i32 524299, metadata !0, i32 53, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 786443, metadata !0, i32 53, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
!10 = metadata !{i32 4, i32 13, metadata !11, null}
-!11 = metadata !{i32 524299, metadata !12, i32 4, i32 13, metadata !7, i32 2} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 524299, metadata !6, i32 4, i32 11, metadata !7, i32 1} ; [ DW_TAG_lexical_block ]
+!11 = metadata !{i32 786443, metadata !12, i32 4, i32 13, metadata !7, i32 2} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 786443, metadata !6, i32 4, i32 11, metadata !7, i32 1} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{metadata !0, metadata !6}
+!14 = metadata !{metadata !"", metadata !"/private/tmp"}
+!15 = metadata !{metadata !"bug.c", metadata !"/private/tmp"}
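Two structural changes recur in this file and in most of the debug-info updates below: the per-function !llvm.dbg.sp and !llvm.dbg.lv.* anchors collapse into a single !llvm.dbg.cu entry whose compile unit carries the subprogram list, and DW_TAG_file_type nodes stop embedding filename/directory strings in favor of a trailing (filename, directory) pair node. A minimal sketch using the node numbers from this test:

!llvm.dbg.cu = !{!2}                                      ; one anchor replaces !llvm.dbg.sp
!1 = metadata !{i32 786473, metadata !14}                 ; DW_TAG_file_type points at a file pair
!13 = metadata !{metadata !0, metadata !6}                ; subprogram list referenced by the CU
!14 = metadata !{metadata !"", metadata !"/private/tmp"}  ; (filename, directory)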
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
index 79c0cf35c660..31a6822b34b8 100644
--- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -15,21 +15,23 @@ entry:
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0}
-!llvm.dbg.lv.foo = !{!6}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"one.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 117922)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, null, null, metadata !15, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 589843, metadata !1, metadata !"bar", metadata !1, i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_structure_type ]
+!5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786689, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 786451, metadata !1, metadata !"bar", metadata !1, i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_structure_type ]
!9 = metadata !{metadata !10, metadata !11}
-!10 = metadata !{i32 589837, metadata !1, metadata !"x", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!11 = metadata !{i32 589837, metadata !1, metadata !"y", metadata !1, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786445, metadata !1, metadata !"x", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!11 = metadata !{i32 786445, metadata !1, metadata !"y", metadata !1, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
!12 = metadata !{i32 3, i32 47, metadata !0, null}
!13 = metadata !{i32 4, i32 2, metadata !14, null}
-!14 = metadata !{i32 589835, metadata !0, i32 3, i32 50, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 786443, metadata !0, i32 3, i32 50, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{metadata !0}
+!16 = metadata !{metadata !6}
+!17 = metadata !{metadata !"one.c", metadata !"/private/tmp"}
diff --git a/test/CodeGen/X86/2010-12-02-MC-Set.ll b/test/CodeGen/X86/2010-12-02-MC-Set.ll
index 31446786ec15..4d8d974f703e 100644
--- a/test/CodeGen/X86/2010-12-02-MC-Set.ll
+++ b/test/CodeGen/X86/2010-12-02-MC-Set.ll
@@ -6,17 +6,18 @@ entry:
ret void, !dbg !5
}
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
+!7 = metadata !{metadata !0}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"e.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"e.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 120563)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !"e.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 120563)", i1 false, metadata !"", i32 0, null, null, metadata !7, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{null}
!5 = metadata !{i32 5, i32 1, metadata !6, null}
-!6 = metadata !{i32 589835, metadata !0, i32 3, i32 16, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!6 = metadata !{i32 786443, metadata !0, i32 3, i32 16, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
; CHECK: .subsections_via_symbols
; CHECK-NEXT: __debug_line
-; CHECK-NEXT: Ltmp
+; CHECK-NEXT: Lline_table_start0
; CHECK-NEXT: Ltmp{{[0-9]}} = (Ltmp
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 166dcf259989..2355528a81e8 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin10.0.0"
; Check debug info for variable z_s
-;CHECK: .long Lset13
+;CHECK: .long Lset14
;CHECK-NEXT: ## DW_AT_decl_file
;CHECK-NEXT: ## DW_AT_decl_line
;CHECK-NEXT: ## DW_AT_type
@@ -69,35 +69,37 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
declare i32 @puts(i8* nocapture) nounwind
-!llvm.dbg.sp = !{!0, !6}
-!llvm.dbg.lv.gcd = !{!10, !11, !12}
-!llvm.dbg.lv.main = !{!14, !17}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"rem_small.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"rem_small.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 124117)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"long int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786468, null, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main, null, null, metadata !30, i32 0} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9}
-!9 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 590080, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!13 = metadata !{i32 589835, metadata !0, i32 5, i32 52, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{i32 590080, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 589835, metadata !6, i32 25, i32 12, metadata !1, i32 2} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 590080, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0} ; [ DW_TAG_auto_variable ]
+!9 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 786688, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!13 = metadata !{i32 786443, metadata !1, metadata !0, i32 5, i32 52, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 786688, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0, null} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 786443, metadata !1, metadata !6, i32 25, i32 12, i32 2} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786688, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
!18 = metadata !{i32 5, i32 41, metadata !0, null}
!19 = metadata !{i32 5, i32 49, metadata !0, null}
!20 = metadata !{i32 7, i32 5, metadata !13, null}
!21 = metadata !{i32 8, i32 9, metadata !22, null}
-!22 = metadata !{i32 589835, metadata !13, i32 7, i32 14, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !1, metadata !13, i32 7, i32 14, i32 1} ; [ DW_TAG_lexical_block ]
!23 = metadata !{i32 9, i32 9, metadata !22, null}
!24 = metadata !{i32 26, i32 38, metadata !15, null}
!25 = metadata !{i32 27, i32 38, metadata !15, null}
!26 = metadata !{i32 28, i32 9, metadata !15, null}
!27 = metadata !{i32 30, i32 1, metadata !15, null}
+!28 = metadata !{metadata !0, metadata !6}
+!29 = metadata !{metadata !10, metadata !11, metadata !12}
+!30 = metadata !{metadata !14, metadata !17}
+!31 = metadata !{metadata !"rem_small.c", metadata !"/private/tmp"}
diff --git a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
index 47ef693cc25e..6f43b94b264a 100644
--- a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
+++ b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats 2>&1 | FileCheck %s
;
; This test should not cause any spilling with RAFast.
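The added REQUIRES: asserts line, here and in the other -stats tests in this import, reflects that LLVM's Statistic counters are by default compiled in only for assertion-enabled builds, so grepping or FileCheck-ing the -stats output of a release build would match nothing. The gating pattern, quoted from this test:

; REQUIRES: asserts
; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats 2>&1 | FileCheck %s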
diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
index a5ec614a943b..54d2b403509d 100644
--- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -disable-code-place | FileCheck %s
+; RUN: llc < %s -march=x86 -disable-block-placement | FileCheck %s
;
; Test RegistersDefinedFromSameValue. We have multiple copies of the same vreg:
; while.body85.i:
diff --git a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
index a7207537de21..da734d4b6454 100644
--- a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
+++ b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK: main
define i32 @main() nounwind uwtable {
entry:
-; CHECK: movsbq j(%rip), %
-; CHECK: movsbq i(%rip), %
+; CHECK: pmovsxbq j(%rip), %
+; CHECK: pmovsxbq i(%rip), %
%0 = load <2 x i8>* @i, align 8
%1 = load <2 x i8>* @j, align 8
%div = sdiv <2 x i8> %1, %0
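The CHECK change records that on corei7 the sign-extending loads of the two <2 x i8> globals are now selected as SSE4.1 pmovsxbq vector extends rather than a pair of scalar movsbq loads; pmovsxbq sign-extends the low two bytes of its source straight to two 64-bit lanes. In comment form:

; %0 = load <2 x i8>* @i   ; old: movsbq i(%rip), %r..     (one byte at a time)
;                          ; new: pmovsxbq i(%rip), %xmm.. (2 x i8 -> 2 x i64 in one instruction)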
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
index 0a949eb29b89..8ac4632329b3 100644
--- a/test/CodeGen/X86/2011-11-30-or.ll
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -8,15 +8,15 @@ target triple = "x86_64-apple-macosx10.6.6"
; CHECK: pblendvb %xmm1, %xmm2
; CHECK: ret
-define void @select_func() {
+define void @select_func(<8 x i16> %in) {
entry:
- %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
- %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
+ %c.lobit.i.i.i = ashr <8 x i16> %in, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
%and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
- %neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1>
- %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
- %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
+ %neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %and.i.i.i = and <8 x i16> %neg.i.i.i.i, <i16 45, i16 15, i16 95, i16 8, i16 25, i16 65, i16 8, i16 25>
+ %and.i2.i.i.i = bitcast <8 x i16> %and.i.i.i to <2 x i64>
+ %or.i.i.i.i = or <2 x i64> %and.i2.i.i.i, %and.i5.i.i.i
%a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
ret void
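The rewrite gives select_func a live argument so the ashr is no longer constant-folded away before instruction selection. The pattern under test, as the CHECK lines suggest: an arithmetic shift right by 15 spreads each i16 lane's sign bit into an all-zeros or all-ones mask, and the (mask & A) | (~mask & B) combination is then recognized as a lane-wise select and lowered to pblendvb. As comments:

; %mask = ashr <8 x i16> %in, <15, 15, ...>   ; each lane becomes 0x0000 or 0xFFFF
; (%mask & A) | (~%mask & B)                  ; lane-wise select on the sign of %in
;                                             ; matched by the x86 backend to pblendvb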
diff --git a/test/CodeGen/X86/2012-01-11-split-cv.ll b/test/CodeGen/X86/2012-01-11-split-cv.ll
index 6b9007291901..7e914984fe44 100644
--- a/test/CodeGen/X86/2012-01-11-split-cv.ll
+++ b/test/CodeGen/X86/2012-01-11-split-cv.ll
@@ -2,7 +2,7 @@
;CHECK: add18i16
define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
-;CHECK: vmovups
+;CHECK: vmovaps
%b = load <18 x i16>* %bp, align 16
%x = add <18 x i16> zeroinitializer, %b
store <18 x i16> %x, <18 x i16>* %ret, align 16
diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
index 18a331377353..90d8d3d2dd6d 100644
--- a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
+++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats 2>&1 | \
; RUN: not grep "Number of machine instructions hoisted out of loops post regalloc"
diff --git a/test/CodeGen/X86/2012-07-15-BuildVectorPromote.ll b/test/CodeGen/X86/2012-07-15-BuildVectorPromote.ll
new file mode 100644
index 000000000000..078f1b05c3fc
--- /dev/null
+++ b/test/CodeGen/X86/2012-07-15-BuildVectorPromote.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7
+; We don't care about the output, just that it doesn't crash
+
+define <1 x i1> @buildvec_promote() {
+ %cmp = icmp ule <1 x i32> undef, undef
+ %sel = select i1 undef, <1 x i1> undef, <1 x i1> %cmp
+ ret <1 x i1> %sel
+}
diff --git a/test/CodeGen/X86/2012-07-15-broadcastfold.ll b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
index 3b7a8a7b871c..2c7dfc8dfd45 100644
--- a/test/CodeGen/X86/2012-07-15-broadcastfold.ll
+++ b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
@@ -3,7 +3,7 @@
declare x86_fastcallcc i64 @barrier()
;CHECK: bcast_fold
-;CHECK: vmovaps %xmm{{[0-9]+}}, [[SPILLED:[^\)]+\)]]
+;CHECK: vmov{{[au]}}ps %xmm{{[0-9]+}}, [[SPILLED:[^\)]+\)]]
;CHECK: barrier
;CHECK: vbroadcastss [[SPILLED]], %ymm0
;CHECK: ret
diff --git a/test/CodeGen/X86/2012-11-28-merge-store-alias.ll b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
new file mode 100644
index 000000000000..756e86e0f801
--- /dev/null
+++ b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win64 | FileCheck %s
+
+; CHECK: merge_stores_can
+; CHECK: callq foo
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: movups %xmm0
+; CHECK: callq foo
+; CHECK: ret
+declare i32 @foo([10 x i32]* )
+
+define i32 @merge_stores_can() nounwind ssp {
+ %object1 = alloca [10 x i32]
+
+ %ret0 = call i32 @foo([10 x i32]* %object1) nounwind
+
+ %O1_1 = getelementptr [10 x i32]* %object1, i64 0, i32 1
+ %O1_2 = getelementptr [10 x i32]* %object1, i64 0, i32 2
+ %O1_3 = getelementptr [10 x i32]* %object1, i64 0, i32 3
+ %O1_4 = getelementptr [10 x i32]* %object1, i64 0, i32 4
+ %ld_ptr = getelementptr [10 x i32]* %object1, i64 0, i32 9
+
+ store i32 0, i32* %O1_1
+ store i32 0, i32* %O1_2
+ %ret = load i32* %ld_ptr ; <--- does not alias.
+ store i32 0, i32* %O1_3
+ store i32 0, i32* %O1_4
+
+ %ret1 = call i32 @foo([10 x i32]* %object1) nounwind
+
+ ret i32 %ret
+}
+
+; CHECK: merge_stores_cant
+; CHECK-NOT: xorps %xmm0, %xmm0
+; CHECK-NOT: movups %xmm0
+; CHECK: ret
+define i32 @merge_stores_cant([10 x i32]* %in0, [10 x i32]* %in1) nounwind ssp {
+
+ %O1_1 = getelementptr [10 x i32]* %in1, i64 0, i32 1
+ %O1_2 = getelementptr [10 x i32]* %in1, i64 0, i32 2
+ %O1_3 = getelementptr [10 x i32]* %in1, i64 0, i32 3
+ %O1_4 = getelementptr [10 x i32]* %in1, i64 0, i32 4
+ %ld_ptr = getelementptr [10 x i32]* %in0, i64 0, i32 2
+
+ store i32 0, i32* %O1_1
+ store i32 0, i32* %O1_2
+ %ret = load i32* %ld_ptr ; <--- may alias
+ store i32 0, i32* %O1_3
+ store i32 0, i32* %O1_4
+
+ ret i32 %ret
+}
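The byte arithmetic behind the two cases, assuming 4-byte i32 slots: in merge_stores_can the zero stores and the load touch provably disjoint ranges of the same alloca, so the four stores may be merged into the xorps/movups 16-byte zeroing the CHECKs expect, while in merge_stores_cant the load goes through an unrelated pointer that may alias the stored slots, which blocks merging across it. In sketch form:

; slot i of [10 x i32] lives at byte offset 4*i
;   merge_stores_can:  stores zero bytes [4, 20)   (slots 1-4)
;                      load reads bytes  [36, 40)  (slot 9)   -> disjoint, merge is safe
;   merge_stores_cant: load from %in0 may alias slots of %in1 -> merging is blocked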
diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
new file mode 100644
index 000000000000..9525653f3fff
--- /dev/null
+++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -enable-misched \
+; RUN: -verify-machineinstrs | FileCheck %s
+;
+; Test LiveInterval update handling of DBG_VALUE.
+; rdar://12777252.
+;
+; CHECK: %entry
+; CHECK: DEBUG_VALUE: hg
+; CHECK: je
+
+%struct.node.0.27 = type { i16, double, [3 x double], i32, i32 }
+%struct.hgstruct.2.29 = type { %struct.bnode.1.28*, [3 x double], double, [3 x double] }
+%struct.bnode.1.28 = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode.1.28*, %struct.bnode.1.28* }
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define signext i16 @subdivp(%struct.node.0.27* nocapture %p, double %dsq, double %tolsq, %struct.hgstruct.2.29* nocapture byval align 8 %hg) nounwind uwtable readonly ssp {
+entry:
+ call void @llvm.dbg.declare(metadata !{%struct.hgstruct.2.29* %hg}, metadata !4)
+ %type = getelementptr inbounds %struct.node.0.27* %p, i64 0, i32 0
+ %0 = load i16* %type, align 2, !tbaa !8
+ %cmp = icmp eq i16 %0, 1
+ br i1 %cmp, label %return, label %for.cond.preheader
+
+for.cond.preheader: ; preds = %entry
+ %arrayidx6.1 = getelementptr inbounds %struct.hgstruct.2.29* %hg, i64 0, i32 1, i64 1
+ %cmp22 = fcmp olt double 0.000000e+00, %dsq
+ %conv24 = zext i1 %cmp22 to i16
+ br label %return
+
+return: ; preds = %for.cond.preheader, %entry
+ %retval.0 = phi i16 [ %conv24, %for.cond.preheader ], [ 0, %entry ]
+ ret i16 %retval.0
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh", metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Olden/bh/newbh.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{null}
+!4 = metadata !{i32 786689, null, metadata !"hg", metadata !5, i32 67109589, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [hg] [line 725]
+!5 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
+!6 = metadata !{i32 786454, metadata !11, null, metadata !"hgstruct", i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [from ]
+!8 = metadata !{metadata !"short", metadata !9}
+!9 = metadata !{metadata !"omnipotent char", metadata !10}
+!10 = metadata !{metadata !"Simple C/C++ TBAA"}
+!11 = metadata !{metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh"}
diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
new file mode 100644
index 000000000000..a0fbbb2ff9ef
--- /dev/null
+++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
@@ -0,0 +1,136 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -enable-misched \
+; RUN: -verify-machineinstrs | FileCheck %s
+;
+; Test MachineScheduler handling of DBG_VALUE.
+; rdar://12776937.
+;
+; CHECK: %if.else581
+; CHECK: DEBUG_VALUE: num1
+; CHECK: call
+
+%union.rec = type {}
+
+@.str15 = external hidden unnamed_addr constant [6 x i8], align 1
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @AttachGalley(%union.rec** nocapture %suspend_pt) nounwind uwtable ssp {
+entry:
+ %num14075 = alloca [20 x i8], align 16
+ br label %if.end33
+
+if.end33: ; preds = %entry
+ %cmp1733 = icmp eq i32 undef, 0
+ br label %if.else581
+
+if.else581: ; preds = %if.end33
+ %cmp586 = icmp eq i8 undef, -123
+ br i1 %cmp586, label %if.then588, label %if.else594
+
+if.then588: ; preds = %if.else581
+ br label %for.cond1710.preheader
+
+if.else594: ; preds = %if.else581
+ unreachable
+
+for.cond1710.preheader: ; preds = %if.then588
+ br label %for.cond1710
+
+for.cond1710: ; preds = %for.cond1710, %for.cond1710.preheader
+ br i1 undef, label %for.cond1710, label %if.then3344
+
+if.then3344:
+ br label %if.then4073
+
+if.then4073: ; preds = %if.then3344
+ call void @llvm.dbg.declare(metadata !{[20 x i8]* %num14075}, metadata !4)
+ %arraydecay4078 = getelementptr inbounds [20 x i8]* %num14075, i64 0, i64 0
+ %0 = load i32* undef, align 4
+ %add4093 = add nsw i32 %0, 0
+ %conv4094 = sitofp i32 %add4093 to float
+ %div4095 = fdiv float %conv4094, 5.670000e+02
+ %conv4096 = fpext float %div4095 to double
+ %call4097 = call i32 (i8*, i32, i64, i8*, ...)* @__sprintf_chk(i8* %arraydecay4078, i32 0, i64 20, i8* getelementptr inbounds ([6 x i8]* @.str15, i64 0, i64 0), double %conv4096) nounwind
+ br i1 %cmp1733, label %if.then4107, label %if.else4114
+
+if.then4107: ; preds = %if.then4073
+ unreachable
+
+if.else4114: ; preds = %if.then4073
+ unreachable
+}
+
+declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset", metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/MiBench/consumer-typeset/MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{}
+!4 = metadata !{i32 786688, metadata !5, metadata !"num1", metadata !14, i32 815, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [num1] [line 815]
+!5 = metadata !{i32 786443, metadata !6, i32 815, i32 0, metadata !14, i32 177} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!6 = metadata !{i32 786443, metadata !7, i32 812, i32 0, metadata !14, i32 176} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!7 = metadata !{i32 786443, metadata !8, i32 807, i32 0, metadata !14, i32 175} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!8 = metadata !{i32 786443, metadata !9, i32 440, i32 0, metadata !14, i32 94} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!9 = metadata !{i32 786443, metadata !10, i32 435, i32 0, metadata !14, i32 91} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!10 = metadata !{i32 786443, metadata !11, i32 434, i32 0, metadata !14, i32 90} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!11 = metadata !{i32 786443, metadata !12, i32 250, i32 0, metadata !14, i32 24} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!12 = metadata !{i32 786443, metadata !13, i32 249, i32 0, metadata !14, i32 23} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!13 = metadata !{i32 786443, metadata !3, i32 221, i32 0, metadata !14, i32 19} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!14 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
+!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
+!16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786465, i64 0, i64 20} ; [ DW_TAG_subrange_type ] [0, 19]
+!19 = metadata !{metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset"}
+
+; Test DebugValue uses visited by RegisterPressureTracker findUseBetween().
+;
+; CHECK: @main
+; CHECK: DEBUG_VALUE: X
+; CHECK: call
+
+%"class.__gnu_cxx::hash_map" = type { %"class.__gnu_cxx::hashtable" }
+%"class.__gnu_cxx::hashtable" = type { i64, i64, i64, i64, i64, i64 }
+
+define void @main() uwtable ssp {
+entry:
+ %X = alloca %"class.__gnu_cxx::hash_map", align 8
+ br i1 undef, label %cond.true, label %cond.end
+
+cond.true: ; preds = %entry
+ unreachable
+
+cond.end: ; preds = %entry
+ call void @llvm.dbg.declare(metadata !{%"class.__gnu_cxx::hash_map"* %X}, metadata !31)
+ %_M_num_elements.i.i.i.i = getelementptr inbounds %"class.__gnu_cxx::hash_map"* %X, i64 0, i32 0, i32 5
+ invoke void @_Znwm()
+ to label %exit.i unwind label %lpad2.i.i.i.i
+
+exit.i: ; preds = %cond.end
+ unreachable
+
+lpad2.i.i.i.i: ; preds = %cond.end
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br i1 undef, label %lpad.body.i.i, label %if.then.i.i.i.i.i.i.i.i
+
+if.then.i.i.i.i.i.i.i.i: ; preds = %lpad2.i.i.i.i
+ unreachable
+
+lpad.body.i.i: ; preds = %lpad2.i.i.i.i
+ resume { i8*, i32 } %0
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_Znwm()
+
+!llvm.dbg.cu = !{!30}
+
+!30 = metadata !{i32 786449, i32 0, i32 4, metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++", metadata !"clang version 3.3 (trunk 169129) (llvm/trunk 169135)", i1 true, i1 true, metadata !"", i32 0, null, null, null, null} ; [ DW_TAG_compile_unit ] [SingleSource/Benchmarks/Shootout-C++/hash.cpp] [DW_LANG_C_plus_plus]
+!31 = metadata !{i32 786688, null, metadata !"X", null, i32 29, metadata !32, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [X] [line 29]
+!32 = metadata !{i32 786454, metadata !34, null, metadata !"HM", i32 28, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ]
+!33 = metadata !{i32 786473, metadata !34} ; [ DW_TAG_file_type ]
+!34 = metadata !{metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++"}
diff --git a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
new file mode 100644
index 000000000000..df93c5647d95
--- /dev/null
+++ b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -enable-misched \
+; RUN: -verify-machineinstrs | FileCheck %s
+;
+; Test RegisterPressure handling of DBG_VALUE.
+;
+; CHECK: %entry
+; CHECK: DEBUG_VALUE: callback
+; CHECK: ret
+
+%struct.btCompoundLeafCallback = type { i32, i32 }
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define void @test() unnamed_addr uwtable ssp align 2 {
+entry:
+ %callback = alloca %struct.btCompoundLeafCallback, align 8
+ br i1 undef, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ unreachable
+
+if.end: ; preds = %entry
+ call void @llvm.dbg.declare(metadata !{%struct.btCompoundLeafCallback* %callback}, metadata !3)
+ %m = getelementptr inbounds %struct.btCompoundLeafCallback* %callback, i64 0, i32 1
+ store i32 0, i32* undef, align 8
+ %cmp12447 = icmp sgt i32 undef, 0
+ br i1 %cmp12447, label %for.body.lr.ph, label %invoke.cont44
+
+for.body.lr.ph: ; preds = %if.end
+ unreachable
+
+invoke.cont44: ; preds = %if.end
+ ret void
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet", metadata !"clang version 3.3 (trunk 168984) (llvm/trunk 168983)", i1 true, i1 true, metadata !"", i32 0, metadata !1, null, null, null} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Bullet/MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !2}
+!2 = metadata !{null, null}
+!3 = metadata !{i32 786688, null, metadata !"callback", null, i32 214, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [callback] [line 214]
+!4 = metadata !{i32 786451, metadata !6, null, metadata !"btCompoundLeafCallback", i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [from ]
+!5 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ]
+!6 = metadata !{metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet"}
diff --git a/test/CodeGen/X86/2012-12-06-python27-miscompile.ll b/test/CodeGen/X86/2012-12-06-python27-miscompile.ll
new file mode 100644
index 000000000000..d9effc92fa92
--- /dev/null
+++ b/test/CodeGen/X86/2012-12-06-python27-miscompile.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mtriple=i686-pc-win32 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Make sure that we are zeroing one memory location at a time using xorl and
+; not both using XMM registers.
+
+;CHECK: @foo
+;CHECK: xorl
+;CHECK-NOT: xmm
+;CHECK: ret
+define i32 @foo (i64* %so) nounwind uwtable ssp {
+entry:
+ %used = getelementptr inbounds i64* %so, i32 3
+ store i64 0, i64* %used, align 8
+ %fill = getelementptr inbounds i64* %so, i32 2
+ %L = load i64* %fill, align 8
+ store i64 0, i64* %fill, align 8
+ %cmp28 = icmp sgt i64 %L, 0
+ %R = sext i1 %cmp28 to i32
+ ret i32 %R
+}
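As I read this test, %fill and %used are adjacent i64 slots (byte offsets 16 and 24), and the hazard is that folding their two zero stores into one 16-byte XMM store placed before the intervening load would zero %fill before %L reads it; the CHECKs therefore pin the safe scalar form, one xorl-backed store per location. A comment sketch of the ordering constraint:

; %so byte offsets: %fill = 8*2 = 16, %used = 8*3 = 24 (adjacent slots)
; program order: store 0 -> %used ; %L = load %fill ; store 0 -> %fill
; a combined <2 x i64> zero store of bytes [16, 32) scheduled before the
; load would clobber the value %L is supposed to observe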
diff --git a/test/CodeGen/X86/2012-12-1-merge-multiple.ll b/test/CodeGen/X86/2012-12-1-merge-multiple.ll
new file mode 100644
index 000000000000..5931c3d27be1
--- /dev/null
+++ b/test/CodeGen/X86/2012-12-1-merge-multiple.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win64 | FileCheck %s
+
+; CHECK: multiple_stores_on_chain
+; CHECK: movabsq
+; CHECK: movq
+; CHECK: movabsq
+; CHECK: movq
+; CHECK: ret
+define void @multiple_stores_on_chain(i16 * %A) {
+entry:
+ %a0 = getelementptr inbounds i16* %A, i64 0
+ %a1 = getelementptr inbounds i16* %A, i64 1
+ %a2 = getelementptr inbounds i16* %A, i64 2
+ %a3 = getelementptr inbounds i16* %A, i64 3
+ %a4 = getelementptr inbounds i16* %A, i64 4
+ %a5 = getelementptr inbounds i16* %A, i64 5
+ %a6 = getelementptr inbounds i16* %A, i64 6
+ %a7 = getelementptr inbounds i16* %A, i64 7
+
+ store i16 0, i16* %a0
+ store i16 1, i16* %a1
+ store i16 2, i16* %a2
+ store i16 3, i16* %a3
+ store i16 4, i16* %a4
+ store i16 5, i16* %a5
+ store i16 6, i16* %a6
+ store i16 7, i16* %a7
+
+ ret void
+}
+
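The movabsq/movq pairs expected above are the eight i16 stores folded into two i64 immediate stores; on little-endian x86-64 the halfwords 0, 1, 2, 3 pack into 0x0003000200010000 and 4, 5, 6, 7 into 0x0007000600050004. A hypothetical merged form of the first half (the function name and bitcast are illustrative, not from the test):

define void @merged_first_half(i16* %A) {
  %p64 = bitcast i16* %A to i64*
  ; 844433520132096 = 0x0003000200010000, halfwords 0..3 in little-endian order
  store i64 844433520132096, i64* %p64, align 2
  ret void
}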
diff --git a/test/CodeGen/X86/2012-12-12-DAGCombineCrash.ll b/test/CodeGen/X86/2012-12-12-DAGCombineCrash.ll
new file mode 100644
index 000000000000..8cef2c8201c6
--- /dev/null
+++ b/test/CodeGen/X86/2012-12-12-DAGCombineCrash.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=x86 -mtriple=i686-apple-ios -mcpu=yonah < %s
+; rdar://12868039
+
+define void @t() nounwind ssp {
+ %1 = alloca i32
+ %2 = ptrtoint i32* %1 to i32
+ br label %3
+
+; <label>:3 ; preds = %5, %3, %0
+ switch i32 undef, label %3 [
+ i32 611946160, label %5
+ i32 954117870, label %4
+ ]
+
+; <label>:4 ; preds = %3
+ ret void
+
+; <label>:5 ; preds = %5, %3
+ %6 = add i32 0, 148
+ %7 = and i32 %6, 48
+ %8 = add i32 %7, 0
+ %9 = or i32 %2, %8
+ %10 = xor i32 -1, %2
+ %11 = or i32 %8, %10
+ %12 = or i32 %9, %11
+ %13 = xor i32 %9, %11
+ %14 = sub i32 %12, %13
+ %15 = xor i32 2044674005, %14
+ %16 = xor i32 %15, 0
+ %17 = shl nuw nsw i32 %16, 1
+ %18 = sub i32 0, %17
+ %19 = and i32 %18, 2051242402
+ %20 = sub i32 0, %19
+ %21 = xor i32 %20, 0
+ %22 = xor i32 %21, 0
+ %23 = add i32 0, %22
+ %24 = shl i32 %23, 1
+ %25 = or i32 1, %24
+ %26 = add i32 0, %25
+ %27 = trunc i32 %26 to i8
+ %28 = xor i8 %27, 125
+ %29 = add i8 %28, -16
+ %30 = add i8 0, %29
+ store i8 %30, i8* null
+ br i1 undef, label %5, label %3
+}
diff --git a/test/CodeGen/X86/2012-12-14-v8fp80-crash.ll b/test/CodeGen/X86/2012-12-14-v8fp80-crash.ll
new file mode 100644
index 000000000000..c465527bd867
--- /dev/null
+++ b/test/CodeGen/X86/2012-12-14-v8fp80-crash.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mtriple=i686-pc-win32
+
+; Make sure we don't crash on this testcase.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+define void @_ZN6VectorIfE3equIeEEvfRKS_IT_E() nounwind uwtable ssp align 2 {
+entry:
+ br i1 undef, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph: ; preds = %entry
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %while.body.lr.ph
+ %0 = fptrunc <8 x x86_fp80> undef to <8 x float>
+ store <8 x float> %0, <8 x float>* undef, align 4
+ br label %vector.body
+
+while.end: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll b/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll
new file mode 100644
index 000000000000..302566520671
--- /dev/null
+++ b/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 < %s | FileCheck %s
+; Test that we do not introduce vector operations with noimplicitfloat.
+; rdar://12879313
+
+%struct1 = type { i32*, i32* }
+
+define void @test() nounwind noimplicitfloat {
+entry:
+; CHECK-NOT: xmm
+; CHECK: ret
+ %0 = load %struct1** undef, align 8
+ %1 = getelementptr inbounds %struct1* %0, i64 0, i32 0
+ store i32* null, i32** %1, align 8
+ %2 = getelementptr inbounds %struct1* %0, i64 0, i32 1
+ store i32* null, i32** %2, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll b/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll
new file mode 100644
index 000000000000..1b417e54a2f7
--- /dev/null
+++ b/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll
@@ -0,0 +1,74 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10.5.0 < %s
+
+; rdar://12968664
+
+define void @t() nounwind uwtable ssp {
+ br label %4
+
+; <label>:1 ; preds = %4, %2
+ ret void
+
+; <label>:2 ; preds = %6, %5, %3, %2
+ switch i32 undef, label %2 [
+ i32 1090573978, label %1
+ i32 1090573938, label %3
+ i32 1090573957, label %5
+ ]
+
+; <label>:3 ; preds = %4, %2
+ br i1 undef, label %2, label %4
+
+; <label>:4 ; preds = %6, %5, %3, %0
+ switch i32 undef, label %11 [
+ i32 1090573938, label %3
+ i32 1090573957, label %5
+ i32 1090573978, label %1
+ i32 165205179, label %6
+ ]
+
+; <label>:5 ; preds = %4, %2
+ br i1 undef, label %2, label %4
+
+; <label>:6 ; preds = %4
+ %7 = icmp eq i32 undef, 590901838
+ %8 = or i1 false, %7
+ %9 = or i1 true, %8
+ %10 = xor i1 %8, %9
+ br i1 %10, label %4, label %2
+
+; <label>:11 ; preds = %11, %4
+ br label %11
+}
+
+; PR15608
+@global = external constant [2 x i8]
+
+define void @PR15608() {
+bb:
+ br label %bb3
+
+bb1: ; No predecessors!
+ br i1 icmp ult (i64 xor (i64 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i64), i64 1), i64 1), label %bb2, label %bb3
+
+bb2: ; preds = %bb1
+ unreachable
+
+bb3: ; preds = %bb1, %bb
+ br i1 xor (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1), i1 trunc (i192 lshr (i192 or (i192 and (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 -340282366920938463463374607431768211457), i192 shl (i192 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i192), i192 128)), i192 128) to i1)), label %bb7, label %bb4
+
+bb4: ; preds = %bb6, %bb3
+ %tmp = phi i1 [ true, %bb6 ], [ trunc (i192 lshr (i192 or (i192 and (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 -340282366920938463463374607431768211457), i192 shl (i192 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i192), i192 128)), i192 128) to i1), %bb3 ]
+ br i1 false, label %bb8, label %bb5
+
+bb5: ; preds = %bb4
+ br i1 %tmp, label %bb8, label %bb6
+
+bb6: ; preds = %bb5
+ br i1 false, label %bb8, label %bb4
+
+bb7: ; preds = %bb3
+ unreachable
+
+bb8: ; preds = %bb6, %bb5, %bb4
+ unreachable
+}
diff --git a/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll b/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
new file mode 100644
index 000000000000..614ccda5e250
--- /dev/null
+++ b/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-pc-win32 | FileCheck %s
+
+; CHECK: test
+; CHECK: vpmovzxwd
+; CHECK: vpmovzxwd
+define void @test(<4 x i64> %a, <4 x i16>* %buf) {
+ %ex1 = extractelement <4 x i64> %a, i32 0
+ %ex2 = extractelement <4 x i64> %a, i32 1
+ %x1 = bitcast i64 %ex1 to <4 x i16>
+ %x2 = bitcast i64 %ex2 to <4 x i16>
+ %Sh = shufflevector <4 x i16> %x1, <4 x i16> %x2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ store <4 x i16> %Sh, <4 x i16>* %buf, align 1
+ ret void
+}
diff --git a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
new file mode 100644
index 000000000000..03b6bdeafa87
--- /dev/null
+++ b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx-i -show-mc-encoding
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = external global [8 x float], align 32
+@e = external global [8 x float], align 16
+
+define void @main() #0 {
+entry:
+ %0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32, !tbaa !0
+ %bitcast.i = extractelement <8 x float> %0, i32 0
+ %vecinit.i.i = insertelement <4 x float> undef, float %bitcast.i, i32 0
+ %vecinit2.i.i = insertelement <4 x float> %vecinit.i.i, float 0.000000e+00, i32 1
+ %vecinit3.i.i = insertelement <4 x float> %vecinit2.i.i, float 0.000000e+00, i32 2
+ %vecinit4.i.i = insertelement <4 x float> %vecinit3.i.i, float 0.000000e+00, i32 3
+ %1 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecinit4.i.i) #2
+ %vecext.i.i = extractelement <4 x float> %1, i32 0
+ store float %vecext.i.i, float* getelementptr inbounds ([8 x float]* @e, i64 0, i64 0), align 16, !tbaa !0
+ unreachable
+}
+
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) #1
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/Atomics-64.ll b/test/CodeGen/X86/Atomics-64.ll
index 8e93762cec17..8b0a349a8be3 100644
--- a/test/CodeGen/X86/Atomics-64.ll
+++ b/test/CodeGen/X86/Atomics-64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: llc < %s -march=x86 > %t
+; RUN: llc < %s -march=x86-64 > %t.x86-64
+; RUN: llc < %s -march=x86 > %t.x86
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/DbgValueOtherTargets.test b/test/CodeGen/X86/DbgValueOtherTargets.test
new file mode 100644
index 000000000000..7b4d431c93b1
--- /dev/null
+++ b/test/CodeGen/X86/DbgValueOtherTargets.test
@@ -0,0 +1,2 @@
+RUN: llc -O0 -march=x86 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
+RUN: llc -O0 -march=x86-64 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/X86/GC/erlang-gc.ll b/test/CodeGen/X86/GC/erlang-gc.ll
new file mode 100644
index 000000000000..c55b7f6dcf61
--- /dev/null
+++ b/test/CodeGen/X86/GC/erlang-gc.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK64
+; RUN: llc -mtriple=i686-linux-gnu < %s | FileCheck %s --check-prefix=CHECK32
+
+define i32 @main(i32 %x) nounwind gc "erlang" {
+ %puts = tail call i32 @foo(i32 %x)
+ ret i32 0
+
+; CHECK64: .section .note.gc,"",@progbits
+; CHECK64-NEXT: .align 8
+; CHECK64-NEXT: .short 1 # safe point count
+; CHECK64-NEXT: .long .Ltmp0 # safe point address
+; CHECK64-NEXT: .short 1 # stack frame size (in words)
+; CHECK64-NEXT: .short 0 # stack arity
+; CHECK64-NEXT: .short 0 # live root count
+
+; CHECK32: .section .note.gc,"",@progbits
+; CHECK32-NEXT: .align 4
+; CHECK32-NEXT: .short 1 # safe point count
+; CHECK32-NEXT: .long .Ltmp0 # safe point address
+; CHECK32-NEXT: .short 3 # stack frame size (in words)
+; CHECK32-NEXT: .short 0 # stack arity
+; CHECK32-NEXT: .short 0 # live root count
+}
+
+declare i32 @foo(i32)
diff --git a/test/CodeGen/X86/GC/ocaml-gc.ll b/test/CodeGen/X86/GC/ocaml-gc.ll
new file mode 100644
index 000000000000..44241a90d0e7
--- /dev/null
+++ b/test/CodeGen/X86/GC/ocaml-gc.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
+
+define i32 @main(i32 %x) nounwind gc "ocaml" {
+; CHECK: .text
+; CHECK-NEXT: .globl caml_3C_stdin_3E___code_begin
+; CHECK-NEXT: caml_3C_stdin_3E___code_begin:
+; CHECK-NEXT: .data
+; CHECK-NEXT: .globl caml_3C_stdin_3E___data_begin
+; CHECK-NEXT: caml_3C_stdin_3E___data_begin:
+
+ %puts = tail call i32 @foo(i32 %x)
+ ret i32 0
+
+; CHECK: .globl caml_3C_stdin_3E___code_end
+; CHECK-NEXT: caml_3C_stdin_3E___code_end:
+; CHECK-NEXT: .data
+; CHECK-NEXT: .globl caml_3C_stdin_3E___data_end
+; CHECK-NEXT: caml_3C_stdin_3E___data_end:
+; CHECK-NEXT: .quad 0
+; CHECK-NEXT: .globl caml_3C_stdin_3E___frametable
+; CHECK-NEXT: caml_3C_stdin_3E___frametable:
+; CHECK-NEXT: .short 1
+; CHECK-NEXT: .align 8
+; CHECK-NEXT: # live roots for main
+; CHECK-NEXT: .quad .Ltmp0
+; CHECK-NEXT: .short 8
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .align 8
+}
+
+declare i32 @foo(i32)
diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll
index ea791a3c083c..227ef3466e0a 100644
--- a/test/CodeGen/X86/MachineSink-DbgValue.ll
+++ b/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -26,24 +26,25 @@ bb2:
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1}
-!llvm.dbg.lv.foo = !{!6, !7, !10}
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590081, metadata !1, metadata !"i", metadata !2, i32 16777218, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 590081, metadata !1, metadata !"c", metadata !2, i32 33554434, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
-!9 = metadata !{i32 589860, metadata !0, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 590080, metadata !11, metadata !"a", metadata !2, i32 3, metadata !9, i32 0} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 589835, metadata !1, i32 2, i32 25, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777218, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 33554434, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
+!9 = metadata !{i32 786468, metadata !0, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786688, metadata !11, metadata !"a", metadata !2, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 786443, metadata !20, metadata !1, i32 2, i32 25, i32 0} ; [ DW_TAG_lexical_block ]
!12 = metadata !{i32 2, i32 13, metadata !1, null}
!13 = metadata !{i32 2, i32 22, metadata !1, null}
!14 = metadata !{i32 3, i32 14, metadata !11, null}
!15 = metadata !{i32 4, i32 3, metadata !11, null}
!16 = metadata !{i32 5, i32 5, metadata !11, null}
!17 = metadata !{i32 7, i32 1, metadata !11, null}
+!18 = metadata !{metadata !1}
+!19 = metadata !{metadata !6, metadata !7, metadata !10}
+!20 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
diff --git a/test/CodeGen/X86/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll
index 33141680aa92..1329200c3e6e 100644
--- a/test/CodeGen/X86/MachineSink-PHIUse.ll
+++ b/test/CodeGen/X86/MachineSink-PHIUse.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-cgp-branch-opts -stats 2>&1 | grep "machine-sink"
define fastcc void @t() nounwind ssp {
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll
index 64825bac9719..bb227a0185df 100644
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -40,9 +40,43 @@ define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwt
ret void
}
+; No vectors because we use noimplicitfloat
+; CHECK: merge_const_store_no_vec
+; CHECK-NOT: vmovups
+; CHECK: ret
+define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat {
+ %1 = icmp sgt i32 %count, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+.lr.ph:
+ %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
+ %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
+ %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
+ store i32 0, i32* %2, align 4
+ %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
+ store i32 0, i32* %3, align 4
+ %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
+ store i32 0, i32* %4, align 4
+ %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
+ store i32 0, i32* %5, align 4
+ %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
+ store i32 0, i32* %6, align 4
+ %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
+ store i32 0, i32* %7, align 4
+ %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
+ store i32 0, i32* %8, align 4
+ %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
+ store i32 0, i32* %9, align 4
+ %10 = add nsw i32 %i.02, 1
+ %11 = getelementptr inbounds %struct.B* %.01, i64 1
+ %exitcond = icmp eq i32 %10, %count
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+._crit_edge:
+ ret void
+}
+
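+; A sketch of the intent (an assumption about the exact scalar lowering, not
+; checked above): without noimplicitfloat the eight zero stores could merge
+; into one 256-bit store, e.g.
+;   vxorps %ymm0, %ymm0, %ymm0 ; zero a ymm register
+;   vmovups %ymm0, (%rsi)      ; one 32-byte store
+; with noimplicitfloat the backend must avoid vector registers, so the best
+; available merge is a series of scalar integer stores such as movq $0, (%rsi).
+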
; Move the constants using a single vector store.
; CHECK: merge_const_store_vec
-; CHECK: vmovups %ymm0, (%rsi)
+; CHECK: vmovups
; CHECK: ret
define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
%1 = icmp sgt i32 %count, 0
@@ -303,3 +337,99 @@ block4: ; preds = %4, %.lr.ph
ret void
}
+; Make sure that we merge the consecutive load/store sequence below and use a
+; word (16 bit) instead of a byte copy.
+; CHECK: MergeLoadStoreBaseIndexOffset
+; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
+; CHECK: movw [[REG]], (%{{.*}})
+define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
+ br label %1
+
+; <label>:1
+ %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]
+ %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
+ %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]
+ %2 = getelementptr inbounds i64* %.0, i64 1
+ %3 = load i64* %.0, align 1
+ %4 = getelementptr inbounds i8* %c, i64 %3
+ %5 = load i8* %4, align 1
+ %6 = add i64 %3, 1
+ %7 = getelementptr inbounds i8* %c, i64 %6
+ %8 = load i8* %7, align 1
+ store i8 %5, i8* %.08, align 1
+ %9 = getelementptr inbounds i8* %.08, i64 1
+ store i8 %8, i8* %9, align 1
+ %10 = getelementptr inbounds i8* %.08, i64 2
+ %11 = add nsw i32 %.09, -1
+ %12 = icmp eq i32 %11, 0
+ br i1 %12, label %13, label %1
+
+; <label>:13
+ ret void
+}
+
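+; What the combine does here (a sketch of the checks above): the byte loads
+; at c[%3] and c[%3 + 1] and the byte stores through %.08 and %.08 + 1 are
+; provably adjacent, so each pair is fused into a single unaligned 16-bit
+; access -- hence the movw load/store pair the CHECK lines expect.
+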
+; Make sure that we merge the consecutive load/store sequence below and use a
+; word (16 bit) instead of a byte copy even if there are intermediate sign
+; extensions.
+; CHECK: MergeLoadStoreBaseIndexOffsetSext
+; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
+; CHECK: movw [[REG]], (%{{.*}})
+define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
+ br label %1
+
+; <label>:1
+ %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
+ %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
+ %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
+ %2 = getelementptr inbounds i8* %.0, i64 1
+ %3 = load i8* %.0, align 1
+ %4 = sext i8 %3 to i64
+ %5 = getelementptr inbounds i8* %c, i64 %4
+ %6 = load i8* %5, align 1
+ %7 = add i64 %4, 1
+ %8 = getelementptr inbounds i8* %c, i64 %7
+ %9 = load i8* %8, align 1
+ store i8 %6, i8* %.08, align 1
+ %10 = getelementptr inbounds i8* %.08, i64 1
+ store i8 %9, i8* %10, align 1
+ %11 = getelementptr inbounds i8* %.08, i64 2
+ %12 = add nsw i32 %.09, -1
+ %13 = icmp eq i32 %12, 0
+ br i1 %13, label %14, label %1
+
+; <label>:14
+ ret void
+}
+
+; However, we can only ignore the sign extensions when merging if they
+; appear on all of the memory address computations.
+; CHECK: loadStoreBaseIndexOffsetSextNoSex
+; CHECK-NOT: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
+; CHECK-NOT: movw [[REG]], (%{{.*}})
+define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
+ br label %1
+
+; <label>:1
+ %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
+ %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
+ %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
+ %2 = getelementptr inbounds i8* %.0, i64 1
+ %3 = load i8* %.0, align 1
+ %4 = sext i8 %3 to i64
+ %5 = getelementptr inbounds i8* %c, i64 %4
+ %6 = load i8* %5, align 1
+ %7 = add i8 %3, 1
+ %wrap.4 = sext i8 %7 to i64
+ %8 = getelementptr inbounds i8* %c, i64 %wrap.4
+ %9 = load i8* %8, align 1
+ store i8 %6, i8* %.08, align 1
+ %10 = getelementptr inbounds i8* %.08, i64 1
+ store i8 %9, i8* %10, align 1
+ %11 = getelementptr inbounds i8* %.08, i64 2
+ %12 = add nsw i32 %.09, -1
+ %13 = icmp eq i32 %12, 0
+ br i1 %13, label %14, label %1
+
+; <label>:14
+ ret void
+}
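+
+; Why the merge is blocked here (a worked example, not part of the test):
+; take %3 = 127.  The first address uses sext(i8 127) = 127, but the second
+; computes add i8 127, 1 = -128 *before* its sext, giving -128 rather than
+; 128.  The two accesses are not provably adjacent, so the bytes cannot be
+; fused into one 16-bit access and no movw pair may appear.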
diff --git a/test/CodeGen/X86/WidenArith.ll b/test/CodeGen/X86/WidenArith.ll
new file mode 100644
index 000000000000..0383bd665b0f
--- /dev/null
+++ b/test/CodeGen/X86/WidenArith.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
+
+;CHECK: test
+;CHECK: vaddps
+;CHECK: vmulps
+;CHECK: vsubps
+;CHECK: vcmpltps
+;CHECK: vcmpltps
+;CHECK: vandps
+;CHECK: vandps
+;CHECK: ret
+define <8 x i32> @test(<8 x float> %a, <8 x float> %b) {
+ %c1 = fadd <8 x float> %a, %b
+ %b1 = fmul <8 x float> %b, %a
+ %d = fsub <8 x float> %b1, %c1
+ %res1 = fcmp olt <8 x float> %a, %b1
+ %res2 = fcmp olt <8 x float> %c1, %d
+ %andr = and <8 x i1>%res1, %res2
+ %ex = zext <8 x i1> %andr to <8 x i32>
+ ret <8 x i32>%ex
+}
diff --git a/test/CodeGen/X86/atom-bypass-slow-division-64.ll b/test/CodeGen/X86/atom-bypass-slow-division-64.ll
new file mode 100644
index 000000000000..a3bbea3c996b
--- /dev/null
+++ b/test/CodeGen/X86/atom-bypass-slow-division-64.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux -march=x86-64 | FileCheck %s
+
+; Additional tests for 64-bit divide bypass
+
+define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_quotient:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: ret
+; CHECK: divw
+; CHECK: ret
+ %result = sdiv i64 %a, %b
+ ret i64 %result
+}
+
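+; The pattern these checks encode (an interpretation, not an extra test):
+; orq combines both operands, and testq $-65536 asks whether any bit above
+; bit 15 is set in either one.  If not (je taken), both values fit in 16
+; bits and the cheap divw path runs; otherwise the full-width idivq does.
+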
+define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_remainder:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: ret
+; CHECK: divw
+; CHECK: ret
+ %result = srem i64 %a, %b
+ ret i64 %result
+}
+
+define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_quotient_and_remainder:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: divw
+; CHECK: addq
+; CHECK: ret
+; CHECK-NOT: idivq
+; CHECK-NOT: divw
+ %resultdiv = sdiv i64 %a, %b
+ %resultrem = srem i64 %a, %b
+ %result = add i64 %resultdiv, %resultrem
+ ret i64 %result
+}
diff --git a/test/CodeGen/X86/atom-bypass-slow-division.ll b/test/CodeGen/X86/atom-bypass-slow-division.ll
index e7c9605d3e88..4612940445cb 100644
--- a/test/CodeGen/X86/atom-bypass-slow-division.ll
+++ b/test/CodeGen/X86/atom-bypass-slow-division.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
-define i32 @test_get_quotient(i32 %a, i32 %b) nounwind {
-; CHECK: test_get_quotient
+define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind {
+; CHECK: Test_get_quotient:
; CHECK: orl %ecx, %edx
; CHECK-NEXT: testl $-256, %edx
; CHECK-NEXT: je
@@ -13,8 +13,8 @@ define i32 @test_get_quotient(i32 %a, i32 %b) nounwind {
ret i32 %result
}
-define i32 @test_get_remainder(i32 %a, i32 %b) nounwind {
-; CHECK: test_get_remainder
+define i32 @Test_get_remainder(i32 %a, i32 %b) nounwind {
+; CHECK: Test_get_remainder:
; CHECK: orl %ecx, %edx
; CHECK-NEXT: testl $-256, %edx
; CHECK-NEXT: je
@@ -26,8 +26,8 @@ define i32 @test_get_remainder(i32 %a, i32 %b) nounwind {
ret i32 %result
}
-define i32 @test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind {
-; CHECK: test_get_quotient_and_remainder
+define i32 @Test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind {
+; CHECK: Test_get_quotient_and_remainder:
; CHECK: orl %ecx, %edx
; CHECK-NEXT: testl $-256, %edx
; CHECK-NEXT: je
@@ -35,7 +35,7 @@ define i32 @test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind {
; CHECK: divb
; CHECK: addl
; CHECK: ret
-; CEECK-NOT: idivl
+; CHECK-NOT: idivl
; CHECK-NOT: divb
%resultdiv = sdiv i32 %a, %b
%resultrem = srem i32 %a, %b
@@ -43,8 +43,8 @@ define i32 @test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind {
ret i32 %result
}
-define i32 @test_use_div_and_idiv(i32 %a, i32 %b) nounwind {
-; CHECK: test_use_div_and_idiv
+define i32 @Test_use_div_and_idiv(i32 %a, i32 %b) nounwind {
+; CHECK: Test_use_div_and_idiv:
; CHECK: idivl
; CHECK: divb
; CHECK: divl
@@ -57,34 +57,34 @@ define i32 @test_use_div_and_idiv(i32 %a, i32 %b) nounwind {
ret i32 %result
}
-define i32 @test_use_div_imm_imm() nounwind {
-; CHECK: test_use_div_imm_imm
+define i32 @Test_use_div_imm_imm() nounwind {
+; CHECK: Test_use_div_imm_imm:
; CHECK: movl $64
%resultdiv = sdiv i32 256, 4
ret i32 %resultdiv
}
-define i32 @test_use_div_reg_imm(i32 %a) nounwind {
-; CHECK: test_use_div_reg_imm
-; CEHCK-NOT: test
+define i32 @Test_use_div_reg_imm(i32 %a) nounwind {
+; CHECK: Test_use_div_reg_imm:
+; CHECK-NOT: test
; CHECK-NOT: idiv
; CHECK-NOT: divb
%resultdiv = sdiv i32 %a, 33
ret i32 %resultdiv
}
-define i32 @test_use_rem_reg_imm(i32 %a) nounwind {
-; CHECK: test_use_rem_reg_imm
-; CEHCK-NOT: test
+define i32 @Test_use_rem_reg_imm(i32 %a) nounwind {
+; CHECK: Test_use_rem_reg_imm:
+; CHECK-NOT: test
; CHECK-NOT: idiv
; CHECK-NOT: divb
%resultrem = srem i32 %a, 33
ret i32 %resultrem
}
-define i32 @test_use_divrem_reg_imm(i32 %a) nounwind {
-; CHECK: test_use_divrem_reg_imm
-; CEHCK-NOT: test
+define i32 @Test_use_divrem_reg_imm(i32 %a) nounwind {
+; CHECK: Test_use_divrem_reg_imm:
+; CHECK-NOT: test
; CHECK-NOT: idiv
; CHECK-NOT: divb
%resultdiv = sdiv i32 %a, 33
@@ -93,8 +93,8 @@ define i32 @test_use_divrem_reg_imm(i32 %a) nounwind {
ret i32 %result
}
-define i32 @test_use_div_imm_reg(i32 %a) nounwind {
-; CHECK: test_use_div_imm_reg
+define i32 @Test_use_div_imm_reg(i32 %a) nounwind {
+; CHECK: Test_use_div_imm_reg:
; CHECK: test
; CHECK: idiv
; CHECK: divb
@@ -102,8 +102,8 @@ define i32 @test_use_div_imm_reg(i32 %a) nounwind {
ret i32 %resultdiv
}
-define i32 @test_use_rem_imm_reg(i32 %a) nounwind {
-; CHECK: test_use_rem_imm_reg
+define i32 @Test_use_rem_imm_reg(i32 %a) nounwind {
+; CHECK: Test_use_rem_imm_reg:
; CHECK: test
; CHECK: idiv
; CHECK: divb
diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
new file mode 100644
index 000000000000..2a34e0298f30
--- /dev/null
+++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=atom 2>&1 | \
+; RUN: grep "calll" | not grep "("
+; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=core2 2>&1 | \
+; RUN: grep "calll" | grep "*funcp"
+;
+; Original source code built with clang -S -emit-llvm -m32 test32.c:
+;
+; int a, b, c, d, e, f, g, h, i, j;
+; extern int (*funcp)(int, int, int, int, int, int, int, int);
+; extern int sum;
+;
+; void func()
+; {
+; sum = 0;
+; for( i = a; i < b; ++i )
+; {
+; sum += (*funcp)(i, b, c, d, e, f, g, h);
+; }
+; }
+;
+; ModuleID = 'test32.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+@sum = external global i32
+@a = common global i32 0, align 4
+@i = common global i32 0, align 4
+@b = common global i32 0, align 4
+@funcp = external global i32 (i32, i32, i32, i32, i32, i32, i32, i32)*
+@c = common global i32 0, align 4
+@d = common global i32 0, align 4
+@e = common global i32 0, align 4
+@f = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i32 0, align 4
+@j = common global i32 0, align 4
+
+define void @func() #0 {
+entry:
+ store i32 0, i32* @sum, align 4
+ %0 = load i32* @a, align 4
+ store i32 %0, i32* @i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %1 = load i32* @i, align 4
+ %2 = load i32* @b, align 4
+ %cmp = icmp slt i32 %1, %2
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 4
+ %4 = load i32* @i, align 4
+ %5 = load i32* @b, align 4
+ %6 = load i32* @c, align 4
+ %7 = load i32* @d, align 4
+ %8 = load i32* @e, align 4
+ %9 = load i32* @f, align 4
+ %10 = load i32* @g, align 4
+ %11 = load i32* @h, align 4
+ %call = call i32 %3(i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11)
+ %12 = load i32* @sum, align 4
+ %add = add nsw i32 %12, %call
+ store i32 %add, i32* @sum, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %13 = load i32* @i, align 4
+ %inc = add nsw i32 %13, 1
+ store i32 %inc, i32* @i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
new file mode 100644
index 000000000000..bcfbd6107a56
--- /dev/null
+++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=atom 2>&1 | \
+; RUN: grep "callq" | not grep "("
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core2 2>&1 | \
+; RUN: grep "callq" | grep "*funcp"
+;
+; Original source code built with clang -S -emit-llvm -m64 test64.c:
+; int a, b, c, d, e, f, g, h, i, j, k, l, m, n;
+; extern int (*funcp)(int, int, int, int, int, int,
+; int, int, int, int, int, int,
+; int, int);
+; extern int sum;
+;
+; void func()
+; {
+; sum = 0;
+; for( i = a; i < b; ++i )
+; {
+; sum += (*funcp)(a, i, i*2, i/b, c, d, e, f, g, h, j, k, l, n);
+; }
+; }
+;
+; ModuleID = 'test64.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@sum = external global i32
+@a = common global i32 0, align 4
+@i = common global i32 0, align 4
+@b = common global i32 0, align 4
+@funcp = external global i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*
+@c = common global i32 0, align 4
+@d = common global i32 0, align 4
+@e = common global i32 0, align 4
+@f = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i32 0, align 4
+@j = common global i32 0, align 4
+@k = common global i32 0, align 4
+@l = common global i32 0, align 4
+@n = common global i32 0, align 4
+@m = common global i32 0, align 4
+
+define void @func() #0 {
+entry:
+ store i32 0, i32* @sum, align 4
+ %0 = load i32* @a, align 4
+ store i32 %0, i32* @i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %1 = load i32* @i, align 4
+ %2 = load i32* @b, align 4
+ %cmp = icmp slt i32 %1, %2
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 8
+ %4 = load i32* @a, align 4
+ %5 = load i32* @i, align 4
+ %6 = load i32* @i, align 4
+ %mul = mul nsw i32 %6, 2
+ %7 = load i32* @i, align 4
+ %8 = load i32* @b, align 4
+ %div = sdiv i32 %7, %8
+ %9 = load i32* @c, align 4
+ %10 = load i32* @d, align 4
+ %11 = load i32* @e, align 4
+ %12 = load i32* @f, align 4
+ %13 = load i32* @g, align 4
+ %14 = load i32* @h, align 4
+ %15 = load i32* @j, align 4
+ %16 = load i32* @k, align 4
+ %17 = load i32* @l, align 4
+ %18 = load i32* @n, align 4
+ %call = call i32 %3(i32 %4, i32 %5, i32 %mul, i32 %div, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16, i32 %17, i32 %18)
+ %19 = load i32* @sum, align 4
+ %add = add nsw i32 %19, %call
+ store i32 %add, i32* @sum, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %20 = load i32* @i, align 4
+ %inc = add nsw i32 %20, 1
+ store i32 %inc, i32* @i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/atom-call-reg-indirect.ll b/test/CodeGen/X86/atom-call-reg-indirect.ll
new file mode 100644
index 000000000000..632781130d06
--- /dev/null
+++ b/test/CodeGen/X86/atom-call-reg-indirect.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=ATOM32 %s
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck -check-prefix=ATOM-NOT32 %s
+; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM64 %s
+; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s
+
+
+; fn_ptr.ll
+%class.A = type { i32 (...)** }
+
+define i32 @test1() #0 {
+ ;ATOM32: test1
+entry:
+ %call = tail call %class.A* @_Z3facv()
+ %0 = bitcast %class.A* %call to void (%class.A*)***
+ %vtable = load void (%class.A*)*** %0, align 8
+ %1 = load void (%class.A*)** %vtable, align 8
+ ;ATOM32: movl (%ecx), %ecx
+ ;ATOM32: calll *%ecx
+ ;ATOM-NOT32: calll *(%ecx)
+ ;ATOM64: movq (%rcx), %rcx
+ ;ATOM64: callq *%rcx
+ ;ATOM-NOT64: callq *(%rcx)
+ tail call void %1(%class.A* %call)
+ ret i32 0
+}
+
+declare %class.A* @_Z3facv() #1
+
+; virt_fn.ll
+@p = external global void (i32)**
+
+define i32 @test2() #0 {
+ ;ATOM32: test2
+entry:
+ %0 = load void (i32)*** @p, align 8
+ %1 = load void (i32)** %0, align 8
+ ;ATOM32: movl (%eax), %eax
+ ;ATOM32: calll *%eax
+ ;ATOM-NOT32: calll *(%eax)
+ ;ATOM64: movq (%rax), %rax
+ ;ATOM64: callq *%rax
+ ;ATOM-NOT64: callq *(%rax)
+ tail call void %1(i32 2)
+ ret i32 0
+}
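+
+; What the prefixes encode (a summary, no new behaviour is asserted): on
+; Atom the memory-indirect form call *(%reg) is split into an explicit load
+; of the function pointer followed by call *%reg, while generic cores such
+; as core2 (the ATOM-NOT32/ATOM-NOT64 runs) keep the folded call *(%reg).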
diff --git a/test/CodeGen/X86/atom-pad-short-functions.ll b/test/CodeGen/X86/atom-pad-short-functions.ll
new file mode 100644
index 000000000000..b9a39e08cb51
--- /dev/null
+++ b/test/CodeGen/X86/atom-pad-short-functions.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -O1 -mcpu=atom -mtriple=i686-linux | FileCheck %s
+
+declare void @external_function(...)
+
+define i32 @test_return_val(i32 %a) nounwind {
+; CHECK: test_return_val
+; CHECK: movl
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: ret
+ ret i32 %a
+}
+
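+; The policy under test (inferred from the checks): at -O1 on Atom a very
+; short function is padded with nops before ret to avoid the pipeline stall
+; Atom hits when a call returns too quickly, but the padding is suppressed
+; under optsize or minsize, as the next two functions verify.
+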
+define i32 @test_optsize(i32 %a) nounwind optsize {
+; CHECK: test_optsize
+; CHECK: movl
+; CHECK-NEXT: ret
+ ret i32 %a
+}
+
+define i32 @test_minsize(i32 %a) nounwind minsize {
+; CHECK: test_minsize
+; CHECK: movl
+; CHECK-NEXT: ret
+ ret i32 %a
+}
+
+define i32 @test_add(i32 %a, i32 %b) nounwind {
+; CHECK: test_add
+; CHECK: addl
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: ret
+ %result = add i32 %a, %b
+ ret i32 %result
+}
+
+define i32 @test_multiple_ret(i32 %a, i32 %b, i1 %c) nounwind {
+; CHECK: @test_multiple_ret
+; CHECK: je
+
+; CHECK: nop
+; CHECK: nop
+; CHECK: ret
+
+; CHECK: nop
+; CHECK: nop
+; CHECK: ret
+
+ br i1 %c, label %bb1, label %bb2
+
+bb1:
+ ret i32 %a
+
+bb2:
+ ret i32 %b
+}
+
+define void @test_call_others(i32 %x) nounwind {
+; CHECK: test_call_others
+; CHECK: je
+ %tobool = icmp eq i32 %x, 0
+ br i1 %tobool, label %if.end, label %true.case
+
+; CHECK: jmp external_function
+true.case:
+ tail call void bitcast (void (...)* @external_function to void ()*)() nounwind
+ br label %if.end
+
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: ret
+if.end:
+ ret void
+}
+
+define void @test_branch_to_same_bb(i32 %x, i32 %y) nounwind {
+; CHECK: @test_branch_to_same_bb
+ %cmp = icmp sgt i32 %x, 0
+ br i1 %cmp, label %while.cond, label %while.end
+
+while.cond:
+ br label %while.cond
+
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: nop
+; CHECK: ret
+while.end:
+ ret void
+}
+
diff --git a/test/CodeGen/X86/atomic-dagsched.ll b/test/CodeGen/X86/atomic-dagsched.ll
new file mode 100644
index 000000000000..0e7cf8c09668
--- /dev/null
+++ b/test/CodeGen/X86/atomic-dagsched.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s
+
+define void @test(i8** %a, i64* %b, i64 %c, i64 %d) nounwind {
+entry:
+ %ptrtoarg4 = load i8** %a, align 8
+ %brglist1 = getelementptr i8** %a, i64 1
+ %ptrtoarg25 = load i8** %brglist1, align 8
+ %0 = load i64* %b, align 8
+ %1 = mul i64 %0, 4
+ %scevgep = getelementptr i8* %ptrtoarg25, i64 %1
+ %2 = mul i64 %d, 4
+ br label %loop.cond
+
+loop.cond: ; preds = %test.exit, %entry
+ %asr.iv6 = phi i8* [ %29, %test.exit ], [ %scevgep, %entry ]
+ %iv = phi i64 [ %0, %entry ], [ %28, %test.exit ]
+ %3 = icmp eq i64 %iv, %c
+ br i1 %3, label %return, label %loop
+
+loop: ; preds = %loop.cond
+ %4 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8, !tbaa !0
+ %5 = load i64* %4, align 8, !tbaa !3
+ %vector.size.i = ashr i64 %5, 3
+ %num.vector.wi.i = shl i64 %vector.size.i, 3
+ %6 = icmp eq i64 %vector.size.i, 0
+ br i1 %6, label %scalarIf.i, label %dim_0_vector_pre_head.i
+
+dim_0_vector_pre_head.i: ; preds = %loop
+ %7 = trunc i64 %5 to i32
+ %tempvector_func.i = insertelement <8 x i32> undef, i32 %7, i32 0
+ %vectorvector_func.i = shufflevector <8 x i32> %tempvector_func.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ br label %vector_kernel_entry.i
+
+vector_kernel_entry.i: ; preds = %vector_kernel_entry.i, %dim_0_vector_pre_head.i
+ %asr.iv9 = phi i8* [ %scevgep10, %vector_kernel_entry.i ], [ %asr.iv6, %dim_0_vector_pre_head.i ]
+ %asr.iv = phi i64 [ %asr.iv.next, %vector_kernel_entry.i ], [ %vector.size.i, %dim_0_vector_pre_head.i ]
+ %8 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)*
+ %asr.iv911 = bitcast i8* %asr.iv9 to <8 x i32> addrspace(1)*
+ %9 = load <8 x i32> addrspace(1)* %asr.iv911, align 4
+ %extract8vector_func.i = extractelement <8 x i32> %9, i32 0
+ %extract9vector_func.i = extractelement <8 x i32> %9, i32 1
+ %extract10vector_func.i = extractelement <8 x i32> %9, i32 2
+ %extract11vector_func.i = extractelement <8 x i32> %9, i32 3
+ %extract12vector_func.i = extractelement <8 x i32> %9, i32 4
+ %extract13vector_func.i = extractelement <8 x i32> %9, i32 5
+ %extract14vector_func.i = extractelement <8 x i32> %9, i32 6
+ %extract15vector_func.i = extractelement <8 x i32> %9, i32 7
+ %10 = atomicrmw min i32 addrspace(1)* %8, i32 %extract8vector_func.i seq_cst
+ %11 = atomicrmw min i32 addrspace(1)* %8, i32 %extract9vector_func.i seq_cst
+ %12 = atomicrmw min i32 addrspace(1)* %8, i32 %extract10vector_func.i seq_cst
+ %13 = atomicrmw min i32 addrspace(1)* %8, i32 %extract11vector_func.i seq_cst
+ %14 = atomicrmw min i32 addrspace(1)* %8, i32 %extract12vector_func.i seq_cst
+ %15 = atomicrmw min i32 addrspace(1)* %8, i32 %extract13vector_func.i seq_cst
+ %16 = atomicrmw min i32 addrspace(1)* %8, i32 %extract14vector_func.i seq_cst
+ %17 = atomicrmw min i32 addrspace(1)* %8, i32 %extract15vector_func.i seq_cst
+ store <8 x i32> %vectorvector_func.i, <8 x i32> addrspace(1)* %asr.iv911, align 4
+ %asr.iv.next = add i64 %asr.iv, -1
+ %scevgep10 = getelementptr i8* %asr.iv9, i64 32
+ %dim_0_vector_cmp.to.max.i = icmp eq i64 %asr.iv.next, 0
+ br i1 %dim_0_vector_cmp.to.max.i, label %scalarIf.i, label %vector_kernel_entry.i
+
+scalarIf.i: ; preds = %vector_kernel_entry.i, %loop
+ %exec_wi.i = phi i64 [ 0, %loop ], [ %num.vector.wi.i, %vector_kernel_entry.i ]
+ %18 = icmp eq i64 %exec_wi.i, %5
+ br i1 %18, label %test.exit, label %dim_0_pre_head.i
+
+dim_0_pre_head.i: ; preds = %scalarIf.i
+ %19 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8, !tbaa !0
+ %20 = load i64* %19, align 8, !tbaa !3
+ %21 = trunc i64 %20 to i32
+ %22 = mul i64 %vector.size.i, 8
+ br label %scalar_kernel_entry.i
+
+scalar_kernel_entry.i: ; preds = %scalar_kernel_entry.i, %dim_0_pre_head.i
+ %asr.iv12 = phi i64 [ %asr.iv.next13, %scalar_kernel_entry.i ], [ %22, %dim_0_pre_head.i ]
+ %23 = bitcast i8* %asr.iv6 to i32 addrspace(1)*
+ %24 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)*
+ %scevgep16 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
+ %25 = load i32 addrspace(1)* %scevgep16, align 4, !tbaa !4
+ %26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst
+ %scevgep15 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
+ store i32 %21, i32 addrspace(1)* %scevgep15, align 4, !tbaa !4
+ %asr.iv.next13 = add i64 %asr.iv12, 1
+ %dim_0_cmp.to.max.i = icmp eq i64 %5, %asr.iv.next13
+ br i1 %dim_0_cmp.to.max.i, label %test.exit, label %scalar_kernel_entry.i
+
+test.exit: ; preds = %scalar_kernel_entry.i, %scalarIf.i
+ %27 = bitcast i8* %asr.iv6 to i1*
+ %28 = add i64 %iv, %d
+ store i64 %28, i64* %b, align 8
+ %scevgep8 = getelementptr i1* %27, i64 %2
+ %29 = bitcast i1* %scevgep8 to i8*
+ br label %loop.cond
+
+return: ; preds = %loop.cond
+ store i64 %0, i64* %b, align 8
+ ret void
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"long", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
+
+; CHECK: test
+; CHECK: decq
+; CHECK-NOT: cmpxchgl
+; CHECK: jne
+; CHECK: ret
diff --git a/test/CodeGen/X86/atomic-load-store-wide.ll b/test/CodeGen/X86/atomic-load-store-wide.ll
index a9ebfef2ebeb..17e04f059034 100644
--- a/test/CodeGen/X86/atomic-load-store-wide.ll
+++ b/test/CodeGen/X86/atomic-load-store-wide.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s
; 64-bit load/store on x86-32
; FIXME: The generated code can be substantially improved.
diff --git a/test/CodeGen/X86/atomic-load-store.ll b/test/CodeGen/X86/atomic-load-store.ll
index fee45859c16a..86a744ed00f0 100644
--- a/test/CodeGen/X86/atomic-load-store.ll
+++ b/test/CodeGen/X86/atomic-load-store.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -O0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s
define void @test1(i32* %ptr, i32 %val1) {
; CHECK: test1
diff --git a/test/CodeGen/X86/atomic-minmax-i6432.ll b/test/CodeGen/X86/atomic-minmax-i6432.ll
index e3ef605f7f1c..62f784f69608 100644
--- a/test/CodeGen/X86/atomic-minmax-i6432.ll
+++ b/test/CodeGen/X86/atomic-minmax-i6432.ll
@@ -1,5 +1,6 @@
-; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux < %s | FileCheck %s -check-prefix=LINUX
-; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC
+; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
+; RUN: llc -march=x86 -mattr=-cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=NOCMOV
+; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC
@sc64 = external global i64
@@ -16,6 +17,16 @@ define void @atomic_maxmin_i6432() {
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
+; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
+; NOCMOV: cmpl
+; NOCMOV: setl
+; NOCMOV: cmpl
+; NOCMOV: setl
+; NOCMOV: jne
+; NOCMOV: jne
+; NOCMOV: lock
+; NOCMOV-NEXT: cmpxchg8b
+; NOCMOV: jne [[LABEL]]
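+; Without cmov the i64 min/max selection is done with cmpl/setcc pairs and
+; branches instead of two cmovs, feeding the same lock cmpxchg8b retry loop;
+; the max/min/umax/umin cases differ only in the setcc condition
+; (setl, setg, setb, seta).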
%2 = atomicrmw min i64* @sc64, i64 6 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
@@ -27,6 +38,16 @@ define void @atomic_maxmin_i6432() {
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
+; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
+; NOCMOV: cmpl
+; NOCMOV: setg
+; NOCMOV: cmpl
+; NOCMOV: setg
+; NOCMOV: jne
+; NOCMOV: jne
+; NOCMOV: lock
+; NOCMOV-NEXT: cmpxchg8b
+; NOCMOV: jne [[LABEL]]
%3 = atomicrmw umax i64* @sc64, i64 7 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
@@ -38,6 +59,16 @@ define void @atomic_maxmin_i6432() {
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
+; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
+; NOCMOV: cmpl
+; NOCMOV: setb
+; NOCMOV: cmpl
+; NOCMOV: setb
+; NOCMOV: jne
+; NOCMOV: jne
+; NOCMOV: lock
+; NOCMOV-NEXT: cmpxchg8b
+; NOCMOV: jne [[LABEL]]
%4 = atomicrmw umin i64* @sc64, i64 8 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
@@ -49,6 +80,16 @@ define void @atomic_maxmin_i6432() {
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
+; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
+; NOCMOV: cmpl
+; NOCMOV: seta
+; NOCMOV: cmpl
+; NOCMOV: seta
+; NOCMOV: jne
+; NOCMOV: jne
+; NOCMOV: lock
+; NOCMOV-NEXT: cmpxchg8b
+; NOCMOV: jne [[LABEL]]
ret void
}
diff --git a/test/CodeGen/X86/atomic-or.ll b/test/CodeGen/X86/atomic-or.ll
index 3f02eafb44a2..d759beb2caa8 100644
--- a/test/CodeGen/X86/atomic-or.ll
+++ b/test/CodeGen/X86/atomic-or.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s
; rdar://9692967
diff --git a/test/CodeGen/X86/atomic-pointer.ll b/test/CodeGen/X86/atomic-pointer.ll
index a455277be4db..ec3e6c3a8c19 100644
--- a/test/CodeGen/X86/atomic-pointer.ll
+++ b/test/CodeGen/X86/atomic-pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-none-linux | FileCheck %s
+; RUN: llc < %s -mtriple=i686-none-linux -verify-machineinstrs | FileCheck %s
define i32* @test_atomic_ptr_load(i32** %a0) {
; CHECK: test_atomic_ptr_load
diff --git a/test/CodeGen/X86/atomic16.ll b/test/CodeGen/X86/atomic16.ll
index 824995d6cb98..ec2887e29f81 100644
--- a/test/CodeGen/X86/atomic16.ll
+++ b/test/CodeGen/X86/atomic16.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=corei7 -show-mc-encoding | FileCheck %s --check-prefix X64
-; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 | FileCheck %s --check-prefix X32
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=corei7 -verify-machineinstrs -show-mc-encoding | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32
@sc16 = external global i16
diff --git a/test/CodeGen/X86/atomic32.ll b/test/CodeGen/X86/atomic32.ll
index dc927d8cb6f6..3cb9ca1c76c7 100644
--- a/test/CodeGen/X86/atomic32.ll
+++ b/test/CodeGen/X86/atomic32.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
-; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -mattr=-cmov -verify-machineinstrs | FileCheck %s --check-prefix NOCMOV
@sc32 = external global i32
@@ -164,9 +165,15 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
; X32: cmov
; X32: lock
; X32: cmpxchgl
+
+; NOCMOV: cmpl
+; NOCMOV: jl
+; NOCMOV: lock
+; NOCMOV: cmpxchgl
ret void
; X64: ret
; X32: ret
+; NOCMOV: ret
}
define void @atomic_fetch_min32(i32 %x) nounwind {
@@ -180,9 +187,15 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
; X32: cmov
; X32: lock
; X32: cmpxchgl
+
+; NOCMOV: cmpl
+; NOCMOV: jg
+; NOCMOV: lock
+; NOCMOV: cmpxchgl
ret void
; X64: ret
; X32: ret
+; NOCMOV: ret
}
define void @atomic_fetch_umax32(i32 %x) nounwind {
@@ -196,9 +209,15 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
; X32: cmov
; X32: lock
; X32: cmpxchgl
+
+; NOCMOV: cmpl
+; NOCMOV: jb
+; NOCMOV: lock
+; NOCMOV: cmpxchgl
ret void
; X64: ret
; X32: ret
+; NOCMOV: ret
}
define void @atomic_fetch_umin32(i32 %x) nounwind {
@@ -207,13 +226,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; X64: cmov
; X64: lock
; X64: cmpxchgl
+
; X32: cmpl
; X32: cmov
; X32: lock
; X32: cmpxchgl
+
+; NOCMOV: cmpl
+; NOCMOV: ja
+; NOCMOV: lock
+; NOCMOV: cmpxchgl
ret void
; X64: ret
; X32: ret
+; NOCMOV: ret
}
define void @atomic_fetch_cmpxchg32() nounwind {
diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll
index 45785cc8fe52..aa000455753f 100644
--- a/test/CodeGen/X86/atomic64.ll
+++ b/test/CodeGen/X86/atomic64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64
@sc64 = external global i64
diff --git a/test/CodeGen/X86/atomic6432.ll b/test/CodeGen/X86/atomic6432.ll
index f9b21c5bc75e..31e66c876e3d 100644
--- a/test/CodeGen/X86/atomic6432.ll
+++ b/test/CodeGen/X86/atomic6432.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32
@sc64 = external global i64
diff --git a/test/CodeGen/X86/atomic8.ll b/test/CodeGen/X86/atomic8.ll
index 412428406dcf..3278ed1f504e 100644
--- a/test/CodeGen/X86/atomic8.ll
+++ b/test/CodeGen/X86/atomic8.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
-; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32
@sc8 = external global i8
diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll
index d94499889de4..6b3a6b224dba 100644
--- a/test/CodeGen/X86/atomic_add.ll
+++ b/test/CodeGen/X86/atomic_add.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s
; rdar://7103704
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index c5fa07d07d80..a378d6e8d684 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov -verify-machineinstrs | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll
index d0a7fe01009e..22fad7ce4b7d 100644
--- a/test/CodeGen/X86/avx-cvt.ll
+++ b/test/CodeGen/X86/avx-cvt.ll
@@ -18,6 +18,12 @@ define <4 x double> @sitofp01(<4 x i32> %a) {
ret <4 x double> %b
}
+; CHECK: vcvtdq2ps %ymm
+define <8 x float> @sitofp02(<8 x i16> %a) {
+ %b = sitofp <8 x i16> %a to <8 x float>
+ ret <8 x float> %b
+}
+
; CHECK: vcvttpd2dqy %ymm
define <4 x i32> @fptosi01(<4 x double> %a) {
%b = fptosi <4 x double> %a to <4 x i32>
@@ -46,7 +52,7 @@ entry:
ret double %conv
}
-; CHECK: vcvtsi2sd (%
+; CHECK: vcvtsi2sdl (%
define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
entry:
%tmp1 = load i32* %e, align 4
@@ -54,7 +60,7 @@ entry:
ret double %conv
}
-; CHECK: vcvtsi2ss (%
+; CHECK: vcvtsi2ssl (%
define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
entry:
%tmp1 = load i32* %e, align 4
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
index 1446b36a0fb4..055072098a25 100644
--- a/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -1,9 +1,12 @@
-; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN32 %s
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X32 %s
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN64 %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=NOT_WIN %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X64 %s
declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
+declare i32 @func_int(i32, i32)
+
; WIN64: testf16_inp
; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
@@ -11,19 +14,19 @@ declare <16 x float> @func_float16(<16 x float>, <16 x float>)
; WIN64: call
; WIN64: ret
-; WIN32: testf16_inp
-; WIN32: movl %eax, (%esp)
-; WIN32: vaddps {{.*}}, {{%ymm[0-1]}}
-; WIN32: vaddps {{.*}}, {{%ymm[0-1]}}
-; WIN32: call
-; WIN32: ret
+; X32: testf16_inp
+; X32: movl %eax, (%esp)
+; X32: vaddps {{.*}}, {{%ymm[0-1]}}
+; X32: vaddps {{.*}}, {{%ymm[0-1]}}
+; X32: call
+; X32: ret
-; NOT_WIN: testf16_inp
-; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}}
-; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}}
-; NOT_WIN: leaq {{.*}}(%rsp), %rdi
-; NOT_WIN: call
-; NOT_WIN: ret
+; X64: testf16_inp
+; X64: vaddps {{.*}}, {{%ymm[0-1]}}
+; X64: vaddps {{.*}}, {{%ymm[0-1]}}
+; X64: leaq {{.*}}(%rsp), %rdi
+; X64: call
+; X64: ret
;test calling conventions - input parameters
define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
@@ -45,11 +48,11 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
; WIN64: ret
; preserved ymm8-ymm15
-; NOT_WIN: testf16_regs
-; NOT_WIN: call
-; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0
-; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1
-; NOT_WIN: ret
+; X64: testf16_regs
+; X64: call
+; X64: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0
+; X64: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1
+; X64: ret
define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
%y = alloca <16 x float>, align 16
@@ -84,24 +87,83 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: call
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: call
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
%c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
ret <16 x float> %c
}
+
+; test functions with integer parameters
+; pass parameters on stack for 32-bit platform
+; X32: movl {{.*}}, 4(%esp)
+; X32: movl {{.*}}, (%esp)
+; X32: call
+; X32: addl {{.*}}, %eax
+
+; pass parameters in registers for 64-bit platform
+; X64: leal {{.*}}, %edi
+; X64: movl {{.*}}, %esi
+; X64: call
+; X64: addl {{.*}}, %eax
+define i32 @test_int(i32 %a, i32 %b) nounwind {
+ %c1 = add i32 %a, %b
+ %c2 = call intel_ocl_bicc i32 @func_int(i32 %c1, i32 %a)
+ %c = add i32 %c2, %b
+ ret i32 %c
+}
+
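+; Roughly equivalent C for the test above (a sketch; func_int is the
+; external callee declared earlier):
+;   int test_int(int a, int b) { return func_int(a + b, a) + b; }
+; On ia32 both arguments travel through the stack slots the X32 lines
+; check; on x86-64 they arrive in %edi/%esi as the X64 lines expect.
+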
+; WIN64: test_float4
+; WIN64-NOT: vzeroupper
+; WIN64: call
+; WIN64-NOT: vzeroupper
+; WIN64: call
+; WIN64: ret
+
+; X64: test_float4
+; X64-NOT: vzeroupper
+; X64: call
+; X64-NOT: vzeroupper
+; X64: call
+; X64: ret
+
+; X32: test_float4
+; X32: vzeroupper
+; X32: call
+; X32: vzeroupper
+; X32: call
+; X32: ret
+
+declare <4 x float> @func_float4(<4 x float>, <4 x float>, <4 x float>)
+
+define <8 x float> @test_float4(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone {
+entry:
+ %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %1 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %call.i = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %0, <4 x float> %1, <4 x float> %2) nounwind
+ %3 = shufflevector <4 x float> %call.i, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %4 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %5 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %6 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %call.i2 = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %4, <4 x float> %5, <4 x float> %6) nounwind
+ %7 = shufflevector <4 x float> %call.i2, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ ret <8 x float> %8
+}
+
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 88ecd5a5d34f..0be83f648d1a 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -671,7 +671,9 @@ define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; CHECK: test_x86_sse2_storeu_dq
; CHECK: movl
; CHECK: vmovdqu
- call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
+ ; add operation forces the execution domain.
+ %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
@@ -681,6 +683,7 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
; CHECK: test_x86_sse2_storeu_pd
; CHECK: movl
; CHECK: vmovupd
+ ; fadd operation forces the execution domain.
%a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
ret void
@@ -2345,7 +2348,7 @@ declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind rea
define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
- ; CHECK: vpermilps
+ ; CHECK: vpshufd
%res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll
index c9fc66a8a791..a6775aba0989 100644
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -53,19 +53,24 @@ define void @storev16i16(<16 x i16> %a) nounwind {
unreachable
}
-; CHECK: vmovups %ymm
+; CHECK: storev16i16_01
+; CHECK: vextractf128
+; CHECK: vmovups %xmm
define void @storev16i16_01(<16 x i16> %a) nounwind {
store <16 x i16> %a, <16 x i16>* undef, align 4
unreachable
}
+; CHECK: storev32i8
; CHECK: vmovaps %ymm
define void @storev32i8(<32 x i8> %a) nounwind {
store <32 x i8> %a, <32 x i8>* undef, align 32
unreachable
}
-; CHECK: vmovups %ymm
+; CHECK: storev32i8_01
+; CHECK: vextractf128
+; CHECK: vmovups %xmm
define void @storev32i8_01(<32 x i8> %a) nounwind {
store <32 x i8> %a, <32 x i8>* undef, align 4
unreachable
@@ -109,3 +114,38 @@ cif_mixed_test_any_check: ; preds = %cif_mask_mixed
unreachable
}
+; CHECK: add8i32
+; CHECK: vmovups
+; CHECK: vmovups
+; CHECK-NOT: vinsertf128
+; CHECK-NOT: vextractf128
+; CHECK: vmovups
+; CHECK: vmovups
+define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
+ %b = load <8 x i32>* %bp, align 1
+ %x = add <8 x i32> zeroinitializer, %b
+ store <8 x i32> %x, <8 x i32>* %ret, align 1
+ ret void
+}
+
+; CHECK: add4i64a64
+; CHECK: vmovaps ({{.*}}), %ymm{{.*}}
+; CHECK: vmovaps %ymm{{.*}}, ({{.*}})
+define void @add4i64a64(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
+ %b = load <4 x i64>* %bp, align 64
+ %x = add <4 x i64> zeroinitializer, %b
+ store <4 x i64> %x, <4 x i64>* %ret, align 64
+ ret void
+}
+
+; CHECK: add4i64a16
+; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}}
+; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}}
+; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
+; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
+define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
+ %b = load <4 x i64>* %bp, align 16
+ %x = add <4 x i64> zeroinitializer, %b
+ store <4 x i64> %x, <4 x i64>* %ret, align 16
+ ret void
+}
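+
+; Summary of the three cases above (restating the checks): align 1 forces
+; unaligned vmovups halves, align 64 permits a single aligned 32-byte
+; vmovaps, and align 16 splits the 256-bit access into two aligned 16-byte
+; xmm vmovaps operations.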
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
index 3713a8c37799..b9c700051005 100755
--- a/test/CodeGen/X86/avx-sext.ll
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -1,17 +1,188 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=pentium4 | FileCheck %s -check-prefix=SSE2
define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
-;CHECK: sext_8i16_to_8i32
-;CHECK: vpmovsxwd
+; AVX: sext_8i16_to_8i32
+; AVX: vpmovsxwd
%B = sext <8 x i16> %A to <8 x i32>
ret <8 x i32>%B
}
define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
-;CHECK: sext_4i32_to_4i64
-;CHECK: vpmovsxdq
+; AVX: sext_4i32_to_4i64
+; AVX: vpmovsxdq
%B = sext <4 x i32> %A to <4 x i64>
ret <4 x i64>%B
}
+
+; AVX: load_sext_test1
+; AVX: vpmovsxwd (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test1
+; SSSE3: movq
+; SSSE3: punpcklwd %xmm{{.*}}, %xmm{{.*}}
+; SSSE3: psrad $16
+; SSSE3: ret
+
+; SSE2: load_sext_test1
+; SSE2: movq
+; SSE2: punpcklwd %xmm{{.*}}, %xmm{{.*}}
+; SSE2: psrad $16
+; SSE2: ret
+define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
+ %X = load <4 x i16>* %ptr
+ %Y = sext <4 x i16> %X to <4 x i32>
+ ret <4 x i32>%Y
+}
+
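+; Shape of these load_sext tests (a reading of the prefixes, nothing new):
+; with AVX the sign-extending load folds into a single vpmovsx* from memory;
+; pre-AVX targets load the raw bits, interleave them into the high half of
+; each wider lane (punpcklwd above), and shift arithmetically right so the
+; sign bit replicates (psrad $16).
+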
+; AVX: load_sext_test2
+; AVX: vpmovsxbd (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test2
+; SSSE3: movd
+; SSSE3: pshufb
+; SSSE3: psrad $24
+; SSSE3: ret
+
+; SSE2: load_sext_test2
+; SSE2: movl
+; SSE2: psrad $24
+; SSE2: ret
+define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
+ %X = load <4 x i8>* %ptr
+ %Y = sext <4 x i8> %X to <4 x i32>
+ ret <4 x i32>%Y
+}
+
+; AVX: load_sext_test3
+; AVX: vpmovsxbq (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test3
+; SSSE3: movsbq
+; SSSE3: movsbq
+; SSSE3: punpcklqdq
+; SSSE3: ret
+
+; SSE2: load_sext_test3
+; SSE2: movsbq
+; SSE2: movsbq
+; SSE2: punpcklqdq
+; SSE2: ret
+define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) {
+ %X = load <2 x i8>* %ptr
+ %Y = sext <2 x i8> %X to <2 x i64>
+ ret <2 x i64>%Y
+}
+
+; AVX: load_sext_test4
+; AVX: vpmovsxwq (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test4
+; SSSE3: movswq
+; SSSE3: movswq
+; SSSE3: punpcklqdq
+; SSSE3: ret
+
+; SSE2: load_sext_test4
+; SSE2: movswq
+; SSE2: movswq
+; SSE2: punpcklqdq
+; SSE2: ret
+define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
+ %X = load <2 x i16>* %ptr
+ %Y = sext <2 x i16> %X to <2 x i64>
+ ret <2 x i64>%Y
+}
+
+; AVX: load_sext_test5
+; AVX: vpmovsxdq (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test5
+; SSSE3: movslq
+; SSSE3: movslq
+; SSSE3: punpcklqdq
+; SSSE3: ret
+
+; SSE2: load_sext_test5
+; SSE2: movslq
+; SSE2: movslq
+; SSE2: punpcklqdq
+; SSE2: ret
+define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
+ %X = load <2 x i32>* %ptr
+ %Y = sext <2 x i32> %X to <2 x i64>
+ ret <2 x i64>%Y
+}
+
+; AVX: load_sext_test6
+; AVX: vpmovsxbw (%r{{[^,]*}}), %xmm{{.*}}
+; AVX: ret
+
+; SSSE3: load_sext_test6
+; SSSE3: movq
+; SSSE3: punpcklbw
+; SSSE3: psraw $8
+; SSSE3: ret
+
+; SSE2: load_sext_test6
+; SSE2: movq
+; SSE2: punpcklbw
+; SSE2: psraw $8
+; SSE2: ret
+define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
+ %X = load <8 x i8>* %ptr
+ %Y = sext <8 x i8> %X to <8 x i16>
+ ret <8 x i16>%Y
+}
+
+; AVX: sext_4i1_to_4i64
+; AVX: vpslld $31
+; AVX: vpsrad $31
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
+ %extmask = sext <4 x i1> %mask to <4 x i64>
+ ret <4 x i64> %extmask
+}
+
+; AVX: sext_4i8_to_4i64
+; AVX: vpslld $24
+; AVX: vpsrad $24
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
+ %extmask = sext <4 x i8> %mask to <4 x i64>
+ ret <4 x i64> %extmask
+}
+
+; AVX: load_sext_4i8_to_4i64
+; AVX: vpmovsxbd
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
+ %X = load <4 x i8>* %ptr
+ %Y = sext <4 x i8> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
+
+; AVX: load_sext_4i16_to_4i64
+; AVX: vpmovsxwd
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
+ %X = load <4 x i16>* %ptr
+ %Y = sext <4 x i16> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
index 681747b844a0..01eb7361e293 100644
--- a/test/CodeGen/X86/avx-shift.ll
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -105,13 +105,22 @@ define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone {
; CHECK: _vshift08
; CHECK: vextractf128 $1
; CHECK: vpslld $23
-; CHECK: vextractf128 $1
; CHECK: vpslld $23
define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
%bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
ret <8 x i32> %bitop
}
+; PR15141
+; CHECK: _vshift13:
+; CHECK-NOT: vpsll
+; CHECK: vcvttps2dq
+; CHECK-NEXT: vpmulld
+define <4 x i32> @vshift13(<4 x i32> %in) {
+ %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x i32> %T
+}
+
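+; The arithmetic behind these checks: x << <0, 1, 2, 4> equals
+; x * <1, 2, 4, 16>, so the variable vector shift becomes one vpmulld.  The
+; vcvttps2dq reflects how the per-lane powers of two are built (biasing the
+; shift amounts into float exponents, then converting back) rather than
+; scalarizing into four vpsll operations.
+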
;;; Uses shifts for sign extension
; CHECK: _sext_v16i16
; CHECK: vpsllw
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index ec11654b3556..73faa1fe0d40 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -6,7 +6,7 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
ret <4 x float> %b
; CHECK: test1:
; CHECK: vshufps
-; CHECK: vpermilps
+; CHECK: vpshufd
}
; rdar://10538417
@@ -98,23 +98,23 @@ define i32 @test10(<4 x i32> %a) nounwind {
}
define <4 x float> @test11(<4 x float> %a) nounwind {
-; check: test11
-; check: vpermilps $27
+; CHECK: test11
+; CHECK: vpshufd $27
%tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %tmp1
}
define <4 x float> @test12(<4 x float>* %a) nounwind {
; CHECK: test12
-; CHECK: vpermilps $27, (
+; CHECK: vpshufd
%tmp0 = load <4 x float>* %a
%tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %tmp1
}
define <4 x i32> @test13(<4 x i32> %a) nounwind {
-; check: test13
-; check: vpshufd $27
+; CHECK: test13
+; CHECK: vpshufd $27
%tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %tmp1
}
@@ -246,3 +246,54 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
ret <8 x float>%S
}
+; rdar://12684358
+; Make sure loads happen before stores.
+; CHECK: swap8doubles
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+define void @swap8doubles(double* nocapture %A, double* nocapture %C) nounwind uwtable ssp {
+entry:
+ %add.ptr = getelementptr inbounds double* %A, i64 2
+ %v.i = bitcast double* %A to <2 x double>*
+ %0 = load <2 x double>* %v.i, align 1
+ %shuffle.i.i = shufflevector <2 x double> %0, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+ %v1.i = bitcast double* %add.ptr to <2 x double>*
+ %1 = load <2 x double>* %v1.i, align 1
+ %2 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i, <2 x double> %1, i8 1) nounwind
+ %add.ptr1 = getelementptr inbounds double* %A, i64 6
+ %add.ptr2 = getelementptr inbounds double* %A, i64 4
+ %v.i27 = bitcast double* %add.ptr2 to <2 x double>*
+ %3 = load <2 x double>* %v.i27, align 1
+ %shuffle.i.i28 = shufflevector <2 x double> %3, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+ %v1.i29 = bitcast double* %add.ptr1 to <2 x double>*
+ %4 = load <2 x double>* %v1.i29, align 1
+ %5 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i28, <2 x double> %4, i8 1) nounwind
+ %6 = bitcast double* %C to <4 x double>*
+ %7 = load <4 x double>* %6, align 32
+ %add.ptr5 = getelementptr inbounds double* %C, i64 4
+ %8 = bitcast double* %add.ptr5 to <4 x double>*
+ %9 = load <4 x double>* %8, align 32
+ %shuffle.i26 = shufflevector <4 x double> %7, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ %10 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %7, i8 1)
+ %shuffle.i = shufflevector <4 x double> %9, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ %11 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %9, i8 1)
+ store <2 x double> %shuffle.i26, <2 x double>* %v.i, align 16
+ store <2 x double> %10, <2 x double>* %v1.i, align 16
+ store <2 x double> %shuffle.i, <2 x double>* %v.i27, align 16
+ store <2 x double> %11, <2 x double>* %v1.i29, align 16
+ store <4 x double> %2, <4 x double>* %6, align 32
+ store <4 x double> %5, <4 x double>* %8, align 32
+ ret void
+}
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
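+
+; (The function swaps eight doubles between %A and %C; every store overwrites
+; memory that is still to be read, so all of the loads checked above must be
+; scheduled before any of the stores.)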
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index 94bcddd97592..5c01c2cc5b50 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -3,8 +3,8 @@
; CHECK: vpunpcklbw %xmm
; CHECK-NEXT: vpunpckhbw %xmm
+; CHECK-NEXT: vpshufd $85
; CHECK-NEXT: vinsertf128 $1
-; CHECK-NEXT: vpermilps $85
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -12,8 +12,8 @@ entry:
}
; CHECK: vpunpckhwd %xmm
+; CHECK-NEXT: vpshufd $85
; CHECK-NEXT: vinsertf128 $1
-; CHECK-NEXT: vpermilps $85
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -47,9 +47,9 @@ entry:
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; To:
; shuffle (vload ptr), undef, <1, 1, 1, 1>
-; CHECK: vmovaps
+; CHECK: vmovdqa
+; CHECK-NEXT: vpshufd $-1
; CHECK-NEXT: vinsertf128 $1
-; CHECK-NEXT: vpermilps $-1
define <8 x float> @funcE() nounwind {
allocas:
%udx495 = alloca [18 x [18 x float]], align 32
@@ -75,8 +75,8 @@ __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_ex
ret <8 x float> %load_broadcast12281250
}
-; CHECK: vinsertf128 $1
-; CHECK-NEXT: vpermilps $0
+; CHECK: vpshufd $0
+; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcF(i32 %val) nounwind {
%ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
%ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
@@ -84,8 +84,8 @@ define <8 x float> @funcF(i32 %val) nounwind {
ret <8 x float> %tmp
}
-; CHECK: vinsertf128 $1
-; CHECK-NEXT: vpermilps $0
+; CHECK: vpshufd $0
+; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -93,8 +93,8 @@ entry:
}
; CHECK: vextractf128 $1
+; CHECK-NEXT: vpshufd
; CHECK-NEXT: vinsertf128 $1
-; CHECK-NEXT: vpermilps $85
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll
index ff56a454996e..ad8365bb59c0 100644
--- a/test/CodeGen/X86/avx-vextractf128.ll
+++ b/test/CodeGen/X86/avx-vextractf128.ll
@@ -102,3 +102,21 @@ entry:
store <2 x i64> %2, <2 x i64>* %addr, align 1
ret void
}
+
+; PR15462
+define void @t9(i64* %p) {
+ store i64 0, i64* %p
+ %q = getelementptr i64* %p, i64 1
+ store i64 0, i64* %q
+ %r = getelementptr i64* %p, i64 2
+ store i64 0, i64* %r
+ %s = getelementptr i64* %p, i64 3
+ store i64 0, i64* %s
+ ret void
+
+; CHECK: t9:
+; CHECK: vxorps %xmm
+; CHECK-NOT: vextractf
+; CHECK: vmovups
+; CHECK: vmovups
+}
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index cb904b93313a..7f2f9d821dd5 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -45,8 +45,8 @@ entry:
ret <8 x float> %shuffle
}
-; CHECK: palignr
-; CHECK: palignr
+; CHECK: palignr $8
+; CHECK: psrldq $8
define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll
index b630e9d14612..582537ea906f 100755
--- a/test/CodeGen/X86/avx-zext.ll
+++ b/test/CodeGen/X86/avx-zext.ll
@@ -18,11 +18,10 @@ define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp
ret <4 x i64>%B
}
-
define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
;CHECK: zext_8i8_to_8i32
;CHECK: vpunpckhwd
-;CHECK: vpunpcklwd
+;CHECK: vpmovzxwd
;CHECK: vinsertf128
;CHECK: ret
%t = zext <8 x i8> %z to <8 x i32>
diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll
index b47491335a31..3ce08dcc7370 100755
--- a/test/CodeGen/X86/avx2-conversions.ll
+++ b/test/CodeGen/X86/avx2-conversions.ll
@@ -63,6 +63,47 @@ define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
ret <8 x i32>%B
}
+; CHECK: load_sext_test1
+; CHECK: vpmovsxdq (%r{{[^,]*}}), %ymm{{.*}}
+; CHECK: ret
+define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
+ %X = load <4 x i32>* %ptr
+ %Y = sext <4 x i32> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
+
+; CHECK: load_sext_test2
+; CHECK: vpmovsxbq (%r{{[^,]*}}), %ymm{{.*}}
+; CHECK: ret
+define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
+ %X = load <4 x i8>* %ptr
+ %Y = sext <4 x i8> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
+; CHECK: load_sext_test3
+; CHECK: vpmovsxwq (%r{{[^,]*}}), %ymm{{.*}}
+; CHECK: ret
+define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
+ %X = load <4 x i16>* %ptr
+ %Y = sext <4 x i16> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
+; CHECK: load_sext_test4
+; CHECK: vpmovsxwd (%r{{[^,]*}}), %ymm{{.*}}
+; CHECK: ret
+define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
+ %X = load <8 x i16>* %ptr
+ %Y = sext <8 x i16> %X to <8 x i32>
+ ret <8 x i32>%Y
+}
+; CHECK: load_sext_test5
+; CHECK: vpmovsxbd (%r{{[^,]*}}), %ymm{{.*}}
+; CHECK: ret
+define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
+ %X = load <8 x i8>* %ptr
+ %Y = sext <8 x i8> %X to <8 x i32>
+ ret <8 x i32>%Y
+}
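+
+; (Each load_sext_test checks that the load and the sext fuse into a single
+; vpmovsx* with a memory operand, rather than a plain load followed by an
+; in-register extend.)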
diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll
index 13ebaa6f8797..a5bb1a8f8e44 100644
--- a/test/CodeGen/X86/avx2-logic.ll
+++ b/test/CodeGen/X86/avx2-logic.ll
@@ -48,9 +48,8 @@ entry:
; CHECK: vpblendvb
; CHECK: vpblendvb %ymm
; CHECK: ret
-define <32 x i8> @vpblendvb(<32 x i8> %x, <32 x i8> %y) {
- %min_is_x = icmp ult <32 x i8> %x, %y
- %min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y
+define <32 x i8> @vpblendvb(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y) {
+ %min = select <32 x i1> %cond, <32 x i8> %x, <32 x i8> %y
ret <32 x i8> %min
}
diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll
index a414e6880c32..cf319cb7fe1d 100644
--- a/test/CodeGen/X86/avx2-shuffle.ll
+++ b/test/CodeGen/X86/avx2-shuffle.ll
@@ -4,15 +4,62 @@
; The mask for the vpblendw instruction needs to be identical for both halves
; of the YMM. Need to use two vpblendw instructions.
-; CHECK: blendw1
-; CHECK: vpblendw
-; CHECK: vpblendw
+; CHECK: vpblendw_test1
+; mask = 10010110b = 150d (identical for both halves)
+; CHECK: vpblendw $150, %ymm
; CHECK: ret
-define <16 x i16> @blendw1(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline {
- %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
+define <16 x i16> @vpblendw_test1(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline {
+ %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 20, i32 5, i32 6, i32 23,
+ i32 8, i32 25, i32 26, i32 11, i32 28, i32 13, i32 14, i32 31>
ret <16 x i16> %t
}
+; CHECK: vpblendw_test2
+; mask1 = 00010110 = 22
+; mask2 = 10000000 = 128
+; CHECK: vpblendw $128, %xmm
+; CHECK: vpblendw $22, %xmm
+; CHECK: vinserti128
+; CHECK: ret
+define <16 x i16> @vpblendw_test2(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline {
+ %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 20, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
+ ret <16 x i16> %t
+}
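+
+; (In a vpblendw mask, bit i set means lane i is taken from the second source.
+; vpblendw_test1 takes b-lanes at positions 1,2,4,7 in both halves,
+; 10010110b = 150, so one ymm vpblendw suffices; vpblendw_test2's halves
+; differ, 00010110b = 22 versus 10000000b = 128, forcing the two-xmm sequence
+; plus vinserti128.)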
+
+; CHECK: blend_test1
+; CHECK: vpblendd
+; CHECK: ret
+define <8 x i32> @blend_test1(<8 x i32> %a, <8 x i32> %b) nounwind alwaysinline {
+ %t = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7>
+ ret <8 x i32> %t
+}
+
+; CHECK: blend_test2
+; CHECK: vpblendd
+; CHECK: ret
+define <8 x i32> @blend_test2(<8 x i32> %a, <8 x i32> %b) nounwind alwaysinline {
+ %t = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7>
+ ret <8 x i32> %t
+}
+
+
+; CHECK: blend_test3
+; CHECK: vblendps
+; CHECK: ret
+define <8 x float> @blend_test3(<8 x float> %a, <8 x float> %b) nounwind alwaysinline {
+ %t = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7>
+ ret <8 x float> %t
+}
+
+; CHECK: blend_test4
+; CHECK: vblendpd
+; CHECK: ret
+define <4 x i64> @blend_test4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
+ %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ ret <4 x i64> %t
+}
+
; CHECK: vpshufhw $27, %ymm
define <16 x i16> @vpshufhw(<16 x i16> %src1) nounwind uwtable readnone ssp {
entry:
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
index 11f811f8cf63..e565da74a082 100644
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -5,7 +5,8 @@
; shifting the needed bit to the MSB, and not using shl+sra.
;CHECK: vsel_float
-;CHECK: pslld
+;CHECK: movl $-2147483648
+;CHECK-NEXT: movd
;CHECK-NEXT: blendvps
;CHECK: ret
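+; ($-2147483648 is 0x80000000, the sign-bit mask: it is now materialized as an
+; immediate and moved into the vector instead of being produced by a shift.)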
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
@@ -14,7 +15,8 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
}
;CHECK: vsel_4xi8
-;CHECK: pslld
+;CHECK: movl $-2147483648
+;CHECK-NEXT: movd
;CHECK-NEXT: blendvps
;CHECK: ret
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index 43c47c0fa8a5..b89e648c52d9 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -26,6 +26,14 @@ define i32 @t3(i32 %x) nounwind {
; CHECK: tzcntl
}
+define i32 @tzcnt32_load(i32* %x) nounwind {
+ %x1 = load i32* %x
+ %tmp = tail call i32 @llvm.cttz.i32(i32 %x1, i1 false )
+ ret i32 %tmp
+; CHECK: tzcnt32_load:
+; CHECK: tzcntl ({{.*}})
+}
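+
+; (Each *_load variant in this file repeats a register test with one operand
+; loaded from memory, checking that the load folds into the BMI instruction.)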
+
define i64 @t4(i64 %x) nounwind {
%tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
ret i64 %tmp
@@ -69,6 +77,15 @@ define i32 @andn32(i32 %x, i32 %y) nounwind readnone {
; CHECK: andnl
}
+define i32 @andn32_load(i32 %x, i32* %y) nounwind readnone {
+ %y1 = load i32* %y
+ %tmp1 = xor i32 %x, -1
+ %tmp2 = and i32 %y1, %tmp1
+ ret i32 %tmp2
+; CHECK: andn32_load:
+; CHECK: andnl ({{.*}})
+}
+
define i64 @andn64(i64 %x, i64 %y) nounwind readnone {
%tmp1 = xor i64 %x, -1
%tmp2 = and i64 %tmp1, %y
@@ -84,6 +101,14 @@ define i32 @bextr32(i32 %x, i32 %y) nounwind readnone {
; CHECK: bextrl
}
+define i32 @bextr32_load(i32* %x, i32 %y) nounwind readnone {
+ %x1 = load i32* %x
+ %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
+ ret i32 %tmp
+; CHECK: bextr32_load:
+; CHECK: bextrl {{.*}}, ({{.*}}), {{.*}}
+}
+
declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
define i64 @bextr64(i64 %x, i64 %y) nounwind readnone {
@@ -102,6 +127,14 @@ define i32 @bzhi32(i32 %x, i32 %y) nounwind readnone {
; CHECK: bzhil
}
+define i32 @bzhi32_load(i32* %x, i32 %y) nounwind readnone {
+ %x1 = load i32* %x
+ %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
+ ret i32 %tmp
+; CHECK: bzhi32_load:
+; CHECK: bzhil {{.*}}, ({{.*}}), {{.*}}
+}
+
declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
define i64 @bzhi64(i64 %x, i64 %y) nounwind readnone {
@@ -121,6 +154,15 @@ define i32 @blsi32(i32 %x) nounwind readnone {
; CHECK: blsil
}
+define i32 @blsi32_load(i32* %x) nounwind readnone {
+ %x1 = load i32* %x
+ %tmp = sub i32 0, %x1
+ %tmp2 = and i32 %x1, %tmp
+ ret i32 %tmp2
+; CHECK: blsi32_load:
+; CHECK: blsil ({{.*}})
+}
+
define i64 @blsi64(i64 %x) nounwind readnone {
%tmp = sub i64 0, %x
%tmp2 = and i64 %tmp, %x
@@ -137,6 +179,15 @@ define i32 @blsmsk32(i32 %x) nounwind readnone {
; CHECK: blsmskl
}
+define i32 @blsmsk32_load(i32* %x) nounwind readnone {
+ %x1 = load i32* %x
+ %tmp = sub i32 %x1, 1
+ %tmp2 = xor i32 %x1, %tmp
+ ret i32 %tmp2
+; CHECK: blsmsk32_load:
+; CHECK: blsmskl ({{.*}})
+}
+
define i64 @blsmsk64(i64 %x) nounwind readnone {
%tmp = sub i64 %x, 1
%tmp2 = xor i64 %tmp, %x
@@ -153,6 +204,15 @@ define i32 @blsr32(i32 %x) nounwind readnone {
; CHECK: blsrl
}
+define i32 @blsr32_load(i32* %x) nounwind readnone {
+ %x1 = load i32* %x
+ %tmp = sub i32 %x1, 1
+ %tmp2 = and i32 %x1, %tmp
+ ret i32 %tmp2
+; CHECK: blsr32_load:
+; CHECK: blsrl ({{.*}})
+}
+
define i64 @blsr64(i64 %x) nounwind readnone {
%tmp = sub i64 %x, 1
%tmp2 = and i64 %tmp, %x
@@ -168,6 +228,14 @@ define i32 @pdep32(i32 %x, i32 %y) nounwind readnone {
; CHECK: pdepl
}
+define i32 @pdep32_load(i32 %x, i32* %y) nounwind readnone {
+ %y1 = load i32* %y
+ %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
+ ret i32 %tmp
+; CHECK: pdep32_load:
+; CHECK: pdepl ({{.*}})
+}
+
declare i32 @llvm.x86.bmi.pdep.32(i32, i32) nounwind readnone
define i64 @pdep64(i64 %x, i64 %y) nounwind readnone {
@@ -186,6 +254,14 @@ define i32 @pext32(i32 %x, i32 %y) nounwind readnone {
; CHECK: pextl
}
+define i32 @pext32_load(i32 %x, i32* %y) nounwind readnone {
+ %y1 = load i32* %y
+ %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
+ ret i32 %tmp
+; CHECK: pext32_load:
+; CHECK: pextl ({{.*}})
+}
+
declare i32 @llvm.x86.bmi.pext.32(i32, i32) nounwind readnone
define i64 @pext64(i64 %x, i64 %y) nounwind readnone {
diff --git a/test/CodeGen/X86/bool-simplify.ll b/test/CodeGen/X86/bool-simplify.ll
index 09eb5d1038f7..fa6f6e85e9b8 100644
--- a/test/CodeGen/X86/bool-simplify.ll
+++ b/test/CodeGen/X86/bool-simplify.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx,+rdrand | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx,+rdrand,+rdseed | FileCheck %s
define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) {
%t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c)
@@ -39,7 +39,22 @@ define i32 @bax(<2 x i64> %c) {
; CHECK: ret
}
-define i32 @rnd(i32 %arg) nounwind uwtable {
+define i16 @rnd16(i16 %arg) nounwind uwtable {
+ %1 = tail call { i16, i32 } @llvm.x86.rdrand.16() nounwind
+ %2 = extractvalue { i16, i32 } %1, 0
+ %3 = extractvalue { i16, i32 } %1, 1
+ %4 = icmp eq i32 %3, 0
+ %5 = select i1 %4, i16 0, i16 %arg
+ %6 = add i16 %5, %2
+ ret i16 %6
+; CHECK: rnd16
+; CHECK: rdrand
+; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
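+
+; (The intrinsic returns a {value, success-flag} pair; the test expects the
+; flag compare and select to fold into exactly one cmov, hence the paired
+; "CHECK: cmov" / "CHECK-NOT: cmov".)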
+
+define i32 @rnd32(i32 %arg) nounwind uwtable {
%1 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind
%2 = extractvalue { i32, i32 } %1, 0
%3 = extractvalue { i32, i32 } %1, 1
@@ -47,12 +62,77 @@ define i32 @rnd(i32 %arg) nounwind uwtable {
%5 = select i1 %4, i32 0, i32 %arg
%6 = add i32 %5, %2
ret i32 %6
-; CHECK: rnd
+; CHECK: rnd32
; CHECK: rdrand
; CHECK: cmov
; CHECK-NOT: cmov
; CHECK: ret
}
+define i64 @rnd64(i64 %arg) nounwind uwtable {
+ %1 = tail call { i64, i32 } @llvm.x86.rdrand.64() nounwind
+ %2 = extractvalue { i64, i32 } %1, 0
+ %3 = extractvalue { i64, i32 } %1, 1
+ %4 = icmp eq i32 %3, 0
+ %5 = select i1 %4, i64 0, i64 %arg
+ %6 = add i64 %5, %2
+ ret i64 %6
+; CHECK: rnd64
+; CHECK: rdrand
+; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i16 @seed16(i16 %arg) nounwind uwtable {
+ %1 = tail call { i16, i32 } @llvm.x86.rdseed.16() nounwind
+ %2 = extractvalue { i16, i32 } %1, 0
+ %3 = extractvalue { i16, i32 } %1, 1
+ %4 = icmp eq i32 %3, 0
+ %5 = select i1 %4, i16 0, i16 %arg
+ %6 = add i16 %5, %2
+ ret i16 %6
+; CHECK: seed16
+; CHECK: rdseed
+; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i32 @seed32(i32 %arg) nounwind uwtable {
+ %1 = tail call { i32, i32 } @llvm.x86.rdseed.32() nounwind
+ %2 = extractvalue { i32, i32 } %1, 0
+ %3 = extractvalue { i32, i32 } %1, 1
+ %4 = icmp eq i32 %3, 0
+ %5 = select i1 %4, i32 0, i32 %arg
+ %6 = add i32 %5, %2
+ ret i32 %6
+; CHECK: seed32
+; CHECK: rdseed
+; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i64 @seed64(i64 %arg) nounwind uwtable {
+ %1 = tail call { i64, i32 } @llvm.x86.rdseed.64() nounwind
+ %2 = extractvalue { i64, i32 } %1, 0
+ %3 = extractvalue { i64, i32 } %1, 1
+ %4 = icmp eq i32 %3, 0
+ %5 = select i1 %4, i64 0, i64 %arg
+ %6 = add i64 %5, %2
+ ret i64 %6
+; CHECK: seed64
+; CHECK: rdseed
+; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
+declare { i16, i32 } @llvm.x86.rdrand.16() nounwind
declare { i32, i32 } @llvm.x86.rdrand.32() nounwind
+declare { i64, i32 } @llvm.x86.rdrand.64() nounwind
+declare { i16, i32 } @llvm.x86.rdseed.16() nounwind
+declare { i32, i32 } @llvm.x86.rdseed.32() nounwind
+declare { i64, i32 } @llvm.x86.rdseed.64() nounwind
diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll
index ec447e5e9c81..39a784dec37d 100644
--- a/test/CodeGen/X86/bt.ll
+++ b/test/CodeGen/X86/bt.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s -march=x86 | grep btl | count 28
-; RUN: llc < %s -march=x86 -mcpu=pentium4 | grep btl | not grep esp
-; RUN: llc < %s -march=x86 -mcpu=penryn | grep btl | not grep esp
+; RUN: llc < %s -mtriple=i386-apple-macosx -mcpu=penryn | FileCheck %s
; PR3253
; The register+memory form of the BT instruction should be usable on
@@ -21,6 +19,9 @@
define void @test2(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: test2
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
%tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -36,6 +37,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @test2b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: test2b
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
%tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -51,6 +55,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @atest2(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: atest2
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
%tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -66,6 +73,8 @@ UnifiedReturnBlock: ; preds = %entry
define void @atest2b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: atest2b
+; CHECK: btl %eax, %ecx
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
%tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -81,6 +90,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @test3(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: test3
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
%tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -96,6 +108,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @test3b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: test3b
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29
%tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -111,6 +126,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @testne2(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: testne2
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
%tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -126,6 +144,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @testne2b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: testne2b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
%tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -141,6 +162,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @atestne2(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: atestne2
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
%tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -156,6 +180,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @atestne2b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: atestne2b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
%tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -171,6 +198,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @testne3(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: testne3
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
%tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -186,6 +216,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @testne3b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: testne3b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29
%tmp4 = icmp ne i32 %tmp3, 0 ; <i1> [#uses=1]
@@ -201,6 +234,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @query2(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: query2
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
%tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1]
@@ -216,6 +252,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @query2b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: query2b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
%tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1]
@@ -231,6 +270,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @aquery2(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: aquery2
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
%tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1]
@@ -246,6 +288,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @aquery2b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: aquery2b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
%tmp4 = icmp eq i32 %tmp3, 1 ; <i1> [#uses=1]
@@ -261,6 +306,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @query3(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: query3
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
%tmp4 = icmp eq i32 %tmp3, %tmp29 ; <i1> [#uses=1]
@@ -276,6 +324,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @query3b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: query3b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29
%tmp4 = icmp eq i32 %tmp3, %tmp29 ; <i1> [#uses=1]
@@ -291,6 +342,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @query3x(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: query3x
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
%tmp4 = icmp eq i32 %tmp29, %tmp3 ; <i1> [#uses=1]
@@ -306,6 +360,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @query3bx(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: query3bx
+; CHECK: btl %eax, %ecx
+; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29
%tmp4 = icmp eq i32 %tmp29, %tmp3 ; <i1> [#uses=1]
@@ -321,6 +378,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne2(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: queryne2
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
%tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1]
@@ -336,6 +396,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne2b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: queryne2b
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
%tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1]
@@ -351,6 +414,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @aqueryne2(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: aqueryne2
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
%tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1]
@@ -366,6 +432,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @aqueryne2b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: aqueryne2b
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
%tmp4 = icmp ne i32 %tmp3, 1 ; <i1> [#uses=1]
@@ -381,6 +450,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne3(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: queryne3
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
%tmp4 = icmp ne i32 %tmp3, %tmp29 ; <i1> [#uses=1]
@@ -396,6 +468,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne3b(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: queryne3b
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29
%tmp4 = icmp ne i32 %tmp3, %tmp29 ; <i1> [#uses=1]
@@ -411,6 +486,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne3x(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: queryne3x
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
%tmp4 = icmp ne i32 %tmp29, %tmp3 ; <i1> [#uses=1]
@@ -426,6 +504,9 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne3bx(i32 %x, i32 %n) nounwind {
entry:
+; CHECK: queryne3bx
+; CHECK: btl %eax, %ecx
+; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29
%tmp4 = icmp ne i32 %tmp29, %tmp3 ; <i1> [#uses=1]
@@ -440,3 +521,16 @@ UnifiedReturnBlock: ; preds = %entry
}
declare void @foo()
+
+; rdar://12755626
+define zeroext i1 @invert(i32 %flags, i32 %flag) nounwind {
+; CHECK: invert
+; CHECK: btl %eax, %ecx
+; CHECK: setae
+entry:
+ %neg = xor i32 %flags, -1
+ %shl = shl i32 1, %flag
+ %and = and i32 %shl, %neg
+ %tobool = icmp ne i32 %and, 0
+ ret i1 %tobool
+}
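+
+; (btl copies the selected bit of %flags into CF; because the IR tests the bit
+; in the inverted value, "bit clear" is exactly CF == 0, which setae captures
+; without any branch.)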
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
index 196efe58e6f3..c5187db6de4b 100644
--- a/test/CodeGen/X86/byval2.ll
+++ b/test/CodeGen/X86/byval2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64
; X64-NOT: movsq
; X64: rep
; X64-NOT: rep
@@ -12,7 +12,7 @@
; Win64 does not support byval yet.
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32
; X32-NOT: movsl
; X32: rep
; X32-NOT: rep
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
index f3b125c6e3ba..d06fd8898e7f 100644
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64
; X64-NOT: movsq
; X64: rep
; X64-NOT: rep
@@ -12,7 +12,7 @@
; Win64 does not support byval yet.
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32
; X32-NOT: movsl
; X32: rep
; X32-NOT: rep
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index b7a4aa3f9b01..4711e4511112 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64
; X64-NOT: movsq
; X64: rep
; X64-NOT: rep
@@ -12,7 +12,7 @@
; Win64 does not support byval yet.
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32
; X32-NOT: movsl
; X32: rep
; X32-NOT: rep
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index dca093602241..f24a5f9aa3b4 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64
; X64-NOT: movsq
; X64: rep
; X64-NOT: rep
@@ -12,7 +12,7 @@
; Win64 does not support byval yet.
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32
; X32-NOT: movsl
; X32: rep
; X32-NOT: rep
diff --git a/test/CodeGen/X86/cas.ll b/test/CodeGen/X86/cas.ll
new file mode 100644
index 000000000000..c2dd05ef7302
--- /dev/null
+++ b/test/CodeGen/X86/cas.ll
@@ -0,0 +1,73 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu %s -o - | FileCheck %s
+
+; The C code this test came from:
+;bool cas(float volatile *p, float *expected, float desired) {
+; bool success;
+; __asm__ __volatile__("lock; cmpxchg %[desired], %[mem]; "
+; "mov %[expected], %[expected_out]; "
+; "sete %[success]"
+; : [success] "=a" (success),
+; [expected_out] "=rm" (*expected)
+; : [expected] "a" (*expected),
+; [desired] "q" (desired),
+; [mem] "m" (*p)
+; : "memory", "cc");
+; return success;
+;}
+
+define zeroext i1 @cas(float* %p, float* %expected, float %desired) nounwind {
+entry:
+ %p.addr = alloca float*, align 8
+ %expected.addr = alloca float*, align 8
+ %desired.addr = alloca float, align 4
+ %success = alloca i8, align 1
+ store float* %p, float** %p.addr, align 8
+ store float* %expected, float** %expected.addr, align 8
+ store float %desired, float* %desired.addr, align 4
+ %0 = load float** %expected.addr, align 8
+ %1 = load float** %expected.addr, align 8
+ %2 = load float* %1, align 4
+ %3 = load float* %desired.addr, align 4
+ %4 = load float** %p.addr, align 8
+ %5 = call i8 asm sideeffect "lock; cmpxchg $3, $4; mov $2, $1; sete $0", "={ax},=*rm,{ax},q,*m,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(float* %0, float %2, float %3, float* %4) nounwind
+ store i8 %5, i8* %success, align 1
+ %6 = load i8* %success, align 1
+ %tobool = trunc i8 %6 to i1
+ ret i1 %tobool
+}
+
+; CHECK: @cas
+; Make sure we're emitting a move from %eax.
+; CHECK: #APP
+; CHECK-NEXT: lock;{{.*}}mov %eax,{{.*}}
+; CHECK-NEXT: #NO_APP
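+; (The "{ax}" asm operand is printed at the width of its payload: %eax for the
+; 32-bit float result here, %al for the i1 variant in @cas2 below.)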
+
+define zeroext i1 @cas2(i8* %p, i8* %expected, i1 zeroext %desired) nounwind {
+entry:
+ %p.addr = alloca i8*, align 8
+ %expected.addr = alloca i8*, align 8
+ %desired.addr = alloca i8, align 1
+ %success = alloca i8, align 1
+ store i8* %p, i8** %p.addr, align 8
+ store i8* %expected, i8** %expected.addr, align 8
+ %frombool = zext i1 %desired to i8
+ store i8 %frombool, i8* %desired.addr, align 1
+ %0 = load i8** %expected.addr, align 8
+ %1 = load i8** %expected.addr, align 8
+ %2 = load i8* %1, align 1
+ %tobool = trunc i8 %2 to i1
+ %3 = load i8* %desired.addr, align 1
+ %tobool1 = trunc i8 %3 to i1
+ %4 = load i8** %p.addr, align 8
+ %5 = call i8 asm sideeffect "lock; cmpxchg $3, $4; mov $2, $1; sete $0", "={ax},=*rm,{ax},q,*m,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(i8* %0, i1 %tobool, i1 %tobool1, i8* %4) nounwind
+ store i8 %5, i8* %success, align 1
+ %6 = load i8* %success, align 1
+ %tobool2 = trunc i8 %6 to i1
+ ret i1 %tobool2
+}
+
+; CHECK: @cas2
+; Make sure we're emitting a move from %al here.
+; CHECK: #APP
+; CHECK-NEXT: lock;{{.*}}mov %al,{{.*}}
+; CHECK-NEXT: #NO_APP
diff --git a/test/CodeGen/X86/clobber-fi0.ll b/test/CodeGen/X86/clobber-fi0.ll
new file mode 100644
index 000000000000..38a42dbf1aa1
--- /dev/null
+++ b/test/CodeGen/X86/clobber-fi0.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; In the code below we need to copy EFLAGS because of scheduling constraints.
+; The copy is written to the stack with a push/pop pair, which forces us to
+; emit the prologue.
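+; (Presumably the push/pop pair is a pushfq/popfq of the flags register, which
+; touches memory below %rsp and is why the stack adjustment checked below must
+; be present.)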
+
+; CHECK: main
+; CHECK: subq{{.*}}rsp
+; CHECK: ret
+define i32 @main(i32 %arg, i8** %arg1) nounwind {
+bb:
+ %tmp = alloca i32, align 4 ; [#uses=3 type=i32*]
+ %tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*]
+ %tmp3 = alloca i32 ; [#uses=1 type=i32*]
+ store i32 1, i32* %tmp, align 4
+ store i32 1, i32* %tmp2, align 4
+ br label %bb4
+
+bb4: ; preds = %bb4, %bb
+ %tmp6 = load i32* %tmp2, align 4 ; [#uses=1 type=i32]
+ %tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32]
+ store i32 %tmp7, i32* %tmp2, align 4
+ %tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1]
+ %tmp9 = load i32* %tmp ; [#uses=1 type=i32]
+ %tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32]
+ store i32 %tmp10, i32* %tmp3
+ br i1 %tmp8, label %bb11, label %bb4
+
+bb11: ; preds = %bb4
+ %tmp12 = load i32* %tmp, align 4 ; [#uses=1 type=i32]
+ ret i32 %tmp12
+}
+
+
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
index eb06327f55a6..1855fe2fb89e 100644
--- a/test/CodeGen/X86/cmp.ll
+++ b/test/CodeGen/X86/cmp.ll
@@ -151,3 +151,18 @@ entry:
%conv = zext i1 %cmp to i32
ret i32 %conv
}
+
+define i32 @test12() uwtable ssp {
+; CHECK: test12:
+; CHECK: testb
+ %1 = call zeroext i1 @test12b()
+ br i1 %1, label %2, label %3
+
+; <label>:2 ; preds = %0
+ ret i32 1
+
+; <label>:3 ; preds = %0
+ ret i32 2
+}
+
+declare zeroext i1 @test12b()
diff --git a/test/CodeGen/X86/coalesce-implicitdef.ll b/test/CodeGen/X86/coalesce-implicitdef.ll
new file mode 100644
index 000000000000..19cd08cf3797
--- /dev/null
+++ b/test/CodeGen/X86/coalesce-implicitdef.ll
@@ -0,0 +1,130 @@
+; RUN: llc < %s -verify-coalescing
+; PR14732
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10"
+
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4
+@a = common global i32 0, align 4
+@d = common global i32 0, align 4
+
+; This function creates an IMPLICIT_DEF with a long live range, even after
+; ProcessImplicitDefs.
+;
+; The coalescer should be able to deal with all kinds of IMPLICIT_DEF live
+; ranges, even if they are not common.
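+; (Here the IMPLICIT_DEF arises from the "undef" incoming values of the phis
+; in %for.cond below; -verify-coalescing in the RUN line flags any live-range
+; damage.)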
+
+define void @f() nounwind uwtable ssp {
+entry:
+ %i = alloca i32, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc34, %entry
+ %i.0.load44 = phi i32 [ %inc35, %for.inc34 ], [ undef, %entry ]
+ %pi.0 = phi i32* [ %pi.4, %for.inc34 ], [ undef, %entry ]
+ %tobool = icmp eq i32 %i.0.load44, 0
+ br i1 %tobool, label %for.end36, label %for.body
+
+for.body: ; preds = %for.cond
+ store i32 0, i32* @c, align 4, !tbaa !0
+ br label %for.body2
+
+for.body2: ; preds = %for.body, %for.inc
+ %i.0.load45 = phi i32 [ %i.0.load44, %for.body ], [ 0, %for.inc ]
+ %tobool3 = icmp eq i32 %i.0.load45, 0
+ br i1 %tobool3, label %if.then10, label %if.then
+
+if.then: ; preds = %for.body2
+ store i32 0, i32* %i, align 4, !tbaa !0
+ br label %for.body6
+
+for.body6: ; preds = %if.then, %for.body6
+ store i32 0, i32* %i, align 4
+ br i1 true, label %for.body6, label %for.inc
+
+if.then10: ; preds = %for.body2
+ store i32 1, i32* @b, align 4, !tbaa !0
+ ret void
+
+for.inc: ; preds = %for.body6
+ br i1 undef, label %for.body2, label %if.end30
+
+while.condthread-pre-split: ; preds = %label.loopexit, %while.condthread-pre-split.lr.ph.lr.ph, %for.inc27.backedge
+ %0 = phi i32 [ %inc28, %for.inc27.backedge ], [ %inc285863, %while.condthread-pre-split.lr.ph.lr.ph ], [ %inc2858, %label.loopexit ]
+ %inc2060 = phi i32 [ %inc20, %for.inc27.backedge ], [ %a.promoted.pre, %while.condthread-pre-split.lr.ph.lr.ph ], [ %inc20, %label.loopexit ]
+ br label %while.cond
+
+while.cond: ; preds = %while.condthread-pre-split, %while.cond
+ %p2.1.in = phi i32* [ %pi.3.ph, %while.cond ], [ %i, %while.condthread-pre-split ]
+ %p2.1 = bitcast i32* %p2.1.in to i16*
+ br i1 %tobool19, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ %inc20 = add nsw i32 %inc2060, 1
+ %tobool21 = icmp eq i32 %inc2060, 0
+ br i1 %tobool21, label %for.inc27.backedge, label %if.then22
+
+for.inc27.backedge: ; preds = %while.end, %if.then22
+ %inc28 = add nsw i32 %0, 1
+ store i32 %inc28, i32* @b, align 4, !tbaa !0
+ %tobool17 = icmp eq i32 %inc28, 0
+ br i1 %tobool17, label %for.inc27.if.end30.loopexit56_crit_edge, label %while.condthread-pre-split
+
+if.then22: ; preds = %while.end
+ %1 = load i16* %p2.1, align 2, !tbaa !3
+ %tobool23 = icmp eq i16 %1, 0
+ br i1 %tobool23, label %for.inc27.backedge, label %label.loopexit
+
+label.loopexit: ; preds = %if.then22
+ store i32 %inc20, i32* @a, align 4, !tbaa !0
+ %inc2858 = add nsw i32 %0, 1
+ store i32 %inc2858, i32* @b, align 4, !tbaa !0
+ %tobool1759 = icmp eq i32 %inc2858, 0
+ br i1 %tobool1759, label %if.end30, label %while.condthread-pre-split
+
+for.inc27.if.end30.loopexit56_crit_edge: ; preds = %for.inc27.backedge
+ store i32 %inc20, i32* @a, align 4, !tbaa !0
+ br label %if.end30
+
+if.end30: ; preds = %for.inc27.if.end30.loopexit56_crit_edge, %label.loopexit, %label.preheader, %for.inc
+ %i.0.load46 = phi i32 [ 0, %for.inc ], [ %i.0.load4669, %label.preheader ], [ %i.0.load4669, %label.loopexit ], [ %i.0.load4669, %for.inc27.if.end30.loopexit56_crit_edge ]
+ %pi.4 = phi i32* [ %i, %for.inc ], [ %pi.3.ph, %label.preheader ], [ %pi.3.ph, %label.loopexit ], [ %pi.3.ph, %for.inc27.if.end30.loopexit56_crit_edge ]
+ %2 = load i32* %pi.4, align 4, !tbaa !0
+ %tobool31 = icmp eq i32 %2, 0
+ br i1 %tobool31, label %for.inc34, label %label.preheader
+
+for.inc34: ; preds = %if.end30
+ %inc35 = add nsw i32 %i.0.load46, 1
+ store i32 %inc35, i32* %i, align 4
+ br label %for.cond
+
+for.end36: ; preds = %for.cond
+ store i32 1, i32* %i, align 4
+ %3 = load i32* @c, align 4, !tbaa !0
+ %tobool37 = icmp eq i32 %3, 0
+ br i1 %tobool37, label %label.preheader, label %land.rhs
+
+land.rhs: ; preds = %for.end36
+ store i32 0, i32* @a, align 4, !tbaa !0
+ br label %label.preheader
+
+label.preheader: ; preds = %for.end36, %if.end30, %land.rhs
+ %i.0.load4669 = phi i32 [ 1, %land.rhs ], [ %i.0.load46, %if.end30 ], [ 1, %for.end36 ]
+ %pi.3.ph = phi i32* [ %pi.0, %land.rhs ], [ %pi.4, %if.end30 ], [ %pi.0, %for.end36 ]
+ %4 = load i32* @b, align 4, !tbaa !0
+ %inc285863 = add nsw i32 %4, 1
+ store i32 %inc285863, i32* @b, align 4, !tbaa !0
+ %tobool175964 = icmp eq i32 %inc285863, 0
+ br i1 %tobool175964, label %if.end30, label %while.condthread-pre-split.lr.ph.lr.ph
+
+while.condthread-pre-split.lr.ph.lr.ph: ; preds = %label.preheader
+ %.pr50 = load i32* @d, align 4, !tbaa !0
+ %tobool19 = icmp eq i32 %.pr50, 0
+ %a.promoted.pre = load i32* @a, align 4, !tbaa !0
+ br label %while.condthread-pre-split
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/X86/coldcc64.ll b/test/CodeGen/X86/coldcc64.ll
new file mode 100644
index 000000000000..4db56bbaea2d
--- /dev/null
+++ b/test/CodeGen/X86/coldcc64.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "x86_64-linux-gnu"
+
+define coldcc void @foo() {
+; CHECK: pushq %rbp
+; CHECK: pushq %r15
+; CHECK: pushq %r14
+; CHECK: pushq %r13
+; CHECK: pushq %r12
+; CHECK: pushq %r11
+; CHECK: pushq %r10
+; CHECK: pushq %r9
+; CHECK: pushq %r8
+; CHECK: pushq %rdi
+; CHECK: pushq %rsi
+; CHECK: pushq %rdx
+; CHECK: pushq %rcx
+; CHECK: pushq %rbx
+; CHECK: movaps %xmm15
+; CHECK: movaps %xmm0
+ call void asm sideeffect "", "~{xmm15},~{xmm0},~{rbp},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rdi},~{rsi},~{rdx},~{rcx},~{rbx}"()
+ ret void
+}
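+
+; (coldcc treats nearly every register as callee-saved, so clobbering them all
+; in the inline asm forces the prologue to push each GPR and spill the XMMs
+; checked above.)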
diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll
index 7e7acaa98a76..8ad38a4ee5c0 100644
--- a/test/CodeGen/X86/complex-fca.ll
+++ b/test/CodeGen/X86/complex-fca.ll
@@ -1,5 +1,8 @@
; RUN: llc < %s -march=x86 | grep mov | count 2
+; Skip this on Windows as there is no ccosl and sret behaves differently.
+; XFAIL: pc-win32
+
define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind {
entry:
%z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0
diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll
index 4be14d2128ef..4a0110896ced 100644
--- a/test/CodeGen/X86/constant-pool-remat-0.ll
+++ b/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux -regalloc=greedy | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index 064ee364d14e..74a7240c8190 100644
--- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
; STATS: 9 asm-printer
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 276d0db9a4f3..6d2196206e7c 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -431,7 +431,7 @@ return: ; preds = %entry
; uitofp expands to an FCMOV instruction which splits the basic block.
; Make sure the live range of %AL isn't split.
@.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32
-define void @pr13188(i64* nocapture %this) uwtable ssp address_safety align 2 {
+define void @pr13188(i64* nocapture %this) uwtable ssp sanitize_address align 2 {
entry:
%x7 = load i64* %this, align 8
%sub = add i64 %x7, -1
diff --git a/test/CodeGen/X86/cvtv2f32.ll b/test/CodeGen/X86/cvtv2f32.ll
index 466b09606786..d11bb9ee3e75 100644
--- a/test/CodeGen/X86/cvtv2f32.ll
+++ b/test/CodeGen/X86/cvtv2f32.ll
@@ -1,3 +1,7 @@
+; A bug fix in the DAGCombiner made this test fail, so it is marked XFAIL
+; until it can be investigated further.
+; XFAIL: *
+
; RUN: llc < %s -mtriple=i686-linux-pc -mcpu=corei7 | FileCheck %s
define <2 x float> @foo(i32 %x, i32 %y, <2 x float> %v) {
diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll
index af69531246cf..75d3d93ddb89 100644
--- a/test/CodeGen/X86/dagcombine-cse.ll
+++ b/test/CodeGen/X86/dagcombine-cse.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14
define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind {
diff --git a/test/CodeGen/X86/dagcombine_unsafe_math.ll b/test/CodeGen/X86/dagcombine_unsafe_math.ll
new file mode 100644
index 000000000000..592cf1bec2e5
--- /dev/null
+++ b/test/CodeGen/X86/dagcombine_unsafe_math.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -enable-unsafe-fp-math -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
+
+
+; rdar://13126763
+; Expression "x + x*x" was mistakenly transformed into "x * 3.0f".
+
+define float @test1(float %x) {
+ %t1 = fmul fast float %x, %x
+ %t2 = fadd fast float %t1, %x
+ ret float %t2
+; CHECK: test1
+; CHECK: vaddss
+}
+
+; (x + x) + x => x * 3.0
+define float @test2(float %x) {
+ %t1 = fadd fast float %x, %x
+ %t2 = fadd fast float %t1, %x
+ ret float %t2
+; CHECK: .long 1077936128
+; CHECK: test2
+; CHECK: vmulss LCPI1_0(%rip), %xmm0, %xmm0
+}
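+; (1077936128 is 0x40400000, the IEEE-754 single-precision encoding of 3.0,
+; i.e. the constant-pool entry for the new multiply.)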
+
+; x + (x + x) => x * 3.0
+define float @test3(float %x) {
+ %t1 = fadd fast float %x, %x
+ %t2 = fadd fast float %t1, %x
+ ret float %t2
+; CHECK: .long 1077936128
+; CHECK: test3
+; CHECK: vmulss LCPI2_0(%rip), %xmm0, %xmm0
+}
+
+; (y + x) + x != x * 3.0
+define float @test4(float %x, float %y) {
+ %t1 = fadd fast float %x, %y
+ %t2 = fadd fast float %t1, %x
+ ret float %t2
+; CHECK: test4
+; CHECK: vaddss
+}
+
+; rdar://13445387
+; "x + x + x => 3.0 * x" should be disabled after legalization because
+; instruction selection doesn't know how to handle "3.0"
+;
+define float @test5() {
+ %mul.i.i151 = fmul <4 x float> zeroinitializer, zeroinitializer
+ %vecext.i8.i152 = extractelement <4 x float> %mul.i.i151, i32 1
+ %vecext1.i9.i153 = extractelement <4 x float> %mul.i.i151, i32 0
+ %add.i10.i154 = fadd float %vecext1.i9.i153, %vecext.i8.i152
+ %vecext.i7.i155 = extractelement <4 x float> %mul.i.i151, i32 2
+ %add.i.i156 = fadd float %vecext.i7.i155, %add.i10.i154
+ ret float %add.i.i156
+}
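+
+; (test5 has no CHECK lines: it only needs to survive instruction selection,
+; which would fail if the fadd chain were turned into a multiply by 3.0 after
+; legalization.)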
diff --git a/test/CodeGen/X86/dbg-at-specficiation.ll b/test/CodeGen/X86/dbg-at-specficiation.ll
index aa5e6efede27..48b8202bd5fa 100644
--- a/test/CodeGen/X86/dbg-at-specficiation.ll
+++ b/test/CodeGen/X86/dbg-at-specficiation.ll
@@ -17,4 +17,4 @@
!7 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !8, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ]
!8 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!9 = metadata !{metadata !10}
-!10 = metadata !{i32 720929, i64 0, i64 9} ; [ DW_TAG_subrange_type ]
+!10 = metadata !{i32 720929, i64 0, i64 10} ; [ DW_TAG_subrange_type ]
diff --git a/test/CodeGen/X86/dbg-byval-parameter.ll b/test/CodeGen/X86/dbg-byval-parameter.ll
index 5e5577620d97..aca06a27a1df 100644
--- a/test/CodeGen/X86/dbg-byval-parameter.ll
+++ b/test/CodeGen/X86/dbg-byval-parameter.ll
@@ -25,21 +25,25 @@ return: ; preds = %entry
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-!0 = metadata !{i32 524545, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"b2.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"b2.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!llvm.dbg.cu = !{!3}
+
+!0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
!5 = metadata !{metadata !6, metadata !7}
-!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 524307, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
+!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ]
!8 = metadata !{metadata !9, metadata !14}
-!9 = metadata !{i32 524301, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 524307, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
!11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 524301, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!13 = metadata !{i32 524301, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
-!14 = metadata !{i32 524301, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
+!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
!15 = metadata !{i32 11, i32 0, metadata !1, null}
!16 = metadata !{i32 12, i32 0, metadata !17, null}
-!17 = metadata !{i32 524299, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{i32 786443, metadata !2, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{metadata !1}
+!19 = metadata !{metadata !"b2.c", metadata !"/tmp/"}
diff --git a/test/CodeGen/X86/dbg-const-int.ll b/test/CodeGen/X86/dbg-const-int.ll
index bfc96f17ec9b..aabc2068068d 100644
--- a/test/CodeGen/X86/dbg-const-int.ll
+++ b/test/CodeGen/X86/dbg-const-int.ll
@@ -13,17 +13,18 @@ entry:
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1}
-!llvm.dbg.lv.foo = !{!6}
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 132191)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590080, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!7 = metadata !{i32 589835, metadata !1, i32 1, i32 11, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786688, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!7 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
!8 = metadata !{i32 42}
!9 = metadata !{i32 2, i32 12, metadata !7, null}
!10 = metadata !{i32 3, i32 2, metadata !7, null}
+!11 = metadata !{metadata !1}
+!12 = metadata !{metadata !6}
+!13 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
diff --git a/test/CodeGen/X86/dbg-const.ll b/test/CodeGen/X86/dbg-const.ll
index 5a51eb88b895..a9b8f1fdc4f3 100644
--- a/test/CodeGen/X86/dbg-const.ll
+++ b/test/CodeGen/X86/dbg-const.ll
@@ -16,19 +16,21 @@ entry:
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
declare i32 @bar() nounwind readnone
-!llvm.dbg.sp = !{!0}
-!llvm.dbg.lv.foobar = !{!6}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar}
-!1 = metadata !{i32 524329, metadata !"mu.c", metadata !"/private/tmp", metadata !2}
-!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"mu.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114183)", i1 true, i1 true, metadata !"", i32 0}
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null}
+!0 = metadata !{i32 786478, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null}
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
-!6 = metadata !{i32 524544, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5}
-!7 = metadata !{i32 524299, metadata !0, i32 12, i32 52, metadata !1, i32 0}
+!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!6 = metadata !{i32 786688, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5, i32 0, null}
+!7 = metadata !{i32 786443, metadata !1, metadata !0, i32 12, i32 52, i32 0} ; [ DW_TAG_lexical_block ]
!8 = metadata !{i32 42}
!9 = metadata !{i32 15, i32 12, metadata !7, null}
!10 = metadata !{i32 23, i32 3, metadata !7, null}
!11 = metadata !{i32 17, i32 3, metadata !7, null}
!12 = metadata !{i32 18, i32 3, metadata !7, null}
+!13 = metadata !{metadata !0}
+!14 = metadata !{metadata !6}
+!15 = metadata !{metadata !"mu.c", metadata !"/private/tmp"}
diff --git a/test/CodeGen/X86/dbg-declare-arg.ll b/test/CodeGen/X86/dbg-declare-arg.ll
index 367c1ef36c60..f7e0c91cdff2 100644
--- a/test/CodeGen/X86/dbg-declare-arg.ll
+++ b/test/CodeGen/X86/dbg-declare-arg.ll
@@ -69,55 +69,57 @@ entry:
ret void, !dbg !48
}
-!llvm.dbg.sp = !{!0, !10, !14, !19, !22, !25}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"~A", metadata !"~A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null} ; [ DW_TAG_subprogram ]
!1 = metadata !{i32 589826, metadata !2, metadata !"A", metadata !3, i32 2, i64 128, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ]
-!2 = metadata !{i32 589841, i32 0, i32 4, metadata !"a.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 130127)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589865, metadata !"a.cc", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 4, metadata !3, metadata !"clang version 3.0 (trunk 130127)", i1 false, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ]
!4 = metadata !{metadata !5, metadata !7, metadata !8, metadata !9, metadata !0, metadata !10, metadata !14}
-!5 = metadata !{i32 589837, metadata !3, metadata !"x", metadata !3, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 589837, metadata !3, metadata !"y", metadata !3, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !6} ; [ DW_TAG_member ]
-!8 = metadata !{i32 589837, metadata !3, metadata !"z", metadata !3, i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
-!9 = metadata !{i32 589837, metadata !3, metadata !"o", metadata !3, i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ]
-!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786445, metadata !3, metadata !"x", metadata !3, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!6 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786445, metadata !3, metadata !"y", metadata !3, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !6} ; [ DW_TAG_member ]
+!8 = metadata !{i32 786445, metadata !3, metadata !"z", metadata !3, i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
+!9 = metadata !{i32 786445, metadata !3, metadata !"o", metadata !3, i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"A", metadata !"A", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!12 = metadata !{null, metadata !13}
-!13 = metadata !{i32 589839, metadata !2, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 589870, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
-!15 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!13 = metadata !{i32 786447, metadata !2, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
+!14 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"A", metadata !"A", metadata !3, i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!16 = metadata !{null, metadata !13, metadata !17}
!17 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_reference_type ]
-!18 = metadata !{i32 589862, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ]
-!19 = metadata !{i32 589870, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", metadata !3, i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null} ; [ DW_TAG_subprogram ]
-!20 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!18 = metadata !{i32 786470, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ]
+!19 = metadata !{i32 786478, metadata !"_Z3fooi", i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !3, i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!21 = metadata !{metadata !1}
-!22 = metadata !{i32 589870, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD1Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null} ; [ DW_TAG_subprogram ]
-!23 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!22 = metadata !{i32 786478, metadata !"_ZN1AD1Ev", i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null} ; [ DW_TAG_subprogram ]
+!23 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!24 = metadata !{null}
-!25 = metadata !{i32 589870, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null} ; [ DW_TAG_subprogram ]
-!26 = metadata !{i32 590081, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
+!25 = metadata !{i32 786478, metadata !"_ZN1AD2Ev", i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null} ; [ DW_TAG_subprogram ]
+!26 = metadata !{i32 786689, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
!27 = metadata !{i32 4, i32 11, metadata !19, null}
-!28 = metadata !{i32 590080, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 589835, metadata !19, i32 4, i32 14, metadata !3, i32 0} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{i32 786688, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
+!29 = metadata !{i32 786443, metadata !19, i32 4, i32 14, metadata !3, i32 0} ; [ DW_TAG_lexical_block ]
!30 = metadata !{i32 5, i32 7, metadata !29, null}
!31 = metadata !{i32 5, i32 12, metadata !29, null}
!32 = metadata !{i32 6, i32 3, metadata !29, null}
!33 = metadata !{i32 7, i32 5, metadata !34, null}
-!34 = metadata !{i32 589835, metadata !29, i32 6, i32 16, metadata !3, i32 1} ; [ DW_TAG_lexical_block ]
+!34 = metadata !{i32 786443, metadata !29, i32 6, i32 16, metadata !3, i32 1} ; [ DW_TAG_lexical_block ]
!35 = metadata !{i32 8, i32 3, metadata !34, null}
!36 = metadata !{i32 9, i32 9, metadata !29, null}
-!37 = metadata !{i32 590080, metadata !29, metadata !"my_a", metadata !3, i32 9, metadata !38, i32 0} ; [ DW_TAG_auto_variable ]
+!37 = metadata !{i32 786688, metadata !29, metadata !"my_a", metadata !3, i32 9, metadata !38, i32 0, null} ; [ DW_TAG_auto_variable ]
!38 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
!39 = metadata !{i32 9, i32 5, metadata !29, null}
!40 = metadata !{i32 10, i32 3, metadata !29, null}
!41 = metadata !{i32 11, i32 3, metadata !29, null}
!42 = metadata !{i32 12, i32 1, metadata !29, null}
-!43 = metadata !{i32 590081, metadata !22, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64} ; [ DW_TAG_arg_variable ]
+!43 = metadata !{i32 786689, metadata !22, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64, null} ; [ DW_TAG_arg_variable ]
!44 = metadata !{i32 2, i32 47, metadata !22, null}
!45 = metadata !{i32 2, i32 61, metadata !22, null}
-!46 = metadata !{i32 590081, metadata !25, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64} ; [ DW_TAG_arg_variable ]
+!46 = metadata !{i32 786689, metadata !25, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64, null} ; [ DW_TAG_arg_variable ]
!47 = metadata !{i32 2, i32 47, metadata !25, null}
!48 = metadata !{i32 2, i32 54, metadata !49, null}
-!49 = metadata !{i32 589835, metadata !25, i32 2, i32 52, metadata !3, i32 2} ; [ DW_TAG_lexical_block ]
+!49 = metadata !{i32 786443, metadata !25, i32 2, i32 52, metadata !3, i32 2} ; [ DW_TAG_lexical_block ]
+!50 = metadata !{metadata !0, metadata !10, metadata !14, metadata !19, metadata !22, metadata !25}
+!51 = metadata !{metadata !"a.cc", metadata !"/private/tmp"}
diff --git a/test/CodeGen/X86/dbg-declare.ll b/test/CodeGen/X86/dbg-declare.ll
index 5d4cedc5c4e3..6ac397ac42e1 100644
--- a/test/CodeGen/X86/dbg-declare.ll
+++ b/test/CodeGen/X86/dbg-declare.ll
@@ -29,12 +29,10 @@ declare void @llvm.stackrestore(i8*) nounwind
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.1 (trunk 153698)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 153698)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 786473, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9, metadata !10}
@@ -51,7 +49,7 @@ declare void @llvm.stackrestore(i8*) nounwind
!19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, i32 0} ; [ DW_TAG_array_type ]
!20 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
!21 = metadata !{metadata !22}
-!22 = metadata !{i32 786465, i64 1, i64 0} ; [ DW_TAG_subrange_type ]
+!22 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ]
!23 = metadata !{i32 7, i32 8, metadata !17, null}
!24 = metadata !{i32 9, i32 1, metadata !17, null}
!25 = metadata !{i32 8, i32 3, metadata !17, null}
diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/CodeGen/X86/dbg-file-name.ll
index adf985461055..1bd3d77522a3 100644
--- a/test/CodeGen/X86/dbg-file-name.ll
+++ b/test/CodeGen/X86/dbg-file-name.ll
@@ -9,11 +9,13 @@ define i32 @main() nounwind {
ret i32 0
}
-!llvm.dbg.sp = !{ !6}
+!llvm.dbg.cu = !{!2}
-!1 = metadata !{i32 589865, metadata !"simple.c", metadata !"/Users/manav/one/two", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"simple.c", metadata !"/Users/manav/one/two", metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 589860, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !10, i32 1, metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !9, null} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !5}
+!9 = metadata !{metadata !6}
+!10 = metadata !{metadata !"simple.c", metadata !"/Users/manav/one/two"}
diff --git a/test/CodeGen/X86/dbg-i128-const.ll b/test/CodeGen/X86/dbg-i128-const.ll
index bd96d9195d02..17d645757d99 100644
--- a/test/CodeGen/X86/dbg-i128-const.ll
+++ b/test/CodeGen/X86/dbg-i128-const.ll
@@ -12,15 +12,20 @@ entry:
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+!llvm.dbg.cu = !{!5}
+
!0 = metadata !{i128 42 }
-!1 = metadata !{i32 524544, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8} ; [ DW_TAG_auto_variable ]
-!2 = metadata !{i32 524299, metadata !3, i32 26, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 524329, metadata !"foo.c", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 524305, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp", metadata !"clang", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 786688, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
+!2 = metadata !{i32 786443, metadata !4, metadata !3, i32 26, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!3 = metadata !{i32 786478, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ]
+!4 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 786449, i32 1, metadata !4, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 786453, metadata !13, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
!7 = metadata !{metadata !8, metadata !8, metadata !8}
-!8 = metadata !{i32 524310, metadata !4, metadata !"ti_int", metadata !9, i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
-!9 = metadata !{i32 524329, metadata !"myint.h", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ]
-!10 = metadata !{i32 524324, metadata !4, metadata !"", metadata !4, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786454, metadata !14, metadata !4, metadata !"ti_int", i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!9 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
+!10 = metadata !{i32 786468, metadata !13, metadata !4, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!11 = metadata !{i32 29, i32 0, metadata !2, null}
+!12 = metadata !{metadata !3}
+!13 = metadata !{metadata !"foo.c", metadata !"/tmp"}
+!14 = metadata !{metadata !"myint.h", metadata !"/tmp"}
diff --git a/test/CodeGen/X86/dbg-large-unsigned-const.ll b/test/CodeGen/X86/dbg-large-unsigned-const.ll
index fc295c679445..ff16318efcec 100644
--- a/test/CodeGen/X86/dbg-large-unsigned-const.ll
+++ b/test/CodeGen/X86/dbg-large-unsigned-const.ll
@@ -26,36 +26,36 @@ entry:
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1, !6}
-!llvm.dbg.lv._Z3iseRKxS0_ = !{!7, !11}
-!llvm.dbg.lv._Z2fnx = !{!12}
+!29 = metadata !{metadata !1, metadata !6}
+!30 = metadata !{metadata !7, metadata !11}
+!31 = metadata !{metadata !12}
-!0 = metadata !{i32 655377, i32 0, i32 4, metadata !"lli.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 135593)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 655406, i32 0, metadata !2, metadata !"ise", metadata !"ise", metadata !"_Z3iseRKxS0_", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 655401, metadata !"lli.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 655381, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 135593)", i1 true, metadata !"", i32 0, null, null, metadata !29, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !"_Z3iseRKxS0_", i32 0, metadata !2, metadata !"ise", metadata !"ise", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null, metadata !30, i32 2} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !"lli.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 655396, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 655406, i32 0, metadata !2, metadata !"fn", metadata !"fn", metadata !"_Z2fnx", metadata !2, i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 655617, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 655376, metadata !0, null, null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
-!9 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ]
-!10 = metadata !{i32 655396, metadata !0, metadata !"long long int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 655617, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 655617, metadata !6, metadata !"a", metadata !2, i32 16777222, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{i32 786468, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !"_Z2fnx", i32 0, metadata !2, metadata !"fn", metadata !"fn", metadata !2, i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null, metadata !31, i32 6} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 786448, metadata !0, null, null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
+!9 = metadata !{i32 786470, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ]
+!10 = metadata !{i32 786468, metadata !0, metadata !"long long int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !2, i32 16777222, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!13 = metadata !{i32 2, i32 27, metadata !1, null}
!14 = metadata !{i32 2, i32 49, metadata !1, null}
!15 = metadata !{i32 3, i32 3, metadata !16, null}
-!16 = metadata !{i32 655371, metadata !1, i32 2, i32 54, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 54, i32 0} ; [ DW_TAG_lexical_block ]
!17 = metadata !{metadata !"long long", metadata !18}
!18 = metadata !{metadata !"omnipotent char", metadata !19}
!19 = metadata !{metadata !"Simple C/C++ TBAA", null}
!20 = metadata !{i32 6, i32 19, metadata !6, null}
-!21 = metadata !{i32 655617, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
+!21 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
!22 = metadata !{i32 7, i32 10, metadata !23, null}
-!23 = metadata !{i32 655371, metadata !6, i32 6, i32 22, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !2, metadata !6, i32 6, i32 22, i32 1} ; [ DW_TAG_lexical_block ]
!24 = metadata !{i32 2, i32 27, metadata !1, metadata !22}
!25 = metadata !{i64 9223372036854775807}
-!26 = metadata !{i32 655617, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
+!26 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
!27 = metadata !{i32 2, i32 49, metadata !1, metadata !22}
!28 = metadata !{i32 3, i32 3, metadata !16, metadata !22}
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index d1e349f79d6f..baad6c0b60e6 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -43,33 +43,36 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
-!llvm.dbg.sp = !{!0, !9}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"foobar.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"foobar.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8}
-!5 = metadata !{i32 589846, metadata !6, metadata !"UTItype", metadata !6, i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
-!6 = metadata !{i32 589865, metadata !"foobar.h", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
-!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"UTItype", i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!6 = metadata !{i32 786473, metadata !30} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786447, metadata !29, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
+!9 = metadata !{i32 786478, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3, null, null, null, i32 1094} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
!11 = metadata !{metadata !12, metadata !12, metadata !12}
-!12 = metadata !{i32 589846, metadata !6, metadata !"TItype", metadata !6, i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
-!13 = metadata !{i32 589860, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 590081, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0} ; [ DW_TAG_arg_variable ]
+!12 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"TItype", i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
+!13 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
!15 = metadata !{i32 1093, i32 0, metadata !9, null}
!16 = metadata !{i64 0}
-!17 = metadata !{i32 590080, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 589835, metadata !9, i32 1094, i32 0, metadata !1, i32 13} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 589846, metadata !6, metadata !"word_type", metadata !6, i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
-!20 = metadata !{i32 589860, metadata !1, metadata !"long int", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786688, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0, null} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 786443, metadata !1, metadata !9, i32 1094, i32 0, i32 13} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"word_type", i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
+!20 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!21 = metadata !{i32 1095, i32 0, metadata !18, null}
!22 = metadata !{i32 1103, i32 0, metadata !18, null}
!23 = metadata !{i32 1104, i32 0, metadata !18, null}
!24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26}
-!25 = metadata !{i32 589835, metadata !0, i32 879, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 786443, metadata !1, metadata !0, i32 879, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
!26 = metadata !{i32 1107, i32 0, metadata !18, null}
!27 = metadata !{i32 1111, i32 0, metadata !18, null}
+!28 = metadata !{metadata !0, metadata !9}
+!29 = metadata !{metadata !"foobar.c", metadata !"/tmp"}
+!30 = metadata !{metadata !"foobar.h", metadata !"/tmp"}
diff --git a/test/CodeGen/X86/dbg-prolog-end.ll b/test/CodeGen/X86/dbg-prolog-end.ll
index 81303bb3d2ba..26bac2e08286 100644
--- a/test/CodeGen/X86/dbg-prolog-end.ll
+++ b/test/CodeGen/X86/dbg-prolog-end.ll
@@ -33,23 +33,23 @@ entry:
}
!llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1, !6}
+!18 = metadata !{metadata !1, metadata !6}
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 131100)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589870, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 590081, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
!8 = metadata !{i32 1, i32 13, metadata !1, null}
-!9 = metadata !{i32 590080, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 589835, metadata !1, i32 1, i32 16, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 786688, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
!11 = metadata !{i32 2, i32 6, metadata !10, null}
!12 = metadata !{i32 2, i32 11, metadata !10, null}
!13 = metadata !{i32 3, i32 2, metadata !10, null}
!14 = metadata !{i32 4, i32 2, metadata !10, null}
!15 = metadata !{i32 5, i32 2, metadata !10, null}
!16 = metadata !{i32 8, i32 2, metadata !17, null}
-!17 = metadata !{i32 589835, metadata !6, i32 7, i32 12, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{i32 786443, metadata !2, metadata !6, i32 7, i32 12, i32 1} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll
index 788910c7fe72..6090185dc10e 100644
--- a/test/CodeGen/X86/dbg-subrange.ll
+++ b/test/CodeGen/X86/dbg-subrange.ll
@@ -14,24 +14,21 @@ entry:
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"small.c", metadata !"/private/tmp", metadata !"clang version 3.1 (trunk 144833)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 720937, metadata !"small.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{null}
!9 = metadata !{metadata !10}
!10 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!11 = metadata !{metadata !12}
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s} ; [ DW_TAG_variable ]
+!11 = metadata !{metadata !13}
+!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s, null} ; [ DW_TAG_variable ]
!14 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
!15 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
!16 = metadata !{metadata !17}
-!17 = metadata !{i32 720929, i64 0, i64 4294967295} ; [ DW_TAG_subrange_type ]
+!17 = metadata !{i32 720929, i64 0, i64 4294967296} ; [ DW_TAG_subrange_type ]
!18 = metadata !{i32 5, i32 3, metadata !19, null}
-!19 = metadata !{i32 720907, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786443, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
!20 = metadata !{i32 6, i32 1, metadata !19, null}
diff --git a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/CodeGen/X86/dbg-value-dag-combine.ll
index b115bf475c42..fcbf64f42378 100644
--- a/test/CodeGen/X86/dbg-value-dag-combine.ll
+++ b/test/CodeGen/X86/dbg-value-dag-combine.ll
@@ -23,26 +23,25 @@ entry:
store i32 %tmp3, i32 addrspace(1)* %arrayidx, align 4, !dbg !16
ret void, !dbg !17
}
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata
-!"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata
-!"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
-!6 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 590081, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
+!6 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
!8 = metadata !{i32 1, i32 42, metadata !0, null}
-!9 = metadata !{i32 590080, metadata !10, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 589835, metadata !0, i32 2, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 786688, metadata !10, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
+!10 = metadata !{i32 786443, metadata !0, i32 2, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
!11 = metadata !{i32 3, i32 41, metadata !10, null}
!12 = metadata !{i32 0}
-!13 = metadata !{i32 590080, metadata !10, metadata !"idx", metadata !1, i32 4, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!13 = metadata !{i32 786688, metadata !10, metadata !"idx", metadata !1, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
!14 = metadata !{i32 4, i32 20, metadata !10, null}
!15 = metadata !{i32 5, i32 15, metadata !10, null}
!16 = metadata !{i32 6, i32 18, metadata !10, null}
!17 = metadata !{i32 7, i32 1, metadata !0, null}
-
+!18 = metadata !{metadata !0}
+!19 = metadata !{metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp"}
diff --git a/test/CodeGen/X86/dbg-value-inlined-parameter.ll b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
deleted file mode 100644
index d248a4130355..000000000000
--- a/test/CodeGen/X86/dbg-value-inlined-parameter.ll
+++ /dev/null
@@ -1,87 +0,0 @@
-; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-apple-darwin -regalloc=basic < %s | FileCheck %s
-
-;CHECK: DW_TAG_inlined_subroutine
-;CHECK-NEXT: DW_AT_abstract_origin
-;CHECK-NEXT: DW_AT_low_pc
-;CHECK-NEXT: DW_AT_high_pc
-;CHECK-NEXT: DW_AT_call_file
-;CHECK-NEXT: DW_AT_call_line
-;CHECK-NEXT: DW_TAG_formal_parameter
-;CHECK-NEXT: Lstring11-Lsection_str ## DW_AT_name
-
-%struct.S1 = type { float*, i32 }
-
-@p = common global %struct.S1 zeroinitializer, align 8
-
-define i32 @foo(%struct.S1* nocapture %sp, i32 %nums) nounwind optsize ssp {
-entry:
- tail call void @llvm.dbg.value(metadata !{%struct.S1* %sp}, i64 0, metadata !9), !dbg !20
- tail call void @llvm.dbg.value(metadata !{i32 %nums}, i64 0, metadata !18), !dbg !21
- %tmp2 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 1, !dbg !22
- store i32 %nums, i32* %tmp2, align 4, !dbg !22, !tbaa !24
- %call = tail call float* @bar(i32 %nums) nounwind optsize, !dbg !27
- %tmp5 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 0, !dbg !27
- store float* %call, float** %tmp5, align 8, !dbg !27, !tbaa !28
- %cmp = icmp ne float* %call, null, !dbg !29
- %cond = zext i1 %cmp to i32, !dbg !29
- ret i32 %cond, !dbg !29
-}
-
-declare float* @bar(i32) optsize
-
-define void @foobar() nounwind optsize ssp {
-entry:
- tail call void @llvm.dbg.value(metadata !30, i64 0, metadata !9) nounwind, !dbg !31
- tail call void @llvm.dbg.value(metadata !34, i64 0, metadata !18) nounwind, !dbg !35
- store i32 1, i32* getelementptr inbounds (%struct.S1* @p, i64 0, i32 1), align 8, !dbg !36, !tbaa !24
- %call.i = tail call float* @bar(i32 1) nounwind optsize, !dbg !37
- store float* %call.i, float** getelementptr inbounds (%struct.S1* @p, i64 0, i32 0), align 8, !dbg !37, !tbaa !28
- ret void, !dbg !38
-}
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0, !6}
-!llvm.dbg.lv.foo = !{!9, !18}
-!llvm.dbg.gv = !{!19}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"nm2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"nm2.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 125693)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", metadata !1, i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!8 = metadata !{null}
-!9 = metadata !{i32 590081, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 589846, metadata !2, metadata !"S1", metadata !1, i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 589843, metadata !2, metadata !"S1", metadata !1, i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!13 = metadata !{metadata !14, metadata !17}
-!14 = metadata !{i32 589837, metadata !1, metadata !"m", metadata !1, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ]
-!15 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
-!16 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 589837, metadata !1, metadata !"nums", metadata !1, i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!18 = metadata !{i32 590081, metadata !0, metadata !"nums", metadata !1, i32 7, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 589876, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p} ; [ DW_TAG_variable ]
-!20 = metadata !{i32 7, i32 13, metadata !0, null}
-!21 = metadata !{i32 7, i32 21, metadata !0, null}
-!22 = metadata !{i32 9, i32 3, metadata !23, null}
-!23 = metadata !{i32 589835, metadata !0, i32 8, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!24 = metadata !{metadata !"int", metadata !25}
-!25 = metadata !{metadata !"omnipotent char", metadata !26}
-!26 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!27 = metadata !{i32 10, i32 3, metadata !23, null}
-!28 = metadata !{metadata !"any pointer", metadata !25}
-!29 = metadata !{i32 11, i32 3, metadata !23, null}
-!30 = metadata !{%struct.S1* @p}
-!31 = metadata !{i32 7, i32 13, metadata !0, metadata !32}
-!32 = metadata !{i32 16, i32 3, metadata !33, null}
-!33 = metadata !{i32 589835, metadata !6, i32 15, i32 15, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
-!34 = metadata !{i32 1}
-!35 = metadata !{i32 7, i32 21, metadata !0, metadata !32}
-!36 = metadata !{i32 9, i32 3, metadata !23, metadata !32}
-!37 = metadata !{i32 10, i32 3, metadata !23, metadata !32}
-!38 = metadata !{i32 17, i32 1, metadata !33, null}
diff --git a/test/CodeGen/X86/dbg-value-isel.ll b/test/CodeGen/X86/dbg-value-isel.ll
index f1101e61f0fe..55be3b1f222b 100644
--- a/test/CodeGen/X86/dbg-value-isel.ll
+++ b/test/CodeGen/X86/dbg-value-isel.ll
@@ -78,25 +78,26 @@ declare <4 x i32> @__amdil_get_global_id_int() nounwind
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !19, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
-!6 = metadata !{i32 589846, metadata !2, metadata !"uint", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
-!7 = metadata !{i32 589860, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 590081, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
+!6 = metadata !{i32 589846, metadata !20, metadata !2, metadata !"uint", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
+!7 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
!9 = metadata !{i32 1, i32 32, metadata !0, null}
-!10 = metadata !{i32 590080, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 589835, metadata !0, i32 2, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786688, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 786443, metadata !0, i32 2, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ]
!12 = metadata !{i32 5, i32 24, metadata !11, null}
-!13 = metadata !{i32 590080, metadata !11, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!13 = metadata !{i32 786688, metadata !11, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
!14 = metadata !{i32 6, i32 25, metadata !11, null}
-!15 = metadata !{i32 590080, metadata !11, metadata !"lsz", metadata !1, i32 3, metadata !6, i32 0} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 786688, metadata !11, metadata !"lsz", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
!16 = metadata !{i32 7, i32 26, metadata !11, null}
!17 = metadata !{i32 9, i32 24, metadata !11, null}
!18 = metadata !{i32 10, i32 1, metadata !0, null}
-
+!19 = metadata !{metadata !0}
+!20 = metadata !{metadata !"OCLlLwTXZ.cl", metadata !"/tmp"}
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
index 05e29ecff03f..2a1916f26c97 100644
--- a/test/CodeGen/X86/dbg-value-location.ll
+++ b/test/CodeGen/X86/dbg-value-location.ll
@@ -45,26 +45,30 @@ declare hidden fastcc i32 @bar2(i32) nounwind optsize ssp
declare hidden fastcc i32 @bar3(i32) nounwind optsize ssp
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0, !6, !7, !8}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/f.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"f.i", metadata !"/tmp", metadata !"clang version 2.9 (trunk 124753)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", metadata !1, i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", metadata !1, i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ]
+!8 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!10 = metadata !{metadata !11}
-!11 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 590081, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!11 = metadata !{i32 786468, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 786689, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
!13 = metadata !{i32 19509, i32 20, metadata !0, null}
!14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17}
-!15 = metadata !{i32 589835, metadata !16, i32 18086, i32 1, metadata !1, i32 748} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", metadata !1, i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 786443, metadata !1, metadata !16, i32 18086, i32 1, i32 748} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ]
!17 = metadata !{i32 19514, i32 2, metadata !18, null}
-!18 = metadata !{i32 589835, metadata !0, i32 19510, i32 1, metadata !1, i32 99} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786443, metadata !1, metadata !0, i32 19510, i32 1, i32 99} ; [ DW_TAG_lexical_block ]
!22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17}
!23 = metadata !{i32 19524, i32 1, metadata !18, null}
+!24 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8}
+!25 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
+!26 = metadata !{metadata !"/tmp/f.c", metadata !"/tmp"}
+!27 = metadata !{metadata !"f.i", metadata !"/tmp"}
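The renumbered tags above follow the (DebugVersion << 16) | DW_TAG encoding this era's debug metadata uses: the old constants are built on 9 << 16 = 589824 and the new ones on 12 << 16 = 786432, so for example 786468 = 786432 + 0x24 (DW_TAG_base_type) and 786478 = 786432 + 0x2e (DW_TAG_subprogram).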
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
index 6b16865ba9ee..6766dbe9edb0 100644
--- a/test/CodeGen/X86/dbg-value-range.ll
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -17,22 +17,21 @@ declare i32 @foo(...)
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0}
-!llvm.dbg.lv.bar = !{!6, !11}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"bar.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"bar.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 122997)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, null, null, metadata !20, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590081, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 589843, metadata !2, metadata !"a", metadata !1, i32 1, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 786451, metadata !2, metadata !"a", metadata !1, i32 1, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!9 = metadata !{metadata !10}
-!10 = metadata !{i32 589837, metadata !1, metadata !"c", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!11 = metadata !{i32 590080, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!12 = metadata !{i32 589835, metadata !0, i32 5, i32 22, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786445, metadata !1, metadata !"c", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!11 = metadata !{i32 786688, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!12 = metadata !{i32 786443, metadata !22, metadata !0, i32 5, i32 22, i32 0} ; [ DW_TAG_lexical_block ]
!13 = metadata !{i32 5, i32 19, metadata !0, null}
!14 = metadata !{i32 6, i32 14, metadata !12, null}
!15 = metadata !{metadata !"int", metadata !16}
@@ -40,6 +39,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!17 = metadata !{metadata !"Simple C/C++ TBAA", null}
!18 = metadata !{i32 7, i32 2, metadata !12, null}
!19 = metadata !{i32 8, i32 2, metadata !12, null}
+!20 = metadata !{metadata !0}
+!21 = metadata !{metadata !6, metadata !11}
+!22 = metadata !{metadata !"bar.c", metadata !"/private/tmp"}
; Check that variable bar:b value range is appropriately truncated in debug info.
; The variable is in %rdi which is clobbered by 'movl %ebx, %edi'
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index 8e7c13d8efa9..9669d97cb7fa 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -56,9 +56,9 @@ entry:
%div = sdiv i16 %x, 10
ret i16 %div
; CHECK: test6:
-; CHECK: imull $26215, %eax, %eax
-; CHECK: shrl $31, %ecx
-; CHECK: sarl $18, %eax
+; CHECK: imull $26215, %eax, %ecx
+; CHECK: sarl $18, %ecx
+; CHECK: shrl $15, %eax
}
define i32 @test7(i32 %x) nounwind {
diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll
index c64752c9522b..3bc2957963eb 100644
--- a/test/CodeGen/X86/dwarf-comp-dir.ll
+++ b/test/CodeGen/X86/dwarf-comp-dir.ll
@@ -1,14 +1,16 @@
; RUN: llc %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"empty.c", metadata !"/home/nlewycky", metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, metadata !4, i32 12, metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ]
!1 = metadata !{metadata !2}
!2 = metadata !{i32 0}
+!3 = metadata !{i32 786473, metadata !4} ; [ DW_TAG_file_type ]
+!4 = metadata !{metadata !"empty.c", metadata !"/home/nlewycky"}
; The important part of the following check is that dir = #0.
; Dir Mod Time File Len File Name
diff --git a/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
index c5e47facf346..9405f76cbed0 100644
--- a/test/CodeGen/X86/dynamic-allocas-VLAs.ll
+++ b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
@@ -103,7 +103,7 @@ entry:
declare void @t4_helper(i32*, i32*, <8 x float>*)
-; Dynamic realignment + Spill
+; Spilling an AVX register shouldn't cause dynamic realignment
define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
entry:
%a = alloca i32, align 4
@@ -116,21 +116,15 @@ entry:
ret i32 %add
; CHECK: _t5
-; CHECK: pushq %rbp
-; CHECK: movq %rsp, %rbp
-; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
-; CHECK: vmovaps [[AVXREG]], (%rsp)
+; CHECK: vmovups [[AVXREG]], (%rsp)
; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK: callq _t5_helper1
-; CHECK: vmovaps (%rsp), %ymm0
+; CHECK: vmovups (%rsp), %ymm0
; CHECK: callq _t5_helper2
; CHECK: movl {{[0-9]+}}(%rsp), %eax
-;
-; CHECK: movq %rbp, %rsp
-; CHECK: popq %rbp
}
declare void @t5_helper1(i32*)
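Without the dropped realignment sequence (andq $-32, %rsp), the spill slot is only guaranteed the ABI's 16-byte stack alignment, so the 32-byte YMM spill and reload must use the unaligned vmovups; vmovaps would fault on an under-aligned address.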
diff --git a/test/CodeGen/X86/early-ifcvt-crash.ll b/test/CodeGen/X86/early-ifcvt-crash.ll
index c8280269689d..d9580503e91e 100644
--- a/test/CodeGen/X86/early-ifcvt-crash.ll
+++ b/test/CodeGen/X86/early-ifcvt-crash.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -x86-early-ifcvt -verify-machineinstrs
; RUN: llc < %s -x86-early-ifcvt -stress-early-ifcvt -verify-machineinstrs
+; CPU without a scheduling model:
+; RUN: llc < %s -x86-early-ifcvt -mcpu=pentium3 -verify-machineinstrs
;
; Run these tests with and without -stress-early-ifcvt to exercise heuristics.
;
diff --git a/test/CodeGen/X86/early-ifcvt.ll b/test/CodeGen/X86/early-ifcvt.ll
index 2e1852d3e3ae..2606bd28d5fc 100644
--- a/test/CodeGen/X86/early-ifcvt.ll
+++ b/test/CodeGen/X86/early-ifcvt.ll
@@ -142,3 +142,34 @@ save_state_and_return:
}
declare void @BZ2_bz__AssertH__fail()
+
+; Make sure we don't speculate on div/idiv instructions
+; CHECK: test_idiv
+; CHECK-NOT: cmov
+define i32 @test_idiv(i32 %a, i32 %b) nounwind uwtable readnone ssp {
+ %1 = icmp eq i32 %b, 0
+ br i1 %1, label %4, label %2
+
+; <label>:2 ; preds = %0
+ %3 = sdiv i32 %a, %b
+ br label %4
+
+; <label>:4 ; preds = %0, %2
+ %5 = phi i32 [ %3, %2 ], [ %a, %0 ]
+ ret i32 %5
+}
+
+; CHECK: test_div
+; CHECK-NOT: cmov
+define i32 @test_div(i32 %a, i32 %b) nounwind uwtable readnone ssp {
+ %1 = icmp eq i32 %b, 0
+ br i1 %1, label %4, label %2
+
+; <label>:2 ; preds = %0
+ %3 = udiv i32 %a, %b
+ br label %4
+
+; <label>:4 ; preds = %0, %2
+ %5 = phi i32 [ %3, %2 ], [ %a, %0 ]
+ ret i32 %5
+}
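Speculation is unsafe here because x86 div/idiv trap (#DE) on a zero divisor: the icmp/br guard is the only thing preventing the division from executing when %b is 0, so converting the branch into a cmov would hoist a potentially trapping instruction.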
diff --git a/test/CodeGen/X86/fast-isel-args-fail.ll b/test/CodeGen/X86/fast-isel-args-fail.ll
new file mode 100644
index 000000000000..e748e1cad1fd
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-args-fail.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-apple-darwin10
+; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-pc-win64 | FileCheck %s -check-prefix=WIN64
+; REQUIRES: asserts
+
+; Previously, this would cause an assert.
+define i31 @t1(i31 %a, i31 %b, i31 %c) {
+entry:
+ %add = add nsw i31 %b, %a
+ %add1 = add nsw i31 %add, %c
+ ret i31 %add1
+}
+
+; We don't handle the Windows CC yet.
+define i32 @foo(i32* %p) {
+entry:
+; WIN32: foo
+; WIN32: movl (%rcx), %eax
+; WIN64: foo
+; WIN64: movl (%rdi), %eax
+ %0 = load i32* %p, align 4
+ ret i32 %0
+}
diff --git a/test/CodeGen/X86/fast-isel-args.ll b/test/CodeGen/X86/fast-isel-args.ll
new file mode 100644
index 000000000000..0f3626565e7d
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-args.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -fast-isel -fast-isel-abort -fast-isel-abort-args -verify-machineinstrs -mtriple=x86_64-apple-darwin10
+
+; Just make sure these don't abort when lowering the arguments.
+define i32 @t1(i32 %a, i32 %b, i32 %c) {
+entry:
+ %add = add nsw i32 %b, %a
+ %add1 = add nsw i32 %add, %c
+ ret i32 %add1
+}
+
+define i64 @t2(i64 %a, i64 %b, i64 %c) {
+entry:
+ %add = add nsw i64 %b, %a
+ %add1 = add nsw i64 %add, %c
+ ret i64 %add1
+}
+
+define i64 @t3(i32 %a, i64 %b, i32 %c) {
+entry:
+ %conv = sext i32 %a to i64
+ %add = add nsw i64 %conv, %b
+ %conv1 = sext i32 %c to i64
+ %add2 = add nsw i64 %add, %conv1
+ ret i64 %add2
+}
diff --git a/test/CodeGen/X86/fast-isel-constant.ll b/test/CodeGen/X86/fast-isel-constant.ll
new file mode 100644
index 000000000000..6f9240ac4700
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-constant.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s
+; Make sure fast-isel doesn't reset the materialised constant map
+; across an intrinsic call.
+
+; CHECK: movl $100000
+; CHECK-NOT: movl $100000
+define i1 @test1(i32 %v1, i32 %v2, i32* %X) nounwind {
+entry:
+ %a = shl i32 100000, %v1
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %v2)
+ %ext = extractvalue {i32, i1} %t, 0
+ %sum = shl i32 100000, %ext
+ %obit = extractvalue {i32, i1} %t, 1
+ br i1 %obit, label %overflow, label %normal
+
+normal:
+ store i32 %sum, i32* %X
+ br label %overflow
+
+overflow:
+ ret i1 false
+}
+
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
diff --git a/test/CodeGen/X86/fast-isel-expect.ll b/test/CodeGen/X86/fast-isel-expect.ll
new file mode 100644
index 000000000000..c4be7f364f30
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-expect.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -O0 -march=x86 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@glbl = extern_weak constant i8
+
+declare i64 @llvm.expect.i64(i64, i64)
+
+define void @test() {
+; CHECK: movl $glbl
+ %tmp = call i64 @llvm.expect.i64(i64 zext (i1 icmp eq (i8* @glbl, i8* null) to i64), i64 0)
+ %tmp2 = icmp ne i64 %tmp, 0
+ br i1 %tmp2, label %bb1, label %bb2
+
+bb1:
+ unreachable
+
+bb2:
+ unreachable
+}
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index cdfaf7f4c134..ad1520ef8194 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mattr=-avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s
-; RUN: llc < %s -mattr=+avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mattr=+avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/CodeGen/X86/float-asmprint.ll b/test/CodeGen/X86/float-asmprint.ll
new file mode 100644
index 000000000000..4aeae7fe0469
--- /dev/null
+++ b/test/CodeGen/X86/float-asmprint.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=x86_64-none-linux < %s | FileCheck %s
+
+; Check that all current floating-point types are correctly emitted to assembly
+; on a little-endian target.
+
+@var128 = global fp128 0xL00000000000000008000000000000000, align 16
+@varppc128 = global ppc_fp128 0xM80000000000000000000000000000000, align 16
+@var80 = global x86_fp80 0xK80000000000000000000, align 16
+@var64 = global double -0.0, align 8
+@var32 = global float -0.0, align 4
+@var16 = global half -0.0, align 2
+
+; CHECK: var128:
+; CHECK-NEXT: .quad 0 # fp128 -0
+; CHECK-NEXT: .quad -9223372036854775808
+; CHECK-NEXT: .size
+
+; CHECK: varppc128:
+; CHECK-NEXT: .quad 0 # ppc_fp128 -0
+; CHECK-NEXT: .quad -9223372036854775808
+; CHECK-NEXT: .size
+
+; CHECK: var80:
+; CHECK-NEXT: .quad 0 # x86_fp80 -0
+; CHECK-NEXT: .short 32768
+; CHECK-NEXT: .zero 6
+; CHECK-NEXT: .size
+
+; CHECK: var64:
+; CHECK-NEXT: .quad -9223372036854775808 # double -0
+; CHECK-NEXT: .size
+
+; CHECK: var32:
+; CHECK-NEXT: .long 2147483648 # float -0
+; CHECK-NEXT: .size
+
+; CHECK: var16:
+; CHECK-NEXT: .short 32768 # half -0
+; CHECK-NEXT: .size
+
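The expected values are just sign-bit patterns printed in decimal: negating zero sets only the top bit, so half -0.0 is 0x8000 = 32768, float -0.0 is 0x80000000 = 2147483648, and double -0.0 is 0x8000000000000000, which prints as the signed .quad -9223372036854775808. The fp128, ppc_fp128, and x86_fp80 cases likewise emit a zero low part followed by the word carrying the sign bit.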
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
index 2fe1ecd40e0c..7a1a9ae46147 100644
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -63,6 +63,16 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x floa
}
declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+; To test execution dependencies
+define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x float >* %a1, < 4 x float > %a2) {
+ ; CHECK: vmovaps
+ ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
+ %x = load <4 x float>* %a0
+ %y = load <4 x float>* %a1
+ %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %x, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+
define < 2 x double > @test_x86_fma_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
; CHECK: vfmaddpd
%res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
@@ -82,6 +92,16 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x do
}
declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+; To test execution dependencies
+define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x double >* %a1, < 2 x double > %a2) {
+ ; CHECK: vmovapd
+ ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
+ %x = load <2 x double>* %a0
+ %y = load <2 x double>* %a1
+ %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %x, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+
define < 8 x float > @test_x86_fma_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
; CHECK: vfmaddps
; CHECK: ymm
diff --git a/test/CodeGen/X86/fold-call.ll b/test/CodeGen/X86/fold-call.ll
index 603e9ad66caa..35327faa6486 100644
--- a/test/CodeGen/X86/fold-call.ll
+++ b/test/CodeGen/X86/fold-call.ll
@@ -1,10 +1,27 @@
-; RUN: llc < %s -march=x86 | not grep mov
-; RUN: llc < %s -march=x86-64 | not grep mov
+; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86-64 | FileCheck %s
-declare void @bar()
+; CHECK: test1
+; CHECK-NOT: mov
-define void @foo(i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, void()* %arg) nounwind {
+declare void @bar()
+define void @test1(i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, void()* %arg) nounwind {
call void @bar()
call void %arg()
ret void
}
+
+; PR14739
+; CHECK: test2
+; CHECK: mov{{.*}} $0, ([[REGISTER:%[a-z]+]])
+; CHECK-NOT: jmp{{.*}} *([[REGISTER]])
+
+%struct.X = type { void ()* }
+define void @test2(%struct.X* nocapture %x) {
+entry:
+ %f = getelementptr inbounds %struct.X* %x, i64 0, i32 0
+ %0 = load void ()** %f
+ store void ()* null, void ()** %f
+ tail call void %0()
+ ret void
+}
diff --git a/test/CodeGen/X86/fold-load-vec.ll b/test/CodeGen/X86/fold-load-vec.ll
new file mode 100644
index 000000000000..c1756d5e2e1a
--- /dev/null
+++ b/test/CodeGen/X86/fold-load-vec.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
+
+; rdar://12721174
+; We should not fold movss into pshufd since pshufd expects m128 while movss
+; loads from m32.
+define void @sample_test(<4 x float>* %source, <2 x float>* %dest) nounwind {
+; CHECK: sample_test
+; CHECK: movss
+; CHECK: pshufd
+entry:
+ %source.addr = alloca <4 x float>*, align 8
+ %dest.addr = alloca <2 x float>*, align 8
+ %tmp = alloca <2 x float>, align 8
+ store <4 x float>* %source, <4 x float>** %source.addr, align 8
+ store <2 x float>* %dest, <2 x float>** %dest.addr, align 8
+ store <2 x float> zeroinitializer, <2 x float>* %tmp, align 8
+ %0 = load <4 x float>** %source.addr, align 8
+ %arrayidx = getelementptr inbounds <4 x float>* %0, i64 0
+ %1 = load <4 x float>* %arrayidx, align 16
+ %2 = extractelement <4 x float> %1, i32 0
+ %3 = load <2 x float>* %tmp, align 8
+ %4 = insertelement <2 x float> %3, float %2, i32 1
+ store <2 x float> %4, <2 x float>* %tmp, align 8
+ %5 = load <2 x float>* %tmp, align 8
+ %6 = load <2 x float>** %dest.addr, align 8
+ %arrayidx1 = getelementptr inbounds <2 x float>* %6, i64 0
+ store <2 x float> %5, <2 x float>* %arrayidx1, align 8
+ %7 = load <2 x float>** %dest.addr, align 8
+ %arrayidx2 = getelementptr inbounds <2 x float>* %7, i64 0
+ %8 = load <2 x float>* %arrayidx2, align 8
+ %vecext = extractelement <2 x float> %8, i32 0
+ %9 = load <2 x float>** %dest.addr, align 8
+ %arrayidx3 = getelementptr inbounds <2 x float>* %9, i64 0
+ %10 = load <2 x float>* %arrayidx3, align 8
+ %vecext4 = extractelement <2 x float> %10, i32 1
+ call void @ext(float %vecext, float %vecext4)
+ ret void
+}
+declare void @ext(float, float)
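The fold is rejected because a pshufd with a memory operand performs a full 16-byte (m128) read, while the movss it would replace reads only 4 bytes; the folded form could touch 12 bytes the program never allocated.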
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 9cf4607cf5b2..2bde76efd2ae 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -43,21 +43,21 @@ forbody: ; preds = %forcond
%mul171.i = fmul <4 x float> %add167.i, %sub140.i ; <<4 x float>> [#uses=1]
%add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1]
%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32> ; <<4 x i32>> [#uses=1]
- %andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer ; <<4 x i32>> [#uses=1]
+ %andnps178.i = add <4 x i32> %bitcast176.i, <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float> ; <<4 x float>> [#uses=1]
%mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1]
%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32> ; <<4 x i32>> [#uses=1]
- %andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer ; <<4 x i32>> [#uses=1]
+ %andnps192.i = add <4 x i32> %bitcast190.i, <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
%xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
- %orps203.i = or <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1]
+ %orps203.i = add <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1]
%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float> ; <<4 x float>> [#uses=1]
%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2]
%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1]
%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1]
%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2]
- %andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13 ; <<4 x i32>> [#uses=1]
+ %andps.i14 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %bitcast6.i13 ; <<4 x i32>> [#uses=1]
%not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
- %andnps.i17 = and <4 x i32> zeroinitializer, %not.i16 ; <<4 x i32>> [#uses=1]
+ %andnps.i17 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %not.i16 ; <<4 x i32>> [#uses=1]
%orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1]
%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1]
%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/fold-vex.ll b/test/CodeGen/X86/fold-vex.ll
new file mode 100644
index 000000000000..2bb5b441c7c0
--- /dev/null
+++ b/test/CodeGen/X86/fold-vex.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
+
+;CHECK: @test1
+; No need to load from memory. The operand will be loaded as part of the AND instr.
+;CHECK-NOT: vmovaps
+;CHECK: vandps
+;CHECK: ret
+
+define void @test1(<8 x i32>* %p0, <8 x i32> %in1) nounwind {
+entry:
+ %in0 = load <8 x i32>* %p0, align 2
+ %a = and <8 x i32> %in0, %in1
+ store <8 x i32> %a, <8 x i32>* undef
+ ret void
+}
+
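The fold is legal here despite the load's align 2 because VEX-encoded instructions such as vandps, unlike their legacy SSE counterparts, do not require 16-byte alignment on memory operands, so the under-aligned load can still be folded into the arithmetic instruction.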
diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll
index d70aa7d79f00..287504801d04 100644
--- a/test/CodeGen/X86/fp-fast.ll
+++ b/test/CodeGen/X86/fp-fast.ll
@@ -38,7 +38,7 @@ define float @test3(float %a) {
; CHECK: test4
define float @test4(float %a) {
; CHECK-NOT: fma
-; CHECK-NOT mul
+; CHECK-NOT: mul
; CHECK-NOT: add
; CHECK: ret
%t1 = fmul float %a, 0.0
diff --git a/test/CodeGen/X86/fp-load-trunc.ll b/test/CodeGen/X86/fp-load-trunc.ll
index 2ae65c97d97a..a973befdafe7 100644
--- a/test/CodeGen/X86/fp-load-trunc.ll
+++ b/test/CodeGen/X86/fp-load-trunc.ll
@@ -49,8 +49,8 @@ define <8 x float> @test4(<8 x double>* %p) nounwind {
; CHECK: movlhps
; CHECK: ret
; AVX: test4
-; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
-; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
+; AVX: vcvtpd2psy
+; AVX: vcvtpd2psy
; AVX: vinsertf128
; AVX: ret
%x = load <8 x double>* %p
diff --git a/test/CodeGen/X86/handle-move.ll b/test/CodeGen/X86/handle-move.ll
index e9f7a962e20d..ba96275569b3 100644
--- a/test/CodeGen/X86/handle-move.ll
+++ b/test/CodeGen/X86/handle-move.ll
@@ -16,7 +16,7 @@
; DL: [0B,16r:0)[128r,144r:2)[144r,144d:1) 0@0B-phi 1@144r 2@128r
; --> [0B,16r:0)[128r,180r:2)[180r,180d:1) 0@0B-phi 1@180r 2@128r
;
-define i32 @f1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+define i32 @f1(i32 %a, i32 %b, i32 %c) nounwind uwtable readnone ssp {
entry:
%y = add i32 %c, 1
%x = udiv i32 %b, %a
@@ -50,7 +50,7 @@ entry:
; %vreg5: [16r,112r:0) 0@16r
; --> [16r,120r:0) 0@16r
;
-define i32 @f3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+define i32 @f3(i32 %a, i32 %b) nounwind uwtable readnone ssp {
entry:
%y = sub i32 %a, %b
%x = add i32 %a, %b
diff --git a/test/CodeGen/X86/hipe-cc.ll b/test/CodeGen/X86/hipe-cc.ll
new file mode 100644
index 000000000000..76d17a09d54e
--- /dev/null
+++ b/test/CodeGen/X86/hipe-cc.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=4 -mtriple=i686-linux-gnu -mcpu=pentium | FileCheck %s
+
+; Check the HiPE calling convention works (x86-32)
+
+define void @zap(i32 %a, i32 %b) nounwind {
+entry:
+ ; CHECK: movl 40(%esp), %eax
+ ; CHECK-NEXT: movl 44(%esp), %edx
+ ; CHECK-NEXT: movl $8, %ecx
+ ; CHECK-NEXT: calll addfour
+ %0 = call cc 11 {i32, i32, i32} @addfour(i32 undef, i32 undef, i32 %a, i32 %b, i32 8)
+ %res = extractvalue {i32, i32, i32} %0, 2
+
+ ; CHECK: movl %eax, 16(%esp)
+ ; CHECK-NEXT: movl $2, 12(%esp)
+ ; CHECK-NEXT: movl $1, 8(%esp)
+ ; CHECK: calll foo
+ tail call void @foo(i32 undef, i32 undef, i32 1, i32 2, i32 %res) nounwind
+ ret void
+}
+
+define cc 11 {i32, i32, i32} @addfour(i32 %hp, i32 %p, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+ ; CHECK: addl %edx, %eax
+ ; CHECK-NEXT: addl %ecx, %eax
+ %0 = add i32 %x, %y
+ %1 = add i32 %0, %z
+
+ ; CHECK: ret
+ %res = insertvalue {i32, i32, i32} undef, i32 %1, 2
+ ret {i32, i32, i32} %res
+}
+
+define cc 11 void @foo(i32 %hp, i32 %p, i32 %arg0, i32 %arg1, i32 %arg2) nounwind {
+entry:
+ ; CHECK: movl %esi, 16(%esp)
+ ; CHECK-NEXT: movl %ebp, 12(%esp)
+ ; CHECK-NEXT: movl %eax, 8(%esp)
+ ; CHECK-NEXT: movl %edx, 4(%esp)
+ ; CHECK-NEXT: movl %ecx, (%esp)
+ %hp_var = alloca i32
+ %p_var = alloca i32
+ %arg0_var = alloca i32
+ %arg1_var = alloca i32
+ %arg2_var = alloca i32
+ store i32 %hp, i32* %hp_var
+ store i32 %p, i32* %p_var
+ store i32 %arg0, i32* %arg0_var
+ store i32 %arg1, i32* %arg1_var
+ store i32 %arg2, i32* %arg2_var
+
+ ; CHECK: movl 4(%esp), %edx
+ ; CHECK-NEXT: movl 8(%esp), %eax
+ ; CHECK-NEXT: movl 12(%esp), %ebp
+ ; CHECK-NEXT: movl 16(%esp), %esi
+ %0 = load i32* %hp_var
+ %1 = load i32* %p_var
+ %2 = load i32* %arg0_var
+ %3 = load i32* %arg1_var
+ %4 = load i32* %arg2_var
+ ; CHECK: jmp bar
+ tail call cc 11 void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind
+ ret void
+}
+
+define cc 11 void @baz() nounwind {
+ %tmp_clos = load i32* @clos
+ %tmp_clos2 = inttoptr i32 %tmp_clos to i32*
+ %indirect_call = bitcast i32* %tmp_clos2 to void (i32, i32, i32)*
+ ; CHECK: movl $42, %eax
+ ; CHECK-NEXT: jmpl *clos
+ tail call cc 11 void %indirect_call(i32 undef, i32 undef, i32 42) nounwind
+ ret void
+}
+
+@clos = external constant i32
+declare cc 11 void @bar(i32, i32, i32, i32, i32)
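For context, cc 11 is LLVM's numeric id for the HiPE calling convention, whose first two parameters carry the Erlang process's heap pointer and process pointer. A minimal sketch of declaring and calling such a function (the names @hipe_callee and @caller are illustrative, not from this patch):

declare cc 11 void @hipe_callee(i32, i32, i32)

define void @caller(i32 %hp, i32 %p) nounwind {
entry:
  ; %hp and %p travel in the first two positions, as in the tests above;
  ; the 7 stands in for an ordinary argument.
  tail call cc 11 void @hipe_callee(i32 %hp, i32 %p, i32 7)
  ret void
}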
diff --git a/test/CodeGen/X86/hipe-cc64.ll b/test/CodeGen/X86/hipe-cc64.ll
new file mode 100644
index 000000000000..5dbb5a25cbeb
--- /dev/null
+++ b/test/CodeGen/X86/hipe-cc64.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=8 -mtriple=x86_64-linux-gnu -mcpu=opteron | FileCheck %s
+
+; Check the HiPE calling convention works (x86-64)
+
+define void @zap(i64 %a, i64 %b) nounwind {
+entry:
+ ; CHECK: movq %rsi, %rax
+ ; CHECK-NEXT: movq %rdi, %rsi
+ ; CHECK-NEXT: movq %rax, %rdx
+ ; CHECK-NEXT: movl $8, %ecx
+ ; CHECK-NEXT: movl $9, %r8d
+ ; CHECK-NEXT: callq addfour
+ %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
+ %res = extractvalue {i64, i64, i64} %0, 2
+
+ ; CHECK: movl $1, %edx
+ ; CHECK-NEXT: movl $2, %ecx
+ ; CHECK-NEXT: movl $3, %r8d
+ ; CHECK-NEXT: movq %rax, %r9
+ ; CHECK: callq foo
+ tail call void @foo(i64 undef, i64 undef, i64 1, i64 2, i64 3, i64 %res) nounwind
+ ret void
+}
+
+define cc 11 {i64, i64, i64} @addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
+entry:
+ ; CHECK: leaq (%rsi,%rdx), %rax
+ ; CHECK-NEXT: addq %rcx, %rax
+ ; CHECK-NEXT: addq %r8, %rax
+ %0 = add i64 %x, %y
+ %1 = add i64 %0, %z
+ %2 = add i64 %1, %w
+
+ ; CHECK: ret
+ %res = insertvalue {i64, i64, i64} undef, i64 %2, 2
+ ret {i64, i64, i64} %res
+}
+
+define cc 11 void @foo(i64 %hp, i64 %p, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) nounwind {
+entry:
+ ; CHECK: movq %r15, 40(%rsp)
+ ; CHECK-NEXT: movq %rbp, 32(%rsp)
+ ; CHECK-NEXT: movq %rsi, 24(%rsp)
+ ; CHECK-NEXT: movq %rdx, 16(%rsp)
+ ; CHECK-NEXT: movq %rcx, 8(%rsp)
+ ; CHECK-NEXT: movq %r8, (%rsp)
+ %hp_var = alloca i64
+ %p_var = alloca i64
+ %arg0_var = alloca i64
+ %arg1_var = alloca i64
+ %arg2_var = alloca i64
+ %arg3_var = alloca i64
+ store i64 %hp, i64* %hp_var
+ store i64 %p, i64* %p_var
+ store i64 %arg0, i64* %arg0_var
+ store i64 %arg1, i64* %arg1_var
+ store i64 %arg2, i64* %arg2_var
+ store i64 %arg3, i64* %arg3_var
+
+ ; CHECK: movq 8(%rsp), %rcx
+ ; CHECK-NEXT: movq 16(%rsp), %rdx
+ ; CHECK-NEXT: movq 24(%rsp), %rsi
+ ; CHECK-NEXT: movq 32(%rsp), %rbp
+ ; CHECK-NEXT: movq 40(%rsp), %r15
+ %0 = load i64* %hp_var
+ %1 = load i64* %p_var
+ %2 = load i64* %arg0_var
+ %3 = load i64* %arg1_var
+ %4 = load i64* %arg2_var
+ %5 = load i64* %arg3_var
+ ; CHECK: jmp bar
+ tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind
+ ret void
+}
+
+define cc 11 void @baz() nounwind {
+ %tmp_clos = load i64* @clos
+ %tmp_clos2 = inttoptr i64 %tmp_clos to i64*
+ %indirect_call = bitcast i64* %tmp_clos2 to void (i64, i64, i64)*
+ ; CHECK: movl $42, %esi
+ ; CHECK-NEXT: jmpq *(%rax)
+ tail call cc 11 void %indirect_call(i64 undef, i64 undef, i64 42) nounwind
+ ret void
+}
+
+@clos = external constant i64
+declare cc 11 void @bar(i64, i64, i64, i64, i64, i64)
diff --git a/test/CodeGen/X86/hipe-prologue.ll b/test/CodeGen/X86/hipe-prologue.ll
new file mode 100644
index 000000000000..ff3c5c803c90
--- /dev/null
+++ b/test/CodeGen/X86/hipe-prologue.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+
+; The HiPE compiler (i.e., the native code compiler of the Erlang/OTP system)
+; adds a custom assembly prologue in order to efficiently manipulate the stack
+; at runtime.
+
+; Just to prevent the alloca from being optimized away.
+declare void @dummy_use(i32*, i32)
+
+define {i32, i32} @test_basic(i32 %hp, i32 %p) {
+ ; X32-Linux: test_basic:
+ ; X32-Linux-NOT: calll inc_stack_0
+
+ ; X64-Linux: test_basic:
+ ; X64-Linux-NOT: callq inc_stack_0
+
+ %mem = alloca i32, i32 10
+ call void @dummy_use (i32* %mem, i32 10)
+ %1 = insertvalue {i32, i32} undef, i32 %hp, 0
+ %2 = insertvalue {i32, i32} %1, i32 %p, 1
+ ret {i32, i32} %1
+}
+
+define cc 11 {i32, i32} @test_basic_hipecc(i32 %hp, i32 %p) {
+ ; X32-Linux: test_basic_hipecc:
+ ; X32-Linux: leal -156(%esp), %ebx
+ ; X32-Linux-NEXT: cmpl 76(%ebp), %ebx
+ ; X32-Linux-NEXT: jb .LBB1_1
+
+ ; X32-Linux: ret
+
+ ; X32-Linux: .LBB1_1:
+ ; X32-Linux-NEXT: calll inc_stack_0
+
+ ; X64-Linux: test_basic_hipecc:
+ ; X64-Linux: leaq -232(%rsp), %r14
+ ; X64-Linux-NEXT: cmpq 144(%rbp), %r14
+ ; X64-Linux-NEXT: jb .LBB1_1
+
+ ; X64-Linux: ret
+
+ ; X64-Linux: .LBB1_1:
+ ; X64-Linux-NEXT: callq inc_stack_0
+
+ %mem = alloca i32, i32 10
+ call void @dummy_use (i32* %mem, i32 10)
+ %1 = insertvalue {i32, i32} undef, i32 %hp, 0
+ %2 = insertvalue {i32, i32} %1, i32 %p, 1
+ ret {i32, i32} %2
+}
+
+define cc 11 {i32,i32,i32} @test_nocall_hipecc(i32 %hp,i32 %p,i32 %x,i32 %y) {
+ ; X32-Linux: test_nocall_hipecc:
+ ; X32-Linux-NOT: calll inc_stack_0
+
+ ; X64-Linux: test_nocall_hipecc:
+ ; X64-Linux-NOT: callq inc_stack_0
+
+ %1 = add i32 %x, %y
+ %2 = mul i32 42, %1
+ %3 = sub i32 24, %2
+ %4 = insertvalue {i32, i32, i32} undef, i32 %hp, 0
+ %5 = insertvalue {i32, i32, i32} %4, i32 %p, 1
+ %6 = insertvalue {i32, i32, i32} %5, i32 %p, 2
+ ret {i32, i32, i32} %6
+}
diff --git a/test/CodeGen/X86/hoist-invariant-load.ll b/test/CodeGen/X86/hoist-invariant-load.ll
index 74ecd045b3d5..34191e3f9a31 100644
--- a/test/CodeGen/X86/hoist-invariant-load.ll
+++ b/test/CodeGen/X86/hoist-invariant-load.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -stats -O2 2>&1 | grep "1 machine-licm"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/test/CodeGen/X86/imul-lea-2.ll b/test/CodeGen/X86/imul-lea-2.ll
index 1cb54b37b0e1..7b79d0678bee 100644
--- a/test/CodeGen/X86/imul-lea-2.ll
+++ b/test/CodeGen/X86/imul-lea-2.ll
@@ -1,15 +1,19 @@
-; RUN: llc < %s -march=x86-64 | grep lea | count 3
-; RUN: llc < %s -march=x86-64 | grep shl | count 1
-; RUN: llc < %s -march=x86-64 | not grep imul
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK-NOT: imul
define i64 @t1(i64 %a) nounwind readnone {
entry:
- %0 = mul i64 %a, 81 ; <i64> [#uses=1]
- ret i64 %0
+ %0 = mul i64 %a, 81
+; CHECK: lea
+; CHECK: lea
+ ret i64 %0
}
define i64 @t2(i64 %a) nounwind readnone {
entry:
- %0 = mul i64 %a, 40 ; <i64> [#uses=1]
- ret i64 %0
+ %0 = mul i64 %a, 40
+; CHECK: shl
+; CHECK: lea
+ ret i64 %0
}
diff --git a/test/CodeGen/X86/imul-lea.ll b/test/CodeGen/X86/imul-lea.ll
index 4e8e2af0f2fe..d55ece7996ed 100644
--- a/test/CodeGen/X86/imul-lea.ll
+++ b/test/CodeGen/X86/imul-lea.ll
@@ -1,10 +1,12 @@
-; RUN: llc < %s -march=x86 | grep lea
+; RUN: llc < %s -march=x86 | FileCheck %s
declare i32 @foo()
define i32 @test() {
- %tmp.0 = tail call i32 @foo( ) ; <i32> [#uses=1]
- %tmp.1 = mul i32 %tmp.0, 9 ; <i32> [#uses=1]
- ret i32 %tmp.1
+ %tmp.0 = tail call i32 @foo( )
+ %tmp.1 = mul i32 %tmp.0, 9
+; CHECK-NOT: mul
+; CHECK: lea
+ ret i32 %tmp.1
}
diff --git a/test/CodeGen/X86/imul64-lea.ll b/test/CodeGen/X86/imul64-lea.ll
new file mode 100644
index 000000000000..047c129ddb33
--- /dev/null
+++ b/test/CodeGen/X86/imul64-lea.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 | FileCheck %s
+
+; Test that 64-bit LEAs are generated for both LP64 and ILP32 in 64-bit mode.
+declare i64 @foo64()
+
+define i64 @test64() {
+ %tmp.0 = tail call i64 @foo64( )
+ %tmp.1 = mul i64 %tmp.0, 9
+; CHECK-NOT: mul
+; CHECK: leaq
+ ret i64 %tmp.1
+}
+
+; Test that 32-bit LEAs are generated for both LP64 and ILP32 in 64-bit mode.
+declare i32 @foo32()
+
+define i32 @test32() {
+ %tmp.0 = tail call i32 @foo32( )
+ %tmp.1 = mul i32 %tmp.0, 9
+; CHECK-NOT: mul
+; CHECK: leal
+ ret i32 %tmp.1
+}
+
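The LEAs come from the strength reduction x*9 = x + 8*x, which fits LEA's base + scale*index form exactly, e.g. leaq (%rax,%rax,8), %rax in the 64-bit case and leal (%eax,%eax,8), %eax in the 32-bit one.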
diff --git a/test/CodeGen/X86/insertelement-copytoregs.ll b/test/CodeGen/X86/insertelement-copytoregs.ll
index 34a29ca7d939..88ff4dafad7d 100644
--- a/test/CodeGen/X86/insertelement-copytoregs.ll
+++ b/test/CodeGen/X86/insertelement-copytoregs.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 | grep -v IMPLICIT_DEF
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; CHECK-NOT: IMPLICIT_DEF
define void @foo(<2 x float>* %p) {
%t = insertelement <2 x float> undef, float 0.0, i32 0
diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll
index 43f69b0c6e93..21128096e6e7 100644
--- a/test/CodeGen/X86/lea-2.ll
+++ b/test/CodeGen/X86/lea-2.ll
@@ -1,13 +1,15 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: grep "lea EAX, DWORD PTR \[... + 4\*... - 5\]"
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
-; RUN: not grep add
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
define i32 @test1(i32 %A, i32 %B) {
- %tmp1 = shl i32 %A, 2 ; <i32> [#uses=1]
- %tmp3 = add i32 %B, -5 ; <i32> [#uses=1]
- %tmp4 = add i32 %tmp3, %tmp1 ; <i32> [#uses=1]
- ret i32 %tmp4
+ %tmp1 = shl i32 %A, 2
+ %tmp3 = add i32 %B, -5
+ %tmp4 = add i32 %tmp3, %tmp1
+; The above computation of %tmp4 should match a single lea, without using
+; actual add instructions.
+; CHECK-NOT: add
+; CHECK: lea {{[A-Z]+}}, DWORD PTR [{{[A-Z]+}} + 4*{{[A-Z]+}} - 5]
+
+ ret i32 %tmp4
}
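In test1 the entire computation folds into LEA's addressing form: %tmp4 = B + 4*A - 5 maps onto base B, index A with scale 4, and displacement -5, which is why the CHECK pattern above demands a single lea and forbids any add.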
diff --git a/test/CodeGen/X86/lea-4.ll b/test/CodeGen/X86/lea-4.ll
index 2171204c01d1..cef47264a583 100644
--- a/test/CodeGen/X86/lea-4.ll
+++ b/test/CodeGen/X86/lea-4.ll
@@ -1,19 +1,21 @@
-; RUN: llc < %s -march=x86-64 | grep lea | count 2
+; RUN: llc < %s -march=x86-64 | FileCheck %s
define zeroext i16 @t1(i32 %on_off) nounwind {
entry:
- %0 = sub i32 %on_off, 1
- %1 = mul i32 %0, 2
- %2 = trunc i32 %1 to i16
- %3 = zext i16 %2 to i32
- %4 = trunc i32 %3 to i16
- ret i16 %4
+ %0 = sub i32 %on_off, 1
+ %1 = mul i32 %0, 2
+ %2 = trunc i32 %1 to i16
+ %3 = zext i16 %2 to i32
+ %4 = trunc i32 %3 to i16
+; CHECK: lea
+ ret i16 %4
}
define i32 @t2(i32 %on_off) nounwind {
entry:
- %0 = sub i32 %on_off, 1
- %1 = mul i32 %0, 2
- %2 = and i32 %1, 65535
- ret i32 %2
+ %0 = sub i32 %on_off, 1
+ %1 = mul i32 %0, 2
+ %2 = and i32 %1, 65535
+; CHECK: lea
+ ret i32 %2
}
diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll
index c9f2fc27dbff..71ef2d3152f8 100644
--- a/test/CodeGen/X86/legalize-shift-64.ll
+++ b/test/CodeGen/X86/legalize-shift-64.ll
@@ -54,3 +54,14 @@ define i64 @test4(i64 %xx, i32 %test) nounwind {
; CHECK: orl %esi, %eax
; CHECK: sarl %cl, %edx
}
+
+; PR14668
+define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) {
+ %shl = shl <2 x i64> %A, %B
+ ret <2 x i64> %shl
+; CHECK: test5
+; CHECK: shl
+; CHECK: shldl
+; CHECK: shl
+; CHECK: shldl
+}
diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll
index c3f991d7a9b0..66074fb3682c 100644
--- a/test/CodeGen/X86/licm-nested.ll
+++ b/test/CodeGen/X86/licm-nested.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 3
; MachineLICM should be able to hoist the symbolic addresses out of
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
index a8ad0f1a28b2..9d285bf4e238 100644
--- a/test/CodeGen/X86/lit.local.cfg
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
targets = set(config.root.targets_to_build.split())
if not 'X86' in targets:
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index eae2e708349c..630c0ed1a33c 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mattr=+sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -mattr=+sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2-Darwin
+; RUN: llc < %s -mattr=+sse2 -mtriple=i686-pc-mingw32 -mcpu=core2 | FileCheck %s -check-prefix=SSE2-Mingw32
; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE1
; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64
@@ -8,19 +9,26 @@
define void @t1(i32 %argc, i8** %argv) nounwind {
entry:
-; SSE2: t1:
-; SSE2: movaps _.str, %xmm0
-; SSE2: movaps %xmm0
-; SSE2: movb $0
-; SSE2: movl $0
-; SSE2: movl $0
+; SSE2-Darwin: t1:
+; SSE2-Darwin: movsd _.str+16, %xmm0
+; SSE2-Darwin: movsd %xmm0, 16(%esp)
+; SSE2-Darwin: movaps _.str, %xmm0
+; SSE2-Darwin: movaps %xmm0
+; SSE2-Darwin: movb $0, 24(%esp)
+
+; SSE2-Mingw32: t1:
+; SSE2-Mingw32: movsd _.str+16, %xmm0
+; SSE2-Mingw32: movsd %xmm0, 16(%esp)
+; SSE2-Mingw32: movaps _.str, %xmm0
+; SSE2-Mingw32: movups %xmm0
+; SSE2-Mingw32: movb $0, 24(%esp)
; SSE1: t1:
; SSE1: movaps _.str, %xmm0
; SSE1: movaps %xmm0
-; SSE1: movb $0
-; SSE1: movl $0
-; SSE1: movl $0
+; SSE1: movb $0, 24(%esp)
+; SSE1: movl $0, 20(%esp)
+; SSE1: movl $0, 16(%esp)
; NOSSE: t1:
; NOSSE: movb $0
@@ -47,9 +55,13 @@ entry:
define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
entry:
-; SSE2: t2:
-; SSE2: movaps (%eax), %xmm0
-; SSE2: movaps %xmm0, (%eax)
+; SSE2-Darwin: t2:
+; SSE2-Darwin: movaps (%eax), %xmm0
+; SSE2-Darwin: movaps %xmm0, (%eax)
+
+; SSE2-Mingw32: t2:
+; SSE2-Mingw32: movaps (%eax), %xmm0
+; SSE2-Mingw32: movaps %xmm0, (%eax)
; SSE1: t2:
; SSE1: movaps (%eax), %xmm0
@@ -78,11 +90,17 @@ entry:
define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
entry:
-; SSE2: t3:
-; SSE2: movsd (%eax), %xmm0
-; SSE2: movsd 8(%eax), %xmm1
-; SSE2: movsd %xmm1, 8(%eax)
-; SSE2: movsd %xmm0, (%eax)
+; SSE2-Darwin: t3:
+; SSE2-Darwin: movsd (%eax), %xmm0
+; SSE2-Darwin: movsd 8(%eax), %xmm1
+; SSE2-Darwin: movsd %xmm1, 8(%eax)
+; SSE2-Darwin: movsd %xmm0, (%eax)
+
+; SSE2-Mingw32: t3:
+; SSE2-Mingw32: movsd (%eax), %xmm0
+; SSE2-Mingw32: movsd 8(%eax), %xmm1
+; SSE2-Mingw32: movsd %xmm1, 8(%eax)
+; SSE2-Mingw32: movsd %xmm0, (%eax)
; SSE1: t3:
; SSE1: movl
@@ -121,15 +139,25 @@ entry:
define void @t4() nounwind {
entry:
-; SSE2: t4:
-; SSE2: movw $120
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
-; SSE2: movl $2021161080
+; SSE2-Darwin: t4:
+; SSE2-Darwin: movw $120
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+; SSE2-Darwin: movl $2021161080
+
+; SSE2-Mingw32: t4:
+; SSE2-Mingw32: movw $120
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
+; SSE2-Mingw32: movl $2021161080
; SSE1: t4:
; SSE1: movw $120
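The Darwin and Mingw32 prefixes diverge only where stack alignment matters: i386 Darwin guarantees 16-byte stack alignment, so a 16-byte store can use movaps, while mingw32 guarantees only 4 bytes and has to use movups for the same store.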
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 39c7fbafd4c7..3372a4adc5ee 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -87,8 +87,34 @@ entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([30 x i8]* @.str, i64 0, i64 0), i64 16, i32 1, i1 false)
ret void
+; DARWIN: test5:
; DARWIN: movabsq $7016996765293437281
; DARWIN: movabsq $7016996765293437184
}
+; PR14896
+@.str2 = private unnamed_addr constant [2 x i8] c"x\00", align 1
+
+define void @test6() nounwind uwtable {
+entry:
+; DARWIN: test6
+; DARWIN: movw $0, 8
+; DARWIN: movq $120, 0
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0), i64 10, i32 1, i1 false)
+ ret void
+}
+
+define void @PR15348(i8* %a, i8* %b) {
+; Ensure that alignment of '0' in an @llvm.memcpy intrinsic results in
+; unaligned loads and stores.
+; LINUX: PR15348
+; LINUX: movb
+; LINUX: movb
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 17, i32 0, i1 false)
+ ret void
+}
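With align 0 the intrinsic promises nothing about alignment, so the 17-byte copy must be lowered as 8 + 8 + 1 bytes of unaligned accesses: the two movq load/store pairs plus the movb pair that the CHECK lines require.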
diff --git a/test/CodeGen/X86/memset-sse-stack-realignment.ll b/test/CodeGen/X86/memset-sse-stack-realignment.ll
new file mode 100644
index 000000000000..df9de5dfaf22
--- /dev/null
+++ b/test/CodeGen/X86/memset-sse-stack-realignment.ll
@@ -0,0 +1,77 @@
+; Make sure that we realign the stack. Mingw32 uses 4-byte stack alignment; we
+; need 16 bytes for SSE and 32 bytes for AVX.
+
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium2 | FileCheck %s -check-prefix=NOSSE
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium3 | FileCheck %s -check-prefix=SSE1
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=yonah | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX1
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2
+
+define void @test1(i32 %t) nounwind {
+ %tmp1210 = alloca i8, i32 32, align 4
+ call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false)
+ %x = alloca i8, i32 %t
+ call void @dummy(i8* %x)
+ ret void
+
+; NOSSE: test1:
+; NOSSE-NOT: and
+; NOSSE: movl $0
+
+; SSE1: test1:
+; SSE1: andl $-16
+; SSE1: movl %esp, %esi
+; SSE1: movaps
+
+; SSE2: test1:
+; SSE2: andl $-16
+; SSE2: movl %esp, %esi
+; SSE2: movaps
+
+; AVX1: test1:
+; AVX1: andl $-32
+; AVX1: movl %esp, %esi
+; AVX1: vmovaps %ymm
+
+; AVX2: test1:
+; AVX2: andl $-32
+; AVX2: movl %esp, %esi
+; AVX2: vmovaps %ymm
+
+}
+
+define void @test2(i32 %t) nounwind {
+ %tmp1210 = alloca i8, i32 16, align 4
+ call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 16, i32 4, i1 false)
+ %x = alloca i8, i32 %t
+ call void @dummy(i8* %x)
+ ret void
+
+; NOSSE: test2:
+; NOSSE-NOT: and
+; NOSSE: movl $0
+
+; SSE1: test2:
+; SSE1: andl $-16
+; SSE1: movl %esp, %esi
+; SSE1: movaps
+
+; SSE2: test2:
+; SSE2: andl $-16
+; SSE2: movl %esp, %esi
+; SSE2: movaps
+
+; AVX1: test2:
+; AVX1: andl $-16
+; AVX1: movl %esp, %esi
+; AVX1: vmovaps %xmm
+
+; AVX2: test2:
+; AVX2: andl $-16
+; AVX2: movl %esp, %esi
+; AVX2: vmovaps %xmm
+}
+
+declare void @dummy(i8*)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
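The realignment the CHECK lines look for is the usual mask-down idiom: andl $-16, %esp clears the low four bits of the stack pointer (-16 = 0xFFFFFFF0), rounding it down to a 16-byte boundary for SSE, while andl $-32 clears five bits for AVX's 32-byte requirement.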
diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll
index 72b3e0fa3d51..0d479f0abe7b 100644
--- a/test/CodeGen/X86/memset.ll
+++ b/test/CodeGen/X86/memset.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 9
-; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 3
+; RUN: llc < %s -march=x86 -mcpu=pentium2 -mtriple=i686-apple-darwin8.8.0 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -march=x86 -mcpu=pentium3 -mtriple=i686-apple-darwin8.8.0 | FileCheck %s --check-prefix=XMM
+; RUN: llc < %s -march=x86 -mcpu=bdver1 -mtriple=i686-apple-darwin8.8.0 | FileCheck %s --check-prefix=YMM
%struct.x = type { i16, i16 }
@@ -8,7 +9,30 @@ entry:
%up_mvd = alloca [8 x %struct.x] ; <[8 x %struct.x]*> [#uses=2]
%up_mvd116 = getelementptr [8 x %struct.x]* %up_mvd, i32 0, i32 0 ; <%struct.x*> [#uses=1]
%tmp110117 = bitcast [8 x %struct.x]* %up_mvd to i8* ; <i8*> [#uses=1]
+
call void @llvm.memset.p0i8.i64(i8* %tmp110117, i8 0, i64 32, i32 8, i1 false)
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86-NOT: movl $0,
+; X86: ret
+
+; XMM: xorps %xmm{{[0-9]+}}, [[Z:%xmm[0-9]+]]
+; XMM: movaps [[Z]],
+; XMM: movaps [[Z]],
+; XMM-NOT: movaps
+; XMM: ret
+
+; YMM: vxorps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, [[Z:%ymm[0-9]+]]
+; YMM: vmovaps [[Z]],
+; YMM-NOT: movaps
+; YMM: ret
+
call void @foo( %struct.x* %up_mvd116 ) nounwind
ret void
}
@@ -16,3 +40,16 @@ entry:
declare void @foo(%struct.x*)
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+define void @PR15348(i8* %a) {
+; Ensure that alignment of '0' in an @llvm.memset intrinsic results in
+; unaligned loads and stores.
+; XMM: PR15348
+; XMM: movb $0,
+; XMM: movl $0,
+; XMM: movl $0,
+; XMM: movl $0,
+; XMM: movl $0,
+ call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 17, i32 0, i1 false)
+ ret void
+}
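The three prefixes scale with register width: the same 32-byte zeroing is eight movl $0 stores on plain x86, two 16-byte movaps stores of a zeroed xmm register under SSE, and a single 32-byte vmovaps of a ymm register under AVX (8*4 = 2*16 = 1*32 = 32 bytes).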
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
index e20fce172f27..8cfa032797f7 100644
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 5
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movl | count 20
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core2 | grep movl | count 20
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | grep movq | count 10
define void @bork() nounwind {
diff --git a/test/CodeGen/X86/misched-crash.ll b/test/CodeGen/X86/misched-crash.ll
new file mode 100644
index 000000000000..7644ee070878
--- /dev/null
+++ b/test/CodeGen/X86/misched-crash.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -enable-misched -verify-misched
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10"
+
+; This function contains a cmp instruction with two users.
+; Hoisting the last use requires trimming the EFLAGS live range to the second.
+define void @rdar13353090(i8* %plane, i64 %_x1, i64 %_x2) {
+entry:
+ %cmp = icmp ult i64 %_x1, %_x2
+ %cond = select i1 %cmp, i64 %_x1, i64 %_x2
+ %cond10 = select i1 %cmp, i64 %_x2, i64 %_x1
+ %0 = load i64* null, align 8
+ %cmp16 = icmp ult i64 %cond, %0
+ %cmp23 = icmp ugt i64 %cond10, 0
+ br i1 %cmp16, label %land.lhs.true21, label %return
+
+land.lhs.true21: ; preds = %entry
+ %sub = add i64 %0, -1
+ br i1 %cmp23, label %if.then24, label %return
+
+if.then24: ; preds = %land.lhs.true21
+ %cmp16.i = icmp ult i64 %cond, %sub
+ %cond20.i = select i1 %cmp16.i, i64 %cond, i64 %sub
+ %add21.i = add i64 0, %cond20.i
+ br label %for.body34.i
+
+for.body34.i: ; preds = %for.inc39.i, %if.then24
+ %index.178.i = phi i64 [ %add21.i, %if.then24 ], [ %inc41.i, %for.inc39.i ]
+ %arrayidx35.i = getelementptr inbounds i8* %plane, i64 %index.178.i
+ %1 = load i8* %arrayidx35.i, align 1
+ %tobool36.i = icmp eq i8 %1, 0
+ br i1 %tobool36.i, label %for.inc39.i, label %return
+
+for.inc39.i: ; preds = %for.body34.i
+ %inc41.i = add i64 %index.178.i, 1
+ br i1 undef, label %return, label %for.body34.i
+
+return: ; preds = %for.inc39.i, %for.body34.i, %land.lhs.true21, %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/misched-ilp.ll b/test/CodeGen/X86/misched-ilp.ll
index c6cedb7be871..4ca296ca92e5 100644
--- a/test/CodeGen/X86/misched-ilp.ll
+++ b/test/CodeGen/X86/misched-ilp.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s
;
; Basic verification of the ScheduleDAGILP metric.
;
diff --git a/test/CodeGen/X86/misched-matmul.ll b/test/CodeGen/X86/misched-matmul.ll
new file mode 100644
index 000000000000..0f6e442b1a8d
--- /dev/null
+++ b/test/CodeGen/X86/misched-matmul.ll
@@ -0,0 +1,228 @@
+; REQUIRES: asserts
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -stats 2>&1 | FileCheck %s
+;
+; Verify that register pressure heuristics are working in MachineScheduler.
+;
+; When we enable subtree scheduling heuristics on X86, we may need a
+; flag to disable it for this test case.
+;
+; CHECK: @wrap_mul4
+; CHECK: 30 regalloc - Number of spills inserted
+
+define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
+entry:
+ %arrayidx1.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 0
+ %0 = load double* %arrayidx1.i, align 8, !tbaa !0
+ %arrayidx3.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 0
+ %1 = load double* %arrayidx3.i, align 8, !tbaa !0
+ %mul.i = fmul double %0, %1
+ %arrayidx5.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 1
+ %2 = load double* %arrayidx5.i, align 8, !tbaa !0
+ %arrayidx7.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 0
+ %3 = load double* %arrayidx7.i, align 8, !tbaa !0
+ %mul8.i = fmul double %2, %3
+ %add.i = fadd double %mul.i, %mul8.i
+ %arrayidx10.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 2
+ %4 = load double* %arrayidx10.i, align 8, !tbaa !0
+ %arrayidx12.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 0
+ %5 = load double* %arrayidx12.i, align 8, !tbaa !0
+ %mul13.i = fmul double %4, %5
+ %add14.i = fadd double %add.i, %mul13.i
+ %arrayidx16.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 3
+ %6 = load double* %arrayidx16.i, align 8, !tbaa !0
+ %arrayidx18.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 0
+ %7 = load double* %arrayidx18.i, align 8, !tbaa !0
+ %mul19.i = fmul double %6, %7
+ %add20.i = fadd double %add14.i, %mul19.i
+ %arrayidx25.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 1
+ %8 = load double* %arrayidx25.i, align 8, !tbaa !0
+ %mul26.i = fmul double %0, %8
+ %arrayidx30.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 1
+ %9 = load double* %arrayidx30.i, align 8, !tbaa !0
+ %mul31.i = fmul double %2, %9
+ %add32.i = fadd double %mul26.i, %mul31.i
+ %arrayidx36.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 1
+ %10 = load double* %arrayidx36.i, align 8, !tbaa !0
+ %mul37.i = fmul double %4, %10
+ %add38.i = fadd double %add32.i, %mul37.i
+ %arrayidx42.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 1
+ %11 = load double* %arrayidx42.i, align 8, !tbaa !0
+ %mul43.i = fmul double %6, %11
+ %add44.i = fadd double %add38.i, %mul43.i
+ %arrayidx49.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 2
+ %12 = load double* %arrayidx49.i, align 8, !tbaa !0
+ %mul50.i = fmul double %0, %12
+ %arrayidx54.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 2
+ %13 = load double* %arrayidx54.i, align 8, !tbaa !0
+ %mul55.i = fmul double %2, %13
+ %add56.i = fadd double %mul50.i, %mul55.i
+ %arrayidx60.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 2
+ %14 = load double* %arrayidx60.i, align 8, !tbaa !0
+ %mul61.i = fmul double %4, %14
+ %add62.i = fadd double %add56.i, %mul61.i
+ %arrayidx66.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 2
+ %15 = load double* %arrayidx66.i, align 8, !tbaa !0
+ %mul67.i = fmul double %6, %15
+ %add68.i = fadd double %add62.i, %mul67.i
+ %arrayidx73.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 3
+ %16 = load double* %arrayidx73.i, align 8, !tbaa !0
+ %mul74.i = fmul double %0, %16
+ %arrayidx78.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 3
+ %17 = load double* %arrayidx78.i, align 8, !tbaa !0
+ %mul79.i = fmul double %2, %17
+ %add80.i = fadd double %mul74.i, %mul79.i
+ %arrayidx84.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 3
+ %18 = load double* %arrayidx84.i, align 8, !tbaa !0
+ %mul85.i = fmul double %4, %18
+ %add86.i = fadd double %add80.i, %mul85.i
+ %arrayidx90.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 3
+ %19 = load double* %arrayidx90.i, align 8, !tbaa !0
+ %mul91.i = fmul double %6, %19
+ %add92.i = fadd double %add86.i, %mul91.i
+ %arrayidx95.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 0
+ %20 = load double* %arrayidx95.i, align 8, !tbaa !0
+ %mul98.i = fmul double %1, %20
+ %arrayidx100.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 1
+ %21 = load double* %arrayidx100.i, align 8, !tbaa !0
+ %mul103.i = fmul double %3, %21
+ %add104.i = fadd double %mul98.i, %mul103.i
+ %arrayidx106.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 2
+ %22 = load double* %arrayidx106.i, align 8, !tbaa !0
+ %mul109.i = fmul double %5, %22
+ %add110.i = fadd double %add104.i, %mul109.i
+ %arrayidx112.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 3
+ %23 = load double* %arrayidx112.i, align 8, !tbaa !0
+ %mul115.i = fmul double %7, %23
+ %add116.i = fadd double %add110.i, %mul115.i
+ %mul122.i = fmul double %8, %20
+ %mul127.i = fmul double %9, %21
+ %add128.i = fadd double %mul122.i, %mul127.i
+ %mul133.i = fmul double %10, %22
+ %add134.i = fadd double %add128.i, %mul133.i
+ %mul139.i = fmul double %11, %23
+ %add140.i = fadd double %add134.i, %mul139.i
+ %mul146.i = fmul double %12, %20
+ %mul151.i = fmul double %13, %21
+ %add152.i = fadd double %mul146.i, %mul151.i
+ %mul157.i = fmul double %14, %22
+ %add158.i = fadd double %add152.i, %mul157.i
+ %mul163.i = fmul double %15, %23
+ %add164.i = fadd double %add158.i, %mul163.i
+ %mul170.i = fmul double %16, %20
+ %mul175.i = fmul double %17, %21
+ %add176.i = fadd double %mul170.i, %mul175.i
+ %mul181.i = fmul double %18, %22
+ %add182.i = fadd double %add176.i, %mul181.i
+ %mul187.i = fmul double %19, %23
+ %add188.i = fadd double %add182.i, %mul187.i
+ %arrayidx191.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 0
+ %24 = load double* %arrayidx191.i, align 8, !tbaa !0
+ %mul194.i = fmul double %1, %24
+ %arrayidx196.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 1
+ %25 = load double* %arrayidx196.i, align 8, !tbaa !0
+ %mul199.i = fmul double %3, %25
+ %add200.i = fadd double %mul194.i, %mul199.i
+ %arrayidx202.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 2
+ %26 = load double* %arrayidx202.i, align 8, !tbaa !0
+ %mul205.i = fmul double %5, %26
+ %add206.i = fadd double %add200.i, %mul205.i
+ %arrayidx208.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 3
+ %27 = load double* %arrayidx208.i, align 8, !tbaa !0
+ %mul211.i = fmul double %7, %27
+ %add212.i = fadd double %add206.i, %mul211.i
+ %mul218.i = fmul double %8, %24
+ %mul223.i = fmul double %9, %25
+ %add224.i = fadd double %mul218.i, %mul223.i
+ %mul229.i = fmul double %10, %26
+ %add230.i = fadd double %add224.i, %mul229.i
+ %mul235.i = fmul double %11, %27
+ %add236.i = fadd double %add230.i, %mul235.i
+ %mul242.i = fmul double %12, %24
+ %mul247.i = fmul double %13, %25
+ %add248.i = fadd double %mul242.i, %mul247.i
+ %mul253.i = fmul double %14, %26
+ %add254.i = fadd double %add248.i, %mul253.i
+ %mul259.i = fmul double %15, %27
+ %add260.i = fadd double %add254.i, %mul259.i
+ %mul266.i = fmul double %16, %24
+ %mul271.i = fmul double %17, %25
+ %add272.i = fadd double %mul266.i, %mul271.i
+ %mul277.i = fmul double %18, %26
+ %add278.i = fadd double %add272.i, %mul277.i
+ %mul283.i = fmul double %19, %27
+ %add284.i = fadd double %add278.i, %mul283.i
+ %arrayidx287.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 0
+ %28 = load double* %arrayidx287.i, align 8, !tbaa !0
+ %mul290.i = fmul double %1, %28
+ %arrayidx292.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 1
+ %29 = load double* %arrayidx292.i, align 8, !tbaa !0
+ %mul295.i = fmul double %3, %29
+ %add296.i = fadd double %mul290.i, %mul295.i
+ %arrayidx298.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 2
+ %30 = load double* %arrayidx298.i, align 8, !tbaa !0
+ %mul301.i = fmul double %5, %30
+ %add302.i = fadd double %add296.i, %mul301.i
+ %arrayidx304.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 3
+ %31 = load double* %arrayidx304.i, align 8, !tbaa !0
+ %mul307.i = fmul double %7, %31
+ %add308.i = fadd double %add302.i, %mul307.i
+ %mul314.i = fmul double %8, %28
+ %mul319.i = fmul double %9, %29
+ %add320.i = fadd double %mul314.i, %mul319.i
+ %mul325.i = fmul double %10, %30
+ %add326.i = fadd double %add320.i, %mul325.i
+ %mul331.i = fmul double %11, %31
+ %add332.i = fadd double %add326.i, %mul331.i
+ %mul338.i = fmul double %12, %28
+ %mul343.i = fmul double %13, %29
+ %add344.i = fadd double %mul338.i, %mul343.i
+ %mul349.i = fmul double %14, %30
+ %add350.i = fadd double %add344.i, %mul349.i
+ %mul355.i = fmul double %15, %31
+ %add356.i = fadd double %add350.i, %mul355.i
+ %mul362.i = fmul double %16, %28
+ %mul367.i = fmul double %17, %29
+ %add368.i = fadd double %mul362.i, %mul367.i
+ %mul373.i = fmul double %18, %30
+ %add374.i = fadd double %add368.i, %mul373.i
+ %mul379.i = fmul double %19, %31
+ %add380.i = fadd double %add374.i, %mul379.i
+ store double %add20.i, double* %Out, align 8
+ %Res.i.sroa.1.8.idx2 = getelementptr inbounds double* %Out, i64 1
+ store double %add44.i, double* %Res.i.sroa.1.8.idx2, align 8
+ %Res.i.sroa.2.16.idx4 = getelementptr inbounds double* %Out, i64 2
+ store double %add68.i, double* %Res.i.sroa.2.16.idx4, align 8
+ %Res.i.sroa.3.24.idx6 = getelementptr inbounds double* %Out, i64 3
+ store double %add92.i, double* %Res.i.sroa.3.24.idx6, align 8
+ %Res.i.sroa.4.32.idx8 = getelementptr inbounds double* %Out, i64 4
+ store double %add116.i, double* %Res.i.sroa.4.32.idx8, align 8
+ %Res.i.sroa.5.40.idx10 = getelementptr inbounds double* %Out, i64 5
+ store double %add140.i, double* %Res.i.sroa.5.40.idx10, align 8
+ %Res.i.sroa.6.48.idx12 = getelementptr inbounds double* %Out, i64 6
+ store double %add164.i, double* %Res.i.sroa.6.48.idx12, align 8
+ %Res.i.sroa.7.56.idx14 = getelementptr inbounds double* %Out, i64 7
+ store double %add188.i, double* %Res.i.sroa.7.56.idx14, align 8
+ %Res.i.sroa.8.64.idx16 = getelementptr inbounds double* %Out, i64 8
+ store double %add212.i, double* %Res.i.sroa.8.64.idx16, align 8
+ %Res.i.sroa.9.72.idx18 = getelementptr inbounds double* %Out, i64 9
+ store double %add236.i, double* %Res.i.sroa.9.72.idx18, align 8
+ %Res.i.sroa.10.80.idx20 = getelementptr inbounds double* %Out, i64 10
+ store double %add260.i, double* %Res.i.sroa.10.80.idx20, align 8
+ %Res.i.sroa.11.88.idx22 = getelementptr inbounds double* %Out, i64 11
+ store double %add284.i, double* %Res.i.sroa.11.88.idx22, align 8
+ %Res.i.sroa.12.96.idx24 = getelementptr inbounds double* %Out, i64 12
+ store double %add308.i, double* %Res.i.sroa.12.96.idx24, align 8
+ %Res.i.sroa.13.104.idx26 = getelementptr inbounds double* %Out, i64 13
+ store double %add332.i, double* %Res.i.sroa.13.104.idx26, align 8
+ %Res.i.sroa.14.112.idx28 = getelementptr inbounds double* %Out, i64 14
+ store double %add356.i, double* %Res.i.sroa.14.112.idx28, align 8
+ %Res.i.sroa.15.120.idx30 = getelementptr inbounds double* %Out, i64 15
+ store double %add380.i, double* %Res.i.sroa.15.120.idx30, align 8
+ ret void
+}
+
+attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
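+
+; The function above is a fully unrolled and inlined multiply of two 4x4
+; double matrices. Per the load indices and the fmul/fadd chains it is
+; roughly this C sketch (the name is illustrative):
+;
+;   void mul4x4(const double A[4][4], const double B[4][4], double *Out) {
+;     for (int i = 0; i < 4; i++)
+;       for (int j = 0; j < 4; j++) {
+;         double s = 0.0;
+;         for (int k = 0; k < 4; k++)
+;           s += A[i][k] * B[k][j];
+;         Out[i * 4 + j] = s;  /* %Out is stored row-major */
+;       }
+;   }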
diff --git a/test/CodeGen/X86/misched-matrix.ll b/test/CodeGen/X86/misched-matrix.ll
new file mode 100644
index 000000000000..f5566e5e5de9
--- /dev/null
+++ b/test/CodeGen/X86/misched-matrix.ll
@@ -0,0 +1,195 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
+; RUN: -misched-topdown -verify-machineinstrs \
+; RUN: | FileCheck %s -check-prefix=TOPDOWN
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
+; RUN: -misched=ilpmin -verify-machineinstrs \
+; RUN: | FileCheck %s -check-prefix=ILPMIN
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
+; RUN: -misched=ilpmax -verify-machineinstrs \
+; RUN: | FileCheck %s -check-prefix=ILPMAX
+;
+; Verify that the MI scheduler minimizes register pressure for a
+; uniform set of bottom-up subtrees (unrolled matrix multiply).
+;
+; For the current top-down heuristics, ensure that some folded imulls have
+; been reordered with the stores. This tests the scheduler's ability to
+; perform cheap alias analysis (without requiring any AliasAnalysis pass).
+;
+; TOPDOWN: %for.body
+; TOPDOWN: movl %{{.*}}, (
+; TOPDOWN: imull {{[0-9]*}}(
+; TOPDOWN: movl %{{.*}}, 4(
+; TOPDOWN: imull {{[0-9]*}}(
+; TOPDOWN: movl %{{.*}}, 8(
+; TOPDOWN: movl %{{.*}}, 12(
+; TOPDOWN: %for.end
+;
+; For -misched=ilpmin, verify that each expression subtree is
+; scheduled independently, and that the imull/adds are interleaved.
+;
+; ILPMIN: %for.body
+; ILPMIN: movl %{{.*}}, (
+; ILPMIN: imull
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: movl %{{.*}}, 4(
+; ILPMIN: imull
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: movl %{{.*}}, 8(
+; ILPMIN: imull
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: movl %{{.*}}, 12(
+; ILPMIN: %for.end
+;
+; For -misched=ilpmax, verify that each expression subtree is
+; scheduled independently, and that the imull/adds are clustered.
+;
+; ILPMAX: %for.body
+; ILPMAX: movl %{{.*}}, (
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: movl %{{.*}}, 4(
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: movl %{{.*}}, 8(
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: movl %{{.*}}, 12(
+; ILPMAX: %for.end
+
+define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2,
+[4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx8 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 0
+ %tmp = load i32* %arrayidx8, align 4, !tbaa !0
+ %arrayidx12 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 0
+ %tmp1 = load i32* %arrayidx12, align 4, !tbaa !0
+ %arrayidx8.1 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 1
+ %tmp2 = load i32* %arrayidx8.1, align 4, !tbaa !0
+ %arrayidx12.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 0
+ %tmp3 = load i32* %arrayidx12.1, align 4, !tbaa !0
+ %arrayidx8.2 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 2
+ %tmp4 = load i32* %arrayidx8.2, align 4, !tbaa !0
+ %arrayidx12.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 0
+ %tmp5 = load i32* %arrayidx12.2, align 4, !tbaa !0
+ %arrayidx8.3 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 3
+ %tmp6 = load i32* %arrayidx8.3, align 4, !tbaa !0
+ %arrayidx12.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 0
+ %tmp8 = load i32* %arrayidx8, align 4, !tbaa !0
+ %arrayidx12.137 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 1
+ %tmp9 = load i32* %arrayidx12.137, align 4, !tbaa !0
+ %tmp10 = load i32* %arrayidx8.1, align 4, !tbaa !0
+ %arrayidx12.1.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 1
+ %tmp11 = load i32* %arrayidx12.1.1, align 4, !tbaa !0
+ %tmp12 = load i32* %arrayidx8.2, align 4, !tbaa !0
+ %arrayidx12.2.1 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 1
+ %tmp13 = load i32* %arrayidx12.2.1, align 4, !tbaa !0
+ %tmp14 = load i32* %arrayidx8.3, align 4, !tbaa !0
+ %arrayidx12.3.1 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 1
+ %tmp15 = load i32* %arrayidx12.3.1, align 4, !tbaa !0
+ %tmp16 = load i32* %arrayidx8, align 4, !tbaa !0
+ %arrayidx12.239 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 2
+ %tmp17 = load i32* %arrayidx12.239, align 4, !tbaa !0
+ %tmp18 = load i32* %arrayidx8.1, align 4, !tbaa !0
+ %arrayidx12.1.2 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 2
+ %tmp19 = load i32* %arrayidx12.1.2, align 4, !tbaa !0
+ %tmp20 = load i32* %arrayidx8.2, align 4, !tbaa !0
+ %arrayidx12.2.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 2
+ %tmp21 = load i32* %arrayidx12.2.2, align 4, !tbaa !0
+ %tmp22 = load i32* %arrayidx8.3, align 4, !tbaa !0
+ %arrayidx12.3.2 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 2
+ %tmp23 = load i32* %arrayidx12.3.2, align 4, !tbaa !0
+ %tmp24 = load i32* %arrayidx8, align 4, !tbaa !0
+ %arrayidx12.341 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 3
+ %tmp25 = load i32* %arrayidx12.341, align 4, !tbaa !0
+ %tmp26 = load i32* %arrayidx8.1, align 4, !tbaa !0
+ %arrayidx12.1.3 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 3
+ %tmp27 = load i32* %arrayidx12.1.3, align 4, !tbaa !0
+ %tmp28 = load i32* %arrayidx8.2, align 4, !tbaa !0
+ %arrayidx12.2.3 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 3
+ %tmp29 = load i32* %arrayidx12.2.3, align 4, !tbaa !0
+ %tmp30 = load i32* %arrayidx8.3, align 4, !tbaa !0
+ %arrayidx12.3.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 3
+ %tmp31 = load i32* %arrayidx12.3.3, align 4, !tbaa !0
+ %tmp7 = load i32* %arrayidx12.3, align 4, !tbaa !0
+ %mul = mul nsw i32 %tmp1, %tmp
+ %mul.1 = mul nsw i32 %tmp3, %tmp2
+ %mul.2 = mul nsw i32 %tmp5, %tmp4
+ %mul.3 = mul nsw i32 %tmp7, %tmp6
+ %mul.138 = mul nsw i32 %tmp9, %tmp8
+ %mul.1.1 = mul nsw i32 %tmp11, %tmp10
+ %mul.2.1 = mul nsw i32 %tmp13, %tmp12
+ %mul.3.1 = mul nsw i32 %tmp15, %tmp14
+ %mul.240 = mul nsw i32 %tmp17, %tmp16
+ %mul.1.2 = mul nsw i32 %tmp19, %tmp18
+ %mul.2.2 = mul nsw i32 %tmp21, %tmp20
+ %mul.3.2 = mul nsw i32 %tmp23, %tmp22
+ %mul.342 = mul nsw i32 %tmp25, %tmp24
+ %mul.1.3 = mul nsw i32 %tmp27, %tmp26
+ %mul.2.3 = mul nsw i32 %tmp29, %tmp28
+ %mul.3.3 = mul nsw i32 %tmp31, %tmp30
+ %add.1 = add nsw i32 %mul.1, %mul
+ %add.2 = add nsw i32 %mul.2, %add.1
+ %add.3 = add nsw i32 %mul.3, %add.2
+ %add.1.1 = add nsw i32 %mul.1.1, %mul.138
+ %add.2.1 = add nsw i32 %mul.2.1, %add.1.1
+ %add.3.1 = add nsw i32 %mul.3.1, %add.2.1
+ %add.1.2 = add nsw i32 %mul.1.2, %mul.240
+ %add.2.2 = add nsw i32 %mul.2.2, %add.1.2
+ %add.3.2 = add nsw i32 %mul.3.2, %add.2.2
+ %add.1.3 = add nsw i32 %mul.1.3, %mul.342
+ %add.2.3 = add nsw i32 %mul.2.3, %add.1.3
+ %add.3.3 = add nsw i32 %mul.3.3, %add.2.3
+ %arrayidx16 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 0
+ store i32 %add.3, i32* %arrayidx16, align 4, !tbaa !0
+ %arrayidx16.1 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 1
+ store i32 %add.3.1, i32* %arrayidx16.1, align 4, !tbaa !0
+ %arrayidx16.2 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 2
+ store i32 %add.3.2, i32* %arrayidx16.2, align 4, !tbaa !0
+ %arrayidx16.3 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 3
+ store i32 %add.3.3, i32* %arrayidx16.3, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 4
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
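+
+; For reference, @mmult above computes one row of %m3 per iteration of
+; %for.body, with the j and k dimensions fully unrolled; roughly this C
+; sketch (the parameter names match the IR, the rest is illustrative):
+;
+;   void mmult(int m1[4][4], int m2[4][4], int m3[4][4]) {
+;     for (int i = 0; i < 4; i++)      /* the %for.body loop */
+;       for (int j = 0; j < 4; j++) {  /* unrolled in the IR */
+;         int s = 0;
+;         for (int k = 0; k < 4; k++)  /* unrolled in the IR */
+;           s += m1[i][k] * m2[k][j];
+;         m3[i][j] = s;
+;       }
+;   }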
diff --git a/test/CodeGen/X86/misched-new.ll b/test/CodeGen/X86/misched-new.ll
index cec04b534fba..89e45b7cfc21 100644
--- a/test/CodeGen/X86/misched-new.ll
+++ b/test/CodeGen/X86/misched-new.ll
@@ -1,6 +1,9 @@
; RUN: llc < %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
; RUN: -misched=shuffle -misched-bottomup -verify-machineinstrs \
; RUN: | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
+; RUN: -misched=shuffle -misched-topdown -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefix TOPDOWN
; REQUIRES: asserts
;
; Interesting MachineScheduler cases.
@@ -51,3 +54,56 @@ if.end: ; preds = %if.then, %entry
}
declare void @bar(i32,i32)
+
+; Test that the DAG builder can handle an undef vreg on ExitSU.
+; CHECK: hasundef
+; CHECK: call
+
+%t0 = type { i32, i32, i8 }
+%t6 = type { i32 (...)**, %t7* }
+%t7 = type { i32 (...)** }
+
+define void @hasundef() unnamed_addr uwtable ssp align 2 {
+ %1 = alloca %t0, align 8
+ br i1 undef, label %3, label %2
+
+; <label>:2 ; preds = %0
+ unreachable
+
+; <label>:3 ; preds = %0
+ br i1 undef, label %4, label %5
+
+; <label>:4 ; preds = %3
+ call void undef(%t6* undef, %t0* %1)
+ unreachable
+
+; <label>:5 ; preds = %3
+ ret void
+}
+
+; Test top-down subregister liveness tracking. Self-verification
+; catches any pressure set underflow.
+; rdar://12797931.
+;
+; TOPDOWN: @testSubregTracking
+; TOPDOWN: divb
+; TOPDOWN: movzbl %al
+; TOPDOWN: ret
+define void @testSubregTracking() nounwind uwtable ssp align 2 {
+ %tmp = load i8* undef, align 1
+ %tmp6 = sub i8 0, %tmp
+ %tmp7 = load i8* undef, align 1
+ %tmp8 = udiv i8 %tmp6, %tmp7
+ %tmp9 = zext i8 %tmp8 to i64
+ %tmp10 = load i8* undef, align 1
+ %tmp11 = zext i8 %tmp10 to i64
+ %tmp12 = mul i64 %tmp11, %tmp9
+ %tmp13 = urem i8 %tmp6, %tmp7
+ %tmp14 = zext i8 %tmp13 to i32
+ %tmp15 = add nsw i32 %tmp14, 0
+ %tmp16 = add i32 %tmp15, 0
+ store i32 %tmp16, i32* undef, align 4
+ %tmp17 = add i64 0, %tmp12
+ store i64 %tmp17, i64* undef, align 8
+ ret void
+}
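+
+; (On x86, the i8 udiv and urem above select to a single divb, which leaves
+; the quotient in %al and the remainder in %ah, so the scheduler must track
+; the two subregisters of %ax independently.)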
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index 65ee7b1d8e00..bb42734833dd 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=sse41 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=sse41 | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 -mattr=sse41 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
define i32 @test1() nounwind readonly {
entry:
diff --git a/test/CodeGen/X86/ms-inline-asm.ll b/test/CodeGen/X86/ms-inline-asm.ll
index 24d28adda894..5048a93ad302 100644
--- a/test/CodeGen/X86/ms-inline-asm.ll
+++ b/test/CodeGen/X86/ms-inline-asm.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
define i32 @t1() nounwind {
entry:
%0 = tail call i32 asm sideeffect inteldialect "mov eax, $1\0A\09mov $0, eax", "=r,r,~{eax},~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
ret i32 %0
; CHECK: t1
+; CHECK: movl %esp, %ebp
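+; (functions containing MS-style inline asm are forced to use a frame
+; pointer, which is why each test now also checks this prologue move)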
; CHECK: {{## InlineAsm Start|#APP}}
; CHECK: .intel_syntax
; CHECK: mov eax, ecx
@@ -18,6 +19,7 @@ entry:
call void asm sideeffect inteldialect "mov eax, $$1", "~{eax},~{dirflag},~{fpsr},~{flags}"() nounwind
ret void
; CHECK: t2
+; CHECK: movl %esp, %ebp
; CHECK: {{## InlineAsm Start|#APP}}
; CHECK: .intel_syntax
; CHECK: mov eax, 1
@@ -32,6 +34,7 @@ entry:
call void asm sideeffect inteldialect "mov eax, DWORD PTR [$0]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %V.addr) nounwind
ret void
; CHECK: t3
+; CHECK: movl %esp, %ebp
; CHECK: {{## InlineAsm Start|#APP}}
; CHECK: .intel_syntax
; CHECK: mov eax, DWORD PTR {{[[esp]}}
@@ -53,6 +56,7 @@ entry:
%0 = load i32* %b1, align 4
ret i32 %0
; CHECK: t18
+; CHECK: movl %esp, %ebp
; CHECK: {{## InlineAsm Start|#APP}}
; CHECK: .intel_syntax
; CHECK: lea ebx, foo
@@ -61,3 +65,46 @@ entry:
; CHECK: .att_syntax
; CHECK: {{## InlineAsm End|#NO_APP}}
}
+
+define void @t19_helper() nounwind {
+entry:
+ ret void
+}
+
+define void @t19() nounwind {
+entry:
+ call void asm sideeffect inteldialect "call $0", "r,~{dirflag},~{fpsr},~{flags}"(void ()* @t19_helper) nounwind
+ ret void
+; CHECK: t19:
+; CHECK: movl %esp, %ebp
+; CHECK: movl ${{_?}}t19_helper, %eax
+; CHECK: {{## InlineAsm Start|#APP}}
+; CHECK: .intel_syntax
+; CHECK: call eax
+; CHECK: .att_syntax
+; CHECK: {{## InlineAsm End|#NO_APP}}
+}
+
+@results = global [2 x i32] [i32 3, i32 2], align 4
+
+define i32* @t30() nounwind ssp {
+entry:
+ %res = alloca i32*, align 4
+ call void asm sideeffect inteldialect "lea edi, dword ptr $0", "*m,~{edi},~{dirflag},~{fpsr},~{flags}"([2 x i32]* @results) nounwind
+ call void asm sideeffect inteldialect "mov dword ptr $0, edi", "=*m,~{dirflag},~{fpsr},~{flags}"(i32** %res) nounwind
+ %0 = load i32** %res, align 4
+ ret i32* %0
+; CHECK: t30:
+; CHECK: movl %esp, %ebp
+; CHECK: {{## InlineAsm Start|#APP}}
+; CHECK: .intel_syntax
+; CHECK: lea edi, dword ptr [{{_?}}results]
+; CHECK: .att_syntax
+; CHECK: {{## InlineAsm End|#NO_APP}}
+; CHECK: {{## InlineAsm Start|#APP}}
+; CHECK: .intel_syntax
+; CHECK: mov dword ptr [esi], edi
+; CHECK: .att_syntax
+; CHECK: {{## InlineAsm End|#NO_APP}}
+; CHECK: movl (%esi), %eax
+}
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll
index 9f7d036cf141..29b9f34464f0 100644
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s
+; RUN: llc -asm-verbose=false -disable-branch-fold -disable-block-placement -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s
; rdar://7236213
;
; The scheduler's 2-address hack has been disabled, so there is
diff --git a/test/CodeGen/X86/no-cmov.ll b/test/CodeGen/X86/no-cmov.ll
new file mode 100644
index 000000000000..62d73b0732e7
--- /dev/null
+++ b/test/CodeGen/X86/no-cmov.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=x86 -mcpu=i486 < %s | FileCheck %s
+
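+; The i486 has no cmov instructions, so the select below must be lowered to
+; a compare and branch, as the CHECK-NOT verifies.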
+define i32 @test1(i32 %g, i32* %j) {
+ %tobool = icmp eq i32 %g, 0
+ %cmp = load i32* %j, align 4
+ %retval.0 = select i1 %tobool, i32 1, i32 %cmp
+ ret i32 %retval.0
+
+; CHECK: test1:
+; CHECK-NOT: cmov
+}
diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll
index 476bb1099831..6425ef0e8376 100644
--- a/test/CodeGen/X86/phi-immediate-factoring.ll
+++ b/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
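+; (the -stats output grepped by the RUN line below is only produced by
+; asserts-enabled builds)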
; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
; PR1296
diff --git a/test/CodeGen/X86/pmovsx-inreg.ll b/test/CodeGen/X86/pmovsx-inreg.ll
new file mode 100644
index 000000000000..d8c27f25043a
--- /dev/null
+++ b/test/CodeGen/X86/pmovsx-inreg.ll
@@ -0,0 +1,176 @@
+; RUN: llc < %s -march=x86-64 -mcpu=penryn | FileCheck -check-prefix=SSE41 %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck -check-prefix=AVX1 %s
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck -check-prefix=AVX2 %s
+
+; PR14887
+; These tests inject a store into the chain to test the inreg versions of pmovsx.
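+; (presumably the extra zeroinitializer store keeps the extension from being
+; matched as a plain extending load, so the register-to-register pmovsx
+; patterns are exercised)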
+
+define void @test1(<2 x i8>* %in, <2 x i64>* %out) nounwind {
+ %wide.load35 = load <2 x i8>* %in, align 1
+ %sext = sext <2 x i8> %wide.load35 to <2 x i64>
+ store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
+ store <2 x i64> %sext, <2 x i64>* %out, align 8
+ ret void
+
+; SSE41: test1:
+; SSE41: pmovsxbq
+
+; AVX1: test1:
+; AVX1: vpmovsxbq
+
+; AVX2: test1:
+; AVX2: vpmovsxbq
+}
+
+define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
+ %wide.load35 = load <4 x i8>* %in, align 1
+ %sext = sext <4 x i8> %wide.load35 to <4 x i64>
+ store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
+ store <4 x i64> %sext, <4 x i64>* %out, align 8
+ ret void
+
+; AVX2: test2:
+; AVX2: vpmovsxbq
+}
+
+define void @test3(<4 x i8>* %in, <4 x i32>* %out) nounwind {
+ %wide.load35 = load <4 x i8>* %in, align 1
+ %sext = sext <4 x i8> %wide.load35 to <4 x i32>
+ store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
+ store <4 x i32> %sext, <4 x i32>* %out, align 8
+ ret void
+
+; SSE41: test3:
+; SSE41: pmovsxbd
+
+; AVX1: test3:
+; AVX1: vpmovsxbd
+
+; AVX2: test3:
+; AVX2: vpmovsxbd
+}
+
+define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
+ %wide.load35 = load <8 x i8>* %in, align 1
+ %sext = sext <8 x i8> %wide.load35 to <8 x i32>
+ store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
+ store <8 x i32> %sext, <8 x i32>* %out, align 8
+ ret void
+
+; AVX2: test4:
+; AVX2: vpmovsxbd
+}
+
+define void @test5(<8 x i8>* %in, <8 x i16>* %out) nounwind {
+ %wide.load35 = load <8 x i8>* %in, align 1
+ %sext = sext <8 x i8> %wide.load35 to <8 x i16>
+ store <8 x i16> zeroinitializer, <8 x i16>* undef, align 8
+ store <8 x i16> %sext, <8 x i16>* %out, align 8
+ ret void
+
+; SSE41: test5:
+; SSE41: pmovsxbw
+
+; AVX1: test5:
+; AVX1: vpmovsxbw
+
+; AVX2: test5:
+; AVX2: vpmovsxbw
+}
+
+define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
+ %wide.load35 = load <16 x i8>* %in, align 1
+ %sext = sext <16 x i8> %wide.load35 to <16 x i16>
+ store <16 x i16> zeroinitializer, <16 x i16>* undef, align 8
+ store <16 x i16> %sext, <16 x i16>* %out, align 8
+ ret void
+
+; AVX2: test6:
+; FIXME: v16i8 -> v16i16 is scalarized.
+; AVX2-NOT: pmovsx
+}
+
+define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
+ %wide.load35 = load <2 x i16>* %in, align 1
+ %sext = sext <2 x i16> %wide.load35 to <2 x i64>
+ store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
+ store <2 x i64> %sext, <2 x i64>* %out, align 8
+ ret void
+
+
+; SSE41: test7:
+; SSE41: pmovsxwq
+
+; AVX1: test7:
+; AVX1: vpmovsxwq
+
+; AVX2: test7:
+; AVX2: vpmovsxwq
+}
+
+define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
+ %wide.load35 = load <4 x i16>* %in, align 1
+ %sext = sext <4 x i16> %wide.load35 to <4 x i64>
+ store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
+ store <4 x i64> %sext, <4 x i64>* %out, align 8
+ ret void
+
+; AVX2: test8:
+; AVX2: vpmovsxwq
+}
+
+define void @test9(<4 x i16>* %in, <4 x i32>* %out) nounwind {
+ %wide.load35 = load <4 x i16>* %in, align 1
+ %sext = sext <4 x i16> %wide.load35 to <4 x i32>
+ store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
+ store <4 x i32> %sext, <4 x i32>* %out, align 8
+ ret void
+
+; SSE41: test9:
+; SSE41: pmovsxwd
+
+; AVX1: test9:
+; AVX1: vpmovsxwd
+
+; AVX2: test9:
+; AVX2: vpmovsxwd
+}
+
+define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
+ %wide.load35 = load <8 x i16>* %in, align 1
+ %sext = sext <8 x i16> %wide.load35 to <8 x i32>
+ store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
+ store <8 x i32> %sext, <8 x i32>* %out, align 8
+ ret void
+
+; AVX2: test10:
+; AVX2: vpmovsxwd
+}
+
+define void @test11(<2 x i32>* %in, <2 x i64>* %out) nounwind {
+ %wide.load35 = load <2 x i32>* %in, align 1
+ %sext = sext <2 x i32> %wide.load35 to <2 x i64>
+ store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
+ store <2 x i64> %sext, <2 x i64>* %out, align 8
+ ret void
+
+; SSE41: test11:
+; SSE41: pmovsxdq
+
+; AVX1: test11:
+; AVX1: vpmovsxdq
+
+; AVX2: test11:
+; AVX2: vpmovsxdq
+}
+
+define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
+ %wide.load35 = load <4 x i32>* %in, align 1
+ %sext = sext <4 x i32> %wide.load35 to <4 x i64>
+ store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
+ store <4 x i64> %sext, <4 x i64>* %out, align 8
+ ret void
+
+; AVX2: test12:
+; AVX2: vpmovsxdq
+}
diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll
index 58423d195964..0ee99875264f 100644
--- a/test/CodeGen/X86/pointer-vector.ll
+++ b/test/CodeGen/X86/pointer-vector.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 | FileCheck %s
-; RUN: opt -instsimplify %s -disable-output
+; RUN: opt -instsimplify -disable-output < %s
;CHECK: SHUFF0
define <8 x i32*> @SHUFF0(<4 x i32*> %ptrv) nounwind {
diff --git a/test/CodeGen/X86/pr10475.ll b/test/CodeGen/X86/pr10475.ll
new file mode 100644
index 000000000000..3efc39ee9f1f
--- /dev/null
+++ b/test/CodeGen/X86/pr10475.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx
+
+; No CHECK lines: this is a crash-only test.
+
+define void @autogen_262380_1000() {
+BB:
+ br label %CF79
+
+CF79: ; preds = %CF79, %BB
+ br i1 undef, label %CF79, label %CF84.critedge.critedge
+
+CF84.critedge.critedge: ; preds = %CF79
+ %L35 = load <8 x i32>* undef
+ br label %CF85
+
+CF85: ; preds = %CF85, %CF84.critedge.critedge
+ br i1 undef, label %CF85, label %CF86
+
+CF86: ; preds = %CF86, %CF85
+ %B61 = sub <8 x i32> %L35, zeroinitializer
+ %S64 = icmp ne <8 x i32> %B61, zeroinitializer
+ %E73 = extractelement <8 x i1> %S64, i32 6
+ br i1 %E73, label %CF86, label %CF87
+
+CF87: ; preds = %CF87, %CF86
+ br i1 undef, label %CF87, label %CF88
+
+CF88: ; preds = %CF87
+ ret void
+}
diff --git a/test/CodeGen/X86/pr10499.ll b/test/CodeGen/X86/pr10499.ll
new file mode 100644
index 000000000000..f9cc747e49a8
--- /dev/null
+++ b/test/CodeGen/X86/pr10499.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx -mattr=-sse2
+
+; No CHECK lines, as PR10499 is a crashing bug.
+
+define void @autogen_24438_500() {
+BB:
+ %I = insertelement <8 x i32> undef, i32 -1, i32 4
+ %BC = bitcast <8 x i32> %I to <8 x float>
+ br label %CF
+
+CF: ; preds = %CF, %BB
+ %ZE = fpext <8 x float> %BC to <8 x double>
+ br label %CF
+}
diff --git a/test/CodeGen/X86/pr10523.ll b/test/CodeGen/X86/pr10523.ll
new file mode 100644
index 000000000000..7191d6949c18
--- /dev/null
+++ b/test/CodeGen/X86/pr10523.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+
+; No CHECK lines: this is a crash-only test.
+
+define void @autogen_129334_5000() {
+BB:
+ %I74 = insertelement <32 x i32> undef, i32 undef, i32 15
+ %I105 = insertelement <32 x i32> undef, i32 undef, i32 14
+ %Shuff292 = shufflevector <32 x i32> %I74, <32 x i32> undef, <32 x i32> <i32 undef, i32 12, i32 14, i32 16, i32 undef, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 undef, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 undef, i32 54, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 2, i32 4, i32 6, i32 8>
+ %Shuff302 = shufflevector <32 x i32> %Shuff292, <32 x i32> undef, <32 x i32> <i32 27, i32 29, i32 undef, i32 33, i32 undef, i32 37, i32 39, i32 undef, i32 undef, i32 undef, i32 47, i32 undef, i32 51, i32 53, i32 55, i32 57, i32 undef, i32 undef, i32 63, i32 1, i32 undef, i32 undef, i32 undef, i32 9, i32 11, i32 13, i32 undef, i32 17, i32 19, i32 21, i32 23, i32 undef>
+ %I326 = insertelement <32 x i32> undef, i32 undef, i32 15
+ %B338 = sub <32 x i32> zeroinitializer, %I105
+ %FC339 = sitofp <32 x i32> %I326 to <32 x double>
+ %S341 = icmp ne <32 x i32> %B338, undef
+ %E376 = extractelement <32 x i1> %S341, i32 0
+ %Shuff419 = shufflevector <32 x i32> undef, <32 x i32> %Shuff302, <32 x i32> <i32 undef, i32 44, i32 46, i32 48, i32 50, i32 52, i32 undef, i32 56, i32 58, i32 60, i32 62, i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 12, i32 14, i32 undef, i32 undef, i32 20, i32 22, i32 undef, i32 26, i32 28, i32 undef, i32 32, i32 34, i32 36, i32 38, i32 40>
+ ret void
+}
diff --git a/test/CodeGen/X86/pr10524.ll b/test/CodeGen/X86/pr10524.ll
new file mode 100644
index 000000000000..ed3e7c528052
--- /dev/null
+++ b/test/CodeGen/X86/pr10524.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+
+; No CHECK lines: this is a crash-only test.
+
+define void @autogen_178513_5000() {
+BB:
+ %Shuff22 = shufflevector <2 x i32> undef, <2 x i32> zeroinitializer, <2 x i32> <i32 3, i32 1>
+ %B26 = sub <2 x i32> %Shuff22, zeroinitializer
+ %S79 = icmp eq <2 x i32> %B26, zeroinitializer
+ %B269 = urem <2 x i1> zeroinitializer, %S79
+ %Se335 = sext <2 x i1> %B269 to <2 x i8>
+ store <2 x i8> %Se335, <2 x i8>* undef
+ ret void
+}
diff --git a/test/CodeGen/X86/pr10525.ll b/test/CodeGen/X86/pr10525.ll
new file mode 100644
index 000000000000..342c1d63e192
--- /dev/null
+++ b/test/CodeGen/X86/pr10525.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+
+; No CHECK lines: this is a crash-only test.
+
+define void @autogen_163411_5000() {
+BB:
+ %L = load <2 x i64>* undef
+ %Shuff11 = shufflevector <2 x i64> %L, <2 x i64> %L, <2 x i32> <i32 2, i32 0>
+ %I51 = insertelement <2 x i64> undef, i64 undef, i32 0
+ %Shuff152 = shufflevector <2 x i64> %I51, <2 x i64> %Shuff11, <2 x i32> <i32 1, i32 3>
+ store <2 x i64> %Shuff152, <2 x i64>* undef
+ ret void
+}
diff --git a/test/CodeGen/X86/pr10526.ll b/test/CodeGen/X86/pr10526.ll
new file mode 100644
index 000000000000..6963fe515898
--- /dev/null
+++ b/test/CodeGen/X86/pr10526.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
+
+; No CHECK lines: this is a crash-only test.
+
+define void @autogen_142660_5000() {
+BB:
+ %Shuff49 = shufflevector <8 x i32> zeroinitializer, <8 x i32> undef, <8 x i32> <i32 2, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 0>
+ %B85 = sub <8 x i32> %Shuff49, zeroinitializer
+ %S242 = icmp eq <8 x i32> zeroinitializer, %B85
+ %FC284 = uitofp <8 x i1> %S242 to <8 x float>
+ store <8 x float> %FC284, <8 x float>* undef
+ ret void
+}
diff --git a/test/CodeGen/X86/pr11998.ll b/test/CodeGen/X86/pr11998.ll
new file mode 100644
index 000000000000..1baf07924d39
--- /dev/null
+++ b/test/CodeGen/X86/pr11998.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mcpu=corei7-avx -march=x86-64 -mattr=+avx
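+;
+; No CHECK lines; the RUN line only verifies that llc does not crash.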
+
+define void @autogen_51367_5000(i8) {
+BB:
+ %B = srem i8 55, %0
+ %B9 = shl i8 %B, %B
+ br label %CF
+
+CF: ; preds = %CF, %BB
+ br i1 undef, label %CF, label %CF403
+
+CF403: ; preds = %CF403, %CF
+ %S44 = icmp eq i8 %B9, %0
+ br i1 %S44, label %CF403, label %CF405
+
+CF405: ; preds = %CF405, %CF403
+ br label %CF405
+}
diff --git a/test/CodeGen/X86/pr14314.ll b/test/CodeGen/X86/pr14314.ll
index 5388a4b01b65..0832702244e5 100644
--- a/test/CodeGen/X86/pr14314.ll
+++ b/test/CodeGen/X86/pr14314.ll
@@ -5,9 +5,9 @@ entry:
%0 = atomicrmw sub i64* %a, i64 %b seq_cst
ret i64 %0
; CHECK: atomicSub
-; movl %eax, %ebx
-; subl {{%[a-z]+}}, %ebx
-; movl %edx, %ecx
-; sbbl {{%[a-z]+}}, %ecx
+; CHECK: movl %eax, %ebx
+; CHECK: subl {{%[a-z]+}}, %ebx
+; CHECK: movl %edx, %ecx
+; CHECK: sbbl {{%[a-z]+}}, %ecx
; CHECK: ret
}
diff --git a/test/CodeGen/X86/pr14562.ll b/test/CodeGen/X86/pr14562.ll
new file mode 100644
index 000000000000..e66f1752a30f
--- /dev/null
+++ b/test/CodeGen/X86/pr14562.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+@temp1 = global i64 -77129852189294865, align 8
+
+define void @foo() nounwind {
+ %x = load i64* @temp1, align 8
+ %s = shl i64 %x, 32
+ %t = trunc i64 %s to i32
+ %z = zext i32 %t to i64
+ store i64 %z, i64* @temp1, align 8
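+; %z is known to be zero: the low 32 bits of (%x << 32) are all zero, so the
+; trunc/zext chain yields 0 and both halves are stored as immediate $0.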
+; CHECK: movl $0, {{_?}}temp1+4
+; CHECK: movl $0, {{_?}}temp1
+ ret void
+}
+
diff --git a/test/CodeGen/X86/pr15267.ll b/test/CodeGen/X86/pr15267.ll
new file mode 100644
index 000000000000..c8aaf327a7dd
--- /dev/null
+++ b/test/CodeGen/X86/pr15267.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx | FileCheck %s
+
+define <4 x i3> @test1(<4 x i3>* %in) nounwind {
+ %ret = load <4 x i3>* %in, align 1
+ ret <4 x i3> %ret
+}
+
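+; A <4 x i3> value packs its four 3-bit lanes into 12 bits of memory, so
+; legalization expands the load into scalar shift/mask extraction of each
+; lane, as the shrl/andl $7 sequences below check.
+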
+; CHECK: test1
+; CHECK: movzwl
+; CHECK: shrl $3
+; CHECK: andl $7
+; CHECK: andl $7
+; CHECK: vmovd
+; CHECK: pinsrd $1
+; CHECK: shrl $6
+; CHECK: andl $7
+; CHECK: pinsrd $2
+; CHECK: shrl $9
+; CHECK: andl $7
+; CHECK: pinsrd $3
+; CHECK: ret
+
+define <4 x i1> @test2(<4 x i1>* %in) nounwind {
+ %ret = load <4 x i1>* %in, align 1
+ ret <4 x i1> %ret
+}
+
+; CHECK: test2
+; CHECK: movzbl
+; CHECK: shrl
+; CHECK: andl $1
+; CHECK: andl $1
+; CHECK: vmovd
+; CHECK: pinsrd $1
+; CHECK: shrl $2
+; CHECK: andl $1
+; CHECK: pinsrd $2
+; CHECK: shrl $3
+; CHECK: andl $1
+; CHECK: pinsrd $3
+; CHECK: ret
+
+define <4 x i64> @test3(<4 x i1>* %in) nounwind {
+ %wide.load35 = load <4 x i1>* %in, align 1
+ %sext = sext <4 x i1> %wide.load35 to <4 x i64>
+ ret <4 x i64> %sext
+}
+
+; CHECK: test3
+; CHECK: movzbl
+; CHECK: shrl
+; CHECK: andl $1
+; CHECK: andl $1
+; CHECK: vmovd
+; CHECK: pinsrd $1
+; CHECK: shrl $2
+; CHECK: andl $1
+; CHECK: pinsrd $2
+; CHECK: shrl $3
+; CHECK: andl $1
+; CHECK: pinsrd $3
+; CHECK: pslld
+; CHECK: psrad
+; CHECK: pmovsxdq
+; CHECK: pmovsxdq
+; CHECK: ret
diff --git a/test/CodeGen/X86/pr15296.ll b/test/CodeGen/X86/pr15296.ll
new file mode 100644
index 000000000000..1187d80cdf75
--- /dev/null
+++ b/test/CodeGen/X86/pr15296.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7-avx | FileCheck %s
+
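+; Each function shifts a 256-bit integer vector right by a splatted scalar
+; amount. AVX1 has no 256-bit integer shifts, so the operation is split into
+; two 128-bit halves, which is why each test checks for two psrl ops.
+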
+define <8 x i32> @shiftInput___vyuunu(<8 x i32> %input, i32 %shiftval, <8 x i32> %__mask) nounwind {
+allocas:
+ %smear.0 = insertelement <8 x i32> undef, i32 %shiftval, i32 0
+ %smear.1 = insertelement <8 x i32> %smear.0, i32 %shiftval, i32 1
+ %smear.2 = insertelement <8 x i32> %smear.1, i32 %shiftval, i32 2
+ %smear.3 = insertelement <8 x i32> %smear.2, i32 %shiftval, i32 3
+ %smear.4 = insertelement <8 x i32> %smear.3, i32 %shiftval, i32 4
+ %smear.5 = insertelement <8 x i32> %smear.4, i32 %shiftval, i32 5
+ %smear.6 = insertelement <8 x i32> %smear.5, i32 %shiftval, i32 6
+ %smear.7 = insertelement <8 x i32> %smear.6, i32 %shiftval, i32 7
+ %bitop = lshr <8 x i32> %input, %smear.7
+ ret <8 x i32> %bitop
+}
+
+; CHECK: shiftInput___vyuunu
+; CHECK: psrld
+; CHECK: psrld
+; CHECK: ret
+
+define <8 x i32> @shiftInput___canonical(<8 x i32> %input, i32 %shiftval, <8 x i32> %__mask) nounwind {
+allocas:
+ %smear.0 = insertelement <8 x i32> undef, i32 %shiftval, i32 0
+ %smear.7 = shufflevector <8 x i32> %smear.0, <8 x i32> undef, <8 x i32> zeroinitializer
+ %bitop = lshr <8 x i32> %input, %smear.7
+ ret <8 x i32> %bitop
+}
+
+; CHECK: shiftInput___canonical
+; CHECK: psrld
+; CHECK: psrld
+; CHECK: ret
+
+define <4 x i64> @shiftInput___64in32bitmode(<4 x i64> %input, i64 %shiftval, <4 x i64> %__mask) nounwind {
+allocas:
+ %smear.0 = insertelement <4 x i64> undef, i64 %shiftval, i32 0
+ %smear.7 = shufflevector <4 x i64> %smear.0, <4 x i64> undef, <4 x i32> zeroinitializer
+ %bitop = lshr <4 x i64> %input, %smear.7
+ ret <4 x i64> %bitop
+}
+
+; CHECK: shiftInput___64in32bitmode
+; CHECK: psrlq
+; CHECK: psrlq
+; CHECK: ret
diff --git a/test/CodeGen/X86/pr15309.ll b/test/CodeGen/X86/pr15309.ll
new file mode 100644
index 000000000000..6dbbc72a7b7a
--- /dev/null
+++ b/test/CodeGen/X86/pr15309.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7 | FileCheck %s
+
+define void @test_convert_float2_ulong2(<2 x i64>* nocapture %src, <2 x float>* nocapture %dest) noinline {
+L.entry:
+ %0 = getelementptr <2 x i64>* %src, i32 10
+ %1 = load <2 x i64>* %0, align 16
+ %2 = uitofp <2 x i64> %1 to <2 x float>
+ %3 = getelementptr <2 x float>* %dest, i32 10
+ store <2 x float> %2, <2 x float>* %3, align 8
+ ret void
+}
+
+; CHECK: test_convert_float2_ulong2
+; CHECK-NOT: cvtpd2ps
+; CHECK: ret
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index d8f37781fc6e..9f8dc0370668 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -stats 2>&1 | not grep "instructions sunk"
; PR3522
diff --git a/test/CodeGen/X86/pre-ra-sched.ll b/test/CodeGen/X86/pre-ra-sched.ll
new file mode 100644
index 000000000000..b792ffa09fb9
--- /dev/null
+++ b/test/CodeGen/X86/pre-ra-sched.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \
+; RUN: 2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; rdar://13279013: pre-RA-sched should not check all interferences and
+; repush them on the ready queue after scheduling each instruction.
+;
+; CHECK: *** List Scheduling
+; CHECK: Interfering reg EFLAGS
+; CHECK: Repushing
+; CHECK: Repushing
+; CHECK: Repushing
+; CHECK-NOT: Repushing
+; CHECK: *** Final schedule
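+;
+; The xor/select subtrees below interfere on EFLAGS; the CHECK-NOT above
+; ensures interferences are not repushed after every scheduled instruction.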
+define i32 @test(i8* %pin) #0 {
+ %g0 = getelementptr inbounds i8* %pin, i64 0
+ %l0 = load i8* %g0, align 1
+
+ %g1a = getelementptr inbounds i8* %pin, i64 1
+ %l1a = load i8* %g1a, align 1
+ %z1a = zext i8 %l1a to i32
+ %g1b = getelementptr inbounds i8* %pin, i64 2
+ %l1b = load i8* %g1b, align 1
+ %z1b = zext i8 %l1b to i32
+ %c1 = icmp ne i8 %l0, 0
+ %x1 = xor i32 %z1a, %z1b
+ %s1 = select i1 %c1, i32 %z1a, i32 %x1
+
+ %g2a = getelementptr inbounds i8* %pin, i64 3
+ %l2a = load i8* %g2a, align 1
+ %z2a = zext i8 %l2a to i32
+ %g2b = getelementptr inbounds i8* %pin, i64 4
+ %l2b = load i8* %g2b, align 1
+ %z2b = zext i8 %l2b to i32
+ %x2 = xor i32 %z2a, %z2b
+ %s2 = select i1 %c1, i32 %z2a, i32 %x2
+
+ %g3a = getelementptr inbounds i8* %pin, i64 5
+ %l3a = load i8* %g3a, align 1
+ %z3a = zext i8 %l3a to i32
+ %g3b = getelementptr inbounds i8* %pin, i64 6
+ %l3b = load i8* %g3b, align 1
+ %z3b = zext i8 %l3b to i32
+ %x3 = xor i32 %z3a, %z3b
+ %s3 = select i1 %c1, i32 %z3a, i32 %x3
+
+ %c3 = icmp ne i8 %l1a, 0
+ %c4 = icmp ne i8 %l2a, 0
+
+ %s4 = select i1 %c3, i32 %s1, i32 %s2
+ %s5 = select i1 %c4, i32 %s4, i32 %s3
+
+ ret i32 %s5
+}
+
+attributes #0 = { nounwind ssp uwtable }
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index ec2f302b1499..efb51913c5c1 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW
; rdar://10538297
@@ -9,10 +10,12 @@ entry:
; CHECK: prefetcht1
; CHECK: prefetcht0
; CHECK: prefetchnta
+; PRFCHW: prefetchw
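+; (rw = 1 in the new prefetch call below requests a write prefetch, which
+; lowers to prefetchw when the +prfchw feature is available)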
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 )
ret void
}
diff --git a/test/CodeGen/X86/psubus.ll b/test/CodeGen/X86/psubus.ll
new file mode 100644
index 000000000000..aff4afbd2e35
--- /dev/null
+++ b/test/CodeGen/X86/psubus.ll
@@ -0,0 +1,340 @@
+; RUN: llc -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
+; RUN: llc -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
+; RUN: llc -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
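+
+; Each loop below computes an unsigned saturating subtract, x minus a
+; constant (or a splatted variable) clamped at zero, written in one of
+; several equivalent select forms. Per element it is roughly this C sketch
+; (the name is illustrative):
+;
+;   unsigned short subus(unsigned short x, unsigned short y) {
+;     return x > y ? x - y : 0;  /* should match psubusw / psubusb */
+;   }
+;
+; The CHECK lines verify that each form is matched to a psubus instruction.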
+
+define void @test1(i16* nocapture %head) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds i16* %head, i64 %index
+ %1 = bitcast i16* %0 to <8 x i16>*
+ %2 = load <8 x i16>* %1, align 2
+ %3 = icmp slt <8 x i16> %2, zeroinitializer
+ %4 = xor <8 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+ %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
+ store <8 x i16> %5, <8 x i16>* %1, align 2
+ %index.next = add i64 %index, 8
+ %6 = icmp eq i64 %index.next, 16384
+ br i1 %6, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: @test1
+; SSE2: psubusw LCPI0_0(%rip), %xmm0
+
+; AVX1: @test1
+; AVX1: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
+
+; AVX2: @test1
+; AVX2: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
+}
+
+define void @test2(i16* nocapture %head) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds i16* %head, i64 %index
+ %1 = bitcast i16* %0 to <8 x i16>*
+ %2 = load <8 x i16>* %1, align 2
+ %3 = icmp ugt <8 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
+ %4 = add <8 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
+ %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
+ store <8 x i16> %5, <8 x i16>* %1, align 2
+ %index.next = add i64 %index, 8
+ %6 = icmp eq i64 %index.next, 16384
+ br i1 %6, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: @test2
+; SSE2: psubusw LCPI1_0(%rip), %xmm0
+
+; AVX1: @test2
+; AVX1: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
+
+; AVX2: @test2
+; AVX2: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
+}
+
+define void @test3(i16* nocapture %head, i16 zeroext %w) nounwind {
+vector.ph:
+ %0 = insertelement <8 x i16> undef, i16 %w, i32 0
+ %broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %1 = getelementptr inbounds i16* %head, i64 %index
+ %2 = bitcast i16* %1 to <8 x i16>*
+ %3 = load <8 x i16>* %2, align 2
+ %4 = icmp ult <8 x i16> %3, %broadcast15
+ %5 = sub <8 x i16> %3, %broadcast15
+ %6 = select <8 x i1> %4, <8 x i16> zeroinitializer, <8 x i16> %5
+ store <8 x i16> %6, <8 x i16>* %2, align 2
+ %index.next = add i64 %index, 8
+ %7 = icmp eq i64 %index.next, 16384
+ br i1 %7, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: @test3
+; SSE2: psubusw %xmm0, %xmm1
+
+; AVX1: @test3
+; AVX1: vpsubusw %xmm0, %xmm1, %xmm1
+
+; AVX2: @test3
+; AVX2: vpsubusw %xmm0, %xmm1, %xmm1
+}
+
+define void @test4(i8* nocapture %head) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds i8* %head, i64 %index
+ %1 = bitcast i8* %0 to <16 x i8>*
+ %2 = load <16 x i8>* %1, align 1
+ %3 = icmp slt <16 x i8> %2, zeroinitializer
+ %4 = xor <16 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+ %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
+ store <16 x i8> %5, <16 x i8>* %1, align 1
+ %index.next = add i64 %index, 16
+ %6 = icmp eq i64 %index.next, 16384
+ br i1 %6, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: @test4
+; SSE2: psubusb LCPI3_0(%rip), %xmm0
+
+; AVX1: @test4
+; AVX1: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
+
+; AVX2: @test4
+; AVX2: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
+}
+
+define void @test5(i8* nocapture %head) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds i8* %head, i64 %index
+ %1 = bitcast i8* %0 to <16 x i8>*
+ %2 = load <16 x i8>* %1, align 1
+ %3 = icmp ugt <16 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
+ %4 = add <16 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
+ %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
+ store <16 x i8> %5, <16 x i8>* %1, align 1
+ %index.next = add i64 %index, 16
+ %6 = icmp eq i64 %index.next, 16384
+ br i1 %6, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: @test5
+; SSE2: psubusb LCPI4_0(%rip), %xmm0
+
+; AVX1: @test5
+; AVX1: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0
+
+; AVX2: @test5
+; AVX2: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0
+}
+
+define void @test6(i8* nocapture %head, i8 zeroext %w) nounwind {
+vector.ph:
+ %0 = insertelement <16 x i8> undef, i8 %w, i32 0
+ %broadcast15 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %1 = getelementptr inbounds i8* %head, i64 %index
+ %2 = bitcast i8* %1 to <16 x i8>*
+ %3 = load <16 x i8>* %2, align 1
+ %4 = icmp ult <16 x i8> %3, %broadcast15
+ %5 = sub <16 x i8> %3, %broadcast15
+ %6 = select <16 x i1> %4, <16 x i8> zeroinitializer, <16 x i8> %5
+ store <16 x i8> %6, <16 x i8>* %2, align 1
+ %index.next = add i64 %index, 16
+ %7 = icmp eq i64 %index.next, 16384
+ br i1 %7, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: @test6
+; SSE2: psubusb %xmm0, %xmm1
+
+; AVX1: @test6
+; AVX1: vpsubusb %xmm0, %xmm1, %xmm1
+
+; AVX2: @test6
+; AVX2: vpsubusb %xmm0, %xmm1, %xmm1
+}
+
+define void @test7(i16* nocapture %head) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds i16* %head, i64 %index
+ %1 = bitcast i16* %0 to <16 x i16>*
+ %2 = load <16 x i16>* %1, align 2
+ %3 = icmp slt <16 x i16> %2, zeroinitializer
+ %4 = xor <16 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+ %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
+ store <16 x i16> %5, <16 x i16>* %1, align 2
+ %index.next = add i64 %index, 8
+ %6 = icmp eq i64 %index.next, 16384
+ br i1 %6, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: @test7
+; AVX2: vpsubusw LCPI6_0(%rip), %ymm0, %ymm0
+}
+
+define void @test8(i16* nocapture %head) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds i16* %head, i64 %index
+ %1 = bitcast i16* %0 to <16 x i16>*
+ %2 = load <16 x i16>* %1, align 2
+ %3 = icmp ugt <16 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
+ %4 = add <16 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
+ %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
+ store <16 x i16> %5, <16 x i16>* %1, align 2
+ %index.next = add i64 %index, 8
+ %6 = icmp eq i64 %index.next, 16384
+ br i1 %6, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: @test8
+; AVX2: vpsubusw LCPI7_0(%rip), %ymm0, %ymm0
+}
+
+define void @test9(i16* nocapture %head, i16 zeroext %w) nounwind {
+vector.ph:
+ %0 = insertelement <16 x i16> undef, i16 %w, i32 0
+ %broadcast15 = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %1 = getelementptr inbounds i16* %head, i64 %index
+ %2 = bitcast i16* %1 to <16 x i16>*
+ %3 = load <16 x i16>* %2, align 2
+ %4 = icmp ult <16 x i16> %3, %broadcast15
+ %5 = sub <16 x i16> %3, %broadcast15
+ %6 = select <16 x i1> %4, <16 x i16> zeroinitializer, <16 x i16> %5
+ store <16 x i16> %6, <16 x i16>* %2, align 2
+ %index.next = add i64 %index, 8
+ %7 = icmp eq i64 %index.next, 16384
+ br i1 %7, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+
+; AVX2: @test9
+; AVX2: vpsubusw %ymm0, %ymm1, %ymm1
+}
+
+define void @test10(i8* nocapture %head) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds i8* %head, i64 %index
+ %1 = bitcast i8* %0 to <32 x i8>*
+ %2 = load <32 x i8>* %1, align 1
+ %3 = icmp slt <32 x i8> %2, zeroinitializer
+ %4 = xor <32 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+ %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
+ store <32 x i8> %5, <32 x i8>* %1, align 1
+ %index.next = add i64 %index, 16
+ %6 = icmp eq i64 %index.next, 16384
+ br i1 %6, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+
+; AVX2: @test10
+; AVX2: vpsubusb LCPI9_0(%rip), %ymm0, %ymm0
+}
+
+define void @test11(i8* nocapture %head) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds i8* %head, i64 %index
+ %1 = bitcast i8* %0 to <32 x i8>*
+ %2 = load <32 x i8>* %1, align 1
+ %3 = icmp ugt <32 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
+ %4 = add <32 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
+ %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
+ store <32 x i8> %5, <32 x i8>* %1, align 1
+ %index.next = add i64 %index, 16
+ %6 = icmp eq i64 %index.next, 16384
+ br i1 %6, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: @test11
+; AVX2: vpsubusb LCPI10_0(%rip), %ymm0, %ymm0
+}
+
+define void @test12(i8* nocapture %head, i8 zeroext %w) nounwind {
+vector.ph:
+ %0 = insertelement <32 x i8> undef, i8 %w, i32 0
+ %broadcast15 = shufflevector <32 x i8> %0, <32 x i8> undef, <32 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %1 = getelementptr inbounds i8* %head, i64 %index
+ %2 = bitcast i8* %1 to <32 x i8>*
+ %3 = load <32 x i8>* %2, align 1
+ %4 = icmp ult <32 x i8> %3, %broadcast15
+ %5 = sub <32 x i8> %3, %broadcast15
+ %6 = select <32 x i1> %4, <32 x i8> zeroinitializer, <32 x i8> %5
+ store <32 x i8> %6, <32 x i8>* %2, align 1
+ %index.next = add i64 %index, 16
+ %7 = icmp eq i64 %index.next, 16384
+ br i1 %7, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: @test12
+; AVX2: vpsubusb %ymm0, %ymm1, %ymm1
+}
diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll
index e2224a619676..98f407776381 100644
--- a/test/CodeGen/X86/rdrand.ll
+++ b/test/CodeGen/X86/rdrand.ll
@@ -39,7 +39,7 @@ define i32 @_rdrand64_step(i64* %random_val) {
%isvalid = extractvalue {i64, i32} %call, 1
ret i32 %isvalid
; CHECK: _rdrand64_step:
-; CHECK: rdrandq %r[[T1:[[a-z]+]]
+; CHECK: rdrandq %r[[T1:[a-z]+]]
; CHECK: movq %r[[T1]], (%r[[A0]])
; CHECK: movl $1, %eax
; CHECK: cmovael %e[[T1]], %eax
diff --git a/test/CodeGen/X86/rdseed.ll b/test/CodeGen/X86/rdseed.ll
new file mode 100644
index 000000000000..35de7ebf7430
--- /dev/null
+++ b/test/CodeGen/X86/rdseed.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx-i -mattr=+rdseed | FileCheck %s
+
+declare {i16, i32} @llvm.x86.rdseed.16()
+declare {i32, i32} @llvm.x86.rdseed.32()
+declare {i64, i32} @llvm.x86.rdseed.64()
+
+define i32 @_rdseed16_step(i16* %random_val) {
+ %call = call {i16, i32} @llvm.x86.rdseed.16()
+ %randval = extractvalue {i16, i32} %call, 0
+ store i16 %randval, i16* %random_val
+ %isvalid = extractvalue {i16, i32} %call, 1
+ ret i32 %isvalid
+; CHECK: _rdseed16_step:
+; CHECK: rdseedw %ax
+; CHECK: movw %ax, (%r[[A0:di|cx]])
+; CHECK: movzwl %ax, %ecx
+; CHECK: movl $1, %eax
+; CHECK: cmovael %ecx, %eax
+; CHECK: ret
+}
+
+define i32 @_rdseed32_step(i32* %random_val) {
+ %call = call {i32, i32} @llvm.x86.rdseed.32()
+ %randval = extractvalue {i32, i32} %call, 0
+ store i32 %randval, i32* %random_val
+ %isvalid = extractvalue {i32, i32} %call, 1
+ ret i32 %isvalid
+; CHECK: _rdseed32_step:
+; CHECK: rdseedl %e[[T0:[a-z]+]]
+; CHECK: movl %e[[T0]], (%r[[A0]])
+; CHECK: movl $1, %eax
+; CHECK: cmovael %e[[T0]], %eax
+; CHECK: ret
+}
+
+define i32 @_rdseed64_step(i64* %random_val) {
+ %call = call {i64, i32} @llvm.x86.rdseed.64()
+ %randval = extractvalue {i64, i32} %call, 0
+ store i64 %randval, i64* %random_val
+ %isvalid = extractvalue {i64, i32} %call, 1
+ ret i32 %isvalid
+; CHECK: _rdseed64_step:
+; CHECK: rdseedq %r[[T1:[a-z]+]]
+; CHECK: movq %r[[T1]], (%r[[A0]])
+; CHECK: movl $1, %eax
+; CHECK: cmovael %e[[T1]], %eax
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/regpressure.ll b/test/CodeGen/X86/regpressure.ll
index 52d7b56f182e..1f756bee8a9d 100644
--- a/test/CodeGen/X86/regpressure.ll
+++ b/test/CodeGen/X86/regpressure.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
;; Both functions in this testcase should codegen to the same function, and
;; neither of them should require spilling anything to the stack.
diff --git a/test/CodeGen/X86/ret-mmx.ll b/test/CodeGen/X86/ret-mmx.ll
index 865e147a4a24..778e4722cd95 100644
--- a/test/CodeGen/X86/ret-mmx.ll
+++ b/test/CodeGen/X86/ret-mmx.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -mattr=+mmx,+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -mcpu=core2 -mattr=+mmx,+sse2 | FileCheck %s
; rdar://6602459
@g_v1di = external global <1 x i64>
diff --git a/test/CodeGen/X86/rip-rel-lea.ll b/test/CodeGen/X86/rip-rel-lea.ll
new file mode 100644
index 000000000000..71dacf60caa1
--- /dev/null
+++ b/test/CodeGen/X86/rip-rel-lea.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=PIC64
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 -relocation-model=pic | FileCheck %s -check-prefix=PICX32
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=PIC32
+
+; On x86-64, use %rip-relative addressing for the internal global even in
+; PIC mode, because it has a smaller encoding than going through the GOT.
+
+@a = internal global double 3.4
+define double* @foo() nounwind {
+ %a = getelementptr double* @a, i64 0
+ ret double* %a
+
+; PIC64: leaq a(%rip)
+; PICX32: leal a(%rip)
+; PIC32: leal a@GOTOFF(%eax)
+}
diff --git a/test/CodeGen/X86/sandybridge-loads.ll b/test/CodeGen/X86/sandybridge-loads.ll
new file mode 100644
index 000000000000..5a23cf136d85
--- /dev/null
+++ b/test/CodeGen/X86/sandybridge-loads.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
+
+; CHECK: wideloads
+; CHECK: vmovaps
+; CHECK: vinsertf128
+; CHECK: vmovaps
+; CHECK-NOT: vinsertf128
+; CHECK: ret
+
+define void @wideloads(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+ %v0 = load <8 x float>* %a, align 16 ; <---- unaligned!
+ %v1 = load <8 x float>* %b, align 32 ; <---- aligned!
+ %m0 = fcmp olt <8 x float> %v1, %v0
+ %v2 = load <8 x float>* %c, align 32 ; <---- aligned!
+ %m1 = fcmp olt <8 x float> %v2, %v0
+ %mand = and <8 x i1> %m1, %m0
+ %r = zext <8 x i1> %mand to <8 x i32>
+ store <8 x i32> %r, <8 x i32>* undef, align 32
+ ret void
+}
+
+; CHECK: widestores
+; loads:
+; CHECK: vmovaps
+; CHECK: vmovaps
+; stores:
+; CHECK: vmovaps
+; CHECK: vextractf128
+; CHECK: vmovaps
+; CHECK: ret
+
+define void @widestores(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+ %v0 = load <8 x float>* %a, align 32
+ %v1 = load <8 x float>* %b, align 32
+ store <8 x float> %v0, <8 x float>* %b, align 32 ; <--- aligned
+ store <8 x float> %v1, <8 x float>* %a, align 16 ; <--- unaligned
+ ret void
+}
+
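The alignment annotations are the point of this test: on a Sandy Bridge-class AVX target, a 256-bit load that is only 16-byte aligned is expected to split into two 128-bit vmovaps plus a vinsertf128, while 32-byte-aligned accesses stay as one instruction. A sketch using the GCC/Clang vector extension (the typedef names are illustrative):

    typedef float v8sf  __attribute__((vector_size(32)));
    typedef float v8sfu __attribute__((vector_size(32), aligned(16)));

    v8sf add(const v8sfu *a, const v8sf *b) {
        v8sf va = *a;  /* 16-byte aligned: two vmovaps + vinsertf128 */
        v8sf vb = *b;  /* 32-byte aligned: one 256-bit vmovaps       */
        return va + vb;
    }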
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 3bec3acdbf76..09ca07b31a10 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -282,7 +282,7 @@ define i32 @test13(i32 %a, i32 %b) nounwind {
; ATOM: test13:
; ATOM: cmpl
; ATOM-NEXT: sbbl
-; ATOM-NEXT: ret
+; ATOM: ret
}
define i32 @test14(i32 %a, i32 %b) nounwind {
@@ -299,7 +299,7 @@ define i32 @test14(i32 %a, i32 %b) nounwind {
; ATOM: cmpl
; ATOM-NEXT: sbbl
; ATOM-NEXT: notl
-; ATOM-NEXT: ret
+; ATOM: ret
}
; rdar://10961709
diff --git a/test/CodeGen/X86/sext-load.ll b/test/CodeGen/X86/sext-load.ll
index c9b39d3a489e..58c93229a2c0 100644
--- a/test/CodeGen/X86/sext-load.ll
+++ b/test/CodeGen/X86/sext-load.ll
@@ -1,9 +1,30 @@
-; RUN: llc < %s -march=x86 | grep movsbl
+; RUN: llc < %s -march=x86 | FileCheck %s
-define i32 @foo(i32 %X) nounwind {
+; When doing sign extension, use the sext-load lowering to take advantage of
+; x86's sign extension during loads.
+;
+; CHECK: test1:
+; CHECK: movsbl {{.*}}, %eax
+; CHECK-NEXT: ret
+define i32 @test1(i32 %X) nounwind {
entry:
%tmp12 = trunc i32 %X to i8 ; <i8> [#uses=1]
%tmp123 = sext i8 %tmp12 to i32 ; <i32> [#uses=1]
ret i32 %tmp123
}
+; When using a sextload representation, ensure that the sign extension is
+; preserved even when removing shifted-out low bits.
+;
+; CHECK: test2:
+; CHECK: movswl {{.*}}, %eax
+; CHECK-NEXT: ret
+define i32 @test2({i16, [6 x i8]}* %this) {
+entry:
+ %b48 = getelementptr inbounds { i16, [6 x i8] }* %this, i32 0, i32 1
+ %cast = bitcast [6 x i8]* %b48 to i48*
+ %bf.load = load i48* %cast, align 2
+ %bf.ashr = ashr i48 %bf.load, 32
+ %bf.cast = trunc i48 %bf.ashr to i32
+ ret i32 %bf.cast
+}
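The two functions cover the two ways a sign extension should fold into the load itself instead of surviving as shift arithmetic. Roughly in C, assuming an i386 target where the argument sits in a stack slot (test1 mirrors the test; load16 is an illustrative stand-in for the bitfield case):

    /* test1: trunc-then-sext of an in-memory value -> one movsbl. */
    int test1(int x) {
        return (signed char)x;
    }

    /* test2-style: a sign-extending 16-bit load -> movswl, with no
       separate shift pair even when the bits come from a wider field. */
    int load16(const short *p) {
        return *p;
    }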
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index 2af355905dc3..ceb79ea927a1 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
+; RUN: llc < %s -mtriple=i686-linux -mcpu=core2 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core2 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
define void @t1(i32 %x) nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/sincos-opt.ll b/test/CodeGen/X86/sincos-opt.ll
new file mode 100644
index 000000000000..f364d1fc2dc8
--- /dev/null
+++ b/test/CodeGen/X86/sincos-opt.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_SINCOS
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_NOOPT
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=core2 -enable-unsafe-fp-math | FileCheck %s --check-prefix=GNU_SINCOS
+
+; Combine sin / cos into a single call.
+; rdar://13087969
+
+define float @test1(float %x) nounwind {
+entry:
+; GNU_SINCOS: test1:
+; GNU_SINCOS: callq sincosf
+; GNU_SINCOS: movss 4(%rsp), %xmm0
+; GNU_SINCOS: addss (%rsp), %xmm0
+
+; OSX_SINCOS: test1:
+; OSX_SINCOS: callq ___sincosf_stret
+; OSX_SINCOS: addss %xmm1, %xmm0
+
+; OSX_NOOPT: test1:
+; OSX_NOOPT: callq _cosf
+; OSX_NOOPT: callq _sinf
+ %call = tail call float @sinf(float %x) nounwind readnone
+ %call1 = tail call float @cosf(float %x) nounwind readnone
+ %add = fadd float %call, %call1
+ ret float %add
+}
+
+define double @test2(double %x) nounwind {
+entry:
+; GNU_SINCOS: test2:
+; GNU_SINCOS: callq sincos
+; GNU_SINCOS: movsd 16(%rsp), %xmm0
+; GNU_SINCOS: addsd 8(%rsp), %xmm0
+
+; OSX_SINCOS: test2:
+; OSX_SINCOS: callq ___sincos_stret
+; OSX_SINCOS: addsd %xmm1, %xmm0
+
+; OSX_NOOPT: test2:
+; OSX_NOOPT: callq _cos
+; OSX_NOOPT: callq _sin
+ %call = tail call double @sin(double %x) nounwind readnone
+ %call1 = tail call double @cos(double %x) nounwind readnone
+ %add = fadd double %call, %call1
+ ret double %add
+}
+
+define x86_fp80 @test3(x86_fp80 %x) nounwind {
+entry:
+; GNU_SINCOS: test3:
+; GNU_SINCOS: callq sinl
+; GNU_SINCOS: callq cosl
+; GNU_SINCOS: ret
+ %call = tail call x86_fp80 @sinl(x86_fp80 %x) nounwind
+ %call1 = tail call x86_fp80 @cosl(x86_fp80 %x) nounwind
+ %add = fadd x86_fp80 %call, %call1
+ ret x86_fp80 %add
+}
+
+declare float @sinf(float) readonly
+declare double @sin(double) readonly
+declare float @cosf(float) readonly
+declare double @cos(double) readonly
+
+declare x86_fp80 @sinl(x86_fp80)
+declare x86_fp80 @cosl(x86_fp80)
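The RUN lines pin down when the pair is merged: a sincos entry point must exist (GNU libm, or the __sincos_stret helpers on OS X 10.9 and later) and, on GNU targets, unsafe FP math must be enabled. A source-level sketch, assuming -ffast-math as the driver-level spelling of -enable-unsafe-fp-math:

    #include <math.h>

    double f(double x) {
        /* x86_64-linux with -ffast-math: one call to sincos().
           x86_64-apple-macosx10.9:      one call to __sincos_stret. */
        return sin(x) + cos(x);
    }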
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index 649cd61ab78c..2aca5b897d35 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true -schedmodel=false | FileCheck %s
; Currently, floating-point selects are lowered to CFG triangles.
; This means that one side of the select is always unconditionally
diff --git a/test/CodeGen/X86/sse-align-2.ll b/test/CodeGen/X86/sse-align-2.ll
index 102c3fb06cd7..22cd7723068c 100644
--- a/test/CodeGen/X86/sse-align-2.ll
+++ b/test/CodeGen/X86/sse-align-2.ll
@@ -1,12 +1,21 @@
-; RUN: llc < %s -march=x86-64 | grep movup | count 2
+; RUN: llc < %s -march=x86-64 -mcpu=penryn | FileCheck %s
define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
%t = load <4 x float>* %p, align 4
%z = fmul <4 x float> %t, %x
ret <4 x float> %z
}
+
+; CHECK: foo:
+; CHECK: movups
+; CHECK: ret
+
define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
%t = load <2 x double>* %p, align 8
%z = fmul <2 x double> %t, %x
ret <2 x double> %z
}
+
+; CHECK: bar:
+; CHECK: movupd
+; CHECK: ret
diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll
index c99287bdfb9f..168959a5d653 100644
--- a/test/CodeGen/X86/sse-domains.ll
+++ b/test/CodeGen/X86/sse-domains.ll
@@ -55,10 +55,10 @@ while.end:
; instructions, they are still dependent on themselves.
; CHECK: xorps [[XMM1:%xmm[0-9]+]]
; CHECK: , [[XMM1]]
-; CHECK: cvtsi2ss %{{.*}}, [[XMM1]]
+; CHECK: cvtsi2ssl %{{.*}}, [[XMM1]]
; CHECK: xorps [[XMM2:%xmm[0-9]+]]
; CHECK: , [[XMM2]]
-; CHECK: cvtsi2ss %{{.*}}, [[XMM2]]
+; CHECK: cvtsi2ssl %{{.*}}, [[XMM2]]
;
define float @f2(i32 %m) nounwind uwtable readnone ssp {
entry:
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 2f4317bf294c..30a0fbe7d6de 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -28,33 +28,29 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
; Without forcing instructions, fall back to the preferred PS domain.
; CHECK: vsel_i64
-; CHECK: xorps
-; CHECK: andps
; CHECK: andnps
; CHECK: orps
; CHECK: ret
-define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
- %A = load <4 x i64>* %v1
- %B = load <4 x i64>* %v2
- %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %A, <4 x i64> %B
- store <4 x i64 > %vsel, <4 x i64>* %v1
+define void@vsel_i64(<2 x i64>* %v1, <2 x i64>* %v2) {
+ %A = load <2 x i64>* %v1
+ %B = load <2 x i64>* %v2
+ %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %A, <2 x i64> %B
+ store <2 x i64 > %vsel, <2 x i64>* %v1
ret void
}
; Without forcing instructions, fall back to the preferred PS domain.
; CHECK: vsel_double
-; CHECK: xorps
-; CHECK: andps
; CHECK: andnps
; CHECK: orps
; CHECK: ret
-define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
- %A = load <4 x double>* %v1
- %B = load <4 x double>* %v2
- %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %A, <4 x double> %B
- store <4 x double > %vsel, <4 x double>* %v1
+define void@vsel_double(<2 x double>* %v1, <2 x double>* %v2) {
+ %A = load <2 x double>* %v1
+ %B = load <2 x double>* %v2
+ %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %A, <2 x double> %B
+ store <2 x double > %vsel, <2 x double>* %v1
ret void
}
diff --git a/test/CodeGen/X86/sse2-mul.ll b/test/CodeGen/X86/sse2-mul.ll
new file mode 100644
index 000000000000..0466d60ec301
--- /dev/null
+++ b/test/CodeGen/X86/sse2-mul.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core2 | FileCheck %s
+
+define <4 x i32> @test1(<4 x i32> %x, <4 x i32> %y) {
+ %m = mul <4 x i32> %x, %y
+ ret <4 x i32> %m
+; CHECK: test1:
+; CHECK: pshufd $49
+; CHECK: pmuludq
+; CHECK: pshufd $49
+; CHECK: pmuludq
+; CHECK: shufps $-120
+; CHECK: pshufd $-40
+; CHECK: ret
+}
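Background for the CHECK pattern: SSE2 has no 32-bit packed multiply (pmulld arrived with SSE4.1), so a <4 x i32> multiply is synthesized from two pmuludq operations on the even and odd lanes plus shuffles to recombine the results. The same computation in C, via the vector extension (typedef name illustrative):

    typedef int v4si __attribute__((vector_size(16)));

    v4si mul(v4si x, v4si y) {
        /* On -mcpu=core2 (pre-SSE4.1): pshufd/pmuludq twice, then the
           shufps/pshufd recombine checked above. */
        return x * y;
    }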
diff --git a/test/CodeGen/X86/stack-align-memcpy.ll b/test/CodeGen/X86/stack-align-memcpy.ll
new file mode 100644
index 000000000000..74945e5bb1bd
--- /dev/null
+++ b/test/CodeGen/X86/stack-align-memcpy.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -force-align-stack -mtriple i386-apple-darwin -mcpu=i486 | FileCheck %s
+
+%struct.foo = type { [88 x i8] }
+
+; PR15249
+; We can't use rep;movsl here because it clobbers the base pointer in %esi.
+define void @test1(%struct.foo* nocapture %x, i32 %y) nounwind {
+ %dynalloc = alloca i8, i32 %y, align 1
+ call void @bar(i8* %dynalloc, %struct.foo* align 4 byval %x)
+ ret void
+
+; CHECK: test1:
+; CHECK: andl $-16, %esp
+; CHECK: movl %esp, %esi
+; CHECK-NOT: rep;movsl
+}
+
+declare void @bar(i8* nocapture, %struct.foo* align 4 byval) nounwind
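A rough C equivalent of the scenario, for orientation (bar and the field name are illustrative): the variable-sized alloca together with -force-align-stack reserves %esi as the frame's base pointer, so the 88-byte byval copy has to be lowered without rep;movsl:

    struct foo { char bytes[88]; };
    void bar(char *p, struct foo f);     /* f is passed byval on i386 */

    void test1(struct foo *x, int y) {
        char *dyn = __builtin_alloca(y); /* forces a base pointer */
        bar(dyn, *x);                    /* copy must not clobber %esi */
    }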
diff --git a/test/CodeGen/X86/stack-protector.ll b/test/CodeGen/X86/stack-protector.ll
index c07511443bce..1e9ca1d2c24d 100644
--- a/test/CodeGen/X86/stack-protector.ll
+++ b/test/CodeGen/X86/stack-protector.ll
@@ -1,28 +1,3141 @@
-; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | grep %gs:
-; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %fs:
-; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %gs:
-; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_guard"
-; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_fail"
+; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-I386 %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-X64 %s
+; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=LINUX-KERNEL-X64 %s
+; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | FileCheck --check-prefix=DARWIN-X64 %s
-@"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1]
+%struct.foo = type { [16 x i8] }
+%struct.foo.0 = type { [4 x i8] }
+%struct.pair = type { i32, i32 }
+%struct.nest = type { %struct.pair, %struct.pair }
+%struct.vec = type { <4 x i32> }
+%class.A = type { [2 x i8] }
+%struct.deep = type { %union.anon }
+%union.anon = type { %struct.anon }
+%struct.anon = type { %struct.anon.0 }
+%struct.anon.0 = type { %union.anon.1 }
+%union.anon.1 = type { [2 x i8] }
+%struct.small = type { i8 }
-define void @test(i8* %a) nounwind ssp {
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; test1a: array of [16 x i8]
+; no ssp attribute
+; Requires no protector.
+define void @test1a(i8* %a) nounwind uwtable {
+entry:
+; LINUX-I386: test1a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test1a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test1a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test1a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a.addr = alloca i8*, align 8
+ %buf = alloca [16 x i8], align 16
+ store i8* %a, i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ ret void
+}
+
+; test1b: array of [16 x i8]
+; ssp attribute
+; Requires protector.
+define void @test1b(i8* %a) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test1b:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test1b:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test1b:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test1b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a.addr = alloca i8*, align 8
+ %buf = alloca [16 x i8], align 16
+ store i8* %a, i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ ret void
+}
+
+; test1c: array of [16 x i8]
+; sspstrong attribute
+; Requires protector.
+define void @test1c(i8* %a) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test1c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test1c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test1c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test1c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a.addr = alloca i8*, align 8
+ %buf = alloca [16 x i8], align 16
+ store i8* %a, i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ ret void
+}
+
+; test1d: array of [16 x i8]
+; sspreq attribute
+; Requires protector.
+define void @test1d(i8* %a) nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test1d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test1d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test1d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test1d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a.addr = alloca i8*, align 8
+ %buf = alloca [16 x i8], align 16
+ store i8* %a, i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ ret void
+}
+
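Every test1* variant above is the same function body; only the SSP attribute differs. For orientation, a sketch of the C source it plausibly came from, with the usual driver mapping: -fstack-protector emits ssp, -fstack-protector-strong emits sspstrong, and -fstack-protector-all emits sspreq:

    #include <stdio.h>
    #include <string.h>

    void test1(char *a) {
        char buf[16];   /* at least ssp-buffer-size (default 8) bytes,
                           so even plain ssp guards this frame */
        strcpy(buf, a);
        printf("%s\n", buf);
    }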
+; test2a: struct { [16 x i8] }
+; no ssp attribute
+; Requires no protector.
+define void @test2a(i8* %a) nounwind uwtable {
+entry:
+; LINUX-I386: test2a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test2a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test2a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test2a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a.addr = alloca i8*, align 8
+ %b = alloca %struct.foo, align 1
+ store i8* %a, i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ ret void
+}
+
+; test2b: struct { [16 x i8] }
+; ssp attribute
+; Requires protector.
+define void @test2b(i8* %a) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test2b:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test2b:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test2b:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test2b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a.addr = alloca i8*, align 8
+ %b = alloca %struct.foo, align 1
+ store i8* %a, i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ ret void
+}
+
+; test2c: struct { [16 x i8] }
+; sspstrong attribute
+; Requires protector.
+define void @test2c(i8* %a) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test2c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test2c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test2c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test2c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a.addr = alloca i8*, align 8
+ %b = alloca %struct.foo, align 1
+ store i8* %a, i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ ret void
+}
+
+; test2d: struct { [16 x i8] }
+; sspreq attribute
+; Requires protector.
+define void @test2d(i8* %a) nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test2d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test2d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test2d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test2d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a.addr = alloca i8*, align 8
+ %b = alloca %struct.foo, align 1
+ store i8* %a, i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ ret void
+}
+
+; test3a: array of [4 x i8]
+; no ssp attribute
+; Requires no protector.
+define void @test3a(i8* %a) nounwind uwtable {
+entry:
+; LINUX-I386: test3a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test3a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test3a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test3a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a.addr = alloca i8*, align 8
+ %buf = alloca [4 x i8], align 1
+ store i8* %a, i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ ret void
+}
+
+; test3b: array of [4 x i8]
+; ssp attribute
+; Requires no protector.
+define void @test3b(i8* %a) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test3b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test3b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test3b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test3b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a.addr = alloca i8*, align 8
+ %buf = alloca [4 x i8], align 1
+ store i8* %a, i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ ret void
+}
+
+; test3c: array of [4 x i8]
+; sspstrong attribute
+; Requires protector.
+define void @test3c(i8* %a) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test3c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test3c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test3c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test3c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a.addr = alloca i8*, align 8
+ %buf = alloca [4 x i8], align 1
+ store i8* %a, i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ ret void
+}
+
+; test3d: array of [4 x i8]
+; sspreq attribute
+; Requires protector.
+define void @test3d(i8* %a) nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test3d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test3d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test3d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test3d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a.addr = alloca i8*, align 8
+ %buf = alloca [4 x i8], align 1
+ store i8* %a, i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ ret void
+}
+
+; test4a: struct { [4 x i8] }
+; no ssp attribute
+; Requires no protector.
+define void @test4a(i8* %a) nounwind uwtable {
+entry:
+; LINUX-I386: test4a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test4a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test4a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test4a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a.addr = alloca i8*, align 8
+ %b = alloca %struct.foo.0, align 1
+ store i8* %a, i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ ret void
+}
+
+; test4b: struct { [4 x i8] }
+; ssp attribute
+; Requires no protector.
+define void @test4b(i8* %a) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test4b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test4b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test4b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test4b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a.addr = alloca i8*, align 8
+ %b = alloca %struct.foo.0, align 1
+ store i8* %a, i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ ret void
+}
+
+; test4c: struct { [4 x i8] }
+; sspstrong attribute
+; Requires protector.
+define void @test4c(i8* %a) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test4c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test4c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test4c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test4c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a.addr = alloca i8*, align 8
+ %b = alloca %struct.foo.0, align 1
+ store i8* %a, i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ ret void
+}
+
+; test4d: struct { [4 x i8] }
+; sspreq attribute
+; Requires protector.
+define void @test4d(i8* %a) nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test4d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test4d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test4d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test4d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a.addr = alloca i8*, align 8
+ %b = alloca %struct.foo.0, align 1
+ store i8* %a, i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8** %a.addr, align 8
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+ %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ ret void
+}
+
+; test5a: no arrays / no nested arrays
+; no ssp attribute
+; Requires no protector.
+define void @test5a(i8* %a) nounwind uwtable {
+entry:
+; LINUX-I386: test5a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test5a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test5a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test5a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a.addr = alloca i8*, align 8
+ store i8* %a, i8** %a.addr, align 8
+ %0 = load i8** %a.addr, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ ret void
+}
+
+; test5b: no arrays / no nested arrays
+; ssp attribute
+; Requires no protector.
+define void @test5b(i8* %a) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test5b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test5b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test5b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test5b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a.addr = alloca i8*, align 8
+ store i8* %a, i8** %a.addr, align 8
+ %0 = load i8** %a.addr, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ ret void
+}
+
+; test5c: no arrays / no nested arrays
+; sspstrong attribute
+; Requires no protector.
+define void @test5c(i8* %a) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test5c:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test5c:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test5c:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test5c:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a.addr = alloca i8*, align 8
+ store i8* %a, i8** %a.addr, align 8
+ %0 = load i8** %a.addr, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ ret void
+}
+
+; test5d: no arrays / no nested arrays
+; sspreq attribute
+; Requires protector.
+define void @test5d(i8* %a) nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test5d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test5d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test5d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test5d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a.addr = alloca i8*, align 8
+ store i8* %a, i8** %a.addr, align 8
+ %0 = load i8** %a.addr, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ ret void
+}
+
+; test6a: Address-of local taken (j = &a)
+; no ssp attribute
+; Requires no protector.
+define void @test6a() nounwind uwtable {
+entry:
+; LINUX-I386: test6a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test6a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test6a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test6a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %retval = alloca i32, align 4
+ %a = alloca i32, align 4
+ %j = alloca i32*, align 8
+ store i32 0, i32* %retval
+ %0 = load i32* %a, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %a, align 4
+ store i32* %a, i32** %j, align 8
+ ret void
+}
+
+; test6b: Address-of local taken (j = &a)
+; ssp attribute
+; Requires no protector.
+define void @test6b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test6b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test6b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test6b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test6b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %retval = alloca i32, align 4
+ %a = alloca i32, align 4
+ %j = alloca i32*, align 8
+ store i32 0, i32* %retval
+ %0 = load i32* %a, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %a, align 4
+ store i32* %a, i32** %j, align 8
+ ret void
+}
+
+; test6c: Address-of local taken (j = &a)
+; sspstrong attribute
+; Requires protector.
+define void @test6c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test6c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test6c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test6c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test6c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %retval = alloca i32, align 4
+ %a = alloca i32, align 4
+ %j = alloca i32*, align 8
+ store i32 0, i32* %retval
+ %0 = load i32* %a, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %a, align 4
+ store i32* %a, i32** %j, align 8
+ ret void
+}
+
+; test6d: Address-of local taken (j = &a)
+; sspreq attribute
+; Requires protector.
+define void @test6d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test6d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test6d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test6d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test6d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %retval = alloca i32, align 4
+ %a = alloca i32, align 4
+ %j = alloca i32*, align 8
+ store i32 0, i32* %retval
+ %0 = load i32* %a, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %a, align 4
+ store i32* %a, i32** %j, align 8
+ ret void
+}
+
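The test6* group isolates one of the extra sspstrong triggers: taking the address of a scalar local is enough for a guard under sspstrong (and sspreq), while plain ssp still skips the frame because no char buffer is involved. A C sketch:

    void test6(void) {
        int a = 0;
        int *j;
        a += 1;
        j = &a;     /* address of a scalar local is taken: guarded under
                       sspstrong, not under plain ssp */
        (void)j;
    }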
+; test7a: PtrToInt Cast
+; no ssp attribute
+; Requires no protector.
+define void @test7a() nounwind uwtable readnone {
+entry:
+; LINUX-I386: test7a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test7a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test7a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test7a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32, align 4
+ %0 = ptrtoint i32* %a to i64
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ ret void
+}
+
+; test7b: PtrToInt Cast
+; ssp attribute
+; Requires no protector.
+define void @test7b() nounwind uwtable readnone ssp {
+entry:
+; LINUX-I386: test7b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test7b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test7b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test7b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32, align 4
+ %0 = ptrtoint i32* %a to i64
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ ret void
+}
+
+; test7c: PtrToInt Cast
+; sspstrong attribute
+; Requires protector.
+define void @test7c() nounwind uwtable readnone sspstrong {
+entry:
+; LINUX-I386: test7c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test7c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test7c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test7c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32, align 4
+ %0 = ptrtoint i32* %a to i64
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ ret void
+}
+
+; test7d: PtrToInt Cast
+; sspreq attribute
+; Requires protector.
+define void @test7d() nounwind uwtable readnone sspreq {
+entry:
+; LINUX-I386: test7d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test7d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test7d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test7d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32, align 4
+ %0 = ptrtoint i32* %a to i64
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ ret void
+}
+
+; test8a: Passing addr-of to function call
+; no ssp attribute
+; Requires no protector.
+define void @test8a() nounwind uwtable {
+entry:
+; LINUX-I386: test8a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test8a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test8a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test8a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %b = alloca i32, align 4
+ call void @funcall(i32* %b) nounwind
+ ret void
+}
+
+; test8b: Passing addr-of to function call
+; ssp attribute
+; Requires no protector.
+define void @test8b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test8b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test8b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test8b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test8b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %b = alloca i32, align 4
+ call void @funcall(i32* %b) nounwind
+ ret void
+}
+
+; test8c: Passing addr-of to function call
+; sspstrong attribute
+; Requires protector.
+define void @test8c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test8c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test8c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test8c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test8c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %b = alloca i32, align 4
+ call void @funcall(i32* %b) nounwind
+ ret void
+}
+
+; test8d: Passing addr-of to function call
+; sspreq attribute
+; Requires protector.
+define void @test8d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test8d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test8d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test8d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test8d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %b = alloca i32, align 4
+ call void @funcall(i32* %b) nounwind
+ ret void
+}
+
+; test9a: Addr-of in select instruction
+; no ssp attribute
+; Requires no protector.
+define void @test9a() nounwind uwtable {
+entry:
+; LINUX-I386: test9a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test9a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test9a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test9a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %x = alloca double, align 8
+ %call = call double @testi_aux() nounwind
+ store double %call, double* %x, align 8
+ %cmp2 = fcmp ogt double %call, 0.000000e+00
+ %y.1 = select i1 %cmp2, double* %x, double* null
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ ret void
+}
+
+; test9b: Addr-of in select instruction
+; ssp attribute
+; Requires no protector.
+define void @test9b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test9b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test9b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test9b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test9b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %x = alloca double, align 8
+ %call = call double @testi_aux() nounwind
+ store double %call, double* %x, align 8
+ %cmp2 = fcmp ogt double %call, 0.000000e+00
+ %y.1 = select i1 %cmp2, double* %x, double* null
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ ret void
+}
+
+; test9c: Addr-of in select instruction
+; sspstrong attribute
+; Requires protector.
+define void @test9c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test9c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test9c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test9c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test9c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %x = alloca double, align 8
+ %call = call double @testi_aux() nounwind
+ store double %call, double* %x, align 8
+ %cmp2 = fcmp ogt double %call, 0.000000e+00
+ %y.1 = select i1 %cmp2, double* %x, double* null
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ ret void
+}
+
+; test9d: Addr-of in select instruction
+; sspreq attribute
+; Requires protector.
+define void @test9d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test9d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test9d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test9d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test9d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %x = alloca double, align 8
+ %call = call double @testi_aux() nounwind
+ store double %call, double* %x, align 8
+ %cmp2 = fcmp ogt double %call, 0.000000e+00
+ %y.1 = select i1 %cmp2, double* %x, double* null
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ ret void
+}
+
+; test10a: Addr-of in phi instruction
+; no ssp attribute
+; Requires no protector.
+define void @test10a() nounwind uwtable {
+entry:
+; LINUX-I386: test10a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test10a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test10a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test10a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %x = alloca double, align 8
+ %call = call double @testi_aux() nounwind
+ store double %call, double* %x, align 8
+ %cmp = fcmp ogt double %call, 3.140000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %call1 = call double @testi_aux() nounwind
+ store double %call1, double* %x, align 8
+ br label %if.end4
+
+if.else: ; preds = %entry
+ %cmp2 = fcmp ogt double %call, 1.000000e+00
+ br i1 %cmp2, label %if.then3, label %if.end4
+
+if.then3: ; preds = %if.else
+ br label %if.end4
+
+if.end4: ; preds = %if.else, %if.then3, %if.then
+ %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
+ %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+ ret void
+}
+
+; test10b: Addr-of in phi instruction
+; ssp attribute
+; Requires no protector.
+define void @test10b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test10b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test10b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test10b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test10b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %x = alloca double, align 8
+ %call = call double @testi_aux() nounwind
+ store double %call, double* %x, align 8
+ %cmp = fcmp ogt double %call, 3.140000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %call1 = call double @testi_aux() nounwind
+ store double %call1, double* %x, align 8
+ br label %if.end4
+
+if.else: ; preds = %entry
+ %cmp2 = fcmp ogt double %call, 1.000000e+00
+ br i1 %cmp2, label %if.then3, label %if.end4
+
+if.then3: ; preds = %if.else
+ br label %if.end4
+
+if.end4: ; preds = %if.else, %if.then3, %if.then
+ %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
+ %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+ ret void
+}
+
+; test10c: Addr-of in phi instruction
+; sspstrong attribute
+; Requires protector.
+define void @test10c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test10c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test10c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test10c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test10c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %x = alloca double, align 8
+ %call = call double @testi_aux() nounwind
+ store double %call, double* %x, align 8
+ %cmp = fcmp ogt double %call, 3.140000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %call1 = call double @testi_aux() nounwind
+ store double %call1, double* %x, align 8
+ br label %if.end4
+
+if.else: ; preds = %entry
+ %cmp2 = fcmp ogt double %call, 1.000000e+00
+ br i1 %cmp2, label %if.then3, label %if.end4
+
+if.then3: ; preds = %if.else
+ br label %if.end4
+
+if.end4: ; preds = %if.else, %if.then3, %if.then
+ %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
+ %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+ ret void
+}
+
+; test10d: Addr-of in phi instruction
+; sspreq attribute
+; Requires protector.
+define void @test10d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test10d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test10d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test10d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test10d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %x = alloca double, align 8
+ %call = call double @testi_aux() nounwind
+ store double %call, double* %x, align 8
+ %cmp = fcmp ogt double %call, 3.140000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %call1 = call double @testi_aux() nounwind
+ store double %call1, double* %x, align 8
+ br label %if.end4
+
+if.else: ; preds = %entry
+ %cmp2 = fcmp ogt double %call, 1.000000e+00
+ br i1 %cmp2, label %if.then3, label %if.end4
+
+if.then3: ; preds = %if.else
+ br label %if.end4
+
+if.end4: ; preds = %if.else, %if.then3, %if.then
+ %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
+ %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+ ret void
+}
+
+; test11a: Addr-of struct element. (GEP followed by store).
+; no ssp attribute
+; Requires no protector.
+define void @test11a() nounwind uwtable {
+entry:
+; LINUX-I386: test11a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test11a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test11a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test11a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %c = alloca %struct.pair, align 4
+ %b = alloca i32*, align 8
+ %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ store i32* %y, i32** %b, align 8
+ %0 = load i32** %b, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ ret void
+}
+
+; test11b: Addr-of struct element. (GEP followed by store).
+; ssp attribute
+; Requires no protector.
+define void @test11b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test11b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test11b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test11b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test11b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %c = alloca %struct.pair, align 4
+ %b = alloca i32*, align 8
+ %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ store i32* %y, i32** %b, align 8
+ %0 = load i32** %b, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ ret void
+}
+
+; test11c: Addr-of struct element. (GEP followed by store).
+; sspstrong attribute
+; Requires protector.
+define void @test11c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test11c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test11c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test11c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test11c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %c = alloca %struct.pair, align 4
+ %b = alloca i32*, align 8
+ %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ store i32* %y, i32** %b, align 8
+ %0 = load i32** %b, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ ret void
+}
+
+; test11d: Addr-of struct element. (GEP followed by store).
+; sspreq attribute
+; Requires protector.
+define void @test11d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test11d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test11d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test11d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test11d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %c = alloca %struct.pair, align 4
+ %b = alloca i32*, align 8
+ %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ store i32* %y, i32** %b, align 8
+ %0 = load i32** %b, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ ret void
+}
+
+; test12a: Addr-of struct element, GEP followed by ptrtoint.
+; no ssp attribute
+; Requires no protector.
+define void @test12a() nounwind uwtable {
+entry:
+; LINUX-I386: test12a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test12a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test12a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test12a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %c = alloca %struct.pair, align 4
+ %b = alloca i32*, align 8
+ %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %0 = ptrtoint i32* %y to i64
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ ret void
+}
+
+; test12b: Addr-of struct element, GEP followed by ptrtoint.
+; ssp attribute
+; Requires no protector.
+define void @test12b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test12b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test12b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test12b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test12b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %c = alloca %struct.pair, align 4
+ %b = alloca i32*, align 8
+ %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %0 = ptrtoint i32* %y to i64
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ ret void
+}
+
+; test12c: Addr-of struct element, GEP followed by ptrtoint.
+; sspstrong attribute
+; Requires protector.
+define void @test12c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test12c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test12c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test12c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test12c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %c = alloca %struct.pair, align 4
+ %b = alloca i32*, align 8
+ %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %0 = ptrtoint i32* %y to i64
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ ret void
+}
+
+; test12d: Addr-of struct element, GEP followed by ptrtoint.
+; sspreq attribute
+; Requires protector.
+define void @test12d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test12d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test12d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test12d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test12d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %c = alloca %struct.pair, align 4
+ %b = alloca i32*, align 8
+ %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %0 = ptrtoint i32* %y to i64
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ ret void
+}
+
+; test13a: Addr-of struct element, GEP followed by callinst.
+; no ssp attribute
+; Requires no protector.
+define void @test13a() nounwind uwtable {
+entry:
+; LINUX-I386: test13a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test13a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test13a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test13a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %c = alloca %struct.pair, align 4
+ %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+ ret void
+}
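+
+; For orientation, the test13 group corresponds roughly to the C sketch below
+; (illustrative only; field names are assumptions):
+;   struct pair c;
+;   printf("%p", (void *)&c.y); // element address escapes as a call argument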
+
+; test13b: Addr-of struct element, GEP followed by callinst.
+; ssp attribute
+; Requires no protector.
+define void @test13b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test13b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test13b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test13b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test13b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %c = alloca %struct.pair, align 4
+ %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+ ret void
+}
+
+; test13c: Addr-of struct element, GEP followed by callinst.
+; sspstrong attribute
+; Requires protector.
+define void @test13c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test13c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test13c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test13c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test13c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %c = alloca %struct.pair, align 4
+ %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+ ret void
+}
+
+; test13d: Addr-of struct element, GEP followed by callinst.
+; sspreq attribute
+; Requires protector.
+define void @test13d() nounwind uwtable sspreq {
entry:
- %a_addr = alloca i8* ; <i8**> [#uses=2]
- %buf = alloca [8 x i8] ; <[8 x i8]*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i8* %a, i8** %a_addr
- %buf1 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1]
- %0 = load i8** %a_addr, align 4 ; <i8*> [#uses=1]
- %1 = call i8* @strcpy(i8* %buf1, i8* %0) nounwind ; <i8*> [#uses=0]
- %buf2 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1]
- %2 = call i32 (i8*, ...)* @printf(i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i8* %buf2) nounwind ; <i32> [#uses=0]
- br label %return
-return: ; preds = %entry
- ret void
+; LINUX-I386: test13d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test13d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test13d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test13d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %c = alloca %struct.pair, align 4
+ %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+ ret void
}
-declare i8* @strcpy(i8*, i8*) nounwind
+
+; test14a: Addr-of a local, optimized into a GEP (e.g., &a - 12)
+; no ssp attribute
+; Requires no protector.
+define void @test14a() nounwind uwtable {
+entry:
+; LINUX-I386: test14a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test14a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test14a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test14a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32, align 4
+ %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+ ret void
+}
+
+; test14b: Addr-of a local, optimized into a GEP (e.g., &a - 12)
+; ssp attribute
+; Requires no protector.
+define void @test14b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test14b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test14b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test14b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test14b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32, align 4
+ %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+ ret void
+}
+
+; test14c: Addr-of a local, optimized into a GEP (e.g., &a - 12)
+; sspstrong attribute
+; Requires protector.
+define void @test14c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test14c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test14c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test14c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test14c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32, align 4
+ %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+ ret void
+}
+
+; test14d: Addr-of a local, optimized into a GEP (e.g., &a - 12)
+; sspreq attribute
+; Requires protector.
+define void @test14d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test14d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test14d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test14d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test14d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32, align 4
+ %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+ ret void
+}
+
+; test15a: Addr-of a local cast to a ptr of a different type
+; (e.g., int a; ... ; float *b = &a;)
+; no ssp attribute
+; Requires no protector.
+define void @test15a() nounwind uwtable {
+entry:
+; LINUX-I386: test15a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test15a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test15a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test15a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32, align 4
+ %b = alloca float*, align 8
+ store i32 0, i32* %a, align 4
+ %0 = bitcast i32* %a to float*
+ store float* %0, float** %b, align 8
+ %1 = load float** %b, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+ ret void
+}
+
+; test15b: Addr-of a local cast to a ptr of a different type
+; (e.g., int a; ... ; float *b = &a;)
+; ssp attribute
+; Requires no protector.
+define void @test15b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test15b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test15b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test15b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test15b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32, align 4
+ %b = alloca float*, align 8
+ store i32 0, i32* %a, align 4
+ %0 = bitcast i32* %a to float*
+ store float* %0, float** %b, align 8
+ %1 = load float** %b, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+ ret void
+}
+
+; test15c: Addr-of a local cast to a ptr of a different type
+; (e.g., int a; ... ; float *b = &a;)
+; sspstrong attribute
+; Requires protector.
+define void @test15c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test15c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test15c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test15c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test15c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32, align 4
+ %b = alloca float*, align 8
+ store i32 0, i32* %a, align 4
+ %0 = bitcast i32* %a to float*
+ store float* %0, float** %b, align 8
+ %1 = load float** %b, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+ ret void
+}
+
+; test15d: Addr-of a local cast to a ptr of a different type
+; (e.g., int a; ... ; float *b = &a;)
+; sspreq attribute
+; Requires protector.
+define void @test15d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test15d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test15d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test15d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test15d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32, align 4
+ %b = alloca float*, align 8
+ store i32 0, i32* %a, align 4
+ %0 = bitcast i32* %a to float*
+ store float* %0, float** %b, align 8
+ %1 = load float** %b, align 8
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+ ret void
+}
+
+; test16a: Addr-of a local cast to a ptr of a different type (optimized)
+; (e.g., int a; ... ; float *b = &a;)
+; no ssp attribute
+; Requires no protector.
+define void @test16a() nounwind uwtable {
+entry:
+; LINUX-I386: test16a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test16a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test16a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test16a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32, align 4
+ store i32 0, i32* %a, align 4
+ %0 = bitcast i32* %a to float*
+ call void @funfloat(float* %0) nounwind
+ ret void
+}
+
+; test16b: Addr-of a local cast to a ptr of a different type (optimized)
+; (e.g., int a; ... ; float *b = &a;)
+; ssp attribute
+; Requires no protector.
+define void @test16b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test16b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test16b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test16b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test16b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32, align 4
+ store i32 0, i32* %a, align 4
+ %0 = bitcast i32* %a to float*
+ call void @funfloat(float* %0) nounwind
+ ret void
+}
+
+; test16c: Addr-of a local cast to a ptr of a different type (optimized)
+; (e.g., int a; ... ; float *b = &a;)
+; sspstrong attribute
+; Requires protector.
+define void @test16c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test16c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test16c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test16c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test16c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32, align 4
+ store i32 0, i32* %a, align 4
+ %0 = bitcast i32* %a to float*
+ call void @funfloat(float* %0) nounwind
+ ret void
+}
+
+; test16d: Addr-of a local cast to a ptr of a different type (optimized)
+; (e.g., int a; ... ; float *b = &a;)
+; sspreq attribute
+; Requires protector.
+define void @test16d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test16d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test16d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test16d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test16d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32, align 4
+ store i32 0, i32* %a, align 4
+ %0 = bitcast i32* %a to float*
+ call void @funfloat(float* %0) nounwind
+ ret void
+}
+
+; test17a: Addr-of a vector nested in a struct
+; no ssp attribute
+; Requires no protector.
+define void @test17a() nounwind uwtable {
+entry:
+; LINUX-I386: test17a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test17a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test17a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test17a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %c = alloca %struct.vec, align 16
+ %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
+ %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+ ret void
+}
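+
+; For orientation, the test17 group corresponds roughly to the C sketch below
+; (illustrative only; the vector syntax is a Clang/GCC extension):
+;   typedef int v4si __attribute__((vector_size(16)));
+;   struct vec { v4si x; } c;
+;   printf("%p", (void *)(&c.x - 12)); // pointer arithmetic on the vector member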
+
+; test17b: Addr-of a vector nested in a struct
+; ssp attribute
+; Requires no protector.
+define void @test17b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test17b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test17b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test17b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test17b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %c = alloca %struct.vec, align 16
+ %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
+ %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+ ret void
+}
+
+; test17c: Addr-of a vector nested in a struct
+; sspstrong attribute
+; Requires protector.
+define void @test17c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test17c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test17c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test17c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test17c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %c = alloca %struct.vec, align 16
+ %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
+ %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+ ret void
+}
+
+; test17d: Addr-of a vector nested in a struct
+; sspreq attribute
+; Requires protector.
+define void @test17d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test17d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test17d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test17d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test17d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %c = alloca %struct.vec, align 16
+ %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
+ %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+ ret void
+}
+
+; test18a: Addr-of a variable passed into an invoke instruction.
+; no ssp attribute
+; Requires no protector.
+define i32 @test18a() uwtable {
+entry:
+; LINUX-I386: test18a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test18a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test18a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test18a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 0, i32* %a, align 4
+ invoke void @_Z3exceptPi(i32* %a)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret i32 0
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ ret i32 0
+}
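+
+; For orientation, the test18 group corresponds roughly to the C++ sketch
+; below (illustrative only):
+;   int a = 0;
+;   try { except(&a); } catch (...) {} // the address escapes through an invoke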
+
+; test18b: Addr-of a variable passed into an invoke instruction.
+; ssp attribute
+; Requires no protector.
+define i32 @test18b() uwtable ssp {
+entry:
+; LINUX-I386: test18b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test18b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test18b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test18b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 0, i32* %a, align 4
+ invoke void @_Z3exceptPi(i32* %a)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret i32 0
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ ret i32 0
+}
+
+; test18c: Addr-of a variable passed into an invoke instruction.
+; sspstrong attribute
+; Requires protector.
+define i32 @test18c() uwtable sspstrong {
+entry:
+; LINUX-I386: test18c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test18c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test18c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test18c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 0, i32* %a, align 4
+ invoke void @_Z3exceptPi(i32* %a)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret i32 0
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ ret i32 0
+}
+
+; test18d: Addr-of a variable passed into an invoke instruction.
+; sspreq attribute
+; Requires protector.
+define i32 @test18d() uwtable sspreq {
+entry:
+; LINUX-I386: test18d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test18d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test18d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test18d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 0, i32* %a, align 4
+ invoke void @_Z3exceptPi(i32* %a)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret i32 0
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ ret i32 0
+}
+
+; test19a: Addr-of a struct element passed into an invoke instruction.
+; (GEP followed by an invoke)
+; no ssp attribute
+; Requires no protector.
+define i32 @test19a() uwtable {
+entry:
+; LINUX-I386: test19a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test19a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test19a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test19a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %c = alloca %struct.pair, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ store i32 0, i32* %a, align 4
+ %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ invoke void @_Z3exceptPi(i32* %a1)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret i32 0
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ ret i32 0
+}
+
+; test19b: Addr-of a struct element passed into an invoke instruction.
+; (GEP followed by an invoke)
+; ssp attribute
+; Requires no protector.
+define i32 @test19b() uwtable ssp {
+entry:
+; LINUX-I386: test19b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test19b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test19b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test19b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %c = alloca %struct.pair, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ store i32 0, i32* %a, align 4
+ %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ invoke void @_Z3exceptPi(i32* %a1)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret i32 0
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ ret i32 0
+}
+
+; test19c: Addr-of a struct element passed into an invoke instruction.
+; (GEP followed by an invoke)
+; sspstrong attribute
+; Requires protector.
+define i32 @test19c() uwtable sspstrong {
+entry:
+; LINUX-I386: test19c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test19c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test19c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test19c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %c = alloca %struct.pair, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ store i32 0, i32* %a, align 4
+ %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ invoke void @_Z3exceptPi(i32* %a1)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret i32 0
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ ret i32 0
+}
+
+; test19d: Addr-of a struct element passed into an invoke instruction.
+; (GEP followed by an invoke)
+; sspreq attribute
+; Requires protector.
+define i32 @test19d() uwtable sspreq {
+entry:
+; LINUX-I386: test19d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test19d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test19d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test19d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %c = alloca %struct.pair, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ store i32 0, i32* %a, align 4
+ %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ invoke void @_Z3exceptPi(i32* %a1)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret i32 0
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ ret i32 0
+}
+
+; test20a: Addr-of a pointer
+; no ssp attribute
+; Requires no protector.
+define void @test20a() nounwind uwtable {
+entry:
+; LINUX-I386: test20a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test20a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test20a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test20a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32*, align 8
+ %b = alloca i32**, align 8
+ %call = call i32* @getp()
+ store i32* %call, i32** %a, align 8
+ store i32** %a, i32*** %b, align 8
+ %0 = load i32*** %b, align 8
+ call void @funcall2(i32** %0)
+ ret void
+}
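+
+; For orientation, the test20 group corresponds roughly to the C sketch below
+; (illustrative only). No stack buffer is involved, only a pointer whose
+; address is taken:
+;   int *a = getp();
+;   int **b = &a;
+;   funcall2(b);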
+
+; test20b: Addr-of a pointer
+; ssp attribute
+; Requires no protector.
+define void @test20b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test20b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test20b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test20b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test20b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32*, align 8
+ %b = alloca i32**, align 8
+ %call = call i32* @getp()
+ store i32* %call, i32** %a, align 8
+ store i32** %a, i32*** %b, align 8
+ %0 = load i32*** %b, align 8
+ call void @funcall2(i32** %0)
+ ret void
+}
+
+; test20c: Addr-of a pointer
+; sspstrong attribute
+; Requires protector.
+define void @test20c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test20c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test20c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test20c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test20c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32*, align 8
+ %b = alloca i32**, align 8
+ %call = call i32* @getp()
+ store i32* %call, i32** %a, align 8
+ store i32** %a, i32*** %b, align 8
+ %0 = load i32*** %b, align 8
+ call void @funcall2(i32** %0)
+ ret void
+}
+
+; test20d: Addr-of a pointer
+; sspreq attribute
+; Requires protector.
+define void @test20d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test20d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test20d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test20d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test20d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32*, align 8
+ %b = alloca i32**, align 8
+ %call = call i32* @getp()
+ store i32* %call, i32** %a, align 8
+ store i32** %a, i32*** %b, align 8
+ %0 = load i32*** %b, align 8
+ call void @funcall2(i32** %0)
+ ret void
+}
+
+; test21a: Addr-of a casted pointer
+; no ssp attribute
+; Requires no protector.
+define void @test21a() nounwind uwtable {
+entry:
+; LINUX-I386: test21a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test21a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test21a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test21a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32*, align 8
+ %b = alloca float**, align 8
+ %call = call i32* @getp()
+ store i32* %call, i32** %a, align 8
+ %0 = bitcast i32** %a to float**
+ store float** %0, float*** %b, align 8
+ %1 = load float*** %b, align 8
+ call void @funfloat2(float** %1)
+ ret void
+}
+
+; test21b: Addr-of a casted pointer
+; ssp attribute
+; Requires no protector.
+define void @test21b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test21b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test21b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test21b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test21b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca i32*, align 8
+ %b = alloca float**, align 8
+ %call = call i32* @getp()
+ store i32* %call, i32** %a, align 8
+ %0 = bitcast i32** %a to float**
+ store float** %0, float*** %b, align 8
+ %1 = load float*** %b, align 8
+ call void @funfloat2(float** %1)
+ ret void
+}
+
+; test21c: Addr-of a casted pointer
+; sspstrong attribute
+; Requires protector.
+define void @test21c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test21c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test21c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test21c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test21c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32*, align 8
+ %b = alloca float**, align 8
+ %call = call i32* @getp()
+ store i32* %call, i32** %a, align 8
+ %0 = bitcast i32** %a to float**
+ store float** %0, float*** %b, align 8
+ %1 = load float*** %b, align 8
+ call void @funfloat2(float** %1)
+ ret void
+}
+
+; test21d: Addr-of a casted pointer
+; sspreq attribute
+; Requires protector.
+define void @test21d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test21d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test21d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test21d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test21d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca i32*, align 8
+ %b = alloca float**, align 8
+ %call = call i32* @getp()
+ store i32* %call, i32** %a, align 8
+ %0 = bitcast i32** %a to float**
+ store float** %0, float*** %b, align 8
+ %1 = load float*** %b, align 8
+ call void @funfloat2(float** %1)
+ ret void
+}
+
+; test22a: [2 x i8] in a class
+; no ssp attribute
+; Requires no protector.
+define signext i8 @test22a() nounwind uwtable {
+entry:
+; LINUX-I386: test22a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test22a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test22a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test22a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca %class.A, align 1
+ %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8* %arrayidx, align 1
+ ret i8 %0
+}
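+
+; For orientation, the test22 group corresponds roughly to the C++ sketch
+; below (illustrative only). The 2-byte array is smaller than the default
+; ssp-buffer-size (8), so plain ssp leaves it unguarded while sspstrong
+; still requires a protector:
+;   class A { public: char array[2]; };
+;   A a;
+;   return a.array[0];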
+
+; test22b: [2 x i8] in a class
+; ssp attribute
+; Requires no protector.
+define signext i8 @test22b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test22b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test22b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test22b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test22b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca %class.A, align 1
+ %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8* %arrayidx, align 1
+ ret i8 %0
+}
+
+; test22c: [2 x i8] in a class
+; sspstrong attribute
+; Requires protector.
+define signext i8 @test22c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test22c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test22c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test22c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test22c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca %class.A, align 1
+ %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8* %arrayidx, align 1
+ ret i8 %0
+}
+
+; test22d: [2 x i8] in a class
+; sspreq attribute
+; Requires protector.
+define signext i8 @test22d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test22d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test22d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test22d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test22d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca %class.A, align 1
+ %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8* %arrayidx, align 1
+ ret i8 %0
+}
+
+; test23a: [2 x i8] nested in several layers of structs and unions
+; no ssp attribute
+; Requires no protector.
+define signext i8 @test23a() nounwind uwtable {
+entry:
+; LINUX-I386: test23a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test23a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test23a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test23a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %x = alloca %struct.deep, align 1
+ %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+ %c = bitcast %union.anon* %b to %struct.anon*
+ %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
+ %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+ %array = bitcast %union.anon.1* %e to [2 x i8]*
+ %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8* %arrayidx, align 1
+ ret i8 %0
+}
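+
+; For orientation, the test23 group corresponds roughly to the C sketch below
+; (an illustrative guess at the nesting; names are assumptions):
+;   struct deep { union { struct { struct { union { char a[2]; } e; } d; } c; } b; } x;
+;   return x.b.c.d.e.a[0];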
+
+; test23b: [2 x i8] nested in several layers of structs and unions
+; ssp attribute
+; Requires no protector.
+define signext i8 @test23b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test23b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test23b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test23b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test23b:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %x = alloca %struct.deep, align 1
+ %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+ %c = bitcast %union.anon* %b to %struct.anon*
+ %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
+ %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+ %array = bitcast %union.anon.1* %e to [2 x i8]*
+ %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8* %arrayidx, align 1
+ ret i8 %0
+}
+
+; test23c: [2 x i8] nested in several layers of structs and unions
+; sspstrong attribute
+; Requires protector.
+define signext i8 @test23c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test23c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test23c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test23c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test23c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %x = alloca %struct.deep, align 1
+ %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+ %c = bitcast %union.anon* %b to %struct.anon*
+ %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
+ %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+ %array = bitcast %union.anon.1* %e to [2 x i8]*
+ %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8* %arrayidx, align 1
+ ret i8 %0
+}
+
+; test23d: [2 x i8] nested in several layers of structs and unions
+; sspreq attribute
+; Requires protector.
+define signext i8 @test23d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test23d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test23d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test23d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test23d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %x = alloca %struct.deep, align 1
+ %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+ %c = bitcast %union.anon* %b to %struct.anon*
+ %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
+ %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+ %array = bitcast %union.anon.1* %e to [2 x i8]*
+ %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8* %arrayidx, align 1
+ ret i8 %0
+}
+
+; test24a: Variable sized alloca
+; no ssp attribute
+; Requires no protector.
+define void @test24a(i32 %n) nounwind uwtable {
+entry:
+; LINUX-I386: test24a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test24a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test24a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test24a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %n.addr = alloca i32, align 4
+ %a = alloca i32*, align 8
+ store i32 %n, i32* %n.addr, align 4
+ %0 = load i32* %n.addr, align 4
+ %conv = sext i32 %0 to i64
+ %1 = alloca i8, i64 %conv
+ %2 = bitcast i8* %1 to i32*
+ store i32* %2, i32** %a, align 8
+ ret void
+}
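+
+; For orientation, the test24 group corresponds roughly to the C sketch below
+; (illustrative only). Note that, unlike the fixed-size cases above, even
+; plain ssp guards a variable-sized alloca, so only test24a goes unprotected:
+;   void test24(int n) {
+;     int *a = (int *) alloca(n);
+;   }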
+
+; test24b: Variable sized alloca
+; ssp attribute
+; Requires protector.
+define void @test24b(i32 %n) nounwind uwtable ssp {
+entry:
+; LINUX-I386: test24b:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test24b:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test24b:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test24b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %n.addr = alloca i32, align 4
+ %a = alloca i32*, align 8
+ store i32 %n, i32* %n.addr, align 4
+ %0 = load i32* %n.addr, align 4
+ %conv = sext i32 %0 to i64
+ %1 = alloca i8, i64 %conv
+ %2 = bitcast i8* %1 to i32*
+ store i32* %2, i32** %a, align 8
+ ret void
+}
+
+; test24c: Variable sized alloca
+; sspstrong attribute
+; Requires protector.
+define void @test24c(i32 %n) nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test24c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test24c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test24c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test24c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %n.addr = alloca i32, align 4
+ %a = alloca i32*, align 8
+ store i32 %n, i32* %n.addr, align 4
+ %0 = load i32* %n.addr, align 4
+ %conv = sext i32 %0 to i64
+ %1 = alloca i8, i64 %conv
+ %2 = bitcast i8* %1 to i32*
+ store i32* %2, i32** %a, align 8
+ ret void
+}
+
+; test24d: Variable sized alloca
+; sspreq attribute
+; Requires protector.
+define void @test24d(i32 %n) nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test24d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test24d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test24d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test24d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %n.addr = alloca i32, align 4
+ %a = alloca i32*, align 8
+ store i32 %n, i32* %n.addr, align 4
+ %0 = load i32* %n.addr, align 4
+ %conv = sext i32 %0 to i64
+ %1 = alloca i8, i64 %conv
+ %2 = bitcast i8* %1 to i32*
+ store i32* %2, i32** %a, align 8
+ ret void
+}
+
+; test25a: array of [4 x i32]
+; no ssp attribute
+; Requires no protector.
+define i32 @test25a() nounwind uwtable {
+entry:
+; LINUX-I386: test25a:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test25a:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test25a:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test25a:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %a = alloca [4 x i32], align 16
+ %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
+ %0 = load i32* %arrayidx, align 4
+ ret i32 %0
+}
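+
+; For orientation, the test25 group corresponds roughly to the C sketch below
+; (illustrative only). A non-char array is skipped by plain ssp, except on
+; Darwin (see test25b):
+;   int a[4];
+;   return a[0];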
+
+; test25b: array of [4 x i32]
+; ssp attribute
+; Requires no protector, except for Darwin which _does_ require a protector.
+define i32 @test25b() nounwind uwtable ssp {
+entry:
+; LINUX-I386: test25b:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test25b:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test25b:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test25b:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca [4 x i32], align 16
+ %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
+ %0 = load i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; test25c: array of [4 x i32]
+; sspstrong attribute
+; Requires protector.
+define i32 @test25c() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test25c:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test25c:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test25c:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test25c:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca [4 x i32], align 16
+ %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
+ %0 = load i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; test25d: array of [4 x i32]
+; sspreq attribute
+; Requires protector.
+define i32 @test25d() nounwind uwtable sspreq {
+entry:
+; LINUX-I386: test25d:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test25d:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test25d:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test25d:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %a = alloca [4 x i32], align 16
+ %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
+ %0 = load i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; test26: Nested structure, no arrays, no address-of expressions.
+; Verify that the resulting gep-of-gep does not incorrectly trigger
+; a stack protector.
+; sspstrong attribute
+; Requires no protector.
+define void @test26() nounwind uwtable sspstrong {
+entry:
+; LINUX-I386: test26:
+; LINUX-I386-NOT: calll __stack_chk_fail
+; LINUX-I386: .cfi_endproc
+
+; LINUX-X64: test26:
+; LINUX-X64-NOT: callq __stack_chk_fail
+; LINUX-X64: .cfi_endproc
+
+; LINUX-KERNEL-X64: test26:
+; LINUX-KERNEL-X64-NOT: callq __stack_chk_fail
+; LINUX-KERNEL-X64: .cfi_endproc
+
+; DARWIN-X64: test26:
+; DARWIN-X64-NOT: callq ___stack_chk_fail
+; DARWIN-X64: .cfi_endproc
+ %c = alloca %struct.nest, align 4
+ %b = getelementptr inbounds %struct.nest* %c, i32 0, i32 1
+ %_a = getelementptr inbounds %struct.pair* %b, i32 0, i32 0
+ %0 = load i32* %_a, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %0)
+ ret void
+}
+
+; test27: Address-of a structure taken in a function with a loop where
+; the alloca is an incoming value to a PHI node and a use of that PHI
+; node is also an incoming value.
+; Verify that the address-of analysis does not get stuck in infinite
+; recursion when chasing the alloca through the PHI nodes.
+; Requires protector.
+define i32 @test27(i32 %arg) nounwind uwtable sspstrong {
+bb:
+; LINUX-I386: test27:
+; LINUX-I386: mov{{l|q}} %gs:
+; LINUX-I386: calll __stack_chk_fail
+
+; LINUX-X64: test27:
+; LINUX-X64: mov{{l|q}} %fs:
+; LINUX-X64: callq __stack_chk_fail
+
+; LINUX-KERNEL-X64: test27:
+; LINUX-KERNEL-X64: mov{{l|q}} %gs:
+; LINUX-KERNEL-X64: callq __stack_chk_fail
+
+; DARWIN-X64: test27:
+; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
+; DARWIN-X64: callq ___stack_chk_fail
+ %tmp = alloca %struct.small*, align 8
+ %tmp1 = call i32 (...)* @dummy(%struct.small** %tmp) nounwind
+ %tmp2 = load %struct.small** %tmp, align 8
+ %tmp3 = ptrtoint %struct.small* %tmp2 to i64
+ %tmp4 = trunc i64 %tmp3 to i32
+ %tmp5 = icmp sgt i32 %tmp4, 0
+ br i1 %tmp5, label %bb6, label %bb21
+
+bb6: ; preds = %bb17, %bb
+ %tmp7 = phi %struct.small* [ %tmp19, %bb17 ], [ %tmp2, %bb ]
+ %tmp8 = phi i64 [ %tmp20, %bb17 ], [ 1, %bb ]
+ %tmp9 = phi i32 [ %tmp14, %bb17 ], [ %tmp1, %bb ]
+ %tmp10 = getelementptr inbounds %struct.small* %tmp7, i64 0, i32 0
+ %tmp11 = load i8* %tmp10, align 1
+ %tmp12 = icmp eq i8 %tmp11, 1
+ %tmp13 = add nsw i32 %tmp9, 8
+ %tmp14 = select i1 %tmp12, i32 %tmp13, i32 %tmp9
+ %tmp15 = trunc i64 %tmp8 to i32
+ %tmp16 = icmp eq i32 %tmp15, %tmp4
+ br i1 %tmp16, label %bb21, label %bb17
+
+bb17: ; preds = %bb6
+ %tmp18 = getelementptr inbounds %struct.small** %tmp, i64 %tmp8
+ %tmp19 = load %struct.small** %tmp18, align 8
+ %tmp20 = add i64 %tmp8, 1
+ br label %bb6
+
+bb21: ; preds = %bb6, %bb
+ %tmp22 = phi i32 [ %tmp1, %bb ], [ %tmp14, %bb6 ]
+ %tmp23 = call i32 (...)* @dummy(i32 %tmp22) nounwind
+ ret i32 undef
+}
-declare i32 @printf(i8*, ...) nounwind
+declare double @testi_aux()
+declare i8* @strcpy(i8*, i8*)
+declare i32 @printf(i8*, ...)
+declare void @funcall(i32*)
+declare void @funcall2(i32**)
+declare void @funfloat(float*)
+declare void @funfloat2(float**)
+declare void @_Z3exceptPi(i32*)
+declare i32 @__gxx_personality_v0(...)
+declare i32* @getp()
+declare i32 @dummy(...)
diff --git a/test/CodeGen/X86/stack-update-frame-opcode.ll b/test/CodeGen/X86/stack-update-frame-opcode.ll
new file mode 100644
index 000000000000..9a5a2421233d
--- /dev/null
+++ b/test/CodeGen/X86/stack-update-frame-opcode.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CORE_LP64 %s
+; RUN: llc -mtriple=x86_64-pc-linux -mcpu=atom < %s | FileCheck -check-prefix=ATOM_LP64 %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -mcpu=corei7 < %s | FileCheck -check-prefix=CORE_ILP32 %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -mcpu=atom < %s | FileCheck -check-prefix=ATOM_ILP32 %s
+
+define i32 @bar(i32 %a) nounwind {
+entry:
+ %arr = alloca [400 x i32], align 16
+
+; There is a 2x2 variation matrix here:
+; Atoms use LEA to update the SP; Cores use sub/add. In either case the
+; opcode bitness depends on the data model.
+
+; CORE_LP64: subq $1608
+; CORE_ILP32: subl $1608
+; ATOM_LP64: leaq -1608
+; ATOM_ILP32: leal -1608
+
+ %arraydecay = getelementptr inbounds [400 x i32]* %arr, i64 0, i64 0
+ %call = call i32 @foo(i32 %a, i32* %arraydecay) nounwind
+ ret i32 %call
+
+; CORE_LP64: addq $1608
+; CORE_ILP32: addl $1608
+; ATOM_LP64: leaq 1608
+; ATOM_ILP32: leal 1608
+
+}
+
+declare i32 @foo(i32, i32*)
+
diff --git a/test/CodeGen/X86/store_op_load_fold.ll b/test/CodeGen/X86/store_op_load_fold.ll
index 6e47eb397d1d..070cccdb87dd 100644
--- a/test/CodeGen/X86/store_op_load_fold.ll
+++ b/test/CodeGen/X86/store_op_load_fold.ll
@@ -1,13 +1,30 @@
-; RUN: llc < %s -march=x86 | not grep mov
+; RUN: llc < %s -mtriple=i686-darwin | FileCheck %s
;
; Test that the add and load are folded into the store instruction.
@X = internal global i16 0 ; <i16*> [#uses=2]
define void @foo() nounwind {
+; CHECK: foo:
+; CHECK-NOT: mov
+; CHECK: add
+; CHECK-NEXT: ret
%tmp.0 = load i16* @X ; <i16> [#uses=1]
%tmp.3 = add i16 %tmp.0, 329 ; <i16> [#uses=1]
store i16 %tmp.3, i16* @X
ret void
}
+; rdar://12838504
+%struct.S2 = type { i64, i16, [2 x i8], i8, [3 x i8], [7 x i8], i8, [8 x i8] }
+@s2 = external global %struct.S2, align 16
+define void @test2() nounwind uwtable ssp {
+; CHECK: test2:
+; CHECK: mov
+; CHECK-NEXT: and
+; CHECK-NEXT: ret
+ %bf.load35 = load i56* bitcast ([7 x i8]* getelementptr inbounds (%struct.S2* @s2, i32 0, i32 5) to i56*), align 16
+ %bf.clear36 = and i56 %bf.load35, -1125895611875329
+ store i56 %bf.clear36, i56* bitcast ([7 x i8]* getelementptr inbounds (%struct.S2* @s2, i32 0, i32 5) to i56*), align 16
+ ret void
+}
diff --git a/test/CodeGen/X86/subtarget-feature-change.ll b/test/CodeGen/X86/subtarget-feature-change.ll
new file mode 100644
index 000000000000..cd677294c669
--- /dev/null
+++ b/test/CodeGen/X86/subtarget-feature-change.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; This should not generate SSE instructions:
+;
+; CHECK: without.sse:
+; CHECK: flds
+; CHECK: fmuls
+; CHECK: fstps
+define void @without.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #0 {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !tbaa !0
+ %mul = fmul float %0, %1
+ %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+ store float %mul, float* %arrayidx4, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+; This should generate SSE instructions:
+;
+; CHECK: with.sse:
+; CHECK: movss
+; CHECK: mulss
+; CHECK: movss
+define void @with.sse(float* nocapture %a, float* nocapture %b, float* nocapture %c, i32 %n) #1 {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !tbaa !0
+ %mul = fmul float %0, %1
+ %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
+ store float %mul, float* %arrayidx4, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+attributes #0 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,-sse,-avx,-sse41,-ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,-sse2,-sse3" }
+attributes #1 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,+sse,-avx,-sse41,+ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,+sse2,+sse3" }
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll
index 7f92af4dca9f..842ed25439f8 100644
--- a/test/CodeGen/X86/tailcall-fastisel.ll
+++ b/test/CodeGen/X86/tailcall-fastisel.ll
@@ -1,12 +1,11 @@
-; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | not grep TAILCALL
-
-; Fast-isel shouldn't attempt to cope with tail calls.
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -tailcallopt -fast-isel -fast-isel-abort | FileCheck %s
%0 = type { i64, i32, i8* }
define fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 %arg1) nounwind {
fail: ; preds = %entry
%tmp20 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1]
+; CHECK: jmp "_visit_array_aux<`Reference>" ## TAILCALL
ret i8* %tmp20
}
diff --git a/test/CodeGen/X86/tailcall-structret.ll b/test/CodeGen/X86/tailcall-structret.ll
index d8be4b2e2dfd..dcfefe86704e 100644
--- a/test/CodeGen/X86/tailcall-structret.ll
+++ b/test/CodeGen/X86/tailcall-structret.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
+; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | FileCheck %s
define fastcc { { i8*, i8* }*, i8*} @init({ { i8*, i8* }*, i8*}, i32) {
entry:
%2 = tail call fastcc { { i8*, i8* }*, i8* } @init({ { i8*, i8*}*, i8*} %0, i32 %1)
ret { { i8*, i8* }*, i8*} %2
+; CHECK: jmp init
}
diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll
index 118eee6ba6cd..9a0b57c138c2 100644
--- a/test/CodeGen/X86/tailcallbyval.ll
+++ b/test/CodeGen/X86/tailcallbyval.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL
-; RUN: llc < %s -march=x86 -tailcallopt | grep "movl[[:space:]]*4(%esp), %eax" | count 1
+; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | FileCheck %s
%struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32 }
@@ -9,10 +8,14 @@ entry:
%tmp2 = getelementptr %struct.s* %a, i32 0, i32 0
%tmp3 = load i32* %tmp2
ret i32 %tmp3
+; CHECK: tailcallee
+; CHECK: movl 4(%esp), %eax
}
define fastcc i32 @tailcaller(%struct.s* byval %a) nounwind {
entry:
%tmp4 = tail call fastcc i32 @tailcallee(%struct.s* byval %a )
ret i32 %tmp4
+; CHECK: tailcaller
+; CHECK: jmp tailcallee
}
diff --git a/test/CodeGen/X86/tailcallfp.ll b/test/CodeGen/X86/tailcallfp.ll
index c0b609ac956e..22a7930ba877 100644
--- a/test/CodeGen/X86/tailcallfp.ll
+++ b/test/CodeGen/X86/tailcallfp.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -tailcallopt | not grep call
+; RUN: llc < %s -march=x86 -tailcallopt | FileCheck %s
define fastcc i32 @bar(i32 %X, i32(double, i32) *%FP) {
%Y = tail call fastcc i32 %FP(double 0.0, i32 %X)
ret i32 %Y
+; CHECK: jmpl
}
diff --git a/test/CodeGen/X86/tailcallpic1.ll b/test/CodeGen/X86/tailcallpic1.ll
index 60e3be5c50fd..ff590a1fd3e9 100644
--- a/test/CodeGen/X86/tailcallpic1.ll
+++ b/test/CodeGen/X86/tailcallpic1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL
+; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s
define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
@@ -9,4 +9,5 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
entry:
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
ret i32 %tmp11
+; CHECK: jmp tailcallee
}
diff --git a/test/CodeGen/X86/tailcallpic2.ll b/test/CodeGen/X86/tailcallpic2.ll
index eaa76312396c..1b6bdb769861 100644
--- a/test/CodeGen/X86/tailcallpic2.ll
+++ b/test/CodeGen/X86/tailcallpic2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL
+; RUN: llc < %s -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | FileCheck %s
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
@@ -9,4 +9,7 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
entry:
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
ret i32 %tmp11
+; CHECK: movl tailcallee@GOT
+; CHECK: jmpl
}
+
diff --git a/test/CodeGen/X86/thiscall-struct-return.ll b/test/CodeGen/X86/thiscall-struct-return.ll
deleted file mode 100644
index 0507cb890cd2..000000000000
--- a/test/CodeGen/X86/thiscall-struct-return.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc < %s -mtriple=i386-PC-Win32 | FileCheck %s
-
-%class.C = type { i8 }
-%struct.S = type { i32 }
-%struct.M = type { i32, i32 }
-
-declare void @_ZN1CC1Ev(%class.C* %this) unnamed_addr nounwind align 2
-declare x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* noalias sret %agg.result, %class.C* %this) nounwind align 2
-declare x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* noalias sret %agg.result, %class.C* %this) nounwind align 2
-
-define void @testv() nounwind {
-; CHECK: testv:
-; CHECK: leal 16(%esp), %esi
-; CHECK-NEXT: movl %esi, (%esp)
-; CHECK-NEXT: calll _ZN1CC1Ev
-; CHECK: leal 8(%esp), %eax
-; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: calll _ZNK1C5SmallEv
-entry:
- %c = alloca %class.C, align 1
- %tmp = alloca %struct.S, align 4
- call void @_ZN1CC1Ev(%class.C* %c)
- ; This call should put the return structure as a pointer
- ; into EAX instead of returning directly in EAX. The this
- ; pointer should go into ECX
- call x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* sret %tmp, %class.C* %c)
- ret void
-}
-
-define void @test2v() nounwind {
-; CHECK: test2v:
-; CHECK: leal 16(%esp), %esi
-; CHECK-NEXT: movl %esi, (%esp)
-; CHECK-NEXT: calll _ZN1CC1Ev
-; CHECK: leal 8(%esp), %eax
-; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: calll _ZNK1C6MediumEv
-entry:
- %c = alloca %class.C, align 1
- %tmp = alloca %struct.M, align 4
- call void @_ZN1CC1Ev(%class.C* %c)
- ; This call should put the return structure as a pointer
- ; into EAX instead of returning directly in EAX/EDX. The this
- ; pointer should go into ECX
- call x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* sret %tmp, %class.C* %c)
- ret void
-}
diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll
index e8a79bfa6ee3..8cdecd81bff5 100644
--- a/test/CodeGen/X86/tls.ll
+++ b/test/CodeGen/X86/tls.ll
@@ -22,13 +22,13 @@ define i32 @f1() {
; X32_WIN: movl __tls_index, %eax
; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movl _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: movl _i1@SECREL32(%eax), %eax
; X32_WIN-NEXT: ret
; X64_WIN: f1:
; X64_WIN: movl _tls_index(%rip), %eax
; X64_WIN-NEXT: movq %gs:88, %rcx
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movl i1@SECREL(%rax), %eax
+; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax
; X64_WIN-NEXT: ret
entry:
@@ -49,13 +49,13 @@ define i32* @f2() {
; X32_WIN: movl __tls_index, %eax
; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: leal _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: leal _i1@SECREL32(%eax), %eax
; X32_WIN-NEXT: ret
; X64_WIN: f2:
; X64_WIN: movl _tls_index(%rip), %eax
; X64_WIN-NEXT: movq %gs:88, %rcx
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: leaq i1@SECREL(%rax), %rax
+; X64_WIN-NEXT: leaq i1@SECREL32(%rax), %rax
; X64_WIN-NEXT: ret
entry:
@@ -75,13 +75,13 @@ define i32 @f3() nounwind {
; X32_WIN: movl __tls_index, %eax
; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movl _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: movl _i2@SECREL32(%eax), %eax
; X32_WIN-NEXT: ret
; X64_WIN: f3:
; X64_WIN: movl _tls_index(%rip), %eax
; X64_WIN-NEXT: movq %gs:88, %rcx
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movl i2@SECREL(%rax), %eax
+; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax
; X64_WIN-NEXT: ret
entry:
@@ -102,13 +102,13 @@ define i32* @f4() {
; X32_WIN: movl __tls_index, %eax
; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: leal _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: leal _i2@SECREL32(%eax), %eax
; X32_WIN-NEXT: ret
; X64_WIN: f4:
; X64_WIN: movl _tls_index(%rip), %eax
; X64_WIN-NEXT: movq %gs:88, %rcx
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: leaq i2@SECREL(%rax), %rax
+; X64_WIN-NEXT: leaq i2@SECREL32(%rax), %rax
; X64_WIN-NEXT: ret
entry:
@@ -126,13 +126,13 @@ define i32 @f5() nounwind {
; X32_WIN: movl __tls_index, %eax
; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movl _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: movl _i3@SECREL32(%eax), %eax
; X32_WIN-NEXT: ret
; X64_WIN: f5:
; X64_WIN: movl _tls_index(%rip), %eax
; X64_WIN-NEXT: movq %gs:88, %rcx
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movl i3@SECREL(%rax), %eax
+; X64_WIN-NEXT: movl i3@SECREL32(%rax), %eax
; X64_WIN-NEXT: ret
entry:
@@ -153,13 +153,13 @@ define i32* @f6() {
; X32_WIN: movl __tls_index, %eax
; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: leal _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: leal _i3@SECREL32(%eax), %eax
; X32_WIN-NEXT: ret
; X64_WIN: f6:
; X64_WIN: movl _tls_index(%rip), %eax
; X64_WIN-NEXT: movq %gs:88, %rcx
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: leaq i3@SECREL(%rax), %rax
+; X64_WIN-NEXT: leaq i3@SECREL32(%rax), %rax
; X64_WIN-NEXT: ret
entry:
@@ -234,14 +234,14 @@ define i16 @f11() {
; X32_WIN: movl __tls_index, %eax
; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movzwl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: movzwl _s1@SECREL32(%eax), %eax
; X32_WIN-NEXT: # kill
; X32_WIN-NEXT: ret
; X64_WIN: f11:
; X64_WIN: movl _tls_index(%rip), %eax
; X64_WIN-NEXT: movq %gs:88, %rcx
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movzwl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax
; X64_WIN-NEXT: # kill
; X64_WIN-NEXT: ret
@@ -261,13 +261,13 @@ define i32 @f12() {
; X32_WIN: movl __tls_index, %eax
; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movswl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: movswl _s1@SECREL32(%eax), %eax
; X32_WIN-NEXT: ret
; X64_WIN: f12:
; X64_WIN: movl _tls_index(%rip), %eax
; X64_WIN-NEXT: movq %gs:88, %rcx
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movswl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: movswl s1@SECREL32(%rax), %eax
; X64_WIN-NEXT: ret
entry:
@@ -287,13 +287,13 @@ define i8 @f13() {
; X32_WIN: movl __tls_index, %eax
; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movb _b1@SECREL(%eax), %al
+; X32_WIN-NEXT: movb _b1@SECREL32(%eax), %al
; X32_WIN-NEXT: ret
; X64_WIN: f13:
; X64_WIN: movl _tls_index(%rip), %eax
; X64_WIN-NEXT: movq %gs:88, %rcx
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movb b1@SECREL(%rax), %al
+; X64_WIN-NEXT: movb b1@SECREL32(%rax), %al
; X64_WIN-NEXT: ret
entry:
@@ -312,13 +312,13 @@ define i32 @f14() {
; X32_WIN: movl __tls_index, %eax
; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X32_WIN-NEXT: movsbl _b1@SECREL(%eax), %eax
+; X32_WIN-NEXT: movsbl _b1@SECREL32(%eax), %eax
; X32_WIN-NEXT: ret
; X64_WIN: f14:
; X64_WIN: movl _tls_index(%rip), %eax
; X64_WIN-NEXT: movq %gs:88, %rcx
; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movsbl b1@SECREL(%rax), %eax
+; X64_WIN-NEXT: movsbl b1@SECREL32(%rax), %eax
; X64_WIN-NEXT: ret
entry:
diff --git a/test/CodeGen/X86/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll
index af6d47af7a0f..cbcde0655597 100644
--- a/test/CodeGen/X86/twoaddr-coalesce-2.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -stats 2>&1 | \
; RUN: grep "twoaddrinstr" | grep "Number of instructions aggressively commuted"
; rdar://6480363
diff --git a/test/CodeGen/X86/twoaddr-pass-sink.ll b/test/CodeGen/X86/twoaddr-pass-sink.ll
index 513c304e3bf8..9ca280627afe 100644
--- a/test/CodeGen/X86/twoaddr-pass-sink.ll
+++ b/test/CodeGen/X86/twoaddr-pass-sink.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | grep "Number of 3-address instructions sunk"
define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind {
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
index b89c4738af12..e02e3b54752b 100644
--- a/test/CodeGen/X86/unknown-location.ll
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -18,12 +18,16 @@ entry:
ret i32 %c, !dbg !8
}
-!0 = metadata !{i32 524545, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"test.c", metadata !"/dir", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 12, metadata !"test.c", metadata !".", metadata !"producer", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!llvm.dbg.cu = !{!3}
+
+!0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, i32, i32, i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, metadata !10, i32 12, metadata !"producer", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
!5 = metadata !{metadata !6}
-!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 524299, metadata !1, i32 1, i32 30} ; [ DW_TAG_lexical_block ]
+!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786443, metadata !1, i32 1, i32 30} ; [ DW_TAG_lexical_block ]
!8 = metadata !{i32 4, i32 3, metadata !7, null}
+!9 = metadata !{metadata !1}
+!10 = metadata !{metadata !"test.c", metadata !"/dir"}
diff --git a/test/CodeGen/X86/v8i1-masks.ll b/test/CodeGen/X86/v8i1-masks.ll
new file mode 100644
index 000000000000..8cbfb5d7243a
--- /dev/null
+++ b/test/CodeGen/X86/v8i1-masks.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
+
+;CHECK: and_masks
+;CHECK: vmovaps
+;CHECK: vcmpltp
+;CHECK: vcmpltp
+;CHECK: vandps
+;CHECK: vandps
+;CHECK: vmovaps
+;CHECK: ret
+
+define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+ %v0 = load <8 x float>* %a, align 16
+ %v1 = load <8 x float>* %b, align 16
+ %m0 = fcmp olt <8 x float> %v1, %v0
+ %v2 = load <8 x float>* %c, align 16
+ %m1 = fcmp olt <8 x float> %v2, %v0
+ %mand = and <8 x i1> %m1, %m0
+ %r = zext <8 x i1> %mand to <8 x i32>
+ store <8 x i32> %r, <8 x i32>* undef, align 32
+ ret void
+}
+
+;CHECK: neg_mask
+;CHECK: vcmpltps
+;CHECK: vxorps
+;CHECK: vandps
+;CHECK: vmovaps
+;CHECK: ret
+define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+ %v0 = load <8 x float>* %a, align 16
+ %v1 = load <8 x float>* %b, align 16
+ %m0 = fcmp olt <8 x float> %v1, %v0
+ %mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
+ %r = zext <8 x i1> %mand to <8 x i32>
+ store <8 x i32> %r, <8 x i32>* undef, align 32
+ ret void
+}
+
diff --git a/test/CodeGen/X86/vec_align_i256.ll b/test/CodeGen/X86/vec_align_i256.ll
new file mode 100644
index 000000000000..078bcb1544d3
--- /dev/null
+++ b/test/CodeGen/X86/vec_align_i256.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+; Make sure that we are not generating a movaps because the vector is aligned to 1.
+;CHECK: @foo
+;CHECK: xor
+;CHECK-NEXT: vmovups
+;CHECK-NEXT: ret
+define void @foo() {
+ store <16 x i16> zeroinitializer, <16 x i16>* undef, align 1
+ ret void
+}
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
index 367dd27f3076..b6d91a3f770e 100644
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -41,3 +41,27 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
%D = sext <4 x i1> %C to <4 x i32>
ret <4 x i32> %D
}
+
+define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test5:
+; CHECK: pcmpeqd
+; CHECK: pshufd $-79
+; CHECK: pand
+; CHECK: ret
+ %C = icmp eq <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}
+
+define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test6:
+; CHECK: pcmpeqd
+; CHECK: pshufd $-79
+; CHECK: pand
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+ %C = icmp ne <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}
diff --git a/test/CodeGen/X86/vec_floor.ll b/test/CodeGen/X86/vec_floor.ll
index 5e0160bd2856..4db68bd18223 100644
--- a/test/CodeGen/X86/vec_floor.ll
+++ b/test/CodeGen/X86/vec_floor.ll
@@ -36,3 +36,147 @@ define <8 x float> @floor_v8f32(<8 x float> %p)
ret <8 x float> %t
}
declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+
+define <2 x double> @ceil_v2f64(<2 x double> %p)
+{
+ ; CHECK: ceil_v2f64
+ ; CHECK: vroundpd
+ %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+
+define <4 x float> @ceil_v4f32(<4 x float> %p)
+{
+ ; CHECK: ceil_v4f32
+ ; CHECK: vroundps
+ %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+
+define <4 x double> @ceil_v4f64(<4 x double> %p)
+{
+ ; CHECK: ceil_v4f64
+ ; CHECK: vroundpd
+ %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+
+define <8 x float> @ceil_v8f32(<8 x float> %p)
+{
+ ; CHECK: ceil_v8f32
+ ; CHECK: vroundps
+ %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+
+define <2 x double> @trunc_v2f64(<2 x double> %p)
+{
+ ; CHECK: trunc_v2f64
+ ; CHECK: vroundpd
+ %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+
+define <4 x float> @trunc_v4f32(<4 x float> %p)
+{
+ ; CHECK: trunc_v4f32
+ ; CHECK: vroundps
+ %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+
+define <4 x double> @trunc_v4f64(<4 x double> %p)
+{
+ ; CHECK: trunc_v4f64
+ ; CHECK: vroundpd
+ %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+
+define <8 x float> @trunc_v8f32(<8 x float> %p)
+{
+ ; CHECK: trunc_v8f32
+ ; CHECK: vroundps
+ %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+
+define <2 x double> @rint_v2f64(<2 x double> %p)
+{
+ ; CHECK: rint_v2f64
+ ; CHECK: vroundpd
+ %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)
+
+define <4 x float> @rint_v4f32(<4 x float> %p)
+{
+ ; CHECK: rint_v4f32
+ ; CHECK: vroundps
+ %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)
+
+define <4 x double> @rint_v4f64(<4 x double> %p)
+{
+ ; CHECK: rint_v4f64
+ ; CHECK: vroundpd
+ %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)
+
+define <8 x float> @rint_v8f32(<8 x float> %p)
+{
+ ; CHECK: rint_v8f32
+ ; CHECK: vroundps
+ %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)
+
+define <2 x double> @nearbyint_v2f64(<2 x double> %p)
+{
+ ; CHECK: nearbyint_v2f64
+ ; CHECK: vroundpd
+ %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+
+define <4 x float> @nearbyint_v4f32(<4 x float> %p)
+{
+ ; CHECK: nearbyint_v4f32
+ ; CHECK: vroundps
+ %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+
+define <4 x double> @nearbyint_v4f64(<4 x double> %p)
+{
+ ; CHECK: nearbyint_v4f64
+ ; CHECK: vroundpd
+ %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+
+define <8 x float> @nearbyint_v8f32(<8 x float> %p)
+{
+ ; CHECK: nearbyint_v8f32
+ ; CHECK: vroundps
+ %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll
index dc0464ff9e0f..863712ff48b3 100644
--- a/test/CodeGen/X86/vec_fpext.ll
+++ b/test/CodeGen/X86/vec_fpext.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
-; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck --check-prefix=AVX %s
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s
; PR11674
define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
@@ -29,8 +29,8 @@ entry:
; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
; CHECK: cvtps2pd 16(%{{.+}}), %xmm{{[0-9]+}}
; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}}
-; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
%0 = load <8 x float>* %in
%1 = fpext <8 x float> %0 to <8 x double>
store <8 x double> %1, <8 x double>* %out, align 1
diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll
index 2a4864a48a25..4583e1925e59 100644
--- a/test/CodeGen/X86/vec_insert-6.ll
+++ b/test/CodeGen/X86/vec_insert-6.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn | grep pslldq
; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6
diff --git a/test/CodeGen/X86/vec_sdiv_to_shift.ll b/test/CodeGen/X86/vec_sdiv_to_shift.ll
new file mode 100644
index 000000000000..349868a87f53
--- /dev/null
+++ b/test/CodeGen/X86/vec_sdiv_to_shift.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=+avx2 | FileCheck %s
+
+
+define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
+entry:
+; CHECK: sdiv_vec8x16
+; CHECK: psraw $15
+; CHECK: vpsrlw $11
+; CHECK: vpaddw
+; CHECK: vpsraw $5
+; CHECK: ret
+ %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+ ret <8 x i16> %0
+}
+
+define <4 x i32> @sdiv_zero(<4 x i32> %var) {
+entry:
+; CHECK: sdiv_zero
+; CHECK-NOT: sra
+; CHECK: ret
+ %0 = sdiv <4 x i32> %var, <i32 0, i32 0, i32 0, i32 0>
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
+entry:
+; CHECK: sdiv_vec4x32
+; CHECK: vpsrad $31
+; CHECK: vpsrld $28
+; CHECK: vpaddd
+; CHECK: vpsrad $4
+; CHECK: ret
+%0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
+ret <4 x i32> %0
+}
+
+define <4 x i32> @sdiv_negative(<4 x i32> %var) {
+entry:
+; CHECK: sdiv_negative
+; CHECK: vpsrad $31
+; CHECK: vpsrld $28
+; CHECK: vpaddd
+; CHECK: vpsrad $4
+; CHECK: vpsubd
+; CHECK: ret
+%0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
+ret <4 x i32> %0
+}
+
+define <8 x i32> @sdiv8x32(<8 x i32> %var) {
+entry:
+; CHECK: sdiv8x32
+; CHECK: vpsrad $31
+; CHECK: vpsrld $26
+; CHECK: vpaddd
+; CHECK: vpsrad $6
+; CHECK: ret
+%0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
+ret <8 x i32> %0
+}
+
+define <16 x i16> @sdiv16x16(<16 x i16> %var) {
+entry:
+; CHECK: sdiv16x16
+; CHECK: vpsraw $15
+; CHECK: vpsrlw $14
+; CHECK: vpaddw
+; CHECK: vpsraw $2
+; CHECK: ret
+ %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+ ret <16 x i16> %a0
+}
diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll
index b26f920e5e23..48db8de0d936 100644
--- a/test/CodeGen/X86/vec_shuffle-19.ll
+++ b/test/CodeGen/X86/vec_shuffle-19.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -o /dev/null -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
; PR2485
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
index 976cd1835b40..5a2c4449456b 100644
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+; REQUIRES: asserts
+; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2
define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll
index f105de4d977d..5c668b7e5a5b 100644
--- a/test/CodeGen/X86/vec_splat-2.ll
+++ b/test/CodeGen/X86/vec_splat-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 | grep pshufd | count 1
+; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s
define void @test(<2 x i64>* %P, i8 %x) nounwind {
%tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0 ; <<16 x i8>> [#uses=1]
@@ -23,4 +23,11 @@ define void @test(<2 x i64>* %P, i8 %x) nounwind {
%tmp73.upgrd.1 = bitcast <16 x i8> %tmp73 to <2 x i64> ; <<2 x i64>> [#uses=1]
store <2 x i64> %tmp73.upgrd.1, <2 x i64>* %P
ret void
+
+; CHECK: test:
+; CHECK-NOT: pshufd
+; CHECK: punpcklbw
+; CHECK: punpcklbw
+; CHECK: pshufd $0
+; CHECK-NOT: pshufd
}
diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll
index feacc42406df..cf0ecf40554d 100644
--- a/test/CodeGen/X86/vec_splat-3.ll
+++ b/test/CodeGen/X86/vec_splat-3.ll
@@ -1,55 +1,230 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 -o %t
-; RUN: grep punpcklwd %t | count 4
-; RUN: grep punpckhwd %t | count 4
-; RUN: grep "pshufd" %t | count 8
+; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
; Splat test for v8i16
-; Should generate with pshufd with masks $0, $85, $170, $255 (each mask is used twice)
define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef , i32 undef >
+ %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_0:
+; CHECK: pshuflw $0
}
define <8 x i16> @shuf_8i16_1(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+ %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_1:
+; CHECK: pshuflw $5
}
define <8 x i16> @shuf_8i16_2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef , i32 undef >
+ %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef, i32 undef>
ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_2:
+; CHECK: punpcklwd
+; CHECK-NEXT: pshufd $-86
}
define <8 x i16> @shuf_8i16_3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+ %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_3:
+; CHECK: pshuflw $15
}
define <8 x i16> @shuf_8i16_4(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef , i32 undef >
+ %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_4:
+; CHECK: movhlps
}
define <8 x i16> @shuf_8i16_5(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 undef, i32 undef , i32 undef >
+ %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_5:
+; CHECK: punpckhwd
+; CHECK-NEXT: pshufd $85
}
define <8 x i16> @shuf_8i16_6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 6, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+ %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 6, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %tmp6
-}
+; CHECK: shuf_8i16_6:
+; CHECK: punpckhwd
+; CHECK-NEXT: pshufd $-86
+}
define <8 x i16> @shuf_8i16_7(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef , i32 undef >
+ %tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %tmp6
+
+; CHECK: shuf_8i16_7:
+; CHECK: punpckhwd
+; CHECK-NEXT: pshufd $-1
+}
+
+; Splat test for v16i8
+define <16 x i8> @shuf_16i8_8(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_8:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $0
+}
+
+define <16 x i8> @shuf_16i8_9(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_9:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $85
+}
+
+define <16 x i8> @shuf_16i8_10(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_10:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $-86
+}
+
+define <16 x i8> @shuf_16i8_11(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_11:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $-1
+}
+
+
+define <16 x i8> @shuf_16i8_12(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_12:
+; CHECK: pshufd $5
+}
+
+define <16 x i8> @shuf_16i8_13(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_13:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $85
+}
+
+define <16 x i8> @shuf_16i8_14(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_14:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $-86
+}
+
+define <16 x i8> @shuf_16i8_15(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_15:
+; CHECK: punpcklbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $-1
+}
+
+define <16 x i8> @shuf_16i8_16(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_16:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $0
+}
+
+define <16 x i8> @shuf_16i8_17(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 9, i32 undef, i32 undef, i32 9, i32 undef, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_17:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $85
+}
+
+define <16 x i8> @shuf_16i8_18(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_18:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $-86
+}
+
+define <16 x i8> @shuf_16i8_19(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_19:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpcklbw
+; CHECK-NEXT: pshufd $-1
+}
+
+define <16 x i8> @shuf_16i8_20(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_20:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $0
+}
+
+define <16 x i8> @shuf_16i8_21(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_21:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $85
+}
+
+define <16 x i8> @shuf_16i8_22(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_22:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $-86
+}
+
+define <16 x i8> @shuf_16i8_23(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
+ %tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+ ret <16 x i8> %tmp6
+
+; CHECK: shuf_16i8_23:
+; CHECK: punpckhbw
+; CHECK-NEXT: punpckhbw
+; CHECK-NEXT: pshufd $-1
}
diff --git a/test/CodeGen/X86/vec_splat-4.ll b/test/CodeGen/X86/vec_splat-4.ll
deleted file mode 100644
index 374acfa4e094..000000000000
--- a/test/CodeGen/X86/vec_splat-4.ll
+++ /dev/null
@@ -1,104 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 -o %t
-; RUN: grep punpcklbw %t | count 16
-; RUN: grep punpckhbw %t | count 16
-; RUN: grep "pshufd" %t | count 16
-
-; Should generate with pshufd with masks $0, $85, $170, $255 (each mask is used 4 times)
-
-; Splat test for v16i8
-define <16 x i8 > @shuf_16i8_0(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0 , i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_1(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_2(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2 , i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2 >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_3(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3 , i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3 >
- ret <16 x i8 > %tmp6
-}
-
-
-define <16 x i8 > @shuf_16i8_4(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_5(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5 , i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5 >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_6(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6 , i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6 >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_7(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef , i32 undef, i32 undef, i32 undef, i32 undef , i32 undef , i32 undef, i32 undef, i32 undef , i32 undef >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_8(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8 , i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_9(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 9, i32 undef, i32 undef, i32 9, i32 undef, i32 9, i32 9 , i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9 >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_10(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10 , i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10 >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_11(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11 , i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11 >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_12(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12 , i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12 >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_13(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13 , i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13 >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_14(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14 , i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14 >
- ret <16 x i8 > %tmp6
-}
-
-define <16 x i8 > @shuf_16i8_15(<16 x i8 > %T0, <16 x i8 > %T1) nounwind readnone {
-entry:
- %tmp6 = shufflevector <16 x i8 > %T0, <16 x i8 > %T1, <16 x i32> < i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15 , i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15 >
- ret <16 x i8 > %tmp6
-}
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
index 24d8487f17bd..deedee801967 100644
--- a/test/CodeGen/X86/vec_splat.ll
+++ b/test/CodeGen/X86/vec_splat.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 | grep pshufd
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse3 | grep movddup
+; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s -check-prefix=SSE3
define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
@@ -10,6 +10,12 @@ define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
%tmp10 = fmul <4 x float> %tmp8, %tmp6 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp10, <4 x float>* %P
ret void
+
+; SSE2: test_v4sf:
+; SSE2: pshufd $0
+
+; SSE3: test_v4sf:
+; SSE3: pshufd $0
}
define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
@@ -19,4 +25,10 @@ define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
%tmp6 = fmul <2 x double> %tmp4, %tmp2 ; <<2 x double>> [#uses=1]
store <2 x double> %tmp6, <2 x double>* %P
ret void
+
+; SSE2: test_v2sd:
+; SSE2: shufpd $0
+
+; SSE3: test_v2sd:
+; SSE3: movddup
}
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index 682a0dfca806..c3ea0ad2023f 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -13,7 +13,7 @@ define void @foo(<4 x float>* %P) {
; CHECK: pxor
define void @bar(<4 x i32>* %P) {
%T = load <4 x i32>* %P ; <<4 x i32>> [#uses=1]
- %S = add <4 x i32> zeroinitializer, %T ; <<4 x i32>> [#uses=1]
+ %S = sub <4 x i32> zeroinitializer, %T ; <<4 x i32>> [#uses=1]
store <4 x i32> %S, <4 x i32>* %P
ret void
}
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
index 3476e36c646f..ec93ce0761cc 100644
--- a/test/CodeGen/X86/vector-gep.ll
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
-; RUN: opt -instsimplify %s -disable-output
+; RUN: opt -instsimplify -disable-output < %s
;CHECK: AGEP0:
define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
@@ -8,10 +8,8 @@ entry:
%vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
%vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
%vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
-;CHECK: pslld $2
;CHECK: padd
%A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-;CHECK: pslld $2
;CHECK: padd
%A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
ret <4 x i32*> %A3
@@ -21,7 +19,6 @@ entry:
;CHECK: AGEP1:
define i32 @AGEP1(<4 x i32*> %param) nounwind {
entry:
-;CHECK: pslld $2
;CHECK: padd
%A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
%k = extractelement <4 x i32*> %A2, i32 3
diff --git a/test/CodeGen/X86/vselect-minmax.ll b/test/CodeGen/X86/vselect-minmax.ll
new file mode 100644
index 000000000000..cf654b6f2059
--- /dev/null
+++ b/test/CodeGen/X86/vselect-minmax.ll
@@ -0,0 +1,2788 @@
+; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
+; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
+; RUN: llc -march=x86-64 -mcpu=core-avx2 -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2
+
+define void @test1(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp slt <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test1:
+; SSE4: pminsb
+
+; AVX1: test1:
+; AVX1: vpminsb
+
+; AVX2: test1:
+; AVX2: vpminsb
+}
+
+define void @test2(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp sle <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test2:
+; SSE4: pminsb
+
+; AVX1: test2:
+; AVX1: vpminsb
+
+; AVX2: test2:
+; AVX2: vpminsb
+}
+
+define void @test3(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp sgt <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test3:
+; SSE4: pmaxsb
+
+; AVX1: test3:
+; AVX1: vpmaxsb
+
+; AVX2: test3:
+; AVX2: vpmaxsb
+}
+
+define void @test4(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp sge <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test4:
+; SSE4: pmaxsb
+
+; AVX1: test4:
+; AVX1: vpmaxsb
+
+; AVX2: test4:
+; AVX2: vpmaxsb
+}
+
+define void @test5(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp ult <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test5:
+; SSE2: pminub
+
+; AVX1: test5:
+; AVX1: vpminub
+
+; AVX2: test5:
+; AVX2: vpminub
+}
+
+define void @test6(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp ule <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test6:
+; SSE2: pminub
+
+; AVX1: test6:
+; AVX1: vpminub
+
+; AVX2: test6:
+; AVX2: vpminub
+}
+
+define void @test7(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp ugt <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test7:
+; SSE2: pmaxub
+
+; AVX1: test7:
+; AVX1: vpmaxub
+
+; AVX2: test7:
+; AVX2: vpmaxub
+}
+
+define void @test8(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp uge <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test8:
+; SSE2: pmaxub
+
+; AVX1: test8:
+; AVX1: vpmaxub
+
+; AVX2: test8:
+; AVX2: vpmaxub
+}
+
+define void @test9(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp slt <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test9:
+; SSE2: pminsw
+
+; AVX1: test9:
+; AVX1: vpminsw
+
+; AVX2: test9:
+; AVX2: vpminsw
+}
+
+define void @test10(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp sle <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test10:
+; SSE2: pminsw
+
+; AVX1: test10:
+; AVX1: vpminsw
+
+; AVX2: test10:
+; AVX2: vpminsw
+}
+
+define void @test11(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp sgt <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test11:
+; SSE2: pmaxsw
+
+; AVX1: test11:
+; AVX1: vpmaxsw
+
+; AVX2: test11:
+; AVX2: vpmaxsw
+}
+
+define void @test12(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp sge <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test12:
+; SSE2: pmaxsw
+
+; AVX1: test12:
+; AVX1: vpmaxsw
+
+; AVX2: test12:
+; AVX2: vpmaxsw
+}
+
+define void @test13(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp ult <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test13:
+; SSE4: pminuw
+
+; AVX1: test13:
+; AVX1: vpminuw
+
+; AVX2: test13:
+; AVX2: vpminuw
+}
+
+define void @test14(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp ule <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test14:
+; SSE4: pminuw
+
+; AVX1: test14:
+; AVX1: vpminuw
+
+; AVX2: test14:
+; AVX2: vpminuw
+}
+
+define void @test15(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp ugt <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test15:
+; SSE4: pmaxuw
+
+; AVX1: test15:
+; AVX1: vpmaxuw
+
+; AVX2: test15:
+; AVX2: vpmaxuw
+}
+
+define void @test16(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp uge <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test16:
+; SSE4: pmaxuw
+
+; AVX1: test16:
+; AVX1: vpmaxuw
+
+; AVX2: test16:
+; AVX2: vpmaxuw
+}
+
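+; Tests 17-20: signed <4 x i32> min/max via pminsd/pmaxsd (SSE4.1 and up).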
+define void @test17(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp slt <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test17:
+; SSE4: pminsd
+
+; AVX1: test17:
+; AVX1: vpminsd
+
+; AVX2: test17:
+; AVX2: vpminsd
+}
+
+define void @test18(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp sle <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test18:
+; SSE4: pminsd
+
+; AVX1: test18:
+; AVX1: vpminsd
+
+; AVX2: test18:
+; AVX2: vpminsd
+}
+
+define void @test19(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp sgt <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test19:
+; SSE4: pmaxsd
+
+; AVX1: test19:
+; AVX1: vpmaxsd
+
+; AVX2: test19:
+; AVX2: vpmaxsd
+}
+
+define void @test20(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp sge <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test20:
+; SSE4: pmaxsd
+
+; AVX1: test20:
+; AVX1: vpmaxsd
+
+; AVX2: test20:
+; AVX2: vpmaxsd
+}
+
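+; Tests 21-24: unsigned <4 x i32> min/max via pminud/pmaxud (SSE4.1 and up).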
+define void @test21(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp ult <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test21:
+; SSE4: pminud
+
+; AVX1: test21:
+; AVX1: vpminud
+
+; AVX2: test21:
+; AVX2: vpminud
+}
+
+define void @test22(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp ule <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test22:
+; SSE4: pminud
+
+; AVX1: test22:
+; AVX1: vpminud
+
+; AVX2: test22:
+; AVX2: vpminud
+}
+
+define void @test23(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp ugt <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test23:
+; SSE4: pmaxud
+
+; AVX1: test23:
+; AVX1: vpmaxud
+
+; AVX2: test23:
+; AVX2: vpmaxud
+}
+
+define void @test24(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp uge <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test24:
+; SSE4: pmaxud
+
+; AVX1: test24:
+; AVX1: vpmaxud
+
+; AVX2: test24:
+; AVX2: vpmaxud
+}
+
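+; Tests 25-32: 256-bit <32 x i8> min/max. Only AVX2 provides 256-bit integer
+; vpmin*/vpmax*, so these loops are checked for AVX2 only.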
+define void @test25(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp slt <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test25:
+; AVX2: vpminsb
+}
+
+define void @test26(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp sle <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test26:
+; AVX2: vpminsb
+}
+
+define void @test27(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp sgt <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test27:
+; AVX2: vpmaxsb
+}
+
+define void @test28(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp sge <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test28:
+; AVX2: vpmaxsb
+}
+
+define void @test29(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp ult <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test29:
+; AVX2: vpminub
+}
+
+define void @test30(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp ule <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test30:
+; AVX2: vpminub
+}
+
+define void @test31(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp ugt <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test31:
+; AVX2: vpmaxub
+}
+
+define void @test32(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp uge <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test32:
+; AVX2: vpmaxub
+}
+
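+; Tests 33-40: 256-bit <16 x i16> min/max, AVX2 only.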
+define void @test33(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp slt <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test33:
+; AVX2: vpminsw
+}
+
+define void @test34(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp sle <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test34:
+; AVX2: vpminsw
+}
+
+define void @test35(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp sgt <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test35:
+; AVX2: vpmaxsw
+}
+
+define void @test36(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp sge <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test36:
+; AVX2: vpmaxsw
+}
+
+define void @test37(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp ult <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test37:
+; AVX2: vpminuw
+}
+
+define void @test38(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp ule <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test38:
+; AVX2: vpminuw
+}
+
+define void @test39(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp ugt <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test39:
+; AVX2: vpmaxuw
+}
+
+define void @test40(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp uge <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test40:
+; AVX2: vpmaxuw
+}
+
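+; Tests 41-48: 256-bit <8 x i32> min/max, AVX2 only.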
+define void @test41(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp slt <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test41:
+; AVX2: vpminsd
+}
+
+define void @test42(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp sle <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test42:
+; AVX2: vpminsd
+}
+
+define void @test43(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp sgt <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test43:
+; AVX2: vpmaxsd
+}
+
+define void @test44(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp sge <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test44:
+; AVX2: vpmaxsd
+}
+
+define void @test45(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp ult <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test45:
+; AVX2: vpminud
+}
+
+define void @test46(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp ule <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test46:
+; AVX2: vpminud
+}
+
+define void @test47(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp ugt <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test47:
+; AVX2: vpmaxud
+}
+
+define void @test48(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp uge <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test48:
+; AVX2: vpmaxud
+}
+
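+; Tests 49-72 repeat the 128-bit patterns with the select operands swapped
+; (select %cmp, %load.b, %load.a), which inverts the idiom: an slt compare
+; now selects the larger value, so it should lower to a max, and vice versa.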
+define void @test49(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp slt <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test49:
+; SSE4: pmaxsb
+
+; AVX1: test49:
+; AVX1: vpmaxsb
+
+; AVX2: test49:
+; AVX2: vpmaxsb
+}
+
+define void @test50(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp sle <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test50:
+; SSE4: pmaxsb
+
+; AVX1: test50:
+; AVX1: vpmaxsb
+
+; AVX2: test50:
+; AVX2: vpmaxsb
+}
+
+define void @test51(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp sgt <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test51:
+; SSE4: pminsb
+
+; AVX1: test51:
+; AVX1: vpminsb
+
+; AVX2: test51:
+; AVX2: vpminsb
+}
+
+define void @test52(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp sge <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test52:
+; SSE4: pminsb
+
+; AVX1: test52:
+; AVX1: vpminsb
+
+; AVX2: test52:
+; AVX2: vpminsb
+}
+
+define void @test53(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp ult <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test53:
+; SSE2: pmaxub
+
+; AVX1: test53:
+; AVX1: vpmaxub
+
+; AVX2: test53:
+; AVX2: vpmaxub
+}
+
+define void @test54(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp ule <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test54:
+; SSE2: pmaxub
+
+; AVX1: test54:
+; AVX1: vpmaxub
+
+; AVX2: test54:
+; AVX2: vpmaxub
+}
+
+define void @test55(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp ugt <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test55:
+; SSE2: pminub
+
+; AVX1: test55:
+; AVX1: vpminub
+
+; AVX2: test55:
+; AVX2: vpminub
+}
+
+define void @test56(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <16 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <16 x i8>*
+ %load.a = load <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>* %ptr.b, align 2
+ %cmp = icmp uge <16 x i8> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
+ store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test56:
+; SSE2: pminub
+
+; AVX1: test56:
+; AVX1: vpminub
+
+; AVX2: test56:
+; AVX2: vpminub
+}
+
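+; Tests 57-64: swapped-operand <8 x i16> cases; the signed forms use SSE2
+; pminsw/pmaxsw, while the unsigned forms need SSE4.1 pminuw/pmaxuw.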
+define void @test57(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp slt <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test57:
+; SSE2: pmaxsw
+
+; AVX1: test57:
+; AVX1: vpmaxsw
+
+; AVX2: test57:
+; AVX2: vpmaxsw
+}
+
+define void @test58(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp sle <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test58:
+; SSE2: pmaxsw
+
+; AVX1: test58:
+; AVX1: vpmaxsw
+
+; AVX2: test58:
+; AVX2: vpmaxsw
+}
+
+define void @test59(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp sgt <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test59:
+; SSE2: pminsw
+
+; AVX1: test59:
+; AVX1: vpminsw
+
+; AVX2: test59:
+; AVX2: vpminsw
+}
+
+define void @test60(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp sge <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE2: test60:
+; SSE2: pminsw
+
+; AVX1: test60:
+; AVX1: vpminsw
+
+; AVX2: test60:
+; AVX2: vpminsw
+}
+
+define void @test61(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp ult <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test61:
+; SSE4: pmaxuw
+
+; AVX1: test61:
+; AVX1: vpmaxuw
+
+; AVX2: test61:
+; AVX2: vpmaxuw
+}
+
+define void @test62(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp ule <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test62:
+; SSE4: pmaxuw
+
+; AVX1: test62:
+; AVX1: vpmaxuw
+
+; AVX2: test62:
+; AVX2: vpmaxuw
+}
+
+define void @test63(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp ugt <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test63:
+; SSE4: pminuw
+
+; AVX1: test63:
+; AVX1: vpminuw
+
+; AVX2: test63:
+; AVX2: vpminuw
+}
+
+define void @test64(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <8 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <8 x i16>*
+ %load.a = load <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>* %ptr.b, align 2
+ %cmp = icmp uge <8 x i16> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
+ store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test64:
+; SSE4: pminuw
+
+; AVX1: test64:
+; AVX1: vpminuw
+
+; AVX2: test64:
+; AVX2: vpminuw
+}
+
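+; Tests 65-72: swapped-operand <4 x i32> cases (SSE4.1 pmin[su]d/pmax[su]d).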
+define void @test65(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp slt <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test65:
+; SSE4: pmaxsd
+
+; AVX1: test65:
+; AVX1: vpmaxsd
+
+; AVX2: test65:
+; AVX2: vpmaxsd
+}
+
+define void @test66(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp sle <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test66:
+; SSE4: pmaxsd
+
+; AVX1: test66:
+; AVX1: vpmaxsd
+
+; AVX2: test66:
+; AVX2: vpmaxsd
+}
+
+define void @test67(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp sgt <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test67:
+; SSE4: pminsd
+
+; AVX1: test67:
+; AVX1: vpminsd
+
+; AVX2: test67:
+; AVX2: vpminsd
+}
+
+define void @test68(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp sge <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test68:
+; SSE4: pminsd
+
+; AVX1: test68:
+; AVX1: vpminsd
+
+; AVX2: test68:
+; AVX2: vpminsd
+}
+
+define void @test69(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp ult <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test69:
+; SSE4: pmaxud
+
+; AVX1: test69:
+; AVX1: vpmaxud
+
+; AVX2: test69:
+; AVX2: vpmaxud
+}
+
+define void @test70(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp ule <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test70:
+; SSE4: pmaxud
+
+; AVX1: test70:
+; AVX1: vpmaxud
+
+; AVX2: test70:
+; AVX2: vpmaxud
+}
+
+define void @test71(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp ugt <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test71:
+; SSE4: pminud
+
+; AVX1: test71:
+; AVX1: vpminud
+
+; AVX2: test71:
+; AVX2: vpminud
+}
+
+define void @test72(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <4 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <4 x i32>*
+ %load.a = load <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>* %ptr.b, align 2
+ %cmp = icmp uge <4 x i32> %load.a, %load.b
+ %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
+ store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 4
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; SSE4: test72:
+; SSE4: pminud
+
+; AVX1: test72:
+; AVX1: vpminud
+
+; AVX2: test72:
+; AVX2: vpminud
+}
+
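+; Tests 73-80: swapped-operand 256-bit <32 x i8> cases, AVX2 only.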
+define void @test73(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp slt <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test73:
+; AVX2: vpmaxsb
+}
+
+define void @test74(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp sle <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test74:
+; AVX2: vpmaxsb
+}
+
+define void @test75(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp sgt <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test75:
+; AVX2: vpminsb
+}
+
+define void @test76(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp sge <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test76:
+; AVX2: vpminsb
+}
+
+define void @test77(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp ult <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test77:
+; AVX2: vpmaxub
+}
+
+define void @test78(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp ule <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test78:
+; AVX2: vpmaxub
+}
+
+define void @test79(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp ugt <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test79:
+; AVX2: vpminub
+}
+
+define void @test80(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %ptr.a = bitcast i8* %gep.a to <32 x i8>*
+ %ptr.b = bitcast i8* %gep.b to <32 x i8>*
+ %load.a = load <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>* %ptr.b, align 2
+ %cmp = icmp uge <32 x i8> %load.a, %load.b
+ %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
+ store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
+ %index.next = add i64 %index, 32
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test80:
+; AVX2: vpminub
+}
+
+define void @test81(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp slt <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test81:
+; AVX2: vpmaxsw
+}
+
+define void @test82(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp sle <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test82:
+; AVX2: vpmaxsw
+}
+
+define void @test83(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp sgt <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test83:
+; AVX2: vpminsw
+}
+
+define void @test84(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp sge <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test84:
+; AVX2: vpminsw
+}
+
+define void @test85(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp ult <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test85:
+; AVX2: vpmaxuw
+}
+
+define void @test86(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp ule <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test86:
+; AVX2: vpmaxuw
+}
+
+define void @test87(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp ugt <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test87:
+; AVX2: vpminuw
+}
+
+define void @test88(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %ptr.a = bitcast i16* %gep.a to <16 x i16>*
+ %ptr.b = bitcast i16* %gep.b to <16 x i16>*
+ %load.a = load <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>* %ptr.b, align 2
+ %cmp = icmp uge <16 x i16> %load.a, %load.b
+ %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
+ store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
+ %index.next = add i64 %index, 16
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test88:
+; AVX2: vpminuw
+}
+
+define void @test89(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp slt <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test89:
+; AVX2: vpmaxsd
+}
+
+define void @test90(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp sle <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test90:
+; AVX2: vpmaxsd
+}
+
+define void @test91(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp sgt <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test91:
+; AVX2: vpminsd
+}
+
+define void @test92(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp sge <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test92:
+; AVX2: vpminsd
+}
+
+define void @test93(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp ult <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test93:
+; AVX2: vpmaxud
+}
+
+define void @test94(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp ule <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test94:
+; AVX2: vpmaxud
+}
+
+define void @test95(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp ugt <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test95:
+; AVX2: vpminud
+}
+
+define void @test96(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %gep.a = getelementptr inbounds i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %ptr.a = bitcast i32* %gep.a to <8 x i32>*
+ %ptr.b = bitcast i32* %gep.b to <8 x i32>*
+ %load.a = load <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>* %ptr.b, align 2
+ %cmp = icmp uge <8 x i32> %load.a, %load.b
+ %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
+ store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
+ %index.next = add i64 %index, 8
+ %loop = icmp eq i64 %index.next, 16384
+ br i1 %loop, label %for.end, label %vector.body
+
+for.end: ; preds = %vector.body
+ ret void
+
+; AVX2: test96:
+; AVX2: vpminud
+}
diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll
index ee98806c0f8b..3b7fdff84e3c 100644
--- a/test/CodeGen/X86/vsplit-and.ll
+++ b/test/CodeGen/X86/vsplit-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn | FileCheck %s
define void @t0(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
; CHECK: t0
diff --git a/test/CodeGen/X86/wide-fma-contraction.ll b/test/CodeGen/X86/wide-fma-contraction.ll
new file mode 100644
index 000000000000..d93f33ba0e58
--- /dev/null
+++ b/test/CodeGen/X86/wide-fma-contraction.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+; CHECK: fmafunc
+define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+; CHECK: vfmaddps
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+; CHECK: vfmaddps
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+ %ret = tail call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
+ ret <16 x float> %ret
+}
+
+declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
+
+
+
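The test above leans on the llvm.fmuladd contract: when the target has a fused multiply-add, the intrinsic may be lowered to it rather than to separate multiply and add instructions, which is exactly what the CHECK-NOT vmulps/vaddps lines assert. A minimal scalar sketch (hypothetical function name):

declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone

; May lower to a single fused multiply-add when one is available.
define float @fma_sketch(float %a, float %b, float %c) {
  %r = tail call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  ret float %r
}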
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
index 878c6db99286..52b987e2be65 100644
--- a/test/CodeGen/X86/win32_sret.ll
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -1,28 +1,127 @@
-; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
-; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
-; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
-; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
; The SysV ABI used by most Unixes and Mingw on x86 specifies that an sret pointer
; is callee-cleanup. However, in MSVC's cdecl calling convention, sret pointer
; arguments are caller-cleanup like normal arguments.
-define void @sret1(i8* sret) nounwind {
+define void @sret1(i8* sret %x) nounwind {
entry:
-; WIN_X32: {{ret$}}
-; MINGW_X32: ret $4
+; WIN32: sret1
+; WIN32: movb $42, (%eax)
+; WIN32-NOT: popl %eax
+; WIN32: {{ret$}}
+
+; MINGW_X86: sret1
+; MINGW_X86: ret $4
+
+; LINUX: sret1
; LINUX: ret $4
+
+ store i8 42, i8* %x, align 4
ret void
}
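A hedged illustration of the cleanup difference described above (hypothetical function; expected epilogues, not output checked by this test): on MinGW and Linux the callee pops its own sret pointer, hence "ret $4", while MSVC-style cdecl leaves the slot to the caller and ends in a bare "ret".

define void @sret_sketch(i8* sret %out) nounwind {
entry:
  store i8 0, i8* %out
  ret void              ; win32: "ret"; mingw/linux: "ret $4"
}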
-define void @sret2(i32* sret %x, i32 %y) nounwind {
+define void @sret2(i8* sret %x, i8 %y) nounwind {
entry:
-; WIN_X32: {{ret$}}
-; MINGW_X32: ret $4
+; WIN32: sret2
+; WIN32: movb {{.*}}, (%eax)
+; WIN32-NOT: popl %eax
+; WIN32: {{ret$}}
+
+; MINGW_X86: sret2
+; MINGW_X86: ret $4
+
+; LINUX: sret2
; LINUX: ret $4
- store i32 %y, i32* %x
+
+ store i8 %y, i8* %x
+ ret void
+}
+
+define void @sret3(i8* sret %x, i8* %y) nounwind {
+entry:
+; WIN32: sret3
+; WIN32: movb $42, (%eax)
+; WIN32-NOT: movb $13, (%eax)
+; WIN32-NOT: popl %eax
+; WIN32: {{ret$}}
+
+; MINGW_X86: sret3
+; MINGW_X86: ret $4
+
+; LINUX: sret3
+; LINUX: ret $4
+
+ store i8 42, i8* %x
+ store i8 13, i8* %y
+ ret void
+}
+
+; PR15556
+%struct.S4 = type { i32, i32, i32 }
+
+define void @sret4(%struct.S4* noalias sret %agg.result) {
+entry:
+; WIN32: sret4
+; WIN32: movl $42, (%eax)
+; WIN32-NOT: popl %eax
+; WIN32: {{ret$}}
+
+; MINGW_X86: sret4
+; MINGW_X86: ret $4
+
+; LINUX: sret4
+; LINUX: ret $4
+
+ %x = getelementptr inbounds %struct.S4* %agg.result, i32 0, i32 0
+ store i32 42, i32* %x, align 4
ret void
}
+%struct.S5 = type { i32 }
+%class.C5 = type { i8 }
+
+define x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* noalias sret %agg.result, %class.C5* %this) {
+entry:
+ %this.addr = alloca %class.C5*, align 4
+ store %class.C5* %this, %class.C5** %this.addr, align 4
+ %this1 = load %class.C5** %this.addr
+ %x = getelementptr inbounds %struct.S5* %agg.result, i32 0, i32 0
+ store i32 42, i32* %x, align 4
+ ret void
+; WIN32: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
+
+; The address of the return structure is passed as an implicit parameter.
+; In the -O0 build, %eax is spilled at the beginning of the function, hence we
+; should match both 4(%esp) and 8(%esp).
+; WIN32: {{[48]}}(%esp), %eax
+; WIN32: movl $42, (%eax)
+; WIN32: ret $4
+}
+
+define void @call_foo5() {
+entry:
+ %c = alloca %class.C5, align 1
+ %s = alloca %struct.S5, align 4
+ call x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* sret %s, %class.C5* %c)
+; WIN32: {{^}}_call_foo5:
+
+; Load the address of the result and put it onto the stack
+; (through %ecx in the -O0 build).
+; WIN32: leal {{[0-9]+}}(%esp), %eax
+; WIN32: movl %eax, (%e{{[sc][px]}})
+
+; The this pointer is passed in ECX.
+; FIXME: for some reason, the checks below fail on the Ubuntu Atom D2700 bot.
+; FIXME-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; FIXME-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
+
+; WIN32: calll "?foo@C5@@QAE?AUS5@@XZ"
+; WIN32: ret
+ ret void
+}
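To restate the convention these checks encode (a sketch, assuming the in-test comments above are accurate): with x86_thiscallcc plus sret, the result pointer is passed on the stack, this travels in ECX, and the callee pops the sret slot, which is why the method is checked for "ret $4". Hypothetical types and name:

%struct.R = type { i32 }
%class.K = type { i8 }

; sret slot on the stack, %this in ECX, callee expected to pop the
; sret pointer on return ("ret $4").
define x86_thiscallcc void @sketch(%struct.R* noalias sret %out, %class.K* %this) nounwind {
entry:
  %f = getelementptr inbounds %struct.R* %out, i32 0, i32 0
  store i32 0, i32* %f, align 4
  ret void
}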
diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll
index 596b4262e6b0..14591248f354 100644
--- a/test/CodeGen/X86/win_ftol2.ll
+++ b/test/CodeGen/X86/win_ftol2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL
+; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=FTOL
; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
@@ -63,9 +63,9 @@ define i64 @double_ui64_2(double %x, double %y, double %z) nounwind {
%1 = fdiv double %x, %y
%2 = fsub double %x, %z
- %3 = fptoui double %1 to i64
- %4 = fptoui double %2 to i64
- %5 = sub i64 %3, %4
+ %3 = fptoui double %2 to i64
+ %4 = fptoui double %1 to i64
+ %5 = sub i64 %4, %3
ret i64 %5
}
@@ -121,9 +121,9 @@ define {double, i64} @double_ui64_4(double %x, double %y) nounwind {
; FTOL_2: calll __ftol2
;; stack is %x
- %1 = fptoui double %x to i64
- %2 = fptoui double %y to i64
- %3 = sub i64 %1, %2
+ %1 = fptoui double %y to i64
+ %2 = fptoui double %x to i64
+ %3 = sub i64 %2, %1
%4 = insertvalue {double, i64} undef, double %x, 0
%5 = insertvalue {double, i64} %4, i64 %3, 1
ret {double, i64} %5
diff --git a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
index 902c9d5ae081..9c01f16f24f5 100644
--- a/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
+++ b/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mcpu=nehalem | not grep rsp
-; RUN: llc < %s -mcpu=nehalem | grep cvttsd2siq
+; RUN: llc < %s -mcpu=nehalem | grep cvttsd2si
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/x86-64-ptr-arg-simple.ll b/test/CodeGen/X86/x86-64-ptr-arg-simple.ll
new file mode 100644
index 000000000000..6d466639890b
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-ptr-arg-simple.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 < %s | FileCheck -check-prefix=X32ABI %s
+
+; %in is kept in %esi for both ABIs, but the pointer will be passed in %edi
+; for x32, not in %rdi.
+
+; CHECK: movl %esi, (%rdi)
+; X32ABI: movl %esi, (%edi)
+
+define void @foo(i32* nocapture %out, i32 %in) nounwind {
+entry:
+ store i32 %in, i32* %out, align 4
+ ret void
+}
+
+; CHECK: bar
+; CHECK: movl (%rsi), %eax
+
+; Similarly here, but for loading
+; X32ABI: bar
+; X32ABI: movl (%esi), %eax
+
+define void @bar(i32* nocapture %pOut, i32* nocapture %pIn) nounwind {
+entry:
+ %0 = load i32* %pIn, align 4
+ store i32 %0, i32* %pOut, align 4
+ ret void
+}
+
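The point of the comments above, restated as a sketch (hypothetical function): the SysV x86-64 ABI passes the first pointer argument in %rdi, while under the ILP32 x32 ABI the same IR is expected to address through %edi, since pointers are only 32 bits wide.

define i32 @load_sketch(i32* nocapture %p) nounwind {
entry:
  %v = load i32* %p, align 4   ; (%rdi) on x86-64, (%edi) on x32
  ret i32 %v
}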
diff --git a/test/CodeGen/X86/x86-64-sret-return.ll b/test/CodeGen/X86/x86-64-sret-return.ll
index 7b5f189faa0f..bc8a54346580 100644
--- a/test/CodeGen/X86/x86-64-sret-return.ll
+++ b/test/CodeGen/X86/x86-64-sret-return.ll
@@ -1,11 +1,16 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin8 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 < %s | FileCheck -check-prefix=X32ABI %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin8"
- %struct.foo = type { [4 x i64] }
+%struct.foo = type { [4 x i64] }
; CHECK: bar:
; CHECK: movq %rdi, %rax
+
+; For the x32 ABI, pointers are 32-bit, so 32-bit instructions will be used
+; X32ABI: bar:
+; X32ABI: movl %edi, %eax
+
define void @bar(%struct.foo* noalias sret %agg.result, %struct.foo* %d) nounwind {
entry:
%d_addr = alloca %struct.foo* ; <%struct.foo**> [#uses=2]
@@ -57,6 +62,11 @@ return: ; preds = %entry
; CHECK: foo:
; CHECK: movq %rdi, %rax
+
+; For the x32 ABI, pointers are 32-bit, so 32-bit instructions will be used
+; X32ABI: foo:
+; X32ABI: movl %edi, %eax
+
define void @foo({ i64 }* noalias nocapture sret %agg.result) nounwind {
store { i64 } { i64 0 }, { i64 }* %agg.result
ret void
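A short sketch of why the checks differ (hypothetical type and function; assumes the usual sret contract): an sret callee returns the hidden result pointer in the return register, so identical IR yields "movq %rdi, %rax" on x86-64 but "movl %edi, %eax" under x32.

%struct.box = type { i64 }

define void @sret_ret_sketch(%struct.box* noalias nocapture sret %agg.result) nounwind {
  ret void   ; the sret pointer itself comes back in %rax / %eax
}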
diff --git a/test/CodeGen/X86/xtest.ll b/test/CodeGen/X86/xtest.ll
new file mode 100644
index 000000000000..e85565edcd55
--- /dev/null
+++ b/test/CodeGen/X86/xtest.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=x86-64 -mattr=+rtm | FileCheck %s
+
+declare i32 @llvm.x86.xtest() nounwind
+
+define i32 @test_xtest() nounwind uwtable {
+entry:
+ %0 = tail call i32 @llvm.x86.xtest() nounwind
+ ret i32 %0
+; CHECK: test_xtest
+; CHECK: xtest
+}
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
index 4242530f7731..5d25a2d74971 100644
--- a/test/CodeGen/X86/zero-remat.ll
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12
; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
diff --git a/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 80cf3a6d678f..000000000000
--- a/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=xcore -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/XCore/DbgValueOtherTargets.test b/test/CodeGen/XCore/DbgValueOtherTargets.test
new file mode 100644
index 000000000000..7c2ecd0312c6
--- /dev/null
+++ b/test/CodeGen/XCore/DbgValueOtherTargets.test
@@ -0,0 +1 @@
+RUN: llc -O0 -march=xcore -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll
diff --git a/test/CodeGen/XCore/aliases.ll b/test/CodeGen/XCore/aliases.ll
new file mode 100644
index 000000000000..d83b246a5527
--- /dev/null
+++ b/test/CodeGen/XCore/aliases.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+declare void @a_val() nounwind
+@b_val = external constant i32, section ".cp.rodata"
+@c_val = external global i32
+
+@a = alias void ()* @a_val
+@b = alias i32* @b_val
+@c = alias i32* @c_val
+
+; CHECK: a_addr:
+; CHECK: ldap r11, a
+; CHECK: retsp
+define void ()* @a_addr() nounwind {
+entry:
+ ret void ()* @a
+}
+
+; CHECK: b_addr:
+; CHECK: ldaw r11, cp[b]
+; CHECK: retsp
+define i32 *@b_addr() nounwind {
+entry:
+ ret i32* @b
+}
+
+; CHECK: c_addr:
+; CHECK: ldaw r0, dp[c]
+; CHECK: retsp
+define i32 *@c_addr() nounwind {
+entry:
+ ret i32* @c
+}
diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg
index f8726af57f79..8756f37fe8a1 100644
--- a/test/CodeGen/XCore/lit.local.cfg
+++ b/test/CodeGen/XCore/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.c', '.cpp', '.test']
targets = set(config.root.targets_to_build.split())
if not 'XCore' in targets:
diff --git a/test/DebugInfo/2009-11-03-InsertExtractValue.ll b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
index 8782e4446f4b..5bfca21b3ecb 100644
--- a/test/DebugInfo/2009-11-03-InsertExtractValue.ll
+++ b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
@@ -1,11 +1,18 @@
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
-!0 = metadata !{i32 42}
+!dbg = !{!0}
+!0 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !1, i32 3, metadata !2, i1 false, i1 false, i32 0, i32 0, null, i32 258, i1 false, null, null, i32 0, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 41, metadata !4} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 21, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!3 = metadata !{null}
+!4 = metadata !{metadata !"/foo", metadata !"bar.cpp"}
define <{i32, i32}> @f1() {
-; CHECK: !dbgx !0
- %r = insertvalue <{ i32, i32 }> zeroinitializer, i32 4, 1, !dbgx !0
-; CHECK: !dbgx !0
- %e = extractvalue <{ i32, i32 }> %r, 0, !dbgx !0
+; CHECK: !dbgx !1
+ %r = insertvalue <{ i32, i32 }> zeroinitializer, i32 4, 1, !dbgx !1
+; CHECK: !dbgx !1
+ %e = extractvalue <{ i32, i32 }> %r, 0, !dbgx !1
ret <{ i32, i32 }> %r
}
+
+; CHECK: [protected]
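A note on the renumbering that runs through the DebugInfo hunks below: the first integer of each descriptor encodes (debug-info version << 16) | DWARF tag, so these updates are a version bump (from 8 or 11 up to 12) plus field-layout changes, not new tags; where only a node's field list changed, the tag value stays put. Worked out for the two most common tags:

; 524334 = 0x8002E = (8  << 16) | 0x2E   ; DW_TAG_subprogram,   version 8
; 720942 = 0xB002E = (11 << 16) | 0x2E   ; DW_TAG_subprogram,   version 11
; 786478 = 0xC002E = (12 << 16) | 0x2E   ; DW_TAG_subprogram,   version 12
; 524305 = 0x80011 = (8  << 16) | 0x11   ; DW_TAG_compile_unit, version 8
; 786449 = 0xC0011 = (12 << 16) | 0x11   ; DW_TAG_compile_unit, version 12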
diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
index e0371d646d37..13bd31039522 100644
--- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
+++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
@@ -9,12 +9,10 @@ entry:
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"fb.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 139632)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 720937, metadata !"fb.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9}
diff --git a/test/DebugInfo/2009-11-10-CurrentFn.ll b/test/DebugInfo/2009-11-10-CurrentFn.ll
index 01db617a679b..83d6ac28223e 100644
--- a/test/DebugInfo/2009-11-10-CurrentFn.ll
+++ b/test/DebugInfo/2009-11-10-CurrentFn.ll
@@ -12,12 +12,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"cf.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 139632)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 720937, metadata !"cf.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{null}
diff --git a/test/DebugInfo/2010-03-24-MemberFn.ll b/test/DebugInfo/2010-03-24-MemberFn.ll
index 20c0b8ee009f..06c2367913da 100644
--- a/test/DebugInfo/2010-03-24-MemberFn.ll
+++ b/test/DebugInfo/2010-03-24-MemberFn.ll
@@ -36,27 +36,32 @@ return: ; preds = %entry
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-!0 = metadata !{i32 524544, metadata !1, metadata !"s1", metadata !4, i32 3, metadata !9} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 524299, metadata !2, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 524299, metadata !3, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"bar", metadata !"bar", metadata !"_Z3barv", metadata !4, i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 524329, metadata !"one.cc", metadata !"/tmp/", metadata !5} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 524305, i32 0, i32 4, metadata !"one.cc", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!llvm.dbg.cu = !{!5}
+
+!0 = metadata !{i32 786688, metadata !1, metadata !"s1", metadata !4, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
+!1 = metadata !{i32 786443, metadata !2, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 786443, metadata !3, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
+!3 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"bar", metadata !"bar", metadata !"_Z3barv", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @_Z3barv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!4 = metadata !{i32 786473, metadata !25} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 786449, i32 4, metadata !4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !24, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
!7 = metadata !{metadata !8}
-!8 = metadata !{i32 524324, metadata !4, metadata !"int", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 524307, metadata !4, metadata !"S", metadata !10, i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
-!10 = metadata !{i32 524329, metadata !"one.h", metadata !"/tmp/", metadata !5} ; [ DW_TAG_file_type ]
+!8 = metadata !{i32 786468, metadata !25, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786451, metadata !26, metadata !4, metadata !"S", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!10 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
!11 = metadata !{metadata !12}
-!12 = metadata !{i32 524334, i32 0, metadata !9, metadata !"foo", metadata !"foo", metadata !"_ZN1S3fooEv", metadata !10, i32 3, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!12 = metadata !{i32 786478, metadata !10, metadata !9, metadata !"foo", metadata !"foo", metadata !"_ZN1S3fooEv", i32 3, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%struct.S*)* @_ZN1S3fooEv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ]
!14 = metadata !{metadata !8, metadata !15}
-!15 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 786447, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
!16 = metadata !{i32 3, i32 0, metadata !1, null}
!17 = metadata !{i32 3, i32 0, metadata !3, null}
-!18 = metadata !{i32 524545, metadata !12, metadata !"this", metadata !10, i32 3, metadata !19} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 524326, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_const_type ]
-!20 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
+!18 = metadata !{i32 786689, metadata !12, metadata !"this", metadata !10, i32 3, metadata !19, i32 0, null} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 786470, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_const_type ]
+!20 = metadata !{i32 786447, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
!21 = metadata !{i32 3, i32 0, metadata !12, null}
!22 = metadata !{i32 3, i32 0, metadata !23, null}
-!23 = metadata !{i32 524299, metadata !12, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !12, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{metadata !3, metadata !12}
+!25 = metadata !{metadata !"one.cc", metadata !"/tmp/"}
+!26 = metadata !{metadata !"one.h", metadata !"/tmp/"}
diff --git a/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll b/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
index 9bb35fab4fee..accdf8a86cb7 100644
--- a/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
+++ b/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
@@ -25,6 +25,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!14 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !15, metadata !16, i32 0, null} ; [ DW_TAG_array_type ]
!15 = metadata !{i32 524324, metadata !4, metadata !"char", metadata !4, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
!16 = metadata !{metadata !17}
-!17 = metadata !{i32 524321, i64 0, i64 0} ; [ DW_TAG_subrange_type ]
+!17 = metadata !{i32 524321, i64 0, i64 1} ; [ DW_TAG_subrange_type ]
!18 = metadata !{metadata !"llvm.mdnode.fwdref.19"}
!19 = metadata !{metadata !"llvm.mdnode.fwdref.23"}
diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
index dd6c5a965eb6..dd98db904509 100644
--- a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
+++ b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
@@ -50,40 +50,43 @@ entry:
ret i32 %0, !dbg !35
}
-!0 = metadata !{i32 524544, metadata !1, metadata !"b", metadata !3, i32 16, metadata !8} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 524299, metadata !2, i32 15, i32 12} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 524334, i32 0, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 524329, metadata !"one.cc", metadata !"/tmp", metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 524305, i32 0, i32 4, metadata !"one.cc", metadata !"/tmp", metadata !"clang 1.5", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!llvm.dbg.cu = !{!4}
+!37 = metadata !{metadata !2, metadata !10, metadata !23}
+
+!0 = metadata !{i32 786688, metadata !1, metadata !"b", metadata !3, i32 16, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
+!1 = metadata !{i32 786443, metadata !2, i32 15, i32 12} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 786478, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @main, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 786473, metadata !"one.cc", metadata !"/tmp", metadata !4} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 786449, i32 4, metadata !3, metadata !"clang 1.5", i1 false, metadata !"", i32 0, null, null, metadata !37, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
!6 = metadata !{metadata !7}
-!7 = metadata !{i32 524324, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 524290, metadata !3, metadata !"B", metadata !3, i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_class_type ]
+!7 = metadata !{i32 786468, metadata !3, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786434, metadata !3, metadata !3, metadata !"B", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_class_type ]
!9 = metadata !{metadata !10}
-!10 = metadata !{i32 524334, i32 0, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", metadata !3, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!10 = metadata !{i32 786478, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", metadata !3, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZN1B2fnEv, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
!12 = metadata !{metadata !7, metadata !13}
-!13 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786447, metadata !3, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
!14 = metadata !{i32 16, i32 5, metadata !1, null}
!15 = metadata !{i32 17, i32 3, metadata !1, null}
!16 = metadata !{i32 18, i32 1, metadata !2, null}
-!17 = metadata !{i32 524545, metadata !10, metadata !"this", metadata !3, i32 4, metadata !13} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 786689, metadata !10, metadata !"this", metadata !3, i32 4, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
!18 = metadata !{i32 4, i32 7, metadata !10, null}
-!19 = metadata !{i32 524544, metadata !20, metadata !"a", metadata !3, i32 9, metadata !21} ; [ DW_TAG_auto_variable ]
-!20 = metadata !{i32 524299, metadata !10, i32 4, i32 12} ; [ DW_TAG_lexical_block ]
-!21 = metadata !{i32 524290, metadata !10, metadata !"A", metadata !3, i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_class_type ]
+!19 = metadata !{i32 786688, metadata !20, metadata !"a", metadata !3, i32 9, metadata !21, i32 0, null} ; [ DW_TAG_auto_variable ]
+!20 = metadata !{i32 786443, metadata !10, i32 4, i32 12} ; [ DW_TAG_lexical_block ]
+!21 = metadata !{i32 786434, metadata !3, metadata !10, metadata !"A", i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_class_type ]
!22 = metadata !{metadata !23}
-!23 = metadata !{i32 524334, i32 0, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", metadata !3, i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!24 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!23 = metadata !{i32 786478, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", metadata !3, i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZZN1B2fnEvEN1A3fooEv, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
+!24 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ]
!25 = metadata !{metadata !7, metadata !26}
-!26 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ]
+!26 = metadata !{i32 786447, metadata !3, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ]
!27 = metadata !{i32 9, i32 7, metadata !20, null}
-!28 = metadata !{i32 524544, metadata !20, metadata !"i", metadata !3, i32 10, metadata !7} ; [ DW_TAG_auto_variable ]
+!28 = metadata !{i32 786688, metadata !20, metadata !"i", metadata !3, i32 10, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
!29 = metadata !{i32 10, i32 9, metadata !20, null}
!30 = metadata !{i32 10, i32 5, metadata !20, null}
!31 = metadata !{i32 11, i32 5, metadata !20, null}
!32 = metadata !{i32 12, i32 3, metadata !10, null}
-!33 = metadata !{i32 524545, metadata !23, metadata !"this", metadata !3, i32 7, metadata !26} ; [ DW_TAG_arg_variable ]
+!33 = metadata !{i32 786689, metadata !23, metadata !"this", metadata !3, i32 7, metadata !26, i32 0, null} ; [ DW_TAG_arg_variable ]
!34 = metadata !{i32 7, i32 11, metadata !23, null}
!35 = metadata !{i32 7, i32 19, metadata !36, null}
-!36 = metadata !{i32 524299, metadata !23, i32 7, i32 17} ; [ DW_TAG_lexical_block ]
+!36 = metadata !{i32 786443, metadata !23, i32 7, i32 17} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/2010-04-19-FramePtr.ll b/test/DebugInfo/2010-04-19-FramePtr.ll
index 30031219d4ea..f9e90cd1b3d1 100644
--- a/test/DebugInfo/2010-04-19-FramePtr.ll
+++ b/test/DebugInfo/2010-04-19-FramePtr.ll
@@ -19,12 +19,15 @@ return: ; preds = %entry
ret i32 %retval1, !dbg !7
}
+!llvm.dbg.cu = !{!3}
+!9 = metadata !{metadata !1}
+
!0 = metadata !{i32 2, i32 0, metadata !1, null}
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"a.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"a.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @foo, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !"a.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
!5 = metadata !{metadata !6}
-!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!7 = metadata !{i32 2, i32 0, metadata !8, null}
-!8 = metadata !{i32 524299, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 786443, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/2010-05-03-OriginDIE.ll b/test/DebugInfo/2010-05-03-OriginDIE.ll
index 94bddc092f4a..1ade04504631 100644
--- a/test/DebugInfo/2010-05-03-OriginDIE.ll
+++ b/test/DebugInfo/2010-05-03-OriginDIE.ll
@@ -81,6 +81,6 @@ declare void @uuid_LtoB(i8*, i8*)
!30 = metadata !{i32 524310, metadata !3, metadata !"uint32_t", metadata !12, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_typedef ]
!31 = metadata !{i32 524324, metadata !3, metadata !"unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
!32 = metadata !{metadata !33}
-!33 = metadata !{i32 524321, i64 0, i64 1} ; [ DW_TAG_subrange_type ]
+!33 = metadata !{i32 524321, i64 0, i64 2} ; [ DW_TAG_subrange_type ]
!34 = metadata !{i32 524544, metadata !24, metadata !"addr", metadata !10, i32 96, metadata !35} ; [ DW_TAG_auto_variable ]
!35 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
diff --git a/test/DebugInfo/2010-05-10-MultipleCU.ll b/test/DebugInfo/2010-05-10-MultipleCU.ll
index 721b70839fef..75e4389afef8 100644
--- a/test/DebugInfo/2010-05-10-MultipleCU.ll
+++ b/test/DebugInfo/2010-05-10-MultipleCU.ll
@@ -26,19 +26,23 @@ return:
ret i32 21, !dbg !8
}
+!llvm.dbg.cu = !{!4, !12}
+!16 = metadata !{metadata !2}
+!17 = metadata !{metadata !10}
+
!0 = metadata !{i32 3, i32 0, metadata !1, null}
-!1 = metadata !{i32 524299, metadata !2, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 524334, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 524329, metadata !"a.c", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 524305, i32 0, i32 1, metadata !"a.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!1 = metadata !{i32 786443, metadata !2, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 786478, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 786473, metadata !"a.c", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 786449, i32 1, metadata !3, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !16, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
!6 = metadata !{metadata !7}
-!7 = metadata !{i32 524324, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786468, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!8 = metadata !{i32 3, i32 0, metadata !9, null}
-!9 = metadata !{i32 524299, metadata !10, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 524334, i32 0, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", metadata !11, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 524329, metadata !"b.c", metadata !"/tmp/", metadata !12} ; [ DW_TAG_file_type ]
-!12 = metadata !{i32 524305, i32 0, i32 1, metadata !"b.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!13 = metadata !{i32 524309, metadata !11, metadata !"", metadata !11, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{i32 786443, metadata !10, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786478, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", metadata !11, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786473, metadata !"b.c", metadata !"/tmp/", metadata !12} ; [ DW_TAG_file_type ]
+!12 = metadata !{i32 786449, i32 1, metadata !11, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!13 = metadata !{i32 786453, metadata !11, metadata !"", metadata !11, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ]
!14 = metadata !{metadata !15}
-!15 = metadata !{i32 524324, metadata !11, metadata !"int", metadata !11, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!15 = metadata !{i32 786468, metadata !11, metadata !"int", metadata !11, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
index 2557c9c63dea..e44362d4129e 100644
--- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
@@ -21,34 +21,33 @@ entry:
ret i32 %1, !dbg !23
}
-!llvm.dbg.sp = !{!0, !6}
-!llvm.dbg.lv.foo = !{!9, !10}
-!llvm.dbg.gv = !{!16}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 9, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !24} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !"bar.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"bar.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 9, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !24, i32 9} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !25, metadata !26, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5, metadata !5}
-!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{i32 786468, metadata !27, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !5}
-!9 = metadata !{i32 524545, metadata !0, metadata !"j", metadata !1, i32 9, metadata !5} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 524544, metadata !11, metadata !"xyz", metadata !1, i32 10, metadata !12} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 524299, metadata !0, i32 9, i32 0} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 524307, metadata !0, metadata !"X", metadata !1, i32 10, i64 64, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 786689, metadata !0, metadata !"j", metadata !1, i32 9, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 786688, metadata !11, metadata !"xyz", metadata !1, i32 10, metadata !12, i32 0, null} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 786443, metadata !1, metadata !0, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 786451, metadata !27, metadata !0, metadata !"X", i32 10, i64 64, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null} ; [ DW_TAG_structure_type ]
!13 = metadata !{metadata !14, metadata !15}
-!14 = metadata !{i32 524301, metadata !12, metadata !"a", metadata !1, i32 10, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!15 = metadata !{i32 524301, metadata !12, metadata !"b", metadata !1, i32 10, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
-!16 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"", metadata !1, i32 5, metadata !5, i1 false, i1 true, i32* @i} ; [ DW_TAG_variable ]
+!14 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"a", i32 10, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!15 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"b", i32 10, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
+!16 = metadata !{i32 786484, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"", metadata !1, i32 5, metadata !5, i1 false, i1 true, i32* @i} ; [ DW_TAG_variable ]
!17 = metadata !{i32 15, i32 0, metadata !18, null}
-!18 = metadata !{i32 524299, metadata !6, i32 14, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786443, metadata !1, metadata !6, i32 14, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
!19 = metadata !{i32 9, i32 0, metadata !0, metadata !17}
!20 = metadata !{null}
!21 = metadata !{i32 9, i32 0, metadata !11, metadata !17}
!22 = metadata !{i32 11, i32 0, metadata !11, metadata !17}
!23 = metadata !{i32 16, i32 0, metadata !18, null}
-!24 = metadata !{metadata !25}
-!25 = metadata !{metadata !9, metadata !10}
-
+!24 = metadata !{metadata !9, metadata !10}
+!25 = metadata !{metadata !0, metadata !6}
+!26 = metadata !{metadata !16}
+!27 = metadata !{metadata !"bar.c", metadata !"/tmp/"}
diff --git a/test/DebugInfo/2010-10-01-crash.ll b/test/DebugInfo/2010-10-01-crash.ll
index e61f63f40d8f..c4161b49426d 100644
--- a/test/DebugInfo/2010-10-01-crash.ll
+++ b/test/DebugInfo/2010-10-01-crash.ll
@@ -1,4 +1,5 @@
; RUN: llc -O0 %s -o /dev/null
+; XFAIL: hexagon
; PR 8235
define void @CGRectStandardize(i32* sret %agg.result, i32* byval %rect) nounwind ssp {
diff --git a/test/DebugInfo/AArch64/cfi-frame.ll b/test/DebugInfo/AArch64/cfi-frame.ll
new file mode 100644
index 000000000000..7290ddf357c1
--- /dev/null
+++ b/test/DebugInfo/AArch64/cfi-frame.ll
@@ -0,0 +1,58 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-WITH-FP
+
+@bigspace = global [8 x i64] zeroinitializer
+
+declare void @use_addr(i8*)
+
+define void @test_frame([8 x i64] %val) {
+; CHECK: test_frame:
+; CHECK: .cfi_startproc
+
+ %var = alloca i8, i32 1000000
+; CHECK: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]]
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]]
+
+; Make sure the prologue is reasonably efficient
+; CHECK-NEXT: stp x29, x30, [sp,
+; CHECK-NEXT: stp x25, x26, [sp,
+; CHECK-NEXT: stp x23, x24, [sp,
+; CHECK-NEXT: stp x21, x22, [sp,
+; CHECK-NEXT: stp x19, x20, [sp,
+; CHECK-NEXT: sub sp, sp, #160
+; CHECK-NEXT: sub sp, sp, #244, lsl #12
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_def_cfa sp, 1000080
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_offset x30, -8
+; CHECK-NEXT: .Ltmp
+; CHECK-NEXT: .cfi_offset x29, -16
+; [...]
+; CHECK: .cfi_offset x19, -80
+
+; CHECK: bl use_addr
+ call void @use_addr(i8* %var)
+
+ store [8 x i64] %val, [8 x i64]* @bigspace
+ ret void
+; CHECK: ret
+; CHECK: .cfi_endproc
+}
+
+; CHECK-WITH-FP: test_frame:
+
+; CHECK-WITH-FP: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]]
+; CHECK-WITH-FP-NEXT: .Ltmp
+; CHECK-WITH-FP-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]]
+
+; CHECK-WITH-FP: stp x29, x30, [sp, [[OFFSET:#[0-9]+]]]
+; CHECK-WITH-FP-NEXT: add x29, sp, [[OFFSET]]
+; CHECK-WITH-FP-NEXT: .Ltmp
+; CHECK-WITH-FP-NEXT: .cfi_def_cfa x29, 16
+
+ ; We shouldn't emit any kind of update for the second stack adjustment if the
+ ; FP is in use.
+; CHECK-WITH-FP-NOT: .cfi_def_cfa_offset
+
+; CHECK-WITH-FP: bl use_addr
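
A note on the prologue arithmetic checked above: AArch64 ADD/SUB immediates are 12-bit, optionally shifted left by 12, so the ~1MB frame cannot be allocated in a single instruction. The callee-save figure in this sketch is inferred from the CHECK lines, not asserted by the test itself:

  big   = 244 << 12   # sub sp, sp, #244, lsl #12  -> 999424 bytes
  small = 160         # sub sp, sp, #160
  # [[SP_INIT_ADJ]] is whatever remains once the CFA reaches 1000080
  callee_saves = 1000080 - (big + small)
  assert big + small == 999584
  assert callee_saves == 496
  assert callee_saves + small + big == 1000080  # .cfi_def_cfa sp, 1000080
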
diff --git a/test/DebugInfo/AArch64/dwarfdump.ll b/test/DebugInfo/AArch64/dwarfdump.ll
new file mode 100644
index 000000000000..673c789fe62c
--- /dev/null
+++ b/test/DebugInfo/AArch64/dwarfdump.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -filetype=obj | llvm-dwarfdump - | FileCheck %s
+
+; We're mostly checking that relocations are applied correctly
+; here. Currently R_AARCH64_ABS32 is used for references to debug data
+; and R_AARCH64_ABS64 is used for program addresses.
+
+; A couple of ABS32s, both at 0 and elsewhere, interpreted correctly:
+
+; CHECK: DW_AT_producer [DW_FORM_strp] ( .debug_str[0x00000000] = "clang version 3.3 ")
+; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000013] = "tmp.c")
+
+; A couple of ABS64s similarly:
+
+; CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
+; CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000008)
+
+define i32 @main() nounwind {
+ ret i32 0, !dbg !8
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/llvm/build/tmp.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!2 = metadata !{metadata !3}
+!3 = metadata !{i32 786478, metadata !4, metadata !"main", metadata !"main", metadata !"", metadata !4, i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!4 = metadata !{i32 786473, metadata !9} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = metadata !{i32 2, i32 0, metadata !3, null}
+!9 = metadata !{metadata !"tmp.c", metadata !"/home/tim/llvm/build"}
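
To make the relocation claim above concrete: DW_FORM_strp is a 4-byte offset into .debug_str (hence an ABS32 relocation target), while DW_FORM_addr carries a full 8-byte program address (hence ABS64). A rough sketch of the strp resolution, with a made-up section image matching the offsets the test checks:

  import struct

  def read_strp(debug_str, attr_bytes):
      (offset,) = struct.unpack("<I", attr_bytes)  # strp is 4 bytes
      return debug_str[offset:debug_str.index(b"\0", offset)].decode()

  # Hypothetical .debug_str contents matching the CHECK offsets above.
  debug_str = b"clang version 3.3 \0tmp.c\0"
  assert read_strp(debug_str, struct.pack("<I", 0x00000000)) == "clang version 3.3 "
  assert read_strp(debug_str, struct.pack("<I", 0x00000013)) == "tmp.c"
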
diff --git a/test/DebugInfo/AArch64/eh_frame.ll b/test/DebugInfo/AArch64/eh_frame.ll
new file mode 100644
index 000000000000..2539c56fa123
--- /dev/null
+++ b/test/DebugInfo/AArch64/eh_frame.ll
@@ -0,0 +1,51 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu %s -filetype=obj -o %t
+; RUN: llvm-objdump -s %t | FileCheck %s
+@var = global i32 0
+
+declare void @bar()
+
+define i64 @check_largest_class(i32 %in) {
+ %res = load i32* @var
+ call void @bar()
+ %ext = zext i32 %res to i64
+ ret i64 %ext
+}
+
+; The really key points we're checking here are:
+; * Return register is x30.
+; * Pointer format is 0x1b (GNU doesn't appear to understand others).
+
+; The rest is largely incidental, but not expected to change regularly.
+
+; Output is:
+
+; CHECK: Contents of section .eh_frame:
+; CHECK-NEXT: 0000 10000000 00000000 017a5200 017c1e01 .........zR..|..
+; CHECK-NEXT: 0010 1b0c1f00 18000000 18000000 00000000 ................
+
+
+; Won't check the rest, it's rather incidental.
+; 0020 24000000 00440c1f 10449e02 93040000 $....D...D......
+
+
+; The first CIE:
+; -------------------
+; 10000000: length of first CIE = 0x10
+; 00000000: This is a CIE
+; 01: version = 0x1
+; 7a 52 00: augmentation string "zR" -- pointer format is specified
+; 01: code alignment factor 1
+; 7c: data alignment factor -4
+; 1e: return address register 30 (== x30).
+; 01: 1 byte of augmentation
+; 1b: pointer format 1b: DW_EH_PE_pcrel | DW_EH_PE_sdata4
+; 0c 1f 00: initial instructions: "DW_CFA_def_cfa x31 ofs 0" in this case
+
+; Next the FDE:
+; -------------
+; 18000000: FDE length 0x18
+; 18000000: Uses CIE 0x18 backwards (only coincidentally same as above)
+; 00000000: PC begin for this FDE is at 00000000 (relocation is applied here)
+; 24000000: FDE applies up to PC begin+0x24
+; 00: Augmentation string length 0 for this FDE
+; Rest: call frame instructions
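
Two of the decodings above are worth making mechanical: the data alignment factor is a single signed-LEB128 byte, and the pointer format byte is a bitwise OR of DWARF EH encoding constants. A small sketch:

  def sleb128_decode(data):
      result, shift = 0, 0
      for byte in data:
          result |= (byte & 0x7f) << shift
          shift += 7
          if not byte & 0x80:        # last byte
              if byte & 0x40:        # negative: sign-extend
                  result -= 1 << shift
              return result

  assert sleb128_decode(b"\x7c") == -4  # data alignment factor

  DW_EH_PE_sdata4 = 0x0b
  DW_EH_PE_pcrel  = 0x10
  assert (DW_EH_PE_pcrel | DW_EH_PE_sdata4) == 0x1b  # pointer format byte
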
diff --git a/test/DebugInfo/AArch64/eh_frame_personality.ll b/test/DebugInfo/AArch64/eh_frame_personality.ll
new file mode 100644
index 000000000000..d35f2a2fcafb
--- /dev/null
+++ b/test/DebugInfo/AArch64/eh_frame_personality.ll
@@ -0,0 +1,46 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu %s -filetype=obj -o %t
+; RUN: llvm-objdump -s %t | FileCheck %s
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @bar()
+
+define i64 @foo(i64 %lhs, i64 %rhs) {
+ invoke void @bar() to label %end unwind label %clean
+end:
+ ret i64 0
+
+clean:
+ %tst = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) cleanup
+ ret i64 42
+}
+
+; CHECK: Contents of section .eh_frame:
+; CHECK: 0000 1c000000 00000000 017a504c 5200017c .........zPLR..|
+; CHECK: 0010 1e0b0000 00000000 00000000 1b0c1f00 ................
+
+; Don't really care about the rest:
+
+; 0020 1c000000 24000000 00000000 24000000 ....$.......$...
+; 0030 08000000 00000000 00440c1f 10449e02 .........D...D..
+
+; The key test here is that the personality routine is sanely encoded (under the
+; small memory model it must be an 8-byte value for full generality: code+data <
+; 4GB, but you might need both +4GB and -4GB depending on where things end
+; up). However, for completeness:
+
+; First CIE:
+; ----------
+; 1c000000: Length = 0x1c
+; 00000000: This is a CIE
+; 01: Version 1
+; 7a 50 4c 52 00: Augmentation string "zPLR" (personality routine, language-specific data, pointer format)
+; 01: Code alignment factor 1
+; 7c: Data alignment factor: -4
+; 1e: Return address in x30
+; 0b: Augmentation data 0xb bytes (this is key!)
+; 00: Personality encoding is DW_EH_PE_absptr
+; 00 00 00 00 00 00 00 00: First part of aug (personality routine). Relocated, obviously
+; 00: Second part of aug (language-specific data): absolute pointer format used
+; 1b: pointer format: pc-relative signed 4-byte. Just like GNU.
+; 0c 1f 00: Initial instructions ("DW_CFA_def_cfa x31 ofs 0" in this case)
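
The 0xb-byte augmentation data length follows directly from the "zPLR" augmentation string; accounting for it (a sketch, not a real parser):

  aug_data = {
      "P": 1 + 8,  # personality encoding byte + 8-byte absptr routine
      "L": 1,      # LSDA pointer encoding byte
      "R": 1,      # FDE pointer encoding byte (0x1b here)
  }
  assert sum(aug_data[c] for c in "PLR") == 0xb  # 11 bytes, as decoded above
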
diff --git a/test/DebugInfo/AArch64/lit.local.cfg b/test/DebugInfo/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..c5ce2411ed48
--- /dev/null
+++ b/test/DebugInfo/AArch64/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'AArch64' not in targets:
+ config.unsupported = True
+
diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll
new file mode 100644
index 000000000000..ac3037e04b4b
--- /dev/null
+++ b/test/DebugInfo/AArch64/variable-loc.ll
@@ -0,0 +1,98 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
+
+; This is a regression test making sure the location of variables is correct in
+; debugging information, even if they're addressed via the frame pointer.
+
+; In case it needs regenerating, the following suffices:
+; int printf(const char *, ...);
+; void populate_array(int *, int);
+; int sum_array(int *, int);
+
+; int main() {
+; int main_arr[100], val;
+; populate_array(main_arr, 100);
+; val = sum_array(main_arr, 100);
+; printf("Total is %d\n", val);
+; return 0;
+; }
+
+ ; First make sure main_arr is where we expect it: sp + 12 == x29 - 420:
+; CHECK: main:
+; CHECK: sub sp, sp, #448
+; CHECK: stp x29, x30, [sp, #432]
+; CHECK: add x29, sp, #432
+; CHECK: add {{x[0-9]+}}, sp, #12
+
+ ; Now check the debugging information reflects this:
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: .word .Linfo_string7
+
+ ; Rather hard-coded, but 145 => DW_OP_fbreg and the .ascii is the SLEB128 encoding of -420.
+; CHECK: DW_AT_location
+; CHECK-NEXT: .byte 145
+; CHECK-NEXT: .ascii "\334|"
+
+; CHECK: .Linfo_string7:
+; CHECK-NEXT: main_arr
+
+
+target datalayout = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-n32:64-S128"
+target triple = "aarch64-none-linux-gnu"
+
+@.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 1
+
+declare void @populate_array(i32*, i32) nounwind
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i32 @sum_array(i32*, i32) nounwind
+
+define i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %main_arr = alloca [100 x i32], align 4
+ %val = alloca i32, align 4
+ store i32 0, i32* %retval
+ call void @llvm.dbg.declare(metadata !{[100 x i32]* %main_arr}, metadata !17), !dbg !22
+ call void @llvm.dbg.declare(metadata !{i32* %val}, metadata !23), !dbg !24
+ %arraydecay = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !25
+ call void @populate_array(i32* %arraydecay, i32 100), !dbg !25
+ %arraydecay1 = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !26
+ %call = call i32 @sum_array(i32* %arraydecay1, i32 100), !dbg !26
+ store i32 %call, i32* %val, align 4, !dbg !26
+ %0 = load i32* %val, align 4, !dbg !27
+ %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), i32 %0), !dbg !27
+ ret i32 0, !dbg !28
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !11, metadata !14}
+!5 = metadata !{i32 786478, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
+!6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null, metadata !9, metadata !10}
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = metadata !{i32 786478, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", metadata !6, i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
+!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{metadata !10, metadata !9, metadata !10}
+!14 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
+!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{metadata !10}
+!17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19]
+!18 = metadata !{i32 786443, metadata !6, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
+!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
+!20 = metadata !{i32 786465, i64 0, i64 99} ; [ DW_TAG_subrange_type ] [0, 99]
+!22 = metadata !{i32 19, i32 7, metadata !18, null}
+!23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20]
+!24 = metadata !{i32 20, i32 7, metadata !18, null}
+!25 = metadata !{i32 22, i32 3, metadata !18, null}
+!26 = metadata !{i32 23, i32 9, metadata !18, null}
+!27 = metadata !{i32 24, i32 3, metadata !18, null}
+!28 = metadata !{i32 26, i32 3, metadata !18, null}
+!29 = metadata !{metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build"}
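
The hard-coded location bytes checked in this test decode cleanly: DW_OP_fbreg is opcode 145 (0x91), and SLEB128(-420) is the two bytes 0xdc 0x7c, i.e. the .ascii "\334|" above (octal 334 = 0xdc, '|' = 0x7c). A minimal encoder sketch to confirm:

  def sleb128_encode(value):
      out = bytearray()
      while True:
          byte = value & 0x7f
          value >>= 7  # Python's arithmetic shift preserves the sign
          done = (value == 0 and not byte & 0x40) or \
                 (value == -1 and byte & 0x40)
          out.append(byte if done else byte | 0x80)
          if done:
              return bytes(out)

  assert sleb128_encode(-420) == b"\xdc\x7c"  # .ascii "\334|"
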
diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.cc b/test/DebugInfo/Inputs/dwarfdump-inl-test.cc
new file mode 100644
index 000000000000..8ffbb528f2a9
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.cc
@@ -0,0 +1,15 @@
+#include "dwarfdump-inl-test.h"
+static inline int inlined_f() {
+ volatile int x = inlined_g();
+ return x;
+}
+
+int main() {
+ return inlined_f();
+}
+
+// Built with Clang 3.2
+// $ mkdir -p /tmp/dbginfo
+// $ cp dwarfdump-inl-test.* /tmp/dbginfo
+// $ cd /tmp/dbginfo
+// $ clang++ -O2 -gline-tables-only -fsanitize=address -fPIC -shared dwarfdump-inl-test.cc -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64
index 9a1d5383caac..6df03dad95a7 100755
--- a/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64
+++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.h b/test/DebugInfo/Inputs/dwarfdump-inl-test.h
new file mode 100644
index 000000000000..ecc2aaac0994
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.h
@@ -0,0 +1,9 @@
+inline int inlined_h() {
+ volatile int z = 0;
+ return z;
+}
+
+inline int inlined_g() {
+ volatile int y = inlined_h();
+ return y;
+}
diff --git a/test/DebugInfo/Inputs/dwarfdump-pubnames.cc b/test/DebugInfo/Inputs/dwarfdump-pubnames.cc
new file mode 100644
index 000000000000..284755bd940f
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-pubnames.cc
@@ -0,0 +1,32 @@
+// Object file built using:
+// clang -g -mllvm -generate-dwarf-pubnames -o dwarfdump-pubnames.elf-x86_64 \
+// dwarfdump-pubnames.cc -c
+
+struct C {
+ void member_function();
+ static int static_member_function();
+ static int static_member_variable;
+};
+
+int C::static_member_variable = 0;
+
+void C::member_function() {
+ static_member_variable = 0;
+}
+
+int C::static_member_function() {
+ return static_member_variable;
+}
+
+C global_variable;
+
+int global_function() {
+ return -1;
+}
+
+namespace ns {
+ void global_namespace_function() {
+ global_variable.member_function();
+ }
+ int global_namespace_variable = 1;
+}
diff --git a/test/DebugInfo/Inputs/dwarfdump-pubnames.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-pubnames.elf-x86-64
new file mode 100644
index 000000000000..3c9c1ad56b38
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-pubnames.elf-x86-64
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.c b/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.c
new file mode 100644
index 000000000000..708e037f4e31
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.c
@@ -0,0 +1,14 @@
+// clang -c -g -o dwarfdump-test-32bit.elf.o -m32 dwarfdump-test-32bit.elf.c
+
+extern int glob;
+
+int foo(int arg) {
+ int a = arg * 2;
+ return a + glob;
+}
+
+int bar(int arg) {
+ int a = foo(arg) * foo(arg * 2);
+ return glob - foo(a);
+}
+
diff --git a/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.o b/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.o
new file mode 100644
index 000000000000..817665e6a708
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test-32bit.elf.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-test.cc b/test/DebugInfo/Inputs/dwarfdump-test.cc
new file mode 100644
index 000000000000..408999864907
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test.cc
@@ -0,0 +1,23 @@
+class DummyClass {
+ int a_;
+ public:
+ DummyClass(int a) : a_(a) {}
+ int add(int b) {
+ return a_ + b;
+ }
+};
+
+int f(int a, int b) {
+ DummyClass c(a);
+ return c.add(b);
+}
+
+int main() {
+ return f(2, 3);
+}
+
+// Built with Clang 3.2:
+// $ mkdir -p /tmp/dbginfo
+// $ cp dwarfdump-test.cc /tmp/dbginfo
+// $ cd /tmp/dbginfo
+// $ clang++ -g dwarfdump-test.cc -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64
index fe20c8e59667..455dd1ce4fc2 100755
--- a/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64
+++ b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-test2-helper.cc b/test/DebugInfo/Inputs/dwarfdump-test2-helper.cc
new file mode 100644
index 000000000000..7d9264050bd9
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test2-helper.cc
@@ -0,0 +1,3 @@
+extern "C" int a() {
+ return 0;
+}
diff --git a/test/DebugInfo/Inputs/dwarfdump-test2-main.cc b/test/DebugInfo/Inputs/dwarfdump-test2-main.cc
new file mode 100644
index 000000000000..b327674ab792
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test2-main.cc
@@ -0,0 +1,11 @@
+extern "C" int a();
+
+int main() {
+ return a();
+}
+
+// Built with gcc 4.6.3
+// $ mkdir -p /tmp/dbginfo
+// $ cp dwarfdump-test2-helper.cc dwarfdump-test2-main.cc /tmp/dbginfo/
+// $ cd /tmp/dbginfo
+// $ g++ -g dwarfdump-test2-helper.cc dwarfdump-test2-main.cc -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64
index ce4af7fd06bc..6f362ad62cf1 100755
--- a/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64
+++ b/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-test3-decl.h b/test/DebugInfo/Inputs/dwarfdump-test3-decl.h
new file mode 100644
index 000000000000..4a79e959b0ce
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test3-decl.h
@@ -0,0 +1,7 @@
+#include "dwarfdump-test3-decl2.h"
+
+class C {
+ explicit C(bool a = false, bool b = false);
+};
+
+void do1() {}
diff --git a/test/DebugInfo/Inputs/dwarfdump-test3-decl2.h b/test/DebugInfo/Inputs/dwarfdump-test3-decl2.h
new file mode 100644
index 000000000000..9c92d56fcf4e
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test3-decl2.h
@@ -0,0 +1 @@
+void do2() { }
diff --git a/test/DebugInfo/Inputs/dwarfdump-test3.cc b/test/DebugInfo/Inputs/dwarfdump-test3.cc
new file mode 100644
index 000000000000..7b4d7ea71180
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test3.cc
@@ -0,0 +1,12 @@
+#include "dwarfdump-test3-decl.h"
+
+C::C(bool a, bool b) {}
+
+// Built with gcc 4.6.3
+// $ mkdir -p /tmp/dbginfo/include
+// $ mkdir -p /tmp/include
+// $ cp dwarfdump-test3.cc /tmp/dbginfo
+// $ cp dwarfdump-test3-decl.h /tmp/include
+// $ cp dwarfdump-test3-decl2.h /tmp/dbginfo/include
+// $ cd /tmp/dbginfo
+// $ gcc dwarfdump-test3.cc -g -I/tmp/include -Iinclude -fPIC -shared -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64
deleted file mode 100755
index 7c1730462055..000000000000
--- a/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64
+++ /dev/null
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 space b/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 space
new file mode 100755
index 000000000000..7330cd8baa1e
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test3.elf-x86-64 space
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-test4-decl.h b/test/DebugInfo/Inputs/dwarfdump-test4-decl.h
new file mode 100644
index 000000000000..9abd875415d6
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test4-decl.h
@@ -0,0 +1 @@
+inline void a(){}
diff --git a/test/DebugInfo/Inputs/dwarfdump-test4-part1.cc b/test/DebugInfo/Inputs/dwarfdump-test4-part1.cc
new file mode 100644
index 000000000000..94a818cddd5f
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test4-part1.cc
@@ -0,0 +1,8 @@
+#include "dwarfdump-test4-decl.h"
+int c(){a();}
+
+// Built with gcc 4.6.3
+// $ mkdir -p /tmp/dbginfo
+// $ cp dwarfdump-test4-*.* /tmp/dbginfo
+// $ cd /tmp/dbginfo
+// $ gcc -fPIC -shared -g dwarfdump-test4-part*.cc -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-test4-part2.cc b/test/DebugInfo/Inputs/dwarfdump-test4-part2.cc
new file mode 100644
index 000000000000..2a1936f0b3d4
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test4-part2.cc
@@ -0,0 +1,2 @@
+#include "dwarfdump-test4-decl.h"
+int d(){a();}
diff --git a/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64
index 884870859d98..a1dd8b91baa2 100755
--- a/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64
+++ b/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64
Binary files differ
diff --git a/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg b/test/DebugInfo/Inputs/lit.local.cfg
index e6f55eef7af5..e6f55eef7af5 100644
--- a/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg
+++ b/test/DebugInfo/Inputs/lit.local.cfg
diff --git a/test/DebugInfo/Inputs/test-inline.o b/test/DebugInfo/Inputs/test-inline.o
new file mode 100644
index 000000000000..a650c91725d9
--- /dev/null
+++ b/test/DebugInfo/Inputs/test-inline.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/test-parameters.o b/test/DebugInfo/Inputs/test-parameters.o
new file mode 100644
index 000000000000..7f4b6702df2e
--- /dev/null
+++ b/test/DebugInfo/Inputs/test-parameters.o
Binary files differ
diff --git a/test/DebugInfo/X86/2010-04-13-PubType.ll b/test/DebugInfo/X86/2010-04-13-PubType.ll
index 559f032cb3a6..5169647fa41d 100644
--- a/test/DebugInfo/X86/2010-04-13-PubType.ll
+++ b/test/DebugInfo/X86/2010-04-13-PubType.ll
@@ -28,20 +28,24 @@ return: ; preds = %entry
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-!0 = metadata !{i32 524545, metadata !1, metadata !"x", metadata !2, i32 7, metadata !7} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"a.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"a.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!llvm.dbg.cu = !{!3}
+
+!0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 7, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.X*, %struct.Y*)* @foo, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
!5 = metadata !{metadata !6, metadata !7, metadata !9}
-!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 524307, metadata !2, metadata !"X", metadata !2, i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ]
-!9 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 524307, metadata !2, metadata !"Y", metadata !2, i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
+!6 = metadata !{i32 786468, metadata !18, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"X", i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"Y", i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ]
!11 = metadata !{metadata !12}
-!12 = metadata !{i32 524301, metadata !10, metadata !"x", metadata !2, i32 5, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!12 = metadata !{i32 786445, metadata !18, metadata !10, metadata !"x", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
!13 = metadata !{i32 7, i32 0, metadata !1, null}
-!14 = metadata !{i32 524545, metadata !1, metadata !"y", metadata !2, i32 7, metadata !9} ; [ DW_TAG_arg_variable ]
+!14 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 7, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
!15 = metadata !{i32 7, i32 0, metadata !16, null}
-!16 = metadata !{i32 524299, metadata !1, i32 7, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 786443, metadata !1, i32 7, i32 0} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{metadata !1}
+!18 = metadata !{metadata !"a.c", metadata !"/tmp/"}
diff --git a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
new file mode 100644
index 000000000000..d05dfc6c32be
--- /dev/null
+++ b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=i686-linux -O0 -filetype=obj -o %t %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; CHECK: DW_TAG_constant [4]
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000002c] = "ro")
+
+define void @foo() nounwind ssp {
+entry:
+ call void @bar(i32 201), !dbg !8
+ ret void, !dbg !8
+}
+
+declare void @bar(i32)
+
+!llvm.dbg.cu = !{!2}
+
+!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang 2.8", i1 false, metadata !"", i32 0, null, null, metadata !10, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 786471, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201, null} ; [ DW_TAG_constant ]
+!6 = metadata !{i32 786470, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ]
+!7 = metadata !{i32 786468, metadata !1, metadata !"unsigned int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 3, i32 14, metadata !9, null}
+!9 = metadata !{i32 786443, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{metadata !0}
+!11 = metadata !{metadata !5}
+!12 = metadata !{metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D"}
diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
index 934fa81435ad..ad55db05a70e 100644
--- a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
+++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=x86_64-pc-linux-gnu -asm-verbose %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnu %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; ModuleID = 'test.c'
@@ -17,31 +18,32 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", metadata !"clang version 3.0 (trunk)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"clang version 3.0 (trunk)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !20} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!10 = metadata !{metadata !11}
!11 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{metadata !14}
-!14 = metadata !{i32 720948, i32 0, null, metadata !"GLB", metadata !"GLB", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @GLB} ; [ DW_TAG_variable ]
-!15 = metadata !{i32 721152, metadata !16, metadata !"LOC", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!16 = metadata !{i32 720907, metadata !5, i32 3, i32 9, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{metadata !14}
+!14 = metadata !{i32 720948, i32 0, null, metadata !"GLB", metadata !"GLB", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @GLB, null} ; [ DW_TAG_variable ]
+!15 = metadata !{i32 786688, metadata !16, metadata !"LOC", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!16 = metadata !{i32 786443, metadata !20, metadata !5, i32 3, i32 9, i32 0} ; [ DW_TAG_lexical_block ]
!17 = metadata !{i32 4, i32 9, metadata !16, null}
!18 = metadata !{i32 4, i32 23, metadata !16, null}
!19 = metadata !{i32 5, i32 5, metadata !16, null}
+!20 = metadata !{metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo"}
-; CHECK: .long .Lstring3
-; CHECK: .byte 1
-; CHECK: .byte 1
+; CHECK: DW_TAG_variable [3]
+; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000043] = "GLB")
+; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01)
+; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01)
+
+; CHECK: DW_TAG_variable [6]
+; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000004d] = "LOC")
+; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01)
+; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x04)
-; CHECK: .long .Lstring6
-; CHECK: .byte 1
-; CHECK: .byte 4
diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
index 6e201695636e..e248aa60295e 100644
--- a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
+++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-macosx10.7 %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; CHECK: b_ref
; CHECK-NOT: AT_bit_size
@@ -88,53 +88,50 @@ entry:
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", metadata !"clang version 3.1 (trunk 146596)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !9}
-!5 = metadata !{i32 720898, null, metadata !"bar", metadata !6, i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ]
-!6 = metadata !{i32 720937, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 720913, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 146596)", i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !9}
+!5 = metadata !{i32 720898, metadata !82, null, metadata !"bar", i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ]
+!6 = metadata !{i32 720937, metadata !82} ; [ DW_TAG_file_type ]
!7 = metadata !{metadata !8, metadata !19, metadata !21}
-!8 = metadata !{i32 720909, metadata !5, metadata !"b", metadata !6, i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
-!9 = metadata !{i32 720898, null, metadata !"baz", metadata !6, i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ]
+!8 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b", i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!9 = metadata !{i32 720898, metadata !82, null, metadata !"baz", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ]
!10 = metadata !{metadata !11, metadata !13}
-!11 = metadata !{i32 720909, metadata !9, metadata !"h", metadata !6, i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
-!12 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!13 = metadata !{i32 720942, i32 0, metadata !9, metadata !"baz", metadata !"baz", metadata !"", metadata !6, i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 720909, metadata !82, metadata !9, metadata !"h", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
+!12 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 720942, metadata !6, metadata !9, metadata !"baz", metadata !"baz", metadata !"", i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17} ; [ DW_TAG_subprogram ]
!14 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!15 = metadata !{null, metadata !16, metadata !12}
!16 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
!17 = metadata !{metadata !18}
!18 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!19 = metadata !{i32 720909, metadata !5, metadata !"b_ref", metadata !6, i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
+!19 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b_ref", i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
!20 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
-!21 = metadata !{i32 720942, i32 0, metadata !5, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ]
+!21 = metadata !{i32 720942, metadata !6, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ]
!22 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!23 = metadata !{null, metadata !24, metadata !12}
!24 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ]
!25 = metadata !{metadata !26}
!26 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!27 = metadata !{metadata !28}
-!28 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46}
-!29 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35} ; [ DW_TAG_subprogram ]
+!27 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46}
+!29 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35} ; [ DW_TAG_subprogram ]
!30 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !31, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!31 = metadata !{metadata !12, metadata !12, metadata !32}
-!32 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ]
-!33 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ]
-!34 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!32 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ]
+!33 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ]
+!34 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
!35 = metadata !{metadata !36}
!36 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!37 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !38} ; [ DW_TAG_subprogram ]
+!37 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !38} ; [ DW_TAG_subprogram ]
!38 = metadata !{metadata !39}
!39 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!40 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !41} ; [ DW_TAG_subprogram ]
+!40 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !41} ; [ DW_TAG_subprogram ]
!41 = metadata !{metadata !42}
!42 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!43 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !44} ; [ DW_TAG_subprogram ]
+!43 = metadata !{i32 720942, metadata !6, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !44} ; [ DW_TAG_subprogram ]
!44 = metadata !{metadata !45}
!45 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!46 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47} ; [ DW_TAG_subprogram ]
+!46 = metadata !{i32 720942, metadata !6, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47} ; [ DW_TAG_subprogram ]
!47 = metadata !{metadata !48}
!48 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
!49 = metadata !{i32 721153, metadata !29, metadata !"argc", metadata !6, i32 16777232, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
@@ -170,3 +167,4 @@ entry:
!79 = metadata !{i32 6, i32 23, metadata !46, null}
!80 = metadata !{i32 6, i32 24, metadata !81, null}
!81 = metadata !{i32 720907, metadata !46, i32 6, i32 23, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!82 = metadata !{metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref"}
diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll
index 25b5f00c6af6..84e3f630976a 100644
--- a/test/DebugInfo/X86/DW_AT_byte_size.ll
+++ b/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
; Checks that we don't emit a size for a pointer type.
; CHECK: DW_TAG_pointer_type
@@ -24,23 +24,22 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 150996)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 150996)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14, i32 3} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786445, metadata !11, metadata !"b", metadata !6, i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!13 = metadata !{i32 786445, metadata !20, metadata !11, metadata !"b", i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
!14 = metadata !{metadata !15}
!15 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
!16 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!17 = metadata !{i32 3, i32 13, metadata !5, null}
!18 = metadata !{i32 4, i32 3, metadata !19, null}
-!19 = metadata !{i32 786443, metadata !5, i32 3, i32 16, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786443, metadata !6, metadata !5, i32 3, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
+!20 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"}
diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll
index 3be9abaffcda..356360b09834 100644
--- a/test/DebugInfo/X86/DW_AT_location-reference.ll
+++ b/test/DebugInfo/X86/DW_AT_location-reference.ll
@@ -85,17 +85,16 @@ declare i32 @g(i32, i32)
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0}
-!llvm.dbg.lv.f = !{!5}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation", metadata !"clang version 3.0 (trunk)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, metadata !22, i32 4} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk)", i1 true, metadata !"", i32 0, null, null, metadata !21, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{null}
-!5 = metadata !{i32 590080, metadata !6, metadata !"x", metadata !1, i32 5, metadata !7, i32 0} ; [ DW_TAG_auto_variable ]
-!6 = metadata !{i32 589835, metadata !0, i32 4, i32 14, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786688, metadata !6, metadata !"x", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
+!6 = metadata !{i32 786443, metadata !1, metadata !0, i32 4, i32 14, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!8 = metadata !{i32 6, i32 3, metadata !6, null}
!9 = metadata !{metadata !"int", metadata !10}
!10 = metadata !{metadata !"omnipotent char", metadata !11}
@@ -109,3 +108,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!18 = metadata !{i32 11, i32 3, metadata !6, null}
!19 = metadata !{i32 12, i32 3, metadata !6, null}
!20 = metadata !{i32 13, i32 1, metadata !6, null}
+!21 = metadata !{metadata !0}
+!22 = metadata !{metadata !5}
+!23 = metadata !{metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation"}
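
Alongside the operand reshuffle, this hunk retires the per-function named metadata (!llvm.dbg.sp, !llvm.dbg.lv.f) in favor of the single !llvm.dbg.cu root; subprograms and retained variables are now reached through list nodes hung off the compile unit. In sketch form, with the node numbers used above:

!llvm.dbg.cu = !{!2}            ; the compile unit is the only root
!21 = metadata !{metadata !0}   ; subprogram list of !2 -> function f
!22 = metadata !{metadata !5}   ; variables retained for f -> x
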
diff --git a/test/DebugInfo/X86/DW_AT_object_pointer.ll b/test/DebugInfo/X86/DW_AT_object_pointer.ll
index 163a1e7cec73..a3ad26cf82bc 100644
--- a/test/DebugInfo/X86/DW_AT_object_pointer.ll
+++ b/test/DebugInfo/X86/DW_AT_object_pointer.ll
@@ -1,20 +1,25 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
-; CHECK: DW_AT_object_pointer [DW_FORM_ref4] (cu + 0x00bf => {0x000000bf})
-; CHECK: 0x000000bf: DW_TAG_formal_parameter [12]
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000085] = "this")
+; CHECK: DW_TAG_formal_parameter [
+; CHECK: DW_TAG_class_type
+; CHECK: DW_AT_object_pointer [DW_FORM_ref4] (cu + 0x00fd => {0x000000fd})
+; CHECK: 0x000000fd: DW_TAG_formal_parameter [13]
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000086] = "this")
%class.A = type { i32 }
-define i32 @_Z3foov() nounwind uwtable ssp {
+define i32 @_Z3fooi(i32) nounwind uwtable ssp {
entry:
+ %.addr = alloca i32, align 4
%a = alloca %class.A, align 4
+ store i32 %0, i32* %.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %.addr}, metadata !36), !dbg !35
call void @llvm.dbg.declare(metadata !{%class.A* %a}, metadata !21), !dbg !23
call void @_ZN1AC1Ev(%class.A* %a), !dbg !24
%m_a = getelementptr inbounds %class.A* %a, i32 0, i32 0, !dbg !25
- %0 = load i32* %m_a, align 4, !dbg !25
- ret i32 %0, !dbg !25
+ %1 = load i32* %m_a, align 4, !dbg !25
+ ret i32 %1, !dbg !25
}
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
@@ -42,38 +47,39 @@ entry:
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"bar.cpp", metadata !"/Users/echristo/debug-tests", metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !10, metadata !20}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !6, i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [foo]
-!6 = metadata !{i32 786473, metadata !"bar.cpp", metadata !"/Users/echristo/debug-tests", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !10, metadata !20}
+!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3fooi, null, null, metadata !1, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [foo]
+!6 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, i32 0, null, metadata !"A", metadata !"A", metadata !"_ZN1AC1Ev", metadata !6, i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC1Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, metadata !6, null, metadata !"A", metadata !"A", metadata !"_ZN1AC1Ev", i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC1Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!12 = metadata !{null, metadata !13}
!13 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!14 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [from ]
+!14 = metadata !{i32 786434, metadata !37, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [from ]
!15 = metadata !{metadata !16, metadata !17}
-!16 = metadata !{i32 786445, metadata !14, metadata !"m_a", metadata !6, i32 4, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [m_a] [line 4, size 32, align 32, offset 0] [from int]
-!17 = metadata !{i32 786478, i32 0, metadata !14, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A]
+!16 = metadata !{i32 786445, metadata !37, metadata !14, metadata !"m_a", i32 4, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [m_a] [line 4, size 32, align 32, offset 0] [from int]
+!17 = metadata !{i32 786478, metadata !6, metadata !14, metadata !"A", metadata !"A", metadata !"", i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A]
!18 = metadata !{metadata !19}
!19 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!20 = metadata !{i32 786478, i32 0, null, metadata !"A", metadata !"A", metadata !"_ZN1AC2Ev", metadata !6, i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC2Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
+!20 = metadata !{i32 786478, metadata !6, null, metadata !"A", metadata !"A", metadata !"_ZN1AC2Ev", i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC2Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
!21 = metadata !{i32 786688, metadata !22, metadata !"a", metadata !6, i32 8, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 8]
-!22 = metadata !{i32 786443, metadata !5, i32 7, i32 11, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
+!22 = metadata !{i32 786443, metadata !6, metadata !5, i32 7, i32 11, i32 0} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
!23 = metadata !{i32 8, i32 5, metadata !22, null}
!24 = metadata !{i32 8, i32 6, metadata !22, null}
!25 = metadata !{i32 9, i32 3, metadata !22, null}
!26 = metadata !{i32 786689, metadata !10, metadata !"this", metadata !6, i32 16777219, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 3]
-!27 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
!28 = metadata !{i32 3, i32 3, metadata !10, null}
!29 = metadata !{i32 3, i32 18, metadata !10, null}
!30 = metadata !{i32 786689, metadata !20, metadata !"this", metadata !6, i32 16777219, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 3]
!31 = metadata !{i32 3, i32 3, metadata !20, null}
!32 = metadata !{i32 3, i32 9, metadata !33, null}
-!33 = metadata !{i32 786443, metadata !20, i32 3, i32 7, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
+!33 = metadata !{i32 786443, metadata !6, metadata !20, i32 3, i32 7, i32 1} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
!34 = metadata !{i32 3, i32 18, metadata !33, null}
+!35 = metadata !{i32 7, i32 0, metadata !5, null}
+!36 = metadata !{i32 786689, metadata !5, metadata !"", metadata !6, i32 16777223, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 7]
+!37 = metadata !{metadata !"bar.cpp", metadata !"/Users/echristo/debug-tests"}
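
A note on the large constants in the DW_TAG_arg_variable nodes above: in this era of the metadata format the line operand packs the argument index into the top byte, i.e. i32 (ArgNo << 24 | Line). Assuming that encoding, the values decode as:

; 16777219 = 0x01000003 = (1 << 24) | 3  -> argument 1 ("this"), declared at line 3
; 16777223 = 0x01000007 = (1 << 24) | 7  -> argument 1 (the unnamed i32), declared at line 7
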
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
index 078b740a4170..07849f352268 100644
--- a/test/DebugInfo/X86/DW_AT_specification.ll
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; Test that the DW_AT_specification is a back edge in the file.
@@ -16,28 +16,26 @@ entry:
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"<unknown>", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, metadata !27, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16, i32 4} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !27} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 720915, null, metadata !"foo", metadata !6, i32 1, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!11 = metadata !{i32 720942, i32 0, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ]
-!12 = metadata !{i32 720898, null, metadata !"foo", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ]
+!9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 786451, metadata !27, null, metadata !"foo", i32 1, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!11 = metadata !{i32 720942, metadata !6, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14, i32 2} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 720898, metadata !27, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ]
!13 = metadata !{metadata !11}
!14 = metadata !{metadata !15}
!15 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
!16 = metadata !{metadata !17}
!17 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!18 = metadata !{metadata !19}
-!19 = metadata !{metadata !20}
-!20 = metadata !{i32 720948, i32 0, metadata !5, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 5, metadata !21, i32 1, i32 1, i32* @_ZZN3foo3barEvE1x} ; [ DW_TAG_variable ]
-!21 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ]
-!22 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!18 = metadata !{metadata !20}
+!20 = metadata !{i32 720948, i32 0, metadata !5, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 5, metadata !21, i32 1, i32 1, i32* @_ZZN3foo3barEvE1x, null} ; [ DW_TAG_variable ]
+!21 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ]
+!22 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!25 = metadata !{i32 6, i32 1, metadata !26, null}
-!26 = metadata !{i32 720907, metadata !5, i32 4, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 786443, metadata !5, i32 4, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library"}
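
The retagging seen here (720913 -> 786449, 720907 -> 786443, and so on) is mechanical: the leading operand of each descriptor is (LLVMDebugVersion << 16) | DWARF_TAG, so moving from debug-info version 11 to 12 raises every constant by 0x10000 while the low 16 bits keep identifying the DWARF tag. A worked decode under that scheme:

; 720907 = 0x000B000B = (11 << 16) | 0x0B -> version 11, DW_TAG_lexical_block
; 786443 = 0x000C000B = (12 << 16) | 0x0B -> version 12, DW_TAG_lexical_block
; 786449 = 0x000C0011 = (12 << 16) | 0x11 -> version 12, DW_TAG_compile_unit
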
diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll
index a0dcec32e691..f60175fb69aa 100644
--- a/test/DebugInfo/X86/DW_TAG_friend.ll
+++ b/test/DebugInfo/X86/DW_TAG_friend.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; Check that the friend tag is there and is followed by a DW_AT_friend that has a reference back.
@@ -17,31 +17,30 @@
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !17}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 10, metadata !7, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ]
+!0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !17}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 10, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ]
!8 = metadata !{metadata !9, metadata !11}
-!9 = metadata !{i32 786445, metadata !7, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 786478, i32 0, metadata !7, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 1, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !15} ; [ DW_TAG_subprogram ]
+!9 = metadata !{i32 786445, metadata !28, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !15, i32 1} ; [ DW_TAG_subprogram ]
!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!13 = metadata !{null, metadata !14}
!14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !7} ; [ DW_TAG_pointer_type ]
!15 = metadata !{metadata !16}
!16 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !6, i32 11, metadata !18, i32 0, i32 1, %class.B* @b} ; [ DW_TAG_variable ]
-!18 = metadata !{i32 786434, null, metadata !"B", metadata !6, i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_class_type ]
+!17 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !6, i32 11, metadata !18, i32 0, i32 1, %class.B* @b, null} ; [ DW_TAG_variable ]
+!18 = metadata !{i32 786434, metadata !28, null, metadata !"B", i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_class_type ]
!19 = metadata !{metadata !20, metadata !21, metadata !27}
-!20 = metadata !{i32 786445, metadata !18, metadata !"b", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
-!21 = metadata !{i32 786478, i32 0, metadata !18, metadata !"B", metadata !"B", metadata !"", metadata !6, i32 5, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 786445, metadata !28, metadata !18, metadata !"b", i32 7, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
+!21 = metadata !{i32 786478, metadata !6, metadata !18, metadata !"B", metadata !"B", metadata !"", i32 5, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !25, i32 5} ; [ DW_TAG_subprogram ]
!22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!23 = metadata !{null, metadata !24}
!24 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !18} ; [ DW_TAG_pointer_type ]
!25 = metadata !{metadata !26}
!26 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
!27 = metadata !{i32 786474, metadata !18, null, metadata !6, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_friend ]
+!28 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"}
diff --git a/test/DebugInfo/X86/aligned_stack_var.ll b/test/DebugInfo/X86/aligned_stack_var.ll
index 9e6c7ff813af..a8f6cca750c0 100644
--- a/test/DebugInfo/X86/aligned_stack_var.ll
+++ b/test/DebugInfo/X86/aligned_stack_var.ll
@@ -1,5 +1,5 @@
; RUN: llc %s -mtriple=x86_64-pc-linux-gnu -O0 -filetype=obj -o %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; If stack is realigned, we shouldn't describe locations of local
; variables by giving an offset from the frame pointer (%rbp):
@@ -26,17 +26,15 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test.cc", metadata !"/home/samsonov/debuginfo", metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/debuginfo", null} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{null}
!9 = metadata !{i32 786688, metadata !10, metadata !"x", metadata !6, i32 2, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 786443, metadata !5, i32 1, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786443, metadata !6, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!12 = metadata !{i32 2, i32 7, metadata !10, null}
!13 = metadata !{i32 3, i32 1, metadata !10, null}
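
The situation this test guards against needs nothing more than an over-aligned local. A minimal IR sketch (the align 32 value is illustrative; the test's actual source is not shown in this patch):

define void @_Z3runv() nounwind {
entry:
  ; An alloca whose alignment exceeds the ABI stack alignment forces the
  ; prologue to realign the stack; %rbp-relative offsets to such a slot are
  ; then wrong, so debug locations must be based on the realigned pointer.
  %x = alloca i32, align 32
  ret void
}
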
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
index 4953c421cd32..fadea775aadf 100644
--- a/test/DebugInfo/X86/block-capture.ll
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; Checks that we emit debug info for the block variable declare.
; CHECK: 0x00000030: DW_TAG_subprogram [3]
@@ -62,57 +62,55 @@ declare i32 @__objc_personality_v0(...)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!35, !36, !37, !38}
-!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 151227)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, metadata !63, i32 16, metadata !"clang version 3.1 (trunk 151227)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
+!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 5} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !63} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786454, null, metadata !"dispatch_block_t", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
-!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_generic", metadata !6, i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 786454, metadata !63, null, metadata !"dispatch_block_t", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_generic", i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!12 = metadata !{metadata !13, metadata !15, metadata !17, metadata !18, metadata !19}
-!13 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
-!14 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!15 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
-!16 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
-!18 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
-!19 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ]
-!20 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
-!21 = metadata !{i32 786451, metadata !6, metadata !"__block_descriptor", metadata !6, i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!13 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
+!16 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__reserved", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
+!18 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
+!19 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ]
+!20 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
+!21 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_descriptor", i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!22 = metadata !{metadata !23, metadata !25}
-!23 = metadata !{i32 786445, metadata !6, metadata !"reserved", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ]
-!24 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!25 = metadata !{i32 786445, metadata !6, metadata !"Size", metadata !6, i32 0, i64 64, i64 64, i64 64, i32 0, metadata !24} ; [ DW_TAG_member ]
+!23 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ]
+!24 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!25 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"Size", i32 0, i64 64, i64 64, i64 64, i32 0, metadata !24} ; [ DW_TAG_member ]
!26 = metadata !{metadata !27}
!27 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!28 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", metadata !6, i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!28 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26, i32 7} ; [ DW_TAG_subprogram ]
!29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!30 = metadata !{null, metadata !14}
-!31 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", metadata !6, i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!31 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ]
!32 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!33 = metadata !{null, metadata !14, metadata !14}
-!34 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", metadata !6, i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!34 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ]
!35 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
!36 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
!37 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
!38 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
!39 = metadata !{i32 786689, metadata !28, metadata !".block_descriptor", metadata !6, i32 16777223, metadata !40, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!40 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ]
-!41 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_1", metadata !6, i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!40 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ]
+!41 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_1", i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!42 = metadata !{metadata !43, metadata !44, metadata !45, metadata !46, metadata !47, metadata !50}
-!43 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
-!44 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
-!45 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 7, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
-!46 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
-!47 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ]
-!48 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ]
-!49 = metadata !{i32 786451, null, metadata !"__block_descriptor_withcopydispose", metadata !6, i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ]
-!50 = metadata !{i32 786445, metadata !6, metadata !"block", metadata !6, i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ]
+!43 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
+!44 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__flags", i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
+!45 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__reserved", i32 7, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
+!46 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
+!47 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ]
+!48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ]
+!49 = metadata !{i32 786451, metadata !63, null, metadata !"__block_descriptor_withcopydispose", i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ]
+!50 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"block", i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ]
!51 = metadata !{i32 7, i32 18, metadata !28, null}
!52 = metadata !{i32 7, i32 19, metadata !28, null}
!53 = metadata !{i32 786688, metadata !28, metadata !"block", metadata !6, i32 5, metadata !9, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ]
@@ -125,3 +123,4 @@ declare i32 @__objc_personality_v0(...)
!60 = metadata !{i32 786443, metadata !57, i32 9, i32 35, metadata !6, i32 3} ; [ DW_TAG_lexical_block ]
!61 = metadata !{i32 10, i32 21, metadata !28, null}
!62 = metadata !{i32 9, i32 20, metadata !56, null}
+!63 = metadata !{metadata !"foo.m", metadata !"/Users/echristo"}
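
The member offsets recorded above for __block_literal_generic (0, 64, 96, 128, 192 bits) and __block_descriptor (0, 64 bits) match the usual blocks ABI layout; expressed as IR types, roughly:

%__block_literal_generic = type { i8*, i32, i32, i8*, %__block_descriptor* } ; __isa, __flags, __reserved, __FuncPtr, __descriptor
%__block_descriptor = type { i64, i64 } ; reserved, Size
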
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index 58fb05573670..48e1defd4c95 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-linux %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; Test that we add DW_AT_inline even when we only have concrete out-of-line
; instances.
@@ -34,62 +34,55 @@ declare void @_Z8moz_freePv(i8*)
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"nsAutoRefCnt.cpp", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src", metadata !"clang version 3.1 ()", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31}
-!5 = metadata !{i32 720942, i32 0, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", metadata !6, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"nsAutoRefCnt.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, metadata !59, i32 4, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31}
+!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !59} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 720915, null, metadata !"nsAutoRefCnt", metadata !6, i32 10, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!12 = metadata !{i32 720942, i32 0, metadata !13, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", metadata !6, i32 11, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 720898, null, metadata !"nsAutoRefCnt", metadata !6, i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ]
+!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!12 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 11, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 11} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ]
!14 = metadata !{metadata !12, metadata !15}
-!15 = metadata !{i32 720942, i32 0, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", metadata !6, i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
+!15 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 12} ; [ DW_TAG_subprogram ]
!16 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!17 = metadata !{null, metadata !10}
-!18 = metadata !{metadata !19}
-!19 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!20 = metadata !{metadata !21}
-!21 = metadata !{metadata !22}
-!22 = metadata !{i32 721153, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !24} ; [ DW_TAG_subprogram ]
-!24 = metadata !{metadata !25}
-!25 = metadata !{metadata !26}
-!26 = metadata !{i32 721153, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!27 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !28} ; [ DW_TAG_subprogram ]
-!28 = metadata !{metadata !29}
-!29 = metadata !{metadata !30}
-!30 = metadata !{i32 721153, metadata !27, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 720942, i32 0, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", metadata !6, i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, metadata !36, metadata !43} ; [ DW_TAG_subprogram ]
+!18 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!20 = metadata !{metadata !22}
+!22 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ]
+!24 = metadata !{metadata !26}
+!26 = metadata !{i32 786689, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!27 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ]
+!28 = metadata !{metadata !30}
+!30 = metadata !{i32 786689, metadata !27, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!31 = metadata !{i32 720942, metadata !6, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, metadata !36, metadata !43, i32 4} ; [ DW_TAG_subprogram ]
!32 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!33 = metadata !{metadata !9, metadata !34, metadata !9}
-!34 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !35} ; [ DW_TAG_pointer_type ]
-!35 = metadata !{i32 720915, null, metadata !"nsAutoRefCnt", metadata !6, i32 2, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
-!36 = metadata !{i32 720942, i32 0, metadata !37, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", metadata !6, i32 4, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
-!37 = metadata !{i32 720898, null, metadata !"nsAutoRefCnt", metadata !6, i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null} ; [ DW_TAG_class_type ]
+!34 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !35} ; [ DW_TAG_pointer_type ]
+!35 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!36 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 4} ; [ DW_TAG_subprogram ]
+!37 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null} ; [ DW_TAG_class_type ]
!38 = metadata !{metadata !39, metadata !40, metadata !36}
-!39 = metadata !{i32 720909, metadata !37, metadata !"mValue", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
-!40 = metadata !{i32 720942, i32 0, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", metadata !6, i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
+!39 = metadata !{i32 786445, metadata !59, metadata !37, metadata !"mValue", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!40 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ]
!41 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!42 = metadata !{null, metadata !34}
-!43 = metadata !{metadata !44}
-!44 = metadata !{metadata !45, metadata !46}
-!45 = metadata !{i32 721153, metadata !31, metadata !"this", metadata !6, i32 16777220, metadata !34, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!46 = metadata !{i32 721153, metadata !31, metadata !"aValue", metadata !6, i32 33554436, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!47 = metadata !{metadata !48}
-!48 = metadata !{metadata !49}
-!49 = metadata !{i32 720948, i32 0, null, metadata !"mRefCnt", metadata !"mRefCnt", metadata !"", metadata !6, i32 9, metadata !37, i32 0, i32 1, i32* null} ; [ DW_TAG_variable ]
+!43 = metadata !{metadata !45, metadata !46}
+!45 = metadata !{i32 786689, metadata !31, metadata !"this", metadata !6, i32 16777220, metadata !34, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!46 = metadata !{i32 786689, metadata !31, metadata !"aValue", metadata !6, i32 33554436, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!47 = metadata !{metadata !49}
+!49 = metadata !{i32 720948, i32 0, null, metadata !"mRefCnt", metadata !"mRefCnt", metadata !"", metadata !6, i32 9, metadata !37, i32 0, i32 1, i32* null, null} ; [ DW_TAG_variable ]
!50 = metadata !{i32 5, i32 5, metadata !51, metadata !52}
-!51 = metadata !{i32 720907, metadata !31, i32 4, i32 29, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!51 = metadata !{i32 786443, metadata !6, metadata !31, i32 4, i32 29, i32 2} ; [ DW_TAG_lexical_block ]
!52 = metadata !{i32 15, i32 0, metadata !53, null}
-!53 = metadata !{i32 720907, metadata !5, i32 14, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!53 = metadata !{i32 786443, metadata !6, metadata !5, i32 14, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
!54 = metadata !{i32 19, i32 3, metadata !55, metadata !56}
-!55 = metadata !{i32 720907, metadata !27, i32 18, i32 41, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!55 = metadata !{i32 786443, metadata !6, metadata !27, i32 18, i32 41, i32 1} ; [ DW_TAG_lexical_block ]
!56 = metadata !{i32 18, i32 41, metadata !23, metadata !52}
!57 = metadata !{i32 19, i32 3, metadata !55, metadata !58}
!58 = metadata !{i32 18, i32 41, metadata !23, null}
+!59 = metadata !{metadata !"nsAutoRefCnt.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src"}
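
The two concrete instances this test cares about are the complete-object and base-object destructors of the same source-level ~nsAutoRefCnt, distinguished only by their Itanium manglings; both point back at the shared declaration !15, the abstract subprogram on which DW_AT_inline must still appear:

; _ZN17nsAutoRefCntD1Ev -> complete-object destructor (D1), concrete, declaration !15
; _ZN17nsAutoRefCntD2Ev -> base-object destructor (D2), concrete, declaration !15
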
diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
new file mode 100644
index 000000000000..e7a554ff868d
--- /dev/null
+++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
@@ -0,0 +1,92 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -filetype=obj -o %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin -regalloc=basic %s -filetype=obj -o %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+;CHECK: DW_TAG_inlined_subroutine [12]
+;CHECK-NEXT: DW_AT_abstract_origin
+;CHECK-NEXT: DW_AT_low_pc
+;CHECK-NEXT: DW_AT_high_pc
+;CHECK-NEXT: DW_AT_call_file
+;CHECK-NEXT: DW_AT_call_line
+
+;CHECK: DW_TAG_formal_parameter [9]
+;CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000055] = "sp")
+
+%struct.S1 = type { float*, i32 }
+
+@p = common global %struct.S1 zeroinitializer, align 8
+
+define i32 @foo(%struct.S1* nocapture %sp, i32 %nums) nounwind optsize ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !{%struct.S1* %sp}, i64 0, metadata !9), !dbg !20
+ tail call void @llvm.dbg.value(metadata !{i32 %nums}, i64 0, metadata !18), !dbg !21
+ %tmp2 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 1, !dbg !22
+ store i32 %nums, i32* %tmp2, align 4, !dbg !22, !tbaa !24
+ %call = tail call float* @bar(i32 %nums) nounwind optsize, !dbg !27
+ %tmp5 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 0, !dbg !27
+ store float* %call, float** %tmp5, align 8, !dbg !27, !tbaa !28
+ %cmp = icmp ne float* %call, null, !dbg !29
+ %cond = zext i1 %cmp to i32, !dbg !29
+ ret i32 %cond, !dbg !29
+}
+
+declare float* @bar(i32) optsize
+
+define void @foobar() nounwind optsize ssp {
+entry:
+ tail call void @llvm.dbg.value(metadata !30, i64 0, metadata !9) nounwind, !dbg !31
+ tail call void @llvm.dbg.value(metadata !34, i64 0, metadata !18) nounwind, !dbg !35
+ store i32 1, i32* getelementptr inbounds (%struct.S1* @p, i64 0, i32 1), align 8, !dbg !36, !tbaa !24
+ %call.i = tail call float* @bar(i32 1) nounwind optsize, !dbg !37
+ store float* %call.i, float** getelementptr inbounds (%struct.S1* @p, i64 0, i32 0), align 8, !dbg !37, !tbaa !28
+ ret void, !dbg !38
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!2}
+
+!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo, null, null, metadata !41, i32 8} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !42} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !42, i32 12, metadata !"clang version 2.9 (trunk 125693)", i1 true, metadata !"", i32 0, null, null, metadata !39, metadata !40, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null}
+!9 = metadata !{i32 786689, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0, metadata !32} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786454, metadata !42, metadata !2, metadata !"S1", i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
+!12 = metadata !{i32 786451, metadata !42, metadata !2, metadata !"S1", i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!13 = metadata !{metadata !14, metadata !17}
+!14 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"m", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ]
+!15 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
+!16 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"nums", i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
+!18 = metadata !{i32 786689, metadata !0, metadata !"nums", metadata !1, i32 7, metadata !5, i32 0, metadata !32} ; [ DW_TAG_arg_variable ]
+!19 = metadata !{i32 786484, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p, null} ; [ DW_TAG_variable ]
+!20 = metadata !{i32 7, i32 13, metadata !0, null}
+!21 = metadata !{i32 7, i32 21, metadata !0, null}
+!22 = metadata !{i32 9, i32 3, metadata !23, null}
+!23 = metadata !{i32 786443, metadata !1, metadata !0, i32 8, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{metadata !"int", metadata !25}
+!25 = metadata !{metadata !"omnipotent char", metadata !26}
+!26 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!27 = metadata !{i32 10, i32 3, metadata !23, null}
+!28 = metadata !{metadata !"any pointer", metadata !25}
+!29 = metadata !{i32 11, i32 3, metadata !23, null}
+!30 = metadata !{%struct.S1* @p}
+!31 = metadata !{i32 7, i32 13, metadata !0, metadata !32}
+!32 = metadata !{i32 16, i32 3, metadata !33, null}
+!33 = metadata !{i32 786443, metadata !1, metadata !6, i32 15, i32 15, i32 1} ; [ DW_TAG_lexical_block ]
+!34 = metadata !{i32 1}
+!35 = metadata !{i32 7, i32 21, metadata !0, metadata !32}
+!36 = metadata !{i32 9, i32 3, metadata !23, metadata !32}
+!37 = metadata !{i32 10, i32 3, metadata !23, metadata !32}
+!38 = metadata !{i32 17, i32 1, metadata !33, null}
+!39 = metadata !{metadata !0, metadata !6}
+!40 = metadata !{metadata !19}
+!41 = metadata !{metadata !9, metadata !18}
+!42 = metadata !{metadata !"nm2.c", metadata !"/private/tmp"}
diff --git a/test/DebugInfo/X86/debug-info-block-captured-self.ll b/test/DebugInfo/X86/debug-info-block-captured-self.ll
new file mode 100644
index 000000000000..77e02c62aada
--- /dev/null
+++ b/test/DebugInfo/X86/debug-info-block-captured-self.ll
@@ -0,0 +1,106 @@
+; RUN: llc -mtriple x86_64-apple-darwin -filetype=obj -o %t.o < %s
+; RUN: llvm-dwarfdump %t.o | FileCheck %s
+;
+; Test that DW_AT_location is generated for a captured "self" inside a
+; block.
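+; ("self" is not an ordinary stack variable inside a block invoke
+; function: it is captured in the block literal, so its DWARF location
+; has to be expressed indirectly through the .block_descriptor argument.)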
+;
+; This test is split into two parts; the frontend part can be found at
+; llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m
+;
+; CHECK: {{.*}}DW_AT_name{{.*}}_block_invoke{{.*}}
+; CHECK: DW_TAG_variable
+; CHECK: {{.*}}DW_AT_name{{.*}}"self"{{.*}}
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_location
+;
+; CHECK: {{.*}}DW_AT_name{{.*}}_block_invoke{{.*}}
+; CHECK: DW_TAG_variable
+; CHECK: {{.*}}DW_AT_name{{.*}}"self"{{.*}}
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_location
+;
+; Generated (and then reduced) from
+; ----------------------------------------------------------------------
+;
+; @class T;
+; @interface S
+; @end
+; @interface Mode
+; -(int) count;
+; @end
+; @interface Context
+; @end
+; @interface ViewController
+; @property (nonatomic, readwrite, strong) Context *context;
+; @end
+; typedef enum {
+; Unknown = 0,
+; } State;
+; @interface Main : ViewController
+; {
+; T * t1;
+; T * t2;
+; }
+; @property(readwrite, nonatomic) State state;
+; @end
+; @implementation Main
+; - (id) initWithContext:(Context *) context
+; {
+; t1 = [self.context withBlock:^(id obj){
+; id *mode1;
+; t2 = [mode1 withBlock:^(id object){
+; Mode *mode2 = object;
+; if ([mode2 count] != 0) {
+; self.state = 0;
+; }
+; }];
+; }];
+; }
+; @end
+; ----------------------------------------------------------------------
+; ModuleID = 'llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m'
+%0 = type opaque
+%struct.__block_descriptor = type { i64, i64 }
+declare void @llvm.dbg.declare(metadata, metadata) #1
+define internal void @"__24-[Main initWithContext:]_block_invoke"(i8* %.block_descriptor, i8* %obj) #0 {
+ %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !84
+ %block.captured-self = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i32 0, i32 5, !dbg !84
+ call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block}, metadata !86), !dbg !87
+ ret void, !dbg !87
+}
+
+define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_descriptor, i8* %object) #0 {
+ %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103
+ %block.captured-self = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i32 0, i32 5, !dbg !103
+ call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block}, metadata !105), !dbg !106
+ ret void, !dbg !106
+}
+
+!llvm.dbg.cu = !{!0}
+!0 = metadata !{i32 786449, i32 16, metadata !1, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !4, metadata !23, metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m] [DW_LANG_ObjC]
+!1 = metadata !{i32 786473, metadata !107} ; [ DW_TAG_file_type ]
+!2 = metadata !{metadata !3}
+!3 = metadata !{i32 786436, metadata !107, null, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [from ]
+!4 = metadata !{}
+!15 = metadata !{i32 0}
+!23 = metadata !{metadata !38, metadata !42}
+!27 = metadata !{i32 786454, metadata !107, null, metadata !"id", i32 31, i64 0, i64 0, i64 0, i32 0, metadata !28} ; [ DW_TAG_typedef ] [id] [line 31, size 0, align 0, offset 0] [from ]
+!28 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!29 = metadata !{i32 786451, metadata !107, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !30, i32 0, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!30 = metadata !{metadata !31}
+!31 = metadata !{i32 786445, metadata !107, metadata !29, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!32 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!33 = metadata !{i32 786451, metadata !107, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!34 = metadata !{i32 786451, metadata !107, null, metadata !"Main", i32 23, i64 0, i64 0, i32 0, i32 1092, i32 0, i32 0, i32 16} ; [ DW_TAG_structure_type ] [Main] [line 23, size 0, align 0, offset 0] [artificial] [fwd] [from ]
+!38 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"", i32 33, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke", null, null, metadata !15, i32 33} ; [ DW_TAG_subprogram ] [line 33] [local] [def] [__24-[Main initWithContext:]_block_invoke]
+!39 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!40 = metadata !{null, metadata !41, metadata !27}
+!41 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!42 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"", i32 35, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke_2", null, null, metadata !15, i32 35} ; [ DW_TAG_subprogram ] [line 35] [local] [def] [__24-[Main initWithContext:]_block_invoke_2]
+!84 = metadata !{i32 33, i32 0, metadata !38, null}
+!86 = metadata !{i32 786688, metadata !38, metadata !"self", metadata !1, i32 41, metadata !34, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 41]
+!87 = metadata !{i32 41, i32 0, metadata !38, null}
+!103 = metadata !{i32 35, i32 0, metadata !42, null}
+!105 = metadata !{i32 786688, metadata !42, metadata !"self", metadata !1, i32 40, metadata !34, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 40]
+!106 = metadata !{i32 40, i32 0, metadata !42, null}
+!107 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m", metadata !""}
diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll
new file mode 100644
index 000000000000..36ab61100856
--- /dev/null
+++ b/test/DebugInfo/X86/debug-info-blocks.ll
@@ -0,0 +1,372 @@
+; RUN: llc -mtriple x86_64-apple-darwin -filetype=obj -o %t.o < %s
+; RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
+
+; Generated from llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m
+; rdar://problem/9279956
+; Test that the DW_AT_location of "self" is ( fbreg +{{[0-9]+}}, deref, +{{[0-9]+}} ).
+
+; CHECK: DW_AT_name{{.*}}_block_invoke
+; CHECK-NOT: DW_TAG_subprogram
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: .block_descriptor
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_location
+; CHECK-NOT: DW_TAG_subprogram
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: DW_AT_name{{.*}}"self"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_type{{.*}}{[[APTR:.*]]}
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_artificial
+; CHECK-NOT: DW_TAG
+; 0x06 = DW_OP_deref
+; 0x23 = DW_OP_plus_uconst
+; 0x91 = DW_OP_fbreg
+; CHECK: DW_AT_location{{.*}}91 {{[0-9]+}} 06 23 {{[0-9]+}} )
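+; For illustration only (the operand bytes here are made up), an
+; expression encoded as "91 68 06 23 08" would decode to
+; DW_OP_fbreg -24 (SLEB128 0x68), DW_OP_deref, DW_OP_plus_uconst 8
+; (ULEB128 0x08): load the block pointer at frame offset -24, then
+; step forward 8 bytes to the captured "self" field.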
+; CHECK: DW_TAG_structure_type
+; CHECK: [[A:.*]]: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_APPLE_objc_complete_type
+; CHECK-NEXT: DW_AT_name{{.*}}"A"
+; CHECK: [[APTR]]: DW_TAG_pointer_type [5]
+; CHECK-NEXT: {[[A]]}
+
+
+; ModuleID = 'llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+%0 = type opaque
+%1 = type opaque
+%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* }
+%struct._objc_cache = type opaque
+%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* }
+%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] }
+%struct._objc_method = type { i8*, i8*, i8* }
+%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] }
+%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32, i8** }
+%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
+%struct._prop_t = type { i8*, i8* }
+%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] }
+%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 }
+%struct._message_ref_t = type { i8*, i8* }
+%struct._objc_super = type { i8*, i8* }
+%struct.__block_descriptor = type { i64, i64 }
+%struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* }
+
+@"OBJC_CLASS_$_A" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_A", %struct._class_t* @"OBJC_CLASS_$_NSObject", %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** @_objc_empty_vtable, %struct._class_ro_t* @"\01l_OBJC_CLASS_RO_$_A" }, section "__DATA, __objc_data", align 8
+@"\01L_OBJC_CLASSLIST_SUP_REFS_$_" = internal global %struct._class_t* @"OBJC_CLASS_$_A", section "__DATA, __objc_superrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [5 x i8] c"init\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal externally_initialized global i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"OBJC_CLASS_$_NSMutableDictionary" = external global %struct._class_t
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_" = internal global %struct._class_t* @"OBJC_CLASS_$_NSMutableDictionary", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_METH_VAR_NAME_1" = internal global [6 x i8] c"alloc\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01l_objc_msgSend_fixup_alloc" = weak hidden global { i8* (i8*, %struct._message_ref_t*, ...)*, i8* } { i8* (i8*, %struct._message_ref_t*, ...)* @objc_msgSend_fixup, i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0) }, section "__DATA, __objc_msgrefs, coalesced", align 16
+@"\01L_OBJC_METH_VAR_NAME_2" = internal global [6 x i8] c"count\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01l_objc_msgSend_fixup_count" = weak hidden global { i8* (i8*, %struct._message_ref_t*, ...)*, i8* } { i8* (i8*, %struct._message_ref_t*, ...)* @objc_msgSend_fixup, i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_2", i32 0, i32 0) }, section "__DATA, __objc_msgrefs, coalesced", align 16
+@"OBJC_IVAR_$_A.ivar" = global i64 0, section "__DATA, __objc_ivar", align 8
+@_NSConcreteStackBlock = external global i8*
+@.str = private unnamed_addr constant [6 x i8] c"v8@?0\00", align 1
+@__block_descriptor_tmp = internal constant { i64, i64, i8*, i8*, i8*, i64 } { i64 0, i64 40, i8* bitcast (void (i8*, i8*)* @__copy_helper_block_ to i8*), i8* bitcast (void (i8*)* @__destroy_helper_block_ to i8*), i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i64 256 }
+@_objc_empty_cache = external global %struct._objc_cache
+@_objc_empty_vtable = external global i8* (i8*, i8*)*
+@"OBJC_METACLASS_$_NSObject" = external global %struct._class_t
+@"\01L_OBJC_CLASS_NAME_" = internal global [2 x i8] c"A\00", section "__TEXT,__objc_classname,cstring_literals", align 1
+@"\01l_OBJC_METACLASS_RO_$_A" = internal global %struct._class_ro_t { i32 1, i32 40, i32 40, i8* null, i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct.__method_list_t* null, %struct._objc_protocol_list* null, %struct._ivar_list_t* null, i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8
+@"OBJC_METACLASS_$_A" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_NSObject", %struct._class_t* @"OBJC_METACLASS_$_NSObject", %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** @_objc_empty_vtable, %struct._class_ro_t* @"\01l_OBJC_METACLASS_RO_$_A" }, section "__DATA, __objc_data", align 8
+@"OBJC_CLASS_$_NSObject" = external global %struct._class_t
+@"\01L_OBJC_METH_VAR_TYPE_" = internal global [8 x i8] c"@16@0:8\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
+@"\01l_OBJC_$_INSTANCE_METHODS_A" = internal global { i32, i32, [1 x %struct._objc_method] } { i32 24, i32 1, [1 x %struct._objc_method] [%struct._objc_method { i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([8 x i8]* @"\01L_OBJC_METH_VAR_TYPE_", i32 0, i32 0), i8* bitcast (i8* (%0*, i8*)* @"\01-[A init]" to i8*) }] }, section "__DATA, __objc_const", align 8
+@"\01L_OBJC_METH_VAR_NAME_3" = internal global [5 x i8] c"ivar\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_METH_VAR_TYPE_4" = internal global [2 x i8] c"i\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
+@"\01l_OBJC_$_INSTANCE_VARIABLES_A" = internal global { i32, i32, [1 x %struct._ivar_t] } { i32 32, i32 1, [1 x %struct._ivar_t] [%struct._ivar_t { i64* @"OBJC_IVAR_$_A.ivar", i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_METH_VAR_TYPE_4", i32 0, i32 0), i32 2, i32 4 }] }, section "__DATA, __objc_const", align 8
+@"\01l_OBJC_CLASS_RO_$_A" = internal global %struct._class_ro_t { i32 0, i32 0, i32 4, i8* null, i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct.__method_list_t* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01l_OBJC_$_INSTANCE_METHODS_A" to %struct.__method_list_t*), %struct._objc_protocol_list* null, %struct._ivar_list_t* bitcast ({ i32, i32, [1 x %struct._ivar_t] }* @"\01l_OBJC_$_INSTANCE_VARIABLES_A" to %struct._ivar_list_t*), i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_5" = internal global %struct._class_t* @"OBJC_CLASS_$_A", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_LABEL_CLASS_$" = internal global [1 x i8*] [i8* bitcast (%struct._class_t* @"OBJC_CLASS_$_A" to i8*)], section "__DATA, __objc_classlist, regular, no_dead_strip", align 8
+@llvm.used = appending global [14 x i8*] [i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_SUP_REFS_$_" to i8*), i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_" to i8*), i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_2", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([8 x i8]* @"\01L_OBJC_METH_VAR_TYPE_", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01l_OBJC_$_INSTANCE_METHODS_A" to i8*), i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_METH_VAR_TYPE_4", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._ivar_t] }* @"\01l_OBJC_$_INSTANCE_VARIABLES_A" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_5" to i8*), i8* bitcast ([1 x i8*]* @"\01L_OBJC_LABEL_CLASS_$" to i8*)], section "llvm.metadata"
+
+define internal i8* @"\01-[A init]"(%0* %self, i8* %_cmd) #0 {
+ %1 = alloca %0*, align 8
+ %2 = alloca i8*, align 8
+ %3 = alloca %struct._objc_super
+ %4 = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8
+ store %0* %self, %0** %1, align 8
+ call void @llvm.dbg.declare(metadata !{%0** %1}, metadata !60), !dbg !62
+ store i8* %_cmd, i8** %2, align 8
+ call void @llvm.dbg.declare(metadata !{i8** %2}, metadata !63), !dbg !62
+ %5 = load %0** %1, !dbg !65
+ %6 = bitcast %0* %5 to i8*, !dbg !65
+ %7 = getelementptr inbounds %struct._objc_super* %3, i32 0, i32 0, !dbg !65
+ store i8* %6, i8** %7, !dbg !65
+ %8 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_SUP_REFS_$_", !dbg !65
+ %9 = bitcast %struct._class_t* %8 to i8*, !dbg !65
+ %10 = getelementptr inbounds %struct._objc_super* %3, i32 0, i32 1, !dbg !65
+ store i8* %9, i8** %10, !dbg !65
+ %11 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !65, !invariant.load !67
+ %12 = call i8* bitcast (i8* (%struct._objc_super*, i8*, ...)* @objc_msgSendSuper2 to i8* (%struct._objc_super*, i8*)*)(%struct._objc_super* %3, i8* %11), !dbg !65
+ %13 = bitcast i8* %12 to %0*, !dbg !65
+ store %0* %13, %0** %1, align 8, !dbg !65
+ %14 = icmp ne %0* %13, null, !dbg !65
+ br i1 %14, label %15, label %24, !dbg !65
+
+; <label>:15 ; preds = %0
+ %16 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 0, !dbg !68
+ store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %16, !dbg !68
+ %17 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 1, !dbg !68
+ store i32 -1040187392, i32* %17, !dbg !68
+ %18 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 2, !dbg !68
+ store i32 0, i32* %18, !dbg !68
+ %19 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 3, !dbg !68
+ store i8* bitcast (void (i8*)* @"__9-[A init]_block_invoke" to i8*), i8** %19, !dbg !68
+ %20 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 4, !dbg !68
+ store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i64 }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %20, !dbg !68
+ %21 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !68
+ %22 = load %0** %1, align 8, !dbg !68
+ store %0* %22, %0** %21, align 8, !dbg !68
+ %23 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4 to void ()*, !dbg !68
+ call void @run(void ()* %23), !dbg !68
+ br label %24, !dbg !70
+
+; <label>:24 ; preds = %15, %0
+ %25 = load %0** %1, align 8, !dbg !71
+ %26 = bitcast %0* %25 to i8*, !dbg !71
+ ret i8* %26, !dbg !71
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare i8* @objc_msgSendSuper2(%struct._objc_super*, i8*, ...)
+
+define internal void @run(void ()* %block) #0 {
+ %1 = alloca void ()*, align 8
+ store void ()* %block, void ()** %1, align 8
+ call void @llvm.dbg.declare(metadata !{void ()** %1}, metadata !72), !dbg !73
+ %2 = load void ()** %1, align 8, !dbg !74
+ %3 = bitcast void ()* %2 to %struct.__block_literal_generic*, !dbg !74
+ %4 = getelementptr inbounds %struct.__block_literal_generic* %3, i32 0, i32 3, !dbg !74
+ %5 = bitcast %struct.__block_literal_generic* %3 to i8*, !dbg !74
+ %6 = load i8** %4, !dbg !74
+ %7 = bitcast i8* %6 to void (i8*)*, !dbg !74
+ call void %7(i8* %5), !dbg !74
+ ret void, !dbg !75
+}
+
+define internal void @"__9-[A init]_block_invoke"(i8* %.block_descriptor) #0 {
+ %1 = alloca i8*, align 8
+ %2 = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, align 8
+ %d = alloca %1*, align 8
+ store i8* %.block_descriptor, i8** %1, align 8
+ %3 = load i8** %1
+ call void @llvm.dbg.value(metadata !{i8* %3}, i64 0, metadata !76), !dbg !88
+ call void @llvm.dbg.declare(metadata !{i8* %.block_descriptor}, metadata !76), !dbg !88
+ %4 = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !88
+ store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>** %2, align 8, !dbg !88
+ %5 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !88
+ call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>** %2}, metadata !89), !dbg !90
+ call void @llvm.dbg.declare(metadata !{%1** %d}, metadata !91), !dbg !100
+ %6 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", !dbg !100
+ %7 = bitcast %struct._class_t* %6 to i8*, !dbg !100
+ %8 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !100
+ %9 = bitcast i8* %8 to i8* (i8*, i8*)*, !dbg !100
+ %10 = call i8* %9(i8* %7, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to i8*)), !dbg !100
+ %11 = bitcast i8* %10 to %1*, !dbg !100
+ %12 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !100, !invariant.load !67
+ %13 = bitcast %1* %11 to i8*, !dbg !100
+ %14 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %13, i8* %12), !dbg !100
+ %15 = bitcast i8* %14 to %1*, !dbg !100
+ store %1* %15, %1** %d, align 8, !dbg !100
+ %16 = load %1** %d, align 8, !dbg !101
+ %17 = bitcast %1* %16 to i8*, !dbg !101
+ %18 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_count" to %struct._message_ref_t*), i32 0, i32 0), !dbg !101
+ %19 = bitcast i8* %18 to i32 (i8*, i8*)*, !dbg !101
+ %20 = call i32 %19(i8* %17, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_count" to i8*)), !dbg !101
+ %21 = add nsw i32 42, %20, !dbg !101
+ %22 = load %0** %5, align 8, !dbg !101
+ %23 = load i64* @"OBJC_IVAR_$_A.ivar", !dbg !101, !invariant.load !67
+ %24 = bitcast %0* %22 to i8*, !dbg !101
+ %25 = getelementptr inbounds i8* %24, i64 %23, !dbg !101
+ %26 = bitcast i8* %25 to i32*, !dbg !101
+ store i32 %21, i32* %26, align 4, !dbg !101
+ ret void, !dbg !90
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+declare i8* @objc_msgSend_fixup(i8*, %struct._message_ref_t*, ...)
+
+declare i8* @objc_msgSend(i8*, i8*, ...) #2
+
+define internal void @__copy_helper_block_(i8*, i8*) {
+ %3 = alloca i8*, align 8
+ %4 = alloca i8*, align 8
+ store i8* %0, i8** %3, align 8
+ call void @llvm.dbg.declare(metadata !{i8** %3}, metadata !102), !dbg !103
+ store i8* %1, i8** %4, align 8
+ call void @llvm.dbg.declare(metadata !{i8** %4}, metadata !104), !dbg !103
+ %5 = load i8** %4, !dbg !103
+ %6 = bitcast i8* %5 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103
+ %7 = load i8** %3, !dbg !103
+ %8 = bitcast i8* %7 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103
+ %9 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %6, i32 0, i32 5, !dbg !103
+ %10 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %8, i32 0, i32 5, !dbg !103
+ %11 = load %0** %9, !dbg !103
+ %12 = bitcast %0* %11 to i8*, !dbg !103
+ %13 = bitcast %0** %10 to i8*, !dbg !103
+ call void @_Block_object_assign(i8* %13, i8* %12, i32 3) #3, !dbg !103
+ ret void, !dbg !103
+}
+
+declare void @_Block_object_assign(i8*, i8*, i32)
+
+define internal void @__destroy_helper_block_(i8*) {
+ %2 = alloca i8*, align 8
+ store i8* %0, i8** %2, align 8
+ call void @llvm.dbg.declare(metadata !{i8** %2}, metadata !105), !dbg !106
+ %3 = load i8** %2, !dbg !106
+ %4 = bitcast i8* %3 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !106
+ %5 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !106
+ %6 = load %0** %5, !dbg !106
+ %7 = bitcast %0* %6 to i8*, !dbg !106
+ call void @_Block_object_dispose(i8* %7, i32 3) #3, !dbg !106
+ ret void, !dbg !106
+}
+
+declare void @_Block_object_dispose(i8*, i32)
+
+define i32 @main() #0 {
+ %1 = alloca i32, align 4
+ %a = alloca %0*, align 8
+ store i32 0, i32* %1
+ call void @llvm.dbg.declare(metadata !{%0** %a}, metadata !107), !dbg !108
+ %2 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_5", !dbg !108
+ %3 = bitcast %struct._class_t* %2 to i8*, !dbg !108
+ %4 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !108
+ %5 = bitcast i8* %4 to i8* (i8*, i8*)*, !dbg !108
+ %6 = call i8* %5(i8* %3, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to i8*)), !dbg !108
+ %7 = bitcast i8* %6 to %0*, !dbg !108
+ %8 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !108, !invariant.load !67
+ %9 = bitcast %0* %7 to i8*, !dbg !108
+ %10 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %9, i8* %8), !dbg !108
+ %11 = bitcast i8* %10 to %0*, !dbg !108
+ store %0* %11, %0** %a, align 8, !dbg !108
+ ret i32 0, !dbg !109
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nonlazybind }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!56, !57, !58, !59}
+
+!0 = metadata !{i32 786449, metadata !1, i32 16, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !12, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/<unknown>] [DW_LANG_ObjC]
+!1 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/<unknown>", metadata !"llvm/_build.ninja.Debug"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"A", i32 33, i64 32, i64 32, i32 0, i32 512, null, metadata !7, i32 16, null, null} ; [ DW_TAG_structure_type ] [A] [line 33, size 32, align 32, offset 0] [from ]
+!5 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m", metadata !"llvm/_build.ninja.Debug"}
+!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!7 = metadata !{metadata !8, metadata !10}
+!8 = metadata !{i32 786460, null, metadata !4, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject]
+!9 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSObject", i32 21, i64 0, i64 8, i32 0, i32 0, null, metadata !2, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSObject] [line 21, size 0, align 8, offset 0] [from ]
+!10 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"ivar", i32 35, i64 32, i64 32, i64 0, i32 0, metadata !11, null} ; [ DW_TAG_member ] [ivar] [line 35, size 32, align 32, offset 0] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{metadata !13, metadata !27, metadata !31, metadata !35, metadata !36, metadata !39}
+!13 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"-[A init]", metadata !"-[A init]", metadata !"", i32 46, metadata !14, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, i8* (%0*, i8*)* @"\01-[A init]", null, null, metadata !2, i32 46} ; [ DW_TAG_subprogram ] [line 46] [local] [def] [-[A init]]
+!14 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = metadata !{metadata !16, metadata !23, metadata !24}
+!16 = metadata !{i32 786454, metadata !5, null, metadata !"id", i32 46, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [id] [line 46, size 0, align 0, offset 0] [from ]
+!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!18 = metadata !{i32 786451, metadata !1, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!22 = metadata !{i32 786451, metadata !1, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!23 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
+!24 = metadata !{i32 786454, metadata !5, i32 0, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 64, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [artificial] [from ]
+!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !26} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector]
+!26 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!27 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"__9-[A init]_block_invoke", metadata !"__9-[A init]_block_invoke", metadata !"", i32 49, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @"__9-[A init]_block_invoke", null, null, metadata !2, i32 49} ; [ DW_TAG_subprogram ] [line 49] [local] [def] [__9-[A init]_block_invoke]
+!28 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = metadata !{null, metadata !30}
+!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!31 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 52, metadata !33, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*, i8*)* @__copy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__copy_helper_block_]
+!32 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/<unknown>]
+!33 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!34 = metadata !{null, metadata !30, metadata !30}
+!35 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 52, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*)* @__destroy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__destroy_helper_block_]
+!36 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !37, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 60} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 60] [main]
+!37 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !38, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!38 = metadata !{metadata !11}
+!39 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 39, metadata !40, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (void ()*)* @run, null, null, metadata !2, i32 40} ; [ DW_TAG_subprogram ] [line 39] [local] [def] [scope 40] [run]
+!40 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !41, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!41 = metadata !{null, metadata !42}
+!42 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !43} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_generic]
+!43 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_generic", i32 40, i64 256, i64 0, i32 0, i32 8, null, metadata !44, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 40, size 256, align 0, offset 0] [from ]
+!44 = metadata !{metadata !45, metadata !46, metadata !47, metadata !48, metadata !49}
+!45 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 0, size 64, align 64, offset 0] [from ]
+!46 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 0, size 32, align 32, offset 64] [from int]
+!47 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__reserved", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !11} ; [ DW_TAG_member ] [__reserved] [line 0, size 32, align 32, offset 96] [from int]
+!48 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 0, size 64, align 64, offset 128] [from ]
+!49 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 40, i64 64, i64 64, i64 192, i32 0, metadata !50} ; [ DW_TAG_member ] [__descriptor] [line 40, size 64, align 64, offset 192] [from ]
+!50 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !51} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_descriptor]
+!51 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_descriptor", i32 40, i64 128, i64 0, i32 0, i32 8, null, metadata !52, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 40, size 128, align 0, offset 0] [from ]
+!52 = metadata !{metadata !53, metadata !55}
+!53 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !54} ; [ DW_TAG_member ] [reserved] [line 0, size 64, align 64, offset 0] [from long unsigned int]
+!54 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!55 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"Size", i32 0, i64 64, i64 64, i64 64, i32 0, metadata !54} ; [ DW_TAG_member ] [Size] [line 0, size 64, align 64, offset 64] [from long unsigned int]
+!56 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!57 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!58 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!59 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!60 = metadata !{i32 786689, metadata !13, metadata !"self", metadata !32, i32 16777262, metadata !61, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [self] [line 46]
+!61 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!62 = metadata !{i32 46, i32 0, metadata !13, null}
+!63 = metadata !{i32 786689, metadata !13, metadata !"_cmd", metadata !32, i32 33554478, metadata !64, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [_cmd] [line 46]
+!64 = metadata !{i32 786454, metadata !5, null, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 0, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [from ]
+!65 = metadata !{i32 48, i32 0, metadata !66, null}
+!66 = metadata !{i32 786443, metadata !5, metadata !13, i32 47, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!67 = metadata !{}
+!68 = metadata !{i32 49, i32 0, metadata !69, null}
+!69 = metadata !{i32 786443, metadata !5, metadata !66, i32 48, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!70 = metadata !{i32 53, i32 0, metadata !69, null}
+!71 = metadata !{i32 54, i32 0, metadata !66, null}
+!72 = metadata !{i32 786689, metadata !39, metadata !"block", metadata !6, i32 16777255, metadata !42, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [block] [line 39]
+!73 = metadata !{i32 39, i32 0, metadata !39, null}
+!74 = metadata !{i32 41, i32 0, metadata !39, null}
+!75 = metadata !{i32 42, i32 0, metadata !39, null}
+!76 = metadata !{i32 786689, metadata !27, metadata !".block_descriptor", metadata !6, i32 16777265, metadata !77, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [.block_descriptor] [line 49]
+!77 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_1]
+!78 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_1", i32 49, i64 320, i64 64, i32 0, i32 0, null, metadata !79, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 49, size 320, align 64, offset 0] [from ]
+!79 = metadata !{metadata !80, metadata !81, metadata !82, metadata !83, metadata !84, metadata !87}
+!80 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 49, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 49, size 64, align 64, offset 0] [from ]
+!81 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 49, i64 32, i64 32, i64 64, i32 0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 49, size 32, align 32, offset 64] [from int]
+!82 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__reserved", i32 49, i64 32, i64 32, i64 96, i32 0, metadata !11} ; [ DW_TAG_member ] [__reserved] [line 49, size 32, align 32, offset 96] [from int]
+!83 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 49, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 49, size 64, align 64, offset 128] [from ]
+!84 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 49, i64 64, i64 64, i64 192, i32 0, metadata !85} ; [ DW_TAG_member ] [__descriptor] [line 49, size 64, align 64, offset 192] [from ]
+!85 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !86} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from __block_descriptor_withcopydispose]
+!86 = metadata !{i32 786451, metadata !1, null, metadata !"__block_descriptor_withcopydispose", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 49, size 0, align 0, offset 0] [fwd] [from ]
+!87 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"self", i32 49, i64 64, i64 64, i64 256, i32 0, metadata !61} ; [ DW_TAG_member ] [self] [line 49, size 64, align 64, offset 256] [from ]
+!88 = metadata !{i32 49, i32 0, metadata !27, null}
+!89 = metadata !{i32 786688, metadata !27, metadata !"self", metadata !32, i32 52, metadata !23, i32 0, i32 0, i64 2, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 52]
+!90 = metadata !{i32 52, i32 0, metadata !27, null}
+!91 = metadata !{i32 786688, metadata !92, metadata !"d", metadata !6, i32 50, metadata !93, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 50]
+!92 = metadata !{i32 786443, metadata !5, metadata !27, i32 49, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!93 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !94} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from NSMutableDictionary]
+!94 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSMutableDictionary", i32 30, i64 0, i64 8, i32 0, i32 0, null, metadata !95, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSMutableDictionary] [line 30, size 0, align 8, offset 0] [from ]
+!95 = metadata !{metadata !96}
+!96 = metadata !{i32 786460, null, metadata !94, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !97} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSDictionary]
+!97 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSDictionary", i32 26, i64 0, i64 8, i32 0, i32 0, null, metadata !98, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSDictionary] [line 26, size 0, align 8, offset 0] [from ]
+!98 = metadata !{metadata !99}
+!99 = metadata !{i32 786460, null, metadata !97, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject]
+!100 = metadata !{i32 50, i32 0, metadata !92, null}
+!101 = metadata !{i32 51, i32 0, metadata !92, null}
+!102 = metadata !{i32 786689, metadata !31, metadata !"", metadata !32, i32 16777268, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [line 52]
+!103 = metadata !{i32 52, i32 0, metadata !31, null}
+!104 = metadata !{i32 786689, metadata !31, metadata !"", metadata !32, i32 33554484, metadata !30, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [line 52]
+!105 = metadata !{i32 786689, metadata !35, metadata !"", metadata !32, i32 16777268, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [line 52]
+!106 = metadata !{i32 52, i32 0, metadata !35, null}
+!107 = metadata !{i32 786688, metadata !36, metadata !"a", metadata !6, i32 61, metadata !61, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 61]
+!108 = metadata !{i32 61, i32 0, metadata !36, null}
+!109 = metadata !{i32 62, i32 0, metadata !36, null}
diff --git a/test/DebugInfo/X86/debug-info-static-member.ll b/test/DebugInfo/X86/debug-info-static-member.ll
new file mode 100644
index 000000000000..50a2b3fa5163
--- /dev/null
+++ b/test/DebugInfo/X86/debug-info-static-member.ll
@@ -0,0 +1,257 @@
+; RUN: llc %s -o %t -filetype=obj -O0 -mtriple=x86_64-unknown-linux-gnu
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=PRESENT
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=ABSENT
+; RUN: llc %s -o %t -filetype=obj -O0 -mtriple=x86_64-apple-darwin
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=DARWINP
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=DARWINA
+; Verify that attributes we do want are PRESENT;
+; verify that attributes we don't want are ABSENT.
+; It's a lot easier to do this in two passes than in one.
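+; (With -check-prefix=PRESENT, FileCheck acts only on the "PRESENT:"
+; directives and treats the "ABSENT:" lines as ordinary comments, and
+; vice versa, so a single input file carries both sets of expectations.)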
+; PR14471
+
+; LLVM IR generated using: clang -emit-llvm -S -g
+; (with the Clang part of this patch applied).
+;
+; class C
+; {
+; static int a;
+; const static bool const_a = true;
+; protected:
+; static int b;
+; const static float const_b = 3.14;
+; public:
+; static int c;
+; const static int const_c = 18;
+; int d;
+; };
+;
+; int C::a = 4;
+; int C::b = 2;
+; int C::c = 1;
+;
+; int main()
+; {
+; C instance_C;
+; instance_C.d = 8;
+; return C::c;
+; }
+
+%class.C = type { i32 }
+
+@_ZN1C1aE = global i32 4, align 4
+@_ZN1C1bE = global i32 2, align 4
+@_ZN1C1cE = global i32 1, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %instance_C = alloca %class.C, align 4
+ store i32 0, i32* %retval
+ call void @llvm.dbg.declare(metadata !{%class.C* %instance_C}, metadata !29), !dbg !30
+ %d = getelementptr inbounds %class.C* %instance_C, i32 0, i32 0, !dbg !31
+ store i32 8, i32* %d, align 4, !dbg !31
+ %0 = load i32* @_ZN1C1cE, align 4, !dbg !32
+ ret i32 %0, !dbg !32
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 (trunk 171914)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !10, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 23} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main]
+!6 = metadata !{i32 786473, metadata !33} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !12, metadata !27, metadata !28}
+!12 = metadata !{i32 786484, i32 0, metadata !13, metadata !"a", metadata !"a", metadata !"_ZN1C1aE", metadata !6, i32 14, metadata !9, i32 0, i32 1, i32* @_ZN1C1aE, metadata !15} ; [ DW_TAG_variable ] [a] [line 14] [def]
+!13 = metadata !{i32 786434, metadata !33, null, metadata !"C", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [from ]
+!14 = metadata !{metadata !15, metadata !16, metadata !19, metadata !20, metadata !23, metadata !24, metadata !26}
+!15 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"a", i32 3, i64 0, i64 0, i64 0, i32 4097, metadata !9, null} ; [ DW_TAG_member ] [a] [line 3, size 0, align 0, offset 0] [private] [static] [from int]
+!16 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_a", i32 4, i64 0, i64 0, i64 0, i32 4097, metadata !17, i1 true} ; [ DW_TAG_member ] [const_a] [line 4, size 0, align 0, offset 0] [private] [static] [from ]
+!17 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from bool]
+!18 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!19 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"b", i32 6, i64 0, i64 0, i64 0, i32 4098, metadata !9, null} ; [ DW_TAG_member ] [b] [line 6, size 0, align 0, offset 0] [protected] [static] [from int]
+!20 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_b", i32 7, i64 0, i64 0, i64 0, i32 4098, metadata !21, float 0x40091EB860000000} ; [ DW_TAG_member ] [const_b] [line 7, size 0, align 0, offset 0] [protected] [static] [from ]
+!21 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from float]
+!22 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!23 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"c", i32 9, i64 0, i64 0, i64 0, i32 4096, metadata !9, null} ; [ DW_TAG_member ] [c] [line 9, size 0, align 0, offset 0] [static] [from int]
+!24 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_c", i32 10, i64 0, i64 0, i64 0, i32 4096, metadata !25, i32 18} ; [ DW_TAG_member ] [const_c] [line 10, size 0, align 0, offset 0] [static] [from ]
+!25 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
+!26 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"d", i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [d] [line 11, size 32, align 32, offset 0] [from int]
+!27 = metadata !{i32 786484, i32 0, metadata !13, metadata !"b", metadata !"b", metadata !"_ZN1C1bE", metadata !6, i32 15, metadata !9, i32 0, i32 1, i32* @_ZN1C1bE, metadata !19} ; [ DW_TAG_variable ] [b] [line 15] [def]
+!28 = metadata !{i32 786484, i32 0, metadata !13, metadata !"c", metadata !"c", metadata !"_ZN1C1cE", metadata !6, i32 16, metadata !9, i32 0, i32 1, i32* @_ZN1C1cE, metadata !23} ; [ DW_TAG_variable ] [c] [line 16] [def]
+!29 = metadata !{i32 786688, metadata !5, metadata !"instance_C", metadata !6, i32 20, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [instance_C] [line 20]
+!30 = metadata !{i32 20, i32 0, metadata !5, null}
+!31 = metadata !{i32 21, i32 0, metadata !5, null}
+!32 = metadata !{i32 22, i32 0, metadata !5, null}
+!33 = metadata !{metadata !"/usr/local/google/home/blaikie/Development/llvm/src/tools/clang/test/CodeGenCXX/debug-info-static-member.cpp", metadata !"/home/blaikie/local/Development/llvm/build/clang/x86-64/Debug/llvm"}
+; PRESENT verifies that static member declarations have these attributes:
+; external, declaration, accessibility, and either DW_AT_MIPS_linkage_name
+; (for variables) or DW_AT_const_value (for constants).
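+; For reference, the DW_AT_accessibility values checked below are the
+; standard DWARF DW_ACCESS_* encodings (0x01 = public, 0x02 = protected,
+; 0x03 = private), and the const_b value 0x4048f5c3 is the IEEE-754
+; single-precision bit pattern of 3.14f.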
+;
+; PRESENT: .debug_info contents:
+; PRESENT: DW_TAG_class_type
+; PRESENT-NEXT: DW_AT_name {{.*}} "C"
+; PRESENT: 0x[[DECL_A:[0-9a-f]+]]: DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "a"
+; PRESENT: DW_AT_external
+; PRESENT: DW_AT_declaration
+; PRESENT: DW_AT_accessibility [DW_FORM_data1] (0x03)
+; PRESENT: DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "const_a"
+; PRESENT: DW_AT_external
+; PRESENT: DW_AT_declaration
+; PRESENT: DW_AT_accessibility [DW_FORM_data1] (0x03)
+; PRESENT: DW_AT_const_value {{.*}} (1)
+; PRESENT: 0x[[DECL_B:[0-9a-f]+]]: DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "b"
+; PRESENT: DW_AT_accessibility [DW_FORM_data1] (0x02)
+; PRESENT: DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "const_b"
+; PRESENT: DW_AT_accessibility [DW_FORM_data1] (0x02)
+; PRESENT: DW_AT_const_value {{.*}} (0x4048f5c3)
+; PRESENT: 0x[[DECL_C:[0-9a-f]+]]: DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "c"
+; PRESENT: DW_AT_accessibility [DW_FORM_data1] (0x01)
+; PRESENT: DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "const_c"
+; PRESENT: DW_AT_accessibility [DW_FORM_data1] (0x01)
+; PRESENT: DW_AT_const_value {{.*}} (0x00000012)
+; While we're here, a normal member has data_member_location and
+; accessibility attributes.
+; PRESENT: DW_TAG_member
+; PRESENT-NEXT: DW_AT_name {{.*}} "d"
+; PRESENT: DW_AT_data_member_location
+; PRESENT: DW_AT_accessibility [DW_FORM_data1] (0x01)
+; PRESENT: NULL
+; Definitions point back to their declarations, and have a location.
+; PRESENT: DW_TAG_variable
+; PRESENT-NEXT: DW_AT_specification {{.*}} {0x[[DECL_A]]}
+; PRESENT-NEXT: DW_AT_location
+; PRESENT-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1aE"
+; PRESENT: DW_TAG_variable
+; PRESENT-NEXT: DW_AT_specification {{.*}} {0x[[DECL_B]]}
+; PRESENT-NEXT: DW_AT_location
+; PRESENT-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1bE"
+; PRESENT: DW_TAG_variable
+; PRESENT-NEXT: DW_AT_specification {{.*}} {0x[[DECL_C]]}
+; PRESENT-NEXT: DW_AT_location
+; PRESENT-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1cE"
+
+; For Darwin gdb:
+; DARWINP: .debug_info contents:
+; DARWINP: DW_TAG_class_type
+; DARWINP-NEXT: DW_AT_name {{.*}} "C"
+; DARWINP: 0x[[DECL_A:[0-9a-f]+]]: DW_TAG_member
+; DARWINP-NEXT: DW_AT_name {{.*}} "a"
+; DARWINP: DW_AT_external
+; DARWINP: DW_AT_declaration
+; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x03)
+; DARWINP: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1aE"
+; DARWINP: DW_TAG_member
+; DARWINP-NEXT: DW_AT_name {{.*}} "const_a"
+; DARWINP: DW_AT_external
+; DARWINP: DW_AT_declaration
+; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x03)
+; DARWINP: DW_AT_const_value {{.*}} (1)
+; DARWINP: 0x[[DECL_B:[0-9a-f]+]]: DW_TAG_member
+; DARWINP-NEXT: DW_AT_name {{.*}} "b"
+; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x02)
+; DARWINP: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1bE"
+; DARWINP: DW_TAG_member
+; DARWINP-NEXT: DW_AT_name {{.*}} "const_b"
+; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x02)
+; DARWINP: DW_AT_const_value {{.*}} (0x4048f5c3)
+; DARWINP: 0x[[DECL_C:[0-9a-f]+]]: DW_TAG_member
+; DARWINP-NEXT: DW_AT_name {{.*}} "c"
+; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x01)
+; DARWINP: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1cE"
+; DARWINP: DW_TAG_member
+; DARWINP-NEXT: DW_AT_name {{.*}} "const_c"
+; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x01)
+; DARWINP: DW_AT_const_value {{.*}} (0x00000012)
+; While we're here, a normal member has data_member_location and
+; accessibility attributes.
+; DARWINP: DW_TAG_member
+; DARWINP-NEXT: DW_AT_name {{.*}} "d"
+; DARWINP: DW_AT_data_member_location
+; DARWINP: DW_AT_accessibility [DW_FORM_data1] (0x01)
+; DARWINP: NULL
+; Definitions point back to their declarations, and have a location.
+; DARWINP: DW_TAG_variable
+; DARWINP-NEXT: DW_AT_specification {{.*}} {0x[[DECL_A]]}
+; DARWINP-NEXT: DW_AT_location
+; DARWINP-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1aE"
+; DARWINP: DW_TAG_variable
+; DARWINP-NEXT: DW_AT_specification {{.*}} {0x[[DECL_B]]}
+; DARWINP-NEXT: DW_AT_location
+; DARWINP-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1bE"
+; DARWINP: DW_TAG_variable
+; DARWINP-NEXT: DW_AT_specification {{.*}} {0x[[DECL_C]]}
+; DARWINP-NEXT: DW_AT_location
+; DARWINP-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN1C1cE"
+
+; ABSENT verifies that static member declarations do not have either
+; DW_AT_location or DW_AT_data_member_location; also, variables do not
+; have DW_AT_const_value and constants do not have DW_AT_MIPS_linkage_name.
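+; (Note that the bare "location" pattern below matches as a substring,
+; so it rejects both DW_AT_location and DW_AT_data_member_location with
+; a single directive.)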
+;
+; ABSENT: .debug_info contents:
+; ABSENT: DW_TAG_member
+; ABSENT: DW_AT_name {{.*}} "a"
+; ABSENT-NOT: DW_AT_const_value
+; ABSENT-NOT: location
+; ABSENT: DW_AT_name {{.*}} "const_a"
+; ABSENT-NOT: DW_AT_MIPS_linkage_name
+; ABSENT-NOT: location
+; ABSENT: DW_AT_name {{.*}} "b"
+; ABSENT-NOT: DW_AT_const_value
+; ABSENT-NOT: location
+; ABSENT: DW_AT_name {{.*}} "const_b"
+; ABSENT-NOT: DW_AT_MIPS_linkage_name
+; ABSENT-NOT: location
+; ABSENT: DW_AT_name {{.*}} "c"
+; ABSENT-NOT: DW_AT_const_value
+; ABSENT-NOT: location
+; ABSENT: DW_AT_name {{.*}} "const_c"
+; ABSENT-NOT: DW_AT_MIPS_linkage_name
+; ABSENT-NOT: location
+; While we're here, check that a normal member has no DW_AT_MIPS_linkage_name,
+; DW_AT_const_value, or DW_AT_location.
+; ABSENT: DW_AT_name {{.*}} "d"
+; ABSENT-NOT: DW_AT_MIPS_linkage_name
+; ABSENT-NOT: DW_AT_const_value
+; ABSENT-NOT: DW_AT_location
+; ABSENT: NULL
+
+; For Darwin gdb:
+; DARWINA: .debug_info contents:
+; DARWINA: DW_TAG_member
+; DARWINA: DW_AT_name {{.*}} "a"
+; DARWINA-NOT: DW_AT_const_value
+; DARWINA-NOT: location
+; DARWINA: DW_AT_name {{.*}} "const_a"
+; DARWINA-NOT: DW_AT_MIPS_linkage_name
+; DARWINA-NOT: location
+; DARWINA: DW_AT_name {{.*}} "b"
+; DARWINA-NOT: DW_AT_const_value
+; DARWINA-NOT: location
+; DARWINA: DW_AT_name {{.*}} "const_b"
+; DARWINA-NOT: DW_AT_MIPS_linkage_name
+; DARWINA-NOT: location
+; DARWINA: DW_AT_name {{.*}} "c"
+; DARWINA-NOT: DW_AT_const_value
+; DARWINA-NOT: location
+; DARWINA: DW_AT_name {{.*}} "const_c"
+; DARWINA-NOT: DW_AT_MIPS_linkage_name
+; DARWINA-NOT: location
+; While we're here, check that a normal member has no DW_AT_MIPS_linkage_name,
+; DW_AT_const_value, or DW_AT_location.
+; DARWINA: DW_AT_name {{.*}} "d"
+; DARWINA-NOT: DW_AT_MIPS_linkage_name
+; DARWINA-NOT: DW_AT_const_value
+; DARWINA-NOT: DW_AT_location
+; DARWINA: NULL
diff --git a/test/DebugInfo/X86/debug_frame.ll b/test/DebugInfo/X86/debug_frame.ll
index d273d7353786..0e93427df010 100644
--- a/test/DebugInfo/X86/debug_frame.ll
+++ b/test/DebugInfo/X86/debug_frame.ll
@@ -9,10 +9,11 @@ entry:
ret void
}
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
+!5 = metadata !{metadata !0}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build", metadata !"clang version 3.0 ()", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build", metadata !"clang version 3.0 ()", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !5, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{null}
diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll
index b908bcefe478..30e8c2e27430 100644
--- a/test/DebugInfo/X86/elf-names.ll
+++ b/test/DebugInfo/X86/elf-names.ll
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; RUN: llvm-as < %s | llvm-dis | FileCheck --check-prefix=CHECK-DIS %s
; CHECK: 0x0000000b: DW_TAG_compile_unit
; CHECK: 0x00000012: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000035] = "foo.cpp")
@@ -7,6 +8,9 @@
; CHECK: 0x0000003d: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000006d] = "D")
; CHECK: 0x00000044: DW_TAG_member
; CHECK: 0x00000045: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000005d] = "c1")
+; CHECK: 0x0000008d: DW_AT_artificial [DW_FORM_flag_present] (true)
+
+; CHECK-DIS: [artificial]
%class.D = type { i32, i32, i32, i32 }
@@ -54,38 +58,36 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo", metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !31}
-!5 = metadata !{i32 786478, i32 0, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2Ev", metadata !6, i32 12, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*)* @_ZN1DC2Ev, null, metadata !17, metadata !27, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [D]
-!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, metadata !53, i32 4, metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !31}
+!5 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2Ev", i32 12, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*)* @_ZN1DC2Ev, null, metadata !17, metadata !27, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [D]
+!6 = metadata !{i32 786473, metadata !53} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{null, metadata !9}
!9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
-!10 = metadata !{i32 786434, null, metadata !"D", metadata !6, i32 1, i64 128, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [from ]
+!10 = metadata !{i32 786434, metadata !53, null, metadata !"D", i32 1, i64 128, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [from ]
!11 = metadata !{metadata !12, metadata !14, metadata !15, metadata !16, metadata !17, metadata !20}
-!12 = metadata !{i32 786445, metadata !10, metadata !"c1", metadata !6, i32 6, i64 32, i64 32, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ] [c1] [line 6, size 32, align 32, offset 0] [private] [from int]
-!13 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!14 = metadata !{i32 786445, metadata !10, metadata !"c2", metadata !6, i32 7, i64 32, i64 32, i64 32, i32 1, metadata !13} ; [ DW_TAG_member ] [c2] [line 7, size 32, align 32, offset 32] [private] [from int]
-!15 = metadata !{i32 786445, metadata !10, metadata !"c3", metadata !6, i32 8, i64 32, i64 32, i64 64, i32 1, metadata !13} ; [ DW_TAG_member ] [c3] [line 8, size 32, align 32, offset 64] [private] [from int]
-!16 = metadata !{i32 786445, metadata !10, metadata !"c4", metadata !6, i32 9, i64 32, i64 32, i64 96, i32 1, metadata !13} ; [ DW_TAG_member ] [c4] [line 9, size 32, align 32, offset 96] [private] [from int]
-!17 = metadata !{i32 786478, i32 0, metadata !10, metadata !"D", metadata !"D", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [D]
+!12 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c1", i32 6, i64 32, i64 32, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ] [c1] [line 6, size 32, align 32, offset 0] [private] [from int]
+!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c2", i32 7, i64 32, i64 32, i64 32, i32 1, metadata !13} ; [ DW_TAG_member ] [c2] [line 7, size 32, align 32, offset 32] [private] [from int]
+!15 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c3", i32 8, i64 32, i64 32, i64 64, i32 1, metadata !13} ; [ DW_TAG_member ] [c3] [line 8, size 32, align 32, offset 64] [private] [from int]
+!16 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c4", i32 9, i64 32, i64 32, i64 96, i32 1, metadata !13} ; [ DW_TAG_member ] [c4] [line 9, size 32, align 32, offset 96] [private] [from int]
+!17 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"D", metadata !"D", metadata !"", i32 3, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [D]
!18 = metadata !{metadata !19}
!19 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!20 = metadata !{i32 786478, i32 0, metadata !10, metadata !"D", metadata !"D", metadata !"", metadata !6, i32 4, metadata !21, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !25, i32 4} ; [ DW_TAG_subprogram ] [line 4] [D]
+!20 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"D", metadata !"D", metadata !"", i32 4, metadata !21, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !25, i32 4} ; [ DW_TAG_subprogram ] [line 4] [D]
!21 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!22 = metadata !{null, metadata !9, metadata !23}
!23 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!24 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from D]
+!24 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from D]
!25 = metadata !{metadata !26}
!26 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
!27 = metadata !{metadata !28}
!28 = metadata !{metadata !29}
!29 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777228, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 12]
-!30 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
-!31 = metadata !{i32 786478, i32 0, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2ERKS_", metadata !6, i32 19, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*, %class.D*)* @_ZN1DC2ERKS_, null, metadata !20, metadata !32, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [D]
+!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
+!31 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2ERKS_", i32 19, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*, %class.D*)* @_ZN1DC2ERKS_, null, metadata !20, metadata !32, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [D]
!32 = metadata !{metadata !33}
!33 = metadata !{metadata !34, metadata !35}
!34 = metadata !{i32 786689, metadata !31, metadata !"this", metadata !6, i32 16777235, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 19]
@@ -107,3 +109,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!50 = metadata !{i32 22, i32 0, metadata !48, null}
!51 = metadata !{i32 23, i32 0, metadata !48, null}
!52 = metadata !{i32 24, i32 0, metadata !48, null}
+!53 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo"}
diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll
new file mode 100644
index 000000000000..6e59915fe13e
--- /dev/null
+++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll
@@ -0,0 +1,92 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -O0 -filetype=obj -o %t < %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; <rdar://problem/12566646>
+
+%struct.foo = type { i32, [1 x i32] }
+%struct.bar = type { i32, [0 x i32] }
+
+define i32 @func() nounwind uwtable ssp {
+entry:
+ %my_foo = alloca %struct.foo, align 4
+ %my_bar = alloca %struct.bar, align 4
+ call void @llvm.dbg.declare(metadata !{%struct.foo* %my_foo}, metadata !10), !dbg !19
+ call void @llvm.dbg.declare(metadata !{%struct.bar* %my_bar}, metadata !20), !dbg !28
+ %a = getelementptr inbounds %struct.foo* %my_foo, i32 0, i32 0, !dbg !29
+ store i32 3, i32* %a, align 4, !dbg !29
+ %a1 = getelementptr inbounds %struct.bar* %my_bar, i32 0, i32 0, !dbg !30
+ store i32 5, i32* %a1, align 4, !dbg !30
+ %a2 = getelementptr inbounds %struct.foo* %my_foo, i32 0, i32 0, !dbg !31
+ %0 = load i32* %a2, align 4, !dbg !31
+ %a3 = getelementptr inbounds %struct.bar* %my_bar, i32 0, i32 0, !dbg !31
+ %1 = load i32* %a3, align 4, !dbg !31
+ %add = add nsw i32 %0, %1, !dbg !31
+ ret i32 %add, !dbg !31
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+; An empty array should not have a DW_AT_upper_bound attribute, but an array
+; of one element should.
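+;
+; For reference, a plausible C source this IR could have been generated from
+; (an assumption, reconstructed from the IR and metadata, not part of the
+; original test):
+;   struct foo { int a; int b[1]; };
+;   struct bar { int a; int b[0]; };
+;   int func() {
+;     struct foo my_foo;
+;     struct bar my_bar;
+;     my_foo.a = 3;
+;     my_bar.a = 5;
+;     return my_foo.a + my_bar.a;
+;   }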
+
+; CHECK: 0x00000074: DW_TAG_base_type [5]
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000043] = "int")
+; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (0x05)
+; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04)
+
+; int[1]:
+; CHECK: 0x00000082: DW_TAG_array_type [7] *
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0074 => {0x00000074})
+; CHECK: 0x00000087: DW_TAG_subrange_type [8]
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x007b => {0x0000007b})
+; CHECK-NEXT: DW_AT_upper_bound [DW_FORM_data1] (0x00)
+
+; int foo::b[1]:
+; CHECK: 0x000000a5: DW_TAG_member [10]
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000050] = "b")
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0082 => {0x00000082})
+
+; int[0]:
+; CHECK: 0x000000b5: DW_TAG_array_type [7] *
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0074 => {0x00000074})
+; CHECK: 0x000000ba: DW_TAG_subrange_type [11]
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x007b => {0x0000007b})
+; CHECK-NOT: DW_AT_upper_bound
+
+; int bar::b[0]:
+; CHECK: 0x000000d7: DW_TAG_member [10]
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000050] = "b")
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x00b5 => {0x000000b5})
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"func", metadata !"func", metadata !"", i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @func, null, null, metadata !1, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [func]
+!6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786688, metadata !11, metadata !"my_foo", metadata !6, i32 12, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_foo] [line 12]
+!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Sandbox/llvm/test.c]
+!12 = metadata !{i32 786451, metadata !32, null, metadata !"foo", i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [from ]
+!13 = metadata !{metadata !14, metadata !15}
+!14 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!15 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"b", i32 3, i64 32, i64 32, i64 32, i32 0, metadata !16} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from ]
+!16 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 32, i32 0, i32 0, metadata !9, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786465, i64 0, i64 1} ; [ DW_TAG_subrange_type ] [0, 1]
+!19 = metadata !{i32 12, i32 0, metadata !11, null}
+!20 = metadata !{i32 786688, metadata !11, metadata !"my_bar", metadata !6, i32 13, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_bar] [line 13]
+!21 = metadata !{i32 786451, metadata !32, null, metadata !"bar", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !22, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] [from ]
+!22 = metadata !{metadata !23, metadata !24}
+!23 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"a", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 7, size 32, align 32, offset 0] [from int]
+!24 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"b", i32 8, i64 0, i64 32, i64 32, i32 0, metadata !25} ; [ DW_TAG_member ] [b] [line 8, size 0, align 32, offset 32] [from ]
+!25 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!26 = metadata !{metadata !27}
+!27 = metadata !{i32 786465, i64 0, i64 0} ; [ DW_TAG_subrange_type ] [0, 0]
+!28 = metadata !{i32 13, i32 0, metadata !11, null}
+!29 = metadata !{i32 15, i32 0, metadata !11, null}
+!30 = metadata !{i32 16, i32 0, metadata !11, null}
+!31 = metadata !{i32 17, i32 0, metadata !11, null}
+!32 = metadata !{metadata !"test.c", metadata !"/Volumes/Sandbox/llvm"}
diff --git a/test/DebugInfo/X86/empty-array.ll b/test/DebugInfo/X86/empty-array.ll
new file mode 100644
index 000000000000..ace115610ebc
--- /dev/null
+++ b/test/DebugInfo/X86/empty-array.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -O0 -filetype=obj -o %t < %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; <rdar://problem/12566646>
+
+%class.A = type { [0 x i32] }
+
+@a = global %class.A zeroinitializer, align 4
+
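+; A plausible C++ source for this IR (an assumption, reconstructed from the
+; metadata below): a class with a zero-length array member, e.g.
+;   class A {
+;     int x[0];
+;   };
+;   A a;
+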
+; CHECK: 0x0000002d: DW_TAG_base_type [3]
+; CHECK-NEXT: DW_AT_name
+; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04)
+; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (0x05)
+
+; CHECK: 0x00000034: DW_TAG_array_type [4] *
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0026 => {0x00000026})
+
+; CHECK: 0x00000039: DW_TAG_subrange_type [5]
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x002d => {0x0000002d})
+; CHECK-NOT: DW_AT_upper_bound
+
+; CHECK: DW_TAG_member [8]
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000003f] = "x")
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x0034 => {0x00000034})
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !14}
+!9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
+!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] [unbound]
+!14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
+!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{null, metadata !17}
+!17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!20 = metadata !{metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm"}
diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll
index 6935c47d0cf6..6de15f6404cb 100644
--- a/test/DebugInfo/X86/ending-run.ll
+++ b/test/DebugInfo/X86/ending-run.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s
; Check that the line table starts at 7, not 4, but that the first
; statement isn't until line 8.
@@ -28,13 +28,11 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"ending-run.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"callee", metadata !"callee", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"ending-run.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !19, metadata !"callee", metadata !"callee", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9, metadata !9}
!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
@@ -43,7 +41,8 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!12 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!13 = metadata !{i32 5, i32 5, metadata !5, null}
!14 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !6, i32 8, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786443, metadata !5, i32 7, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!15 = metadata !{i32 786443, metadata !19, metadata !5, i32 7, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
!16 = metadata !{i32 8, i32 9, metadata !15, null}
!17 = metadata !{i32 8, i32 18, metadata !15, null}
!18 = metadata !{i32 9, i32 5, metadata !15, null}
+!19 = metadata !{metadata !"ending-run.c", metadata !"/Users/echristo/tmp"}
diff --git a/test/DebugInfo/X86/enum-class.ll b/test/DebugInfo/X86/enum-class.ll
index 6eb715d82872..22728116d9bb 100644
--- a/test/DebugInfo/X86/enum-class.ll
+++ b/test/DebugInfo/X86/enum-class.ll
@@ -1,5 +1,5 @@
; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
@a = global i32 0, align 4
@b = global i64 0, align 8
@@ -7,28 +7,26 @@
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{metadata !3, metadata !8, metadata !12}
-!3 = metadata !{i32 786436, null, metadata !"A", metadata !4, i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
-!4 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!0 = metadata !{i32 786449, metadata !22, i32 4, metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !3, metadata !8, metadata !12}
+!3 = metadata !{i32 786436, metadata !4, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!4 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!6 = metadata !{metadata !7}
!7 = metadata !{i32 786472, metadata !"A1", i64 1} ; [ DW_TAG_enumerator ]
-!8 = metadata !{i32 786436, null, metadata !"B", metadata !4, i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
-!9 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 786436, metadata !4, null, metadata !"B", i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
!10 = metadata !{metadata !11}
!11 = metadata !{i32 786472, metadata !"B1", i64 1} ; [ DW_TAG_enumerator ]
-!12 = metadata !{i32 786436, null, metadata !"C", metadata !4, i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!12 = metadata !{i32 786436, metadata !4, null, metadata !"C", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
!13 = metadata !{metadata !14}
!14 = metadata !{i32 786472, metadata !"C1", i64 1} ; [ DW_TAG_enumerator ]
-!15 = metadata !{metadata !16}
-!16 = metadata !{i32 0}
-!17 = metadata !{metadata !18}
-!18 = metadata !{metadata !19, metadata !20, metadata !21}
-!19 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !4, i32 4, metadata !3, i32 0, i32 1, i32* @a} ; [ DW_TAG_variable ]
-!20 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !4, i32 5, metadata !8, i32 0, i32 1, i64* @b} ; [ DW_TAG_variable ]
-!21 = metadata !{i32 786484, i32 0, null, metadata !"c", metadata !"c", metadata !"", metadata !4, i32 6, metadata !12, i32 0, i32 1, i32* @c} ; [ DW_TAG_variable ]
+!15 = metadata !{i32 0}
+!17 = metadata !{metadata !19, metadata !20, metadata !21}
+!19 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !4, i32 4, metadata !3, i32 0, i32 1, i32* @a, null} ; [ DW_TAG_variable ]
+!20 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !4, i32 5, metadata !8, i32 0, i32 1, i64* @b, null} ; [ DW_TAG_variable ]
+!21 = metadata !{i32 786484, i32 0, null, metadata !"c", metadata !"c", metadata !"", metadata !4, i32 6, metadata !12, i32 0, i32 1, i32* @c, null} ; [ DW_TAG_variable ]
+!22 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"}
; CHECK: DW_TAG_enumeration_type [3]
; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x0026 => {0x00000026})
diff --git a/test/DebugInfo/X86/enum-fwd-decl.ll b/test/DebugInfo/X86/enum-fwd-decl.ll
index 0902430008c1..33d807e30548 100644
--- a/test/DebugInfo/X86/enum-fwd-decl.ll
+++ b/test/DebugInfo/X86/enum-fwd-decl.ll
@@ -1,18 +1,16 @@
; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
@e = global i16 0, align 2
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/tmp", metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"e", metadata !"e", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i16* @e} ; [ DW_TAG_variable ] [e] [line 2] [def]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"e", metadata !"e", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i16* @e, null} ; [ DW_TAG_variable ] [e] [line 2] [def]
!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786436, null, metadata !"E", metadata !6, i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [fwd] [from ]
+!7 = metadata !{i32 786436, metadata !6, null, metadata !"E", i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [fwd] [from ]
; CHECK: DW_TAG_enumeration_type
; CHECK-NEXT: DW_AT_name
diff --git a/test/DebugInfo/X86/fission-cu.ll b/test/DebugInfo/X86/fission-cu.ll
new file mode 100644
index 000000000000..bfe2d17e20f1
--- /dev/null
+++ b/test/DebugInfo/X86/fission-cu.ll
@@ -0,0 +1,100 @@
+; RUN: llc -split-dwarf=Enable -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
+; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
+
+@a = common global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !"baz.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!6 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = metadata !{metadata !"baz.c", metadata !"/usr/local/google/home/echristo/tmp"}
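+
+; The IR above presumably corresponds to a minimal source file (an assumption,
+; reconstructed from the metadata): baz.c containing just "int a;", compiled
+; with split DWARF enabled so the debug info is split out into baz.dwo.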
+
+; Check that the skeleton compile unit contains the expected attributes:
+; DW_AT_comp_dir, DW_AT_stmt_list, DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges,
+; DW_AT_dwo_name, DW_AT_dwo_id, DW_AT_ranges_base, DW_AT_addr_base.
+
+; CHECK: .debug_abbrev contents:
+; CHECK: Abbrev table for offset: 0x00000000
+; CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_no
+; CHECK: DW_AT_GNU_dwo_name DW_FORM_strp
+; CHECK: DW_AT_GNU_dwo_id DW_FORM_data8
+; CHECK: DW_AT_GNU_addr_base DW_FORM_sec_offset
+; CHECK: DW_AT_low_pc DW_FORM_addr
+; CHECK: DW_AT_stmt_list DW_FORM_sec_offset
+; CHECK: DW_AT_comp_dir DW_FORM_strp
+
+; CHECK: .debug_info contents:
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x00000000] = "baz.dwo")
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x0000000000000000)
+; CHECK: DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000000)
+; CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
+; CHECK: DW_AT_stmt_list [DW_FORM_sec_offset] (0x00000000)
+; CHECK: DW_AT_comp_dir [DW_FORM_strp] ( .debug_str[0x00000008] = "/usr/local/google/home/echristo/tmp")
+
+; CHECK: .debug_str contents:
+; CHECK: 0x00000000: "baz.dwo"
+; CHECK: 0x00000008: "/usr/local/google/home/echristo/tmp"
+
+; Check that we're using the right forms.
+; CHECK: .debug_abbrev.dwo contents:
+; CHECK: Abbrev table for offset: 0x00000000
+; CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_yes
+; CHECK: DW_AT_producer DW_FORM_GNU_str_index
+; CHECK: DW_AT_language DW_FORM_data2
+; CHECK: DW_AT_name DW_FORM_GNU_str_index
+; CHECK: DW_AT_low_pc DW_FORM_GNU_addr_index
+; CHECK: DW_AT_stmt_list DW_FORM_data4
+; CHECK: DW_AT_comp_dir DW_FORM_GNU_str_index
+; CHECK: DW_AT_GNU_dwo_id DW_FORM_data8
+
+; CHECK: [2] DW_TAG_base_type DW_CHILDREN_no
+; CHECK: DW_AT_name DW_FORM_GNU_str_index
+; CHECK: DW_AT_encoding DW_FORM_data1
+; CHECK: DW_AT_byte_size DW_FORM_data1
+
+; CHECK: [3] DW_TAG_variable DW_CHILDREN_no
+; CHECK: DW_AT_name DW_FORM_GNU_str_index
+; CHECK: DW_AT_type DW_FORM_ref4
+; CHECK: DW_AT_external DW_FORM_flag_present
+; CHECK: DW_AT_decl_file DW_FORM_data1
+; CHECK: DW_AT_decl_line DW_FORM_data1
+; CHECK: DW_AT_location DW_FORM_block1
+
+; Check that the split (.dwo) compile unit contains the expected information.
+; CHECK: .debug_info.dwo contents:
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_producer [DW_FORM_GNU_str_index] ( indexed (00000000) string = "clang version 3.3 (trunk 169021) (llvm/trunk 169020)")
+; CHECK: DW_AT_language [DW_FORM_data2] (0x000c)
+; CHECK: DW_AT_name [DW_FORM_GNU_str_index] ( indexed (00000001) string = "baz.c")
+; CHECK: DW_AT_low_pc [DW_FORM_GNU_addr_index] ( indexed (00000000) address = 0x0000000000000000)
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x0000000000000000)
+; CHECK: DW_TAG_base_type
+; CHECK: DW_AT_name [DW_FORM_GNU_str_index] ( indexed (00000004) string = "int")
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_name [DW_FORM_GNU_str_index] ( indexed (00000003) string = "a")
+; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x001e => {0x0000001e})
+; CHECK: DW_AT_external [DW_FORM_flag_present] (true)
+; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01)
+; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01)
+; CHECK: DW_AT_location [DW_FORM_block1] (<0x02> fb 01 )
+
+
+; CHECK: .debug_str.dwo contents:
+; CHECK: 0x00000000: "clang version 3.3 (trunk 169021) (llvm/trunk 169020)"
+; CHECK: 0x00000035: "baz.c"
+; CHECK: 0x0000003b: "/usr/local/google/home/echristo/tmp"
+; CHECK: 0x0000005f: "a"
+; CHECK: 0x00000061: "int"
+
+; CHECK: .debug_str_offsets.dwo contents:
+; CHECK: 0x00000000: 00000000
+; CHECK: 0x00000004: 00000035
+; CHECK: 0x00000008: 0000003b
+; CHECK: 0x0000000c: 0000005f
+; CHECK: 0x00000010: 00000061
diff --git a/test/DebugInfo/X86/line-info.ll b/test/DebugInfo/X86/line-info.ll
new file mode 100644
index 000000000000..0c0a7ab51d4b
--- /dev/null
+++ b/test/DebugInfo/X86/line-info.ll
@@ -0,0 +1,58 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -filetype=obj -O0 < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: [[FILEID:[0-9]+]]]{{.*}}list0.h
+; CHECK: [[FILEID]] 0 1 0 is_stmt{{$}}
+
+; IR generated from clang -g -emit-llvm with the following source:
+; list0.h:
+; int foo (int x) {
+; return ++x;
+; }
+; list0.c:
+; #include "list0.h"
+; int main() {
+; }
+
+define i32 @foo(i32 %x) #0 {
+entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !14), !dbg !15
+ %0 = load i32* %x.addr, align 4, !dbg !16
+ %inc = add nsw i32 %0, 1, !dbg !16
+ store i32 %inc, i32* %x.addr, align 4, !dbg !16
+ ret i32 %inc, !dbg !16
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+define i32 @main() #0 {
+entry:
+ ret i32 0, !dbg !17
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] [DW_LANG_C99]
+!1 = metadata !{metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !10}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = metadata !{metadata !"./list0.h", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
+!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/./list0.h]
+!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !9}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
+!11 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/list0.c]
+!12 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{metadata !9}
+!14 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1]
+!15 = metadata !{i32 1, i32 0, metadata !4, null}
+!16 = metadata !{i32 2, i32 0, metadata !4, null}
+!17 = metadata !{i32 3, i32 0, metadata !18, null}
+!18 = metadata !{i32 786443, metadata !11, metadata !10} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/list0.c]
diff --git a/test/DebugInfo/X86/linkage-name.ll b/test/DebugInfo/X86/linkage-name.ll
index b98492383ac3..9440f3a994e1 100644
--- a/test/DebugInfo/X86/linkage-name.ll
+++ b/test/DebugInfo/X86/linkage-name.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-macosx -darwin-gdb-compat=Disable %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; CHECK: DW_TAG_subprogram [9] *
; CHECK-NOT: DW_AT_MIPS_linkage_name
@@ -26,31 +26,29 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16, i32 5} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9, metadata !10, metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
+!11 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786478, i32 0, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", metadata !6, i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 786478, metadata !6, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ]
!14 = metadata !{metadata !15}
!15 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
!16 = metadata !{metadata !17}
!17 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!18 = metadata !{metadata !19}
-!19 = metadata !{metadata !20}
-!20 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 9, metadata !11, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ]
+!18 = metadata !{metadata !20}
+!20 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 9, metadata !11, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ]
!21 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777221, metadata !22, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!22 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
!23 = metadata !{i32 5, i32 8, metadata !5, null}
!24 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554437, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!25 = metadata !{i32 5, i32 14, metadata !5, null}
!26 = metadata !{i32 6, i32 4, metadata !27, null}
-!27 = metadata !{i32 786443, metadata !5, i32 5, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 786443, metadata !6, metadata !5, i32 5, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
+!28 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"}
diff --git a/test/DebugInfo/X86/lit.local.cfg b/test/DebugInfo/X86/lit.local.cfg
index 0d694da8df5a..60d66eae4953 100644
--- a/test/DebugInfo/X86/lit.local.cfg
+++ b/test/DebugInfo/X86/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.ll']
+config.suffixes = ['.ll', '.s']
targets = set(config.root.targets_to_build.split())
if not 'X86' in targets:
diff --git a/test/DebugInfo/X86/low-pc-cu.ll b/test/DebugInfo/X86/low-pc-cu.ll
index f9d9b9171493..4dd5aafe18ea 100644
--- a/test/DebugInfo/X86/low-pc-cu.ll
+++ b/test/DebugInfo/X86/low-pc-cu.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; Check that we use DW_AT_low_pc
@@ -14,18 +14,16 @@ entry:
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !12}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"q", metadata !"q", metadata !"_Z1qv", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !12}
+!5 = metadata !{i32 786478, metadata !"_Z1qv", i32 0, metadata !6, metadata !"q", metadata !"q", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9}
!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!10 = metadata !{metadata !11}
!11 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 786478, i32 0, metadata !6, metadata !"t", metadata !"t", metadata !"", metadata !6, i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 786478, metadata !"", i32 0, metadata !6, metadata !"t", metadata !"t", metadata !6, i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !10} ; [ DW_TAG_subprogram ]
!13 = metadata !{i32 7, i32 1, metadata !14, null}
!14 = metadata !{i32 786443, metadata !5, i32 5, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/main-file-name.s b/test/DebugInfo/X86/main-file-name.s
new file mode 100644
index 000000000000..0369c6158a43
--- /dev/null
+++ b/test/DebugInfo/X86/main-file-name.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64-unknown-linux-gnu -filetype obj -main-file-name foo.S -g -o %t %s
+// RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+// CHECK: DW_TAG_compile_unit [1]
+// CHECK-NOT: DW_TAG_
+// CHECK: DW_AT_name [DW_FORM_string] ("foo.S")
+
+
+# 1 "foo.S"
+# 1 "<built-in>" 1
+# 1 "foo.S" 2
+
+foo:
+ nop
+ nop
+ nop
+
diff --git a/test/DebugInfo/X86/misched-dbg-value.ll b/test/DebugInfo/X86/misched-dbg-value.ll
new file mode 100644
index 000000000000..0980e23b7517
--- /dev/null
+++ b/test/DebugInfo/X86/misched-dbg-value.ll
@@ -0,0 +1,174 @@
+; RUN: llc %s -mtriple=x86_64-apple-darwin -filetype=obj -o %t -enable-misched
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; rdar://13183203
+; Make sure that when the machine scheduler (misched) is enabled, we still
+; have location information for function parameters.
+; CHECK: .debug_info contents:
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_subprogram
+; CHECK: Proc8
+; CHECK: DW_TAG_formal_parameter
+; CHECK: Array1Par
+; CHECK: DW_AT_location
+; CHECK: DW_TAG_formal_parameter
+; CHECK: Array2Par
+; CHECK: DW_AT_location
+; CHECK: DW_TAG_formal_parameter
+; CHECK: IntParI1
+; CHECK: DW_AT_location
+; CHECK: DW_TAG_formal_parameter
+; CHECK: IntParI2
+; CHECK: DW_AT_location
+
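+; For reference, the function below is recognizably Dhrystone's Proc8; an
+; approximate C source, reconstructed from the IR (an assumption, not part of
+; the original test):
+;   void Proc8(OneToFifty *Array1Par, OneToFifty Array2Par[51][51],
+;              OneToFifty IntParI1, OneToFifty IntParI2) {
+;     OneToFifty IntLoc = IntParI1 + 5;
+;     OneToFifty IntIndex;
+;     Array1Par[IntLoc] = IntParI2;
+;     Array1Par[IntLoc+1] = Array1Par[IntLoc];
+;     Array1Par[IntLoc+30] = IntLoc;
+;     for (IntIndex = IntLoc; IntIndex <= IntLoc+1; ++IntIndex)
+;       Array2Par[IntLoc][IntIndex] = IntLoc;
+;     Array2Par[IntLoc][IntLoc-1] += 1;
+;     Array2Par[IntLoc+20][IntLoc] = Array1Par[IntLoc];
+;     IntGlob = 5;
+;   }
+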
+%struct.Record = type { %struct.Record*, i32, i32, i32, [31 x i8] }
+
+@Version = global [4 x i8] c"1.1\00", align 1
+@IntGlob = common global i32 0, align 4
+@BoolGlob = common global i32 0, align 4
+@Char1Glob = common global i8 0, align 1
+@Char2Glob = common global i8 0, align 1
+@Array1Glob = common global [51 x i32] zeroinitializer, align 16
+@Array2Glob = common global [51 x [51 x i32]] zeroinitializer, align 16
+@PtrGlb = common global %struct.Record* null, align 8
+@PtrGlbNext = common global %struct.Record* null, align 8
+
+define void @Proc8(i32* nocapture %Array1Par, [51 x i32]* nocapture %Array2Par, i32 %IntParI1, i32 %IntParI2) nounwind optsize {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i32* %Array1Par}, i64 0, metadata !23), !dbg !64
+ tail call void @llvm.dbg.value(metadata !{[51 x i32]* %Array2Par}, i64 0, metadata !24), !dbg !65
+ tail call void @llvm.dbg.value(metadata !{i32 %IntParI1}, i64 0, metadata !25), !dbg !66
+ tail call void @llvm.dbg.value(metadata !{i32 %IntParI2}, i64 0, metadata !26), !dbg !67
+ %add = add i32 %IntParI1, 5, !dbg !68
+ tail call void @llvm.dbg.value(metadata !{i32 %add}, i64 0, metadata !27), !dbg !68
+ %idxprom = sext i32 %add to i64, !dbg !69
+ %arrayidx = getelementptr inbounds i32* %Array1Par, i64 %idxprom, !dbg !69
+ store i32 %IntParI2, i32* %arrayidx, align 4, !dbg !69, !tbaa !70
+ %add3 = add nsw i32 %IntParI1, 6, !dbg !73
+ %idxprom4 = sext i32 %add3 to i64, !dbg !73
+ %arrayidx5 = getelementptr inbounds i32* %Array1Par, i64 %idxprom4, !dbg !73
+ store i32 %IntParI2, i32* %arrayidx5, align 4, !dbg !73, !tbaa !70
+ %add6 = add nsw i32 %IntParI1, 35, !dbg !74
+ %idxprom7 = sext i32 %add6 to i64, !dbg !74
+ %arrayidx8 = getelementptr inbounds i32* %Array1Par, i64 %idxprom7, !dbg !74
+ store i32 %add, i32* %arrayidx8, align 4, !dbg !74, !tbaa !70
+ tail call void @llvm.dbg.value(metadata !{i32 %add}, i64 0, metadata !28), !dbg !75
+ br label %for.body, !dbg !75
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %idxprom, %entry ], [ %indvars.iv.next, %for.body ]
+ %IntIndex.046 = phi i32 [ %add, %entry ], [ %inc, %for.body ]
+ %arrayidx13 = getelementptr inbounds [51 x i32]* %Array2Par, i64 %idxprom, i64 %indvars.iv, !dbg !77
+ store i32 %add, i32* %arrayidx13, align 4, !dbg !77, !tbaa !70
+ %inc = add nsw i32 %IntIndex.046, 1, !dbg !75
+ tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !28), !dbg !75
+ %cmp = icmp sgt i32 %inc, %add3, !dbg !75
+ %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !75
+ br i1 %cmp, label %for.end, label %for.body, !dbg !75
+
+for.end: ; preds = %for.body
+ %sub = add nsw i32 %IntParI1, 4, !dbg !78
+ %idxprom14 = sext i32 %sub to i64, !dbg !78
+ %arrayidx17 = getelementptr inbounds [51 x i32]* %Array2Par, i64 %idxprom, i64 %idxprom14, !dbg !78
+ %0 = load i32* %arrayidx17, align 4, !dbg !78, !tbaa !70
+ %inc18 = add nsw i32 %0, 1, !dbg !78
+ store i32 %inc18, i32* %arrayidx17, align 4, !dbg !78, !tbaa !70
+ %1 = load i32* %arrayidx, align 4, !dbg !79, !tbaa !70
+ %add22 = add nsw i32 %IntParI1, 25, !dbg !79
+ %idxprom23 = sext i32 %add22 to i64, !dbg !79
+ %arrayidx25 = getelementptr inbounds [51 x i32]* %Array2Par, i64 %idxprom23, i64 %idxprom, !dbg !79
+ store i32 %1, i32* %arrayidx25, align 4, !dbg !79, !tbaa !70
+ store i32 5, i32* @IntGlob, align 4, !dbg !80, !tbaa !70
+ ret void, !dbg !81
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+attributes #0 = { nounwind optsize ssp uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 12, metadata !3, metadata !"clang version 3.3 (trunk 175015)", i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 786436, metadata !82, null, metadata !"", i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [from ]
+!3 = metadata !{i32 786473, metadata !82} ; [ DW_TAG_file_type ]
+!4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8, metadata !9}
+!5 = metadata !{i32 786472, metadata !"Ident1", i64 0} ; [ DW_TAG_enumerator ] [Ident1 :: 0]
+!6 = metadata !{i32 786472, metadata !"Ident2", i64 10000} ; [ DW_TAG_enumerator ] [Ident2 :: 10000]
+!7 = metadata !{i32 786472, metadata !"Ident3", i64 10001} ; [ DW_TAG_enumerator ] [Ident3 :: 10001]
+!8 = metadata !{i32 786472, metadata !"Ident4", i64 10002} ; [ DW_TAG_enumerator ] [Ident4 :: 10002]
+!9 = metadata !{i32 786472, metadata !"Ident5", i64 10003} ; [ DW_TAG_enumerator ] [Ident5 :: 10003]
+!10 = metadata !{i32 0}
+!11 = metadata !{metadata !12}
+!12 = metadata !{i32 786478, metadata !3, metadata !"Proc8", metadata !"Proc8", metadata !"", metadata !3, i32 180, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void (i32*, [51 x i32]*, i32, i32)* @Proc8, null, null, metadata !22, i32 185} ; [ DW_TAG_subprogram ] [line 180] [def] [scope 185] [Proc8]
+!13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = metadata !{null, metadata !15, metadata !17, metadata !21, metadata !21}
+!15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!16 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!18 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1632, i64 32, i32 0, i32 0, metadata !16, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int]
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 786465, i64 0, i64 51} ; [ DW_TAG_subrange_type ] [0, 50]
+!21 = metadata !{i32 786454, metadata !82, null, metadata !"OneToFifty", i32 132, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [OneToFifty] [line 132, size 0, align 0, offset 0] [from int]
+!22 = metadata !{metadata !23, metadata !24, metadata !25, metadata !26, metadata !27, metadata !28}
+!23 = metadata !{i32 786689, metadata !12, metadata !"Array1Par", metadata !3, i32 16777397, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [Array1Par] [line 181]
+!24 = metadata !{i32 786689, metadata !12, metadata !"Array2Par", metadata !3, i32 33554614, metadata !17, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [Array2Par] [line 182]
+!25 = metadata !{i32 786689, metadata !12, metadata !"IntParI1", metadata !3, i32 50331831, metadata !21, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [IntParI1] [line 183]
+!26 = metadata !{i32 786689, metadata !12, metadata !"IntParI2", metadata !3, i32 67109048, metadata !21, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [IntParI2] [line 184]
+!27 = metadata !{i32 786688, metadata !12, metadata !"IntLoc", metadata !3, i32 186, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [IntLoc] [line 186]
+!28 = metadata !{i32 786688, metadata !12, metadata !"IntIndex", metadata !3, i32 187, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [IntIndex] [line 187]
+!29 = metadata !{metadata !30, metadata !35, metadata !36, metadata !38, metadata !39, metadata !40, metadata !42, metadata !46, metadata !63}
+!30 = metadata !{i32 786484, i32 0, null, metadata !"Version", metadata !"Version", metadata !"", metadata !3, i32 111, metadata !31, i32 0, i32 1, [4 x i8]* @Version, null} ; [ DW_TAG_variable ] [Version] [line 111] [def]
+!31 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 8, i32 0, i32 0, metadata !32, metadata !33, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char]
+!32 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!33 = metadata !{metadata !34}
+!34 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] [0, 3]
+!35 = metadata !{i32 786484, i32 0, null, metadata !"IntGlob", metadata !"IntGlob", metadata !"", metadata !3, i32 171, metadata !16, i32 0, i32 1, i32* @IntGlob, null} ; [ DW_TAG_variable ] [IntGlob] [line 171] [def]
+!36 = metadata !{i32 786484, i32 0, null, metadata !"BoolGlob", metadata !"BoolGlob", metadata !"", metadata !3, i32 172, metadata !37, i32 0, i32 1, i32* @BoolGlob, null} ; [ DW_TAG_variable ] [BoolGlob] [line 172] [def]
+!37 = metadata !{i32 786454, metadata !82, null, metadata !"boolean", i32 149, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [boolean] [line 149, size 0, align 0, offset 0] [from int]
+!38 = metadata !{i32 786484, i32 0, null, metadata !"Char1Glob", metadata !"Char1Glob", metadata !"", metadata !3, i32 173, metadata !32, i32 0, i32 1, i8* @Char1Glob, null} ; [ DW_TAG_variable ] [Char1Glob] [line 173] [def]
+!39 = metadata !{i32 786484, i32 0, null, metadata !"Char2Glob", metadata !"Char2Glob", metadata !"", metadata !3, i32 174, metadata !32, i32 0, i32 1, i8* @Char2Glob, null} ; [ DW_TAG_variable ] [Char2Glob] [line 174] [def]
+!40 = metadata !{i32 786484, i32 0, null, metadata !"Array1Glob", metadata !"Array1Glob", metadata !"", metadata !3, i32 175, metadata !41, i32 0, i32 1, [51 x i32]* @Array1Glob, null} ; [ DW_TAG_variable ] [Array1Glob] [line 175] [def]
+!41 = metadata !{i32 786454, metadata !82, null, metadata !"Array1Dim", i32 135, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [Array1Dim] [line 135, size 0, align 0, offset 0] [from ]
+!42 = metadata !{i32 786484, i32 0, null, metadata !"Array2Glob", metadata !"Array2Glob", metadata !"", metadata !3, i32 176, metadata !43, i32 0, i32 1, [51 x [51 x i32]]* @Array2Glob, null} ; [ DW_TAG_variable ] [Array2Glob] [line 176] [def]
+!43 = metadata !{i32 786454, metadata !82, null, metadata !"Array2Dim", i32 136, i64 0, i64 0, i64 0, i32 0, metadata !44} ; [ DW_TAG_typedef ] [Array2Dim] [line 136, size 0, align 0, offset 0] [from ]
+!44 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 83232, i64 32, i32 0, i32 0, metadata !16, metadata !45, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 83232, align 32, offset 0] [from int]
+!45 = metadata !{metadata !20, metadata !20}
+!46 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlb", metadata !"PtrGlb", metadata !"", metadata !3, i32 177, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlb, null} ; [ DW_TAG_variable ] [PtrGlb] [line 177] [def]
+!47 = metadata !{i32 786454, metadata !82, null, metadata !"RecordPtr", i32 148, i64 0, i64 0, i64 0, i32 0, metadata !48} ; [ DW_TAG_typedef ] [RecordPtr] [line 148, size 0, align 0, offset 0] [from ]
+!48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from RecordType]
+!49 = metadata !{i32 786454, metadata !82, null, metadata !"RecordType", i32 147, i64 0, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_typedef ] [RecordType] [line 147, size 0, align 0, offset 0] [from Record]
+!50 = metadata !{i32 786451, metadata !82, null, metadata !"Record", i32 138, i64 448, i64 64, i32 0, i32 0, null, metadata !51, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [from ]
+!51 = metadata !{metadata !52, metadata !54, metadata !56, metadata !57, metadata !58}
+!52 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"PtrComp", i32 140, i64 64, i64 64, i64 0, i32 0, metadata !53} ; [ DW_TAG_member ] [PtrComp] [line 140, size 64, align 64, offset 0] [from ]
+!53 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Record]
+!54 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"Discr", i32 141, i64 32, i64 32, i64 64, i32 0, metadata !55} ; [ DW_TAG_member ] [Discr] [line 141, size 32, align 32, offset 64] [from Enumeration]
+!55 = metadata !{i32 786454, metadata !82, null, metadata !"Enumeration", i32 128, i64 0, i64 0, i64 0, i32 0, metadata !2} ; [ DW_TAG_typedef ] [Enumeration] [line 128, size 0, align 0, offset 0] [from ]
+!56 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"EnumComp", i32 142, i64 32, i64 32, i64 96, i32 0, metadata !55} ; [ DW_TAG_member ] [EnumComp] [line 142, size 32, align 32, offset 96] [from Enumeration]
+!57 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"IntComp", i32 143, i64 32, i64 32, i64 128, i32 0, metadata !21} ; [ DW_TAG_member ] [IntComp] [line 143, size 32, align 32, offset 128] [from OneToFifty]
+!58 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"StringComp", i32 144, i64 248, i64 8, i64 160, i32 0, metadata !59} ; [ DW_TAG_member ] [StringComp] [line 144, size 248, align 8, offset 160] [from String30]
+!59 = metadata !{i32 786454, metadata !82, null, metadata !"String30", i32 134, i64 0, i64 0, i64 0, i32 0, metadata !60} ; [ DW_TAG_typedef ] [String30] [line 134, size 0, align 0, offset 0] [from ]
+!60 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 248, i64 8, i32 0, i32 0, metadata !32, metadata !61, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char]
+!61 = metadata !{metadata !62}
+!62 = metadata !{i32 786465, i64 0, i64 31} ; [ DW_TAG_subrange_type ] [0, 30]
+!63 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlbNext", metadata !"PtrGlbNext", metadata !"", metadata !3, i32 178, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlbNext, null} ; [ DW_TAG_variable ] [PtrGlbNext] [line 178] [def]
+!64 = metadata !{i32 181, i32 0, metadata !12, null}
+!65 = metadata !{i32 182, i32 0, metadata !12, null}
+!66 = metadata !{i32 183, i32 0, metadata !12, null}
+!67 = metadata !{i32 184, i32 0, metadata !12, null}
+!68 = metadata !{i32 189, i32 0, metadata !12, null}
+!69 = metadata !{i32 190, i32 0, metadata !12, null}
+!70 = metadata !{metadata !"int", metadata !71}
+!71 = metadata !{metadata !"omnipotent char", metadata !72}
+!72 = metadata !{metadata !"Simple C/C++ TBAA"}
+!73 = metadata !{i32 191, i32 0, metadata !12, null}
+!74 = metadata !{i32 192, i32 0, metadata !12, null}
+!75 = metadata !{i32 193, i32 0, metadata !76, null}
+!76 = metadata !{i32 786443, metadata !12, i32 193, i32 0, metadata !3, i32 0} ; [ DW_TAG_lexical_block ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c]
+!77 = metadata !{i32 194, i32 0, metadata !76, null}
+!78 = metadata !{i32 195, i32 0, metadata !12, null}
+!79 = metadata !{i32 196, i32 0, metadata !12, null}
+!80 = metadata !{i32 197, i32 0, metadata !12, null}
+!81 = metadata !{i32 198, i32 0, metadata !12, null}
+!82 = metadata !{metadata !"dry.c", metadata !"/Users/manmanren/test-Nov/rdar_13183203/test2"}
diff --git a/test/DebugInfo/X86/multiple-at-const-val.ll b/test/DebugInfo/X86/multiple-at-const-val.ll
new file mode 100644
index 000000000000..f6ca10bcc4ca
--- /dev/null
+++ b/test/DebugInfo/X86/multiple-at-const-val.ll
@@ -0,0 +1,61 @@
+; RUN: llc -O0 %s -mtriple=x86_64-apple-darwin -filetype=obj -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; rdar://13071590
+; Check that we do not emit multiple DW_AT_const_value attributes for a single member.
+; CHECK: .debug_info contents:
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_class_type
+; CHECK: DW_TAG_member
+; CHECK: badbit
+; CHECK: DW_AT_const_value [DW_FORM_data4] (0x00000001)
+; CHECK-NOT: DW_AT_const_value
+; CHECK: NULL
+
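+; The metadata below describes badbit twice: as DW_TAG_member !77 of class
+; os_base (!49) holding the constant, and as global variable !1800 that refers
+; back to !77, so exactly one DW_AT_const_value must reach the member DIE.
+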
+%"class.std::basic_ostream" = type { i32 (...)**, %"class.std::basic_os" }
+%"class.std::basic_os" = type { %"class.std::os_base", %"class.std::basic_ostream"*, i8, i8 }
+%"class.std::os_base" = type { i32 (...)**, i64, i64, i32, i32, i32 }
+
+@_ZSt4cout = external global %"class.std::basic_ostream"
+@.str = private unnamed_addr constant [6 x i8] c"c is \00", align 1
+
+define i32 @main() {
+entry:
+ %call1.i = tail call %"class.std::basic_ostream"* @test(%"class.std::basic_ostream"* @_ZSt4cout, i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i64 5)
+ ret i32 0
+}
+
+declare %"class.std::basic_ostream"* @test(%"class.std::basic_ostream"*, i8*, i64)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 4, metadata !961, metadata !"clang version 3.3 (trunk 174207)", i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786, metadata !""} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !26}
+!4 = metadata !{i32 786489, null, metadata !"std", metadata !5, i32 48} ; [ DW_TAG_namespace ]
+!5 = metadata !{i32 786473, metadata !1801} ; [ DW_TAG_file_type ]
+!25 = metadata !{i32 786472, metadata !"_S_os_fmtflags_end", i64 65536} ; [ DW_TAG_enumerator ]
+!26 = metadata !{i32 786436, metadata !1801, metadata !4, metadata !"_Ios_Iostate", i32 146, i64 32, i64 32, i32 0, i32 0, null, metadata !27, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!27 = metadata !{metadata !28, metadata !29, metadata !30, metadata !31, metadata !32}
+!28 = metadata !{i32 786472, metadata !"_S_goodbit", i64 0} ; [ DW_TAG_enumerator ] [_S_goodbit :: 0]
+!29 = metadata !{i32 786472, metadata !"_S_badbit", i64 1} ; [ DW_TAG_enumerator ] [_S_badbit :: 1]
+!30 = metadata !{i32 786472, metadata !"_S_eofbit", i64 2} ; [ DW_TAG_enumerator ] [_S_eofbit :: 2]
+!31 = metadata !{i32 786472, metadata !"_S_failbit", i64 4} ; [ DW_TAG_enumerator ] [_S_failbit :: 4]
+!32 = metadata !{i32 786472, metadata !"_S_os_ostate_end", i64 65536} ; [ DW_TAG_enumerator ] [_S_os_ostate_end :: 65536]
+!49 = metadata !{i32 786434, metadata !1801, metadata !4, metadata !"os_base", i32 200, i64 1728, i64 64, i32 0, i32 0, null, metadata !50, i32 0, metadata !49, null} ; [ DW_TAG_class_type ]
+!50 = metadata !{metadata !77}
+!54 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !55, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!55 = metadata !{metadata !56}
+!56 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!77 = metadata !{i32 786445, metadata !1801, metadata !49, metadata !"badbit", i32 331, i64 0, i64 0, i64 0, i32 4096, metadata !78, i32 1} ; [ DW_TAG_member ]
+!78 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !79} ; [ DW_TAG_const_type ]
+!79 = metadata !{i32 786454, metadata !49, metadata !"ostate", metadata !5, i32 327, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_typedef ]
+!955 = metadata !{i32 0}
+!956 = metadata !{metadata !960}
+!960 = metadata !{i32 786478, i32 0, metadata !961, metadata !"main", metadata !"main", metadata !"", metadata !961, i32 73, metadata !54, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !955, i32 73} ; [ DW_TAG_subprogram ]
+!961 = metadata !{i32 786473, metadata !1802} ; [ DW_TAG_file_type ]
+!1786 = metadata !{metadata !1800}
+!1800 = metadata !{i32 786484, i32 0, metadata !5, metadata !"badbit", metadata !"badbit", metadata !"badbit", metadata !5, i32 331, metadata !78, i32 1, i32 1, i32 1, metadata !77} ; [ DW_TAG_variable ]
+!1801 = metadata !{metadata !"os_base.h", metadata !"/privite/tmp"}
+!1802 = metadata !{metadata !"student2.cpp", metadata !"/privite/tmp"}
diff --git a/test/DebugInfo/X86/nondefault-subrange-array.ll b/test/DebugInfo/X86/nondefault-subrange-array.ll
new file mode 100644
index 000000000000..33a6f8ba9e50
--- /dev/null
+++ b/test/DebugInfo/X86/nondefault-subrange-array.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -O0 -filetype=obj -o %t < %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+%class.A = type { [42 x i32] }
+
+@a = global %class.A zeroinitializer, align 4
+
+; Check that we can handle non-default array bounds. In this case, the array
+; spans the index range [-3, 38].
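+; The subrange node (!13) below stores lower bound -3 and element count 42, so
+; the last valid index is -3 + 42 - 1 = 38; llc emits the bounds as
+; DW_AT_lower_bound 0xfffffffffffffffd (two's complement -3) and
+; DW_AT_upper_bound 0x26 (38).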
+
+; CHECK: 0x0000002d: DW_TAG_base_type [3]
+; CHECK-NEXT: 0x0000002e: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000041] = "int")
+; CHECK-NEXT: 0x00000032: DW_AT_byte_size [DW_FORM_data1] (0x04)
+; CHECK-NEXT: 0x00000033: DW_AT_encoding [DW_FORM_data1] (0x05)
+
+; CHECK: 0x00000034: DW_TAG_array_type [4] *
+; CHECK-NEXT: 0x00000035: DW_AT_type [DW_FORM_ref4] (cu + 0x0026 => {0x00000026})
+
+; CHECK: 0x00000039: DW_TAG_subrange_type [5]
+; CHECK-NEXT: 0x0000003a: DW_AT_type [DW_FORM_ref4] (cu + 0x002d => {0x0000002d})
+; CHECK-NEXT: 0x0000003e: DW_AT_lower_bound [DW_FORM_data8] (0xfffffffffffffffd)
+; CHECK-NEXT: 0x00000046: DW_AT_upper_bound [DW_FORM_data1] (0x26)
+
+; CHECK: 0x00000055: DW_TAG_member [8]
+; CHECK-NEXT: 0x00000056: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000003f] = "x")
+; CHECK-NEXT: 0x0000005a: DW_AT_type [DW_FORM_ref4] (cu + 0x0034 => {0x00000034})
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !14}
+!9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
+!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786465, i64 -3, i64 42} ; [ DW_TAG_subrange_type ] [-3, 38]
+!14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
+!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{null, metadata !17}
+!17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!20 = metadata !{metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm"}
diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll
index 1a815f936c17..1847d2c10fdf 100644
--- a/test/DebugInfo/X86/objc-fwd-decl.ll
+++ b/test/DebugInfo/X86/objc-fwd-decl.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-macosx %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; CHECK: 0x00000027: DW_TAG_structure_type
; CHECK: 0x0000002c: DW_AT_declaration
@@ -12,16 +12,15 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10, !11, !12}
-!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 786451, null, metadata !"FooBarBaz", metadata !6, i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16} ; [ DW_TAG_structure_type ]
+!0 = metadata !{i32 786449, metadata !13, i32 16, metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a, null} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 786451, metadata !13, null, metadata !"FooBarBaz", i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16} ; [ DW_TAG_structure_type ]
!9 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
!10 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
!11 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
!12 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!13 = metadata !{metadata !"foo.m", metadata !"/Users/echristo"}
diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll
index c84b2e6931f2..3bb93e7251b8 100644
--- a/test/DebugInfo/X86/op_deref.ll
+++ b/test/DebugInfo/X86/op_deref.ll
@@ -1,5 +1,5 @@
; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla")
; FIXME: The location here needs to be fixed, but llvm-dwarfdump doesn't handle
@@ -59,31 +59,30 @@ declare void @llvm.stackrestore(i8*) nounwind
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"bar.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"bar.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!10 = metadata !{i32 786689, metadata !5, metadata !"s", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!11 = metadata !{i32 1, i32 26, metadata !5, null}
!12 = metadata !{i32 3, i32 13, metadata !13, null}
-!13 = metadata !{i32 786443, metadata !5, i32 2, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!13 = metadata !{i32 786443, metadata !6, metadata !5, i32 2, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
!14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 0, i32 0, i64 2} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
!16 = metadata !{metadata !17}
-!17 = metadata !{i32 786465, i64 1, i64 0} ; [ DW_TAG_subrange_type ]
+!17 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ]
!18 = metadata !{i32 3, i32 7, metadata !13, null}
!19 = metadata !{i32 786688, metadata !13, metadata !"i", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
!20 = metadata !{i32 4, i32 7, metadata !13, null}
!21 = metadata !{i32 5, i32 8, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !13, i32 5, i32 3, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !6, metadata !13, i32 5, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
!23 = metadata !{i32 6, i32 5, metadata !24, null}
-!24 = metadata !{i32 786443, metadata !22, i32 5, i32 27, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{i32 786443, metadata !6, metadata !22, i32 5, i32 27, i32 2} ; [ DW_TAG_lexical_block ]
!25 = metadata !{i32 7, i32 3, metadata !24, null}
!26 = metadata !{i32 5, i32 22, metadata !22, null}
!27 = metadata !{i32 8, i32 1, metadata !13, null}
+!28 = metadata !{metadata !"bar.c", metadata !"/Users/echristo/tmp"}
diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll
index f11fbe4cc5f5..aa560587a602 100644
--- a/test/DebugInfo/X86/pointer-type-size.ll
+++ b/test/DebugInfo/X86/pointer-type-size.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-macosx10.7 %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; CHECK: ptr
; CHECK-NOT: AT_bit_size
@@ -10,16 +10,15 @@
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"foo.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 147882)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 720937, metadata !"foo.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720915, null, metadata !"crass", metadata !6, i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 147882)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass, null} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 720937, metadata !13} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786451, metadata !13, null, metadata !"crass", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!8 = metadata !{metadata !9}
-!9 = metadata !{i32 720909, metadata !7, metadata !"ptr", metadata !6, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
-!11 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786445, metadata !13, metadata !7, metadata !"ptr", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
+!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!13 = metadata !{metadata !"foo.c", metadata !"/Users/echristo/tmp"}
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
index 5a001eea75a1..61df4ad0baa6 100644
--- a/test/DebugInfo/X86/pr11300.ll
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; test that the DW_AT_specification is a back edge in the file.
@@ -31,35 +31,34 @@ entry:
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !20}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !18} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !20}
+!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !18, i32 4} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !32} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 720898, null, metadata !"foo", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ]
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 720898, metadata !32, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ]
!11 = metadata !{metadata !12}
-!12 = metadata !{i32 720942, i32 0, metadata !10, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 720942, metadata !6, metadata !10, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 2} ; [ DW_TAG_subprogram ]
!13 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!14 = metadata !{null, metadata !15}
-!15 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
!16 = metadata !{metadata !17}
!17 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
!18 = metadata !{metadata !19}
!19 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!20 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21} ; [ DW_TAG_subprogram ]
+!20 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21, i32 2} ; [ DW_TAG_subprogram ]
!21 = metadata !{metadata !22}
!22 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!23 = metadata !{i32 721153, metadata !5, metadata !"x", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!24 = metadata !{i32 4, i32 15, metadata !5, null}
!25 = metadata !{i32 4, i32 20, metadata !26, null}
-!26 = metadata !{i32 720907, metadata !5, i32 4, i32 18, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!26 = metadata !{i32 786443, metadata !6, metadata !5, i32 4, i32 18, i32 0} ; [ DW_TAG_lexical_block ]
!27 = metadata !{i32 4, i32 30, metadata !26, null}
-!28 = metadata !{i32 721153, metadata !20, metadata !"this", metadata !6, i32 16777218, metadata !15, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!28 = metadata !{i32 786689, metadata !20, metadata !"this", metadata !6, i32 16777218, metadata !15, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
!29 = metadata !{i32 2, i32 8, metadata !20, null}
!30 = metadata !{i32 2, i32 15, metadata !31, null}
-!31 = metadata !{i32 720907, metadata !20, i32 2, i32 14, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!31 = metadata !{i32 786443, metadata !6, metadata !20, i32 2, i32 14, i32 1} ; [ DW_TAG_lexical_block ]
+!32 = metadata !{metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build"}
diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll
index abb946d51477..295c018c5e13 100644
--- a/test/DebugInfo/X86/pr12831.ll
+++ b/test/DebugInfo/X86/pr12831.ll
@@ -77,12 +77,10 @@ entry:
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"BPLFunctionWriter.cpp", metadata !"/home/peter/crashdelta", metadata !"clang version 3.2 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !106, metadata !107, metadata !126, metadata !127}
-!5 = metadata !{i32 786478, i32 0, null, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", metadata !6, i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ]
+!0 = metadata !{i32 786449, i32 4, metadata !159, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !106, metadata !107, metadata !126, metadata !127}
+!5 = metadata !{i32 786478, metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 0, null, metadata !"writeExpr", metadata !"writeExpr", metadata !6, i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ]
!6 = metadata !{i32 786473, metadata !"BPLFunctionWriter2.ii", metadata !"/home/peter/crashdelta", null} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{null, metadata !9}
@@ -93,32 +91,32 @@ entry:
!13 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
!14 = metadata !{i32 786434, null, metadata !"BPLModuleWriter", metadata !6, i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ]
!15 = metadata !{metadata !16}
-!16 = metadata !{i32 786478, i32 0, metadata !14, metadata !"writeIntrinsic", metadata !"writeIntrinsic", metadata !"_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", metadata !6, i32 13, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !101, i32 13} ; [ DW_TAG_subprogram ]
+!16 = metadata !{i32 786478, metadata !"_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", i32 0, metadata !14, metadata !"writeIntrinsic", metadata !"writeIntrinsic", metadata !6, i32 13, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !101, i32 13} ; [ DW_TAG_subprogram ]
!17 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!18 = metadata !{null, metadata !19, metadata !20}
!19 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !14} ; [ DW_TAG_pointer_type ]
!20 = metadata !{i32 786434, null, metadata !"function<void ()>", metadata !6, i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, metadata !97} ; [ DW_TAG_class_type ]
!21 = metadata !{metadata !22, metadata !51, metadata !58, metadata !86, metadata !92}
-!22 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"", metadata !6, i32 8, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !47, i32 0, metadata !49, i32 8} ; [ DW_TAG_subprogram ]
+!22 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 8, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !47, i32 0, metadata !49, i32 8} ; [ DW_TAG_subprogram ]
!23 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!24 = metadata !{null, metadata !25, metadata !26}
!25 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ]
!26 = metadata !{i32 786434, metadata !5, metadata !"", metadata !6, i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !27, i32 0, null, null} ; [ DW_TAG_class_type ]
!27 = metadata !{metadata !28, metadata !35, metadata !41}
-!28 = metadata !{i32 786478, i32 0, metadata !26, metadata !"operator()", metadata !"operator()", metadata !"", metadata !6, i32 20, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !33, i32 20} ; [ DW_TAG_subprogram ]
+!28 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"operator()", metadata !"operator()", metadata !6, i32 20, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !33, i32 20} ; [ DW_TAG_subprogram ]
!29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!30 = metadata !{null, metadata !31}
!31 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !32} ; [ DW_TAG_pointer_type ]
!32 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_const_type ]
!33 = metadata !{metadata !34}
!34 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!35 = metadata !{i32 786478, i32 0, metadata !26, metadata !"~", metadata !"~", metadata !"", metadata !6, i32 20, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !39, i32 20} ; [ DW_TAG_subprogram ]
+!35 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"~", metadata !"~", metadata !6, i32 20, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !39, i32 20} ; [ DW_TAG_subprogram ]
!36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!37 = metadata !{null, metadata !38}
!38 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !26} ; [ DW_TAG_pointer_type ]
!39 = metadata !{metadata !40}
!40 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!41 = metadata !{i32 786478, i32 0, metadata !26, metadata !"", metadata !"", metadata !"", metadata !6, i32 20, metadata !42, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !45, i32 20} ; [ DW_TAG_subprogram ]
+!41 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"", metadata !"", metadata !6, i32 20, metadata !42, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !45, i32 20} ; [ DW_TAG_subprogram ]
!42 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!43 = metadata !{null, metadata !38, metadata !44}
!44 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_rvalue_reference_type ]
@@ -128,32 +126,32 @@ entry:
!48 = metadata !{i32 786479, null, metadata !"_Functor", metadata !26, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
!49 = metadata !{metadata !50}
!50 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!51 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<function<void ()> >", metadata !"function<function<void ()> >", metadata !"", metadata !6, i32 8, metadata !52, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !54, i32 0, metadata !56, i32 8} ; [ DW_TAG_subprogram ]
+!51 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<function<void ()> >", metadata !"function<function<void ()> >", metadata !6, i32 8, metadata !52, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !54, i32 0, metadata !56, i32 8} ; [ DW_TAG_subprogram ]
!52 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !53, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!53 = metadata !{null, metadata !25, metadata !20}
!54 = metadata !{metadata !55}
!55 = metadata !{i32 786479, null, metadata !"_Functor", metadata !20, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
!56 = metadata !{metadata !57}
!57 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!58 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"", metadata !6, i32 8, metadata !59, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !82, i32 0, metadata !84, i32 8} ; [ DW_TAG_subprogram ]
+!58 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 8, metadata !59, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !82, i32 0, metadata !84, i32 8} ; [ DW_TAG_subprogram ]
!59 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !60, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!60 = metadata !{null, metadata !25, metadata !61}
!61 = metadata !{i32 786434, metadata !5, metadata !"", metadata !6, i32 23, i64 8, i64 8, i32 0, i32 0, null, metadata !62, i32 0, null, null} ; [ DW_TAG_class_type ]
!62 = metadata !{metadata !63, metadata !70, metadata !76}
-!63 = metadata !{i32 786478, i32 0, metadata !61, metadata !"operator()", metadata !"operator()", metadata !"", metadata !6, i32 23, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !68, i32 23} ; [ DW_TAG_subprogram ]
+!63 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"operator()", metadata !"operator()", metadata !6, i32 23, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !68, i32 23} ; [ DW_TAG_subprogram ]
!64 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!65 = metadata !{null, metadata !66}
!66 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !67} ; [ DW_TAG_pointer_type ]
!67 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_const_type ]
!68 = metadata !{metadata !69}
!69 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!70 = metadata !{i32 786478, i32 0, metadata !61, metadata !"~", metadata !"~", metadata !"", metadata !6, i32 23, metadata !71, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !74, i32 23} ; [ DW_TAG_subprogram ]
+!70 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"~", metadata !"~", metadata !6, i32 23, metadata !71, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !74, i32 23} ; [ DW_TAG_subprogram ]
!71 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !72, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!72 = metadata !{null, metadata !73}
!73 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !61} ; [ DW_TAG_pointer_type ]
!74 = metadata !{metadata !75}
!75 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!76 = metadata !{i32 786478, i32 0, metadata !61, metadata !"", metadata !"", metadata !"", metadata !6, i32 23, metadata !77, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !80, i32 23} ; [ DW_TAG_subprogram ]
+!76 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"", metadata !"", metadata !6, i32 23, metadata !77, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !80, i32 23} ; [ DW_TAG_subprogram ]
!77 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !78, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!78 = metadata !{null, metadata !73, metadata !79}
!79 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_rvalue_reference_type ]
@@ -163,13 +161,13 @@ entry:
!83 = metadata !{i32 786479, null, metadata !"_Functor", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
!84 = metadata !{metadata !85}
!85 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!86 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function", metadata !"function", metadata !"", metadata !6, i32 6, metadata !87, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !90, i32 6} ; [ DW_TAG_subprogram ]
+!86 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function", metadata !"function", metadata !6, i32 6, metadata !87, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !90, i32 6} ; [ DW_TAG_subprogram ]
!87 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !88, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!88 = metadata !{null, metadata !25, metadata !89}
!89 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_rvalue_reference_type ]
!90 = metadata !{metadata !91}
!91 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!92 = metadata !{i32 786478, i32 0, metadata !20, metadata !"~function", metadata !"~function", metadata !"", metadata !6, i32 6, metadata !93, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !95, i32 6} ; [ DW_TAG_subprogram ]
+!92 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"~function", metadata !"~function", metadata !6, i32 6, metadata !93, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !95, i32 6} ; [ DW_TAG_subprogram ]
!93 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !94, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!94 = metadata !{null, metadata !25}
!95 = metadata !{metadata !96}
@@ -180,20 +178,20 @@ entry:
!100 = metadata !{null}
!101 = metadata !{metadata !102}
!102 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!103 = metadata !{i32 786478, i32 0, metadata !10, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", metadata !6, i32 17, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !104, i32 17} ; [ DW_TAG_subprogram ]
+!103 = metadata !{i32 786478, metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 0, metadata !10, metadata !"writeExpr", metadata !"writeExpr", metadata !6, i32 17, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !104, i32 17} ; [ DW_TAG_subprogram ]
!104 = metadata !{metadata !105}
!105 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!106 = metadata !{i32 786478, i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !6, i32 8, metadata !59, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !82, metadata !58, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
-!107 = metadata !{i32 786478, i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !6, i32 3, metadata !108, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !111, metadata !113, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
+!106 = metadata !{i32 786478, metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 8, metadata !59, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !82, metadata !58, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
+!107 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 3, metadata !108, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !111, metadata !113, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
!108 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !109, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!109 = metadata !{null, metadata !110}
!110 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_reference_type ]
!111 = metadata !{metadata !112}
!112 = metadata !{i32 786479, null, metadata !"_Tp", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!113 = metadata !{i32 786478, i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !6, i32 3, metadata !108, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !111, i32 0, metadata !124, i32 3} ; [ DW_TAG_subprogram ]
+!113 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 3, metadata !108, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !111, i32 0, metadata !124, i32 3} ; [ DW_TAG_subprogram ]
!114 = metadata !{i32 786434, null, metadata !"_Base_manager", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !115, i32 0, null, null} ; [ DW_TAG_class_type ]
!115 = metadata !{metadata !116, metadata !113}
-!116 = metadata !{i32 786478, i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !6, i32 3, metadata !117, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !120, i32 0, metadata !122, i32 3} ; [ DW_TAG_subprogram ]
+!116 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 3, metadata !117, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !120, i32 0, metadata !122, i32 3} ; [ DW_TAG_subprogram ]
!117 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !118, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!118 = metadata !{null, metadata !119}
!119 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_reference_type ]
@@ -203,10 +201,9 @@ entry:
!123 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
!124 = metadata !{metadata !125}
!125 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
-!126 = metadata !{i32 786478, i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !6, i32 8, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !47, metadata !22, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
-!127 = metadata !{i32 786478, i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !6, i32 3, metadata !117, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !120, metadata !116, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
-!128 = metadata !{metadata !129}
-!129 = metadata !{metadata !130}
+!126 = metadata !{i32 786478, metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 8, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !47, metadata !22, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
+!127 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 3, metadata !117, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !120, metadata !116, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
+!128 = metadata !{metadata !130}
!130 = metadata !{i32 786484, i32 0, metadata !114, metadata !"__stored_locally", metadata !"__stored_locally", metadata !"__stored_locally", metadata !6, i32 2, metadata !131, i32 1, i32 1, i1 true} ; [ DW_TAG_variable ]
!131 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !132} ; [ DW_TAG_const_type ]
!132 = metadata !{i32 786468, null, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
@@ -236,3 +233,4 @@ entry:
!156 = metadata !{i32 10, i32 13, metadata !155, null}
!157 = metadata !{i32 4, i32 5, metadata !158, null}
!158 = metadata !{i32 786443, metadata !127, i32 3, i32 105, metadata !6, i32 4} ; [ DW_TAG_lexical_block ]
+!159 = metadata !{i32 786473, metadata !"BPLFunctionWriter.cpp", metadata !"/home/peter/crashdelta", null} ; [ DW_TAG_file_type ]
diff --git a/test/DebugInfo/X86/pr13303.ll b/test/DebugInfo/X86/pr13303.ll
new file mode 100644
index 000000000000..34956237ae0b
--- /dev/null
+++ b/test/DebugInfo/X86/pr13303.ll
@@ -0,0 +1,26 @@
+; RUN: llc %s -o %t -filetype=obj -mtriple=x86_64-unknown-linux-gnu
+; RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s
+; PR13303
+
+; Check that the prologue ends with is_stmt here.
+; CHECK: 0x0000000000000000 {{.*}} is_stmt
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ ret i32 0, !dbg !10
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 160143)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!6 = metadata !{i32 786473, metadata !"PR13303.c", metadata !"/home/probinson", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 1, i32 14, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c]
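
PR13303 tracked the prologue_end/is_stmt marking for the first instruction of a function. Every location in the IR above sits on line 1, so the generating source was presumably a one-line main; the sketch below is a reconstruction from the metadata (!5, !10, !11), not something carried in the patch:

    // Hypothetical reconstruction of PR13303.c; main at line 1, the body
    // opening at column 12 and the return at column 14, per !11 and !10.
    int main() { return 0; }
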
diff --git a/test/DebugInfo/X86/pr9951.ll b/test/DebugInfo/X86/pr9951.ll
index 7716cd7c6c1c..cb348e2c9adc 100644
--- a/test/DebugInfo/X86/pr9951.ll
+++ b/test/DebugInfo/X86/pr9951.ll
@@ -5,14 +5,15 @@ entry:
ret i32 42
}
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
+!6 = metadata !{metadata !0}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 ()* @f, null, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build-rust2", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build-rust2", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build-rust2", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build-rust2", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !6, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
; CHECK: _f: ## @f
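
The constant churn in this and the surrounding hunks (589870 becoming 786478, 589865 becoming 786473, and so on) follows a single pattern: in this era each debug node's leading i32 packs LLVM's debug-info version into the bits above the DWARF tag. A worked illustration, derived from the hunks themselves rather than stated anywhere in the patch:

    // high 16 bits = LLVM debug-info version, low 16 bits = DW_TAG_* value.
    //   0x90000 + 0x2e (DW_TAG_subprogram)   = 589870   // old-style nodes
    //   0xC0000 + 0x2e (DW_TAG_subprogram)   = 786478   // new-style nodes
    //   0xC0000 + 0x11 (DW_TAG_compile_unit) = 786449
    //   0xC0000 + 0x29 (DW_TAG_file_type)    = 786473
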
diff --git a/test/DebugInfo/X86/prologue-stack.ll b/test/DebugInfo/X86/prologue-stack.ll
index 929db5190267..6e4917747c14 100644
--- a/test/DebugInfo/X86/prologue-stack.ll
+++ b/test/DebugInfo/X86/prologue-stack.ll
@@ -20,16 +20,14 @@ declare i32 @callme(i32)
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"bar.c", metadata !"/usr/local/google/home/echristo/tmp", metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2]
!6 = metadata !{i32 786473, metadata !"bar.c", metadata !"/usr/local/google/home/echristo/tmp", null} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!8 = metadata !{metadata !9}
!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
!10 = metadata !{i32 5, i32 3, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/bar.c]
+!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 4, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/bar.c]
!12 = metadata !{i32 6, i32 3, metadata !11, null}
diff --git a/test/DebugInfo/X86/rvalue-ref.ll b/test/DebugInfo/X86/rvalue-ref.ll
index e73869dbe07a..ae2e3d4578c5 100644
--- a/test/DebugInfo/X86/rvalue-ref.ll
+++ b/test/DebugInfo/X86/rvalue-ref.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj -O0
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; CHECK: DW_TAG_rvalue_reference_type
@@ -22,13 +22,11 @@ declare i32 @printf(i8*, ...)
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !16} ; [ DW_TAG_file_type ]
!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{null, metadata !9}
!9 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_rvalue_reference_type ]
@@ -36,5 +34,6 @@ declare i32 @printf(i8*, ...)
!11 = metadata !{i32 786689, metadata !5, metadata !"i", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
!12 = metadata !{i32 4, i32 17, metadata !5, null}
!13 = metadata !{i32 6, i32 3, metadata !14, null}
-!14 = metadata !{i32 786443, metadata !5, i32 5, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!14 = metadata !{i32 786443, metadata !6, metadata !5, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
!15 = metadata !{i32 7, i32 1, metadata !14, null}
+!16 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"}
diff --git a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
new file mode 100644
index 000000000000..39a026c35494
--- /dev/null
+++ b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
@@ -0,0 +1,67 @@
+; RUN: llc -O0 %s -mtriple=x86_64-apple-darwin -filetype=obj -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; rdar://13067005
+; CHECK: .debug_info contents:
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
+; CHECK: DW_AT_stmt_list [DW_FORM_data4] (0x00000000)
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
+; CHECK: DW_AT_stmt_list [DW_FORM_data4] (0x0000003c)
+
+; CHECK: .debug_line contents:
+; CHECK-NEXT: Line table prologue:
+; CHECK-NEXT: total_length: 0x00000038
+; CHECK: file_names[ 1] 0 0x00000000 0x00000000 simple.c
+; CHECK: Line table prologue:
+; CHECK-NEXT: total_length: 0x00000039
+; CHECK: file_names[ 1] 0 0x00000000 0x00000000 simple2.c
+; CHECK-NOT: file_names
+
+define i32 @test(i32 %a) nounwind uwtable ssp {
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !15), !dbg !16
+ %0 = load i32* %a.addr, align 4, !dbg !17
+ %call = call i32 @fn(i32 %0), !dbg !17
+ ret i32 %call, !dbg !17
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @fn(i32 %a) nounwind uwtable ssp {
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !19), !dbg !20
+ %0 = load i32* %a.addr, align 4, !dbg !21
+ ret i32 %0, !dbg !21
+}
+
+!llvm.dbg.cu = !{!0, !10}
+!0 = metadata !{i32 786449, metadata !23, i32 12, metadata !"clang version 3.3", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !23, metadata !"test", metadata !"test", metadata !"", metadata !6, i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @test, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [test]
+!6 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786449, metadata !24, i32 12, metadata !"clang version 3.3 (trunk 172862)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !11, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!11 = metadata !{metadata !13}
+!13 = metadata !{i32 786478, metadata !24, metadata !"fn", metadata !"fn", metadata !"", metadata !14, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @fn, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [fn]
+!14 = metadata !{i32 786473, metadata !24} ; [ DW_TAG_file_type ]
+!15 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777218, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 2]
+!16 = metadata !{i32 2, i32 0, metadata !5, null}
+!17 = metadata !{i32 4, i32 0, metadata !18, null}
+!18 = metadata !{i32 786443, metadata !23, metadata !5, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786689, metadata !13, metadata !"a", metadata !14, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
+!20 = metadata !{i32 1, i32 0, metadata !13, null}
+!21 = metadata !{i32 2, i32 0, metadata !22, null}
+!22 = metadata !{i32 786443, metadata !24, metadata !13, i32 1, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{metadata !"simple.c", metadata !"/private/tmp"}
+!24 = metadata !{metadata !"simple2.c", metadata !"/private/tmp"}
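
The two compile units above correspond to two tiny sources linked at the IR level. A sketch of what they plausibly looked like, reconstructed from the metadata (test at simple.c:2 calling fn, fn at simple2.c:1); the patch itself does not include them:

    /* simple.c -- hypothetical reconstruction */
    int fn(int a);
    int test(int a)
    {
      return fn(a);
    }

    /* simple2.c -- hypothetical reconstruction */
    int fn(int a) {
      return a;
    }
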
diff --git a/test/DebugInfo/X86/stmt-list.ll b/test/DebugInfo/X86/stmt-list.ll
index 145649bf4592..4c8521f5d805 100644
--- a/test/DebugInfo/X86/stmt-list.ll
+++ b/test/DebugInfo/X86/stmt-list.ll
@@ -10,10 +10,11 @@ entry:
ret void
}
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
+!5 = metadata !{metadata !0}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"test2.c", metadata !"/home/espindola/llvm", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"test2.c", metadata !"/home/espindola/llvm", metadata !"clang version 3.0 ()", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !"test2.c", metadata !"/home/espindola/llvm", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"test2.c", metadata !"/home/espindola/llvm", metadata !"clang version 3.0 ()", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !5, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{null}
diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll
index caf12c2756e0..8df281d08ea6 100644
--- a/test/DebugInfo/X86/stringpool.ll
+++ b/test/DebugInfo/X86/stringpool.ll
@@ -5,17 +5,16 @@
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"z.c", metadata !"/home/nicholas", metadata !"clang version 3.1 (trunk 143009)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720948, i32 0, null, metadata !"yyyy", metadata !"yyyy", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @yyyy} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 720937, metadata !"z.c", metadata !"/home/nicholas", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.1 (trunk 143009)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 720948, i32 0, null, metadata !"yyyy", metadata !"yyyy", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @yyyy, null} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 720937, metadata !8} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!8 = metadata !{metadata !"z.c", metadata !"/home/nicholas"}
; Verify that we refer to 'yyyy' with a relocation.
-; LINUX: .long .Lstring3 # DW_AT_name
+; LINUX: .long .Linfo_string3 # DW_AT_name
; LINUX-NEXT: .long 38 # DW_AT_type
; LINUX-NEXT: # DW_AT_external
; LINUX-NEXT: .byte 1 # DW_AT_decl_file
@@ -25,7 +24,7 @@
; LINUX-NEXT: .quad yyyy
; Verify that we refer to 'yyyy' without a relocation.
-; DARWIN: Lset5 = Lstring3-Lsection_str ## DW_AT_name
+; DARWIN: Lset5 = Linfo_string3-Linfo_string ## DW_AT_name
; DARWIN-NEXT: .long Lset5
; DARWIN-NEXT: .long 39 ## DW_AT_type
; DARWIN-NEXT: .byte 1 ## DW_AT_external
diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll
index 9a047388207a..bdf104f07e39 100644
--- a/test/DebugInfo/X86/struct-loc.ll
+++ b/test/DebugInfo/X86/struct-loc.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
; Make sure that structures have a decl file and decl line attached.
; CHECK: DW_TAG_structure_type [3]
@@ -13,14 +13,13 @@
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 786473, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786451, metadata !11, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786445, metadata !7, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786445, metadata !11, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{metadata !"struct_bug.c", metadata !"/Users/echristo/tmp"}
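
For reference, the struct_bug.c this test was presumably generated from, reconstructed from the metadata (struct foo at line 1, member a at line 2, the definition of f at line 5) and not included in the patch:

    /* Hypothetical reconstruction of struct_bug.c. */
    struct foo {
      int a;
    };

    struct foo f;
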
diff --git a/test/DebugInfo/X86/subrange-type.ll b/test/DebugInfo/X86/subrange-type.ll
new file mode 100644
index 000000000000..efc5bf0417f1
--- /dev/null
+++ b/test/DebugInfo/X86/subrange-type.ll
@@ -0,0 +1,38 @@
+; RUN: llc -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+; Make sure that the base type from the subrange type has a name.
+; CHECK: 0x0000006b: DW_TAG_base_type [6]
+; CHECK-NEXT: DW_AT_name
+; CHECK: DW_TAG_subrange_type [8]
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x006b => {0x0000006b})
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca [2 x i32], align 4
+ store i32 0, i32* %retval
+ call void @llvm.dbg.declare(metadata !{[2 x i32]* %i}, metadata !10), !dbg !15
+ ret i32 0, !dbg !16
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !17, i32 12, metadata !"clang version 3.3 (trunk 171472) (llvm/trunk 171487)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [main]
+!6 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786688, metadata !11, metadata !"i", metadata !6, i32 4, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 4]
+!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.c]
+!12 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 64, i64 32, i32 0, i32 0, metadata !9, metadata !13, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from int]
+!13 = metadata !{metadata !14}
+!14 = metadata !{i32 786465, i64 0, i64 2} ; [ DW_TAG_subrange_type ] [0, 1]
+!15 = metadata !{i32 4, i32 0, metadata !11, null}
+!16 = metadata !{i32 6, i32 0, metadata !11, null}
+!17 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"}
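
From the metadata (main at line 2 with its scope opening at line 3, the local i declared at line 4 as a [2 x i32]), the foo.c behind this test was presumably something like the sketch below; treat it as an inference, not patch content:

    /* Hypothetical reconstruction of foo.c. */

    int main()
    {
      int i[2];

      return 0;
    }
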
diff --git a/test/DebugInfo/X86/subreg.ll b/test/DebugInfo/X86/subreg.ll
index 1c4456f4c5b4..027589b3d995 100644
--- a/test/DebugInfo/X86/subreg.ll
+++ b/test/DebugInfo/X86/subreg.ll
@@ -16,12 +16,15 @@ entry:
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-!0 = metadata !{i32 590081, metadata !1, metadata !"zzz", metadata !2, i32 16777219, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i16 (i16)* @f, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589841, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!llvm.dbg.cu = !{!3}
+!9 = metadata !{metadata !1}
+
+!0 = metadata !{i32 786689, metadata !1, metadata !"zzz", metadata !2, i32 16777219, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i16 (i16)* @f, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!5 = metadata !{null}
-!6 = metadata !{i32 589860, metadata !3, metadata !"short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !3, metadata !"short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!7 = metadata !{i32 4, i32 22, metadata !8, null}
-!8 = metadata !{i32 589835, metadata !1, i32 3, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 786443, metadata !2, metadata !1, i32 3, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/union-template.ll b/test/DebugInfo/X86/union-template.ll
new file mode 100644
index 000000000000..0f5538e8b40e
--- /dev/null
+++ b/test/DebugInfo/X86/union-template.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O0 -mtriple=x86_64-linux-gnu %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+; Verify that we've emitted template arguments for the union
+; CHECK: DW_TAG_union_type
+; CHECK-NEXT: "Value<float>"
+; CHECK: DW_TAG_template_type_parameter
+; CHECK: "T"
+
+%"union.PR15637::Value" = type { i32 }
+
+@_ZN7PR156371fE = global %"union.PR15637::Value" zeroinitializer, align 4
+
+define void @_ZN7PR156371gEf(float %value) #0 {
+entry:
+ %value.addr = alloca float, align 4
+ %tempValue = alloca %"union.PR15637::Value", align 4
+ store float %value, float* %value.addr, align 4
+ call void @llvm.dbg.declare(metadata !{float* %value.addr}, metadata !23), !dbg !24
+ call void @llvm.dbg.declare(metadata !{%"union.PR15637::Value"* %tempValue}, metadata !25), !dbg !26
+ ret void, !dbg !27
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 (trunk 178499) (llvm/trunk 178472)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"foo.cc", metadata !"/usr/local/google/home/echristo/tmp"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"g", metadata !"g", metadata !"_ZN7PR156371gEf", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (float)* @_ZN7PR156371gEf, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [g]
+!5 = metadata !{i32 786489, metadata !1, null, metadata !"PR15637", i32 1} ; [ DW_TAG_namespace ] [PR15637] [line 1]
+!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null, metadata !8}
+!8 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 786484, i32 0, metadata !5, metadata !"f", metadata !"f", metadata !"_ZN7PR156371fE", metadata !11, i32 6, metadata !12, i32 0, i32 1, %"union.PR15637::Value"* @_ZN7PR156371fE, null} ; [ DW_TAG_variable ] [f] [line 6] [def]
+!11 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.cc]
+!12 = metadata !{i32 786455, metadata !1, metadata !5, metadata !"Value<float>", i32 2, i64 32, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null, metadata !21} ; [ DW_TAG_union_type ] [Value<float>] [line 2, size 32, align 32, offset 0] [from ]
+!13 = metadata !{metadata !14, metadata !16}
+!14 = metadata !{i32 786445, metadata !1, metadata !12, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!15 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!16 = metadata !{i32 786478, metadata !1, metadata !12, metadata !"Value", metadata !"Value", metadata !"", i32 2, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !20, i32 2} ; [ DW_TAG_subprogram ] [line 2] [Value]
+!17 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = metadata !{null, metadata !19}
+!19 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Value<float>]
+!20 = metadata !{i32 786468}
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 786479, null, metadata !"T", metadata !8, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
+!23 = metadata !{i32 786689, metadata !4, metadata !"value", metadata !11, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [value] [line 3]
+!24 = metadata !{i32 3, i32 0, metadata !4, null}
+!25 = metadata !{i32 786688, metadata !4, metadata !"tempValue", metadata !11, i32 4, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [tempValue] [line 4]
+!26 = metadata !{i32 4, i32 0, metadata !4, null}
+!27 = metadata !{i32 5, i32 0, metadata !4, null}
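
Unlike vector.ll and dwarf-public-names.ll further on, this test does not embed its generating source. A sketch recovered from the mangled names (_ZN7PR156371gEf demangles to PR15637::g(float), _ZN7PR156371fE to PR15637::f) and the union metadata; hypothetical, with line placement matching !4, !10, !12 and !25:

    // Hypothetical reconstruction; not part of the patch.
    namespace PR15637 {
      template <class T> union Value { int a; };
      void g(float value) {
        Value<float> tempValue;
      }
      Value<float> f;
    }
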
diff --git a/test/DebugInfo/X86/vector.ll b/test/DebugInfo/X86/vector.ll
new file mode 100644
index 000000000000..570adf9e4329
--- /dev/null
+++ b/test/DebugInfo/X86/vector.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=x86_64-linux-gnu -O0 -filetype=obj -o %t %s
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+; Generated from:
+; clang -g -S -emit-llvm -o foo.ll foo.c
+; typedef int v4si __attribute__((__vector_size__(16)));
+;
+; v4si a;
+
+@a = common global <4 x i32> zeroinitializer, align 16
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, <4 x i32>* @a, null} ; [ DW_TAG_variable ] [a] [line 3] [def]
+!6 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786454, metadata !12, null, metadata !"v4si", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [v4si] [line 1, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] [0, 3]
+!12 = metadata !{metadata !"foo.c", metadata !"/Users/echristo"}
+
+; Check that we get an array type with a vector attribute.
+; CHECK: DW_TAG_array_type
+; CHECK-NEXT: DW_AT_GNU_vector
diff --git a/test/DebugInfo/array.ll b/test/DebugInfo/array.ll
index 9f592a12a923..30771104912f 100644
--- a/test/DebugInfo/array.ll
+++ b/test/DebugInfo/array.ll
@@ -12,23 +12,25 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"array.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"array.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129138)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.0 (trunk 129138)", i1 false, metadata !"", i32 0, null, null, metadata !13, null, null} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590080, metadata !7, metadata !"a", metadata !1, i32 4, metadata !8, i32 0} ; [ DW_TAG_auto_variable ]
-!7 = metadata !{i32 589835, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!8 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !5, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786688, metadata !7, metadata !"a", metadata !1, i32 4, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
+!7 = metadata !{i32 786443, metadata !1, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
+!8 = metadata !{i32 786433, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !5, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ]
!9 = metadata !{metadata !10}
;CHECK: DW_TAG_subrange_type
;CHECK-NEXT: DW_AT_type
;CHECK-NOT: DW_AT_lower_bound
;CHECK-NOT: DW_AT_upper_bound
;CHECK-NEXT: End Of Children Mark
-!10 = metadata !{i32 589857, i64 1, i64 0} ; [ DW_TAG_subrange_type ]
+!10 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ]
!11 = metadata !{i32 4, i32 7, metadata !7, null}
!12 = metadata !{i32 5, i32 3, metadata !7, null}
+!13 = metadata !{metadata !0}
+!14 = metadata !{metadata !"array.c", metadata !"/private/tmp"}
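
The rewritten !10 node is what those CHECK-NOT lines are really about. Reading the subrange operands as {lo, count} (an inference from the bracketed annotations in the sibling tests above, not something this diff states), a count of -1 marks an array of unknown bound:

    // Operand reading inferred from annotations elsewhere in this patch:
    //   {i64 0, i64 4}  -> elements [0, 3]   (vector.ll)
    //   {i64 0, i64 2}  -> elements [0, 1]   (subrange-type.ll)
    //   {i64 0, i64 -1} -> unknown bound, so neither DW_AT_lower_bound nor
    //                      DW_AT_upper_bound is emitted, per the CHECK-NOTs
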
diff --git a/test/DebugInfo/debuglineinfo.test b/test/DebugInfo/debuglineinfo.test
new file mode 100644
index 000000000000..14d2f82b04ac
--- /dev/null
+++ b/test/DebugInfo/debuglineinfo.test
@@ -0,0 +1,49 @@
+RUN: llvm-rtdyld -printline %p/Inputs/test-inline.o \
+RUN: | FileCheck %s -check-prefix TEST_INLINE
+RUN: llvm-rtdyld -printline %p/Inputs/test-parameters.o \
+RUN: | FileCheck %s -check-prefix TEST_PARAMETERS
+
+; This test verifies that relocations are correctly applied to the
+; .debug_line section and exercises DIContext::getLineInfoForAddressRange().
+; If relocations are not applied, the first two functions will be reported as
+; both starting at address zero in the line number table.
+TEST_INLINE: Function: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
+TEST_INLINE-NEXT: Line info @ 0: test-inline.cpp, line:33
+TEST_INLINE-NEXT: Line info @ 35: test-inline.cpp, line:34
+TEST_INLINE-NEXT: Line info @ 165: test-inline.cpp, line:35
+TEST_INLINE-NEXT: Function: _Z3foov, Size = 3
+TEST_INLINE-NEXT: Line info @ 0: test-inline.cpp, line:28
+TEST_INLINE-NEXT: Line info @ 2: test-inline.cpp, line:29
+TEST_INLINE-NEXT: Line info @ 3: test-inline.cpp, line:29
+TEST_INLINE-NEXT: Function: main, Size = 146
+TEST_INLINE-NEXT: Line info @ 0: test-inline.cpp, line:39
+TEST_INLINE-NEXT: Line info @ 21: test-inline.cpp, line:41
+TEST_INLINE-NEXT: Line info @ 39: test-inline.cpp, line:42
+TEST_INLINE-NEXT: Line info @ 60: test-inline.cpp, line:44
+TEST_INLINE-NEXT: Line info @ 80: test-inline.cpp, line:48
+TEST_INLINE-NEXT: Line info @ 90: test-inline.cpp, line:45
+TEST_INLINE-NEXT: Line info @ 95: test-inline.cpp, line:46
+TEST_INLINE-NEXT: Line info @ 114: test-inline.cpp, line:48
+TEST_INLINE-NEXT: Line info @ 141: test-inline.cpp, line:49
+TEST_INLINE-NEXT: Line info @ 146: test-inline.cpp, line:49
+
+; This test checks the case where all code is in a single section.
+TEST_PARAMETERS: Function: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
+TEST_PARAMETERS-NEXT: Line info @ 0: test-parameters.cpp, line:33
+TEST_PARAMETERS-NEXT: Line info @ 35: test-parameters.cpp, line:34
+TEST_PARAMETERS-NEXT: Line info @ 165: test-parameters.cpp, line:35
+TEST_PARAMETERS-NEXT: Function: _Z3foov, Size = 3
+TEST_PARAMETERS-NEXT: Line info @ 0: test-parameters.cpp, line:28
+TEST_PARAMETERS-NEXT: Line info @ 2: test-parameters.cpp, line:29
+TEST_PARAMETERS-NEXT: Function: main, Size = 146
+TEST_PARAMETERS-NEXT: Line info @ 0: test-parameters.cpp, line:39
+TEST_PARAMETERS-NEXT: Line info @ 21: test-parameters.cpp, line:41
+TEST_PARAMETERS-NEXT: Line info @ 39: test-parameters.cpp, line:42
+TEST_PARAMETERS-NEXT: Line info @ 60: test-parameters.cpp, line:44
+TEST_PARAMETERS-NEXT: Line info @ 80: test-parameters.cpp, line:48
+TEST_PARAMETERS-NEXT: Line info @ 90: test-parameters.cpp, line:45
+TEST_PARAMETERS-NEXT: Line info @ 95: test-parameters.cpp, line:46
+TEST_PARAMETERS-NEXT: Line info @ 114: test-parameters.cpp, line:48
+TEST_PARAMETERS-NEXT: Line info @ 141: test-parameters.cpp, line:49
+TEST_PARAMETERS-NEXT: Line info @ 146: test-parameters.cpp, line:49
+
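
The RUN lines drive this through llvm-rtdyld -printline, which walks each function's address range through the DebugInfo context. A minimal sketch of that underlying call, assuming the DIContext interface as it stood around this import (the specifier flags and accessors here are from memory and may differ in detail):

    // Sketch only -- prints "Line info @ <offset>: <file>, line:<n>" rows
    // like the ones checked above.
    #include "llvm/DebugInfo/DIContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void printLineInfoForRange(DIContext &Ctx, uint64_t Addr,
                                      uint64_t Size) {
      DILineInfoTable Lines = Ctx.getLineInfoForAddressRange(
          Addr, Size,
          DILineInfoSpecifier(DILineInfoSpecifier::FileLineInfo |
                              DILineInfoSpecifier::FunctionName));
      for (unsigned I = 0, E = Lines.size(); I != E; ++I)
        outs() << "Line info @ " << (Lines[I].first - Addr) << ": "
               << Lines[I].second.getFileName()
               << ", line:" << Lines[I].second.getLine() << "\n";
    }
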
diff --git a/test/DebugInfo/dwarf-public-names.ll b/test/DebugInfo/dwarf-public-names.ll
new file mode 100644
index 000000000000..52b2397714e3
--- /dev/null
+++ b/test/DebugInfo/dwarf-public-names.ll
@@ -0,0 +1,125 @@
+; RUN: llc -generate-dwarf-pubnames -filetype=obj -o %t.o < %s
+; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck %s
+; XFAIL: hexagon
+; ModuleID = 'dwarf-public-names.cpp'
+;
+; Generated from:
+;
+; struct C {
+; void member_function();
+; static int static_member_function();
+; static int static_member_variable;
+; };
+;
+; int C::static_member_variable = 0;
+;
+; void C::member_function() {
+; static_member_variable = 0;
+; }
+;
+; int C::static_member_function() {
+; return static_member_variable;
+; }
+;
+; C global_variable;
+;
+; int global_function() {
+; return -1;
+; }
+;
+; namespace ns {
+; void global_namespace_function() {
+; global_variable.member_function();
+; }
+; int global_namespace_variable = 1;
+; }
+
+; Skip ahead in the output to the header of the pubnames section.
+; CHECK: debug_pubnames
+
+; Check for each name in the output.
+; CHECK: global_namespace_variable
+; CHECK: global_namespace_function
+; CHECK: static_member_function
+; CHECK: global_variable
+; CHECK: global_function
+; CHECK: member_function
+
+%struct.C = type { i8 }
+
+@_ZN1C22static_member_variableE = global i32 0, align 4
+@global_variable = global %struct.C zeroinitializer, align 1
+@_ZN2ns25global_namespace_variableE = global i32 1, align 4
+
+define void @_ZN1C15member_functionEv(%struct.C* %this) nounwind uwtable align 2 {
+entry:
+ %this.addr = alloca %struct.C*, align 8
+ store %struct.C* %this, %struct.C** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !28), !dbg !30
+ %this1 = load %struct.C** %this.addr
+ store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !31
+ ret void, !dbg !32
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 {
+entry:
+ %0 = load i32* @_ZN1C22static_member_variableE, align 4, !dbg !33
+ ret i32 %0, !dbg !33
+}
+
+define i32 @_Z15global_functionv() nounwind uwtable {
+entry:
+ ret i32 -1, !dbg !34
+}
+
+define void @_ZN2ns25global_namespace_functionEv() nounwind uwtable {
+entry:
+ call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !35
+ ret void, !dbg !36
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 4, metadata !4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!2 = metadata !{metadata !3, metadata !18, metadata !19, metadata !20}
+!3 = metadata !{i32 786478, metadata !4, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
+!4 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = metadata !{null, metadata !7}
+!7 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C]
+!8 = metadata !{i32 786451, metadata !37, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [from ]
+!9 = metadata !{metadata !10, metadata !12, metadata !14}
+!10 = metadata !{i32 786445, metadata !37, metadata !8, metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
+!13 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!14 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
+!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{metadata !11}
+!17 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!18 = metadata !{i32 786478, metadata !4, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
+!19 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
+!20 = metadata !{i32 786478, metadata !4, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
+!21 = metadata !{i32 786489, null, metadata !"ns", metadata !4, i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
+!22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = metadata !{null}
+!24 = metadata !{metadata !25, metadata !26, metadata !27}
+!25 = metadata !{i32 786484, i32 0, metadata !8, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !4, i32 7, metadata !11, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !10} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
+!26 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !4, i32 17, metadata !8, i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def]
+!27 = metadata !{i32 786484, i32 0, metadata !21, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !4, i32 27, metadata !11, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def]
+!28 = metadata !{i32 786689, metadata !3, metadata !"this", metadata !4, i32 16777225, metadata !29, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 9]
+!29 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C]
+!30 = metadata !{i32 9, i32 0, metadata !3, null}
+!31 = metadata !{i32 10, i32 0, metadata !3, null}
+!32 = metadata !{i32 11, i32 0, metadata !3, null}
+!33 = metadata !{i32 14, i32 0, metadata !18, null}
+!34 = metadata !{i32 20, i32 0, metadata !19, null}
+!35 = metadata !{i32 25, i32 0, metadata !20, null}
+!36 = metadata !{i32 26, i32 0, metadata !20, null}
+!37 = metadata !{metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t"}
diff --git a/test/DebugInfo/dwarfdump-debug-frame-simple.test b/test/DebugInfo/dwarfdump-debug-frame-simple.test
new file mode 100644
index 000000000000..c2427d840e45
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-debug-frame-simple.test
@@ -0,0 +1,28 @@
+; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test-32bit.elf.o -debug-dump=frames | FileCheck %s -check-prefix FRAMES
+; Note: the input file was generated from Inputs/dwarfdump-test-32bit.elf.c
+
+; FRAMES: .debug_frame
+; FRAMES-NOT: .eh_frame
+
+; FRAMES: 00000000 00000010 ffffffff CIE
+; FRAMES: Version: 1
+; FRAMES: DW_CFA_def_cfa
+; FRAMES-NEXT: DW_CFA_offset
+; FRAMES-NEXT: DW_CFA_nop
+; FRAMES-NEXT: DW_CFA_nop
+
+; FRAMES: 00000014 00000010 00000000 FDE cie=00000000 pc=00000000...00000022
+; FRAMES: DW_CFA_advance_loc
+; FRAMES-NEXT: DW_CFA_def_cfa_offset
+; FRAMES-NEXT: DW_CFA_nop
+
+; FRAMES: 00000028 00000014 00000000 FDE cie=00000000 pc=00000030...00000080
+; FRAMES: DW_CFA_advance_loc
+; FRAMES-NEXT: DW_CFA_def_cfa_offset
+; FRAMES-NEXT: DW_CFA_offset
+; FRAMES-NEXT: DW_CFA_advance_loc
+; FRAMES-NEXT: DW_CFA_def_cfa_register
+
+; FRAMES-NOT: CIE
+; FRAMES-NOT: FDE
+
diff --git a/test/DebugInfo/dwarfdump-dump-flags.test b/test/DebugInfo/dwarfdump-dump-flags.test
new file mode 100644
index 000000000000..92b2d50f393b
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-dump-flags.test
@@ -0,0 +1,13 @@
+; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 -debug-dump=all | FileCheck %s -check-prefix DUMP_ALL
+; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 -debug-dump=info | FileCheck %s -check-prefix DUMP_INFO
+; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 -debug-dump=ranges | FileCheck %s -check-prefix DUMP_RANGES
+
+; DUMP_ALL: .debug_info
+; DUMP_ALL: .debug_ranges
+
+; DUMP_INFO: .debug_info
+; DUMP_INFO-NOT: .debug_ranges
+
+; DUMP_RANGES-NOT: .debug_info
+; DUMP_RANGES: .debug_ranges
+
diff --git a/test/DebugInfo/dwarfdump-inlining.test b/test/DebugInfo/dwarfdump-inlining.test
index d3a7e12a8703..e926634d52f6 100644
--- a/test/DebugInfo/dwarfdump-inlining.test
+++ b/test/DebugInfo/dwarfdump-inlining.test
@@ -1,28 +1,28 @@
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x613 \
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x710 \
RUN: --inlining --functions | FileCheck %s -check-prefix DEEP_STACK
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x6de \
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x7d1 \
RUN: --inlining | FileCheck %s -check-prefix SHORTER_STACK
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x685 \
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x785 \
RUN: --inlining | FileCheck %s -check-prefix SHORT_STACK
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x640 \
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x737 \
RUN: --functions | FileCheck %s -check-prefix INL_FUNC_NAME
DEEP_STACK: inlined_h
-DEEP_STACK-NEXT: header.h:2:21
+DEEP_STACK-NEXT: dwarfdump-inl-test.h:2
DEEP_STACK-NEXT: inlined_g
-DEEP_STACK-NEXT: header.h:7
+DEEP_STACK-NEXT: dwarfdump-inl-test.h:7
DEEP_STACK-NEXT: inlined_f
-DEEP_STACK-NEXT: main.cc:3
+DEEP_STACK-NEXT: dwarfdump-inl-test.cc:3
DEEP_STACK-NEXT: main
-DEEP_STACK-NEXT: main.cc:8
+DEEP_STACK-NEXT: dwarfdump-inl-test.cc:8
-SHORTER_STACK: header.h:7:20
-SHORTER_STACK-NEXT: main.cc:3
-SHORTER_STACK-NEXT: main.cc:8
+SHORTER_STACK: dwarfdump-inl-test.h:7
+SHORTER_STACK-NEXT: dwarfdump-inl-test.cc:3
+SHORTER_STACK-NEXT: dwarfdump-inl-test.cc:8
-SHORT_STACK: main.cc:3:20
-SHORT_STACK-NEXT: main.cc:8
+SHORT_STACK: dwarfdump-inl-test.cc:3
+SHORT_STACK-NEXT: dwarfdump-inl-test.cc:8
INL_FUNC_NAME: inlined_g
-INL_FUNC_NAME-NEXT: header.h:7:20
+INL_FUNC_NAME-NEXT: dwarfdump-inl-test.h:7
diff --git a/test/DebugInfo/dwarfdump-pubnames.test b/test/DebugInfo/dwarfdump-pubnames.test
new file mode 100644
index 000000000000..e1b16c2f2741
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-pubnames.test
@@ -0,0 +1,16 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-pubnames.elf-x86-64 \
+RUN: -debug-dump=pubnames | FileCheck %s
+
+CHECK: .debug_pubnames contents:
+CHECK: Length: 161
+CHECK: Version: 2
+CHECK: Offset in .debug_info: 0
+CHECK: Size: 321
+
+CHECK: Offset Name
+CHECK: 98 global_namespace_variable
+CHECK: a7 global_namespace_function
+CHECK: ec static_member_function
+CHECK: 7c global_variable
+CHECK: 103 global_function
+CHECK: c2 member_function
diff --git a/test/DebugInfo/dwarfdump-test.test b/test/DebugInfo/dwarfdump-test.test
index 973c3447e340..058d6a36981a 100644
--- a/test/DebugInfo/dwarfdump-test.test
+++ b/test/DebugInfo/dwarfdump-test.test
@@ -1,56 +1,56 @@
RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 \
-RUN: --address=0x400589 --functions | FileCheck %s -check-prefix MAIN
+RUN: --address=0x400559 --functions | FileCheck %s -check-prefix MAIN
RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 \
-RUN: --address=0x400558 --functions | FileCheck %s -check-prefix FUNCTION
+RUN: --address=0x400528 --functions | FileCheck %s -check-prefix FUNCTION
RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 \
-RUN: --address=0x4005b6 --functions | FileCheck %s -check-prefix CTOR_WITH_SPEC
+RUN: --address=0x400586 --functions | FileCheck %s -check-prefix CTOR_WITH_SPEC
RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \
-RUN: --address=0x4004b8 --functions | FileCheck %s -check-prefix MANY_CU_1
+RUN: --address=0x4004e8 --functions | FileCheck %s -check-prefix MANY_CU_1
RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \
-RUN: --address=0x4004c4 --functions | FileCheck %s -check-prefix MANY_CU_2
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
-RUN: --address=0x580 --functions | FileCheck %s -check-prefix ABS_ORIGIN_1
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
-RUN: --address=0x573 --functions | FileCheck %s -check-prefix INCLUDE_TEST_1
-RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
-RUN: --address=0x56d --functions | FileCheck %s -check-prefix INCLUDE_TEST_2
+RUN: --address=0x4004f4 --functions | FileCheck %s -check-prefix MANY_CU_2
+RUN: llvm-dwarfdump "%p/Inputs/dwarfdump-test3.elf-x86-64 space" \
+RUN: --address=0x640 --functions | FileCheck %s -check-prefix ABS_ORIGIN_1
+RUN: llvm-dwarfdump "%p/Inputs/dwarfdump-test3.elf-x86-64 space" \
+RUN: --address=0x633 --functions | FileCheck %s -check-prefix INCLUDE_TEST_1
+RUN: llvm-dwarfdump "%p/Inputs/dwarfdump-test3.elf-x86-64 space" \
+RUN: --address=0x62d --functions | FileCheck %s -check-prefix INCLUDE_TEST_2
RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \
-RUN: --address=0x55c --functions \
+RUN: --address=0x62c --functions \
RUN: | FileCheck %s -check-prefix MANY_SEQ_IN_LINE_TABLE
RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \
RUN: | FileCheck %s -check-prefix DEBUG_RANGES
MAIN: main
-MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16:10
+MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16
FUNCTION: _Z1fii
-FUNCTION-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:11:18
+FUNCTION-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:11
-CTOR_WITH_SPEC: _ZN10DummyClassC1Ei
-CTOR_WITH_SPEC-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:4:30
+CTOR_WITH_SPEC: DummyClass
+CTOR_WITH_SPEC-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:4
MANY_CU_1: a
-MANY_CU_1-NEXT: /tmp/dbginfo{{[/\\]}}a.cc:2:0
+MANY_CU_1-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test2-helper.cc:2
MANY_CU_2: main
-MANY_CU_2-NEXT: /tmp/dbginfo{{[/\\]}}main.cc:4:0
+MANY_CU_2-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test2-main.cc:4
ABS_ORIGIN_1: C
-ABS_ORIGIN_1-NEXT: /tmp/dbginfo{{[/\\]}}def2.cc:4:0
+ABS_ORIGIN_1-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test3.cc:3
-INCLUDE_TEST_1: _Z3do2v
-INCLUDE_TEST_1-NEXT: /tmp/dbginfo{{[/\\]}}include{{[/\\]}}decl2.h:1:0
+INCLUDE_TEST_1: _Z3do1v
+INCLUDE_TEST_1-NEXT: /tmp/include{{[/\\]}}dwarfdump-test3-decl.h:7
-INCLUDE_TEST_2: _Z3do1v
-INCLUDE_TEST_2-NEXT: /tmp/include{{[/\\]}}decl.h:5:0
+INCLUDE_TEST_2: _Z3do2v
+INCLUDE_TEST_2-NEXT: /tmp/dbginfo{{[/\\]}}include{{[/\\]}}dwarfdump-test3-decl2.h:1
MANY_SEQ_IN_LINE_TABLE: _Z1cv
-MANY_SEQ_IN_LINE_TABLE-NEXT: /tmp/dbginfo/sequences{{[/\\]}}c.cc:2:0
+MANY_SEQ_IN_LINE_TABLE-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test4-part1.cc:2
DEBUG_RANGES: .debug_ranges contents:
-DEBUG_RANGES-NEXT: 00000000 000000000000055c 0000000000000567
-DEBUG_RANGES-NEXT: 00000000 0000000000000567 000000000000056d
+DEBUG_RANGES-NEXT: 00000000 000000000000062c 0000000000000637
+DEBUG_RANGES-NEXT: 00000000 0000000000000637 000000000000063d
DEBUG_RANGES-NEXT: 00000000 <End of list>
-DEBUG_RANGES-NEXT: 00000030 0000000000000570 000000000000057b
-DEBUG_RANGES-NEXT: 00000030 0000000000000567 000000000000056d
+DEBUG_RANGES-NEXT: 00000030 0000000000000640 000000000000064b
+DEBUG_RANGES-NEXT: 00000030 0000000000000637 000000000000063d
DEBUG_RANGES-NEXT: 00000030 <End of list>
diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll
index ed4e7da029e5..f302294031c0 100644
--- a/test/DebugInfo/inlined-vars.ll
+++ b/test/DebugInfo/inlined-vars.ll
@@ -4,8 +4,8 @@
define i32 @main() uwtable {
entry:
- tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !18), !dbg !21
- tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !22), !dbg !23
+ tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !18), !dbg !21
+ tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !22), !dbg !23
tail call void @smth(i32 0), !dbg !24
tail call void @smth(i32 0), !dbg !25
ret i32 0, !dbg !19
@@ -17,18 +17,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202", metadata !"clang version 3.2 (trunk 159419)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !10}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 159419)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !10}
+!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786478, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", metadata !6, i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786478, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", metadata !6, i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!12 = metadata !{metadata !9, metadata !9}
!13 = metadata !{metadata !14}
!14 = metadata !{metadata !15, metadata !16}
@@ -39,19 +37,18 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
; ARGUMENT: {{.*Abbrev.*DW_TAG_formal_parameter}}
; ARGUMENT-NOT: {{.*Abbrev.*DW_TAG_formal_parameter}}
-!16 = metadata !{i32 786688, metadata !17, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!16 = metadata !{i32 786688, metadata !10, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
; Two DW_TAG_variable: one abstract and one inlined.
; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
; VARIABLE-NOT: {{.*Abbrev.*DW_TAG_variable}}
-!17 = metadata !{i32 786443, metadata !10, i32 3, i32 35, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
!18 = metadata !{i32 786689, metadata !10, metadata !"argument", metadata !6, i32 16777219, metadata !9, i32 0, metadata !19} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 11, i32 10, metadata !20, null}
-!20 = metadata !{i32 786443, metadata !5, i32 10, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 11, i32 10, metadata !5, null}
!21 = metadata !{i32 3, i32 25, metadata !10, metadata !19}
-!22 = metadata !{i32 786688, metadata !17, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, metadata !19} ; [ DW_TAG_auto_variable ]
-!23 = metadata !{i32 4, i32 16, metadata !17, metadata !19}
-!24 = metadata !{i32 5, i32 3, metadata !17, metadata !19}
-!25 = metadata !{i32 6, i32 3, metadata !17, metadata !19}
+!22 = metadata !{i32 786688, metadata !10, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, metadata !19} ; [ DW_TAG_auto_variable ]
+!23 = metadata !{i32 4, i32 16, metadata !10, metadata !19}
+!24 = metadata !{i32 5, i32 3, metadata !10, metadata !19}
+!25 = metadata !{i32 6, i32 3, metadata !10, metadata !19}
+!26 = metadata !{metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202"}
diff --git a/test/DebugInfo/llvm-symbolizer.test b/test/DebugInfo/llvm-symbolizer.test
new file mode 100644
index 000000000000..842a5e603eb4
--- /dev/null
+++ b/test/DebugInfo/llvm-symbolizer.test
@@ -0,0 +1,25 @@
+RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400559" > %t.input
+RUN: echo "%p/Inputs/dwarfdump-test4.elf-x86-64 0x62c" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x710" >> %t.input
+RUN: echo '"%p/Inputs/dwarfdump-test3.elf-x86-64 space" 0x633' >> %t.input
+
+RUN: llvm-symbolizer --functions --inlining --demangle=false < %t.input \
+RUN: | FileCheck %s
+
+REQUIRES: shell
+
+CHECK: main
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16
+CHECK: _Z1cv
+CHECK-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test4-part1.cc:2
+CHECK: inlined_h
+CHECK-NEXT: dwarfdump-inl-test.h:2
+CHECK-NEXT: inlined_g
+CHECK-NEXT: dwarfdump-inl-test.h:7
+CHECK-NEXT: inlined_f
+CHECK-NEXT: dwarfdump-inl-test.cc:3
+CHECK-NEXT: main
+CHECK-NEXT: dwarfdump-inl-test.cc:
+
+CHECK: _Z3do1v
+CHECK-NEXT: dwarfdump-test3-decl.h:7
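
The new test above drives llvm-symbolizer in its batch mode: each stdin line names an object file and an address, and the tool prints the function plus file:line for every (possibly inlined) frame at that address. A minimal sketch of the same invocation from Python — illustrative only, assuming llvm-symbolizer is on PATH and the Inputs files are available locally:

    import subprocess

    # One "object address" pair per line, as in the %t.input file built above.
    # Paths containing spaces must be quoted, hence the last entry.
    queries = [
        "dwarfdump-test.elf-x86-64 0x400559",
        "dwarfdump-inl-test.elf-x86-64 0x710",
        '"dwarfdump-test3.elf-x86-64 space" 0x633',
    ]
    result = subprocess.run(
        ["llvm-symbolizer", "--functions", "--inlining", "--demangle=false"],
        input="\n".join(queries) + "\n",
        capture_output=True,
        text=True,
    )
    # Each query yields one or more "function / file:line" frame pairs,
    # matching the CHECK lines above.
    print(result.stdout)
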
diff --git a/test/DebugInfo/member-pointers.ll b/test/DebugInfo/member-pointers.ll
new file mode 100644
index 000000000000..4b77189563fe
--- /dev/null
+++ b/test/DebugInfo/member-pointers.ll
@@ -0,0 +1,36 @@
+; RUN: llc -filetype=obj -O0 < %s > %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; CHECK: DW_TAG_ptr_to_member_type
+; CHECK: [[TYPE:.*]]: DW_TAG_subroutine_type
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_type
+; CHECK-NEXT: DW_AT_artificial [DW_FORM_flag_present]
+; CHECK: DW_TAG_ptr_to_member_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]})
+; IR generated from clang -g with the following source:
+; XFAIL: hexagon
+; struct S {
+; };
+;
+; int S::*x = 0;
+; void (S::*y)(int) = 0;
+
+@x = global i64 -1, align 8
+@y = global { i64, i64 } zeroinitializer, align 8
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !10}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 4, metadata !7, i32 0, i32 1, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
+!6 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int]
+!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = metadata !{i32 786451, metadata !15, null, metadata !"S", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !1, i32 0, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [from ]
+!10 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !6, i32 5, metadata !11, i32 0, i32 1, { i64, i64 }* @y, null} ; [ DW_TAG_variable ] [y] [line 5] [def]
+!11 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !12, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{null, metadata !14, metadata !8}
+!14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from S]
+!15 = metadata !{metadata !"simple.cpp", metadata !"/home/blaikie/Development/scratch"}
diff --git a/test/DebugInfo/namespace.ll b/test/DebugInfo/namespace.ll
new file mode 100644
index 000000000000..8d59b523028a
--- /dev/null
+++ b/test/DebugInfo/namespace.ll
@@ -0,0 +1,42 @@
+; RUN: llc -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+; CHECK: debug_info contents
+; CHECK: DW_TAG_namespace
+; CHECK-NEXT: DW_AT_name{{.*}} = "A"
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1:[0-9]]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x03)
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_namespace
+; CHECK-NEXT: DW_AT_name{{.*}} = "B"
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2:[0-9]]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x01)
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: DW_AT_name{{.*}}= "i"
+; CHECK: file_names[ [[F1]]]{{.*}}debug-info-namespace.cpp
+; CHECK: file_names[ [[F2]]]{{.*}}foo.cpp
+
+; IR generated from clang/test/CodeGenCXX/debug-info-namespace.cpp, file paths
+; changed to protect the guilty. The C++ source code is simply:
+; namespace A {
+; #line 1 "foo.cpp"
+; namespace B {
+; int i;
+; }
+; }
+
+@_ZN1A1B1iE = global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !2, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !3, metadata !4, metadata !""} ; [ DW_TAG_compile_unit ] [/home/foo/debug-info-namespace.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 786473, metadata !2} ; [ DW_TAG_file_type ] [/home/foo/debug-info-namespace.cpp]
+!2 = metadata !{metadata !"debug-info-namespace.cpp", metadata !"/home/foo"}
+!3 = metadata !{i32 0}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, metadata !6, metadata !"i", metadata !"i", metadata !"_ZN1A1B1iE", metadata !7, i32 2, metadata !10, i32 0, i32 1, i32* @_ZN1A1B1iE, null} ; [ DW_TAG_variable ] [i] [line 2] [def]
+!6 = metadata !{i32 786489, metadata !8, metadata !9, metadata !"B", i32 1} ; [ DW_TAG_namespace ] [B] [line 1]
+!7 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ] [/home/foo/foo.cpp]
+!8 = metadata !{metadata !"foo.cpp", metadata !"/home/foo"}
+!9 = metadata !{i32 786489, metadata !2, null, metadata !"A", i32 3} ; [ DW_TAG_namespace ] [A] [line 3]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
diff --git a/test/DebugInfo/printdbginfo2.ll b/test/DebugInfo/printdbginfo2.ll
deleted file mode 100644
index 396ae852266a..000000000000
--- a/test/DebugInfo/printdbginfo2.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; RUN: opt < %s -print-dbginfo -disable-output 2>&1 | FileCheck %s
-; grep {%b is variable b of type x declared at x.c:7} %t1
-; grep {%2 is variable b of type x declared at x.c:7} %t1
-; grep {@c.1442 is variable c of type int declared at x.c:4} %t1
-
-%struct.foo = type { i32 }
-
-@main.c = internal global i32 5 ; <i32*> [#uses=1]
-
-define i32 @main() nounwind {
-entry:
- %retval = alloca i32 ; <i32*> [#uses=3]
- %b = alloca %struct.foo, align 4 ; <%struct.foo*> [#uses=2]
-; CHECK:; %b is variable b of type foo declared at x.c:7
- %a = alloca [4 x i32], align 4 ; <[4 x i32]*> [#uses=1]
-; CHECK:; %a is variable a of type declared at x.c:8
- call void @llvm.dbg.func.start(metadata !3)
- store i32 0, i32* %retval
- call void @llvm.dbg.stoppoint(i32 6, i32 3, metadata !1)
- call void @llvm.dbg.stoppoint(i32 7, i32 3, metadata !1)
- %0 = bitcast %struct.foo* %b to { }* ; <{ }*> [#uses=1]
- call void @llvm.dbg.declare(metadata !{%struct.foo* %b}, metadata !4)
-; CHECK:; %0 is variable b of type foo declared at x.c:7
- call void @llvm.dbg.stoppoint(i32 8, i32 3, metadata !1)
- %1 = bitcast [4 x i32]* %a to { }* ; <{ }*> [#uses=1]
- call void @llvm.dbg.declare(metadata !{[4 x i32]* %a}, metadata !8)
-; CHECK:; %1 is variable a of type declared at x.c:8
- call void @llvm.dbg.stoppoint(i32 9, i32 3, metadata !1)
- %tmp = getelementptr inbounds %struct.foo* %b, i32 0, i32 0 ; <i32*> [#uses=1]
-; CHECK:; %tmp is variable b of type foo declared at x.c:7
- store i32 5, i32* %tmp
- call void @llvm.dbg.stoppoint(i32 10, i32 3, metadata !1)
- %tmp1 = load i32* @main.c ; <i32> [#uses=1]
-; CHECK:; @main.c is variable c of type int declared at x.c:6
- store i32 %tmp1, i32* %retval
- br label %2
-
-; <label>:2 ; preds = %entry
- call void @llvm.dbg.stoppoint(i32 11, i32 1, metadata !1)
- call void @llvm.dbg.region.end(metadata !3)
- %3 = load i32* %retval ; <i32> [#uses=1]
- ret i32 %3
-}
-
-declare void @llvm.dbg.func.start(metadata) nounwind readnone
-
-declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.region.end(metadata) nounwind readnone
-
-!llvm.dbg.gv = !{!0}
-
-!0 = metadata !{i32 458804, i32 0, metadata !1, metadata !"c", metadata !"c", metadata !"", metadata !1, i32 6, metadata !2, i1 true, i1 true, i32* @main.c}
-!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"x.c", metadata !"/home/edwin/llvm-git/llvm/test/DebugInfo", metadata !"clang 1.0", i1 true, i1 false, metadata !"", i32 0}
-!2 = metadata !{i32 458788, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
-!3 = metadata !{i32 458798, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 5, metadata !2, i1 false, i1 true}
-!4 = metadata !{i32 459008, metadata !3, metadata !"b", metadata !1, i32 7, metadata !5}
-!5 = metadata !{i32 458771, metadata !1, metadata !"foo", metadata !1, i32 1, i64 32, i64 32, i64 0, i32 0, null, metadata !6, i32 0}
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 458765, metadata !1, metadata !"a", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !2}
-!8 = metadata !{i32 459008, metadata !3, metadata !"a", metadata !1, i32 8, metadata !9}
-!9 = metadata !{i32 458753, metadata !1, metadata !"", null, i32 0, i64 128, i64 32, i64 0, i32 0, metadata !2, metadata !10, i32 0}
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 458785, i64 0, i64 3}
diff --git a/test/DebugInfo/two-cus-from-same-file.ll b/test/DebugInfo/two-cus-from-same-file.ll
new file mode 100644
index 000000000000..58671d59f748
--- /dev/null
+++ b/test/DebugInfo/two-cus-from-same-file.ll
@@ -0,0 +1,71 @@
+; For http://llvm.org/bugs/show_bug.cgi?id=12942
+; There are two CUs coming from /tmp/foo.c in this module. Make sure this
+; doesn't blow up llc and that it produces something reasonable.
+;
+
+; RUN: llc %s -o %t -filetype=obj -O0
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+; XFAIL: hexagon
+; ModuleID = 'test.bc'
+
+@str = private unnamed_addr constant [4 x i8] c"FOO\00"
+@str1 = private unnamed_addr constant [6 x i8] c"Main!\00"
+
+define void @foo() nounwind {
+entry:
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([4 x i8]* @str, i32 0, i32 0)), !dbg !23
+ ret void, !dbg !25
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !21), !dbg !26
+ tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !22), !dbg !27
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @str1, i32 0, i32 0)), !dbg !28
+ tail call void @foo() nounwind, !dbg !30
+ ret i32 0, !dbg !31
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0, !9}
+
+!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null}
+!9 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!10 = metadata !{metadata !12}
+!12 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!14 = metadata !{metadata !15, metadata !15, metadata !16}
+!15 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!16 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ]
+!17 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ]
+!18 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!19 = metadata !{metadata !20}
+!20 = metadata !{metadata !21, metadata !22}
+!21 = metadata !{i32 786689, metadata !12, metadata !"argc", metadata !6, i32 16777227, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!22 = metadata !{i32 786689, metadata !12, metadata !"argv", metadata !6, i32 33554443, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 6, i32 3, metadata !24, null}
+!24 = metadata !{i32 786443, metadata !5, i32 5, i32 16, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 7, i32 1, metadata !24, null}
+!26 = metadata !{i32 11, i32 14, metadata !12, null}
+!27 = metadata !{i32 11, i32 26, metadata !12, null}
+!28 = metadata !{i32 12, i32 3, metadata !29, null}
+!29 = metadata !{i32 786443, metadata !12, i32 11, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!30 = metadata !{i32 13, i32 3, metadata !29, null}
+!31 = metadata !{i32 14, i32 3, metadata !29, null}
+!32 = metadata !{metadata !"foo.c", metadata !"/tmp"}
+
+; This test is kept simple to stay cross-platform (many targets don't yet have
+; sufficiently good DWARF emission and/or dumping).
+; CHECK: {{DW_TAG_compile_unit}}
+; CHECK: {{foo\.c}}
+
diff --git a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
index 28cc54a86806..babd8f6a7803 100644
--- a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
@.LC0 = internal global [10 x i8] c"argc: %d\0A\00" ; <[10 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
index 9f895983fdb1..bbb81b88b16a 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @foo(i32 %X, i32 %Y, double %A) {
%cond212 = fcmp une double %A, 1.000000e+00 ; <i1> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
index 997b2a9037ee..7574267bdcdc 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() {
call i32 @mylog( i32 4 ) ; <i32>:1 [#uses=0]
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
index ba35b5bcc436..261939ad2028 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() {
; <label>:0
diff --git a/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll b/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
index f3c88adf8435..f76f99832825 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
; We were accidentally inverting the signedness of right shifts. Whoops.
diff --git a/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll b/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
index f925e79f2484..2b83bb9e43e8 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() {
%X = fadd double 0.000000e+00, 1.000000e+00 ; <double> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
index 5b426f6c330b..d1ca2bee3994 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @bar(i8* %X) {
; pointer should be 4 byte aligned!
diff --git a/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
index c0a7393f8244..20ef0ff95cdd 100644
--- a/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
+++ b/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
@@ -1,6 +1,6 @@
; This testcase should return with an exit code of 1.
;
-; RUN: not %lli -mtriple=%mcjit_triple -use-mcjit %s
+; RUN: not %lli_mcjit %s
@test = global i64 0 ; <i64*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
index d3e6204a85be..c7bcc5450b09 100644
--- a/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s test
+; RUN: %lli_mcjit %s test
declare i32 @puts(i8*)
diff --git a/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll b/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
index 55a169754104..0512575d1b4e 100644
--- a/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
target datalayout = "e-p:32:32"
diff --git a/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll b/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
index 79c6e7fe4cae..c292a818a091 100644
--- a/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
; Testcase distilled from 256.bzip2.
diff --git a/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll b/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
index ffd6df6e5e25..c0a83f5ecbdb 100644
--- a/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
; Testcase distilled from 256.bzip2.
diff --git a/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
index 90839e96986f..55ce689b865b 100644
--- a/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
+++ b/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
; This testcase failed to work because two variable sized allocas confused the
; local register allocator.
diff --git a/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
index 29ef2c556cd0..2e999967a2c5 100644
--- a/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
;
; Regression Test: EnvironmentTest.ll
diff --git a/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
index 2adb608acbb1..659901b9b36f 100644
--- a/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
+++ b/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
; This testcase exposes a bug in the local register allocator where it runs out
; of registers (due to too many overlapping live ranges), but then attempts to
diff --git a/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
index 91bde4690361..68e31a7074dd 100644
--- a/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
+++ b/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
@A = global i32 0 ; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
index a7462d9e698a..0bc010584f1e 100644
--- a/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
+++ b/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
@@ -1,5 +1,5 @@
; PR672
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s
+; RUN: %lli_mcjit %s
; XFAIL: mcjit-ia32
define i32 @main() {
diff --git a/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll b/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
index 240659660252..43188f2be481 100644
--- a/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
+++ b/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -force-interpreter %s
+; RUN: %lli_mcjit -force-interpreter %s
; PR1836
define i32 @main() {
diff --git a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
index d429d519e04f..0912897c05fa 100644
--- a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -force-interpreter=true %s | grep 1
+; RUN: %lli_mcjit -force-interpreter=true %s | grep 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll b/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
index a6d18e7919cc..7ed0e3870a3b 100644
--- a/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
+++ b/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -force-interpreter=true %s > /dev/null
+; RUN: %lli_mcjit -force-interpreter=true %s > /dev/null
define i32 @main() {
%a = add i32 0, undef
diff --git a/test/ExecutionEngine/MCJIT/fpbitcast.ll b/test/ExecutionEngine/MCJIT/fpbitcast.ll
index bb4957e9e66e..fb5ab6f24215 100644
--- a/test/ExecutionEngine/MCJIT/fpbitcast.ll
+++ b/test/ExecutionEngine/MCJIT/fpbitcast.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -force-interpreter=true %s | grep 40091eb8
+; RUN: %lli_mcjit -force-interpreter=true %s | grep 40091eb8
;
define i32 @test(double %x) {
entry:
diff --git a/test/ExecutionEngine/MCJIT/hello.ll b/test/ExecutionEngine/MCJIT/hello.ll
index ceb9c12ab4bd..b74470724deb 100644
--- a/test/ExecutionEngine/MCJIT/hello.ll
+++ b/test/ExecutionEngine/MCJIT/hello.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
@.LC0 = internal global [12 x i8] c"Hello World\00" ; <[12 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/hello2.ll b/test/ExecutionEngine/MCJIT/hello2.ll
index 756fcadb1caf..cd033d50947a 100644
--- a/test/ExecutionEngine/MCJIT/hello2.ll
+++ b/test/ExecutionEngine/MCJIT/hello2.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
@X = global i32 7 ; <i32*> [#uses=0]
@msg = internal global [13 x i8] c"Hello World\0A\00" ; <[13 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/pr13727.ll b/test/ExecutionEngine/MCJIT/pr13727.ll
index c33bf3281087..1c719c5b7c27 100644
--- a/test/ExecutionEngine/MCJIT/pr13727.ll
+++ b/test/ExecutionEngine/MCJIT/pr13727.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 -disable-lazy-compilation=false %s
+; RUN: %lli_mcjit -O0 -disable-lazy-compilation=false %s
; The intention of this test is to verify that symbols mapped to COMMON in ELF
; work as expected.
diff --git a/test/ExecutionEngine/MCJIT/simplesttest.ll b/test/ExecutionEngine/MCJIT/simplesttest.ll
index 02ad0061fd13..318baf4e8ffd 100644
--- a/test/ExecutionEngine/MCJIT/simplesttest.ll
+++ b/test/ExecutionEngine/MCJIT/simplesttest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() {
ret i32 0
diff --git a/test/ExecutionEngine/MCJIT/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/simpletest-remote.ll
new file mode 100644
index 000000000000..9ceaf545c5e6
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/simpletest-remote.ll
@@ -0,0 +1,12 @@
+; RUN: %lli_mcjit -remote-mcjit %s > /dev/null
+; XFAIL: arm, mips
+
+define i32 @bar() {
+ ret i32 0
+}
+
+define i32 @main() {
+ %r = call i32 @bar( ) ; <i32> [#uses=1]
+ ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/simpletest.ll b/test/ExecutionEngine/MCJIT/simpletest.ll
index 958b783067e4..5b0f2dd3055e 100644
--- a/test/ExecutionEngine/MCJIT/simpletest.ll
+++ b/test/ExecutionEngine/MCJIT/simpletest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @bar() {
ret i32 0
diff --git a/test/ExecutionEngine/MCJIT/stubs-remote.ll b/test/ExecutionEngine/MCJIT/stubs-remote.ll
new file mode 100644
index 000000000000..15cb5d037efc
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/stubs-remote.ll
@@ -0,0 +1,36 @@
+; RUN: %lli_mcjit -remote-mcjit -disable-lazy-compilation=false %s
+; XFAIL: arm, mips
+
+define i32 @main() nounwind {
+entry:
+ call void @lazily_compiled_address_is_consistent()
+ ret i32 0
+}
+
+; Test PR3043: @test should have the same address before and after
+; it's JIT-compiled.
+@funcPtr = common global i1 ()* null, align 4
+@lcaic_failure = internal constant [46 x i8] c"@lazily_compiled_address_is_consistent failed\00"
+
+define void @lazily_compiled_address_is_consistent() nounwind {
+entry:
+ store i1 ()* @test, i1 ()** @funcPtr
+ %pass = tail call i1 @test() ; <i32> [#uses=1]
+ br i1 %pass, label %pass_block, label %fail_block
+pass_block:
+ ret void
+fail_block:
+ call i32 @puts(i8* getelementptr([46 x i8]* @lcaic_failure, i32 0, i32 0))
+ call void @exit(i32 1)
+ unreachable
+}
+
+define i1 @test() nounwind {
+entry:
+ %tmp = load i1 ()** @funcPtr
+ %eq = icmp eq i1 ()* %tmp, @test
+ ret i1 %eq
+}
+
+declare i32 @puts(i8*) noreturn
+declare void @exit(i32) noreturn
diff --git a/test/ExecutionEngine/MCJIT/stubs.ll b/test/ExecutionEngine/MCJIT/stubs.ll
index 9e5d5b2e4186..f4aac3339450 100644
--- a/test/ExecutionEngine/MCJIT/stubs.ll
+++ b/test/ExecutionEngine/MCJIT/stubs.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -disable-lazy-compilation=false %s
+; RUN: %lli_mcjit -disable-lazy-compilation=false %s
define i32 @main() nounwind {
entry:
diff --git a/test/ExecutionEngine/MCJIT/test-arith.ll b/test/ExecutionEngine/MCJIT/test-arith.ll
index b73227fe635e..e1cc23b9fcd3 100644
--- a/test/ExecutionEngine/MCJIT/test-arith.ll
+++ b/test/ExecutionEngine/MCJIT/test-arith.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() {
%A = add i8 0, 12 ; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-branch.ll b/test/ExecutionEngine/MCJIT/test-branch.ll
index 8f3c7279051e..cdf10350ec11 100644
--- a/test/ExecutionEngine/MCJIT/test-branch.ll
+++ b/test/ExecutionEngine/MCJIT/test-branch.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
; test unconditional branch
define i32 @main() {
diff --git a/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll b/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
index 20150b2de626..8a36cf2953f7 100644
--- a/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
+++ b/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @_Z14func_exit_codev() nounwind uwtable {
entry:
diff --git a/test/ExecutionEngine/MCJIT/test-call.ll b/test/ExecutionEngine/MCJIT/test-call.ll
index 51d19fe99178..1a0f00841685 100644
--- a/test/ExecutionEngine/MCJIT/test-call.ll
+++ b/test/ExecutionEngine/MCJIT/test-call.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
declare void @exit(i32)
diff --git a/test/ExecutionEngine/MCJIT/test-cast.ll b/test/ExecutionEngine/MCJIT/test-cast.ll
index dcc97f466568..335ec508eff1 100644
--- a/test/ExecutionEngine/MCJIT/test-cast.ll
+++ b/test/ExecutionEngine/MCJIT/test-cast.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @foo() {
ret i32 0
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll b/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
index d666a2aa4aa3..989a47342339 100644
--- a/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
+++ b/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 %s
+; RUN: %lli_mcjit -O0 %s
; This test checks that common symbols have been allocated addresses honouring
; the alignment requirement.
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll
new file mode 100644
index 000000000000..3b8ee9dd1cce
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll
@@ -0,0 +1,89 @@
+; RUN: %lli_mcjit -remote-mcjit -O0 -disable-lazy-compilation=false %s
+; XFAIL: arm, mips
+
+; The intention of this test is to verify that symbols mapped to COMMON in ELF
+; work as expected.
+;
+; Compiled from this C code:
+;
+; int zero_int;
+; double zero_double;
+; int zero_arr[10];
+;
+; int main()
+; {
+; zero_arr[zero_int + 5] = 40;
+;
+; if (zero_double < 1.0)
+; zero_arr[zero_int + 2] = 70;
+;
+; for (int i = 1; i < 10; ++i) {
+; zero_arr[i] = zero_arr[i - 1] + zero_arr[i];
+; }
+; return zero_arr[9] == 110 ? 0 : -1;
+; }
+
+@zero_int = common global i32 0, align 4
+@zero_arr = common global [10 x i32] zeroinitializer, align 16
+@zero_double = common global double 0.000000e+00, align 8
+
+define i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @zero_int, align 4
+ %add = add nsw i32 %0, 5
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom
+ store i32 40, i32* %arrayidx, align 4
+ %1 = load double* @zero_double, align 8
+ %cmp = fcmp olt double %1, 1.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %2 = load i32* @zero_int, align 4
+ %add1 = add nsw i32 %2, 2
+ %idxprom2 = sext i32 %add1 to i64
+ %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
+ store i32 70, i32* %arrayidx3, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ store i32 1, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %if.end
+ %3 = load i32* %i, align 4
+ %cmp4 = icmp slt i32 %3, 10
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %4 = load i32* %i, align 4
+ %sub = sub nsw i32 %4, 1
+ %idxprom5 = sext i32 %sub to i64
+ %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
+ %5 = load i32* %arrayidx6, align 4
+ %6 = load i32* %i, align 4
+ %idxprom7 = sext i32 %6 to i64
+ %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
+ %7 = load i32* %arrayidx8, align 4
+ %add9 = add nsw i32 %5, %7
+ %8 = load i32* %i, align 4
+ %idxprom10 = sext i32 %8 to i64
+ %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
+ store i32 %add9, i32* %arrayidx11, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %9 = load i32* %i, align 4
+ %inc = add nsw i32 %9, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+ %cmp12 = icmp eq i32 %10, 110
+ %cond = select i1 %cmp12, i32 0, i32 -1
+ ret i32 %cond
+}
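
The comments above embed the C source this IR was compiled from, so the expected exit status can be re-derived by hand; a quick Python replay of the same arithmetic (illustrative only) shows why main() should return 0:

    # Replay of the C logic from the comment block above.
    zero_int = 0
    zero_double = 0.0
    zero_arr = [0] * 10
    zero_arr[zero_int + 5] = 40
    if zero_double < 1.0:
        zero_arr[zero_int + 2] = 70
    for i in range(1, 10):          # running prefix sum over the array
        zero_arr[i] = zero_arr[i - 1] + zero_arr[i]
    assert zero_arr[9] == 110       # 40 + 70, so main() returns 0
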
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols.ll b/test/ExecutionEngine/MCJIT/test-common-symbols.ll
index 8c8190291f18..13ee06a65071 100644
--- a/test/ExecutionEngine/MCJIT/test-common-symbols.ll
+++ b/test/ExecutionEngine/MCJIT/test-common-symbols.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 -disable-lazy-compilation=false %s
+; RUN: %lli_mcjit -O0 -disable-lazy-compilation=false %s
; The intention of this test is to verify that symbols mapped to COMMON in ELF
; work as expected.
diff --git a/test/ExecutionEngine/MCJIT/test-constantexpr.ll b/test/ExecutionEngine/MCJIT/test-constantexpr.ll
index 56c1290448ad..8f15cbd7f7ef 100644
--- a/test/ExecutionEngine/MCJIT/test-constantexpr.ll
+++ b/test/ExecutionEngine/MCJIT/test-constantexpr.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
; This tests to make sure that we can evaluate weird constant expressions
diff --git a/test/ExecutionEngine/MCJIT/test-data-align-remote.ll b/test/ExecutionEngine/MCJIT/test-data-align-remote.ll
new file mode 100644
index 000000000000..9daf1684de81
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-data-align-remote.ll
@@ -0,0 +1,16 @@
+; RUN: %lli_mcjit -remote-mcjit -O0 %s
+; XFAIL: armv7, mips
+
+; Check that a variable is always aligned as specified.
+
+@var = global i32 0, align 32
+define i32 @main() {
+ %addr = ptrtoint i32* @var to i64
+ %mask = and i64 %addr, 31
+ %tst = icmp eq i64 %mask, 0
+ br i1 %tst, label %good, label %bad
+good:
+ ret i32 0
+bad:
+ ret i32 1
+}
diff --git a/test/ExecutionEngine/MCJIT/test-data-align.ll b/test/ExecutionEngine/MCJIT/test-data-align.ll
index 0493cba87fdb..2472d95e7778 100644
--- a/test/ExecutionEngine/MCJIT/test-data-align.ll
+++ b/test/ExecutionEngine/MCJIT/test-data-align.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 %s
+; RUN: %lli_mcjit -O0 %s
; Check that a variable is always aligned as specified.
diff --git a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll
new file mode 100644
index 000000000000..847d2253a0ed
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll
@@ -0,0 +1,22 @@
+; RUN: %lli_mcjit -remote-mcjit %s > /dev/null
+; XFAIL: arm, mips
+
+define double @test(double* %DP, double %Arg) {
+ %D = load double* %DP ; <double> [#uses=1]
+ %V = fadd double %D, 1.000000e+00 ; <double> [#uses=2]
+ %W = fsub double %V, %V ; <double> [#uses=3]
+ %X = fmul double %W, %W ; <double> [#uses=2]
+ %Y = fdiv double %X, %X ; <double> [#uses=2]
+ %Q = fadd double %Y, %Arg ; <double> [#uses=1]
+ %R = bitcast double %Q to double ; <double> [#uses=1]
+ store double %Q, double* %DP
+ ret double %Y
+}
+
+define i32 @main() {
+ %X = alloca double ; <double*> [#uses=2]
+ store double 0.000000e+00, double* %X
+ call double @test( double* %X, double 2.000000e+00 ) ; <double>:1 [#uses=0]
+ ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
index 7af1d8b53910..f094f3d91923 100644
--- a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
+++ b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define double @test(double* %DP, double %Arg) {
%D = load double* %DP ; <double> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-fp.ll b/test/ExecutionEngine/MCJIT/test-fp.ll
index f7e6fb9ba18e..b10e9d6c169d 100644
--- a/test/ExecutionEngine/MCJIT/test-fp.ll
+++ b/test/ExecutionEngine/MCJIT/test-fp.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define double @test(double* %DP, double %Arg) {
%D = load double* %DP ; <double> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-global-ctors.ll b/test/ExecutionEngine/MCJIT/test-global-ctors.ll
new file mode 100644
index 000000000000..4510d9b6a495
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-global-ctors.ll
@@ -0,0 +1,21 @@
+; RUN: %lli_mcjit %s > /dev/null
+@var = global i32 1, align 4
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @ctor_func }]
+@llvm.global_dtors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @dtor_func }]
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @var, align 4
+ ret i32 %0
+}
+
+define internal void @ctor_func() section ".text.startup" {
+entry:
+ store i32 0, i32* @var, align 4
+ ret void
+}
+
+define internal void @dtor_func() section ".text.startup" {
+entry:
+ ret void
+}
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll
new file mode 100644
index 000000000000..b8d94b50cfe2
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll
@@ -0,0 +1,35 @@
+; RUN: %lli_mcjit -remote-mcjit %s > /dev/null
+; XFAIL: arm, mips
+
+@count = global i32 1, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 49
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* @count, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* @count, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %2 = load i32* %i, align 4
+ %inc1 = add nsw i32 %2, 1
+ store i32 %inc1, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %3 = load i32* @count, align 4
+ %sub = sub nsw i32 %3, 50
+ ret i32 %sub
+}
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
index ec6cbad2f14e..b9f74b8be403 100644
--- a/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
+++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
@count = global i32 1, align 4
diff --git a/test/ExecutionEngine/MCJIT/test-global.ll b/test/ExecutionEngine/MCJIT/test-global.ll
index e7972f978e95..6a8c042ef89e 100644
--- a/test/ExecutionEngine/MCJIT/test-global.ll
+++ b/test/ExecutionEngine/MCJIT/test-global.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
@count = global i32 0, align 4
diff --git a/test/ExecutionEngine/MCJIT/test-loadstore.ll b/test/ExecutionEngine/MCJIT/test-loadstore.ll
index f450d0ab528b..90381947e8fb 100644
--- a/test/ExecutionEngine/MCJIT/test-loadstore.ll
+++ b/test/ExecutionEngine/MCJIT/test-loadstore.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
%V = load i8* %P ; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-local.ll b/test/ExecutionEngine/MCJIT/test-local.ll
index d4e9f444e426..d7c173427977 100644
--- a/test/ExecutionEngine/MCJIT/test-local.ll
+++ b/test/ExecutionEngine/MCJIT/test-local.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() nounwind uwtable {
entry:
diff --git a/test/ExecutionEngine/MCJIT/test-logical.ll b/test/ExecutionEngine/MCJIT/test-logical.ll
index 32f45ef119e6..a03833e5c516 100644
--- a/test/ExecutionEngine/MCJIT/test-logical.ll
+++ b/test/ExecutionEngine/MCJIT/test-logical.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() {
%A = and i8 4, 8 ; <i8> [#uses=2]
diff --git a/test/ExecutionEngine/MCJIT/test-loop.ll b/test/ExecutionEngine/MCJIT/test-loop.ll
index ebc689664d65..5ed8c4020f76 100644
--- a/test/ExecutionEngine/MCJIT/test-loop.ll
+++ b/test/ExecutionEngine/MCJIT/test-loop.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() {
; <label>:0
diff --git a/test/ExecutionEngine/MCJIT/test-phi.ll b/test/ExecutionEngine/MCJIT/test-phi.ll
index 1408533d7ae9..4245ccabed78 100644
--- a/test/ExecutionEngine/MCJIT/test-phi.ll
+++ b/test/ExecutionEngine/MCJIT/test-phi.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
; test phi node
@Y = global i32 6 ; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll
new file mode 100644
index 000000000000..f2c2cd6199f7
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll
@@ -0,0 +1,17 @@
+; RUN: %lli_mcjit -remote-mcjit -O0 %s
+; XFAIL: arm, mips
+
+@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
+@ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4
+@.str1 = private unnamed_addr constant [6 x i8] c"data2\00", align 1
+@ptr2 = global i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), align 4
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
+entry:
+ %0 = load i8** @ptr, align 4
+ %1 = load i8** @ptr2, align 4
+ %cmp = icmp eq i8* %0, %1
+ %. = zext i1 %cmp to i32
+ ret i32 %.
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll b/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
index 93b6a6deffd1..871d8bfa29e8 100644
--- a/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
+++ b/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 %s
+; RUN: %lli_mcjit -O0 %s
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
@ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/ExecutionEngine/MCJIT/test-ret.ll b/test/ExecutionEngine/MCJIT/test-ret.ll
index af282926907f..6bfc48052d02 100644
--- a/test/ExecutionEngine/MCJIT/test-ret.ll
+++ b/test/ExecutionEngine/MCJIT/test-ret.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
; test return instructions
define void @test1() {
diff --git a/test/ExecutionEngine/MCJIT/test-return.ll b/test/ExecutionEngine/MCJIT/test-return.ll
index 67f7107c3d7d..4db1c3fe39f0 100644
--- a/test/ExecutionEngine/MCJIT/test-return.ll
+++ b/test/ExecutionEngine/MCJIT/test-return.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() nounwind uwtable {
entry:
diff --git a/test/ExecutionEngine/MCJIT/test-setcond-fp.ll b/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
index a8f4bd8529f8..b4367d0337a0 100644
--- a/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
+++ b/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() {
diff --git a/test/ExecutionEngine/MCJIT/test-setcond-int.ll b/test/ExecutionEngine/MCJIT/test-setcond-int.ll
index ed52b5065c84..8c7d815446cb 100644
--- a/test/ExecutionEngine/MCJIT/test-setcond-int.ll
+++ b/test/ExecutionEngine/MCJIT/test-setcond-int.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() {
%int1 = add i32 0, 0 ; <i32> [#uses=6]
diff --git a/test/ExecutionEngine/MCJIT/test-shift.ll b/test/ExecutionEngine/MCJIT/test-shift.ll
index 5a5c10d56050..8d9a94ef12c8 100644
--- a/test/ExecutionEngine/MCJIT/test-shift.ll
+++ b/test/ExecutionEngine/MCJIT/test-shift.ll
@@ -1,4 +1,4 @@
-; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+; RUN: %lli_mcjit %s > /dev/null
define i32 @main() {
%shamt = add i8 0, 1 ; <i8> [#uses=8]
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
index f0343263dba6..1f8ae69b9868 100644
--- a/test/ExecutionEngine/lit.local.cfg
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -7,6 +7,8 @@ def getRoot(config):
root = getRoot(config)
-if root.host_arch in ['PowerPC']:
+if root.host_arch in ['PowerPC', 'AArch64']:
config.unsupported = True
+if 'hexagon' in root.target_triple:
+ config.unsupported = True
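
For context, getRoot — whose body the hunk elides — is the usual lit helper that walks up to the top-level configuration. A sketch of the whole lit.local.cfg as it reads after this change; the helper body is the common LLVM idiom and is an assumption here, since only its tail appears in the diff context:

    # test/ExecutionEngine/lit.local.cfg, post-patch sketch
    def getRoot(config):
        # Follow parent links up to the root lit configuration object.
        if not config.parent:
            return config
        return getRoot(config.parent)

    root = getRoot(config)

    # ExecutionEngine tests don't run on these host architectures ...
    if root.host_arch in ['PowerPC', 'AArch64']:
        config.unsupported = True

    # ... nor when targeting Hexagon.
    if 'hexagon' in root.target_triple:
        config.unsupported = True
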
diff --git a/test/ExecutionEngine/test-interp-vec-loadstore.ll b/test/ExecutionEngine/test-interp-vec-loadstore.ll
new file mode 100644
index 000000000000..e9f5b445a864
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-loadstore.ll
@@ -0,0 +1,84 @@
+; RUN: %lli -force-interpreter=true %s | FileCheck %s
+; CHECK: 1
+; CHECK: 2
+; CHECK: 3
+; CHECK: 4
+; CHECK: 5.{{[0]+}}e+{{[0]+}}
+; CHECK: 6.{{[0]+}}e+{{[0]+}}
+; CHECK: 7.{{[0]+}}e+{{[0]+}}
+; CHECK: 8.{{[0]+}}e+{{[0]+}}
+; CHECK: 9.{{[0]+}}e+{{[0]+}}
+; CHECK: 1.{{[0]+}}e+{{[0]+}}1
+; CHECK: 1.1{{[0]+}}e+{{[0]+}}1
+; CHECK: 1.2{{[0]+}}e+{{[0]+}}1
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+
+@format_i32 = internal global [4 x i8] c"%d\0A\00"
+@format_float = internal global [4 x i8] c"%e\0A\00"
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+ %a = alloca <4 x i32>, align 16
+ %b = alloca <4 x double>, align 16
+ %c = alloca <4 x float>, align 16
+
+ store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %a, align 16
+
+ %val0 = load <4 x i32> *%a, align 16
+
+ %res_i32_0 = extractelement <4 x i32> %val0, i32 0
+ %res_i32_1 = extractelement <4 x i32> %val0, i32 1
+ %res_i32_2 = extractelement <4 x i32> %val0, i32 2
+ %res_i32_3 = extractelement <4 x i32> %val0, i32 3
+
+ %ptr0 = getelementptr [4 x i8]* @format_i32, i32 0, i32 0
+ call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_0)
+ call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_1)
+ call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_2)
+ call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_3)
+
+ store <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, <4 x double>* %b, align 16
+
+ %val1 = load <4 x double> *%b, align 16
+
+ %res_double_0 = extractelement <4 x double> %val1, i32 0
+ %res_double_1 = extractelement <4 x double> %val1, i32 1
+ %res_double_2 = extractelement <4 x double> %val1, i32 2
+ %res_double_3 = extractelement <4 x double> %val1, i32 3
+
+ %ptr1 = getelementptr [4 x i8]* @format_float, i32 0, i32 0
+ call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_0)
+ call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_1)
+ call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_2)
+ call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_3)
+
+
+ store <4 x float> <float 9.0, float 10.0, float 11.0, float 12.0>, <4 x float>* %c, align 16
+
+ %val2 = load <4 x float> *%c, align 16
+
+ %ptr2 = getelementptr [4 x i8]* @format_float, i32 0, i32 0
+
+ ; For some reason printf doesn't print floats correctly, so
+ ; the floats are extended to doubles and printed as doubles.
+
+ %res_serv_0 = extractelement <4 x float> %val2, i32 0
+ %res_float_0 = fpext float %res_serv_0 to double
+ %res_serv_1 = extractelement <4 x float> %val2, i32 1
+ %res_float_1 = fpext float %res_serv_1 to double
+ %res_serv_2 = extractelement <4 x float> %val2, i32 2
+ %res_float_2 = fpext float %res_serv_2 to double
+ %res_serv_3 = extractelement <4 x float> %val2, i32 3
+ %res_float_3 = fpext float %res_serv_3 to double
+
+
+ call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_0)
+ call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_1)
+ call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_2)
+ call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_3)
+
+
+ ret i32 0
+}
diff --git a/test/Feature/attributes.ll b/test/Feature/attributes.ll
new file mode 100644
index 000000000000..7707d82be5ce
--- /dev/null
+++ b/test/Feature/attributes.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llvm-dis > %t1.ll
+; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
+; RUN: diff %t1.ll %t2.ll
+
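+; The double round trip (assemble, disassemble, repeat, diff) verifies that
+; the attribute group #0 below survives serialization and is printed
+; identically both times.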
+@.str = private unnamed_addr constant [14 x i8] c"hello world!\0A\00", align 1
+
+define void @foo() #0 {
+entry:
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0))
+ ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+attributes #0 = { nounwind ssp uwtable }
diff --git a/test/Feature/const_pv.ll b/test/Feature/const_pv.ll
index 6fd6abdccf08..272bf43a0687 100644
--- a/test/Feature/const_pv.ll
+++ b/test/Feature/const_pv.ll
@@ -4,5 +4,5 @@
@G1 = global i8 zeroinitializer
@g = constant <2 x i8*> getelementptr (<2 x i8*> <i8* @G1, i8* @G1>, <2 x i32> <i32 0, i32 0>)
-@t = constant <2 x i1> icmp ((<2 x i32> ptrtoint (<2 x i8*> zeroinitializer to <2 x i32>), <2 x i32> zeroinitializer )
+@t = constant <2 x i1> icmp eq (<2 x i32> ptrtoint (<2 x i8*> zeroinitializer to <2 x i32>), <2 x i32> zeroinitializer )
diff --git a/test/Feature/global_pv.ll b/test/Feature/global_pv.ll
index d257ec077ab9..34b9a7df8824 100644
--- a/test/Feature/global_pv.ll
+++ b/test/Feature/global_pv.ll
@@ -1,5 +1,5 @@
-; RUN: opt -instcombine -S -o - %s | llvm-as
-; RUN: opt -instcombine -globalopt -S -o - %s | llvm-as
+; RUN: opt -instcombine -S < %s | llvm-as
+; RUN: opt -instcombine -globalopt -S < %s | llvm-as
@G1 = global i32 zeroinitializer
@G2 = global i32 zeroinitializer
@g = global <2 x i32*> zeroinitializer
diff --git a/test/Feature/intrinsics.ll b/test/Feature/intrinsics.ll
index 9e7dc6d4102e..28be053714d1 100644
--- a/test/Feature/intrinsics.ll
+++ b/test/Feature/intrinsics.ll
@@ -61,10 +61,14 @@ define void @libm() {
; FIXME: test ALL the intrinsics in this file.
; rdar://11542750
-; CHECK: declare void @llvm.trap() noreturn nounwind
+; CHECK: declare void @llvm.trap() #2
declare void @llvm.trap()
define void @trap() {
call void @llvm.trap()
ret void
}
+
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes #1 = { nounwind readonly }
+; CHECK: attributes #2 = { noreturn nounwind }
diff --git a/test/Feature/minsize_attr.ll b/test/Feature/minsize_attr.ll
index 51b133c4bdb7..1f915b35a365 100644
--- a/test/Feature/minsize_attr.ll
+++ b/test/Feature/minsize_attr.ll
@@ -1,7 +1,8 @@
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
define void @test1() minsize {
-; CHECK: define void @test1() minsize
+; CHECK: define void @test1() #0
ret void
}
+; CHECK: attributes #0 = { minsize }
diff --git a/test/Feature/properties.ll b/test/Feature/properties.ll
index c688d689be28..2111fa2f8d61 100644
--- a/test/Feature/properties.ll
+++ b/test/Feature/properties.ll
@@ -4,4 +4,3 @@
target datalayout = "e-p:32:32"
target triple = "proc-vend-sys"
-deplibs = [ "m", "c" ]
diff --git a/test/FileCheck/dos-style-eol.txt b/test/FileCheck/dos-style-eol.txt
new file mode 100644
index 000000000000..4252aad4d3e7
--- /dev/null
+++ b/test/FileCheck/dos-style-eol.txt
@@ -0,0 +1,11 @@
+// Test for using FileCheck on DOS style end-of-line
+// This test was deliberately committed with DOS style end of line.
+// Don't change line endings!
+// RUN: FileCheck -input-file %s %s
+// RUN: FileCheck --strict-whitespace -input-file %s %s
+
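+// The {{^}}...{{$}} anchors below must keep matching even though every
+// input line carries a trailing CR, in particular under --strict-whitespace.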
+LINE 1
+; CHECK: {{^}}LINE 1{{$}}
+
+LINE 2
+; CHECK: {{^}}LINE 2{{$}}
\ No newline at end of file
diff --git a/test/FileCheck/lit.local.cfg b/test/FileCheck/lit.local.cfg
new file mode 100644
index 000000000000..ee25f56231c5
--- /dev/null
+++ b/test/FileCheck/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.txt']
diff --git a/test/FileCheck/next-no-match.txt b/test/FileCheck/next-no-match.txt
new file mode 100644
index 000000000000..908615b248c5
--- /dev/null
+++ b/test/FileCheck/next-no-match.txt
@@ -0,0 +1,9 @@
+// RUN: not FileCheck -input-file %s %s
+
+foo
+bar
+; CHECK: foo
+baz
+; CHECK-NEXT: baz
+
+
diff --git a/test/FileCheck/regex-brackets.txt b/test/FileCheck/regex-brackets.txt
new file mode 100644
index 000000000000..fd8568d3a504
--- /dev/null
+++ b/test/FileCheck/regex-brackets.txt
@@ -0,0 +1,7 @@
+// RUN: FileCheck -input-file %s %s
+
+op r1
+op r2, [x r1]
+; CHECK: op [[REG:r[0-9]]]
+; CHECK: op [[REG2:r[0-9]]], [x [[REG]]]
+
diff --git a/test/FileCheck/regex-no-match.txt b/test/FileCheck/regex-no-match.txt
new file mode 100644
index 000000000000..f80ac12911e4
--- /dev/null
+++ b/test/FileCheck/regex-no-match.txt
@@ -0,0 +1,5 @@
+// RUN: not FileCheck -input-file %s %s
+
+foobar
+; CHECK: fooba{{[a-b]}}
+
diff --git a/test/FileCheck/simple-var-capture.txt b/test/FileCheck/simple-var-capture.txt
new file mode 100644
index 000000000000..a487baaa531c
--- /dev/null
+++ b/test/FileCheck/simple-var-capture.txt
@@ -0,0 +1,13 @@
+// RUN: FileCheck -input-file %s %s
+
+op1 r1
+op2 r1, r2
+; CHECK: op1 [[REG:r[0-9]]]
+; CHECK-NEXT: op2 [[REG]]
+
+op3 r16, r18, r21
+op4 r30, r18, r21
+; CHECK: op3 {{r[0-9]+}}, [[REGa:r[0-9]+]], [[REGb:r[0-9]+]]
+; CHECK-NEXT: op4 {{r[0-9]+}}, [[REGa]], [[REGb]]
+
+
diff --git a/test/FileCheck/two-checks-for-same-match.txt b/test/FileCheck/two-checks-for-same-match.txt
new file mode 100644
index 000000000000..2195aa96c540
--- /dev/null
+++ b/test/FileCheck/two-checks-for-same-match.txt
@@ -0,0 +1,8 @@
+// Check that two distinct CHECK lines won't match the same string
+// RUN: not FileCheck -input-file %s %s
+
+; CHECK: {{a[0-9]b}}
+; CHECK: {{a[0-9]b}}
+
+a2b
+
diff --git a/test/FileCheck/var-ref-same-line.txt b/test/FileCheck/var-ref-same-line.txt
new file mode 100644
index 000000000000..1755cefbf864
--- /dev/null
+++ b/test/FileCheck/var-ref-same-line.txt
@@ -0,0 +1,16 @@
+// Test for referencing a variable defined on the same line
+// RUN: FileCheck -input-file %s %s
+
+op1 r1, r2, r1
+
+; CHECK: op1 [[REG:r[0-9]+]], {{r[0-9]+}}, [[REG]]
+
+op3 r1, r2, r1, r2
+
+; CHECK: op3 [[REG1:r[0-9]+]], [[REG2:r[0-9]+]], [[REG1]], [[REG2]]
+
+op4 g1, g2, g1
+
+; Test that parens inside the regex don't confuse FileCheck
+; CHECK: {{([a-z]+[0-9])+}} [[REG:g[0-9]+]], {{g[0-9]+}}, [[REG]]
+
diff --git a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
index 35c5c4a0bba4..38168fc2d68d 100644
--- a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
+++ b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
@@ -36,14 +36,14 @@ target triple = "i386-unknown-linux-gnu"
@ff_mlp_firorder_7 = external global i8
@ff_mlp_firorder_8 = external global i8
-define void @ff_mlp_init_x86(%struct.DSPContext* nocapture %c, %struct.AVCodecContext* nocapture %avctx) nounwind address_safety {
+define void @ff_mlp_init_x86(%struct.DSPContext* nocapture %c, %struct.AVCodecContext* nocapture %avctx) nounwind sanitize_address {
entry:
%mlp_filter_channel = getelementptr inbounds %struct.DSPContext* %c, i32 0, i32 131
store void (i32*, i32*, i32, i32, i32, i32, i32, i32*)* @mlp_filter_channel_x86, void (i32*, i32*, i32, i32, i32, i32, i32, i32*)** %mlp_filter_channel, align 4, !tbaa !0
ret void
}
-define internal void @mlp_filter_channel_x86(i32* %state, i32* %coeff, i32 %firorder, i32 %iirorder, i32 %filter_shift, i32 %mask, i32 %blocksize, i32* %sample_buffer) nounwind address_safety {
+define internal void @mlp_filter_channel_x86(i32* %state, i32* %coeff, i32 %firorder, i32 %iirorder, i32 %filter_shift, i32 %mask, i32 %blocksize, i32* %sample_buffer) nounwind sanitize_address {
entry:
%filter_shift.addr = alloca i32, align 4
%mask.addr = alloca i32, align 4
diff --git a/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll b/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll
new file mode 100644
index 000000000000..6a60d1c29f56
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Here we check that the global redzone sizes grow with the object size.
+
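+; A worked reading of the sizes checked below (an inferred formula, not
+; quoted from the pass source): the right redzone appears to be
+;   RZ = max(32, min(2^18, (Size / 128) * 32)),
+; padded so that Size + RZ is a multiple of 32. For example:
+;   Size = 257:      (257 / 128) * 32 = 64; 257 mod 32 = 1, pad 31 -> 95.
+;   Size = 10000000: (10000000 / 128) * 32 = 2500000, capped at 2^18 = 262144.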
+@G10 = global [10 x i8] zeroinitializer, align 1
+; CHECK: @G10 = global { [10 x i8], [54 x i8] }
+
+@G31 = global [31 x i8] zeroinitializer, align 1
+@G32 = global [32 x i8] zeroinitializer, align 1
+@G33 = global [33 x i8] zeroinitializer, align 1
+; CHECK: @G31 = global { [31 x i8], [33 x i8] }
+; CHECK: @G32 = global { [32 x i8], [32 x i8] }
+; CHECK: @G33 = global { [33 x i8], [63 x i8] }
+
+@G63 = global [63 x i8] zeroinitializer, align 1
+@G64 = global [64 x i8] zeroinitializer, align 1
+@G65 = global [65 x i8] zeroinitializer, align 1
+; CHECK: @G63 = global { [63 x i8], [33 x i8] }
+; CHECK: @G64 = global { [64 x i8], [32 x i8] }
+; CHECK: @G65 = global { [65 x i8], [63 x i8] }
+
+@G127 = global [127 x i8] zeroinitializer, align 1
+@G128 = global [128 x i8] zeroinitializer, align 1
+@G129 = global [129 x i8] zeroinitializer, align 1
+; CHECK: @G127 = global { [127 x i8], [33 x i8] }
+; CHECK: @G128 = global { [128 x i8], [32 x i8] }
+; CHECK: @G129 = global { [129 x i8], [63 x i8] }
+
+@G255 = global [255 x i8] zeroinitializer, align 1
+@G256 = global [256 x i8] zeroinitializer, align 1
+@G257 = global [257 x i8] zeroinitializer, align 1
+; CHECK: @G255 = global { [255 x i8], [33 x i8] }
+; CHECK: @G256 = global { [256 x i8], [64 x i8] }
+; CHECK: @G257 = global { [257 x i8], [95 x i8] }
+
+@G511 = global [511 x i8] zeroinitializer, align 1
+@G512 = global [512 x i8] zeroinitializer, align 1
+@G513 = global [513 x i8] zeroinitializer, align 1
+; CHECK: @G511 = global { [511 x i8], [97 x i8] }
+; CHECK: @G512 = global { [512 x i8], [128 x i8] }
+; CHECK: @G513 = global { [513 x i8], [159 x i8] }
+
+@G1023 = global [1023 x i8] zeroinitializer, align 1
+@G1024 = global [1024 x i8] zeroinitializer, align 1
+@G1025 = global [1025 x i8] zeroinitializer, align 1
+; CHECK: @G1023 = global { [1023 x i8], [225 x i8] }
+; CHECK: @G1024 = global { [1024 x i8], [256 x i8] }
+; CHECK: @G1025 = global { [1025 x i8], [287 x i8] }
+
+@G1000000 = global [1000000 x i8] zeroinitializer, align 1
+@G10000000 = global [10000000 x i8] zeroinitializer, align 1
+@G100000000 = global [100000000 x i8] zeroinitializer, align 1
+; CHECK: @G1000000 = global { [1000000 x i8], [249984 x i8] }
+; CHECK: @G10000000 = global { [10000000 x i8], [262144 x i8] }
+; CHECK: @G100000000 = global { [100000000 x i8], [262144 x i8] }
diff --git a/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
index c0fe15e9fcec..da8f54137598 100644
--- a/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
+++ b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
@@ -11,9 +11,9 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
@f = global %struct_of_7_bytes_4_aligned zeroinitializer, align 4
-; Accessing bytes 4 and 6, not ok to widen to i32 if address_safety is set.
+; Accessing bytes 4 and 6, not ok to widen to i32 if sanitize_address is set.
-define i32 @test_widening_bad(i8* %P) nounwind ssp noredzone address_safety {
+define i32 @test_widening_bad(i8* %P) nounwind ssp noredzone sanitize_address {
entry:
%tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
%conv = zext i8 %tmp to i32
@@ -36,7 +36,7 @@ define void @end_test_widening_bad() {
;; Accessing bytes 4 and 5. Ok to widen to i16.
-define i32 @test_widening_ok(i8* %P) nounwind ssp noredzone address_safety {
+define i32 @test_widening_ok(i8* %P) nounwind ssp noredzone sanitize_address {
entry:
%tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
%conv = zext i8 %tmp to i32
diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll
index 655f69c16fdf..fb32e704af86 100644
--- a/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/test/Instrumentation/AddressSanitizer/basic.ll
@@ -5,12 +5,12 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
-define i32 @test_load(i32* %a) address_safety {
+define i32 @test_load(i32* %a) sanitize_address {
; CHECK: @test_load
; CHECK-NOT: load
; CHECK: %[[LOAD_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
; CHECK: lshr i64 %[[LOAD_ADDR]], 3
-; CHECK: or i64
+; CHECK: {{or|add}}
; CHECK: %[[LOAD_SHADOW_PTR:[^ ]*]] = inttoptr
; CHECK: %[[LOAD_SHADOW:[^ ]*]] = load i8* %[[LOAD_SHADOW_PTR]]
; CHECK: icmp ne i8
@@ -38,12 +38,12 @@ entry:
ret i32 %tmp1
}
-define void @test_store(i32* %a) address_safety {
+define void @test_store(i32* %a) sanitize_address {
; CHECK: @test_store
; CHECK-NOT: store
; CHECK: %[[STORE_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
; CHECK: lshr i64 %[[STORE_ADDR]], 3
-; CHECK: or i64
+; CHECK: {{or|add}}
; CHECK: %[[STORE_SHADOW_PTR:[^ ]*]] = inttoptr
; CHECK: %[[STORE_SHADOW:[^ ]*]] = load i8* %[[STORE_SHADOW_PTR]]
; CHECK: icmp ne i8
@@ -73,7 +73,7 @@ entry:
; Check that asan leaves just one alloca.
declare void @alloca_test_use([10 x i8]*)
-define void @alloca_test() address_safety {
+define void @alloca_test() sanitize_address {
entry:
%x = alloca [10 x i8], align 1
%y = alloca [10 x i8], align 1
@@ -89,3 +89,54 @@ entry:
; CHECK-NOT: = alloca
; CHECK: ret void
+define void @LongDoubleTest(x86_fp80* nocapture %a) nounwind uwtable sanitize_address {
+entry:
+ store x86_fp80 0xK3FFF8000000000000000, x86_fp80* %a, align 16
+ ret void
+}
+
+; CHECK: LongDoubleTest
+; CHECK: __asan_report_store_n
+; CHECK: __asan_report_store_n
+; CHECK: ret void
+
+
+define void @i40test(i40* %a, i40* %b) nounwind uwtable sanitize_address {
+ entry:
+ %t = load i40* %a
+ store i40 %t, i40* %b, align 8
+ ret void
+}
+
+; CHECK: i40test
+; CHECK: __asan_report_load_n{{.*}}, i64 5)
+; CHECK: __asan_report_load_n{{.*}}, i64 5)
+; CHECK: __asan_report_store_n{{.*}}, i64 5)
+; CHECK: __asan_report_store_n{{.*}}, i64 5)
+; CHECK: ret void
+
+define void @i80test(i80* %a, i80* %b) nounwind uwtable sanitize_address {
+ entry:
+ %t = load i80* %a
+ store i80 %t, i80* %b, align 8
+ ret void
+}
+
+; CHECK: i80test
+; CHECK: __asan_report_load_n{{.*}}, i64 10)
+; CHECK: __asan_report_load_n{{.*}}, i64 10)
+; CHECK: __asan_report_store_n{{.*}}, i64 10)
+; CHECK: __asan_report_store_n{{.*}}, i64 10)
+; CHECK: ret void
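+; (i40 is 5 bytes and i80 is 10 bytes wide, hence the __asan_report_*_n
+; calls with explicit sizes instead of the fixed 1/2/4/8/16-byte variants.)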
+
+; asan should not instrument functions with available_externally linkage.
+define available_externally i32 @f_available_externally(i32* %a) sanitize_address {
+entry:
+ %tmp1 = load i32* %a
+ ret i32 %tmp1
+}
+; CHECK: @f_available_externally
+; CHECK-NOT: __asan_report
+; CHECK: ret i32
+
+
diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll
new file mode 100644
index 000000000000..ec51caeb5868
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/debug_info.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+
+; Checks that llvm.dbg.declare instructions are updated
+; correctly when allocas are merged.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @_Z3zzzi(i32 %p) nounwind uwtable sanitize_address {
+entry:
+ %p.addr = alloca i32, align 4
+ %r = alloca i32, align 4
+ store i32 %p, i32* %p.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %p.addr}, metadata !10), !dbg !11
+ call void @llvm.dbg.declare(metadata !{i32* %r}, metadata !12), !dbg !14
+ %0 = load i32* %p.addr, align 4, !dbg !14
+ %add = add nsw i32 %0, 1, !dbg !14
+ store i32 %add, i32* %r, align 4, !dbg !14
+ %1 = load i32* %r, align 4, !dbg !15
+ ret i32 %1, !dbg !15
+}
+
+; CHECK: define i32 @_Z3zzzi
+; CHECK: entry:
+; Verify that llvm.dbg.declare calls are in the entry basic block.
+; CHECK-NOT: %entry
+; CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[ARG_ID:[0-9]+]])
+; CHECK-NOT: %entry
+; CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[VAR_ID:[0-9]+]])
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"a.cc", metadata !"/usr/local/google/llvm_cmake_clang/tmp/debuginfo", metadata !"clang version 3.3 (trunk 169314)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !6, metadata !"zzz", metadata !"zzz", metadata !"_Z3zzzi", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3zzzi, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [zzz]
+!6 = metadata !{i32 786473, metadata !16} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786689, metadata !5, metadata !"p", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 1]
+!11 = metadata !{i32 1, i32 0, metadata !5, null}
+!12 = metadata !{i32 786688, metadata !13, metadata !"r", metadata !6, i32 2, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [r] [line 2]
+
+; Verify that debug descriptors for argument and local variable will be replaced
+; with descriptors that end with OpDeref (encoded as 2).
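+; (The merged allocas live behind a single frame allocation, so each
+; variable location needs one extra indirection, expressed by appending
+; OpDeref to its complex address expression.)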
+; CHECK: ![[ARG_ID]] = metadata {{.*}} i64 2} ; [ DW_TAG_arg_variable ] [p] [line 1]
+; CHECK: ![[VAR_ID]] = metadata {{.*}} i64 2} ; [ DW_TAG_auto_variable ] [r] [line 2]
+; Verify that there are no more variable descriptors.
+; CHECK-NOT: DW_TAG_arg_variable
+; CHECK-NOT: DW_TAG_auto_variable
+
+
+!13 = metadata !{i32 786443, metadata !5, i32 1, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc]
+!14 = metadata !{i32 2, i32 0, metadata !13, null}
+!15 = metadata !{i32 3, i32 0, metadata !13, null}
+!16 = metadata !{metadata !"a.cc", metadata !"/usr/local/google/llvm_cmake_clang/tmp/debuginfo"}
diff --git a/test/Instrumentation/AddressSanitizer/different_scale_and_offset.ll b/test/Instrumentation/AddressSanitizer/different_scale_and_offset.ll
new file mode 100644
index 000000000000..b0371769be05
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/different_scale_and_offset.ll
@@ -0,0 +1,41 @@
+; Test non-default shadow mapping scale and offset.
+;
+; RUN: opt < %s -asan -asan-mapping-scale=2 -asan-mapping-offset-log=0 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Test that ASan reports the mapping scale and offset to the runtime.
+; CHECK: @__asan_mapping_offset = linkonce_odr constant i64 0
+; CHECK: @__asan_mapping_scale = linkonce_odr constant i64 2
+
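+; With -asan-mapping-scale=2 and -asan-mapping-offset-log=0 the shadow
+; address is (Addr >> 2) + 0, so no offset arithmetic follows the lshr.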
+define i32 @test_load(i32* %a) sanitize_address {
+; CHECK: @test_load
+; CHECK-NOT: load
+; CHECK: %[[LOAD_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
+; CHECK: lshr i64 %[[LOAD_ADDR]], 2
+
+; No offset 'or' is needed when the offset is zero.
+; CHECK-NOT: or i64
+
+; CHECK: %[[LOAD_SHADOW_PTR:[^ ]*]] = inttoptr
+; CHECK: %[[LOAD_SHADOW:[^ ]*]] = load i8* %[[LOAD_SHADOW_PTR]]
+; CHECK: icmp ne i8
+; CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
+
+; No slow path is needed for i32 accesses when the mapping scale is 2.
+; CHECK-NOT: and i64 %[[LOAD_ADDR]]
+;
+; The crash block reports the error.
+; CHECK: call void @__asan_report_load4(i64 %[[LOAD_ADDR]])
+; CHECK: unreachable
+;
+; The actual load.
+; CHECK: %tmp1 = load i32* %a
+; CHECK: ret i32 %tmp1
+
+entry:
+ %tmp1 = load i32* %a
+ ret i32 %tmp1
+}
+
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
index 28d4ac0c0f58..0928c494154e 100644
--- a/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
+++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
@@ -5,7 +5,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-define void @_Z3barv() uwtable address_safety {
+define void @_Z3barv() uwtable sanitize_address {
entry:
%a = alloca i32, align 4
call void @_Z3fooPi(i32* %a)
@@ -14,6 +14,7 @@ entry:
declare void @_Z3fooPi(i32*)
; We create one global string constant for the stack frame above.
+; It should have unnamed_addr and align 1.
; Make sure we don't create any other global constants.
-; CHECK: = private constant
-; CHECK-NOT: = private constant
+; CHECK: = private unnamed_addr constant{{.*}}align 1
+; CHECK-NOT: = private unnamed_addr constant
diff --git a/test/Instrumentation/AddressSanitizer/instrument-no-return.ll b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
index 80f1b1c74cd1..2d835a34080a 100644
--- a/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
@@ -1,17 +1,49 @@
; RUN: opt < %s -asan -S | FileCheck %s
; AddressSanitizer must insert __asan_handle_no_return
-; before every noreturn call.
+; before every noreturn call or invoke.
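+; (A noreturn callee never returns to run the code that unpoisons the
+; caller's stack redzones, e.g. on longjmp or exit, so the runtime is asked
+; to unpoison the whole stack first.)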
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
declare void @MyNoReturnFunc(i32) noreturn
-define i32 @_Z5ChildPv(i8* nocapture %arg) uwtable address_safety {
+define i32 @Call1(i8* nocapture %arg) uwtable sanitize_address {
entry:
- call void @MyNoReturnFunc(i32 1) noreturn
+ call void @MyNoReturnFunc(i32 1) noreturn ; The call insn has noreturn attr.
+; CHECK: @Call1
+; CHECK: call void @__asan_handle_no_return
+; CHECK-NEXT: call void @MyNoReturnFunc
+; CHECK-NEXT: unreachable
unreachable
}
+define i32 @Call2(i8* nocapture %arg) uwtable sanitize_address {
+entry:
+ call void @MyNoReturnFunc(i32 1) ; No noreturn attribute on the call.
+; CHECK: @Call2
; CHECK: call void @__asan_handle_no_return
; CHECK-NEXT: call void @MyNoReturnFunc
+; CHECK-NEXT: unreachable
+ unreachable
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+define i64 @Invoke1(i8** %esc) nounwind uwtable ssp sanitize_address {
+entry:
+ invoke void @MyNoReturnFunc(i32 1)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret i64 0
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ filter [0 x i8*] zeroinitializer
+ ret i64 1
+}
+; CHECK: @Invoke1
+; CHECK: call void @__asan_handle_no_return
+; CHECK-NEXT: invoke void @MyNoReturnFunc
+; CHECK: ret i64 0
+; CHECK: ret i64 1
diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll
index 3d92946087ec..2c183f523feb 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_global.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -asan -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@xxx = global i32 0, align 4
diff --git a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
index 472551654e53..1d00cfacafe4 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
@@ -1,11 +1,15 @@
-; RUN: opt < %s -asan -asan-initialization-order -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
-@xxx = global i32 0, align 4
+@xxx = internal global i32 0, align 4 ; With dynamic initializer.
+@XXX = global i32 0, align 4 ; With dynamic initializer.
+@yyy = internal global i32 0, align 4 ; W/o dynamic initializer.
+@YYY = global i32 0, align 4 ; W/o dynamic initializer.
; Clang will emit the following metadata identifying @xxx as dynamically
; initialized.
!0 = metadata !{i32* @xxx}
-!llvm.asan.dynamically_initialized_globals = !{!0}
+!1 = metadata !{i32* @XXX}
+!llvm.asan.dynamically_initialized_globals = !{!0, !1}
define i32 @initializer() uwtable {
entry:
@@ -19,7 +23,7 @@ entry:
ret void
}
-define internal void @_GLOBAL__I_a() address_safety section ".text.startup" {
+define internal void @_GLOBAL__I_a() sanitize_address section ".text.startup" {
entry:
call void @__cxx_global_var_init()
ret void
@@ -34,3 +38,40 @@ entry:
; CHECK: call void @__cxx_global_var_init
; CHECK: call void @__asan_after_dynamic_init
; CHECK: ret
+
+; Check that xxx is instrumented.
+define void @touch_xxx() sanitize_address {
+ store i32 0, i32 *@xxx, align 4
+ ret void
+; CHECK: define void @touch_xxx
+; CHECK: call void @__asan_report_store4
+; CHECK: ret void
+}
+
+; Check that XXX is instrumented.
+define void @touch_XXX() sanitize_address {
+ store i32 0, i32 *@XXX, align 4
+ ret void
+; CHECK: define void @touch_XXX
+; CHECK: call void @__asan_report_store4
+; CHECK: ret void
+}
+
+
+; Check that yyy is NOT instrumented (as it does not have a dynamic initializer).
+define void @touch_yyy() sanitize_address {
+ store i32 0, i32 *@yyy, align 4
+ ret void
+; CHECK: define void @touch_yyy
+; CHECK-NOT: call void @__asan_report_store4
+; CHECK: ret void
+}
+
+; Check that YYY is NOT instrumented (as it does not have a dynamic initializer).
+define void @touch_YYY() sanitize_address {
+ store i32 0, i32 *@YYY, align 4
+ ret void
+; CHECK: define void @touch_YYY
+; CHECK-NOT: call void @__asan_report_store4
+; CHECK: ret void
+}
diff --git a/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll b/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
index 633bf9ae78c0..23cf6d28ec6c 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
@@ -4,7 +4,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
-define void @IncrementMe(i32* %a) address_safety {
+define void @IncrementMe(i32* %a) sanitize_address {
entry:
%tmp1 = load i32* %a, align 4
%tmp2 = add i32 %tmp1, 1
diff --git a/test/Instrumentation/AddressSanitizer/lifetime.ll b/test/Instrumentation/AddressSanitizer/lifetime.ll
new file mode 100644
index 000000000000..334872865f1a
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/lifetime.ll
@@ -0,0 +1,84 @@
+; Test handling of llvm.lifetime intrinsics.
+; RUN: opt < %s -asan -asan-check-lifetime -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+
+define void @lifetime_no_size() sanitize_address {
+entry:
+ %i = alloca i32, align 4
+ %i.ptr = bitcast i32* %i to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %i.ptr)
+ call void @llvm.lifetime.end(i64 -1, i8* %i.ptr)
+
+; Check that lifetime intrinsics with no size are ignored.
+; CHECK: @lifetime_no_size
+; CHECK-NOT: @__asan_poison_stack_memory
+; CHECK-NOT: @__asan_unpoison_stack_memory
+; CHECK: ret void
+ ret void
+}
+
+; Generic case of lifetime analysis.
+define void @lifetime() sanitize_address {
+ ; CHECK: @lifetime
+
+ ; Regular variable lifetime intrinsics.
+ %i = alloca i32, align 4
+ %i.ptr = bitcast i32* %i to i8*
+ call void @llvm.lifetime.start(i64 3, i8* %i.ptr)
+ ; Memory is unpoisoned at llvm.lifetime.start
+ ; CHECK: %[[VAR:[^ ]*]] = ptrtoint i32* %{{[^ ]+}} to i64
+ ; CHECK-NEXT: call void @__asan_unpoison_stack_memory(i64 %[[VAR]], i64 3)
+ call void @llvm.lifetime.end(i64 4, i8* %i.ptr)
+ call void @llvm.lifetime.end(i64 2, i8* %i.ptr)
+ ; Memory is poisoned at every call to llvm.lifetime.end
+ ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 4)
+ ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 2)
+
+ ; Lifetime intrinsics for array.
+ %arr = alloca [10 x i32], align 16
+ %arr.ptr = bitcast [10 x i32]* %arr to i8*
+ call void @llvm.lifetime.start(i64 40, i8* %arr.ptr)
+ ; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 40)
+ call void @llvm.lifetime.end(i64 40, i8* %arr.ptr)
+ ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 40)
+
+ ; One more lifetime start/end for the same variable %i.
+ call void @llvm.lifetime.start(i64 4, i8* %i.ptr)
+ ; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 4)
+ call void @llvm.lifetime.end(i64 4, i8* %i.ptr)
+ ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 4)
+
+ ; Memory is unpoisoned at function exit (only once).
+ ; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 {{.*}})
+ ; CHECK-NOT: @__asan_unpoison_stack_memory
+ ; CHECK: ret void
+ ret void
+}
+
+; Check that arguments of lifetime may come from phi nodes.
+define void @phi_args(i1 %x) sanitize_address {
+ ; CHECK: @phi_args
+
+entry:
+ %i = alloca i64, align 4
+ %i.ptr = bitcast i64* %i to i8*
+ call void @llvm.lifetime.start(i64 8, i8* %i.ptr)
+ ; CHECK: __asan_unpoison_stack_memory
+ br i1 %x, label %bb0, label %bb1
+
+bb0:
+ %i.ptr2 = bitcast i64* %i to i8*
+ br label %bb1
+
+bb1:
+ %i.phi = phi i8* [ %i.ptr, %entry ], [ %i.ptr2, %bb0 ]
+ call void @llvm.lifetime.end(i64 8, i8* %i.phi)
+ ; CHECK: __asan_poison_stack_memory
+ ; CHECK: ret void
+ ret void
+}
diff --git a/test/Instrumentation/AddressSanitizer/test64.ll b/test/Instrumentation/AddressSanitizer/test64.ll
index d544d77b93db..6aa5c2885099 100644
--- a/test/Instrumentation/AddressSanitizer/test64.ll
+++ b/test/Instrumentation/AddressSanitizer/test64.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -asan -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
-define i32 @read_4_bytes(i32* %a) address_safety {
+define i32 @read_4_bytes(i32* %a) sanitize_address {
entry:
%tmp1 = load i32* %a, align 4
ret i32 %tmp1
@@ -9,11 +9,11 @@ entry:
; CHECK: @read_4_bytes
; CHECK-NOT: ret
; CHECK: lshr {{.*}} 3
-; Check for ASAN's Offset for 64-bit (2^44)
-; CHECK-NEXT: 17592186044416
+; Check for ASan's 64-bit shadow offset (2^44 = 17592186044416 or 0x7fff8000 = 2147450880)
+; CHECK-NEXT: {{17592186044416|2147450880}}
; CHECK: ret
-define void @example_atomicrmw(i64* %ptr) nounwind uwtable address_safety {
+define void @example_atomicrmw(i64* %ptr) nounwind uwtable sanitize_address {
entry:
%0 = atomicrmw add i64* %ptr, i64 1 seq_cst
ret void
@@ -24,7 +24,7 @@ entry:
; CHECK: atomicrmw
; CHECK: ret
-define void @example_cmpxchg(i64* %ptr, i64 %compare_to, i64 %new_value) nounwind uwtable address_safety {
+define void @example_cmpxchg(i64* %ptr, i64 %compare_to, i64 %new_value) nounwind uwtable sanitize_address {
entry:
%0 = cmpxchg i64* %ptr, i64 %compare_to, i64 %new_value seq_cst
ret void
diff --git a/test/Instrumentation/MemorySanitizer/lit.local.cfg b/test/Instrumentation/MemorySanitizer/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/MemorySanitizer/msan_basic.ll b/test/Instrumentation/MemorySanitizer/msan_basic.ll
new file mode 100644
index 000000000000..1e7a31793dea
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -0,0 +1,625 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK-ORIGINS %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Check the presence of __msan_init
+; CHECK: @llvm.global_ctors {{.*}} @__msan_init
+
+; Check the presence and the linkage type of __msan_track_origins
+; CHECK: @__msan_track_origins = weak_odr constant i32 0
+
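+; In the tests below every instrumented value carries a same-width shadow:
+; application loads and stores are paired with shadow loads and stores, and
+; with -msan-track-origins=1 a 4-byte origin id is tracked alongside.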
+
+; Check instrumentation of stores
+
+define void @Store(i32* nocapture %p, i32 %x) nounwind uwtable sanitize_memory {
+entry:
+ store i32 %x, i32* %p, align 4
+ ret void
+}
+
+; CHECK: @Store
+; CHECK: load {{.*}} @__msan_param_tls
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+; CHECK-ORIGINS: @Store
+; CHECK-ORIGINS: load {{.*}} @__msan_param_tls
+; CHECK-ORIGINS: store
+; CHECK-ORIGINS: icmp
+; CHECK-ORIGINS: br i1
+; CHECK-ORIGINS: <label>
+; CHECK-ORIGINS: store
+; CHECK-ORIGINS: br label
+; CHECK-ORIGINS: <label>
+; CHECK-ORIGINS: store
+; CHECK-ORIGINS: ret void
+
+
+; Check instrumentation of aligned stores
+; Shadow store has the same alignment as the original store; origin store
+; does not specify explicit alignment.
+
+define void @AlignedStore(i32* nocapture %p, i32 %x) nounwind uwtable sanitize_memory {
+entry:
+ store i32 %x, i32* %p, align 32
+ ret void
+}
+
+; CHECK: @AlignedStore
+; CHECK: load {{.*}} @__msan_param_tls
+; CHECK: store {{.*}} align 32
+; CHECK: store {{.*}} align 32
+; CHECK: ret void
+; CHECK-ORIGINS: @AlignedStore
+; CHECK-ORIGINS: load {{.*}} @__msan_param_tls
+; CHECK-ORIGINS: store {{.*}} align 32
+; CHECK-ORIGINS: icmp
+; CHECK-ORIGINS: br i1
+; CHECK-ORIGINS: <label>
+; CHECK-ORIGINS: store {{.*}} align 32
+; CHECK-ORIGINS: br label
+; CHECK-ORIGINS: <label>
+; CHECK-ORIGINS: store {{.*}} align 32
+; CHECK-ORIGINS: ret void
+
+
+; load followed by cmp: check that we load the shadow and call __msan_warning.
+define void @LoadAndCmp(i32* nocapture %a) nounwind uwtable sanitize_memory {
+entry:
+ %0 = load i32* %a, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void (...)* @foo() nounwind
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+declare void @foo(...)
+
+; CHECK: @LoadAndCmp
+; CHECK: = load
+; CHECK: = load
+; CHECK: call void @__msan_warning_noreturn()
+; CHECK-NEXT: call void asm sideeffect
+; CHECK-NEXT: unreachable
+; CHECK: ret void
+
+; Check that we store the shadow for the retval.
+define i32 @ReturnInt() nounwind uwtable readnone sanitize_memory {
+entry:
+ ret i32 123
+}
+
+; CHECK: @ReturnInt
+; CHECK: store i32 0,{{.*}}__msan_retval_tls
+; CHECK: ret i32
+
+; Check that we get the shadow for the retval.
+define void @CopyRetVal(i32* nocapture %a) nounwind uwtable sanitize_memory {
+entry:
+ %call = tail call i32 @ReturnInt() nounwind
+ store i32 %call, i32* %a, align 4
+ ret void
+}
+
+; CHECK: @CopyRetVal
+; CHECK: load{{.*}}__msan_retval_tls
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+
+
+; Check that we generate PHIs for shadow.
+define void @FuncWithPhi(i32* nocapture %a, i32* %b, i32* nocapture %c) nounwind uwtable sanitize_memory {
+entry:
+ %tobool = icmp eq i32* %b, null
+ br i1 %tobool, label %if.else, label %if.then
+
+ if.then: ; preds = %entry
+ %0 = load i32* %b, align 4
+ br label %if.end
+
+ if.else: ; preds = %entry
+ %1 = load i32* %c, align 4
+ br label %if.end
+
+ if.end: ; preds = %if.else, %if.then
+ %t.0 = phi i32 [ %0, %if.then ], [ %1, %if.else ]
+ store i32 %t.0, i32* %a, align 4
+ ret void
+}
+
+; CHECK: @FuncWithPhi
+; CHECK: = phi
+; CHECK-NEXT: = phi
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+
+; Compute shadow for "x << 10"
+define void @ShlConst(i32* nocapture %x) nounwind uwtable sanitize_memory {
+entry:
+ %0 = load i32* %x, align 4
+ %1 = shl i32 %0, 10
+ store i32 %1, i32* %x, align 4
+ ret void
+}
+
+; CHECK: @ShlConst
+; CHECK: = load
+; CHECK: = load
+; CHECK: shl
+; CHECK: shl
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+
+; Compute shadow for "10 << x": it should have 'sext i1'.
+define void @ShlNonConst(i32* nocapture %x) nounwind uwtable sanitize_memory {
+entry:
+ %0 = load i32* %x, align 4
+ %1 = shl i32 10, %0
+ store i32 %1, i32* %x, align 4
+ ret void
+}
+
+; CHECK: @ShlNonConst
+; CHECK: = load
+; CHECK: = load
+; CHECK: = sext i1
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+
+; SExt
+define void @SExt(i32* nocapture %a, i16* nocapture %b) nounwind uwtable sanitize_memory {
+entry:
+ %0 = load i16* %b, align 2
+ %1 = sext i16 %0 to i32
+ store i32 %1, i32* %a, align 4
+ ret void
+}
+
+; CHECK: @SExt
+; CHECK: = load
+; CHECK: = load
+; CHECK: = sext
+; CHECK: = sext
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+
+
+; memset
+define void @MemSet(i8* nocapture %x) nounwind uwtable sanitize_memory {
+entry:
+ call void @llvm.memset.p0i8.i64(i8* %x, i8 42, i64 10, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+; CHECK: @MemSet
+; CHECK: call i8* @__msan_memset
+; CHECK: ret void
+
+
+; memcpy
+define void @MemCpy(i8* nocapture %x, i8* nocapture %y) nounwind uwtable sanitize_memory {
+entry:
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x, i8* %y, i64 10, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+; CHECK: @MemCpy
+; CHECK: call i8* @__msan_memcpy
+; CHECK: ret void
+
+
+; memmove is lowered to a call
+define void @MemMove(i8* nocapture %x, i8* nocapture %y) nounwind uwtable sanitize_memory {
+entry:
+ call void @llvm.memmove.p0i8.p0i8.i64(i8* %x, i8* %y, i64 10, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+; CHECK: @MemMove
+; CHECK: call i8* @__msan_memmove
+; CHECK: ret void
+
+
+; Check that we propagate shadow for "select"
+
+define i32 @Select(i32 %a, i32 %b, i32 %c) nounwind uwtable readnone sanitize_memory {
+entry:
+ %tobool = icmp ne i32 %c, 0
+ %cond = select i1 %tobool, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+; CHECK: @Select
+; CHECK: select
+; CHECK-NEXT: select
+; CHECK: ret i32
+
+
+; Check that we propagate origin for "select" with vector condition.
+; Select condition is flattened to i1, which is then used to select one of the
+; argument origins.
+
+define <8 x i16> @SelectVector(<8 x i16> %a, <8 x i16> %b, <8 x i1> %c) nounwind uwtable readnone sanitize_memory {
+entry:
+ %cond = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %cond
+}
+
+; CHECK-ORIGINS: @SelectVector
+; CHECK-ORIGINS: bitcast <8 x i1> {{.*}} to i8
+; CHECK-ORIGINS: icmp ne i8
+; CHECK-ORIGINS: select i1
+; CHECK-ORIGINS: ret <8 x i16>
+
+
+define i8* @IntToPtr(i64 %x) nounwind uwtable readnone sanitize_memory {
+entry:
+ %0 = inttoptr i64 %x to i8*
+ ret i8* %0
+}
+
+; CHECK: @IntToPtr
+; CHECK: load i64*{{.*}}__msan_param_tls
+; CHECK-NEXT: inttoptr
+; CHECK-NEXT: store i64{{.*}}__msan_retval_tls
+; CHECK: ret i8
+
+
+define i8* @IntToPtr_ZExt(i16 %x) nounwind uwtable readnone sanitize_memory {
+entry:
+ %0 = inttoptr i16 %x to i8*
+ ret i8* %0
+}
+
+; CHECK: @IntToPtr_ZExt
+; CHECK: zext
+; CHECK-NEXT: inttoptr
+; CHECK: ret i8
+
+
+; Check that we insert exactly one check on udiv
+; (2nd arg shadow is checked, 1st arg shadow is propagated)
+
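+; An uninitialized divisor can trap before the result is ever used, so it
+; must be checked eagerly; uncertainty in the dividend merely propagates
+; into the result shadow.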
+define i32 @Div(i32 %a, i32 %b) nounwind uwtable readnone sanitize_memory {
+entry:
+ %div = udiv i32 %a, %b
+ ret i32 %div
+}
+
+; CHECK: @Div
+; CHECK: icmp
+; CHECK: call void @__msan_warning
+; CHECK-NOT: icmp
+; CHECK: udiv
+; CHECK-NOT: icmp
+; CHECK: ret i32
+
+
+; Check that we propagate shadow for x<0, x>=0, etc (i.e. sign bit tests)
+
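+; Only the sign bit of x affects the result, so the result shadow can be
+; derived with a second signed comparison on the shadow value instead of a
+; warning; hence the two icmp instructions and no __msan_warning below.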
+define zeroext i1 @ICmpSLT(i32 %x) nounwind uwtable readnone sanitize_memory {
+ %1 = icmp slt i32 %x, 0
+ ret i1 %1
+}
+
+; CHECK: @ICmpSLT
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+define zeroext i1 @ICmpSGE(i32 %x) nounwind uwtable readnone sanitize_memory {
+ %1 = icmp sge i32 %x, 0
+ ret i1 %1
+}
+
+; CHECK: @ICmpSGE
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp sge
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+define zeroext i1 @ICmpSGT(i32 %x) nounwind uwtable readnone sanitize_memory {
+ %1 = icmp sgt i32 0, %x
+ ret i1 %1
+}
+
+; CHECK: @ICmpSGT
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp sgt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+define zeroext i1 @ICmpSLE(i32 %x) nounwind uwtable readnone sanitize_memory {
+ %1 = icmp sle i32 0, %x
+ ret i1 %1
+}
+
+; CHECK: @ICmpSLE
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp sle
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+
+; Check that we propagate shadow for x<0, x>=0, etc (i.e. sign bit tests)
+; of the vector arguments.
+
+define <2 x i1> @ICmpSLT_vector(<2 x i32*> %x) nounwind uwtable readnone sanitize_memory {
+ %1 = icmp slt <2 x i32*> %x, zeroinitializer
+ ret <2 x i1> %1
+}
+
+; CHECK: @ICmpSLT_vector
+; CHECK: icmp slt <2 x i64>
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp slt <2 x i32*>
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret <2 x i1>
+
+
+; Check that we propagate shadow for unsigned relational comparisons with
+; constants
+
+define zeroext i1 @ICmpUGTConst(i32 %x) nounwind uwtable readnone sanitize_memory {
+entry:
+ %cmp = icmp ugt i32 %x, 7
+ ret i1 %cmp
+}
+
+; CHECK: @ICmpUGTConst
+; CHECK: icmp ugt i32
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp ugt i32
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp ugt i32
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+
+; Check that loads of shadow have the same alignment as the original loads.
+; Check that loads of origin have the alignment of max(4, original alignment).
+
+define i32 @ShadowLoadAlignmentLarge() nounwind uwtable sanitize_memory {
+ %y = alloca i32, align 64
+ %1 = load volatile i32* %y, align 64
+ ret i32 %1
+}
+
+; CHECK: @ShadowLoadAlignmentLarge
+; CHECK: load i32* {{.*}} align 64
+; CHECK: load volatile i32* {{.*}} align 64
+; CHECK: ret i32
+
+define i32 @ShadowLoadAlignmentSmall() nounwind uwtable sanitize_memory {
+ %y = alloca i32, align 2
+ %1 = load volatile i32* %y, align 2
+ ret i32 %1
+}
+
+; CHECK: @ShadowLoadAlignmentSmall
+; CHECK: load i32* {{.*}} align 2
+; CHECK: load volatile i32* {{.*}} align 2
+; CHECK: ret i32
+
+; CHECK-ORIGINS: @ShadowLoadAlignmentSmall
+; CHECK-ORIGINS: load i32* {{.*}} align 2
+; CHECK-ORIGINS: load i32* {{.*}} align 4
+; CHECK-ORIGINS: load volatile i32* {{.*}} align 2
+; CHECK-ORIGINS: ret i32
+
+
+; Test vector manipulation instructions.
+; Check that the same bit manipulation is applied to the shadow values.
+; Check that there is a zero test of the shadow of %idx argument, where present.
+
+define i32 @ExtractElement(<4 x i32> %vec, i32 %idx) sanitize_memory {
+ %x = extractelement <4 x i32> %vec, i32 %idx
+ ret i32 %x
+}
+
+; CHECK: @ExtractElement
+; CHECK: extractelement
+; CHECK: call void @__msan_warning
+; CHECK: extractelement
+; CHECK: ret i32
+
+define <4 x i32> @InsertElement(<4 x i32> %vec, i32 %idx, i32 %x) sanitize_memory {
+ %vec1 = insertelement <4 x i32> %vec, i32 %x, i32 %idx
+ ret <4 x i32> %vec1
+}
+
+; CHECK: @InsertElement
+; CHECK: insertelement
+; CHECK: call void @__msan_warning
+; CHECK: insertelement
+; CHECK: ret <4 x i32>
+
+define <4 x i32> @ShuffleVector(<4 x i32> %vec, <4 x i32> %vec1) sanitize_memory {
+ %vec2 = shufflevector <4 x i32> %vec, <4 x i32> %vec1,
+ <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ret <4 x i32> %vec2
+}
+
+; CHECK: @ShuffleVector
+; CHECK: shufflevector
+; CHECK-NOT: call void @__msan_warning
+; CHECK: shufflevector
+; CHECK: ret <4 x i32>
+
+
+; Test bswap intrinsic instrumentation
+define i32 @BSwap(i32 %x) nounwind uwtable readnone sanitize_memory {
+ %y = tail call i32 @llvm.bswap.i32(i32 %x)
+ ret i32 %y
+}
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+; CHECK: @BSwap
+; CHECK-NOT: call void @__msan_warning
+; CHECK: @llvm.bswap.i32
+; CHECK-NOT: call void @__msan_warning
+; CHECK: @llvm.bswap.i32
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i32
+
+
+; Store intrinsic.
+
+define void @StoreIntrinsic(i8* %p, <4 x float> %x) nounwind uwtable sanitize_memory {
+ call void @llvm.x86.sse.storeu.ps(i8* %p, <4 x float> %x)
+ ret void
+}
+
+declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
+
+; CHECK: @StoreIntrinsic
+; CHECK-NOT: br
+; CHECK-NOT: = or
+; CHECK: store <4 x i32> {{.*}} align 1
+; CHECK: call void @llvm.x86.sse.storeu.ps
+; CHECK: ret void
+
+
+; Load intrinsic.
+
+define <16 x i8> @LoadIntrinsic(i8* %p) nounwind uwtable sanitize_memory {
+ %call = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %p)
+ ret <16 x i8> %call
+}
+
+declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %p) nounwind
+
+; CHECK: @LoadIntrinsic
+; CHECK: load <16 x i8>* {{.*}} align 1
+; CHECK-NOT: br
+; CHECK-NOT: = or
+; CHECK: call <16 x i8> @llvm.x86.sse3.ldu.dq
+; CHECK: store <16 x i8> {{.*}} @__msan_retval_tls
+; CHECK: ret <16 x i8>
+
+; CHECK-ORIGINS: @LoadIntrinsic
+; CHECK-ORIGINS: [[ORIGIN:%[01-9a-z]+]] = load i32* {{.*}}
+; CHECK-ORIGINS: call <16 x i8> @llvm.x86.sse3.ldu.dq
+; CHECK-ORIGINS: store i32 {{.*}}[[ORIGIN]], i32* @__msan_retval_origin_tls
+; CHECK-ORIGINS: ret <16 x i8>
+
+
+; Simple NoMem intrinsic
+; Check that the shadows are OR'ed and the origin is Select'ed,
+; and that no shadow checks are emitted.
+
+define <8 x i16> @Paddsw128(<8 x i16> %a, <8 x i16> %b) nounwind uwtable sanitize_memory {
+ %call = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %call
+}
+
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) nounwind
+
+; CHECK: @Paddsw128
+; CHECK-NEXT: load <8 x i16>* {{.*}} @__msan_param_tls
+; CHECK-NEXT: load <8 x i16>* {{.*}} @__msan_param_tls
+; CHECK-NEXT: = or <8 x i16>
+; CHECK-NEXT: call <8 x i16> @llvm.x86.sse2.padds.w
+; CHECK-NEXT: store <8 x i16> {{.*}} @__msan_retval_tls
+; CHECK-NEXT: ret <8 x i16>
+
+; CHECK-ORIGINS: @Paddsw128
+; CHECK-ORIGINS: load i32* {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS: load i32* {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS: = bitcast <8 x i16> {{.*}} to i128
+; CHECK-ORIGINS-NEXT: = icmp ne i128 {{.*}}, 0
+; CHECK-ORIGINS-NEXT: = select i1 {{.*}}, i32 {{.*}}, i32
+; CHECK-ORIGINS: call <8 x i16> @llvm.x86.sse2.padds.w
+; CHECK-ORIGINS: store i32 {{.*}} @__msan_retval_origin_tls
+; CHECK-ORIGINS: ret <8 x i16>
+
+
+; Test handling of vectors of pointers.
+; Check that shadow of such vector is a vector of integers.
+
+define <8 x i8*> @VectorOfPointers(<8 x i8*>* %p) nounwind uwtable sanitize_memory {
+ %x = load <8 x i8*>* %p
+ ret <8 x i8*> %x
+}
+
+; CHECK: @VectorOfPointers
+; CHECK: load <8 x i64>*
+; CHECK: load <8 x i8*>*
+; CHECK: store <8 x i64> {{.*}} @__msan_retval_tls
+; CHECK: ret <8 x i8*>
+
+; Test handling of va_copy.
+
+declare void @llvm.va_copy(i8*, i8*) nounwind
+
+define void @VACopy(i8* %p1, i8* %p2) nounwind uwtable sanitize_memory {
+ call void @llvm.va_copy(i8* %p1, i8* %p2) nounwind
+ ret void
+}
+
+; CHECK: @VACopy
+; CHECK: call void @llvm.memset.p0i8.i64({{.*}}, i8 0, i64 24, i32 8, i1 false)
+; CHECK: ret void
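+; (va_list is 24 bytes on x86_64, hence the 24-byte shadow memset above.)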
+
+
+; Test handling of volatile stores.
+; Check that MemorySanitizer does not add a check of the value being stored.
+
+define void @VolatileStore(i32* nocapture %p, i32 %x) nounwind uwtable sanitize_memory {
+entry:
+ store volatile i32 %x, i32* %p, align 4
+ ret void
+}
+
+; CHECK: @VolatileStore
+; CHECK-NOT: @__msan_warning
+; CHECK: ret void
+
+
+; Test that checks are omitted but shadow propagation is kept if the
+; sanitize_memory attribute is missing.
+
+define i32 @NoSanitizeMemory(i32 %x) uwtable {
+entry:
+ %tobool = icmp eq i32 %x, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret i32 %x
+}
+
+declare void @bar()
+
+; CHECK: @NoSanitizeMemory
+; CHECK-NOT: @__msan_warning
+; CHECK: load i32* {{.*}} @__msan_param_tls
+; CHECK-NOT: @__msan_warning
+; CHECK: store {{.*}} @__msan_retval_tls
+; CHECK-NOT: @__msan_warning
+; CHECK: ret i32
diff --git a/test/Instrumentation/MemorySanitizer/unreachable.ll b/test/Instrumentation/MemorySanitizer/unreachable.ll
new file mode 100644
index 000000000000..c8130717c7da
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/unreachable.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -msan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+; Test that MemorySanitizer correctly handles unreachable blocks.
+
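+; Unreachable blocks are expected to be stripped before instrumentation, so
+; the phi below collapses to the constant 42 and the retval shadow checked
+; below is the constant 0.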
+define i32 @Func(i32* %p) nounwind uwtable {
+entry:
+ br label %exit
+
+unreachable:
+ %x = load i32* %p
+ br label %exit
+
+exit:
+ %z = phi i32 [ 42, %entry ], [ %x, %unreachable ]
+ ret i32 %z
+}
+
+; CHECK: @Func
+; CHECK: store i32 0, {{.*}} @__msan_retval_tls
+; CHECK: ret i32 42
+
+
+define i32 @UnreachableLoop() nounwind uwtable {
+entry:
+ ret i32 0
+
+zzz:
+ br label %xxx
+
+xxx:
+ br label %zzz
+}
+
+; CHECK: @UnreachableLoop
+; CHECK: store i32 0, {{.*}} @__msan_retval_tls
+; CHECK: ret i32 0
diff --git a/test/Instrumentation/ThreadSanitizer/atomic.ll b/test/Instrumentation/ThreadSanitizer/atomic.ll
index 107dbdc0f227..70b6cbbf3105 100644
--- a/test/Instrumentation/ThreadSanitizer/atomic.ll
+++ b/test/Instrumentation/ThreadSanitizer/atomic.ll
@@ -114,6 +114,14 @@ entry:
; CHECK: atomic8_xor_monotonic
; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 0)
+define void @atomic8_nand_monotonic(i8* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i8* %a, i8 0 monotonic
+ ret void
+}
+; CHECK: atomic8_nand_monotonic
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 0)
+
define void @atomic8_xchg_acquire(i8* %a) nounwind uwtable {
entry:
atomicrmw xchg i8* %a, i8 0 acquire
@@ -162,6 +170,14 @@ entry:
; CHECK: atomic8_xor_acquire
; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 2)
+define void @atomic8_nand_acquire(i8* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i8* %a, i8 0 acquire
+ ret void
+}
+; CHECK: atomic8_nand_acquire
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 2)
+
define void @atomic8_xchg_release(i8* %a) nounwind uwtable {
entry:
atomicrmw xchg i8* %a, i8 0 release
@@ -210,6 +226,14 @@ entry:
; CHECK: atomic8_xor_release
; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 3)
+define void @atomic8_nand_release(i8* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i8* %a, i8 0 release
+ ret void
+}
+; CHECK: atomic8_nand_release
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 3)
+
define void @atomic8_xchg_acq_rel(i8* %a) nounwind uwtable {
entry:
atomicrmw xchg i8* %a, i8 0 acq_rel
@@ -258,6 +282,14 @@ entry:
; CHECK: atomic8_xor_acq_rel
; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 4)
+define void @atomic8_nand_acq_rel(i8* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i8* %a, i8 0 acq_rel
+ ret void
+}
+; CHECK: atomic8_nand_acq_rel
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 4)
+
define void @atomic8_xchg_seq_cst(i8* %a) nounwind uwtable {
entry:
atomicrmw xchg i8* %a, i8 0 seq_cst
@@ -306,13 +338,21 @@ entry:
; CHECK: atomic8_xor_seq_cst
; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 5)
+define void @atomic8_nand_seq_cst(i8* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i8* %a, i8 0 seq_cst
+ ret void
+}
+; CHECK: atomic8_nand_seq_cst
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 5)
+
define void @atomic8_cas_monotonic(i8* %a) nounwind uwtable {
entry:
cmpxchg i8* %a, i8 0, i8 1 monotonic
ret void
}
; CHECK: atomic8_cas_monotonic
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 0)
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 0, i32 0)
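+; The new second ordering argument is the failure order implied by the
+; success order (monotonic->monotonic, acquire->acquire, release->monotonic,
+; acq_rel->acquire, seq_cst->seq_cst), matching the pairs checked below.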
define void @atomic8_cas_acquire(i8* %a) nounwind uwtable {
entry:
@@ -320,7 +360,7 @@ entry:
ret void
}
; CHECK: atomic8_cas_acquire
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 2)
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 2, i32 2)
define void @atomic8_cas_release(i8* %a) nounwind uwtable {
entry:
@@ -328,7 +368,7 @@ entry:
ret void
}
; CHECK: atomic8_cas_release
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 3)
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 3, i32 0)
define void @atomic8_cas_acq_rel(i8* %a) nounwind uwtable {
entry:
@@ -336,7 +376,7 @@ entry:
ret void
}
; CHECK: atomic8_cas_acq_rel
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 4)
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 4, i32 2)
define void @atomic8_cas_seq_cst(i8* %a) nounwind uwtable {
entry:
@@ -344,7 +384,7 @@ entry:
ret void
}
; CHECK: atomic8_cas_seq_cst
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 5)
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 5, i32 5)
define i16 @atomic16_load_unordered(i16* %a) nounwind uwtable {
entry:
@@ -458,6 +498,14 @@ entry:
; CHECK: atomic16_xor_monotonic
; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 0)
+define void @atomic16_nand_monotonic(i16* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i16* %a, i16 0 monotonic
+ ret void
+}
+; CHECK: atomic16_nand_monotonic
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 0)
+
define void @atomic16_xchg_acquire(i16* %a) nounwind uwtable {
entry:
atomicrmw xchg i16* %a, i16 0 acquire
@@ -506,6 +554,14 @@ entry:
; CHECK: atomic16_xor_acquire
; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 2)
+define void @atomic16_nand_acquire(i16* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i16* %a, i16 0 acquire
+ ret void
+}
+; CHECK: atomic16_nand_acquire
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 2)
+
define void @atomic16_xchg_release(i16* %a) nounwind uwtable {
entry:
atomicrmw xchg i16* %a, i16 0 release
@@ -554,6 +610,14 @@ entry:
; CHECK: atomic16_xor_release
; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 3)
+define void @atomic16_nand_release(i16* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i16* %a, i16 0 release
+ ret void
+}
+; CHECK: atomic16_nand_release
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 3)
+
define void @atomic16_xchg_acq_rel(i16* %a) nounwind uwtable {
entry:
atomicrmw xchg i16* %a, i16 0 acq_rel
@@ -602,6 +666,14 @@ entry:
; CHECK: atomic16_xor_acq_rel
; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 4)
+define void @atomic16_nand_acq_rel(i16* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i16* %a, i16 0 acq_rel
+ ret void
+}
+; CHECK: atomic16_nand_acq_rel
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 4)
+
define void @atomic16_xchg_seq_cst(i16* %a) nounwind uwtable {
entry:
atomicrmw xchg i16* %a, i16 0 seq_cst
@@ -650,13 +722,21 @@ entry:
; CHECK: atomic16_xor_seq_cst
; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 5)
+define void @atomic16_nand_seq_cst(i16* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i16* %a, i16 0 seq_cst
+ ret void
+}
+; CHECK: atomic16_nand_seq_cst
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 5)
+
define void @atomic16_cas_monotonic(i16* %a) nounwind uwtable {
entry:
cmpxchg i16* %a, i16 0, i16 1 monotonic
ret void
}
; CHECK: atomic16_cas_monotonic
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 0)
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 0, i32 0)
define void @atomic16_cas_acquire(i16* %a) nounwind uwtable {
entry:
@@ -664,7 +744,7 @@ entry:
ret void
}
; CHECK: atomic16_cas_acquire
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 2)
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 2, i32 2)
define void @atomic16_cas_release(i16* %a) nounwind uwtable {
entry:
@@ -672,7 +752,7 @@ entry:
ret void
}
; CHECK: atomic16_cas_release
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 3)
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 3, i32 0)
define void @atomic16_cas_acq_rel(i16* %a) nounwind uwtable {
entry:
@@ -680,7 +760,7 @@ entry:
ret void
}
; CHECK: atomic16_cas_acq_rel
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 4)
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 4, i32 2)
define void @atomic16_cas_seq_cst(i16* %a) nounwind uwtable {
entry:
@@ -688,7 +768,7 @@ entry:
ret void
}
; CHECK: atomic16_cas_seq_cst
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 5)
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 5, i32 5)
define i32 @atomic32_load_unordered(i32* %a) nounwind uwtable {
entry:
@@ -802,6 +882,14 @@ entry:
; CHECK: atomic32_xor_monotonic
; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 0)
+define void @atomic32_nand_monotonic(i32* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i32* %a, i32 0 monotonic
+ ret void
+}
+; CHECK: atomic32_nand_monotonic
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 0)
+
define void @atomic32_xchg_acquire(i32* %a) nounwind uwtable {
entry:
atomicrmw xchg i32* %a, i32 0 acquire
@@ -850,6 +938,14 @@ entry:
; CHECK: atomic32_xor_acquire
; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 2)
+define void @atomic32_nand_acquire(i32* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i32* %a, i32 0 acquire
+ ret void
+}
+; CHECK: atomic32_nand_acquire
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 2)
+
define void @atomic32_xchg_release(i32* %a) nounwind uwtable {
entry:
atomicrmw xchg i32* %a, i32 0 release
@@ -898,6 +994,14 @@ entry:
; CHECK: atomic32_xor_release
; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 3)
+define void @atomic32_nand_release(i32* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i32* %a, i32 0 release
+ ret void
+}
+; CHECK: atomic32_nand_release
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 3)
+
define void @atomic32_xchg_acq_rel(i32* %a) nounwind uwtable {
entry:
atomicrmw xchg i32* %a, i32 0 acq_rel
@@ -946,6 +1050,14 @@ entry:
; CHECK: atomic32_xor_acq_rel
; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 4)
+define void @atomic32_nand_acq_rel(i32* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i32* %a, i32 0 acq_rel
+ ret void
+}
+; CHECK: atomic32_nand_acq_rel
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 4)
+
define void @atomic32_xchg_seq_cst(i32* %a) nounwind uwtable {
entry:
atomicrmw xchg i32* %a, i32 0 seq_cst
@@ -994,13 +1106,21 @@ entry:
; CHECK: atomic32_xor_seq_cst
; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 5)
+define void @atomic32_nand_seq_cst(i32* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i32* %a, i32 0 seq_cst
+ ret void
+}
+; CHECK: atomic32_nand_seq_cst
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 5)
+
define void @atomic32_cas_monotonic(i32* %a) nounwind uwtable {
entry:
cmpxchg i32* %a, i32 0, i32 1 monotonic
ret void
}
; CHECK: atomic32_cas_monotonic
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 0)
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 0, i32 0)
define void @atomic32_cas_acquire(i32* %a) nounwind uwtable {
entry:
@@ -1008,7 +1128,7 @@ entry:
ret void
}
; CHECK: atomic32_cas_acquire
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 2)
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 2, i32 2)
define void @atomic32_cas_release(i32* %a) nounwind uwtable {
entry:
@@ -1016,7 +1136,7 @@ entry:
ret void
}
; CHECK: atomic32_cas_release
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 3)
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 3, i32 0)
define void @atomic32_cas_acq_rel(i32* %a) nounwind uwtable {
entry:
@@ -1024,7 +1144,7 @@ entry:
ret void
}
; CHECK: atomic32_cas_acq_rel
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 4)
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 4, i32 2)
define void @atomic32_cas_seq_cst(i32* %a) nounwind uwtable {
entry:
@@ -1032,7 +1152,7 @@ entry:
ret void
}
; CHECK: atomic32_cas_seq_cst
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 5)
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 5, i32 5)
define i64 @atomic64_load_unordered(i64* %a) nounwind uwtable {
entry:
@@ -1146,6 +1266,14 @@ entry:
; CHECK: atomic64_xor_monotonic
; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 0)
+define void @atomic64_nand_monotonic(i64* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i64* %a, i64 0 monotonic
+ ret void
+}
+; CHECK: atomic64_nand_monotonic
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 0)
+
define void @atomic64_xchg_acquire(i64* %a) nounwind uwtable {
entry:
atomicrmw xchg i64* %a, i64 0 acquire
@@ -1194,6 +1322,14 @@ entry:
; CHECK: atomic64_xor_acquire
; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 2)
+define void @atomic64_nand_acquire(i64* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i64* %a, i64 0 acquire
+ ret void
+}
+; CHECK: atomic64_nand_acquire
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 2)
+
define void @atomic64_xchg_release(i64* %a) nounwind uwtable {
entry:
atomicrmw xchg i64* %a, i64 0 release
@@ -1242,6 +1378,14 @@ entry:
; CHECK: atomic64_xor_release
; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 3)
+define void @atomic64_nand_release(i64* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i64* %a, i64 0 release
+ ret void
+}
+; CHECK: atomic64_nand_release
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 3)
+
define void @atomic64_xchg_acq_rel(i64* %a) nounwind uwtable {
entry:
atomicrmw xchg i64* %a, i64 0 acq_rel
@@ -1290,6 +1434,14 @@ entry:
; CHECK: atomic64_xor_acq_rel
; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 4)
+define void @atomic64_nand_acq_rel(i64* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i64* %a, i64 0 acq_rel
+ ret void
+}
+; CHECK: atomic64_nand_acq_rel
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 4)
+
define void @atomic64_xchg_seq_cst(i64* %a) nounwind uwtable {
entry:
atomicrmw xchg i64* %a, i64 0 seq_cst
@@ -1338,13 +1490,21 @@ entry:
; CHECK: atomic64_xor_seq_cst
; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 5)
+define void @atomic64_nand_seq_cst(i64* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i64* %a, i64 0 seq_cst
+ ret void
+}
+; CHECK: atomic64_nand_seq_cst
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 5)
+
define void @atomic64_cas_monotonic(i64* %a) nounwind uwtable {
entry:
cmpxchg i64* %a, i64 0, i64 1 monotonic
ret void
}
; CHECK: atomic64_cas_monotonic
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 0)
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 0, i32 0)
define void @atomic64_cas_acquire(i64* %a) nounwind uwtable {
entry:
@@ -1352,7 +1512,7 @@ entry:
ret void
}
; CHECK: atomic64_cas_acquire
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 2)
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 2, i32 2)
define void @atomic64_cas_release(i64* %a) nounwind uwtable {
entry:
@@ -1360,7 +1520,7 @@ entry:
ret void
}
; CHECK: atomic64_cas_release
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 3)
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 3, i32 0)
define void @atomic64_cas_acq_rel(i64* %a) nounwind uwtable {
entry:
@@ -1368,7 +1528,7 @@ entry:
ret void
}
; CHECK: atomic64_cas_acq_rel
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 4)
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 4, i32 2)
define void @atomic64_cas_seq_cst(i64* %a) nounwind uwtable {
entry:
@@ -1376,7 +1536,7 @@ entry:
ret void
}
; CHECK: atomic64_cas_seq_cst
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 5)
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 5, i32 5)
define i128 @atomic128_load_unordered(i128* %a) nounwind uwtable {
entry:
@@ -1490,6 +1650,14 @@ entry:
; CHECK: atomic128_xor_monotonic
; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 0)
+define void @atomic128_nand_monotonic(i128* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i128* %a, i128 0 monotonic
+ ret void
+}
+; CHECK: atomic128_nand_monotonic
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 0)
+
define void @atomic128_xchg_acquire(i128* %a) nounwind uwtable {
entry:
atomicrmw xchg i128* %a, i128 0 acquire
@@ -1538,6 +1706,14 @@ entry:
; CHECK: atomic128_xor_acquire
; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 2)
+define void @atomic128_nand_acquire(i128* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i128* %a, i128 0 acquire
+ ret void
+}
+; CHECK: atomic128_nand_acquire
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 2)
+
define void @atomic128_xchg_release(i128* %a) nounwind uwtable {
entry:
atomicrmw xchg i128* %a, i128 0 release
@@ -1586,6 +1762,14 @@ entry:
; CHECK: atomic128_xor_release
; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 3)
+define void @atomic128_nand_release(i128* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i128* %a, i128 0 release
+ ret void
+}
+; CHECK: atomic128_nand_release
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 3)
+
define void @atomic128_xchg_acq_rel(i128* %a) nounwind uwtable {
entry:
atomicrmw xchg i128* %a, i128 0 acq_rel
@@ -1634,6 +1818,14 @@ entry:
; CHECK: atomic128_xor_acq_rel
; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 4)
+define void @atomic128_nand_acq_rel(i128* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i128* %a, i128 0 acq_rel
+ ret void
+}
+; CHECK: atomic128_nand_acq_rel
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 4)
+
define void @atomic128_xchg_seq_cst(i128* %a) nounwind uwtable {
entry:
atomicrmw xchg i128* %a, i128 0 seq_cst
@@ -1682,13 +1874,21 @@ entry:
; CHECK: atomic128_xor_seq_cst
; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 5)
+define void @atomic128_nand_seq_cst(i128* %a) nounwind uwtable {
+entry:
+ atomicrmw nand i128* %a, i128 0 seq_cst
+ ret void
+}
+; CHECK: atomic128_nand_seq_cst
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 5)
+
define void @atomic128_cas_monotonic(i128* %a) nounwind uwtable {
entry:
cmpxchg i128* %a, i128 0, i128 1 monotonic
ret void
}
; CHECK: atomic128_cas_monotonic
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 0)
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 0, i32 0)
define void @atomic128_cas_acquire(i128* %a) nounwind uwtable {
entry:
@@ -1696,7 +1896,7 @@ entry:
ret void
}
; CHECK: atomic128_cas_acquire
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 2)
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 2, i32 2)
define void @atomic128_cas_release(i128* %a) nounwind uwtable {
entry:
@@ -1704,7 +1904,7 @@ entry:
ret void
}
; CHECK: atomic128_cas_release
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 3)
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 3, i32 0)
define void @atomic128_cas_acq_rel(i128* %a) nounwind uwtable {
entry:
@@ -1712,7 +1912,7 @@ entry:
ret void
}
; CHECK: atomic128_cas_acq_rel
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 4)
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 4, i32 2)
define void @atomic128_cas_seq_cst(i128* %a) nounwind uwtable {
entry:
@@ -1720,7 +1920,7 @@ entry:
ret void
}
; CHECK: atomic128_cas_seq_cst
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 5)
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 5, i32 5)
define void @atomic_signal_fence_acquire() nounwind uwtable {
entry:
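
Every compare-exchange CHECK in this file gains a fifth argument: a separate failure memory order. The values follow from the success order by dropping its release component, so release falls back to monotonic (0), acq_rel to acquire (2), and monotonic, acquire, and seq_cst map to themselves. A minimal C++ sketch of that mapping, with an illustrative helper name:

    // Derive the failure order used in the updated CHECK lines from the
    // success order by stripping its release semantics.
    inline int TsanFailureOrder(int success) {
      switch (success) {
        case 3: return 0;   // release -> monotonic
        case 4: return 2;   // acq_rel -> acquire
        default: return success;  // monotonic, acquire, seq_cst unchanged
      }
    }
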
diff --git a/test/Instrumentation/ThreadSanitizer/read_from_global.ll b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
index a08453ac4a94..7b6b94edf1b1 100644
--- a/test/Instrumentation/ThreadSanitizer/read_from_global.ll
+++ b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
@@ -48,7 +48,7 @@ entry:
}
; CHECK: define void @call_virtual_func
-; CHECK: __tsan_read
+; CHECK: __tsan_vptr_read
; CHECK: = load
; CHECK-NOT: __tsan_read
; CHECK: = load
diff --git a/test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll b/test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll
new file mode 100644
index 000000000000..a83a274bcf6e
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -basicaa -gvn -tsan -S | FileCheck %s
+; TSAN conflicts with load widening. Make sure load widening stays off when -tsan is enabled.
+
+; 32-bit little endian target.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+%struct_of_8_bytes_4_aligned = type { i32, i8, i8, i8, i8}
+
+@f = global %struct_of_8_bytes_4_aligned zeroinitializer, align 4
+
+; Accessing bytes 4 and 6; it is not OK to widen these into a single i32 load when sanitize_thread is set.
+
+define i32 @test_widening_bad(i8* %P) nounwind ssp noredzone sanitize_thread {
+entry:
+ %tmp = load i8* getelementptr inbounds (%struct_of_8_bytes_4_aligned* @f, i64 0, i32 1), align 4
+ %conv = zext i8 %tmp to i32
+ %tmp1 = load i8* getelementptr inbounds (%struct_of_8_bytes_4_aligned* @f, i64 0, i32 3), align 1
+ %conv2 = zext i8 %tmp1 to i32
+ %add = add nsw i32 %conv, %conv2
+ ret i32 %add
+; CHECK: @test_widening_bad
+; CHECK: call void @__tsan_read1
+; CHECK: call void @__tsan_read1
+; CHECK-NOT: call void @__tsan_read4
+; CHECK: ret i32
+}
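
A hypothetical C++ source for the pattern this test pins down; the struct layout matches %struct_of_8_bytes_4_aligned above, but the names are illustrative, not taken from the commit:

    // Two separate one-byte fields are read (bytes 4 and 6 of an 8-byte,
    // 4-aligned struct).  Without -tsan, GVN may widen the two i8 loads
    // into a single i32 load that also touches bytes 5 and 7; under
    // ThreadSanitizer that wider access could report races on bytes the
    // program never reads, so widening must stay disabled.
    struct S { int i; char a, b, c, d; };  // a = byte 4, c = byte 6
    extern S f;
    int test_widening_bad() { return f.a + f.c; }
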
diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
index 33c703b4c9bd..0ecff40493a2 100644
--- a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
+++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
@@ -20,3 +20,36 @@ entry:
; CHECK: ret i32
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
+
+; Check that tsan converts memory intrinsics (memcpy/memmove/memset) back to library function calls so the runtime can intercept them.
+
+define void @MemCpyTest(i8* nocapture %x, i8* nocapture %y) {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
+ ret void
+; CHECK: define void @MemCpyTest
+; CHECK: call i8* @memcpy
+; CHECK: ret void
+}
+
+define void @MemMoveTest(i8* nocapture %x, i8* nocapture %y) {
+entry:
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
+ ret void
+; CHECK: define void @MemMoveTest
+; CHECK: call i8* @memmove
+; CHECK: ret void
+}
+
+define void @MemSetTest(i8* nocapture %x) {
+entry:
+ tail call void @llvm.memset.p0i8.i64(i8* %x, i8 77, i64 16, i32 4, i1 false)
+ ret void
+; CHECK: define void @MemSetTest
+; CHECK: call i8* @memset
+; CHECK: ret void
+}
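
The conversion matters because tsan's runtime intercepts the libc entry points: a real call to memcpy is visible to the race detector, whereas an expanded llvm.memcpy intrinsic would bypass it. A hypothetical C++ source that clang lowers to the intrinsic checked above; the function name is illustrative:

    #include <cstring>
    // Clang emits llvm.memcpy.p0i8.p0i8.i64 for this call; with -tsan the
    // instrumentation pass turns it back into a plain @memcpy call so the
    // runtime interceptor sees it.
    void CopySixteen(char *x, const char *y) { std::memcpy(x, y, 16); }
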
diff --git a/test/Instrumentation/ThreadSanitizer/vptr_read.ll b/test/Instrumentation/ThreadSanitizer/vptr_read.ll
new file mode 100644
index 000000000000..404ca3ffe50f
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/vptr_read.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+; Check that vptr reads are instrumented as __tsan_vptr_read rather than as a plain __tsan_read.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define i8 @Foo(i8* %a) nounwind uwtable {
+entry:
+; CHECK: call void @__tsan_vptr_read
+ %0 = load i8* %a, align 8, !tbaa !0
+ ret i8 %0
+}
+!0 = metadata !{metadata !"vtable pointer", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
+
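
A hypothetical C++ sketch of where the special vptr load arises; the type and function names are illustrative:

    // A virtual call loads the object's vtable pointer first.  During
    // destruction the vptr is legitimately rewritten as each base-class
    // destructor runs, so a race between a virtual call and a destructor
    // is a distinct bug class; instrumenting the load as __tsan_vptr_read
    // lets the runtime report it separately from ordinary reads.
    struct Base {
      virtual ~Base() {}
      virtual void f() {}
    };
    void Call(Base *b) { b->f(); }  // the implicit vtable load is the vptr read
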
diff --git a/test/Integer/properties_bt.ll b/test/Integer/properties_bt.ll
index f24ddc2e80ef..695adf3c5f1d 100644
--- a/test/Integer/properties_bt.ll
+++ b/test/Integer/properties_bt.ll
@@ -5,5 +5,3 @@
target datalayout = "e-p:32:32"
target triple = "proc-vend-sys"
-deplibs = [ "m", "c" ]
-
diff --git a/test/JitListener/lit.local.cfg b/test/JitListener/lit.local.cfg
new file mode 100644
index 000000000000..a5aa6de182c4
--- /dev/null
+++ b/test/JitListener/lit.local.cfg
@@ -0,0 +1,11 @@
+config.suffixes = ['.ll']
+
+def getRoot(config):
+ if not config.parent:
+ return config
+ return getRoot(config.parent)
+
+root = getRoot(config)
+if root.llvm_use_intel_jitevents != "ON":
+ config.unsupported = True
+
diff --git a/test/JitListener/test-common-symbols.ll b/test/JitListener/test-common-symbols.ll
new file mode 100644
index 000000000000..bc94bda9a410
--- /dev/null
+++ b/test/JitListener/test-common-symbols.ll
@@ -0,0 +1,113 @@
+; RUN: llvm-jitlistener %s | FileCheck %s
+
+; CHECK: Method load [1]: main, Size = 164
+; CHECK: Method unload [1]
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@zero_int = common global i32 0, align 4
+@zero_arr = common global [10 x i32] zeroinitializer, align 16
+@zero_double = common global double 0.000000e+00, align 8
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @zero_int, align 4, !dbg !21
+ %add = add nsw i32 %0, 5, !dbg !21
+ %idxprom = sext i32 %add to i64, !dbg !21
+ %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom, !dbg !21
+ store i32 40, i32* %arrayidx, align 4, !dbg !21
+ %1 = load double* @zero_double, align 8, !dbg !23
+ %cmp = fcmp olt double %1, 1.000000e+00, !dbg !23
+ br i1 %cmp, label %if.then, label %if.end, !dbg !23
+
+if.then: ; preds = %entry
+ %2 = load i32* @zero_int, align 4, !dbg !24
+ %add1 = add nsw i32 %2, 2, !dbg !24
+ %idxprom2 = sext i32 %add1 to i64, !dbg !24
+ %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2, !dbg !24
+ store i32 70, i32* %arrayidx3, align 4, !dbg !24
+ br label %if.end, !dbg !24
+
+if.end: ; preds = %if.then, %entry
+ call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !25), !dbg !27
+ store i32 1, i32* %i, align 4, !dbg !28
+ br label %for.cond, !dbg !28
+
+for.cond: ; preds = %for.inc, %if.end
+ %3 = load i32* %i, align 4, !dbg !28
+ %cmp4 = icmp slt i32 %3, 10, !dbg !28
+ br i1 %cmp4, label %for.body, label %for.end, !dbg !28
+
+for.body: ; preds = %for.cond
+ %4 = load i32* %i, align 4, !dbg !29
+ %sub = sub nsw i32 %4, 1, !dbg !29
+ %idxprom5 = sext i32 %sub to i64, !dbg !29
+ %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5, !dbg !29
+ %5 = load i32* %arrayidx6, align 4, !dbg !29
+ %6 = load i32* %i, align 4, !dbg !29
+ %idxprom7 = sext i32 %6 to i64, !dbg !29
+ %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7, !dbg !29
+ %7 = load i32* %arrayidx8, align 4, !dbg !29
+ %add9 = add nsw i32 %5, %7, !dbg !29
+ %8 = load i32* %i, align 4, !dbg !29
+ %idxprom10 = sext i32 %8 to i64, !dbg !29
+ %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10, !dbg !29
+ store i32 %add9, i32* %arrayidx11, align 4, !dbg !29
+ br label %for.inc, !dbg !31
+
+for.inc: ; preds = %for.body
+ %9 = load i32* %i, align 4, !dbg !32
+ %inc = add nsw i32 %9, 1, !dbg !32
+ store i32 %inc, i32* %i, align 4, !dbg !32
+ br label %for.cond, !dbg !32
+
+for.end: ; preds = %for.cond
+ %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4, !dbg !33
+ %cmp12 = icmp eq i32 %10, 110, !dbg !33
+ %cond = select i1 %cmp12, i32 0, i32 -1, !dbg !33
+ ret i32 %cond, !dbg !33
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"test-common-symbols.c", metadata !"/store/store/llvm/build", metadata !"clang version 3.1 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"test-common-symbols.c", metadata !"/store/store/llvm/build", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{metadata !14, metadata !15, metadata !17}
+!14 = metadata !{i32 720948, i32 0, null, metadata !"zero_int", metadata !"zero_int", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @zero_int} ; [ DW_TAG_variable ]
+!15 = metadata !{i32 720948, i32 0, null, metadata !"zero_double", metadata !"zero_double", metadata !"", metadata !6, i32 2, metadata !16, i32 0, i32 1, double* @zero_double} ; [ DW_TAG_variable ]
+!16 = metadata !{i32 720932, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 720948, i32 0, null, metadata !"zero_arr", metadata !"zero_arr", metadata !"", metadata !6, i32 3, metadata !18, i32 0, i32 1, [10 x i32]* @zero_arr} ; [ DW_TAG_variable ]
+!18 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !9, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 720929, i64 0, i64 10} ; [ DW_TAG_subrange_type ]
+!21 = metadata !{i32 7, i32 5, metadata !22, null}
+!22 = metadata !{i32 720907, metadata !5, i32 6, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 9, i32 5, metadata !22, null}
+!24 = metadata !{i32 10, i32 9, metadata !22, null}
+!25 = metadata !{i32 721152, metadata !26, metadata !"i", metadata !6, i32 12, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!26 = metadata !{i32 720907, metadata !22, i32 12, i32 5, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 12, i32 14, metadata !26, null}
+!28 = metadata !{i32 12, i32 19, metadata !26, null}
+!29 = metadata !{i32 13, i32 9, metadata !30, null}
+!30 = metadata !{i32 720907, metadata !26, i32 12, i32 34, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!31 = metadata !{i32 14, i32 5, metadata !30, null}
+!32 = metadata !{i32 12, i32 29, metadata !26, null}
+!33 = metadata !{i32 15, i32 5, metadata !22, null}
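
For readability, a hedged reconstruction of the C source behind the IR above, inferred from the module and its line info (the global names come from the IR; the control flow is an inference, not the commit's source). The test itself only checks that the listener reports the load and unload of main:

    int zero_int;        // "common symbols": zero-initialized globals
    int zero_arr[10];
    double zero_double;
    int main() {
      zero_arr[zero_int + 5] = 40;
      if (zero_double < 1.0)
        zero_arr[zero_int + 2] = 70;
      for (int i = 1; i < 10; ++i)
        zero_arr[i] = zero_arr[i - 1] + zero_arr[i];
      return zero_arr[9] == 110 ? 0 : -1;
    }
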
diff --git a/test/JitListener/test-inline.ll b/test/JitListener/test-inline.ll
new file mode 100644
index 000000000000..ca5d8d6484b8
--- /dev/null
+++ b/test/JitListener/test-inline.ll
@@ -0,0 +1,212 @@
+; RUN: llvm-jitlistener %s | FileCheck %s
+
+; CHECK: Method load [1]: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
+; CHECK: Line info @ 0: test-inline.cpp, line 33
+; CHECK: Line info @ 35: test-inline.cpp, line 34
+; CHECK: Line info @ 165: test-inline.cpp, line 35
+; CHECK: Method load [2]: _Z3foov, Size = 3
+; CHECK: Line info @ 0: test-inline.cpp, line 28
+; CHECK: Line info @ 2: test-inline.cpp, line 29
+; CHECK: Line info @ 3: test-inline.cpp, line 29
+; CHECK: Method load [3]: main, Size = 146
+; CHECK: Line info @ 0: test-inline.cpp, line 39
+; CHECK: Line info @ 21: test-inline.cpp, line 41
+; CHECK: Line info @ 39: test-inline.cpp, line 42
+; CHECK: Line info @ 60: test-inline.cpp, line 44
+; CHECK: Line info @ 80: test-inline.cpp, line 48
+; CHECK: Line info @ 90: test-inline.cpp, line 45
+; CHECK: Line info @ 95: test-inline.cpp, line 46
+; CHECK: Line info @ 114: test-inline.cpp, line 48
+; CHECK: Line info @ 141: test-inline.cpp, line 49
+; CHECK: Line info @ 146: test-inline.cpp, line 49
+; CHECK: Method unload [1]
+; CHECK: Method unload [2]
+; CHECK: Method unload [3]
+
+; ModuleID = 'test-inline.cpp'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.char_struct = type { i8, [2 x i8] }
+
+@compound_char = global %struct.char_struct zeroinitializer, align 1
+@_ZZ4mainE1d = private unnamed_addr constant [2 x [2 x double]] [[2 x double] [double 0.000000e+00, double 1.000000e+00], [2 x double] [double 2.000000e+00, double 3.000000e+00]], align 16
+
+define double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %pf, [2 x double]* %ppd, %struct.char_struct* %s, i32** %ppn, i16 zeroext %us, i64 %l) uwtable {
+entry:
+ %pf.addr = alloca float*, align 8
+ %ppd.addr = alloca [2 x double]*, align 8
+ %s.addr = alloca %struct.char_struct*, align 8
+ %ppn.addr = alloca i32**, align 8
+ %us.addr = alloca i16, align 2
+ %l.addr = alloca i64, align 8
+ %result = alloca double, align 8
+ store float* %pf, float** %pf.addr, align 8
+ call void @llvm.dbg.declare(metadata !{float** %pf.addr}, metadata !46), !dbg !47
+ store [2 x double]* %ppd, [2 x double]** %ppd.addr, align 8
+ call void @llvm.dbg.declare(metadata !{[2 x double]** %ppd.addr}, metadata !48), !dbg !47
+ store %struct.char_struct* %s, %struct.char_struct** %s.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.char_struct** %s.addr}, metadata !49), !dbg !47
+ store i32** %ppn, i32*** %ppn.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i32*** %ppn.addr}, metadata !50), !dbg !47
+ store i16 %us, i16* %us.addr, align 2
+ call void @llvm.dbg.declare(metadata !{i16* %us.addr}, metadata !51), !dbg !47
+ store i64 %l, i64* %l.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i64* %l.addr}, metadata !52), !dbg !47
+ call void @llvm.dbg.declare(metadata !{double* %result}, metadata !53), !dbg !55
+ %0 = load float** %pf.addr, align 8, !dbg !55
+ %arrayidx = getelementptr inbounds float* %0, i64 0, !dbg !55
+ %1 = load float* %arrayidx, align 4, !dbg !55
+ %conv = fpext float %1 to double, !dbg !55
+ %2 = load [2 x double]** %ppd.addr, align 8, !dbg !55
+ %arrayidx1 = getelementptr inbounds [2 x double]* %2, i64 1, !dbg !55
+ %arrayidx2 = getelementptr inbounds [2 x double]* %arrayidx1, i32 0, i64 1, !dbg !55
+ %3 = load double* %arrayidx2, align 8, !dbg !55
+ %mul = fmul double %conv, %3, !dbg !55
+ %4 = load %struct.char_struct** %s.addr, align 8, !dbg !55
+ %c = getelementptr inbounds %struct.char_struct* %4, i32 0, i32 0, !dbg !55
+ %5 = load i8* %c, align 1, !dbg !55
+ %conv3 = sext i8 %5 to i32, !dbg !55
+ %conv4 = sitofp i32 %conv3 to double, !dbg !55
+ %mul5 = fmul double %mul, %conv4, !dbg !55
+ %6 = load i16* %us.addr, align 2, !dbg !55
+ %conv6 = zext i16 %6 to i32, !dbg !55
+ %conv7 = sitofp i32 %conv6 to double, !dbg !55
+ %mul8 = fmul double %mul5, %conv7, !dbg !55
+ %7 = load i64* %l.addr, align 8, !dbg !55
+ %conv9 = uitofp i64 %7 to double, !dbg !55
+ %mul10 = fmul double %mul8, %conv9, !dbg !55
+ %call = call i32 @_Z3foov(), !dbg !55
+ %conv11 = sitofp i32 %call to double, !dbg !55
+ %add = fadd double %mul10, %conv11, !dbg !55
+ store double %add, double* %result, align 8, !dbg !55
+ %8 = load double* %result, align 8, !dbg !56
+ ret double %8, !dbg !56
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define linkonce_odr i32 @_Z3foov() nounwind uwtable inlinehint {
+entry:
+ ret i32 0, !dbg !57
+}
+
+define i32 @main(i32 %argc, i8** %argv) uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 8
+ %s = alloca %struct.char_struct, align 1
+ %f = alloca float, align 4
+ %d = alloca [2 x [2 x double]], align 16
+ %result = alloca double, align 8
+ store i32 0, i32* %retval
+ store i32 %argc, i32* %argc.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !59), !dbg !60
+ store i8** %argv, i8*** %argv.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !61), !dbg !60
+ call void @llvm.dbg.declare(metadata !{%struct.char_struct* %s}, metadata !62), !dbg !64
+ call void @llvm.dbg.declare(metadata !{float* %f}, metadata !65), !dbg !66
+ store float 0.000000e+00, float* %f, align 4, !dbg !66
+ call void @llvm.dbg.declare(metadata !{[2 x [2 x double]]* %d}, metadata !67), !dbg !70
+ %0 = bitcast [2 x [2 x double]]* %d to i8*, !dbg !70
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([2 x [2 x double]]* @_ZZ4mainE1d to i8*), i64 32, i32 16, i1 false), !dbg !70
+ %c = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 0, !dbg !71
+ store i8 97, i8* %c, align 1, !dbg !71
+ %c2 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !72
+ %arrayidx = getelementptr inbounds [2 x i8]* %c2, i32 0, i64 0, !dbg !72
+ store i8 48, i8* %arrayidx, align 1, !dbg !72
+ %c21 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !73
+ %arrayidx2 = getelementptr inbounds [2 x i8]* %c21, i32 0, i64 1, !dbg !73
+ store i8 49, i8* %arrayidx2, align 1, !dbg !73
+ call void @llvm.dbg.declare(metadata !{double* %result}, metadata !74), !dbg !75
+ %arraydecay = getelementptr inbounds [2 x [2 x double]]* %d, i32 0, i32 0, !dbg !75
+ %call = call double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %f, [2 x double]* %arraydecay, %struct.char_struct* %s, i32** null, i16 zeroext 10, i64 42), !dbg !75
+ store double %call, double* %result, align 8, !dbg !75
+ %1 = load double* %result, align 8, !dbg !76
+ %cmp = fcmp oeq double %1, 0.000000e+00, !dbg !76
+ %cond = select i1 %cmp, i32 0, i32 -1, !dbg !76
+ ret i32 %cond, !dbg !76
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test-inline.cpp", metadata !"/home/akaylor/dev", metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-inline.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !35, metadata !40}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", metadata !6, i32 32, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters]
+!6 = metadata !{i32 786473, metadata !"test-inline.cpp", metadata !"/home/akaylor/dev", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !10, metadata !12, metadata !16, metadata !29, metadata !32, metadata !33}
+!9 = metadata !{i32 786468, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from float]
+!11 = metadata !{i32 786468, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!12 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !9, metadata !14, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double]
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 786465, i64 0, i64 2} ; [ DW_TAG_subrange_type ] [0, 1]
+!16 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from char_struct]
+!17 = metadata !{i32 786451, null, metadata !"char_struct", metadata !6, i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !18, i32 0, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [from ]
+!18 = metadata !{metadata !19, metadata !21, metadata !23}
+!19 = metadata !{i32 786445, metadata !17, metadata !"c", metadata !6, i32 23, i64 8, i64 8, i64 0, i32 0, metadata !20} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char]
+!20 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!21 = metadata !{i32 786445, metadata !17, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !22} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ]
+!22 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !20, metadata !14, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char]
+!23 = metadata !{i32 786478, i32 0, metadata !17, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !27, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct]
+!24 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!25 = metadata !{null, metadata !26}
+!26 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char_struct]
+!27 = metadata !{metadata !28}
+!28 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!29 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!30 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !31} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!31 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!32 = metadata !{i32 786468, null, metadata !"unsigned short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
+!33 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !34} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long unsigned int]
+!34 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!35 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 38, metadata !36, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main]
+!36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!37 = metadata !{metadata !31, metadata !31, metadata !38}
+!38 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!39 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!40 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !6, i32 27, metadata !41, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo]
+!41 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!42 = metadata !{metadata !31}
+!43 = metadata !{metadata !44}
+!44 = metadata !{metadata !45}
+!45 = metadata !{i32 786484, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !17, i32 0, i32 1, %struct.char_struct* @compound_char} ; [ DW_TAG_variable ] [compound_char] [line 25] [def]
+!46 = metadata !{i32 786689, metadata !5, metadata !"pf", metadata !6, i32 16777248, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [pf] [line 32]
+!47 = metadata !{i32 32, i32 0, metadata !5, null}
+!48 = metadata !{i32 786689, metadata !5, metadata !"ppd", metadata !6, i32 33554464, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppd] [line 32]
+!49 = metadata !{i32 786689, metadata !5, metadata !"s", metadata !6, i32 50331680, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [s] [line 32]
+!50 = metadata !{i32 786689, metadata !5, metadata !"ppn", metadata !6, i32 67108896, metadata !29, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppn] [line 32]
+!51 = metadata !{i32 786689, metadata !5, metadata !"us", metadata !6, i32 83886112, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [us] [line 32]
+!52 = metadata !{i32 786689, metadata !5, metadata !"l", metadata !6, i32 100663328, metadata !33, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [l] [line 32]
+!53 = metadata !{i32 786688, metadata !54, metadata !"result", metadata !6, i32 34, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 34]
+!54 = metadata !{i32 786443, metadata !5, i32 33, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
+!55 = metadata !{i32 34, i32 0, metadata !54, null}
+!56 = metadata !{i32 35, i32 0, metadata !54, null}
+!57 = metadata !{i32 29, i32 0, metadata !58, null}
+!58 = metadata !{i32 786443, metadata !40, i32 28, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
+!59 = metadata !{i32 786689, metadata !35, metadata !"argc", metadata !6, i32 16777254, metadata !31, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 38]
+!60 = metadata !{i32 38, i32 0, metadata !35, null}
+!61 = metadata !{i32 786689, metadata !35, metadata !"argv", metadata !6, i32 33554470, metadata !38, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 38]
+!62 = metadata !{i32 786688, metadata !63, metadata !"s", metadata !6, i32 40, metadata !17, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 40]
+!63 = metadata !{i32 786443, metadata !35, i32 39, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
+!64 = metadata !{i32 40, i32 0, metadata !63, null}
+!65 = metadata !{i32 786688, metadata !63, metadata !"f", metadata !6, i32 41, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 41]
+!66 = metadata !{i32 41, i32 0, metadata !63, null}
+!67 = metadata !{i32 786688, metadata !63, metadata !"d", metadata !6, i32 42, metadata !68, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 42]
+!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !9, metadata !69, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double]
+!69 = metadata !{metadata !15, metadata !15}
+!70 = metadata !{i32 42, i32 0, metadata !63, null}
+!71 = metadata !{i32 44, i32 0, metadata !63, null}
+!72 = metadata !{i32 45, i32 0, metadata !63, null}
+!73 = metadata !{i32 46, i32 0, metadata !63, null}
+!74 = metadata !{i32 786688, metadata !63, metadata !"result", metadata !6, i32 48, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 48]
+!75 = metadata !{i32 48, i32 0, metadata !63, null}
+!76 = metadata !{i32 49, i32 0, metadata !63, null}
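
Likewise, a hedged reconstruction of test-inline.cpp from the IR, mangled names, and debug metadata; the declarations and exact source lines are inferred, not copied from the commit. The CHECK lines then assert that the inlinehint function _Z3foov is still reported as its own method by the JIT listener:

    struct char_struct {
      char c;
      char c2[2];
    };
    char_struct compound_char;
    inline int foo() { return 0; }
    double test_parameters(float *pf, double ppd[][2], char_struct &s,
                           int **ppn, unsigned short us,
                           const unsigned long l) {
      double result = pf[0] * ppd[1][1] * s.c * us * l + foo();
      return result;
    }
    int main(int argc, char **argv) {
      char_struct s;
      float f = 0.0f;
      double d[2][2] = {{0.0, 1.0}, {2.0, 3.0}};
      s.c = 'a';
      s.c2[0] = '0';
      s.c2[1] = '1';
      double result = test_parameters(&f, d, s, 0, 10, 42);
      return result == 0.0 ? 0 : -1;
    }
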
diff --git a/test/JitListener/test-parameters.ll b/test/JitListener/test-parameters.ll
new file mode 100644
index 000000000000..1e2a2b342aae
--- /dev/null
+++ b/test/JitListener/test-parameters.ll
@@ -0,0 +1,211 @@
+; RUN: llvm-jitlistener %s | FileCheck %s
+
+; CHECK: Method load [1]: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
+; CHECK: Line info @ 0: test-parameters.cpp, line 33
+; CHECK: Line info @ 35: test-parameters.cpp, line 34
+; CHECK: Line info @ 165: test-parameters.cpp, line 35
+; CHECK: Method load [2]: _Z3foov, Size = 3
+; CHECK: Line info @ 0: test-parameters.cpp, line 28
+; CHECK: Line info @ 2: test-parameters.cpp, line 29
+; CHECK: Method load [3]: main, Size = 146
+; CHECK: Line info @ 0: test-parameters.cpp, line 39
+; CHECK: Line info @ 21: test-parameters.cpp, line 41
+; CHECK: Line info @ 39: test-parameters.cpp, line 42
+; CHECK: Line info @ 60: test-parameters.cpp, line 44
+; CHECK: Line info @ 80: test-parameters.cpp, line 48
+; CHECK: Line info @ 90: test-parameters.cpp, line 45
+; CHECK: Line info @ 95: test-parameters.cpp, line 46
+; CHECK: Line info @ 114: test-parameters.cpp, line 48
+; CHECK: Line info @ 141: test-parameters.cpp, line 49
+; CHECK: Line info @ 146: test-parameters.cpp, line 49
+; CHECK: Method unload [1]
+; CHECK: Method unload [2]
+; CHECK: Method unload [3]
+
+; ModuleID = 'test-parameters.cpp'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.char_struct = type { i8, [2 x i8] }
+
+@compound_char = global %struct.char_struct zeroinitializer, align 1
+@_ZZ4mainE1d = private unnamed_addr constant [2 x [2 x double]] [[2 x double] [double 0.000000e+00, double 1.000000e+00], [2 x double] [double 2.000000e+00, double 3.000000e+00]], align 16
+
+define i32 @_Z3foov() nounwind uwtable {
+entry:
+ ret i32 0, !dbg !46
+}
+
+define double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %pf, [2 x double]* %ppd, %struct.char_struct* %s, i32** %ppn, i16 zeroext %us, i64 %l) nounwind uwtable {
+entry:
+ %pf.addr = alloca float*, align 8
+ %ppd.addr = alloca [2 x double]*, align 8
+ %s.addr = alloca %struct.char_struct*, align 8
+ %ppn.addr = alloca i32**, align 8
+ %us.addr = alloca i16, align 2
+ %l.addr = alloca i64, align 8
+ %result = alloca double, align 8
+ store float* %pf, float** %pf.addr, align 8
+ call void @llvm.dbg.declare(metadata !{float** %pf.addr}, metadata !48), !dbg !49
+ store [2 x double]* %ppd, [2 x double]** %ppd.addr, align 8
+ call void @llvm.dbg.declare(metadata !{[2 x double]** %ppd.addr}, metadata !50), !dbg !49
+ store %struct.char_struct* %s, %struct.char_struct** %s.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.char_struct** %s.addr}, metadata !51), !dbg !49
+ store i32** %ppn, i32*** %ppn.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i32*** %ppn.addr}, metadata !52), !dbg !49
+ store i16 %us, i16* %us.addr, align 2
+ call void @llvm.dbg.declare(metadata !{i16* %us.addr}, metadata !53), !dbg !49
+ store i64 %l, i64* %l.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i64* %l.addr}, metadata !54), !dbg !49
+ call void @llvm.dbg.declare(metadata !{double* %result}, metadata !55), !dbg !57
+ %0 = load float** %pf.addr, align 8, !dbg !57
+ %arrayidx = getelementptr inbounds float* %0, i64 0, !dbg !57
+ %1 = load float* %arrayidx, align 4, !dbg !57
+ %conv = fpext float %1 to double, !dbg !57
+ %2 = load [2 x double]** %ppd.addr, align 8, !dbg !57
+ %arrayidx1 = getelementptr inbounds [2 x double]* %2, i64 1, !dbg !57
+ %arrayidx2 = getelementptr inbounds [2 x double]* %arrayidx1, i32 0, i64 1, !dbg !57
+ %3 = load double* %arrayidx2, align 8, !dbg !57
+ %mul = fmul double %conv, %3, !dbg !57
+ %4 = load %struct.char_struct** %s.addr, align 8, !dbg !57
+ %c = getelementptr inbounds %struct.char_struct* %4, i32 0, i32 0, !dbg !57
+ %5 = load i8* %c, align 1, !dbg !57
+ %conv3 = sext i8 %5 to i32, !dbg !57
+ %conv4 = sitofp i32 %conv3 to double, !dbg !57
+ %mul5 = fmul double %mul, %conv4, !dbg !57
+ %6 = load i16* %us.addr, align 2, !dbg !57
+ %conv6 = zext i16 %6 to i32, !dbg !57
+ %conv7 = sitofp i32 %conv6 to double, !dbg !57
+ %mul8 = fmul double %mul5, %conv7, !dbg !57
+ %7 = load i64* %l.addr, align 8, !dbg !57
+ %conv9 = uitofp i64 %7 to double, !dbg !57
+ %mul10 = fmul double %mul8, %conv9, !dbg !57
+ %call = call i32 @_Z3foov(), !dbg !57
+ %conv11 = sitofp i32 %call to double, !dbg !57
+ %add = fadd double %mul10, %conv11, !dbg !57
+ store double %add, double* %result, align 8, !dbg !57
+ %8 = load double* %result, align 8, !dbg !58
+ ret double %8, !dbg !58
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 8
+ %s = alloca %struct.char_struct, align 1
+ %f = alloca float, align 4
+ %d = alloca [2 x [2 x double]], align 16
+ %result = alloca double, align 8
+ store i32 0, i32* %retval
+ store i32 %argc, i32* %argc.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !59), !dbg !60
+ store i8** %argv, i8*** %argv.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !61), !dbg !60
+ call void @llvm.dbg.declare(metadata !{%struct.char_struct* %s}, metadata !62), !dbg !64
+ call void @llvm.dbg.declare(metadata !{float* %f}, metadata !65), !dbg !66
+ store float 0.000000e+00, float* %f, align 4, !dbg !66
+ call void @llvm.dbg.declare(metadata !{[2 x [2 x double]]* %d}, metadata !67), !dbg !70
+ %0 = bitcast [2 x [2 x double]]* %d to i8*, !dbg !70
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([2 x [2 x double]]* @_ZZ4mainE1d to i8*), i64 32, i32 16, i1 false), !dbg !70
+ %c = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 0, !dbg !71
+ store i8 97, i8* %c, align 1, !dbg !71
+ %c2 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !72
+ %arrayidx = getelementptr inbounds [2 x i8]* %c2, i32 0, i64 0, !dbg !72
+ store i8 48, i8* %arrayidx, align 1, !dbg !72
+ %c21 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !73
+ %arrayidx2 = getelementptr inbounds [2 x i8]* %c21, i32 0, i64 1, !dbg !73
+ store i8 49, i8* %arrayidx2, align 1, !dbg !73
+ call void @llvm.dbg.declare(metadata !{double* %result}, metadata !74), !dbg !75
+ %arraydecay = getelementptr inbounds [2 x [2 x double]]* %d, i32 0, i32 0, !dbg !75
+ %call = call double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %f, [2 x double]* %arraydecay, %struct.char_struct* %s, i32** null, i16 zeroext 10, i64 42), !dbg !75
+ store double %call, double* %result, align 8, !dbg !75
+ %1 = load double* %result, align 8, !dbg !76
+ %cmp = fcmp oeq double %1, 0.000000e+00, !dbg !76
+ %cond = select i1 %cmp, i32 0, i32 -1, !dbg !76
+ ret i32 %cond, !dbg !76
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test-parameters.cpp", metadata !"/home/akaylor/dev", metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-parameters.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !10, metadata !38}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !6, i32 27, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo]
+!6 = metadata !{i32 786473, metadata !"test-parameters.cpp", metadata !"/home/akaylor/dev", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", metadata !6, i32 32, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters]
+!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{metadata !13, metadata !14, metadata !16, metadata !20, metadata !33, metadata !35, metadata !36}
+!13 = metadata !{i32 786468, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!14 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from float]
+!15 = metadata !{i32 786468, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!16 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!17 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !13, metadata !18, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double]
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 786465, i64 0, i64 2} ; [ DW_TAG_subrange_type ] [0, 1]
+!20 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from char_struct]
+!21 = metadata !{i32 786451, null, metadata !"char_struct", metadata !6, i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !22, i32 0, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [from ]
+!22 = metadata !{metadata !23, metadata !25, metadata !27}
+!23 = metadata !{i32 786445, metadata !21, metadata !"c", metadata !6, i32 23, i64 8, i64 8, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char]
+!24 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!25 = metadata !{i32 786445, metadata !21, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !26} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ]
+!26 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !24, metadata !18, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char]
+!27 = metadata !{i32 786478, i32 0, metadata !21, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !28, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !31, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct]
+!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = metadata !{null, metadata !30}
+!30 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !21} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char_struct]
+!31 = metadata !{metadata !32}
+!32 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!33 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!34 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!35 = metadata !{i32 786468, null, metadata !"unsigned short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
+!36 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !37} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long unsigned int]
+!37 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!38 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 38, metadata !39, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main]
+!39 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!40 = metadata !{metadata !9, metadata !9, metadata !41}
+!41 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !42} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!42 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!43 = metadata !{metadata !44}
+!44 = metadata !{metadata !45}
+!45 = metadata !{i32 786484, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !21, i32 0, i32 1, %struct.char_struct* @compound_char} ; [ DW_TAG_variable ] [compound_char] [line 25] [def]
+!46 = metadata !{i32 29, i32 0, metadata !47, null}
+!47 = metadata !{i32 786443, metadata !5, i32 28, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
+!48 = metadata !{i32 786689, metadata !10, metadata !"pf", metadata !6, i32 16777248, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [pf] [line 32]
+!49 = metadata !{i32 32, i32 0, metadata !10, null}
+!50 = metadata !{i32 786689, metadata !10, metadata !"ppd", metadata !6, i32 33554464, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppd] [line 32]
+!51 = metadata !{i32 786689, metadata !10, metadata !"s", metadata !6, i32 50331680, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [s] [line 32]
+!52 = metadata !{i32 786689, metadata !10, metadata !"ppn", metadata !6, i32 67108896, metadata !33, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppn] [line 32]
+!53 = metadata !{i32 786689, metadata !10, metadata !"us", metadata !6, i32 83886112, metadata !35, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [us] [line 32]
+!54 = metadata !{i32 786689, metadata !10, metadata !"l", metadata !6, i32 100663328, metadata !36, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [l] [line 32]
+!55 = metadata !{i32 786688, metadata !56, metadata !"result", metadata !6, i32 34, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 34]
+!56 = metadata !{i32 786443, metadata !10, i32 33, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
+!57 = metadata !{i32 34, i32 0, metadata !56, null}
+!58 = metadata !{i32 35, i32 0, metadata !56, null}
+!59 = metadata !{i32 786689, metadata !38, metadata !"argc", metadata !6, i32 16777254, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 38]
+!60 = metadata !{i32 38, i32 0, metadata !38, null}
+!61 = metadata !{i32 786689, metadata !38, metadata !"argv", metadata !6, i32 33554470, metadata !41, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 38]
+!62 = metadata !{i32 786688, metadata !63, metadata !"s", metadata !6, i32 40, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 40]
+!63 = metadata !{i32 786443, metadata !38, i32 39, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
+!64 = metadata !{i32 40, i32 0, metadata !63, null}
+!65 = metadata !{i32 786688, metadata !63, metadata !"f", metadata !6, i32 41, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 41]
+!66 = metadata !{i32 41, i32 0, metadata !63, null}
+!67 = metadata !{i32 786688, metadata !63, metadata !"d", metadata !6, i32 42, metadata !68, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 42]
+!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !13, metadata !69, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double]
+!69 = metadata !{metadata !19, metadata !19}
+!70 = metadata !{i32 42, i32 0, metadata !63, null}
+!71 = metadata !{i32 44, i32 0, metadata !63, null}
+!72 = metadata !{i32 45, i32 0, metadata !63, null}
+!73 = metadata !{i32 46, i32 0, metadata !63, null}
+!74 = metadata !{i32 786688, metadata !63, metadata !"result", metadata !6, i32 48, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 48]
+!75 = metadata !{i32 48, i32 0, metadata !63, null}
+!76 = metadata !{i32 49, i32 0, metadata !63, null}
diff --git a/test/Linker/2006-01-19-ConstantPacked.ll b/test/Linker/2006-01-19-ConstantPacked.ll
index d2409e20c4d9..5b39cb2ad279 100644
--- a/test/Linker/2006-01-19-ConstantPacked.ll
+++ b/test/Linker/2006-01-19-ConstantPacked.ll
@@ -3,11 +3,8 @@
target datalayout = "E-p:32:32"
target triple = "powerpc-apple-darwin7.7.0"
-deplibs = [ "c", "crtend" ]
@source = global <4 x i32> < i32 0, i32 1, i32 2, i32 3 > ; <<4 x i32>*> [#uses=0]
define i32 @main() {
-entry:
- ret i32 0
+ ret i32 0
}
-
diff --git a/test/Linker/DbgDeclare.ll b/test/Linker/DbgDeclare.ll
new file mode 100644
index 000000000000..7f64f95c3a7f
--- /dev/null
+++ b/test/Linker/DbgDeclare.ll
@@ -0,0 +1,58 @@
+; RUN: llvm-link %s %p/DbgDeclare2.ll -o %t.bc
+; RUN: llvm-dis < %t.bc | FileCheck %s
+; Test that the metadata referenced by dbg.declare is mapped properly when linking.
+
+; rdar://13089880
+; CHECK: define i32 @main(i32 %argc, i8** %argv)
+; CHECK: call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !{{[0-9]+}})
+; CHECK: call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !{{[0-9]+}})
+; CHECK: define void @test(i32 %argc, i8** %argv)
+; CHECK: call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !{{[0-9]+}})
+; CHECK: call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !{{[0-9]+}})
+; CHECK: call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !{{[0-9]+}})
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+define i32 @main(i32 %argc, i8** %argv) uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 8
+ store i32 0, i32* %retval
+ store i32 %argc, i32* %argc.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !14), !dbg !15
+ store i8** %argv, i8*** %argv.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !16), !dbg !15
+ %0 = load i32* %argc.addr, align 4, !dbg !17
+ %1 = load i8*** %argv.addr, align 8, !dbg !17
+ call void @test(i32 %0, i8** %1), !dbg !17
+ ret i32 0, !dbg !19
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare void @test(i32, i8**)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"main.cpp", metadata !"/private/tmp", metadata !"clang version 3.3 (trunk 173515)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"main.cpp", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ]
+!13 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !5, metadata !"argc", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 3, i32 0, metadata !5, null}
+!16 = metadata !{i32 786689, metadata !5, metadata !"argv", metadata !6, i32 33554435, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 5, i32 0, metadata !18, null}
+!18 = metadata !{i32 786443, metadata !5, i32 4, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 6, i32 0, metadata !18, null}
diff --git a/test/Linker/DbgDeclare2.ll b/test/Linker/DbgDeclare2.ll
new file mode 100644
index 000000000000..e2e56b289338
--- /dev/null
+++ b/test/Linker/DbgDeclare2.ll
@@ -0,0 +1,76 @@
+; This file is used by DbgDeclare.ll, so it doesn't actually do anything itself
+;
+; RUN: true
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+define void @test(i32 %argc, i8** %argv) uwtable ssp {
+entry:
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 8
+ %i = alloca i32, align 4
+ store i32 %argc, i32* %argc.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !14), !dbg !15
+ store i8** %argv, i8*** %argv.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !16), !dbg !15
+ call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !17), !dbg !20
+ store i32 0, i32* %i, align 4, !dbg !20
+ br label %for.cond, !dbg !20
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4, !dbg !20
+ %1 = load i32* %argc.addr, align 4, !dbg !20
+ %cmp = icmp slt i32 %0, %1, !dbg !20
+ br i1 %cmp, label %for.body, label %for.end, !dbg !20
+
+for.body: ; preds = %for.cond
+ %2 = load i32* %i, align 4, !dbg !21
+ %idxprom = sext i32 %2 to i64, !dbg !21
+ %3 = load i8*** %argv.addr, align 8, !dbg !21
+ %arrayidx = getelementptr inbounds i8** %3, i64 %idxprom, !dbg !21
+ %4 = load i8** %arrayidx, align 8, !dbg !21
+ %call = call i32 @puts(i8* %4), !dbg !21
+ br label %for.inc, !dbg !23
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4, !dbg !20
+ %inc = add nsw i32 %5, 1, !dbg !20
+ store i32 %inc, i32* %i, align 4, !dbg !20
+ br label %for.cond, !dbg !20
+
+for.end: ; preds = %for.cond
+ ret void, !dbg !24
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i32 @puts(i8*)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"main.cpp", metadata !"/private/tmp", metadata !"clang version 3.3 (trunk 173515)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"print_args", metadata !"print_args", metadata !"test", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32, i8**)* @test, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"test.cpp", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ]
+!13 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !5, metadata !"argc", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 4, i32 0, metadata !5, null}
+!16 = metadata !{i32 786689, metadata !5, metadata !"argv", metadata !6, i32 33554436, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 786688, metadata !18, metadata !"i", metadata !6, i32 6, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!18 = metadata !{i32 786443, metadata !19, i32 6, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!19 = metadata !{i32 786443, metadata !5, i32 5, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!20 = metadata !{i32 6, i32 0, metadata !18, null}
+!21 = metadata !{i32 8, i32 0, metadata !22, null}
+!22 = metadata !{i32 786443, metadata !18, i32 7, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 9, i32 0, metadata !22, null}
+!24 = metadata !{i32 10, i32 0, metadata !19, null}
diff --git a/test/Linker/module-flags-1-a.ll b/test/Linker/module-flags-1-a.ll
index 973aa80822c5..32f189cf99f1 100644
--- a/test/Linker/module-flags-1-a.ll
+++ b/test/Linker/module-flags-1-a.ll
@@ -3,10 +3,10 @@
; Test basic functionality of module flags.
; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
-; CHECK: !1 = metadata !{i32 1, metadata !"qux", i32 42}
+; CHECK: !1 = metadata !{i32 2, metadata !"bar", i32 42}
; CHECK: !2 = metadata !{i32 1, metadata !"mux", metadata !3}
; CHECK: !3 = metadata !{metadata !"hello world", i32 927}
-; CHECK: !4 = metadata !{i32 2, metadata !"bar", i32 42}
+; CHECK: !4 = metadata !{i32 1, metadata !"qux", i32 42}
; CHECK: !llvm.module.flags = !{!0, !1, !2, !4}
!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
diff --git a/test/Linker/module-flags-3-a.ll b/test/Linker/module-flags-3-a.ll
index 4233a0a7a5b1..e7a720e9c024 100644
--- a/test/Linker/module-flags-3-a.ll
+++ b/test/Linker/module-flags-3-a.ll
@@ -3,10 +3,10 @@
; Test 'require' behavior.
; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
-; CHECK: !1 = metadata !{i32 3, metadata !"foo", metadata !2}
-; CHECK: !2 = metadata !{metadata !"bar", i32 42}
-; CHECK: !3 = metadata !{i32 1, metadata !"bar", i32 42}
-; CHECK: !llvm.module.flags = !{!0, !1, !3}
+; CHECK: !1 = metadata !{i32 1, metadata !"bar", i32 42}
+; CHECK: !2 = metadata !{i32 3, metadata !"foo", metadata !3}
+; CHECK: !3 = metadata !{metadata !"bar", i32 42}
+; CHECK: !llvm.module.flags = !{!0, !1, !2}
!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
!1 = metadata !{ i32 1, metadata !"bar", i32 42 }
diff --git a/test/Linker/module-flags-7-a.ll b/test/Linker/module-flags-7-a.ll
new file mode 100644
index 000000000000..976c8fecf5fe
--- /dev/null
+++ b/test/Linker/module-flags-7-a.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-link %s %p/module-flags-7-b.ll -S -o - 2>&1 | FileCheck %s
+
+; Test module flags error messages.
+
+; CHECK: linking module flags 'foo': IDs have conflicting behaviors
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+
+!llvm.module.flags = !{ !0 }
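+
+; For reference: this module gives 'foo' behavior 1 (error) while
+; module-flags-7-b.ll gives it behavior 2 (warning), so linking fails with
+; the conflicting-behaviors diagnostic checked above.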
diff --git a/test/Linker/module-flags-7-b.ll b/test/Linker/module-flags-7-b.ll
new file mode 100644
index 000000000000..2bc72508d468
--- /dev/null
+++ b/test/Linker/module-flags-7-b.ll
@@ -0,0 +1,6 @@
+; This file is used with module-flags-7-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 2, metadata !"foo", i32 37 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-8-a.ll b/test/Linker/module-flags-8-a.ll
new file mode 100644
index 000000000000..146cae763d6d
--- /dev/null
+++ b/test/Linker/module-flags-8-a.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-link %s %p/module-flags-8-b.ll -S -o - | sort | FileCheck %s
+
+; Test append-type module flags.
+
+; CHECK: !0 = metadata !{i32 5, metadata !"flag-0", metadata !1}
+; CHECK: !1 = metadata !{i32 0, i32 0, i32 1}
+; CHECK: !2 = metadata !{i32 6, metadata !"flag-1", metadata !3}
+; CHECK: !3 = metadata !{i32 0, i32 1, i32 2}
+; CHECK: !llvm.module.flags = !{!0, !2}
+
+!0 = metadata !{ i32 5, metadata !"flag-0", metadata !{ i32 0 } }
+!1 = metadata !{ i32 6, metadata !"flag-1", metadata !{ i32 0, i32 1 } }
+
+!llvm.module.flags = !{ !0, !1 }
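+
+; For reference: behavior 5 ("append") concatenates the operand lists,
+; yielding {0, 0, 1} for flag-0, while behavior 6 ("append unique") drops
+; duplicates, yielding {0, 1, 2} for flag-1, as the CHECK lines verify.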
diff --git a/test/Linker/module-flags-8-b.ll b/test/Linker/module-flags-8-b.ll
new file mode 100644
index 000000000000..08f9bc49ee5e
--- /dev/null
+++ b/test/Linker/module-flags-8-b.ll
@@ -0,0 +1,7 @@
+; This file is used with module-flags-8-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 5, metadata !"flag-0", metadata !{ i32 0, i32 1 } }
+!1 = metadata !{ i32 6, metadata !"flag-1", metadata !{ i32 1, i32 2 } }
+
+!llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/testlink1.ll b/test/Linker/testlink1.ll
index a8746379b6cf..6ba6fd5fd7e9 100644
--- a/test/Linker/testlink1.ll
+++ b/test/Linker/testlink1.ll
@@ -13,6 +13,10 @@
; The uses of intlist in the other file should be remapped.
; CHECK-NOT: {{%intlist.[0-9]}}
+; CHECK: %VecSize = type { <5 x i32> }
+; CHECK: %VecSize.{{[0-9]}} = type { <10 x i32> }
+%VecSize = type { <5 x i32> }
+
%Struct1 = type opaque
@S1GV = external global %Struct1*
@@ -93,3 +97,5 @@ define internal void @Testintern() {
define void @testIntern() {
ret void
}
+
+declare void @VecSizeCrash(%VecSize)
diff --git a/test/Linker/testlink2.ll b/test/Linker/testlink2.ll
index 1798e31e47dc..ff8e5299869a 100644
--- a/test/Linker/testlink2.ll
+++ b/test/Linker/testlink2.ll
@@ -8,6 +8,8 @@
%Ty1 = type { %Ty2* }
%Ty2 = type opaque
+%VecSize = type { <10 x i32> }
+
@GVTy1 = global %Ty1* null
@GVTy2 = external global %Ty2*
@@ -53,3 +55,4 @@ define internal void @testIntern() {
ret void
}
+declare void @VecSizeCrash1(%VecSize)
diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s
new file mode 100644
index 000000000000..1e9024c5eede
--- /dev/null
+++ b/test/MC/AArch64/basic-a64-diagnostics.s
@@ -0,0 +1,3713 @@
+// RUN: not llvm-mc -triple=aarch64 < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+
+//------------------------------------------------------------------------------
+// Add/sub (extended register)
+//------------------------------------------------------------------------------
+
+ // Mismatched final register and extend
+ add x2, x3, x5, sxtb
+ add x2, x4, w2, uxtx
+ add w5, w7, x9, sxtx
+// CHECK-ERROR: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR: add x2, x3, x5, sxtb
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR: add x2, x4, w2, uxtx
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR: add w5, w7, x9, sxtx
+// CHECK-ERROR: ^
+
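+// For comparison, matched pairings assemble cleanly; they are shown
+// commented out so this test still reports only errors, e.g.:
+//   add x2, x3, w5, sxtb    // X destination, W source, byte extend
+//   add w5, w7, w9, sxth    // pure 32-bit form, halfword extend
+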
+ // Out of range extends
+ add x9, x10, w11, uxtb #-1
+ add x3, x5, w7, uxtb #5
+ sub x9, x15, x2, uxth #5
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR: add x9, x10, w11, uxtb #-1
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR: add x3, x5, w7, uxtb #5
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR: sub x9, x15, x2, uxth #5
+// CHECK-ERROR: ^
+
+ // Wrong registers on normal variants
+ add xzr, x3, x5, uxtx
+ sub x3, xzr, w9, sxth #1
+ add x1, x2, sp, uxtx
+// CHECK-ERROR: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR: add xzr, x3, x5, uxtx
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sub x3, xzr, w9, sxth #1
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR: add x1, x2, sp, uxtx
+// CHECK-ERROR: ^
+
+ // Wrong registers on flag-setting variants
+ adds sp, x3, w2, uxtb
+ adds x3, xzr, x9, uxtx
+ subs x2, x1, sp, uxtx
+ adds x2, x1, sp, uxtb #2
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: adds sp, x3, w2, uxtb
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR: adds x3, xzr, x9, uxtx
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR: subs x2, x1, sp, uxtx
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR: adds x2, x1, sp, uxtb #2
+// CHECK-ERROR: ^
+
+ // Amount not optional if lsl valid and used
+ add sp, x5, x7, lsl
+// CHECK-ERROR: error: expected #imm after shift specifier
+// CHECK-ERROR: add sp, x5, x7, lsl
+// CHECK-ERROR: ^
+
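+// For reference, supplying the amount (or omitting the shift operator
+// entirely) is accepted, e.g.:
+//   add sp, x5, x7, lsl #0
+//   add sp, x5, x7
+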
+//------------------------------------------------------------------------------
+// Add/sub (immediate)
+//------------------------------------------------------------------------------
+
+// Out of range immediates: < 0 or more than 12 bits
+ add w4, w5, #-1
+ add w5, w6, #0x1000
+ add w4, w5, #-1, lsl #12
+ add w5, w6, #0x1000, lsl #12
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT: add w4, w5, #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT: add w5, w6, #0x1000
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT: add w4, w5, #-1, lsl #12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT: add w5, w6, #0x1000, lsl #12
+// CHECK-ERROR-NEXT: ^
+
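+// For reference, in-range forms take a 12-bit unsigned immediate, e.g.:
+//   add w4, w5, #0
+//   add w5, w6, #0xfff
+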
+// Only lsl #0 and lsl #12 are allowed
+ add w2, w3, #0x1, lsl #1
+ add w5, w17, #0xfff, lsl #13
+ add w17, w20, #0x1000, lsl #12
+ sub xsp, x34, #0x100, lsl #-1
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT: add w2, w3, #0x1, lsl #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT: add w5, w17, #0xfff, lsl #13
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT: add w17, w20, #0x1000, lsl #12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: only 'lsl #+N' valid after immediate
+// CHECK-ERROR-NEXT: sub xsp, x34, #0x100, lsl #-1
+// CHECK-ERROR-NEXT: ^
+
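+// For reference, the two accepted shifted spellings are, e.g.:
+//   add w2, w3, #0x1, lsl #0
+//   add w17, w20, #0xfff, lsl #12
+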
+// Incorrect registers (w31 doesn't exist at all, and 31 decodes to sp for these).
+ add w31, w20, #1234
+ add wzr, w20, #0x123
+ add w20, wzr, #0x321
+ add wzr, wzr, #0xfff
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w31, w20, #1234
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add wzr, w20, #0x123
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add w20, wzr, #0x321
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add wzr, wzr, #0xfff
+// CHECK-ERROR-NEXT: ^
+
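+// For reference, register 31 is spelled as the stack pointer in these
+// positions, so the valid forms are, e.g.:
+//   add wsp, w20, #1234
+//   add w20, wsp, #0x321
+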
+// Mixed register classes
+ add xsp, w2, #123
+ sub w2, x30, #32
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: add xsp, w2, #123
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sub w2, x30, #32
+// CHECK-ERROR-NEXT: ^
+
+// Out of range immediate
+ adds w0, w5, #0x10000
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT: adds w0, w5, #0x10000
+// CHECK-ERROR-NEXT: ^
+
+// Wn|WSP should be in second place
+ adds w4, wzr, #0x123
+// ...but as the destination, 31 decodes to wzr, not wsp
+ subs wsp, w5, #123
+ subs x5, xzr, #0x456, lsl #12
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adds w4, wzr, #0x123
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: subs wsp, w5, #123
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: subs x5, xzr, #0x456, lsl #12
+// CHECK-ERROR-NEXT: ^
+
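+// For reference, wsp is accepted as the source, and writing wzr as the
+// destination is how the cmp/cmn aliases are encoded, e.g.:
+//   adds w4, wsp, #0x123
+//   subs wzr, w5, #123
+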
+ // MOV alias should not accept any fiddling
+ mov x2, xsp, #123
+ mov wsp, w27, #0xfff, lsl #12
+// CHECK-ERROR: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT: mov x2, xsp, #123
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mov wsp, w27, #0xfff, lsl #12
+// CHECK-ERROR-NEXT: ^
+
+ // A relocation should be provided for symbols
+ add x3, x9, #variable
+// CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
+// CHECK-ERROR-NEXT: add x3, x9, #variable
+// CHECK-ERROR-NEXT: ^
+
+
+//------------------------------------------------------------------------------
+// Add-subtract (shifted register)
+//------------------------------------------------------------------------------
+
+ add wsp, w1, w2, lsr #3
+ add x4, sp, x9, asr #5
+ add x9, x10, x5, ror #3
+// CHECK-ERROR: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: add wsp, w1, w2, lsr #3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: add x4, sp, x9, asr #5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: add x9, x10, x5, ror #3
+// CHECK-ERROR-NEXT: ^
+
+ add w1, w2, w3, lsl #-1
+ add w1, w2, w3, lsl #32
+ add w1, w2, w3, lsr #-1
+ add w1, w2, w3, lsr #32
+ add w1, w2, w3, asr #-1
+ add w1, w2, w3, asr #32
+ add x1, x2, x3, lsl #-1
+ add x1, x2, x3, lsl #64
+ add x1, x2, x3, lsr #-1
+ add x1, x2, x3, lsr #64
+ add x1, x2, x3, asr #-1
+ add x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: add w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: add w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: add w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: add x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: add x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: add x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: add x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT: ^
+
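+// For reference, the accepted shift amounts are #0-#31 for W registers
+// and #0-#63 for X registers, e.g.:
+//   add w1, w2, w3, lsl #31
+//   add x1, x2, x3, asr #63
+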
+ adds w1, w2, w3, lsl #-1
+ adds w1, w2, w3, lsl #32
+ adds w1, w2, w3, lsr #-1
+ adds w1, w2, w3, lsr #32
+ adds w1, w2, w3, asr #-1
+ adds w1, w2, w3, asr #32
+ adds x1, x2, x3, lsl #-1
+ adds x1, x2, x3, lsl #64
+ adds x1, x2, x3, lsr #-1
+ adds x1, x2, x3, lsr #64
+ adds x1, x2, x3, asr #-1
+ adds x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: adds w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: adds w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: adds w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: adds x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: adds x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: adds x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: adds x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ sub w1, w2, w3, lsl #-1
+ sub w1, w2, w3, lsl #32
+ sub w1, w2, w3, lsr #-1
+ sub w1, w2, w3, lsr #32
+ sub w1, w2, w3, asr #-1
+ sub w1, w2, w3, asr #32
+ sub x1, x2, x3, lsl #-1
+ sub x1, x2, x3, lsl #64
+ sub x1, x2, x3, lsr #-1
+ sub x1, x2, x3, lsr #64
+ sub x1, x2, x3, asr #-1
+ sub x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: sub w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: sub w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: sub w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: sub x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: sub x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: sub x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: sub x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ subs w1, w2, w3, lsl #-1
+ subs w1, w2, w3, lsl #32
+ subs w1, w2, w3, lsr #-1
+ subs w1, w2, w3, lsr #32
+ subs w1, w2, w3, asr #-1
+ subs w1, w2, w3, asr #32
+ subs x1, x2, x3, lsl #-1
+ subs x1, x2, x3, lsl #64
+ subs x1, x2, x3, lsr #-1
+ subs x1, x2, x3, lsr #64
+ subs x1, x2, x3, asr #-1
+ subs x1, x2, x3, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs w1, w2, w3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: subs w1, w2, w3, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs w1, w2, w3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: subs w1, w2, w3, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs w1, w2, w3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: subs w1, w2, w3, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs x1, x2, x3, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: subs x1, x2, x3, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs x1, x2, x3, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: subs x1, x2, x3, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: subs x1, x2, x3, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: subs x1, x2, x3, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ cmn w9, w10, lsl #-1
+ cmn w9, w10, lsl #32
+ cmn w11, w12, lsr #-1
+ cmn w11, w12, lsr #32
+ cmn w19, wzr, asr #-1
+ cmn wzr, wzr, asr #32
+ cmn x9, x10, lsl #-1
+ cmn x9, x10, lsl #64
+ cmn x11, x12, lsr #-1
+ cmn x11, x12, lsr #64
+ cmn x19, xzr, asr #-1
+ cmn xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn w9, w10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: cmn w9, w10, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn w11, w12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: cmn w11, w12, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn w19, wzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT: cmn wzr, wzr, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn x9, x10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: cmn x9, x10, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn x11, x12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: cmn x11, x12, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmn x19, xzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT: cmn xzr, xzr, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ cmp w9, w10, lsl #-1
+ cmp w9, w10, lsl #32
+ cmp w11, w12, lsr #-1
+ cmp w11, w12, lsr #32
+ cmp w19, wzr, asr #-1
+ cmp wzr, wzr, asr #32
+ cmp x9, x10, lsl #-1
+ cmp x9, x10, lsl #64
+ cmp x11, x12, lsr #-1
+ cmp x11, x12, lsr #64
+ cmp x19, xzr, asr #-1
+ cmp xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp w9, w10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: cmp w9, w10, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp w11, w12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: cmp w11, w12, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp w19, wzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT: cmp wzr, wzr, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp x9, x10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: cmp x9, x10, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp x11, x12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]
+// CHECK-ERROR-NEXT: cmp x11, x12, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: cmp x19, xzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT: cmp xzr, xzr, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ neg w9, w10, lsl #-1
+ neg w9, w10, lsl #32
+ neg w11, w12, lsr #-1
+ neg w11, w12, lsr #32
+ neg w19, wzr, asr #-1
+ neg wzr, wzr, asr #32
+ neg x9, x10, lsl #-1
+ neg x9, x10, lsl #64
+ neg x11, x12, lsr #-1
+ neg x11, x12, lsr #64
+ neg x19, xzr, asr #-1
+ neg xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg w9, w10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT: neg w9, w10, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg w11, w12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT: neg w11, w12, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg w19, wzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT: neg wzr, wzr, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg x9, x10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT: neg x9, x10, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg x11, x12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT: neg x11, x12, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: neg x19, xzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT: neg xzr, xzr, asr #64
+// CHECK-ERROR-NEXT: ^
+
+ negs w9, w10, lsl #-1
+ negs w9, w10, lsl #32
+ negs w11, w12, lsr #-1
+ negs w11, w12, lsr #32
+ negs w19, wzr, asr #-1
+ negs wzr, wzr, asr #32
+ negs x9, x10, lsl #-1
+ negs x9, x10, lsl #64
+ negs x11, x12, lsr #-1
+ negs x11, x12, lsr #64
+ negs x19, xzr, asr #-1
+ negs xzr, xzr, asr #64
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs w9, w10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT: negs w9, w10, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs w11, w12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT: negs w11, w12, lsr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs w19, wzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT: negs wzr, wzr, asr #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs x9, x10, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT: negs x9, x10, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs x11, x12, lsr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT: negs x11, x12, lsr #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: negs x19, xzr, asr #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT: negs xzr, xzr, asr #64
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Add-subtract (with carry)
+//------------------------------------------------------------------------------
+
+ adc wsp, w3, w5
+ adc w1, wsp, w2
+ adc w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc wsp, w3, w5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc w1, wsp, w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc w0, w10, wsp
+// CHECK-ERROR-NEXT: ^
+
+ adc sp, x3, x5
+ adc x1, sp, x2
+ adc x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc sp, x3, x5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc x1, sp, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adc x0, x10, sp
+// CHECK-ERROR-NEXT: ^
+
+ adcs wsp, w3, w5
+ adcs w1, wsp, w2
+ adcs w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs wsp, w3, w5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs w1, wsp, w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs w0, w10, wsp
+// CHECK-ERROR-NEXT: ^
+
+ adcs sp, x3, x5
+ adcs x1, sp, x2
+ adcs x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs sp, x3, x5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs x1, sp, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adcs x0, x10, sp
+// CHECK-ERROR-NEXT: ^
+
+ sbc wsp, w3, w5
+ sbc w1, wsp, w2
+ sbc w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc wsp, w3, w5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc w1, wsp, w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc w0, w10, wsp
+// CHECK-ERROR-NEXT: ^
+
+ sbc sp, x3, x5
+ sbc x1, sp, x2
+ sbc x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc sp, x3, x5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc x1, sp, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbc x0, x10, sp
+// CHECK-ERROR-NEXT: ^
+
+ sbcs wsp, w3, w5
+ sbcs w1, wsp, w2
+ sbcs w0, w10, wsp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs wsp, w3, w5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs w1, wsp, w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs w0, w10, wsp
+// CHECK-ERROR-NEXT: ^
+
+ sbcs sp, x3, x5
+ sbcs x1, sp, x2
+ sbcs x0, x10, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs sp, x3, x5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs x1, sp, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbcs x0, x10, sp
+// CHECK-ERROR-NEXT: ^
+
+ ngc wsp, w3
+ ngc w9, wsp
+ ngc sp, x9
+ ngc x2, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngc wsp, w3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngc w9, wsp
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngc sp, x9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngc x2, sp
+// CHECK-ERROR-NEXT: ^
+
+ ngcs wsp, w3
+ ngcs w9, wsp
+ ngcs sp, x9
+ ngcs x2, sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngcs wsp, w3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngcs w9, wsp
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngcs sp, x9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ngcs x2, sp
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Bitfield
+//------------------------------------------------------------------------------
+
+ sbfm x3, w13, #0, #0
+ sbfm w12, x9, #0, #0
+ sbfm sp, x3, #3, #5
+ sbfm w3, wsp, #1, #9
+ sbfm x9, x5, #-1, #0
+ sbfm x9, x5, #0, #-1
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm x3, w13, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm w12, x9, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm sp, x3, #3, #5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfm w3, wsp, #1, #9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: sbfm x9, x5, #-1, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: sbfm x9, x5, #0, #-1
+// CHECK-ERROR-NEXT: ^
+
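+// For reference, both immediates must fit the register width; e.g. this
+// X-form is accepted (it is how sxtb x9, w5 is encoded):
+//   sbfm x9, x5, #0, #7
+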
+ sbfm w3, w5, #32, #1
+ sbfm w7, w11, #19, #32
+ sbfm x29, x30, #64, #0
+ sbfm x10, x20, #63, #64
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: sbfm w3, w5, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: sbfm w7, w11, #19, #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: sbfm x29, x30, #64, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: sbfm x10, x20, #63, #64
+// CHECK-ERROR-NEXT: ^
+
+ ubfm w3, w5, #32, #1
+ ubfm w7, w11, #19, #32
+ ubfm x29, x30, #64, #0
+ ubfm x10, x20, #63, #64
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ubfm w3, w5, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ubfm w7, w11, #19, #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: ubfm x29, x30, #64, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: ubfm x10, x20, #63, #64
+// CHECK-ERROR-NEXT: ^
+
+ bfm w3, w5, #32, #1
+ bfm w7, w11, #19, #32
+ bfm x29, x30, #64, #0
+ bfm x10, x20, #63, #64
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: bfm w3, w5, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: bfm w7, w11, #19, #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: bfm x29, x30, #64, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: bfm x10, x20, #63, #64
+// CHECK-ERROR-NEXT: ^
+
+ sxtb x3, x2
+ sxth xzr, xzr
+ sxtw x3, x5
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sxtb x3, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sxth xzr, xzr
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sxtw x3, x5
+// CHECK-ERROR-NEXT: ^
+
+ uxtb x3, x12
+ uxth x5, x9
+ uxtw x3, x5
+ uxtb x2, sp
+ uxtb sp, xzr
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: uxtb x3, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: uxth x5, x9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid instruction
+// CHECK-ERROR-NEXT: uxtw x3, x5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: uxtb x2, sp
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: uxtb sp, xzr
+// CHECK-ERROR-NEXT: ^
+
+ asr x3, w2, #1
+ asr sp, x2, #1
+ asr x25, x26, #-1
+ asr x25, x26, #64
+ asr w9, w8, #32
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: asr x3, w2, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: asr sp, x2, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: asr x25, x26, #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: asr x25, x26, #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: asr w9, w8, #32
+// CHECK-ERROR-NEXT: ^
+
+ sbfiz w1, w2, #0, #0
+ sbfiz wsp, w9, #0, #1
+ sbfiz w9, w10, #32, #1
+ sbfiz w11, w12, #32, #0
+ sbfiz w9, w10, #10, #23
+ sbfiz x3, x5, #12, #53
+ sbfiz sp, x3, #5, #6
+ sbfiz w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT: sbfiz w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfiz wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: sbfiz w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: sbfiz w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: sbfiz w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: sbfiz x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfiz sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfiz w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
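+// For reference, lsb + width must not exceed the register size, so e.g.
+// this W-form insert is accepted (10 + 22 = 32):
+//   sbfiz w9, w10, #10, #22
+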
+ sbfx w1, w2, #0, #0
+ sbfx wsp, w9, #0, #1
+ sbfx w9, w10, #32, #1
+ sbfx w11, w12, #32, #0
+ sbfx w9, w10, #10, #23
+ sbfx x3, x5, #12, #53
+ sbfx sp, x3, #5, #6
+ sbfx w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT: sbfx w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfx wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: sbfx w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: sbfx w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: sbfx w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: sbfx x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfx sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sbfx w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
+ bfi w1, w2, #0, #0
+ bfi wsp, w9, #0, #1
+ bfi w9, w10, #32, #1
+ bfi w11, w12, #32, #0
+ bfi w9, w10, #10, #23
+ bfi x3, x5, #12, #53
+ bfi sp, x3, #5, #6
+ bfi w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT: bfi w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfi wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: bfi w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: bfi w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: bfi w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: bfi x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfi sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfi w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
+ bfxil w1, w2, #0, #0
+ bfxil wsp, w9, #0, #1
+ bfxil w9, w10, #32, #1
+ bfxil w11, w12, #32, #0
+ bfxil w9, w10, #10, #23
+ bfxil x3, x5, #12, #53
+ bfxil sp, x3, #5, #6
+ bfxil w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT: bfxil w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfxil wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: bfxil w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: bfxil w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: bfxil w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: bfxil x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfxil sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bfxil w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
+ ubfiz w1, w2, #0, #0
+ ubfiz wsp, w9, #0, #1
+ ubfiz w9, w10, #32, #1
+ ubfiz w11, w12, #32, #0
+ ubfiz w9, w10, #10, #23
+ ubfiz x3, x5, #12, #53
+ ubfiz sp, x3, #5, #6
+ ubfiz w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT: ubfiz w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfiz wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ubfiz w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ubfiz w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: ubfiz w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested insert overflows register
+// CHECK-ERROR-NEXT: ubfiz x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfiz sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfiz w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
+ ubfx w1, w2, #0, #0
+ ubfx wsp, w9, #0, #1
+ ubfx w9, w10, #32, #1
+ ubfx w11, w12, #32, #0
+ ubfx w9, w10, #10, #23
+ ubfx x3, x5, #12, #53
+ ubfx sp, x3, #5, #6
+ ubfx w3, wsp, #7, #8
+// CHECK-ERROR: error: expected integer in range [<lsb>, 31]
+// CHECK-ERROR-NEXT: ubfx w1, w2, #0, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfx wsp, w9, #0, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ubfx w9, w10, #32, #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ubfx w11, w12, #32, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: ubfx w9, w10, #10, #23
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: requested extract overflows register
+// CHECK-ERROR-NEXT: ubfx x3, x5, #12, #53
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfx sp, x3, #5, #6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ubfx w3, wsp, #7, #8
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Compare & branch (immediate)
+//------------------------------------------------------------------------------
+
+ cbnz wsp, lbl
+ cbz sp, lbl
+ cbz x3, x5
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cbnz wsp, lbl
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cbz sp, lbl
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: cbz x3, x5
+// CHECK-ERROR-NEXT: ^
+
+ cbz w20, #1048576
+ cbnz xzr, #-1048580
+ cbz x29, #1
+// CHECK-ERROR: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: cbz w20, #1048576
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: cbnz xzr, #-1048580
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: cbz x29, #1
+// CHECK-ERROR-NEXT: ^
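+
+// For reference: the encodable pc offset is a signed 19-bit word offset,
+// i.e. a multiple of 4 in [-1048576, 1048572]; for contrast,
+//     cbz w20, #1048572
+// would be accepted.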
+
+//------------------------------------------------------------------------------
+// Conditional branch (immediate)
+//------------------------------------------------------------------------------
+
+ b.zf lbl
+// CHECK-ERROR: error: invalid condition code
+// CHECK-ERROR-NEXT: b.zf lbl
+// CHECK-ERROR-NEXT: ^
+
+ b.eq #1048576
+ b.ge #-1048580
+ b.cc #1
+// CHECK-ERROR: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: b.eq #1048576
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: b.ge #-1048580
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: b.cc #1
+// CHECK-ERROR-NEXT: ^
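+
+// For reference: b.cond encodes the same signed, word-aligned +/-1MiB
+// offset window as cbz/cbnz above, so the same three boundary cases fail.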
+
+//------------------------------------------------------------------------------
+// Conditional compare (immediate)
+//------------------------------------------------------------------------------
+
+ ccmp wsp, #4, #2, ne
+ ccmp w25, #-1, #15, hs
+ ccmp w3, #32, #0, ge
+ ccmp w19, #5, #-1, lt
+ ccmp w20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp wsp, #4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmp w25, #-1, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmp w3, #32, #0, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmp w19, #5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmp w20, #7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmp sp, #4, #2, ne
+ ccmp x25, #-1, #15, hs
+ ccmp x3, #32, #0, ge
+ ccmp x19, #5, #-1, lt
+ ccmp x20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp sp, #4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmp x25, #-1, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmp x3, #32, #0, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmp x19, #5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmp x20, #7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmn wsp, #4, #2, ne
+ ccmn w25, #-1, #15, hs
+ ccmn w3, #32, #0, ge
+ ccmn w19, #5, #-1, lt
+ ccmn w20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn wsp, #4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmn w25, #-1, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmn w3, #32, #0, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmn w19, #5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmn w20, #7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmn sp, #4, #2, ne
+ ccmn x25, #-1, #15, hs
+ ccmn x3, #32, #0, ge
+ ccmn x19, #5, #-1, lt
+ ccmn x20, #7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn sp, #4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmn x25, #-1, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmn x3, #32, #0, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmn x19, #5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmn x20, #7, #16, hs
+// CHECK-ERROR-NEXT: ^
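+
+// For reference: the immediate forms of ccmp/ccmn take a 5-bit unsigned
+// compare value in [0, 31] and a 4-bit nzcv mask in [0, 15], and reject
+// sp/wsp; a form such as
+//     ccmp w25, #31, #15, hs
+// would be accepted.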
+
+//------------------------------------------------------------------------------
+// Conditional compare (register)
+//------------------------------------------------------------------------------
+
+ ccmp wsp, w4, #2, ne
+ ccmp w3, wsp, #0, ge
+ ccmp w19, w5, #-1, lt
+ ccmp w20, w7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp wsp, w4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmp w3, wsp, #0, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmp w19, w5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmp w20, w7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmp sp, x4, #2, ne
+ ccmp x25, sp, #15, hs
+ ccmp x19, x5, #-1, lt
+ ccmp x20, x7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmp sp, x4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmp x25, sp, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmp x19, x5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmp x20, x7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmn wsp, w4, #2, ne
+ ccmn w25, wsp, #15, hs
+ ccmn w19, w5, #-1, lt
+ ccmn w20, w7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn wsp, w4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmn w25, wsp, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmn w19, w5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmn w20, w7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ ccmn sp, x4, #2, ne
+ ccmn x25, sp, #15, hs
+ ccmn x19, x5, #-1, lt
+ ccmn x20, x7, #16, hs
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ccmn sp, x4, #2, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ccmn x25, sp, #15, hs
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmn x19, x5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: ccmn x20, x7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Conditional select
+//------------------------------------------------------------------------------
+
+ csel w4, wsp, w9, eq
+ csel wsp, w2, w3, ne
+ csel w10, w11, wsp, ge
+ csel w1, w2, w3, #3
+ csel x4, sp, x9, eq
+ csel sp, x2, x3, ne
+ csel x10, x11, sp, ge
+ csel x1, x2, x3, #3
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel w4, wsp, w9, eq
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel wsp, w2, w3, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel w10, w11, wsp, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected AArch64 condition code
+// CHECK-ERROR-NEXT: csel w1, w2, w3, #3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel x4, sp, x9, eq
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel sp, x2, x3, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csel x10, x11, sp, ge
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected AArch64 condition code
+// CHECK-ERROR-NEXT: csel x1, x2, x3, #3
+// CHECK-ERROR-NEXT: ^
+
+ csinc w20, w21, wsp, mi
+ csinc sp, x30, x29, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csinc w20, w21, wsp, mi
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csinc sp, x30, x29, eq
+// CHECK-ERROR-NEXT: ^
+
+ csinv w20, wsp, wsp, mi
+ csinv sp, x30, x29, le
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csinv w20, wsp, wsp, mi
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csinv sp, x30, x29, le
+// CHECK-ERROR-NEXT: ^
+
+ csneg w20, w21, wsp, mi
+ csneg x0, sp, x29, le
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csneg w20, w21, wsp, mi
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csneg x0, sp, x29, le
+// CHECK-ERROR-NEXT: ^
+
+ cset wsp, lt
+ csetm sp, ge
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cset wsp, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: csetm sp, ge
+// CHECK-ERROR-NEXT: ^
+
+ cinc w3, wsp, ne
+ cinc sp, x9, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cinc w3, wsp, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cinc sp, x9, eq
+// CHECK-ERROR-NEXT: ^
+
+ cinv w3, wsp, ne
+ cinv sp, x9, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cinv w3, wsp, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cinv sp, x9, eq
+// CHECK-ERROR-NEXT: ^
+
+ cneg w3, wsp, ne
+ cneg sp, x9, eq
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cneg w3, wsp, ne
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: cneg sp, x9, eq
+// CHECK-ERROR-NEXT: ^
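+
+// For reference: none of the conditional-select family accept sp/wsp in
+// any position (the zero register takes that encoding), and the condition
+// must be symbolic rather than an immediate; a form such as
+//     csel w1, w2, w3, pl
+// would be accepted.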
+
+//------------------------------------------------------------------------------
+// Data Processing (1 source)
+//------------------------------------------------------------------------------
+ rbit x23, w2
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: rbit x23, w2
+
+ cls sp, x2
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: cls sp, x2
+
+ clz wsp, w3
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: clz wsp, w3
+
+//------------------------------------------------------------------------------
+// Data Processing (2 sources)
+//------------------------------------------------------------------------------
+ udiv x23, w2, x18
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: udiv x23, w2, x18
+
+ lsl sp, x2, x4
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: lsl sp, x2, x4
+
+ asr wsp, w3, w9
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: asr wsp, w3, w9
+
+//------------------------------------------------------------------------------
+// Data Processing (3 sources)
+//------------------------------------------------------------------------------
+
+ madd sp, x3, x9, x10
+//CHECK-ERROR: error: invalid operand for instruction
+//CHECK-ERROR-NEXT: madd sp, x3, x9, x10
+
+//------------------------------------------------------------------------------
+// Exception generation
+//------------------------------------------------------------------------------
+ svc #-1
+ hlt #65536
+ dcps4 #43
+ dcps4
+// CHECK-ERROR: error: expected integer in range [0, 65535]
+// CHECK-ERROR-NEXT: svc #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 65535]
+// CHECK-ERROR-NEXT: hlt #65536
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid instruction
+// CHECK-ERROR-NEXT: dcps4 #43
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid instruction
+// CHECK-ERROR-NEXT: dcps4
+// CHECK-ERROR-NEXT: ^
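+
+// For reference: these take a 16-bit unsigned immediate in [0, 65535], and
+// only dcps1, dcps2 and dcps3 are defined mnemonics, so a form such as
+//     dcps1 #43
+// would be accepted.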
+
+//------------------------------------------------------------------------------
+// Extract (immediate)
+//------------------------------------------------------------------------------
+
+ extr w2, w20, w30, #-1
+ extr w9, w19, w20, #32
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: extr w2, w20, w30, #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: extr w9, w19, w20, #32
+// CHECK-ERROR-NEXT: ^
+
+ extr x10, x15, x20, #-1
+ extr x20, x25, x30, #64
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: extr x10, x15, x20, #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: extr x20, x25, x30, #64
+// CHECK-ERROR-NEXT: ^
+
+ ror w9, w10, #32
+ ror x10, x11, #64
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: ror w9, w10, #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: ror x10, x11, #64
+// CHECK-ERROR-NEXT: ^
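+
+// For reference: the extr immediate is a bit position in [0, regwidth-1],
+// and ror-with-immediate is an alias of extr with both sources equal, so
+//     extr x20, x25, x30, #63
+//     ror w9, w10, #31
+// would both be accepted.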
+
+//------------------------------------------------------------------------------
+// Floating-point compare
+//------------------------------------------------------------------------------
+
+ fcmp s3, d2
+// CHECK-ERROR: error: expected floating-point constant #0.0
+// CHECK-ERROR-NEXT: fcmp s3, d2
+// CHECK-ERROR-NEXT: ^
+
+ fcmp s9, #-0.0
+ fcmp d3, #-0.0
+ fcmp s1, #1.0
+ fcmpe s30, #-0.0
+// CHECK-ERROR: error: expected floating-point constant #0.0
+// CHECK-ERROR-NEXT: fcmp s9, #-0.0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected floating-point constant #0.0
+// CHECK-ERROR-NEXT: fcmp d3, #-0.0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected floating-point constant #0.0
+// CHECK-ERROR-NEXT: fcmp s1, #1.0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected floating-point constant #0.0
+// CHECK-ERROR-NEXT: fcmpe s30, #-0.0
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point conditional compare
+//------------------------------------------------------------------------------
+
+ fccmp s19, s5, #-1, lt
+ fccmp s20, s7, #16, hs
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: fccmp s19, s5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: fccmp s20, s7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ fccmp d19, d5, #-1, lt
+ fccmp d20, d7, #16, hs
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: fccmp d19, d5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: fccmp d20, d7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ fccmpe s19, s5, #-1, lt
+ fccmpe s20, s7, #16, hs
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: fccmpe s19, s5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: fccmpe s20, s7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+ fccmpe d19, d5, #-1, lt
+ fccmpe d20, d7, #16, hs
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: fccmpe d19, d5, #-1, lt
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: fccmpe d20, d7, #16, hs
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point conditional select
+//------------------------------------------------------------------------------
+
+ fcsel q3, q20, q9, pl
+ fcsel h9, h10, h11, mi
+ fcsel b9, b10, b11, mi
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcsel q3, q20, q9, pl
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcsel h9, h10, h11, mi
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcsel b9, b10, b11, mi
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (1 source)
+//------------------------------------------------------------------------------
+
+ fmov d0, s3
+ fcvt d0, d1
+// CHECK-ERROR: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT: fmov d0, s3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvt d0, d1
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (2 sources)
+//------------------------------------------------------------------------------
+
+ fadd s0, d3, d7
+ fmaxnm d3, s19, d12
+ fnmul d1, d9, s18
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fadd s0, d3, d7
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmaxnm d3, s19, d12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fnmul d1, d9, s18
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (3 sources)
+//------------------------------------------------------------------------------
+
+ fmadd b3, b4, b5, b6
+ fmsub h1, h2, h3, h4
+ fnmadd q3, q5, q6, q7
+ fnmsub s2, s4, d5, h9
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmadd b3, b4, b5, b6
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fmsub h1, h2, h3, h4
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fnmadd q3, q5, q6, q7
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fnmsub s2, s4, d5, h9
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point <-> fixed-point conversion
+//------------------------------------------------------------------------------
+
+ fcvtzs w13, s31, #0
+ fcvtzs w19, s20, #33
+ fcvtzs wsp, s19, #14
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT: fcvtzs w13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT: fcvtzs w19, s20, #33
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzs wsp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ fcvtzs x13, s31, #0
+ fcvtzs x19, s20, #65
+ fcvtzs sp, s19, #14
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT: fcvtzs x13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT: fcvtzs x19, s20, #65
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzs sp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ fcvtzu w13, s31, #0
+ fcvtzu w19, s20, #33
+ fcvtzu wsp, s19, #14
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT: fcvtzu w13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 32]
+// CHECK-ERROR-NEXT: fcvtzu w19, s20, #33
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzu wsp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ fcvtzu x13, s31, #0
+ fcvtzu x19, s20, #65
+ fcvtzu sp, s19, #14
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT: fcvtzu x13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [1, 64]
+// CHECK-ERROR-NEXT: fcvtzu x19, s20, #65
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtzu sp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ scvtf w13, s31, #0
+ scvtf w19, s20, #33
+ scvtf wsp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf w13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf w19, s20, #33
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf wsp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ scvtf x13, s31, #0
+ scvtf x19, s20, #65
+ scvtf sp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf x13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf x19, s20, #65
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf sp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ ucvtf w13, s31, #0
+ ucvtf w19, s20, #33
+ ucvtf wsp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf w13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf w19, s20, #33
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf wsp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+ ucvtf x13, s31, #0
+ ucvtf x19, s20, #65
+ ucvtf sp, s19, #14
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf x13, s31, #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf x19, s20, #65
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ucvtf sp, s19, #14
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Floating-point immediate
+//------------------------------------------------------------------------------
+        // Exponent too large
+ fmov d3, #0.0625
+ fmov s2, #32.0
+// CHECK-ERROR: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT: fmov d3, #0.0625
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT: fmov s2, #32.0
+// CHECK-ERROR-NEXT: ^
+
+        // Fraction too precise
+ fmov s9, #1.03125
+ fmov s28, #1.96875
+// CHECK-ERROR: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT: fmov s9, #1.03125
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT: fmov s28, #1.96875
+// CHECK-ERROR-NEXT: ^
+
+        // No particular reason, but a striking omission
+ fmov d0, #0.0
+// CHECK-ERROR: error: expected compatible register or floating-point constant
+// CHECK-ERROR-NEXT: fmov d0, #0.0
+// CHECK-ERROR-NEXT: ^
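+
+// For reference: the encodable set is +/-(n/16) * 2^r with n in [16, 31]
+// and r in [-3, 4]; 0.0625 (2^-4) and 32.0 (2^5) fall outside the exponent
+// range, 1.03125 (1 + 1/32) needs a fifth fraction bit, and 0.0 is simply
+// not in the set (a zero can be materialised with fmov d0, xzr instead).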
+
+//------------------------------------------------------------------------------
+// Floating-point <-> integer conversion
+//------------------------------------------------------------------------------
+
+ fmov x3, v0.d[0]
+ fmov v29.1d[1], x2
+ fmov x7, v0.d[2]
+ fcvtns sp, s5
+ scvtf s6, wsp
+// CHECK-ERROR: error: expected lane specifier '[1]'
+// CHECK-ERROR-NEXT: fmov x3, v0.d[0]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: lane number incompatible with layout
+// CHECK-ERROR-NEXT: fmov v29.1d[1], x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: lane number incompatible with layout
+// CHECK-ERROR-NEXT: fmov x7, v0.d[2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: fcvtns sp, s5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: scvtf s6, wsp
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load-register (literal)
+//------------------------------------------------------------------------------
+
+ ldr sp, some_label
+ ldrsw w3, somewhere
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr sp, some_label
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsw w3, somewhere
+// CHECK-ERROR-NEXT: ^
+
+ ldrsw x2, #1048576
+ ldr q0, #-1048580
+ ldr x0, #2
+// CHECK-ERROR: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: ldrsw x2, #1048576
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: ldr q0, #-1048580
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: ldr x0, #2
+// CHECK-ERROR-NEXT: ^
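+
+// For reference: the literal form takes a signed 19-bit word offset, i.e.
+// a multiple of 4 in [-1048576, 1048572], so e.g.
+//     ldr x0, #8
+// would be accepted.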
+
+//------------------------------------------------------------------------------
+// Load/store exclusive
+//------------------------------------------------------------------------------
+
+ stxrb w2, x3, [x4, #20]
+ stlxrh w10, w11, [w2]
+// CHECK-ERROR: error: expected '#0'
+// CHECK-ERROR-NEXT: stxrb w2, x3, [x4, #20]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stlxrh w10, w11, [w2]
+// CHECK-ERROR-NEXT: ^
+
+ stlxr x20, w21, [sp]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stlxr x20, w21, [sp]
+// CHECK-ERROR-NEXT: ^
+
+ ldxr sp, [sp]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldxr sp, [sp]
+// CHECK-ERROR-NEXT: ^
+
+ stxp x1, x2, x3, [x4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stxp x1, x2, x3, [x4]
+// CHECK-ERROR-NEXT: ^
+
+ stlxp w5, x1, w4, [x5]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stlxp w5, x1, w4, [x5]
+// CHECK-ERROR-NEXT: ^
+
+ stlxp w17, w6, x7, [x22]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stlxp w17, w6, x7, [x22]
+// CHECK-ERROR-NEXT: ^
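+
+// For reference: the status register must be a W register, the base an X
+// register or sp with no offset other than #0, and paired data registers
+// must share a width; a form such as
+//     stlxp w5, x1, x2, [x5]
+// would be accepted.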
+
+//------------------------------------------------------------------------------
+// Load/store (unscaled immediate)
+//------------------------------------------------------------------------------
+
+ ldurb w2, [sp, #256]
+ sturh w17, [x1, #256]
+ ldursw x20, [x1, #256]
+ ldur x12, [sp, #256]
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldurb w2, [sp, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: sturh w17, [x1, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldursw x20, [x1, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldur x12, [sp, #256]
+// CHECK-ERROR-NEXT: ^
+
+ stur h2, [x2, #-257]
+ stur b2, [x2, #-257]
+ ldursb x9, [sp, #-257]
+ ldur w2, [x30, #-257]
+ stur q9, [x20, #-257]
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: stur h2, [x2, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: stur b2, [x2, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldursb x9, [sp, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldur w2, [x30, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: stur q9, [x20, #-257]
+// CHECK-ERROR-NEXT: ^
+
+ prfum pstl3strm, [xzr]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: prfum pstl3strm, [xzr]
+// CHECK-ERROR-NEXT: ^
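+
+// For reference: the unscaled forms take a signed 9-bit byte offset in
+// [-256, 255] regardless of access size, and the base must be an X
+// register or sp (xzr is rejected); e.g.
+//     ldur x12, [sp, #255]
+// would be accepted.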
+
+//------------------------------------------------------------------------------
+// Load-store register (immediate post-indexed)
+//------------------------------------------------------------------------------
+ ldr x3, [x4, #25], #0
+ ldr x4, [x9, #0], #4
+// CHECK-ERROR: error: expected symbolic reference or integer in range [0, 32760]
+// CHECK-ERROR-NEXT: ldr x3, [x4, #25], #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr x4, [x9, #0], #4
+// CHECK-ERROR-NEXT: ^
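+
+// For reference: the post-index increment is the same signed 9-bit byte
+// offset in [-256, 255] as the unscaled forms; the cases below probe one
+// step past it in each direction.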
+
+ strb w1, [x19], #256
+ strb w9, [sp], #-257
+ strh w1, [x19], #256
+ strh w9, [sp], #-257
+ str w1, [x19], #256
+ str w9, [sp], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: strb w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: strb w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: strh w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: strh w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+
+ ldrb w1, [x19], #256
+ ldrb w9, [sp], #-257
+ ldrh w1, [x19], #256
+ ldrh w9, [sp], #-257
+ ldr w1, [x19], #256
+ ldr w9, [sp], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrb w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrb w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrh w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrh w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr w1, [x19], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr w9, [sp], #-257
+// CHECK-ERROR-NEXT: ^
+
+ ldrsb x2, [x3], #256
+ ldrsb x22, [x13], #-257
+ ldrsh x2, [x3], #256
+ ldrsh x22, [x13], #-257
+ ldrsw x2, [x3], #256
+ ldrsw x22, [x13], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsb x2, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsb x22, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsh x2, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsh x22, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsw x2, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsw x22, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+
+ ldrsb w2, [x3], #256
+ ldrsb w22, [x13], #-257
+ ldrsh w2, [x3], #256
+ ldrsh w22, [x13], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsb w2, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsb w22, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsh w2, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsh w22, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+
+ str b3, [x3], #256
+ str b3, [x13], #-257
+ str h3, [x3], #256
+ str h3, [x13], #-257
+ str s3, [x3], #256
+ str s3, [x13], #-257
+ str d3, [x3], #256
+ str d3, [x13], #-257
+ str q3, [x3], #256
+ str q3, [x13], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str b3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str b3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str h3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str h3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str s3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str s3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str d3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str d3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str q3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str q3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+
+ ldr b3, [x3], #256
+ ldr b3, [x13], #-257
+ ldr h3, [x3], #256
+ ldr h3, [x13], #-257
+ ldr s3, [x3], #256
+ ldr s3, [x13], #-257
+ ldr d3, [x3], #256
+ ldr d3, [x13], #-257
+ ldr q3, [x3], #256
+ ldr q3, [x13], #-257
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr b3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr b3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr h3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr h3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr s3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr s3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr d3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr d3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr q3, [x3], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr q3, [x13], #-257
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load-store register (immediate pre-indexed)
+//------------------------------------------------------------------------------
+
+ ldr x3, [x4]!
+// CHECK-ERROR: error:
+// CHECK-ERROR-NEXT: ldr x3, [x4]!
+// CHECK-ERROR-NEXT: ^
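+
+// For reference: writeback addressing needs an immediate before the ']',
+// so a bare [x4]! cannot parse; the check above matches any diagnostic
+// text.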
+
+ strb w1, [x19, #256]!
+ strb w9, [sp, #-257]!
+ strh w1, [x19, #256]!
+ strh w9, [sp, #-257]!
+ str w1, [x19, #256]!
+ str w9, [sp, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strb w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: strb w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strh w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: strh w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+ ldrb w1, [x19, #256]!
+ ldrb w9, [sp, #-257]!
+ ldrh w1, [x19, #256]!
+ ldrh w9, [sp, #-257]!
+ ldr w1, [x19, #256]!
+ ldr w9, [sp, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrb w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrb w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrh w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrh w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w1, [x19, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr w9, [sp, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+ ldrsb x2, [x3, #256]!
+ ldrsb x22, [x13, #-257]!
+ ldrsh x2, [x3, #256]!
+ ldrsh x22, [x13, #-257]!
+ ldrsw x2, [x3, #256]!
+ ldrsw x22, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsb x2, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsb x22, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh x2, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsh x22, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsw x2, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsw x22, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+ ldrsb w2, [x3, #256]!
+ ldrsb w22, [x13, #-257]!
+ ldrsh w2, [x3, #256]!
+ ldrsh w22, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsb w2, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsb w22, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh w2, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrsh w22, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+ str b3, [x3, #256]!
+ str b3, [x13, #-257]!
+ str h3, [x3, #256]!
+ str h3, [x13, #-257]!
+ str s3, [x3, #256]!
+ str s3, [x13, #-257]!
+ str d3, [x3, #256]!
+ str d3, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str b3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str b3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str h3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str h3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str s3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str s3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: str d3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str d3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+ ldr b3, [x3, #256]!
+ ldr b3, [x13, #-257]!
+ ldr h3, [x3, #256]!
+ ldr h3, [x13, #-257]!
+ ldr s3, [x3, #256]!
+ ldr s3, [x13, #-257]!
+ ldr d3, [x3, #256]!
+ ldr d3, [x13, #-257]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr b3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr b3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr h3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr h3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr s3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr s3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr d3, [x3, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr d3, [x13, #-257]!
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store (unprivileged)
+//------------------------------------------------------------------------------
+
+ ldtrb w2, [sp, #256]
+ sttrh w17, [x1, #256]
+ ldtrsw x20, [x1, #256]
+ ldtr x12, [sp, #256]
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldtrb w2, [sp, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: sttrh w17, [x1, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldtrsw x20, [x1, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldtr x12, [sp, #256]
+// CHECK-ERROR-NEXT: ^
+
+ sttr h2, [x2, #-257]
+ sttr b2, [x2, #-257]
+ ldtrsb x9, [sp, #-257]
+ ldtr w2, [x30, #-257]
+ sttr q9, [x20, #-257]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sttr h2, [x2, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sttr b2, [x2, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldtrsb x9, [sp, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldtr w2, [x30, #-257]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: sttr q9, [x20, #-257]
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store (unsigned immediate)
+//------------------------------------------------------------------------------
+
+//// Out of range immediates
+ ldr q0, [x11, #65536]
+ ldr x0, [sp, #32768]
+ ldr w0, [x4, #16384]
+ ldrh w2, [x21, #8192]
+ ldrb w3, [x12, #4096]
+// CHECK-ERROR: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr q0, [x11, #65536]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr x0, [sp, #32768]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldr w0, [x4, #16384]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrh w2, [x21, #8192]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: ldrb w3, [x12, #4096]
+// CHECK-ERROR-NEXT: ^
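+
+// For reference: the unsigned-offset forms take a 12-bit immediate scaled
+// by the access size, giving maxima of #4095 (byte), #8190 (half), #16380
+// (word), #32760 (doubleword) and #65520 (quadword); each case above is
+// one step beyond, and out-of-range offsets are reported against the
+// unscaled [-256, 255] form instead.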
+
+//// Misaligned addresses
+ ldr w0, [x0, #2]
+ ldrsh w2, [x0, #123]
+ str q0, [x0, #8]
+// CHECK-ERROR: error: too few operands for instruction
+// CHECK-ERROR-NEXT: ldr w0, [x0, #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: too few operands for instruction
+// CHECK-ERROR-NEXT: ldrsh w2, [x0, #123]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: too few operands for instruction
+// CHECK-ERROR-NEXT: str q0, [x0, #8]
+// CHECK-ERROR-NEXT: ^
+
+//// 32-bit addresses
+ ldr w0, [w20]
+ ldrsh x3, [wsp]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w0, [w20]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldrsh x3, [wsp]
+// CHECK-ERROR-NEXT: ^
+
+//// Store things
+ strb w0, [wsp]
+ strh w31, [x23, #1]
+ str x5, [x22, #12]
+ str w7, [x12, #16384]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strb w0, [wsp]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: strh w31, [x23, #1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: too few operands for instruction
+// CHECK-ERROR-NEXT: str x5, [x22, #12]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [-256, 255]
+// CHECK-ERROR-NEXT: str w7, [x12, #16384]
+// CHECK-ERROR-NEXT: ^
+
+//// Bad PRFMs
+ prfm #-1, [sp]
+ prfm #32, [sp, #8]
+ prfm pldl1strm, [w3, #8]
+ prfm wibble, [sp]
+// CHECK-ERROR: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: prfm #-1, [sp]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: prfm #32, [sp, #8]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: prfm pldl1strm, [w3, #8]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: operand specifier not recognised
+// CHECK-ERROR-NEXT: prfm wibble, [sp]
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store register (register offset)
+//------------------------------------------------------------------------------
+
+ ldr w3, [xzr, x3]
+ ldr w4, [x0, x4, lsl]
+ ldr w9, [x5, x5, uxtw]
+ ldr w10, [x6, x9, sxtw #2]
+ ldr w11, [x7, w2, lsl #2]
+ ldr w12, [x8, w1, sxtx]
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldr w3, [xzr, x3]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected #imm after shift specifier
+// CHECK-ERROR-NEXT: ldr w4, [x0, x4, lsl]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT: ldr w9, [x5, x5, uxtw]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT: ldr w10, [x6, x9, sxtw #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT: ldr w11, [x7, w2, lsl #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT: ldr w12, [x8, w1, sxtx]
+// CHECK-ERROR-NEXT: ^
+
+ ldrsb w9, [x4, x2, lsl #-1]
+ strb w9, [x4, x2, lsl #1]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: ldrsb w9, [x4, x2, lsl #-1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0
+// CHECK-ERROR-NEXT: strb w9, [x4, x2, lsl #1]
+// CHECK-ERROR-NEXT: ^
+
+ ldrsh w9, [x4, x2, lsl #-1]
+ ldr h13, [x4, w2, uxtw #2]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: ldrsh w9, [x4, x2, lsl #-1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #1
+// CHECK-ERROR-NEXT: ldr h13, [x4, w2, uxtw #2]
+// CHECK-ERROR-NEXT: ^
+
+ str w9, [x5, w9, sxtw #-1]
+ str s3, [sp, w9, uxtw #1]
+ ldrsw x9, [x15, x4, sxtx #3]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: str w9, [x5, w9, sxtw #-1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT: str s3, [sp, w9, uxtw #1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #2
+// CHECK-ERROR-NEXT: ldrsw x9, [x15, x4, sxtx #3]
+// CHECK-ERROR-NEXT: ^
+
+ str xzr, [x5, x9, sxtx #-1]
+ prfm pldl3keep, [sp, x20, lsl #2]
+ ldr d3, [x20, wzr, uxtw #4]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: str xzr, [x5, x9, sxtx #-1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtx' with optional shift of #0 or #3
+// CHECK-ERROR-NEXT: prfm pldl3keep, [sp, x20, lsl #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'uxtw' or 'sxtw' with optional shift of #0 or #3
+// CHECK-ERROR-NEXT: ldr d3, [x20, wzr, uxtw #4]
+// CHECK-ERROR-NEXT: ^
+
+ ldr q5, [sp, x2, lsl #-1]
+ ldr q10, [x20, w4, uxtw #2]
+ str q21, [x20, w4, uxtw #5]
+// CHECK-ERROR-NEXT: error: expected integer shift amount
+// CHECK-ERROR-NEXT: ldr q5, [sp, x2, lsl #-1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtw' with optional shift of #0 or #4
+// CHECK-ERROR-NEXT: ldr q10, [x20, w4, uxtw #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl' or 'sxtw' with optional shift of #0 or #4
+// CHECK-ERROR-NEXT: str q21, [x20, w4, uxtw #5]
+// CHECK-ERROR-NEXT: ^
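+
+// For reference: a W offset register must use uxtw or sxtw, an X offset
+// lsl or sxtx, and any shift must be #0 or log2 of the access size; a form
+// such as
+//     ldr w10, [x6, w9, sxtw #2]
+// would be accepted.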
+
+//------------------------------------------------------------------------------
+// Load/store register pair (offset)
+//------------------------------------------------------------------------------
+ ldp w3, w2, [x4, #1]
+ stp w1, w2, [x3, #253]
+ stp w9, w10, [x5, #256]
+ ldp w11, w12, [x9, #-260]
+ stp wsp, w9, [sp]
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldp w3, w2, [x4, #1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp w1, w2, [x3, #253]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp w9, w10, [x5, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldp w11, w12, [x9, #-260]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp wsp, w9, [sp]
+// CHECK-ERROR-NEXT: ^
+
+ ldpsw x9, x2, [sp, #2]
+ ldpsw x1, x2, [x10, #256]
+ ldpsw x3, x4, [x11, #-260]
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldpsw x9, x2, [sp, #2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldpsw x1, x2, [x10, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldpsw x3, x4, [x11, #-260]
+// CHECK-ERROR-NEXT: ^
+
+ ldp x2, x5, [sp, #4]
+ ldp x5, x6, [x9, #512]
+ stp x7, x8, [x10, #-520]
+// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldp x2, x5, [sp, #4]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldp x5, x6, [x9, #512]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: stp x7, x8, [x10, #-520]
+// CHECK-ERROR-NEXT: ^
+
+ ldp sp, x3, [x10]
+ stp x3, sp, [x9]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp sp, x3, [x10]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp x3, sp, [x9]
+// CHECK-ERROR-NEXT: ^
+
+ stp s3, s5, [sp, #-2]
+ ldp s6, s26, [x4, #-260]
+ stp s13, s19, [x5, #256]
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp s3, s5, [sp, #-2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldp s6, s26, [x4, #-260]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp s13, s19, [x5, #256]
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, d4, [xzr]
+ ldp d5, d6, [x0, #512]
+ stp d7, d8, [x0, #-520]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, d4, [xzr]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldp d5, d6, [x0, #512]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: stp d7, d8, [x0, #-520]
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, q2, [sp]
+ ldp q3, q5, [sp, #8]
+ stp q20, q25, [x5, #1024]
+ ldp q30, q15, [x23, #-1040]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, q2, [sp]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: ldp q3, q5, [sp, #8]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: stp q20, q25, [x5, #1024]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: ldp q30, q15, [x23, #-1040]
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store register pair (post-indexed)
+//------------------------------------------------------------------------------
+
+ ldp w3, w2, [x4], #1
+ stp w1, w2, [x3], #253
+ stp w9, w10, [x5], #256
+ ldp w11, w12, [x9], #-260
+ stp wsp, w9, [sp], #0
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldp w3, w2, [x4], #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp w1, w2, [x3], #253
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp w9, w10, [x5], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldp w11, w12, [x9], #-260
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp wsp, w9, [sp], #0
+// CHECK-ERROR-NEXT: ^
+
+ ldpsw x9, x2, [sp], #2
+ ldpsw x1, x2, [x10], #256
+ ldpsw x3, x4, [x11], #-260
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldpsw x9, x2, [sp], #2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldpsw x1, x2, [x10], #256
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldpsw x3, x4, [x11], #-260
+// CHECK-ERROR-NEXT: ^
+
+ ldp x2, x5, [sp], #4
+ ldp x5, x6, [x9], #512
+ stp x7, x8, [x10], #-520
+// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldp x2, x5, [sp], #4
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldp x5, x6, [x9], #512
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: stp x7, x8, [x10], #-520
+// CHECK-ERROR-NEXT: ^
+
+ ldp sp, x3, [x10], #0
+ stp x3, sp, [x9], #0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp sp, x3, [x10], #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp x3, sp, [x9], #0
+// CHECK-ERROR-NEXT: ^
+
+ stp s3, s5, [sp], #-2
+ ldp s6, s26, [x4], #-260
+ stp s13, s19, [x5], #256
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp s3, s5, [sp], #-2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldp s6, s26, [x4], #-260
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp s13, s19, [x5], #256
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, d4, [xzr], #0
+ ldp d5, d6, [x0], #512
+ stp d7, d8, [x0], #-520
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, d4, [xzr], #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldp d5, d6, [x0], #512
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: stp d7, d8, [x0], #-520
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, q2, [sp], #0
+ ldp q3, q5, [sp], #8
+ stp q20, q25, [x5], #1024
+ ldp q30, q15, [x23], #-1040
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, q2, [sp], #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: ldp q3, q5, [sp], #8
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: stp q20, q25, [x5], #1024
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: ldp q30, q15, [x23], #-1040
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store register pair (pre-indexed)
+//------------------------------------------------------------------------------
+
+ ldp w3, w2, [x4, #1]!
+ stp w1, w2, [x3, #253]!
+ stp w9, w10, [x5, #256]!
+ ldp w11, w12, [x9, #-260]!
+ stp wsp, w9, [sp, #0]!
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldp w3, w2, [x4, #1]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp w1, w2, [x3, #253]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp w9, w10, [x5, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldp w11, w12, [x9, #-260]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp wsp, w9, [sp, #0]!
+// CHECK-ERROR-NEXT: ^
+
+ ldpsw x9, x2, [sp, #2]!
+ ldpsw x1, x2, [x10, #256]!
+ ldpsw x3, x4, [x11, #-260]!
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldpsw x9, x2, [sp, #2]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldpsw x1, x2, [x10, #256]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldpsw x3, x4, [x11, #-260]!
+// CHECK-ERROR-NEXT: ^
+
+ ldp x2, x5, [sp, #4]!
+ ldp x5, x6, [x9, #512]!
+ stp x7, x8, [x10, #-520]!
+// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldp x2, x5, [sp, #4]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldp x5, x6, [x9, #512]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: stp x7, x8, [x10, #-520]!
+// CHECK-ERROR-NEXT: ^
+
+ ldp sp, x3, [x10, #0]!
+ stp x3, sp, [x9, #0]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp sp, x3, [x10, #0]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stp x3, sp, [x9, #0]!
+// CHECK-ERROR-NEXT: ^
+
+ stp s3, s5, [sp, #-2]!
+ ldp s6, s26, [x4, #-260]!
+ stp s13, s19, [x5, #256]!
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp s3, s5, [sp, #-2]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldp s6, s26, [x4, #-260]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stp s13, s19, [x5, #256]!
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, d4, [xzr, #0]!
+ ldp d5, d6, [x0, #512]!
+ stp d7, d8, [x0, #-520]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, d4, [xzr, #0]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldp d5, d6, [x0, #512]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: stp d7, d8, [x0, #-520]!
+// CHECK-ERROR-NEXT: ^
+
+ ldp d3, q2, [sp, #0]!
+ ldp q3, q5, [sp, #8]!
+ stp q20, q25, [x5, #1024]!
+ ldp q30, q15, [x23, #-1040]!
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldp d3, q2, [sp, #0]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: ldp q3, q5, [sp, #8]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: stp q20, q25, [x5, #1024]!
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: ldp q30, q15, [x23, #-1040]!
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Load/store non-temporal register pair (offset)
+//------------------------------------------------------------------------------
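+// (ldnp/stnp take the same scaled 7-bit offsets as ldp/stp, but have no
+// pre- or post-indexed forms.)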
+ ldnp w3, w2, [x4, #1]
+ stnp w1, w2, [x3, #253]
+ stnp w9, w10, [x5, #256]
+ ldnp w11, w12, [x9, #-260]
+ stnp wsp, w9, [sp]
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldnp w3, w2, [x4, #1]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stnp w1, w2, [x3, #253]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stnp w9, w10, [x5, #256]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldnp w11, w12, [x9, #-260]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stnp wsp, w9, [sp]
+// CHECK-ERROR-NEXT: ^
+
+ ldnp x2, x5, [sp, #4]
+ ldnp x5, x6, [x9, #512]
+ stnp x7, x8, [x10, #-520]
+// CHECK-ERROR: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldnp x2, x5, [sp, #4]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldnp x5, x6, [x9, #512]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: stnp x7, x8, [x10, #-520]
+// CHECK-ERROR-NEXT: ^
+
+ ldnp sp, x3, [x10]
+ stnp x3, sp, [x9]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp sp, x3, [x10]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: stnp x3, sp, [x9]
+// CHECK-ERROR-NEXT: ^
+
+ stnp s3, s5, [sp, #-2]
+ ldnp s6, s26, [x4, #-260]
+ stnp s13, s19, [x5, #256]
+// CHECK-ERROR: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stnp s3, s5, [sp, #-2]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: ldnp s6, s26, [x4, #-260]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 4 in range [-256, 252]
+// CHECK-ERROR-NEXT: stnp s13, s19, [x5, #256]
+// CHECK-ERROR-NEXT: ^
+
+ ldnp d3, d4, [xzr]
+ ldnp d5, d6, [x0, #512]
+ stnp d7, d8, [x0, #-520]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp d3, d4, [xzr]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: ldnp d5, d6, [x0, #512]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 8 in range [-512, 508]
+// CHECK-ERROR-NEXT: stnp d7, d8, [x0, #-520]
+// CHECK-ERROR-NEXT: ^
+
+ ldnp d3, q2, [sp]
+ ldnp q3, q5, [sp, #8]
+ stnp q20, q25, [x5, #1024]
+ ldnp q30, q15, [x23, #-1040]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ldnp d3, q2, [sp]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: ldnp q3, q5, [sp, #8]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: stnp q20, q25, [x5, #1024]
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer multiple of 16 in range [-1024, 1016]
+// CHECK-ERROR-NEXT: ldnp q30, q15, [x23, #-1040]
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Logical (immediate)
+//------------------------------------------------------------------------------
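+// (A logical immediate must be a 2/4/8/16/32/64-bit rotated run of ones
+// repeated to fill the register; all-zeroes and all-ones are unencodable,
+// and so are one-off values like 0x83 or 0x1234.)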
+ orr w0, w1, #0xffffffff
+ and x3, x5, #0xffffffffffffffff
+// CHECK-ERROR: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT: orr w0, w1, #0xffffffff
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT: and x3, x5, #0xffffffffffffffff
+// CHECK-ERROR-NEXT: ^
+
+ ands w3, w9, #0x0
+ eor x2, x0, #0x0
+// CHECK-ERROR: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT: ands w3, w9, #0x0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT: eor x2, x0, #0x0
+// CHECK-ERROR-NEXT: ^
+
+ eor w3, w5, #0x83
+ eor x9, x20, #0x1234
+// CHECK-ERROR: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT: eor w3, w5, #0x83
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT: eor x9, x20, #0x1234
+// CHECK-ERROR-NEXT: ^
+
+ and wzr, w4, 0xffff0000
+ eor xzr, x9, #0xffff0000ffff0000
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: and wzr, w4, 0xffff0000
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: eor xzr, x9, #0xffff0000ffff0000
+// CHECK-ERROR-NEXT: ^
+
+ orr w3, wsp, #0xf0f0f0f0
+ ands x3, sp, #0xaaaaaaaaaaaaaaaa
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: orr w3, wsp, #0xf0f0f0f0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ands x3, sp, #0xaaaaaaaaaaaaaaaa
+// CHECK-ERROR-NEXT: ^
+
+ tst sp, #0xe0e0e0e0e0e0e0e0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: tst sp, #0xe0e0e0e0e0e0e0e0
+// CHECK-ERROR-NEXT: ^
+
+ // movi has been removed from the specification. Make sure it's really gone.
+ movi wzr, #0x44444444
+ movi w3, #0xffff
+ movi x9, #0x0000ffff00000000
+// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR-NEXT: movi wzr, #0x44444444
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR-NEXT: movi w3, #0xffff
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR: error: invalid instruction
+// CHECK-ERROR-NEXT: movi x9, #0x0000ffff00000000
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Logical (shifted register)
+//------------------------------------------------------------------------------
+
+ //// Out of range shifts
+ and w2, w24, w6, lsl #-1
+ and w4, w6, w12, lsl #32
+ and x4, x6, x12, lsl #64
+ and x2, x5, x11, asr
+// CHECK-ERROR: error: expected integer shift amount
+// CHECK-ERROR-NEXT: and w2, w24, w6, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]
+// CHECK-ERROR-NEXT: and w4, w6, w12, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]
+// CHECK-ERROR-NEXT: and x4, x6, x12, lsl #64
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected #imm after shift specifier
+// CHECK-ERROR-NEXT: and x2, x5, x11, asr
+// CHECK-ERROR-NEXT: ^
+
+ //// sp not allowed
+ orn wsp, w3, w5
+ bics x20, sp, x9, lsr #0
+ orn x2, x6, sp, lsl #3
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: orn wsp, w3, w5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: bics x20, sp, x9, lsr #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: orn x2, x6, sp, lsl #3
+// CHECK-ERROR-NEXT: ^
+
+ //// Mismatched registers
+ and x3, w2, w1
+ ands w1, x12, w2
+ and x4, x5, w6, lsl #12
+ orr w2, w5, x7, asr #0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: and x3, w2, w1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: ands w1, x12, w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT: and x4, x5, w6, lsl #12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected compatible register or logical immediate
+// CHECK-ERROR-NEXT: orr w2, w5, x7, asr #0
+// CHECK-ERROR-NEXT: ^
+
+ //// Shifts should not be allowed on mov
+ mov w3, w7, lsl #13
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: mov w3, w7, lsl #13
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Move wide (immediate)
+//------------------------------------------------------------------------------
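+// (movz/movn/movk take a 16-bit unsigned immediate plus an optional 'lsl #N'
+// where N is 0, 16, 32 or 48 and must stay below the register width, hence
+// the rejections of #65536, negative values, 'lsl #1' and 'lsl #32' on a w
+// register below.)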
+
+ movz w3, #65536, lsl #0
+ movz w4, #65536
+ movn w1, #2, lsl #1
+ movk w3, #0, lsl #-1
+ movn w2, #-1, lsl #0
+ movz x3, #-1
+ movk w3, #1, lsl #32
+ movn x2, #12, lsl #64
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movz w3, #65536, lsl #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movz w4, #65536
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movn w1, #2, lsl #1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: only 'lsl #+N' valid after immediate
+// CHECK-ERROR-NEXT: movk w3, #0, lsl #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movn w2, #-1, lsl #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movz x3, #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk w3, #1, lsl #32
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movn x2, #12, lsl #64
+// CHECK-ERROR-NEXT: ^
+
+ movz x12, #:abs_g0:sym, lsl #16
+ movz x12, #:abs_g0:sym, lsl #0
+ movn x2, #:abs_g0:sym
+ movk w3, #:abs_g0:sym
+ movz x3, #:abs_g0_nc:sym
+ movn x4, #:abs_g0_nc:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movz x12, #:abs_g0:sym, lsl #16
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movz x12, #:abs_g0:sym, lsl #0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movn x2, #:abs_g0:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk w3, #:abs_g0:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movz x3, #:abs_g0_nc:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movn x4, #:abs_g0_nc:sym
+// CHECK-ERROR-NEXT: ^
+
+ movn x2, #:abs_g1:sym
+ movk w3, #:abs_g1:sym
+ movz x3, #:abs_g1_nc:sym
+ movn x4, #:abs_g1_nc:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movn x2, #:abs_g1:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk w3, #:abs_g1:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movz x3, #:abs_g1_nc:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movn x4, #:abs_g1_nc:sym
+// CHECK-ERROR-NEXT: ^
+
+ movz w12, #:abs_g2:sym
+ movn x12, #:abs_g2:sym
+ movk x13, #:abs_g2:sym
+ movk w3, #:abs_g2_nc:sym
+ movz x13, #:abs_g2_nc:sym
+ movn x24, #:abs_g2_nc:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movz w12, #:abs_g2:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movn x12, #:abs_g2:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk x13, #:abs_g2:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk w3, #:abs_g2_nc:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movz x13, #:abs_g2_nc:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movn x24, #:abs_g2_nc:sym
+// CHECK-ERROR-NEXT: ^
+
+ movn x19, #:abs_g3:sym
+ movz w20, #:abs_g3:sym
+ movk w21, #:abs_g3:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movn x19, #:abs_g3:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movz w20, #:abs_g3:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk w21, #:abs_g3:sym
+// CHECK-ERROR-NEXT: ^
+
+ movk x19, #:abs_g0_s:sym
+ movk w23, #:abs_g0_s:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk x19, #:abs_g0_s:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk w23, #:abs_g0_s:sym
+// CHECK-ERROR-NEXT: ^
+
+ movk x19, #:abs_g1_s:sym
+ movk w23, #:abs_g1_s:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk x19, #:abs_g1_s:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk w23, #:abs_g1_s:sym
+// CHECK-ERROR-NEXT: ^
+
+ movz w2, #:abs_g2_s:sym
+ movn w29, #:abs_g2_s:sym
+ movk x19, #:abs_g2_s:sym
+ movk w23, #:abs_g2_s:sym
+// CHECK-ERROR: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movz w2, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movn w29, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk x19, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected relocated symbol or integer in range [0, 65535]
+// CHECK-ERROR-NEXT: movk w23, #:abs_g2_s:sym
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// PC-relative addressing
+//------------------------------------------------------------------------------
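+// (adr accepts a signed 21-bit byte offset, i.e. [-1048576, 1048575]; adrp
+// accepts the same 21 bits scaled by the 4KiB page size, i.e. multiples of
+// 4096 in [-4294967296, 4294963200].)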
+
+ adr sp, loc // expects xzr
+ adrp x3, #20 // Immediate unaligned
+ adrp w2, loc // 64-bit register needed
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adr sp, loc
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: adrp x3, #20
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: adrp w2, loc
+// CHECK-ERROR-NEXT: ^
+
+ adr x9, #1048576
+ adr x2, #-1048577
+ adrp x9, #4294967296
+ adrp x20, #-4294971392
+// CHECK-ERROR: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: adr x9, #1048576
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: adr x2, #-1048577
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: adrp x9, #4294967296
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: adrp x20, #-4294971392
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// System
+//------------------------------------------------------------------------------
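+// (hint encodes a 7-bit immediate, while clrex and the dsb/dmb/isb barrier
+// option live in a 4-bit CRm field, giving the [0, 127] and [0, 15] bounds
+// checked here.)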
+
+ hint #-1
+ hint #128
+// CHECK-ERROR: error: expected integer in range [0, 127]
+// CHECK-ERROR-NEXT: hint #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 127]
+// CHECK-ERROR-NEXT: hint #128
+// CHECK-ERROR-NEXT: ^
+
+ clrex #-1
+ clrex #16
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: clrex #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: clrex #16
+// CHECK-ERROR-NEXT: ^
+
+ dsb #-1
+ dsb #16
+ dmb #-1
+ dmb #16
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: dsb #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: dsb #16
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: dmb #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: dmb #16
+// CHECK-ERROR-NEXT: ^
+
+ isb #-1
+ isb #16
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: isb #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Invalid immediate for instruction
+// CHECK-ERROR-NEXT: isb #16
+// CHECK-ERROR-NEXT: ^
+
+ msr daifset, x4
+ msr spsel, #-1
+ msr spsel #-1
+ msr daifclr, #16
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: msr daifset, x4
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: msr spsel, #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected comma before next operand
+// CHECK-ERROR-NEXT: msr spsel #-1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 15]
+// CHECK-ERROR-NEXT: msr daifclr, #16
+// CHECK-ERROR-NEXT: ^
+
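+// (For sys/sysl, op1 and op2 are 3-bit fields and the Cn/Cm names index
+// 4-bit fields, so #8 and c16 are each one past their limits.)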
+ sys #8, c1, c2, #7, x9
+ sys #3, c16, c2, #3, x10
+ sys #2, c11, c16, #5
+ sys #4, c9, c8, #8, xzr
+ sysl x11, #8, c1, c2, #7
+ sysl x13, #3, c16, c2, #3
+ sysl x9, #2, c11, c16, #5
+ sysl x4, #4, c9, c8, #8
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 7]
+// CHECK-ERROR-NEXT: sys #8, c1, c2, #7, x9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT: sys #3, c16, c2, #3, x10
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT: sys #2, c11, c16, #5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 7]
+// CHECK-ERROR-NEXT: sys #4, c9, c8, #8, xzr
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 7]
+// CHECK-ERROR-NEXT: sysl x11, #8, c1, c2, #7
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT: sysl x13, #3, c16, c2, #3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: Expected cN operand where 0 <= N <= 15
+// CHECK-ERROR-NEXT: sysl x9, #2, c11, c16, #5
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 7]
+// CHECK-ERROR-NEXT: sysl x4, #4, c9, c8, #8
+// CHECK-ERROR-NEXT: ^
+
+ ic ialluis, x2
+ ic allu, x7
+ ic ivau
+// CHECK-ERROR-NEXT: error: specified IC op does not use a register
+// CHECK-ERROR-NEXT: ic ialluis, x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: operand specifier not recognised
+// CHECK-ERROR-NEXT: ic allu, x7
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified IC op requires a register
+// CHECK-ERROR-NEXT: ic ivau
+// CHECK-ERROR-NEXT: ^
+
+ tlbi IPAS2E1IS
+ tlbi IPAS2LE1IS
+ tlbi VMALLE1IS, x12
+ tlbi ALLE2IS, x11
+ tlbi ALLE3IS, x20
+ tlbi VAE1IS
+ tlbi VAE2IS
+ tlbi VAE3IS
+ tlbi ASIDE1IS
+ tlbi VAAE1IS
+ tlbi ALLE1IS, x0
+ tlbi VALE1IS
+ tlbi VALE2IS
+ tlbi VALE3IS
+ tlbi VMALLS12E1IS, xzr
+ tlbi VAALE1IS
+ tlbi IPAS2E1
+ tlbi IPAS2LE1
+ tlbi VMALLE1, x9
+ tlbi ALLE2, x10
+ tlbi ALLE3, x11
+ tlbi VAE1
+ tlbi VAE2
+ tlbi VAE3
+ tlbi ASIDE1
+ tlbi VAAE1
+ tlbi ALLE1, x25
+ tlbi VALE1
+ tlbi VALE2
+ tlbi VALE3
+ tlbi VMALLS12E1, x15
+ tlbi VAALE1
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi IPAS2E1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi IPAS2LE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi VMALLE1IS, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE2IS, x11
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE3IS, x20
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE2IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE3IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi ASIDE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAAE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE1IS, x0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE2IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE3IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi VMALLS12E1IS, xzr
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAALE1IS
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi IPAS2E1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi IPAS2LE1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi VMALLE1, x9
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE2, x10
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE3, x11
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAE3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi ASIDE1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAAE1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi ALLE1, x25
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VALE3
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op does not use a register
+// CHECK-ERROR-NEXT: tlbi VMALLS12E1, x15
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: specified TLBI op requires a register
+// CHECK-ERROR-NEXT: tlbi VAALE1
+// CHECK-ERROR-NEXT: ^
+
+// For the MSR/MRS instructions, first make sure that read-only and
+// write-only registers really are rejected in the wrong direction.
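+// (The msr block below targets read-only registers, whose valid direction is
+// mrs, e.g. "mrs x12, MIDR_EL1"; the mrs block after it targets write-only
+// registers such as OSLAR_EL1, whose valid direction is msr.)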
+ msr MDCCSR_EL0, x12
+ msr DBGDTRRX_EL0, x12
+ msr MDRAR_EL1, x12
+ msr OSLSR_EL1, x12
+ msr DBGAUTHSTATUS_EL1, x12
+ msr MIDR_EL1, x12
+ msr CCSIDR_EL1, x12
+ msr CLIDR_EL1, x12
+ msr CTR_EL0, x12
+ msr MPIDR_EL1, x12
+ msr REVIDR_EL1, x12
+ msr AIDR_EL1, x12
+ msr DCZID_EL0, x12
+ msr ID_PFR0_EL1, x12
+ msr ID_PFR1_EL1, x12
+ msr ID_DFR0_EL1, x12
+ msr ID_AFR0_EL1, x12
+ msr ID_MMFR0_EL1, x12
+ msr ID_MMFR1_EL1, x12
+ msr ID_MMFR2_EL1, x12
+ msr ID_MMFR3_EL1, x12
+ msr ID_ISAR0_EL1, x12
+ msr ID_ISAR1_EL1, x12
+ msr ID_ISAR2_EL1, x12
+ msr ID_ISAR3_EL1, x12
+ msr ID_ISAR4_EL1, x12
+ msr ID_ISAR5_EL1, x12
+ msr MVFR0_EL1, x12
+ msr MVFR1_EL1, x12
+ msr MVFR2_EL1, x12
+ msr ID_AA64PFR0_EL1, x12
+ msr ID_AA64PFR1_EL1, x12
+ msr ID_AA64DFR0_EL1, x12
+ msr ID_AA64DFR1_EL1, x12
+ msr ID_AA64AFR0_EL1, x12
+ msr ID_AA64AFR1_EL1, x12
+ msr ID_AA64ISAR0_EL1, x12
+ msr ID_AA64ISAR1_EL1, x12
+ msr ID_AA64MMFR0_EL1, x12
+ msr ID_AA64MMFR1_EL1, x12
+ msr PMCEID0_EL0, x12
+ msr PMCEID1_EL0, x12
+ msr RVBAR_EL1, x12
+ msr RVBAR_EL2, x12
+ msr RVBAR_EL3, x12
+ msr ISR_EL1, x12
+ msr CNTPCT_EL0, x12
+ msr CNTVCT_EL0, x12
+ msr PMEVCNTR31_EL0, x12
+ msr PMEVTYPER31_EL0, x12
+// CHECK-ERROR: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr MDCCSR_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr DBGDTRRX_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr MDRAR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr OSLSR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr DBGAUTHSTATUS_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr MIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr CCSIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr CLIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr CTR_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr MPIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr REVIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr AIDR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr DCZID_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_PFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_PFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_DFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_AFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_MMFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_MMFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_MMFR2_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_MMFR3_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_ISAR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_ISAR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_ISAR2_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_ISAR3_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_ISAR4_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_ISAR5_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr MVFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr MVFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr MVFR2_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_AA64PFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_AA64PFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_AA64DFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_AA64DFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_AA64AFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_AA64AFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_AA64ISAR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_AA64ISAR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_AA64MMFR0_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ID_AA64MMFR1_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr PMCEID0_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr PMCEID1_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr RVBAR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr RVBAR_EL2, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr RVBAR_EL3, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr ISR_EL1, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr CNTPCT_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr CNTVCT_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr PMEVCNTR31_EL0, x12
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected writable system register or pstate
+// CHECK-ERROR-NEXT: msr PMEVTYPER31_EL0, x12
+// CHECK-ERROR-NEXT: ^
+
+ mrs x9, DBGDTRTX_EL0
+ mrs x9, OSLAR_EL1
+ mrs x9, PMSWINC_EL0
+ mrs x9, PMEVCNTR31_EL0
+ mrs x9, PMEVTYPER31_EL0
+// CHECK-ERROR: error: expected readable system register
+// CHECK-ERROR-NEXT: mrs x9, DBGDTRTX_EL0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT: mrs x9, OSLAR_EL1
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT: mrs x9, PMSWINC_EL0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT: mrs x9, PMEVCNTR31_EL0
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT: mrs x9, PMEVTYPER31_EL0
+// CHECK-ERROR-NEXT: ^
+
+// Now check some invalid generic names
+ mrs xzr, s2_5_c11_c13_2
+ mrs x12, s3_8_c11_c13_2
+ mrs x13, s3_3_c12_c13_2
+ mrs x19, s3_2_c15_c16_2
+ mrs x30, s3_2_c15_c1_8
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT: mrs xzr, s2_5_c11_c13_2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT: mrs x12, s3_8_c11_c13_2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT: mrs x13, s3_3_c12_c13_2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT: mrs x19, s3_2_c15_c16_2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT: mrs x30, s3_2_c15_c1_8
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Test and branch (immediate)
+//------------------------------------------------------------------------------
+
+ tbz w3, #-1, addr
+ tbz w3, #32, nowhere
+ tbz x9, #-1, there
+ tbz x20, #64, dont
+// CHECK-ERROR: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: tbz w3, #-1, addr
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: tbz w3, #32, nowhere
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: tbz x9, #-1, there
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: tbz x20, #64, dont
+// CHECK-ERROR-NEXT: ^
+
+ tbnz w3, #-1, addr
+ tbnz w3, #32, nowhere
+ tbnz x9, #-1, there
+ tbnz x20, #64, dont
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: tbnz w3, #-1, addr
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 31]
+// CHECK-ERROR-NEXT: tbnz w3, #32, nowhere
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: tbnz x9, #-1, there
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected integer in range [0, 63]
+// CHECK-ERROR-NEXT: tbnz x20, #64, dont
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Unconditional branch (immediate)
+//------------------------------------------------------------------------------
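+// (The branch offset is a signed 26-bit word offset, so it must be a
+// multiple of 4 within [-134217728, 134217724]; all three cases below fall
+// outside that.)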
+
+ b #134217728
+ b #-134217732
+ b #1
+// CHECK-ERROR: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: b #134217728
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: b #-134217732
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: expected label or encodable integer pc offset
+// CHECK-ERROR-NEXT: b #1
+// CHECK-ERROR-NEXT: ^
+
+//------------------------------------------------------------------------------
+// Unconditional branch (register)
+//------------------------------------------------------------------------------
+
+ br w2
+ br sp
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: br w2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: br sp
+// CHECK-ERROR-NEXT: ^
+
+ //// These ones shouldn't allow any registers
+ eret x2
+ drps x2
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: eret x2
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: drps x2
+// CHECK-ERROR-NEXT: ^
+
diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s
new file mode 100644
index 000000000000..ad3064e5e524
--- /dev/null
+++ b/test/MC/AArch64/basic-a64-instructions.s
@@ -0,0 +1,4819 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding < %s | FileCheck %s
+ .globl _func
+
+// Check that the assembler can handle the documented syntax from the ARM ARM.
+// For complex constructs like shifter operands, check them thoroughly once,
+// then spot-check that subsequent instructions accept the form generally.
+// This gives good coverage while keeping the overall size of the test
+// reasonable.
+
+
+_func:
+// CHECK: _func
+
+//------------------------------------------------------------------------------
+// Add/sub (extended register)
+//------------------------------------------------------------------------------
+ // Basic extends 64-bit ops
+ add x2, x4, w5, uxtb
+ add x20, sp, w19, uxth
+ add x12, x1, w20, uxtw
+ add x20, x3, x13, uxtx
+ add x17, x25, w20, sxtb
+ add x18, x13, w19, sxth
+ add sp, x2, w3, sxtw
+ add x3, x5, x9, sxtx
+// CHECK: add x2, x4, w5, uxtb // encoding: [0x82,0x00,0x25,0x8b]
+// CHECK: add x20, sp, w19, uxth // encoding: [0xf4,0x23,0x33,0x8b]
+// CHECK: add x12, x1, w20, uxtw // encoding: [0x2c,0x40,0x34,0x8b]
+// CHECK: add x20, x3, x13, uxtx // encoding: [0x74,0x60,0x2d,0x8b]
+// CHECK: add x17, x25, w20, sxtb // encoding: [0x31,0x83,0x34,0x8b]
+// CHECK: add x18, x13, w19, sxth // encoding: [0xb2,0xa1,0x33,0x8b]
+// CHECK: add sp, x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x8b]
+// CHECK: add x3, x5, x9, sxtx // encoding: [0xa3,0xe0,0x29,0x8b]
+
+ // Basic extends, 32-bit ops
+ add w2, w5, w7, uxtb
+ add w21, w15, w17, uxth
+ add w30, w29, wzr, uxtw
+ add w19, w17, w1, uxtx // Goodness knows what this means
+ add w2, w5, w1, sxtb
+ add w26, w17, w19, sxth
+ add w0, w2, w3, sxtw
+ add w2, w3, w5, sxtx
+// CHECK: add w2, w5, w7, uxtb // encoding: [0xa2,0x00,0x27,0x0b]
+// CHECK: add w21, w15, w17, uxth // encoding: [0xf5,0x21,0x31,0x0b]
+// CHECK: add w30, w29, wzr, uxtw // encoding: [0xbe,0x43,0x3f,0x0b]
+// CHECK: add w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x0b]
+// CHECK: add w2, w5, w1, sxtb // encoding: [0xa2,0x80,0x21,0x0b]
+// CHECK: add w26, w17, w19, sxth // encoding: [0x3a,0xa2,0x33,0x0b]
+// CHECK: add w0, w2, w3, sxtw // encoding: [0x40,0xc0,0x23,0x0b]
+// CHECK: add w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x0b]
+
+ // Nonzero shift amounts
+ add x2, x3, w5, sxtb #0
+ add x7, x11, w13, uxth #4
+ add w17, w19, w23, uxtw #2
+ add w29, w23, w17, uxtx #1
+// CHECK: add x2, x3, w5, sxtb // encoding: [0x62,0x80,0x25,0x8b]
+// CHECK: add x7, x11, w13, uxth #4 // encoding: [0x67,0x31,0x2d,0x8b]
+// CHECK: add w17, w19, w23, uxtw #2 // encoding: [0x71,0x4a,0x37,0x0b]
+// CHECK: add w29, w23, w17, uxtx #1 // encoding: [0xfd,0x66,0x31,0x0b]
+
+ // Sub
+ sub x2, x4, w5, uxtb #2
+ sub x20, sp, w19, uxth #4
+ sub x12, x1, w20, uxtw
+ sub x20, x3, x13, uxtx #0
+ sub x17, x25, w20, sxtb
+ sub x18, x13, w19, sxth
+ sub sp, x2, w3, sxtw
+ sub x3, x5, x9, sxtx
+// CHECK: sub x2, x4, w5, uxtb #2 // encoding: [0x82,0x08,0x25,0xcb]
+// CHECK: sub x20, sp, w19, uxth #4 // encoding: [0xf4,0x33,0x33,0xcb]
+// CHECK: sub x12, x1, w20, uxtw // encoding: [0x2c,0x40,0x34,0xcb]
+// CHECK: sub x20, x3, x13, uxtx // encoding: [0x74,0x60,0x2d,0xcb]
+// CHECK: sub x17, x25, w20, sxtb // encoding: [0x31,0x83,0x34,0xcb]
+// CHECK: sub x18, x13, w19, sxth // encoding: [0xb2,0xa1,0x33,0xcb]
+// CHECK: sub sp, x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xcb]
+// CHECK: sub x3, x5, x9, sxtx // encoding: [0xa3,0xe0,0x29,0xcb]
+
+ sub w2, w5, w7, uxtb
+ sub w21, w15, w17, uxth
+ sub w30, w29, wzr, uxtw
+ sub w19, w17, w1, uxtx // Goodness knows what this means
+ sub w2, w5, w1, sxtb
+ sub w26, wsp, w19, sxth
+ sub wsp, w2, w3, sxtw
+ sub w2, w3, w5, sxtx
+// CHECK: sub w2, w5, w7, uxtb // encoding: [0xa2,0x00,0x27,0x4b]
+// CHECK: sub w21, w15, w17, uxth // encoding: [0xf5,0x21,0x31,0x4b]
+// CHECK: sub w30, w29, wzr, uxtw // encoding: [0xbe,0x43,0x3f,0x4b]
+// CHECK: sub w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x4b]
+// CHECK: sub w2, w5, w1, sxtb // encoding: [0xa2,0x80,0x21,0x4b]
+// CHECK: sub w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x4b]
+// CHECK: sub wsp, w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x4b]
+// CHECK: sub w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x4b]
+
+ // Adds
+ adds x2, x4, w5, uxtb #2
+ adds x20, sp, w19, uxth #4
+ adds x12, x1, w20, uxtw
+ adds x20, x3, x13, uxtx #0
+ adds xzr, x25, w20, sxtb #3
+ adds x18, sp, w19, sxth
+ adds xzr, x2, w3, sxtw
+ adds x3, x5, x9, sxtx #2
+// CHECK: adds x2, x4, w5, uxtb #2 // encoding: [0x82,0x08,0x25,0xab]
+// CHECK: adds x20, sp, w19, uxth #4 // encoding: [0xf4,0x33,0x33,0xab]
+// CHECK: adds x12, x1, w20, uxtw // encoding: [0x2c,0x40,0x34,0xab]
+// CHECK: adds x20, x3, x13, uxtx // encoding: [0x74,0x60,0x2d,0xab]
+// CHECK: adds xzr, x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xab]
+// CHECK: adds x18, sp, w19, sxth // encoding: [0xf2,0xa3,0x33,0xab]
+// CHECK: adds xzr, x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xab]
+// CHECK: adds x3, x5, x9, sxtx #2 // encoding: [0xa3,0xe8,0x29,0xab]
+
+ adds w2, w5, w7, uxtb
+ adds w21, w15, w17, uxth
+ adds w30, w29, wzr, uxtw
+ adds w19, w17, w1, uxtx // Goodness knows what this means
+ adds w2, w5, w1, sxtb #1
+ adds w26, wsp, w19, sxth
+ adds wzr, w2, w3, sxtw
+ adds w2, w3, w5, sxtx
+// CHECK: adds w2, w5, w7, uxtb // encoding: [0xa2,0x00,0x27,0x2b]
+// CHECK: adds w21, w15, w17, uxth // encoding: [0xf5,0x21,0x31,0x2b]
+// CHECK: adds w30, w29, wzr, uxtw // encoding: [0xbe,0x43,0x3f,0x2b]
+// CHECK: adds w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x2b]
+// CHECK: adds w2, w5, w1, sxtb #1 // encoding: [0xa2,0x84,0x21,0x2b]
+// CHECK: adds w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x2b]
+// CHECK: adds wzr, w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b]
+// CHECK: adds w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x2b]
+
+ // subs
+ subs x2, x4, w5, uxtb #2
+ subs x20, sp, w19, uxth #4
+ subs x12, x1, w20, uxtw
+ subs x20, x3, x13, uxtx #0
+ subs xzr, x25, w20, sxtb #3
+ subs x18, sp, w19, sxth
+ subs xzr, x2, w3, sxtw
+ subs x3, x5, x9, sxtx #2
+// CHECK: subs x2, x4, w5, uxtb #2 // encoding: [0x82,0x08,0x25,0xeb]
+// CHECK: subs x20, sp, w19, uxth #4 // encoding: [0xf4,0x33,0x33,0xeb]
+// CHECK: subs x12, x1, w20, uxtw // encoding: [0x2c,0x40,0x34,0xeb]
+// CHECK: subs x20, x3, x13, uxtx // encoding: [0x74,0x60,0x2d,0xeb]
+// CHECK: subs xzr, x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xeb]
+// CHECK: subs x18, sp, w19, sxth // encoding: [0xf2,0xa3,0x33,0xeb]
+// CHECK: subs xzr, x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xeb]
+// CHECK: subs x3, x5, x9, sxtx #2 // encoding: [0xa3,0xe8,0x29,0xeb]
+
+ subs w2, w5, w7, uxtb
+ subs w21, w15, w17, uxth
+ subs w30, w29, wzr, uxtw
+ subs w19, w17, w1, uxtx // Goodness knows what this means
+ subs w2, w5, w1, sxtb #1
+ subs w26, wsp, w19, sxth
+ subs wzr, w2, w3, sxtw
+ subs w2, w3, w5, sxtx
+// CHECK: subs w2, w5, w7, uxtb // encoding: [0xa2,0x00,0x27,0x6b]
+// CHECK: subs w21, w15, w17, uxth // encoding: [0xf5,0x21,0x31,0x6b]
+// CHECK: subs w30, w29, wzr, uxtw // encoding: [0xbe,0x43,0x3f,0x6b]
+// CHECK: subs w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x6b]
+// CHECK: subs w2, w5, w1, sxtb #1 // encoding: [0xa2,0x84,0x21,0x6b]
+// CHECK: subs w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x6b]
+// CHECK: subs wzr, w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x6b]
+// CHECK: subs w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x6b]
+
+ // cmp
+ cmp x4, w5, uxtb #2
+ cmp sp, w19, uxth #4
+ cmp x1, w20, uxtw
+ cmp x3, x13, uxtx #0
+ cmp x25, w20, sxtb #3
+ cmp sp, w19, sxth
+ cmp x2, w3, sxtw
+ cmp x5, x9, sxtx #2
+// CHECK: cmp x4, w5, uxtb #2 // encoding: [0x9f,0x08,0x25,0xeb]
+// CHECK: cmp sp, w19, uxth #4 // encoding: [0xff,0x33,0x33,0xeb]
+// CHECK: cmp x1, w20, uxtw // encoding: [0x3f,0x40,0x34,0xeb]
+// CHECK: cmp x3, x13, uxtx // encoding: [0x7f,0x60,0x2d,0xeb]
+// CHECK: cmp x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xeb]
+// CHECK: cmp sp, w19, sxth // encoding: [0xff,0xa3,0x33,0xeb]
+// CHECK: cmp x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xeb]
+// CHECK: cmp x5, x9, sxtx #2 // encoding: [0xbf,0xe8,0x29,0xeb]
+
+ cmp w5, w7, uxtb
+ cmp w15, w17, uxth
+ cmp w29, wzr, uxtw
+ cmp w17, w1, uxtx // Goodness knows what this means
+ cmp w5, w1, sxtb #1
+ cmp wsp, w19, sxth
+ cmp w2, w3, sxtw
+ cmp w3, w5, sxtx
+// CHECK: cmp w5, w7, uxtb // encoding: [0xbf,0x00,0x27,0x6b]
+// CHECK: cmp w15, w17, uxth // encoding: [0xff,0x21,0x31,0x6b]
+// CHECK: cmp w29, wzr, uxtw // encoding: [0xbf,0x43,0x3f,0x6b]
+// CHECK: cmp w17, w1, uxtx // encoding: [0x3f,0x62,0x21,0x6b]
+// CHECK: cmp w5, w1, sxtb #1 // encoding: [0xbf,0x84,0x21,0x6b]
+// CHECK: cmp wsp, w19, sxth // encoding: [0xff,0xa3,0x33,0x6b]
+// CHECK: cmp w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x6b]
+// CHECK: cmp w3, w5, sxtx // encoding: [0x7f,0xe0,0x25,0x6b]
+
+
+ // cmn
+ cmn x4, w5, uxtb #2
+ cmn sp, w19, uxth #4
+ cmn x1, w20, uxtw
+ cmn x3, x13, uxtx #0
+ cmn x25, w20, sxtb #3
+ cmn sp, w19, sxth
+ cmn x2, w3, sxtw
+ cmn x5, x9, sxtx #2
+// CHECK: cmn x4, w5, uxtb #2 // encoding: [0x9f,0x08,0x25,0xab]
+// CHECK: cmn sp, w19, uxth #4 // encoding: [0xff,0x33,0x33,0xab]
+// CHECK: cmn x1, w20, uxtw // encoding: [0x3f,0x40,0x34,0xab]
+// CHECK: cmn x3, x13, uxtx // encoding: [0x7f,0x60,0x2d,0xab]
+// CHECK: cmn x25, w20, sxtb #3 // encoding: [0x3f,0x8f,0x34,0xab]
+// CHECK: cmn sp, w19, sxth // encoding: [0xff,0xa3,0x33,0xab]
+// CHECK: cmn x2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0xab]
+// CHECK: cmn x5, x9, sxtx #2 // encoding: [0xbf,0xe8,0x29,0xab]
+
+ cmn w5, w7, uxtb
+ cmn w15, w17, uxth
+ cmn w29, wzr, uxtw
+ cmn w17, w1, uxtx // Goodness knows what this means
+ cmn w5, w1, sxtb #1
+ cmn wsp, w19, sxth
+ cmn w2, w3, sxtw
+ cmn w3, w5, sxtx
+// CHECK: cmn w5, w7, uxtb // encoding: [0xbf,0x00,0x27,0x2b]
+// CHECK: cmn w15, w17, uxth // encoding: [0xff,0x21,0x31,0x2b]
+// CHECK: cmn w29, wzr, uxtw // encoding: [0xbf,0x43,0x3f,0x2b]
+// CHECK: cmn w17, w1, uxtx // encoding: [0x3f,0x62,0x21,0x2b]
+// CHECK: cmn w5, w1, sxtb #1 // encoding: [0xbf,0x84,0x21,0x2b]
+// CHECK: cmn wsp, w19, sxth // encoding: [0xff,0xa3,0x33,0x2b]
+// CHECK: cmn w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b]
+// CHECK: cmn w3, w5, sxtx // encoding: [0x7f,0xe0,0x25,0x2b]
+
+ // extra operand checks for cmp and cmn
+ cmp x20, w29, uxtb #3
+ cmp x12, x13, uxtx #4
+ cmp wsp, w1, uxtb
+ cmn wsp, wzr, sxtw
+// CHECK: cmp x20, w29, uxtb #3 // encoding: [0x9f,0x0e,0x3d,0xeb]
+// CHECK: cmp x12, x13, uxtx #4 // encoding: [0x9f,0x71,0x2d,0xeb]
+// CHECK: cmp wsp, w1, uxtb // encoding: [0xff,0x03,0x21,0x6b]
+// CHECK: cmn wsp, wzr, sxtw // encoding: [0xff,0xc3,0x3f,0x2b]
+
+// LSL variant when sp is involved
+ sub sp, x3, x7, lsl #4
+ add w2, wsp, w3, lsl #1
+ cmp wsp, w9, lsl #0
+ adds wzr, wsp, w3, lsl #4
+ subs x3, sp, x9, lsl #2
+// CHECK: sub sp, x3, x7, lsl #4 // encoding: [0x7f,0x70,0x27,0xcb]
+// CHECK: add w2, wsp, w3, lsl #1 // encoding: [0xe2,0x47,0x23,0x0b]
+// CHECK: cmp wsp, w9 // encoding: [0xff,0x43,0x29,0x6b]
+// CHECK: adds wzr, wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b]
+// CHECK: subs x3, sp, x9, lsl #2 // encoding: [0xe3,0x6b,0x29,0xeb]
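+// Note: when sp/wsp appears, the shifted-register form is unavailable, so these
+// assemble to the extended-register encoding: "lsl" here is really uxtw (32-bit)
+// or uxtx (64-bit) with the given shift amount, which is also why lsl #0 is not
+// printed in the expected output above.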
+
+//------------------------------------------------------------------------------
+// Add/sub (immediate)
+//------------------------------------------------------------------------------
+
+// Check basic immediate values: an unsigned 12-bit immediate, optionally
+// shifted left by 12 bits.
+ add w4, w5, #0x0
+ add w2, w3, #4095
+ add w30, w29, #1, lsl #12
+ add w13, w5, #4095, lsl #12
+ add x5, x7, #1638
+// CHECK: add w4, w5, #0 // encoding: [0xa4,0x00,0x00,0x11]
+// CHECK: add w2, w3, #4095 // encoding: [0x62,0xfc,0x3f,0x11]
+// CHECK: add w30, w29, #1, lsl #12 // encoding: [0xbe,0x07,0x40,0x11]
+// CHECK: add w13, w5, #4095, lsl #12 // encoding: [0xad,0xfc,0x7f,0x11]
+// CHECK: add x5, x7, #1638 // encoding: [0xe5,0x98,0x19,0x91]
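+// For reference, the layout is sf:op:S:10001:sh(2):imm12:Rn(5):Rd(5). In
+// 0x113ffc62 above: sf=0, op=0, S=0, sh=00, imm12=0xfff, Rn=3, Rd=2, i.e.
+// add w2, w3, #4095; sh=01 selects the lsl #12 form.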
+
+// In the non-S variants, register number 31 encodes sp rather than zr
+ add w20, wsp, #801, lsl #0
+ add wsp, wsp, #1104
+ add wsp, w30, #4084
+// CHECK: add w20, wsp, #801 // encoding: [0xf4,0x87,0x0c,0x11]
+// CHECK: add wsp, wsp, #1104 // encoding: [0xff,0x43,0x11,0x11]
+// CHECK: add wsp, w30, #4084 // encoding: [0xdf,0xd3,0x3f,0x11]
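+// e.g. in 0x111143ff above both Rd and Rn are 0b11111, read here as wsp.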
+
+// A few sanity checks on the 64-bit versions
+ add x0, x24, #291
+ add x3, x24, #4095, lsl #12
+ add x8, sp, #1074
+ add sp, x29, #3816
+// CHECK: add x0, x24, #291 // encoding: [0x00,0x8f,0x04,0x91]
+// CHECK: add x3, x24, #4095, lsl #12 // encoding: [0x03,0xff,0x7f,0x91]
+// CHECK: add x8, sp, #1074 // encoding: [0xe8,0xcb,0x10,0x91]
+// CHECK: add sp, x29, #3816 // encoding: [0xbf,0xa3,0x3b,0x91]
+
+// And on sub
+ sub w0, wsp, #4077
+ sub w4, w20, #546, lsl #12
+ sub sp, sp, #288
+ sub wsp, w19, #16
+// CHECK: sub w0, wsp, #4077 // encoding: [0xe0,0xb7,0x3f,0x51]
+// CHECK: sub w4, w20, #546, lsl #12 // encoding: [0x84,0x8a,0x48,0x51]
+// CHECK: sub sp, sp, #288 // encoding: [0xff,0x83,0x04,0xd1]
+// CHECK: sub wsp, w19, #16 // encoding: [0x7f,0x42,0x00,0x51]
+
+// ADDS/SUBS accept zr in the Rd position but sp in the Rn position
+ adds w13, w23, #291, lsl #12
+ adds wzr, w2, #4095 // FIXME: canonically should be cmn
+ adds w20, wsp, #0x0
+ adds xzr, x3, #0x1, lsl #12 // FIXME: canonically should be cmn
+// CHECK: adds w13, w23, #291, lsl #12 // encoding: [0xed,0x8e,0x44,0x31]
+// CHECK: adds wzr, w2, #4095 // encoding: [0x5f,0xfc,0x3f,0x31]
+// CHECK: adds w20, wsp, #0 // encoding: [0xf4,0x03,0x00,0x31]
+// CHECK: adds xzr, x3, #1, lsl #12 // encoding: [0x7f,0x04,0x40,0xb1]
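+// e.g. Rn=0b11111 in 0x310003f4 reads as wsp, but Rd=0b11111 in 0x313ffc5f
+// reads as wzr: the flag-setting forms only take sp as a source.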
+
+// Checks for subs
+ subs xzr, sp, #20, lsl #12 // FIXME: canonically should be cmp
+ subs xzr, x30, #4095, lsl #0 // FIXME: canonically should be cmp
+ subs x4, sp, #3822
+// CHECK: subs xzr, sp, #20, lsl #12 // encoding: [0xff,0x53,0x40,0xf1]
+// CHECK: subs xzr, x30, #4095 // encoding: [0xdf,0xff,0x3f,0xf1]
+// CHECK: subs x4, sp, #3822 // encoding: [0xe4,0xbb,0x3b,0xf1]
+
+// cmn is an alias for adds zr, ...
+ cmn w3, #291, lsl #12
+ cmn wsp, #1365, lsl #0
+ cmn sp, #1092, lsl #12
+// CHECK: cmn w3, #291, lsl #12 // encoding: [0x7f,0x8c,0x44,0x31]
+// CHECK: cmn wsp, #1365 // encoding: [0xff,0x57,0x15,0x31]
+// CHECK: cmn sp, #1092, lsl #12 // encoding: [0xff,0x13,0x51,0xb1]
+
+// cmp is an alias for subs zr, ... (FIXME: should always disassemble as such too).
+ cmp x4, #300, lsl #12
+ cmp wsp, #500
+ cmp sp, #200, lsl #0
+// CHECK: cmp x4, #300, lsl #12 // encoding: [0x9f,0xb0,0x44,0xf1]
+// CHECK: cmp wsp, #500 // encoding: [0xff,0xd3,0x07,0x71]
+// CHECK: cmp sp, #200 // encoding: [0xff,0x23,0x03,0xf1]
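+// In both aliases Rd is hard-wired to 0b11111 (zr): 0x31448c7f above is
+// adds wzr, w3, #291, lsl #12, and 0x7107d3ff is subs wzr, wsp, #500.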
+
+// A "MOV" involving sp is encoded in this manner: add Reg, Reg, #0
+ mov sp, x30
+ mov wsp, w20
+ mov x11, sp
+ mov w24, wsp
+// CHECK: mov sp, x30 // encoding: [0xdf,0x03,0x00,0x91]
+// CHECK: mov wsp, w20 // encoding: [0x9f,0x02,0x00,0x11]
+// CHECK: mov x11, sp // encoding: [0xeb,0x03,0x00,0x91]
+// CHECK: mov w24, wsp // encoding: [0xf8,0x03,0x00,0x11]
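+// e.g. 0x910003df above is add sp, x30, #0: Rd=0b11111 (sp), Rn=0b11110 (x30),
+// imm12=0.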
+
+// A relocation check (defaults to lo12, which is the only sensible relocation here anyway)
+ add x0, x4, #:lo12:var
+// CHECK: add x0, x4, #:lo12:var // encoding: [0x80'A',A,A,0x91'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:var, kind: fixup_a64_add_lo12
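+// The 'A' markers above are the bits left to the fixup; only the opcode and
+// register fields are fixed at assembly time, and imm12 is supplied by the
+// :lo12: fixup.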
+
+//------------------------------------------------------------------------------
+// Add-sub (shifted register)
+//------------------------------------------------------------------------------
+
+// As usual, many instructions print as their preferred aliases rather than in
+// their canonical forms.
+
+ add w3, w5, w7
+ add wzr, w3, w5
+ add w20, wzr, w4
+ add w4, w6, wzr
+// CHECK: add w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x0b]
+// CHECK: add wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x0b]
+// CHECK: add w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x0b]
+// CHECK: add w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x0b]
+
+ add w11, w13, w15, lsl #0
+ add w9, w3, wzr, lsl #10
+ add w17, w29, w20, lsl #31
+// CHECK: add w11, w13, w15 // encoding: [0xab,0x01,0x0f,0x0b]
+// CHECK: add w9, w3, wzr, lsl #10 // encoding: [0x69,0x28,0x1f,0x0b]
+// CHECK: add w17, w29, w20, lsl #31 // encoding: [0xb1,0x7f,0x14,0x0b]
+
+ add w21, w22, w23, lsr #0
+ add w24, w25, w26, lsr #18
+ add w27, w28, w29, lsr #31
+// CHECK: add w21, w22, w23, lsr #0 // encoding: [0xd5,0x02,0x57,0x0b]
+// CHECK: add w24, w25, w26, lsr #18 // encoding: [0x38,0x4b,0x5a,0x0b]
+// CHECK: add w27, w28, w29, lsr #31 // encoding: [0x9b,0x7f,0x5d,0x0b]
+
+ add w2, w3, w4, asr #0
+ add w5, w6, w7, asr #21
+ add w8, w9, w10, asr #31
+// CHECK: add w2, w3, w4, asr #0 // encoding: [0x62,0x00,0x84,0x0b]
+// CHECK: add w5, w6, w7, asr #21 // encoding: [0xc5,0x54,0x87,0x0b]
+// CHECK: add w8, w9, w10, asr #31 // encoding: [0x28,0x7d,0x8a,0x0b]
+
+ add x3, x5, x7
+ add xzr, x3, x5
+ add x20, xzr, x4
+ add x4, x6, xzr
+// CHECK: add x3, x5, x7 // encoding: [0xa3,0x00,0x07,0x8b]
+// CHECK: add xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0x8b]
+// CHECK: add x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0x8b]
+// CHECK: add x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0x8b]
+
+ add x11, x13, x15, lsl #0
+ add x9, x3, xzr, lsl #10
+ add x17, x29, x20, lsl #63
+// CHECK: add x11, x13, x15 // encoding: [0xab,0x01,0x0f,0x8b]
+// CHECK: add x9, x3, xzr, lsl #10 // encoding: [0x69,0x28,0x1f,0x8b]
+// CHECK: add x17, x29, x20, lsl #63 // encoding: [0xb1,0xff,0x14,0x8b]
+
+ add x21, x22, x23, lsr #0
+ add x24, x25, x26, lsr #18
+ add x27, x28, x29, lsr #63
+// CHECK: add x21, x22, x23, lsr #0 // encoding: [0xd5,0x02,0x57,0x8b]
+// CHECK: add x24, x25, x26, lsr #18 // encoding: [0x38,0x4b,0x5a,0x8b]
+// CHECK: add x27, x28, x29, lsr #63 // encoding: [0x9b,0xff,0x5d,0x8b]
+
+ add x2, x3, x4, asr #0
+ add x5, x6, x7, asr #21
+ add x8, x9, x10, asr #63
+// CHECK: add x2, x3, x4, asr #0 // encoding: [0x62,0x00,0x84,0x8b]
+// CHECK: add x5, x6, x7, asr #21 // encoding: [0xc5,0x54,0x87,0x8b]
+// CHECK: add x8, x9, x10, asr #63 // encoding: [0x28,0xfd,0x8a,0x8b]
+
+ adds w3, w5, w7
+ adds wzr, w3, w5
+ adds w20, wzr, w4
+ adds w4, w6, wzr
+// CHECK: adds w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x2b]
+// CHECK: adds wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x2b]
+// CHECK: adds w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x2b]
+// CHECK: adds w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x2b]
+
+ adds w11, w13, w15, lsl #0
+ adds w9, w3, wzr, lsl #10
+ adds w17, w29, w20, lsl #31
+// CHECK: adds w11, w13, w15 // encoding: [0xab,0x01,0x0f,0x2b]
+// CHECK: adds w9, w3, wzr, lsl #10 // encoding: [0x69,0x28,0x1f,0x2b]
+// CHECK: adds w17, w29, w20, lsl #31 // encoding: [0xb1,0x7f,0x14,0x2b]
+
+ adds w21, w22, w23, lsr #0
+ adds w24, w25, w26, lsr #18
+ adds w27, w28, w29, lsr #31
+// CHECK: adds w21, w22, w23, lsr #0 // encoding: [0xd5,0x02,0x57,0x2b]
+// CHECK: adds w24, w25, w26, lsr #18 // encoding: [0x38,0x4b,0x5a,0x2b]
+// CHECK: adds w27, w28, w29, lsr #31 // encoding: [0x9b,0x7f,0x5d,0x2b]
+
+ adds w2, w3, w4, asr #0
+ adds w5, w6, w7, asr #21
+ adds w8, w9, w10, asr #31
+// CHECK: adds w2, w3, w4, asr #0 // encoding: [0x62,0x00,0x84,0x2b]
+// CHECK: adds w5, w6, w7, asr #21 // encoding: [0xc5,0x54,0x87,0x2b]
+// CHECK: adds w8, w9, w10, asr #31 // encoding: [0x28,0x7d,0x8a,0x2b]
+
+ adds x3, x5, x7
+ adds xzr, x3, x5
+ adds x20, xzr, x4
+ adds x4, x6, xzr
+// CHECK: adds x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xab]
+// CHECK: adds xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xab]
+// CHECK: adds x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xab]
+// CHECK: adds x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xab]
+
+ adds x11, x13, x15, lsl #0
+ adds x9, x3, xzr, lsl #10
+ adds x17, x29, x20, lsl #63
+// CHECK: adds x11, x13, x15 // encoding: [0xab,0x01,0x0f,0xab]
+// CHECK: adds x9, x3, xzr, lsl #10 // encoding: [0x69,0x28,0x1f,0xab]
+// CHECK: adds x17, x29, x20, lsl #63 // encoding: [0xb1,0xff,0x14,0xab]
+
+ adds x21, x22, x23, lsr #0
+ adds x24, x25, x26, lsr #18
+ adds x27, x28, x29, lsr #63
+// CHECK: adds x21, x22, x23, lsr #0 // encoding: [0xd5,0x02,0x57,0xab]
+// CHECK: adds x24, x25, x26, lsr #18 // encoding: [0x38,0x4b,0x5a,0xab]
+// CHECK: adds x27, x28, x29, lsr #63 // encoding: [0x9b,0xff,0x5d,0xab]
+
+ adds x2, x3, x4, asr #0
+ adds x5, x6, x7, asr #21
+ adds x8, x9, x10, asr #63
+// CHECK: adds x2, x3, x4, asr #0 // encoding: [0x62,0x00,0x84,0xab]
+// CHECK: adds x5, x6, x7, asr #21 // encoding: [0xc5,0x54,0x87,0xab]
+// CHECK: adds x8, x9, x10, asr #63 // encoding: [0x28,0xfd,0x8a,0xab]
+
+ sub w3, w5, w7
+ sub wzr, w3, w5
+ sub w20, wzr, w4
+ sub w4, w6, wzr
+// CHECK: sub w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x4b]
+// CHECK: sub wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x4b]
+// CHECK: sub w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x4b]
+// CHECK: sub w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x4b]
+
+ sub w11, w13, w15, lsl #0
+ sub w9, w3, wzr, lsl #10
+ sub w17, w29, w20, lsl #31
+// CHECK: sub w11, w13, w15 // encoding: [0xab,0x01,0x0f,0x4b]
+// CHECK: sub w9, w3, wzr, lsl #10 // encoding: [0x69,0x28,0x1f,0x4b]
+// CHECK: sub w17, w29, w20, lsl #31 // encoding: [0xb1,0x7f,0x14,0x4b]
+
+ sub w21, w22, w23, lsr #0
+ sub w24, w25, w26, lsr #18
+ sub w27, w28, w29, lsr #31
+// CHECK: sub w21, w22, w23, lsr #0 // encoding: [0xd5,0x02,0x57,0x4b]
+// CHECK: sub w24, w25, w26, lsr #18 // encoding: [0x38,0x4b,0x5a,0x4b]
+// CHECK: sub w27, w28, w29, lsr #31 // encoding: [0x9b,0x7f,0x5d,0x4b]
+
+ sub w2, w3, w4, asr #0
+ sub w5, w6, w7, asr #21
+ sub w8, w9, w10, asr #31
+// CHECK: sub w2, w3, w4, asr #0 // encoding: [0x62,0x00,0x84,0x4b]
+// CHECK: sub w5, w6, w7, asr #21 // encoding: [0xc5,0x54,0x87,0x4b]
+// CHECK: sub w8, w9, w10, asr #31 // encoding: [0x28,0x7d,0x8a,0x4b]
+
+ sub x3, x5, x7
+ sub xzr, x3, x5
+ sub x20, xzr, x4
+ sub x4, x6, xzr
+// CHECK: sub x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xcb]
+// CHECK: sub xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xcb]
+// CHECK: sub x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xcb]
+// CHECK: sub x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xcb]
+
+ sub x11, x13, x15, lsl #0
+ sub x9, x3, xzr, lsl #10
+ sub x17, x29, x20, lsl #63
+// CHECK: sub x11, x13, x15 // encoding: [0xab,0x01,0x0f,0xcb]
+// CHECK: sub x9, x3, xzr, lsl #10 // encoding: [0x69,0x28,0x1f,0xcb]
+// CHECK: sub x17, x29, x20, lsl #63 // encoding: [0xb1,0xff,0x14,0xcb]
+
+ sub x21, x22, x23, lsr #0
+ sub x24, x25, x26, lsr #18
+ sub x27, x28, x29, lsr #63
+// CHECK: sub x21, x22, x23, lsr #0 // encoding: [0xd5,0x02,0x57,0xcb]
+// CHECK: sub x24, x25, x26, lsr #18 // encoding: [0x38,0x4b,0x5a,0xcb]
+// CHECK: sub x27, x28, x29, lsr #63 // encoding: [0x9b,0xff,0x5d,0xcb]
+
+ sub x2, x3, x4, asr #0
+ sub x5, x6, x7, asr #21
+ sub x8, x9, x10, asr #63
+// CHECK: sub x2, x3, x4, asr #0 // encoding: [0x62,0x00,0x84,0xcb]
+// CHECK: sub x5, x6, x7, asr #21 // encoding: [0xc5,0x54,0x87,0xcb]
+// CHECK: sub x8, x9, x10, asr #63 // encoding: [0x28,0xfd,0x8a,0xcb]
+
+ subs w3, w5, w7
+ subs wzr, w3, w5
+ subs w20, wzr, w4
+ subs w4, w6, wzr
+// CHECK: subs w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x6b]
+// CHECK: subs wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x6b]
+// CHECK: subs w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x6b]
+// CHECK: subs w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x6b]
+
+ subs w11, w13, w15, lsl #0
+ subs w9, w3, wzr, lsl #10
+ subs w17, w29, w20, lsl #31
+// CHECK: subs w11, w13, w15 // encoding: [0xab,0x01,0x0f,0x6b]
+// CHECK: subs w9, w3, wzr, lsl #10 // encoding: [0x69,0x28,0x1f,0x6b]
+// CHECK: subs w17, w29, w20, lsl #31 // encoding: [0xb1,0x7f,0x14,0x6b]
+
+ subs w21, w22, w23, lsr #0
+ subs w24, w25, w26, lsr #18
+ subs w27, w28, w29, lsr #31
+// CHECK: subs w21, w22, w23, lsr #0 // encoding: [0xd5,0x02,0x57,0x6b]
+// CHECK: subs w24, w25, w26, lsr #18 // encoding: [0x38,0x4b,0x5a,0x6b]
+// CHECK: subs w27, w28, w29, lsr #31 // encoding: [0x9b,0x7f,0x5d,0x6b]
+
+ subs w2, w3, w4, asr #0
+ subs w5, w6, w7, asr #21
+ subs w8, w9, w10, asr #31
+// CHECK: subs w2, w3, w4, asr #0 // encoding: [0x62,0x00,0x84,0x6b]
+// CHECK: subs w5, w6, w7, asr #21 // encoding: [0xc5,0x54,0x87,0x6b]
+// CHECK: subs w8, w9, w10, asr #31 // encoding: [0x28,0x7d,0x8a,0x6b]
+
+ subs x3, x5, x7
+ subs xzr, x3, x5
+ subs x20, xzr, x4
+ subs x4, x6, xzr
+// CHECK: subs x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xeb]
+// CHECK: subs xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xeb]
+// CHECK: subs x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xeb]
+// CHECK: subs x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xeb]
+
+ subs x11, x13, x15, lsl #0
+ subs x9, x3, xzr, lsl #10
+ subs x17, x29, x20, lsl #63
+// CHECK: subs x11, x13, x15 // encoding: [0xab,0x01,0x0f,0xeb]
+// CHECK: subs x9, x3, xzr, lsl #10 // encoding: [0x69,0x28,0x1f,0xeb]
+// CHECK: subs x17, x29, x20, lsl #63 // encoding: [0xb1,0xff,0x14,0xeb]
+
+ subs x21, x22, x23, lsr #0
+ subs x24, x25, x26, lsr #18
+ subs x27, x28, x29, lsr #63
+// CHECK: subs x21, x22, x23, lsr #0 // encoding: [0xd5,0x02,0x57,0xeb]
+// CHECK: subs x24, x25, x26, lsr #18 // encoding: [0x38,0x4b,0x5a,0xeb]
+// CHECK: subs x27, x28, x29, lsr #63 // encoding: [0x9b,0xff,0x5d,0xeb]
+
+ subs x2, x3, x4, asr #0
+ subs x5, x6, x7, asr #21
+ subs x8, x9, x10, asr #63
+// CHECK: subs x2, x3, x4, asr #0 // encoding: [0x62,0x00,0x84,0xeb]
+// CHECK: subs x5, x6, x7, asr #21 // encoding: [0xc5,0x54,0x87,0xeb]
+// CHECK: subs x8, x9, x10, asr #63 // encoding: [0x28,0xfd,0x8a,0xeb]
+
+ cmn w0, w3
+ cmn wzr, w4
+ cmn w5, wzr
+// CHECK: cmn w0, w3 // encoding: [0x1f,0x00,0x03,0x2b]
+// CHECK: cmn wzr, w4 // encoding: [0xff,0x03,0x04,0x2b]
+// CHECK: cmn w5, wzr // encoding: [0xbf,0x00,0x1f,0x2b]
+
+ cmn w6, w7, lsl #0
+ cmn w8, w9, lsl #15
+ cmn w10, w11, lsl #31
+// CHECK: cmn w6, w7 // encoding: [0xdf,0x00,0x07,0x2b]
+// CHECK: cmn w8, w9, lsl #15 // encoding: [0x1f,0x3d,0x09,0x2b]
+// CHECK: cmn w10, w11, lsl #31 // encoding: [0x5f,0x7d,0x0b,0x2b]
+
+ cmn w12, w13, lsr #0
+ cmn w14, w15, lsr #21
+ cmn w16, w17, lsr #31
+// CHECK: cmn w12, w13, lsr #0 // encoding: [0x9f,0x01,0x4d,0x2b]
+// CHECK: cmn w14, w15, lsr #21 // encoding: [0xdf,0x55,0x4f,0x2b]
+// CHECK: cmn w16, w17, lsr #31 // encoding: [0x1f,0x7e,0x51,0x2b]
+
+ cmn w18, w19, asr #0
+ cmn w20, w21, asr #22
+ cmn w22, w23, asr #31
+// CHECK: cmn w18, w19, asr #0 // encoding: [0x5f,0x02,0x93,0x2b]
+// CHECK: cmn w20, w21, asr #22 // encoding: [0x9f,0x5a,0x95,0x2b]
+// CHECK: cmn w22, w23, asr #31 // encoding: [0xdf,0x7e,0x97,0x2b]
+
+ cmn x0, x3
+ cmn xzr, x4
+ cmn x5, xzr
+// CHECK: cmn x0, x3 // encoding: [0x1f,0x00,0x03,0xab]
+// CHECK: cmn xzr, x4 // encoding: [0xff,0x03,0x04,0xab]
+// CHECK: cmn x5, xzr // encoding: [0xbf,0x00,0x1f,0xab]
+
+ cmn x6, x7, lsl #0
+ cmn x8, x9, lsl #15
+ cmn x10, x11, lsl #63
+// CHECK: cmn x6, x7 // encoding: [0xdf,0x00,0x07,0xab]
+// CHECK: cmn x8, x9, lsl #15 // encoding: [0x1f,0x3d,0x09,0xab]
+// CHECK: cmn x10, x11, lsl #63 // encoding: [0x5f,0xfd,0x0b,0xab]
+
+ cmn x12, x13, lsr #0
+ cmn x14, x15, lsr #41
+ cmn x16, x17, lsr #63
+// CHECK: cmn x12, x13, lsr #0 // encoding: [0x9f,0x01,0x4d,0xab]
+// CHECK: cmn x14, x15, lsr #41 // encoding: [0xdf,0xa5,0x4f,0xab]
+// CHECK: cmn x16, x17, lsr #63 // encoding: [0x1f,0xfe,0x51,0xab]
+
+ cmn x18, x19, asr #0
+ cmn x20, x21, asr #55
+ cmn x22, x23, asr #63
+// CHECK: cmn x18, x19, asr #0 // encoding: [0x5f,0x02,0x93,0xab]
+// CHECK: cmn x20, x21, asr #55 // encoding: [0x9f,0xde,0x95,0xab]
+// CHECK: cmn x22, x23, asr #63 // encoding: [0xdf,0xfe,0x97,0xab]
+
+ cmp w0, w3
+ cmp wzr, w4
+ cmp w5, wzr
+// CHECK: cmp w0, w3 // encoding: [0x1f,0x00,0x03,0x6b]
+// CHECK: cmp wzr, w4 // encoding: [0xff,0x03,0x04,0x6b]
+// CHECK: cmp w5, wzr // encoding: [0xbf,0x00,0x1f,0x6b]
+
+ cmp w6, w7, lsl #0
+ cmp w8, w9, lsl #15
+ cmp w10, w11, lsl #31
+// CHECK: cmp w6, w7 // encoding: [0xdf,0x00,0x07,0x6b]
+// CHECK: cmp w8, w9, lsl #15 // encoding: [0x1f,0x3d,0x09,0x6b]
+// CHECK: cmp w10, w11, lsl #31 // encoding: [0x5f,0x7d,0x0b,0x6b]
+
+ cmp w12, w13, lsr #0
+ cmp w14, w15, lsr #21
+ cmp w16, w17, lsr #31
+// CHECK: cmp w12, w13, lsr #0 // encoding: [0x9f,0x01,0x4d,0x6b]
+// CHECK: cmp w14, w15, lsr #21 // encoding: [0xdf,0x55,0x4f,0x6b]
+// CHECK: cmp w16, w17, lsr #31 // encoding: [0x1f,0x7e,0x51,0x6b]
+
+ cmp w18, w19, asr #0
+ cmp w20, w21, asr #22
+ cmp w22, w23, asr #31
+// CHECK: cmp w18, w19, asr #0 // encoding: [0x5f,0x02,0x93,0x6b]
+// CHECK: cmp w20, w21, asr #22 // encoding: [0x9f,0x5a,0x95,0x6b]
+// CHECK: cmp w22, w23, asr #31 // encoding: [0xdf,0x7e,0x97,0x6b]
+
+ cmp x0, x3
+ cmp xzr, x4
+ cmp x5, xzr
+// CHECK: cmp x0, x3 // encoding: [0x1f,0x00,0x03,0xeb]
+// CHECK: cmp xzr, x4 // encoding: [0xff,0x03,0x04,0xeb]
+// CHECK: cmp x5, xzr // encoding: [0xbf,0x00,0x1f,0xeb]
+
+ cmp x6, x7, lsl #0
+ cmp x8, x9, lsl #15
+ cmp x10, x11, lsl #63
+// CHECK: cmp x6, x7 // encoding: [0xdf,0x00,0x07,0xeb]
+// CHECK: cmp x8, x9, lsl #15 // encoding: [0x1f,0x3d,0x09,0xeb]
+// CHECK: cmp x10, x11, lsl #63 // encoding: [0x5f,0xfd,0x0b,0xeb]
+
+ cmp x12, x13, lsr #0
+ cmp x14, x15, lsr #41
+ cmp x16, x17, lsr #63
+// CHECK: cmp x12, x13, lsr #0 // encoding: [0x9f,0x01,0x4d,0xeb]
+// CHECK: cmp x14, x15, lsr #41 // encoding: [0xdf,0xa5,0x4f,0xeb]
+// CHECK: cmp x16, x17, lsr #63 // encoding: [0x1f,0xfe,0x51,0xeb]
+
+ cmp x18, x19, asr #0
+ cmp x20, x21, asr #55
+ cmp x22, x23, asr #63
+// CHECK: cmp x18, x19, asr #0 // encoding: [0x5f,0x02,0x93,0xeb]
+// CHECK: cmp x20, x21, asr #55 // encoding: [0x9f,0xde,0x95,0xeb]
+// CHECK: cmp x22, x23, asr #63 // encoding: [0xdf,0xfe,0x97,0xeb]
+
+ neg w29, w30
+ neg w30, wzr
+ neg wzr, w0
+// CHECK: sub w29, wzr, w30 // encoding: [0xfd,0x03,0x1e,0x4b]
+// CHECK: sub w30, wzr, wzr // encoding: [0xfe,0x03,0x1f,0x4b]
+// CHECK: sub wzr, wzr, w0 // encoding: [0xff,0x03,0x00,0x4b]
+
+ neg w28, w27, lsl #0
+ neg w26, w25, lsl #29
+ neg w24, w23, lsl #31
+// CHECK: sub w28, wzr, w27 // encoding: [0xfc,0x03,0x1b,0x4b]
+// CHECK: sub w26, wzr, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b]
+// CHECK: sub w24, wzr, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x4b]
+
+ neg w22, w21, lsr #0
+ neg w20, w19, lsr #1
+ neg w18, w17, lsr #31
+// CHECK: sub w22, wzr, w21, lsr #0 // encoding: [0xf6,0x03,0x55,0x4b]
+// CHECK: sub w20, wzr, w19, lsr #1 // encoding: [0xf4,0x07,0x53,0x4b]
+// CHECK: sub w18, wzr, w17, lsr #31 // encoding: [0xf2,0x7f,0x51,0x4b]
+
+ neg w16, w15, asr #0
+ neg w14, w13, asr #12
+ neg w12, w11, asr #31
+// CHECK: sub w16, wzr, w15, asr #0 // encoding: [0xf0,0x03,0x8f,0x4b]
+// CHECK: sub w14, wzr, w13, asr #12 // encoding: [0xee,0x33,0x8d,0x4b]
+// CHECK: sub w12, wzr, w11, asr #31 // encoding: [0xec,0x7f,0x8b,0x4b]
+
+ neg x29, x30
+ neg x30, xzr
+ neg xzr, x0
+// CHECK: sub x29, xzr, x30 // encoding: [0xfd,0x03,0x1e,0xcb]
+// CHECK: sub x30, xzr, xzr // encoding: [0xfe,0x03,0x1f,0xcb]
+// CHECK: sub xzr, xzr, x0 // encoding: [0xff,0x03,0x00,0xcb]
+
+ neg x28, x27, lsl #0
+ neg x26, x25, lsl #29
+ neg x24, x23, lsl #31
+// CHECK: sub x28, xzr, x27 // encoding: [0xfc,0x03,0x1b,0xcb]
+// CHECK: sub x26, xzr, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb]
+// CHECK: sub x24, xzr, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xcb]
+
+ neg x22, x21, lsr #0
+ neg x20, x19, lsr #1
+ neg x18, x17, lsr #31
+// CHECK: sub x22, xzr, x21, lsr #0 // encoding: [0xf6,0x03,0x55,0xcb]
+// CHECK: sub x20, xzr, x19, lsr #1 // encoding: [0xf4,0x07,0x53,0xcb]
+// CHECK: sub x18, xzr, x17, lsr #31 // encoding: [0xf2,0x7f,0x51,0xcb]
+
+ neg x16, x15, asr #0
+ neg x14, x13, asr #12
+ neg x12, x11, asr #31
+// CHECK: sub x16, xzr, x15, asr #0 // encoding: [0xf0,0x03,0x8f,0xcb]
+// CHECK: sub x14, xzr, x13, asr #12 // encoding: [0xee,0x33,0x8d,0xcb]
+// CHECK: sub x12, xzr, x11, asr #31 // encoding: [0xec,0x7f,0x8b,0xcb]
+
+ negs w29, w30
+ negs w30, wzr
+ negs wzr, w0
+// CHECK: subs w29, wzr, w30 // encoding: [0xfd,0x03,0x1e,0x6b]
+// CHECK: subs w30, wzr, wzr // encoding: [0xfe,0x03,0x1f,0x6b]
+// CHECK: subs wzr, wzr, w0 // encoding: [0xff,0x03,0x00,0x6b]
+
+ negs w28, w27, lsl #0
+ negs w26, w25, lsl #29
+ negs w24, w23, lsl #31
+// CHECK: subs w28, wzr, w27 // encoding: [0xfc,0x03,0x1b,0x6b]
+// CHECK: subs w26, wzr, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b]
+// CHECK: subs w24, wzr, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x6b]
+
+ negs w22, w21, lsr #0
+ negs w20, w19, lsr #1
+ negs w18, w17, lsr #31
+// CHECK: subs w22, wzr, w21, lsr #0 // encoding: [0xf6,0x03,0x55,0x6b]
+// CHECK: subs w20, wzr, w19, lsr #1 // encoding: [0xf4,0x07,0x53,0x6b]
+// CHECK: subs w18, wzr, w17, lsr #31 // encoding: [0xf2,0x7f,0x51,0x6b]
+
+ negs w16, w15, asr #0
+ negs w14, w13, asr #12
+ negs w12, w11, asr #31
+// CHECK: subs w16, wzr, w15, asr #0 // encoding: [0xf0,0x03,0x8f,0x6b]
+// CHECK: subs w14, wzr, w13, asr #12 // encoding: [0xee,0x33,0x8d,0x6b]
+// CHECK: subs w12, wzr, w11, asr #31 // encoding: [0xec,0x7f,0x8b,0x6b]
+
+ negs x29, x30
+ negs x30, xzr
+ negs xzr, x0
+// CHECK: subs x29, xzr, x30 // encoding: [0xfd,0x03,0x1e,0xeb]
+// CHECK: subs x30, xzr, xzr // encoding: [0xfe,0x03,0x1f,0xeb]
+// CHECK: subs xzr, xzr, x0 // encoding: [0xff,0x03,0x00,0xeb]
+
+ negs x28, x27, lsl #0
+ negs x26, x25, lsl #29
+ negs x24, x23, lsl #31
+// CHECK: subs x28, xzr, x27 // encoding: [0xfc,0x03,0x1b,0xeb]
+// CHECK: subs x26, xzr, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb]
+// CHECK: subs x24, xzr, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xeb]
+
+ negs x22, x21, lsr #0
+ negs x20, x19, lsr #1
+ negs x18, x17, lsr #31
+// CHECK: subs x22, xzr, x21, lsr #0 // encoding: [0xf6,0x03,0x55,0xeb]
+// CHECK: subs x20, xzr, x19, lsr #1 // encoding: [0xf4,0x07,0x53,0xeb]
+// CHECK: subs x18, xzr, x17, lsr #31 // encoding: [0xf2,0x7f,0x51,0xeb]
+
+ negs x16, x15, asr #0
+ negs x14, x13, asr #12
+ negs x12, x11, asr #31
+// CHECK: subs x16, xzr, x15, asr #0 // encoding: [0xf0,0x03,0x8f,0xeb]
+// CHECK: subs x14, xzr, x13, asr #12 // encoding: [0xee,0x33,0x8d,0xeb]
+// CHECK: subs x12, xzr, x11, asr #31 // encoding: [0xec,0x7f,0x8b,0xeb]
+
+//------------------------------------------------------------------------------
+// Add-sub (with carry)
+//------------------------------------------------------------------------------
+ adc w29, w27, w25
+ adc wzr, w3, w4
+ adc w9, wzr, w10
+ adc w20, w0, wzr
+// CHECK: adc w29, w27, w25 // encoding: [0x7d,0x03,0x19,0x1a]
+// CHECK: adc wzr, w3, w4 // encoding: [0x7f,0x00,0x04,0x1a]
+// CHECK: adc w9, wzr, w10 // encoding: [0xe9,0x03,0x0a,0x1a]
+// CHECK: adc w20, w0, wzr // encoding: [0x14,0x00,0x1f,0x1a]
+
+ adc x29, x27, x25
+ adc xzr, x3, x4
+ adc x9, xzr, x10
+ adc x20, x0, xzr
+// CHECK: adc x29, x27, x25 // encoding: [0x7d,0x03,0x19,0x9a]
+// CHECK: adc xzr, x3, x4 // encoding: [0x7f,0x00,0x04,0x9a]
+// CHECK: adc x9, xzr, x10 // encoding: [0xe9,0x03,0x0a,0x9a]
+// CHECK: adc x20, x0, xzr // encoding: [0x14,0x00,0x1f,0x9a]
+
+ adcs w29, w27, w25
+ adcs wzr, w3, w4
+ adcs w9, wzr, w10
+ adcs w20, w0, wzr
+// CHECK: adcs w29, w27, w25 // encoding: [0x7d,0x03,0x19,0x3a]
+// CHECK: adcs wzr, w3, w4 // encoding: [0x7f,0x00,0x04,0x3a]
+// CHECK: adcs w9, wzr, w10 // encoding: [0xe9,0x03,0x0a,0x3a]
+// CHECK: adcs w20, w0, wzr // encoding: [0x14,0x00,0x1f,0x3a]
+
+ adcs x29, x27, x25
+ adcs xzr, x3, x4
+ adcs x9, xzr, x10
+ adcs x20, x0, xzr
+// CHECK: adcs x29, x27, x25 // encoding: [0x7d,0x03,0x19,0xba]
+// CHECK: adcs xzr, x3, x4 // encoding: [0x7f,0x00,0x04,0xba]
+// CHECK: adcs x9, xzr, x10 // encoding: [0xe9,0x03,0x0a,0xba]
+// CHECK: adcs x20, x0, xzr // encoding: [0x14,0x00,0x1f,0xba]
+
+ sbc w29, w27, w25
+ sbc wzr, w3, w4
+ sbc w9, wzr, w10
+ sbc w20, w0, wzr
+// CHECK: sbc w29, w27, w25 // encoding: [0x7d,0x03,0x19,0x5a]
+// CHECK: sbc wzr, w3, w4 // encoding: [0x7f,0x00,0x04,0x5a]
+// CHECK: ngc w9, w10 // encoding: [0xe9,0x03,0x0a,0x5a]
+// CHECK: sbc w20, w0, wzr // encoding: [0x14,0x00,0x1f,0x5a]
+
+ sbc x29, x27, x25
+ sbc xzr, x3, x4
+ sbc x9, xzr, x10
+ sbc x20, x0, xzr
+// CHECK: sbc x29, x27, x25 // encoding: [0x7d,0x03,0x19,0xda]
+// CHECK: sbc xzr, x3, x4 // encoding: [0x7f,0x00,0x04,0xda]
+// CHECK: ngc x9, x10 // encoding: [0xe9,0x03,0x0a,0xda]
+// CHECK: sbc x20, x0, xzr // encoding: [0x14,0x00,0x1f,0xda]
+
+ sbcs w29, w27, w25
+ sbcs wzr, w3, w4
+ sbcs w9, wzr, w10
+ sbcs w20, w0, wzr
+// CHECK: sbcs w29, w27, w25 // encoding: [0x7d,0x03,0x19,0x7a]
+// CHECK: sbcs wzr, w3, w4 // encoding: [0x7f,0x00,0x04,0x7a]
+// CHECK: ngcs w9, w10 // encoding: [0xe9,0x03,0x0a,0x7a]
+// CHECK: sbcs w20, w0, wzr // encoding: [0x14,0x00,0x1f,0x7a]
+
+ sbcs x29, x27, x25
+ sbcs xzr, x3, x4
+ sbcs x9, xzr, x10
+ sbcs x20, x0, xzr
+// CHECK: sbcs x29, x27, x25 // encoding: [0x7d,0x03,0x19,0xfa]
+// CHECK: sbcs xzr, x3, x4 // encoding: [0x7f,0x00,0x04,0xfa]
+// CHECK: ngcs x9, x10 // encoding: [0xe9,0x03,0x0a,0xfa]
+// CHECK: sbcs x20, x0, xzr // encoding: [0x14,0x00,0x1f,0xfa]
+
+ ngc w3, w12
+ ngc wzr, w9
+ ngc w23, wzr
+// CHECK: ngc w3, w12 // encoding: [0xe3,0x03,0x0c,0x5a]
+// CHECK: ngc wzr, w9 // encoding: [0xff,0x03,0x09,0x5a]
+// CHECK: ngc w23, wzr // encoding: [0xf7,0x03,0x1f,0x5a]
+
+ ngc x29, x30
+ ngc xzr, x0
+ ngc x0, xzr
+// CHECK: ngc x29, x30 // encoding: [0xfd,0x03,0x1e,0xda]
+// CHECK: ngc xzr, x0 // encoding: [0xff,0x03,0x00,0xda]
+// CHECK: ngc x0, xzr // encoding: [0xe0,0x03,0x1f,0xda]
+
+ ngcs w3, w12
+ ngcs wzr, w9
+ ngcs w23, wzr
+// CHECK: ngcs w3, w12 // encoding: [0xe3,0x03,0x0c,0x7a]
+// CHECK: ngcs wzr, w9 // encoding: [0xff,0x03,0x09,0x7a]
+// CHECK: ngcs w23, wzr // encoding: [0xf7,0x03,0x1f,0x7a]
+
+ ngcs x29, x30
+ ngcs xzr, x0
+ ngcs x0, xzr
+// CHECK: ngcs x29, x30 // encoding: [0xfd,0x03,0x1e,0xfa]
+// CHECK: ngcs xzr, x0 // encoding: [0xff,0x03,0x00,0xfa]
+// CHECK: ngcs x0, xzr // encoding: [0xe0,0x03,0x1f,0xfa]
+
+//------------------------------------------------------------------------------
+// Bitfield
+//------------------------------------------------------------------------------
+
+ sbfm x1, x2, #3, #4
+ sbfm x3, x4, #63, #63
+ sbfm wzr, wzr, #31, #31
+ sbfm w12, w9, #0, #0
+// CHECK: sbfm x1, x2, #3, #4 // encoding: [0x41,0x10,0x43,0x93]
+// CHECK: sbfm x3, x4, #63, #63 // encoding: [0x83,0xfc,0x7f,0x93]
+// CHECK: sbfm wzr, wzr, #31, #31 // encoding: [0xff,0x7f,0x1f,0x13]
+// CHECK: sbfm w12, w9, #0, #0 // encoding: [0x2c,0x01,0x00,0x13]
+
+ ubfm x4, x5, #12, #10
+ ubfm xzr, x4, #0, #0
+ ubfm x4, xzr, #63, #5
+ ubfm x5, x6, #12, #63
+// CHECK: ubfm x4, x5, #12, #10 // encoding: [0xa4,0x28,0x4c,0xd3]
+// CHECK: ubfm xzr, x4, #0, #0 // encoding: [0x9f,0x00,0x40,0xd3]
+// CHECK: ubfm x4, xzr, #63, #5 // encoding: [0xe4,0x17,0x7f,0xd3]
+// CHECK: ubfm x5, x6, #12, #63 // encoding: [0xc5,0xfc,0x4c,0xd3]
+
+ bfm x4, x5, #12, #10
+ bfm xzr, x4, #0, #0
+ bfm x4, xzr, #63, #5
+ bfm x5, x6, #12, #63
+// CHECK: bfm x4, x5, #12, #10 // encoding: [0xa4,0x28,0x4c,0xb3]
+// CHECK: bfm xzr, x4, #0, #0 // encoding: [0x9f,0x00,0x40,0xb3]
+// CHECK: bfm x4, xzr, #63, #5 // encoding: [0xe4,0x17,0x7f,0xb3]
+// CHECK: bfm x5, x6, #12, #63 // encoding: [0xc5,0xfc,0x4c,0xb3]
+
+ sxtb w1, w2
+ sxtb xzr, w3
+ sxth w9, w10
+ sxth x0, w1
+ sxtw x3, w30
+// CHECK: sxtb w1, w2 // encoding: [0x41,0x1c,0x00,0x13]
+// CHECK: sxtb xzr, w3 // encoding: [0x7f,0x1c,0x40,0x93]
+// CHECK: sxth w9, w10 // encoding: [0x49,0x3d,0x00,0x13]
+// CHECK: sxth x0, w1 // encoding: [0x20,0x3c,0x40,0x93]
+// CHECK: sxtw x3, w30 // encoding: [0xc3,0x7f,0x40,0x93]
+
+ uxtb w1, w2
+ uxtb xzr, w3
+ uxth w9, w10
+ uxth x0, w1
+// CHECK: uxtb w1, w2 // encoding: [0x41,0x1c,0x00,0x53]
+// CHECK: uxtb xzr, w3 // encoding: [0x7f,0x1c,0x00,0x53]
+// CHECK: uxth w9, w10 // encoding: [0x49,0x3d,0x00,0x53]
+// CHECK: uxth x0, w1 // encoding: [0x20,0x3c,0x00,0x53]
+
+ asr w3, w2, #0
+ asr w9, w10, #31
+ asr x20, x21, #63
+ asr w1, wzr, #3
+// CHECK: asr w3, w2, #0 // encoding: [0x43,0x7c,0x00,0x13]
+// CHECK: asr w9, w10, #31 // encoding: [0x49,0x7d,0x1f,0x13]
+// CHECK: asr x20, x21, #63 // encoding: [0xb4,0xfe,0x7f,0x93]
+// CHECK: asr w1, wzr, #3 // encoding: [0xe1,0x7f,0x03,0x13]
+
+ lsr w3, w2, #0
+ lsr w9, w10, #31
+ lsr x20, x21, #63
+ lsr wzr, wzr, #3
+// CHECK: lsr w3, w2, #0 // encoding: [0x43,0x7c,0x00,0x53]
+// CHECK: lsr w9, w10, #31 // encoding: [0x49,0x7d,0x1f,0x53]
+// CHECK: lsr x20, x21, #63 // encoding: [0xb4,0xfe,0x7f,0xd3]
+// CHECK: lsr wzr, wzr, #3 // encoding: [0xff,0x7f,0x03,0x53]
+
+ lsl w3, w2, #0
+ lsl w9, w10, #31
+ lsl x20, x21, #63
+ lsl w1, wzr, #3
+// CHECK: lsl w3, w2, #0 // encoding: [0x43,0x7c,0x00,0x53]
+// CHECK: lsl w9, w10, #31 // encoding: [0x49,0x01,0x01,0x53]
+// CHECK: lsl x20, x21, #63 // encoding: [0xb4,0x02,0x41,0xd3]
+// CHECK: lsl w1, wzr, #3 // encoding: [0xe1,0x73,0x1d,0x53]
+
+ sbfiz w9, w10, #0, #1
+ sbfiz x2, x3, #63, #1
+ sbfiz x19, x20, #0, #64
+ sbfiz x9, x10, #5, #59
+ sbfiz w9, w10, #0, #32
+ sbfiz w11, w12, #31, #1
+ sbfiz w13, w14, #29, #3
+ sbfiz xzr, xzr, #10, #11
+// CHECK: sbfiz w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13]
+// CHECK: sbfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0x93]
+// CHECK: sbfiz x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0x93]
+// CHECK: sbfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0x93]
+// CHECK: sbfiz w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x13]
+// CHECK: sbfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x13]
+// CHECK: sbfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x13]
+// CHECK: sbfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0x93]
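+// These map onto SBFM as sbfiz Rd, Rn, #lsb, #width
+//   == sbfm Rd, Rn, #((regsize - lsb) % regsize), #(width - 1);
+// e.g. sbfiz x2, x3, #63, #1 above has immr=1, imms=0. bfi and ubfiz follow
+// the same pattern on BFM/UBFM.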
+
+ sbfx w9, w10, #0, #1
+ sbfx x2, x3, #63, #1
+ sbfx x19, x20, #0, #64
+ sbfx x9, x10, #5, #59
+ sbfx w9, w10, #0, #32
+ sbfx w11, w12, #31, #1
+ sbfx w13, w14, #29, #3
+ sbfx xzr, xzr, #10, #11
+// CHECK: sbfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13]
+// CHECK: sbfx x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0x93]
+// CHECK: sbfx x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0x93]
+// CHECK: sbfx x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0x93]
+// CHECK: sbfx w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x13]
+// CHECK: sbfx w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x13]
+// CHECK: sbfx w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x13]
+// CHECK: sbfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0x93]
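+// The extract forms are simpler: sbfx Rd, Rn, #lsb, #width
+//   == sbfm Rd, Rn, #lsb, #(lsb + width - 1);
+// e.g. sbfx x2, x3, #63, #1 above has immr=63, imms=63. bfxil and ubfx map
+// onto BFM/UBFM the same way.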
+
+ bfi w9, w10, #0, #1
+ bfi x2, x3, #63, #1
+ bfi x19, x20, #0, #64
+ bfi x9, x10, #5, #59
+ bfi w9, w10, #0, #32
+ bfi w11, w12, #31, #1
+ bfi w13, w14, #29, #3
+ bfi xzr, xzr, #10, #11
+// CHECK: bfi w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33]
+// CHECK: bfi x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xb3]
+// CHECK: bfi x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3]
+// CHECK: bfi x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xb3]
+// CHECK: bfi w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33]
+// CHECK: bfi w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x33]
+// CHECK: bfi w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x33]
+// CHECK: bfi xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xb3]
+
+ bfxil w9, w10, #0, #1
+ bfxil x2, x3, #63, #1
+ bfxil x19, x20, #0, #64
+ bfxil x9, x10, #5, #59
+ bfxil w9, w10, #0, #32
+ bfxil w11, w12, #31, #1
+ bfxil w13, w14, #29, #3
+ bfxil xzr, xzr, #10, #11
+// CHECK: bfxil w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33]
+// CHECK: bfxil x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0xb3]
+// CHECK: bfxil x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3]
+// CHECK: bfxil x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0xb3]
+// CHECK: bfxil w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33]
+// CHECK: bfxil w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x33]
+// CHECK: bfxil w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x33]
+// CHECK: bfxil xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xb3]
+
+ ubfiz w9, w10, #0, #1
+ ubfiz x2, x3, #63, #1
+ ubfiz x19, x20, #0, #64
+ ubfiz x9, x10, #5, #59
+ ubfiz w9, w10, #0, #32
+ ubfiz w11, w12, #31, #1
+ ubfiz w13, w14, #29, #3
+ ubfiz xzr, xzr, #10, #11
+// CHECK: ubfiz w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53]
+// CHECK: ubfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xd3]
+// CHECK: ubfiz x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xd3]
+// CHECK: ubfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xd3]
+// CHECK: ubfiz w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x53]
+// CHECK: ubfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x53]
+// CHECK: ubfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x53]
+// CHECK: ubfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xd3]
+
+ ubfx w9, w10, #0, #1
+ ubfx x2, x3, #63, #1
+ ubfx x19, x20, #0, #64
+ ubfx x9, x10, #5, #59
+ ubfx w9, w10, #0, #32
+ ubfx w11, w12, #31, #1
+ ubfx w13, w14, #29, #3
+ ubfx xzr, xzr, #10, #11
+// CHECK: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53]
+// CHECK: ubfx x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0xd3]
+// CHECK: ubfx x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xd3]
+// CHECK: ubfx x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0xd3]
+// CHECK: ubfx w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x53]
+// CHECK: ubfx w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x53]
+// CHECK: ubfx w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x53]
+// CHECK: ubfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xd3]
+
+//------------------------------------------------------------------------------
+// Compare & branch (immediate)
+//------------------------------------------------------------------------------
+
+ cbz w5, lbl
+ cbz x5, lbl
+ cbnz x2, lbl
+ cbnz x26, lbl
+// CHECK: cbz w5, lbl // encoding: [0x05'A',A,A,0x34'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbz x5, lbl // encoding: [0x05'A',A,A,0xb4'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbnz x2, lbl // encoding: [0x02'A',A,A,0xb5'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbnz x26, lbl // encoding: [0x1a'A',A,A,0xb5'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+
+ cbz wzr, lbl
+ cbnz xzr, lbl
+// CHECK: cbz wzr, lbl // encoding: [0x1f'A',A,A,0x34'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: cbnz xzr, lbl // encoding: [0x1f'A',A,A,0xb5'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+
+ cbz w5, #0
+ cbnz x3, #-4
+ cbz w20, #1048572
+ cbnz xzr, #-1048576
+// CHECK: cbz w5, #0 // encoding: [0x05,0x00,0x00,0x34]
+// CHECK: cbnz x3, #-4 // encoding: [0xe3,0xff,0xff,0xb5]
+// CHECK: cbz w20, #1048572 // encoding: [0xf4,0xff,0x7f,0x34]
+// CHECK: cbnz xzr, #-1048576 // encoding: [0x1f,0x00,0x80,0xb5]
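+// The offset is a signed 19-bit word offset, giving a range of -2^20 to
+// 2^20 - 4 bytes: exactly the limits exercised above.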
+
+//------------------------------------------------------------------------------
+// Conditional branch (immediate)
+//------------------------------------------------------------------------------
+
+ b.eq lbl
+ b.ne lbl
+ b.cs lbl
+ b.hs lbl
+ b.lo lbl
+ b.cc lbl
+ b.mi lbl
+ b.pl lbl
+ b.vs lbl
+ b.vc lbl
+ b.hi lbl
+ b.ls lbl
+ b.ge lbl
+ b.lt lbl
+ b.gt lbl
+ b.le lbl
+ b.al lbl
+// CHECK: b.eq lbl // encoding: [A,A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.ne lbl // encoding: [0x01'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.hs lbl // encoding: [0x02'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.hs lbl // encoding: [0x02'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.lo lbl // encoding: [0x03'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.lo lbl // encoding: [0x03'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.mi lbl // encoding: [0x04'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.pl lbl // encoding: [0x05'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.vs lbl // encoding: [0x06'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.vc lbl // encoding: [0x07'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.hi lbl // encoding: [0x08'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.ls lbl // encoding: [0x09'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.ge lbl // encoding: [0x0a'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.lt lbl // encoding: [0x0b'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.gt lbl // encoding: [0x0c'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.le lbl // encoding: [0x0d'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
+// CHECK: b.al lbl // encoding: [0x0e'A',A,A,0x54'A']
+// CHECK: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr
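+// Note that cs/hs and cc/lo name the same condition codes; the printer
+// prefers hs and lo, as the expected output above shows.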
+
+ b.eq #0
+ b.lt #-4
+ b.cc #1048572
+// CHECK: b.eq #0 // encoding: [0x00,0x00,0x00,0x54]
+// CHECK: b.lt #-4 // encoding: [0xeb,0xff,0xff,0x54]
+// CHECK: b.lo #1048572 // encoding: [0xe3,0xff,0x7f,0x54]
+
+//------------------------------------------------------------------------------
+// Conditional compare (immediate)
+//------------------------------------------------------------------------------
+
+ ccmp w1, #31, #0, eq
+ ccmp w3, #0, #15, hs
+ ccmp wzr, #15, #13, cs
+// CHECK: ccmp w1, #31, #0, eq // encoding: [0x20,0x08,0x5f,0x7a]
+// CHECK: ccmp w3, #0, #15, hs // encoding: [0x6f,0x28,0x40,0x7a]
+// CHECK: ccmp wzr, #15, #13, hs // encoding: [0xed,0x2b,0x4f,0x7a]
+
+ ccmp x9, #31, #0, le
+ ccmp x3, #0, #15, gt
+ ccmp xzr, #5, #7, ne
+// CHECK: ccmp x9, #31, #0, le // encoding: [0x20,0xd9,0x5f,0xfa]
+// CHECK: ccmp x3, #0, #15, gt // encoding: [0x6f,0xc8,0x40,0xfa]
+// CHECK: ccmp xzr, #5, #7, ne // encoding: [0xe7,0x1b,0x45,0xfa]
+
+ ccmn w1, #31, #0, eq
+ ccmn w3, #0, #15, hs
+ ccmn wzr, #15, #13, cs
+// CHECK: ccmn w1, #31, #0, eq // encoding: [0x20,0x08,0x5f,0x3a]
+// CHECK: ccmn w3, #0, #15, hs // encoding: [0x6f,0x28,0x40,0x3a]
+// CHECK: ccmn wzr, #15, #13, hs // encoding: [0xed,0x2b,0x4f,0x3a]
+
+ ccmn x9, #31, #0, le
+ ccmn x3, #0, #15, gt
+ ccmn xzr, #5, #7, ne
+// CHECK: ccmn x9, #31, #0, le // encoding: [0x20,0xd9,0x5f,0xba]
+// CHECK: ccmn x3, #0, #15, gt // encoding: [0x6f,0xc8,0x40,0xba]
+// CHECK: ccmn xzr, #5, #7, ne // encoding: [0xe7,0x1b,0x45,0xba]
+
+//------------------------------------------------------------------------------
+// Conditional compare (register)
+//------------------------------------------------------------------------------
+
+ ccmp w1, wzr, #0, eq
+ ccmp w3, w0, #15, hs
+ ccmp wzr, w15, #13, cs
+// CHECK: ccmp w1, wzr, #0, eq // encoding: [0x20,0x00,0x5f,0x7a]
+// CHECK: ccmp w3, w0, #15, hs // encoding: [0x6f,0x20,0x40,0x7a]
+// CHECK: ccmp wzr, w15, #13, hs // encoding: [0xed,0x23,0x4f,0x7a]
+
+ ccmp x9, xzr, #0, le
+ ccmp x3, x0, #15, gt
+ ccmp xzr, x5, #7, ne
+// CHECK: ccmp x9, xzr, #0, le // encoding: [0x20,0xd1,0x5f,0xfa]
+// CHECK: ccmp x3, x0, #15, gt // encoding: [0x6f,0xc0,0x40,0xfa]
+// CHECK: ccmp xzr, x5, #7, ne // encoding: [0xe7,0x13,0x45,0xfa]
+
+ ccmn w1, wzr, #0, eq
+ ccmn w3, w0, #15, hs
+ ccmn wzr, w15, #13, cs
+// CHECK: ccmn w1, wzr, #0, eq // encoding: [0x20,0x00,0x5f,0x3a]
+// CHECK: ccmn w3, w0, #15, hs // encoding: [0x6f,0x20,0x40,0x3a]
+// CHECK: ccmn wzr, w15, #13, hs // encoding: [0xed,0x23,0x4f,0x3a]
+
+ ccmn x9, xzr, #0, le
+ ccmn x3, x0, #15, gt
+ ccmn xzr, x5, #7, ne
+// CHECK: ccmn x9, xzr, #0, le // encoding: [0x20,0xd1,0x5f,0xba]
+// CHECK: ccmn x3, x0, #15, gt // encoding: [0x6f,0xc0,0x40,0xba]
+// CHECK: ccmn xzr, x5, #7, ne // encoding: [0xe7,0x13,0x45,0xba]
+
+//------------------------------------------------------------------------------
+// Conditional select
+//------------------------------------------------------------------------------
+ csel w1, w0, w19, ne
+ csel wzr, w5, w9, eq
+ csel w9, wzr, w30, gt
+ csel w1, w28, wzr, mi
+// CHECK: csel w1, w0, w19, ne // encoding: [0x01,0x10,0x93,0x1a]
+// CHECK: csel wzr, w5, w9, eq // encoding: [0xbf,0x00,0x89,0x1a]
+// CHECK: csel w9, wzr, w30, gt // encoding: [0xe9,0xc3,0x9e,0x1a]
+// CHECK: csel w1, w28, wzr, mi // encoding: [0x81,0x43,0x9f,0x1a]
+
+ csel x19, x23, x29, lt
+ csel xzr, x3, x4, ge
+ csel x5, xzr, x6, cs
+ csel x7, x8, xzr, cc
+// CHECK: csel x19, x23, x29, lt // encoding: [0xf3,0xb2,0x9d,0x9a]
+// CHECK: csel xzr, x3, x4, ge // encoding: [0x7f,0xa0,0x84,0x9a]
+// CHECK: csel x5, xzr, x6, hs // encoding: [0xe5,0x23,0x86,0x9a]
+// CHECK: csel x7, x8, xzr, lo // encoding: [0x07,0x31,0x9f,0x9a]
+
+ csinc w1, w0, w19, ne
+ csinc wzr, w5, w9, eq
+ csinc w9, wzr, w30, gt
+ csinc w1, w28, wzr, mi
+// CHECK: csinc w1, w0, w19, ne // encoding: [0x01,0x14,0x93,0x1a]
+// CHECK: csinc wzr, w5, w9, eq // encoding: [0xbf,0x04,0x89,0x1a]
+// CHECK: csinc w9, wzr, w30, gt // encoding: [0xe9,0xc7,0x9e,0x1a]
+// CHECK: csinc w1, w28, wzr, mi // encoding: [0x81,0x47,0x9f,0x1a]
+
+ csinc x19, x23, x29, lt
+ csinc xzr, x3, x4, ge
+ csinc x5, xzr, x6, cs
+ csinc x7, x8, xzr, cc
+// CHECK: csinc x19, x23, x29, lt // encoding: [0xf3,0xb6,0x9d,0x9a]
+// CHECK: csinc xzr, x3, x4, ge // encoding: [0x7f,0xa4,0x84,0x9a]
+// CHECK: csinc x5, xzr, x6, hs // encoding: [0xe5,0x27,0x86,0x9a]
+// CHECK: csinc x7, x8, xzr, lo // encoding: [0x07,0x35,0x9f,0x9a]
+
+ csinv w1, w0, w19, ne
+ csinv wzr, w5, w9, eq
+ csinv w9, wzr, w30, gt
+ csinv w1, w28, wzr, mi
+// CHECK: csinv w1, w0, w19, ne // encoding: [0x01,0x10,0x93,0x5a]
+// CHECK: csinv wzr, w5, w9, eq // encoding: [0xbf,0x00,0x89,0x5a]
+// CHECK: csinv w9, wzr, w30, gt // encoding: [0xe9,0xc3,0x9e,0x5a]
+// CHECK: csinv w1, w28, wzr, mi // encoding: [0x81,0x43,0x9f,0x5a]
+
+ csinv x19, x23, x29, lt
+ csinv xzr, x3, x4, ge
+ csinv x5, xzr, x6, cs
+ csinv x7, x8, xzr, cc
+// CHECK: csinv x19, x23, x29, lt // encoding: [0xf3,0xb2,0x9d,0xda]
+// CHECK: csinv xzr, x3, x4, ge // encoding: [0x7f,0xa0,0x84,0xda]
+// CHECK: csinv x5, xzr, x6, hs // encoding: [0xe5,0x23,0x86,0xda]
+// CHECK: csinv x7, x8, xzr, lo // encoding: [0x07,0x31,0x9f,0xda]
+
+ csneg w1, w0, w19, ne
+ csneg wzr, w5, w9, eq
+ csneg w9, wzr, w30, gt
+ csneg w1, w28, wzr, mi
+// CHECK: csneg w1, w0, w19, ne // encoding: [0x01,0x14,0x93,0x5a]
+// CHECK: csneg wzr, w5, w9, eq // encoding: [0xbf,0x04,0x89,0x5a]
+// CHECK: csneg w9, wzr, w30, gt // encoding: [0xe9,0xc7,0x9e,0x5a]
+// CHECK: csneg w1, w28, wzr, mi // encoding: [0x81,0x47,0x9f,0x5a]
+
+ csneg x19, x23, x29, lt
+ csneg xzr, x3, x4, ge
+ csneg x5, xzr, x6, cs
+ csneg x7, x8, xzr, cc
+// CHECK: csneg x19, x23, x29, lt // encoding: [0xf3,0xb6,0x9d,0xda]
+// CHECK: csneg xzr, x3, x4, ge // encoding: [0x7f,0xa4,0x84,0xda]
+// CHECK: csneg x5, xzr, x6, hs // encoding: [0xe5,0x27,0x86,0xda]
+// CHECK: csneg x7, x8, xzr, lo // encoding: [0x07,0x35,0x9f,0xda]
+
+ cset w3, eq
+ cset x9, pl
+// CHECK: csinc w3, wzr, wzr, ne // encoding: [0xe3,0x17,0x9f,0x1a]
+// CHECK: csinc x9, xzr, xzr, mi // encoding: [0xe9,0x47,0x9f,0x9a]
+
+ csetm w20, ne
+ csetm x30, ge
+// CHECK: csinv w20, wzr, wzr, eq // encoding: [0xf4,0x03,0x9f,0x5a]
+// CHECK: csinv x30, xzr, xzr, lt // encoding: [0xfe,0xb3,0x9f,0xda]
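+// cset Rd, cond is csinc Rd, zr, zr with the condition inverted, and csetm
+// uses csinv likewise; the cinc/cinv/cneg aliases below follow the same
+// inverted-condition pattern.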
+
+ cinc w3, w5, gt
+ cinc wzr, w4, le
+ cinc w9, wzr, lt
+// CHECK: csinc w3, w5, w5, le // encoding: [0xa3,0xd4,0x85,0x1a]
+// CHECK: csinc wzr, w4, w4, gt // encoding: [0x9f,0xc4,0x84,0x1a]
+// CHECK: csinc w9, wzr, wzr, ge // encoding: [0xe9,0xa7,0x9f,0x1a]
+
+ cinc x3, x5, gt
+ cinc xzr, x4, le
+ cinc x9, xzr, lt
+// CHECK: csinc x3, x5, x5, le // encoding: [0xa3,0xd4,0x85,0x9a]
+// CHECK: csinc xzr, x4, x4, gt // encoding: [0x9f,0xc4,0x84,0x9a]
+// CHECK: csinc x9, xzr, xzr, ge // encoding: [0xe9,0xa7,0x9f,0x9a]
+
+ cinv w3, w5, gt
+ cinv wzr, w4, le
+ cinv w9, wzr, lt
+// CHECK: csinv w3, w5, w5, le // encoding: [0xa3,0xd0,0x85,0x5a]
+// CHECK: csinv wzr, w4, w4, gt // encoding: [0x9f,0xc0,0x84,0x5a]
+// CHECK: csinv w9, wzr, wzr, ge // encoding: [0xe9,0xa3,0x9f,0x5a]
+
+ cinv x3, x5, gt
+ cinv xzr, x4, le
+ cinv x9, xzr, lt
+// CHECK: csinv x3, x5, x5, le // encoding: [0xa3,0xd0,0x85,0xda]
+// CHECK: csinv xzr, x4, x4, gt // encoding: [0x9f,0xc0,0x84,0xda]
+// CHECK: csinv x9, xzr, xzr, ge // encoding: [0xe9,0xa3,0x9f,0xda]
+
+ cneg w3, w5, gt
+ cneg wzr, w4, le
+ cneg w9, wzr, lt
+// CHECK: csneg w3, w5, w5, le // encoding: [0xa3,0xd4,0x85,0x5a]
+// CHECK: csneg wzr, w4, w4, gt // encoding: [0x9f,0xc4,0x84,0x5a]
+// CHECK: csneg w9, wzr, wzr, ge // encoding: [0xe9,0xa7,0x9f,0x5a]
+
+ cneg x3, x5, gt
+ cneg xzr, x4, le
+ cneg x9, xzr, lt
+// CHECK: csneg x3, x5, x5, le // encoding: [0xa3,0xd4,0x85,0xda]
+// CHECK: csneg xzr, x4, x4, gt // encoding: [0x9f,0xc4,0x84,0xda]
+// CHECK: csneg x9, xzr, xzr, ge // encoding: [0xe9,0xa7,0x9f,0xda]
+
+//------------------------------------------------------------------------------
+// Data-processing (1 source)
+//------------------------------------------------------------------------------
+
+ rbit w0, w7
+ rbit x18, x3
+ rev16 w17, w1
+ rev16 x5, x2
+ rev w18, w0
+ rev32 x20, x1
+ rev32 x20, xzr
+// CHECK: rbit w0, w7 // encoding: [0xe0,0x00,0xc0,0x5a]
+// CHECK: rbit x18, x3 // encoding: [0x72,0x00,0xc0,0xda]
+// CHECK: rev16 w17, w1 // encoding: [0x31,0x04,0xc0,0x5a]
+// CHECK: rev16 x5, x2 // encoding: [0x45,0x04,0xc0,0xda]
+// CHECK: rev w18, w0 // encoding: [0x12,0x08,0xc0,0x5a]
+// CHECK: rev32 x20, x1 // encoding: [0x34,0x08,0xc0,0xda]
+// CHECK: rev32 x20, xzr // encoding: [0xf4,0x0b,0xc0,0xda]
+
+ rev x22, x2
+ rev x18, xzr
+ rev w7, wzr
+ clz w24, w3
+ clz x26, x4
+ cls w3, w5
+ cls x20, x5
+// CHECK: rev x22, x2 // encoding: [0x56,0x0c,0xc0,0xda]
+// CHECK: rev x18, xzr // encoding: [0xf2,0x0f,0xc0,0xda]
+// CHECK: rev w7, wzr // encoding: [0xe7,0x0b,0xc0,0x5a]
+// CHECK: clz w24, w3 // encoding: [0x78,0x10,0xc0,0x5a]
+// CHECK: clz x26, x4 // encoding: [0x9a,0x10,0xc0,0xda]
+// CHECK: cls w3, w5 // encoding: [0xa3,0x14,0xc0,0x5a]
+// CHECK: cls x20, x5 // encoding: [0xb4,0x14,0xc0,0xda]
+
+ clz w24, wzr
+ rev x22, xzr
+// CHECK: clz w24, wzr // encoding: [0xf8,0x13,0xc0,0x5a]
+// CHECK: rev x22, xzr // encoding: [0xf6,0x0f,0xc0,0xda]
+
+//------------------------------------------------------------------------------
+// Data-processing (2 source)
+//------------------------------------------------------------------------------
+
+ crc32b w5, w7, w20
+ crc32h w28, wzr, w30
+ crc32w w0, w1, w2
+ crc32x w7, w9, x20
+ crc32cb w9, w5, w4
+ crc32ch w13, w17, w25
+ crc32cw wzr, w3, w5
+ crc32cx w18, w16, xzr
+// CHECK: crc32b w5, w7, w20 // encoding: [0xe5,0x40,0xd4,0x1a]
+// CHECK: crc32h w28, wzr, w30 // encoding: [0xfc,0x47,0xde,0x1a]
+// CHECK: crc32w w0, w1, w2 // encoding: [0x20,0x48,0xc2,0x1a]
+// CHECK: crc32x w7, w9, x20 // encoding: [0x27,0x4d,0xd4,0x9a]
+// CHECK: crc32cb w9, w5, w4 // encoding: [0xa9,0x50,0xc4,0x1a]
+// CHECK: crc32ch w13, w17, w25 // encoding: [0x2d,0x56,0xd9,0x1a]
+// CHECK: crc32cw wzr, w3, w5 // encoding: [0x7f,0x58,0xc5,0x1a]
+// CHECK: crc32cx w18, w16, xzr // encoding: [0x12,0x5e,0xdf,0x9a]
+
+ udiv w0, w7, w10
+ udiv x9, x22, x4
+ sdiv w12, w21, w0
+ sdiv x13, x2, x1
+ lslv w11, w12, w13
+ lslv x14, x15, x16
+ lsrv w17, w18, w19
+ lsrv x20, x21, x22
+ asrv w23, w24, w25
+ asrv x26, x27, x28
+ rorv w0, w1, w2
+ rorv x3, x4, x5
+
+
+// CHECK: udiv w0, w7, w10 // encoding: [0xe0,0x08,0xca,0x1a]
+// CHECK: udiv x9, x22, x4 // encoding: [0xc9,0x0a,0xc4,0x9a]
+// CHECK: sdiv w12, w21, w0 // encoding: [0xac,0x0e,0xc0,0x1a]
+// CHECK: sdiv x13, x2, x1 // encoding: [0x4d,0x0c,0xc1,0x9a]
+// CHECK: lsl w11, w12, w13 // encoding: [0x8b,0x21,0xcd,0x1a]
+// CHECK: lsl x14, x15, x16 // encoding: [0xee,0x21,0xd0,0x9a]
+// CHECK: lsr w17, w18, w19 // encoding: [0x51,0x26,0xd3,0x1a]
+// CHECK: lsr x20, x21, x22 // encoding: [0xb4,0x26,0xd6,0x9a]
+// CHECK: asr w23, w24, w25 // encoding: [0x17,0x2b,0xd9,0x1a]
+// CHECK: asr x26, x27, x28 // encoding: [0x7a,0x2b,0xdc,0x9a]
+// CHECK: ror w0, w1, w2 // encoding: [0x20,0x2c,0xc2,0x1a]
+// CHECK: ror x3, x4, x5 // encoding: [0x83,0x2c,0xc5,0x9a]
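+// The -v register-shift instructions are printed via their preferred
+// lsl/lsr/asr/ror aliases, as the expected encodings above show.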
+
+
+ lsl w6, w7, w8
+ lsl x9, x10, x11
+ lsr w12, w13, w14
+ lsr x15, x16, x17
+ asr w18, w19, w20
+ asr x21, x22, x23
+ ror w24, w25, w26
+ ror x27, x28, x29
+// CHECK: lsl w6, w7, w8 // encoding: [0xe6,0x20,0xc8,0x1a]
+// CHECK: lsl x9, x10, x11 // encoding: [0x49,0x21,0xcb,0x9a]
+// CHECK: lsr w12, w13, w14 // encoding: [0xac,0x25,0xce,0x1a]
+// CHECK: lsr x15, x16, x17 // encoding: [0x0f,0x26,0xd1,0x9a]
+// CHECK: asr w18, w19, w20 // encoding: [0x72,0x2a,0xd4,0x1a]
+// CHECK: asr x21, x22, x23 // encoding: [0xd5,0x2a,0xd7,0x9a]
+// CHECK: ror w24, w25, w26 // encoding: [0x38,0x2f,0xda,0x1a]
+// CHECK: ror x27, x28, x29 // encoding: [0x9b,0x2f,0xdd,0x9a]
+
+ madd w1, w3, w7, w4
+ madd wzr, w0, w9, w11
+ madd w13, wzr, w4, w4
+ madd w19, w30, wzr, w29
+ madd w4, w5, w6, wzr
+// CHECK: madd w1, w3, w7, w4 // encoding: [0x61,0x10,0x07,0x1b]
+// CHECK: madd wzr, w0, w9, w11 // encoding: [0x1f,0x2c,0x09,0x1b]
+// CHECK: madd w13, wzr, w4, w4 // encoding: [0xed,0x13,0x04,0x1b]
+// CHECK: madd w19, w30, wzr, w29 // encoding: [0xd3,0x77,0x1f,0x1b]
+// CHECK: mul w4, w5, w6 // encoding: [0xa4,0x7c,0x06,0x1b]
+
+ madd x1, x3, x7, x4
+ madd xzr, x0, x9, x11
+ madd x13, xzr, x4, x4
+ madd x19, x30, xzr, x29
+ madd x4, x5, x6, xzr
+// CHECK: madd x1, x3, x7, x4 // encoding: [0x61,0x10,0x07,0x9b]
+// CHECK: madd xzr, x0, x9, x11 // encoding: [0x1f,0x2c,0x09,0x9b]
+// CHECK: madd x13, xzr, x4, x4 // encoding: [0xed,0x13,0x04,0x9b]
+// CHECK: madd x19, x30, xzr, x29 // encoding: [0xd3,0x77,0x1f,0x9b]
+// CHECK: mul x4, x5, x6 // encoding: [0xa4,0x7c,0x06,0x9b]
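+// With the zero register as the addend, madd/msub print as mul/mneg; the
+// widening forms below (smaddl etc.) likewise print as smull, smnegl, umull
+// and umnegl.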
+
+ msub w1, w3, w7, w4
+ msub wzr, w0, w9, w11
+ msub w13, wzr, w4, w4
+ msub w19, w30, wzr, w29
+ msub w4, w5, w6, wzr
+// CHECK: msub w1, w3, w7, w4 // encoding: [0x61,0x90,0x07,0x1b]
+// CHECK: msub wzr, w0, w9, w11 // encoding: [0x1f,0xac,0x09,0x1b]
+// CHECK: msub w13, wzr, w4, w4 // encoding: [0xed,0x93,0x04,0x1b]
+// CHECK: msub w19, w30, wzr, w29 // encoding: [0xd3,0xf7,0x1f,0x1b]
+// CHECK: mneg w4, w5, w6 // encoding: [0xa4,0xfc,0x06,0x1b]
+
+ msub x1, x3, x7, x4
+ msub xzr, x0, x9, x11
+ msub x13, xzr, x4, x4
+ msub x19, x30, xzr, x29
+ msub x4, x5, x6, xzr
+// CHECK: msub x1, x3, x7, x4 // encoding: [0x61,0x90,0x07,0x9b]
+// CHECK: msub xzr, x0, x9, x11 // encoding: [0x1f,0xac,0x09,0x9b]
+// CHECK: msub x13, xzr, x4, x4 // encoding: [0xed,0x93,0x04,0x9b]
+// CHECK: msub x19, x30, xzr, x29 // encoding: [0xd3,0xf7,0x1f,0x9b]
+// CHECK: mneg x4, x5, x6 // encoding: [0xa4,0xfc,0x06,0x9b]
+
+ smaddl x3, w5, w2, x9
+ smaddl xzr, w10, w11, x12
+ smaddl x13, wzr, w14, x15
+ smaddl x16, w17, wzr, x18
+ smaddl x19, w20, w21, xzr
+// CHECK: smaddl x3, w5, w2, x9 // encoding: [0xa3,0x24,0x22,0x9b]
+// CHECK: smaddl xzr, w10, w11, x12 // encoding: [0x5f,0x31,0x2b,0x9b]
+// CHECK: smaddl x13, wzr, w14, x15 // encoding: [0xed,0x3f,0x2e,0x9b]
+// CHECK: smaddl x16, w17, wzr, x18 // encoding: [0x30,0x4a,0x3f,0x9b]
+// CHECK: smull x19, w20, w21 // encoding: [0x93,0x7e,0x35,0x9b]
+
+ smsubl x3, w5, w2, x9
+ smsubl xzr, w10, w11, x12
+ smsubl x13, wzr, w14, x15
+ smsubl x16, w17, wzr, x18
+ smsubl x19, w20, w21, xzr
+// CHECK: smsubl x3, w5, w2, x9 // encoding: [0xa3,0xa4,0x22,0x9b]
+// CHECK: smsubl xzr, w10, w11, x12 // encoding: [0x5f,0xb1,0x2b,0x9b]
+// CHECK: smsubl x13, wzr, w14, x15 // encoding: [0xed,0xbf,0x2e,0x9b]
+// CHECK: smsubl x16, w17, wzr, x18 // encoding: [0x30,0xca,0x3f,0x9b]
+// CHECK: smnegl x19, w20, w21 // encoding: [0x93,0xfe,0x35,0x9b]
+
+ umaddl x3, w5, w2, x9
+ umaddl xzr, w10, w11, x12
+ umaddl x13, wzr, w14, x15
+ umaddl x16, w17, wzr, x18
+ umaddl x19, w20, w21, xzr
+// CHECK: umaddl x3, w5, w2, x9 // encoding: [0xa3,0x24,0xa2,0x9b]
+// CHECK: umaddl xzr, w10, w11, x12 // encoding: [0x5f,0x31,0xab,0x9b]
+// CHECK: umaddl x13, wzr, w14, x15 // encoding: [0xed,0x3f,0xae,0x9b]
+// CHECK: umaddl x16, w17, wzr, x18 // encoding: [0x30,0x4a,0xbf,0x9b]
+// CHECK: umull x19, w20, w21 // encoding: [0x93,0x7e,0xb5,0x9b]
+
+
+
+ umsubl x3, w5, w2, x9
+ umsubl xzr, w10, w11, x12
+ umsubl x13, wzr, w14, x15
+ umsubl x16, w17, wzr, x18
+ umsubl x19, w20, w21, xzr
+// CHECK: umsubl x3, w5, w2, x9 // encoding: [0xa3,0xa4,0xa2,0x9b]
+// CHECK: umsubl xzr, w10, w11, x12 // encoding: [0x5f,0xb1,0xab,0x9b]
+// CHECK: umsubl x13, wzr, w14, x15 // encoding: [0xed,0xbf,0xae,0x9b]
+// CHECK: umsubl x16, w17, wzr, x18 // encoding: [0x30,0xca,0xbf,0x9b]
+// CHECK: umnegl x19, w20, w21 // encoding: [0x93,0xfe,0xb5,0x9b]
+
+ smulh x30, x29, x28
+ smulh xzr, x27, x26
+ smulh x25, xzr, x24
+ smulh x23, x22, xzr
+// CHECK: smulh x30, x29, x28 // encoding: [0xbe,0x7f,0x5c,0x9b]
+// CHECK: smulh xzr, x27, x26 // encoding: [0x7f,0x7f,0x5a,0x9b]
+// CHECK: smulh x25, xzr, x24 // encoding: [0xf9,0x7f,0x58,0x9b]
+// CHECK: smulh x23, x22, xzr // encoding: [0xd7,0x7e,0x5f,0x9b]
+
+ umulh x30, x29, x28
+ umulh xzr, x27, x26
+ umulh x25, xzr, x24
+ umulh x23, x22, xzr
+// CHECK: umulh x30, x29, x28 // encoding: [0xbe,0x7f,0xdc,0x9b]
+// CHECK: umulh xzr, x27, x26 // encoding: [0x7f,0x7f,0xda,0x9b]
+// CHECK: umulh x25, xzr, x24 // encoding: [0xf9,0x7f,0xd8,0x9b]
+// CHECK: umulh x23, x22, xzr // encoding: [0xd7,0x7e,0xdf,0x9b]
+
+ mul w3, w4, w5
+ mul wzr, w6, w7
+ mul w8, wzr, w9
+ mul w10, w11, wzr
+
+ mul x12, x13, x14
+ mul xzr, x15, x16
+ mul x17, xzr, x18
+ mul x19, x20, xzr
+
+ mneg w21, w22, w23
+ mneg wzr, w24, w25
+ mneg w26, wzr, w27
+ mneg w28, w29, wzr
+
+ smull x11, w13, w17
+ umull x11, w13, w17
+ smnegl x11, w13, w17
+ umnegl x11, w13, w17
+// CHECK: mul w3, w4, w5 // encoding: [0x83,0x7c,0x05,0x1b]
+// CHECK: mul wzr, w6, w7 // encoding: [0xdf,0x7c,0x07,0x1b]
+// CHECK: mul w8, wzr, w9 // encoding: [0xe8,0x7f,0x09,0x1b]
+// CHECK: mul w10, w11, wzr // encoding: [0x6a,0x7d,0x1f,0x1b]
+// CHECK: mul x12, x13, x14 // encoding: [0xac,0x7d,0x0e,0x9b]
+// CHECK: mul xzr, x15, x16 // encoding: [0xff,0x7d,0x10,0x9b]
+// CHECK: mul x17, xzr, x18 // encoding: [0xf1,0x7f,0x12,0x9b]
+// CHECK: mul x19, x20, xzr // encoding: [0x93,0x7e,0x1f,0x9b]
+// CHECK: mneg w21, w22, w23 // encoding: [0xd5,0xfe,0x17,0x1b]
+// CHECK: mneg wzr, w24, w25 // encoding: [0x1f,0xff,0x19,0x1b]
+// CHECK: mneg w26, wzr, w27 // encoding: [0xfa,0xff,0x1b,0x1b]
+// CHECK: mneg w28, w29, wzr // encoding: [0xbc,0xff,0x1f,0x1b]
+// CHECK: smull x11, w13, w17 // encoding: [0xab,0x7d,0x31,0x9b]
+// CHECK: umull x11, w13, w17 // encoding: [0xab,0x7d,0xb1,0x9b]
+// CHECK: smnegl x11, w13, w17 // encoding: [0xab,0xfd,0x31,0x9b]
+// CHECK: umnegl x11, w13, w17 // encoding: [0xab,0xfd,0xb1,0x9b]
+
+//------------------------------------------------------------------------------
+// Exception generation
+//------------------------------------------------------------------------------
+ svc #0
+ svc #65535
+// CHECK: svc #0 // encoding: [0x01,0x00,0x00,0xd4]
+// CHECK: svc #65535 // encoding: [0xe1,0xff,0x1f,0xd4]
+
+ hvc #1
+ smc #12000
+ brk #12
+ hlt #123
+// CHECK: hvc #1 // encoding: [0x22,0x00,0x00,0xd4]
+// CHECK: smc #12000 // encoding: [0x03,0xdc,0x05,0xd4]
+// CHECK: brk #12 // encoding: [0x80,0x01,0x20,0xd4]
+// CHECK: hlt #123 // encoding: [0x60,0x0f,0x40,0xd4]
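+// Note: every exception-generating instruction takes a 16-bit immediate
+// (#0-#65535), encoded in bits 20:5 of the instruction word.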
+
+ dcps1 #42
+ dcps2 #9
+ dcps3 #1000
+// CHECK: dcps1 #42 // encoding: [0x41,0x05,0xa0,0xd4]
+// CHECK: dcps2 #9 // encoding: [0x22,0x01,0xa0,0xd4]
+// CHECK: dcps3 #1000 // encoding: [0x03,0x7d,0xa0,0xd4]
+
+ dcps1
+ dcps2
+ dcps3
+// CHECK: dcps1 // encoding: [0x01,0x00,0xa0,0xd4]
+// CHECK: dcps2 // encoding: [0x02,0x00,0xa0,0xd4]
+// CHECK: dcps3 // encoding: [0x03,0x00,0xa0,0xd4]
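+// Note: the immediate on dcps1/2/3 is optional and defaults to #0 when
+// omitted, as the encodings above show.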
+
+//------------------------------------------------------------------------------
+// Extract (immediate)
+//------------------------------------------------------------------------------
+
+ extr w3, w5, w7, #0
+ extr w11, w13, w17, #31
+// CHECK: extr w3, w5, w7, #0 // encoding: [0xa3,0x00,0x87,0x13]
+// CHECK: extr w11, w13, w17, #31 // encoding: [0xab,0x7d,0x91,0x13]
+
+ extr x3, x5, x7, #15
+ extr x11, x13, x17, #63
+// CHECK: extr x3, x5, x7, #15 // encoding: [0xa3,0x3c,0xc7,0x93]
+// CHECK: extr x11, x13, x17, #63 // encoding: [0xab,0xfd,0xd1,0x93]
+
+ ror x19, x23, #24
+ ror x29, xzr, #63
+// CHECK: extr x19, x23, x23, #24 // encoding: [0xf3,0x62,0xd7,0x93]
+// CHECK: extr x29, xzr, xzr, #63 // encoding: [0xfd,0xff,0xdf,0x93]
+
+ ror w9, w13, #31
+// CHECK: extr w9, w13, w13, #31 // encoding: [0xa9,0x7d,0x8d,0x13]
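+// Note: ror (immediate) is an alias of extr with both source registers the
+// same, so the CHECK lines expect the canonical extr form.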
+
+//------------------------------------------------------------------------------
+// Floating-point compare
+//------------------------------------------------------------------------------
+
+ fcmp s3, s5
+ fcmp s31, #0.0
+// CHECK: fcmp s3, s5 // encoding: [0x60,0x20,0x25,0x1e]
+// CHECK: fcmp s31, #0.0 // encoding: [0xe8,0x23,0x20,0x1e]
+
+ fcmpe s29, s30
+ fcmpe s15, #0.0
+// CHECK: fcmpe s29, s30 // encoding: [0xb0,0x23,0x3e,0x1e]
+// CHECK: fcmpe s15, #0.0 // encoding: [0xf8,0x21,0x20,0x1e]
+
+ fcmp d4, d12
+ fcmp d23, #0.0
+// CHECK: fcmp d4, d12 // encoding: [0x80,0x20,0x6c,0x1e]
+// CHECK: fcmp d23, #0.0 // encoding: [0xe8,0x22,0x60,0x1e]
+
+ fcmpe d26, d22
+ fcmpe d29, #0.0
+// CHECK: fcmpe d26, d22 // encoding: [0x50,0x23,0x76,0x1e]
+// CHECK: fcmpe d29, #0.0 // encoding: [0xb8,0x23,0x60,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point conditional compare
+//------------------------------------------------------------------------------
+
+ fccmp s1, s31, #0, eq
+ fccmp s3, s0, #15, hs
+ fccmp s31, s15, #13, cs
+// CHECK: fccmp s1, s31, #0, eq // encoding: [0x20,0x04,0x3f,0x1e]
+// CHECK: fccmp s3, s0, #15, hs // encoding: [0x6f,0x24,0x20,0x1e]
+// CHECK: fccmp s31, s15, #13, hs // encoding: [0xed,0x27,0x2f,0x1e]
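+// Note: cs is an alias of hs and is canonicalized when printing, here and in
+// the fccmpe block below.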
+
+ fccmp d9, d31, #0, le
+ fccmp d3, d0, #15, gt
+ fccmp d31, d5, #7, ne
+// CHECK: fccmp d9, d31, #0, le // encoding: [0x20,0xd5,0x7f,0x1e]
+// CHECK: fccmp d3, d0, #15, gt // encoding: [0x6f,0xc4,0x60,0x1e]
+// CHECK: fccmp d31, d5, #7, ne // encoding: [0xe7,0x17,0x65,0x1e]
+
+ fccmpe s1, s31, #0, eq
+ fccmpe s3, s0, #15, hs
+ fccmpe s31, s15, #13, cs
+// CHECK: fccmpe s1, s31, #0, eq // encoding: [0x30,0x04,0x3f,0x1e]
+// CHECK: fccmpe s3, s0, #15, hs // encoding: [0x7f,0x24,0x20,0x1e]
+// CHECK: fccmpe s31, s15, #13, hs // encoding: [0xfd,0x27,0x2f,0x1e]
+
+ fccmpe d9, d31, #0, le
+ fccmpe d3, d0, #15, gt
+ fccmpe d31, d5, #7, ne
+// CHECK: fccmpe d9, d31, #0, le // encoding: [0x30,0xd5,0x7f,0x1e]
+// CHECK: fccmpe d3, d0, #15, gt // encoding: [0x7f,0xc4,0x60,0x1e]
+// CHECK: fccmpe d31, d5, #7, ne // encoding: [0xf7,0x17,0x65,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point conditional select
+//------------------------------------------------------------------------------
+
+ fcsel s3, s20, s9, pl
+ fcsel d9, d10, d11, mi
+// CHECK: fcsel s3, s20, s9, pl // encoding: [0x83,0x5e,0x29,0x1e]
+// CHECK: fcsel d9, d10, d11, mi // encoding: [0x49,0x4d,0x6b,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (1 source)
+//------------------------------------------------------------------------------
+
+ fmov s0, s1
+ fabs s2, s3
+ fneg s4, s5
+ fsqrt s6, s7
+ fcvt d8, s9
+ fcvt h10, s11
+ frintn s12, s13
+ frintp s14, s15
+ frintm s16, s17
+ frintz s18, s19
+ frinta s20, s21
+ frintx s22, s23
+ frinti s24, s25
+// CHECK: fmov s0, s1 // encoding: [0x20,0x40,0x20,0x1e]
+// CHECK: fabs s2, s3 // encoding: [0x62,0xc0,0x20,0x1e]
+// CHECK: fneg s4, s5 // encoding: [0xa4,0x40,0x21,0x1e]
+// CHECK: fsqrt s6, s7 // encoding: [0xe6,0xc0,0x21,0x1e]
+// CHECK: fcvt d8, s9 // encoding: [0x28,0xc1,0x22,0x1e]
+// CHECK: fcvt h10, s11 // encoding: [0x6a,0xc1,0x23,0x1e]
+// CHECK: frintn s12, s13 // encoding: [0xac,0x41,0x24,0x1e]
+// CHECK: frintp s14, s15 // encoding: [0xee,0xc1,0x24,0x1e]
+// CHECK: frintm s16, s17 // encoding: [0x30,0x42,0x25,0x1e]
+// CHECK: frintz s18, s19 // encoding: [0x72,0xc2,0x25,0x1e]
+// CHECK: frinta s20, s21 // encoding: [0xb4,0x42,0x26,0x1e]
+// CHECK: frintx s22, s23 // encoding: [0xf6,0x42,0x27,0x1e]
+// CHECK: frinti s24, s25 // encoding: [0x38,0xc3,0x27,0x1e]
+
+ fmov d0, d1
+ fabs d2, d3
+ fneg d4, d5
+ fsqrt d6, d7
+ fcvt s8, d9
+ fcvt h10, d11
+ frintn d12, d13
+ frintp d14, d15
+ frintm d16, d17
+ frintz d18, d19
+ frinta d20, d21
+ frintx d22, d23
+ frinti d24, d25
+// CHECK: fmov d0, d1 // encoding: [0x20,0x40,0x60,0x1e]
+// CHECK: fabs d2, d3 // encoding: [0x62,0xc0,0x60,0x1e]
+// CHECK: fneg d4, d5 // encoding: [0xa4,0x40,0x61,0x1e]
+// CHECK: fsqrt d6, d7 // encoding: [0xe6,0xc0,0x61,0x1e]
+// CHECK: fcvt s8, d9 // encoding: [0x28,0x41,0x62,0x1e]
+// CHECK: fcvt h10, d11 // encoding: [0x6a,0xc1,0x63,0x1e]
+// CHECK: frintn d12, d13 // encoding: [0xac,0x41,0x64,0x1e]
+// CHECK: frintp d14, d15 // encoding: [0xee,0xc1,0x64,0x1e]
+// CHECK: frintm d16, d17 // encoding: [0x30,0x42,0x65,0x1e]
+// CHECK: frintz d18, d19 // encoding: [0x72,0xc2,0x65,0x1e]
+// CHECK: frinta d20, d21 // encoding: [0xb4,0x42,0x66,0x1e]
+// CHECK: frintx d22, d23 // encoding: [0xf6,0x42,0x67,0x1e]
+// CHECK: frinti d24, d25 // encoding: [0x38,0xc3,0x67,0x1e]
+
+ fcvt s26, h27
+ fcvt d28, h29
+// CHECK: fcvt s26, h27 // encoding: [0x7a,0x43,0xe2,0x1e]
+// CHECK: fcvt d28, h29 // encoding: [0xbc,0xc3,0xe2,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (2 sources)
+//------------------------------------------------------------------------------
+
+ fmul s20, s19, s17
+ fdiv s1, s2, s3
+ fadd s4, s5, s6
+ fsub s7, s8, s9
+ fmax s10, s11, s12
+ fmin s13, s14, s15
+ fmaxnm s16, s17, s18
+ fminnm s19, s20, s21
+ fnmul s22, s23, s24
+// CHECK: fmul s20, s19, s17 // encoding: [0x74,0x0a,0x31,0x1e]
+// CHECK: fdiv s1, s2, s3 // encoding: [0x41,0x18,0x23,0x1e]
+// CHECK: fadd s4, s5, s6 // encoding: [0xa4,0x28,0x26,0x1e]
+// CHECK: fsub s7, s8, s9 // encoding: [0x07,0x39,0x29,0x1e]
+// CHECK: fmax s10, s11, s12 // encoding: [0x6a,0x49,0x2c,0x1e]
+// CHECK: fmin s13, s14, s15 // encoding: [0xcd,0x59,0x2f,0x1e]
+// CHECK: fmaxnm s16, s17, s18 // encoding: [0x30,0x6a,0x32,0x1e]
+// CHECK: fminnm s19, s20, s21 // encoding: [0x93,0x7a,0x35,0x1e]
+// CHECK: fnmul s22, s23, s24 // encoding: [0xf6,0x8a,0x38,0x1e]
+
+ fmul d20, d19, d17
+ fdiv d1, d2, d3
+ fadd d4, d5, d6
+ fsub d7, d8, d9
+ fmax d10, d11, d12
+ fmin d13, d14, d15
+ fmaxnm d16, d17, d18
+ fminnm d19, d20, d21
+ fnmul d22, d23, d24
+// CHECK: fmul d20, d19, d17 // encoding: [0x74,0x0a,0x71,0x1e]
+// CHECK: fdiv d1, d2, d3 // encoding: [0x41,0x18,0x63,0x1e]
+// CHECK: fadd d4, d5, d6 // encoding: [0xa4,0x28,0x66,0x1e]
+// CHECK: fsub d7, d8, d9 // encoding: [0x07,0x39,0x69,0x1e]
+// CHECK: fmax d10, d11, d12 // encoding: [0x6a,0x49,0x6c,0x1e]
+// CHECK: fmin d13, d14, d15 // encoding: [0xcd,0x59,0x6f,0x1e]
+// CHECK: fmaxnm d16, d17, d18 // encoding: [0x30,0x6a,0x72,0x1e]
+// CHECK: fminnm d19, d20, d21 // encoding: [0x93,0x7a,0x75,0x1e]
+// CHECK: fnmul d22, d23, d24 // encoding: [0xf6,0x8a,0x78,0x1e]
+
+//------------------------------------------------------------------------------
+// Floating-point data-processing (3 sources)
+//------------------------------------------------------------------------------
+
+ fmadd s3, s5, s6, s31
+ fmadd d3, d13, d0, d23
+ fmsub s3, s5, s6, s31
+ fmsub d3, d13, d0, d23
+ fnmadd s3, s5, s6, s31
+ fnmadd d3, d13, d0, d23
+ fnmsub s3, s5, s6, s31
+ fnmsub d3, d13, d0, d23
+// CHECK: fmadd s3, s5, s6, s31 // encoding: [0xa3,0x7c,0x06,0x1f]
+// CHECK: fmadd d3, d13, d0, d23 // encoding: [0xa3,0x5d,0x40,0x1f]
+// CHECK: fmsub s3, s5, s6, s31 // encoding: [0xa3,0xfc,0x06,0x1f]
+// CHECK: fmsub d3, d13, d0, d23 // encoding: [0xa3,0xdd,0x40,0x1f]
+// CHECK: fnmadd s3, s5, s6, s31 // encoding: [0xa3,0x7c,0x26,0x1f]
+// CHECK: fnmadd d3, d13, d0, d23 // encoding: [0xa3,0x5d,0x60,0x1f]
+// CHECK: fnmsub s3, s5, s6, s31 // encoding: [0xa3,0xfc,0x26,0x1f]
+// CHECK: fnmsub d3, d13, d0, d23 // encoding: [0xa3,0xdd,0x60,0x1f]
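+// Note (per the ARM ARM): fmadd computes Ra + Rn*Rm and fmsub Ra - Rn*Rm;
+// fnmadd and fnmsub negate the accumulator as well, giving -Ra - Rn*Rm and
+// -Ra + Rn*Rm respectively.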
+
+//------------------------------------------------------------------------------
+// Floating-point <-> fixed-point conversion
+//------------------------------------------------------------------------------
+
+ fcvtzs w3, s5, #1
+ fcvtzs wzr, s20, #13
+ fcvtzs w19, s0, #32
+// CHECK: fcvtzs w3, s5, #1 // encoding: [0xa3,0xfc,0x18,0x1e]
+// CHECK: fcvtzs wzr, s20, #13 // encoding: [0x9f,0xce,0x18,0x1e]
+// CHECK: fcvtzs w19, s0, #32 // encoding: [0x13,0x80,0x18,0x1e]
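+// Note: #fbits ranges over 1-32 for a W register and 1-64 for an X register,
+// and is encoded as 64 - fbits in the scale field (#1 -> 0b111111 above).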
+
+ fcvtzs x3, s5, #1
+ fcvtzs x12, s30, #45
+ fcvtzs x19, s0, #64
+// CHECK: fcvtzs x3, s5, #1 // encoding: [0xa3,0xfc,0x18,0x9e]
+// CHECK: fcvtzs x12, s30, #45 // encoding: [0xcc,0x4f,0x18,0x9e]
+// CHECK: fcvtzs x19, s0, #64 // encoding: [0x13,0x00,0x18,0x9e]
+
+ fcvtzs w3, d5, #1
+ fcvtzs wzr, d20, #13
+ fcvtzs w19, d0, #32
+// CHECK: fcvtzs w3, d5, #1 // encoding: [0xa3,0xfc,0x58,0x1e]
+// CHECK: fcvtzs wzr, d20, #13 // encoding: [0x9f,0xce,0x58,0x1e]
+// CHECK: fcvtzs w19, d0, #32 // encoding: [0x13,0x80,0x58,0x1e]
+
+ fcvtzs x3, d5, #1
+ fcvtzs x12, d30, #45
+ fcvtzs x19, d0, #64
+// CHECK: fcvtzs x3, d5, #1 // encoding: [0xa3,0xfc,0x58,0x9e]
+// CHECK: fcvtzs x12, d30, #45 // encoding: [0xcc,0x4f,0x58,0x9e]
+// CHECK: fcvtzs x19, d0, #64 // encoding: [0x13,0x00,0x58,0x9e]
+
+ fcvtzu w3, s5, #1
+ fcvtzu wzr, s20, #13
+ fcvtzu w19, s0, #32
+// CHECK: fcvtzu w3, s5, #1 // encoding: [0xa3,0xfc,0x19,0x1e]
+// CHECK: fcvtzu wzr, s20, #13 // encoding: [0x9f,0xce,0x19,0x1e]
+// CHECK: fcvtzu w19, s0, #32 // encoding: [0x13,0x80,0x19,0x1e]
+
+ fcvtzu x3, s5, #1
+ fcvtzu x12, s30, #45
+ fcvtzu x19, s0, #64
+// CHECK: fcvtzu x3, s5, #1 // encoding: [0xa3,0xfc,0x19,0x9e]
+// CHECK: fcvtzu x12, s30, #45 // encoding: [0xcc,0x4f,0x19,0x9e]
+// CHECK: fcvtzu x19, s0, #64 // encoding: [0x13,0x00,0x19,0x9e]
+
+ fcvtzu w3, d5, #1
+ fcvtzu wzr, d20, #13
+ fcvtzu w19, d0, #32
+// CHECK: fcvtzu w3, d5, #1 // encoding: [0xa3,0xfc,0x59,0x1e]
+// CHECK: fcvtzu wzr, d20, #13 // encoding: [0x9f,0xce,0x59,0x1e]
+// CHECK: fcvtzu w19, d0, #32 // encoding: [0x13,0x80,0x59,0x1e]
+
+ fcvtzu x3, d5, #1
+ fcvtzu x12, d30, #45
+ fcvtzu x19, d0, #64
+// CHECK: fcvtzu x3, d5, #1 // encoding: [0xa3,0xfc,0x59,0x9e]
+// CHECK: fcvtzu x12, d30, #45 // encoding: [0xcc,0x4f,0x59,0x9e]
+// CHECK: fcvtzu x19, d0, #64 // encoding: [0x13,0x00,0x59,0x9e]
+
+ scvtf s23, w19, #1
+ scvtf s31, wzr, #20
+ scvtf s14, w0, #32
+// CHECK: scvtf s23, w19, #1 // encoding: [0x77,0xfe,0x02,0x1e]
+// CHECK: scvtf s31, wzr, #20 // encoding: [0xff,0xb3,0x02,0x1e]
+// CHECK: scvtf s14, w0, #32 // encoding: [0x0e,0x80,0x02,0x1e]
+
+ scvtf s23, x19, #1
+ scvtf s31, xzr, #20
+ scvtf s14, x0, #64
+// CHECK: scvtf s23, x19, #1 // encoding: [0x77,0xfe,0x02,0x9e]
+// CHECK: scvtf s31, xzr, #20 // encoding: [0xff,0xb3,0x02,0x9e]
+// CHECK: scvtf s14, x0, #64 // encoding: [0x0e,0x00,0x02,0x9e]
+
+ scvtf d23, w19, #1
+ scvtf d31, wzr, #20
+ scvtf d14, w0, #32
+// CHECK: scvtf d23, w19, #1 // encoding: [0x77,0xfe,0x42,0x1e]
+// CHECK: scvtf d31, wzr, #20 // encoding: [0xff,0xb3,0x42,0x1e]
+// CHECK: scvtf d14, w0, #32 // encoding: [0x0e,0x80,0x42,0x1e]
+
+ scvtf d23, x19, #1
+ scvtf d31, xzr, #20
+ scvtf d14, x0, #64
+// CHECK: scvtf d23, x19, #1 // encoding: [0x77,0xfe,0x42,0x9e]
+// CHECK: scvtf d31, xzr, #20 // encoding: [0xff,0xb3,0x42,0x9e]
+// CHECK: scvtf d14, x0, #64 // encoding: [0x0e,0x00,0x42,0x9e]
+
+ ucvtf s23, w19, #1
+ ucvtf s31, wzr, #20
+ ucvtf s14, w0, #32
+// CHECK: ucvtf s23, w19, #1 // encoding: [0x77,0xfe,0x03,0x1e]
+// CHECK: ucvtf s31, wzr, #20 // encoding: [0xff,0xb3,0x03,0x1e]
+// CHECK: ucvtf s14, w0, #32 // encoding: [0x0e,0x80,0x03,0x1e]
+
+ ucvtf s23, x19, #1
+ ucvtf s31, xzr, #20
+ ucvtf s14, x0, #64
+// CHECK: ucvtf s23, x19, #1 // encoding: [0x77,0xfe,0x03,0x9e]
+// CHECK: ucvtf s31, xzr, #20 // encoding: [0xff,0xb3,0x03,0x9e]
+// CHECK: ucvtf s14, x0, #64 // encoding: [0x0e,0x00,0x03,0x9e]
+
+ ucvtf d23, w19, #1
+ ucvtf d31, wzr, #20
+ ucvtf d14, w0, #32
+// CHECK: ucvtf d23, w19, #1 // encoding: [0x77,0xfe,0x43,0x1e]
+// CHECK: ucvtf d31, wzr, #20 // encoding: [0xff,0xb3,0x43,0x1e]
+// CHECK: ucvtf d14, w0, #32 // encoding: [0x0e,0x80,0x43,0x1e]
+
+ ucvtf d23, x19, #1
+ ucvtf d31, xzr, #20
+ ucvtf d14, x0, #64
+// CHECK: ucvtf d23, x19, #1 // encoding: [0x77,0xfe,0x43,0x9e]
+// CHECK: ucvtf d31, xzr, #20 // encoding: [0xff,0xb3,0x43,0x9e]
+// CHECK: ucvtf d14, x0, #64 // encoding: [0x0e,0x00,0x43,0x9e]
+
+//------------------------------------------------------------------------------
+// Floating-point <-> integer conversion
+//------------------------------------------------------------------------------
+ fcvtns w3, s31
+ fcvtns xzr, s12
+ fcvtnu wzr, s12
+ fcvtnu x0, s0
+// CHECK: fcvtns w3, s31 // encoding: [0xe3,0x03,0x20,0x1e]
+// CHECK: fcvtns xzr, s12 // encoding: [0x9f,0x01,0x20,0x9e]
+// CHECK: fcvtnu wzr, s12 // encoding: [0x9f,0x01,0x21,0x1e]
+// CHECK: fcvtnu x0, s0 // encoding: [0x00,0x00,0x21,0x9e]
+
+ fcvtps wzr, s9
+ fcvtps x12, s20
+ fcvtpu w30, s23
+ fcvtpu x29, s3
+// CHECK: fcvtps wzr, s9 // encoding: [0x3f,0x01,0x28,0x1e]
+// CHECK: fcvtps x12, s20 // encoding: [0x8c,0x02,0x28,0x9e]
+// CHECK: fcvtpu w30, s23 // encoding: [0xfe,0x02,0x29,0x1e]
+// CHECK: fcvtpu x29, s3 // encoding: [0x7d,0x00,0x29,0x9e]
+
+ fcvtms w2, s3
+ fcvtms x4, s5
+ fcvtmu w6, s7
+ fcvtmu x8, s9
+// CHECK: fcvtms w2, s3 // encoding: [0x62,0x00,0x30,0x1e]
+// CHECK: fcvtms x4, s5 // encoding: [0xa4,0x00,0x30,0x9e]
+// CHECK: fcvtmu w6, s7 // encoding: [0xe6,0x00,0x31,0x1e]
+// CHECK: fcvtmu x8, s9 // encoding: [0x28,0x01,0x31,0x9e]
+
+ fcvtzs w10, s11
+ fcvtzs x12, s13
+ fcvtzu w14, s15
+ fcvtzu x15, s16
+// CHECK: fcvtzs w10, s11 // encoding: [0x6a,0x01,0x38,0x1e]
+// CHECK: fcvtzs x12, s13 // encoding: [0xac,0x01,0x38,0x9e]
+// CHECK: fcvtzu w14, s15 // encoding: [0xee,0x01,0x39,0x1e]
+// CHECK: fcvtzu x15, s16 // encoding: [0x0f,0x02,0x39,0x9e]
+
+ scvtf s17, w18
+ scvtf s19, x20
+ ucvtf s21, w22
+ scvtf s23, x24
+// CHECK: scvtf s17, w18 // encoding: [0x51,0x02,0x22,0x1e]
+// CHECK: scvtf s19, x20 // encoding: [0x93,0x02,0x22,0x9e]
+// CHECK: ucvtf s21, w22 // encoding: [0xd5,0x02,0x23,0x1e]
+// CHECK: scvtf s23, x24 // encoding: [0x17,0x03,0x22,0x9e]
+
+ fcvtas w25, s26
+ fcvtas x27, s28
+ fcvtau w29, s30
+ fcvtau xzr, s0
+// CHECK: fcvtas w25, s26 // encoding: [0x59,0x03,0x24,0x1e]
+// CHECK: fcvtas x27, s28 // encoding: [0x9b,0x03,0x24,0x9e]
+// CHECK: fcvtau w29, s30 // encoding: [0xdd,0x03,0x25,0x1e]
+// CHECK: fcvtau xzr, s0 // encoding: [0x1f,0x00,0x25,0x9e]
+
+ fcvtns w3, d31
+ fcvtns xzr, d12
+ fcvtnu wzr, d12
+ fcvtnu x0, d0
+// CHECK: fcvtns w3, d31 // encoding: [0xe3,0x03,0x60,0x1e]
+// CHECK: fcvtns xzr, d12 // encoding: [0x9f,0x01,0x60,0x9e]
+// CHECK: fcvtnu wzr, d12 // encoding: [0x9f,0x01,0x61,0x1e]
+// CHECK: fcvtnu x0, d0 // encoding: [0x00,0x00,0x61,0x9e]
+
+ fcvtps wzr, d9
+ fcvtps x12, d20
+ fcvtpu w30, d23
+ fcvtpu x29, d3
+// CHECK: fcvtps wzr, d9 // encoding: [0x3f,0x01,0x68,0x1e]
+// CHECK: fcvtps x12, d20 // encoding: [0x8c,0x02,0x68,0x9e]
+// CHECK: fcvtpu w30, d23 // encoding: [0xfe,0x02,0x69,0x1e]
+// CHECK: fcvtpu x29, d3 // encoding: [0x7d,0x00,0x69,0x9e]
+
+ fcvtms w2, d3
+ fcvtms x4, d5
+ fcvtmu w6, d7
+ fcvtmu x8, d9
+// CHECK: fcvtms w2, d3 // encoding: [0x62,0x00,0x70,0x1e]
+// CHECK: fcvtms x4, d5 // encoding: [0xa4,0x00,0x70,0x9e]
+// CHECK: fcvtmu w6, d7 // encoding: [0xe6,0x00,0x71,0x1e]
+// CHECK: fcvtmu x8, d9 // encoding: [0x28,0x01,0x71,0x9e]
+
+ fcvtzs w10, d11
+ fcvtzs x12, d13
+ fcvtzu w14, d15
+ fcvtzu x15, d16
+// CHECK: fcvtzs w10, d11 // encoding: [0x6a,0x01,0x78,0x1e]
+// CHECK: fcvtzs x12, d13 // encoding: [0xac,0x01,0x78,0x9e]
+// CHECK: fcvtzu w14, d15 // encoding: [0xee,0x01,0x79,0x1e]
+// CHECK: fcvtzu x15, d16 // encoding: [0x0f,0x02,0x79,0x9e]
+
+ scvtf d17, w18
+ scvtf d19, x20
+ ucvtf d21, w22
+ ucvtf d23, x24
+// CHECK: scvtf d17, w18 // encoding: [0x51,0x02,0x62,0x1e]
+// CHECK: scvtf d19, x20 // encoding: [0x93,0x02,0x62,0x9e]
+// CHECK: ucvtf d21, w22 // encoding: [0xd5,0x02,0x63,0x1e]
+// CHECK: ucvtf d23, x24 // encoding: [0x17,0x03,0x63,0x9e]
+
+ fcvtas w25, d26
+ fcvtas x27, d28
+ fcvtau w29, d30
+ fcvtau xzr, d0
+// CHECK: fcvtas w25, d26 // encoding: [0x59,0x03,0x64,0x1e]
+// CHECK: fcvtas x27, d28 // encoding: [0x9b,0x03,0x64,0x9e]
+// CHECK: fcvtau w29, d30 // encoding: [0xdd,0x03,0x65,0x1e]
+// CHECK: fcvtau xzr, d0 // encoding: [0x1f,0x00,0x65,0x9e]
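+// Note: the letter after fcvt selects the rounding mode: n (nearest, ties to
+// even), p (toward +inf), m (toward -inf), z (toward zero), a (nearest, ties
+// away); the s/u suffix selects a signed or unsigned result.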
+
+ fmov w3, s9
+ fmov s9, w3
+// CHECK: fmov w3, s9 // encoding: [0x23,0x01,0x26,0x1e]
+// CHECK: fmov s9, w3 // encoding: [0x69,0x00,0x27,0x1e]
+
+ fmov x20, d31
+ fmov d1, x15
+// CHECK: fmov x20, d31 // encoding: [0xf4,0x03,0x66,0x9e]
+// CHECK: fmov d1, x15 // encoding: [0xe1,0x01,0x67,0x9e]
+
+ fmov x3, v12.d[1]
+ fmov v1.d[1], x19
+ fmov v3.2d[1], xzr
+// CHECK: fmov x3, v12.d[1] // encoding: [0x83,0x01,0xae,0x9e]
+// CHECK: fmov v1.d[1], x19 // encoding: [0x61,0x02,0xaf,0x9e]
+// CHECK: fmov v3.d[1], xzr // encoding: [0xe3,0x03,0xaf,0x9e]
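+// Note: these fmov forms access the upper 64 bits of a V register; the
+// "v3.2d[1]" spelling is accepted but prints canonically as "v3.d[1]".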
+
+//------------------------------------------------------------------------------
+// Floating-point immediate
+//------------------------------------------------------------------------------
+
+ fmov s2, #0.125
+ fmov s3, #1.0
+ fmov d30, #16.0
+// CHECK: fmov s2, #0.12500000 // encoding: [0x02,0x10,0x28,0x1e]
+// CHECK: fmov s3, #1.00000000 // encoding: [0x03,0x10,0x2e,0x1e]
+// CHECK: fmov d30, #16.00000000 // encoding: [0x1e,0x10,0x66,0x1e]
+
+ fmov s4, #1.0625
+ fmov d10, #1.9375
+// CHECK: fmov s4, #1.06250000 // encoding: [0x04,0x30,0x2e,0x1e]
+// CHECK: fmov d10, #1.93750000 // encoding: [0x0a,0xf0,0x6f,0x1e]
+
+ fmov s12, #-1.0
+// CHECK: fmov s12, #-1.00000000 // encoding: [0x0c,0x10,0x3e,0x1e]
+
+ fmov d16, #8.5
+// CHECK: fmov d16, #8.50000000 // encoding: [0x10,0x30,0x64,0x1e]
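+// Note: fmov (immediate) holds an 8-bit modified immediate, so only values of
+// the form +/-(1 + m/16) * 2^n with 0 <= m <= 15 and -3 <= n <= 4 are
+// representable; the printer always emits eight fractional digits.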
+
+//------------------------------------------------------------------------------
+// Load-register (literal)
+//------------------------------------------------------------------------------
+ ldr w3, here
+ ldr x29, there
+ ldrsw xzr, everywhere
+// CHECK: ldr w3, here // encoding: [0x03'A',A,A,0x18'A']
+// CHECK: // fixup A - offset: 0, value: here, kind: fixup_a64_ld_prel
+// CHECK: ldr x29, there // encoding: [0x1d'A',A,A,0x58'A']
+// CHECK: // fixup A - offset: 0, value: there, kind: fixup_a64_ld_prel
+// CHECK: ldrsw xzr, everywhere // encoding: [0x1f'A',A,A,0x98'A']
+// CHECK: // fixup A - offset: 0, value: everywhere, kind: fixup_a64_ld_prel
+
+ ldr s0, who_knows
+ ldr d0, i_dont
+ ldr q0, there_must_be_a_better_way
+// CHECK: ldr s0, who_knows // encoding: [A,A,A,0x1c'A']
+// CHECK: // fixup A - offset: 0, value: who_knows, kind: fixup_a64_ld_prel
+// CHECK: ldr d0, i_dont // encoding: [A,A,A,0x5c'A']
+// CHECK: // fixup A - offset: 0, value: i_dont, kind: fixup_a64_ld_prel
+// CHECK: ldr q0, there_must_be_a_better_way // encoding: [A,A,A,0x9c'A']
+// CHECK: // fixup A - offset: 0, value: there_must_be_a_better_way, kind: fixup_a64_ld_prel
+
+ ldr w0, #1048572
+ ldr x10, #-1048576
+// CHECK: ldr w0, #1048572 // encoding: [0xe0,0xff,0x7f,0x18]
+// CHECK: ldr x10, #-1048576 // encoding: [0x0a,0x00,0x80,0x58]
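+// Note: literal loads are PC-relative with a 19-bit word-scaled offset, i.e.
+// a +/-1MiB range; #1048572 and #-1048576 are the two extremes. Unresolved
+// labels become fixup_a64_ld_prel fixups, the 'A' bytes marking fixed-up bits.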
+
+ prfm pldl1strm, nowhere
+ prfm #22, somewhere
+// CHECK: prfm pldl1strm, nowhere // encoding: [0x01'A',A,A,0xd8'A']
+// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_ld_prel
+// CHECK: prfm #22, somewhere // encoding: [0x16'A',A,A,0xd8'A']
+// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_ld_prel
+
+//------------------------------------------------------------------------------
+// Load/store exclusive
+//------------------------------------------------------------------------------
+
+ stxrb w1, w2, [x3, #0]
+ stxrh w2, w3, [x4]
+ stxr wzr, w4, [sp]
+ stxr w5, x6, [x7]
+// CHECK: stxrb w1, w2, [x3] // encoding: [0x62,0x7c,0x01,0x08]
+// CHECK: stxrh w2, w3, [x4] // encoding: [0x83,0x7c,0x02,0x48]
+// CHECK: stxr wzr, w4, [sp] // encoding: [0xe4,0x7f,0x1f,0x88]
+// CHECK: stxr w5, x6, [x7] // encoding: [0xe6,0x7c,0x05,0xc8]
+
+ ldxrb w7, [x9]
+ ldxrh wzr, [x10]
+ ldxr w9, [sp]
+ ldxr x10, [x11]
+// CHECK: ldxrb w7, [x9] // encoding: [0x27,0x7d,0x5f,0x08]
+// CHECK: ldxrh wzr, [x10] // encoding: [0x5f,0x7d,0x5f,0x48]
+// CHECK: ldxr w9, [sp] // encoding: [0xe9,0x7f,0x5f,0x88]
+// CHECK: ldxr x10, [x11] // encoding: [0x6a,0x7d,0x5f,0xc8]
+
+ stxp w11, w12, w13, [x14]
+ stxp wzr, x23, x14, [x15]
+// CHECK: stxp w11, w12, w13, [x14] // encoding: [0xcc,0x35,0x2b,0x88]
+// CHECK: stxp wzr, x23, x14, [x15] // encoding: [0xf7,0x39,0x3f,0xc8]
+
+ ldxp w12, wzr, [sp]
+ ldxp x13, x14, [x15]
+// CHECK: ldxp w12, wzr, [sp] // encoding: [0xec,0x7f,0x7f,0x88]
+// CHECK: ldxp x13, x14, [x15] // encoding: [0xed,0x39,0x7f,0xc8]
+
+ stlxrb w14, w15, [x16]
+ stlxrh w15, w16, [x17,#0]
+ stlxr wzr, w17, [sp]
+ stlxr w18, x19, [x20]
+// CHECK: stlxrb w14, w15, [x16] // encoding: [0x0f,0xfe,0x0e,0x08]
+// CHECK: stlxrh w15, w16, [x17] // encoding: [0x30,0xfe,0x0f,0x48]
+// CHECK: stlxr wzr, w17, [sp] // encoding: [0xf1,0xff,0x1f,0x88]
+// CHECK: stlxr w18, x19, [x20] // encoding: [0x93,0xfe,0x12,0xc8]
+
+ ldaxrb w19, [x21]
+ ldaxrh w20, [sp]
+ ldaxr wzr, [x22]
+ ldaxr x21, [x23]
+// CHECK: ldaxrb w19, [x21] // encoding: [0xb3,0xfe,0x5f,0x08]
+// CHECK: ldaxrh w20, [sp] // encoding: [0xf4,0xff,0x5f,0x48]
+// CHECK: ldaxr wzr, [x22] // encoding: [0xdf,0xfe,0x5f,0x88]
+// CHECK: ldaxr x21, [x23] // encoding: [0xf5,0xfe,0x5f,0xc8]
+
+ stlxp wzr, w22, w23, [x24]
+ stlxp w25, x26, x27, [sp]
+// CHECK: stlxp wzr, w22, w23, [x24] // encoding: [0x16,0xdf,0x3f,0x88]
+// CHECK: stlxp w25, x26, x27, [sp] // encoding: [0xfa,0xef,0x39,0xc8]
+
+ ldaxp w26, wzr, [sp]
+ ldaxp x27, x28, [x30]
+// CHECK: ldaxp w26, wzr, [sp] // encoding: [0xfa,0xff,0x7f,0x88]
+// CHECK: ldaxp x27, x28, [x30] // encoding: [0xdb,0xf3,0x7f,0xc8]
+
+ stlrb w27, [sp]
+ stlrh w28, [x0]
+ stlr wzr, [x1]
+ stlr x30, [x2]
+// CHECK: stlrb w27, [sp] // encoding: [0xfb,0xff,0x9f,0x08]
+// CHECK: stlrh w28, [x0] // encoding: [0x1c,0xfc,0x9f,0x48]
+// CHECK: stlr wzr, [x1] // encoding: [0x3f,0xfc,0x9f,0x88]
+// CHECK: stlr x30, [x2] // encoding: [0x5e,0xfc,0x9f,0xc8]
+
+ ldarb w29, [sp]
+ ldarh w30, [x0]
+ ldar wzr, [x1]
+ ldar x1, [x2]
+// CHECK: ldarb w29, [sp] // encoding: [0xfd,0xff,0xdf,0x08]
+// CHECK: ldarh w30, [x0] // encoding: [0x1e,0xfc,0xdf,0x48]
+// CHECK: ldar wzr, [x1] // encoding: [0x3f,0xfc,0xdf,0x88]
+// CHECK: ldar x1, [x2] // encoding: [0x41,0xfc,0xdf,0xc8]
+
+ stlxp wzr, w22, w23, [x24,#0]
+// CHECK: stlxp wzr, w22, w23, [x24] // encoding: [0x16,0xdf,0x3f,0x88]
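+// Note: the first W operand of a store-exclusive is the status result (0 on
+// success); all exclusive and acquire/release forms are base-register only,
+// so an explicit #0 offset is accepted and dropped, as above.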
+
+//------------------------------------------------------------------------------
+// Load/store register (unscaled immediate)
+//------------------------------------------------------------------------------
+
+ sturb w9, [sp, #0]
+ sturh wzr, [x12, #255]
+ stur w16, [x0, #-256]
+ stur x28, [x14, #1]
+// CHECK: sturb w9, [sp] // encoding: [0xe9,0x03,0x00,0x38]
+// CHECK: sturh wzr, [x12, #255] // encoding: [0x9f,0xf1,0x0f,0x78]
+// CHECK: stur w16, [x0, #-256] // encoding: [0x10,0x00,0x10,0xb8]
+// CHECK: stur x28, [x14, #1] // encoding: [0xdc,0x11,0x00,0xf8]
+
+ ldurb w1, [x20, #255]
+ ldurh w20, [x1, #255]
+ ldur w12, [sp, #255]
+ ldur xzr, [x12, #255]
+// CHECK: ldurb w1, [x20, #255] // encoding: [0x81,0xf2,0x4f,0x38]
+// CHECK: ldurh w20, [x1, #255] // encoding: [0x34,0xf0,0x4f,0x78]
+// CHECK: ldur w12, [sp, #255] // encoding: [0xec,0xf3,0x4f,0xb8]
+// CHECK: ldur xzr, [x12, #255] // encoding: [0x9f,0xf1,0x4f,0xf8]
+
+ ldursb x9, [x7, #-256]
+ ldursh x17, [x19, #-256]
+ ldursw x20, [x15, #-256]
+ ldursw x13, [x2]
+ prfum pldl2keep, [sp, #-256]
+ ldursb w19, [x1, #-256]
+ ldursh w15, [x21, #-256]
+// CHECK: ldursb x9, [x7, #-256] // encoding: [0xe9,0x00,0x90,0x38]
+// CHECK: ldursh x17, [x19, #-256] // encoding: [0x71,0x02,0x90,0x78]
+// CHECK: ldursw x20, [x15, #-256] // encoding: [0xf4,0x01,0x90,0xb8]
+// CHECK: ldursw x13, [x2] // encoding: [0x4d,0x00,0x80,0xb8]
+// CHECK: prfum pldl2keep, [sp, #-256] // encoding: [0xe2,0x03,0x90,0xf8]
+// CHECK: ldursb w19, [x1, #-256] // encoding: [0x33,0x00,0xd0,0x38]
+// CHECK: ldursh w15, [x21, #-256] // encoding: [0xaf,0x02,0xd0,0x78]
+
+ stur b0, [sp, #1]
+ stur h12, [x12, #-1]
+ stur s15, [x0, #255]
+ stur d31, [x5, #25]
+ stur q9, [x5]
+// CHECK: stur b0, [sp, #1] // encoding: [0xe0,0x13,0x00,0x3c]
+// CHECK: stur h12, [x12, #-1] // encoding: [0x8c,0xf1,0x1f,0x7c]
+// CHECK: stur s15, [x0, #255] // encoding: [0x0f,0xf0,0x0f,0xbc]
+// CHECK: stur d31, [x5, #25] // encoding: [0xbf,0x90,0x01,0xfc]
+// CHECK: stur q9, [x5] // encoding: [0xa9,0x00,0x80,0x3c]
+
+ ldur b3, [sp]
+ ldur h5, [x4, #-256]
+ ldur s7, [x12, #-1]
+ ldur d11, [x19, #4]
+ ldur q13, [x1, #2]
+// CHECK: ldur b3, [sp] // encoding: [0xe3,0x03,0x40,0x3c]
+// CHECK: ldur h5, [x4, #-256] // encoding: [0x85,0x00,0x50,0x7c]
+// CHECK: ldur s7, [x12, #-1] // encoding: [0x87,0xf1,0x5f,0xbc]
+// CHECK: ldur d11, [x19, #4] // encoding: [0x6b,0x42,0x40,0xfc]
+// CHECK: ldur q13, [x1, #2] // encoding: [0x2d,0x20,0xc0,0x3c]
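+// Note: the unscaled forms take a signed 9-bit byte offset (#-256 to #255)
+// that is never multiplied by the access size; the tests above sit on both
+// bounds.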
+
+//------------------------------------------------------------------------------
+// Load/store (unsigned immediate)
+//------------------------------------------------------------------------------
+
+//// Basic addressing mode limits: 8-byte access
+ ldr x0, [x0]
+ ldr x4, [x29, #0]
+ ldr x30, [x12, #32760]
+ ldr x20, [sp, #8]
+// CHECK: ldr x0, [x0] // encoding: [0x00,0x00,0x40,0xf9]
+// CHECK: ldr x4, [x29] // encoding: [0xa4,0x03,0x40,0xf9]
+// CHECK: ldr x30, [x12, #32760] // encoding: [0x9e,0xfd,0x7f,0xf9]
+// CHECK: ldr x20, [sp, #8] // encoding: [0xf4,0x07,0x40,0xf9]
+
+//// Rt treats 31 as zero-register
+ ldr xzr, [sp]
+// CHECK: ldr xzr, [sp] // encoding: [0xff,0x03,0x40,0xf9]
+
+//// 4-byte loads: the address is still 64-bit; check the limits
+ ldr w2, [sp]
+ ldr w17, [sp, #16380]
+ ldr w13, [x2, #4]
+// CHECK: ldr w2, [sp] // encoding: [0xe2,0x03,0x40,0xb9]
+// CHECK: ldr w17, [sp, #16380] // encoding: [0xf1,0xff,0x7f,0xb9]
+// CHECK: ldr w13, [x2, #4] // encoding: [0x4d,0x04,0x40,0xb9]
+
+//// Signed 4-byte load. Limits.
+ ldrsw x2, [x5,#4]
+ ldrsw x23, [sp, #16380]
+// CHECK: ldrsw x2, [x5, #4] // encoding: [0xa2,0x04,0x80,0xb9]
+// CHECK: ldrsw x23, [sp, #16380] // encoding: [0xf7,0xff,0xbf,0xb9]
+
+//// 2-byte loads
+ ldrh w2, [x4]
+ ldrsh w23, [x6, #8190]
+ ldrsh wzr, [sp, #2]
+ ldrsh x29, [x2, #2]
+// CHECK: ldrh w2, [x4] // encoding: [0x82,0x00,0x40,0x79]
+// CHECK: ldrsh w23, [x6, #8190] // encoding: [0xd7,0xfc,0xff,0x79]
+// CHECK: ldrsh wzr, [sp, #2] // encoding: [0xff,0x07,0xc0,0x79]
+// CHECK: ldrsh x29, [x2, #2] // encoding: [0x5d,0x04,0x80,0x79]
+
+//// 1-byte loads
+ ldrb w26, [x3, #121]
+ ldrb w12, [x2, #0]
+ ldrsb w27, [sp, #4095]
+ ldrsb xzr, [x15]
+// CHECK: ldrb w26, [x3, #121] // encoding: [0x7a,0xe4,0x41,0x39]
+// CHECK: ldrb w12, [x2] // encoding: [0x4c,0x00,0x40,0x39]
+// CHECK: ldrsb w27, [sp, #4095] // encoding: [0xfb,0xff,0xff,0x39]
+// CHECK: ldrsb xzr, [x15] // encoding: [0xff,0x01,0x80,0x39]
+
+//// Stores
+ str x30, [sp]
+ str w20, [x4, #16380]
+ strh w20, [x10, #14]
+ strh w17, [sp, #8190]
+ strb w23, [x3, #4095]
+ strb wzr, [x2]
+// CHECK: str x30, [sp] // encoding: [0xfe,0x03,0x00,0xf9]
+// CHECK: str w20, [x4, #16380] // encoding: [0x94,0xfc,0x3f,0xb9]
+// CHECK: strh w20, [x10, #14] // encoding: [0x54,0x1d,0x00,0x79]
+// CHECK: strh w17, [sp, #8190] // encoding: [0xf1,0xff,0x3f,0x79]
+// CHECK: strb w23, [x3, #4095] // encoding: [0x77,0xfc,0x3f,0x39]
+// CHECK: strb wzr, [x2] // encoding: [0x5f,0x00,0x00,0x39]
+
+//// Relocations
+ str x15, [x5, #:lo12:sym]
+ ldrb w15, [x5, #:lo12:sym]
+ ldrsh x15, [x5, #:lo12:sym]
+ ldrsw x15, [x5, #:lo12:sym]
+ ldr x15, [x5, #:lo12:sym]
+ ldr q3, [x2, #:lo12:sym]
+// CHECK: str x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,A,0xf9'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12
+// CHECK: ldrb w15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x40'A',0x39'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst8_lo12
+// CHECK: ldrsh x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x80'A',0x79'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst16_lo12
+// CHECK: ldrsw x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x80'A',0xb9'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst32_lo12
+// CHECK: ldr x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x40'A',0xf9'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12
+// CHECK: ldr q3, [x2, #:lo12:sym] // encoding: [0x43'A',A,0xc0'A',0x3d'A']
+// CHECK: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst128_lo12
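+// Note: :lo12: selects the low 12 bits of the symbol's address; the fixup
+// kind records the access width so the offset is scaled to match.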
+
+ prfm pldl1keep, [sp, #8]
+ prfm pldl1strm, [x3]
+ prfm pldl2keep, [x5,#16]
+ prfm pldl2strm, [x2]
+ prfm pldl3keep, [x5]
+ prfm pldl3strm, [x6]
+ prfm plil1keep, [sp, #8]
+ prfm plil1strm, [x3]
+ prfm plil2keep, [x5,#16]
+ prfm plil2strm, [x2]
+ prfm plil3keep, [x5]
+ prfm plil3strm, [x6]
+ prfm pstl1keep, [sp, #8]
+ prfm pstl1strm, [x3]
+ prfm pstl2keep, [x5,#16]
+ prfm pstl2strm, [x2]
+ prfm pstl3keep, [x5]
+ prfm pstl3strm, [x6]
+ prfm #15, [sp]
+// CHECK: prfm pldl1keep, [sp, #8] // encoding: [0xe0,0x07,0x80,0xf9]
+// CHECK: prfm pldl1strm, [x3, #0] // encoding: [0x61,0x00,0x80,0xf9]
+// CHECK: prfm pldl2keep, [x5, #16] // encoding: [0xa2,0x08,0x80,0xf9]
+// CHECK: prfm pldl2strm, [x2, #0] // encoding: [0x43,0x00,0x80,0xf9]
+// CHECK: prfm pldl3keep, [x5, #0] // encoding: [0xa4,0x00,0x80,0xf9]
+// CHECK: prfm pldl3strm, [x6, #0] // encoding: [0xc5,0x00,0x80,0xf9]
+// CHECK: prfm plil1keep, [sp, #8] // encoding: [0xe8,0x07,0x80,0xf9]
+// CHECK: prfm plil1strm, [x3, #0] // encoding: [0x69,0x00,0x80,0xf9]
+// CHECK: prfm plil2keep, [x5, #16] // encoding: [0xaa,0x08,0x80,0xf9]
+// CHECK: prfm plil2strm, [x2, #0] // encoding: [0x4b,0x00,0x80,0xf9]
+// CHECK: prfm plil3keep, [x5, #0] // encoding: [0xac,0x00,0x80,0xf9]
+// CHECK: prfm plil3strm, [x6, #0] // encoding: [0xcd,0x00,0x80,0xf9]
+// CHECK: prfm pstl1keep, [sp, #8] // encoding: [0xf0,0x07,0x80,0xf9]
+// CHECK: prfm pstl1strm, [x3, #0] // encoding: [0x71,0x00,0x80,0xf9]
+// CHECK: prfm pstl2keep, [x5, #16] // encoding: [0xb2,0x08,0x80,0xf9]
+// CHECK: prfm pstl2strm, [x2, #0] // encoding: [0x53,0x00,0x80,0xf9]
+// CHECK: prfm pstl3keep, [x5, #0] // encoding: [0xb4,0x00,0x80,0xf9]
+// CHECK: prfm pstl3strm, [x6, #0] // encoding: [0xd5,0x00,0x80,0xf9]
+// CHECK: prfm #15, [sp, #0] // encoding: [0xef,0x03,0x80,0xf9]
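+// Note: the prefetch operand lives in the Rt field and encodes
+// <type><target><policy> (pld/pli/pst, l1-l3, keep/strm); slots with no named
+// form, such as #15, may be written numerically.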
+
+//// Floating-point versions
+
+ ldr b31, [sp, #4095]
+ ldr h20, [x2, #8190]
+ ldr s10, [x19, #16380]
+ ldr d3, [x10, #32760]
+ str q12, [sp, #65520]
+// CHECK: ldr b31, [sp, #4095] // encoding: [0xff,0xff,0x7f,0x3d]
+// CHECK: ldr h20, [x2, #8190] // encoding: [0x54,0xfc,0x7f,0x7d]
+// CHECK: ldr s10, [x19, #16380] // encoding: [0x6a,0xfe,0x7f,0xbd]
+// CHECK: ldr d3, [x10, #32760] // encoding: [0x43,0xfd,0x7f,0xfd]
+// CHECK: str q12, [sp, #65520] // encoding: [0xec,0xff,0xbf,0x3d]
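+// Note: the unsigned-offset form scales a 12-bit immediate by the access
+// size, so the maxima are #4095, #8190, #16380, #32760 and #65520 for 1-, 2-,
+// 4-, 8- and 16-byte accesses, as the boundary tests above show.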
+
+//------------------------------------------------------------------------------
+// Load/store register (register offset)
+//------------------------------------------------------------------------------
+
+ ldrb w3, [sp, x5]
+ ldrb w9, [x27, x6, lsl #0]
+ ldrsb w10, [x30, x7]
+ ldrb w11, [x29, x3, sxtx]
+ strb w12, [x28, xzr, sxtx #0]
+ ldrb w14, [x26, w6, uxtw]
+ ldrsb w15, [x25, w7, uxtw #0]
+ ldrb w17, [x23, w9, sxtw]
+ ldrsb x18, [x22, w10, sxtw #0]
+// CHECK: ldrb w3, [sp, x5] // encoding: [0xe3,0x6b,0x65,0x38]
+// CHECK: ldrb w9, [x27, x6, lsl #0] // encoding: [0x69,0x7b,0x66,0x38]
+// CHECK: ldrsb w10, [x30, x7] // encoding: [0xca,0x6b,0xe7,0x38]
+// CHECK: ldrb w11, [x29, x3, sxtx] // encoding: [0xab,0xeb,0x63,0x38]
+// CHECK: strb w12, [x28, xzr, sxtx #0] // encoding: [0x8c,0xfb,0x3f,0x38]
+// CHECK: ldrb w14, [x26, w6, uxtw] // encoding: [0x4e,0x4b,0x66,0x38]
+// CHECK: ldrsb w15, [x25, w7, uxtw #0] // encoding: [0x2f,0x5b,0xe7,0x38]
+// CHECK: ldrb w17, [x23, w9, sxtw] // encoding: [0xf1,0xca,0x69,0x38]
+// CHECK: ldrsb x18, [x22, w10, sxtw #0] // encoding: [0xd2,0xda,0xaa,0x38]
+
+ ldrsh w3, [sp, x5]
+ ldrsh w9, [x27, x6, lsl #0]
+ ldrh w10, [x30, x7, lsl #1]
+ strh w11, [x29, x3, sxtx]
+ ldrh w12, [x28, xzr, sxtx #0]
+ ldrsh x13, [x27, x5, sxtx #1]
+ ldrh w14, [x26, w6, uxtw]
+ ldrh w15, [x25, w7, uxtw #0]
+ ldrsh w16, [x24, w8, uxtw #1]
+ ldrh w17, [x23, w9, sxtw]
+ ldrh w18, [x22, w10, sxtw #0]
+ strh w19, [x21, wzr, sxtw #1]
+// CHECK: ldrsh w3, [sp, x5] // encoding: [0xe3,0x6b,0xe5,0x78]
+// CHECK: ldrsh w9, [x27, x6] // encoding: [0x69,0x6b,0xe6,0x78]
+// CHECK: ldrh w10, [x30, x7, lsl #1] // encoding: [0xca,0x7b,0x67,0x78]
+// CHECK: strh w11, [x29, x3, sxtx] // encoding: [0xab,0xeb,0x23,0x78]
+// CHECK: ldrh w12, [x28, xzr, sxtx] // encoding: [0x8c,0xeb,0x7f,0x78]
+// CHECK: ldrsh x13, [x27, x5, sxtx #1] // encoding: [0x6d,0xfb,0xa5,0x78]
+// CHECK: ldrh w14, [x26, w6, uxtw] // encoding: [0x4e,0x4b,0x66,0x78]
+// CHECK: ldrh w15, [x25, w7, uxtw] // encoding: [0x2f,0x4b,0x67,0x78]
+// CHECK: ldrsh w16, [x24, w8, uxtw #1] // encoding: [0x10,0x5b,0xe8,0x78]
+// CHECK: ldrh w17, [x23, w9, sxtw] // encoding: [0xf1,0xca,0x69,0x78]
+// CHECK: ldrh w18, [x22, w10, sxtw] // encoding: [0xd2,0xca,0x6a,0x78]
+// CHECK: strh w19, [x21, wzr, sxtw #1] // encoding: [0xb3,0xda,0x3f,0x78]
+
+ ldr w3, [sp, x5]
+ ldr s9, [x27, x6, lsl #0]
+ ldr w10, [x30, x7, lsl #2]
+ ldr w11, [x29, x3, sxtx]
+ str s12, [x28, xzr, sxtx #0]
+ str w13, [x27, x5, sxtx #2]
+ str w14, [x26, w6, uxtw]
+ ldr w15, [x25, w7, uxtw #0]
+ ldr w16, [x24, w8, uxtw #2]
+ ldrsw x17, [x23, w9, sxtw]
+ ldr w18, [x22, w10, sxtw #0]
+ ldrsw x19, [x21, wzr, sxtw #2]
+// CHECK: ldr w3, [sp, x5] // encoding: [0xe3,0x6b,0x65,0xb8]
+// CHECK: ldr s9, [x27, x6] // encoding: [0x69,0x6b,0x66,0xbc]
+// CHECK: ldr w10, [x30, x7, lsl #2] // encoding: [0xca,0x7b,0x67,0xb8]
+// CHECK: ldr w11, [x29, x3, sxtx] // encoding: [0xab,0xeb,0x63,0xb8]
+// CHECK: str s12, [x28, xzr, sxtx] // encoding: [0x8c,0xeb,0x3f,0xbc]
+// CHECK: str w13, [x27, x5, sxtx #2] // encoding: [0x6d,0xfb,0x25,0xb8]
+// CHECK: str w14, [x26, w6, uxtw] // encoding: [0x4e,0x4b,0x26,0xb8]
+// CHECK: ldr w15, [x25, w7, uxtw] // encoding: [0x2f,0x4b,0x67,0xb8]
+// CHECK: ldr w16, [x24, w8, uxtw #2] // encoding: [0x10,0x5b,0x68,0xb8]
+// CHECK: ldrsw x17, [x23, w9, sxtw] // encoding: [0xf1,0xca,0xa9,0xb8]
+// CHECK: ldr w18, [x22, w10, sxtw] // encoding: [0xd2,0xca,0x6a,0xb8]
+// CHECK: ldrsw x19, [x21, wzr, sxtw #2] // encoding: [0xb3,0xda,0xbf,0xb8]
+
+ ldr x3, [sp, x5]
+ str x9, [x27, x6, lsl #0]
+ ldr d10, [x30, x7, lsl #3]
+ str x11, [x29, x3, sxtx]
+ ldr x12, [x28, xzr, sxtx #0]
+ ldr x13, [x27, x5, sxtx #3]
+ prfm pldl1keep, [x26, w6, uxtw]
+ ldr x15, [x25, w7, uxtw #0]
+ ldr x16, [x24, w8, uxtw #3]
+ ldr x17, [x23, w9, sxtw]
+ ldr x18, [x22, w10, sxtw #0]
+ str d19, [x21, wzr, sxtw #3]
+ prfm #6, [x0, x5]
+// CHECK: ldr x3, [sp, x5] // encoding: [0xe3,0x6b,0x65,0xf8]
+// CHECK: str x9, [x27, x6] // encoding: [0x69,0x6b,0x26,0xf8]
+// CHECK: ldr d10, [x30, x7, lsl #3] // encoding: [0xca,0x7b,0x67,0xfc]
+// CHECK: str x11, [x29, x3, sxtx] // encoding: [0xab,0xeb,0x23,0xf8]
+// CHECK: ldr x12, [x28, xzr, sxtx] // encoding: [0x8c,0xeb,0x7f,0xf8]
+// CHECK: ldr x13, [x27, x5, sxtx #3] // encoding: [0x6d,0xfb,0x65,0xf8]
+// CHECK: prfm pldl1keep, [x26, w6, uxtw] // encoding: [0x40,0x4b,0xa6,0xf8]
+// CHECK: ldr x15, [x25, w7, uxtw] // encoding: [0x2f,0x4b,0x67,0xf8]
+// CHECK: ldr x16, [x24, w8, uxtw #3] // encoding: [0x10,0x5b,0x68,0xf8]
+// CHECK: ldr x17, [x23, w9, sxtw] // encoding: [0xf1,0xca,0x69,0xf8]
+// CHECK: ldr x18, [x22, w10, sxtw] // encoding: [0xd2,0xca,0x6a,0xf8]
+// CHECK: str d19, [x21, wzr, sxtw #3] // encoding: [0xb3,0xda,0x3f,0xfc]
+// CHECK: prfm #6, [x0, x5, lsl #0] // encoding: [0x06,0x68,0xa5,0xf8]
+
+ ldr q3, [sp, x5]
+ ldr q9, [x27, x6, lsl #0]
+ ldr q10, [x30, x7, lsl #4]
+ str q11, [x29, x3, sxtx]
+ str q12, [x28, xzr, sxtx #0]
+ str q13, [x27, x5, sxtx #4]
+ ldr q14, [x26, w6, uxtw]
+ ldr q15, [x25, w7, uxtw #0]
+ ldr q16, [x24, w8, uxtw #4]
+ ldr q17, [x23, w9, sxtw]
+ str q18, [x22, w10, sxtw #0]
+ ldr q19, [x21, wzr, sxtw #4]
+// CHECK: ldr q3, [sp, x5] // encoding: [0xe3,0x6b,0xe5,0x3c]
+// CHECK: ldr q9, [x27, x6] // encoding: [0x69,0x6b,0xe6,0x3c]
+// CHECK: ldr q10, [x30, x7, lsl #4] // encoding: [0xca,0x7b,0xe7,0x3c]
+// CHECK: str q11, [x29, x3, sxtx] // encoding: [0xab,0xeb,0xa3,0x3c]
+// CHECK: str q12, [x28, xzr, sxtx] // encoding: [0x8c,0xeb,0xbf,0x3c]
+// CHECK: str q13, [x27, x5, sxtx #4] // encoding: [0x6d,0xfb,0xa5,0x3c]
+// CHECK: ldr q14, [x26, w6, uxtw] // encoding: [0x4e,0x4b,0xe6,0x3c]
+// CHECK: ldr q15, [x25, w7, uxtw] // encoding: [0x2f,0x4b,0xe7,0x3c]
+// CHECK: ldr q16, [x24, w8, uxtw #4] // encoding: [0x10,0x5b,0xe8,0x3c]
+// CHECK: ldr q17, [x23, w9, sxtw] // encoding: [0xf1,0xca,0xe9,0x3c]
+// CHECK: str q18, [x22, w10, sxtw] // encoding: [0xd2,0xca,0xaa,0x3c]
+// CHECK: ldr q19, [x21, wzr, sxtw #4] // encoding: [0xb3,0xda,0xff,0x3c]
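+// Note: register-offset addressing extends the index with uxtw, sxtw, sxtx or
+// lsl, and the shift amount must be 0 or log2(access size). For byte accesses
+// an explicit #0 amount is significant and printed back; for wider accesses it
+// is the default and dropped.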
+
+//------------------------------------------------------------------------------
+// Load/store register (immediate post-indexed)
+//------------------------------------------------------------------------------
+
+ strb w9, [x2], #255
+ strb w10, [x3], #1
+ strb w10, [x3], #-256
+ strh w9, [x2], #255
+ strh w9, [x2], #1
+ strh w10, [x3], #-256
+// CHECK: strb w9, [x2], #255 // encoding: [0x49,0xf4,0x0f,0x38]
+// CHECK: strb w10, [x3], #1 // encoding: [0x6a,0x14,0x00,0x38]
+// CHECK: strb w10, [x3], #-256 // encoding: [0x6a,0x04,0x10,0x38]
+// CHECK: strh w9, [x2], #255 // encoding: [0x49,0xf4,0x0f,0x78]
+// CHECK: strh w9, [x2], #1 // encoding: [0x49,0x14,0x00,0x78]
+// CHECK: strh w10, [x3], #-256 // encoding: [0x6a,0x04,0x10,0x78]
+
+ str w19, [sp], #255
+ str w20, [x30], #1
+ str w21, [x12], #-256
+ str xzr, [x9], #255
+ str x2, [x3], #1
+ str x19, [x12], #-256
+// CHECK: str w19, [sp], #255 // encoding: [0xf3,0xf7,0x0f,0xb8]
+// CHECK: str w20, [x30], #1 // encoding: [0xd4,0x17,0x00,0xb8]
+// CHECK: str w21, [x12], #-256 // encoding: [0x95,0x05,0x10,0xb8]
+// CHECK: str xzr, [x9], #255 // encoding: [0x3f,0xf5,0x0f,0xf8]
+// CHECK: str x2, [x3], #1 // encoding: [0x62,0x14,0x00,0xf8]
+// CHECK: str x19, [x12], #-256 // encoding: [0x93,0x05,0x10,0xf8]
+
+ ldrb w9, [x2], #255
+ ldrb w10, [x3], #1
+ ldrb w10, [x3], #-256
+ ldrh w9, [x2], #255
+ ldrh w9, [x2], #1
+ ldrh w10, [x3], #-256
+// CHECK: ldrb w9, [x2], #255 // encoding: [0x49,0xf4,0x4f,0x38]
+// CHECK: ldrb w10, [x3], #1 // encoding: [0x6a,0x14,0x40,0x38]
+// CHECK: ldrb w10, [x3], #-256 // encoding: [0x6a,0x04,0x50,0x38]
+// CHECK: ldrh w9, [x2], #255 // encoding: [0x49,0xf4,0x4f,0x78]
+// CHECK: ldrh w9, [x2], #1 // encoding: [0x49,0x14,0x40,0x78]
+// CHECK: ldrh w10, [x3], #-256 // encoding: [0x6a,0x04,0x50,0x78]
+
+ ldr w19, [sp], #255
+ ldr w20, [x30], #1
+ ldr w21, [x12], #-256
+ ldr xzr, [x9], #255
+ ldr x2, [x3], #1
+ ldr x19, [x12], #-256
+// CHECK: ldr w19, [sp], #255 // encoding: [0xf3,0xf7,0x4f,0xb8]
+// CHECK: ldr w20, [x30], #1 // encoding: [0xd4,0x17,0x40,0xb8]
+// CHECK: ldr w21, [x12], #-256 // encoding: [0x95,0x05,0x50,0xb8]
+// CHECK: ldr xzr, [x9], #255 // encoding: [0x3f,0xf5,0x4f,0xf8]
+// CHECK: ldr x2, [x3], #1 // encoding: [0x62,0x14,0x40,0xf8]
+// CHECK: ldr x19, [x12], #-256 // encoding: [0x93,0x05,0x50,0xf8]
+
+ ldrsb xzr, [x9], #255
+ ldrsb x2, [x3], #1
+ ldrsb x19, [x12], #-256
+ ldrsh xzr, [x9], #255
+ ldrsh x2, [x3], #1
+ ldrsh x19, [x12], #-256
+ ldrsw xzr, [x9], #255
+ ldrsw x2, [x3], #1
+ ldrsw x19, [x12], #-256
+// CHECK: ldrsb xzr, [x9], #255 // encoding: [0x3f,0xf5,0x8f,0x38]
+// CHECK: ldrsb x2, [x3], #1 // encoding: [0x62,0x14,0x80,0x38]
+// CHECK: ldrsb x19, [x12], #-256 // encoding: [0x93,0x05,0x90,0x38]
+// CHECK: ldrsh xzr, [x9], #255 // encoding: [0x3f,0xf5,0x8f,0x78]
+// CHECK: ldrsh x2, [x3], #1 // encoding: [0x62,0x14,0x80,0x78]
+// CHECK: ldrsh x19, [x12], #-256 // encoding: [0x93,0x05,0x90,0x78]
+// CHECK: ldrsw xzr, [x9], #255 // encoding: [0x3f,0xf5,0x8f,0xb8]
+// CHECK: ldrsw x2, [x3], #1 // encoding: [0x62,0x14,0x80,0xb8]
+// CHECK: ldrsw x19, [x12], #-256 // encoding: [0x93,0x05,0x90,0xb8]
+
+ ldrsb wzr, [x9], #255
+ ldrsb w2, [x3], #1
+ ldrsb w19, [x12], #-256
+ ldrsh wzr, [x9], #255
+ ldrsh w2, [x3], #1
+ ldrsh w19, [x12], #-256
+// CHECK: ldrsb wzr, [x9], #255 // encoding: [0x3f,0xf5,0xcf,0x38]
+// CHECK: ldrsb w2, [x3], #1 // encoding: [0x62,0x14,0xc0,0x38]
+// CHECK: ldrsb w19, [x12], #-256 // encoding: [0x93,0x05,0xd0,0x38]
+// CHECK: ldrsh wzr, [x9], #255 // encoding: [0x3f,0xf5,0xcf,0x78]
+// CHECK: ldrsh w2, [x3], #1 // encoding: [0x62,0x14,0xc0,0x78]
+// CHECK: ldrsh w19, [x12], #-256 // encoding: [0x93,0x05,0xd0,0x78]
+
+ str b0, [x0], #255
+ str b3, [x3], #1
+ str b5, [sp], #-256
+ str h10, [x10], #255
+ str h13, [x23], #1
+ str h15, [sp], #-256
+ str s20, [x20], #255
+ str s23, [x23], #1
+ str s25, [x0], #-256
+ str d20, [x20], #255
+ str d23, [x23], #1
+ str d25, [x0], #-256
+// CHECK: str b0, [x0], #255 // encoding: [0x00,0xf4,0x0f,0x3c]
+// CHECK: str b3, [x3], #1 // encoding: [0x63,0x14,0x00,0x3c]
+// CHECK: str b5, [sp], #-256 // encoding: [0xe5,0x07,0x10,0x3c]
+// CHECK: str h10, [x10], #255 // encoding: [0x4a,0xf5,0x0f,0x7c]
+// CHECK: str h13, [x23], #1 // encoding: [0xed,0x16,0x00,0x7c]
+// CHECK: str h15, [sp], #-256 // encoding: [0xef,0x07,0x10,0x7c]
+// CHECK: str s20, [x20], #255 // encoding: [0x94,0xf6,0x0f,0xbc]
+// CHECK: str s23, [x23], #1 // encoding: [0xf7,0x16,0x00,0xbc]
+// CHECK: str s25, [x0], #-256 // encoding: [0x19,0x04,0x10,0xbc]
+// CHECK: str d20, [x20], #255 // encoding: [0x94,0xf6,0x0f,0xfc]
+// CHECK: str d23, [x23], #1 // encoding: [0xf7,0x16,0x00,0xfc]
+// CHECK: str d25, [x0], #-256 // encoding: [0x19,0x04,0x10,0xfc]
+
+ ldr b0, [x0], #255
+ ldr b3, [x3], #1
+ ldr b5, [sp], #-256
+ ldr h10, [x10], #255
+ ldr h13, [x23], #1
+ ldr h15, [sp], #-256
+ ldr s20, [x20], #255
+ ldr s23, [x23], #1
+ ldr s25, [x0], #-256
+ ldr d20, [x20], #255
+ ldr d23, [x23], #1
+ ldr d25, [x0], #-256
+// CHECK: ldr b0, [x0], #255 // encoding: [0x00,0xf4,0x4f,0x3c]
+// CHECK: ldr b3, [x3], #1 // encoding: [0x63,0x14,0x40,0x3c]
+// CHECK: ldr b5, [sp], #-256 // encoding: [0xe5,0x07,0x50,0x3c]
+// CHECK: ldr h10, [x10], #255 // encoding: [0x4a,0xf5,0x4f,0x7c]
+// CHECK: ldr h13, [x23], #1 // encoding: [0xed,0x16,0x40,0x7c]
+// CHECK: ldr h15, [sp], #-256 // encoding: [0xef,0x07,0x50,0x7c]
+// CHECK: ldr s20, [x20], #255 // encoding: [0x94,0xf6,0x4f,0xbc]
+// CHECK: ldr s23, [x23], #1 // encoding: [0xf7,0x16,0x40,0xbc]
+// CHECK: ldr s25, [x0], #-256 // encoding: [0x19,0x04,0x50,0xbc]
+// CHECK: ldr d20, [x20], #255 // encoding: [0x94,0xf6,0x4f,0xfc]
+// CHECK: ldr d23, [x23], #1 // encoding: [0xf7,0x16,0x40,0xfc]
+// CHECK: ldr d25, [x0], #-256 // encoding: [0x19,0x04,0x50,0xfc]
+
+ ldr q20, [x1], #255
+ ldr q23, [x9], #1
+ ldr q25, [x20], #-256
+ str q10, [x1], #255
+ str q22, [sp], #1
+ str q21, [x20], #-256
+// CHECK: ldr q20, [x1], #255 // encoding: [0x34,0xf4,0xcf,0x3c]
+// CHECK: ldr q23, [x9], #1 // encoding: [0x37,0x15,0xc0,0x3c]
+// CHECK: ldr q25, [x20], #-256 // encoding: [0x99,0x06,0xd0,0x3c]
+// CHECK: str q10, [x1], #255 // encoding: [0x2a,0xf4,0x8f,0x3c]
+// CHECK: str q22, [sp], #1 // encoding: [0xf6,0x17,0x80,0x3c]
+// CHECK: str q21, [x20], #-256 // encoding: [0x95,0x06,0x90,0x3c]
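+// Note: post-indexed addressing adds a signed 9-bit byte offset (#-256 to
+// #255) to the base register after the access, for every register class.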
+
+//------------------------------------------------------------------------------
+// Load/store register (immediate pre-indexed)
+//------------------------------------------------------------------------------
+
+ ldr x3, [x4, #0]!
+ ldr xzr, [sp, #0]!
+// CHECK: ldr x3, [x4, #0]! // encoding: [0x83,0x0c,0x40,0xf8]
+// CHECK: ldr xzr, [sp, #0]! // encoding: [0xff,0x0f,0x40,0xf8]
+
+ strb w9, [x2, #255]!
+ strb w10, [x3, #1]!
+ strb w10, [x3, #-256]!
+ strh w9, [x2, #255]!
+ strh w9, [x2, #1]!
+ strh w10, [x3, #-256]!
+// CHECK: strb w9, [x2, #255]! // encoding: [0x49,0xfc,0x0f,0x38]
+// CHECK: strb w10, [x3, #1]! // encoding: [0x6a,0x1c,0x00,0x38]
+// CHECK: strb w10, [x3, #-256]! // encoding: [0x6a,0x0c,0x10,0x38]
+// CHECK: strh w9, [x2, #255]! // encoding: [0x49,0xfc,0x0f,0x78]
+// CHECK: strh w9, [x2, #1]! // encoding: [0x49,0x1c,0x00,0x78]
+// CHECK: strh w10, [x3, #-256]! // encoding: [0x6a,0x0c,0x10,0x78]
+
+ str w19, [sp, #255]!
+ str w20, [x30, #1]!
+ str w21, [x12, #-256]!
+ str xzr, [x9, #255]!
+ str x2, [x3, #1]!
+ str x19, [x12, #-256]!
+// CHECK: str w19, [sp, #255]! // encoding: [0xf3,0xff,0x0f,0xb8]
+// CHECK: str w20, [x30, #1]! // encoding: [0xd4,0x1f,0x00,0xb8]
+// CHECK: str w21, [x12, #-256]! // encoding: [0x95,0x0d,0x10,0xb8]
+// CHECK: str xzr, [x9, #255]! // encoding: [0x3f,0xfd,0x0f,0xf8]
+// CHECK: str x2, [x3, #1]! // encoding: [0x62,0x1c,0x00,0xf8]
+// CHECK: str x19, [x12, #-256]! // encoding: [0x93,0x0d,0x10,0xf8]
+
+ ldrb w9, [x2, #255]!
+ ldrb w10, [x3, #1]!
+ ldrb w10, [x3, #-256]!
+ ldrh w9, [x2, #255]!
+ ldrh w9, [x2, #1]!
+ ldrh w10, [x3, #-256]!
+// CHECK: ldrb w9, [x2, #255]! // encoding: [0x49,0xfc,0x4f,0x38]
+// CHECK: ldrb w10, [x3, #1]! // encoding: [0x6a,0x1c,0x40,0x38]
+// CHECK: ldrb w10, [x3, #-256]! // encoding: [0x6a,0x0c,0x50,0x38]
+// CHECK: ldrh w9, [x2, #255]! // encoding: [0x49,0xfc,0x4f,0x78]
+// CHECK: ldrh w9, [x2, #1]! // encoding: [0x49,0x1c,0x40,0x78]
+// CHECK: ldrh w10, [x3, #-256]! // encoding: [0x6a,0x0c,0x50,0x78]
+
+ ldr w19, [sp, #255]!
+ ldr w20, [x30, #1]!
+ ldr w21, [x12, #-256]!
+ ldr xzr, [x9, #255]!
+ ldr x2, [x3, #1]!
+ ldr x19, [x12, #-256]!
+// CHECK: ldr w19, [sp, #255]! // encoding: [0xf3,0xff,0x4f,0xb8]
+// CHECK: ldr w20, [x30, #1]! // encoding: [0xd4,0x1f,0x40,0xb8]
+// CHECK: ldr w21, [x12, #-256]! // encoding: [0x95,0x0d,0x50,0xb8]
+// CHECK: ldr xzr, [x9, #255]! // encoding: [0x3f,0xfd,0x4f,0xf8]
+// CHECK: ldr x2, [x3, #1]! // encoding: [0x62,0x1c,0x40,0xf8]
+// CHECK: ldr x19, [x12, #-256]! // encoding: [0x93,0x0d,0x50,0xf8]
+
+ ldrsb xzr, [x9, #255]!
+ ldrsb x2, [x3, #1]!
+ ldrsb x19, [x12, #-256]!
+ ldrsh xzr, [x9, #255]!
+ ldrsh x2, [x3, #1]!
+ ldrsh x19, [x12, #-256]!
+ ldrsw xzr, [x9, #255]!
+ ldrsw x2, [x3, #1]!
+ ldrsw x19, [x12, #-256]!
+// CHECK: ldrsb xzr, [x9, #255]! // encoding: [0x3f,0xfd,0x8f,0x38]
+// CHECK: ldrsb x2, [x3, #1]! // encoding: [0x62,0x1c,0x80,0x38]
+// CHECK: ldrsb x19, [x12, #-256]! // encoding: [0x93,0x0d,0x90,0x38]
+// CHECK: ldrsh xzr, [x9, #255]! // encoding: [0x3f,0xfd,0x8f,0x78]
+// CHECK: ldrsh x2, [x3, #1]! // encoding: [0x62,0x1c,0x80,0x78]
+// CHECK: ldrsh x19, [x12, #-256]! // encoding: [0x93,0x0d,0x90,0x78]
+// CHECK: ldrsw xzr, [x9, #255]! // encoding: [0x3f,0xfd,0x8f,0xb8]
+// CHECK: ldrsw x2, [x3, #1]! // encoding: [0x62,0x1c,0x80,0xb8]
+// CHECK: ldrsw x19, [x12, #-256]! // encoding: [0x93,0x0d,0x90,0xb8]
+
+ ldrsb wzr, [x9, #255]!
+ ldrsb w2, [x3, #1]!
+ ldrsb w19, [x12, #-256]!
+ ldrsh wzr, [x9, #255]!
+ ldrsh w2, [x3, #1]!
+ ldrsh w19, [x12, #-256]!
+// CHECK: ldrsb wzr, [x9, #255]! // encoding: [0x3f,0xfd,0xcf,0x38]
+// CHECK: ldrsb w2, [x3, #1]! // encoding: [0x62,0x1c,0xc0,0x38]
+// CHECK: ldrsb w19, [x12, #-256]! // encoding: [0x93,0x0d,0xd0,0x38]
+// CHECK: ldrsh wzr, [x9, #255]! // encoding: [0x3f,0xfd,0xcf,0x78]
+// CHECK: ldrsh w2, [x3, #1]! // encoding: [0x62,0x1c,0xc0,0x78]
+// CHECK: ldrsh w19, [x12, #-256]! // encoding: [0x93,0x0d,0xd0,0x78]
+
+ str b0, [x0, #255]!
+ str b3, [x3, #1]!
+ str b5, [sp, #-256]!
+ str h10, [x10, #255]!
+ str h13, [x23, #1]!
+ str h15, [sp, #-256]!
+ str s20, [x20, #255]!
+ str s23, [x23, #1]!
+ str s25, [x0, #-256]!
+ str d20, [x20, #255]!
+ str d23, [x23, #1]!
+ str d25, [x0, #-256]!
+// CHECK: str b0, [x0, #255]! // encoding: [0x00,0xfc,0x0f,0x3c]
+// CHECK: str b3, [x3, #1]! // encoding: [0x63,0x1c,0x00,0x3c]
+// CHECK: str b5, [sp, #-256]! // encoding: [0xe5,0x0f,0x10,0x3c]
+// CHECK: str h10, [x10, #255]! // encoding: [0x4a,0xfd,0x0f,0x7c]
+// CHECK: str h13, [x23, #1]! // encoding: [0xed,0x1e,0x00,0x7c]
+// CHECK: str h15, [sp, #-256]! // encoding: [0xef,0x0f,0x10,0x7c]
+// CHECK: str s20, [x20, #255]! // encoding: [0x94,0xfe,0x0f,0xbc]
+// CHECK: str s23, [x23, #1]! // encoding: [0xf7,0x1e,0x00,0xbc]
+// CHECK: str s25, [x0, #-256]! // encoding: [0x19,0x0c,0x10,0xbc]
+// CHECK: str d20, [x20, #255]! // encoding: [0x94,0xfe,0x0f,0xfc]
+// CHECK: str d23, [x23, #1]! // encoding: [0xf7,0x1e,0x00,0xfc]
+// CHECK: str d25, [x0, #-256]! // encoding: [0x19,0x0c,0x10,0xfc]
+
+ ldr b0, [x0, #255]!
+ ldr b3, [x3, #1]!
+ ldr b5, [sp, #-256]!
+ ldr h10, [x10, #255]!
+ ldr h13, [x23, #1]!
+ ldr h15, [sp, #-256]!
+ ldr s20, [x20, #255]!
+ ldr s23, [x23, #1]!
+ ldr s25, [x0, #-256]!
+ ldr d20, [x20, #255]!
+ ldr d23, [x23, #1]!
+ ldr d25, [x0, #-256]!
+// CHECK: ldr b0, [x0, #255]! // encoding: [0x00,0xfc,0x4f,0x3c]
+// CHECK: ldr b3, [x3, #1]! // encoding: [0x63,0x1c,0x40,0x3c]
+// CHECK: ldr b5, [sp, #-256]! // encoding: [0xe5,0x0f,0x50,0x3c]
+// CHECK: ldr h10, [x10, #255]! // encoding: [0x4a,0xfd,0x4f,0x7c]
+// CHECK: ldr h13, [x23, #1]! // encoding: [0xed,0x1e,0x40,0x7c]
+// CHECK: ldr h15, [sp, #-256]! // encoding: [0xef,0x0f,0x50,0x7c]
+// CHECK: ldr s20, [x20, #255]! // encoding: [0x94,0xfe,0x4f,0xbc]
+// CHECK: ldr s23, [x23, #1]! // encoding: [0xf7,0x1e,0x40,0xbc]
+// CHECK: ldr s25, [x0, #-256]! // encoding: [0x19,0x0c,0x50,0xbc]
+// CHECK: ldr d20, [x20, #255]! // encoding: [0x94,0xfe,0x4f,0xfc]
+// CHECK: ldr d23, [x23, #1]! // encoding: [0xf7,0x1e,0x40,0xfc]
+// CHECK: ldr d25, [x0, #-256]! // encoding: [0x19,0x0c,0x50,0xfc]
+
+ ldr q20, [x1, #255]!
+ ldr q23, [x9, #1]!
+ ldr q25, [x20, #-256]!
+ str q10, [x1, #255]!
+ str q22, [sp, #1]!
+ str q21, [x20, #-256]!
+// CHECK: ldr q20, [x1, #255]! // encoding: [0x34,0xfc,0xcf,0x3c]
+// CHECK: ldr q23, [x9, #1]! // encoding: [0x37,0x1d,0xc0,0x3c]
+// CHECK: ldr q25, [x20, #-256]! // encoding: [0x99,0x0e,0xd0,0x3c]
+// CHECK: str q10, [x1, #255]! // encoding: [0x2a,0xfc,0x8f,0x3c]
+// CHECK: str q22, [sp, #1]! // encoding: [0xf6,0x1f,0x80,0x3c]
+// CHECK: str q21, [x20, #-256]! // encoding: [0x95,0x0e,0x90,0x3c]
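+// Note: pre-indexed forms use the same signed 9-bit range but update the base
+// before the access, as marked by the trailing '!'.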
+
+//------------------------------------------------------------------------------
+// Load/store (unprivileged)
+//------------------------------------------------------------------------------
+
+ sttrb w9, [sp, #0]
+ sttrh wzr, [x12, #255]
+ sttr w16, [x0, #-256]
+ sttr x28, [x14, #1]
+// CHECK: sttrb w9, [sp] // encoding: [0xe9,0x0b,0x00,0x38]
+// CHECK: sttrh wzr, [x12, #255] // encoding: [0x9f,0xf9,0x0f,0x78]
+// CHECK: sttr w16, [x0, #-256] // encoding: [0x10,0x08,0x10,0xb8]
+// CHECK: sttr x28, [x14, #1] // encoding: [0xdc,0x19,0x00,0xf8]
+
+ ldtrb w1, [x20, #255]
+ ldtrh w20, [x1, #255]
+ ldtr w12, [sp, #255]
+ ldtr xzr, [x12, #255]
+// CHECK: ldtrb w1, [x20, #255] // encoding: [0x81,0xfa,0x4f,0x38]
+// CHECK: ldtrh w20, [x1, #255] // encoding: [0x34,0xf8,0x4f,0x78]
+// CHECK: ldtr w12, [sp, #255] // encoding: [0xec,0xfb,0x4f,0xb8]
+// CHECK: ldtr xzr, [x12, #255] // encoding: [0x9f,0xf9,0x4f,0xf8]
+
+ ldtrsb x9, [x7, #-256]
+ ldtrsh x17, [x19, #-256]
+ ldtrsw x20, [x15, #-256]
+ ldtrsb w19, [x1, #-256]
+ ldtrsh w15, [x21, #-256]
+// CHECK: ldtrsb x9, [x7, #-256] // encoding: [0xe9,0x08,0x90,0x38]
+// CHECK: ldtrsh x17, [x19, #-256] // encoding: [0x71,0x0a,0x90,0x78]
+// CHECK: ldtrsw x20, [x15, #-256] // encoding: [0xf4,0x09,0x90,0xb8]
+// CHECK: ldtrsb w19, [x1, #-256] // encoding: [0x33,0x08,0xd0,0x38]
+// CHECK: ldtrsh w15, [x21, #-256] // encoding: [0xaf,0x0a,0xd0,0x78]
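+// Note: ldtr/sttr perform the access with unprivileged (EL0) permissions when
+// executed at a higher exception level; the offset is the same signed 9-bit
+// unscaled range.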
+
+//------------------------------------------------------------------------------
+// Load/store register pair (offset)
+//------------------------------------------------------------------------------
+
+ ldp w3, w5, [sp]
+ stp wzr, w9, [sp, #252]
+ ldp w2, wzr, [sp, #-256]
+ ldp w9, w10, [sp, #4]
+// CHECK: ldp w3, w5, [sp] // encoding: [0xe3,0x17,0x40,0x29]
+// CHECK: stp wzr, w9, [sp, #252] // encoding: [0xff,0xa7,0x1f,0x29]
+// CHECK: ldp w2, wzr, [sp, #-256] // encoding: [0xe2,0x7f,0x60,0x29]
+// CHECK: ldp w9, w10, [sp, #4] // encoding: [0xe9,0xab,0x40,0x29]
+
+ ldpsw x9, x10, [sp, #4]
+ ldpsw x9, x10, [x2, #-256]
+ ldpsw x20, x30, [sp, #252]
+// CHECK: ldpsw x9, x10, [sp, #4] // encoding: [0xe9,0xab,0x40,0x69]
+// CHECK: ldpsw x9, x10, [x2, #-256] // encoding: [0x49,0x28,0x60,0x69]
+// CHECK: ldpsw x20, x30, [sp, #252] // encoding: [0xf4,0xfb,0x5f,0x69]
+
+ ldp x21, x29, [x2, #504]
+ ldp x22, x23, [x3, #-512]
+ ldp x24, x25, [x4, #8]
+// CHECK: ldp x21, x29, [x2, #504] // encoding: [0x55,0xf4,0x5f,0xa9]
+// CHECK: ldp x22, x23, [x3, #-512] // encoding: [0x76,0x5c,0x60,0xa9]
+// CHECK: ldp x24, x25, [x4, #8] // encoding: [0x98,0xe4,0x40,0xa9]
+
+ ldp s29, s28, [sp, #252]
+ stp s27, s26, [sp, #-256]
+ ldp s1, s2, [x3, #44]
+// CHECK: ldp s29, s28, [sp, #252] // encoding: [0xfd,0xf3,0x5f,0x2d]
+// CHECK: stp s27, s26, [sp, #-256] // encoding: [0xfb,0x6b,0x20,0x2d]
+// CHECK: ldp s1, s2, [x3, #44] // encoding: [0x61,0x88,0x45,0x2d]
+
+ stp d3, d5, [x9, #504]
+ stp d7, d11, [x10, #-512]
+ ldp d2, d3, [x30, #-8]
+// CHECK: stp d3, d5, [x9, #504] // encoding: [0x23,0x95,0x1f,0x6d]
+// CHECK: stp d7, d11, [x10, #-512] // encoding: [0x47,0x2d,0x20,0x6d]
+// CHECK: ldp d2, d3, [x30, #-8] // encoding: [0xc2,0x8f,0x7f,0x6d]
+
+ stp q3, q5, [sp]
+ stp q17, q19, [sp, #1008]
+ ldp q23, q29, [x1, #-1024]
+// CHECK: stp q3, q5, [sp] // encoding: [0xe3,0x17,0x00,0xad]
+// CHECK: stp q17, q19, [sp, #1008] // encoding: [0xf1,0xcf,0x1f,0xad]
+// CHECK: ldp q23, q29, [x1, #-1024] // encoding: [0x37,0x74,0x60,0xad]
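+// Note: pair offsets are signed 7-bit immediates scaled by the register size:
+// multiples of 4 in [#-256, #252] for W/S, of 8 in [#-512, #504] for X/D, and
+// of 16 in [#-1024, #1008] for Q, matching the boundary values above.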
+
+//------------------------------------------------------------------------------
+// Load/store register pair (post-indexed)
+//------------------------------------------------------------------------------
+
+ ldp w3, w5, [sp], #0
+ stp wzr, w9, [sp], #252
+ ldp w2, wzr, [sp], #-256
+ ldp w9, w10, [sp], #4
+// CHECK: ldp w3, w5, [sp], #0 // encoding: [0xe3,0x17,0xc0,0x28]
+// CHECK: stp wzr, w9, [sp], #252 // encoding: [0xff,0xa7,0x9f,0x28]
+// CHECK: ldp w2, wzr, [sp], #-256 // encoding: [0xe2,0x7f,0xe0,0x28]
+// CHECK: ldp w9, w10, [sp], #4 // encoding: [0xe9,0xab,0xc0,0x28]
+
+ ldpsw x9, x10, [sp], #4
+ ldpsw x9, x10, [x2], #-256
+ ldpsw x20, x30, [sp], #252
+// CHECK: ldpsw x9, x10, [sp], #4 // encoding: [0xe9,0xab,0xc0,0x68]
+// CHECK: ldpsw x9, x10, [x2], #-256 // encoding: [0x49,0x28,0xe0,0x68]
+// CHECK: ldpsw x20, x30, [sp], #252 // encoding: [0xf4,0xfb,0xdf,0x68]
+
+ ldp x21, x29, [x2], #504
+ ldp x22, x23, [x3], #-512
+ ldp x24, x25, [x4], #8
+// CHECK: ldp x21, x29, [x2], #504 // encoding: [0x55,0xf4,0xdf,0xa8]
+// CHECK: ldp x22, x23, [x3], #-512 // encoding: [0x76,0x5c,0xe0,0xa8]
+// CHECK: ldp x24, x25, [x4], #8 // encoding: [0x98,0xe4,0xc0,0xa8]
+
+ ldp s29, s28, [sp], #252
+ stp s27, s26, [sp], #-256
+ ldp s1, s2, [x3], #44
+// CHECK: ldp s29, s28, [sp], #252 // encoding: [0xfd,0xf3,0xdf,0x2c]
+// CHECK: stp s27, s26, [sp], #-256 // encoding: [0xfb,0x6b,0xa0,0x2c]
+// CHECK: ldp s1, s2, [x3], #44 // encoding: [0x61,0x88,0xc5,0x2c]
+
+ stp d3, d5, [x9], #504
+ stp d7, d11, [x10], #-512
+ ldp d2, d3, [x30], #-8
+// CHECK: stp d3, d5, [x9], #504 // encoding: [0x23,0x95,0x9f,0x6c]
+// CHECK: stp d7, d11, [x10], #-512 // encoding: [0x47,0x2d,0xa0,0x6c]
+// CHECK: ldp d2, d3, [x30], #-8 // encoding: [0xc2,0x8f,0xff,0x6c]
+
+ stp q3, q5, [sp], #0
+ stp q17, q19, [sp], #1008
+ ldp q23, q29, [x1], #-1024
+// CHECK: stp q3, q5, [sp], #0 // encoding: [0xe3,0x17,0x80,0xac]
+// CHECK: stp q17, q19, [sp], #1008 // encoding: [0xf1,0xcf,0x9f,0xac]
+// CHECK: ldp q23, q29, [x1], #-1024 // encoding: [0x37,0x74,0xe0,0xac]
+
+//------------------------------------------------------------------------------
+// Load/store register pair (pre-indexed)
+//------------------------------------------------------------------------------
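+
+// Pre-indexed forms (marked with '!') add the immediate to the base register
+// and write it back before the access; ranges match the offset forms.
+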
+ ldp w3, w5, [sp, #0]!
+ stp wzr, w9, [sp, #252]!
+ ldp w2, wzr, [sp, #-256]!
+ ldp w9, w10, [sp, #4]!
+// CHECK: ldp w3, w5, [sp, #0]! // encoding: [0xe3,0x17,0xc0,0x29]
+// CHECK: stp wzr, w9, [sp, #252]! // encoding: [0xff,0xa7,0x9f,0x29]
+// CHECK: ldp w2, wzr, [sp, #-256]! // encoding: [0xe2,0x7f,0xe0,0x29]
+// CHECK: ldp w9, w10, [sp, #4]! // encoding: [0xe9,0xab,0xc0,0x29]
+
+ ldpsw x9, x10, [sp, #4]!
+ ldpsw x9, x10, [x2, #-256]!
+ ldpsw x20, x30, [sp, #252]!
+// CHECK: ldpsw x9, x10, [sp, #4]! // encoding: [0xe9,0xab,0xc0,0x69]
+// CHECK: ldpsw x9, x10, [x2, #-256]! // encoding: [0x49,0x28,0xe0,0x69]
+// CHECK: ldpsw x20, x30, [sp, #252]! // encoding: [0xf4,0xfb,0xdf,0x69]
+
+ ldp x21, x29, [x2, #504]!
+ ldp x22, x23, [x3, #-512]!
+ ldp x24, x25, [x4, #8]!
+// CHECK: ldp x21, x29, [x2, #504]! // encoding: [0x55,0xf4,0xdf,0xa9]
+// CHECK: ldp x22, x23, [x3, #-512]! // encoding: [0x76,0x5c,0xe0,0xa9]
+// CHECK: ldp x24, x25, [x4, #8]! // encoding: [0x98,0xe4,0xc0,0xa9]
+
+ ldp s29, s28, [sp, #252]!
+ stp s27, s26, [sp, #-256]!
+ ldp s1, s2, [x3, #44]!
+// CHECK: ldp s29, s28, [sp, #252]! // encoding: [0xfd,0xf3,0xdf,0x2d]
+// CHECK: stp s27, s26, [sp, #-256]! // encoding: [0xfb,0x6b,0xa0,0x2d]
+// CHECK: ldp s1, s2, [x3, #44]! // encoding: [0x61,0x88,0xc5,0x2d]
+
+ stp d3, d5, [x9, #504]!
+ stp d7, d11, [x10, #-512]!
+ ldp d2, d3, [x30, #-8]!
+// CHECK: stp d3, d5, [x9, #504]! // encoding: [0x23,0x95,0x9f,0x6d]
+// CHECK: stp d7, d11, [x10, #-512]! // encoding: [0x47,0x2d,0xa0,0x6d]
+// CHECK: ldp d2, d3, [x30, #-8]! // encoding: [0xc2,0x8f,0xff,0x6d]
+
+ stp q3, q5, [sp, #0]!
+ stp q17, q19, [sp, #1008]!
+ ldp q23, q29, [x1, #-1024]!
+// CHECK: stp q3, q5, [sp, #0]! // encoding: [0xe3,0x17,0x80,0xad]
+// CHECK: stp q17, q19, [sp, #1008]! // encoding: [0xf1,0xcf,0x9f,0xad]
+// CHECK: ldp q23, q29, [x1, #-1024]! // encoding: [0x37,0x74,0xe0,0xad]
+
+//------------------------------------------------------------------------------
+// Load/store non-temporal register pair (offset)
+//------------------------------------------------------------------------------
+
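+// ldnp/stnp hint that the data is non-temporal (not worth caching); only the
+// plain base-plus-offset addressing form exists, with the same scaled ranges.
+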
+ ldnp w3, w5, [sp]
+ stnp wzr, w9, [sp, #252]
+ ldnp w2, wzr, [sp, #-256]
+ ldnp w9, w10, [sp, #4]
+// CHECK: ldnp w3, w5, [sp] // encoding: [0xe3,0x17,0x40,0x28]
+// CHECK: stnp wzr, w9, [sp, #252] // encoding: [0xff,0xa7,0x1f,0x28]
+// CHECK: ldnp w2, wzr, [sp, #-256] // encoding: [0xe2,0x7f,0x60,0x28]
+// CHECK: ldnp w9, w10, [sp, #4] // encoding: [0xe9,0xab,0x40,0x28]
+
+ ldnp x21, x29, [x2, #504]
+ ldnp x22, x23, [x3, #-512]
+ ldnp x24, x25, [x4, #8]
+// CHECK: ldnp x21, x29, [x2, #504] // encoding: [0x55,0xf4,0x5f,0xa8]
+// CHECK: ldnp x22, x23, [x3, #-512] // encoding: [0x76,0x5c,0x60,0xa8]
+// CHECK: ldnp x24, x25, [x4, #8] // encoding: [0x98,0xe4,0x40,0xa8]
+
+ ldnp s29, s28, [sp, #252]
+ stnp s27, s26, [sp, #-256]
+ ldnp s1, s2, [x3, #44]
+// CHECK: ldnp s29, s28, [sp, #252] // encoding: [0xfd,0xf3,0x5f,0x2c]
+// CHECK: stnp s27, s26, [sp, #-256] // encoding: [0xfb,0x6b,0x20,0x2c]
+// CHECK: ldnp s1, s2, [x3, #44] // encoding: [0x61,0x88,0x45,0x2c]
+
+ stnp d3, d5, [x9, #504]
+ stnp d7, d11, [x10, #-512]
+ ldnp d2, d3, [x30, #-8]
+// CHECK: stnp d3, d5, [x9, #504] // encoding: [0x23,0x95,0x1f,0x6c]
+// CHECK: stnp d7, d11, [x10, #-512] // encoding: [0x47,0x2d,0x20,0x6c]
+// CHECK: ldnp d2, d3, [x30, #-8] // encoding: [0xc2,0x8f,0x7f,0x6c]
+
+ stnp q3, q5, [sp]
+ stnp q17, q19, [sp, #1008]
+ ldnp q23, q29, [x1, #-1024]
+// CHECK: stnp q3, q5, [sp] // encoding: [0xe3,0x17,0x00,0xac]
+// CHECK: stnp q17, q19, [sp, #1008] // encoding: [0xf1,0xcf,0x1f,0xac]
+// CHECK: ldnp q23, q29, [x1, #-1024] // encoding: [0x37,0x74,0x60,0xac]
+
+//------------------------------------------------------------------------------
+// Logical (immediate)
+//------------------------------------------------------------------------------
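+
+// Only bitmask immediates are encodable: a rotated run of contiguous ones
+// replicated across the register at an element size of 2, 4, 8, 16, 32 or
+// 64 bits (the replication width used to group the tests below).
+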
+ // 32 bit replication-width
+ orr w3, w9, #0xffff0000
+ orr wsp, w10, #0xe00000ff
+ orr w9, w10, #0x000003ff
+// CHECK: orr w3, w9, #0xffff0000 // encoding: [0x23,0x3d,0x10,0x32]
+// CHECK: orr wsp, w10, #0xe00000ff // encoding: [0x5f,0x29,0x03,0x32]
+// CHECK: orr w9, w10, #0x3ff // encoding: [0x49,0x25,0x00,0x32]
+
+        // 16 bit replication-width
+ and w14, w15, #0x80008000
+ and w12, w13, #0xffc3ffc3
+ and w11, wzr, #0x00030003
+// CHECK: and w14, w15, #0x80008000 // encoding: [0xee,0x81,0x01,0x12]
+// CHECK: and w12, w13, #0xffc3ffc3 // encoding: [0xac,0xad,0x0a,0x12]
+// CHECK: and w11, wzr, #0x30003 // encoding: [0xeb,0x87,0x00,0x12]
+
+        // 8 bit replication-width
+ eor w3, w6, #0xe0e0e0e0
+ eor wsp, wzr, #0x03030303
+ eor w16, w17, #0x81818181
+// CHECK: eor w3, w6, #0xe0e0e0e0 // encoding: [0xc3,0xc8,0x03,0x52]
+// CHECK: eor wsp, wzr, #0x3030303 // encoding: [0xff,0xc7,0x00,0x52]
+// CHECK: eor w16, w17, #0x81818181 // encoding: [0x30,0xc6,0x01,0x52]
+
+        // 4 bit replication-width
+ ands wzr, w18, #0xcccccccc
+ ands w19, w20, #0x33333333
+ ands w21, w22, #0x99999999
+// CHECK: ands wzr, w18, #0xcccccccc // encoding: [0x5f,0xe6,0x02,0x72]
+// CHECK: ands w19, w20, #0x33333333 // encoding: [0x93,0xe6,0x00,0x72]
+// CHECK: ands w21, w22, #0x99999999 // encoding: [0xd5,0xe6,0x01,0x72]
+
+        // 2 bit replication-width
+ tst w3, #0xaaaaaaaa
+ tst wzr, #0x55555555
+// CHECK: ands wzr, w3, #0xaaaaaaaa // encoding: [0x7f,0xf0,0x01,0x72]
+// CHECK: ands wzr, wzr, #0x55555555 // encoding: [0xff,0xf3,0x00,0x72]
+
+ // 64 bit replication-width
+ eor x3, x5, #0xffffffffc000000
+ and x9, x10, #0x00007fffffffffff
+ orr x11, x12, #0x8000000000000fff
+// CHECK: eor x3, x5, #0xffffffffc000000 // encoding: [0xa3,0x84,0x66,0xd2]
+// CHECK: and x9, x10, #0x7fffffffffff // encoding: [0x49,0xb9,0x40,0x92]
+// CHECK: orr x11, x12, #0x8000000000000fff // encoding: [0x8b,0x31,0x41,0xb2]
+
+ // 32 bit replication-width
+ orr x3, x9, #0xffff0000ffff0000
+ orr sp, x10, #0xe00000ffe00000ff
+ orr x9, x10, #0x000003ff000003ff
+// CHECK: orr x3, x9, #0xffff0000ffff0000 // encoding: [0x23,0x3d,0x10,0xb2]
+// CHECK: orr sp, x10, #0xe00000ffe00000ff // encoding: [0x5f,0x29,0x03,0xb2]
+// CHECK: orr x9, x10, #0x3ff000003ff // encoding: [0x49,0x25,0x00,0xb2]
+
+ // 16 bit replication-width
+ and x14, x15, #0x8000800080008000
+ and x12, x13, #0xffc3ffc3ffc3ffc3
+ and x11, xzr, #0x0003000300030003
+// CHECK: and x14, x15, #0x8000800080008000 // encoding: [0xee,0x81,0x01,0x92]
+// CHECK: and x12, x13, #0xffc3ffc3ffc3ffc3 // encoding: [0xac,0xad,0x0a,0x92]
+// CHECK: and x11, xzr, #0x3000300030003 // encoding: [0xeb,0x87,0x00,0x92]
+
+ // 8 bit replication-width
+ eor x3, x6, #0xe0e0e0e0e0e0e0e0
+ eor sp, xzr, #0x0303030303030303
+ eor x16, x17, #0x8181818181818181
+// CHECK: eor x3, x6, #0xe0e0e0e0e0e0e0e0 // encoding: [0xc3,0xc8,0x03,0xd2]
+// CHECK: eor sp, xzr, #0x303030303030303 // encoding: [0xff,0xc7,0x00,0xd2]
+// CHECK: eor x16, x17, #0x8181818181818181 // encoding: [0x30,0xc6,0x01,0xd2]
+
+ // 4 bit replication-width
+ ands xzr, x18, #0xcccccccccccccccc
+ ands x19, x20, #0x3333333333333333
+ ands x21, x22, #0x9999999999999999
+// CHECK: ands xzr, x18, #0xcccccccccccccccc // encoding: [0x5f,0xe6,0x02,0xf2]
+// CHECK: ands x19, x20, #0x3333333333333333 // encoding: [0x93,0xe6,0x00,0xf2]
+// CHECK: ands x21, x22, #0x9999999999999999 // encoding: [0xd5,0xe6,0x01,0xf2]
+
+ // 2 bit replication-width
+ tst x3, #0xaaaaaaaaaaaaaaaa
+ tst xzr, #0x5555555555555555
+// CHECK: ands xzr, x3, #0xaaaaaaaaaaaaaaaa // encoding: [0x7f,0xf0,0x01,0xf2]
+// CHECK: ands xzr, xzr, #0x5555555555555555 // encoding: [0xff,0xf3,0x00,0xf2]
+
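+// A mov with a bitmask immediate is an alias for orr with the zero register: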
+ mov w3, #0xf000f
+ mov x10, #0xaaaaaaaaaaaaaaaa
+// CHECK: orr w3, wzr, #0xf000f // encoding: [0xe3,0x8f,0x00,0x32]
+// CHECK: orr x10, xzr, #0xaaaaaaaaaaaaaaaa // encoding: [0xea,0xf3,0x01,0xb2]
+
+//------------------------------------------------------------------------------
+// Logical (shifted register)
+//------------------------------------------------------------------------------
+
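+// The optional shift on the second source is lsl, lsr, asr or ror by 0 up to
+// one less than the register width; the printer drops a default "lsl #0" but
+// keeps an explicit asr/ror #0, as the CHECK lines below show.
+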
+ and w12, w23, w21
+ and w16, w15, w1, lsl #1
+ and w9, w4, w10, lsl #31
+ and w3, w30, w11, lsl #0
+ and x3, x5, x7, lsl #63
+// CHECK: and w12, w23, w21 // encoding: [0xec,0x02,0x15,0x0a]
+// CHECK: and w16, w15, w1, lsl #1 // encoding: [0xf0,0x05,0x01,0x0a]
+// CHECK: and w9, w4, w10, lsl #31 // encoding: [0x89,0x7c,0x0a,0x0a]
+// CHECK: and w3, w30, w11 // encoding: [0xc3,0x03,0x0b,0x0a]
+// CHECK: and x3, x5, x7, lsl #63 // encoding: [0xa3,0xfc,0x07,0x8a]
+
+ and x5, x14, x19, asr #4
+ and w3, w17, w19, ror #31
+ and w0, w2, wzr, lsr #17
+ and w3, w30, w11, asr #0
+// CHECK: and x5, x14, x19, asr #4 // encoding: [0xc5,0x11,0x93,0x8a]
+// CHECK: and w3, w17, w19, ror #31 // encoding: [0x23,0x7e,0xd3,0x0a]
+// CHECK: and w0, w2, wzr, lsr #17 // encoding: [0x40,0x44,0x5f,0x0a]
+// CHECK: and w3, w30, w11, asr #0 // encoding: [0xc3,0x03,0x8b,0x0a]
+
+ and xzr, x4, x26, lsl #0
+ and w3, wzr, w20, ror #0
+ and x7, x20, xzr, asr #63
+// CHECK: and xzr, x4, x26 // encoding: [0x9f,0x00,0x1a,0x8a]
+// CHECK: and w3, wzr, w20, ror #0 // encoding: [0xe3,0x03,0xd4,0x0a]
+// CHECK: and x7, x20, xzr, asr #63 // encoding: [0x87,0xfe,0x9f,0x8a]
+
+ bic x13, x20, x14, lsl #47
+ bic w2, w7, w9
+ orr w2, w7, w0, asr #31
+ orr x8, x9, x10, lsl #12
+ orn x3, x5, x7, asr #0
+ orn w2, w5, w29
+// CHECK: bic x13, x20, x14, lsl #47 // encoding: [0x8d,0xbe,0x2e,0x8a]
+// CHECK: bic w2, w7, w9 // encoding: [0xe2,0x00,0x29,0x0a]
+// CHECK: orr w2, w7, w0, asr #31 // encoding: [0xe2,0x7c,0x80,0x2a]
+// CHECK: orr x8, x9, x10, lsl #12 // encoding: [0x28,0x31,0x0a,0xaa]
+// CHECK: orn x3, x5, x7, asr #0 // encoding: [0xa3,0x00,0xa7,0xaa]
+// CHECK: orn w2, w5, w29 // encoding: [0xa2,0x00,0x3d,0x2a]
+
+ ands w7, wzr, w9, lsl #1
+ ands x3, x5, x20, ror #63
+ bics w3, w5, w7, lsl #0
+ bics x3, xzr, x3, lsl #1
+// CHECK: ands w7, wzr, w9, lsl #1 // encoding: [0xe7,0x07,0x09,0x6a]
+// CHECK: ands x3, x5, x20, ror #63 // encoding: [0xa3,0xfc,0xd4,0xea]
+// CHECK: bics w3, w5, w7 // encoding: [0xa3,0x00,0x27,0x6a]
+// CHECK: bics x3, xzr, x3, lsl #1 // encoding: [0xe3,0x07,0x23,0xea]
+
+ tst w3, w7, lsl #31
+ tst x2, x20, asr #0
+// CHECK: tst w3, w7, lsl #31 // encoding: [0x7f,0x7c,0x07,0x6a]
+// CHECK: tst x2, x20, asr #0 // encoding: [0x5f,0x00,0x94,0xea]
+
+ mov x3, x6
+ mov x3, xzr
+ mov wzr, w2
+ mov w3, w5
+// CHECK: mov x3, x6 // encoding: [0xe3,0x03,0x06,0xaa]
+// CHECK: mov x3, xzr // encoding: [0xe3,0x03,0x1f,0xaa]
+// CHECK: mov wzr, w2 // encoding: [0xff,0x03,0x02,0x2a]
+// CHECK: mov w3, w5 // encoding: [0xe3,0x03,0x05,0x2a]
+
+//------------------------------------------------------------------------------
+// Move wide (immediate)
+//------------------------------------------------------------------------------
+
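+// movz/movn/movk take a 16-bit immediate optionally shifted left by 0, 16,
+// 32 or 48 (the hw field); the canonical output omits "lsl #0".
+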
+ movz w1, #65535, lsl #0
+ movz w2, #0, lsl #16
+ movn w2, #1234, lsl #0
+// CHECK: movz w1, #65535 // encoding: [0xe1,0xff,0x9f,0x52]
+// CHECK: movz w2, #0, lsl #16 // encoding: [0x02,0x00,0xa0,0x52]
+// CHECK: movn w2, #1234 // encoding: [0x42,0x9a,0x80,0x12]
+
+ movz x2, #1234, lsl #32
+ movk xzr, #4321, lsl #48
+// CHECK: movz x2, #1234, lsl #32 // encoding: [0x42,0x9a,0xc0,0xd2]
+// CHECK: movk xzr, #4321, lsl #48 // encoding: [0x3f,0x1c,0xe2,0xf2]
+
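+// The :abs_gN: modifiers select 16-bit slices of a symbol's absolute address
+// (g0 = bits 0-15 up to g3 = bits 48-63); _nc suppresses the overflow check
+// and _s selects the signed variants usable with both movz and movn.
+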
+ movz x2, #:abs_g0:sym
+ movk w3, #:abs_g0_nc:sym
+// CHECK: movz x2, #:abs_g0:sym // encoding: [0x02'A',A,0x80'A',0xd2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_a64_movw_uabs_g0
+// CHECK: movk w3, #:abs_g0_nc:sym // encoding: [0x03'A',A,0x80'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_a64_movw_uabs_g0_nc
+
+ movz x4, #:abs_g1:sym
+ movk w5, #:abs_g1_nc:sym
+// CHECK: movz x4, #:abs_g1:sym // encoding: [0x04'A',A,0xa0'A',0xd2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_a64_movw_uabs_g1
+// CHECK: movk w5, #:abs_g1_nc:sym // encoding: [0x05'A',A,0xa0'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_a64_movw_uabs_g1_nc
+
+ movz x6, #:abs_g2:sym
+ movk x7, #:abs_g2_nc:sym
+// CHECK: movz x6, #:abs_g2:sym // encoding: [0x06'A',A,0xc0'A',0xd2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_a64_movw_uabs_g2
+// CHECK: movk x7, #:abs_g2_nc:sym // encoding: [0x07'A',A,0xc0'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_a64_movw_uabs_g2_nc
+
+ movz x8, #:abs_g3:sym
+ movk x9, #:abs_g3:sym
+// CHECK: movz x8, #:abs_g3:sym // encoding: [0x08'A',A,0xe0'A',0xd2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3
+// CHECK: movk x9, #:abs_g3:sym // encoding: [0x09'A',A,0xe0'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3
+
+ movn x30, #:abs_g0_s:sym
+ movz x19, #:abs_g0_s:sym
+ movn w10, #:abs_g0_s:sym
+ movz w25, #:abs_g0_s:sym
+// CHECK: movn x30, #:abs_g0_s:sym // encoding: [0x1e'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+// CHECK: movz x19, #:abs_g0_s:sym // encoding: [0x13'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+// CHECK: movn w10, #:abs_g0_s:sym // encoding: [0x0a'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+// CHECK: movz w25, #:abs_g0_s:sym // encoding: [0x19'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0
+
+ movn x30, #:abs_g1_s:sym
+ movz x19, #:abs_g1_s:sym
+ movn w10, #:abs_g1_s:sym
+ movz w25, #:abs_g1_s:sym
+// CHECK: movn x30, #:abs_g1_s:sym // encoding: [0x1e'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+// CHECK: movz x19, #:abs_g1_s:sym // encoding: [0x13'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+// CHECK: movn w10, #:abs_g1_s:sym // encoding: [0x0a'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+// CHECK: movz w25, #:abs_g1_s:sym // encoding: [0x19'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1
+
+ movn x30, #:abs_g2_s:sym
+ movz x19, #:abs_g2_s:sym
+// CHECK: movn x30, #:abs_g2_s:sym // encoding: [0x1e'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2
+// CHECK: movz x19, #:abs_g2_s:sym // encoding: [0x13'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2
+
+//------------------------------------------------------------------------------
+// PC-relative addressing
+//------------------------------------------------------------------------------
+
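+// adr produces a byte address within +/-1MiB of the PC; adrp produces a
+// 4KiB-aligned page address within +/-4GiB. Symbolic operands emit the
+// fixup_a64_adr_prel(_page) fixups checked below.
+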
+ adr x2, loc
+ adr xzr, loc
+// CHECK: adr x2, loc                              // encoding: [0x02'A',A,A,0x10'A']
+// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel
+// CHECK: adr xzr, loc                             // encoding: [0x1f'A',A,A,0x10'A']
+// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel
+
+ adrp x29, loc
+// CHECK: adrp x29, loc                            // encoding: [0x1d'A',A,A,0x90'A']
+// CHECK: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel_page
+
+ adrp x30, #4096
+ adr x20, #0
+ adr x9, #-1
+ adr x5, #1048575
+// CHECK: adrp x30, #4096 // encoding: [0x1e,0x00,0x00,0xb0]
+// CHECK: adr x20, #0 // encoding: [0x14,0x00,0x00,0x10]
+// CHECK: adr x9, #-1 // encoding: [0xe9,0xff,0xff,0x70]
+// CHECK: adr x5, #1048575 // encoding: [0xe5,0xff,0x7f,0x70]
+
+ adr x9, #1048575
+ adr x2, #-1048576
+ adrp x9, #4294963200
+ adrp x20, #-4294967296
+// CHECK: adr x9, #1048575 // encoding: [0xe9,0xff,0x7f,0x70]
+// CHECK: adr x2, #-1048576 // encoding: [0x02,0x00,0x80,0x10]
+// CHECK: adrp x9, #4294963200 // encoding: [0xe9,0xff,0x7f,0xf0]
+// CHECK: adrp x20, #-4294967296 // encoding: [0x14,0x00,0x80,0x90]
+
+//------------------------------------------------------------------------------
+// System
+//------------------------------------------------------------------------------
+
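+// hint numbers 0-5 have the aliases nop, yield, wfe, wfi, sev and sevl;
+// unallocated hint numbers are printed numerically.
+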
+ hint #0
+ hint #127
+// CHECK: nop // encoding: [0x1f,0x20,0x03,0xd5]
+// CHECK: hint #127 // encoding: [0xff,0x2f,0x03,0xd5]
+
+ nop
+ yield
+ wfe
+ wfi
+ sev
+ sevl
+// CHECK: nop // encoding: [0x1f,0x20,0x03,0xd5]
+// CHECK: yield // encoding: [0x3f,0x20,0x03,0xd5]
+// CHECK: wfe // encoding: [0x5f,0x20,0x03,0xd5]
+// CHECK: wfi // encoding: [0x7f,0x20,0x03,0xd5]
+// CHECK: sev // encoding: [0x9f,0x20,0x03,0xd5]
+// CHECK: sevl // encoding: [0xbf,0x20,0x03,0xd5]
+
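+// clrex takes an optional 4-bit immediate that defaults to #15, and the
+// default is printed as plain clrex.
+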
+ clrex
+ clrex #0
+ clrex #7
+ clrex #15
+// CHECK: clrex // encoding: [0x5f,0x3f,0x03,0xd5]
+// CHECK: clrex #0 // encoding: [0x5f,0x30,0x03,0xd5]
+// CHECK: clrex #7 // encoding: [0x5f,0x37,0x03,0xd5]
+// CHECK: clrex // encoding: [0x5f,0x3f,0x03,0xd5]
+
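+// The barrier option is a 4-bit CRm value; named options (sy = #15, st = #14,
+// ld = #13, ish = #11, nsh = #7, osh = #3, and their *ld/*st variants) are
+// printed by name, while unallocated values such as #0 and #12 stay numeric.
+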
+ dsb #0
+ dsb #12
+ dsb #15
+ dsb oshld
+ dsb oshst
+ dsb osh
+ dsb nshld
+ dsb nshst
+ dsb nsh
+ dsb ishld
+ dsb ishst
+ dsb ish
+ dsb ld
+ dsb st
+ dsb sy
+// CHECK: dsb #0 // encoding: [0x9f,0x30,0x03,0xd5]
+// CHECK: dsb #12 // encoding: [0x9f,0x3c,0x03,0xd5]
+// CHECK: dsb sy // encoding: [0x9f,0x3f,0x03,0xd5]
+// CHECK: dsb oshld // encoding: [0x9f,0x31,0x03,0xd5]
+// CHECK: dsb oshst // encoding: [0x9f,0x32,0x03,0xd5]
+// CHECK: dsb osh // encoding: [0x9f,0x33,0x03,0xd5]
+// CHECK: dsb nshld // encoding: [0x9f,0x35,0x03,0xd5]
+// CHECK: dsb nshst // encoding: [0x9f,0x36,0x03,0xd5]
+// CHECK: dsb nsh // encoding: [0x9f,0x37,0x03,0xd5]
+// CHECK: dsb ishld // encoding: [0x9f,0x39,0x03,0xd5]
+// CHECK: dsb ishst // encoding: [0x9f,0x3a,0x03,0xd5]
+// CHECK: dsb ish // encoding: [0x9f,0x3b,0x03,0xd5]
+// CHECK: dsb ld // encoding: [0x9f,0x3d,0x03,0xd5]
+// CHECK: dsb st // encoding: [0x9f,0x3e,0x03,0xd5]
+// CHECK: dsb sy // encoding: [0x9f,0x3f,0x03,0xd5]
+
+ dmb #0
+ dmb #12
+ dmb #15
+ dmb oshld
+ dmb oshst
+ dmb osh
+ dmb nshld
+ dmb nshst
+ dmb nsh
+ dmb ishld
+ dmb ishst
+ dmb ish
+ dmb ld
+ dmb st
+ dmb sy
+// CHECK: dmb #0 // encoding: [0xbf,0x30,0x03,0xd5]
+// CHECK: dmb #12 // encoding: [0xbf,0x3c,0x03,0xd5]
+// CHECK: dmb sy // encoding: [0xbf,0x3f,0x03,0xd5]
+// CHECK: dmb oshld // encoding: [0xbf,0x31,0x03,0xd5]
+// CHECK: dmb oshst // encoding: [0xbf,0x32,0x03,0xd5]
+// CHECK: dmb osh // encoding: [0xbf,0x33,0x03,0xd5]
+// CHECK: dmb nshld // encoding: [0xbf,0x35,0x03,0xd5]
+// CHECK: dmb nshst // encoding: [0xbf,0x36,0x03,0xd5]
+// CHECK: dmb nsh // encoding: [0xbf,0x37,0x03,0xd5]
+// CHECK: dmb ishld // encoding: [0xbf,0x39,0x03,0xd5]
+// CHECK: dmb ishst // encoding: [0xbf,0x3a,0x03,0xd5]
+// CHECK: dmb ish // encoding: [0xbf,0x3b,0x03,0xd5]
+// CHECK: dmb ld // encoding: [0xbf,0x3d,0x03,0xd5]
+// CHECK: dmb st // encoding: [0xbf,0x3e,0x03,0xd5]
+// CHECK: dmb sy // encoding: [0xbf,0x3f,0x03,0xd5]
+
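+// isb's only architected option is sy (#15), which is also the default, so
+// "isb" and "isb sy" print identically; other values stay numeric.
+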
+ isb sy
+ isb
+ isb #12
+// CHECK: isb // encoding: [0xdf,0x3f,0x03,0xd5]
+// CHECK: isb // encoding: [0xdf,0x3f,0x03,0xd5]
+// CHECK: isb #12 // encoding: [0xdf,0x3c,0x03,0xd5]
+
+
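+// The immediate form of msr writes the PSTATE fields spsel, daifset and
+// daifclr.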
+ msr spsel, #0
+ msr daifset, #15
+ msr daifclr, #12
+// CHECK: msr spsel, #0 // encoding: [0xbf,0x40,0x00,0xd5]
+// CHECK: msr daifset, #15 // encoding: [0xdf,0x4f,0x03,0xd5]
+// CHECK: msr daifclr, #12 // encoding: [0xff,0x4c,0x03,0xd5]
+
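+// sys is the generic system instruction (#op1, Cn, Cm, #op2 and an optional
+// Xt that defaults to xzr); sysl reads a result back. The ic, dc, at and
+// tlbi instructions below are aliases of sys with fixed operand encodings.
+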
+ sys #7, c5, c9, #7, x5
+ sys #0, c15, c15, #2
+// CHECK: sys #7, c5, c9, #7, x5 // encoding: [0xe5,0x59,0x0f,0xd5]
+// CHECK: sys #0, c15, c15, #2, xzr // encoding: [0x5f,0xff,0x08,0xd5]
+
+ sysl x9, #7, c5, c9, #7
+ sysl x1, #0, c15, c15, #2
+// CHECK: sysl x9, #7, c5, c9, #7 // encoding: [0xe9,0x59,0x2f,0xd5]
+// CHECK: sysl x1, #0, c15, c15, #2 // encoding: [0x41,0xff,0x28,0xd5]
+
+ ic ialluis
+ ic iallu
+ ic ivau, x9
+// CHECK: ic ialluis // encoding: [0x1f,0x71,0x08,0xd5]
+// CHECK: ic iallu // encoding: [0x1f,0x75,0x08,0xd5]
+// CHECK: ic ivau, x9 // encoding: [0x29,0x75,0x0b,0xd5]
+
+ dc zva, x12
+ dc ivac, xzr
+ dc isw, x2
+ dc cvac, x9
+ dc csw, x10
+ dc cvau, x0
+ dc civac, x3
+ dc cisw, x30
+// CHECK: dc zva, x12 // encoding: [0x2c,0x74,0x0b,0xd5]
+// CHECK: dc ivac, xzr // encoding: [0x3f,0x76,0x08,0xd5]
+// CHECK: dc isw, x2 // encoding: [0x42,0x76,0x08,0xd5]
+// CHECK: dc cvac, x9 // encoding: [0x29,0x7a,0x0b,0xd5]
+// CHECK: dc csw, x10 // encoding: [0x4a,0x7a,0x08,0xd5]
+// CHECK: dc cvau, x0 // encoding: [0x20,0x7b,0x0b,0xd5]
+// CHECK: dc civac, x3 // encoding: [0x23,0x7e,0x0b,0xd5]
+// CHECK: dc cisw, x30 // encoding: [0x5e,0x7e,0x08,0xd5]
+
+ at S1E1R, x19
+ at S1E2R, x19
+ at S1E3R, x19
+ at S1E1W, x19
+ at S1E2W, x19
+ at S1E3W, x19
+ at S1E0R, x19
+ at S1E0W, x19
+ at S12E1R, x20
+ at S12E1W, x20
+ at S12E0R, x20
+ at S12E0W, x20
+// CHECK: at s1e1r, x19 // encoding: [0x13,0x78,0x08,0xd5]
+// CHECK: at s1e2r, x19 // encoding: [0x13,0x78,0x0c,0xd5]
+// CHECK: at s1e3r, x19 // encoding: [0x13,0x78,0x0e,0xd5]
+// CHECK: at s1e1w, x19 // encoding: [0x33,0x78,0x08,0xd5]
+// CHECK: at s1e2w, x19 // encoding: [0x33,0x78,0x0c,0xd5]
+// CHECK: at s1e3w, x19 // encoding: [0x33,0x78,0x0e,0xd5]
+// CHECK: at s1e0r, x19 // encoding: [0x53,0x78,0x08,0xd5]
+// CHECK: at s1e0w, x19 // encoding: [0x73,0x78,0x08,0xd5]
+// CHECK: at s12e1r, x20 // encoding: [0x94,0x78,0x0c,0xd5]
+// CHECK: at s12e1w, x20 // encoding: [0xb4,0x78,0x0c,0xd5]
+// CHECK: at s12e0r, x20 // encoding: [0xd4,0x78,0x0c,0xd5]
+// CHECK: at s12e0w, x20 // encoding: [0xf4,0x78,0x0c,0xd5]
+
+ tlbi IPAS2E1IS, x4
+ tlbi IPAS2LE1IS, x9
+ tlbi VMALLE1IS
+ tlbi ALLE2IS
+ tlbi ALLE3IS
+ tlbi VAE1IS, x1
+ tlbi VAE2IS, x2
+ tlbi VAE3IS, x3
+ tlbi ASIDE1IS, x5
+ tlbi VAAE1IS, x9
+ tlbi ALLE1IS
+ tlbi VALE1IS, x10
+ tlbi VALE2IS, x11
+ tlbi VALE3IS, x13
+ tlbi VMALLS12E1IS
+ tlbi VAALE1IS, x14
+ tlbi IPAS2E1, x15
+ tlbi IPAS2LE1, x16
+ tlbi VMALLE1
+ tlbi ALLE2
+ tlbi ALLE3
+ tlbi VAE1, x17
+ tlbi VAE2, x18
+ tlbi VAE3, x19
+ tlbi ASIDE1, x20
+ tlbi VAAE1, x21
+ tlbi ALLE1
+ tlbi VALE1, x22
+ tlbi VALE2, x23
+ tlbi VALE3, x24
+ tlbi VMALLS12E1
+ tlbi VAALE1, x25
+// CHECK: tlbi ipas2e1is, x4 // encoding: [0x24,0x80,0x0c,0xd5]
+// CHECK: tlbi ipas2le1is, x9 // encoding: [0xa9,0x80,0x0c,0xd5]
+// CHECK: tlbi vmalle1is // encoding: [0x1f,0x83,0x08,0xd5]
+// CHECK: tlbi alle2is // encoding: [0x1f,0x83,0x0c,0xd5]
+// CHECK: tlbi alle3is // encoding: [0x1f,0x83,0x0e,0xd5]
+// CHECK: tlbi vae1is, x1 // encoding: [0x21,0x83,0x08,0xd5]
+// CHECK: tlbi vae2is, x2 // encoding: [0x22,0x83,0x0c,0xd5]
+// CHECK: tlbi vae3is, x3 // encoding: [0x23,0x83,0x0e,0xd5]
+// CHECK: tlbi aside1is, x5 // encoding: [0x45,0x83,0x08,0xd5]
+// CHECK: tlbi vaae1is, x9 // encoding: [0x69,0x83,0x08,0xd5]
+// CHECK: tlbi alle1is // encoding: [0x9f,0x83,0x0c,0xd5]
+// CHECK: tlbi vale1is, x10 // encoding: [0xaa,0x83,0x08,0xd5]
+// CHECK: tlbi vale2is, x11 // encoding: [0xab,0x83,0x0c,0xd5]
+// CHECK: tlbi vale3is, x13 // encoding: [0xad,0x83,0x0e,0xd5]
+// CHECK: tlbi vmalls12e1is // encoding: [0xdf,0x83,0x0c,0xd5]
+// CHECK: tlbi vaale1is, x14 // encoding: [0xee,0x83,0x08,0xd5]
+// CHECK: tlbi ipas2e1, x15 // encoding: [0x2f,0x84,0x0c,0xd5]
+// CHECK: tlbi ipas2le1, x16 // encoding: [0xb0,0x84,0x0c,0xd5]
+// CHECK: tlbi vmalle1 // encoding: [0x1f,0x87,0x08,0xd5]
+// CHECK: tlbi alle2 // encoding: [0x1f,0x87,0x0c,0xd5]
+// CHECK: tlbi alle3 // encoding: [0x1f,0x87,0x0e,0xd5]
+// CHECK: tlbi vae1, x17 // encoding: [0x31,0x87,0x08,0xd5]
+// CHECK: tlbi vae2, x18 // encoding: [0x32,0x87,0x0c,0xd5]
+// CHECK: tlbi vae3, x19 // encoding: [0x33,0x87,0x0e,0xd5]
+// CHECK: tlbi aside1, x20 // encoding: [0x54,0x87,0x08,0xd5]
+// CHECK: tlbi vaae1, x21 // encoding: [0x75,0x87,0x08,0xd5]
+// CHECK: tlbi alle1 // encoding: [0x9f,0x87,0x0c,0xd5]
+// CHECK: tlbi vale1, x22 // encoding: [0xb6,0x87,0x08,0xd5]
+// CHECK: tlbi vale2, x23 // encoding: [0xb7,0x87,0x0c,0xd5]
+// CHECK: tlbi vale3, x24 // encoding: [0xb8,0x87,0x0e,0xd5]
+// CHECK: tlbi vmalls12e1 // encoding: [0xdf,0x87,0x0c,0xd5]
+// CHECK: tlbi vaale1, x25 // encoding: [0xf9,0x87,0x08,0xd5]
+
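+// System register names are case-insensitive in the assembler; the
+// disassembler prints them in lower case, as the CHECK lines show.
+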
+ msr TEECR32_EL1, x12
+ msr OSDTRRX_EL1, x12
+ msr MDCCINT_EL1, x12
+ msr MDSCR_EL1, x12
+ msr OSDTRTX_EL1, x12
+ msr DBGDTR_EL0, x12
+ msr DBGDTRTX_EL0, x12
+ msr OSECCR_EL1, x12
+ msr DBGVCR32_EL2, x12
+ msr DBGBVR0_EL1, x12
+ msr DBGBVR1_EL1, x12
+ msr DBGBVR2_EL1, x12
+ msr DBGBVR3_EL1, x12
+ msr DBGBVR4_EL1, x12
+ msr DBGBVR5_EL1, x12
+ msr DBGBVR6_EL1, x12
+ msr DBGBVR7_EL1, x12
+ msr DBGBVR8_EL1, x12
+ msr DBGBVR9_EL1, x12
+ msr DBGBVR10_EL1, x12
+ msr DBGBVR11_EL1, x12
+ msr DBGBVR12_EL1, x12
+ msr DBGBVR13_EL1, x12
+ msr DBGBVR14_EL1, x12
+ msr DBGBVR15_EL1, x12
+ msr DBGBCR0_EL1, x12
+ msr DBGBCR1_EL1, x12
+ msr DBGBCR2_EL1, x12
+ msr DBGBCR3_EL1, x12
+ msr DBGBCR4_EL1, x12
+ msr DBGBCR5_EL1, x12
+ msr DBGBCR6_EL1, x12
+ msr DBGBCR7_EL1, x12
+ msr DBGBCR8_EL1, x12
+ msr DBGBCR9_EL1, x12
+ msr DBGBCR10_EL1, x12
+ msr DBGBCR11_EL1, x12
+ msr DBGBCR12_EL1, x12
+ msr DBGBCR13_EL1, x12
+ msr DBGBCR14_EL1, x12
+ msr DBGBCR15_EL1, x12
+ msr DBGWVR0_EL1, x12
+ msr DBGWVR1_EL1, x12
+ msr DBGWVR2_EL1, x12
+ msr DBGWVR3_EL1, x12
+ msr DBGWVR4_EL1, x12
+ msr DBGWVR5_EL1, x12
+ msr DBGWVR6_EL1, x12
+ msr DBGWVR7_EL1, x12
+ msr DBGWVR8_EL1, x12
+ msr DBGWVR9_EL1, x12
+ msr DBGWVR10_EL1, x12
+ msr DBGWVR11_EL1, x12
+ msr DBGWVR12_EL1, x12
+ msr DBGWVR13_EL1, x12
+ msr DBGWVR14_EL1, x12
+ msr DBGWVR15_EL1, x12
+ msr DBGWCR0_EL1, x12
+ msr DBGWCR1_EL1, x12
+ msr DBGWCR2_EL1, x12
+ msr DBGWCR3_EL1, x12
+ msr DBGWCR4_EL1, x12
+ msr DBGWCR5_EL1, x12
+ msr DBGWCR6_EL1, x12
+ msr DBGWCR7_EL1, x12
+ msr DBGWCR8_EL1, x12
+ msr DBGWCR9_EL1, x12
+ msr DBGWCR10_EL1, x12
+ msr DBGWCR11_EL1, x12
+ msr DBGWCR12_EL1, x12
+ msr DBGWCR13_EL1, x12
+ msr DBGWCR14_EL1, x12
+ msr DBGWCR15_EL1, x12
+ msr TEEHBR32_EL1, x12
+ msr OSLAR_EL1, x12
+ msr OSDLR_EL1, x12
+ msr DBGPRCR_EL1, x12
+ msr DBGCLAIMSET_EL1, x12
+ msr DBGCLAIMCLR_EL1, x12
+ msr CSSELR_EL1, x12
+ msr VPIDR_EL2, x12
+ msr VMPIDR_EL2, x12
+ msr SCTLR_EL1, x12
+ msr SCTLR_EL2, x12
+ msr SCTLR_EL3, x12
+ msr ACTLR_EL1, x12
+ msr ACTLR_EL2, x12
+ msr ACTLR_EL3, x12
+ msr CPACR_EL1, x12
+ msr HCR_EL2, x12
+ msr SCR_EL3, x12
+ msr MDCR_EL2, x12
+ msr SDER32_EL3, x12
+ msr CPTR_EL2, x12
+ msr CPTR_EL3, x12
+ msr HSTR_EL2, x12
+ msr HACR_EL2, x12
+ msr MDCR_EL3, x12
+ msr TTBR0_EL1, x12
+ msr TTBR0_EL2, x12
+ msr TTBR0_EL3, x12
+ msr TTBR1_EL1, x12
+ msr TCR_EL1, x12
+ msr TCR_EL2, x12
+ msr TCR_EL3, x12
+ msr VTTBR_EL2, x12
+ msr VTCR_EL2, x12
+ msr DACR32_EL2, x12
+ msr SPSR_EL1, x12
+ msr SPSR_EL2, x12
+ msr SPSR_EL3, x12
+ msr ELR_EL1, x12
+ msr ELR_EL2, x12
+ msr ELR_EL3, x12
+ msr SP_EL0, x12
+ msr SP_EL1, x12
+ msr SP_EL2, x12
+ msr SPSel, x12
+ msr NZCV, x12
+ msr DAIF, x12
+ msr CurrentEL, x12
+ msr SPSR_irq, x12
+ msr SPSR_abt, x12
+ msr SPSR_und, x12
+ msr SPSR_fiq, x12
+ msr FPCR, x12
+ msr FPSR, x12
+ msr DSPSR_EL0, x12
+ msr DLR_EL0, x12
+ msr IFSR32_EL2, x12
+ msr AFSR0_EL1, x12
+ msr AFSR0_EL2, x12
+ msr AFSR0_EL3, x12
+ msr AFSR1_EL1, x12
+ msr AFSR1_EL2, x12
+ msr AFSR1_EL3, x12
+ msr ESR_EL1, x12
+ msr ESR_EL2, x12
+ msr ESR_EL3, x12
+ msr FPEXC32_EL2, x12
+ msr FAR_EL1, x12
+ msr FAR_EL2, x12
+ msr FAR_EL3, x12
+ msr HPFAR_EL2, x12
+ msr PAR_EL1, x12
+ msr PMCR_EL0, x12
+ msr PMCNTENSET_EL0, x12
+ msr PMCNTENCLR_EL0, x12
+ msr PMOVSCLR_EL0, x12
+ msr PMSELR_EL0, x12
+ msr PMCCNTR_EL0, x12
+ msr PMXEVTYPER_EL0, x12
+ msr PMXEVCNTR_EL0, x12
+ msr PMUSERENR_EL0, x12
+ msr PMINTENSET_EL1, x12
+ msr PMINTENCLR_EL1, x12
+ msr PMOVSSET_EL0, x12
+ msr MAIR_EL1, x12
+ msr MAIR_EL2, x12
+ msr MAIR_EL3, x12
+ msr AMAIR_EL1, x12
+ msr AMAIR_EL2, x12
+ msr AMAIR_EL3, x12
+ msr VBAR_EL1, x12
+ msr VBAR_EL2, x12
+ msr VBAR_EL3, x12
+ msr RMR_EL1, x12
+ msr RMR_EL2, x12
+ msr RMR_EL3, x12
+ msr CONTEXTIDR_EL1, x12
+ msr TPIDR_EL0, x12
+ msr TPIDR_EL2, x12
+ msr TPIDR_EL3, x12
+ msr TPIDRRO_EL0, x12
+ msr TPIDR_EL1, x12
+ msr CNTFRQ_EL0, x12
+ msr CNTVOFF_EL2, x12
+ msr CNTKCTL_EL1, x12
+ msr CNTHCTL_EL2, x12
+ msr CNTP_TVAL_EL0, x12
+ msr CNTHP_TVAL_EL2, x12
+ msr CNTPS_TVAL_EL1, x12
+ msr CNTP_CTL_EL0, x12
+ msr CNTHP_CTL_EL2, x12
+ msr CNTPS_CTL_EL1, x12
+ msr CNTP_CVAL_EL0, x12
+ msr CNTHP_CVAL_EL2, x12
+ msr CNTPS_CVAL_EL1, x12
+ msr CNTV_TVAL_EL0, x12
+ msr CNTV_CTL_EL0, x12
+ msr CNTV_CVAL_EL0, x12
+ msr PMEVCNTR0_EL0, x12
+ msr PMEVCNTR1_EL0, x12
+ msr PMEVCNTR2_EL0, x12
+ msr PMEVCNTR3_EL0, x12
+ msr PMEVCNTR4_EL0, x12
+ msr PMEVCNTR5_EL0, x12
+ msr PMEVCNTR6_EL0, x12
+ msr PMEVCNTR7_EL0, x12
+ msr PMEVCNTR8_EL0, x12
+ msr PMEVCNTR9_EL0, x12
+ msr PMEVCNTR10_EL0, x12
+ msr PMEVCNTR11_EL0, x12
+ msr PMEVCNTR12_EL0, x12
+ msr PMEVCNTR13_EL0, x12
+ msr PMEVCNTR14_EL0, x12
+ msr PMEVCNTR15_EL0, x12
+ msr PMEVCNTR16_EL0, x12
+ msr PMEVCNTR17_EL0, x12
+ msr PMEVCNTR18_EL0, x12
+ msr PMEVCNTR19_EL0, x12
+ msr PMEVCNTR20_EL0, x12
+ msr PMEVCNTR21_EL0, x12
+ msr PMEVCNTR22_EL0, x12
+ msr PMEVCNTR23_EL0, x12
+ msr PMEVCNTR24_EL0, x12
+ msr PMEVCNTR25_EL0, x12
+ msr PMEVCNTR26_EL0, x12
+ msr PMEVCNTR27_EL0, x12
+ msr PMEVCNTR28_EL0, x12
+ msr PMEVCNTR29_EL0, x12
+ msr PMEVCNTR30_EL0, x12
+ msr PMCCFILTR_EL0, x12
+ msr PMEVTYPER0_EL0, x12
+ msr PMEVTYPER1_EL0, x12
+ msr PMEVTYPER2_EL0, x12
+ msr PMEVTYPER3_EL0, x12
+ msr PMEVTYPER4_EL0, x12
+ msr PMEVTYPER5_EL0, x12
+ msr PMEVTYPER6_EL0, x12
+ msr PMEVTYPER7_EL0, x12
+ msr PMEVTYPER8_EL0, x12
+ msr PMEVTYPER9_EL0, x12
+ msr PMEVTYPER10_EL0, x12
+ msr PMEVTYPER11_EL0, x12
+ msr PMEVTYPER12_EL0, x12
+ msr PMEVTYPER13_EL0, x12
+ msr PMEVTYPER14_EL0, x12
+ msr PMEVTYPER15_EL0, x12
+ msr PMEVTYPER16_EL0, x12
+ msr PMEVTYPER17_EL0, x12
+ msr PMEVTYPER18_EL0, x12
+ msr PMEVTYPER19_EL0, x12
+ msr PMEVTYPER20_EL0, x12
+ msr PMEVTYPER21_EL0, x12
+ msr PMEVTYPER22_EL0, x12
+ msr PMEVTYPER23_EL0, x12
+ msr PMEVTYPER24_EL0, x12
+ msr PMEVTYPER25_EL0, x12
+ msr PMEVTYPER26_EL0, x12
+ msr PMEVTYPER27_EL0, x12
+ msr PMEVTYPER28_EL0, x12
+ msr PMEVTYPER29_EL0, x12
+ msr PMEVTYPER30_EL0, x12
+// CHECK: msr teecr32_el1, x12 // encoding: [0x0c,0x00,0x12,0xd5]
+// CHECK: msr osdtrrx_el1, x12 // encoding: [0x4c,0x00,0x10,0xd5]
+// CHECK: msr mdccint_el1, x12 // encoding: [0x0c,0x02,0x10,0xd5]
+// CHECK: msr mdscr_el1, x12 // encoding: [0x4c,0x02,0x10,0xd5]
+// CHECK: msr osdtrtx_el1, x12 // encoding: [0x4c,0x03,0x10,0xd5]
+// CHECK: msr dbgdtr_el0, x12 // encoding: [0x0c,0x04,0x13,0xd5]
+// CHECK: msr dbgdtrtx_el0, x12 // encoding: [0x0c,0x05,0x13,0xd5]
+// CHECK: msr oseccr_el1, x12 // encoding: [0x4c,0x06,0x10,0xd5]
+// CHECK: msr dbgvcr32_el2, x12 // encoding: [0x0c,0x07,0x14,0xd5]
+// CHECK: msr dbgbvr0_el1, x12 // encoding: [0x8c,0x00,0x10,0xd5]
+// CHECK: msr dbgbvr1_el1, x12 // encoding: [0x8c,0x01,0x10,0xd5]
+// CHECK: msr dbgbvr2_el1, x12 // encoding: [0x8c,0x02,0x10,0xd5]
+// CHECK: msr dbgbvr3_el1, x12 // encoding: [0x8c,0x03,0x10,0xd5]
+// CHECK: msr dbgbvr4_el1, x12 // encoding: [0x8c,0x04,0x10,0xd5]
+// CHECK: msr dbgbvr5_el1, x12 // encoding: [0x8c,0x05,0x10,0xd5]
+// CHECK: msr dbgbvr6_el1, x12 // encoding: [0x8c,0x06,0x10,0xd5]
+// CHECK: msr dbgbvr7_el1, x12 // encoding: [0x8c,0x07,0x10,0xd5]
+// CHECK: msr dbgbvr8_el1, x12 // encoding: [0x8c,0x08,0x10,0xd5]
+// CHECK: msr dbgbvr9_el1, x12 // encoding: [0x8c,0x09,0x10,0xd5]
+// CHECK: msr dbgbvr10_el1, x12 // encoding: [0x8c,0x0a,0x10,0xd5]
+// CHECK: msr dbgbvr11_el1, x12 // encoding: [0x8c,0x0b,0x10,0xd5]
+// CHECK: msr dbgbvr12_el1, x12 // encoding: [0x8c,0x0c,0x10,0xd5]
+// CHECK: msr dbgbvr13_el1, x12 // encoding: [0x8c,0x0d,0x10,0xd5]
+// CHECK: msr dbgbvr14_el1, x12 // encoding: [0x8c,0x0e,0x10,0xd5]
+// CHECK: msr dbgbvr15_el1, x12 // encoding: [0x8c,0x0f,0x10,0xd5]
+// CHECK: msr dbgbcr0_el1, x12 // encoding: [0xac,0x00,0x10,0xd5]
+// CHECK: msr dbgbcr1_el1, x12 // encoding: [0xac,0x01,0x10,0xd5]
+// CHECK: msr dbgbcr2_el1, x12 // encoding: [0xac,0x02,0x10,0xd5]
+// CHECK: msr dbgbcr3_el1, x12 // encoding: [0xac,0x03,0x10,0xd5]
+// CHECK: msr dbgbcr4_el1, x12 // encoding: [0xac,0x04,0x10,0xd5]
+// CHECK: msr dbgbcr5_el1, x12 // encoding: [0xac,0x05,0x10,0xd5]
+// CHECK: msr dbgbcr6_el1, x12 // encoding: [0xac,0x06,0x10,0xd5]
+// CHECK: msr dbgbcr7_el1, x12 // encoding: [0xac,0x07,0x10,0xd5]
+// CHECK: msr dbgbcr8_el1, x12 // encoding: [0xac,0x08,0x10,0xd5]
+// CHECK: msr dbgbcr9_el1, x12 // encoding: [0xac,0x09,0x10,0xd5]
+// CHECK: msr dbgbcr10_el1, x12 // encoding: [0xac,0x0a,0x10,0xd5]
+// CHECK: msr dbgbcr11_el1, x12 // encoding: [0xac,0x0b,0x10,0xd5]
+// CHECK: msr dbgbcr12_el1, x12 // encoding: [0xac,0x0c,0x10,0xd5]
+// CHECK: msr dbgbcr13_el1, x12 // encoding: [0xac,0x0d,0x10,0xd5]
+// CHECK: msr dbgbcr14_el1, x12 // encoding: [0xac,0x0e,0x10,0xd5]
+// CHECK: msr dbgbcr15_el1, x12 // encoding: [0xac,0x0f,0x10,0xd5]
+// CHECK: msr dbgwvr0_el1, x12 // encoding: [0xcc,0x00,0x10,0xd5]
+// CHECK: msr dbgwvr1_el1, x12 // encoding: [0xcc,0x01,0x10,0xd5]
+// CHECK: msr dbgwvr2_el1, x12 // encoding: [0xcc,0x02,0x10,0xd5]
+// CHECK: msr dbgwvr3_el1, x12 // encoding: [0xcc,0x03,0x10,0xd5]
+// CHECK: msr dbgwvr4_el1, x12 // encoding: [0xcc,0x04,0x10,0xd5]
+// CHECK: msr dbgwvr5_el1, x12 // encoding: [0xcc,0x05,0x10,0xd5]
+// CHECK: msr dbgwvr6_el1, x12 // encoding: [0xcc,0x06,0x10,0xd5]
+// CHECK: msr dbgwvr7_el1, x12 // encoding: [0xcc,0x07,0x10,0xd5]
+// CHECK: msr dbgwvr8_el1, x12 // encoding: [0xcc,0x08,0x10,0xd5]
+// CHECK: msr dbgwvr9_el1, x12 // encoding: [0xcc,0x09,0x10,0xd5]
+// CHECK: msr dbgwvr10_el1, x12 // encoding: [0xcc,0x0a,0x10,0xd5]
+// CHECK: msr dbgwvr11_el1, x12 // encoding: [0xcc,0x0b,0x10,0xd5]
+// CHECK: msr dbgwvr12_el1, x12 // encoding: [0xcc,0x0c,0x10,0xd5]
+// CHECK: msr dbgwvr13_el1, x12 // encoding: [0xcc,0x0d,0x10,0xd5]
+// CHECK: msr dbgwvr14_el1, x12 // encoding: [0xcc,0x0e,0x10,0xd5]
+// CHECK: msr dbgwvr15_el1, x12 // encoding: [0xcc,0x0f,0x10,0xd5]
+// CHECK: msr dbgwcr0_el1, x12 // encoding: [0xec,0x00,0x10,0xd5]
+// CHECK: msr dbgwcr1_el1, x12 // encoding: [0xec,0x01,0x10,0xd5]
+// CHECK: msr dbgwcr2_el1, x12 // encoding: [0xec,0x02,0x10,0xd5]
+// CHECK: msr dbgwcr3_el1, x12 // encoding: [0xec,0x03,0x10,0xd5]
+// CHECK: msr dbgwcr4_el1, x12 // encoding: [0xec,0x04,0x10,0xd5]
+// CHECK: msr dbgwcr5_el1, x12 // encoding: [0xec,0x05,0x10,0xd5]
+// CHECK: msr dbgwcr6_el1, x12 // encoding: [0xec,0x06,0x10,0xd5]
+// CHECK: msr dbgwcr7_el1, x12 // encoding: [0xec,0x07,0x10,0xd5]
+// CHECK: msr dbgwcr8_el1, x12 // encoding: [0xec,0x08,0x10,0xd5]
+// CHECK: msr dbgwcr9_el1, x12 // encoding: [0xec,0x09,0x10,0xd5]
+// CHECK: msr dbgwcr10_el1, x12 // encoding: [0xec,0x0a,0x10,0xd5]
+// CHECK: msr dbgwcr11_el1, x12 // encoding: [0xec,0x0b,0x10,0xd5]
+// CHECK: msr dbgwcr12_el1, x12 // encoding: [0xec,0x0c,0x10,0xd5]
+// CHECK: msr dbgwcr13_el1, x12 // encoding: [0xec,0x0d,0x10,0xd5]
+// CHECK: msr dbgwcr14_el1, x12 // encoding: [0xec,0x0e,0x10,0xd5]
+// CHECK: msr dbgwcr15_el1, x12 // encoding: [0xec,0x0f,0x10,0xd5]
+// CHECK: msr teehbr32_el1, x12 // encoding: [0x0c,0x10,0x12,0xd5]
+// CHECK: msr oslar_el1, x12 // encoding: [0x8c,0x10,0x10,0xd5]
+// CHECK: msr osdlr_el1, x12 // encoding: [0x8c,0x13,0x10,0xd5]
+// CHECK: msr dbgprcr_el1, x12 // encoding: [0x8c,0x14,0x10,0xd5]
+// CHECK: msr dbgclaimset_el1, x12 // encoding: [0xcc,0x78,0x10,0xd5]
+// CHECK: msr dbgclaimclr_el1, x12 // encoding: [0xcc,0x79,0x10,0xd5]
+// CHECK: msr csselr_el1, x12 // encoding: [0x0c,0x00,0x1a,0xd5]
+// CHECK: msr vpidr_el2, x12 // encoding: [0x0c,0x00,0x1c,0xd5]
+// CHECK: msr vmpidr_el2, x12 // encoding: [0xac,0x00,0x1c,0xd5]
+// CHECK: msr sctlr_el1, x12 // encoding: [0x0c,0x10,0x18,0xd5]
+// CHECK: msr sctlr_el2, x12 // encoding: [0x0c,0x10,0x1c,0xd5]
+// CHECK: msr sctlr_el3, x12 // encoding: [0x0c,0x10,0x1e,0xd5]
+// CHECK: msr actlr_el1, x12 // encoding: [0x2c,0x10,0x18,0xd5]
+// CHECK: msr actlr_el2, x12 // encoding: [0x2c,0x10,0x1c,0xd5]
+// CHECK: msr actlr_el3, x12 // encoding: [0x2c,0x10,0x1e,0xd5]
+// CHECK: msr cpacr_el1, x12 // encoding: [0x4c,0x10,0x18,0xd5]
+// CHECK: msr hcr_el2, x12 // encoding: [0x0c,0x11,0x1c,0xd5]
+// CHECK: msr scr_el3, x12 // encoding: [0x0c,0x11,0x1e,0xd5]
+// CHECK: msr mdcr_el2, x12 // encoding: [0x2c,0x11,0x1c,0xd5]
+// CHECK: msr sder32_el3, x12 // encoding: [0x2c,0x11,0x1e,0xd5]
+// CHECK: msr cptr_el2, x12 // encoding: [0x4c,0x11,0x1c,0xd5]
+// CHECK: msr cptr_el3, x12 // encoding: [0x4c,0x11,0x1e,0xd5]
+// CHECK: msr hstr_el2, x12 // encoding: [0x6c,0x11,0x1c,0xd5]
+// CHECK: msr hacr_el2, x12 // encoding: [0xec,0x11,0x1c,0xd5]
+// CHECK: msr mdcr_el3, x12 // encoding: [0x2c,0x13,0x1e,0xd5]
+// CHECK: msr ttbr0_el1, x12 // encoding: [0x0c,0x20,0x18,0xd5]
+// CHECK: msr ttbr0_el2, x12 // encoding: [0x0c,0x20,0x1c,0xd5]
+// CHECK: msr ttbr0_el3, x12 // encoding: [0x0c,0x20,0x1e,0xd5]
+// CHECK: msr ttbr1_el1, x12 // encoding: [0x2c,0x20,0x18,0xd5]
+// CHECK: msr tcr_el1, x12 // encoding: [0x4c,0x20,0x18,0xd5]
+// CHECK: msr tcr_el2, x12 // encoding: [0x4c,0x20,0x1c,0xd5]
+// CHECK: msr tcr_el3, x12 // encoding: [0x4c,0x20,0x1e,0xd5]
+// CHECK: msr vttbr_el2, x12 // encoding: [0x0c,0x21,0x1c,0xd5]
+// CHECK: msr vtcr_el2, x12 // encoding: [0x4c,0x21,0x1c,0xd5]
+// CHECK: msr dacr32_el2, x12 // encoding: [0x0c,0x30,0x1c,0xd5]
+// CHECK: msr spsr_el1, x12 // encoding: [0x0c,0x40,0x18,0xd5]
+// CHECK: msr spsr_el2, x12 // encoding: [0x0c,0x40,0x1c,0xd5]
+// CHECK: msr spsr_el3, x12 // encoding: [0x0c,0x40,0x1e,0xd5]
+// CHECK: msr elr_el1, x12 // encoding: [0x2c,0x40,0x18,0xd5]
+// CHECK: msr elr_el2, x12 // encoding: [0x2c,0x40,0x1c,0xd5]
+// CHECK: msr elr_el3, x12 // encoding: [0x2c,0x40,0x1e,0xd5]
+// CHECK: msr sp_el0, x12 // encoding: [0x0c,0x41,0x18,0xd5]
+// CHECK: msr sp_el1, x12 // encoding: [0x0c,0x41,0x1c,0xd5]
+// CHECK: msr sp_el2, x12 // encoding: [0x0c,0x41,0x1e,0xd5]
+// CHECK: msr spsel, x12 // encoding: [0x0c,0x42,0x18,0xd5]
+// CHECK: msr nzcv, x12 // encoding: [0x0c,0x42,0x1b,0xd5]
+// CHECK: msr daif, x12 // encoding: [0x2c,0x42,0x1b,0xd5]
+// CHECK: msr currentel, x12 // encoding: [0x4c,0x42,0x18,0xd5]
+// CHECK: msr spsr_irq, x12 // encoding: [0x0c,0x43,0x1c,0xd5]
+// CHECK: msr spsr_abt, x12 // encoding: [0x2c,0x43,0x1c,0xd5]
+// CHECK: msr spsr_und, x12 // encoding: [0x4c,0x43,0x1c,0xd5]
+// CHECK: msr spsr_fiq, x12 // encoding: [0x6c,0x43,0x1c,0xd5]
+// CHECK: msr fpcr, x12 // encoding: [0x0c,0x44,0x1b,0xd5]
+// CHECK: msr fpsr, x12 // encoding: [0x2c,0x44,0x1b,0xd5]
+// CHECK: msr dspsr_el0, x12 // encoding: [0x0c,0x45,0x1b,0xd5]
+// CHECK: msr dlr_el0, x12 // encoding: [0x2c,0x45,0x1b,0xd5]
+// CHECK: msr ifsr32_el2, x12 // encoding: [0x2c,0x50,0x1c,0xd5]
+// CHECK: msr afsr0_el1, x12 // encoding: [0x0c,0x51,0x18,0xd5]
+// CHECK: msr afsr0_el2, x12 // encoding: [0x0c,0x51,0x1c,0xd5]
+// CHECK: msr afsr0_el3, x12 // encoding: [0x0c,0x51,0x1e,0xd5]
+// CHECK: msr afsr1_el1, x12 // encoding: [0x2c,0x51,0x18,0xd5]
+// CHECK: msr afsr1_el2, x12 // encoding: [0x2c,0x51,0x1c,0xd5]
+// CHECK: msr afsr1_el3, x12 // encoding: [0x2c,0x51,0x1e,0xd5]
+// CHECK: msr esr_el1, x12 // encoding: [0x0c,0x52,0x18,0xd5]
+// CHECK: msr esr_el2, x12 // encoding: [0x0c,0x52,0x1c,0xd5]
+// CHECK: msr esr_el3, x12 // encoding: [0x0c,0x52,0x1e,0xd5]
+// CHECK: msr fpexc32_el2, x12 // encoding: [0x0c,0x53,0x1c,0xd5]
+// CHECK: msr far_el1, x12 // encoding: [0x0c,0x60,0x18,0xd5]
+// CHECK: msr far_el2, x12 // encoding: [0x0c,0x60,0x1c,0xd5]
+// CHECK: msr far_el3, x12 // encoding: [0x0c,0x60,0x1e,0xd5]
+// CHECK: msr hpfar_el2, x12 // encoding: [0x8c,0x60,0x1c,0xd5]
+// CHECK: msr par_el1, x12 // encoding: [0x0c,0x74,0x18,0xd5]
+// CHECK: msr pmcr_el0, x12 // encoding: [0x0c,0x9c,0x1b,0xd5]
+// CHECK: msr pmcntenset_el0, x12 // encoding: [0x2c,0x9c,0x1b,0xd5]
+// CHECK: msr pmcntenclr_el0, x12 // encoding: [0x4c,0x9c,0x1b,0xd5]
+// CHECK: msr pmovsclr_el0, x12 // encoding: [0x6c,0x9c,0x1b,0xd5]
+// CHECK: msr pmselr_el0, x12 // encoding: [0xac,0x9c,0x1b,0xd5]
+// CHECK: msr pmccntr_el0, x12 // encoding: [0x0c,0x9d,0x1b,0xd5]
+// CHECK: msr pmxevtyper_el0, x12 // encoding: [0x2c,0x9d,0x1b,0xd5]
+// CHECK: msr pmxevcntr_el0, x12 // encoding: [0x4c,0x9d,0x1b,0xd5]
+// CHECK: msr pmuserenr_el0, x12 // encoding: [0x0c,0x9e,0x1b,0xd5]
+// CHECK: msr pmintenset_el1, x12 // encoding: [0x2c,0x9e,0x18,0xd5]
+// CHECK: msr pmintenclr_el1, x12 // encoding: [0x4c,0x9e,0x18,0xd5]
+// CHECK: msr pmovsset_el0, x12 // encoding: [0x6c,0x9e,0x1b,0xd5]
+// CHECK: msr mair_el1, x12 // encoding: [0x0c,0xa2,0x18,0xd5]
+// CHECK: msr mair_el2, x12 // encoding: [0x0c,0xa2,0x1c,0xd5]
+// CHECK: msr mair_el3, x12 // encoding: [0x0c,0xa2,0x1e,0xd5]
+// CHECK: msr amair_el1, x12 // encoding: [0x0c,0xa3,0x18,0xd5]
+// CHECK: msr amair_el2, x12 // encoding: [0x0c,0xa3,0x1c,0xd5]
+// CHECK: msr amair_el3, x12 // encoding: [0x0c,0xa3,0x1e,0xd5]
+// CHECK: msr vbar_el1, x12 // encoding: [0x0c,0xc0,0x18,0xd5]
+// CHECK: msr vbar_el2, x12 // encoding: [0x0c,0xc0,0x1c,0xd5]
+// CHECK: msr vbar_el3, x12 // encoding: [0x0c,0xc0,0x1e,0xd5]
+// CHECK: msr rmr_el1, x12 // encoding: [0x4c,0xc0,0x18,0xd5]
+// CHECK: msr rmr_el2, x12 // encoding: [0x4c,0xc0,0x1c,0xd5]
+// CHECK: msr rmr_el3, x12 // encoding: [0x4c,0xc0,0x1e,0xd5]
+// CHECK: msr contextidr_el1, x12 // encoding: [0x2c,0xd0,0x18,0xd5]
+// CHECK: msr tpidr_el0, x12 // encoding: [0x4c,0xd0,0x1b,0xd5]
+// CHECK: msr tpidr_el2, x12 // encoding: [0x4c,0xd0,0x1c,0xd5]
+// CHECK: msr tpidr_el3, x12 // encoding: [0x4c,0xd0,0x1e,0xd5]
+// CHECK: msr tpidrro_el0, x12 // encoding: [0x6c,0xd0,0x1b,0xd5]
+// CHECK: msr tpidr_el1, x12 // encoding: [0x8c,0xd0,0x18,0xd5]
+// CHECK: msr cntfrq_el0, x12 // encoding: [0x0c,0xe0,0x1b,0xd5]
+// CHECK: msr cntvoff_el2, x12 // encoding: [0x6c,0xe0,0x1c,0xd5]
+// CHECK: msr cntkctl_el1, x12 // encoding: [0x0c,0xe1,0x18,0xd5]
+// CHECK: msr cnthctl_el2, x12 // encoding: [0x0c,0xe1,0x1c,0xd5]
+// CHECK: msr cntp_tval_el0, x12 // encoding: [0x0c,0xe2,0x1b,0xd5]
+// CHECK: msr cnthp_tval_el2, x12 // encoding: [0x0c,0xe2,0x1c,0xd5]
+// CHECK: msr cntps_tval_el1, x12 // encoding: [0x0c,0xe2,0x1f,0xd5]
+// CHECK: msr cntp_ctl_el0, x12 // encoding: [0x2c,0xe2,0x1b,0xd5]
+// CHECK: msr cnthp_ctl_el2, x12 // encoding: [0x2c,0xe2,0x1c,0xd5]
+// CHECK: msr cntps_ctl_el1, x12 // encoding: [0x2c,0xe2,0x1f,0xd5]
+// CHECK: msr cntp_cval_el0, x12 // encoding: [0x4c,0xe2,0x1b,0xd5]
+// CHECK: msr cnthp_cval_el2, x12 // encoding: [0x4c,0xe2,0x1c,0xd5]
+// CHECK: msr cntps_cval_el1, x12 // encoding: [0x4c,0xe2,0x1f,0xd5]
+// CHECK: msr cntv_tval_el0, x12 // encoding: [0x0c,0xe3,0x1b,0xd5]
+// CHECK: msr cntv_ctl_el0, x12 // encoding: [0x2c,0xe3,0x1b,0xd5]
+// CHECK: msr cntv_cval_el0, x12 // encoding: [0x4c,0xe3,0x1b,0xd5]
+// CHECK: msr pmevcntr0_el0, x12 // encoding: [0x0c,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr1_el0, x12 // encoding: [0x2c,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr2_el0, x12 // encoding: [0x4c,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr3_el0, x12 // encoding: [0x6c,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr4_el0, x12 // encoding: [0x8c,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr5_el0, x12 // encoding: [0xac,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr6_el0, x12 // encoding: [0xcc,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr7_el0, x12 // encoding: [0xec,0xe8,0x1b,0xd5]
+// CHECK: msr pmevcntr8_el0, x12 // encoding: [0x0c,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr9_el0, x12 // encoding: [0x2c,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr10_el0, x12 // encoding: [0x4c,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr11_el0, x12 // encoding: [0x6c,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr12_el0, x12 // encoding: [0x8c,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr13_el0, x12 // encoding: [0xac,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr14_el0, x12 // encoding: [0xcc,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr15_el0, x12 // encoding: [0xec,0xe9,0x1b,0xd5]
+// CHECK: msr pmevcntr16_el0, x12 // encoding: [0x0c,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr17_el0, x12 // encoding: [0x2c,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr18_el0, x12 // encoding: [0x4c,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr19_el0, x12 // encoding: [0x6c,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr20_el0, x12 // encoding: [0x8c,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr21_el0, x12 // encoding: [0xac,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr22_el0, x12 // encoding: [0xcc,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr23_el0, x12 // encoding: [0xec,0xea,0x1b,0xd5]
+// CHECK: msr pmevcntr24_el0, x12 // encoding: [0x0c,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr25_el0, x12 // encoding: [0x2c,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr26_el0, x12 // encoding: [0x4c,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr27_el0, x12 // encoding: [0x6c,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr28_el0, x12 // encoding: [0x8c,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr29_el0, x12 // encoding: [0xac,0xeb,0x1b,0xd5]
+// CHECK: msr pmevcntr30_el0, x12 // encoding: [0xcc,0xeb,0x1b,0xd5]
+// CHECK: msr pmccfiltr_el0, x12 // encoding: [0xec,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper0_el0, x12 // encoding: [0x0c,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper1_el0, x12 // encoding: [0x2c,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper2_el0, x12 // encoding: [0x4c,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper3_el0, x12 // encoding: [0x6c,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper4_el0, x12 // encoding: [0x8c,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper5_el0, x12 // encoding: [0xac,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper6_el0, x12 // encoding: [0xcc,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper7_el0, x12 // encoding: [0xec,0xec,0x1b,0xd5]
+// CHECK: msr pmevtyper8_el0, x12 // encoding: [0x0c,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper9_el0, x12 // encoding: [0x2c,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper10_el0, x12 // encoding: [0x4c,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper11_el0, x12 // encoding: [0x6c,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper12_el0, x12 // encoding: [0x8c,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper13_el0, x12 // encoding: [0xac,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper14_el0, x12 // encoding: [0xcc,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper15_el0, x12 // encoding: [0xec,0xed,0x1b,0xd5]
+// CHECK: msr pmevtyper16_el0, x12 // encoding: [0x0c,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper17_el0, x12 // encoding: [0x2c,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper18_el0, x12 // encoding: [0x4c,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper19_el0, x12 // encoding: [0x6c,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper20_el0, x12 // encoding: [0x8c,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper21_el0, x12 // encoding: [0xac,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper22_el0, x12 // encoding: [0xcc,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper23_el0, x12 // encoding: [0xec,0xee,0x1b,0xd5]
+// CHECK: msr pmevtyper24_el0, x12 // encoding: [0x0c,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper25_el0, x12 // encoding: [0x2c,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper26_el0, x12 // encoding: [0x4c,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper27_el0, x12 // encoding: [0x6c,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper28_el0, x12 // encoding: [0x8c,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper29_el0, x12 // encoding: [0xac,0xef,0x1b,0xd5]
+// CHECK: msr pmevtyper30_el0, x12 // encoding: [0xcc,0xef,0x1b,0xd5]
+
+ mrs x9, TEECR32_EL1
+ mrs x9, OSDTRRX_EL1
+ mrs x9, MDCCSR_EL0
+ mrs x9, MDCCINT_EL1
+ mrs x9, MDSCR_EL1
+ mrs x9, OSDTRTX_EL1
+ mrs x9, DBGDTR_EL0
+ mrs x9, DBGDTRRX_EL0
+ mrs x9, OSECCR_EL1
+ mrs x9, DBGVCR32_EL2
+ mrs x9, DBGBVR0_EL1
+ mrs x9, DBGBVR1_EL1
+ mrs x9, DBGBVR2_EL1
+ mrs x9, DBGBVR3_EL1
+ mrs x9, DBGBVR4_EL1
+ mrs x9, DBGBVR5_EL1
+ mrs x9, DBGBVR6_EL1
+ mrs x9, DBGBVR7_EL1
+ mrs x9, DBGBVR8_EL1
+ mrs x9, DBGBVR9_EL1
+ mrs x9, DBGBVR10_EL1
+ mrs x9, DBGBVR11_EL1
+ mrs x9, DBGBVR12_EL1
+ mrs x9, DBGBVR13_EL1
+ mrs x9, DBGBVR14_EL1
+ mrs x9, DBGBVR15_EL1
+ mrs x9, DBGBCR0_EL1
+ mrs x9, DBGBCR1_EL1
+ mrs x9, DBGBCR2_EL1
+ mrs x9, DBGBCR3_EL1
+ mrs x9, DBGBCR4_EL1
+ mrs x9, DBGBCR5_EL1
+ mrs x9, DBGBCR6_EL1
+ mrs x9, DBGBCR7_EL1
+ mrs x9, DBGBCR8_EL1
+ mrs x9, DBGBCR9_EL1
+ mrs x9, DBGBCR10_EL1
+ mrs x9, DBGBCR11_EL1
+ mrs x9, DBGBCR12_EL1
+ mrs x9, DBGBCR13_EL1
+ mrs x9, DBGBCR14_EL1
+ mrs x9, DBGBCR15_EL1
+ mrs x9, DBGWVR0_EL1
+ mrs x9, DBGWVR1_EL1
+ mrs x9, DBGWVR2_EL1
+ mrs x9, DBGWVR3_EL1
+ mrs x9, DBGWVR4_EL1
+ mrs x9, DBGWVR5_EL1
+ mrs x9, DBGWVR6_EL1
+ mrs x9, DBGWVR7_EL1
+ mrs x9, DBGWVR8_EL1
+ mrs x9, DBGWVR9_EL1
+ mrs x9, DBGWVR10_EL1
+ mrs x9, DBGWVR11_EL1
+ mrs x9, DBGWVR12_EL1
+ mrs x9, DBGWVR13_EL1
+ mrs x9, DBGWVR14_EL1
+ mrs x9, DBGWVR15_EL1
+ mrs x9, DBGWCR0_EL1
+ mrs x9, DBGWCR1_EL1
+ mrs x9, DBGWCR2_EL1
+ mrs x9, DBGWCR3_EL1
+ mrs x9, DBGWCR4_EL1
+ mrs x9, DBGWCR5_EL1
+ mrs x9, DBGWCR6_EL1
+ mrs x9, DBGWCR7_EL1
+ mrs x9, DBGWCR8_EL1
+ mrs x9, DBGWCR9_EL1
+ mrs x9, DBGWCR10_EL1
+ mrs x9, DBGWCR11_EL1
+ mrs x9, DBGWCR12_EL1
+ mrs x9, DBGWCR13_EL1
+ mrs x9, DBGWCR14_EL1
+ mrs x9, DBGWCR15_EL1
+ mrs x9, MDRAR_EL1
+ mrs x9, TEEHBR32_EL1
+ mrs x9, OSLSR_EL1
+ mrs x9, OSDLR_EL1
+ mrs x9, DBGPRCR_EL1
+ mrs x9, DBGCLAIMSET_EL1
+ mrs x9, DBGCLAIMCLR_EL1
+ mrs x9, DBGAUTHSTATUS_EL1
+ mrs x9, MIDR_EL1
+ mrs x9, CCSIDR_EL1
+ mrs x9, CSSELR_EL1
+ mrs x9, VPIDR_EL2
+ mrs x9, CLIDR_EL1
+ mrs x9, CTR_EL0
+ mrs x9, MPIDR_EL1
+ mrs x9, VMPIDR_EL2
+ mrs x9, REVIDR_EL1
+ mrs x9, AIDR_EL1
+ mrs x9, DCZID_EL0
+ mrs x9, ID_PFR0_EL1
+ mrs x9, ID_PFR1_EL1
+ mrs x9, ID_DFR0_EL1
+ mrs x9, ID_AFR0_EL1
+ mrs x9, ID_MMFR0_EL1
+ mrs x9, ID_MMFR1_EL1
+ mrs x9, ID_MMFR2_EL1
+ mrs x9, ID_MMFR3_EL1
+ mrs x9, ID_ISAR0_EL1
+ mrs x9, ID_ISAR1_EL1
+ mrs x9, ID_ISAR2_EL1
+ mrs x9, ID_ISAR3_EL1
+ mrs x9, ID_ISAR4_EL1
+ mrs x9, ID_ISAR5_EL1
+ mrs x9, MVFR0_EL1
+ mrs x9, MVFR1_EL1
+ mrs x9, MVFR2_EL1
+ mrs x9, ID_AA64PFR0_EL1
+ mrs x9, ID_AA64PFR1_EL1
+ mrs x9, ID_AA64DFR0_EL1
+ mrs x9, ID_AA64DFR1_EL1
+ mrs x9, ID_AA64AFR0_EL1
+ mrs x9, ID_AA64AFR1_EL1
+ mrs x9, ID_AA64ISAR0_EL1
+ mrs x9, ID_AA64ISAR1_EL1
+ mrs x9, ID_AA64MMFR0_EL1
+ mrs x9, ID_AA64MMFR1_EL1
+ mrs x9, SCTLR_EL1
+ mrs x9, SCTLR_EL2
+ mrs x9, SCTLR_EL3
+ mrs x9, ACTLR_EL1
+ mrs x9, ACTLR_EL2
+ mrs x9, ACTLR_EL3
+ mrs x9, CPACR_EL1
+ mrs x9, HCR_EL2
+ mrs x9, SCR_EL3
+ mrs x9, MDCR_EL2
+ mrs x9, SDER32_EL3
+ mrs x9, CPTR_EL2
+ mrs x9, CPTR_EL3
+ mrs x9, HSTR_EL2
+ mrs x9, HACR_EL2
+ mrs x9, MDCR_EL3
+ mrs x9, TTBR0_EL1
+ mrs x9, TTBR0_EL2
+ mrs x9, TTBR0_EL3
+ mrs x9, TTBR1_EL1
+ mrs x9, TCR_EL1
+ mrs x9, TCR_EL2
+ mrs x9, TCR_EL3
+ mrs x9, VTTBR_EL2
+ mrs x9, VTCR_EL2
+ mrs x9, DACR32_EL2
+ mrs x9, SPSR_EL1
+ mrs x9, SPSR_EL2
+ mrs x9, SPSR_EL3
+ mrs x9, ELR_EL1
+ mrs x9, ELR_EL2
+ mrs x9, ELR_EL3
+ mrs x9, SP_EL0
+ mrs x9, SP_EL1
+ mrs x9, SP_EL2
+ mrs x9, SPSel
+ mrs x9, NZCV
+ mrs x9, DAIF
+ mrs x9, CurrentEL
+ mrs x9, SPSR_irq
+ mrs x9, SPSR_abt
+ mrs x9, SPSR_und
+ mrs x9, SPSR_fiq
+ mrs x9, FPCR
+ mrs x9, FPSR
+ mrs x9, DSPSR_EL0
+ mrs x9, DLR_EL0
+ mrs x9, IFSR32_EL2
+ mrs x9, AFSR0_EL1
+ mrs x9, AFSR0_EL2
+ mrs x9, AFSR0_EL3
+ mrs x9, AFSR1_EL1
+ mrs x9, AFSR1_EL2
+ mrs x9, AFSR1_EL3
+ mrs x9, ESR_EL1
+ mrs x9, ESR_EL2
+ mrs x9, ESR_EL3
+ mrs x9, FPEXC32_EL2
+ mrs x9, FAR_EL1
+ mrs x9, FAR_EL2
+ mrs x9, FAR_EL3
+ mrs x9, HPFAR_EL2
+ mrs x9, PAR_EL1
+ mrs x9, PMCR_EL0
+ mrs x9, PMCNTENSET_EL0
+ mrs x9, PMCNTENCLR_EL0
+ mrs x9, PMOVSCLR_EL0
+ mrs x9, PMSELR_EL0
+ mrs x9, PMCEID0_EL0
+ mrs x9, PMCEID1_EL0
+ mrs x9, PMCCNTR_EL0
+ mrs x9, PMXEVTYPER_EL0
+ mrs x9, PMXEVCNTR_EL0
+ mrs x9, PMUSERENR_EL0
+ mrs x9, PMINTENSET_EL1
+ mrs x9, PMINTENCLR_EL1
+ mrs x9, PMOVSSET_EL0
+ mrs x9, MAIR_EL1
+ mrs x9, MAIR_EL2
+ mrs x9, MAIR_EL3
+ mrs x9, AMAIR_EL1
+ mrs x9, AMAIR_EL2
+ mrs x9, AMAIR_EL3
+ mrs x9, VBAR_EL1
+ mrs x9, VBAR_EL2
+ mrs x9, VBAR_EL3
+ mrs x9, RVBAR_EL1
+ mrs x9, RVBAR_EL2
+ mrs x9, RVBAR_EL3
+ mrs x9, RMR_EL1
+ mrs x9, RMR_EL2
+ mrs x9, RMR_EL3
+ mrs x9, ISR_EL1
+ mrs x9, CONTEXTIDR_EL1
+ mrs x9, TPIDR_EL0
+ mrs x9, TPIDR_EL2
+ mrs x9, TPIDR_EL3
+ mrs x9, TPIDRRO_EL0
+ mrs x9, TPIDR_EL1
+ mrs x9, CNTFRQ_EL0
+ mrs x9, CNTPCT_EL0
+ mrs x9, CNTVCT_EL0
+ mrs x9, CNTVOFF_EL2
+ mrs x9, CNTKCTL_EL1
+ mrs x9, CNTHCTL_EL2
+ mrs x9, CNTP_TVAL_EL0
+ mrs x9, CNTHP_TVAL_EL2
+ mrs x9, CNTPS_TVAL_EL1
+ mrs x9, CNTP_CTL_EL0
+ mrs x9, CNTHP_CTL_EL2
+ mrs x9, CNTPS_CTL_EL1
+ mrs x9, CNTP_CVAL_EL0
+ mrs x9, CNTHP_CVAL_EL2
+ mrs x9, CNTPS_CVAL_EL1
+ mrs x9, CNTV_TVAL_EL0
+ mrs x9, CNTV_CTL_EL0
+ mrs x9, CNTV_CVAL_EL0
+ mrs x9, PMEVCNTR0_EL0
+ mrs x9, PMEVCNTR1_EL0
+ mrs x9, PMEVCNTR2_EL0
+ mrs x9, PMEVCNTR3_EL0
+ mrs x9, PMEVCNTR4_EL0
+ mrs x9, PMEVCNTR5_EL0
+ mrs x9, PMEVCNTR6_EL0
+ mrs x9, PMEVCNTR7_EL0
+ mrs x9, PMEVCNTR8_EL0
+ mrs x9, PMEVCNTR9_EL0
+ mrs x9, PMEVCNTR10_EL0
+ mrs x9, PMEVCNTR11_EL0
+ mrs x9, PMEVCNTR12_EL0
+ mrs x9, PMEVCNTR13_EL0
+ mrs x9, PMEVCNTR14_EL0
+ mrs x9, PMEVCNTR15_EL0
+ mrs x9, PMEVCNTR16_EL0
+ mrs x9, PMEVCNTR17_EL0
+ mrs x9, PMEVCNTR18_EL0
+ mrs x9, PMEVCNTR19_EL0
+ mrs x9, PMEVCNTR20_EL0
+ mrs x9, PMEVCNTR21_EL0
+ mrs x9, PMEVCNTR22_EL0
+ mrs x9, PMEVCNTR23_EL0
+ mrs x9, PMEVCNTR24_EL0
+ mrs x9, PMEVCNTR25_EL0
+ mrs x9, PMEVCNTR26_EL0
+ mrs x9, PMEVCNTR27_EL0
+ mrs x9, PMEVCNTR28_EL0
+ mrs x9, PMEVCNTR29_EL0
+ mrs x9, PMEVCNTR30_EL0
+ mrs x9, PMCCFILTR_EL0
+ mrs x9, PMEVTYPER0_EL0
+ mrs x9, PMEVTYPER1_EL0
+ mrs x9, PMEVTYPER2_EL0
+ mrs x9, PMEVTYPER3_EL0
+ mrs x9, PMEVTYPER4_EL0
+ mrs x9, PMEVTYPER5_EL0
+ mrs x9, PMEVTYPER6_EL0
+ mrs x9, PMEVTYPER7_EL0
+ mrs x9, PMEVTYPER8_EL0
+ mrs x9, PMEVTYPER9_EL0
+ mrs x9, PMEVTYPER10_EL0
+ mrs x9, PMEVTYPER11_EL0
+ mrs x9, PMEVTYPER12_EL0
+ mrs x9, PMEVTYPER13_EL0
+ mrs x9, PMEVTYPER14_EL0
+ mrs x9, PMEVTYPER15_EL0
+ mrs x9, PMEVTYPER16_EL0
+ mrs x9, PMEVTYPER17_EL0
+ mrs x9, PMEVTYPER18_EL0
+ mrs x9, PMEVTYPER19_EL0
+ mrs x9, PMEVTYPER20_EL0
+ mrs x9, PMEVTYPER21_EL0
+ mrs x9, PMEVTYPER22_EL0
+ mrs x9, PMEVTYPER23_EL0
+ mrs x9, PMEVTYPER24_EL0
+ mrs x9, PMEVTYPER25_EL0
+ mrs x9, PMEVTYPER26_EL0
+ mrs x9, PMEVTYPER27_EL0
+ mrs x9, PMEVTYPER28_EL0
+ mrs x9, PMEVTYPER29_EL0
+ mrs x9, PMEVTYPER30_EL0
+// CHECK: mrs x9, teecr32_el1 // encoding: [0x09,0x00,0x32,0xd5]
+// CHECK: mrs x9, osdtrrx_el1 // encoding: [0x49,0x00,0x30,0xd5]
+// CHECK: mrs x9, mdccsr_el0 // encoding: [0x09,0x01,0x33,0xd5]
+// CHECK: mrs x9, mdccint_el1 // encoding: [0x09,0x02,0x30,0xd5]
+// CHECK: mrs x9, mdscr_el1 // encoding: [0x49,0x02,0x30,0xd5]
+// CHECK: mrs x9, osdtrtx_el1 // encoding: [0x49,0x03,0x30,0xd5]
+// CHECK: mrs x9, dbgdtr_el0 // encoding: [0x09,0x04,0x33,0xd5]
+// CHECK: mrs x9, dbgdtrrx_el0 // encoding: [0x09,0x05,0x33,0xd5]
+// CHECK: mrs x9, oseccr_el1 // encoding: [0x49,0x06,0x30,0xd5]
+// CHECK: mrs x9, dbgvcr32_el2 // encoding: [0x09,0x07,0x34,0xd5]
+// CHECK: mrs x9, dbgbvr0_el1 // encoding: [0x89,0x00,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr1_el1 // encoding: [0x89,0x01,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr2_el1 // encoding: [0x89,0x02,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr3_el1 // encoding: [0x89,0x03,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr4_el1 // encoding: [0x89,0x04,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr5_el1 // encoding: [0x89,0x05,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr6_el1 // encoding: [0x89,0x06,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr7_el1 // encoding: [0x89,0x07,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr8_el1 // encoding: [0x89,0x08,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr9_el1 // encoding: [0x89,0x09,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr10_el1 // encoding: [0x89,0x0a,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr11_el1 // encoding: [0x89,0x0b,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr12_el1 // encoding: [0x89,0x0c,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr13_el1 // encoding: [0x89,0x0d,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr14_el1 // encoding: [0x89,0x0e,0x30,0xd5]
+// CHECK: mrs x9, dbgbvr15_el1 // encoding: [0x89,0x0f,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr0_el1 // encoding: [0xa9,0x00,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr1_el1 // encoding: [0xa9,0x01,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr2_el1 // encoding: [0xa9,0x02,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr3_el1 // encoding: [0xa9,0x03,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr4_el1 // encoding: [0xa9,0x04,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr5_el1 // encoding: [0xa9,0x05,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr6_el1 // encoding: [0xa9,0x06,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr7_el1 // encoding: [0xa9,0x07,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr8_el1 // encoding: [0xa9,0x08,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr9_el1 // encoding: [0xa9,0x09,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr10_el1 // encoding: [0xa9,0x0a,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr11_el1 // encoding: [0xa9,0x0b,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr12_el1 // encoding: [0xa9,0x0c,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr13_el1 // encoding: [0xa9,0x0d,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr14_el1 // encoding: [0xa9,0x0e,0x30,0xd5]
+// CHECK: mrs x9, dbgbcr15_el1 // encoding: [0xa9,0x0f,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr0_el1 // encoding: [0xc9,0x00,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr1_el1 // encoding: [0xc9,0x01,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr2_el1 // encoding: [0xc9,0x02,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr3_el1 // encoding: [0xc9,0x03,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr4_el1 // encoding: [0xc9,0x04,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr5_el1 // encoding: [0xc9,0x05,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr6_el1 // encoding: [0xc9,0x06,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr7_el1 // encoding: [0xc9,0x07,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr8_el1 // encoding: [0xc9,0x08,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr9_el1 // encoding: [0xc9,0x09,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr10_el1 // encoding: [0xc9,0x0a,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr11_el1 // encoding: [0xc9,0x0b,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr12_el1 // encoding: [0xc9,0x0c,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr13_el1 // encoding: [0xc9,0x0d,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr14_el1 // encoding: [0xc9,0x0e,0x30,0xd5]
+// CHECK: mrs x9, dbgwvr15_el1 // encoding: [0xc9,0x0f,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr0_el1 // encoding: [0xe9,0x00,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr1_el1 // encoding: [0xe9,0x01,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr2_el1 // encoding: [0xe9,0x02,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr3_el1 // encoding: [0xe9,0x03,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr4_el1 // encoding: [0xe9,0x04,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr5_el1 // encoding: [0xe9,0x05,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr6_el1 // encoding: [0xe9,0x06,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr7_el1 // encoding: [0xe9,0x07,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr8_el1 // encoding: [0xe9,0x08,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr9_el1 // encoding: [0xe9,0x09,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr10_el1 // encoding: [0xe9,0x0a,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr11_el1 // encoding: [0xe9,0x0b,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr12_el1 // encoding: [0xe9,0x0c,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr13_el1 // encoding: [0xe9,0x0d,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr14_el1 // encoding: [0xe9,0x0e,0x30,0xd5]
+// CHECK: mrs x9, dbgwcr15_el1 // encoding: [0xe9,0x0f,0x30,0xd5]
+// CHECK: mrs x9, mdrar_el1 // encoding: [0x09,0x10,0x30,0xd5]
+// CHECK: mrs x9, teehbr32_el1 // encoding: [0x09,0x10,0x32,0xd5]
+// CHECK: mrs x9, oslsr_el1 // encoding: [0x89,0x11,0x30,0xd5]
+// CHECK: mrs x9, osdlr_el1 // encoding: [0x89,0x13,0x30,0xd5]
+// CHECK: mrs x9, dbgprcr_el1 // encoding: [0x89,0x14,0x30,0xd5]
+// CHECK: mrs x9, dbgclaimset_el1 // encoding: [0xc9,0x78,0x30,0xd5]
+// CHECK: mrs x9, dbgclaimclr_el1 // encoding: [0xc9,0x79,0x30,0xd5]
+// CHECK: mrs x9, dbgauthstatus_el1 // encoding: [0xc9,0x7e,0x30,0xd5]
+// CHECK: mrs x9, midr_el1 // encoding: [0x09,0x00,0x38,0xd5]
+// CHECK: mrs x9, ccsidr_el1 // encoding: [0x09,0x00,0x39,0xd5]
+// CHECK: mrs x9, csselr_el1 // encoding: [0x09,0x00,0x3a,0xd5]
+// CHECK: mrs x9, vpidr_el2 // encoding: [0x09,0x00,0x3c,0xd5]
+// CHECK: mrs x9, clidr_el1 // encoding: [0x29,0x00,0x39,0xd5]
+// CHECK: mrs x9, ctr_el0 // encoding: [0x29,0x00,0x3b,0xd5]
+// CHECK: mrs x9, mpidr_el1 // encoding: [0xa9,0x00,0x38,0xd5]
+// CHECK: mrs x9, vmpidr_el2 // encoding: [0xa9,0x00,0x3c,0xd5]
+// CHECK: mrs x9, revidr_el1 // encoding: [0xc9,0x00,0x38,0xd5]
+// CHECK: mrs x9, aidr_el1 // encoding: [0xe9,0x00,0x39,0xd5]
+// CHECK: mrs x9, dczid_el0 // encoding: [0xe9,0x00,0x3b,0xd5]
+// CHECK: mrs x9, id_pfr0_el1 // encoding: [0x09,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_pfr1_el1 // encoding: [0x29,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_dfr0_el1 // encoding: [0x49,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_afr0_el1 // encoding: [0x69,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_mmfr0_el1 // encoding: [0x89,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_mmfr1_el1 // encoding: [0xa9,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_mmfr2_el1 // encoding: [0xc9,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_mmfr3_el1 // encoding: [0xe9,0x01,0x38,0xd5]
+// CHECK: mrs x9, id_isar0_el1 // encoding: [0x09,0x02,0x38,0xd5]
+// CHECK: mrs x9, id_isar1_el1 // encoding: [0x29,0x02,0x38,0xd5]
+// CHECK: mrs x9, id_isar2_el1 // encoding: [0x49,0x02,0x38,0xd5]
+// CHECK: mrs x9, id_isar3_el1 // encoding: [0x69,0x02,0x38,0xd5]
+// CHECK: mrs x9, id_isar4_el1 // encoding: [0x89,0x02,0x38,0xd5]
+// CHECK: mrs x9, id_isar5_el1 // encoding: [0xa9,0x02,0x38,0xd5]
+// CHECK: mrs x9, mvfr0_el1 // encoding: [0x09,0x03,0x38,0xd5]
+// CHECK: mrs x9, mvfr1_el1 // encoding: [0x29,0x03,0x38,0xd5]
+// CHECK: mrs x9, mvfr2_el1 // encoding: [0x49,0x03,0x38,0xd5]
+// CHECK: mrs x9, id_aa64pfr0_el1 // encoding: [0x09,0x04,0x38,0xd5]
+// CHECK: mrs x9, id_aa64pfr1_el1 // encoding: [0x29,0x04,0x38,0xd5]
+// CHECK: mrs x9, id_aa64dfr0_el1 // encoding: [0x09,0x05,0x38,0xd5]
+// CHECK: mrs x9, id_aa64dfr1_el1 // encoding: [0x29,0x05,0x38,0xd5]
+// CHECK: mrs x9, id_aa64afr0_el1 // encoding: [0x89,0x05,0x38,0xd5]
+// CHECK: mrs x9, id_aa64afr1_el1 // encoding: [0xa9,0x05,0x38,0xd5]
+// CHECK: mrs x9, id_aa64isar0_el1 // encoding: [0x09,0x06,0x38,0xd5]
+// CHECK: mrs x9, id_aa64isar1_el1 // encoding: [0x29,0x06,0x38,0xd5]
+// CHECK: mrs x9, id_aa64mmfr0_el1 // encoding: [0x09,0x07,0x38,0xd5]
+// CHECK: mrs x9, id_aa64mmfr1_el1 // encoding: [0x29,0x07,0x38,0xd5]
+// CHECK: mrs x9, sctlr_el1 // encoding: [0x09,0x10,0x38,0xd5]
+// CHECK: mrs x9, sctlr_el2 // encoding: [0x09,0x10,0x3c,0xd5]
+// CHECK: mrs x9, sctlr_el3 // encoding: [0x09,0x10,0x3e,0xd5]
+// CHECK: mrs x9, actlr_el1 // encoding: [0x29,0x10,0x38,0xd5]
+// CHECK: mrs x9, actlr_el2 // encoding: [0x29,0x10,0x3c,0xd5]
+// CHECK: mrs x9, actlr_el3 // encoding: [0x29,0x10,0x3e,0xd5]
+// CHECK: mrs x9, cpacr_el1 // encoding: [0x49,0x10,0x38,0xd5]
+// CHECK: mrs x9, hcr_el2 // encoding: [0x09,0x11,0x3c,0xd5]
+// CHECK: mrs x9, scr_el3 // encoding: [0x09,0x11,0x3e,0xd5]
+// CHECK: mrs x9, mdcr_el2 // encoding: [0x29,0x11,0x3c,0xd5]
+// CHECK: mrs x9, sder32_el3 // encoding: [0x29,0x11,0x3e,0xd5]
+// CHECK: mrs x9, cptr_el2 // encoding: [0x49,0x11,0x3c,0xd5]
+// CHECK: mrs x9, cptr_el3 // encoding: [0x49,0x11,0x3e,0xd5]
+// CHECK: mrs x9, hstr_el2 // encoding: [0x69,0x11,0x3c,0xd5]
+// CHECK: mrs x9, hacr_el2 // encoding: [0xe9,0x11,0x3c,0xd5]
+// CHECK: mrs x9, mdcr_el3 // encoding: [0x29,0x13,0x3e,0xd5]
+// CHECK: mrs x9, ttbr0_el1 // encoding: [0x09,0x20,0x38,0xd5]
+// CHECK: mrs x9, ttbr0_el2 // encoding: [0x09,0x20,0x3c,0xd5]
+// CHECK: mrs x9, ttbr0_el3 // encoding: [0x09,0x20,0x3e,0xd5]
+// CHECK: mrs x9, ttbr1_el1 // encoding: [0x29,0x20,0x38,0xd5]
+// CHECK: mrs x9, tcr_el1 // encoding: [0x49,0x20,0x38,0xd5]
+// CHECK: mrs x9, tcr_el2 // encoding: [0x49,0x20,0x3c,0xd5]
+// CHECK: mrs x9, tcr_el3 // encoding: [0x49,0x20,0x3e,0xd5]
+// CHECK: mrs x9, vttbr_el2 // encoding: [0x09,0x21,0x3c,0xd5]
+// CHECK: mrs x9, vtcr_el2 // encoding: [0x49,0x21,0x3c,0xd5]
+// CHECK: mrs x9, dacr32_el2 // encoding: [0x09,0x30,0x3c,0xd5]
+// CHECK: mrs x9, spsr_el1 // encoding: [0x09,0x40,0x38,0xd5]
+// CHECK: mrs x9, spsr_el2 // encoding: [0x09,0x40,0x3c,0xd5]
+// CHECK: mrs x9, spsr_el3 // encoding: [0x09,0x40,0x3e,0xd5]
+// CHECK: mrs x9, elr_el1 // encoding: [0x29,0x40,0x38,0xd5]
+// CHECK: mrs x9, elr_el2 // encoding: [0x29,0x40,0x3c,0xd5]
+// CHECK: mrs x9, elr_el3 // encoding: [0x29,0x40,0x3e,0xd5]
+// CHECK: mrs x9, sp_el0 // encoding: [0x09,0x41,0x38,0xd5]
+// CHECK: mrs x9, sp_el1 // encoding: [0x09,0x41,0x3c,0xd5]
+// CHECK: mrs x9, sp_el2 // encoding: [0x09,0x41,0x3e,0xd5]
+// CHECK: mrs x9, spsel // encoding: [0x09,0x42,0x38,0xd5]
+// CHECK: mrs x9, nzcv // encoding: [0x09,0x42,0x3b,0xd5]
+// CHECK: mrs x9, daif // encoding: [0x29,0x42,0x3b,0xd5]
+// CHECK: mrs x9, currentel // encoding: [0x49,0x42,0x38,0xd5]
+// CHECK: mrs x9, spsr_irq // encoding: [0x09,0x43,0x3c,0xd5]
+// CHECK: mrs x9, spsr_abt // encoding: [0x29,0x43,0x3c,0xd5]
+// CHECK: mrs x9, spsr_und // encoding: [0x49,0x43,0x3c,0xd5]
+// CHECK: mrs x9, spsr_fiq // encoding: [0x69,0x43,0x3c,0xd5]
+// CHECK: mrs x9, fpcr // encoding: [0x09,0x44,0x3b,0xd5]
+// CHECK: mrs x9, fpsr // encoding: [0x29,0x44,0x3b,0xd5]
+// CHECK: mrs x9, dspsr_el0 // encoding: [0x09,0x45,0x3b,0xd5]
+// CHECK: mrs x9, dlr_el0 // encoding: [0x29,0x45,0x3b,0xd5]
+// CHECK: mrs x9, ifsr32_el2 // encoding: [0x29,0x50,0x3c,0xd5]
+// CHECK: mrs x9, afsr0_el1 // encoding: [0x09,0x51,0x38,0xd5]
+// CHECK: mrs x9, afsr0_el2 // encoding: [0x09,0x51,0x3c,0xd5]
+// CHECK: mrs x9, afsr0_el3 // encoding: [0x09,0x51,0x3e,0xd5]
+// CHECK: mrs x9, afsr1_el1 // encoding: [0x29,0x51,0x38,0xd5]
+// CHECK: mrs x9, afsr1_el2 // encoding: [0x29,0x51,0x3c,0xd5]
+// CHECK: mrs x9, afsr1_el3 // encoding: [0x29,0x51,0x3e,0xd5]
+// CHECK: mrs x9, esr_el1 // encoding: [0x09,0x52,0x38,0xd5]
+// CHECK: mrs x9, esr_el2 // encoding: [0x09,0x52,0x3c,0xd5]
+// CHECK: mrs x9, esr_el3 // encoding: [0x09,0x52,0x3e,0xd5]
+// CHECK: mrs x9, fpexc32_el2 // encoding: [0x09,0x53,0x3c,0xd5]
+// CHECK: mrs x9, far_el1 // encoding: [0x09,0x60,0x38,0xd5]
+// CHECK: mrs x9, far_el2 // encoding: [0x09,0x60,0x3c,0xd5]
+// CHECK: mrs x9, far_el3 // encoding: [0x09,0x60,0x3e,0xd5]
+// CHECK: mrs x9, hpfar_el2 // encoding: [0x89,0x60,0x3c,0xd5]
+// CHECK: mrs x9, par_el1 // encoding: [0x09,0x74,0x38,0xd5]
+// CHECK: mrs x9, pmcr_el0 // encoding: [0x09,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmcntenset_el0 // encoding: [0x29,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmcntenclr_el0 // encoding: [0x49,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmovsclr_el0 // encoding: [0x69,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmselr_el0 // encoding: [0xa9,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmceid0_el0 // encoding: [0xc9,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmceid1_el0 // encoding: [0xe9,0x9c,0x3b,0xd5]
+// CHECK: mrs x9, pmccntr_el0 // encoding: [0x09,0x9d,0x3b,0xd5]
+// CHECK: mrs x9, pmxevtyper_el0 // encoding: [0x29,0x9d,0x3b,0xd5]
+// CHECK: mrs x9, pmxevcntr_el0 // encoding: [0x49,0x9d,0x3b,0xd5]
+// CHECK: mrs x9, pmuserenr_el0 // encoding: [0x09,0x9e,0x3b,0xd5]
+// CHECK: mrs x9, pmintenset_el1 // encoding: [0x29,0x9e,0x38,0xd5]
+// CHECK: mrs x9, pmintenclr_el1 // encoding: [0x49,0x9e,0x38,0xd5]
+// CHECK: mrs x9, pmovsset_el0 // encoding: [0x69,0x9e,0x3b,0xd5]
+// CHECK: mrs x9, mair_el1 // encoding: [0x09,0xa2,0x38,0xd5]
+// CHECK: mrs x9, mair_el2 // encoding: [0x09,0xa2,0x3c,0xd5]
+// CHECK: mrs x9, mair_el3 // encoding: [0x09,0xa2,0x3e,0xd5]
+// CHECK: mrs x9, amair_el1 // encoding: [0x09,0xa3,0x38,0xd5]
+// CHECK: mrs x9, amair_el2 // encoding: [0x09,0xa3,0x3c,0xd5]
+// CHECK: mrs x9, amair_el3 // encoding: [0x09,0xa3,0x3e,0xd5]
+// CHECK: mrs x9, vbar_el1 // encoding: [0x09,0xc0,0x38,0xd5]
+// CHECK: mrs x9, vbar_el2 // encoding: [0x09,0xc0,0x3c,0xd5]
+// CHECK: mrs x9, vbar_el3 // encoding: [0x09,0xc0,0x3e,0xd5]
+// CHECK: mrs x9, rvbar_el1 // encoding: [0x29,0xc0,0x38,0xd5]
+// CHECK: mrs x9, rvbar_el2 // encoding: [0x29,0xc0,0x3c,0xd5]
+// CHECK: mrs x9, rvbar_el3 // encoding: [0x29,0xc0,0x3e,0xd5]
+// CHECK: mrs x9, rmr_el1 // encoding: [0x49,0xc0,0x38,0xd5]
+// CHECK: mrs x9, rmr_el2 // encoding: [0x49,0xc0,0x3c,0xd5]
+// CHECK: mrs x9, rmr_el3 // encoding: [0x49,0xc0,0x3e,0xd5]
+// CHECK: mrs x9, isr_el1 // encoding: [0x09,0xc1,0x38,0xd5]
+// CHECK: mrs x9, contextidr_el1 // encoding: [0x29,0xd0,0x38,0xd5]
+// CHECK: mrs x9, tpidr_el0 // encoding: [0x49,0xd0,0x3b,0xd5]
+// CHECK: mrs x9, tpidr_el2 // encoding: [0x49,0xd0,0x3c,0xd5]
+// CHECK: mrs x9, tpidr_el3 // encoding: [0x49,0xd0,0x3e,0xd5]
+// CHECK: mrs x9, tpidrro_el0 // encoding: [0x69,0xd0,0x3b,0xd5]
+// CHECK: mrs x9, tpidr_el1 // encoding: [0x89,0xd0,0x38,0xd5]
+// CHECK: mrs x9, cntfrq_el0 // encoding: [0x09,0xe0,0x3b,0xd5]
+// CHECK: mrs x9, cntpct_el0 // encoding: [0x29,0xe0,0x3b,0xd5]
+// CHECK: mrs x9, cntvct_el0 // encoding: [0x49,0xe0,0x3b,0xd5]
+// CHECK: mrs x9, cntvoff_el2 // encoding: [0x69,0xe0,0x3c,0xd5]
+// CHECK: mrs x9, cntkctl_el1 // encoding: [0x09,0xe1,0x38,0xd5]
+// CHECK: mrs x9, cnthctl_el2 // encoding: [0x09,0xe1,0x3c,0xd5]
+// CHECK: mrs x9, cntp_tval_el0 // encoding: [0x09,0xe2,0x3b,0xd5]
+// CHECK: mrs x9, cnthp_tval_el2 // encoding: [0x09,0xe2,0x3c,0xd5]
+// CHECK: mrs x9, cntps_tval_el1 // encoding: [0x09,0xe2,0x3f,0xd5]
+// CHECK: mrs x9, cntp_ctl_el0 // encoding: [0x29,0xe2,0x3b,0xd5]
+// CHECK: mrs x9, cnthp_ctl_el2 // encoding: [0x29,0xe2,0x3c,0xd5]
+// CHECK: mrs x9, cntps_ctl_el1 // encoding: [0x29,0xe2,0x3f,0xd5]
+// CHECK: mrs x9, cntp_cval_el0 // encoding: [0x49,0xe2,0x3b,0xd5]
+// CHECK: mrs x9, cnthp_cval_el2 // encoding: [0x49,0xe2,0x3c,0xd5]
+// CHECK: mrs x9, cntps_cval_el1 // encoding: [0x49,0xe2,0x3f,0xd5]
+// CHECK: mrs x9, cntv_tval_el0 // encoding: [0x09,0xe3,0x3b,0xd5]
+// CHECK: mrs x9, cntv_ctl_el0 // encoding: [0x29,0xe3,0x3b,0xd5]
+// CHECK: mrs x9, cntv_cval_el0 // encoding: [0x49,0xe3,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr0_el0 // encoding: [0x09,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr1_el0 // encoding: [0x29,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr2_el0 // encoding: [0x49,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr3_el0 // encoding: [0x69,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr4_el0 // encoding: [0x89,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr5_el0 // encoding: [0xa9,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr6_el0 // encoding: [0xc9,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr7_el0 // encoding: [0xe9,0xe8,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr8_el0 // encoding: [0x09,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr9_el0 // encoding: [0x29,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr10_el0 // encoding: [0x49,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr11_el0 // encoding: [0x69,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr12_el0 // encoding: [0x89,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr13_el0 // encoding: [0xa9,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr14_el0 // encoding: [0xc9,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr15_el0 // encoding: [0xe9,0xe9,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr16_el0 // encoding: [0x09,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr17_el0 // encoding: [0x29,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr18_el0 // encoding: [0x49,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr19_el0 // encoding: [0x69,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr20_el0 // encoding: [0x89,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr21_el0 // encoding: [0xa9,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr22_el0 // encoding: [0xc9,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr23_el0 // encoding: [0xe9,0xea,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr24_el0 // encoding: [0x09,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr25_el0 // encoding: [0x29,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr26_el0 // encoding: [0x49,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr27_el0 // encoding: [0x69,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr28_el0 // encoding: [0x89,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr29_el0 // encoding: [0xa9,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmevcntr30_el0 // encoding: [0xc9,0xeb,0x3b,0xd5]
+// CHECK: mrs x9, pmccfiltr_el0 // encoding: [0xe9,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper0_el0 // encoding: [0x09,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper1_el0 // encoding: [0x29,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper2_el0 // encoding: [0x49,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper3_el0 // encoding: [0x69,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper4_el0 // encoding: [0x89,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper5_el0 // encoding: [0xa9,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper6_el0 // encoding: [0xc9,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper7_el0 // encoding: [0xe9,0xec,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper8_el0 // encoding: [0x09,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper9_el0 // encoding: [0x29,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper10_el0 // encoding: [0x49,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper11_el0 // encoding: [0x69,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper12_el0 // encoding: [0x89,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper13_el0 // encoding: [0xa9,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper14_el0 // encoding: [0xc9,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper15_el0 // encoding: [0xe9,0xed,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper16_el0 // encoding: [0x09,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper17_el0 // encoding: [0x29,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper18_el0 // encoding: [0x49,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper19_el0 // encoding: [0x69,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper20_el0 // encoding: [0x89,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper21_el0 // encoding: [0xa9,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper22_el0 // encoding: [0xc9,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper23_el0 // encoding: [0xe9,0xee,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper24_el0 // encoding: [0x09,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper25_el0 // encoding: [0x29,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper26_el0 // encoding: [0x49,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper27_el0 // encoding: [0x69,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper28_el0 // encoding: [0x89,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper29_el0 // encoding: [0xa9,0xef,0x3b,0xd5]
+// CHECK: mrs x9, pmevtyper30_el0 // encoding: [0xc9,0xef,0x3b,0xd5]
+
+ mrs x12, s3_7_c15_c1_5
+ mrs x13, s3_2_c11_c15_7
+ msr s3_0_c15_c0_0, x12
+ msr s3_7_c11_c13_7, x5
+// CHECK: mrs x12, s3_7_c15_c1_5 // encoding: [0xac,0xf1,0x3f,0xd5]
+// CHECK: mrs x13, s3_2_c11_c15_7 // encoding: [0xed,0xbf,0x3a,0xd5]
+// CHECK: msr s3_0_c15_c0_0, x12 // encoding: [0x0c,0xf0,0x18,0xd5]
+// CHECK: msr s3_7_c11_c13_7, x5 // encoding: [0xe5,0xbd,0x1f,0xd5]
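+
+// Informative note (not a CHECK line): the generic S<op0>_<op1>_<Cn>_<Cm>_<op2>
+// form maps straight onto the instruction fields. For s3_7_c15_c1_5 read into
+// x12: o0 = op0-2 = 1, op1 = 7, CRn = 15, CRm = 1, op2 = 5, Rt = 12, giving the
+// word 0xd53ff1ac, i.e. the bytes [0xac,0xf1,0x3f,0xd5] checked above; MSR only
+// differs in having the L (read) bit clear.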
+
+//------------------------------------------------------------------------------
+// Test & branch (immediate)
+//------------------------------------------------------------------------------
+
+ tbz x5, #0, somewhere
+ tbz xzr, #63, elsewhere
+ tbnz x5, #45, nowhere
+// CHECK: tbz x5, #0, somewhere // encoding: [0x05'A',A,A,0x36'A']
+// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_tstbr
+// CHECK: tbz xzr, #63, elsewhere // encoding: [0x1f'A',A,0xf8'A',0xb6'A']
+// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_a64_tstbr
+// CHECK: tbnz x5, #45, nowhere // encoding: [0x05'A',A,0x68'A',0xb7'A']
+// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr
+
+ tbnz w3, #2, there
+ tbnz wzr, #31, nowhere
+ tbz w5, #12, anywhere
+// CHECK: tbnz w3, #2, there // encoding: [0x03'A',A,0x10'A',0x37'A']
+// CHECK: // fixup A - offset: 0, value: there, kind: fixup_a64_tstbr
+// CHECK: tbnz wzr, #31, nowhere // encoding: [0x1f'A',A,0xf8'A',0x37'A']
+// CHECK: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr
+// CHECK: tbz w5, #12, anywhere // encoding: [0x05'A',A,0x60'A',0x36'A']
+// CHECK: // fixup A - offset: 0, value: anywhere, kind: fixup_a64_tstbr
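+
+// Informative note: tbz/tbnz encode the tested bit number as b5:b40 (bit 31
+// plus bits 23-19), so #45 above becomes b5 = 1, b40 = 13; the fixup_a64_tstbr
+// fixups fill in the signed 14-bit word offset, a +/-32KiB branch range.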
+
+//------------------------------------------------------------------------------
+// Unconditional branch (immediate)
+//------------------------------------------------------------------------------
+
+ b somewhere
+ bl elsewhere
+// CHECK: b somewhere // encoding: [A,A,A,0x14'A']
+// CHECK: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_uncondbr
+// CHECK: bl elsewhere // encoding: [A,A,A,0x94'A']
+// CHECK: // fixup A - offset: 0, value: elsewhere, kind: fixup_a64_call
+
+ b #4
+ bl #0
+ b #134217724
+ bl #-134217728
+// CHECK: b #4 // encoding: [0x01,0x00,0x00,0x14]
+// CHECK: bl #0 // encoding: [0x00,0x00,0x00,0x94]
+// CHECK: b #134217724 // encoding: [0xff,0xff,0xff,0x15]
+// CHECK: bl #-134217728 // encoding: [0x00,0x00,0x00,0x96]
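+
+// Informative note: these immediates sit at the limits of the signed 26-bit
+// word offset: (2^25 - 1) * 4 = 134217724 and -(2^25) * 4 = -134217728, i.e.
+// a +/-128MiB range for b and bl.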
+
+//------------------------------------------------------------------------------
+// Unconditional branch (register)
+//------------------------------------------------------------------------------
+
+ br x20
+ blr xzr
+ ret x10
+// CHECK: br x20 // encoding: [0x80,0x02,0x1f,0xd6]
+// CHECK: blr xzr // encoding: [0xe0,0x03,0x3f,0xd6]
+// CHECK: ret x10 // encoding: [0x40,0x01,0x5f,0xd6]
+
+ ret
+ eret
+ drps
+// CHECK: ret // encoding: [0xc0,0x03,0x5f,0xd6]
+// CHECK: eret // encoding: [0xe0,0x03,0x9f,0xd6]
+// CHECK: drps // encoding: [0xe0,0x03,0xbf,0xd6]
+
diff --git a/test/MC/AArch64/elf-globaladdress.ll b/test/MC/AArch64/elf-globaladdress.ll
new file mode 100644
index 000000000000..190439d8fe48
--- /dev/null
+++ b/test/MC/AArch64/elf-globaladdress.ll
@@ -0,0 +1,111 @@
+;; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+;; RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+; Also take it on a round-trip through llvm-mc to stretch assembly-parsing's legs:
+;; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | \
+;; RUN: llvm-mc -arch=aarch64 -filetype=obj -o - | \
+;; RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define void @loadstore() {
+ %val8 = load i8* @var8
+ store volatile i8 %val8, i8* @var8
+
+ %val16 = load i16* @var16
+ store volatile i16 %val16, i16* @var16
+
+ %val32 = load i32* @var32
+ store volatile i32 %val32, i32* @var32
+
+ %val64 = load i64* @var64
+ store volatile i64 %val64, i64* @var64
+
+ ret void
+}
+
+@globaddr = global i64* null
+
+define void @address() {
+ store i64* @var64, i64** @globaddr
+ ret void
+}
+
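+; Note (informative): each load/store above should become an adrp of the
+; variable's 4KiB page plus an access taking a :lo12: page offset, producing
+; an ADR_PREL_PG_HI21 relocation paired with a size-specific
+; LDST*_ABS_LO12_NC one; the pure address computation in @address should
+; instead pair it with ADD_ABS_LO12_NC. The checks below verify this.
+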
+; Check we're using EM_AARCH64
+; OBJ: 'e_machine', 0x00b7
+
+; OBJ: .rela.text
+
+; var8
+; R_AARCH64_ADR_PREL_PG_HI21 against var8
+; OBJ: 'r_sym', 0x0000000f
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST8_ABS_LO12_NC against var8
+; OBJ: 'r_sym', 0x0000000f
+; OBJ-NEXT: 'r_type', 0x00000116
+
+
+; var16
+; R_AARCH64_ADR_PREL_PG_HI21 against var16
+; OBJ: 'r_sym', 0x0000000c
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST16_ABS_LO12_NC against var16
+; OBJ: 'r_sym', 0x0000000c
+; OBJ-NEXT: 'r_type', 0x0000011c
+
+
+; var32
+; R_AARCH64_ADR_PREL_PG_HI21 against var32
+; OBJ: 'r_sym', 0x0000000d
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST32_ABS_LO12_NC against var32
+; OBJ: 'r_sym', 0x0000000d
+; OBJ-NEXT: 'r_type', 0x0000011d
+
+
+; var64
+; R_AARCH64_ADR_PREL_PG_HI21 against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_LDST64_ABS_LO12_NC against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x0000011e
+
+; This reloc is on the store, so not important in itself, but it must be
+; consumed here or it would stop the next match from working.
+; R_AARCH64_LDST64_ABS_LO12_NC against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x0000011e
+
+
+; Pure address-calculation against var64
+; R_AARCH64_ADR_PREL_PG_HI21 against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x00000113
+
+; R_AARCH64_ADD_ABS_LO12_NC against var64
+; OBJ: 'r_sym', 0x0000000e
+; OBJ-NEXT: 'r_type', 0x00000115
+
+
+; Make sure the symbols don't move around, otherwise relocation info
+; will be wrong:
+
+; OBJ: Symbol 12
+; OBJ-NEXT: var16
+
+; OBJ: Symbol 13
+; OBJ-NEXT: var32
+
+; OBJ: Symbol 14
+; OBJ-NEXT: var64
+
+; OBJ: Symbol 15
+; OBJ-NEXT: var8
diff --git a/test/MC/AArch64/elf-objdump.s b/test/MC/AArch64/elf-objdump.s
new file mode 100644
index 000000000000..c5aa5b19899e
--- /dev/null
+++ b/test/MC/AArch64/elf-objdump.s
@@ -0,0 +1,5 @@
+// 64-bit little-endian
+// RUN: llvm-mc -filetype=obj -arch=aarch64 -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d
+
+// We just want to see if llvm-objdump works at all.
+// CHECK: .text
diff --git a/test/MC/AArch64/elf-reloc-addsubimm.s b/test/MC/AArch64/elf-reloc-addsubimm.s
new file mode 100644
index 000000000000..7fa6e90b5d0d
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-addsubimm.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ add x2, x3, #:lo12:some_label
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000115
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-condbr.s b/test/MC/AArch64/elf-reloc-condbr.s
new file mode 100644
index 000000000000..283d3b95d0db
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-condbr.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ b.eq somewhere
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000118
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: somewhere
\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s
new file mode 100644
index 000000000000..ce9ff49db448
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-ldrlit.s
@@ -0,0 +1,28 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ ldr x0, some_label
+ ldr w3, some_label
+ ldrsw x9, some_label
+ prfm pldl3keep, some_label
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000111
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s
new file mode 100644
index 000000000000..345fc8247d0e
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s
@@ -0,0 +1,34 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ ldrb w0, [sp, #:lo12:some_label]
+ ldrh w0, [sp, #:lo12:some_label]
+ ldr w0, [sp, #:lo12:some_label]
+ ldr x0, [sp, #:lo12:some_label]
+ str q0, [sp, #:lo12:some_label]
+
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000116
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011c
+
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011d
+
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011e
+
+// OBJ: 'r_offset', 0x0000000000000010
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000012b
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
diff --git a/test/MC/AArch64/elf-reloc-movw.s b/test/MC/AArch64/elf-reloc-movw.s
new file mode 100644
index 000000000000..cb7dc6768e32
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-movw.s
@@ -0,0 +1,98 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ movz x0, #:abs_g0:some_label
+ movk x0, #:abs_g0_nc:some_label
+
+ movz x3, #:abs_g1:some_label
+ movk x5, #:abs_g1_nc:some_label
+
+ movz x3, #:abs_g2:some_label
+ movk x5, #:abs_g2_nc:some_label
+
+ movz x7, #:abs_g3:some_label
+ movk x11, #:abs_g3:some_label
+
+ movz x13, #:abs_g0_s:some_label
+ movn x17, #:abs_g0_s:some_label
+
+ movz x19, #:abs_g1_s:some_label
+ movn x19, #:abs_g1_s:some_label
+
+ movz x19, #:abs_g2_s:some_label
+ movn x19, #:abs_g2_s:some_label
+// OBJ: .rela.text
+
+// :abs_g0: => R_AARCH64_MOVW_UABS_G0
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000107
+
+// :abs_g0_nc: => R_AARCH64_MOVW_UABS_G0_NC
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000108
+
+// :abs_g1: => R_AARCH64_MOVW_UABS_G1
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000109
+
+// :abs_g1_nc: => R_AARCH64_MOVW_UABS_G1_NC
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010a
+
+// :abs_g2: => R_AARCH64_MOVW_UABS_G2
+// OBJ: 'r_offset', 0x0000000000000010
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010b
+
+// :abs_g2_nc: => R_AARCH64_MOVW_UABS_G2_NC
+// OBJ: 'r_offset', 0x0000000000000014
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010c
+
+// :abs_g3: => R_AARCH64_MOVW_UABS_G3
+// OBJ: 'r_offset', 0x0000000000000018
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010d
+
+// :abs_g3: => R_AARCH64_MOVW_UABS_G3
+// OBJ: 'r_offset', 0x000000000000001c
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010d
+
+// :abs_g0_s: => R_AARCH64_MOVW_SABS_G0
+// OBJ: 'r_offset', 0x0000000000000020
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010e
+
+// :abs_g0_s: => R_AARCH64_MOVW_SABS_G0
+// OBJ: 'r_offset', 0x0000000000000024
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010e
+
+// :abs_g1_s: => R_AARCH64_MOVW_SABS_G1
+// OBJ: 'r_offset', 0x0000000000000028
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010f
+
+// :abs_g1_s: => R_AARCH64_MOVW_SABS_G1
+// OBJ: 'r_offset', 0x000000000000002c
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000010f
+
+// :abs_g2_s: => R_AARCH64_MOVW_SABS_G2
+// OBJ: 'r_offset', 0x0000000000000030
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000110
+
+// :abs_g2_s: => R_AARCH64_MOVW_SABS_G2
+// OBJ: 'r_offset', 0x0000000000000034
+// OBJ: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000110
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing.s b/test/MC/AArch64/elf-reloc-pcreladdressing.s
new file mode 100644
index 000000000000..39a8ba9402a8
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-pcreladdressing.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ adr x2, some_label
+ adrp x5, some_label
+
+ adrp x5, :got:some_label
+ ldr x0, [x5, #:got_lo12:some_label]
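+
+// Informative note: the :got: pair goes via the GOT rather than the symbol
+// itself: adrp takes the page of the GOT slot (R_AARCH64_ADR_GOT_PAGE,
+// 0x137) and the ldr's :got_lo12: offset loads the address out of that slot
+// (R_AARCH64_LD64_GOT_LO12_NC, 0x138), matching the last two r_type values
+// below.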
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000112
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000113
+
+// OBJ: 'r_offset', 0x0000000000000008
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000137
+
+// OBJ: 'r_offset', 0x000000000000000c
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000138
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: some_label
\ No newline at end of file
diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s
new file mode 100644
index 000000000000..c5e2981a22ef
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-tstb.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ tbz x6, #45, somewhere
+ tbnz w3, #15, somewhere
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000117
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x00000117
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: somewhere
diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s
new file mode 100644
index 000000000000..0e97bc66695f
--- /dev/null
+++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ b somewhere
+ bl somewhere
+// OBJ: .rela.text
+
+// OBJ: 'r_offset', 0x0000000000000000
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011a
+
+// OBJ: 'r_offset', 0x0000000000000004
+// OBJ-NEXT: 'r_sym', 0x00000005
+// OBJ-NEXT: 'r_type', 0x0000011b
+
+// OBJ: .symtab
+// OBJ: Symbol 5
+// OBJ-NEXT: somewhere
\ No newline at end of file
diff --git a/test/MC/AArch64/gicv3-regs-diagnostics.s b/test/MC/AArch64/gicv3-regs-diagnostics.s
new file mode 100644
index 000000000000..e891adbbb375
--- /dev/null
+++ b/test/MC/AArch64/gicv3-regs-diagnostics.s
@@ -0,0 +1,61 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s
+
+ // Write-only
+ mrs x10, icc_eoir1_el1
+ mrs x7, icc_eoir0_el1
+ mrs x22, icc_dir_el1
+ mrs x24, icc_sgi1r_el1
+ mrs x8, icc_asgi1r_el1
+ mrs x28, icc_sgi0r_el1
+// CHECK: error: expected readable system register
+// CHECK-NEXT: mrs x10, icc_eoir1_el1
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT: mrs x7, icc_eoir0_el1
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT: mrs x22, icc_dir_el1
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT: mrs x24, icc_sgi1r_el1
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT: mrs x8, icc_asgi1r_el1
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT: mrs x28, icc_sgi0r_el1
+// CHECK-NEXT: ^
+
+ // Read-only
+ msr icc_iar1_el1, x16
+ msr icc_iar0_el1, x19
+ msr icc_hppir1_el1, x29
+ msr icc_hppir0_el1, x14
+ msr icc_rpr_el1, x6
+ msr ich_vtr_el2, x8
+ msr ich_eisr_el2, x22
+ msr ich_elsr_el2, x8
+// CHECK: error: expected writable system register or pstate
+// CHECK-NEXT: msr icc_iar1_el1, x16
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr icc_iar0_el1, x19
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr icc_hppir1_el1, x29
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr icc_hppir0_el1, x14
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr icc_rpr_el1, x6
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr ich_vtr_el2, x8
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr ich_eisr_el2, x22
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr ich_elsr_el2, x8
+// CHECK-NEXT: ^
diff --git a/test/MC/AArch64/gicv3-regs.s b/test/MC/AArch64/gicv3-regs.s
new file mode 100644
index 000000000000..f7776514da09
--- /dev/null
+++ b/test/MC/AArch64/gicv3-regs.s
@@ -0,0 +1,223 @@
+ // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
+
+ mrs x8, icc_iar1_el1
+ mrs x26, icc_iar0_el1
+ mrs x2, icc_hppir1_el1
+ mrs x17, icc_hppir0_el1
+ mrs x29, icc_rpr_el1
+ mrs x4, ich_vtr_el2
+ mrs x24, ich_eisr_el2
+ mrs x9, ich_elsr_el2
+ mrs x24, icc_bpr1_el1
+ mrs x14, icc_bpr0_el1
+ mrs x19, icc_pmr_el1
+ mrs x23, icc_ctlr_el1
+ mrs x20, icc_ctlr_el3
+ mrs x28, icc_sre_el1
+ mrs x25, icc_sre_el2
+ mrs x8, icc_sre_el3
+ mrs x22, icc_igrpen0_el1
+ mrs x5, icc_igrpen1_el1
+ mrs x7, icc_igrpen1_el3
+ mrs x22, icc_seien_el1
+ mrs x4, icc_ap0r0_el1
+ mrs x11, icc_ap0r1_el1
+ mrs x27, icc_ap0r2_el1
+ mrs x21, icc_ap0r3_el1
+ mrs x2, icc_ap1r0_el1
+ mrs x21, icc_ap1r1_el1
+ mrs x10, icc_ap1r2_el1
+ mrs x27, icc_ap1r3_el1
+ mrs x20, ich_ap0r0_el2
+ mrs x21, ich_ap0r1_el2
+ mrs x5, ich_ap0r2_el2
+ mrs x4, ich_ap0r3_el2
+ mrs x15, ich_ap1r0_el2
+ mrs x12, ich_ap1r1_el2
+ mrs x27, ich_ap1r2_el2
+ mrs x20, ich_ap1r3_el2
+ mrs x10, ich_hcr_el2
+ mrs x27, ich_misr_el2
+ mrs x6, ich_vmcr_el2
+ mrs x19, ich_vseir_el2
+ mrs x3, ich_lr0_el2
+ mrs x1, ich_lr1_el2
+ mrs x22, ich_lr2_el2
+ mrs x21, ich_lr3_el2
+ mrs x6, ich_lr4_el2
+ mrs x10, ich_lr5_el2
+ mrs x11, ich_lr6_el2
+ mrs x12, ich_lr7_el2
+ mrs x0, ich_lr8_el2
+ mrs x21, ich_lr9_el2
+ mrs x13, ich_lr10_el2
+ mrs x26, ich_lr11_el2
+ mrs x1, ich_lr12_el2
+ mrs x8, ich_lr13_el2
+ mrs x2, ich_lr14_el2
+ mrs x8, ich_lr15_el2
+// CHECK: mrs x8, icc_iar1_el1 // encoding: [0x08,0xcc,0x38,0xd5]
+// CHECK: mrs x26, icc_iar0_el1 // encoding: [0x1a,0xc8,0x38,0xd5]
+// CHECK: mrs x2, icc_hppir1_el1 // encoding: [0x42,0xcc,0x38,0xd5]
+// CHECK: mrs x17, icc_hppir0_el1 // encoding: [0x51,0xc8,0x38,0xd5]
+// CHECK: mrs x29, icc_rpr_el1 // encoding: [0x7d,0xcb,0x38,0xd5]
+// CHECK: mrs x4, ich_vtr_el2 // encoding: [0x24,0xcb,0x3c,0xd5]
+// CHECK: mrs x24, ich_eisr_el2 // encoding: [0x78,0xcb,0x3c,0xd5]
+// CHECK: mrs x9, ich_elsr_el2 // encoding: [0xa9,0xcb,0x3c,0xd5]
+// CHECK: mrs x24, icc_bpr1_el1 // encoding: [0x78,0xcc,0x38,0xd5]
+// CHECK: mrs x14, icc_bpr0_el1 // encoding: [0x6e,0xc8,0x38,0xd5]
+// CHECK: mrs x19, icc_pmr_el1 // encoding: [0x13,0x46,0x38,0xd5]
+// CHECK: mrs x23, icc_ctlr_el1 // encoding: [0x97,0xcc,0x38,0xd5]
+// CHECK: mrs x20, icc_ctlr_el3 // encoding: [0x94,0xcc,0x3e,0xd5]
+// CHECK: mrs x28, icc_sre_el1 // encoding: [0xbc,0xcc,0x38,0xd5]
+// CHECK: mrs x25, icc_sre_el2 // encoding: [0xb9,0xc9,0x3c,0xd5]
+// CHECK: mrs x8, icc_sre_el3 // encoding: [0xa8,0xcc,0x3e,0xd5]
+// CHECK: mrs x22, icc_igrpen0_el1 // encoding: [0xd6,0xcc,0x38,0xd5]
+// CHECK: mrs x5, icc_igrpen1_el1 // encoding: [0xe5,0xcc,0x38,0xd5]
+// CHECK: mrs x7, icc_igrpen1_el3 // encoding: [0xe7,0xcc,0x3e,0xd5]
+// CHECK: mrs x22, icc_seien_el1 // encoding: [0x16,0xcd,0x38,0xd5]
+// CHECK: mrs x4, icc_ap0r0_el1 // encoding: [0x84,0xc8,0x38,0xd5]
+// CHECK: mrs x11, icc_ap0r1_el1 // encoding: [0xab,0xc8,0x38,0xd5]
+// CHECK: mrs x27, icc_ap0r2_el1 // encoding: [0xdb,0xc8,0x38,0xd5]
+// CHECK: mrs x21, icc_ap0r3_el1 // encoding: [0xf5,0xc8,0x38,0xd5]
+// CHECK: mrs x2, icc_ap1r0_el1 // encoding: [0x02,0xc9,0x38,0xd5]
+// CHECK: mrs x21, icc_ap1r1_el1 // encoding: [0x35,0xc9,0x38,0xd5]
+// CHECK: mrs x10, icc_ap1r2_el1 // encoding: [0x4a,0xc9,0x38,0xd5]
+// CHECK: mrs x27, icc_ap1r3_el1 // encoding: [0x7b,0xc9,0x38,0xd5]
+// CHECK: mrs x20, ich_ap0r0_el2 // encoding: [0x14,0xc8,0x3c,0xd5]
+// CHECK: mrs x21, ich_ap0r1_el2 // encoding: [0x35,0xc8,0x3c,0xd5]
+// CHECK: mrs x5, ich_ap0r2_el2 // encoding: [0x45,0xc8,0x3c,0xd5]
+// CHECK: mrs x4, ich_ap0r3_el2 // encoding: [0x64,0xc8,0x3c,0xd5]
+// CHECK: mrs x15, ich_ap1r0_el2 // encoding: [0x0f,0xc9,0x3c,0xd5]
+// CHECK: mrs x12, ich_ap1r1_el2 // encoding: [0x2c,0xc9,0x3c,0xd5]
+// CHECK: mrs x27, ich_ap1r2_el2 // encoding: [0x5b,0xc9,0x3c,0xd5]
+// CHECK: mrs x20, ich_ap1r3_el2 // encoding: [0x74,0xc9,0x3c,0xd5]
+// CHECK: mrs x10, ich_hcr_el2 // encoding: [0x0a,0xcb,0x3c,0xd5]
+// CHECK: mrs x27, ich_misr_el2 // encoding: [0x5b,0xcb,0x3c,0xd5]
+// CHECK: mrs x6, ich_vmcr_el2 // encoding: [0xe6,0xcb,0x3c,0xd5]
+// CHECK: mrs x19, ich_vseir_el2 // encoding: [0x93,0xc9,0x3c,0xd5]
+// CHECK: mrs x3, ich_lr0_el2 // encoding: [0x03,0xcc,0x3c,0xd5]
+// CHECK: mrs x1, ich_lr1_el2 // encoding: [0x21,0xcc,0x3c,0xd5]
+// CHECK: mrs x22, ich_lr2_el2 // encoding: [0x56,0xcc,0x3c,0xd5]
+// CHECK: mrs x21, ich_lr3_el2 // encoding: [0x75,0xcc,0x3c,0xd5]
+// CHECK: mrs x6, ich_lr4_el2 // encoding: [0x86,0xcc,0x3c,0xd5]
+// CHECK: mrs x10, ich_lr5_el2 // encoding: [0xaa,0xcc,0x3c,0xd5]
+// CHECK: mrs x11, ich_lr6_el2 // encoding: [0xcb,0xcc,0x3c,0xd5]
+// CHECK: mrs x12, ich_lr7_el2 // encoding: [0xec,0xcc,0x3c,0xd5]
+// CHECK: mrs x0, ich_lr8_el2 // encoding: [0x00,0xcd,0x3c,0xd5]
+// CHECK: mrs x21, ich_lr9_el2 // encoding: [0x35,0xcd,0x3c,0xd5]
+// CHECK: mrs x13, ich_lr10_el2 // encoding: [0x4d,0xcd,0x3c,0xd5]
+// CHECK: mrs x26, ich_lr11_el2 // encoding: [0x7a,0xcd,0x3c,0xd5]
+// CHECK: mrs x1, ich_lr12_el2 // encoding: [0x81,0xcd,0x3c,0xd5]
+// CHECK: mrs x8, ich_lr13_el2 // encoding: [0xa8,0xcd,0x3c,0xd5]
+// CHECK: mrs x2, ich_lr14_el2 // encoding: [0xc2,0xcd,0x3c,0xd5]
+// CHECK: mrs x8, ich_lr15_el2 // encoding: [0xe8,0xcd,0x3c,0xd5]
+
+ msr icc_eoir1_el1, x27
+ msr icc_eoir0_el1, x5
+ msr icc_dir_el1, x13
+ msr icc_sgi1r_el1, x21
+ msr icc_asgi1r_el1, x25
+ msr icc_sgi0r_el1, x28
+ msr icc_bpr1_el1, x7
+ msr icc_bpr0_el1, x9
+ msr icc_pmr_el1, x29
+ msr icc_ctlr_el1, x24
+ msr icc_ctlr_el3, x0
+ msr icc_sre_el1, x2
+ msr icc_sre_el2, x5
+ msr icc_sre_el3, x10
+ msr icc_igrpen0_el1, x22
+ msr icc_igrpen1_el1, x11
+ msr icc_igrpen1_el3, x8
+ msr icc_seien_el1, x4
+ msr icc_ap0r0_el1, x27
+ msr icc_ap0r1_el1, x5
+ msr icc_ap0r2_el1, x20
+ msr icc_ap0r3_el1, x0
+ msr icc_ap1r0_el1, x2
+ msr icc_ap1r1_el1, x29
+ msr icc_ap1r2_el1, x23
+ msr icc_ap1r3_el1, x11
+ msr ich_ap0r0_el2, x2
+ msr ich_ap0r1_el2, x27
+ msr ich_ap0r2_el2, x7
+ msr ich_ap0r3_el2, x1
+ msr ich_ap1r0_el2, x7
+ msr ich_ap1r1_el2, x12
+ msr ich_ap1r2_el2, x14
+ msr ich_ap1r3_el2, x13
+ msr ich_hcr_el2, x1
+ msr ich_misr_el2, x10
+ msr ich_vmcr_el2, x24
+ msr ich_vseir_el2, x29
+ msr ich_lr0_el2, x26
+ msr ich_lr1_el2, x9
+ msr ich_lr2_el2, x18
+ msr ich_lr3_el2, x26
+ msr ich_lr4_el2, x22
+ msr ich_lr5_el2, x26
+ msr ich_lr6_el2, x27
+ msr ich_lr7_el2, x8
+ msr ich_lr8_el2, x17
+ msr ich_lr9_el2, x19
+ msr ich_lr10_el2, x17
+ msr ich_lr11_el2, x5
+ msr ich_lr12_el2, x29
+ msr ich_lr13_el2, x2
+ msr ich_lr14_el2, x13
+ msr ich_lr15_el2, x27
+// CHECK: msr icc_eoir1_el1, x27 // encoding: [0x3b,0xcc,0x18,0xd5]
+// CHECK: msr icc_eoir0_el1, x5 // encoding: [0x25,0xc8,0x18,0xd5]
+// CHECK: msr icc_dir_el1, x13 // encoding: [0x2d,0xcb,0x18,0xd5]
+// CHECK: msr icc_sgi1r_el1, x21 // encoding: [0xb5,0xcb,0x18,0xd5]
+// CHECK: msr icc_asgi1r_el1, x25 // encoding: [0xd9,0xcb,0x18,0xd5]
+// CHECK: msr icc_sgi0r_el1, x28 // encoding: [0xfc,0xcb,0x18,0xd5]
+// CHECK: msr icc_bpr1_el1, x7 // encoding: [0x67,0xcc,0x18,0xd5]
+// CHECK: msr icc_bpr0_el1, x9 // encoding: [0x69,0xc8,0x18,0xd5]
+// CHECK: msr icc_pmr_el1, x29 // encoding: [0x1d,0x46,0x18,0xd5]
+// CHECK: msr icc_ctlr_el1, x24 // encoding: [0x98,0xcc,0x18,0xd5]
+// CHECK: msr icc_ctlr_el3, x0 // encoding: [0x80,0xcc,0x1e,0xd5]
+// CHECK: msr icc_sre_el1, x2 // encoding: [0xa2,0xcc,0x18,0xd5]
+// CHECK: msr icc_sre_el2, x5 // encoding: [0xa5,0xc9,0x1c,0xd5]
+// CHECK: msr icc_sre_el3, x10 // encoding: [0xaa,0xcc,0x1e,0xd5]
+// CHECK: msr icc_igrpen0_el1, x22 // encoding: [0xd6,0xcc,0x18,0xd5]
+// CHECK: msr icc_igrpen1_el1, x11 // encoding: [0xeb,0xcc,0x18,0xd5]
+// CHECK: msr icc_igrpen1_el3, x8 // encoding: [0xe8,0xcc,0x1e,0xd5]
+// CHECK: msr icc_seien_el1, x4 // encoding: [0x04,0xcd,0x18,0xd5]
+// CHECK: msr icc_ap0r0_el1, x27 // encoding: [0x9b,0xc8,0x18,0xd5]
+// CHECK: msr icc_ap0r1_el1, x5 // encoding: [0xa5,0xc8,0x18,0xd5]
+// CHECK: msr icc_ap0r2_el1, x20 // encoding: [0xd4,0xc8,0x18,0xd5]
+// CHECK: msr icc_ap0r3_el1, x0 // encoding: [0xe0,0xc8,0x18,0xd5]
+// CHECK: msr icc_ap1r0_el1, x2 // encoding: [0x02,0xc9,0x18,0xd5]
+// CHECK: msr icc_ap1r1_el1, x29 // encoding: [0x3d,0xc9,0x18,0xd5]
+// CHECK: msr icc_ap1r2_el1, x23 // encoding: [0x57,0xc9,0x18,0xd5]
+// CHECK: msr icc_ap1r3_el1, x11 // encoding: [0x6b,0xc9,0x18,0xd5]
+// CHECK: msr ich_ap0r0_el2, x2 // encoding: [0x02,0xc8,0x1c,0xd5]
+// CHECK: msr ich_ap0r1_el2, x27 // encoding: [0x3b,0xc8,0x1c,0xd5]
+// CHECK: msr ich_ap0r2_el2, x7 // encoding: [0x47,0xc8,0x1c,0xd5]
+// CHECK: msr ich_ap0r3_el2, x1 // encoding: [0x61,0xc8,0x1c,0xd5]
+// CHECK: msr ich_ap1r0_el2, x7 // encoding: [0x07,0xc9,0x1c,0xd5]
+// CHECK: msr ich_ap1r1_el2, x12 // encoding: [0x2c,0xc9,0x1c,0xd5]
+// CHECK: msr ich_ap1r2_el2, x14 // encoding: [0x4e,0xc9,0x1c,0xd5]
+// CHECK: msr ich_ap1r3_el2, x13 // encoding: [0x6d,0xc9,0x1c,0xd5]
+// CHECK: msr ich_hcr_el2, x1 // encoding: [0x01,0xcb,0x1c,0xd5]
+// CHECK: msr ich_misr_el2, x10 // encoding: [0x4a,0xcb,0x1c,0xd5]
+// CHECK: msr ich_vmcr_el2, x24 // encoding: [0xf8,0xcb,0x1c,0xd5]
+// CHECK: msr ich_vseir_el2, x29 // encoding: [0x9d,0xc9,0x1c,0xd5]
+// CHECK: msr ich_lr0_el2, x26 // encoding: [0x1a,0xcc,0x1c,0xd5]
+// CHECK: msr ich_lr1_el2, x9 // encoding: [0x29,0xcc,0x1c,0xd5]
+// CHECK: msr ich_lr2_el2, x18 // encoding: [0x52,0xcc,0x1c,0xd5]
+// CHECK: msr ich_lr3_el2, x26 // encoding: [0x7a,0xcc,0x1c,0xd5]
+// CHECK: msr ich_lr4_el2, x22 // encoding: [0x96,0xcc,0x1c,0xd5]
+// CHECK: msr ich_lr5_el2, x26 // encoding: [0xba,0xcc,0x1c,0xd5]
+// CHECK: msr ich_lr6_el2, x27 // encoding: [0xdb,0xcc,0x1c,0xd5]
+// CHECK: msr ich_lr7_el2, x8 // encoding: [0xe8,0xcc,0x1c,0xd5]
+// CHECK: msr ich_lr8_el2, x17 // encoding: [0x11,0xcd,0x1c,0xd5]
+// CHECK: msr ich_lr9_el2, x19 // encoding: [0x33,0xcd,0x1c,0xd5]
+// CHECK: msr ich_lr10_el2, x17 // encoding: [0x51,0xcd,0x1c,0xd5]
+// CHECK: msr ich_lr11_el2, x5 // encoding: [0x65,0xcd,0x1c,0xd5]
+// CHECK: msr ich_lr12_el2, x29 // encoding: [0x9d,0xcd,0x1c,0xd5]
+// CHECK: msr ich_lr13_el2, x2 // encoding: [0xa2,0xcd,0x1c,0xd5]
+// CHECK: msr ich_lr14_el2, x13 // encoding: [0xcd,0xcd,0x1c,0xd5]
+// CHECK: msr ich_lr15_el2, x27 // encoding: [0xfb,0xcd,0x1c,0xd5]
diff --git a/test/MC/AArch64/lit.local.cfg b/test/MC/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..cc02173c8ed4
--- /dev/null
+++ b/test/MC/AArch64/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if 'AArch64' not in targets:
+ config.unsupported = True
\ No newline at end of file
diff --git a/test/MC/AArch64/mapping-across-sections.s b/test/MC/AArch64/mapping-across-sections.s
new file mode 100644
index 000000000000..3d32c1dfb400
--- /dev/null
+++ b/test/MC/AArch64/mapping-across-sections.s
@@ -0,0 +1,28 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+
+ .text
+ add w0, w0, w0
+
+// .wibble should *not* inherit .text's mapping symbol. It's a completely different section.
+ .section .wibble
+ add w0, w0, w0
+
+// A section should be able to start with a $d
+ .section .starts_data
+ .word 42
+
+// Changing back to .text should not emit a redundant $x
+ .text
+ add w0, w0, w0
+
+// With all those constraints, we want:
+// + .text to have $x at 0 and no others
+// + .wibble to have $x at 0
+// + .starts_data to have $d at 0
+
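+// (Mapping symbols are the ELF markers ARM tooling uses to tell code from
+// data: $x starts a run of A64 instructions, $d a run of data.)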
+
+// CHECK: 00000000 .starts_data 00000000 $d
+// CHECK-NEXT: 00000000 .text 00000000 $x
+// CHECK-NEXT: 00000000 .wibble 00000000 $x
+// CHECK-NOT: ${{[adtx]}}
+
diff --git a/test/MC/AArch64/mapping-within-section.s b/test/MC/AArch64/mapping-within-section.s
new file mode 100644
index 000000000000..c8bd804fa0e3
--- /dev/null
+++ b/test/MC/AArch64/mapping-within-section.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+
+ .text
+// $x at 0x0000
+ add w0, w0, w0
+// $d at 0x0004
+ .ascii "012"
+ .byte 1
+ .hword 2
+ .word 4
+ .xword 8
+ .single 4.0
+ .double 8.0
+ .space 10
+ .zero 3
+ .fill 10, 2, 42
+ .org 100, 12
+// $x at 0x0064 (after the .org 100 padding)
+ add x0, x0, x0
+
+// CHECK: 00000004 .text 00000000 $d
+// CHECK-NEXT: 00000000 .text 00000000 $x
+// CHECK-NEXT: 00000064 .text 00000000 $x
diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s
new file mode 100644
index 000000000000..690fa8c00962
--- /dev/null
+++ b/test/MC/AArch64/tls-relocs.s
@@ -0,0 +1,662 @@
+// RUN: llvm-mc -arch=aarch64 -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -arch=aarch64 -filetype=obj < %s -o %t
+// RUN: elf-dump %t | FileCheck --check-prefix=CHECK-ELF %s
+// RUN: llvm-objdump -r %t | FileCheck --check-prefix=CHECK-ELF-NAMES %s
+
+// CHECK-ELF: .rela.text
+
+ // TLS local-dynamic forms
+ movz x1, #:dtprel_g2:var
+ movn x2, #:dtprel_g2:var
+ movz x3, #:dtprel_g2:var
+ movn x4, #:dtprel_g2:var
+// CHECK: movz x1, #:dtprel_g2:var // encoding: [0x01'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+// CHECK-NEXT: movn x2, #:dtprel_g2:var // encoding: [0x02'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+// CHECK-NEXT: movz x3, #:dtprel_g2:var // encoding: [0x03'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+// CHECK-NEXT: movn x4, #:dtprel_g2:var // encoding: [0x04'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
+
+// CHECK-ELF: # Relocation 0
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000000)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM:0x[0-9a-f]+]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020b)
+// CHECK-ELF: # Relocation 1
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000004)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020b)
+// CHECK-ELF: # Relocation 2
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000008)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020b)
+// CHECK-ELF: # Relocation 3
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000000c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020b)
+
+// CHECK-ELF-NAMES: 0 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+// CHECK-ELF-NAMES: 4 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+// CHECK-ELF-NAMES: 8 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+// CHECK-ELF-NAMES: 12 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+
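+// Informative note: the :dtprel_gN: modifiers select 16-bit slices of the
+// DTP-relative offset - g2 is bits [47:32], g1 bits [31:16], g0 bits [15:0] -
+// so a full offset is built with a movz/movk chain; the _nc variants are the
+// same slices minus the overflow check.
+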
+ movz x5, #:dtprel_g1:var
+ movn x6, #:dtprel_g1:var
+ movz w7, #:dtprel_g1:var
+ movn w8, #:dtprel_g1:var
+// CHECK: movz x5, #:dtprel_g1:var // encoding: [0x05'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+// CHECK-NEXT: movn x6, #:dtprel_g1:var // encoding: [0x06'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+// CHECK-NEXT: movz w7, #:dtprel_g1:var // encoding: [0x07'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+// CHECK-NEXT: movn w8, #:dtprel_g1:var // encoding: [0x08'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
+
+// CHECK-ELF: # Relocation 4
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000010)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020c)
+// CHECK-ELF: # Relocation 5
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000014)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020c)
+// CHECK-ELF: # Relocation 6
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000018)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020c)
+// CHECK-ELF: # Relocation 7
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000001c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020c)
+
+// CHECK-ELF-NAMES: 16 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+// CHECK-ELF-NAMES: 20 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+// CHECK-ELF-NAMES: 24 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+// CHECK-ELF-NAMES: 28 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+
+ movk x9, #:dtprel_g1_nc:var
+ movk w10, #:dtprel_g1_nc:var
+// CHECK: movk x9, #:dtprel_g1_nc:var // encoding: [0x09'A',A,0xa0'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
+// CHECK-NEXT: movk w10, #:dtprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
+
+// CHECK-ELF: # Relocation 8
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000020)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020d)
+// CHECK-ELF: # Relocation 9
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000024)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020d)
+
+// CHECK-ELF-NAMES: 32 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC
+// CHECK-ELF-NAMES: 36 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC
+
+ movz x11, #:dtprel_g0:var
+ movn x12, #:dtprel_g0:var
+ movz w13, #:dtprel_g0:var
+ movn w14, #:dtprel_g0:var
+// CHECK: movz x11, #:dtprel_g0:var // encoding: [0x0b'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+// CHECK-NEXT: movn x12, #:dtprel_g0:var // encoding: [0x0c'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+// CHECK-NEXT: movz w13, #:dtprel_g0:var // encoding: [0x0d'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+// CHECK-NEXT: movn w14, #:dtprel_g0:var // encoding: [0x0e'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
+
+// CHECK-ELF: # Relocation 10
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000028)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020e)
+// CHECK-ELF: # Relocation 11
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000002c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020e)
+// CHECK-ELF: # Relocation 12
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000030)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020e)
+// CHECK-ELF: # Relocation 13
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000034)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020e)
+
+// CHECK-ELF-NAMES: 40 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+// CHECK-ELF-NAMES: 44 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+// CHECK-ELF-NAMES: 48 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+// CHECK-ELF-NAMES: 52 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+
+
+ movk x15, #:dtprel_g0_nc:var
+ movk w16, #:dtprel_g0_nc:var
+// CHECK: movk x15, #:dtprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
+// CHECK-NEXT: movk w16, #:dtprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
+
+// CHECK-ELF: # Relocation 14
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000038)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020f)
+// CHECK-ELF: # Relocation 15
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000003c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000020f)
+
+// CHECK-ELF-NAMES: 56 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC
+// CHECK-ELF-NAMES: 60 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC
+
+ add x17, x18, #:dtprel_hi12:var, lsl #12
+ add w19, w20, #:dtprel_hi12:var, lsl #12
+// CHECK: add x17, x18, #:dtprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
+// CHECK-NEXT: add w19, w20, #:dtprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
+
+// CHECK-ELF: # Relocation 16
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000040)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000210)
+// CHECK-ELF: # Relocation 17
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000044)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000210)
+
+// CHECK-ELF-NAMES: 64 R_AARCH64_TLSLD_ADD_DTPREL_HI12
+// CHECK-ELF-NAMES: 68 R_AARCH64_TLSLD_ADD_DTPREL_HI12
+
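+// Informative note: :dtprel_hi12: (bits [23:12], shifted left by 12) and
+// :dtprel_lo12: (bits [11:0]) together build a DTP-relative offset of up to
+// 24 bits (16MiB) in two adds.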
+
+ add x21, x22, #:dtprel_lo12:var
+ add w23, w24, #:dtprel_lo12:var
+// CHECK: add x21, x22, #:dtprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
+// CHECK-NEXT: add w23, w24, #:dtprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
+
+// CHECK-ELF: # Relocation 18
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000048)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000211)
+// CHECK-ELF: # Relocation 19
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000004c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000211)
+
+// CHECK-ELF-NAMES: 72 R_AARCH64_TLSLD_ADD_DTPREL_LO12
+// CHECK-ELF-NAMES: 76 R_AARCH64_TLSLD_ADD_DTPREL_LO12
+
+ add x25, x26, #:dtprel_lo12_nc:var
+ add w27, w28, #:dtprel_lo12_nc:var
+// CHECK: add x25, x26, #:dtprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
+// CHECK-NEXT: add w27, w28, #:dtprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 20
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000050)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000212)
+// CHECK-ELF: # Relocation 21
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000054)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000212)
+
+// CHECK-ELF-NAMES: 80 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+// CHECK-ELF-NAMES: 84 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+
+ ldrb w29, [x30, #:dtprel_lo12:var]
+ ldrsb x29, [x28, #:dtprel_lo12_nc:var]
+// CHECK: ldrb w29, [x30, #:dtprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst8_dtprel_lo12
+// CHECK-NEXT: ldrsb x29, [x28, #:dtprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst8_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 22
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000058)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000213)
+// CHECK-ELF: # Relocation 23
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000005c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000214)
+
+// CHECK-ELF-NAMES: 88 R_AARCH64_TLSLD_LDST8_DTPREL_LO12
+// CHECK-ELF-NAMES: 92 R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC
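+// (The _NC suffix on the second relocation marks the "no check" variant:
+// the linker applies the same low-12-bit value but skips the overflow
+// check, which is why the checked/unchecked forms are tested in pairs.)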
+
+ strh w27, [x26, #:dtprel_lo12:var]
+ ldrsh x25, [x24, #:dtprel_lo12_nc:var]
+// CHECK: strh w27, [x26, #:dtprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst16_dtprel_lo12
+// CHECK-NEXT: ldrsh x25, [x24, #:dtprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst16_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 24
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000060)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000215)
+// CHECK-ELF: # Relocation 25
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000064)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000216)
+
+// CHECK-ELF-NAMES: 96 R_AARCH64_TLSLD_LDST16_DTPREL_LO12
+// CHECK-ELF-NAMES: 100 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC
+
+ ldr w23, [x22, #:dtprel_lo12:var]
+ ldrsw x21, [x20, #:dtprel_lo12_nc:var]
+// CHECK: ldr w23, [x22, #:dtprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst32_dtprel_lo12
+// CHECK-NEXT: ldrsw x21, [x20, #:dtprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst32_dtprel_lo12_nc
+
+// CHECK-ELF: # Relocation 26
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000068)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000217)
+// CHECK-ELF: # Relocation 27
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000006c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000218)
+
+// CHECK-ELF-NAMES: 104 R_AARCH64_TLSLD_LDST32_DTPREL_LO12
+// CHECK-ELF-NAMES: 108 R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC
+
+ ldr x19, [x18, #:dtprel_lo12:var]
+ str x17, [x16, #:dtprel_lo12_nc:var]
+// CHECK: ldr x19, [x18, #:dtprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst64_dtprel_lo12
+// CHECK-NEXT: str x17, [x16, #:dtprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst64_dtprel_lo12_nc
+
+
+// CHECK-ELF: # Relocation 28
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000070)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000219)
+// CHECK-ELF: # Relocation 29
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000074)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021a)
+
+// CHECK-ELF-NAMES: 112 R_AARCH64_TLSLD_LDST64_DTPREL_LO12
+// CHECK-ELF-NAMES: 116 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC
+
+ // TLS initial-exec forms
+ movz x15, #:gottprel_g1:var
+ movz w14, #:gottprel_g1:var
+// CHECK: movz x15, #:gottprel_g1:var // encoding: [0x0f'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
+// CHECK-NEXT: movz w14, #:gottprel_g1:var // encoding: [0x0e'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
+
+// CHECK-ELF: # Relocation 30
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000078)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021b)
+// CHECK-ELF: # Relocation 31
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000007c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021b)
+
+// CHECK-ELF-NAMES: 120 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1
+// CHECK-ELF-NAMES: 124 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1
+
+ movk x13, #:gottprel_g0_nc:var
+ movk w12, #:gottprel_g0_nc:var
+// CHECK: movk x13, #:gottprel_g0_nc:var // encoding: [0x0d'A',A,0x80'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
+// CHECK-NEXT: movk w12, #:gottprel_g0_nc:var // encoding: [0x0c'A',A,0x80'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
+
+// CHECK-ELF: # Relocation 32
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000080)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021c)
+// CHECK-ELF: # Relocation 33
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000084)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021c)
+
+// CHECK-ELF-NAMES: 128 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC
+// CHECK-ELF-NAMES: 132 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC
+
+ adrp x11, :gottprel:var
+ ldr x10, [x0, #:gottprel_lo12:var]
+ ldr x9, :gottprel:var
+// CHECK: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_adr_gottprel_page
+// CHECK-NEXT: ldr x10, [x0, #:gottprel_lo12:var] // encoding: [0x0a'A',A,0x40'A',0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_a64_ld64_gottprel_lo12_nc
+// CHECK-NEXT: ldr x9, :gottprel:var // encoding: [0x09'A',A,A,0x58'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_ld_gottprel_prel19
+
+// CHECK-ELF: # Relocation 34
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000088)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021d)
+// CHECK-ELF: # Relocation 35
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000008c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021e)
+// CHECK-ELF: # Relocation 36
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000090)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000021f)
+
+// CHECK-ELF-NAMES: 136 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE
+// CHECK-ELF-NAMES: 140 R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
+// CHECK-ELF-NAMES: 144 R_AARCH64_TLSIE_LD_GOTTPREL_PREL19
+
+ // TLS local-exec forms
+ movz x3, #:tprel_g2:var
+ movn x4, #:tprel_g2:var
+// CHECK: movz x3, #:tprel_g2:var // encoding: [0x03'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
+// CHECK-NEXT: movn x4, #:tprel_g2:var // encoding: [0x04'A',A,0xc0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
+
+// CHECK-ELF: # Relocation 37
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000094)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000220)
+// CHECK-ELF: # Relocation 38
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000098)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000220)
+
+// CHECK-ELF-NAMES: 148 R_AARCH64_TLSLE_MOVW_TPREL_G2
+// CHECK-ELF-NAMES: 152 R_AARCH64_TLSLE_MOVW_TPREL_G2
+
+ movz x5, #:tprel_g1:var
+ movn x6, #:tprel_g1:var
+ movz w7, #:tprel_g1:var
+ movn w8, #:tprel_g1:var
+// CHECK: movz x5, #:tprel_g1:var // encoding: [0x05'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+// CHECK-NEXT: movn x6, #:tprel_g1:var // encoding: [0x06'A',A,0xa0'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+// CHECK-NEXT: movz w7, #:tprel_g1:var // encoding: [0x07'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+// CHECK-NEXT: movn w8, #:tprel_g1:var // encoding: [0x08'A',A,0xa0'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
+
+// CHECK-ELF: # Relocation 39
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000009c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000221)
+// CHECK-ELF: # Relocation 40
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000221)
+// CHECK-ELF: # Relocation 41
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000221)
+// CHECK-ELF: # Relocation 42
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000221)
+
+// CHECK-ELF-NAMES: 156 R_AARCH64_TLSLE_MOVW_TPREL_G1
+// CHECK-ELF-NAMES: 160 R_AARCH64_TLSLE_MOVW_TPREL_G1
+// CHECK-ELF-NAMES: 164 R_AARCH64_TLSLE_MOVW_TPREL_G1
+// CHECK-ELF-NAMES: 168 R_AARCH64_TLSLE_MOVW_TPREL_G1
+
+ movk x9, #:tprel_g1_nc:var
+ movk w10, #:tprel_g1_nc:var
+// CHECK: movk x9, #:tprel_g1_nc:var // encoding: [0x09'A',A,0xa0'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
+// CHECK-NEXT: movk w10, #:tprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
+
+// CHECK-ELF: # Relocation 43
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000ac)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000222)
+// CHECK-ELF: # Relocation 44
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000222)
+
+// CHECK-ELF-NAMES: 172 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
+// CHECK-ELF-NAMES: 176 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
+
+ movz x11, #:tprel_g0:var
+ movn x12, #:tprel_g0:var
+ movz w13, #:tprel_g0:var
+ movn w14, #:tprel_g0:var
+// CHECK: movz x11, #:tprel_g0:var // encoding: [0x0b'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+// CHECK-NEXT: movn x12, #:tprel_g0:var // encoding: [0x0c'A',A,0x80'A',0x92'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+// CHECK-NEXT: movz w13, #:tprel_g0:var // encoding: [0x0d'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+// CHECK-NEXT: movn w14, #:tprel_g0:var // encoding: [0x0e'A',A,0x80'A',0x12'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
+
+// CHECK-ELF: # Relocation 45
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000223)
+// CHECK-ELF: # Relocation 46
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000223)
+// CHECK-ELF: # Relocation 47
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000bc)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000223)
+// CHECK-ELF: # Relocation 48
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000223)
+
+// CHECK-ELF-NAMES: 180 R_AARCH64_TLSLE_MOVW_TPREL_G0
+// CHECK-ELF-NAMES: 184 R_AARCH64_TLSLE_MOVW_TPREL_G0
+// CHECK-ELF-NAMES: 188 R_AARCH64_TLSLE_MOVW_TPREL_G0
+// CHECK-ELF-NAMES: 192 R_AARCH64_TLSLE_MOVW_TPREL_G0
+
+ movk x15, #:tprel_g0_nc:var
+ movk w16, #:tprel_g0_nc:var
+// CHECK: movk x15, #:tprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
+// CHECK-NEXT: movk w16, #:tprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
+
+// CHECK-ELF: # Relocation 49
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000224)
+// CHECK-ELF: # Relocation 50
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000224)
+
+// CHECK-ELF-NAMES: 196 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+// CHECK-ELF-NAMES: 200 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+
+ add x17, x18, #:tprel_hi12:var, lsl #12
+ add w19, w20, #:tprel_hi12:var, lsl #12
+// CHECK: add x17, x18, #:tprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
+// CHECK-NEXT: add w19, w20, #:tprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
+
+// CHECK-ELF: # Relocation 51
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000cc)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000225)
+// CHECK-ELF: # Relocation 52
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000225)
+
+// CHECK-ELF-NAMES: 204 R_AARCH64_TLSLE_ADD_TPREL_HI12
+// CHECK-ELF-NAMES: 208 R_AARCH64_TLSLE_ADD_TPREL_HI12
+
+ add x21, x22, #:tprel_lo12:var
+ add w23, w24, #:tprel_lo12:var
+// CHECK: add x21, x22, #:tprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
+// CHECK-NEXT: add w23, w24, #:tprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
+
+// CHECK-ELF: # Relocation 53
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000226)
+// CHECK-ELF: # Relocation 54
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000226)
+
+// CHECK-ELF-NAMES: 212 R_AARCH64_TLSLE_ADD_TPREL_LO12
+// CHECK-ELF-NAMES: 216 R_AARCH64_TLSLE_ADD_TPREL_LO12
+
+ add x25, x26, #:tprel_lo12_nc:var
+ add w27, w28, #:tprel_lo12_nc:var
+// CHECK: add x25, x26, #:tprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
+// CHECK-NEXT: add w27, w28, #:tprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 55
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000dc)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000227)
+// CHECK-ELF: # Relocation 56
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000227)
+
+
+// CHECK-ELF-NAMES: 220 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
+// CHECK-ELF-NAMES: 224 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
+
+ ldrb w29, [x30, #:tprel_lo12:var]
+ ldrsb x29, [x28, #:tprel_lo12_nc:var]
+// CHECK: ldrb w29, [x30, #:tprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst8_tprel_lo12
+// CHECK-NEXT: ldrsb x29, [x28, #:tprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst8_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 57
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000228)
+// CHECK-ELF: # Relocation 58
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000229)
+
+// CHECK-ELF-NAMES: 228 R_AARCH64_TLSLE_LDST8_TPREL_LO12
+// CHECK-ELF-NAMES: 232 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC
+
+ strh w27, [x26, #:tprel_lo12:var]
+ ldrsh x25, [x24, #:tprel_lo12_nc:var]
+// CHECK: strh w27, [x26, #:tprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst16_tprel_lo12
+// CHECK-NEXT: ldrsh x25, [x24, #:tprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst16_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 59
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000ec)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022a)
+// CHECK-ELF: # Relocation 60
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f0)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022b)
+
+// CHECK-ELF-NAMES: 236 R_AARCH64_TLSLE_LDST16_TPREL_LO12
+// CHECK-ELF-NAMES: 240 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC
+
+ ldr w23, [x22, #:tprel_lo12:var]
+ ldrsw x21, [x20, #:tprel_lo12_nc:var]
+// CHECK: ldr w23, [x22, #:tprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst32_tprel_lo12
+// CHECK-NEXT: ldrsw x21, [x20, #:tprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst32_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 61
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f4)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022c)
+// CHECK-ELF: # Relocation 62
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f8)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022d)
+
+// CHECK-ELF-NAMES: 244 R_AARCH64_TLSLE_LDST32_TPREL_LO12
+// CHECK-ELF-NAMES: 248 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC
+
+ ldr x19, [x18, #:tprel_lo12:var]
+ str x17, [x16, #:tprel_lo12_nc:var]
+// CHECK: ldr x19, [x18, #:tprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst64_tprel_lo12
+// CHECK-NEXT: str x17, [x16, #:tprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst64_tprel_lo12_nc
+
+// CHECK-ELF: # Relocation 63
+// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000fc)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022e)
+// CHECK-ELF: # Relocation 64
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000100)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x0000022f)
+
+// CHECK-ELF-NAMES: 252 R_AARCH64_TLSLE_LDST64_TPREL_LO12
+// CHECK-ELF-NAMES: 256 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC
+
+ // TLS descriptor forms
+ adrp x8, :tlsdesc:var
+ ldr x7, [x6, :tlsdesc_lo12:var]
+ add x5, x4, #:tlsdesc_lo12:var
+ .tlsdesccall var
+ blr x3
+
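+// (A note on the sequence above, assuming the standard AArch64 TLSDESC
+// convention: adrp+ldr fetch the resolver address from the descriptor,
+// add materialises the descriptor address as the argument, and the blr
+// tagged by .tlsdesccall invokes the resolver, which returns var's
+// TP-relative offset in x0. The registers here are arbitrary; only the
+// relocations are being tested.)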
+// CHECK: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_adr_page
+// CHECK-NEXT: ldr x7, [x6, #:tlsdesc_lo12:var] // encoding: [0xc7'A',A,0x40'A',0xf9'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_ld64_lo12_nc
+// CHECK-NEXT: add x5, x4, #:tlsdesc_lo12:var // encoding: [0x85'A',A,A,0x91'A']
+// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_add_lo12_nc
+// CHECK-NEXT: .tlsdesccall var // encoding: []
+// CHECK-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_call
+// CHECK: blr x3 // encoding: [0x60,0x00,0x3f,0xd6]
+
+
+// CHECK-ELF: # Relocation 65
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000104)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000232)
+// CHECK-ELF: # Relocation 66
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000108)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000233)
+// CHECK-ELF: # Relocation 67
+// CHECK-ELF-NEXT: (('r_offset', 0x000000000000010c)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000234)
+// CHECK-ELF: # Relocation 68
+// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000110)
+// CHECK-ELF-NEXT: ('r_sym', [[VARSYM]])
+// CHECK-ELF-NEXT: ('r_type', 0x00000239)
+
+// CHECK-ELF-NAMES: 260 R_AARCH64_TLSDESC_ADR_PAGE
+// CHECK-ELF-NAMES: 264 R_AARCH64_TLSDESC_LD64_LO12_NC
+// CHECK-ELF-NAMES: 268 R_AARCH64_TLSDESC_ADD_LO12_NC
+// CHECK-ELF-NAMES: 272 R_AARCH64_TLSDESC_CALL
+
+
+// Make sure symbol 5 has type STT_TLS:
+
+// CHECK-ELF: # Symbol 5
+// CHECK-ELF-NEXT: (('st_name', 0x00000006) # 'var'
+// CHECK-ELF-NEXT: ('st_bind', 0x1)
+// CHECK-ELF-NEXT: ('st_type', 0x6)
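+// (st_bind 0x1 is STB_GLOBAL and st_type 0x6 is STT_TLS, so 'var' is
+// checked to be a global thread-local symbol.)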
diff --git a/test/MC/AArch64/trace-regs-diagnostics.s b/test/MC/AArch64/trace-regs-diagnostics.s
new file mode 100644
index 000000000000..82ec7c0c745d
--- /dev/null
+++ b/test/MC/AArch64/trace-regs-diagnostics.s
@@ -0,0 +1,156 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s
+ // Write-only
+ mrs x12, trcoslar
+ mrs x10, trclar
+// CHECK: error: expected readable system register
+// CHECK-NEXT: mrs x12, trcoslar
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected readable system register
+// CHECK-NEXT: mrs x10, trclar
+// CHECK-NEXT: ^
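+// (trcoslar and trclar are the trace unit's OS Lock Access and Lock
+// Access registers; both are write-only, so reading them with mrs must
+// be rejected.)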
+
+ // Read-only
+ msr trcstatr, x0
+ msr trcidr8, x13
+ msr trcidr9, x25
+ msr trcidr10, x2
+ msr trcidr11, x19
+ msr trcidr12, x15
+ msr trcidr13, x24
+ msr trcidr0, x20
+ msr trcidr1, x5
+ msr trcidr2, x18
+ msr trcidr3, x10
+ msr trcidr4, x1
+ msr trcidr5, x10
+ msr trcidr6, x4
+ msr trcidr7, x0
+ msr trcoslsr, x23
+ msr trcpdsr, x21
+ msr trcdevaff0, x4
+ msr trcdevaff1, x17
+ msr trclsr, x18
+ msr trcauthstatus, x10
+ msr trcdevarch, x8
+ msr trcdevid, x11
+ msr trcdevtype, x1
+ msr trcpidr4, x2
+ msr trcpidr5, x7
+ msr trcpidr6, x17
+ msr trcpidr7, x5
+ msr trcpidr0, x0
+ msr trcpidr1, x16
+ msr trcpidr2, x29
+ msr trcpidr3, x1
+ msr trccidr0, x27
+ msr trccidr1, x1
+ msr trccidr2, x24
+ msr trccidr3, x8
+// CHECK: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcstatr, x0
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr8, x13
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr9, x25
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr10, x2
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr11, x19
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr12, x15
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr13, x24
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr0, x20
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr1, x5
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr2, x18
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr3, x10
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr4, x1
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr5, x10
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr6, x4
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcidr7, x0
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcoslsr, x23
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcpdsr, x21
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcdevaff0, x4
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcdevaff1, x17
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trclsr, x18
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcauthstatus, x10
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcdevarch, x8
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcdevid, x11
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcdevtype, x1
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcpidr4, x2
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcpidr5, x7
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcpidr6, x17
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcpidr7, x5
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcpidr0, x0
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcpidr1, x16
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcpidr2, x29
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trcpidr3, x1
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trccidr0, x27
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trccidr1, x1
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trccidr2, x24
+// CHECK-NEXT: ^
+// CHECK-NEXT: error: expected writable system register or pstate
+// CHECK-NEXT: msr trccidr3, x8
+// CHECK-NEXT: ^
diff --git a/test/MC/AArch64/trace-regs.s b/test/MC/AArch64/trace-regs.s
new file mode 100644
index 000000000000..f9ab4c9ad975
--- /dev/null
+++ b/test/MC/AArch64/trace-regs.s
@@ -0,0 +1,766 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
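+// All trace registers share one encoding space: every mrs below ends in
+// [..., 0x31, 0xd5] and every msr in [..., 0x11, 0xd5]; only the Rt and
+// register-selector fields vary.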
+ mrs x8, trcstatr
+ mrs x9, trcidr8
+ mrs x11, trcidr9
+ mrs x25, trcidr10
+ mrs x7, trcidr11
+ mrs x7, trcidr12
+ mrs x6, trcidr13
+ mrs x27, trcidr0
+ mrs x29, trcidr1
+ mrs x4, trcidr2
+ mrs x8, trcidr3
+ mrs x15, trcidr4
+ mrs x20, trcidr5
+ mrs x6, trcidr6
+ mrs x6, trcidr7
+ mrs x24, trcoslsr
+ mrs x18, trcpdsr
+ mrs x28, trcdevaff0
+ mrs x5, trcdevaff1
+ mrs x5, trclsr
+ mrs x11, trcauthstatus
+ mrs x13, trcdevarch
+ mrs x18, trcdevid
+ mrs x22, trcdevtype
+ mrs x14, trcpidr4
+ mrs x5, trcpidr5
+ mrs x5, trcpidr6
+ mrs x9, trcpidr7
+ mrs x15, trcpidr0
+ mrs x6, trcpidr1
+ mrs x11, trcpidr2
+ mrs x20, trcpidr3
+ mrs x17, trccidr0
+ mrs x2, trccidr1
+ mrs x20, trccidr2
+ mrs x4, trccidr3
+ mrs x11, trcprgctlr
+ mrs x23, trcprocselr
+ mrs x13, trcconfigr
+ mrs x23, trcauxctlr
+ mrs x9, trceventctl0r
+ mrs x16, trceventctl1r
+ mrs x4, trcstallctlr
+ mrs x14, trctsctlr
+ mrs x24, trcsyncpr
+ mrs x28, trcccctlr
+ mrs x15, trcbbctlr
+ mrs x1, trctraceidr
+ mrs x20, trcqctlr
+ mrs x2, trcvictlr
+ mrs x12, trcviiectlr
+ mrs x16, trcvissctlr
+ mrs x8, trcvipcssctlr
+ mrs x27, trcvdctlr
+ mrs x9, trcvdsacctlr
+ mrs x0, trcvdarcctlr
+ mrs x13, trcseqevr0
+ mrs x11, trcseqevr1
+ mrs x26, trcseqevr2
+ mrs x14, trcseqrstevr
+ mrs x4, trcseqstr
+ mrs x17, trcextinselr
+ mrs x21, trccntrldvr0
+ mrs x10, trccntrldvr1
+ mrs x20, trccntrldvr2
+ mrs x5, trccntrldvr3
+ mrs x17, trccntctlr0
+ mrs x1, trccntctlr1
+ mrs x17, trccntctlr2
+ mrs x6, trccntctlr3
+ mrs x28, trccntvr0
+ mrs x23, trccntvr1
+ mrs x9, trccntvr2
+ mrs x6, trccntvr3
+ mrs x24, trcimspec0
+ mrs x24, trcimspec1
+ mrs x15, trcimspec2
+ mrs x10, trcimspec3
+ mrs x29, trcimspec4
+ mrs x18, trcimspec5
+ mrs x29, trcimspec6
+ mrs x2, trcimspec7
+ mrs x8, trcrsctlr2
+ mrs x0, trcrsctlr3
+ mrs x12, trcrsctlr4
+ mrs x26, trcrsctlr5
+ mrs x29, trcrsctlr6
+ mrs x17, trcrsctlr7
+ mrs x0, trcrsctlr8
+ mrs x1, trcrsctlr9
+ mrs x17, trcrsctlr10
+ mrs x21, trcrsctlr11
+ mrs x1, trcrsctlr12
+ mrs x8, trcrsctlr13
+ mrs x24, trcrsctlr14
+ mrs x0, trcrsctlr15
+ mrs x2, trcrsctlr16
+ mrs x29, trcrsctlr17
+ mrs x22, trcrsctlr18
+ mrs x6, trcrsctlr19
+ mrs x26, trcrsctlr20
+ mrs x26, trcrsctlr21
+ mrs x4, trcrsctlr22
+ mrs x12, trcrsctlr23
+ mrs x1, trcrsctlr24
+ mrs x0, trcrsctlr25
+ mrs x17, trcrsctlr26
+ mrs x8, trcrsctlr27
+ mrs x10, trcrsctlr28
+ mrs x25, trcrsctlr29
+ mrs x12, trcrsctlr30
+ mrs x11, trcrsctlr31
+ mrs x18, trcssccr0
+ mrs x12, trcssccr1
+ mrs x3, trcssccr2
+ mrs x2, trcssccr3
+ mrs x21, trcssccr4
+ mrs x10, trcssccr5
+ mrs x22, trcssccr6
+ mrs x23, trcssccr7
+ mrs x23, trcsscsr0
+ mrs x19, trcsscsr1
+ mrs x25, trcsscsr2
+ mrs x17, trcsscsr3
+ mrs x19, trcsscsr4
+ mrs x11, trcsscsr5
+ mrs x5, trcsscsr6
+ mrs x9, trcsscsr7
+ mrs x1, trcsspcicr0
+ mrs x12, trcsspcicr1
+ mrs x21, trcsspcicr2
+ mrs x11, trcsspcicr3
+ mrs x3, trcsspcicr4
+ mrs x9, trcsspcicr5
+ mrs x5, trcsspcicr6
+ mrs x2, trcsspcicr7
+ mrs x26, trcpdcr
+ mrs x8, trcacvr0
+ mrs x15, trcacvr1
+ mrs x19, trcacvr2
+ mrs x8, trcacvr3
+ mrs x28, trcacvr4
+ mrs x3, trcacvr5
+ mrs x25, trcacvr6
+ mrs x24, trcacvr7
+ mrs x6, trcacvr8
+ mrs x3, trcacvr9
+ mrs x24, trcacvr10
+ mrs x3, trcacvr11
+ mrs x12, trcacvr12
+ mrs x9, trcacvr13
+ mrs x14, trcacvr14
+ mrs x3, trcacvr15
+ mrs x21, trcacatr0
+ mrs x26, trcacatr1
+ mrs x8, trcacatr2
+ mrs x22, trcacatr3
+ mrs x6, trcacatr4
+ mrs x29, trcacatr5
+ mrs x5, trcacatr6
+ mrs x18, trcacatr7
+ mrs x2, trcacatr8
+ mrs x19, trcacatr9
+ mrs x13, trcacatr10
+ mrs x25, trcacatr11
+ mrs x18, trcacatr12
+ mrs x29, trcacatr13
+ mrs x9, trcacatr14
+ mrs x18, trcacatr15
+ mrs x29, trcdvcvr0
+ mrs x15, trcdvcvr1
+ mrs x15, trcdvcvr2
+ mrs x15, trcdvcvr3
+ mrs x19, trcdvcvr4
+ mrs x22, trcdvcvr5
+ mrs x27, trcdvcvr6
+ mrs x1, trcdvcvr7
+ mrs x29, trcdvcmr0
+ mrs x9, trcdvcmr1
+ mrs x1, trcdvcmr2
+ mrs x2, trcdvcmr3
+ mrs x5, trcdvcmr4
+ mrs x21, trcdvcmr5
+ mrs x5, trcdvcmr6
+ mrs x1, trcdvcmr7
+ mrs x21, trccidcvr0
+ mrs x24, trccidcvr1
+ mrs x24, trccidcvr2
+ mrs x12, trccidcvr3
+ mrs x10, trccidcvr4
+ mrs x9, trccidcvr5
+ mrs x6, trccidcvr6
+ mrs x20, trccidcvr7
+ mrs x20, trcvmidcvr0
+ mrs x20, trcvmidcvr1
+ mrs x26, trcvmidcvr2
+ mrs x1, trcvmidcvr3
+ mrs x14, trcvmidcvr4
+ mrs x27, trcvmidcvr5
+ mrs x29, trcvmidcvr6
+ mrs x17, trcvmidcvr7
+ mrs x10, trccidcctlr0
+ mrs x4, trccidcctlr1
+ mrs x9, trcvmidcctlr0
+ mrs x11, trcvmidcctlr1
+ mrs x22, trcitctrl
+ mrs x23, trcclaimset
+ mrs x14, trcclaimclr
+// CHECK: mrs x8, trcstatr // encoding: [0x08,0x03,0x31,0xd5]
+// CHECK: mrs x9, trcidr8 // encoding: [0xc9,0x00,0x31,0xd5]
+// CHECK: mrs x11, trcidr9 // encoding: [0xcb,0x01,0x31,0xd5]
+// CHECK: mrs x25, trcidr10 // encoding: [0xd9,0x02,0x31,0xd5]
+// CHECK: mrs x7, trcidr11 // encoding: [0xc7,0x03,0x31,0xd5]
+// CHECK: mrs x7, trcidr12 // encoding: [0xc7,0x04,0x31,0xd5]
+// CHECK: mrs x6, trcidr13 // encoding: [0xc6,0x05,0x31,0xd5]
+// CHECK: mrs x27, trcidr0 // encoding: [0xfb,0x08,0x31,0xd5]
+// CHECK: mrs x29, trcidr1 // encoding: [0xfd,0x09,0x31,0xd5]
+// CHECK: mrs x4, trcidr2 // encoding: [0xe4,0x0a,0x31,0xd5]
+// CHECK: mrs x8, trcidr3 // encoding: [0xe8,0x0b,0x31,0xd5]
+// CHECK: mrs x15, trcidr4 // encoding: [0xef,0x0c,0x31,0xd5]
+// CHECK: mrs x20, trcidr5 // encoding: [0xf4,0x0d,0x31,0xd5]
+// CHECK: mrs x6, trcidr6 // encoding: [0xe6,0x0e,0x31,0xd5]
+// CHECK: mrs x6, trcidr7 // encoding: [0xe6,0x0f,0x31,0xd5]
+// CHECK: mrs x24, trcoslsr // encoding: [0x98,0x11,0x31,0xd5]
+// CHECK: mrs x18, trcpdsr // encoding: [0x92,0x15,0x31,0xd5]
+// CHECK: mrs x28, trcdevaff0 // encoding: [0xdc,0x7a,0x31,0xd5]
+// CHECK: mrs x5, trcdevaff1 // encoding: [0xc5,0x7b,0x31,0xd5]
+// CHECK: mrs x5, trclsr // encoding: [0xc5,0x7d,0x31,0xd5]
+// CHECK: mrs x11, trcauthstatus // encoding: [0xcb,0x7e,0x31,0xd5]
+// CHECK: mrs x13, trcdevarch // encoding: [0xcd,0x7f,0x31,0xd5]
+// CHECK: mrs x18, trcdevid // encoding: [0xf2,0x72,0x31,0xd5]
+// CHECK: mrs x22, trcdevtype // encoding: [0xf6,0x73,0x31,0xd5]
+// CHECK: mrs x14, trcpidr4 // encoding: [0xee,0x74,0x31,0xd5]
+// CHECK: mrs x5, trcpidr5 // encoding: [0xe5,0x75,0x31,0xd5]
+// CHECK: mrs x5, trcpidr6 // encoding: [0xe5,0x76,0x31,0xd5]
+// CHECK: mrs x9, trcpidr7 // encoding: [0xe9,0x77,0x31,0xd5]
+// CHECK: mrs x15, trcpidr0 // encoding: [0xef,0x78,0x31,0xd5]
+// CHECK: mrs x6, trcpidr1 // encoding: [0xe6,0x79,0x31,0xd5]
+// CHECK: mrs x11, trcpidr2 // encoding: [0xeb,0x7a,0x31,0xd5]
+// CHECK: mrs x20, trcpidr3 // encoding: [0xf4,0x7b,0x31,0xd5]
+// CHECK: mrs x17, trccidr0 // encoding: [0xf1,0x7c,0x31,0xd5]
+// CHECK: mrs x2, trccidr1 // encoding: [0xe2,0x7d,0x31,0xd5]
+// CHECK: mrs x20, trccidr2 // encoding: [0xf4,0x7e,0x31,0xd5]
+// CHECK: mrs x4, trccidr3 // encoding: [0xe4,0x7f,0x31,0xd5]
+// CHECK: mrs x11, trcprgctlr // encoding: [0x0b,0x01,0x31,0xd5]
+// CHECK: mrs x23, trcprocselr // encoding: [0x17,0x02,0x31,0xd5]
+// CHECK: mrs x13, trcconfigr // encoding: [0x0d,0x04,0x31,0xd5]
+// CHECK: mrs x23, trcauxctlr // encoding: [0x17,0x06,0x31,0xd5]
+// CHECK: mrs x9, trceventctl0r // encoding: [0x09,0x08,0x31,0xd5]
+// CHECK: mrs x16, trceventctl1r // encoding: [0x10,0x09,0x31,0xd5]
+// CHECK: mrs x4, trcstallctlr // encoding: [0x04,0x0b,0x31,0xd5]
+// CHECK: mrs x14, trctsctlr // encoding: [0x0e,0x0c,0x31,0xd5]
+// CHECK: mrs x24, trcsyncpr // encoding: [0x18,0x0d,0x31,0xd5]
+// CHECK: mrs x28, trcccctlr // encoding: [0x1c,0x0e,0x31,0xd5]
+// CHECK: mrs x15, trcbbctlr // encoding: [0x0f,0x0f,0x31,0xd5]
+// CHECK: mrs x1, trctraceidr // encoding: [0x21,0x00,0x31,0xd5]
+// CHECK: mrs x20, trcqctlr // encoding: [0x34,0x01,0x31,0xd5]
+// CHECK: mrs x2, trcvictlr // encoding: [0x42,0x00,0x31,0xd5]
+// CHECK: mrs x12, trcviiectlr // encoding: [0x4c,0x01,0x31,0xd5]
+// CHECK: mrs x16, trcvissctlr // encoding: [0x50,0x02,0x31,0xd5]
+// CHECK: mrs x8, trcvipcssctlr // encoding: [0x48,0x03,0x31,0xd5]
+// CHECK: mrs x27, trcvdctlr // encoding: [0x5b,0x08,0x31,0xd5]
+// CHECK: mrs x9, trcvdsacctlr // encoding: [0x49,0x09,0x31,0xd5]
+// CHECK: mrs x0, trcvdarcctlr // encoding: [0x40,0x0a,0x31,0xd5]
+// CHECK: mrs x13, trcseqevr0 // encoding: [0x8d,0x00,0x31,0xd5]
+// CHECK: mrs x11, trcseqevr1 // encoding: [0x8b,0x01,0x31,0xd5]
+// CHECK: mrs x26, trcseqevr2 // encoding: [0x9a,0x02,0x31,0xd5]
+// CHECK: mrs x14, trcseqrstevr // encoding: [0x8e,0x06,0x31,0xd5]
+// CHECK: mrs x4, trcseqstr // encoding: [0x84,0x07,0x31,0xd5]
+// CHECK: mrs x17, trcextinselr // encoding: [0x91,0x08,0x31,0xd5]
+// CHECK: mrs x21, trccntrldvr0 // encoding: [0xb5,0x00,0x31,0xd5]
+// CHECK: mrs x10, trccntrldvr1 // encoding: [0xaa,0x01,0x31,0xd5]
+// CHECK: mrs x20, trccntrldvr2 // encoding: [0xb4,0x02,0x31,0xd5]
+// CHECK: mrs x5, trccntrldvr3 // encoding: [0xa5,0x03,0x31,0xd5]
+// CHECK: mrs x17, trccntctlr0 // encoding: [0xb1,0x04,0x31,0xd5]
+// CHECK: mrs x1, trccntctlr1 // encoding: [0xa1,0x05,0x31,0xd5]
+// CHECK: mrs x17, trccntctlr2 // encoding: [0xb1,0x06,0x31,0xd5]
+// CHECK: mrs x6, trccntctlr3 // encoding: [0xa6,0x07,0x31,0xd5]
+// CHECK: mrs x28, trccntvr0 // encoding: [0xbc,0x08,0x31,0xd5]
+// CHECK: mrs x23, trccntvr1 // encoding: [0xb7,0x09,0x31,0xd5]
+// CHECK: mrs x9, trccntvr2 // encoding: [0xa9,0x0a,0x31,0xd5]
+// CHECK: mrs x6, trccntvr3 // encoding: [0xa6,0x0b,0x31,0xd5]
+// CHECK: mrs x24, trcimspec0 // encoding: [0xf8,0x00,0x31,0xd5]
+// CHECK: mrs x24, trcimspec1 // encoding: [0xf8,0x01,0x31,0xd5]
+// CHECK: mrs x15, trcimspec2 // encoding: [0xef,0x02,0x31,0xd5]
+// CHECK: mrs x10, trcimspec3 // encoding: [0xea,0x03,0x31,0xd5]
+// CHECK: mrs x29, trcimspec4 // encoding: [0xfd,0x04,0x31,0xd5]
+// CHECK: mrs x18, trcimspec5 // encoding: [0xf2,0x05,0x31,0xd5]
+// CHECK: mrs x29, trcimspec6 // encoding: [0xfd,0x06,0x31,0xd5]
+// CHECK: mrs x2, trcimspec7 // encoding: [0xe2,0x07,0x31,0xd5]
+// CHECK: mrs x8, trcrsctlr2 // encoding: [0x08,0x12,0x31,0xd5]
+// CHECK: mrs x0, trcrsctlr3 // encoding: [0x00,0x13,0x31,0xd5]
+// CHECK: mrs x12, trcrsctlr4 // encoding: [0x0c,0x14,0x31,0xd5]
+// CHECK: mrs x26, trcrsctlr5 // encoding: [0x1a,0x15,0x31,0xd5]
+// CHECK: mrs x29, trcrsctlr6 // encoding: [0x1d,0x16,0x31,0xd5]
+// CHECK: mrs x17, trcrsctlr7 // encoding: [0x11,0x17,0x31,0xd5]
+// CHECK: mrs x0, trcrsctlr8 // encoding: [0x00,0x18,0x31,0xd5]
+// CHECK: mrs x1, trcrsctlr9 // encoding: [0x01,0x19,0x31,0xd5]
+// CHECK: mrs x17, trcrsctlr10 // encoding: [0x11,0x1a,0x31,0xd5]
+// CHECK: mrs x21, trcrsctlr11 // encoding: [0x15,0x1b,0x31,0xd5]
+// CHECK: mrs x1, trcrsctlr12 // encoding: [0x01,0x1c,0x31,0xd5]
+// CHECK: mrs x8, trcrsctlr13 // encoding: [0x08,0x1d,0x31,0xd5]
+// CHECK: mrs x24, trcrsctlr14 // encoding: [0x18,0x1e,0x31,0xd5]
+// CHECK: mrs x0, trcrsctlr15 // encoding: [0x00,0x1f,0x31,0xd5]
+// CHECK: mrs x2, trcrsctlr16 // encoding: [0x22,0x10,0x31,0xd5]
+// CHECK: mrs x29, trcrsctlr17 // encoding: [0x3d,0x11,0x31,0xd5]
+// CHECK: mrs x22, trcrsctlr18 // encoding: [0x36,0x12,0x31,0xd5]
+// CHECK: mrs x6, trcrsctlr19 // encoding: [0x26,0x13,0x31,0xd5]
+// CHECK: mrs x26, trcrsctlr20 // encoding: [0x3a,0x14,0x31,0xd5]
+// CHECK: mrs x26, trcrsctlr21 // encoding: [0x3a,0x15,0x31,0xd5]
+// CHECK: mrs x4, trcrsctlr22 // encoding: [0x24,0x16,0x31,0xd5]
+// CHECK: mrs x12, trcrsctlr23 // encoding: [0x2c,0x17,0x31,0xd5]
+// CHECK: mrs x1, trcrsctlr24 // encoding: [0x21,0x18,0x31,0xd5]
+// CHECK: mrs x0, trcrsctlr25 // encoding: [0x20,0x19,0x31,0xd5]
+// CHECK: mrs x17, trcrsctlr26 // encoding: [0x31,0x1a,0x31,0xd5]
+// CHECK: mrs x8, trcrsctlr27 // encoding: [0x28,0x1b,0x31,0xd5]
+// CHECK: mrs x10, trcrsctlr28 // encoding: [0x2a,0x1c,0x31,0xd5]
+// CHECK: mrs x25, trcrsctlr29 // encoding: [0x39,0x1d,0x31,0xd5]
+// CHECK: mrs x12, trcrsctlr30 // encoding: [0x2c,0x1e,0x31,0xd5]
+// CHECK: mrs x11, trcrsctlr31 // encoding: [0x2b,0x1f,0x31,0xd5]
+// CHECK: mrs x18, trcssccr0 // encoding: [0x52,0x10,0x31,0xd5]
+// CHECK: mrs x12, trcssccr1 // encoding: [0x4c,0x11,0x31,0xd5]
+// CHECK: mrs x3, trcssccr2 // encoding: [0x43,0x12,0x31,0xd5]
+// CHECK: mrs x2, trcssccr3 // encoding: [0x42,0x13,0x31,0xd5]
+// CHECK: mrs x21, trcssccr4 // encoding: [0x55,0x14,0x31,0xd5]
+// CHECK: mrs x10, trcssccr5 // encoding: [0x4a,0x15,0x31,0xd5]
+// CHECK: mrs x22, trcssccr6 // encoding: [0x56,0x16,0x31,0xd5]
+// CHECK: mrs x23, trcssccr7 // encoding: [0x57,0x17,0x31,0xd5]
+// CHECK: mrs x23, trcsscsr0 // encoding: [0x57,0x18,0x31,0xd5]
+// CHECK: mrs x19, trcsscsr1 // encoding: [0x53,0x19,0x31,0xd5]
+// CHECK: mrs x25, trcsscsr2 // encoding: [0x59,0x1a,0x31,0xd5]
+// CHECK: mrs x17, trcsscsr3 // encoding: [0x51,0x1b,0x31,0xd5]
+// CHECK: mrs x19, trcsscsr4 // encoding: [0x53,0x1c,0x31,0xd5]
+// CHECK: mrs x11, trcsscsr5 // encoding: [0x4b,0x1d,0x31,0xd5]
+// CHECK: mrs x5, trcsscsr6 // encoding: [0x45,0x1e,0x31,0xd5]
+// CHECK: mrs x9, trcsscsr7 // encoding: [0x49,0x1f,0x31,0xd5]
+// CHECK: mrs x1, trcsspcicr0 // encoding: [0x61,0x10,0x31,0xd5]
+// CHECK: mrs x12, trcsspcicr1 // encoding: [0x6c,0x11,0x31,0xd5]
+// CHECK: mrs x21, trcsspcicr2 // encoding: [0x75,0x12,0x31,0xd5]
+// CHECK: mrs x11, trcsspcicr3 // encoding: [0x6b,0x13,0x31,0xd5]
+// CHECK: mrs x3, trcsspcicr4 // encoding: [0x63,0x14,0x31,0xd5]
+// CHECK: mrs x9, trcsspcicr5 // encoding: [0x69,0x15,0x31,0xd5]
+// CHECK: mrs x5, trcsspcicr6 // encoding: [0x65,0x16,0x31,0xd5]
+// CHECK: mrs x2, trcsspcicr7 // encoding: [0x62,0x17,0x31,0xd5]
+// CHECK: mrs x26, trcpdcr // encoding: [0x9a,0x14,0x31,0xd5]
+// CHECK: mrs x8, trcacvr0 // encoding: [0x08,0x20,0x31,0xd5]
+// CHECK: mrs x15, trcacvr1 // encoding: [0x0f,0x22,0x31,0xd5]
+// CHECK: mrs x19, trcacvr2 // encoding: [0x13,0x24,0x31,0xd5]
+// CHECK: mrs x8, trcacvr3 // encoding: [0x08,0x26,0x31,0xd5]
+// CHECK: mrs x28, trcacvr4 // encoding: [0x1c,0x28,0x31,0xd5]
+// CHECK: mrs x3, trcacvr5 // encoding: [0x03,0x2a,0x31,0xd5]
+// CHECK: mrs x25, trcacvr6 // encoding: [0x19,0x2c,0x31,0xd5]
+// CHECK: mrs x24, trcacvr7 // encoding: [0x18,0x2e,0x31,0xd5]
+// CHECK: mrs x6, trcacvr8 // encoding: [0x26,0x20,0x31,0xd5]
+// CHECK: mrs x3, trcacvr9 // encoding: [0x23,0x22,0x31,0xd5]
+// CHECK: mrs x24, trcacvr10 // encoding: [0x38,0x24,0x31,0xd5]
+// CHECK: mrs x3, trcacvr11 // encoding: [0x23,0x26,0x31,0xd5]
+// CHECK: mrs x12, trcacvr12 // encoding: [0x2c,0x28,0x31,0xd5]
+// CHECK: mrs x9, trcacvr13 // encoding: [0x29,0x2a,0x31,0xd5]
+// CHECK: mrs x14, trcacvr14 // encoding: [0x2e,0x2c,0x31,0xd5]
+// CHECK: mrs x3, trcacvr15 // encoding: [0x23,0x2e,0x31,0xd5]
+// CHECK: mrs x21, trcacatr0 // encoding: [0x55,0x20,0x31,0xd5]
+// CHECK: mrs x26, trcacatr1 // encoding: [0x5a,0x22,0x31,0xd5]
+// CHECK: mrs x8, trcacatr2 // encoding: [0x48,0x24,0x31,0xd5]
+// CHECK: mrs x22, trcacatr3 // encoding: [0x56,0x26,0x31,0xd5]
+// CHECK: mrs x6, trcacatr4 // encoding: [0x46,0x28,0x31,0xd5]
+// CHECK: mrs x29, trcacatr5 // encoding: [0x5d,0x2a,0x31,0xd5]
+// CHECK: mrs x5, trcacatr6 // encoding: [0x45,0x2c,0x31,0xd5]
+// CHECK: mrs x18, trcacatr7 // encoding: [0x52,0x2e,0x31,0xd5]
+// CHECK: mrs x2, trcacatr8 // encoding: [0x62,0x20,0x31,0xd5]
+// CHECK: mrs x19, trcacatr9 // encoding: [0x73,0x22,0x31,0xd5]
+// CHECK: mrs x13, trcacatr10 // encoding: [0x6d,0x24,0x31,0xd5]
+// CHECK: mrs x25, trcacatr11 // encoding: [0x79,0x26,0x31,0xd5]
+// CHECK: mrs x18, trcacatr12 // encoding: [0x72,0x28,0x31,0xd5]
+// CHECK: mrs x29, trcacatr13 // encoding: [0x7d,0x2a,0x31,0xd5]
+// CHECK: mrs x9, trcacatr14 // encoding: [0x69,0x2c,0x31,0xd5]
+// CHECK: mrs x18, trcacatr15 // encoding: [0x72,0x2e,0x31,0xd5]
+// CHECK: mrs x29, trcdvcvr0 // encoding: [0x9d,0x20,0x31,0xd5]
+// CHECK: mrs x15, trcdvcvr1 // encoding: [0x8f,0x24,0x31,0xd5]
+// CHECK: mrs x15, trcdvcvr2 // encoding: [0x8f,0x28,0x31,0xd5]
+// CHECK: mrs x15, trcdvcvr3 // encoding: [0x8f,0x2c,0x31,0xd5]
+// CHECK: mrs x19, trcdvcvr4 // encoding: [0xb3,0x20,0x31,0xd5]
+// CHECK: mrs x22, trcdvcvr5 // encoding: [0xb6,0x24,0x31,0xd5]
+// CHECK: mrs x27, trcdvcvr6 // encoding: [0xbb,0x28,0x31,0xd5]
+// CHECK: mrs x1, trcdvcvr7 // encoding: [0xa1,0x2c,0x31,0xd5]
+// CHECK: mrs x29, trcdvcmr0 // encoding: [0xdd,0x20,0x31,0xd5]
+// CHECK: mrs x9, trcdvcmr1 // encoding: [0xc9,0x24,0x31,0xd5]
+// CHECK: mrs x1, trcdvcmr2 // encoding: [0xc1,0x28,0x31,0xd5]
+// CHECK: mrs x2, trcdvcmr3 // encoding: [0xc2,0x2c,0x31,0xd5]
+// CHECK: mrs x5, trcdvcmr4 // encoding: [0xe5,0x20,0x31,0xd5]
+// CHECK: mrs x21, trcdvcmr5 // encoding: [0xf5,0x24,0x31,0xd5]
+// CHECK: mrs x5, trcdvcmr6 // encoding: [0xe5,0x28,0x31,0xd5]
+// CHECK: mrs x1, trcdvcmr7 // encoding: [0xe1,0x2c,0x31,0xd5]
+// CHECK: mrs x21, trccidcvr0 // encoding: [0x15,0x30,0x31,0xd5]
+// CHECK: mrs x24, trccidcvr1 // encoding: [0x18,0x32,0x31,0xd5]
+// CHECK: mrs x24, trccidcvr2 // encoding: [0x18,0x34,0x31,0xd5]
+// CHECK: mrs x12, trccidcvr3 // encoding: [0x0c,0x36,0x31,0xd5]
+// CHECK: mrs x10, trccidcvr4 // encoding: [0x0a,0x38,0x31,0xd5]
+// CHECK: mrs x9, trccidcvr5 // encoding: [0x09,0x3a,0x31,0xd5]
+// CHECK: mrs x6, trccidcvr6 // encoding: [0x06,0x3c,0x31,0xd5]
+// CHECK: mrs x20, trccidcvr7 // encoding: [0x14,0x3e,0x31,0xd5]
+// CHECK: mrs x20, trcvmidcvr0 // encoding: [0x34,0x30,0x31,0xd5]
+// CHECK: mrs x20, trcvmidcvr1 // encoding: [0x34,0x32,0x31,0xd5]
+// CHECK: mrs x26, trcvmidcvr2 // encoding: [0x3a,0x34,0x31,0xd5]
+// CHECK: mrs x1, trcvmidcvr3 // encoding: [0x21,0x36,0x31,0xd5]
+// CHECK: mrs x14, trcvmidcvr4 // encoding: [0x2e,0x38,0x31,0xd5]
+// CHECK: mrs x27, trcvmidcvr5 // encoding: [0x3b,0x3a,0x31,0xd5]
+// CHECK: mrs x29, trcvmidcvr6 // encoding: [0x3d,0x3c,0x31,0xd5]
+// CHECK: mrs x17, trcvmidcvr7 // encoding: [0x31,0x3e,0x31,0xd5]
+// CHECK: mrs x10, trccidcctlr0 // encoding: [0x4a,0x30,0x31,0xd5]
+// CHECK: mrs x4, trccidcctlr1 // encoding: [0x44,0x31,0x31,0xd5]
+// CHECK: mrs x9, trcvmidcctlr0 // encoding: [0x49,0x32,0x31,0xd5]
+// CHECK: mrs x11, trcvmidcctlr1 // encoding: [0x4b,0x33,0x31,0xd5]
+// CHECK: mrs x22, trcitctrl // encoding: [0x96,0x70,0x31,0xd5]
+// CHECK: mrs x23, trcclaimset // encoding: [0xd7,0x78,0x31,0xd5]
+// CHECK: mrs x14, trcclaimclr // encoding: [0xce,0x79,0x31,0xd5]
+
+ msr trcoslar, x28
+ msr trclar, x14
+ msr trcprgctlr, x10
+ msr trcprocselr, x27
+ msr trcconfigr, x24
+ msr trcauxctlr, x8
+ msr trceventctl0r, x16
+ msr trceventctl1r, x27
+ msr trcstallctlr, x26
+ msr trctsctlr, x0
+ msr trcsyncpr, x14
+ msr trcccctlr, x8
+ msr trcbbctlr, x6
+ msr trctraceidr, x23
+ msr trcqctlr, x5
+ msr trcvictlr, x0
+ msr trcviiectlr, x0
+ msr trcvissctlr, x1
+ msr trcvipcssctlr, x0
+ msr trcvdctlr, x7
+ msr trcvdsacctlr, x18
+ msr trcvdarcctlr, x24
+ msr trcseqevr0, x28
+ msr trcseqevr1, x21
+ msr trcseqevr2, x16
+ msr trcseqrstevr, x16
+ msr trcseqstr, x25
+ msr trcextinselr, x29
+ msr trccntrldvr0, x20
+ msr trccntrldvr1, x20
+ msr trccntrldvr2, x22
+ msr trccntrldvr3, x12
+ msr trccntctlr0, x20
+ msr trccntctlr1, x4
+ msr trccntctlr2, x8
+ msr trccntctlr3, x16
+ msr trccntvr0, x5
+ msr trccntvr1, x27
+ msr trccntvr2, x21
+ msr trccntvr3, x8
+ msr trcimspec0, x6
+ msr trcimspec1, x27
+ msr trcimspec2, x23
+ msr trcimspec3, x15
+ msr trcimspec4, x13
+ msr trcimspec5, x25
+ msr trcimspec6, x19
+ msr trcimspec7, x27
+ msr trcrsctlr2, x4
+ msr trcrsctlr3, x0
+ msr trcrsctlr4, x21
+ msr trcrsctlr5, x8
+ msr trcrsctlr6, x20
+ msr trcrsctlr7, x11
+ msr trcrsctlr8, x18
+ msr trcrsctlr9, x24
+ msr trcrsctlr10, x15
+ msr trcrsctlr11, x21
+ msr trcrsctlr12, x4
+ msr trcrsctlr13, x28
+ msr trcrsctlr14, x3
+ msr trcrsctlr15, x20
+ msr trcrsctlr16, x12
+ msr trcrsctlr17, x17
+ msr trcrsctlr18, x10
+ msr trcrsctlr19, x11
+ msr trcrsctlr20, x3
+ msr trcrsctlr21, x18
+ msr trcrsctlr22, x26
+ msr trcrsctlr23, x5
+ msr trcrsctlr24, x25
+ msr trcrsctlr25, x5
+ msr trcrsctlr26, x4
+ msr trcrsctlr27, x20
+ msr trcrsctlr28, x5
+ msr trcrsctlr29, x10
+ msr trcrsctlr30, x24
+ msr trcrsctlr31, x20
+ msr trcssccr0, x23
+ msr trcssccr1, x27
+ msr trcssccr2, x27
+ msr trcssccr3, x6
+ msr trcssccr4, x3
+ msr trcssccr5, x12
+ msr trcssccr6, x7
+ msr trcssccr7, x6
+ msr trcsscsr0, x20
+ msr trcsscsr1, x17
+ msr trcsscsr2, x11
+ msr trcsscsr3, x4
+ msr trcsscsr4, x14
+ msr trcsscsr5, x22
+ msr trcsscsr6, x3
+ msr trcsscsr7, x11
+ msr trcsspcicr0, x2
+ msr trcsspcicr1, x3
+ msr trcsspcicr2, x5
+ msr trcsspcicr3, x7
+ msr trcsspcicr4, x11
+ msr trcsspcicr5, x13
+ msr trcsspcicr6, x17
+ msr trcsspcicr7, x23
+ msr trcpdcr, x3
+ msr trcacvr0, x6
+ msr trcacvr1, x20
+ msr trcacvr2, x25
+ msr trcacvr3, x1
+ msr trcacvr4, x28
+ msr trcacvr5, x15
+ msr trcacvr6, x25
+ msr trcacvr7, x12
+ msr trcacvr8, x5
+ msr trcacvr9, x25
+ msr trcacvr10, x13
+ msr trcacvr11, x10
+ msr trcacvr12, x19
+ msr trcacvr13, x10
+ msr trcacvr14, x19
+ msr trcacvr15, x2
+ msr trcacatr0, x15
+ msr trcacatr1, x13
+ msr trcacatr2, x8
+ msr trcacatr3, x1
+ msr trcacatr4, x11
+ msr trcacatr5, x8
+ msr trcacatr6, x24
+ msr trcacatr7, x6
+ msr trcacatr8, x23
+ msr trcacatr9, x5
+ msr trcacatr10, x11
+ msr trcacatr11, x11
+ msr trcacatr12, x3
+ msr trcacatr13, x28
+ msr trcacatr14, x25
+ msr trcacatr15, x4
+ msr trcdvcvr0, x6
+ msr trcdvcvr1, x3
+ msr trcdvcvr2, x5
+ msr trcdvcvr3, x11
+ msr trcdvcvr4, x9
+ msr trcdvcvr5, x14
+ msr trcdvcvr6, x10
+ msr trcdvcvr7, x12
+ msr trcdvcmr0, x8
+ msr trcdvcmr1, x8
+ msr trcdvcmr2, x22
+ msr trcdvcmr3, x22
+ msr trcdvcmr4, x5
+ msr trcdvcmr5, x16
+ msr trcdvcmr6, x27
+ msr trcdvcmr7, x21
+ msr trccidcvr0, x8
+ msr trccidcvr1, x6
+ msr trccidcvr2, x9
+ msr trccidcvr3, x8
+ msr trccidcvr4, x3
+ msr trccidcvr5, x21
+ msr trccidcvr6, x12
+ msr trccidcvr7, x7
+ msr trcvmidcvr0, x4
+ msr trcvmidcvr1, x3
+ msr trcvmidcvr2, x9
+ msr trcvmidcvr3, x17
+ msr trcvmidcvr4, x14
+ msr trcvmidcvr5, x12
+ msr trcvmidcvr6, x10
+ msr trcvmidcvr7, x3
+ msr trccidcctlr0, x14
+ msr trccidcctlr1, x22
+ msr trcvmidcctlr0, x8
+ msr trcvmidcctlr1, x15
+ msr trcitctrl, x1
+ msr trcclaimset, x7
+ msr trcclaimclr, x29
+// CHECK: msr trcoslar, x28 // encoding: [0x9c,0x10,0x11,0xd5]
+// CHECK: msr trclar, x14 // encoding: [0xce,0x7c,0x11,0xd5]
+// CHECK: msr trcprgctlr, x10 // encoding: [0x0a,0x01,0x11,0xd5]
+// CHECK: msr trcprocselr, x27 // encoding: [0x1b,0x02,0x11,0xd5]
+// CHECK: msr trcconfigr, x24 // encoding: [0x18,0x04,0x11,0xd5]
+// CHECK: msr trcauxctlr, x8 // encoding: [0x08,0x06,0x11,0xd5]
+// CHECK: msr trceventctl0r, x16 // encoding: [0x10,0x08,0x11,0xd5]
+// CHECK: msr trceventctl1r, x27 // encoding: [0x1b,0x09,0x11,0xd5]
+// CHECK: msr trcstallctlr, x26 // encoding: [0x1a,0x0b,0x11,0xd5]
+// CHECK: msr trctsctlr, x0 // encoding: [0x00,0x0c,0x11,0xd5]
+// CHECK: msr trcsyncpr, x14 // encoding: [0x0e,0x0d,0x11,0xd5]
+// CHECK: msr trcccctlr, x8 // encoding: [0x08,0x0e,0x11,0xd5]
+// CHECK: msr trcbbctlr, x6 // encoding: [0x06,0x0f,0x11,0xd5]
+// CHECK: msr trctraceidr, x23 // encoding: [0x37,0x00,0x11,0xd5]
+// CHECK: msr trcqctlr, x5 // encoding: [0x25,0x01,0x11,0xd5]
+// CHECK: msr trcvictlr, x0 // encoding: [0x40,0x00,0x11,0xd5]
+// CHECK: msr trcviiectlr, x0 // encoding: [0x40,0x01,0x11,0xd5]
+// CHECK: msr trcvissctlr, x1 // encoding: [0x41,0x02,0x11,0xd5]
+// CHECK: msr trcvipcssctlr, x0 // encoding: [0x40,0x03,0x11,0xd5]
+// CHECK: msr trcvdctlr, x7 // encoding: [0x47,0x08,0x11,0xd5]
+// CHECK: msr trcvdsacctlr, x18 // encoding: [0x52,0x09,0x11,0xd5]
+// CHECK: msr trcvdarcctlr, x24 // encoding: [0x58,0x0a,0x11,0xd5]
+// CHECK: msr trcseqevr0, x28 // encoding: [0x9c,0x00,0x11,0xd5]
+// CHECK: msr trcseqevr1, x21 // encoding: [0x95,0x01,0x11,0xd5]
+// CHECK: msr trcseqevr2, x16 // encoding: [0x90,0x02,0x11,0xd5]
+// CHECK: msr trcseqrstevr, x16 // encoding: [0x90,0x06,0x11,0xd5]
+// CHECK: msr trcseqstr, x25 // encoding: [0x99,0x07,0x11,0xd5]
+// CHECK: msr trcextinselr, x29 // encoding: [0x9d,0x08,0x11,0xd5]
+// CHECK: msr trccntrldvr0, x20 // encoding: [0xb4,0x00,0x11,0xd5]
+// CHECK: msr trccntrldvr1, x20 // encoding: [0xb4,0x01,0x11,0xd5]
+// CHECK: msr trccntrldvr2, x22 // encoding: [0xb6,0x02,0x11,0xd5]
+// CHECK: msr trccntrldvr3, x12 // encoding: [0xac,0x03,0x11,0xd5]
+// CHECK: msr trccntctlr0, x20 // encoding: [0xb4,0x04,0x11,0xd5]
+// CHECK: msr trccntctlr1, x4 // encoding: [0xa4,0x05,0x11,0xd5]
+// CHECK: msr trccntctlr2, x8 // encoding: [0xa8,0x06,0x11,0xd5]
+// CHECK: msr trccntctlr3, x16 // encoding: [0xb0,0x07,0x11,0xd5]
+// CHECK: msr trccntvr0, x5 // encoding: [0xa5,0x08,0x11,0xd5]
+// CHECK: msr trccntvr1, x27 // encoding: [0xbb,0x09,0x11,0xd5]
+// CHECK: msr trccntvr2, x21 // encoding: [0xb5,0x0a,0x11,0xd5]
+// CHECK: msr trccntvr3, x8 // encoding: [0xa8,0x0b,0x11,0xd5]
+// CHECK: msr trcimspec0, x6 // encoding: [0xe6,0x00,0x11,0xd5]
+// CHECK: msr trcimspec1, x27 // encoding: [0xfb,0x01,0x11,0xd5]
+// CHECK: msr trcimspec2, x23 // encoding: [0xf7,0x02,0x11,0xd5]
+// CHECK: msr trcimspec3, x15 // encoding: [0xef,0x03,0x11,0xd5]
+// CHECK: msr trcimspec4, x13 // encoding: [0xed,0x04,0x11,0xd5]
+// CHECK: msr trcimspec5, x25 // encoding: [0xf9,0x05,0x11,0xd5]
+// CHECK: msr trcimspec6, x19 // encoding: [0xf3,0x06,0x11,0xd5]
+// CHECK: msr trcimspec7, x27 // encoding: [0xfb,0x07,0x11,0xd5]
+// CHECK: msr trcrsctlr2, x4 // encoding: [0x04,0x12,0x11,0xd5]
+// CHECK: msr trcrsctlr3, x0 // encoding: [0x00,0x13,0x11,0xd5]
+// CHECK: msr trcrsctlr4, x21 // encoding: [0x15,0x14,0x11,0xd5]
+// CHECK: msr trcrsctlr5, x8 // encoding: [0x08,0x15,0x11,0xd5]
+// CHECK: msr trcrsctlr6, x20 // encoding: [0x14,0x16,0x11,0xd5]
+// CHECK: msr trcrsctlr7, x11 // encoding: [0x0b,0x17,0x11,0xd5]
+// CHECK: msr trcrsctlr8, x18 // encoding: [0x12,0x18,0x11,0xd5]
+// CHECK: msr trcrsctlr9, x24 // encoding: [0x18,0x19,0x11,0xd5]
+// CHECK: msr trcrsctlr10, x15 // encoding: [0x0f,0x1a,0x11,0xd5]
+// CHECK: msr trcrsctlr11, x21 // encoding: [0x15,0x1b,0x11,0xd5]
+// CHECK: msr trcrsctlr12, x4 // encoding: [0x04,0x1c,0x11,0xd5]
+// CHECK: msr trcrsctlr13, x28 // encoding: [0x1c,0x1d,0x11,0xd5]
+// CHECK: msr trcrsctlr14, x3 // encoding: [0x03,0x1e,0x11,0xd5]
+// CHECK: msr trcrsctlr15, x20 // encoding: [0x14,0x1f,0x11,0xd5]
+// CHECK: msr trcrsctlr16, x12 // encoding: [0x2c,0x10,0x11,0xd5]
+// CHECK: msr trcrsctlr17, x17 // encoding: [0x31,0x11,0x11,0xd5]
+// CHECK: msr trcrsctlr18, x10 // encoding: [0x2a,0x12,0x11,0xd5]
+// CHECK: msr trcrsctlr19, x11 // encoding: [0x2b,0x13,0x11,0xd5]
+// CHECK: msr trcrsctlr20, x3 // encoding: [0x23,0x14,0x11,0xd5]
+// CHECK: msr trcrsctlr21, x18 // encoding: [0x32,0x15,0x11,0xd5]
+// CHECK: msr trcrsctlr22, x26 // encoding: [0x3a,0x16,0x11,0xd5]
+// CHECK: msr trcrsctlr23, x5 // encoding: [0x25,0x17,0x11,0xd5]
+// CHECK: msr trcrsctlr24, x25 // encoding: [0x39,0x18,0x11,0xd5]
+// CHECK: msr trcrsctlr25, x5 // encoding: [0x25,0x19,0x11,0xd5]
+// CHECK: msr trcrsctlr26, x4 // encoding: [0x24,0x1a,0x11,0xd5]
+// CHECK: msr trcrsctlr27, x20 // encoding: [0x34,0x1b,0x11,0xd5]
+// CHECK: msr trcrsctlr28, x5 // encoding: [0x25,0x1c,0x11,0xd5]
+// CHECK: msr trcrsctlr29, x10 // encoding: [0x2a,0x1d,0x11,0xd5]
+// CHECK: msr trcrsctlr30, x24 // encoding: [0x38,0x1e,0x11,0xd5]
+// CHECK: msr trcrsctlr31, x20 // encoding: [0x34,0x1f,0x11,0xd5]
+// CHECK: msr trcssccr0, x23 // encoding: [0x57,0x10,0x11,0xd5]
+// CHECK: msr trcssccr1, x27 // encoding: [0x5b,0x11,0x11,0xd5]
+// CHECK: msr trcssccr2, x27 // encoding: [0x5b,0x12,0x11,0xd5]
+// CHECK: msr trcssccr3, x6 // encoding: [0x46,0x13,0x11,0xd5]
+// CHECK: msr trcssccr4, x3 // encoding: [0x43,0x14,0x11,0xd5]
+// CHECK: msr trcssccr5, x12 // encoding: [0x4c,0x15,0x11,0xd5]
+// CHECK: msr trcssccr6, x7 // encoding: [0x47,0x16,0x11,0xd5]
+// CHECK: msr trcssccr7, x6 // encoding: [0x46,0x17,0x11,0xd5]
+// CHECK: msr trcsscsr0, x20 // encoding: [0x54,0x18,0x11,0xd5]
+// CHECK: msr trcsscsr1, x17 // encoding: [0x51,0x19,0x11,0xd5]
+// CHECK: msr trcsscsr2, x11 // encoding: [0x4b,0x1a,0x11,0xd5]
+// CHECK: msr trcsscsr3, x4 // encoding: [0x44,0x1b,0x11,0xd5]
+// CHECK: msr trcsscsr4, x14 // encoding: [0x4e,0x1c,0x11,0xd5]
+// CHECK: msr trcsscsr5, x22 // encoding: [0x56,0x1d,0x11,0xd5]
+// CHECK: msr trcsscsr6, x3 // encoding: [0x43,0x1e,0x11,0xd5]
+// CHECK: msr trcsscsr7, x11 // encoding: [0x4b,0x1f,0x11,0xd5]
+// CHECK: msr trcsspcicr0, x2 // encoding: [0x62,0x10,0x11,0xd5]
+// CHECK: msr trcsspcicr1, x3 // encoding: [0x63,0x11,0x11,0xd5]
+// CHECK: msr trcsspcicr2, x5 // encoding: [0x65,0x12,0x11,0xd5]
+// CHECK: msr trcsspcicr3, x7 // encoding: [0x67,0x13,0x11,0xd5]
+// CHECK: msr trcsspcicr4, x11 // encoding: [0x6b,0x14,0x11,0xd5]
+// CHECK: msr trcsspcicr5, x13 // encoding: [0x6d,0x15,0x11,0xd5]
+// CHECK: msr trcsspcicr6, x17 // encoding: [0x71,0x16,0x11,0xd5]
+// CHECK: msr trcsspcicr7, x23 // encoding: [0x77,0x17,0x11,0xd5]
+// CHECK: msr trcpdcr, x3 // encoding: [0x83,0x14,0x11,0xd5]
+// CHECK: msr trcacvr0, x6 // encoding: [0x06,0x20,0x11,0xd5]
+// CHECK: msr trcacvr1, x20 // encoding: [0x14,0x22,0x11,0xd5]
+// CHECK: msr trcacvr2, x25 // encoding: [0x19,0x24,0x11,0xd5]
+// CHECK: msr trcacvr3, x1 // encoding: [0x01,0x26,0x11,0xd5]
+// CHECK: msr trcacvr4, x28 // encoding: [0x1c,0x28,0x11,0xd5]
+// CHECK: msr trcacvr5, x15 // encoding: [0x0f,0x2a,0x11,0xd5]
+// CHECK: msr trcacvr6, x25 // encoding: [0x19,0x2c,0x11,0xd5]
+// CHECK: msr trcacvr7, x12 // encoding: [0x0c,0x2e,0x11,0xd5]
+// CHECK: msr trcacvr8, x5 // encoding: [0x25,0x20,0x11,0xd5]
+// CHECK: msr trcacvr9, x25 // encoding: [0x39,0x22,0x11,0xd5]
+// CHECK: msr trcacvr10, x13 // encoding: [0x2d,0x24,0x11,0xd5]
+// CHECK: msr trcacvr11, x10 // encoding: [0x2a,0x26,0x11,0xd5]
+// CHECK: msr trcacvr12, x19 // encoding: [0x33,0x28,0x11,0xd5]
+// CHECK: msr trcacvr13, x10 // encoding: [0x2a,0x2a,0x11,0xd5]
+// CHECK: msr trcacvr14, x19 // encoding: [0x33,0x2c,0x11,0xd5]
+// CHECK: msr trcacvr15, x2 // encoding: [0x22,0x2e,0x11,0xd5]
+// CHECK: msr trcacatr0, x15 // encoding: [0x4f,0x20,0x11,0xd5]
+// CHECK: msr trcacatr1, x13 // encoding: [0x4d,0x22,0x11,0xd5]
+// CHECK: msr trcacatr2, x8 // encoding: [0x48,0x24,0x11,0xd5]
+// CHECK: msr trcacatr3, x1 // encoding: [0x41,0x26,0x11,0xd5]
+// CHECK: msr trcacatr4, x11 // encoding: [0x4b,0x28,0x11,0xd5]
+// CHECK: msr trcacatr5, x8 // encoding: [0x48,0x2a,0x11,0xd5]
+// CHECK: msr trcacatr6, x24 // encoding: [0x58,0x2c,0x11,0xd5]
+// CHECK: msr trcacatr7, x6 // encoding: [0x46,0x2e,0x11,0xd5]
+// CHECK: msr trcacatr8, x23 // encoding: [0x77,0x20,0x11,0xd5]
+// CHECK: msr trcacatr9, x5 // encoding: [0x65,0x22,0x11,0xd5]
+// CHECK: msr trcacatr10, x11 // encoding: [0x6b,0x24,0x11,0xd5]
+// CHECK: msr trcacatr11, x11 // encoding: [0x6b,0x26,0x11,0xd5]
+// CHECK: msr trcacatr12, x3 // encoding: [0x63,0x28,0x11,0xd5]
+// CHECK: msr trcacatr13, x28 // encoding: [0x7c,0x2a,0x11,0xd5]
+// CHECK: msr trcacatr14, x25 // encoding: [0x79,0x2c,0x11,0xd5]
+// CHECK: msr trcacatr15, x4 // encoding: [0x64,0x2e,0x11,0xd5]
+// CHECK: msr trcdvcvr0, x6 // encoding: [0x86,0x20,0x11,0xd5]
+// CHECK: msr trcdvcvr1, x3 // encoding: [0x83,0x24,0x11,0xd5]
+// CHECK: msr trcdvcvr2, x5 // encoding: [0x85,0x28,0x11,0xd5]
+// CHECK: msr trcdvcvr3, x11 // encoding: [0x8b,0x2c,0x11,0xd5]
+// CHECK: msr trcdvcvr4, x9 // encoding: [0xa9,0x20,0x11,0xd5]
+// CHECK: msr trcdvcvr5, x14 // encoding: [0xae,0x24,0x11,0xd5]
+// CHECK: msr trcdvcvr6, x10 // encoding: [0xaa,0x28,0x11,0xd5]
+// CHECK: msr trcdvcvr7, x12 // encoding: [0xac,0x2c,0x11,0xd5]
+// CHECK: msr trcdvcmr0, x8 // encoding: [0xc8,0x20,0x11,0xd5]
+// CHECK: msr trcdvcmr1, x8 // encoding: [0xc8,0x24,0x11,0xd5]
+// CHECK: msr trcdvcmr2, x22 // encoding: [0xd6,0x28,0x11,0xd5]
+// CHECK: msr trcdvcmr3, x22 // encoding: [0xd6,0x2c,0x11,0xd5]
+// CHECK: msr trcdvcmr4, x5 // encoding: [0xe5,0x20,0x11,0xd5]
+// CHECK: msr trcdvcmr5, x16 // encoding: [0xf0,0x24,0x11,0xd5]
+// CHECK: msr trcdvcmr6, x27 // encoding: [0xfb,0x28,0x11,0xd5]
+// CHECK: msr trcdvcmr7, x21 // encoding: [0xf5,0x2c,0x11,0xd5]
+// CHECK: msr trccidcvr0, x8 // encoding: [0x08,0x30,0x11,0xd5]
+// CHECK: msr trccidcvr1, x6 // encoding: [0x06,0x32,0x11,0xd5]
+// CHECK: msr trccidcvr2, x9 // encoding: [0x09,0x34,0x11,0xd5]
+// CHECK: msr trccidcvr3, x8 // encoding: [0x08,0x36,0x11,0xd5]
+// CHECK: msr trccidcvr4, x3 // encoding: [0x03,0x38,0x11,0xd5]
+// CHECK: msr trccidcvr5, x21 // encoding: [0x15,0x3a,0x11,0xd5]
+// CHECK: msr trccidcvr6, x12 // encoding: [0x0c,0x3c,0x11,0xd5]
+// CHECK: msr trccidcvr7, x7 // encoding: [0x07,0x3e,0x11,0xd5]
+// CHECK: msr trcvmidcvr0, x4 // encoding: [0x24,0x30,0x11,0xd5]
+// CHECK: msr trcvmidcvr1, x3 // encoding: [0x23,0x32,0x11,0xd5]
+// CHECK: msr trcvmidcvr2, x9 // encoding: [0x29,0x34,0x11,0xd5]
+// CHECK: msr trcvmidcvr3, x17 // encoding: [0x31,0x36,0x11,0xd5]
+// CHECK: msr trcvmidcvr4, x14 // encoding: [0x2e,0x38,0x11,0xd5]
+// CHECK: msr trcvmidcvr5, x12 // encoding: [0x2c,0x3a,0x11,0xd5]
+// CHECK: msr trcvmidcvr6, x10 // encoding: [0x2a,0x3c,0x11,0xd5]
+// CHECK: msr trcvmidcvr7, x3 // encoding: [0x23,0x3e,0x11,0xd5]
+// CHECK: msr trccidcctlr0, x14 // encoding: [0x4e,0x30,0x11,0xd5]
+// CHECK: msr trccidcctlr1, x22 // encoding: [0x56,0x31,0x11,0xd5]
+// CHECK: msr trcvmidcctlr0, x8 // encoding: [0x48,0x32,0x11,0xd5]
+// CHECK: msr trcvmidcctlr1, x15 // encoding: [0x4f,0x33,0x11,0xd5]
+// CHECK: msr trcitctrl, x1 // encoding: [0x81,0x70,0x11,0xd5]
+// CHECK: msr trcclaimset, x7 // encoding: [0xc7,0x78,0x11,0xd5]
+// CHECK: msr trcclaimclr, x29 // encoding: [0xdd,0x79,0x11,0xd5]
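Every trace CHECK line above shares the same A64 system-instruction prefix (the trailing 0xd5 byte); only the op1/CRn/CRm/op2 selector fields and the source register change. A minimal sketch of how the little-endian encoding bytes map back onto those fields, assuming the standard A64 MSR (register) layout (decode_msr is a name invented here, not an LLVM API):

    # Decode "msr <sysreg>, Xt" from its little-endian encoding bytes.
    def decode_msr(enc):
        word = int.from_bytes(bytes(enc), "little")
        rt  = word & 0x1f           # Xt, the source register
        op2 = (word >> 5) & 0x7
        crm = (word >> 8) & 0xf     # for trcrsctlrN, CRm holds N mod 16
        crn = (word >> 12) & 0xf
        op1 = (word >> 16) & 0x7
        o0  = (word >> 19) & 0x1    # system registers use op0 = 2 + o0
        return (2 + o0, op1, crn, crm, op2, rt)

    # "msr trcrsctlr5, x8" from the CHECK lines above:
    print(decode_msr([0x08, 0x15, 0x11, 0xd5]))  # (2, 1, 1, 5, 0, 8)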
diff --git a/test/MC/ARM/2013-03-18-Br-to-label-named-like-reg.s b/test/MC/ARM/2013-03-18-Br-to-label-named-like-reg.s
new file mode 100644
index 000000000000..172abcf6f813
--- /dev/null
+++ b/test/MC/ARM/2013-03-18-Br-to-label-named-like-reg.s
@@ -0,0 +1,5 @@
+@ RUN: llvm-mc -arch arm %s
+@ CHECK: test:
+@ CHECK: bl r1
+test:
+ bl r1
diff --git a/test/MC/ARM/AlignedBundling/group-bundle-arm.s b/test/MC/ARM/AlignedBundling/group-bundle-arm.s
new file mode 100644
index 000000000000..1d6735320007
--- /dev/null
+++ b/test/MC/ARM/AlignedBundling/group-bundle-arm.s
@@ -0,0 +1,48 @@
+# RUN: llvm-mc -filetype=obj -triple armv7-linux-gnueabi %s -o - \
+# RUN: | llvm-objdump -no-show-raw-insn -triple armv7 -disassemble - | FileCheck %s
+
+# On ARM every instruction is 4 bytes long, so no padding is ever needed
+# for individual instructions. For bundle-locked groups, however, padding
+# can be required; a small model of the rule follows this file's diff.
+
+ .syntax unified
+ .text
+ .bundle_align_mode 4
+
+ bx lr
+ and r1, r1, r2
+ and r1, r1, r2
+ .bundle_lock
+ bx r9
+ bx r8
+ .bundle_unlock
+# CHECK: c: nop
+# CHECK-NEXT: 10: bx
+# CHECK-NEXT: 14: bx
+
+        # .align is power-of-2 here: .align 4 pads to a 16-byte (one bundle) boundary
+ .align 4
+ bx lr
+ .bundle_lock
+ bx r9
+ bx r9
+ bx r9
+ bx r8
+ .bundle_unlock
+# CHECK: 20: bx
+# CHECK-NEXT: 24: nop
+# CHECK-NEXT: 28: nop
+# CHECK-NEXT: 2c: nop
+# CHECK-NEXT: 30: bx
+
+ .align 4
+foo:
+ b foo
+ .long 3892240112
+ .long 3892240112
+ .long 3892240112
+ .long 3892240112
+ .long 3892240112
+ .long 3892240112
+# CHECK: 40: b
+
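The padding decisions checked above follow from one rule: under .bundle_align_mode 4, a bundle-locked group may not cross a 16-byte bundle boundary, so the assembler pads to the next boundary when the group would not otherwise fit. A small model of that rule, checked against the offsets in this test (bundle_lock_padding is a name invented for this sketch):

    def bundle_lock_padding(offset, group_size, bundle=16):
        # A bundle-locked group may not cross a bundle boundary; if it
        # would, pad with nops up to the next boundary first.
        room = bundle - offset % bundle
        return 0 if group_size <= room else room

    print(bundle_lock_padding(0x0c, 8))   # 4: one nop, group runs 0x10-0x17
    print(bundle_lock_padding(0x24, 16))  # 12: three nops, group runs 0x30-0x3f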
diff --git a/test/MC/ARM/AlignedBundling/lit.local.cfg b/test/MC/ARM/AlignedBundling/lit.local.cfg
new file mode 100644
index 000000000000..6c49f08b7496
--- /dev/null
+++ b/test/MC/ARM/AlignedBundling/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.s']
+
+targets = set(config.root.targets_to_build.split())
+if 'ARM' not in targets:
+ config.unsupported = True
+
diff --git a/test/MC/ARM/AlignedBundling/pad-align-to-bundle-end.s b/test/MC/ARM/AlignedBundling/pad-align-to-bundle-end.s
new file mode 100644
index 000000000000..341358b9db17
--- /dev/null
+++ b/test/MC/ARM/AlignedBundling/pad-align-to-bundle-end.s
@@ -0,0 +1,41 @@
+# RUN: llvm-mc -filetype=obj -triple armv7-linux-gnueabi %s -o - \
+# RUN: | llvm-objdump -no-show-raw-insn -triple armv7 -disassemble - | FileCheck %s
+
+ .syntax unified
+ .text
+ .bundle_align_mode 4
+
+ bx lr
+ and r1, r1, r2
+ and r1, r1, r2
+ .bundle_lock align_to_end
+ bx r9
+ .bundle_unlock
+# No padding is required here because bx already sits at the right
+# offset; the rule is modeled in the sketch after this file's diff.
+# CHECK: 8: and
+# CHECK-NEXT: c: bx
+
+ bx lr
+ and r1, r1, r2
+ .bundle_lock align_to_end
+ bx r9
+ .bundle_unlock
+# 4 bytes of padding are needed here
+# CHECK: 18: nop
+# CHECK-NEXT: 1c: bx
+
+ bx lr
+ and r1, r1, r2
+ .bundle_lock align_to_end
+ bx r9
+ bx r9
+ bx r9
+ .bundle_unlock
+# 12 bytes of padding are needed here to push the group to the end of
+# the next bundle
+# CHECK: 28: nop
+# CHECK-NEXT: 2c: nop
+# CHECK-NEXT: 30: nop
+# CHECK-NEXT: 34: bx
+
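With align_to_end the constraint flips: the group must finish exactly on a bundle boundary. A sketch of that computation, matched against the three cases in this test (align_to_end_padding is an invented name):

    def align_to_end_padding(offset, group_size, bundle=16):
        # Pad so the bundle-locked group ends exactly on a bundle boundary.
        return (bundle - (offset + group_size) % bundle) % bundle

    print(align_to_end_padding(0x0c, 4))   # 0: bx already ends at 0x10
    print(align_to_end_padding(0x18, 4))   # 4: one nop, bx lands at 0x1c
    print(align_to_end_padding(0x28, 12))  # 12: three nops, group ends at 0x40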
diff --git a/test/MC/ARM/arm_instructions.s b/test/MC/ARM/arm_instructions.s
index ce7e036b3be4..a4b6bda880c5 100644
--- a/test/MC/ARM/arm_instructions.s
+++ b/test/MC/ARM/arm_instructions.s
@@ -1,7 +1,14 @@
-@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding %s | FileCheck %s
-
-@ CHECK: trap
-@ CHECK: encoding: [0xfe,0xde,0xff,0xe7]
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding %s \
+@ RUN: | FileCheck %s -check-prefix=ALL
+@ RUN: llvm-mc -mcpu=cortex-a9-mp -triple armv7-unknown-nacl -show-encoding %s \
+@ RUN: | FileCheck %s -check-prefix=NACL
+@ RUN: llvm-mc -mcpu=cortex-a8 -mattr=+nacl-trap -triple armv7 -show-encoding %s \
+@ RUN: | FileCheck %s -check-prefix=NACL
+
+@ ALL: trap
+@ ALL: encoding: [0xfe,0xde,0xff,0xe7]
+@ NACL: trap
+@ NACL: encoding: [0xf0,0xde,0xfe,0xe7]
trap
@ CHECK: bx lr
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index 5c2a214598d1..560a0d633cbe 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -143,11 +143,15 @@ Lforward:
adr r1, #-0x0
adr r1, #-0x12000000
+ adr r1, #-0x80000001
adr r1, #0x12000000
+ adr r1, #0x80000001
@ CHECK: adr r1, #-0 @ encoding: [0x00,0x10,0x4f,0xe2]
@ CHECK: adr r1, #-301989888 @ encoding: [0x12,0x14,0x4f,0xe2]
+@ CHECK: adr r1, #2147483647 @ encoding: [0x06,0x11,0x4f,0xe2]
@ CHECK: adr r1, #301989888 @ encoding: [0x12,0x14,0x8f,0xe2]
+@ CHECK: adr r1, #-2147483647 @ encoding: [0x06,0x11,0x8f,0xe2]
@------------------------------------------------------------------------------
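The two new adr immediates exercise 32-bit wrap-around: -0x80000001 and +0x80000001 are congruent to +2147483647 and -2147483647 modulo 2**32, which is what the CHECK lines print. A one-line illustration of the wrap (wrap32 is a name invented here):

    def wrap32(imm):
        # adr immediates are reduced modulo 2**32 and printed signed.
        v = imm & 0xffffffff
        return v - 0x100000000 if v >= 0x80000000 else v

    print(wrap32(-0x80000001))  # 2147483647, matching the CHECK line
    print(wrap32(0x80000001))   # -2147483647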
@@ -2087,6 +2091,49 @@ Lforward:
@ CHECK: srsia sp, #5 @ encoding: [0x05,0x05,0xcd,0xf8]
@ CHECK: srsia sp!, #5 @ encoding: [0x05,0x05,0xed,0xf8]
+@ Compatibility aliases.
+ srsda #5
+ srsdb #1
+ srsia #0
+ srsib #15
+
+ srsda #31!
+ srsdb #19!
+ srsia #2!
+ srsib #14!
+
+ srsfa #11
+ srsea #10
+ srsfd #9
+ srsed #5
+
+ srsfa #5!
+ srsea #5!
+ srsfd #5!
+ srsed #5!
+
+ srs #5
+ srs #5!
+
+@ CHECK: srsda sp, #5 @ encoding: [0x05,0x05,0x4d,0xf8]
+@ CHECK: srsdb sp, #1 @ encoding: [0x01,0x05,0x4d,0xf9]
+@ CHECK: srsia sp, #0 @ encoding: [0x00,0x05,0xcd,0xf8]
+@ CHECK: srsib sp, #15 @ encoding: [0x0f,0x05,0xcd,0xf9]
+@ CHECK: srsda sp!, #31 @ encoding: [0x1f,0x05,0x6d,0xf8]
+@ CHECK: srsdb sp!, #19 @ encoding: [0x13,0x05,0x6d,0xf9]
+@ CHECK: srsia sp!, #2 @ encoding: [0x02,0x05,0xed,0xf8]
+@ CHECK: srsib sp!, #14 @ encoding: [0x0e,0x05,0xed,0xf9]
+@ CHECK: srsda sp, #11 @ encoding: [0x0b,0x05,0x4d,0xf8]
+@ CHECK: srsdb sp, #10 @ encoding: [0x0a,0x05,0x4d,0xf9]
+@ CHECK: srsia sp, #9 @ encoding: [0x09,0x05,0xcd,0xf8]
+@ CHECK: srsib sp, #5 @ encoding: [0x05,0x05,0xcd,0xf9]
+@ CHECK: srsda sp!, #5 @ encoding: [0x05,0x05,0x6d,0xf8]
+@ CHECK: srsdb sp!, #5 @ encoding: [0x05,0x05,0x6d,0xf9]
+@ CHECK: srsia sp!, #5 @ encoding: [0x05,0x05,0xed,0xf8]
+@ CHECK: srsib sp!, #5 @ encoding: [0x05,0x05,0xed,0xf9]
+@ CHECK: srsia sp, #5 @ encoding: [0x05,0x05,0xcd,0xf8]
+@ CHECK: srsia sp!, #5 @ encoding: [0x05,0x05,0xed,0xf8]
+
@------------------------------------------------------------------------------
@ SSAT
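Read together, the CHECK lines pin down how each stack-mode compatibility alias canonicalizes: fa becomes da, ea becomes db, fd becomes ia, ed becomes ib, and bare srs defaults to srsia with sp implied. A lookup-table sketch derived purely from those CHECK lines (canonical_srs is a name invented here):

    # Alias suffix -> canonical addressing-mode suffix, per the CHECK lines.
    SRS_ALIASES = {"fa": "da", "ea": "db", "fd": "ia", "ed": "ib", "": "ia"}

    def canonical_srs(mnemonic):
        return "srs" + SRS_ALIASES[mnemonic[len("srs"):]]

    assert canonical_srs("srsfd") == "srsia"
    assert canonical_srs("srs") == "srsia"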
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index 23d9f5977a29..9278a2a94b56 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -2352,6 +2352,32 @@ _func:
@ CHECK: srsia sp, #5 @ encoding: [0x8d,0xe9,0x05,0xc0]
@ CHECK: srsia sp!, #5 @ encoding: [0xad,0xe9,0x05,0xc0]
+ srsdb #1
+ srsia #0
+
+ srsdb #19!
+ srsia #2!
+
+ srsea #10
+ srsfd #9
+
+ srsea #5!
+ srsfd #5!
+
+ srs #5
+ srs #5!
+
+@ CHECK: srsdb sp, #1 @ encoding: [0x0d,0xe8,0x01,0xc0]
+@ CHECK: srsia sp, #0 @ encoding: [0x8d,0xe9,0x00,0xc0]
+@ CHECK: srsdb sp!, #19 @ encoding: [0x2d,0xe8,0x13,0xc0]
+@ CHECK: srsia sp!, #2 @ encoding: [0xad,0xe9,0x02,0xc0]
+@ CHECK: srsdb sp, #10 @ encoding: [0x0d,0xe8,0x0a,0xc0]
+@ CHECK: srsia sp, #9 @ encoding: [0x8d,0xe9,0x09,0xc0]
+@ CHECK: srsdb sp!, #5 @ encoding: [0x2d,0xe8,0x05,0xc0]
+@ CHECK: srsia sp!, #5 @ encoding: [0xad,0xe9,0x05,0xc0]
+@ CHECK: srsia sp, #5 @ encoding: [0x8d,0xe9,0x05,0xc0]
+@ CHECK: srsia sp!, #5 @ encoding: [0xad,0xe9,0x05,0xc0]
+
@------------------------------------------------------------------------------
@ SSAT
@@ -3509,3 +3535,7 @@ _func:
@ CHECK: ldrh.w r11, [pc, #-22] @ encoding: [0x3f,0xf8,0x16,0xb0]
@ CHECK: ldrsb.w r11, [pc, #-22] @ encoding: [0x1f,0xf9,0x16,0xb0]
@ CHECK: ldrsh.w r11, [pc, #-22] @ encoding: [0x3f,0xf9,0x16,0xb0]
+
+@ rdar://12596361
+ ldr r1, [pc, #12]
+@ CHECK: ldr.n r1, [pc, #12] @ encoding: [0x03,0x49]
diff --git a/test/MC/ARM/data-in-code.ll b/test/MC/ARM/data-in-code.ll
new file mode 100644
index 000000000000..c2feec5303c3
--- /dev/null
+++ b/test/MC/ARM/data-in-code.ll
@@ -0,0 +1,176 @@
+;; RUN: llc -O0 -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \
+;; RUN: elf-dump | FileCheck -check-prefix=ARM %s
+
+;; RUN: llc -O0 -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \
+;; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=TMB %s
+
+;; Ensure that if a jump table is generated, it has mapping symbols
+;; marking the data-in-code region.
+
+define void @foo(i32* %ptr) nounwind ssp {
+ %tmp = load i32* %ptr, align 4
+ switch i32 %tmp, label %default [
+ i32 11, label %bb0
+ i32 10, label %bb1
+ i32 8, label %bb2
+ i32 4, label %bb3
+ i32 2, label %bb4
+ i32 6, label %bb5
+ i32 9, label %bb6
+ i32 15, label %bb7
+ i32 1, label %bb8
+ i32 3, label %bb9
+ i32 5, label %bb10
+ i32 30, label %bb11
+ i32 31, label %bb12
+ i32 13, label %bb13
+ i32 14, label %bb14
+ i32 20, label %bb15
+ i32 19, label %bb16
+ i32 17, label %bb17
+ i32 18, label %bb18
+ i32 21, label %bb19
+ i32 22, label %bb20
+ i32 16, label %bb21
+ i32 24, label %bb22
+ i32 25, label %bb23
+ i32 26, label %bb24
+ i32 27, label %bb25
+ i32 28, label %bb26
+ i32 23, label %bb27
+ i32 12, label %bb28
+ ]
+
+default:
+ br label %exit
+bb0:
+ br label %exit
+bb1:
+ br label %exit
+bb2:
+ br label %exit
+bb3:
+ br label %exit
+bb4:
+ br label %exit
+bb5:
+ br label %exit
+bb6:
+ br label %exit
+bb7:
+ br label %exit
+bb8:
+ br label %exit
+bb9:
+ br label %exit
+bb10:
+ br label %exit
+bb11:
+ br label %exit
+bb12:
+ br label %exit
+bb13:
+ br label %exit
+bb14:
+ br label %exit
+bb15:
+ br label %exit
+bb16:
+ br label %exit
+bb17:
+ br label %exit
+bb18:
+ br label %exit
+bb19:
+ br label %exit
+bb20:
+ br label %exit
+bb21:
+ br label %exit
+bb22:
+ br label %exit
+bb23:
+ br label %exit
+bb24:
+ br label %exit
+bb25:
+ br label %exit
+bb26:
+ br label %exit
+bb27:
+ br label %exit
+bb28:
+ br label %exit
+
+
+exit:
+
+ ret void
+}
+
+;; ARM: # Symbol 2
+;; ARM-NEXT: $a
+;; ARM-NEXT: 'st_value', 0x00000000
+;; ARM-NEXT: 'st_size', 0x00000000
+;; ARM-NEXT: 'st_bind', 0x0
+;; ARM-NEXT: 'st_type', 0x0
+;; ARM-NEXT: 'st_other'
+;; ARM-NEXT: 'st_shndx', [[MIXED_SECT:0x[0-9a-f]+]]
+
+;; ARM: # Symbol 3
+;; ARM-NEXT: $a
+;; ARM-NEXT: 'st_value', 0x000000ac
+;; ARM-NEXT: 'st_size', 0x00000000
+;; ARM-NEXT: 'st_bind', 0x0
+;; ARM-NEXT: 'st_type', 0x0
+;; ARM-NEXT: 'st_other'
+;; ARM-NEXT: 'st_shndx', [[MIXED_SECT]]
+
+;; ARM: # Symbol 4
+;; ARM-NEXT: $d
+;; ARM-NEXT: 'st_value', 0x00000000
+;; ARM-NEXT: 'st_size', 0x00000000
+;; ARM-NEXT: 'st_bind', 0x0
+;; ARM-NEXT: 'st_type', 0x0
+
+;; ARM: # Symbol 5
+;; ARM-NEXT: $d
+;; ARM-NEXT: 'st_value', 0x00000030
+;; ARM-NEXT: 'st_size', 0x00000000
+;; ARM-NEXT: 'st_bind', 0x0
+;; ARM-NEXT: 'st_type', 0x0
+;; ARM-NEXT: 'st_other'
+;; ARM-NEXT: 'st_shndx', [[MIXED_SECT]]
+
+;; ARM-NOT: ${{[atd]}}
+
+;; TMB: # Symbol 3
+;; TMB-NEXT: $d
+;; TMB-NEXT: 'st_value', 0x00000016
+;; TMB-NEXT: 'st_size', 0x00000000
+;; TMB-NEXT: 'st_bind', 0x0
+;; TMB-NEXT: 'st_type', 0x0
+;; TMB-NEXT: 'st_other'
+;; TMB-NEXT: 'st_shndx', [[MIXED_SECT:0x[0-9a-f]+]]
+
+;; TMB: # Symbol 4
+;; TMB-NEXT: $t
+;; TMB-NEXT: 'st_value', 0x00000000
+;; TMB-NEXT: 'st_size', 0x00000000
+;; TMB-NEXT: 'st_bind', 0x0
+;; TMB-NEXT: 'st_type', 0x0
+;; TMB-NEXT: 'st_other'
+;; TMB-NEXT: 'st_shndx', [[MIXED_SECT]]
+
+;; TMB: # Symbol 5
+;; TMB-NEXT: $t
+;; TMB-NEXT: 'st_value', 0x00000036
+;; TMB-NEXT: 'st_size', 0x00000000
+;; TMB-NEXT: 'st_bind', 0x0
+;; TMB-NEXT: 'st_type', 0x0
+;; TMB-NEXT: 'st_other'
+;; TMB-NEXT: 'st_shndx', [[MIXED_SECT]]
+
+
+;; TMB-NOT: ${{[atd]}}
+
diff --git a/test/MC/ARM/elf-eflags-eabi-cg.ll b/test/MC/ARM/elf-eflags-eabi-cg.ll
new file mode 100644
index 000000000000..2e86a0f36077
--- /dev/null
+++ b/test/MC/ARM/elf-eflags-eabi-cg.ll
@@ -0,0 +1,13 @@
+; Codegen version to check for ELF header flags.
+;
+; RUN: llc %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic \
+; RUN: -filetype=obj -o - | elf-dump --dump-section-data | \
+; RUN: FileCheck %s
+
+define void @bar() nounwind {
+entry:
+ ret void
+}
+
+; For now the only flag set in e_flags is EF_ARM_EABI_VER5
+; CHECK: 'e_flags', 0x05000000
diff --git a/test/MC/ARM/elf-reloc-01.ll b/test/MC/ARM/elf-reloc-01.ll
index c98026b6a043..3ebd7c641b6d 100644
--- a/test/MC/ARM/elf-reloc-01.ll
+++ b/test/MC/ARM/elf-reloc-01.ll
@@ -62,9 +62,9 @@ declare void @exit(i32) noreturn nounwind
;; OBJ: Relocation 1
;; OBJ-NEXT: 'r_offset',
-;; OBJ-NEXT: 'r_sym', 0x000002
+;; OBJ-NEXT: 'r_sym', 0x000007
;; OBJ-NEXT: 'r_type', 0x2b
-;; OBJ: Symbol 2
+;; OBJ: Symbol 7
;; OBJ-NEXT: '_MergedGlobals'
;; OBJ-NEXT: 'st_value', 0x00000010
diff --git a/test/MC/ARM/elf-reloc-02.ll b/test/MC/ARM/elf-reloc-02.ll
index e51bac30ca8a..6b6b03c388a4 100644
--- a/test/MC/ARM/elf-reloc-02.ll
+++ b/test/MC/ARM/elf-reloc-02.ll
@@ -42,9 +42,9 @@ declare i32 @write(...)
declare void @exit(i32) noreturn nounwind
;; OBJ: Relocation 0
-;; OBJ-NEXT: 'r_offset',
-;; OBJ-NEXT: 'r_sym', 0x000002
+;; OBJ-NEXT: 'r_offset',
+;; OBJ-NEXT: 'r_sym', 0x000005
;; OBJ-NEXT: 'r_type', 0x2b
-;; OBJ: Symbol 2
+;; OBJ: Symbol 5
;; OBJ-NEXT: '.L.str'
diff --git a/test/MC/ARM/elf-reloc-03.ll b/test/MC/ARM/elf-reloc-03.ll
index 922242f9d3d6..87f91c11210b 100644
--- a/test/MC/ARM/elf-reloc-03.ll
+++ b/test/MC/ARM/elf-reloc-03.ll
@@ -89,9 +89,9 @@ entry:
declare void @exit(i32) noreturn nounwind
;; OBJ: Relocation 1
-;; OBJ-NEXT: 'r_offset',
-;; OBJ-NEXT: 'r_sym', 0x00000c
+;; OBJ-NEXT: 'r_offset',
+;; OBJ-NEXT: 'r_sym', 0x000010
;; OBJ-NEXT: 'r_type', 0x2b
-;; OBJ: Symbol 12
+;; OBJ: Symbol 16
;; OBJ-NEXT: 'vtable'
diff --git a/test/MC/ARM/elf-reloc-condcall.s b/test/MC/ARM/elf-reloc-condcall.s
index 08b4ecc9c745..3fafb43eb060 100644
--- a/test/MC/ARM/elf-reloc-condcall.s
+++ b/test/MC/ARM/elf-reloc-condcall.s
@@ -9,25 +9,25 @@
// OBJ: .rel.text
// OBJ: 'r_offset', 0x00000000
-// OBJ-NEXT: 'r_sym', 0x000004
+// OBJ-NEXT: 'r_sym', 0x000005
// OBJ-NEXT: 'r_type', 0x1d
// OBJ: 'r_offset', 0x00000004
-// OBJ-NEXT: 'r_sym', 0x000004
+// OBJ-NEXT: 'r_sym', 0x000005
// OBJ-NEXT: 'r_type', 0x1c
// OBJ: 'r_offset', 0x00000008
-// OBJ-NEXT: 'r_sym', 0x000004
+// OBJ-NEXT: 'r_sym', 0x000005
// OBJ-NEXT: 'r_type', 0x1c
// OBJ: 'r_offset', 0x0000000c
-// OBJ-NEXT: 'r_sym', 0x000004
+// OBJ-NEXT: 'r_sym', 0x000005
// OBJ-NEXT: 'r_type', 0x1d
// OBJ: 'r_offset', 0x00000010
-// OBJ-NEXT: 'r_sym', 0x000004
+// OBJ-NEXT: 'r_sym', 0x000005
// OBJ-NEXT: 'r_type', 0x1d
// OBJ: .symtab
-// OBJ: Symbol 4
+// OBJ: Symbol 5
// OBJ-NEXT: some_label
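A plausible reading of this and the next two test updates: the newly emitted mapping symbols are STB_LOCAL, and local symbols precede globals in an ELF symtab, so every later symbol index grows by the number of mapping symbols inserted before it. A sanity check of the deltas (shift_index is a name invented here, and the per-file counts are inferred from the diffs, not stated in them):

    def shift_index(old, mapping_symbols_before):
        # Each earlier local (mapping) symbol bumps later symtab indices.
        return old + mapping_symbols_before

    assert shift_index(2, 5) == 7  # elf-reloc-01: '_MergedGlobals'
    assert shift_index(4, 1) == 5  # elf-reloc-condcall: 'some_label'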
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.ll b/test/MC/ARM/elf-thumbfunc-reloc.ll
index ecac11daa3cf..b2f253d2fa95 100644
--- a/test/MC/ARM/elf-thumbfunc-reloc.ll
+++ b/test/MC/ARM/elf-thumbfunc-reloc.ll
@@ -28,10 +28,10 @@ entry:
; 00000008 0000070a R_ARM_THM_CALL 00000001 foo
; CHECK: Relocation 0
; CHECK-NEXT: 'r_offset', 0x00000008
-; CHECK-NEXT: 'r_sym', 0x000007
+; CHECK-NEXT: 'r_sym', 0x000009
; CHECK-NEXT: 'r_type', 0x0a
; make sure foo is thumb function: bit 0 = 1
-; CHECK: Symbol 7
+; CHECK: Symbol 9
; CHECK-NEXT: 'foo'
; CHECK-NEXT: 'st_value', 0x00000001
diff --git a/test/MC/ARM/elf-thumbfunc.s b/test/MC/ARM/elf-thumbfunc.s
index 0aa7f41cc4be..91b2eee7592b 100644
--- a/test/MC/ARM/elf-thumbfunc.s
+++ b/test/MC/ARM/elf-thumbfunc.s
@@ -12,7 +12,7 @@ foo:
bx lr
@@ make sure foo is thumb function: bit 0 = 1 (st_value)
-@CHECK: Symbol 4
+@CHECK: Symbol 5
@CHECK-NEXT: 'st_name', 0x00000001
@CHECK-NEXT: 'st_value', 0x00000001
@CHECK-NEXT: 'st_size', 0x00000000
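Both thumbfunc tests check the same ELF convention: a Thumb function's st_value has bit 0 set, and the actual code address is st_value with that bit cleared. A two-function sketch (is_thumb and code_address are names invented here):

    def is_thumb(st_value):
        return bool(st_value & 1)   # bit 0 = 1 marks a Thumb symbol

    def code_address(st_value):
        return st_value & ~1        # clear the mode bit to get the address

    assert is_thumb(0x00000001) and code_address(0x00000001) == 0x0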
diff --git a/test/MC/ARM/mapping-within-section.s b/test/MC/ARM/mapping-within-section.s
new file mode 100644
index 000000000000..56dd6ef07e73
--- /dev/null
+++ b/test/MC/ARM/mapping-within-section.s
@@ -0,0 +1,33 @@
+@ RUN: llvm-mc -triple=arm-linux-gnueabi -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+
+ .text
+@ $a at 0x0000
+ add r0, r0, r0
+@ $d at 0x0004
+ .word 42
+ .thumb
+@ $t at 0x0008
+ adds r0, r0, r0
+ adds r0, r0, r0
+@ $a at 0x000c
+ .arm
+ add r0, r0, r0
+@ $t at 0x0010
+ .thumb
+ adds r0, r0, r0
+@ $d at 0x0012
+ .ascii "012"
+ .byte 1
+ .byte 2
+ .byte 3
+@ $a at 0x0018
+ .arm
+ add r0, r0, r0
+
+@ CHECK: 00000000 .text 00000000 $a
+@ CHECK-NEXT: 0000000c .text 00000000 $a
+@ CHECK-NEXT: 00000018 .text 00000000 $a
+@ CHECK-NEXT: 00000004 .text 00000000 $d
+@ CHECK-NEXT: 00000012 .text 00000000 $d
+@ CHECK-NEXT: 00000008 .text 00000000 $t
+@ CHECK-NEXT: 00000010 .text 00000000 $t
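The expected output follows a single rule: a mapping symbol ($a for ARM code, $t for Thumb code, $d for data) is emitted only when the kind of content changes within a section. Replaying this test's .text layout through a sketch of that rule reproduces the CHECK offsets (mapping_symbols is a name invented here):

    def mapping_symbols(regions):
        # regions: (kind, size) pairs; emit a symbol only on transitions.
        marker = {"arm": "$a", "thumb": "$t", "data": "$d"}
        syms, last, offset = [], None, 0
        for kind, size in regions:
            if marker[kind] != last:
                last = marker[kind]
                syms.append((hex(offset), last))
            offset += size
        return syms

    print(mapping_symbols([("arm", 4), ("data", 4), ("thumb", 4), ("arm", 4),
                           ("thumb", 2), ("data", 6), ("arm", 4)]))
    # [('0x0','$a'), ('0x4','$d'), ('0x8','$t'), ('0xc','$a'),
    #  ('0x10','$t'), ('0x12','$d'), ('0x18','$a')]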
diff --git a/test/MC/ARM/multi-section-mapping.s b/test/MC/ARM/multi-section-mapping.s
new file mode 100644
index 000000000000..f7c4e89a85ea
--- /dev/null
+++ b/test/MC/ARM/multi-section-mapping.s
@@ -0,0 +1,35 @@
+@ RUN: llvm-mc -triple=arm-linux-gnueabi -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+
+ .text
+ add r0, r0, r0
+
+@ .wibble should *not* inherit .text's mapping symbol. It's a completely different section.
+ .section .wibble
+ add r0, r0, r0
+
+@ A section should be able to start with a $t
+ .section .starts_thumb
+ .thumb
+ adds r0, r0, r0
+
+@ A section should be able to start with a $d
+ .section .starts_data
+ .word 42
+
+@ Changing back to .text should not emit a redundant $a
+ .text
+ .arm
+ add r0, r0, r0
+
+@ With all those constraints, we want:
+@ + .text to have $a at 0 and no others
+@ + .wibble to have $a at 0
+@ + .starts_thumb to have $t at 0
+@ + .starts_data to have $d at 0
+
+@ CHECK: 00000000 .text 00000000 $a
+@ CHECK-NEXT: 00000000 .wibble 00000000 $a
+@ CHECK-NEXT: 00000000 .starts_data 00000000 $d
+@ CHECK-NEXT: 00000000 .starts_thumb 00000000 $t
+@ CHECK-NOT: ${{[adt]}}
+
diff --git a/test/MC/ARM/neon-bitwise-encoding.s b/test/MC/ARM/neon-bitwise-encoding.s
index e8c1dd634867..8c7228835c9b 100644
--- a/test/MC/ARM/neon-bitwise-encoding.s
+++ b/test/MC/ARM/neon-bitwise-encoding.s
@@ -1,4 +1,5 @@
-@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s \
+@ RUN: | FileCheck %s
vand d16, d17, d16
vand q8, q8, q9
@@ -255,6 +256,42 @@
veor.f q8, q2
veor.i64 q8, q2
+ vclt.s16 q5, #0
+ vclt.s16 d5, #0
+
+ vceq.s16 q5, q3
+ vceq.s16 d5, d3
+
+ vcgt.s16 q5, q3
+ vcgt.s16 d5, d3
+
+ vcge.s16 q5, q3
+ vcge.s16 d5, d3
+
+ vcgt.s16 q5, #0
+ vcgt.s16 d5, #0
+
+ vcge.s16 q5, #0
+ vcge.s16 d5, #0
+
+ vceq.s16 q5, #0
+ vceq.s16 d5, #0
+
+ vcle.s16 q5, #0
+ vcle.s16 d5, #0
+
+ vacge.f32 d5, d30
+ vacge.f32 q5, q3
+
+ vacgt.f32 d5, d30
+ vacgt.f32 q5, q3
+
+@ FIXME: We don't have an alias that reverses the operands
+@ vacle.f32 d5, d30
+@ vacle.f32 q5, q3
+@ vaclt.f32 d5, d30
+@ vaclt.f32 q5, q3
+
@ CHECK: vand q6, q6, q5 @ encoding: [0x5a,0xc1,0x0c,0xf2]
@ CHECK: vand q6, q6, q5 @ encoding: [0x5a,0xc1,0x0c,0xf2]
@ CHECK: vand q7, q7, q1 @ encoding: [0x52,0xe1,0x0e,0xf2]
@@ -272,3 +309,32 @@
@ CHECK: veor q7, q7, q1 @ encoding: [0x52,0xe1,0x0e,0xf3]
@ CHECK: veor q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf3]
@ CHECK: veor q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf3]
+@ CHECK: vclt.s16 q5, q5, #0 @ encoding: [0x4a,0xa2,0xb5,0xf3]
+@ CHECK: vclt.s16 d5, d5, #0 @ encoding: [0x05,0x52,0xb5,0xf3]
+
+@ CHECK: vceq.i16 q5, q5, q3 @ encoding: [0x56,0xa8,0x1a,0xf3]
+@ CHECK: vceq.i16 d5, d5, d3 @ encoding: [0x13,0x58,0x15,0xf3]
+
+@ CHECK: vcgt.s16 q5, q5, q3 @ encoding: [0x46,0xa3,0x1a,0xf2]
+@ CHECK: vcgt.s16 d5, d5, d3 @ encoding: [0x03,0x53,0x15,0xf2]
+
+@ CHECK: vcge.s16 q5, q5, q3 @ encoding: [0x56,0xa3,0x1a,0xf2]
+@ CHECK: vcge.s16 d5, d5, d3 @ encoding: [0x13,0x53,0x15,0xf2]
+
+@ CHECK: vcgt.s16 q5, q5, #0 @ encoding: [0x4a,0xa0,0xb5,0xf3]
+@ CHECK: vcgt.s16 d5, d5, #0 @ encoding: [0x05,0x50,0xb5,0xf3]
+
+@ CHECK: vcge.s16 q5, q5, #0 @ encoding: [0xca,0xa0,0xb5,0xf3]
+@ CHECK: vcge.s16 d5, d5, #0 @ encoding: [0x85,0x50,0xb5,0xf3]
+
+@ CHECK: vceq.i16 q5, q5, #0 @ encoding: [0x4a,0xa1,0xb5,0xf3]
+@ CHECK: vceq.i16 d5, d5, #0 @ encoding: [0x05,0x51,0xb5,0xf3]
+
+@ CHECK: vcle.s16 q5, q5, #0 @ encoding: [0xca,0xa1,0xb5,0xf3]
+@ CHECK: vcle.s16 d5, d5, #0 @ encoding: [0x85,0x51,0xb5,0xf3]
+
+@ CHECK: vacge.f32 d5, d5, d30 @ encoding: [0x3e,0x5e,0x05,0xf3]
+@ CHECK: vacge.f32 q5, q5, q3 @ encoding: [0x56,0xae,0x0a,0xf3]
+
+@ CHECK: vacgt.f32 d5, d5, d30 @ encoding: [0x3e,0x5e,0x25,0xf3]
+@ CHECK: vacgt.f32 q5, q5, q3 @ encoding: [0x56,0xae,0x2a,0xf3]
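The FIXME above concerns vacle/vaclt: since |a| <= |b| is the same comparison as |b| >= |a|, those spellings could be accepted as vacge/vacgt with the source operands swapped. A sketch of that rewrite, under exactly the assumption the FIXME states (expand_reversed_ac is a name invented here):

    def expand_reversed_ac(mnem, ty, d, n, m):
        # |n| <= |m| is |m| >= |n|: swap the sources, keep the destination.
        canon = {"vacle": "vacge", "vaclt": "vacgt"}[mnem]
        return "%s.%s %s, %s, %s" % (canon, ty, d, m, n)

    print(expand_reversed_ac("vacle", "f32", "d5", "d5", "d30"))
    # vacge.f32 d5, d30, d5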
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
index 3cc6bf11cf5e..648e91705782 100644
--- a/test/MC/ARM/neon-vld-encoding.s
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -1,163 +1,163 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
- vld1.8 {d16}, [r0, :64]
+ vld1.8 {d16}, [r0:64]
vld1.16 {d16}, [r0]
vld1.32 {d16}, [r0]
vld1.64 {d16}, [r0]
- vld1.8 {d16, d17}, [r0, :64]
- vld1.16 {d16, d17}, [r0, :128]
+ vld1.8 {d16, d17}, [r0:64]
+ vld1.16 {d16, d17}, [r0:128]
vld1.32 {d16, d17}, [r0]
vld1.64 {d16, d17}, [r0]
vld1.8 {d1, d2, d3}, [r3]
- vld1.16 {d4, d5, d6}, [r3, :64]
+ vld1.16 {d4, d5, d6}, [r3:64]
vld1.32 {d5, d6, d7}, [r3]
- vld1.64 {d6, d7, d8}, [r3, :64]
+ vld1.64 {d6, d7, d8}, [r3:64]
vld1.8 {d1, d2, d3, d4}, [r3]
- vld1.16 {d4, d5, d6, d7}, [r3, :64]
+ vld1.16 {d4, d5, d6, d7}, [r3:64]
vld1.32 {d5, d6, d7, d8}, [r3]
- vld1.64 {d6, d7, d8, d9}, [r3, :64]
+ vld1.64 {d6, d7, d8, d9}, [r3:64]
- vld1.8 {d16}, [r0, :64]!
+ vld1.8 {d16}, [r0:64]!
vld1.16 {d16}, [r0]!
vld1.32 {d16}, [r0]!
vld1.64 {d16}, [r0]!
- vld1.8 {d16, d17}, [r0, :64]!
- vld1.16 {d16, d17}, [r0, :128]!
+ vld1.8 {d16, d17}, [r0:64]!
+ vld1.16 {d16, d17}, [r0:128]!
vld1.32 {d16, d17}, [r0]!
vld1.64 {d16, d17}, [r0]!
- vld1.8 {d16}, [r0, :64], r5
+ vld1.8 {d16}, [r0:64], r5
vld1.16 {d16}, [r0], r5
vld1.32 {d16}, [r0], r5
vld1.64 {d16}, [r0], r5
- vld1.8 {d16, d17}, [r0, :64], r5
- vld1.16 {d16, d17}, [r0, :128], r5
+ vld1.8 {d16, d17}, [r0:64], r5
+ vld1.16 {d16, d17}, [r0:128], r5
vld1.32 {d16, d17}, [r0], r5
vld1.64 {d16, d17}, [r0], r5
vld1.8 {d1, d2, d3}, [r3]!
- vld1.16 {d4, d5, d6}, [r3, :64]!
+ vld1.16 {d4, d5, d6}, [r3:64]!
vld1.32 {d5, d6, d7}, [r3]!
- vld1.64 {d6, d7, d8}, [r3, :64]!
+ vld1.64 {d6, d7, d8}, [r3:64]!
vld1.8 {d1, d2, d3}, [r3], r6
- vld1.16 {d4, d5, d6}, [r3, :64], r6
+ vld1.16 {d4, d5, d6}, [r3:64], r6
vld1.32 {d5, d6, d7}, [r3], r6
- vld1.64 {d6, d7, d8}, [r3, :64], r6
+ vld1.64 {d6, d7, d8}, [r3:64], r6
vld1.8 {d1, d2, d3, d4}, [r3]!
- vld1.16 {d4, d5, d6, d7}, [r3, :64]!
+ vld1.16 {d4, d5, d6, d7}, [r3:64]!
vld1.32 {d5, d6, d7, d8}, [r3]!
- vld1.64 {d6, d7, d8, d9}, [r3, :64]!
+ vld1.64 {d6, d7, d8, d9}, [r3:64]!
vld1.8 {d1, d2, d3, d4}, [r3], r8
- vld1.16 {d4, d5, d6, d7}, [r3, :64], r8
+ vld1.16 {d4, d5, d6, d7}, [r3:64], r8
vld1.32 {d5, d6, d7, d8}, [r3], r8
- vld1.64 {d6, d7, d8, d9}, [r3, :64], r8
+ vld1.64 {d6, d7, d8, d9}, [r3:64], r8
-@ CHECK: vld1.8 {d16}, [r0, :64] @ encoding: [0x1f,0x07,0x60,0xf4]
+@ CHECK: vld1.8 {d16}, [r0:64] @ encoding: [0x1f,0x07,0x60,0xf4]
@ CHECK: vld1.16 {d16}, [r0] @ encoding: [0x4f,0x07,0x60,0xf4]
@ CHECK: vld1.32 {d16}, [r0] @ encoding: [0x8f,0x07,0x60,0xf4]
@ CHECK: vld1.64 {d16}, [r0] @ encoding: [0xcf,0x07,0x60,0xf4]
-@ CHECK: vld1.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x0a,0x60,0xf4]
-@ CHECK: vld1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0:64] @ encoding: [0x1f,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x0a,0x60,0xf4]
@ CHECK: vld1.32 {d16, d17}, [r0] @ encoding: [0x8f,0x0a,0x60,0xf4]
@ CHECK: vld1.64 {d16, d17}, [r0] @ encoding: [0xcf,0x0a,0x60,0xf4]
@ CHECK: vld1.8 {d1, d2, d3}, [r3] @ encoding: [0x0f,0x16,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64] @ encoding: [0x5f,0x46,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3:64] @ encoding: [0x5f,0x46,0x23,0xf4]
@ CHECK: vld1.32 {d5, d6, d7}, [r3] @ encoding: [0x8f,0x56,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64] @ encoding: [0xdf,0x66,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3:64] @ encoding: [0xdf,0x66,0x23,0xf4]
@ CHECK: vld1.8 {d1, d2, d3, d4}, [r3] @ encoding: [0x0f,0x12,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64] @ encoding: [0x5f,0x42,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3:64] @ encoding: [0x5f,0x42,0x23,0xf4]
@ CHECK: vld1.32 {d5, d6, d7, d8}, [r3] @ encoding: [0x8f,0x52,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64] @ encoding: [0xdf,0x62,0x23,0xf4]
-@ CHECK: vld1.8 {d16}, [r0, :64]! @ encoding: [0x1d,0x07,0x60,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3:64] @ encoding: [0xdf,0x62,0x23,0xf4]
+@ CHECK: vld1.8 {d16}, [r0:64]! @ encoding: [0x1d,0x07,0x60,0xf4]
@ CHECK: vld1.16 {d16}, [r0]! @ encoding: [0x4d,0x07,0x60,0xf4]
@ CHECK: vld1.32 {d16}, [r0]! @ encoding: [0x8d,0x07,0x60,0xf4]
@ CHECK: vld1.64 {d16}, [r0]! @ encoding: [0xcd,0x07,0x60,0xf4]
-@ CHECK: vld1.8 {d16, d17}, [r0, :64]! @ encoding: [0x1d,0x0a,0x60,0xf4]
-@ CHECK: vld1.16 {d16, d17}, [r0, :128]! @ encoding: [0x6d,0x0a,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0:64]! @ encoding: [0x1d,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0:128]! @ encoding: [0x6d,0x0a,0x60,0xf4]
@ CHECK: vld1.32 {d16, d17}, [r0]! @ encoding: [0x8d,0x0a,0x60,0xf4]
@ CHECK: vld1.64 {d16, d17}, [r0]! @ encoding: [0xcd,0x0a,0x60,0xf4]
-@ CHECK: vld1.8 {d16}, [r0, :64], r5 @ encoding: [0x15,0x07,0x60,0xf4]
+@ CHECK: vld1.8 {d16}, [r0:64], r5 @ encoding: [0x15,0x07,0x60,0xf4]
@ CHECK: vld1.16 {d16}, [r0], r5 @ encoding: [0x45,0x07,0x60,0xf4]
@ CHECK: vld1.32 {d16}, [r0], r5 @ encoding: [0x85,0x07,0x60,0xf4]
@ CHECK: vld1.64 {d16}, [r0], r5 @ encoding: [0xc5,0x07,0x60,0xf4]
-@ CHECK: vld1.8 {d16, d17}, [r0, :64], r5 @ encoding: [0x15,0x0a,0x60,0xf4]
-@ CHECK: vld1.16 {d16, d17}, [r0, :128], r5 @ encoding: [0x65,0x0a,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0:64], r5 @ encoding: [0x15,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0:128], r5 @ encoding: [0x65,0x0a,0x60,0xf4]
@ CHECK: vld1.32 {d16, d17}, [r0], r5 @ encoding: [0x85,0x0a,0x60,0xf4]
@ CHECK: vld1.64 {d16, d17}, [r0], r5 @ encoding: [0xc5,0x0a,0x60,0xf4]
@ CHECK: vld1.8 {d1, d2, d3}, [r3]! @ encoding: [0x0d,0x16,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64]! @ encoding: [0x5d,0x46,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3:64]! @ encoding: [0x5d,0x46,0x23,0xf4]
@ CHECK: vld1.32 {d5, d6, d7}, [r3]! @ encoding: [0x8d,0x56,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64]! @ encoding: [0xdd,0x66,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3:64]! @ encoding: [0xdd,0x66,0x23,0xf4]
@ CHECK: vld1.8 {d1, d2, d3}, [r3], r6 @ encoding: [0x06,0x16,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64], r6 @ encoding: [0x56,0x46,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3:64], r6 @ encoding: [0x56,0x46,0x23,0xf4]
@ CHECK: vld1.32 {d5, d6, d7}, [r3], r6 @ encoding: [0x86,0x56,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64], r6 @ encoding: [0xd6,0x66,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3:64], r6 @ encoding: [0xd6,0x66,0x23,0xf4]
@ CHECK: vld1.8 {d1, d2, d3, d4}, [r3]! @ encoding: [0x0d,0x12,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64]! @ encoding: [0x5d,0x42,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3:64]! @ encoding: [0x5d,0x42,0x23,0xf4]
@ CHECK: vld1.32 {d5, d6, d7, d8}, [r3]! @ encoding: [0x8d,0x52,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64]! @ encoding: [0xdd,0x62,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3:64]! @ encoding: [0xdd,0x62,0x23,0xf4]
@ CHECK: vld1.8 {d1, d2, d3, d4}, [r3], r8 @ encoding: [0x08,0x12,0x23,0xf4]
-@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64], r8 @ encoding: [0x58,0x42,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3:64], r8 @ encoding: [0x58,0x42,0x23,0xf4]
@ CHECK: vld1.32 {d5, d6, d7, d8}, [r3], r8 @ encoding: [0x88,0x52,0x23,0xf4]
-@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64], r8 @ encoding: [0xd8,0x62,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3:64], r8 @ encoding: [0xd8,0x62,0x23,0xf4]
- vld2.8 {d16, d17}, [r0, :64]
- vld2.16 {d16, d17}, [r0, :128]
+ vld2.8 {d16, d17}, [r0:64]
+ vld2.16 {d16, d17}, [r0:128]
vld2.32 {d16, d17}, [r0]
- vld2.8 {d16, d17, d18, d19}, [r0, :64]
- vld2.16 {d16, d17, d18, d19}, [r0, :128]
- vld2.32 {d16, d17, d18, d19}, [r0, :256]
+ vld2.8 {d16, d17, d18, d19}, [r0:64]
+ vld2.16 {d16, d17, d18, d19}, [r0:128]
+ vld2.32 {d16, d17, d18, d19}, [r0:256]
- vld2.8 {d19, d20}, [r0, :64]!
- vld2.16 {d16, d17}, [r0, :128]!
+ vld2.8 {d19, d20}, [r0:64]!
+ vld2.16 {d16, d17}, [r0:128]!
vld2.32 {q10}, [r0]!
- vld2.8 {d4-d7}, [r0, :64]!
- vld2.16 {d1, d2, d3, d4}, [r0, :128]!
- vld2.32 {q7, q8}, [r0, :256]!
+ vld2.8 {d4-d7}, [r0:64]!
+ vld2.16 {d1, d2, d3, d4}, [r0:128]!
+ vld2.32 {q7, q8}, [r0:256]!
- vld2.8 {d19, d20}, [r0, :64], r6
- vld2.16 {d16, d17}, [r0, :128], r6
+ vld2.8 {d19, d20}, [r0:64], r6
+ vld2.16 {d16, d17}, [r0:128], r6
vld2.32 {q10}, [r0], r6
- vld2.8 {d4-d7}, [r0, :64], r6
- vld2.16 {d1, d2, d3, d4}, [r0, :128], r6
- vld2.32 {q7, q8}, [r0, :256], r6
+ vld2.8 {d4-d7}, [r0:64], r6
+ vld2.16 {d1, d2, d3, d4}, [r0:128], r6
+ vld2.32 {q7, q8}, [r0:256], r6
-@ CHECK: vld2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x60,0xf4]
-@ CHECK: vld2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x60,0xf4]
+@ CHECK: vld2.8 {d16, d17}, [r0:64] @ encoding: [0x1f,0x08,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x60,0xf4]
@ CHECK: vld2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x60,0xf4]
-@ CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x60,0xf4]
-@ CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf4]
-@ CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf4]
+@ CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x03,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x03,0x60,0xf4]
+@ CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x03,0x60,0xf4]
-@ CHECK: vld2.8 {d19, d20}, [r0, :64]! @ encoding: [0x1d,0x38,0x60,0xf4]
-@ CHECK: vld2.16 {d16, d17}, [r0, :128]! @ encoding: [0x6d,0x08,0x60,0xf4]
+@ CHECK: vld2.8 {d19, d20}, [r0:64]! @ encoding: [0x1d,0x38,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0:128]! @ encoding: [0x6d,0x08,0x60,0xf4]
@ CHECK: vld2.32 {d20, d21}, [r0]! @ encoding: [0x8d,0x48,0x60,0xf4]
-@ CHECK: vld2.8 {d4, d5, d6, d7}, [r0, :64]! @ encoding: [0x1d,0x43,0x20,0xf4]
-@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128]! @ encoding: [0x6d,0x13,0x20,0xf4]
-@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256]! @ encoding: [0xbd,0xe3,0x20,0xf4]
+@ CHECK: vld2.8 {d4, d5, d6, d7}, [r0:64]! @ encoding: [0x1d,0x43,0x20,0xf4]
+@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0:128]! @ encoding: [0x6d,0x13,0x20,0xf4]
+@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0:256]! @ encoding: [0xbd,0xe3,0x20,0xf4]
-@ CHECK: vld2.8 {d19, d20}, [r0, :64], r6 @ encoding: [0x16,0x38,0x60,0xf4]
-@ CHECK: vld2.16 {d16, d17}, [r0, :128], r6 @ encoding: [0x66,0x08,0x60,0xf4]
+@ CHECK: vld2.8 {d19, d20}, [r0:64], r6 @ encoding: [0x16,0x38,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0:128], r6 @ encoding: [0x66,0x08,0x60,0xf4]
@ CHECK: vld2.32 {d20, d21}, [r0], r6 @ encoding: [0x86,0x48,0x60,0xf4]
-@ CHECK: vld2.8 {d4, d5, d6, d7}, [r0, :64], r6 @ encoding: [0x16,0x43,0x20,0xf4]
-@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128], r6 @ encoding: [0x66,0x13,0x20,0xf4]
-@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256], r6 @ encoding: [0xb6,0xe3,0x20,0xf4]
+@ CHECK: vld2.8 {d4, d5, d6, d7}, [r0:64], r6 @ encoding: [0x16,0x43,0x20,0xf4]
+@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0:128], r6 @ encoding: [0x66,0x13,0x20,0xf4]
+@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0:256], r6 @ encoding: [0xb6,0xe3,0x20,0xf4]
vld3.8 {d16, d17, d18}, [r1]
vld3.16 {d6, d7, d8}, [r2]
vld3.32 {d1, d2, d3}, [r3]
- vld3.8 {d16, d18, d20}, [r0, :64]
+ vld3.8 {d16, d18, d20}, [r0:64]
vld3.u16 {d27, d29, d31}, [r4]
vld3.i32 {d6, d8, d10}, [r5]
@@ -171,7 +171,7 @@
vld3.p8 {d6, d7, d8}, [r8]!
vld3.16 {d9, d10, d11}, [r7]!
vld3.f32 {d1, d2, d3}, [r6]!
- vld3.8 {d16, d18, d20}, [r0, :64]!
+ vld3.8 {d16, d18, d20}, [r0:64]!
vld3.p16 {d20, d22, d24}, [r5]!
vld3.32 {d5, d7, d9}, [r4]!
@@ -179,7 +179,7 @@
@ CHECK: vld3.8 {d16, d17, d18}, [r1] @ encoding: [0x0f,0x04,0x61,0xf4]
@ CHECK: vld3.16 {d6, d7, d8}, [r2] @ encoding: [0x4f,0x64,0x22,0xf4]
@ CHECK: vld3.32 {d1, d2, d3}, [r3] @ encoding: [0x8f,0x14,0x23,0xf4]
-@ CHECK: vld3.8 {d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x60,0xf4]
+@ CHECK: vld3.8 {d16, d18, d20}, [r0:64] @ encoding: [0x1f,0x05,0x60,0xf4]
@ CHECK: vld3.16 {d27, d29, d31}, [r4] @ encoding: [0x4f,0xb5,0x64,0xf4]
@ CHECK: vld3.32 {d6, d8, d10}, [r5] @ encoding: [0x8f,0x65,0x25,0xf4]
@ CHECK: vld3.8 {d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x26,0xf4]
@@ -191,48 +191,48 @@
@ CHECK: vld3.8 {d6, d7, d8}, [r8]! @ encoding: [0x0d,0x64,0x28,0xf4]
@ CHECK: vld3.16 {d9, d10, d11}, [r7]! @ encoding: [0x4d,0x94,0x27,0xf4]
@ CHECK: vld3.32 {d1, d2, d3}, [r6]! @ encoding: [0x8d,0x14,0x26,0xf4]
-@ CHECK: vld3.8 {d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf4]
+@ CHECK: vld3.8 {d16, d18, d20}, [r0:64]! @ encoding: [0x1d,0x05,0x60,0xf4]
@ CHECK: vld3.16 {d20, d22, d24}, [r5]! @ encoding: [0x4d,0x45,0x65,0xf4]
@ CHECK: vld3.32 {d5, d7, d9}, [r4]! @ encoding: [0x8d,0x55,0x24,0xf4]
- vld4.8 {d16, d17, d18, d19}, [r1, :64]
- vld4.16 {d16, d17, d18, d19}, [r2, :128]
- vld4.32 {d16, d17, d18, d19}, [r3, :256]
- vld4.8 {d17, d19, d21, d23}, [r5, :256]
+ vld4.8 {d16, d17, d18, d19}, [r1:64]
+ vld4.16 {d16, d17, d18, d19}, [r2:128]
+ vld4.32 {d16, d17, d18, d19}, [r3:256]
+ vld4.8 {d17, d19, d21, d23}, [r5:256]
vld4.16 {d17, d19, d21, d23}, [r7]
vld4.32 {d16, d18, d20, d22}, [r8]
- vld4.s8 {d16, d17, d18, d19}, [r1, :64]!
- vld4.s16 {d16, d17, d18, d19}, [r2, :128]!
- vld4.s32 {d16, d17, d18, d19}, [r3, :256]!
- vld4.u8 {d17, d19, d21, d23}, [r5, :256]!
+ vld4.s8 {d16, d17, d18, d19}, [r1:64]!
+ vld4.s16 {d16, d17, d18, d19}, [r2:128]!
+ vld4.s32 {d16, d17, d18, d19}, [r3:256]!
+ vld4.u8 {d17, d19, d21, d23}, [r5:256]!
vld4.u16 {d17, d19, d21, d23}, [r7]!
vld4.u32 {d16, d18, d20, d22}, [r8]!
- vld4.p8 {d16, d17, d18, d19}, [r1, :64], r8
+ vld4.p8 {d16, d17, d18, d19}, [r1:64], r8
vld4.p16 {d16, d17, d18, d19}, [r2], r7
- vld4.f32 {d16, d17, d18, d19}, [r3, :64], r5
- vld4.i8 {d16, d18, d20, d22}, [r4, :256], r2
+ vld4.f32 {d16, d17, d18, d19}, [r3:64], r5
+ vld4.i8 {d16, d18, d20, d22}, [r4:256], r2
vld4.i16 {d16, d18, d20, d22}, [r6], r3
vld4.i32 {d17, d19, d21, d23}, [r9], r4
-@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x61,0xf4]
-@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x62,0xf4]
-@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x63,0xf4]
-@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x65,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1:64] @ encoding: [0x1f,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2:128] @ encoding: [0x6f,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3:256] @ encoding: [0xbf,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5:256] @ encoding: [0x3f,0x11,0x65,0xf4]
@ CHECK: vld4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x67,0xf4]
@ CHECK: vld4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x68,0xf4]
-@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x61,0xf4]
-@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x62,0xf4]
-@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x63,0xf4]
-@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x65,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1:64]! @ encoding: [0x1d,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2:128]! @ encoding: [0x6d,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3:256]! @ encoding: [0xbd,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5:256]! @ encoding: [0x3d,0x11,0x65,0xf4]
@ CHECK: vld4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x67,0xf4]
@ CHECK: vld4.32 {d16, d18, d20, d22}, [r8]! @ encoding: [0x8d,0x01,0x68,0xf4]
-@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x61,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1:64], r8 @ encoding: [0x18,0x00,0x61,0xf4]
@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x62,0xf4]
-@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x63,0xf4]
-@ CHECK: vld4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x64,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3:64], r5 @ encoding: [0x95,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d16, d18, d20, d22}, [r4:256], r2 @ encoding: [0x32,0x01,0x64,0xf4]
@ CHECK: vld4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x66,0xf4]
@ CHECK: vld4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x69,0xf4]
@@ -252,28 +252,28 @@
@ CHECK: vld1.8 {d4[], d5[]}, [r1], r3 @ encoding: [0x23,0x4c,0xa1,0xf4]
vld1.8 {d16[3]}, [r0]
- vld1.16 {d16[2]}, [r0, :16]
- vld1.32 {d16[1]}, [r0, :32]
+ vld1.16 {d16[2]}, [r0:16]
+ vld1.32 {d16[1]}, [r0:32]
vld1.p8 d12[6], [r2]!
vld1.i8 d12[6], [r2], r2
vld1.u16 d12[3], [r2]!
vld1.16 d12[2], [r2], r2
@ CHECK: vld1.8 {d16[3]}, [r0] @ encoding: [0x6f,0x00,0xe0,0xf4]
-@ CHECK: vld1.16 {d16[2]}, [r0, :16] @ encoding: [0x9f,0x04,0xe0,0xf4]
-@ CHECK: vld1.32 {d16[1]}, [r0, :32] @ encoding: [0xbf,0x08,0xe0,0xf4]
+@ CHECK: vld1.16 {d16[2]}, [r0:16] @ encoding: [0x9f,0x04,0xe0,0xf4]
+@ CHECK: vld1.32 {d16[1]}, [r0:32] @ encoding: [0xbf,0x08,0xe0,0xf4]
@ CHECK: vld1.8 {d12[6]}, [r2]! @ encoding: [0xcd,0xc0,0xa2,0xf4]
@ CHECK: vld1.8 {d12[6]}, [r2], r2 @ encoding: [0xc2,0xc0,0xa2,0xf4]
@ CHECK: vld1.16 {d12[3]}, [r2]! @ encoding: [0xcd,0xc4,0xa2,0xf4]
@ CHECK: vld1.16 {d12[2]}, [r2], r2 @ encoding: [0x82,0xc4,0xa2,0xf4]
- vld2.8 {d16[1], d17[1]}, [r0, :16]
- vld2.16 {d16[1], d17[1]}, [r0, :32]
+ vld2.8 {d16[1], d17[1]}, [r0:16]
+ vld2.16 {d16[1], d17[1]}, [r0:32]
vld2.32 {d16[1], d17[1]}, [r0]
vld2.16 {d17[1], d19[1]}, [r0]
- vld2.32 {d17[0], d19[0]}, [r0, :64]
- vld2.32 {d17[0], d19[0]}, [r0, :64]!
+ vld2.32 {d17[0], d19[0]}, [r0:64]
+ vld2.32 {d17[0], d19[0]}, [r0:64]!
vld2.8 {d2[4], d3[4]}, [r2], r3
vld2.8 {d2[4], d3[4]}, [r2]!
vld2.8 {d2[4], d3[4]}, [r2]
@@ -284,12 +284,12 @@
vld2.32 {d22[ ],d23[ ]}, [r5], r4
vld2.32 {d22[ ],d24[ ]}, [r6], r4
-@ CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf4]
-@ CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf4]
+@ CHECK: vld2.8 {d16[1], d17[1]}, [r0:16] @ encoding: [0x3f,0x01,0xe0,0xf4]
+@ CHECK: vld2.16 {d16[1], d17[1]}, [r0:32] @ encoding: [0x5f,0x05,0xe0,0xf4]
@ CHECK: vld2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xe0,0xf4]
@ CHECK: vld2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xe0,0xf4]
-@ CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xe0,0xf4]
-@ CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]! @ encoding: [0x5d,0x19,0xe0,0xf4]
+@ CHECK: vld2.32 {d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xe0,0xf4]
+@ CHECK: vld2.32 {d17[0], d19[0]}, [r0:64]! @ encoding: [0x5d,0x19,0xe0,0xf4]
@ CHECK: vld2.8 {d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0xa2,0xf4]
@ CHECK: vld2.8 {d2[4], d3[4]}, [r2]! @ encoding: [0x8d,0x21,0xa2,0xf4]
@ CHECK: vld2.8 {d2[4], d3[4]}, [r2] @ encoding: [0x8f,0x21,0xa2,0xf4]
@@ -383,15 +383,15 @@
vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]
vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]
- vld4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
- vld4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]!
- vld4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]!
+ vld4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
+ vld4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2:64]!
+ vld4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3:128]!
vld4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]!
vld4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]!
- vld4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8
+ vld4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32], r8
vld4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7
- vld4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5
+ vld4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3:64], r5
vld4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3
vld4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4
@@ -400,14 +400,14 @@
@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xe3,0xf4]
@ CHECK: vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xe7,0xf4]
@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xe8,0xf4]
-@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xe1,0xf4]
-@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xe2,0xf4]
-@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xe3,0xf4]
+@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]! @ encoding: [0x3d,0x03,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2:64]! @ encoding: [0x5d,0x07,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3:128]! @ encoding: [0xad,0x0b,0xe3,0xf4]
@ CHECK: vld4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xe7,0xf4]
@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xe8,0xf4]
-@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xe1,0xf4]
+@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32], r8 @ encoding: [0x38,0x03,0xe1,0xf4]
@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xe2,0xf4]
-@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xe3,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3:64], r5 @ encoding: [0x95,0x0b,0xe3,0xf4]
@ CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xe6,0xf4]
@ CHECK: vld4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xe9,0xf4]
@@ -490,8 +490,17 @@
@ Register lists can use the range syntax, just like VLDM
- vld1.f64 {d2-d5}, [r2,:128]!
- vld1.f64 {d2,d3,d4,d5}, [r2,:128]!
+ vld1.f64 {d2-d5}, [r2:128]!
+ vld1.f64 {d2,d3,d4,d5}, [r2:128]!
-@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2, :128]! @ encoding: [0xed,0x22,0x22,0xf4]
-@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2, :128]! @ encoding: [0xed,0x22,0x22,0xf4]
+@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2:128]! @ encoding: [0xed,0x22,0x22,0xf4]
+@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2:128]! @ encoding: [0xed,0x22,0x22,0xf4]
+
+
+@ Verify that the old, incorrect alignment-specifier syntax (", :") is
+@ still accepted; a normalization sketch follows this file's diff.
+ vld2.8 {d16, d17}, [r0, :64]
+ vld2.16 {d16, d17}, [r0, :128]
+
+@ CHECK: vld2.8 {d16, d17}, [r0:64] @ encoding: [0x1f,0x08,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x60,0xf4]
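Both spellings of the alignment specifier assemble to identical bytes, as the two trailing checks confirm. A regex sketch that rewrites the legacy ", :" form into the current one (normalize_align is a name invented here; it is an illustration, not how the assembler itself parses):

    import re

    def normalize_align(line):
        # "[rN, :A]" (old, still accepted) -> "[rN:A]" (current syntax)
        return re.sub(r"\[\s*(\w+)\s*,\s*:(\d+)\]", r"[\1:\2]", line)

    assert (normalize_align("vld2.8 {d16, d17}, [r0, :64]")
            == "vld2.8 {d16, d17}, [r0:64]")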
diff --git a/test/MC/ARM/neon-vst-encoding.s b/test/MC/ARM/neon-vst-encoding.s
index f5feca4c8c12..ef9f037c536f 100644
--- a/test/MC/ARM/neon-vst-encoding.s
+++ b/test/MC/ARM/neon-vst-encoding.s
@@ -1,67 +1,67 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
- vst1.8 {d16}, [r0, :64]
+ vst1.8 {d16}, [r0:64]
vst1.16 {d16}, [r0]
vst1.32 {d16}, [r0]
vst1.64 {d16}, [r0]
- vst1.8 {d16, d17}, [r0, :64]
- vst1.16 {d16, d17}, [r0, :128]
+ vst1.8 {d16, d17}, [r0:64]
+ vst1.16 {d16, d17}, [r0:128]
vst1.32 {d16, d17}, [r0]
vst1.64 {d16, d17}, [r0]
- vst1.8 {d16, d17, d18}, [r0, :64]
- vst1.8 {d16, d17, d18}, [r0, :64]!
+ vst1.8 {d16, d17, d18}, [r0:64]
+ vst1.8 {d16, d17, d18}, [r0:64]!
vst1.8 {d16, d17, d18}, [r0], r3
- vst1.8 {d16, d17, d18, d19}, [r0, :64]
- vst1.16 {d16, d17, d18, d19}, [r1, :64]!
+ vst1.8 {d16, d17, d18, d19}, [r0:64]
+ vst1.16 {d16, d17, d18, d19}, [r1:64]!
vst1.64 {d16, d17, d18, d19}, [r3], r2
-@ CHECK: vst1.8 {d16}, [r0, :64] @ encoding: [0x1f,0x07,0x40,0xf4]
+@ CHECK: vst1.8 {d16}, [r0:64] @ encoding: [0x1f,0x07,0x40,0xf4]
@ CHECK: vst1.16 {d16}, [r0] @ encoding: [0x4f,0x07,0x40,0xf4]
@ CHECK: vst1.32 {d16}, [r0] @ encoding: [0x8f,0x07,0x40,0xf4]
@ CHECK: vst1.64 {d16}, [r0] @ encoding: [0xcf,0x07,0x40,0xf4]
-@ CHECK: vst1.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x0a,0x40,0xf4]
-@ CHECK: vst1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17}, [r0:64] @ encoding: [0x1f,0x0a,0x40,0xf4]
+@ CHECK: vst1.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x0a,0x40,0xf4]
@ CHECK: vst1.32 {d16, d17}, [r0] @ encoding: [0x8f,0x0a,0x40,0xf4]
@ CHECK: vst1.64 {d16, d17}, [r0] @ encoding: [0xcf,0x0a,0x40,0xf4]
-@ CHECK: vst1.8 {d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x06,0x40,0xf4]
-@ CHECK: vst1.8 {d16, d17, d18}, [r0, :64]! @ encoding: [0x1d,0x06,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17, d18}, [r0:64] @ encoding: [0x1f,0x06,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17, d18}, [r0:64]! @ encoding: [0x1d,0x06,0x40,0xf4]
@ CHECK: vst1.8 {d16, d17, d18}, [r0], r3 @ encoding: [0x03,0x06,0x40,0xf4]
-@ CHECK: vst1.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x02,0x40,0xf4]
-@ CHECK: vst1.16 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x5d,0x02,0x41,0xf4]
+@ CHECK: vst1.8 {d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x02,0x40,0xf4]
+@ CHECK: vst1.16 {d16, d17, d18, d19}, [r1:64]! @ encoding: [0x5d,0x02,0x41,0xf4]
@ CHECK: vst1.64 {d16, d17, d18, d19}, [r3], r2 @ encoding: [0xc2,0x02,0x43,0xf4]
- vst2.8 {d16, d17}, [r0, :64]
- vst2.16 {d16, d17}, [r0, :128]
+ vst2.8 {d16, d17}, [r0:64]
+ vst2.16 {d16, d17}, [r0:128]
vst2.32 {d16, d17}, [r0]
- vst2.8 {d16, d17, d18, d19}, [r0, :64]
- vst2.16 {d16, d17, d18, d19}, [r0, :128]
- vst2.32 {d16, d17, d18, d19}, [r0, :256]
- vst2.8 {d16, d17}, [r0, :64]!
- vst2.16 {q15}, [r0, :128]!
+ vst2.8 {d16, d17, d18, d19}, [r0:64]
+ vst2.16 {d16, d17, d18, d19}, [r0:128]
+ vst2.32 {d16, d17, d18, d19}, [r0:256]
+ vst2.8 {d16, d17}, [r0:64]!
+ vst2.16 {q15}, [r0:128]!
vst2.32 {d14, d15}, [r0]!
- vst2.8 {d16, d17, d18, d19}, [r0, :64]!
- vst2.16 {d18-d21}, [r0, :128]!
- vst2.32 {q4, q5}, [r0, :256]!
+ vst2.8 {d16, d17, d18, d19}, [r0:64]!
+ vst2.16 {d18-d21}, [r0:128]!
+ vst2.32 {q4, q5}, [r0:256]!
-@ CHECK: vst2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x40,0xf4]
-@ CHECK: vst2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x40,0xf4]
+@ CHECK: vst2.8 {d16, d17}, [r0:64] @ encoding: [0x1f,0x08,0x40,0xf4]
+@ CHECK: vst2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x40,0xf4]
@ CHECK: vst2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x40,0xf4]
-@ CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x40,0xf4]
-@ CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf4]
-@ CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf4]
-@ CHECK: vst2.8 {d16, d17}, [r0, :64]! @ encoding: [0x1d,0x08,0x40,0xf4]
-@ CHECK: vst2.16 {d30, d31}, [r0, :128]! @ encoding: [0x6d,0xe8,0x40,0xf4]
+@ CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x03,0x40,0xf4]
+@ CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x03,0x40,0xf4]
+@ CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x03,0x40,0xf4]
+@ CHECK: vst2.8 {d16, d17}, [r0:64]! @ encoding: [0x1d,0x08,0x40,0xf4]
+@ CHECK: vst2.16 {d30, d31}, [r0:128]! @ encoding: [0x6d,0xe8,0x40,0xf4]
@ CHECK: vst2.32 {d14, d15}, [r0]! @ encoding: [0x8d,0xe8,0x00,0xf4]
-@ CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64]! @ encoding: [0x1d,0x03,0x40,0xf4]
-@ CHECK: vst2.16 {d18, d19, d20, d21}, [r0, :128]! @ encoding: [0x6d,0x23,0x40,0xf4]
-@ CHECK: vst2.32 {d8, d9, d10, d11}, [r0, :256]! @ encoding: [0xbd,0x83,0x00,0xf4]
+@ CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64]! @ encoding: [0x1d,0x03,0x40,0xf4]
+@ CHECK: vst2.16 {d18, d19, d20, d21}, [r0:128]! @ encoding: [0x6d,0x23,0x40,0xf4]
+@ CHECK: vst2.32 {d8, d9, d10, d11}, [r0:256]! @ encoding: [0xbd,0x83,0x00,0xf4]
vst3.8 {d16, d17, d18}, [r1]
vst3.16 {d6, d7, d8}, [r2]
vst3.32 {d1, d2, d3}, [r3]
- vst3.8 {d16, d18, d20}, [r0, :64]
+ vst3.8 {d16, d18, d20}, [r0:64]
vst3.u16 {d27, d29, d31}, [r4]
vst3.i32 {d6, d8, d10}, [r5]
@@ -75,14 +75,14 @@
vst3.p8 {d6, d7, d8}, [r8]!
vst3.16 {d9, d10, d11}, [r7]!
vst3.f32 {d1, d2, d3}, [r6]!
- vst3.8 {d16, d18, d20}, [r0, :64]!
+ vst3.8 {d16, d18, d20}, [r0:64]!
vst3.p16 {d20, d22, d24}, [r5]!
vst3.32 {d5, d7, d9}, [r4]!
@ CHECK: vst3.8 {d16, d17, d18}, [r1] @ encoding: [0x0f,0x04,0x41,0xf4]
@ CHECK: vst3.16 {d6, d7, d8}, [r2] @ encoding: [0x4f,0x64,0x02,0xf4]
@ CHECK: vst3.32 {d1, d2, d3}, [r3] @ encoding: [0x8f,0x14,0x03,0xf4]
-@ CHECK: vst3.8 {d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x40,0xf4]
+@ CHECK: vst3.8 {d16, d18, d20}, [r0:64] @ encoding: [0x1f,0x05,0x40,0xf4]
@ CHECK: vst3.16 {d27, d29, d31}, [r4] @ encoding: [0x4f,0xb5,0x44,0xf4]
@ CHECK: vst3.32 {d6, d8, d10}, [r5] @ encoding: [0x8f,0x65,0x05,0xf4]
@ CHECK: vst3.8 {d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x06,0xf4]
@@ -94,85 +94,85 @@
@ CHECK: vst3.8 {d6, d7, d8}, [r8]! @ encoding: [0x0d,0x64,0x08,0xf4]
@ CHECK: vst3.16 {d9, d10, d11}, [r7]! @ encoding: [0x4d,0x94,0x07,0xf4]
@ CHECK: vst3.32 {d1, d2, d3}, [r6]! @ encoding: [0x8d,0x14,0x06,0xf4]
-@ CHECK: vst3.8 {d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf4]
+@ CHECK: vst3.8 {d16, d18, d20}, [r0:64]! @ encoding: [0x1d,0x05,0x40,0xf4]
@ CHECK: vst3.16 {d20, d22, d24}, [r5]! @ encoding: [0x4d,0x45,0x45,0xf4]
@ CHECK: vst3.32 {d5, d7, d9}, [r4]! @ encoding: [0x8d,0x55,0x04,0xf4]
- vst4.8 {d16, d17, d18, d19}, [r1, :64]
- vst4.16 {d16, d17, d18, d19}, [r2, :128]
- vst4.32 {d16, d17, d18, d19}, [r3, :256]
- vst4.8 {d17, d19, d21, d23}, [r5, :256]
+ vst4.8 {d16, d17, d18, d19}, [r1:64]
+ vst4.16 {d16, d17, d18, d19}, [r2:128]
+ vst4.32 {d16, d17, d18, d19}, [r3:256]
+ vst4.8 {d17, d19, d21, d23}, [r5:256]
vst4.16 {d17, d19, d21, d23}, [r7]
vst4.32 {d16, d18, d20, d22}, [r8]
- vst4.s8 {d16, d17, d18, d19}, [r1, :64]!
- vst4.s16 {d16, d17, d18, d19}, [r2, :128]!
- vst4.s32 {d16, d17, d18, d19}, [r3, :256]!
- vst4.u8 {d17, d19, d21, d23}, [r5, :256]!
+ vst4.s8 {d16, d17, d18, d19}, [r1:64]!
+ vst4.s16 {d16, d17, d18, d19}, [r2:128]!
+ vst4.s32 {d16, d17, d18, d19}, [r3:256]!
+ vst4.u8 {d17, d19, d21, d23}, [r5:256]!
vst4.u16 {d17, d19, d21, d23}, [r7]!
vst4.u32 {d16, d18, d20, d22}, [r8]!
- vst4.p8 {d16, d17, d18, d19}, [r1, :64], r8
+ vst4.p8 {d16, d17, d18, d19}, [r1:64], r8
vst4.p16 {d16, d17, d18, d19}, [r2], r7
- vst4.f32 {d16, d17, d18, d19}, [r3, :64], r5
- vst4.i8 {d16, d18, d20, d22}, [r4, :256], r2
+ vst4.f32 {d16, d17, d18, d19}, [r3:64], r5
+ vst4.i8 {d16, d18, d20, d22}, [r4:256], r2
vst4.i16 {d16, d18, d20, d22}, [r6], r3
vst4.i32 {d17, d19, d21, d23}, [r9], r4
-@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x41,0xf4]
-@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x42,0xf4]
-@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x43,0xf4]
-@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x45,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1:64] @ encoding: [0x1f,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2:128] @ encoding: [0x6f,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3:256] @ encoding: [0xbf,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5:256] @ encoding: [0x3f,0x11,0x45,0xf4]
@ CHECK: vst4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x47,0xf4]
@ CHECK: vst4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x48,0xf4]
-@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x41,0xf4]
-@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x42,0xf4]
-@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x43,0xf4]
-@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x45,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1:64]! @ encoding: [0x1d,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2:128]! @ encoding: [0x6d,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3:256]! @ encoding: [0xbd,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5:256]! @ encoding: [0x3d,0x11,0x45,0xf4]
@ CHECK: vst4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x47,0xf4]
@ CHECK: vst4.32 {d16, d18, d20, d22}, [r8]! @ encoding: [0x8d,0x01,0x48,0xf4]
-@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x41,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1:64], r8 @ encoding: [0x18,0x00,0x41,0xf4]
@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x42,0xf4]
-@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x43,0xf4]
-@ CHECK: vst4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x44,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3:64], r5 @ encoding: [0x95,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d16, d18, d20, d22}, [r4:256], r2 @ encoding: [0x32,0x01,0x44,0xf4]
@ CHECK: vst4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x46,0xf4]
@ CHECK: vst4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x49,0xf4]
- vst2.8 {d16[1], d17[1]}, [r0, :16]
- vst2.p16 {d16[1], d17[1]}, [r0, :32]
+ vst2.8 {d16[1], d17[1]}, [r0:16]
+ vst2.p16 {d16[1], d17[1]}, [r0:32]
vst2.i32 {d16[1], d17[1]}, [r0]
vst2.u16 {d17[1], d19[1]}, [r0]
- vst2.f32 {d17[0], d19[0]}, [r0, :64]
+ vst2.f32 {d17[0], d19[0]}, [r0:64]
vst2.8 {d2[4], d3[4]}, [r2], r3
vst2.u8 {d2[4], d3[4]}, [r2]!
vst2.p8 {d2[4], d3[4]}, [r2]
vst2.16 {d17[1], d19[1]}, [r0]
- vst2.32 {d17[0], d19[0]}, [r0, :64]
+ vst2.32 {d17[0], d19[0]}, [r0:64]
vst2.i16 {d7[1], d9[1]}, [r1]!
- vst2.32 {d6[0], d8[0]}, [r2, :64]!
+ vst2.32 {d6[0], d8[0]}, [r2:64]!
vst2.16 {d2[1], d4[1]}, [r3], r5
- vst2.u32 {d5[0], d7[0]}, [r4, :64], r7
+ vst2.u32 {d5[0], d7[0]}, [r4:64], r7
-@ CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf4]
-@ CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4]
+@ CHECK: vst2.8 {d16[1], d17[1]}, [r0:16] @ encoding: [0x3f,0x01,0xc0,0xf4]
+@ CHECK: vst2.16 {d16[1], d17[1]}, [r0:32] @ encoding: [0x5f,0x05,0xc0,0xf4]
@ CHECK: vst2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xc0,0xf4]
@ CHECK: vst2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xc0,0xf4]
-@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+@ CHECK: vst2.32 {d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xc0,0xf4]
@ CHECK: vst2.8 {d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0x82,0xf4]
@ CHECK: vst2.8 {d2[4], d3[4]}, [r2]! @ encoding: [0x8d,0x21,0x82,0xf4]
@ CHECK: vst2.8 {d2[4], d3[4]}, [r2] @ encoding: [0x8f,0x21,0x82,0xf4]
@ CHECK: vst2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xc0,0xf4]
-@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+@ CHECK: vst2.32 {d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xc0,0xf4]
@ CHECK: vst2.16 {d7[1], d9[1]}, [r1]! @ encoding: [0x6d,0x75,0x81,0xf4]
-@ CHECK: vst2.32 {d6[0], d8[0]}, [r2, :64]! @ encoding: [0x5d,0x69,0x82,0xf4]
+@ CHECK: vst2.32 {d6[0], d8[0]}, [r2:64]! @ encoding: [0x5d,0x69,0x82,0xf4]
@ CHECK: vst2.16 {d2[1], d4[1]}, [r3], r5 @ encoding: [0x65,0x25,0x83,0xf4]
-@ CHECK: vst2.32 {d5[0], d7[0]}, [r4, :64], r7 @ encoding: [0x57,0x59,0x84,0xf4]
+@ CHECK: vst2.32 {d5[0], d7[0]}, [r4:64], r7 @ encoding: [0x57,0x59,0x84,0xf4]
vst3.8 {d16[1], d17[1], d18[1]}, [r1]
@@ -216,15 +216,15 @@
vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]
vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]
- vst4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
- vst4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]!
- vst4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]!
+ vst4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
+ vst4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2:64]!
+ vst4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3:128]!
vst4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]!
vst4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]!
- vst4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8
+ vst4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32], r8
vst4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7
- vst4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5
+ vst4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3:64], r5
vst4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3
vst4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4
@@ -233,14 +233,14 @@
@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xc3,0xf4]
@ CHECK: vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xc7,0xf4]
@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xc8,0xf4]
-@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xc1,0xf4]
-@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xc2,0xf4]
-@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xc3,0xf4]
+@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]! @ encoding: [0x3d,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2:64]! @ encoding: [0x5d,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3:128]! @ encoding: [0xad,0x0b,0xc3,0xf4]
@ CHECK: vst4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xc7,0xf4]
@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xc8,0xf4]
-@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xc1,0xf4]
+@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32], r8 @ encoding: [0x38,0x03,0xc1,0xf4]
@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xc2,0xf4]
-@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xc3,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3:64], r5 @ encoding: [0x95,0x0b,0xc3,0xf4]
@ CHECK: vst4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xc6,0xf4]
@ CHECK: vst4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xc9,0xf4]
@@ -269,10 +269,17 @@
vst2.8 {d8, d10}, [r4]
@ CHECK: vst2.8 {d8, d10}, [r4] @ encoding: [0x0f,0x89,0x04,0xf4]
- vst1.32 {d9[1]}, [r3, :32]
- vst1.32 {d27[1]}, [r9, :32]!
- vst1.32 {d27[1]}, [r3, :32], r5
-@ CHECK: vst1.32 {d9[1]}, [r3, :32] @ encoding: [0xbf,0x98,0x83,0xf4]
-@ CHECK: vst1.32 {d27[1]}, [r9, :32]! @ encoding: [0xbd,0xb8,0xc9,0xf4]
-@ CHECK: vst1.32 {d27[1]}, [r3, :32], r5 @ encoding: [0xb5,0xb8,0xc3,0xf4]
+ vst1.32 {d9[1]}, [r3:32]
+ vst1.32 {d27[1]}, [r9:32]!
+ vst1.32 {d27[1]}, [r3:32], r5
+@ CHECK: vst1.32 {d9[1]}, [r3:32] @ encoding: [0xbf,0x98,0x83,0xf4]
+@ CHECK: vst1.32 {d27[1]}, [r9:32]! @ encoding: [0xbd,0xb8,0xc9,0xf4]
+@ CHECK: vst1.32 {d27[1]}, [r3:32], r5 @ encoding: [0xb5,0xb8,0xc3,0xf4]
+@ Verify that the old, incorrect alignment specifier syntax (", :")
+@ is still accepted.
+ vst2.8 {d16, d17}, [r0, :64]
+ vst2.16 {d16, d17}, [r0, :128]
+
+@ CHECK: vst2.8 {d16, d17}, [r0:64] @ encoding: [0x1f,0x08,0x40,0xf4]
+@ CHECK: vst2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x40,0xf4]
\ No newline at end of file
diff --git a/test/MC/ARM/neont2-vld-encoding.s b/test/MC/ARM/neont2-vld-encoding.s
index 031205a5cc8a..7db855278116 100644
--- a/test/MC/ARM/neont2-vld-encoding.s
+++ b/test/MC/ARM/neont2-vld-encoding.s
@@ -3,46 +3,46 @@
.code 16
-@ CHECK: vld1.8 {d16}, [r0, :64] @ encoding: [0x1f,0x07,0x60,0xf9]
- vld1.8 {d16}, [r0, :64]
+@ CHECK: vld1.8 {d16}, [r0:64] @ encoding: [0x1f,0x07,0x60,0xf9]
+ vld1.8 {d16}, [r0:64]
@ CHECK: vld1.16 {d16}, [r0] @ encoding: [0x4f,0x07,0x60,0xf9]
vld1.16 {d16}, [r0]
@ CHECK: vld1.32 {d16}, [r0] @ encoding: [0x8f,0x07,0x60,0xf9]
vld1.32 {d16}, [r0]
@ CHECK: vld1.64 {d16}, [r0] @ encoding: [0xcf,0x07,0x60,0xf9]
vld1.64 {d16}, [r0]
-@ CHECK: vld1.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x0a,0x60,0xf9]
- vld1.8 {d16, d17}, [r0, :64]
-@ CHECK: vld1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x60,0xf9]
- vld1.16 {d16, d17}, [r0, :128]
+@ CHECK: vld1.8 {d16, d17}, [r0:64] @ encoding: [0x1f,0x0a,0x60,0xf9]
+ vld1.8 {d16, d17}, [r0:64]
+@ CHECK: vld1.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x0a,0x60,0xf9]
+ vld1.16 {d16, d17}, [r0:128]
@ CHECK: vld1.32 {d16, d17}, [r0] @ encoding: [0x8f,0x0a,0x60,0xf9]
vld1.32 {d16, d17}, [r0]
@ CHECK: vld1.64 {d16, d17}, [r0] @ encoding: [0xcf,0x0a,0x60,0xf9]
vld1.64 {d16, d17}, [r0]
-@ CHECK: vld2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x60,0xf9]
- vld2.8 {d16, d17}, [r0, :64]
-@ CHECK: vld2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x60,0xf9]
- vld2.16 {d16, d17}, [r0, :128]
+@ CHECK: vld2.8 {d16, d17}, [r0:64] @ encoding: [0x1f,0x08,0x60,0xf9]
+ vld2.8 {d16, d17}, [r0:64]
+@ CHECK: vld2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x60,0xf9]
+ vld2.16 {d16, d17}, [r0:128]
@ CHECK: vld2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x60,0xf9]
vld2.32 {d16, d17}, [r0]
-@ CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x60,0xf9]
- vld2.8 {d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf9]
- vld2.16 {d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf9]
- vld2.32 {d16, d17, d18, d19}, [r0, :256]
+@ CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x03,0x60,0xf9]
+ vld2.8 {d16, d17, d18, d19}, [r0:64]
+@ CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x03,0x60,0xf9]
+ vld2.16 {d16, d17, d18, d19}, [r0:128]
+@ CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x03,0x60,0xf9]
+ vld2.32 {d16, d17, d18, d19}, [r0:256]
-@ CHECK: vld3.8 {d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x60,0xf9]
- vld3.8 {d16, d17, d18}, [r0, :64]
+@ CHECK: vld3.8 {d16, d17, d18}, [r0:64] @ encoding: [0x1f,0x04,0x60,0xf9]
+ vld3.8 {d16, d17, d18}, [r0:64]
@ CHECK: vld3.16 {d16, d17, d18}, [r0] @ encoding: [0x4f,0x04,0x60,0xf9]
vld3.16 {d16, d17, d18}, [r0]
@ CHECK: vld3.32 {d16, d17, d18}, [r0] @ encoding: [0x8f,0x04,0x60,0xf9]
vld3.32 {d16, d17, d18}, [r0]
-@ CHECK: vld3.8 {d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf9]
- vld3.8 {d16, d18, d20}, [r0, :64]!
-@ CHECK: vld3.8 {d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x60,0xf9]
- vld3.8 {d17, d19, d21}, [r0, :64]!
+@ CHECK: vld3.8 {d16, d18, d20}, [r0:64]! @ encoding: [0x1d,0x05,0x60,0xf9]
+ vld3.8 {d16, d18, d20}, [r0:64]!
+@ CHECK: vld3.8 {d17, d19, d21}, [r0:64]! @ encoding: [0x1d,0x15,0x60,0xf9]
+ vld3.8 {d17, d19, d21}, [r0:64]!
@ CHECK: vld3.16 {d16, d18, d20}, [r0]! @ encoding: [0x4d,0x05,0x60,0xf9]
vld3.16 {d16, d18, d20}, [r0]!
@ CHECK: vld3.16 {d17, d19, d21}, [r0]! @ encoding: [0x4d,0x15,0x60,0xf9]
@@ -52,16 +52,16 @@
@ CHECK: vld3.32 {d17, d19, d21}, [r0]! @ encoding: [0x8d,0x15,0x60,0xf9]
vld3.32 {d17, d19, d21}, [r0]!
-@ CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x60,0xf9]
- vld4.8 {d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x60,0xf9]
- vld4.16 {d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x00,0x60,0xf9]
- vld4.32 {d16, d17, d18, d19}, [r0, :256]
-@ CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x60,0xf9]
- vld4.8 {d16, d18, d20, d22}, [r0, :256]!
-@ CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x60,0xf9]
- vld4.8 {d17, d19, d21, d23}, [r0, :256]!
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x00,0x60,0xf9]
+ vld4.8 {d16, d17, d18, d19}, [r0:64]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x00,0x60,0xf9]
+ vld4.16 {d16, d17, d18, d19}, [r0:128]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x00,0x60,0xf9]
+ vld4.32 {d16, d17, d18, d19}, [r0:256]
+@ CHECK: vld4.8 {d16, d18, d20, d22}, [r0:256]! @ encoding: [0x3d,0x01,0x60,0xf9]
+ vld4.8 {d16, d18, d20, d22}, [r0:256]!
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r0:256]! @ encoding: [0x3d,0x11,0x60,0xf9]
+ vld4.8 {d17, d19, d21, d23}, [r0:256]!
@ CHECK: vld4.16 {d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x60,0xf9]
vld4.16 {d16, d18, d20, d22}, [r0]!
@ CHECK: vld4.16 {d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x60,0xf9]
@@ -73,21 +73,21 @@
@ CHECK: vld1.8 {d16[3]}, [r0] @ encoding: [0x6f,0x00,0xe0,0xf9]
vld1.8 {d16[3]}, [r0]
-@ CHECK: vld1.16 {d16[2]}, [r0, :16] @ encoding: [0x9f,0x04,0xe0,0xf9]
- vld1.16 {d16[2]}, [r0, :16]
-@ CHECK: vld1.32 {d16[1]}, [r0, :32] @ encoding: [0xbf,0x08,0xe0,0xf9]
- vld1.32 {d16[1]}, [r0, :32]
+@ CHECK: vld1.16 {d16[2]}, [r0:16] @ encoding: [0x9f,0x04,0xe0,0xf9]
+ vld1.16 {d16[2]}, [r0:16]
+@ CHECK: vld1.32 {d16[1]}, [r0:32] @ encoding: [0xbf,0x08,0xe0,0xf9]
+ vld1.32 {d16[1]}, [r0:32]
-@ CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf9]
- vld2.8 {d16[1], d17[1]}, [r0, :16]
-@ CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf9]
- vld2.16 {d16[1], d17[1]}, [r0, :32]
+@ CHECK: vld2.8 {d16[1], d17[1]}, [r0:16] @ encoding: [0x3f,0x01,0xe0,0xf9]
+ vld2.8 {d16[1], d17[1]}, [r0:16]
+@ CHECK: vld2.16 {d16[1], d17[1]}, [r0:32] @ encoding: [0x5f,0x05,0xe0,0xf9]
+ vld2.16 {d16[1], d17[1]}, [r0:32]
@ CHECK: vld2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xe0,0xf9]
vld2.32 {d16[1], d17[1]}, [r0]
@ CHECK: vld2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xe0,0xf9]
vld2.16 {d17[1], d19[1]}, [r0]
-@ CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xe0,0xf9]
- vld2.32 {d17[0], d19[0]}, [r0, :64]
+@ CHECK: vld2.32 {d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xe0,0xf9]
+ vld2.32 {d17[0], d19[0]}, [r0:64]
@ CHECK: vld3.8 {d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xe0,0xf9]
vld3.8 {d16[1], d17[1], d18[1]}, [r0]
@@ -100,13 +100,13 @@
@ CHECK: vld3.32 {d17[1], d19[1], d21[1]}, [r0] @ encoding: [0xcf,0x1a,0xe0,0xf9]
vld3.32 {d17[1], d19[1], d21[1]}, [r0]
-@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xe0,0xf9]
- vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32] @ encoding: [0x3f,0x03,0xe0,0xf9]
+ vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xe0,0xf9]
vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
-@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xe0,0xf9]
- vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
-@ CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64] @ encoding: [0x7f,0x07,0xe0,0xf9]
- vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128] @ encoding: [0xaf,0x0b,0xe0,0xf9]
+ vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
+@ CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0:64] @ encoding: [0x7f,0x07,0xe0,0xf9]
+ vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0:64]
@ CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xe0,0xf9]
vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
diff --git a/test/MC/ARM/neont2-vst-encoding.s b/test/MC/ARM/neont2-vst-encoding.s
index b50d8b63c1c2..9adf7514f4b7 100644
--- a/test/MC/ARM/neont2-vst-encoding.s
+++ b/test/MC/ARM/neont2-vst-encoding.s
@@ -3,46 +3,46 @@
.code 16
-@ CHECK: vst1.8 {d16}, [r0, :64] @ encoding: [0x1f,0x07,0x40,0xf9]
- vst1.8 {d16}, [r0, :64]
+@ CHECK: vst1.8 {d16}, [r0:64] @ encoding: [0x1f,0x07,0x40,0xf9]
+ vst1.8 {d16}, [r0:64]
@ CHECK: vst1.16 {d16}, [r0] @ encoding: [0x4f,0x07,0x40,0xf9]
vst1.16 {d16}, [r0]
@ CHECK: vst1.32 {d16}, [r0] @ encoding: [0x8f,0x07,0x40,0xf9]
vst1.32 {d16}, [r0]
@ CHECK: vst1.64 {d16}, [r0] @ encoding: [0xcf,0x07,0x40,0xf9]
vst1.64 {d16}, [r0]
-@ CHECK: vst1.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x0a,0x40,0xf9]
- vst1.8 {d16, d17}, [r0, :64]
-@ CHECK: vst1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x40,0xf9]
- vst1.16 {d16, d17}, [r0, :128]
+@ CHECK: vst1.8 {d16, d17}, [r0:64] @ encoding: [0x1f,0x0a,0x40,0xf9]
+ vst1.8 {d16, d17}, [r0:64]
+@ CHECK: vst1.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x0a,0x40,0xf9]
+ vst1.16 {d16, d17}, [r0:128]
@ CHECK: vst1.32 {d16, d17}, [r0] @ encoding: [0x8f,0x0a,0x40,0xf9]
vst1.32 {d16, d17}, [r0]
@ CHECK: vst1.64 {d16, d17}, [r0] @ encoding: [0xcf,0x0a,0x40,0xf9]
vst1.64 {d16, d17}, [r0]
-@ CHECK: vst2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x40,0xf9]
- vst2.8 {d16, d17}, [r0, :64]
-@ CHECK: vst2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x40,0xf9]
- vst2.16 {d16, d17}, [r0, :128]
+@ CHECK: vst2.8 {d16, d17}, [r0:64] @ encoding: [0x1f,0x08,0x40,0xf9]
+ vst2.8 {d16, d17}, [r0:64]
+@ CHECK: vst2.16 {d16, d17}, [r0:128] @ encoding: [0x6f,0x08,0x40,0xf9]
+ vst2.16 {d16, d17}, [r0:128]
@ CHECK: vst2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x40,0xf9]
vst2.32 {d16, d17}, [r0]
-@ CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x40,0xf9]
- vst2.8 {d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf9]
- vst2.16 {d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf9]
- vst2.32 {d16, d17, d18, d19}, [r0, :256]
+@ CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x03,0x40,0xf9]
+ vst2.8 {d16, d17, d18, d19}, [r0:64]
+@ CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x03,0x40,0xf9]
+ vst2.16 {d16, d17, d18, d19}, [r0:128]
+@ CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256] @ encoding: [0xbf,0x03,0x40,0xf9]
+ vst2.32 {d16, d17, d18, d19}, [r0:256]
-@ CHECK: vst3.8 {d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x40,0xf9]
- vst3.8 {d16, d17, d18}, [r0, :64]
+@ CHECK: vst3.8 {d16, d17, d18}, [r0:64] @ encoding: [0x1f,0x04,0x40,0xf9]
+ vst3.8 {d16, d17, d18}, [r0:64]
@ CHECK: vst3.16 {d16, d17, d18}, [r0] @ encoding: [0x4f,0x04,0x40,0xf9]
vst3.16 {d16, d17, d18}, [r0]
@ CHECK: vst3.32 {d16, d17, d18}, [r0] @ encoding: [0x8f,0x04,0x40,0xf9]
vst3.32 {d16, d17, d18}, [r0]
-@ CHECK: vst3.8 {d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf9]
- vst3.8 {d16, d18, d20}, [r0, :64]!
-@ CHECK: vst3.8 {d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x40,0xf9]
- vst3.8 {d17, d19, d21}, [r0, :64]!
+@ CHECK: vst3.8 {d16, d18, d20}, [r0:64]! @ encoding: [0x1d,0x05,0x40,0xf9]
+ vst3.8 {d16, d18, d20}, [r0:64]!
+@ CHECK: vst3.8 {d17, d19, d21}, [r0:64]! @ encoding: [0x1d,0x15,0x40,0xf9]
+ vst3.8 {d17, d19, d21}, [r0:64]!
@ CHECK: vst3.16 {d16, d18, d20}, [r0]! @ encoding: [0x4d,0x05,0x40,0xf9]
vst3.16 {d16, d18, d20}, [r0]!
@ CHECK: vst3.16 {d17, d19, d21}, [r0]! @ encoding: [0x4d,0x15,0x40,0xf9]
@@ -52,14 +52,14 @@
@ CHECK: vst3.32 {d17, d19, d21}, [r0]! @ encoding: [0x8d,0x15,0x40,0xf9]
vst3.32 {d17, d19, d21}, [r0]!
-@ CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x40,0xf9]
- vst4.8 {d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x40,0xf9]
- vst4.16 {d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x40,0xf9]
- vst4.8 {d16, d18, d20, d22}, [r0, :256]!
-@ CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x40,0xf9]
- vst4.8 {d17, d19, d21, d23}, [r0, :256]!
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64] @ encoding: [0x1f,0x00,0x40,0xf9]
+ vst4.8 {d16, d17, d18, d19}, [r0:64]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128] @ encoding: [0x6f,0x00,0x40,0xf9]
+ vst4.16 {d16, d17, d18, d19}, [r0:128]
+@ CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]! @ encoding: [0x3d,0x01,0x40,0xf9]
+ vst4.8 {d16, d18, d20, d22}, [r0:256]!
+@ CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256]! @ encoding: [0x3d,0x11,0x40,0xf9]
+ vst4.8 {d17, d19, d21, d23}, [r0:256]!
@ CHECK: vst4.16 {d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x40,0xf9]
vst4.16 {d16, d18, d20, d22}, [r0]!
@ CHECK: vst4.16 {d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x40,0xf9]
@@ -69,16 +69,16 @@
@ CHECK: vst4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x40,0xf9]
vst4.32 {d17, d19, d21, d23}, [r0]!
-@ CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf9]
- vst2.8 {d16[1], d17[1]}, [r0, :16]
-@ CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf9]
- vst2.16 {d16[1], d17[1]}, [r0, :32]
+@ CHECK: vst2.8 {d16[1], d17[1]}, [r0:16] @ encoding: [0x3f,0x01,0xc0,0xf9]
+ vst2.8 {d16[1], d17[1]}, [r0:16]
+@ CHECK: vst2.16 {d16[1], d17[1]}, [r0:32] @ encoding: [0x5f,0x05,0xc0,0xf9]
+ vst2.16 {d16[1], d17[1]}, [r0:32]
@ CHECK: vst2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xc0,0xf9]
vst2.32 {d16[1], d17[1]}, [r0]
@ CHECK: vst2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xc0,0xf9]
vst2.16 {d17[1], d19[1]}, [r0]
-@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf9]
- vst2.32 {d17[0], d19[0]}, [r0, :64]
+@ CHECK: vst2.32 {d17[0], d19[0]}, [r0:64] @ encoding: [0x5f,0x19,0xc0,0xf9]
+ vst2.32 {d17[0], d19[0]}, [r0:64]
@ CHECK: vst3.8 {d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xc0,0xf9]
vst3.8 {d16[1], d17[1], d18[1]}, [r0]
@@ -91,14 +91,14 @@
@ CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r0] @ encoding: [0x4f,0x0a,0xc0,0xf9]
vst3.32 {d16[0], d18[0], d20[0]}, [r0]
-@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xc0,0xf9]
- vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32] @ encoding: [0x3f,0x03,0xc0,0xf9]
+ vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xc0,0xf9]
vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
-@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xc0,0xf9]
- vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
-@ CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64] @ encoding: [0xff,0x17,0xc0,0xf9]
- vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128] @ encoding: [0xaf,0x0b,0xc0,0xf9]
+ vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
+@ CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64] @ encoding: [0xff,0x17,0xc0,0xf9]
+ vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64]
@ CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf9]
vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
diff --git a/test/MC/ARM/relocated-mapping.s b/test/MC/ARM/relocated-mapping.s
new file mode 100644
index 000000000000..3bed14c4520a
--- /dev/null
+++ b/test/MC/ARM/relocated-mapping.s
@@ -0,0 +1,11 @@
+@ RUN: llvm-mc -triple=arm-linux-gnueabi -filetype=obj < %s | llvm-objdump -t - | FileCheck %s
+
+@ Implementation-detail test (unfortunately): values that are relocated do not
+@ go via MCStreamer::EmitBytes; make sure they still emit a mapping symbol.
+ add r0, r0, r0
+ .word somewhere
+ add r0, r0, r0
+
+@ CHECK: 00000000 .text 00000000 $a
+@ CHECK-NEXT: 00000008 .text 00000000 $a
+@ CHECK-NEXT: 00000004 .text 00000000 $d
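+@ The relocated .word at offset 4 is data, hence the $d mapping symbol there;
+@ the add at offset 8 resumes ARM code, hence the second $a.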
diff --git a/test/MC/AsmParser/align_invalid.s b/test/MC/AsmParser/align_invalid.s
new file mode 100644
index 000000000000..0d06d9423ced
--- /dev/null
+++ b/test/MC/AsmParser/align_invalid.s
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -triple i386-linux-gnu < %s 2>&1 | FileCheck %s -check-prefix=ELF
+# RUN: llvm-mc -triple i386-apple-darwin < %s 2>&1 | FileCheck %s -check-prefix=DARWIN
+
+.align 3
+# ELF: error: alignment must be a power of 2
+# DARWIN-NOT: error
+
+.align 32
+# ELF-NOT: error
+# DARWIN: error: invalid alignment value
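+# Note: on ELF, .align takes a byte count that must be a power of two; on
+# Darwin it takes a log2 exponent, so 3 is valid there while 32 is rejected.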
diff --git a/test/MC/AsmParser/directive_values.s b/test/MC/AsmParser/directive_values.s
index 6c79c38bf569..ed932b297462 100644
--- a/test/MC/AsmParser/directive_values.s
+++ b/test/MC/AsmParser/directive_values.s
@@ -63,3 +63,9 @@ TEST7:
# CHECK-NEXT: .byte 2
# CHECK-NEXT: .byte 3
# CHECK-NEXT: .byte 4
+
+TEST8:
+ .long 0x200000UL+1
+ .long 0x200000L+1
+# CHECK: .long 2097153
+# CHECK: .long 2097153
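+# (0x200000 is 2097152, so both expressions evaluate to 2097153; the UL/L
+# suffixes are parsed and discarded.)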
diff --git a/test/MC/AsmParser/section_names.s b/test/MC/AsmParser/section_names.s
new file mode 100644
index 000000000000..332cdbe3fed5
--- /dev/null
+++ b/test/MC/AsmParser/section_names.s
@@ -0,0 +1,62 @@
+# RUN: llvm-mc -triple i386-pc-linux-gnu -filetype=obj -o %t %s
+# RUN: elf-dump --dump-section-data < %t | FileCheck %s
+.section .nobits
+.byte 1
+.section .nobits2
+.byte 1
+.section .nobitsfoo
+.byte 1
+.section .init_array
+.byte 1
+.section .init_array2
+.byte 1
+.section .init_arrayfoo
+.byte 1
+.section .fini_array
+.byte 1
+.section .fini_array2
+.byte 1
+.section .fini_arrayfoo
+.byte 1
+.section .preinit_array
+.byte 1
+.section .preinit_array2
+.byte 1
+.section .preinit_arrayfoo
+.byte 1
+.section .note
+.byte 1
+.section .note2
+.byte 1
+.section .notefoo
+.byte 1
+# CHECK: (('sh_name', 0x00000{{...}}) # '.nobits'
+# CHECK-NEXT: ('sh_type', 0x00000001)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.nobits2'
+# CHECK-NEXT: ('sh_type', 0x00000001)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.nobitsfoo'
+# CHECK-NEXT: ('sh_type', 0x00000001)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.init_array'
+# CHECK-NEXT: ('sh_type', 0x0000000e)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.init_array2'
+# CHECK-NEXT: ('sh_type', 0x00000001)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.init_arrayfoo'
+# CHECK-NEXT: ('sh_type', 0x00000001)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.fini_array'
+# CHECK-NEXT: ('sh_type', 0x0000000f)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.fini_array2'
+# CHECK-NEXT: ('sh_type', 0x00000001)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.fini_arrayfoo'
+# CHECK-NEXT: ('sh_type', 0x00000001)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.preinit_array'
+# CHECK-NEXT: ('sh_type', 0x00000010)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.preinit_array2'
+# CHECK-NEXT: ('sh_type', 0x00000001)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.preinit_arrayfoo'
+# CHECK-NEXT: ('sh_type', 0x00000001)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.note'
+# CHECK-NEXT: ('sh_type', 0x00000007)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.note2'
+# CHECK-NEXT: ('sh_type', 0x00000007)
+# CHECK: (('sh_name', 0x00000{{...}}) # '.notefoo'
+# CHECK-NEXT: ('sh_type', 0x00000007)
diff --git a/test/MC/COFF/symbol-alias.s b/test/MC/COFF/symbol-alias.s
index 03f07b2e5685..4b1772ce711b 100644
--- a/test/MC/COFF/symbol-alias.s
+++ b/test/MC/COFF/symbol-alias.s
@@ -23,8 +23,11 @@ _bar:
.long 0 # 0x0
+# Order is important here. Assign _bar_alias_alias before _bar_alias.
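+# This exercises an alias whose target is itself an alias that has not been
+# defined yet at the point of assignment.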
.globl _foo_alias
_foo_alias = _foo
+ .globl _bar_alias_alias
+_bar_alias_alias = _bar_alias
.globl _bar_alias
_bar_alias = _bar
@@ -52,6 +55,14 @@ _bar_alias = _bar
// CHECK-NEXT: StorageClass = [[FOO_STORAGE_CLASS]]
// CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS]]
+// CHECK: Name = {{_?}}bar_alias_alias
+// CHECK-NEXT: Value = [[BAR_VALUE]]
+// CHECK-NEXT: SectionNumber = [[BAR_SECTION_NUMBER]]
+// CHECK-NEXT: SimpleType = [[BAR_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType = [[BAR_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass = [[BAR_STORAGE_CLASS]]
+// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS]]
+
// CHECK: Name = {{_?}}bar_alias
// CHECK-NEXT: Value = [[BAR_VALUE]]
// CHECK-NEXT: SectionNumber = [[BAR_SECTION_NUMBER]]
diff --git a/test/MC/COFF/weak-symbol-section-specification.ll b/test/MC/COFF/weak-symbol-section-specification.ll
new file mode 100644
index 000000000000..5049372959fb
--- /dev/null
+++ b/test/MC/COFF/weak-symbol-section-specification.ll
@@ -0,0 +1,23 @@
+; The purpose of this test is to verify that the weak linkage type is not
+; ignored by the backend when an explicit section is specified.
+
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | coff-dump.py | FileCheck %s
+
+@a = weak unnamed_addr constant { i32, i32, i32 } { i32 0, i32 0, i32 0}, section ".data"
+
+; CHECK: Name = .data$a
+; CHECK-NEXT: VirtualSize = 0
+; CHECK-NEXT: VirtualAddress = 0
+; CHECK-NEXT: SizeOfRawData = {{[0-9]+}}
+; CHECK-NEXT: PointerToRawData = 0x{{[0-9A-F]+}}
+; CHECK-NEXT: PointerToRelocations = 0x0
+; CHECK-NEXT: PointerToLineNumbers = 0x0
+; CHECK-NEXT: NumberOfRelocations = 0
+; CHECK-NEXT: NumberOfLineNumbers = 0
+; CHECK-NEXT: Charateristics = 0x40401040
+; CHECK-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA
+; CHECK-NEXT: IMAGE_SCN_LNK_COMDAT
+; CHECK-NEXT: IMAGE_SCN_ALIGN_8BYTES
+; CHECK-NEXT: IMAGE_SCN_MEM_READ
+; CHECK-NEXT: SectionData =
+; CHECK-NEXT: 00 00 00 00 00 00 00 00 - 00 00 00 00
diff --git a/test/MC/Disassembler/AArch64/a64-ignored-fields.txt b/test/MC/Disassembler/AArch64/a64-ignored-fields.txt
new file mode 100644
index 000000000000..966530d36a33
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/a64-ignored-fields.txt
@@ -0,0 +1,8 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble -show-encoding < %s | FileCheck %s
+
+# The "Rm" bits are ignored, but the canonical representation has them filled
+# with 0s. This is what we should produce even if the input bit-pattern had
+# something else there.
+
+# CHECK: fcmp s31, #0.0 // encoding: [0xe8,0x23,0x20,0x1e]
+0xe8 0x23 0x33 0x1e
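+# (Bits 20:16 of 0x1e3323e8 are 0b10011, not zero; the CHECK above still
+# expects the canonical zeroed form.)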
diff --git a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
new file mode 100644
index 000000000000..4fa2d5078b2f
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
@@ -0,0 +1,4200 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s | FileCheck %s
+
+#------------------------------------------------------------------------------
+# Add/sub (immediate)
+#------------------------------------------------------------------------------
+# CHECK: add w4, w5, #0
+# CHECK: add w2, w3, #4095
+# CHECK: add w30, w29, #1, lsl #12
+# CHECK: add w13, w5, #4095, lsl #12
+# CHECK: add x5, x7, #1638
+0xa4 0x0 0x0 0x11
+0x62 0xfc 0x3f 0x11
+0xbe 0x7 0x40 0x11
+0xad 0xfc 0x7f 0x11
+0xe5 0x98 0x19 0x91
+
+# CHECK: add w20, wsp, #801
+# CHECK: add wsp, wsp, #1104
+# CHECK: add wsp, w30, #4084
+0xf4 0x87 0xc 0x11
+0xff 0x43 0x11 0x11
+0xdf 0xd3 0x3f 0x11
+
+# CHECK: add x0, x24, #291
+# CHECK: add x3, x24, #4095, lsl #12
+# CHECK: add x8, sp, #1074
+# CHECK: add sp, x29, #3816
+0x0 0x8f 0x4 0x91
+0x3 0xff 0x7f 0x91
+0xe8 0xcb 0x10 0x91
+0xbf 0xa3 0x3b 0x91
+
+# CHECK: sub w0, wsp, #4077
+# CHECK: sub w4, w20, #546, lsl #12
+# CHECK: sub sp, sp, #288
+# CHECK: sub wsp, w19, #16
+0xe0 0xb7 0x3f 0x51
+0x84 0x8a 0x48 0x51
+0xff 0x83 0x4 0xd1
+0x7f 0x42 0x0 0x51
+
+
+# CHECK: adds w13, w23, #291, lsl #12
+# CHECK: cmn w2, #4095
+# CHECK: adds w20, wsp, #0
+# CHECK: cmn x3, #1, lsl #12
+0xed 0x8e 0x44 0x31
+0x5f 0xfc 0x3f 0x31
+0xf4 0x3 0x0 0x31
+0x7f 0x4 0x40 0xb1
+
+# CHECK: cmp sp, #20, lsl #12
+# CHECK: cmp x30, #4095
+# CHECK: subs x4, sp, #3822
+0xff 0x53 0x40 0xf1
+0xdf 0xff 0x3f 0xf1
+0xe4 0xbb 0x3b 0xf1
+
+# These should really be CMN
+# CHECK: cmn w3, #291, lsl #12
+# CHECK: cmn wsp, #1365
+# CHECK: cmn sp, #1092, lsl #12
+0x7f 0x8c 0x44 0x31
+0xff 0x57 0x15 0x31
+0xff 0x13 0x51 0xb1
+
+# CHECK: mov sp, x30
+# CHECK: mov wsp, w20
+# CHECK: mov x11, sp
+# CHECK: mov w24, wsp
+0xdf 0x3 0x0 0x91
+0x9f 0x2 0x0 0x11
+0xeb 0x3 0x0 0x91
+0xf8 0x3 0x0 0x11
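+# (These are ADD-immediate encodings with #0; when SP or WSP is involved the
+# preferred disassembly is the MOV alias.)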
+
+#------------------------------------------------------------------------------
+# Add-subtract (shifted register)
+#------------------------------------------------------------------------------
+
+# CHECK: add w3, w5, w7
+# CHECK: add wzr, w3, w5
+# CHECK: add w20, wzr, w4
+# CHECK: add w4, w6, wzr
+# CHECK: add w11, w13, w15
+# CHECK: add w9, w3, wzr, lsl #10
+# CHECK: add w17, w29, w20, lsl #31
+# CHECK: add w21, w22, w23, lsr #0
+# CHECK: add w24, w25, w26, lsr #18
+# CHECK: add w27, w28, w29, lsr #31
+# CHECK: add w2, w3, w4, asr #0
+# CHECK: add w5, w6, w7, asr #21
+# CHECK: add w8, w9, w10, asr #31
+0xa3 0x0 0x7 0xb
+0x7f 0x0 0x5 0xb
+0xf4 0x3 0x4 0xb
+0xc4 0x0 0x1f 0xb
+0xab 0x1 0xf 0xb
+0x69 0x28 0x1f 0xb
+0xb1 0x7f 0x14 0xb
+0xd5 0x2 0x57 0xb
+0x38 0x4b 0x5a 0xb
+0x9b 0x7f 0x5d 0xb
+0x62 0x0 0x84 0xb
+0xc5 0x54 0x87 0xb
+0x28 0x7d 0x8a 0xb
+
+# CHECK: add x3, x5, x7
+# CHECK: add xzr, x3, x5
+# CHECK: add x20, xzr, x4
+# CHECK: add x4, x6, xzr
+# CHECK: add x11, x13, x15
+# CHECK: add x9, x3, xzr, lsl #10
+# CHECK: add x17, x29, x20, lsl #63
+# CHECK: add x21, x22, x23, lsr #0
+# CHECK: add x24, x25, x26, lsr #18
+# CHECK: add x27, x28, x29, lsr #63
+# CHECK: add x2, x3, x4, asr #0
+# CHECK: add x5, x6, x7, asr #21
+# CHECK: add x8, x9, x10, asr #63
+0xa3 0x0 0x7 0x8b
+0x7f 0x0 0x5 0x8b
+0xf4 0x3 0x4 0x8b
+0xc4 0x0 0x1f 0x8b
+0xab 0x1 0xf 0x8b
+0x69 0x28 0x1f 0x8b
+0xb1 0xff 0x14 0x8b
+0xd5 0x2 0x57 0x8b
+0x38 0x4b 0x5a 0x8b
+0x9b 0xff 0x5d 0x8b
+0x62 0x0 0x84 0x8b
+0xc5 0x54 0x87 0x8b
+0x28 0xfd 0x8a 0x8b
+
+# CHECK: adds w3, w5, w7
+# CHECK: cmn w3, w5
+# CHECK: adds w20, wzr, w4
+# CHECK: adds w4, w6, wzr
+# CHECK: adds w11, w13, w15
+# CHECK: adds w9, w3, wzr, lsl #10
+# CHECK: adds w17, w29, w20, lsl #31
+# CHECK: adds w21, w22, w23, lsr #0
+# CHECK: adds w24, w25, w26, lsr #18
+# CHECK: adds w27, w28, w29, lsr #31
+# CHECK: adds w2, w3, w4, asr #0
+# CHECK: adds w5, w6, w7, asr #21
+# CHECK: adds w8, w9, w10, asr #31
+0xa3 0x0 0x7 0x2b
+0x7f 0x0 0x5 0x2b
+0xf4 0x3 0x4 0x2b
+0xc4 0x0 0x1f 0x2b
+0xab 0x1 0xf 0x2b
+0x69 0x28 0x1f 0x2b
+0xb1 0x7f 0x14 0x2b
+0xd5 0x2 0x57 0x2b
+0x38 0x4b 0x5a 0x2b
+0x9b 0x7f 0x5d 0x2b
+0x62 0x0 0x84 0x2b
+0xc5 0x54 0x87 0x2b
+0x28 0x7d 0x8a 0x2b
+
+# CHECK: adds x3, x5, x7
+# CHECK: cmn x3, x5
+# CHECK: adds x20, xzr, x4
+# CHECK: adds x4, x6, xzr
+# CHECK: adds x11, x13, x15
+# CHECK: adds x9, x3, xzr, lsl #10
+# CHECK: adds x17, x29, x20, lsl #63
+# CHECK: adds x21, x22, x23, lsr #0
+# CHECK: adds x24, x25, x26, lsr #18
+# CHECK: adds x27, x28, x29, lsr #63
+# CHECK: adds x2, x3, x4, asr #0
+# CHECK: adds x5, x6, x7, asr #21
+# CHECK: adds x8, x9, x10, asr #63
+0xa3 0x0 0x7 0xab
+0x7f 0x0 0x5 0xab
+0xf4 0x3 0x4 0xab
+0xc4 0x0 0x1f 0xab
+0xab 0x1 0xf 0xab
+0x69 0x28 0x1f 0xab
+0xb1 0xff 0x14 0xab
+0xd5 0x2 0x57 0xab
+0x38 0x4b 0x5a 0xab
+0x9b 0xff 0x5d 0xab
+0x62 0x0 0x84 0xab
+0xc5 0x54 0x87 0xab
+0x28 0xfd 0x8a 0xab
+
+# CHECK: sub w3, w5, w7
+# CHECK: sub wzr, w3, w5
+# CHECK: sub w20, wzr, w4
+# CHECK: sub w4, w6, wzr
+# CHECK: sub w11, w13, w15
+# CHECK: sub w9, w3, wzr, lsl #10
+# CHECK: sub w17, w29, w20, lsl #31
+# CHECK: sub w21, w22, w23, lsr #0
+# CHECK: sub w24, w25, w26, lsr #18
+# CHECK: sub w27, w28, w29, lsr #31
+# CHECK: sub w2, w3, w4, asr #0
+# CHECK: sub w5, w6, w7, asr #21
+# CHECK: sub w8, w9, w10, asr #31
+0xa3 0x0 0x7 0x4b
+0x7f 0x0 0x5 0x4b
+0xf4 0x3 0x4 0x4b
+0xc4 0x0 0x1f 0x4b
+0xab 0x1 0xf 0x4b
+0x69 0x28 0x1f 0x4b
+0xb1 0x7f 0x14 0x4b
+0xd5 0x2 0x57 0x4b
+0x38 0x4b 0x5a 0x4b
+0x9b 0x7f 0x5d 0x4b
+0x62 0x0 0x84 0x4b
+0xc5 0x54 0x87 0x4b
+0x28 0x7d 0x8a 0x4b
+
+# CHECK: sub x3, x5, x7
+# CHECK: sub xzr, x3, x5
+# CHECK: sub x20, xzr, x4
+# CHECK: sub x4, x6, xzr
+# CHECK: sub x11, x13, x15
+# CHECK: sub x9, x3, xzr, lsl #10
+# CHECK: sub x17, x29, x20, lsl #63
+# CHECK: sub x21, x22, x23, lsr #0
+# CHECK: sub x24, x25, x26, lsr #18
+# CHECK: sub x27, x28, x29, lsr #63
+# CHECK: sub x2, x3, x4, asr #0
+# CHECK: sub x5, x6, x7, asr #21
+# CHECK: sub x8, x9, x10, asr #63
+0xa3 0x0 0x7 0xcb
+0x7f 0x0 0x5 0xcb
+0xf4 0x3 0x4 0xcb
+0xc4 0x0 0x1f 0xcb
+0xab 0x1 0xf 0xcb
+0x69 0x28 0x1f 0xcb
+0xb1 0xff 0x14 0xcb
+0xd5 0x2 0x57 0xcb
+0x38 0x4b 0x5a 0xcb
+0x9b 0xff 0x5d 0xcb
+0x62 0x0 0x84 0xcb
+0xc5 0x54 0x87 0xcb
+0x28 0xfd 0x8a 0xcb
+
+# CHECK: subs w3, w5, w7
+# CHECK: cmp w3, w5
+# CHECK: subs w20, wzr, w4
+# CHECK: subs w4, w6, wzr
+# CHECK: subs w11, w13, w15
+# CHECK: subs w9, w3, wzr, lsl #10
+# CHECK: subs w17, w29, w20, lsl #31
+# CHECK: subs w21, w22, w23, lsr #0
+# CHECK: subs w24, w25, w26, lsr #18
+# CHECK: subs w27, w28, w29, lsr #31
+# CHECK: subs w2, w3, w4, asr #0
+# CHECK: subs w5, w6, w7, asr #21
+# CHECK: subs w8, w9, w10, asr #31
+0xa3 0x0 0x7 0x6b
+0x7f 0x0 0x5 0x6b
+0xf4 0x3 0x4 0x6b
+0xc4 0x0 0x1f 0x6b
+0xab 0x1 0xf 0x6b
+0x69 0x28 0x1f 0x6b
+0xb1 0x7f 0x14 0x6b
+0xd5 0x2 0x57 0x6b
+0x38 0x4b 0x5a 0x6b
+0x9b 0x7f 0x5d 0x6b
+0x62 0x0 0x84 0x6b
+0xc5 0x54 0x87 0x6b
+0x28 0x7d 0x8a 0x6b
+
+# CHECK: subs x3, x5, x7
+# CHECK: cmp x3, x5
+# CHECK: subs x20, xzr, x4
+# CHECK: subs x4, x6, xzr
+# CHECK: subs x11, x13, x15
+# CHECK: subs x9, x3, xzr, lsl #10
+# CHECK: subs x17, x29, x20, lsl #63
+# CHECK: subs x21, x22, x23, lsr #0
+# CHECK: subs x24, x25, x26, lsr #18
+# CHECK: subs x27, x28, x29, lsr #63
+# CHECK: subs x2, x3, x4, asr #0
+# CHECK: subs x5, x6, x7, asr #21
+# CHECK: subs x8, x9, x10, asr #63
+0xa3 0x0 0x7 0xeb
+0x7f 0x0 0x5 0xeb
+0xf4 0x3 0x4 0xeb
+0xc4 0x0 0x1f 0xeb
+0xab 0x1 0xf 0xeb
+0x69 0x28 0x1f 0xeb
+0xb1 0xff 0x14 0xeb
+0xd5 0x2 0x57 0xeb
+0x38 0x4b 0x5a 0xeb
+0x9b 0xff 0x5d 0xeb
+0x62 0x0 0x84 0xeb
+0xc5 0x54 0x87 0xeb
+0x28 0xfd 0x8a 0xeb
+
+# CHECK: cmn w0, w3
+# CHECK: cmn wzr, w4
+# CHECK: cmn w5, wzr
+# CHECK: cmn w6, w7
+# CHECK: cmn w8, w9, lsl #15
+# CHECK: cmn w10, w11, lsl #31
+# CHECK: cmn w12, w13, lsr #0
+# CHECK: cmn w14, w15, lsr #21
+# CHECK: cmn w16, w17, lsr #31
+# CHECK: cmn w18, w19, asr #0
+# CHECK: cmn w20, w21, asr #22
+# CHECK: cmn w22, w23, asr #31
+0x1f 0x0 0x3 0x2b
+0xff 0x3 0x4 0x2b
+0xbf 0x0 0x1f 0x2b
+0xdf 0x0 0x7 0x2b
+0x1f 0x3d 0x9 0x2b
+0x5f 0x7d 0xb 0x2b
+0x9f 0x1 0x4d 0x2b
+0xdf 0x55 0x4f 0x2b
+0x1f 0x7e 0x51 0x2b
+0x5f 0x2 0x93 0x2b
+0x9f 0x5a 0x95 0x2b
+0xdf 0x7e 0x97 0x2b
+
+# CHECK: cmn x0, x3
+# CHECK: cmn xzr, x4
+# CHECK: cmn x5, xzr
+# CHECK: cmn x6, x7
+# CHECK: cmn x8, x9, lsl #15
+# CHECK: cmn x10, x11, lsl #63
+# CHECK: cmn x12, x13, lsr #0
+# CHECK: cmn x14, x15, lsr #41
+# CHECK: cmn x16, x17, lsr #63
+# CHECK: cmn x18, x19, asr #0
+# CHECK: cmn x20, x21, asr #55
+# CHECK: cmn x22, x23, asr #63
+0x1f 0x0 0x3 0xab
+0xff 0x3 0x4 0xab
+0xbf 0x0 0x1f 0xab
+0xdf 0x0 0x7 0xab
+0x1f 0x3d 0x9 0xab
+0x5f 0xfd 0xb 0xab
+0x9f 0x1 0x4d 0xab
+0xdf 0xa5 0x4f 0xab
+0x1f 0xfe 0x51 0xab
+0x5f 0x2 0x93 0xab
+0x9f 0xde 0x95 0xab
+0xdf 0xfe 0x97 0xab
+
+# CHECK: cmp w0, w3
+# CHECK: cmp wzr, w4
+# CHECK: cmp w5, wzr
+# CHECK: cmp w6, w7
+# CHECK: cmp w8, w9, lsl #15
+# CHECK: cmp w10, w11, lsl #31
+# CHECK: cmp w12, w13, lsr #0
+# CHECK: cmp w14, w15, lsr #21
+# CHECK: cmp w16, w17, lsr #31
+# CHECK: cmp w18, w19, asr #0
+# CHECK: cmp w20, w21, asr #22
+# CHECK: cmp w22, w23, asr #31
+0x1f 0x0 0x3 0x6b
+0xff 0x3 0x4 0x6b
+0xbf 0x0 0x1f 0x6b
+0xdf 0x0 0x7 0x6b
+0x1f 0x3d 0x9 0x6b
+0x5f 0x7d 0xb 0x6b
+0x9f 0x1 0x4d 0x6b
+0xdf 0x55 0x4f 0x6b
+0x1f 0x7e 0x51 0x6b
+0x5f 0x2 0x93 0x6b
+0x9f 0x5a 0x95 0x6b
+0xdf 0x7e 0x97 0x6b
+
+# CHECK: cmp x0, x3
+# CHECK: cmp xzr, x4
+# CHECK: cmp x5, xzr
+# CHECK: cmp x6, x7
+# CHECK: cmp x8, x9, lsl #15
+# CHECK: cmp x10, x11, lsl #63
+# CHECK: cmp x12, x13, lsr #0
+# CHECK: cmp x14, x15, lsr #41
+# CHECK: cmp x16, x17, lsr #63
+# CHECK: cmp x18, x19, asr #0
+# CHECK: cmp x20, x21, asr #55
+# CHECK: cmp x22, x23, asr #63
+0x1f 0x0 0x3 0xeb
+0xff 0x3 0x4 0xeb
+0xbf 0x0 0x1f 0xeb
+0xdf 0x0 0x7 0xeb
+0x1f 0x3d 0x9 0xeb
+0x5f 0xfd 0xb 0xeb
+0x9f 0x1 0x4d 0xeb
+0xdf 0xa5 0x4f 0xeb
+0x1f 0xfe 0x51 0xeb
+0x5f 0x2 0x93 0xeb
+0x9f 0xde 0x95 0xeb
+0xdf 0xfe 0x97 0xeb
+
+# CHECK: sub w29, wzr, w30
+# CHECK: sub w30, wzr, wzr
+# CHECK: sub wzr, wzr, w0
+# CHECK: sub w28, wzr, w27
+# CHECK: sub w26, wzr, w25, lsl #29
+# CHECK: sub w24, wzr, w23, lsl #31
+# CHECK: sub w22, wzr, w21, lsr #0
+# CHECK: sub w20, wzr, w19, lsr #1
+# CHECK: sub w18, wzr, w17, lsr #31
+# CHECK: sub w16, wzr, w15, asr #0
+# CHECK: sub w14, wzr, w13, asr #12
+# CHECK: sub w12, wzr, w11, asr #31
+0xfd 0x3 0x1e 0x4b
+0xfe 0x3 0x1f 0x4b
+0xff 0x3 0x0 0x4b
+0xfc 0x3 0x1b 0x4b
+0xfa 0x77 0x19 0x4b
+0xf8 0x7f 0x17 0x4b
+0xf6 0x3 0x55 0x4b
+0xf4 0x7 0x53 0x4b
+0xf2 0x7f 0x51 0x4b
+0xf0 0x3 0x8f 0x4b
+0xee 0x33 0x8d 0x4b
+0xec 0x7f 0x8b 0x4b
+
+# CHECK: sub x29, xzr, x30
+# CHECK: sub x30, xzr, xzr
+# CHECK: sub xzr, xzr, x0
+# CHECK: sub x28, xzr, x27
+# CHECK: sub x26, xzr, x25, lsl #29
+# CHECK: sub x24, xzr, x23, lsl #31
+# CHECK: sub x22, xzr, x21, lsr #0
+# CHECK: sub x20, xzr, x19, lsr #1
+# CHECK: sub x18, xzr, x17, lsr #31
+# CHECK: sub x16, xzr, x15, asr #0
+# CHECK: sub x14, xzr, x13, asr #12
+# CHECK: sub x12, xzr, x11, asr #31
+0xfd 0x3 0x1e 0xcb
+0xfe 0x3 0x1f 0xcb
+0xff 0x3 0x0 0xcb
+0xfc 0x3 0x1b 0xcb
+0xfa 0x77 0x19 0xcb
+0xf8 0x7f 0x17 0xcb
+0xf6 0x3 0x55 0xcb
+0xf4 0x7 0x53 0xcb
+0xf2 0x7f 0x51 0xcb
+0xf0 0x3 0x8f 0xcb
+0xee 0x33 0x8d 0xcb
+0xec 0x7f 0x8b 0xcb
+
+# CHECK: subs w29, wzr, w30
+# CHECK: subs w30, wzr, wzr
+# CHECK: cmp wzr, w0
+# CHECK: subs w28, wzr, w27
+# CHECK: subs w26, wzr, w25, lsl #29
+# CHECK: subs w24, wzr, w23, lsl #31
+# CHECK: subs w22, wzr, w21, lsr #0
+# CHECK: subs w20, wzr, w19, lsr #1
+# CHECK: subs w18, wzr, w17, lsr #31
+# CHECK: subs w16, wzr, w15, asr #0
+# CHECK: subs w14, wzr, w13, asr #12
+# CHECK: subs w12, wzr, w11, asr #31
+0xfd 0x3 0x1e 0x6b
+0xfe 0x3 0x1f 0x6b
+0xff 0x3 0x0 0x6b
+0xfc 0x3 0x1b 0x6b
+0xfa 0x77 0x19 0x6b
+0xf8 0x7f 0x17 0x6b
+0xf6 0x3 0x55 0x6b
+0xf4 0x7 0x53 0x6b
+0xf2 0x7f 0x51 0x6b
+0xf0 0x3 0x8f 0x6b
+0xee 0x33 0x8d 0x6b
+0xec 0x7f 0x8b 0x6b
+
+# CHECK: subs x29, xzr, x30
+# CHECK: subs x30, xzr, xzr
+# CHECK: cmp xzr, x0
+# CHECK: subs x28, xzr, x27
+# CHECK: subs x26, xzr, x25, lsl #29
+# CHECK: subs x24, xzr, x23, lsl #31
+# CHECK: subs x22, xzr, x21, lsr #0
+# CHECK: subs x20, xzr, x19, lsr #1
+# CHECK: subs x18, xzr, x17, lsr #31
+# CHECK: subs x16, xzr, x15, asr #0
+# CHECK: subs x14, xzr, x13, asr #12
+# CHECK: subs x12, xzr, x11, asr #31
+0xfd 0x3 0x1e 0xeb
+0xfe 0x3 0x1f 0xeb
+0xff 0x3 0x0 0xeb
+0xfc 0x3 0x1b 0xeb
+0xfa 0x77 0x19 0xeb
+0xf8 0x7f 0x17 0xeb
+0xf6 0x3 0x55 0xeb
+0xf4 0x7 0x53 0xeb
+0xf2 0x7f 0x51 0xeb
+0xf0 0x3 0x8f 0xeb
+0xee 0x33 0x8d 0xeb
+0xec 0x7f 0x8b 0xeb
+
+#------------------------------------------------------------------------------
+# Add-subtract (with carry)
+#------------------------------------------------------------------------------
+
+# CHECK: adc w29, w27, w25
+# CHECK: adc wzr, w3, w4
+# CHECK: adc w9, wzr, w10
+# CHECK: adc w20, w0, wzr
+0x7d 0x3 0x19 0x1a
+0x7f 0x0 0x4 0x1a
+0xe9 0x3 0xa 0x1a
+0x14 0x0 0x1f 0x1a
+
+# CHECK: adc x29, x27, x25
+# CHECK: adc xzr, x3, x4
+# CHECK: adc x9, xzr, x10
+# CHECK: adc x20, x0, xzr
+0x7d 0x3 0x19 0x9a
+0x7f 0x0 0x4 0x9a
+0xe9 0x3 0xa 0x9a
+0x14 0x0 0x1f 0x9a
+
+# CHECK: adcs w29, w27, w25
+# CHECK: adcs wzr, w3, w4
+# CHECK: adcs w9, wzr, w10
+# CHECK: adcs w20, w0, wzr
+0x7d 0x3 0x19 0x3a
+0x7f 0x0 0x4 0x3a
+0xe9 0x3 0xa 0x3a
+0x14 0x0 0x1f 0x3a
+
+# CHECK: adcs x29, x27, x25
+# CHECK: adcs xzr, x3, x4
+# CHECK: adcs x9, xzr, x10
+# CHECK: adcs x20, x0, xzr
+0x7d 0x3 0x19 0xba
+0x7f 0x0 0x4 0xba
+0xe9 0x3 0xa 0xba
+0x14 0x0 0x1f 0xba
+
+# CHECK: sbc w29, w27, w25
+# CHECK: sbc wzr, w3, w4
+# CHECK: ngc w9, w10
+# CHECK: sbc w20, w0, wzr
+0x7d 0x3 0x19 0x5a
+0x7f 0x0 0x4 0x5a
+0xe9 0x3 0xa 0x5a
+0x14 0x0 0x1f 0x5a
+
+# CHECK: sbc x29, x27, x25
+# CHECK: sbc xzr, x3, x4
+# CHECK: ngc x9, x10
+# CHECK: sbc x20, x0, xzr
+0x7d 0x3 0x19 0xda
+0x7f 0x0 0x4 0xda
+0xe9 0x3 0xa 0xda
+0x14 0x0 0x1f 0xda
+
+# CHECK: sbcs w29, w27, w25
+# CHECK: sbcs wzr, w3, w4
+# CHECK: ngcs w9, w10
+# CHECK: sbcs w20, w0, wzr
+0x7d 0x3 0x19 0x7a
+0x7f 0x0 0x4 0x7a
+0xe9 0x3 0xa 0x7a
+0x14 0x0 0x1f 0x7a
+
+# CHECK: sbcs x29, x27, x25
+# CHECK: sbcs xzr, x3, x4
+# CHECK: ngcs x9, x10
+# CHECK: sbcs x20, x0, xzr
+0x7d 0x3 0x19 0xfa
+0x7f 0x0 0x4 0xfa
+0xe9 0x3 0xa 0xfa
+0x14 0x0 0x1f 0xfa
+
+# CHECK: ngc w3, w12
+# CHECK: ngc wzr, w9
+# CHECK: ngc w23, wzr
+0xe3 0x3 0xc 0x5a
+0xff 0x3 0x9 0x5a
+0xf7 0x3 0x1f 0x5a
+
+# CHECK: ngc x29, x30
+# CHECK: ngc xzr, x0
+# CHECK: ngc x0, xzr
+0xfd 0x3 0x1e 0xda
+0xff 0x3 0x0 0xda
+0xe0 0x3 0x1f 0xda
+
+# CHECK: ngcs w3, w12
+# CHECK: ngcs wzr, w9
+# CHECK: ngcs w23, wzr
+0xe3 0x3 0xc 0x7a
+0xff 0x3 0x9 0x7a
+0xf7 0x3 0x1f 0x7a
+
+# CHECK: ngcs x29, x30
+# CHECK: ngcs xzr, x0
+# CHECK: ngcs x0, xzr
+0xfd 0x3 0x1e 0xfa
+0xff 0x3 0x0 0xfa
+0xe0 0x3 0x1f 0xfa
+
+#------------------------------------------------------------------------------
+# Bitfield
+#------------------------------------------------------------------------------
+
+# CHECK: sbfx x1, x2, #3, #2
+# CHECK: asr x3, x4, #63
+# CHECK: asr wzr, wzr, #31
+# CHECK: sbfx w12, w9, #0, #1
+0x41 0x10 0x43 0x93
+0x83 0xfc 0x7f 0x93
+0xff 0x7f 0x1f 0x13
+0x2c 0x1 0x0 0x13
+
+# CHECK: ubfiz x4, x5, #52, #11
+# CHECK: ubfx xzr, x4, #0, #1
+# CHECK: ubfiz x4, xzr, #1, #6
+# CHECK: lsr x5, x6, #12
+0xa4 0x28 0x4c 0xd3
+0x9f 0x0 0x40 0xd3
+0xe4 0x17 0x7f 0xd3
+0xc5 0xfc 0x4c 0xd3
+
+# CHECK: bfi x4, x5, #52, #11
+# CHECK: bfxil xzr, x4, #0, #1
+# CHECK: bfi x4, xzr, #1, #6
+# CHECK: bfxil x5, x6, #12, #52
+0xa4 0x28 0x4c 0xb3
+0x9f 0x0 0x40 0xb3
+0xe4 0x17 0x7f 0xb3
+0xc5 0xfc 0x4c 0xb3
+
+# CHECK: sxtb w1, w2
+# CHECK: sxtb xzr, w3
+# CHECK: sxth w9, w10
+# CHECK: sxth x0, w1
+# CHECK: sxtw x3, w30
+0x41 0x1c 0x0 0x13
+0x7f 0x1c 0x40 0x93
+0x49 0x3d 0x0 0x13
+0x20 0x3c 0x40 0x93
+0xc3 0x7f 0x40 0x93
+
+# CHECK: uxtb w1, w2
+# CHECK: uxth w9, w10
+# CHECK: ubfx x3, x30, #0, #32
+0x41 0x1c 0x0 0x53
+0x49 0x3d 0x0 0x53
+0xc3 0x7f 0x40 0xd3
+
+# CHECK: asr w3, w2, #0
+# CHECK: asr w9, w10, #31
+# CHECK: asr x20, x21, #63
+# CHECK: asr w1, wzr, #3
+0x43 0x7c 0x0 0x13
+0x49 0x7d 0x1f 0x13
+0xb4 0xfe 0x7f 0x93
+0xe1 0x7f 0x3 0x13
+
+# CHECK: lsr w3, w2, #0
+# CHECK: lsr w9, w10, #31
+# CHECK: lsr x20, x21, #63
+# CHECK: lsr wzr, wzr, #3
+0x43 0x7c 0x0 0x53
+0x49 0x7d 0x1f 0x53
+0xb4 0xfe 0x7f 0xd3
+0xff 0x7f 0x3 0x53
+
+# CHECK: lsr w3, w2, #0
+# CHECK: lsl w9, w10, #31
+# CHECK: lsl x20, x21, #63
+# CHECK: lsl w1, wzr, #3
+0x43 0x7c 0x0 0x53
+0x49 0x1 0x1 0x53
+0xb4 0x2 0x41 0xd3
+0xe1 0x73 0x1d 0x53
+
+# CHECK: sbfx w9, w10, #0, #1
+# CHECK: sbfiz x2, x3, #63, #1
+# CHECK: asr x19, x20, #0
+# CHECK: sbfiz x9, x10, #5, #59
+# CHECK: asr w9, w10, #0
+# CHECK: sbfiz w11, w12, #31, #1
+# CHECK: sbfiz w13, w14, #29, #3
+# CHECK: sbfiz xzr, xzr, #10, #11
+0x49 0x1 0x0 0x13
+0x62 0x0 0x41 0x93
+0x93 0xfe 0x40 0x93
+0x49 0xe9 0x7b 0x93
+0x49 0x7d 0x0 0x13
+0x8b 0x1 0x1 0x13
+0xcd 0x9 0x3 0x13
+0xff 0x2b 0x76 0x93
+
+# CHECK: sbfx w9, w10, #0, #1
+# CHECK: asr x2, x3, #63
+# CHECK: asr x19, x20, #0
+# CHECK: asr x9, x10, #5
+# CHECK: asr w9, w10, #0
+# CHECK: asr w11, w12, #31
+# CHECK: asr w13, w14, #29
+# CHECK: sbfx xzr, xzr, #10, #11
+0x49 0x1 0x0 0x13
+0x62 0xfc 0x7f 0x93
+0x93 0xfe 0x40 0x93
+0x49 0xfd 0x45 0x93
+0x49 0x7d 0x0 0x13
+0x8b 0x7d 0x1f 0x13
+0xcd 0x7d 0x1d 0x13
+0xff 0x53 0x4a 0x93
+
+# CHECK: bfxil w9, w10, #0, #1
+# CHECK: bfi x2, x3, #63, #1
+# CHECK: bfxil x19, x20, #0, #64
+# CHECK: bfi x9, x10, #5, #59
+# CHECK: bfxil w9, w10, #0, #32
+# CHECK: bfi w11, w12, #31, #1
+# CHECK: bfi w13, w14, #29, #3
+# CHECK: bfi xzr, xzr, #10, #11
+0x49 0x1 0x0 0x33
+0x62 0x0 0x41 0xb3
+0x93 0xfe 0x40 0xb3
+0x49 0xe9 0x7b 0xb3
+0x49 0x7d 0x0 0x33
+0x8b 0x1 0x1 0x33
+0xcd 0x9 0x3 0x33
+0xff 0x2b 0x76 0xb3
+
+# CHECK: bfxil w9, w10, #0, #1
+# CHECK: bfxil x2, x3, #63, #1
+# CHECK: bfxil x19, x20, #0, #64
+# CHECK: bfxil x9, x10, #5, #59
+# CHECK: bfxil w9, w10, #0, #32
+# CHECK: bfxil w11, w12, #31, #1
+# CHECK: bfxil w13, w14, #29, #3
+# CHECK: bfxil xzr, xzr, #10, #11
+0x49 0x1 0x0 0x33
+0x62 0xfc 0x7f 0xb3
+0x93 0xfe 0x40 0xb3
+0x49 0xfd 0x45 0xb3
+0x49 0x7d 0x0 0x33
+0x8b 0x7d 0x1f 0x33
+0xcd 0x7d 0x1d 0x33
+0xff 0x53 0x4a 0xb3
+
+# CHECK: ubfx w9, w10, #0, #1
+# CHECK: lsl x2, x3, #63
+# CHECK: lsr x19, x20, #0
+# CHECK: lsl x9, x10, #5
+# CHECK: lsr w9, w10, #0
+# CHECK: lsl w11, w12, #31
+# CHECK: lsl w13, w14, #29
+# CHECK: ubfiz xzr, xzr, #10, #11
+0x49 0x1 0x0 0x53
+0x62 0x0 0x41 0xd3
+0x93 0xfe 0x40 0xd3
+0x49 0xe9 0x7b 0xd3
+0x49 0x7d 0x0 0x53
+0x8b 0x1 0x1 0x53
+0xcd 0x9 0x3 0x53
+0xff 0x2b 0x76 0xd3
+
+# CHECK: ubfx w9, w10, #0, #1
+# CHECK: lsr x2, x3, #63
+# CHECK: lsr x19, x20, #0
+# CHECK: lsr x9, x10, #5
+# CHECK: lsr w9, w10, #0
+# CHECK: lsr w11, w12, #31
+# CHECK: lsr w13, w14, #29
+# CHECK: ubfx xzr, xzr, #10, #11
+0x49 0x1 0x0 0x53
+0x62 0xfc 0x7f 0xd3
+0x93 0xfe 0x40 0xd3
+0x49 0xfd 0x45 0xd3
+0x49 0x7d 0x0 0x53
+0x8b 0x7d 0x1f 0x53
+0xcd 0x7d 0x1d 0x53
+0xff 0x53 0x4a 0xd3
+
+
+#------------------------------------------------------------------------------
+# Compare and branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: cbz w5, #4
+# CHECK: cbz x5, #0
+# CHECK: cbnz x2, #-4
+# CHECK: cbnz x26, #1048572
+0x25 0x0 0x0 0x34
+0x05 0x0 0x0 0xb4
+0xe2 0xff 0xff 0xb5
+0xfa 0xff 0x7f 0xb5
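+# (The branch offset is a signed 19-bit word offset, so #1048572 is the
+# maximum forward target and #-4 a small backward one.)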
+
+# CHECK: cbz wzr, #0
+# CHECK: cbnz xzr, #0
+0x1f 0x0 0x0 0x34
+0x1f 0x0 0x0 0xb5
+
+#------------------------------------------------------------------------------
+# Conditional branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: b.ne #4
+# CHECK: b.ge #1048572
+# CHECK: b.ge #-4
+0x21 0x00 0x00 0x54
+0xea 0xff 0x7f 0x54
+0xea 0xff 0xff 0x54
+
+#------------------------------------------------------------------------------
+# Conditional compare (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: ccmp w1, #31, #0, eq
+# CHECK: ccmp w3, #0, #15, hs
+# CHECK: ccmp wzr, #15, #13, hs
+0x20 0x08 0x5f 0x7a
+0x6f 0x28 0x40 0x7a
+0xed 0x2b 0x4f 0x7a
+
+# CHECK: ccmp x9, #31, #0, le
+# CHECK: ccmp x3, #0, #15, gt
+# CHECK: ccmp xzr, #5, #7, ne
+0x20 0xd9 0x5f 0xfa
+0x6f 0xc8 0x40 0xfa
+0xe7 0x1b 0x45 0xfa
+
+# CHECK: ccmn w1, #31, #0, eq
+# CHECK: ccmn w3, #0, #15, hs
+# CHECK: ccmn wzr, #15, #13, hs
+0x20 0x08 0x5f 0x3a
+0x6f 0x28 0x40 0x3a
+0xed 0x2b 0x4f 0x3a
+
+# CHECK: ccmn x9, #31, #0, le
+# CHECK: ccmn x3, #0, #15, gt
+# CHECK: ccmn xzr, #5, #7, ne
+0x20 0xd9 0x5f 0xba
+0x6f 0xc8 0x40 0xba
+0xe7 0x1b 0x45 0xba
+
+#------------------------------------------------------------------------------
+# Conditional compare (register)
+#------------------------------------------------------------------------------
+
+# CHECK: ccmp w1, wzr, #0, eq
+# CHECK: ccmp w3, w0, #15, hs
+# CHECK: ccmp wzr, w15, #13, hs
+0x20 0x00 0x5f 0x7a
+0x6f 0x20 0x40 0x7a
+0xed 0x23 0x4f 0x7a
+
+# CHECK: ccmp x9, xzr, #0, le
+# CHECK: ccmp x3, x0, #15, gt
+# CHECK: ccmp xzr, x5, #7, ne
+0x20 0xd1 0x5f 0xfa
+0x6f 0xc0 0x40 0xfa
+0xe7 0x13 0x45 0xfa
+
+# CHECK: ccmn w1, wzr, #0, eq
+# CHECK: ccmn w3, w0, #15, hs
+# CHECK: ccmn wzr, w15, #13, hs
+0x20 0x00 0x5f 0x3a
+0x6f 0x20 0x40 0x3a
+0xed 0x23 0x4f 0x3a
+
+# CHECK: ccmn x9, xzr, #0, le
+# CHECK: ccmn x3, x0, #15, gt
+# CHECK: ccmn xzr, x5, #7, ne
+0x20 0xd1 0x5f 0xba
+0x6f 0xc0 0x40 0xba
+0xe7 0x13 0x45 0xba
+
+#------------------------------------------------------------------------------
+# Conditional select
+#------------------------------------------------------------------------------
+# CHECK: csel w1, w0, w19, ne
+# CHECK: csel wzr, w5, w9, eq
+# CHECK: csel w9, wzr, w30, gt
+# CHECK: csel w1, w28, wzr, mi
+# CHECK: csel x19, x23, x29, lt
+# CHECK: csel xzr, x3, x4, ge
+# CHECK: csel x5, xzr, x6, hs
+# CHECK: csel x7, x8, xzr, lo
+0x1 0x10 0x93 0x1a
+0xbf 0x0 0x89 0x1a
+0xe9 0xc3 0x9e 0x1a
+0x81 0x43 0x9f 0x1a
+0xf3 0xb2 0x9d 0x9a
+0x7f 0xa0 0x84 0x9a
+0xe5 0x23 0x86 0x9a
+0x7 0x31 0x9f 0x9a
+
+# CHECK: csinc w1, w0, w19, ne
+# CHECK: csinc wzr, w5, w9, eq
+# CHECK: csinc w9, wzr, w30, gt
+# CHECK: csinc w1, w28, wzr, mi
+# CHECK: csinc x19, x23, x29, lt
+# CHECK: csinc xzr, x3, x4, ge
+# CHECK: csinc x5, xzr, x6, hs
+# CHECK: csinc x7, x8, xzr, lo
+0x1 0x14 0x93 0x1a
+0xbf 0x4 0x89 0x1a
+0xe9 0xc7 0x9e 0x1a
+0x81 0x47 0x9f 0x1a
+0xf3 0xb6 0x9d 0x9a
+0x7f 0xa4 0x84 0x9a
+0xe5 0x27 0x86 0x9a
+0x7 0x35 0x9f 0x9a
+
+# CHECK: csinv w1, w0, w19, ne
+# CHECK: csinv wzr, w5, w9, eq
+# CHECK: csinv w9, wzr, w30, gt
+# CHECK: csinv w1, w28, wzr, mi
+# CHECK: csinv x19, x23, x29, lt
+# CHECK: csinv xzr, x3, x4, ge
+# CHECK: csinv x5, xzr, x6, hs
+# CHECK: csinv x7, x8, xzr, lo
+0x1 0x10 0x93 0x5a
+0xbf 0x0 0x89 0x5a
+0xe9 0xc3 0x9e 0x5a
+0x81 0x43 0x9f 0x5a
+0xf3 0xb2 0x9d 0xda
+0x7f 0xa0 0x84 0xda
+0xe5 0x23 0x86 0xda
+0x7 0x31 0x9f 0xda
+
+# CHECK: csneg w1, w0, w19, ne
+# CHECK: csneg wzr, w5, w9, eq
+# CHECK: csneg w9, wzr, w30, gt
+# CHECK: csneg w1, w28, wzr, mi
+# CHECK: csneg x19, x23, x29, lt
+# CHECK: csneg xzr, x3, x4, ge
+# CHECK: csneg x5, xzr, x6, hs
+# CHECK: csneg x7, x8, xzr, lo
+0x1 0x14 0x93 0x5a
+0xbf 0x4 0x89 0x5a
+0xe9 0xc7 0x9e 0x5a
+0x81 0x47 0x9f 0x5a
+0xf3 0xb6 0x9d 0xda
+0x7f 0xa4 0x84 0xda
+0xe5 0x27 0x86 0xda
+0x7 0x35 0x9f 0xda
+
+# CHECK: csinc w3, wzr, wzr, ne
+# CHECK: csinc x9, xzr, xzr, mi
+# CHECK: csinv w20, wzr, wzr, eq
+# CHECK: csinv x30, xzr, xzr, lt
+0xe3 0x17 0x9f 0x1a
+0xe9 0x47 0x9f 0x9a
+0xf4 0x3 0x9f 0x5a
+0xfe 0xb3 0x9f 0xda
+
+# CHECK: csinc w3, w5, w5, le
+# CHECK: csinc wzr, w4, w4, gt
+# CHECK: csinc w9, wzr, wzr, ge
+# CHECK: csinc x3, x5, x5, le
+# CHECK: csinc xzr, x4, x4, gt
+# CHECK: csinc x9, xzr, xzr, ge
+0xa3 0xd4 0x85 0x1a
+0x9f 0xc4 0x84 0x1a
+0xe9 0xa7 0x9f 0x1a
+0xa3 0xd4 0x85 0x9a
+0x9f 0xc4 0x84 0x9a
+0xe9 0xa7 0x9f 0x9a
+
+# CHECK: csinv w3, w5, w5, le
+# CHECK: csinv wzr, w4, w4, gt
+# CHECK: csinv w9, wzr, wzr, ge
+# CHECK: csinv x3, x5, x5, le
+# CHECK: csinv xzr, x4, x4, gt
+# CHECK: csinv x9, xzr, xzr, ge
+0xa3 0xd0 0x85 0x5a
+0x9f 0xc0 0x84 0x5a
+0xe9 0xa3 0x9f 0x5a
+0xa3 0xd0 0x85 0xda
+0x9f 0xc0 0x84 0xda
+0xe9 0xa3 0x9f 0xda
+
+# CHECK: csneg w3, w5, w5, le
+# CHECK: csneg wzr, w4, w4, gt
+# CHECK: csneg w9, wzr, wzr, ge
+# CHECK: csneg x3, x5, x5, le
+# CHECK: csneg xzr, x4, x4, gt
+# CHECK: csneg x9, xzr, xzr, ge
+0xa3 0xd4 0x85 0x5a
+0x9f 0xc4 0x84 0x5a
+0xe9 0xa7 0x9f 0x5a
+0xa3 0xd4 0x85 0xda
+0x9f 0xc4 0x84 0xda
+0xe9 0xa7 0x9f 0xda
+
+#------------------------------------------------------------------------------
+# Data-processing (1 source)
+#------------------------------------------------------------------------------
+
+# CHECK: rbit w0, w7
+# CHECK: rbit x18, x3
+# CHECK: rev16 w17, w1
+# CHECK: rev16 x5, x2
+# CHECK: rev w18, w0
+# CHECK: rev32 x20, x1
+0xe0 0x00 0xc0 0x5a
+0x72 0x00 0xc0 0xda
+0x31 0x04 0xc0 0x5a
+0x45 0x04 0xc0 0xda
+0x12 0x08 0xc0 0x5a
+0x34 0x08 0xc0 0xda
+
+# CHECK: rev x22, x2
+# CHECK: clz w24, w3
+# CHECK: clz x26, x4
+# CHECK: cls w3, w5
+# CHECK: cls x20, x5
+0x56 0x0c 0xc0 0xda
+0x78 0x10 0xc0 0x5a
+0x9a 0x10 0xc0 0xda
+0xa3 0x14 0xc0 0x5a
+0xb4 0x14 0xc0 0xda
+
+#------------------------------------------------------------------------------
+# Data-processing (2 source)
+#------------------------------------------------------------------------------
+
+# CHECK: crc32b w5, w7, w20
+# CHECK: crc32h w28, wzr, w30
+# CHECK: crc32w w0, w1, w2
+# CHECK: crc32x w7, w9, x20
+# CHECK: crc32cb w9, w5, w4
+# CHECK: crc32ch w13, w17, w25
+# CHECK: crc32cw wzr, w3, w5
+# CHECK: crc32cx w18, w16, xzr
+0xe5 0x40 0xd4 0x1a
+0xfc 0x47 0xde 0x1a
+0x20 0x48 0xc2 0x1a
+0x27 0x4d 0xd4 0x9a
+0xa9 0x50 0xc4 0x1a
+0x2d 0x56 0xd9 0x1a
+0x7f 0x58 0xc5 0x1a
+0x12 0x5e 0xdf 0x9a
+
+# CHECK: udiv w0, w7, w10
+# CHECK: udiv x9, x22, x4
+# CHECK: sdiv w12, w21, w0
+# CHECK: sdiv x13, x2, x1
+# CHECK: lsl w11, w12, w13
+# CHECK: lsl x14, x15, x16
+# CHECK: lsr w17, w18, w19
+# CHECK: lsr x20, x21, x22
+# CHECK: asr w23, w24, w25
+# CHECK: asr x26, x27, x28
+# CHECK: ror w0, w1, w2
+# CHECK: ror x3, x4, x5
+0xe0 0x08 0xca 0x1a
+0xc9 0x0a 0xc4 0x9a
+0xac 0x0e 0xc0 0x1a
+0x4d 0x0c 0xc1 0x9a
+0x8b 0x21 0xcd 0x1a
+0xee 0x21 0xd0 0x9a
+0x51 0x26 0xd3 0x1a
+0xb4 0x26 0xd6 0x9a
+0x17 0x2b 0xd9 0x1a
+0x7a 0x2b 0xdc 0x9a
+0x20 0x2c 0xc2 0x1a
+0x83 0x2c 0xc5 0x9a
+
+# CHECK: lsl w6, w7, w8
+# CHECK: lsl x9, x10, x11
+# CHECK: lsr w12, w13, w14
+# CHECK: lsr x15, x16, x17
+# CHECK: asr w18, w19, w20
+# CHECK: asr x21, x22, x23
+# CHECK: ror w24, w25, w26
+# CHECK: ror x27, x28, x29
+0xe6 0x20 0xc8 0x1a
+0x49 0x21 0xcb 0x9a
+0xac 0x25 0xce 0x1a
+0x0f 0x26 0xd1 0x9a
+0x72 0x2a 0xd4 0x1a
+0xd5 0x2a 0xd7 0x9a
+0x38 0x2f 0xda 0x1a
+0x9b 0x2f 0xdd 0x9a
+
+#------------------------------------------------------------------------------
+# Data-processing (3 sources)
+#------------------------------------------------------------------------------
+
+# First check some non-canonical encodings where Ra is not 0b11111 (only umulh
+# and smulh have them).
+
+# CHECK: smulh x30, x29, x28
+# CHECK: smulh xzr, x27, x26
+# CHECK: umulh x30, x29, x28
+# CHECK: umulh x23, x30, xzr
+0xbe 0x73 0x5c 0x9b
+0x7f 0x2f 0x5a 0x9b
+0xbe 0x3f 0xdc 0x9b
+0xd7 0x77 0xdf 0x9b
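+# (For example the first word, 0x9b5c73be, has Ra (bits 14:10) = 0b11100
+# rather than the canonical 0b11111.)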
+
+# Now onto the boilerplate stuff
+
+# CHECK: madd w1, w3, w7, w4
+# CHECK: madd wzr, w0, w9, w11
+# CHECK: madd w13, wzr, w4, w4
+# CHECK: madd w19, w30, wzr, w29
+# CHECK: mul w4, w5, w6
+0x61 0x10 0x7 0x1b
+0x1f 0x2c 0x9 0x1b
+0xed 0x13 0x4 0x1b
+0xd3 0x77 0x1f 0x1b
+0xa4 0x7c 0x6 0x1b
+
+# CHECK: madd x1, x3, x7, x4
+# CHECK: madd xzr, x0, x9, x11
+# CHECK: madd x13, xzr, x4, x4
+# CHECK: madd x19, x30, xzr, x29
+# CHECK: mul x4, x5, x6
+0x61 0x10 0x7 0x9b
+0x1f 0x2c 0x9 0x9b
+0xed 0x13 0x4 0x9b
+0xd3 0x77 0x1f 0x9b
+0xa4 0x7c 0x6 0x9b
+
+# CHECK: msub w1, w3, w7, w4
+# CHECK: msub wzr, w0, w9, w11
+# CHECK: msub w13, wzr, w4, w4
+# CHECK: msub w19, w30, wzr, w29
+# CHECK: mneg w4, w5, w6
+0x61 0x90 0x7 0x1b
+0x1f 0xac 0x9 0x1b
+0xed 0x93 0x4 0x1b
+0xd3 0xf7 0x1f 0x1b
+0xa4 0xfc 0x6 0x1b
+
+# CHECK: msub x1, x3, x7, x4
+# CHECK: msub xzr, x0, x9, x11
+# CHECK: msub x13, xzr, x4, x4
+# CHECK: msub x19, x30, xzr, x29
+# CHECK: mneg x4, x5, x6
+0x61 0x90 0x7 0x9b
+0x1f 0xac 0x9 0x9b
+0xed 0x93 0x4 0x9b
+0xd3 0xf7 0x1f 0x9b
+0xa4 0xfc 0x6 0x9b
+
+# CHECK: smaddl x3, w5, w2, x9
+# CHECK: smaddl xzr, w10, w11, x12
+# CHECK: smaddl x13, wzr, w14, x15
+# CHECK: smaddl x16, w17, wzr, x18
+# CHECK: smull x19, w20, w21
+0xa3 0x24 0x22 0x9b
+0x5f 0x31 0x2b 0x9b
+0xed 0x3f 0x2e 0x9b
+0x30 0x4a 0x3f 0x9b
+0x93 0x7e 0x35 0x9b
+
+# CHECK: smsubl x3, w5, w2, x9
+# CHECK: smsubl xzr, w10, w11, x12
+# CHECK: smsubl x13, wzr, w14, x15
+# CHECK: smsubl x16, w17, wzr, x18
+# CHECK: smnegl x19, w20, w21
+0xa3 0xa4 0x22 0x9b
+0x5f 0xb1 0x2b 0x9b
+0xed 0xbf 0x2e 0x9b
+0x30 0xca 0x3f 0x9b
+0x93 0xfe 0x35 0x9b
+
+# CHECK: umaddl x3, w5, w2, x9
+# CHECK: umaddl xzr, w10, w11, x12
+# CHECK: umaddl x13, wzr, w14, x15
+# CHECK: umaddl x16, w17, wzr, x18
+# CHECK: umull x19, w20, w21
+0xa3 0x24 0xa2 0x9b
+0x5f 0x31 0xab 0x9b
+0xed 0x3f 0xae 0x9b
+0x30 0x4a 0xbf 0x9b
+0x93 0x7e 0xb5 0x9b
+
+# CHECK: umsubl x3, w5, w2, x9
+# CHECK: umsubl xzr, w10, w11, x12
+# CHECK: umsubl x13, wzr, w14, x15
+# CHECK: umsubl x16, w17, wzr, x18
+# CHECK: umnegl x19, w20, w21
+0xa3 0xa4 0xa2 0x9b
+0x5f 0xb1 0xab 0x9b
+0xed 0xbf 0xae 0x9b
+0x30 0xca 0xbf 0x9b
+0x93 0xfe 0xb5 0x9b
+
+# CHECK: smulh x30, x29, x28
+# CHECK: smulh xzr, x27, x26
+# CHECK: smulh x25, xzr, x24
+# CHECK: smulh x23, x22, xzr
+0xbe 0x7f 0x5c 0x9b
+0x7f 0x7f 0x5a 0x9b
+0xf9 0x7f 0x58 0x9b
+0xd7 0x7e 0x5f 0x9b
+
+# CHECK: umulh x30, x29, x28
+# CHECK: umulh xzr, x27, x26
+# CHECK: umulh x25, xzr, x24
+# CHECK: umulh x23, x22, xzr
+0xbe 0x7f 0xdc 0x9b
+0x7f 0x7f 0xda 0x9b
+0xf9 0x7f 0xd8 0x9b
+0xd7 0x7e 0xdf 0x9b
+
+# CHECK: mul w3, w4, w5
+# CHECK: mul wzr, w6, w7
+# CHECK: mul w8, wzr, w9
+# CHECK: mul w10, w11, wzr
+# CHECK: mul x12, x13, x14
+# CHECK: mul xzr, x15, x16
+# CHECK: mul x17, xzr, x18
+# CHECK: mul x19, x20, xzr
+0x83 0x7c 0x5 0x1b
+0xdf 0x7c 0x7 0x1b
+0xe8 0x7f 0x9 0x1b
+0x6a 0x7d 0x1f 0x1b
+0xac 0x7d 0xe 0x9b
+0xff 0x7d 0x10 0x9b
+0xf1 0x7f 0x12 0x9b
+0x93 0x7e 0x1f 0x9b
+
+# CHECK: mneg w21, w22, w23
+# CHECK: mneg wzr, w24, w25
+# CHECK: mneg w26, wzr, w27
+# CHECK: mneg w28, w29, wzr
+0xd5 0xfe 0x17 0x1b
+0x1f 0xff 0x19 0x1b
+0xfa 0xff 0x1b 0x1b
+0xbc 0xff 0x1f 0x1b
+
+# CHECK: smull x11, w13, w17
+# CHECK: umull x11, w13, w17
+# CHECK: smnegl x11, w13, w17
+# CHECK: umnegl x11, w13, w17
+0xab 0x7d 0x31 0x9b
+0xab 0x7d 0xb1 0x9b
+0xab 0xfd 0x31 0x9b
+0xab 0xfd 0xb1 0x9b
+
+#------------------------------------------------------------------------------
+# Exception generation
+#------------------------------------------------------------------------------
+
+# CHECK: svc #0
+# CHECK: svc #65535
+0x1 0x0 0x0 0xd4
+0xe1 0xff 0x1f 0xd4
+
+# CHECK: hvc #1
+# CHECK: smc #12000
+# CHECK: brk #12
+# CHECK: hlt #123
+0x22 0x0 0x0 0xd4
+0x3 0xdc 0x5 0xd4
+0x80 0x1 0x20 0xd4
+0x60 0xf 0x40 0xd4
+
+# CHECK: dcps1 #42
+# CHECK: dcps2 #9
+# CHECK: dcps3 #1000
+0x41 0x5 0xa0 0xd4
+0x22 0x1 0xa0 0xd4
+0x3 0x7d 0xa0 0xd4
+
+# CHECK: dcps1
+# CHECK: dcps2
+# CHECK: dcps3
+0x1 0x0 0xa0 0xd4
+0x2 0x0 0xa0 0xd4
+0x3 0x0 0xa0 0xd4
+
+#------------------------------------------------------------------------------
+# Extract (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: extr w3, w5, w7, #0
+# CHECK: extr w11, w13, w17, #31
+0xa3 0x0 0x87 0x13
+0xab 0x7d 0x91 0x13
+
+# CHECK: extr x3, x5, x7, #15
+# CHECK: extr x11, x13, x17, #63
+0xa3 0x3c 0xc7 0x93
+0xab 0xfd 0xd1 0x93
+
+# CHECK: extr x19, x23, x23, #24
+# CHECK: extr x29, xzr, xzr, #63
+# CHECK: extr w9, w13, w13, #31
+0xf3 0x62 0xd7 0x93
+0xfd 0xff 0xdf 0x93
+0xa9 0x7d 0x8d 0x13
+
+#------------------------------------------------------------------------------
+# Floating-point compare
+#------------------------------------------------------------------------------
+
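+# Where the same text is CHECKed twice in a row below, the two words differ
+# only in a field the architecture ignores for that form (here, the Rm bits
+# of an "fcmp(e) ..., #0.0"), so both must disassemble identically.
+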
+# CHECK: fcmp s3, s5
+# CHECK: fcmp s31, #0.0
+# CHECK: fcmp s31, #0.0
+0x60 0x20 0x25 0x1e
+0xe8 0x23 0x20 0x1e
+0xe8 0x23 0x3f 0x1e
+
+# CHECK: fcmpe s29, s30
+# CHECK: fcmpe s15, #0.0
+# CHECK: fcmpe s15, #0.0
+0xb0 0x23 0x3e 0x1e
+0xf8 0x21 0x20 0x1e
+0xf8 0x21 0x2f 0x1e
+
+# CHECK: fcmp d4, d12
+# CHECK: fcmp d23, #0.0
+# CHECK: fcmp d23, #0.0
+0x80 0x20 0x6c 0x1e
+0xe8 0x22 0x60 0x1e
+0xe8 0x22 0x77 0x1e
+
+# CHECK: fcmpe d26, d22
+# CHECK: fcmpe d29, #0.0
+# CHECK: fcmpe d29, #0.0
+0x50 0x23 0x76 0x1e
+0xb8 0x23 0x60 0x1e
+0xb8 0x23 0x6d 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point conditional compare
+#------------------------------------------------------------------------------
+
+# CHECK: fccmp s1, s31, #0, eq
+# CHECK: fccmp s3, s0, #15, hs
+# CHECK: fccmp s31, s15, #13, hs
+0x20 0x04 0x3f 0x1e
+0x6f 0x24 0x20 0x1e
+0xed 0x27 0x2f 0x1e
+
+# CHECK: fccmp d9, d31, #0, le
+# CHECK: fccmp d3, d0, #15, gt
+# CHECK: fccmp d31, d5, #7, ne
+0x20 0xd5 0x7f 0x1e
+0x6f 0xc4 0x60 0x1e
+0xe7 0x17 0x65 0x1e
+
+# CHECK: fccmpe s1, s31, #0, eq
+# CHECK: fccmpe s3, s0, #15, hs
+# CHECK: fccmpe s31, s15, #13, hs
+0x30 0x04 0x3f 0x1e
+0x7f 0x24 0x20 0x1e
+0xfd 0x27 0x2f 0x1e
+
+# CHECK: fccmpe d9, d31, #0, le
+# CHECK: fccmpe d3, d0, #15, gt
+# CHECK: fccmpe d31, d5, #7, ne
+0x30 0xd5 0x7f 0x1e
+0x7f 0xc4 0x60 0x1e
+0xf7 0x17 0x65 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point conditional select
+#------------------------------------------------------------------------------
+
+# CHECK: fcsel s3, s20, s9, pl
+# CHECK: fcsel d9, d10, d11, mi
+0x83 0x5e 0x29 0x1e
+0x49 0x4d 0x6b 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (1 source)
+#------------------------------------------------------------------------------
+
+# CHECK: fmov s0, s1
+# CHECK: fabs s2, s3
+# CHECK: fneg s4, s5
+# CHECK: fsqrt s6, s7
+# CHECK: fcvt d8, s9
+# CHECK: fcvt h10, s11
+# CHECK: frintn s12, s13
+# CHECK: frintp s14, s15
+# CHECK: frintm s16, s17
+# CHECK: frintz s18, s19
+# CHECK: frinta s20, s21
+# CHECK: frintx s22, s23
+# CHECK: frinti s24, s25
+0x20 0x40 0x20 0x1e
+0x62 0xc0 0x20 0x1e
+0xa4 0x40 0x21 0x1e
+0xe6 0xc0 0x21 0x1e
+0x28 0xc1 0x22 0x1e
+0x6a 0xc1 0x23 0x1e
+0xac 0x41 0x24 0x1e
+0xee 0xc1 0x24 0x1e
+0x30 0x42 0x25 0x1e
+0x72 0xc2 0x25 0x1e
+0xb4 0x42 0x26 0x1e
+0xf6 0x42 0x27 0x1e
+0x38 0xc3 0x27 0x1e
+
+# CHECK: fmov d0, d1
+# CHECK: fabs d2, d3
+# CHECK: fneg d4, d5
+# CHECK: fsqrt d6, d7
+# CHECK: fcvt s8, d9
+# CHECK: fcvt h10, d11
+# CHECK: frintn d12, d13
+# CHECK: frintp d14, d15
+# CHECK: frintm d16, d17
+# CHECK: frintz d18, d19
+# CHECK: frinta d20, d21
+# CHECK: frintx d22, d23
+# CHECK: frinti d24, d25
+0x20 0x40 0x60 0x1e
+0x62 0xc0 0x60 0x1e
+0xa4 0x40 0x61 0x1e
+0xe6 0xc0 0x61 0x1e
+0x28 0x41 0x62 0x1e
+0x6a 0xc1 0x63 0x1e
+0xac 0x41 0x64 0x1e
+0xee 0xc1 0x64 0x1e
+0x30 0x42 0x65 0x1e
+0x72 0xc2 0x65 0x1e
+0xb4 0x42 0x66 0x1e
+0xf6 0x42 0x67 0x1e
+0x38 0xc3 0x67 0x1e
+
+# CHECK: fcvt s26, h27
+# CHECK: fcvt d28, h29
+0x7a 0x43 0xe2 0x1e
+0xbc 0xc3 0xe2 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (2 sources)
+#------------------------------------------------------------------------------
+
+# CHECK: fmul s20, s19, s17
+# CHECK: fdiv s1, s2, s3
+# CHECK: fadd s4, s5, s6
+# CHECK: fsub s7, s8, s9
+# CHECK: fmax s10, s11, s12
+# CHECK: fmin s13, s14, s15
+# CHECK: fmaxnm s16, s17, s18
+# CHECK: fminnm s19, s20, s21
+# CHECK: fnmul s22, s23, s24
+0x74 0xa 0x31 0x1e
+0x41 0x18 0x23 0x1e
+0xa4 0x28 0x26 0x1e
+0x7 0x39 0x29 0x1e
+0x6a 0x49 0x2c 0x1e
+0xcd 0x59 0x2f 0x1e
+0x30 0x6a 0x32 0x1e
+0x93 0x7a 0x35 0x1e
+0xf6 0x8a 0x38 0x1e
+
+# CHECK: fmul d20, d19, d17
+# CHECK: fdiv d1, d2, d3
+# CHECK: fadd d4, d5, d6
+# CHECK: fsub d7, d8, d9
+# CHECK: fmax d10, d11, d12
+# CHECK: fmin d13, d14, d15
+# CHECK: fmaxnm d16, d17, d18
+# CHECK: fminnm d19, d20, d21
+# CHECK: fnmul d22, d23, d24
+0x74 0xa 0x71 0x1e
+0x41 0x18 0x63 0x1e
+0xa4 0x28 0x66 0x1e
+0x7 0x39 0x69 0x1e
+0x6a 0x49 0x6c 0x1e
+0xcd 0x59 0x6f 0x1e
+0x30 0x6a 0x72 0x1e
+0x93 0x7a 0x75 0x1e
+0xf6 0x8a 0x78 0x1e
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (3 sources)
+#------------------------------------------------------------------------------
+
+# CHECK: fmadd s3, s5, s6, s31
+# CHECK: fmadd d3, d13, d0, d23
+# CHECK: fmsub s3, s5, s6, s31
+# CHECK: fmsub d3, d13, d0, d23
+# CHECK: fnmadd s3, s5, s6, s31
+# CHECK: fnmadd d3, d13, d0, d23
+# CHECK: fnmsub s3, s5, s6, s31
+# CHECK: fnmsub d3, d13, d0, d23
+0xa3 0x7c 0x06 0x1f
+0xa3 0x5d 0x40 0x1f
+0xa3 0xfc 0x06 0x1f
+0xa3 0xdd 0x40 0x1f
+0xa3 0x7c 0x26 0x1f
+0xa3 0x5d 0x60 0x1f
+0xa3 0xfc 0x26 0x1f
+0xa3 0xdd 0x60 0x1f
+
+#------------------------------------------------------------------------------
+# Floating-point <-> fixed-point conversion
+#------------------------------------------------------------------------------
+
+# CHECK: fcvtzs w3, s5, #1
+# CHECK: fcvtzs wzr, s20, #13
+# CHECK: fcvtzs w19, s0, #32
+0xa3 0xfc 0x18 0x1e
+0x9f 0xce 0x18 0x1e
+0x13 0x80 0x18 0x1e
+
+# CHECK: fcvtzs x3, s5, #1
+# CHECK: fcvtzs x12, s30, #45
+# CHECK: fcvtzs x19, s0, #64
+0xa3 0xfc 0x18 0x9e
+0xcc 0x4f 0x18 0x9e
+0x13 0x00 0x18 0x9e
+
+# CHECK: fcvtzs w3, d5, #1
+# CHECK: fcvtzs wzr, d20, #13
+# CHECK: fcvtzs w19, d0, #32
+0xa3 0xfc 0x58 0x1e
+0x9f 0xce 0x58 0x1e
+0x13 0x80 0x58 0x1e
+
+# CHECK: fcvtzs x3, d5, #1
+# CHECK: fcvtzs x12, d30, #45
+# CHECK: fcvtzs x19, d0, #64
+0xa3 0xfc 0x58 0x9e
+0xcc 0x4f 0x58 0x9e
+0x13 0x00 0x58 0x9e
+
+# CHECK: fcvtzu w3, s5, #1
+# CHECK: fcvtzu wzr, s20, #13
+# CHECK: fcvtzu w19, s0, #32
+0xa3 0xfc 0x19 0x1e
+0x9f 0xce 0x19 0x1e
+0x13 0x80 0x19 0x1e
+
+# CHECK: fcvtzu x3, s5, #1
+# CHECK: fcvtzu x12, s30, #45
+# CHECK: fcvtzu x19, s0, #64
+0xa3 0xfc 0x19 0x9e
+0xcc 0x4f 0x19 0x9e
+0x13 0x00 0x19 0x9e
+
+# CHECK: fcvtzu w3, d5, #1
+# CHECK: fcvtzu wzr, d20, #13
+# CHECK: fcvtzu w19, d0, #32
+0xa3 0xfc 0x59 0x1e
+0x9f 0xce 0x59 0x1e
+0x13 0x80 0x59 0x1e
+
+# CHECK: fcvtzu x3, d5, #1
+# CHECK: fcvtzu x12, d30, #45
+# CHECK: fcvtzu x19, d0, #64
+0xa3 0xfc 0x59 0x9e
+0xcc 0x4f 0x59 0x9e
+0x13 0x00 0x59 0x9e
+
+# CHECK: scvtf s23, w19, #1
+# CHECK: scvtf s31, wzr, #20
+# CHECK: scvtf s14, w0, #32
+0x77 0xfe 0x02 0x1e
+0xff 0xb3 0x02 0x1e
+0x0e 0x80 0x02 0x1e
+
+# CHECK: scvtf s23, x19, #1
+# CHECK: scvtf s31, xzr, #20
+# CHECK: scvtf s14, x0, #64
+0x77 0xfe 0x02 0x9e
+0xff 0xb3 0x02 0x9e
+0x0e 0x00 0x02 0x9e
+
+# CHECK: scvtf d23, w19, #1
+# CHECK: scvtf d31, wzr, #20
+# CHECK: scvtf d14, w0, #32
+0x77 0xfe 0x42 0x1e
+0xff 0xb3 0x42 0x1e
+0x0e 0x80 0x42 0x1e
+
+# CHECK: scvtf d23, x19, #1
+# CHECK: scvtf d31, xzr, #20
+# CHECK: scvtf d14, x0, #64
+0x77 0xfe 0x42 0x9e
+0xff 0xb3 0x42 0x9e
+0x0e 0x00 0x42 0x9e
+
+# CHECK: ucvtf s23, w19, #1
+# CHECK: ucvtf s31, wzr, #20
+# CHECK: ucvtf s14, w0, #32
+0x77 0xfe 0x03 0x1e
+0xff 0xb3 0x03 0x1e
+0x0e 0x80 0x03 0x1e
+
+# CHECK: ucvtf s23, x19, #1
+# CHECK: ucvtf s31, xzr, #20
+# CHECK: ucvtf s14, x0, #64
+0x77 0xfe 0x03 0x9e
+0xff 0xb3 0x03 0x9e
+0x0e 0x00 0x03 0x9e
+
+# CHECK: ucvtf d23, w19, #1
+# CHECK: ucvtf d31, wzr, #20
+# CHECK: ucvtf d14, w0, #32
+0x77 0xfe 0x43 0x1e
+0xff 0xb3 0x43 0x1e
+0x0e 0x80 0x43 0x1e
+
+# CHECK: ucvtf d23, x19, #1
+# CHECK: ucvtf d31, xzr, #20
+# CHECK: ucvtf d14, x0, #64
+0x77 0xfe 0x43 0x9e
+0xff 0xb3 0x43 0x9e
+0x0e 0x00 0x43 0x9e
+
+#------------------------------------------------------------------------------
+# Floating-point <-> integer conversion
+#------------------------------------------------------------------------------
+
+# CHECK: fcvtns w3, s31
+# CHECK: fcvtns xzr, s12
+# CHECK: fcvtnu wzr, s12
+# CHECK: fcvtnu x0, s0
+0xe3 0x3 0x20 0x1e
+0x9f 0x1 0x20 0x9e
+0x9f 0x1 0x21 0x1e
+0x0 0x0 0x21 0x9e
+
+# CHECK: fcvtps wzr, s9
+# CHECK: fcvtps x12, s20
+# CHECK: fcvtpu w30, s23
+# CHECK: fcvtpu x29, s3
+0x3f 0x1 0x28 0x1e
+0x8c 0x2 0x28 0x9e
+0xfe 0x2 0x29 0x1e
+0x7d 0x0 0x29 0x9e
+
+# CHECK: fcvtms w2, s3
+# CHECK: fcvtms x4, s5
+# CHECK: fcvtmu w6, s7
+# CHECK: fcvtmu x8, s9
+0x62 0x0 0x30 0x1e
+0xa4 0x0 0x30 0x9e
+0xe6 0x0 0x31 0x1e
+0x28 0x1 0x31 0x9e
+
+# CHECK: fcvtzs w10, s11
+# CHECK: fcvtzs x12, s13
+# CHECK: fcvtzu w14, s15
+# CHECK: fcvtzu x15, s16
+0x6a 0x1 0x38 0x1e
+0xac 0x1 0x38 0x9e
+0xee 0x1 0x39 0x1e
+0xf 0x2 0x39 0x9e
+
+# CHECK: scvtf s17, w18
+# CHECK: scvtf s19, x20
+# CHECK: ucvtf s21, w22
+# CHECK: ucvtf s23, x24
+0x51 0x2 0x22 0x1e
+0x93 0x2 0x22 0x9e
+0xd5 0x2 0x23 0x1e
+0x17 0x3 0x23 0x9e
+
+# CHECK: fcvtas w25, s26
+# CHECK: fcvtas x27, s28
+# CHECK: fcvtau w29, s30
+# CHECK: fcvtau xzr, s0
+0x59 0x3 0x24 0x1e
+0x9b 0x3 0x24 0x9e
+0xdd 0x3 0x25 0x1e
+0x1f 0x0 0x25 0x9e
+
+# CHECK: fcvtns w3, d31
+# CHECK: fcvtns xzr, d12
+# CHECK: fcvtnu wzr, d12
+# CHECK: fcvtnu x0, d0
+0xe3 0x3 0x60 0x1e
+0x9f 0x1 0x60 0x9e
+0x9f 0x1 0x61 0x1e
+0x0 0x0 0x61 0x9e
+
+# CHECK: fcvtps wzr, d9
+# CHECK: fcvtps x12, d20
+# CHECK: fcvtpu w30, d23
+# CHECK: fcvtpu x29, d3
+0x3f 0x1 0x68 0x1e
+0x8c 0x2 0x68 0x9e
+0xfe 0x2 0x69 0x1e
+0x7d 0x0 0x69 0x9e
+
+# CHECK: fcvtms w2, d3
+# CHECK: fcvtms x4, d5
+# CHECK: fcvtmu w6, d7
+# CHECK: fcvtmu x8, d9
+0x62 0x0 0x70 0x1e
+0xa4 0x0 0x70 0x9e
+0xe6 0x0 0x71 0x1e
+0x28 0x1 0x71 0x9e
+
+# CHECK: fcvtzs w10, d11
+# CHECK: fcvtzs x12, d13
+# CHECK: fcvtzu w14, d15
+# CHECK: fcvtzu x15, d16
+0x6a 0x1 0x78 0x1e
+0xac 0x1 0x78 0x9e
+0xee 0x1 0x79 0x1e
+0xf 0x2 0x79 0x9e
+
+# CHECK: scvtf d17, w18
+# CHECK: scvtf d19, x20
+# CHECK: ucvtf d21, w22
+# CHECK: ucvtf d23, x24
+0x51 0x2 0x62 0x1e
+0x93 0x2 0x62 0x9e
+0xd5 0x2 0x63 0x1e
+0x17 0x3 0x63 0x9e
+
+# CHECK: fcvtas w25, d26
+# CHECK: fcvtas x27, d28
+# CHECK: fcvtau w29, d30
+# CHECK: fcvtau xzr, d0
+0x59 0x3 0x64 0x1e
+0x9b 0x3 0x64 0x9e
+0xdd 0x3 0x65 0x1e
+0x1f 0x0 0x65 0x9e
+
+# CHECK: fmov w3, s9
+# CHECK: fmov s9, w3
+0x23 0x1 0x26 0x1e
+0x69 0x0 0x27 0x1e
+
+# CHECK: fmov x20, d31
+# CHECK: fmov d1, x15
+0xf4 0x3 0x66 0x9e
+0xe1 0x1 0x67 0x9e
+
+# CHECK: fmov x3, v12.d[1]
+# CHECK: fmov v1.d[1], x19
+0x83 0x1 0xae 0x9e
+0x61 0x2 0xaf 0x9e
+
+#------------------------------------------------------------------------------
+# Floating-point immediate
+#------------------------------------------------------------------------------
+
+# CHECK: fmov s2, #0.12500000
+# CHECK: fmov s3, #1.00000000
+# CHECK: fmov d30, #16.00000000
+0x2 0x10 0x28 0x1e
+0x3 0x10 0x2e 0x1e
+0x1e 0x10 0x66 0x1e
+
+# CHECK: fmov s4, #1.06250000
+# CHECK: fmov d10, #1.93750000
+0x4 0x30 0x2e 0x1e
+0xa 0xf0 0x6f 0x1e
+
+# CHECK: fmov s12, #-1.00000000
+0xc 0x10 0x3e 0x1e
+
+# CHECK: fmov d16, #8.50000000
+0x10 0x30 0x64 0x1e
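+
+# A commented Python sketch of the 8-bit immediate expansion these encode
+# (our reading of VFPExpandImm: sign bit a, exponent bits bcd with b
+# inverted, fraction bits efgh):
+#   def expand_imm8(imm8):
+#       a, b = imm8 >> 7, (imm8 >> 6) & 1
+#       cd, efgh = (imm8 >> 4) & 3, imm8 & 0xf
+#       exp = (((1 - b) << 2) | cd) - 3
+#       return (-1.0) ** a * 2.0 ** exp * (1 + efgh / 16.0)
+#   assert expand_imm8(0x70) == 1.0     # "fmov s3, #1.00000000"
+#   assert expand_imm8(0x40) == 0.125   # "fmov s2, #0.12500000"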
+
+#------------------------------------------------------------------------------
+# Load-register (literal)
+#------------------------------------------------------------------------------
+
+# CHECK: ldr w3, #0
+# CHECK: ldr x29, #4
+# CHECK: ldrsw xzr, #-4
+0x03 0x00 0x00 0x18
+0x3d 0x00 0x00 0x58
+0xff 0xff 0xff 0x98
+
+# CHECK: ldr s0, #8
+# CHECK: ldr d0, #1048572
+# CHECK: ldr q0, #-1048576
+0x40 0x00 0x00 0x1c
+0xe0 0xff 0x7f 0x5c
+0x00 0x00 0x80 0x9c
+
+# CHECK: prfm pldl1strm, #0
+# CHECK: prfm #22, #0
+0x01 0x00 0x00 0xd8
+0x16 0x00 0x00 0xd8
+
+#------------------------------------------------------------------------------
+# Load/store exclusive
+#------------------------------------------------------------------------------
+
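+# Repeated mnemonics below feed words that differ only in ignored fields
+# (e.g. the Rt2 or Rs bits of the non-pair forms), checking that the
+# disassembler masks them out.
+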
+# CHECK: stxrb w18, w8, [sp]
+# CHECK: stxrh w24, w15, [x16]
+# CHECK: stxr w5, w6, [x17]
+# CHECK: stxr w1, x10, [x21]
+# CHECK: stxr w1, x10, [x21]
+0xe8 0x7f 0x12 0x08
+0x0f 0x7e 0x18 0x48
+0x26 0x7e 0x05 0x88
+0xaa 0x7e 0x01 0xc8
+0xaa 0x7a 0x01 0xc8
+
+# CHECK: ldxrb w30, [x0]
+# CHECK: ldxrh w17, [x4]
+# CHECK: ldxr w22, [sp]
+# CHECK: ldxr x11, [x29]
+# CHECK: ldxr x11, [x29]
+# CHECK: ldxr x11, [x29]
+0x1e 0x7c 0x5f 0x08
+0x91 0x7c 0x5f 0x48
+0xf6 0x7f 0x5f 0x88
+0xab 0x7f 0x5f 0xc8
+0xab 0x6f 0x5f 0xc8
+0xab 0x7f 0x5e 0xc8
+
+# CHECK: stxp w12, w11, w10, [sp]
+# CHECK: stxp wzr, x27, x9, [x12]
+0xeb 0x2b 0x2c 0x88
+0x9b 0x25 0x3f 0xc8
+
+# CHECK: ldxp w0, wzr, [sp]
+# CHECK: ldxp x17, x0, [x18]
+# CHECK: ldxp x17, x0, [x18]
+0xe0 0x7f 0x7f 0x88
+0x51 0x02 0x7f 0xc8
+0x51 0x02 0x7e 0xc8
+
+# CHECK: stlxrb w12, w22, [x0]
+# CHECK: stlxrh w10, w1, [x1]
+# CHECK: stlxr w9, w2, [x2]
+# CHECK: stlxr w9, x3, [sp]
+0x16 0xfc 0x0c 0x08
+0x21 0xfc 0x0a 0x48
+0x42 0xfc 0x09 0x88
+0xe3 0xff 0x09 0xc8
+
+# CHECK: ldaxrb w8, [x4]
+# CHECK: ldaxrh w7, [x5]
+# CHECK: ldaxr w6, [sp]
+# CHECK: ldaxr x5, [x6]
+# CHECK: ldaxr x5, [x6]
+# CHECK: ldaxr x5, [x6]
+0x88 0xfc 0x5f 0x08
+0xa7 0xfc 0x5f 0x48
+0xe6 0xff 0x5f 0x88
+0xc5 0xfc 0x5f 0xc8
+0xc5 0xec 0x5f 0xc8
+0xc5 0xfc 0x5e 0xc8
+
+# CHECK: stlxp w4, w5, w6, [sp]
+# CHECK: stlxp wzr, x6, x7, [x1]
+0xe5 0x9b 0x24 0x88
+0x26 0x9c 0x3f 0xc8
+
+# CHECK: ldaxp w5, w18, [sp]
+# CHECK: ldaxp x6, x19, [x22]
+# CHECK: ldaxp x6, x19, [x22]
+0xe5 0xcb 0x7f 0x88
+0xc6 0xce 0x7f 0xc8
+0xc6 0xce 0x7e 0xc8
+
+# CHECK: stlrb w24, [sp]
+# CHECK: stlrh w25, [x30]
+# CHECK: stlr w26, [x29]
+# CHECK: stlr x27, [x28]
+# CHECK: stlr x27, [x28]
+# CHECK: stlr x27, [x28]
+0xf8 0xff 0x9f 0x08
+0xd9 0xff 0x9f 0x48
+0xba 0xff 0x9f 0x88
+0x9b 0xff 0x9f 0xc8
+0x9b 0xef 0x9f 0xc8
+0x9b 0xff 0x9e 0xc8
+
+# CHECK: ldarb w23, [sp]
+# CHECK: ldarh w22, [x30]
+# CHECK: ldar wzr, [x29]
+# CHECK: ldar x21, [x28]
+# CHECK: ldar x21, [x28]
+# CHECK: ldar x21, [x28]
+0xf7 0xff 0xdf 0x08
+0xd6 0xff 0xdf 0x48
+0xbf 0xff 0xdf 0x88
+0x95 0xff 0xdf 0xc8
+0x95 0xef 0xdf 0xc8
+0x95 0xff 0xde 0xc8
+
+#------------------------------------------------------------------------------
+# Load/store (unscaled immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: sturb w9, [sp]
+# CHECK: sturh wzr, [x12, #255]
+# CHECK: stur w16, [x0, #-256]
+# CHECK: stur x28, [x14, #1]
+0xe9 0x3 0x0 0x38
+0x9f 0xf1 0xf 0x78
+0x10 0x0 0x10 0xb8
+0xdc 0x11 0x0 0xf8
+
+# CHECK: ldurb w1, [x20, #255]
+# CHECK: ldurh w20, [x1, #255]
+# CHECK: ldur w12, [sp, #255]
+# CHECK: ldur xzr, [x12, #255]
+0x81 0xf2 0x4f 0x38
+0x34 0xf0 0x4f 0x78
+0xec 0xf3 0x4f 0xb8
+0x9f 0xf1 0x4f 0xf8
+
+# CHECK: ldursb x9, [x7, #-256]
+# CHECK: ldursh x17, [x19, #-256]
+# CHECK: ldursw x20, [x15, #-256]
+# CHECK: prfum pldl2keep, [sp, #-256]
+# CHECK: ldursb w19, [x1, #-256]
+# CHECK: ldursh w15, [x21, #-256]
+0xe9 0x0 0x90 0x38
+0x71 0x2 0x90 0x78
+0xf4 0x1 0x90 0xb8
+0xe2 0x3 0x90 0xf8
+0x33 0x0 0xd0 0x38
+0xaf 0x2 0xd0 0x78
+
+# CHECK: stur b0, [sp, #1]
+# CHECK: stur h12, [x12, #-1]
+# CHECK: stur s15, [x0, #255]
+# CHECK: stur d31, [x5, #25]
+# CHECK: stur q9, [x5]
+0xe0 0x13 0x0 0x3c
+0x8c 0xf1 0x1f 0x7c
+0xf 0xf0 0xf 0xbc
+0xbf 0x90 0x1 0xfc
+0xa9 0x0 0x80 0x3c
+
+# CHECK: ldur b3, [sp]
+# CHECK: ldur h5, [x4, #-256]
+# CHECK: ldur s7, [x12, #-1]
+# CHECK: ldur d11, [x19, #4]
+# CHECK: ldur q13, [x1, #2]
+0xe3 0x3 0x40 0x3c
+0x85 0x0 0x50 0x7c
+0x87 0xf1 0x5f 0xbc
+0x6b 0x42 0x40 0xfc
+0x2d 0x20 0xc0 0x3c
+
+#------------------------------------------------------------------------------
+# Load/store (immediate post-indexed)
+#------------------------------------------------------------------------------
+
+# E.g. "str xzr, [sp], #4" is *not* unpredictable
+# CHECK-NOT: warning: potentially undefined instruction encoding
+0xff 0x47 0x40 0xb8
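+
+# A hedged sketch of the predicate involved (commented Python, not part of
+# the test): as we read the ARM ARM pseudocode, a writeback load/store is
+# only CONSTRAINED UNPREDICTABLE when the base and transfer registers use
+# the same number and that number is not 31 -- encoding 31 names sp as a
+# base but xzr/wzr as data, so there is no real overlap.
+#   def writeback_unpredictable(rn, rt):
+#       return rn == rt and rn != 31
+#   assert not writeback_unpredictable(31, 31)   # [sp] with the zero register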
+
+# CHECK: strb w9, [x2], #255
+# CHECK: strb w10, [x3], #1
+# CHECK: strb w10, [x3], #-256
+# CHECK: strh w9, [x2], #255
+# CHECK: strh w9, [x2], #1
+# CHECK: strh w10, [x3], #-256
+0x49 0xf4 0xf 0x38
+0x6a 0x14 0x0 0x38
+0x6a 0x4 0x10 0x38
+0x49 0xf4 0xf 0x78
+0x49 0x14 0x0 0x78
+0x6a 0x4 0x10 0x78
+
+# CHECK: str w19, [sp], #255
+# CHECK: str w20, [x30], #1
+# CHECK: str w21, [x12], #-256
+# CHECK: str xzr, [x9], #255
+# CHECK: str x2, [x3], #1
+# CHECK: str x19, [x12], #-256
+0xf3 0xf7 0xf 0xb8
+0xd4 0x17 0x0 0xb8
+0x95 0x5 0x10 0xb8
+0x3f 0xf5 0xf 0xf8
+0x62 0x14 0x0 0xf8
+0x93 0x5 0x10 0xf8
+
+# CHECK: ldrb w9, [x2], #255
+# CHECK: ldrb w10, [x3], #1
+# CHECK: ldrb w10, [x3], #-256
+# CHECK: ldrh w9, [x2], #255
+# CHECK: ldrh w9, [x2], #1
+# CHECK: ldrh w10, [x3], #-256
+0x49 0xf4 0x4f 0x38
+0x6a 0x14 0x40 0x38
+0x6a 0x4 0x50 0x38
+0x49 0xf4 0x4f 0x78
+0x49 0x14 0x40 0x78
+0x6a 0x4 0x50 0x78
+
+# CHECK: ldr w19, [sp], #255
+# CHECK: ldr w20, [x30], #1
+# CHECK: ldr w21, [x12], #-256
+# CHECK: ldr xzr, [x9], #255
+# CHECK: ldr x2, [x3], #1
+# CHECK: ldr x19, [x12], #-256
+0xf3 0xf7 0x4f 0xb8
+0xd4 0x17 0x40 0xb8
+0x95 0x5 0x50 0xb8
+0x3f 0xf5 0x4f 0xf8
+0x62 0x14 0x40 0xf8
+0x93 0x5 0x50 0xf8
+
+# CHECK: ldrsb xzr, [x9], #255
+# CHECK: ldrsb x2, [x3], #1
+# CHECK: ldrsb x19, [x12], #-256
+# CHECK: ldrsh xzr, [x9], #255
+# CHECK: ldrsh x2, [x3], #1
+# CHECK: ldrsh x19, [x12], #-256
+# CHECK: ldrsw xzr, [x9], #255
+# CHECK: ldrsw x2, [x3], #1
+# CHECK: ldrsw x19, [x12], #-256
+0x3f 0xf5 0x8f 0x38
+0x62 0x14 0x80 0x38
+0x93 0x5 0x90 0x38
+0x3f 0xf5 0x8f 0x78
+0x62 0x14 0x80 0x78
+0x93 0x5 0x90 0x78
+0x3f 0xf5 0x8f 0xb8
+0x62 0x14 0x80 0xb8
+0x93 0x5 0x90 0xb8
+
+# CHECK: ldrsb wzr, [x9], #255
+# CHECK: ldrsb w2, [x3], #1
+# CHECK: ldrsb w19, [x12], #-256
+# CHECK: ldrsh wzr, [x9], #255
+# CHECK: ldrsh w2, [x3], #1
+# CHECK: ldrsh w19, [x12], #-256
+0x3f 0xf5 0xcf 0x38
+0x62 0x14 0xc0 0x38
+0x93 0x5 0xd0 0x38
+0x3f 0xf5 0xcf 0x78
+0x62 0x14 0xc0 0x78
+0x93 0x5 0xd0 0x78
+
+# CHECK: str b0, [x0], #255
+# CHECK: str b3, [x3], #1
+# CHECK: str b5, [sp], #-256
+# CHECK: str h10, [x10], #255
+# CHECK: str h13, [x23], #1
+# CHECK: str h15, [sp], #-256
+# CHECK: str s20, [x20], #255
+# CHECK: str s23, [x23], #1
+# CHECK: str s25, [x0], #-256
+# CHECK: str d20, [x20], #255
+# CHECK: str d23, [x23], #1
+# CHECK: str d25, [x0], #-256
+0x0 0xf4 0xf 0x3c
+0x63 0x14 0x0 0x3c
+0xe5 0x7 0x10 0x3c
+0x4a 0xf5 0xf 0x7c
+0xed 0x16 0x0 0x7c
+0xef 0x7 0x10 0x7c
+0x94 0xf6 0xf 0xbc
+0xf7 0x16 0x0 0xbc
+0x19 0x4 0x10 0xbc
+0x94 0xf6 0xf 0xfc
+0xf7 0x16 0x0 0xfc
+0x19 0x4 0x10 0xfc
+
+# CHECK: ldr b0, [x0], #255
+# CHECK: ldr b3, [x3], #1
+# CHECK: ldr b5, [sp], #-256
+# CHECK: ldr h10, [x10], #255
+# CHECK: ldr h13, [x23], #1
+# CHECK: ldr h15, [sp], #-256
+# CHECK: ldr s20, [x20], #255
+# CHECK: ldr s23, [x23], #1
+# CHECK: ldr s25, [x0], #-256
+# CHECK: ldr d20, [x20], #255
+# CHECK: ldr d23, [x23], #1
+# CHECK: ldr d25, [x0], #-256
+0x0 0xf4 0x4f 0x3c
+0x63 0x14 0x40 0x3c
+0xe5 0x7 0x50 0x3c
+0x4a 0xf5 0x4f 0x7c
+0xed 0x16 0x40 0x7c
+0xef 0x7 0x50 0x7c
+0x94 0xf6 0x4f 0xbc
+0xf7 0x16 0x40 0xbc
+0x19 0x4 0x50 0xbc
+0x94 0xf6 0x4f 0xfc
+0xf7 0x16 0x40 0xfc
+0x19 0x4 0x50 0xfc
+
+# CHECK: ldr q20, [x1], #255
+# CHECK: ldr q23, [x9], #1
+# CHECK: ldr q25, [x20], #-256
+# CHECK: str q10, [x1], #255
+# CHECK: str q22, [sp], #1
+# CHECK: str q21, [x20], #-256
+0x34 0xf4 0xcf 0x3c
+0x37 0x15 0xc0 0x3c
+0x99 0x6 0xd0 0x3c
+0x2a 0xf4 0x8f 0x3c
+0xf6 0x17 0x80 0x3c
+0x95 0x6 0x90 0x3c
+
+#------------------------------------------------------------------------------
+# Load/store (immediate pre-indexed)
+#------------------------------------------------------------------------------
+
+# E.g. "str xzr, [sp, #4]!" is *not* unpredictable
+# CHECK-NOT: warning: potentially undefined instruction encoding
+0xff 0xf 0x40 0xf8
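+
+# The same reg-31 carve-out applies to pre-indexing; a commented sketch
+# extracting both fields from the word above (layout assumed as for the
+# other loads/stores):
+#   word = 0xf8400fff                 # bytes 0xff 0xf 0x40 0xf8
+#   rt, rn = word & 0x1f, (word >> 5) & 0x1f
+#   assert rt == rn == 31             # zero register as data, sp as base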
+
+# CHECK: ldr x3, [x4, #0]!
+0x83 0xc 0x40 0xf8
+
+# CHECK: strb w9, [x2, #255]!
+# CHECK: strb w10, [x3, #1]!
+# CHECK: strb w10, [x3, #-256]!
+# CHECK: strh w9, [x2, #255]!
+# CHECK: strh w9, [x2, #1]!
+# CHECK: strh w10, [x3, #-256]!
+0x49 0xfc 0xf 0x38
+0x6a 0x1c 0x0 0x38
+0x6a 0xc 0x10 0x38
+0x49 0xfc 0xf 0x78
+0x49 0x1c 0x0 0x78
+0x6a 0xc 0x10 0x78
+
+# CHECK: str w19, [sp, #255]!
+# CHECK: str w20, [x30, #1]!
+# CHECK: str w21, [x12, #-256]!
+# CHECK: str xzr, [x9, #255]!
+# CHECK: str x2, [x3, #1]!
+# CHECK: str x19, [x12, #-256]!
+0xf3 0xff 0xf 0xb8
+0xd4 0x1f 0x0 0xb8
+0x95 0xd 0x10 0xb8
+0x3f 0xfd 0xf 0xf8
+0x62 0x1c 0x0 0xf8
+0x93 0xd 0x10 0xf8
+
+# CHECK: ldrb w9, [x2, #255]!
+# CHECK: ldrb w10, [x3, #1]!
+# CHECK: ldrb w10, [x3, #-256]!
+# CHECK: ldrh w9, [x2, #255]!
+# CHECK: ldrh w9, [x2, #1]!
+# CHECK: ldrh w10, [x3, #-256]!
+0x49 0xfc 0x4f 0x38
+0x6a 0x1c 0x40 0x38
+0x6a 0xc 0x50 0x38
+0x49 0xfc 0x4f 0x78
+0x49 0x1c 0x40 0x78
+0x6a 0xc 0x50 0x78
+
+# CHECK: ldr w19, [sp, #255]!
+# CHECK: ldr w20, [x30, #1]!
+# CHECK: ldr w21, [x12, #-256]!
+# CHECK: ldr xzr, [x9, #255]!
+# CHECK: ldr x2, [x3, #1]!
+# CHECK: ldr x19, [x12, #-256]!
+0xf3 0xff 0x4f 0xb8
+0xd4 0x1f 0x40 0xb8
+0x95 0xd 0x50 0xb8
+0x3f 0xfd 0x4f 0xf8
+0x62 0x1c 0x40 0xf8
+0x93 0xd 0x50 0xf8
+
+# CHECK: ldrsb xzr, [x9, #255]!
+# CHECK: ldrsb x2, [x3, #1]!
+# CHECK: ldrsb x19, [x12, #-256]!
+# CHECK: ldrsh xzr, [x9, #255]!
+# CHECK: ldrsh x2, [x3, #1]!
+# CHECK: ldrsh x19, [x12, #-256]!
+# CHECK: ldrsw xzr, [x9, #255]!
+# CHECK: ldrsw x2, [x3, #1]!
+# CHECK: ldrsw x19, [x12, #-256]!
+0x3f 0xfd 0x8f 0x38
+0x62 0x1c 0x80 0x38
+0x93 0xd 0x90 0x38
+0x3f 0xfd 0x8f 0x78
+0x62 0x1c 0x80 0x78
+0x93 0xd 0x90 0x78
+0x3f 0xfd 0x8f 0xb8
+0x62 0x1c 0x80 0xb8
+0x93 0xd 0x90 0xb8
+
+# CHECK: ldrsb wzr, [x9, #255]!
+# CHECK: ldrsb w2, [x3, #1]!
+# CHECK: ldrsb w19, [x12, #-256]!
+# CHECK: ldrsh wzr, [x9, #255]!
+# CHECK: ldrsh w2, [x3, #1]!
+# CHECK: ldrsh w19, [x12, #-256]!
+0x3f 0xfd 0xcf 0x38
+0x62 0x1c 0xc0 0x38
+0x93 0xd 0xd0 0x38
+0x3f 0xfd 0xcf 0x78
+0x62 0x1c 0xc0 0x78
+0x93 0xd 0xd0 0x78
+
+# CHECK: str b0, [x0, #255]!
+# CHECK: str b3, [x3, #1]!
+# CHECK: str b5, [sp, #-256]!
+# CHECK: str h10, [x10, #255]!
+# CHECK: str h13, [x23, #1]!
+# CHECK: str h15, [sp, #-256]!
+# CHECK: str s20, [x20, #255]!
+# CHECK: str s23, [x23, #1]!
+# CHECK: str s25, [x0, #-256]!
+# CHECK: str d20, [x20, #255]!
+# CHECK: str d23, [x23, #1]!
+# CHECK: str d25, [x0, #-256]!
+0x0 0xfc 0xf 0x3c
+0x63 0x1c 0x0 0x3c
+0xe5 0xf 0x10 0x3c
+0x4a 0xfd 0xf 0x7c
+0xed 0x1e 0x0 0x7c
+0xef 0xf 0x10 0x7c
+0x94 0xfe 0xf 0xbc
+0xf7 0x1e 0x0 0xbc
+0x19 0xc 0x10 0xbc
+0x94 0xfe 0xf 0xfc
+0xf7 0x1e 0x0 0xfc
+0x19 0xc 0x10 0xfc
+
+# CHECK: ldr b0, [x0, #255]!
+# CHECK: ldr b3, [x3, #1]!
+# CHECK: ldr b5, [sp, #-256]!
+# CHECK: ldr h10, [x10, #255]!
+# CHECK: ldr h13, [x23, #1]!
+# CHECK: ldr h15, [sp, #-256]!
+# CHECK: ldr s20, [x20, #255]!
+# CHECK: ldr s23, [x23, #1]!
+# CHECK: ldr s25, [x0, #-256]!
+# CHECK: ldr d20, [x20, #255]!
+# CHECK: ldr d23, [x23, #1]!
+# CHECK: ldr d25, [x0, #-256]!
+0x0 0xfc 0x4f 0x3c
+0x63 0x1c 0x40 0x3c
+0xe5 0xf 0x50 0x3c
+0x4a 0xfd 0x4f 0x7c
+0xed 0x1e 0x40 0x7c
+0xef 0xf 0x50 0x7c
+0x94 0xfe 0x4f 0xbc
+0xf7 0x1e 0x40 0xbc
+0x19 0xc 0x50 0xbc
+0x94 0xfe 0x4f 0xfc
+0xf7 0x1e 0x40 0xfc
+0x19 0xc 0x50 0xfc
+
+# CHECK: ldr q20, [x1, #255]!
+# CHECK: ldr q23, [x9, #1]!
+# CHECK: ldr q25, [x20, #-256]!
+# CHECK: str q10, [x1, #255]!
+# CHECK: str q22, [sp, #1]!
+# CHECK: str q21, [x20, #-256]!
+0x34 0xfc 0xcf 0x3c
+0x37 0x1d 0xc0 0x3c
+0x99 0xe 0xd0 0x3c
+0x2a 0xfc 0x8f 0x3c
+0xf6 0x1f 0x80 0x3c
+0x95 0xe 0x90 0x3c
+
+#------------------------------------------------------------------------------
+# Load/store (unprivileged)
+#------------------------------------------------------------------------------
+
+# CHECK: sttrb w9, [sp]
+# CHECK: sttrh wzr, [x12, #255]
+# CHECK: sttr w16, [x0, #-256]
+# CHECK: sttr x28, [x14, #1]
+0xe9 0x0b 0x0 0x38
+0x9f 0xf9 0xf 0x78
+0x10 0x08 0x10 0xb8
+0xdc 0x19 0x0 0xf8
+
+# CHECK: ldtrb w1, [x20, #255]
+# CHECK: ldtrh w20, [x1, #255]
+# CHECK: ldtr w12, [sp, #255]
+# CHECK: ldtr xzr, [x12, #255]
+0x81 0xfa 0x4f 0x38
+0x34 0xf8 0x4f 0x78
+0xec 0xfb 0x4f 0xb8
+0x9f 0xf9 0x4f 0xf8
+
+# CHECK: ldtrsb x9, [x7, #-256]
+# CHECK: ldtrsh x17, [x19, #-256]
+# CHECK: ldtrsw x20, [x15, #-256]
+# CHECK: ldtrsb w19, [x1, #-256]
+# CHECK: ldtrsh w15, [x21, #-256]
+0xe9 0x08 0x90 0x38
+0x71 0x0a 0x90 0x78
+0xf4 0x09 0x90 0xb8
+0x33 0x08 0xd0 0x38
+0xaf 0x0a 0xd0 0x78
+
+#------------------------------------------------------------------------------
+# Load/store (unsigned immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: ldr x0, [x0]
+# CHECK: ldr x4, [x29]
+# CHECK: ldr x30, [x12, #32760]
+# CHECK: ldr x20, [sp, #8]
+0x0 0x0 0x40 0xf9
+0xa4 0x3 0x40 0xf9
+0x9e 0xfd 0x7f 0xf9
+0xf4 0x7 0x40 0xf9
+
+# CHECK: ldr xzr, [sp]
+0xff 0x3 0x40 0xf9
+
+# CHECK: ldr w2, [sp]
+# CHECK: ldr w17, [sp, #16380]
+# CHECK: ldr w13, [x2, #4]
+0xe2 0x3 0x40 0xb9
+0xf1 0xff 0x7f 0xb9
+0x4d 0x4 0x40 0xb9
+
+# CHECK: ldrsw x2, [x5, #4]
+# CHECK: ldrsw x23, [sp, #16380]
+0xa2 0x4 0x80 0xb9
+0xf7 0xff 0xbf 0xb9
+
+# CHECK: ldrh w2, [x4]
+# CHECK: ldrsh w23, [x6, #8190]
+# CHECK: ldrsh wzr, [sp, #2]
+# CHECK: ldrsh x29, [x2, #2]
+0x82 0x0 0x40 0x79
+0xd7 0xfc 0xff 0x79
+0xff 0x7 0xc0 0x79
+0x5d 0x4 0x80 0x79
+
+# CHECK: ldrb w26, [x3, #121]
+# CHECK: ldrb w12, [x2]
+# CHECK: ldrsb w27, [sp, #4095]
+# CHECK: ldrsb xzr, [x15]
+0x7a 0xe4 0x41 0x39
+0x4c 0x0 0x40 0x39
+0xfb 0xff 0xff 0x39
+0xff 0x1 0x80 0x39
+
+# CHECK: str x30, [sp]
+# CHECK: str w20, [x4, #16380]
+# CHECK: strh w20, [x10, #14]
+# CHECK: strh w17, [sp, #8190]
+# CHECK: strb w23, [x3, #4095]
+# CHECK: strb wzr, [x2]
+0xfe 0x3 0x0 0xf9
+0x94 0xfc 0x3f 0xb9
+0x54 0x1d 0x0 0x79
+0xf1 0xff 0x3f 0x79
+0x77 0xfc 0x3f 0x39
+0x5f 0x0 0x0 0x39
+
+# CHECK: ldr b31, [sp, #4095]
+# CHECK: ldr h20, [x2, #8190]
+# CHECK: ldr s10, [x19, #16380]
+# CHECK: ldr d3, [x10, #32760]
+# CHECK: str q12, [sp, #65520]
+0xff 0xff 0x7f 0x3d
+0x54 0xfc 0x7f 0x7d
+0x6a 0xfe 0x7f 0xbd
+0x43 0xfd 0x7f 0xfd
+0xec 0xff 0xbf 0x3d
+
+# CHECK: prfm pldl1keep, [sp, #8]
+# CHECK: prfm pldl1strm, [x3, #0]
+# CHECK: prfm pldl2keep, [x5, #16]
+# CHECK: prfm pldl2strm, [x2, #0]
+# CHECK: prfm pldl3keep, [x5, #0]
+# CHECK: prfm pldl3strm, [x6, #0]
+# CHECK: prfm plil1keep, [sp, #8]
+# CHECK: prfm plil1strm, [x3, #0]
+# CHECK: prfm plil2keep, [x5, #16]
+# CHECK: prfm plil2strm, [x2, #0]
+# CHECK: prfm plil3keep, [x5, #0]
+# CHECK: prfm plil3strm, [x6, #0]
+# CHECK: prfm pstl1keep, [sp, #8]
+# CHECK: prfm pstl1strm, [x3, #0]
+# CHECK: prfm pstl2keep, [x5, #16]
+# CHECK: prfm pstl2strm, [x2, #0]
+# CHECK: prfm pstl3keep, [x5, #0]
+# CHECK: prfm pstl3strm, [x6, #0]
+0xe0 0x07 0x80 0xf9
+0x61 0x00 0x80 0xf9
+0xa2 0x08 0x80 0xf9
+0x43 0x00 0x80 0xf9
+0xa4 0x00 0x80 0xf9
+0xc5 0x00 0x80 0xf9
+0xe8 0x07 0x80 0xf9
+0x69 0x00 0x80 0xf9
+0xaa 0x08 0x80 0xf9
+0x4b 0x00 0x80 0xf9
+0xac 0x00 0x80 0xf9
+0xcd 0x00 0x80 0xf9
+0xf0 0x07 0x80 0xf9
+0x71 0x00 0x80 0xf9
+0xb2 0x08 0x80 0xf9
+0x53 0x00 0x80 0xf9
+0xb4 0x00 0x80 0xf9
+0xd5 0x00 0x80 0xf9
+
+#------------------------------------------------------------------------------
+# Load/store (register offset)
+#------------------------------------------------------------------------------
+
+# CHECK: ldrb w3, [sp, x5]
+# CHECK: ldrb w9, [x27, x6]
+# CHECK: ldrsb w10, [x30, x7]
+# CHECK: ldrb w11, [x29, x3, sxtx]
+# CHECK: strb w12, [x28, xzr, sxtx]
+# CHECK: ldrb w14, [x26, w6, uxtw]
+# CHECK: ldrsb w15, [x25, w7, uxtw]
+# CHECK: ldrb w17, [x23, w9, sxtw]
+# CHECK: ldrsb x18, [x22, w10, sxtw]
+0xe3 0x6b 0x65 0x38
+0x69 0x6b 0x66 0x38
+0xca 0x6b 0xe7 0x38
+0xab 0xeb 0x63 0x38
+0x8c 0xeb 0x3f 0x38
+0x4e 0x4b 0x66 0x38
+0x2f 0x4b 0xe7 0x38
+0xf1 0xca 0x69 0x38
+0xd2 0xca 0xaa 0x38
+
+# CHECK: ldrsh w3, [sp, x5]
+# CHECK: ldrsh w9, [x27, x6]
+# CHECK: ldrh w10, [x30, x7, lsl #1]
+# CHECK: strh w11, [x29, x3, sxtx]
+# CHECK: ldrh w12, [x28, xzr, sxtx]
+# CHECK: ldrsh x13, [x27, x5, sxtx #1]
+# CHECK: ldrh w14, [x26, w6, uxtw]
+# CHECK: ldrh w15, [x25, w7, uxtw]
+# CHECK: ldrsh w16, [x24, w8, uxtw #1]
+# CHECK: ldrh w17, [x23, w9, sxtw]
+# CHECK: ldrh w18, [x22, w10, sxtw]
+# CHECK: strh w19, [x21, wzr, sxtw #1]
+0xe3 0x6b 0xe5 0x78
+0x69 0x6b 0xe6 0x78
+0xca 0x7b 0x67 0x78
+0xab 0xeb 0x23 0x78
+0x8c 0xeb 0x7f 0x78
+0x6d 0xfb 0xa5 0x78
+0x4e 0x4b 0x66 0x78
+0x2f 0x4b 0x67 0x78
+0x10 0x5b 0xe8 0x78
+0xf1 0xca 0x69 0x78
+0xd2 0xca 0x6a 0x78
+0xb3 0xda 0x3f 0x78
+
+# CHECK: ldr w3, [sp, x5]
+# CHECK: ldr s9, [x27, x6]
+# CHECK: ldr w10, [x30, x7, lsl #2]
+# CHECK: ldr w11, [x29, x3, sxtx]
+# CHECK: str s12, [x28, xzr, sxtx]
+# CHECK: str w13, [x27, x5, sxtx #2]
+# CHECK: str w14, [x26, w6, uxtw]
+# CHECK: ldr w15, [x25, w7, uxtw]
+# CHECK: ldr w16, [x24, w8, uxtw #2]
+# CHECK: ldrsw x17, [x23, w9, sxtw]
+# CHECK: ldr w18, [x22, w10, sxtw]
+# CHECK: ldrsw x19, [x21, wzr, sxtw #2]
+0xe3 0x6b 0x65 0xb8
+0x69 0x6b 0x66 0xbc
+0xca 0x7b 0x67 0xb8
+0xab 0xeb 0x63 0xb8
+0x8c 0xeb 0x3f 0xbc
+0x6d 0xfb 0x25 0xb8
+0x4e 0x4b 0x26 0xb8
+0x2f 0x4b 0x67 0xb8
+0x10 0x5b 0x68 0xb8
+0xf1 0xca 0xa9 0xb8
+0xd2 0xca 0x6a 0xb8
+0xb3 0xda 0xbf 0xb8
+
+# CHECK: ldr x3, [sp, x5]
+# CHECK: str x9, [x27, x6]
+# CHECK: ldr d10, [x30, x7, lsl #3]
+# CHECK: str x11, [x29, x3, sxtx]
+# CHECK: ldr x12, [x28, xzr, sxtx]
+# CHECK: ldr x13, [x27, x5, sxtx #3]
+# CHECK: prfm pldl1keep, [x26, w6, uxtw]
+# CHECK: ldr x15, [x25, w7, uxtw]
+# CHECK: ldr x16, [x24, w8, uxtw #3]
+# CHECK: ldr x17, [x23, w9, sxtw]
+# CHECK: ldr x18, [x22, w10, sxtw]
+# CHECK: str d19, [x21, wzr, sxtw #3]
+0xe3 0x6b 0x65 0xf8
+0x69 0x6b 0x26 0xf8
+0xca 0x7b 0x67 0xfc
+0xab 0xeb 0x23 0xf8
+0x8c 0xeb 0x7f 0xf8
+0x6d 0xfb 0x65 0xf8
+0x40 0x4b 0xa6 0xf8
+0x2f 0x4b 0x67 0xf8
+0x10 0x5b 0x68 0xf8
+0xf1 0xca 0x69 0xf8
+0xd2 0xca 0x6a 0xf8
+0xb3 0xda 0x3f 0xfc
+
+# CHECK: ldr q3, [sp, x5]
+# CHECK: ldr q9, [x27, x6]
+# CHECK: ldr q10, [x30, x7, lsl #4]
+# CHECK: str q11, [x29, x3, sxtx]
+# CHECK: str q12, [x28, xzr, sxtx]
+# CHECK: str q13, [x27, x5, sxtx #4]
+# CHECK: ldr q14, [x26, w6, uxtw]
+# CHECK: ldr q15, [x25, w7, uxtw]
+# CHECK: ldr q16, [x24, w8, uxtw #4]
+# CHECK: ldr q17, [x23, w9, sxtw]
+# CHECK: str q18, [x22, w10, sxtw]
+# CHECK: ldr q19, [x21, wzr, sxtw #4]
+0xe3 0x6b 0xe5 0x3c
+0x69 0x6b 0xe6 0x3c
+0xca 0x7b 0xe7 0x3c
+0xab 0xeb 0xa3 0x3c
+0x8c 0xeb 0xbf 0x3c
+0x6d 0xfb 0xa5 0x3c
+0x4e 0x4b 0xe6 0x3c
+0x2f 0x4b 0xe7 0x3c
+0x10 0x5b 0xe8 0x3c
+0xf1 0xca 0xe9 0x3c
+0xd2 0xca 0xaa 0x3c
+0xb3 0xda 0xff 0x3c
+
+#------------------------------------------------------------------------------
+# Load/store register pair (offset)
+#------------------------------------------------------------------------------
+
+# CHECK: ldp w3, w5, [sp]
+# CHECK: stp wzr, w9, [sp, #252]
+# CHECK: ldp w2, wzr, [sp, #-256]
+# CHECK: ldp w9, w10, [sp, #4]
+0xe3 0x17 0x40 0x29
+0xff 0xa7 0x1f 0x29
+0xe2 0x7f 0x60 0x29
+0xe9 0xab 0x40 0x29
+
+# CHECK: ldpsw x9, x10, [sp, #4]
+# CHECK: ldpsw x9, x10, [x2, #-256]
+# CHECK: ldpsw x20, x30, [sp, #252]
+0xe9 0xab 0x40 0x69
+0x49 0x28 0x60 0x69
+0xf4 0xfb 0x5f 0x69
+
+# CHECK: ldp x21, x29, [x2, #504]
+# CHECK: ldp x22, x23, [x3, #-512]
+# CHECK: ldp x24, x25, [x4, #8]
+0x55 0xf4 0x5f 0xa9
+0x76 0x5c 0x60 0xa9
+0x98 0xe4 0x40 0xa9
+
+# CHECK: ldp s29, s28, [sp, #252]
+# CHECK: stp s27, s26, [sp, #-256]
+# CHECK: ldp s1, s2, [x3, #44]
+0xfd 0xf3 0x5f 0x2d
+0xfb 0x6b 0x20 0x2d
+0x61 0x88 0x45 0x2d
+
+# CHECK: stp d3, d5, [x9, #504]
+# CHECK: stp d7, d11, [x10, #-512]
+# CHECK: ldp d2, d3, [x30, #-8]
+0x23 0x95 0x1f 0x6d
+0x47 0x2d 0x20 0x6d
+0xc2 0x8f 0x7f 0x6d
+
+# CHECK: stp q3, q5, [sp]
+# CHECK: stp q17, q19, [sp, #1008]
+# CHECK: ldp q23, q29, [x1, #-1024]
+0xe3 0x17 0x0 0xad
+0xf1 0xcf 0x1f 0xad
+0x37 0x74 0x60 0xad
+
+#------------------------------------------------------------------------------
+# Load/store register pair (post-indexed)
+#------------------------------------------------------------------------------
+
+# CHECK: ldp w3, w5, [sp], #0
+# CHECK: stp wzr, w9, [sp], #252
+# CHECK: ldp w2, wzr, [sp], #-256
+# CHECK: ldp w9, w10, [sp], #4
+0xe3 0x17 0xc0 0x28
+0xff 0xa7 0x9f 0x28
+0xe2 0x7f 0xe0 0x28
+0xe9 0xab 0xc0 0x28
+
+# CHECK: ldpsw x9, x10, [sp], #4
+# CHECK: ldpsw x9, x10, [x2], #-256
+# CHECK: ldpsw x20, x30, [sp], #252
+0xe9 0xab 0xc0 0x68
+0x49 0x28 0xe0 0x68
+0xf4 0xfb 0xdf 0x68
+
+# CHECK: ldp x21, x29, [x2], #504
+# CHECK: ldp x22, x23, [x3], #-512
+# CHECK: ldp x24, x25, [x4], #8
+0x55 0xf4 0xdf 0xa8
+0x76 0x5c 0xe0 0xa8
+0x98 0xe4 0xc0 0xa8
+
+# CHECK: ldp s29, s28, [sp], #252
+# CHECK: stp s27, s26, [sp], #-256
+# CHECK: ldp s1, s2, [x3], #44
+0xfd 0xf3 0xdf 0x2c
+0xfb 0x6b 0xa0 0x2c
+0x61 0x88 0xc5 0x2c
+
+# CHECK: stp d3, d5, [x9], #504
+# CHECK: stp d7, d11, [x10], #-512
+# CHECK: ldp d2, d3, [x30], #-8
+0x23 0x95 0x9f 0x6c
+0x47 0x2d 0xa0 0x6c
+0xc2 0x8f 0xff 0x6c
+
+# CHECK: stp q3, q5, [sp], #0
+# CHECK: stp q17, q19, [sp], #1008
+# CHECK: ldp q23, q29, [x1], #-1024
+0xe3 0x17 0x80 0xac
+0xf1 0xcf 0x9f 0xac
+0x37 0x74 0xe0 0xac
+
+#------------------------------------------------------------------------------
+# Load/store register pair (pre-indexed)
+#------------------------------------------------------------------------------
+
+# CHECK: ldp w3, w5, [sp, #0]!
+# CHECK: stp wzr, w9, [sp, #252]!
+# CHECK: ldp w2, wzr, [sp, #-256]!
+# CHECK: ldp w9, w10, [sp, #4]!
+0xe3 0x17 0xc0 0x29
+0xff 0xa7 0x9f 0x29
+0xe2 0x7f 0xe0 0x29
+0xe9 0xab 0xc0 0x29
+
+# CHECK: ldpsw x9, x10, [sp, #4]!
+# CHECK: ldpsw x9, x10, [x2, #-256]!
+# CHECK: ldpsw x20, x30, [sp, #252]!
+0xe9 0xab 0xc0 0x69
+0x49 0x28 0xe0 0x69
+0xf4 0xfb 0xdf 0x69
+
+# CHECK: ldp x21, x29, [x2, #504]!
+# CHECK: ldp x22, x23, [x3, #-512]!
+# CHECK: ldp x24, x25, [x4, #8]!
+0x55 0xf4 0xdf 0xa9
+0x76 0x5c 0xe0 0xa9
+0x98 0xe4 0xc0 0xa9
+
+# CHECK: ldp s29, s28, [sp, #252]!
+# CHECK: stp s27, s26, [sp, #-256]!
+# CHECK: ldp s1, s2, [x3, #44]!
+0xfd 0xf3 0xdf 0x2d
+0xfb 0x6b 0xa0 0x2d
+0x61 0x88 0xc5 0x2d
+
+# CHECK: stp d3, d5, [x9, #504]!
+# CHECK: stp d7, d11, [x10, #-512]!
+# CHECK: ldp d2, d3, [x30, #-8]!
+0x23 0x95 0x9f 0x6d
+0x47 0x2d 0xa0 0x6d
+0xc2 0x8f 0xff 0x6d
+
+# CHECK: stp q3, q5, [sp, #0]!
+# CHECK: stp q17, q19, [sp, #1008]!
+# CHECK: ldp q23, q29, [x1, #-1024]!
+0xe3 0x17 0x80 0xad
+0xf1 0xcf 0x9f 0xad
+0x37 0x74 0xe0 0xad
+
+#------------------------------------------------------------------------------
+# Load/store non-temporal register pair (offset)
+#------------------------------------------------------------------------------
+
+# CHECK: ldnp w3, w5, [sp]
+# CHECK: stnp wzr, w9, [sp, #252]
+# CHECK: ldnp w2, wzr, [sp, #-256]
+# CHECK: ldnp w9, w10, [sp, #4]
+0xe3 0x17 0x40 0x28
+0xff 0xa7 0x1f 0x28
+0xe2 0x7f 0x60 0x28
+0xe9 0xab 0x40 0x28
+
+# CHECK: ldnp x21, x29, [x2, #504]
+# CHECK: ldnp x22, x23, [x3, #-512]
+# CHECK: ldnp x24, x25, [x4, #8]
+0x55 0xf4 0x5f 0xa8
+0x76 0x5c 0x60 0xa8
+0x98 0xe4 0x40 0xa8
+
+# CHECK: ldnp s29, s28, [sp, #252]
+# CHECK: stnp s27, s26, [sp, #-256]
+# CHECK: ldnp s1, s2, [x3, #44]
+0xfd 0xf3 0x5f 0x2c
+0xfb 0x6b 0x20 0x2c
+0x61 0x88 0x45 0x2c
+
+# CHECK: stnp d3, d5, [x9, #504]
+# CHECK: stnp d7, d11, [x10, #-512]
+# CHECK: ldnp d2, d3, [x30, #-8]
+0x23 0x95 0x1f 0x6c
+0x47 0x2d 0x20 0x6c
+0xc2 0x8f 0x7f 0x6c
+
+# CHECK: stnp q3, q5, [sp]
+# CHECK: stnp q17, q19, [sp, #1008]
+# CHECK: ldnp q23, q29, [x1, #-1024]
+0xe3 0x17 0x0 0xac
+0xf1 0xcf 0x1f 0xac
+0x37 0x74 0x60 0xac
+
+#------------------------------------------------------------------------------
+# Logical (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: orr w3, w9, #0xffff0000
+# CHECK: orr wsp, w10, #0xe00000ff
+# CHECK: orr w9, w10, #0x3ff
+0x23 0x3d 0x10 0x32
+0x5f 0x29 0x3 0x32
+0x49 0x25 0x0 0x32
+
+# CHECK: and w14, w15, #0x80008000
+# CHECK: and w12, w13, #0xffc3ffc3
+# CHECK: and w11, wzr, #0x30003
+0xee 0x81 0x1 0x12
+0xac 0xad 0xa 0x12
+0xeb 0x87 0x0 0x12
+
+# CHECK: eor w3, w6, #0xe0e0e0e0
+# CHECK: eor wsp, wzr, #0x3030303
+# CHECK: eor w16, w17, #0x81818181
+0xc3 0xc8 0x3 0x52
+0xff 0xc7 0x0 0x52
+0x30 0xc6 0x1 0x52
+
+# CHECK: ands wzr, w18, #0xcccccccc
+# CHECK: ands w19, w20, #0x33333333
+# CHECK: ands w21, w22, #0x99999999
+0x5f 0xe6 0x2 0x72
+0x93 0xe6 0x0 0x72
+0xd5 0xe6 0x1 0x72
+
+# CHECK: ands wzr, w3, #0xaaaaaaaa
+# CHECK: ands wzr, wzr, #0x55555555
+0x7f 0xf0 0x1 0x72
+0xff 0xf3 0x0 0x72
+
+# CHECK: eor x3, x5, #0xffffffffc000000
+# CHECK: and x9, x10, #0x7fffffffffff
+# CHECK: orr x11, x12, #0x8000000000000fff
+0xa3 0x84 0x66 0xd2
+0x49 0xb9 0x40 0x92
+0x8b 0x31 0x41 0xb2
+
+# CHECK: orr x3, x9, #0xffff0000ffff0000
+# CHECK: orr sp, x10, #0xe00000ffe00000ff
+# CHECK: orr x9, x10, #0x3ff000003ff
+0x23 0x3d 0x10 0xb2
+0x5f 0x29 0x3 0xb2
+0x49 0x25 0x0 0xb2
+
+# CHECK: and x14, x15, #0x8000800080008000
+# CHECK: and x12, x13, #0xffc3ffc3ffc3ffc3
+# CHECK: and x11, xzr, #0x3000300030003
+0xee 0x81 0x1 0x92
+0xac 0xad 0xa 0x92
+0xeb 0x87 0x0 0x92
+
+# CHECK: eor x3, x6, #0xe0e0e0e0e0e0e0e0
+# CHECK: eor sp, xzr, #0x303030303030303
+# CHECK: eor x16, x17, #0x8181818181818181
+0xc3 0xc8 0x3 0xd2
+0xff 0xc7 0x0 0xd2
+0x30 0xc6 0x1 0xd2
+
+# CHECK: ands xzr, x18, #0xcccccccccccccccc
+# CHECK: ands x19, x20, #0x3333333333333333
+# CHECK: ands x21, x22, #0x9999999999999999
+0x5f 0xe6 0x2 0xf2
+0x93 0xe6 0x0 0xf2
+0xd5 0xe6 0x1 0xf2
+
+# CHECK: ands xzr, x3, #0xaaaaaaaaaaaaaaaa
+# CHECK: ands xzr, xzr, #0x5555555555555555
+0x7f 0xf0 0x1 0xf2
+0xff 0xf3 0x0 0xf2
+
+# CHECK: orr w3, wzr, #0xf000f
+# CHECK: orr x10, xzr, #0xaaaaaaaaaaaaaaaa
+0xe3 0x8f 0x0 0x32
+0xea 0xf3 0x1 0xb2
+
+# CHECK: orr w3, wzr, #0xffff
+# CHECK: orr x9, xzr, #0xffff00000000
+0xe3 0x3f 0x0 0x32
+0xe9 0x3f 0x60 0xb2
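+
+# A commented Python sketch of how these bitmask immediates decode (our
+# reading of the usual DecodeBitMasks scheme; helper and parameter names
+# are ours):
+#   def ror(v, r, size):
+#       return ((v >> r) | (v << (size - r))) & ((1 << size) - 1) if r else v
+#   def logical_imm(n, immr, imms, width):
+#       length = (n << 6) | (~imms & 0x3f)
+#       esize = 1 << (length.bit_length() - 1)      # element size
+#       pattern = (1 << ((imms & (esize - 1)) + 1)) - 1
+#       pattern = ror(pattern, immr % esize, esize)
+#       out = 0
+#       for i in range(width // esize):             # replicate across width
+#           out |= pattern << (i * esize)
+#       return out
+#   assert logical_imm(0, 0, 9, 32) == 0x3ff        # "orr w9, w10, #0x3ff"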
+
+#------------------------------------------------------------------------------
+# Logical (shifted register)
+#------------------------------------------------------------------------------
+
+# CHECK: and w12, w23, w21
+# CHECK: and w16, w15, w1, lsl #1
+# CHECK: and w9, w4, w10, lsl #31
+# CHECK: and w3, w30, w11
+# CHECK: and x3, x5, x7, lsl #63
+0xec 0x2 0x15 0xa
+0xf0 0x5 0x1 0xa
+0x89 0x7c 0xa 0xa
+0xc3 0x3 0xb 0xa
+0xa3 0xfc 0x7 0x8a
+
+# CHECK: and x5, x14, x19, asr #4
+# CHECK: and w3, w17, w19, ror #31
+# CHECK: and w0, w2, wzr, lsr #17
+# CHECK: and w3, w30, w11, asr
+0xc5 0x11 0x93 0x8a
+0x23 0x7e 0xd3 0xa
+0x40 0x44 0x5f 0xa
+0xc3 0x3 0x8b 0xa
+
+# CHECK: and xzr, x4, x26
+# CHECK: and w3, wzr, w20, ror
+# CHECK: and x7, x20, xzr, asr #63
+0x9f 0x0 0x1a 0x8a
+0xe3 0x3 0xd4 0xa
+0x87 0xfe 0x9f 0x8a
+
+# CHECK: bic x13, x20, x14, lsl #47
+# CHECK: bic w2, w7, w9
+# CHECK: orr w2, w7, w0, asr #31
+# CHECK: orr x8, x9, x10, lsl #12
+# CHECK: orn x3, x5, x7, asr
+# CHECK: orn w2, w5, w29
+0x8d 0xbe 0x2e 0x8a
+0xe2 0x0 0x29 0xa
+0xe2 0x7c 0x80 0x2a
+0x28 0x31 0xa 0xaa
+0xa3 0x0 0xa7 0xaa
+0xa2 0x0 0x3d 0x2a
+
+# CHECK: ands w7, wzr, w9, lsl #1
+# CHECK: ands x3, x5, x20, ror #63
+# CHECK: bics w3, w5, w7
+# CHECK: bics x3, xzr, x3, lsl #1
+# CHECK: tst w3, w7, lsl #31
+# CHECK: tst x2, x20, asr
+0xe7 0x7 0x9 0x6a
+0xa3 0xfc 0xd4 0xea
+0xa3 0x0 0x27 0x6a
+0xe3 0x7 0x23 0xea
+0x7f 0x7c 0x7 0x6a
+0x5f 0x0 0x94 0xea
+
+# CHECK: mov x3, x6
+# CHECK: mov x3, xzr
+# CHECK: mov wzr, w2
+# CHECK: mov w3, w5
+0xe3 0x3 0x6 0xaa
+0xe3 0x3 0x1f 0xaa
+0xff 0x3 0x2 0x2a
+0xe3 0x3 0x5 0x2a
+
+#------------------------------------------------------------------------------
+# Move wide (immediate)
+#------------------------------------------------------------------------------
+
+# N.b. (FIXME) canonical aliases aren't produced here because of a
+# limitation in InstAlias. Many of the "mov[nz]" instructions below should
+# disassemble to "mov".
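+# As a commented sketch of the alias rule we believe applies (the ARM ARM's
+# preferred-alias condition for movz):
+#   def movz_prefers_mov(imm16, hw):
+#       return not (imm16 == 0 and hw != 0)
+#   movz_prefers_mov(0xffff, 0)   # True:  "movz w1, #65535" ought to be "mov"
+#   movz_prefers_mov(0, 1)        # False: "movz w2, #0, lsl #16" stays movz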
+
+# CHECK: movz w1, #65535
+# CHECK: movz w2, #0, lsl #16
+# CHECK: movn w2, #1234
+0xe1 0xff 0x9f 0x52
+0x2 0x0 0xa0 0x52
+0x42 0x9a 0x80 0x12
+
+# CHECK: movz x2, #1234, lsl #32
+# CHECK: movk xzr, #4321, lsl #48
+0x42 0x9a 0xc0 0xd2
+0x3f 0x1c 0xe2 0xf2
+
+# CHECK: movz x2, #0
+# CHECK: movk w3, #0
+# CHECK: movz x4, #0, lsl #16
+# CHECK: movk w5, #0, lsl #16
+# CHECK: movz x6, #0, lsl #32
+# CHECK: movk x7, #0, lsl #32
+# CHECK: movz x8, #0, lsl #48
+# CHECK: movk x9, #0, lsl #48
+0x2 0x0 0x80 0xd2
+0x3 0x0 0x80 0x72
+0x4 0x0 0xa0 0xd2
+0x5 0x0 0xa0 0x72
+0x6 0x0 0xc0 0xd2
+0x7 0x0 0xc0 0xf2
+0x8 0x0 0xe0 0xd2
+0x9 0x0 0xe0 0xf2
+
+#------------------------------------------------------------------------------
+# PC-relative addressing
+#------------------------------------------------------------------------------
+
+# It's slightly dodgy using immediates here, but harmless enough when
+# it's all that's available.
+
+# CHECK: adr x2, #1600
+# CHECK: adrp x21, #6553600
+# CHECK: adr x0, #262144
+0x02 0x32 0x00 0x10
+0x15 0x32 0x00 0x90
+0x00 0x00 0x20 0x10
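+
+# A worked decode of the first word above (commented Python sketch; the adr
+# field layout immlo=[30:29], immhi=[23:5] is assumed from the ARM ARM):
+#   word = 0x10003202                                  # adr x2, #1600
+#   imm = (((word >> 5) & 0x7ffff) << 2) | ((word >> 29) & 0x3)
+#   assert imm == 1600
+# For the adrp line the same-format value (also 1600 here) is scaled by the
+# 4KB page size: 1600 << 12 == 6553600.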
+
+#------------------------------------------------------------------------------
+# System
+#------------------------------------------------------------------------------
+
+# CHECK: nop
+# CHECK: hint #127
+# CHECK: nop
+# CHECK: yield
+# CHECK: wfe
+# CHECK: wfi
+# CHECK: sev
+# CHECK: sevl
+0x1f 0x20 0x3 0xd5
+0xff 0x2f 0x3 0xd5
+0x1f 0x20 0x3 0xd5
+0x3f 0x20 0x3 0xd5
+0x5f 0x20 0x3 0xd5
+0x7f 0x20 0x3 0xd5
+0x9f 0x20 0x3 0xd5
+0xbf 0x20 0x3 0xd5
+
+# CHECK: clrex
+# CHECK: clrex #0
+# CHECK: clrex #7
+# CHECK: clrex
+0x5f 0x3f 0x3 0xd5
+0x5f 0x30 0x3 0xd5
+0x5f 0x37 0x3 0xd5
+0x5f 0x3f 0x3 0xd5
+
+# CHECK: dsb #0
+# CHECK: dsb #12
+# CHECK: dsb sy
+# CHECK: dsb oshld
+# CHECK: dsb oshst
+# CHECK: dsb osh
+# CHECK: dsb nshld
+# CHECK: dsb nshst
+# CHECK: dsb nsh
+# CHECK: dsb ishld
+# CHECK: dsb ishst
+# CHECK: dsb ish
+# CHECK: dsb ld
+# CHECK: dsb st
+# CHECK: dsb sy
+0x9f 0x30 0x3 0xd5
+0x9f 0x3c 0x3 0xd5
+0x9f 0x3f 0x3 0xd5
+0x9f 0x31 0x3 0xd5
+0x9f 0x32 0x3 0xd5
+0x9f 0x33 0x3 0xd5
+0x9f 0x35 0x3 0xd5
+0x9f 0x36 0x3 0xd5
+0x9f 0x37 0x3 0xd5
+0x9f 0x39 0x3 0xd5
+0x9f 0x3a 0x3 0xd5
+0x9f 0x3b 0x3 0xd5
+0x9f 0x3d 0x3 0xd5
+0x9f 0x3e 0x3 0xd5
+0x9f 0x3f 0x3 0xd5
+
+# CHECK: dmb #0
+# CHECK: dmb #12
+# CHECK: dmb sy
+# CHECK: dmb oshld
+# CHECK: dmb oshst
+# CHECK: dmb osh
+# CHECK: dmb nshld
+# CHECK: dmb nshst
+# CHECK: dmb nsh
+# CHECK: dmb ishld
+# CHECK: dmb ishst
+# CHECK: dmb ish
+# CHECK: dmb ld
+# CHECK: dmb st
+# CHECK: dmb sy
+0xbf 0x30 0x3 0xd5
+0xbf 0x3c 0x3 0xd5
+0xbf 0x3f 0x3 0xd5
+0xbf 0x31 0x3 0xd5
+0xbf 0x32 0x3 0xd5
+0xbf 0x33 0x3 0xd5
+0xbf 0x35 0x3 0xd5
+0xbf 0x36 0x3 0xd5
+0xbf 0x37 0x3 0xd5
+0xbf 0x39 0x3 0xd5
+0xbf 0x3a 0x3 0xd5
+0xbf 0x3b 0x3 0xd5
+0xbf 0x3d 0x3 0xd5
+0xbf 0x3e 0x3 0xd5
+0xbf 0x3f 0x3 0xd5
+
+# CHECK: isb
+# CHECK: isb #12
+0xdf 0x3f 0x3 0xd5
+0xdf 0x3c 0x3 0xd5
+
+# CHECK: msr spsel, #0
+# CHECK: msr daifset, #15
+# CHECK: msr daifclr, #12
+0xbf 0x40 0x0 0xd5
+0xdf 0x4f 0x3 0xd5
+0xff 0x4c 0x3 0xd5
+
+# CHECK: sys #7, c5, c9, #7, x5
+# CHECK: sys #0, c15, c15, #2
+# CHECK: sysl x9, #7, c5, c9, #7
+# CHECK: sysl x1, #0, c15, c15, #2
+0xe5 0x59 0xf 0xd5
+0x5f 0xff 0x8 0xd5
+0xe9 0x59 0x2f 0xd5
+0x41 0xff 0x28 0xd5
+
+# CHECK: sys #0, c7, c1, #0, xzr
+# CHECK: sys #0, c7, c5, #0, xzr
+# CHECK: sys #3, c7, c5, #1, x9
+0x1f 0x71 0x8 0xd5
+0x1f 0x75 0x8 0xd5
+0x29 0x75 0xb 0xd5
+
+# CHECK: sys #3, c7, c4, #1, x12
+# CHECK: sys #0, c7, c6, #1, xzr
+# CHECK: sys #0, c7, c6, #2, x2
+# CHECK: sys #3, c7, c10, #1, x9
+# CHECK: sys #0, c7, c10, #2, x10
+# CHECK: sys #3, c7, c11, #1, x0
+# CHECK: sys #3, c7, c14, #1, x3
+# CHECK: sys #0, c7, c14, #2, x30
+0x2c 0x74 0xb 0xd5
+0x3f 0x76 0x8 0xd5
+0x42 0x76 0x8 0xd5
+0x29 0x7a 0xb 0xd5
+0x4a 0x7a 0x8 0xd5
+0x20 0x7b 0xb 0xd5
+0x23 0x7e 0xb 0xd5
+0x5e 0x7e 0x8 0xd5
+
+# CHECK: msr teecr32_el1, x12
+# CHECK: msr osdtrrx_el1, x12
+# CHECK: msr mdccint_el1, x12
+# CHECK: msr mdscr_el1, x12
+# CHECK: msr osdtrtx_el1, x12
+# CHECK: msr dbgdtr_el0, x12
+# CHECK: msr dbgdtrtx_el0, x12
+# CHECK: msr oseccr_el1, x12
+# CHECK: msr dbgvcr32_el2, x12
+# CHECK: msr dbgbvr0_el1, x12
+# CHECK: msr dbgbvr1_el1, x12
+# CHECK: msr dbgbvr2_el1, x12
+# CHECK: msr dbgbvr3_el1, x12
+# CHECK: msr dbgbvr4_el1, x12
+# CHECK: msr dbgbvr5_el1, x12
+# CHECK: msr dbgbvr6_el1, x12
+# CHECK: msr dbgbvr7_el1, x12
+# CHECK: msr dbgbvr8_el1, x12
+# CHECK: msr dbgbvr9_el1, x12
+# CHECK: msr dbgbvr10_el1, x12
+# CHECK: msr dbgbvr11_el1, x12
+# CHECK: msr dbgbvr12_el1, x12
+# CHECK: msr dbgbvr13_el1, x12
+# CHECK: msr dbgbvr14_el1, x12
+# CHECK: msr dbgbvr15_el1, x12
+# CHECK: msr dbgbcr0_el1, x12
+# CHECK: msr dbgbcr1_el1, x12
+# CHECK: msr dbgbcr2_el1, x12
+# CHECK: msr dbgbcr3_el1, x12
+# CHECK: msr dbgbcr4_el1, x12
+# CHECK: msr dbgbcr5_el1, x12
+# CHECK: msr dbgbcr6_el1, x12
+# CHECK: msr dbgbcr7_el1, x12
+# CHECK: msr dbgbcr8_el1, x12
+# CHECK: msr dbgbcr9_el1, x12
+# CHECK: msr dbgbcr10_el1, x12
+# CHECK: msr dbgbcr11_el1, x12
+# CHECK: msr dbgbcr12_el1, x12
+# CHECK: msr dbgbcr13_el1, x12
+# CHECK: msr dbgbcr14_el1, x12
+# CHECK: msr dbgbcr15_el1, x12
+# CHECK: msr dbgwvr0_el1, x12
+# CHECK: msr dbgwvr1_el1, x12
+# CHECK: msr dbgwvr2_el1, x12
+# CHECK: msr dbgwvr3_el1, x12
+# CHECK: msr dbgwvr4_el1, x12
+# CHECK: msr dbgwvr5_el1, x12
+# CHECK: msr dbgwvr6_el1, x12
+# CHECK: msr dbgwvr7_el1, x12
+# CHECK: msr dbgwvr8_el1, x12
+# CHECK: msr dbgwvr9_el1, x12
+# CHECK: msr dbgwvr10_el1, x12
+# CHECK: msr dbgwvr11_el1, x12
+# CHECK: msr dbgwvr12_el1, x12
+# CHECK: msr dbgwvr13_el1, x12
+# CHECK: msr dbgwvr14_el1, x12
+# CHECK: msr dbgwvr15_el1, x12
+# CHECK: msr dbgwcr0_el1, x12
+# CHECK: msr dbgwcr1_el1, x12
+# CHECK: msr dbgwcr2_el1, x12
+# CHECK: msr dbgwcr3_el1, x12
+# CHECK: msr dbgwcr4_el1, x12
+# CHECK: msr dbgwcr5_el1, x12
+# CHECK: msr dbgwcr6_el1, x12
+# CHECK: msr dbgwcr7_el1, x12
+# CHECK: msr dbgwcr8_el1, x12
+# CHECK: msr dbgwcr9_el1, x12
+# CHECK: msr dbgwcr10_el1, x12
+# CHECK: msr dbgwcr11_el1, x12
+# CHECK: msr dbgwcr12_el1, x12
+# CHECK: msr dbgwcr13_el1, x12
+# CHECK: msr dbgwcr14_el1, x12
+# CHECK: msr dbgwcr15_el1, x12
+# CHECK: msr teehbr32_el1, x12
+# CHECK: msr oslar_el1, x12
+# CHECK: msr osdlr_el1, x12
+# CHECK: msr dbgprcr_el1, x12
+# CHECK: msr dbgclaimset_el1, x12
+# CHECK: msr dbgclaimclr_el1, x12
+# CHECK: msr csselr_el1, x12
+# CHECK: msr vpidr_el2, x12
+# CHECK: msr vmpidr_el2, x12
+# CHECK: msr sctlr_el1, x12
+# CHECK: msr sctlr_el2, x12
+# CHECK: msr sctlr_el3, x12
+# CHECK: msr actlr_el1, x12
+# CHECK: msr actlr_el2, x12
+# CHECK: msr actlr_el3, x12
+# CHECK: msr cpacr_el1, x12
+# CHECK: msr hcr_el2, x12
+# CHECK: msr scr_el3, x12
+# CHECK: msr mdcr_el2, x12
+# CHECK: msr sder32_el3, x12
+# CHECK: msr cptr_el2, x12
+# CHECK: msr cptr_el3, x12
+# CHECK: msr hstr_el2, x12
+# CHECK: msr hacr_el2, x12
+# CHECK: msr mdcr_el3, x12
+# CHECK: msr ttbr0_el1, x12
+# CHECK: msr ttbr0_el2, x12
+# CHECK: msr ttbr0_el3, x12
+# CHECK: msr ttbr1_el1, x12
+# CHECK: msr tcr_el1, x12
+# CHECK: msr tcr_el2, x12
+# CHECK: msr tcr_el3, x12
+# CHECK: msr vttbr_el2, x12
+# CHECK: msr vtcr_el2, x12
+# CHECK: msr dacr32_el2, x12
+# CHECK: msr spsr_el1, x12
+# CHECK: msr spsr_el2, x12
+# CHECK: msr spsr_el3, x12
+# CHECK: msr elr_el1, x12
+# CHECK: msr elr_el2, x12
+# CHECK: msr elr_el3, x12
+# CHECK: msr sp_el0, x12
+# CHECK: msr sp_el1, x12
+# CHECK: msr sp_el2, x12
+# CHECK: msr spsel, x12
+# CHECK: msr nzcv, x12
+# CHECK: msr daif, x12
+# CHECK: msr currentel, x12
+# CHECK: msr spsr_irq, x12
+# CHECK: msr spsr_abt, x12
+# CHECK: msr spsr_und, x12
+# CHECK: msr spsr_fiq, x12
+# CHECK: msr fpcr, x12
+# CHECK: msr fpsr, x12
+# CHECK: msr dspsr_el0, x12
+# CHECK: msr dlr_el0, x12
+# CHECK: msr ifsr32_el2, x12
+# CHECK: msr afsr0_el1, x12
+# CHECK: msr afsr0_el2, x12
+# CHECK: msr afsr0_el3, x12
+# CHECK: msr afsr1_el1, x12
+# CHECK: msr afsr1_el2, x12
+# CHECK: msr afsr1_el3, x12
+# CHECK: msr esr_el1, x12
+# CHECK: msr esr_el2, x12
+# CHECK: msr esr_el3, x12
+# CHECK: msr fpexc32_el2, x12
+# CHECK: msr far_el1, x12
+# CHECK: msr far_el2, x12
+# CHECK: msr far_el3, x12
+# CHECK: msr hpfar_el2, x12
+# CHECK: msr par_el1, x12
+# CHECK: msr pmcr_el0, x12
+# CHECK: msr pmcntenset_el0, x12
+# CHECK: msr pmcntenclr_el0, x12
+# CHECK: msr pmovsclr_el0, x12
+# CHECK: msr pmselr_el0, x12
+# CHECK: msr pmccntr_el0, x12
+# CHECK: msr pmxevtyper_el0, x12
+# CHECK: msr pmxevcntr_el0, x12
+# CHECK: msr pmuserenr_el0, x12
+# CHECK: msr pmintenset_el1, x12
+# CHECK: msr pmintenclr_el1, x12
+# CHECK: msr pmovsset_el0, x12
+# CHECK: msr mair_el1, x12
+# CHECK: msr mair_el2, x12
+# CHECK: msr mair_el3, x12
+# CHECK: msr amair_el1, x12
+# CHECK: msr amair_el2, x12
+# CHECK: msr amair_el3, x12
+# CHECK: msr vbar_el1, x12
+# CHECK: msr vbar_el2, x12
+# CHECK: msr vbar_el3, x12
+# CHECK: msr rmr_el1, x12
+# CHECK: msr rmr_el2, x12
+# CHECK: msr rmr_el3, x12
+# CHECK: msr tpidr_el0, x12
+# CHECK: msr tpidr_el2, x12
+# CHECK: msr tpidr_el3, x12
+# CHECK: msr tpidrro_el0, x12
+# CHECK: msr tpidr_el1, x12
+# CHECK: msr cntfrq_el0, x12
+# CHECK: msr cntvoff_el2, x12
+# CHECK: msr cntkctl_el1, x12
+# CHECK: msr cnthctl_el2, x12
+# CHECK: msr cntp_tval_el0, x12
+# CHECK: msr cnthp_tval_el2, x12
+# CHECK: msr cntps_tval_el1, x12
+# CHECK: msr cntp_ctl_el0, x12
+# CHECK: msr cnthp_ctl_el2, x12
+# CHECK: msr cntps_ctl_el1, x12
+# CHECK: msr cntp_cval_el0, x12
+# CHECK: msr cnthp_cval_el2, x12
+# CHECK: msr cntps_cval_el1, x12
+# CHECK: msr cntv_tval_el0, x12
+# CHECK: msr cntv_ctl_el0, x12
+# CHECK: msr cntv_cval_el0, x12
+# CHECK: msr pmevcntr0_el0, x12
+# CHECK: msr pmevcntr1_el0, x12
+# CHECK: msr pmevcntr2_el0, x12
+# CHECK: msr pmevcntr3_el0, x12
+# CHECK: msr pmevcntr4_el0, x12
+# CHECK: msr pmevcntr5_el0, x12
+# CHECK: msr pmevcntr6_el0, x12
+# CHECK: msr pmevcntr7_el0, x12
+# CHECK: msr pmevcntr8_el0, x12
+# CHECK: msr pmevcntr9_el0, x12
+# CHECK: msr pmevcntr10_el0, x12
+# CHECK: msr pmevcntr11_el0, x12
+# CHECK: msr pmevcntr12_el0, x12
+# CHECK: msr pmevcntr13_el0, x12
+# CHECK: msr pmevcntr14_el0, x12
+# CHECK: msr pmevcntr15_el0, x12
+# CHECK: msr pmevcntr16_el0, x12
+# CHECK: msr pmevcntr17_el0, x12
+# CHECK: msr pmevcntr18_el0, x12
+# CHECK: msr pmevcntr19_el0, x12
+# CHECK: msr pmevcntr20_el0, x12
+# CHECK: msr pmevcntr21_el0, x12
+# CHECK: msr pmevcntr22_el0, x12
+# CHECK: msr pmevcntr23_el0, x12
+# CHECK: msr pmevcntr24_el0, x12
+# CHECK: msr pmevcntr25_el0, x12
+# CHECK: msr pmevcntr26_el0, x12
+# CHECK: msr pmevcntr27_el0, x12
+# CHECK: msr pmevcntr28_el0, x12
+# CHECK: msr pmevcntr29_el0, x12
+# CHECK: msr pmevcntr30_el0, x12
+# CHECK: msr pmccfiltr_el0, x12
+# CHECK: msr pmevtyper0_el0, x12
+# CHECK: msr pmevtyper1_el0, x12
+# CHECK: msr pmevtyper2_el0, x12
+# CHECK: msr pmevtyper3_el0, x12
+# CHECK: msr pmevtyper4_el0, x12
+# CHECK: msr pmevtyper5_el0, x12
+# CHECK: msr pmevtyper6_el0, x12
+# CHECK: msr pmevtyper7_el0, x12
+# CHECK: msr pmevtyper8_el0, x12
+# CHECK: msr pmevtyper9_el0, x12
+# CHECK: msr pmevtyper10_el0, x12
+# CHECK: msr pmevtyper11_el0, x12
+# CHECK: msr pmevtyper12_el0, x12
+# CHECK: msr pmevtyper13_el0, x12
+# CHECK: msr pmevtyper14_el0, x12
+# CHECK: msr pmevtyper15_el0, x12
+# CHECK: msr pmevtyper16_el0, x12
+# CHECK: msr pmevtyper17_el0, x12
+# CHECK: msr pmevtyper18_el0, x12
+# CHECK: msr pmevtyper19_el0, x12
+# CHECK: msr pmevtyper20_el0, x12
+# CHECK: msr pmevtyper21_el0, x12
+# CHECK: msr pmevtyper22_el0, x12
+# CHECK: msr pmevtyper23_el0, x12
+# CHECK: msr pmevtyper24_el0, x12
+# CHECK: msr pmevtyper25_el0, x12
+# CHECK: msr pmevtyper26_el0, x12
+# CHECK: msr pmevtyper27_el0, x12
+# CHECK: msr pmevtyper28_el0, x12
+# CHECK: msr pmevtyper29_el0, x12
+# CHECK: msr pmevtyper30_el0, x12
+# CHECK: mrs x9, teecr32_el1
+# CHECK: mrs x9, osdtrrx_el1
+# CHECK: mrs x9, mdccsr_el0
+# CHECK: mrs x9, mdccint_el1
+# CHECK: mrs x9, mdscr_el1
+# CHECK: mrs x9, osdtrtx_el1
+# CHECK: mrs x9, dbgdtr_el0
+# CHECK: mrs x9, dbgdtrrx_el0
+# CHECK: mrs x9, oseccr_el1
+# CHECK: mrs x9, dbgvcr32_el2
+# CHECK: mrs x9, dbgbvr0_el1
+# CHECK: mrs x9, dbgbvr1_el1
+# CHECK: mrs x9, dbgbvr2_el1
+# CHECK: mrs x9, dbgbvr3_el1
+# CHECK: mrs x9, dbgbvr4_el1
+# CHECK: mrs x9, dbgbvr5_el1
+# CHECK: mrs x9, dbgbvr6_el1
+# CHECK: mrs x9, dbgbvr7_el1
+# CHECK: mrs x9, dbgbvr8_el1
+# CHECK: mrs x9, dbgbvr9_el1
+# CHECK: mrs x9, dbgbvr10_el1
+# CHECK: mrs x9, dbgbvr11_el1
+# CHECK: mrs x9, dbgbvr12_el1
+# CHECK: mrs x9, dbgbvr13_el1
+# CHECK: mrs x9, dbgbvr14_el1
+# CHECK: mrs x9, dbgbvr15_el1
+# CHECK: mrs x9, dbgbcr0_el1
+# CHECK: mrs x9, dbgbcr1_el1
+# CHECK: mrs x9, dbgbcr2_el1
+# CHECK: mrs x9, dbgbcr3_el1
+# CHECK: mrs x9, dbgbcr4_el1
+# CHECK: mrs x9, dbgbcr5_el1
+# CHECK: mrs x9, dbgbcr6_el1
+# CHECK: mrs x9, dbgbcr7_el1
+# CHECK: mrs x9, dbgbcr8_el1
+# CHECK: mrs x9, dbgbcr9_el1
+# CHECK: mrs x9, dbgbcr10_el1
+# CHECK: mrs x9, dbgbcr11_el1
+# CHECK: mrs x9, dbgbcr12_el1
+# CHECK: mrs x9, dbgbcr13_el1
+# CHECK: mrs x9, dbgbcr14_el1
+# CHECK: mrs x9, dbgbcr15_el1
+# CHECK: mrs x9, dbgwvr0_el1
+# CHECK: mrs x9, dbgwvr1_el1
+# CHECK: mrs x9, dbgwvr2_el1
+# CHECK: mrs x9, dbgwvr3_el1
+# CHECK: mrs x9, dbgwvr4_el1
+# CHECK: mrs x9, dbgwvr5_el1
+# CHECK: mrs x9, dbgwvr6_el1
+# CHECK: mrs x9, dbgwvr7_el1
+# CHECK: mrs x9, dbgwvr8_el1
+# CHECK: mrs x9, dbgwvr9_el1
+# CHECK: mrs x9, dbgwvr10_el1
+# CHECK: mrs x9, dbgwvr11_el1
+# CHECK: mrs x9, dbgwvr12_el1
+# CHECK: mrs x9, dbgwvr13_el1
+# CHECK: mrs x9, dbgwvr14_el1
+# CHECK: mrs x9, dbgwvr15_el1
+# CHECK: mrs x9, dbgwcr0_el1
+# CHECK: mrs x9, dbgwcr1_el1
+# CHECK: mrs x9, dbgwcr2_el1
+# CHECK: mrs x9, dbgwcr3_el1
+# CHECK: mrs x9, dbgwcr4_el1
+# CHECK: mrs x9, dbgwcr5_el1
+# CHECK: mrs x9, dbgwcr6_el1
+# CHECK: mrs x9, dbgwcr7_el1
+# CHECK: mrs x9, dbgwcr8_el1
+# CHECK: mrs x9, dbgwcr9_el1
+# CHECK: mrs x9, dbgwcr10_el1
+# CHECK: mrs x9, dbgwcr11_el1
+# CHECK: mrs x9, dbgwcr12_el1
+# CHECK: mrs x9, dbgwcr13_el1
+# CHECK: mrs x9, dbgwcr14_el1
+# CHECK: mrs x9, dbgwcr15_el1
+# CHECK: mrs x9, mdrar_el1
+# CHECK: mrs x9, teehbr32_el1
+# CHECK: mrs x9, oslsr_el1
+# CHECK: mrs x9, osdlr_el1
+# CHECK: mrs x9, dbgprcr_el1
+# CHECK: mrs x9, dbgclaimset_el1
+# CHECK: mrs x9, dbgclaimclr_el1
+# CHECK: mrs x9, dbgauthstatus_el1
+# CHECK: mrs x9, midr_el1
+# CHECK: mrs x9, ccsidr_el1
+# CHECK: mrs x9, csselr_el1
+# CHECK: mrs x9, vpidr_el2
+# CHECK: mrs x9, clidr_el1
+# CHECK: mrs x9, ctr_el0
+# CHECK: mrs x9, mpidr_el1
+# CHECK: mrs x9, vmpidr_el2
+# CHECK: mrs x9, revidr_el1
+# CHECK: mrs x9, aidr_el1
+# CHECK: mrs x9, dczid_el0
+# CHECK: mrs x9, id_pfr0_el1
+# CHECK: mrs x9, id_pfr1_el1
+# CHECK: mrs x9, id_dfr0_el1
+# CHECK: mrs x9, id_afr0_el1
+# CHECK: mrs x9, id_mmfr0_el1
+# CHECK: mrs x9, id_mmfr1_el1
+# CHECK: mrs x9, id_mmfr2_el1
+# CHECK: mrs x9, id_mmfr3_el1
+# CHECK: mrs x9, id_isar0_el1
+# CHECK: mrs x9, id_isar1_el1
+# CHECK: mrs x9, id_isar2_el1
+# CHECK: mrs x9, id_isar3_el1
+# CHECK: mrs x9, id_isar4_el1
+# CHECK: mrs x9, id_isar5_el1
+# CHECK: mrs x9, mvfr0_el1
+# CHECK: mrs x9, mvfr1_el1
+# CHECK: mrs x9, mvfr2_el1
+# CHECK: mrs x9, id_aa64pfr0_el1
+# CHECK: mrs x9, id_aa64pfr1_el1
+# CHECK: mrs x9, id_aa64dfr0_el1
+# CHECK: mrs x9, id_aa64dfr1_el1
+# CHECK: mrs x9, id_aa64afr0_el1
+# CHECK: mrs x9, id_aa64afr1_el1
+# CHECK: mrs x9, id_aa64isar0_el1
+# CHECK: mrs x9, id_aa64isar1_el1
+# CHECK: mrs x9, id_aa64mmfr0_el1
+# CHECK: mrs x9, id_aa64mmfr1_el1
+# CHECK: mrs x9, sctlr_el1
+# CHECK: mrs x9, sctlr_el2
+# CHECK: mrs x9, sctlr_el3
+# CHECK: mrs x9, actlr_el1
+# CHECK: mrs x9, actlr_el2
+# CHECK: mrs x9, actlr_el3
+# CHECK: mrs x9, cpacr_el1
+# CHECK: mrs x9, hcr_el2
+# CHECK: mrs x9, scr_el3
+# CHECK: mrs x9, mdcr_el2
+# CHECK: mrs x9, sder32_el3
+# CHECK: mrs x9, cptr_el2
+# CHECK: mrs x9, cptr_el3
+# CHECK: mrs x9, hstr_el2
+# CHECK: mrs x9, hacr_el2
+# CHECK: mrs x9, mdcr_el3
+# CHECK: mrs x9, ttbr0_el1
+# CHECK: mrs x9, ttbr0_el2
+# CHECK: mrs x9, ttbr0_el3
+# CHECK: mrs x9, ttbr1_el1
+# CHECK: mrs x9, tcr_el1
+# CHECK: mrs x9, tcr_el2
+# CHECK: mrs x9, tcr_el3
+# CHECK: mrs x9, vttbr_el2
+# CHECK: mrs x9, vtcr_el2
+# CHECK: mrs x9, dacr32_el2
+# CHECK: mrs x9, spsr_el1
+# CHECK: mrs x9, spsr_el2
+# CHECK: mrs x9, spsr_el3
+# CHECK: mrs x9, elr_el1
+# CHECK: mrs x9, elr_el2
+# CHECK: mrs x9, elr_el3
+# CHECK: mrs x9, sp_el0
+# CHECK: mrs x9, sp_el1
+# CHECK: mrs x9, sp_el2
+# CHECK: mrs x9, spsel
+# CHECK: mrs x9, nzcv
+# CHECK: mrs x9, daif
+# CHECK: mrs x9, currentel
+# CHECK: mrs x9, spsr_irq
+# CHECK: mrs x9, spsr_abt
+# CHECK: mrs x9, spsr_und
+# CHECK: mrs x9, spsr_fiq
+# CHECK: mrs x9, fpcr
+# CHECK: mrs x9, fpsr
+# CHECK: mrs x9, dspsr_el0
+# CHECK: mrs x9, dlr_el0
+# CHECK: mrs x9, ifsr32_el2
+# CHECK: mrs x9, afsr0_el1
+# CHECK: mrs x9, afsr0_el2
+# CHECK: mrs x9, afsr0_el3
+# CHECK: mrs x9, afsr1_el1
+# CHECK: mrs x9, afsr1_el2
+# CHECK: mrs x9, afsr1_el3
+# CHECK: mrs x9, esr_el1
+# CHECK: mrs x9, esr_el2
+# CHECK: mrs x9, esr_el3
+# CHECK: mrs x9, fpexc32_el2
+# CHECK: mrs x9, far_el1
+# CHECK: mrs x9, far_el2
+# CHECK: mrs x9, far_el3
+# CHECK: mrs x9, hpfar_el2
+# CHECK: mrs x9, par_el1
+# CHECK: mrs x9, pmcr_el0
+# CHECK: mrs x9, pmcntenset_el0
+# CHECK: mrs x9, pmcntenclr_el0
+# CHECK: mrs x9, pmovsclr_el0
+# CHECK: mrs x9, pmselr_el0
+# CHECK: mrs x9, pmceid0_el0
+# CHECK: mrs x9, pmceid1_el0
+# CHECK: mrs x9, pmccntr_el0
+# CHECK: mrs x9, pmxevtyper_el0
+# CHECK: mrs x9, pmxevcntr_el0
+# CHECK: mrs x9, pmuserenr_el0
+# CHECK: mrs x9, pmintenset_el1
+# CHECK: mrs x9, pmintenclr_el1
+# CHECK: mrs x9, pmovsset_el0
+# CHECK: mrs x9, mair_el1
+# CHECK: mrs x9, mair_el2
+# CHECK: mrs x9, mair_el3
+# CHECK: mrs x9, amair_el1
+# CHECK: mrs x9, amair_el2
+# CHECK: mrs x9, amair_el3
+# CHECK: mrs x9, vbar_el1
+# CHECK: mrs x9, vbar_el2
+# CHECK: mrs x9, vbar_el3
+# CHECK: mrs x9, rvbar_el1
+# CHECK: mrs x9, rvbar_el2
+# CHECK: mrs x9, rvbar_el3
+# CHECK: mrs x9, rmr_el1
+# CHECK: mrs x9, rmr_el2
+# CHECK: mrs x9, rmr_el3
+# CHECK: mrs x9, isr_el1
+# CHECK: mrs x9, contextidr_el1
+# CHECK: mrs x9, tpidr_el0
+# CHECK: mrs x9, tpidr_el2
+# CHECK: mrs x9, tpidr_el3
+# CHECK: mrs x9, tpidrro_el0
+# CHECK: mrs x9, tpidr_el1
+# CHECK: mrs x9, cntfrq_el0
+# CHECK: mrs x9, cntpct_el0
+# CHECK: mrs x9, cntvct_el0
+# CHECK: mrs x9, cntvoff_el2
+# CHECK: mrs x9, cntkctl_el1
+# CHECK: mrs x9, cnthctl_el2
+# CHECK: mrs x9, cntp_tval_el0
+# CHECK: mrs x9, cnthp_tval_el2
+# CHECK: mrs x9, cntps_tval_el1
+# CHECK: mrs x9, cntp_ctl_el0
+# CHECK: mrs x9, cnthp_ctl_el2
+# CHECK: mrs x9, cntps_ctl_el1
+# CHECK: mrs x9, cntp_cval_el0
+# CHECK: mrs x9, cnthp_cval_el2
+# CHECK: mrs x9, cntps_cval_el1
+# CHECK: mrs x9, cntv_tval_el0
+# CHECK: mrs x9, cntv_ctl_el0
+# CHECK: mrs x9, cntv_cval_el0
+# CHECK: mrs x9, pmevcntr0_el0
+# CHECK: mrs x9, pmevcntr1_el0
+# CHECK: mrs x9, pmevcntr2_el0
+# CHECK: mrs x9, pmevcntr3_el0
+# CHECK: mrs x9, pmevcntr4_el0
+# CHECK: mrs x9, pmevcntr5_el0
+# CHECK: mrs x9, pmevcntr6_el0
+# CHECK: mrs x9, pmevcntr7_el0
+# CHECK: mrs x9, pmevcntr8_el0
+# CHECK: mrs x9, pmevcntr9_el0
+# CHECK: mrs x9, pmevcntr10_el0
+# CHECK: mrs x9, pmevcntr11_el0
+# CHECK: mrs x9, pmevcntr12_el0
+# CHECK: mrs x9, pmevcntr13_el0
+# CHECK: mrs x9, pmevcntr14_el0
+# CHECK: mrs x9, pmevcntr15_el0
+# CHECK: mrs x9, pmevcntr16_el0
+# CHECK: mrs x9, pmevcntr17_el0
+# CHECK: mrs x9, pmevcntr18_el0
+# CHECK: mrs x9, pmevcntr19_el0
+# CHECK: mrs x9, pmevcntr20_el0
+# CHECK: mrs x9, pmevcntr21_el0
+# CHECK: mrs x9, pmevcntr22_el0
+# CHECK: mrs x9, pmevcntr23_el0
+# CHECK: mrs x9, pmevcntr24_el0
+# CHECK: mrs x9, pmevcntr25_el0
+# CHECK: mrs x9, pmevcntr26_el0
+# CHECK: mrs x9, pmevcntr27_el0
+# CHECK: mrs x9, pmevcntr28_el0
+# CHECK: mrs x9, pmevcntr29_el0
+# CHECK: mrs x9, pmevcntr30_el0
+# CHECK: mrs x9, pmccfiltr_el0
+# CHECK: mrs x9, pmevtyper0_el0
+# CHECK: mrs x9, pmevtyper1_el0
+# CHECK: mrs x9, pmevtyper2_el0
+# CHECK: mrs x9, pmevtyper3_el0
+# CHECK: mrs x9, pmevtyper4_el0
+# CHECK: mrs x9, pmevtyper5_el0
+# CHECK: mrs x9, pmevtyper6_el0
+# CHECK: mrs x9, pmevtyper7_el0
+# CHECK: mrs x9, pmevtyper8_el0
+# CHECK: mrs x9, pmevtyper9_el0
+# CHECK: mrs x9, pmevtyper10_el0
+# CHECK: mrs x9, pmevtyper11_el0
+# CHECK: mrs x9, pmevtyper12_el0
+# CHECK: mrs x9, pmevtyper13_el0
+# CHECK: mrs x9, pmevtyper14_el0
+# CHECK: mrs x9, pmevtyper15_el0
+# CHECK: mrs x9, pmevtyper16_el0
+# CHECK: mrs x9, pmevtyper17_el0
+# CHECK: mrs x9, pmevtyper18_el0
+# CHECK: mrs x9, pmevtyper19_el0
+# CHECK: mrs x9, pmevtyper20_el0
+# CHECK: mrs x9, pmevtyper21_el0
+# CHECK: mrs x9, pmevtyper22_el0
+# CHECK: mrs x9, pmevtyper23_el0
+# CHECK: mrs x9, pmevtyper24_el0
+# CHECK: mrs x9, pmevtyper25_el0
+# CHECK: mrs x9, pmevtyper26_el0
+# CHECK: mrs x9, pmevtyper27_el0
+# CHECK: mrs x9, pmevtyper28_el0
+# CHECK: mrs x9, pmevtyper29_el0
+# CHECK: mrs x9, pmevtyper30_el0
+
+0xc 0x0 0x12 0xd5
+0x4c 0x0 0x10 0xd5
+0xc 0x2 0x10 0xd5
+0x4c 0x2 0x10 0xd5
+0x4c 0x3 0x10 0xd5
+0xc 0x4 0x13 0xd5
+0xc 0x5 0x13 0xd5
+0x4c 0x6 0x10 0xd5
+0xc 0x7 0x14 0xd5
+0x8c 0x0 0x10 0xd5
+0x8c 0x1 0x10 0xd5
+0x8c 0x2 0x10 0xd5
+0x8c 0x3 0x10 0xd5
+0x8c 0x4 0x10 0xd5
+0x8c 0x5 0x10 0xd5
+0x8c 0x6 0x10 0xd5
+0x8c 0x7 0x10 0xd5
+0x8c 0x8 0x10 0xd5
+0x8c 0x9 0x10 0xd5
+0x8c 0xa 0x10 0xd5
+0x8c 0xb 0x10 0xd5
+0x8c 0xc 0x10 0xd5
+0x8c 0xd 0x10 0xd5
+0x8c 0xe 0x10 0xd5
+0x8c 0xf 0x10 0xd5
+0xac 0x0 0x10 0xd5
+0xac 0x1 0x10 0xd5
+0xac 0x2 0x10 0xd5
+0xac 0x3 0x10 0xd5
+0xac 0x4 0x10 0xd5
+0xac 0x5 0x10 0xd5
+0xac 0x6 0x10 0xd5
+0xac 0x7 0x10 0xd5
+0xac 0x8 0x10 0xd5
+0xac 0x9 0x10 0xd5
+0xac 0xa 0x10 0xd5
+0xac 0xb 0x10 0xd5
+0xac 0xc 0x10 0xd5
+0xac 0xd 0x10 0xd5
+0xac 0xe 0x10 0xd5
+0xac 0xf 0x10 0xd5
+0xcc 0x0 0x10 0xd5
+0xcc 0x1 0x10 0xd5
+0xcc 0x2 0x10 0xd5
+0xcc 0x3 0x10 0xd5
+0xcc 0x4 0x10 0xd5
+0xcc 0x5 0x10 0xd5
+0xcc 0x6 0x10 0xd5
+0xcc 0x7 0x10 0xd5
+0xcc 0x8 0x10 0xd5
+0xcc 0x9 0x10 0xd5
+0xcc 0xa 0x10 0xd5
+0xcc 0xb 0x10 0xd5
+0xcc 0xc 0x10 0xd5
+0xcc 0xd 0x10 0xd5
+0xcc 0xe 0x10 0xd5
+0xcc 0xf 0x10 0xd5
+0xec 0x0 0x10 0xd5
+0xec 0x1 0x10 0xd5
+0xec 0x2 0x10 0xd5
+0xec 0x3 0x10 0xd5
+0xec 0x4 0x10 0xd5
+0xec 0x5 0x10 0xd5
+0xec 0x6 0x10 0xd5
+0xec 0x7 0x10 0xd5
+0xec 0x8 0x10 0xd5
+0xec 0x9 0x10 0xd5
+0xec 0xa 0x10 0xd5
+0xec 0xb 0x10 0xd5
+0xec 0xc 0x10 0xd5
+0xec 0xd 0x10 0xd5
+0xec 0xe 0x10 0xd5
+0xec 0xf 0x10 0xd5
+0xc 0x10 0x12 0xd5
+0x8c 0x10 0x10 0xd5
+0x8c 0x13 0x10 0xd5
+0x8c 0x14 0x10 0xd5
+0xcc 0x78 0x10 0xd5
+0xcc 0x79 0x10 0xd5
+0xc 0x0 0x1a 0xd5
+0xc 0x0 0x1c 0xd5
+0xac 0x0 0x1c 0xd5
+0xc 0x10 0x18 0xd5
+0xc 0x10 0x1c 0xd5
+0xc 0x10 0x1e 0xd5
+0x2c 0x10 0x18 0xd5
+0x2c 0x10 0x1c 0xd5
+0x2c 0x10 0x1e 0xd5
+0x4c 0x10 0x18 0xd5
+0xc 0x11 0x1c 0xd5
+0xc 0x11 0x1e 0xd5
+0x2c 0x11 0x1c 0xd5
+0x2c 0x11 0x1e 0xd5
+0x4c 0x11 0x1c 0xd5
+0x4c 0x11 0x1e 0xd5
+0x6c 0x11 0x1c 0xd5
+0xec 0x11 0x1c 0xd5
+0x2c 0x13 0x1e 0xd5
+0xc 0x20 0x18 0xd5
+0xc 0x20 0x1c 0xd5
+0xc 0x20 0x1e 0xd5
+0x2c 0x20 0x18 0xd5
+0x4c 0x20 0x18 0xd5
+0x4c 0x20 0x1c 0xd5
+0x4c 0x20 0x1e 0xd5
+0xc 0x21 0x1c 0xd5
+0x4c 0x21 0x1c 0xd5
+0xc 0x30 0x1c 0xd5
+0xc 0x40 0x18 0xd5
+0xc 0x40 0x1c 0xd5
+0xc 0x40 0x1e 0xd5
+0x2c 0x40 0x18 0xd5
+0x2c 0x40 0x1c 0xd5
+0x2c 0x40 0x1e 0xd5
+0xc 0x41 0x18 0xd5
+0xc 0x41 0x1c 0xd5
+0xc 0x41 0x1e 0xd5
+0xc 0x42 0x18 0xd5
+0xc 0x42 0x1b 0xd5
+0x2c 0x42 0x1b 0xd5
+0x4c 0x42 0x18 0xd5
+0xc 0x43 0x1c 0xd5
+0x2c 0x43 0x1c 0xd5
+0x4c 0x43 0x1c 0xd5
+0x6c 0x43 0x1c 0xd5
+0xc 0x44 0x1b 0xd5
+0x2c 0x44 0x1b 0xd5
+0xc 0x45 0x1b 0xd5
+0x2c 0x45 0x1b 0xd5
+0x2c 0x50 0x1c 0xd5
+0xc 0x51 0x18 0xd5
+0xc 0x51 0x1c 0xd5
+0xc 0x51 0x1e 0xd5
+0x2c 0x51 0x18 0xd5
+0x2c 0x51 0x1c 0xd5
+0x2c 0x51 0x1e 0xd5
+0xc 0x52 0x18 0xd5
+0xc 0x52 0x1c 0xd5
+0xc 0x52 0x1e 0xd5
+0xc 0x53 0x1c 0xd5
+0xc 0x60 0x18 0xd5
+0xc 0x60 0x1c 0xd5
+0xc 0x60 0x1e 0xd5
+0x8c 0x60 0x1c 0xd5
+0xc 0x74 0x18 0xd5
+0xc 0x9c 0x1b 0xd5
+0x2c 0x9c 0x1b 0xd5
+0x4c 0x9c 0x1b 0xd5
+0x6c 0x9c 0x1b 0xd5
+0xac 0x9c 0x1b 0xd5
+0xc 0x9d 0x1b 0xd5
+0x2c 0x9d 0x1b 0xd5
+0x4c 0x9d 0x1b 0xd5
+0xc 0x9e 0x1b 0xd5
+0x2c 0x9e 0x18 0xd5
+0x4c 0x9e 0x18 0xd5
+0x6c 0x9e 0x1b 0xd5
+0xc 0xa2 0x18 0xd5
+0xc 0xa2 0x1c 0xd5
+0xc 0xa2 0x1e 0xd5
+0xc 0xa3 0x18 0xd5
+0xc 0xa3 0x1c 0xd5
+0xc 0xa3 0x1e 0xd5
+0xc 0xc0 0x18 0xd5
+0xc 0xc0 0x1c 0xd5
+0xc 0xc0 0x1e 0xd5
+0x4c 0xc0 0x18 0xd5
+0x4c 0xc0 0x1c 0xd5
+0x4c 0xc0 0x1e 0xd5
+0x4c 0xd0 0x1b 0xd5
+0x4c 0xd0 0x1c 0xd5
+0x4c 0xd0 0x1e 0xd5
+0x6c 0xd0 0x1b 0xd5
+0x8c 0xd0 0x18 0xd5
+0xc 0xe0 0x1b 0xd5
+0x6c 0xe0 0x1c 0xd5
+0xc 0xe1 0x18 0xd5
+0xc 0xe1 0x1c 0xd5
+0xc 0xe2 0x1b 0xd5
+0xc 0xe2 0x1c 0xd5
+0xc 0xe2 0x1f 0xd5
+0x2c 0xe2 0x1b 0xd5
+0x2c 0xe2 0x1c 0xd5
+0x2c 0xe2 0x1f 0xd5
+0x4c 0xe2 0x1b 0xd5
+0x4c 0xe2 0x1c 0xd5
+0x4c 0xe2 0x1f 0xd5
+0xc 0xe3 0x1b 0xd5
+0x2c 0xe3 0x1b 0xd5
+0x4c 0xe3 0x1b 0xd5
+0xc 0xe8 0x1b 0xd5
+0x2c 0xe8 0x1b 0xd5
+0x4c 0xe8 0x1b 0xd5
+0x6c 0xe8 0x1b 0xd5
+0x8c 0xe8 0x1b 0xd5
+0xac 0xe8 0x1b 0xd5
+0xcc 0xe8 0x1b 0xd5
+0xec 0xe8 0x1b 0xd5
+0xc 0xe9 0x1b 0xd5
+0x2c 0xe9 0x1b 0xd5
+0x4c 0xe9 0x1b 0xd5
+0x6c 0xe9 0x1b 0xd5
+0x8c 0xe9 0x1b 0xd5
+0xac 0xe9 0x1b 0xd5
+0xcc 0xe9 0x1b 0xd5
+0xec 0xe9 0x1b 0xd5
+0xc 0xea 0x1b 0xd5
+0x2c 0xea 0x1b 0xd5
+0x4c 0xea 0x1b 0xd5
+0x6c 0xea 0x1b 0xd5
+0x8c 0xea 0x1b 0xd5
+0xac 0xea 0x1b 0xd5
+0xcc 0xea 0x1b 0xd5
+0xec 0xea 0x1b 0xd5
+0xc 0xeb 0x1b 0xd5
+0x2c 0xeb 0x1b 0xd5
+0x4c 0xeb 0x1b 0xd5
+0x6c 0xeb 0x1b 0xd5
+0x8c 0xeb 0x1b 0xd5
+0xac 0xeb 0x1b 0xd5
+0xcc 0xeb 0x1b 0xd5
+0xec 0xef 0x1b 0xd5
+0xc 0xec 0x1b 0xd5
+0x2c 0xec 0x1b 0xd5
+0x4c 0xec 0x1b 0xd5
+0x6c 0xec 0x1b 0xd5
+0x8c 0xec 0x1b 0xd5
+0xac 0xec 0x1b 0xd5
+0xcc 0xec 0x1b 0xd5
+0xec 0xec 0x1b 0xd5
+0xc 0xed 0x1b 0xd5
+0x2c 0xed 0x1b 0xd5
+0x4c 0xed 0x1b 0xd5
+0x6c 0xed 0x1b 0xd5
+0x8c 0xed 0x1b 0xd5
+0xac 0xed 0x1b 0xd5
+0xcc 0xed 0x1b 0xd5
+0xec 0xed 0x1b 0xd5
+0xc 0xee 0x1b 0xd5
+0x2c 0xee 0x1b 0xd5
+0x4c 0xee 0x1b 0xd5
+0x6c 0xee 0x1b 0xd5
+0x8c 0xee 0x1b 0xd5
+0xac 0xee 0x1b 0xd5
+0xcc 0xee 0x1b 0xd5
+0xec 0xee 0x1b 0xd5
+0xc 0xef 0x1b 0xd5
+0x2c 0xef 0x1b 0xd5
+0x4c 0xef 0x1b 0xd5
+0x6c 0xef 0x1b 0xd5
+0x8c 0xef 0x1b 0xd5
+0xac 0xef 0x1b 0xd5
+0xcc 0xef 0x1b 0xd5
+0x9 0x0 0x32 0xd5
+0x49 0x0 0x30 0xd5
+0x9 0x1 0x33 0xd5
+0x9 0x2 0x30 0xd5
+0x49 0x2 0x30 0xd5
+0x49 0x3 0x30 0xd5
+0x9 0x4 0x33 0xd5
+0x9 0x5 0x33 0xd5
+0x49 0x6 0x30 0xd5
+0x9 0x7 0x34 0xd5
+0x89 0x0 0x30 0xd5
+0x89 0x1 0x30 0xd5
+0x89 0x2 0x30 0xd5
+0x89 0x3 0x30 0xd5
+0x89 0x4 0x30 0xd5
+0x89 0x5 0x30 0xd5
+0x89 0x6 0x30 0xd5
+0x89 0x7 0x30 0xd5
+0x89 0x8 0x30 0xd5
+0x89 0x9 0x30 0xd5
+0x89 0xa 0x30 0xd5
+0x89 0xb 0x30 0xd5
+0x89 0xc 0x30 0xd5
+0x89 0xd 0x30 0xd5
+0x89 0xe 0x30 0xd5
+0x89 0xf 0x30 0xd5
+0xa9 0x0 0x30 0xd5
+0xa9 0x1 0x30 0xd5
+0xa9 0x2 0x30 0xd5
+0xa9 0x3 0x30 0xd5
+0xa9 0x4 0x30 0xd5
+0xa9 0x5 0x30 0xd5
+0xa9 0x6 0x30 0xd5
+0xa9 0x7 0x30 0xd5
+0xa9 0x8 0x30 0xd5
+0xa9 0x9 0x30 0xd5
+0xa9 0xa 0x30 0xd5
+0xa9 0xb 0x30 0xd5
+0xa9 0xc 0x30 0xd5
+0xa9 0xd 0x30 0xd5
+0xa9 0xe 0x30 0xd5
+0xa9 0xf 0x30 0xd5
+0xc9 0x0 0x30 0xd5
+0xc9 0x1 0x30 0xd5
+0xc9 0x2 0x30 0xd5
+0xc9 0x3 0x30 0xd5
+0xc9 0x4 0x30 0xd5
+0xc9 0x5 0x30 0xd5
+0xc9 0x6 0x30 0xd5
+0xc9 0x7 0x30 0xd5
+0xc9 0x8 0x30 0xd5
+0xc9 0x9 0x30 0xd5
+0xc9 0xa 0x30 0xd5
+0xc9 0xb 0x30 0xd5
+0xc9 0xc 0x30 0xd5
+0xc9 0xd 0x30 0xd5
+0xc9 0xe 0x30 0xd5
+0xc9 0xf 0x30 0xd5
+0xe9 0x0 0x30 0xd5
+0xe9 0x1 0x30 0xd5
+0xe9 0x2 0x30 0xd5
+0xe9 0x3 0x30 0xd5
+0xe9 0x4 0x30 0xd5
+0xe9 0x5 0x30 0xd5
+0xe9 0x6 0x30 0xd5
+0xe9 0x7 0x30 0xd5
+0xe9 0x8 0x30 0xd5
+0xe9 0x9 0x30 0xd5
+0xe9 0xa 0x30 0xd5
+0xe9 0xb 0x30 0xd5
+0xe9 0xc 0x30 0xd5
+0xe9 0xd 0x30 0xd5
+0xe9 0xe 0x30 0xd5
+0xe9 0xf 0x30 0xd5
+0x9 0x10 0x30 0xd5
+0x9 0x10 0x32 0xd5
+0x89 0x11 0x30 0xd5
+0x89 0x13 0x30 0xd5
+0x89 0x14 0x30 0xd5
+0xc9 0x78 0x30 0xd5
+0xc9 0x79 0x30 0xd5
+0xc9 0x7e 0x30 0xd5
+0x9 0x0 0x38 0xd5
+0x9 0x0 0x39 0xd5
+0x9 0x0 0x3a 0xd5
+0x9 0x0 0x3c 0xd5
+0x29 0x0 0x39 0xd5
+0x29 0x0 0x3b 0xd5
+0xa9 0x0 0x38 0xd5
+0xa9 0x0 0x3c 0xd5
+0xc9 0x0 0x38 0xd5
+0xe9 0x0 0x39 0xd5
+0xe9 0x0 0x3b 0xd5
+0x9 0x1 0x38 0xd5
+0x29 0x1 0x38 0xd5
+0x49 0x1 0x38 0xd5
+0x69 0x1 0x38 0xd5
+0x89 0x1 0x38 0xd5
+0xa9 0x1 0x38 0xd5
+0xc9 0x1 0x38 0xd5
+0xe9 0x1 0x38 0xd5
+0x9 0x2 0x38 0xd5
+0x29 0x2 0x38 0xd5
+0x49 0x2 0x38 0xd5
+0x69 0x2 0x38 0xd5
+0x89 0x2 0x38 0xd5
+0xa9 0x2 0x38 0xd5
+0x9 0x3 0x38 0xd5
+0x29 0x3 0x38 0xd5
+0x49 0x3 0x38 0xd5
+0x9 0x4 0x38 0xd5
+0x29 0x4 0x38 0xd5
+0x9 0x5 0x38 0xd5
+0x29 0x5 0x38 0xd5
+0x89 0x5 0x38 0xd5
+0xa9 0x5 0x38 0xd5
+0x9 0x6 0x38 0xd5
+0x29 0x6 0x38 0xd5
+0x9 0x7 0x38 0xd5
+0x29 0x7 0x38 0xd5
+0x9 0x10 0x38 0xd5
+0x9 0x10 0x3c 0xd5
+0x9 0x10 0x3e 0xd5
+0x29 0x10 0x38 0xd5
+0x29 0x10 0x3c 0xd5
+0x29 0x10 0x3e 0xd5
+0x49 0x10 0x38 0xd5
+0x9 0x11 0x3c 0xd5
+0x9 0x11 0x3e 0xd5
+0x29 0x11 0x3c 0xd5
+0x29 0x11 0x3e 0xd5
+0x49 0x11 0x3c 0xd5
+0x49 0x11 0x3e 0xd5
+0x69 0x11 0x3c 0xd5
+0xe9 0x11 0x3c 0xd5
+0x29 0x13 0x3e 0xd5
+0x9 0x20 0x38 0xd5
+0x9 0x20 0x3c 0xd5
+0x9 0x20 0x3e 0xd5
+0x29 0x20 0x38 0xd5
+0x49 0x20 0x38 0xd5
+0x49 0x20 0x3c 0xd5
+0x49 0x20 0x3e 0xd5
+0x9 0x21 0x3c 0xd5
+0x49 0x21 0x3c 0xd5
+0x9 0x30 0x3c 0xd5
+0x9 0x40 0x38 0xd5
+0x9 0x40 0x3c 0xd5
+0x9 0x40 0x3e 0xd5
+0x29 0x40 0x38 0xd5
+0x29 0x40 0x3c 0xd5
+0x29 0x40 0x3e 0xd5
+0x9 0x41 0x38 0xd5
+0x9 0x41 0x3c 0xd5
+0x9 0x41 0x3e 0xd5
+0x9 0x42 0x38 0xd5
+0x9 0x42 0x3b 0xd5
+0x29 0x42 0x3b 0xd5
+0x49 0x42 0x38 0xd5
+0x9 0x43 0x3c 0xd5
+0x29 0x43 0x3c 0xd5
+0x49 0x43 0x3c 0xd5
+0x69 0x43 0x3c 0xd5
+0x9 0x44 0x3b 0xd5
+0x29 0x44 0x3b 0xd5
+0x9 0x45 0x3b 0xd5
+0x29 0x45 0x3b 0xd5
+0x29 0x50 0x3c 0xd5
+0x9 0x51 0x38 0xd5
+0x9 0x51 0x3c 0xd5
+0x9 0x51 0x3e 0xd5
+0x29 0x51 0x38 0xd5
+0x29 0x51 0x3c 0xd5
+0x29 0x51 0x3e 0xd5
+0x9 0x52 0x38 0xd5
+0x9 0x52 0x3c 0xd5
+0x9 0x52 0x3e 0xd5
+0x9 0x53 0x3c 0xd5
+0x9 0x60 0x38 0xd5
+0x9 0x60 0x3c 0xd5
+0x9 0x60 0x3e 0xd5
+0x89 0x60 0x3c 0xd5
+0x9 0x74 0x38 0xd5
+0x9 0x9c 0x3b 0xd5
+0x29 0x9c 0x3b 0xd5
+0x49 0x9c 0x3b 0xd5
+0x69 0x9c 0x3b 0xd5
+0xa9 0x9c 0x3b 0xd5
+0xc9 0x9c 0x3b 0xd5
+0xe9 0x9c 0x3b 0xd5
+0x9 0x9d 0x3b 0xd5
+0x29 0x9d 0x3b 0xd5
+0x49 0x9d 0x3b 0xd5
+0x9 0x9e 0x3b 0xd5
+0x29 0x9e 0x38 0xd5
+0x49 0x9e 0x38 0xd5
+0x69 0x9e 0x3b 0xd5
+0x9 0xa2 0x38 0xd5
+0x9 0xa2 0x3c 0xd5
+0x9 0xa2 0x3e 0xd5
+0x9 0xa3 0x38 0xd5
+0x9 0xa3 0x3c 0xd5
+0x9 0xa3 0x3e 0xd5
+0x9 0xc0 0x38 0xd5
+0x9 0xc0 0x3c 0xd5
+0x9 0xc0 0x3e 0xd5
+0x29 0xc0 0x38 0xd5
+0x29 0xc0 0x3c 0xd5
+0x29 0xc0 0x3e 0xd5
+0x49 0xc0 0x38 0xd5
+0x49 0xc0 0x3c 0xd5
+0x49 0xc0 0x3e 0xd5
+0x9 0xc1 0x38 0xd5
+0x29 0xd0 0x38 0xd5
+0x49 0xd0 0x3b 0xd5
+0x49 0xd0 0x3c 0xd5
+0x49 0xd0 0x3e 0xd5
+0x69 0xd0 0x3b 0xd5
+0x89 0xd0 0x38 0xd5
+0x9 0xe0 0x3b 0xd5
+0x29 0xe0 0x3b 0xd5
+0x49 0xe0 0x3b 0xd5
+0x69 0xe0 0x3c 0xd5
+0x9 0xe1 0x38 0xd5
+0x9 0xe1 0x3c 0xd5
+0x9 0xe2 0x3b 0xd5
+0x9 0xe2 0x3c 0xd5
+0x9 0xe2 0x3f 0xd5
+0x29 0xe2 0x3b 0xd5
+0x29 0xe2 0x3c 0xd5
+0x29 0xe2 0x3f 0xd5
+0x49 0xe2 0x3b 0xd5
+0x49 0xe2 0x3c 0xd5
+0x49 0xe2 0x3f 0xd5
+0x9 0xe3 0x3b 0xd5
+0x29 0xe3 0x3b 0xd5
+0x49 0xe3 0x3b 0xd5
+0x9 0xe8 0x3b 0xd5
+0x29 0xe8 0x3b 0xd5
+0x49 0xe8 0x3b 0xd5
+0x69 0xe8 0x3b 0xd5
+0x89 0xe8 0x3b 0xd5
+0xa9 0xe8 0x3b 0xd5
+0xc9 0xe8 0x3b 0xd5
+0xe9 0xe8 0x3b 0xd5
+0x9 0xe9 0x3b 0xd5
+0x29 0xe9 0x3b 0xd5
+0x49 0xe9 0x3b 0xd5
+0x69 0xe9 0x3b 0xd5
+0x89 0xe9 0x3b 0xd5
+0xa9 0xe9 0x3b 0xd5
+0xc9 0xe9 0x3b 0xd5
+0xe9 0xe9 0x3b 0xd5
+0x9 0xea 0x3b 0xd5
+0x29 0xea 0x3b 0xd5
+0x49 0xea 0x3b 0xd5
+0x69 0xea 0x3b 0xd5
+0x89 0xea 0x3b 0xd5
+0xa9 0xea 0x3b 0xd5
+0xc9 0xea 0x3b 0xd5
+0xe9 0xea 0x3b 0xd5
+0x9 0xeb 0x3b 0xd5
+0x29 0xeb 0x3b 0xd5
+0x49 0xeb 0x3b 0xd5
+0x69 0xeb 0x3b 0xd5
+0x89 0xeb 0x3b 0xd5
+0xa9 0xeb 0x3b 0xd5
+0xc9 0xeb 0x3b 0xd5
+0xe9 0xef 0x3b 0xd5
+0x9 0xec 0x3b 0xd5
+0x29 0xec 0x3b 0xd5
+0x49 0xec 0x3b 0xd5
+0x69 0xec 0x3b 0xd5
+0x89 0xec 0x3b 0xd5
+0xa9 0xec 0x3b 0xd5
+0xc9 0xec 0x3b 0xd5
+0xe9 0xec 0x3b 0xd5
+0x9 0xed 0x3b 0xd5
+0x29 0xed 0x3b 0xd5
+0x49 0xed 0x3b 0xd5
+0x69 0xed 0x3b 0xd5
+0x89 0xed 0x3b 0xd5
+0xa9 0xed 0x3b 0xd5
+0xc9 0xed 0x3b 0xd5
+0xe9 0xed 0x3b 0xd5
+0x9 0xee 0x3b 0xd5
+0x29 0xee 0x3b 0xd5
+0x49 0xee 0x3b 0xd5
+0x69 0xee 0x3b 0xd5
+0x89 0xee 0x3b 0xd5
+0xa9 0xee 0x3b 0xd5
+0xc9 0xee 0x3b 0xd5
+0xe9 0xee 0x3b 0xd5
+0x9 0xef 0x3b 0xd5
+0x29 0xef 0x3b 0xd5
+0x49 0xef 0x3b 0xd5
+0x69 0xef 0x3b 0xd5
+0x89 0xef 0x3b 0xd5
+0xa9 0xef 0x3b 0xd5
+0xc9 0xef 0x3b 0xd5
+
+# CHECK: mrs x12, s3_7_c15_c1_5
+# CHECK: mrs x13, s3_2_c11_c15_7
+# CHECK: msr s3_0_c15_c0_0, x12
+# CHECK: msr s3_7_c11_c13_7, x5
+0xac 0xf1 0x3f 0xd5
+0xed 0xbf 0x3a 0xd5
+0x0c 0xf0 0x18 0xd5
+0xe5 0xbd 0x1f 0xd5
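+# A worked decode of the first word above (informative comment, assuming the
+# standard A64 MRS layout): 0xac 0xf1 0x3f 0xd5 is the word 0xd53ff1ac; bit 19
+# gives op0 = 2 + 1 = 3, bits 18-16 give op1 = 0b111 = 7, bits 15-12 give
+# CRn = 0b1111 = 15, bits 11-8 give CRm = 0b0001 = 1, bits 7-5 give
+# op2 = 0b101 = 5, and bits 4-0 give Rt = 12: hence "mrs x12, s3_7_c15_c1_5".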
+
+#------------------------------------------------------------------------------
+# Test and branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: tbz x12, #62, #0
+# CHECK: tbz x12, #62, #4
+# CHECK: tbz x12, #62, #-32768
+# CHECK: tbnz x12, #60, #32764
+0x0c 0x00 0xf0 0xb6
+0x2c 0x00 0xf0 0xb6
+0x0c 0x00 0xf4 0xb6
+0xec 0xff 0xe3 0xb7
+
+#------------------------------------------------------------------------------
+# Unconditional branch (immediate)
+#------------------------------------------------------------------------------
+
+# CHECK: b #4
+# CHECK: b #-4
+# CHECK: b #134217724
+0x01 0x00 0x00 0x14
+0xff 0xff 0xff 0x17
+0xff 0xff 0xff 0x15
+
+#------------------------------------------------------------------------------
+# Unconditional branch (register)
+#------------------------------------------------------------------------------
+
+# CHECK: br x20
+# CHECK: blr xzr
+# CHECK: ret x10
+0x80 0x2 0x1f 0xd6
+0xe0 0x3 0x3f 0xd6
+0x40 0x1 0x5f 0xd6
+
+# CHECK: ret
+# CHECK: eret
+# CHECK: drps
+0xc0 0x3 0x5f 0xd6
+0xe0 0x3 0x9f 0xd6
+0xe0 0x3 0xbf 0xd6
+
diff --git a/test/MC/Disassembler/AArch64/basic-a64-undefined.txt b/test/MC/Disassembler/AArch64/basic-a64-undefined.txt
new file mode 100644
index 000000000000..a17579cb1680
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/basic-a64-undefined.txt
@@ -0,0 +1,43 @@
+# These tests each spawn another process, so they're rather expensive; keep them few.
+
+# Instructions notionally in the add/sub (extended register) sheet, but with an
+# invalid shift amount or "opt" field.
+# RUN: echo "0x00 0x10 0xa0 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x10 0x60 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x14 0x20 0x0b" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the add/sub (immediate) sheet, but with an
+# invalid "shift" field.
+# RUN: echo "0xdf 0x3 0x80 0x91" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0xed 0x8e 0xc4 0x31" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x62 0xfc 0xbf 0x11" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x3 0xff 0xff 0x91" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the load/store (unsigned immediate) sheet.
+# The only unallocated (int-register) variants are opc=0b11 with size=0b10 or 0b11.
+# RUN: echo "0xd7 0xfc 0xff 0xb9" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0xd7 0xfc 0xcf 0xf9" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the floating-point <-> fixed-point conversion
+# sheet. The scale field encodes 64-<imm>, and <imm> must be 1-32 for a
+# 32-bit int register.
+# RUN: echo "0x23 0x01 0x18 0x1e" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x23 0x25 0x42 0x1e" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the logical (shifted register) sheet, but with an
+# out-of-range shift: w-registers only allow shift amounts of 0-31.
+# RUN: echo "0x00 0x80 0x00 0x0a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Instructions notionally in the move wide (immediate) sheet, but with an
+# out-of-range shift: w-registers only allow a shift of 0 or 16.
+# RUN: echo "0x00 0x00 0xc0 0x12" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x12 0x34 0xe0 0x52" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Data-processing instructions are undefined when S = 1, and for the value 0b0000111 in opcode:sf.
+# RUN: echo "0x00 0x00 0xc0 0x5f" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x56 0x0c 0xc0 0x5a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# Data-processing (2 source) instructions are undefined for opcode:S:sf values of 0001xx:0:x or 0011xx:0:x.
+# RUN: echo "0x00 0x30 0xc1 0x1a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+# RUN: echo "0x00 0x10 0xc1 0x1a" | llvm-mc -triple=aarch64 -disassemble 2>&1 | FileCheck %s
+
+# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
new file mode 100644
index 000000000000..adb8f75ed990
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/basic-a64-unpredictable.txt
@@ -0,0 +1,96 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+#------------------------------------------------------------------------------
+# Load-store exclusive
+#------------------------------------------------------------------------------
+
+#ldxp x14, x14, [sp]
+0xee 0x3b 0x7f 0xc8
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0xee 0x3b 0x7f 0xc8
+
+#ldaxp w19, w19, [x1]
+0x33 0xcc 0x7f 0x88
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x33 0xcc 0x7f 0x88
+
+#------------------------------------------------------------------------------
+# Load-store register (immediate post-indexed)
+#------------------------------------------------------------------------------
+
+0x63 0x44 0x40 0xf8
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x63 0x44 0x40 0xf8
+
+0x42 0x14 0xc0 0x38
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x42 0x14 0xc0 0x38
+
+#------------------------------------------------------------------------------
+# Load-store register (immediate pre-indexed)
+#------------------------------------------------------------------------------
+
+0x63 0x4c 0x40 0xf8
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x63 0x4c 0x40 0xf8
+
+0x42 0x1c 0xc0 0x38
+#CHECK: warning: potentially undefined instruction encoding
+#CHECK-NEXT: 0x42 0x1c 0xc0 0x38
+
+#------------------------------------------------------------------------------
+# Load-store register pair (offset)
+#------------------------------------------------------------------------------
+
+# Unpredictable if Rt == Rt2 on a load.
+
+0xe3 0x0f 0x40 0xa9
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe3 0x0f 0x40 0xa9
+# CHECK-NEXT: ^
+
+0xe2 0x8b 0x41 0x69
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe2 0x8b 0x41 0x69
+# CHECK-NEXT: ^
+
+0x82 0x88 0x40 0x2d
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x82 0x88 0x40 0x2d
+# CHECK-NEXT: ^
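+# (Informative comment: the first word in this group, 0xe3 0x0f 0x40 0xa9,
+# i.e. 0xa9400fe3, notionally decodes as "ldp x3, x3, [sp]" -- Rt == Rt2 == x3
+# on a load, hence the warning.)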
+
+#------------------------------------------------------------------------------
+# Load-store register pair (post-indexed)
+#------------------------------------------------------------------------------
+
+# Unpredictable if Rt == Rt2 on a load.
+
+0xe3 0x0f 0xc0 0xa8
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe3 0x0f 0xc0 0xa8
+# CHECK-NEXT: ^
+
+0xe2 0x8b 0xc1 0x68
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0xe2 0x8b 0xc1 0x68
+# CHECK-NEXT: ^
+
+0x82 0x88 0xc0 0x2c
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x82 0x88 0xc0 0x2c
+# CHECK-NEXT: ^
+
+# Also unpredictable if writeback clashes with either transfer register.
+
+0x63 0x94 0xc0 0xa8
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x63 0x94 0xc0 0xa8
+
+0x69 0x2d 0x81 0xa8
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x69 0x2d 0x81 0xa8
+
+0x29 0xad 0xc0 0x28
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: 0x29 0xad 0xc0 0x28
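+# (Informative comment: the first word in this group, 0x63 0x94 0xc0 0xa8,
+# i.e. 0xa8c09463, notionally decodes as "ldp x3, x5, [x3], #8" -- the
+# writeback base x3 clashes with the first transfer register.)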
+
diff --git a/test/MC/Disassembler/AArch64/gicv3-regs.txt b/test/MC/Disassembler/AArch64/gicv3-regs.txt
new file mode 100644
index 000000000000..4351f6460c75
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/gicv3-regs.txt
@@ -0,0 +1,222 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -disassemble < %s | FileCheck %s
+
+0x8 0xcc 0x38 0xd5
+# CHECK: mrs x8, icc_iar1_el1
+0x1a 0xc8 0x38 0xd5
+# CHECK: mrs x26, icc_iar0_el1
+0x42 0xcc 0x38 0xd5
+# CHECK: mrs x2, icc_hppir1_el1
+0x51 0xc8 0x38 0xd5
+# CHECK: mrs x17, icc_hppir0_el1
+0x7d 0xcb 0x38 0xd5
+# CHECK: mrs x29, icc_rpr_el1
+0x24 0xcb 0x3c 0xd5
+# CHECK: mrs x4, ich_vtr_el2
+0x78 0xcb 0x3c 0xd5
+# CHECK: mrs x24, ich_eisr_el2
+0xa9 0xcb 0x3c 0xd5
+# CHECK: mrs x9, ich_elsr_el2
+0x78 0xcc 0x38 0xd5
+# CHECK: mrs x24, icc_bpr1_el1
+0x6e 0xc8 0x38 0xd5
+# CHECK: mrs x14, icc_bpr0_el1
+0x13 0x46 0x38 0xd5
+# CHECK: mrs x19, icc_pmr_el1
+0x97 0xcc 0x38 0xd5
+# CHECK: mrs x23, icc_ctlr_el1
+0x94 0xcc 0x3e 0xd5
+# CHECK: mrs x20, icc_ctlr_el3
+0xbc 0xcc 0x38 0xd5
+# CHECK: mrs x28, icc_sre_el1
+0xb9 0xc9 0x3c 0xd5
+# CHECK: mrs x25, icc_sre_el2
+0xa8 0xcc 0x3e 0xd5
+# CHECK: mrs x8, icc_sre_el3
+0xd6 0xcc 0x38 0xd5
+# CHECK: mrs x22, icc_igrpen0_el1
+0xe5 0xcc 0x38 0xd5
+# CHECK: mrs x5, icc_igrpen1_el1
+0xe7 0xcc 0x3e 0xd5
+# CHECK: mrs x7, icc_igrpen1_el3
+0x16 0xcd 0x38 0xd5
+# CHECK: mrs x22, icc_seien_el1
+0x84 0xc8 0x38 0xd5
+# CHECK: mrs x4, icc_ap0r0_el1
+0xab 0xc8 0x38 0xd5
+# CHECK: mrs x11, icc_ap0r1_el1
+0xdb 0xc8 0x38 0xd5
+# CHECK: mrs x27, icc_ap0r2_el1
+0xf5 0xc8 0x38 0xd5
+# CHECK: mrs x21, icc_ap0r3_el1
+0x2 0xc9 0x38 0xd5
+# CHECK: mrs x2, icc_ap1r0_el1
+0x35 0xc9 0x38 0xd5
+# CHECK: mrs x21, icc_ap1r1_el1
+0x4a 0xc9 0x38 0xd5
+# CHECK: mrs x10, icc_ap1r2_el1
+0x7b 0xc9 0x38 0xd5
+# CHECK: mrs x27, icc_ap1r3_el1
+0x14 0xc8 0x3c 0xd5
+# CHECK: mrs x20, ich_ap0r0_el2
+0x35 0xc8 0x3c 0xd5
+# CHECK: mrs x21, ich_ap0r1_el2
+0x45 0xc8 0x3c 0xd5
+# CHECK: mrs x5, ich_ap0r2_el2
+0x64 0xc8 0x3c 0xd5
+# CHECK: mrs x4, ich_ap0r3_el2
+0xf 0xc9 0x3c 0xd5
+# CHECK: mrs x15, ich_ap1r0_el2
+0x2c 0xc9 0x3c 0xd5
+# CHECK: mrs x12, ich_ap1r1_el2
+0x5b 0xc9 0x3c 0xd5
+# CHECK: mrs x27, ich_ap1r2_el2
+0x74 0xc9 0x3c 0xd5
+# CHECK: mrs x20, ich_ap1r3_el2
+0xa 0xcb 0x3c 0xd5
+# CHECK: mrs x10, ich_hcr_el2
+0x5b 0xcb 0x3c 0xd5
+# CHECK: mrs x27, ich_misr_el2
+0xe6 0xcb 0x3c 0xd5
+# CHECK: mrs x6, ich_vmcr_el2
+0x93 0xc9 0x3c 0xd5
+# CHECK: mrs x19, ich_vseir_el2
+0x3 0xcc 0x3c 0xd5
+# CHECK: mrs x3, ich_lr0_el2
+0x21 0xcc 0x3c 0xd5
+# CHECK: mrs x1, ich_lr1_el2
+0x56 0xcc 0x3c 0xd5
+# CHECK: mrs x22, ich_lr2_el2
+0x75 0xcc 0x3c 0xd5
+# CHECK: mrs x21, ich_lr3_el2
+0x86 0xcc 0x3c 0xd5
+# CHECK: mrs x6, ich_lr4_el2
+0xaa 0xcc 0x3c 0xd5
+# CHECK: mrs x10, ich_lr5_el2
+0xcb 0xcc 0x3c 0xd5
+# CHECK: mrs x11, ich_lr6_el2
+0xec 0xcc 0x3c 0xd5
+# CHECK: mrs x12, ich_lr7_el2
+0x0 0xcd 0x3c 0xd5
+# CHECK: mrs x0, ich_lr8_el2
+0x35 0xcd 0x3c 0xd5
+# CHECK: mrs x21, ich_lr9_el2
+0x4d 0xcd 0x3c 0xd5
+# CHECK: mrs x13, ich_lr10_el2
+0x7a 0xcd 0x3c 0xd5
+# CHECK: mrs x26, ich_lr11_el2
+0x81 0xcd 0x3c 0xd5
+# CHECK: mrs x1, ich_lr12_el2
+0xa8 0xcd 0x3c 0xd5
+# CHECK: mrs x8, ich_lr13_el2
+0xc2 0xcd 0x3c 0xd5
+# CHECK: mrs x2, ich_lr14_el2
+0xe8 0xcd 0x3c 0xd5
+# CHECK: mrs x8, ich_lr15_el2
+0x3b 0xcc 0x18 0xd5
+# CHECK: msr icc_eoir1_el1, x27
+0x25 0xc8 0x18 0xd5
+# CHECK: msr icc_eoir0_el1, x5
+0x2d 0xcb 0x18 0xd5
+# CHECK: msr icc_dir_el1, x13
+0xb5 0xcb 0x18 0xd5
+# CHECK: msr icc_sgi1r_el1, x21
+0xd9 0xcb 0x18 0xd5
+# CHECK: msr icc_asgi1r_el1, x25
+0xfc 0xcb 0x18 0xd5
+# CHECK: msr icc_sgi0r_el1, x28
+0x67 0xcc 0x18 0xd5
+# CHECK: msr icc_bpr1_el1, x7
+0x69 0xc8 0x18 0xd5
+# CHECK: msr icc_bpr0_el1, x9
+0x1d 0x46 0x18 0xd5
+# CHECK: msr icc_pmr_el1, x29
+0x98 0xcc 0x18 0xd5
+# CHECK: msr icc_ctlr_el1, x24
+0x80 0xcc 0x1e 0xd5
+# CHECK: msr icc_ctlr_el3, x0
+0xa2 0xcc 0x18 0xd5
+# CHECK: msr icc_sre_el1, x2
+0xa5 0xc9 0x1c 0xd5
+# CHECK: msr icc_sre_el2, x5
+0xaa 0xcc 0x1e 0xd5
+# CHECK: msr icc_sre_el3, x10
+0xd6 0xcc 0x18 0xd5
+# CHECK: msr icc_igrpen0_el1, x22
+0xeb 0xcc 0x18 0xd5
+# CHECK: msr icc_igrpen1_el1, x11
+0xe8 0xcc 0x1e 0xd5
+# CHECK: msr icc_igrpen1_el3, x8
+0x4 0xcd 0x18 0xd5
+# CHECK: msr icc_seien_el1, x4
+0x9b 0xc8 0x18 0xd5
+# CHECK: msr icc_ap0r0_el1, x27
+0xa5 0xc8 0x18 0xd5
+# CHECK: msr icc_ap0r1_el1, x5
+0xd4 0xc8 0x18 0xd5
+# CHECK: msr icc_ap0r2_el1, x20
+0xe0 0xc8 0x18 0xd5
+# CHECK: msr icc_ap0r3_el1, x0
+0x2 0xc9 0x18 0xd5
+# CHECK: msr icc_ap1r0_el1, x2
+0x3d 0xc9 0x18 0xd5
+# CHECK: msr icc_ap1r1_el1, x29
+0x57 0xc9 0x18 0xd5
+# CHECK: msr icc_ap1r2_el1, x23
+0x6b 0xc9 0x18 0xd5
+# CHECK: msr icc_ap1r3_el1, x11
+0x2 0xc8 0x1c 0xd5
+# CHECK: msr ich_ap0r0_el2, x2
+0x3b 0xc8 0x1c 0xd5
+# CHECK: msr ich_ap0r1_el2, x27
+0x47 0xc8 0x1c 0xd5
+# CHECK: msr ich_ap0r2_el2, x7
+0x61 0xc8 0x1c 0xd5
+# CHECK: msr ich_ap0r3_el2, x1
+0x7 0xc9 0x1c 0xd5
+# CHECK: msr ich_ap1r0_el2, x7
+0x2c 0xc9 0x1c 0xd5
+# CHECK: msr ich_ap1r1_el2, x12
+0x4e 0xc9 0x1c 0xd5
+# CHECK: msr ich_ap1r2_el2, x14
+0x6d 0xc9 0x1c 0xd5
+# CHECK: msr ich_ap1r3_el2, x13
+0x1 0xcb 0x1c 0xd5
+# CHECK: msr ich_hcr_el2, x1
+0x4a 0xcb 0x1c 0xd5
+# CHECK: msr ich_misr_el2, x10
+0xf8 0xcb 0x1c 0xd5
+# CHECK: msr ich_vmcr_el2, x24
+0x9d 0xc9 0x1c 0xd5
+# CHECK: msr ich_vseir_el2, x29
+0x1a 0xcc 0x1c 0xd5
+# CHECK: msr ich_lr0_el2, x26
+0x29 0xcc 0x1c 0xd5
+# CHECK: msr ich_lr1_el2, x9
+0x52 0xcc 0x1c 0xd5
+# CHECK: msr ich_lr2_el2, x18
+0x7a 0xcc 0x1c 0xd5
+# CHECK: msr ich_lr3_el2, x26
+0x96 0xcc 0x1c 0xd5
+# CHECK: msr ich_lr4_el2, x22
+0xba 0xcc 0x1c 0xd5
+# CHECK: msr ich_lr5_el2, x26
+0xdb 0xcc 0x1c 0xd5
+# CHECK: msr ich_lr6_el2, x27
+0xe8 0xcc 0x1c 0xd5
+# CHECK: msr ich_lr7_el2, x8
+0x11 0xcd 0x1c 0xd5
+# CHECK: msr ich_lr8_el2, x17
+0x33 0xcd 0x1c 0xd5
+# CHECK: msr ich_lr9_el2, x19
+0x51 0xcd 0x1c 0xd5
+# CHECK: msr ich_lr10_el2, x17
+0x65 0xcd 0x1c 0xd5
+# CHECK: msr ich_lr11_el2, x5
+0x9d 0xcd 0x1c 0xd5
+# CHECK: msr ich_lr12_el2, x29
+0xa2 0xcd 0x1c 0xd5
+# CHECK: msr ich_lr13_el2, x2
+0xcd 0xcd 0x1c 0xd5
+# CHECK: msr ich_lr14_el2, x13
+0xfb 0xcd 0x1c 0xd5
+# CHECK: msr ich_lr15_el2, x27
diff --git a/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt b/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt
new file mode 100644
index 000000000000..7ff495f4996d
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/ldp-offset-predictable.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+# Stores are OK.
+0xe0 0x83 0x00 0xa9
+# CHECK-NOT: potentially undefined instruction encoding
+# CHECK: stp x0, x0, [sp, #8]
+
diff --git a/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
new file mode 100644
index 000000000000..775660bba8a3
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/ldp-postind.predictable.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+# None of these instructions should be classified as unpredictable:
+
+# CHECK-NOT: potentially undefined instruction encoding
+
+# Stores from duplicated registers should be fine.
+0xe3 0x0f 0x80 0xa8
+# CHECK: stp x3, x3, [sp], #0
+
+# d5 != x5 so "ldp d5, d6, [x5], #24" is fine.
+0xa5 0x98 0xc1 0x6c
+# CHECK: ldp d5, d6, [x5], #24
+
+# xzr != sp so "stp xzr, xzr, [sp], #8" is fine.
+0xff 0xff 0x80 0xa8
+# CHECK: stp xzr, xzr, [sp], #8
diff --git a/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
new file mode 100644
index 000000000000..48ea8170ba9e
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/ldp-preind.predictable.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple=aarch64 -disassemble < %s 2>&1 | FileCheck %s
+
+# None of these instructions should be classified as unpredictable:
+
+# CHECK-NOT: potentially undefined instruction encoding
+
+# Stores from duplicated registers should be fine.
+0xe3 0x0f 0x80 0xa9
+# CHECK: stp x3, x3, [sp, #0]!
+
+# d5 != x5 so "ldp d5, d6, [x5, #24]!" is fine.
+0xa5 0x98 0xc1 0x6d
+# CHECK: ldp d5, d6, [x5, #24]!
+
+# xzr != sp so "stp xzr, xzr, [sp, #8]!" is fine.
+0xff 0xff 0x80 0xa9
+# CHECK: stp xzr, xzr, [sp, #8]!
diff --git a/test/MC/Disassembler/AArch64/lit.local.cfg b/test/MC/Disassembler/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..f9df30e4d3df
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.txt']
+
+targets = set(config.root.targets_to_build.split())
+if 'AArch64' not in targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/AArch64/trace-regs.txt b/test/MC/Disassembler/AArch64/trace-regs.txt
new file mode 100644
index 000000000000..10c5937f5dea
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/trace-regs.txt
@@ -0,0 +1,736 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -disassemble < %s | FileCheck %s
+
+0x8 0x3 0x31 0xd5
+# CHECK: mrs x8, trcstatr
+0xc9 0x0 0x31 0xd5
+# CHECK: mrs x9, trcidr8
+0xcb 0x1 0x31 0xd5
+# CHECK: mrs x11, trcidr9
+0xd9 0x2 0x31 0xd5
+# CHECK: mrs x25, trcidr10
+0xc7 0x3 0x31 0xd5
+# CHECK: mrs x7, trcidr11
+0xc7 0x4 0x31 0xd5
+# CHECK: mrs x7, trcidr12
+0xc6 0x5 0x31 0xd5
+# CHECK: mrs x6, trcidr13
+0xfb 0x8 0x31 0xd5
+# CHECK: mrs x27, trcidr0
+0xfd 0x9 0x31 0xd5
+# CHECK: mrs x29, trcidr1
+0xe4 0xa 0x31 0xd5
+# CHECK: mrs x4, trcidr2
+0xe8 0xb 0x31 0xd5
+# CHECK: mrs x8, trcidr3
+0xef 0xc 0x31 0xd5
+# CHECK: mrs x15, trcidr4
+0xf4 0xd 0x31 0xd5
+# CHECK: mrs x20, trcidr5
+0xe6 0xe 0x31 0xd5
+# CHECK: mrs x6, trcidr6
+0xe6 0xf 0x31 0xd5
+# CHECK: mrs x6, trcidr7
+0x98 0x11 0x31 0xd5
+# CHECK: mrs x24, trcoslsr
+0x92 0x15 0x31 0xd5
+# CHECK: mrs x18, trcpdsr
+0xdc 0x7a 0x31 0xd5
+# CHECK: mrs x28, trcdevaff0
+0xc5 0x7b 0x31 0xd5
+# CHECK: mrs x5, trcdevaff1
+0xc5 0x7d 0x31 0xd5
+# CHECK: mrs x5, trclsr
+0xcb 0x7e 0x31 0xd5
+# CHECK: mrs x11, trcauthstatus
+0xcd 0x7f 0x31 0xd5
+# CHECK: mrs x13, trcdevarch
+0xf2 0x72 0x31 0xd5
+# CHECK: mrs x18, trcdevid
+0xf6 0x73 0x31 0xd5
+# CHECK: mrs x22, trcdevtype
+0xee 0x74 0x31 0xd5
+# CHECK: mrs x14, trcpidr4
+0xe5 0x75 0x31 0xd5
+# CHECK: mrs x5, trcpidr5
+0xe5 0x76 0x31 0xd5
+# CHECK: mrs x5, trcpidr6
+0xe9 0x77 0x31 0xd5
+# CHECK: mrs x9, trcpidr7
+0xef 0x78 0x31 0xd5
+# CHECK: mrs x15, trcpidr0
+0xe6 0x79 0x31 0xd5
+# CHECK: mrs x6, trcpidr1
+0xeb 0x7a 0x31 0xd5
+# CHECK: mrs x11, trcpidr2
+0xf4 0x7b 0x31 0xd5
+# CHECK: mrs x20, trcpidr3
+0xf1 0x7c 0x31 0xd5
+# CHECK: mrs x17, trccidr0
+0xe2 0x7d 0x31 0xd5
+# CHECK: mrs x2, trccidr1
+0xf4 0x7e 0x31 0xd5
+# CHECK: mrs x20, trccidr2
+0xe4 0x7f 0x31 0xd5
+# CHECK: mrs x4, trccidr3
+0xb 0x1 0x31 0xd5
+# CHECK: mrs x11, trcprgctlr
+0x17 0x2 0x31 0xd5
+# CHECK: mrs x23, trcprocselr
+0xd 0x4 0x31 0xd5
+# CHECK: mrs x13, trcconfigr
+0x17 0x6 0x31 0xd5
+# CHECK: mrs x23, trcauxctlr
+0x9 0x8 0x31 0xd5
+# CHECK: mrs x9, trceventctl0r
+0x10 0x9 0x31 0xd5
+# CHECK: mrs x16, trceventctl1r
+0x4 0xb 0x31 0xd5
+# CHECK: mrs x4, trcstallctlr
+0xe 0xc 0x31 0xd5
+# CHECK: mrs x14, trctsctlr
+0x18 0xd 0x31 0xd5
+# CHECK: mrs x24, trcsyncpr
+0x1c 0xe 0x31 0xd5
+# CHECK: mrs x28, trcccctlr
+0xf 0xf 0x31 0xd5
+# CHECK: mrs x15, trcbbctlr
+0x21 0x0 0x31 0xd5
+# CHECK: mrs x1, trctraceidr
+0x34 0x1 0x31 0xd5
+# CHECK: mrs x20, trcqctlr
+0x42 0x0 0x31 0xd5
+# CHECK: mrs x2, trcvictlr
+0x4c 0x1 0x31 0xd5
+# CHECK: mrs x12, trcviiectlr
+0x50 0x2 0x31 0xd5
+# CHECK: mrs x16, trcvissctlr
+0x48 0x3 0x31 0xd5
+# CHECK: mrs x8, trcvipcssctlr
+0x5b 0x8 0x31 0xd5
+# CHECK: mrs x27, trcvdctlr
+0x49 0x9 0x31 0xd5
+# CHECK: mrs x9, trcvdsacctlr
+0x40 0xa 0x31 0xd5
+# CHECK: mrs x0, trcvdarcctlr
+0x8d 0x0 0x31 0xd5
+# CHECK: mrs x13, trcseqevr0
+0x8b 0x1 0x31 0xd5
+# CHECK: mrs x11, trcseqevr1
+0x9a 0x2 0x31 0xd5
+# CHECK: mrs x26, trcseqevr2
+0x8e 0x6 0x31 0xd5
+# CHECK: mrs x14, trcseqrstevr
+0x84 0x7 0x31 0xd5
+# CHECK: mrs x4, trcseqstr
+0x91 0x8 0x31 0xd5
+# CHECK: mrs x17, trcextinselr
+0xb5 0x0 0x31 0xd5
+# CHECK: mrs x21, trccntrldvr0
+0xaa 0x1 0x31 0xd5
+# CHECK: mrs x10, trccntrldvr1
+0xb4 0x2 0x31 0xd5
+# CHECK: mrs x20, trccntrldvr2
+0xa5 0x3 0x31 0xd5
+# CHECK: mrs x5, trccntrldvr3
+0xb1 0x4 0x31 0xd5
+# CHECK: mrs x17, trccntctlr0
+0xa1 0x5 0x31 0xd5
+# CHECK: mrs x1, trccntctlr1
+0xb1 0x6 0x31 0xd5
+# CHECK: mrs x17, trccntctlr2
+0xa6 0x7 0x31 0xd5
+# CHECK: mrs x6, trccntctlr3
+0xbc 0x8 0x31 0xd5
+# CHECK: mrs x28, trccntvr0
+0xb7 0x9 0x31 0xd5
+# CHECK: mrs x23, trccntvr1
+0xa9 0xa 0x31 0xd5
+# CHECK: mrs x9, trccntvr2
+0xa6 0xb 0x31 0xd5
+# CHECK: mrs x6, trccntvr3
+0xf8 0x0 0x31 0xd5
+# CHECK: mrs x24, trcimspec0
+0xf8 0x1 0x31 0xd5
+# CHECK: mrs x24, trcimspec1
+0xef 0x2 0x31 0xd5
+# CHECK: mrs x15, trcimspec2
+0xea 0x3 0x31 0xd5
+# CHECK: mrs x10, trcimspec3
+0xfd 0x4 0x31 0xd5
+# CHECK: mrs x29, trcimspec4
+0xf2 0x5 0x31 0xd5
+# CHECK: mrs x18, trcimspec5
+0xfd 0x6 0x31 0xd5
+# CHECK: mrs x29, trcimspec6
+0xe2 0x7 0x31 0xd5
+# CHECK: mrs x2, trcimspec7
+0x8 0x12 0x31 0xd5
+# CHECK: mrs x8, trcrsctlr2
+0x0 0x13 0x31 0xd5
+# CHECK: mrs x0, trcrsctlr3
+0xc 0x14 0x31 0xd5
+# CHECK: mrs x12, trcrsctlr4
+0x1a 0x15 0x31 0xd5
+# CHECK: mrs x26, trcrsctlr5
+0x1d 0x16 0x31 0xd5
+# CHECK: mrs x29, trcrsctlr6
+0x11 0x17 0x31 0xd5
+# CHECK: mrs x17, trcrsctlr7
+0x0 0x18 0x31 0xd5
+# CHECK: mrs x0, trcrsctlr8
+0x1 0x19 0x31 0xd5
+# CHECK: mrs x1, trcrsctlr9
+0x11 0x1a 0x31 0xd5
+# CHECK: mrs x17, trcrsctlr10
+0x15 0x1b 0x31 0xd5
+# CHECK: mrs x21, trcrsctlr11
+0x1 0x1c 0x31 0xd5
+# CHECK: mrs x1, trcrsctlr12
+0x8 0x1d 0x31 0xd5
+# CHECK: mrs x8, trcrsctlr13
+0x18 0x1e 0x31 0xd5
+# CHECK: mrs x24, trcrsctlr14
+0x0 0x1f 0x31 0xd5
+# CHECK: mrs x0, trcrsctlr15
+0x22 0x10 0x31 0xd5
+# CHECK: mrs x2, trcrsctlr16
+0x3d 0x11 0x31 0xd5
+# CHECK: mrs x29, trcrsctlr17
+0x36 0x12 0x31 0xd5
+# CHECK: mrs x22, trcrsctlr18
+0x26 0x13 0x31 0xd5
+# CHECK: mrs x6, trcrsctlr19
+0x3a 0x14 0x31 0xd5
+# CHECK: mrs x26, trcrsctlr20
+0x3a 0x15 0x31 0xd5
+# CHECK: mrs x26, trcrsctlr21
+0x24 0x16 0x31 0xd5
+# CHECK: mrs x4, trcrsctlr22
+0x2c 0x17 0x31 0xd5
+# CHECK: mrs x12, trcrsctlr23
+0x21 0x18 0x31 0xd5
+# CHECK: mrs x1, trcrsctlr24
+0x20 0x19 0x31 0xd5
+# CHECK: mrs x0, trcrsctlr25
+0x31 0x1a 0x31 0xd5
+# CHECK: mrs x17, trcrsctlr26
+0x28 0x1b 0x31 0xd5
+# CHECK: mrs x8, trcrsctlr27
+0x2a 0x1c 0x31 0xd5
+# CHECK: mrs x10, trcrsctlr28
+0x39 0x1d 0x31 0xd5
+# CHECK: mrs x25, trcrsctlr29
+0x2c 0x1e 0x31 0xd5
+# CHECK: mrs x12, trcrsctlr30
+0x2b 0x1f 0x31 0xd5
+# CHECK: mrs x11, trcrsctlr31
+0x52 0x10 0x31 0xd5
+# CHECK: mrs x18, trcssccr0
+0x4c 0x11 0x31 0xd5
+# CHECK: mrs x12, trcssccr1
+0x43 0x12 0x31 0xd5
+# CHECK: mrs x3, trcssccr2
+0x42 0x13 0x31 0xd5
+# CHECK: mrs x2, trcssccr3
+0x55 0x14 0x31 0xd5
+# CHECK: mrs x21, trcssccr4
+0x4a 0x15 0x31 0xd5
+# CHECK: mrs x10, trcssccr5
+0x56 0x16 0x31 0xd5
+# CHECK: mrs x22, trcssccr6
+0x57 0x17 0x31 0xd5
+# CHECK: mrs x23, trcssccr7
+0x57 0x18 0x31 0xd5
+# CHECK: mrs x23, trcsscsr0
+0x53 0x19 0x31 0xd5
+# CHECK: mrs x19, trcsscsr1
+0x59 0x1a 0x31 0xd5
+# CHECK: mrs x25, trcsscsr2
+0x51 0x1b 0x31 0xd5
+# CHECK: mrs x17, trcsscsr3
+0x53 0x1c 0x31 0xd5
+# CHECK: mrs x19, trcsscsr4
+0x4b 0x1d 0x31 0xd5
+# CHECK: mrs x11, trcsscsr5
+0x45 0x1e 0x31 0xd5
+# CHECK: mrs x5, trcsscsr6
+0x49 0x1f 0x31 0xd5
+# CHECK: mrs x9, trcsscsr7
+0x9a 0x14 0x31 0xd5
+# CHECK: mrs x26, trcpdcr
+0x8 0x20 0x31 0xd5
+# CHECK: mrs x8, trcacvr0
+0xf 0x22 0x31 0xd5
+# CHECK: mrs x15, trcacvr1
+0x13 0x24 0x31 0xd5
+# CHECK: mrs x19, trcacvr2
+0x8 0x26 0x31 0xd5
+# CHECK: mrs x8, trcacvr3
+0x1c 0x28 0x31 0xd5
+# CHECK: mrs x28, trcacvr4
+0x3 0x2a 0x31 0xd5
+# CHECK: mrs x3, trcacvr5
+0x19 0x2c 0x31 0xd5
+# CHECK: mrs x25, trcacvr6
+0x18 0x2e 0x31 0xd5
+# CHECK: mrs x24, trcacvr7
+0x26 0x20 0x31 0xd5
+# CHECK: mrs x6, trcacvr8
+0x23 0x22 0x31 0xd5
+# CHECK: mrs x3, trcacvr9
+0x38 0x24 0x31 0xd5
+# CHECK: mrs x24, trcacvr10
+0x23 0x26 0x31 0xd5
+# CHECK: mrs x3, trcacvr11
+0x2c 0x28 0x31 0xd5
+# CHECK: mrs x12, trcacvr12
+0x29 0x2a 0x31 0xd5
+# CHECK: mrs x9, trcacvr13
+0x2e 0x2c 0x31 0xd5
+# CHECK: mrs x14, trcacvr14
+0x23 0x2e 0x31 0xd5
+# CHECK: mrs x3, trcacvr15
+0x55 0x20 0x31 0xd5
+# CHECK: mrs x21, trcacatr0
+0x5a 0x22 0x31 0xd5
+# CHECK: mrs x26, trcacatr1
+0x48 0x24 0x31 0xd5
+# CHECK: mrs x8, trcacatr2
+0x56 0x26 0x31 0xd5
+# CHECK: mrs x22, trcacatr3
+0x46 0x28 0x31 0xd5
+# CHECK: mrs x6, trcacatr4
+0x5d 0x2a 0x31 0xd5
+# CHECK: mrs x29, trcacatr5
+0x45 0x2c 0x31 0xd5
+# CHECK: mrs x5, trcacatr6
+0x52 0x2e 0x31 0xd5
+# CHECK: mrs x18, trcacatr7
+0x62 0x20 0x31 0xd5
+# CHECK: mrs x2, trcacatr8
+0x73 0x22 0x31 0xd5
+# CHECK: mrs x19, trcacatr9
+0x6d 0x24 0x31 0xd5
+# CHECK: mrs x13, trcacatr10
+0x79 0x26 0x31 0xd5
+# CHECK: mrs x25, trcacatr11
+0x72 0x28 0x31 0xd5
+# CHECK: mrs x18, trcacatr12
+0x7d 0x2a 0x31 0xd5
+# CHECK: mrs x29, trcacatr13
+0x69 0x2c 0x31 0xd5
+# CHECK: mrs x9, trcacatr14
+0x72 0x2e 0x31 0xd5
+# CHECK: mrs x18, trcacatr15
+0x9d 0x20 0x31 0xd5
+# CHECK: mrs x29, trcdvcvr0
+0x8f 0x24 0x31 0xd5
+# CHECK: mrs x15, trcdvcvr1
+0x8f 0x28 0x31 0xd5
+# CHECK: mrs x15, trcdvcvr2
+0x8f 0x2c 0x31 0xd5
+# CHECK: mrs x15, trcdvcvr3
+0xb3 0x20 0x31 0xd5
+# CHECK: mrs x19, trcdvcvr4
+0xb6 0x24 0x31 0xd5
+# CHECK: mrs x22, trcdvcvr5
+0xbb 0x28 0x31 0xd5
+# CHECK: mrs x27, trcdvcvr6
+0xa1 0x2c 0x31 0xd5
+# CHECK: mrs x1, trcdvcvr7
+0xdd 0x20 0x31 0xd5
+# CHECK: mrs x29, trcdvcmr0
+0xc9 0x24 0x31 0xd5
+# CHECK: mrs x9, trcdvcmr1
+0xc1 0x28 0x31 0xd5
+# CHECK: mrs x1, trcdvcmr2
+0xc2 0x2c 0x31 0xd5
+# CHECK: mrs x2, trcdvcmr3
+0xe5 0x20 0x31 0xd5
+# CHECK: mrs x5, trcdvcmr4
+0xf5 0x24 0x31 0xd5
+# CHECK: mrs x21, trcdvcmr5
+0xe5 0x28 0x31 0xd5
+# CHECK: mrs x5, trcdvcmr6
+0xe1 0x2c 0x31 0xd5
+# CHECK: mrs x1, trcdvcmr7
+0x15 0x30 0x31 0xd5
+# CHECK: mrs x21, trccidcvr0
+0x18 0x32 0x31 0xd5
+# CHECK: mrs x24, trccidcvr1
+0x18 0x34 0x31 0xd5
+# CHECK: mrs x24, trccidcvr2
+0xc 0x36 0x31 0xd5
+# CHECK: mrs x12, trccidcvr3
+0xa 0x38 0x31 0xd5
+# CHECK: mrs x10, trccidcvr4
+0x9 0x3a 0x31 0xd5
+# CHECK: mrs x9, trccidcvr5
+0x6 0x3c 0x31 0xd5
+# CHECK: mrs x6, trccidcvr6
+0x14 0x3e 0x31 0xd5
+# CHECK: mrs x20, trccidcvr7
+0x34 0x30 0x31 0xd5
+# CHECK: mrs x20, trcvmidcvr0
+0x34 0x32 0x31 0xd5
+# CHECK: mrs x20, trcvmidcvr1
+0x3a 0x34 0x31 0xd5
+# CHECK: mrs x26, trcvmidcvr2
+0x21 0x36 0x31 0xd5
+# CHECK: mrs x1, trcvmidcvr3
+0x2e 0x38 0x31 0xd5
+# CHECK: mrs x14, trcvmidcvr4
+0x3b 0x3a 0x31 0xd5
+# CHECK: mrs x27, trcvmidcvr5
+0x3d 0x3c 0x31 0xd5
+# CHECK: mrs x29, trcvmidcvr6
+0x31 0x3e 0x31 0xd5
+# CHECK: mrs x17, trcvmidcvr7
+0x4a 0x30 0x31 0xd5
+# CHECK: mrs x10, trccidcctlr0
+0x44 0x31 0x31 0xd5
+# CHECK: mrs x4, trccidcctlr1
+0x49 0x32 0x31 0xd5
+# CHECK: mrs x9, trcvmidcctlr0
+0x4b 0x33 0x31 0xd5
+# CHECK: mrs x11, trcvmidcctlr1
+0x96 0x70 0x31 0xd5
+# CHECK: mrs x22, trcitctrl
+0xd7 0x78 0x31 0xd5
+# CHECK: mrs x23, trcclaimset
+0xce 0x79 0x31 0xd5
+# CHECK: mrs x14, trcclaimclr
+0x9c 0x10 0x11 0xd5
+# CHECK: msr trcoslar, x28
+0xce 0x7c 0x11 0xd5
+# CHECK: msr trclar, x14
+0xa 0x1 0x11 0xd5
+# CHECK: msr trcprgctlr, x10
+0x1b 0x2 0x11 0xd5
+# CHECK: msr trcprocselr, x27
+0x18 0x4 0x11 0xd5
+# CHECK: msr trcconfigr, x24
+0x8 0x6 0x11 0xd5
+# CHECK: msr trcauxctlr, x8
+0x10 0x8 0x11 0xd5
+# CHECK: msr trceventctl0r, x16
+0x1b 0x9 0x11 0xd5
+# CHECK: msr trceventctl1r, x27
+0x1a 0xb 0x11 0xd5
+# CHECK: msr trcstallctlr, x26
+0x0 0xc 0x11 0xd5
+# CHECK: msr trctsctlr, x0
+0xe 0xd 0x11 0xd5
+# CHECK: msr trcsyncpr, x14
+0x8 0xe 0x11 0xd5
+# CHECK: msr trcccctlr, x8
+0x6 0xf 0x11 0xd5
+# CHECK: msr trcbbctlr, x6
+0x37 0x0 0x11 0xd5
+# CHECK: msr trctraceidr, x23
+0x25 0x1 0x11 0xd5
+# CHECK: msr trcqctlr, x5
+0x40 0x0 0x11 0xd5
+# CHECK: msr trcvictlr, x0
+0x40 0x1 0x11 0xd5
+# CHECK: msr trcviiectlr, x0
+0x41 0x2 0x11 0xd5
+# CHECK: msr trcvissctlr, x1
+0x40 0x3 0x11 0xd5
+# CHECK: msr trcvipcssctlr, x0
+0x47 0x8 0x11 0xd5
+# CHECK: msr trcvdctlr, x7
+0x52 0x9 0x11 0xd5
+# CHECK: msr trcvdsacctlr, x18
+0x58 0xa 0x11 0xd5
+# CHECK: msr trcvdarcctlr, x24
+0x9c 0x0 0x11 0xd5
+# CHECK: msr trcseqevr0, x28
+0x95 0x1 0x11 0xd5
+# CHECK: msr trcseqevr1, x21
+0x90 0x2 0x11 0xd5
+# CHECK: msr trcseqevr2, x16
+0x90 0x6 0x11 0xd5
+# CHECK: msr trcseqrstevr, x16
+0x99 0x7 0x11 0xd5
+# CHECK: msr trcseqstr, x25
+0x9d 0x8 0x11 0xd5
+# CHECK: msr trcextinselr, x29
+0xb4 0x0 0x11 0xd5
+# CHECK: msr trccntrldvr0, x20
+0xb4 0x1 0x11 0xd5
+# CHECK: msr trccntrldvr1, x20
+0xb6 0x2 0x11 0xd5
+# CHECK: msr trccntrldvr2, x22
+0xac 0x3 0x11 0xd5
+# CHECK: msr trccntrldvr3, x12
+0xb4 0x4 0x11 0xd5
+# CHECK: msr trccntctlr0, x20
+0xa4 0x5 0x11 0xd5
+# CHECK: msr trccntctlr1, x4
+0xa8 0x6 0x11 0xd5
+# CHECK: msr trccntctlr2, x8
+0xb0 0x7 0x11 0xd5
+# CHECK: msr trccntctlr3, x16
+0xa5 0x8 0x11 0xd5
+# CHECK: msr trccntvr0, x5
+0xbb 0x9 0x11 0xd5
+# CHECK: msr trccntvr1, x27
+0xb5 0xa 0x11 0xd5
+# CHECK: msr trccntvr2, x21
+0xa8 0xb 0x11 0xd5
+# CHECK: msr trccntvr3, x8
+0xe6 0x0 0x11 0xd5
+# CHECK: msr trcimspec0, x6
+0xfb 0x1 0x11 0xd5
+# CHECK: msr trcimspec1, x27
+0xf7 0x2 0x11 0xd5
+# CHECK: msr trcimspec2, x23
+0xef 0x3 0x11 0xd5
+# CHECK: msr trcimspec3, x15
+0xed 0x4 0x11 0xd5
+# CHECK: msr trcimspec4, x13
+0xf9 0x5 0x11 0xd5
+# CHECK: msr trcimspec5, x25
+0xf3 0x6 0x11 0xd5
+# CHECK: msr trcimspec6, x19
+0xfb 0x7 0x11 0xd5
+# CHECK: msr trcimspec7, x27
+0x4 0x12 0x11 0xd5
+# CHECK: msr trcrsctlr2, x4
+0x0 0x13 0x11 0xd5
+# CHECK: msr trcrsctlr3, x0
+0x15 0x14 0x11 0xd5
+# CHECK: msr trcrsctlr4, x21
+0x8 0x15 0x11 0xd5
+# CHECK: msr trcrsctlr5, x8
+0x14 0x16 0x11 0xd5
+# CHECK: msr trcrsctlr6, x20
+0xb 0x17 0x11 0xd5
+# CHECK: msr trcrsctlr7, x11
+0x12 0x18 0x11 0xd5
+# CHECK: msr trcrsctlr8, x18
+0x18 0x19 0x11 0xd5
+# CHECK: msr trcrsctlr9, x24
+0xf 0x1a 0x11 0xd5
+# CHECK: msr trcrsctlr10, x15
+0x15 0x1b 0x11 0xd5
+# CHECK: msr trcrsctlr11, x21
+0x4 0x1c 0x11 0xd5
+# CHECK: msr trcrsctlr12, x4
+0x1c 0x1d 0x11 0xd5
+# CHECK: msr trcrsctlr13, x28
+0x3 0x1e 0x11 0xd5
+# CHECK: msr trcrsctlr14, x3
+0x14 0x1f 0x11 0xd5
+# CHECK: msr trcrsctlr15, x20
+0x2c 0x10 0x11 0xd5
+# CHECK: msr trcrsctlr16, x12
+0x31 0x11 0x11 0xd5
+# CHECK: msr trcrsctlr17, x17
+0x2a 0x12 0x11 0xd5
+# CHECK: msr trcrsctlr18, x10
+0x2b 0x13 0x11 0xd5
+# CHECK: msr trcrsctlr19, x11
+0x23 0x14 0x11 0xd5
+# CHECK: msr trcrsctlr20, x3
+0x32 0x15 0x11 0xd5
+# CHECK: msr trcrsctlr21, x18
+0x3a 0x16 0x11 0xd5
+# CHECK: msr trcrsctlr22, x26
+0x25 0x17 0x11 0xd5
+# CHECK: msr trcrsctlr23, x5
+0x39 0x18 0x11 0xd5
+# CHECK: msr trcrsctlr24, x25
+0x25 0x19 0x11 0xd5
+# CHECK: msr trcrsctlr25, x5
+0x24 0x1a 0x11 0xd5
+# CHECK: msr trcrsctlr26, x4
+0x34 0x1b 0x11 0xd5
+# CHECK: msr trcrsctlr27, x20
+0x25 0x1c 0x11 0xd5
+# CHECK: msr trcrsctlr28, x5
+0x2a 0x1d 0x11 0xd5
+# CHECK: msr trcrsctlr29, x10
+0x38 0x1e 0x11 0xd5
+# CHECK: msr trcrsctlr30, x24
+0x34 0x1f 0x11 0xd5
+# CHECK: msr trcrsctlr31, x20
+0x57 0x10 0x11 0xd5
+# CHECK: msr trcssccr0, x23
+0x5b 0x11 0x11 0xd5
+# CHECK: msr trcssccr1, x27
+0x5b 0x12 0x11 0xd5
+# CHECK: msr trcssccr2, x27
+0x46 0x13 0x11 0xd5
+# CHECK: msr trcssccr3, x6
+0x43 0x14 0x11 0xd5
+# CHECK: msr trcssccr4, x3
+0x4c 0x15 0x11 0xd5
+# CHECK: msr trcssccr5, x12
+0x47 0x16 0x11 0xd5
+# CHECK: msr trcssccr6, x7
+0x46 0x17 0x11 0xd5
+# CHECK: msr trcssccr7, x6
+0x54 0x18 0x11 0xd5
+# CHECK: msr trcsscsr0, x20
+0x51 0x19 0x11 0xd5
+# CHECK: msr trcsscsr1, x17
+0x4b 0x1a 0x11 0xd5
+# CHECK: msr trcsscsr2, x11
+0x44 0x1b 0x11 0xd5
+# CHECK: msr trcsscsr3, x4
+0x4e 0x1c 0x11 0xd5
+# CHECK: msr trcsscsr4, x14
+0x56 0x1d 0x11 0xd5
+# CHECK: msr trcsscsr5, x22
+0x43 0x1e 0x11 0xd5
+# CHECK: msr trcsscsr6, x3
+0x4b 0x1f 0x11 0xd5
+# CHECK: msr trcsscsr7, x11
+0x83 0x14 0x11 0xd5
+# CHECK: msr trcpdcr, x3
+0x6 0x20 0x11 0xd5
+# CHECK: msr trcacvr0, x6
+0x14 0x22 0x11 0xd5
+# CHECK: msr trcacvr1, x20
+0x19 0x24 0x11 0xd5
+# CHECK: msr trcacvr2, x25
+0x1 0x26 0x11 0xd5
+# CHECK: msr trcacvr3, x1
+0x1c 0x28 0x11 0xd5
+# CHECK: msr trcacvr4, x28
+0xf 0x2a 0x11 0xd5
+# CHECK: msr trcacvr5, x15
+0x19 0x2c 0x11 0xd5
+# CHECK: msr trcacvr6, x25
+0xc 0x2e 0x11 0xd5
+# CHECK: msr trcacvr7, x12
+0x25 0x20 0x11 0xd5
+# CHECK: msr trcacvr8, x5
+0x39 0x22 0x11 0xd5
+# CHECK: msr trcacvr9, x25
+0x2d 0x24 0x11 0xd5
+# CHECK: msr trcacvr10, x13
+0x2a 0x26 0x11 0xd5
+# CHECK: msr trcacvr11, x10
+0x33 0x28 0x11 0xd5
+# CHECK: msr trcacvr12, x19
+0x2a 0x2a 0x11 0xd5
+# CHECK: msr trcacvr13, x10
+0x33 0x2c 0x11 0xd5
+# CHECK: msr trcacvr14, x19
+0x22 0x2e 0x11 0xd5
+# CHECK: msr trcacvr15, x2
+0x4f 0x20 0x11 0xd5
+# CHECK: msr trcacatr0, x15
+0x4d 0x22 0x11 0xd5
+# CHECK: msr trcacatr1, x13
+0x48 0x24 0x11 0xd5
+# CHECK: msr trcacatr2, x8
+0x41 0x26 0x11 0xd5
+# CHECK: msr trcacatr3, x1
+0x4b 0x28 0x11 0xd5
+# CHECK: msr trcacatr4, x11
+0x48 0x2a 0x11 0xd5
+# CHECK: msr trcacatr5, x8
+0x58 0x2c 0x11 0xd5
+# CHECK: msr trcacatr6, x24
+0x46 0x2e 0x11 0xd5
+# CHECK: msr trcacatr7, x6
+0x77 0x20 0x11 0xd5
+# CHECK: msr trcacatr8, x23
+0x65 0x22 0x11 0xd5
+# CHECK: msr trcacatr9, x5
+0x6b 0x24 0x11 0xd5
+# CHECK: msr trcacatr10, x11
+0x6b 0x26 0x11 0xd5
+# CHECK: msr trcacatr11, x11
+0x63 0x28 0x11 0xd5
+# CHECK: msr trcacatr12, x3
+0x7c 0x2a 0x11 0xd5
+# CHECK: msr trcacatr13, x28
+0x79 0x2c 0x11 0xd5
+# CHECK: msr trcacatr14, x25
+0x64 0x2e 0x11 0xd5
+# CHECK: msr trcacatr15, x4
+0x86 0x20 0x11 0xd5
+# CHECK: msr trcdvcvr0, x6
+0x83 0x24 0x11 0xd5
+# CHECK: msr trcdvcvr1, x3
+0x85 0x28 0x11 0xd5
+# CHECK: msr trcdvcvr2, x5
+0x8b 0x2c 0x11 0xd5
+# CHECK: msr trcdvcvr3, x11
+0xa9 0x20 0x11 0xd5
+# CHECK: msr trcdvcvr4, x9
+0xae 0x24 0x11 0xd5
+# CHECK: msr trcdvcvr5, x14
+0xaa 0x28 0x11 0xd5
+# CHECK: msr trcdvcvr6, x10
+0xac 0x2c 0x11 0xd5
+# CHECK: msr trcdvcvr7, x12
+0xc8 0x20 0x11 0xd5
+# CHECK: msr trcdvcmr0, x8
+0xc8 0x24 0x11 0xd5
+# CHECK: msr trcdvcmr1, x8
+0xd6 0x28 0x11 0xd5
+# CHECK: msr trcdvcmr2, x22
+0xd6 0x2c 0x11 0xd5
+# CHECK: msr trcdvcmr3, x22
+0xe5 0x20 0x11 0xd5
+# CHECK: msr trcdvcmr4, x5
+0xf0 0x24 0x11 0xd5
+# CHECK: msr trcdvcmr5, x16
+0xfb 0x28 0x11 0xd5
+# CHECK: msr trcdvcmr6, x27
+0xf5 0x2c 0x11 0xd5
+# CHECK: msr trcdvcmr7, x21
+0x8 0x30 0x11 0xd5
+# CHECK: msr trccidcvr0, x8
+0x6 0x32 0x11 0xd5
+# CHECK: msr trccidcvr1, x6
+0x9 0x34 0x11 0xd5
+# CHECK: msr trccidcvr2, x9
+0x8 0x36 0x11 0xd5
+# CHECK: msr trccidcvr3, x8
+0x3 0x38 0x11 0xd5
+# CHECK: msr trccidcvr4, x3
+0x15 0x3a 0x11 0xd5
+# CHECK: msr trccidcvr5, x21
+0xc 0x3c 0x11 0xd5
+# CHECK: msr trccidcvr6, x12
+0x7 0x3e 0x11 0xd5
+# CHECK: msr trccidcvr7, x7
+0x24 0x30 0x11 0xd5
+# CHECK: msr trcvmidcvr0, x4
+0x23 0x32 0x11 0xd5
+# CHECK: msr trcvmidcvr1, x3
+0x29 0x34 0x11 0xd5
+# CHECK: msr trcvmidcvr2, x9
+0x31 0x36 0x11 0xd5
+# CHECK: msr trcvmidcvr3, x17
+0x2e 0x38 0x11 0xd5
+# CHECK: msr trcvmidcvr4, x14
+0x2c 0x3a 0x11 0xd5
+# CHECK: msr trcvmidcvr5, x12
+0x2a 0x3c 0x11 0xd5
+# CHECK: msr trcvmidcvr6, x10
+0x23 0x3e 0x11 0xd5
+# CHECK: msr trcvmidcvr7, x3
+0x4e 0x30 0x11 0xd5
+# CHECK: msr trccidcctlr0, x14
+0x56 0x31 0x11 0xd5
+# CHECK: msr trccidcctlr1, x22
+0x48 0x32 0x11 0xd5
+# CHECK: msr trcvmidcctlr0, x8
+0x4f 0x33 0x11 0xd5
+# CHECK: msr trcvmidcctlr1, x15
+0x81 0x70 0x11 0xd5
+# CHECK: msr trcitctrl, x1
+0xc7 0x78 0x11 0xd5
+# CHECK: msr trcclaimset, x7
+0xdd 0x79 0x11 0xd5
+# CHECK: msr trcclaimclr, x29
+
+
diff --git a/test/MC/Disassembler/ARM/hex-immediates.txt b/test/MC/Disassembler/ARM/hex-immediates.txt
new file mode 100644
index 000000000000..2634d7ed3368
--- /dev/null
+++ b/test/MC/Disassembler/ARM/hex-immediates.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -hdis < %s | FileCheck %s
+# CHECK: ldr r4, [pc, #0x20]
+0x08 0x4c
+# CHECK: sub sp, #0x84
+0xa1 0xb0
diff --git a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
index 2d2a62811ae9..99da8ce9d85a 100644
--- a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
+++ b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
@@ -7,7 +7,7 @@
# -------------------------------------------------------------------------------------------------
#
# A8.6.391 VST1 (multiple single elements)
-# This encoding looks like: vst1.8 {d0,d1,d2}, [r0, :128]
+# This encoding looks like: vst1.8 {d0,d1,d2}, [r0:128]
# But bits 5-4 for the alignment of 128 encoded as align = 0b10, is available only if <list>
# contains two or four registers. rdar://11220250
0x00 0xf9 0x2f 0x06
diff --git a/test/MC/Disassembler/ARM/neon-tests.txt b/test/MC/Disassembler/ARM/neon-tests.txt
index a7b6b1ccb408..65e9954ac68b 100644
--- a/test/MC/Disassembler/ARM/neon-tests.txt
+++ b/test/MC/Disassembler/ARM/neon-tests.txt
@@ -21,10 +21,10 @@
# CHECK: vld4.8 {d4, d6, d8, d10}, [r2]
0x0f 0x41 0x22 0xf4
-# CHECK: vld1.32 {d3[], d4[]}, [r0, :32]!
+# CHECK: vld1.32 {d3[], d4[]}, [r0:32]!
0xbd 0x3c 0xa0 0xf4
-# CHECK: vld4.16 {d3[], d5[], d7[], d9[]}, [r0, :64]!
+# CHECK: vld4.16 {d3[], d5[], d7[], d9[]}, [r0:64]!
0x7d 0x3f 0xa0 0xf4
# CHECK: vorr d0, d15, d15
@@ -75,7 +75,7 @@
# CHECK: vbic.i32 q2, #0xa900
0x79 0x43 0x82 0xf3
-# CHECK: vst2.32 {d16, d18}, [r2, :64], r2
+# CHECK: vst2.32 {d16, d18}, [r2:64], r2
0x92 0x9 0x42 0xf4
# CHECK: vmov.s8 r0, d8[1]
diff --git a/test/MC/Disassembler/ARM/neon.txt b/test/MC/Disassembler/ARM/neon.txt
index 649424af22fd..cd5f418b56c0 100644
--- a/test/MC/Disassembler/ARM/neon.txt
+++ b/test/MC/Disassembler/ARM/neon.txt
@@ -1638,7 +1638,7 @@
0x1f 0x07 0x60 0xf4
-# CHECK: vld1.8 {d16}, [r0, :64]
+# CHECK: vld1.8 {d16}, [r0:64]
0x4f 0x07 0x60 0xf4
# CHECK: vld1.16 {d16}, [r0]
0x8f 0x07 0x60 0xf4
@@ -1646,37 +1646,37 @@
0xcf 0x07 0x60 0xf4
# CHECK: vld1.64 {d16}, [r0]
0x1f 0x0a 0x60 0xf4
-# CHECK: vld1.8 {d16, d17}, [r0, :64]
+# CHECK: vld1.8 {d16, d17}, [r0:64]
0x6f 0x0a 0x60 0xf4
-# CHECK: vld1.16 {d16, d17}, [r0, :128]
+# CHECK: vld1.16 {d16, d17}, [r0:128]
0x8f 0x0a 0x60 0xf4
# CHECK: vld1.32 {d16, d17}, [r0]
0xcf 0x0a 0x60 0xf4
# CHECK: vld1.64 {d16, d17}, [r0]
0x1f 0x08 0x60 0xf4
-# CHECK: vld2.8 {d16, d17}, [r0, :64]
+# CHECK: vld2.8 {d16, d17}, [r0:64]
0x6f 0x08 0x60 0xf4
-# CHECK: vld2.16 {d16, d17}, [r0, :128]
+# CHECK: vld2.16 {d16, d17}, [r0:128]
0x8f 0x08 0x60 0xf4
# CHECK: vld2.32 {d16, d17}, [r0]
0x1f 0x03 0x60 0xf4
-# CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64]
+# CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64]
0x6f 0x03 0x60 0xf4
-# CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128]
+# CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128]
0xbf 0x03 0x60 0xf4
-# CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256]
+# CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256]
0x1f 0x04 0x60 0xf4
-# CHECK: vld3.8 {d16, d17, d18}, [r0, :64]
+# CHECK: vld3.8 {d16, d17, d18}, [r0:64]
0x4f 0x04 0x60 0xf4
# CHECK: vld3.16 {d16, d17, d18}, [r0]
0x8f 0x04 0x60 0xf4
# CHECK: vld3.32 {d16, d17, d18}, [r0]
0x1d 0x05 0x60 0xf4
-# CHECK: vld3.8 {d16, d18, d20}, [r0, :64]!
+# CHECK: vld3.8 {d16, d18, d20}, [r0:64]!
0x1d 0x15 0x60 0xf4
-# CHECK: vld3.8 {d17, d19, d21}, [r0, :64]!
+# CHECK: vld3.8 {d17, d19, d21}, [r0:64]!
0x4d 0x05 0x60 0xf4
# CHECK: vld3.16 {d16, d18, d20}, [r0]!
0x4d 0x15 0x60 0xf4
@@ -1687,15 +1687,15 @@
# CHECK: vld3.32 {d17, d19, d21}, [r0]!
0x1f 0x00 0x60 0xf4
-# CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64]
+# CHECK: vld4.8 {d16, d17, d18, d19}, [r0:64]
0x6f 0x00 0x60 0xf4
-# CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128]
+# CHECK: vld4.16 {d16, d17, d18, d19}, [r0:128]
0xbf 0x00 0x60 0xf4
-# CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256]
+# CHECK: vld4.32 {d16, d17, d18, d19}, [r0:256]
0x3d 0x01 0x60 0xf4
-# CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]!
+# CHECK: vld4.8 {d16, d18, d20, d22}, [r0:256]!
0x3d 0x11 0x60 0xf4
-# CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256]!
+# CHECK: vld4.8 {d17, d19, d21, d23}, [r0:256]!
0x4d 0x01 0x60 0xf4
# CHECK: vld4.16 {d16, d18, d20, d22}, [r0]!
0x4d 0x11 0x60 0xf4
@@ -1708,20 +1708,20 @@
0x6f 0x00 0xe0 0xf4
# CHECK: vld1.8 {d16[3]}, [r0]
0x9f 0x04 0xe0 0xf4
-# CHECK: vld1.16 {d16[2]}, [r0, :16]
+# CHECK: vld1.16 {d16[2]}, [r0:16]
0xbf 0x08 0xe0 0xf4
-# CHECK: vld1.32 {d16[1]}, [r0, :32]
+# CHECK: vld1.32 {d16[1]}, [r0:32]
0x3f 0x01 0xe0 0xf4
-# CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
+# CHECK: vld2.8 {d16[1], d17[1]}, [r0:16]
0x5f 0x05 0xe0 0xf4
-# CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
+# CHECK: vld2.16 {d16[1], d17[1]}, [r0:32]
0x8f 0x09 0xe0 0xf4
# CHECK: vld2.32 {d16[1], d17[1]}, [r0]
0x6f 0x15 0xe0 0xf4
# CHECK: vld2.16 {d17[1], d19[1]}, [r0]
0x5f 0x19 0xe0 0xf4
-# CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]
+# CHECK: vld2.32 {d17[0], d19[0]}, [r0:64]
0x2f 0x02 0xe0 0xf4
# CHECK: vld3.8 {d16[1], d17[1], d18[1]}, [r0]
@@ -1754,44 +1754,44 @@
0xa5 0x0e 0xa4 0xf4
0x3f 0x03 0xe0 0xf4
-# CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+# CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
0x4f 0x07 0xe0 0xf4
# CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
0xaf 0x0b 0xe0 0xf4
-# CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+# CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
0x7f 0x07 0xe0 0xf4
-# CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
+# CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0:64]
0x4f 0x1b 0xe0 0xf4
# CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
0x0f 0x0f 0xa4 0xf4
# CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4]
0x3f 0x0f 0xa4 0xf4
-# CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4, :32]
+# CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:32]
0x1d 0x0f 0xa4 0xf4
-# CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4, :32]!
+# CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:32]!
0x35 0x0f 0xa4 0xf4
-# CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4, :32], r5
+# CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:32], r5
0x4f 0x0f 0xa4 0xf4
# CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4]
0x7f 0x0f 0xa4 0xf4
-# CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4, :64]
+# CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:64]
0x5d 0x0f 0xa4 0xf4
-# CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4, :64]!
+# CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:64]!
0x75 0x0f 0xa4 0xf4
-# CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4, :64], r5
+# CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:64], r5
0x8f 0x0f 0xa4 0xf4
# CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4]
0xbf 0x0f 0xa4 0xf4
-# CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4, :64]
+# CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:64]
0xdd 0x0f 0xa4 0xf4
-# CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4, :128]!
+# CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:128]!
0xf5 0x0f 0xa4 0xf4
-# CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4, :128], r5
+# CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:128], r5
0x1f 0x07 0x40 0xf4
-# CHECK: vst1.8 {d16}, [r0, :64]
+# CHECK: vst1.8 {d16}, [r0:64]
0x4f 0x07 0x40 0xf4
# CHECK: vst1.16 {d16}, [r0]
0x8f 0x07 0x40 0xf4
@@ -1799,37 +1799,37 @@
0xcf 0x07 0x40 0xf4
# CHECK: vst1.64 {d16}, [r0]
0x1f 0x0a 0x40 0xf4
-# CHECK: vst1.8 {d16, d17}, [r0, :64]
+# CHECK: vst1.8 {d16, d17}, [r0:64]
0x6f 0x0a 0x40 0xf4
-# CHECK: vst1.16 {d16, d17}, [r0, :128]
+# CHECK: vst1.16 {d16, d17}, [r0:128]
0x8f 0x0a 0x40 0xf4
# CHECK: vst1.32 {d16, d17}, [r0]
0xcf 0x0a 0x40 0xf4
# CHECK: vst1.64 {d16, d17}, [r0]
0x1f 0x08 0x40 0xf4
-# CHECK: vst2.8 {d16, d17}, [r0, :64]
+# CHECK: vst2.8 {d16, d17}, [r0:64]
0x6f 0x08 0x40 0xf4
-# CHECK: vst2.16 {d16, d17}, [r0, :128]
+# CHECK: vst2.16 {d16, d17}, [r0:128]
0x8f 0x08 0x40 0xf4
# CHECK: vst2.32 {d16, d17}, [r0]
0x1f 0x03 0x40 0xf4
-# CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64]
+# CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64]
0x6f 0x03 0x40 0xf4
-# CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128]
+# CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128]
0xbf 0x03 0x40 0xf4
-# CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256]
+# CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256]
0x1f 0x04 0x40 0xf4
-# CHECK: vst3.8 {d16, d17, d18}, [r0, :64]
+# CHECK: vst3.8 {d16, d17, d18}, [r0:64]
0x4f 0x04 0x40 0xf4
# CHECK: vst3.16 {d16, d17, d18}, [r0]
0x8f 0x04 0x40 0xf4
# CHECK: vst3.32 {d16, d17, d18}, [r0]
0x1d 0x05 0x40 0xf4
-# CHECK: vst3.8 {d16, d18, d20}, [r0, :64]!
+# CHECK: vst3.8 {d16, d18, d20}, [r0:64]!
0x1d 0x15 0x40 0xf4
-# CHECK: vst3.8 {d17, d19, d21}, [r0, :64]!
+# CHECK: vst3.8 {d17, d19, d21}, [r0:64]!
0x4d 0x05 0x40 0xf4
# CHECK: vst3.16 {d16, d18, d20}, [r0]!
0x4d 0x15 0x40 0xf4
@@ -1840,13 +1840,13 @@
# CHECK: vst3.32 {d17, d19, d21}, [r0]!
0x1f 0x00 0x40 0xf4
-# CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64]
+# CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64]
0x6f 0x00 0x40 0xf4
-# CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128]
+# CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128]
0x3d 0x01 0x40 0xf4
-# CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]!
+# CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]!
0x3d 0x11 0x40 0xf4
-# CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256]!
+# CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256]!
0x4d 0x01 0x40 0xf4
# CHECK: vst4.16 {d16, d18, d20, d22}, [r0]!
0x4d 0x11 0x40 0xf4
@@ -1857,15 +1857,15 @@
# CHECK: vst4.32 {d17, d19, d21, d23}, [r0]!
0x3f 0x01 0xc0 0xf4
-# CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16]
+# CHECK: vst2.8 {d16[1], d17[1]}, [r0:16]
0x5f 0x05 0xc0 0xf4
-# CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32]
+# CHECK: vst2.16 {d16[1], d17[1]}, [r0:32]
0x8f 0x09 0xc0 0xf4
# CHECK: vst2.32 {d16[1], d17[1]}, [r0]
0x6f 0x15 0xc0 0xf4
# CHECK: vst2.16 {d17[1], d19[1]}, [r0]
0x5f 0x19 0xc0 0xf4
-# CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64]
+# CHECK: vst2.32 {d17[0], d19[0]}, [r0:64]
0x2f 0x02 0xc0 0xf4
# CHECK: vst3.8 {d16[1], d17[1], d18[1]}, [r0]
@@ -1879,13 +1879,13 @@
# CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r0]
0x3f 0x03 0xc0 0xf4
-# CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+# CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
0x4f 0x07 0xc0 0xf4
# CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
0xaf 0x0b 0xc0 0xf4
-# CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+# CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
0xff 0x17 0xc0 0xf4
-# CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
+# CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64]
0x4f 0x1b 0xc0 0xf4
# CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
@@ -1920,11 +1920,11 @@
# CHECK: vcvttmi.f32.f16 s2, s19
0x1d 0x76 0x66 0xf4
-# CHECK: vld1.8 {d23, d24, d25}, [r6, :64]!
+# CHECK: vld1.8 {d23, d24, d25}, [r6:64]!
0x9d 0x62 0x6f 0xf4
-# CHECK: vld1.32 {d22, d23, d24, d25}, [pc, :64]!
+# CHECK: vld1.32 {d22, d23, d24, d25}, [pc:64]!
0x9d 0xaa 0x41 0xf4
-# CHECK: vst1.32 {d26, d27}, [r1, :64]!
+# CHECK: vst1.32 {d26, d27}, [r1:64]!
0x10 0x0f 0x83 0xf2
0x50 0x0f 0x83 0xf2
diff --git a/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt b/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt
index e53739e73975..650614351fb0 100644
--- a/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt
+++ b/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt
@@ -28,13 +28,13 @@
0xa0 0xf9 0xd0 0x04
# CHECK: vld1.16 {d0[0]}, [r0], r0 @ encoding: [0xa0,0xf9,0x00,0x04]
-# CHECK: vld1.16 {d0[0]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x10,0x04]
+# CHECK: vld1.16 {d0[0]}, [r0:16], r0 @ encoding: [0xa0,0xf9,0x10,0x04]
# CHECK: vld1.16 {d0[1]}, [r0], r0 @ encoding: [0xa0,0xf9,0x40,0x04]
-# CHECK: vld1.16 {d0[1]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x50,0x04]
+# CHECK: vld1.16 {d0[1]}, [r0:16], r0 @ encoding: [0xa0,0xf9,0x50,0x04]
# CHECK: vld1.16 {d0[2]}, [r0], r0 @ encoding: [0xa0,0xf9,0x80,0x04]
-# CHECK: vld1.16 {d0[2]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x90,0x04]
+# CHECK: vld1.16 {d0[2]}, [r0:16], r0 @ encoding: [0xa0,0xf9,0x90,0x04]
# CHECK: vld1.16 {d0[3]}, [r0], r0 @ encoding: [0xa0,0xf9,0xc0,0x04]
-# CHECK: vld1.16 {d0[3]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0xd0,0x04]
+# CHECK: vld1.16 {d0[3]}, [r0:16], r0 @ encoding: [0xa0,0xf9,0xd0,0x04]
0xa0 0xf9 0x00 0x08
0xa0 0xf9 0x30 0x08
@@ -42,20 +42,20 @@
0xa0 0xf9 0xb0 0x08
# CHECK: vld1.32 {d0[0]}, [r0], r0 @ encoding: [0xa0,0xf9,0x00,0x08]
-# CHECK: vld1.32 {d0[0]}, [r0, :32], r0 @ encoding: [0xa0,0xf9,0x30,0x08]
+# CHECK: vld1.32 {d0[0]}, [r0:32], r0 @ encoding: [0xa0,0xf9,0x30,0x08]
# CHECK: vld1.32 {d0[1]}, [r0], r0 @ encoding: [0xa0,0xf9,0x80,0x08]
-# CHECK: vld1.32 {d0[1]}, [r0, :32], r0 @ encoding: [0xa0,0xf9,0xb0,0x08]
+# CHECK: vld1.32 {d0[1]}, [r0:32], r0 @ encoding: [0xa0,0xf9,0xb0,0x08]
0xa0 0xf9 0x1f 0x04
0xa0 0xf9 0x8f 0x00
-# CHECK: vld1.16 {d0[0]}, [r0, :16] @ encoding: [0xa0,0xf9,0x1f,0x04]
+# CHECK: vld1.16 {d0[0]}, [r0:16] @ encoding: [0xa0,0xf9,0x1f,0x04]
# CHECK: vld1.8 {d0[4]}, [r0] @ encoding: [0xa0,0xf9,0x8f,0x00]
0xa0 0xf9 0x1d 0x04
0xa0 0xf9 0x8d 0x00
-# CHECK: vld1.16 {d0[0]}, [r0, :16]! @ encoding: [0xa0,0xf9,0x1d,0x04]
+# CHECK: vld1.16 {d0[0]}, [r0:16]! @ encoding: [0xa0,0xf9,0x1d,0x04]
# CHECK: vld1.8 {d0[4]}, [r0]! @ encoding: [0xa0,0xf9,0x8d,0x00]
0xa5 0xf9 0x10 0x04
@@ -63,15 +63,15 @@
0xae 0xf9 0x1a 0x04
0xa5 0xf9 0x1a 0x94
-# CHECK: vld1.16 {d0[0]}, [r5, :16], r0 @ encoding: [0xa5,0xf9,0x10,0x04]
-# CHECK: vld1.16 {d0[0]}, [r5, :16], r10 @ encoding: [0xa5,0xf9,0x1a,0x04]
-# CHECK: vld1.16 {d0[0]}, [lr, :16], r10 @ encoding: [0xae,0xf9,0x1a,0x04]
-# CHECK: vld1.16 {d9[0]}, [r5, :16], r10 @ encoding: [0xa5,0xf9,0x1a,0x94]
+# CHECK: vld1.16 {d0[0]}, [r5:16], r0 @ encoding: [0xa5,0xf9,0x10,0x04]
+# CHECK: vld1.16 {d0[0]}, [r5:16], r10 @ encoding: [0xa5,0xf9,0x1a,0x04]
+# CHECK: vld1.16 {d0[0]}, [lr:16], r10 @ encoding: [0xae,0xf9,0x1a,0x04]
+# CHECK: vld1.16 {d9[0]}, [r5:16], r10 @ encoding: [0xa5,0xf9,0x1a,0x94]
0xa0 0xf9 0x20 0x0b
0xa0 0xf9 0x20 0x07
0xa0 0xf9 0x20 0x03
-# CHECK: vld4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0, :128], r0 @ encoding: [0xa0,0xf9,0x20,0x0b]
+# CHECK: vld4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0:128], r0 @ encoding: [0xa0,0xf9,0x20,0x0b]
# CHECK: vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r0 @ encoding: [0xa0,0xf9,0x20,0x07]
# CHECK: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r0], r0 @ encoding: [0xa0,0xf9,0x20,0x03]
diff --git a/test/MC/Disassembler/ARM/neont-VST-reencoding.txt b/test/MC/Disassembler/ARM/neont-VST-reencoding.txt
index eb3722c08531..5119d925d8bf 100644
--- a/test/MC/Disassembler/ARM/neont-VST-reencoding.txt
+++ b/test/MC/Disassembler/ARM/neont-VST-reencoding.txt
@@ -28,13 +28,13 @@
0xc9 0xf9 0xd9 0x94
# CHECK: vst1.16 {d0[0]}, [r0], r0 @ encoding: [0x80,0xf9,0x00,0x04]
-# CHECK: vst1.16 {d16[0]}, [r3, :16], r3 @ encoding: [0xc3,0xf9,0x13,0x04]
+# CHECK: vst1.16 {d16[0]}, [r3:16], r3 @ encoding: [0xc3,0xf9,0x13,0x04]
# CHECK: vst1.16 {d16[1]}, [r4], r3 @ encoding: [0xc4,0xf9,0x43,0x04]
-# CHECK: vst1.16 {d16[1]}, [r5, :16], r5 @ encoding: [0xc5,0xf9,0x55,0x04]
+# CHECK: vst1.16 {d16[1]}, [r5:16], r5 @ encoding: [0xc5,0xf9,0x55,0x04]
# CHECK: vst1.16 {d16[2]}, [r6], r5 @ encoding: [0xc6,0xf9,0x85,0x04]
-# CHECK: vst1.16 {d23[2]}, [r7, :16], r5 @ encoding: [0xc7,0xf9,0x95,0x74]
+# CHECK: vst1.16 {d23[2]}, [r7:16], r5 @ encoding: [0xc7,0xf9,0x95,0x74]
# CHECK: vst1.16 {d24[3]}, [r8], r7 @ encoding: [0xc8,0xf9,0xc7,0x84]
-# CHECK: vst1.16 {d25[3]}, [r9, :16], r9 @ encoding: [0xc9,0xf9,0xd9,0x94]
+# CHECK: vst1.16 {d25[3]}, [r9:16], r9 @ encoding: [0xc9,0xf9,0xd9,0x94]
0x8a 0xf9 0x01 0xa8
0xcb 0xf9 0x32 0x18
@@ -42,20 +42,20 @@
0xcd 0xf9 0xb4 0x28
# CHECK: vst1.32 {d10[0]}, [r10], r1 @ encoding: [0x8a,0xf9,0x01,0xa8]
-# CHECK: vst1.32 {d17[0]}, [r11, :32], r2 @ encoding: [0xcb,0xf9,0x32,0x18]
+# CHECK: vst1.32 {d17[0]}, [r11:32], r2 @ encoding: [0xcb,0xf9,0x32,0x18]
# CHECK: vst1.32 {d11[1]}, [r12], r3 @ encoding: [0x8c,0xf9,0x83,0xb8]
-# CHECK: vst1.32 {d18[1]}, [sp, :32], r4 @ encoding: [0xcd,0xf9,0xb4,0x28]
+# CHECK: vst1.32 {d18[1]}, [sp:32], r4 @ encoding: [0xcd,0xf9,0xb4,0x28]
0x81 0xf9 0x1f 0x44
0x82 0xf9 0x8f 0x30
-# CHECK: vst1.16 {d4[0]}, [r1, :16] @ encoding: [0x81,0xf9,0x1f,0x44]
+# CHECK: vst1.16 {d4[0]}, [r1:16] @ encoding: [0x81,0xf9,0x1f,0x44]
# CHECK: vst1.8 {d3[4]}, [r2] @ encoding: [0x82,0xf9,0x8f,0x30]
0x83 0xf9 0x1d 0x24
0x84 0xf9 0x8d 0x10
-# CHECK: vst1.16 {d2[0]}, [r3, :16]! @ encoding: [0x83,0xf9,0x1d,0x24]
+# CHECK: vst1.16 {d2[0]}, [r3:16]! @ encoding: [0x83,0xf9,0x1d,0x24]
# CHECK: vst1.8 {d1[4]}, [r4]! @ encoding: [0x84,0xf9,0x8d,0x10]
0x85 0xf9 0x10 0x04
@@ -63,15 +63,15 @@
0x8e 0xf9 0x1a 0x84
0x85 0xf9 0x1a 0x94
-# CHECK: vst1.16 {d0[0]}, [r5, :16], r0 @ encoding: [0x85,0xf9,0x10,0x04]
-# CHECK: vst1.16 {d7[0]}, [r5, :16], r10 @ encoding: [0x85,0xf9,0x1a,0x74]
-# CHECK: vst1.16 {d8[0]}, [lr, :16], r10 @ encoding: [0x8e,0xf9,0x1a,0x84]
-# CHECK: vst1.16 {d9[0]}, [r5, :16], r10 @ encoding: [0x85,0xf9,0x1a,0x94]
+# CHECK: vst1.16 {d0[0]}, [r5:16], r0 @ encoding: [0x85,0xf9,0x10,0x04]
+# CHECK: vst1.16 {d7[0]}, [r5:16], r10 @ encoding: [0x85,0xf9,0x1a,0x74]
+# CHECK: vst1.16 {d8[0]}, [lr:16], r10 @ encoding: [0x8e,0xf9,0x1a,0x84]
+# CHECK: vst1.16 {d9[0]}, [r5:16], r10 @ encoding: [0x85,0xf9,0x1a,0x94]
0x81 0xf9 0x24 0x0b
0x82 0xf9 0x25 0x07
0x83 0xf9 0x26 0x03
-# CHECK: vst4.32 {d0[0], d1[0], d2[0], d3[0]}, [r1, :128], r4 @ encoding: [0x81,0xf9,0x24,0x0b]
+# CHECK: vst4.32 {d0[0], d1[0], d2[0], d3[0]}, [r1:128], r4 @ encoding: [0x81,0xf9,0x24,0x0b]
# CHECK: vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r2], r5 @ encoding: [0x82,0xf9,0x25,0x07]
# CHECK: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r3], r6 @ encoding: [0x83,0xf9,0x26,0x03]
diff --git a/test/MC/Disassembler/ARM/neont2.txt b/test/MC/Disassembler/ARM/neont2.txt
index 7d7010febb5e..337457847213 100644
--- a/test/MC/Disassembler/ARM/neont2.txt
+++ b/test/MC/Disassembler/ARM/neont2.txt
@@ -1379,7 +1379,7 @@
# CHECK: vtbx.8 d20, {d16, d17, d18, d19}, d21
0x60 0xf9 0x1f 0x07
-# CHECK: vld1.8 {d16}, [r0, :64]
+# CHECK: vld1.8 {d16}, [r0:64]
0x60 0xf9 0x4f 0x07
# CHECK: vld1.16 {d16}, [r0]
0x60 0xf9 0x8f 0x07
@@ -1387,37 +1387,37 @@
0x60 0xf9 0xcf 0x07
# CHECK: vld1.64 {d16}, [r0]
0x60 0xf9 0x1f 0x0a
-# CHECK: vld1.8 {d16, d17}, [r0, :64]
+# CHECK: vld1.8 {d16, d17}, [r0:64]
0x60 0xf9 0x6f 0x0a
-# CHECK: vld1.16 {d16, d17}, [r0, :128]
+# CHECK: vld1.16 {d16, d17}, [r0:128]
0x60 0xf9 0x8f 0x0a
# CHECK: vld1.32 {d16, d17}, [r0]
0x60 0xf9 0xcf 0x0a
# CHECK: vld1.64 {d16, d17}, [r0]
0x60 0xf9 0x1f 0x08
-# CHECK: vld2.8 {d16, d17}, [r0, :64]
+# CHECK: vld2.8 {d16, d17}, [r0:64]
0x60 0xf9 0x6f 0x08
-# CHECK: vld2.16 {d16, d17}, [r0, :128]
+# CHECK: vld2.16 {d16, d17}, [r0:128]
0x60 0xf9 0x8f 0x08
# CHECK: vld2.32 {d16, d17}, [r0]
0x60 0xf9 0x1f 0x03
-# CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64]
+# CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64]
0x60 0xf9 0x6f 0x03
-# CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128]
+# CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128]
0x60 0xf9 0xbf 0x03
-# CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256]
+# CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256]
0x60 0xf9 0x1f 0x04
-# CHECK: vld3.8 {d16, d17, d18}, [r0, :64]
+# CHECK: vld3.8 {d16, d17, d18}, [r0:64]
0x60 0xf9 0x4f 0x04
# CHECK: vld3.16 {d16, d17, d18}, [r0]
0x60 0xf9 0x8f 0x04
# CHECK: vld3.32 {d16, d17, d18}, [r0]
0x60 0xf9 0x1d 0x05
-# CHECK: vld3.8 {d16, d18, d20}, [r0, :64]!
+# CHECK: vld3.8 {d16, d18, d20}, [r0:64]!
0x60 0xf9 0x1d 0x15
-# CHECK: vld3.8 {d17, d19, d21}, [r0, :64]!
+# CHECK: vld3.8 {d17, d19, d21}, [r0:64]!
0x60 0xf9 0x4d 0x05
# CHECK: vld3.16 {d16, d18, d20}, [r0]!
0x60 0xf9 0x4d 0x15
@@ -1428,15 +1428,15 @@
# CHECK: vld3.32 {d17, d19, d21}, [r0]!
0x60 0xf9 0x1f 0x00
-# CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64]
+# CHECK: vld4.8 {d16, d17, d18, d19}, [r0:64]
0x60 0xf9 0x6f 0x00
-# CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128]
+# CHECK: vld4.16 {d16, d17, d18, d19}, [r0:128]
0x60 0xf9 0xbf 0x00
-# CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256]
+# CHECK: vld4.32 {d16, d17, d18, d19}, [r0:256]
0x60 0xf9 0x3d 0x01
-# CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]!
+# CHECK: vld4.8 {d16, d18, d20, d22}, [r0:256]!
0x60 0xf9 0x3d 0x11
-# CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256]!
+# CHECK: vld4.8 {d17, d19, d21, d23}, [r0:256]!
0x60 0xf9 0x4d 0x01
# CHECK: vld4.16 {d16, d18, d20, d22}, [r0]!
0x60 0xf9 0x4d 0x11
@@ -1449,20 +1449,20 @@
0xe0 0xf9 0x6f 0x00
# CHECK: vld1.8 {d16[3]}, [r0]
0xe0 0xf9 0x9f 0x04
-# CHECK: vld1.16 {d16[2]}, [r0, :16]
+# CHECK: vld1.16 {d16[2]}, [r0:16]
0xe0 0xf9 0xbf 0x08
-# CHECK: vld1.32 {d16[1]}, [r0, :32]
+# CHECK: vld1.32 {d16[1]}, [r0:32]
0xe0 0xf9 0x3f 0x01
-# CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16]
+# CHECK: vld2.8 {d16[1], d17[1]}, [r0:16]
0xe0 0xf9 0x5f 0x05
-# CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32]
+# CHECK: vld2.16 {d16[1], d17[1]}, [r0:32]
0xe0 0xf9 0x8f 0x09
# CHECK: vld2.32 {d16[1], d17[1]}, [r0]
0xe0 0xf9 0x6f 0x15
# CHECK: vld2.16 {d17[1], d19[1]}, [r0]
0xe0 0xf9 0x5f 0x19
-# CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]
+# CHECK: vld2.32 {d17[0], d19[0]}, [r0:64]
0xe0 0xf9 0x2f 0x02
# CHECK: vld3.8 {d16[1], d17[1], d18[1]}, [r0]
@@ -1495,43 +1495,43 @@
# CHECK: vld3.32 {d0[], d2[], d4[]}, [r4], r5
0xe0 0xf9 0x3f 0x03
-# CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+# CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
0xe0 0xf9 0x4f 0x07
# CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
0xe0 0xf9 0xaf 0x0b
-# CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+# CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
0xe0 0xf9 0x7f 0x07
-# CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
+# CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0:64]
0xe0 0xf9 0x4f 0x1b
# CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
0xa4 0xf9 0x0f 0x0f
# CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4]
0xa4 0xf9 0x3f 0x0f
-# CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4, :32]
+# CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:32]
0xa4 0xf9 0x1d 0x0f
-# CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4, :32]!
+# CHECK: vld4.8 {d0[], d1[], d2[], d3[]}, [r4:32]!
0xa4 0xf9 0x35 0x0f
-# CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4, :32], r5
+# CHECK: vld4.8 {d0[], d2[], d4[], d6[]}, [r4:32], r5
0xa4 0xf9 0x4f 0x0f
# CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4]
0xa4 0xf9 0x7f 0x0f
-# CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4, :64]
+# CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:64]
0xa4 0xf9 0x5d 0x0f
-# CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4, :64]!
+# CHECK: vld4.16 {d0[], d1[], d2[], d3[]}, [r4:64]!
0xa4 0xf9 0x75 0x0f
-# CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4, :64], r5
+# CHECK: vld4.16 {d0[], d2[], d4[], d6[]}, [r4:64], r5
0xa4 0xf9 0x8f 0x0f
# CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4]
0xa4 0xf9 0xbf 0x0f
-# CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4, :64]
+# CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:64]
0xa4 0xf9 0xdd 0x0f
-# CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4, :128]!
+# CHECK: vld4.32 {d0[], d1[], d2[], d3[]}, [r4:128]!
0xa4 0xf9 0xf5 0x0f
-# CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4, :128], r5
+# CHECK: vld4.32 {d0[], d2[], d4[], d6[]}, [r4:128], r5
0x40 0xf9 0x1f 0x07
-# CHECK: vst1.8 {d16}, [r0, :64]
+# CHECK: vst1.8 {d16}, [r0:64]
0x40 0xf9 0x4f 0x07
# CHECK: vst1.16 {d16}, [r0]
0x40 0xf9 0x8f 0x07
@@ -1539,37 +1539,37 @@
0x40 0xf9 0xcf 0x07
# CHECK: vst1.64 {d16}, [r0]
0x40 0xf9 0x1f 0x0a
-# CHECK: vst1.8 {d16, d17}, [r0, :64]
+# CHECK: vst1.8 {d16, d17}, [r0:64]
0x40 0xf9 0x6f 0x0a
-# CHECK: vst1.16 {d16, d17}, [r0, :128]
+# CHECK: vst1.16 {d16, d17}, [r0:128]
0x40 0xf9 0x8f 0x0a
# CHECK: vst1.32 {d16, d17}, [r0]
0x40 0xf9 0xcf 0x0a
# CHECK: vst1.64 {d16, d17}, [r0]
0x40 0xf9 0x1f 0x08
-# CHECK: vst2.8 {d16, d17}, [r0, :64]
+# CHECK: vst2.8 {d16, d17}, [r0:64]
0x40 0xf9 0x6f 0x08
-# CHECK: vst2.16 {d16, d17}, [r0, :128]
+# CHECK: vst2.16 {d16, d17}, [r0:128]
0x40 0xf9 0x8f 0x08
# CHECK: vst2.32 {d16, d17}, [r0]
0x40 0xf9 0x1f 0x03
-# CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64]
+# CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64]
0x40 0xf9 0x6f 0x03
-# CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128]
+# CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128]
0x40 0xf9 0xbf 0x03
-# CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256]
+# CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256]
0x40 0xf9 0x1f 0x04
-# CHECK: vst3.8 {d16, d17, d18}, [r0, :64]
+# CHECK: vst3.8 {d16, d17, d18}, [r0:64]
0x40 0xf9 0x4f 0x04
# CHECK: vst3.16 {d16, d17, d18}, [r0]
0x40 0xf9 0x8f 0x04
# CHECK: vst3.32 {d16, d17, d18}, [r0]
0x40 0xf9 0x1d 0x05
-# CHECK: vst3.8 {d16, d18, d20}, [r0, :64]!
+# CHECK: vst3.8 {d16, d18, d20}, [r0:64]!
0x40 0xf9 0x1d 0x15
-# CHECK: vst3.8 {d17, d19, d21}, [r0, :64]!
+# CHECK: vst3.8 {d17, d19, d21}, [r0:64]!
0x40 0xf9 0x4d 0x05
# CHECK: vst3.16 {d16, d18, d20}, [r0]!
0x40 0xf9 0x4d 0x15
@@ -1580,13 +1580,13 @@
# CHECK: vst3.32 {d17, d19, d21}, [r0]!
0x40 0xf9 0x1f 0x00
-# CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64]
+# CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64]
0x40 0xf9 0x6f 0x00
-# CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128]
+# CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128]
0x40 0xf9 0x3d 0x01
-# CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]!
+# CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]!
0x40 0xf9 0x3d 0x11
-# CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256]!
+# CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256]!
0x40 0xf9 0x4d 0x01
# CHECK: vst4.16 {d16, d18, d20, d22}, [r0]!
0x40 0xf9 0x4d 0x11
@@ -1597,15 +1597,15 @@
# CHECK: vst4.32 {d17, d19, d21, d23}, [r0]!
0xc0 0xf9 0x3f 0x01
-# CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16]
+# CHECK: vst2.8 {d16[1], d17[1]}, [r0:16]
0xc0 0xf9 0x5f 0x05
-# CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32]
+# CHECK: vst2.16 {d16[1], d17[1]}, [r0:32]
0xc0 0xf9 0x8f 0x09
# CHECK: vst2.32 {d16[1], d17[1]}, [r0]
0xc0 0xf9 0x6f 0x15
# CHECK: vst2.16 {d17[1], d19[1]}, [r0]
0xc0 0xf9 0x5f 0x19
-# CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64]
+# CHECK: vst2.32 {d17[0], d19[0]}, [r0:64]
0xc0 0xf9 0x2f 0x02
# CHECK: vst3.8 {d16[1], d17[1], d18[1]}, [r0]
@@ -1619,26 +1619,26 @@
# CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r0]
0xc0 0xf9 0x3f 0x03
-# CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
+# CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
0xc0 0xf9 0x4f 0x07
# CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
0xc0 0xf9 0xaf 0x0b
-# CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
+# CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
0xc0 0xf9 0xff 0x17
-# CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
+# CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64]
0xc0 0xf9 0x4f 0x1b
# CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
0x63 0xf9 0x37 0xc9
-# CHECK: vld2.8 {d28, d30}, [r3, :256], r7
+# CHECK: vld2.8 {d28, d30}, [r3:256], r7
# rdar://10798451
0xe7 0xf9 0x32 0x1d
-# CHECK vld2.8 {d17[], d19[]}, [r7, :16], r2
+# CHECK: vld2.8 {d17[], d19[]}, [r7:16], r2
0xe7 0xf9 0x3d 0x1d
-# CHECK vld2.8 {d17[], d19[]}, [r7, :16]!
+# CHECK: vld2.8 {d17[], d19[]}, [r7:16]!
0xe7 0xf9 0x3f 0x1d
-# CHECK vld2.8 {d17[], d19[]}, [r7, :16]
+# CHECK: vld2.8 {d17[], d19[]}, [r7:16]
# rdar://11034702
0x04 0xf9 0x0d 0x87
@@ -2046,9 +2046,9 @@
# rdar://10798451
0xe7 0xf9 0x32 0x1d
-# CHECK: vld2.8 {d17[], d19[]}, [r7, :16], r2
+# CHECK: vld2.8 {d17[], d19[]}, [r7:16], r2
0xe7 0xf9 0x3d 0x1d
-# CHECK: vld2.8 {d17[], d19[]}, [r7, :16]!
+# CHECK: vld2.8 {d17[], d19[]}, [r7:16]!
0xe7 0xf9 0x3f 0x1d
-# CHECK: vld2.8 {d17[], d19[]}, [r7, :16]
+# CHECK: vld2.8 {d17[], d19[]}, [r7:16]
diff --git a/test/MC/Disassembler/ARM/thumb2.txt b/test/MC/Disassembler/ARM/thumb2.txt
index 45dace3b09c5..31f75b39fa9c 100644
--- a/test/MC/Disassembler/ARM/thumb2.txt
+++ b/test/MC/Disassembler/ARM/thumb2.txt
@@ -254,9 +254,12 @@
#------------------------------------------------------------------------------
# CHECK: cbnz r7, #6
# CHECK: cbnz r7, #12
+# CHECK: cbz r4, #64
0x1f 0xb9
0x37 0xb9
+0x04 0xb3
+
#------------------------------------------------------------------------------
# CDP/CDP2
@@ -554,6 +557,7 @@
# CHECK: ldr.w r8, [r8, r2, lsl #2]
# CHECK: ldr.w r7, [sp, r2, lsl #1]
# CHECK: ldr.w r7, [sp, r2]
+# CHECK: ldr pc, [sp], #12
# CHECK: ldr r2, [r4, #255]!
# CHECK: ldr r8, [sp, #4]!
# CHECK: ldr lr, [sp, #-4]!
@@ -567,6 +571,7 @@
0x58 0xf8 0x22 0x80
0x5d 0xf8 0x12 0x70
0x5d 0xf8 0x02 0x70
+0x5d 0xf8 0x0c 0xfb
0x54 0xf8 0xff 0x2f
0x5d 0xf8 0x04 0x8f
0x5d 0xf8 0x04 0xed
diff --git a/test/MC/Disassembler/ARM/unpredictable-BFI.txt b/test/MC/Disassembler/ARM/unpredictable-BFI.txt
new file mode 100644
index 000000000000..a98f859c4c66
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-BFI.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s
+
+# rdar://11437956
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: 0x90 0x00 0xc0 0xe7
+0x90 0x00 0xc0 0xe7
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: 0x90 0x01 0xc0 0xe7
+0x90 0x01 0xc0 0xe7
diff --git a/test/MC/Disassembler/Mips/mips32.txt b/test/MC/Disassembler/Mips/mips32.txt
index a1933190b141..70224860bc71 100644
--- a/test/MC/Disassembler/Mips/mips32.txt
+++ b/test/MC/Disassembler/Mips/mips32.txt
@@ -404,3 +404,9 @@
# CHECK: xori $9, $6, 17767
0x38 0xc9 0x45 0x67
+
+# CHECK: .set push
+# CHECK: .set mips32r2
+# CHECK: rdhwr $5, $29
+# CHECK: .set pop
+0x7c 0x05 0xe8 0x3b
diff --git a/test/MC/Disassembler/Mips/mips32_le.txt b/test/MC/Disassembler/Mips/mips32_le.txt
index 08b36726baf3..48fa8e2c7fac 100644
--- a/test/MC/Disassembler/Mips/mips32_le.txt
+++ b/test/MC/Disassembler/Mips/mips32_le.txt
@@ -404,3 +404,9 @@
# CHECK: xori $9, $6, 17767
0x67 0x45 0xc9 0x38
+
+# CHECK: .set push
+# CHECK: .set mips32r2
+# CHECK: rdhwr $5, $29
+# CHECK: .set pop
+0x3b 0xe8 0x05 0x7c
diff --git a/test/MC/Disassembler/Mips/mips64.txt b/test/MC/Disassembler/Mips/mips64.txt
index 0a88c40839fa..38b137766125 100644
--- a/test/MC/Disassembler/Mips/mips64.txt
+++ b/test/MC/Disassembler/Mips/mips64.txt
@@ -1,67 +1,67 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux | FileCheck %s
-# CHECK: .section __TEXT,__text,regular,pure_instructions
-# CHECK: daddiu $11, $26, 31949
-0x67 0x4b 0x7c 0xcd
-
-# CHECK: daddu $26, $1, $11
-0x00 0x2b 0xd0 0x2d
-
-# CHECK: ddiv $zero, $26, $22
-0x03 0x56 0x00 0x1e
-
-# CHECK: ddivu $zero, $9, $24
-0x01 0x38 0x00 0x1f
-
-# CHECK: dmfc1 $2, $f14
-0x44 0x22 0x70 0x00
-
-# CHECK: dmtc1 $23, $f5
-0x44 0xb7 0x28 0x00
-
-# CHECK: dmult $11, $26
-0x01 0x7a 0x00 0x1c
-
-# CHECK: dmultu $23, $13
-0x02 0xed 0x00 0x1d
-
-# CHECK: dsll $3, $24, 17
-0x00 0x18 0x1c 0x78
-
-# CHECK: dsllv $gp, $27, $24
-0x03 0x1b 0xe0 0x14
-
-# CHECK: dsra $1, $1, 30
-0x00 0x01 0x0f 0xbb
-
-# CHECK: dsrav $1, $1, $fp
-0x03 0xc1 0x08 0x17
-
-# CHECK: dsrl $10, $gp, 24
-0x00 0x1c 0x56 0x3a
-
-# CHECK: dsrlv $gp, $10, $23
-0x02 0xea 0xe0 0x16
-
-# CHECK: dsubu $gp, $27, $24
-0x03 0x78 0xe0 0x2f
-
-# CHECK: lw $27, -15155($1)
-0x8c 0x3b 0xc4 0xcd
-
-# CHECK: lui $1, 1
-0x3c 0x01 0x00 0x01
-
-# CHECK: lwu $3, -1746($3)
-0x9c 0x63 0xf9 0x2e
-
-# CHECK: lui $ra, 1
-0x3c 0x1f 0x00 0x01
-
-# CHECK: sw $26, -15159($1)
-0xac 0x3a 0xc4 0xc9
-
-# CHECK: ld $26, 3958($zero)
-0xdc 0x1a 0x0f 0x76
-
-# CHECK: sd $6, 17767($zero)
-0xfc 0x06 0x45 0x67
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux | FileCheck %s
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
+0x67 0x4b 0x7c 0xcd
+
+# CHECK: daddu $26, $1, $11
+0x00 0x2b 0xd0 0x2d
+
+# CHECK: ddiv $zero, $26, $22
+0x03 0x56 0x00 0x1e
+
+# CHECK: ddivu $zero, $9, $24
+0x01 0x38 0x00 0x1f
+
+# CHECK: dmfc1 $2, $f14
+0x44 0x22 0x70 0x00
+
+# CHECK: dmtc1 $23, $f5
+0x44 0xb7 0x28 0x00
+
+# CHECK: dmult $11, $26
+0x01 0x7a 0x00 0x1c
+
+# CHECK: dmultu $23, $13
+0x02 0xed 0x00 0x1d
+
+# CHECK: dsll $3, $24, 17
+0x00 0x18 0x1c 0x78
+
+# CHECK: dsllv $gp, $27, $24
+0x03 0x1b 0xe0 0x14
+
+# CHECK: dsra $1, $1, 30
+0x00 0x01 0x0f 0xbb
+
+# CHECK: dsrav $1, $1, $fp
+0x03 0xc1 0x08 0x17
+
+# CHECK: dsrl $10, $gp, 24
+0x00 0x1c 0x56 0x3a
+
+# CHECK: dsrlv $gp, $10, $23
+0x02 0xea 0xe0 0x16
+
+# CHECK: dsubu $gp, $27, $24
+0x03 0x78 0xe0 0x2f
+
+# CHECK: lw $27, -15155($1)
+0x8c 0x3b 0xc4 0xcd
+
+# CHECK: lui $1, 1
+0x3c 0x01 0x00 0x01
+
+# CHECK: lwu $3, -1746($3)
+0x9c 0x63 0xf9 0x2e
+
+# CHECK: lui $ra, 1
+0x3c 0x1f 0x00 0x01
+
+# CHECK: sw $26, -15159($1)
+0xac 0x3a 0xc4 0xc9
+
+# CHECK: ld $26, 3958($zero)
+0xdc 0x1a 0x0f 0x76
+
+# CHECK: sd $6, 17767($zero)
+0xfc 0x06 0x45 0x67
diff --git a/test/MC/Disassembler/Mips/mips64_le.txt b/test/MC/Disassembler/Mips/mips64_le.txt
index fe8faffa8335..a7ef0e473bbe 100644
--- a/test/MC/Disassembler/Mips/mips64_le.txt
+++ b/test/MC/Disassembler/Mips/mips64_le.txt
@@ -1,67 +1,67 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux | FileCheck %s
-# CHECK: .section __TEXT,__text,regular,pure_instructions
-# CHECK: daddiu $11, $26, 31949
-0xcd 0x7c 0x4b 0x67
-
-# CHECK: daddu $26, $1, $11
-0x2d 0xd0 0x2b 0x00
-
-# CHECK: ddiv $zero, $26, $22
-0x1e 0x00 0x56 0x03
-
-# CHECK: ddivu $zero, $9, $24
-0x1f 0x00 0x38 0x01
-
-# CHECK: dmfc1 $2, $f14
-0x00 0x70 0x22 0x44
-
-# CHECK: dmtc1 $23, $f5
-0x00 0x28 0xb7 0x44
-
-# CHECK: dmult $11, $26
-0x1c 0x00 0x7a 0x01
-
-# CHECK: dmultu $23, $13
-0x1d 0x00 0xed 0x02
-
-# CHECK: dsll $3, $24, 17
-0x78 0x1c 0x18 0x00
-
-# CHECK: dsllv $gp, $27, $24
-0x14 0xe0 0x1b 0x03
-
-# CHECK: dsra $1, $1, 30
-0xbb 0x0f 0x01 0x00
-
-# CHECK: dsrav $1, $1, $fp
-0x17 0x08 0xc1 0x03
-
-# CHECK: dsrl $10, $gp, 24
-0x3a 0x56 0x1c 0x00
-
-# CHECK: dsrlv $gp, $10, $23
-0x16 0xe0 0xea 0x02
-
-# CHECK: dsubu $gp, $27, $24
-0x2f 0xe0 0x78 0x03
-
-# CHECK: lw $27, -15155($1)
-0xcd 0xc4 0x3b 0x8c
-
-# CHECK: lui $1, 1
-0x01 0x00 0x01 0x3c
-
-# CHECK: lwu $3, -1746($3)
-0x2e 0xf9 0x63 0x9c
-
-# CHECK: lui $ra, 1
-0x01 0x00 0x1f 0x3c
-
-# CHECK: sw $26, -15159($1)
-0xc9 0xc4 0x3a 0xac
-
-# CHECK: ld $26, 3958($zero)
-0x76 0x0f 0x1a 0xdc
-
-# CHECK: sd $6, 17767($zero)
-0x67 0x45 0x06 0xfc
+# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux | FileCheck %s
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
+0xcd 0x7c 0x4b 0x67
+
+# CHECK: daddu $26, $1, $11
+0x2d 0xd0 0x2b 0x00
+
+# CHECK: ddiv $zero, $26, $22
+0x1e 0x00 0x56 0x03
+
+# CHECK: ddivu $zero, $9, $24
+0x1f 0x00 0x38 0x01
+
+# CHECK: dmfc1 $2, $f14
+0x00 0x70 0x22 0x44
+
+# CHECK: dmtc1 $23, $f5
+0x00 0x28 0xb7 0x44
+
+# CHECK: dmult $11, $26
+0x1c 0x00 0x7a 0x01
+
+# CHECK: dmultu $23, $13
+0x1d 0x00 0xed 0x02
+
+# CHECK: dsll $3, $24, 17
+0x78 0x1c 0x18 0x00
+
+# CHECK: dsllv $gp, $27, $24
+0x14 0xe0 0x1b 0x03
+
+# CHECK: dsra $1, $1, 30
+0xbb 0x0f 0x01 0x00
+
+# CHECK: dsrav $1, $1, $fp
+0x17 0x08 0xc1 0x03
+
+# CHECK: dsrl $10, $gp, 24
+0x3a 0x56 0x1c 0x00
+
+# CHECK: dsrlv $gp, $10, $23
+0x16 0xe0 0xea 0x02
+
+# CHECK: dsubu $gp, $27, $24
+0x2f 0xe0 0x78 0x03
+
+# CHECK: lw $27, -15155($1)
+0xcd 0xc4 0x3b 0x8c
+
+# CHECK: lui $1, 1
+0x01 0x00 0x01 0x3c
+
+# CHECK: lwu $3, -1746($3)
+0x2e 0xf9 0x63 0x9c
+
+# CHECK: lui $ra, 1
+0x01 0x00 0x1f 0x3c
+
+# CHECK: sw $26, -15159($1)
+0xc9 0xc4 0x3a 0xac
+
+# CHECK: ld $26, 3958($zero)
+0x76 0x0f 0x1a 0xdc
+
+# CHECK: sd $6, 17767($zero)
+0x67 0x45 0x06 0xfc
diff --git a/test/MC/Disassembler/Mips/mips64r2.txt b/test/MC/Disassembler/Mips/mips64r2.txt
index 2dfde0d231c6..0b421fc551e2 100644
--- a/test/MC/Disassembler/Mips/mips64r2.txt
+++ b/test/MC/Disassembler/Mips/mips64r2.txt
@@ -1,91 +1,91 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mattr +mips64r2 | FileCheck %s
-# CHECK: .section __TEXT,__text,regular,pure_instructions
-# CHECK: daddiu $11, $26, 31949
-0x67 0x4b 0x7c 0xcd
-
-# CHECK: daddu $26, $1, $11
-0x00 0x2b 0xd0 0x2d
-
-# CHECK: ddiv $zero, $26, $22
-0x03 0x56 0x00 0x1e
-
-# CHECK: ddivu $zero, $9, $24
-0x01 0x38 0x00 0x1f
-
-# CHECK: dmfc1 $2, $f14
-0x44 0x22 0x70 0x00
-
-# CHECK: dmtc1 $23, $f5
-0x44 0xb7 0x28 0x00
-
-# CHECK: dmult $11, $26
-0x01 0x7a 0x00 0x1c
-
-# CHECK: dmultu $23, $13
-0x02 0xed 0x00 0x1d
-
-# CHECK: dsll $3, $24, 17
-0x00 0x18 0x1c 0x78
-
-# CHECK: dsllv $gp, $27, $24
-0x03 0x1b 0xe0 0x14
-
-# CHECK: dsra $1, $1, 30
-0x00 0x01 0x0f 0xbb
-
-# CHECK: dsrav $1, $1, $fp
-0x03 0xc1 0x08 0x17
-
-# CHECK: dsrl $10, $gp, 24
-0x00 0x1c 0x56 0x3a
-
-# CHECK: dsrlv $gp, $10, $23
-0x02 0xea 0xe0 0x16
-
-# CHECK: dsubu $gp, $27, $24
-0x03 0x78 0xe0 0x2f
-
-# CHECK: lw $27, -15155($1)
-0x8c 0x3b 0xc4 0xcd
-
-# CHECK: lui $1, 1
-0x3c 0x01 0x00 0x01
-
-# CHECK: lwu $3, -1746($3)
-0x9c 0x63 0xf9 0x2e
-
-# CHECK: lui $ra, 1
-0x3c 0x1f 0x00 0x01
-
-# CHECK: sw $26, -15159($1)
-0xac 0x3a 0xc4 0xc9
-
-# CHECK: ld $26, 3958($zero)
-0xdc 0x1a 0x0f 0x76
-
-# CHECK: sd $6, 17767($zero)
-0xfc 0x06 0x45 0x67
-
-# CHECK: dclo $9, $24
-0x73 0x09 0x48 0x25
-
-# CHECK: dclz $26, $9
-0x71 0x3a 0xd0 0x24
-
-# CHECK: dext $7, $gp, 29, 31
-0x7f 0x87 0xf7 0x43
-
-# CHECK: dins $20, $gp, 15, 1
-0x7f 0x94 0x7b 0xc7
-
-# CHECK: dsbh $7, $gp
-0x7c 0x1c 0x38 0xa4
-
-# CHECK: dshd $3, $14
-0x7c 0x0e 0x19 0x64
-
-# CHECK: drotr $20, $27, 6
-0x00 0x3b 0xa1 0xba
-
-# CHECK: drotrv $24, $23, $5
-0x00 0xb7 0xc0 0x56
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mattr +mips64r2 | FileCheck %s
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
+0x67 0x4b 0x7c 0xcd
+
+# CHECK: daddu $26, $1, $11
+0x00 0x2b 0xd0 0x2d
+
+# CHECK: ddiv $zero, $26, $22
+0x03 0x56 0x00 0x1e
+
+# CHECK: ddivu $zero, $9, $24
+0x01 0x38 0x00 0x1f
+
+# CHECK: dmfc1 $2, $f14
+0x44 0x22 0x70 0x00
+
+# CHECK: dmtc1 $23, $f5
+0x44 0xb7 0x28 0x00
+
+# CHECK: dmult $11, $26
+0x01 0x7a 0x00 0x1c
+
+# CHECK: dmultu $23, $13
+0x02 0xed 0x00 0x1d
+
+# CHECK: dsll $3, $24, 17
+0x00 0x18 0x1c 0x78
+
+# CHECK: dsllv $gp, $27, $24
+0x03 0x1b 0xe0 0x14
+
+# CHECK: dsra $1, $1, 30
+0x00 0x01 0x0f 0xbb
+
+# CHECK: dsrav $1, $1, $fp
+0x03 0xc1 0x08 0x17
+
+# CHECK: dsrl $10, $gp, 24
+0x00 0x1c 0x56 0x3a
+
+# CHECK: dsrlv $gp, $10, $23
+0x02 0xea 0xe0 0x16
+
+# CHECK: dsubu $gp, $27, $24
+0x03 0x78 0xe0 0x2f
+
+# CHECK: lw $27, -15155($1)
+0x8c 0x3b 0xc4 0xcd
+
+# CHECK: lui $1, 1
+0x3c 0x01 0x00 0x01
+
+# CHECK: lwu $3, -1746($3)
+0x9c 0x63 0xf9 0x2e
+
+# CHECK: lui $ra, 1
+0x3c 0x1f 0x00 0x01
+
+# CHECK: sw $26, -15159($1)
+0xac 0x3a 0xc4 0xc9
+
+# CHECK: ld $26, 3958($zero)
+0xdc 0x1a 0x0f 0x76
+
+# CHECK: sd $6, 17767($zero)
+0xfc 0x06 0x45 0x67
+
+# CHECK: dclo $9, $24
+0x73 0x09 0x48 0x25
+
+# CHECK: dclz $26, $9
+0x71 0x3a 0xd0 0x24
+
+# CHECK: dext $7, $gp, 29, 31
+0x7f 0x87 0xf7 0x43
+
+# CHECK: dins $20, $gp, 15, 1
+0x7f 0x94 0x7b 0xc7
+
+# CHECK: dsbh $7, $gp
+0x7c 0x1c 0x38 0xa4
+
+# CHECK: dshd $3, $14
+0x7c 0x0e 0x19 0x64
+
+# CHECK: drotr $20, $27, 6
+0x00 0x3b 0xa1 0xba
+
+# CHECK: drotrv $24, $23, $5
+0x00 0xb7 0xc0 0x56
diff --git a/test/MC/Disassembler/Mips/mips64r2_le.txt b/test/MC/Disassembler/Mips/mips64r2_le.txt
index 620d9ebe8da3..c1d326f6d674 100644
--- a/test/MC/Disassembler/Mips/mips64r2_le.txt
+++ b/test/MC/Disassembler/Mips/mips64r2_le.txt
@@ -1,91 +1,91 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux -mattr +mips64r2 | FileCheck %s
-# CHECK: .section __TEXT,__text,regular,pure_instructions
-# CHECK: daddiu $11, $26, 31949
-0xcd 0x7c 0x4b 0x67
-
-# CHECK: daddu $26, $1, $11
-0x2d 0xd0 0x2b 0x00
-
-# CHECK: ddiv $zero, $26, $22
-0x1e 0x00 0x56 0x03
-
-# CHECK: ddivu $zero, $9, $24
-0x1f 0x00 0x38 0x01
-
-# CHECK: dmfc1 $2, $f14
-0x00 0x70 0x22 0x44
-
-# CHECK: dmtc1 $23, $f5
-0x00 0x28 0xb7 0x44
-
-# CHECK: dmult $11, $26
-0x1c 0x00 0x7a 0x01
-
-# CHECK: dmultu $23, $13
-0x1d 0x00 0xed 0x02
-
-# CHECK: dsll $3, $24, 17
-0x78 0x1c 0x18 0x00
-
-# CHECK: dsllv $gp, $27, $24
-0x14 0xe0 0x1b 0x03
-
-# CHECK: dsra $1, $1, 30
-0xbb 0x0f 0x01 0x00
-
-# CHECK: dsrav $1, $1, $fp
-0x17 0x08 0xc1 0x03
-
-# CHECK: dsrl $10, $gp, 24
-0x3a 0x56 0x1c 0x00
-
-# CHECK: dsrlv $gp, $10, $23
-0x16 0xe0 0xea 0x02
-
-# CHECK: dsubu $gp, $27, $24
-0x2f 0xe0 0x78 0x03
-
-# CHECK: lw $27, -15155($1)
-0xcd 0xc4 0x3b 0x8c
-
-# CHECK: lui $1, 1
-0x01 0x00 0x01 0x3c
-
-# CHECK: lwu $3, -1746($3)
-0x2e 0xf9 0x63 0x9c
-
-# CHECK: lui $ra, 1
-0x01 0x00 0x1f 0x3c
-
-# CHECK: sw $26, -15159($1)
-0xc9 0xc4 0x3a 0xac
-
-# CHECK: ld $26, 3958($zero)
-0x76 0x0f 0x1a 0xdc
-
-# CHECK: sd $6, 17767($zero)
-0x67 0x45 0x06 0xfc
-
-# CHECK: dclo $9, $24
-0x25 0x48 0x09 0x73
-
-# CHECK: dclz $26, $9
-0x24 0xd0 0x3a 0x71
-
-# CHECK: dext $7, $gp, 29, 31
-0x43 0xf7 0x87 0x7f
-
-# CHECK: dins $20, $gp, 15, 1
-0xc7 0x7b 0x94 0x7f
-
-# CHECK: dsbh $7, $gp
-0xa4 0x38 0x1c 0x7c
-
-# CHECK: dshd $3, $14
-0x64 0x19 0x0e 0x7c
-
-# CHECK: drotr $20, $27, 6
-0xba 0xa1 0x3b 0x00
-
-# CHECK: drotrv $24, $23, $5
-0x56 0xc0 0xb7 0x00
+# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux -mattr +mips64r2 | FileCheck %s
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
+0xcd 0x7c 0x4b 0x67
+
+# CHECK: daddu $26, $1, $11
+0x2d 0xd0 0x2b 0x00
+
+# CHECK: ddiv $zero, $26, $22
+0x1e 0x00 0x56 0x03
+
+# CHECK: ddivu $zero, $9, $24
+0x1f 0x00 0x38 0x01
+
+# CHECK: dmfc1 $2, $f14
+0x00 0x70 0x22 0x44
+
+# CHECK: dmtc1 $23, $f5
+0x00 0x28 0xb7 0x44
+
+# CHECK: dmult $11, $26
+0x1c 0x00 0x7a 0x01
+
+# CHECK: dmultu $23, $13
+0x1d 0x00 0xed 0x02
+
+# CHECK: dsll $3, $24, 17
+0x78 0x1c 0x18 0x00
+
+# CHECK: dsllv $gp, $27, $24
+0x14 0xe0 0x1b 0x03
+
+# CHECK: dsra $1, $1, 30
+0xbb 0x0f 0x01 0x00
+
+# CHECK: dsrav $1, $1, $fp
+0x17 0x08 0xc1 0x03
+
+# CHECK: dsrl $10, $gp, 24
+0x3a 0x56 0x1c 0x00
+
+# CHECK: dsrlv $gp, $10, $23
+0x16 0xe0 0xea 0x02
+
+# CHECK: dsubu $gp, $27, $24
+0x2f 0xe0 0x78 0x03
+
+# CHECK: lw $27, -15155($1)
+0xcd 0xc4 0x3b 0x8c
+
+# CHECK: lui $1, 1
+0x01 0x00 0x01 0x3c
+
+# CHECK: lwu $3, -1746($3)
+0x2e 0xf9 0x63 0x9c
+
+# CHECK: lui $ra, 1
+0x01 0x00 0x1f 0x3c
+
+# CHECK: sw $26, -15159($1)
+0xc9 0xc4 0x3a 0xac
+
+# CHECK: ld $26, 3958($zero)
+0x76 0x0f 0x1a 0xdc
+
+# CHECK: sd $6, 17767($zero)
+0x67 0x45 0x06 0xfc
+
+# CHECK: dclo $9, $24
+0x25 0x48 0x09 0x73
+
+# CHECK: dclz $26, $9
+0x24 0xd0 0x3a 0x71
+
+# CHECK: dext $7, $gp, 29, 31
+0x43 0xf7 0x87 0x7f
+
+# CHECK: dins $20, $gp, 15, 1
+0xc7 0x7b 0x94 0x7f
+
+# CHECK: dsbh $7, $gp
+0xa4 0x38 0x1c 0x7c
+
+# CHECK: dshd $3, $14
+0x64 0x19 0x0e 0x7c
+
+# CHECK: drotr $20, $27, 6
+0xba 0xa1 0x3b 0x00
+
+# CHECK: drotrv $24, $23, $5
+0x56 0xc0 0xb7 0x00
diff --git a/test/MC/Disassembler/X86/enhanced.txt b/test/MC/Disassembler/X86/enhanced.txt
deleted file mode 100644
index deff735b69db..000000000000
--- a/test/MC/Disassembler/X86/enhanced.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --edis %s -triple=x86_64-apple-darwin9 2>&1 | FileCheck %s
-
-# CHECK: [o:jne][w: ][0-p:-][0-l:10=10] <br> 0:[RIP/112](pc)=18446744073709551606
-0x0f 0x85 0xf6 0xff 0xff 0xff
-# CHECK: [o:movq][w: ][1-r:%gs=r64][1-p::][1-l:8=8][p:,][w: ][0-r:%rcx=r109] <mov> 0:[RCX/109]=0 1:[GS/64]=8
-0x65 0x48 0x8b 0x0c 0x25 0x08 0x00 0x00 0x00
-# CHECK: [o:xorps][w: ][2-r:%xmm1=r130][p:,][w: ][0-r:%xmm2=r131] 0:[XMM2/131]=0 1:[XMM2/131]=0 2:[XMM1/130]=0
-0x0f 0x57 0xd1
-# CHECK: [o:andps][w: ][2-r:%xmm1=r130][p:,][w: ][0-r:%xmm2=r131] 0:[XMM2/131]=0 1:[XMM2/131]=0 2:[XMM1/130]=0
-0x0f 0x54 0xd1
diff --git a/test/MC/Disassembler/X86/hex-immediates.txt b/test/MC/Disassembler/X86/hex-immediates.txt
new file mode 100644
index 000000000000..80d24487ee74
--- /dev/null
+++ b/test/MC/Disassembler/X86/hex-immediates.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --hdis %s -triple=x86_64-apple-darwin9 2>&1 | FileCheck %s
+
+# CHECK: movabsq $0x7fffffffffffffff, %rcx
+0x48 0xb9 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0x7f
+# CHECK: leaq 0x3e2(%rip), %rdi
+0x48 0x8d 0x3d 0xe2 0x03 0x00 0x00
+# CHECK: subq $0x40, %rsp
+0x48 0x83 0xec 0x40
+# CHECK: leal (,%r14,4), %eax
+0x42 0x8d 0x04 0xb5 0x00 0x00 0x00 0x00
diff --git a/test/MC/Disassembler/X86/intel-syntax-32.txt b/test/MC/Disassembler/X86/intel-syntax-32.txt
new file mode 100644
index 000000000000..08bae6ec6753
--- /dev/null
+++ b/test/MC/Disassembler/X86/intel-syntax-32.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 --output-asm-variant=1 | FileCheck %s
+
+# CHECK: sgdt
+0x0f 0x01 0x00
+
+# CHECK: sidt
+0x0f 0x01 0x08
+
+# CHECK: lgdt
+0x0f 0x01 0x10
+
+# CHECK: lidt
+0x0f 0x01 0x18
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index 672d23924321..9827a1809f1b 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -120,13 +120,13 @@
# CHECK: vandps (%rdx), %xmm1, %xmm7
0xc5 0xf0 0x54 0x3a
-# CHECK: vcvtss2sil %xmm0, %eax
+# CHECK: vcvtss2si %xmm0, %eax
0xc5 0xfa 0x2d 0xc0
-# CHECK: vcvtsd2sil %xmm0, %eax
+# CHECK: vcvtsd2si %xmm0, %eax
0xc5 0xfb 0x2d 0xc0
-# CHECK: vcvtsd2siq %xmm0, %rax
+# CHECK: vcvtsd2si %xmm0, %rax
0xc4 0xe1 0xfb 0x2d 0xc0
# CHECK: vmaskmovpd %xmm0, %xmm1, (%rax)
@@ -437,10 +437,10 @@
# CHECK: vroundsd $0, %xmm0, %xmm0, %xmm0
0xc4 0xe3 0x7d 0x0b 0xc0 0x00
-# CHECK: vcvtsd2sil %xmm0, %eax
+# CHECK: vcvtsd2si %xmm0, %eax
0xc4 0xe1 0x7f 0x2d 0xc0
-# CHECK: vcvtsd2siq %xmm0, %rax
+# CHECK: vcvtsd2si %xmm0, %rax
0xc4 0xe1 0xff 0x2d 0xc0
# CHECK: vucomisd %xmm1, %xmm0
@@ -753,3 +753,18 @@
# CHECK: lock
# CHECK-NEXT: xaddq %rcx, %rbx
0xf0 0x48 0x0f 0xc1 0xcb
+
+# rdar://13493622 lldb doesn't print the x86 rep/repne prefix when disassembling
+# CHECK: repne
+# CHECK-NEXT: movsd
+0xf2 0xa5
+# CHECK: repne
+# CHECK-NEXT: movsq
+0xf2 0x48 0xa5
+# CHECK: repne
+# CHECK-NEXT: movb $0, (%rax)
+0xf2 0xc6 0x0 0x0
+# CHECK: rep
+# CHECK-NEXT: lock
+# CHECK-NEXT: incl (%rax)
+0xf3 0xf0 0xff 0x00
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index 899657b0d4a3..76d67d352ccf 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -156,13 +156,13 @@
# CHECK: vandps (%edx), %xmm1, %xmm7
0xc5 0xf0 0x54 0x3a
-# CHECK: vcvtss2sil %xmm0, %eax
+# CHECK: vcvtss2si %xmm0, %eax
0xc5 0xfa 0x2d 0xc0
-# CHECK: vcvtsd2sil %xmm0, %eax
+# CHECK: vcvtsd2si %xmm0, %eax
0xc5 0xfb 0x2d 0xc0
-# CHECK: vcvtsd2sil %xmm0, %eax
+# CHECK: vcvtsd2si %xmm0, %eax
0xc4 0xe1 0x7b 0x2d 0xc0
# CHECK: vmaskmovpd %xmm0, %xmm1, (%eax)
@@ -460,10 +460,10 @@
# CHECK: vroundsd $0, %xmm0, %xmm0, %xmm0
0xc4 0xe3 0x7d 0x0b 0xc0 0x00
-# CHECK: vcvtsd2sil %xmm0, %eax
+# CHECK: vcvtsd2si %xmm0, %eax
0xc4 0xe1 0x7f 0x2d 0xc0
-# CHECK: vcvtsd2sil %xmm0, %eax
+# CHECK: vcvtsd2si %xmm0, %eax
0xc4 0xe1 0xff 0x2d 0xc0
# CHECK: vucomisd %xmm1, %xmm0
@@ -630,3 +630,21 @@
# CHECK: movntss %xmm0, (%edi)
0xf3 0x0f 0x2b 0x07
+
+# CHECK: prefetch (%eax)
+0x0f 0x0d 0x00
+
+# CHECK: prefetchw (%eax)
+0x0f 0x0d 0x08
+
+# CHECK: adcxl %eax, %eax
+0x66 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adcxl (%eax), %eax
+0x66 0x0f 0x38 0xf6 0x00
+
+# CHECK: adoxl %eax, %eax
+0xf3 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adoxl (%eax), %eax
+0xf3 0x0f 0x38 0xf6 0x00
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index df449a403b89..5de1d5978433 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -2,64 +2,64 @@
# Coverage
-# CHECK: vcmptrue_usps
+# CHECK: vcmptrue_usps
0xc5 0x04 0xc2 0xc7 0x1f
-# CHECK: vcmptrue_uspd
+# CHECK: vcmptrue_uspd
0xc5 0x05 0xc2 0xc7 0x1f
-# CHECK: vcmptrue_usss
+# CHECK: vcmptrue_usss
0xc5 0x06 0xc2 0xc7 0x1f
-# CHECK: vcmptrue_ussd
+# CHECK: vcmptrue_ussd
0xc5 0x07 0xc2 0xc7 0x1f
-# CHECK: vcmpeq_uqps
+# CHECK: vcmpeq_uqps
0xc5 0x04 0xc2 0xc7 0x08
-# CHECK: vcmpeq_uqpd
+# CHECK: vcmpeq_uqpd
0xc5 0x05 0xc2 0xc7 0x08
-# CHECK: vcmpeq_uqss
+# CHECK: vcmpeq_uqss
0xc5 0x06 0xc2 0xc7 0x08
-# CHECK: vcmpeq_uqsd
+# CHECK: vcmpeq_uqsd
0xc5 0x07 0xc2 0xc7 0x08
-# CHECK: vcmpeqps
+# CHECK: vcmpeqps
0xc5 0x04 0xc2 0xc7 0x00
-# CHECK: vcmpeqpd
+# CHECK: vcmpeqpd
0xc5 0x05 0xc2 0xc7 0x00
-# CHECK: vcmpeqss
+# CHECK: vcmpeqss
0xc5 0x06 0xc2 0xc7 0x00
-# CHECK: vcmpeqsd
+# CHECK: vcmpeqsd
0xc5 0x07 0xc2 0xc7 0x00
-# CHECK: cmpeqps
+# CHECK: cmpeqps
0x0f 0xc2 0xc7 0x00
-# CHECK: cmpeqpd
+# CHECK: cmpeqpd
0x66 0x0f 0xc2 0xc7 0x00
-# CHECK: cmpeqss
+# CHECK: cmpeqss
0xf3 0x0f 0xc2 0xc7 0x00
-# CHECK: cmpeqsd
+# CHECK: cmpeqsd
0xf2 0x0f 0xc2 0xc7 0x00
-# CHECK: cmpordps
+# CHECK: cmpordps
0x0f 0xc2 0xc7 0x07
-# CHECK: cmpordpd
+# CHECK: cmpordpd
0x66 0x0f 0xc2 0xc7 0x07
-# CHECK: cmpordss
+# CHECK: cmpordss
0xf3 0x0f 0xc2 0xc7 0x07
-# CHECK: cmpordsd
+# CHECK: cmpordsd
0xf2 0x0f 0xc2 0xc7 0x07
# CHECK: extrq $2, $3, %xmm0
@@ -79,3 +79,36 @@
# CHECK: movntss %xmm0, (%rdi)
0xf3 0x0f 0x2b 0x07
+
+# CHECK: adcxl %eax, %eax
+0x66 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adcxl (%rax), %eax
+0x66 0x0f 0x38 0xf6 0x00
+
+# CHECK: adcxq %rax, %rax
+0x66 0x48 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adcxq (%rax), %rax
+0x66 0x48 0x0f 0x38 0xf6 0x00
+
+# CHECK: adoxl %eax, %eax
+0xf3 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adoxl (%rax), %eax
+0xf3 0x0f 0x38 0xf6 0x00
+
+# CHECK: adoxq %rax, %rax
+0xf3 0x48 0x0f 0x38 0xf6 0xc0
+
+# CHECK: adoxq (%rax), %rax
+0xf3 0x48 0x0f 0x38 0xf6 0x00
+
+# CHECK: xbegin 53
+0xc7 0xf8 0x35 0x00 0x00 0x00
+
+# CHECK: xend
+0x0f 0x01 0xd5
+
+# CHECK: xabort $13
+0xc6 0xf8 0x0d
diff --git a/test/MC/Disassembler/XCore/lit.local.cfg b/test/MC/Disassembler/XCore/lit.local.cfg
new file mode 100644
index 000000000000..15b65836e717
--- /dev/null
+++ b/test/MC/Disassembler/XCore/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.txt']
+
+targets = set(config.root.targets_to_build.split())
+if 'XCore' not in targets:
+ config.unsupported = True
diff --git a/test/MC/Disassembler/XCore/xcore.txt b/test/MC/Disassembler/XCore/xcore.txt
new file mode 100644
index 000000000000..99e54e9857c5
--- /dev/null
+++ b/test/MC/Disassembler/XCore/xcore.txt
@@ -0,0 +1,695 @@
+# RUN: llvm-mc --disassemble %s -triple=xcore-xmos-elf | FileCheck %s
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+
+# 0r instructions
+
+# CHECK: clre
+0xed 0x07
+
+# CHECK: get r11, id
+0xee 0x17
+
+# CHECK: get r11, ed
+0xfe 0x0f
+
+# CHECK: get r11, et
+0xff 0x0f
+
+# CHECK: ssync
+0xee 0x07
+
+# CHECK: waiteu
+0xec 0x07
+
+# CHECK: dcall
+0xfc 0x07
+
+# CHECK: dentsp
+0xec 0x17
+
+# CHECK: drestsp
+0xed 0x17
+
+# CHECK: dret
+0xfe 0x07
+
+# CHECK: freet
+0xef 0x07
+
+# CHECK: get r11, kep
+0xef 0x17
+
+# CHECK: get r11, ksp
+0xfc 0x17
+
+# CHECK: kret
+0xfd 0x07
+
+# CHECK: ldw et, sp[4]
+0xfe 0x17
+
+# CHECK: ldw sed, sp[3]
+0xfd 0x17
+
+# CHECK: ldw spc, sp[1]
+0xec 0x0f
+
+# CHECK: ldw ssr, sp[2]
+0xee 0x0f
+
+# CHECK: set kep, r11
+0xff 0x07
+
+# CHECK: stw et, sp[4]
+0xfd 0x0f
+
+# CHECK: stw sed, sp[3]
+0xfc 0x0f
+
+# CHECK: stw spc, sp[1]
+0xed 0x0f
+
+# CHECK: stw ssr, sp[2]
+0xef 0x0f
+
+# 1r instructions
+
+# CHECK: msync res[r0]
+0xf0 0x1f
+
+# CHECK: mjoin res[r1]
+0xf1 0x17
+
+# CHECK: bau r2
+0xf2 0x27
+
+# CHECK: set sp, r3
+0xf3 0x2f
+
+# CHECK: ecallt r4
+0xf4 0x4f
+
+# CHECK: ecallf r5
+0xe5 0x4f
+
+# CHECK: bla r6
+0xe6 0x27
+
+# CHECK: bru r8
+0xe8 0x2f
+
+# CHECK: syncr res[r7]
+0xf7 0x87
+
+# CHECK: freer res[r8]
+0xe8 0x17
+
+# CHECK: setv res[r9], r11
+0xf9 0x47
+
+# CHECK: setev res[r10], r11
+0xfa 0x3f
+
+# CHECK: eeu res[r11]
+0xfb 0x07
+
+# CHECK: set dp, r5
+0xe5 0x37
+
+# CHECK: set cp, r0
+0xf0 0x37
+
+# CHECK: dgetreg r11
+0xeb 0x3f
+
+# CHECK: edu res[r8]
+0xe8 0x07
+
+# CHECK: kcall r2
+0xe2 0x47
+
+# CHECK: waitef r10
+0xfa 0x0f
+
+# CHECK: waitet r7
+0xe7 0x0f
+
+# CHECK: start t[r4]
+0xe4 0x1f
+
+# CHECK: clrpt res[r9]
+0xe9 0x87
+
+# 2r instructions
+
+# CHECK: not r1, r8
+0x24 0x8f
+
+# CHECK: neg r7, r6
+0xce 0x97
+
+# CHECK: andnot r10, r11
+0xab 0x2f
+
+# CHECK: mkmsk r11, r0
+0x4c 0xa7
+
+# CHECK: getts r8, res[r1]
+0x41 0x3f
+
+# CHECK: setpt res[r2], r3
+0xde 0x3e
+
+# CHECK: outct res[r1], r2
+0xc6 0x4e
+
+# CHECK: outt res[r5], r4
+0xd1 0x0f
+
+# CHECK: out res[r9], r10
+0xa9 0xaf
+
+# CHECK: outshr res[r0], r2
+0xd8 0xae
+
+# CHECK: inct r7, res[r4]
+0xdc 0x87
+
+# CHECK: int r8, res[r3]
+0x53 0x8f
+
+# CHECK: in r10, res[r0]
+0x48 0xb7
+
+# CHECK: inshr r4, res[r2]
+0x12 0xb7
+
+# CHECK: chkct res[r6], r0
+0x08 0xcf
+
+# CHECK: testct r8, res[r3]
+0x53 0xbf
+
+# CHECK: testwct r2, res[r9]
+0x39 0xc7
+
+# CHECK: setd res[r3], r4
+0x13 0x17
+
+# CHECK: getst r7, res[r1]
+0x1d 0x07
+
+# CHECK: init t[r1]:sp, r2
+0xc9 0x16
+
+# CHECK: init t[r10]:pc, r1
+0x26 0x07
+
+# CHECK: init t[r2]:cp, r10
+0x4a 0x1f
+
+# CHECK: init t[r2]:dp, r3
+0xce 0x0e
+
+# CHECK: setpsc res[r8], r2
+0x28 0xc7
+
+# CHECK: zext r3, r8
+0x2c 0x47
+
+# CHECK: sext r9, r1
+0x45 0x37
+
+# CHECK: tsetmr r7, r3
+0x1f 0x1f
+
+# CHECK: eef r1, res[r6]
+0x96 0x2f
+
+# CHECK: eet r11, res[r0]
+0x5c 0x27
+
+# rus instructions
+
+# CHECK: chkct res[r1], 8
+0x34 0xcf
+
+# CHECK: getr r11, 2
+0x4e 0x87
+
+# CHECK: mkmsk r4, 24
+0x72 0xa7
+
+# CHECK: outct res[r3], 0
+0xdc 0x4e
+
+# CHECK: sext r8, 16
+0xb1 0x37
+
+# CHECK: zext r2, 32
+0xd8 0x46
+
+# CHECK: peek r0, res[r5]
+0x81 0xbf
+
+# CHECK: endin r10, res[r1]
+0x59 0x97
+
+# l2r instructions
+
+# CHECK: bitrev r1, r10
+0x26 0xff 0xec 0x07
+
+# CHECK: byterev r4, r1
+0x11 0xff 0xec 0x07
+
+# CHECK: clz r11, r10
+0xae 0xff 0xec 0x0f
+
+# CHECK: get r3, ps[r6]
+0x9e 0xff 0xec 0x17
+
+# CHECK: setc res[r5], r9
+0x75 0xff 0xec 0x2f
+
+# CHECK: init t[r2]:lr, r1
+0xc6 0xfe 0xec 0x17
+
+# CHECK: setclk res[r2], r1
+0xd6 0xfe 0xec 0x0f
+
+# CHECK: set ps[r9], r10
+0xa9 0xff 0xec 0x1f
+
+# CHECK: setrdy res[r3], r1
+0xc7 0xfe 0xec 0x2f
+
+# CHECK: settw res[r7], r2
+0x9b 0xff 0xec 0x27
+
+# CHECK: getd r8, res[r3]
+0x53 0xff 0xec 0x1f
+
+# CHECK: getn r10, res[r11]
+0xbb 0xff 0xec 0x37
+
+# CHECK: testlcl r2, res[r0]
+0xc8 0xfe 0xec 0x27
+
+# CHECK: setn res[r9], r7
+0x6d 0xff 0xec 0x37
+
+# 3r instructions
+
+# CHECK: add r1, r2, r3
+0x1b 0x10
+
+# CHECK: and r11, r10, r9
+0xb9 0x3e
+
+# CHECK: eq r6, r1, r2
+0x66 0x30
+
+# CHECK: ld16s r8, r3[r4]
+0xcc 0x82
+
+# CHECK: ld8u r9, r1[r10]
+0x16 0x8d
+
+# CHECK: ldw r9, r4[r5]
+0x91 0x4b
+
+# CHECK: lss r7, r3, r0
+0x7c 0xc0
+
+# CHECK: lsu r5, r8, r6
+0x12 0xcc
+
+# CHECK: or r1, r3, r2
+0x1e 0x40
+
+# CHECK: shl r8, r2, r4
+0xc8 0x22
+
+# CHECK: shr r9, r7, r1
+0x5d 0x29
+
+# CHECK: sub r4, r2, r5
+0x89 0x1a
+
+# CHECK: set t[r0]:r1, r2
+0x18 0xb8
+
+# 2rus instructions
+
+# CHECK: add r10, r2, 5
+0xe9 0x92
+
+# CHECK: eq r2, r1, 0
+0x24 0xb0
+
+# CHECK: ldw r5, r6[1]
+0x19 0x09
+
+# CHECK: shl r6, r5, 24
+0xa6 0xa5
+
+# CHECK: shr r3, r8, 5
+0xf1 0xab
+
+# CHECK: stw r3, r2[0]
+0x38 0x00
+
+# CHECK: sub r2, r4, 11
+0x63 0x9d
+
+# l3r instructions
+
+# CHECK: ashr r5, r1, r11
+0xd7 0xfc 0xec 0x17
+
+# CHECK: crc32 r5, r6, r1
+0x19 0xf9 0xec 0xaf
+
+# CHECK: divu r9, r1, r3
+0x97 0xf8 0xec 0x4f
+
+# CHECK: divs r6, r7, r2
+0x2e 0xf9 0xec 0x47
+
+# CHECK: lda16 r11, r2[r1]
+0xb9 0xf8 0xec 0x2f
+
+# CHECK: lda16 r9, r3[-r11]
+0x1f 0xfd 0xec 0x37
+
+# CHECK: ldaw r9, r1[r2]
+0x96 0xf8 0xec 0x1f
+
+# CHECK: ldaw r8, r7[-r11]
+0xcf 0xfd 0xec 0x27
+
+# CHECK: mul r0, r4, r2
+0xc2 0xf8 0xec 0x3f
+
+# CHECK: remu r1, r2, r3
+0x1b 0xf8 0xec 0xcf
+
+# CHECK: rems r11, r10, r9
+0xb9 0xfe 0xec 0xc7
+
+# CHECK: st16 r5, r3[r8]
+0xdc 0xfc 0xec 0x87
+
+# CHECK: st8 r9, r1[r3]
+0x97 0xf8 0xec 0x8f
+
+# CHECK: stw r7, r10[r1]
+0xf9 0xf9 0xec 0x07
+
+# CHECK: xor r4, r3, r9
+0xcd 0xfc 0xec 0x0f
+
+# l2rus instructions
+
+# CHECK: ashr r5, r1, 3
+0x57 0xf8 0xec 0x97
+
+# CHECK: ldaw r11, r10[6]
+0x7a 0xfc 0xec 0x9f
+
+# CHECK: ldaw r8, r2[-9]
+0x09 0xfd 0xec 0xa7
+
+# CHECK: inpw r6, res[r1], 8
+0xe4 0xfc 0xee 0x97
+
+# CHECK: outpw res[r3], r0, 2
+0x0e 0xf8 0xed 0x97
+
+# ru6 / lru6 instructions
+
+# CHECK: bt r6, -5
+0x85 0x75
+
+# CHECK: bt r10, -451
+0x07 0xf0 0x83 0x76
+
+# CHECK: bt r8, 10
+0x0a 0x72
+
+# CHECK: bt r1, 6451
+0x64 0xf0 0x73 0x70
+
+# CHECK: bf r5, 8
+0x48 0x79
+
+# CHECK: bf r6, 65
+0x01 0xf0 0x81 0x79
+
+# CHECK: bf r1, 53
+0x75 0x78
+
+# CHECK: bf r10, 101
+0x01 0xf0 0xa5 0x7a
+
+# CHECK: ldaw r11, dp[63]
+0xff 0x62
+
+# CHECK: ldaw r1, dp[456]
+0x07 0xf0 0x48 0x60
+
+# CHECK: ldaw cp, dp[5]
+0x05 0x63
+
+# CHECK: ldaw sp, dp[9929]
+0x9b 0xf0 0x89 0x63
+
+# CHECK: ldaw r3, sp[2]
+0xc2 0x64
+
+# CHECK: ldaw r8, sp[65535]
+0xff 0xf3 0x3f 0x66
+
+# CHECK: ldaw sp, sp[41]
+0xa9 0x67
+
+# CHECK: ldaw sp, sp[13121]
+0xcd 0xf0 0x81 0x67
+
+# CHECK: ldc r3, 30
+0xde 0x68
+
+# CHECK: ldc r11, 1000
+0x0f 0xf0 0xe8 0x6a
+
+# CHECK: ldc sp, 0
+0x80 0x6b
+
+# CHECK: ldc lr, 81
+0x01 0xf0 0xd1 0x6b
+
+# CHECK: ldw r0, cp[4]
+0x04 0x6c
+
+# CHECK: ldw r1, cp[32345]
+0xf9 0xf1 0x59 0x6c
+
+# CHECK: ldw cp, cp[8]
+0x08 0x6f
+
+# CHECK: ldw sp, cp[10222]
+0x9f 0xf0 0xae 0x6f
+
+# CHECK: ldw r10, dp[16]
+0x90 0x5a
+
+# CHECK: ldw r10, dp[76]
+0x01 0xf0 0x8c 0x5a
+
+# CHECK: ldw lr, dp[8]
+0xc8 0x5b
+
+# CHECK: ldw dp, dp[33221]
+0x07 0xf2 0x45 0x5b
+
+# CHECK: ldw r8, sp[51]
+0x33 0x5e
+
+# CHECK: ldw r8, sp[1225]
+0x13 0xf0 0x09 0x5e
+
+# CHECK: ldw cp, sp[31]
+0x1f 0x5f
+
+# CHECK: ldw sp, sp[1000]
+0x0f 0xf0 0xa8 0x5f
+
+# CHECK: setc res[r5], 36
+0x64 0xe9
+
+# CHECK: setc res[r2], 40312
+0x75 0xf2 0xb8 0xe8
+
+# CHECK: stw r8, dp[14]
+0x0e 0x52
+
+# CHECK: stw r9, dp[654]
+0x0a 0xf0 0x4e 0x52
+
+# CHECK: stw lr, dp[23]
+0xd7 0x53
+
+# CHECK: stw sp, dp[44442]
+0xb6 0xf2 0x9a 0x53
+
+# CHECK: stw r1, sp[32]
+0x60 0x54
+
+# CHECK: stw r0, sp[8761]
+0x88 0xf0 0x39 0x54
+
+# CHECK: stw cp, sp[63]
+0x3f 0x57
+
+# CHECK: stw lr, sp[4391]
+0x44 0xf0 0xe7 0x57
+
+# u6 / lu6 instructions
+
+# CHECK: bu -20
+0x14 0x77
+
+# CHECK: bu -1000
+0x0f 0xf0 0x28 0x77
+
+# CHECK: bu 24
+0x18 0x73
+
+# CHECK: bu 2231
+0x22 0xf0 0x37 0x73
+
+# CHECK: extsp 9
+0x89 0x77
+
+# CHECK: extsp 5721
+0x59 0xf0 0x99 0x77
+
+# CHECK: clrsr 60
+0x3c 0x7b
+
+# CHECK: clrsr 64391
+0xee 0xf3 0x07 0x7b
+
+# CHECK: entsp 1
+0x41 0x77
+
+# CHECK: entsp 70
+0x01 0xf0 0x46 0x77
+
+# CHECK: ldaw r11, cp[5]
+0x45 0x7f
+
+# CHECK: ldaw r11, cp[33000]
+0x03 0xf2 0x68 0x7f
+
+# CHECK: retsp 40
+0xe8 0x77
+
+# CHECK: retsp 52010
+0x2c 0xf3 0xea 0x77
+
+# CHECK: setsr 42
+0x6a 0x7b
+
+# CHECK: setsr 21863
+0x55 0xf1 0x67 0x7b
+
+# CHECK: extdp 4
+0x84 0x73
+
+# CHECK: extdp 554
+0x08 0xf0 0xaa 0x73
+
+# CHECK: blat 9
+0x49 0x73
+
+# CHECK: blat 61212
+0xbc 0xf3 0x5c 0x73
+
+# CHECK: getsr r11, 54
+0x36 0x7f
+
+# CHECK: getsr r11, 442
+0x06 0xf0 0x3a 0x7f
+
+# CHECK: kcall 11
+0xcb 0x73
+
+# CHECK: kcall 4001
+0x3e 0xf0 0xe1 0x73
+
+# CHECK: kentsp 22
+0x96 0x7b
+
+# CHECK: kentsp 8793
+0x89 0xf0 0x99 0x7b
+
+# CHECK: krestsp 0
+0xc0 0x7b
+
+# CHECK: krestsp 55312
+0x60 0xf3 0xd0 0x7b
+
+# u10 / lu10 instructions
+
+# CHECK: ldap r11, 40
+0x28 0xd8
+
+# CHECK: ldap r11, 53112
+0x33 0xf0 0x78 0xdb
+
+# CHECK: bl 8
+0x08 0xd0
+
+# CHECK: bl 38631
+0x25 0xf0 0xe7 0xd2
+
+# CHECK: bla cp[500]
+0xf4 0xe1
+
+# CHECK: bla cp[413742]
+0x94 0xf1 0x2e 0xe0
+
+# CHECK: ldw r11, cp[132]
+0x84 0xe4
+
+# CHECK: ldw r11, cp[102741]
+0x64 0xf0 0x55 0xe5
+
+# l6r instructions
+
+# CHECK: lmul r11, r0, r2, r5, r8, r10
+0xf9 0xfa 0x02 0x06
+
+# l5r instructions
+
+# CHECK: ladd r10, r2, r5, r1, r7
+0xe5 0xf8 0xfb 0x06
+
+# CHECK: ldivu r5, r6, r3, r9, r8
+0x54 0xfe 0x0b 0x07
+
+# CHECK: lsub r1, r8, r7, r11, r5
+0xcf 0xfd 0x85 0x0f
+
+# l4r instructions
+
+# CHECK: crc8 r6, r3, r4, r11
+0x73 0xfd 0xe6 0x07
+
+# CHECK: maccs r11, r8, r2, r4
+0xf8 0xfa 0xe8 0x0f
+
+# CHECK: maccu r0, r2, r5, r8
+0x44 0xfd 0xf2 0x07
diff --git a/test/MC/ELF/cfi-register.s b/test/MC/ELF/cfi-register.s
new file mode 100644
index 000000000000..37723097030f
--- /dev/null
+++ b/test/MC/ELF/cfi-register.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+f:
+ .cfi_startproc
+ nop
+ .cfi_register %rbp, %rax
+ nop
+ .cfi_endproc
+
+// CHECK: # Section 4
+// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000048)
+// CHECK-NEXT: ('sh_size', 0x0000000000000030)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT: ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410906 00000000')
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Section 5
+// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000004)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000000)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000390)
+// CHECK-NEXT: ('sh_size', 0x0000000000000018)
+// CHECK-NEXT: ('sh_link', 0x00000007)
+// CHECK-NEXT: ('sh_info', 0x00000004)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000018)
+// CHECK-NEXT: ('_relocations', [
+// CHECK-NEXT: # Relocation 0
+// CHECK-NEXT: (('r_offset', 0x0000000000000020)
+// CHECK-NEXT: ('r_sym', 0x00000002)
+// CHECK-NEXT: ('r_type', 0x00000002)
+// CHECK-NEXT: ('r_addend', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/cfi-undefined.s b/test/MC/ELF/cfi-undefined.s
new file mode 100644
index 000000000000..28049faec285
--- /dev/null
+++ b/test/MC/ELF/cfi-undefined.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+f:
+ .cfi_startproc
+ nop
+ .cfi_undefined %rbp
+ nop
+ .cfi_endproc
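+
+// .cfi_undefined %rbp marks %rbp as unrecoverable from this point on. The
+// FDE encodes it as DW_CFA_undefined (opcode 0x07) with operand 6 (%rbp);
+// the section data below differs from cfi-register.s only in that opcode
+// byte ('41 07 06' here versus '41 09 06' there).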
+// CHECK: # Section 4
+// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000048)
+// CHECK-NEXT: ('sh_size', 0x0000000000000030)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT: ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410706 00000000')
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Section 5
+// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000004)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000000)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000390)
+// CHECK-NEXT: ('sh_size', 0x0000000000000018)
+// CHECK-NEXT: ('sh_link', 0x00000007)
+// CHECK-NEXT: ('sh_info', 0x00000004)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000018)
+// CHECK-NEXT: ('_relocations', [
+// CHECK-NEXT: # Relocation 0
+// CHECK-NEXT: (('r_offset', 0x0000000000000020)
+// CHECK-NEXT: ('r_sym', 0x00000002)
+// CHECK-NEXT: ('r_type', 0x00000002)
+// CHECK-NEXT: ('r_addend', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/comp-dir.s b/test/MC/ELF/comp-dir.s
new file mode 100644
index 000000000000..59e3d7ded261
--- /dev/null
+++ b/test/MC/ELF/comp-dir.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple=x86_64-linux-unknown -g -fdebug-compilation-dir=/test/comp/dir %s -filetype=obj -o %t.o
+// RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
+
+// CHECK: DW_AT_comp_dir [DW_FORM_string] ("{{([A-Za-z]:.*)?}}/test/comp/dir")
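+// (The optional {{([A-Za-z]:.*)?}} prefix tolerates a drive letter in the
+// recorded path when the test runs on a Windows host.)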
+
+f:
+ nop
diff --git a/test/MC/ELF/gen-dwarf.s b/test/MC/ELF/gen-dwarf.s
index b090e0802b10..85e02428fe3f 100644
--- a/test/MC/ELF/gen-dwarf.s
+++ b/test/MC/ELF/gen-dwarf.s
@@ -1,8 +1,9 @@
// RUN: llvm-mc -g -triple i686-pc-linux-gnu %s -filetype=obj -o - | elf-dump | FileCheck %s
-// Test that on ELF the debug info has a relocation to debug_abbrev and one to
-// to debug_line.
+// Test that on ELF:
+// 1. the debug info has a relocation to debug_abbrev and one to debug_line.
+// 2. the debug_aranges has relocations to text and debug_info.
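+// (.debug_aranges needs both: each entry's address range points into .text,
+// and the table header points back at the owning unit in .debug_info.)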
.text
@@ -47,6 +48,34 @@ foo:
// CHECK: # Section 8
// CHECK-NEXT: (('sh_name', 0x00000001) # '.debug_abbrev'
+// Section 9 is .debug_aranges
+// CHECK: # Section 9
+// CHECK-NEXT: (('sh_name', 0x0000001e) # '.debug_aranges'
+
+// Two relocations in .debug_aranges, one to text and one to debug_info.
+// CHECK: # '.rel.debug_aranges'
+// CHECK: # Relocation 0
+// CHECK-NEXT: (('r_offset', 0x00000006)
+// CHECK-NEXT: ('r_sym', 0x000005)
+// CHECK-NEXT: ('r_type', 0x01)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 1
+// CHECK-NEXT: (('r_offset', 0x00000010)
+// CHECK-NEXT: ('r_sym', 0x000001)
+// CHECK-NEXT: ('r_type', 0x01)
+// CHECK-NEXT: ),
+
+// Symbol 1 is section 1 (.text)
+// CHECK: # Symbol 1
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x0)
+// CHECK-NEXT: ('st_type', 0x3)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0001)
+// CHECK-NEXT: ),
+
// Symbol 4 is section 4 (.debug_line)
// CHECK: # Symbol 4
// CHECK-NEXT: (('st_name', 0x00000000) # ''
@@ -58,6 +87,17 @@ foo:
// CHECK-NEXT: ('st_shndx', 0x0004)
// CHECK-NEXT: ),
+// Symbol 5 is section 6 (.debug_info)
+// CHECK: # Symbol 5
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x0)
+// CHECK-NEXT: ('st_type', 0x3)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0006)
+// CHECK-NEXT: ),
+
// Symbol 6 is section 8 (.debug_abbrev)
// CHECK: # Symbol 6
// CHECK-NEXT: (('st_name', 0x00000000) # ''
diff --git a/test/MC/ELF/many-sections-2.s b/test/MC/ELF/many-sections-2.s
new file mode 100644
index 000000000000..789ebf378d8e
--- /dev/null
+++ b/test/MC/ELF/many-sections-2.s
@@ -0,0 +1,65281 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t
+// RUN: llvm-readobj -s %t | FileCheck %s
+
+// CHECK: symtab_shndx
+
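+// An ELF symbol's st_shndx field is only 16 bits wide, and indexes from
+// SHN_LORESERVE (0xff00) up are reserved, so once a file holds that many
+// sections the real indexes have to move into a SHT_SYMTAB_SHNDX
+// (.symtab_shndx) section. The .section directives below push the section
+// count past that limit to force this path.
+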
+.section saaaa
+.section saaab
+.section saaba
+.section saabb
+.section saaca
+.section saacb
+.section saada
+.section saadb
+.section saaea
+.section saaeb
+.section saafa
+.section saafb
+.section saaga
+.section saagb
+.section saaha
+.section saahb
+.section saaia
+.section saaib
+.section saaja
+.section saajb
+.section saaka
+.section saakb
+.section saala
+.section saalb
+.section saama
+.section saamb
+.section saana
+.section saanb
+.section saaoa
+.section saaob
+.section saapa
+.section saapb
+.section saaqa
+.section saaqb
+.section saara
+.section saarb
+.section saasa
+.section saasb
+.section saata
+.section saatb
+.section saaua
+.section saaub
+.section saava
+.section saavb
+.section saawa
+.section saawb
+.section saaxa
+.section saaxb
+.section saaya
+.section saayb
+.section saaza
+.section saazb
+.section saa1a
+.section saa1b
+.section saa2a
+.section saa2b
+.section saa3a
+.section saa3b
+.section saa4a
+.section saa4b
+.section saa5a
+.section saa5b
+.section saa6a
+.section saa6b
+.section saa7a
+.section saa7b
+.section saa8a
+.section saa8b
+.section saa9a
+.section saa9b
+.section saa0a
+.section saa0b
+.section sabaa
+.section sabab
+.section sabba
+.section sabbb
+.section sabca
+.section sabcb
+.section sabda
+.section sabdb
+.section sabea
+.section sabeb
+.section sabfa
+.section sabfb
+.section sabga
+.section sabgb
+.section sabha
+.section sabhb
+.section sabia
+.section sabib
+.section sabja
+.section sabjb
+.section sabka
+.section sabkb
+.section sabla
+.section sablb
+.section sabma
+.section sabmb
+.section sabna
+.section sabnb
+.section saboa
+.section sabob
+.section sabpa
+.section sabpb
+.section sabqa
+.section sabqb
+.section sabra
+.section sabrb
+.section sabsa
+.section sabsb
+.section sabta
+.section sabtb
+.section sabua
+.section sabub
+.section sabva
+.section sabvb
+.section sabwa
+.section sabwb
+.section sabxa
+.section sabxb
+.section sabya
+.section sabyb
+.section sabza
+.section sabzb
+.section sab1a
+.section sab1b
+.section sab2a
+.section sab2b
+.section sab3a
+.section sab3b
+.section sab4a
+.section sab4b
+.section sab5a
+.section sab5b
+.section sab6a
+.section sab6b
+.section sab7a
+.section sab7b
+.section sab8a
+.section sab8b
+.section sab9a
+.section sab9b
+.section sab0a
+.section sab0b
+.section sacaa
+.section sacab
+.section sacba
+.section sacbb
+.section sacca
+.section saccb
+.section sacda
+.section sacdb
+.section sacea
+.section saceb
+.section sacfa
+.section sacfb
+.section sacga
+.section sacgb
+.section sacha
+.section sachb
+.section sacia
+.section sacib
+.section sacja
+.section sacjb
+.section sacka
+.section sackb
+.section sacla
+.section saclb
+.section sacma
+.section sacmb
+.section sacna
+.section sacnb
+.section sacoa
+.section sacob
+.section sacpa
+.section sacpb
+.section sacqa
+.section sacqb
+.section sacra
+.section sacrb
+.section sacsa
+.section sacsb
+.section sacta
+.section sactb
+.section sacua
+.section sacub
+.section sacva
+.section sacvb
+.section sacwa
+.section sacwb
+.section sacxa
+.section sacxb
+.section sacya
+.section sacyb
+.section sacza
+.section saczb
+.section sac1a
+.section sac1b
+.section sac2a
+.section sac2b
+.section sac3a
+.section sac3b
+.section sac4a
+.section sac4b
+.section sac5a
+.section sac5b
+.section sac6a
+.section sac6b
+.section sac7a
+.section sac7b
+.section sac8a
+.section sac8b
+.section sac9a
+.section sac9b
+.section sac0a
+.section sac0b
+.section sadaa
+.section sadab
+.section sadba
+.section sadbb
+.section sadca
+.section sadcb
+.section sadda
+.section saddb
+.section sadea
+.section sadeb
+.section sadfa
+.section sadfb
+.section sadga
+.section sadgb
+.section sadha
+.section sadhb
+.section sadia
+.section sadib
+.section sadja
+.section sadjb
+.section sadka
+.section sadkb
+.section sadla
+.section sadlb
+.section sadma
+.section sadmb
+.section sadna
+.section sadnb
+.section sadoa
+.section sadob
+.section sadpa
+.section sadpb
+.section sadqa
+.section sadqb
+.section sadra
+.section sadrb
+.section sadsa
+.section sadsb
+.section sadta
+.section sadtb
+.section sadua
+.section sadub
+.section sadva
+.section sadvb
+.section sadwa
+.section sadwb
+.section sadxa
+.section sadxb
+.section sadya
+.section sadyb
+.section sadza
+.section sadzb
+.section sad1a
+.section sad1b
+.section sad2a
+.section sad2b
+.section sad3a
+.section sad3b
+.section sad4a
+.section sad4b
+.section sad5a
+.section sad5b
+.section sad6a
+.section sad6b
+.section sad7a
+.section sad7b
+.section sad8a
+.section sad8b
+.section sad9a
+.section sad9b
+.section sad0a
+.section sad0b
+.section saeaa
+.section saeab
+.section saeba
+.section saebb
+.section saeca
+.section saecb
+.section saeda
+.section saedb
+.section saeea
+.section saeeb
+.section saefa
+.section saefb
+.section saega
+.section saegb
+.section saeha
+.section saehb
+.section saeia
+.section saeib
+.section saeja
+.section saejb
+.section saeka
+.section saekb
+.section saela
+.section saelb
+.section saema
+.section saemb
+.section saena
+.section saenb
+.section saeoa
+.section saeob
+.section saepa
+.section saepb
+.section saeqa
+.section saeqb
+.section saera
+.section saerb
+.section saesa
+.section saesb
+.section saeta
+.section saetb
+.section saeua
+.section saeub
+.section saeva
+.section saevb
+.section saewa
+.section saewb
+.section saexa
+.section saexb
+.section saeya
+.section saeyb
+.section saeza
+.section saezb
+.section sae1a
+.section sae1b
+.section sae2a
+.section sae2b
+.section sae3a
+.section sae3b
+.section sae4a
+.section sae4b
+.section sae5a
+.section sae5b
+.section sae6a
+.section sae6b
+.section sae7a
+.section sae7b
+.section sae8a
+.section sae8b
+.section sae9a
+.section sae9b
+.section sae0a
+.section sae0b
+.section safaa
+.section safab
+.section safba
+.section safbb
+.section safca
+.section safcb
+.section safda
+.section safdb
+.section safea
+.section safeb
+.section saffa
+.section saffb
+.section safga
+.section safgb
+.section safha
+.section safhb
+.section safia
+.section safib
+.section safja
+.section safjb
+.section safka
+.section safkb
+.section safla
+.section saflb
+.section safma
+.section safmb
+.section safna
+.section safnb
+.section safoa
+.section safob
+.section safpa
+.section safpb
+.section safqa
+.section safqb
+.section safra
+.section safrb
+.section safsa
+.section safsb
+.section safta
+.section saftb
+.section safua
+.section safub
+.section safva
+.section safvb
+.section safwa
+.section safwb
+.section safxa
+.section safxb
+.section safya
+.section safyb
+.section safza
+.section safzb
+.section saf1a
+.section saf1b
+.section saf2a
+.section saf2b
+.section saf3a
+.section saf3b
+.section saf4a
+.section saf4b
+.section saf5a
+.section saf5b
+.section saf6a
+.section saf6b
+.section saf7a
+.section saf7b
+.section saf8a
+.section saf8b
+.section saf9a
+.section saf9b
+.section saf0a
+.section saf0b
+.section sagaa
+.section sagab
+.section sagba
+.section sagbb
+.section sagca
+.section sagcb
+.section sagda
+.section sagdb
+.section sagea
+.section sageb
+.section sagfa
+.section sagfb
+.section sagga
+.section saggb
+.section sagha
+.section saghb
+.section sagia
+.section sagib
+.section sagja
+.section sagjb
+.section sagka
+.section sagkb
+.section sagla
+.section saglb
+.section sagma
+.section sagmb
+.section sagna
+.section sagnb
+.section sagoa
+.section sagob
+.section sagpa
+.section sagpb
+.section sagqa
+.section sagqb
+.section sagra
+.section sagrb
+.section sagsa
+.section sagsb
+.section sagta
+.section sagtb
+.section sagua
+.section sagub
+.section sagva
+.section sagvb
+.section sagwa
+.section sagwb
+.section sagxa
+.section sagxb
+.section sagya
+.section sagyb
+.section sagza
+.section sagzb
+.section sag1a
+.section sag1b
+.section sag2a
+.section sag2b
+.section sag3a
+.section sag3b
+.section sag4a
+.section sag4b
+.section sag5a
+.section sag5b
+.section sag6a
+.section sag6b
+.section sag7a
+.section sag7b
+.section sag8a
+.section sag8b
+.section sag9a
+.section sag9b
+.section sag0a
+.section sag0b
+.section sahaa
+.section sahab
+.section sahba
+.section sahbb
+.section sahca
+.section sahcb
+.section sahda
+.section sahdb
+.section sahea
+.section saheb
+.section sahfa
+.section sahfb
+.section sahga
+.section sahgb
+.section sahha
+.section sahhb
+.section sahia
+.section sahib
+.section sahja
+.section sahjb
+.section sahka
+.section sahkb
+.section sahla
+.section sahlb
+.section sahma
+.section sahmb
+.section sahna
+.section sahnb
+.section sahoa
+.section sahob
+.section sahpa
+.section sahpb
+.section sahqa
+.section sahqb
+.section sahra
+.section sahrb
+.section sahsa
+.section sahsb
+.section sahta
+.section sahtb
+.section sahua
+.section sahub
+.section sahva
+.section sahvb
+.section sahwa
+.section sahwb
+.section sahxa
+.section sahxb
+.section sahya
+.section sahyb
+.section sahza
+.section sahzb
+.section sah1a
+.section sah1b
+.section sah2a
+.section sah2b
+.section sah3a
+.section sah3b
+.section sah4a
+.section sah4b
+.section sah5a
+.section sah5b
+.section sah6a
+.section sah6b
+.section sah7a
+.section sah7b
+.section sah8a
+.section sah8b
+.section sah9a
+.section sah9b
+.section sah0a
+.section sah0b
+.section saiaa
+.section saiab
+.section saiba
+.section saibb
+.section saica
+.section saicb
+.section saida
+.section saidb
+.section saiea
+.section saieb
+.section saifa
+.section saifb
+.section saiga
+.section saigb
+.section saiha
+.section saihb
+.section saiia
+.section saiib
+.section saija
+.section saijb
+.section saika
+.section saikb
+.section saila
+.section sailb
+.section saima
+.section saimb
+.section saina
+.section sainb
+.section saioa
+.section saiob
+.section saipa
+.section saipb
+.section saiqa
+.section saiqb
+.section saira
+.section sairb
+.section saisa
+.section saisb
+.section saita
+.section saitb
+.section saiua
+.section saiub
+.section saiva
+.section saivb
+.section saiwa
+.section saiwb
+.section saixa
+.section saixb
+.section saiya
+.section saiyb
+.section saiza
+.section saizb
+.section sai1a
+.section sai1b
+.section sai2a
+.section sai2b
+.section sai3a
+.section sai3b
+.section sai4a
+.section sai4b
+.section sai5a
+.section sai5b
+.section sai6a
+.section sai6b
+.section sai7a
+.section sai7b
+.section sai8a
+.section sai8b
+.section sai9a
+.section sai9b
+.section sai0a
+.section sai0b
+.section sajaa
+.section sajab
+.section sajba
+.section sajbb
+.section sajca
+.section sajcb
+.section sajda
+.section sajdb
+.section sajea
+.section sajeb
+.section sajfa
+.section sajfb
+.section sajga
+.section sajgb
+.section sajha
+.section sajhb
+.section sajia
+.section sajib
+.section sajja
+.section sajjb
+.section sajka
+.section sajkb
+.section sajla
+.section sajlb
+.section sajma
+.section sajmb
+.section sajna
+.section sajnb
+.section sajoa
+.section sajob
+.section sajpa
+.section sajpb
+.section sajqa
+.section sajqb
+.section sajra
+.section sajrb
+.section sajsa
+.section sajsb
+.section sajta
+.section sajtb
+.section sajua
+.section sajub
+.section sajva
+.section sajvb
+.section sajwa
+.section sajwb
+.section sajxa
+.section sajxb
+.section sajya
+.section sajyb
+.section sajza
+.section sajzb
+.section saj1a
+.section saj1b
+.section saj2a
+.section saj2b
+.section saj3a
+.section saj3b
+.section saj4a
+.section saj4b
+.section saj5a
+.section saj5b
+.section saj6a
+.section saj6b
+.section saj7a
+.section saj7b
+.section saj8a
+.section saj8b
+.section saj9a
+.section saj9b
+.section saj0a
+.section saj0b
+.section sakaa
+.section sakab
+.section sakba
+.section sakbb
+.section sakca
+.section sakcb
+.section sakda
+.section sakdb
+.section sakea
+.section sakeb
+.section sakfa
+.section sakfb
+.section sakga
+.section sakgb
+.section sakha
+.section sakhb
+.section sakia
+.section sakib
+.section sakja
+.section sakjb
+.section sakka
+.section sakkb
+.section sakla
+.section saklb
+.section sakma
+.section sakmb
+.section sakna
+.section saknb
+.section sakoa
+.section sakob
+.section sakpa
+.section sakpb
+.section sakqa
+.section sakqb
+.section sakra
+.section sakrb
+.section saksa
+.section saksb
+.section sakta
+.section saktb
+.section sakua
+.section sakub
+.section sakva
+.section sakvb
+.section sakwa
+.section sakwb
+.section sakxa
+.section sakxb
+.section sakya
+.section sakyb
+.section sakza
+.section sakzb
+.section sak1a
+.section sak1b
+.section sak2a
+.section sak2b
+.section sak3a
+.section sak3b
+.section sak4a
+.section sak4b
+.section sak5a
+.section sak5b
+.section sak6a
+.section sak6b
+.section sak7a
+.section sak7b
+.section sak8a
+.section sak8b
+.section sak9a
+.section sak9b
+.section sak0a
+.section sak0b
+.section salaa
+.section salab
+.section salba
+.section salbb
+.section salca
+.section salcb
+.section salda
+.section saldb
+.section salea
+.section saleb
+.section salfa
+.section salfb
+.section salga
+.section salgb
+.section salha
+.section salhb
+.section salia
+.section salib
+.section salja
+.section saljb
+.section salka
+.section salkb
+.section salla
+.section sallb
+.section salma
+.section salmb
+.section salna
+.section salnb
+.section saloa
+.section salob
+.section salpa
+.section salpb
+.section salqa
+.section salqb
+.section salra
+.section salrb
+.section salsa
+.section salsb
+.section salta
+.section saltb
+.section salua
+.section salub
+.section salva
+.section salvb
+.section salwa
+.section salwb
+.section salxa
+.section salxb
+.section salya
+.section salyb
+.section salza
+.section salzb
+.section sal1a
+.section sal1b
+.section sal2a
+.section sal2b
+.section sal3a
+.section sal3b
+.section sal4a
+.section sal4b
+.section sal5a
+.section sal5b
+.section sal6a
+.section sal6b
+.section sal7a
+.section sal7b
+.section sal8a
+.section sal8b
+.section sal9a
+.section sal9b
+.section sal0a
+.section sal0b
+.section samaa
+.section samab
+.section samba
+.section sambb
+.section samca
+.section samcb
+.section samda
+.section samdb
+.section samea
+.section sameb
+.section samfa
+.section samfb
+.section samga
+.section samgb
+.section samha
+.section samhb
+.section samia
+.section samib
+.section samja
+.section samjb
+.section samka
+.section samkb
+.section samla
+.section samlb
+.section samma
+.section sammb
+.section samna
+.section samnb
+.section samoa
+.section samob
+.section sampa
+.section sampb
+.section samqa
+.section samqb
+.section samra
+.section samrb
+.section samsa
+.section samsb
+.section samta
+.section samtb
+.section samua
+.section samub
+.section samva
+.section samvb
+.section samwa
+.section samwb
+.section samxa
+.section samxb
+.section samya
+.section samyb
+.section samza
+.section samzb
+.section sam1a
+.section sam1b
+.section sam2a
+.section sam2b
+.section sam3a
+.section sam3b
+.section sam4a
+.section sam4b
+.section sam5a
+.section sam5b
+.section sam6a
+.section sam6b
+.section sam7a
+.section sam7b
+.section sam8a
+.section sam8b
+.section sam9a
+.section sam9b
+.section sam0a
+.section sam0b
+.section sanaa
+.section sanab
+.section sanba
+.section sanbb
+.section sanca
+.section sancb
+.section sanda
+.section sandb
+.section sanea
+.section saneb
+.section sanfa
+.section sanfb
+.section sanga
+.section sangb
+.section sanha
+.section sanhb
+.section sania
+.section sanib
+.section sanja
+.section sanjb
+.section sanka
+.section sankb
+.section sanla
+.section sanlb
+.section sanma
+.section sanmb
+.section sanna
+.section sannb
+.section sanoa
+.section sanob
+.section sanpa
+.section sanpb
+.section sanqa
+.section sanqb
+.section sanra
+.section sanrb
+.section sansa
+.section sansb
+.section santa
+.section santb
+.section sanua
+.section sanub
+.section sanva
+.section sanvb
+.section sanwa
+.section sanwb
+.section sanxa
+.section sanxb
+.section sanya
+.section sanyb
+.section sanza
+.section sanzb
+.section san1a
+.section san1b
+.section san2a
+.section san2b
+.section san3a
+.section san3b
+.section san4a
+.section san4b
+.section san5a
+.section san5b
+.section san6a
+.section san6b
+.section san7a
+.section san7b
+.section san8a
+.section san8b
+.section san9a
+.section san9b
+.section san0a
+.section san0b
+.section saoaa
+.section saoab
+.section saoba
+.section saobb
+.section saoca
+.section saocb
+.section saoda
+.section saodb
+.section saoea
+.section saoeb
+.section saofa
+.section saofb
+.section saoga
+.section saogb
+.section saoha
+.section saohb
+.section saoia
+.section saoib
+.section saoja
+.section saojb
+.section saoka
+.section saokb
+.section saola
+.section saolb
+.section saoma
+.section saomb
+.section saona
+.section saonb
+.section saooa
+.section saoob
+.section saopa
+.section saopb
+.section saoqa
+.section saoqb
+.section saora
+.section saorb
+.section saosa
+.section saosb
+.section saota
+.section saotb
+.section saoua
+.section saoub
+.section saova
+.section saovb
+.section saowa
+.section saowb
+.section saoxa
+.section saoxb
+.section saoya
+.section saoyb
+.section saoza
+.section saozb
+.section sao1a
+.section sao1b
+.section sao2a
+.section sao2b
+.section sao3a
+.section sao3b
+.section sao4a
+.section sao4b
+.section sao5a
+.section sao5b
+.section sao6a
+.section sao6b
+.section sao7a
+.section sao7b
+.section sao8a
+.section sao8b
+.section sao9a
+.section sao9b
+.section sao0a
+.section sao0b
+.section sapaa
+.section sapab
+.section sapba
+.section sapbb
+.section sapca
+.section sapcb
+.section sapda
+.section sapdb
+.section sapea
+.section sapeb
+.section sapfa
+.section sapfb
+.section sapga
+.section sapgb
+.section sapha
+.section saphb
+.section sapia
+.section sapib
+.section sapja
+.section sapjb
+.section sapka
+.section sapkb
+.section sapla
+.section saplb
+.section sapma
+.section sapmb
+.section sapna
+.section sapnb
+.section sapoa
+.section sapob
+.section sappa
+.section sappb
+.section sapqa
+.section sapqb
+.section sapra
+.section saprb
+.section sapsa
+.section sapsb
+.section sapta
+.section saptb
+.section sapua
+.section sapub
+.section sapva
+.section sapvb
+.section sapwa
+.section sapwb
+.section sapxa
+.section sapxb
+.section sapya
+.section sapyb
+.section sapza
+.section sapzb
+.section sap1a
+.section sap1b
+.section sap2a
+.section sap2b
+.section sap3a
+.section sap3b
+.section sap4a
+.section sap4b
+.section sap5a
+.section sap5b
+.section sap6a
+.section sap6b
+.section sap7a
+.section sap7b
+.section sap8a
+.section sap8b
+.section sap9a
+.section sap9b
+.section sap0a
+.section sap0b
+.section saqaa
+.section saqab
+.section saqba
+.section saqbb
+.section saqca
+.section saqcb
+.section saqda
+.section saqdb
+.section saqea
+.section saqeb
+.section saqfa
+.section saqfb
+.section saqga
+.section saqgb
+.section saqha
+.section saqhb
+.section saqia
+.section saqib
+.section saqja
+.section saqjb
+.section saqka
+.section saqkb
+.section saqla
+.section saqlb
+.section saqma
+.section saqmb
+.section saqna
+.section saqnb
+.section saqoa
+.section saqob
+.section saqpa
+.section saqpb
+.section saqqa
+.section saqqb
+.section saqra
+.section saqrb
+.section saqsa
+.section saqsb
+.section saqta
+.section saqtb
+.section saqua
+.section saqub
+.section saqva
+.section saqvb
+.section saqwa
+.section saqwb
+.section saqxa
+.section saqxb
+.section saqya
+.section saqyb
+.section saqza
+.section saqzb
+.section saq1a
+.section saq1b
+.section saq2a
+.section saq2b
+.section saq3a
+.section saq3b
+.section saq4a
+.section saq4b
+.section saq5a
+.section saq5b
+.section saq6a
+.section saq6b
+.section saq7a
+.section saq7b
+.section saq8a
+.section saq8b
+.section saq9a
+.section saq9b
+.section saq0a
+.section saq0b
+.section saraa
+.section sarab
+.section sarba
+.section sarbb
+.section sarca
+.section sarcb
+.section sarda
+.section sardb
+.section sarea
+.section sareb
+.section sarfa
+.section sarfb
+.section sarga
+.section sargb
+.section sarha
+.section sarhb
+.section saria
+.section sarib
+.section sarja
+.section sarjb
+.section sarka
+.section sarkb
+.section sarla
+.section sarlb
+.section sarma
+.section sarmb
+.section sarna
+.section sarnb
+.section saroa
+.section sarob
+.section sarpa
+.section sarpb
+.section sarqa
+.section sarqb
+.section sarra
+.section sarrb
+.section sarsa
+.section sarsb
+.section sarta
+.section sartb
+.section sarua
+.section sarub
+.section sarva
+.section sarvb
+.section sarwa
+.section sarwb
+.section sarxa
+.section sarxb
+.section sarya
+.section saryb
+.section sarza
+.section sarzb
+.section sar1a
+.section sar1b
+.section sar2a
+.section sar2b
+.section sar3a
+.section sar3b
+.section sar4a
+.section sar4b
+.section sar5a
+.section sar5b
+.section sar6a
+.section sar6b
+.section sar7a
+.section sar7b
+.section sar8a
+.section sar8b
+.section sar9a
+.section sar9b
+.section sar0a
+.section sar0b
+.section sasaa
+.section sasab
+.section sasba
+.section sasbb
+.section sasca
+.section sascb
+.section sasda
+.section sasdb
+.section sasea
+.section saseb
+.section sasfa
+.section sasfb
+.section sasga
+.section sasgb
+.section sasha
+.section sashb
+.section sasia
+.section sasib
+.section sasja
+.section sasjb
+.section saska
+.section saskb
+.section sasla
+.section saslb
+.section sasma
+.section sasmb
+.section sasna
+.section sasnb
+.section sasoa
+.section sasob
+.section saspa
+.section saspb
+.section sasqa
+.section sasqb
+.section sasra
+.section sasrb
+.section sassa
+.section sassb
+.section sasta
+.section sastb
+.section sasua
+.section sasub
+.section sasva
+.section sasvb
+.section saswa
+.section saswb
+.section sasxa
+.section sasxb
+.section sasya
+.section sasyb
+.section sasza
+.section saszb
+.section sas1a
+.section sas1b
+.section sas2a
+.section sas2b
+.section sas3a
+.section sas3b
+.section sas4a
+.section sas4b
+.section sas5a
+.section sas5b
+.section sas6a
+.section sas6b
+.section sas7a
+.section sas7b
+.section sas8a
+.section sas8b
+.section sas9a
+.section sas9b
+.section sas0a
+.section sas0b
+.section sataa
+.section satab
+.section satba
+.section satbb
+.section satca
+.section satcb
+.section satda
+.section satdb
+.section satea
+.section sateb
+.section satfa
+.section satfb
+.section satga
+.section satgb
+.section satha
+.section sathb
+.section satia
+.section satib
+.section satja
+.section satjb
+.section satka
+.section satkb
+.section satla
+.section satlb
+.section satma
+.section satmb
+.section satna
+.section satnb
+.section satoa
+.section satob
+.section satpa
+.section satpb
+.section satqa
+.section satqb
+.section satra
+.section satrb
+.section satsa
+.section satsb
+.section satta
+.section sattb
+.section satua
+.section satub
+.section satva
+.section satvb
+.section satwa
+.section satwb
+.section satxa
+.section satxb
+.section satya
+.section satyb
+.section satza
+.section satzb
+.section sat1a
+.section sat1b
+.section sat2a
+.section sat2b
+.section sat3a
+.section sat3b
+.section sat4a
+.section sat4b
+.section sat5a
+.section sat5b
+.section sat6a
+.section sat6b
+.section sat7a
+.section sat7b
+.section sat8a
+.section sat8b
+.section sat9a
+.section sat9b
+.section sat0a
+.section sat0b
+.section sauaa
+.section sauab
+.section sauba
+.section saubb
+.section sauca
+.section saucb
+.section sauda
+.section saudb
+.section sauea
+.section saueb
+.section saufa
+.section saufb
+.section sauga
+.section saugb
+.section sauha
+.section sauhb
+.section sauia
+.section sauib
+.section sauja
+.section saujb
+.section sauka
+.section saukb
+.section saula
+.section saulb
+.section sauma
+.section saumb
+.section sauna
+.section saunb
+.section sauoa
+.section sauob
+.section saupa
+.section saupb
+.section sauqa
+.section sauqb
+.section saura
+.section saurb
+.section sausa
+.section sausb
+.section sauta
+.section sautb
+.section sauua
+.section sauub
+.section sauva
+.section sauvb
+.section sauwa
+.section sauwb
+.section sauxa
+.section sauxb
+.section sauya
+.section sauyb
+.section sauza
+.section sauzb
+.section sau1a
+.section sau1b
+.section sau2a
+.section sau2b
+.section sau3a
+.section sau3b
+.section sau4a
+.section sau4b
+.section sau5a
+.section sau5b
+.section sau6a
+.section sau6b
+.section sau7a
+.section sau7b
+.section sau8a
+.section sau8b
+.section sau9a
+.section sau9b
+.section sau0a
+.section sau0b
+.section savaa
+.section savab
+.section savba
+.section savbb
+.section savca
+.section savcb
+.section savda
+.section savdb
+.section savea
+.section saveb
+.section savfa
+.section savfb
+.section savga
+.section savgb
+.section savha
+.section savhb
+.section savia
+.section savib
+.section savja
+.section savjb
+.section savka
+.section savkb
+.section savla
+.section savlb
+.section savma
+.section savmb
+.section savna
+.section savnb
+.section savoa
+.section savob
+.section savpa
+.section savpb
+.section savqa
+.section savqb
+.section savra
+.section savrb
+.section savsa
+.section savsb
+.section savta
+.section savtb
+.section savua
+.section savub
+.section savva
+.section savvb
+.section savwa
+.section savwb
+.section savxa
+.section savxb
+.section savya
+.section savyb
+.section savza
+.section savzb
+.section sav1a
+.section sav1b
+.section sav2a
+.section sav2b
+.section sav3a
+.section sav3b
+.section sav4a
+.section sav4b
+.section sav5a
+.section sav5b
+.section sav6a
+.section sav6b
+.section sav7a
+.section sav7b
+.section sav8a
+.section sav8b
+.section sav9a
+.section sav9b
+.section sav0a
+.section sav0b
+.section sawaa
+.section sawab
+.section sawba
+.section sawbb
+.section sawca
+.section sawcb
+.section sawda
+.section sawdb
+.section sawea
+.section saweb
+.section sawfa
+.section sawfb
+.section sawga
+.section sawgb
+.section sawha
+.section sawhb
+.section sawia
+.section sawib
+.section sawja
+.section sawjb
+.section sawka
+.section sawkb
+.section sawla
+.section sawlb
+.section sawma
+.section sawmb
+.section sawna
+.section sawnb
+.section sawoa
+.section sawob
+.section sawpa
+.section sawpb
+.section sawqa
+.section sawqb
+.section sawra
+.section sawrb
+.section sawsa
+.section sawsb
+.section sawta
+.section sawtb
+.section sawua
+.section sawub
+.section sawva
+.section sawvb
+.section sawwa
+.section sawwb
+.section sawxa
+.section sawxb
+.section sawya
+.section sawyb
+.section sawza
+.section sawzb
+.section saw1a
+.section saw1b
+.section saw2a
+.section saw2b
+.section saw3a
+.section saw3b
+.section saw4a
+.section saw4b
+.section saw5a
+.section saw5b
+.section saw6a
+.section saw6b
+.section saw7a
+.section saw7b
+.section saw8a
+.section saw8b
+.section saw9a
+.section saw9b
+.section saw0a
+.section saw0b
+.section saxaa
+.section saxab
+.section saxba
+.section saxbb
+.section saxca
+.section saxcb
+.section saxda
+.section saxdb
+.section saxea
+.section saxeb
+.section saxfa
+.section saxfb
+.section saxga
+.section saxgb
+.section saxha
+.section saxhb
+.section saxia
+.section saxib
+.section saxja
+.section saxjb
+.section saxka
+.section saxkb
+.section saxla
+.section saxlb
+.section saxma
+.section saxmb
+.section saxna
+.section saxnb
+.section saxoa
+.section saxob
+.section saxpa
+.section saxpb
+.section saxqa
+.section saxqb
+.section saxra
+.section saxrb
+.section saxsa
+.section saxsb
+.section saxta
+.section saxtb
+.section saxua
+.section saxub
+.section saxva
+.section saxvb
+.section saxwa
+.section saxwb
+.section saxxa
+.section saxxb
+.section saxya
+.section saxyb
+.section saxza
+.section saxzb
+.section sax1a
+.section sax1b
+.section sax2a
+.section sax2b
+.section sax3a
+.section sax3b
+.section sax4a
+.section sax4b
+.section sax5a
+.section sax5b
+.section sax6a
+.section sax6b
+.section sax7a
+.section sax7b
+.section sax8a
+.section sax8b
+.section sax9a
+.section sax9b
+.section sax0a
+.section sax0b
+.section sayaa
+.section sayab
+.section sayba
+.section saybb
+.section sayca
+.section saycb
+.section sayda
+.section saydb
+.section sayea
+.section sayeb
+.section sayfa
+.section sayfb
+.section sayga
+.section saygb
+.section sayha
+.section sayhb
+.section sayia
+.section sayib
+.section sayja
+.section sayjb
+.section sayka
+.section saykb
+.section sayla
+.section saylb
+.section sayma
+.section saymb
+.section sayna
+.section saynb
+.section sayoa
+.section sayob
+.section saypa
+.section saypb
+.section sayqa
+.section sayqb
+.section sayra
+.section sayrb
+.section saysa
+.section saysb
+.section sayta
+.section saytb
+.section sayua
+.section sayub
+.section sayva
+.section sayvb
+.section saywa
+.section saywb
+.section sayxa
+.section sayxb
+.section sayya
+.section sayyb
+.section sayza
+.section sayzb
+.section say1a
+.section say1b
+.section say2a
+.section say2b
+.section say3a
+.section say3b
+.section say4a
+.section say4b
+.section say5a
+.section say5b
+.section say6a
+.section say6b
+.section say7a
+.section say7b
+.section say8a
+.section say8b
+.section say9a
+.section say9b
+.section say0a
+.section say0b
+.section sazaa
+.section sazab
+.section sazba
+.section sazbb
+.section sazca
+.section sazcb
+.section sazda
+.section sazdb
+.section sazea
+.section sazeb
+.section sazfa
+.section sazfb
+.section sazga
+.section sazgb
+.section sazha
+.section sazhb
+.section sazia
+.section sazib
+.section sazja
+.section sazjb
+.section sazka
+.section sazkb
+.section sazla
+.section sazlb
+.section sazma
+.section sazmb
+.section sazna
+.section saznb
+.section sazoa
+.section sazob
+.section sazpa
+.section sazpb
+.section sazqa
+.section sazqb
+.section sazra
+.section sazrb
+.section sazsa
+.section sazsb
+.section sazta
+.section saztb
+.section sazua
+.section sazub
+.section sazva
+.section sazvb
+.section sazwa
+.section sazwb
+.section sazxa
+.section sazxb
+.section sazya
+.section sazyb
+.section sazza
+.section sazzb
+.section saz1a
+.section saz1b
+.section saz2a
+.section saz2b
+.section saz3a
+.section saz3b
+.section saz4a
+.section saz4b
+.section saz5a
+.section saz5b
+.section saz6a
+.section saz6b
+.section saz7a
+.section saz7b
+.section saz8a
+.section saz8b
+.section saz9a
+.section saz9b
+.section saz0a
+.section saz0b
+.section sa1aa
+.section sa1ab
+.section sa1ba
+.section sa1bb
+.section sa1ca
+.section sa1cb
+.section sa1da
+.section sa1db
+.section sa1ea
+.section sa1eb
+.section sa1fa
+.section sa1fb
+.section sa1ga
+.section sa1gb
+.section sa1ha
+.section sa1hb
+.section sa1ia
+.section sa1ib
+.section sa1ja
+.section sa1jb
+.section sa1ka
+.section sa1kb
+.section sa1la
+.section sa1lb
+.section sa1ma
+.section sa1mb
+.section sa1na
+.section sa1nb
+.section sa1oa
+.section sa1ob
+.section sa1pa
+.section sa1pb
+.section sa1qa
+.section sa1qb
+.section sa1ra
+.section sa1rb
+.section sa1sa
+.section sa1sb
+.section sa1ta
+.section sa1tb
+.section sa1ua
+.section sa1ub
+.section sa1va
+.section sa1vb
+.section sa1wa
+.section sa1wb
+.section sa1xa
+.section sa1xb
+.section sa1ya
+.section sa1yb
+.section sa1za
+.section sa1zb
+.section sa11a
+.section sa11b
+.section sa12a
+.section sa12b
+.section sa13a
+.section sa13b
+.section sa14a
+.section sa14b
+.section sa15a
+.section sa15b
+.section sa16a
+.section sa16b
+.section sa17a
+.section sa17b
+.section sa18a
+.section sa18b
+.section sa19a
+.section sa19b
+.section sa10a
+.section sa10b
+.section sa2aa
+.section sa2ab
+.section sa2ba
+.section sa2bb
+.section sa2ca
+.section sa2cb
+.section sa2da
+.section sa2db
+.section sa2ea
+.section sa2eb
+.section sa2fa
+.section sa2fb
+.section sa2ga
+.section sa2gb
+.section sa2ha
+.section sa2hb
+.section sa2ia
+.section sa2ib
+.section sa2ja
+.section sa2jb
+.section sa2ka
+.section sa2kb
+.section sa2la
+.section sa2lb
+.section sa2ma
+.section sa2mb
+.section sa2na
+.section sa2nb
+.section sa2oa
+.section sa2ob
+.section sa2pa
+.section sa2pb
+.section sa2qa
+.section sa2qb
+.section sa2ra
+.section sa2rb
+.section sa2sa
+.section sa2sb
+.section sa2ta
+.section sa2tb
+.section sa2ua
+.section sa2ub
+.section sa2va
+.section sa2vb
+.section sa2wa
+.section sa2wb
+.section sa2xa
+.section sa2xb
+.section sa2ya
+.section sa2yb
+.section sa2za
+.section sa2zb
+.section sa21a
+.section sa21b
+.section sa22a
+.section sa22b
+.section sa23a
+.section sa23b
+.section sa24a
+.section sa24b
+.section sa25a
+.section sa25b
+.section sa26a
+.section sa26b
+.section sa27a
+.section sa27b
+.section sa28a
+.section sa28b
+.section sa29a
+.section sa29b
+.section sa20a
+.section sa20b
+.section sa3aa
+.section sa3ab
+.section sa3ba
+.section sa3bb
+.section sa3ca
+.section sa3cb
+.section sa3da
+.section sa3db
+.section sa3ea
+.section sa3eb
+.section sa3fa
+.section sa3fb
+.section sa3ga
+.section sa3gb
+.section sa3ha
+.section sa3hb
+.section sa3ia
+.section sa3ib
+.section sa3ja
+.section sa3jb
+.section sa3ka
+.section sa3kb
+.section sa3la
+.section sa3lb
+.section sa3ma
+.section sa3mb
+.section sa3na
+.section sa3nb
+.section sa3oa
+.section sa3ob
+.section sa3pa
+.section sa3pb
+.section sa3qa
+.section sa3qb
+.section sa3ra
+.section sa3rb
+.section sa3sa
+.section sa3sb
+.section sa3ta
+.section sa3tb
+.section sa3ua
+.section sa3ub
+.section sa3va
+.section sa3vb
+.section sa3wa
+.section sa3wb
+.section sa3xa
+.section sa3xb
+.section sa3ya
+.section sa3yb
+.section sa3za
+.section sa3zb
+.section sa31a
+.section sa31b
+.section sa32a
+.section sa32b
+.section sa33a
+.section sa33b
+.section sa34a
+.section sa34b
+.section sa35a
+.section sa35b
+.section sa36a
+.section sa36b
+.section sa37a
+.section sa37b
+.section sa38a
+.section sa38b
+.section sa39a
+.section sa39b
+.section sa30a
+.section sa30b
+.section sa4aa
+.section sa4ab
+.section sa4ba
+.section sa4bb
+.section sa4ca
+.section sa4cb
+.section sa4da
+.section sa4db
+.section sa4ea
+.section sa4eb
+.section sa4fa
+.section sa4fb
+.section sa4ga
+.section sa4gb
+.section sa4ha
+.section sa4hb
+.section sa4ia
+.section sa4ib
+.section sa4ja
+.section sa4jb
+.section sa4ka
+.section sa4kb
+.section sa4la
+.section sa4lb
+.section sa4ma
+.section sa4mb
+.section sa4na
+.section sa4nb
+.section sa4oa
+.section sa4ob
+.section sa4pa
+.section sa4pb
+.section sa4qa
+.section sa4qb
+.section sa4ra
+.section sa4rb
+.section sa4sa
+.section sa4sb
+.section sa4ta
+.section sa4tb
+.section sa4ua
+.section sa4ub
+.section sa4va
+.section sa4vb
+.section sa4wa
+.section sa4wb
+.section sa4xa
+.section sa4xb
+.section sa4ya
+.section sa4yb
+.section sa4za
+.section sa4zb
+.section sa41a
+.section sa41b
+.section sa42a
+.section sa42b
+.section sa43a
+.section sa43b
+.section sa44a
+.section sa44b
+.section sa45a
+.section sa45b
+.section sa46a
+.section sa46b
+.section sa47a
+.section sa47b
+.section sa48a
+.section sa48b
+.section sa49a
+.section sa49b
+.section sa40a
+.section sa40b
+.section sa5aa
+.section sa5ab
+.section sa5ba
+.section sa5bb
+.section sa5ca
+.section sa5cb
+.section sa5da
+.section sa5db
+.section sa5ea
+.section sa5eb
+.section sa5fa
+.section sa5fb
+.section sa5ga
+.section sa5gb
+.section sa5ha
+.section sa5hb
+.section sa5ia
+.section sa5ib
+.section sa5ja
+.section sa5jb
+.section sa5ka
+.section sa5kb
+.section sa5la
+.section sa5lb
+.section sa5ma
+.section sa5mb
+.section sa5na
+.section sa5nb
+.section sa5oa
+.section sa5ob
+.section sa5pa
+.section sa5pb
+.section sa5qa
+.section sa5qb
+.section sa5ra
+.section sa5rb
+.section sa5sa
+.section sa5sb
+.section sa5ta
+.section sa5tb
+.section sa5ua
+.section sa5ub
+.section sa5va
+.section sa5vb
+.section sa5wa
+.section sa5wb
+.section sa5xa
+.section sa5xb
+.section sa5ya
+.section sa5yb
+.section sa5za
+.section sa5zb
+.section sa51a
+.section sa51b
+.section sa52a
+.section sa52b
+.section sa53a
+.section sa53b
+.section sa54a
+.section sa54b
+.section sa55a
+.section sa55b
+.section sa56a
+.section sa56b
+.section sa57a
+.section sa57b
+.section sa58a
+.section sa58b
+.section sa59a
+.section sa59b
+.section sa50a
+.section sa50b
+.section sa6aa
+.section sa6ab
+.section sa6ba
+.section sa6bb
+.section sa6ca
+.section sa6cb
+.section sa6da
+.section sa6db
+.section sa6ea
+.section sa6eb
+.section sa6fa
+.section sa6fb
+.section sa6ga
+.section sa6gb
+.section sa6ha
+.section sa6hb
+.section sa6ia
+.section sa6ib
+.section sa6ja
+.section sa6jb
+.section sa6ka
+.section sa6kb
+.section sa6la
+.section sa6lb
+.section sa6ma
+.section sa6mb
+.section sa6na
+.section sa6nb
+.section sa6oa
+.section sa6ob
+.section sa6pa
+.section sa6pb
+.section sa6qa
+.section sa6qb
+.section sa6ra
+.section sa6rb
+.section sa6sa
+.section sa6sb
+.section sa6ta
+.section sa6tb
+.section sa6ua
+.section sa6ub
+.section sa6va
+.section sa6vb
+.section sa6wa
+.section sa6wb
+.section sa6xa
+.section sa6xb
+.section sa6ya
+.section sa6yb
+.section sa6za
+.section sa6zb
+.section sa61a
+.section sa61b
+.section sa62a
+.section sa62b
+.section sa63a
+.section sa63b
+.section sa64a
+.section sa64b
+.section sa65a
+.section sa65b
+.section sa66a
+.section sa66b
+.section sa67a
+.section sa67b
+.section sa68a
+.section sa68b
+.section sa69a
+.section sa69b
+.section sa60a
+.section sa60b
+.section sa7aa
+.section sa7ab
+.section sa7ba
+.section sa7bb
+.section sa7ca
+.section sa7cb
+.section sa7da
+.section sa7db
+.section sa7ea
+.section sa7eb
+.section sa7fa
+.section sa7fb
+.section sa7ga
+.section sa7gb
+.section sa7ha
+.section sa7hb
+.section sa7ia
+.section sa7ib
+.section sa7ja
+.section sa7jb
+.section sa7ka
+.section sa7kb
+.section sa7la
+.section sa7lb
+.section sa7ma
+.section sa7mb
+.section sa7na
+.section sa7nb
+.section sa7oa
+.section sa7ob
+.section sa7pa
+.section sa7pb
+.section sa7qa
+.section sa7qb
+.section sa7ra
+.section sa7rb
+.section sa7sa
+.section sa7sb
+.section sa7ta
+.section sa7tb
+.section sa7ua
+.section sa7ub
+.section sa7va
+.section sa7vb
+.section sa7wa
+.section sa7wb
+.section sa7xa
+.section sa7xb
+.section sa7ya
+.section sa7yb
+.section sa7za
+.section sa7zb
+.section sa71a
+.section sa71b
+.section sa72a
+.section sa72b
+.section sa73a
+.section sa73b
+.section sa74a
+.section sa74b
+.section sa75a
+.section sa75b
+.section sa76a
+.section sa76b
+.section sa77a
+.section sa77b
+.section sa78a
+.section sa78b
+.section sa79a
+.section sa79b
+.section sa70a
+.section sa70b
+.section sa8aa
+.section sa8ab
+.section sa8ba
+.section sa8bb
+.section sa8ca
+.section sa8cb
+.section sa8da
+.section sa8db
+.section sa8ea
+.section sa8eb
+.section sa8fa
+.section sa8fb
+.section sa8ga
+.section sa8gb
+.section sa8ha
+.section sa8hb
+.section sa8ia
+.section sa8ib
+.section sa8ja
+.section sa8jb
+.section sa8ka
+.section sa8kb
+.section sa8la
+.section sa8lb
+.section sa8ma
+.section sa8mb
+.section sa8na
+.section sa8nb
+.section sa8oa
+.section sa8ob
+.section sa8pa
+.section sa8pb
+.section sa8qa
+.section sa8qb
+.section sa8ra
+.section sa8rb
+.section sa8sa
+.section sa8sb
+.section sa8ta
+.section sa8tb
+.section sa8ua
+.section sa8ub
+.section sa8va
+.section sa8vb
+.section sa8wa
+.section sa8wb
+.section sa8xa
+.section sa8xb
+.section sa8ya
+.section sa8yb
+.section sa8za
+.section sa8zb
+.section sa81a
+.section sa81b
+.section sa82a
+.section sa82b
+.section sa83a
+.section sa83b
+.section sa84a
+.section sa84b
+.section sa85a
+.section sa85b
+.section sa86a
+.section sa86b
+.section sa87a
+.section sa87b
+.section sa88a
+.section sa88b
+.section sa89a
+.section sa89b
+.section sa80a
+.section sa80b
+.section sa9aa
+.section sa9ab
+.section sa9ba
+.section sa9bb
+.section sa9ca
+.section sa9cb
+.section sa9da
+.section sa9db
+.section sa9ea
+.section sa9eb
+.section sa9fa
+.section sa9fb
+.section sa9ga
+.section sa9gb
+.section sa9ha
+.section sa9hb
+.section sa9ia
+.section sa9ib
+.section sa9ja
+.section sa9jb
+.section sa9ka
+.section sa9kb
+.section sa9la
+.section sa9lb
+.section sa9ma
+.section sa9mb
+.section sa9na
+.section sa9nb
+.section sa9oa
+.section sa9ob
+.section sa9pa
+.section sa9pb
+.section sa9qa
+.section sa9qb
+.section sa9ra
+.section sa9rb
+.section sa9sa
+.section sa9sb
+.section sa9ta
+.section sa9tb
+.section sa9ua
+.section sa9ub
+.section sa9va
+.section sa9vb
+.section sa9wa
+.section sa9wb
+.section sa9xa
+.section sa9xb
+.section sa9ya
+.section sa9yb
+.section sa9za
+.section sa9zb
+.section sa91a
+.section sa91b
+.section sa92a
+.section sa92b
+.section sa93a
+.section sa93b
+.section sa94a
+.section sa94b
+.section sa95a
+.section sa95b
+.section sa96a
+.section sa96b
+.section sa97a
+.section sa97b
+.section sa98a
+.section sa98b
+.section sa99a
+.section sa99b
+.section sa90a
+.section sa90b
+.section sa0aa
+.section sa0ab
+.section sa0ba
+.section sa0bb
+.section sa0ca
+.section sa0cb
+.section sa0da
+.section sa0db
+.section sa0ea
+.section sa0eb
+.section sa0fa
+.section sa0fb
+.section sa0ga
+.section sa0gb
+.section sa0ha
+.section sa0hb
+.section sa0ia
+.section sa0ib
+.section sa0ja
+.section sa0jb
+.section sa0ka
+.section sa0kb
+.section sa0la
+.section sa0lb
+.section sa0ma
+.section sa0mb
+.section sa0na
+.section sa0nb
+.section sa0oa
+.section sa0ob
+.section sa0pa
+.section sa0pb
+.section sa0qa
+.section sa0qb
+.section sa0ra
+.section sa0rb
+.section sa0sa
+.section sa0sb
+.section sa0ta
+.section sa0tb
+.section sa0ua
+.section sa0ub
+.section sa0va
+.section sa0vb
+.section sa0wa
+.section sa0wb
+.section sa0xa
+.section sa0xb
+.section sa0ya
+.section sa0yb
+.section sa0za
+.section sa0zb
+.section sa01a
+.section sa01b
+.section sa02a
+.section sa02b
+.section sa03a
+.section sa03b
+.section sa04a
+.section sa04b
+.section sa05a
+.section sa05b
+.section sa06a
+.section sa06b
+.section sa07a
+.section sa07b
+.section sa08a
+.section sa08b
+.section sa09a
+.section sa09b
+.section sa00a
+.section sa00b
+.section sbaaa
+.section sbaab
+.section sbaba
+.section sbabb
+.section sbaca
+.section sbacb
+.section sbada
+.section sbadb
+.section sbaea
+.section sbaeb
+.section sbafa
+.section sbafb
+.section sbaga
+.section sbagb
+.section sbaha
+.section sbahb
+.section sbaia
+.section sbaib
+.section sbaja
+.section sbajb
+.section sbaka
+.section sbakb
+.section sbala
+.section sbalb
+.section sbama
+.section sbamb
+.section sbana
+.section sbanb
+.section sbaoa
+.section sbaob
+.section sbapa
+.section sbapb
+.section sbaqa
+.section sbaqb
+.section sbara
+.section sbarb
+.section sbasa
+.section sbasb
+.section sbata
+.section sbatb
+.section sbaua
+.section sbaub
+.section sbava
+.section sbavb
+.section sbawa
+.section sbawb
+.section sbaxa
+.section sbaxb
+.section sbaya
+.section sbayb
+.section sbaza
+.section sbazb
+.section sba1a
+.section sba1b
+.section sba2a
+.section sba2b
+.section sba3a
+.section sba3b
+.section sba4a
+.section sba4b
+.section sba5a
+.section sba5b
+.section sba6a
+.section sba6b
+.section sba7a
+.section sba7b
+.section sba8a
+.section sba8b
+.section sba9a
+.section sba9b
+.section sba0a
+.section sba0b
+.section sbbaa
+.section sbbab
+.section sbbba
+.section sbbbb
+.section sbbca
+.section sbbcb
+.section sbbda
+.section sbbdb
+.section sbbea
+.section sbbeb
+.section sbbfa
+.section sbbfb
+.section sbbga
+.section sbbgb
+.section sbbha
+.section sbbhb
+.section sbbia
+.section sbbib
+.section sbbja
+.section sbbjb
+.section sbbka
+.section sbbkb
+.section sbbla
+.section sbblb
+.section sbbma
+.section sbbmb
+.section sbbna
+.section sbbnb
+.section sbboa
+.section sbbob
+.section sbbpa
+.section sbbpb
+.section sbbqa
+.section sbbqb
+.section sbbra
+.section sbbrb
+.section sbbsa
+.section sbbsb
+.section sbbta
+.section sbbtb
+.section sbbua
+.section sbbub
+.section sbbva
+.section sbbvb
+.section sbbwa
+.section sbbwb
+.section sbbxa
+.section sbbxb
+.section sbbya
+.section sbbyb
+.section sbbza
+.section sbbzb
+.section sbb1a
+.section sbb1b
+.section sbb2a
+.section sbb2b
+.section sbb3a
+.section sbb3b
+.section sbb4a
+.section sbb4b
+.section sbb5a
+.section sbb5b
+.section sbb6a
+.section sbb6b
+.section sbb7a
+.section sbb7b
+.section sbb8a
+.section sbb8b
+.section sbb9a
+.section sbb9b
+.section sbb0a
+.section sbb0b
+.section sbcaa
+.section sbcab
+.section sbcba
+.section sbcbb
+.section sbcca
+.section sbccb
+.section sbcda
+.section sbcdb
+.section sbcea
+.section sbceb
+.section sbcfa
+.section sbcfb
+.section sbcga
+.section sbcgb
+.section sbcha
+.section sbchb
+.section sbcia
+.section sbcib
+.section sbcja
+.section sbcjb
+.section sbcka
+.section sbckb
+.section sbcla
+.section sbclb
+.section sbcma
+.section sbcmb
+.section sbcna
+.section sbcnb
+.section sbcoa
+.section sbcob
+.section sbcpa
+.section sbcpb
+.section sbcqa
+.section sbcqb
+.section sbcra
+.section sbcrb
+.section sbcsa
+.section sbcsb
+.section sbcta
+.section sbctb
+.section sbcua
+.section sbcub
+.section sbcva
+.section sbcvb
+.section sbcwa
+.section sbcwb
+.section sbcxa
+.section sbcxb
+.section sbcya
+.section sbcyb
+.section sbcza
+.section sbczb
+.section sbc1a
+.section sbc1b
+.section sbc2a
+.section sbc2b
+.section sbc3a
+.section sbc3b
+.section sbc4a
+.section sbc4b
+.section sbc5a
+.section sbc5b
+.section sbc6a
+.section sbc6b
+.section sbc7a
+.section sbc7b
+.section sbc8a
+.section sbc8b
+.section sbc9a
+.section sbc9b
+.section sbc0a
+.section sbc0b
+.section sbdaa
+.section sbdab
+.section sbdba
+.section sbdbb
+.section sbdca
+.section sbdcb
+.section sbdda
+.section sbddb
+.section sbdea
+.section sbdeb
+.section sbdfa
+.section sbdfb
+.section sbdga
+.section sbdgb
+.section sbdha
+.section sbdhb
+.section sbdia
+.section sbdib
+.section sbdja
+.section sbdjb
+.section sbdka
+.section sbdkb
+.section sbdla
+.section sbdlb
+.section sbdma
+.section sbdmb
+.section sbdna
+.section sbdnb
+.section sbdoa
+.section sbdob
+.section sbdpa
+.section sbdpb
+.section sbdqa
+.section sbdqb
+.section sbdra
+.section sbdrb
+.section sbdsa
+.section sbdsb
+.section sbdta
+.section sbdtb
+.section sbdua
+.section sbdub
+.section sbdva
+.section sbdvb
+.section sbdwa
+.section sbdwb
+.section sbdxa
+.section sbdxb
+.section sbdya
+.section sbdyb
+.section sbdza
+.section sbdzb
+.section sbd1a
+.section sbd1b
+.section sbd2a
+.section sbd2b
+.section sbd3a
+.section sbd3b
+.section sbd4a
+.section sbd4b
+.section sbd5a
+.section sbd5b
+.section sbd6a
+.section sbd6b
+.section sbd7a
+.section sbd7b
+.section sbd8a
+.section sbd8b
+.section sbd9a
+.section sbd9b
+.section sbd0a
+.section sbd0b
+.section sbeaa
+.section sbeab
+.section sbeba
+.section sbebb
+.section sbeca
+.section sbecb
+.section sbeda
+.section sbedb
+.section sbeea
+.section sbeeb
+.section sbefa
+.section sbefb
+.section sbega
+.section sbegb
+.section sbeha
+.section sbehb
+.section sbeia
+.section sbeib
+.section sbeja
+.section sbejb
+.section sbeka
+.section sbekb
+.section sbela
+.section sbelb
+.section sbema
+.section sbemb
+.section sbena
+.section sbenb
+.section sbeoa
+.section sbeob
+.section sbepa
+.section sbepb
+.section sbeqa
+.section sbeqb
+.section sbera
+.section sberb
+.section sbesa
+.section sbesb
+.section sbeta
+.section sbetb
+.section sbeua
+.section sbeub
+.section sbeva
+.section sbevb
+.section sbewa
+.section sbewb
+.section sbexa
+.section sbexb
+.section sbeya
+.section sbeyb
+.section sbeza
+.section sbezb
+.section sbe1a
+.section sbe1b
+.section sbe2a
+.section sbe2b
+.section sbe3a
+.section sbe3b
+.section sbe4a
+.section sbe4b
+.section sbe5a
+.section sbe5b
+.section sbe6a
+.section sbe6b
+.section sbe7a
+.section sbe7b
+.section sbe8a
+.section sbe8b
+.section sbe9a
+.section sbe9b
+.section sbe0a
+.section sbe0b
+.section sbfaa
+.section sbfab
+.section sbfba
+.section sbfbb
+.section sbfca
+.section sbfcb
+.section sbfda
+.section sbfdb
+.section sbfea
+.section sbfeb
+.section sbffa
+.section sbffb
+.section sbfga
+.section sbfgb
+.section sbfha
+.section sbfhb
+.section sbfia
+.section sbfib
+.section sbfja
+.section sbfjb
+.section sbfka
+.section sbfkb
+.section sbfla
+.section sbflb
+.section sbfma
+.section sbfmb
+.section sbfna
+.section sbfnb
+.section sbfoa
+.section sbfob
+.section sbfpa
+.section sbfpb
+.section sbfqa
+.section sbfqb
+.section sbfra
+.section sbfrb
+.section sbfsa
+.section sbfsb
+.section sbfta
+.section sbftb
+.section sbfua
+.section sbfub
+.section sbfva
+.section sbfvb
+.section sbfwa
+.section sbfwb
+.section sbfxa
+.section sbfxb
+.section sbfya
+.section sbfyb
+.section sbfza
+.section sbfzb
+.section sbf1a
+.section sbf1b
+.section sbf2a
+.section sbf2b
+.section sbf3a
+.section sbf3b
+.section sbf4a
+.section sbf4b
+.section sbf5a
+.section sbf5b
+.section sbf6a
+.section sbf6b
+.section sbf7a
+.section sbf7b
+.section sbf8a
+.section sbf8b
+.section sbf9a
+.section sbf9b
+.section sbf0a
+.section sbf0b
+.section sbgaa
+.section sbgab
+.section sbgba
+.section sbgbb
+.section sbgca
+.section sbgcb
+.section sbgda
+.section sbgdb
+.section sbgea
+.section sbgeb
+.section sbgfa
+.section sbgfb
+.section sbgga
+.section sbggb
+.section sbgha
+.section sbghb
+.section sbgia
+.section sbgib
+.section sbgja
+.section sbgjb
+.section sbgka
+.section sbgkb
+.section sbgla
+.section sbglb
+.section sbgma
+.section sbgmb
+.section sbgna
+.section sbgnb
+.section sbgoa
+.section sbgob
+.section sbgpa
+.section sbgpb
+.section sbgqa
+.section sbgqb
+.section sbgra
+.section sbgrb
+.section sbgsa
+.section sbgsb
+.section sbgta
+.section sbgtb
+.section sbgua
+.section sbgub
+.section sbgva
+.section sbgvb
+.section sbgwa
+.section sbgwb
+.section sbgxa
+.section sbgxb
+.section sbgya
+.section sbgyb
+.section sbgza
+.section sbgzb
+.section sbg1a
+.section sbg1b
+.section sbg2a
+.section sbg2b
+.section sbg3a
+.section sbg3b
+.section sbg4a
+.section sbg4b
+.section sbg5a
+.section sbg5b
+.section sbg6a
+.section sbg6b
+.section sbg7a
+.section sbg7b
+.section sbg8a
+.section sbg8b
+.section sbg9a
+.section sbg9b
+.section sbg0a
+.section sbg0b
+.section sbhaa
+.section sbhab
+.section sbhba
+.section sbhbb
+.section sbhca
+.section sbhcb
+.section sbhda
+.section sbhdb
+.section sbhea
+.section sbheb
+.section sbhfa
+.section sbhfb
+.section sbhga
+.section sbhgb
+.section sbhha
+.section sbhhb
+.section sbhia
+.section sbhib
+.section sbhja
+.section sbhjb
+.section sbhka
+.section sbhkb
+.section sbhla
+.section sbhlb
+.section sbhma
+.section sbhmb
+.section sbhna
+.section sbhnb
+.section sbhoa
+.section sbhob
+.section sbhpa
+.section sbhpb
+.section sbhqa
+.section sbhqb
+.section sbhra
+.section sbhrb
+.section sbhsa
+.section sbhsb
+.section sbhta
+.section sbhtb
+.section sbhua
+.section sbhub
+.section sbhva
+.section sbhvb
+.section sbhwa
+.section sbhwb
+.section sbhxa
+.section sbhxb
+.section sbhya
+.section sbhyb
+.section sbhza
+.section sbhzb
+.section sbh1a
+.section sbh1b
+.section sbh2a
+.section sbh2b
+.section sbh3a
+.section sbh3b
+.section sbh4a
+.section sbh4b
+.section sbh5a
+.section sbh5b
+.section sbh6a
+.section sbh6b
+.section sbh7a
+.section sbh7b
+.section sbh8a
+.section sbh8b
+.section sbh9a
+.section sbh9b
+.section sbh0a
+.section sbh0b
+.section sbiaa
+.section sbiab
+.section sbiba
+.section sbibb
+.section sbica
+.section sbicb
+.section sbida
+.section sbidb
+.section sbiea
+.section sbieb
+.section sbifa
+.section sbifb
+.section sbiga
+.section sbigb
+.section sbiha
+.section sbihb
+.section sbiia
+.section sbiib
+.section sbija
+.section sbijb
+.section sbika
+.section sbikb
+.section sbila
+.section sbilb
+.section sbima
+.section sbimb
+.section sbina
+.section sbinb
+.section sbioa
+.section sbiob
+.section sbipa
+.section sbipb
+.section sbiqa
+.section sbiqb
+.section sbira
+.section sbirb
+.section sbisa
+.section sbisb
+.section sbita
+.section sbitb
+.section sbiua
+.section sbiub
+.section sbiva
+.section sbivb
+.section sbiwa
+.section sbiwb
+.section sbixa
+.section sbixb
+.section sbiya
+.section sbiyb
+.section sbiza
+.section sbizb
+.section sbi1a
+.section sbi1b
+.section sbi2a
+.section sbi2b
+.section sbi3a
+.section sbi3b
+.section sbi4a
+.section sbi4b
+.section sbi5a
+.section sbi5b
+.section sbi6a
+.section sbi6b
+.section sbi7a
+.section sbi7b
+.section sbi8a
+.section sbi8b
+.section sbi9a
+.section sbi9b
+.section sbi0a
+.section sbi0b
+.section sbjaa
+.section sbjab
+.section sbjba
+.section sbjbb
+.section sbjca
+.section sbjcb
+.section sbjda
+.section sbjdb
+.section sbjea
+.section sbjeb
+.section sbjfa
+.section sbjfb
+.section sbjga
+.section sbjgb
+.section sbjha
+.section sbjhb
+.section sbjia
+.section sbjib
+.section sbjja
+.section sbjjb
+.section sbjka
+.section sbjkb
+.section sbjla
+.section sbjlb
+.section sbjma
+.section sbjmb
+.section sbjna
+.section sbjnb
+.section sbjoa
+.section sbjob
+.section sbjpa
+.section sbjpb
+.section sbjqa
+.section sbjqb
+.section sbjra
+.section sbjrb
+.section sbjsa
+.section sbjsb
+.section sbjta
+.section sbjtb
+.section sbjua
+.section sbjub
+.section sbjva
+.section sbjvb
+.section sbjwa
+.section sbjwb
+.section sbjxa
+.section sbjxb
+.section sbjya
+.section sbjyb
+.section sbjza
+.section sbjzb
+.section sbj1a
+.section sbj1b
+.section sbj2a
+.section sbj2b
+.section sbj3a
+.section sbj3b
+.section sbj4a
+.section sbj4b
+.section sbj5a
+.section sbj5b
+.section sbj6a
+.section sbj6b
+.section sbj7a
+.section sbj7b
+.section sbj8a
+.section sbj8b
+.section sbj9a
+.section sbj9b
+.section sbj0a
+.section sbj0b
+.section sbkaa
+.section sbkab
+.section sbkba
+.section sbkbb
+.section sbkca
+.section sbkcb
+.section sbkda
+.section sbkdb
+.section sbkea
+.section sbkeb
+.section sbkfa
+.section sbkfb
+.section sbkga
+.section sbkgb
+.section sbkha
+.section sbkhb
+.section sbkia
+.section sbkib
+.section sbkja
+.section sbkjb
+.section sbkka
+.section sbkkb
+.section sbkla
+.section sbklb
+.section sbkma
+.section sbkmb
+.section sbkna
+.section sbknb
+.section sbkoa
+.section sbkob
+.section sbkpa
+.section sbkpb
+.section sbkqa
+.section sbkqb
+.section sbkra
+.section sbkrb
+.section sbksa
+.section sbksb
+.section sbkta
+.section sbktb
+.section sbkua
+.section sbkub
+.section sbkva
+.section sbkvb
+.section sbkwa
+.section sbkwb
+.section sbkxa
+.section sbkxb
+.section sbkya
+.section sbkyb
+.section sbkza
+.section sbkzb
+.section sbk1a
+.section sbk1b
+.section sbk2a
+.section sbk2b
+.section sbk3a
+.section sbk3b
+.section sbk4a
+.section sbk4b
+.section sbk5a
+.section sbk5b
+.section sbk6a
+.section sbk6b
+.section sbk7a
+.section sbk7b
+.section sbk8a
+.section sbk8b
+.section sbk9a
+.section sbk9b
+.section sbk0a
+.section sbk0b
+.section sblaa
+.section sblab
+.section sblba
+.section sblbb
+.section sblca
+.section sblcb
+.section sblda
+.section sbldb
+.section sblea
+.section sbleb
+.section sblfa
+.section sblfb
+.section sblga
+.section sblgb
+.section sblha
+.section sblhb
+.section sblia
+.section sblib
+.section sblja
+.section sbljb
+.section sblka
+.section sblkb
+.section sblla
+.section sbllb
+.section sblma
+.section sblmb
+.section sblna
+.section sblnb
+.section sbloa
+.section sblob
+.section sblpa
+.section sblpb
+.section sblqa
+.section sblqb
+.section sblra
+.section sblrb
+.section sblsa
+.section sblsb
+.section sblta
+.section sbltb
+.section sblua
+.section sblub
+.section sblva
+.section sblvb
+.section sblwa
+.section sblwb
+.section sblxa
+.section sblxb
+.section sblya
+.section sblyb
+.section sblza
+.section sblzb
+.section sbl1a
+.section sbl1b
+.section sbl2a
+.section sbl2b
+.section sbl3a
+.section sbl3b
+.section sbl4a
+.section sbl4b
+.section sbl5a
+.section sbl5b
+.section sbl6a
+.section sbl6b
+.section sbl7a
+.section sbl7b
+.section sbl8a
+.section sbl8b
+.section sbl9a
+.section sbl9b
+.section sbl0a
+.section sbl0b
+.section sbmaa
+.section sbmab
+.section sbmba
+.section sbmbb
+.section sbmca
+.section sbmcb
+.section sbmda
+.section sbmdb
+.section sbmea
+.section sbmeb
+.section sbmfa
+.section sbmfb
+.section sbmga
+.section sbmgb
+.section sbmha
+.section sbmhb
+.section sbmia
+.section sbmib
+.section sbmja
+.section sbmjb
+.section sbmka
+.section sbmkb
+.section sbmla
+.section sbmlb
+.section sbmma
+.section sbmmb
+.section sbmna
+.section sbmnb
+.section sbmoa
+.section sbmob
+.section sbmpa
+.section sbmpb
+.section sbmqa
+.section sbmqb
+.section sbmra
+.section sbmrb
+.section sbmsa
+.section sbmsb
+.section sbmta
+.section sbmtb
+.section sbmua
+.section sbmub
+.section sbmva
+.section sbmvb
+.section sbmwa
+.section sbmwb
+.section sbmxa
+.section sbmxb
+.section sbmya
+.section sbmyb
+.section sbmza
+.section sbmzb
+.section sbm1a
+.section sbm1b
+.section sbm2a
+.section sbm2b
+.section sbm3a
+.section sbm3b
+.section sbm4a
+.section sbm4b
+.section sbm5a
+.section sbm5b
+.section sbm6a
+.section sbm6b
+.section sbm7a
+.section sbm7b
+.section sbm8a
+.section sbm8b
+.section sbm9a
+.section sbm9b
+.section sbm0a
+.section sbm0b
+.section sbnaa
+.section sbnab
+.section sbnba
+.section sbnbb
+.section sbnca
+.section sbncb
+.section sbnda
+.section sbndb
+.section sbnea
+.section sbneb
+.section sbnfa
+.section sbnfb
+.section sbnga
+.section sbngb
+.section sbnha
+.section sbnhb
+.section sbnia
+.section sbnib
+.section sbnja
+.section sbnjb
+.section sbnka
+.section sbnkb
+.section sbnla
+.section sbnlb
+.section sbnma
+.section sbnmb
+.section sbnna
+.section sbnnb
+.section sbnoa
+.section sbnob
+.section sbnpa
+.section sbnpb
+.section sbnqa
+.section sbnqb
+.section sbnra
+.section sbnrb
+.section sbnsa
+.section sbnsb
+.section sbnta
+.section sbntb
+.section sbnua
+.section sbnub
+.section sbnva
+.section sbnvb
+.section sbnwa
+.section sbnwb
+.section sbnxa
+.section sbnxb
+.section sbnya
+.section sbnyb
+.section sbnza
+.section sbnzb
+.section sbn1a
+.section sbn1b
+.section sbn2a
+.section sbn2b
+.section sbn3a
+.section sbn3b
+.section sbn4a
+.section sbn4b
+.section sbn5a
+.section sbn5b
+.section sbn6a
+.section sbn6b
+.section sbn7a
+.section sbn7b
+.section sbn8a
+.section sbn8b
+.section sbn9a
+.section sbn9b
+.section sbn0a
+.section sbn0b
+.section sboaa
+.section sboab
+.section sboba
+.section sbobb
+.section sboca
+.section sbocb
+.section sboda
+.section sbodb
+.section sboea
+.section sboeb
+.section sbofa
+.section sbofb
+.section sboga
+.section sbogb
+.section sboha
+.section sbohb
+.section sboia
+.section sboib
+.section sboja
+.section sbojb
+.section sboka
+.section sbokb
+.section sbola
+.section sbolb
+.section sboma
+.section sbomb
+.section sbona
+.section sbonb
+.section sbooa
+.section sboob
+.section sbopa
+.section sbopb
+.section sboqa
+.section sboqb
+.section sbora
+.section sborb
+.section sbosa
+.section sbosb
+.section sbota
+.section sbotb
+.section sboua
+.section sboub
+.section sbova
+.section sbovb
+.section sbowa
+.section sbowb
+.section sboxa
+.section sboxb
+.section sboya
+.section sboyb
+.section sboza
+.section sbozb
+.section sbo1a
+.section sbo1b
+.section sbo2a
+.section sbo2b
+.section sbo3a
+.section sbo3b
+.section sbo4a
+.section sbo4b
+.section sbo5a
+.section sbo5b
+.section sbo6a
+.section sbo6b
+.section sbo7a
+.section sbo7b
+.section sbo8a
+.section sbo8b
+.section sbo9a
+.section sbo9b
+.section sbo0a
+.section sbo0b
+.section sbpaa
+.section sbpab
+.section sbpba
+.section sbpbb
+.section sbpca
+.section sbpcb
+.section sbpda
+.section sbpdb
+.section sbpea
+.section sbpeb
+.section sbpfa
+.section sbpfb
+.section sbpga
+.section sbpgb
+.section sbpha
+.section sbphb
+.section sbpia
+.section sbpib
+.section sbpja
+.section sbpjb
+.section sbpka
+.section sbpkb
+.section sbpla
+.section sbplb
+.section sbpma
+.section sbpmb
+.section sbpna
+.section sbpnb
+.section sbpoa
+.section sbpob
+.section sbppa
+.section sbppb
+.section sbpqa
+.section sbpqb
+.section sbpra
+.section sbprb
+.section sbpsa
+.section sbpsb
+.section sbpta
+.section sbptb
+.section sbpua
+.section sbpub
+.section sbpva
+.section sbpvb
+.section sbpwa
+.section sbpwb
+.section sbpxa
+.section sbpxb
+.section sbpya
+.section sbpyb
+.section sbpza
+.section sbpzb
+.section sbp1a
+.section sbp1b
+.section sbp2a
+.section sbp2b
+.section sbp3a
+.section sbp3b
+.section sbp4a
+.section sbp4b
+.section sbp5a
+.section sbp5b
+.section sbp6a
+.section sbp6b
+.section sbp7a
+.section sbp7b
+.section sbp8a
+.section sbp8b
+.section sbp9a
+.section sbp9b
+.section sbp0a
+.section sbp0b
+.section sbqaa
+.section sbqab
+.section sbqba
+.section sbqbb
+.section sbqca
+.section sbqcb
+.section sbqda
+.section sbqdb
+.section sbqea
+.section sbqeb
+.section sbqfa
+.section sbqfb
+.section sbqga
+.section sbqgb
+.section sbqha
+.section sbqhb
+.section sbqia
+.section sbqib
+.section sbqja
+.section sbqjb
+.section sbqka
+.section sbqkb
+.section sbqla
+.section sbqlb
+.section sbqma
+.section sbqmb
+.section sbqna
+.section sbqnb
+.section sbqoa
+.section sbqob
+.section sbqpa
+.section sbqpb
+.section sbqqa
+.section sbqqb
+.section sbqra
+.section sbqrb
+.section sbqsa
+.section sbqsb
+.section sbqta
+.section sbqtb
+.section sbqua
+.section sbqub
+.section sbqva
+.section sbqvb
+.section sbqwa
+.section sbqwb
+.section sbqxa
+.section sbqxb
+.section sbqya
+.section sbqyb
+.section sbqza
+.section sbqzb
+.section sbq1a
+.section sbq1b
+.section sbq2a
+.section sbq2b
+.section sbq3a
+.section sbq3b
+.section sbq4a
+.section sbq4b
+.section sbq5a
+.section sbq5b
+.section sbq6a
+.section sbq6b
+.section sbq7a
+.section sbq7b
+.section sbq8a
+.section sbq8b
+.section sbq9a
+.section sbq9b
+.section sbq0a
+.section sbq0b
+.section sbraa
+.section sbrab
+.section sbrba
+.section sbrbb
+.section sbrca
+.section sbrcb
+.section sbrda
+.section sbrdb
+.section sbrea
+.section sbreb
+.section sbrfa
+.section sbrfb
+.section sbrga
+.section sbrgb
+.section sbrha
+.section sbrhb
+.section sbria
+.section sbrib
+.section sbrja
+.section sbrjb
+.section sbrka
+.section sbrkb
+.section sbrla
+.section sbrlb
+.section sbrma
+.section sbrmb
+.section sbrna
+.section sbrnb
+.section sbroa
+.section sbrob
+.section sbrpa
+.section sbrpb
+.section sbrqa
+.section sbrqb
+.section sbrra
+.section sbrrb
+.section sbrsa
+.section sbrsb
+.section sbrta
+.section sbrtb
+.section sbrua
+.section sbrub
+.section sbrva
+.section sbrvb
+.section sbrwa
+.section sbrwb
+.section sbrxa
+.section sbrxb
+.section sbrya
+.section sbryb
+.section sbrza
+.section sbrzb
+.section sbr1a
+.section sbr1b
+.section sbr2a
+.section sbr2b
+.section sbr3a
+.section sbr3b
+.section sbr4a
+.section sbr4b
+.section sbr5a
+.section sbr5b
+.section sbr6a
+.section sbr6b
+.section sbr7a
+.section sbr7b
+.section sbr8a
+.section sbr8b
+.section sbr9a
+.section sbr9b
+.section sbr0a
+.section sbr0b
+.section sbsaa
+.section sbsab
+.section sbsba
+.section sbsbb
+.section sbsca
+.section sbscb
+.section sbsda
+.section sbsdb
+.section sbsea
+.section sbseb
+.section sbsfa
+.section sbsfb
+.section sbsga
+.section sbsgb
+.section sbsha
+.section sbshb
+.section sbsia
+.section sbsib
+.section sbsja
+.section sbsjb
+.section sbska
+.section sbskb
+.section sbsla
+.section sbslb
+.section sbsma
+.section sbsmb
+.section sbsna
+.section sbsnb
+.section sbsoa
+.section sbsob
+.section sbspa
+.section sbspb
+.section sbsqa
+.section sbsqb
+.section sbsra
+.section sbsrb
+.section sbssa
+.section sbssb
+.section sbsta
+.section sbstb
+.section sbsua
+.section sbsub
+.section sbsva
+.section sbsvb
+.section sbswa
+.section sbswb
+.section sbsxa
+.section sbsxb
+.section sbsya
+.section sbsyb
+.section sbsza
+.section sbszb
+.section sbs1a
+.section sbs1b
+.section sbs2a
+.section sbs2b
+.section sbs3a
+.section sbs3b
+.section sbs4a
+.section sbs4b
+.section sbs5a
+.section sbs5b
+.section sbs6a
+.section sbs6b
+.section sbs7a
+.section sbs7b
+.section sbs8a
+.section sbs8b
+.section sbs9a
+.section sbs9b
+.section sbs0a
+.section sbs0b
+.section sbtaa
+.section sbtab
+.section sbtba
+.section sbtbb
+.section sbtca
+.section sbtcb
+.section sbtda
+.section sbtdb
+.section sbtea
+.section sbteb
+.section sbtfa
+.section sbtfb
+.section sbtga
+.section sbtgb
+.section sbtha
+.section sbthb
+.section sbtia
+.section sbtib
+.section sbtja
+.section sbtjb
+.section sbtka
+.section sbtkb
+.section sbtla
+.section sbtlb
+.section sbtma
+.section sbtmb
+.section sbtna
+.section sbtnb
+.section sbtoa
+.section sbtob
+.section sbtpa
+.section sbtpb
+.section sbtqa
+.section sbtqb
+.section sbtra
+.section sbtrb
+.section sbtsa
+.section sbtsb
+.section sbtta
+.section sbttb
+.section sbtua
+.section sbtub
+.section sbtva
+.section sbtvb
+.section sbtwa
+.section sbtwb
+.section sbtxa
+.section sbtxb
+.section sbtya
+.section sbtyb
+.section sbtza
+.section sbtzb
+.section sbt1a
+.section sbt1b
+.section sbt2a
+.section sbt2b
+.section sbt3a
+.section sbt3b
+.section sbt4a
+.section sbt4b
+.section sbt5a
+.section sbt5b
+.section sbt6a
+.section sbt6b
+.section sbt7a
+.section sbt7b
+.section sbt8a
+.section sbt8b
+.section sbt9a
+.section sbt9b
+.section sbt0a
+.section sbt0b
+.section sbuaa
+.section sbuab
+.section sbuba
+.section sbubb
+.section sbuca
+.section sbucb
+.section sbuda
+.section sbudb
+.section sbuea
+.section sbueb
+.section sbufa
+.section sbufb
+.section sbuga
+.section sbugb
+.section sbuha
+.section sbuhb
+.section sbuia
+.section sbuib
+.section sbuja
+.section sbujb
+.section sbuka
+.section sbukb
+.section sbula
+.section sbulb
+.section sbuma
+.section sbumb
+.section sbuna
+.section sbunb
+.section sbuoa
+.section sbuob
+.section sbupa
+.section sbupb
+.section sbuqa
+.section sbuqb
+.section sbura
+.section sburb
+.section sbusa
+.section sbusb
+.section sbuta
+.section sbutb
+.section sbuua
+.section sbuub
+.section sbuva
+.section sbuvb
+.section sbuwa
+.section sbuwb
+.section sbuxa
+.section sbuxb
+.section sbuya
+.section sbuyb
+.section sbuza
+.section sbuzb
+.section sbu1a
+.section sbu1b
+.section sbu2a
+.section sbu2b
+.section sbu3a
+.section sbu3b
+.section sbu4a
+.section sbu4b
+.section sbu5a
+.section sbu5b
+.section sbu6a
+.section sbu6b
+.section sbu7a
+.section sbu7b
+.section sbu8a
+.section sbu8b
+.section sbu9a
+.section sbu9b
+.section sbu0a
+.section sbu0b
+.section sbvaa
+.section sbvab
+.section sbvba
+.section sbvbb
+.section sbvca
+.section sbvcb
+.section sbvda
+.section sbvdb
+.section sbvea
+.section sbveb
+.section sbvfa
+.section sbvfb
+.section sbvga
+.section sbvgb
+.section sbvha
+.section sbvhb
+.section sbvia
+.section sbvib
+.section sbvja
+.section sbvjb
+.section sbvka
+.section sbvkb
+.section sbvla
+.section sbvlb
+.section sbvma
+.section sbvmb
+.section sbvna
+.section sbvnb
+.section sbvoa
+.section sbvob
+.section sbvpa
+.section sbvpb
+.section sbvqa
+.section sbvqb
+.section sbvra
+.section sbvrb
+.section sbvsa
+.section sbvsb
+.section sbvta
+.section sbvtb
+.section sbvua
+.section sbvub
+.section sbvva
+.section sbvvb
+.section sbvwa
+.section sbvwb
+.section sbvxa
+.section sbvxb
+.section sbvya
+.section sbvyb
+.section sbvza
+.section sbvzb
+.section sbv1a
+.section sbv1b
+.section sbv2a
+.section sbv2b
+.section sbv3a
+.section sbv3b
+.section sbv4a
+.section sbv4b
+.section sbv5a
+.section sbv5b
+.section sbv6a
+.section sbv6b
+.section sbv7a
+.section sbv7b
+.section sbv8a
+.section sbv8b
+.section sbv9a
+.section sbv9b
+.section sbv0a
+.section sbv0b
+.section sbwaa
+.section sbwab
+.section sbwba
+.section sbwbb
+.section sbwca
+.section sbwcb
+.section sbwda
+.section sbwdb
+.section sbwea
+.section sbweb
+.section sbwfa
+.section sbwfb
+.section sbwga
+.section sbwgb
+.section sbwha
+.section sbwhb
+.section sbwia
+.section sbwib
+.section sbwja
+.section sbwjb
+.section sbwka
+.section sbwkb
+.section sbwla
+.section sbwlb
+.section sbwma
+.section sbwmb
+.section sbwna
+.section sbwnb
+.section sbwoa
+.section sbwob
+.section sbwpa
+.section sbwpb
+.section sbwqa
+.section sbwqb
+.section sbwra
+.section sbwrb
+.section sbwsa
+.section sbwsb
+.section sbwta
+.section sbwtb
+.section sbwua
+.section sbwub
+.section sbwva
+.section sbwvb
+.section sbwwa
+.section sbwwb
+.section sbwxa
+.section sbwxb
+.section sbwya
+.section sbwyb
+.section sbwza
+.section sbwzb
+.section sbw1a
+.section sbw1b
+.section sbw2a
+.section sbw2b
+.section sbw3a
+.section sbw3b
+.section sbw4a
+.section sbw4b
+.section sbw5a
+.section sbw5b
+.section sbw6a
+.section sbw6b
+.section sbw7a
+.section sbw7b
+.section sbw8a
+.section sbw8b
+.section sbw9a
+.section sbw9b
+.section sbw0a
+.section sbw0b
+.section sbxaa
+.section sbxab
+.section sbxba
+.section sbxbb
+.section sbxca
+.section sbxcb
+.section sbxda
+.section sbxdb
+.section sbxea
+.section sbxeb
+.section sbxfa
+.section sbxfb
+.section sbxga
+.section sbxgb
+.section sbxha
+.section sbxhb
+.section sbxia
+.section sbxib
+.section sbxja
+.section sbxjb
+.section sbxka
+.section sbxkb
+.section sbxla
+.section sbxlb
+.section sbxma
+.section sbxmb
+.section sbxna
+.section sbxnb
+.section sbxoa
+.section sbxob
+.section sbxpa
+.section sbxpb
+.section sbxqa
+.section sbxqb
+.section sbxra
+.section sbxrb
+.section sbxsa
+.section sbxsb
+.section sbxta
+.section sbxtb
+.section sbxua
+.section sbxub
+.section sbxva
+.section sbxvb
+.section sbxwa
+.section sbxwb
+.section sbxxa
+.section sbxxb
+.section sbxya
+.section sbxyb
+.section sbxza
+.section sbxzb
+.section sbx1a
+.section sbx1b
+.section sbx2a
+.section sbx2b
+.section sbx3a
+.section sbx3b
+.section sbx4a
+.section sbx4b
+.section sbx5a
+.section sbx5b
+.section sbx6a
+.section sbx6b
+.section sbx7a
+.section sbx7b
+.section sbx8a
+.section sbx8b
+.section sbx9a
+.section sbx9b
+.section sbx0a
+.section sbx0b
+.section sbyaa
+.section sbyab
+.section sbyba
+.section sbybb
+.section sbyca
+.section sbycb
+.section sbyda
+.section sbydb
+.section sbyea
+.section sbyeb
+.section sbyfa
+.section sbyfb
+.section sbyga
+.section sbygb
+.section sbyha
+.section sbyhb
+.section sbyia
+.section sbyib
+.section sbyja
+.section sbyjb
+.section sbyka
+.section sbykb
+.section sbyla
+.section sbylb
+.section sbyma
+.section sbymb
+.section sbyna
+.section sbynb
+.section sbyoa
+.section sbyob
+.section sbypa
+.section sbypb
+.section sbyqa
+.section sbyqb
+.section sbyra
+.section sbyrb
+.section sbysa
+.section sbysb
+.section sbyta
+.section sbytb
+.section sbyua
+.section sbyub
+.section sbyva
+.section sbyvb
+.section sbywa
+.section sbywb
+.section sbyxa
+.section sbyxb
+.section sbyya
+.section sbyyb
+.section sbyza
+.section sbyzb
+.section sby1a
+.section sby1b
+.section sby2a
+.section sby2b
+.section sby3a
+.section sby3b
+.section sby4a
+.section sby4b
+.section sby5a
+.section sby5b
+.section sby6a
+.section sby6b
+.section sby7a
+.section sby7b
+.section sby8a
+.section sby8b
+.section sby9a
+.section sby9b
+.section sby0a
+.section sby0b
+.section sbzaa
+.section sbzab
+.section sbzba
+.section sbzbb
+.section sbzca
+.section sbzcb
+.section sbzda
+.section sbzdb
+.section sbzea
+.section sbzeb
+.section sbzfa
+.section sbzfb
+.section sbzga
+.section sbzgb
+.section sbzha
+.section sbzhb
+.section sbzia
+.section sbzib
+.section sbzja
+.section sbzjb
+.section sbzka
+.section sbzkb
+.section sbzla
+.section sbzlb
+.section sbzma
+.section sbzmb
+.section sbzna
+.section sbznb
+.section sbzoa
+.section sbzob
+.section sbzpa
+.section sbzpb
+.section sbzqa
+.section sbzqb
+.section sbzra
+.section sbzrb
+.section sbzsa
+.section sbzsb
+.section sbzta
+.section sbztb
+.section sbzua
+.section sbzub
+.section sbzva
+.section sbzvb
+.section sbzwa
+.section sbzwb
+.section sbzxa
+.section sbzxb
+.section sbzya
+.section sbzyb
+.section sbzza
+.section sbzzb
+.section sbz1a
+.section sbz1b
+.section sbz2a
+.section sbz2b
+.section sbz3a
+.section sbz3b
+.section sbz4a
+.section sbz4b
+.section sbz5a
+.section sbz5b
+.section sbz6a
+.section sbz6b
+.section sbz7a
+.section sbz7b
+.section sbz8a
+.section sbz8b
+.section sbz9a
+.section sbz9b
+.section sbz0a
+.section sbz0b
+.section sb1aa
+.section sb1ab
+.section sb1ba
+.section sb1bb
+.section sb1ca
+.section sb1cb
+.section sb1da
+.section sb1db
+.section sb1ea
+.section sb1eb
+.section sb1fa
+.section sb1fb
+.section sb1ga
+.section sb1gb
+.section sb1ha
+.section sb1hb
+.section sb1ia
+.section sb1ib
+.section sb1ja
+.section sb1jb
+.section sb1ka
+.section sb1kb
+.section sb1la
+.section sb1lb
+.section sb1ma
+.section sb1mb
+.section sb1na
+.section sb1nb
+.section sb1oa
+.section sb1ob
+.section sb1pa
+.section sb1pb
+.section sb1qa
+.section sb1qb
+.section sb1ra
+.section sb1rb
+.section sb1sa
+.section sb1sb
+.section sb1ta
+.section sb1tb
+.section sb1ua
+.section sb1ub
+.section sb1va
+.section sb1vb
+.section sb1wa
+.section sb1wb
+.section sb1xa
+.section sb1xb
+.section sb1ya
+.section sb1yb
+.section sb1za
+.section sb1zb
+.section sb11a
+.section sb11b
+.section sb12a
+.section sb12b
+.section sb13a
+.section sb13b
+.section sb14a
+.section sb14b
+.section sb15a
+.section sb15b
+.section sb16a
+.section sb16b
+.section sb17a
+.section sb17b
+.section sb18a
+.section sb18b
+.section sb19a
+.section sb19b
+.section sb10a
+.section sb10b
+.section sb2aa
+.section sb2ab
+.section sb2ba
+.section sb2bb
+.section sb2ca
+.section sb2cb
+.section sb2da
+.section sb2db
+.section sb2ea
+.section sb2eb
+.section sb2fa
+.section sb2fb
+.section sb2ga
+.section sb2gb
+.section sb2ha
+.section sb2hb
+.section sb2ia
+.section sb2ib
+.section sb2ja
+.section sb2jb
+.section sb2ka
+.section sb2kb
+.section sb2la
+.section sb2lb
+.section sb2ma
+.section sb2mb
+.section sb2na
+.section sb2nb
+.section sb2oa
+.section sb2ob
+.section sb2pa
+.section sb2pb
+.section sb2qa
+.section sb2qb
+.section sb2ra
+.section sb2rb
+.section sb2sa
+.section sb2sb
+.section sb2ta
+.section sb2tb
+.section sb2ua
+.section sb2ub
+.section sb2va
+.section sb2vb
+.section sb2wa
+.section sb2wb
+.section sb2xa
+.section sb2xb
+.section sb2ya
+.section sb2yb
+.section sb2za
+.section sb2zb
+.section sb21a
+.section sb21b
+.section sb22a
+.section sb22b
+.section sb23a
+.section sb23b
+.section sb24a
+.section sb24b
+.section sb25a
+.section sb25b
+.section sb26a
+.section sb26b
+.section sb27a
+.section sb27b
+.section sb28a
+.section sb28b
+.section sb29a
+.section sb29b
+.section sb20a
+.section sb20b
+.section sb3aa
+.section sb3ab
+.section sb3ba
+.section sb3bb
+.section sb3ca
+.section sb3cb
+.section sb3da
+.section sb3db
+.section sb3ea
+.section sb3eb
+.section sb3fa
+.section sb3fb
+.section sb3ga
+.section sb3gb
+.section sb3ha
+.section sb3hb
+.section sb3ia
+.section sb3ib
+.section sb3ja
+.section sb3jb
+.section sb3ka
+.section sb3kb
+.section sb3la
+.section sb3lb
+.section sb3ma
+.section sb3mb
+.section sb3na
+.section sb3nb
+.section sb3oa
+.section sb3ob
+.section sb3pa
+.section sb3pb
+.section sb3qa
+.section sb3qb
+.section sb3ra
+.section sb3rb
+.section sb3sa
+.section sb3sb
+.section sb3ta
+.section sb3tb
+.section sb3ua
+.section sb3ub
+.section sb3va
+.section sb3vb
+.section sb3wa
+.section sb3wb
+.section sb3xa
+.section sb3xb
+.section sb3ya
+.section sb3yb
+.section sb3za
+.section sb3zb
+.section sb31a
+.section sb31b
+.section sb32a
+.section sb32b
+.section sb33a
+.section sb33b
+.section sb34a
+.section sb34b
+.section sb35a
+.section sb35b
+.section sb36a
+.section sb36b
+.section sb37a
+.section sb37b
+.section sb38a
+.section sb38b
+.section sb39a
+.section sb39b
+.section sb30a
+.section sb30b
+.section sb4aa
+.section sb4ab
+.section sb4ba
+.section sb4bb
+.section sb4ca
+.section sb4cb
+.section sb4da
+.section sb4db
+.section sb4ea
+.section sb4eb
+.section sb4fa
+.section sb4fb
+.section sb4ga
+.section sb4gb
+.section sb4ha
+.section sb4hb
+.section sb4ia
+.section sb4ib
+.section sb4ja
+.section sb4jb
+.section sb4ka
+.section sb4kb
+.section sb4la
+.section sb4lb
+.section sb4ma
+.section sb4mb
+.section sb4na
+.section sb4nb
+.section sb4oa
+.section sb4ob
+.section sb4pa
+.section sb4pb
+.section sb4qa
+.section sb4qb
+.section sb4ra
+.section sb4rb
+.section sb4sa
+.section sb4sb
+.section sb4ta
+.section sb4tb
+.section sb4ua
+.section sb4ub
+.section sb4va
+.section sb4vb
+.section sb4wa
+.section sb4wb
+.section sb4xa
+.section sb4xb
+.section sb4ya
+.section sb4yb
+.section sb4za
+.section sb4zb
+.section sb41a
+.section sb41b
+.section sb42a
+.section sb42b
+.section sb43a
+.section sb43b
+.section sb44a
+.section sb44b
+.section sb45a
+.section sb45b
+.section sb46a
+.section sb46b
+.section sb47a
+.section sb47b
+.section sb48a
+.section sb48b
+.section sb49a
+.section sb49b
+.section sb40a
+.section sb40b
+.section sb5aa
+.section sb5ab
+.section sb5ba
+.section sb5bb
+.section sb5ca
+.section sb5cb
+.section sb5da
+.section sb5db
+.section sb5ea
+.section sb5eb
+.section sb5fa
+.section sb5fb
+.section sb5ga
+.section sb5gb
+.section sb5ha
+.section sb5hb
+.section sb5ia
+.section sb5ib
+.section sb5ja
+.section sb5jb
+.section sb5ka
+.section sb5kb
+.section sb5la
+.section sb5lb
+.section sb5ma
+.section sb5mb
+.section sb5na
+.section sb5nb
+.section sb5oa
+.section sb5ob
+.section sb5pa
+.section sb5pb
+.section sb5qa
+.section sb5qb
+.section sb5ra
+.section sb5rb
+.section sb5sa
+.section sb5sb
+.section sb5ta
+.section sb5tb
+.section sb5ua
+.section sb5ub
+.section sb5va
+.section sb5vb
+.section sb5wa
+.section sb5wb
+.section sb5xa
+.section sb5xb
+.section sb5ya
+.section sb5yb
+.section sb5za
+.section sb5zb
+.section sb51a
+.section sb51b
+.section sb52a
+.section sb52b
+.section sb53a
+.section sb53b
+.section sb54a
+.section sb54b
+.section sb55a
+.section sb55b
+.section sb56a
+.section sb56b
+.section sb57a
+.section sb57b
+.section sb58a
+.section sb58b
+.section sb59a
+.section sb59b
+.section sb50a
+.section sb50b
+.section sb6aa
+.section sb6ab
+.section sb6ba
+.section sb6bb
+.section sb6ca
+.section sb6cb
+.section sb6da
+.section sb6db
+.section sb6ea
+.section sb6eb
+.section sb6fa
+.section sb6fb
+.section sb6ga
+.section sb6gb
+.section sb6ha
+.section sb6hb
+.section sb6ia
+.section sb6ib
+.section sb6ja
+.section sb6jb
+.section sb6ka
+.section sb6kb
+.section sb6la
+.section sb6lb
+.section sb6ma
+.section sb6mb
+.section sb6na
+.section sb6nb
+.section sb6oa
+.section sb6ob
+.section sb6pa
+.section sb6pb
+.section sb6qa
+.section sb6qb
+.section sb6ra
+.section sb6rb
+.section sb6sa
+.section sb6sb
+.section sb6ta
+.section sb6tb
+.section sb6ua
+.section sb6ub
+.section sb6va
+.section sb6vb
+.section sb6wa
+.section sb6wb
+.section sb6xa
+.section sb6xb
+.section sb6ya
+.section sb6yb
+.section sb6za
+.section sb6zb
+.section sb61a
+.section sb61b
+.section sb62a
+.section sb62b
+.section sb63a
+.section sb63b
+.section sb64a
+.section sb64b
+.section sb65a
+.section sb65b
+.section sb66a
+.section sb66b
+.section sb67a
+.section sb67b
+.section sb68a
+.section sb68b
+.section sb69a
+.section sb69b
+.section sb60a
+.section sb60b
+.section sb7aa
+.section sb7ab
+.section sb7ba
+.section sb7bb
+.section sb7ca
+.section sb7cb
+.section sb7da
+.section sb7db
+.section sb7ea
+.section sb7eb
+.section sb7fa
+.section sb7fb
+.section sb7ga
+.section sb7gb
+.section sb7ha
+.section sb7hb
+.section sb7ia
+.section sb7ib
+.section sb7ja
+.section sb7jb
+.section sb7ka
+.section sb7kb
+.section sb7la
+.section sb7lb
+.section sb7ma
+.section sb7mb
+.section sb7na
+.section sb7nb
+.section sb7oa
+.section sb7ob
+.section sb7pa
+.section sb7pb
+.section sb7qa
+.section sb7qb
+.section sb7ra
+.section sb7rb
+.section sb7sa
+.section sb7sb
+.section sb7ta
+.section sb7tb
+.section sb7ua
+.section sb7ub
+.section sb7va
+.section sb7vb
+.section sb7wa
+.section sb7wb
+.section sb7xa
+.section sb7xb
+.section sb7ya
+.section sb7yb
+.section sb7za
+.section sb7zb
+.section sb71a
+.section sb71b
+.section sb72a
+.section sb72b
+.section sb73a
+.section sb73b
+.section sb74a
+.section sb74b
+.section sb75a
+.section sb75b
+.section sb76a
+.section sb76b
+.section sb77a
+.section sb77b
+.section sb78a
+.section sb78b
+.section sb79a
+.section sb79b
+.section sb70a
+.section sb70b
+.section sb8aa
+.section sb8ab
+.section sb8ba
+.section sb8bb
+.section sb8ca
+.section sb8cb
+.section sb8da
+.section sb8db
+.section sb8ea
+.section sb8eb
+.section sb8fa
+.section sb8fb
+.section sb8ga
+.section sb8gb
+.section sb8ha
+.section sb8hb
+.section sb8ia
+.section sb8ib
+.section sb8ja
+.section sb8jb
+.section sb8ka
+.section sb8kb
+.section sb8la
+.section sb8lb
+.section sb8ma
+.section sb8mb
+.section sb8na
+.section sb8nb
+.section sb8oa
+.section sb8ob
+.section sb8pa
+.section sb8pb
+.section sb8qa
+.section sb8qb
+.section sb8ra
+.section sb8rb
+.section sb8sa
+.section sb8sb
+.section sb8ta
+.section sb8tb
+.section sb8ua
+.section sb8ub
+.section sb8va
+.section sb8vb
+.section sb8wa
+.section sb8wb
+.section sb8xa
+.section sb8xb
+.section sb8ya
+.section sb8yb
+.section sb8za
+.section sb8zb
+.section sb81a
+.section sb81b
+.section sb82a
+.section sb82b
+.section sb83a
+.section sb83b
+.section sb84a
+.section sb84b
+.section sb85a
+.section sb85b
+.section sb86a
+.section sb86b
+.section sb87a
+.section sb87b
+.section sb88a
+.section sb88b
+.section sb89a
+.section sb89b
+.section sb80a
+.section sb80b
+.section sb9aa
+.section sb9ab
+.section sb9ba
+.section sb9bb
+.section sb9ca
+.section sb9cb
+.section sb9da
+.section sb9db
+.section sb9ea
+.section sb9eb
+.section sb9fa
+.section sb9fb
+.section sb9ga
+.section sb9gb
+.section sb9ha
+.section sb9hb
+.section sb9ia
+.section sb9ib
+.section sb9ja
+.section sb9jb
+.section sb9ka
+.section sb9kb
+.section sb9la
+.section sb9lb
+.section sb9ma
+.section sb9mb
+.section sb9na
+.section sb9nb
+.section sb9oa
+.section sb9ob
+.section sb9pa
+.section sb9pb
+.section sb9qa
+.section sb9qb
+.section sb9ra
+.section sb9rb
+.section sb9sa
+.section sb9sb
+.section sb9ta
+.section sb9tb
+.section sb9ua
+.section sb9ub
+.section sb9va
+.section sb9vb
+.section sb9wa
+.section sb9wb
+.section sb9xa
+.section sb9xb
+.section sb9ya
+.section sb9yb
+.section sb9za
+.section sb9zb
+.section sb91a
+.section sb91b
+.section sb92a
+.section sb92b
+.section sb93a
+.section sb93b
+.section sb94a
+.section sb94b
+.section sb95a
+.section sb95b
+.section sb96a
+.section sb96b
+.section sb97a
+.section sb97b
+.section sb98a
+.section sb98b
+.section sb99a
+.section sb99b
+.section sb90a
+.section sb90b
+.section sb0aa
+.section sb0ab
+.section sb0ba
+.section sb0bb
+.section sb0ca
+.section sb0cb
+.section sb0da
+.section sb0db
+.section sb0ea
+.section sb0eb
+.section sb0fa
+.section sb0fb
+.section sb0ga
+.section sb0gb
+.section sb0ha
+.section sb0hb
+.section sb0ia
+.section sb0ib
+.section sb0ja
+.section sb0jb
+.section sb0ka
+.section sb0kb
+.section sb0la
+.section sb0lb
+.section sb0ma
+.section sb0mb
+.section sb0na
+.section sb0nb
+.section sb0oa
+.section sb0ob
+.section sb0pa
+.section sb0pb
+.section sb0qa
+.section sb0qb
+.section sb0ra
+.section sb0rb
+.section sb0sa
+.section sb0sb
+.section sb0ta
+.section sb0tb
+.section sb0ua
+.section sb0ub
+.section sb0va
+.section sb0vb
+.section sb0wa
+.section sb0wb
+.section sb0xa
+.section sb0xb
+.section sb0ya
+.section sb0yb
+.section sb0za
+.section sb0zb
+.section sb01a
+.section sb01b
+.section sb02a
+.section sb02b
+.section sb03a
+.section sb03b
+.section sb04a
+.section sb04b
+.section sb05a
+.section sb05b
+.section sb06a
+.section sb06b
+.section sb07a
+.section sb07b
+.section sb08a
+.section sb08b
+.section sb09a
+.section sb09b
+.section sb00a
+.section sb00b
+.section scaaa
+.section scaab
+.section scaba
+.section scabb
+.section scaca
+.section scacb
+.section scada
+.section scadb
+.section scaea
+.section scaeb
+.section scafa
+.section scafb
+.section scaga
+.section scagb
+.section scaha
+.section scahb
+.section scaia
+.section scaib
+.section scaja
+.section scajb
+.section scaka
+.section scakb
+.section scala
+.section scalb
+.section scama
+.section scamb
+.section scana
+.section scanb
+.section scaoa
+.section scaob
+.section scapa
+.section scapb
+.section scaqa
+.section scaqb
+.section scara
+.section scarb
+.section scasa
+.section scasb
+.section scata
+.section scatb
+.section scaua
+.section scaub
+.section scava
+.section scavb
+.section scawa
+.section scawb
+.section scaxa
+.section scaxb
+.section scaya
+.section scayb
+.section scaza
+.section scazb
+.section sca1a
+.section sca1b
+.section sca2a
+.section sca2b
+.section sca3a
+.section sca3b
+.section sca4a
+.section sca4b
+.section sca5a
+.section sca5b
+.section sca6a
+.section sca6b
+.section sca7a
+.section sca7b
+.section sca8a
+.section sca8b
+.section sca9a
+.section sca9b
+.section sca0a
+.section sca0b
+.section scbaa
+.section scbab
+.section scbba
+.section scbbb
+.section scbca
+.section scbcb
+.section scbda
+.section scbdb
+.section scbea
+.section scbeb
+.section scbfa
+.section scbfb
+.section scbga
+.section scbgb
+.section scbha
+.section scbhb
+.section scbia
+.section scbib
+.section scbja
+.section scbjb
+.section scbka
+.section scbkb
+.section scbla
+.section scblb
+.section scbma
+.section scbmb
+.section scbna
+.section scbnb
+.section scboa
+.section scbob
+.section scbpa
+.section scbpb
+.section scbqa
+.section scbqb
+.section scbra
+.section scbrb
+.section scbsa
+.section scbsb
+.section scbta
+.section scbtb
+.section scbua
+.section scbub
+.section scbva
+.section scbvb
+.section scbwa
+.section scbwb
+.section scbxa
+.section scbxb
+.section scbya
+.section scbyb
+.section scbza
+.section scbzb
+.section scb1a
+.section scb1b
+.section scb2a
+.section scb2b
+.section scb3a
+.section scb3b
+.section scb4a
+.section scb4b
+.section scb5a
+.section scb5b
+.section scb6a
+.section scb6b
+.section scb7a
+.section scb7b
+.section scb8a
+.section scb8b
+.section scb9a
+.section scb9b
+.section scb0a
+.section scb0b
+.section sccaa
+.section sccab
+.section sccba
+.section sccbb
+.section sccca
+.section scccb
+.section sccda
+.section sccdb
+.section sccea
+.section scceb
+.section sccfa
+.section sccfb
+.section sccga
+.section sccgb
+.section sccha
+.section scchb
+.section sccia
+.section sccib
+.section sccja
+.section sccjb
+.section sccka
+.section scckb
+.section sccla
+.section scclb
+.section sccma
+.section sccmb
+.section sccna
+.section sccnb
+.section sccoa
+.section sccob
+.section sccpa
+.section sccpb
+.section sccqa
+.section sccqb
+.section sccra
+.section sccrb
+.section sccsa
+.section sccsb
+.section sccta
+.section scctb
+.section sccua
+.section sccub
+.section sccva
+.section sccvb
+.section sccwa
+.section sccwb
+.section sccxa
+.section sccxb
+.section sccya
+.section sccyb
+.section sccza
+.section scczb
+.section scc1a
+.section scc1b
+.section scc2a
+.section scc2b
+.section scc3a
+.section scc3b
+.section scc4a
+.section scc4b
+.section scc5a
+.section scc5b
+.section scc6a
+.section scc6b
+.section scc7a
+.section scc7b
+.section scc8a
+.section scc8b
+.section scc9a
+.section scc9b
+.section scc0a
+.section scc0b
+.section scdaa
+.section scdab
+.section scdba
+.section scdbb
+.section scdca
+.section scdcb
+.section scdda
+.section scddb
+.section scdea
+.section scdeb
+.section scdfa
+.section scdfb
+.section scdga
+.section scdgb
+.section scdha
+.section scdhb
+.section scdia
+.section scdib
+.section scdja
+.section scdjb
+.section scdka
+.section scdkb
+.section scdla
+.section scdlb
+.section scdma
+.section scdmb
+.section scdna
+.section scdnb
+.section scdoa
+.section scdob
+.section scdpa
+.section scdpb
+.section scdqa
+.section scdqb
+.section scdra
+.section scdrb
+.section scdsa
+.section scdsb
+.section scdta
+.section scdtb
+.section scdua
+.section scdub
+.section scdva
+.section scdvb
+.section scdwa
+.section scdwb
+.section scdxa
+.section scdxb
+.section scdya
+.section scdyb
+.section scdza
+.section scdzb
+.section scd1a
+.section scd1b
+.section scd2a
+.section scd2b
+.section scd3a
+.section scd3b
+.section scd4a
+.section scd4b
+.section scd5a
+.section scd5b
+.section scd6a
+.section scd6b
+.section scd7a
+.section scd7b
+.section scd8a
+.section scd8b
+.section scd9a
+.section scd9b
+.section scd0a
+.section scd0b
+.section sceaa
+.section sceab
+.section sceba
+.section scebb
+.section sceca
+.section scecb
+.section sceda
+.section scedb
+.section sceea
+.section sceeb
+.section scefa
+.section scefb
+.section scega
+.section scegb
+.section sceha
+.section scehb
+.section sceia
+.section sceib
+.section sceja
+.section scejb
+.section sceka
+.section scekb
+.section scela
+.section scelb
+.section scema
+.section scemb
+.section scena
+.section scenb
+.section sceoa
+.section sceob
+.section scepa
+.section scepb
+.section sceqa
+.section sceqb
+.section scera
+.section scerb
+.section scesa
+.section scesb
+.section sceta
+.section scetb
+.section sceua
+.section sceub
+.section sceva
+.section scevb
+.section scewa
+.section scewb
+.section scexa
+.section scexb
+.section sceya
+.section sceyb
+.section sceza
+.section scezb
+.section sce1a
+.section sce1b
+.section sce2a
+.section sce2b
+.section sce3a
+.section sce3b
+.section sce4a
+.section sce4b
+.section sce5a
+.section sce5b
+.section sce6a
+.section sce6b
+.section sce7a
+.section sce7b
+.section sce8a
+.section sce8b
+.section sce9a
+.section sce9b
+.section sce0a
+.section sce0b
+.section scfaa
+.section scfab
+.section scfba
+.section scfbb
+.section scfca
+.section scfcb
+.section scfda
+.section scfdb
+.section scfea
+.section scfeb
+.section scffa
+.section scffb
+.section scfga
+.section scfgb
+.section scfha
+.section scfhb
+.section scfia
+.section scfib
+.section scfja
+.section scfjb
+.section scfka
+.section scfkb
+.section scfla
+.section scflb
+.section scfma
+.section scfmb
+.section scfna
+.section scfnb
+.section scfoa
+.section scfob
+.section scfpa
+.section scfpb
+.section scfqa
+.section scfqb
+.section scfra
+.section scfrb
+.section scfsa
+.section scfsb
+.section scfta
+.section scftb
+.section scfua
+.section scfub
+.section scfva
+.section scfvb
+.section scfwa
+.section scfwb
+.section scfxa
+.section scfxb
+.section scfya
+.section scfyb
+.section scfza
+.section scfzb
+.section scf1a
+.section scf1b
+.section scf2a
+.section scf2b
+.section scf3a
+.section scf3b
+.section scf4a
+.section scf4b
+.section scf5a
+.section scf5b
+.section scf6a
+.section scf6b
+.section scf7a
+.section scf7b
+.section scf8a
+.section scf8b
+.section scf9a
+.section scf9b
+.section scf0a
+.section scf0b
+.section scgaa
+.section scgab
+.section scgba
+.section scgbb
+.section scgca
+.section scgcb
+.section scgda
+.section scgdb
+.section scgea
+.section scgeb
+.section scgfa
+.section scgfb
+.section scgga
+.section scggb
+.section scgha
+.section scghb
+.section scgia
+.section scgib
+.section scgja
+.section scgjb
+.section scgka
+.section scgkb
+.section scgla
+.section scglb
+.section scgma
+.section scgmb
+.section scgna
+.section scgnb
+.section scgoa
+.section scgob
+.section scgpa
+.section scgpb
+.section scgqa
+.section scgqb
+.section scgra
+.section scgrb
+.section scgsa
+.section scgsb
+.section scgta
+.section scgtb
+.section scgua
+.section scgub
+.section scgva
+.section scgvb
+.section scgwa
+.section scgwb
+.section scgxa
+.section scgxb
+.section scgya
+.section scgyb
+.section scgza
+.section scgzb
+.section scg1a
+.section scg1b
+.section scg2a
+.section scg2b
+.section scg3a
+.section scg3b
+.section scg4a
+.section scg4b
+.section scg5a
+.section scg5b
+.section scg6a
+.section scg6b
+.section scg7a
+.section scg7b
+.section scg8a
+.section scg8b
+.section scg9a
+.section scg9b
+.section scg0a
+.section scg0b
+.section schaa
+.section schab
+.section schba
+.section schbb
+.section schca
+.section schcb
+.section schda
+.section schdb
+.section schea
+.section scheb
+.section schfa
+.section schfb
+.section schga
+.section schgb
+.section schha
+.section schhb
+.section schia
+.section schib
+.section schja
+.section schjb
+.section schka
+.section schkb
+.section schla
+.section schlb
+.section schma
+.section schmb
+.section schna
+.section schnb
+.section schoa
+.section schob
+.section schpa
+.section schpb
+.section schqa
+.section schqb
+.section schra
+.section schrb
+.section schsa
+.section schsb
+.section schta
+.section schtb
+.section schua
+.section schub
+.section schva
+.section schvb
+.section schwa
+.section schwb
+.section schxa
+.section schxb
+.section schya
+.section schyb
+.section schza
+.section schzb
+.section sch1a
+.section sch1b
+.section sch2a
+.section sch2b
+.section sch3a
+.section sch3b
+.section sch4a
+.section sch4b
+.section sch5a
+.section sch5b
+.section sch6a
+.section sch6b
+.section sch7a
+.section sch7b
+.section sch8a
+.section sch8b
+.section sch9a
+.section sch9b
+.section sch0a
+.section sch0b
+.section sciaa
+.section sciab
+.section sciba
+.section scibb
+.section scica
+.section scicb
+.section scida
+.section scidb
+.section sciea
+.section scieb
+.section scifa
+.section scifb
+.section sciga
+.section scigb
+.section sciha
+.section scihb
+.section sciia
+.section sciib
+.section scija
+.section scijb
+.section scika
+.section scikb
+.section scila
+.section scilb
+.section scima
+.section scimb
+.section scina
+.section scinb
+.section scioa
+.section sciob
+.section scipa
+.section scipb
+.section sciqa
+.section sciqb
+.section scira
+.section scirb
+.section scisa
+.section scisb
+.section scita
+.section scitb
+.section sciua
+.section sciub
+.section sciva
+.section scivb
+.section sciwa
+.section sciwb
+.section scixa
+.section scixb
+.section sciya
+.section sciyb
+.section sciza
+.section scizb
+.section sci1a
+.section sci1b
+.section sci2a
+.section sci2b
+.section sci3a
+.section sci3b
+.section sci4a
+.section sci4b
+.section sci5a
+.section sci5b
+.section sci6a
+.section sci6b
+.section sci7a
+.section sci7b
+.section sci8a
+.section sci8b
+.section sci9a
+.section sci9b
+.section sci0a
+.section sci0b
+.section scjaa
+.section scjab
+.section scjba
+.section scjbb
+.section scjca
+.section scjcb
+.section scjda
+.section scjdb
+.section scjea
+.section scjeb
+.section scjfa
+.section scjfb
+.section scjga
+.section scjgb
+.section scjha
+.section scjhb
+.section scjia
+.section scjib
+.section scjja
+.section scjjb
+.section scjka
+.section scjkb
+.section scjla
+.section scjlb
+.section scjma
+.section scjmb
+.section scjna
+.section scjnb
+.section scjoa
+.section scjob
+.section scjpa
+.section scjpb
+.section scjqa
+.section scjqb
+.section scjra
+.section scjrb
+.section scjsa
+.section scjsb
+.section scjta
+.section scjtb
+.section scjua
+.section scjub
+.section scjva
+.section scjvb
+.section scjwa
+.section scjwb
+.section scjxa
+.section scjxb
+.section scjya
+.section scjyb
+.section scjza
+.section scjzb
+.section scj1a
+.section scj1b
+.section scj2a
+.section scj2b
+.section scj3a
+.section scj3b
+.section scj4a
+.section scj4b
+.section scj5a
+.section scj5b
+.section scj6a
+.section scj6b
+.section scj7a
+.section scj7b
+.section scj8a
+.section scj8b
+.section scj9a
+.section scj9b
+.section scj0a
+.section scj0b
+.section sckaa
+.section sckab
+.section sckba
+.section sckbb
+.section sckca
+.section sckcb
+.section sckda
+.section sckdb
+.section sckea
+.section sckeb
+.section sckfa
+.section sckfb
+.section sckga
+.section sckgb
+.section sckha
+.section sckhb
+.section sckia
+.section sckib
+.section sckja
+.section sckjb
+.section sckka
+.section sckkb
+.section sckla
+.section scklb
+.section sckma
+.section sckmb
+.section sckna
+.section scknb
+.section sckoa
+.section sckob
+.section sckpa
+.section sckpb
+.section sckqa
+.section sckqb
+.section sckra
+.section sckrb
+.section scksa
+.section scksb
+.section sckta
+.section scktb
+.section sckua
+.section sckub
+.section sckva
+.section sckvb
+.section sckwa
+.section sckwb
+.section sckxa
+.section sckxb
+.section sckya
+.section sckyb
+.section sckza
+.section sckzb
+.section sck1a
+.section sck1b
+.section sck2a
+.section sck2b
+.section sck3a
+.section sck3b
+.section sck4a
+.section sck4b
+.section sck5a
+.section sck5b
+.section sck6a
+.section sck6b
+.section sck7a
+.section sck7b
+.section sck8a
+.section sck8b
+.section sck9a
+.section sck9b
+.section sck0a
+.section sck0b
+.section sclaa
+.section sclab
+.section sclba
+.section sclbb
+.section sclca
+.section sclcb
+.section sclda
+.section scldb
+.section sclea
+.section scleb
+.section sclfa
+.section sclfb
+.section sclga
+.section sclgb
+.section sclha
+.section sclhb
+.section sclia
+.section sclib
+.section sclja
+.section scljb
+.section sclka
+.section sclkb
+.section sclla
+.section scllb
+.section sclma
+.section sclmb
+.section sclna
+.section sclnb
+.section scloa
+.section sclob
+.section sclpa
+.section sclpb
+.section sclqa
+.section sclqb
+.section sclra
+.section sclrb
+.section sclsa
+.section sclsb
+.section sclta
+.section scltb
+.section sclua
+.section sclub
+.section sclva
+.section sclvb
+.section sclwa
+.section sclwb
+.section sclxa
+.section sclxb
+.section sclya
+.section sclyb
+.section sclza
+.section sclzb
+.section scl1a
+.section scl1b
+.section scl2a
+.section scl2b
+.section scl3a
+.section scl3b
+.section scl4a
+.section scl4b
+.section scl5a
+.section scl5b
+.section scl6a
+.section scl6b
+.section scl7a
+.section scl7b
+.section scl8a
+.section scl8b
+.section scl9a
+.section scl9b
+.section scl0a
+.section scl0b
+.section scmaa
+.section scmab
+.section scmba
+.section scmbb
+.section scmca
+.section scmcb
+.section scmda
+.section scmdb
+.section scmea
+.section scmeb
+.section scmfa
+.section scmfb
+.section scmga
+.section scmgb
+.section scmha
+.section scmhb
+.section scmia
+.section scmib
+.section scmja
+.section scmjb
+.section scmka
+.section scmkb
+.section scmla
+.section scmlb
+.section scmma
+.section scmmb
+.section scmna
+.section scmnb
+.section scmoa
+.section scmob
+.section scmpa
+.section scmpb
+.section scmqa
+.section scmqb
+.section scmra
+.section scmrb
+.section scmsa
+.section scmsb
+.section scmta
+.section scmtb
+.section scmua
+.section scmub
+.section scmva
+.section scmvb
+.section scmwa
+.section scmwb
+.section scmxa
+.section scmxb
+.section scmya
+.section scmyb
+.section scmza
+.section scmzb
+.section scm1a
+.section scm1b
+.section scm2a
+.section scm2b
+.section scm3a
+.section scm3b
+.section scm4a
+.section scm4b
+.section scm5a
+.section scm5b
+.section scm6a
+.section scm6b
+.section scm7a
+.section scm7b
+.section scm8a
+.section scm8b
+.section scm9a
+.section scm9b
+.section scm0a
+.section scm0b
+.section scnaa
+.section scnab
+.section scnba
+.section scnbb
+.section scnca
+.section scncb
+.section scnda
+.section scndb
+.section scnea
+.section scneb
+.section scnfa
+.section scnfb
+.section scnga
+.section scngb
+.section scnha
+.section scnhb
+.section scnia
+.section scnib
+.section scnja
+.section scnjb
+.section scnka
+.section scnkb
+.section scnla
+.section scnlb
+.section scnma
+.section scnmb
+.section scnna
+.section scnnb
+.section scnoa
+.section scnob
+.section scnpa
+.section scnpb
+.section scnqa
+.section scnqb
+.section scnra
+.section scnrb
+.section scnsa
+.section scnsb
+.section scnta
+.section scntb
+.section scnua
+.section scnub
+.section scnva
+.section scnvb
+.section scnwa
+.section scnwb
+.section scnxa
+.section scnxb
+.section scnya
+.section scnyb
+.section scnza
+.section scnzb
+.section scn1a
+.section scn1b
+.section scn2a
+.section scn2b
+.section scn3a
+.section scn3b
+.section scn4a
+.section scn4b
+.section scn5a
+.section scn5b
+.section scn6a
+.section scn6b
+.section scn7a
+.section scn7b
+.section scn8a
+.section scn8b
+.section scn9a
+.section scn9b
+.section scn0a
+.section scn0b
+.section scoaa
+.section scoab
+.section scoba
+.section scobb
+.section scoca
+.section scocb
+.section scoda
+.section scodb
+.section scoea
+.section scoeb
+.section scofa
+.section scofb
+.section scoga
+.section scogb
+.section scoha
+.section scohb
+.section scoia
+.section scoib
+.section scoja
+.section scojb
+.section scoka
+.section scokb
+.section scola
+.section scolb
+.section scoma
+.section scomb
+.section scona
+.section sconb
+.section scooa
+.section scoob
+.section scopa
+.section scopb
+.section scoqa
+.section scoqb
+.section scora
+.section scorb
+.section scosa
+.section scosb
+.section scota
+.section scotb
+.section scoua
+.section scoub
+.section scova
+.section scovb
+.section scowa
+.section scowb
+.section scoxa
+.section scoxb
+.section scoya
+.section scoyb
+.section scoza
+.section scozb
+.section sco1a
+.section sco1b
+.section sco2a
+.section sco2b
+.section sco3a
+.section sco3b
+.section sco4a
+.section sco4b
+.section sco5a
+.section sco5b
+.section sco6a
+.section sco6b
+.section sco7a
+.section sco7b
+.section sco8a
+.section sco8b
+.section sco9a
+.section sco9b
+.section sco0a
+.section sco0b
+.section scpaa
+.section scpab
+.section scpba
+.section scpbb
+.section scpca
+.section scpcb
+.section scpda
+.section scpdb
+.section scpea
+.section scpeb
+.section scpfa
+.section scpfb
+.section scpga
+.section scpgb
+.section scpha
+.section scphb
+.section scpia
+.section scpib
+.section scpja
+.section scpjb
+.section scpka
+.section scpkb
+.section scpla
+.section scplb
+.section scpma
+.section scpmb
+.section scpna
+.section scpnb
+.section scpoa
+.section scpob
+.section scppa
+.section scppb
+.section scpqa
+.section scpqb
+.section scpra
+.section scprb
+.section scpsa
+.section scpsb
+.section scpta
+.section scptb
+.section scpua
+.section scpub
+.section scpva
+.section scpvb
+.section scpwa
+.section scpwb
+.section scpxa
+.section scpxb
+.section scpya
+.section scpyb
+.section scpza
+.section scpzb
+.section scp1a
+.section scp1b
+.section scp2a
+.section scp2b
+.section scp3a
+.section scp3b
+.section scp4a
+.section scp4b
+.section scp5a
+.section scp5b
+.section scp6a
+.section scp6b
+.section scp7a
+.section scp7b
+.section scp8a
+.section scp8b
+.section scp9a
+.section scp9b
+.section scp0a
+.section scp0b
+.section scqaa
+.section scqab
+.section scqba
+.section scqbb
+.section scqca
+.section scqcb
+.section scqda
+.section scqdb
+.section scqea
+.section scqeb
+.section scqfa
+.section scqfb
+.section scqga
+.section scqgb
+.section scqha
+.section scqhb
+.section scqia
+.section scqib
+.section scqja
+.section scqjb
+.section scqka
+.section scqkb
+.section scqla
+.section scqlb
+.section scqma
+.section scqmb
+.section scqna
+.section scqnb
+.section scqoa
+.section scqob
+.section scqpa
+.section scqpb
+.section scqqa
+.section scqqb
+.section scqra
+.section scqrb
+.section scqsa
+.section scqsb
+.section scqta
+.section scqtb
+.section scqua
+.section scqub
+.section scqva
+.section scqvb
+.section scqwa
+.section scqwb
+.section scqxa
+.section scqxb
+.section scqya
+.section scqyb
+.section scqza
+.section scqzb
+.section scq1a
+.section scq1b
+.section scq2a
+.section scq2b
+.section scq3a
+.section scq3b
+.section scq4a
+.section scq4b
+.section scq5a
+.section scq5b
+.section scq6a
+.section scq6b
+.section scq7a
+.section scq7b
+.section scq8a
+.section scq8b
+.section scq9a
+.section scq9b
+.section scq0a
+.section scq0b
+.section scraa
+.section scrab
+.section scrba
+.section scrbb
+.section scrca
+.section scrcb
+.section scrda
+.section scrdb
+.section screa
+.section screb
+.section scrfa
+.section scrfb
+.section scrga
+.section scrgb
+.section scrha
+.section scrhb
+.section scria
+.section scrib
+.section scrja
+.section scrjb
+.section scrka
+.section scrkb
+.section scrla
+.section scrlb
+.section scrma
+.section scrmb
+.section scrna
+.section scrnb
+.section scroa
+.section scrob
+.section scrpa
+.section scrpb
+.section scrqa
+.section scrqb
+.section scrra
+.section scrrb
+.section scrsa
+.section scrsb
+.section scrta
+.section scrtb
+.section scrua
+.section scrub
+.section scrva
+.section scrvb
+.section scrwa
+.section scrwb
+.section scrxa
+.section scrxb
+.section scrya
+.section scryb
+.section scrza
+.section scrzb
+.section scr1a
+.section scr1b
+.section scr2a
+.section scr2b
+.section scr3a
+.section scr3b
+.section scr4a
+.section scr4b
+.section scr5a
+.section scr5b
+.section scr6a
+.section scr6b
+.section scr7a
+.section scr7b
+.section scr8a
+.section scr8b
+.section scr9a
+.section scr9b
+.section scr0a
+.section scr0b
+.section scsaa
+.section scsab
+.section scsba
+.section scsbb
+.section scsca
+.section scscb
+.section scsda
+.section scsdb
+.section scsea
+.section scseb
+.section scsfa
+.section scsfb
+.section scsga
+.section scsgb
+.section scsha
+.section scshb
+.section scsia
+.section scsib
+.section scsja
+.section scsjb
+.section scska
+.section scskb
+.section scsla
+.section scslb
+.section scsma
+.section scsmb
+.section scsna
+.section scsnb
+.section scsoa
+.section scsob
+.section scspa
+.section scspb
+.section scsqa
+.section scsqb
+.section scsra
+.section scsrb
+.section scssa
+.section scssb
+.section scsta
+.section scstb
+.section scsua
+.section scsub
+.section scsva
+.section scsvb
+.section scswa
+.section scswb
+.section scsxa
+.section scsxb
+.section scsya
+.section scsyb
+.section scsza
+.section scszb
+.section scs1a
+.section scs1b
+.section scs2a
+.section scs2b
+.section scs3a
+.section scs3b
+.section scs4a
+.section scs4b
+.section scs5a
+.section scs5b
+.section scs6a
+.section scs6b
+.section scs7a
+.section scs7b
+.section scs8a
+.section scs8b
+.section scs9a
+.section scs9b
+.section scs0a
+.section scs0b
+.section sctaa
+.section sctab
+.section sctba
+.section sctbb
+.section sctca
+.section sctcb
+.section sctda
+.section sctdb
+.section sctea
+.section scteb
+.section sctfa
+.section sctfb
+.section sctga
+.section sctgb
+.section sctha
+.section scthb
+.section sctia
+.section sctib
+.section sctja
+.section sctjb
+.section sctka
+.section sctkb
+.section sctla
+.section sctlb
+.section sctma
+.section sctmb
+.section sctna
+.section sctnb
+.section sctoa
+.section sctob
+.section sctpa
+.section sctpb
+.section sctqa
+.section sctqb
+.section sctra
+.section sctrb
+.section sctsa
+.section sctsb
+.section sctta
+.section scttb
+.section sctua
+.section sctub
+.section sctva
+.section sctvb
+.section sctwa
+.section sctwb
+.section sctxa
+.section sctxb
+.section sctya
+.section sctyb
+.section sctza
+.section sctzb
+.section sct1a
+.section sct1b
+.section sct2a
+.section sct2b
+.section sct3a
+.section sct3b
+.section sct4a
+.section sct4b
+.section sct5a
+.section sct5b
+.section sct6a
+.section sct6b
+.section sct7a
+.section sct7b
+.section sct8a
+.section sct8b
+.section sct9a
+.section sct9b
+.section sct0a
+.section sct0b
+.section scuaa
+.section scuab
+.section scuba
+.section scubb
+.section scuca
+.section scucb
+.section scuda
+.section scudb
+.section scuea
+.section scueb
+.section scufa
+.section scufb
+.section scuga
+.section scugb
+.section scuha
+.section scuhb
+.section scuia
+.section scuib
+.section scuja
+.section scujb
+.section scuka
+.section scukb
+.section scula
+.section sculb
+.section scuma
+.section scumb
+.section scuna
+.section scunb
+.section scuoa
+.section scuob
+.section scupa
+.section scupb
+.section scuqa
+.section scuqb
+.section scura
+.section scurb
+.section scusa
+.section scusb
+.section scuta
+.section scutb
+.section scuua
+.section scuub
+.section scuva
+.section scuvb
+.section scuwa
+.section scuwb
+.section scuxa
+.section scuxb
+.section scuya
+.section scuyb
+.section scuza
+.section scuzb
+.section scu1a
+.section scu1b
+.section scu2a
+.section scu2b
+.section scu3a
+.section scu3b
+.section scu4a
+.section scu4b
+.section scu5a
+.section scu5b
+.section scu6a
+.section scu6b
+.section scu7a
+.section scu7b
+.section scu8a
+.section scu8b
+.section scu9a
+.section scu9b
+.section scu0a
+.section scu0b
+.section scvaa
+.section scvab
+.section scvba
+.section scvbb
+.section scvca
+.section scvcb
+.section scvda
+.section scvdb
+.section scvea
+.section scveb
+.section scvfa
+.section scvfb
+.section scvga
+.section scvgb
+.section scvha
+.section scvhb
+.section scvia
+.section scvib
+.section scvja
+.section scvjb
+.section scvka
+.section scvkb
+.section scvla
+.section scvlb
+.section scvma
+.section scvmb
+.section scvna
+.section scvnb
+.section scvoa
+.section scvob
+.section scvpa
+.section scvpb
+.section scvqa
+.section scvqb
+.section scvra
+.section scvrb
+.section scvsa
+.section scvsb
+.section scvta
+.section scvtb
+.section scvua
+.section scvub
+.section scvva
+.section scvvb
+.section scvwa
+.section scvwb
+.section scvxa
+.section scvxb
+.section scvya
+.section scvyb
+.section scvza
+.section scvzb
+.section scv1a
+.section scv1b
+.section scv2a
+.section scv2b
+.section scv3a
+.section scv3b
+.section scv4a
+.section scv4b
+.section scv5a
+.section scv5b
+.section scv6a
+.section scv6b
+.section scv7a
+.section scv7b
+.section scv8a
+.section scv8b
+.section scv9a
+.section scv9b
+.section scv0a
+.section scv0b
+.section scwaa
+.section scwab
+.section scwba
+.section scwbb
+.section scwca
+.section scwcb
+.section scwda
+.section scwdb
+.section scwea
+.section scweb
+.section scwfa
+.section scwfb
+.section scwga
+.section scwgb
+.section scwha
+.section scwhb
+.section scwia
+.section scwib
+.section scwja
+.section scwjb
+.section scwka
+.section scwkb
+.section scwla
+.section scwlb
+.section scwma
+.section scwmb
+.section scwna
+.section scwnb
+.section scwoa
+.section scwob
+.section scwpa
+.section scwpb
+.section scwqa
+.section scwqb
+.section scwra
+.section scwrb
+.section scwsa
+.section scwsb
+.section scwta
+.section scwtb
+.section scwua
+.section scwub
+.section scwva
+.section scwvb
+.section scwwa
+.section scwwb
+.section scwxa
+.section scwxb
+.section scwya
+.section scwyb
+.section scwza
+.section scwzb
+.section scw1a
+.section scw1b
+.section scw2a
+.section scw2b
+.section scw3a
+.section scw3b
+.section scw4a
+.section scw4b
+.section scw5a
+.section scw5b
+.section scw6a
+.section scw6b
+.section scw7a
+.section scw7b
+.section scw8a
+.section scw8b
+.section scw9a
+.section scw9b
+.section scw0a
+.section scw0b
+.section scxaa
+.section scxab
+.section scxba
+.section scxbb
+.section scxca
+.section scxcb
+.section scxda
+.section scxdb
+.section scxea
+.section scxeb
+.section scxfa
+.section scxfb
+.section scxga
+.section scxgb
+.section scxha
+.section scxhb
+.section scxia
+.section scxib
+.section scxja
+.section scxjb
+.section scxka
+.section scxkb
+.section scxla
+.section scxlb
+.section scxma
+.section scxmb
+.section scxna
+.section scxnb
+.section scxoa
+.section scxob
+.section scxpa
+.section scxpb
+.section scxqa
+.section scxqb
+.section scxra
+.section scxrb
+.section scxsa
+.section scxsb
+.section scxta
+.section scxtb
+.section scxua
+.section scxub
+.section scxva
+.section scxvb
+.section scxwa
+.section scxwb
+.section scxxa
+.section scxxb
+.section scxya
+.section scxyb
+.section scxza
+.section scxzb
+.section scx1a
+.section scx1b
+.section scx2a
+.section scx2b
+.section scx3a
+.section scx3b
+.section scx4a
+.section scx4b
+.section scx5a
+.section scx5b
+.section scx6a
+.section scx6b
+.section scx7a
+.section scx7b
+.section scx8a
+.section scx8b
+.section scx9a
+.section scx9b
+.section scx0a
+.section scx0b
+.section scyaa
+.section scyab
+.section scyba
+.section scybb
+.section scyca
+.section scycb
+.section scyda
+.section scydb
+.section scyea
+.section scyeb
+.section scyfa
+.section scyfb
+.section scyga
+.section scygb
+.section scyha
+.section scyhb
+.section scyia
+.section scyib
+.section scyja
+.section scyjb
+.section scyka
+.section scykb
+.section scyla
+.section scylb
+.section scyma
+.section scymb
+.section scyna
+.section scynb
+.section scyoa
+.section scyob
+.section scypa
+.section scypb
+.section scyqa
+.section scyqb
+.section scyra
+.section scyrb
+.section scysa
+.section scysb
+.section scyta
+.section scytb
+.section scyua
+.section scyub
+.section scyva
+.section scyvb
+.section scywa
+.section scywb
+.section scyxa
+.section scyxb
+.section scyya
+.section scyyb
+.section scyza
+.section scyzb
+.section scy1a
+.section scy1b
+.section scy2a
+.section scy2b
+.section scy3a
+.section scy3b
+.section scy4a
+.section scy4b
+.section scy5a
+.section scy5b
+.section scy6a
+.section scy6b
+.section scy7a
+.section scy7b
+.section scy8a
+.section scy8b
+.section scy9a
+.section scy9b
+.section scy0a
+.section scy0b
+.section sczaa
+.section sczab
+.section sczba
+.section sczbb
+.section sczca
+.section sczcb
+.section sczda
+.section sczdb
+.section sczea
+.section sczeb
+.section sczfa
+.section sczfb
+.section sczga
+.section sczgb
+.section sczha
+.section sczhb
+.section sczia
+.section sczib
+.section sczja
+.section sczjb
+.section sczka
+.section sczkb
+.section sczla
+.section sczlb
+.section sczma
+.section sczmb
+.section sczna
+.section scznb
+.section sczoa
+.section sczob
+.section sczpa
+.section sczpb
+.section sczqa
+.section sczqb
+.section sczra
+.section sczrb
+.section sczsa
+.section sczsb
+.section sczta
+.section scztb
+.section sczua
+.section sczub
+.section sczva
+.section sczvb
+.section sczwa
+.section sczwb
+.section sczxa
+.section sczxb
+.section sczya
+.section sczyb
+.section sczza
+.section sczzb
+.section scz1a
+.section scz1b
+.section scz2a
+.section scz2b
+.section scz3a
+.section scz3b
+.section scz4a
+.section scz4b
+.section scz5a
+.section scz5b
+.section scz6a
+.section scz6b
+.section scz7a
+.section scz7b
+.section scz8a
+.section scz8b
+.section scz9a
+.section scz9b
+.section scz0a
+.section scz0b
+.section sc1aa
+.section sc1ab
+.section sc1ba
+.section sc1bb
+.section sc1ca
+.section sc1cb
+.section sc1da
+.section sc1db
+.section sc1ea
+.section sc1eb
+.section sc1fa
+.section sc1fb
+.section sc1ga
+.section sc1gb
+.section sc1ha
+.section sc1hb
+.section sc1ia
+.section sc1ib
+.section sc1ja
+.section sc1jb
+.section sc1ka
+.section sc1kb
+.section sc1la
+.section sc1lb
+.section sc1ma
+.section sc1mb
+.section sc1na
+.section sc1nb
+.section sc1oa
+.section sc1ob
+.section sc1pa
+.section sc1pb
+.section sc1qa
+.section sc1qb
+.section sc1ra
+.section sc1rb
+.section sc1sa
+.section sc1sb
+.section sc1ta
+.section sc1tb
+.section sc1ua
+.section sc1ub
+.section sc1va
+.section sc1vb
+.section sc1wa
+.section sc1wb
+.section sc1xa
+.section sc1xb
+.section sc1ya
+.section sc1yb
+.section sc1za
+.section sc1zb
+.section sc11a
+.section sc11b
+.section sc12a
+.section sc12b
+.section sc13a
+.section sc13b
+.section sc14a
+.section sc14b
+.section sc15a
+.section sc15b
+.section sc16a
+.section sc16b
+.section sc17a
+.section sc17b
+.section sc18a
+.section sc18b
+.section sc19a
+.section sc19b
+.section sc10a
+.section sc10b
+.section sc2aa
+.section sc2ab
+.section sc2ba
+.section sc2bb
+.section sc2ca
+.section sc2cb
+.section sc2da
+.section sc2db
+.section sc2ea
+.section sc2eb
+.section sc2fa
+.section sc2fb
+.section sc2ga
+.section sc2gb
+.section sc2ha
+.section sc2hb
+.section sc2ia
+.section sc2ib
+.section sc2ja
+.section sc2jb
+.section sc2ka
+.section sc2kb
+.section sc2la
+.section sc2lb
+.section sc2ma
+.section sc2mb
+.section sc2na
+.section sc2nb
+.section sc2oa
+.section sc2ob
+.section sc2pa
+.section sc2pb
+.section sc2qa
+.section sc2qb
+.section sc2ra
+.section sc2rb
+.section sc2sa
+.section sc2sb
+.section sc2ta
+.section sc2tb
+.section sc2ua
+.section sc2ub
+.section sc2va
+.section sc2vb
+.section sc2wa
+.section sc2wb
+.section sc2xa
+.section sc2xb
+.section sc2ya
+.section sc2yb
+.section sc2za
+.section sc2zb
+.section sc21a
+.section sc21b
+.section sc22a
+.section sc22b
+.section sc23a
+.section sc23b
+.section sc24a
+.section sc24b
+.section sc25a
+.section sc25b
+.section sc26a
+.section sc26b
+.section sc27a
+.section sc27b
+.section sc28a
+.section sc28b
+.section sc29a
+.section sc29b
+.section sc20a
+.section sc20b
+.section sc3aa
+.section sc3ab
+.section sc3ba
+.section sc3bb
+.section sc3ca
+.section sc3cb
+.section sc3da
+.section sc3db
+.section sc3ea
+.section sc3eb
+.section sc3fa
+.section sc3fb
+.section sc3ga
+.section sc3gb
+.section sc3ha
+.section sc3hb
+.section sc3ia
+.section sc3ib
+.section sc3ja
+.section sc3jb
+.section sc3ka
+.section sc3kb
+.section sc3la
+.section sc3lb
+.section sc3ma
+.section sc3mb
+.section sc3na
+.section sc3nb
+.section sc3oa
+.section sc3ob
+.section sc3pa
+.section sc3pb
+.section sc3qa
+.section sc3qb
+.section sc3ra
+.section sc3rb
+.section sc3sa
+.section sc3sb
+.section sc3ta
+.section sc3tb
+.section sc3ua
+.section sc3ub
+.section sc3va
+.section sc3vb
+.section sc3wa
+.section sc3wb
+.section sc3xa
+.section sc3xb
+.section sc3ya
+.section sc3yb
+.section sc3za
+.section sc3zb
+.section sc31a
+.section sc31b
+.section sc32a
+.section sc32b
+.section sc33a
+.section sc33b
+.section sc34a
+.section sc34b
+.section sc35a
+.section sc35b
+.section sc36a
+.section sc36b
+.section sc37a
+.section sc37b
+.section sc38a
+.section sc38b
+.section sc39a
+.section sc39b
+.section sc30a
+.section sc30b
+.section sc4aa
+.section sc4ab
+.section sc4ba
+.section sc4bb
+.section sc4ca
+.section sc4cb
+.section sc4da
+.section sc4db
+.section sc4ea
+.section sc4eb
+.section sc4fa
+.section sc4fb
+.section sc4ga
+.section sc4gb
+.section sc4ha
+.section sc4hb
+.section sc4ia
+.section sc4ib
+.section sc4ja
+.section sc4jb
+.section sc4ka
+.section sc4kb
+.section sc4la
+.section sc4lb
+.section sc4ma
+.section sc4mb
+.section sc4na
+.section sc4nb
+.section sc4oa
+.section sc4ob
+.section sc4pa
+.section sc4pb
+.section sc4qa
+.section sc4qb
+.section sc4ra
+.section sc4rb
+.section sc4sa
+.section sc4sb
+.section sc4ta
+.section sc4tb
+.section sc4ua
+.section sc4ub
+.section sc4va
+.section sc4vb
+.section sc4wa
+.section sc4wb
+.section sc4xa
+.section sc4xb
+.section sc4ya
+.section sc4yb
+.section sc4za
+.section sc4zb
+.section sc41a
+.section sc41b
+.section sc42a
+.section sc42b
+.section sc43a
+.section sc43b
+.section sc44a
+.section sc44b
+.section sc45a
+.section sc45b
+.section sc46a
+.section sc46b
+.section sc47a
+.section sc47b
+.section sc48a
+.section sc48b
+.section sc49a
+.section sc49b
+.section sc40a
+.section sc40b
+.section sc5aa
+.section sc5ab
+.section sc5ba
+.section sc5bb
+.section sc5ca
+.section sc5cb
+.section sc5da
+.section sc5db
+.section sc5ea
+.section sc5eb
+.section sc5fa
+.section sc5fb
+.section sc5ga
+.section sc5gb
+.section sc5ha
+.section sc5hb
+.section sc5ia
+.section sc5ib
+.section sc5ja
+.section sc5jb
+.section sc5ka
+.section sc5kb
+.section sc5la
+.section sc5lb
+.section sc5ma
+.section sc5mb
+.section sc5na
+.section sc5nb
+.section sc5oa
+.section sc5ob
+.section sc5pa
+.section sc5pb
+.section sc5qa
+.section sc5qb
+.section sc5ra
+.section sc5rb
+.section sc5sa
+.section sc5sb
+.section sc5ta
+.section sc5tb
+.section sc5ua
+.section sc5ub
+.section sc5va
+.section sc5vb
+.section sc5wa
+.section sc5wb
+.section sc5xa
+.section sc5xb
+.section sc5ya
+.section sc5yb
+.section sc5za
+.section sc5zb
+.section sc51a
+.section sc51b
+.section sc52a
+.section sc52b
+.section sc53a
+.section sc53b
+.section sc54a
+.section sc54b
+.section sc55a
+.section sc55b
+.section sc56a
+.section sc56b
+.section sc57a
+.section sc57b
+.section sc58a
+.section sc58b
+.section sc59a
+.section sc59b
+.section sc50a
+.section sc50b
+.section sc6aa
+.section sc6ab
+.section sc6ba
+.section sc6bb
+.section sc6ca
+.section sc6cb
+.section sc6da
+.section sc6db
+.section sc6ea
+.section sc6eb
+.section sc6fa
+.section sc6fb
+.section sc6ga
+.section sc6gb
+.section sc6ha
+.section sc6hb
+.section sc6ia
+.section sc6ib
+.section sc6ja
+.section sc6jb
+.section sc6ka
+.section sc6kb
+.section sc6la
+.section sc6lb
+.section sc6ma
+.section sc6mb
+.section sc6na
+.section sc6nb
+.section sc6oa
+.section sc6ob
+.section sc6pa
+.section sc6pb
+.section sc6qa
+.section sc6qb
+.section sc6ra
+.section sc6rb
+.section sc6sa
+.section sc6sb
+.section sc6ta
+.section sc6tb
+.section sc6ua
+.section sc6ub
+.section sc6va
+.section sc6vb
+.section sc6wa
+.section sc6wb
+.section sc6xa
+.section sc6xb
+.section sc6ya
+.section sc6yb
+.section sc6za
+.section sc6zb
+.section sc61a
+.section sc61b
+.section sc62a
+.section sc62b
+.section sc63a
+.section sc63b
+.section sc64a
+.section sc64b
+.section sc65a
+.section sc65b
+.section sc66a
+.section sc66b
+.section sc67a
+.section sc67b
+.section sc68a
+.section sc68b
+.section sc69a
+.section sc69b
+.section sc60a
+.section sc60b
+.section sc7aa
+.section sc7ab
+.section sc7ba
+.section sc7bb
+.section sc7ca
+.section sc7cb
+.section sc7da
+.section sc7db
+.section sc7ea
+.section sc7eb
+.section sc7fa
+.section sc7fb
+.section sc7ga
+.section sc7gb
+.section sc7ha
+.section sc7hb
+.section sc7ia
+.section sc7ib
+.section sc7ja
+.section sc7jb
+.section sc7ka
+.section sc7kb
+.section sc7la
+.section sc7lb
+.section sc7ma
+.section sc7mb
+.section sc7na
+.section sc7nb
+.section sc7oa
+.section sc7ob
+.section sc7pa
+.section sc7pb
+.section sc7qa
+.section sc7qb
+.section sc7ra
+.section sc7rb
+.section sc7sa
+.section sc7sb
+.section sc7ta
+.section sc7tb
+.section sc7ua
+.section sc7ub
+.section sc7va
+.section sc7vb
+.section sc7wa
+.section sc7wb
+.section sc7xa
+.section sc7xb
+.section sc7ya
+.section sc7yb
+.section sc7za
+.section sc7zb
+.section sc71a
+.section sc71b
+.section sc72a
+.section sc72b
+.section sc73a
+.section sc73b
+.section sc74a
+.section sc74b
+.section sc75a
+.section sc75b
+.section sc76a
+.section sc76b
+.section sc77a
+.section sc77b
+.section sc78a
+.section sc78b
+.section sc79a
+.section sc79b
+.section sc70a
+.section sc70b
+.section sc8aa
+.section sc8ab
+.section sc8ba
+.section sc8bb
+.section sc8ca
+.section sc8cb
+.section sc8da
+.section sc8db
+.section sc8ea
+.section sc8eb
+.section sc8fa
+.section sc8fb
+.section sc8ga
+.section sc8gb
+.section sc8ha
+.section sc8hb
+.section sc8ia
+.section sc8ib
+.section sc8ja
+.section sc8jb
+.section sc8ka
+.section sc8kb
+.section sc8la
+.section sc8lb
+.section sc8ma
+.section sc8mb
+.section sc8na
+.section sc8nb
+.section sc8oa
+.section sc8ob
+.section sc8pa
+.section sc8pb
+.section sc8qa
+.section sc8qb
+.section sc8ra
+.section sc8rb
+.section sc8sa
+.section sc8sb
+.section sc8ta
+.section sc8tb
+.section sc8ua
+.section sc8ub
+.section sc8va
+.section sc8vb
+.section sc8wa
+.section sc8wb
+.section sc8xa
+.section sc8xb
+.section sc8ya
+.section sc8yb
+.section sc8za
+.section sc8zb
+.section sc81a
+.section sc81b
+.section sc82a
+.section sc82b
+.section sc83a
+.section sc83b
+.section sc84a
+.section sc84b
+.section sc85a
+.section sc85b
+.section sc86a
+.section sc86b
+.section sc87a
+.section sc87b
+.section sc88a
+.section sc88b
+.section sc89a
+.section sc89b
+.section sc80a
+.section sc80b
+.section sc9aa
+.section sc9ab
+.section sc9ba
+.section sc9bb
+.section sc9ca
+.section sc9cb
+.section sc9da
+.section sc9db
+.section sc9ea
+.section sc9eb
+.section sc9fa
+.section sc9fb
+.section sc9ga
+.section sc9gb
+.section sc9ha
+.section sc9hb
+.section sc9ia
+.section sc9ib
+.section sc9ja
+.section sc9jb
+.section sc9ka
+.section sc9kb
+.section sc9la
+.section sc9lb
+.section sc9ma
+.section sc9mb
+.section sc9na
+.section sc9nb
+.section sc9oa
+.section sc9ob
+.section sc9pa
+.section sc9pb
+.section sc9qa
+.section sc9qb
+.section sc9ra
+.section sc9rb
+.section sc9sa
+.section sc9sb
+.section sc9ta
+.section sc9tb
+.section sc9ua
+.section sc9ub
+.section sc9va
+.section sc9vb
+.section sc9wa
+.section sc9wb
+.section sc9xa
+.section sc9xb
+.section sc9ya
+.section sc9yb
+.section sc9za
+.section sc9zb
+.section sc91a
+.section sc91b
+.section sc92a
+.section sc92b
+.section sc93a
+.section sc93b
+.section sc94a
+.section sc94b
+.section sc95a
+.section sc95b
+.section sc96a
+.section sc96b
+.section sc97a
+.section sc97b
+.section sc98a
+.section sc98b
+.section sc99a
+.section sc99b
+.section sc90a
+.section sc90b
+.section sc0aa
+.section sc0ab
+.section sc0ba
+.section sc0bb
+.section sc0ca
+.section sc0cb
+.section sc0da
+.section sc0db
+.section sc0ea
+.section sc0eb
+.section sc0fa
+.section sc0fb
+.section sc0ga
+.section sc0gb
+.section sc0ha
+.section sc0hb
+.section sc0ia
+.section sc0ib
+.section sc0ja
+.section sc0jb
+.section sc0ka
+.section sc0kb
+.section sc0la
+.section sc0lb
+.section sc0ma
+.section sc0mb
+.section sc0na
+.section sc0nb
+.section sc0oa
+.section sc0ob
+.section sc0pa
+.section sc0pb
+.section sc0qa
+.section sc0qb
+.section sc0ra
+.section sc0rb
+.section sc0sa
+.section sc0sb
+.section sc0ta
+.section sc0tb
+.section sc0ua
+.section sc0ub
+.section sc0va
+.section sc0vb
+.section sc0wa
+.section sc0wb
+.section sc0xa
+.section sc0xb
+.section sc0ya
+.section sc0yb
+.section sc0za
+.section sc0zb
+.section sc01a
+.section sc01b
+.section sc02a
+.section sc02b
+.section sc03a
+.section sc03b
+.section sc04a
+.section sc04b
+.section sc05a
+.section sc05b
+.section sc06a
+.section sc06b
+.section sc07a
+.section sc07b
+.section sc08a
+.section sc08b
+.section sc09a
+.section sc09b
+.section sc00a
+.section sc00b
+.section sdaaa
+.section sdaab
+.section sdaba
+.section sdabb
+.section sdaca
+.section sdacb
+.section sdada
+.section sdadb
+.section sdaea
+.section sdaeb
+.section sdafa
+.section sdafb
+.section sdaga
+.section sdagb
+.section sdaha
+.section sdahb
+.section sdaia
+.section sdaib
+.section sdaja
+.section sdajb
+.section sdaka
+.section sdakb
+.section sdala
+.section sdalb
+.section sdama
+.section sdamb
+.section sdana
+.section sdanb
+.section sdaoa
+.section sdaob
+.section sdapa
+.section sdapb
+.section sdaqa
+.section sdaqb
+.section sdara
+.section sdarb
+.section sdasa
+.section sdasb
+.section sdata
+.section sdatb
+.section sdaua
+.section sdaub
+.section sdava
+.section sdavb
+.section sdawa
+.section sdawb
+.section sdaxa
+.section sdaxb
+.section sdaya
+.section sdayb
+.section sdaza
+.section sdazb
+.section sda1a
+.section sda1b
+.section sda2a
+.section sda2b
+.section sda3a
+.section sda3b
+.section sda4a
+.section sda4b
+.section sda5a
+.section sda5b
+.section sda6a
+.section sda6b
+.section sda7a
+.section sda7b
+.section sda8a
+.section sda8b
+.section sda9a
+.section sda9b
+.section sda0a
+.section sda0b
+.section sdbaa
+.section sdbab
+.section sdbba
+.section sdbbb
+.section sdbca
+.section sdbcb
+.section sdbda
+.section sdbdb
+.section sdbea
+.section sdbeb
+.section sdbfa
+.section sdbfb
+.section sdbga
+.section sdbgb
+.section sdbha
+.section sdbhb
+.section sdbia
+.section sdbib
+.section sdbja
+.section sdbjb
+.section sdbka
+.section sdbkb
+.section sdbla
+.section sdblb
+.section sdbma
+.section sdbmb
+.section sdbna
+.section sdbnb
+.section sdboa
+.section sdbob
+.section sdbpa
+.section sdbpb
+.section sdbqa
+.section sdbqb
+.section sdbra
+.section sdbrb
+.section sdbsa
+.section sdbsb
+.section sdbta
+.section sdbtb
+.section sdbua
+.section sdbub
+.section sdbva
+.section sdbvb
+.section sdbwa
+.section sdbwb
+.section sdbxa
+.section sdbxb
+.section sdbya
+.section sdbyb
+.section sdbza
+.section sdbzb
+.section sdb1a
+.section sdb1b
+.section sdb2a
+.section sdb2b
+.section sdb3a
+.section sdb3b
+.section sdb4a
+.section sdb4b
+.section sdb5a
+.section sdb5b
+.section sdb6a
+.section sdb6b
+.section sdb7a
+.section sdb7b
+.section sdb8a
+.section sdb8b
+.section sdb9a
+.section sdb9b
+.section sdb0a
+.section sdb0b
+.section sdcaa
+.section sdcab
+.section sdcba
+.section sdcbb
+.section sdcca
+.section sdccb
+.section sdcda
+.section sdcdb
+.section sdcea
+.section sdceb
+.section sdcfa
+.section sdcfb
+.section sdcga
+.section sdcgb
+.section sdcha
+.section sdchb
+.section sdcia
+.section sdcib
+.section sdcja
+.section sdcjb
+.section sdcka
+.section sdckb
+.section sdcla
+.section sdclb
+.section sdcma
+.section sdcmb
+.section sdcna
+.section sdcnb
+.section sdcoa
+.section sdcob
+.section sdcpa
+.section sdcpb
+.section sdcqa
+.section sdcqb
+.section sdcra
+.section sdcrb
+.section sdcsa
+.section sdcsb
+.section sdcta
+.section sdctb
+.section sdcua
+.section sdcub
+.section sdcva
+.section sdcvb
+.section sdcwa
+.section sdcwb
+.section sdcxa
+.section sdcxb
+.section sdcya
+.section sdcyb
+.section sdcza
+.section sdczb
+.section sdc1a
+.section sdc1b
+.section sdc2a
+.section sdc2b
+.section sdc3a
+.section sdc3b
+.section sdc4a
+.section sdc4b
+.section sdc5a
+.section sdc5b
+.section sdc6a
+.section sdc6b
+.section sdc7a
+.section sdc7b
+.section sdc8a
+.section sdc8b
+.section sdc9a
+.section sdc9b
+.section sdc0a
+.section sdc0b
+.section sddaa
+.section sddab
+.section sddba
+.section sddbb
+.section sddca
+.section sddcb
+.section sddda
+.section sdddb
+.section sddea
+.section sddeb
+.section sddfa
+.section sddfb
+.section sddga
+.section sddgb
+.section sddha
+.section sddhb
+.section sddia
+.section sddib
+.section sddja
+.section sddjb
+.section sddka
+.section sddkb
+.section sddla
+.section sddlb
+.section sddma
+.section sddmb
+.section sddna
+.section sddnb
+.section sddoa
+.section sddob
+.section sddpa
+.section sddpb
+.section sddqa
+.section sddqb
+.section sddra
+.section sddrb
+.section sddsa
+.section sddsb
+.section sddta
+.section sddtb
+.section sddua
+.section sddub
+.section sddva
+.section sddvb
+.section sddwa
+.section sddwb
+.section sddxa
+.section sddxb
+.section sddya
+.section sddyb
+.section sddza
+.section sddzb
+.section sdd1a
+.section sdd1b
+.section sdd2a
+.section sdd2b
+.section sdd3a
+.section sdd3b
+.section sdd4a
+.section sdd4b
+.section sdd5a
+.section sdd5b
+.section sdd6a
+.section sdd6b
+.section sdd7a
+.section sdd7b
+.section sdd8a
+.section sdd8b
+.section sdd9a
+.section sdd9b
+.section sdd0a
+.section sdd0b
+.section sdeaa
+.section sdeab
+.section sdeba
+.section sdebb
+.section sdeca
+.section sdecb
+.section sdeda
+.section sdedb
+.section sdeea
+.section sdeeb
+.section sdefa
+.section sdefb
+.section sdega
+.section sdegb
+.section sdeha
+.section sdehb
+.section sdeia
+.section sdeib
+.section sdeja
+.section sdejb
+.section sdeka
+.section sdekb
+.section sdela
+.section sdelb
+.section sdema
+.section sdemb
+.section sdena
+.section sdenb
+.section sdeoa
+.section sdeob
+.section sdepa
+.section sdepb
+.section sdeqa
+.section sdeqb
+.section sdera
+.section sderb
+.section sdesa
+.section sdesb
+.section sdeta
+.section sdetb
+.section sdeua
+.section sdeub
+.section sdeva
+.section sdevb
+.section sdewa
+.section sdewb
+.section sdexa
+.section sdexb
+.section sdeya
+.section sdeyb
+.section sdeza
+.section sdezb
+.section sde1a
+.section sde1b
+.section sde2a
+.section sde2b
+.section sde3a
+.section sde3b
+.section sde4a
+.section sde4b
+.section sde5a
+.section sde5b
+.section sde6a
+.section sde6b
+.section sde7a
+.section sde7b
+.section sde8a
+.section sde8b
+.section sde9a
+.section sde9b
+.section sde0a
+.section sde0b
+.section sdfaa
+.section sdfab
+.section sdfba
+.section sdfbb
+.section sdfca
+.section sdfcb
+.section sdfda
+.section sdfdb
+.section sdfea
+.section sdfeb
+.section sdffa
+.section sdffb
+.section sdfga
+.section sdfgb
+.section sdfha
+.section sdfhb
+.section sdfia
+.section sdfib
+.section sdfja
+.section sdfjb
+.section sdfka
+.section sdfkb
+.section sdfla
+.section sdflb
+.section sdfma
+.section sdfmb
+.section sdfna
+.section sdfnb
+.section sdfoa
+.section sdfob
+.section sdfpa
+.section sdfpb
+.section sdfqa
+.section sdfqb
+.section sdfra
+.section sdfrb
+.section sdfsa
+.section sdfsb
+.section sdfta
+.section sdftb
+.section sdfua
+.section sdfub
+.section sdfva
+.section sdfvb
+.section sdfwa
+.section sdfwb
+.section sdfxa
+.section sdfxb
+.section sdfya
+.section sdfyb
+.section sdfza
+.section sdfzb
+.section sdf1a
+.section sdf1b
+.section sdf2a
+.section sdf2b
+.section sdf3a
+.section sdf3b
+.section sdf4a
+.section sdf4b
+.section sdf5a
+.section sdf5b
+.section sdf6a
+.section sdf6b
+.section sdf7a
+.section sdf7b
+.section sdf8a
+.section sdf8b
+.section sdf9a
+.section sdf9b
+.section sdf0a
+.section sdf0b
+.section sdgaa
+.section sdgab
+.section sdgba
+.section sdgbb
+.section sdgca
+.section sdgcb
+.section sdgda
+.section sdgdb
+.section sdgea
+.section sdgeb
+.section sdgfa
+.section sdgfb
+.section sdgga
+.section sdggb
+.section sdgha
+.section sdghb
+.section sdgia
+.section sdgib
+.section sdgja
+.section sdgjb
+.section sdgka
+.section sdgkb
+.section sdgla
+.section sdglb
+.section sdgma
+.section sdgmb
+.section sdgna
+.section sdgnb
+.section sdgoa
+.section sdgob
+.section sdgpa
+.section sdgpb
+.section sdgqa
+.section sdgqb
+.section sdgra
+.section sdgrb
+.section sdgsa
+.section sdgsb
+.section sdgta
+.section sdgtb
+.section sdgua
+.section sdgub
+.section sdgva
+.section sdgvb
+.section sdgwa
+.section sdgwb
+.section sdgxa
+.section sdgxb
+.section sdgya
+.section sdgyb
+.section sdgza
+.section sdgzb
+.section sdg1a
+.section sdg1b
+.section sdg2a
+.section sdg2b
+.section sdg3a
+.section sdg3b
+.section sdg4a
+.section sdg4b
+.section sdg5a
+.section sdg5b
+.section sdg6a
+.section sdg6b
+.section sdg7a
+.section sdg7b
+.section sdg8a
+.section sdg8b
+.section sdg9a
+.section sdg9b
+.section sdg0a
+.section sdg0b
+.section sdhaa
+.section sdhab
+.section sdhba
+.section sdhbb
+.section sdhca
+.section sdhcb
+.section sdhda
+.section sdhdb
+.section sdhea
+.section sdheb
+.section sdhfa
+.section sdhfb
+.section sdhga
+.section sdhgb
+.section sdhha
+.section sdhhb
+.section sdhia
+.section sdhib
+.section sdhja
+.section sdhjb
+.section sdhka
+.section sdhkb
+.section sdhla
+.section sdhlb
+.section sdhma
+.section sdhmb
+.section sdhna
+.section sdhnb
+.section sdhoa
+.section sdhob
+.section sdhpa
+.section sdhpb
+.section sdhqa
+.section sdhqb
+.section sdhra
+.section sdhrb
+.section sdhsa
+.section sdhsb
+.section sdhta
+.section sdhtb
+.section sdhua
+.section sdhub
+.section sdhva
+.section sdhvb
+.section sdhwa
+.section sdhwb
+.section sdhxa
+.section sdhxb
+.section sdhya
+.section sdhyb
+.section sdhza
+.section sdhzb
+.section sdh1a
+.section sdh1b
+.section sdh2a
+.section sdh2b
+.section sdh3a
+.section sdh3b
+.section sdh4a
+.section sdh4b
+.section sdh5a
+.section sdh5b
+.section sdh6a
+.section sdh6b
+.section sdh7a
+.section sdh7b
+.section sdh8a
+.section sdh8b
+.section sdh9a
+.section sdh9b
+.section sdh0a
+.section sdh0b
+.section sdiaa
+.section sdiab
+.section sdiba
+.section sdibb
+.section sdica
+.section sdicb
+.section sdida
+.section sdidb
+.section sdiea
+.section sdieb
+.section sdifa
+.section sdifb
+.section sdiga
+.section sdigb
+.section sdiha
+.section sdihb
+.section sdiia
+.section sdiib
+.section sdija
+.section sdijb
+.section sdika
+.section sdikb
+.section sdila
+.section sdilb
+.section sdima
+.section sdimb
+.section sdina
+.section sdinb
+.section sdioa
+.section sdiob
+.section sdipa
+.section sdipb
+.section sdiqa
+.section sdiqb
+.section sdira
+.section sdirb
+.section sdisa
+.section sdisb
+.section sdita
+.section sditb
+.section sdiua
+.section sdiub
+.section sdiva
+.section sdivb
+.section sdiwa
+.section sdiwb
+.section sdixa
+.section sdixb
+.section sdiya
+.section sdiyb
+.section sdiza
+.section sdizb
+.section sdi1a
+.section sdi1b
+.section sdi2a
+.section sdi2b
+.section sdi3a
+.section sdi3b
+.section sdi4a
+.section sdi4b
+.section sdi5a
+.section sdi5b
+.section sdi6a
+.section sdi6b
+.section sdi7a
+.section sdi7b
+.section sdi8a
+.section sdi8b
+.section sdi9a
+.section sdi9b
+.section sdi0a
+.section sdi0b
+.section sdjaa
+.section sdjab
+.section sdjba
+.section sdjbb
+.section sdjca
+.section sdjcb
+.section sdjda
+.section sdjdb
+.section sdjea
+.section sdjeb
+.section sdjfa
+.section sdjfb
+.section sdjga
+.section sdjgb
+.section sdjha
+.section sdjhb
+.section sdjia
+.section sdjib
+.section sdjja
+.section sdjjb
+.section sdjka
+.section sdjkb
+.section sdjla
+.section sdjlb
+.section sdjma
+.section sdjmb
+.section sdjna
+.section sdjnb
+.section sdjoa
+.section sdjob
+.section sdjpa
+.section sdjpb
+.section sdjqa
+.section sdjqb
+.section sdjra
+.section sdjrb
+.section sdjsa
+.section sdjsb
+.section sdjta
+.section sdjtb
+.section sdjua
+.section sdjub
+.section sdjva
+.section sdjvb
+.section sdjwa
+.section sdjwb
+.section sdjxa
+.section sdjxb
+.section sdjya
+.section sdjyb
+.section sdjza
+.section sdjzb
+.section sdj1a
+.section sdj1b
+.section sdj2a
+.section sdj2b
+.section sdj3a
+.section sdj3b
+.section sdj4a
+.section sdj4b
+.section sdj5a
+.section sdj5b
+.section sdj6a
+.section sdj6b
+.section sdj7a
+.section sdj7b
+.section sdj8a
+.section sdj8b
+.section sdj9a
+.section sdj9b
+.section sdj0a
+.section sdj0b
+.section sdkaa
+.section sdkab
+.section sdkba
+.section sdkbb
+.section sdkca
+.section sdkcb
+.section sdkda
+.section sdkdb
+.section sdkea
+.section sdkeb
+.section sdkfa
+.section sdkfb
+.section sdkga
+.section sdkgb
+.section sdkha
+.section sdkhb
+.section sdkia
+.section sdkib
+.section sdkja
+.section sdkjb
+.section sdkka
+.section sdkkb
+.section sdkla
+.section sdklb
+.section sdkma
+.section sdkmb
+.section sdkna
+.section sdknb
+.section sdkoa
+.section sdkob
+.section sdkpa
+.section sdkpb
+.section sdkqa
+.section sdkqb
+.section sdkra
+.section sdkrb
+.section sdksa
+.section sdksb
+.section sdkta
+.section sdktb
+.section sdkua
+.section sdkub
+.section sdkva
+.section sdkvb
+.section sdkwa
+.section sdkwb
+.section sdkxa
+.section sdkxb
+.section sdkya
+.section sdkyb
+.section sdkza
+.section sdkzb
+.section sdk1a
+.section sdk1b
+.section sdk2a
+.section sdk2b
+.section sdk3a
+.section sdk3b
+.section sdk4a
+.section sdk4b
+.section sdk5a
+.section sdk5b
+.section sdk6a
+.section sdk6b
+.section sdk7a
+.section sdk7b
+.section sdk8a
+.section sdk8b
+.section sdk9a
+.section sdk9b
+.section sdk0a
+.section sdk0b
+.section sdlaa
+.section sdlab
+.section sdlba
+.section sdlbb
+.section sdlca
+.section sdlcb
+.section sdlda
+.section sdldb
+.section sdlea
+.section sdleb
+.section sdlfa
+.section sdlfb
+.section sdlga
+.section sdlgb
+.section sdlha
+.section sdlhb
+.section sdlia
+.section sdlib
+.section sdlja
+.section sdljb
+.section sdlka
+.section sdlkb
+.section sdlla
+.section sdllb
+.section sdlma
+.section sdlmb
+.section sdlna
+.section sdlnb
+.section sdloa
+.section sdlob
+.section sdlpa
+.section sdlpb
+.section sdlqa
+.section sdlqb
+.section sdlra
+.section sdlrb
+.section sdlsa
+.section sdlsb
+.section sdlta
+.section sdltb
+.section sdlua
+.section sdlub
+.section sdlva
+.section sdlvb
+.section sdlwa
+.section sdlwb
+.section sdlxa
+.section sdlxb
+.section sdlya
+.section sdlyb
+.section sdlza
+.section sdlzb
+.section sdl1a
+.section sdl1b
+.section sdl2a
+.section sdl2b
+.section sdl3a
+.section sdl3b
+.section sdl4a
+.section sdl4b
+.section sdl5a
+.section sdl5b
+.section sdl6a
+.section sdl6b
+.section sdl7a
+.section sdl7b
+.section sdl8a
+.section sdl8b
+.section sdl9a
+.section sdl9b
+.section sdl0a
+.section sdl0b
+.section sdmaa
+.section sdmab
+.section sdmba
+.section sdmbb
+.section sdmca
+.section sdmcb
+.section sdmda
+.section sdmdb
+.section sdmea
+.section sdmeb
+.section sdmfa
+.section sdmfb
+.section sdmga
+.section sdmgb
+.section sdmha
+.section sdmhb
+.section sdmia
+.section sdmib
+.section sdmja
+.section sdmjb
+.section sdmka
+.section sdmkb
+.section sdmla
+.section sdmlb
+.section sdmma
+.section sdmmb
+.section sdmna
+.section sdmnb
+.section sdmoa
+.section sdmob
+.section sdmpa
+.section sdmpb
+.section sdmqa
+.section sdmqb
+.section sdmra
+.section sdmrb
+.section sdmsa
+.section sdmsb
+.section sdmta
+.section sdmtb
+.section sdmua
+.section sdmub
+.section sdmva
+.section sdmvb
+.section sdmwa
+.section sdmwb
+.section sdmxa
+.section sdmxb
+.section sdmya
+.section sdmyb
+.section sdmza
+.section sdmzb
+.section sdm1a
+.section sdm1b
+.section sdm2a
+.section sdm2b
+.section sdm3a
+.section sdm3b
+.section sdm4a
+.section sdm4b
+.section sdm5a
+.section sdm5b
+.section sdm6a
+.section sdm6b
+.section sdm7a
+.section sdm7b
+.section sdm8a
+.section sdm8b
+.section sdm9a
+.section sdm9b
+.section sdm0a
+.section sdm0b
+.section sdnaa
+.section sdnab
+.section sdnba
+.section sdnbb
+.section sdnca
+.section sdncb
+.section sdnda
+.section sdndb
+.section sdnea
+.section sdneb
+.section sdnfa
+.section sdnfb
+.section sdnga
+.section sdngb
+.section sdnha
+.section sdnhb
+.section sdnia
+.section sdnib
+.section sdnja
+.section sdnjb
+.section sdnka
+.section sdnkb
+.section sdnla
+.section sdnlb
+.section sdnma
+.section sdnmb
+.section sdnna
+.section sdnnb
+.section sdnoa
+.section sdnob
+.section sdnpa
+.section sdnpb
+.section sdnqa
+.section sdnqb
+.section sdnra
+.section sdnrb
+.section sdnsa
+.section sdnsb
+.section sdnta
+.section sdntb
+.section sdnua
+.section sdnub
+.section sdnva
+.section sdnvb
+.section sdnwa
+.section sdnwb
+.section sdnxa
+.section sdnxb
+.section sdnya
+.section sdnyb
+.section sdnza
+.section sdnzb
+.section sdn1a
+.section sdn1b
+.section sdn2a
+.section sdn2b
+.section sdn3a
+.section sdn3b
+.section sdn4a
+.section sdn4b
+.section sdn5a
+.section sdn5b
+.section sdn6a
+.section sdn6b
+.section sdn7a
+.section sdn7b
+.section sdn8a
+.section sdn8b
+.section sdn9a
+.section sdn9b
+.section sdn0a
+.section sdn0b
+.section sdoaa
+.section sdoab
+.section sdoba
+.section sdobb
+.section sdoca
+.section sdocb
+.section sdoda
+.section sdodb
+.section sdoea
+.section sdoeb
+.section sdofa
+.section sdofb
+.section sdoga
+.section sdogb
+.section sdoha
+.section sdohb
+.section sdoia
+.section sdoib
+.section sdoja
+.section sdojb
+.section sdoka
+.section sdokb
+.section sdola
+.section sdolb
+.section sdoma
+.section sdomb
+.section sdona
+.section sdonb
+.section sdooa
+.section sdoob
+.section sdopa
+.section sdopb
+.section sdoqa
+.section sdoqb
+.section sdora
+.section sdorb
+.section sdosa
+.section sdosb
+.section sdota
+.section sdotb
+.section sdoua
+.section sdoub
+.section sdova
+.section sdovb
+.section sdowa
+.section sdowb
+.section sdoxa
+.section sdoxb
+.section sdoya
+.section sdoyb
+.section sdoza
+.section sdozb
+.section sdo1a
+.section sdo1b
+.section sdo2a
+.section sdo2b
+.section sdo3a
+.section sdo3b
+.section sdo4a
+.section sdo4b
+.section sdo5a
+.section sdo5b
+.section sdo6a
+.section sdo6b
+.section sdo7a
+.section sdo7b
+.section sdo8a
+.section sdo8b
+.section sdo9a
+.section sdo9b
+.section sdo0a
+.section sdo0b
+.section sdpaa
+.section sdpab
+.section sdpba
+.section sdpbb
+.section sdpca
+.section sdpcb
+.section sdpda
+.section sdpdb
+.section sdpea
+.section sdpeb
+.section sdpfa
+.section sdpfb
+.section sdpga
+.section sdpgb
+.section sdpha
+.section sdphb
+.section sdpia
+.section sdpib
+.section sdpja
+.section sdpjb
+.section sdpka
+.section sdpkb
+.section sdpla
+.section sdplb
+.section sdpma
+.section sdpmb
+.section sdpna
+.section sdpnb
+.section sdpoa
+.section sdpob
+.section sdppa
+.section sdppb
+.section sdpqa
+.section sdpqb
+.section sdpra
+.section sdprb
+.section sdpsa
+.section sdpsb
+.section sdpta
+.section sdptb
+.section sdpua
+.section sdpub
+.section sdpva
+.section sdpvb
+.section sdpwa
+.section sdpwb
+.section sdpxa
+.section sdpxb
+.section sdpya
+.section sdpyb
+.section sdpza
+.section sdpzb
+.section sdp1a
+.section sdp1b
+.section sdp2a
+.section sdp2b
+.section sdp3a
+.section sdp3b
+.section sdp4a
+.section sdp4b
+.section sdp5a
+.section sdp5b
+.section sdp6a
+.section sdp6b
+.section sdp7a
+.section sdp7b
+.section sdp8a
+.section sdp8b
+.section sdp9a
+.section sdp9b
+.section sdp0a
+.section sdp0b
+.section sdqaa
+.section sdqab
+.section sdqba
+.section sdqbb
+.section sdqca
+.section sdqcb
+.section sdqda
+.section sdqdb
+.section sdqea
+.section sdqeb
+.section sdqfa
+.section sdqfb
+.section sdqga
+.section sdqgb
+.section sdqha
+.section sdqhb
+.section sdqia
+.section sdqib
+.section sdqja
+.section sdqjb
+.section sdqka
+.section sdqkb
+.section sdqla
+.section sdqlb
+.section sdqma
+.section sdqmb
+.section sdqna
+.section sdqnb
+.section sdqoa
+.section sdqob
+.section sdqpa
+.section sdqpb
+.section sdqqa
+.section sdqqb
+.section sdqra
+.section sdqrb
+.section sdqsa
+.section sdqsb
+.section sdqta
+.section sdqtb
+.section sdqua
+.section sdqub
+.section sdqva
+.section sdqvb
+.section sdqwa
+.section sdqwb
+.section sdqxa
+.section sdqxb
+.section sdqya
+.section sdqyb
+.section sdqza
+.section sdqzb
+.section sdq1a
+.section sdq1b
+.section sdq2a
+.section sdq2b
+.section sdq3a
+.section sdq3b
+.section sdq4a
+.section sdq4b
+.section sdq5a
+.section sdq5b
+.section sdq6a
+.section sdq6b
+.section sdq7a
+.section sdq7b
+.section sdq8a
+.section sdq8b
+.section sdq9a
+.section sdq9b
+.section sdq0a
+.section sdq0b
+.section sdraa
+.section sdrab
+.section sdrba
+.section sdrbb
+.section sdrca
+.section sdrcb
+.section sdrda
+.section sdrdb
+.section sdrea
+.section sdreb
+.section sdrfa
+.section sdrfb
+.section sdrga
+.section sdrgb
+.section sdrha
+.section sdrhb
+.section sdria
+.section sdrib
+.section sdrja
+.section sdrjb
+.section sdrka
+.section sdrkb
+.section sdrla
+.section sdrlb
+.section sdrma
+.section sdrmb
+.section sdrna
+.section sdrnb
+.section sdroa
+.section sdrob
+.section sdrpa
+.section sdrpb
+.section sdrqa
+.section sdrqb
+.section sdrra
+.section sdrrb
+.section sdrsa
+.section sdrsb
+.section sdrta
+.section sdrtb
+.section sdrua
+.section sdrub
+.section sdrva
+.section sdrvb
+.section sdrwa
+.section sdrwb
+.section sdrxa
+.section sdrxb
+.section sdrya
+.section sdryb
+.section sdrza
+.section sdrzb
+.section sdr1a
+.section sdr1b
+.section sdr2a
+.section sdr2b
+.section sdr3a
+.section sdr3b
+.section sdr4a
+.section sdr4b
+.section sdr5a
+.section sdr5b
+.section sdr6a
+.section sdr6b
+.section sdr7a
+.section sdr7b
+.section sdr8a
+.section sdr8b
+.section sdr9a
+.section sdr9b
+.section sdr0a
+.section sdr0b
+.section sdsaa
+.section sdsab
+.section sdsba
+.section sdsbb
+.section sdsca
+.section sdscb
+.section sdsda
+.section sdsdb
+.section sdsea
+.section sdseb
+.section sdsfa
+.section sdsfb
+.section sdsga
+.section sdsgb
+.section sdsha
+.section sdshb
+.section sdsia
+.section sdsib
+.section sdsja
+.section sdsjb
+.section sdska
+.section sdskb
+.section sdsla
+.section sdslb
+.section sdsma
+.section sdsmb
+.section sdsna
+.section sdsnb
+.section sdsoa
+.section sdsob
+.section sdspa
+.section sdspb
+.section sdsqa
+.section sdsqb
+.section sdsra
+.section sdsrb
+.section sdssa
+.section sdssb
+.section sdsta
+.section sdstb
+.section sdsua
+.section sdsub
+.section sdsva
+.section sdsvb
+.section sdswa
+.section sdswb
+.section sdsxa
+.section sdsxb
+.section sdsya
+.section sdsyb
+.section sdsza
+.section sdszb
+.section sds1a
+.section sds1b
+.section sds2a
+.section sds2b
+.section sds3a
+.section sds3b
+.section sds4a
+.section sds4b
+.section sds5a
+.section sds5b
+.section sds6a
+.section sds6b
+.section sds7a
+.section sds7b
+.section sds8a
+.section sds8b
+.section sds9a
+.section sds9b
+.section sds0a
+.section sds0b
+.section sdtaa
+.section sdtab
+.section sdtba
+.section sdtbb
+.section sdtca
+.section sdtcb
+.section sdtda
+.section sdtdb
+.section sdtea
+.section sdteb
+.section sdtfa
+.section sdtfb
+.section sdtga
+.section sdtgb
+.section sdtha
+.section sdthb
+.section sdtia
+.section sdtib
+.section sdtja
+.section sdtjb
+.section sdtka
+.section sdtkb
+.section sdtla
+.section sdtlb
+.section sdtma
+.section sdtmb
+.section sdtna
+.section sdtnb
+.section sdtoa
+.section sdtob
+.section sdtpa
+.section sdtpb
+.section sdtqa
+.section sdtqb
+.section sdtra
+.section sdtrb
+.section sdtsa
+.section sdtsb
+.section sdtta
+.section sdttb
+.section sdtua
+.section sdtub
+.section sdtva
+.section sdtvb
+.section sdtwa
+.section sdtwb
+.section sdtxa
+.section sdtxb
+.section sdtya
+.section sdtyb
+.section sdtza
+.section sdtzb
+.section sdt1a
+.section sdt1b
+.section sdt2a
+.section sdt2b
+.section sdt3a
+.section sdt3b
+.section sdt4a
+.section sdt4b
+.section sdt5a
+.section sdt5b
+.section sdt6a
+.section sdt6b
+.section sdt7a
+.section sdt7b
+.section sdt8a
+.section sdt8b
+.section sdt9a
+.section sdt9b
+.section sdt0a
+.section sdt0b
+.section sduaa
+.section sduab
+.section sduba
+.section sdubb
+.section sduca
+.section sducb
+.section sduda
+.section sdudb
+.section sduea
+.section sdueb
+.section sdufa
+.section sdufb
+.section sduga
+.section sdugb
+.section sduha
+.section sduhb
+.section sduia
+.section sduib
+.section sduja
+.section sdujb
+.section sduka
+.section sdukb
+.section sdula
+.section sdulb
+.section sduma
+.section sdumb
+.section sduna
+.section sdunb
+.section sduoa
+.section sduob
+.section sdupa
+.section sdupb
+.section sduqa
+.section sduqb
+.section sdura
+.section sdurb
+.section sdusa
+.section sdusb
+.section sduta
+.section sdutb
+.section sduua
+.section sduub
+.section sduva
+.section sduvb
+.section sduwa
+.section sduwb
+.section sduxa
+.section sduxb
+.section sduya
+.section sduyb
+.section sduza
+.section sduzb
+.section sdu1a
+.section sdu1b
+.section sdu2a
+.section sdu2b
+.section sdu3a
+.section sdu3b
+.section sdu4a
+.section sdu4b
+.section sdu5a
+.section sdu5b
+.section sdu6a
+.section sdu6b
+.section sdu7a
+.section sdu7b
+.section sdu8a
+.section sdu8b
+.section sdu9a
+.section sdu9b
+.section sdu0a
+.section sdu0b
+.section sdvaa
+.section sdvab
+.section sdvba
+.section sdvbb
+.section sdvca
+.section sdvcb
+.section sdvda
+.section sdvdb
+.section sdvea
+.section sdveb
+.section sdvfa
+.section sdvfb
+.section sdvga
+.section sdvgb
+.section sdvha
+.section sdvhb
+.section sdvia
+.section sdvib
+.section sdvja
+.section sdvjb
+.section sdvka
+.section sdvkb
+.section sdvla
+.section sdvlb
+.section sdvma
+.section sdvmb
+.section sdvna
+.section sdvnb
+.section sdvoa
+.section sdvob
+.section sdvpa
+.section sdvpb
+.section sdvqa
+.section sdvqb
+.section sdvra
+.section sdvrb
+.section sdvsa
+.section sdvsb
+.section sdvta
+.section sdvtb
+.section sdvua
+.section sdvub
+.section sdvva
+.section sdvvb
+.section sdvwa
+.section sdvwb
+.section sdvxa
+.section sdvxb
+.section sdvya
+.section sdvyb
+.section sdvza
+.section sdvzb
+.section sdv1a
+.section sdv1b
+.section sdv2a
+.section sdv2b
+.section sdv3a
+.section sdv3b
+.section sdv4a
+.section sdv4b
+.section sdv5a
+.section sdv5b
+.section sdv6a
+.section sdv6b
+.section sdv7a
+.section sdv7b
+.section sdv8a
+.section sdv8b
+.section sdv9a
+.section sdv9b
+.section sdv0a
+.section sdv0b
+.section sdwaa
+.section sdwab
+.section sdwba
+.section sdwbb
+.section sdwca
+.section sdwcb
+.section sdwda
+.section sdwdb
+.section sdwea
+.section sdweb
+.section sdwfa
+.section sdwfb
+.section sdwga
+.section sdwgb
+.section sdwha
+.section sdwhb
+.section sdwia
+.section sdwib
+.section sdwja
+.section sdwjb
+.section sdwka
+.section sdwkb
+.section sdwla
+.section sdwlb
+.section sdwma
+.section sdwmb
+.section sdwna
+.section sdwnb
+.section sdwoa
+.section sdwob
+.section sdwpa
+.section sdwpb
+.section sdwqa
+.section sdwqb
+.section sdwra
+.section sdwrb
+.section sdwsa
+.section sdwsb
+.section sdwta
+.section sdwtb
+.section sdwua
+.section sdwub
+.section sdwva
+.section sdwvb
+.section sdwwa
+.section sdwwb
+.section sdwxa
+.section sdwxb
+.section sdwya
+.section sdwyb
+.section sdwza
+.section sdwzb
+.section sdw1a
+.section sdw1b
+.section sdw2a
+.section sdw2b
+.section sdw3a
+.section sdw3b
+.section sdw4a
+.section sdw4b
+.section sdw5a
+.section sdw5b
+.section sdw6a
+.section sdw6b
+.section sdw7a
+.section sdw7b
+.section sdw8a
+.section sdw8b
+.section sdw9a
+.section sdw9b
+.section sdw0a
+.section sdw0b
+.section sdxaa
+.section sdxab
+.section sdxba
+.section sdxbb
+.section sdxca
+.section sdxcb
+.section sdxda
+.section sdxdb
+.section sdxea
+.section sdxeb
+.section sdxfa
+.section sdxfb
+.section sdxga
+.section sdxgb
+.section sdxha
+.section sdxhb
+.section sdxia
+.section sdxib
+.section sdxja
+.section sdxjb
+.section sdxka
+.section sdxkb
+.section sdxla
+.section sdxlb
+.section sdxma
+.section sdxmb
+.section sdxna
+.section sdxnb
+.section sdxoa
+.section sdxob
+.section sdxpa
+.section sdxpb
+.section sdxqa
+.section sdxqb
+.section sdxra
+.section sdxrb
+.section sdxsa
+.section sdxsb
+.section sdxta
+.section sdxtb
+.section sdxua
+.section sdxub
+.section sdxva
+.section sdxvb
+.section sdxwa
+.section sdxwb
+.section sdxxa
+.section sdxxb
+.section sdxya
+.section sdxyb
+.section sdxza
+.section sdxzb
+.section sdx1a
+.section sdx1b
+.section sdx2a
+.section sdx2b
+.section sdx3a
+.section sdx3b
+.section sdx4a
+.section sdx4b
+.section sdx5a
+.section sdx5b
+.section sdx6a
+.section sdx6b
+.section sdx7a
+.section sdx7b
+.section sdx8a
+.section sdx8b
+.section sdx9a
+.section sdx9b
+.section sdx0a
+.section sdx0b
+.section sdyaa
+.section sdyab
+.section sdyba
+.section sdybb
+.section sdyca
+.section sdycb
+.section sdyda
+.section sdydb
+.section sdyea
+.section sdyeb
+.section sdyfa
+.section sdyfb
+.section sdyga
+.section sdygb
+.section sdyha
+.section sdyhb
+.section sdyia
+.section sdyib
+.section sdyja
+.section sdyjb
+.section sdyka
+.section sdykb
+.section sdyla
+.section sdylb
+.section sdyma
+.section sdymb
+.section sdyna
+.section sdynb
+.section sdyoa
+.section sdyob
+.section sdypa
+.section sdypb
+.section sdyqa
+.section sdyqb
+.section sdyra
+.section sdyrb
+.section sdysa
+.section sdysb
+.section sdyta
+.section sdytb
+.section sdyua
+.section sdyub
+.section sdyva
+.section sdyvb
+.section sdywa
+.section sdywb
+.section sdyxa
+.section sdyxb
+.section sdyya
+.section sdyyb
+.section sdyza
+.section sdyzb
+.section sdy1a
+.section sdy1b
+.section sdy2a
+.section sdy2b
+.section sdy3a
+.section sdy3b
+.section sdy4a
+.section sdy4b
+.section sdy5a
+.section sdy5b
+.section sdy6a
+.section sdy6b
+.section sdy7a
+.section sdy7b
+.section sdy8a
+.section sdy8b
+.section sdy9a
+.section sdy9b
+.section sdy0a
+.section sdy0b
+.section sdzaa
+.section sdzab
+.section sdzba
+.section sdzbb
+.section sdzca
+.section sdzcb
+.section sdzda
+.section sdzdb
+.section sdzea
+.section sdzeb
+.section sdzfa
+.section sdzfb
+.section sdzga
+.section sdzgb
+.section sdzha
+.section sdzhb
+.section sdzia
+.section sdzib
+.section sdzja
+.section sdzjb
+.section sdzka
+.section sdzkb
+.section sdzla
+.section sdzlb
+.section sdzma
+.section sdzmb
+.section sdzna
+.section sdznb
+.section sdzoa
+.section sdzob
+.section sdzpa
+.section sdzpb
+.section sdzqa
+.section sdzqb
+.section sdzra
+.section sdzrb
+.section sdzsa
+.section sdzsb
+.section sdzta
+.section sdztb
+.section sdzua
+.section sdzub
+.section sdzva
+.section sdzvb
+.section sdzwa
+.section sdzwb
+.section sdzxa
+.section sdzxb
+.section sdzya
+.section sdzyb
+.section sdzza
+.section sdzzb
+.section sdz1a
+.section sdz1b
+.section sdz2a
+.section sdz2b
+.section sdz3a
+.section sdz3b
+.section sdz4a
+.section sdz4b
+.section sdz5a
+.section sdz5b
+.section sdz6a
+.section sdz6b
+.section sdz7a
+.section sdz7b
+.section sdz8a
+.section sdz8b
+.section sdz9a
+.section sdz9b
+.section sdz0a
+.section sdz0b
+.section sd1aa
+.section sd1ab
+.section sd1ba
+.section sd1bb
+.section sd1ca
+.section sd1cb
+.section sd1da
+.section sd1db
+.section sd1ea
+.section sd1eb
+.section sd1fa
+.section sd1fb
+.section sd1ga
+.section sd1gb
+.section sd1ha
+.section sd1hb
+.section sd1ia
+.section sd1ib
+.section sd1ja
+.section sd1jb
+.section sd1ka
+.section sd1kb
+.section sd1la
+.section sd1lb
+.section sd1ma
+.section sd1mb
+.section sd1na
+.section sd1nb
+.section sd1oa
+.section sd1ob
+.section sd1pa
+.section sd1pb
+.section sd1qa
+.section sd1qb
+.section sd1ra
+.section sd1rb
+.section sd1sa
+.section sd1sb
+.section sd1ta
+.section sd1tb
+.section sd1ua
+.section sd1ub
+.section sd1va
+.section sd1vb
+.section sd1wa
+.section sd1wb
+.section sd1xa
+.section sd1xb
+.section sd1ya
+.section sd1yb
+.section sd1za
+.section sd1zb
+.section sd11a
+.section sd11b
+.section sd12a
+.section sd12b
+.section sd13a
+.section sd13b
+.section sd14a
+.section sd14b
+.section sd15a
+.section sd15b
+.section sd16a
+.section sd16b
+.section sd17a
+.section sd17b
+.section sd18a
+.section sd18b
+.section sd19a
+.section sd19b
+.section sd10a
+.section sd10b
+.section sd2aa
+.section sd2ab
+.section sd2ba
+.section sd2bb
+.section sd2ca
+.section sd2cb
+.section sd2da
+.section sd2db
+.section sd2ea
+.section sd2eb
+.section sd2fa
+.section sd2fb
+.section sd2ga
+.section sd2gb
+.section sd2ha
+.section sd2hb
+.section sd2ia
+.section sd2ib
+.section sd2ja
+.section sd2jb
+.section sd2ka
+.section sd2kb
+.section sd2la
+.section sd2lb
+.section sd2ma
+.section sd2mb
+.section sd2na
+.section sd2nb
+.section sd2oa
+.section sd2ob
+.section sd2pa
+.section sd2pb
+.section sd2qa
+.section sd2qb
+.section sd2ra
+.section sd2rb
+.section sd2sa
+.section sd2sb
+.section sd2ta
+.section sd2tb
+.section sd2ua
+.section sd2ub
+.section sd2va
+.section sd2vb
+.section sd2wa
+.section sd2wb
+.section sd2xa
+.section sd2xb
+.section sd2ya
+.section sd2yb
+.section sd2za
+.section sd2zb
+.section sd21a
+.section sd21b
+.section sd22a
+.section sd22b
+.section sd23a
+.section sd23b
+.section sd24a
+.section sd24b
+.section sd25a
+.section sd25b
+.section sd26a
+.section sd26b
+.section sd27a
+.section sd27b
+.section sd28a
+.section sd28b
+.section sd29a
+.section sd29b
+.section sd20a
+.section sd20b
+.section sd3aa
+.section sd3ab
+.section sd3ba
+.section sd3bb
+.section sd3ca
+.section sd3cb
+.section sd3da
+.section sd3db
+.section sd3ea
+.section sd3eb
+.section sd3fa
+.section sd3fb
+.section sd3ga
+.section sd3gb
+.section sd3ha
+.section sd3hb
+.section sd3ia
+.section sd3ib
+.section sd3ja
+.section sd3jb
+.section sd3ka
+.section sd3kb
+.section sd3la
+.section sd3lb
+.section sd3ma
+.section sd3mb
+.section sd3na
+.section sd3nb
+.section sd3oa
+.section sd3ob
+.section sd3pa
+.section sd3pb
+.section sd3qa
+.section sd3qb
+.section sd3ra
+.section sd3rb
+.section sd3sa
+.section sd3sb
+.section sd3ta
+.section sd3tb
+.section sd3ua
+.section sd3ub
+.section sd3va
+.section sd3vb
+.section sd3wa
+.section sd3wb
+.section sd3xa
+.section sd3xb
+.section sd3ya
+.section sd3yb
+.section sd3za
+.section sd3zb
+.section sd31a
+.section sd31b
+.section sd32a
+.section sd32b
+.section sd33a
+.section sd33b
+.section sd34a
+.section sd34b
+.section sd35a
+.section sd35b
+.section sd36a
+.section sd36b
+.section sd37a
+.section sd37b
+.section sd38a
+.section sd38b
+.section sd39a
+.section sd39b
+.section sd30a
+.section sd30b
+.section sd4aa
+.section sd4ab
+.section sd4ba
+.section sd4bb
+.section sd4ca
+.section sd4cb
+.section sd4da
+.section sd4db
+.section sd4ea
+.section sd4eb
+.section sd4fa
+.section sd4fb
+.section sd4ga
+.section sd4gb
+.section sd4ha
+.section sd4hb
+.section sd4ia
+.section sd4ib
+.section sd4ja
+.section sd4jb
+.section sd4ka
+.section sd4kb
+.section sd4la
+.section sd4lb
+.section sd4ma
+.section sd4mb
+.section sd4na
+.section sd4nb
+.section sd4oa
+.section sd4ob
+.section sd4pa
+.section sd4pb
+.section sd4qa
+.section sd4qb
+.section sd4ra
+.section sd4rb
+.section sd4sa
+.section sd4sb
+.section sd4ta
+.section sd4tb
+.section sd4ua
+.section sd4ub
+.section sd4va
+.section sd4vb
+.section sd4wa
+.section sd4wb
+.section sd4xa
+.section sd4xb
+.section sd4ya
+.section sd4yb
+.section sd4za
+.section sd4zb
+.section sd41a
+.section sd41b
+.section sd42a
+.section sd42b
+.section sd43a
+.section sd43b
+.section sd44a
+.section sd44b
+.section sd45a
+.section sd45b
+.section sd46a
+.section sd46b
+.section sd47a
+.section sd47b
+.section sd48a
+.section sd48b
+.section sd49a
+.section sd49b
+.section sd40a
+.section sd40b
+.section sd5aa
+.section sd5ab
+.section sd5ba
+.section sd5bb
+.section sd5ca
+.section sd5cb
+.section sd5da
+.section sd5db
+.section sd5ea
+.section sd5eb
+.section sd5fa
+.section sd5fb
+.section sd5ga
+.section sd5gb
+.section sd5ha
+.section sd5hb
+.section sd5ia
+.section sd5ib
+.section sd5ja
+.section sd5jb
+.section sd5ka
+.section sd5kb
+.section sd5la
+.section sd5lb
+.section sd5ma
+.section sd5mb
+.section sd5na
+.section sd5nb
+.section sd5oa
+.section sd5ob
+.section sd5pa
+.section sd5pb
+.section sd5qa
+.section sd5qb
+.section sd5ra
+.section sd5rb
+.section sd5sa
+.section sd5sb
+.section sd5ta
+.section sd5tb
+.section sd5ua
+.section sd5ub
+.section sd5va
+.section sd5vb
+.section sd5wa
+.section sd5wb
+.section sd5xa
+.section sd5xb
+.section sd5ya
+.section sd5yb
+.section sd5za
+.section sd5zb
+.section sd51a
+.section sd51b
+.section sd52a
+.section sd52b
+.section sd53a
+.section sd53b
+.section sd54a
+.section sd54b
+.section sd55a
+.section sd55b
+.section sd56a
+.section sd56b
+.section sd57a
+.section sd57b
+.section sd58a
+.section sd58b
+.section sd59a
+.section sd59b
+.section sd50a
+.section sd50b
+.section sd6aa
+.section sd6ab
+.section sd6ba
+.section sd6bb
+.section sd6ca
+.section sd6cb
+.section sd6da
+.section sd6db
+.section sd6ea
+.section sd6eb
+.section sd6fa
+.section sd6fb
+.section sd6ga
+.section sd6gb
+.section sd6ha
+.section sd6hb
+.section sd6ia
+.section sd6ib
+.section sd6ja
+.section sd6jb
+.section sd6ka
+.section sd6kb
+.section sd6la
+.section sd6lb
+.section sd6ma
+.section sd6mb
+.section sd6na
+.section sd6nb
+.section sd6oa
+.section sd6ob
+.section sd6pa
+.section sd6pb
+.section sd6qa
+.section sd6qb
+.section sd6ra
+.section sd6rb
+.section sd6sa
+.section sd6sb
+.section sd6ta
+.section sd6tb
+.section sd6ua
+.section sd6ub
+.section sd6va
+.section sd6vb
+.section sd6wa
+.section sd6wb
+.section sd6xa
+.section sd6xb
+.section sd6ya
+.section sd6yb
+.section sd6za
+.section sd6zb
+.section sd61a
+.section sd61b
+.section sd62a
+.section sd62b
+.section sd63a
+.section sd63b
+.section sd64a
+.section sd64b
+.section sd65a
+.section sd65b
+.section sd66a
+.section sd66b
+.section sd67a
+.section sd67b
+.section sd68a
+.section sd68b
+.section sd69a
+.section sd69b
+.section sd60a
+.section sd60b
+.section sd7aa
+.section sd7ab
+.section sd7ba
+.section sd7bb
+.section sd7ca
+.section sd7cb
+.section sd7da
+.section sd7db
+.section sd7ea
+.section sd7eb
+.section sd7fa
+.section sd7fb
+.section sd7ga
+.section sd7gb
+.section sd7ha
+.section sd7hb
+.section sd7ia
+.section sd7ib
+.section sd7ja
+.section sd7jb
+.section sd7ka
+.section sd7kb
+.section sd7la
+.section sd7lb
+.section sd7ma
+.section sd7mb
+.section sd7na
+.section sd7nb
+.section sd7oa
+.section sd7ob
+.section sd7pa
+.section sd7pb
+.section sd7qa
+.section sd7qb
+.section sd7ra
+.section sd7rb
+.section sd7sa
+.section sd7sb
+.section sd7ta
+.section sd7tb
+.section sd7ua
+.section sd7ub
+.section sd7va
+.section sd7vb
+.section sd7wa
+.section sd7wb
+.section sd7xa
+.section sd7xb
+.section sd7ya
+.section sd7yb
+.section sd7za
+.section sd7zb
+.section sd71a
+.section sd71b
+.section sd72a
+.section sd72b
+.section sd73a
+.section sd73b
+.section sd74a
+.section sd74b
+.section sd75a
+.section sd75b
+.section sd76a
+.section sd76b
+.section sd77a
+.section sd77b
+.section sd78a
+.section sd78b
+.section sd79a
+.section sd79b
+.section sd70a
+.section sd70b
+.section sd8aa
+.section sd8ab
+.section sd8ba
+.section sd8bb
+.section sd8ca
+.section sd8cb
+.section sd8da
+.section sd8db
+.section sd8ea
+.section sd8eb
+.section sd8fa
+.section sd8fb
+.section sd8ga
+.section sd8gb
+.section sd8ha
+.section sd8hb
+.section sd8ia
+.section sd8ib
+.section sd8ja
+.section sd8jb
+.section sd8ka
+.section sd8kb
+.section sd8la
+.section sd8lb
+.section sd8ma
+.section sd8mb
+.section sd8na
+.section sd8nb
+.section sd8oa
+.section sd8ob
+.section sd8pa
+.section sd8pb
+.section sd8qa
+.section sd8qb
+.section sd8ra
+.section sd8rb
+.section sd8sa
+.section sd8sb
+.section sd8ta
+.section sd8tb
+.section sd8ua
+.section sd8ub
+.section sd8va
+.section sd8vb
+.section sd8wa
+.section sd8wb
+.section sd8xa
+.section sd8xb
+.section sd8ya
+.section sd8yb
+.section sd8za
+.section sd8zb
+.section sd81a
+.section sd81b
+.section sd82a
+.section sd82b
+.section sd83a
+.section sd83b
+.section sd84a
+.section sd84b
+.section sd85a
+.section sd85b
+.section sd86a
+.section sd86b
+.section sd87a
+.section sd87b
+.section sd88a
+.section sd88b
+.section sd89a
+.section sd89b
+.section sd80a
+.section sd80b
+.section sd9aa
+.section sd9ab
+.section sd9ba
+.section sd9bb
+.section sd9ca
+.section sd9cb
+.section sd9da
+.section sd9db
+.section sd9ea
+.section sd9eb
+.section sd9fa
+.section sd9fb
+.section sd9ga
+.section sd9gb
+.section sd9ha
+.section sd9hb
+.section sd9ia
+.section sd9ib
+.section sd9ja
+.section sd9jb
+.section sd9ka
+.section sd9kb
+.section sd9la
+.section sd9lb
+.section sd9ma
+.section sd9mb
+.section sd9na
+.section sd9nb
+.section sd9oa
+.section sd9ob
+.section sd9pa
+.section sd9pb
+.section sd9qa
+.section sd9qb
+.section sd9ra
+.section sd9rb
+.section sd9sa
+.section sd9sb
+.section sd9ta
+.section sd9tb
+.section sd9ua
+.section sd9ub
+.section sd9va
+.section sd9vb
+.section sd9wa
+.section sd9wb
+.section sd9xa
+.section sd9xb
+.section sd9ya
+.section sd9yb
+.section sd9za
+.section sd9zb
+.section sd91a
+.section sd91b
+.section sd92a
+.section sd92b
+.section sd93a
+.section sd93b
+.section sd94a
+.section sd94b
+.section sd95a
+.section sd95b
+.section sd96a
+.section sd96b
+.section sd97a
+.section sd97b
+.section sd98a
+.section sd98b
+.section sd99a
+.section sd99b
+.section sd90a
+.section sd90b
+.section sd0aa
+.section sd0ab
+.section sd0ba
+.section sd0bb
+.section sd0ca
+.section sd0cb
+.section sd0da
+.section sd0db
+.section sd0ea
+.section sd0eb
+.section sd0fa
+.section sd0fb
+.section sd0ga
+.section sd0gb
+.section sd0ha
+.section sd0hb
+.section sd0ia
+.section sd0ib
+.section sd0ja
+.section sd0jb
+.section sd0ka
+.section sd0kb
+.section sd0la
+.section sd0lb
+.section sd0ma
+.section sd0mb
+.section sd0na
+.section sd0nb
+.section sd0oa
+.section sd0ob
+.section sd0pa
+.section sd0pb
+.section sd0qa
+.section sd0qb
+.section sd0ra
+.section sd0rb
+.section sd0sa
+.section sd0sb
+.section sd0ta
+.section sd0tb
+.section sd0ua
+.section sd0ub
+.section sd0va
+.section sd0vb
+.section sd0wa
+.section sd0wb
+.section sd0xa
+.section sd0xb
+.section sd0ya
+.section sd0yb
+.section sd0za
+.section sd0zb
+.section sd01a
+.section sd01b
+.section sd02a
+.section sd02b
+.section sd03a
+.section sd03b
+.section sd04a
+.section sd04b
+.section sd05a
+.section sd05b
+.section sd06a
+.section sd06b
+.section sd07a
+.section sd07b
+.section sd08a
+.section sd08b
+.section sd09a
+.section sd09b
+.section sd00a
+.section sd00b
+.section seaaa
+.section seaab
+.section seaba
+.section seabb
+.section seaca
+.section seacb
+.section seada
+.section seadb
+.section seaea
+.section seaeb
+.section seafa
+.section seafb
+.section seaga
+.section seagb
+.section seaha
+.section seahb
+.section seaia
+.section seaib
+.section seaja
+.section seajb
+.section seaka
+.section seakb
+.section seala
+.section sealb
+.section seama
+.section seamb
+.section seana
+.section seanb
+.section seaoa
+.section seaob
+.section seapa
+.section seapb
+.section seaqa
+.section seaqb
+.section seara
+.section searb
+.section seasa
+.section seasb
+.section seata
+.section seatb
+.section seaua
+.section seaub
+.section seava
+.section seavb
+.section seawa
+.section seawb
+.section seaxa
+.section seaxb
+.section seaya
+.section seayb
+.section seaza
+.section seazb
+.section sea1a
+.section sea1b
+.section sea2a
+.section sea2b
+.section sea3a
+.section sea3b
+.section sea4a
+.section sea4b
+.section sea5a
+.section sea5b
+.section sea6a
+.section sea6b
+.section sea7a
+.section sea7b
+.section sea8a
+.section sea8b
+.section sea9a
+.section sea9b
+.section sea0a
+.section sea0b
+.section sebaa
+.section sebab
+.section sebba
+.section sebbb
+.section sebca
+.section sebcb
+.section sebda
+.section sebdb
+.section sebea
+.section sebeb
+.section sebfa
+.section sebfb
+.section sebga
+.section sebgb
+.section sebha
+.section sebhb
+.section sebia
+.section sebib
+.section sebja
+.section sebjb
+.section sebka
+.section sebkb
+.section sebla
+.section seblb
+.section sebma
+.section sebmb
+.section sebna
+.section sebnb
+.section seboa
+.section sebob
+.section sebpa
+.section sebpb
+.section sebqa
+.section sebqb
+.section sebra
+.section sebrb
+.section sebsa
+.section sebsb
+.section sebta
+.section sebtb
+.section sebua
+.section sebub
+.section sebva
+.section sebvb
+.section sebwa
+.section sebwb
+.section sebxa
+.section sebxb
+.section sebya
+.section sebyb
+.section sebza
+.section sebzb
+.section seb1a
+.section seb1b
+.section seb2a
+.section seb2b
+.section seb3a
+.section seb3b
+.section seb4a
+.section seb4b
+.section seb5a
+.section seb5b
+.section seb6a
+.section seb6b
+.section seb7a
+.section seb7b
+.section seb8a
+.section seb8b
+.section seb9a
+.section seb9b
+.section seb0a
+.section seb0b
+.section secaa
+.section secab
+.section secba
+.section secbb
+.section secca
+.section seccb
+.section secda
+.section secdb
+.section secea
+.section seceb
+.section secfa
+.section secfb
+.section secga
+.section secgb
+.section secha
+.section sechb
+.section secia
+.section secib
+.section secja
+.section secjb
+.section secka
+.section seckb
+.section secla
+.section seclb
+.section secma
+.section secmb
+.section secna
+.section secnb
+.section secoa
+.section secob
+.section secpa
+.section secpb
+.section secqa
+.section secqb
+.section secra
+.section secrb
+.section secsa
+.section secsb
+.section secta
+.section sectb
+.section secua
+.section secub
+.section secva
+.section secvb
+.section secwa
+.section secwb
+.section secxa
+.section secxb
+.section secya
+.section secyb
+.section secza
+.section seczb
+.section sec1a
+.section sec1b
+.section sec2a
+.section sec2b
+.section sec3a
+.section sec3b
+.section sec4a
+.section sec4b
+.section sec5a
+.section sec5b
+.section sec6a
+.section sec6b
+.section sec7a
+.section sec7b
+.section sec8a
+.section sec8b
+.section sec9a
+.section sec9b
+.section sec0a
+.section sec0b
+.section sedaa
+.section sedab
+.section sedba
+.section sedbb
+.section sedca
+.section sedcb
+.section sedda
+.section seddb
+.section sedea
+.section sedeb
+.section sedfa
+.section sedfb
+.section sedga
+.section sedgb
+.section sedha
+.section sedhb
+.section sedia
+.section sedib
+.section sedja
+.section sedjb
+.section sedka
+.section sedkb
+.section sedla
+.section sedlb
+.section sedma
+.section sedmb
+.section sedna
+.section sednb
+.section sedoa
+.section sedob
+.section sedpa
+.section sedpb
+.section sedqa
+.section sedqb
+.section sedra
+.section sedrb
+.section sedsa
+.section sedsb
+.section sedta
+.section sedtb
+.section sedua
+.section sedub
+.section sedva
+.section sedvb
+.section sedwa
+.section sedwb
+.section sedxa
+.section sedxb
+.section sedya
+.section sedyb
+.section sedza
+.section sedzb
+.section sed1a
+.section sed1b
+.section sed2a
+.section sed2b
+.section sed3a
+.section sed3b
+.section sed4a
+.section sed4b
+.section sed5a
+.section sed5b
+.section sed6a
+.section sed6b
+.section sed7a
+.section sed7b
+.section sed8a
+.section sed8b
+.section sed9a
+.section sed9b
+.section sed0a
+.section sed0b
+.section seeaa
+.section seeab
+.section seeba
+.section seebb
+.section seeca
+.section seecb
+.section seeda
+.section seedb
+.section seeea
+.section seeeb
+.section seefa
+.section seefb
+.section seega
+.section seegb
+.section seeha
+.section seehb
+.section seeia
+.section seeib
+.section seeja
+.section seejb
+.section seeka
+.section seekb
+.section seela
+.section seelb
+.section seema
+.section seemb
+.section seena
+.section seenb
+.section seeoa
+.section seeob
+.section seepa
+.section seepb
+.section seeqa
+.section seeqb
+.section seera
+.section seerb
+.section seesa
+.section seesb
+.section seeta
+.section seetb
+.section seeua
+.section seeub
+.section seeva
+.section seevb
+.section seewa
+.section seewb
+.section seexa
+.section seexb
+.section seeya
+.section seeyb
+.section seeza
+.section seezb
+.section see1a
+.section see1b
+.section see2a
+.section see2b
+.section see3a
+.section see3b
+.section see4a
+.section see4b
+.section see5a
+.section see5b
+.section see6a
+.section see6b
+.section see7a
+.section see7b
+.section see8a
+.section see8b
+.section see9a
+.section see9b
+.section see0a
+.section see0b
+.section sefaa
+.section sefab
+.section sefba
+.section sefbb
+.section sefca
+.section sefcb
+.section sefda
+.section sefdb
+.section sefea
+.section sefeb
+.section seffa
+.section seffb
+.section sefga
+.section sefgb
+.section sefha
+.section sefhb
+.section sefia
+.section sefib
+.section sefja
+.section sefjb
+.section sefka
+.section sefkb
+.section sefla
+.section seflb
+.section sefma
+.section sefmb
+.section sefna
+.section sefnb
+.section sefoa
+.section sefob
+.section sefpa
+.section sefpb
+.section sefqa
+.section sefqb
+.section sefra
+.section sefrb
+.section sefsa
+.section sefsb
+.section sefta
+.section seftb
+.section sefua
+.section sefub
+.section sefva
+.section sefvb
+.section sefwa
+.section sefwb
+.section sefxa
+.section sefxb
+.section sefya
+.section sefyb
+.section sefza
+.section sefzb
+.section sef1a
+.section sef1b
+.section sef2a
+.section sef2b
+.section sef3a
+.section sef3b
+.section sef4a
+.section sef4b
+.section sef5a
+.section sef5b
+.section sef6a
+.section sef6b
+.section sef7a
+.section sef7b
+.section sef8a
+.section sef8b
+.section sef9a
+.section sef9b
+.section sef0a
+.section sef0b
+.section segaa
+.section segab
+.section segba
+.section segbb
+.section segca
+.section segcb
+.section segda
+.section segdb
+.section segea
+.section segeb
+.section segfa
+.section segfb
+.section segga
+.section seggb
+.section segha
+.section seghb
+.section segia
+.section segib
+.section segja
+.section segjb
+.section segka
+.section segkb
+.section segla
+.section seglb
+.section segma
+.section segmb
+.section segna
+.section segnb
+.section segoa
+.section segob
+.section segpa
+.section segpb
+.section segqa
+.section segqb
+.section segra
+.section segrb
+.section segsa
+.section segsb
+.section segta
+.section segtb
+.section segua
+.section segub
+.section segva
+.section segvb
+.section segwa
+.section segwb
+.section segxa
+.section segxb
+.section segya
+.section segyb
+.section segza
+.section segzb
+.section seg1a
+.section seg1b
+.section seg2a
+.section seg2b
+.section seg3a
+.section seg3b
+.section seg4a
+.section seg4b
+.section seg5a
+.section seg5b
+.section seg6a
+.section seg6b
+.section seg7a
+.section seg7b
+.section seg8a
+.section seg8b
+.section seg9a
+.section seg9b
+.section seg0a
+.section seg0b
+.section sehaa
+.section sehab
+.section sehba
+.section sehbb
+.section sehca
+.section sehcb
+.section sehda
+.section sehdb
+.section sehea
+.section seheb
+.section sehfa
+.section sehfb
+.section sehga
+.section sehgb
+.section sehha
+.section sehhb
+.section sehia
+.section sehib
+.section sehja
+.section sehjb
+.section sehka
+.section sehkb
+.section sehla
+.section sehlb
+.section sehma
+.section sehmb
+.section sehna
+.section sehnb
+.section sehoa
+.section sehob
+.section sehpa
+.section sehpb
+.section sehqa
+.section sehqb
+.section sehra
+.section sehrb
+.section sehsa
+.section sehsb
+.section sehta
+.section sehtb
+.section sehua
+.section sehub
+.section sehva
+.section sehvb
+.section sehwa
+.section sehwb
+.section sehxa
+.section sehxb
+.section sehya
+.section sehyb
+.section sehza
+.section sehzb
+.section seh1a
+.section seh1b
+.section seh2a
+.section seh2b
+.section seh3a
+.section seh3b
+.section seh4a
+.section seh4b
+.section seh5a
+.section seh5b
+.section seh6a
+.section seh6b
+.section seh7a
+.section seh7b
+.section seh8a
+.section seh8b
+.section seh9a
+.section seh9b
+.section seh0a
+.section seh0b
+.section seiaa
+.section seiab
+.section seiba
+.section seibb
+.section seica
+.section seicb
+.section seida
+.section seidb
+.section seiea
+.section seieb
+.section seifa
+.section seifb
+.section seiga
+.section seigb
+.section seiha
+.section seihb
+.section seiia
+.section seiib
+.section seija
+.section seijb
+.section seika
+.section seikb
+.section seila
+.section seilb
+.section seima
+.section seimb
+.section seina
+.section seinb
+.section seioa
+.section seiob
+.section seipa
+.section seipb
+.section seiqa
+.section seiqb
+.section seira
+.section seirb
+.section seisa
+.section seisb
+.section seita
+.section seitb
+.section seiua
+.section seiub
+.section seiva
+.section seivb
+.section seiwa
+.section seiwb
+.section seixa
+.section seixb
+.section seiya
+.section seiyb
+.section seiza
+.section seizb
+.section sei1a
+.section sei1b
+.section sei2a
+.section sei2b
+.section sei3a
+.section sei3b
+.section sei4a
+.section sei4b
+.section sei5a
+.section sei5b
+.section sei6a
+.section sei6b
+.section sei7a
+.section sei7b
+.section sei8a
+.section sei8b
+.section sei9a
+.section sei9b
+.section sei0a
+.section sei0b
+.section sejaa
+.section sejab
+.section sejba
+.section sejbb
+.section sejca
+.section sejcb
+.section sejda
+.section sejdb
+.section sejea
+.section sejeb
+.section sejfa
+.section sejfb
+.section sejga
+.section sejgb
+.section sejha
+.section sejhb
+.section sejia
+.section sejib
+.section sejja
+.section sejjb
+.section sejka
+.section sejkb
+.section sejla
+.section sejlb
+.section sejma
+.section sejmb
+.section sejna
+.section sejnb
+.section sejoa
+.section sejob
+.section sejpa
+.section sejpb
+.section sejqa
+.section sejqb
+.section sejra
+.section sejrb
+.section sejsa
+.section sejsb
+.section sejta
+.section sejtb
+.section sejua
+.section sejub
+.section sejva
+.section sejvb
+.section sejwa
+.section sejwb
+.section sejxa
+.section sejxb
+.section sejya
+.section sejyb
+.section sejza
+.section sejzb
+.section sej1a
+.section sej1b
+.section sej2a
+.section sej2b
+.section sej3a
+.section sej3b
+.section sej4a
+.section sej4b
+.section sej5a
+.section sej5b
+.section sej6a
+.section sej6b
+.section sej7a
+.section sej7b
+.section sej8a
+.section sej8b
+.section sej9a
+.section sej9b
+.section sej0a
+.section sej0b
+.section sekaa
+.section sekab
+.section sekba
+.section sekbb
+.section sekca
+.section sekcb
+.section sekda
+.section sekdb
+.section sekea
+.section sekeb
+.section sekfa
+.section sekfb
+.section sekga
+.section sekgb
+.section sekha
+.section sekhb
+.section sekia
+.section sekib
+.section sekja
+.section sekjb
+.section sekka
+.section sekkb
+.section sekla
+.section seklb
+.section sekma
+.section sekmb
+.section sekna
+.section seknb
+.section sekoa
+.section sekob
+.section sekpa
+.section sekpb
+.section sekqa
+.section sekqb
+.section sekra
+.section sekrb
+.section seksa
+.section seksb
+.section sekta
+.section sektb
+.section sekua
+.section sekub
+.section sekva
+.section sekvb
+.section sekwa
+.section sekwb
+.section sekxa
+.section sekxb
+.section sekya
+.section sekyb
+.section sekza
+.section sekzb
+.section sek1a
+.section sek1b
+.section sek2a
+.section sek2b
+.section sek3a
+.section sek3b
+.section sek4a
+.section sek4b
+.section sek5a
+.section sek5b
+.section sek6a
+.section sek6b
+.section sek7a
+.section sek7b
+.section sek8a
+.section sek8b
+.section sek9a
+.section sek9b
+.section sek0a
+.section sek0b
+.section selaa
+.section selab
+.section selba
+.section selbb
+.section selca
+.section selcb
+.section selda
+.section seldb
+.section selea
+.section seleb
+.section selfa
+.section selfb
+.section selga
+.section selgb
+.section selha
+.section selhb
+.section selia
+.section selib
+.section selja
+.section seljb
+.section selka
+.section selkb
+.section sella
+.section sellb
+.section selma
+.section selmb
+.section selna
+.section selnb
+.section seloa
+.section selob
+.section selpa
+.section selpb
+.section selqa
+.section selqb
+.section selra
+.section selrb
+.section selsa
+.section selsb
+.section selta
+.section seltb
+.section selua
+.section selub
+.section selva
+.section selvb
+.section selwa
+.section selwb
+.section selxa
+.section selxb
+.section selya
+.section selyb
+.section selza
+.section selzb
+.section sel1a
+.section sel1b
+.section sel2a
+.section sel2b
+.section sel3a
+.section sel3b
+.section sel4a
+.section sel4b
+.section sel5a
+.section sel5b
+.section sel6a
+.section sel6b
+.section sel7a
+.section sel7b
+.section sel8a
+.section sel8b
+.section sel9a
+.section sel9b
+.section sel0a
+.section sel0b
+.section semaa
+.section semab
+.section semba
+.section sembb
+.section semca
+.section semcb
+.section semda
+.section semdb
+.section semea
+.section semeb
+.section semfa
+.section semfb
+.section semga
+.section semgb
+.section semha
+.section semhb
+.section semia
+.section semib
+.section semja
+.section semjb
+.section semka
+.section semkb
+.section semla
+.section semlb
+.section semma
+.section semmb
+.section semna
+.section semnb
+.section semoa
+.section semob
+.section sempa
+.section sempb
+.section semqa
+.section semqb
+.section semra
+.section semrb
+.section semsa
+.section semsb
+.section semta
+.section semtb
+.section semua
+.section semub
+.section semva
+.section semvb
+.section semwa
+.section semwb
+.section semxa
+.section semxb
+.section semya
+.section semyb
+.section semza
+.section semzb
+.section sem1a
+.section sem1b
+.section sem2a
+.section sem2b
+.section sem3a
+.section sem3b
+.section sem4a
+.section sem4b
+.section sem5a
+.section sem5b
+.section sem6a
+.section sem6b
+.section sem7a
+.section sem7b
+.section sem8a
+.section sem8b
+.section sem9a
+.section sem9b
+.section sem0a
+.section sem0b
+.section senaa
+.section senab
+.section senba
+.section senbb
+.section senca
+.section sencb
+.section senda
+.section sendb
+.section senea
+.section seneb
+.section senfa
+.section senfb
+.section senga
+.section sengb
+.section senha
+.section senhb
+.section senia
+.section senib
+.section senja
+.section senjb
+.section senka
+.section senkb
+.section senla
+.section senlb
+.section senma
+.section senmb
+.section senna
+.section sennb
+.section senoa
+.section senob
+.section senpa
+.section senpb
+.section senqa
+.section senqb
+.section senra
+.section senrb
+.section sensa
+.section sensb
+.section senta
+.section sentb
+.section senua
+.section senub
+.section senva
+.section senvb
+.section senwa
+.section senwb
+.section senxa
+.section senxb
+.section senya
+.section senyb
+.section senza
+.section senzb
+.section sen1a
+.section sen1b
+.section sen2a
+.section sen2b
+.section sen3a
+.section sen3b
+.section sen4a
+.section sen4b
+.section sen5a
+.section sen5b
+.section sen6a
+.section sen6b
+.section sen7a
+.section sen7b
+.section sen8a
+.section sen8b
+.section sen9a
+.section sen9b
+.section sen0a
+.section sen0b
+.section seoaa
+.section seoab
+.section seoba
+.section seobb
+.section seoca
+.section seocb
+.section seoda
+.section seodb
+.section seoea
+.section seoeb
+.section seofa
+.section seofb
+.section seoga
+.section seogb
+.section seoha
+.section seohb
+.section seoia
+.section seoib
+.section seoja
+.section seojb
+.section seoka
+.section seokb
+.section seola
+.section seolb
+.section seoma
+.section seomb
+.section seona
+.section seonb
+.section seooa
+.section seoob
+.section seopa
+.section seopb
+.section seoqa
+.section seoqb
+.section seora
+.section seorb
+.section seosa
+.section seosb
+.section seota
+.section seotb
+.section seoua
+.section seoub
+.section seova
+.section seovb
+.section seowa
+.section seowb
+.section seoxa
+.section seoxb
+.section seoya
+.section seoyb
+.section seoza
+.section seozb
+.section seo1a
+.section seo1b
+.section seo2a
+.section seo2b
+.section seo3a
+.section seo3b
+.section seo4a
+.section seo4b
+.section seo5a
+.section seo5b
+.section seo6a
+.section seo6b
+.section seo7a
+.section seo7b
+.section seo8a
+.section seo8b
+.section seo9a
+.section seo9b
+.section seo0a
+.section seo0b
+.section sepaa
+.section sepab
+.section sepba
+.section sepbb
+.section sepca
+.section sepcb
+.section sepda
+.section sepdb
+.section sepea
+.section sepeb
+.section sepfa
+.section sepfb
+.section sepga
+.section sepgb
+.section sepha
+.section sephb
+.section sepia
+.section sepib
+.section sepja
+.section sepjb
+.section sepka
+.section sepkb
+.section sepla
+.section seplb
+.section sepma
+.section sepmb
+.section sepna
+.section sepnb
+.section sepoa
+.section sepob
+.section seppa
+.section seppb
+.section sepqa
+.section sepqb
+.section sepra
+.section seprb
+.section sepsa
+.section sepsb
+.section septa
+.section septb
+.section sepua
+.section sepub
+.section sepva
+.section sepvb
+.section sepwa
+.section sepwb
+.section sepxa
+.section sepxb
+.section sepya
+.section sepyb
+.section sepza
+.section sepzb
+.section sep1a
+.section sep1b
+.section sep2a
+.section sep2b
+.section sep3a
+.section sep3b
+.section sep4a
+.section sep4b
+.section sep5a
+.section sep5b
+.section sep6a
+.section sep6b
+.section sep7a
+.section sep7b
+.section sep8a
+.section sep8b
+.section sep9a
+.section sep9b
+.section sep0a
+.section sep0b
+.section seqaa
+.section seqab
+.section seqba
+.section seqbb
+.section seqca
+.section seqcb
+.section seqda
+.section seqdb
+.section seqea
+.section seqeb
+.section seqfa
+.section seqfb
+.section seqga
+.section seqgb
+.section seqha
+.section seqhb
+.section seqia
+.section seqib
+.section seqja
+.section seqjb
+.section seqka
+.section seqkb
+.section seqla
+.section seqlb
+.section seqma
+.section seqmb
+.section seqna
+.section seqnb
+.section seqoa
+.section seqob
+.section seqpa
+.section seqpb
+.section seqqa
+.section seqqb
+.section seqra
+.section seqrb
+.section seqsa
+.section seqsb
+.section seqta
+.section seqtb
+.section sequa
+.section sequb
+.section seqva
+.section seqvb
+.section seqwa
+.section seqwb
+.section seqxa
+.section seqxb
+.section seqya
+.section seqyb
+.section seqza
+.section seqzb
+.section seq1a
+.section seq1b
+.section seq2a
+.section seq2b
+.section seq3a
+.section seq3b
+.section seq4a
+.section seq4b
+.section seq5a
+.section seq5b
+.section seq6a
+.section seq6b
+.section seq7a
+.section seq7b
+.section seq8a
+.section seq8b
+.section seq9a
+.section seq9b
+.section seq0a
+.section seq0b
+.section seraa
+.section serab
+.section serba
+.section serbb
+.section serca
+.section sercb
+.section serda
+.section serdb
+.section serea
+.section sereb
+.section serfa
+.section serfb
+.section serga
+.section sergb
+.section serha
+.section serhb
+.section seria
+.section serib
+.section serja
+.section serjb
+.section serka
+.section serkb
+.section serla
+.section serlb
+.section serma
+.section sermb
+.section serna
+.section sernb
+.section seroa
+.section serob
+.section serpa
+.section serpb
+.section serqa
+.section serqb
+.section serra
+.section serrb
+.section sersa
+.section sersb
+.section serta
+.section sertb
+.section serua
+.section serub
+.section serva
+.section servb
+.section serwa
+.section serwb
+.section serxa
+.section serxb
+.section serya
+.section seryb
+.section serza
+.section serzb
+.section ser1a
+.section ser1b
+.section ser2a
+.section ser2b
+.section ser3a
+.section ser3b
+.section ser4a
+.section ser4b
+.section ser5a
+.section ser5b
+.section ser6a
+.section ser6b
+.section ser7a
+.section ser7b
+.section ser8a
+.section ser8b
+.section ser9a
+.section ser9b
+.section ser0a
+.section ser0b
+.section sesaa
+.section sesab
+.section sesba
+.section sesbb
+.section sesca
+.section sescb
+.section sesda
+.section sesdb
+.section sesea
+.section seseb
+.section sesfa
+.section sesfb
+.section sesga
+.section sesgb
+.section sesha
+.section seshb
+.section sesia
+.section sesib
+.section sesja
+.section sesjb
+.section seska
+.section seskb
+.section sesla
+.section seslb
+.section sesma
+.section sesmb
+.section sesna
+.section sesnb
+.section sesoa
+.section sesob
+.section sespa
+.section sespb
+.section sesqa
+.section sesqb
+.section sesra
+.section sesrb
+.section sessa
+.section sessb
+.section sesta
+.section sestb
+.section sesua
+.section sesub
+.section sesva
+.section sesvb
+.section seswa
+.section seswb
+.section sesxa
+.section sesxb
+.section sesya
+.section sesyb
+.section sesza
+.section seszb
+.section ses1a
+.section ses1b
+.section ses2a
+.section ses2b
+.section ses3a
+.section ses3b
+.section ses4a
+.section ses4b
+.section ses5a
+.section ses5b
+.section ses6a
+.section ses6b
+.section ses7a
+.section ses7b
+.section ses8a
+.section ses8b
+.section ses9a
+.section ses9b
+.section ses0a
+.section ses0b
+.section setaa
+.section setab
+.section setba
+.section setbb
+.section setca
+.section setcb
+.section setda
+.section setdb
+.section setea
+.section seteb
+.section setfa
+.section setfb
+.section setga
+.section setgb
+.section setha
+.section sethb
+.section setia
+.section setib
+.section setja
+.section setjb
+.section setka
+.section setkb
+.section setla
+.section setlb
+.section setma
+.section setmb
+.section setna
+.section setnb
+.section setoa
+.section setob
+.section setpa
+.section setpb
+.section setqa
+.section setqb
+.section setra
+.section setrb
+.section setsa
+.section setsb
+.section setta
+.section settb
+.section setua
+.section setub
+.section setva
+.section setvb
+.section setwa
+.section setwb
+.section setxa
+.section setxb
+.section setya
+.section setyb
+.section setza
+.section setzb
+.section set1a
+.section set1b
+.section set2a
+.section set2b
+.section set3a
+.section set3b
+.section set4a
+.section set4b
+.section set5a
+.section set5b
+.section set6a
+.section set6b
+.section set7a
+.section set7b
+.section set8a
+.section set8b
+.section set9a
+.section set9b
+.section set0a
+.section set0b
+.section seuaa
+.section seuab
+.section seuba
+.section seubb
+.section seuca
+.section seucb
+.section seuda
+.section seudb
+.section seuea
+.section seueb
+.section seufa
+.section seufb
+.section seuga
+.section seugb
+.section seuha
+.section seuhb
+.section seuia
+.section seuib
+.section seuja
+.section seujb
+.section seuka
+.section seukb
+.section seula
+.section seulb
+.section seuma
+.section seumb
+.section seuna
+.section seunb
+.section seuoa
+.section seuob
+.section seupa
+.section seupb
+.section seuqa
+.section seuqb
+.section seura
+.section seurb
+.section seusa
+.section seusb
+.section seuta
+.section seutb
+.section seuua
+.section seuub
+.section seuva
+.section seuvb
+.section seuwa
+.section seuwb
+.section seuxa
+.section seuxb
+.section seuya
+.section seuyb
+.section seuza
+.section seuzb
+.section seu1a
+.section seu1b
+.section seu2a
+.section seu2b
+.section seu3a
+.section seu3b
+.section seu4a
+.section seu4b
+.section seu5a
+.section seu5b
+.section seu6a
+.section seu6b
+.section seu7a
+.section seu7b
+.section seu8a
+.section seu8b
+.section seu9a
+.section seu9b
+.section seu0a
+.section seu0b
+.section sevaa
+.section sevab
+.section sevba
+.section sevbb
+.section sevca
+.section sevcb
+.section sevda
+.section sevdb
+.section sevea
+.section seveb
+.section sevfa
+.section sevfb
+.section sevga
+.section sevgb
+.section sevha
+.section sevhb
+.section sevia
+.section sevib
+.section sevja
+.section sevjb
+.section sevka
+.section sevkb
+.section sevla
+.section sevlb
+.section sevma
+.section sevmb
+.section sevna
+.section sevnb
+.section sevoa
+.section sevob
+.section sevpa
+.section sevpb
+.section sevqa
+.section sevqb
+.section sevra
+.section sevrb
+.section sevsa
+.section sevsb
+.section sevta
+.section sevtb
+.section sevua
+.section sevub
+.section sevva
+.section sevvb
+.section sevwa
+.section sevwb
+.section sevxa
+.section sevxb
+.section sevya
+.section sevyb
+.section sevza
+.section sevzb
+.section sev1a
+.section sev1b
+.section sev2a
+.section sev2b
+.section sev3a
+.section sev3b
+.section sev4a
+.section sev4b
+.section sev5a
+.section sev5b
+.section sev6a
+.section sev6b
+.section sev7a
+.section sev7b
+.section sev8a
+.section sev8b
+.section sev9a
+.section sev9b
+.section sev0a
+.section sev0b
+.section sewaa
+.section sewab
+.section sewba
+.section sewbb
+.section sewca
+.section sewcb
+.section sewda
+.section sewdb
+.section sewea
+.section seweb
+.section sewfa
+.section sewfb
+.section sewga
+.section sewgb
+.section sewha
+.section sewhb
+.section sewia
+.section sewib
+.section sewja
+.section sewjb
+.section sewka
+.section sewkb
+.section sewla
+.section sewlb
+.section sewma
+.section sewmb
+.section sewna
+.section sewnb
+.section sewoa
+.section sewob
+.section sewpa
+.section sewpb
+.section sewqa
+.section sewqb
+.section sewra
+.section sewrb
+.section sewsa
+.section sewsb
+.section sewta
+.section sewtb
+.section sewua
+.section sewub
+.section sewva
+.section sewvb
+.section sewwa
+.section sewwb
+.section sewxa
+.section sewxb
+.section sewya
+.section sewyb
+.section sewza
+.section sewzb
+.section sew1a
+.section sew1b
+.section sew2a
+.section sew2b
+.section sew3a
+.section sew3b
+.section sew4a
+.section sew4b
+.section sew5a
+.section sew5b
+.section sew6a
+.section sew6b
+.section sew7a
+.section sew7b
+.section sew8a
+.section sew8b
+.section sew9a
+.section sew9b
+.section sew0a
+.section sew0b
+.section sexaa
+.section sexab
+.section sexba
+.section sexbb
+.section sexca
+.section sexcb
+.section sexda
+.section sexdb
+.section sexea
+.section sexeb
+.section sexfa
+.section sexfb
+.section sexga
+.section sexgb
+.section sexha
+.section sexhb
+.section sexia
+.section sexib
+.section sexja
+.section sexjb
+.section sexka
+.section sexkb
+.section sexla
+.section sexlb
+.section sexma
+.section sexmb
+.section sexna
+.section sexnb
+.section sexoa
+.section sexob
+.section sexpa
+.section sexpb
+.section sexqa
+.section sexqb
+.section sexra
+.section sexrb
+.section sexsa
+.section sexsb
+.section sexta
+.section sextb
+.section sexua
+.section sexub
+.section sexva
+.section sexvb
+.section sexwa
+.section sexwb
+.section sexxa
+.section sexxb
+.section sexya
+.section sexyb
+.section sexza
+.section sexzb
+.section sex1a
+.section sex1b
+.section sex2a
+.section sex2b
+.section sex3a
+.section sex3b
+.section sex4a
+.section sex4b
+.section sex5a
+.section sex5b
+.section sex6a
+.section sex6b
+.section sex7a
+.section sex7b
+.section sex8a
+.section sex8b
+.section sex9a
+.section sex9b
+.section sex0a
+.section sex0b
+.section seyaa
+.section seyab
+.section seyba
+.section seybb
+.section seyca
+.section seycb
+.section seyda
+.section seydb
+.section seyea
+.section seyeb
+.section seyfa
+.section seyfb
+.section seyga
+.section seygb
+.section seyha
+.section seyhb
+.section seyia
+.section seyib
+.section seyja
+.section seyjb
+.section seyka
+.section seykb
+.section seyla
+.section seylb
+.section seyma
+.section seymb
+.section seyna
+.section seynb
+.section seyoa
+.section seyob
+.section seypa
+.section seypb
+.section seyqa
+.section seyqb
+.section seyra
+.section seyrb
+.section seysa
+.section seysb
+.section seyta
+.section seytb
+.section seyua
+.section seyub
+.section seyva
+.section seyvb
+.section seywa
+.section seywb
+.section seyxa
+.section seyxb
+.section seyya
+.section seyyb
+.section seyza
+.section seyzb
+.section sey1a
+.section sey1b
+.section sey2a
+.section sey2b
+.section sey3a
+.section sey3b
+.section sey4a
+.section sey4b
+.section sey5a
+.section sey5b
+.section sey6a
+.section sey6b
+.section sey7a
+.section sey7b
+.section sey8a
+.section sey8b
+.section sey9a
+.section sey9b
+.section sey0a
+.section sey0b
+.section sezaa
+.section sezab
+.section sezba
+.section sezbb
+.section sezca
+.section sezcb
+.section sezda
+.section sezdb
+.section sezea
+.section sezeb
+.section sezfa
+.section sezfb
+.section sezga
+.section sezgb
+.section sezha
+.section sezhb
+.section sezia
+.section sezib
+.section sezja
+.section sezjb
+.section sezka
+.section sezkb
+.section sezla
+.section sezlb
+.section sezma
+.section sezmb
+.section sezna
+.section seznb
+.section sezoa
+.section sezob
+.section sezpa
+.section sezpb
+.section sezqa
+.section sezqb
+.section sezra
+.section sezrb
+.section sezsa
+.section sezsb
+.section sezta
+.section seztb
+.section sezua
+.section sezub
+.section sezva
+.section sezvb
+.section sezwa
+.section sezwb
+.section sezxa
+.section sezxb
+.section sezya
+.section sezyb
+.section sezza
+.section sezzb
+.section sez1a
+.section sez1b
+.section sez2a
+.section sez2b
+.section sez3a
+.section sez3b
+.section sez4a
+.section sez4b
+.section sez5a
+.section sez5b
+.section sez6a
+.section sez6b
+.section sez7a
+.section sez7b
+.section sez8a
+.section sez8b
+.section sez9a
+.section sez9b
+.section sez0a
+.section sez0b
+.section se1aa
+.section se1ab
+.section se1ba
+.section se1bb
+.section se1ca
+.section se1cb
+.section se1da
+.section se1db
+.section se1ea
+.section se1eb
+.section se1fa
+.section se1fb
+.section se1ga
+.section se1gb
+.section se1ha
+.section se1hb
+.section se1ia
+.section se1ib
+.section se1ja
+.section se1jb
+.section se1ka
+.section se1kb
+.section se1la
+.section se1lb
+.section se1ma
+.section se1mb
+.section se1na
+.section se1nb
+.section se1oa
+.section se1ob
+.section se1pa
+.section se1pb
+.section se1qa
+.section se1qb
+.section se1ra
+.section se1rb
+.section se1sa
+.section se1sb
+.section se1ta
+.section se1tb
+.section se1ua
+.section se1ub
+.section se1va
+.section se1vb
+.section se1wa
+.section se1wb
+.section se1xa
+.section se1xb
+.section se1ya
+.section se1yb
+.section se1za
+.section se1zb
+.section se11a
+.section se11b
+.section se12a
+.section se12b
+.section se13a
+.section se13b
+.section se14a
+.section se14b
+.section se15a
+.section se15b
+.section se16a
+.section se16b
+.section se17a
+.section se17b
+.section se18a
+.section se18b
+.section se19a
+.section se19b
+.section se10a
+.section se10b
+.section se2aa
+.section se2ab
+.section se2ba
+.section se2bb
+.section se2ca
+.section se2cb
+.section se2da
+.section se2db
+.section se2ea
+.section se2eb
+.section se2fa
+.section se2fb
+.section se2ga
+.section se2gb
+.section se2ha
+.section se2hb
+.section se2ia
+.section se2ib
+.section se2ja
+.section se2jb
+.section se2ka
+.section se2kb
+.section se2la
+.section se2lb
+.section se2ma
+.section se2mb
+.section se2na
+.section se2nb
+.section se2oa
+.section se2ob
+.section se2pa
+.section se2pb
+.section se2qa
+.section se2qb
+.section se2ra
+.section se2rb
+.section se2sa
+.section se2sb
+.section se2ta
+.section se2tb
+.section se2ua
+.section se2ub
+.section se2va
+.section se2vb
+.section se2wa
+.section se2wb
+.section se2xa
+.section se2xb
+.section se2ya
+.section se2yb
+.section se2za
+.section se2zb
+.section se21a
+.section se21b
+.section se22a
+.section se22b
+.section se23a
+.section se23b
+.section se24a
+.section se24b
+.section se25a
+.section se25b
+.section se26a
+.section se26b
+.section se27a
+.section se27b
+.section se28a
+.section se28b
+.section se29a
+.section se29b
+.section se20a
+.section se20b
+.section se3aa
+.section se3ab
+.section se3ba
+.section se3bb
+.section se3ca
+.section se3cb
+.section se3da
+.section se3db
+.section se3ea
+.section se3eb
+.section se3fa
+.section se3fb
+.section se3ga
+.section se3gb
+.section se3ha
+.section se3hb
+.section se3ia
+.section se3ib
+.section se3ja
+.section se3jb
+.section se3ka
+.section se3kb
+.section se3la
+.section se3lb
+.section se3ma
+.section se3mb
+.section se3na
+.section se3nb
+.section se3oa
+.section se3ob
+.section se3pa
+.section se3pb
+.section se3qa
+.section se3qb
+.section se3ra
+.section se3rb
+.section se3sa
+.section se3sb
+.section se3ta
+.section se3tb
+.section se3ua
+.section se3ub
+.section se3va
+.section se3vb
+.section se3wa
+.section se3wb
+.section se3xa
+.section se3xb
+.section se3ya
+.section se3yb
+.section se3za
+.section se3zb
+.section se31a
+.section se31b
+.section se32a
+.section se32b
+.section se33a
+.section se33b
+.section se34a
+.section se34b
+.section se35a
+.section se35b
+.section se36a
+.section se36b
+.section se37a
+.section se37b
+.section se38a
+.section se38b
+.section se39a
+.section se39b
+.section se30a
+.section se30b
+.section se4aa
+.section se4ab
+.section se4ba
+.section se4bb
+.section se4ca
+.section se4cb
+.section se4da
+.section se4db
+.section se4ea
+.section se4eb
+.section se4fa
+.section se4fb
+.section se4ga
+.section se4gb
+.section se4ha
+.section se4hb
+.section se4ia
+.section se4ib
+.section se4ja
+.section se4jb
+.section se4ka
+.section se4kb
+.section se4la
+.section se4lb
+.section se4ma
+.section se4mb
+.section se4na
+.section se4nb
+.section se4oa
+.section se4ob
+.section se4pa
+.section se4pb
+.section se4qa
+.section se4qb
+.section se4ra
+.section se4rb
+.section se4sa
+.section se4sb
+.section se4ta
+.section se4tb
+.section se4ua
+.section se4ub
+.section se4va
+.section se4vb
+.section se4wa
+.section se4wb
+.section se4xa
+.section se4xb
+.section se4ya
+.section se4yb
+.section se4za
+.section se4zb
+.section se41a
+.section se41b
+.section se42a
+.section se42b
+.section se43a
+.section se43b
+.section se44a
+.section se44b
+.section se45a
+.section se45b
+.section se46a
+.section se46b
+.section se47a
+.section se47b
+.section se48a
+.section se48b
+.section se49a
+.section se49b
+.section se40a
+.section se40b
+.section se5aa
+.section se5ab
+.section se5ba
+.section se5bb
+.section se5ca
+.section se5cb
+.section se5da
+.section se5db
+.section se5ea
+.section se5eb
+.section se5fa
+.section se5fb
+.section se5ga
+.section se5gb
+.section se5ha
+.section se5hb
+.section se5ia
+.section se5ib
+.section se5ja
+.section se5jb
+.section se5ka
+.section se5kb
+.section se5la
+.section se5lb
+.section se5ma
+.section se5mb
+.section se5na
+.section se5nb
+.section se5oa
+.section se5ob
+.section se5pa
+.section se5pb
+.section se5qa
+.section se5qb
+.section se5ra
+.section se5rb
+.section se5sa
+.section se5sb
+.section se5ta
+.section se5tb
+.section se5ua
+.section se5ub
+.section se5va
+.section se5vb
+.section se5wa
+.section se5wb
+.section se5xa
+.section se5xb
+.section se5ya
+.section se5yb
+.section se5za
+.section se5zb
+.section se51a
+.section se51b
+.section se52a
+.section se52b
+.section se53a
+.section se53b
+.section se54a
+.section se54b
+.section se55a
+.section se55b
+.section se56a
+.section se56b
+.section se57a
+.section se57b
+.section se58a
+.section se58b
+.section se59a
+.section se59b
+.section se50a
+.section se50b
+.section se6aa
+.section se6ab
+.section se6ba
+.section se6bb
+.section se6ca
+.section se6cb
+.section se6da
+.section se6db
+.section se6ea
+.section se6eb
+.section se6fa
+.section se6fb
+.section se6ga
+.section se6gb
+.section se6ha
+.section se6hb
+.section se6ia
+.section se6ib
+.section se6ja
+.section se6jb
+.section se6ka
+.section se6kb
+.section se6la
+.section se6lb
+.section se6ma
+.section se6mb
+.section se6na
+.section se6nb
+.section se6oa
+.section se6ob
+.section se6pa
+.section se6pb
+.section se6qa
+.section se6qb
+.section se6ra
+.section se6rb
+.section se6sa
+.section se6sb
+.section se6ta
+.section se6tb
+.section se6ua
+.section se6ub
+.section se6va
+.section se6vb
+.section se6wa
+.section se6wb
+.section se6xa
+.section se6xb
+.section se6ya
+.section se6yb
+.section se6za
+.section se6zb
+.section se61a
+.section se61b
+.section se62a
+.section se62b
+.section se63a
+.section se63b
+.section se64a
+.section se64b
+.section se65a
+.section se65b
+.section se66a
+.section se66b
+.section se67a
+.section se67b
+.section se68a
+.section se68b
+.section se69a
+.section se69b
+.section se60a
+.section se60b
+.section se7aa
+.section se7ab
+.section se7ba
+.section se7bb
+.section se7ca
+.section se7cb
+.section se7da
+.section se7db
+.section se7ea
+.section se7eb
+.section se7fa
+.section se7fb
+.section se7ga
+.section se7gb
+.section se7ha
+.section se7hb
+.section se7ia
+.section se7ib
+.section se7ja
+.section se7jb
+.section se7ka
+.section se7kb
+.section se7la
+.section se7lb
+.section se7ma
+.section se7mb
+.section se7na
+.section se7nb
+.section se7oa
+.section se7ob
+.section se7pa
+.section se7pb
+.section se7qa
+.section se7qb
+.section se7ra
+.section se7rb
+.section se7sa
+.section se7sb
+.section se7ta
+.section se7tb
+.section se7ua
+.section se7ub
+.section se7va
+.section se7vb
+.section se7wa
+.section se7wb
+.section se7xa
+.section se7xb
+.section se7ya
+.section se7yb
+.section se7za
+.section se7zb
+.section se71a
+.section se71b
+.section se72a
+.section se72b
+.section se73a
+.section se73b
+.section se74a
+.section se74b
+.section se75a
+.section se75b
+.section se76a
+.section se76b
+.section se77a
+.section se77b
+.section se78a
+.section se78b
+.section se79a
+.section se79b
+.section se70a
+.section se70b
+.section se8aa
+.section se8ab
+.section se8ba
+.section se8bb
+.section se8ca
+.section se8cb
+.section se8da
+.section se8db
+.section se8ea
+.section se8eb
+.section se8fa
+.section se8fb
+.section se8ga
+.section se8gb
+.section se8ha
+.section se8hb
+.section se8ia
+.section se8ib
+.section se8ja
+.section se8jb
+.section se8ka
+.section se8kb
+.section se8la
+.section se8lb
+.section se8ma
+.section se8mb
+.section se8na
+.section se8nb
+.section se8oa
+.section se8ob
+.section se8pa
+.section se8pb
+.section se8qa
+.section se8qb
+.section se8ra
+.section se8rb
+.section se8sa
+.section se8sb
+.section se8ta
+.section se8tb
+.section se8ua
+.section se8ub
+.section se8va
+.section se8vb
+.section se8wa
+.section se8wb
+.section se8xa
+.section se8xb
+.section se8ya
+.section se8yb
+.section se8za
+.section se8zb
+.section se81a
+.section se81b
+.section se82a
+.section se82b
+.section se83a
+.section se83b
+.section se84a
+.section se84b
+.section se85a
+.section se85b
+.section se86a
+.section se86b
+.section se87a
+.section se87b
+.section se88a
+.section se88b
+.section se89a
+.section se89b
+.section se80a
+.section se80b
+.section se9aa
+.section se9ab
+.section se9ba
+.section se9bb
+.section se9ca
+.section se9cb
+.section se9da
+.section se9db
+.section se9ea
+.section se9eb
+.section se9fa
+.section se9fb
+.section se9ga
+.section se9gb
+.section se9ha
+.section se9hb
+.section se9ia
+.section se9ib
+.section se9ja
+.section se9jb
+.section se9ka
+.section se9kb
+.section se9la
+.section se9lb
+.section se9ma
+.section se9mb
+.section se9na
+.section se9nb
+.section se9oa
+.section se9ob
+.section se9pa
+.section se9pb
+.section se9qa
+.section se9qb
+.section se9ra
+.section se9rb
+.section se9sa
+.section se9sb
+.section se9ta
+.section se9tb
+.section se9ua
+.section se9ub
+.section se9va
+.section se9vb
+.section se9wa
+.section se9wb
+.section se9xa
+.section se9xb
+.section se9ya
+.section se9yb
+.section se9za
+.section se9zb
+.section se91a
+.section se91b
+.section se92a
+.section se92b
+.section se93a
+.section se93b
+.section se94a
+.section se94b
+.section se95a
+.section se95b
+.section se96a
+.section se96b
+.section se97a
+.section se97b
+.section se98a
+.section se98b
+.section se99a
+.section se99b
+.section se90a
+.section se90b
+.section se0aa
+.section se0ab
+.section se0ba
+.section se0bb
+.section se0ca
+.section se0cb
+.section se0da
+.section se0db
+.section se0ea
+.section se0eb
+.section se0fa
+.section se0fb
+.section se0ga
+.section se0gb
+.section se0ha
+.section se0hb
+.section se0ia
+.section se0ib
+.section se0ja
+.section se0jb
+.section se0ka
+.section se0kb
+.section se0la
+.section se0lb
+.section se0ma
+.section se0mb
+.section se0na
+.section se0nb
+.section se0oa
+.section se0ob
+.section se0pa
+.section se0pb
+.section se0qa
+.section se0qb
+.section se0ra
+.section se0rb
+.section se0sa
+.section se0sb
+.section se0ta
+.section se0tb
+.section se0ua
+.section se0ub
+.section se0va
+.section se0vb
+.section se0wa
+.section se0wb
+.section se0xa
+.section se0xb
+.section se0ya
+.section se0yb
+.section se0za
+.section se0zb
+.section se01a
+.section se01b
+.section se02a
+.section se02b
+.section se03a
+.section se03b
+.section se04a
+.section se04b
+.section se05a
+.section se05b
+.section se06a
+.section se06b
+.section se07a
+.section se07b
+.section se08a
+.section se08b
+.section se09a
+.section se09b
+.section se00a
+.section se00b
+.section sfaaa
+.section sfaab
+.section sfaba
+.section sfabb
+.section sfaca
+.section sfacb
+.section sfada
+.section sfadb
+.section sfaea
+.section sfaeb
+.section sfafa
+.section sfafb
+.section sfaga
+.section sfagb
+.section sfaha
+.section sfahb
+.section sfaia
+.section sfaib
+.section sfaja
+.section sfajb
+.section sfaka
+.section sfakb
+.section sfala
+.section sfalb
+.section sfama
+.section sfamb
+.section sfana
+.section sfanb
+.section sfaoa
+.section sfaob
+.section sfapa
+.section sfapb
+.section sfaqa
+.section sfaqb
+.section sfara
+.section sfarb
+.section sfasa
+.section sfasb
+.section sfata
+.section sfatb
+.section sfaua
+.section sfaub
+.section sfava
+.section sfavb
+.section sfawa
+.section sfawb
+.section sfaxa
+.section sfaxb
+.section sfaya
+.section sfayb
+.section sfaza
+.section sfazb
+.section sfa1a
+.section sfa1b
+.section sfa2a
+.section sfa2b
+.section sfa3a
+.section sfa3b
+.section sfa4a
+.section sfa4b
+.section sfa5a
+.section sfa5b
+.section sfa6a
+.section sfa6b
+.section sfa7a
+.section sfa7b
+.section sfa8a
+.section sfa8b
+.section sfa9a
+.section sfa9b
+.section sfa0a
+.section sfa0b
+.section sfbaa
+.section sfbab
+.section sfbba
+.section sfbbb
+.section sfbca
+.section sfbcb
+.section sfbda
+.section sfbdb
+.section sfbea
+.section sfbeb
+.section sfbfa
+.section sfbfb
+.section sfbga
+.section sfbgb
+.section sfbha
+.section sfbhb
+.section sfbia
+.section sfbib
+.section sfbja
+.section sfbjb
+.section sfbka
+.section sfbkb
+.section sfbla
+.section sfblb
+.section sfbma
+.section sfbmb
+.section sfbna
+.section sfbnb
+.section sfboa
+.section sfbob
+.section sfbpa
+.section sfbpb
+.section sfbqa
+.section sfbqb
+.section sfbra
+.section sfbrb
+.section sfbsa
+.section sfbsb
+.section sfbta
+.section sfbtb
+.section sfbua
+.section sfbub
+.section sfbva
+.section sfbvb
+.section sfbwa
+.section sfbwb
+.section sfbxa
+.section sfbxb
+.section sfbya
+.section sfbyb
+.section sfbza
+.section sfbzb
+.section sfb1a
+.section sfb1b
+.section sfb2a
+.section sfb2b
+.section sfb3a
+.section sfb3b
+.section sfb4a
+.section sfb4b
+.section sfb5a
+.section sfb5b
+.section sfb6a
+.section sfb6b
+.section sfb7a
+.section sfb7b
+.section sfb8a
+.section sfb8b
+.section sfb9a
+.section sfb9b
+.section sfb0a
+.section sfb0b
+.section sfcaa
+.section sfcab
+.section sfcba
+.section sfcbb
+.section sfcca
+.section sfccb
+.section sfcda
+.section sfcdb
+.section sfcea
+.section sfceb
+.section sfcfa
+.section sfcfb
+.section sfcga
+.section sfcgb
+.section sfcha
+.section sfchb
+.section sfcia
+.section sfcib
+.section sfcja
+.section sfcjb
+.section sfcka
+.section sfckb
+.section sfcla
+.section sfclb
+.section sfcma
+.section sfcmb
+.section sfcna
+.section sfcnb
+.section sfcoa
+.section sfcob
+.section sfcpa
+.section sfcpb
+.section sfcqa
+.section sfcqb
+.section sfcra
+.section sfcrb
+.section sfcsa
+.section sfcsb
+.section sfcta
+.section sfctb
+.section sfcua
+.section sfcub
+.section sfcva
+.section sfcvb
+.section sfcwa
+.section sfcwb
+.section sfcxa
+.section sfcxb
+.section sfcya
+.section sfcyb
+.section sfcza
+.section sfczb
+.section sfc1a
+.section sfc1b
+.section sfc2a
+.section sfc2b
+.section sfc3a
+.section sfc3b
+.section sfc4a
+.section sfc4b
+.section sfc5a
+.section sfc5b
+.section sfc6a
+.section sfc6b
+.section sfc7a
+.section sfc7b
+.section sfc8a
+.section sfc8b
+.section sfc9a
+.section sfc9b
+.section sfc0a
+.section sfc0b
+.section sfdaa
+.section sfdab
+.section sfdba
+.section sfdbb
+.section sfdca
+.section sfdcb
+.section sfdda
+.section sfddb
+.section sfdea
+.section sfdeb
+.section sfdfa
+.section sfdfb
+.section sfdga
+.section sfdgb
+.section sfdha
+.section sfdhb
+.section sfdia
+.section sfdib
+.section sfdja
+.section sfdjb
+.section sfdka
+.section sfdkb
+.section sfdla
+.section sfdlb
+.section sfdma
+.section sfdmb
+.section sfdna
+.section sfdnb
+.section sfdoa
+.section sfdob
+.section sfdpa
+.section sfdpb
+.section sfdqa
+.section sfdqb
+.section sfdra
+.section sfdrb
+.section sfdsa
+.section sfdsb
+.section sfdta
+.section sfdtb
+.section sfdua
+.section sfdub
+.section sfdva
+.section sfdvb
+.section sfdwa
+.section sfdwb
+.section sfdxa
+.section sfdxb
+.section sfdya
+.section sfdyb
+.section sfdza
+.section sfdzb
+.section sfd1a
+.section sfd1b
+.section sfd2a
+.section sfd2b
+.section sfd3a
+.section sfd3b
+.section sfd4a
+.section sfd4b
+.section sfd5a
+.section sfd5b
+.section sfd6a
+.section sfd6b
+.section sfd7a
+.section sfd7b
+.section sfd8a
+.section sfd8b
+.section sfd9a
+.section sfd9b
+.section sfd0a
+.section sfd0b
+.section sfeaa
+.section sfeab
+.section sfeba
+.section sfebb
+.section sfeca
+.section sfecb
+.section sfeda
+.section sfedb
+.section sfeea
+.section sfeeb
+.section sfefa
+.section sfefb
+.section sfega
+.section sfegb
+.section sfeha
+.section sfehb
+.section sfeia
+.section sfeib
+.section sfeja
+.section sfejb
+.section sfeka
+.section sfekb
+.section sfela
+.section sfelb
+.section sfema
+.section sfemb
+.section sfena
+.section sfenb
+.section sfeoa
+.section sfeob
+.section sfepa
+.section sfepb
+.section sfeqa
+.section sfeqb
+.section sfera
+.section sferb
+.section sfesa
+.section sfesb
+.section sfeta
+.section sfetb
+.section sfeua
+.section sfeub
+.section sfeva
+.section sfevb
+.section sfewa
+.section sfewb
+.section sfexa
+.section sfexb
+.section sfeya
+.section sfeyb
+.section sfeza
+.section sfezb
+.section sfe1a
+.section sfe1b
+.section sfe2a
+.section sfe2b
+.section sfe3a
+.section sfe3b
+.section sfe4a
+.section sfe4b
+.section sfe5a
+.section sfe5b
+.section sfe6a
+.section sfe6b
+.section sfe7a
+.section sfe7b
+.section sfe8a
+.section sfe8b
+.section sfe9a
+.section sfe9b
+.section sfe0a
+.section sfe0b
+.section sffaa
+.section sffab
+.section sffba
+.section sffbb
+.section sffca
+.section sffcb
+.section sffda
+.section sffdb
+.section sffea
+.section sffeb
+.section sfffa
+.section sfffb
+.section sffga
+.section sffgb
+.section sffha
+.section sffhb
+.section sffia
+.section sffib
+.section sffja
+.section sffjb
+.section sffka
+.section sffkb
+.section sffla
+.section sfflb
+.section sffma
+.section sffmb
+.section sffna
+.section sffnb
+.section sffoa
+.section sffob
+.section sffpa
+.section sffpb
+.section sffqa
+.section sffqb
+.section sffra
+.section sffrb
+.section sffsa
+.section sffsb
+.section sffta
+.section sfftb
+.section sffua
+.section sffub
+.section sffva
+.section sffvb
+.section sffwa
+.section sffwb
+.section sffxa
+.section sffxb
+.section sffya
+.section sffyb
+.section sffza
+.section sffzb
+.section sff1a
+.section sff1b
+.section sff2a
+.section sff2b
+.section sff3a
+.section sff3b
+.section sff4a
+.section sff4b
+.section sff5a
+.section sff5b
+.section sff6a
+.section sff6b
+.section sff7a
+.section sff7b
+.section sff8a
+.section sff8b
+.section sff9a
+.section sff9b
+.section sff0a
+.section sff0b
+.section sfgaa
+.section sfgab
+.section sfgba
+.section sfgbb
+.section sfgca
+.section sfgcb
+.section sfgda
+.section sfgdb
+.section sfgea
+.section sfgeb
+.section sfgfa
+.section sfgfb
+.section sfgga
+.section sfggb
+.section sfgha
+.section sfghb
+.section sfgia
+.section sfgib
+.section sfgja
+.section sfgjb
+.section sfgka
+.section sfgkb
+.section sfgla
+.section sfglb
+.section sfgma
+.section sfgmb
+.section sfgna
+.section sfgnb
+.section sfgoa
+.section sfgob
+.section sfgpa
+.section sfgpb
+.section sfgqa
+.section sfgqb
+.section sfgra
+.section sfgrb
+.section sfgsa
+.section sfgsb
+.section sfgta
+.section sfgtb
+.section sfgua
+.section sfgub
+.section sfgva
+.section sfgvb
+.section sfgwa
+.section sfgwb
+.section sfgxa
+.section sfgxb
+.section sfgya
+.section sfgyb
+.section sfgza
+.section sfgzb
+.section sfg1a
+.section sfg1b
+.section sfg2a
+.section sfg2b
+.section sfg3a
+.section sfg3b
+.section sfg4a
+.section sfg4b
+.section sfg5a
+.section sfg5b
+.section sfg6a
+.section sfg6b
+.section sfg7a
+.section sfg7b
+.section sfg8a
+.section sfg8b
+.section sfg9a
+.section sfg9b
+.section sfg0a
+.section sfg0b
+.section sfhaa
+.section sfhab
+.section sfhba
+.section sfhbb
+.section sfhca
+.section sfhcb
+.section sfhda
+.section sfhdb
+.section sfhea
+.section sfheb
+.section sfhfa
+.section sfhfb
+.section sfhga
+.section sfhgb
+.section sfhha
+.section sfhhb
+.section sfhia
+.section sfhib
+.section sfhja
+.section sfhjb
+.section sfhka
+.section sfhkb
+.section sfhla
+.section sfhlb
+.section sfhma
+.section sfhmb
+.section sfhna
+.section sfhnb
+.section sfhoa
+.section sfhob
+.section sfhpa
+.section sfhpb
+.section sfhqa
+.section sfhqb
+.section sfhra
+.section sfhrb
+.section sfhsa
+.section sfhsb
+.section sfhta
+.section sfhtb
+.section sfhua
+.section sfhub
+.section sfhva
+.section sfhvb
+.section sfhwa
+.section sfhwb
+.section sfhxa
+.section sfhxb
+.section sfhya
+.section sfhyb
+.section sfhza
+.section sfhzb
+.section sfh1a
+.section sfh1b
+.section sfh2a
+.section sfh2b
+.section sfh3a
+.section sfh3b
+.section sfh4a
+.section sfh4b
+.section sfh5a
+.section sfh5b
+.section sfh6a
+.section sfh6b
+.section sfh7a
+.section sfh7b
+.section sfh8a
+.section sfh8b
+.section sfh9a
+.section sfh9b
+.section sfh0a
+.section sfh0b
+.section sfiaa
+.section sfiab
+.section sfiba
+.section sfibb
+.section sfica
+.section sficb
+.section sfida
+.section sfidb
+.section sfiea
+.section sfieb
+.section sfifa
+.section sfifb
+.section sfiga
+.section sfigb
+.section sfiha
+.section sfihb
+.section sfiia
+.section sfiib
+.section sfija
+.section sfijb
+.section sfika
+.section sfikb
+.section sfila
+.section sfilb
+.section sfima
+.section sfimb
+.section sfina
+.section sfinb
+.section sfioa
+.section sfiob
+.section sfipa
+.section sfipb
+.section sfiqa
+.section sfiqb
+.section sfira
+.section sfirb
+.section sfisa
+.section sfisb
+.section sfita
+.section sfitb
+.section sfiua
+.section sfiub
+.section sfiva
+.section sfivb
+.section sfiwa
+.section sfiwb
+.section sfixa
+.section sfixb
+.section sfiya
+.section sfiyb
+.section sfiza
+.section sfizb
+.section sfi1a
+.section sfi1b
+.section sfi2a
+.section sfi2b
+.section sfi3a
+.section sfi3b
+.section sfi4a
+.section sfi4b
+.section sfi5a
+.section sfi5b
+.section sfi6a
+.section sfi6b
+.section sfi7a
+.section sfi7b
+.section sfi8a
+.section sfi8b
+.section sfi9a
+.section sfi9b
+.section sfi0a
+.section sfi0b
+.section sfjaa
+.section sfjab
+.section sfjba
+.section sfjbb
+.section sfjca
+.section sfjcb
+.section sfjda
+.section sfjdb
+.section sfjea
+.section sfjeb
+.section sfjfa
+.section sfjfb
+.section sfjga
+.section sfjgb
+.section sfjha
+.section sfjhb
+.section sfjia
+.section sfjib
+.section sfjja
+.section sfjjb
+.section sfjka
+.section sfjkb
+.section sfjla
+.section sfjlb
+.section sfjma
+.section sfjmb
+.section sfjna
+.section sfjnb
+.section sfjoa
+.section sfjob
+.section sfjpa
+.section sfjpb
+.section sfjqa
+.section sfjqb
+.section sfjra
+.section sfjrb
+.section sfjsa
+.section sfjsb
+.section sfjta
+.section sfjtb
+.section sfjua
+.section sfjub
+.section sfjva
+.section sfjvb
+.section sfjwa
+.section sfjwb
+.section sfjxa
+.section sfjxb
+.section sfjya
+.section sfjyb
+.section sfjza
+.section sfjzb
+.section sfj1a
+.section sfj1b
+.section sfj2a
+.section sfj2b
+.section sfj3a
+.section sfj3b
+.section sfj4a
+.section sfj4b
+.section sfj5a
+.section sfj5b
+.section sfj6a
+.section sfj6b
+.section sfj7a
+.section sfj7b
+.section sfj8a
+.section sfj8b
+.section sfj9a
+.section sfj9b
+.section sfj0a
+.section sfj0b
+.section sfkaa
+.section sfkab
+.section sfkba
+.section sfkbb
+.section sfkca
+.section sfkcb
+.section sfkda
+.section sfkdb
+.section sfkea
+.section sfkeb
+.section sfkfa
+.section sfkfb
+.section sfkga
+.section sfkgb
+.section sfkha
+.section sfkhb
+.section sfkia
+.section sfkib
+.section sfkja
+.section sfkjb
+.section sfkka
+.section sfkkb
+.section sfkla
+.section sfklb
+.section sfkma
+.section sfkmb
+.section sfkna
+.section sfknb
+.section sfkoa
+.section sfkob
+.section sfkpa
+.section sfkpb
+.section sfkqa
+.section sfkqb
+.section sfkra
+.section sfkrb
+.section sfksa
+.section sfksb
+.section sfkta
+.section sfktb
+.section sfkua
+.section sfkub
+.section sfkva
+.section sfkvb
+.section sfkwa
+.section sfkwb
+.section sfkxa
+.section sfkxb
+.section sfkya
+.section sfkyb
+.section sfkza
+.section sfkzb
+.section sfk1a
+.section sfk1b
+.section sfk2a
+.section sfk2b
+.section sfk3a
+.section sfk3b
+.section sfk4a
+.section sfk4b
+.section sfk5a
+.section sfk5b
+.section sfk6a
+.section sfk6b
+.section sfk7a
+.section sfk7b
+.section sfk8a
+.section sfk8b
+.section sfk9a
+.section sfk9b
+.section sfk0a
+.section sfk0b
+.section sflaa
+.section sflab
+.section sflba
+.section sflbb
+.section sflca
+.section sflcb
+.section sflda
+.section sfldb
+.section sflea
+.section sfleb
+.section sflfa
+.section sflfb
+.section sflga
+.section sflgb
+.section sflha
+.section sflhb
+.section sflia
+.section sflib
+.section sflja
+.section sfljb
+.section sflka
+.section sflkb
+.section sflla
+.section sfllb
+.section sflma
+.section sflmb
+.section sflna
+.section sflnb
+.section sfloa
+.section sflob
+.section sflpa
+.section sflpb
+.section sflqa
+.section sflqb
+.section sflra
+.section sflrb
+.section sflsa
+.section sflsb
+.section sflta
+.section sfltb
+.section sflua
+.section sflub
+.section sflva
+.section sflvb
+.section sflwa
+.section sflwb
+.section sflxa
+.section sflxb
+.section sflya
+.section sflyb
+.section sflza
+.section sflzb
+.section sfl1a
+.section sfl1b
+.section sfl2a
+.section sfl2b
+.section sfl3a
+.section sfl3b
+.section sfl4a
+.section sfl4b
+.section sfl5a
+.section sfl5b
+.section sfl6a
+.section sfl6b
+.section sfl7a
+.section sfl7b
+.section sfl8a
+.section sfl8b
+.section sfl9a
+.section sfl9b
+.section sfl0a
+.section sfl0b
+.section sfmaa
+.section sfmab
+.section sfmba
+.section sfmbb
+.section sfmca
+.section sfmcb
+.section sfmda
+.section sfmdb
+.section sfmea
+.section sfmeb
+.section sfmfa
+.section sfmfb
+.section sfmga
+.section sfmgb
+.section sfmha
+.section sfmhb
+.section sfmia
+.section sfmib
+.section sfmja
+.section sfmjb
+.section sfmka
+.section sfmkb
+.section sfmla
+.section sfmlb
+.section sfmma
+.section sfmmb
+.section sfmna
+.section sfmnb
+.section sfmoa
+.section sfmob
+.section sfmpa
+.section sfmpb
+.section sfmqa
+.section sfmqb
+.section sfmra
+.section sfmrb
+.section sfmsa
+.section sfmsb
+.section sfmta
+.section sfmtb
+.section sfmua
+.section sfmub
+.section sfmva
+.section sfmvb
+.section sfmwa
+.section sfmwb
+.section sfmxa
+.section sfmxb
+.section sfmya
+.section sfmyb
+.section sfmza
+.section sfmzb
+.section sfm1a
+.section sfm1b
+.section sfm2a
+.section sfm2b
+.section sfm3a
+.section sfm3b
+.section sfm4a
+.section sfm4b
+.section sfm5a
+.section sfm5b
+.section sfm6a
+.section sfm6b
+.section sfm7a
+.section sfm7b
+.section sfm8a
+.section sfm8b
+.section sfm9a
+.section sfm9b
+.section sfm0a
+.section sfm0b
+.section sfnaa
+.section sfnab
+.section sfnba
+.section sfnbb
+.section sfnca
+.section sfncb
+.section sfnda
+.section sfndb
+.section sfnea
+.section sfneb
+.section sfnfa
+.section sfnfb
+.section sfnga
+.section sfngb
+.section sfnha
+.section sfnhb
+.section sfnia
+.section sfnib
+.section sfnja
+.section sfnjb
+.section sfnka
+.section sfnkb
+.section sfnla
+.section sfnlb
+.section sfnma
+.section sfnmb
+.section sfnna
+.section sfnnb
+.section sfnoa
+.section sfnob
+.section sfnpa
+.section sfnpb
+.section sfnqa
+.section sfnqb
+.section sfnra
+.section sfnrb
+.section sfnsa
+.section sfnsb
+.section sfnta
+.section sfntb
+.section sfnua
+.section sfnub
+.section sfnva
+.section sfnvb
+.section sfnwa
+.section sfnwb
+.section sfnxa
+.section sfnxb
+.section sfnya
+.section sfnyb
+.section sfnza
+.section sfnzb
+.section sfn1a
+.section sfn1b
+.section sfn2a
+.section sfn2b
+.section sfn3a
+.section sfn3b
+.section sfn4a
+.section sfn4b
+.section sfn5a
+.section sfn5b
+.section sfn6a
+.section sfn6b
+.section sfn7a
+.section sfn7b
+.section sfn8a
+.section sfn8b
+.section sfn9a
+.section sfn9b
+.section sfn0a
+.section sfn0b
+.section sfoaa
+.section sfoab
+.section sfoba
+.section sfobb
+.section sfoca
+.section sfocb
+.section sfoda
+.section sfodb
+.section sfoea
+.section sfoeb
+.section sfofa
+.section sfofb
+.section sfoga
+.section sfogb
+.section sfoha
+.section sfohb
+.section sfoia
+.section sfoib
+.section sfoja
+.section sfojb
+.section sfoka
+.section sfokb
+.section sfola
+.section sfolb
+.section sfoma
+.section sfomb
+.section sfona
+.section sfonb
+.section sfooa
+.section sfoob
+.section sfopa
+.section sfopb
+.section sfoqa
+.section sfoqb
+.section sfora
+.section sforb
+.section sfosa
+.section sfosb
+.section sfota
+.section sfotb
+.section sfoua
+.section sfoub
+.section sfova
+.section sfovb
+.section sfowa
+.section sfowb
+.section sfoxa
+.section sfoxb
+.section sfoya
+.section sfoyb
+.section sfoza
+.section sfozb
+.section sfo1a
+.section sfo1b
+.section sfo2a
+.section sfo2b
+.section sfo3a
+.section sfo3b
+.section sfo4a
+.section sfo4b
+.section sfo5a
+.section sfo5b
+.section sfo6a
+.section sfo6b
+.section sfo7a
+.section sfo7b
+.section sfo8a
+.section sfo8b
+.section sfo9a
+.section sfo9b
+.section sfo0a
+.section sfo0b
+.section sfpaa
+.section sfpab
+.section sfpba
+.section sfpbb
+.section sfpca
+.section sfpcb
+.section sfpda
+.section sfpdb
+.section sfpea
+.section sfpeb
+.section sfpfa
+.section sfpfb
+.section sfpga
+.section sfpgb
+.section sfpha
+.section sfphb
+.section sfpia
+.section sfpib
+.section sfpja
+.section sfpjb
+.section sfpka
+.section sfpkb
+.section sfpla
+.section sfplb
+.section sfpma
+.section sfpmb
+.section sfpna
+.section sfpnb
+.section sfpoa
+.section sfpob
+.section sfppa
+.section sfppb
+.section sfpqa
+.section sfpqb
+.section sfpra
+.section sfprb
+.section sfpsa
+.section sfpsb
+.section sfpta
+.section sfptb
+.section sfpua
+.section sfpub
+.section sfpva
+.section sfpvb
+.section sfpwa
+.section sfpwb
+.section sfpxa
+.section sfpxb
+.section sfpya
+.section sfpyb
+.section sfpza
+.section sfpzb
+.section sfp1a
+.section sfp1b
+.section sfp2a
+.section sfp2b
+.section sfp3a
+.section sfp3b
+.section sfp4a
+.section sfp4b
+.section sfp5a
+.section sfp5b
+.section sfp6a
+.section sfp6b
+.section sfp7a
+.section sfp7b
+.section sfp8a
+.section sfp8b
+.section sfp9a
+.section sfp9b
+.section sfp0a
+.section sfp0b
+.section sfqaa
+.section sfqab
+.section sfqba
+.section sfqbb
+.section sfqca
+.section sfqcb
+.section sfqda
+.section sfqdb
+.section sfqea
+.section sfqeb
+.section sfqfa
+.section sfqfb
+.section sfqga
+.section sfqgb
+.section sfqha
+.section sfqhb
+.section sfqia
+.section sfqib
+.section sfqja
+.section sfqjb
+.section sfqka
+.section sfqkb
+.section sfqla
+.section sfqlb
+.section sfqma
+.section sfqmb
+.section sfqna
+.section sfqnb
+.section sfqoa
+.section sfqob
+.section sfqpa
+.section sfqpb
+.section sfqqa
+.section sfqqb
+.section sfqra
+.section sfqrb
+.section sfqsa
+.section sfqsb
+.section sfqta
+.section sfqtb
+.section sfqua
+.section sfqub
+.section sfqva
+.section sfqvb
+.section sfqwa
+.section sfqwb
+.section sfqxa
+.section sfqxb
+.section sfqya
+.section sfqyb
+.section sfqza
+.section sfqzb
+.section sfq1a
+.section sfq1b
+.section sfq2a
+.section sfq2b
+.section sfq3a
+.section sfq3b
+.section sfq4a
+.section sfq4b
+.section sfq5a
+.section sfq5b
+.section sfq6a
+.section sfq6b
+.section sfq7a
+.section sfq7b
+.section sfq8a
+.section sfq8b
+.section sfq9a
+.section sfq9b
+.section sfq0a
+.section sfq0b
+.section sfraa
+.section sfrab
+.section sfrba
+.section sfrbb
+.section sfrca
+.section sfrcb
+.section sfrda
+.section sfrdb
+.section sfrea
+.section sfreb
+.section sfrfa
+.section sfrfb
+.section sfrga
+.section sfrgb
+.section sfrha
+.section sfrhb
+.section sfria
+.section sfrib
+.section sfrja
+.section sfrjb
+.section sfrka
+.section sfrkb
+.section sfrla
+.section sfrlb
+.section sfrma
+.section sfrmb
+.section sfrna
+.section sfrnb
+.section sfroa
+.section sfrob
+.section sfrpa
+.section sfrpb
+.section sfrqa
+.section sfrqb
+.section sfrra
+.section sfrrb
+.section sfrsa
+.section sfrsb
+.section sfrta
+.section sfrtb
+.section sfrua
+.section sfrub
+.section sfrva
+.section sfrvb
+.section sfrwa
+.section sfrwb
+.section sfrxa
+.section sfrxb
+.section sfrya
+.section sfryb
+.section sfrza
+.section sfrzb
+.section sfr1a
+.section sfr1b
+.section sfr2a
+.section sfr2b
+.section sfr3a
+.section sfr3b
+.section sfr4a
+.section sfr4b
+.section sfr5a
+.section sfr5b
+.section sfr6a
+.section sfr6b
+.section sfr7a
+.section sfr7b
+.section sfr8a
+.section sfr8b
+.section sfr9a
+.section sfr9b
+.section sfr0a
+.section sfr0b
+.section sfsaa
+.section sfsab
+.section sfsba
+.section sfsbb
+.section sfsca
+.section sfscb
+.section sfsda
+.section sfsdb
+.section sfsea
+.section sfseb
+.section sfsfa
+.section sfsfb
+.section sfsga
+.section sfsgb
+.section sfsha
+.section sfshb
+.section sfsia
+.section sfsib
+.section sfsja
+.section sfsjb
+.section sfska
+.section sfskb
+.section sfsla
+.section sfslb
+.section sfsma
+.section sfsmb
+.section sfsna
+.section sfsnb
+.section sfsoa
+.section sfsob
+.section sfspa
+.section sfspb
+.section sfsqa
+.section sfsqb
+.section sfsra
+.section sfsrb
+.section sfssa
+.section sfssb
+.section sfsta
+.section sfstb
+.section sfsua
+.section sfsub
+.section sfsva
+.section sfsvb
+.section sfswa
+.section sfswb
+.section sfsxa
+.section sfsxb
+.section sfsya
+.section sfsyb
+.section sfsza
+.section sfszb
+.section sfs1a
+.section sfs1b
+.section sfs2a
+.section sfs2b
+.section sfs3a
+.section sfs3b
+.section sfs4a
+.section sfs4b
+.section sfs5a
+.section sfs5b
+.section sfs6a
+.section sfs6b
+.section sfs7a
+.section sfs7b
+.section sfs8a
+.section sfs8b
+.section sfs9a
+.section sfs9b
+.section sfs0a
+.section sfs0b
+.section sftaa
+.section sftab
+.section sftba
+.section sftbb
+.section sftca
+.section sftcb
+.section sftda
+.section sftdb
+.section sftea
+.section sfteb
+.section sftfa
+.section sftfb
+.section sftga
+.section sftgb
+.section sftha
+.section sfthb
+.section sftia
+.section sftib
+.section sftja
+.section sftjb
+.section sftka
+.section sftkb
+.section sftla
+.section sftlb
+.section sftma
+.section sftmb
+.section sftna
+.section sftnb
+.section sftoa
+.section sftob
+.section sftpa
+.section sftpb
+.section sftqa
+.section sftqb
+.section sftra
+.section sftrb
+.section sftsa
+.section sftsb
+.section sftta
+.section sfttb
+.section sftua
+.section sftub
+.section sftva
+.section sftvb
+.section sftwa
+.section sftwb
+.section sftxa
+.section sftxb
+.section sftya
+.section sftyb
+.section sftza
+.section sftzb
+.section sft1a
+.section sft1b
+.section sft2a
+.section sft2b
+.section sft3a
+.section sft3b
+.section sft4a
+.section sft4b
+.section sft5a
+.section sft5b
+.section sft6a
+.section sft6b
+.section sft7a
+.section sft7b
+.section sft8a
+.section sft8b
+.section sft9a
+.section sft9b
+.section sft0a
+.section sft0b
+.section sfuaa
+.section sfuab
+.section sfuba
+.section sfubb
+.section sfuca
+.section sfucb
+.section sfuda
+.section sfudb
+.section sfuea
+.section sfueb
+.section sfufa
+.section sfufb
+.section sfuga
+.section sfugb
+.section sfuha
+.section sfuhb
+.section sfuia
+.section sfuib
+.section sfuja
+.section sfujb
+.section sfuka
+.section sfukb
+.section sfula
+.section sfulb
+.section sfuma
+.section sfumb
+.section sfuna
+.section sfunb
+.section sfuoa
+.section sfuob
+.section sfupa
+.section sfupb
+.section sfuqa
+.section sfuqb
+.section sfura
+.section sfurb
+.section sfusa
+.section sfusb
+.section sfuta
+.section sfutb
+.section sfuua
+.section sfuub
+.section sfuva
+.section sfuvb
+.section sfuwa
+.section sfuwb
+.section sfuxa
+.section sfuxb
+.section sfuya
+.section sfuyb
+.section sfuza
+.section sfuzb
+.section sfu1a
+.section sfu1b
+.section sfu2a
+.section sfu2b
+.section sfu3a
+.section sfu3b
+.section sfu4a
+.section sfu4b
+.section sfu5a
+.section sfu5b
+.section sfu6a
+.section sfu6b
+.section sfu7a
+.section sfu7b
+.section sfu8a
+.section sfu8b
+.section sfu9a
+.section sfu9b
+.section sfu0a
+.section sfu0b
+.section sfvaa
+.section sfvab
+.section sfvba
+.section sfvbb
+.section sfvca
+.section sfvcb
+.section sfvda
+.section sfvdb
+.section sfvea
+.section sfveb
+.section sfvfa
+.section sfvfb
+.section sfvga
+.section sfvgb
+.section sfvha
+.section sfvhb
+.section sfvia
+.section sfvib
+.section sfvja
+.section sfvjb
+.section sfvka
+.section sfvkb
+.section sfvla
+.section sfvlb
+.section sfvma
+.section sfvmb
+.section sfvna
+.section sfvnb
+.section sfvoa
+.section sfvob
+.section sfvpa
+.section sfvpb
+.section sfvqa
+.section sfvqb
+.section sfvra
+.section sfvrb
+.section sfvsa
+.section sfvsb
+.section sfvta
+.section sfvtb
+.section sfvua
+.section sfvub
+.section sfvva
+.section sfvvb
+.section sfvwa
+.section sfvwb
+.section sfvxa
+.section sfvxb
+.section sfvya
+.section sfvyb
+.section sfvza
+.section sfvzb
+.section sfv1a
+.section sfv1b
+.section sfv2a
+.section sfv2b
+.section sfv3a
+.section sfv3b
+.section sfv4a
+.section sfv4b
+.section sfv5a
+.section sfv5b
+.section sfv6a
+.section sfv6b
+.section sfv7a
+.section sfv7b
+.section sfv8a
+.section sfv8b
+.section sfv9a
+.section sfv9b
+.section sfv0a
+.section sfv0b
+.section sfwaa
+.section sfwab
+.section sfwba
+.section sfwbb
+.section sfwca
+.section sfwcb
+.section sfwda
+.section sfwdb
+.section sfwea
+.section sfweb
+.section sfwfa
+.section sfwfb
+.section sfwga
+.section sfwgb
+.section sfwha
+.section sfwhb
+.section sfwia
+.section sfwib
+.section sfwja
+.section sfwjb
+.section sfwka
+.section sfwkb
+.section sfwla
+.section sfwlb
+.section sfwma
+.section sfwmb
+.section sfwna
+.section sfwnb
+.section sfwoa
+.section sfwob
+.section sfwpa
+.section sfwpb
+.section sfwqa
+.section sfwqb
+.section sfwra
+.section sfwrb
+.section sfwsa
+.section sfwsb
+.section sfwta
+.section sfwtb
+.section sfwua
+.section sfwub
+.section sfwva
+.section sfwvb
+.section sfwwa
+.section sfwwb
+.section sfwxa
+.section sfwxb
+.section sfwya
+.section sfwyb
+.section sfwza
+.section sfwzb
+.section sfw1a
+.section sfw1b
+.section sfw2a
+.section sfw2b
+.section sfw3a
+.section sfw3b
+.section sfw4a
+.section sfw4b
+.section sfw5a
+.section sfw5b
+.section sfw6a
+.section sfw6b
+.section sfw7a
+.section sfw7b
+.section sfw8a
+.section sfw8b
+.section sfw9a
+.section sfw9b
+.section sfw0a
+.section sfw0b
+.section sfxaa
+.section sfxab
+.section sfxba
+.section sfxbb
+.section sfxca
+.section sfxcb
+.section sfxda
+.section sfxdb
+.section sfxea
+.section sfxeb
+.section sfxfa
+.section sfxfb
+.section sfxga
+.section sfxgb
+.section sfxha
+.section sfxhb
+.section sfxia
+.section sfxib
+.section sfxja
+.section sfxjb
+.section sfxka
+.section sfxkb
+.section sfxla
+.section sfxlb
+.section sfxma
+.section sfxmb
+.section sfxna
+.section sfxnb
+.section sfxoa
+.section sfxob
+.section sfxpa
+.section sfxpb
+.section sfxqa
+.section sfxqb
+.section sfxra
+.section sfxrb
+.section sfxsa
+.section sfxsb
+.section sfxta
+.section sfxtb
+.section sfxua
+.section sfxub
+.section sfxva
+.section sfxvb
+.section sfxwa
+.section sfxwb
+.section sfxxa
+.section sfxxb
+.section sfxya
+.section sfxyb
+.section sfxza
+.section sfxzb
+.section sfx1a
+.section sfx1b
+.section sfx2a
+.section sfx2b
+.section sfx3a
+.section sfx3b
+.section sfx4a
+.section sfx4b
+.section sfx5a
+.section sfx5b
+.section sfx6a
+.section sfx6b
+.section sfx7a
+.section sfx7b
+.section sfx8a
+.section sfx8b
+.section sfx9a
+.section sfx9b
+.section sfx0a
+.section sfx0b
+.section sfyaa
+.section sfyab
+.section sfyba
+.section sfybb
+.section sfyca
+.section sfycb
+.section sfyda
+.section sfydb
+.section sfyea
+.section sfyeb
+.section sfyfa
+.section sfyfb
+.section sfyga
+.section sfygb
+.section sfyha
+.section sfyhb
+.section sfyia
+.section sfyib
+.section sfyja
+.section sfyjb
+.section sfyka
+.section sfykb
+.section sfyla
+.section sfylb
+.section sfyma
+.section sfymb
+.section sfyna
+.section sfynb
+.section sfyoa
+.section sfyob
+.section sfypa
+.section sfypb
+.section sfyqa
+.section sfyqb
+.section sfyra
+.section sfyrb
+.section sfysa
+.section sfysb
+.section sfyta
+.section sfytb
+.section sfyua
+.section sfyub
+.section sfyva
+.section sfyvb
+.section sfywa
+.section sfywb
+.section sfyxa
+.section sfyxb
+.section sfyya
+.section sfyyb
+.section sfyza
+.section sfyzb
+.section sfy1a
+.section sfy1b
+.section sfy2a
+.section sfy2b
+.section sfy3a
+.section sfy3b
+.section sfy4a
+.section sfy4b
+.section sfy5a
+.section sfy5b
+.section sfy6a
+.section sfy6b
+.section sfy7a
+.section sfy7b
+.section sfy8a
+.section sfy8b
+.section sfy9a
+.section sfy9b
+.section sfy0a
+.section sfy0b
+.section sfzaa
+.section sfzab
+.section sfzba
+.section sfzbb
+.section sfzca
+.section sfzcb
+.section sfzda
+.section sfzdb
+.section sfzea
+.section sfzeb
+.section sfzfa
+.section sfzfb
+.section sfzga
+.section sfzgb
+.section sfzha
+.section sfzhb
+.section sfzia
+.section sfzib
+.section sfzja
+.section sfzjb
+.section sfzka
+.section sfzkb
+.section sfzla
+.section sfzlb
+.section sfzma
+.section sfzmb
+.section sfzna
+.section sfznb
+.section sfzoa
+.section sfzob
+.section sfzpa
+.section sfzpb
+.section sfzqa
+.section sfzqb
+.section sfzra
+.section sfzrb
+.section sfzsa
+.section sfzsb
+.section sfzta
+.section sfztb
+.section sfzua
+.section sfzub
+.section sfzva
+.section sfzvb
+.section sfzwa
+.section sfzwb
+.section sfzxa
+.section sfzxb
+.section sfzya
+.section sfzyb
+.section sfzza
+.section sfzzb
+.section sfz1a
+.section sfz1b
+.section sfz2a
+.section sfz2b
+.section sfz3a
+.section sfz3b
+.section sfz4a
+.section sfz4b
+.section sfz5a
+.section sfz5b
+.section sfz6a
+.section sfz6b
+.section sfz7a
+.section sfz7b
+.section sfz8a
+.section sfz8b
+.section sfz9a
+.section sfz9b
+.section sfz0a
+.section sfz0b
+.section sf1aa
+.section sf1ab
+.section sf1ba
+.section sf1bb
+.section sf1ca
+.section sf1cb
+.section sf1da
+.section sf1db
+.section sf1ea
+.section sf1eb
+.section sf1fa
+.section sf1fb
+.section sf1ga
+.section sf1gb
+.section sf1ha
+.section sf1hb
+.section sf1ia
+.section sf1ib
+.section sf1ja
+.section sf1jb
+.section sf1ka
+.section sf1kb
+.section sf1la
+.section sf1lb
+.section sf1ma
+.section sf1mb
+.section sf1na
+.section sf1nb
+.section sf1oa
+.section sf1ob
+.section sf1pa
+.section sf1pb
+.section sf1qa
+.section sf1qb
+.section sf1ra
+.section sf1rb
+.section sf1sa
+.section sf1sb
+.section sf1ta
+.section sf1tb
+.section sf1ua
+.section sf1ub
+.section sf1va
+.section sf1vb
+.section sf1wa
+.section sf1wb
+.section sf1xa
+.section sf1xb
+.section sf1ya
+.section sf1yb
+.section sf1za
+.section sf1zb
+.section sf11a
+.section sf11b
+.section sf12a
+.section sf12b
+.section sf13a
+.section sf13b
+.section sf14a
+.section sf14b
+.section sf15a
+.section sf15b
+.section sf16a
+.section sf16b
+.section sf17a
+.section sf17b
+.section sf18a
+.section sf18b
+.section sf19a
+.section sf19b
+.section sf10a
+.section sf10b
+.section sf2aa
+.section sf2ab
+.section sf2ba
+.section sf2bb
+.section sf2ca
+.section sf2cb
+.section sf2da
+.section sf2db
+.section sf2ea
+.section sf2eb
+.section sf2fa
+.section sf2fb
+.section sf2ga
+.section sf2gb
+.section sf2ha
+.section sf2hb
+.section sf2ia
+.section sf2ib
+.section sf2ja
+.section sf2jb
+.section sf2ka
+.section sf2kb
+.section sf2la
+.section sf2lb
+.section sf2ma
+.section sf2mb
+.section sf2na
+.section sf2nb
+.section sf2oa
+.section sf2ob
+.section sf2pa
+.section sf2pb
+.section sf2qa
+.section sf2qb
+.section sf2ra
+.section sf2rb
+.section sf2sa
+.section sf2sb
+.section sf2ta
+.section sf2tb
+.section sf2ua
+.section sf2ub
+.section sf2va
+.section sf2vb
+.section sf2wa
+.section sf2wb
+.section sf2xa
+.section sf2xb
+.section sf2ya
+.section sf2yb
+.section sf2za
+.section sf2zb
+.section sf21a
+.section sf21b
+.section sf22a
+.section sf22b
+.section sf23a
+.section sf23b
+.section sf24a
+.section sf24b
+.section sf25a
+.section sf25b
+.section sf26a
+.section sf26b
+.section sf27a
+.section sf27b
+.section sf28a
+.section sf28b
+.section sf29a
+.section sf29b
+.section sf20a
+.section sf20b
+.section sf3aa
+.section sf3ab
+.section sf3ba
+.section sf3bb
+.section sf3ca
+.section sf3cb
+.section sf3da
+.section sf3db
+.section sf3ea
+.section sf3eb
+.section sf3fa
+.section sf3fb
+.section sf3ga
+.section sf3gb
+.section sf3ha
+.section sf3hb
+.section sf3ia
+.section sf3ib
+.section sf3ja
+.section sf3jb
+.section sf3ka
+.section sf3kb
+.section sf3la
+.section sf3lb
+.section sf3ma
+.section sf3mb
+.section sf3na
+.section sf3nb
+.section sf3oa
+.section sf3ob
+.section sf3pa
+.section sf3pb
+.section sf3qa
+.section sf3qb
+.section sf3ra
+.section sf3rb
+.section sf3sa
+.section sf3sb
+.section sf3ta
+.section sf3tb
+.section sf3ua
+.section sf3ub
+.section sf3va
+.section sf3vb
+.section sf3wa
+.section sf3wb
+.section sf3xa
+.section sf3xb
+.section sf3ya
+.section sf3yb
+.section sf3za
+.section sf3zb
+.section sf31a
+.section sf31b
+.section sf32a
+.section sf32b
+.section sf33a
+.section sf33b
+.section sf34a
+.section sf34b
+.section sf35a
+.section sf35b
+.section sf36a
+.section sf36b
+.section sf37a
+.section sf37b
+.section sf38a
+.section sf38b
+.section sf39a
+.section sf39b
+.section sf30a
+.section sf30b
+.section sf4aa
+.section sf4ab
+.section sf4ba
+.section sf4bb
+.section sf4ca
+.section sf4cb
+.section sf4da
+.section sf4db
+.section sf4ea
+.section sf4eb
+.section sf4fa
+.section sf4fb
+.section sf4ga
+.section sf4gb
+.section sf4ha
+.section sf4hb
+.section sf4ia
+.section sf4ib
+.section sf4ja
+.section sf4jb
+.section sf4ka
+.section sf4kb
+.section sf4la
+.section sf4lb
+.section sf4ma
+.section sf4mb
+.section sf4na
+.section sf4nb
+.section sf4oa
+.section sf4ob
+.section sf4pa
+.section sf4pb
+.section sf4qa
+.section sf4qb
+.section sf4ra
+.section sf4rb
+.section sf4sa
+.section sf4sb
+.section sf4ta
+.section sf4tb
+.section sf4ua
+.section sf4ub
+.section sf4va
+.section sf4vb
+.section sf4wa
+.section sf4wb
+.section sf4xa
+.section sf4xb
+.section sf4ya
+.section sf4yb
+.section sf4za
+.section sf4zb
+.section sf41a
+.section sf41b
+.section sf42a
+.section sf42b
+.section sf43a
+.section sf43b
+.section sf44a
+.section sf44b
+.section sf45a
+.section sf45b
+.section sf46a
+.section sf46b
+.section sf47a
+.section sf47b
+.section sf48a
+.section sf48b
+.section sf49a
+.section sf49b
+.section sf40a
+.section sf40b
+.section sf5aa
+.section sf5ab
+.section sf5ba
+.section sf5bb
+.section sf5ca
+.section sf5cb
+.section sf5da
+.section sf5db
+.section sf5ea
+.section sf5eb
+.section sf5fa
+.section sf5fb
+.section sf5ga
+.section sf5gb
+.section sf5ha
+.section sf5hb
+.section sf5ia
+.section sf5ib
+.section sf5ja
+.section sf5jb
+.section sf5ka
+.section sf5kb
+.section sf5la
+.section sf5lb
+.section sf5ma
+.section sf5mb
+.section sf5na
+.section sf5nb
+.section sf5oa
+.section sf5ob
+.section sf5pa
+.section sf5pb
+.section sf5qa
+.section sf5qb
+.section sf5ra
+.section sf5rb
+.section sf5sa
+.section sf5sb
+.section sf5ta
+.section sf5tb
+.section sf5ua
+.section sf5ub
+.section sf5va
+.section sf5vb
+.section sf5wa
+.section sf5wb
+.section sf5xa
+.section sf5xb
+.section sf5ya
+.section sf5yb
+.section sf5za
+.section sf5zb
+.section sf51a
+.section sf51b
+.section sf52a
+.section sf52b
+.section sf53a
+.section sf53b
+.section sf54a
+.section sf54b
+.section sf55a
+.section sf55b
+.section sf56a
+.section sf56b
+.section sf57a
+.section sf57b
+.section sf58a
+.section sf58b
+.section sf59a
+.section sf59b
+.section sf50a
+.section sf50b
+.section sf6aa
+.section sf6ab
+.section sf6ba
+.section sf6bb
+.section sf6ca
+.section sf6cb
+.section sf6da
+.section sf6db
+.section sf6ea
+.section sf6eb
+.section sf6fa
+.section sf6fb
+.section sf6ga
+.section sf6gb
+.section sf6ha
+.section sf6hb
+.section sf6ia
+.section sf6ib
+.section sf6ja
+.section sf6jb
+.section sf6ka
+.section sf6kb
+.section sf6la
+.section sf6lb
+.section sf6ma
+.section sf6mb
+.section sf6na
+.section sf6nb
+.section sf6oa
+.section sf6ob
+.section sf6pa
+.section sf6pb
+.section sf6qa
+.section sf6qb
+.section sf6ra
+.section sf6rb
+.section sf6sa
+.section sf6sb
+.section sf6ta
+.section sf6tb
+.section sf6ua
+.section sf6ub
+.section sf6va
+.section sf6vb
+.section sf6wa
+.section sf6wb
+.section sf6xa
+.section sf6xb
+.section sf6ya
+.section sf6yb
+.section sf6za
+.section sf6zb
+.section sf61a
+.section sf61b
+.section sf62a
+.section sf62b
+.section sf63a
+.section sf63b
+.section sf64a
+.section sf64b
+.section sf65a
+.section sf65b
+.section sf66a
+.section sf66b
+.section sf67a
+.section sf67b
+.section sf68a
+.section sf68b
+.section sf69a
+.section sf69b
+.section sf60a
+.section sf60b
+.section sf7aa
+.section sf7ab
+.section sf7ba
+.section sf7bb
+.section sf7ca
+.section sf7cb
+.section sf7da
+.section sf7db
+.section sf7ea
+.section sf7eb
+.section sf7fa
+.section sf7fb
+.section sf7ga
+.section sf7gb
+.section sf7ha
+.section sf7hb
+.section sf7ia
+.section sf7ib
+.section sf7ja
+.section sf7jb
+.section sf7ka
+.section sf7kb
+.section sf7la
+.section sf7lb
+.section sf7ma
+.section sf7mb
+.section sf7na
+.section sf7nb
+.section sf7oa
+.section sf7ob
+.section sf7pa
+.section sf7pb
+.section sf7qa
+.section sf7qb
+.section sf7ra
+.section sf7rb
+.section sf7sa
+.section sf7sb
+.section sf7ta
+.section sf7tb
+.section sf7ua
+.section sf7ub
+.section sf7va
+.section sf7vb
+.section sf7wa
+.section sf7wb
+.section sf7xa
+.section sf7xb
+.section sf7ya
+.section sf7yb
+.section sf7za
+.section sf7zb
+.section sf71a
+.section sf71b
+.section sf72a
+.section sf72b
+.section sf73a
+.section sf73b
+.section sf74a
+.section sf74b
+.section sf75a
+.section sf75b
+.section sf76a
+.section sf76b
+.section sf77a
+.section sf77b
+.section sf78a
+.section sf78b
+.section sf79a
+.section sf79b
+.section sf70a
+.section sf70b
+.section sf8aa
+.section sf8ab
+.section sf8ba
+.section sf8bb
+.section sf8ca
+.section sf8cb
+.section sf8da
+.section sf8db
+.section sf8ea
+.section sf8eb
+.section sf8fa
+.section sf8fb
+.section sf8ga
+.section sf8gb
+.section sf8ha
+.section sf8hb
+.section sf8ia
+.section sf8ib
+.section sf8ja
+.section sf8jb
+.section sf8ka
+.section sf8kb
+.section sf8la
+.section sf8lb
+.section sf8ma
+.section sf8mb
+.section sf8na
+.section sf8nb
+.section sf8oa
+.section sf8ob
+.section sf8pa
+.section sf8pb
+.section sf8qa
+.section sf8qb
+.section sf8ra
+.section sf8rb
+.section sf8sa
+.section sf8sb
+.section sf8ta
+.section sf8tb
+.section sf8ua
+.section sf8ub
+.section sf8va
+.section sf8vb
+.section sf8wa
+.section sf8wb
+.section sf8xa
+.section sf8xb
+.section sf8ya
+.section sf8yb
+.section sf8za
+.section sf8zb
+.section sf81a
+.section sf81b
+.section sf82a
+.section sf82b
+.section sf83a
+.section sf83b
+.section sf84a
+.section sf84b
+.section sf85a
+.section sf85b
+.section sf86a
+.section sf86b
+.section sf87a
+.section sf87b
+.section sf88a
+.section sf88b
+.section sf89a
+.section sf89b
+.section sf80a
+.section sf80b
+.section sf9aa
+.section sf9ab
+.section sf9ba
+.section sf9bb
+.section sf9ca
+.section sf9cb
+.section sf9da
+.section sf9db
+.section sf9ea
+.section sf9eb
+.section sf9fa
+.section sf9fb
+.section sf9ga
+.section sf9gb
+.section sf9ha
+.section sf9hb
+.section sf9ia
+.section sf9ib
+.section sf9ja
+.section sf9jb
+.section sf9ka
+.section sf9kb
+.section sf9la
+.section sf9lb
+.section sf9ma
+.section sf9mb
+.section sf9na
+.section sf9nb
+.section sf9oa
+.section sf9ob
+.section sf9pa
+.section sf9pb
+.section sf9qa
+.section sf9qb
+.section sf9ra
+.section sf9rb
+.section sf9sa
+.section sf9sb
+.section sf9ta
+.section sf9tb
+.section sf9ua
+.section sf9ub
+.section sf9va
+.section sf9vb
+.section sf9wa
+.section sf9wb
+.section sf9xa
+.section sf9xb
+.section sf9ya
+.section sf9yb
+.section sf9za
+.section sf9zb
+.section sf91a
+.section sf91b
+.section sf92a
+.section sf92b
+.section sf93a
+.section sf93b
+.section sf94a
+.section sf94b
+.section sf95a
+.section sf95b
+.section sf96a
+.section sf96b
+.section sf97a
+.section sf97b
+.section sf98a
+.section sf98b
+.section sf99a
+.section sf99b
+.section sf90a
+.section sf90b
+.section sf0aa
+.section sf0ab
+.section sf0ba
+.section sf0bb
+.section sf0ca
+.section sf0cb
+.section sf0da
+.section sf0db
+.section sf0ea
+.section sf0eb
+.section sf0fa
+.section sf0fb
+.section sf0ga
+.section sf0gb
+.section sf0ha
+.section sf0hb
+.section sf0ia
+.section sf0ib
+.section sf0ja
+.section sf0jb
+.section sf0ka
+.section sf0kb
+.section sf0la
+.section sf0lb
+.section sf0ma
+.section sf0mb
+.section sf0na
+.section sf0nb
+.section sf0oa
+.section sf0ob
+.section sf0pa
+.section sf0pb
+.section sf0qa
+.section sf0qb
+.section sf0ra
+.section sf0rb
+.section sf0sa
+.section sf0sb
+.section sf0ta
+.section sf0tb
+.section sf0ua
+.section sf0ub
+.section sf0va
+.section sf0vb
+.section sf0wa
+.section sf0wb
+.section sf0xa
+.section sf0xb
+.section sf0ya
+.section sf0yb
+.section sf0za
+.section sf0zb
+.section sf01a
+.section sf01b
+.section sf02a
+.section sf02b
+.section sf03a
+.section sf03b
+.section sf04a
+.section sf04b
+.section sf05a
+.section sf05b
+.section sf06a
+.section sf06b
+.section sf07a
+.section sf07b
+.section sf08a
+.section sf08b
+.section sf09a
+.section sf09b
+.section sf00a
+.section sf00b
+.section sgaaa
+.section sgaab
+.section sgaba
+.section sgabb
+.section sgaca
+.section sgacb
+.section sgada
+.section sgadb
+.section sgaea
+.section sgaeb
+.section sgafa
+.section sgafb
+.section sgaga
+.section sgagb
+.section sgaha
+.section sgahb
+.section sgaia
+.section sgaib
+.section sgaja
+.section sgajb
+.section sgaka
+.section sgakb
+.section sgala
+.section sgalb
+.section sgama
+.section sgamb
+.section sgana
+.section sganb
+.section sgaoa
+.section sgaob
+.section sgapa
+.section sgapb
+.section sgaqa
+.section sgaqb
+.section sgara
+.section sgarb
+.section sgasa
+.section sgasb
+.section sgata
+.section sgatb
+.section sgaua
+.section sgaub
+.section sgava
+.section sgavb
+.section sgawa
+.section sgawb
+.section sgaxa
+.section sgaxb
+.section sgaya
+.section sgayb
+.section sgaza
+.section sgazb
+.section sga1a
+.section sga1b
+.section sga2a
+.section sga2b
+.section sga3a
+.section sga3b
+.section sga4a
+.section sga4b
+.section sga5a
+.section sga5b
+.section sga6a
+.section sga6b
+.section sga7a
+.section sga7b
+.section sga8a
+.section sga8b
+.section sga9a
+.section sga9b
+.section sga0a
+.section sga0b
+.section sgbaa
+.section sgbab
+.section sgbba
+.section sgbbb
+.section sgbca
+.section sgbcb
+.section sgbda
+.section sgbdb
+.section sgbea
+.section sgbeb
+.section sgbfa
+.section sgbfb
+.section sgbga
+.section sgbgb
+.section sgbha
+.section sgbhb
+.section sgbia
+.section sgbib
+.section sgbja
+.section sgbjb
+.section sgbka
+.section sgbkb
+.section sgbla
+.section sgblb
+.section sgbma
+.section sgbmb
+.section sgbna
+.section sgbnb
+.section sgboa
+.section sgbob
+.section sgbpa
+.section sgbpb
+.section sgbqa
+.section sgbqb
+.section sgbra
+.section sgbrb
+.section sgbsa
+.section sgbsb
+.section sgbta
+.section sgbtb
+.section sgbua
+.section sgbub
+.section sgbva
+.section sgbvb
+.section sgbwa
+.section sgbwb
+.section sgbxa
+.section sgbxb
+.section sgbya
+.section sgbyb
+.section sgbza
+.section sgbzb
+.section sgb1a
+.section sgb1b
+.section sgb2a
+.section sgb2b
+.section sgb3a
+.section sgb3b
+.section sgb4a
+.section sgb4b
+.section sgb5a
+.section sgb5b
+.section sgb6a
+.section sgb6b
+.section sgb7a
+.section sgb7b
+.section sgb8a
+.section sgb8b
+.section sgb9a
+.section sgb9b
+.section sgb0a
+.section sgb0b
+.section sgcaa
+.section sgcab
+.section sgcba
+.section sgcbb
+.section sgcca
+.section sgccb
+.section sgcda
+.section sgcdb
+.section sgcea
+.section sgceb
+.section sgcfa
+.section sgcfb
+.section sgcga
+.section sgcgb
+.section sgcha
+.section sgchb
+.section sgcia
+.section sgcib
+.section sgcja
+.section sgcjb
+.section sgcka
+.section sgckb
+.section sgcla
+.section sgclb
+.section sgcma
+.section sgcmb
+.section sgcna
+.section sgcnb
+.section sgcoa
+.section sgcob
+.section sgcpa
+.section sgcpb
+.section sgcqa
+.section sgcqb
+.section sgcra
+.section sgcrb
+.section sgcsa
+.section sgcsb
+.section sgcta
+.section sgctb
+.section sgcua
+.section sgcub
+.section sgcva
+.section sgcvb
+.section sgcwa
+.section sgcwb
+.section sgcxa
+.section sgcxb
+.section sgcya
+.section sgcyb
+.section sgcza
+.section sgczb
+.section sgc1a
+.section sgc1b
+.section sgc2a
+.section sgc2b
+.section sgc3a
+.section sgc3b
+.section sgc4a
+.section sgc4b
+.section sgc5a
+.section sgc5b
+.section sgc6a
+.section sgc6b
+.section sgc7a
+.section sgc7b
+.section sgc8a
+.section sgc8b
+.section sgc9a
+.section sgc9b
+.section sgc0a
+.section sgc0b
+.section sgdaa
+.section sgdab
+.section sgdba
+.section sgdbb
+.section sgdca
+.section sgdcb
+.section sgdda
+.section sgddb
+.section sgdea
+.section sgdeb
+.section sgdfa
+.section sgdfb
+.section sgdga
+.section sgdgb
+.section sgdha
+.section sgdhb
+.section sgdia
+.section sgdib
+.section sgdja
+.section sgdjb
+.section sgdka
+.section sgdkb
+.section sgdla
+.section sgdlb
+.section sgdma
+.section sgdmb
+.section sgdna
+.section sgdnb
+.section sgdoa
+.section sgdob
+.section sgdpa
+.section sgdpb
+.section sgdqa
+.section sgdqb
+.section sgdra
+.section sgdrb
+.section sgdsa
+.section sgdsb
+.section sgdta
+.section sgdtb
+.section sgdua
+.section sgdub
+.section sgdva
+.section sgdvb
+.section sgdwa
+.section sgdwb
+.section sgdxa
+.section sgdxb
+.section sgdya
+.section sgdyb
+.section sgdza
+.section sgdzb
+.section sgd1a
+.section sgd1b
+.section sgd2a
+.section sgd2b
+.section sgd3a
+.section sgd3b
+.section sgd4a
+.section sgd4b
+.section sgd5a
+.section sgd5b
+.section sgd6a
+.section sgd6b
+.section sgd7a
+.section sgd7b
+.section sgd8a
+.section sgd8b
+.section sgd9a
+.section sgd9b
+.section sgd0a
+.section sgd0b
+.section sgeaa
+.section sgeab
+.section sgeba
+.section sgebb
+.section sgeca
+.section sgecb
+.section sgeda
+.section sgedb
+.section sgeea
+.section sgeeb
+.section sgefa
+.section sgefb
+.section sgega
+.section sgegb
+.section sgeha
+.section sgehb
+.section sgeia
+.section sgeib
+.section sgeja
+.section sgejb
+.section sgeka
+.section sgekb
+.section sgela
+.section sgelb
+.section sgema
+.section sgemb
+.section sgena
+.section sgenb
+.section sgeoa
+.section sgeob
+.section sgepa
+.section sgepb
+.section sgeqa
+.section sgeqb
+.section sgera
+.section sgerb
+.section sgesa
+.section sgesb
+.section sgeta
+.section sgetb
+.section sgeua
+.section sgeub
+.section sgeva
+.section sgevb
+.section sgewa
+.section sgewb
+.section sgexa
+.section sgexb
+.section sgeya
+.section sgeyb
+.section sgeza
+.section sgezb
+.section sge1a
+.section sge1b
+.section sge2a
+.section sge2b
+.section sge3a
+.section sge3b
+.section sge4a
+.section sge4b
+.section sge5a
+.section sge5b
+.section sge6a
+.section sge6b
+.section sge7a
+.section sge7b
+.section sge8a
+.section sge8b
+.section sge9a
+.section sge9b
+.section sge0a
+.section sge0b
+.section sgfaa
+.section sgfab
+.section sgfba
+.section sgfbb
+.section sgfca
+.section sgfcb
+.section sgfda
+.section sgfdb
+.section sgfea
+.section sgfeb
+.section sgffa
+.section sgffb
+.section sgfga
+.section sgfgb
+.section sgfha
+.section sgfhb
+.section sgfia
+.section sgfib
+.section sgfja
+.section sgfjb
+.section sgfka
+.section sgfkb
+.section sgfla
+.section sgflb
+.section sgfma
+.section sgfmb
+.section sgfna
+.section sgfnb
+.section sgfoa
+.section sgfob
+.section sgfpa
+.section sgfpb
+.section sgfqa
+.section sgfqb
+.section sgfra
+.section sgfrb
+.section sgfsa
+.section sgfsb
+.section sgfta
+.section sgftb
+.section sgfua
+.section sgfub
+.section sgfva
+.section sgfvb
+.section sgfwa
+.section sgfwb
+.section sgfxa
+.section sgfxb
+.section sgfya
+.section sgfyb
+.section sgfza
+.section sgfzb
+.section sgf1a
+.section sgf1b
+.section sgf2a
+.section sgf2b
+.section sgf3a
+.section sgf3b
+.section sgf4a
+.section sgf4b
+.section sgf5a
+.section sgf5b
+.section sgf6a
+.section sgf6b
+.section sgf7a
+.section sgf7b
+.section sgf8a
+.section sgf8b
+.section sgf9a
+.section sgf9b
+.section sgf0a
+.section sgf0b
+.section sggaa
+.section sggab
+.section sggba
+.section sggbb
+.section sggca
+.section sggcb
+.section sggda
+.section sggdb
+.section sggea
+.section sggeb
+.section sggfa
+.section sggfb
+.section sggga
+.section sgggb
+.section sggha
+.section sgghb
+.section sggia
+.section sggib
+.section sggja
+.section sggjb
+.section sggka
+.section sggkb
+.section sggla
+.section sgglb
+.section sggma
+.section sggmb
+.section sggna
+.section sggnb
+.section sggoa
+.section sggob
+.section sggpa
+.section sggpb
+.section sggqa
+.section sggqb
+.section sggra
+.section sggrb
+.section sggsa
+.section sggsb
+.section sggta
+.section sggtb
+.section sggua
+.section sggub
+.section sggva
+.section sggvb
+.section sggwa
+.section sggwb
+.section sggxa
+.section sggxb
+.section sggya
+.section sggyb
+.section sggza
+.section sggzb
+.section sgg1a
+.section sgg1b
+.section sgg2a
+.section sgg2b
+.section sgg3a
+.section sgg3b
+.section sgg4a
+.section sgg4b
+.section sgg5a
+.section sgg5b
+.section sgg6a
+.section sgg6b
+.section sgg7a
+.section sgg7b
+.section sgg8a
+.section sgg8b
+.section sgg9a
+.section sgg9b
+.section sgg0a
+.section sgg0b
+.section sghaa
+.section sghab
+.section sghba
+.section sghbb
+.section sghca
+.section sghcb
+.section sghda
+.section sghdb
+.section sghea
+.section sgheb
+.section sghfa
+.section sghfb
+.section sghga
+.section sghgb
+.section sghha
+.section sghhb
+.section sghia
+.section sghib
+.section sghja
+.section sghjb
+.section sghka
+.section sghkb
+.section sghla
+.section sghlb
+.section sghma
+.section sghmb
+.section sghna
+.section sghnb
+.section sghoa
+.section sghob
+.section sghpa
+.section sghpb
+.section sghqa
+.section sghqb
+.section sghra
+.section sghrb
+.section sghsa
+.section sghsb
+.section sghta
+.section sghtb
+.section sghua
+.section sghub
+.section sghva
+.section sghvb
+.section sghwa
+.section sghwb
+.section sghxa
+.section sghxb
+.section sghya
+.section sghyb
+.section sghza
+.section sghzb
+.section sgh1a
+.section sgh1b
+.section sgh2a
+.section sgh2b
+.section sgh3a
+.section sgh3b
+.section sgh4a
+.section sgh4b
+.section sgh5a
+.section sgh5b
+.section sgh6a
+.section sgh6b
+.section sgh7a
+.section sgh7b
+.section sgh8a
+.section sgh8b
+.section sgh9a
+.section sgh9b
+.section sgh0a
+.section sgh0b
+.section sgiaa
+.section sgiab
+.section sgiba
+.section sgibb
+.section sgica
+.section sgicb
+.section sgida
+.section sgidb
+.section sgiea
+.section sgieb
+.section sgifa
+.section sgifb
+.section sgiga
+.section sgigb
+.section sgiha
+.section sgihb
+.section sgiia
+.section sgiib
+.section sgija
+.section sgijb
+.section sgika
+.section sgikb
+.section sgila
+.section sgilb
+.section sgima
+.section sgimb
+.section sgina
+.section sginb
+.section sgioa
+.section sgiob
+.section sgipa
+.section sgipb
+.section sgiqa
+.section sgiqb
+.section sgira
+.section sgirb
+.section sgisa
+.section sgisb
+.section sgita
+.section sgitb
+.section sgiua
+.section sgiub
+.section sgiva
+.section sgivb
+.section sgiwa
+.section sgiwb
+.section sgixa
+.section sgixb
+.section sgiya
+.section sgiyb
+.section sgiza
+.section sgizb
+.section sgi1a
+.section sgi1b
+.section sgi2a
+.section sgi2b
+.section sgi3a
+.section sgi3b
+.section sgi4a
+.section sgi4b
+.section sgi5a
+.section sgi5b
+.section sgi6a
+.section sgi6b
+.section sgi7a
+.section sgi7b
+.section sgi8a
+.section sgi8b
+.section sgi9a
+.section sgi9b
+.section sgi0a
+.section sgi0b
+.section sgjaa
+.section sgjab
+.section sgjba
+.section sgjbb
+.section sgjca
+.section sgjcb
+.section sgjda
+.section sgjdb
+.section sgjea
+.section sgjeb
+.section sgjfa
+.section sgjfb
+.section sgjga
+.section sgjgb
+.section sgjha
+.section sgjhb
+.section sgjia
+.section sgjib
+.section sgjja
+.section sgjjb
+.section sgjka
+.section sgjkb
+.section sgjla
+.section sgjlb
+.section sgjma
+.section sgjmb
+.section sgjna
+.section sgjnb
+.section sgjoa
+.section sgjob
+.section sgjpa
+.section sgjpb
+.section sgjqa
+.section sgjqb
+.section sgjra
+.section sgjrb
+.section sgjsa
+.section sgjsb
+.section sgjta
+.section sgjtb
+.section sgjua
+.section sgjub
+.section sgjva
+.section sgjvb
+.section sgjwa
+.section sgjwb
+.section sgjxa
+.section sgjxb
+.section sgjya
+.section sgjyb
+.section sgjza
+.section sgjzb
+.section sgj1a
+.section sgj1b
+.section sgj2a
+.section sgj2b
+.section sgj3a
+.section sgj3b
+.section sgj4a
+.section sgj4b
+.section sgj5a
+.section sgj5b
+.section sgj6a
+.section sgj6b
+.section sgj7a
+.section sgj7b
+.section sgj8a
+.section sgj8b
+.section sgj9a
+.section sgj9b
+.section sgj0a
+.section sgj0b
+.section sgkaa
+.section sgkab
+.section sgkba
+.section sgkbb
+.section sgkca
+.section sgkcb
+.section sgkda
+.section sgkdb
+.section sgkea
+.section sgkeb
+.section sgkfa
+.section sgkfb
+.section sgkga
+.section sgkgb
+.section sgkha
+.section sgkhb
+.section sgkia
+.section sgkib
+.section sgkja
+.section sgkjb
+.section sgkka
+.section sgkkb
+.section sgkla
+.section sgklb
+.section sgkma
+.section sgkmb
+.section sgkna
+.section sgknb
+.section sgkoa
+.section sgkob
+.section sgkpa
+.section sgkpb
+.section sgkqa
+.section sgkqb
+.section sgkra
+.section sgkrb
+.section sgksa
+.section sgksb
+.section sgkta
+.section sgktb
+.section sgkua
+.section sgkub
+.section sgkva
+.section sgkvb
+.section sgkwa
+.section sgkwb
+.section sgkxa
+.section sgkxb
+.section sgkya
+.section sgkyb
+.section sgkza
+.section sgkzb
+.section sgk1a
+.section sgk1b
+.section sgk2a
+.section sgk2b
+.section sgk3a
+.section sgk3b
+.section sgk4a
+.section sgk4b
+.section sgk5a
+.section sgk5b
+.section sgk6a
+.section sgk6b
+.section sgk7a
+.section sgk7b
+.section sgk8a
+.section sgk8b
+.section sgk9a
+.section sgk9b
+.section sgk0a
+.section sgk0b
+.section sglaa
+.section sglab
+.section sglba
+.section sglbb
+.section sglca
+.section sglcb
+.section sglda
+.section sgldb
+.section sglea
+.section sgleb
+.section sglfa
+.section sglfb
+.section sglga
+.section sglgb
+.section sglha
+.section sglhb
+.section sglia
+.section sglib
+.section sglja
+.section sgljb
+.section sglka
+.section sglkb
+.section sglla
+.section sgllb
+.section sglma
+.section sglmb
+.section sglna
+.section sglnb
+.section sgloa
+.section sglob
+.section sglpa
+.section sglpb
+.section sglqa
+.section sglqb
+.section sglra
+.section sglrb
+.section sglsa
+.section sglsb
+.section sglta
+.section sgltb
+.section sglua
+.section sglub
+.section sglva
+.section sglvb
+.section sglwa
+.section sglwb
+.section sglxa
+.section sglxb
+.section sglya
+.section sglyb
+.section sglza
+.section sglzb
+.section sgl1a
+.section sgl1b
+.section sgl2a
+.section sgl2b
+.section sgl3a
+.section sgl3b
+.section sgl4a
+.section sgl4b
+.section sgl5a
+.section sgl5b
+.section sgl6a
+.section sgl6b
+.section sgl7a
+.section sgl7b
+.section sgl8a
+.section sgl8b
+.section sgl9a
+.section sgl9b
+.section sgl0a
+.section sgl0b
+.section sgmaa
+.section sgmab
+.section sgmba
+.section sgmbb
+.section sgmca
+.section sgmcb
+.section sgmda
+.section sgmdb
+.section sgmea
+.section sgmeb
+.section sgmfa
+.section sgmfb
+.section sgmga
+.section sgmgb
+.section sgmha
+.section sgmhb
+.section sgmia
+.section sgmib
+.section sgmja
+.section sgmjb
+.section sgmka
+.section sgmkb
+.section sgmla
+.section sgmlb
+.section sgmma
+.section sgmmb
+.section sgmna
+.section sgmnb
+.section sgmoa
+.section sgmob
+.section sgmpa
+.section sgmpb
+.section sgmqa
+.section sgmqb
+.section sgmra
+.section sgmrb
+.section sgmsa
+.section sgmsb
+.section sgmta
+.section sgmtb
+.section sgmua
+.section sgmub
+.section sgmva
+.section sgmvb
+.section sgmwa
+.section sgmwb
+.section sgmxa
+.section sgmxb
+.section sgmya
+.section sgmyb
+.section sgmza
+.section sgmzb
+.section sgm1a
+.section sgm1b
+.section sgm2a
+.section sgm2b
+.section sgm3a
+.section sgm3b
+.section sgm4a
+.section sgm4b
+.section sgm5a
+.section sgm5b
+.section sgm6a
+.section sgm6b
+.section sgm7a
+.section sgm7b
+.section sgm8a
+.section sgm8b
+.section sgm9a
+.section sgm9b
+.section sgm0a
+.section sgm0b
+.section sgnaa
+.section sgnab
+.section sgnba
+.section sgnbb
+.section sgnca
+.section sgncb
+.section sgnda
+.section sgndb
+.section sgnea
+.section sgneb
+.section sgnfa
+.section sgnfb
+.section sgnga
+.section sgngb
+.section sgnha
+.section sgnhb
+.section sgnia
+.section sgnib
+.section sgnja
+.section sgnjb
+.section sgnka
+.section sgnkb
+.section sgnla
+.section sgnlb
+.section sgnma
+.section sgnmb
+.section sgnna
+.section sgnnb
+.section sgnoa
+.section sgnob
+.section sgnpa
+.section sgnpb
+.section sgnqa
+.section sgnqb
+.section sgnra
+.section sgnrb
+.section sgnsa
+.section sgnsb
+.section sgnta
+.section sgntb
+.section sgnua
+.section sgnub
+.section sgnva
+.section sgnvb
+.section sgnwa
+.section sgnwb
+.section sgnxa
+.section sgnxb
+.section sgnya
+.section sgnyb
+.section sgnza
+.section sgnzb
+.section sgn1a
+.section sgn1b
+.section sgn2a
+.section sgn2b
+.section sgn3a
+.section sgn3b
+.section sgn4a
+.section sgn4b
+.section sgn5a
+.section sgn5b
+.section sgn6a
+.section sgn6b
+.section sgn7a
+.section sgn7b
+.section sgn8a
+.section sgn8b
+.section sgn9a
+.section sgn9b
+.section sgn0a
+.section sgn0b
+.section sgoaa
+.section sgoab
+.section sgoba
+.section sgobb
+.section sgoca
+.section sgocb
+.section sgoda
+.section sgodb
+.section sgoea
+.section sgoeb
+.section sgofa
+.section sgofb
+.section sgoga
+.section sgogb
+.section sgoha
+.section sgohb
+.section sgoia
+.section sgoib
+.section sgoja
+.section sgojb
+.section sgoka
+.section sgokb
+.section sgola
+.section sgolb
+.section sgoma
+.section sgomb
+.section sgona
+.section sgonb
+.section sgooa
+.section sgoob
+.section sgopa
+.section sgopb
+.section sgoqa
+.section sgoqb
+.section sgora
+.section sgorb
+.section sgosa
+.section sgosb
+.section sgota
+.section sgotb
+.section sgoua
+.section sgoub
+.section sgova
+.section sgovb
+.section sgowa
+.section sgowb
+.section sgoxa
+.section sgoxb
+.section sgoya
+.section sgoyb
+.section sgoza
+.section sgozb
+.section sgo1a
+.section sgo1b
+.section sgo2a
+.section sgo2b
+.section sgo3a
+.section sgo3b
+.section sgo4a
+.section sgo4b
+.section sgo5a
+.section sgo5b
+.section sgo6a
+.section sgo6b
+.section sgo7a
+.section sgo7b
+.section sgo8a
+.section sgo8b
+.section sgo9a
+.section sgo9b
+.section sgo0a
+.section sgo0b
+.section sgpaa
+.section sgpab
+.section sgpba
+.section sgpbb
+.section sgpca
+.section sgpcb
+.section sgpda
+.section sgpdb
+.section sgpea
+.section sgpeb
+.section sgpfa
+.section sgpfb
+.section sgpga
+.section sgpgb
+.section sgpha
+.section sgphb
+.section sgpia
+.section sgpib
+.section sgpja
+.section sgpjb
+.section sgpka
+.section sgpkb
+.section sgpla
+.section sgplb
+.section sgpma
+.section sgpmb
+.section sgpna
+.section sgpnb
+.section sgpoa
+.section sgpob
+.section sgppa
+.section sgppb
+.section sgpqa
+.section sgpqb
+.section sgpra
+.section sgprb
+.section sgpsa
+.section sgpsb
+.section sgpta
+.section sgptb
+.section sgpua
+.section sgpub
+.section sgpva
+.section sgpvb
+.section sgpwa
+.section sgpwb
+.section sgpxa
+.section sgpxb
+.section sgpya
+.section sgpyb
+.section sgpza
+.section sgpzb
+.section sgp1a
+.section sgp1b
+.section sgp2a
+.section sgp2b
+.section sgp3a
+.section sgp3b
+.section sgp4a
+.section sgp4b
+.section sgp5a
+.section sgp5b
+.section sgp6a
+.section sgp6b
+.section sgp7a
+.section sgp7b
+.section sgp8a
+.section sgp8b
+.section sgp9a
+.section sgp9b
+.section sgp0a
+.section sgp0b
+.section sgqaa
+.section sgqab
+.section sgqba
+.section sgqbb
+.section sgqca
+.section sgqcb
+.section sgqda
+.section sgqdb
+.section sgqea
+.section sgqeb
+.section sgqfa
+.section sgqfb
+.section sgqga
+.section sgqgb
+.section sgqha
+.section sgqhb
+.section sgqia
+.section sgqib
+.section sgqja
+.section sgqjb
+.section sgqka
+.section sgqkb
+.section sgqla
+.section sgqlb
+.section sgqma
+.section sgqmb
+.section sgqna
+.section sgqnb
+.section sgqoa
+.section sgqob
+.section sgqpa
+.section sgqpb
+.section sgqqa
+.section sgqqb
+.section sgqra
+.section sgqrb
+.section sgqsa
+.section sgqsb
+.section sgqta
+.section sgqtb
+.section sgqua
+.section sgqub
+.section sgqva
+.section sgqvb
+.section sgqwa
+.section sgqwb
+.section sgqxa
+.section sgqxb
+.section sgqya
+.section sgqyb
+.section sgqza
+.section sgqzb
+.section sgq1a
+.section sgq1b
+.section sgq2a
+.section sgq2b
+.section sgq3a
+.section sgq3b
+.section sgq4a
+.section sgq4b
+.section sgq5a
+.section sgq5b
+.section sgq6a
+.section sgq6b
+.section sgq7a
+.section sgq7b
+.section sgq8a
+.section sgq8b
+.section sgq9a
+.section sgq9b
+.section sgq0a
+.section sgq0b
+.section sgraa
+.section sgrab
+.section sgrba
+.section sgrbb
+.section sgrca
+.section sgrcb
+.section sgrda
+.section sgrdb
+.section sgrea
+.section sgreb
+.section sgrfa
+.section sgrfb
+.section sgrga
+.section sgrgb
+.section sgrha
+.section sgrhb
+.section sgria
+.section sgrib
+.section sgrja
+.section sgrjb
+.section sgrka
+.section sgrkb
+.section sgrla
+.section sgrlb
+.section sgrma
+.section sgrmb
+.section sgrna
+.section sgrnb
+.section sgroa
+.section sgrob
+.section sgrpa
+.section sgrpb
+.section sgrqa
+.section sgrqb
+.section sgrra
+.section sgrrb
+.section sgrsa
+.section sgrsb
+.section sgrta
+.section sgrtb
+.section sgrua
+.section sgrub
+.section sgrva
+.section sgrvb
+.section sgrwa
+.section sgrwb
+.section sgrxa
+.section sgrxb
+.section sgrya
+.section sgryb
+.section sgrza
+.section sgrzb
+.section sgr1a
+.section sgr1b
+.section sgr2a
+.section sgr2b
+.section sgr3a
+.section sgr3b
+.section sgr4a
+.section sgr4b
+.section sgr5a
+.section sgr5b
+.section sgr6a
+.section sgr6b
+.section sgr7a
+.section sgr7b
+.section sgr8a
+.section sgr8b
+.section sgr9a
+.section sgr9b
+.section sgr0a
+.section sgr0b
+.section sgsaa
+.section sgsab
+.section sgsba
+.section sgsbb
+.section sgsca
+.section sgscb
+.section sgsda
+.section sgsdb
+.section sgsea
+.section sgseb
+.section sgsfa
+.section sgsfb
+.section sgsga
+.section sgsgb
+.section sgsha
+.section sgshb
+.section sgsia
+.section sgsib
+.section sgsja
+.section sgsjb
+.section sgska
+.section sgskb
+.section sgsla
+.section sgslb
+.section sgsma
+.section sgsmb
+.section sgsna
+.section sgsnb
+.section sgsoa
+.section sgsob
+.section sgspa
+.section sgspb
+.section sgsqa
+.section sgsqb
+.section sgsra
+.section sgsrb
+.section sgssa
+.section sgssb
+.section sgsta
+.section sgstb
+.section sgsua
+.section sgsub
+.section sgsva
+.section sgsvb
+.section sgswa
+.section sgswb
+.section sgsxa
+.section sgsxb
+.section sgsya
+.section sgsyb
+.section sgsza
+.section sgszb
+.section sgs1a
+.section sgs1b
+.section sgs2a
+.section sgs2b
+.section sgs3a
+.section sgs3b
+.section sgs4a
+.section sgs4b
+.section sgs5a
+.section sgs5b
+.section sgs6a
+.section sgs6b
+.section sgs7a
+.section sgs7b
+.section sgs8a
+.section sgs8b
+.section sgs9a
+.section sgs9b
+.section sgs0a
+.section sgs0b
+.section sgtaa
+.section sgtab
+.section sgtba
+.section sgtbb
+.section sgtca
+.section sgtcb
+.section sgtda
+.section sgtdb
+.section sgtea
+.section sgteb
+.section sgtfa
+.section sgtfb
+.section sgtga
+.section sgtgb
+.section sgtha
+.section sgthb
+.section sgtia
+.section sgtib
+.section sgtja
+.section sgtjb
+.section sgtka
+.section sgtkb
+.section sgtla
+.section sgtlb
+.section sgtma
+.section sgtmb
+.section sgtna
+.section sgtnb
+.section sgtoa
+.section sgtob
+.section sgtpa
+.section sgtpb
+.section sgtqa
+.section sgtqb
+.section sgtra
+.section sgtrb
+.section sgtsa
+.section sgtsb
+.section sgtta
+.section sgttb
+.section sgtua
+.section sgtub
+.section sgtva
+.section sgtvb
+.section sgtwa
+.section sgtwb
+.section sgtxa
+.section sgtxb
+.section sgtya
+.section sgtyb
+.section sgtza
+.section sgtzb
+.section sgt1a
+.section sgt1b
+.section sgt2a
+.section sgt2b
+.section sgt3a
+.section sgt3b
+.section sgt4a
+.section sgt4b
+.section sgt5a
+.section sgt5b
+.section sgt6a
+.section sgt6b
+.section sgt7a
+.section sgt7b
+.section sgt8a
+.section sgt8b
+.section sgt9a
+.section sgt9b
+.section sgt0a
+.section sgt0b
+.section sguaa
+.section sguab
+.section sguba
+.section sgubb
+.section sguca
+.section sgucb
+.section sguda
+.section sgudb
+.section sguea
+.section sgueb
+.section sgufa
+.section sgufb
+.section sguga
+.section sgugb
+.section sguha
+.section sguhb
+.section sguia
+.section sguib
+.section sguja
+.section sgujb
+.section sguka
+.section sgukb
+.section sgula
+.section sgulb
+.section sguma
+.section sgumb
+.section sguna
+.section sgunb
+.section sguoa
+.section sguob
+.section sgupa
+.section sgupb
+.section sguqa
+.section sguqb
+.section sgura
+.section sgurb
+.section sgusa
+.section sgusb
+.section sguta
+.section sgutb
+.section sguua
+.section sguub
+.section sguva
+.section sguvb
+.section sguwa
+.section sguwb
+.section sguxa
+.section sguxb
+.section sguya
+.section sguyb
+.section sguza
+.section sguzb
+.section sgu1a
+.section sgu1b
+.section sgu2a
+.section sgu2b
+.section sgu3a
+.section sgu3b
+.section sgu4a
+.section sgu4b
+.section sgu5a
+.section sgu5b
+.section sgu6a
+.section sgu6b
+.section sgu7a
+.section sgu7b
+.section sgu8a
+.section sgu8b
+.section sgu9a
+.section sgu9b
+.section sgu0a
+.section sgu0b
+.section sgvaa
+.section sgvab
+.section sgvba
+.section sgvbb
+.section sgvca
+.section sgvcb
+.section sgvda
+.section sgvdb
+.section sgvea
+.section sgveb
+.section sgvfa
+.section sgvfb
+.section sgvga
+.section sgvgb
+.section sgvha
+.section sgvhb
+.section sgvia
+.section sgvib
+.section sgvja
+.section sgvjb
+.section sgvka
+.section sgvkb
+.section sgvla
+.section sgvlb
+.section sgvma
+.section sgvmb
+.section sgvna
+.section sgvnb
+.section sgvoa
+.section sgvob
+.section sgvpa
+.section sgvpb
+.section sgvqa
+.section sgvqb
+.section sgvra
+.section sgvrb
+.section sgvsa
+.section sgvsb
+.section sgvta
+.section sgvtb
+.section sgvua
+.section sgvub
+.section sgvva
+.section sgvvb
+.section sgvwa
+.section sgvwb
+.section sgvxa
+.section sgvxb
+.section sgvya
+.section sgvyb
+.section sgvza
+.section sgvzb
+.section sgv1a
+.section sgv1b
+.section sgv2a
+.section sgv2b
+.section sgv3a
+.section sgv3b
+.section sgv4a
+.section sgv4b
+.section sgv5a
+.section sgv5b
+.section sgv6a
+.section sgv6b
+.section sgv7a
+.section sgv7b
+.section sgv8a
+.section sgv8b
+.section sgv9a
+.section sgv9b
+.section sgv0a
+.section sgv0b
+.section sgwaa
+.section sgwab
+.section sgwba
+.section sgwbb
+.section sgwca
+.section sgwcb
+.section sgwda
+.section sgwdb
+.section sgwea
+.section sgweb
+.section sgwfa
+.section sgwfb
+.section sgwga
+.section sgwgb
+.section sgwha
+.section sgwhb
+.section sgwia
+.section sgwib
+.section sgwja
+.section sgwjb
+.section sgwka
+.section sgwkb
+.section sgwla
+.section sgwlb
+.section sgwma
+.section sgwmb
+.section sgwna
+.section sgwnb
+.section sgwoa
+.section sgwob
+.section sgwpa
+.section sgwpb
+.section sgwqa
+.section sgwqb
+.section sgwra
+.section sgwrb
+.section sgwsa
+.section sgwsb
+.section sgwta
+.section sgwtb
+.section sgwua
+.section sgwub
+.section sgwva
+.section sgwvb
+.section sgwwa
+.section sgwwb
+.section sgwxa
+.section sgwxb
+.section sgwya
+.section sgwyb
+.section sgwza
+.section sgwzb
+.section sgw1a
+.section sgw1b
+.section sgw2a
+.section sgw2b
+.section sgw3a
+.section sgw3b
+.section sgw4a
+.section sgw4b
+.section sgw5a
+.section sgw5b
+.section sgw6a
+.section sgw6b
+.section sgw7a
+.section sgw7b
+.section sgw8a
+.section sgw8b
+.section sgw9a
+.section sgw9b
+.section sgw0a
+.section sgw0b
+.section sgxaa
+.section sgxab
+.section sgxba
+.section sgxbb
+.section sgxca
+.section sgxcb
+.section sgxda
+.section sgxdb
+.section sgxea
+.section sgxeb
+.section sgxfa
+.section sgxfb
+.section sgxga
+.section sgxgb
+.section sgxha
+.section sgxhb
+.section sgxia
+.section sgxib
+.section sgxja
+.section sgxjb
+.section sgxka
+.section sgxkb
+.section sgxla
+.section sgxlb
+.section sgxma
+.section sgxmb
+.section sgxna
+.section sgxnb
+.section sgxoa
+.section sgxob
+.section sgxpa
+.section sgxpb
+.section sgxqa
+.section sgxqb
+.section sgxra
+.section sgxrb
+.section sgxsa
+.section sgxsb
+.section sgxta
+.section sgxtb
+.section sgxua
+.section sgxub
+.section sgxva
+.section sgxvb
+.section sgxwa
+.section sgxwb
+.section sgxxa
+.section sgxxb
+.section sgxya
+.section sgxyb
+.section sgxza
+.section sgxzb
+.section sgx1a
+.section sgx1b
+.section sgx2a
+.section sgx2b
+.section sgx3a
+.section sgx3b
+.section sgx4a
+.section sgx4b
+.section sgx5a
+.section sgx5b
+.section sgx6a
+.section sgx6b
+.section sgx7a
+.section sgx7b
+.section sgx8a
+.section sgx8b
+.section sgx9a
+.section sgx9b
+.section sgx0a
+.section sgx0b
+.section sgyaa
+.section sgyab
+.section sgyba
+.section sgybb
+.section sgyca
+.section sgycb
+.section sgyda
+.section sgydb
+.section sgyea
+.section sgyeb
+.section sgyfa
+.section sgyfb
+.section sgyga
+.section sgygb
+.section sgyha
+.section sgyhb
+.section sgyia
+.section sgyib
+.section sgyja
+.section sgyjb
+.section sgyka
+.section sgykb
+.section sgyla
+.section sgylb
+.section sgyma
+.section sgymb
+.section sgyna
+.section sgynb
+.section sgyoa
+.section sgyob
+.section sgypa
+.section sgypb
+.section sgyqa
+.section sgyqb
+.section sgyra
+.section sgyrb
+.section sgysa
+.section sgysb
+.section sgyta
+.section sgytb
+.section sgyua
+.section sgyub
+.section sgyva
+.section sgyvb
+.section sgywa
+.section sgywb
+.section sgyxa
+.section sgyxb
+.section sgyya
+.section sgyyb
+.section sgyza
+.section sgyzb
+.section sgy1a
+.section sgy1b
+.section sgy2a
+.section sgy2b
+.section sgy3a
+.section sgy3b
+.section sgy4a
+.section sgy4b
+.section sgy5a
+.section sgy5b
+.section sgy6a
+.section sgy6b
+.section sgy7a
+.section sgy7b
+.section sgy8a
+.section sgy8b
+.section sgy9a
+.section sgy9b
+.section sgy0a
+.section sgy0b
+.section sgzaa
+.section sgzab
+.section sgzba
+.section sgzbb
+.section sgzca
+.section sgzcb
+.section sgzda
+.section sgzdb
+.section sgzea
+.section sgzeb
+.section sgzfa
+.section sgzfb
+.section sgzga
+.section sgzgb
+.section sgzha
+.section sgzhb
+.section sgzia
+.section sgzib
+.section sgzja
+.section sgzjb
+.section sgzka
+.section sgzkb
+.section sgzla
+.section sgzlb
+.section sgzma
+.section sgzmb
+.section sgzna
+.section sgznb
+.section sgzoa
+.section sgzob
+.section sgzpa
+.section sgzpb
+.section sgzqa
+.section sgzqb
+.section sgzra
+.section sgzrb
+.section sgzsa
+.section sgzsb
+.section sgzta
+.section sgztb
+.section sgzua
+.section sgzub
+.section sgzva
+.section sgzvb
+.section sgzwa
+.section sgzwb
+.section sgzxa
+.section sgzxb
+.section sgzya
+.section sgzyb
+.section sgzza
+.section sgzzb
+.section sgz1a
+.section sgz1b
+.section sgz2a
+.section sgz2b
+.section sgz3a
+.section sgz3b
+.section sgz4a
+.section sgz4b
+.section sgz5a
+.section sgz5b
+.section sgz6a
+.section sgz6b
+.section sgz7a
+.section sgz7b
+.section sgz8a
+.section sgz8b
+.section sgz9a
+.section sgz9b
+.section sgz0a
+.section sgz0b
+.section sg1aa
+.section sg1ab
+.section sg1ba
+.section sg1bb
+.section sg1ca
+.section sg1cb
+.section sg1da
+.section sg1db
+.section sg1ea
+.section sg1eb
+.section sg1fa
+.section sg1fb
+.section sg1ga
+.section sg1gb
+.section sg1ha
+.section sg1hb
+.section sg1ia
+.section sg1ib
+.section sg1ja
+.section sg1jb
+.section sg1ka
+.section sg1kb
+.section sg1la
+.section sg1lb
+.section sg1ma
+.section sg1mb
+.section sg1na
+.section sg1nb
+.section sg1oa
+.section sg1ob
+.section sg1pa
+.section sg1pb
+.section sg1qa
+.section sg1qb
+.section sg1ra
+.section sg1rb
+.section sg1sa
+.section sg1sb
+.section sg1ta
+.section sg1tb
+.section sg1ua
+.section sg1ub
+.section sg1va
+.section sg1vb
+.section sg1wa
+.section sg1wb
+.section sg1xa
+.section sg1xb
+.section sg1ya
+.section sg1yb
+.section sg1za
+.section sg1zb
+.section sg11a
+.section sg11b
+.section sg12a
+.section sg12b
+.section sg13a
+.section sg13b
+.section sg14a
+.section sg14b
+.section sg15a
+.section sg15b
+.section sg16a
+.section sg16b
+.section sg17a
+.section sg17b
+.section sg18a
+.section sg18b
+.section sg19a
+.section sg19b
+.section sg10a
+.section sg10b
+.section sg2aa
+.section sg2ab
+.section sg2ba
+.section sg2bb
+.section sg2ca
+.section sg2cb
+.section sg2da
+.section sg2db
+.section sg2ea
+.section sg2eb
+.section sg2fa
+.section sg2fb
+.section sg2ga
+.section sg2gb
+.section sg2ha
+.section sg2hb
+.section sg2ia
+.section sg2ib
+.section sg2ja
+.section sg2jb
+.section sg2ka
+.section sg2kb
+.section sg2la
+.section sg2lb
+.section sg2ma
+.section sg2mb
+.section sg2na
+.section sg2nb
+.section sg2oa
+.section sg2ob
+.section sg2pa
+.section sg2pb
+.section sg2qa
+.section sg2qb
+.section sg2ra
+.section sg2rb
+.section sg2sa
+.section sg2sb
+.section sg2ta
+.section sg2tb
+.section sg2ua
+.section sg2ub
+.section sg2va
+.section sg2vb
+.section sg2wa
+.section sg2wb
+.section sg2xa
+.section sg2xb
+.section sg2ya
+.section sg2yb
+.section sg2za
+.section sg2zb
+.section sg21a
+.section sg21b
+.section sg22a
+.section sg22b
+.section sg23a
+.section sg23b
+.section sg24a
+.section sg24b
+.section sg25a
+.section sg25b
+.section sg26a
+.section sg26b
+.section sg27a
+.section sg27b
+.section sg28a
+.section sg28b
+.section sg29a
+.section sg29b
+.section sg20a
+.section sg20b
+.section sg3aa
+.section sg3ab
+.section sg3ba
+.section sg3bb
+.section sg3ca
+.section sg3cb
+.section sg3da
+.section sg3db
+.section sg3ea
+.section sg3eb
+.section sg3fa
+.section sg3fb
+.section sg3ga
+.section sg3gb
+.section sg3ha
+.section sg3hb
+.section sg3ia
+.section sg3ib
+.section sg3ja
+.section sg3jb
+.section sg3ka
+.section sg3kb
+.section sg3la
+.section sg3lb
+.section sg3ma
+.section sg3mb
+.section sg3na
+.section sg3nb
+.section sg3oa
+.section sg3ob
+.section sg3pa
+.section sg3pb
+.section sg3qa
+.section sg3qb
+.section sg3ra
+.section sg3rb
+.section sg3sa
+.section sg3sb
+.section sg3ta
+.section sg3tb
+.section sg3ua
+.section sg3ub
+.section sg3va
+.section sg3vb
+.section sg3wa
+.section sg3wb
+.section sg3xa
+.section sg3xb
+.section sg3ya
+.section sg3yb
+.section sg3za
+.section sg3zb
+.section sg31a
+.section sg31b
+.section sg32a
+.section sg32b
+.section sg33a
+.section sg33b
+.section sg34a
+.section sg34b
+.section sg35a
+.section sg35b
+.section sg36a
+.section sg36b
+.section sg37a
+.section sg37b
+.section sg38a
+.section sg38b
+.section sg39a
+.section sg39b
+.section sg30a
+.section sg30b
+.section sg4aa
+.section sg4ab
+.section sg4ba
+.section sg4bb
+.section sg4ca
+.section sg4cb
+.section sg4da
+.section sg4db
+.section sg4ea
+.section sg4eb
+.section sg4fa
+.section sg4fb
+.section sg4ga
+.section sg4gb
+.section sg4ha
+.section sg4hb
+.section sg4ia
+.section sg4ib
+.section sg4ja
+.section sg4jb
+.section sg4ka
+.section sg4kb
+.section sg4la
+.section sg4lb
+.section sg4ma
+.section sg4mb
+.section sg4na
+.section sg4nb
+.section sg4oa
+.section sg4ob
+.section sg4pa
+.section sg4pb
+.section sg4qa
+.section sg4qb
+.section sg4ra
+.section sg4rb
+.section sg4sa
+.section sg4sb
+.section sg4ta
+.section sg4tb
+.section sg4ua
+.section sg4ub
+.section sg4va
+.section sg4vb
+.section sg4wa
+.section sg4wb
+.section sg4xa
+.section sg4xb
+.section sg4ya
+.section sg4yb
+.section sg4za
+.section sg4zb
+.section sg41a
+.section sg41b
+.section sg42a
+.section sg42b
+.section sg43a
+.section sg43b
+.section sg44a
+.section sg44b
+.section sg45a
+.section sg45b
+.section sg46a
+.section sg46b
+.section sg47a
+.section sg47b
+.section sg48a
+.section sg48b
+.section sg49a
+.section sg49b
+.section sg40a
+.section sg40b
+.section sg5aa
+.section sg5ab
+.section sg5ba
+.section sg5bb
+.section sg5ca
+.section sg5cb
+.section sg5da
+.section sg5db
+.section sg5ea
+.section sg5eb
+.section sg5fa
+.section sg5fb
+.section sg5ga
+.section sg5gb
+.section sg5ha
+.section sg5hb
+.section sg5ia
+.section sg5ib
+.section sg5ja
+.section sg5jb
+.section sg5ka
+.section sg5kb
+.section sg5la
+.section sg5lb
+.section sg5ma
+.section sg5mb
+.section sg5na
+.section sg5nb
+.section sg5oa
+.section sg5ob
+.section sg5pa
+.section sg5pb
+.section sg5qa
+.section sg5qb
+.section sg5ra
+.section sg5rb
+.section sg5sa
+.section sg5sb
+.section sg5ta
+.section sg5tb
+.section sg5ua
+.section sg5ub
+.section sg5va
+.section sg5vb
+.section sg5wa
+.section sg5wb
+.section sg5xa
+.section sg5xb
+.section sg5ya
+.section sg5yb
+.section sg5za
+.section sg5zb
+.section sg51a
+.section sg51b
+.section sg52a
+.section sg52b
+.section sg53a
+.section sg53b
+.section sg54a
+.section sg54b
+.section sg55a
+.section sg55b
+.section sg56a
+.section sg56b
+.section sg57a
+.section sg57b
+.section sg58a
+.section sg58b
+.section sg59a
+.section sg59b
+.section sg50a
+.section sg50b
+.section sg6aa
+.section sg6ab
+.section sg6ba
+.section sg6bb
+.section sg6ca
+.section sg6cb
+.section sg6da
+.section sg6db
+.section sg6ea
+.section sg6eb
+.section sg6fa
+.section sg6fb
+.section sg6ga
+.section sg6gb
+.section sg6ha
+.section sg6hb
+.section sg6ia
+.section sg6ib
+.section sg6ja
+.section sg6jb
+.section sg6ka
+.section sg6kb
+.section sg6la
+.section sg6lb
+.section sg6ma
+.section sg6mb
+.section sg6na
+.section sg6nb
+.section sg6oa
+.section sg6ob
+.section sg6pa
+.section sg6pb
+.section sg6qa
+.section sg6qb
+.section sg6ra
+.section sg6rb
+.section sg6sa
+.section sg6sb
+.section sg6ta
+.section sg6tb
+.section sg6ua
+.section sg6ub
+.section sg6va
+.section sg6vb
+.section sg6wa
+.section sg6wb
+.section sg6xa
+.section sg6xb
+.section sg6ya
+.section sg6yb
+.section sg6za
+.section sg6zb
+.section sg61a
+.section sg61b
+.section sg62a
+.section sg62b
+.section sg63a
+.section sg63b
+.section sg64a
+.section sg64b
+.section sg65a
+.section sg65b
+.section sg66a
+.section sg66b
+.section sg67a
+.section sg67b
+.section sg68a
+.section sg68b
+.section sg69a
+.section sg69b
+.section sg60a
+.section sg60b
+.section sg7aa
+.section sg7ab
+.section sg7ba
+.section sg7bb
+.section sg7ca
+.section sg7cb
+.section sg7da
+.section sg7db
+.section sg7ea
+.section sg7eb
+.section sg7fa
+.section sg7fb
+.section sg7ga
+.section sg7gb
+.section sg7ha
+.section sg7hb
+.section sg7ia
+.section sg7ib
+.section sg7ja
+.section sg7jb
+.section sg7ka
+.section sg7kb
+.section sg7la
+.section sg7lb
+.section sg7ma
+.section sg7mb
+.section sg7na
+.section sg7nb
+.section sg7oa
+.section sg7ob
+.section sg7pa
+.section sg7pb
+.section sg7qa
+.section sg7qb
+.section sg7ra
+.section sg7rb
+.section sg7sa
+.section sg7sb
+.section sg7ta
+.section sg7tb
+.section sg7ua
+.section sg7ub
+.section sg7va
+.section sg7vb
+.section sg7wa
+.section sg7wb
+.section sg7xa
+.section sg7xb
+.section sg7ya
+.section sg7yb
+.section sg7za
+.section sg7zb
+.section sg71a
+.section sg71b
+.section sg72a
+.section sg72b
+.section sg73a
+.section sg73b
+.section sg74a
+.section sg74b
+.section sg75a
+.section sg75b
+.section sg76a
+.section sg76b
+.section sg77a
+.section sg77b
+.section sg78a
+.section sg78b
+.section sg79a
+.section sg79b
+.section sg70a
+.section sg70b
+.section sg8aa
+.section sg8ab
+.section sg8ba
+.section sg8bb
+.section sg8ca
+.section sg8cb
+.section sg8da
+.section sg8db
+.section sg8ea
+.section sg8eb
+.section sg8fa
+.section sg8fb
+.section sg8ga
+.section sg8gb
+.section sg8ha
+.section sg8hb
+.section sg8ia
+.section sg8ib
+.section sg8ja
+.section sg8jb
+.section sg8ka
+.section sg8kb
+.section sg8la
+.section sg8lb
+.section sg8ma
+.section sg8mb
+.section sg8na
+.section sg8nb
+.section sg8oa
+.section sg8ob
+.section sg8pa
+.section sg8pb
+.section sg8qa
+.section sg8qb
+.section sg8ra
+.section sg8rb
+.section sg8sa
+.section sg8sb
+.section sg8ta
+.section sg8tb
+.section sg8ua
+.section sg8ub
+.section sg8va
+.section sg8vb
+.section sg8wa
+.section sg8wb
+.section sg8xa
+.section sg8xb
+.section sg8ya
+.section sg8yb
+.section sg8za
+.section sg8zb
+.section sg81a
+.section sg81b
+.section sg82a
+.section sg82b
+.section sg83a
+.section sg83b
+.section sg84a
+.section sg84b
+.section sg85a
+.section sg85b
+.section sg86a
+.section sg86b
+.section sg87a
+.section sg87b
+.section sg88a
+.section sg88b
+.section sg89a
+.section sg89b
+.section sg80a
+.section sg80b
+.section sg9aa
+.section sg9ab
+.section sg9ba
+.section sg9bb
+.section sg9ca
+.section sg9cb
+.section sg9da
+.section sg9db
+.section sg9ea
+.section sg9eb
+.section sg9fa
+.section sg9fb
+.section sg9ga
+.section sg9gb
+.section sg9ha
+.section sg9hb
+.section sg9ia
+.section sg9ib
+.section sg9ja
+.section sg9jb
+.section sg9ka
+.section sg9kb
+.section sg9la
+.section sg9lb
+.section sg9ma
+.section sg9mb
+.section sg9na
+.section sg9nb
+.section sg9oa
+.section sg9ob
+.section sg9pa
+.section sg9pb
+.section sg9qa
+.section sg9qb
+.section sg9ra
+.section sg9rb
+.section sg9sa
+.section sg9sb
+.section sg9ta
+.section sg9tb
+.section sg9ua
+.section sg9ub
+.section sg9va
+.section sg9vb
+.section sg9wa
+.section sg9wb
+.section sg9xa
+.section sg9xb
+.section sg9ya
+.section sg9yb
+.section sg9za
+.section sg9zb
+.section sg91a
+.section sg91b
+.section sg92a
+.section sg92b
+.section sg93a
+.section sg93b
+.section sg94a
+.section sg94b
+.section sg95a
+.section sg95b
+.section sg96a
+.section sg96b
+.section sg97a
+.section sg97b
+.section sg98a
+.section sg98b
+.section sg99a
+.section sg99b
+.section sg90a
+.section sg90b
+.section sg0aa
+.section sg0ab
+.section sg0ba
+.section sg0bb
+.section sg0ca
+.section sg0cb
+.section sg0da
+.section sg0db
+.section sg0ea
+.section sg0eb
+.section sg0fa
+.section sg0fb
+.section sg0ga
+.section sg0gb
+.section sg0ha
+.section sg0hb
+.section sg0ia
+.section sg0ib
+.section sg0ja
+.section sg0jb
+.section sg0ka
+.section sg0kb
+.section sg0la
+.section sg0lb
+.section sg0ma
+.section sg0mb
+.section sg0na
+.section sg0nb
+.section sg0oa
+.section sg0ob
+.section sg0pa
+.section sg0pb
+.section sg0qa
+.section sg0qb
+.section sg0ra
+.section sg0rb
+.section sg0sa
+.section sg0sb
+.section sg0ta
+.section sg0tb
+.section sg0ua
+.section sg0ub
+.section sg0va
+.section sg0vb
+.section sg0wa
+.section sg0wb
+.section sg0xa
+.section sg0xb
+.section sg0ya
+.section sg0yb
+.section sg0za
+.section sg0zb
+.section sg01a
+.section sg01b
+.section sg02a
+.section sg02b
+.section sg03a
+.section sg03b
+.section sg04a
+.section sg04b
+.section sg05a
+.section sg05b
+.section sg06a
+.section sg06b
+.section sg07a
+.section sg07b
+.section sg08a
+.section sg08b
+.section sg09a
+.section sg09b
+.section sg00a
+.section sg00b
+.section shaaa
+.section shaab
+.section shaba
+.section shabb
+.section shaca
+.section shacb
+.section shada
+.section shadb
+.section shaea
+.section shaeb
+.section shafa
+.section shafb
+.section shaga
+.section shagb
+.section shaha
+.section shahb
+.section shaia
+.section shaib
+.section shaja
+.section shajb
+.section shaka
+.section shakb
+.section shala
+.section shalb
+.section shama
+.section shamb
+.section shana
+.section shanb
+.section shaoa
+.section shaob
+.section shapa
+.section shapb
+.section shaqa
+.section shaqb
+.section shara
+.section sharb
+.section shasa
+.section shasb
+.section shata
+.section shatb
+.section shaua
+.section shaub
+.section shava
+.section shavb
+.section shawa
+.section shawb
+.section shaxa
+.section shaxb
+.section shaya
+.section shayb
+.section shaza
+.section shazb
+.section sha1a
+.section sha1b
+.section sha2a
+.section sha2b
+.section sha3a
+.section sha3b
+.section sha4a
+.section sha4b
+.section sha5a
+.section sha5b
+.section sha6a
+.section sha6b
+.section sha7a
+.section sha7b
+.section sha8a
+.section sha8b
+.section sha9a
+.section sha9b
+.section sha0a
+.section sha0b
+.section shbaa
+.section shbab
+.section shbba
+.section shbbb
+.section shbca
+.section shbcb
+.section shbda
+.section shbdb
+.section shbea
+.section shbeb
+.section shbfa
+.section shbfb
+.section shbga
+.section shbgb
+.section shbha
+.section shbhb
+.section shbia
+.section shbib
+.section shbja
+.section shbjb
+.section shbka
+.section shbkb
+.section shbla
+.section shblb
+.section shbma
+.section shbmb
+.section shbna
+.section shbnb
+.section shboa
+.section shbob
+.section shbpa
+.section shbpb
+.section shbqa
+.section shbqb
+.section shbra
+.section shbrb
+.section shbsa
+.section shbsb
+.section shbta
+.section shbtb
+.section shbua
+.section shbub
+.section shbva
+.section shbvb
+.section shbwa
+.section shbwb
+.section shbxa
+.section shbxb
+.section shbya
+.section shbyb
+.section shbza
+.section shbzb
+.section shb1a
+.section shb1b
+.section shb2a
+.section shb2b
+.section shb3a
+.section shb3b
+.section shb4a
+.section shb4b
+.section shb5a
+.section shb5b
+.section shb6a
+.section shb6b
+.section shb7a
+.section shb7b
+.section shb8a
+.section shb8b
+.section shb9a
+.section shb9b
+.section shb0a
+.section shb0b
+.section shcaa
+.section shcab
+.section shcba
+.section shcbb
+.section shcca
+.section shccb
+.section shcda
+.section shcdb
+.section shcea
+.section shceb
+.section shcfa
+.section shcfb
+.section shcga
+.section shcgb
+.section shcha
+.section shchb
+.section shcia
+.section shcib
+.section shcja
+.section shcjb
+.section shcka
+.section shckb
+.section shcla
+.section shclb
+.section shcma
+.section shcmb
+.section shcna
+.section shcnb
+.section shcoa
+.section shcob
+.section shcpa
+.section shcpb
+.section shcqa
+.section shcqb
+.section shcra
+.section shcrb
+.section shcsa
+.section shcsb
+.section shcta
+.section shctb
+.section shcua
+.section shcub
+.section shcva
+.section shcvb
+.section shcwa
+.section shcwb
+.section shcxa
+.section shcxb
+.section shcya
+.section shcyb
+.section shcza
+.section shczb
+.section shc1a
+.section shc1b
+.section shc2a
+.section shc2b
+.section shc3a
+.section shc3b
+.section shc4a
+.section shc4b
+.section shc5a
+.section shc5b
+.section shc6a
+.section shc6b
+.section shc7a
+.section shc7b
+.section shc8a
+.section shc8b
+.section shc9a
+.section shc9b
+.section shc0a
+.section shc0b
+.section shdaa
+.section shdab
+.section shdba
+.section shdbb
+.section shdca
+.section shdcb
+.section shdda
+.section shddb
+.section shdea
+.section shdeb
+.section shdfa
+.section shdfb
+.section shdga
+.section shdgb
+.section shdha
+.section shdhb
+.section shdia
+.section shdib
+.section shdja
+.section shdjb
+.section shdka
+.section shdkb
+.section shdla
+.section shdlb
+.section shdma
+.section shdmb
+.section shdna
+.section shdnb
+.section shdoa
+.section shdob
+.section shdpa
+.section shdpb
+.section shdqa
+.section shdqb
+.section shdra
+.section shdrb
+.section shdsa
+.section shdsb
+.section shdta
+.section shdtb
+.section shdua
+.section shdub
+.section shdva
+.section shdvb
+.section shdwa
+.section shdwb
+.section shdxa
+.section shdxb
+.section shdya
+.section shdyb
+.section shdza
+.section shdzb
+.section shd1a
+.section shd1b
+.section shd2a
+.section shd2b
+.section shd3a
+.section shd3b
+.section shd4a
+.section shd4b
+.section shd5a
+.section shd5b
+.section shd6a
+.section shd6b
+.section shd7a
+.section shd7b
+.section shd8a
+.section shd8b
+.section shd9a
+.section shd9b
+.section shd0a
+.section shd0b
+.section sheaa
+.section sheab
+.section sheba
+.section shebb
+.section sheca
+.section shecb
+.section sheda
+.section shedb
+.section sheea
+.section sheeb
+.section shefa
+.section shefb
+.section shega
+.section shegb
+.section sheha
+.section shehb
+.section sheia
+.section sheib
+.section sheja
+.section shejb
+.section sheka
+.section shekb
+.section shela
+.section shelb
+.section shema
+.section shemb
+.section shena
+.section shenb
+.section sheoa
+.section sheob
+.section shepa
+.section shepb
+.section sheqa
+.section sheqb
+.section shera
+.section sherb
+.section shesa
+.section shesb
+.section sheta
+.section shetb
+.section sheua
+.section sheub
+.section sheva
+.section shevb
+.section shewa
+.section shewb
+.section shexa
+.section shexb
+.section sheya
+.section sheyb
+.section sheza
+.section shezb
+.section she1a
+.section she1b
+.section she2a
+.section she2b
+.section she3a
+.section she3b
+.section she4a
+.section she4b
+.section she5a
+.section she5b
+.section she6a
+.section she6b
+.section she7a
+.section she7b
+.section she8a
+.section she8b
+.section she9a
+.section she9b
+.section she0a
+.section she0b
+.section shfaa
+.section shfab
+.section shfba
+.section shfbb
+.section shfca
+.section shfcb
+.section shfda
+.section shfdb
+.section shfea
+.section shfeb
+.section shffa
+.section shffb
+.section shfga
+.section shfgb
+.section shfha
+.section shfhb
+.section shfia
+.section shfib
+.section shfja
+.section shfjb
+.section shfka
+.section shfkb
+.section shfla
+.section shflb
+.section shfma
+.section shfmb
+.section shfna
+.section shfnb
+.section shfoa
+.section shfob
+.section shfpa
+.section shfpb
+.section shfqa
+.section shfqb
+.section shfra
+.section shfrb
+.section shfsa
+.section shfsb
+.section shfta
+.section shftb
+.section shfua
+.section shfub
+.section shfva
+.section shfvb
+.section shfwa
+.section shfwb
+.section shfxa
+.section shfxb
+.section shfya
+.section shfyb
+.section shfza
+.section shfzb
+.section shf1a
+.section shf1b
+.section shf2a
+.section shf2b
+.section shf3a
+.section shf3b
+.section shf4a
+.section shf4b
+.section shf5a
+.section shf5b
+.section shf6a
+.section shf6b
+.section shf7a
+.section shf7b
+.section shf8a
+.section shf8b
+.section shf9a
+.section shf9b
+.section shf0a
+.section shf0b
+.section shgaa
+.section shgab
+.section shgba
+.section shgbb
+.section shgca
+.section shgcb
+.section shgda
+.section shgdb
+.section shgea
+.section shgeb
+.section shgfa
+.section shgfb
+.section shgga
+.section shggb
+.section shgha
+.section shghb
+.section shgia
+.section shgib
+.section shgja
+.section shgjb
+.section shgka
+.section shgkb
+.section shgla
+.section shglb
+.section shgma
+.section shgmb
+.section shgna
+.section shgnb
+.section shgoa
+.section shgob
+.section shgpa
+.section shgpb
+.section shgqa
+.section shgqb
+.section shgra
+.section shgrb
+.section shgsa
+.section shgsb
+.section shgta
+.section shgtb
+.section shgua
+.section shgub
+.section shgva
+.section shgvb
+.section shgwa
+.section shgwb
+.section shgxa
+.section shgxb
+.section shgya
+.section shgyb
+.section shgza
+.section shgzb
+.section shg1a
+.section shg1b
+.section shg2a
+.section shg2b
+.section shg3a
+.section shg3b
+.section shg4a
+.section shg4b
+.section shg5a
+.section shg5b
+.section shg6a
+.section shg6b
+.section shg7a
+.section shg7b
+.section shg8a
+.section shg8b
+.section shg9a
+.section shg9b
+.section shg0a
+.section shg0b
+.section shhaa
+.section shhab
+.section shhba
+.section shhbb
+.section shhca
+.section shhcb
+.section shhda
+.section shhdb
+.section shhea
+.section shheb
+.section shhfa
+.section shhfb
+.section shhga
+.section shhgb
+.section shhha
+.section shhhb
+.section shhia
+.section shhib
+.section shhja
+.section shhjb
+.section shhka
+.section shhkb
+.section shhla
+.section shhlb
+.section shhma
+.section shhmb
+.section shhna
+.section shhnb
+.section shhoa
+.section shhob
+.section shhpa
+.section shhpb
+.section shhqa
+.section shhqb
+.section shhra
+.section shhrb
+.section shhsa
+.section shhsb
+.section shhta
+.section shhtb
+.section shhua
+.section shhub
+.section shhva
+.section shhvb
+.section shhwa
+.section shhwb
+.section shhxa
+.section shhxb
+.section shhya
+.section shhyb
+.section shhza
+.section shhzb
+.section shh1a
+.section shh1b
+.section shh2a
+.section shh2b
+.section shh3a
+.section shh3b
+.section shh4a
+.section shh4b
+.section shh5a
+.section shh5b
+.section shh6a
+.section shh6b
+.section shh7a
+.section shh7b
+.section shh8a
+.section shh8b
+.section shh9a
+.section shh9b
+.section shh0a
+.section shh0b
+.section shiaa
+.section shiab
+.section shiba
+.section shibb
+.section shica
+.section shicb
+.section shida
+.section shidb
+.section shiea
+.section shieb
+.section shifa
+.section shifb
+.section shiga
+.section shigb
+.section shiha
+.section shihb
+.section shiia
+.section shiib
+.section shija
+.section shijb
+.section shika
+.section shikb
+.section shila
+.section shilb
+.section shima
+.section shimb
+.section shina
+.section shinb
+.section shioa
+.section shiob
+.section shipa
+.section shipb
+.section shiqa
+.section shiqb
+.section shira
+.section shirb
+.section shisa
+.section shisb
+.section shita
+.section shitb
+.section shiua
+.section shiub
+.section shiva
+.section shivb
+.section shiwa
+.section shiwb
+.section shixa
+.section shixb
+.section shiya
+.section shiyb
+.section shiza
+.section shizb
+.section shi1a
+.section shi1b
+.section shi2a
+.section shi2b
+.section shi3a
+.section shi3b
+.section shi4a
+.section shi4b
+.section shi5a
+.section shi5b
+.section shi6a
+.section shi6b
+.section shi7a
+.section shi7b
+.section shi8a
+.section shi8b
+.section shi9a
+.section shi9b
+.section shi0a
+.section shi0b
+.section shjaa
+.section shjab
+.section shjba
+.section shjbb
+.section shjca
+.section shjcb
+.section shjda
+.section shjdb
+.section shjea
+.section shjeb
+.section shjfa
+.section shjfb
+.section shjga
+.section shjgb
+.section shjha
+.section shjhb
+.section shjia
+.section shjib
+.section shjja
+.section shjjb
+.section shjka
+.section shjkb
+.section shjla
+.section shjlb
+.section shjma
+.section shjmb
+.section shjna
+.section shjnb
+.section shjoa
+.section shjob
+.section shjpa
+.section shjpb
+.section shjqa
+.section shjqb
+.section shjra
+.section shjrb
+.section shjsa
+.section shjsb
+.section shjta
+.section shjtb
+.section shjua
+.section shjub
+.section shjva
+.section shjvb
+.section shjwa
+.section shjwb
+.section shjxa
+.section shjxb
+.section shjya
+.section shjyb
+.section shjza
+.section shjzb
+.section shj1a
+.section shj1b
+.section shj2a
+.section shj2b
+.section shj3a
+.section shj3b
+.section shj4a
+.section shj4b
+.section shj5a
+.section shj5b
+.section shj6a
+.section shj6b
+.section shj7a
+.section shj7b
+.section shj8a
+.section shj8b
+.section shj9a
+.section shj9b
+.section shj0a
+.section shj0b
+.section shkaa
+.section shkab
+.section shkba
+.section shkbb
+.section shkca
+.section shkcb
+.section shkda
+.section shkdb
+.section shkea
+.section shkeb
+.section shkfa
+.section shkfb
+.section shkga
+.section shkgb
+.section shkha
+.section shkhb
+.section shkia
+.section shkib
+.section shkja
+.section shkjb
+.section shkka
+.section shkkb
+.section shkla
+.section shklb
+.section shkma
+.section shkmb
+.section shkna
+.section shknb
+.section shkoa
+.section shkob
+.section shkpa
+.section shkpb
+.section shkqa
+.section shkqb
+.section shkra
+.section shkrb
+.section shksa
+.section shksb
+.section shkta
+.section shktb
+.section shkua
+.section shkub
+.section shkva
+.section shkvb
+.section shkwa
+.section shkwb
+.section shkxa
+.section shkxb
+.section shkya
+.section shkyb
+.section shkza
+.section shkzb
+.section shk1a
+.section shk1b
+.section shk2a
+.section shk2b
+.section shk3a
+.section shk3b
+.section shk4a
+.section shk4b
+.section shk5a
+.section shk5b
+.section shk6a
+.section shk6b
+.section shk7a
+.section shk7b
+.section shk8a
+.section shk8b
+.section shk9a
+.section shk9b
+.section shk0a
+.section shk0b
+.section shlaa
+.section shlab
+.section shlba
+.section shlbb
+.section shlca
+.section shlcb
+.section shlda
+.section shldb
+.section shlea
+.section shleb
+.section shlfa
+.section shlfb
+.section shlga
+.section shlgb
+.section shlha
+.section shlhb
+.section shlia
+.section shlib
+.section shlja
+.section shljb
+.section shlka
+.section shlkb
+.section shlla
+.section shllb
+.section shlma
+.section shlmb
+.section shlna
+.section shlnb
+.section shloa
+.section shlob
+.section shlpa
+.section shlpb
+.section shlqa
+.section shlqb
+.section shlra
+.section shlrb
+.section shlsa
+.section shlsb
+.section shlta
+.section shltb
+.section shlua
+.section shlub
+.section shlva
+.section shlvb
+.section shlwa
+.section shlwb
+.section shlxa
+.section shlxb
+.section shlya
+.section shlyb
+.section shlza
+.section shlzb
+.section shl1a
+.section shl1b
+.section shl2a
+.section shl2b
+.section shl3a
+.section shl3b
+.section shl4a
+.section shl4b
+.section shl5a
+.section shl5b
+.section shl6a
+.section shl6b
+.section shl7a
+.section shl7b
+.section shl8a
+.section shl8b
+.section shl9a
+.section shl9b
+.section shl0a
+.section shl0b
+.section shmaa
+.section shmab
+.section shmba
+.section shmbb
+.section shmca
+.section shmcb
+.section shmda
+.section shmdb
+.section shmea
+.section shmeb
+.section shmfa
+.section shmfb
+.section shmga
+.section shmgb
+.section shmha
+.section shmhb
+.section shmia
+.section shmib
+.section shmja
+.section shmjb
+.section shmka
+.section shmkb
+.section shmla
+.section shmlb
+.section shmma
+.section shmmb
+.section shmna
+.section shmnb
+.section shmoa
+.section shmob
+.section shmpa
+.section shmpb
+.section shmqa
+.section shmqb
+.section shmra
+.section shmrb
+.section shmsa
+.section shmsb
+.section shmta
+.section shmtb
+.section shmua
+.section shmub
+.section shmva
+.section shmvb
+.section shmwa
+.section shmwb
+.section shmxa
+.section shmxb
+.section shmya
+.section shmyb
+.section shmza
+.section shmzb
+.section shm1a
+.section shm1b
+.section shm2a
+.section shm2b
+.section shm3a
+.section shm3b
+.section shm4a
+.section shm4b
+.section shm5a
+.section shm5b
+.section shm6a
+.section shm6b
+.section shm7a
+.section shm7b
+.section shm8a
+.section shm8b
+.section shm9a
+.section shm9b
+.section shm0a
+.section shm0b
+.section shnaa
+.section shnab
+.section shnba
+.section shnbb
+.section shnca
+.section shncb
+.section shnda
+.section shndb
+.section shnea
+.section shneb
+.section shnfa
+.section shnfb
+.section shnga
+.section shngb
+.section shnha
+.section shnhb
+.section shnia
+.section shnib
+.section shnja
+.section shnjb
+.section shnka
+.section shnkb
+.section shnla
+.section shnlb
+.section shnma
+.section shnmb
+.section shnna
+.section shnnb
+.section shnoa
+.section shnob
+.section shnpa
+.section shnpb
+.section shnqa
+.section shnqb
+.section shnra
+.section shnrb
+.section shnsa
+.section shnsb
+.section shnta
+.section shntb
+.section shnua
+.section shnub
+.section shnva
+.section shnvb
+.section shnwa
+.section shnwb
+.section shnxa
+.section shnxb
+.section shnya
+.section shnyb
+.section shnza
+.section shnzb
+.section shn1a
+.section shn1b
+.section shn2a
+.section shn2b
+.section shn3a
+.section shn3b
+.section shn4a
+.section shn4b
+.section shn5a
+.section shn5b
+.section shn6a
+.section shn6b
+.section shn7a
+.section shn7b
+.section shn8a
+.section shn8b
+.section shn9a
+.section shn9b
+.section shn0a
+.section shn0b
+.section shoaa
+.section shoab
+.section shoba
+.section shobb
+.section shoca
+.section shocb
+.section shoda
+.section shodb
+.section shoea
+.section shoeb
+.section shofa
+.section shofb
+.section shoga
+.section shogb
+.section shoha
+.section shohb
+.section shoia
+.section shoib
+.section shoja
+.section shojb
+.section shoka
+.section shokb
+.section shola
+.section sholb
+.section shoma
+.section shomb
+.section shona
+.section shonb
+.section shooa
+.section shoob
+.section shopa
+.section shopb
+.section shoqa
+.section shoqb
+.section shora
+.section shorb
+.section shosa
+.section shosb
+.section shota
+.section shotb
+.section shoua
+.section shoub
+.section shova
+.section shovb
+.section showa
+.section showb
+.section shoxa
+.section shoxb
+.section shoya
+.section shoyb
+.section shoza
+.section shozb
+.section sho1a
+.section sho1b
+.section sho2a
+.section sho2b
+.section sho3a
+.section sho3b
+.section sho4a
+.section sho4b
+.section sho5a
+.section sho5b
+.section sho6a
+.section sho6b
+.section sho7a
+.section sho7b
+.section sho8a
+.section sho8b
+.section sho9a
+.section sho9b
+.section sho0a
+.section sho0b
+.section shpaa
+.section shpab
+.section shpba
+.section shpbb
+.section shpca
+.section shpcb
+.section shpda
+.section shpdb
+.section shpea
+.section shpeb
+.section shpfa
+.section shpfb
+.section shpga
+.section shpgb
+.section shpha
+.section shphb
+.section shpia
+.section shpib
+.section shpja
+.section shpjb
+.section shpka
+.section shpkb
+.section shpla
+.section shplb
+.section shpma
+.section shpmb
+.section shpna
+.section shpnb
+.section shpoa
+.section shpob
+.section shppa
+.section shppb
+.section shpqa
+.section shpqb
+.section shpra
+.section shprb
+.section shpsa
+.section shpsb
+.section shpta
+.section shptb
+.section shpua
+.section shpub
+.section shpva
+.section shpvb
+.section shpwa
+.section shpwb
+.section shpxa
+.section shpxb
+.section shpya
+.section shpyb
+.section shpza
+.section shpzb
+.section shp1a
+.section shp1b
+.section shp2a
+.section shp2b
+.section shp3a
+.section shp3b
+.section shp4a
+.section shp4b
+.section shp5a
+.section shp5b
+.section shp6a
+.section shp6b
+.section shp7a
+.section shp7b
+.section shp8a
+.section shp8b
+.section shp9a
+.section shp9b
+.section shp0a
+.section shp0b
+.section shqaa
+.section shqab
+.section shqba
+.section shqbb
+.section shqca
+.section shqcb
+.section shqda
+.section shqdb
+.section shqea
+.section shqeb
+.section shqfa
+.section shqfb
+.section shqga
+.section shqgb
+.section shqha
+.section shqhb
+.section shqia
+.section shqib
+.section shqja
+.section shqjb
+.section shqka
+.section shqkb
+.section shqla
+.section shqlb
+.section shqma
+.section shqmb
+.section shqna
+.section shqnb
+.section shqoa
+.section shqob
+.section shqpa
+.section shqpb
+.section shqqa
+.section shqqb
+.section shqra
+.section shqrb
+.section shqsa
+.section shqsb
+.section shqta
+.section shqtb
+.section shqua
+.section shqub
+.section shqva
+.section shqvb
+.section shqwa
+.section shqwb
+.section shqxa
+.section shqxb
+.section shqya
+.section shqyb
+.section shqza
+.section shqzb
+.section shq1a
+.section shq1b
+.section shq2a
+.section shq2b
+.section shq3a
+.section shq3b
+.section shq4a
+.section shq4b
+.section shq5a
+.section shq5b
+.section shq6a
+.section shq6b
+.section shq7a
+.section shq7b
+.section shq8a
+.section shq8b
+.section shq9a
+.section shq9b
+.section shq0a
+.section shq0b
+.section shraa
+.section shrab
+.section shrba
+.section shrbb
+.section shrca
+.section shrcb
+.section shrda
+.section shrdb
+.section shrea
+.section shreb
+.section shrfa
+.section shrfb
+.section shrga
+.section shrgb
+.section shrha
+.section shrhb
+.section shria
+.section shrib
+.section shrja
+.section shrjb
+.section shrka
+.section shrkb
+.section shrla
+.section shrlb
+.section shrma
+.section shrmb
+.section shrna
+.section shrnb
+.section shroa
+.section shrob
+.section shrpa
+.section shrpb
+.section shrqa
+.section shrqb
+.section shrra
+.section shrrb
+.section shrsa
+.section shrsb
+.section shrta
+.section shrtb
+.section shrua
+.section shrub
+.section shrva
+.section shrvb
+.section shrwa
+.section shrwb
+.section shrxa
+.section shrxb
+.section shrya
+.section shryb
+.section shrza
+.section shrzb
+.section shr1a
+.section shr1b
+.section shr2a
+.section shr2b
+.section shr3a
+.section shr3b
+.section shr4a
+.section shr4b
+.section shr5a
+.section shr5b
+.section shr6a
+.section shr6b
+.section shr7a
+.section shr7b
+.section shr8a
+.section shr8b
+.section shr9a
+.section shr9b
+.section shr0a
+.section shr0b
+.section shsaa
+.section shsab
+.section shsba
+.section shsbb
+.section shsca
+.section shscb
+.section shsda
+.section shsdb
+.section shsea
+.section shseb
+.section shsfa
+.section shsfb
+.section shsga
+.section shsgb
+.section shsha
+.section shshb
+.section shsia
+.section shsib
+.section shsja
+.section shsjb
+.section shska
+.section shskb
+.section shsla
+.section shslb
+.section shsma
+.section shsmb
+.section shsna
+.section shsnb
+.section shsoa
+.section shsob
+.section shspa
+.section shspb
+.section shsqa
+.section shsqb
+.section shsra
+.section shsrb
+.section shssa
+.section shssb
+.section shsta
+.section shstb
+.section shsua
+.section shsub
+.section shsva
+.section shsvb
+.section shswa
+.section shswb
+.section shsxa
+.section shsxb
+.section shsya
+.section shsyb
+.section shsza
+.section shszb
+.section shs1a
+.section shs1b
+.section shs2a
+.section shs2b
+.section shs3a
+.section shs3b
+.section shs4a
+.section shs4b
+.section shs5a
+.section shs5b
+.section shs6a
+.section shs6b
+.section shs7a
+.section shs7b
+.section shs8a
+.section shs8b
+.section shs9a
+.section shs9b
+.section shs0a
+.section shs0b
+.section shtaa
+.section shtab
+.section shtba
+.section shtbb
+.section shtca
+.section shtcb
+.section shtda
+.section shtdb
+.section shtea
+.section shteb
+.section shtfa
+.section shtfb
+.section shtga
+.section shtgb
+.section shtha
+.section shthb
+.section shtia
+.section shtib
+.section shtja
+.section shtjb
+.section shtka
+.section shtkb
+.section shtla
+.section shtlb
+.section shtma
+.section shtmb
+.section shtna
+.section shtnb
+.section shtoa
+.section shtob
+.section shtpa
+.section shtpb
+.section shtqa
+.section shtqb
+.section shtra
+.section shtrb
+.section shtsa
+.section shtsb
+.section shtta
+.section shttb
+.section shtua
+.section shtub
+.section shtva
+.section shtvb
+.section shtwa
+.section shtwb
+.section shtxa
+.section shtxb
+.section shtya
+.section shtyb
+.section shtza
+.section shtzb
+.section sht1a
+.section sht1b
+.section sht2a
+.section sht2b
+.section sht3a
+.section sht3b
+.section sht4a
+.section sht4b
+.section sht5a
+.section sht5b
+.section sht6a
+.section sht6b
+.section sht7a
+.section sht7b
+.section sht8a
+.section sht8b
+.section sht9a
+.section sht9b
+.section sht0a
+.section sht0b
+.section shuaa
+.section shuab
+.section shuba
+.section shubb
+.section shuca
+.section shucb
+.section shuda
+.section shudb
+.section shuea
+.section shueb
+.section shufa
+.section shufb
+.section shuga
+.section shugb
+.section shuha
+.section shuhb
+.section shuia
+.section shuib
+.section shuja
+.section shujb
+.section shuka
+.section shukb
+.section shula
+.section shulb
+.section shuma
+.section shumb
+.section shuna
+.section shunb
+.section shuoa
+.section shuob
+.section shupa
+.section shupb
+.section shuqa
+.section shuqb
+.section shura
+.section shurb
+.section shusa
+.section shusb
+.section shuta
+.section shutb
+.section shuua
+.section shuub
+.section shuva
+.section shuvb
+.section shuwa
+.section shuwb
+.section shuxa
+.section shuxb
+.section shuya
+.section shuyb
+.section shuza
+.section shuzb
+.section shu1a
+.section shu1b
+.section shu2a
+.section shu2b
+.section shu3a
+.section shu3b
+.section shu4a
+.section shu4b
+.section shu5a
+.section shu5b
+.section shu6a
+.section shu6b
+.section shu7a
+.section shu7b
+.section shu8a
+.section shu8b
+.section shu9a
+.section shu9b
+.section shu0a
+.section shu0b
+.section shvaa
+.section shvab
+.section shvba
+.section shvbb
+.section shvca
+.section shvcb
+.section shvda
+.section shvdb
+.section shvea
+.section shveb
+.section shvfa
+.section shvfb
+.section shvga
+.section shvgb
+.section shvha
+.section shvhb
+.section shvia
+.section shvib
+.section shvja
+.section shvjb
+.section shvka
+.section shvkb
+.section shvla
+.section shvlb
+.section shvma
+.section shvmb
+.section shvna
+.section shvnb
+.section shvoa
+.section shvob
+.section shvpa
+.section shvpb
+.section shvqa
+.section shvqb
+.section shvra
+.section shvrb
+.section shvsa
+.section shvsb
+.section shvta
+.section shvtb
+.section shvua
+.section shvub
+.section shvva
+.section shvvb
+.section shvwa
+.section shvwb
+.section shvxa
+.section shvxb
+.section shvya
+.section shvyb
+.section shvza
+.section shvzb
+.section shv1a
+.section shv1b
+.section shv2a
+.section shv2b
+.section shv3a
+.section shv3b
+.section shv4a
+.section shv4b
+.section shv5a
+.section shv5b
+.section shv6a
+.section shv6b
+.section shv7a
+.section shv7b
+.section shv8a
+.section shv8b
+.section shv9a
+.section shv9b
+.section shv0a
+.section shv0b
+.section shwaa
+.section shwab
+.section shwba
+.section shwbb
+.section shwca
+.section shwcb
+.section shwda
+.section shwdb
+.section shwea
+.section shweb
+.section shwfa
+.section shwfb
+.section shwga
+.section shwgb
+.section shwha
+.section shwhb
+.section shwia
+.section shwib
+.section shwja
+.section shwjb
+.section shwka
+.section shwkb
+.section shwla
+.section shwlb
+.section shwma
+.section shwmb
+.section shwna
+.section shwnb
+.section shwoa
+.section shwob
+.section shwpa
+.section shwpb
+.section shwqa
+.section shwqb
+.section shwra
+.section shwrb
+.section shwsa
+.section shwsb
+.section shwta
+.section shwtb
+.section shwua
+.section shwub
+.section shwva
+.section shwvb
+.section shwwa
+.section shwwb
+.section shwxa
+.section shwxb
+.section shwya
+.section shwyb
+.section shwza
+.section shwzb
+.section shw1a
+.section shw1b
+.section shw2a
+.section shw2b
+.section shw3a
+.section shw3b
+.section shw4a
+.section shw4b
+.section shw5a
+.section shw5b
+.section shw6a
+.section shw6b
+.section shw7a
+.section shw7b
+.section shw8a
+.section shw8b
+.section shw9a
+.section shw9b
+.section shw0a
+.section shw0b
+.section shxaa
+.section shxab
+.section shxba
+.section shxbb
+.section shxca
+.section shxcb
+.section shxda
+.section shxdb
+.section shxea
+.section shxeb
+.section shxfa
+.section shxfb
+.section shxga
+.section shxgb
+.section shxha
+.section shxhb
+.section shxia
+.section shxib
+.section shxja
+.section shxjb
+.section shxka
+.section shxkb
+.section shxla
+.section shxlb
+.section shxma
+.section shxmb
+.section shxna
+.section shxnb
+.section shxoa
+.section shxob
+.section shxpa
+.section shxpb
+.section shxqa
+.section shxqb
+.section shxra
+.section shxrb
+.section shxsa
+.section shxsb
+.section shxta
+.section shxtb
+.section shxua
+.section shxub
+.section shxva
+.section shxvb
+.section shxwa
+.section shxwb
+.section shxxa
+.section shxxb
+.section shxya
+.section shxyb
+.section shxza
+.section shxzb
+.section shx1a
+.section shx1b
+.section shx2a
+.section shx2b
+.section shx3a
+.section shx3b
+.section shx4a
+.section shx4b
+.section shx5a
+.section shx5b
+.section shx6a
+.section shx6b
+.section shx7a
+.section shx7b
+.section shx8a
+.section shx8b
+.section shx9a
+.section shx9b
+.section shx0a
+.section shx0b
+.section shyaa
+.section shyab
+.section shyba
+.section shybb
+.section shyca
+.section shycb
+.section shyda
+.section shydb
+.section shyea
+.section shyeb
+.section shyfa
+.section shyfb
+.section shyga
+.section shygb
+.section shyha
+.section shyhb
+.section shyia
+.section shyib
+.section shyja
+.section shyjb
+.section shyka
+.section shykb
+.section shyla
+.section shylb
+.section shyma
+.section shymb
+.section shyna
+.section shynb
+.section shyoa
+.section shyob
+.section shypa
+.section shypb
+.section shyqa
+.section shyqb
+.section shyra
+.section shyrb
+.section shysa
+.section shysb
+.section shyta
+.section shytb
+.section shyua
+.section shyub
+.section shyva
+.section shyvb
+.section shywa
+.section shywb
+.section shyxa
+.section shyxb
+.section shyya
+.section shyyb
+.section shyza
+.section shyzb
+.section shy1a
+.section shy1b
+.section shy2a
+.section shy2b
+.section shy3a
+.section shy3b
+.section shy4a
+.section shy4b
+.section shy5a
+.section shy5b
+.section shy6a
+.section shy6b
+.section shy7a
+.section shy7b
+.section shy8a
+.section shy8b
+.section shy9a
+.section shy9b
+.section shy0a
+.section shy0b
+.section shzaa
+.section shzab
+.section shzba
+.section shzbb
+.section shzca
+.section shzcb
+.section shzda
+.section shzdb
+.section shzea
+.section shzeb
+.section shzfa
+.section shzfb
+.section shzga
+.section shzgb
+.section shzha
+.section shzhb
+.section shzia
+.section shzib
+.section shzja
+.section shzjb
+.section shzka
+.section shzkb
+.section shzla
+.section shzlb
+.section shzma
+.section shzmb
+.section shzna
+.section shznb
+.section shzoa
+.section shzob
+.section shzpa
+.section shzpb
+.section shzqa
+.section shzqb
+.section shzra
+.section shzrb
+.section shzsa
+.section shzsb
+.section shzta
+.section shztb
+.section shzua
+.section shzub
+.section shzva
+.section shzvb
+.section shzwa
+.section shzwb
+.section shzxa
+.section shzxb
+.section shzya
+.section shzyb
+.section shzza
+.section shzzb
+.section shz1a
+.section shz1b
+.section shz2a
+.section shz2b
+.section shz3a
+.section shz3b
+.section shz4a
+.section shz4b
+.section shz5a
+.section shz5b
+.section shz6a
+.section shz6b
+.section shz7a
+.section shz7b
+.section shz8a
+.section shz8b
+.section shz9a
+.section shz9b
+.section shz0a
+.section shz0b
+.section sh1aa
+.section sh1ab
+.section sh1ba
+.section sh1bb
+.section sh1ca
+.section sh1cb
+.section sh1da
+.section sh1db
+.section sh1ea
+.section sh1eb
+.section sh1fa
+.section sh1fb
+.section sh1ga
+.section sh1gb
+.section sh1ha
+.section sh1hb
+.section sh1ia
+.section sh1ib
+.section sh1ja
+.section sh1jb
+.section sh1ka
+.section sh1kb
+.section sh1la
+.section sh1lb
+.section sh1ma
+.section sh1mb
+.section sh1na
+.section sh1nb
+.section sh1oa
+.section sh1ob
+.section sh1pa
+.section sh1pb
+.section sh1qa
+.section sh1qb
+.section sh1ra
+.section sh1rb
+.section sh1sa
+.section sh1sb
+.section sh1ta
+.section sh1tb
+.section sh1ua
+.section sh1ub
+.section sh1va
+.section sh1vb
+.section sh1wa
+.section sh1wb
+.section sh1xa
+.section sh1xb
+.section sh1ya
+.section sh1yb
+.section sh1za
+.section sh1zb
+.section sh11a
+.section sh11b
+.section sh12a
+.section sh12b
+.section sh13a
+.section sh13b
+.section sh14a
+.section sh14b
+.section sh15a
+.section sh15b
+.section sh16a
+.section sh16b
+.section sh17a
+.section sh17b
+.section sh18a
+.section sh18b
+.section sh19a
+.section sh19b
+.section sh10a
+.section sh10b
+.section sh2aa
+.section sh2ab
+.section sh2ba
+.section sh2bb
+.section sh2ca
+.section sh2cb
+.section sh2da
+.section sh2db
+.section sh2ea
+.section sh2eb
+.section sh2fa
+.section sh2fb
+.section sh2ga
+.section sh2gb
+.section sh2ha
+.section sh2hb
+.section sh2ia
+.section sh2ib
+.section sh2ja
+.section sh2jb
+.section sh2ka
+.section sh2kb
+.section sh2la
+.section sh2lb
+.section sh2ma
+.section sh2mb
+.section sh2na
+.section sh2nb
+.section sh2oa
+.section sh2ob
+.section sh2pa
+.section sh2pb
+.section sh2qa
+.section sh2qb
+.section sh2ra
+.section sh2rb
+.section sh2sa
+.section sh2sb
+.section sh2ta
+.section sh2tb
+.section sh2ua
+.section sh2ub
+.section sh2va
+.section sh2vb
+.section sh2wa
+.section sh2wb
+.section sh2xa
+.section sh2xb
+.section sh2ya
+.section sh2yb
+.section sh2za
+.section sh2zb
+.section sh21a
+.section sh21b
+.section sh22a
+.section sh22b
+.section sh23a
+.section sh23b
+.section sh24a
+.section sh24b
+.section sh25a
+.section sh25b
+.section sh26a
+.section sh26b
+.section sh27a
+.section sh27b
+.section sh28a
+.section sh28b
+.section sh29a
+.section sh29b
+.section sh20a
+.section sh20b
+.section sh3aa
+.section sh3ab
+.section sh3ba
+.section sh3bb
+.section sh3ca
+.section sh3cb
+.section sh3da
+.section sh3db
+.section sh3ea
+.section sh3eb
+.section sh3fa
+.section sh3fb
+.section sh3ga
+.section sh3gb
+.section sh3ha
+.section sh3hb
+.section sh3ia
+.section sh3ib
+.section sh3ja
+.section sh3jb
+.section sh3ka
+.section sh3kb
+.section sh3la
+.section sh3lb
+.section sh3ma
+.section sh3mb
+.section sh3na
+.section sh3nb
+.section sh3oa
+.section sh3ob
+.section sh3pa
+.section sh3pb
+.section sh3qa
+.section sh3qb
+.section sh3ra
+.section sh3rb
+.section sh3sa
+.section sh3sb
+.section sh3ta
+.section sh3tb
+.section sh3ua
+.section sh3ub
+.section sh3va
+.section sh3vb
+.section sh3wa
+.section sh3wb
+.section sh3xa
+.section sh3xb
+.section sh3ya
+.section sh3yb
+.section sh3za
+.section sh3zb
+.section sh31a
+.section sh31b
+.section sh32a
+.section sh32b
+.section sh33a
+.section sh33b
+.section sh34a
+.section sh34b
+.section sh35a
+.section sh35b
+.section sh36a
+.section sh36b
+.section sh37a
+.section sh37b
+.section sh38a
+.section sh38b
+.section sh39a
+.section sh39b
+.section sh30a
+.section sh30b
+.section sh4aa
+.section sh4ab
+.section sh4ba
+.section sh4bb
+.section sh4ca
+.section sh4cb
+.section sh4da
+.section sh4db
+.section sh4ea
+.section sh4eb
+.section sh4fa
+.section sh4fb
+.section sh4ga
+.section sh4gb
+.section sh4ha
+.section sh4hb
+.section sh4ia
+.section sh4ib
+.section sh4ja
+.section sh4jb
+.section sh4ka
+.section sh4kb
+.section sh4la
+.section sh4lb
+.section sh4ma
+.section sh4mb
+.section sh4na
+.section sh4nb
+.section sh4oa
+.section sh4ob
+.section sh4pa
+.section sh4pb
+.section sh4qa
+.section sh4qb
+.section sh4ra
+.section sh4rb
+.section sh4sa
+.section sh4sb
+.section sh4ta
+.section sh4tb
+.section sh4ua
+.section sh4ub
+.section sh4va
+.section sh4vb
+.section sh4wa
+.section sh4wb
+.section sh4xa
+.section sh4xb
+.section sh4ya
+.section sh4yb
+.section sh4za
+.section sh4zb
+.section sh41a
+.section sh41b
+.section sh42a
+.section sh42b
+.section sh43a
+.section sh43b
+.section sh44a
+.section sh44b
+.section sh45a
+.section sh45b
+.section sh46a
+.section sh46b
+.section sh47a
+.section sh47b
+.section sh48a
+.section sh48b
+.section sh49a
+.section sh49b
+.section sh40a
+.section sh40b
+.section sh5aa
+.section sh5ab
+.section sh5ba
+.section sh5bb
+.section sh5ca
+.section sh5cb
+.section sh5da
+.section sh5db
+.section sh5ea
+.section sh5eb
+.section sh5fa
+.section sh5fb
+.section sh5ga
+.section sh5gb
+.section sh5ha
+.section sh5hb
+.section sh5ia
+.section sh5ib
+.section sh5ja
+.section sh5jb
+.section sh5ka
+.section sh5kb
+.section sh5la
+.section sh5lb
+.section sh5ma
+.section sh5mb
+.section sh5na
+.section sh5nb
+.section sh5oa
+.section sh5ob
+.section sh5pa
+.section sh5pb
+.section sh5qa
+.section sh5qb
+.section sh5ra
+.section sh5rb
+.section sh5sa
+.section sh5sb
+.section sh5ta
+.section sh5tb
+.section sh5ua
+.section sh5ub
+.section sh5va
+.section sh5vb
+.section sh5wa
+.section sh5wb
+.section sh5xa
+.section sh5xb
+.section sh5ya
+.section sh5yb
+.section sh5za
+.section sh5zb
+.section sh51a
+.section sh51b
+.section sh52a
+.section sh52b
+.section sh53a
+.section sh53b
+.section sh54a
+.section sh54b
+.section sh55a
+.section sh55b
+.section sh56a
+.section sh56b
+.section sh57a
+.section sh57b
+.section sh58a
+.section sh58b
+.section sh59a
+.section sh59b
+.section sh50a
+.section sh50b
+.section sh6aa
+.section sh6ab
+.section sh6ba
+.section sh6bb
+.section sh6ca
+.section sh6cb
+.section sh6da
+.section sh6db
+.section sh6ea
+.section sh6eb
+.section sh6fa
+.section sh6fb
+.section sh6ga
+.section sh6gb
+.section sh6ha
+.section sh6hb
+.section sh6ia
+.section sh6ib
+.section sh6ja
+.section sh6jb
+.section sh6ka
+.section sh6kb
+.section sh6la
+.section sh6lb
+.section sh6ma
+.section sh6mb
+.section sh6na
+.section sh6nb
+.section sh6oa
+.section sh6ob
+.section sh6pa
+.section sh6pb
+.section sh6qa
+.section sh6qb
+.section sh6ra
+.section sh6rb
+.section sh6sa
+.section sh6sb
+.section sh6ta
+.section sh6tb
+.section sh6ua
+.section sh6ub
+.section sh6va
+.section sh6vb
+.section sh6wa
+.section sh6wb
+.section sh6xa
+.section sh6xb
+.section sh6ya
+.section sh6yb
+.section sh6za
+.section sh6zb
+.section sh61a
+.section sh61b
+.section sh62a
+.section sh62b
+.section sh63a
+.section sh63b
+.section sh64a
+.section sh64b
+.section sh65a
+.section sh65b
+.section sh66a
+.section sh66b
+.section sh67a
+.section sh67b
+.section sh68a
+.section sh68b
+.section sh69a
+.section sh69b
+.section sh60a
+.section sh60b
+.section sh7aa
+.section sh7ab
+.section sh7ba
+.section sh7bb
+.section sh7ca
+.section sh7cb
+.section sh7da
+.section sh7db
+.section sh7ea
+.section sh7eb
+.section sh7fa
+.section sh7fb
+.section sh7ga
+.section sh7gb
+.section sh7ha
+.section sh7hb
+.section sh7ia
+.section sh7ib
+.section sh7ja
+.section sh7jb
+.section sh7ka
+.section sh7kb
+.section sh7la
+.section sh7lb
+.section sh7ma
+.section sh7mb
+.section sh7na
+.section sh7nb
+.section sh7oa
+.section sh7ob
+.section sh7pa
+.section sh7pb
+.section sh7qa
+.section sh7qb
+.section sh7ra
+.section sh7rb
+.section sh7sa
+.section sh7sb
+.section sh7ta
+.section sh7tb
+.section sh7ua
+.section sh7ub
+.section sh7va
+.section sh7vb
+.section sh7wa
+.section sh7wb
+.section sh7xa
+.section sh7xb
+.section sh7ya
+.section sh7yb
+.section sh7za
+.section sh7zb
+.section sh71a
+.section sh71b
+.section sh72a
+.section sh72b
+.section sh73a
+.section sh73b
+.section sh74a
+.section sh74b
+.section sh75a
+.section sh75b
+.section sh76a
+.section sh76b
+.section sh77a
+.section sh77b
+.section sh78a
+.section sh78b
+.section sh79a
+.section sh79b
+.section sh70a
+.section sh70b
+.section sh8aa
+.section sh8ab
+.section sh8ba
+.section sh8bb
+.section sh8ca
+.section sh8cb
+.section sh8da
+.section sh8db
+.section sh8ea
+.section sh8eb
+.section sh8fa
+.section sh8fb
+.section sh8ga
+.section sh8gb
+.section sh8ha
+.section sh8hb
+.section sh8ia
+.section sh8ib
+.section sh8ja
+.section sh8jb
+.section sh8ka
+.section sh8kb
+.section sh8la
+.section sh8lb
+.section sh8ma
+.section sh8mb
+.section sh8na
+.section sh8nb
+.section sh8oa
+.section sh8ob
+.section sh8pa
+.section sh8pb
+.section sh8qa
+.section sh8qb
+.section sh8ra
+.section sh8rb
+.section sh8sa
+.section sh8sb
+.section sh8ta
+.section sh8tb
+.section sh8ua
+.section sh8ub
+.section sh8va
+.section sh8vb
+.section sh8wa
+.section sh8wb
+.section sh8xa
+.section sh8xb
+.section sh8ya
+.section sh8yb
+.section sh8za
+.section sh8zb
+.section sh81a
+.section sh81b
+.section sh82a
+.section sh82b
+.section sh83a
+.section sh83b
+.section sh84a
+.section sh84b
+.section sh85a
+.section sh85b
+.section sh86a
+.section sh86b
+.section sh87a
+.section sh87b
+.section sh88a
+.section sh88b
+.section sh89a
+.section sh89b
+.section sh80a
+.section sh80b
+.section sh9aa
+.section sh9ab
+.section sh9ba
+.section sh9bb
+.section sh9ca
+.section sh9cb
+.section sh9da
+.section sh9db
+.section sh9ea
+.section sh9eb
+.section sh9fa
+.section sh9fb
+.section sh9ga
+.section sh9gb
+.section sh9ha
+.section sh9hb
+.section sh9ia
+.section sh9ib
+.section sh9ja
+.section sh9jb
+.section sh9ka
+.section sh9kb
+.section sh9la
+.section sh9lb
+.section sh9ma
+.section sh9mb
+.section sh9na
+.section sh9nb
+.section sh9oa
+.section sh9ob
+.section sh9pa
+.section sh9pb
+.section sh9qa
+.section sh9qb
+.section sh9ra
+.section sh9rb
+.section sh9sa
+.section sh9sb
+.section sh9ta
+.section sh9tb
+.section sh9ua
+.section sh9ub
+.section sh9va
+.section sh9vb
+.section sh9wa
+.section sh9wb
+.section sh9xa
+.section sh9xb
+.section sh9ya
+.section sh9yb
+.section sh9za
+.section sh9zb
+.section sh91a
+.section sh91b
+.section sh92a
+.section sh92b
+.section sh93a
+.section sh93b
+.section sh94a
+.section sh94b
+.section sh95a
+.section sh95b
+.section sh96a
+.section sh96b
+.section sh97a
+.section sh97b
+.section sh98a
+.section sh98b
+.section sh99a
+.section sh99b
+.section sh90a
+.section sh90b
+.section sh0aa
+.section sh0ab
+.section sh0ba
+.section sh0bb
+.section sh0ca
+.section sh0cb
+.section sh0da
+.section sh0db
+.section sh0ea
+.section sh0eb
+.section sh0fa
+.section sh0fb
+.section sh0ga
+.section sh0gb
+.section sh0ha
+.section sh0hb
+.section sh0ia
+.section sh0ib
+.section sh0ja
+.section sh0jb
+.section sh0ka
+.section sh0kb
+.section sh0la
+.section sh0lb
+.section sh0ma
+.section sh0mb
+.section sh0na
+.section sh0nb
+.section sh0oa
+.section sh0ob
+.section sh0pa
+.section sh0pb
+.section sh0qa
+.section sh0qb
+.section sh0ra
+.section sh0rb
+.section sh0sa
+.section sh0sb
+.section sh0ta
+.section sh0tb
+.section sh0ua
+.section sh0ub
+.section sh0va
+.section sh0vb
+.section sh0wa
+.section sh0wb
+.section sh0xa
+.section sh0xb
+.section sh0ya
+.section sh0yb
+.section sh0za
+.section sh0zb
+.section sh01a
+.section sh01b
+.section sh02a
+.section sh02b
+.section sh03a
+.section sh03b
+.section sh04a
+.section sh04b
+.section sh05a
+.section sh05b
+.section sh06a
+.section sh06b
+.section sh07a
+.section sh07b
+.section sh08a
+.section sh08b
+.section sh09a
+.section sh09b
+.section sh00a
+.section sh00b
+.section siaaa
+.section siaab
+.section siaba
+.section siabb
+.section siaca
+.section siacb
+.section siada
+.section siadb
+.section siaea
+.section siaeb
+.section siafa
+.section siafb
+.section siaga
+.section siagb
+.section siaha
+.section siahb
+.section siaia
+.section siaib
+.section siaja
+.section siajb
+.section siaka
+.section siakb
+.section siala
+.section sialb
+.section siama
+.section siamb
+.section siana
+.section sianb
+.section siaoa
+.section siaob
+.section siapa
+.section siapb
+.section siaqa
+.section siaqb
+.section siara
+.section siarb
+.section siasa
+.section siasb
+.section siata
+.section siatb
+.section siaua
+.section siaub
+.section siava
+.section siavb
+.section siawa
+.section siawb
+.section siaxa
+.section siaxb
+.section siaya
+.section siayb
+.section siaza
+.section siazb
+.section sia1a
+.section sia1b
+.section sia2a
+.section sia2b
+.section sia3a
+.section sia3b
+.section sia4a
+.section sia4b
+.section sia5a
+.section sia5b
+.section sia6a
+.section sia6b
+.section sia7a
+.section sia7b
+.section sia8a
+.section sia8b
+.section sia9a
+.section sia9b
+.section sia0a
+.section sia0b
+.section sibaa
+.section sibab
+.section sibba
+.section sibbb
+.section sibca
+.section sibcb
+.section sibda
+.section sibdb
+.section sibea
+.section sibeb
+.section sibfa
+.section sibfb
+.section sibga
+.section sibgb
+.section sibha
+.section sibhb
+.section sibia
+.section sibib
+.section sibja
+.section sibjb
+.section sibka
+.section sibkb
+.section sibla
+.section siblb
+.section sibma
+.section sibmb
+.section sibna
+.section sibnb
+.section siboa
+.section sibob
+.section sibpa
+.section sibpb
+.section sibqa
+.section sibqb
+.section sibra
+.section sibrb
+.section sibsa
+.section sibsb
+.section sibta
+.section sibtb
+.section sibua
+.section sibub
+.section sibva
+.section sibvb
+.section sibwa
+.section sibwb
+.section sibxa
+.section sibxb
+.section sibya
+.section sibyb
+.section sibza
+.section sibzb
+.section sib1a
+.section sib1b
+.section sib2a
+.section sib2b
+.section sib3a
+.section sib3b
+.section sib4a
+.section sib4b
+.section sib5a
+.section sib5b
+.section sib6a
+.section sib6b
+.section sib7a
+.section sib7b
+.section sib8a
+.section sib8b
+.section sib9a
+.section sib9b
+.section sib0a
+.section sib0b
+.section sicaa
+.section sicab
+.section sicba
+.section sicbb
+.section sicca
+.section siccb
+.section sicda
+.section sicdb
+.section sicea
+.section siceb
+.section sicfa
+.section sicfb
+.section sicga
+.section sicgb
+.section sicha
+.section sichb
+.section sicia
+.section sicib
+.section sicja
+.section sicjb
+.section sicka
+.section sickb
+.section sicla
+.section siclb
+.section sicma
+.section sicmb
+.section sicna
+.section sicnb
+.section sicoa
+.section sicob
+.section sicpa
+.section sicpb
+.section sicqa
+.section sicqb
+.section sicra
+.section sicrb
+.section sicsa
+.section sicsb
+.section sicta
+.section sictb
+.section sicua
+.section sicub
+.section sicva
+.section sicvb
+.section sicwa
+.section sicwb
+.section sicxa
+.section sicxb
+.section sicya
+.section sicyb
+.section sicza
+.section siczb
+.section sic1a
+.section sic1b
+.section sic2a
+.section sic2b
+.section sic3a
+.section sic3b
+.section sic4a
+.section sic4b
+.section sic5a
+.section sic5b
+.section sic6a
+.section sic6b
+.section sic7a
+.section sic7b
+.section sic8a
+.section sic8b
+.section sic9a
+.section sic9b
+.section sic0a
+.section sic0b
+.section sidaa
+.section sidab
+.section sidba
+.section sidbb
+.section sidca
+.section sidcb
+.section sidda
+.section siddb
+.section sidea
+.section sideb
+.section sidfa
+.section sidfb
+.section sidga
+.section sidgb
+.section sidha
+.section sidhb
+.section sidia
+.section sidib
+.section sidja
+.section sidjb
+.section sidka
+.section sidkb
+.section sidla
+.section sidlb
+.section sidma
+.section sidmb
+.section sidna
+.section sidnb
+.section sidoa
+.section sidob
+.section sidpa
+.section sidpb
+.section sidqa
+.section sidqb
+.section sidra
+.section sidrb
+.section sidsa
+.section sidsb
+.section sidta
+.section sidtb
+.section sidua
+.section sidub
+.section sidva
+.section sidvb
+.section sidwa
+.section sidwb
+.section sidxa
+.section sidxb
+.section sidya
+.section sidyb
+.section sidza
+.section sidzb
+.section sid1a
+.section sid1b
+.section sid2a
+.section sid2b
+.section sid3a
+.section sid3b
+.section sid4a
+.section sid4b
+.section sid5a
+.section sid5b
+.section sid6a
+.section sid6b
+.section sid7a
+.section sid7b
+.section sid8a
+.section sid8b
+.section sid9a
+.section sid9b
+.section sid0a
+.section sid0b
+.section sieaa
+.section sieab
+.section sieba
+.section siebb
+.section sieca
+.section siecb
+.section sieda
+.section siedb
+.section sieea
+.section sieeb
+.section siefa
+.section siefb
+.section siega
+.section siegb
+.section sieha
+.section siehb
+.section sieia
+.section sieib
+.section sieja
+.section siejb
+.section sieka
+.section siekb
+.section siela
+.section sielb
+.section siema
+.section siemb
+.section siena
+.section sienb
+.section sieoa
+.section sieob
+.section siepa
+.section siepb
+.section sieqa
+.section sieqb
+.section siera
+.section sierb
+.section siesa
+.section siesb
+.section sieta
+.section sietb
+.section sieua
+.section sieub
+.section sieva
+.section sievb
+.section siewa
+.section siewb
+.section siexa
+.section siexb
+.section sieya
+.section sieyb
+.section sieza
+.section siezb
+.section sie1a
+.section sie1b
+.section sie2a
+.section sie2b
+.section sie3a
+.section sie3b
+.section sie4a
+.section sie4b
+.section sie5a
+.section sie5b
+.section sie6a
+.section sie6b
+.section sie7a
+.section sie7b
+.section sie8a
+.section sie8b
+.section sie9a
+.section sie9b
+.section sie0a
+.section sie0b
+.section sifaa
+.section sifab
+.section sifba
+.section sifbb
+.section sifca
+.section sifcb
+.section sifda
+.section sifdb
+.section sifea
+.section sifeb
+.section siffa
+.section siffb
+.section sifga
+.section sifgb
+.section sifha
+.section sifhb
+.section sifia
+.section sifib
+.section sifja
+.section sifjb
+.section sifka
+.section sifkb
+.section sifla
+.section siflb
+.section sifma
+.section sifmb
+.section sifna
+.section sifnb
+.section sifoa
+.section sifob
+.section sifpa
+.section sifpb
+.section sifqa
+.section sifqb
+.section sifra
+.section sifrb
+.section sifsa
+.section sifsb
+.section sifta
+.section siftb
+.section sifua
+.section sifub
+.section sifva
+.section sifvb
+.section sifwa
+.section sifwb
+.section sifxa
+.section sifxb
+.section sifya
+.section sifyb
+.section sifza
+.section sifzb
+.section sif1a
+.section sif1b
+.section sif2a
+.section sif2b
+.section sif3a
+.section sif3b
+.section sif4a
+.section sif4b
+.section sif5a
+.section sif5b
+.section sif6a
+.section sif6b
+.section sif7a
+.section sif7b
+.section sif8a
+.section sif8b
+.section sif9a
+.section sif9b
+.section sif0a
+.section sif0b
+.section sigaa
+.section sigab
+.section sigba
+.section sigbb
+.section sigca
+.section sigcb
+.section sigda
+.section sigdb
+.section sigea
+.section sigeb
+.section sigfa
+.section sigfb
+.section sigga
+.section siggb
+.section sigha
+.section sighb
+.section sigia
+.section sigib
+.section sigja
+.section sigjb
+.section sigka
+.section sigkb
+.section sigla
+.section siglb
+.section sigma
+.section sigmb
+.section signa
+.section signb
+.section sigoa
+.section sigob
+.section sigpa
+.section sigpb
+.section sigqa
+.section sigqb
+.section sigra
+.section sigrb
+.section sigsa
+.section sigsb
+.section sigta
+.section sigtb
+.section sigua
+.section sigub
+.section sigva
+.section sigvb
+.section sigwa
+.section sigwb
+.section sigxa
+.section sigxb
+.section sigya
+.section sigyb
+.section sigza
+.section sigzb
+.section sig1a
+.section sig1b
+.section sig2a
+.section sig2b
+.section sig3a
+.section sig3b
+.section sig4a
+.section sig4b
+.section sig5a
+.section sig5b
+.section sig6a
+.section sig6b
+.section sig7a
+.section sig7b
+.section sig8a
+.section sig8b
+.section sig9a
+.section sig9b
+.section sig0a
+.section sig0b
+.section sihaa
+.section sihab
+.section sihba
+.section sihbb
+.section sihca
+.section sihcb
+.section sihda
+.section sihdb
+.section sihea
+.section siheb
+.section sihfa
+.section sihfb
+.section sihga
+.section sihgb
+.section sihha
+.section sihhb
+.section sihia
+.section sihib
+.section sihja
+.section sihjb
+.section sihka
+.section sihkb
+.section sihla
+.section sihlb
+.section sihma
+.section sihmb
+.section sihna
+.section sihnb
+.section sihoa
+.section sihob
+.section sihpa
+.section sihpb
+.section sihqa
+.section sihqb
+.section sihra
+.section sihrb
+.section sihsa
+.section sihsb
+.section sihta
+.section sihtb
+.section sihua
+.section sihub
+.section sihva
+.section sihvb
+.section sihwa
+.section sihwb
+.section sihxa
+.section sihxb
+.section sihya
+.section sihyb
+.section sihza
+.section sihzb
+.section sih1a
+.section sih1b
+.section sih2a
+.section sih2b
+.section sih3a
+.section sih3b
+.section sih4a
+.section sih4b
+.section sih5a
+.section sih5b
+.section sih6a
+.section sih6b
+.section sih7a
+.section sih7b
+.section sih8a
+.section sih8b
+.section sih9a
+.section sih9b
+.section sih0a
+.section sih0b
+.section siiaa
+.section siiab
+.section siiba
+.section siibb
+.section siica
+.section siicb
+.section siida
+.section siidb
+.section siiea
+.section siieb
+.section siifa
+.section siifb
+.section siiga
+.section siigb
+.section siiha
+.section siihb
+.section siiia
+.section siiib
+.section siija
+.section siijb
+.section siika
+.section siikb
+.section siila
+.section siilb
+.section siima
+.section siimb
+.section siina
+.section siinb
+.section siioa
+.section siiob
+.section siipa
+.section siipb
+.section siiqa
+.section siiqb
+.section siira
+.section siirb
+.section siisa
+.section siisb
+.section siita
+.section siitb
+.section siiua
+.section siiub
+.section siiva
+.section siivb
+.section siiwa
+.section siiwb
+.section siixa
+.section siixb
+.section siiya
+.section siiyb
+.section siiza
+.section siizb
+.section sii1a
+.section sii1b
+.section sii2a
+.section sii2b
+.section sii3a
+.section sii3b
+.section sii4a
+.section sii4b
+.section sii5a
+.section sii5b
+.section sii6a
+.section sii6b
+.section sii7a
+.section sii7b
+.section sii8a
+.section sii8b
+.section sii9a
+.section sii9b
+.section sii0a
+.section sii0b
+.section sijaa
+.section sijab
+.section sijba
+.section sijbb
+.section sijca
+.section sijcb
+.section sijda
+.section sijdb
+.section sijea
+.section sijeb
+.section sijfa
+.section sijfb
+.section sijga
+.section sijgb
+.section sijha
+.section sijhb
+.section sijia
+.section sijib
+.section sijja
+.section sijjb
+.section sijka
+.section sijkb
+.section sijla
+.section sijlb
+.section sijma
+.section sijmb
+.section sijna
+.section sijnb
+.section sijoa
+.section sijob
+.section sijpa
+.section sijpb
+.section sijqa
+.section sijqb
+.section sijra
+.section sijrb
+.section sijsa
+.section sijsb
+.section sijta
+.section sijtb
+.section sijua
+.section sijub
+.section sijva
+.section sijvb
+.section sijwa
+.section sijwb
+.section sijxa
+.section sijxb
+.section sijya
+.section sijyb
+.section sijza
+.section sijzb
+.section sij1a
+.section sij1b
+.section sij2a
+.section sij2b
+.section sij3a
+.section sij3b
+.section sij4a
+.section sij4b
+.section sij5a
+.section sij5b
+.section sij6a
+.section sij6b
+.section sij7a
+.section sij7b
+.section sij8a
+.section sij8b
+.section sij9a
+.section sij9b
+.section sij0a
+.section sij0b
+.section sikaa
+.section sikab
+.section sikba
+.section sikbb
+.section sikca
+.section sikcb
+.section sikda
+.section sikdb
+.section sikea
+.section sikeb
+.section sikfa
+.section sikfb
+.section sikga
+.section sikgb
+.section sikha
+.section sikhb
+.section sikia
+.section sikib
+.section sikja
+.section sikjb
+.section sikka
+.section sikkb
+.section sikla
+.section siklb
+.section sikma
+.section sikmb
+.section sikna
+.section siknb
+.section sikoa
+.section sikob
+.section sikpa
+.section sikpb
+.section sikqa
+.section sikqb
+.section sikra
+.section sikrb
+.section siksa
+.section siksb
+.section sikta
+.section siktb
+.section sikua
+.section sikub
+.section sikva
+.section sikvb
+.section sikwa
+.section sikwb
+.section sikxa
+.section sikxb
+.section sikya
+.section sikyb
+.section sikza
+.section sikzb
+.section sik1a
+.section sik1b
+.section sik2a
+.section sik2b
+.section sik3a
+.section sik3b
+.section sik4a
+.section sik4b
+.section sik5a
+.section sik5b
+.section sik6a
+.section sik6b
+.section sik7a
+.section sik7b
+.section sik8a
+.section sik8b
+.section sik9a
+.section sik9b
+.section sik0a
+.section sik0b
+.section silaa
+.section silab
+.section silba
+.section silbb
+.section silca
+.section silcb
+.section silda
+.section sildb
+.section silea
+.section sileb
+.section silfa
+.section silfb
+.section silga
+.section silgb
+.section silha
+.section silhb
+.section silia
+.section silib
+.section silja
+.section siljb
+.section silka
+.section silkb
+.section silla
+.section sillb
+.section silma
+.section silmb
+.section silna
+.section silnb
+.section siloa
+.section silob
+.section silpa
+.section silpb
+.section silqa
+.section silqb
+.section silra
+.section silrb
+.section silsa
+.section silsb
+.section silta
+.section siltb
+.section silua
+.section silub
+.section silva
+.section silvb
+.section silwa
+.section silwb
+.section silxa
+.section silxb
+.section silya
+.section silyb
+.section silza
+.section silzb
+.section sil1a
+.section sil1b
+.section sil2a
+.section sil2b
+.section sil3a
+.section sil3b
+.section sil4a
+.section sil4b
+.section sil5a
+.section sil5b
+.section sil6a
+.section sil6b
+.section sil7a
+.section sil7b
+.section sil8a
+.section sil8b
+.section sil9a
+.section sil9b
+.section sil0a
+.section sil0b
+.section simaa
+.section simab
+.section simba
+.section simbb
+.section simca
+.section simcb
+.section simda
+.section simdb
+.section simea
+.section simeb
+.section simfa
+.section simfb
+.section simga
+.section simgb
+.section simha
+.section simhb
+.section simia
+.section simib
+.section simja
+.section simjb
+.section simka
+.section simkb
+.section simla
+.section simlb
+.section simma
+.section simmb
+.section simna
+.section simnb
+.section simoa
+.section simob
+.section simpa
+.section simpb
+.section simqa
+.section simqb
+.section simra
+.section simrb
+.section simsa
+.section simsb
+.section simta
+.section simtb
+.section simua
+.section simub
+.section simva
+.section simvb
+.section simwa
+.section simwb
+.section simxa
+.section simxb
+.section simya
+.section simyb
+.section simza
+.section simzb
+.section sim1a
+.section sim1b
+.section sim2a
+.section sim2b
+.section sim3a
+.section sim3b
+.section sim4a
+.section sim4b
+.section sim5a
+.section sim5b
+.section sim6a
+.section sim6b
+.section sim7a
+.section sim7b
+.section sim8a
+.section sim8b
+.section sim9a
+.section sim9b
+.section sim0a
+.section sim0b
+.section sinaa
+.section sinab
+.section sinba
+.section sinbb
+.section sinca
+.section sincb
+.section sinda
+.section sindb
+.section sinea
+.section sineb
+.section sinfa
+.section sinfb
+.section singa
+.section singb
+.section sinha
+.section sinhb
+.section sinia
+.section sinib
+.section sinja
+.section sinjb
+.section sinka
+.section sinkb
+.section sinla
+.section sinlb
+.section sinma
+.section sinmb
+.section sinna
+.section sinnb
+.section sinoa
+.section sinob
+.section sinpa
+.section sinpb
+.section sinqa
+.section sinqb
+.section sinra
+.section sinrb
+.section sinsa
+.section sinsb
+.section sinta
+.section sintb
+.section sinua
+.section sinub
+.section sinva
+.section sinvb
+.section sinwa
+.section sinwb
+.section sinxa
+.section sinxb
+.section sinya
+.section sinyb
+.section sinza
+.section sinzb
+.section sin1a
+.section sin1b
+.section sin2a
+.section sin2b
+.section sin3a
+.section sin3b
+.section sin4a
+.section sin4b
+.section sin5a
+.section sin5b
+.section sin6a
+.section sin6b
+.section sin7a
+.section sin7b
+.section sin8a
+.section sin8b
+.section sin9a
+.section sin9b
+.section sin0a
+.section sin0b
+.section sioaa
+.section sioab
+.section sioba
+.section siobb
+.section sioca
+.section siocb
+.section sioda
+.section siodb
+.section sioea
+.section sioeb
+.section siofa
+.section siofb
+.section sioga
+.section siogb
+.section sioha
+.section siohb
+.section sioia
+.section sioib
+.section sioja
+.section siojb
+.section sioka
+.section siokb
+.section siola
+.section siolb
+.section sioma
+.section siomb
+.section siona
+.section sionb
+.section siooa
+.section sioob
+.section siopa
+.section siopb
+.section sioqa
+.section sioqb
+.section siora
+.section siorb
+.section siosa
+.section siosb
+.section siota
+.section siotb
+.section sioua
+.section sioub
+.section siova
+.section siovb
+.section siowa
+.section siowb
+.section sioxa
+.section sioxb
+.section sioya
+.section sioyb
+.section sioza
+.section siozb
+.section sio1a
+.section sio1b
+.section sio2a
+.section sio2b
+.section sio3a
+.section sio3b
+.section sio4a
+.section sio4b
+.section sio5a
+.section sio5b
+.section sio6a
+.section sio6b
+.section sio7a
+.section sio7b
+.section sio8a
+.section sio8b
+.section sio9a
+.section sio9b
+.section sio0a
+.section sio0b
+.section sipaa
+.section sipab
+.section sipba
+.section sipbb
+.section sipca
+.section sipcb
+.section sipda
+.section sipdb
+.section sipea
+.section sipeb
+.section sipfa
+.section sipfb
+.section sipga
+.section sipgb
+.section sipha
+.section siphb
+.section sipia
+.section sipib
+.section sipja
+.section sipjb
+.section sipka
+.section sipkb
+.section sipla
+.section siplb
+.section sipma
+.section sipmb
+.section sipna
+.section sipnb
+.section sipoa
+.section sipob
+.section sippa
+.section sippb
+.section sipqa
+.section sipqb
+.section sipra
+.section siprb
+.section sipsa
+.section sipsb
+.section sipta
+.section siptb
+.section sipua
+.section sipub
+.section sipva
+.section sipvb
+.section sipwa
+.section sipwb
+.section sipxa
+.section sipxb
+.section sipya
+.section sipyb
+.section sipza
+.section sipzb
+.section sip1a
+.section sip1b
+.section sip2a
+.section sip2b
+.section sip3a
+.section sip3b
+.section sip4a
+.section sip4b
+.section sip5a
+.section sip5b
+.section sip6a
+.section sip6b
+.section sip7a
+.section sip7b
+.section sip8a
+.section sip8b
+.section sip9a
+.section sip9b
+.section sip0a
+.section sip0b
+.section siqaa
+.section siqab
+.section siqba
+.section siqbb
+.section siqca
+.section siqcb
+.section siqda
+.section siqdb
+.section siqea
+.section siqeb
+.section siqfa
+.section siqfb
+.section siqga
+.section siqgb
+.section siqha
+.section siqhb
+.section siqia
+.section siqib
+.section siqja
+.section siqjb
+.section siqka
+.section siqkb
+.section siqla
+.section siqlb
+.section siqma
+.section siqmb
+.section siqna
+.section siqnb
+.section siqoa
+.section siqob
+.section siqpa
+.section siqpb
+.section siqqa
+.section siqqb
+.section siqra
+.section siqrb
+.section siqsa
+.section siqsb
+.section siqta
+.section siqtb
+.section siqua
+.section siqub
+.section siqva
+.section siqvb
+.section siqwa
+.section siqwb
+.section siqxa
+.section siqxb
+.section siqya
+.section siqyb
+.section siqza
+.section siqzb
+.section siq1a
+.section siq1b
+.section siq2a
+.section siq2b
+.section siq3a
+.section siq3b
+.section siq4a
+.section siq4b
+.section siq5a
+.section siq5b
+.section siq6a
+.section siq6b
+.section siq7a
+.section siq7b
+.section siq8a
+.section siq8b
+.section siq9a
+.section siq9b
+.section siq0a
+.section siq0b
+.section siraa
+.section sirab
+.section sirba
+.section sirbb
+.section sirca
+.section sircb
+.section sirda
+.section sirdb
+.section sirea
+.section sireb
+.section sirfa
+.section sirfb
+.section sirga
+.section sirgb
+.section sirha
+.section sirhb
+.section siria
+.section sirib
+.section sirja
+.section sirjb
+.section sirka
+.section sirkb
+.section sirla
+.section sirlb
+.section sirma
+.section sirmb
+.section sirna
+.section sirnb
+.section siroa
+.section sirob
+.section sirpa
+.section sirpb
+.section sirqa
+.section sirqb
+.section sirra
+.section sirrb
+.section sirsa
+.section sirsb
+.section sirta
+.section sirtb
+.section sirua
+.section sirub
+.section sirva
+.section sirvb
+.section sirwa
+.section sirwb
+.section sirxa
+.section sirxb
+.section sirya
+.section siryb
+.section sirza
+.section sirzb
+.section sir1a
+.section sir1b
+.section sir2a
+.section sir2b
+.section sir3a
+.section sir3b
+.section sir4a
+.section sir4b
+.section sir5a
+.section sir5b
+.section sir6a
+.section sir6b
+.section sir7a
+.section sir7b
+.section sir8a
+.section sir8b
+.section sir9a
+.section sir9b
+.section sir0a
+.section sir0b
+.section sisaa
+.section sisab
+.section sisba
+.section sisbb
+.section sisca
+.section siscb
+.section sisda
+.section sisdb
+.section sisea
+.section siseb
+.section sisfa
+.section sisfb
+.section sisga
+.section sisgb
+.section sisha
+.section sishb
+.section sisia
+.section sisib
+.section sisja
+.section sisjb
+.section siska
+.section siskb
+.section sisla
+.section sislb
+.section sisma
+.section sismb
+.section sisna
+.section sisnb
+.section sisoa
+.section sisob
+.section sispa
+.section sispb
+.section sisqa
+.section sisqb
+.section sisra
+.section sisrb
+.section sissa
+.section sissb
+.section sista
+.section sistb
+.section sisua
+.section sisub
+.section sisva
+.section sisvb
+.section siswa
+.section siswb
+.section sisxa
+.section sisxb
+.section sisya
+.section sisyb
+.section sisza
+.section siszb
+.section sis1a
+.section sis1b
+.section sis2a
+.section sis2b
+.section sis3a
+.section sis3b
+.section sis4a
+.section sis4b
+.section sis5a
+.section sis5b
+.section sis6a
+.section sis6b
+.section sis7a
+.section sis7b
+.section sis8a
+.section sis8b
+.section sis9a
+.section sis9b
+.section sis0a
+.section sis0b
+.section sitaa
+.section sitab
+.section sitba
+.section sitbb
+.section sitca
+.section sitcb
+.section sitda
+.section sitdb
+.section sitea
+.section siteb
+.section sitfa
+.section sitfb
+.section sitga
+.section sitgb
+.section sitha
+.section sithb
+.section sitia
+.section sitib
+.section sitja
+.section sitjb
+.section sitka
+.section sitkb
+.section sitla
+.section sitlb
+.section sitma
+.section sitmb
+.section sitna
+.section sitnb
+.section sitoa
+.section sitob
+.section sitpa
+.section sitpb
+.section sitqa
+.section sitqb
+.section sitra
+.section sitrb
+.section sitsa
+.section sitsb
+.section sitta
+.section sittb
+.section situa
+.section situb
+.section sitva
+.section sitvb
+.section sitwa
+.section sitwb
+.section sitxa
+.section sitxb
+.section sitya
+.section sityb
+.section sitza
+.section sitzb
+.section sit1a
+.section sit1b
+.section sit2a
+.section sit2b
+.section sit3a
+.section sit3b
+.section sit4a
+.section sit4b
+.section sit5a
+.section sit5b
+.section sit6a
+.section sit6b
+.section sit7a
+.section sit7b
+.section sit8a
+.section sit8b
+.section sit9a
+.section sit9b
+.section sit0a
+.section sit0b
+.section siuaa
+.section siuab
+.section siuba
+.section siubb
+.section siuca
+.section siucb
+.section siuda
+.section siudb
+.section siuea
+.section siueb
+.section siufa
+.section siufb
+.section siuga
+.section siugb
+.section siuha
+.section siuhb
+.section siuia
+.section siuib
+.section siuja
+.section siujb
+.section siuka
+.section siukb
+.section siula
+.section siulb
+.section siuma
+.section siumb
+.section siuna
+.section siunb
+.section siuoa
+.section siuob
+.section siupa
+.section siupb
+.section siuqa
+.section siuqb
+.section siura
+.section siurb
+.section siusa
+.section siusb
+.section siuta
+.section siutb
+.section siuua
+.section siuub
+.section siuva
+.section siuvb
+.section siuwa
+.section siuwb
+.section siuxa
+.section siuxb
+.section siuya
+.section siuyb
+.section siuza
+.section siuzb
+.section siu1a
+.section siu1b
+.section siu2a
+.section siu2b
+.section siu3a
+.section siu3b
+.section siu4a
+.section siu4b
+.section siu5a
+.section siu5b
+.section siu6a
+.section siu6b
+.section siu7a
+.section siu7b
+.section siu8a
+.section siu8b
+.section siu9a
+.section siu9b
+.section siu0a
+.section siu0b
+.section sivaa
+.section sivab
+.section sivba
+.section sivbb
+.section sivca
+.section sivcb
+.section sivda
+.section sivdb
+.section sivea
+.section siveb
+.section sivfa
+.section sivfb
+.section sivga
+.section sivgb
+.section sivha
+.section sivhb
+.section sivia
+.section sivib
+.section sivja
+.section sivjb
+.section sivka
+.section sivkb
+.section sivla
+.section sivlb
+.section sivma
+.section sivmb
+.section sivna
+.section sivnb
+.section sivoa
+.section sivob
+.section sivpa
+.section sivpb
+.section sivqa
+.section sivqb
+.section sivra
+.section sivrb
+.section sivsa
+.section sivsb
+.section sivta
+.section sivtb
+.section sivua
+.section sivub
+.section sivva
+.section sivvb
+.section sivwa
+.section sivwb
+.section sivxa
+.section sivxb
+.section sivya
+.section sivyb
+.section sivza
+.section sivzb
+.section siv1a
+.section siv1b
+.section siv2a
+.section siv2b
+.section siv3a
+.section siv3b
+.section siv4a
+.section siv4b
+.section siv5a
+.section siv5b
+.section siv6a
+.section siv6b
+.section siv7a
+.section siv7b
+.section siv8a
+.section siv8b
+.section siv9a
+.section siv9b
+.section siv0a
+.section siv0b
+.section siwaa
+.section siwab
+.section siwba
+.section siwbb
+.section siwca
+.section siwcb
+.section siwda
+.section siwdb
+.section siwea
+.section siweb
+.section siwfa
+.section siwfb
+.section siwga
+.section siwgb
+.section siwha
+.section siwhb
+.section siwia
+.section siwib
+.section siwja
+.section siwjb
+.section siwka
+.section siwkb
+.section siwla
+.section siwlb
+.section siwma
+.section siwmb
+.section siwna
+.section siwnb
+.section siwoa
+.section siwob
+.section siwpa
+.section siwpb
+.section siwqa
+.section siwqb
+.section siwra
+.section siwrb
+.section siwsa
+.section siwsb
+.section siwta
+.section siwtb
+.section siwua
+.section siwub
+.section siwva
+.section siwvb
+.section siwwa
+.section siwwb
+.section siwxa
+.section siwxb
+.section siwya
+.section siwyb
+.section siwza
+.section siwzb
+.section siw1a
+.section siw1b
+.section siw2a
+.section siw2b
+.section siw3a
+.section siw3b
+.section siw4a
+.section siw4b
+.section siw5a
+.section siw5b
+.section siw6a
+.section siw6b
+.section siw7a
+.section siw7b
+.section siw8a
+.section siw8b
+.section siw9a
+.section siw9b
+.section siw0a
+.section siw0b
+.section sixaa
+.section sixab
+.section sixba
+.section sixbb
+.section sixca
+.section sixcb
+.section sixda
+.section sixdb
+.section sixea
+.section sixeb
+.section sixfa
+.section sixfb
+.section sixga
+.section sixgb
+.section sixha
+.section sixhb
+.section sixia
+.section sixib
+.section sixja
+.section sixjb
+.section sixka
+.section sixkb
+.section sixla
+.section sixlb
+.section sixma
+.section sixmb
+.section sixna
+.section sixnb
+.section sixoa
+.section sixob
+.section sixpa
+.section sixpb
+.section sixqa
+.section sixqb
+.section sixra
+.section sixrb
+.section sixsa
+.section sixsb
+.section sixta
+.section sixtb
+.section sixua
+.section sixub
+.section sixva
+.section sixvb
+.section sixwa
+.section sixwb
+.section sixxa
+.section sixxb
+.section sixya
+.section sixyb
+.section sixza
+.section sixzb
+.section six1a
+.section six1b
+.section six2a
+.section six2b
+.section six3a
+.section six3b
+.section six4a
+.section six4b
+.section six5a
+.section six5b
+.section six6a
+.section six6b
+.section six7a
+.section six7b
+.section six8a
+.section six8b
+.section six9a
+.section six9b
+.section six0a
+.section six0b
+.section siyaa
+.section siyab
+.section siyba
+.section siybb
+.section siyca
+.section siycb
+.section siyda
+.section siydb
+.section siyea
+.section siyeb
+.section siyfa
+.section siyfb
+.section siyga
+.section siygb
+.section siyha
+.section siyhb
+.section siyia
+.section siyib
+.section siyja
+.section siyjb
+.section siyka
+.section siykb
+.section siyla
+.section siylb
+.section siyma
+.section siymb
+.section siyna
+.section siynb
+.section siyoa
+.section siyob
+.section siypa
+.section siypb
+.section siyqa
+.section siyqb
+.section siyra
+.section siyrb
+.section siysa
+.section siysb
+.section siyta
+.section siytb
+.section siyua
+.section siyub
+.section siyva
+.section siyvb
+.section siywa
+.section siywb
+.section siyxa
+.section siyxb
+.section siyya
+.section siyyb
+.section siyza
+.section siyzb
+.section siy1a
+.section siy1b
+.section siy2a
+.section siy2b
+.section siy3a
+.section siy3b
+.section siy4a
+.section siy4b
+.section siy5a
+.section siy5b
+.section siy6a
+.section siy6b
+.section siy7a
+.section siy7b
+.section siy8a
+.section siy8b
+.section siy9a
+.section siy9b
+.section siy0a
+.section siy0b
+.section sizaa
+.section sizab
+.section sizba
+.section sizbb
+.section sizca
+.section sizcb
+.section sizda
+.section sizdb
+.section sizea
+.section sizeb
+.section sizfa
+.section sizfb
+.section sizga
+.section sizgb
+.section sizha
+.section sizhb
+.section sizia
+.section sizib
+.section sizja
+.section sizjb
+.section sizka
+.section sizkb
+.section sizla
+.section sizlb
+.section sizma
+.section sizmb
+.section sizna
+.section siznb
+.section sizoa
+.section sizob
+.section sizpa
+.section sizpb
+.section sizqa
+.section sizqb
+.section sizra
+.section sizrb
+.section sizsa
+.section sizsb
+.section sizta
+.section siztb
+.section sizua
+.section sizub
+.section sizva
+.section sizvb
+.section sizwa
+.section sizwb
+.section sizxa
+.section sizxb
+.section sizya
+.section sizyb
+.section sizza
+.section sizzb
+.section siz1a
+.section siz1b
+.section siz2a
+.section siz2b
+.section siz3a
+.section siz3b
+.section siz4a
+.section siz4b
+.section siz5a
+.section siz5b
+.section siz6a
+.section siz6b
+.section siz7a
+.section siz7b
+.section siz8a
+.section siz8b
+.section siz9a
+.section siz9b
+.section siz0a
+.section siz0b
+.section si1aa
+.section si1ab
+.section si1ba
+.section si1bb
+.section si1ca
+.section si1cb
+.section si1da
+.section si1db
+.section si1ea
+.section si1eb
+.section si1fa
+.section si1fb
+.section si1ga
+.section si1gb
+.section si1ha
+.section si1hb
+.section si1ia
+.section si1ib
+.section si1ja
+.section si1jb
+.section si1ka
+.section si1kb
+.section si1la
+.section si1lb
+.section si1ma
+.section si1mb
+.section si1na
+.section si1nb
+.section si1oa
+.section si1ob
+.section si1pa
+.section si1pb
+.section si1qa
+.section si1qb
+.section si1ra
+.section si1rb
+.section si1sa
+.section si1sb
+.section si1ta
+.section si1tb
+.section si1ua
+.section si1ub
+.section si1va
+.section si1vb
+.section si1wa
+.section si1wb
+.section si1xa
+.section si1xb
+.section si1ya
+.section si1yb
+.section si1za
+.section si1zb
+.section si11a
+.section si11b
+.section si12a
+.section si12b
+.section si13a
+.section si13b
+.section si14a
+.section si14b
+.section si15a
+.section si15b
+.section si16a
+.section si16b
+.section si17a
+.section si17b
+.section si18a
+.section si18b
+.section si19a
+.section si19b
+.section si10a
+.section si10b
+.section si2aa
+.section si2ab
+.section si2ba
+.section si2bb
+.section si2ca
+.section si2cb
+.section si2da
+.section si2db
+.section si2ea
+.section si2eb
+.section si2fa
+.section si2fb
+.section si2ga
+.section si2gb
+.section si2ha
+.section si2hb
+.section si2ia
+.section si2ib
+.section si2ja
+.section si2jb
+.section si2ka
+.section si2kb
+.section si2la
+.section si2lb
+.section si2ma
+.section si2mb
+.section si2na
+.section si2nb
+.section si2oa
+.section si2ob
+.section si2pa
+.section si2pb
+.section si2qa
+.section si2qb
+.section si2ra
+.section si2rb
+.section si2sa
+.section si2sb
+.section si2ta
+.section si2tb
+.section si2ua
+.section si2ub
+.section si2va
+.section si2vb
+.section si2wa
+.section si2wb
+.section si2xa
+.section si2xb
+.section si2ya
+.section si2yb
+.section si2za
+.section si2zb
+.section si21a
+.section si21b
+.section si22a
+.section si22b
+.section si23a
+.section si23b
+.section si24a
+.section si24b
+.section si25a
+.section si25b
+.section si26a
+.section si26b
+.section si27a
+.section si27b
+.section si28a
+.section si28b
+.section si29a
+.section si29b
+.section si20a
+.section si20b
+.section si3aa
+.section si3ab
+.section si3ba
+.section si3bb
+.section si3ca
+.section si3cb
+.section si3da
+.section si3db
+.section si3ea
+.section si3eb
+.section si3fa
+.section si3fb
+.section si3ga
+.section si3gb
+.section si3ha
+.section si3hb
+.section si3ia
+.section si3ib
+.section si3ja
+.section si3jb
+.section si3ka
+.section si3kb
+.section si3la
+.section si3lb
+.section si3ma
+.section si3mb
+.section si3na
+.section si3nb
+.section si3oa
+.section si3ob
+.section si3pa
+.section si3pb
+.section si3qa
+.section si3qb
+.section si3ra
+.section si3rb
+.section si3sa
+.section si3sb
+.section si3ta
+.section si3tb
+.section si3ua
+.section si3ub
+.section si3va
+.section si3vb
+.section si3wa
+.section si3wb
+.section si3xa
+.section si3xb
+.section si3ya
+.section si3yb
+.section si3za
+.section si3zb
+.section si31a
+.section si31b
+.section si32a
+.section si32b
+.section si33a
+.section si33b
+.section si34a
+.section si34b
+.section si35a
+.section si35b
+.section si36a
+.section si36b
+.section si37a
+.section si37b
+.section si38a
+.section si38b
+.section si39a
+.section si39b
+.section si30a
+.section si30b
+.section si4aa
+.section si4ab
+.section si4ba
+.section si4bb
+.section si4ca
+.section si4cb
+.section si4da
+.section si4db
+.section si4ea
+.section si4eb
+.section si4fa
+.section si4fb
+.section si4ga
+.section si4gb
+.section si4ha
+.section si4hb
+.section si4ia
+.section si4ib
+.section si4ja
+.section si4jb
+.section si4ka
+.section si4kb
+.section si4la
+.section si4lb
+.section si4ma
+.section si4mb
+.section si4na
+.section si4nb
+.section si4oa
+.section si4ob
+.section si4pa
+.section si4pb
+.section si4qa
+.section si4qb
+.section si4ra
+.section si4rb
+.section si4sa
+.section si4sb
+.section si4ta
+.section si4tb
+.section si4ua
+.section si4ub
+.section si4va
+.section si4vb
+.section si4wa
+.section si4wb
+.section si4xa
+.section si4xb
+.section si4ya
+.section si4yb
+.section si4za
+.section si4zb
+.section si41a
+.section si41b
+.section si42a
+.section si42b
+.section si43a
+.section si43b
+.section si44a
+.section si44b
+.section si45a
+.section si45b
+.section si46a
+.section si46b
+.section si47a
+.section si47b
+.section si48a
+.section si48b
+.section si49a
+.section si49b
+.section si40a
+.section si40b
+.section si5aa
+.section si5ab
+.section si5ba
+.section si5bb
+.section si5ca
+.section si5cb
+.section si5da
+.section si5db
+.section si5ea
+.section si5eb
+.section si5fa
+.section si5fb
+.section si5ga
+.section si5gb
+.section si5ha
+.section si5hb
+.section si5ia
+.section si5ib
+.section si5ja
+.section si5jb
+.section si5ka
+.section si5kb
+.section si5la
+.section si5lb
+.section si5ma
+.section si5mb
+.section si5na
+.section si5nb
+.section si5oa
+.section si5ob
+.section si5pa
+.section si5pb
+.section si5qa
+.section si5qb
+.section si5ra
+.section si5rb
+.section si5sa
+.section si5sb
+.section si5ta
+.section si5tb
+.section si5ua
+.section si5ub
+.section si5va
+.section si5vb
+.section si5wa
+.section si5wb
+.section si5xa
+.section si5xb
+.section si5ya
+.section si5yb
+.section si5za
+.section si5zb
+.section si51a
+.section si51b
+.section si52a
+.section si52b
+.section si53a
+.section si53b
+.section si54a
+.section si54b
+.section si55a
+.section si55b
+.section si56a
+.section si56b
+.section si57a
+.section si57b
+.section si58a
+.section si58b
+.section si59a
+.section si59b
+.section si50a
+.section si50b
+.section si6aa
+.section si6ab
+.section si6ba
+.section si6bb
+.section si6ca
+.section si6cb
+.section si6da
+.section si6db
+.section si6ea
+.section si6eb
+.section si6fa
+.section si6fb
+.section si6ga
+.section si6gb
+.section si6ha
+.section si6hb
+.section si6ia
+.section si6ib
+.section si6ja
+.section si6jb
+.section si6ka
+.section si6kb
+.section si6la
+.section si6lb
+.section si6ma
+.section si6mb
+.section si6na
+.section si6nb
+.section si6oa
+.section si6ob
+.section si6pa
+.section si6pb
+.section si6qa
+.section si6qb
+.section si6ra
+.section si6rb
+.section si6sa
+.section si6sb
+.section si6ta
+.section si6tb
+.section si6ua
+.section si6ub
+.section si6va
+.section si6vb
+.section si6wa
+.section si6wb
+.section si6xa
+.section si6xb
+.section si6ya
+.section si6yb
+.section si6za
+.section si6zb
+.section si61a
+.section si61b
+.section si62a
+.section si62b
+.section si63a
+.section si63b
+.section si64a
+.section si64b
+.section si65a
+.section si65b
+.section si66a
+.section si66b
+.section si67a
+.section si67b
+.section si68a
+.section si68b
+.section si69a
+.section si69b
+.section si60a
+.section si60b
+.section si7aa
+.section si7ab
+.section si7ba
+.section si7bb
+.section si7ca
+.section si7cb
+.section si7da
+.section si7db
+.section si7ea
+.section si7eb
+.section si7fa
+.section si7fb
+.section si7ga
+.section si7gb
+.section si7ha
+.section si7hb
+.section si7ia
+.section si7ib
+.section si7ja
+.section si7jb
+.section si7ka
+.section si7kb
+.section si7la
+.section si7lb
+.section si7ma
+.section si7mb
+.section si7na
+.section si7nb
+.section si7oa
+.section si7ob
+.section si7pa
+.section si7pb
+.section si7qa
+.section si7qb
+.section si7ra
+.section si7rb
+.section si7sa
+.section si7sb
+.section si7ta
+.section si7tb
+.section si7ua
+.section si7ub
+.section si7va
+.section si7vb
+.section si7wa
+.section si7wb
+.section si7xa
+.section si7xb
+.section si7ya
+.section si7yb
+.section si7za
+.section si7zb
+.section si71a
+.section si71b
+.section si72a
+.section si72b
+.section si73a
+.section si73b
+.section si74a
+.section si74b
+.section si75a
+.section si75b
+.section si76a
+.section si76b
+.section si77a
+.section si77b
+.section si78a
+.section si78b
+.section si79a
+.section si79b
+.section si70a
+.section si70b
+.section si8aa
+.section si8ab
+.section si8ba
+.section si8bb
+.section si8ca
+.section si8cb
+.section si8da
+.section si8db
+.section si8ea
+.section si8eb
+.section si8fa
+.section si8fb
+.section si8ga
+.section si8gb
+.section si8ha
+.section si8hb
+.section si8ia
+.section si8ib
+.section si8ja
+.section si8jb
+.section si8ka
+.section si8kb
+.section si8la
+.section si8lb
+.section si8ma
+.section si8mb
+.section si8na
+.section si8nb
+.section si8oa
+.section si8ob
+.section si8pa
+.section si8pb
+.section si8qa
+.section si8qb
+.section si8ra
+.section si8rb
+.section si8sa
+.section si8sb
+.section si8ta
+.section si8tb
+.section si8ua
+.section si8ub
+.section si8va
+.section si8vb
+.section si8wa
+.section si8wb
+.section si8xa
+.section si8xb
+.section si8ya
+.section si8yb
+.section si8za
+.section si8zb
+.section si81a
+.section si81b
+.section si82a
+.section si82b
+.section si83a
+.section si83b
+.section si84a
+.section si84b
+.section si85a
+.section si85b
+.section si86a
+.section si86b
+.section si87a
+.section si87b
+.section si88a
+.section si88b
+.section si89a
+.section si89b
+.section si80a
+.section si80b
+.section si9aa
+.section si9ab
+.section si9ba
+.section si9bb
+.section si9ca
+.section si9cb
+.section si9da
+.section si9db
+.section si9ea
+.section si9eb
+.section si9fa
+.section si9fb
+.section si9ga
+.section si9gb
+.section si9ha
+.section si9hb
+.section si9ia
+.section si9ib
+.section si9ja
+.section si9jb
+.section si9ka
+.section si9kb
+.section si9la
+.section si9lb
+.section si9ma
+.section si9mb
+.section si9na
+.section si9nb
+.section si9oa
+.section si9ob
+.section si9pa
+.section si9pb
+.section si9qa
+.section si9qb
+.section si9ra
+.section si9rb
+.section si9sa
+.section si9sb
+.section si9ta
+.section si9tb
+.section si9ua
+.section si9ub
+.section si9va
+.section si9vb
+.section si9wa
+.section si9wb
+.section si9xa
+.section si9xb
+.section si9ya
+.section si9yb
+.section si9za
+.section si9zb
+.section si91a
+.section si91b
+.section si92a
+.section si92b
+.section si93a
+.section si93b
+.section si94a
+.section si94b
+.section si95a
+.section si95b
+.section si96a
+.section si96b
+.section si97a
+.section si97b
+.section si98a
+.section si98b
+.section si99a
+.section si99b
+.section si90a
+.section si90b
+.section si0aa
+.section si0ab
+.section si0ba
+.section si0bb
+.section si0ca
+.section si0cb
+.section si0da
+.section si0db
+.section si0ea
+.section si0eb
+.section si0fa
+.section si0fb
+.section si0ga
+.section si0gb
+.section si0ha
+.section si0hb
+.section si0ia
+.section si0ib
+.section si0ja
+.section si0jb
+.section si0ka
+.section si0kb
+.section si0la
+.section si0lb
+.section si0ma
+.section si0mb
+.section si0na
+.section si0nb
+.section si0oa
+.section si0ob
+.section si0pa
+.section si0pb
+.section si0qa
+.section si0qb
+.section si0ra
+.section si0rb
+.section si0sa
+.section si0sb
+.section si0ta
+.section si0tb
+.section si0ua
+.section si0ub
+.section si0va
+.section si0vb
+.section si0wa
+.section si0wb
+.section si0xa
+.section si0xb
+.section si0ya
+.section si0yb
+.section si0za
+.section si0zb
+.section si01a
+.section si01b
+.section si02a
+.section si02b
+.section si03a
+.section si03b
+.section si04a
+.section si04b
+.section si05a
+.section si05b
+.section si06a
+.section si06b
+.section si07a
+.section si07b
+.section si08a
+.section si08b
+.section si09a
+.section si09b
+.section si00a
+.section si00b
+.section sjaaa
+.section sjaab
+.section sjaba
+.section sjabb
+.section sjaca
+.section sjacb
+.section sjada
+.section sjadb
+.section sjaea
+.section sjaeb
+.section sjafa
+.section sjafb
+.section sjaga
+.section sjagb
+.section sjaha
+.section sjahb
+.section sjaia
+.section sjaib
+.section sjaja
+.section sjajb
+.section sjaka
+.section sjakb
+.section sjala
+.section sjalb
+.section sjama
+.section sjamb
+.section sjana
+.section sjanb
+.section sjaoa
+.section sjaob
+.section sjapa
+.section sjapb
+.section sjaqa
+.section sjaqb
+.section sjara
+.section sjarb
+.section sjasa
+.section sjasb
+.section sjata
+.section sjatb
+.section sjaua
+.section sjaub
+.section sjava
+.section sjavb
+.section sjawa
+.section sjawb
+.section sjaxa
+.section sjaxb
+.section sjaya
+.section sjayb
+.section sjaza
+.section sjazb
+.section sja1a
+.section sja1b
+.section sja2a
+.section sja2b
+.section sja3a
+.section sja3b
+.section sja4a
+.section sja4b
+.section sja5a
+.section sja5b
+.section sja6a
+.section sja6b
+.section sja7a
+.section sja7b
+.section sja8a
+.section sja8b
+.section sja9a
+.section sja9b
+.section sja0a
+.section sja0b
+.section sjbaa
+.section sjbab
+.section sjbba
+.section sjbbb
+.section sjbca
+.section sjbcb
+.section sjbda
+.section sjbdb
+.section sjbea
+.section sjbeb
+.section sjbfa
+.section sjbfb
+.section sjbga
+.section sjbgb
+.section sjbha
+.section sjbhb
+.section sjbia
+.section sjbib
+.section sjbja
+.section sjbjb
+.section sjbka
+.section sjbkb
+.section sjbla
+.section sjblb
+.section sjbma
+.section sjbmb
+.section sjbna
+.section sjbnb
+.section sjboa
+.section sjbob
+.section sjbpa
+.section sjbpb
+.section sjbqa
+.section sjbqb
+.section sjbra
+.section sjbrb
+.section sjbsa
+.section sjbsb
+.section sjbta
+.section sjbtb
+.section sjbua
+.section sjbub
+.section sjbva
+.section sjbvb
+.section sjbwa
+.section sjbwb
+.section sjbxa
+.section sjbxb
+.section sjbya
+.section sjbyb
+.section sjbza
+.section sjbzb
+.section sjb1a
+.section sjb1b
+.section sjb2a
+.section sjb2b
+.section sjb3a
+.section sjb3b
+.section sjb4a
+.section sjb4b
+.section sjb5a
+.section sjb5b
+.section sjb6a
+.section sjb6b
+.section sjb7a
+.section sjb7b
+.section sjb8a
+.section sjb8b
+.section sjb9a
+.section sjb9b
+.section sjb0a
+.section sjb0b
+.section sjcaa
+.section sjcab
+.section sjcba
+.section sjcbb
+.section sjcca
+.section sjccb
+.section sjcda
+.section sjcdb
+.section sjcea
+.section sjceb
+.section sjcfa
+.section sjcfb
+.section sjcga
+.section sjcgb
+.section sjcha
+.section sjchb
+.section sjcia
+.section sjcib
+.section sjcja
+.section sjcjb
+.section sjcka
+.section sjckb
+.section sjcla
+.section sjclb
+.section sjcma
+.section sjcmb
+.section sjcna
+.section sjcnb
+.section sjcoa
+.section sjcob
+.section sjcpa
+.section sjcpb
+.section sjcqa
+.section sjcqb
+.section sjcra
+.section sjcrb
+.section sjcsa
+.section sjcsb
+.section sjcta
+.section sjctb
+.section sjcua
+.section sjcub
+.section sjcva
+.section sjcvb
+.section sjcwa
+.section sjcwb
+.section sjcxa
+.section sjcxb
+.section sjcya
+.section sjcyb
+.section sjcza
+.section sjczb
+.section sjc1a
+.section sjc1b
+.section sjc2a
+.section sjc2b
+.section sjc3a
+.section sjc3b
+.section sjc4a
+.section sjc4b
+.section sjc5a
+.section sjc5b
+.section sjc6a
+.section sjc6b
+.section sjc7a
+.section sjc7b
+.section sjc8a
+.section sjc8b
+.section sjc9a
+.section sjc9b
+.section sjc0a
+.section sjc0b
+.section sjdaa
+.section sjdab
+.section sjdba
+.section sjdbb
+.section sjdca
+.section sjdcb
+.section sjdda
+.section sjddb
+.section sjdea
+.section sjdeb
+.section sjdfa
+.section sjdfb
+.section sjdga
+.section sjdgb
+.section sjdha
+.section sjdhb
+.section sjdia
+.section sjdib
+.section sjdja
+.section sjdjb
+.section sjdka
+.section sjdkb
+.section sjdla
+.section sjdlb
+.section sjdma
+.section sjdmb
+.section sjdna
+.section sjdnb
+.section sjdoa
+.section sjdob
+.section sjdpa
+.section sjdpb
+.section sjdqa
+.section sjdqb
+.section sjdra
+.section sjdrb
+.section sjdsa
+.section sjdsb
+.section sjdta
+.section sjdtb
+.section sjdua
+.section sjdub
+.section sjdva
+.section sjdvb
+.section sjdwa
+.section sjdwb
+.section sjdxa
+.section sjdxb
+.section sjdya
+.section sjdyb
+.section sjdza
+.section sjdzb
+.section sjd1a
+.section sjd1b
+.section sjd2a
+.section sjd2b
+.section sjd3a
+.section sjd3b
+.section sjd4a
+.section sjd4b
+.section sjd5a
+.section sjd5b
+.section sjd6a
+.section sjd6b
+.section sjd7a
+.section sjd7b
+.section sjd8a
+.section sjd8b
+.section sjd9a
+.section sjd9b
+.section sjd0a
+.section sjd0b
+.section sjeaa
+.section sjeab
+.section sjeba
+.section sjebb
+.section sjeca
+.section sjecb
+.section sjeda
+.section sjedb
+.section sjeea
+.section sjeeb
+.section sjefa
+.section sjefb
+.section sjega
+.section sjegb
+.section sjeha
+.section sjehb
+.section sjeia
+.section sjeib
+.section sjeja
+.section sjejb
+.section sjeka
+.section sjekb
+.section sjela
+.section sjelb
+.section sjema
+.section sjemb
+.section sjena
+.section sjenb
+.section sjeoa
+.section sjeob
+.section sjepa
+.section sjepb
+.section sjeqa
+.section sjeqb
+.section sjera
+.section sjerb
+.section sjesa
+.section sjesb
+.section sjeta
+.section sjetb
+.section sjeua
+.section sjeub
+.section sjeva
+.section sjevb
+.section sjewa
+.section sjewb
+.section sjexa
+.section sjexb
+.section sjeya
+.section sjeyb
+.section sjeza
+.section sjezb
+.section sje1a
+.section sje1b
+.section sje2a
+.section sje2b
+.section sje3a
+.section sje3b
+.section sje4a
+.section sje4b
+.section sje5a
+.section sje5b
+.section sje6a
+.section sje6b
+.section sje7a
+.section sje7b
+.section sje8a
+.section sje8b
+.section sje9a
+.section sje9b
+.section sje0a
+.section sje0b
+.section sjfaa
+.section sjfab
+.section sjfba
+.section sjfbb
+.section sjfca
+.section sjfcb
+.section sjfda
+.section sjfdb
+.section sjfea
+.section sjfeb
+.section sjffa
+.section sjffb
+.section sjfga
+.section sjfgb
+.section sjfha
+.section sjfhb
+.section sjfia
+.section sjfib
+.section sjfja
+.section sjfjb
+.section sjfka
+.section sjfkb
+.section sjfla
+.section sjflb
+.section sjfma
+.section sjfmb
+.section sjfna
+.section sjfnb
+.section sjfoa
+.section sjfob
+.section sjfpa
+.section sjfpb
+.section sjfqa
+.section sjfqb
+.section sjfra
+.section sjfrb
+.section sjfsa
+.section sjfsb
+.section sjfta
+.section sjftb
+.section sjfua
+.section sjfub
+.section sjfva
+.section sjfvb
+.section sjfwa
+.section sjfwb
+.section sjfxa
+.section sjfxb
+.section sjfya
+.section sjfyb
+.section sjfza
+.section sjfzb
+.section sjf1a
+.section sjf1b
+.section sjf2a
+.section sjf2b
+.section sjf3a
+.section sjf3b
+.section sjf4a
+.section sjf4b
+.section sjf5a
+.section sjf5b
+.section sjf6a
+.section sjf6b
+.section sjf7a
+.section sjf7b
+.section sjf8a
+.section sjf8b
+.section sjf9a
+.section sjf9b
+.section sjf0a
+.section sjf0b
+.section sjgaa
+.section sjgab
+.section sjgba
+.section sjgbb
+.section sjgca
+.section sjgcb
+.section sjgda
+.section sjgdb
+.section sjgea
+.section sjgeb
+.section sjgfa
+.section sjgfb
+.section sjgga
+.section sjggb
+.section sjgha
+.section sjghb
+.section sjgia
+.section sjgib
+.section sjgja
+.section sjgjb
+.section sjgka
+.section sjgkb
+.section sjgla
+.section sjglb
+.section sjgma
+.section sjgmb
+.section sjgna
+.section sjgnb
+.section sjgoa
+.section sjgob
+.section sjgpa
+.section sjgpb
+.section sjgqa
+.section sjgqb
+.section sjgra
+.section sjgrb
+.section sjgsa
+.section sjgsb
+.section sjgta
+.section sjgtb
+.section sjgua
+.section sjgub
+.section sjgva
+.section sjgvb
+.section sjgwa
+.section sjgwb
+.section sjgxa
+.section sjgxb
+.section sjgya
+.section sjgyb
+.section sjgza
+.section sjgzb
+.section sjg1a
+.section sjg1b
+.section sjg2a
+.section sjg2b
+.section sjg3a
+.section sjg3b
+.section sjg4a
+.section sjg4b
+.section sjg5a
+.section sjg5b
+.section sjg6a
+.section sjg6b
+.section sjg7a
+.section sjg7b
+.section sjg8a
+.section sjg8b
+.section sjg9a
+.section sjg9b
+.section sjg0a
+.section sjg0b
+.section sjhaa
+.section sjhab
+.section sjhba
+.section sjhbb
+.section sjhca
+.section sjhcb
+.section sjhda
+.section sjhdb
+.section sjhea
+.section sjheb
+.section sjhfa
+.section sjhfb
+.section sjhga
+.section sjhgb
+.section sjhha
+.section sjhhb
+.section sjhia
+.section sjhib
+.section sjhja
+.section sjhjb
+.section sjhka
+.section sjhkb
+.section sjhla
+.section sjhlb
+.section sjhma
+.section sjhmb
+.section sjhna
+.section sjhnb
+.section sjhoa
+.section sjhob
+.section sjhpa
+.section sjhpb
+.section sjhqa
+.section sjhqb
+.section sjhra
+.section sjhrb
+.section sjhsa
+.section sjhsb
+.section sjhta
+.section sjhtb
+.section sjhua
+.section sjhub
+.section sjhva
+.section sjhvb
+.section sjhwa
+.section sjhwb
+.section sjhxa
+.section sjhxb
+.section sjhya
+.section sjhyb
+.section sjhza
+.section sjhzb
+.section sjh1a
+.section sjh1b
+.section sjh2a
+.section sjh2b
+.section sjh3a
+.section sjh3b
+.section sjh4a
+.section sjh4b
+.section sjh5a
+.section sjh5b
+.section sjh6a
+.section sjh6b
+.section sjh7a
+.section sjh7b
+.section sjh8a
+.section sjh8b
+.section sjh9a
+.section sjh9b
+.section sjh0a
+.section sjh0b
+.section sjiaa
+.section sjiab
+.section sjiba
+.section sjibb
+.section sjica
+.section sjicb
+.section sjida
+.section sjidb
+.section sjiea
+.section sjieb
+.section sjifa
+.section sjifb
+.section sjiga
+.section sjigb
+.section sjiha
+.section sjihb
+.section sjiia
+.section sjiib
+.section sjija
+.section sjijb
+.section sjika
+.section sjikb
+.section sjila
+.section sjilb
+.section sjima
+.section sjimb
+.section sjina
+.section sjinb
+.section sjioa
+.section sjiob
+.section sjipa
+.section sjipb
+.section sjiqa
+.section sjiqb
+.section sjira
+.section sjirb
+.section sjisa
+.section sjisb
+.section sjita
+.section sjitb
+.section sjiua
+.section sjiub
+.section sjiva
+.section sjivb
+.section sjiwa
+.section sjiwb
+.section sjixa
+.section sjixb
+.section sjiya
+.section sjiyb
+.section sjiza
+.section sjizb
+.section sji1a
+.section sji1b
+.section sji2a
+.section sji2b
+.section sji3a
+.section sji3b
+.section sji4a
+.section sji4b
+.section sji5a
+.section sji5b
+.section sji6a
+.section sji6b
+.section sji7a
+.section sji7b
+.section sji8a
+.section sji8b
+.section sji9a
+.section sji9b
+.section sji0a
+.section sji0b
+.section sjjaa
+.section sjjab
+.section sjjba
+.section sjjbb
+.section sjjca
+.section sjjcb
+.section sjjda
+.section sjjdb
+.section sjjea
+.section sjjeb
+.section sjjfa
+.section sjjfb
+.section sjjga
+.section sjjgb
+.section sjjha
+.section sjjhb
+.section sjjia
+.section sjjib
+.section sjjja
+.section sjjjb
+.section sjjka
+.section sjjkb
+.section sjjla
+.section sjjlb
+.section sjjma
+.section sjjmb
+.section sjjna
+.section sjjnb
+.section sjjoa
+.section sjjob
+.section sjjpa
+.section sjjpb
+.section sjjqa
+.section sjjqb
+.section sjjra
+.section sjjrb
+.section sjjsa
+.section sjjsb
+.section sjjta
+.section sjjtb
+.section sjjua
+.section sjjub
+.section sjjva
+.section sjjvb
+.section sjjwa
+.section sjjwb
+.section sjjxa
+.section sjjxb
+.section sjjya
+.section sjjyb
+.section sjjza
+.section sjjzb
+.section sjj1a
+.section sjj1b
+.section sjj2a
+.section sjj2b
+.section sjj3a
+.section sjj3b
+.section sjj4a
+.section sjj4b
+.section sjj5a
+.section sjj5b
+.section sjj6a
+.section sjj6b
+.section sjj7a
+.section sjj7b
+.section sjj8a
+.section sjj8b
+.section sjj9a
+.section sjj9b
+.section sjj0a
+.section sjj0b
+.section sjkaa
+.section sjkab
+.section sjkba
+.section sjkbb
+.section sjkca
+.section sjkcb
+.section sjkda
+.section sjkdb
+.section sjkea
+.section sjkeb
+.section sjkfa
+.section sjkfb
+.section sjkga
+.section sjkgb
+.section sjkha
+.section sjkhb
+.section sjkia
+.section sjkib
+.section sjkja
+.section sjkjb
+.section sjkka
+.section sjkkb
+.section sjkla
+.section sjklb
+.section sjkma
+.section sjkmb
+.section sjkna
+.section sjknb
+.section sjkoa
+.section sjkob
+.section sjkpa
+.section sjkpb
+.section sjkqa
+.section sjkqb
+.section sjkra
+.section sjkrb
+.section sjksa
+.section sjksb
+.section sjkta
+.section sjktb
+.section sjkua
+.section sjkub
+.section sjkva
+.section sjkvb
+.section sjkwa
+.section sjkwb
+.section sjkxa
+.section sjkxb
+.section sjkya
+.section sjkyb
+.section sjkza
+.section sjkzb
+.section sjk1a
+.section sjk1b
+.section sjk2a
+.section sjk2b
+.section sjk3a
+.section sjk3b
+.section sjk4a
+.section sjk4b
+.section sjk5a
+.section sjk5b
+.section sjk6a
+.section sjk6b
+.section sjk7a
+.section sjk7b
+.section sjk8a
+.section sjk8b
+.section sjk9a
+.section sjk9b
+.section sjk0a
+.section sjk0b
+.section sjlaa
+.section sjlab
+.section sjlba
+.section sjlbb
+.section sjlca
+.section sjlcb
+.section sjlda
+.section sjldb
+.section sjlea
+.section sjleb
+.section sjlfa
+.section sjlfb
+.section sjlga
+.section sjlgb
+.section sjlha
+.section sjlhb
+.section sjlia
+.section sjlib
+.section sjlja
+.section sjljb
+.section sjlka
+.section sjlkb
+.section sjlla
+.section sjllb
+.section sjlma
+.section sjlmb
+.section sjlna
+.section sjlnb
+.section sjloa
+.section sjlob
+.section sjlpa
+.section sjlpb
+.section sjlqa
+.section sjlqb
+.section sjlra
+.section sjlrb
+.section sjlsa
+.section sjlsb
+.section sjlta
+.section sjltb
+.section sjlua
+.section sjlub
+.section sjlva
+.section sjlvb
+.section sjlwa
+.section sjlwb
+.section sjlxa
+.section sjlxb
+.section sjlya
+.section sjlyb
+.section sjlza
+.section sjlzb
+.section sjl1a
+.section sjl1b
+.section sjl2a
+.section sjl2b
+.section sjl3a
+.section sjl3b
+.section sjl4a
+.section sjl4b
+.section sjl5a
+.section sjl5b
+.section sjl6a
+.section sjl6b
+.section sjl7a
+.section sjl7b
+.section sjl8a
+.section sjl8b
+.section sjl9a
+.section sjl9b
+.section sjl0a
+.section sjl0b
+.section sjmaa
+.section sjmab
+.section sjmba
+.section sjmbb
+.section sjmca
+.section sjmcb
+.section sjmda
+.section sjmdb
+.section sjmea
+.section sjmeb
+.section sjmfa
+.section sjmfb
+.section sjmga
+.section sjmgb
+.section sjmha
+.section sjmhb
+.section sjmia
+.section sjmib
+.section sjmja
+.section sjmjb
+.section sjmka
+.section sjmkb
+.section sjmla
+.section sjmlb
+.section sjmma
+.section sjmmb
+.section sjmna
+.section sjmnb
+.section sjmoa
+.section sjmob
+.section sjmpa
+.section sjmpb
+.section sjmqa
+.section sjmqb
+.section sjmra
+.section sjmrb
+.section sjmsa
+.section sjmsb
+.section sjmta
+.section sjmtb
+.section sjmua
+.section sjmub
+.section sjmva
+.section sjmvb
+.section sjmwa
+.section sjmwb
+.section sjmxa
+.section sjmxb
+.section sjmya
+.section sjmyb
+.section sjmza
+.section sjmzb
+.section sjm1a
+.section sjm1b
+.section sjm2a
+.section sjm2b
+.section sjm3a
+.section sjm3b
+.section sjm4a
+.section sjm4b
+.section sjm5a
+.section sjm5b
+.section sjm6a
+.section sjm6b
+.section sjm7a
+.section sjm7b
+.section sjm8a
+.section sjm8b
+.section sjm9a
+.section sjm9b
+.section sjm0a
+.section sjm0b
+.section sjnaa
+.section sjnab
+.section sjnba
+.section sjnbb
+.section sjnca
+.section sjncb
+.section sjnda
+.section sjndb
+.section sjnea
+.section sjneb
+.section sjnfa
+.section sjnfb
+.section sjnga
+.section sjngb
+.section sjnha
+.section sjnhb
+.section sjnia
+.section sjnib
+.section sjnja
+.section sjnjb
+.section sjnka
+.section sjnkb
+.section sjnla
+.section sjnlb
+.section sjnma
+.section sjnmb
+.section sjnna
+.section sjnnb
+.section sjnoa
+.section sjnob
+.section sjnpa
+.section sjnpb
+.section sjnqa
+.section sjnqb
+.section sjnra
+.section sjnrb
+.section sjnsa
+.section sjnsb
+.section sjnta
+.section sjntb
+.section sjnua
+.section sjnub
+.section sjnva
+.section sjnvb
+.section sjnwa
+.section sjnwb
+.section sjnxa
+.section sjnxb
+.section sjnya
+.section sjnyb
+.section sjnza
+.section sjnzb
+.section sjn1a
+.section sjn1b
+.section sjn2a
+.section sjn2b
+.section sjn3a
+.section sjn3b
+.section sjn4a
+.section sjn4b
+.section sjn5a
+.section sjn5b
+.section sjn6a
+.section sjn6b
+.section sjn7a
+.section sjn7b
+.section sjn8a
+.section sjn8b
+.section sjn9a
+.section sjn9b
+.section sjn0a
+.section sjn0b
+.section sjoaa
+.section sjoab
+.section sjoba
+.section sjobb
+.section sjoca
+.section sjocb
+.section sjoda
+.section sjodb
+.section sjoea
+.section sjoeb
+.section sjofa
+.section sjofb
+.section sjoga
+.section sjogb
+.section sjoha
+.section sjohb
+.section sjoia
+.section sjoib
+.section sjoja
+.section sjojb
+.section sjoka
+.section sjokb
+.section sjola
+.section sjolb
+.section sjoma
+.section sjomb
+.section sjona
+.section sjonb
+.section sjooa
+.section sjoob
+.section sjopa
+.section sjopb
+.section sjoqa
+.section sjoqb
+.section sjora
+.section sjorb
+.section sjosa
+.section sjosb
+.section sjota
+.section sjotb
+.section sjoua
+.section sjoub
+.section sjova
+.section sjovb
+.section sjowa
+.section sjowb
+.section sjoxa
+.section sjoxb
+.section sjoya
+.section sjoyb
+.section sjoza
+.section sjozb
+.section sjo1a
+.section sjo1b
+.section sjo2a
+.section sjo2b
+.section sjo3a
+.section sjo3b
+.section sjo4a
+.section sjo4b
+.section sjo5a
+.section sjo5b
+.section sjo6a
+.section sjo6b
+.section sjo7a
+.section sjo7b
+.section sjo8a
+.section sjo8b
+.section sjo9a
+.section sjo9b
+.section sjo0a
+.section sjo0b
+.section sjpaa
+.section sjpab
+.section sjpba
+.section sjpbb
+.section sjpca
+.section sjpcb
+.section sjpda
+.section sjpdb
+.section sjpea
+.section sjpeb
+.section sjpfa
+.section sjpfb
+.section sjpga
+.section sjpgb
+.section sjpha
+.section sjphb
+.section sjpia
+.section sjpib
+.section sjpja
+.section sjpjb
+.section sjpka
+.section sjpkb
+.section sjpla
+.section sjplb
+.section sjpma
+.section sjpmb
+.section sjpna
+.section sjpnb
+.section sjpoa
+.section sjpob
+.section sjppa
+.section sjppb
+.section sjpqa
+.section sjpqb
+.section sjpra
+.section sjprb
+.section sjpsa
+.section sjpsb
+.section sjpta
+.section sjptb
+.section sjpua
+.section sjpub
+.section sjpva
+.section sjpvb
+.section sjpwa
+.section sjpwb
+.section sjpxa
+.section sjpxb
+.section sjpya
+.section sjpyb
+.section sjpza
+.section sjpzb
+.section sjp1a
+.section sjp1b
+.section sjp2a
+.section sjp2b
+.section sjp3a
+.section sjp3b
+.section sjp4a
+.section sjp4b
+.section sjp5a
+.section sjp5b
+.section sjp6a
+.section sjp6b
+.section sjp7a
+.section sjp7b
+.section sjp8a
+.section sjp8b
+.section sjp9a
+.section sjp9b
+.section sjp0a
+.section sjp0b
+.section sjqaa
+.section sjqab
+.section sjqba
+.section sjqbb
+.section sjqca
+.section sjqcb
+.section sjqda
+.section sjqdb
+.section sjqea
+.section sjqeb
+.section sjqfa
+.section sjqfb
+.section sjqga
+.section sjqgb
+.section sjqha
+.section sjqhb
+.section sjqia
+.section sjqib
+.section sjqja
+.section sjqjb
+.section sjqka
+.section sjqkb
+.section sjqla
+.section sjqlb
+.section sjqma
+.section sjqmb
+.section sjqna
+.section sjqnb
+.section sjqoa
+.section sjqob
+.section sjqpa
+.section sjqpb
+.section sjqqa
+.section sjqqb
+.section sjqra
+.section sjqrb
+.section sjqsa
+.section sjqsb
+.section sjqta
+.section sjqtb
+.section sjqua
+.section sjqub
+.section sjqva
+.section sjqvb
+.section sjqwa
+.section sjqwb
+.section sjqxa
+.section sjqxb
+.section sjqya
+.section sjqyb
+.section sjqza
+.section sjqzb
+.section sjq1a
+.section sjq1b
+.section sjq2a
+.section sjq2b
+.section sjq3a
+.section sjq3b
+.section sjq4a
+.section sjq4b
+.section sjq5a
+.section sjq5b
+.section sjq6a
+.section sjq6b
+.section sjq7a
+.section sjq7b
+.section sjq8a
+.section sjq8b
+.section sjq9a
+.section sjq9b
+.section sjq0a
+.section sjq0b
+.section sjraa
+.section sjrab
+.section sjrba
+.section sjrbb
+.section sjrca
+.section sjrcb
+.section sjrda
+.section sjrdb
+.section sjrea
+.section sjreb
+.section sjrfa
+.section sjrfb
+.section sjrga
+.section sjrgb
+.section sjrha
+.section sjrhb
+.section sjria
+.section sjrib
+.section sjrja
+.section sjrjb
+.section sjrka
+.section sjrkb
+.section sjrla
+.section sjrlb
+.section sjrma
+.section sjrmb
+.section sjrna
+.section sjrnb
+.section sjroa
+.section sjrob
+.section sjrpa
+.section sjrpb
+.section sjrqa
+.section sjrqb
+.section sjrra
+.section sjrrb
+.section sjrsa
+.section sjrsb
+.section sjrta
+.section sjrtb
+.section sjrua
+.section sjrub
+.section sjrva
+.section sjrvb
+.section sjrwa
+.section sjrwb
+.section sjrxa
+.section sjrxb
+.section sjrya
+.section sjryb
+.section sjrza
+.section sjrzb
+.section sjr1a
+.section sjr1b
+.section sjr2a
+.section sjr2b
+.section sjr3a
+.section sjr3b
+.section sjr4a
+.section sjr4b
+.section sjr5a
+.section sjr5b
+.section sjr6a
+.section sjr6b
+.section sjr7a
+.section sjr7b
+.section sjr8a
+.section sjr8b
+.section sjr9a
+.section sjr9b
+.section sjr0a
+.section sjr0b
+.section sjsaa
+.section sjsab
+.section sjsba
+.section sjsbb
+.section sjsca
+.section sjscb
+.section sjsda
+.section sjsdb
+.section sjsea
+.section sjseb
+.section sjsfa
+.section sjsfb
+.section sjsga
+.section sjsgb
+.section sjsha
+.section sjshb
+.section sjsia
+.section sjsib
+.section sjsja
+.section sjsjb
+.section sjska
+.section sjskb
+.section sjsla
+.section sjslb
+.section sjsma
+.section sjsmb
+.section sjsna
+.section sjsnb
+.section sjsoa
+.section sjsob
+.section sjspa
+.section sjspb
+.section sjsqa
+.section sjsqb
+.section sjsra
+.section sjsrb
+.section sjssa
+.section sjssb
+.section sjsta
+.section sjstb
+.section sjsua
+.section sjsub
+.section sjsva
+.section sjsvb
+.section sjswa
+.section sjswb
+.section sjsxa
+.section sjsxb
+.section sjsya
+.section sjsyb
+.section sjsza
+.section sjszb
+.section sjs1a
+.section sjs1b
+.section sjs2a
+.section sjs2b
+.section sjs3a
+.section sjs3b
+.section sjs4a
+.section sjs4b
+.section sjs5a
+.section sjs5b
+.section sjs6a
+.section sjs6b
+.section sjs7a
+.section sjs7b
+.section sjs8a
+.section sjs8b
+.section sjs9a
+.section sjs9b
+.section sjs0a
+.section sjs0b
+.section sjtaa
+.section sjtab
+.section sjtba
+.section sjtbb
+.section sjtca
+.section sjtcb
+.section sjtda
+.section sjtdb
+.section sjtea
+.section sjteb
+.section sjtfa
+.section sjtfb
+.section sjtga
+.section sjtgb
+.section sjtha
+.section sjthb
+.section sjtia
+.section sjtib
+.section sjtja
+.section sjtjb
+.section sjtka
+.section sjtkb
+.section sjtla
+.section sjtlb
+.section sjtma
+.section sjtmb
+.section sjtna
+.section sjtnb
+.section sjtoa
+.section sjtob
+.section sjtpa
+.section sjtpb
+.section sjtqa
+.section sjtqb
+.section sjtra
+.section sjtrb
+.section sjtsa
+.section sjtsb
+.section sjtta
+.section sjttb
+.section sjtua
+.section sjtub
+.section sjtva
+.section sjtvb
+.section sjtwa
+.section sjtwb
+.section sjtxa
+.section sjtxb
+.section sjtya
+.section sjtyb
+.section sjtza
+.section sjtzb
+.section sjt1a
+.section sjt1b
+.section sjt2a
+.section sjt2b
+.section sjt3a
+.section sjt3b
+.section sjt4a
+.section sjt4b
+.section sjt5a
+.section sjt5b
+.section sjt6a
+.section sjt6b
+.section sjt7a
+.section sjt7b
+.section sjt8a
+.section sjt8b
+.section sjt9a
+.section sjt9b
+.section sjt0a
+.section sjt0b
+.section sjuaa
+.section sjuab
+.section sjuba
+.section sjubb
+.section sjuca
+.section sjucb
+.section sjuda
+.section sjudb
+.section sjuea
+.section sjueb
+.section sjufa
+.section sjufb
+.section sjuga
+.section sjugb
+.section sjuha
+.section sjuhb
+.section sjuia
+.section sjuib
+.section sjuja
+.section sjujb
+.section sjuka
+.section sjukb
+.section sjula
+.section sjulb
+.section sjuma
+.section sjumb
+.section sjuna
+.section sjunb
+.section sjuoa
+.section sjuob
+.section sjupa
+.section sjupb
+.section sjuqa
+.section sjuqb
+.section sjura
+.section sjurb
+.section sjusa
+.section sjusb
+.section sjuta
+.section sjutb
+.section sjuua
+.section sjuub
+.section sjuva
+.section sjuvb
+.section sjuwa
+.section sjuwb
+.section sjuxa
+.section sjuxb
+.section sjuya
+.section sjuyb
+.section sjuza
+.section sjuzb
+.section sju1a
+.section sju1b
+.section sju2a
+.section sju2b
+.section sju3a
+.section sju3b
+.section sju4a
+.section sju4b
+.section sju5a
+.section sju5b
+.section sju6a
+.section sju6b
+.section sju7a
+.section sju7b
+.section sju8a
+.section sju8b
+.section sju9a
+.section sju9b
+.section sju0a
+.section sju0b
+.section sjvaa
+.section sjvab
+.section sjvba
+.section sjvbb
+.section sjvca
+.section sjvcb
+.section sjvda
+.section sjvdb
+.section sjvea
+.section sjveb
+.section sjvfa
+.section sjvfb
+.section sjvga
+.section sjvgb
+.section sjvha
+.section sjvhb
+.section sjvia
+.section sjvib
+.section sjvja
+.section sjvjb
+.section sjvka
+.section sjvkb
+.section sjvla
+.section sjvlb
+.section sjvma
+.section sjvmb
+.section sjvna
+.section sjvnb
+.section sjvoa
+.section sjvob
+.section sjvpa
+.section sjvpb
+.section sjvqa
+.section sjvqb
+.section sjvra
+.section sjvrb
+.section sjvsa
+.section sjvsb
+.section sjvta
+.section sjvtb
+.section sjvua
+.section sjvub
+.section sjvva
+.section sjvvb
+.section sjvwa
+.section sjvwb
+.section sjvxa
+.section sjvxb
+.section sjvya
+.section sjvyb
+.section sjvza
+.section sjvzb
+.section sjv1a
+.section sjv1b
+.section sjv2a
+.section sjv2b
+.section sjv3a
+.section sjv3b
+.section sjv4a
+.section sjv4b
+.section sjv5a
+.section sjv5b
+.section sjv6a
+.section sjv6b
+.section sjv7a
+.section sjv7b
+.section sjv8a
+.section sjv8b
+.section sjv9a
+.section sjv9b
+.section sjv0a
+.section sjv0b
+.section sjwaa
+.section sjwab
+.section sjwba
+.section sjwbb
+.section sjwca
+.section sjwcb
+.section sjwda
+.section sjwdb
+.section sjwea
+.section sjweb
+.section sjwfa
+.section sjwfb
+.section sjwga
+.section sjwgb
+.section sjwha
+.section sjwhb
+.section sjwia
+.section sjwib
+.section sjwja
+.section sjwjb
+.section sjwka
+.section sjwkb
+.section sjwla
+.section sjwlb
+.section sjwma
+.section sjwmb
+.section sjwna
+.section sjwnb
+.section sjwoa
+.section sjwob
+.section sjwpa
+.section sjwpb
+.section sjwqa
+.section sjwqb
+.section sjwra
+.section sjwrb
+.section sjwsa
+.section sjwsb
+.section sjwta
+.section sjwtb
+.section sjwua
+.section sjwub
+.section sjwva
+.section sjwvb
+.section sjwwa
+.section sjwwb
+.section sjwxa
+.section sjwxb
+.section sjwya
+.section sjwyb
+.section sjwza
+.section sjwzb
+.section sjw1a
+.section sjw1b
+.section sjw2a
+.section sjw2b
+.section sjw3a
+.section sjw3b
+.section sjw4a
+.section sjw4b
+.section sjw5a
+.section sjw5b
+.section sjw6a
+.section sjw6b
+.section sjw7a
+.section sjw7b
+.section sjw8a
+.section sjw8b
+.section sjw9a
+.section sjw9b
+.section sjw0a
+.section sjw0b
+.section sjxaa
+.section sjxab
+.section sjxba
+.section sjxbb
+.section sjxca
+.section sjxcb
+.section sjxda
+.section sjxdb
+.section sjxea
+.section sjxeb
+.section sjxfa
+.section sjxfb
+.section sjxga
+.section sjxgb
+.section sjxha
+.section sjxhb
+.section sjxia
+.section sjxib
+.section sjxja
+.section sjxjb
+.section sjxka
+.section sjxkb
+.section sjxla
+.section sjxlb
+.section sjxma
+.section sjxmb
+.section sjxna
+.section sjxnb
+.section sjxoa
+.section sjxob
+.section sjxpa
+.section sjxpb
+.section sjxqa
+.section sjxqb
+.section sjxra
+.section sjxrb
+.section sjxsa
+.section sjxsb
+.section sjxta
+.section sjxtb
+.section sjxua
+.section sjxub
+.section sjxva
+.section sjxvb
+.section sjxwa
+.section sjxwb
+.section sjxxa
+.section sjxxb
+.section sjxya
+.section sjxyb
+.section sjxza
+.section sjxzb
+.section sjx1a
+.section sjx1b
+.section sjx2a
+.section sjx2b
+.section sjx3a
+.section sjx3b
+.section sjx4a
+.section sjx4b
+.section sjx5a
+.section sjx5b
+.section sjx6a
+.section sjx6b
+.section sjx7a
+.section sjx7b
+.section sjx8a
+.section sjx8b
+.section sjx9a
+.section sjx9b
+.section sjx0a
+.section sjx0b
+.section sjyaa
+.section sjyab
+.section sjyba
+.section sjybb
+.section sjyca
+.section sjycb
+.section sjyda
+.section sjydb
+.section sjyea
+.section sjyeb
+.section sjyfa
+.section sjyfb
+.section sjyga
+.section sjygb
+.section sjyha
+.section sjyhb
+.section sjyia
+.section sjyib
+.section sjyja
+.section sjyjb
+.section sjyka
+.section sjykb
+.section sjyla
+.section sjylb
+.section sjyma
+.section sjymb
+.section sjyna
+.section sjynb
+.section sjyoa
+.section sjyob
+.section sjypa
+.section sjypb
+.section sjyqa
+.section sjyqb
+.section sjyra
+.section sjyrb
+.section sjysa
+.section sjysb
+.section sjyta
+.section sjytb
+.section sjyua
+.section sjyub
+.section sjyva
+.section sjyvb
+.section sjywa
+.section sjywb
+.section sjyxa
+.section sjyxb
+.section sjyya
+.section sjyyb
+.section sjyza
+.section sjyzb
+.section sjy1a
+.section sjy1b
+.section sjy2a
+.section sjy2b
+.section sjy3a
+.section sjy3b
+.section sjy4a
+.section sjy4b
+.section sjy5a
+.section sjy5b
+.section sjy6a
+.section sjy6b
+.section sjy7a
+.section sjy7b
+.section sjy8a
+.section sjy8b
+.section sjy9a
+.section sjy9b
+.section sjy0a
+.section sjy0b
+.section sjzaa
+.section sjzab
+.section sjzba
+.section sjzbb
+.section sjzca
+.section sjzcb
+.section sjzda
+.section sjzdb
+.section sjzea
+.section sjzeb
+.section sjzfa
+.section sjzfb
+.section sjzga
+.section sjzgb
+.section sjzha
+.section sjzhb
+.section sjzia
+.section sjzib
+.section sjzja
+.section sjzjb
+.section sjzka
+.section sjzkb
+.section sjzla
+.section sjzlb
+.section sjzma
+.section sjzmb
+.section sjzna
+.section sjznb
+.section sjzoa
+.section sjzob
+.section sjzpa
+.section sjzpb
+.section sjzqa
+.section sjzqb
+.section sjzra
+.section sjzrb
+.section sjzsa
+.section sjzsb
+.section sjzta
+.section sjztb
+.section sjzua
+.section sjzub
+.section sjzva
+.section sjzvb
+.section sjzwa
+.section sjzwb
+.section sjzxa
+.section sjzxb
+.section sjzya
+.section sjzyb
+.section sjzza
+.section sjzzb
+.section sjz1a
+.section sjz1b
+.section sjz2a
+.section sjz2b
+.section sjz3a
+.section sjz3b
+.section sjz4a
+.section sjz4b
+.section sjz5a
+.section sjz5b
+.section sjz6a
+.section sjz6b
+.section sjz7a
+.section sjz7b
+.section sjz8a
+.section sjz8b
+.section sjz9a
+.section sjz9b
+.section sjz0a
+.section sjz0b
+.section sj1aa
+.section sj1ab
+.section sj1ba
+.section sj1bb
+.section sj1ca
+.section sj1cb
+.section sj1da
+.section sj1db
+.section sj1ea
+.section sj1eb
+.section sj1fa
+.section sj1fb
+.section sj1ga
+.section sj1gb
+.section sj1ha
+.section sj1hb
+.section sj1ia
+.section sj1ib
+.section sj1ja
+.section sj1jb
+.section sj1ka
+.section sj1kb
+.section sj1la
+.section sj1lb
+.section sj1ma
+.section sj1mb
+.section sj1na
+.section sj1nb
+.section sj1oa
+.section sj1ob
+.section sj1pa
+.section sj1pb
+.section sj1qa
+.section sj1qb
+.section sj1ra
+.section sj1rb
+.section sj1sa
+.section sj1sb
+.section sj1ta
+.section sj1tb
+.section sj1ua
+.section sj1ub
+.section sj1va
+.section sj1vb
+.section sj1wa
+.section sj1wb
+.section sj1xa
+.section sj1xb
+.section sj1ya
+.section sj1yb
+.section sj1za
+.section sj1zb
+.section sj11a
+.section sj11b
+.section sj12a
+.section sj12b
+.section sj13a
+.section sj13b
+.section sj14a
+.section sj14b
+.section sj15a
+.section sj15b
+.section sj16a
+.section sj16b
+.section sj17a
+.section sj17b
+.section sj18a
+.section sj18b
+.section sj19a
+.section sj19b
+.section sj10a
+.section sj10b
+.section sj2aa
+.section sj2ab
+.section sj2ba
+.section sj2bb
+.section sj2ca
+.section sj2cb
+.section sj2da
+.section sj2db
+.section sj2ea
+.section sj2eb
+.section sj2fa
+.section sj2fb
+.section sj2ga
+.section sj2gb
+.section sj2ha
+.section sj2hb
+.section sj2ia
+.section sj2ib
+.section sj2ja
+.section sj2jb
+.section sj2ka
+.section sj2kb
+.section sj2la
+.section sj2lb
+.section sj2ma
+.section sj2mb
+.section sj2na
+.section sj2nb
+.section sj2oa
+.section sj2ob
+.section sj2pa
+.section sj2pb
+.section sj2qa
+.section sj2qb
+.section sj2ra
+.section sj2rb
+.section sj2sa
+.section sj2sb
+.section sj2ta
+.section sj2tb
+.section sj2ua
+.section sj2ub
+.section sj2va
+.section sj2vb
+.section sj2wa
+.section sj2wb
+.section sj2xa
+.section sj2xb
+.section sj2ya
+.section sj2yb
+.section sj2za
+.section sj2zb
+.section sj21a
+.section sj21b
+.section sj22a
+.section sj22b
+.section sj23a
+.section sj23b
+.section sj24a
+.section sj24b
+.section sj25a
+.section sj25b
+.section sj26a
+.section sj26b
+.section sj27a
+.section sj27b
+.section sj28a
+.section sj28b
+.section sj29a
+.section sj29b
+.section sj20a
+.section sj20b
+.section sj3aa
+.section sj3ab
+.section sj3ba
+.section sj3bb
+.section sj3ca
+.section sj3cb
+.section sj3da
+.section sj3db
+.section sj3ea
+.section sj3eb
+.section sj3fa
+.section sj3fb
+.section sj3ga
+.section sj3gb
+.section sj3ha
+.section sj3hb
+.section sj3ia
+.section sj3ib
+.section sj3ja
+.section sj3jb
+.section sj3ka
+.section sj3kb
+.section sj3la
+.section sj3lb
+.section sj3ma
+.section sj3mb
+.section sj3na
+.section sj3nb
+.section sj3oa
+.section sj3ob
+.section sj3pa
+.section sj3pb
+.section sj3qa
+.section sj3qb
+.section sj3ra
+.section sj3rb
+.section sj3sa
+.section sj3sb
+.section sj3ta
+.section sj3tb
+.section sj3ua
+.section sj3ub
+.section sj3va
+.section sj3vb
+.section sj3wa
+.section sj3wb
+.section sj3xa
+.section sj3xb
+.section sj3ya
+.section sj3yb
+.section sj3za
+.section sj3zb
+.section sj31a
+.section sj31b
+.section sj32a
+.section sj32b
+.section sj33a
+.section sj33b
+.section sj34a
+.section sj34b
+.section sj35a
+.section sj35b
+.section sj36a
+.section sj36b
+.section sj37a
+.section sj37b
+.section sj38a
+.section sj38b
+.section sj39a
+.section sj39b
+.section sj30a
+.section sj30b
+.section sj4aa
+.section sj4ab
+.section sj4ba
+.section sj4bb
+.section sj4ca
+.section sj4cb
+.section sj4da
+.section sj4db
+.section sj4ea
+.section sj4eb
+.section sj4fa
+.section sj4fb
+.section sj4ga
+.section sj4gb
+.section sj4ha
+.section sj4hb
+.section sj4ia
+.section sj4ib
+.section sj4ja
+.section sj4jb
+.section sj4ka
+.section sj4kb
+.section sj4la
+.section sj4lb
+.section sj4ma
+.section sj4mb
+.section sj4na
+.section sj4nb
+.section sj4oa
+.section sj4ob
+.section sj4pa
+.section sj4pb
+.section sj4qa
+.section sj4qb
+.section sj4ra
+.section sj4rb
+.section sj4sa
+.section sj4sb
+.section sj4ta
+.section sj4tb
+.section sj4ua
+.section sj4ub
+.section sj4va
+.section sj4vb
+.section sj4wa
+.section sj4wb
+.section sj4xa
+.section sj4xb
+.section sj4ya
+.section sj4yb
+.section sj4za
+.section sj4zb
+.section sj41a
+.section sj41b
+.section sj42a
+.section sj42b
+.section sj43a
+.section sj43b
+.section sj44a
+.section sj44b
+.section sj45a
+.section sj45b
+.section sj46a
+.section sj46b
+.section sj47a
+.section sj47b
+.section sj48a
+.section sj48b
+.section sj49a
+.section sj49b
+.section sj40a
+.section sj40b
+.section sj5aa
+.section sj5ab
+.section sj5ba
+.section sj5bb
+.section sj5ca
+.section sj5cb
+.section sj5da
+.section sj5db
+.section sj5ea
+.section sj5eb
+.section sj5fa
+.section sj5fb
+.section sj5ga
+.section sj5gb
+.section sj5ha
+.section sj5hb
+.section sj5ia
+.section sj5ib
+.section sj5ja
+.section sj5jb
+.section sj5ka
+.section sj5kb
+.section sj5la
+.section sj5lb
+.section sj5ma
+.section sj5mb
+.section sj5na
+.section sj5nb
+.section sj5oa
+.section sj5ob
+.section sj5pa
+.section sj5pb
+.section sj5qa
+.section sj5qb
+.section sj5ra
+.section sj5rb
+.section sj5sa
+.section sj5sb
+.section sj5ta
+.section sj5tb
+.section sj5ua
+.section sj5ub
+.section sj5va
+.section sj5vb
+.section sj5wa
+.section sj5wb
+.section sj5xa
+.section sj5xb
+.section sj5ya
+.section sj5yb
+.section sj5za
+.section sj5zb
+.section sj51a
+.section sj51b
+.section sj52a
+.section sj52b
+.section sj53a
+.section sj53b
+.section sj54a
+.section sj54b
+.section sj55a
+.section sj55b
+.section sj56a
+.section sj56b
+.section sj57a
+.section sj57b
+.section sj58a
+.section sj58b
+.section sj59a
+.section sj59b
+.section sj50a
+.section sj50b
+.section sj6aa
+.section sj6ab
+.section sj6ba
+.section sj6bb
+.section sj6ca
+.section sj6cb
+.section sj6da
+.section sj6db
+.section sj6ea
+.section sj6eb
+.section sj6fa
+.section sj6fb
+.section sj6ga
+.section sj6gb
+.section sj6ha
+.section sj6hb
+.section sj6ia
+.section sj6ib
+.section sj6ja
+.section sj6jb
+.section sj6ka
+.section sj6kb
+.section sj6la
+.section sj6lb
+.section sj6ma
+.section sj6mb
+.section sj6na
+.section sj6nb
+.section sj6oa
+.section sj6ob
+.section sj6pa
+.section sj6pb
+.section sj6qa
+.section sj6qb
+.section sj6ra
+.section sj6rb
+.section sj6sa
+.section sj6sb
+.section sj6ta
+.section sj6tb
+.section sj6ua
+.section sj6ub
+.section sj6va
+.section sj6vb
+.section sj6wa
+.section sj6wb
+.section sj6xa
+.section sj6xb
+.section sj6ya
+.section sj6yb
+.section sj6za
+.section sj6zb
+.section sj61a
+.section sj61b
+.section sj62a
+.section sj62b
+.section sj63a
+.section sj63b
+.section sj64a
+.section sj64b
+.section sj65a
+.section sj65b
+.section sj66a
+.section sj66b
+.section sj67a
+.section sj67b
+.section sj68a
+.section sj68b
+.section sj69a
+.section sj69b
+.section sj60a
+.section sj60b
+.section sj7aa
+.section sj7ab
+.section sj7ba
+.section sj7bb
+.section sj7ca
+.section sj7cb
+.section sj7da
+.section sj7db
+.section sj7ea
+.section sj7eb
+.section sj7fa
+.section sj7fb
+.section sj7ga
+.section sj7gb
+.section sj7ha
+.section sj7hb
+.section sj7ia
+.section sj7ib
+.section sj7ja
+.section sj7jb
+.section sj7ka
+.section sj7kb
+.section sj7la
+.section sj7lb
+.section sj7ma
+.section sj7mb
+.section sj7na
+.section sj7nb
+.section sj7oa
+.section sj7ob
+.section sj7pa
+.section sj7pb
+.section sj7qa
+.section sj7qb
+.section sj7ra
+.section sj7rb
+.section sj7sa
+.section sj7sb
+.section sj7ta
+.section sj7tb
+.section sj7ua
+.section sj7ub
+.section sj7va
+.section sj7vb
+.section sj7wa
+.section sj7wb
+.section sj7xa
+.section sj7xb
+.section sj7ya
+.section sj7yb
+.section sj7za
+.section sj7zb
+.section sj71a
+.section sj71b
+.section sj72a
+.section sj72b
+.section sj73a
+.section sj73b
+.section sj74a
+.section sj74b
+.section sj75a
+.section sj75b
+.section sj76a
+.section sj76b
+.section sj77a
+.section sj77b
+.section sj78a
+.section sj78b
+.section sj79a
+.section sj79b
+.section sj70a
+.section sj70b
+.section sj8aa
+.section sj8ab
+.section sj8ba
+.section sj8bb
+.section sj8ca
+.section sj8cb
+.section sj8da
+.section sj8db
+.section sj8ea
+.section sj8eb
+.section sj8fa
+.section sj8fb
+.section sj8ga
+.section sj8gb
+.section sj8ha
+.section sj8hb
+.section sj8ia
+.section sj8ib
+.section sj8ja
+.section sj8jb
+.section sj8ka
+.section sj8kb
+.section sj8la
+.section sj8lb
+.section sj8ma
+.section sj8mb
+.section sj8na
+.section sj8nb
+.section sj8oa
+.section sj8ob
+.section sj8pa
+.section sj8pb
+.section sj8qa
+.section sj8qb
+.section sj8ra
+.section sj8rb
+.section sj8sa
+.section sj8sb
+.section sj8ta
+.section sj8tb
+.section sj8ua
+.section sj8ub
+.section sj8va
+.section sj8vb
+.section sj8wa
+.section sj8wb
+.section sj8xa
+.section sj8xb
+.section sj8ya
+.section sj8yb
+.section sj8za
+.section sj8zb
+.section sj81a
+.section sj81b
+.section sj82a
+.section sj82b
+.section sj83a
+.section sj83b
+.section sj84a
+.section sj84b
+.section sj85a
+.section sj85b
+.section sj86a
+.section sj86b
+.section sj87a
+.section sj87b
+.section sj88a
+.section sj88b
+.section sj89a
+.section sj89b
+.section sj80a
+.section sj80b
+.section sj9aa
+.section sj9ab
+.section sj9ba
+.section sj9bb
+.section sj9ca
+.section sj9cb
+.section sj9da
+.section sj9db
+.section sj9ea
+.section sj9eb
+.section sj9fa
+.section sj9fb
+.section sj9ga
+.section sj9gb
+.section sj9ha
+.section sj9hb
+.section sj9ia
+.section sj9ib
+.section sj9ja
+.section sj9jb
+.section sj9ka
+.section sj9kb
+.section sj9la
+.section sj9lb
+.section sj9ma
+.section sj9mb
+.section sj9na
+.section sj9nb
+.section sj9oa
+.section sj9ob
+.section sj9pa
+.section sj9pb
+.section sj9qa
+.section sj9qb
+.section sj9ra
+.section sj9rb
+.section sj9sa
+.section sj9sb
+.section sj9ta
+.section sj9tb
+.section sj9ua
+.section sj9ub
+.section sj9va
+.section sj9vb
+.section sj9wa
+.section sj9wb
+.section sj9xa
+.section sj9xb
+.section sj9ya
+.section sj9yb
+.section sj9za
+.section sj9zb
+.section sj91a
+.section sj91b
+.section sj92a
+.section sj92b
+.section sj93a
+.section sj93b
+.section sj94a
+.section sj94b
+.section sj95a
+.section sj95b
+.section sj96a
+.section sj96b
+.section sj97a
+.section sj97b
+.section sj98a
+.section sj98b
+.section sj99a
+.section sj99b
+.section sj90a
+.section sj90b
+.section sj0aa
+.section sj0ab
+.section sj0ba
+.section sj0bb
+.section sj0ca
+.section sj0cb
+.section sj0da
+.section sj0db
+.section sj0ea
+.section sj0eb
+.section sj0fa
+.section sj0fb
+.section sj0ga
+.section sj0gb
+.section sj0ha
+.section sj0hb
+.section sj0ia
+.section sj0ib
+.section sj0ja
+.section sj0jb
+.section sj0ka
+.section sj0kb
+.section sj0la
+.section sj0lb
+.section sj0ma
+.section sj0mb
+.section sj0na
+.section sj0nb
+.section sj0oa
+.section sj0ob
+.section sj0pa
+.section sj0pb
+.section sj0qa
+.section sj0qb
+.section sj0ra
+.section sj0rb
+.section sj0sa
+.section sj0sb
+.section sj0ta
+.section sj0tb
+.section sj0ua
+.section sj0ub
+.section sj0va
+.section sj0vb
+.section sj0wa
+.section sj0wb
+.section sj0xa
+.section sj0xb
+.section sj0ya
+.section sj0yb
+.section sj0za
+.section sj0zb
+.section sj01a
+.section sj01b
+.section sj02a
+.section sj02b
+.section sj03a
+.section sj03b
+.section sj04a
+.section sj04b
+.section sj05a
+.section sj05b
+.section sj06a
+.section sj06b
+.section sj07a
+.section sj07b
+.section sj08a
+.section sj08b
+.section sj09a
+.section sj09b
+.section sj00a
+.section sj00b
+.section skaaa
+.section skaab
+.section skaba
+.section skabb
+.section skaca
+.section skacb
+.section skada
+.section skadb
+.section skaea
+.section skaeb
+.section skafa
+.section skafb
+.section skaga
+.section skagb
+.section skaha
+.section skahb
+.section skaia
+.section skaib
+.section skaja
+.section skajb
+.section skaka
+.section skakb
+.section skala
+.section skalb
+.section skama
+.section skamb
+.section skana
+.section skanb
+.section skaoa
+.section skaob
+.section skapa
+.section skapb
+.section skaqa
+.section skaqb
+.section skara
+.section skarb
+.section skasa
+.section skasb
+.section skata
+.section skatb
+.section skaua
+.section skaub
+.section skava
+.section skavb
+.section skawa
+.section skawb
+.section skaxa
+.section skaxb
+.section skaya
+.section skayb
+.section skaza
+.section skazb
+.section ska1a
+.section ska1b
+.section ska2a
+.section ska2b
+.section ska3a
+.section ska3b
+.section ska4a
+.section ska4b
+.section ska5a
+.section ska5b
+.section ska6a
+.section ska6b
+.section ska7a
+.section ska7b
+.section ska8a
+.section ska8b
+.section ska9a
+.section ska9b
+.section ska0a
+.section ska0b
+.section skbaa
+.section skbab
+.section skbba
+.section skbbb
+.section skbca
+.section skbcb
+.section skbda
+.section skbdb
+.section skbea
+.section skbeb
+.section skbfa
+.section skbfb
+.section skbga
+.section skbgb
+.section skbha
+.section skbhb
+.section skbia
+.section skbib
+.section skbja
+.section skbjb
+.section skbka
+.section skbkb
+.section skbla
+.section skblb
+.section skbma
+.section skbmb
+.section skbna
+.section skbnb
+.section skboa
+.section skbob
+.section skbpa
+.section skbpb
+.section skbqa
+.section skbqb
+.section skbra
+.section skbrb
+.section skbsa
+.section skbsb
+.section skbta
+.section skbtb
+.section skbua
+.section skbub
+.section skbva
+.section skbvb
+.section skbwa
+.section skbwb
+.section skbxa
+.section skbxb
+.section skbya
+.section skbyb
+.section skbza
+.section skbzb
+.section skb1a
+.section skb1b
+.section skb2a
+.section skb2b
+.section skb3a
+.section skb3b
+.section skb4a
+.section skb4b
+.section skb5a
+.section skb5b
+.section skb6a
+.section skb6b
+.section skb7a
+.section skb7b
+.section skb8a
+.section skb8b
+.section skb9a
+.section skb9b
+.section skb0a
+.section skb0b
+.section skcaa
+.section skcab
+.section skcba
+.section skcbb
+.section skcca
+.section skccb
+.section skcda
+.section skcdb
+.section skcea
+.section skceb
+.section skcfa
+.section skcfb
+.section skcga
+.section skcgb
+.section skcha
+.section skchb
+.section skcia
+.section skcib
+.section skcja
+.section skcjb
+.section skcka
+.section skckb
+.section skcla
+.section skclb
+.section skcma
+.section skcmb
+.section skcna
+.section skcnb
+.section skcoa
+.section skcob
+.section skcpa
+.section skcpb
+.section skcqa
+.section skcqb
+.section skcra
+.section skcrb
+.section skcsa
+.section skcsb
+.section skcta
+.section skctb
+.section skcua
+.section skcub
+.section skcva
+.section skcvb
+.section skcwa
+.section skcwb
+.section skcxa
+.section skcxb
+.section skcya
+.section skcyb
+.section skcza
+.section skczb
+.section skc1a
+.section skc1b
+.section skc2a
+.section skc2b
+.section skc3a
+.section skc3b
+.section skc4a
+.section skc4b
+.section skc5a
+.section skc5b
+.section skc6a
+.section skc6b
+.section skc7a
+.section skc7b
+.section skc8a
+.section skc8b
+.section skc9a
+.section skc9b
+.section skc0a
+.section skc0b
+.section skdaa
+.section skdab
+.section skdba
+.section skdbb
+.section skdca
+.section skdcb
+.section skdda
+.section skddb
+.section skdea
+.section skdeb
+.section skdfa
+.section skdfb
+.section skdga
+.section skdgb
+.section skdha
+.section skdhb
+.section skdia
+.section skdib
+.section skdja
+.section skdjb
+.section skdka
+.section skdkb
+.section skdla
+.section skdlb
+.section skdma
+.section skdmb
+.section skdna
+.section skdnb
+.section skdoa
+.section skdob
+.section skdpa
+.section skdpb
+.section skdqa
+.section skdqb
+.section skdra
+.section skdrb
+.section skdsa
+.section skdsb
+.section skdta
+.section skdtb
+.section skdua
+.section skdub
+.section skdva
+.section skdvb
+.section skdwa
+.section skdwb
+.section skdxa
+.section skdxb
+.section skdya
+.section skdyb
+.section skdza
+.section skdzb
+.section skd1a
+.section skd1b
+.section skd2a
+.section skd2b
+.section skd3a
+.section skd3b
+.section skd4a
+.section skd4b
+.section skd5a
+.section skd5b
+.section skd6a
+.section skd6b
+.section skd7a
+.section skd7b
+.section skd8a
+.section skd8b
+.section skd9a
+.section skd9b
+.section skd0a
+.section skd0b
+.section skeaa
+.section skeab
+.section skeba
+.section skebb
+.section skeca
+.section skecb
+.section skeda
+.section skedb
+.section skeea
+.section skeeb
+.section skefa
+.section skefb
+.section skega
+.section skegb
+.section skeha
+.section skehb
+.section skeia
+.section skeib
+.section skeja
+.section skejb
+.section skeka
+.section skekb
+.section skela
+.section skelb
+.section skema
+.section skemb
+.section skena
+.section skenb
+.section skeoa
+.section skeob
+.section skepa
+.section skepb
+.section skeqa
+.section skeqb
+.section skera
+.section skerb
+.section skesa
+.section skesb
+.section sketa
+.section sketb
+.section skeua
+.section skeub
+.section skeva
+.section skevb
+.section skewa
+.section skewb
+.section skexa
+.section skexb
+.section skeya
+.section skeyb
+.section skeza
+.section skezb
+.section ske1a
+.section ske1b
+.section ske2a
+.section ske2b
+.section ske3a
+.section ske3b
+.section ske4a
+.section ske4b
+.section ske5a
+.section ske5b
+.section ske6a
+.section ske6b
+.section ske7a
+.section ske7b
+.section ske8a
+.section ske8b
+.section ske9a
+.section ske9b
+.section ske0a
+.section ske0b
+.section skfaa
+.section skfab
+.section skfba
+.section skfbb
+.section skfca
+.section skfcb
+.section skfda
+.section skfdb
+.section skfea
+.section skfeb
+.section skffa
+.section skffb
+.section skfga
+.section skfgb
+.section skfha
+.section skfhb
+.section skfia
+.section skfib
+.section skfja
+.section skfjb
+.section skfka
+.section skfkb
+.section skfla
+.section skflb
+.section skfma
+.section skfmb
+.section skfna
+.section skfnb
+.section skfoa
+.section skfob
+.section skfpa
+.section skfpb
+.section skfqa
+.section skfqb
+.section skfra
+.section skfrb
+.section skfsa
+.section skfsb
+.section skfta
+.section skftb
+.section skfua
+.section skfub
+.section skfva
+.section skfvb
+.section skfwa
+.section skfwb
+.section skfxa
+.section skfxb
+.section skfya
+.section skfyb
+.section skfza
+.section skfzb
+.section skf1a
+.section skf1b
+.section skf2a
+.section skf2b
+.section skf3a
+.section skf3b
+.section skf4a
+.section skf4b
+.section skf5a
+.section skf5b
+.section skf6a
+.section skf6b
+.section skf7a
+.section skf7b
+.section skf8a
+.section skf8b
+.section skf9a
+.section skf9b
+.section skf0a
+.section skf0b
+.section skgaa
+.section skgab
+.section skgba
+.section skgbb
+.section skgca
+.section skgcb
+.section skgda
+.section skgdb
+.section skgea
+.section skgeb
+.section skgfa
+.section skgfb
+.section skgga
+.section skggb
+.section skgha
+.section skghb
+.section skgia
+.section skgib
+.section skgja
+.section skgjb
+.section skgka
+.section skgkb
+.section skgla
+.section skglb
+.section skgma
+.section skgmb
+.section skgna
+.section skgnb
+.section skgoa
+.section skgob
+.section skgpa
+.section skgpb
+.section skgqa
+.section skgqb
+.section skgra
+.section skgrb
+.section skgsa
+.section skgsb
+.section skgta
+.section skgtb
+.section skgua
+.section skgub
+.section skgva
+.section skgvb
+.section skgwa
+.section skgwb
+.section skgxa
+.section skgxb
+.section skgya
+.section skgyb
+.section skgza
+.section skgzb
+.section skg1a
+.section skg1b
+.section skg2a
+.section skg2b
+.section skg3a
+.section skg3b
+.section skg4a
+.section skg4b
+.section skg5a
+.section skg5b
+.section skg6a
+.section skg6b
+.section skg7a
+.section skg7b
+.section skg8a
+.section skg8b
+.section skg9a
+.section skg9b
+.section skg0a
+.section skg0b
+.section skhaa
+.section skhab
+.section skhba
+.section skhbb
+.section skhca
+.section skhcb
+.section skhda
+.section skhdb
+.section skhea
+.section skheb
+.section skhfa
+.section skhfb
+.section skhga
+.section skhgb
+.section skhha
+.section skhhb
+.section skhia
+.section skhib
+.section skhja
+.section skhjb
+.section skhka
+.section skhkb
+.section skhla
+.section skhlb
+.section skhma
+.section skhmb
+.section skhna
+.section skhnb
+.section skhoa
+.section skhob
+.section skhpa
+.section skhpb
+.section skhqa
+.section skhqb
+.section skhra
+.section skhrb
+.section skhsa
+.section skhsb
+.section skhta
+.section skhtb
+.section skhua
+.section skhub
+.section skhva
+.section skhvb
+.section skhwa
+.section skhwb
+.section skhxa
+.section skhxb
+.section skhya
+.section skhyb
+.section skhza
+.section skhzb
+.section skh1a
+.section skh1b
+.section skh2a
+.section skh2b
+.section skh3a
+.section skh3b
+.section skh4a
+.section skh4b
+.section skh5a
+.section skh5b
+.section skh6a
+.section skh6b
+.section skh7a
+.section skh7b
+.section skh8a
+.section skh8b
+.section skh9a
+.section skh9b
+.section skh0a
+.section skh0b
+.section skiaa
+.section skiab
+.section skiba
+.section skibb
+.section skica
+.section skicb
+.section skida
+.section skidb
+.section skiea
+.section skieb
+.section skifa
+.section skifb
+.section skiga
+.section skigb
+.section skiha
+.section skihb
+.section skiia
+.section skiib
+.section skija
+.section skijb
+.section skika
+.section skikb
+.section skila
+.section skilb
+.section skima
+.section skimb
+.section skina
+.section skinb
+.section skioa
+.section skiob
+.section skipa
+.section skipb
+.section skiqa
+.section skiqb
+.section skira
+.section skirb
+.section skisa
+.section skisb
+.section skita
+.section skitb
+.section skiua
+.section skiub
+.section skiva
+.section skivb
+.section skiwa
+.section skiwb
+.section skixa
+.section skixb
+.section skiya
+.section skiyb
+.section skiza
+.section skizb
+.section ski1a
+.section ski1b
+.section ski2a
+.section ski2b
+.section ski3a
+.section ski3b
+.section ski4a
+.section ski4b
+.section ski5a
+.section ski5b
+.section ski6a
+.section ski6b
+.section ski7a
+.section ski7b
+.section ski8a
+.section ski8b
+.section ski9a
+.section ski9b
+.section ski0a
+.section ski0b
+.section skjaa
+.section skjab
+.section skjba
+.section skjbb
+.section skjca
+.section skjcb
+.section skjda
+.section skjdb
+.section skjea
+.section skjeb
+.section skjfa
+.section skjfb
+.section skjga
+.section skjgb
+.section skjha
+.section skjhb
+.section skjia
+.section skjib
+.section skjja
+.section skjjb
+.section skjka
+.section skjkb
+.section skjla
+.section skjlb
+.section skjma
+.section skjmb
+.section skjna
+.section skjnb
+.section skjoa
+.section skjob
+.section skjpa
+.section skjpb
+.section skjqa
+.section skjqb
+.section skjra
+.section skjrb
+.section skjsa
+.section skjsb
+.section skjta
+.section skjtb
+.section skjua
+.section skjub
+.section skjva
+.section skjvb
+.section skjwa
+.section skjwb
+.section skjxa
+.section skjxb
+.section skjya
+.section skjyb
+.section skjza
+.section skjzb
+.section skj1a
+.section skj1b
+.section skj2a
+.section skj2b
+.section skj3a
+.section skj3b
+.section skj4a
+.section skj4b
+.section skj5a
+.section skj5b
+.section skj6a
+.section skj6b
+.section skj7a
+.section skj7b
+.section skj8a
+.section skj8b
+.section skj9a
+.section skj9b
+.section skj0a
+.section skj0b
+.section skkaa
+.section skkab
+.section skkba
+.section skkbb
+.section skkca
+.section skkcb
+.section skkda
+.section skkdb
+.section skkea
+.section skkeb
+.section skkfa
+.section skkfb
+.section skkga
+.section skkgb
+.section skkha
+.section skkhb
+.section skkia
+.section skkib
+.section skkja
+.section skkjb
+.section skkka
+.section skkkb
+.section skkla
+.section skklb
+.section skkma
+.section skkmb
+.section skkna
+.section skknb
+.section skkoa
+.section skkob
+.section skkpa
+.section skkpb
+.section skkqa
+.section skkqb
+.section skkra
+.section skkrb
+.section skksa
+.section skksb
+.section skkta
+.section skktb
+.section skkua
+.section skkub
+.section skkva
+.section skkvb
+.section skkwa
+.section skkwb
+.section skkxa
+.section skkxb
+.section skkya
+.section skkyb
+.section skkza
+.section skkzb
+.section skk1a
+.section skk1b
+.section skk2a
+.section skk2b
+.section skk3a
+.section skk3b
+.section skk4a
+.section skk4b
+.section skk5a
+.section skk5b
+.section skk6a
+.section skk6b
+.section skk7a
+.section skk7b
+.section skk8a
+.section skk8b
+.section skk9a
+.section skk9b
+.section skk0a
+.section skk0b
+.section sklaa
+.section sklab
+.section sklba
+.section sklbb
+.section sklca
+.section sklcb
+.section sklda
+.section skldb
+.section sklea
+.section skleb
+.section sklfa
+.section sklfb
+.section sklga
+.section sklgb
+.section sklha
+.section sklhb
+.section sklia
+.section sklib
+.section sklja
+.section skljb
+.section sklka
+.section sklkb
+.section sklla
+.section skllb
+.section sklma
+.section sklmb
+.section sklna
+.section sklnb
+.section skloa
+.section sklob
+.section sklpa
+.section sklpb
+.section sklqa
+.section sklqb
+.section sklra
+.section sklrb
+.section sklsa
+.section sklsb
+.section sklta
+.section skltb
+.section sklua
+.section sklub
+.section sklva
+.section sklvb
+.section sklwa
+.section sklwb
+.section sklxa
+.section sklxb
+.section sklya
+.section sklyb
+.section sklza
+.section sklzb
+.section skl1a
+.section skl1b
+.section skl2a
+.section skl2b
+.section skl3a
+.section skl3b
+.section skl4a
+.section skl4b
+.section skl5a
+.section skl5b
+.section skl6a
+.section skl6b
+.section skl7a
+.section skl7b
+.section skl8a
+.section skl8b
+.section skl9a
+.section skl9b
+.section skl0a
+.section skl0b
+.section skmaa
+.section skmab
+.section skmba
+.section skmbb
+.section skmca
+.section skmcb
+.section skmda
+.section skmdb
+.section skmea
+.section skmeb
+.section skmfa
+.section skmfb
+.section skmga
+.section skmgb
+.section skmha
+.section skmhb
+.section skmia
+.section skmib
+.section skmja
+.section skmjb
+.section skmka
+.section skmkb
+.section skmla
+.section skmlb
+.section skmma
+.section skmmb
+.section skmna
+.section skmnb
+.section skmoa
+.section skmob
+.section skmpa
+.section skmpb
+.section skmqa
+.section skmqb
+.section skmra
+.section skmrb
+.section skmsa
+.section skmsb
+.section skmta
+.section skmtb
+.section skmua
+.section skmub
+.section skmva
+.section skmvb
+.section skmwa
+.section skmwb
+.section skmxa
+.section skmxb
+.section skmya
+.section skmyb
+.section skmza
+.section skmzb
+.section skm1a
+.section skm1b
+.section skm2a
+.section skm2b
+.section skm3a
+.section skm3b
+.section skm4a
+.section skm4b
+.section skm5a
+.section skm5b
+.section skm6a
+.section skm6b
+.section skm7a
+.section skm7b
+.section skm8a
+.section skm8b
+.section skm9a
+.section skm9b
+.section skm0a
+.section skm0b
+.section sknaa
+.section sknab
+.section sknba
+.section sknbb
+.section sknca
+.section skncb
+.section sknda
+.section skndb
+.section sknea
+.section skneb
+.section sknfa
+.section sknfb
+.section sknga
+.section skngb
+.section sknha
+.section sknhb
+.section sknia
+.section sknib
+.section sknja
+.section sknjb
+.section sknka
+.section sknkb
+.section sknla
+.section sknlb
+.section sknma
+.section sknmb
+.section sknna
+.section sknnb
+.section sknoa
+.section sknob
+.section sknpa
+.section sknpb
+.section sknqa
+.section sknqb
+.section sknra
+.section sknrb
+.section sknsa
+.section sknsb
+.section sknta
+.section skntb
+.section sknua
+.section sknub
+.section sknva
+.section sknvb
+.section sknwa
+.section sknwb
+.section sknxa
+.section sknxb
+.section sknya
+.section sknyb
+.section sknza
+.section sknzb
+.section skn1a
+.section skn1b
+.section skn2a
+.section skn2b
+.section skn3a
+.section skn3b
+.section skn4a
+.section skn4b
+.section skn5a
+.section skn5b
+.section skn6a
+.section skn6b
+.section skn7a
+.section skn7b
+.section skn8a
+.section skn8b
+.section skn9a
+.section skn9b
+.section skn0a
+.section skn0b
+.section skoaa
+.section skoab
+.section skoba
+.section skobb
+.section skoca
+.section skocb
+.section skoda
+.section skodb
+.section skoea
+.section skoeb
+.section skofa
+.section skofb
+.section skoga
+.section skogb
+.section skoha
+.section skohb
+.section skoia
+.section skoib
+.section skoja
+.section skojb
+.section skoka
+.section skokb
+.section skola
+.section skolb
+.section skoma
+.section skomb
+.section skona
+.section skonb
+.section skooa
+.section skoob
+.section skopa
+.section skopb
+.section skoqa
+.section skoqb
+.section skora
+.section skorb
+.section skosa
+.section skosb
+.section skota
+.section skotb
+.section skoua
+.section skoub
+.section skova
+.section skovb
+.section skowa
+.section skowb
+.section skoxa
+.section skoxb
+.section skoya
+.section skoyb
+.section skoza
+.section skozb
+.section sko1a
+.section sko1b
+.section sko2a
+.section sko2b
+.section sko3a
+.section sko3b
+.section sko4a
+.section sko4b
+.section sko5a
+.section sko5b
+.section sko6a
+.section sko6b
+.section sko7a
+.section sko7b
+.section sko8a
+.section sko8b
+.section sko9a
+.section sko9b
+.section sko0a
+.section sko0b
+.section skpaa
+.section skpab
+.section skpba
+.section skpbb
+.section skpca
+.section skpcb
+.section skpda
+.section skpdb
+.section skpea
+.section skpeb
+.section skpfa
+.section skpfb
+.section skpga
+.section skpgb
+.section skpha
+.section skphb
+.section skpia
+.section skpib
+.section skpja
+.section skpjb
+.section skpka
+.section skpkb
+.section skpla
+.section skplb
+.section skpma
+.section skpmb
+.section skpna
+.section skpnb
+.section skpoa
+.section skpob
+.section skppa
+.section skppb
+.section skpqa
+.section skpqb
+.section skpra
+.section skprb
+.section skpsa
+.section skpsb
+.section skpta
+.section skptb
+.section skpua
+.section skpub
+.section skpva
+.section skpvb
+.section skpwa
+.section skpwb
+.section skpxa
+.section skpxb
+.section skpya
+.section skpyb
+.section skpza
+.section skpzb
+.section skp1a
+.section skp1b
+.section skp2a
+.section skp2b
+.section skp3a
+.section skp3b
+.section skp4a
+.section skp4b
+.section skp5a
+.section skp5b
+.section skp6a
+.section skp6b
+.section skp7a
+.section skp7b
+.section skp8a
+.section skp8b
+.section skp9a
+.section skp9b
+.section skp0a
+.section skp0b
+.section skqaa
+.section skqab
+.section skqba
+.section skqbb
+.section skqca
+.section skqcb
+.section skqda
+.section skqdb
+.section skqea
+.section skqeb
+.section skqfa
+.section skqfb
+.section skqga
+.section skqgb
+.section skqha
+.section skqhb
+.section skqia
+.section skqib
+.section skqja
+.section skqjb
+.section skqka
+.section skqkb
+.section skqla
+.section skqlb
+.section skqma
+.section skqmb
+.section skqna
+.section skqnb
+.section skqoa
+.section skqob
+.section skqpa
+.section skqpb
+.section skqqa
+.section skqqb
+.section skqra
+.section skqrb
+.section skqsa
+.section skqsb
+.section skqta
+.section skqtb
+.section skqua
+.section skqub
+.section skqva
+.section skqvb
+.section skqwa
+.section skqwb
+.section skqxa
+.section skqxb
+.section skqya
+.section skqyb
+.section skqza
+.section skqzb
+.section skq1a
+.section skq1b
+.section skq2a
+.section skq2b
+.section skq3a
+.section skq3b
+.section skq4a
+.section skq4b
+.section skq5a
+.section skq5b
+.section skq6a
+.section skq6b
+.section skq7a
+.section skq7b
+.section skq8a
+.section skq8b
+.section skq9a
+.section skq9b
+.section skq0a
+.section skq0b
+.section skraa
+.section skrab
+.section skrba
+.section skrbb
+.section skrca
+.section skrcb
+.section skrda
+.section skrdb
+.section skrea
+.section skreb
+.section skrfa
+.section skrfb
+.section skrga
+.section skrgb
+.section skrha
+.section skrhb
+.section skria
+.section skrib
+.section skrja
+.section skrjb
+.section skrka
+.section skrkb
+.section skrla
+.section skrlb
+.section skrma
+.section skrmb
+.section skrna
+.section skrnb
+.section skroa
+.section skrob
+.section skrpa
+.section skrpb
+.section skrqa
+.section skrqb
+.section skrra
+.section skrrb
+.section skrsa
+.section skrsb
+.section skrta
+.section skrtb
+.section skrua
+.section skrub
+.section skrva
+.section skrvb
+.section skrwa
+.section skrwb
+.section skrxa
+.section skrxb
+.section skrya
+.section skryb
+.section skrza
+.section skrzb
+.section skr1a
+.section skr1b
+.section skr2a
+.section skr2b
+.section skr3a
+.section skr3b
+.section skr4a
+.section skr4b
+.section skr5a
+.section skr5b
+.section skr6a
+.section skr6b
+.section skr7a
+.section skr7b
+.section skr8a
+.section skr8b
+.section skr9a
+.section skr9b
+.section skr0a
+.section skr0b
+.section sksaa
+.section sksab
+.section sksba
+.section sksbb
+.section sksca
+.section skscb
+.section sksda
+.section sksdb
+.section sksea
+.section skseb
+.section sksfa
+.section sksfb
+.section sksga
+.section sksgb
+.section sksha
+.section skshb
+.section sksia
+.section sksib
+.section sksja
+.section sksjb
+.section skska
+.section skskb
+.section sksla
+.section skslb
+.section sksma
+.section sksmb
+.section sksna
+.section sksnb
+.section sksoa
+.section sksob
+.section skspa
+.section skspb
+.section sksqa
+.section sksqb
+.section sksra
+.section sksrb
+.section skssa
+.section skssb
+.section sksta
+.section skstb
+.section sksua
+.section sksub
+.section sksva
+.section sksvb
+.section skswa
+.section skswb
+.section sksxa
+.section sksxb
+.section sksya
+.section sksyb
+.section sksza
+.section skszb
+.section sks1a
+.section sks1b
+.section sks2a
+.section sks2b
+.section sks3a
+.section sks3b
+.section sks4a
+.section sks4b
+.section sks5a
+.section sks5b
+.section sks6a
+.section sks6b
+.section sks7a
+.section sks7b
+.section sks8a
+.section sks8b
+.section sks9a
+.section sks9b
+.section sks0a
+.section sks0b
+.section sktaa
+.section sktab
+.section sktba
+.section sktbb
+.section sktca
+.section sktcb
+.section sktda
+.section sktdb
+.section sktea
+.section skteb
+.section sktfa
+.section sktfb
+.section sktga
+.section sktgb
+.section sktha
+.section skthb
+.section sktia
+.section sktib
+.section sktja
+.section sktjb
+.section sktka
+.section sktkb
+.section sktla
+.section sktlb
+.section sktma
+.section sktmb
+.section sktna
+.section sktnb
+.section sktoa
+.section sktob
+.section sktpa
+.section sktpb
+.section sktqa
+.section sktqb
+.section sktra
+.section sktrb
+.section sktsa
+.section sktsb
+.section sktta
+.section skttb
+.section sktua
+.section sktub
+.section sktva
+.section sktvb
+.section sktwa
+.section sktwb
+.section sktxa
+.section sktxb
+.section sktya
+.section sktyb
+.section sktza
+.section sktzb
+.section skt1a
+.section skt1b
+.section skt2a
+.section skt2b
+.section skt3a
+.section skt3b
+.section skt4a
+.section skt4b
+.section skt5a
+.section skt5b
+.section skt6a
+.section skt6b
+.section skt7a
+.section skt7b
+.section skt8a
+.section skt8b
+.section skt9a
+.section skt9b
+.section skt0a
+.section skt0b
+.section skuaa
+.section skuab
+.section skuba
+.section skubb
+.section skuca
+.section skucb
+.section skuda
+.section skudb
+.section skuea
+.section skueb
+.section skufa
+.section skufb
+.section skuga
+.section skugb
+.section skuha
+.section skuhb
+.section skuia
+.section skuib
+.section skuja
+.section skujb
+.section skuka
+.section skukb
+.section skula
+.section skulb
+.section skuma
+.section skumb
+.section skuna
+.section skunb
+.section skuoa
+.section skuob
+.section skupa
+.section skupb
+.section skuqa
+.section skuqb
+.section skura
+.section skurb
+.section skusa
+.section skusb
+.section skuta
+.section skutb
+.section skuua
+.section skuub
+.section skuva
+.section skuvb
+.section skuwa
+.section skuwb
+.section skuxa
+.section skuxb
+.section skuya
+.section skuyb
+.section skuza
+.section skuzb
+.section sku1a
+.section sku1b
+.section sku2a
+.section sku2b
+.section sku3a
+.section sku3b
+.section sku4a
+.section sku4b
+.section sku5a
+.section sku5b
+.section sku6a
+.section sku6b
+.section sku7a
+.section sku7b
+.section sku8a
+.section sku8b
+.section sku9a
+.section sku9b
+.section sku0a
+.section sku0b
+.section skvaa
+.section skvab
+.section skvba
+.section skvbb
+.section skvca
+.section skvcb
+.section skvda
+.section skvdb
+.section skvea
+.section skveb
+.section skvfa
+.section skvfb
+.section skvga
+.section skvgb
+.section skvha
+.section skvhb
+.section skvia
+.section skvib
+.section skvja
+.section skvjb
+.section skvka
+.section skvkb
+.section skvla
+.section skvlb
+.section skvma
+.section skvmb
+.section skvna
+.section skvnb
+.section skvoa
+.section skvob
+.section skvpa
+.section skvpb
+.section skvqa
+.section skvqb
+.section skvra
+.section skvrb
+.section skvsa
+.section skvsb
+.section skvta
+.section skvtb
+.section skvua
+.section skvub
+.section skvva
+.section skvvb
+.section skvwa
+.section skvwb
+.section skvxa
+.section skvxb
+.section skvya
+.section skvyb
+.section skvza
+.section skvzb
+.section skv1a
+.section skv1b
+.section skv2a
+.section skv2b
+.section skv3a
+.section skv3b
+.section skv4a
+.section skv4b
+.section skv5a
+.section skv5b
+.section skv6a
+.section skv6b
+.section skv7a
+.section skv7b
+.section skv8a
+.section skv8b
+.section skv9a
+.section skv9b
+.section skv0a
+.section skv0b
+.section skwaa
+.section skwab
+.section skwba
+.section skwbb
+.section skwca
+.section skwcb
+.section skwda
+.section skwdb
+.section skwea
+.section skweb
+.section skwfa
+.section skwfb
+.section skwga
+.section skwgb
+.section skwha
+.section skwhb
+.section skwia
+.section skwib
+.section skwja
+.section skwjb
+.section skwka
+.section skwkb
+.section skwla
+.section skwlb
+.section skwma
+.section skwmb
+.section skwna
+.section skwnb
+.section skwoa
+.section skwob
+.section skwpa
+.section skwpb
+.section skwqa
+.section skwqb
+.section skwra
+.section skwrb
+.section skwsa
+.section skwsb
+.section skwta
+.section skwtb
+.section skwua
+.section skwub
+.section skwva
+.section skwvb
+.section skwwa
+.section skwwb
+.section skwxa
+.section skwxb
+.section skwya
+.section skwyb
+.section skwza
+.section skwzb
+.section skw1a
+.section skw1b
+.section skw2a
+.section skw2b
+.section skw3a
+.section skw3b
+.section skw4a
+.section skw4b
+.section skw5a
+.section skw5b
+.section skw6a
+.section skw6b
+.section skw7a
+.section skw7b
+.section skw8a
+.section skw8b
+.section skw9a
+.section skw9b
+.section skw0a
+.section skw0b
+.section skxaa
+.section skxab
+.section skxba
+.section skxbb
+.section skxca
+.section skxcb
+.section skxda
+.section skxdb
+.section skxea
+.section skxeb
+.section skxfa
+.section skxfb
+.section skxga
+.section skxgb
+.section skxha
+.section skxhb
+.section skxia
+.section skxib
+.section skxja
+.section skxjb
+.section skxka
+.section skxkb
+.section skxla
+.section skxlb
+.section skxma
+.section skxmb
+.section skxna
+.section skxnb
+.section skxoa
+.section skxob
+.section skxpa
+.section skxpb
+.section skxqa
+.section skxqb
+.section skxra
+.section skxrb
+.section skxsa
+.section skxsb
+.section skxta
+.section skxtb
+.section skxua
+.section skxub
+.section skxva
+.section skxvb
+.section skxwa
+.section skxwb
+.section skxxa
+.section skxxb
+.section skxya
+.section skxyb
+.section skxza
+.section skxzb
+.section skx1a
+.section skx1b
+.section skx2a
+.section skx2b
+.section skx3a
+.section skx3b
+.section skx4a
+.section skx4b
+.section skx5a
+.section skx5b
+.section skx6a
+.section skx6b
+.section skx7a
+.section skx7b
+.section skx8a
+.section skx8b
+.section skx9a
+.section skx9b
+.section skx0a
+.section skx0b
+.section skyaa
+.section skyab
+.section skyba
+.section skybb
+.section skyca
+.section skycb
+.section skyda
+.section skydb
+.section skyea
+.section skyeb
+.section skyfa
+.section skyfb
+.section skyga
+.section skygb
+.section skyha
+.section skyhb
+.section skyia
+.section skyib
+.section skyja
+.section skyjb
+.section skyka
+.section skykb
+.section skyla
+.section skylb
+.section skyma
+.section skymb
+.section skyna
+.section skynb
+.section skyoa
+.section skyob
+.section skypa
+.section skypb
+.section skyqa
+.section skyqb
+.section skyra
+.section skyrb
+.section skysa
+.section skysb
+.section skyta
+.section skytb
+.section skyua
+.section skyub
+.section skyva
+.section skyvb
+.section skywa
+.section skywb
+.section skyxa
+.section skyxb
+.section skyya
+.section skyyb
+.section skyza
+.section skyzb
+.section sky1a
+.section sky1b
+.section sky2a
+.section sky2b
+.section sky3a
+.section sky3b
+.section sky4a
+.section sky4b
+.section sky5a
+.section sky5b
+.section sky6a
+.section sky6b
+.section sky7a
+.section sky7b
+.section sky8a
+.section sky8b
+.section sky9a
+.section sky9b
+.section sky0a
+.section sky0b
+.section skzaa
+.section skzab
+.section skzba
+.section skzbb
+.section skzca
+.section skzcb
+.section skzda
+.section skzdb
+.section skzea
+.section skzeb
+.section skzfa
+.section skzfb
+.section skzga
+.section skzgb
+.section skzha
+.section skzhb
+.section skzia
+.section skzib
+.section skzja
+.section skzjb
+.section skzka
+.section skzkb
+.section skzla
+.section skzlb
+.section skzma
+.section skzmb
+.section skzna
+.section skznb
+.section skzoa
+.section skzob
+.section skzpa
+.section skzpb
+.section skzqa
+.section skzqb
+.section skzra
+.section skzrb
+.section skzsa
+.section skzsb
+.section skzta
+.section skztb
+.section skzua
+.section skzub
+.section skzva
+.section skzvb
+.section skzwa
+.section skzwb
+.section skzxa
+.section skzxb
+.section skzya
+.section skzyb
+.section skzza
+.section skzzb
+.section skz1a
+.section skz1b
+.section skz2a
+.section skz2b
+.section skz3a
+.section skz3b
+.section skz4a
+.section skz4b
+.section skz5a
+.section skz5b
+.section skz6a
+.section skz6b
+.section skz7a
+.section skz7b
+.section skz8a
+.section skz8b
+.section skz9a
+.section skz9b
+.section skz0a
+.section skz0b
+.section sk1aa
+.section sk1ab
+.section sk1ba
+.section sk1bb
+.section sk1ca
+.section sk1cb
+.section sk1da
+.section sk1db
+.section sk1ea
+.section sk1eb
+.section sk1fa
+.section sk1fb
+.section sk1ga
+.section sk1gb
+.section sk1ha
+.section sk1hb
+.section sk1ia
+.section sk1ib
+.section sk1ja
+.section sk1jb
+.section sk1ka
+.section sk1kb
+.section sk1la
+.section sk1lb
+.section sk1ma
+.section sk1mb
+.section sk1na
+.section sk1nb
+.section sk1oa
+.section sk1ob
+.section sk1pa
+.section sk1pb
+.section sk1qa
+.section sk1qb
+.section sk1ra
+.section sk1rb
+.section sk1sa
+.section sk1sb
+.section sk1ta
+.section sk1tb
+.section sk1ua
+.section sk1ub
+.section sk1va
+.section sk1vb
+.section sk1wa
+.section sk1wb
+.section sk1xa
+.section sk1xb
+.section sk1ya
+.section sk1yb
+.section sk1za
+.section sk1zb
+.section sk11a
+.section sk11b
+.section sk12a
+.section sk12b
+.section sk13a
+.section sk13b
+.section sk14a
+.section sk14b
+.section sk15a
+.section sk15b
+.section sk16a
+.section sk16b
+.section sk17a
+.section sk17b
+.section sk18a
+.section sk18b
+.section sk19a
+.section sk19b
+.section sk10a
+.section sk10b
+.section sk2aa
+.section sk2ab
+.section sk2ba
+.section sk2bb
+.section sk2ca
+.section sk2cb
+.section sk2da
+.section sk2db
+.section sk2ea
+.section sk2eb
+.section sk2fa
+.section sk2fb
+.section sk2ga
+.section sk2gb
+.section sk2ha
+.section sk2hb
+.section sk2ia
+.section sk2ib
+.section sk2ja
+.section sk2jb
+.section sk2ka
+.section sk2kb
+.section sk2la
+.section sk2lb
+.section sk2ma
+.section sk2mb
+.section sk2na
+.section sk2nb
+.section sk2oa
+.section sk2ob
+.section sk2pa
+.section sk2pb
+.section sk2qa
+.section sk2qb
+.section sk2ra
+.section sk2rb
+.section sk2sa
+.section sk2sb
+.section sk2ta
+.section sk2tb
+.section sk2ua
+.section sk2ub
+.section sk2va
+.section sk2vb
+.section sk2wa
+.section sk2wb
+.section sk2xa
+.section sk2xb
+.section sk2ya
+.section sk2yb
+.section sk2za
+.section sk2zb
+.section sk21a
+.section sk21b
+.section sk22a
+.section sk22b
+.section sk23a
+.section sk23b
+.section sk24a
+.section sk24b
+.section sk25a
+.section sk25b
+.section sk26a
+.section sk26b
+.section sk27a
+.section sk27b
+.section sk28a
+.section sk28b
+.section sk29a
+.section sk29b
+.section sk20a
+.section sk20b
+.section sk3aa
+.section sk3ab
+.section sk3ba
+.section sk3bb
+.section sk3ca
+.section sk3cb
+.section sk3da
+.section sk3db
+.section sk3ea
+.section sk3eb
+.section sk3fa
+.section sk3fb
+.section sk3ga
+.section sk3gb
+.section sk3ha
+.section sk3hb
+.section sk3ia
+.section sk3ib
+.section sk3ja
+.section sk3jb
+.section sk3ka
+.section sk3kb
+.section sk3la
+.section sk3lb
+.section sk3ma
+.section sk3mb
+.section sk3na
+.section sk3nb
+.section sk3oa
+.section sk3ob
+.section sk3pa
+.section sk3pb
+.section sk3qa
+.section sk3qb
+.section sk3ra
+.section sk3rb
+.section sk3sa
+.section sk3sb
+.section sk3ta
+.section sk3tb
+.section sk3ua
+.section sk3ub
+.section sk3va
+.section sk3vb
+.section sk3wa
+.section sk3wb
+.section sk3xa
+.section sk3xb
+.section sk3ya
+.section sk3yb
+.section sk3za
+.section sk3zb
+.section sk31a
+.section sk31b
+.section sk32a
+.section sk32b
+.section sk33a
+.section sk33b
+.section sk34a
+.section sk34b
+.section sk35a
+.section sk35b
+.section sk36a
+.section sk36b
+.section sk37a
+.section sk37b
+.section sk38a
+.section sk38b
+.section sk39a
+.section sk39b
+.section sk30a
+.section sk30b
+.section sk4aa
+.section sk4ab
+.section sk4ba
+.section sk4bb
+.section sk4ca
+.section sk4cb
+.section sk4da
+.section sk4db
+.section sk4ea
+.section sk4eb
+.section sk4fa
+.section sk4fb
+.section sk4ga
+.section sk4gb
+.section sk4ha
+.section sk4hb
+.section sk4ia
+.section sk4ib
+.section sk4ja
+.section sk4jb
+.section sk4ka
+.section sk4kb
+.section sk4la
+.section sk4lb
+.section sk4ma
+.section sk4mb
+.section sk4na
+.section sk4nb
+.section sk4oa
+.section sk4ob
+.section sk4pa
+.section sk4pb
+.section sk4qa
+.section sk4qb
+.section sk4ra
+.section sk4rb
+.section sk4sa
+.section sk4sb
+.section sk4ta
+.section sk4tb
+.section sk4ua
+.section sk4ub
+.section sk4va
+.section sk4vb
+.section sk4wa
+.section sk4wb
+.section sk4xa
+.section sk4xb
+.section sk4ya
+.section sk4yb
+.section sk4za
+.section sk4zb
+.section sk41a
+.section sk41b
+.section sk42a
+.section sk42b
+.section sk43a
+.section sk43b
+.section sk44a
+.section sk44b
+.section sk45a
+.section sk45b
+.section sk46a
+.section sk46b
+.section sk47a
+.section sk47b
+.section sk48a
+.section sk48b
+.section sk49a
+.section sk49b
+.section sk40a
+.section sk40b
+.section sk5aa
+.section sk5ab
+.section sk5ba
+.section sk5bb
+.section sk5ca
+.section sk5cb
+.section sk5da
+.section sk5db
+.section sk5ea
+.section sk5eb
+.section sk5fa
+.section sk5fb
+.section sk5ga
+.section sk5gb
+.section sk5ha
+.section sk5hb
+.section sk5ia
+.section sk5ib
+.section sk5ja
+.section sk5jb
+.section sk5ka
+.section sk5kb
+.section sk5la
+.section sk5lb
+.section sk5ma
+.section sk5mb
+.section sk5na
+.section sk5nb
+.section sk5oa
+.section sk5ob
+.section sk5pa
+.section sk5pb
+.section sk5qa
+.section sk5qb
+.section sk5ra
+.section sk5rb
+.section sk5sa
+.section sk5sb
+.section sk5ta
+.section sk5tb
+.section sk5ua
+.section sk5ub
+.section sk5va
+.section sk5vb
+.section sk5wa
+.section sk5wb
+.section sk5xa
+.section sk5xb
+.section sk5ya
+.section sk5yb
+.section sk5za
+.section sk5zb
+.section sk51a
+.section sk51b
+.section sk52a
+.section sk52b
+.section sk53a
+.section sk53b
+.section sk54a
+.section sk54b
+.section sk55a
+.section sk55b
+.section sk56a
+.section sk56b
+.section sk57a
+.section sk57b
+.section sk58a
+.section sk58b
+.section sk59a
+.section sk59b
+.section sk50a
+.section sk50b
+.section sk6aa
+.section sk6ab
+.section sk6ba
+.section sk6bb
+.section sk6ca
+.section sk6cb
+.section sk6da
+.section sk6db
+.section sk6ea
+.section sk6eb
+.section sk6fa
+.section sk6fb
+.section sk6ga
+.section sk6gb
+.section sk6ha
+.section sk6hb
+.section sk6ia
+.section sk6ib
+.section sk6ja
+.section sk6jb
+.section sk6ka
+.section sk6kb
+.section sk6la
+.section sk6lb
+.section sk6ma
+.section sk6mb
+.section sk6na
+.section sk6nb
+.section sk6oa
+.section sk6ob
+.section sk6pa
+.section sk6pb
+.section sk6qa
+.section sk6qb
+.section sk6ra
+.section sk6rb
+.section sk6sa
+.section sk6sb
+.section sk6ta
+.section sk6tb
+.section sk6ua
+.section sk6ub
+.section sk6va
+.section sk6vb
+.section sk6wa
+.section sk6wb
+.section sk6xa
+.section sk6xb
+.section sk6ya
+.section sk6yb
+.section sk6za
+.section sk6zb
+.section sk61a
+.section sk61b
+.section sk62a
+.section sk62b
+.section sk63a
+.section sk63b
+.section sk64a
+.section sk64b
+.section sk65a
+.section sk65b
+.section sk66a
+.section sk66b
+.section sk67a
+.section sk67b
+.section sk68a
+.section sk68b
+.section sk69a
+.section sk69b
+.section sk60a
+.section sk60b
+.section sk7aa
+.section sk7ab
+.section sk7ba
+.section sk7bb
+.section sk7ca
+.section sk7cb
+.section sk7da
+.section sk7db
+.section sk7ea
+.section sk7eb
+.section sk7fa
+.section sk7fb
+.section sk7ga
+.section sk7gb
+.section sk7ha
+.section sk7hb
+.section sk7ia
+.section sk7ib
+.section sk7ja
+.section sk7jb
+.section sk7ka
+.section sk7kb
+.section sk7la
+.section sk7lb
+.section sk7ma
+.section sk7mb
+.section sk7na
+.section sk7nb
+.section sk7oa
+.section sk7ob
+.section sk7pa
+.section sk7pb
+.section sk7qa
+.section sk7qb
+.section sk7ra
+.section sk7rb
+.section sk7sa
+.section sk7sb
+.section sk7ta
+.section sk7tb
+.section sk7ua
+.section sk7ub
+.section sk7va
+.section sk7vb
+.section sk7wa
+.section sk7wb
+.section sk7xa
+.section sk7xb
+.section sk7ya
+.section sk7yb
+.section sk7za
+.section sk7zb
+.section sk71a
+.section sk71b
+.section sk72a
+.section sk72b
+.section sk73a
+.section sk73b
+.section sk74a
+.section sk74b
+.section sk75a
+.section sk75b
+.section sk76a
+.section sk76b
+.section sk77a
+.section sk77b
+.section sk78a
+.section sk78b
+.section sk79a
+.section sk79b
+.section sk70a
+.section sk70b
+.section sk8aa
+.section sk8ab
+.section sk8ba
+.section sk8bb
+.section sk8ca
+.section sk8cb
+.section sk8da
+.section sk8db
+.section sk8ea
+.section sk8eb
+.section sk8fa
+.section sk8fb
+.section sk8ga
+.section sk8gb
+.section sk8ha
+.section sk8hb
+.section sk8ia
+.section sk8ib
+.section sk8ja
+.section sk8jb
+.section sk8ka
+.section sk8kb
+.section sk8la
+.section sk8lb
+.section sk8ma
+.section sk8mb
+.section sk8na
+.section sk8nb
+.section sk8oa
+.section sk8ob
+.section sk8pa
+.section sk8pb
+.section sk8qa
+.section sk8qb
+.section sk8ra
+.section sk8rb
+.section sk8sa
+.section sk8sb
+.section sk8ta
+.section sk8tb
+.section sk8ua
+.section sk8ub
+.section sk8va
+.section sk8vb
+.section sk8wa
+.section sk8wb
+.section sk8xa
+.section sk8xb
+.section sk8ya
+.section sk8yb
+.section sk8za
+.section sk8zb
+.section sk81a
+.section sk81b
+.section sk82a
+.section sk82b
+.section sk83a
+.section sk83b
+.section sk84a
+.section sk84b
+.section sk85a
+.section sk85b
+.section sk86a
+.section sk86b
+.section sk87a
+.section sk87b
+.section sk88a
+.section sk88b
+.section sk89a
+.section sk89b
+.section sk80a
+.section sk80b
+.section sk9aa
+.section sk9ab
+.section sk9ba
+.section sk9bb
+.section sk9ca
+.section sk9cb
+.section sk9da
+.section sk9db
+.section sk9ea
+.section sk9eb
+.section sk9fa
+.section sk9fb
+.section sk9ga
+.section sk9gb
+.section sk9ha
+.section sk9hb
+.section sk9ia
+.section sk9ib
+.section sk9ja
+.section sk9jb
+.section sk9ka
+.section sk9kb
+.section sk9la
+.section sk9lb
+.section sk9ma
+.section sk9mb
+.section sk9na
+.section sk9nb
+.section sk9oa
+.section sk9ob
+.section sk9pa
+.section sk9pb
+.section sk9qa
+.section sk9qb
+.section sk9ra
+.section sk9rb
+.section sk9sa
+.section sk9sb
+.section sk9ta
+.section sk9tb
+.section sk9ua
+.section sk9ub
+.section sk9va
+.section sk9vb
+.section sk9wa
+.section sk9wb
+.section sk9xa
+.section sk9xb
+.section sk9ya
+.section sk9yb
+.section sk9za
+.section sk9zb
+.section sk91a
+.section sk91b
+.section sk92a
+.section sk92b
+.section sk93a
+.section sk93b
+.section sk94a
+.section sk94b
+.section sk95a
+.section sk95b
+.section sk96a
+.section sk96b
+.section sk97a
+.section sk97b
+.section sk98a
+.section sk98b
+.section sk99a
+.section sk99b
+.section sk90a
+.section sk90b
+.section sk0aa
+.section sk0ab
+.section sk0ba
+.section sk0bb
+.section sk0ca
+.section sk0cb
+.section sk0da
+.section sk0db
+.section sk0ea
+.section sk0eb
+.section sk0fa
+.section sk0fb
+.section sk0ga
+.section sk0gb
+.section sk0ha
+.section sk0hb
+.section sk0ia
+.section sk0ib
+.section sk0ja
+.section sk0jb
+.section sk0ka
+.section sk0kb
+.section sk0la
+.section sk0lb
+.section sk0ma
+.section sk0mb
+.section sk0na
+.section sk0nb
+.section sk0oa
+.section sk0ob
+.section sk0pa
+.section sk0pb
+.section sk0qa
+.section sk0qb
+.section sk0ra
+.section sk0rb
+.section sk0sa
+.section sk0sb
+.section sk0ta
+.section sk0tb
+.section sk0ua
+.section sk0ub
+.section sk0va
+.section sk0vb
+.section sk0wa
+.section sk0wb
+.section sk0xa
+.section sk0xb
+.section sk0ya
+.section sk0yb
+.section sk0za
+.section sk0zb
+.section sk01a
+.section sk01b
+.section sk02a
+.section sk02b
+.section sk03a
+.section sk03b
+.section sk04a
+.section sk04b
+.section sk05a
+.section sk05b
+.section sk06a
+.section sk06b
+.section sk07a
+.section sk07b
+.section sk08a
+.section sk08b
+.section sk09a
+.section sk09b
+.section sk00a
+.section sk00b
+.section slaaa
+.section slaab
+.section slaba
+.section slabb
+.section slaca
+.section slacb
+.section slada
+.section sladb
+.section slaea
+.section slaeb
+.section slafa
+.section slafb
+.section slaga
+.section slagb
+.section slaha
+.section slahb
+.section slaia
+.section slaib
+.section slaja
+.section slajb
+.section slaka
+.section slakb
+.section slala
+.section slalb
+.section slama
+.section slamb
+.section slana
+.section slanb
+.section slaoa
+.section slaob
+.section slapa
+.section slapb
+.section slaqa
+.section slaqb
+.section slara
+.section slarb
+.section slasa
+.section slasb
+.section slata
+.section slatb
+.section slaua
+.section slaub
+.section slava
+.section slavb
+.section slawa
+.section slawb
+.section slaxa
+.section slaxb
+.section slaya
+.section slayb
+.section slaza
+.section slazb
+.section sla1a
+.section sla1b
+.section sla2a
+.section sla2b
+.section sla3a
+.section sla3b
+.section sla4a
+.section sla4b
+.section sla5a
+.section sla5b
+.section sla6a
+.section sla6b
+.section sla7a
+.section sla7b
+.section sla8a
+.section sla8b
+.section sla9a
+.section sla9b
+.section sla0a
+.section sla0b
+.section slbaa
+.section slbab
+.section slbba
+.section slbbb
+.section slbca
+.section slbcb
+.section slbda
+.section slbdb
+.section slbea
+.section slbeb
+.section slbfa
+.section slbfb
+.section slbga
+.section slbgb
+.section slbha
+.section slbhb
+.section slbia
+.section slbib
+.section slbja
+.section slbjb
+.section slbka
+.section slbkb
+.section slbla
+.section slblb
+.section slbma
+.section slbmb
+.section slbna
+.section slbnb
+.section slboa
+.section slbob
+.section slbpa
+.section slbpb
+.section slbqa
+.section slbqb
+.section slbra
+.section slbrb
+.section slbsa
+.section slbsb
+.section slbta
+.section slbtb
+.section slbua
+.section slbub
+.section slbva
+.section slbvb
+.section slbwa
+.section slbwb
+.section slbxa
+.section slbxb
+.section slbya
+.section slbyb
+.section slbza
+.section slbzb
+.section slb1a
+.section slb1b
+.section slb2a
+.section slb2b
+.section slb3a
+.section slb3b
+.section slb4a
+.section slb4b
+.section slb5a
+.section slb5b
+.section slb6a
+.section slb6b
+.section slb7a
+.section slb7b
+.section slb8a
+.section slb8b
+.section slb9a
+.section slb9b
+.section slb0a
+.section slb0b
+.section slcaa
+.section slcab
+.section slcba
+.section slcbb
+.section slcca
+.section slccb
+.section slcda
+.section slcdb
+.section slcea
+.section slceb
+.section slcfa
+.section slcfb
+.section slcga
+.section slcgb
+.section slcha
+.section slchb
+.section slcia
+.section slcib
+.section slcja
+.section slcjb
+.section slcka
+.section slckb
+.section slcla
+.section slclb
+.section slcma
+.section slcmb
+.section slcna
+.section slcnb
+.section slcoa
+.section slcob
+.section slcpa
+.section slcpb
+.section slcqa
+.section slcqb
+.section slcra
+.section slcrb
+.section slcsa
+.section slcsb
+.section slcta
+.section slctb
+.section slcua
+.section slcub
+.section slcva
+.section slcvb
+.section slcwa
+.section slcwb
+.section slcxa
+.section slcxb
+.section slcya
+.section slcyb
+.section slcza
+.section slczb
+.section slc1a
+.section slc1b
+.section slc2a
+.section slc2b
+.section slc3a
+.section slc3b
+.section slc4a
+.section slc4b
+.section slc5a
+.section slc5b
+.section slc6a
+.section slc6b
+.section slc7a
+.section slc7b
+.section slc8a
+.section slc8b
+.section slc9a
+.section slc9b
+.section slc0a
+.section slc0b
+.section sldaa
+.section sldab
+.section sldba
+.section sldbb
+.section sldca
+.section sldcb
+.section sldda
+.section slddb
+.section sldea
+.section sldeb
+.section sldfa
+.section sldfb
+.section sldga
+.section sldgb
+.section sldha
+.section sldhb
+.section sldia
+.section sldib
+.section sldja
+.section sldjb
+.section sldka
+.section sldkb
+.section sldla
+.section sldlb
+.section sldma
+.section sldmb
+.section sldna
+.section sldnb
+.section sldoa
+.section sldob
+.section sldpa
+.section sldpb
+.section sldqa
+.section sldqb
+.section sldra
+.section sldrb
+.section sldsa
+.section sldsb
+.section sldta
+.section sldtb
+.section sldua
+.section sldub
+.section sldva
+.section sldvb
+.section sldwa
+.section sldwb
+.section sldxa
+.section sldxb
+.section sldya
+.section sldyb
+.section sldza
+.section sldzb
+.section sld1a
+.section sld1b
+.section sld2a
+.section sld2b
+.section sld3a
+.section sld3b
+.section sld4a
+.section sld4b
+.section sld5a
+.section sld5b
+.section sld6a
+.section sld6b
+.section sld7a
+.section sld7b
+.section sld8a
+.section sld8b
+.section sld9a
+.section sld9b
+.section sld0a
+.section sld0b
+.section sleaa
+.section sleab
+.section sleba
+.section slebb
+.section sleca
+.section slecb
+.section sleda
+.section sledb
+.section sleea
+.section sleeb
+.section slefa
+.section slefb
+.section slega
+.section slegb
+.section sleha
+.section slehb
+.section sleia
+.section sleib
+.section sleja
+.section slejb
+.section sleka
+.section slekb
+.section slela
+.section slelb
+.section slema
+.section slemb
+.section slena
+.section slenb
+.section sleoa
+.section sleob
+.section slepa
+.section slepb
+.section sleqa
+.section sleqb
+.section slera
+.section slerb
+.section slesa
+.section slesb
+.section sleta
+.section sletb
+.section sleua
+.section sleub
+.section sleva
+.section slevb
+.section slewa
+.section slewb
+.section slexa
+.section slexb
+.section sleya
+.section sleyb
+.section sleza
+.section slezb
+.section sle1a
+.section sle1b
+.section sle2a
+.section sle2b
+.section sle3a
+.section sle3b
+.section sle4a
+.section sle4b
+.section sle5a
+.section sle5b
+.section sle6a
+.section sle6b
+.section sle7a
+.section sle7b
+.section sle8a
+.section sle8b
+.section sle9a
+.section sle9b
+.section sle0a
+.section sle0b
+.section slfaa
+.section slfab
+.section slfba
+.section slfbb
+.section slfca
+.section slfcb
+.section slfda
+.section slfdb
+.section slfea
+.section slfeb
+.section slffa
+.section slffb
+.section slfga
+.section slfgb
+.section slfha
+.section slfhb
+.section slfia
+.section slfib
+.section slfja
+.section slfjb
+.section slfka
+.section slfkb
+.section slfla
+.section slflb
+.section slfma
+.section slfmb
+.section slfna
+.section slfnb
+.section slfoa
+.section slfob
+.section slfpa
+.section slfpb
+.section slfqa
+.section slfqb
+.section slfra
+.section slfrb
+.section slfsa
+.section slfsb
+.section slfta
+.section slftb
+.section slfua
+.section slfub
+.section slfva
+.section slfvb
+.section slfwa
+.section slfwb
+.section slfxa
+.section slfxb
+.section slfya
+.section slfyb
+.section slfza
+.section slfzb
+.section slf1a
+.section slf1b
+.section slf2a
+.section slf2b
+.section slf3a
+.section slf3b
+.section slf4a
+.section slf4b
+.section slf5a
+.section slf5b
+.section slf6a
+.section slf6b
+.section slf7a
+.section slf7b
+.section slf8a
+.section slf8b
+.section slf9a
+.section slf9b
+.section slf0a
+.section slf0b
+.section slgaa
+.section slgab
+.section slgba
+.section slgbb
+.section slgca
+.section slgcb
+.section slgda
+.section slgdb
+.section slgea
+.section slgeb
+.section slgfa
+.section slgfb
+.section slgga
+.section slggb
+.section slgha
+.section slghb
+.section slgia
+.section slgib
+.section slgja
+.section slgjb
+.section slgka
+.section slgkb
+.section slgla
+.section slglb
+.section slgma
+.section slgmb
+.section slgna
+.section slgnb
+.section slgoa
+.section slgob
+.section slgpa
+.section slgpb
+.section slgqa
+.section slgqb
+.section slgra
+.section slgrb
+.section slgsa
+.section slgsb
+.section slgta
+.section slgtb
+.section slgua
+.section slgub
+.section slgva
+.section slgvb
+.section slgwa
+.section slgwb
+.section slgxa
+.section slgxb
+.section slgya
+.section slgyb
+.section slgza
+.section slgzb
+.section slg1a
+.section slg1b
+.section slg2a
+.section slg2b
+.section slg3a
+.section slg3b
+.section slg4a
+.section slg4b
+.section slg5a
+.section slg5b
+.section slg6a
+.section slg6b
+.section slg7a
+.section slg7b
+.section slg8a
+.section slg8b
+.section slg9a
+.section slg9b
+.section slg0a
+.section slg0b
+.section slhaa
+.section slhab
+.section slhba
+.section slhbb
+.section slhca
+.section slhcb
+.section slhda
+.section slhdb
+.section slhea
+.section slheb
+.section slhfa
+.section slhfb
+.section slhga
+.section slhgb
+.section slhha
+.section slhhb
+.section slhia
+.section slhib
+.section slhja
+.section slhjb
+.section slhka
+.section slhkb
+.section slhla
+.section slhlb
+.section slhma
+.section slhmb
+.section slhna
+.section slhnb
+.section slhoa
+.section slhob
+.section slhpa
+.section slhpb
+.section slhqa
+.section slhqb
+.section slhra
+.section slhrb
+.section slhsa
+.section slhsb
+.section slhta
+.section slhtb
+.section slhua
+.section slhub
+.section slhva
+.section slhvb
+.section slhwa
+.section slhwb
+.section slhxa
+.section slhxb
+.section slhya
+.section slhyb
+.section slhza
+.section slhzb
+.section slh1a
+.section slh1b
+.section slh2a
+.section slh2b
+.section slh3a
+.section slh3b
+.section slh4a
+.section slh4b
+.section slh5a
+.section slh5b
+.section slh6a
+.section slh6b
+.section slh7a
+.section slh7b
+.section slh8a
+.section slh8b
+.section slh9a
+.section slh9b
+.section slh0a
+.section slh0b
+.section sliaa
+.section sliab
+.section sliba
+.section slibb
+.section slica
+.section slicb
+.section slida
+.section slidb
+.section sliea
+.section slieb
+.section slifa
+.section slifb
+.section sliga
+.section sligb
+.section sliha
+.section slihb
+.section sliia
+.section sliib
+.section slija
+.section slijb
+.section slika
+.section slikb
+.section slila
+.section slilb
+.section slima
+.section slimb
+.section slina
+.section slinb
+.section slioa
+.section sliob
+.section slipa
+.section slipb
+.section sliqa
+.section sliqb
+.section slira
+.section slirb
+.section slisa
+.section slisb
+.section slita
+.section slitb
+.section sliua
+.section sliub
+.section sliva
+.section slivb
+.section sliwa
+.section sliwb
+.section slixa
+.section slixb
+.section sliya
+.section sliyb
+.section sliza
+.section slizb
+.section sli1a
+.section sli1b
+.section sli2a
+.section sli2b
+.section sli3a
+.section sli3b
+.section sli4a
+.section sli4b
+.section sli5a
+.section sli5b
+.section sli6a
+.section sli6b
+.section sli7a
+.section sli7b
+.section sli8a
+.section sli8b
+.section sli9a
+.section sli9b
+.section sli0a
+.section sli0b
+.section sljaa
+.section sljab
+.section sljba
+.section sljbb
+.section sljca
+.section sljcb
+.section sljda
+.section sljdb
+.section sljea
+.section sljeb
+.section sljfa
+.section sljfb
+.section sljga
+.section sljgb
+.section sljha
+.section sljhb
+.section sljia
+.section sljib
+.section sljja
+.section sljjb
+.section sljka
+.section sljkb
+.section sljla
+.section sljlb
+.section sljma
+.section sljmb
+.section sljna
+.section sljnb
+.section sljoa
+.section sljob
+.section sljpa
+.section sljpb
+.section sljqa
+.section sljqb
+.section sljra
+.section sljrb
+.section sljsa
+.section sljsb
+.section sljta
+.section sljtb
+.section sljua
+.section sljub
+.section sljva
+.section sljvb
+.section sljwa
+.section sljwb
+.section sljxa
+.section sljxb
+.section sljya
+.section sljyb
+.section sljza
+.section sljzb
+.section slj1a
+.section slj1b
+.section slj2a
+.section slj2b
+.section slj3a
+.section slj3b
+.section slj4a
+.section slj4b
+.section slj5a
+.section slj5b
+.section slj6a
+.section slj6b
+.section slj7a
+.section slj7b
+.section slj8a
+.section slj8b
+.section slj9a
+.section slj9b
+.section slj0a
+.section slj0b
+.section slkaa
+.section slkab
+.section slkba
+.section slkbb
+.section slkca
+.section slkcb
+.section slkda
+.section slkdb
+.section slkea
+.section slkeb
+.section slkfa
+.section slkfb
+.section slkga
+.section slkgb
+.section slkha
+.section slkhb
+.section slkia
+.section slkib
+.section slkja
+.section slkjb
+.section slkka
+.section slkkb
+.section slkla
+.section slklb
+.section slkma
+.section slkmb
+.section slkna
+.section slknb
+.section slkoa
+.section slkob
+.section slkpa
+.section slkpb
+.section slkqa
+.section slkqb
+.section slkra
+.section slkrb
+.section slksa
+.section slksb
+.section slkta
+.section slktb
+.section slkua
+.section slkub
+.section slkva
+.section slkvb
+.section slkwa
+.section slkwb
+.section slkxa
+.section slkxb
+.section slkya
+.section slkyb
+.section slkza
+.section slkzb
+.section slk1a
+.section slk1b
+.section slk2a
+.section slk2b
+.section slk3a
+.section slk3b
+.section slk4a
+.section slk4b
+.section slk5a
+.section slk5b
+.section slk6a
+.section slk6b
+.section slk7a
+.section slk7b
+.section slk8a
+.section slk8b
+.section slk9a
+.section slk9b
+.section slk0a
+.section slk0b
+.section sllaa
+.section sllab
+.section sllba
+.section sllbb
+.section sllca
+.section sllcb
+.section sllda
+.section slldb
+.section sllea
+.section slleb
+.section sllfa
+.section sllfb
+.section sllga
+.section sllgb
+.section sllha
+.section sllhb
+.section sllia
+.section sllib
+.section sllja
+.section slljb
+.section sllka
+.section sllkb
+.section sllla
+.section slllb
+.section sllma
+.section sllmb
+.section sllna
+.section sllnb
+.section slloa
+.section sllob
+.section sllpa
+.section sllpb
+.section sllqa
+.section sllqb
+.section sllra
+.section sllrb
+.section sllsa
+.section sllsb
+.section sllta
+.section slltb
+.section sllua
+.section sllub
+.section sllva
+.section sllvb
+.section sllwa
+.section sllwb
+.section sllxa
+.section sllxb
+.section sllya
+.section sllyb
+.section sllza
+.section sllzb
+.section sll1a
+.section sll1b
+.section sll2a
+.section sll2b
+.section sll3a
+.section sll3b
+.section sll4a
+.section sll4b
+.section sll5a
+.section sll5b
+.section sll6a
+.section sll6b
+.section sll7a
+.section sll7b
+.section sll8a
+.section sll8b
+.section sll9a
+.section sll9b
+.section sll0a
+.section sll0b
+.section slmaa
+.section slmab
+.section slmba
+.section slmbb
+.section slmca
+.section slmcb
+.section slmda
+.section slmdb
+.section slmea
+.section slmeb
+.section slmfa
+.section slmfb
+.section slmga
+.section slmgb
+.section slmha
+.section slmhb
+.section slmia
+.section slmib
+.section slmja
+.section slmjb
+.section slmka
+.section slmkb
+.section slmla
+.section slmlb
+.section slmma
+.section slmmb
+.section slmna
+.section slmnb
+.section slmoa
+.section slmob
+.section slmpa
+.section slmpb
+.section slmqa
+.section slmqb
+.section slmra
+.section slmrb
+.section slmsa
+.section slmsb
+.section slmta
+.section slmtb
+.section slmua
+.section slmub
+.section slmva
+.section slmvb
+.section slmwa
+.section slmwb
+.section slmxa
+.section slmxb
+.section slmya
+.section slmyb
+.section slmza
+.section slmzb
+.section slm1a
+.section slm1b
+.section slm2a
+.section slm2b
+.section slm3a
+.section slm3b
+.section slm4a
+.section slm4b
+.section slm5a
+.section slm5b
+.section slm6a
+.section slm6b
+.section slm7a
+.section slm7b
+.section slm8a
+.section slm8b
+.section slm9a
+.section slm9b
+.section slm0a
+.section slm0b
+.section slnaa
+.section slnab
+.section slnba
+.section slnbb
+.section slnca
+.section slncb
+.section slnda
+.section slndb
+.section slnea
+.section slneb
+.section slnfa
+.section slnfb
+.section slnga
+.section slngb
+.section slnha
+.section slnhb
+.section slnia
+.section slnib
+.section slnja
+.section slnjb
+.section slnka
+.section slnkb
+.section slnla
+.section slnlb
+.section slnma
+.section slnmb
+.section slnna
+.section slnnb
+.section slnoa
+.section slnob
+.section slnpa
+.section slnpb
+.section slnqa
+.section slnqb
+.section slnra
+.section slnrb
+.section slnsa
+.section slnsb
+.section slnta
+.section slntb
+.section slnua
+.section slnub
+.section slnva
+.section slnvb
+.section slnwa
+.section slnwb
+.section slnxa
+.section slnxb
+.section slnya
+.section slnyb
+.section slnza
+.section slnzb
+.section sln1a
+.section sln1b
+.section sln2a
+.section sln2b
+.section sln3a
+.section sln3b
+.section sln4a
+.section sln4b
+.section sln5a
+.section sln5b
+.section sln6a
+.section sln6b
+.section sln7a
+.section sln7b
+.section sln8a
+.section sln8b
+.section sln9a
+.section sln9b
+.section sln0a
+.section sln0b
+.section sloaa
+.section sloab
+.section sloba
+.section slobb
+.section sloca
+.section slocb
+.section sloda
+.section slodb
+.section sloea
+.section sloeb
+.section slofa
+.section slofb
+.section sloga
+.section slogb
+.section sloha
+.section slohb
+.section sloia
+.section sloib
+.section sloja
+.section slojb
+.section sloka
+.section slokb
+.section slola
+.section slolb
+.section sloma
+.section slomb
+.section slona
+.section slonb
+.section slooa
+.section sloob
+.section slopa
+.section slopb
+.section sloqa
+.section sloqb
+.section slora
+.section slorb
+.section slosa
+.section slosb
+.section slota
+.section slotb
+.section sloua
+.section sloub
+.section slova
+.section slovb
+.section slowa
+.section slowb
+.section sloxa
+.section sloxb
+.section sloya
+.section sloyb
+.section sloza
+.section slozb
+.section slo1a
+.section slo1b
+.section slo2a
+.section slo2b
+.section slo3a
+.section slo3b
+.section slo4a
+.section slo4b
+.section slo5a
+.section slo5b
+.section slo6a
+.section slo6b
+.section slo7a
+.section slo7b
+.section slo8a
+.section slo8b
+.section slo9a
+.section slo9b
+.section slo0a
+.section slo0b
+.section slpaa
+.section slpab
+.section slpba
+.section slpbb
+.section slpca
+.section slpcb
+.section slpda
+.section slpdb
+.section slpea
+.section slpeb
+.section slpfa
+.section slpfb
+.section slpga
+.section slpgb
+.section slpha
+.section slphb
+.section slpia
+.section slpib
+.section slpja
+.section slpjb
+.section slpka
+.section slpkb
+.section slpla
+.section slplb
+.section slpma
+.section slpmb
+.section slpna
+.section slpnb
+.section slpoa
+.section slpob
+.section slppa
+.section slppb
+.section slpqa
+.section slpqb
+.section slpra
+.section slprb
+.section slpsa
+.section slpsb
+.section slpta
+.section slptb
+.section slpua
+.section slpub
+.section slpva
+.section slpvb
+.section slpwa
+.section slpwb
+.section slpxa
+.section slpxb
+.section slpya
+.section slpyb
+.section slpza
+.section slpzb
+.section slp1a
+.section slp1b
+.section slp2a
+.section slp2b
+.section slp3a
+.section slp3b
+.section slp4a
+.section slp4b
+.section slp5a
+.section slp5b
+.section slp6a
+.section slp6b
+.section slp7a
+.section slp7b
+.section slp8a
+.section slp8b
+.section slp9a
+.section slp9b
+.section slp0a
+.section slp0b
+.section slqaa
+.section slqab
+.section slqba
+.section slqbb
+.section slqca
+.section slqcb
+.section slqda
+.section slqdb
+.section slqea
+.section slqeb
+.section slqfa
+.section slqfb
+.section slqga
+.section slqgb
+.section slqha
+.section slqhb
+.section slqia
+.section slqib
+.section slqja
+.section slqjb
+.section slqka
+.section slqkb
+.section slqla
+.section slqlb
+.section slqma
+.section slqmb
+.section slqna
+.section slqnb
+.section slqoa
+.section slqob
+.section slqpa
+.section slqpb
+.section slqqa
+.section slqqb
+.section slqra
+.section slqrb
+.section slqsa
+.section slqsb
+.section slqta
+.section slqtb
+.section slqua
+.section slqub
+.section slqva
+.section slqvb
+.section slqwa
+.section slqwb
+.section slqxa
+.section slqxb
+.section slqya
+.section slqyb
+.section slqza
+.section slqzb
+.section slq1a
+.section slq1b
+.section slq2a
+.section slq2b
+.section slq3a
+.section slq3b
+.section slq4a
+.section slq4b
+.section slq5a
+.section slq5b
+.section slq6a
+.section slq6b
+.section slq7a
+.section slq7b
+.section slq8a
+.section slq8b
+.section slq9a
+.section slq9b
+.section slq0a
+.section slq0b
+.section slraa
+.section slrab
+.section slrba
+.section slrbb
+.section slrca
+.section slrcb
+.section slrda
+.section slrdb
+.section slrea
+.section slreb
+.section slrfa
+.section slrfb
+.section slrga
+.section slrgb
+.section slrha
+.section slrhb
+.section slria
+.section slrib
+.section slrja
+.section slrjb
+.section slrka
+.section slrkb
+.section slrla
+.section slrlb
+.section slrma
+.section slrmb
+.section slrna
+.section slrnb
+.section slroa
+.section slrob
+.section slrpa
+.section slrpb
+.section slrqa
+.section slrqb
+.section slrra
+.section slrrb
+.section slrsa
+.section slrsb
+.section slrta
+.section slrtb
+.section slrua
+.section slrub
+.section slrva
+.section slrvb
+.section slrwa
+.section slrwb
+.section slrxa
+.section slrxb
+.section slrya
+.section slryb
+.section slrza
+.section slrzb
+.section slr1a
+.section slr1b
+.section slr2a
+.section slr2b
+.section slr3a
+.section slr3b
+.section slr4a
+.section slr4b
+.section slr5a
+.section slr5b
+.section slr6a
+.section slr6b
+.section slr7a
+.section slr7b
+.section slr8a
+.section slr8b
+.section slr9a
+.section slr9b
+.section slr0a
+.section slr0b
+.section slsaa
+.section slsab
+.section slsba
+.section slsbb
+.section slsca
+.section slscb
+.section slsda
+.section slsdb
+.section slsea
+.section slseb
+.section slsfa
+.section slsfb
+.section slsga
+.section slsgb
+.section slsha
+.section slshb
+.section slsia
+.section slsib
+.section slsja
+.section slsjb
+.section slska
+.section slskb
+.section slsla
+.section slslb
+.section slsma
+.section slsmb
+.section slsna
+.section slsnb
+.section slsoa
+.section slsob
+.section slspa
+.section slspb
+.section slsqa
+.section slsqb
+.section slsra
+.section slsrb
+.section slssa
+.section slssb
+.section slsta
+.section slstb
+.section slsua
+.section slsub
+.section slsva
+.section slsvb
+.section slswa
+.section slswb
+.section slsxa
+.section slsxb
+.section slsya
+.section slsyb
+.section slsza
+.section slszb
+.section sls1a
+.section sls1b
+.section sls2a
+.section sls2b
+.section sls3a
+.section sls3b
+.section sls4a
+.section sls4b
+.section sls5a
+.section sls5b
+.section sls6a
+.section sls6b
+.section sls7a
+.section sls7b
+.section sls8a
+.section sls8b
+.section sls9a
+.section sls9b
+.section sls0a
+.section sls0b
+.section sltaa
+.section sltab
+.section sltba
+.section sltbb
+.section sltca
+.section sltcb
+.section sltda
+.section sltdb
+.section sltea
+.section slteb
+.section sltfa
+.section sltfb
+.section sltga
+.section sltgb
+.section sltha
+.section slthb
+.section sltia
+.section sltib
+.section sltja
+.section sltjb
+.section sltka
+.section sltkb
+.section sltla
+.section sltlb
+.section sltma
+.section sltmb
+.section sltna
+.section sltnb
+.section sltoa
+.section sltob
+.section sltpa
+.section sltpb
+.section sltqa
+.section sltqb
+.section sltra
+.section sltrb
+.section sltsa
+.section sltsb
+.section sltta
+.section slttb
+.section sltua
+.section sltub
+.section sltva
+.section sltvb
+.section sltwa
+.section sltwb
+.section sltxa
+.section sltxb
+.section sltya
+.section sltyb
+.section sltza
+.section sltzb
+.section slt1a
+.section slt1b
+.section slt2a
+.section slt2b
+.section slt3a
+.section slt3b
+.section slt4a
+.section slt4b
+.section slt5a
+.section slt5b
+.section slt6a
+.section slt6b
+.section slt7a
+.section slt7b
+.section slt8a
+.section slt8b
+.section slt9a
+.section slt9b
+.section slt0a
+.section slt0b
+.section sluaa
+.section sluab
+.section sluba
+.section slubb
+.section sluca
+.section slucb
+.section sluda
+.section sludb
+.section sluea
+.section slueb
+.section slufa
+.section slufb
+.section sluga
+.section slugb
+.section sluha
+.section sluhb
+.section sluia
+.section sluib
+.section sluja
+.section slujb
+.section sluka
+.section slukb
+.section slula
+.section slulb
+.section sluma
+.section slumb
+.section sluna
+.section slunb
+.section sluoa
+.section sluob
+.section slupa
+.section slupb
+.section sluqa
+.section sluqb
+.section slura
+.section slurb
+.section slusa
+.section slusb
+.section sluta
+.section slutb
+.section sluua
+.section sluub
+.section sluva
+.section sluvb
+.section sluwa
+.section sluwb
+.section sluxa
+.section sluxb
+.section sluya
+.section sluyb
+.section sluza
+.section sluzb
+.section slu1a
+.section slu1b
+.section slu2a
+.section slu2b
+.section slu3a
+.section slu3b
+.section slu4a
+.section slu4b
+.section slu5a
+.section slu5b
+.section slu6a
+.section slu6b
+.section slu7a
+.section slu7b
+.section slu8a
+.section slu8b
+.section slu9a
+.section slu9b
+.section slu0a
+.section slu0b
+.section slvaa
+.section slvab
+.section slvba
+.section slvbb
+.section slvca
+.section slvcb
+.section slvda
+.section slvdb
+.section slvea
+.section slveb
+.section slvfa
+.section slvfb
+.section slvga
+.section slvgb
+.section slvha
+.section slvhb
+.section slvia
+.section slvib
+.section slvja
+.section slvjb
+.section slvka
+.section slvkb
+.section slvla
+.section slvlb
+.section slvma
+.section slvmb
+.section slvna
+.section slvnb
+.section slvoa
+.section slvob
+.section slvpa
+.section slvpb
+.section slvqa
+.section slvqb
+.section slvra
+.section slvrb
+.section slvsa
+.section slvsb
+.section slvta
+.section slvtb
+.section slvua
+.section slvub
+.section slvva
+.section slvvb
+.section slvwa
+.section slvwb
+.section slvxa
+.section slvxb
+.section slvya
+.section slvyb
+.section slvza
+.section slvzb
+.section slv1a
+.section slv1b
+.section slv2a
+.section slv2b
+.section slv3a
+.section slv3b
+.section slv4a
+.section slv4b
+.section slv5a
+.section slv5b
+.section slv6a
+.section slv6b
+.section slv7a
+.section slv7b
+.section slv8a
+.section slv8b
+.section slv9a
+.section slv9b
+.section slv0a
+.section slv0b
+.section slwaa
+.section slwab
+.section slwba
+.section slwbb
+.section slwca
+.section slwcb
+.section slwda
+.section slwdb
+.section slwea
+.section slweb
+.section slwfa
+.section slwfb
+.section slwga
+.section slwgb
+.section slwha
+.section slwhb
+.section slwia
+.section slwib
+.section slwja
+.section slwjb
+.section slwka
+.section slwkb
+.section slwla
+.section slwlb
+.section slwma
+.section slwmb
+.section slwna
+.section slwnb
+.section slwoa
+.section slwob
+.section slwpa
+.section slwpb
+.section slwqa
+.section slwqb
+.section slwra
+.section slwrb
+.section slwsa
+.section slwsb
+.section slwta
+.section slwtb
+.section slwua
+.section slwub
+.section slwva
+.section slwvb
+.section slwwa
+.section slwwb
+.section slwxa
+.section slwxb
+.section slwya
+.section slwyb
+.section slwza
+.section slwzb
+.section slw1a
+.section slw1b
+.section slw2a
+.section slw2b
+.section slw3a
+.section slw3b
+.section slw4a
+.section slw4b
+.section slw5a
+.section slw5b
+.section slw6a
+.section slw6b
+.section slw7a
+.section slw7b
+.section slw8a
+.section slw8b
+.section slw9a
+.section slw9b
+.section slw0a
+.section slw0b
+.section slxaa
+.section slxab
+.section slxba
+.section slxbb
+.section slxca
+.section slxcb
+.section slxda
+.section slxdb
+.section slxea
+.section slxeb
+.section slxfa
+.section slxfb
+.section slxga
+.section slxgb
+.section slxha
+.section slxhb
+.section slxia
+.section slxib
+.section slxja
+.section slxjb
+.section slxka
+.section slxkb
+.section slxla
+.section slxlb
+.section slxma
+.section slxmb
+.section slxna
+.section slxnb
+.section slxoa
+.section slxob
+.section slxpa
+.section slxpb
+.section slxqa
+.section slxqb
+.section slxra
+.section slxrb
+.section slxsa
+.section slxsb
+.section slxta
+.section slxtb
+.section slxua
+.section slxub
+.section slxva
+.section slxvb
+.section slxwa
+.section slxwb
+.section slxxa
+.section slxxb
+.section slxya
+.section slxyb
+.section slxza
+.section slxzb
+.section slx1a
+.section slx1b
+.section slx2a
+.section slx2b
+.section slx3a
+.section slx3b
+.section slx4a
+.section slx4b
+.section slx5a
+.section slx5b
+.section slx6a
+.section slx6b
+.section slx7a
+.section slx7b
+.section slx8a
+.section slx8b
+.section slx9a
+.section slx9b
+.section slx0a
+.section slx0b
+.section slyaa
+.section slyab
+.section slyba
+.section slybb
+.section slyca
+.section slycb
+.section slyda
+.section slydb
+.section slyea
+.section slyeb
+.section slyfa
+.section slyfb
+.section slyga
+.section slygb
+.section slyha
+.section slyhb
+.section slyia
+.section slyib
+.section slyja
+.section slyjb
+.section slyka
+.section slykb
+.section slyla
+.section slylb
+.section slyma
+.section slymb
+.section slyna
+.section slynb
+.section slyoa
+.section slyob
+.section slypa
+.section slypb
+.section slyqa
+.section slyqb
+.section slyra
+.section slyrb
+.section slysa
+.section slysb
+.section slyta
+.section slytb
+.section slyua
+.section slyub
+.section slyva
+.section slyvb
+.section slywa
+.section slywb
+.section slyxa
+.section slyxb
+.section slyya
+.section slyyb
+.section slyza
+.section slyzb
+.section sly1a
+.section sly1b
+.section sly2a
+.section sly2b
+.section sly3a
+.section sly3b
+.section sly4a
+.section sly4b
+.section sly5a
+.section sly5b
+.section sly6a
+.section sly6b
+.section sly7a
+.section sly7b
+.section sly8a
+.section sly8b
+.section sly9a
+.section sly9b
+.section sly0a
+.section sly0b
+.section slzaa
+.section slzab
+.section slzba
+.section slzbb
+.section slzca
+.section slzcb
+.section slzda
+.section slzdb
+.section slzea
+.section slzeb
+.section slzfa
+.section slzfb
+.section slzga
+.section slzgb
+.section slzha
+.section slzhb
+.section slzia
+.section slzib
+.section slzja
+.section slzjb
+.section slzka
+.section slzkb
+.section slzla
+.section slzlb
+.section slzma
+.section slzmb
+.section slzna
+.section slznb
+.section slzoa
+.section slzob
+.section slzpa
+.section slzpb
+.section slzqa
+.section slzqb
+.section slzra
+.section slzrb
+.section slzsa
+.section slzsb
+.section slzta
+.section slztb
+.section slzua
+.section slzub
+.section slzva
+.section slzvb
+.section slzwa
+.section slzwb
+.section slzxa
+.section slzxb
+.section slzya
+.section slzyb
+.section slzza
+.section slzzb
+.section slz1a
+.section slz1b
+.section slz2a
+.section slz2b
+.section slz3a
+.section slz3b
+.section slz4a
+.section slz4b
+.section slz5a
+.section slz5b
+.section slz6a
+.section slz6b
+.section slz7a
+.section slz7b
+.section slz8a
+.section slz8b
+.section slz9a
+.section slz9b
+.section slz0a
+.section slz0b
+.section sl1aa
+.section sl1ab
+.section sl1ba
+.section sl1bb
+.section sl1ca
+.section sl1cb
+.section sl1da
+.section sl1db
+.section sl1ea
+.section sl1eb
+.section sl1fa
+.section sl1fb
+.section sl1ga
+.section sl1gb
+.section sl1ha
+.section sl1hb
+.section sl1ia
+.section sl1ib
+.section sl1ja
+.section sl1jb
+.section sl1ka
+.section sl1kb
+.section sl1la
+.section sl1lb
+.section sl1ma
+.section sl1mb
+.section sl1na
+.section sl1nb
+.section sl1oa
+.section sl1ob
+.section sl1pa
+.section sl1pb
+.section sl1qa
+.section sl1qb
+.section sl1ra
+.section sl1rb
+.section sl1sa
+.section sl1sb
+.section sl1ta
+.section sl1tb
+.section sl1ua
+.section sl1ub
+.section sl1va
+.section sl1vb
+.section sl1wa
+.section sl1wb
+.section sl1xa
+.section sl1xb
+.section sl1ya
+.section sl1yb
+.section sl1za
+.section sl1zb
+.section sl11a
+.section sl11b
+.section sl12a
+.section sl12b
+.section sl13a
+.section sl13b
+.section sl14a
+.section sl14b
+.section sl15a
+.section sl15b
+.section sl16a
+.section sl16b
+.section sl17a
+.section sl17b
+.section sl18a
+.section sl18b
+.section sl19a
+.section sl19b
+.section sl10a
+.section sl10b
+.section sl2aa
+.section sl2ab
+.section sl2ba
+.section sl2bb
+.section sl2ca
+.section sl2cb
+.section sl2da
+.section sl2db
+.section sl2ea
+.section sl2eb
+.section sl2fa
+.section sl2fb
+.section sl2ga
+.section sl2gb
+.section sl2ha
+.section sl2hb
+.section sl2ia
+.section sl2ib
+.section sl2ja
+.section sl2jb
+.section sl2ka
+.section sl2kb
+.section sl2la
+.section sl2lb
+.section sl2ma
+.section sl2mb
+.section sl2na
+.section sl2nb
+.section sl2oa
+.section sl2ob
+.section sl2pa
+.section sl2pb
+.section sl2qa
+.section sl2qb
+.section sl2ra
+.section sl2rb
+.section sl2sa
+.section sl2sb
+.section sl2ta
+.section sl2tb
+.section sl2ua
+.section sl2ub
+.section sl2va
+.section sl2vb
+.section sl2wa
+.section sl2wb
+.section sl2xa
+.section sl2xb
+.section sl2ya
+.section sl2yb
+.section sl2za
+.section sl2zb
+.section sl21a
+.section sl21b
+.section sl22a
+.section sl22b
+.section sl23a
+.section sl23b
+.section sl24a
+.section sl24b
+.section sl25a
+.section sl25b
+.section sl26a
+.section sl26b
+.section sl27a
+.section sl27b
+.section sl28a
+.section sl28b
+.section sl29a
+.section sl29b
+.section sl20a
+.section sl20b
+.section sl3aa
+.section sl3ab
+.section sl3ba
+.section sl3bb
+.section sl3ca
+.section sl3cb
+.section sl3da
+.section sl3db
+.section sl3ea
+.section sl3eb
+.section sl3fa
+.section sl3fb
+.section sl3ga
+.section sl3gb
+.section sl3ha
+.section sl3hb
+.section sl3ia
+.section sl3ib
+.section sl3ja
+.section sl3jb
+.section sl3ka
+.section sl3kb
+.section sl3la
+.section sl3lb
+.section sl3ma
+.section sl3mb
+.section sl3na
+.section sl3nb
+.section sl3oa
+.section sl3ob
+.section sl3pa
+.section sl3pb
+.section sl3qa
+.section sl3qb
+.section sl3ra
+.section sl3rb
+.section sl3sa
+.section sl3sb
+.section sl3ta
+.section sl3tb
+.section sl3ua
+.section sl3ub
+.section sl3va
+.section sl3vb
+.section sl3wa
+.section sl3wb
+.section sl3xa
+.section sl3xb
+.section sl3ya
+.section sl3yb
+.section sl3za
+.section sl3zb
+.section sl31a
+.section sl31b
+.section sl32a
+.section sl32b
+.section sl33a
+.section sl33b
+.section sl34a
+.section sl34b
+.section sl35a
+.section sl35b
+.section sl36a
+.section sl36b
+.section sl37a
+.section sl37b
+.section sl38a
+.section sl38b
+.section sl39a
+.section sl39b
+.section sl30a
+.section sl30b
+.section sl4aa
+.section sl4ab
+.section sl4ba
+.section sl4bb
+.section sl4ca
+.section sl4cb
+.section sl4da
+.section sl4db
+.section sl4ea
+.section sl4eb
+.section sl4fa
+.section sl4fb
+.section sl4ga
+.section sl4gb
+.section sl4ha
+.section sl4hb
+.section sl4ia
+.section sl4ib
+.section sl4ja
+.section sl4jb
+.section sl4ka
+.section sl4kb
+.section sl4la
+.section sl4lb
+.section sl4ma
+.section sl4mb
+.section sl4na
+.section sl4nb
+.section sl4oa
+.section sl4ob
+.section sl4pa
+.section sl4pb
+.section sl4qa
+.section sl4qb
+.section sl4ra
+.section sl4rb
+.section sl4sa
+.section sl4sb
+.section sl4ta
+.section sl4tb
+.section sl4ua
+.section sl4ub
+.section sl4va
+.section sl4vb
+.section sl4wa
+.section sl4wb
+.section sl4xa
+.section sl4xb
+.section sl4ya
+.section sl4yb
+.section sl4za
+.section sl4zb
+.section sl41a
+.section sl41b
+.section sl42a
+.section sl42b
+.section sl43a
+.section sl43b
+.section sl44a
+.section sl44b
+.section sl45a
+.section sl45b
+.section sl46a
+.section sl46b
+.section sl47a
+.section sl47b
+.section sl48a
+.section sl48b
+.section sl49a
+.section sl49b
+.section sl40a
+.section sl40b
+.section sl5aa
+.section sl5ab
+.section sl5ba
+.section sl5bb
+.section sl5ca
+.section sl5cb
+.section sl5da
+.section sl5db
+.section sl5ea
+.section sl5eb
+.section sl5fa
+.section sl5fb
+.section sl5ga
+.section sl5gb
+.section sl5ha
+.section sl5hb
+.section sl5ia
+.section sl5ib
+.section sl5ja
+.section sl5jb
+.section sl5ka
+.section sl5kb
+.section sl5la
+.section sl5lb
+.section sl5ma
+.section sl5mb
+.section sl5na
+.section sl5nb
+.section sl5oa
+.section sl5ob
+.section sl5pa
+.section sl5pb
+.section sl5qa
+.section sl5qb
+.section sl5ra
+.section sl5rb
+.section sl5sa
+.section sl5sb
+.section sl5ta
+.section sl5tb
+.section sl5ua
+.section sl5ub
+.section sl5va
+.section sl5vb
+.section sl5wa
+.section sl5wb
+.section sl5xa
+.section sl5xb
+.section sl5ya
+.section sl5yb
+.section sl5za
+.section sl5zb
+.section sl51a
+.section sl51b
+.section sl52a
+.section sl52b
+.section sl53a
+.section sl53b
+.section sl54a
+.section sl54b
+.section sl55a
+.section sl55b
+.section sl56a
+.section sl56b
+.section sl57a
+.section sl57b
+.section sl58a
+.section sl58b
+.section sl59a
+.section sl59b
+.section sl50a
+.section sl50b
+.section sl6aa
+.section sl6ab
+.section sl6ba
+.section sl6bb
+.section sl6ca
+.section sl6cb
+.section sl6da
+.section sl6db
+.section sl6ea
+.section sl6eb
+.section sl6fa
+.section sl6fb
+.section sl6ga
+.section sl6gb
+.section sl6ha
+.section sl6hb
+.section sl6ia
+.section sl6ib
+.section sl6ja
+.section sl6jb
+.section sl6ka
+.section sl6kb
+.section sl6la
+.section sl6lb
+.section sl6ma
+.section sl6mb
+.section sl6na
+.section sl6nb
+.section sl6oa
+.section sl6ob
+.section sl6pa
+.section sl6pb
+.section sl6qa
+.section sl6qb
+.section sl6ra
+.section sl6rb
+.section sl6sa
+.section sl6sb
+.section sl6ta
+.section sl6tb
+.section sl6ua
+.section sl6ub
+.section sl6va
+.section sl6vb
+.section sl6wa
+.section sl6wb
+.section sl6xa
+.section sl6xb
+.section sl6ya
+.section sl6yb
+.section sl6za
+.section sl6zb
+.section sl61a
+.section sl61b
+.section sl62a
+.section sl62b
+.section sl63a
+.section sl63b
+.section sl64a
+.section sl64b
+.section sl65a
+.section sl65b
+.section sl66a
+.section sl66b
+.section sl67a
+.section sl67b
+.section sl68a
+.section sl68b
+.section sl69a
+.section sl69b
+.section sl60a
+.section sl60b
+.section sl7aa
+.section sl7ab
+.section sl7ba
+.section sl7bb
+.section sl7ca
+.section sl7cb
+.section sl7da
+.section sl7db
+.section sl7ea
+.section sl7eb
+.section sl7fa
+.section sl7fb
+.section sl7ga
+.section sl7gb
+.section sl7ha
+.section sl7hb
+.section sl7ia
+.section sl7ib
+.section sl7ja
+.section sl7jb
+.section sl7ka
+.section sl7kb
+.section sl7la
+.section sl7lb
+.section sl7ma
+.section sl7mb
+.section sl7na
+.section sl7nb
+.section sl7oa
+.section sl7ob
+.section sl7pa
+.section sl7pb
+.section sl7qa
+.section sl7qb
+.section sl7ra
+.section sl7rb
+.section sl7sa
+.section sl7sb
+.section sl7ta
+.section sl7tb
+.section sl7ua
+.section sl7ub
+.section sl7va
+.section sl7vb
+.section sl7wa
+.section sl7wb
+.section sl7xa
+.section sl7xb
+.section sl7ya
+.section sl7yb
+.section sl7za
+.section sl7zb
+.section sl71a
+.section sl71b
+.section sl72a
+.section sl72b
+.section sl73a
+.section sl73b
+.section sl74a
+.section sl74b
+.section sl75a
+.section sl75b
+.section sl76a
+.section sl76b
+.section sl77a
+.section sl77b
+.section sl78a
+.section sl78b
+.section sl79a
+.section sl79b
+.section sl70a
+.section sl70b
+.section sl8aa
+.section sl8ab
+.section sl8ba
+.section sl8bb
+.section sl8ca
+.section sl8cb
+.section sl8da
+.section sl8db
+.section sl8ea
+.section sl8eb
+.section sl8fa
+.section sl8fb
+.section sl8ga
+.section sl8gb
+.section sl8ha
+.section sl8hb
+.section sl8ia
+.section sl8ib
+.section sl8ja
+.section sl8jb
+.section sl8ka
+.section sl8kb
+.section sl8la
+.section sl8lb
+.section sl8ma
+.section sl8mb
+.section sl8na
+.section sl8nb
+.section sl8oa
+.section sl8ob
+.section sl8pa
+.section sl8pb
+.section sl8qa
+.section sl8qb
+.section sl8ra
+.section sl8rb
+.section sl8sa
+.section sl8sb
+.section sl8ta
+.section sl8tb
+.section sl8ua
+.section sl8ub
+.section sl8va
+.section sl8vb
+.section sl8wa
+.section sl8wb
+.section sl8xa
+.section sl8xb
+.section sl8ya
+.section sl8yb
+.section sl8za
+.section sl8zb
+.section sl81a
+.section sl81b
+.section sl82a
+.section sl82b
+.section sl83a
+.section sl83b
+.section sl84a
+.section sl84b
+.section sl85a
+.section sl85b
+.section sl86a
+.section sl86b
+.section sl87a
+.section sl87b
+.section sl88a
+.section sl88b
+.section sl89a
+.section sl89b
+.section sl80a
+.section sl80b
+.section sl9aa
+.section sl9ab
+.section sl9ba
+.section sl9bb
+.section sl9ca
+.section sl9cb
+.section sl9da
+.section sl9db
+.section sl9ea
+.section sl9eb
+.section sl9fa
+.section sl9fb
+.section sl9ga
+.section sl9gb
+.section sl9ha
+.section sl9hb
+.section sl9ia
+.section sl9ib
+.section sl9ja
+.section sl9jb
+.section sl9ka
+.section sl9kb
+.section sl9la
+.section sl9lb
+.section sl9ma
+.section sl9mb
+.section sl9na
+.section sl9nb
+.section sl9oa
+.section sl9ob
+.section sl9pa
+.section sl9pb
+.section sl9qa
+.section sl9qb
+.section sl9ra
+.section sl9rb
+.section sl9sa
+.section sl9sb
+.section sl9ta
+.section sl9tb
+.section sl9ua
+.section sl9ub
+.section sl9va
+.section sl9vb
+.section sl9wa
+.section sl9wb
+.section sl9xa
+.section sl9xb
+.section sl9ya
+.section sl9yb
+.section sl9za
+.section sl9zb
+.section sl91a
+.section sl91b
+.section sl92a
+.section sl92b
+.section sl93a
+.section sl93b
+.section sl94a
+.section sl94b
+.section sl95a
+.section sl95b
+.section sl96a
+.section sl96b
+.section sl97a
+.section sl97b
+.section sl98a
+.section sl98b
+.section sl99a
+.section sl99b
+.section sl90a
+.section sl90b
+.section sl0aa
+.section sl0ab
+.section sl0ba
+.section sl0bb
+.section sl0ca
+.section sl0cb
+.section sl0da
+.section sl0db
+.section sl0ea
+.section sl0eb
+.section sl0fa
+.section sl0fb
+.section sl0ga
+.section sl0gb
+.section sl0ha
+.section sl0hb
+.section sl0ia
+.section sl0ib
+.section sl0ja
+.section sl0jb
+.section sl0ka
+.section sl0kb
+.section sl0la
+.section sl0lb
+.section sl0ma
+.section sl0mb
+.section sl0na
+.section sl0nb
+.section sl0oa
+.section sl0ob
+.section sl0pa
+.section sl0pb
+.section sl0qa
+.section sl0qb
+.section sl0ra
+.section sl0rb
+.section sl0sa
+.section sl0sb
+.section sl0ta
+.section sl0tb
+.section sl0ua
+.section sl0ub
+.section sl0va
+.section sl0vb
+.section sl0wa
+.section sl0wb
+.section sl0xa
+.section sl0xb
+.section sl0ya
+.section sl0yb
+.section sl0za
+.section sl0zb
+.section sl01a
+.section sl01b
+.section sl02a
+.section sl02b
+.section sl03a
+.section sl03b
+.section sl04a
+.section sl04b
+.section sl05a
+.section sl05b
+.section sl06a
+.section sl06b
+.section sl07a
+.section sl07b
+.section sl08a
+.section sl08b
+.section sl09a
+.section sl09b
+.section sl00a
+.section sl00b
+.section smaaa
+.section smaab
+.section smaba
+.section smabb
+.section smaca
+.section smacb
+.section smada
+.section smadb
+.section smaea
+.section smaeb
+.section smafa
+.section smafb
+.section smaga
+.section smagb
+.section smaha
+.section smahb
+.section smaia
+.section smaib
+.section smaja
+.section smajb
+.section smaka
+.section smakb
+.section smala
+.section smalb
+.section smama
+.section smamb
+.section smana
+.section smanb
+.section smaoa
+.section smaob
+.section smapa
+.section smapb
+.section smaqa
+.section smaqb
+.section smara
+.section smarb
+.section smasa
+.section smasb
+.section smata
+.section smatb
+.section smaua
+.section smaub
+.section smava
+.section smavb
+.section smawa
+.section smawb
+.section smaxa
+.section smaxb
+.section smaya
+.section smayb
+.section smaza
+.section smazb
+.section sma1a
+.section sma1b
+.section sma2a
+.section sma2b
+.section sma3a
+.section sma3b
+.section sma4a
+.section sma4b
+.section sma5a
+.section sma5b
+.section sma6a
+.section sma6b
+.section sma7a
+.section sma7b
+.section sma8a
+.section sma8b
+.section sma9a
+.section sma9b
+.section sma0a
+.section sma0b
+.section smbaa
+.section smbab
+.section smbba
+.section smbbb
+.section smbca
+.section smbcb
+.section smbda
+.section smbdb
+.section smbea
+.section smbeb
+.section smbfa
+.section smbfb
+.section smbga
+.section smbgb
+.section smbha
+.section smbhb
+.section smbia
+.section smbib
+.section smbja
+.section smbjb
+.section smbka
+.section smbkb
+.section smbla
+.section smblb
+.section smbma
+.section smbmb
+.section smbna
+.section smbnb
+.section smboa
+.section smbob
+.section smbpa
+.section smbpb
+.section smbqa
+.section smbqb
+.section smbra
+.section smbrb
+.section smbsa
+.section smbsb
+.section smbta
+.section smbtb
+.section smbua
+.section smbub
+.section smbva
+.section smbvb
+.section smbwa
+.section smbwb
+.section smbxa
+.section smbxb
+.section smbya
+.section smbyb
+.section smbza
+.section smbzb
+.section smb1a
+.section smb1b
+.section smb2a
+.section smb2b
+.section smb3a
+.section smb3b
+.section smb4a
+.section smb4b
+.section smb5a
+.section smb5b
+.section smb6a
+.section smb6b
+.section smb7a
+.section smb7b
+.section smb8a
+.section smb8b
+.section smb9a
+.section smb9b
+.section smb0a
+.section smb0b
+.section smcaa
+.section smcab
+.section smcba
+.section smcbb
+.section smcca
+.section smccb
+.section smcda
+.section smcdb
+.section smcea
+.section smceb
+.section smcfa
+.section smcfb
+.section smcga
+.section smcgb
+.section smcha
+.section smchb
+.section smcia
+.section smcib
+.section smcja
+.section smcjb
+.section smcka
+.section smckb
+.section smcla
+.section smclb
+.section smcma
+.section smcmb
+.section smcna
+.section smcnb
+.section smcoa
+.section smcob
+.section smcpa
+.section smcpb
+.section smcqa
+.section smcqb
+.section smcra
+.section smcrb
+.section smcsa
+.section smcsb
+.section smcta
+.section smctb
+.section smcua
+.section smcub
+.section smcva
+.section smcvb
+.section smcwa
+.section smcwb
+.section smcxa
+.section smcxb
+.section smcya
+.section smcyb
+.section smcza
+.section smczb
+.section smc1a
+.section smc1b
+.section smc2a
+.section smc2b
+.section smc3a
+.section smc3b
+.section smc4a
+.section smc4b
+.section smc5a
+.section smc5b
+.section smc6a
+.section smc6b
+.section smc7a
+.section smc7b
+.section smc8a
+.section smc8b
+.section smc9a
+.section smc9b
+.section smc0a
+.section smc0b
+.section smdaa
+.section smdab
+.section smdba
+.section smdbb
+.section smdca
+.section smdcb
+.section smdda
+.section smddb
+.section smdea
+.section smdeb
+.section smdfa
+.section smdfb
+.section smdga
+.section smdgb
+.section smdha
+.section smdhb
+.section smdia
+.section smdib
+.section smdja
+.section smdjb
+.section smdka
+.section smdkb
+.section smdla
+.section smdlb
+.section smdma
+.section smdmb
+.section smdna
+.section smdnb
+.section smdoa
+.section smdob
+.section smdpa
+.section smdpb
+.section smdqa
+.section smdqb
+.section smdra
+.section smdrb
+.section smdsa
+.section smdsb
+.section smdta
+.section smdtb
+.section smdua
+.section smdub
+.section smdva
+.section smdvb
+.section smdwa
+.section smdwb
+.section smdxa
+.section smdxb
+.section smdya
+.section smdyb
+.section smdza
+.section smdzb
+.section smd1a
+.section smd1b
+.section smd2a
+.section smd2b
+.section smd3a
+.section smd3b
+.section smd4a
+.section smd4b
+.section smd5a
+.section smd5b
+.section smd6a
+.section smd6b
+.section smd7a
+.section smd7b
+.section smd8a
+.section smd8b
+.section smd9a
+.section smd9b
+.section smd0a
+.section smd0b
+.section smeaa
+.section smeab
+.section smeba
+.section smebb
+.section smeca
+.section smecb
+.section smeda
+.section smedb
+.section smeea
+.section smeeb
+.section smefa
+.section smefb
+.section smega
+.section smegb
+.section smeha
+.section smehb
+.section smeia
+.section smeib
+.section smeja
+.section smejb
+.section smeka
+.section smekb
+.section smela
+.section smelb
+.section smema
+.section smemb
+.section smena
+.section smenb
+.section smeoa
+.section smeob
+.section smepa
+.section smepb
+.section smeqa
+.section smeqb
+.section smera
+.section smerb
+.section smesa
+.section smesb
+.section smeta
+.section smetb
+.section smeua
+.section smeub
+.section smeva
+.section smevb
+.section smewa
+.section smewb
+.section smexa
+.section smexb
+.section smeya
+.section smeyb
+.section smeza
+.section smezb
+.section sme1a
+.section sme1b
+.section sme2a
+.section sme2b
+.section sme3a
+.section sme3b
+.section sme4a
+.section sme4b
+.section sme5a
+.section sme5b
+.section sme6a
+.section sme6b
+.section sme7a
+.section sme7b
+.section sme8a
+.section sme8b
+.section sme9a
+.section sme9b
+.section sme0a
+.section sme0b
+.section smfaa
+.section smfab
+.section smfba
+.section smfbb
+.section smfca
+.section smfcb
+.section smfda
+.section smfdb
+.section smfea
+.section smfeb
+.section smffa
+.section smffb
+.section smfga
+.section smfgb
+.section smfha
+.section smfhb
+.section smfia
+.section smfib
+.section smfja
+.section smfjb
+.section smfka
+.section smfkb
+.section smfla
+.section smflb
+.section smfma
+.section smfmb
+.section smfna
+.section smfnb
+.section smfoa
+.section smfob
+.section smfpa
+.section smfpb
+.section smfqa
+.section smfqb
+.section smfra
+.section smfrb
+.section smfsa
+.section smfsb
+.section smfta
+.section smftb
+.section smfua
+.section smfub
+.section smfva
+.section smfvb
+.section smfwa
+.section smfwb
+.section smfxa
+.section smfxb
+.section smfya
+.section smfyb
+.section smfza
+.section smfzb
+.section smf1a
+.section smf1b
+.section smf2a
+.section smf2b
+.section smf3a
+.section smf3b
+.section smf4a
+.section smf4b
+.section smf5a
+.section smf5b
+.section smf6a
+.section smf6b
+.section smf7a
+.section smf7b
+.section smf8a
+.section smf8b
+.section smf9a
+.section smf9b
+.section smf0a
+.section smf0b
+.section smgaa
+.section smgab
+.section smgba
+.section smgbb
+.section smgca
+.section smgcb
+.section smgda
+.section smgdb
+.section smgea
+.section smgeb
+.section smgfa
+.section smgfb
+.section smgga
+.section smggb
+.section smgha
+.section smghb
+.section smgia
+.section smgib
+.section smgja
+.section smgjb
+.section smgka
+.section smgkb
+.section smgla
+.section smglb
+.section smgma
+.section smgmb
+.section smgna
+.section smgnb
+.section smgoa
+.section smgob
+.section smgpa
+.section smgpb
+.section smgqa
+.section smgqb
+.section smgra
+.section smgrb
+.section smgsa
+.section smgsb
+.section smgta
+.section smgtb
+.section smgua
+.section smgub
+.section smgva
+.section smgvb
+.section smgwa
+.section smgwb
+.section smgxa
+.section smgxb
+.section smgya
+.section smgyb
+.section smgza
+.section smgzb
+.section smg1a
+.section smg1b
+.section smg2a
+.section smg2b
+.section smg3a
+.section smg3b
+.section smg4a
+.section smg4b
+.section smg5a
+.section smg5b
+.section smg6a
+.section smg6b
+.section smg7a
+.section smg7b
+.section smg8a
+.section smg8b
+.section smg9a
+.section smg9b
+.section smg0a
+.section smg0b
+.section smhaa
+.section smhab
+.section smhba
+.section smhbb
+.section smhca
+.section smhcb
+.section smhda
+.section smhdb
+.section smhea
+.section smheb
+.section smhfa
+.section smhfb
+.section smhga
+.section smhgb
+.section smhha
+.section smhhb
+.section smhia
+.section smhib
+.section smhja
+.section smhjb
+.section smhka
+.section smhkb
+.section smhla
+.section smhlb
+.section smhma
+.section smhmb
+.section smhna
+.section smhnb
+.section smhoa
+.section smhob
+.section smhpa
+.section smhpb
+.section smhqa
+.section smhqb
+.section smhra
+.section smhrb
+.section smhsa
+.section smhsb
+.section smhta
+.section smhtb
+.section smhua
+.section smhub
+.section smhva
+.section smhvb
+.section smhwa
+.section smhwb
+.section smhxa
+.section smhxb
+.section smhya
+.section smhyb
+.section smhza
+.section smhzb
+.section smh1a
+.section smh1b
+.section smh2a
+.section smh2b
+.section smh3a
+.section smh3b
+.section smh4a
+.section smh4b
+.section smh5a
+.section smh5b
+.section smh6a
+.section smh6b
+.section smh7a
+.section smh7b
+.section smh8a
+.section smh8b
+.section smh9a
+.section smh9b
+.section smh0a
+.section smh0b
+.section smiaa
+.section smiab
+.section smiba
+.section smibb
+.section smica
+.section smicb
+.section smida
+.section smidb
+.section smiea
+.section smieb
+.section smifa
+.section smifb
+.section smiga
+.section smigb
+.section smiha
+.section smihb
+.section smiia
+.section smiib
+.section smija
+.section smijb
+.section smika
+.section smikb
+.section smila
+.section smilb
+.section smima
+.section smimb
+.section smina
+.section sminb
+.section smioa
+.section smiob
+.section smipa
+.section smipb
+.section smiqa
+.section smiqb
+.section smira
+.section smirb
+.section smisa
+.section smisb
+.section smita
+.section smitb
+.section smiua
+.section smiub
+.section smiva
+.section smivb
+.section smiwa
+.section smiwb
+.section smixa
+.section smixb
+.section smiya
+.section smiyb
+.section smiza
+.section smizb
+.section smi1a
+.section smi1b
+.section smi2a
+.section smi2b
+.section smi3a
+.section smi3b
+.section smi4a
+.section smi4b
+.section smi5a
+.section smi5b
+.section smi6a
+.section smi6b
+.section smi7a
+.section smi7b
+.section smi8a
+.section smi8b
+.section smi9a
+.section smi9b
+.section smi0a
+.section smi0b
+.section smjaa
+.section smjab
+.section smjba
+.section smjbb
+.section smjca
+.section smjcb
+.section smjda
+.section smjdb
+.section smjea
+.section smjeb
+.section smjfa
+.section smjfb
+.section smjga
+.section smjgb
+.section smjha
+.section smjhb
+.section smjia
+.section smjib
+.section smjja
+.section smjjb
+.section smjka
+.section smjkb
+.section smjla
+.section smjlb
+.section smjma
+.section smjmb
+.section smjna
+.section smjnb
+.section smjoa
+.section smjob
+.section smjpa
+.section smjpb
+.section smjqa
+.section smjqb
+.section smjra
+.section smjrb
+.section smjsa
+.section smjsb
+.section smjta
+.section smjtb
+.section smjua
+.section smjub
+.section smjva
+.section smjvb
+.section smjwa
+.section smjwb
+.section smjxa
+.section smjxb
+.section smjya
+.section smjyb
+.section smjza
+.section smjzb
+.section smj1a
+.section smj1b
+.section smj2a
+.section smj2b
+.section smj3a
+.section smj3b
+.section smj4a
+.section smj4b
+.section smj5a
+.section smj5b
+.section smj6a
+.section smj6b
+.section smj7a
+.section smj7b
+.section smj8a
+.section smj8b
+.section smj9a
+.section smj9b
+.section smj0a
+.section smj0b
+.section smkaa
+.section smkab
+.section smkba
+.section smkbb
+.section smkca
+.section smkcb
+.section smkda
+.section smkdb
+.section smkea
+.section smkeb
+.section smkfa
+.section smkfb
+.section smkga
+.section smkgb
+.section smkha
+.section smkhb
+.section smkia
+.section smkib
+.section smkja
+.section smkjb
+.section smkka
+.section smkkb
+.section smkla
+.section smklb
+.section smkma
+.section smkmb
+.section smkna
+.section smknb
+.section smkoa
+.section smkob
+.section smkpa
+.section smkpb
+.section smkqa
+.section smkqb
+.section smkra
+.section smkrb
+.section smksa
+.section smksb
+.section smkta
+.section smktb
+.section smkua
+.section smkub
+.section smkva
+.section smkvb
+.section smkwa
+.section smkwb
+.section smkxa
+.section smkxb
+.section smkya
+.section smkyb
+.section smkza
+.section smkzb
+.section smk1a
+.section smk1b
+.section smk2a
+.section smk2b
+.section smk3a
+.section smk3b
+.section smk4a
+.section smk4b
+.section smk5a
+.section smk5b
+.section smk6a
+.section smk6b
+.section smk7a
+.section smk7b
+.section smk8a
+.section smk8b
+.section smk9a
+.section smk9b
+.section smk0a
+.section smk0b
+.section smlaa
+.section smlab
+.section smlba
+.section smlbb
+.section smlca
+.section smlcb
+.section smlda
+.section smldb
+.section smlea
+.section smleb
+.section smlfa
+.section smlfb
+.section smlga
+.section smlgb
+.section smlha
+.section smlhb
+.section smlia
+.section smlib
+.section smlja
+.section smljb
+.section smlka
+.section smlkb
+.section smlla
+.section smllb
+.section smlma
+.section smlmb
+.section smlna
+.section smlnb
+.section smloa
+.section smlob
+.section smlpa
+.section smlpb
+.section smlqa
+.section smlqb
+.section smlra
+.section smlrb
+.section smlsa
+.section smlsb
+.section smlta
+.section smltb
+.section smlua
+.section smlub
+.section smlva
+.section smlvb
+.section smlwa
+.section smlwb
+.section smlxa
+.section smlxb
+.section smlya
+.section smlyb
+.section smlza
+.section smlzb
+.section sml1a
+.section sml1b
+.section sml2a
+.section sml2b
+.section sml3a
+.section sml3b
+.section sml4a
+.section sml4b
+.section sml5a
+.section sml5b
+.section sml6a
+.section sml6b
+.section sml7a
+.section sml7b
+.section sml8a
+.section sml8b
+.section sml9a
+.section sml9b
+.section sml0a
+.section sml0b
+.section smmaa
+.section smmab
+.section smmba
+.section smmbb
+.section smmca
+.section smmcb
+.section smmda
+.section smmdb
+.section smmea
+.section smmeb
+.section smmfa
+.section smmfb
+.section smmga
+.section smmgb
+.section smmha
+.section smmhb
+.section smmia
+.section smmib
+.section smmja
+.section smmjb
+.section smmka
+.section smmkb
+.section smmla
+.section smmlb
+.section smmma
+.section smmmb
+.section smmna
+.section smmnb
+.section smmoa
+.section smmob
+.section smmpa
+.section smmpb
+.section smmqa
+.section smmqb
+.section smmra
+.section smmrb
+.section smmsa
+.section smmsb
+.section smmta
+.section smmtb
+.section smmua
+.section smmub
+.section smmva
+.section smmvb
+.section smmwa
+.section smmwb
+.section smmxa
+.section smmxb
+.section smmya
+.section smmyb
+.section smmza
+.section smmzb
+.section smm1a
+.section smm1b
+.section smm2a
+.section smm2b
+.section smm3a
+.section smm3b
+.section smm4a
+.section smm4b
+.section smm5a
+.section smm5b
+.section smm6a
+.section smm6b
+.section smm7a
+.section smm7b
+.section smm8a
+.section smm8b
+.section smm9a
+.section smm9b
+.section smm0a
+.section smm0b
+.section smnaa
+.section smnab
+.section smnba
+.section smnbb
+.section smnca
+.section smncb
+.section smnda
+.section smndb
+.section smnea
+.section smneb
+.section smnfa
+.section smnfb
+.section smnga
+.section smngb
+.section smnha
+.section smnhb
+.section smnia
+.section smnib
+.section smnja
+.section smnjb
+.section smnka
+.section smnkb
+.section smnla
+.section smnlb
+.section smnma
+.section smnmb
+.section smnna
+.section smnnb
+.section smnoa
+.section smnob
+.section smnpa
+.section smnpb
+.section smnqa
+.section smnqb
+.section smnra
+.section smnrb
+.section smnsa
+.section smnsb
+.section smnta
+.section smntb
+.section smnua
+.section smnub
+.section smnva
+.section smnvb
+.section smnwa
+.section smnwb
+.section smnxa
+.section smnxb
+.section smnya
+.section smnyb
+.section smnza
+.section smnzb
+.section smn1a
+.section smn1b
+.section smn2a
+.section smn2b
+.section smn3a
+.section smn3b
+.section smn4a
+.section smn4b
+.section smn5a
+.section smn5b
+.section smn6a
+.section smn6b
+.section smn7a
+.section smn7b
+.section smn8a
+.section smn8b
+.section smn9a
+.section smn9b
+.section smn0a
+.section smn0b
+.section smoaa
+.section smoab
+.section smoba
+.section smobb
+.section smoca
+.section smocb
+.section smoda
+.section smodb
+.section smoea
+.section smoeb
+.section smofa
+.section smofb
+.section smoga
+.section smogb
+.section smoha
+.section smohb
+.section smoia
+.section smoib
+.section smoja
+.section smojb
+.section smoka
+.section smokb
+.section smola
+.section smolb
+.section smoma
+.section smomb
+.section smona
+.section smonb
+.section smooa
+.section smoob
+.section smopa
+.section smopb
+.section smoqa
+.section smoqb
+.section smora
+.section smorb
+.section smosa
+.section smosb
+.section smota
+.section smotb
+.section smoua
+.section smoub
+.section smova
+.section smovb
+.section smowa
+.section smowb
+.section smoxa
+.section smoxb
+.section smoya
+.section smoyb
+.section smoza
+.section smozb
+.section smo1a
+.section smo1b
+.section smo2a
+.section smo2b
+.section smo3a
+.section smo3b
+.section smo4a
+.section smo4b
+.section smo5a
+.section smo5b
+.section smo6a
+.section smo6b
+.section smo7a
+.section smo7b
+.section smo8a
+.section smo8b
+.section smo9a
+.section smo9b
+.section smo0a
+.section smo0b
+.section smpaa
+.section smpab
+.section smpba
+.section smpbb
+.section smpca
+.section smpcb
+.section smpda
+.section smpdb
+.section smpea
+.section smpeb
+.section smpfa
+.section smpfb
+.section smpga
+.section smpgb
+.section smpha
+.section smphb
+.section smpia
+.section smpib
+.section smpja
+.section smpjb
+.section smpka
+.section smpkb
+.section smpla
+.section smplb
+.section smpma
+.section smpmb
+.section smpna
+.section smpnb
+.section smpoa
+.section smpob
+.section smppa
+.section smppb
+.section smpqa
+.section smpqb
+.section smpra
+.section smprb
+.section smpsa
+.section smpsb
+.section smpta
+.section smptb
+.section smpua
+.section smpub
+.section smpva
+.section smpvb
+.section smpwa
+.section smpwb
+.section smpxa
+.section smpxb
+.section smpya
+.section smpyb
+.section smpza
+.section smpzb
+.section smp1a
+.section smp1b
+.section smp2a
+.section smp2b
+.section smp3a
+.section smp3b
+.section smp4a
+.section smp4b
+.section smp5a
+.section smp5b
+.section smp6a
+.section smp6b
+.section smp7a
+.section smp7b
+.section smp8a
+.section smp8b
+.section smp9a
+.section smp9b
+.section smp0a
+.section smp0b
+.section smqaa
+.section smqab
+.section smqba
+.section smqbb
+.section smqca
+.section smqcb
+.section smqda
+.section smqdb
+.section smqea
+.section smqeb
+.section smqfa
+.section smqfb
+.section smqga
+.section smqgb
+.section smqha
+.section smqhb
+.section smqia
+.section smqib
+.section smqja
+.section smqjb
+.section smqka
+.section smqkb
+.section smqla
+.section smqlb
+.section smqma
+.section smqmb
+.section smqna
+.section smqnb
+.section smqoa
+.section smqob
+.section smqpa
+.section smqpb
+.section smqqa
+.section smqqb
+.section smqra
+.section smqrb
+.section smqsa
+.section smqsb
+.section smqta
+.section smqtb
+.section smqua
+.section smqub
+.section smqva
+.section smqvb
+.section smqwa
+.section smqwb
+.section smqxa
+.section smqxb
+.section smqya
+.section smqyb
+.section smqza
+.section smqzb
+.section smq1a
+.section smq1b
+.section smq2a
+.section smq2b
+.section smq3a
+.section smq3b
+.section smq4a
+.section smq4b
+.section smq5a
+.section smq5b
+.section smq6a
+.section smq6b
+.section smq7a
+.section smq7b
+.section smq8a
+.section smq8b
+.section smq9a
+.section smq9b
+.section smq0a
+.section smq0b
+.section smraa
+.section smrab
+.section smrba
+.section smrbb
+.section smrca
+.section smrcb
+.section smrda
+.section smrdb
+.section smrea
+.section smreb
+.section smrfa
+.section smrfb
+.section smrga
+.section smrgb
+.section smrha
+.section smrhb
+.section smria
+.section smrib
+.section smrja
+.section smrjb
+.section smrka
+.section smrkb
+.section smrla
+.section smrlb
+.section smrma
+.section smrmb
+.section smrna
+.section smrnb
+.section smroa
+.section smrob
+.section smrpa
+.section smrpb
+.section smrqa
+.section smrqb
+.section smrra
+.section smrrb
+.section smrsa
+.section smrsb
+.section smrta
+.section smrtb
+.section smrua
+.section smrub
+.section smrva
+.section smrvb
+.section smrwa
+.section smrwb
+.section smrxa
+.section smrxb
+.section smrya
+.section smryb
+.section smrza
+.section smrzb
+.section smr1a
+.section smr1b
+.section smr2a
+.section smr2b
+.section smr3a
+.section smr3b
+.section smr4a
+.section smr4b
+.section smr5a
+.section smr5b
+.section smr6a
+.section smr6b
+.section smr7a
+.section smr7b
+.section smr8a
+.section smr8b
+.section smr9a
+.section smr9b
+.section smr0a
+.section smr0b
+.section smsaa
+.section smsab
+.section smsba
+.section smsbb
+.section smsca
+.section smscb
+.section smsda
+.section smsdb
+.section smsea
+.section smseb
+.section smsfa
+.section smsfb
+.section smsga
+.section smsgb
+.section smsha
+.section smshb
+.section smsia
+.section smsib
+.section smsja
+.section smsjb
+.section smska
+.section smskb
+.section smsla
+.section smslb
+.section smsma
+.section smsmb
+.section smsna
+.section smsnb
+.section smsoa
+.section smsob
+.section smspa
+.section smspb
+.section smsqa
+.section smsqb
+.section smsra
+.section smsrb
+.section smssa
+.section smssb
+.section smsta
+.section smstb
+.section smsua
+.section smsub
+.section smsva
+.section smsvb
+.section smswa
+.section smswb
+.section smsxa
+.section smsxb
+.section smsya
+.section smsyb
+.section smsza
+.section smszb
+.section sms1a
+.section sms1b
+.section sms2a
+.section sms2b
+.section sms3a
+.section sms3b
+.section sms4a
+.section sms4b
+.section sms5a
+.section sms5b
+.section sms6a
+.section sms6b
+.section sms7a
+.section sms7b
+.section sms8a
+.section sms8b
+.section sms9a
+.section sms9b
+.section sms0a
+.section sms0b
+.section smtaa
+.section smtab
+.section smtba
+.section smtbb
+.section smtca
+.section smtcb
+.section smtda
+.section smtdb
+.section smtea
+.section smteb
+.section smtfa
+.section smtfb
+.section smtga
+.section smtgb
+.section smtha
+.section smthb
+.section smtia
+.section smtib
+.section smtja
+.section smtjb
+.section smtka
+.section smtkb
+.section smtla
+.section smtlb
+.section smtma
+.section smtmb
+.section smtna
+.section smtnb
+.section smtoa
+.section smtob
+.section smtpa
+.section smtpb
+.section smtqa
+.section smtqb
+.section smtra
+.section smtrb
+.section smtsa
+.section smtsb
+.section smtta
+.section smttb
+.section smtua
+.section smtub
+.section smtva
+.section smtvb
+.section smtwa
+.section smtwb
+.section smtxa
+.section smtxb
+.section smtya
+.section smtyb
+.section smtza
+.section smtzb
+.section smt1a
+.section smt1b
+.section smt2a
+.section smt2b
+.section smt3a
+.section smt3b
+.section smt4a
+.section smt4b
+.section smt5a
+.section smt5b
+.section smt6a
+.section smt6b
+.section smt7a
+.section smt7b
+.section smt8a
+.section smt8b
+.section smt9a
+.section smt9b
+.section smt0a
+.section smt0b
+.section smuaa
+.section smuab
+.section smuba
+.section smubb
+.section smuca
+.section smucb
+.section smuda
+.section smudb
+.section smuea
+.section smueb
+.section smufa
+.section smufb
+.section smuga
+.section smugb
+.section smuha
+.section smuhb
+.section smuia
+.section smuib
+.section smuja
+.section smujb
+.section smuka
+.section smukb
+.section smula
+.section smulb
+.section smuma
+.section smumb
+.section smuna
+.section smunb
+.section smuoa
+.section smuob
+.section smupa
+.section smupb
+.section smuqa
+.section smuqb
+.section smura
+.section smurb
+.section smusa
+.section smusb
+.section smuta
+.section smutb
+.section smuua
+.section smuub
+.section smuva
+.section smuvb
+.section smuwa
+.section smuwb
+.section smuxa
+.section smuxb
+.section smuya
+.section smuyb
+.section smuza
+.section smuzb
+.section smu1a
+.section smu1b
+.section smu2a
+.section smu2b
+.section smu3a
+.section smu3b
+.section smu4a
+.section smu4b
+.section smu5a
+.section smu5b
+.section smu6a
+.section smu6b
+.section smu7a
+.section smu7b
+.section smu8a
+.section smu8b
+.section smu9a
+.section smu9b
+.section smu0a
+.section smu0b
+.section smvaa
+.section smvab
+.section smvba
+.section smvbb
+.section smvca
+.section smvcb
+.section smvda
+.section smvdb
+.section smvea
+.section smveb
+.section smvfa
+.section smvfb
+.section smvga
+.section smvgb
+.section smvha
+.section smvhb
+.section smvia
+.section smvib
+.section smvja
+.section smvjb
+.section smvka
+.section smvkb
+.section smvla
+.section smvlb
+.section smvma
+.section smvmb
+.section smvna
+.section smvnb
+.section smvoa
+.section smvob
+.section smvpa
+.section smvpb
+.section smvqa
+.section smvqb
+.section smvra
+.section smvrb
+.section smvsa
+.section smvsb
+.section smvta
+.section smvtb
+.section smvua
+.section smvub
+.section smvva
+.section smvvb
+.section smvwa
+.section smvwb
+.section smvxa
+.section smvxb
+.section smvya
+.section smvyb
+.section smvza
+.section smvzb
+.section smv1a
+.section smv1b
+.section smv2a
+.section smv2b
+.section smv3a
+.section smv3b
+.section smv4a
+.section smv4b
+.section smv5a
+.section smv5b
+.section smv6a
+.section smv6b
+.section smv7a
+.section smv7b
+.section smv8a
+.section smv8b
+.section smv9a
+.section smv9b
+.section smv0a
+.section smv0b
+.section smwaa
+.section smwab
+.section smwba
+.section smwbb
+.section smwca
+.section smwcb
+.section smwda
+.section smwdb
+.section smwea
+.section smweb
+.section smwfa
+.section smwfb
+.section smwga
+.section smwgb
+.section smwha
+.section smwhb
+.section smwia
+.section smwib
+.section smwja
+.section smwjb
+.section smwka
+.section smwkb
+.section smwla
+.section smwlb
+.section smwma
+.section smwmb
+.section smwna
+.section smwnb
+.section smwoa
+.section smwob
+.section smwpa
+.section smwpb
+.section smwqa
+.section smwqb
+.section smwra
+.section smwrb
+.section smwsa
+.section smwsb
+.section smwta
+.section smwtb
+.section smwua
+.section smwub
+.section smwva
+.section smwvb
+.section smwwa
+.section smwwb
+.section smwxa
+.section smwxb
+.section smwya
+.section smwyb
+.section smwza
+.section smwzb
+.section smw1a
+.section smw1b
+.section smw2a
+.section smw2b
+.section smw3a
+.section smw3b
+.section smw4a
+.section smw4b
+.section smw5a
+.section smw5b
+.section smw6a
+.section smw6b
+.section smw7a
+.section smw7b
+.section smw8a
+.section smw8b
+.section smw9a
+.section smw9b
+.section smw0a
+.section smw0b
+.section smxaa
+.section smxab
+.section smxba
+.section smxbb
+.section smxca
+.section smxcb
+.section smxda
+.section smxdb
+.section smxea
+.section smxeb
+.section smxfa
+.section smxfb
+.section smxga
+.section smxgb
+.section smxha
+.section smxhb
+.section smxia
+.section smxib
+.section smxja
+.section smxjb
+.section smxka
+.section smxkb
+.section smxla
+.section smxlb
+.section smxma
+.section smxmb
+.section smxna
+.section smxnb
+.section smxoa
+.section smxob
+.section smxpa
+.section smxpb
+.section smxqa
+.section smxqb
+.section smxra
+.section smxrb
+.section smxsa
+.section smxsb
+.section smxta
+.section smxtb
+.section smxua
+.section smxub
+.section smxva
+.section smxvb
+.section smxwa
+.section smxwb
+.section smxxa
+.section smxxb
+.section smxya
+.section smxyb
+.section smxza
+.section smxzb
+.section smx1a
+.section smx1b
+.section smx2a
+.section smx2b
+.section smx3a
+.section smx3b
+.section smx4a
+.section smx4b
+.section smx5a
+.section smx5b
+.section smx6a
+.section smx6b
+.section smx7a
+.section smx7b
+.section smx8a
+.section smx8b
+.section smx9a
+.section smx9b
+.section smx0a
+.section smx0b
+.section smyaa
+.section smyab
+.section smyba
+.section smybb
+.section smyca
+.section smycb
+.section smyda
+.section smydb
+.section smyea
+.section smyeb
+.section smyfa
+.section smyfb
+.section smyga
+.section smygb
+.section smyha
+.section smyhb
+.section smyia
+.section smyib
+.section smyja
+.section smyjb
+.section smyka
+.section smykb
+.section smyla
+.section smylb
+.section smyma
+.section smymb
+.section smyna
+.section smynb
+.section smyoa
+.section smyob
+.section smypa
+.section smypb
+.section smyqa
+.section smyqb
+.section smyra
+.section smyrb
+.section smysa
+.section smysb
+.section smyta
+.section smytb
+.section smyua
+.section smyub
+.section smyva
+.section smyvb
+.section smywa
+.section smywb
+.section smyxa
+.section smyxb
+.section smyya
+.section smyyb
+.section smyza
+.section smyzb
+.section smy1a
+.section smy1b
+.section smy2a
+.section smy2b
+.section smy3a
+.section smy3b
+.section smy4a
+.section smy4b
+.section smy5a
+.section smy5b
+.section smy6a
+.section smy6b
+.section smy7a
+.section smy7b
+.section smy8a
+.section smy8b
+.section smy9a
+.section smy9b
+.section smy0a
+.section smy0b
+.section smzaa
+.section smzab
+.section smzba
+.section smzbb
+.section smzca
+.section smzcb
+.section smzda
+.section smzdb
+.section smzea
+.section smzeb
+.section smzfa
+.section smzfb
+.section smzga
+.section smzgb
+.section smzha
+.section smzhb
+.section smzia
+.section smzib
+.section smzja
+.section smzjb
+.section smzka
+.section smzkb
+.section smzla
+.section smzlb
+.section smzma
+.section smzmb
+.section smzna
+.section smznb
+.section smzoa
+.section smzob
+.section smzpa
+.section smzpb
+.section smzqa
+.section smzqb
+.section smzra
+.section smzrb
+.section smzsa
+.section smzsb
+.section smzta
+.section smztb
+.section smzua
+.section smzub
+.section smzva
+.section smzvb
+.section smzwa
+.section smzwb
+.section smzxa
+.section smzxb
+.section smzya
+.section smzyb
+.section smzza
+.section smzzb
+.section smz1a
+.section smz1b
+.section smz2a
+.section smz2b
+.section smz3a
+.section smz3b
+.section smz4a
+.section smz4b
+.section smz5a
+.section smz5b
+.section smz6a
+.section smz6b
+.section smz7a
+.section smz7b
+.section smz8a
+.section smz8b
+.section smz9a
+.section smz9b
+.section smz0a
+.section smz0b
+.section sm1aa
+.section sm1ab
+.section sm1ba
+.section sm1bb
+.section sm1ca
+.section sm1cb
+.section sm1da
+.section sm1db
+.section sm1ea
+.section sm1eb
+.section sm1fa
+.section sm1fb
+.section sm1ga
+.section sm1gb
+.section sm1ha
+.section sm1hb
+.section sm1ia
+.section sm1ib
+.section sm1ja
+.section sm1jb
+.section sm1ka
+.section sm1kb
+.section sm1la
+.section sm1lb
+.section sm1ma
+.section sm1mb
+.section sm1na
+.section sm1nb
+.section sm1oa
+.section sm1ob
+.section sm1pa
+.section sm1pb
+.section sm1qa
+.section sm1qb
+.section sm1ra
+.section sm1rb
+.section sm1sa
+.section sm1sb
+.section sm1ta
+.section sm1tb
+.section sm1ua
+.section sm1ub
+.section sm1va
+.section sm1vb
+.section sm1wa
+.section sm1wb
+.section sm1xa
+.section sm1xb
+.section sm1ya
+.section sm1yb
+.section sm1za
+.section sm1zb
+.section sm11a
+.section sm11b
+.section sm12a
+.section sm12b
+.section sm13a
+.section sm13b
+.section sm14a
+.section sm14b
+.section sm15a
+.section sm15b
+.section sm16a
+.section sm16b
+.section sm17a
+.section sm17b
+.section sm18a
+.section sm18b
+.section sm19a
+.section sm19b
+.section sm10a
+.section sm10b
+.section sm2aa
+.section sm2ab
+.section sm2ba
+.section sm2bb
+.section sm2ca
+.section sm2cb
+.section sm2da
+.section sm2db
+.section sm2ea
+.section sm2eb
+.section sm2fa
+.section sm2fb
+.section sm2ga
+.section sm2gb
+.section sm2ha
+.section sm2hb
+.section sm2ia
+.section sm2ib
+.section sm2ja
+.section sm2jb
+.section sm2ka
+.section sm2kb
+.section sm2la
+.section sm2lb
+.section sm2ma
+.section sm2mb
+.section sm2na
+.section sm2nb
+.section sm2oa
+.section sm2ob
+.section sm2pa
+.section sm2pb
+.section sm2qa
+.section sm2qb
+.section sm2ra
+.section sm2rb
+.section sm2sa
+.section sm2sb
+.section sm2ta
+.section sm2tb
+.section sm2ua
+.section sm2ub
+.section sm2va
+.section sm2vb
+.section sm2wa
+.section sm2wb
+.section sm2xa
+.section sm2xb
+.section sm2ya
+.section sm2yb
+.section sm2za
+.section sm2zb
+.section sm21a
+.section sm21b
+.section sm22a
+.section sm22b
+.section sm23a
+.section sm23b
+.section sm24a
+.section sm24b
+.section sm25a
+.section sm25b
+.section sm26a
+.section sm26b
+.section sm27a
+.section sm27b
+.section sm28a
+.section sm28b
+.section sm29a
+.section sm29b
+.section sm20a
+.section sm20b
+.section sm3aa
+.section sm3ab
+.section sm3ba
+.section sm3bb
+.section sm3ca
+.section sm3cb
+.section sm3da
+.section sm3db
+.section sm3ea
+.section sm3eb
+.section sm3fa
+.section sm3fb
+.section sm3ga
+.section sm3gb
+.section sm3ha
+.section sm3hb
+.section sm3ia
+.section sm3ib
+.section sm3ja
+.section sm3jb
+.section sm3ka
+.section sm3kb
+.section sm3la
+.section sm3lb
+.section sm3ma
+.section sm3mb
+.section sm3na
+.section sm3nb
+.section sm3oa
+.section sm3ob
+.section sm3pa
+.section sm3pb
+.section sm3qa
+.section sm3qb
+.section sm3ra
+.section sm3rb
+.section sm3sa
+.section sm3sb
+.section sm3ta
+.section sm3tb
+.section sm3ua
+.section sm3ub
+.section sm3va
+.section sm3vb
+.section sm3wa
+.section sm3wb
+.section sm3xa
+.section sm3xb
+.section sm3ya
+.section sm3yb
+.section sm3za
+.section sm3zb
+.section sm31a
+.section sm31b
+.section sm32a
+.section sm32b
+.section sm33a
+.section sm33b
+.section sm34a
+.section sm34b
+.section sm35a
+.section sm35b
+.section sm36a
+.section sm36b
+.section sm37a
+.section sm37b
+.section sm38a
+.section sm38b
+.section sm39a
+.section sm39b
+.section sm30a
+.section sm30b
+.section sm4aa
+.section sm4ab
+.section sm4ba
+.section sm4bb
+.section sm4ca
+.section sm4cb
+.section sm4da
+.section sm4db
+.section sm4ea
+.section sm4eb
+.section sm4fa
+.section sm4fb
+.section sm4ga
+.section sm4gb
+.section sm4ha
+.section sm4hb
+.section sm4ia
+.section sm4ib
+.section sm4ja
+.section sm4jb
+.section sm4ka
+.section sm4kb
+.section sm4la
+.section sm4lb
+.section sm4ma
+.section sm4mb
+.section sm4na
+.section sm4nb
+.section sm4oa
+.section sm4ob
+.section sm4pa
+.section sm4pb
+.section sm4qa
+.section sm4qb
+.section sm4ra
+.section sm4rb
+.section sm4sa
+.section sm4sb
+.section sm4ta
+.section sm4tb
+.section sm4ua
+.section sm4ub
+.section sm4va
+.section sm4vb
+.section sm4wa
+.section sm4wb
+.section sm4xa
+.section sm4xb
+.section sm4ya
+.section sm4yb
+.section sm4za
+.section sm4zb
+.section sm41a
+.section sm41b
+.section sm42a
+.section sm42b
+.section sm43a
+.section sm43b
+.section sm44a
+.section sm44b
+.section sm45a
+.section sm45b
+.section sm46a
+.section sm46b
+.section sm47a
+.section sm47b
+.section sm48a
+.section sm48b
+.section sm49a
+.section sm49b
+.section sm40a
+.section sm40b
+.section sm5aa
+.section sm5ab
+.section sm5ba
+.section sm5bb
+.section sm5ca
+.section sm5cb
+.section sm5da
+.section sm5db
+.section sm5ea
+.section sm5eb
+.section sm5fa
+.section sm5fb
+.section sm5ga
+.section sm5gb
+.section sm5ha
+.section sm5hb
+.section sm5ia
+.section sm5ib
+.section sm5ja
+.section sm5jb
+.section sm5ka
+.section sm5kb
+.section sm5la
+.section sm5lb
+.section sm5ma
+.section sm5mb
+.section sm5na
+.section sm5nb
+.section sm5oa
+.section sm5ob
+.section sm5pa
+.section sm5pb
+.section sm5qa
+.section sm5qb
+.section sm5ra
+.section sm5rb
+.section sm5sa
+.section sm5sb
+.section sm5ta
+.section sm5tb
+.section sm5ua
+.section sm5ub
+.section sm5va
+.section sm5vb
+.section sm5wa
+.section sm5wb
+.section sm5xa
+.section sm5xb
+.section sm5ya
+.section sm5yb
+.section sm5za
+.section sm5zb
+.section sm51a
+.section sm51b
+.section sm52a
+.section sm52b
+.section sm53a
+.section sm53b
+.section sm54a
+.section sm54b
+.section sm55a
+.section sm55b
+.section sm56a
+.section sm56b
+.section sm57a
+.section sm57b
+.section sm58a
+.section sm58b
+.section sm59a
+.section sm59b
+.section sm50a
+.section sm50b
+.section sm6aa
+.section sm6ab
+.section sm6ba
+.section sm6bb
+.section sm6ca
+.section sm6cb
+.section sm6da
+.section sm6db
+.section sm6ea
+.section sm6eb
+.section sm6fa
+.section sm6fb
+.section sm6ga
+.section sm6gb
+.section sm6ha
+.section sm6hb
+.section sm6ia
+.section sm6ib
+.section sm6ja
+.section sm6jb
+.section sm6ka
+.section sm6kb
+.section sm6la
+.section sm6lb
+.section sm6ma
+.section sm6mb
+.section sm6na
+.section sm6nb
+.section sm6oa
+.section sm6ob
+.section sm6pa
+.section sm6pb
+.section sm6qa
+.section sm6qb
+.section sm6ra
+.section sm6rb
+.section sm6sa
+.section sm6sb
+.section sm6ta
+.section sm6tb
+.section sm6ua
+.section sm6ub
+.section sm6va
+.section sm6vb
+.section sm6wa
+.section sm6wb
+.section sm6xa
+.section sm6xb
+.section sm6ya
+.section sm6yb
+.section sm6za
+.section sm6zb
+.section sm61a
+.section sm61b
+.section sm62a
+.section sm62b
+.section sm63a
+.section sm63b
+.section sm64a
+.section sm64b
+.section sm65a
+.section sm65b
+.section sm66a
+.section sm66b
+.section sm67a
+.section sm67b
+.section sm68a
+.section sm68b
+.section sm69a
+.section sm69b
+.section sm60a
+.section sm60b
+.section sm7aa
+.section sm7ab
+.section sm7ba
+.section sm7bb
+.section sm7ca
+.section sm7cb
+.section sm7da
+.section sm7db
+.section sm7ea
+.section sm7eb
+.section sm7fa
+.section sm7fb
+.section sm7ga
+.section sm7gb
+.section sm7ha
+.section sm7hb
+.section sm7ia
+.section sm7ib
+.section sm7ja
+.section sm7jb
+.section sm7ka
+.section sm7kb
+.section sm7la
+.section sm7lb
+.section sm7ma
+.section sm7mb
+.section sm7na
+.section sm7nb
+.section sm7oa
+.section sm7ob
+.section sm7pa
+.section sm7pb
+.section sm7qa
+.section sm7qb
+.section sm7ra
+.section sm7rb
+.section sm7sa
+.section sm7sb
+.section sm7ta
+.section sm7tb
+.section sm7ua
+.section sm7ub
+.section sm7va
+.section sm7vb
+.section sm7wa
+.section sm7wb
+.section sm7xa
+.section sm7xb
+.section sm7ya
+.section sm7yb
+.section sm7za
+.section sm7zb
+.section sm71a
+.section sm71b
+.section sm72a
+.section sm72b
+.section sm73a
+.section sm73b
+.section sm74a
+.section sm74b
+.section sm75a
+.section sm75b
+.section sm76a
+.section sm76b
+.section sm77a
+.section sm77b
+.section sm78a
+.section sm78b
+.section sm79a
+.section sm79b
+.section sm70a
+.section sm70b
+.section sm8aa
+.section sm8ab
+.section sm8ba
+.section sm8bb
+.section sm8ca
+.section sm8cb
+.section sm8da
+.section sm8db
+.section sm8ea
+.section sm8eb
+.section sm8fa
+.section sm8fb
+.section sm8ga
+.section sm8gb
+.section sm8ha
+.section sm8hb
+.section sm8ia
+.section sm8ib
+.section sm8ja
+.section sm8jb
+.section sm8ka
+.section sm8kb
+.section sm8la
+.section sm8lb
+.section sm8ma
+.section sm8mb
+.section sm8na
+.section sm8nb
+.section sm8oa
+.section sm8ob
+.section sm8pa
+.section sm8pb
+.section sm8qa
+.section sm8qb
+.section sm8ra
+.section sm8rb
+.section sm8sa
+.section sm8sb
+.section sm8ta
+.section sm8tb
+.section sm8ua
+.section sm8ub
+.section sm8va
+.section sm8vb
+.section sm8wa
+.section sm8wb
+.section sm8xa
+.section sm8xb
+.section sm8ya
+.section sm8yb
+.section sm8za
+.section sm8zb
+.section sm81a
+.section sm81b
+.section sm82a
+.section sm82b
+.section sm83a
+.section sm83b
+.section sm84a
+.section sm84b
+.section sm85a
+.section sm85b
+.section sm86a
+.section sm86b
+.section sm87a
+.section sm87b
+.section sm88a
+.section sm88b
+.section sm89a
+.section sm89b
+.section sm80a
+.section sm80b
+.section sm9aa
+.section sm9ab
+.section sm9ba
+.section sm9bb
+.section sm9ca
+.section sm9cb
+.section sm9da
+.section sm9db
+.section sm9ea
+.section sm9eb
+.section sm9fa
+.section sm9fb
+.section sm9ga
+.section sm9gb
+.section sm9ha
+.section sm9hb
+.section sm9ia
+.section sm9ib
+.section sm9ja
+.section sm9jb
+.section sm9ka
+.section sm9kb
+.section sm9la
+.section sm9lb
+.section sm9ma
+.section sm9mb
+.section sm9na
+.section sm9nb
+.section sm9oa
+.section sm9ob
+.section sm9pa
+.section sm9pb
+.section sm9qa
+.section sm9qb
+.section sm9ra
+.section sm9rb
+.section sm9sa
+.section sm9sb
+.section sm9ta
+.section sm9tb
+.section sm9ua
+.section sm9ub
+.section sm9va
+.section sm9vb
+.section sm9wa
+.section sm9wb
+.section sm9xa
+.section sm9xb
+.section sm9ya
+.section sm9yb
+.section sm9za
+.section sm9zb
+.section sm91a
+.section sm91b
+.section sm92a
+.section sm92b
+.section sm93a
+.section sm93b
+.section sm94a
+.section sm94b
+.section sm95a
+.section sm95b
+.section sm96a
+.section sm96b
+.section sm97a
+.section sm97b
+.section sm98a
+.section sm98b
+.section sm99a
+.section sm99b
+.section sm90a
+.section sm90b
+.section sm0aa
+.section sm0ab
+.section sm0ba
+.section sm0bb
+.section sm0ca
+.section sm0cb
+.section sm0da
+.section sm0db
+.section sm0ea
+.section sm0eb
+.section sm0fa
+.section sm0fb
+.section sm0ga
+.section sm0gb
+.section sm0ha
+.section sm0hb
+.section sm0ia
+.section sm0ib
+.section sm0ja
+.section sm0jb
+.section sm0ka
+.section sm0kb
+.section sm0la
+.section sm0lb
+.section sm0ma
+.section sm0mb
+.section sm0na
+.section sm0nb
+.section sm0oa
+.section sm0ob
+.section sm0pa
+.section sm0pb
+.section sm0qa
+.section sm0qb
+.section sm0ra
+.section sm0rb
+.section sm0sa
+.section sm0sb
+.section sm0ta
+.section sm0tb
+.section sm0ua
+.section sm0ub
+.section sm0va
+.section sm0vb
+.section sm0wa
+.section sm0wb
+.section sm0xa
+.section sm0xb
+.section sm0ya
+.section sm0yb
+.section sm0za
+.section sm0zb
+.section sm01a
+.section sm01b
+.section sm02a
+.section sm02b
+.section sm03a
+.section sm03b
+.section sm04a
+.section sm04b
+.section sm05a
+.section sm05b
+.section sm06a
+.section sm06b
+.section sm07a
+.section sm07b
+.section sm08a
+.section sm08b
+.section sm09a
+.section sm09b
+.section sm00a
+.section sm00b
+.section snaaa
+.section snaab
+.section snaba
+.section snabb
+.section snaca
+.section snacb
+.section snada
+.section snadb
+.section snaea
+.section snaeb
+.section snafa
+.section snafb
+.section snaga
+.section snagb
+.section snaha
+.section snahb
+.section snaia
+.section snaib
+.section snaja
+.section snajb
+.section snaka
+.section snakb
+.section snala
+.section snalb
+.section snama
+.section snamb
+.section snana
+.section snanb
+.section snaoa
+.section snaob
+.section snapa
+.section snapb
+.section snaqa
+.section snaqb
+.section snara
+.section snarb
+.section snasa
+.section snasb
+.section snata
+.section snatb
+.section snaua
+.section snaub
+.section snava
+.section snavb
+.section snawa
+.section snawb
+.section snaxa
+.section snaxb
+.section snaya
+.section snayb
+.section snaza
+.section snazb
+.section sna1a
+.section sna1b
+.section sna2a
+.section sna2b
+.section sna3a
+.section sna3b
+.section sna4a
+.section sna4b
+.section sna5a
+.section sna5b
+.section sna6a
+.section sna6b
+.section sna7a
+.section sna7b
+.section sna8a
+.section sna8b
+.section sna9a
+.section sna9b
+.section sna0a
+.section sna0b
+.section snbaa
+.section snbab
+.section snbba
+.section snbbb
+.section snbca
+.section snbcb
+.section snbda
+.section snbdb
+.section snbea
+.section snbeb
+.section snbfa
+.section snbfb
+.section snbga
+.section snbgb
+.section snbha
+.section snbhb
+.section snbia
+.section snbib
+.section snbja
+.section snbjb
+.section snbka
+.section snbkb
+.section snbla
+.section snblb
+.section snbma
+.section snbmb
+.section snbna
+.section snbnb
+.section snboa
+.section snbob
+.section snbpa
+.section snbpb
+.section snbqa
+.section snbqb
+.section snbra
+.section snbrb
+.section snbsa
+.section snbsb
+.section snbta
+.section snbtb
+.section snbua
+.section snbub
+.section snbva
+.section snbvb
+.section snbwa
+.section snbwb
+.section snbxa
+.section snbxb
+.section snbya
+.section snbyb
+.section snbza
+.section snbzb
+.section snb1a
+.section snb1b
+.section snb2a
+.section snb2b
+.section snb3a
+.section snb3b
+.section snb4a
+.section snb4b
+.section snb5a
+.section snb5b
+.section snb6a
+.section snb6b
+.section snb7a
+.section snb7b
+.section snb8a
+.section snb8b
+.section snb9a
+.section snb9b
+.section snb0a
+.section snb0b
+.section sncaa
+.section sncab
+.section sncba
+.section sncbb
+.section sncca
+.section snccb
+.section sncda
+.section sncdb
+.section sncea
+.section snceb
+.section sncfa
+.section sncfb
+.section sncga
+.section sncgb
+.section sncha
+.section snchb
+.section sncia
+.section sncib
+.section sncja
+.section sncjb
+.section sncka
+.section snckb
+.section sncla
+.section snclb
+.section sncma
+.section sncmb
+.section sncna
+.section sncnb
+.section sncoa
+.section sncob
+.section sncpa
+.section sncpb
+.section sncqa
+.section sncqb
+.section sncra
+.section sncrb
+.section sncsa
+.section sncsb
+.section sncta
+.section snctb
+.section sncua
+.section sncub
+.section sncva
+.section sncvb
+.section sncwa
+.section sncwb
+.section sncxa
+.section sncxb
+.section sncya
+.section sncyb
+.section sncza
+.section snczb
+.section snc1a
+.section snc1b
+.section snc2a
+.section snc2b
+.section snc3a
+.section snc3b
+.section snc4a
+.section snc4b
+.section snc5a
+.section snc5b
+.section snc6a
+.section snc6b
+.section snc7a
+.section snc7b
+.section snc8a
+.section snc8b
+.section snc9a
+.section snc9b
+.section snc0a
+.section snc0b
+.section sndaa
+.section sndab
+.section sndba
+.section sndbb
+.section sndca
+.section sndcb
+.section sndda
+.section snddb
+.section sndea
+.section sndeb
+.section sndfa
+.section sndfb
+.section sndga
+.section sndgb
+.section sndha
+.section sndhb
+.section sndia
+.section sndib
+.section sndja
+.section sndjb
+.section sndka
+.section sndkb
+.section sndla
+.section sndlb
+.section sndma
+.section sndmb
+.section sndna
+.section sndnb
+.section sndoa
+.section sndob
+.section sndpa
+.section sndpb
+.section sndqa
+.section sndqb
+.section sndra
+.section sndrb
+.section sndsa
+.section sndsb
+.section sndta
+.section sndtb
+.section sndua
+.section sndub
+.section sndva
+.section sndvb
+.section sndwa
+.section sndwb
+.section sndxa
+.section sndxb
+.section sndya
+.section sndyb
+.section sndza
+.section sndzb
+.section snd1a
+.section snd1b
+.section snd2a
+.section snd2b
+.section snd3a
+.section snd3b
+.section snd4a
+.section snd4b
+.section snd5a
+.section snd5b
+.section snd6a
+.section snd6b
+.section snd7a
+.section snd7b
+.section snd8a
+.section snd8b
+.section snd9a
+.section snd9b
+.section snd0a
+.section snd0b
+.section sneaa
+.section sneab
+.section sneba
+.section snebb
+.section sneca
+.section snecb
+.section sneda
+.section snedb
+.section sneea
+.section sneeb
+.section snefa
+.section snefb
+.section snega
+.section snegb
+.section sneha
+.section snehb
+.section sneia
+.section sneib
+.section sneja
+.section snejb
+.section sneka
+.section snekb
+.section snela
+.section snelb
+.section snema
+.section snemb
+.section snena
+.section snenb
+.section sneoa
+.section sneob
+.section snepa
+.section snepb
+.section sneqa
+.section sneqb
+.section snera
+.section snerb
+.section snesa
+.section snesb
+.section sneta
+.section snetb
+.section sneua
+.section sneub
+.section sneva
+.section snevb
+.section snewa
+.section snewb
+.section snexa
+.section snexb
+.section sneya
+.section sneyb
+.section sneza
+.section snezb
+.section sne1a
+.section sne1b
+.section sne2a
+.section sne2b
+.section sne3a
+.section sne3b
+.section sne4a
+.section sne4b
+.section sne5a
+.section sne5b
+.section sne6a
+.section sne6b
+.section sne7a
+.section sne7b
+.section sne8a
+.section sne8b
+.section sne9a
+.section sne9b
+.section sne0a
+.section sne0b
+.section snfaa
+.section snfab
+.section snfba
+.section snfbb
+.section snfca
+.section snfcb
+.section snfda
+.section snfdb
+.section snfea
+.section snfeb
+.section snffa
+.section snffb
+.section snfga
+.section snfgb
+.section snfha
+.section snfhb
+.section snfia
+.section snfib
+.section snfja
+.section snfjb
+.section snfka
+.section snfkb
+.section snfla
+.section snflb
+.section snfma
+.section snfmb
+.section snfna
+.section snfnb
+.section snfoa
+.section snfob
+.section snfpa
+.section snfpb
+.section snfqa
+.section snfqb
+.section snfra
+.section snfrb
+.section snfsa
+.section snfsb
+.section snfta
+.section snftb
+.section snfua
+.section snfub
+.section snfva
+.section snfvb
+.section snfwa
+.section snfwb
+.section snfxa
+.section snfxb
+.section snfya
+.section snfyb
+.section snfza
+.section snfzb
+.section snf1a
+.section snf1b
+.section snf2a
+.section snf2b
+.section snf3a
+.section snf3b
+.section snf4a
+.section snf4b
+.section snf5a
+.section snf5b
+.section snf6a
+.section snf6b
+.section snf7a
+.section snf7b
+.section snf8a
+.section snf8b
+.section snf9a
+.section snf9b
+.section snf0a
+.section snf0b
+.section sngaa
+.section sngab
+.section sngba
+.section sngbb
+.section sngca
+.section sngcb
+.section sngda
+.section sngdb
+.section sngea
+.section sngeb
+.section sngfa
+.section sngfb
+.section sngga
+.section snggb
+.section sngha
+.section snghb
+.section sngia
+.section sngib
+.section sngja
+.section sngjb
+.section sngka
+.section sngkb
+.section sngla
+.section snglb
+.section sngma
+.section sngmb
+.section sngna
+.section sngnb
+.section sngoa
+.section sngob
+.section sngpa
+.section sngpb
+.section sngqa
+.section sngqb
+.section sngra
+.section sngrb
+.section sngsa
+.section sngsb
+.section sngta
+.section sngtb
+.section sngua
+.section sngub
+.section sngva
+.section sngvb
+.section sngwa
+.section sngwb
+.section sngxa
+.section sngxb
+.section sngya
+.section sngyb
+.section sngza
+.section sngzb
+.section sng1a
+.section sng1b
+.section sng2a
+.section sng2b
+.section sng3a
+.section sng3b
+.section sng4a
+.section sng4b
+.section sng5a
+.section sng5b
+.section sng6a
+.section sng6b
+.section sng7a
+.section sng7b
+.section sng8a
+.section sng8b
+.section sng9a
+.section sng9b
+.section sng0a
+.section sng0b
+.section snhaa
+.section snhab
+.section snhba
+.section snhbb
+.section snhca
+.section snhcb
+.section snhda
+.section snhdb
+.section snhea
+.section snheb
+.section snhfa
+.section snhfb
+.section snhga
+.section snhgb
+.section snhha
+.section snhhb
+.section snhia
+.section snhib
+.section snhja
+.section snhjb
+.section snhka
+.section snhkb
+.section snhla
+.section snhlb
+.section snhma
+.section snhmb
+.section snhna
+.section snhnb
+.section snhoa
+.section snhob
+.section snhpa
+.section snhpb
+.section snhqa
+.section snhqb
+.section snhra
+.section snhrb
+.section snhsa
+.section snhsb
+.section snhta
+.section snhtb
+.section snhua
+.section snhub
+.section snhva
+.section snhvb
+.section snhwa
+.section snhwb
+.section snhxa
+.section snhxb
+.section snhya
+.section snhyb
+.section snhza
+.section snhzb
+.section snh1a
+.section snh1b
+.section snh2a
+.section snh2b
+.section snh3a
+.section snh3b
+.section snh4a
+.section snh4b
+.section snh5a
+.section snh5b
+.section snh6a
+.section snh6b
+.section snh7a
+.section snh7b
+.section snh8a
+.section snh8b
+.section snh9a
+.section snh9b
+.section snh0a
+.section snh0b
+.section sniaa
+.section sniab
+.section sniba
+.section snibb
+.section snica
+.section snicb
+.section snida
+.section snidb
+.section sniea
+.section snieb
+.section snifa
+.section snifb
+.section sniga
+.section snigb
+.section sniha
+.section snihb
+.section sniia
+.section sniib
+.section snija
+.section snijb
+.section snika
+.section snikb
+.section snila
+.section snilb
+.section snima
+.section snimb
+.section snina
+.section sninb
+.section snioa
+.section sniob
+.section snipa
+.section snipb
+.section sniqa
+.section sniqb
+.section snira
+.section snirb
+.section snisa
+.section snisb
+.section snita
+.section snitb
+.section sniua
+.section sniub
+.section sniva
+.section snivb
+.section sniwa
+.section sniwb
+.section snixa
+.section snixb
+.section sniya
+.section sniyb
+.section sniza
+.section snizb
+.section sni1a
+.section sni1b
+.section sni2a
+.section sni2b
+.section sni3a
+.section sni3b
+.section sni4a
+.section sni4b
+.section sni5a
+.section sni5b
+.section sni6a
+.section sni6b
+.section sni7a
+.section sni7b
+.section sni8a
+.section sni8b
+.section sni9a
+.section sni9b
+.section sni0a
+.section sni0b
+.section snjaa
+.section snjab
+.section snjba
+.section snjbb
+.section snjca
+.section snjcb
+.section snjda
+.section snjdb
+.section snjea
+.section snjeb
+.section snjfa
+.section snjfb
+.section snjga
+.section snjgb
+.section snjha
+.section snjhb
+.section snjia
+.section snjib
+.section snjja
+.section snjjb
+.section snjka
+.section snjkb
+.section snjla
+.section snjlb
+.section snjma
+.section snjmb
+.section snjna
+.section snjnb
+.section snjoa
+.section snjob
+.section snjpa
+.section snjpb
+.section snjqa
+.section snjqb
+.section snjra
+.section snjrb
+.section snjsa
+.section snjsb
+.section snjta
+.section snjtb
+.section snjua
+.section snjub
+.section snjva
+.section snjvb
+.section snjwa
+.section snjwb
+.section snjxa
+.section snjxb
+.section snjya
+.section snjyb
+.section snjza
+.section snjzb
+.section snj1a
+.section snj1b
+.section snj2a
+.section snj2b
+.section snj3a
+.section snj3b
+.section snj4a
+.section snj4b
+.section snj5a
+.section snj5b
+.section snj6a
+.section snj6b
+.section snj7a
+.section snj7b
+.section snj8a
+.section snj8b
+.section snj9a
+.section snj9b
+.section snj0a
+.section snj0b
+.section snkaa
+.section snkab
+.section snkba
+.section snkbb
+.section snkca
+.section snkcb
+.section snkda
+.section snkdb
+.section snkea
+.section snkeb
+.section snkfa
+.section snkfb
+.section snkga
+.section snkgb
+.section snkha
+.section snkhb
+.section snkia
+.section snkib
+.section snkja
+.section snkjb
+.section snkka
+.section snkkb
+.section snkla
+.section snklb
+.section snkma
+.section snkmb
+.section snkna
+.section snknb
+.section snkoa
+.section snkob
+.section snkpa
+.section snkpb
+.section snkqa
+.section snkqb
+.section snkra
+.section snkrb
+.section snksa
+.section snksb
+.section snkta
+.section snktb
+.section snkua
+.section snkub
+.section snkva
+.section snkvb
+.section snkwa
+.section snkwb
+.section snkxa
+.section snkxb
+.section snkya
+.section snkyb
+.section snkza
+.section snkzb
+.section snk1a
+.section snk1b
+.section snk2a
+.section snk2b
+.section snk3a
+.section snk3b
+.section snk4a
+.section snk4b
+.section snk5a
+.section snk5b
+.section snk6a
+.section snk6b
+.section snk7a
+.section snk7b
+.section snk8a
+.section snk8b
+.section snk9a
+.section snk9b
+.section snk0a
+.section snk0b
+.section snlaa
+.section snlab
+.section snlba
+.section snlbb
+.section snlca
+.section snlcb
+.section snlda
+.section snldb
+.section snlea
+.section snleb
+.section snlfa
+.section snlfb
+.section snlga
+.section snlgb
+.section snlha
+.section snlhb
+.section snlia
+.section snlib
+.section snlja
+.section snljb
+.section snlka
+.section snlkb
+.section snlla
+.section snllb
+.section snlma
+.section snlmb
+.section snlna
+.section snlnb
+.section snloa
+.section snlob
+.section snlpa
+.section snlpb
+.section snlqa
+.section snlqb
+.section snlra
+.section snlrb
+.section snlsa
+.section snlsb
+.section snlta
+.section snltb
+.section snlua
+.section snlub
+.section snlva
+.section snlvb
+.section snlwa
+.section snlwb
+.section snlxa
+.section snlxb
+.section snlya
+.section snlyb
+.section snlza
+.section snlzb
+.section snl1a
+.section snl1b
+.section snl2a
+.section snl2b
+.section snl3a
+.section snl3b
+.section snl4a
+.section snl4b
+.section snl5a
+.section snl5b
+.section snl6a
+.section snl6b
+.section snl7a
+.section snl7b
+.section snl8a
+.section snl8b
+.section snl9a
+.section snl9b
+.section snl0a
+.section snl0b
+.section snmaa
+.section snmab
+.section snmba
+.section snmbb
+.section snmca
+.section snmcb
+.section snmda
+.section snmdb
+.section snmea
+.section snmeb
+.section snmfa
+.section snmfb
+.section snmga
+.section snmgb
+.section snmha
+.section snmhb
+.section snmia
+.section snmib
+.section snmja
+.section snmjb
+.section snmka
+.section snmkb
+.section snmla
+.section snmlb
+.section snmma
+.section snmmb
+.section snmna
+.section snmnb
+.section snmoa
+.section snmob
+.section snmpa
+.section snmpb
+.section snmqa
+.section snmqb
+.section snmra
+.section snmrb
+.section snmsa
+.section snmsb
+.section snmta
+.section snmtb
+.section snmua
+.section snmub
+.section snmva
+.section snmvb
+.section snmwa
+.section snmwb
+.section snmxa
+.section snmxb
+.section snmya
+.section snmyb
+.section snmza
+.section snmzb
+.section snm1a
+.section snm1b
+.section snm2a
+.section snm2b
+.section snm3a
+.section snm3b
+.section snm4a
+.section snm4b
+.section snm5a
+.section snm5b
+.section snm6a
+.section snm6b
+.section snm7a
+.section snm7b
+.section snm8a
+.section snm8b
+.section snm9a
+.section snm9b
+.section snm0a
+.section snm0b
+.section snnaa
+.section snnab
+.section snnba
+.section snnbb
+.section snnca
+.section snncb
+.section snnda
+.section snndb
+.section snnea
+.section snneb
+.section snnfa
+.section snnfb
+.section snnga
+.section snngb
+.section snnha
+.section snnhb
+.section snnia
+.section snnib
+.section snnja
+.section snnjb
+.section snnka
+.section snnkb
+.section snnla
+.section snnlb
+.section snnma
+.section snnmb
+.section snnna
+.section snnnb
+.section snnoa
+.section snnob
+.section snnpa
+.section snnpb
+.section snnqa
+.section snnqb
+.section snnra
+.section snnrb
+.section snnsa
+.section snnsb
+.section snnta
+.section snntb
+.section snnua
+.section snnub
+.section snnva
+.section snnvb
+.section snnwa
+.section snnwb
+.section snnxa
+.section snnxb
+.section snnya
+.section snnyb
+.section snnza
+.section snnzb
+.section snn1a
+.section snn1b
+.section snn2a
+.section snn2b
+.section snn3a
+.section snn3b
+.section snn4a
+.section snn4b
+.section snn5a
+.section snn5b
+.section snn6a
+.section snn6b
+.section snn7a
+.section snn7b
+.section snn8a
+.section snn8b
+.section snn9a
+.section snn9b
+.section snn0a
+.section snn0b
+.section snoaa
+.section snoab
+.section snoba
+.section snobb
+.section snoca
+.section snocb
+.section snoda
+.section snodb
+.section snoea
+.section snoeb
+.section snofa
+.section snofb
+.section snoga
+.section snogb
+.section snoha
+.section snohb
+.section snoia
+.section snoib
+.section snoja
+.section snojb
+.section snoka
+.section snokb
+.section snola
+.section snolb
+.section snoma
+.section snomb
+.section snona
+.section snonb
+.section snooa
+.section snoob
+.section snopa
+.section snopb
+.section snoqa
+.section snoqb
+.section snora
+.section snorb
+.section snosa
+.section snosb
+.section snota
+.section snotb
+.section snoua
+.section snoub
+.section snova
+.section snovb
+.section snowa
+.section snowb
+.section snoxa
+.section snoxb
+.section snoya
+.section snoyb
+.section snoza
+.section snozb
+.section sno1a
+.section sno1b
+.section sno2a
+.section sno2b
+.section sno3a
+.section sno3b
+.section sno4a
+.section sno4b
+.section sno5a
+.section sno5b
+.section sno6a
+.section sno6b
+.section sno7a
+.section sno7b
+.section sno8a
+.section sno8b
+.section sno9a
+.section sno9b
+.section sno0a
+.section sno0b
+.section snpaa
+.section snpab
+.section snpba
+.section snpbb
+.section snpca
+.section snpcb
+.section snpda
+.section snpdb
+.section snpea
+.section snpeb
+.section snpfa
+.section snpfb
+.section snpga
+.section snpgb
+.section snpha
+.section snphb
+.section snpia
+.section snpib
+.section snpja
+.section snpjb
+.section snpka
+.section snpkb
+.section snpla
+.section snplb
+.section snpma
+.section snpmb
+.section snpna
+.section snpnb
+.section snpoa
+.section snpob
+.section snppa
+.section snppb
+.section snpqa
+.section snpqb
+.section snpra
+.section snprb
+.section snpsa
+.section snpsb
+.section snpta
+.section snptb
+.section snpua
+.section snpub
+.section snpva
+.section snpvb
+.section snpwa
+.section snpwb
+.section snpxa
+.section snpxb
+.section snpya
+.section snpyb
+.section snpza
+.section snpzb
+.section snp1a
+.section snp1b
+.section snp2a
+.section snp2b
+.section snp3a
+.section snp3b
+.section snp4a
+.section snp4b
+.section snp5a
+.section snp5b
+.section snp6a
+.section snp6b
+.section snp7a
+.section snp7b
+.section snp8a
+.section snp8b
+.section snp9a
+.section snp9b
+.section snp0a
+.section snp0b
+.section snqaa
+.section snqab
+.section snqba
+.section snqbb
+.section snqca
+.section snqcb
+.section snqda
+.section snqdb
+.section snqea
+.section snqeb
+.section snqfa
+.section snqfb
+.section snqga
+.section snqgb
+.section snqha
+.section snqhb
+.section snqia
+.section snqib
+.section snqja
+.section snqjb
+.section snqka
+.section snqkb
+.section snqla
+.section snqlb
+.section snqma
+.section snqmb
+.section snqna
+.section snqnb
+.section snqoa
+.section snqob
+.section snqpa
+.section snqpb
+.section snqqa
+.section snqqb
+.section snqra
+.section snqrb
+.section snqsa
+.section snqsb
+.section snqta
+.section snqtb
+.section snqua
+.section snqub
+.section snqva
+.section snqvb
+.section snqwa
+.section snqwb
+.section snqxa
+.section snqxb
+.section snqya
+.section snqyb
+.section snqza
+.section snqzb
+.section snq1a
+.section snq1b
+.section snq2a
+.section snq2b
+.section snq3a
+.section snq3b
+.section snq4a
+.section snq4b
+.section snq5a
+.section snq5b
+.section snq6a
+.section snq6b
+.section snq7a
+.section snq7b
+.section snq8a
+.section snq8b
+.section snq9a
+.section snq9b
+.section snq0a
+.section snq0b
+.section snraa
+.section snrab
+.section snrba
+.section snrbb
+.section snrca
+.section snrcb
+.section snrda
+.section snrdb
+.section snrea
+.section snreb
+.section snrfa
+.section snrfb
+.section snrga
+.section snrgb
+.section snrha
+.section snrhb
+.section snria
+.section snrib
+.section snrja
+.section snrjb
+.section snrka
+.section snrkb
+.section snrla
+.section snrlb
+.section snrma
+.section snrmb
+.section snrna
+.section snrnb
+.section snroa
+.section snrob
+.section snrpa
+.section snrpb
+.section snrqa
+.section snrqb
+.section snrra
+.section snrrb
+.section snrsa
+.section snrsb
+.section snrta
+.section snrtb
+.section snrua
+.section snrub
+.section snrva
+.section snrvb
+.section snrwa
+.section snrwb
+.section snrxa
+.section snrxb
+.section snrya
+.section snryb
+.section snrza
+.section snrzb
+.section snr1a
+.section snr1b
+.section snr2a
+.section snr2b
+.section snr3a
+.section snr3b
+.section snr4a
+.section snr4b
+.section snr5a
+.section snr5b
+.section snr6a
+.section snr6b
+.section snr7a
+.section snr7b
+.section snr8a
+.section snr8b
+.section snr9a
+.section snr9b
+.section snr0a
+.section snr0b
+.section snsaa
+.section snsab
+.section snsba
+.section snsbb
+.section snsca
+.section snscb
+.section snsda
+.section snsdb
+.section snsea
+.section snseb
+.section snsfa
+.section snsfb
+.section snsga
+.section snsgb
+.section snsha
+.section snshb
+.section snsia
+.section snsib
+.section snsja
+.section snsjb
+.section snska
+.section snskb
+.section snsla
+.section snslb
+.section snsma
+.section snsmb
+.section snsna
+.section snsnb
+.section snsoa
+.section snsob
+.section snspa
+.section snspb
+.section snsqa
+.section snsqb
+.section snsra
+.section snsrb
+.section snssa
+.section snssb
+.section snsta
+.section snstb
+.section snsua
+.section snsub
+.section snsva
+.section snsvb
+.section snswa
+.section snswb
+.section snsxa
+.section snsxb
+.section snsya
+.section snsyb
+.section snsza
+.section snszb
+.section sns1a
+.section sns1b
+.section sns2a
+.section sns2b
+.section sns3a
+.section sns3b
+.section sns4a
+.section sns4b
+.section sns5a
+.section sns5b
+.section sns6a
+.section sns6b
+.section sns7a
+.section sns7b
+.section sns8a
+.section sns8b
+.section sns9a
+.section sns9b
+.section sns0a
+.section sns0b
+.section sntaa
+.section sntab
+.section sntba
+.section sntbb
+.section sntca
+.section sntcb
+.section sntda
+.section sntdb
+.section sntea
+.section snteb
+.section sntfa
+.section sntfb
+.section sntga
+.section sntgb
+.section sntha
+.section snthb
+.section sntia
+.section sntib
+.section sntja
+.section sntjb
+.section sntka
+.section sntkb
+.section sntla
+.section sntlb
+.section sntma
+.section sntmb
+.section sntna
+.section sntnb
+.section sntoa
+.section sntob
+.section sntpa
+.section sntpb
+.section sntqa
+.section sntqb
+.section sntra
+.section sntrb
+.section sntsa
+.section sntsb
+.section sntta
+.section snttb
+.section sntua
+.section sntub
+.section sntva
+.section sntvb
+.section sntwa
+.section sntwb
+.section sntxa
+.section sntxb
+.section sntya
+.section sntyb
+.section sntza
+.section sntzb
+.section snt1a
+.section snt1b
+.section snt2a
+.section snt2b
+.section snt3a
+.section snt3b
+.section snt4a
+.section snt4b
+.section snt5a
+.section snt5b
+.section snt6a
+.section snt6b
+.section snt7a
+.section snt7b
+.section snt8a
+.section snt8b
+.section snt9a
+.section snt9b
+.section snt0a
+.section snt0b
+.section snuaa
+.section snuab
+.section snuba
+.section snubb
+.section snuca
+.section snucb
+.section snuda
+.section snudb
+.section snuea
+.section snueb
+.section snufa
+.section snufb
+.section snuga
+.section snugb
+.section snuha
+.section snuhb
+.section snuia
+.section snuib
+.section snuja
+.section snujb
+.section snuka
+.section snukb
+.section snula
+.section snulb
+.section snuma
+.section snumb
+.section snuna
+.section snunb
+.section snuoa
+.section snuob
+.section snupa
+.section snupb
+.section snuqa
+.section snuqb
+.section snura
+.section snurb
+.section snusa
+.section snusb
+.section snuta
+.section snutb
+.section snuua
+.section snuub
+.section snuva
+.section snuvb
+.section snuwa
+.section snuwb
+.section snuxa
+.section snuxb
+.section snuya
+.section snuyb
+.section snuza
+.section snuzb
+.section snu1a
+.section snu1b
+.section snu2a
+.section snu2b
+.section snu3a
+.section snu3b
+.section snu4a
+.section snu4b
+.section snu5a
+.section snu5b
+.section snu6a
+.section snu6b
+.section snu7a
+.section snu7b
+.section snu8a
+.section snu8b
+.section snu9a
+.section snu9b
+.section snu0a
+.section snu0b
+.section snvaa
+.section snvab
+.section snvba
+.section snvbb
+.section snvca
+.section snvcb
+.section snvda
+.section snvdb
+.section snvea
+.section snveb
+.section snvfa
+.section snvfb
+.section snvga
+.section snvgb
+.section snvha
+.section snvhb
+.section snvia
+.section snvib
+.section snvja
+.section snvjb
+.section snvka
+.section snvkb
+.section snvla
+.section snvlb
+.section snvma
+.section snvmb
+.section snvna
+.section snvnb
+.section snvoa
+.section snvob
+.section snvpa
+.section snvpb
+.section snvqa
+.section snvqb
+.section snvra
+.section snvrb
+.section snvsa
+.section snvsb
+.section snvta
+.section snvtb
+.section snvua
+.section snvub
+.section snvva
+.section snvvb
+.section snvwa
+.section snvwb
+.section snvxa
+.section snvxb
+.section snvya
+.section snvyb
+.section snvza
+.section snvzb
+.section snv1a
+.section snv1b
+.section snv2a
+.section snv2b
+.section snv3a
+.section snv3b
+.section snv4a
+.section snv4b
+.section snv5a
+.section snv5b
+.section snv6a
+.section snv6b
+.section snv7a
+.section snv7b
+.section snv8a
+.section snv8b
+.section snv9a
+.section snv9b
+.section snv0a
+.section snv0b
+.section snwaa
+.section snwab
+.section snwba
+.section snwbb
+.section snwca
+.section snwcb
+.section snwda
+.section snwdb
+.section snwea
+.section snweb
+.section snwfa
+.section snwfb
+.section snwga
+.section snwgb
+.section snwha
+.section snwhb
+.section snwia
+.section snwib
+.section snwja
+.section snwjb
+.section snwka
+.section snwkb
+.section snwla
+.section snwlb
+.section snwma
+.section snwmb
+.section snwna
+.section snwnb
+.section snwoa
+.section snwob
+.section snwpa
+.section snwpb
+.section snwqa
+.section snwqb
+.section snwra
+.section snwrb
+.section snwsa
+.section snwsb
+.section snwta
+.section snwtb
+.section snwua
+.section snwub
+.section snwva
+.section snwvb
+.section snwwa
+.section snwwb
+.section snwxa
+.section snwxb
+.section snwya
+.section snwyb
+.section snwza
+.section snwzb
+.section snw1a
+.section snw1b
+.section snw2a
+.section snw2b
+.section snw3a
+.section snw3b
+.section snw4a
+.section snw4b
+.section snw5a
+.section snw5b
+.section snw6a
+.section snw6b
+.section snw7a
+.section snw7b
+.section snw8a
+.section snw8b
+.section snw9a
+.section snw9b
+.section snw0a
+.section snw0b
+.section snxaa
+.section snxab
+.section snxba
+.section snxbb
+.section snxca
+.section snxcb
+.section snxda
+.section snxdb
+.section snxea
+.section snxeb
+.section snxfa
+.section snxfb
+.section snxga
+.section snxgb
+.section snxha
+.section snxhb
+.section snxia
+.section snxib
+.section snxja
+.section snxjb
+.section snxka
+.section snxkb
+.section snxla
+.section snxlb
+.section snxma
+.section snxmb
+.section snxna
+.section snxnb
+.section snxoa
+.section snxob
+.section snxpa
+.section snxpb
+.section snxqa
+.section snxqb
+.section snxra
+.section snxrb
+.section snxsa
+.section snxsb
+.section snxta
+.section snxtb
+.section snxua
+.section snxub
+.section snxva
+.section snxvb
+.section snxwa
+.section snxwb
+.section snxxa
+.section snxxb
+.section snxya
+.section snxyb
+.section snxza
+.section snxzb
+.section snx1a
+.section snx1b
+.section snx2a
+.section snx2b
+.section snx3a
+.section snx3b
+.section snx4a
+.section snx4b
+.section snx5a
+.section snx5b
+.section snx6a
+.section snx6b
+.section snx7a
+.section snx7b
+.section snx8a
+.section snx8b
+.section snx9a
+.section snx9b
+.section snx0a
+.section snx0b
+.section snyaa
+.section snyab
+.section snyba
+.section snybb
+.section snyca
+.section snycb
+.section snyda
+.section snydb
+.section snyea
+.section snyeb
+.section snyfa
+.section snyfb
+.section snyga
+.section snygb
+.section snyha
+.section snyhb
+.section snyia
+.section snyib
+.section snyja
+.section snyjb
+.section snyka
+.section snykb
+.section snyla
+.section snylb
+.section snyma
+.section snymb
+.section snyna
+.section snynb
+.section snyoa
+.section snyob
+.section snypa
+.section snypb
+.section snyqa
+.section snyqb
+.section snyra
+.section snyrb
+.section snysa
+.section snysb
+.section snyta
+.section snytb
+.section snyua
+.section snyub
+.section snyva
+.section snyvb
+.section snywa
+.section snywb
+.section snyxa
+.section snyxb
+.section snyya
+.section snyyb
+.section snyza
+.section snyzb
+.section sny1a
+.section sny1b
+.section sny2a
+.section sny2b
+.section sny3a
+.section sny3b
+.section sny4a
+.section sny4b
+.section sny5a
+.section sny5b
+.section sny6a
+.section sny6b
+.section sny7a
+.section sny7b
+.section sny8a
+.section sny8b
+.section sny9a
+.section sny9b
+.section sny0a
+.section sny0b
+.section snzaa
+.section snzab
+.section snzba
+.section snzbb
+.section snzca
+.section snzcb
+.section snzda
+.section snzdb
+.section snzea
+.section snzeb
+.section snzfa
+.section snzfb
+.section snzga
+.section snzgb
+.section snzha
+.section snzhb
+.section snzia
+.section snzib
+.section snzja
+.section snzjb
+.section snzka
+.section snzkb
+.section snzla
+.section snzlb
+.section snzma
+.section snzmb
+.section snzna
+.section snznb
+.section snzoa
+.section snzob
+.section snzpa
+.section snzpb
+.section snzqa
+.section snzqb
+.section snzra
+.section snzrb
+.section snzsa
+.section snzsb
+.section snzta
+.section snztb
+.section snzua
+.section snzub
+.section snzva
+.section snzvb
+.section snzwa
+.section snzwb
+.section snzxa
+.section snzxb
+.section snzya
+.section snzyb
+.section snzza
+.section snzzb
+.section snz1a
+.section snz1b
+.section snz2a
+.section snz2b
+.section snz3a
+.section snz3b
+.section snz4a
+.section snz4b
+.section snz5a
+.section snz5b
+.section snz6a
+.section snz6b
+.section snz7a
+.section snz7b
+.section snz8a
+.section snz8b
+.section snz9a
+.section snz9b
+.section snz0a
+.section snz0b
+.section sn1aa
+.section sn1ab
+.section sn1ba
+.section sn1bb
+.section sn1ca
+.section sn1cb
+.section sn1da
+.section sn1db
+.section sn1ea
+.section sn1eb
+.section sn1fa
+.section sn1fb
+.section sn1ga
+.section sn1gb
+.section sn1ha
+.section sn1hb
+.section sn1ia
+.section sn1ib
+.section sn1ja
+.section sn1jb
+.section sn1ka
+.section sn1kb
+.section sn1la
+.section sn1lb
+.section sn1ma
+.section sn1mb
+.section sn1na
+.section sn1nb
+.section sn1oa
+.section sn1ob
+.section sn1pa
+.section sn1pb
+.section sn1qa
+.section sn1qb
+.section sn1ra
+.section sn1rb
+.section sn1sa
+.section sn1sb
+.section sn1ta
+.section sn1tb
+.section sn1ua
+.section sn1ub
+.section sn1va
+.section sn1vb
+.section sn1wa
+.section sn1wb
+.section sn1xa
+.section sn1xb
+.section sn1ya
+.section sn1yb
+.section sn1za
+.section sn1zb
+.section sn11a
+.section sn11b
+.section sn12a
+.section sn12b
+.section sn13a
+.section sn13b
+.section sn14a
+.section sn14b
+.section sn15a
+.section sn15b
+.section sn16a
+.section sn16b
+.section sn17a
+.section sn17b
+.section sn18a
+.section sn18b
+.section sn19a
+.section sn19b
+.section sn10a
+.section sn10b
+.section sn2aa
+.section sn2ab
+.section sn2ba
+.section sn2bb
+.section sn2ca
+.section sn2cb
+.section sn2da
+.section sn2db
+.section sn2ea
+.section sn2eb
+.section sn2fa
+.section sn2fb
+.section sn2ga
+.section sn2gb
+.section sn2ha
+.section sn2hb
+.section sn2ia
+.section sn2ib
+.section sn2ja
+.section sn2jb
+.section sn2ka
+.section sn2kb
+.section sn2la
+.section sn2lb
+.section sn2ma
+.section sn2mb
+.section sn2na
+.section sn2nb
+.section sn2oa
+.section sn2ob
+.section sn2pa
+.section sn2pb
+.section sn2qa
+.section sn2qb
+.section sn2ra
+.section sn2rb
+.section sn2sa
+.section sn2sb
+.section sn2ta
+.section sn2tb
+.section sn2ua
+.section sn2ub
+.section sn2va
+.section sn2vb
+.section sn2wa
+.section sn2wb
+.section sn2xa
+.section sn2xb
+.section sn2ya
+.section sn2yb
+.section sn2za
+.section sn2zb
+.section sn21a
+.section sn21b
+.section sn22a
+.section sn22b
+.section sn23a
+.section sn23b
+.section sn24a
+.section sn24b
+.section sn25a
+.section sn25b
+.section sn26a
+.section sn26b
+.section sn27a
+.section sn27b
+.section sn28a
+.section sn28b
+.section sn29a
+.section sn29b
+.section sn20a
+.section sn20b
+.section sn3aa
+.section sn3ab
+.section sn3ba
+.section sn3bb
+.section sn3ca
+.section sn3cb
+.section sn3da
+.section sn3db
+.section sn3ea
+.section sn3eb
+.section sn3fa
+.section sn3fb
+.section sn3ga
+.section sn3gb
+.section sn3ha
+.section sn3hb
+.section sn3ia
+.section sn3ib
+.section sn3ja
+.section sn3jb
+.section sn3ka
+.section sn3kb
+.section sn3la
+.section sn3lb
+.section sn3ma
+.section sn3mb
+.section sn3na
+.section sn3nb
+.section sn3oa
+.section sn3ob
+.section sn3pa
+.section sn3pb
+.section sn3qa
+.section sn3qb
+.section sn3ra
+.section sn3rb
+.section sn3sa
+.section sn3sb
+.section sn3ta
+.section sn3tb
+.section sn3ua
+.section sn3ub
+.section sn3va
+.section sn3vb
+.section sn3wa
+.section sn3wb
+.section sn3xa
+.section sn3xb
+.section sn3ya
+.section sn3yb
+.section sn3za
+.section sn3zb
+.section sn31a
+.section sn31b
+.section sn32a
+.section sn32b
+.section sn33a
+.section sn33b
+.section sn34a
+.section sn34b
+.section sn35a
+.section sn35b
+.section sn36a
+.section sn36b
+.section sn37a
+.section sn37b
+.section sn38a
+.section sn38b
+.section sn39a
+.section sn39b
+.section sn30a
+.section sn30b
+.section sn4aa
+.section sn4ab
+.section sn4ba
+.section sn4bb
+.section sn4ca
+.section sn4cb
+.section sn4da
+.section sn4db
+.section sn4ea
+.section sn4eb
+.section sn4fa
+.section sn4fb
+.section sn4ga
+.section sn4gb
+.section sn4ha
+.section sn4hb
+.section sn4ia
+.section sn4ib
+.section sn4ja
+.section sn4jb
+.section sn4ka
+.section sn4kb
+.section sn4la
+.section sn4lb
+.section sn4ma
+.section sn4mb
+.section sn4na
+.section sn4nb
+.section sn4oa
+.section sn4ob
+.section sn4pa
+.section sn4pb
+.section sn4qa
+.section sn4qb
+.section sn4ra
+.section sn4rb
+.section sn4sa
+.section sn4sb
+.section sn4ta
+.section sn4tb
+.section sn4ua
+.section sn4ub
+.section sn4va
+.section sn4vb
+.section sn4wa
+.section sn4wb
+.section sn4xa
+.section sn4xb
+.section sn4ya
+.section sn4yb
+.section sn4za
+.section sn4zb
+.section sn41a
+.section sn41b
+.section sn42a
+.section sn42b
+.section sn43a
+.section sn43b
+.section sn44a
+.section sn44b
+.section sn45a
+.section sn45b
+.section sn46a
+.section sn46b
+.section sn47a
+.section sn47b
+.section sn48a
+.section sn48b
+.section sn49a
+.section sn49b
+.section sn40a
+.section sn40b
+.section sn5aa
+.section sn5ab
+.section sn5ba
+.section sn5bb
+.section sn5ca
+.section sn5cb
+.section sn5da
+.section sn5db
+.section sn5ea
+.section sn5eb
+.section sn5fa
+.section sn5fb
+.section sn5ga
+.section sn5gb
+.section sn5ha
+.section sn5hb
+.section sn5ia
+.section sn5ib
+.section sn5ja
+.section sn5jb
+.section sn5ka
+.section sn5kb
+.section sn5la
+.section sn5lb
+.section sn5ma
+.section sn5mb
+.section sn5na
+.section sn5nb
+.section sn5oa
+.section sn5ob
+.section sn5pa
+.section sn5pb
+.section sn5qa
+.section sn5qb
+.section sn5ra
+.section sn5rb
+.section sn5sa
+.section sn5sb
+.section sn5ta
+.section sn5tb
+.section sn5ua
+.section sn5ub
+.section sn5va
+.section sn5vb
+.section sn5wa
+.section sn5wb
+.section sn5xa
+.section sn5xb
+.section sn5ya
+.section sn5yb
+.section sn5za
+.section sn5zb
+.section sn51a
+.section sn51b
+.section sn52a
+.section sn52b
+.section sn53a
+.section sn53b
+.section sn54a
+.section sn54b
+.section sn55a
+.section sn55b
+.section sn56a
+.section sn56b
+.section sn57a
+.section sn57b
+.section sn58a
+.section sn58b
+.section sn59a
+.section sn59b
+.section sn50a
+.section sn50b
+.section sn6aa
+.section sn6ab
+.section sn6ba
+.section sn6bb
+.section sn6ca
+.section sn6cb
+.section sn6da
+.section sn6db
+.section sn6ea
+.section sn6eb
+.section sn6fa
+.section sn6fb
+.section sn6ga
+.section sn6gb
+.section sn6ha
+.section sn6hb
+.section sn6ia
+.section sn6ib
+.section sn6ja
+.section sn6jb
+.section sn6ka
+.section sn6kb
+.section sn6la
+.section sn6lb
+.section sn6ma
+.section sn6mb
+.section sn6na
+.section sn6nb
+.section sn6oa
+.section sn6ob
+.section sn6pa
+.section sn6pb
+.section sn6qa
+.section sn6qb
+.section sn6ra
+.section sn6rb
+.section sn6sa
+.section sn6sb
+.section sn6ta
+.section sn6tb
+.section sn6ua
+.section sn6ub
+.section sn6va
+.section sn6vb
+.section sn6wa
+.section sn6wb
+.section sn6xa
+.section sn6xb
+.section sn6ya
+.section sn6yb
+.section sn6za
+.section sn6zb
+.section sn61a
+.section sn61b
+.section sn62a
+.section sn62b
+.section sn63a
+.section sn63b
+.section sn64a
+.section sn64b
+.section sn65a
+.section sn65b
+.section sn66a
+.section sn66b
+.section sn67a
+.section sn67b
+.section sn68a
+.section sn68b
+.section sn69a
+.section sn69b
+.section sn60a
+.section sn60b
+.section sn7aa
+.section sn7ab
+.section sn7ba
+.section sn7bb
+.section sn7ca
+.section sn7cb
+.section sn7da
+.section sn7db
+.section sn7ea
+.section sn7eb
+.section sn7fa
+.section sn7fb
+.section sn7ga
+.section sn7gb
+.section sn7ha
+.section sn7hb
+.section sn7ia
+.section sn7ib
+.section sn7ja
+.section sn7jb
+.section sn7ka
+.section sn7kb
+.section sn7la
+.section sn7lb
+.section sn7ma
+.section sn7mb
+.section sn7na
+.section sn7nb
+.section sn7oa
+.section sn7ob
+.section sn7pa
+.section sn7pb
+.section sn7qa
+.section sn7qb
+.section sn7ra
+.section sn7rb
+.section sn7sa
+.section sn7sb
+.section sn7ta
+.section sn7tb
+.section sn7ua
+.section sn7ub
+.section sn7va
+.section sn7vb
+.section sn7wa
+.section sn7wb
+.section sn7xa
+.section sn7xb
+.section sn7ya
+.section sn7yb
+.section sn7za
+.section sn7zb
+.section sn71a
+.section sn71b
+.section sn72a
+.section sn72b
+.section sn73a
+.section sn73b
+.section sn74a
+.section sn74b
+.section sn75a
+.section sn75b
+.section sn76a
+.section sn76b
+.section sn77a
+.section sn77b
+.section sn78a
+.section sn78b
+.section sn79a
+.section sn79b
+.section sn70a
+.section sn70b
+.section sn8aa
+.section sn8ab
+.section sn8ba
+.section sn8bb
+.section sn8ca
+.section sn8cb
+.section sn8da
+.section sn8db
+.section sn8ea
+.section sn8eb
+.section sn8fa
+.section sn8fb
+.section sn8ga
+.section sn8gb
+.section sn8ha
+.section sn8hb
+.section sn8ia
+.section sn8ib
+.section sn8ja
+.section sn8jb
+.section sn8ka
+.section sn8kb
+.section sn8la
+.section sn8lb
+.section sn8ma
+.section sn8mb
+.section sn8na
+.section sn8nb
+.section sn8oa
+.section sn8ob
+.section sn8pa
+.section sn8pb
+.section sn8qa
+.section sn8qb
+.section sn8ra
+.section sn8rb
+.section sn8sa
+.section sn8sb
+.section sn8ta
+.section sn8tb
+.section sn8ua
+.section sn8ub
+.section sn8va
+.section sn8vb
+.section sn8wa
+.section sn8wb
+.section sn8xa
+.section sn8xb
+.section sn8ya
+.section sn8yb
+.section sn8za
+.section sn8zb
+.section sn81a
+.section sn81b
+.section sn82a
+.section sn82b
+.section sn83a
+.section sn83b
+.section sn84a
+.section sn84b
+.section sn85a
+.section sn85b
+.section sn86a
+.section sn86b
+.section sn87a
+.section sn87b
+.section sn88a
+.section sn88b
+.section sn89a
+.section sn89b
+.section sn80a
+.section sn80b
+.section sn9aa
+.section sn9ab
+.section sn9ba
+.section sn9bb
+.section sn9ca
+.section sn9cb
+.section sn9da
+.section sn9db
+.section sn9ea
+.section sn9eb
+.section sn9fa
+.section sn9fb
+.section sn9ga
+.section sn9gb
+.section sn9ha
+.section sn9hb
+.section sn9ia
+.section sn9ib
+.section sn9ja
+.section sn9jb
+.section sn9ka
+.section sn9kb
+.section sn9la
+.section sn9lb
+.section sn9ma
+.section sn9mb
+.section sn9na
+.section sn9nb
+.section sn9oa
+.section sn9ob
+.section sn9pa
+.section sn9pb
+.section sn9qa
+.section sn9qb
+.section sn9ra
+.section sn9rb
+.section sn9sa
+.section sn9sb
+.section sn9ta
+.section sn9tb
+.section sn9ua
+.section sn9ub
+.section sn9va
+.section sn9vb
+.section sn9wa
+.section sn9wb
+.section sn9xa
+.section sn9xb
+.section sn9ya
+.section sn9yb
+.section sn9za
+.section sn9zb
+.section sn91a
+.section sn91b
+.section sn92a
+.section sn92b
+.section sn93a
+.section sn93b
+.section sn94a
+.section sn94b
+.section sn95a
+.section sn95b
+.section sn96a
+.section sn96b
+.section sn97a
+.section sn97b
+.section sn98a
+.section sn98b
+.section sn99a
+.section sn99b
+.section sn90a
+.section sn90b
+.section sn0aa
+.section sn0ab
+.section sn0ba
+.section sn0bb
+.section sn0ca
+.section sn0cb
+.section sn0da
+.section sn0db
+.section sn0ea
+.section sn0eb
+.section sn0fa
+.section sn0fb
+.section sn0ga
+.section sn0gb
+.section sn0ha
+.section sn0hb
+.section sn0ia
+.section sn0ib
+.section sn0ja
+.section sn0jb
+.section sn0ka
+.section sn0kb
+.section sn0la
+.section sn0lb
+.section sn0ma
+.section sn0mb
+.section sn0na
+.section sn0nb
+.section sn0oa
+.section sn0ob
+.section sn0pa
+.section sn0pb
+.section sn0qa
+.section sn0qb
+.section sn0ra
+.section sn0rb
+.section sn0sa
+.section sn0sb
+.section sn0ta
+.section sn0tb
+.section sn0ua
+.section sn0ub
+.section sn0va
+.section sn0vb
+.section sn0wa
+.section sn0wb
+.section sn0xa
+.section sn0xb
+.section sn0ya
+.section sn0yb
+.section sn0za
+.section sn0zb
+.section sn01a
+.section sn01b
+.section sn02a
+.section sn02b
+.section sn03a
+.section sn03b
+.section sn04a
+.section sn04b
+.section sn05a
+.section sn05b
+.section sn06a
+.section sn06b
+.section sn07a
+.section sn07b
+.section sn08a
+.section sn08b
+.section sn09a
+.section sn09b
+.section sn00a
+.section sn00b
+.section soaaa
+.section soaab
+.section soaba
+.section soabb
+.section soaca
+.section soacb
+.section soada
+.section soadb
+.section soaea
+.section soaeb
+.section soafa
+.section soafb
+.section soaga
+.section soagb
+.section soaha
+.section soahb
+.section soaia
+.section soaib
+.section soaja
+.section soajb
+.section soaka
+.section soakb
+.section soala
+.section soalb
+.section soama
+.section soamb
+.section soana
+.section soanb
+.section soaoa
+.section soaob
+.section soapa
+.section soapb
+.section soaqa
+.section soaqb
+.section soara
+.section soarb
+.section soasa
+.section soasb
+.section soata
+.section soatb
+.section soaua
+.section soaub
+.section soava
+.section soavb
+.section soawa
+.section soawb
+.section soaxa
+.section soaxb
+.section soaya
+.section soayb
+.section soaza
+.section soazb
+.section soa1a
+.section soa1b
+.section soa2a
+.section soa2b
+.section soa3a
+.section soa3b
+.section soa4a
+.section soa4b
+.section soa5a
+.section soa5b
+.section soa6a
+.section soa6b
+.section soa7a
+.section soa7b
+.section soa8a
+.section soa8b
+.section soa9a
+.section soa9b
+.section soa0a
+.section soa0b
+.section sobaa
+.section sobab
+.section sobba
+.section sobbb
+.section sobca
+.section sobcb
+.section sobda
+.section sobdb
+.section sobea
+.section sobeb
+.section sobfa
+.section sobfb
+.section sobga
+.section sobgb
+.section sobha
+.section sobhb
+.section sobia
+.section sobib
+.section sobja
+.section sobjb
+.section sobka
+.section sobkb
+.section sobla
+.section soblb
+.section sobma
+.section sobmb
+.section sobna
+.section sobnb
+.section soboa
+.section sobob
+.section sobpa
+.section sobpb
+.section sobqa
+.section sobqb
+.section sobra
+.section sobrb
+.section sobsa
+.section sobsb
+.section sobta
+.section sobtb
+.section sobua
+.section sobub
+.section sobva
+.section sobvb
+.section sobwa
+.section sobwb
+.section sobxa
+.section sobxb
+.section sobya
+.section sobyb
+.section sobza
+.section sobzb
+.section sob1a
+.section sob1b
+.section sob2a
+.section sob2b
+.section sob3a
+.section sob3b
+.section sob4a
+.section sob4b
+.section sob5a
+.section sob5b
+.section sob6a
+.section sob6b
+.section sob7a
+.section sob7b
+.section sob8a
+.section sob8b
+.section sob9a
+.section sob9b
+.section sob0a
+.section sob0b
+.section socaa
+.section socab
+.section socba
+.section socbb
+.section socca
+.section soccb
+.section socda
+.section socdb
+.section socea
+.section soceb
+.section socfa
+.section socfb
+.section socga
+.section socgb
+.section socha
+.section sochb
+.section socia
+.section socib
+.section socja
+.section socjb
+.section socka
+.section sockb
+.section socla
+.section soclb
+.section socma
+.section socmb
+.section socna
+.section socnb
+.section socoa
+.section socob
+.section socpa
+.section socpb
+.section socqa
+.section socqb
+.section socra
+.section socrb
+.section socsa
+.section socsb
+.section socta
+.section soctb
+.section socua
+.section socub
+.section socva
+.section socvb
+.section socwa
+.section socwb
+.section socxa
+.section socxb
+.section socya
+.section socyb
+.section socza
+.section soczb
+.section soc1a
+.section soc1b
+.section soc2a
+.section soc2b
+.section soc3a
+.section soc3b
+.section soc4a
+.section soc4b
+.section soc5a
+.section soc5b
+.section soc6a
+.section soc6b
+.section soc7a
+.section soc7b
+.section soc8a
+.section soc8b
+.section soc9a
+.section soc9b
+.section soc0a
+.section soc0b
+.section sodaa
+.section sodab
+.section sodba
+.section sodbb
+.section sodca
+.section sodcb
+.section sodda
+.section soddb
+.section sodea
+.section sodeb
+.section sodfa
+.section sodfb
+.section sodga
+.section sodgb
+.section sodha
+.section sodhb
+.section sodia
+.section sodib
+.section sodja
+.section sodjb
+.section sodka
+.section sodkb
+.section sodla
+.section sodlb
+.section sodma
+.section sodmb
+.section sodna
+.section sodnb
+.section sodoa
+.section sodob
+.section sodpa
+.section sodpb
+.section sodqa
+.section sodqb
+.section sodra
+.section sodrb
+.section sodsa
+.section sodsb
+.section sodta
+.section sodtb
+.section sodua
+.section sodub
+.section sodva
+.section sodvb
+.section sodwa
+.section sodwb
+.section sodxa
+.section sodxb
+.section sodya
+.section sodyb
+.section sodza
+.section sodzb
+.section sod1a
+.section sod1b
+.section sod2a
+.section sod2b
+.section sod3a
+.section sod3b
+.section sod4a
+.section sod4b
+.section sod5a
+.section sod5b
+.section sod6a
+.section sod6b
+.section sod7a
+.section sod7b
+.section sod8a
+.section sod8b
+.section sod9a
+.section sod9b
+.section sod0a
+.section sod0b
+.section soeaa
+.section soeab
+.section soeba
+.section soebb
+.section soeca
+.section soecb
+.section soeda
+.section soedb
+.section soeea
+.section soeeb
+.section soefa
+.section soefb
+.section soega
+.section soegb
+.section soeha
+.section soehb
+.section soeia
+.section soeib
+.section soeja
+.section soejb
+.section soeka
+.section soekb
+.section soela
+.section soelb
+.section soema
+.section soemb
+.section soena
+.section soenb
+.section soeoa
+.section soeob
+.section soepa
+.section soepb
+.section soeqa
+.section soeqb
+.section soera
+.section soerb
+.section soesa
+.section soesb
+.section soeta
+.section soetb
+.section soeua
+.section soeub
+.section soeva
+.section soevb
+.section soewa
+.section soewb
+.section soexa
+.section soexb
+.section soeya
+.section soeyb
+.section soeza
+.section soezb
+.section soe1a
+.section soe1b
+.section soe2a
+.section soe2b
+.section soe3a
+.section soe3b
+.section soe4a
+.section soe4b
+.section soe5a
+.section soe5b
+.section soe6a
+.section soe6b
+.section soe7a
+.section soe7b
+.section soe8a
+.section soe8b
+.section soe9a
+.section soe9b
+.section soe0a
+.section soe0b
+.section sofaa
+.section sofab
+.section sofba
+.section sofbb
+.section sofca
+.section sofcb
+.section sofda
+.section sofdb
+.section sofea
+.section sofeb
+.section soffa
+.section soffb
+.section sofga
+.section sofgb
+.section sofha
+.section sofhb
+.section sofia
+.section sofib
+.section sofja
+.section sofjb
+.section sofka
+.section sofkb
+.section sofla
+.section soflb
+.section sofma
+.section sofmb
+.section sofna
+.section sofnb
+.section sofoa
+.section sofob
+.section sofpa
+.section sofpb
+.section sofqa
+.section sofqb
+.section sofra
+.section sofrb
+.section sofsa
+.section sofsb
+.section softa
+.section softb
+.section sofua
+.section sofub
+.section sofva
+.section sofvb
+.section sofwa
+.section sofwb
+.section sofxa
+.section sofxb
+.section sofya
+.section sofyb
+.section sofza
+.section sofzb
+.section sof1a
+.section sof1b
+.section sof2a
+.section sof2b
+.section sof3a
+.section sof3b
+.section sof4a
+.section sof4b
+.section sof5a
+.section sof5b
+.section sof6a
+.section sof6b
+.section sof7a
+.section sof7b
+.section sof8a
+.section sof8b
+.section sof9a
+.section sof9b
+.section sof0a
+.section sof0b
+.section sogaa
+.section sogab
+.section sogba
+.section sogbb
+.section sogca
+.section sogcb
+.section sogda
+.section sogdb
+.section sogea
+.section sogeb
+.section sogfa
+.section sogfb
+.section sogga
+.section soggb
+.section sogha
+.section soghb
+.section sogia
+.section sogib
+.section sogja
+.section sogjb
+.section sogka
+.section sogkb
+.section sogla
+.section soglb
+.section sogma
+.section sogmb
+.section sogna
+.section sognb
+.section sogoa
+.section sogob
+.section sogpa
+.section sogpb
+.section sogqa
+.section sogqb
+.section sogra
+.section sogrb
+.section sogsa
+.section sogsb
+.section sogta
+.section sogtb
+.section sogua
+.section sogub
+.section sogva
+.section sogvb
+.section sogwa
+.section sogwb
+.section sogxa
+.section sogxb
+.section sogya
+.section sogyb
+.section sogza
+.section sogzb
+.section sog1a
+.section sog1b
+.section sog2a
+.section sog2b
+.section sog3a
+.section sog3b
+.section sog4a
+.section sog4b
+.section sog5a
+.section sog5b
+.section sog6a
+.section sog6b
+.section sog7a
+.section sog7b
+.section sog8a
+.section sog8b
+.section sog9a
+.section sog9b
+.section sog0a
+.section sog0b
+.section sohaa
+.section sohab
+.section sohba
+.section sohbb
+.section sohca
+.section sohcb
+.section sohda
+.section sohdb
+.section sohea
+.section soheb
+.section sohfa
+.section sohfb
+.section sohga
+.section sohgb
+.section sohha
+.section sohhb
+.section sohia
+.section sohib
+.section sohja
+.section sohjb
+.section sohka
+.section sohkb
+.section sohla
+.section sohlb
+.section sohma
+.section sohmb
+.section sohna
+.section sohnb
+.section sohoa
+.section sohob
+.section sohpa
+.section sohpb
+.section sohqa
+.section sohqb
+.section sohra
+.section sohrb
+.section sohsa
+.section sohsb
+.section sohta
+.section sohtb
+.section sohua
+.section sohub
+.section sohva
+.section sohvb
+.section sohwa
+.section sohwb
+.section sohxa
+.section sohxb
+.section sohya
+.section sohyb
+.section sohza
+.section sohzb
+.section soh1a
+.section soh1b
+.section soh2a
+.section soh2b
+.section soh3a
+.section soh3b
+.section soh4a
+.section soh4b
+.section soh5a
+.section soh5b
+.section soh6a
+.section soh6b
+.section soh7a
+.section soh7b
+.section soh8a
+.section soh8b
+.section soh9a
+.section soh9b
+.section soh0a
+.section soh0b
+.section soiaa
+.section soiab
+.section soiba
+.section soibb
+.section soica
+.section soicb
+.section soida
+.section soidb
+.section soiea
+.section soieb
+.section soifa
+.section soifb
+.section soiga
+.section soigb
+.section soiha
+.section soihb
+.section soiia
+.section soiib
+.section soija
+.section soijb
+.section soika
+.section soikb
+.section soila
+.section soilb
+.section soima
+.section soimb
+.section soina
+.section soinb
+.section soioa
+.section soiob
+.section soipa
+.section soipb
+.section soiqa
+.section soiqb
+.section soira
+.section soirb
+.section soisa
+.section soisb
+.section soita
+.section soitb
+.section soiua
+.section soiub
+.section soiva
+.section soivb
+.section soiwa
+.section soiwb
+.section soixa
+.section soixb
+.section soiya
+.section soiyb
+.section soiza
+.section soizb
+.section soi1a
+.section soi1b
+.section soi2a
+.section soi2b
+.section soi3a
+.section soi3b
+.section soi4a
+.section soi4b
+.section soi5a
+.section soi5b
+.section soi6a
+.section soi6b
+.section soi7a
+.section soi7b
+.section soi8a
+.section soi8b
+.section soi9a
+.section soi9b
+.section soi0a
+.section soi0b
+.section sojaa
+.section sojab
+.section sojba
+.section sojbb
+.section sojca
+.section sojcb
+.section sojda
+.section sojdb
+.section sojea
+.section sojeb
+.section sojfa
+.section sojfb
+.section sojga
+.section sojgb
+.section sojha
+.section sojhb
+.section sojia
+.section sojib
+.section sojja
+.section sojjb
+.section sojka
+.section sojkb
+.section sojla
+.section sojlb
+.section sojma
+.section sojmb
+.section sojna
+.section sojnb
+.section sojoa
+.section sojob
+.section sojpa
+.section sojpb
+.section sojqa
+.section sojqb
+.section sojra
+.section sojrb
+.section sojsa
+.section sojsb
+.section sojta
+.section sojtb
+.section sojua
+.section sojub
+.section sojva
+.section sojvb
+.section sojwa
+.section sojwb
+.section sojxa
+.section sojxb
+.section sojya
+.section sojyb
+.section sojza
+.section sojzb
+.section soj1a
+.section soj1b
+.section soj2a
+.section soj2b
+.section soj3a
+.section soj3b
+.section soj4a
+.section soj4b
+.section soj5a
+.section soj5b
+.section soj6a
+.section soj6b
+.section soj7a
+.section soj7b
+.section soj8a
+.section soj8b
+.section soj9a
+.section soj9b
+.section soj0a
+.section soj0b
+.section sokaa
+.section sokab
+.section sokba
+.section sokbb
+.section sokca
+.section sokcb
+.section sokda
+.section sokdb
+.section sokea
+.section sokeb
+.section sokfa
+.section sokfb
+.section sokga
+.section sokgb
+.section sokha
+.section sokhb
+.section sokia
+.section sokib
+.section sokja
+.section sokjb
+.section sokka
+.section sokkb
+.section sokla
+.section soklb
+.section sokma
+.section sokmb
+.section sokna
+.section soknb
+.section sokoa
+.section sokob
+.section sokpa
+.section sokpb
+.section sokqa
+.section sokqb
+.section sokra
+.section sokrb
+.section soksa
+.section soksb
+.section sokta
+.section soktb
+.section sokua
+.section sokub
+.section sokva
+.section sokvb
+.section sokwa
+.section sokwb
+.section sokxa
+.section sokxb
+.section sokya
+.section sokyb
+.section sokza
+.section sokzb
+.section sok1a
+.section sok1b
+.section sok2a
+.section sok2b
+.section sok3a
+.section sok3b
+.section sok4a
+.section sok4b
+.section sok5a
+.section sok5b
+.section sok6a
+.section sok6b
+.section sok7a
+.section sok7b
+.section sok8a
+.section sok8b
+.section sok9a
+.section sok9b
+.section sok0a
+.section sok0b
+.section solaa
+.section solab
+.section solba
+.section solbb
+.section solca
+.section solcb
+.section solda
+.section soldb
+.section solea
+.section soleb
+.section solfa
+.section solfb
+.section solga
+.section solgb
+.section solha
+.section solhb
+.section solia
+.section solib
+.section solja
+.section soljb
+.section solka
+.section solkb
+.section solla
+.section sollb
+.section solma
+.section solmb
+.section solna
+.section solnb
+.section soloa
+.section solob
+.section solpa
+.section solpb
+.section solqa
+.section solqb
+.section solra
+.section solrb
+.section solsa
+.section solsb
+.section solta
+.section soltb
+.section solua
+.section solub
+.section solva
+.section solvb
+.section solwa
+.section solwb
+.section solxa
+.section solxb
+.section solya
+.section solyb
+.section solza
+.section solzb
+.section sol1a
+.section sol1b
+.section sol2a
+.section sol2b
+.section sol3a
+.section sol3b
+.section sol4a
+.section sol4b
+.section sol5a
+.section sol5b
+.section sol6a
+.section sol6b
+.section sol7a
+.section sol7b
+.section sol8a
+.section sol8b
+.section sol9a
+.section sol9b
+.section sol0a
+.section sol0b
+.section somaa
+.section somab
+.section somba
+.section sombb
+.section somca
+.section somcb
+.section somda
+.section somdb
+.section somea
+.section someb
+.section somfa
+.section somfb
+.section somga
+.section somgb
+.section somha
+.section somhb
+.section somia
+.section somib
+.section somja
+.section somjb
+.section somka
+.section somkb
+.section somla
+.section somlb
+.section somma
+.section sommb
+.section somna
+.section somnb
+.section somoa
+.section somob
+.section sompa
+.section sompb
+.section somqa
+.section somqb
+.section somra
+.section somrb
+.section somsa
+.section somsb
+.section somta
+.section somtb
+.section somua
+.section somub
+.section somva
+.section somvb
+.section somwa
+.section somwb
+.section somxa
+.section somxb
+.section somya
+.section somyb
+.section somza
+.section somzb
+.section som1a
+.section som1b
+.section som2a
+.section som2b
+.section som3a
+.section som3b
+.section som4a
+.section som4b
+.section som5a
+.section som5b
+.section som6a
+.section som6b
+.section som7a
+.section som7b
+.section som8a
+.section som8b
+.section som9a
+.section som9b
+.section som0a
+.section som0b
+.section sonaa
+.section sonab
+.section sonba
+.section sonbb
+.section sonca
+.section soncb
+.section sonda
+.section sondb
+.section sonea
+.section soneb
+.section sonfa
+.section sonfb
+.section songa
+.section songb
+.section sonha
+.section sonhb
+.section sonia
+.section sonib
+.section sonja
+.section sonjb
+.section sonka
+.section sonkb
+.section sonla
+.section sonlb
+.section sonma
+.section sonmb
+.section sonna
+.section sonnb
+.section sonoa
+.section sonob
+.section sonpa
+.section sonpb
+.section sonqa
+.section sonqb
+.section sonra
+.section sonrb
+.section sonsa
+.section sonsb
+.section sonta
+.section sontb
+.section sonua
+.section sonub
+.section sonva
+.section sonvb
+.section sonwa
+.section sonwb
+.section sonxa
+.section sonxb
+.section sonya
+.section sonyb
+.section sonza
+.section sonzb
+.section son1a
+.section son1b
+.section son2a
+.section son2b
+.section son3a
+.section son3b
+.section son4a
+.section son4b
+.section son5a
+.section son5b
+.section son6a
+.section son6b
+.section son7a
+.section son7b
+.section son8a
+.section son8b
+.section son9a
+.section son9b
+.section son0a
+.section son0b
+.section sooaa
+.section sooab
+.section sooba
+.section soobb
+.section sooca
+.section soocb
+.section sooda
+.section soodb
+.section sooea
+.section sooeb
+.section soofa
+.section soofb
+.section sooga
+.section soogb
+.section sooha
+.section soohb
+.section sooia
+.section sooib
+.section sooja
+.section soojb
+.section sooka
+.section sookb
+.section soola
+.section soolb
+.section sooma
+.section soomb
+.section soona
+.section soonb
+.section soooa
+.section sooob
+.section soopa
+.section soopb
+.section sooqa
+.section sooqb
+.section soora
+.section soorb
+.section soosa
+.section soosb
+.section soota
+.section sootb
+.section sooua
+.section sooub
+.section soova
+.section soovb
+.section soowa
+.section soowb
+.section sooxa
+.section sooxb
+.section sooya
+.section sooyb
+.section sooza
+.section soozb
+.section soo1a
+.section soo1b
+.section soo2a
+.section soo2b
+.section soo3a
+.section soo3b
+.section soo4a
+.section soo4b
+.section soo5a
+.section soo5b
+.section soo6a
+.section soo6b
+.section soo7a
+.section soo7b
+.section soo8a
+.section soo8b
+.section soo9a
+.section soo9b
+.section soo0a
+.section soo0b
+.section sopaa
+.section sopab
+.section sopba
+.section sopbb
+.section sopca
+.section sopcb
+.section sopda
+.section sopdb
+.section sopea
+.section sopeb
+.section sopfa
+.section sopfb
+.section sopga
+.section sopgb
+.section sopha
+.section sophb
+.section sopia
+.section sopib
+.section sopja
+.section sopjb
+.section sopka
+.section sopkb
+.section sopla
+.section soplb
+.section sopma
+.section sopmb
+.section sopna
+.section sopnb
+.section sopoa
+.section sopob
+.section soppa
+.section soppb
+.section sopqa
+.section sopqb
+.section sopra
+.section soprb
+.section sopsa
+.section sopsb
+.section sopta
+.section soptb
+.section sopua
+.section sopub
+.section sopva
+.section sopvb
+.section sopwa
+.section sopwb
+.section sopxa
+.section sopxb
+.section sopya
+.section sopyb
+.section sopza
+.section sopzb
+.section sop1a
+.section sop1b
+.section sop2a
+.section sop2b
+.section sop3a
+.section sop3b
+.section sop4a
+.section sop4b
+.section sop5a
+.section sop5b
+.section sop6a
+.section sop6b
+.section sop7a
+.section sop7b
+.section sop8a
+.section sop8b
+.section sop9a
+.section sop9b
+.section sop0a
+.section sop0b
+.section soqaa
+.section soqab
+.section soqba
+.section soqbb
+.section soqca
+.section soqcb
+.section soqda
+.section soqdb
+.section soqea
+.section soqeb
+.section soqfa
+.section soqfb
+.section soqga
+.section soqgb
+.section soqha
+.section soqhb
+.section soqia
+.section soqib
+.section soqja
+.section soqjb
+.section soqka
+.section soqkb
+.section soqla
+.section soqlb
+.section soqma
+.section soqmb
+.section soqna
+.section soqnb
+.section soqoa
+.section soqob
+.section soqpa
+.section soqpb
+.section soqqa
+.section soqqb
+.section soqra
+.section soqrb
+.section soqsa
+.section soqsb
+.section soqta
+.section soqtb
+.section soqua
+.section soqub
+.section soqva
+.section soqvb
+.section soqwa
+.section soqwb
+.section soqxa
+.section soqxb
+.section soqya
+.section soqyb
+.section soqza
+.section soqzb
+.section soq1a
+.section soq1b
+.section soq2a
+.section soq2b
+.section soq3a
+.section soq3b
+.section soq4a
+.section soq4b
+.section soq5a
+.section soq5b
+.section soq6a
+.section soq6b
+.section soq7a
+.section soq7b
+.section soq8a
+.section soq8b
+.section soq9a
+.section soq9b
+.section soq0a
+.section soq0b
+.section soraa
+.section sorab
+.section sorba
+.section sorbb
+.section sorca
+.section sorcb
+.section sorda
+.section sordb
+.section sorea
+.section soreb
+.section sorfa
+.section sorfb
+.section sorga
+.section sorgb
+.section sorha
+.section sorhb
+.section soria
+.section sorib
+.section sorja
+.section sorjb
+.section sorka
+.section sorkb
+.section sorla
+.section sorlb
+.section sorma
+.section sormb
+.section sorna
+.section sornb
+.section soroa
+.section sorob
+.section sorpa
+.section sorpb
+.section sorqa
+.section sorqb
+.section sorra
+.section sorrb
+.section sorsa
+.section sorsb
+.section sorta
+.section sortb
+.section sorua
+.section sorub
+.section sorva
+.section sorvb
+.section sorwa
+.section sorwb
+.section sorxa
+.section sorxb
+.section sorya
+.section soryb
+.section sorza
+.section sorzb
+.section sor1a
+.section sor1b
+.section sor2a
+.section sor2b
+.section sor3a
+.section sor3b
+.section sor4a
+.section sor4b
+.section sor5a
+.section sor5b
+.section sor6a
+.section sor6b
+.section sor7a
+.section sor7b
+.section sor8a
+.section sor8b
+.section sor9a
+.section sor9b
+.section sor0a
+.section sor0b
+.section sosaa
+.section sosab
+.section sosba
+.section sosbb
+.section sosca
+.section soscb
+.section sosda
+.section sosdb
+.section sosea
+.section soseb
+.section sosfa
+.section sosfb
+.section sosga
+.section sosgb
+.section sosha
+.section soshb
+.section sosia
+.section sosib
+.section sosja
+.section sosjb
+.section soska
+.section soskb
+.section sosla
+.section soslb
+.section sosma
+.section sosmb
+.section sosna
+.section sosnb
+.section sosoa
+.section sosob
+.section sospa
+.section sospb
+.section sosqa
+.section sosqb
+.section sosra
+.section sosrb
+.section sossa
+.section sossb
+.section sosta
+.section sostb
+.section sosua
+.section sosub
+.section sosva
+.section sosvb
+.section soswa
+.section soswb
+.section sosxa
+.section sosxb
+.section sosya
+.section sosyb
+.section sosza
+.section soszb
+.section sos1a
+.section sos1b
+.section sos2a
+.section sos2b
+.section sos3a
+.section sos3b
+.section sos4a
+.section sos4b
+.section sos5a
+.section sos5b
+.section sos6a
+.section sos6b
+.section sos7a
+.section sos7b
+.section sos8a
+.section sos8b
+.section sos9a
+.section sos9b
+.section sos0a
+.section sos0b
+.section sotaa
+.section sotab
+.section sotba
+.section sotbb
+.section sotca
+.section sotcb
+.section sotda
+.section sotdb
+.section sotea
+.section soteb
+.section sotfa
+.section sotfb
+.section sotga
+.section sotgb
+.section sotha
+.section sothb
+.section sotia
+.section sotib
+.section sotja
+.section sotjb
+.section sotka
+.section sotkb
+.section sotla
+.section sotlb
+.section sotma
+.section sotmb
+.section sotna
+.section sotnb
+.section sotoa
+.section sotob
+.section sotpa
+.section sotpb
+.section sotqa
+.section sotqb
+.section sotra
+.section sotrb
+.section sotsa
+.section sotsb
+.section sotta
+.section sottb
+.section sotua
+.section sotub
+.section sotva
+.section sotvb
+.section sotwa
+.section sotwb
+.section sotxa
+.section sotxb
+.section sotya
+.section sotyb
+.section sotza
+.section sotzb
+.section sot1a
+.section sot1b
+.section sot2a
+.section sot2b
+.section sot3a
+.section sot3b
+.section sot4a
+.section sot4b
+.section sot5a
+.section sot5b
+.section sot6a
+.section sot6b
+.section sot7a
+.section sot7b
+.section sot8a
+.section sot8b
+.section sot9a
+.section sot9b
+.section sot0a
+.section sot0b
+.section souaa
+.section souab
+.section souba
+.section soubb
+.section souca
+.section soucb
+.section souda
+.section soudb
+.section souea
+.section soueb
+.section soufa
+.section soufb
+.section souga
+.section sougb
+.section souha
+.section souhb
+.section souia
+.section souib
+.section souja
+.section soujb
+.section souka
+.section soukb
+.section soula
+.section soulb
+.section souma
+.section soumb
+.section souna
+.section sounb
+.section souoa
+.section souob
+.section soupa
+.section soupb
+.section souqa
+.section souqb
+.section soura
+.section sourb
+.section sousa
+.section sousb
+.section souta
+.section soutb
+.section souua
+.section souub
+.section souva
+.section souvb
+.section souwa
+.section souwb
+.section souxa
+.section souxb
+.section souya
+.section souyb
+.section souza
+.section souzb
+.section sou1a
+.section sou1b
+.section sou2a
+.section sou2b
+.section sou3a
+.section sou3b
+.section sou4a
+.section sou4b
+.section sou5a
+.section sou5b
+.section sou6a
+.section sou6b
+.section sou7a
+.section sou7b
+.section sou8a
+.section sou8b
+.section sou9a
+.section sou9b
+.section sou0a
+.section sou0b
+.section sovaa
+.section sovab
+.section sovba
+.section sovbb
+.section sovca
+.section sovcb
+.section sovda
+.section sovdb
+.section sovea
+.section soveb
+.section sovfa
+.section sovfb
+.section sovga
+.section sovgb
+.section sovha
+.section sovhb
+.section sovia
+.section sovib
+.section sovja
+.section sovjb
+.section sovka
+.section sovkb
+.section sovla
+.section sovlb
+.section sovma
+.section sovmb
+.section sovna
+.section sovnb
+.section sovoa
+.section sovob
+.section sovpa
+.section sovpb
+.section sovqa
+.section sovqb
+.section sovra
+.section sovrb
+.section sovsa
+.section sovsb
+.section sovta
+.section sovtb
+.section sovua
+.section sovub
+.section sovva
+.section sovvb
+.section sovwa
+.section sovwb
+.section sovxa
+.section sovxb
+.section sovya
+.section sovyb
+.section sovza
+.section sovzb
+.section sov1a
+.section sov1b
+.section sov2a
+.section sov2b
+.section sov3a
+.section sov3b
+.section sov4a
+.section sov4b
+.section sov5a
+.section sov5b
+.section sov6a
+.section sov6b
+.section sov7a
+.section sov7b
+.section sov8a
+.section sov8b
+.section sov9a
+.section sov9b
+.section sov0a
+.section sov0b
+.section sowaa
+.section sowab
+.section sowba
+.section sowbb
+.section sowca
+.section sowcb
+.section sowda
+.section sowdb
+.section sowea
+.section soweb
+.section sowfa
+.section sowfb
+.section sowga
+.section sowgb
+.section sowha
+.section sowhb
+.section sowia
+.section sowib
+.section sowja
+.section sowjb
+.section sowka
+.section sowkb
+.section sowla
+.section sowlb
+.section sowma
+.section sowmb
+.section sowna
+.section sownb
+.section sowoa
+.section sowob
+.section sowpa
+.section sowpb
+.section sowqa
+.section sowqb
+.section sowra
+.section sowrb
+.section sowsa
+.section sowsb
+.section sowta
+.section sowtb
+.section sowua
+.section sowub
+.section sowva
+.section sowvb
+.section sowwa
+.section sowwb
+.section sowxa
+.section sowxb
+.section sowya
+.section sowyb
+.section sowza
+.section sowzb
+.section sow1a
+.section sow1b
+.section sow2a
+.section sow2b
+.section sow3a
+.section sow3b
+.section sow4a
+.section sow4b
+.section sow5a
+.section sow5b
+.section sow6a
+.section sow6b
+.section sow7a
+.section sow7b
+.section sow8a
+.section sow8b
+.section sow9a
+.section sow9b
+.section sow0a
+.section sow0b
+.section soxaa
+.section soxab
+.section soxba
+.section soxbb
+.section soxca
+.section soxcb
+.section soxda
+.section soxdb
+.section soxea
+.section soxeb
+.section soxfa
+.section soxfb
+.section soxga
+.section soxgb
+.section soxha
+.section soxhb
+.section soxia
+.section soxib
+.section soxja
+.section soxjb
+.section soxka
+.section soxkb
+.section soxla
+.section soxlb
+.section soxma
+.section soxmb
+.section soxna
+.section soxnb
+.section soxoa
+.section soxob
+.section soxpa
+.section soxpb
+.section soxqa
+.section soxqb
+.section soxra
+.section soxrb
+.section soxsa
+.section soxsb
+.section soxta
+.section soxtb
+.section soxua
+.section soxub
+.section soxva
+.section soxvb
+.section soxwa
+.section soxwb
+.section soxxa
+.section soxxb
+.section soxya
+.section soxyb
+.section soxza
+.section soxzb
+.section sox1a
+.section sox1b
+.section sox2a
+.section sox2b
+.section sox3a
+.section sox3b
+.section sox4a
+.section sox4b
+.section sox5a
+.section sox5b
+.section sox6a
+.section sox6b
+.section sox7a
+.section sox7b
+.section sox8a
+.section sox8b
+.section sox9a
+.section sox9b
+.section sox0a
+.section sox0b
+.section soyaa
+.section soyab
+.section soyba
+.section soybb
+.section soyca
+.section soycb
+.section soyda
+.section soydb
+.section soyea
+.section soyeb
+.section soyfa
+.section soyfb
+.section soyga
+.section soygb
+.section soyha
+.section soyhb
+.section soyia
+.section soyib
+.section soyja
+.section soyjb
+.section soyka
+.section soykb
+.section soyla
+.section soylb
+.section soyma
+.section soymb
+.section soyna
+.section soynb
+.section soyoa
+.section soyob
+.section soypa
+.section soypb
+.section soyqa
+.section soyqb
+.section soyra
+.section soyrb
+.section soysa
+.section soysb
+.section soyta
+.section soytb
+.section soyua
+.section soyub
+.section soyva
+.section soyvb
+.section soywa
+.section soywb
+.section soyxa
+.section soyxb
+.section soyya
+.section soyyb
+.section soyza
+.section soyzb
+.section soy1a
+.section soy1b
+.section soy2a
+.section soy2b
+.section soy3a
+.section soy3b
+.section soy4a
+.section soy4b
+.section soy5a
+.section soy5b
+.section soy6a
+.section soy6b
+.section soy7a
+.section soy7b
+.section soy8a
+.section soy8b
+.section soy9a
+.section soy9b
+.section soy0a
+.section soy0b
+.section sozaa
+.section sozab
+.section sozba
+.section sozbb
+.section sozca
+.section sozcb
+.section sozda
+.section sozdb
+.section sozea
+.section sozeb
+.section sozfa
+.section sozfb
+.section sozga
+.section sozgb
+.section sozha
+.section sozhb
+.section sozia
+.section sozib
+.section sozja
+.section sozjb
+.section sozka
+.section sozkb
+.section sozla
+.section sozlb
+.section sozma
+.section sozmb
+.section sozna
+.section soznb
+.section sozoa
+.section sozob
+.section sozpa
+.section sozpb
+.section sozqa
+.section sozqb
+.section sozra
+.section sozrb
+.section sozsa
+.section sozsb
+.section sozta
+.section soztb
+.section sozua
+.section sozub
+.section sozva
+.section sozvb
+.section sozwa
+.section sozwb
+.section sozxa
+.section sozxb
+.section sozya
+.section sozyb
+.section sozza
+.section sozzb
+.section soz1a
+.section soz1b
+.section soz2a
+.section soz2b
+.section soz3a
+.section soz3b
+.section soz4a
+.section soz4b
+.section soz5a
+.section soz5b
+.section soz6a
+.section soz6b
+.section soz7a
+.section soz7b
+.section soz8a
+.section soz8b
+.section soz9a
+.section soz9b
+.section soz0a
+.section soz0b
+.section so1aa
+.section so1ab
+.section so1ba
+.section so1bb
+.section so1ca
+.section so1cb
+.section so1da
+.section so1db
+.section so1ea
+.section so1eb
+.section so1fa
+.section so1fb
+.section so1ga
+.section so1gb
+.section so1ha
+.section so1hb
+.section so1ia
+.section so1ib
+.section so1ja
+.section so1jb
+.section so1ka
+.section so1kb
+.section so1la
+.section so1lb
+.section so1ma
+.section so1mb
+.section so1na
+.section so1nb
+.section so1oa
+.section so1ob
+.section so1pa
+.section so1pb
+.section so1qa
+.section so1qb
+.section so1ra
+.section so1rb
+.section so1sa
+.section so1sb
+.section so1ta
+.section so1tb
+.section so1ua
+.section so1ub
+.section so1va
+.section so1vb
+.section so1wa
+.section so1wb
+.section so1xa
+.section so1xb
+.section so1ya
+.section so1yb
+.section so1za
+.section so1zb
+.section so11a
+.section so11b
+.section so12a
+.section so12b
+.section so13a
+.section so13b
+.section so14a
+.section so14b
+.section so15a
+.section so15b
+.section so16a
+.section so16b
+.section so17a
+.section so17b
+.section so18a
+.section so18b
+.section so19a
+.section so19b
+.section so10a
+.section so10b
+.section so2aa
+.section so2ab
+.section so2ba
+.section so2bb
+.section so2ca
+.section so2cb
+.section so2da
+.section so2db
+.section so2ea
+.section so2eb
+.section so2fa
+.section so2fb
+.section so2ga
+.section so2gb
+.section so2ha
+.section so2hb
+.section so2ia
+.section so2ib
+.section so2ja
+.section so2jb
+.section so2ka
+.section so2kb
+.section so2la
+.section so2lb
+.section so2ma
+.section so2mb
+.section so2na
+.section so2nb
+.section so2oa
+.section so2ob
+.section so2pa
+.section so2pb
+.section so2qa
+.section so2qb
+.section so2ra
+.section so2rb
+.section so2sa
+.section so2sb
+.section so2ta
+.section so2tb
+.section so2ua
+.section so2ub
+.section so2va
+.section so2vb
+.section so2wa
+.section so2wb
+.section so2xa
+.section so2xb
+.section so2ya
+.section so2yb
+.section so2za
+.section so2zb
+.section so21a
+.section so21b
+.section so22a
+.section so22b
+.section so23a
+.section so23b
+.section so24a
+.section so24b
+.section so25a
+.section so25b
+.section so26a
+.section so26b
+.section so27a
+.section so27b
+.section so28a
+.section so28b
+.section so29a
+.section so29b
+.section so20a
+.section so20b
+.section so3aa
+.section so3ab
+.section so3ba
+.section so3bb
+.section so3ca
+.section so3cb
+.section so3da
+.section so3db
+.section so3ea
+.section so3eb
+.section so3fa
+.section so3fb
+.section so3ga
+.section so3gb
+.section so3ha
+.section so3hb
+.section so3ia
+.section so3ib
+.section so3ja
+.section so3jb
+.section so3ka
+.section so3kb
+.section so3la
+.section so3lb
+.section so3ma
+.section so3mb
+.section so3na
+.section so3nb
+.section so3oa
+.section so3ob
+.section so3pa
+.section so3pb
+.section so3qa
+.section so3qb
+.section so3ra
+.section so3rb
+.section so3sa
+.section so3sb
+.section so3ta
+.section so3tb
+.section so3ua
+.section so3ub
+.section so3va
+.section so3vb
+.section so3wa
+.section so3wb
+.section so3xa
+.section so3xb
+.section so3ya
+.section so3yb
+.section so3za
+.section so3zb
+.section so31a
+.section so31b
+.section so32a
+.section so32b
+.section so33a
+.section so33b
+.section so34a
+.section so34b
+.section so35a
+.section so35b
+.section so36a
+.section so36b
+.section so37a
+.section so37b
+.section so38a
+.section so38b
+.section so39a
+.section so39b
+.section so30a
+.section so30b
+.section so4aa
+.section so4ab
+.section so4ba
+.section so4bb
+.section so4ca
+.section so4cb
+.section so4da
+.section so4db
+.section so4ea
+.section so4eb
+.section so4fa
+.section so4fb
+.section so4ga
+.section so4gb
+.section so4ha
+.section so4hb
+.section so4ia
+.section so4ib
+.section so4ja
+.section so4jb
+.section so4ka
+.section so4kb
+.section so4la
+.section so4lb
+.section so4ma
+.section so4mb
+.section so4na
+.section so4nb
+.section so4oa
+.section so4ob
+.section so4pa
+.section so4pb
+.section so4qa
+.section so4qb
+.section so4ra
+.section so4rb
+.section so4sa
+.section so4sb
+.section so4ta
+.section so4tb
+.section so4ua
+.section so4ub
+.section so4va
+.section so4vb
+.section so4wa
+.section so4wb
+.section so4xa
+.section so4xb
+.section so4ya
+.section so4yb
+.section so4za
+.section so4zb
+.section so41a
+.section so41b
+.section so42a
+.section so42b
+.section so43a
+.section so43b
+.section so44a
+.section so44b
+.section so45a
+.section so45b
+.section so46a
+.section so46b
+.section so47a
+.section so47b
+.section so48a
+.section so48b
+.section so49a
+.section so49b
+.section so40a
+.section so40b
+.section so5aa
+.section so5ab
+.section so5ba
+.section so5bb
+.section so5ca
+.section so5cb
+.section so5da
+.section so5db
+.section so5ea
+.section so5eb
+.section so5fa
+.section so5fb
+.section so5ga
+.section so5gb
+.section so5ha
+.section so5hb
+.section so5ia
+.section so5ib
+.section so5ja
+.section so5jb
+.section so5ka
+.section so5kb
+.section so5la
+.section so5lb
+.section so5ma
+.section so5mb
+.section so5na
+.section so5nb
+.section so5oa
+.section so5ob
+.section so5pa
+.section so5pb
+.section so5qa
+.section so5qb
+.section so5ra
+.section so5rb
+.section so5sa
+.section so5sb
+.section so5ta
+.section so5tb
+.section so5ua
+.section so5ub
+.section so5va
+.section so5vb
+.section so5wa
+.section so5wb
+.section so5xa
+.section so5xb
+.section so5ya
+.section so5yb
+.section so5za
+.section so5zb
+.section so51a
+.section so51b
+.section so52a
+.section so52b
+.section so53a
+.section so53b
+.section so54a
+.section so54b
+.section so55a
+.section so55b
+.section so56a
+.section so56b
+.section so57a
+.section so57b
+.section so58a
+.section so58b
+.section so59a
+.section so59b
+.section so50a
+.section so50b
+.section so6aa
+.section so6ab
+.section so6ba
+.section so6bb
+.section so6ca
+.section so6cb
+.section so6da
+.section so6db
+.section so6ea
+.section so6eb
+.section so6fa
+.section so6fb
+.section so6ga
+.section so6gb
+.section so6ha
+.section so6hb
+.section so6ia
+.section so6ib
+.section so6ja
+.section so6jb
+.section so6ka
+.section so6kb
+.section so6la
+.section so6lb
+.section so6ma
+.section so6mb
+.section so6na
+.section so6nb
+.section so6oa
+.section so6ob
+.section so6pa
+.section so6pb
+.section so6qa
+.section so6qb
+.section so6ra
+.section so6rb
+.section so6sa
+.section so6sb
+.section so6ta
+.section so6tb
+.section so6ua
+.section so6ub
+.section so6va
+.section so6vb
+.section so6wa
+.section so6wb
+.section so6xa
+.section so6xb
+.section so6ya
+.section so6yb
+.section so6za
+.section so6zb
+.section so61a
+.section so61b
+.section so62a
+.section so62b
+.section so63a
+.section so63b
+.section so64a
+.section so64b
+.section so65a
+.section so65b
+.section so66a
+.section so66b
+.section so67a
+.section so67b
+.section so68a
+.section so68b
+.section so69a
+.section so69b
+.section so60a
+.section so60b
+.section so7aa
+.section so7ab
+.section so7ba
+.section so7bb
+.section so7ca
+.section so7cb
+.section so7da
+.section so7db
+.section so7ea
+.section so7eb
+.section so7fa
+.section so7fb
+.section so7ga
+.section so7gb
+.section so7ha
+.section so7hb
+.section so7ia
+.section so7ib
+.section so7ja
+.section so7jb
+.section so7ka
+.section so7kb
+.section so7la
+.section so7lb
+.section so7ma
+.section so7mb
+.section so7na
+.section so7nb
+.section so7oa
+.section so7ob
+.section so7pa
+.section so7pb
+.section so7qa
+.section so7qb
+.section so7ra
+.section so7rb
+.section so7sa
+.section so7sb
+.section so7ta
+.section so7tb
+.section so7ua
+.section so7ub
+.section so7va
+.section so7vb
+.section so7wa
+.section so7wb
+.section so7xa
+.section so7xb
+.section so7ya
+.section so7yb
+.section so7za
+.section so7zb
+.section so71a
+.section so71b
+.section so72a
+.section so72b
+.section so73a
+.section so73b
+.section so74a
+.section so74b
+.section so75a
+.section so75b
+.section so76a
+.section so76b
+.section so77a
+.section so77b
+.section so78a
+.section so78b
+.section so79a
+.section so79b
+.section so70a
+.section so70b
+.section so8aa
+.section so8ab
+.section so8ba
+.section so8bb
+.section so8ca
+.section so8cb
+.section so8da
+.section so8db
+.section so8ea
+.section so8eb
+.section so8fa
+.section so8fb
+.section so8ga
+.section so8gb
+.section so8ha
+.section so8hb
+.section so8ia
+.section so8ib
+.section so8ja
+.section so8jb
+.section so8ka
+.section so8kb
+.section so8la
+.section so8lb
+.section so8ma
+.section so8mb
+.section so8na
+.section so8nb
+.section so8oa
+.section so8ob
+.section so8pa
+.section so8pb
+.section so8qa
+.section so8qb
+.section so8ra
+.section so8rb
+.section so8sa
+.section so8sb
+.section so8ta
+.section so8tb
+.section so8ua
+.section so8ub
+.section so8va
+.section so8vb
+.section so8wa
+.section so8wb
+.section so8xa
+.section so8xb
+.section so8ya
+.section so8yb
+.section so8za
+.section so8zb
+.section so81a
+.section so81b
+.section so82a
+.section so82b
+.section so83a
+.section so83b
+.section so84a
+.section so84b
+.section so85a
+.section so85b
+.section so86a
+.section so86b
+.section so87a
+.section so87b
+.section so88a
+.section so88b
+.section so89a
+.section so89b
+.section so80a
+.section so80b
+.section so9aa
+.section so9ab
+.section so9ba
+.section so9bb
+.section so9ca
+.section so9cb
+.section so9da
+.section so9db
+.section so9ea
+.section so9eb
+.section so9fa
+.section so9fb
+.section so9ga
+.section so9gb
+.section so9ha
+.section so9hb
+.section so9ia
+.section so9ib
+.section so9ja
+.section so9jb
+.section so9ka
+.section so9kb
+.section so9la
+.section so9lb
+.section so9ma
+.section so9mb
+.section so9na
+.section so9nb
+.section so9oa
+.section so9ob
+.section so9pa
+.section so9pb
+.section so9qa
+.section so9qb
+.section so9ra
+.section so9rb
+.section so9sa
+.section so9sb
+.section so9ta
+.section so9tb
+.section so9ua
+.section so9ub
+.section so9va
+.section so9vb
+.section so9wa
+.section so9wb
+.section so9xa
+.section so9xb
+.section so9ya
+.section so9yb
+.section so9za
+.section so9zb
+.section so91a
+.section so91b
+.section so92a
+.section so92b
+.section so93a
+.section so93b
+.section so94a
+.section so94b
+.section so95a
+.section so95b
+.section so96a
+.section so96b
+.section so97a
+.section so97b
+.section so98a
+.section so98b
+.section so99a
+.section so99b
+.section so90a
+.section so90b
+.section so0aa
+.section so0ab
+.section so0ba
+.section so0bb
+.section so0ca
+.section so0cb
+.section so0da
+.section so0db
+.section so0ea
+.section so0eb
+.section so0fa
+.section so0fb
+.section so0ga
+.section so0gb
+.section so0ha
+.section so0hb
+.section so0ia
+.section so0ib
+.section so0ja
+.section so0jb
+.section so0ka
+.section so0kb
+.section so0la
+.section so0lb
+.section so0ma
+.section so0mb
+.section so0na
+.section so0nb
+.section so0oa
+.section so0ob
+.section so0pa
+.section so0pb
+.section so0qa
+.section so0qb
+.section so0ra
+.section so0rb
+.section so0sa
+.section so0sb
+.section so0ta
+.section so0tb
+.section so0ua
+.section so0ub
+.section so0va
+.section so0vb
+.section so0wa
+.section so0wb
+.section so0xa
+.section so0xb
+.section so0ya
+.section so0yb
+.section so0za
+.section so0zb
+.section so01a
+.section so01b
+.section so02a
+.section so02b
+.section so03a
+.section so03b
+.section so04a
+.section so04b
+.section so05a
+.section so05b
+.section so06a
+.section so06b
+.section so07a
+.section so07b
+.section so08a
+.section so08b
+.section so09a
+.section so09b
+.section so00a
+.section so00b
+.section spaaa
+.section spaab
+.section spaba
+.section spabb
+.section spaca
+.section spacb
+.section spada
+.section spadb
+.section spaea
+.section spaeb
+.section spafa
+.section spafb
+.section spaga
+.section spagb
+.section spaha
+.section spahb
+.section spaia
+.section spaib
+.section spaja
+.section spajb
+.section spaka
+.section spakb
+.section spala
+.section spalb
+.section spama
+.section spamb
+.section spana
+.section spanb
+.section spaoa
+.section spaob
+.section spapa
+.section spapb
+.section spaqa
+.section spaqb
+.section spara
+.section sparb
+.section spasa
+.section spasb
+.section spata
+.section spatb
+.section spaua
+.section spaub
+.section spava
+.section spavb
+.section spawa
+.section spawb
+.section spaxa
+.section spaxb
+.section spaya
+.section spayb
+.section spaza
+.section spazb
+.section spa1a
+.section spa1b
+.section spa2a
+.section spa2b
+.section spa3a
+.section spa3b
+.section spa4a
+.section spa4b
+.section spa5a
+.section spa5b
+.section spa6a
+.section spa6b
+.section spa7a
+.section spa7b
+.section spa8a
+.section spa8b
+.section spa9a
+.section spa9b
+.section spa0a
+.section spa0b
+.section spbaa
+.section spbab
+.section spbba
+.section spbbb
+.section spbca
+.section spbcb
+.section spbda
+.section spbdb
+.section spbea
+.section spbeb
+.section spbfa
+.section spbfb
+.section spbga
+.section spbgb
+.section spbha
+.section spbhb
+.section spbia
+.section spbib
+.section spbja
+.section spbjb
+.section spbka
+.section spbkb
+.section spbla
+.section spblb
+.section spbma
+.section spbmb
+.section spbna
+.section spbnb
+.section spboa
+.section spbob
+.section spbpa
+.section spbpb
+.section spbqa
+.section spbqb
+.section spbra
+.section spbrb
+.section spbsa
+.section spbsb
+.section spbta
+.section spbtb
+.section spbua
+.section spbub
+.section spbva
+.section spbvb
+.section spbwa
+.section spbwb
+.section spbxa
+.section spbxb
+.section spbya
+.section spbyb
+.section spbza
+.section spbzb
+.section spb1a
+.section spb1b
+.section spb2a
+.section spb2b
+.section spb3a
+.section spb3b
+.section spb4a
+.section spb4b
+.section spb5a
+.section spb5b
+.section spb6a
+.section spb6b
+.section spb7a
+.section spb7b
+.section spb8a
+.section spb8b
+.section spb9a
+.section spb9b
+.section spb0a
+.section spb0b
+.section spcaa
+.section spcab
+.section spcba
+.section spcbb
+.section spcca
+.section spccb
+.section spcda
+.section spcdb
+.section spcea
+.section spceb
+.section spcfa
+.section spcfb
+.section spcga
+.section spcgb
+.section spcha
+.section spchb
+.section spcia
+.section spcib
+.section spcja
+.section spcjb
+.section spcka
+.section spckb
+.section spcla
+.section spclb
+.section spcma
+.section spcmb
+.section spcna
+.section spcnb
+.section spcoa
+.section spcob
+.section spcpa
+.section spcpb
+.section spcqa
+.section spcqb
+.section spcra
+.section spcrb
+.section spcsa
+.section spcsb
+.section spcta
+.section spctb
+.section spcua
+.section spcub
+.section spcva
+.section spcvb
+.section spcwa
+.section spcwb
+.section spcxa
+.section spcxb
+.section spcya
+.section spcyb
+.section spcza
+.section spczb
+.section spc1a
+.section spc1b
+.section spc2a
+.section spc2b
+.section spc3a
+.section spc3b
+.section spc4a
+.section spc4b
+.section spc5a
+.section spc5b
+.section spc6a
+.section spc6b
+.section spc7a
+.section spc7b
+.section spc8a
+.section spc8b
+.section spc9a
+.section spc9b
+.section spc0a
+.section spc0b
+.section spdaa
+.section spdab
+.section spdba
+.section spdbb
+.section spdca
+.section spdcb
+.section spdda
+.section spddb
+.section spdea
+.section spdeb
+.section spdfa
+.section spdfb
+.section spdga
+.section spdgb
+.section spdha
+.section spdhb
+.section spdia
+.section spdib
+.section spdja
+.section spdjb
+.section spdka
+.section spdkb
+.section spdla
+.section spdlb
+.section spdma
+.section spdmb
+.section spdna
+.section spdnb
+.section spdoa
+.section spdob
+.section spdpa
+.section spdpb
+.section spdqa
+.section spdqb
+.section spdra
+.section spdrb
+.section spdsa
+.section spdsb
+.section spdta
+.section spdtb
+.section spdua
+.section spdub
+.section spdva
+.section spdvb
+.section spdwa
+.section spdwb
+.section spdxa
+.section spdxb
+.section spdya
+.section spdyb
+.section spdza
+.section spdzb
+.section spd1a
+.section spd1b
+.section spd2a
+.section spd2b
+.section spd3a
+.section spd3b
+.section spd4a
+.section spd4b
+.section spd5a
+.section spd5b
+.section spd6a
+.section spd6b
+.section spd7a
+.section spd7b
+.section spd8a
+.section spd8b
+.section spd9a
+.section spd9b
+.section spd0a
+.section spd0b
+.section speaa
+.section speab
+.section speba
+.section spebb
+.section speca
+.section specb
+.section speda
+.section spedb
+.section speea
+.section speeb
+.section spefa
+.section spefb
+.section spega
+.section spegb
+.section speha
+.section spehb
+.section speia
+.section speib
+.section speja
+.section spejb
+.section speka
+.section spekb
+.section spela
+.section spelb
+.section spema
+.section spemb
+.section spena
+.section spenb
+.section speoa
+.section speob
+.section spepa
+.section spepb
+.section speqa
+.section speqb
+.section spera
+.section sperb
+.section spesa
+.section spesb
+.section speta
+.section spetb
+.section speua
+.section speub
+.section speva
+.section spevb
+.section spewa
+.section spewb
+.section spexa
+.section spexb
+.section speya
+.section speyb
+.section speza
+.section spezb
+.section spe1a
+.section spe1b
+.section spe2a
+.section spe2b
+.section spe3a
+.section spe3b
+.section spe4a
+.section spe4b
+.section spe5a
+.section spe5b
+.section spe6a
+.section spe6b
+.section spe7a
+.section spe7b
+.section spe8a
+.section spe8b
+.section spe9a
+.section spe9b
+.section spe0a
+.section spe0b
+.section spfaa
+.section spfab
+.section spfba
+.section spfbb
+.section spfca
+.section spfcb
+.section spfda
+.section spfdb
+.section spfea
+.section spfeb
+.section spffa
+.section spffb
+.section spfga
+.section spfgb
+.section spfha
+.section spfhb
+.section spfia
+.section spfib
+.section spfja
+.section spfjb
+.section spfka
+.section spfkb
+.section spfla
+.section spflb
+.section spfma
+.section spfmb
+.section spfna
+.section spfnb
+.section spfoa
+.section spfob
+.section spfpa
+.section spfpb
+.section spfqa
+.section spfqb
+.section spfra
+.section spfrb
+.section spfsa
+.section spfsb
+.section spfta
+.section spftb
+.section spfua
+.section spfub
+.section spfva
+.section spfvb
+.section spfwa
+.section spfwb
+.section spfxa
+.section spfxb
+.section spfya
+.section spfyb
+.section spfza
+.section spfzb
+.section spf1a
+.section spf1b
+.section spf2a
+.section spf2b
+.section spf3a
+.section spf3b
+.section spf4a
+.section spf4b
+.section spf5a
+.section spf5b
+.section spf6a
+.section spf6b
+.section spf7a
+.section spf7b
+.section spf8a
+.section spf8b
+.section spf9a
+.section spf9b
+.section spf0a
+.section spf0b
+.section spgaa
+.section spgab
+.section spgba
+.section spgbb
+.section spgca
+.section spgcb
+.section spgda
+.section spgdb
+.section spgea
+.section spgeb
+.section spgfa
+.section spgfb
+.section spgga
+.section spggb
+.section spgha
+.section spghb
+.section spgia
+.section spgib
+.section spgja
+.section spgjb
+.section spgka
+.section spgkb
+.section spgla
+.section spglb
+.section spgma
+.section spgmb
+.section spgna
+.section spgnb
+.section spgoa
+.section spgob
+.section spgpa
+.section spgpb
+.section spgqa
+.section spgqb
+.section spgra
+.section spgrb
+.section spgsa
+.section spgsb
+.section spgta
+.section spgtb
+.section spgua
+.section spgub
+.section spgva
+.section spgvb
+.section spgwa
+.section spgwb
+.section spgxa
+.section spgxb
+.section spgya
+.section spgyb
+.section spgza
+.section spgzb
+.section spg1a
+.section spg1b
+.section spg2a
+.section spg2b
+.section spg3a
+.section spg3b
+.section spg4a
+.section spg4b
+.section spg5a
+.section spg5b
+.section spg6a
+.section spg6b
+.section spg7a
+.section spg7b
+.section spg8a
+.section spg8b
+.section spg9a
+.section spg9b
+.section spg0a
+.section spg0b
+.section sphaa
+.section sphab
+.section sphba
+.section sphbb
+.section sphca
+.section sphcb
+.section sphda
+.section sphdb
+.section sphea
+.section spheb
+.section sphfa
+.section sphfb
+.section sphga
+.section sphgb
+.section sphha
+.section sphhb
+.section sphia
+.section sphib
+.section sphja
+.section sphjb
+.section sphka
+.section sphkb
+.section sphla
+.section sphlb
+.section sphma
+.section sphmb
+.section sphna
+.section sphnb
+.section sphoa
+.section sphob
+.section sphpa
+.section sphpb
+.section sphqa
+.section sphqb
+.section sphra
+.section sphrb
+.section sphsa
+.section sphsb
+.section sphta
+.section sphtb
+.section sphua
+.section sphub
+.section sphva
+.section sphvb
+.section sphwa
+.section sphwb
+.section sphxa
+.section sphxb
+.section sphya
+.section sphyb
+.section sphza
+.section sphzb
+.section sph1a
+.section sph1b
+.section sph2a
+.section sph2b
+.section sph3a
+.section sph3b
+.section sph4a
+.section sph4b
+.section sph5a
+.section sph5b
+.section sph6a
+.section sph6b
+.section sph7a
+.section sph7b
+.section sph8a
+.section sph8b
+.section sph9a
+.section sph9b
+.section sph0a
+.section sph0b
+.section spiaa
+.section spiab
+.section spiba
+.section spibb
+.section spica
+.section spicb
+.section spida
+.section spidb
+.section spiea
+.section spieb
+.section spifa
+.section spifb
+.section spiga
+.section spigb
+.section spiha
+.section spihb
+.section spiia
+.section spiib
+.section spija
+.section spijb
+.section spika
+.section spikb
+.section spila
+.section spilb
+.section spima
+.section spimb
+.section spina
+.section spinb
+.section spioa
+.section spiob
+.section spipa
+.section spipb
+.section spiqa
+.section spiqb
+.section spira
+.section spirb
+.section spisa
+.section spisb
+.section spita
+.section spitb
+.section spiua
+.section spiub
+.section spiva
+.section spivb
+.section spiwa
+.section spiwb
+.section spixa
+.section spixb
+.section spiya
+.section spiyb
+.section spiza
+.section spizb
+.section spi1a
+.section spi1b
+.section spi2a
+.section spi2b
+.section spi3a
+.section spi3b
+.section spi4a
+.section spi4b
+.section spi5a
+.section spi5b
+.section spi6a
+.section spi6b
+.section spi7a
+.section spi7b
+.section spi8a
+.section spi8b
+.section spi9a
+.section spi9b
+.section spi0a
+.section spi0b
+.section spjaa
+.section spjab
+.section spjba
+.section spjbb
+.section spjca
+.section spjcb
+.section spjda
+.section spjdb
+.section spjea
+.section spjeb
+.section spjfa
+.section spjfb
+.section spjga
+.section spjgb
+.section spjha
+.section spjhb
+.section spjia
+.section spjib
+.section spjja
+.section spjjb
+.section spjka
+.section spjkb
+.section spjla
+.section spjlb
+.section spjma
+.section spjmb
+.section spjna
+.section spjnb
+.section spjoa
+.section spjob
+.section spjpa
+.section spjpb
+.section spjqa
+.section spjqb
+.section spjra
+.section spjrb
+.section spjsa
+.section spjsb
+.section spjta
+.section spjtb
+.section spjua
+.section spjub
+.section spjva
+.section spjvb
+.section spjwa
+.section spjwb
+.section spjxa
+.section spjxb
+.section spjya
+.section spjyb
+.section spjza
+.section spjzb
+.section spj1a
+.section spj1b
+.section spj2a
+.section spj2b
+.section spj3a
+.section spj3b
+.section spj4a
+.section spj4b
+.section spj5a
+.section spj5b
+.section spj6a
+.section spj6b
+.section spj7a
+.section spj7b
+.section spj8a
+.section spj8b
+.section spj9a
+.section spj9b
+.section spj0a
+.section spj0b
+.section spkaa
+.section spkab
+.section spkba
+.section spkbb
+.section spkca
+.section spkcb
+.section spkda
+.section spkdb
+.section spkea
+.section spkeb
+.section spkfa
+.section spkfb
+.section spkga
+.section spkgb
+.section spkha
+.section spkhb
+.section spkia
+.section spkib
+.section spkja
+.section spkjb
+.section spkka
+.section spkkb
+.section spkla
+.section spklb
+.section spkma
+.section spkmb
+.section spkna
+.section spknb
+.section spkoa
+.section spkob
+.section spkpa
+.section spkpb
+.section spkqa
+.section spkqb
+.section spkra
+.section spkrb
+.section spksa
+.section spksb
+.section spkta
+.section spktb
+.section spkua
+.section spkub
+.section spkva
+.section spkvb
+.section spkwa
+.section spkwb
+.section spkxa
+.section spkxb
+.section spkya
+.section spkyb
+.section spkza
+.section spkzb
+.section spk1a
+.section spk1b
+.section spk2a
+.section spk2b
+.section spk3a
+.section spk3b
+.section spk4a
+.section spk4b
+.section spk5a
+.section spk5b
+.section spk6a
+.section spk6b
+.section spk7a
+.section spk7b
+.section spk8a
+.section spk8b
+.section spk9a
+.section spk9b
+.section spk0a
+.section spk0b
+.section splaa
+.section splab
+.section splba
+.section splbb
+.section splca
+.section splcb
+.section splda
+.section spldb
+.section splea
+.section spleb
+.section splfa
+.section splfb
+.section splga
+.section splgb
+.section splha
+.section splhb
+.section splia
+.section splib
+.section splja
+.section spljb
+.section splka
+.section splkb
+.section splla
+.section spllb
+.section splma
+.section splmb
+.section splna
+.section splnb
+.section sploa
+.section splob
+.section splpa
+.section splpb
+.section splqa
+.section splqb
+.section splra
+.section splrb
+.section splsa
+.section splsb
+.section splta
+.section spltb
+.section splua
+.section splub
+.section splva
+.section splvb
+.section splwa
+.section splwb
+.section splxa
+.section splxb
+.section splya
+.section splyb
+.section splza
+.section splzb
+.section spl1a
+.section spl1b
+.section spl2a
+.section spl2b
+.section spl3a
+.section spl3b
+.section spl4a
+.section spl4b
+.section spl5a
+.section spl5b
+.section spl6a
+.section spl6b
+.section spl7a
+.section spl7b
+.section spl8a
+.section spl8b
+.section spl9a
+.section spl9b
+.section spl0a
+.section spl0b
+.section spmaa
+.section spmab
+.section spmba
+.section spmbb
+.section spmca
+.section spmcb
+.section spmda
+.section spmdb
+.section spmea
+.section spmeb
+.section spmfa
+.section spmfb
+.section spmga
+.section spmgb
+.section spmha
+.section spmhb
+.section spmia
+.section spmib
+.section spmja
+.section spmjb
+.section spmka
+.section spmkb
+.section spmla
+.section spmlb
+.section spmma
+.section spmmb
+.section spmna
+.section spmnb
+.section spmoa
+.section spmob
+.section spmpa
+.section spmpb
+.section spmqa
+.section spmqb
+.section spmra
+.section spmrb
+.section spmsa
+.section spmsb
+.section spmta
+.section spmtb
+.section spmua
+.section spmub
+.section spmva
+.section spmvb
+.section spmwa
+.section spmwb
+.section spmxa
+.section spmxb
+.section spmya
+.section spmyb
+.section spmza
+.section spmzb
+.section spm1a
+.section spm1b
+.section spm2a
+.section spm2b
+.section spm3a
+.section spm3b
+.section spm4a
+.section spm4b
+.section spm5a
+.section spm5b
+.section spm6a
+.section spm6b
+.section spm7a
+.section spm7b
+.section spm8a
+.section spm8b
+.section spm9a
+.section spm9b
+.section spm0a
+.section spm0b
+.section spnaa
+.section spnab
+.section spnba
+.section spnbb
+.section spnca
+.section spncb
+.section spnda
+.section spndb
+.section spnea
+.section spneb
+.section spnfa
+.section spnfb
+.section spnga
+.section spngb
+.section spnha
+.section spnhb
+.section spnia
+.section spnib
+.section spnja
+.section spnjb
+.section spnka
+.section spnkb
+.section spnla
+.section spnlb
+.section spnma
+.section spnmb
+.section spnna
+.section spnnb
+.section spnoa
+.section spnob
+.section spnpa
+.section spnpb
+.section spnqa
+.section spnqb
+.section spnra
+.section spnrb
+.section spnsa
+.section spnsb
+.section spnta
+.section spntb
+.section spnua
+.section spnub
+.section spnva
+.section spnvb
+.section spnwa
+.section spnwb
+.section spnxa
+.section spnxb
+.section spnya
+.section spnyb
+.section spnza
+.section spnzb
+.section spn1a
+.section spn1b
+.section spn2a
+.section spn2b
+.section spn3a
+.section spn3b
+.section spn4a
+.section spn4b
+.section spn5a
+.section spn5b
+.section spn6a
+.section spn6b
+.section spn7a
+.section spn7b
+.section spn8a
+.section spn8b
+.section spn9a
+.section spn9b
+.section spn0a
+.section spn0b
+.section spoaa
+.section spoab
+.section spoba
+.section spobb
+.section spoca
+.section spocb
+.section spoda
+.section spodb
+.section spoea
+.section spoeb
+.section spofa
+.section spofb
+.section spoga
+.section spogb
+.section spoha
+.section spohb
+.section spoia
+.section spoib
+.section spoja
+.section spojb
+.section spoka
+.section spokb
+.section spola
+.section spolb
+.section spoma
+.section spomb
+.section spona
+.section sponb
+.section spooa
+.section spoob
+.section spopa
+.section spopb
+.section spoqa
+.section spoqb
+.section spora
+.section sporb
+.section sposa
+.section sposb
+.section spota
+.section spotb
+.section spoua
+.section spoub
+.section spova
+.section spovb
+.section spowa
+.section spowb
+.section spoxa
+.section spoxb
+.section spoya
+.section spoyb
+.section spoza
+.section spozb
+.section spo1a
+.section spo1b
+.section spo2a
+.section spo2b
+.section spo3a
+.section spo3b
+.section spo4a
+.section spo4b
+.section spo5a
+.section spo5b
+.section spo6a
+.section spo6b
+.section spo7a
+.section spo7b
+.section spo8a
+.section spo8b
+.section spo9a
+.section spo9b
+.section spo0a
+.section spo0b
+.section sppaa
+.section sppab
+.section sppba
+.section sppbb
+.section sppca
+.section sppcb
+.section sppda
+.section sppdb
+.section sppea
+.section sppeb
+.section sppfa
+.section sppfb
+.section sppga
+.section sppgb
+.section sppha
+.section spphb
+.section sppia
+.section sppib
+.section sppja
+.section sppjb
+.section sppka
+.section sppkb
+.section sppla
+.section spplb
+.section sppma
+.section sppmb
+.section sppna
+.section sppnb
+.section sppoa
+.section sppob
+.section spppa
+.section spppb
+.section sppqa
+.section sppqb
+.section sppra
+.section spprb
+.section sppsa
+.section sppsb
+.section sppta
+.section spptb
+.section sppua
+.section sppub
+.section sppva
+.section sppvb
+.section sppwa
+.section sppwb
+.section sppxa
+.section sppxb
+.section sppya
+.section sppyb
+.section sppza
+.section sppzb
+.section spp1a
+.section spp1b
+.section spp2a
+.section spp2b
+.section spp3a
+.section spp3b
+.section spp4a
+.section spp4b
+.section spp5a
+.section spp5b
+.section spp6a
+.section spp6b
+.section spp7a
+.section spp7b
+.section spp8a
+.section spp8b
+.section spp9a
+.section spp9b
+.section spp0a
+.section spp0b
+.section spqaa
+.section spqab
+.section spqba
+.section spqbb
+.section spqca
+.section spqcb
+.section spqda
+.section spqdb
+.section spqea
+.section spqeb
+.section spqfa
+.section spqfb
+.section spqga
+.section spqgb
+.section spqha
+.section spqhb
+.section spqia
+.section spqib
+.section spqja
+.section spqjb
+.section spqka
+.section spqkb
+.section spqla
+.section spqlb
+.section spqma
+.section spqmb
+.section spqna
+.section spqnb
+.section spqoa
+.section spqob
+.section spqpa
+.section spqpb
+.section spqqa
+.section spqqb
+.section spqra
+.section spqrb
+.section spqsa
+.section spqsb
+.section spqta
+.section spqtb
+.section spqua
+.section spqub
+.section spqva
+.section spqvb
+.section spqwa
+.section spqwb
+.section spqxa
+.section spqxb
+.section spqya
+.section spqyb
+.section spqza
+.section spqzb
+.section spq1a
+.section spq1b
+.section spq2a
+.section spq2b
+.section spq3a
+.section spq3b
+.section spq4a
+.section spq4b
+.section spq5a
+.section spq5b
+.section spq6a
+.section spq6b
+.section spq7a
+.section spq7b
+.section spq8a
+.section spq8b
+.section spq9a
+.section spq9b
+.section spq0a
+.section spq0b
+.section spraa
+.section sprab
+.section sprba
+.section sprbb
+.section sprca
+.section sprcb
+.section sprda
+.section sprdb
+.section sprea
+.section spreb
+.section sprfa
+.section sprfb
+.section sprga
+.section sprgb
+.section sprha
+.section sprhb
+.section spria
+.section sprib
+.section sprja
+.section sprjb
+.section sprka
+.section sprkb
+.section sprla
+.section sprlb
+.section sprma
+.section sprmb
+.section sprna
+.section sprnb
+.section sproa
+.section sprob
+.section sprpa
+.section sprpb
+.section sprqa
+.section sprqb
+.section sprra
+.section sprrb
+.section sprsa
+.section sprsb
+.section sprta
+.section sprtb
+.section sprua
+.section sprub
+.section sprva
+.section sprvb
+.section sprwa
+.section sprwb
+.section sprxa
+.section sprxb
+.section sprya
+.section spryb
+.section sprza
+.section sprzb
+.section spr1a
+.section spr1b
+.section spr2a
+.section spr2b
+.section spr3a
+.section spr3b
+.section spr4a
+.section spr4b
+.section spr5a
+.section spr5b
+.section spr6a
+.section spr6b
+.section spr7a
+.section spr7b
+.section spr8a
+.section spr8b
+.section spr9a
+.section spr9b
+.section spr0a
+.section spr0b
+.section spsaa
+.section spsab
+.section spsba
+.section spsbb
+.section spsca
+.section spscb
+.section spsda
+.section spsdb
+.section spsea
+.section spseb
+.section spsfa
+.section spsfb
+.section spsga
+.section spsgb
+.section spsha
+.section spshb
+.section spsia
+.section spsib
+.section spsja
+.section spsjb
+.section spska
+.section spskb
+.section spsla
+.section spslb
+.section spsma
+.section spsmb
+.section spsna
+.section spsnb
+.section spsoa
+.section spsob
+.section spspa
+.section spspb
+.section spsqa
+.section spsqb
+.section spsra
+.section spsrb
+.section spssa
+.section spssb
+.section spsta
+.section spstb
+.section spsua
+.section spsub
+.section spsva
+.section spsvb
+.section spswa
+.section spswb
+.section spsxa
+.section spsxb
+.section spsya
+.section spsyb
+.section spsza
+.section spszb
+.section sps1a
+.section sps1b
+.section sps2a
+.section sps2b
+.section sps3a
+.section sps3b
+.section sps4a
+.section sps4b
+.section sps5a
+.section sps5b
+.section sps6a
+.section sps6b
+.section sps7a
+.section sps7b
+.section sps8a
+.section sps8b
+.section sps9a
+.section sps9b
+.section sps0a
+.section sps0b
+.section sptaa
+.section sptab
+.section sptba
+.section sptbb
+.section sptca
+.section sptcb
+.section sptda
+.section sptdb
+.section sptea
+.section spteb
+.section sptfa
+.section sptfb
+.section sptga
+.section sptgb
+.section sptha
+.section spthb
+.section sptia
+.section sptib
+.section sptja
+.section sptjb
+.section sptka
+.section sptkb
+.section sptla
+.section sptlb
+.section sptma
+.section sptmb
+.section sptna
+.section sptnb
+.section sptoa
+.section sptob
+.section sptpa
+.section sptpb
+.section sptqa
+.section sptqb
+.section sptra
+.section sptrb
+.section sptsa
+.section sptsb
+.section sptta
+.section spttb
+.section sptua
+.section sptub
+.section sptva
+.section sptvb
+.section sptwa
+.section sptwb
+.section sptxa
+.section sptxb
+.section sptya
+.section sptyb
+.section sptza
+.section sptzb
+.section spt1a
+.section spt1b
+.section spt2a
+.section spt2b
+.section spt3a
+.section spt3b
+.section spt4a
+.section spt4b
+.section spt5a
+.section spt5b
+.section spt6a
+.section spt6b
+.section spt7a
+.section spt7b
+.section spt8a
+.section spt8b
+.section spt9a
+.section spt9b
+.section spt0a
+.section spt0b
+.section spuaa
+.section spuab
+.section spuba
+.section spubb
+.section spuca
+.section spucb
+.section spuda
+.section spudb
+.section spuea
+.section spueb
+.section spufa
+.section spufb
+.section spuga
+.section spugb
+.section spuha
+.section spuhb
+.section spuia
+.section spuib
+.section spuja
+.section spujb
+.section spuka
+.section spukb
+.section spula
+.section spulb
+.section spuma
+.section spumb
+.section spuna
+.section spunb
+.section spuoa
+.section spuob
+.section spupa
+.section spupb
+.section spuqa
+.section spuqb
+.section spura
+.section spurb
+.section spusa
+.section spusb
+.section sputa
+.section sputb
+.section spuua
+.section spuub
+.section spuva
+.section spuvb
+.section spuwa
+.section spuwb
+.section spuxa
+.section spuxb
+.section spuya
+.section spuyb
+.section spuza
+.section spuzb
+.section spu1a
+.section spu1b
+.section spu2a
+.section spu2b
+.section spu3a
+.section spu3b
+.section spu4a
+.section spu4b
+.section spu5a
+.section spu5b
+.section spu6a
+.section spu6b
+.section spu7a
+.section spu7b
+.section spu8a
+.section spu8b
+.section spu9a
+.section spu9b
+.section spu0a
+.section spu0b
+.section spvaa
+.section spvab
+.section spvba
+.section spvbb
+.section spvca
+.section spvcb
+.section spvda
+.section spvdb
+.section spvea
+.section spveb
+.section spvfa
+.section spvfb
+.section spvga
+.section spvgb
+.section spvha
+.section spvhb
+.section spvia
+.section spvib
+.section spvja
+.section spvjb
+.section spvka
+.section spvkb
+.section spvla
+.section spvlb
+.section spvma
+.section spvmb
+.section spvna
+.section spvnb
+.section spvoa
+.section spvob
+.section spvpa
+.section spvpb
+.section spvqa
+.section spvqb
+.section spvra
+.section spvrb
+.section spvsa
+.section spvsb
+.section spvta
+.section spvtb
+.section spvua
+.section spvub
+.section spvva
+.section spvvb
+.section spvwa
+.section spvwb
+.section spvxa
+.section spvxb
+.section spvya
+.section spvyb
+.section spvza
+.section spvzb
+.section spv1a
+.section spv1b
+.section spv2a
+.section spv2b
+.section spv3a
+.section spv3b
+.section spv4a
+.section spv4b
+.section spv5a
+.section spv5b
+.section spv6a
+.section spv6b
+.section spv7a
+.section spv7b
+.section spv8a
+.section spv8b
+.section spv9a
+.section spv9b
+.section spv0a
+.section spv0b
+.section spwaa
+.section spwab
+.section spwba
+.section spwbb
+.section spwca
+.section spwcb
+.section spwda
+.section spwdb
+.section spwea
+.section spweb
+.section spwfa
+.section spwfb
+.section spwga
+.section spwgb
+.section spwha
+.section spwhb
+.section spwia
+.section spwib
+.section spwja
+.section spwjb
+.section spwka
+.section spwkb
+.section spwla
+.section spwlb
+.section spwma
+.section spwmb
+.section spwna
+.section spwnb
+.section spwoa
+.section spwob
+.section spwpa
+.section spwpb
+.section spwqa
+.section spwqb
+.section spwra
+.section spwrb
+.section spwsa
+.section spwsb
+.section spwta
+.section spwtb
+.section spwua
+.section spwub
+.section spwva
+.section spwvb
+.section spwwa
+.section spwwb
+.section spwxa
+.section spwxb
+.section spwya
+.section spwyb
+.section spwza
+.section spwzb
+.section spw1a
+.section spw1b
+.section spw2a
+.section spw2b
+.section spw3a
+.section spw3b
+.section spw4a
+.section spw4b
+.section spw5a
+.section spw5b
+.section spw6a
+.section spw6b
+.section spw7a
+.section spw7b
+.section spw8a
+.section spw8b
+.section spw9a
+.section spw9b
+.section spw0a
+.section spw0b
+.section spxaa
+.section spxab
+.section spxba
+.section spxbb
+.section spxca
+.section spxcb
+.section spxda
+.section spxdb
+.section spxea
+.section spxeb
+.section spxfa
+.section spxfb
+.section spxga
+.section spxgb
+.section spxha
+.section spxhb
+.section spxia
+.section spxib
+.section spxja
+.section spxjb
+.section spxka
+.section spxkb
+.section spxla
+.section spxlb
+.section spxma
+.section spxmb
+.section spxna
+.section spxnb
+.section spxoa
+.section spxob
+.section spxpa
+.section spxpb
+.section spxqa
+.section spxqb
+.section spxra
+.section spxrb
+.section spxsa
+.section spxsb
+.section spxta
+.section spxtb
+.section spxua
+.section spxub
+.section spxva
+.section spxvb
+.section spxwa
+.section spxwb
+.section spxxa
+.section spxxb
+.section spxya
+.section spxyb
+.section spxza
+.section spxzb
+.section spx1a
+.section spx1b
+.section spx2a
+.section spx2b
+.section spx3a
+.section spx3b
+.section spx4a
+.section spx4b
+.section spx5a
+.section spx5b
+.section spx6a
+.section spx6b
+.section spx7a
+.section spx7b
+.section spx8a
+.section spx8b
+.section spx9a
+.section spx9b
+.section spx0a
+.section spx0b
+.section spyaa
+.section spyab
+.section spyba
+.section spybb
+.section spyca
+.section spycb
+.section spyda
+.section spydb
+.section spyea
+.section spyeb
+.section spyfa
+.section spyfb
+.section spyga
+.section spygb
+.section spyha
+.section spyhb
+.section spyia
+.section spyib
+.section spyja
+.section spyjb
+.section spyka
+.section spykb
+.section spyla
+.section spylb
+.section spyma
+.section spymb
+.section spyna
+.section spynb
+.section spyoa
+.section spyob
+.section spypa
+.section spypb
+.section spyqa
+.section spyqb
+.section spyra
+.section spyrb
+.section spysa
+.section spysb
+.section spyta
+.section spytb
+.section spyua
+.section spyub
+.section spyva
+.section spyvb
+.section spywa
+.section spywb
+.section spyxa
+.section spyxb
+.section spyya
+.section spyyb
+.section spyza
+.section spyzb
+.section spy1a
+.section spy1b
+.section spy2a
+.section spy2b
+.section spy3a
+.section spy3b
+.section spy4a
+.section spy4b
+.section spy5a
+.section spy5b
+.section spy6a
+.section spy6b
+.section spy7a
+.section spy7b
+.section spy8a
+.section spy8b
+.section spy9a
+.section spy9b
+.section spy0a
+.section spy0b
+.section spzaa
+.section spzab
+.section spzba
+.section spzbb
+.section spzca
+.section spzcb
+.section spzda
+.section spzdb
+.section spzea
+.section spzeb
+.section spzfa
+.section spzfb
+.section spzga
+.section spzgb
+.section spzha
+.section spzhb
+.section spzia
+.section spzib
+.section spzja
+.section spzjb
+.section spzka
+.section spzkb
+.section spzla
+.section spzlb
+.section spzma
+.section spzmb
+.section spzna
+.section spznb
+.section spzoa
+.section spzob
+.section spzpa
+.section spzpb
+.section spzqa
+.section spzqb
+.section spzra
+.section spzrb
+.section spzsa
+.section spzsb
+.section spzta
+.section spztb
+.section spzua
+.section spzub
+.section spzva
+.section spzvb
+.section spzwa
+.section spzwb
+.section spzxa
+.section spzxb
+.section spzya
+.section spzyb
+.section spzza
+.section spzzb
+.section spz1a
+.section spz1b
+.section spz2a
+.section spz2b
+.section spz3a
+.section spz3b
+.section spz4a
+.section spz4b
+.section spz5a
+.section spz5b
+.section spz6a
+.section spz6b
+.section spz7a
+.section spz7b
+.section spz8a
+.section spz8b
+.section spz9a
+.section spz9b
+.section spz0a
+.section spz0b
+.section sp1aa
+.section sp1ab
+.section sp1ba
+.section sp1bb
+.section sp1ca
+.section sp1cb
+.section sp1da
+.section sp1db
+.section sp1ea
+.section sp1eb
+.section sp1fa
+.section sp1fb
+.section sp1ga
+.section sp1gb
+.section sp1ha
+.section sp1hb
+.section sp1ia
+.section sp1ib
+.section sp1ja
+.section sp1jb
+.section sp1ka
+.section sp1kb
+.section sp1la
+.section sp1lb
+.section sp1ma
+.section sp1mb
+.section sp1na
+.section sp1nb
+.section sp1oa
+.section sp1ob
+.section sp1pa
+.section sp1pb
+.section sp1qa
+.section sp1qb
+.section sp1ra
+.section sp1rb
+.section sp1sa
+.section sp1sb
+.section sp1ta
+.section sp1tb
+.section sp1ua
+.section sp1ub
+.section sp1va
+.section sp1vb
+.section sp1wa
+.section sp1wb
+.section sp1xa
+.section sp1xb
+.section sp1ya
+.section sp1yb
+.section sp1za
+.section sp1zb
+.section sp11a
+.section sp11b
+.section sp12a
+.section sp12b
+.section sp13a
+.section sp13b
+.section sp14a
+.section sp14b
+.section sp15a
+.section sp15b
+.section sp16a
+.section sp16b
+.section sp17a
+.section sp17b
+.section sp18a
+.section sp18b
+.section sp19a
+.section sp19b
+.section sp10a
+.section sp10b
+.section sp2aa
+.section sp2ab
+.section sp2ba
+.section sp2bb
+.section sp2ca
+.section sp2cb
+.section sp2da
+.section sp2db
+.section sp2ea
+.section sp2eb
+.section sp2fa
+.section sp2fb
+.section sp2ga
+.section sp2gb
+.section sp2ha
+.section sp2hb
+.section sp2ia
+.section sp2ib
+.section sp2ja
+.section sp2jb
+.section sp2ka
+.section sp2kb
+.section sp2la
+.section sp2lb
+.section sp2ma
+.section sp2mb
+.section sp2na
+.section sp2nb
+.section sp2oa
+.section sp2ob
+.section sp2pa
+.section sp2pb
+.section sp2qa
+.section sp2qb
+.section sp2ra
+.section sp2rb
+.section sp2sa
+.section sp2sb
+.section sp2ta
+.section sp2tb
+.section sp2ua
+.section sp2ub
+.section sp2va
+.section sp2vb
+.section sp2wa
+.section sp2wb
+.section sp2xa
+.section sp2xb
+.section sp2ya
+.section sp2yb
+.section sp2za
+.section sp2zb
+.section sp21a
+.section sp21b
+.section sp22a
+.section sp22b
+.section sp23a
+.section sp23b
+.section sp24a
+.section sp24b
+.section sp25a
+.section sp25b
+.section sp26a
+.section sp26b
+.section sp27a
+.section sp27b
+.section sp28a
+.section sp28b
+.section sp29a
+.section sp29b
+.section sp20a
+.section sp20b
+.section sp3aa
+.section sp3ab
+.section sp3ba
+.section sp3bb
+.section sp3ca
+.section sp3cb
+.section sp3da
+.section sp3db
+.section sp3ea
+.section sp3eb
+.section sp3fa
+.section sp3fb
+.section sp3ga
+.section sp3gb
+.section sp3ha
+.section sp3hb
+.section sp3ia
+.section sp3ib
+.section sp3ja
+.section sp3jb
+.section sp3ka
+.section sp3kb
+.section sp3la
+.section sp3lb
+.section sp3ma
+.section sp3mb
+.section sp3na
+.section sp3nb
+.section sp3oa
+.section sp3ob
+.section sp3pa
+.section sp3pb
+.section sp3qa
+.section sp3qb
+.section sp3ra
+.section sp3rb
+.section sp3sa
+.section sp3sb
+.section sp3ta
+.section sp3tb
+.section sp3ua
+.section sp3ub
+.section sp3va
+.section sp3vb
+.section sp3wa
+.section sp3wb
+.section sp3xa
+.section sp3xb
+.section sp3ya
+.section sp3yb
+.section sp3za
+.section sp3zb
+.section sp31a
+.section sp31b
+.section sp32a
+.section sp32b
+.section sp33a
+.section sp33b
+.section sp34a
+.section sp34b
+.section sp35a
+.section sp35b
+.section sp36a
+.section sp36b
+.section sp37a
+.section sp37b
+.section sp38a
+.section sp38b
+.section sp39a
+.section sp39b
+.section sp30a
+.section sp30b
+.section sp4aa
+.section sp4ab
+.section sp4ba
+.section sp4bb
+.section sp4ca
+.section sp4cb
+.section sp4da
+.section sp4db
+.section sp4ea
+.section sp4eb
+.section sp4fa
+.section sp4fb
+.section sp4ga
+.section sp4gb
+.section sp4ha
+.section sp4hb
+.section sp4ia
+.section sp4ib
+.section sp4ja
+.section sp4jb
+.section sp4ka
+.section sp4kb
+.section sp4la
+.section sp4lb
+.section sp4ma
+.section sp4mb
+.section sp4na
+.section sp4nb
+.section sp4oa
+.section sp4ob
+.section sp4pa
+.section sp4pb
+.section sp4qa
+.section sp4qb
+.section sp4ra
+.section sp4rb
+.section sp4sa
+.section sp4sb
+.section sp4ta
+.section sp4tb
+.section sp4ua
+.section sp4ub
+.section sp4va
+.section sp4vb
+.section sp4wa
+.section sp4wb
+.section sp4xa
+.section sp4xb
+.section sp4ya
+.section sp4yb
+.section sp4za
+.section sp4zb
+.section sp41a
+.section sp41b
+.section sp42a
+.section sp42b
+.section sp43a
+.section sp43b
+.section sp44a
+.section sp44b
+.section sp45a
+.section sp45b
+.section sp46a
+.section sp46b
+.section sp47a
+.section sp47b
+.section sp48a
+.section sp48b
+.section sp49a
+.section sp49b
+.section sp40a
+.section sp40b
+.section sp5aa
+.section sp5ab
+.section sp5ba
+.section sp5bb
+.section sp5ca
+.section sp5cb
+.section sp5da
+.section sp5db
+.section sp5ea
+.section sp5eb
+.section sp5fa
+.section sp5fb
+.section sp5ga
+.section sp5gb
+.section sp5ha
+.section sp5hb
+.section sp5ia
+.section sp5ib
+.section sp5ja
+.section sp5jb
+.section sp5ka
+.section sp5kb
+.section sp5la
+.section sp5lb
+.section sp5ma
+.section sp5mb
+.section sp5na
+.section sp5nb
+.section sp5oa
+.section sp5ob
+.section sp5pa
+.section sp5pb
+.section sp5qa
+.section sp5qb
+.section sp5ra
+.section sp5rb
+.section sp5sa
+.section sp5sb
+.section sp5ta
+.section sp5tb
+.section sp5ua
+.section sp5ub
+.section sp5va
+.section sp5vb
+.section sp5wa
+.section sp5wb
+.section sp5xa
+.section sp5xb
+.section sp5ya
+.section sp5yb
+.section sp5za
+.section sp5zb
+.section sp51a
+.section sp51b
+.section sp52a
+.section sp52b
+.section sp53a
+.section sp53b
+.section sp54a
+.section sp54b
+.section sp55a
+.section sp55b
+.section sp56a
+.section sp56b
+.section sp57a
+.section sp57b
+.section sp58a
+.section sp58b
+.section sp59a
+.section sp59b
+.section sp50a
+.section sp50b
+.section sp6aa
+.section sp6ab
+.section sp6ba
+.section sp6bb
+.section sp6ca
+.section sp6cb
+.section sp6da
+.section sp6db
+.section sp6ea
+.section sp6eb
+.section sp6fa
+.section sp6fb
+.section sp6ga
+.section sp6gb
+.section sp6ha
+.section sp6hb
+.section sp6ia
+.section sp6ib
+.section sp6ja
+.section sp6jb
+.section sp6ka
+.section sp6kb
+.section sp6la
+.section sp6lb
+.section sp6ma
+.section sp6mb
+.section sp6na
+.section sp6nb
+.section sp6oa
+.section sp6ob
+.section sp6pa
+.section sp6pb
+.section sp6qa
+.section sp6qb
+.section sp6ra
+.section sp6rb
+.section sp6sa
+.section sp6sb
+.section sp6ta
+.section sp6tb
+.section sp6ua
+.section sp6ub
+.section sp6va
+.section sp6vb
+.section sp6wa
+.section sp6wb
+.section sp6xa
+.section sp6xb
+.section sp6ya
+.section sp6yb
+.section sp6za
+.section sp6zb
+.section sp61a
+.section sp61b
+.section sp62a
+.section sp62b
+.section sp63a
+.section sp63b
+.section sp64a
+.section sp64b
+.section sp65a
+.section sp65b
+.section sp66a
+.section sp66b
+.section sp67a
+.section sp67b
+.section sp68a
+.section sp68b
+.section sp69a
+.section sp69b
+.section sp60a
+.section sp60b
+.section sp7aa
+.section sp7ab
+.section sp7ba
+.section sp7bb
+.section sp7ca
+.section sp7cb
+.section sp7da
+.section sp7db
+.section sp7ea
+.section sp7eb
+.section sp7fa
+.section sp7fb
+.section sp7ga
+.section sp7gb
+.section sp7ha
+.section sp7hb
+.section sp7ia
+.section sp7ib
+.section sp7ja
+.section sp7jb
+.section sp7ka
+.section sp7kb
+.section sp7la
+.section sp7lb
+.section sp7ma
+.section sp7mb
+.section sp7na
+.section sp7nb
+.section sp7oa
+.section sp7ob
+.section sp7pa
+.section sp7pb
+.section sp7qa
+.section sp7qb
+.section sp7ra
+.section sp7rb
+.section sp7sa
+.section sp7sb
+.section sp7ta
+.section sp7tb
+.section sp7ua
+.section sp7ub
+.section sp7va
+.section sp7vb
+.section sp7wa
+.section sp7wb
+.section sp7xa
+.section sp7xb
+.section sp7ya
+.section sp7yb
+.section sp7za
+.section sp7zb
+.section sp71a
+.section sp71b
+.section sp72a
+.section sp72b
+.section sp73a
+.section sp73b
+.section sp74a
+.section sp74b
+.section sp75a
+.section sp75b
+.section sp76a
+.section sp76b
+.section sp77a
+.section sp77b
+.section sp78a
+.section sp78b
+.section sp79a
+.section sp79b
+.section sp70a
+.section sp70b
+.section sp8aa
+.section sp8ab
+.section sp8ba
+.section sp8bb
+.section sp8ca
+.section sp8cb
+.section sp8da
+.section sp8db
+.section sp8ea
+.section sp8eb
+.section sp8fa
+.section sp8fb
+.section sp8ga
+.section sp8gb
+.section sp8ha
+.section sp8hb
+.section sp8ia
+.section sp8ib
+.section sp8ja
+.section sp8jb
+.section sp8ka
+.section sp8kb
+.section sp8la
+.section sp8lb
+.section sp8ma
+.section sp8mb
+.section sp8na
+.section sp8nb
+.section sp8oa
+.section sp8ob
+.section sp8pa
+.section sp8pb
+.section sp8qa
+.section sp8qb
+.section sp8ra
+.section sp8rb
+.section sp8sa
+.section sp8sb
+.section sp8ta
+.section sp8tb
+.section sp8ua
+.section sp8ub
+.section sp8va
+.section sp8vb
+.section sp8wa
+.section sp8wb
+.section sp8xa
+.section sp8xb
+.section sp8ya
+.section sp8yb
+.section sp8za
+.section sp8zb
+.section sp81a
+.section sp81b
+.section sp82a
+.section sp82b
+.section sp83a
+.section sp83b
+.section sp84a
+.section sp84b
+.section sp85a
+.section sp85b
+.section sp86a
+.section sp86b
+.section sp87a
+.section sp87b
+.section sp88a
+.section sp88b
+.section sp89a
+.section sp89b
+.section sp80a
+.section sp80b
+.section sp9aa
+.section sp9ab
+.section sp9ba
+.section sp9bb
+.section sp9ca
+.section sp9cb
+.section sp9da
+.section sp9db
+.section sp9ea
+.section sp9eb
+.section sp9fa
+.section sp9fb
+.section sp9ga
+.section sp9gb
+.section sp9ha
+.section sp9hb
+.section sp9ia
+.section sp9ib
+.section sp9ja
+.section sp9jb
+.section sp9ka
+.section sp9kb
+.section sp9la
+.section sp9lb
+.section sp9ma
+.section sp9mb
+.section sp9na
+.section sp9nb
+.section sp9oa
+.section sp9ob
+.section sp9pa
+.section sp9pb
+.section sp9qa
+.section sp9qb
+.section sp9ra
+.section sp9rb
+.section sp9sa
+.section sp9sb
+.section sp9ta
+.section sp9tb
+.section sp9ua
+.section sp9ub
+.section sp9va
+.section sp9vb
+.section sp9wa
+.section sp9wb
+.section sp9xa
+.section sp9xb
+.section sp9ya
+.section sp9yb
+.section sp9za
+.section sp9zb
+.section sp91a
+.section sp91b
+.section sp92a
+.section sp92b
+.section sp93a
+.section sp93b
+.section sp94a
+.section sp94b
+.section sp95a
+.section sp95b
+.section sp96a
+.section sp96b
+.section sp97a
+.section sp97b
+.section sp98a
+.section sp98b
+.section sp99a
+.section sp99b
+.section sp90a
+.section sp90b
+.section sp0aa
+.section sp0ab
+.section sp0ba
+.section sp0bb
+.section sp0ca
+.section sp0cb
+.section sp0da
+.section sp0db
+.section sp0ea
+.section sp0eb
+.section sp0fa
+.section sp0fb
+.section sp0ga
+.section sp0gb
+.section sp0ha
+.section sp0hb
+.section sp0ia
+.section sp0ib
+.section sp0ja
+.section sp0jb
+.section sp0ka
+.section sp0kb
+.section sp0la
+.section sp0lb
+.section sp0ma
+.section sp0mb
+.section sp0na
+.section sp0nb
+.section sp0oa
+.section sp0ob
+.section sp0pa
+.section sp0pb
+.section sp0qa
+.section sp0qb
+.section sp0ra
+.section sp0rb
+.section sp0sa
+.section sp0sb
+.section sp0ta
+.section sp0tb
+.section sp0ua
+.section sp0ub
+.section sp0va
+.section sp0vb
+.section sp0wa
+.section sp0wb
+.section sp0xa
+.section sp0xb
+.section sp0ya
+.section sp0yb
+.section sp0za
+.section sp0zb
+.section sp01a
+.section sp01b
+.section sp02a
+.section sp02b
+.section sp03a
+.section sp03b
+.section sp04a
+.section sp04b
+.section sp05a
+.section sp05b
+.section sp06a
+.section sp06b
+.section sp07a
+.section sp07b
+.section sp08a
+.section sp08b
+.section sp09a
+.section sp09b
+.section sp00a
+.section sp00b
+.section sqaaa
+.section sqaab
+.section sqaba
+.section sqabb
+.section sqaca
+.section sqacb
+.section sqada
+.section sqadb
+.section sqaea
+.section sqaeb
+.section sqafa
+.section sqafb
+.section sqaga
+.section sqagb
+.section sqaha
+.section sqahb
+.section sqaia
+.section sqaib
+.section sqaja
+.section sqajb
+.section sqaka
+.section sqakb
+.section sqala
+.section sqalb
+.section sqama
+.section sqamb
+.section sqana
+.section sqanb
+.section sqaoa
+.section sqaob
+.section sqapa
+.section sqapb
+.section sqaqa
+.section sqaqb
+.section sqara
+.section sqarb
+.section sqasa
+.section sqasb
+.section sqata
+.section sqatb
+.section sqaua
+.section sqaub
+.section sqava
+.section sqavb
+.section sqawa
+.section sqawb
+.section sqaxa
+.section sqaxb
+.section sqaya
+.section sqayb
+.section sqaza
+.section sqazb
+.section sqa1a
+.section sqa1b
+.section sqa2a
+.section sqa2b
+.section sqa3a
+.section sqa3b
+.section sqa4a
+.section sqa4b
+.section sqa5a
+.section sqa5b
+.section sqa6a
+.section sqa6b
+.section sqa7a
+.section sqa7b
+.section sqa8a
+.section sqa8b
+.section sqa9a
+.section sqa9b
+.section sqa0a
+.section sqa0b
+.section sqbaa
+.section sqbab
+.section sqbba
+.section sqbbb
+.section sqbca
+.section sqbcb
+.section sqbda
+.section sqbdb
+.section sqbea
+.section sqbeb
+.section sqbfa
+.section sqbfb
+.section sqbga
+.section sqbgb
+.section sqbha
+.section sqbhb
+.section sqbia
+.section sqbib
+.section sqbja
+.section sqbjb
+.section sqbka
+.section sqbkb
+.section sqbla
+.section sqblb
+.section sqbma
+.section sqbmb
+.section sqbna
+.section sqbnb
+.section sqboa
+.section sqbob
+.section sqbpa
+.section sqbpb
+.section sqbqa
+.section sqbqb
+.section sqbra
+.section sqbrb
+.section sqbsa
+.section sqbsb
+.section sqbta
+.section sqbtb
+.section sqbua
+.section sqbub
+.section sqbva
+.section sqbvb
+.section sqbwa
+.section sqbwb
+.section sqbxa
+.section sqbxb
+.section sqbya
+.section sqbyb
+.section sqbza
+.section sqbzb
+.section sqb1a
+.section sqb1b
+.section sqb2a
+.section sqb2b
+.section sqb3a
+.section sqb3b
+.section sqb4a
+.section sqb4b
+.section sqb5a
+.section sqb5b
+.section sqb6a
+.section sqb6b
+.section sqb7a
+.section sqb7b
+.section sqb8a
+.section sqb8b
+.section sqb9a
+.section sqb9b
+.section sqb0a
+.section sqb0b
+.section sqcaa
+.section sqcab
+.section sqcba
+.section sqcbb
+.section sqcca
+.section sqccb
+.section sqcda
+.section sqcdb
+.section sqcea
+.section sqceb
+.section sqcfa
+.section sqcfb
+.section sqcga
+.section sqcgb
+.section sqcha
+.section sqchb
+.section sqcia
+.section sqcib
+.section sqcja
+.section sqcjb
+.section sqcka
+.section sqckb
+.section sqcla
+.section sqclb
+.section sqcma
+.section sqcmb
+.section sqcna
+.section sqcnb
+.section sqcoa
+.section sqcob
+.section sqcpa
+.section sqcpb
+.section sqcqa
+.section sqcqb
+.section sqcra
+.section sqcrb
+.section sqcsa
+.section sqcsb
+.section sqcta
+.section sqctb
+.section sqcua
+.section sqcub
+.section sqcva
+.section sqcvb
+.section sqcwa
+.section sqcwb
+.section sqcxa
+.section sqcxb
+.section sqcya
+.section sqcyb
+.section sqcza
+.section sqczb
+.section sqc1a
+.section sqc1b
+.section sqc2a
+.section sqc2b
+.section sqc3a
+.section sqc3b
+.section sqc4a
+.section sqc4b
+.section sqc5a
+.section sqc5b
+.section sqc6a
+.section sqc6b
+.section sqc7a
+.section sqc7b
+.section sqc8a
+.section sqc8b
+.section sqc9a
+.section sqc9b
+.section sqc0a
+.section sqc0b
+.section sqdaa
+.section sqdab
+.section sqdba
+.section sqdbb
+.section sqdca
+.section sqdcb
+.section sqdda
+.section sqddb
+.section sqdea
+.section sqdeb
+.section sqdfa
+.section sqdfb
+.section sqdga
+.section sqdgb
+.section sqdha
+.section sqdhb
+.section sqdia
+.section sqdib
+.section sqdja
+.section sqdjb
+.section sqdka
+.section sqdkb
+.section sqdla
+.section sqdlb
+.section sqdma
+.section sqdmb
+.section sqdna
+.section sqdnb
+.section sqdoa
+.section sqdob
+.section sqdpa
+.section sqdpb
+.section sqdqa
+.section sqdqb
+.section sqdra
+.section sqdrb
+.section sqdsa
+.section sqdsb
+.section sqdta
+.section sqdtb
+.section sqdua
+.section sqdub
+.section sqdva
+.section sqdvb
+.section sqdwa
+.section sqdwb
+.section sqdxa
+.section sqdxb
+.section sqdya
+.section sqdyb
+.section sqdza
+.section sqdzb
+.section sqd1a
+.section sqd1b
+.section sqd2a
+.section sqd2b
+.section sqd3a
+.section sqd3b
+.section sqd4a
+.section sqd4b
+.section sqd5a
+.section sqd5b
+.section sqd6a
+.section sqd6b
+.section sqd7a
+.section sqd7b
+.section sqd8a
+.section sqd8b
+.section sqd9a
+.section sqd9b
+.section sqd0a
+.section sqd0b
+.section sqeaa
+.section sqeab
+.section sqeba
+.section sqebb
+.section sqeca
+.section sqecb
+.section sqeda
+.section sqedb
+.section sqeea
+.section sqeeb
+.section sqefa
+.section sqefb
+.section sqega
+.section sqegb
+.section sqeha
+.section sqehb
+.section sqeia
+.section sqeib
+.section sqeja
+.section sqejb
+.section sqeka
+.section sqekb
+.section sqela
+.section sqelb
+.section sqema
+.section sqemb
+.section sqena
+.section sqenb
+.section sqeoa
+.section sqeob
+.section sqepa
+.section sqepb
+.section sqeqa
+.section sqeqb
+.section sqera
+.section sqerb
+.section sqesa
+.section sqesb
+.section sqeta
+.section sqetb
+.section sqeua
+.section sqeub
+.section sqeva
+.section sqevb
+.section sqewa
+.section sqewb
+.section sqexa
+.section sqexb
+.section sqeya
+.section sqeyb
+.section sqeza
+.section sqezb
+.section sqe1a
+.section sqe1b
+.section sqe2a
+.section sqe2b
+.section sqe3a
+.section sqe3b
+.section sqe4a
+.section sqe4b
+.section sqe5a
+.section sqe5b
+.section sqe6a
+.section sqe6b
+.section sqe7a
+.section sqe7b
+.section sqe8a
+.section sqe8b
+.section sqe9a
+.section sqe9b
+.section sqe0a
+.section sqe0b
+.section sqfaa
+.section sqfab
+.section sqfba
+.section sqfbb
+.section sqfca
+.section sqfcb
+.section sqfda
+.section sqfdb
+.section sqfea
+.section sqfeb
+.section sqffa
+.section sqffb
+.section sqfga
+.section sqfgb
+.section sqfha
+.section sqfhb
+.section sqfia
+.section sqfib
+.section sqfja
+.section sqfjb
+.section sqfka
+.section sqfkb
+.section sqfla
+.section sqflb
+.section sqfma
+.section sqfmb
+.section sqfna
+.section sqfnb
+.section sqfoa
+.section sqfob
+.section sqfpa
+.section sqfpb
+.section sqfqa
+.section sqfqb
+.section sqfra
+.section sqfrb
+.section sqfsa
+.section sqfsb
+.section sqfta
+.section sqftb
+.section sqfua
+.section sqfub
+.section sqfva
+.section sqfvb
+.section sqfwa
+.section sqfwb
+.section sqfxa
+.section sqfxb
+.section sqfya
+.section sqfyb
+.section sqfza
+.section sqfzb
+.section sqf1a
+.section sqf1b
+.section sqf2a
+.section sqf2b
+.section sqf3a
+.section sqf3b
+.section sqf4a
+.section sqf4b
+.section sqf5a
+.section sqf5b
+.section sqf6a
+.section sqf6b
+.section sqf7a
+.section sqf7b
+.section sqf8a
+.section sqf8b
+.section sqf9a
+.section sqf9b
+.section sqf0a
+.section sqf0b
+.section sqgaa
+.section sqgab
+.section sqgba
+.section sqgbb
+.section sqgca
+.section sqgcb
+.section sqgda
+.section sqgdb
+.section sqgea
+.section sqgeb
+.section sqgfa
+.section sqgfb
+.section sqgga
+.section sqggb
+.section sqgha
+.section sqghb
+.section sqgia
+.section sqgib
+.section sqgja
+.section sqgjb
+.section sqgka
+.section sqgkb
+.section sqgla
+.section sqglb
+.section sqgma
+.section sqgmb
+.section sqgna
+.section sqgnb
+.section sqgoa
+.section sqgob
+.section sqgpa
+.section sqgpb
+.section sqgqa
+.section sqgqb
+.section sqgra
+.section sqgrb
+.section sqgsa
+.section sqgsb
+.section sqgta
+.section sqgtb
+.section sqgua
+.section sqgub
+.section sqgva
+.section sqgvb
+.section sqgwa
+.section sqgwb
+.section sqgxa
+.section sqgxb
+.section sqgya
+.section sqgyb
+.section sqgza
+.section sqgzb
+.section sqg1a
+.section sqg1b
+.section sqg2a
+.section sqg2b
+.section sqg3a
+.section sqg3b
+.section sqg4a
+.section sqg4b
+.section sqg5a
+.section sqg5b
+.section sqg6a
+.section sqg6b
+.section sqg7a
+.section sqg7b
+.section sqg8a
+.section sqg8b
+.section sqg9a
+.section sqg9b
+.section sqg0a
+.section sqg0b
+.section sqhaa
+.section sqhab
+.section sqhba
+.section sqhbb
+.section sqhca
+.section sqhcb
+.section sqhda
+.section sqhdb
+.section sqhea
+.section sqheb
+.section sqhfa
+.section sqhfb
+.section sqhga
+.section sqhgb
+.section sqhha
+.section sqhhb
+.section sqhia
+.section sqhib
+.section sqhja
+.section sqhjb
+.section sqhka
+.section sqhkb
+.section sqhla
+.section sqhlb
+.section sqhma
+.section sqhmb
+.section sqhna
+.section sqhnb
+.section sqhoa
+.section sqhob
+.section sqhpa
+.section sqhpb
+.section sqhqa
+.section sqhqb
+.section sqhra
+.section sqhrb
+.section sqhsa
+.section sqhsb
+.section sqhta
+.section sqhtb
+.section sqhua
+.section sqhub
+.section sqhva
+.section sqhvb
+.section sqhwa
+.section sqhwb
+.section sqhxa
+.section sqhxb
+.section sqhya
+.section sqhyb
+.section sqhza
+.section sqhzb
+.section sqh1a
+.section sqh1b
+.section sqh2a
+.section sqh2b
+.section sqh3a
+.section sqh3b
+.section sqh4a
+.section sqh4b
+.section sqh5a
+.section sqh5b
+.section sqh6a
+.section sqh6b
+.section sqh7a
+.section sqh7b
+.section sqh8a
+.section sqh8b
+.section sqh9a
+.section sqh9b
+.section sqh0a
+.section sqh0b
+.section sqiaa
+.section sqiab
+.section sqiba
+.section sqibb
+.section sqica
+.section sqicb
+.section sqida
+.section sqidb
+.section sqiea
+.section sqieb
+.section sqifa
+.section sqifb
+.section sqiga
+.section sqigb
+.section sqiha
+.section sqihb
+.section sqiia
+.section sqiib
+.section sqija
+.section sqijb
+.section sqika
+.section sqikb
+.section sqila
+.section sqilb
+.section sqima
+.section sqimb
+.section sqina
+.section sqinb
+.section sqioa
+.section sqiob
+.section sqipa
+.section sqipb
+.section sqiqa
+.section sqiqb
+.section sqira
+.section sqirb
+.section sqisa
+.section sqisb
+.section sqita
+.section sqitb
+.section sqiua
+.section sqiub
+.section sqiva
+.section sqivb
+.section sqiwa
+.section sqiwb
+.section sqixa
+.section sqixb
+.section sqiya
+.section sqiyb
+.section sqiza
+.section sqizb
+.section sqi1a
+.section sqi1b
+.section sqi2a
+.section sqi2b
+.section sqi3a
+.section sqi3b
+.section sqi4a
+.section sqi4b
+.section sqi5a
+.section sqi5b
+.section sqi6a
+.section sqi6b
+.section sqi7a
+.section sqi7b
+.section sqi8a
+.section sqi8b
+.section sqi9a
+.section sqi9b
+.section sqi0a
+.section sqi0b
+.section sqjaa
+.section sqjab
+.section sqjba
+.section sqjbb
+.section sqjca
+.section sqjcb
+.section sqjda
+.section sqjdb
+.section sqjea
+.section sqjeb
+.section sqjfa
+.section sqjfb
+.section sqjga
+.section sqjgb
+.section sqjha
+.section sqjhb
+.section sqjia
+.section sqjib
+.section sqjja
+.section sqjjb
+.section sqjka
+.section sqjkb
+.section sqjla
+.section sqjlb
+.section sqjma
+.section sqjmb
+.section sqjna
+.section sqjnb
+.section sqjoa
+.section sqjob
+.section sqjpa
+.section sqjpb
+.section sqjqa
+.section sqjqb
+.section sqjra
+.section sqjrb
+.section sqjsa
+.section sqjsb
+.section sqjta
+.section sqjtb
+.section sqjua
+.section sqjub
+.section sqjva
+.section sqjvb
+.section sqjwa
+.section sqjwb
+.section sqjxa
+.section sqjxb
+.section sqjya
+.section sqjyb
+.section sqjza
+.section sqjzb
+.section sqj1a
+.section sqj1b
+.section sqj2a
+.section sqj2b
+.section sqj3a
+.section sqj3b
+.section sqj4a
+.section sqj4b
+.section sqj5a
+.section sqj5b
+.section sqj6a
+.section sqj6b
+.section sqj7a
+.section sqj7b
+.section sqj8a
+.section sqj8b
+.section sqj9a
+.section sqj9b
+.section sqj0a
+.section sqj0b
+.section sqkaa
+.section sqkab
+.section sqkba
+.section sqkbb
+.section sqkca
+.section sqkcb
+.section sqkda
+.section sqkdb
+.section sqkea
+.section sqkeb
+.section sqkfa
+.section sqkfb
+.section sqkga
+.section sqkgb
+.section sqkha
+.section sqkhb
+.section sqkia
+.section sqkib
+.section sqkja
+.section sqkjb
+.section sqkka
+.section sqkkb
+.section sqkla
+.section sqklb
+.section sqkma
+.section sqkmb
+.section sqkna
+.section sqknb
+.section sqkoa
+.section sqkob
+.section sqkpa
+.section sqkpb
+.section sqkqa
+.section sqkqb
+.section sqkra
+.section sqkrb
+.section sqksa
+.section sqksb
+.section sqkta
+.section sqktb
+.section sqkua
+.section sqkub
+.section sqkva
+.section sqkvb
+.section sqkwa
+.section sqkwb
+.section sqkxa
+.section sqkxb
+.section sqkya
+.section sqkyb
+.section sqkza
+.section sqkzb
+.section sqk1a
+.section sqk1b
+.section sqk2a
+.section sqk2b
+.section sqk3a
+.section sqk3b
+.section sqk4a
+.section sqk4b
+.section sqk5a
+.section sqk5b
+.section sqk6a
+.section sqk6b
+.section sqk7a
+.section sqk7b
+.section sqk8a
+.section sqk8b
+.section sqk9a
+.section sqk9b
+.section sqk0a
+.section sqk0b
+.section sqlaa
+.section sqlab
+.section sqlba
+.section sqlbb
+.section sqlca
+.section sqlcb
+.section sqlda
+.section sqldb
+.section sqlea
+.section sqleb
+.section sqlfa
+.section sqlfb
+.section sqlga
+.section sqlgb
+.section sqlha
+.section sqlhb
+.section sqlia
+.section sqlib
+.section sqlja
+.section sqljb
+.section sqlka
+.section sqlkb
+.section sqlla
+.section sqllb
+.section sqlma
+.section sqlmb
+.section sqlna
+.section sqlnb
+.section sqloa
+.section sqlob
+.section sqlpa
+.section sqlpb
+.section sqlqa
+.section sqlqb
+.section sqlra
+.section sqlrb
+.section sqlsa
+.section sqlsb
+.section sqlta
+.section sqltb
+.section sqlua
+.section sqlub
+.section sqlva
+.section sqlvb
+.section sqlwa
+.section sqlwb
+.section sqlxa
+.section sqlxb
+.section sqlya
+.section sqlyb
+.section sqlza
+.section sqlzb
+.section sql1a
+.section sql1b
+.section sql2a
+.section sql2b
+.section sql3a
+.section sql3b
+.section sql4a
+.section sql4b
+.section sql5a
+.section sql5b
+.section sql6a
+.section sql6b
+.section sql7a
+.section sql7b
+.section sql8a
+.section sql8b
+.section sql9a
+.section sql9b
+.section sql0a
+.section sql0b
+.section sqmaa
+.section sqmab
+.section sqmba
+.section sqmbb
+.section sqmca
+.section sqmcb
+.section sqmda
+.section sqmdb
+.section sqmea
+.section sqmeb
+.section sqmfa
+.section sqmfb
+.section sqmga
+.section sqmgb
+.section sqmha
+.section sqmhb
+.section sqmia
+.section sqmib
+.section sqmja
+.section sqmjb
+.section sqmka
+.section sqmkb
+.section sqmla
+.section sqmlb
+.section sqmma
+.section sqmmb
+.section sqmna
+.section sqmnb
+.section sqmoa
+.section sqmob
+.section sqmpa
+.section sqmpb
+.section sqmqa
+.section sqmqb
+.section sqmra
+.section sqmrb
+.section sqmsa
+.section sqmsb
+.section sqmta
+.section sqmtb
+.section sqmua
+.section sqmub
+.section sqmva
+.section sqmvb
+.section sqmwa
+.section sqmwb
+.section sqmxa
+.section sqmxb
+.section sqmya
+.section sqmyb
+.section sqmza
+.section sqmzb
+.section sqm1a
+.section sqm1b
+.section sqm2a
+.section sqm2b
+.section sqm3a
+.section sqm3b
+.section sqm4a
+.section sqm4b
+.section sqm5a
+.section sqm5b
+.section sqm6a
+.section sqm6b
+.section sqm7a
+.section sqm7b
+.section sqm8a
+.section sqm8b
+.section sqm9a
+.section sqm9b
+.section sqm0a
+.section sqm0b
+.section sqnaa
+.section sqnab
+.section sqnba
+.section sqnbb
+.section sqnca
+.section sqncb
+.section sqnda
+.section sqndb
+.section sqnea
+.section sqneb
+.section sqnfa
+.section sqnfb
+.section sqnga
+.section sqngb
+.section sqnha
+.section sqnhb
+.section sqnia
+.section sqnib
+.section sqnja
+.section sqnjb
+.section sqnka
+.section sqnkb
+.section sqnla
+.section sqnlb
+.section sqnma
+.section sqnmb
+.section sqnna
+.section sqnnb
+.section sqnoa
+.section sqnob
+.section sqnpa
+.section sqnpb
+.section sqnqa
+.section sqnqb
+.section sqnra
+.section sqnrb
+.section sqnsa
+.section sqnsb
+.section sqnta
+.section sqntb
+.section sqnua
+.section sqnub
+.section sqnva
+.section sqnvb
+.section sqnwa
+.section sqnwb
+.section sqnxa
+.section sqnxb
+.section sqnya
+.section sqnyb
+.section sqnza
+.section sqnzb
+.section sqn1a
+.section sqn1b
+.section sqn2a
+.section sqn2b
+.section sqn3a
+.section sqn3b
+.section sqn4a
+.section sqn4b
+.section sqn5a
+.section sqn5b
+.section sqn6a
+.section sqn6b
+.section sqn7a
+.section sqn7b
+.section sqn8a
+.section sqn8b
+.section sqn9a
+.section sqn9b
+.section sqn0a
+.section sqn0b
+.section sqoaa
+.section sqoab
+.section sqoba
+.section sqobb
+.section sqoca
+.section sqocb
+.section sqoda
+.section sqodb
+.section sqoea
+.section sqoeb
+.section sqofa
+.section sqofb
+.section sqoga
+.section sqogb
+.section sqoha
+.section sqohb
+.section sqoia
+.section sqoib
+.section sqoja
+.section sqojb
+.section sqoka
+.section sqokb
+.section sqola
+.section sqolb
+.section sqoma
+.section sqomb
+.section sqona
+.section sqonb
+.section sqooa
+.section sqoob
+.section sqopa
+.section sqopb
+.section sqoqa
+.section sqoqb
+.section sqora
+.section sqorb
+.section sqosa
+.section sqosb
+.section sqota
+.section sqotb
+.section sqoua
+.section sqoub
+.section sqova
+.section sqovb
+.section sqowa
+.section sqowb
+.section sqoxa
+.section sqoxb
+.section sqoya
+.section sqoyb
+.section sqoza
+.section sqozb
+.section sqo1a
+.section sqo1b
+.section sqo2a
+.section sqo2b
+.section sqo3a
+.section sqo3b
+.section sqo4a
+.section sqo4b
+.section sqo5a
+.section sqo5b
+.section sqo6a
+.section sqo6b
+.section sqo7a
+.section sqo7b
+.section sqo8a
+.section sqo8b
+.section sqo9a
+.section sqo9b
+.section sqo0a
+.section sqo0b
+.section sqpaa
+.section sqpab
+.section sqpba
+.section sqpbb
+.section sqpca
+.section sqpcb
+.section sqpda
+.section sqpdb
+.section sqpea
+.section sqpeb
+.section sqpfa
+.section sqpfb
+.section sqpga
+.section sqpgb
+.section sqpha
+.section sqphb
+.section sqpia
+.section sqpib
+.section sqpja
+.section sqpjb
+.section sqpka
+.section sqpkb
+.section sqpla
+.section sqplb
+.section sqpma
+.section sqpmb
+.section sqpna
+.section sqpnb
+.section sqpoa
+.section sqpob
+.section sqppa
+.section sqppb
+.section sqpqa
+.section sqpqb
+.section sqpra
+.section sqprb
+.section sqpsa
+.section sqpsb
+.section sqpta
+.section sqptb
+.section sqpua
+.section sqpub
+.section sqpva
+.section sqpvb
+.section sqpwa
+.section sqpwb
+.section sqpxa
+.section sqpxb
+.section sqpya
+.section sqpyb
+.section sqpza
+.section sqpzb
+.section sqp1a
+.section sqp1b
+.section sqp2a
+.section sqp2b
+.section sqp3a
+.section sqp3b
+.section sqp4a
+.section sqp4b
+.section sqp5a
+.section sqp5b
+.section sqp6a
+.section sqp6b
+.section sqp7a
+.section sqp7b
+.section sqp8a
+.section sqp8b
+.section sqp9a
+.section sqp9b
+.section sqp0a
+.section sqp0b
+.section sqqaa
+.section sqqab
+.section sqqba
+.section sqqbb
+.section sqqca
+.section sqqcb
+.section sqqda
+.section sqqdb
+.section sqqea
+.section sqqeb
+.section sqqfa
+.section sqqfb
+.section sqqga
+.section sqqgb
+.section sqqha
+.section sqqhb
+.section sqqia
+.section sqqib
+.section sqqja
+.section sqqjb
+.section sqqka
+.section sqqkb
+.section sqqla
+.section sqqlb
+.section sqqma
+.section sqqmb
+.section sqqna
+.section sqqnb
+.section sqqoa
+.section sqqob
+.section sqqpa
+.section sqqpb
+.section sqqqa
+.section sqqqb
+.section sqqra
+.section sqqrb
+.section sqqsa
+.section sqqsb
+.section sqqta
+.section sqqtb
+.section sqqua
+.section sqqub
+.section sqqva
+.section sqqvb
+.section sqqwa
+.section sqqwb
+.section sqqxa
+.section sqqxb
+.section sqqya
+.section sqqyb
+.section sqqza
+.section sqqzb
+.section sqq1a
+.section sqq1b
+.section sqq2a
+.section sqq2b
+.section sqq3a
+.section sqq3b
+.section sqq4a
+.section sqq4b
+.section sqq5a
+.section sqq5b
+.section sqq6a
+.section sqq6b
+.section sqq7a
+.section sqq7b
+.section sqq8a
+.section sqq8b
+.section sqq9a
+.section sqq9b
+.section sqq0a
+.section sqq0b
+.section sqraa
+.section sqrab
+.section sqrba
+.section sqrbb
+.section sqrca
+.section sqrcb
+.section sqrda
+.section sqrdb
+.section sqrea
+.section sqreb
+.section sqrfa
+.section sqrfb
+.section sqrga
+.section sqrgb
+.section sqrha
+.section sqrhb
+.section sqria
+.section sqrib
+.section sqrja
+.section sqrjb
+.section sqrka
+.section sqrkb
+.section sqrla
+.section sqrlb
+.section sqrma
+.section sqrmb
+.section sqrna
+.section sqrnb
+.section sqroa
+.section sqrob
+.section sqrpa
+.section sqrpb
+.section sqrqa
+.section sqrqb
+.section sqrra
+.section sqrrb
+.section sqrsa
+.section sqrsb
+.section sqrta
+.section sqrtb
+.section sqrua
+.section sqrub
+.section sqrva
+.section sqrvb
+.section sqrwa
+.section sqrwb
+.section sqrxa
+.section sqrxb
+.section sqrya
+.section sqryb
+.section sqrza
+.section sqrzb
+.section sqr1a
+.section sqr1b
+.section sqr2a
+.section sqr2b
+.section sqr3a
+.section sqr3b
+.section sqr4a
+.section sqr4b
+.section sqr5a
+.section sqr5b
+.section sqr6a
+.section sqr6b
+.section sqr7a
+.section sqr7b
+.section sqr8a
+.section sqr8b
+.section sqr9a
+.section sqr9b
+.section sqr0a
+.section sqr0b
+.section sqsaa
+.section sqsab
+.section sqsba
+.section sqsbb
+.section sqsca
+.section sqscb
+.section sqsda
+.section sqsdb
+.section sqsea
+.section sqseb
+.section sqsfa
+.section sqsfb
+.section sqsga
+.section sqsgb
+.section sqsha
+.section sqshb
+.section sqsia
+.section sqsib
+.section sqsja
+.section sqsjb
+.section sqska
+.section sqskb
+.section sqsla
+.section sqslb
+.section sqsma
+.section sqsmb
+.section sqsna
+.section sqsnb
+.section sqsoa
+.section sqsob
+.section sqspa
+.section sqspb
+.section sqsqa
+.section sqsqb
+.section sqsra
+.section sqsrb
+.section sqssa
+.section sqssb
+.section sqsta
+.section sqstb
+.section sqsua
+.section sqsub
+.section sqsva
+.section sqsvb
+.section sqswa
+.section sqswb
+.section sqsxa
+.section sqsxb
+.section sqsya
+.section sqsyb
+.section sqsza
+.section sqszb
+.section sqs1a
+.section sqs1b
+.section sqs2a
+.section sqs2b
+.section sqs3a
+.section sqs3b
+.section sqs4a
+.section sqs4b
+.section sqs5a
+.section sqs5b
+.section sqs6a
+.section sqs6b
+.section sqs7a
+.section sqs7b
+.section sqs8a
+.section sqs8b
+.section sqs9a
+.section sqs9b
+.section sqs0a
+.section sqs0b
+.section sqtaa
+.section sqtab
+.section sqtba
+.section sqtbb
+.section sqtca
+.section sqtcb
+.section sqtda
+.section sqtdb
+.section sqtea
+.section sqteb
+.section sqtfa
+.section sqtfb
+.section sqtga
+.section sqtgb
+.section sqtha
+.section sqthb
+.section sqtia
+.section sqtib
+.section sqtja
+.section sqtjb
+.section sqtka
+.section sqtkb
+.section sqtla
+.section sqtlb
+.section sqtma
+.section sqtmb
+.section sqtna
+.section sqtnb
+.section sqtoa
+.section sqtob
+.section sqtpa
+.section sqtpb
+.section sqtqa
+.section sqtqb
+.section sqtra
+.section sqtrb
+.section sqtsa
+.section sqtsb
+.section sqtta
+.section sqttb
+.section sqtua
+.section sqtub
+.section sqtva
+.section sqtvb
+.section sqtwa
+.section sqtwb
+.section sqtxa
+.section sqtxb
+.section sqtya
+.section sqtyb
+.section sqtza
+.section sqtzb
+.section sqt1a
+.section sqt1b
+.section sqt2a
+.section sqt2b
+.section sqt3a
+.section sqt3b
+.section sqt4a
+.section sqt4b
+.section sqt5a
+.section sqt5b
+.section sqt6a
+.section sqt6b
+.section sqt7a
+.section sqt7b
+.section sqt8a
+.section sqt8b
+.section sqt9a
+.section sqt9b
+.section sqt0a
+.section sqt0b
+.section squaa
+.section squab
+.section squba
+.section squbb
+.section squca
+.section squcb
+.section squda
+.section squdb
+.section squea
+.section squeb
+.section squfa
+.section squfb
+.section squga
+.section squgb
+.section squha
+.section squhb
+.section squia
+.section squib
+.section squja
+.section squjb
+.section squka
+.section squkb
+.section squla
+.section squlb
+.section squma
+.section squmb
+.section squna
+.section squnb
+.section squoa
+.section squob
+.section squpa
+.section squpb
+.section squqa
+.section squqb
+.section squra
+.section squrb
+.section squsa
+.section squsb
+.section squta
+.section squtb
+.section squua
+.section squub
+.section squva
+.section squvb
+.section squwa
+.section squwb
+.section squxa
+.section squxb
+.section squya
+.section squyb
+.section squza
+.section squzb
+.section squ1a
+.section squ1b
+.section squ2a
+.section squ2b
+.section squ3a
+.section squ3b
+.section squ4a
+.section squ4b
+.section squ5a
+.section squ5b
+.section squ6a
+.section squ6b
+.section squ7a
+.section squ7b
+.section squ8a
+.section squ8b
+.section squ9a
+.section squ9b
+.section squ0a
+.section squ0b
+.section sqvaa
+.section sqvab
+.section sqvba
+.section sqvbb
+.section sqvca
+.section sqvcb
+.section sqvda
+.section sqvdb
+.section sqvea
+.section sqveb
+.section sqvfa
+.section sqvfb
+.section sqvga
+.section sqvgb
+.section sqvha
+.section sqvhb
+.section sqvia
+.section sqvib
+.section sqvja
+.section sqvjb
+.section sqvka
+.section sqvkb
+.section sqvla
+.section sqvlb
+.section sqvma
+.section sqvmb
+.section sqvna
+.section sqvnb
+.section sqvoa
+.section sqvob
+.section sqvpa
+.section sqvpb
+.section sqvqa
+.section sqvqb
+.section sqvra
+.section sqvrb
+.section sqvsa
+.section sqvsb
+.section sqvta
+.section sqvtb
+.section sqvua
+.section sqvub
+.section sqvva
+.section sqvvb
+.section sqvwa
+.section sqvwb
+.section sqvxa
+.section sqvxb
+.section sqvya
+.section sqvyb
+.section sqvza
+.section sqvzb
+.section sqv1a
+.section sqv1b
+.section sqv2a
+.section sqv2b
+.section sqv3a
+.section sqv3b
+.section sqv4a
+.section sqv4b
+.section sqv5a
+.section sqv5b
+.section sqv6a
+.section sqv6b
+.section sqv7a
+.section sqv7b
+.section sqv8a
+.section sqv8b
+.section sqv9a
+.section sqv9b
+.section sqv0a
+.section sqv0b
+.section sqwaa
+.section sqwab
+.section sqwba
+.section sqwbb
+.section sqwca
+.section sqwcb
+.section sqwda
+.section sqwdb
+.section sqwea
+.section sqweb
+.section sqwfa
+.section sqwfb
+.section sqwga
+.section sqwgb
+.section sqwha
+.section sqwhb
+.section sqwia
+.section sqwib
+.section sqwja
+.section sqwjb
+.section sqwka
+.section sqwkb
+.section sqwla
+.section sqwlb
+.section sqwma
+.section sqwmb
+.section sqwna
+.section sqwnb
+.section sqwoa
+.section sqwob
+.section sqwpa
+.section sqwpb
+.section sqwqa
+.section sqwqb
+.section sqwra
+.section sqwrb
+.section sqwsa
+.section sqwsb
+.section sqwta
+.section sqwtb
+.section sqwua
+.section sqwub
+.section sqwva
+.section sqwvb
+.section sqwwa
+.section sqwwb
+.section sqwxa
+.section sqwxb
+.section sqwya
+.section sqwyb
+.section sqwza
+.section sqwzb
+.section sqw1a
+.section sqw1b
+.section sqw2a
+.section sqw2b
+.section sqw3a
+.section sqw3b
+.section sqw4a
+.section sqw4b
+.section sqw5a
+.section sqw5b
+.section sqw6a
+.section sqw6b
+.section sqw7a
+.section sqw7b
+.section sqw8a
+.section sqw8b
+.section sqw9a
+.section sqw9b
+.section sqw0a
+.section sqw0b
+.section sqxaa
+.section sqxab
+.section sqxba
+.section sqxbb
+.section sqxca
+.section sqxcb
+.section sqxda
+.section sqxdb
+.section sqxea
+.section sqxeb
+.section sqxfa
+.section sqxfb
+.section sqxga
+.section sqxgb
+.section sqxha
+.section sqxhb
+.section sqxia
+.section sqxib
+.section sqxja
+.section sqxjb
+.section sqxka
+.section sqxkb
+.section sqxla
+.section sqxlb
+.section sqxma
+.section sqxmb
+.section sqxna
+.section sqxnb
+.section sqxoa
+.section sqxob
+.section sqxpa
+.section sqxpb
+.section sqxqa
+.section sqxqb
+.section sqxra
+.section sqxrb
+.section sqxsa
+.section sqxsb
+.section sqxta
+.section sqxtb
+.section sqxua
+.section sqxub
+.section sqxva
+.section sqxvb
+.section sqxwa
+.section sqxwb
+.section sqxxa
+.section sqxxb
+.section sqxya
+.section sqxyb
+.section sqxza
+.section sqxzb
+.section sqx1a
+.section sqx1b
+.section sqx2a
+.section sqx2b
+.section sqx3a
+.section sqx3b
+.section sqx4a
+.section sqx4b
+.section sqx5a
+.section sqx5b
+.section sqx6a
+.section sqx6b
+.section sqx7a
+.section sqx7b
+.section sqx8a
+.section sqx8b
+.section sqx9a
+.section sqx9b
+.section sqx0a
+.section sqx0b
+.section sqyaa
+.section sqyab
+.section sqyba
+.section sqybb
+.section sqyca
+.section sqycb
+.section sqyda
+.section sqydb
+.section sqyea
+.section sqyeb
+.section sqyfa
+.section sqyfb
+.section sqyga
+.section sqygb
+.section sqyha
+.section sqyhb
+.section sqyia
+.section sqyib
+.section sqyja
+.section sqyjb
+.section sqyka
+.section sqykb
+.section sqyla
+.section sqylb
+.section sqyma
+.section sqymb
+.section sqyna
+.section sqynb
+.section sqyoa
+.section sqyob
+.section sqypa
+.section sqypb
+.section sqyqa
+.section sqyqb
+.section sqyra
+.section sqyrb
+.section sqysa
+.section sqysb
+.section sqyta
+.section sqytb
+.section sqyua
+.section sqyub
+.section sqyva
+.section sqyvb
+.section sqywa
+.section sqywb
+.section sqyxa
+.section sqyxb
+.section sqyya
+.section sqyyb
+.section sqyza
+.section sqyzb
+.section sqy1a
+.section sqy1b
+.section sqy2a
+.section sqy2b
+.section sqy3a
+.section sqy3b
+.section sqy4a
+.section sqy4b
+.section sqy5a
+.section sqy5b
+.section sqy6a
+.section sqy6b
+.section sqy7a
+.section sqy7b
+.section sqy8a
+.section sqy8b
+.section sqy9a
+.section sqy9b
+.section sqy0a
+.section sqy0b
+.section sqzaa
+.section sqzab
+.section sqzba
+.section sqzbb
+.section sqzca
+.section sqzcb
+.section sqzda
+.section sqzdb
+.section sqzea
+.section sqzeb
+.section sqzfa
+.section sqzfb
+.section sqzga
+.section sqzgb
+.section sqzha
+.section sqzhb
+.section sqzia
+.section sqzib
+.section sqzja
+.section sqzjb
+.section sqzka
+.section sqzkb
+.section sqzla
+.section sqzlb
+.section sqzma
+.section sqzmb
+.section sqzna
+.section sqznb
+.section sqzoa
+.section sqzob
+.section sqzpa
+.section sqzpb
+.section sqzqa
+.section sqzqb
+.section sqzra
+.section sqzrb
+.section sqzsa
+.section sqzsb
+.section sqzta
+.section sqztb
+.section sqzua
+.section sqzub
+.section sqzva
+.section sqzvb
+.section sqzwa
+.section sqzwb
+.section sqzxa
+.section sqzxb
+.section sqzya
+.section sqzyb
+.section sqzza
+.section sqzzb
+.section sqz1a
+.section sqz1b
+.section sqz2a
+.section sqz2b
+.section sqz3a
+.section sqz3b
+.section sqz4a
+.section sqz4b
+.section sqz5a
+.section sqz5b
+.section sqz6a
+.section sqz6b
+.section sqz7a
+.section sqz7b
+.section sqz8a
+.section sqz8b
+.section sqz9a
+.section sqz9b
+.section sqz0a
+.section sqz0b
+.section sq1aa
+.section sq1ab
+.section sq1ba
+.section sq1bb
+.section sq1ca
+.section sq1cb
+.section sq1da
+.section sq1db
+.section sq1ea
+.section sq1eb
+.section sq1fa
+.section sq1fb
+.section sq1ga
+.section sq1gb
+.section sq1ha
+.section sq1hb
+.section sq1ia
+.section sq1ib
+.section sq1ja
+.section sq1jb
+.section sq1ka
+.section sq1kb
+.section sq1la
+.section sq1lb
+.section sq1ma
+.section sq1mb
+.section sq1na
+.section sq1nb
+.section sq1oa
+.section sq1ob
+.section sq1pa
+.section sq1pb
+.section sq1qa
+.section sq1qb
+.section sq1ra
+.section sq1rb
+.section sq1sa
+.section sq1sb
+.section sq1ta
+.section sq1tb
+.section sq1ua
+.section sq1ub
+.section sq1va
+.section sq1vb
+.section sq1wa
+.section sq1wb
+.section sq1xa
+.section sq1xb
+.section sq1ya
+.section sq1yb
+.section sq1za
+.section sq1zb
+.section sq11a
+.section sq11b
+.section sq12a
+.section sq12b
+.section sq13a
+.section sq13b
+.section sq14a
+.section sq14b
+.section sq15a
+.section sq15b
+.section sq16a
+.section sq16b
+.section sq17a
+.section sq17b
+.section sq18a
+.section sq18b
+.section sq19a
+.section sq19b
+.section sq10a
+.section sq10b
+.section sq2aa
+.section sq2ab
+.section sq2ba
+.section sq2bb
+.section sq2ca
+.section sq2cb
+.section sq2da
+.section sq2db
+.section sq2ea
+.section sq2eb
+.section sq2fa
+.section sq2fb
+.section sq2ga
+.section sq2gb
+.section sq2ha
+.section sq2hb
+.section sq2ia
+.section sq2ib
+.section sq2ja
+.section sq2jb
+.section sq2ka
+.section sq2kb
+.section sq2la
+.section sq2lb
+.section sq2ma
+.section sq2mb
+.section sq2na
+.section sq2nb
+.section sq2oa
+.section sq2ob
+.section sq2pa
+.section sq2pb
+.section sq2qa
+.section sq2qb
+.section sq2ra
+.section sq2rb
+.section sq2sa
+.section sq2sb
+.section sq2ta
+.section sq2tb
+.section sq2ua
+.section sq2ub
+.section sq2va
+.section sq2vb
+.section sq2wa
+.section sq2wb
+.section sq2xa
+.section sq2xb
+.section sq2ya
+.section sq2yb
+.section sq2za
+.section sq2zb
+.section sq21a
+.section sq21b
+.section sq22a
+.section sq22b
+.section sq23a
+.section sq23b
+.section sq24a
+.section sq24b
+.section sq25a
+.section sq25b
+.section sq26a
+.section sq26b
+.section sq27a
+.section sq27b
+.section sq28a
+.section sq28b
+.section sq29a
+.section sq29b
+.section sq20a
+.section sq20b
+.section sq3aa
+.section sq3ab
+.section sq3ba
+.section sq3bb
+.section sq3ca
+.section sq3cb
+.section sq3da
+.section sq3db
+.section sq3ea
+.section sq3eb
+.section sq3fa
+.section sq3fb
+.section sq3ga
+.section sq3gb
+.section sq3ha
+.section sq3hb
+.section sq3ia
+.section sq3ib
+.section sq3ja
+.section sq3jb
+.section sq3ka
+.section sq3kb
+.section sq3la
+.section sq3lb
+.section sq3ma
+.section sq3mb
+.section sq3na
+.section sq3nb
+.section sq3oa
+.section sq3ob
+.section sq3pa
+.section sq3pb
+.section sq3qa
+.section sq3qb
+.section sq3ra
+.section sq3rb
+.section sq3sa
+.section sq3sb
+.section sq3ta
+.section sq3tb
+.section sq3ua
+.section sq3ub
+.section sq3va
+.section sq3vb
+.section sq3wa
+.section sq3wb
+.section sq3xa
+.section sq3xb
+.section sq3ya
+.section sq3yb
+.section sq3za
+.section sq3zb
+.section sq31a
+.section sq31b
+.section sq32a
+.section sq32b
+.section sq33a
+.section sq33b
+.section sq34a
+.section sq34b
+.section sq35a
+.section sq35b
+.section sq36a
+.section sq36b
+.section sq37a
+.section sq37b
+.section sq38a
+.section sq38b
+.section sq39a
+.section sq39b
+.section sq30a
+.section sq30b
+.section sq4aa
+.section sq4ab
+.section sq4ba
+.section sq4bb
+.section sq4ca
+.section sq4cb
+.section sq4da
+.section sq4db
+.section sq4ea
+.section sq4eb
+.section sq4fa
+.section sq4fb
+.section sq4ga
+.section sq4gb
+.section sq4ha
+.section sq4hb
+.section sq4ia
+.section sq4ib
+.section sq4ja
+.section sq4jb
+.section sq4ka
+.section sq4kb
+.section sq4la
+.section sq4lb
+.section sq4ma
+.section sq4mb
+.section sq4na
+.section sq4nb
+.section sq4oa
+.section sq4ob
+.section sq4pa
+.section sq4pb
+.section sq4qa
+.section sq4qb
+.section sq4ra
+.section sq4rb
+.section sq4sa
+.section sq4sb
+.section sq4ta
+.section sq4tb
+.section sq4ua
+.section sq4ub
+.section sq4va
+.section sq4vb
+.section sq4wa
+.section sq4wb
+.section sq4xa
+.section sq4xb
+.section sq4ya
+.section sq4yb
+.section sq4za
+.section sq4zb
+.section sq41a
+.section sq41b
+.section sq42a
+.section sq42b
+.section sq43a
+.section sq43b
+.section sq44a
+.section sq44b
+.section sq45a
+.section sq45b
+.section sq46a
+.section sq46b
+.section sq47a
+.section sq47b
+.section sq48a
+.section sq48b
+.section sq49a
+.section sq49b
+.section sq40a
+.section sq40b
+.section sq5aa
+.section sq5ab
+.section sq5ba
+.section sq5bb
+.section sq5ca
+.section sq5cb
+.section sq5da
+.section sq5db
+.section sq5ea
+.section sq5eb
+.section sq5fa
+.section sq5fb
+.section sq5ga
+.section sq5gb
+.section sq5ha
+.section sq5hb
+.section sq5ia
+.section sq5ib
+.section sq5ja
+.section sq5jb
+.section sq5ka
+.section sq5kb
+.section sq5la
+.section sq5lb
+.section sq5ma
+.section sq5mb
+.section sq5na
+.section sq5nb
+.section sq5oa
+.section sq5ob
+.section sq5pa
+.section sq5pb
+.section sq5qa
+.section sq5qb
+.section sq5ra
+.section sq5rb
+.section sq5sa
+.section sq5sb
+.section sq5ta
+.section sq5tb
+.section sq5ua
+.section sq5ub
+.section sq5va
+.section sq5vb
+.section sq5wa
+.section sq5wb
+.section sq5xa
+.section sq5xb
+.section sq5ya
+.section sq5yb
+.section sq5za
+.section sq5zb
+.section sq51a
+.section sq51b
+.section sq52a
+.section sq52b
+.section sq53a
+.section sq53b
+.section sq54a
+.section sq54b
+.section sq55a
+.section sq55b
+.section sq56a
+.section sq56b
+.section sq57a
+.section sq57b
+.section sq58a
+.section sq58b
+.section sq59a
+.section sq59b
+.section sq50a
+.section sq50b
+.section sq6aa
+.section sq6ab
+.section sq6ba
+.section sq6bb
+.section sq6ca
+.section sq6cb
+.section sq6da
+.section sq6db
+.section sq6ea
+.section sq6eb
+.section sq6fa
+.section sq6fb
+.section sq6ga
+.section sq6gb
+.section sq6ha
+.section sq6hb
+.section sq6ia
+.section sq6ib
+.section sq6ja
+.section sq6jb
+.section sq6ka
+.section sq6kb
+.section sq6la
+.section sq6lb
+.section sq6ma
+.section sq6mb
+.section sq6na
+.section sq6nb
+.section sq6oa
+.section sq6ob
+.section sq6pa
+.section sq6pb
+.section sq6qa
+.section sq6qb
+.section sq6ra
+.section sq6rb
+.section sq6sa
+.section sq6sb
+.section sq6ta
+.section sq6tb
+.section sq6ua
+.section sq6ub
+.section sq6va
+.section sq6vb
+.section sq6wa
+.section sq6wb
+.section sq6xa
+.section sq6xb
+.section sq6ya
+.section sq6yb
+.section sq6za
+.section sq6zb
+.section sq61a
+.section sq61b
+.section sq62a
+.section sq62b
+.section sq63a
+.section sq63b
+.section sq64a
+.section sq64b
+.section sq65a
+.section sq65b
+.section sq66a
+.section sq66b
+.section sq67a
+.section sq67b
+.section sq68a
+.section sq68b
+.section sq69a
+.section sq69b
+.section sq60a
+.section sq60b
+.section sq7aa
+.section sq7ab
+.section sq7ba
+.section sq7bb
+.section sq7ca
+.section sq7cb
+.section sq7da
+.section sq7db
+.section sq7ea
+.section sq7eb
+.section sq7fa
+.section sq7fb
+.section sq7ga
+.section sq7gb
+.section sq7ha
+.section sq7hb
+.section sq7ia
+.section sq7ib
+.section sq7ja
+.section sq7jb
+.section sq7ka
+.section sq7kb
+.section sq7la
+.section sq7lb
+.section sq7ma
+.section sq7mb
+.section sq7na
+.section sq7nb
+.section sq7oa
+.section sq7ob
+.section sq7pa
+.section sq7pb
+.section sq7qa
+.section sq7qb
+.section sq7ra
+.section sq7rb
+.section sq7sa
+.section sq7sb
+.section sq7ta
+.section sq7tb
+.section sq7ua
+.section sq7ub
+.section sq7va
+.section sq7vb
+.section sq7wa
+.section sq7wb
+.section sq7xa
+.section sq7xb
+.section sq7ya
+.section sq7yb
+.section sq7za
+.section sq7zb
+.section sq71a
+.section sq71b
+.section sq72a
+.section sq72b
+.section sq73a
+.section sq73b
+.section sq74a
+.section sq74b
+.section sq75a
+.section sq75b
+.section sq76a
+.section sq76b
+.section sq77a
+.section sq77b
+.section sq78a
+.section sq78b
+.section sq79a
+.section sq79b
+.section sq70a
+.section sq70b
+.section sq8aa
+.section sq8ab
+.section sq8ba
+.section sq8bb
+.section sq8ca
+.section sq8cb
+.section sq8da
+.section sq8db
+.section sq8ea
+.section sq8eb
+.section sq8fa
+.section sq8fb
+.section sq8ga
+.section sq8gb
+.section sq8ha
+.section sq8hb
+.section sq8ia
+.section sq8ib
+.section sq8ja
+.section sq8jb
+.section sq8ka
+.section sq8kb
+.section sq8la
+.section sq8lb
+.section sq8ma
+.section sq8mb
+.section sq8na
+.section sq8nb
+.section sq8oa
+.section sq8ob
+.section sq8pa
+.section sq8pb
+.section sq8qa
+.section sq8qb
+.section sq8ra
+.section sq8rb
+.section sq8sa
+.section sq8sb
+.section sq8ta
+.section sq8tb
+.section sq8ua
+.section sq8ub
+.section sq8va
+.section sq8vb
+.section sq8wa
+.section sq8wb
+.section sq8xa
+.section sq8xb
+.section sq8ya
+.section sq8yb
+.section sq8za
+.section sq8zb
+.section sq81a
+.section sq81b
+.section sq82a
+.section sq82b
+.section sq83a
+.section sq83b
+.section sq84a
+.section sq84b
+.section sq85a
+.section sq85b
+.section sq86a
+.section sq86b
+.section sq87a
+.section sq87b
+.section sq88a
+.section sq88b
+.section sq89a
+.section sq89b
+.section sq80a
+.section sq80b
+.section sq9aa
+.section sq9ab
+.section sq9ba
+.section sq9bb
+.section sq9ca
+.section sq9cb
+.section sq9da
+.section sq9db
+.section sq9ea
+.section sq9eb
+.section sq9fa
+.section sq9fb
+.section sq9ga
+.section sq9gb
+.section sq9ha
+.section sq9hb
+.section sq9ia
+.section sq9ib
+.section sq9ja
+.section sq9jb
+.section sq9ka
+.section sq9kb
+.section sq9la
+.section sq9lb
+.section sq9ma
+.section sq9mb
+.section sq9na
+.section sq9nb
+.section sq9oa
+.section sq9ob
+.section sq9pa
+.section sq9pb
+.section sq9qa
+.section sq9qb
+.section sq9ra
+.section sq9rb
+.section sq9sa
+.section sq9sb
+.section sq9ta
+.section sq9tb
+.section sq9ua
+.section sq9ub
+.section sq9va
+.section sq9vb
+.section sq9wa
+.section sq9wb
+.section sq9xa
+.section sq9xb
+.section sq9ya
+.section sq9yb
+.section sq9za
+.section sq9zb
+.section sq91a
+.section sq91b
+.section sq92a
+.section sq92b
+.section sq93a
+.section sq93b
+.section sq94a
+.section sq94b
+.section sq95a
+.section sq95b
+.section sq96a
+.section sq96b
+.section sq97a
+.section sq97b
+.section sq98a
+.section sq98b
+.section sq99a
+.section sq99b
+.section sq90a
+.section sq90b
+.section sq0aa
+.section sq0ab
+.section sq0ba
+.section sq0bb
+.section sq0ca
+.section sq0cb
+.section sq0da
+.section sq0db
+.section sq0ea
+.section sq0eb
+.section sq0fa
+.section sq0fb
+.section sq0ga
+.section sq0gb
+.section sq0ha
+.section sq0hb
+.section sq0ia
+.section sq0ib
+.section sq0ja
+.section sq0jb
+.section sq0ka
+.section sq0kb
+.section sq0la
+.section sq0lb
+.section sq0ma
+.section sq0mb
+.section sq0na
+.section sq0nb
+.section sq0oa
+.section sq0ob
+.section sq0pa
+.section sq0pb
+.section sq0qa
+.section sq0qb
+.section sq0ra
+.section sq0rb
+.section sq0sa
+.section sq0sb
+.section sq0ta
+.section sq0tb
+.section sq0ua
+.section sq0ub
+.section sq0va
+.section sq0vb
+.section sq0wa
+.section sq0wb
+.section sq0xa
+.section sq0xb
+.section sq0ya
+.section sq0yb
+.section sq0za
+.section sq0zb
+.section sq01a
+.section sq01b
+.section sq02a
+.section sq02b
+.section sq03a
+.section sq03b
+.section sq04a
+.section sq04b
+.section sq05a
+.section sq05b
+.section sq06a
+.section sq06b
+.section sq07a
+.section sq07b
+.section sq08a
+.section sq08b
+.section sq09a
+.section sq09b
+.section sq00a
+.section sq00b
+.section sraaa
+.section sraab
+.section sraba
+.section srabb
+.section sraca
+.section sracb
+.section srada
+.section sradb
+.section sraea
+.section sraeb
+.section srafa
+.section srafb
+.section sraga
+.section sragb
+.section sraha
+.section srahb
+.section sraia
+.section sraib
+.section sraja
+.section srajb
+.section sraka
+.section srakb
+.section srala
+.section sralb
+.section srama
+.section sramb
+.section srana
+.section sranb
+.section sraoa
+.section sraob
+.section srapa
+.section srapb
+.section sraqa
+.section sraqb
+.section srara
+.section srarb
+.section srasa
+.section srasb
+.section srata
+.section sratb
+.section sraua
+.section sraub
+.section srava
+.section sravb
+.section srawa
+.section srawb
+.section sraxa
+.section sraxb
+.section sraya
+.section srayb
+.section sraza
+.section srazb
+.section sra1a
+.section sra1b
+.section sra2a
+.section sra2b
+.section sra3a
+.section sra3b
+.section sra4a
+.section sra4b
+.section sra5a
+.section sra5b
+.section sra6a
+.section sra6b
+.section sra7a
+.section sra7b
+.section sra8a
+.section sra8b
+.section sra9a
+.section sra9b
+.section sra0a
+.section sra0b
+.section srbaa
+.section srbab
+.section srbba
+.section srbbb
+.section srbca
+.section srbcb
+.section srbda
+.section srbdb
+.section srbea
+.section srbeb
+.section srbfa
+.section srbfb
+.section srbga
+.section srbgb
+.section srbha
+.section srbhb
+.section srbia
+.section srbib
+.section srbja
+.section srbjb
+.section srbka
+.section srbkb
+.section srbla
+.section srblb
+.section srbma
+.section srbmb
+.section srbna
+.section srbnb
+.section srboa
+.section srbob
+.section srbpa
+.section srbpb
+.section srbqa
+.section srbqb
+.section srbra
+.section srbrb
+.section srbsa
+.section srbsb
+.section srbta
+.section srbtb
+.section srbua
+.section srbub
+.section srbva
+.section srbvb
+.section srbwa
+.section srbwb
+.section srbxa
+.section srbxb
+.section srbya
+.section srbyb
+.section srbza
+.section srbzb
+.section srb1a
+.section srb1b
+.section srb2a
+.section srb2b
+.section srb3a
+.section srb3b
+.section srb4a
+.section srb4b
+.section srb5a
+.section srb5b
+.section srb6a
+.section srb6b
+.section srb7a
+.section srb7b
+.section srb8a
+.section srb8b
+.section srb9a
+.section srb9b
+.section srb0a
+.section srb0b
+.section srcaa
+.section srcab
+.section srcba
+.section srcbb
+.section srcca
+.section srccb
+.section srcda
+.section srcdb
+.section srcea
+.section srceb
+.section srcfa
+.section srcfb
+.section srcga
+.section srcgb
+.section srcha
+.section srchb
+.section srcia
+.section srcib
+.section srcja
+.section srcjb
+.section srcka
+.section srckb
+.section srcla
+.section srclb
+.section srcma
+.section srcmb
+.section srcna
+.section srcnb
+.section srcoa
+.section srcob
+.section srcpa
+.section srcpb
+.section srcqa
+.section srcqb
+.section srcra
+.section srcrb
+.section srcsa
+.section srcsb
+.section srcta
+.section srctb
+.section srcua
+.section srcub
+.section srcva
+.section srcvb
+.section srcwa
+.section srcwb
+.section srcxa
+.section srcxb
+.section srcya
+.section srcyb
+.section srcza
+.section srczb
+.section src1a
+.section src1b
+.section src2a
+.section src2b
+.section src3a
+.section src3b
+.section src4a
+.section src4b
+.section src5a
+.section src5b
+.section src6a
+.section src6b
+.section src7a
+.section src7b
+.section src8a
+.section src8b
+.section src9a
+.section src9b
+.section src0a
+.section src0b
+.section srdaa
+.section srdab
+.section srdba
+.section srdbb
+.section srdca
+.section srdcb
+.section srdda
+.section srddb
+.section srdea
+.section srdeb
+.section srdfa
+.section srdfb
+.section srdga
+.section srdgb
+.section srdha
+.section srdhb
+.section srdia
+.section srdib
+.section srdja
+.section srdjb
+.section srdka
+.section srdkb
+.section srdla
+.section srdlb
+.section srdma
+.section srdmb
+.section srdna
+.section srdnb
+.section srdoa
+.section srdob
+.section srdpa
+.section srdpb
+.section srdqa
+.section srdqb
+.section srdra
+.section srdrb
+.section srdsa
+.section srdsb
+.section srdta
+.section srdtb
+.section srdua
+.section srdub
+.section srdva
+.section srdvb
+.section srdwa
+.section srdwb
+.section srdxa
+.section srdxb
+.section srdya
+.section srdyb
+.section srdza
+.section srdzb
+.section srd1a
+.section srd1b
+.section srd2a
+.section srd2b
+.section srd3a
+.section srd3b
+.section srd4a
+.section srd4b
+.section srd5a
+.section srd5b
+.section srd6a
+.section srd6b
+.section srd7a
+.section srd7b
+.section srd8a
+.section srd8b
+.section srd9a
+.section srd9b
+.section srd0a
+.section srd0b
+.section sreaa
+.section sreab
+.section sreba
+.section srebb
+.section sreca
+.section srecb
+.section sreda
+.section sredb
+.section sreea
+.section sreeb
+.section srefa
+.section srefb
+.section srega
+.section sregb
+.section sreha
+.section srehb
+.section sreia
+.section sreib
+.section sreja
+.section srejb
+.section sreka
+.section srekb
+.section srela
+.section srelb
+.section srema
+.section sremb
+.section srena
+.section srenb
+.section sreoa
+.section sreob
+.section srepa
+.section srepb
+.section sreqa
+.section sreqb
+.section srera
+.section srerb
+.section sresa
+.section sresb
+.section sreta
+.section sretb
+.section sreua
+.section sreub
+.section sreva
+.section srevb
+.section srewa
+.section srewb
+.section srexa
+.section srexb
+.section sreya
+.section sreyb
+.section sreza
+.section srezb
+.section sre1a
+.section sre1b
+.section sre2a
+.section sre2b
+.section sre3a
+.section sre3b
+.section sre4a
+.section sre4b
+.section sre5a
+.section sre5b
+.section sre6a
+.section sre6b
+.section sre7a
+.section sre7b
+.section sre8a
+.section sre8b
+.section sre9a
+.section sre9b
+.section sre0a
+.section sre0b
+.section srfaa
+.section srfab
+.section srfba
+.section srfbb
+.section srfca
+.section srfcb
+.section srfda
+.section srfdb
+.section srfea
+.section srfeb
+.section srffa
+.section srffb
+.section srfga
+.section srfgb
+.section srfha
+.section srfhb
+.section srfia
+.section srfib
+.section srfja
+.section srfjb
+.section srfka
+.section srfkb
+.section srfla
+.section srflb
+.section srfma
+.section srfmb
+.section srfna
+.section srfnb
+.section srfoa
+.section srfob
+.section srfpa
+.section srfpb
+.section srfqa
+.section srfqb
+.section srfra
+.section srfrb
+.section srfsa
+.section srfsb
+.section srfta
+.section srftb
+.section srfua
+.section srfub
+.section srfva
+.section srfvb
+.section srfwa
+.section srfwb
+.section srfxa
+.section srfxb
+.section srfya
+.section srfyb
+.section srfza
+.section srfzb
+.section srf1a
+.section srf1b
+.section srf2a
+.section srf2b
+.section srf3a
+.section srf3b
+.section srf4a
+.section srf4b
+.section srf5a
+.section srf5b
+.section srf6a
+.section srf6b
+.section srf7a
+.section srf7b
+.section srf8a
+.section srf8b
+.section srf9a
+.section srf9b
+.section srf0a
+.section srf0b
+.section srgaa
+.section srgab
+.section srgba
+.section srgbb
+.section srgca
+.section srgcb
+.section srgda
+.section srgdb
+.section srgea
+.section srgeb
+.section srgfa
+.section srgfb
+.section srgga
+.section srggb
+.section srgha
+.section srghb
+.section srgia
+.section srgib
+.section srgja
+.section srgjb
+.section srgka
+.section srgkb
+.section srgla
+.section srglb
+.section srgma
+.section srgmb
+.section srgna
+.section srgnb
+.section srgoa
+.section srgob
+.section srgpa
+.section srgpb
+.section srgqa
+.section srgqb
+.section srgra
+.section srgrb
+.section srgsa
+.section srgsb
+.section srgta
+.section srgtb
+.section srgua
+.section srgub
+.section srgva
+.section srgvb
+.section srgwa
+.section srgwb
+.section srgxa
+.section srgxb
+.section srgya
+.section srgyb
+.section srgza
+.section srgzb
+.section srg1a
+.section srg1b
+.section srg2a
+.section srg2b
+.section srg3a
+.section srg3b
+.section srg4a
+.section srg4b
+.section srg5a
+.section srg5b
+.section srg6a
+.section srg6b
+.section srg7a
+.section srg7b
+.section srg8a
+.section srg8b
+.section srg9a
+.section srg9b
+.section srg0a
+.section srg0b
+.section srhaa
+.section srhab
+.section srhba
+.section srhbb
+.section srhca
+.section srhcb
+.section srhda
+.section srhdb
+.section srhea
+.section srheb
+.section srhfa
+.section srhfb
+.section srhga
+.section srhgb
+.section srhha
+.section srhhb
+.section srhia
+.section srhib
+.section srhja
+.section srhjb
+.section srhka
+.section srhkb
+.section srhla
+.section srhlb
+.section srhma
+.section srhmb
+.section srhna
+.section srhnb
+.section srhoa
+.section srhob
+.section srhpa
+.section srhpb
+.section srhqa
+.section srhqb
+.section srhra
+.section srhrb
+.section srhsa
+.section srhsb
+.section srhta
+.section srhtb
+.section srhua
+.section srhub
+.section srhva
+.section srhvb
+.section srhwa
+.section srhwb
+.section srhxa
+.section srhxb
+.section srhya
+.section srhyb
+.section srhza
+.section srhzb
+.section srh1a
+.section srh1b
+.section srh2a
+.section srh2b
+.section srh3a
+.section srh3b
+.section srh4a
+.section srh4b
+.section srh5a
+.section srh5b
+.section srh6a
+.section srh6b
+.section srh7a
+.section srh7b
+.section srh8a
+.section srh8b
+.section srh9a
+.section srh9b
+.section srh0a
+.section srh0b
+.section sriaa
+.section sriab
+.section sriba
+.section sribb
+.section srica
+.section sricb
+.section srida
+.section sridb
+.section sriea
+.section srieb
+.section srifa
+.section srifb
+.section sriga
+.section srigb
+.section sriha
+.section srihb
+.section sriia
+.section sriib
+.section srija
+.section srijb
+.section srika
+.section srikb
+.section srila
+.section srilb
+.section srima
+.section srimb
+.section srina
+.section srinb
+.section srioa
+.section sriob
+.section sripa
+.section sripb
+.section sriqa
+.section sriqb
+.section srira
+.section srirb
+.section srisa
+.section srisb
+.section srita
+.section sritb
+.section sriua
+.section sriub
+.section sriva
+.section srivb
+.section sriwa
+.section sriwb
+.section srixa
+.section srixb
+.section sriya
+.section sriyb
+.section sriza
+.section srizb
+.section sri1a
+.section sri1b
+.section sri2a
+.section sri2b
+.section sri3a
+.section sri3b
+.section sri4a
+.section sri4b
+.section sri5a
+.section sri5b
+.section sri6a
+.section sri6b
+.section sri7a
+.section sri7b
+.section sri8a
+.section sri8b
+.section sri9a
+.section sri9b
+.section sri0a
+.section sri0b
+.section srjaa
+.section srjab
+.section srjba
+.section srjbb
+.section srjca
+.section srjcb
+.section srjda
+.section srjdb
+.section srjea
+.section srjeb
+.section srjfa
+.section srjfb
+.section srjga
+.section srjgb
+.section srjha
+.section srjhb
+.section srjia
+.section srjib
+.section srjja
+.section srjjb
+.section srjka
+.section srjkb
+.section srjla
+.section srjlb
+.section srjma
+.section srjmb
+.section srjna
+.section srjnb
+.section srjoa
+.section srjob
+.section srjpa
+.section srjpb
+.section srjqa
+.section srjqb
+.section srjra
+.section srjrb
+.section srjsa
+.section srjsb
+.section srjta
+.section srjtb
+.section srjua
+.section srjub
+.section srjva
+.section srjvb
+.section srjwa
+.section srjwb
+.section srjxa
+.section srjxb
+.section srjya
+.section srjyb
+.section srjza
+.section srjzb
+.section srj1a
+.section srj1b
+.section srj2a
+.section srj2b
+.section srj3a
+.section srj3b
+.section srj4a
+.section srj4b
+.section srj5a
+.section srj5b
+.section srj6a
+.section srj6b
+.section srj7a
+.section srj7b
+.section srj8a
+.section srj8b
+.section srj9a
+.section srj9b
+.section srj0a
+.section srj0b
+.section srkaa
+.section srkab
+.section srkba
+.section srkbb
+.section srkca
+.section srkcb
+.section srkda
+.section srkdb
+.section srkea
+.section srkeb
+.section srkfa
+.section srkfb
+.section srkga
+.section srkgb
+.section srkha
+.section srkhb
+.section srkia
+.section srkib
+.section srkja
+.section srkjb
+.section srkka
+.section srkkb
+.section srkla
+.section srklb
+.section srkma
+.section srkmb
+.section srkna
+.section srknb
+.section srkoa
+.section srkob
+.section srkpa
+.section srkpb
+.section srkqa
+.section srkqb
+.section srkra
+.section srkrb
+.section srksa
+.section srksb
+.section srkta
+.section srktb
+.section srkua
+.section srkub
+.section srkva
+.section srkvb
+.section srkwa
+.section srkwb
+.section srkxa
+.section srkxb
+.section srkya
+.section srkyb
+.section srkza
+.section srkzb
+.section srk1a
+.section srk1b
+.section srk2a
+.section srk2b
+.section srk3a
+.section srk3b
+.section srk4a
+.section srk4b
+.section srk5a
+.section srk5b
+.section srk6a
+.section srk6b
+.section srk7a
+.section srk7b
+.section srk8a
+.section srk8b
+.section srk9a
+.section srk9b
+.section srk0a
+.section srk0b
+.section srlaa
+.section srlab
+.section srlba
+.section srlbb
+.section srlca
+.section srlcb
+.section srlda
+.section srldb
+.section srlea
+.section srleb
+.section srlfa
+.section srlfb
+.section srlga
+.section srlgb
+.section srlha
+.section srlhb
+.section srlia
+.section srlib
+.section srlja
+.section srljb
+.section srlka
+.section srlkb
+.section srlla
+.section srllb
+.section srlma
+.section srlmb
+.section srlna
+.section srlnb
+.section srloa
+.section srlob
+.section srlpa
+.section srlpb
+.section srlqa
+.section srlqb
+.section srlra
+.section srlrb
+.section srlsa
+.section srlsb
+.section srlta
+.section srltb
+.section srlua
+.section srlub
+.section srlva
+.section srlvb
+.section srlwa
+.section srlwb
+.section srlxa
+.section srlxb
+.section srlya
+.section srlyb
+.section srlza
+.section srlzb
+.section srl1a
+.section srl1b
+.section srl2a
+.section srl2b
+.section srl3a
+.section srl3b
+.section srl4a
+.section srl4b
+.section srl5a
+.section srl5b
+.section srl6a
+.section srl6b
+.section srl7a
+.section srl7b
+.section srl8a
+.section srl8b
+.section srl9a
+.section srl9b
+.section srl0a
+.section srl0b
+.section srmaa
+.section srmab
+.section srmba
+.section srmbb
+.section srmca
+.section srmcb
+.section srmda
+.section srmdb
+.section srmea
+.section srmeb
+.section srmfa
+.section srmfb
+.section srmga
+.section srmgb
+.section srmha
+.section srmhb
+.section srmia
+.section srmib
+.section srmja
+.section srmjb
+.section srmka
+.section srmkb
+.section srmla
+.section srmlb
+.section srmma
+.section srmmb
+.section srmna
+.section srmnb
+.section srmoa
+.section srmob
+.section srmpa
+.section srmpb
+.section srmqa
+.section srmqb
+.section srmra
+.section srmrb
+.section srmsa
+.section srmsb
+.section srmta
+.section srmtb
+.section srmua
+.section srmub
+.section srmva
+.section srmvb
+.section srmwa
+.section srmwb
+.section srmxa
+.section srmxb
+.section srmya
+.section srmyb
+.section srmza
+.section srmzb
+.section srm1a
+.section srm1b
+.section srm2a
+.section srm2b
+.section srm3a
+.section srm3b
+.section srm4a
+.section srm4b
+.section srm5a
+.section srm5b
+.section srm6a
+.section srm6b
+.section srm7a
+.section srm7b
+.section srm8a
+.section srm8b
+.section srm9a
+.section srm9b
+.section srm0a
+.section srm0b
+.section srnaa
+.section srnab
+.section srnba
+.section srnbb
+.section srnca
+.section srncb
+.section srnda
+.section srndb
+.section srnea
+.section srneb
+.section srnfa
+.section srnfb
+.section srnga
+.section srngb
+.section srnha
+.section srnhb
+.section srnia
+.section srnib
+.section srnja
+.section srnjb
+.section srnka
+.section srnkb
+.section srnla
+.section srnlb
+.section srnma
+.section srnmb
+.section srnna
+.section srnnb
+.section srnoa
+.section srnob
+.section srnpa
+.section srnpb
+.section srnqa
+.section srnqb
+.section srnra
+.section srnrb
+.section srnsa
+.section srnsb
+.section srnta
+.section srntb
+.section srnua
+.section srnub
+.section srnva
+.section srnvb
+.section srnwa
+.section srnwb
+.section srnxa
+.section srnxb
+.section srnya
+.section srnyb
+.section srnza
+.section srnzb
+.section srn1a
+.section srn1b
+.section srn2a
+.section srn2b
+.section srn3a
+.section srn3b
+.section srn4a
+.section srn4b
+.section srn5a
+.section srn5b
+.section srn6a
+.section srn6b
+.section srn7a
+.section srn7b
+.section srn8a
+.section srn8b
+.section srn9a
+.section srn9b
+.section srn0a
+.section srn0b
+.section sroaa
+.section sroab
+.section sroba
+.section srobb
+.section sroca
+.section srocb
+.section sroda
+.section srodb
+.section sroea
+.section sroeb
+.section srofa
+.section srofb
+.section sroga
+.section srogb
+.section sroha
+.section srohb
+.section sroia
+.section sroib
+.section sroja
+.section srojb
+.section sroka
+.section srokb
+.section srola
+.section srolb
+.section sroma
+.section sromb
+.section srona
+.section sronb
+.section srooa
+.section sroob
+.section sropa
+.section sropb
+.section sroqa
+.section sroqb
+.section srora
+.section srorb
+.section srosa
+.section srosb
+.section srota
+.section srotb
+.section sroua
+.section sroub
+.section srova
+.section srovb
+.section srowa
+.section srowb
+.section sroxa
+.section sroxb
+.section sroya
+.section sroyb
+.section sroza
+.section srozb
+.section sro1a
+.section sro1b
+.section sro2a
+.section sro2b
+.section sro3a
+.section sro3b
+.section sro4a
+.section sro4b
+.section sro5a
+.section sro5b
+.section sro6a
+.section sro6b
+.section sro7a
+.section sro7b
+.section sro8a
+.section sro8b
+.section sro9a
+.section sro9b
+.section sro0a
+.section sro0b
+.section srpaa
+.section srpab
+.section srpba
+.section srpbb
+.section srpca
+.section srpcb
+.section srpda
+.section srpdb
+.section srpea
+.section srpeb
+.section srpfa
+.section srpfb
+.section srpga
+.section srpgb
+.section srpha
+.section srphb
+.section srpia
+.section srpib
+.section srpja
+.section srpjb
+.section srpka
+.section srpkb
+.section srpla
+.section srplb
+.section srpma
+.section srpmb
+.section srpna
+.section srpnb
+.section srpoa
+.section srpob
+.section srppa
+.section srppb
+.section srpqa
+.section srpqb
+.section srpra
+.section srprb
+.section srpsa
+.section srpsb
+.section srpta
+.section srptb
+.section srpua
+.section srpub
+.section srpva
+.section srpvb
+.section srpwa
+.section srpwb
+.section srpxa
+.section srpxb
+.section srpya
+.section srpyb
+.section srpza
+.section srpzb
+.section srp1a
+.section srp1b
+.section srp2a
+.section srp2b
+.section srp3a
+.section srp3b
+.section srp4a
+.section srp4b
+.section srp5a
+.section srp5b
+.section srp6a
+.section srp6b
+.section srp7a
+.section srp7b
+.section srp8a
+.section srp8b
+.section srp9a
+.section srp9b
+.section srp0a
+.section srp0b
+.section srqaa
+.section srqab
+.section srqba
+.section srqbb
+.section srqca
+.section srqcb
+.section srqda
+.section srqdb
+.section srqea
+.section srqeb
+.section srqfa
+.section srqfb
+.section srqga
+.section srqgb
+.section srqha
+.section srqhb
+.section srqia
+.section srqib
+.section srqja
+.section srqjb
+.section srqka
+.section srqkb
+.section srqla
+.section srqlb
+.section srqma
+.section srqmb
+.section srqna
+.section srqnb
+.section srqoa
+.section srqob
+.section srqpa
+.section srqpb
+.section srqqa
+.section srqqb
+.section srqra
+.section srqrb
+.section srqsa
+.section srqsb
+.section srqta
+.section srqtb
+.section srqua
+.section srqub
+.section srqva
+.section srqvb
+.section srqwa
+.section srqwb
+.section srqxa
+.section srqxb
+.section srqya
+.section srqyb
+.section srqza
+.section srqzb
+.section srq1a
+.section srq1b
+.section srq2a
+.section srq2b
+.section srq3a
+.section srq3b
+.section srq4a
+.section srq4b
+.section srq5a
+.section srq5b
+.section srq6a
+.section srq6b
+.section srq7a
+.section srq7b
+.section srq8a
+.section srq8b
+.section srq9a
+.section srq9b
+.section srq0a
+.section srq0b
+.section srraa
+.section srrab
+.section srrba
+.section srrbb
+.section srrca
+.section srrcb
+.section srrda
+.section srrdb
+.section srrea
+.section srreb
+.section srrfa
+.section srrfb
+.section srrga
+.section srrgb
+.section srrha
+.section srrhb
+.section srria
+.section srrib
+.section srrja
+.section srrjb
+.section srrka
+.section srrkb
+.section srrla
+.section srrlb
+.section srrma
+.section srrmb
+.section srrna
+.section srrnb
+.section srroa
+.section srrob
+.section srrpa
+.section srrpb
+.section srrqa
+.section srrqb
+.section srrra
+.section srrrb
+.section srrsa
+.section srrsb
+.section srrta
+.section srrtb
+.section srrua
+.section srrub
+.section srrva
+.section srrvb
+.section srrwa
+.section srrwb
+.section srrxa
+.section srrxb
+.section srrya
+.section srryb
+.section srrza
+.section srrzb
+.section srr1a
+.section srr1b
+.section srr2a
+.section srr2b
+.section srr3a
+.section srr3b
+.section srr4a
+.section srr4b
+.section srr5a
+.section srr5b
+.section srr6a
+.section srr6b
+.section srr7a
+.section srr7b
+.section srr8a
+.section srr8b
+.section srr9a
+.section srr9b
+.section srr0a
+.section srr0b
+.section srsaa
+.section srsab
+.section srsba
+.section srsbb
+.section srsca
+.section srscb
+.section srsda
+.section srsdb
+.section srsea
+.section srseb
+.section srsfa
+.section srsfb
+.section srsga
+.section srsgb
+.section srsha
+.section srshb
+.section srsia
+.section srsib
+.section srsja
+.section srsjb
+.section srska
+.section srskb
+.section srsla
+.section srslb
+.section srsma
+.section srsmb
+.section srsna
+.section srsnb
+.section srsoa
+.section srsob
+.section srspa
+.section srspb
+.section srsqa
+.section srsqb
+.section srsra
+.section srsrb
+.section srssa
+.section srssb
+.section srsta
+.section srstb
+.section srsua
+.section srsub
+.section srsva
+.section srsvb
+.section srswa
+.section srswb
+.section srsxa
+.section srsxb
+.section srsya
+.section srsyb
+.section srsza
+.section srszb
+.section srs1a
+.section srs1b
+.section srs2a
+.section srs2b
+.section srs3a
+.section srs3b
+.section srs4a
+.section srs4b
+.section srs5a
+.section srs5b
+.section srs6a
+.section srs6b
+.section srs7a
+.section srs7b
+.section srs8a
+.section srs8b
+.section srs9a
+.section srs9b
+.section srs0a
+.section srs0b
+.section srtaa
+.section srtab
+.section srtba
+.section srtbb
+.section srtca
+.section srtcb
+.section srtda
+.section srtdb
+.section srtea
+.section srteb
+.section srtfa
+.section srtfb
+.section srtga
+.section srtgb
+.section srtha
+.section srthb
+.section srtia
+.section srtib
+.section srtja
+.section srtjb
+.section srtka
+.section srtkb
+.section srtla
+.section srtlb
+.section srtma
+.section srtmb
+.section srtna
+.section srtnb
+.section srtoa
+.section srtob
+.section srtpa
+.section srtpb
+.section srtqa
+.section srtqb
+.section srtra
+.section srtrb
+.section srtsa
+.section srtsb
+.section srtta
+.section srttb
+.section srtua
+.section srtub
+.section srtva
+.section srtvb
+.section srtwa
+.section srtwb
+.section srtxa
+.section srtxb
+.section srtya
+.section srtyb
+.section srtza
+.section srtzb
+.section srt1a
+.section srt1b
+.section srt2a
+.section srt2b
+.section srt3a
+.section srt3b
+.section srt4a
+.section srt4b
+.section srt5a
+.section srt5b
+.section srt6a
+.section srt6b
+.section srt7a
+.section srt7b
+.section srt8a
+.section srt8b
+.section srt9a
+.section srt9b
+.section srt0a
+.section srt0b
+.section sruaa
+.section sruab
+.section sruba
+.section srubb
+.section sruca
+.section srucb
+.section sruda
+.section srudb
+.section sruea
+.section srueb
+.section srufa
+.section srufb
+.section sruga
+.section srugb
+.section sruha
+.section sruhb
+.section sruia
+.section sruib
+.section sruja
+.section srujb
+.section sruka
+.section srukb
+.section srula
+.section srulb
+.section sruma
+.section srumb
+.section sruna
+.section srunb
+.section sruoa
+.section sruob
+.section srupa
+.section srupb
+.section sruqa
+.section sruqb
+.section srura
+.section srurb
+.section srusa
+.section srusb
+.section sruta
+.section srutb
+.section sruua
+.section sruub
+.section sruva
+.section sruvb
+.section sruwa
+.section sruwb
+.section sruxa
+.section sruxb
+.section sruya
+.section sruyb
+.section sruza
+.section sruzb
+.section sru1a
+.section sru1b
+.section sru2a
+.section sru2b
+.section sru3a
+.section sru3b
+.section sru4a
+.section sru4b
+.section sru5a
+.section sru5b
+.section sru6a
+.section sru6b
+.section sru7a
+.section sru7b
+.section sru8a
+.section sru8b
+.section sru9a
+.section sru9b
+.section sru0a
+.section sru0b
+.section srvaa
+.section srvab
+.section srvba
+.section srvbb
+.section srvca
+.section srvcb
+.section srvda
+.section srvdb
+.section srvea
+.section srveb
+.section srvfa
+.section srvfb
+.section srvga
+.section srvgb
+.section srvha
+.section srvhb
+.section srvia
+.section srvib
+.section srvja
+.section srvjb
+.section srvka
+.section srvkb
+.section srvla
+.section srvlb
+.section srvma
+.section srvmb
+.section srvna
+.section srvnb
+.section srvoa
+.section srvob
+.section srvpa
+.section srvpb
+.section srvqa
+.section srvqb
+.section srvra
+.section srvrb
+.section srvsa
+.section srvsb
+.section srvta
+.section srvtb
+.section srvua
+.section srvub
+.section srvva
+.section srvvb
+.section srvwa
+.section srvwb
+.section srvxa
+.section srvxb
+.section srvya
+.section srvyb
+.section srvza
+.section srvzb
+.section srv1a
+.section srv1b
+.section srv2a
+.section srv2b
+.section srv3a
+.section srv3b
+.section srv4a
+.section srv4b
+.section srv5a
+.section srv5b
+.section srv6a
+.section srv6b
+.section srv7a
+.section srv7b
+.section srv8a
+.section srv8b
+.section srv9a
+.section srv9b
+.section srv0a
+.section srv0b
+.section srwaa
+.section srwab
+.section srwba
+.section srwbb
+.section srwca
+.section srwcb
+.section srwda
+.section srwdb
+.section srwea
+.section srweb
+.section srwfa
+.section srwfb
+.section srwga
+.section srwgb
+.section srwha
+.section srwhb
+.section srwia
+.section srwib
+.section srwja
+.section srwjb
+.section srwka
+.section srwkb
+.section srwla
+.section srwlb
+.section srwma
+.section srwmb
+.section srwna
+.section srwnb
+.section srwoa
+.section srwob
+.section srwpa
+.section srwpb
+.section srwqa
+.section srwqb
+.section srwra
+.section srwrb
+.section srwsa
+.section srwsb
+.section srwta
+.section srwtb
+.section srwua
+.section srwub
+.section srwva
+.section srwvb
+.section srwwa
+.section srwwb
+.section srwxa
+.section srwxb
+.section srwya
+.section srwyb
+.section srwza
+.section srwzb
+.section srw1a
+.section srw1b
+.section srw2a
+.section srw2b
+.section srw3a
+.section srw3b
+.section srw4a
+.section srw4b
+.section srw5a
+.section srw5b
+.section srw6a
+.section srw6b
+.section srw7a
+.section srw7b
+.section srw8a
+.section srw8b
+.section srw9a
+.section srw9b
+.section srw0a
+.section srw0b
+.section srxaa
+.section srxab
+.section srxba
+.section srxbb
+.section srxca
+.section srxcb
+.section srxda
+.section srxdb
+.section srxea
+.section srxeb
+.section srxfa
+.section srxfb
+.section srxga
+.section srxgb
+.section srxha
+.section srxhb
+.section srxia
+.section srxib
+.section srxja
+.section srxjb
+.section srxka
+.section srxkb
+.section srxla
+.section srxlb
+.section srxma
+.section srxmb
+.section srxna
+.section srxnb
+.section srxoa
+.section srxob
+.section srxpa
+.section srxpb
+.section srxqa
+.section srxqb
+.section srxra
+.section srxrb
+.section srxsa
+.section srxsb
+.section srxta
+.section srxtb
+.section srxua
+.section srxub
+.section srxva
+.section srxvb
+.section srxwa
+.section srxwb
+.section srxxa
+.section srxxb
+.section srxya
+.section srxyb
+.section srxza
+.section srxzb
+.section srx1a
+.section srx1b
+.section srx2a
+.section srx2b
+.section srx3a
+.section srx3b
+.section srx4a
+.section srx4b
+.section srx5a
+.section srx5b
+.section srx6a
+.section srx6b
+.section srx7a
+.section srx7b
+.section srx8a
+.section srx8b
+.section srx9a
+.section srx9b
+.section srx0a
+.section srx0b
+.section sryaa
+.section sryab
+.section sryba
+.section srybb
+.section sryca
+.section srycb
+.section sryda
+.section srydb
+.section sryea
+.section sryeb
+.section sryfa
+.section sryfb
+.section sryga
+.section srygb
+.section sryha
+.section sryhb
+.section sryia
+.section sryib
+.section sryja
+.section sryjb
+.section sryka
+.section srykb
+.section sryla
+.section srylb
+.section sryma
+.section srymb
+.section sryna
+.section srynb
+.section sryoa
+.section sryob
+.section srypa
+.section srypb
+.section sryqa
+.section sryqb
+.section sryra
+.section sryrb
+.section srysa
+.section srysb
+.section sryta
+.section srytb
+.section sryua
+.section sryub
+.section sryva
+.section sryvb
+.section srywa
+.section srywb
+.section sryxa
+.section sryxb
+.section sryya
+.section sryyb
+.section sryza
+.section sryzb
+.section sry1a
+.section sry1b
+.section sry2a
+.section sry2b
+.section sry3a
+.section sry3b
+.section sry4a
+.section sry4b
+.section sry5a
+.section sry5b
+.section sry6a
+.section sry6b
+.section sry7a
+.section sry7b
+.section sry8a
+.section sry8b
+.section sry9a
+.section sry9b
+.section sry0a
+.section sry0b
+.section srzaa
+.section srzab
+.section srzba
+.section srzbb
+.section srzca
+.section srzcb
+.section srzda
+.section srzdb
+.section srzea
+.section srzeb
+.section srzfa
+.section srzfb
+.section srzga
+.section srzgb
+.section srzha
+.section srzhb
+.section srzia
+.section srzib
+.section srzja
+.section srzjb
+.section srzka
+.section srzkb
+.section srzla
+.section srzlb
+.section srzma
+.section srzmb
+.section srzna
+.section srznb
+.section srzoa
+.section srzob
+.section srzpa
+.section srzpb
+.section srzqa
+.section srzqb
+.section srzra
+.section srzrb
+.section srzsa
+.section srzsb
+.section srzta
+.section srztb
+.section srzua
+.section srzub
+.section srzva
+.section srzvb
+.section srzwa
+.section srzwb
+.section srzxa
+.section srzxb
+.section srzya
+.section srzyb
+.section srzza
+.section srzzb
+.section srz1a
+.section srz1b
+.section srz2a
+.section srz2b
+.section srz3a
+.section srz3b
+.section srz4a
+.section srz4b
+.section srz5a
+.section srz5b
+.section srz6a
+.section srz6b
+.section srz7a
+.section srz7b
+.section srz8a
+.section srz8b
+.section srz9a
+.section srz9b
+.section srz0a
+.section srz0b
+.section sr1aa
+.section sr1ab
+.section sr1ba
+.section sr1bb
+.section sr1ca
+.section sr1cb
+.section sr1da
+.section sr1db
+.section sr1ea
+.section sr1eb
+.section sr1fa
+.section sr1fb
+.section sr1ga
+.section sr1gb
+.section sr1ha
+.section sr1hb
+.section sr1ia
+.section sr1ib
+.section sr1ja
+.section sr1jb
+.section sr1ka
+.section sr1kb
+.section sr1la
+.section sr1lb
+.section sr1ma
+.section sr1mb
+.section sr1na
+.section sr1nb
+.section sr1oa
+.section sr1ob
+.section sr1pa
+.section sr1pb
+.section sr1qa
+.section sr1qb
+.section sr1ra
+.section sr1rb
+.section sr1sa
+.section sr1sb
+.section sr1ta
+.section sr1tb
+.section sr1ua
+.section sr1ub
+.section sr1va
+.section sr1vb
+.section sr1wa
+.section sr1wb
+.section sr1xa
+.section sr1xb
+.section sr1ya
+.section sr1yb
+.section sr1za
+.section sr1zb
+.section sr11a
+.section sr11b
+.section sr12a
+.section sr12b
+.section sr13a
+.section sr13b
+.section sr14a
+.section sr14b
+.section sr15a
+.section sr15b
+.section sr16a
+.section sr16b
+.section sr17a
+.section sr17b
+.section sr18a
+.section sr18b
+.section sr19a
+.section sr19b
+.section sr10a
+.section sr10b
+.section sr2aa
+.section sr2ab
+.section sr2ba
+.section sr2bb
+.section sr2ca
+.section sr2cb
+.section sr2da
+.section sr2db
+.section sr2ea
+.section sr2eb
+.section sr2fa
+.section sr2fb
+.section sr2ga
+.section sr2gb
+.section sr2ha
+.section sr2hb
+.section sr2ia
+.section sr2ib
+.section sr2ja
+.section sr2jb
+.section sr2ka
+.section sr2kb
+.section sr2la
+.section sr2lb
+.section sr2ma
+.section sr2mb
+.section sr2na
+.section sr2nb
+.section sr2oa
+.section sr2ob
+.section sr2pa
+.section sr2pb
+.section sr2qa
+.section sr2qb
+.section sr2ra
+.section sr2rb
+.section sr2sa
+.section sr2sb
+.section sr2ta
+.section sr2tb
+.section sr2ua
+.section sr2ub
+.section sr2va
+.section sr2vb
+.section sr2wa
+.section sr2wb
+.section sr2xa
+.section sr2xb
+.section sr2ya
+.section sr2yb
+.section sr2za
+.section sr2zb
+.section sr21a
+.section sr21b
+.section sr22a
+.section sr22b
+.section sr23a
+.section sr23b
+.section sr24a
+.section sr24b
+.section sr25a
+.section sr25b
+.section sr26a
+.section sr26b
+.section sr27a
+.section sr27b
+.section sr28a
+.section sr28b
+.section sr29a
+.section sr29b
+.section sr20a
+.section sr20b
+.section sr3aa
+.section sr3ab
+.section sr3ba
+.section sr3bb
+.section sr3ca
+.section sr3cb
+.section sr3da
+.section sr3db
+.section sr3ea
+.section sr3eb
+.section sr3fa
+.section sr3fb
+.section sr3ga
+.section sr3gb
+.section sr3ha
+.section sr3hb
+.section sr3ia
+.section sr3ib
+.section sr3ja
+.section sr3jb
+.section sr3ka
+.section sr3kb
+.section sr3la
+.section sr3lb
+.section sr3ma
+.section sr3mb
+.section sr3na
+.section sr3nb
+.section sr3oa
+.section sr3ob
+.section sr3pa
+.section sr3pb
+.section sr3qa
+.section sr3qb
+.section sr3ra
+.section sr3rb
+.section sr3sa
+.section sr3sb
+.section sr3ta
+.section sr3tb
+.section sr3ua
+.section sr3ub
+.section sr3va
+.section sr3vb
+.section sr3wa
+.section sr3wb
+.section sr3xa
+.section sr3xb
+.section sr3ya
+.section sr3yb
+.section sr3za
+.section sr3zb
+.section sr31a
+.section sr31b
+.section sr32a
+.section sr32b
+.section sr33a
+.section sr33b
+.section sr34a
+.section sr34b
+.section sr35a
+.section sr35b
+.section sr36a
+.section sr36b
+.section sr37a
+.section sr37b
+.section sr38a
+.section sr38b
+.section sr39a
+.section sr39b
+.section sr30a
+.section sr30b
+.section sr4aa
+.section sr4ab
+.section sr4ba
+.section sr4bb
+.section sr4ca
+.section sr4cb
+.section sr4da
+.section sr4db
+.section sr4ea
+.section sr4eb
+.section sr4fa
+.section sr4fb
+.section sr4ga
+.section sr4gb
+.section sr4ha
+.section sr4hb
+.section sr4ia
+.section sr4ib
+.section sr4ja
+.section sr4jb
+.section sr4ka
+.section sr4kb
+.section sr4la
+.section sr4lb
+.section sr4ma
+.section sr4mb
+.section sr4na
+.section sr4nb
+.section sr4oa
+.section sr4ob
+.section sr4pa
+.section sr4pb
+.section sr4qa
+.section sr4qb
+.section sr4ra
+.section sr4rb
+.section sr4sa
+.section sr4sb
+.section sr4ta
+.section sr4tb
+.section sr4ua
+.section sr4ub
+.section sr4va
+.section sr4vb
+.section sr4wa
+.section sr4wb
+.section sr4xa
+.section sr4xb
+.section sr4ya
+.section sr4yb
+.section sr4za
+.section sr4zb
+.section sr41a
+.section sr41b
+.section sr42a
+.section sr42b
+.section sr43a
+.section sr43b
+.section sr44a
+.section sr44b
+.section sr45a
+.section sr45b
+.section sr46a
+.section sr46b
+.section sr47a
+.section sr47b
+.section sr48a
+.section sr48b
+.section sr49a
+.section sr49b
+.section sr40a
+.section sr40b
+.section sr5aa
+.section sr5ab
+.section sr5ba
+.section sr5bb
+.section sr5ca
+.section sr5cb
+.section sr5da
+.section sr5db
+.section sr5ea
+.section sr5eb
+.section sr5fa
+.section sr5fb
+.section sr5ga
+.section sr5gb
+.section sr5ha
+.section sr5hb
+.section sr5ia
+.section sr5ib
+.section sr5ja
+.section sr5jb
+.section sr5ka
+.section sr5kb
+.section sr5la
+.section sr5lb
+.section sr5ma
+.section sr5mb
+.section sr5na
+.section sr5nb
+.section sr5oa
+.section sr5ob
+.section sr5pa
+.section sr5pb
+.section sr5qa
+.section sr5qb
+.section sr5ra
+.section sr5rb
+.section sr5sa
+.section sr5sb
+.section sr5ta
+.section sr5tb
+.section sr5ua
+.section sr5ub
+.section sr5va
+.section sr5vb
+.section sr5wa
+.section sr5wb
+.section sr5xa
+.section sr5xb
+.section sr5ya
+.section sr5yb
+.section sr5za
+.section sr5zb
+.section sr51a
+.section sr51b
+.section sr52a
+.section sr52b
+.section sr53a
+.section sr53b
+.section sr54a
+.section sr54b
+.section sr55a
+.section sr55b
+.section sr56a
+.section sr56b
+.section sr57a
+.section sr57b
+.section sr58a
+.section sr58b
+.section sr59a
+.section sr59b
+.section sr50a
+.section sr50b
+.section sr6aa
+.section sr6ab
+.section sr6ba
+.section sr6bb
+.section sr6ca
+.section sr6cb
+.section sr6da
+.section sr6db
+.section sr6ea
+.section sr6eb
+.section sr6fa
+.section sr6fb
+.section sr6ga
+.section sr6gb
+.section sr6ha
+.section sr6hb
+.section sr6ia
+.section sr6ib
+.section sr6ja
+.section sr6jb
+.section sr6ka
+.section sr6kb
+.section sr6la
+.section sr6lb
+.section sr6ma
+.section sr6mb
+.section sr6na
+.section sr6nb
+.section sr6oa
+.section sr6ob
+.section sr6pa
+.section sr6pb
+.section sr6qa
+.section sr6qb
+.section sr6ra
+.section sr6rb
+.section sr6sa
+.section sr6sb
+.section sr6ta
+.section sr6tb
+.section sr6ua
+.section sr6ub
+.section sr6va
+.section sr6vb
+.section sr6wa
+.section sr6wb
+.section sr6xa
+.section sr6xb
+.section sr6ya
+.section sr6yb
+.section sr6za
+.section sr6zb
+.section sr61a
+.section sr61b
+.section sr62a
+.section sr62b
+.section sr63a
+.section sr63b
+.section sr64a
+.section sr64b
+.section sr65a
+.section sr65b
+.section sr66a
+.section sr66b
+.section sr67a
+.section sr67b
+.section sr68a
+.section sr68b
+.section sr69a
+.section sr69b
+.section sr60a
+.section sr60b
+.section sr7aa
+.section sr7ab
+.section sr7ba
+.section sr7bb
+.section sr7ca
+.section sr7cb
+.section sr7da
+.section sr7db
+.section sr7ea
+.section sr7eb
+.section sr7fa
+.section sr7fb
+.section sr7ga
+.section sr7gb
+.section sr7ha
+.section sr7hb
+.section sr7ia
+.section sr7ib
+.section sr7ja
+.section sr7jb
+.section sr7ka
+.section sr7kb
+.section sr7la
+.section sr7lb
+.section sr7ma
+.section sr7mb
+.section sr7na
+.section sr7nb
+.section sr7oa
+.section sr7ob
+.section sr7pa
+.section sr7pb
+.section sr7qa
+.section sr7qb
+.section sr7ra
+.section sr7rb
+.section sr7sa
+.section sr7sb
+.section sr7ta
+.section sr7tb
+.section sr7ua
+.section sr7ub
+.section sr7va
+.section sr7vb
+.section sr7wa
+.section sr7wb
+.section sr7xa
+.section sr7xb
+.section sr7ya
+.section sr7yb
+.section sr7za
+.section sr7zb
+.section sr71a
+.section sr71b
+.section sr72a
+.section sr72b
+.section sr73a
+.section sr73b
+.section sr74a
+.section sr74b
+.section sr75a
+.section sr75b
+.section sr76a
+.section sr76b
+.section sr77a
+.section sr77b
+.section sr78a
+.section sr78b
+.section sr79a
+.section sr79b
+.section sr70a
+.section sr70b
+.section sr8aa
+.section sr8ab
+.section sr8ba
+.section sr8bb
+.section sr8ca
+.section sr8cb
+.section sr8da
+.section sr8db
+.section sr8ea
+.section sr8eb
+.section sr8fa
+.section sr8fb
+.section sr8ga
+.section sr8gb
+.section sr8ha
+.section sr8hb
+.section sr8ia
+.section sr8ib
+.section sr8ja
+.section sr8jb
+.section sr8ka
+.section sr8kb
+.section sr8la
+.section sr8lb
+.section sr8ma
+.section sr8mb
+.section sr8na
+.section sr8nb
+.section sr8oa
+.section sr8ob
+.section sr8pa
+.section sr8pb
+.section sr8qa
+.section sr8qb
+.section sr8ra
+.section sr8rb
+.section sr8sa
+.section sr8sb
+.section sr8ta
+.section sr8tb
+.section sr8ua
+.section sr8ub
+.section sr8va
+.section sr8vb
+.section sr8wa
+.section sr8wb
+.section sr8xa
+.section sr8xb
+.section sr8ya
+.section sr8yb
+.section sr8za
+.section sr8zb
+.section sr81a
+.section sr81b
+.section sr82a
+.section sr82b
+.section sr83a
+.section sr83b
+.section sr84a
+.section sr84b
+.section sr85a
+.section sr85b
+.section sr86a
+.section sr86b
+.section sr87a
+.section sr87b
+.section sr88a
+.section sr88b
+.section sr89a
+.section sr89b
+.section sr80a
+.section sr80b
+.section sr9aa
+.section sr9ab
+.section sr9ba
+.section sr9bb
+.section sr9ca
+.section sr9cb
+.section sr9da
+.section sr9db
+.section sr9ea
+.section sr9eb
+.section sr9fa
+.section sr9fb
+.section sr9ga
+.section sr9gb
+.section sr9ha
+.section sr9hb
+.section sr9ia
+.section sr9ib
+.section sr9ja
+.section sr9jb
+.section sr9ka
+.section sr9kb
+.section sr9la
+.section sr9lb
+.section sr9ma
+.section sr9mb
+.section sr9na
+.section sr9nb
+.section sr9oa
+.section sr9ob
+.section sr9pa
+.section sr9pb
+.section sr9qa
+.section sr9qb
+.section sr9ra
+.section sr9rb
+.section sr9sa
+.section sr9sb
+.section sr9ta
+.section sr9tb
+.section sr9ua
+.section sr9ub
+.section sr9va
+.section sr9vb
+.section sr9wa
+.section sr9wb
+.section sr9xa
+.section sr9xb
+.section sr9ya
+.section sr9yb
+.section sr9za
+.section sr9zb
+.section sr91a
+.section sr91b
+.section sr92a
+.section sr92b
+.section sr93a
+.section sr93b
+.section sr94a
+.section sr94b
+.section sr95a
+.section sr95b
+.section sr96a
+.section sr96b
+.section sr97a
+.section sr97b
+.section sr98a
+.section sr98b
+.section sr99a
+.section sr99b
+.section sr90a
+.section sr90b
+.section sr0aa
+.section sr0ab
+.section sr0ba
+.section sr0bb
+.section sr0ca
+.section sr0cb
+.section sr0da
+.section sr0db
+.section sr0ea
+.section sr0eb
+.section sr0fa
+.section sr0fb
+.section sr0ga
+.section sr0gb
+.section sr0ha
+.section sr0hb
+.section sr0ia
+.section sr0ib
+.section sr0ja
+.section sr0jb
+.section sr0ka
+.section sr0kb
+.section sr0la
+.section sr0lb
+.section sr0ma
+.section sr0mb
+.section sr0na
+.section sr0nb
+.section sr0oa
+.section sr0ob
+.section sr0pa
+.section sr0pb
+.section sr0qa
+.section sr0qb
+.section sr0ra
+.section sr0rb
+.section sr0sa
+.section sr0sb
+.section sr0ta
+.section sr0tb
+.section sr0ua
+.section sr0ub
+.section sr0va
+.section sr0vb
+.section sr0wa
+.section sr0wb
+.section sr0xa
+.section sr0xb
+.section sr0ya
+.section sr0yb
+.section sr0za
+.section sr0zb
+.section sr01a
+.section sr01b
+.section sr02a
+.section sr02b
+.section sr03a
+.section sr03b
+.section sr04a
+.section sr04b
+.section sr05a
+.section sr05b
+.section sr06a
+.section sr06b
+.section sr07a
+.section sr07b
+.section sr08a
+.section sr08b
+.section sr09a
+.section sr09b
+.section sr00a
+.section sr00b
+.section ssaaa
+.section ssaab
+.section ssaba
+.section ssabb
+.section ssaca
+.section ssacb
+.section ssada
+.section ssadb
+.section ssaea
+.section ssaeb
+.section ssafa
+.section ssafb
+.section ssaga
+.section ssagb
+.section ssaha
+.section ssahb
+.section ssaia
+.section ssaib
+.section ssaja
+.section ssajb
+.section ssaka
+.section ssakb
+.section ssala
+.section ssalb
+.section ssama
+.section ssamb
+.section ssana
+.section ssanb
+.section ssaoa
+.section ssaob
+.section ssapa
+.section ssapb
+.section ssaqa
+.section ssaqb
+.section ssara
+.section ssarb
+.section ssasa
+.section ssasb
+.section ssata
+.section ssatb
+.section ssaua
+.section ssaub
+.section ssava
+.section ssavb
+.section ssawa
+.section ssawb
+.section ssaxa
+.section ssaxb
+.section ssaya
+.section ssayb
+.section ssaza
+.section ssazb
+.section ssa1a
+.section ssa1b
+.section ssa2a
+.section ssa2b
+.section ssa3a
+.section ssa3b
+.section ssa4a
+.section ssa4b
+.section ssa5a
+.section ssa5b
+.section ssa6a
+.section ssa6b
+.section ssa7a
+.section ssa7b
+.section ssa8a
+.section ssa8b
+.section ssa9a
+.section ssa9b
+.section ssa0a
+.section ssa0b
+.section ssbaa
+.section ssbab
+.section ssbba
+.section ssbbb
+.section ssbca
+.section ssbcb
+.section ssbda
+.section ssbdb
+.section ssbea
+.section ssbeb
+.section ssbfa
+.section ssbfb
+.section ssbga
+.section ssbgb
+.section ssbha
+.section ssbhb
+.section ssbia
+.section ssbib
+.section ssbja
+.section ssbjb
+.section ssbka
+.section ssbkb
+.section ssbla
+.section ssblb
+.section ssbma
+.section ssbmb
+.section ssbna
+.section ssbnb
+.section ssboa
+.section ssbob
+.section ssbpa
+.section ssbpb
+.section ssbqa
+.section ssbqb
+.section ssbra
+.section ssbrb
+.section ssbsa
+.section ssbsb
+.section ssbta
+.section ssbtb
+.section ssbua
+.section ssbub
+.section ssbva
+.section ssbvb
+.section ssbwa
+.section ssbwb
+.section ssbxa
+.section ssbxb
+.section ssbya
+.section ssbyb
+.section ssbza
+.section ssbzb
+.section ssb1a
+.section ssb1b
+.section ssb2a
+.section ssb2b
+.section ssb3a
+.section ssb3b
+.section ssb4a
+.section ssb4b
+.section ssb5a
+.section ssb5b
+.section ssb6a
+.section ssb6b
+.section ssb7a
+.section ssb7b
+.section ssb8a
+.section ssb8b
+.section ssb9a
+.section ssb9b
+.section ssb0a
+.section ssb0b
+.section sscaa
+.section sscab
+.section sscba
+.section sscbb
+.section sscca
+.section ssccb
+.section sscda
+.section sscdb
+.section sscea
+.section ssceb
+.section sscfa
+.section sscfb
+.section sscga
+.section sscgb
+.section sscha
+.section sschb
+.section sscia
+.section sscib
+.section sscja
+.section sscjb
+.section sscka
+.section ssckb
+.section sscla
+.section ssclb
+.section sscma
+.section sscmb
+.section sscna
+.section sscnb
+.section sscoa
+.section sscob
+.section sscpa
+.section sscpb
+.section sscqa
+.section sscqb
+.section sscra
+.section sscrb
+.section sscsa
+.section sscsb
+.section sscta
+.section ssctb
+.section sscua
+.section sscub
+.section sscva
+.section sscvb
+.section sscwa
+.section sscwb
+.section sscxa
+.section sscxb
+.section sscya
+.section sscyb
+.section sscza
+.section ssczb
+.section ssc1a
+.section ssc1b
+.section ssc2a
+.section ssc2b
+.section ssc3a
+.section ssc3b
+.section ssc4a
+.section ssc4b
+.section ssc5a
+.section ssc5b
+.section ssc6a
+.section ssc6b
+.section ssc7a
+.section ssc7b
+.section ssc8a
+.section ssc8b
+.section ssc9a
+.section ssc9b
+.section ssc0a
+.section ssc0b
+.section ssdaa
+.section ssdab
+.section ssdba
+.section ssdbb
+.section ssdca
+.section ssdcb
+.section ssdda
+.section ssddb
+.section ssdea
+.section ssdeb
+.section ssdfa
+.section ssdfb
+.section ssdga
+.section ssdgb
+.section ssdha
+.section ssdhb
+.section ssdia
+.section ssdib
+.section ssdja
+.section ssdjb
+.section ssdka
+.section ssdkb
+.section ssdla
+.section ssdlb
+.section ssdma
+.section ssdmb
+.section ssdna
+.section ssdnb
+.section ssdoa
+.section ssdob
+.section ssdpa
+.section ssdpb
+.section ssdqa
+.section ssdqb
+.section ssdra
+.section ssdrb
+.section ssdsa
+.section ssdsb
+.section ssdta
+.section ssdtb
+.section ssdua
+.section ssdub
+.section ssdva
+.section ssdvb
+.section ssdwa
+.section ssdwb
+.section ssdxa
+.section ssdxb
+.section ssdya
+.section ssdyb
+.section ssdza
+.section ssdzb
+.section ssd1a
+.section ssd1b
+.section ssd2a
+.section ssd2b
+.section ssd3a
+.section ssd3b
+.section ssd4a
+.section ssd4b
+.section ssd5a
+.section ssd5b
+.section ssd6a
+.section ssd6b
+.section ssd7a
+.section ssd7b
+.section ssd8a
+.section ssd8b
+.section ssd9a
+.section ssd9b
+.section ssd0a
+.section ssd0b
+.section sseaa
+.section sseab
+.section sseba
+.section ssebb
+.section sseca
+.section ssecb
+.section sseda
+.section ssedb
+.section sseea
+.section sseeb
+.section ssefa
+.section ssefb
+.section ssega
+.section ssegb
+.section sseha
+.section ssehb
+.section sseia
+.section sseib
+.section sseja
+.section ssejb
+.section sseka
+.section ssekb
+.section ssela
+.section sselb
+.section ssema
+.section ssemb
+.section ssena
+.section ssenb
+.section sseoa
+.section sseob
+.section ssepa
+.section ssepb
+.section sseqa
+.section sseqb
+.section ssera
+.section sserb
+.section ssesa
+.section ssesb
+.section sseta
+.section ssetb
+.section sseua
+.section sseub
+.section sseva
+.section ssevb
+.section ssewa
+.section ssewb
+.section ssexa
+.section ssexb
+.section sseya
+.section sseyb
+.section sseza
+.section ssezb
+.section sse1a
+.section sse1b
+.section sse2a
+.section sse2b
+.section sse3a
+.section sse3b
+.section sse4a
+.section sse4b
+.section sse5a
+.section sse5b
+.section sse6a
+.section sse6b
+.section sse7a
+.section sse7b
+.section sse8a
+.section sse8b
+.section sse9a
+.section sse9b
+.section sse0a
+.section sse0b
+.section ssfaa
+.section ssfab
+.section ssfba
+.section ssfbb
+.section ssfca
+.section ssfcb
+.section ssfda
+.section ssfdb
+.section ssfea
+.section ssfeb
+.section ssffa
+.section ssffb
+.section ssfga
+.section ssfgb
+.section ssfha
+.section ssfhb
+.section ssfia
+.section ssfib
+.section ssfja
+.section ssfjb
+.section ssfka
+.section ssfkb
+.section ssfla
+.section ssflb
+.section ssfma
+.section ssfmb
+.section ssfna
+.section ssfnb
+.section ssfoa
+.section ssfob
+.section ssfpa
+.section ssfpb
+.section ssfqa
+.section ssfqb
+.section ssfra
+.section ssfrb
+.section ssfsa
+.section ssfsb
+.section ssfta
+.section ssftb
+.section ssfua
+.section ssfub
+.section ssfva
+.section ssfvb
+.section ssfwa
+.section ssfwb
+.section ssfxa
+.section ssfxb
+.section ssfya
+.section ssfyb
+.section ssfza
+.section ssfzb
+.section ssf1a
+.section ssf1b
+.section ssf2a
+.section ssf2b
+.section ssf3a
+.section ssf3b
+.section ssf4a
+.section ssf4b
+.section ssf5a
+.section ssf5b
+.section ssf6a
+.section ssf6b
+.section ssf7a
+.section ssf7b
+.section ssf8a
+.section ssf8b
+.section ssf9a
+.section ssf9b
+.section ssf0a
+.section ssf0b
+.section ssgaa
+.section ssgab
+.section ssgba
+.section ssgbb
+.section ssgca
+.section ssgcb
+.section ssgda
+.section ssgdb
+.section ssgea
+.section ssgeb
+.section ssgfa
+.section ssgfb
+.section ssgga
+.section ssggb
+.section ssgha
+.section ssghb
+.section ssgia
+.section ssgib
+.section ssgja
+.section ssgjb
+.section ssgka
+.section ssgkb
+.section ssgla
+.section ssglb
+.section ssgma
+.section ssgmb
+.section ssgna
+.section ssgnb
+.section ssgoa
+.section ssgob
+.section ssgpa
+.section ssgpb
+.section ssgqa
+.section ssgqb
+.section ssgra
+.section ssgrb
+.section ssgsa
+.section ssgsb
+.section ssgta
+.section ssgtb
+.section ssgua
+.section ssgub
+.section ssgva
+.section ssgvb
+.section ssgwa
+.section ssgwb
+.section ssgxa
+.section ssgxb
+.section ssgya
+.section ssgyb
+.section ssgza
+.section ssgzb
+.section ssg1a
+.section ssg1b
+.section ssg2a
+.section ssg2b
+.section ssg3a
+.section ssg3b
+.section ssg4a
+.section ssg4b
+.section ssg5a
+.section ssg5b
+.section ssg6a
+.section ssg6b
+.section ssg7a
+.section ssg7b
+.section ssg8a
+.section ssg8b
+.section ssg9a
+.section ssg9b
+.section ssg0a
+.section ssg0b
+.section sshaa
+.section sshab
+.section sshba
+.section sshbb
+.section sshca
+.section sshcb
+.section sshda
+.section sshdb
+.section sshea
+.section ssheb
+.section sshfa
+.section sshfb
+.section sshga
+.section sshgb
+.section sshha
+.section sshhb
+.section sshia
+.section sshib
+.section sshja
+.section sshjb
+.section sshka
+.section sshkb
+.section sshla
+.section sshlb
+.section sshma
+.section sshmb
+.section sshna
+.section sshnb
+.section sshoa
+.section sshob
+.section sshpa
+.section sshpb
+.section sshqa
+.section sshqb
+.section sshra
+.section sshrb
+.section sshsa
+.section sshsb
+.section sshta
+.section sshtb
+.section sshua
+.section sshub
+.section sshva
+.section sshvb
+.section sshwa
+.section sshwb
+.section sshxa
+.section sshxb
+.section sshya
+.section sshyb
+.section sshza
+.section sshzb
+.section ssh1a
+.section ssh1b
+.section ssh2a
+.section ssh2b
+.section ssh3a
+.section ssh3b
+.section ssh4a
+.section ssh4b
+.section ssh5a
+.section ssh5b
+.section ssh6a
+.section ssh6b
+.section ssh7a
+.section ssh7b
+.section ssh8a
+.section ssh8b
+.section ssh9a
+.section ssh9b
+.section ssh0a
+.section ssh0b
+.section ssiaa
+.section ssiab
+.section ssiba
+.section ssibb
+.section ssica
+.section ssicb
+.section ssida
+.section ssidb
+.section ssiea
+.section ssieb
+.section ssifa
+.section ssifb
+.section ssiga
+.section ssigb
+.section ssiha
+.section ssihb
+.section ssiia
+.section ssiib
+.section ssija
+.section ssijb
+.section ssika
+.section ssikb
+.section ssila
+.section ssilb
+.section ssima
+.section ssimb
+.section ssina
+.section ssinb
+.section ssioa
+.section ssiob
+.section ssipa
+.section ssipb
+.section ssiqa
+.section ssiqb
+.section ssira
+.section ssirb
+.section ssisa
+.section ssisb
+.section ssita
+.section ssitb
+.section ssiua
+.section ssiub
+.section ssiva
+.section ssivb
+.section ssiwa
+.section ssiwb
+.section ssixa
+.section ssixb
+.section ssiya
+.section ssiyb
+.section ssiza
+.section ssizb
+.section ssi1a
+.section ssi1b
+.section ssi2a
+.section ssi2b
+.section ssi3a
+.section ssi3b
+.section ssi4a
+.section ssi4b
+.section ssi5a
+.section ssi5b
+.section ssi6a
+.section ssi6b
+.section ssi7a
+.section ssi7b
+.section ssi8a
+.section ssi8b
+.section ssi9a
+.section ssi9b
+.section ssi0a
+.section ssi0b
+.section ssjaa
+.section ssjab
+.section ssjba
+.section ssjbb
+.section ssjca
+.section ssjcb
+.section ssjda
+.section ssjdb
+.section ssjea
+.section ssjeb
+.section ssjfa
+.section ssjfb
+.section ssjga
+.section ssjgb
+.section ssjha
+.section ssjhb
+.section ssjia
+.section ssjib
+.section ssjja
+.section ssjjb
+.section ssjka
+.section ssjkb
+.section ssjla
+.section ssjlb
+.section ssjma
+.section ssjmb
+.section ssjna
+.section ssjnb
+.section ssjoa
+.section ssjob
+.section ssjpa
+.section ssjpb
+.section ssjqa
+.section ssjqb
+.section ssjra
+.section ssjrb
+.section ssjsa
+.section ssjsb
+.section ssjta
+.section ssjtb
+.section ssjua
+.section ssjub
+.section ssjva
+.section ssjvb
+.section ssjwa
+.section ssjwb
+.section ssjxa
+.section ssjxb
+.section ssjya
+.section ssjyb
+.section ssjza
+.section ssjzb
+.section ssj1a
+.section ssj1b
+.section ssj2a
+.section ssj2b
+.section ssj3a
+.section ssj3b
+.section ssj4a
+.section ssj4b
+.section ssj5a
+.section ssj5b
+.section ssj6a
+.section ssj6b
+.section ssj7a
+.section ssj7b
+.section ssj8a
+.section ssj8b
+.section ssj9a
+.section ssj9b
+.section ssj0a
+.section ssj0b
+.section sskaa
+.section sskab
+.section sskba
+.section sskbb
+.section sskca
+.section sskcb
+.section sskda
+.section sskdb
+.section sskea
+.section sskeb
+.section sskfa
+.section sskfb
+.section sskga
+.section sskgb
+.section sskha
+.section sskhb
+.section sskia
+.section sskib
+.section sskja
+.section sskjb
+.section sskka
+.section sskkb
+.section sskla
+.section ssklb
+.section sskma
+.section sskmb
+.section sskna
+.section ssknb
+.section sskoa
+.section sskob
+.section sskpa
+.section sskpb
+.section sskqa
+.section sskqb
+.section sskra
+.section sskrb
+.section ssksa
+.section ssksb
+.section sskta
+.section ssktb
+.section sskua
+.section sskub
+.section sskva
+.section sskvb
+.section sskwa
+.section sskwb
+.section sskxa
+.section sskxb
+.section sskya
+.section sskyb
+.section sskza
+.section sskzb
+.section ssk1a
+.section ssk1b
+.section ssk2a
+.section ssk2b
+.section ssk3a
+.section ssk3b
+.section ssk4a
+.section ssk4b
+.section ssk5a
+.section ssk5b
+.section ssk6a
+.section ssk6b
+.section ssk7a
+.section ssk7b
+.section ssk8a
+.section ssk8b
+.section ssk9a
+.section ssk9b
+.section ssk0a
+.section ssk0b
+.section sslaa
+.section sslab
+.section sslba
+.section sslbb
+.section sslca
+.section sslcb
+.section sslda
+.section ssldb
+.section sslea
+.section ssleb
+.section sslfa
+.section sslfb
+.section sslga
+.section sslgb
+.section sslha
+.section sslhb
+.section sslia
+.section sslib
+.section sslja
+.section ssljb
+.section sslka
+.section sslkb
+.section sslla
+.section ssllb
+.section sslma
+.section sslmb
+.section sslna
+.section sslnb
+.section ssloa
+.section sslob
+.section sslpa
+.section sslpb
+.section sslqa
+.section sslqb
+.section sslra
+.section sslrb
+.section sslsa
+.section sslsb
+.section sslta
+.section ssltb
+.section sslua
+.section sslub
+.section sslva
+.section sslvb
+.section sslwa
+.section sslwb
+.section sslxa
+.section sslxb
+.section sslya
+.section sslyb
+.section sslza
+.section sslzb
+.section ssl1a
+.section ssl1b
+.section ssl2a
+.section ssl2b
+.section ssl3a
+.section ssl3b
+.section ssl4a
+.section ssl4b
+.section ssl5a
+.section ssl5b
+.section ssl6a
+.section ssl6b
+.section ssl7a
+.section ssl7b
+.section ssl8a
+.section ssl8b
+.section ssl9a
+.section ssl9b
+.section ssl0a
+.section ssl0b
+.section ssmaa
+.section ssmab
+.section ssmba
+.section ssmbb
+.section ssmca
+.section ssmcb
+.section ssmda
+.section ssmdb
+.section ssmea
+.section ssmeb
+.section ssmfa
+.section ssmfb
+.section ssmga
+.section ssmgb
+.section ssmha
+.section ssmhb
+.section ssmia
+.section ssmib
+.section ssmja
+.section ssmjb
+.section ssmka
+.section ssmkb
+.section ssmla
+.section ssmlb
+.section ssmma
+.section ssmmb
+.section ssmna
+.section ssmnb
+.section ssmoa
+.section ssmob
+.section ssmpa
+.section ssmpb
+.section ssmqa
+.section ssmqb
+.section ssmra
+.section ssmrb
+.section ssmsa
+.section ssmsb
+.section ssmta
+.section ssmtb
+.section ssmua
+.section ssmub
+.section ssmva
+.section ssmvb
+.section ssmwa
+.section ssmwb
+.section ssmxa
+.section ssmxb
+.section ssmya
+.section ssmyb
+.section ssmza
+.section ssmzb
+.section ssm1a
+.section ssm1b
+.section ssm2a
+.section ssm2b
+.section ssm3a
+.section ssm3b
+.section ssm4a
+.section ssm4b
+.section ssm5a
+.section ssm5b
+.section ssm6a
+.section ssm6b
+.section ssm7a
+.section ssm7b
+.section ssm8a
+.section ssm8b
+.section ssm9a
+.section ssm9b
+.section ssm0a
+.section ssm0b
+.section ssnaa
+.section ssnab
+.section ssnba
+.section ssnbb
+.section ssnca
+.section ssncb
+.section ssnda
+.section ssndb
+.section ssnea
+.section ssneb
+.section ssnfa
+.section ssnfb
+.section ssnga
+.section ssngb
+.section ssnha
+.section ssnhb
+.section ssnia
+.section ssnib
+.section ssnja
+.section ssnjb
+.section ssnka
+.section ssnkb
+.section ssnla
+.section ssnlb
+.section ssnma
+.section ssnmb
+.section ssnna
+.section ssnnb
+.section ssnoa
+.section ssnob
+.section ssnpa
+.section ssnpb
+.section ssnqa
+.section ssnqb
+.section ssnra
+.section ssnrb
+.section ssnsa
+.section ssnsb
+.section ssnta
+.section ssntb
+.section ssnua
+.section ssnub
+.section ssnva
+.section ssnvb
+.section ssnwa
+.section ssnwb
+.section ssnxa
+.section ssnxb
+.section ssnya
+.section ssnyb
+.section ssnza
+.section ssnzb
+.section ssn1a
+.section ssn1b
+.section ssn2a
+.section ssn2b
+.section ssn3a
+.section ssn3b
+.section ssn4a
+.section ssn4b
+.section ssn5a
+.section ssn5b
+.section ssn6a
+.section ssn6b
+.section ssn7a
+.section ssn7b
+.section ssn8a
+.section ssn8b
+.section ssn9a
+.section ssn9b
+.section ssn0a
+.section ssn0b
+.section ssoaa
+.section ssoab
+.section ssoba
+.section ssobb
+.section ssoca
+.section ssocb
+.section ssoda
+.section ssodb
+.section ssoea
+.section ssoeb
+.section ssofa
+.section ssofb
+.section ssoga
+.section ssogb
+.section ssoha
+.section ssohb
+.section ssoia
+.section ssoib
+.section ssoja
+.section ssojb
+.section ssoka
+.section ssokb
+.section ssola
+.section ssolb
+.section ssoma
+.section ssomb
+.section ssona
+.section ssonb
+.section ssooa
+.section ssoob
+.section ssopa
+.section ssopb
+.section ssoqa
+.section ssoqb
+.section ssora
+.section ssorb
+.section ssosa
+.section ssosb
+.section ssota
+.section ssotb
+.section ssoua
+.section ssoub
+.section ssova
+.section ssovb
+.section ssowa
+.section ssowb
+.section ssoxa
+.section ssoxb
+.section ssoya
+.section ssoyb
+.section ssoza
+.section ssozb
+.section sso1a
+.section sso1b
+.section sso2a
+.section sso2b
+.section sso3a
+.section sso3b
+.section sso4a
+.section sso4b
+.section sso5a
+.section sso5b
+.section sso6a
+.section sso6b
+.section sso7a
+.section sso7b
+.section sso8a
+.section sso8b
+.section sso9a
+.section sso9b
+.section sso0a
+.section sso0b
+.section sspaa
+.section sspab
+.section sspba
+.section sspbb
+.section sspca
+.section sspcb
+.section sspda
+.section sspdb
+.section sspea
+.section sspeb
+.section sspfa
+.section sspfb
+.section sspga
+.section sspgb
+.section sspha
+.section ssphb
+.section sspia
+.section sspib
+.section sspja
+.section sspjb
+.section sspka
+.section sspkb
+.section sspla
+.section ssplb
+.section sspma
+.section sspmb
+.section sspna
+.section sspnb
+.section sspoa
+.section sspob
+.section ssppa
+.section ssppb
+.section sspqa
+.section sspqb
+.section sspra
+.section ssprb
+.section sspsa
+.section sspsb
+.section sspta
+.section ssptb
+.section sspua
+.section sspub
+.section sspva
+.section sspvb
+.section sspwa
+.section sspwb
+.section sspxa
+.section sspxb
+.section sspya
+.section sspyb
+.section sspza
+.section sspzb
+.section ssp1a
+.section ssp1b
+.section ssp2a
+.section ssp2b
+.section ssp3a
+.section ssp3b
+.section ssp4a
+.section ssp4b
+.section ssp5a
+.section ssp5b
+.section ssp6a
+.section ssp6b
+.section ssp7a
+.section ssp7b
+.section ssp8a
+.section ssp8b
+.section ssp9a
+.section ssp9b
+.section ssp0a
+.section ssp0b
+.section ssqaa
+.section ssqab
+.section ssqba
+.section ssqbb
+.section ssqca
+.section ssqcb
+.section ssqda
+.section ssqdb
+.section ssqea
+.section ssqeb
+.section ssqfa
+.section ssqfb
+.section ssqga
+.section ssqgb
+.section ssqha
+.section ssqhb
+.section ssqia
+.section ssqib
+.section ssqja
+.section ssqjb
+.section ssqka
+.section ssqkb
+.section ssqla
+.section ssqlb
+.section ssqma
+.section ssqmb
+.section ssqna
+.section ssqnb
+.section ssqoa
+.section ssqob
+.section ssqpa
+.section ssqpb
+.section ssqqa
+.section ssqqb
+.section ssqra
+.section ssqrb
+.section ssqsa
+.section ssqsb
+.section ssqta
+.section ssqtb
+.section ssqua
+.section ssqub
+.section ssqva
+.section ssqvb
+.section ssqwa
+.section ssqwb
+.section ssqxa
+.section ssqxb
+.section ssqya
+.section ssqyb
+.section ssqza
+.section ssqzb
+.section ssq1a
+.section ssq1b
+.section ssq2a
+.section ssq2b
+.section ssq3a
+.section ssq3b
+.section ssq4a
+.section ssq4b
+.section ssq5a
+.section ssq5b
+.section ssq6a
+.section ssq6b
+.section ssq7a
+.section ssq7b
+.section ssq8a
+.section ssq8b
+.section ssq9a
+.section ssq9b
+.section ssq0a
+.section ssq0b
+.section ssraa
+.section ssrab
+.section ssrba
+.section ssrbb
+.section ssrca
+.section ssrcb
+.section ssrda
+.section ssrdb
+.section ssrea
+.section ssreb
+.section ssrfa
+.section ssrfb
+.section ssrga
+.section ssrgb
+.section ssrha
+.section ssrhb
+.section ssria
+.section ssrib
+.section ssrja
+.section ssrjb
+.section ssrka
+.section ssrkb
+.section ssrla
+.section ssrlb
+.section ssrma
+.section ssrmb
+.section ssrna
+.section ssrnb
+.section ssroa
+.section ssrob
+.section ssrpa
+.section ssrpb
+.section ssrqa
+.section ssrqb
+.section ssrra
+.section ssrrb
+.section ssrsa
+.section ssrsb
+.section ssrta
+.section ssrtb
+.section ssrua
+.section ssrub
+.section ssrva
+.section ssrvb
+.section ssrwa
+.section ssrwb
+.section ssrxa
+.section ssrxb
+.section ssrya
+.section ssryb
+.section ssrza
+.section ssrzb
+.section ssr1a
+.section ssr1b
+.section ssr2a
+.section ssr2b
+.section ssr3a
+.section ssr3b
+.section ssr4a
+.section ssr4b
+.section ssr5a
+.section ssr5b
+.section ssr6a
+.section ssr6b
+.section ssr7a
+.section ssr7b
+.section ssr8a
+.section ssr8b
+.section ssr9a
+.section ssr9b
+.section ssr0a
+.section ssr0b
+.section sssaa
+.section sssab
+.section sssba
+.section sssbb
+.section sssca
+.section ssscb
+.section sssda
+.section sssdb
+.section sssea
+.section ssseb
+.section sssfa
+.section sssfb
+.section sssga
+.section sssgb
+.section sssha
+.section ssshb
+.section sssia
+.section sssib
+.section sssja
+.section sssjb
+.section ssska
+.section ssskb
+.section sssla
+.section ssslb
+.section sssma
+.section sssmb
+.section sssna
+.section sssnb
+.section sssoa
+.section sssob
+.section ssspa
+.section ssspb
+.section sssqa
+.section sssqb
+.section sssra
+.section sssrb
+.section ssssa
+.section ssssb
+.section sssta
+.section ssstb
+.section sssua
+.section sssub
+.section sssva
+.section sssvb
+.section ssswa
+.section ssswb
+.section sssxa
+.section sssxb
+.section sssya
+.section sssyb
+.section sssza
+.section ssszb
+.section sss1a
+.section sss1b
+.section sss2a
+.section sss2b
+.section sss3a
+.section sss3b
+.section sss4a
+.section sss4b
+.section sss5a
+.section sss5b
+.section sss6a
+.section sss6b
+.section sss7a
+.section sss7b
+.section sss8a
+.section sss8b
+.section sss9a
+.section sss9b
+.section sss0a
+.section sss0b
+.section sstaa
+.section sstab
+.section sstba
+.section sstbb
+.section sstca
+.section sstcb
+.section sstda
+.section sstdb
+.section sstea
+.section ssteb
+.section sstfa
+.section sstfb
+.section sstga
+.section sstgb
+.section sstha
+.section ssthb
+.section sstia
+.section sstib
+.section sstja
+.section sstjb
+.section sstka
+.section sstkb
+.section sstla
+.section sstlb
+.section sstma
+.section sstmb
+.section sstna
+.section sstnb
+.section sstoa
+.section sstob
+.section sstpa
+.section sstpb
+.section sstqa
+.section sstqb
+.section sstra
+.section sstrb
+.section sstsa
+.section sstsb
+.section sstta
+.section ssttb
+.section sstua
+.section sstub
+.section sstva
+.section sstvb
+.section sstwa
+.section sstwb
+.section sstxa
+.section sstxb
+.section sstya
+.section sstyb
+.section sstza
+.section sstzb
+.section sst1a
+.section sst1b
+.section sst2a
+.section sst2b
+.section sst3a
+.section sst3b
+.section sst4a
+.section sst4b
+.section sst5a
+.section sst5b
+.section sst6a
+.section sst6b
+.section sst7a
+.section sst7b
+.section sst8a
+.section sst8b
+.section sst9a
+.section sst9b
+.section sst0a
+.section sst0b
+.section ssuaa
+.section ssuab
+.section ssuba
+.section ssubb
+.section ssuca
+.section ssucb
+.section ssuda
+.section ssudb
+.section ssuea
+.section ssueb
+.section ssufa
+.section ssufb
+.section ssuga
+.section ssugb
+.section ssuha
+.section ssuhb
+.section ssuia
+.section ssuib
+.section ssuja
+.section ssujb
+.section ssuka
+.section ssukb
+.section ssula
+.section ssulb
+.section ssuma
+.section ssumb
+.section ssuna
+.section ssunb
+.section ssuoa
+.section ssuob
+.section ssupa
+.section ssupb
+.section ssuqa
+.section ssuqb
+.section ssura
+.section ssurb
+.section ssusa
+.section ssusb
+.section ssuta
+.section ssutb
+.section ssuua
+.section ssuub
+.section ssuva
+.section ssuvb
+.section ssuwa
+.section ssuwb
+.section ssuxa
+.section ssuxb
+.section ssuya
+.section ssuyb
+.section ssuza
+.section ssuzb
+.section ssu1a
+.section ssu1b
+.section ssu2a
+.section ssu2b
+.section ssu3a
+.section ssu3b
+.section ssu4a
+.section ssu4b
+.section ssu5a
+.section ssu5b
+.section ssu6a
+.section ssu6b
+.section ssu7a
+.section ssu7b
+.section ssu8a
+.section ssu8b
+.section ssu9a
+.section ssu9b
+.section ssu0a
+.section ssu0b
+.section ssvaa
+.section ssvab
+.section ssvba
+.section ssvbb
+.section ssvca
+.section ssvcb
+.section ssvda
+.section ssvdb
+.section ssvea
+.section ssveb
+.section ssvfa
+.section ssvfb
+.section ssvga
+.section ssvgb
+.section ssvha
+.section ssvhb
+.section ssvia
+.section ssvib
+.section ssvja
+.section ssvjb
+.section ssvka
+.section ssvkb
+.section ssvla
+.section ssvlb
+.section ssvma
+.section ssvmb
+.section ssvna
+.section ssvnb
+.section ssvoa
+.section ssvob
+.section ssvpa
+.section ssvpb
+.section ssvqa
+.section ssvqb
+.section ssvra
+.section ssvrb
+.section ssvsa
+.section ssvsb
+.section ssvta
+.section ssvtb
+.section ssvua
+.section ssvub
+.section ssvva
+.section ssvvb
+.section ssvwa
+.section ssvwb
+.section ssvxa
+.section ssvxb
+.section ssvya
+.section ssvyb
+.section ssvza
+.section ssvzb
+.section ssv1a
+.section ssv1b
+.section ssv2a
+.section ssv2b
+.section ssv3a
+.section ssv3b
+.section ssv4a
+.section ssv4b
+.section ssv5a
+.section ssv5b
+.section ssv6a
+.section ssv6b
+.section ssv7a
+.section ssv7b
+.section ssv8a
+.section ssv8b
+.section ssv9a
+.section ssv9b
+.section ssv0a
+.section ssv0b
+.section sswaa
+.section sswab
+.section sswba
+.section sswbb
+.section sswca
+.section sswcb
+.section sswda
+.section sswdb
+.section sswea
+.section ssweb
+.section sswfa
+.section sswfb
+.section sswga
+.section sswgb
+.section sswha
+.section sswhb
+.section sswia
+.section sswib
+.section sswja
+.section sswjb
+.section sswka
+.section sswkb
+.section sswla
+.section sswlb
+.section sswma
+.section sswmb
+.section sswna
+.section sswnb
+.section sswoa
+.section sswob
+.section sswpa
+.section sswpb
+.section sswqa
+.section sswqb
+.section sswra
+.section sswrb
+.section sswsa
+.section sswsb
+.section sswta
+.section sswtb
+.section sswua
+.section sswub
+.section sswva
+.section sswvb
+.section sswwa
+.section sswwb
+.section sswxa
+.section sswxb
+.section sswya
+.section sswyb
+.section sswza
+.section sswzb
+.section ssw1a
+.section ssw1b
+.section ssw2a
+.section ssw2b
+.section ssw3a
+.section ssw3b
+.section ssw4a
+.section ssw4b
+.section ssw5a
+.section ssw5b
+.section ssw6a
+.section ssw6b
+.section ssw7a
+.section ssw7b
+.section ssw8a
+.section ssw8b
+.section ssw9a
+.section ssw9b
+.section ssw0a
+.section ssw0b
+.section ssxaa
+.section ssxab
+.section ssxba
+.section ssxbb
+.section ssxca
+.section ssxcb
+.section ssxda
+.section ssxdb
+.section ssxea
+.section ssxeb
+.section ssxfa
+.section ssxfb
+.section ssxga
+.section ssxgb
+.section ssxha
+.section ssxhb
+.section ssxia
+.section ssxib
+.section ssxja
+.section ssxjb
+.section ssxka
+.section ssxkb
+.section ssxla
+.section ssxlb
+.section ssxma
+.section ssxmb
+.section ssxna
+.section ssxnb
+.section ssxoa
+.section ssxob
+.section ssxpa
+.section ssxpb
+.section ssxqa
+.section ssxqb
+.section ssxra
+.section ssxrb
+.section ssxsa
+.section ssxsb
+.section ssxta
+.section ssxtb
+.section ssxua
+.section ssxub
+.section ssxva
+.section ssxvb
+.section ssxwa
+.section ssxwb
+.section ssxxa
+.section ssxxb
+.section ssxya
+.section ssxyb
+.section ssxza
+.section ssxzb
+.section ssx1a
+.section ssx1b
+.section ssx2a
+.section ssx2b
+.section ssx3a
+.section ssx3b
+.section ssx4a
+.section ssx4b
+.section ssx5a
+.section ssx5b
+.section ssx6a
+.section ssx6b
+.section ssx7a
+.section ssx7b
+.section ssx8a
+.section ssx8b
+.section ssx9a
+.section ssx9b
+.section ssx0a
+.section ssx0b
+.section ssyaa
+.section ssyab
+.section ssyba
+.section ssybb
+.section ssyca
+.section ssycb
+.section ssyda
+.section ssydb
+.section ssyea
+.section ssyeb
+.section ssyfa
+.section ssyfb
+.section ssyga
+.section ssygb
+.section ssyha
+.section ssyhb
+.section ssyia
+.section ssyib
+.section ssyja
+.section ssyjb
+.section ssyka
+.section ssykb
+.section ssyla
+.section ssylb
+.section ssyma
+.section ssymb
+.section ssyna
+.section ssynb
+.section ssyoa
+.section ssyob
+.section ssypa
+.section ssypb
+.section ssyqa
+.section ssyqb
+.section ssyra
+.section ssyrb
+.section ssysa
+.section ssysb
+.section ssyta
+.section ssytb
+.section ssyua
+.section ssyub
+.section ssyva
+.section ssyvb
+.section ssywa
+.section ssywb
+.section ssyxa
+.section ssyxb
+.section ssyya
+.section ssyyb
+.section ssyza
+.section ssyzb
+.section ssy1a
+.section ssy1b
+.section ssy2a
+.section ssy2b
+.section ssy3a
+.section ssy3b
+.section ssy4a
+.section ssy4b
+.section ssy5a
+.section ssy5b
+.section ssy6a
+.section ssy6b
+.section ssy7a
+.section ssy7b
+.section ssy8a
+.section ssy8b
+.section ssy9a
+.section ssy9b
+.section ssy0a
+.section ssy0b
+.section sszaa
+.section sszab
+.section sszba
+.section sszbb
+.section sszca
+.section sszcb
+.section sszda
+.section sszdb
+.section sszea
+.section sszeb
+.section sszfa
+.section sszfb
+.section sszga
+.section sszgb
+.section sszha
+.section sszhb
+.section sszia
+.section sszib
+.section sszja
+.section sszjb
+.section sszka
+.section sszkb
+.section sszla
+.section sszlb
+.section sszma
+.section sszmb
+.section sszna
+.section ssznb
+.section sszoa
+.section sszob
+.section sszpa
+.section sszpb
+.section sszqa
+.section sszqb
+.section sszra
+.section sszrb
+.section sszsa
+.section sszsb
+.section sszta
+.section ssztb
+.section sszua
+.section sszub
+.section sszva
+.section sszvb
+.section sszwa
+.section sszwb
+.section sszxa
+.section sszxb
+.section sszya
+.section sszyb
+.section sszza
+.section sszzb
+.section ssz1a
+.section ssz1b
+.section ssz2a
+.section ssz2b
+.section ssz3a
+.section ssz3b
+.section ssz4a
+.section ssz4b
+.section ssz5a
+.section ssz5b
+.section ssz6a
+.section ssz6b
+.section ssz7a
+.section ssz7b
+.section ssz8a
+.section ssz8b
+.section ssz9a
+.section ssz9b
+.section ssz0a
+.section ssz0b
+.section ss1aa
+.section ss1ab
+.section ss1ba
+.section ss1bb
+.section ss1ca
+.section ss1cb
+.section ss1da
+.section ss1db
+.section ss1ea
+.section ss1eb
+.section ss1fa
+.section ss1fb
+.section ss1ga
+.section ss1gb
+.section ss1ha
+.section ss1hb
+.section ss1ia
+.section ss1ib
+.section ss1ja
+.section ss1jb
+.section ss1ka
+.section ss1kb
+.section ss1la
+.section ss1lb
+.section ss1ma
+.section ss1mb
+.section ss1na
+.section ss1nb
+.section ss1oa
+.section ss1ob
+.section ss1pa
+.section ss1pb
+.section ss1qa
+.section ss1qb
+.section ss1ra
+.section ss1rb
+.section ss1sa
+.section ss1sb
+.section ss1ta
+.section ss1tb
+.section ss1ua
+.section ss1ub
+.section ss1va
+.section ss1vb
+.section ss1wa
+.section ss1wb
+.section ss1xa
+.section ss1xb
+.section ss1ya
+.section ss1yb
+.section ss1za
+.section ss1zb
+.section ss11a
+.section ss11b
+.section ss12a
+.section ss12b
+.section ss13a
+.section ss13b
+.section ss14a
+.section ss14b
+.section ss15a
+.section ss15b
+.section ss16a
+.section ss16b
+.section ss17a
+.section ss17b
+.section ss18a
+.section ss18b
+.section ss19a
+.section ss19b
+.section ss10a
+.section ss10b
+.section ss2aa
+.section ss2ab
+.section ss2ba
+.section ss2bb
+.section ss2ca
+.section ss2cb
+.section ss2da
+.section ss2db
+.section ss2ea
+.section ss2eb
+.section ss2fa
+.section ss2fb
+.section ss2ga
+.section ss2gb
+.section ss2ha
+.section ss2hb
+.section ss2ia
+.section ss2ib
+.section ss2ja
+.section ss2jb
+.section ss2ka
+.section ss2kb
+.section ss2la
+.section ss2lb
+.section ss2ma
+.section ss2mb
+.section ss2na
+.section ss2nb
+.section ss2oa
+.section ss2ob
+.section ss2pa
+.section ss2pb
+.section ss2qa
+.section ss2qb
+.section ss2ra
+.section ss2rb
+.section ss2sa
+.section ss2sb
+.section ss2ta
+.section ss2tb
+.section ss2ua
+.section ss2ub
+.section ss2va
+.section ss2vb
+.section ss2wa
+.section ss2wb
+.section ss2xa
+.section ss2xb
+.section ss2ya
+.section ss2yb
+.section ss2za
+.section ss2zb
+.section ss21a
+.section ss21b
+.section ss22a
+.section ss22b
+.section ss23a
+.section ss23b
+.section ss24a
+.section ss24b
+.section ss25a
+.section ss25b
+.section ss26a
+.section ss26b
+.section ss27a
+.section ss27b
+.section ss28a
+.section ss28b
+.section ss29a
+.section ss29b
+.section ss20a
+.section ss20b
+.section ss3aa
+.section ss3ab
+.section ss3ba
+.section ss3bb
+.section ss3ca
+.section ss3cb
+.section ss3da
+.section ss3db
+.section ss3ea
+.section ss3eb
+.section ss3fa
+.section ss3fb
+.section ss3ga
+.section ss3gb
+.section ss3ha
+.section ss3hb
+.section ss3ia
+.section ss3ib
+.section ss3ja
+.section ss3jb
+.section ss3ka
+.section ss3kb
+.section ss3la
+.section ss3lb
+.section ss3ma
+.section ss3mb
+.section ss3na
+.section ss3nb
+.section ss3oa
+.section ss3ob
+.section ss3pa
+.section ss3pb
+.section ss3qa
+.section ss3qb
+.section ss3ra
+.section ss3rb
+.section ss3sa
+.section ss3sb
+.section ss3ta
+.section ss3tb
+.section ss3ua
+.section ss3ub
+.section ss3va
+.section ss3vb
+.section ss3wa
+.section ss3wb
+.section ss3xa
+.section ss3xb
+.section ss3ya
+.section ss3yb
+.section ss3za
+.section ss3zb
+.section ss31a
+.section ss31b
+.section ss32a
+.section ss32b
+.section ss33a
+.section ss33b
+.section ss34a
+.section ss34b
+.section ss35a
+.section ss35b
+.section ss36a
+.section ss36b
+.section ss37a
+.section ss37b
+.section ss38a
+.section ss38b
+.section ss39a
+.section ss39b
+.section ss30a
+.section ss30b
+.section ss4aa
+.section ss4ab
+.section ss4ba
+.section ss4bb
+.section ss4ca
+.section ss4cb
+.section ss4da
+.section ss4db
+.section ss4ea
+.section ss4eb
+.section ss4fa
+.section ss4fb
+.section ss4ga
+.section ss4gb
+.section ss4ha
+.section ss4hb
+.section ss4ia
+.section ss4ib
+.section ss4ja
+.section ss4jb
+.section ss4ka
+.section ss4kb
+.section ss4la
+.section ss4lb
+.section ss4ma
+.section ss4mb
+.section ss4na
+.section ss4nb
+.section ss4oa
+.section ss4ob
+.section ss4pa
+.section ss4pb
+.section ss4qa
+.section ss4qb
+.section ss4ra
+.section ss4rb
+.section ss4sa
+.section ss4sb
+.section ss4ta
+.section ss4tb
+.section ss4ua
+.section ss4ub
+.section ss4va
+.section ss4vb
+.section ss4wa
+.section ss4wb
+.section ss4xa
+.section ss4xb
+.section ss4ya
+.section ss4yb
+.section ss4za
+.section ss4zb
+.section ss41a
+.section ss41b
+.section ss42a
+.section ss42b
+.section ss43a
+.section ss43b
+.section ss44a
+.section ss44b
+.section ss45a
+.section ss45b
+.section ss46a
+.section ss46b
+.section ss47a
+.section ss47b
+.section ss48a
+.section ss48b
+.section ss49a
+.section ss49b
+.section ss40a
+.section ss40b
+.section ss5aa
+.section ss5ab
+.section ss5ba
+.section ss5bb
+.section ss5ca
+.section ss5cb
+.section ss5da
+.section ss5db
+.section ss5ea
+.section ss5eb
+.section ss5fa
+.section ss5fb
+.section ss5ga
+.section ss5gb
+.section ss5ha
+.section ss5hb
+.section ss5ia
+.section ss5ib
+.section ss5ja
+.section ss5jb
+.section ss5ka
+.section ss5kb
+.section ss5la
+.section ss5lb
+.section ss5ma
+.section ss5mb
+.section ss5na
+.section ss5nb
+.section ss5oa
+.section ss5ob
+.section ss5pa
+.section ss5pb
+.section ss5qa
+.section ss5qb
+.section ss5ra
+.section ss5rb
+.section ss5sa
+.section ss5sb
+.section ss5ta
+.section ss5tb
+.section ss5ua
+.section ss5ub
+.section ss5va
+.section ss5vb
+.section ss5wa
+.section ss5wb
+.section ss5xa
+.section ss5xb
+.section ss5ya
+.section ss5yb
+.section ss5za
+.section ss5zb
+.section ss51a
+.section ss51b
+.section ss52a
+.section ss52b
+.section ss53a
+.section ss53b
+.section ss54a
+.section ss54b
+.section ss55a
+.section ss55b
+.section ss56a
+.section ss56b
+.section ss57a
+.section ss57b
+.section ss58a
+.section ss58b
+.section ss59a
+.section ss59b
+.section ss50a
+.section ss50b
+.section ss6aa
+.section ss6ab
+.section ss6ba
+.section ss6bb
+.section ss6ca
+.section ss6cb
+.section ss6da
+.section ss6db
+.section ss6ea
+.section ss6eb
+.section ss6fa
+.section ss6fb
+.section ss6ga
+.section ss6gb
+.section ss6ha
+.section ss6hb
+.section ss6ia
+.section ss6ib
+.section ss6ja
+.section ss6jb
+.section ss6ka
+.section ss6kb
+.section ss6la
+.section ss6lb
+.section ss6ma
+.section ss6mb
+.section ss6na
+.section ss6nb
+.section ss6oa
+.section ss6ob
+.section ss6pa
+.section ss6pb
+.section ss6qa
+.section ss6qb
+.section ss6ra
+.section ss6rb
+.section ss6sa
+.section ss6sb
+.section ss6ta
+.section ss6tb
+.section ss6ua
+.section ss6ub
+.section ss6va
+.section ss6vb
+.section ss6wa
+.section ss6wb
+.section ss6xa
+.section ss6xb
+.section ss6ya
+.section ss6yb
+.section ss6za
+.section ss6zb
+.section ss61a
+.section ss61b
+.section ss62a
+.section ss62b
+.section ss63a
+.section ss63b
+.section ss64a
+.section ss64b
+.section ss65a
+.section ss65b
+.section ss66a
+.section ss66b
+.section ss67a
+.section ss67b
+.section ss68a
+.section ss68b
+.section ss69a
+.section ss69b
+.section ss60a
+.section ss60b
+.section ss7aa
+.section ss7ab
+.section ss7ba
+.section ss7bb
+.section ss7ca
+.section ss7cb
+.section ss7da
+.section ss7db
+.section ss7ea
+.section ss7eb
+.section ss7fa
+.section ss7fb
+.section ss7ga
+.section ss7gb
+.section ss7ha
+.section ss7hb
+.section ss7ia
+.section ss7ib
+.section ss7ja
+.section ss7jb
+.section ss7ka
+.section ss7kb
+.section ss7la
+.section ss7lb
+.section ss7ma
+.section ss7mb
+.section ss7na
+.section ss7nb
+.section ss7oa
+.section ss7ob
+.section ss7pa
+.section ss7pb
+.section ss7qa
+.section ss7qb
+.section ss7ra
+.section ss7rb
+.section ss7sa
+.section ss7sb
+.section ss7ta
+.section ss7tb
+.section ss7ua
+.section ss7ub
+.section ss7va
+.section ss7vb
+.section ss7wa
+.section ss7wb
+.section ss7xa
+.section ss7xb
+.section ss7ya
+.section ss7yb
+.section ss7za
+.section ss7zb
+.section ss71a
+.section ss71b
+.section ss72a
+.section ss72b
+.section ss73a
+.section ss73b
+.section ss74a
+.section ss74b
+.section ss75a
+.section ss75b
+.section ss76a
+.section ss76b
+.section ss77a
+.section ss77b
+.section ss78a
+.section ss78b
+.section ss79a
+.section ss79b
+.section ss70a
+.section ss70b
+.section ss8aa
+.section ss8ab
+.section ss8ba
+.section ss8bb
+.section ss8ca
+.section ss8cb
+.section ss8da
+.section ss8db
+.section ss8ea
+.section ss8eb
+.section ss8fa
+.section ss8fb
+.section ss8ga
+.section ss8gb
+.section ss8ha
+.section ss8hb
+.section ss8ia
+.section ss8ib
+.section ss8ja
+.section ss8jb
+.section ss8ka
+.section ss8kb
+.section ss8la
+.section ss8lb
+.section ss8ma
+.section ss8mb
+.section ss8na
+.section ss8nb
+.section ss8oa
+.section ss8ob
+.section ss8pa
+.section ss8pb
+.section ss8qa
+.section ss8qb
+.section ss8ra
+.section ss8rb
+.section ss8sa
+.section ss8sb
+.section ss8ta
+.section ss8tb
+.section ss8ua
+.section ss8ub
+.section ss8va
+.section ss8vb
+.section ss8wa
+.section ss8wb
+.section ss8xa
+.section ss8xb
+.section ss8ya
+.section ss8yb
+.section ss8za
+.section ss8zb
+.section ss81a
+.section ss81b
+.section ss82a
+.section ss82b
+.section ss83a
+.section ss83b
+.section ss84a
+.section ss84b
+.section ss85a
+.section ss85b
+.section ss86a
+.section ss86b
+.section ss87a
+.section ss87b
+.section ss88a
+.section ss88b
+.section ss89a
+.section ss89b
+.section ss80a
+.section ss80b
+.section ss9aa
+.section ss9ab
+.section ss9ba
+.section ss9bb
+.section ss9ca
+.section ss9cb
+.section ss9da
+.section ss9db
+.section ss9ea
+.section ss9eb
+.section ss9fa
+.section ss9fb
+.section ss9ga
+.section ss9gb
+.section ss9ha
+.section ss9hb
+.section ss9ia
+.section ss9ib
+.section ss9ja
+.section ss9jb
+.section ss9ka
+.section ss9kb
+.section ss9la
+.section ss9lb
+.section ss9ma
+.section ss9mb
+.section ss9na
+.section ss9nb
+.section ss9oa
+.section ss9ob
+.section ss9pa
+.section ss9pb
+.section ss9qa
+.section ss9qb
+.section ss9ra
+.section ss9rb
+.section ss9sa
+.section ss9sb
+.section ss9ta
+.section ss9tb
+.section ss9ua
+.section ss9ub
+.section ss9va
+.section ss9vb
+.section ss9wa
+.section ss9wb
+.section ss9xa
+.section ss9xb
+.section ss9ya
+.section ss9yb
+.section ss9za
+.section ss9zb
+.section ss91a
+.section ss91b
+.section ss92a
+.section ss92b
+.section ss93a
+.section ss93b
+.section ss94a
+.section ss94b
+.section ss95a
+.section ss95b
+.section ss96a
+.section ss96b
+.section ss97a
+.section ss97b
+.section ss98a
+.section ss98b
+.section ss99a
+.section ss99b
+.section ss90a
+.section ss90b
+.section ss0aa
+.section ss0ab
+.section ss0ba
+.section ss0bb
+.section ss0ca
+.section ss0cb
+.section ss0da
+.section ss0db
+.section ss0ea
+.section ss0eb
+.section ss0fa
+.section ss0fb
+.section ss0ga
+.section ss0gb
+.section ss0ha
+.section ss0hb
+.section ss0ia
+.section ss0ib
+.section ss0ja
+.section ss0jb
+.section ss0ka
+.section ss0kb
+.section ss0la
+.section ss0lb
+.section ss0ma
+.section ss0mb
+.section ss0na
+.section ss0nb
+.section ss0oa
+.section ss0ob
+.section ss0pa
+.section ss0pb
+.section ss0qa
+.section ss0qb
+.section ss0ra
+.section ss0rb
+.section ss0sa
+.section ss0sb
+.section ss0ta
+.section ss0tb
+.section ss0ua
+.section ss0ub
+.section ss0va
+.section ss0vb
+.section ss0wa
+.section ss0wb
+.section ss0xa
+.section ss0xb
+.section ss0ya
+.section ss0yb
+.section ss0za
+.section ss0zb
+.section ss01a
+.section ss01b
+.section ss02a
+.section ss02b
+.section ss03a
+.section ss03b
+.section ss04a
+.section ss04b
+.section ss05a
+.section ss05b
+.section ss06a
+.section ss06b
+.section ss07a
+.section ss07b
+.section ss08a
+.section ss08b
+.section ss09a
+.section ss09b
+.section ss00a
+.section ss00b
+.section staaa
+.section staab
+.section staba
+.section stabb
+.section staca
+.section stacb
+.section stada
+.section stadb
+.section staea
+.section staeb
+.section stafa
+.section stafb
+.section staga
+.section stagb
+.section staha
+.section stahb
+.section staia
+.section staib
+.section staja
+.section stajb
+.section staka
+.section stakb
+.section stala
+.section stalb
+.section stama
+.section stamb
+.section stana
+.section stanb
+.section staoa
+.section staob
+.section stapa
+.section stapb
+.section staqa
+.section staqb
+.section stara
+.section starb
+.section stasa
+.section stasb
+.section stata
+.section statb
+.section staua
+.section staub
+.section stava
+.section stavb
+.section stawa
+.section stawb
+.section staxa
+.section staxb
+.section staya
+.section stayb
+.section staza
+.section stazb
+.section sta1a
+.section sta1b
+.section sta2a
+.section sta2b
+.section sta3a
+.section sta3b
+.section sta4a
+.section sta4b
+.section sta5a
+.section sta5b
+.section sta6a
+.section sta6b
+.section sta7a
+.section sta7b
+.section sta8a
+.section sta8b
+.section sta9a
+.section sta9b
+.section sta0a
+.section sta0b
+.section stbaa
+.section stbab
+.section stbba
+.section stbbb
+.section stbca
+.section stbcb
+.section stbda
+.section stbdb
+.section stbea
+.section stbeb
+.section stbfa
+.section stbfb
+.section stbga
+.section stbgb
+.section stbha
+.section stbhb
+.section stbia
+.section stbib
+.section stbja
+.section stbjb
+.section stbka
+.section stbkb
+.section stbla
+.section stblb
+.section stbma
+.section stbmb
+.section stbna
+.section stbnb
+.section stboa
+.section stbob
+.section stbpa
+.section stbpb
+.section stbqa
+.section stbqb
+.section stbra
+.section stbrb
+.section stbsa
+.section stbsb
+.section stbta
+.section stbtb
+.section stbua
+.section stbub
+.section stbva
+.section stbvb
+.section stbwa
+.section stbwb
+.section stbxa
+.section stbxb
+.section stbya
+.section stbyb
+.section stbza
+.section stbzb
+.section stb1a
+.section stb1b
+.section stb2a
+.section stb2b
+.section stb3a
+.section stb3b
+.section stb4a
+.section stb4b
+.section stb5a
+.section stb5b
+.section stb6a
+.section stb6b
+.section stb7a
+.section stb7b
+.section stb8a
+.section stb8b
+.section stb9a
+.section stb9b
+.section stb0a
+.section stb0b
+.section stcaa
+.section stcab
+.section stcba
+.section stcbb
+.section stcca
+.section stccb
+.section stcda
+.section stcdb
+.section stcea
+.section stceb
+.section stcfa
+.section stcfb
+.section stcga
+.section stcgb
+.section stcha
+.section stchb
+.section stcia
+.section stcib
+.section stcja
+.section stcjb
+.section stcka
+.section stckb
+.section stcla
+.section stclb
+.section stcma
+.section stcmb
+.section stcna
+.section stcnb
+.section stcoa
+.section stcob
+.section stcpa
+.section stcpb
+.section stcqa
+.section stcqb
+.section stcra
+.section stcrb
+.section stcsa
+.section stcsb
+.section stcta
+.section stctb
+.section stcua
+.section stcub
+.section stcva
+.section stcvb
+.section stcwa
+.section stcwb
+.section stcxa
+.section stcxb
+.section stcya
+.section stcyb
+.section stcza
+.section stczb
+.section stc1a
+.section stc1b
+.section stc2a
+.section stc2b
+.section stc3a
+.section stc3b
+.section stc4a
+.section stc4b
+.section stc5a
+.section stc5b
+.section stc6a
+.section stc6b
+.section stc7a
+.section stc7b
+.section stc8a
+.section stc8b
+.section stc9a
+.section stc9b
+.section stc0a
+.section stc0b
+.section stdaa
+.section stdab
+.section stdba
+.section stdbb
+.section stdca
+.section stdcb
+.section stdda
+.section stddb
+.section stdea
+.section stdeb
+.section stdfa
+.section stdfb
+.section stdga
+.section stdgb
+.section stdha
+.section stdhb
+.section stdia
+.section stdib
+.section stdja
+.section stdjb
+.section stdka
+.section stdkb
+.section stdla
+.section stdlb
+.section stdma
+.section stdmb
+.section stdna
+.section stdnb
+.section stdoa
+.section stdob
+.section stdpa
+.section stdpb
+.section stdqa
+.section stdqb
+.section stdra
+.section stdrb
+.section stdsa
+.section stdsb
+.section stdta
+.section stdtb
+.section stdua
+.section stdub
+.section stdva
+.section stdvb
+.section stdwa
+.section stdwb
+.section stdxa
+.section stdxb
+.section stdya
+.section stdyb
+.section stdza
+.section stdzb
+.section std1a
+.section std1b
+.section std2a
+.section std2b
+.section std3a
+.section std3b
+.section std4a
+.section std4b
+.section std5a
+.section std5b
+.section std6a
+.section std6b
+.section std7a
+.section std7b
+.section std8a
+.section std8b
+.section std9a
+.section std9b
+.section std0a
+.section std0b
+.section steaa
+.section steab
+.section steba
+.section stebb
+.section steca
+.section stecb
+.section steda
+.section stedb
+.section steea
+.section steeb
+.section stefa
+.section stefb
+.section stega
+.section stegb
+.section steha
+.section stehb
+.section steia
+.section steib
+.section steja
+.section stejb
+.section steka
+.section stekb
+.section stela
+.section stelb
+.section stema
+.section stemb
+.section stena
+.section stenb
+.section steoa
+.section steob
+.section stepa
+.section stepb
+.section steqa
+.section steqb
+.section stera
+.section sterb
+.section stesa
+.section stesb
+.section steta
+.section stetb
+.section steua
+.section steub
+.section steva
+.section stevb
+.section stewa
+.section stewb
+.section stexa
+.section stexb
+.section steya
+.section steyb
+.section steza
+.section stezb
+.section ste1a
+.section ste1b
+.section ste2a
+.section ste2b
+.section ste3a
+.section ste3b
+.section ste4a
+.section ste4b
+.section ste5a
+.section ste5b
+.section ste6a
+.section ste6b
+.section ste7a
+.section ste7b
+.section ste8a
+.section ste8b
+.section ste9a
+.section ste9b
+.section ste0a
+.section ste0b
+.section stfaa
+.section stfab
+.section stfba
+.section stfbb
+.section stfca
+.section stfcb
+.section stfda
+.section stfdb
+.section stfea
+.section stfeb
+.section stffa
+.section stffb
+.section stfga
+.section stfgb
+.section stfha
+.section stfhb
+.section stfia
+.section stfib
+.section stfja
+.section stfjb
+.section stfka
+.section stfkb
+.section stfla
+.section stflb
+.section stfma
+.section stfmb
+.section stfna
+.section stfnb
+.section stfoa
+.section stfob
+.section stfpa
+.section stfpb
+.section stfqa
+.section stfqb
+.section stfra
+.section stfrb
+.section stfsa
+.section stfsb
+.section stfta
+.section stftb
+.section stfua
+.section stfub
+.section stfva
+.section stfvb
+.section stfwa
+.section stfwb
+.section stfxa
+.section stfxb
+.section stfya
+.section stfyb
+.section stfza
+.section stfzb
+.section stf1a
+.section stf1b
+.section stf2a
+.section stf2b
+.section stf3a
+.section stf3b
+.section stf4a
+.section stf4b
+.section stf5a
+.section stf5b
+.section stf6a
+.section stf6b
+.section stf7a
+.section stf7b
+.section stf8a
+.section stf8b
+.section stf9a
+.section stf9b
+.section stf0a
+.section stf0b
+.section stgaa
+.section stgab
+.section stgba
+.section stgbb
+.section stgca
+.section stgcb
+.section stgda
+.section stgdb
+.section stgea
+.section stgeb
+.section stgfa
+.section stgfb
+.section stgga
+.section stggb
+.section stgha
+.section stghb
+.section stgia
+.section stgib
+.section stgja
+.section stgjb
+.section stgka
+.section stgkb
+.section stgla
+.section stglb
+.section stgma
+.section stgmb
+.section stgna
+.section stgnb
+.section stgoa
+.section stgob
+.section stgpa
+.section stgpb
+.section stgqa
+.section stgqb
+.section stgra
+.section stgrb
+.section stgsa
+.section stgsb
+.section stgta
+.section stgtb
+.section stgua
+.section stgub
+.section stgva
+.section stgvb
+.section stgwa
+.section stgwb
+.section stgxa
+.section stgxb
+.section stgya
+.section stgyb
+.section stgza
+.section stgzb
+.section stg1a
+.section stg1b
+.section stg2a
+.section stg2b
+.section stg3a
+.section stg3b
+.section stg4a
+.section stg4b
+.section stg5a
+.section stg5b
+.section stg6a
+.section stg6b
+.section stg7a
+.section stg7b
+.section stg8a
+.section stg8b
+.section stg9a
+.section stg9b
+.section stg0a
+.section stg0b
+.section sthaa
+.section sthab
+.section sthba
+.section sthbb
+.section sthca
+.section sthcb
+.section sthda
+.section sthdb
+.section sthea
+.section stheb
+.section sthfa
+.section sthfb
+.section sthga
+.section sthgb
+.section sthha
+.section sthhb
+.section sthia
+.section sthib
+.section sthja
+.section sthjb
+.section sthka
+.section sthkb
+.section sthla
+.section sthlb
+.section sthma
+.section sthmb
+.section sthna
+.section sthnb
+.section sthoa
+.section sthob
+.section sthpa
+.section sthpb
+.section sthqa
+.section sthqb
+.section sthra
+.section sthrb
+.section sthsa
+.section sthsb
+.section sthta
+.section sthtb
+.section sthua
+.section sthub
+.section sthva
+.section sthvb
+.section sthwa
+.section sthwb
+.section sthxa
+.section sthxb
+.section sthya
+.section sthyb
+.section sthza
+.section sthzb
+.section sth1a
+.section sth1b
+.section sth2a
+.section sth2b
+.section sth3a
+.section sth3b
+.section sth4a
+.section sth4b
+.section sth5a
+.section sth5b
+.section sth6a
+.section sth6b
+.section sth7a
+.section sth7b
+.section sth8a
+.section sth8b
+.section sth9a
+.section sth9b
+.section sth0a
+.section sth0b
+.section stiaa
+.section stiab
+.section stiba
+.section stibb
+.section stica
+.section sticb
+.section stida
+.section stidb
+.section stiea
+.section stieb
+.section stifa
+.section stifb
+.section stiga
+.section stigb
+.section stiha
+.section stihb
+.section stiia
+.section stiib
+.section stija
+.section stijb
+.section stika
+.section stikb
+.section stila
+.section stilb
+.section stima
+.section stimb
+.section stina
+.section stinb
+.section stioa
+.section stiob
+.section stipa
+.section stipb
+.section stiqa
+.section stiqb
+.section stira
+.section stirb
+.section stisa
+.section stisb
+.section stita
+.section stitb
+.section stiua
+.section stiub
+.section stiva
+.section stivb
+.section stiwa
+.section stiwb
+.section stixa
+.section stixb
+.section stiya
+.section stiyb
+.section stiza
+.section stizb
+.section sti1a
+.section sti1b
+.section sti2a
+.section sti2b
+.section sti3a
+.section sti3b
+.section sti4a
+.section sti4b
+.section sti5a
+.section sti5b
+.section sti6a
+.section sti6b
+.section sti7a
+.section sti7b
+.section sti8a
+.section sti8b
+.section sti9a
+.section sti9b
+.section sti0a
+.section sti0b
+.section stjaa
+.section stjab
+.section stjba
+.section stjbb
+.section stjca
+.section stjcb
+.section stjda
+.section stjdb
+.section stjea
+.section stjeb
+.section stjfa
+.section stjfb
+.section stjga
+.section stjgb
+.section stjha
+.section stjhb
+.section stjia
+.section stjib
+.section stjja
+.section stjjb
+.section stjka
+.section stjkb
+.section stjla
+.section stjlb
+.section stjma
+.section stjmb
+.section stjna
+.section stjnb
+.section stjoa
+.section stjob
+.section stjpa
+.section stjpb
+.section stjqa
+.section stjqb
+.section stjra
+.section stjrb
+.section stjsa
+.section stjsb
+.section stjta
+.section stjtb
+.section stjua
+.section stjub
+.section stjva
+.section stjvb
+.section stjwa
+.section stjwb
+.section stjxa
+.section stjxb
+.section stjya
+.section stjyb
+.section stjza
+.section stjzb
+.section stj1a
+.section stj1b
+.section stj2a
+.section stj2b
+.section stj3a
+.section stj3b
+.section stj4a
+.section stj4b
+.section stj5a
+.section stj5b
+.section stj6a
+.section stj6b
+.section stj7a
+.section stj7b
+.section stj8a
+.section stj8b
+.section stj9a
+.section stj9b
+.section stj0a
+.section stj0b
+.section stkaa
+.section stkab
+.section stkba
+.section stkbb
+.section stkca
+.section stkcb
+.section stkda
+.section stkdb
+.section stkea
+.section stkeb
+.section stkfa
+.section stkfb
+.section stkga
+.section stkgb
+.section stkha
+.section stkhb
+.section stkia
+.section stkib
+.section stkja
+.section stkjb
+.section stkka
+.section stkkb
+.section stkla
+.section stklb
+.section stkma
+.section stkmb
+.section stkna
+.section stknb
+.section stkoa
+.section stkob
+.section stkpa
+.section stkpb
+.section stkqa
+.section stkqb
+.section stkra
+.section stkrb
+.section stksa
+.section stksb
+.section stkta
+.section stktb
+.section stkua
+.section stkub
+.section stkva
+.section stkvb
+.section stkwa
+.section stkwb
+.section stkxa
+.section stkxb
+.section stkya
+.section stkyb
+.section stkza
+.section stkzb
+.section stk1a
+.section stk1b
+.section stk2a
+.section stk2b
+.section stk3a
+.section stk3b
+.section stk4a
+.section stk4b
+.section stk5a
+.section stk5b
+.section stk6a
+.section stk6b
+.section stk7a
+.section stk7b
+.section stk8a
+.section stk8b
+.section stk9a
+.section stk9b
+.section stk0a
+.section stk0b
+.section stlaa
+.section stlab
+.section stlba
+.section stlbb
+.section stlca
+.section stlcb
+.section stlda
+.section stldb
+.section stlea
+.section stleb
+.section stlfa
+.section stlfb
+.section stlga
+.section stlgb
+.section stlha
+.section stlhb
+.section stlia
+.section stlib
+.section stlja
+.section stljb
+.section stlka
+.section stlkb
+.section stlla
+.section stllb
+.section stlma
+.section stlmb
+.section stlna
+.section stlnb
+.section stloa
+.section stlob
+.section stlpa
+.section stlpb
+.section stlqa
+.section stlqb
+.section stlra
+.section stlrb
+.section stlsa
+.section stlsb
+.section stlta
+.section stltb
+.section stlua
+.section stlub
+.section stlva
+.section stlvb
+.section stlwa
+.section stlwb
+.section stlxa
+.section stlxb
+.section stlya
+.section stlyb
+.section stlza
+.section stlzb
+.section stl1a
+.section stl1b
+.section stl2a
+.section stl2b
+.section stl3a
+.section stl3b
+.section stl4a
+.section stl4b
+.section stl5a
+.section stl5b
+.section stl6a
+.section stl6b
+.section stl7a
+.section stl7b
+.section stl8a
+.section stl8b
+.section stl9a
+.section stl9b
+.section stl0a
+.section stl0b
+.section stmaa
+.section stmab
+.section stmba
+.section stmbb
+.section stmca
+.section stmcb
+.section stmda
+.section stmdb
+.section stmea
+.section stmeb
+.section stmfa
+.section stmfb
+.section stmga
+.section stmgb
+.section stmha
+.section stmhb
+.section stmia
+.section stmib
+.section stmja
+.section stmjb
+.section stmka
+.section stmkb
+.section stmla
+.section stmlb
+.section stmma
+.section stmmb
+.section stmna
+.section stmnb
+.section stmoa
+.section stmob
+.section stmpa
+.section stmpb
+.section stmqa
+.section stmqb
+.section stmra
+.section stmrb
+.section stmsa
+.section stmsb
+.section stmta
+.section stmtb
+.section stmua
+.section stmub
+.section stmva
+.section stmvb
+.section stmwa
+.section stmwb
+.section stmxa
+.section stmxb
+.section stmya
+.section stmyb
+.section stmza
+.section stmzb
+.section stm1a
+.section stm1b
+.section stm2a
+.section stm2b
+.section stm3a
+.section stm3b
+.section stm4a
+.section stm4b
+.section stm5a
+.section stm5b
+.section stm6a
+.section stm6b
+.section stm7a
+.section stm7b
+.section stm8a
+.section stm8b
+.section stm9a
+.section stm9b
+.section stm0a
+.section stm0b
+.section stnaa
+.section stnab
+.section stnba
+.section stnbb
+.section stnca
+.section stncb
+.section stnda
+.section stndb
+.section stnea
+.section stneb
+.section stnfa
+.section stnfb
+.section stnga
+.section stngb
+.section stnha
+.section stnhb
+.section stnia
+.section stnib
+.section stnja
+.section stnjb
+.section stnka
+.section stnkb
+.section stnla
+.section stnlb
+.section stnma
+.section stnmb
+.section stnna
+.section stnnb
+.section stnoa
+.section stnob
+.section stnpa
+.section stnpb
+.section stnqa
+.section stnqb
+.section stnra
+.section stnrb
+.section stnsa
+.section stnsb
+.section stnta
+.section stntb
+.section stnua
+.section stnub
+.section stnva
+.section stnvb
+.section stnwa
+.section stnwb
+.section stnxa
+.section stnxb
+.section stnya
+.section stnyb
+.section stnza
+.section stnzb
+.section stn1a
+.section stn1b
+.section stn2a
+.section stn2b
+.section stn3a
+.section stn3b
+.section stn4a
+.section stn4b
+.section stn5a
+.section stn5b
+.section stn6a
+.section stn6b
+.section stn7a
+.section stn7b
+.section stn8a
+.section stn8b
+.section stn9a
+.section stn9b
+.section stn0a
+.section stn0b
+.section stoaa
+.section stoab
+.section stoba
+.section stobb
+.section stoca
+.section stocb
+.section stoda
+.section stodb
+.section stoea
+.section stoeb
+.section stofa
+.section stofb
+.section stoga
+.section stogb
+.section stoha
+.section stohb
+.section stoia
+.section stoib
+.section stoja
+.section stojb
+.section stoka
+.section stokb
+.section stola
+.section stolb
+.section stoma
+.section stomb
+.section stona
+.section stonb
+.section stooa
+.section stoob
+.section stopa
+.section stopb
+.section stoqa
+.section stoqb
+.section stora
+.section storb
+.section stosa
+.section stosb
+.section stota
+.section stotb
+.section stoua
+.section stoub
+.section stova
+.section stovb
+.section stowa
+.section stowb
+.section stoxa
+.section stoxb
+.section stoya
+.section stoyb
+.section stoza
+.section stozb
+.section sto1a
+.section sto1b
+.section sto2a
+.section sto2b
+.section sto3a
+.section sto3b
+.section sto4a
+.section sto4b
+.section sto5a
+.section sto5b
+.section sto6a
+.section sto6b
+.section sto7a
+.section sto7b
+.section sto8a
+.section sto8b
+.section sto9a
+.section sto9b
+.section sto0a
+.section sto0b
+.section stpaa
+.section stpab
+.section stpba
+.section stpbb
+.section stpca
+.section stpcb
+.section stpda
+.section stpdb
+.section stpea
+.section stpeb
+.section stpfa
+.section stpfb
+.section stpga
+.section stpgb
+.section stpha
+.section stphb
+.section stpia
+.section stpib
+.section stpja
+.section stpjb
+.section stpka
+.section stpkb
+.section stpla
+.section stplb
+.section stpma
+.section stpmb
+.section stpna
+.section stpnb
+.section stpoa
+.section stpob
+.section stppa
+.section stppb
+.section stpqa
+.section stpqb
+.section stpra
+.section stprb
+.section stpsa
+.section stpsb
+.section stpta
+.section stptb
+.section stpua
+.section stpub
+.section stpva
+.section stpvb
+.section stpwa
+.section stpwb
+.section stpxa
+.section stpxb
+.section stpya
+.section stpyb
+.section stpza
+.section stpzb
+.section stp1a
+.section stp1b
+.section stp2a
+.section stp2b
+.section stp3a
+.section stp3b
+.section stp4a
+.section stp4b
+.section stp5a
+.section stp5b
+.section stp6a
+.section stp6b
+.section stp7a
+.section stp7b
+.section stp8a
+.section stp8b
+.section stp9a
+.section stp9b
+.section stp0a
+.section stp0b
+.section stqaa
+.section stqab
+.section stqba
+.section stqbb
+.section stqca
+.section stqcb
+.section stqda
+.section stqdb
+.section stqea
+.section stqeb
+.section stqfa
+.section stqfb
+.section stqga
+.section stqgb
+.section stqha
+.section stqhb
+.section stqia
+.section stqib
+.section stqja
+.section stqjb
+.section stqka
+.section stqkb
+.section stqla
+.section stqlb
+.section stqma
+.section stqmb
+.section stqna
+.section stqnb
+.section stqoa
+.section stqob
+.section stqpa
+.section stqpb
+.section stqqa
+.section stqqb
+.section stqra
+.section stqrb
+.section stqsa
+.section stqsb
+.section stqta
+.section stqtb
+.section stqua
+.section stqub
+.section stqva
+.section stqvb
+.section stqwa
+.section stqwb
+.section stqxa
+.section stqxb
+.section stqya
+.section stqyb
+.section stqza
+.section stqzb
+.section stq1a
+.section stq1b
+.section stq2a
+.section stq2b
+.section stq3a
+.section stq3b
+.section stq4a
+.section stq4b
+.section stq5a
+.section stq5b
+.section stq6a
+.section stq6b
+.section stq7a
+.section stq7b
+.section stq8a
+.section stq8b
+.section stq9a
+.section stq9b
+.section stq0a
+.section stq0b
+.section straa
+.section strab
+.section strba
+.section strbb
+.section strca
+.section strcb
+.section strda
+.section strdb
+.section strea
+.section streb
+.section strfa
+.section strfb
+.section strga
+.section strgb
+.section strha
+.section strhb
+.section stria
+.section strib
+.section strja
+.section strjb
+.section strka
+.section strkb
+.section strla
+.section strlb
+.section strma
+.section strmb
+.section strna
+.section strnb
+.section stroa
+.section strob
+.section strpa
+.section strpb
+.section strqa
+.section strqb
+.section strra
+.section strrb
+.section strsa
+.section strsb
+.section strta
+.section strtb
+.section strua
+.section strub
+.section strva
+.section strvb
+.section strwa
+.section strwb
+.section strxa
+.section strxb
+.section strya
+.section stryb
+.section strza
+.section strzb
+.section str1a
+.section str1b
+.section str2a
+.section str2b
+.section str3a
+.section str3b
+.section str4a
+.section str4b
+.section str5a
+.section str5b
+.section str6a
+.section str6b
+.section str7a
+.section str7b
+.section str8a
+.section str8b
+.section str9a
+.section str9b
+.section str0a
+.section str0b
+.section stsaa
+.section stsab
+.section stsba
+.section stsbb
+.section stsca
+.section stscb
+.section stsda
+.section stsdb
+.section stsea
+.section stseb
+.section stsfa
+.section stsfb
+.section stsga
+.section stsgb
+.section stsha
+.section stshb
+.section stsia
+.section stsib
+.section stsja
+.section stsjb
+.section stska
+.section stskb
+.section stsla
+.section stslb
+.section stsma
+.section stsmb
+.section stsna
+.section stsnb
+.section stsoa
+.section stsob
+.section stspa
+.section stspb
+.section stsqa
+.section stsqb
+.section stsra
+.section stsrb
+.section stssa
+.section stssb
+.section ststa
+.section ststb
+.section stsua
+.section stsub
+.section stsva
+.section stsvb
+.section stswa
+.section stswb
+.section stsxa
+.section stsxb
+.section stsya
+.section stsyb
+.section stsza
+.section stszb
+.section sts1a
+.section sts1b
+.section sts2a
+.section sts2b
+.section sts3a
+.section sts3b
+.section sts4a
+.section sts4b
+.section sts5a
+.section sts5b
+.section sts6a
+.section sts6b
+.section sts7a
+.section sts7b
+.section sts8a
+.section sts8b
+.section sts9a
+.section sts9b
+.section sts0a
+.section sts0b
+.section sttaa
+.section sttab
+.section sttba
+.section sttbb
+.section sttca
+.section sttcb
+.section sttda
+.section sttdb
+.section sttea
+.section stteb
+.section sttfa
+.section sttfb
+.section sttga
+.section sttgb
+.section sttha
+.section stthb
+.section sttia
+.section sttib
+.section sttja
+.section sttjb
+.section sttka
+.section sttkb
+.section sttla
+.section sttlb
+.section sttma
+.section sttmb
+.section sttna
+.section sttnb
+.section sttoa
+.section sttob
+.section sttpa
+.section sttpb
+.section sttqa
+.section sttqb
+.section sttra
+.section sttrb
+.section sttsa
+.section sttsb
+.section sttta
+.section stttb
+.section sttua
+.section sttub
+.section sttva
+.section sttvb
+.section sttwa
+.section sttwb
+.section sttxa
+.section sttxb
+.section sttya
+.section sttyb
+.section sttza
+.section sttzb
+.section stt1a
+.section stt1b
+.section stt2a
+.section stt2b
+.section stt3a
+.section stt3b
+.section stt4a
+.section stt4b
+.section stt5a
+.section stt5b
+.section stt6a
+.section stt6b
+.section stt7a
+.section stt7b
+.section stt8a
+.section stt8b
+.section stt9a
+.section stt9b
+.section stt0a
+.section stt0b
+.section stuaa
+.section stuab
+.section stuba
+.section stubb
+.section stuca
+.section stucb
+.section studa
+.section studb
+.section stuea
+.section stueb
+.section stufa
+.section stufb
+.section stuga
+.section stugb
+.section stuha
+.section stuhb
+.section stuia
+.section stuib
+.section stuja
+.section stujb
+.section stuka
+.section stukb
+.section stula
+.section stulb
+.section stuma
+.section stumb
+.section stuna
+.section stunb
+.section stuoa
+.section stuob
+.section stupa
+.section stupb
+.section stuqa
+.section stuqb
+.section stura
+.section sturb
+.section stusa
+.section stusb
+.section stuta
+.section stutb
+.section stuua
+.section stuub
+.section stuva
+.section stuvb
+.section stuwa
+.section stuwb
+.section stuxa
+.section stuxb
+.section stuya
+.section stuyb
+.section stuza
+.section stuzb
+.section stu1a
+.section stu1b
+.section stu2a
+.section stu2b
+.section stu3a
+.section stu3b
+.section stu4a
+.section stu4b
+.section stu5a
+.section stu5b
+.section stu6a
+.section stu6b
+.section stu7a
+.section stu7b
+.section stu8a
+.section stu8b
+.section stu9a
+.section stu9b
+.section stu0a
+.section stu0b
+.section stvaa
+.section stvab
+.section stvba
+.section stvbb
+.section stvca
+.section stvcb
+.section stvda
+.section stvdb
+.section stvea
+.section stveb
+.section stvfa
+.section stvfb
+.section stvga
+.section stvgb
+.section stvha
+.section stvhb
+.section stvia
+.section stvib
+.section stvja
+.section stvjb
+.section stvka
+.section stvkb
+.section stvla
+.section stvlb
+.section stvma
+.section stvmb
+.section stvna
+.section stvnb
+.section stvoa
+.section stvob
+.section stvpa
+.section stvpb
+.section stvqa
+.section stvqb
+.section stvra
+.section stvrb
+.section stvsa
+.section stvsb
+.section stvta
+.section stvtb
+.section stvua
+.section stvub
+.section stvva
+.section stvvb
+.section stvwa
+.section stvwb
+.section stvxa
+.section stvxb
+.section stvya
+.section stvyb
+.section stvza
+.section stvzb
+.section stv1a
+.section stv1b
+.section stv2a
+.section stv2b
+.section stv3a
+.section stv3b
+.section stv4a
+.section stv4b
+.section stv5a
+.section stv5b
+.section stv6a
+.section stv6b
+.section stv7a
+.section stv7b
+.section stv8a
+.section stv8b
+.section stv9a
+.section stv9b
+.section stv0a
+.section stv0b
+.section stwaa
+.section stwab
+.section stwba
+.section stwbb
+.section stwca
+.section stwcb
+.section stwda
+.section stwdb
+.section stwea
+.section stweb
+.section stwfa
+.section stwfb
+.section stwga
+.section stwgb
+.section stwha
+.section stwhb
+.section stwia
+.section stwib
+.section stwja
+.section stwjb
+.section stwka
+.section stwkb
+.section stwla
+.section stwlb
+.section stwma
+.section stwmb
+.section stwna
+.section stwnb
+.section stwoa
+.section stwob
+.section stwpa
+.section stwpb
+.section stwqa
+.section stwqb
+.section stwra
+.section stwrb
+.section stwsa
+.section stwsb
+.section stwta
+.section stwtb
+.section stwua
+.section stwub
+.section stwva
+.section stwvb
+.section stwwa
+.section stwwb
+.section stwxa
+.section stwxb
+.section stwya
+.section stwyb
+.section stwza
+.section stwzb
+.section stw1a
+.section stw1b
+.section stw2a
+.section stw2b
+.section stw3a
+.section stw3b
+.section stw4a
+.section stw4b
+.section stw5a
+.section stw5b
+.section stw6a
+.section stw6b
+.section stw7a
+.section stw7b
+.section stw8a
+.section stw8b
+.section stw9a
+.section stw9b
+.section stw0a
+.section stw0b
+.section stxaa
+.section stxab
+.section stxba
+.section stxbb
+.section stxca
+.section stxcb
+.section stxda
+.section stxdb
+.section stxea
+.section stxeb
+.section stxfa
+.section stxfb
+.section stxga
+.section stxgb
+.section stxha
+.section stxhb
+.section stxia
+.section stxib
+.section stxja
+.section stxjb
+.section stxka
+.section stxkb
+.section stxla
+.section stxlb
+.section stxma
+.section stxmb
+.section stxna
+.section stxnb
+.section stxoa
+.section stxob
+.section stxpa
+.section stxpb
+.section stxqa
+.section stxqb
+.section stxra
+.section stxrb
+.section stxsa
+.section stxsb
+.section stxta
+.section stxtb
+.section stxua
+.section stxub
+.section stxva
+.section stxvb
+.section stxwa
+.section stxwb
+.section stxxa
+.section stxxb
+.section stxya
+.section stxyb
+.section stxza
+.section stxzb
+.section stx1a
+.section stx1b
+.section stx2a
+.section stx2b
+.section stx3a
+.section stx3b
+.section stx4a
+.section stx4b
+.section stx5a
+.section stx5b
+.section stx6a
+.section stx6b
+.section stx7a
+.section stx7b
+.section stx8a
+.section stx8b
+.section stx9a
+.section stx9b
+.section stx0a
+.section stx0b
+.section styaa
+.section styab
+.section styba
+.section stybb
+.section styca
+.section stycb
+.section styda
+.section stydb
+.section styea
+.section styeb
+.section styfa
+.section styfb
+.section styga
+.section stygb
+.section styha
+.section styhb
+.section styia
+.section styib
+.section styja
+.section styjb
+.section styka
+.section stykb
+.section styla
+.section stylb
+.section styma
+.section stymb
+.section styna
+.section stynb
+.section styoa
+.section styob
+.section stypa
+.section stypb
+.section styqa
+.section styqb
+.section styra
+.section styrb
+.section stysa
+.section stysb
+.section styta
+.section stytb
+.section styua
+.section styub
+.section styva
+.section styvb
+.section stywa
+.section stywb
+.section styxa
+.section styxb
+.section styya
+.section styyb
+.section styza
+.section styzb
+.section sty1a
+.section sty1b
+.section sty2a
+.section sty2b
+.section sty3a
+.section sty3b
+.section sty4a
+.section sty4b
+.section sty5a
+.section sty5b
+.section sty6a
+.section sty6b
+.section sty7a
+.section sty7b
+.section sty8a
+.section sty8b
+.section sty9a
+.section sty9b
+.section sty0a
+.section sty0b
+.section stzaa
+.section stzab
+.section stzba
+.section stzbb
+.section stzca
+.section stzcb
+.section stzda
+.section stzdb
+.section stzea
+.section stzeb
+.section stzfa
+.section stzfb
+.section stzga
+.section stzgb
+.section stzha
+.section stzhb
+.section stzia
+.section stzib
+.section stzja
+.section stzjb
+.section stzka
+.section stzkb
+.section stzla
+.section stzlb
+.section stzma
+.section stzmb
+.section stzna
+.section stznb
+.section stzoa
+.section stzob
+.section stzpa
+.section stzpb
+.section stzqa
+.section stzqb
+.section stzra
+.section stzrb
+.section stzsa
+.section stzsb
+.section stzta
+.section stztb
+.section stzua
+.section stzub
+.section stzva
+.section stzvb
+.section stzwa
+.section stzwb
+.section stzxa
+.section stzxb
+.section stzya
+.section stzyb
+.section stzza
+.section stzzb
+.section stz1a
+.section stz1b
+.section stz2a
+.section stz2b
+.section stz3a
+.section stz3b
+.section stz4a
+.section stz4b
+.section stz5a
+.section stz5b
+.section stz6a
+.section stz6b
+.section stz7a
+.section stz7b
+.section stz8a
+.section stz8b
+.section stz9a
+.section stz9b
+.section stz0a
+.section stz0b
+.section st1aa
+.section st1ab
+.section st1ba
+.section st1bb
+.section st1ca
+.section st1cb
+.section st1da
+.section st1db
+.section st1ea
+.section st1eb
+.section st1fa
+.section st1fb
+.section st1ga
+.section st1gb
+.section st1ha
+.section st1hb
+.section st1ia
+.section st1ib
+.section st1ja
+.section st1jb
+.section st1ka
+.section st1kb
+.section st1la
+.section st1lb
+.section st1ma
+.section st1mb
+.section st1na
+.section st1nb
+.section st1oa
+.section st1ob
+.section st1pa
+.section st1pb
+.section st1qa
+.section st1qb
+.section st1ra
+.section st1rb
+.section st1sa
+.section st1sb
+.section st1ta
+.section st1tb
+.section st1ua
+.section st1ub
+.section st1va
+.section st1vb
+.section st1wa
+.section st1wb
+.section st1xa
+.section st1xb
+.section st1ya
+.section st1yb
+.section st1za
+.section st1zb
+.section st11a
+.section st11b
+.section st12a
+.section st12b
+.section st13a
+.section st13b
+.section st14a
+.section st14b
+.section st15a
+.section st15b
+.section st16a
+.section st16b
+.section st17a
+.section st17b
+.section st18a
+.section st18b
+.section st19a
+.section st19b
+.section st10a
+.section st10b
+.section st2aa
+.section st2ab
+.section st2ba
+.section st2bb
+.section st2ca
+.section st2cb
+.section st2da
+.section st2db
+.section st2ea
+.section st2eb
+.section st2fa
+.section st2fb
+.section st2ga
+.section st2gb
+.section st2ha
+.section st2hb
+.section st2ia
+.section st2ib
+.section st2ja
+.section st2jb
+.section st2ka
+.section st2kb
+.section st2la
+.section st2lb
+.section st2ma
+.section st2mb
+.section st2na
+.section st2nb
+.section st2oa
+.section st2ob
+.section st2pa
+.section st2pb
+.section st2qa
+.section st2qb
+.section st2ra
+.section st2rb
+.section st2sa
+.section st2sb
+.section st2ta
+.section st2tb
+.section st2ua
+.section st2ub
+.section st2va
+.section st2vb
+.section st2wa
+.section st2wb
+.section st2xa
+.section st2xb
+.section st2ya
+.section st2yb
+.section st2za
+.section st2zb
+.section st21a
+.section st21b
+.section st22a
+.section st22b
+.section st23a
+.section st23b
+.section st24a
+.section st24b
+.section st25a
+.section st25b
+.section st26a
+.section st26b
+.section st27a
+.section st27b
+.section st28a
+.section st28b
+.section st29a
+.section st29b
+.section st20a
+.section st20b
+.section st3aa
+.section st3ab
+.section st3ba
+.section st3bb
+.section st3ca
+.section st3cb
+.section st3da
+.section st3db
+.section st3ea
+.section st3eb
+.section st3fa
+.section st3fb
+.section st3ga
+.section st3gb
+.section st3ha
+.section st3hb
+.section st3ia
+.section st3ib
+.section st3ja
+.section st3jb
+.section st3ka
+.section st3kb
+.section st3la
+.section st3lb
+.section st3ma
+.section st3mb
+.section st3na
+.section st3nb
+.section st3oa
+.section st3ob
+.section st3pa
+.section st3pb
+.section st3qa
+.section st3qb
+.section st3ra
+.section st3rb
+.section st3sa
+.section st3sb
+.section st3ta
+.section st3tb
+.section st3ua
+.section st3ub
+.section st3va
+.section st3vb
+.section st3wa
+.section st3wb
+.section st3xa
+.section st3xb
+.section st3ya
+.section st3yb
+.section st3za
+.section st3zb
+.section st31a
+.section st31b
+.section st32a
+.section st32b
+.section st33a
+.section st33b
+.section st34a
+.section st34b
+.section st35a
+.section st35b
+.section st36a
+.section st36b
+.section st37a
+.section st37b
+.section st38a
+.section st38b
+.section st39a
+.section st39b
+.section st30a
+.section st30b
+.section st4aa
+.section st4ab
+.section st4ba
+.section st4bb
+.section st4ca
+.section st4cb
+.section st4da
+.section st4db
+.section st4ea
+.section st4eb
+.section st4fa
+.section st4fb
+.section st4ga
+.section st4gb
+.section st4ha
+.section st4hb
+.section st4ia
+.section st4ib
+.section st4ja
+.section st4jb
+.section st4ka
+.section st4kb
+.section st4la
+.section st4lb
+.section st4ma
+.section st4mb
+.section st4na
+.section st4nb
+.section st4oa
+.section st4ob
+.section st4pa
+.section st4pb
+.section st4qa
+.section st4qb
+.section st4ra
+.section st4rb
+.section st4sa
+.section st4sb
+.section st4ta
+.section st4tb
+.section st4ua
+.section st4ub
+.section st4va
+.section st4vb
+.section st4wa
+.section st4wb
+.section st4xa
+.section st4xb
+.section st4ya
+.section st4yb
+.section st4za
+.section st4zb
+.section st41a
+.section st41b
+.section st42a
+.section st42b
+.section st43a
+.section st43b
+.section st44a
+.section st44b
+.section st45a
+.section st45b
+.section st46a
+.section st46b
+.section st47a
+.section st47b
+.section st48a
+.section st48b
+.section st49a
+.section st49b
+.section st40a
+.section st40b
+.section st5aa
+.section st5ab
+.section st5ba
+.section st5bb
+.section st5ca
+.section st5cb
+.section st5da
+.section st5db
+.section st5ea
+.section st5eb
+.section st5fa
+.section st5fb
+.section st5ga
+.section st5gb
+.section st5ha
+.section st5hb
+.section st5ia
+.section st5ib
+.section st5ja
+.section st5jb
+.section st5ka
+.section st5kb
+.section st5la
+.section st5lb
+.section st5ma
+.section st5mb
+.section st5na
+.section st5nb
+.section st5oa
+.section st5ob
+.section st5pa
+.section st5pb
+.section st5qa
+.section st5qb
+.section st5ra
+.section st5rb
+.section st5sa
+.section st5sb
+.section st5ta
+.section st5tb
+.section st5ua
+.section st5ub
+.section st5va
+.section st5vb
+.section st5wa
+.section st5wb
+.section st5xa
+.section st5xb
+.section st5ya
+.section st5yb
+.section st5za
+.section st5zb
+.section st51a
+.section st51b
+.section st52a
+.section st52b
+.section st53a
+.section st53b
+.section st54a
+.section st54b
+.section st55a
+.section st55b
+.section st56a
+.section st56b
+.section st57a
+.section st57b
+.section st58a
+.section st58b
+.section st59a
+.section st59b
+.section st50a
+.section st50b
+.section st6aa
+.section st6ab
+.section st6ba
+.section st6bb
+.section st6ca
+.section st6cb
+.section st6da
+.section st6db
+.section st6ea
+.section st6eb
+.section st6fa
+.section st6fb
+.section st6ga
+.section st6gb
+.section st6ha
+.section st6hb
+.section st6ia
+.section st6ib
+.section st6ja
+.section st6jb
+.section st6ka
+.section st6kb
+.section st6la
+.section st6lb
+.section st6ma
+.section st6mb
+.section st6na
+.section st6nb
+.section st6oa
+.section st6ob
+.section st6pa
+.section st6pb
+.section st6qa
+.section st6qb
+.section st6ra
+.section st6rb
+.section st6sa
+.section st6sb
+.section st6ta
+.section st6tb
+.section st6ua
+.section st6ub
+.section st6va
+.section st6vb
+.section st6wa
+.section st6wb
+.section st6xa
+.section st6xb
+.section st6ya
+.section st6yb
+.section st6za
+.section st6zb
+.section st61a
+.section st61b
+.section st62a
+.section st62b
+.section st63a
+.section st63b
+.section st64a
+.section st64b
+.section st65a
+.section st65b
+.section st66a
+.section st66b
+.section st67a
+.section st67b
+.section st68a
+.section st68b
+.section st69a
+.section st69b
+.section st60a
+.section st60b
+.section st7aa
+.section st7ab
+.section st7ba
+.section st7bb
+.section st7ca
+.section st7cb
+.section st7da
+.section st7db
+.section st7ea
+.section st7eb
+.section st7fa
+.section st7fb
+.section st7ga
+.section st7gb
+.section st7ha
+.section st7hb
+.section st7ia
+.section st7ib
+.section st7ja
+.section st7jb
+.section st7ka
+.section st7kb
+.section st7la
+.section st7lb
+.section st7ma
+.section st7mb
+.section st7na
+.section st7nb
+.section st7oa
+.section st7ob
+.section st7pa
+.section st7pb
+.section st7qa
+.section st7qb
+.section st7ra
+.section st7rb
+.section st7sa
+.section st7sb
+.section st7ta
+.section st7tb
+.section st7ua
+.section st7ub
+.section st7va
+.section st7vb
+.section st7wa
+.section st7wb
+.section st7xa
+.section st7xb
+.section st7ya
+.section st7yb
+.section st7za
+.section st7zb
+.section st71a
+.section st71b
+.section st72a
+.section st72b
+.section st73a
+.section st73b
+.section st74a
+.section st74b
+.section st75a
+.section st75b
+.section st76a
+.section st76b
+.section st77a
+.section st77b
+.section st78a
+.section st78b
+.section st79a
+.section st79b
+.section st70a
+.section st70b
+.section st8aa
+.section st8ab
+.section st8ba
+.section st8bb
+.section st8ca
+.section st8cb
+.section st8da
+.section st8db
+.section st8ea
+.section st8eb
+.section st8fa
+.section st8fb
+.section st8ga
+.section st8gb
+.section st8ha
+.section st8hb
+.section st8ia
+.section st8ib
+.section st8ja
+.section st8jb
+.section st8ka
+.section st8kb
+.section st8la
+.section st8lb
+.section st8ma
+.section st8mb
+.section st8na
+.section st8nb
+.section st8oa
+.section st8ob
+.section st8pa
+.section st8pb
+.section st8qa
+.section st8qb
+.section st8ra
+.section st8rb
+.section st8sa
+.section st8sb
+.section st8ta
+.section st8tb
+.section st8ua
+.section st8ub
+.section st8va
+.section st8vb
+.section st8wa
+.section st8wb
+.section st8xa
+.section st8xb
+.section st8ya
+.section st8yb
+.section st8za
+.section st8zb
+.section st81a
+.section st81b
+.section st82a
+.section st82b
+.section st83a
+.section st83b
+.section st84a
+.section st84b
+.section st85a
+.section st85b
+.section st86a
+.section st86b
+.section st87a
+.section st87b
+.section st88a
+.section st88b
+.section st89a
+.section st89b
+.section st80a
+.section st80b
+.section st9aa
+.section st9ab
+.section st9ba
+.section st9bb
+.section st9ca
+.section st9cb
+.section st9da
+.section st9db
+.section st9ea
+.section st9eb
+.section st9fa
+.section st9fb
+.section st9ga
+.section st9gb
+.section st9ha
+.section st9hb
+.section st9ia
+.section st9ib
+.section st9ja
+.section st9jb
+.section st9ka
+.section st9kb
+.section st9la
+.section st9lb
+.section st9ma
+.section st9mb
+.section st9na
+.section st9nb
+.section st9oa
+.section st9ob
+.section st9pa
+.section st9pb
+.section st9qa
+.section st9qb
+.section st9ra
+.section st9rb
+.section st9sa
+.section st9sb
+.section st9ta
+.section st9tb
+.section st9ua
+.section st9ub
+.section st9va
+.section st9vb
+.section st9wa
+.section st9wb
+.section st9xa
+.section st9xb
+.section st9ya
+.section st9yb
+.section st9za
+.section st9zb
+.section st91a
+.section st91b
+.section st92a
+.section st92b
+.section st93a
+.section st93b
+.section st94a
+.section st94b
+.section st95a
+.section st95b
+.section st96a
+.section st96b
+.section st97a
+.section st97b
+.section st98a
+.section st98b
+.section st99a
+.section st99b
+.section st90a
+.section st90b
+.section st0aa
+.section st0ab
+.section st0ba
+.section st0bb
+.section st0ca
+.section st0cb
+.section st0da
+.section st0db
+.section st0ea
+.section st0eb
+.section st0fa
+.section st0fb
+.section st0ga
+.section st0gb
+.section st0ha
+.section st0hb
+.section st0ia
+.section st0ib
+.section st0ja
+.section st0jb
+.section st0ka
+.section st0kb
+.section st0la
+.section st0lb
+.section st0ma
+.section st0mb
+.section st0na
+.section st0nb
+.section st0oa
+.section st0ob
+.section st0pa
+.section st0pb
+.section st0qa
+.section st0qb
+.section st0ra
+.section st0rb
+.section st0sa
+.section st0sb
+.section st0ta
+.section st0tb
+.section st0ua
+.section st0ub
+.section st0va
+.section st0vb
+.section st0wa
+.section st0wb
+.section st0xa
+.section st0xb
+.section st0ya
+.section st0yb
+.section st0za
+.section st0zb
+.section st01a
+.section st01b
+.section st02a
+.section st02b
+.section st03a
+.section st03b
+.section st04a
+.section st04b
+.section st05a
+.section st05b
+.section st06a
+.section st06b
+.section st07a
+.section st07b
+.section st08a
+.section st08b
+.section st09a
+.section st09b
+.section st00a
+.section st00b
+.section suaaa
+.section suaab
+.section suaba
+.section suabb
+.section suaca
+.section suacb
+.section suada
+.section suadb
+.section suaea
+.section suaeb
+.section suafa
+.section suafb
+.section suaga
+.section suagb
+.section suaha
+.section suahb
+.section suaia
+.section suaib
+.section suaja
+.section suajb
+.section suaka
+.section suakb
+.section suala
+.section sualb
+.section suama
+.section suamb
+.section suana
+.section suanb
+.section suaoa
+.section suaob
+.section suapa
+.section suapb
+.section suaqa
+.section suaqb
+.section suara
+.section suarb
+.section suasa
+.section suasb
+.section suata
+.section suatb
+.section suaua
+.section suaub
+.section suava
+.section suavb
+.section suawa
+.section suawb
+.section suaxa
+.section suaxb
+.section suaya
+.section suayb
+.section suaza
+.section suazb
+.section sua1a
+.section sua1b
+.section sua2a
+.section sua2b
+.section sua3a
+.section sua3b
+.section sua4a
+.section sua4b
+.section sua5a
+.section sua5b
+.section sua6a
+.section sua6b
+.section sua7a
+.section sua7b
+.section sua8a
+.section sua8b
+.section sua9a
+.section sua9b
+.section sua0a
+.section sua0b
+.section subaa
+.section subab
+.section subba
+.section subbb
+.section subca
+.section subcb
+.section subda
+.section subdb
+.section subea
+.section subeb
+.section subfa
+.section subfb
+.section subga
+.section subgb
+.section subha
+.section subhb
+.section subia
+.section subib
+.section subja
+.section subjb
+.section subka
+.section subkb
+.section subla
+.section sublb
+.section subma
+.section submb
+.section subna
+.section subnb
+.section suboa
+.section subob
+.section subpa
+.section subpb
+.section subqa
+.section subqb
+.section subra
+.section subrb
+.section subsa
+.section subsb
+.section subta
+.section subtb
+.section subua
+.section subub
+.section subva
+.section subvb
+.section subwa
+.section subwb
+.section subxa
+.section subxb
+.section subya
+.section subyb
+.section subza
+.section subzb
+.section sub1a
+.section sub1b
+.section sub2a
+.section sub2b
+.section sub3a
+.section sub3b
+.section sub4a
+.section sub4b
+.section sub5a
+.section sub5b
+.section sub6a
+.section sub6b
+.section sub7a
+.section sub7b
+.section sub8a
+.section sub8b
+.section sub9a
+.section sub9b
+.section sub0a
+.section sub0b
+.section sucaa
+.section sucab
+.section sucba
+.section sucbb
+.section succa
+.section succb
+.section sucda
+.section sucdb
+.section sucea
+.section suceb
+.section sucfa
+.section sucfb
+.section sucga
+.section sucgb
+.section sucha
+.section suchb
+.section sucia
+.section sucib
+.section sucja
+.section sucjb
+.section sucka
+.section suckb
+.section sucla
+.section suclb
+.section sucma
+.section sucmb
+.section sucna
+.section sucnb
+.section sucoa
+.section sucob
+.section sucpa
+.section sucpb
+.section sucqa
+.section sucqb
+.section sucra
+.section sucrb
+.section sucsa
+.section sucsb
+.section sucta
+.section suctb
+.section sucua
+.section sucub
+.section sucva
+.section sucvb
+.section sucwa
+.section sucwb
+.section sucxa
+.section sucxb
+.section sucya
+.section sucyb
+.section sucza
+.section suczb
+.section suc1a
+.section suc1b
+.section suc2a
+.section suc2b
+.section suc3a
+.section suc3b
+.section suc4a
+.section suc4b
+.section suc5a
+.section suc5b
+.section suc6a
+.section suc6b
+.section suc7a
+.section suc7b
+.section suc8a
+.section suc8b
+.section suc9a
+.section suc9b
+.section suc0a
+.section suc0b
+.section sudaa
+.section sudab
+.section sudba
+.section sudbb
+.section sudca
+.section sudcb
+.section sudda
+.section suddb
+.section sudea
+.section sudeb
+.section sudfa
+.section sudfb
+.section sudga
+.section sudgb
+.section sudha
+.section sudhb
+.section sudia
+.section sudib
+.section sudja
+.section sudjb
+.section sudka
+.section sudkb
+.section sudla
+.section sudlb
+.section sudma
+.section sudmb
+.section sudna
+.section sudnb
+.section sudoa
+.section sudob
+.section sudpa
+.section sudpb
+.section sudqa
+.section sudqb
+.section sudra
+.section sudrb
+.section sudsa
+.section sudsb
+.section sudta
+.section sudtb
+.section sudua
+.section sudub
+.section sudva
+.section sudvb
+.section sudwa
+.section sudwb
+.section sudxa
+.section sudxb
+.section sudya
+.section sudyb
+.section sudza
+.section sudzb
+.section sud1a
+.section sud1b
+.section sud2a
+.section sud2b
+.section sud3a
+.section sud3b
+.section sud4a
+.section sud4b
+.section sud5a
+.section sud5b
+.section sud6a
+.section sud6b
+.section sud7a
+.section sud7b
+.section sud8a
+.section sud8b
+.section sud9a
+.section sud9b
+.section sud0a
+.section sud0b
+.section sueaa
+.section sueab
+.section sueba
+.section suebb
+.section sueca
+.section suecb
+.section sueda
+.section suedb
+.section sueea
+.section sueeb
+.section suefa
+.section suefb
+.section suega
+.section suegb
+.section sueha
+.section suehb
+.section sueia
+.section sueib
+.section sueja
+.section suejb
+.section sueka
+.section suekb
+.section suela
+.section suelb
+.section suema
+.section suemb
+.section suena
+.section suenb
+.section sueoa
+.section sueob
+.section suepa
+.section suepb
+.section sueqa
+.section sueqb
+.section suera
+.section suerb
+.section suesa
+.section suesb
+.section sueta
+.section suetb
+.section sueua
+.section sueub
+.section sueva
+.section suevb
+.section suewa
+.section suewb
+.section suexa
+.section suexb
+.section sueya
+.section sueyb
+.section sueza
+.section suezb
+.section sue1a
+.section sue1b
+.section sue2a
+.section sue2b
+.section sue3a
+.section sue3b
+.section sue4a
+.section sue4b
+.section sue5a
+.section sue5b
+.section sue6a
+.section sue6b
+.section sue7a
+.section sue7b
+.section sue8a
+.section sue8b
+.section sue9a
+.section sue9b
+.section sue0a
+.section sue0b
+.section sufaa
+.section sufab
+.section sufba
+.section sufbb
+.section sufca
+.section sufcb
+.section sufda
+.section sufdb
+.section sufea
+.section sufeb
+.section suffa
+.section suffb
+.section sufga
+.section sufgb
+.section sufha
+.section sufhb
+.section sufia
+.section sufib
+.section sufja
+.section sufjb
+.section sufka
+.section sufkb
+.section sufla
+.section suflb
+.section sufma
+.section sufmb
+.section sufna
+.section sufnb
+.section sufoa
+.section sufob
+.section sufpa
+.section sufpb
+.section sufqa
+.section sufqb
+.section sufra
+.section sufrb
+.section sufsa
+.section sufsb
+.section sufta
+.section suftb
+.section sufua
+.section sufub
+.section sufva
+.section sufvb
+.section sufwa
+.section sufwb
+.section sufxa
+.section sufxb
+.section sufya
+.section sufyb
+.section sufza
+.section sufzb
+.section suf1a
+.section suf1b
+.section suf2a
+.section suf2b
+.section suf3a
+.section suf3b
+.section suf4a
+.section suf4b
+.section suf5a
+.section suf5b
+.section suf6a
+.section suf6b
+.section suf7a
+.section suf7b
+.section suf8a
+.section suf8b
+.section suf9a
+.section suf9b
+.section suf0a
+.section suf0b
+.section sugaa
+.section sugab
+.section sugba
+.section sugbb
+.section sugca
+.section sugcb
+.section sugda
+.section sugdb
+.section sugea
+.section sugeb
+.section sugfa
+.section sugfb
+.section sugga
+.section suggb
+.section sugha
+.section sughb
+.section sugia
+.section sugib
+.section sugja
+.section sugjb
+.section sugka
+.section sugkb
+.section sugla
+.section suglb
+.section sugma
+.section sugmb
+.section sugna
+.section sugnb
+.section sugoa
+.section sugob
+.section sugpa
+.section sugpb
+.section sugqa
+.section sugqb
+.section sugra
+.section sugrb
+.section sugsa
+.section sugsb
+.section sugta
+.section sugtb
+.section sugua
+.section sugub
+.section sugva
+.section sugvb
+.section sugwa
+.section sugwb
+.section sugxa
+.section sugxb
+.section sugya
+.section sugyb
+.section sugza
+.section sugzb
+.section sug1a
+.section sug1b
+.section sug2a
+.section sug2b
+.section sug3a
+.section sug3b
+.section sug4a
+.section sug4b
+.section sug5a
+.section sug5b
+.section sug6a
+.section sug6b
+.section sug7a
+.section sug7b
+.section sug8a
+.section sug8b
+.section sug9a
+.section sug9b
+.section sug0a
+.section sug0b
+.section suhaa
+.section suhab
+.section suhba
+.section suhbb
+.section suhca
+.section suhcb
+.section suhda
+.section suhdb
+.section suhea
+.section suheb
+.section suhfa
+.section suhfb
+.section suhga
+.section suhgb
+.section suhha
+.section suhhb
+.section suhia
+.section suhib
+.section suhja
+.section suhjb
+.section suhka
+.section suhkb
+.section suhla
+.section suhlb
+.section suhma
+.section suhmb
+.section suhna
+.section suhnb
+.section suhoa
+.section suhob
+.section suhpa
+.section suhpb
+.section suhqa
+.section suhqb
+.section suhra
+.section suhrb
+.section suhsa
+.section suhsb
+.section suhta
+.section suhtb
+.section suhua
+.section suhub
+.section suhva
+.section suhvb
+.section suhwa
+.section suhwb
+.section suhxa
+.section suhxb
+.section suhya
+.section suhyb
+.section suhza
+.section suhzb
+.section suh1a
+.section suh1b
+.section suh2a
+.section suh2b
+.section suh3a
+.section suh3b
+.section suh4a
+.section suh4b
+.section suh5a
+.section suh5b
+.section suh6a
+.section suh6b
+.section suh7a
+.section suh7b
+.section suh8a
+.section suh8b
+.section suh9a
+.section suh9b
+.section suh0a
+.section suh0b
+.section suiaa
+.section suiab
+.section suiba
+.section suibb
+.section suica
+.section suicb
+.section suida
+.section suidb
+.section suiea
+.section suieb
+.section suifa
+.section suifb
+.section suiga
+.section suigb
+.section suiha
+.section suihb
+.section suiia
+.section suiib
+.section suija
+.section suijb
+.section suika
+.section suikb
+.section suila
+.section suilb
+.section suima
+.section suimb
+.section suina
+.section suinb
+.section suioa
+.section suiob
+.section suipa
+.section suipb
+.section suiqa
+.section suiqb
+.section suira
+.section suirb
+.section suisa
+.section suisb
+.section suita
+.section suitb
+.section suiua
+.section suiub
+.section suiva
+.section suivb
+.section suiwa
+.section suiwb
+.section suixa
+.section suixb
+.section suiya
+.section suiyb
+.section suiza
+.section suizb
+.section sui1a
+.section sui1b
+.section sui2a
+.section sui2b
+.section sui3a
+.section sui3b
+.section sui4a
+.section sui4b
+.section sui5a
+.section sui5b
+.section sui6a
+.section sui6b
+.section sui7a
+.section sui7b
+.section sui8a
+.section sui8b
+.section sui9a
+.section sui9b
+.section sui0a
+.section sui0b
+.section sujaa
+.section sujab
+.section sujba
+.section sujbb
+.section sujca
+.section sujcb
+.section sujda
+.section sujdb
+.section sujea
+.section sujeb
+.section sujfa
+.section sujfb
+.section sujga
+.section sujgb
+.section sujha
+.section sujhb
+.section sujia
+.section sujib
+.section sujja
+.section sujjb
+.section sujka
+.section sujkb
+.section sujla
+.section sujlb
+.section sujma
+.section sujmb
+.section sujna
+.section sujnb
+.section sujoa
+.section sujob
+.section sujpa
+.section sujpb
+.section sujqa
+.section sujqb
+.section sujra
+.section sujrb
+.section sujsa
+.section sujsb
+.section sujta
+.section sujtb
+.section sujua
+.section sujub
+.section sujva
+.section sujvb
+.section sujwa
+.section sujwb
+.section sujxa
+.section sujxb
+.section sujya
+.section sujyb
+.section sujza
+.section sujzb
+.section suj1a
+.section suj1b
+.section suj2a
+.section suj2b
+.section suj3a
+.section suj3b
+.section suj4a
+.section suj4b
+.section suj5a
+.section suj5b
+.section suj6a
+.section suj6b
+.section suj7a
+.section suj7b
+.section suj8a
+.section suj8b
+.section suj9a
+.section suj9b
+.section suj0a
+.section suj0b
+.section sukaa
+.section sukab
+.section sukba
+.section sukbb
+.section sukca
+.section sukcb
+.section sukda
+.section sukdb
+.section sukea
+.section sukeb
+.section sukfa
+.section sukfb
+.section sukga
+.section sukgb
+.section sukha
+.section sukhb
+.section sukia
+.section sukib
+.section sukja
+.section sukjb
+.section sukka
+.section sukkb
+.section sukla
+.section suklb
+.section sukma
+.section sukmb
+.section sukna
+.section suknb
+.section sukoa
+.section sukob
+.section sukpa
+.section sukpb
+.section sukqa
+.section sukqb
+.section sukra
+.section sukrb
+.section suksa
+.section suksb
+.section sukta
+.section suktb
+.section sukua
+.section sukub
+.section sukva
+.section sukvb
+.section sukwa
+.section sukwb
+.section sukxa
+.section sukxb
+.section sukya
+.section sukyb
+.section sukza
+.section sukzb
+.section suk1a
+.section suk1b
+.section suk2a
+.section suk2b
+.section suk3a
+.section suk3b
+.section suk4a
+.section suk4b
+.section suk5a
+.section suk5b
+.section suk6a
+.section suk6b
+.section suk7a
+.section suk7b
+.section suk8a
+.section suk8b
+.section suk9a
+.section suk9b
+.section suk0a
+.section suk0b
+.section sulaa
+.section sulab
+.section sulba
+.section sulbb
+.section sulca
+.section sulcb
+.section sulda
+.section suldb
+.section sulea
+.section suleb
+.section sulfa
+.section sulfb
+.section sulga
+.section sulgb
+.section sulha
+.section sulhb
+.section sulia
+.section sulib
+.section sulja
+.section suljb
+.section sulka
+.section sulkb
+.section sulla
+.section sullb
+.section sulma
+.section sulmb
+.section sulna
+.section sulnb
+.section suloa
+.section sulob
+.section sulpa
+.section sulpb
+.section sulqa
+.section sulqb
+.section sulra
+.section sulrb
+.section sulsa
+.section sulsb
+.section sulta
+.section sultb
+.section sulua
+.section sulub
+.section sulva
+.section sulvb
+.section sulwa
+.section sulwb
+.section sulxa
+.section sulxb
+.section sulya
+.section sulyb
+.section sulza
+.section sulzb
+.section sul1a
+.section sul1b
+.section sul2a
+.section sul2b
+.section sul3a
+.section sul3b
+.section sul4a
+.section sul4b
+.section sul5a
+.section sul5b
+.section sul6a
+.section sul6b
+.section sul7a
+.section sul7b
+.section sul8a
+.section sul8b
+.section sul9a
+.section sul9b
+.section sul0a
+.section sul0b
+.section sumaa
+.section sumab
+.section sumba
+.section sumbb
+.section sumca
+.section sumcb
+.section sumda
+.section sumdb
+.section sumea
+.section sumeb
+.section sumfa
+.section sumfb
+.section sumga
+.section sumgb
+.section sumha
+.section sumhb
+.section sumia
+.section sumib
+.section sumja
+.section sumjb
+.section sumka
+.section sumkb
+.section sumla
+.section sumlb
+.section summa
+.section summb
+.section sumna
+.section sumnb
+.section sumoa
+.section sumob
+.section sumpa
+.section sumpb
+.section sumqa
+.section sumqb
+.section sumra
+.section sumrb
+.section sumsa
+.section sumsb
+.section sumta
+.section sumtb
+.section sumua
+.section sumub
+.section sumva
+.section sumvb
+.section sumwa
+.section sumwb
+.section sumxa
+.section sumxb
+.section sumya
+.section sumyb
+.section sumza
+.section sumzb
+.section sum1a
+.section sum1b
+.section sum2a
+.section sum2b
+.section sum3a
+.section sum3b
+.section sum4a
+.section sum4b
+.section sum5a
+.section sum5b
+.section sum6a
+.section sum6b
+.section sum7a
+.section sum7b
+.section sum8a
+.section sum8b
+.section sum9a
+.section sum9b
+.section sum0a
+.section sum0b
+.section sunaa
+.section sunab
+.section sunba
+.section sunbb
+.section sunca
+.section suncb
+.section sunda
+.section sundb
+.section sunea
+.section suneb
+.section sunfa
+.section sunfb
+.section sunga
+.section sungb
+.section sunha
+.section sunhb
+.section sunia
+.section sunib
+.section sunja
+.section sunjb
+.section sunka
+.section sunkb
+.section sunla
+.section sunlb
+.section sunma
+.section sunmb
+.section sunna
+.section sunnb
+.section sunoa
+.section sunob
+.section sunpa
+.section sunpb
+.section sunqa
+.section sunqb
+.section sunra
+.section sunrb
+.section sunsa
+.section sunsb
+.section sunta
+.section suntb
+.section sunua
+.section sunub
+.section sunva
+.section sunvb
+.section sunwa
+.section sunwb
+.section sunxa
+.section sunxb
+.section sunya
+.section sunyb
+.section sunza
+.section sunzb
+.section sun1a
+.section sun1b
+.section sun2a
+.section sun2b
+.section sun3a
+.section sun3b
+.section sun4a
+.section sun4b
+.section sun5a
+.section sun5b
+.section sun6a
+.section sun6b
+.section sun7a
+.section sun7b
+.section sun8a
+.section sun8b
+.section sun9a
+.section sun9b
+.section sun0a
+.section sun0b
+.section suoaa
+.section suoab
+.section suoba
+.section suobb
+.section suoca
+.section suocb
+.section suoda
+.section suodb
+.section suoea
+.section suoeb
+.section suofa
+.section suofb
+.section suoga
+.section suogb
+.section suoha
+.section suohb
+.section suoia
+.section suoib
+.section suoja
+.section suojb
+.section suoka
+.section suokb
+.section suola
+.section suolb
+.section suoma
+.section suomb
+.section suona
+.section suonb
+.section suooa
+.section suoob
+.section suopa
+.section suopb
+.section suoqa
+.section suoqb
+.section suora
+.section suorb
+.section suosa
+.section suosb
+.section suota
+.section suotb
+.section suoua
+.section suoub
+.section suova
+.section suovb
+.section suowa
+.section suowb
+.section suoxa
+.section suoxb
+.section suoya
+.section suoyb
+.section suoza
+.section suozb
+.section suo1a
+.section suo1b
+.section suo2a
+.section suo2b
+.section suo3a
+.section suo3b
+.section suo4a
+.section suo4b
+.section suo5a
+.section suo5b
+.section suo6a
+.section suo6b
+.section suo7a
+.section suo7b
+.section suo8a
+.section suo8b
+.section suo9a
+.section suo9b
+.section suo0a
+.section suo0b
+.section supaa
+.section supab
+.section supba
+.section supbb
+.section supca
+.section supcb
+.section supda
+.section supdb
+.section supea
+.section supeb
+.section supfa
+.section supfb
+.section supga
+.section supgb
+.section supha
+.section suphb
+.section supia
+.section supib
+.section supja
+.section supjb
+.section supka
+.section supkb
+.section supla
+.section suplb
+.section supma
+.section supmb
+.section supna
+.section supnb
+.section supoa
+.section supob
+.section suppa
+.section suppb
+.section supqa
+.section supqb
+.section supra
+.section suprb
+.section supsa
+.section supsb
+.section supta
+.section suptb
+.section supua
+.section supub
+.section supva
+.section supvb
+.section supwa
+.section supwb
+.section supxa
+.section supxb
+.section supya
+.section supyb
+.section supza
+.section supzb
+.section sup1a
+.section sup1b
+.section sup2a
+.section sup2b
+.section sup3a
+.section sup3b
+.section sup4a
+.section sup4b
+.section sup5a
+.section sup5b
+.section sup6a
+.section sup6b
+.section sup7a
+.section sup7b
+.section sup8a
+.section sup8b
+.section sup9a
+.section sup9b
+.section sup0a
+.section sup0b
+.section suqaa
+.section suqab
+.section suqba
+.section suqbb
+.section suqca
+.section suqcb
+.section suqda
+.section suqdb
+.section suqea
+.section suqeb
+.section suqfa
+.section suqfb
+.section suqga
+.section suqgb
+.section suqha
+.section suqhb
+.section suqia
+.section suqib
+.section suqja
+.section suqjb
+.section suqka
+.section suqkb
+.section suqla
+.section suqlb
+.section suqma
+.section suqmb
+.section suqna
+.section suqnb
+.section suqoa
+.section suqob
+.section suqpa
+.section suqpb
+.section suqqa
+.section suqqb
+.section suqra
+.section suqrb
+.section suqsa
+.section suqsb
+.section suqta
+.section suqtb
+.section suqua
+.section suqub
+.section suqva
+.section suqvb
+.section suqwa
+.section suqwb
+.section suqxa
+.section suqxb
+.section suqya
+.section suqyb
+.section suqza
+.section suqzb
+.section suq1a
+.section suq1b
+.section suq2a
+.section suq2b
+.section suq3a
+.section suq3b
+.section suq4a
+.section suq4b
+.section suq5a
+.section suq5b
+.section suq6a
+.section suq6b
+.section suq7a
+.section suq7b
+.section suq8a
+.section suq8b
+.section suq9a
+.section suq9b
+.section suq0a
+.section suq0b
+.section suraa
+.section surab
+.section surba
+.section surbb
+.section surca
+.section surcb
+.section surda
+.section surdb
+.section surea
+.section sureb
+.section surfa
+.section surfb
+.section surga
+.section surgb
+.section surha
+.section surhb
+.section suria
+.section surib
+.section surja
+.section surjb
+.section surka
+.section surkb
+.section surla
+.section surlb
+.section surma
+.section surmb
+.section surna
+.section surnb
+.section suroa
+.section surob
+.section surpa
+.section surpb
+.section surqa
+.section surqb
+.section surra
+.section surrb
+.section sursa
+.section sursb
+.section surta
+.section surtb
+.section surua
+.section surub
+.section surva
+.section survb
+.section surwa
+.section surwb
+.section surxa
+.section surxb
+.section surya
+.section suryb
+.section surza
+.section surzb
+.section sur1a
+.section sur1b
+.section sur2a
+.section sur2b
+.section sur3a
+.section sur3b
+.section sur4a
+.section sur4b
+.section sur5a
+.section sur5b
+.section sur6a
+.section sur6b
+.section sur7a
+.section sur7b
+.section sur8a
+.section sur8b
+.section sur9a
+.section sur9b
+.section sur0a
+.section sur0b
+.section susaa
+.section susab
+.section susba
+.section susbb
+.section susca
+.section suscb
+.section susda
+.section susdb
+.section susea
+.section suseb
+.section susfa
+.section susfb
+.section susga
+.section susgb
+.section susha
+.section sushb
+.section susia
+.section susib
+.section susja
+.section susjb
+.section suska
+.section suskb
+.section susla
+.section suslb
+.section susma
+.section susmb
+.section susna
+.section susnb
+.section susoa
+.section susob
+.section suspa
+.section suspb
+.section susqa
+.section susqb
+.section susra
+.section susrb
+.section sussa
+.section sussb
+.section susta
+.section sustb
+.section susua
+.section susub
+.section susva
+.section susvb
+.section suswa
+.section suswb
+.section susxa
+.section susxb
+.section susya
+.section susyb
+.section susza
+.section suszb
+.section sus1a
+.section sus1b
+.section sus2a
+.section sus2b
+.section sus3a
+.section sus3b
+.section sus4a
+.section sus4b
+.section sus5a
+.section sus5b
+.section sus6a
+.section sus6b
+.section sus7a
+.section sus7b
+.section sus8a
+.section sus8b
+.section sus9a
+.section sus9b
+.section sus0a
+.section sus0b
+.section sutaa
+.section sutab
+.section sutba
+.section sutbb
+.section sutca
+.section sutcb
+.section sutda
+.section sutdb
+.section sutea
+.section suteb
+.section sutfa
+.section sutfb
+.section sutga
+.section sutgb
+.section sutha
+.section suthb
+.section sutia
+.section sutib
+.section sutja
+.section sutjb
+.section sutka
+.section sutkb
+.section sutla
+.section sutlb
+.section sutma
+.section sutmb
+.section sutna
+.section sutnb
+.section sutoa
+.section sutob
+.section sutpa
+.section sutpb
+.section sutqa
+.section sutqb
+.section sutra
+.section sutrb
+.section sutsa
+.section sutsb
+.section sutta
+.section suttb
+.section sutua
+.section sutub
+.section sutva
+.section sutvb
+.section sutwa
+.section sutwb
+.section sutxa
+.section sutxb
+.section sutya
+.section sutyb
+.section sutza
+.section sutzb
+.section sut1a
+.section sut1b
+.section sut2a
+.section sut2b
+.section sut3a
+.section sut3b
+.section sut4a
+.section sut4b
+.section sut5a
+.section sut5b
+.section sut6a
+.section sut6b
+.section sut7a
+.section sut7b
+.section sut8a
+.section sut8b
+.section sut9a
+.section sut9b
+.section sut0a
+.section sut0b
+.section suuaa
+.section suuab
+.section suuba
+.section suubb
+.section suuca
+.section suucb
+.section suuda
+.section suudb
+.section suuea
+.section suueb
+.section suufa
+.section suufb
+.section suuga
+.section suugb
+.section suuha
+.section suuhb
+.section suuia
+.section suuib
+.section suuja
+.section suujb
+.section suuka
+.section suukb
+.section suula
+.section suulb
+.section suuma
+.section suumb
+.section suuna
+.section suunb
+.section suuoa
+.section suuob
+.section suupa
+.section suupb
+.section suuqa
+.section suuqb
+.section suura
+.section suurb
+.section suusa
+.section suusb
+.section suuta
+.section suutb
+.section suuua
+.section suuub
+.section suuva
+.section suuvb
+.section suuwa
+.section suuwb
+.section suuxa
+.section suuxb
+.section suuya
+.section suuyb
+.section suuza
+.section suuzb
+.section suu1a
+.section suu1b
+.section suu2a
+.section suu2b
+.section suu3a
+.section suu3b
+.section suu4a
+.section suu4b
+.section suu5a
+.section suu5b
+.section suu6a
+.section suu6b
+.section suu7a
+.section suu7b
+.section suu8a
+.section suu8b
+.section suu9a
+.section suu9b
+.section suu0a
+.section suu0b
+.section suvaa
+.section suvab
+.section suvba
+.section suvbb
+.section suvca
+.section suvcb
+.section suvda
+.section suvdb
+.section suvea
+.section suveb
+.section suvfa
+.section suvfb
+.section suvga
+.section suvgb
+.section suvha
+.section suvhb
+.section suvia
+.section suvib
+.section suvja
+.section suvjb
+.section suvka
+.section suvkb
+.section suvla
+.section suvlb
+.section suvma
+.section suvmb
+.section suvna
+.section suvnb
+.section suvoa
+.section suvob
+.section suvpa
+.section suvpb
+.section suvqa
+.section suvqb
+.section suvra
+.section suvrb
+.section suvsa
+.section suvsb
+.section suvta
+.section suvtb
+.section suvua
+.section suvub
+.section suvva
+.section suvvb
+.section suvwa
+.section suvwb
+.section suvxa
+.section suvxb
+.section suvya
+.section suvyb
+.section suvza
+.section suvzb
+.section suv1a
+.section suv1b
+.section suv2a
+.section suv2b
+.section suv3a
+.section suv3b
+.section suv4a
+.section suv4b
+.section suv5a
+.section suv5b
+.section suv6a
+.section suv6b
+.section suv7a
+.section suv7b
+.section suv8a
+.section suv8b
+.section suv9a
+.section suv9b
+.section suv0a
+.section suv0b
+.section suwaa
+.section suwab
+.section suwba
+.section suwbb
+.section suwca
+.section suwcb
+.section suwda
+.section suwdb
+.section suwea
+.section suweb
+.section suwfa
+.section suwfb
+.section suwga
+.section suwgb
+.section suwha
+.section suwhb
+.section suwia
+.section suwib
+.section suwja
+.section suwjb
+.section suwka
+.section suwkb
+.section suwla
+.section suwlb
+.section suwma
+.section suwmb
+.section suwna
+.section suwnb
+.section suwoa
+.section suwob
+.section suwpa
+.section suwpb
+.section suwqa
+.section suwqb
+.section suwra
+.section suwrb
+.section suwsa
+.section suwsb
+.section suwta
+.section suwtb
+.section suwua
+.section suwub
+.section suwva
+.section suwvb
+.section suwwa
+.section suwwb
+.section suwxa
+.section suwxb
+.section suwya
+.section suwyb
+.section suwza
+.section suwzb
+.section suw1a
+.section suw1b
+.section suw2a
+.section suw2b
+.section suw3a
+.section suw3b
+.section suw4a
+.section suw4b
+.section suw5a
+.section suw5b
+.section suw6a
+.section suw6b
+.section suw7a
+.section suw7b
+.section suw8a
+.section suw8b
+.section suw9a
+.section suw9b
+.section suw0a
+.section suw0b
+.section suxaa
+.section suxab
+.section suxba
+.section suxbb
+.section suxca
+.section suxcb
+.section suxda
+.section suxdb
+.section suxea
+.section suxeb
+.section suxfa
+.section suxfb
+.section suxga
+.section suxgb
+.section suxha
+.section suxhb
+.section suxia
+.section suxib
+.section suxja
+.section suxjb
+.section suxka
+.section suxkb
+.section suxla
+.section suxlb
+.section suxma
+.section suxmb
+.section suxna
+.section suxnb
+.section suxoa
+.section suxob
+.section suxpa
+.section suxpb
+.section suxqa
+.section suxqb
+.section suxra
+.section suxrb
+.section suxsa
+.section suxsb
+.section suxta
+.section suxtb
+.section suxua
+.section suxub
+.section suxva
+.section suxvb
+.section suxwa
+.section suxwb
+.section suxxa
+.section suxxb
+.section suxya
+.section suxyb
+.section suxza
+.section suxzb
+.section sux1a
+.section sux1b
+.section sux2a
+.section sux2b
+.section sux3a
+.section sux3b
+.section sux4a
+.section sux4b
+.section sux5a
+.section sux5b
+.section sux6a
+.section sux6b
+.section sux7a
+.section sux7b
+.section sux8a
+.section sux8b
+.section sux9a
+.section sux9b
+.section sux0a
+.section sux0b
+.section suyaa
+.section suyab
+.section suyba
+.section suybb
+.section suyca
+.section suycb
+.section suyda
+.section suydb
+.section suyea
+.section suyeb
+.section suyfa
+.section suyfb
+.section suyga
+.section suygb
+.section suyha
+.section suyhb
+.section suyia
+.section suyib
+.section suyja
+.section suyjb
+.section suyka
+.section suykb
+.section suyla
+.section suylb
+.section suyma
+.section suymb
+.section suyna
+.section suynb
+.section suyoa
+.section suyob
+.section suypa
+.section suypb
+.section suyqa
+.section suyqb
+.section suyra
+.section suyrb
+.section suysa
+.section suysb
+.section suyta
+.section suytb
+.section suyua
+.section suyub
+.section suyva
+.section suyvb
+.section suywa
+.section suywb
+.section suyxa
+.section suyxb
+.section suyya
+.section suyyb
+.section suyza
+.section suyzb
+.section suy1a
+.section suy1b
+.section suy2a
+.section suy2b
+.section suy3a
+.section suy3b
+.section suy4a
+.section suy4b
+.section suy5a
+.section suy5b
+.section suy6a
+.section suy6b
+.section suy7a
+.section suy7b
+.section suy8a
+.section suy8b
+.section suy9a
+.section suy9b
+.section suy0a
+.section suy0b
+.section suzaa
+.section suzab
+.section suzba
+.section suzbb
+.section suzca
+.section suzcb
+.section suzda
+.section suzdb
+.section suzea
+.section suzeb
+.section suzfa
+.section suzfb
+.section suzga
+.section suzgb
+.section suzha
+.section suzhb
+.section suzia
+.section suzib
+.section suzja
+.section suzjb
+.section suzka
+.section suzkb
+.section suzla
+.section suzlb
+.section suzma
+.section suzmb
+.section suzna
+.section suznb
+.section suzoa
+.section suzob
+.section suzpa
+.section suzpb
+.section suzqa
+.section suzqb
+.section suzra
+.section suzrb
+.section suzsa
+.section suzsb
+.section suzta
+.section suztb
+.section suzua
+.section suzub
+.section suzva
+.section suzvb
+.section suzwa
+.section suzwb
+.section suzxa
+.section suzxb
+.section suzya
+.section suzyb
+.section suzza
+.section suzzb
+.section suz1a
+.section suz1b
+.section suz2a
+.section suz2b
+.section suz3a
+.section suz3b
+.section suz4a
+.section suz4b
+.section suz5a
+.section suz5b
+.section suz6a
+.section suz6b
+.section suz7a
+.section suz7b
+.section suz8a
+.section suz8b
+.section suz9a
+.section suz9b
+.section suz0a
+.section suz0b
+.section su1aa
+.section su1ab
+.section su1ba
+.section su1bb
+.section su1ca
+.section su1cb
+.section su1da
+.section su1db
+.section su1ea
+.section su1eb
+.section su1fa
+.section su1fb
+.section su1ga
+.section su1gb
+.section su1ha
+.section su1hb
+.section su1ia
+.section su1ib
+.section su1ja
+.section su1jb
+.section su1ka
+.section su1kb
+.section su1la
+.section su1lb
+.section su1ma
+.section su1mb
+.section su1na
+.section su1nb
+.section su1oa
+.section su1ob
+.section su1pa
+.section su1pb
+.section su1qa
+.section su1qb
+.section su1ra
+.section su1rb
+.section su1sa
+.section su1sb
+.section su1ta
+.section su1tb
+.section su1ua
+.section su1ub
+.section su1va
+.section su1vb
+.section su1wa
+.section su1wb
+.section su1xa
+.section su1xb
+.section su1ya
+.section su1yb
+.section su1za
+.section su1zb
+.section su11a
+.section su11b
+.section su12a
+.section su12b
+.section su13a
+.section su13b
+.section su14a
+.section su14b
+.section su15a
+.section su15b
+.section su16a
+.section su16b
+.section su17a
+.section su17b
+.section su18a
+.section su18b
+.section su19a
+.section su19b
+.section su10a
+.section su10b
+.section su2aa
+.section su2ab
+.section su2ba
+.section su2bb
+.section su2ca
+.section su2cb
+.section su2da
+.section su2db
+.section su2ea
+.section su2eb
+.section su2fa
+.section su2fb
+.section su2ga
+.section su2gb
+.section su2ha
+.section su2hb
+.section su2ia
+.section su2ib
+.section su2ja
+.section su2jb
+.section su2ka
+.section su2kb
+.section su2la
+.section su2lb
+.section su2ma
+.section su2mb
+.section su2na
+.section su2nb
+.section su2oa
+.section su2ob
+.section su2pa
+.section su2pb
+.section su2qa
+.section su2qb
+.section su2ra
+.section su2rb
+.section su2sa
+.section su2sb
+.section su2ta
+.section su2tb
+.section su2ua
+.section su2ub
+.section su2va
+.section su2vb
+.section su2wa
+.section su2wb
+.section su2xa
+.section su2xb
+.section su2ya
+.section su2yb
+.section su2za
+.section su2zb
+.section su21a
+.section su21b
+.section su22a
+.section su22b
+.section su23a
+.section su23b
+.section su24a
+.section su24b
+.section su25a
+.section su25b
+.section su26a
+.section su26b
+.section su27a
+.section su27b
+.section su28a
+.section su28b
+.section su29a
+.section su29b
+.section su20a
+.section su20b
+.section su3aa
+.section su3ab
+.section su3ba
+.section su3bb
+.section su3ca
+.section su3cb
+.section su3da
+.section su3db
+.section su3ea
+.section su3eb
+.section su3fa
+.section su3fb
+.section su3ga
+.section su3gb
+.section su3ha
+.section su3hb
+.section su3ia
+.section su3ib
+.section su3ja
+.section su3jb
+.section su3ka
+.section su3kb
+.section su3la
+.section su3lb
+.section su3ma
+.section su3mb
+.section su3na
+.section su3nb
+.section su3oa
+.section su3ob
+.section su3pa
+.section su3pb
+.section su3qa
+.section su3qb
+.section su3ra
+.section su3rb
+.section su3sa
+.section su3sb
+.section su3ta
+.section su3tb
+.section su3ua
+.section su3ub
+.section su3va
+.section su3vb
+.section su3wa
+.section su3wb
+.section su3xa
+.section su3xb
+.section su3ya
+.section su3yb
+.section su3za
+.section su3zb
+.section su31a
+.section su31b
+.section su32a
+.section su32b
+.section su33a
+.section su33b
+.section su34a
+.section su34b
+.section su35a
+.section su35b
+.section su36a
+.section su36b
+.section su37a
+.section su37b
+.section su38a
+.section su38b
+.section su39a
+.section su39b
+.section su30a
+.section su30b
+.section su4aa
+.section su4ab
+.section su4ba
+.section su4bb
+.section su4ca
+.section su4cb
+.section su4da
+.section su4db
+.section su4ea
+.section su4eb
+.section su4fa
+.section su4fb
+.section su4ga
+.section su4gb
+.section su4ha
+.section su4hb
+.section su4ia
+.section su4ib
+.section su4ja
+.section su4jb
+.section su4ka
+.section su4kb
+.section su4la
+.section su4lb
+.section su4ma
+.section su4mb
+.section su4na
+.section su4nb
+.section su4oa
+.section su4ob
+.section su4pa
+.section su4pb
+.section su4qa
+.section su4qb
+.section su4ra
+.section su4rb
+.section su4sa
+.section su4sb
+.section su4ta
+.section su4tb
+.section su4ua
+.section su4ub
+.section su4va
+.section su4vb
+.section su4wa
+.section su4wb
+.section su4xa
+.section su4xb
+.section su4ya
+.section su4yb
+.section su4za
+.section su4zb
+.section su41a
+.section su41b
+.section su42a
+.section su42b
+.section su43a
+.section su43b
+.section su44a
+.section su44b
+.section su45a
+.section su45b
+.section su46a
+.section su46b
+.section su47a
+.section su47b
+.section su48a
+.section su48b
+.section su49a
+.section su49b
+.section su40a
+.section su40b
+.section su5aa
+.section su5ab
+.section su5ba
+.section su5bb
+.section su5ca
+.section su5cb
+.section su5da
+.section su5db
+.section su5ea
+.section su5eb
+.section su5fa
+.section su5fb
+.section su5ga
+.section su5gb
+.section su5ha
+.section su5hb
+.section su5ia
+.section su5ib
+.section su5ja
+.section su5jb
+.section su5ka
+.section su5kb
+.section su5la
+.section su5lb
+.section su5ma
+.section su5mb
+.section su5na
+.section su5nb
+.section su5oa
+.section su5ob
+.section su5pa
+.section su5pb
+.section su5qa
+.section su5qb
+.section su5ra
+.section su5rb
+.section su5sa
+.section su5sb
+.section su5ta
+.section su5tb
+.section su5ua
+.section su5ub
+.section su5va
+.section su5vb
+.section su5wa
+.section su5wb
+.section su5xa
+.section su5xb
+.section su5ya
+.section su5yb
+.section su5za
+.section su5zb
+.section su51a
+.section su51b
+.section su52a
+.section su52b
+.section su53a
+.section su53b
+.section su54a
+.section su54b
+.section su55a
+.section su55b
+.section su56a
+.section su56b
+.section su57a
+.section su57b
+.section su58a
+.section su58b
+.section su59a
+.section su59b
+.section su50a
+.section su50b
+.section su6aa
+.section su6ab
+.section su6ba
+.section su6bb
+.section su6ca
+.section su6cb
+.section su6da
+.section su6db
+.section su6ea
+.section su6eb
+.section su6fa
+.section su6fb
+.section su6ga
+.section su6gb
+.section su6ha
+.section su6hb
+.section su6ia
+.section su6ib
+.section su6ja
+.section su6jb
+.section su6ka
+.section su6kb
+.section su6la
+.section su6lb
+.section su6ma
+.section su6mb
+.section su6na
+.section su6nb
+.section su6oa
+.section su6ob
+.section su6pa
+.section su6pb
+.section su6qa
+.section su6qb
+.section su6ra
+.section su6rb
+.section su6sa
+.section su6sb
+.section su6ta
+.section su6tb
+.section su6ua
+.section su6ub
+.section su6va
+.section su6vb
+.section su6wa
+.section su6wb
+.section su6xa
+.section su6xb
+.section su6ya
+.section su6yb
+.section su6za
+.section su6zb
+.section su61a
+.section su61b
+.section su62a
+.section su62b
+.section su63a
+.section su63b
+.section su64a
+.section su64b
+.section su65a
+.section su65b
+.section su66a
+.section su66b
+.section su67a
+.section su67b
+.section su68a
+.section su68b
+.section su69a
+.section su69b
+.section su60a
+.section su60b
+.section su7aa
+.section su7ab
+.section su7ba
+.section su7bb
+.section su7ca
+.section su7cb
+.section su7da
+.section su7db
+.section su7ea
+.section su7eb
+.section su7fa
+.section su7fb
+.section su7ga
+.section su7gb
+.section su7ha
+.section su7hb
+.section su7ia
+.section su7ib
+.section su7ja
+.section su7jb
+.section su7ka
+.section su7kb
+.section su7la
+.section su7lb
+.section su7ma
+.section su7mb
+.section su7na
+.section su7nb
+.section su7oa
+.section su7ob
+.section su7pa
+.section su7pb
+.section su7qa
+.section su7qb
+.section su7ra
+.section su7rb
+.section su7sa
+.section su7sb
+.section su7ta
+.section su7tb
+.section su7ua
+.section su7ub
+.section su7va
+.section su7vb
+.section su7wa
+.section su7wb
+.section su7xa
+.section su7xb
+.section su7ya
+.section su7yb
+.section su7za
+.section su7zb
+.section su71a
+.section su71b
+.section su72a
+.section su72b
+.section su73a
+.section su73b
+.section su74a
+.section su74b
+.section su75a
+.section su75b
+.section su76a
+.section su76b
+.section su77a
+.section su77b
+.section su78a
+.section su78b
+.section su79a
+.section su79b
+.section su70a
+.section su70b
+.section su8aa
+.section su8ab
+.section su8ba
+.section su8bb
+.section su8ca
+.section su8cb
+.section su8da
+.section su8db
+.section su8ea
+.section su8eb
+.section su8fa
+.section su8fb
+.section su8ga
+.section su8gb
+.section su8ha
+.section su8hb
+.section su8ia
+.section su8ib
+.section su8ja
+.section su8jb
+.section su8ka
+.section su8kb
+.section su8la
+.section su8lb
+.section su8ma
+.section su8mb
+.section su8na
+.section su8nb
+.section su8oa
+.section su8ob
+.section su8pa
+.section su8pb
+.section su8qa
+.section su8qb
+.section su8ra
+.section su8rb
+.section su8sa
+.section su8sb
+.section su8ta
+.section su8tb
+.section su8ua
+.section su8ub
+.section su8va
+.section su8vb
+.section su8wa
+.section su8wb
+.section su8xa
+.section su8xb
+.section su8ya
+.section su8yb
+.section su8za
+.section su8zb
+.section su81a
+.section su81b
+.section su82a
+.section su82b
+.section su83a
+.section su83b
+.section su84a
+.section su84b
+.section su85a
+.section su85b
+.section su86a
+.section su86b
+.section su87a
+.section su87b
+.section su88a
+.section su88b
+.section su89a
+.section su89b
+.section su80a
+.section su80b
+.section su9aa
+.section su9ab
+.section su9ba
+.section su9bb
+.section su9ca
+.section su9cb
+.section su9da
+.section su9db
+.section su9ea
+.section su9eb
+.section su9fa
+.section su9fb
+.section su9ga
+.section su9gb
+.section su9ha
+.section su9hb
+.section su9ia
+.section su9ib
+.section su9ja
+.section su9jb
+.section su9ka
+.section su9kb
+.section su9la
+.section su9lb
+.section su9ma
+.section su9mb
+.section su9na
+.section su9nb
+.section su9oa
+.section su9ob
+.section su9pa
+.section su9pb
+.section su9qa
+.section su9qb
+.section su9ra
+.section su9rb
+.section su9sa
+.section su9sb
+.section su9ta
+.section su9tb
+.section su9ua
+.section su9ub
+.section su9va
+.section su9vb
+.section su9wa
+.section su9wb
+.section su9xa
+.section su9xb
+.section su9ya
+.section su9yb
+.section su9za
+.section su9zb
+.section su91a
+.section su91b
+.section su92a
+.section su92b
+.section su93a
+.section su93b
+.section su94a
+.section su94b
+.section su95a
+.section su95b
+.section su96a
+.section su96b
+.section su97a
+.section su97b
+.section su98a
+.section su98b
+.section su99a
+.section su99b
+.section su90a
+.section su90b
+.section su0aa
+.section su0ab
+.section su0ba
+.section su0bb
+.section su0ca
+.section su0cb
+.section su0da
+.section su0db
+.section su0ea
+.section su0eb
+.section su0fa
+.section su0fb
+.section su0ga
+.section su0gb
+.section su0ha
+.section su0hb
+.section su0ia
+.section su0ib
+.section su0ja
+.section su0jb
+.section su0ka
+.section su0kb
+.section su0la
+.section su0lb
+.section su0ma
+.section su0mb
+.section su0na
+.section su0nb
+.section su0oa
+.section su0ob
+.section su0pa
+.section su0pb
+.section su0qa
+.section su0qb
+.section su0ra
+.section su0rb
+.section su0sa
+.section su0sb
+.section su0ta
+.section su0tb
+.section su0ua
+.section su0ub
+.section su0va
+.section su0vb
+.section su0wa
+.section su0wb
+.section su0xa
+.section su0xb
+.section su0ya
+.section su0yb
+.section su0za
+.section su0zb
+.section su01a
+.section su01b
+.section su02a
+.section su02b
+.section su03a
+.section su03b
+.section su04a
+.section su04b
+.section su05a
+.section su05b
+.section su06a
+.section su06b
+.section su07a
+.section su07b
+.section su08a
+.section su08b
+.section su09a
+.section su09b
+.section su00a
+.section su00b
+.section svaaa
+.section svaab
+.section svaba
+.section svabb
+.section svaca
+.section svacb
+.section svada
+.section svadb
+.section svaea
+.section svaeb
+.section svafa
+.section svafb
+.section svaga
+.section svagb
+.section svaha
+.section svahb
+.section svaia
+.section svaib
+.section svaja
+.section svajb
+.section svaka
+.section svakb
+.section svala
+.section svalb
+.section svama
+.section svamb
+.section svana
+.section svanb
+.section svaoa
+.section svaob
+.section svapa
+.section svapb
+.section svaqa
+.section svaqb
+.section svara
+.section svarb
+.section svasa
+.section svasb
+.section svata
+.section svatb
+.section svaua
+.section svaub
+.section svava
+.section svavb
+.section svawa
+.section svawb
+.section svaxa
+.section svaxb
+.section svaya
+.section svayb
+.section svaza
+.section svazb
+.section sva1a
+.section sva1b
+.section sva2a
+.section sva2b
+.section sva3a
+.section sva3b
+.section sva4a
+.section sva4b
+.section sva5a
+.section sva5b
+.section sva6a
+.section sva6b
+.section sva7a
+.section sva7b
+.section sva8a
+.section sva8b
+.section sva9a
+.section sva9b
+.section sva0a
+.section sva0b
+.section svbaa
+.section svbab
+.section svbba
+.section svbbb
+.section svbca
+.section svbcb
+.section svbda
+.section svbdb
+.section svbea
+.section svbeb
+.section svbfa
+.section svbfb
+.section svbga
+.section svbgb
+.section svbha
+.section svbhb
+.section svbia
+.section svbib
+.section svbja
+.section svbjb
+.section svbka
+.section svbkb
+.section svbla
+.section svblb
+.section svbma
+.section svbmb
+.section svbna
+.section svbnb
+.section svboa
+.section svbob
+.section svbpa
+.section svbpb
+.section svbqa
+.section svbqb
+.section svbra
+.section svbrb
+.section svbsa
+.section svbsb
+.section svbta
+.section svbtb
+.section svbua
+.section svbub
+.section svbva
+.section svbvb
+.section svbwa
+.section svbwb
+.section svbxa
+.section svbxb
+.section svbya
+.section svbyb
+.section svbza
+.section svbzb
+.section svb1a
+.section svb1b
+.section svb2a
+.section svb2b
+.section svb3a
+.section svb3b
+.section svb4a
+.section svb4b
+.section svb5a
+.section svb5b
+.section svb6a
+.section svb6b
+.section svb7a
+.section svb7b
+.section svb8a
+.section svb8b
+.section svb9a
+.section svb9b
+.section svb0a
+.section svb0b
+.section svcaa
+.section svcab
+.section svcba
+.section svcbb
+.section svcca
+.section svccb
+.section svcda
+.section svcdb
+.section svcea
+.section svceb
+.section svcfa
+.section svcfb
+.section svcga
+.section svcgb
+.section svcha
+.section svchb
+.section svcia
+.section svcib
+.section svcja
+.section svcjb
+.section svcka
+.section svckb
+.section svcla
+.section svclb
+.section svcma
+.section svcmb
+.section svcna
+.section svcnb
+.section svcoa
+.section svcob
+.section svcpa
+.section svcpb
+.section svcqa
+.section svcqb
+.section svcra
+.section svcrb
+.section svcsa
+.section svcsb
+.section svcta
+.section svctb
+.section svcua
+.section svcub
+.section svcva
+.section svcvb
+.section svcwa
+.section svcwb
+.section svcxa
+.section svcxb
+.section svcya
+.section svcyb
+.section svcza
+.section svczb
+.section svc1a
+.section svc1b
+.section svc2a
+.section svc2b
+.section svc3a
+.section svc3b
+.section svc4a
+.section svc4b
+.section svc5a
+.section svc5b
+.section svc6a
+.section svc6b
+.section svc7a
+.section svc7b
+.section svc8a
+.section svc8b
+.section svc9a
+.section svc9b
+.section svc0a
+.section svc0b
+.section svdaa
+.section svdab
+.section svdba
+.section svdbb
+.section svdca
+.section svdcb
+.section svdda
+.section svddb
+.section svdea
+.section svdeb
+.section svdfa
+.section svdfb
+.section svdga
+.section svdgb
+.section svdha
+.section svdhb
+.section svdia
+.section svdib
+.section svdja
+.section svdjb
+.section svdka
+.section svdkb
+.section svdla
+.section svdlb
+.section svdma
+.section svdmb
+.section svdna
+.section svdnb
+.section svdoa
+.section svdob
+.section svdpa
+.section svdpb
+.section svdqa
+.section svdqb
+.section svdra
+.section svdrb
+.section svdsa
+.section svdsb
+.section svdta
+.section svdtb
+.section svdua
+.section svdub
+.section svdva
+.section svdvb
+.section svdwa
+.section svdwb
+.section svdxa
+.section svdxb
+.section svdya
+.section svdyb
+.section svdza
+.section svdzb
+.section svd1a
+.section svd1b
+.section svd2a
+.section svd2b
+.section svd3a
+.section svd3b
+.section svd4a
+.section svd4b
+.section svd5a
+.section svd5b
+.section svd6a
+.section svd6b
+.section svd7a
+.section svd7b
+.section svd8a
+.section svd8b
+.section svd9a
+.section svd9b
+.section svd0a
+.section svd0b
+.section sveaa
+.section sveab
+.section sveba
+.section svebb
+.section sveca
+.section svecb
+.section sveda
+.section svedb
+.section sveea
+.section sveeb
+.section svefa
+.section svefb
+.section svega
+.section svegb
+.section sveha
+.section svehb
+.section sveia
+.section sveib
+.section sveja
+.section svejb
+.section sveka
+.section svekb
+.section svela
+.section svelb
+.section svema
+.section svemb
+.section svena
+.section svenb
+.section sveoa
+.section sveob
+.section svepa
+.section svepb
+.section sveqa
+.section sveqb
+.section svera
+.section sverb
+.section svesa
+.section svesb
+.section sveta
+.section svetb
+.section sveua
+.section sveub
+.section sveva
+.section svevb
+.section svewa
+.section svewb
+.section svexa
+.section svexb
+.section sveya
+.section sveyb
+.section sveza
+.section svezb
+.section sve1a
+.section sve1b
+.section sve2a
+.section sve2b
+.section sve3a
+.section sve3b
+.section sve4a
+.section sve4b
+.section sve5a
+.section sve5b
+.section sve6a
+.section sve6b
+.section sve7a
+.section sve7b
+.section sve8a
+.section sve8b
+.section sve9a
+.section sve9b
+.section sve0a
+.section sve0b
+.section svfaa
+.section svfab
+.section svfba
+.section svfbb
+.section svfca
+.section svfcb
+.section svfda
+.section svfdb
+.section svfea
+.section svfeb
+.section svffa
+.section svffb
+.section svfga
+.section svfgb
+.section svfha
+.section svfhb
+.section svfia
+.section svfib
+.section svfja
+.section svfjb
+.section svfka
+.section svfkb
+.section svfla
+.section svflb
+.section svfma
+.section svfmb
+.section svfna
+.section svfnb
+.section svfoa
+.section svfob
+.section svfpa
+.section svfpb
+.section svfqa
+.section svfqb
+.section svfra
+.section svfrb
+.section svfsa
+.section svfsb
+.section svfta
+.section svftb
+.section svfua
+.section svfub
+.section svfva
+.section svfvb
+.section svfwa
+.section svfwb
+.section svfxa
+.section svfxb
+.section svfya
+.section svfyb
+.section svfza
+.section svfzb
+.section svf1a
+.section svf1b
+.section svf2a
+.section svf2b
+.section svf3a
+.section svf3b
+.section svf4a
+.section svf4b
+.section svf5a
+.section svf5b
+.section svf6a
+.section svf6b
+.section svf7a
+.section svf7b
+.section svf8a
+.section svf8b
+.section svf9a
+.section svf9b
+.section svf0a
+.section svf0b
+.section svgaa
+.section svgab
+.section svgba
+.section svgbb
+.section svgca
+.section svgcb
+.section svgda
+.section svgdb
+.section svgea
+.section svgeb
+.section svgfa
+.section svgfb
+.section svgga
+.section svggb
+.section svgha
+.section svghb
+.section svgia
+.section svgib
+.section svgja
+.section svgjb
+.section svgka
+.section svgkb
+.section svgla
+.section svglb
+.section svgma
+.section svgmb
+.section svgna
+.section svgnb
+.section svgoa
+.section svgob
+.section svgpa
+.section svgpb
+.section svgqa
+.section svgqb
+.section svgra
+.section svgrb
+.section svgsa
+.section svgsb
+.section svgta
+.section svgtb
+.section svgua
+.section svgub
+.section svgva
+.section svgvb
+.section svgwa
+.section svgwb
+.section svgxa
+.section svgxb
+.section svgya
+.section svgyb
+.section svgza
+.section svgzb
+.section svg1a
+.section svg1b
+.section svg2a
+.section svg2b
+.section svg3a
+.section svg3b
+.section svg4a
+.section svg4b
+.section svg5a
+.section svg5b
+.section svg6a
+.section svg6b
+.section svg7a
+.section svg7b
+.section svg8a
+.section svg8b
+.section svg9a
+.section svg9b
+.section svg0a
+.section svg0b
+.section svhaa
+.section svhab
+.section svhba
+.section svhbb
+.section svhca
+.section svhcb
+.section svhda
+.section svhdb
+.section svhea
+.section svheb
+.section svhfa
+.section svhfb
+.section svhga
+.section svhgb
+.section svhha
+.section svhhb
+.section svhia
+.section svhib
+.section svhja
+.section svhjb
+.section svhka
+.section svhkb
+.section svhla
+.section svhlb
+.section svhma
+.section svhmb
+.section svhna
+.section svhnb
+.section svhoa
+.section svhob
+.section svhpa
+.section svhpb
+.section svhqa
+.section svhqb
+.section svhra
+.section svhrb
+.section svhsa
+.section svhsb
+.section svhta
+.section svhtb
+.section svhua
+.section svhub
+.section svhva
+.section svhvb
+.section svhwa
+.section svhwb
+.section svhxa
+.section svhxb
+.section svhya
+.section svhyb
+.section svhza
+.section svhzb
+.section svh1a
+.section svh1b
+.section svh2a
+.section svh2b
+.section svh3a
+.section svh3b
+.section svh4a
+.section svh4b
+.section svh5a
+.section svh5b
+.section svh6a
+.section svh6b
+.section svh7a
+.section svh7b
+.section svh8a
+.section svh8b
+.section svh9a
+.section svh9b
+.section svh0a
+.section svh0b
+.section sviaa
+.section sviab
+.section sviba
+.section svibb
+.section svica
+.section svicb
+.section svida
+.section svidb
+.section sviea
+.section svieb
+.section svifa
+.section svifb
+.section sviga
+.section svigb
+.section sviha
+.section svihb
+.section sviia
+.section sviib
+.section svija
+.section svijb
+.section svika
+.section svikb
+.section svila
+.section svilb
+.section svima
+.section svimb
+.section svina
+.section svinb
+.section svioa
+.section sviob
+.section svipa
+.section svipb
+.section sviqa
+.section sviqb
+.section svira
+.section svirb
+.section svisa
+.section svisb
+.section svita
+.section svitb
+.section sviua
+.section sviub
+.section sviva
+.section svivb
+.section sviwa
+.section sviwb
+.section svixa
+.section svixb
+.section sviya
+.section sviyb
+.section sviza
+.section svizb
+.section svi1a
+.section svi1b
+.section svi2a
+.section svi2b
+.section svi3a
+.section svi3b
+.section svi4a
+.section svi4b
+.section svi5a
+.section svi5b
+.section svi6a
+.section svi6b
+.section svi7a
+.section svi7b
+.section svi8a
+.section svi8b
+.section svi9a
+.section svi9b
+.section svi0a
+.section svi0b
+.section svjaa
+.section svjab
+.section svjba
+.section svjbb
+.section svjca
+.section svjcb
+.section svjda
+.section svjdb
+.section svjea
+.section svjeb
+.section svjfa
+.section svjfb
+.section svjga
+.section svjgb
+.section svjha
+.section svjhb
+.section svjia
+.section svjib
+.section svjja
+.section svjjb
+.section svjka
+.section svjkb
+.section svjla
+.section svjlb
+.section svjma
+.section svjmb
+.section svjna
+.section svjnb
+.section svjoa
+.section svjob
+.section svjpa
+.section svjpb
+.section svjqa
+.section svjqb
+.section svjra
+.section svjrb
+.section svjsa
+.section svjsb
+.section svjta
+.section svjtb
+.section svjua
+.section svjub
+.section svjva
+.section svjvb
+.section svjwa
+.section svjwb
+.section svjxa
+.section svjxb
+.section svjya
+.section svjyb
+.section svjza
+.section svjzb
+.section svj1a
+.section svj1b
+.section svj2a
+.section svj2b
+.section svj3a
+.section svj3b
+.section svj4a
+.section svj4b
+.section svj5a
+.section svj5b
+.section svj6a
+.section svj6b
+.section svj7a
+.section svj7b
+.section svj8a
+.section svj8b
+.section svj9a
+.section svj9b
+.section svj0a
+.section svj0b
+.section svkaa
+.section svkab
+.section svkba
+.section svkbb
+.section svkca
+.section svkcb
+.section svkda
+.section svkdb
+.section svkea
+.section svkeb
+.section svkfa
+.section svkfb
+.section svkga
+.section svkgb
+.section svkha
+.section svkhb
+.section svkia
+.section svkib
+.section svkja
+.section svkjb
+.section svkka
+.section svkkb
+.section svkla
+.section svklb
+.section svkma
+.section svkmb
+.section svkna
+.section svknb
+.section svkoa
+.section svkob
+.section svkpa
+.section svkpb
+.section svkqa
+.section svkqb
+.section svkra
+.section svkrb
+.section svksa
+.section svksb
+.section svkta
+.section svktb
+.section svkua
+.section svkub
+.section svkva
+.section svkvb
+.section svkwa
+.section svkwb
+.section svkxa
+.section svkxb
+.section svkya
+.section svkyb
+.section svkza
+.section svkzb
+.section svk1a
+.section svk1b
+.section svk2a
+.section svk2b
+.section svk3a
+.section svk3b
+.section svk4a
+.section svk4b
+.section svk5a
+.section svk5b
+.section svk6a
+.section svk6b
+.section svk7a
+.section svk7b
+.section svk8a
+.section svk8b
+.section svk9a
+.section svk9b
+.section svk0a
+.section svk0b
+.section svlaa
+.section svlab
+.section svlba
+.section svlbb
+.section svlca
+.section svlcb
+.section svlda
+.section svldb
+.section svlea
+.section svleb
+.section svlfa
+.section svlfb
+.section svlga
+.section svlgb
+.section svlha
+.section svlhb
+.section svlia
+.section svlib
+.section svlja
+.section svljb
+.section svlka
+.section svlkb
+.section svlla
+.section svllb
+.section svlma
+.section svlmb
+.section svlna
+.section svlnb
+.section svloa
+.section svlob
+.section svlpa
+.section svlpb
+.section svlqa
+.section svlqb
+.section svlra
+.section svlrb
+.section svlsa
+.section svlsb
+.section svlta
+.section svltb
+.section svlua
+.section svlub
+.section svlva
+.section svlvb
+.section svlwa
+.section svlwb
+.section svlxa
+.section svlxb
+.section svlya
+.section svlyb
+.section svlza
+.section svlzb
+.section svl1a
+.section svl1b
+.section svl2a
+.section svl2b
+.section svl3a
+.section svl3b
+.section svl4a
+.section svl4b
+.section svl5a
+.section svl5b
+.section svl6a
+.section svl6b
+.section svl7a
+.section svl7b
+.section svl8a
+.section svl8b
+.section svl9a
+.section svl9b
+.section svl0a
+.section svl0b
+.section svmaa
+.section svmab
+.section svmba
+.section svmbb
+.section svmca
+.section svmcb
+.section svmda
+.section svmdb
+.section svmea
+.section svmeb
+.section svmfa
+.section svmfb
+.section svmga
+.section svmgb
+.section svmha
+.section svmhb
+.section svmia
+.section svmib
+.section svmja
+.section svmjb
+.section svmka
+.section svmkb
+.section svmla
+.section svmlb
+.section svmma
+.section svmmb
+.section svmna
+.section svmnb
+.section svmoa
+.section svmob
+.section svmpa
+.section svmpb
+.section svmqa
+.section svmqb
+.section svmra
+.section svmrb
+.section svmsa
+.section svmsb
+.section svmta
+.section svmtb
+.section svmua
+.section svmub
+.section svmva
+.section svmvb
+.section svmwa
+.section svmwb
+.section svmxa
+.section svmxb
+.section svmya
+.section svmyb
+.section svmza
+.section svmzb
+.section svm1a
+.section svm1b
+.section svm2a
+.section svm2b
+.section svm3a
+.section svm3b
+.section svm4a
+.section svm4b
+.section svm5a
+.section svm5b
+.section svm6a
+.section svm6b
+.section svm7a
+.section svm7b
+.section svm8a
+.section svm8b
+.section svm9a
+.section svm9b
+.section svm0a
+.section svm0b
+.section svnaa
+.section svnab
+.section svnba
+.section svnbb
+.section svnca
+.section svncb
+.section svnda
+.section svndb
+.section svnea
+.section svneb
+.section svnfa
+.section svnfb
+.section svnga
+.section svngb
+.section svnha
+.section svnhb
+.section svnia
+.section svnib
+.section svnja
+.section svnjb
+.section svnka
+.section svnkb
+.section svnla
+.section svnlb
+.section svnma
+.section svnmb
+.section svnna
+.section svnnb
+.section svnoa
+.section svnob
+.section svnpa
+.section svnpb
+.section svnqa
+.section svnqb
+.section svnra
+.section svnrb
+.section svnsa
+.section svnsb
+.section svnta
+.section svntb
+.section svnua
+.section svnub
+.section svnva
+.section svnvb
+.section svnwa
+.section svnwb
+.section svnxa
+.section svnxb
+.section svnya
+.section svnyb
+.section svnza
+.section svnzb
+.section svn1a
+.section svn1b
+.section svn2a
+.section svn2b
+.section svn3a
+.section svn3b
+.section svn4a
+.section svn4b
+.section svn5a
+.section svn5b
+.section svn6a
+.section svn6b
+.section svn7a
+.section svn7b
+.section svn8a
+.section svn8b
+.section svn9a
+.section svn9b
+.section svn0a
+.section svn0b
+.section svoaa
+.section svoab
+.section svoba
+.section svobb
+.section svoca
+.section svocb
+.section svoda
+.section svodb
+.section svoea
+.section svoeb
+.section svofa
+.section svofb
+.section svoga
+.section svogb
+.section svoha
+.section svohb
+.section svoia
+.section svoib
+.section svoja
+.section svojb
+.section svoka
+.section svokb
+.section svola
+.section svolb
+.section svoma
+.section svomb
+.section svona
+.section svonb
+.section svooa
+.section svoob
+.section svopa
+.section svopb
+.section svoqa
+.section svoqb
+.section svora
+.section svorb
+.section svosa
+.section svosb
+.section svota
+.section svotb
+.section svoua
+.section svoub
+.section svova
+.section svovb
+.section svowa
+.section svowb
+.section svoxa
+.section svoxb
+.section svoya
+.section svoyb
+.section svoza
+.section svozb
+.section svo1a
+.section svo1b
+.section svo2a
+.section svo2b
+.section svo3a
+.section svo3b
+.section svo4a
+.section svo4b
+.section svo5a
+.section svo5b
+.section svo6a
+.section svo6b
+.section svo7a
+.section svo7b
+.section svo8a
+.section svo8b
+.section svo9a
+.section svo9b
+.section svo0a
+.section svo0b
+.section svpaa
+.section svpab
+.section svpba
+.section svpbb
+.section svpca
+.section svpcb
+.section svpda
+.section svpdb
+.section svpea
+.section svpeb
+.section svpfa
+.section svpfb
+.section svpga
+.section svpgb
+.section svpha
+.section svphb
+.section svpia
+.section svpib
+.section svpja
+.section svpjb
+.section svpka
+.section svpkb
+.section svpla
+.section svplb
+.section svpma
+.section svpmb
+.section svpna
+.section svpnb
+.section svpoa
+.section svpob
+.section svppa
+.section svppb
+.section svpqa
+.section svpqb
+.section svpra
+.section svprb
+.section svpsa
+.section svpsb
+.section svpta
+.section svptb
+.section svpua
+.section svpub
+.section svpva
+.section svpvb
+.section svpwa
+.section svpwb
+.section svpxa
+.section svpxb
+.section svpya
+.section svpyb
+.section svpza
+.section svpzb
+.section svp1a
+.section svp1b
+.section svp2a
+.section svp2b
+.section svp3a
+.section svp3b
+.section svp4a
+.section svp4b
+.section svp5a
+.section svp5b
+.section svp6a
+.section svp6b
+.section svp7a
+.section svp7b
+.section svp8a
+.section svp8b
+.section svp9a
+.section svp9b
+.section svp0a
+.section svp0b
+.section svqaa
+.section svqab
+.section svqba
+.section svqbb
+.section svqca
+.section svqcb
+.section svqda
+.section svqdb
+.section svqea
+.section svqeb
+.section svqfa
+.section svqfb
+.section svqga
+.section svqgb
+.section svqha
+.section svqhb
+.section svqia
+.section svqib
+.section svqja
+.section svqjb
+.section svqka
+.section svqkb
+.section svqla
+.section svqlb
+.section svqma
+.section svqmb
+.section svqna
+.section svqnb
+.section svqoa
+.section svqob
+.section svqpa
+.section svqpb
+.section svqqa
+.section svqqb
+.section svqra
+.section svqrb
+.section svqsa
+.section svqsb
+.section svqta
+.section svqtb
+.section svqua
+.section svqub
+.section svqva
+.section svqvb
+.section svqwa
+.section svqwb
+.section svqxa
+.section svqxb
+.section svqya
+.section svqyb
+.section svqza
+.section svqzb
+.section svq1a
+.section svq1b
+.section svq2a
+.section svq2b
+.section svq3a
+.section svq3b
+.section svq4a
+.section svq4b
+.section svq5a
+.section svq5b
+.section svq6a
+.section svq6b
+.section svq7a
+.section svq7b
+.section svq8a
+.section svq8b
+.section svq9a
+.section svq9b
+.section svq0a
+.section svq0b
+.section svraa
+.section svrab
+.section svrba
+.section svrbb
+.section svrca
+.section svrcb
+.section svrda
+.section svrdb
+.section svrea
+.section svreb
+.section svrfa
+.section svrfb
+.section svrga
+.section svrgb
+.section svrha
+.section svrhb
+.section svria
+.section svrib
+.section svrja
+.section svrjb
+.section svrka
+.section svrkb
+.section svrla
+.section svrlb
+.section svrma
+.section svrmb
+.section svrna
+.section svrnb
+.section svroa
+.section svrob
+.section svrpa
+.section svrpb
+.section svrqa
+.section svrqb
+.section svrra
+.section svrrb
+.section svrsa
+.section svrsb
+.section svrta
+.section svrtb
+.section svrua
+.section svrub
+.section svrva
+.section svrvb
+.section svrwa
+.section svrwb
+.section svrxa
+.section svrxb
+.section svrya
+.section svryb
+.section svrza
+.section svrzb
+.section svr1a
+.section svr1b
+.section svr2a
+.section svr2b
+.section svr3a
+.section svr3b
+.section svr4a
+.section svr4b
+.section svr5a
+.section svr5b
+.section svr6a
+.section svr6b
+.section svr7a
+.section svr7b
+.section svr8a
+.section svr8b
+.section svr9a
+.section svr9b
+.section svr0a
+.section svr0b
+.section svsaa
+.section svsab
+.section svsba
+.section svsbb
+.section svsca
+.section svscb
+.section svsda
+.section svsdb
+.section svsea
+.section svseb
+.section svsfa
+.section svsfb
+.section svsga
+.section svsgb
+.section svsha
+.section svshb
+.section svsia
+.section svsib
+.section svsja
+.section svsjb
+.section svska
+.section svskb
+.section svsla
+.section svslb
+.section svsma
+.section svsmb
+.section svsna
+.section svsnb
+.section svsoa
+.section svsob
+.section svspa
+.section svspb
+.section svsqa
+.section svsqb
+.section svsra
+.section svsrb
+.section svssa
+.section svssb
+.section svsta
+.section svstb
+.section svsua
+.section svsub
+.section svsva
+.section svsvb
+.section svswa
+.section svswb
+.section svsxa
+.section svsxb
+.section svsya
+.section svsyb
+.section svsza
+.section svszb
+.section svs1a
+.section svs1b
+.section svs2a
+.section svs2b
+.section svs3a
+.section svs3b
+.section svs4a
+.section svs4b
+.section svs5a
+.section svs5b
+.section svs6a
+.section svs6b
+.section svs7a
+.section svs7b
+.section svs8a
+.section svs8b
+.section svs9a
+.section svs9b
+.section svs0a
+.section svs0b
+.section svtaa
+.section svtab
+.section svtba
+.section svtbb
+.section svtca
+.section svtcb
+.section svtda
+.section svtdb
+.section svtea
+.section svteb
+.section svtfa
+.section svtfb
+.section svtga
+.section svtgb
+.section svtha
+.section svthb
+.section svtia
+.section svtib
+.section svtja
+.section svtjb
+.section svtka
+.section svtkb
+.section svtla
+.section svtlb
+.section svtma
+.section svtmb
+.section svtna
+.section svtnb
+.section svtoa
+.section svtob
+.section svtpa
+.section svtpb
+.section svtqa
+.section svtqb
+.section svtra
+.section svtrb
+.section svtsa
+.section svtsb
+.section svtta
+.section svttb
+.section svtua
+.section svtub
+.section svtva
+.section svtvb
+.section svtwa
+.section svtwb
+.section svtxa
+.section svtxb
+.section svtya
+.section svtyb
+.section svtza
+.section svtzb
+.section svt1a
+.section svt1b
+.section svt2a
+.section svt2b
+.section svt3a
+.section svt3b
+.section svt4a
+.section svt4b
+.section svt5a
+.section svt5b
+.section svt6a
+.section svt6b
+.section svt7a
+.section svt7b
+.section svt8a
+.section svt8b
+.section svt9a
+.section svt9b
+.section svt0a
+.section svt0b
+.section svuaa
+.section svuab
+.section svuba
+.section svubb
+.section svuca
+.section svucb
+.section svuda
+.section svudb
+.section svuea
+.section svueb
+.section svufa
+.section svufb
+.section svuga
+.section svugb
+.section svuha
+.section svuhb
+.section svuia
+.section svuib
+.section svuja
+.section svujb
+.section svuka
+.section svukb
+.section svula
+.section svulb
+.section svuma
+.section svumb
+.section svuna
+.section svunb
+.section svuoa
+.section svuob
+.section svupa
+.section svupb
+.section svuqa
+.section svuqb
+.section svura
+.section svurb
+.section svusa
+.section svusb
+.section svuta
+.section svutb
+.section svuua
+.section svuub
+.section svuva
+.section svuvb
+.section svuwa
+.section svuwb
+.section svuxa
+.section svuxb
+.section svuya
+.section svuyb
+.section svuza
+.section svuzb
+.section svu1a
+.section svu1b
+.section svu2a
+.section svu2b
+.section svu3a
+.section svu3b
+.section svu4a
+.section svu4b
+.section svu5a
+.section svu5b
+.section svu6a
+.section svu6b
+.section svu7a
+.section svu7b
+.section svu8a
+.section svu8b
+.section svu9a
+.section svu9b
+.section svu0a
+.section svu0b
+.section svvaa
+.section svvab
+.section svvba
+.section svvbb
+.section svvca
+.section svvcb
+.section svvda
+.section svvdb
+.section svvea
+.section svveb
+.section svvfa
+.section svvfb
+.section svvga
+.section svvgb
+.section svvha
+.section svvhb
+.section svvia
+.section svvib
+.section svvja
+.section svvjb
+.section svvka
+.section svvkb
+.section svvla
+.section svvlb
+.section svvma
+.section svvmb
+.section svvna
+.section svvnb
+.section svvoa
+.section svvob
+.section svvpa
+.section svvpb
+.section svvqa
+.section svvqb
+.section svvra
+.section svvrb
+.section svvsa
+.section svvsb
+.section svvta
+.section svvtb
+.section svvua
+.section svvub
+.section svvva
+.section svvvb
+.section svvwa
+.section svvwb
+.section svvxa
+.section svvxb
+.section svvya
+.section svvyb
+.section svvza
+.section svvzb
+.section svv1a
+.section svv1b
+.section svv2a
+.section svv2b
+.section svv3a
+.section svv3b
+.section svv4a
+.section svv4b
+.section svv5a
+.section svv5b
+.section svv6a
+.section svv6b
+.section svv7a
+.section svv7b
+.section svv8a
+.section svv8b
+.section svv9a
+.section svv9b
+.section svv0a
+.section svv0b
+.section svwaa
+.section svwab
+.section svwba
+.section svwbb
+.section svwca
+.section svwcb
+.section svwda
+.section svwdb
+.section svwea
+.section svweb
+.section svwfa
+.section svwfb
+.section svwga
+.section svwgb
+.section svwha
+.section svwhb
+.section svwia
+.section svwib
+.section svwja
+.section svwjb
+.section svwka
+.section svwkb
+.section svwla
+.section svwlb
+.section svwma
+.section svwmb
+.section svwna
+.section svwnb
+.section svwoa
+.section svwob
+.section svwpa
+.section svwpb
+.section svwqa
+.section svwqb
+.section svwra
+.section svwrb
+.section svwsa
+.section svwsb
+.section svwta
+.section svwtb
+.section svwua
+.section svwub
+.section svwva
+.section svwvb
+.section svwwa
+.section svwwb
+.section svwxa
+.section svwxb
+.section svwya
+.section svwyb
+.section svwza
+.section svwzb
+.section svw1a
+.section svw1b
+.section svw2a
+.section svw2b
+.section svw3a
+.section svw3b
+.section svw4a
+.section svw4b
+.section svw5a
+.section svw5b
+.section svw6a
+.section svw6b
+.section svw7a
+.section svw7b
+.section svw8a
+.section svw8b
+.section svw9a
+.section svw9b
+.section svw0a
+.section svw0b
+.section svxaa
+.section svxab
+.section svxba
+.section svxbb
+.section svxca
+.section svxcb
+.section svxda
+.section svxdb
+.section svxea
+.section svxeb
+.section svxfa
+.section svxfb
+.section svxga
+.section svxgb
+.section svxha
+.section svxhb
+.section svxia
+.section svxib
+.section svxja
+.section svxjb
+.section svxka
+.section svxkb
+.section svxla
+.section svxlb
+.section svxma
+.section svxmb
+.section svxna
+.section svxnb
+.section svxoa
+.section svxob
+.section svxpa
+.section svxpb
+.section svxqa
+.section svxqb
+.section svxra
+.section svxrb
+.section svxsa
+.section svxsb
+.section svxta
+.section svxtb
+.section svxua
+.section svxub
+.section svxva
+.section svxvb
+.section svxwa
+.section svxwb
+.section svxxa
+.section svxxb
+.section svxya
+.section svxyb
+.section svxza
+.section svxzb
+.section svx1a
+.section svx1b
+.section svx2a
+.section svx2b
+.section svx3a
+.section svx3b
+.section svx4a
+.section svx4b
+.section svx5a
+.section svx5b
+.section svx6a
+.section svx6b
+.section svx7a
+.section svx7b
+.section svx8a
+.section svx8b
+.section svx9a
+.section svx9b
+.section svx0a
+.section svx0b
+.section svyaa
+.section svyab
+.section svyba
+.section svybb
+.section svyca
+.section svycb
+.section svyda
+.section svydb
+.section svyea
+.section svyeb
+.section svyfa
+.section svyfb
+.section svyga
+.section svygb
+.section svyha
+.section svyhb
+.section svyia
+.section svyib
+.section svyja
+.section svyjb
+.section svyka
+.section svykb
+.section svyla
+.section svylb
+.section svyma
+.section svymb
+.section svyna
+.section svynb
+.section svyoa
+.section svyob
+.section svypa
+.section svypb
+.section svyqa
+.section svyqb
+.section svyra
+.section svyrb
+.section svysa
+.section svysb
+.section svyta
+.section svytb
+.section svyua
+.section svyub
+.section svyva
+.section svyvb
+.section svywa
+.section svywb
+.section svyxa
+.section svyxb
+.section svyya
+.section svyyb
+.section svyza
+.section svyzb
+.section svy1a
+.section svy1b
+.section svy2a
+.section svy2b
+.section svy3a
+.section svy3b
+.section svy4a
+.section svy4b
+.section svy5a
+.section svy5b
+.section svy6a
+.section svy6b
+.section svy7a
+.section svy7b
+.section svy8a
+.section svy8b
+.section svy9a
+.section svy9b
+.section svy0a
+.section svy0b
+.section svzaa
+.section svzab
+.section svzba
+.section svzbb
+.section svzca
+.section svzcb
+.section svzda
+.section svzdb
+.section svzea
+.section svzeb
+.section svzfa
+.section svzfb
+.section svzga
+.section svzgb
+.section svzha
+.section svzhb
+.section svzia
+.section svzib
+.section svzja
+.section svzjb
+.section svzka
+.section svzkb
+.section svzla
+.section svzlb
+.section svzma
+.section svzmb
+.section svzna
+.section svznb
+.section svzoa
+.section svzob
+.section svzpa
+.section svzpb
+.section svzqa
+.section svzqb
+.section svzra
+.section svzrb
+.section svzsa
+.section svzsb
+.section svzta
+.section svztb
+.section svzua
+.section svzub
+.section svzva
+.section svzvb
+.section svzwa
+.section svzwb
+.section svzxa
+.section svzxb
+.section svzya
+.section svzyb
+.section svzza
+.section svzzb
+.section svz1a
+.section svz1b
+.section svz2a
+.section svz2b
+.section svz3a
+.section svz3b
+.section svz4a
+.section svz4b
+.section svz5a
+.section svz5b
+.section svz6a
+.section svz6b
+.section svz7a
+.section svz7b
+.section svz8a
+.section svz8b
+.section svz9a
+.section svz9b
+.section svz0a
+.section svz0b
+.section sv1aa
+.section sv1ab
+.section sv1ba
+.section sv1bb
+.section sv1ca
+.section sv1cb
+.section sv1da
+.section sv1db
+.section sv1ea
+.section sv1eb
+.section sv1fa
+.section sv1fb
+.section sv1ga
+.section sv1gb
+.section sv1ha
+.section sv1hb
+.section sv1ia
+.section sv1ib
+.section sv1ja
+.section sv1jb
+.section sv1ka
+.section sv1kb
+.section sv1la
+.section sv1lb
+.section sv1ma
+.section sv1mb
+.section sv1na
+.section sv1nb
+.section sv1oa
+.section sv1ob
+.section sv1pa
+.section sv1pb
+.section sv1qa
+.section sv1qb
+.section sv1ra
+.section sv1rb
+.section sv1sa
+.section sv1sb
+.section sv1ta
+.section sv1tb
+.section sv1ua
+.section sv1ub
+.section sv1va
+.section sv1vb
+.section sv1wa
+.section sv1wb
+.section sv1xa
+.section sv1xb
+.section sv1ya
+.section sv1yb
+.section sv1za
+.section sv1zb
+.section sv11a
+.section sv11b
+.section sv12a
+.section sv12b
+.section sv13a
+.section sv13b
+.section sv14a
+.section sv14b
+.section sv15a
+.section sv15b
+.section sv16a
+.section sv16b
+.section sv17a
+.section sv17b
+.section sv18a
+.section sv18b
+.section sv19a
+.section sv19b
+.section sv10a
+.section sv10b
+.section sv2aa
+.section sv2ab
+.section sv2ba
+.section sv2bb
+.section sv2ca
+.section sv2cb
+.section sv2da
+.section sv2db
+.section sv2ea
+.section sv2eb
+.section sv2fa
+.section sv2fb
+.section sv2ga
+.section sv2gb
+.section sv2ha
+.section sv2hb
+.section sv2ia
+.section sv2ib
+.section sv2ja
+.section sv2jb
+.section sv2ka
+.section sv2kb
+.section sv2la
+.section sv2lb
+.section sv2ma
+.section sv2mb
+.section sv2na
+.section sv2nb
+.section sv2oa
+.section sv2ob
+.section sv2pa
+.section sv2pb
+.section sv2qa
+.section sv2qb
+.section sv2ra
+.section sv2rb
+.section sv2sa
+.section sv2sb
+.section sv2ta
+.section sv2tb
+.section sv2ua
+.section sv2ub
+.section sv2va
+.section sv2vb
+.section sv2wa
+.section sv2wb
+.section sv2xa
+.section sv2xb
+.section sv2ya
+.section sv2yb
+.section sv2za
+.section sv2zb
+.section sv21a
+.section sv21b
+.section sv22a
+.section sv22b
+.section sv23a
+.section sv23b
+.section sv24a
+.section sv24b
+.section sv25a
+.section sv25b
+.section sv26a
+.section sv26b
+.section sv27a
+.section sv27b
+.section sv28a
+.section sv28b
+.section sv29a
+.section sv29b
+.section sv20a
+.section sv20b
+.section sv3aa
+.section sv3ab
+.section sv3ba
+.section sv3bb
+.section sv3ca
+.section sv3cb
+.section sv3da
+.section sv3db
+.section sv3ea
+.section sv3eb
+.section sv3fa
+.section sv3fb
+.section sv3ga
+.section sv3gb
+.section sv3ha
+.section sv3hb
+.section sv3ia
+.section sv3ib
+.section sv3ja
+.section sv3jb
+.section sv3ka
+.section sv3kb
+.section sv3la
+.section sv3lb
+.section sv3ma
+.section sv3mb
+.section sv3na
+.section sv3nb
+.section sv3oa
+.section sv3ob
+.section sv3pa
+.section sv3pb
+.section sv3qa
+.section sv3qb
+.section sv3ra
+.section sv3rb
+.section sv3sa
+.section sv3sb
+.section sv3ta
+.section sv3tb
+.section sv3ua
+.section sv3ub
+.section sv3va
+.section sv3vb
+.section sv3wa
+.section sv3wb
+.section sv3xa
+.section sv3xb
+.section sv3ya
+.section sv3yb
+.section sv3za
+.section sv3zb
+.section sv31a
+.section sv31b
+.section sv32a
+.section sv32b
+.section sv33a
+.section sv33b
+.section sv34a
+.section sv34b
+.section sv35a
+.section sv35b
+.section sv36a
+.section sv36b
+.section sv37a
+.section sv37b
+.section sv38a
+.section sv38b
+.section sv39a
+.section sv39b
+.section sv30a
+.section sv30b
+.section sv4aa
+.section sv4ab
+.section sv4ba
+.section sv4bb
+.section sv4ca
+.section sv4cb
+.section sv4da
+.section sv4db
+.section sv4ea
+.section sv4eb
+.section sv4fa
+.section sv4fb
+.section sv4ga
+.section sv4gb
+.section sv4ha
+.section sv4hb
+.section sv4ia
+.section sv4ib
+.section sv4ja
+.section sv4jb
+.section sv4ka
+.section sv4kb
+.section sv4la
+.section sv4lb
+.section sv4ma
+.section sv4mb
+.section sv4na
+.section sv4nb
+.section sv4oa
+.section sv4ob
+.section sv4pa
+.section sv4pb
+.section sv4qa
+.section sv4qb
+.section sv4ra
+.section sv4rb
+.section sv4sa
+.section sv4sb
+.section sv4ta
+.section sv4tb
+.section sv4ua
+.section sv4ub
+.section sv4va
+.section sv4vb
+.section sv4wa
+.section sv4wb
+.section sv4xa
+.section sv4xb
+.section sv4ya
+.section sv4yb
+.section sv4za
+.section sv4zb
+.section sv41a
+.section sv41b
+.section sv42a
+.section sv42b
+.section sv43a
+.section sv43b
+.section sv44a
+.section sv44b
+.section sv45a
+.section sv45b
+.section sv46a
+.section sv46b
+.section sv47a
+.section sv47b
+.section sv48a
+.section sv48b
+.section sv49a
+.section sv49b
+.section sv40a
+.section sv40b
+.section sv5aa
+.section sv5ab
+.section sv5ba
+.section sv5bb
+.section sv5ca
+.section sv5cb
+.section sv5da
+.section sv5db
+.section sv5ea
+.section sv5eb
+.section sv5fa
+.section sv5fb
+.section sv5ga
+.section sv5gb
+.section sv5ha
+.section sv5hb
+.section sv5ia
+.section sv5ib
+.section sv5ja
+.section sv5jb
+.section sv5ka
+.section sv5kb
+.section sv5la
+.section sv5lb
+.section sv5ma
+.section sv5mb
+.section sv5na
+.section sv5nb
+.section sv5oa
+.section sv5ob
+.section sv5pa
+.section sv5pb
+.section sv5qa
+.section sv5qb
+.section sv5ra
+.section sv5rb
+.section sv5sa
+.section sv5sb
+.section sv5ta
+.section sv5tb
+.section sv5ua
+.section sv5ub
+.section sv5va
+.section sv5vb
+.section sv5wa
+.section sv5wb
+.section sv5xa
+.section sv5xb
+.section sv5ya
+.section sv5yb
+.section sv5za
+.section sv5zb
+.section sv51a
+.section sv51b
+.section sv52a
+.section sv52b
+.section sv53a
+.section sv53b
+.section sv54a
+.section sv54b
+.section sv55a
+.section sv55b
+.section sv56a
+.section sv56b
+.section sv57a
+.section sv57b
+.section sv58a
+.section sv58b
+.section sv59a
+.section sv59b
+.section sv50a
+.section sv50b
+.section sv6aa
+.section sv6ab
+.section sv6ba
+.section sv6bb
+.section sv6ca
+.section sv6cb
+.section sv6da
+.section sv6db
+.section sv6ea
+.section sv6eb
+.section sv6fa
+.section sv6fb
+.section sv6ga
+.section sv6gb
+.section sv6ha
+.section sv6hb
+.section sv6ia
+.section sv6ib
+.section sv6ja
+.section sv6jb
+.section sv6ka
+.section sv6kb
+.section sv6la
+.section sv6lb
+.section sv6ma
+.section sv6mb
+.section sv6na
+.section sv6nb
+.section sv6oa
+.section sv6ob
+.section sv6pa
+.section sv6pb
+.section sv6qa
+.section sv6qb
+.section sv6ra
+.section sv6rb
+.section sv6sa
+.section sv6sb
+.section sv6ta
+.section sv6tb
+.section sv6ua
+.section sv6ub
+.section sv6va
+.section sv6vb
+.section sv6wa
+.section sv6wb
+.section sv6xa
+.section sv6xb
+.section sv6ya
+.section sv6yb
+.section sv6za
+.section sv6zb
+.section sv61a
+.section sv61b
+.section sv62a
+.section sv62b
+.section sv63a
+.section sv63b
+.section sv64a
+.section sv64b
+.section sv65a
+.section sv65b
+.section sv66a
+.section sv66b
+.section sv67a
+.section sv67b
+.section sv68a
+.section sv68b
+.section sv69a
+.section sv69b
+.section sv60a
+.section sv60b
+.section sv7aa
+.section sv7ab
+.section sv7ba
+.section sv7bb
+.section sv7ca
+.section sv7cb
+.section sv7da
+.section sv7db
+.section sv7ea
+.section sv7eb
+.section sv7fa
+.section sv7fb
+.section sv7ga
+.section sv7gb
+.section sv7ha
+.section sv7hb
+.section sv7ia
+.section sv7ib
+.section sv7ja
+.section sv7jb
+.section sv7ka
+.section sv7kb
+.section sv7la
+.section sv7lb
+.section sv7ma
+.section sv7mb
+.section sv7na
+.section sv7nb
+.section sv7oa
+.section sv7ob
+.section sv7pa
+.section sv7pb
+.section sv7qa
+.section sv7qb
+.section sv7ra
+.section sv7rb
+.section sv7sa
+.section sv7sb
+.section sv7ta
+.section sv7tb
+.section sv7ua
+.section sv7ub
+.section sv7va
+.section sv7vb
+.section sv7wa
+.section sv7wb
+.section sv7xa
+.section sv7xb
+.section sv7ya
+.section sv7yb
+.section sv7za
+.section sv7zb
+.section sv71a
+.section sv71b
+.section sv72a
+.section sv72b
+.section sv73a
+.section sv73b
+.section sv74a
+.section sv74b
+.section sv75a
+.section sv75b
+.section sv76a
+.section sv76b
+.section sv77a
+.section sv77b
+.section sv78a
+.section sv78b
+.section sv79a
+.section sv79b
+.section sv70a
+.section sv70b
+.section sv8aa
+.section sv8ab
+.section sv8ba
+.section sv8bb
+.section sv8ca
+.section sv8cb
+.section sv8da
+.section sv8db
+.section sv8ea
+.section sv8eb
+.section sv8fa
+.section sv8fb
+.section sv8ga
+.section sv8gb
+.section sv8ha
+.section sv8hb
+.section sv8ia
+.section sv8ib
+.section sv8ja
+.section sv8jb
+.section sv8ka
+.section sv8kb
+.section sv8la
+.section sv8lb
+.section sv8ma
+.section sv8mb
+.section sv8na
+.section sv8nb
+.section sv8oa
+.section sv8ob
+.section sv8pa
+.section sv8pb
+.section sv8qa
+.section sv8qb
+.section sv8ra
+.section sv8rb
+.section sv8sa
+.section sv8sb
+.section sv8ta
+.section sv8tb
+.section sv8ua
+.section sv8ub
+.section sv8va
+.section sv8vb
+.section sv8wa
+.section sv8wb
+.section sv8xa
+.section sv8xb
+.section sv8ya
+.section sv8yb
+.section sv8za
+.section sv8zb
+.section sv81a
+.section sv81b
+.section sv82a
+.section sv82b
+.section sv83a
+.section sv83b
+.section sv84a
+.section sv84b
+.section sv85a
+.section sv85b
+.section sv86a
+.section sv86b
+.section sv87a
+.section sv87b
+.section sv88a
+.section sv88b
+.section sv89a
+.section sv89b
+.section sv80a
+.section sv80b
+.section sv9aa
+.section sv9ab
+.section sv9ba
+.section sv9bb
+.section sv9ca
+.section sv9cb
+.section sv9da
+.section sv9db
+.section sv9ea
+.section sv9eb
+.section sv9fa
+.section sv9fb
+.section sv9ga
+.section sv9gb
+.section sv9ha
+.section sv9hb
+.section sv9ia
+.section sv9ib
+.section sv9ja
+.section sv9jb
+.section sv9ka
+.section sv9kb
+.section sv9la
+.section sv9lb
+.section sv9ma
+.section sv9mb
+.section sv9na
+.section sv9nb
+.section sv9oa
+.section sv9ob
+.section sv9pa
+.section sv9pb
+.section sv9qa
+.section sv9qb
+.section sv9ra
+.section sv9rb
+.section sv9sa
+.section sv9sb
+.section sv9ta
+.section sv9tb
+.section sv9ua
+.section sv9ub
+.section sv9va
+.section sv9vb
+.section sv9wa
+.section sv9wb
+.section sv9xa
+.section sv9xb
+.section sv9ya
+.section sv9yb
+.section sv9za
+.section sv9zb
+.section sv91a
+.section sv91b
+.section sv92a
+.section sv92b
+.section sv93a
+.section sv93b
+.section sv94a
+.section sv94b
+.section sv95a
+.section sv95b
+.section sv96a
+.section sv96b
+.section sv97a
+.section sv97b
+.section sv98a
+.section sv98b
+.section sv99a
+.section sv99b
+.section sv90a
+.section sv90b
+.section sv0aa
+.section sv0ab
+.section sv0ba
+.section sv0bb
+.section sv0ca
+.section sv0cb
+.section sv0da
+.section sv0db
+.section sv0ea
+.section sv0eb
+.section sv0fa
+.section sv0fb
+.section sv0ga
+.section sv0gb
+.section sv0ha
+.section sv0hb
+.section sv0ia
+.section sv0ib
+.section sv0ja
+.section sv0jb
+.section sv0ka
+.section sv0kb
+.section sv0la
+.section sv0lb
+.section sv0ma
+.section sv0mb
+.section sv0na
+.section sv0nb
+.section sv0oa
+.section sv0ob
+.section sv0pa
+.section sv0pb
+.section sv0qa
+.section sv0qb
+.section sv0ra
+.section sv0rb
+.section sv0sa
+.section sv0sb
+.section sv0ta
+.section sv0tb
+.section sv0ua
+.section sv0ub
+.section sv0va
+.section sv0vb
+.section sv0wa
+.section sv0wb
+.section sv0xa
+.section sv0xb
+.section sv0ya
+.section sv0yb
+.section sv0za
+.section sv0zb
+.section sv01a
+.section sv01b
+.section sv02a
+.section sv02b
+.section sv03a
+.section sv03b
+.section sv04a
+.section sv04b
+.section sv05a
+.section sv05b
+.section sv06a
+.section sv06b
+.section sv07a
+.section sv07b
+.section sv08a
+.section sv08b
+.section sv09a
+.section sv09b
+.section sv00a
+.section sv00b
+.section swaaa
+.section swaab
+.section swaba
+.section swabb
+.section swaca
+.section swacb
+.section swada
+.section swadb
+.section swaea
+.section swaeb
+.section swafa
+.section swafb
+.section swaga
+.section swagb
+.section swaha
+.section swahb
+.section swaia
+.section swaib
+.section swaja
+.section swajb
+.section swaka
+.section swakb
+.section swala
+.section swalb
+.section swama
+.section swamb
+.section swana
+.section swanb
+.section swaoa
+.section swaob
+.section swapa
+.section swapb
+.section swaqa
+.section swaqb
+.section swara
+.section swarb
+.section swasa
+.section swasb
+.section swata
+.section swatb
+.section swaua
+.section swaub
+.section swava
+.section swavb
+.section swawa
+.section swawb
+.section swaxa
+.section swaxb
+.section swaya
+.section swayb
+.section swaza
+.section swazb
+.section swa1a
+.section swa1b
+.section swa2a
+.section swa2b
+.section swa3a
+.section swa3b
+.section swa4a
+.section swa4b
+.section swa5a
+.section swa5b
+.section swa6a
+.section swa6b
+.section swa7a
+.section swa7b
+.section swa8a
+.section swa8b
+.section swa9a
+.section swa9b
+.section swa0a
+.section swa0b
+.section swbaa
+.section swbab
+.section swbba
+.section swbbb
+.section swbca
+.section swbcb
+.section swbda
+.section swbdb
+.section swbea
+.section swbeb
+.section swbfa
+.section swbfb
+.section swbga
+.section swbgb
+.section swbha
+.section swbhb
+.section swbia
+.section swbib
+.section swbja
+.section swbjb
+.section swbka
+.section swbkb
+.section swbla
+.section swblb
+.section swbma
+.section swbmb
+.section swbna
+.section swbnb
+.section swboa
+.section swbob
+.section swbpa
+.section swbpb
+.section swbqa
+.section swbqb
+.section swbra
+.section swbrb
+.section swbsa
+.section swbsb
+.section swbta
+.section swbtb
+.section swbua
+.section swbub
+.section swbva
+.section swbvb
+.section swbwa
+.section swbwb
+.section swbxa
+.section swbxb
+.section swbya
+.section swbyb
+.section swbza
+.section swbzb
+.section swb1a
+.section swb1b
+.section swb2a
+.section swb2b
+.section swb3a
+.section swb3b
+.section swb4a
+.section swb4b
+.section swb5a
+.section swb5b
+.section swb6a
+.section swb6b
+.section swb7a
+.section swb7b
+.section swb8a
+.section swb8b
+.section swb9a
+.section swb9b
+.section swb0a
+.section swb0b
+.section swcaa
+.section swcab
+.section swcba
+.section swcbb
+.section swcca
+.section swccb
+.section swcda
+.section swcdb
+.section swcea
+.section swceb
+.section swcfa
+.section swcfb
+.section swcga
+.section swcgb
+.section swcha
+.section swchb
+.section swcia
+.section swcib
+.section swcja
+.section swcjb
+.section swcka
+.section swckb
+.section swcla
+.section swclb
+.section swcma
+.section swcmb
+.section swcna
+.section swcnb
+.section swcoa
+.section swcob
+.section swcpa
+.section swcpb
+.section swcqa
+.section swcqb
+.section swcra
+.section swcrb
+.section swcsa
+.section swcsb
+.section swcta
+.section swctb
+.section swcua
+.section swcub
+.section swcva
+.section swcvb
+.section swcwa
+.section swcwb
+.section swcxa
+.section swcxb
+.section swcya
+.section swcyb
+.section swcza
+.section swczb
+.section swc1a
+.section swc1b
+.section swc2a
+.section swc2b
+.section swc3a
+.section swc3b
+.section swc4a
+.section swc4b
+.section swc5a
+.section swc5b
+.section swc6a
+.section swc6b
+.section swc7a
+.section swc7b
+.section swc8a
+.section swc8b
+.section swc9a
+.section swc9b
+.section swc0a
+.section swc0b
+.section swdaa
+.section swdab
+.section swdba
+.section swdbb
+.section swdca
+.section swdcb
+.section swdda
+.section swddb
+.section swdea
+.section swdeb
+.section swdfa
+.section swdfb
+.section swdga
+.section swdgb
+.section swdha
+.section swdhb
+.section swdia
+.section swdib
+.section swdja
+.section swdjb
+.section swdka
+.section swdkb
+.section swdla
+.section swdlb
+.section swdma
+.section swdmb
+.section swdna
+.section swdnb
+.section swdoa
+.section swdob
+.section swdpa
+.section swdpb
+.section swdqa
+.section swdqb
+.section swdra
+.section swdrb
+.section swdsa
+.section swdsb
+.section swdta
+.section swdtb
+.section swdua
+.section swdub
+.section swdva
+.section swdvb
+.section swdwa
+.section swdwb
+.section swdxa
+.section swdxb
+.section swdya
+.section swdyb
+.section swdza
+.section swdzb
+.section swd1a
+.section swd1b
+.section swd2a
+.section swd2b
+.section swd3a
+.section swd3b
+.section swd4a
+.section swd4b
+.section swd5a
+.section swd5b
+.section swd6a
+.section swd6b
+.section swd7a
+.section swd7b
+.section swd8a
+.section swd8b
+.section swd9a
+.section swd9b
+.section swd0a
+.section swd0b
+.section sweaa
+.section sweab
+.section sweba
+.section swebb
+.section sweca
+.section swecb
+.section sweda
+.section swedb
+.section sweea
+.section sweeb
+.section swefa
+.section swefb
+.section swega
+.section swegb
+.section sweha
+.section swehb
+.section sweia
+.section sweib
+.section sweja
+.section swejb
+.section sweka
+.section swekb
+.section swela
+.section swelb
+.section swema
+.section swemb
+.section swena
+.section swenb
+.section sweoa
+.section sweob
+.section swepa
+.section swepb
+.section sweqa
+.section sweqb
+.section swera
+.section swerb
+.section swesa
+.section swesb
+.section sweta
+.section swetb
+.section sweua
+.section sweub
+.section sweva
+.section swevb
+.section swewa
+.section swewb
+.section swexa
+.section swexb
+.section sweya
+.section sweyb
+.section sweza
+.section swezb
+.section swe1a
+.section swe1b
+.section swe2a
+.section swe2b
+.section swe3a
+.section swe3b
+.section swe4a
+.section swe4b
+.section swe5a
+.section swe5b
+.section swe6a
+.section swe6b
+.section swe7a
+.section swe7b
+.section swe8a
+.section swe8b
+.section swe9a
+.section swe9b
+.section swe0a
+.section swe0b
+.section swfaa
+.section swfab
+.section swfba
+.section swfbb
+.section swfca
+.section swfcb
+.section swfda
+.section swfdb
+.section swfea
+.section swfeb
+.section swffa
+.section swffb
+.section swfga
+.section swfgb
+.section swfha
+.section swfhb
+.section swfia
+.section swfib
+.section swfja
+.section swfjb
+.section swfka
+.section swfkb
+.section swfla
+.section swflb
+.section swfma
+.section swfmb
+.section swfna
+.section swfnb
+.section swfoa
+.section swfob
+.section swfpa
+.section swfpb
+.section swfqa
+.section swfqb
+.section swfra
+.section swfrb
+.section swfsa
+.section swfsb
+.section swfta
+.section swftb
+.section swfua
+.section swfub
+.section swfva
+.section swfvb
+.section swfwa
+.section swfwb
+.section swfxa
+.section swfxb
+.section swfya
+.section swfyb
+.section swfza
+.section swfzb
+.section swf1a
+.section swf1b
+.section swf2a
+.section swf2b
+.section swf3a
+.section swf3b
+.section swf4a
+.section swf4b
+.section swf5a
+.section swf5b
+.section swf6a
+.section swf6b
+.section swf7a
+.section swf7b
+.section swf8a
+.section swf8b
+.section swf9a
+.section swf9b
+.section swf0a
+.section swf0b
+.section swgaa
+.section swgab
+.section swgba
+.section swgbb
+.section swgca
+.section swgcb
+.section swgda
+.section swgdb
+.section swgea
+.section swgeb
+.section swgfa
+.section swgfb
+.section swgga
+.section swggb
+.section swgha
+.section swghb
+.section swgia
+.section swgib
+.section swgja
+.section swgjb
+.section swgka
+.section swgkb
+.section swgla
+.section swglb
+.section swgma
+.section swgmb
+.section swgna
+.section swgnb
+.section swgoa
+.section swgob
+.section swgpa
+.section swgpb
+.section swgqa
+.section swgqb
+.section swgra
+.section swgrb
+.section swgsa
+.section swgsb
+.section swgta
+.section swgtb
+.section swgua
+.section swgub
+.section swgva
+.section swgvb
+.section swgwa
+.section swgwb
+.section swgxa
+.section swgxb
+.section swgya
+.section swgyb
+.section swgza
+.section swgzb
+.section swg1a
+.section swg1b
+.section swg2a
+.section swg2b
+.section swg3a
+.section swg3b
+.section swg4a
+.section swg4b
+.section swg5a
+.section swg5b
+.section swg6a
+.section swg6b
+.section swg7a
+.section swg7b
+.section swg8a
+.section swg8b
+.section swg9a
+.section swg9b
+.section swg0a
+.section swg0b
+.section swhaa
+.section swhab
+.section swhba
+.section swhbb
+.section swhca
+.section swhcb
+.section swhda
+.section swhdb
+.section swhea
+.section swheb
+.section swhfa
+.section swhfb
+.section swhga
+.section swhgb
+.section swhha
+.section swhhb
+.section swhia
+.section swhib
+.section swhja
+.section swhjb
+.section swhka
+.section swhkb
+.section swhla
+.section swhlb
+.section swhma
+.section swhmb
+.section swhna
+.section swhnb
+.section swhoa
+.section swhob
+.section swhpa
+.section swhpb
+.section swhqa
+.section swhqb
+.section swhra
+.section swhrb
+.section swhsa
+.section swhsb
+.section swhta
+.section swhtb
+.section swhua
+.section swhub
+.section swhva
+.section swhvb
+.section swhwa
+.section swhwb
+.section swhxa
+.section swhxb
+.section swhya
+.section swhyb
+.section swhza
+.section swhzb
+.section swh1a
+.section swh1b
+.section swh2a
+.section swh2b
+.section swh3a
+.section swh3b
+.section swh4a
+.section swh4b
+.section swh5a
+.section swh5b
+.section swh6a
+.section swh6b
+.section swh7a
+.section swh7b
+.section swh8a
+.section swh8b
+.section swh9a
+.section swh9b
+.section swh0a
+.section swh0b
+.section swiaa
+.section swiab
+.section swiba
+.section swibb
+.section swica
+.section swicb
+.section swida
+.section swidb
+.section swiea
+.section swieb
+.section swifa
+.section swifb
+.section swiga
+.section swigb
+.section swiha
+.section swihb
+.section swiia
+.section swiib
+.section swija
+.section swijb
+.section swika
+.section swikb
+.section swila
+.section swilb
+.section swima
+.section swimb
+.section swina
+.section swinb
+.section swioa
+.section swiob
+.section swipa
+.section swipb
+.section swiqa
+.section swiqb
+.section swira
+.section swirb
+.section swisa
+.section swisb
+.section swita
+.section switb
+.section swiua
+.section swiub
+.section swiva
+.section swivb
+.section swiwa
+.section swiwb
+.section swixa
+.section swixb
+.section swiya
+.section swiyb
+.section swiza
+.section swizb
+.section swi1a
+.section swi1b
+.section swi2a
+.section swi2b
+.section swi3a
+.section swi3b
+.section swi4a
+.section swi4b
+.section swi5a
+.section swi5b
+.section swi6a
+.section swi6b
+.section swi7a
+.section swi7b
+.section swi8a
+.section swi8b
+.section swi9a
+.section swi9b
+.section swi0a
+.section swi0b
+.section swjaa
+.section swjab
+.section swjba
+.section swjbb
+.section swjca
+.section swjcb
+.section swjda
+.section swjdb
+.section swjea
+.section swjeb
+.section swjfa
+.section swjfb
+.section swjga
+.section swjgb
+.section swjha
+.section swjhb
+.section swjia
+.section swjib
+.section swjja
+.section swjjb
+.section swjka
+.section swjkb
+.section swjla
+.section swjlb
+.section swjma
+.section swjmb
+.section swjna
+.section swjnb
+.section swjoa
+.section swjob
+.section swjpa
+.section swjpb
+.section swjqa
+.section swjqb
+.section swjra
+.section swjrb
+.section swjsa
+.section swjsb
+.section swjta
+.section swjtb
+.section swjua
+.section swjub
+.section swjva
+.section swjvb
+.section swjwa
+.section swjwb
+.section swjxa
+.section swjxb
+.section swjya
+.section swjyb
+.section swjza
+.section swjzb
+.section swj1a
+.section swj1b
+.section swj2a
+.section swj2b
+.section swj3a
+.section swj3b
+.section swj4a
+.section swj4b
+.section swj5a
+.section swj5b
+.section swj6a
+.section swj6b
+.section swj7a
+.section swj7b
+.section swj8a
+.section swj8b
+.section swj9a
+.section swj9b
+.section swj0a
+.section swj0b
+.section swkaa
+.section swkab
+.section swkba
+.section swkbb
+.section swkca
+.section swkcb
+.section swkda
+.section swkdb
+.section swkea
+.section swkeb
+.section swkfa
+.section swkfb
+.section swkga
+.section swkgb
+.section swkha
+.section swkhb
+.section swkia
+.section swkib
+.section swkja
+.section swkjb
+.section swkka
+.section swkkb
+.section swkla
+.section swklb
+.section swkma
+.section swkmb
+.section swkna
+.section swknb
+.section swkoa
+.section swkob
+.section swkpa
+.section swkpb
+.section swkqa
+.section swkqb
+.section swkra
+.section swkrb
+.section swksa
+.section swksb
+.section swkta
+.section swktb
+.section swkua
+.section swkub
+.section swkva
+.section swkvb
+.section swkwa
+.section swkwb
+.section swkxa
+.section swkxb
+.section swkya
+.section swkyb
+.section swkza
+.section swkzb
+.section swk1a
+.section swk1b
+.section swk2a
+.section swk2b
+.section swk3a
+.section swk3b
+.section swk4a
+.section swk4b
+.section swk5a
+.section swk5b
+.section swk6a
+.section swk6b
+.section swk7a
+.section swk7b
+.section swk8a
+.section swk8b
+.section swk9a
+.section swk9b
+.section swk0a
+.section swk0b
+.section swlaa
+.section swlab
+.section swlba
+.section swlbb
+.section swlca
+.section swlcb
+.section swlda
+.section swldb
+.section swlea
+.section swleb
+.section swlfa
+.section swlfb
+.section swlga
+.section swlgb
+.section swlha
+.section swlhb
+.section swlia
+.section swlib
+.section swlja
+.section swljb
+.section swlka
+.section swlkb
+.section swlla
+.section swllb
+.section swlma
+.section swlmb
+.section swlna
+.section swlnb
+.section swloa
+.section swlob
+.section swlpa
+.section swlpb
+.section swlqa
+.section swlqb
+.section swlra
+.section swlrb
+.section swlsa
+.section swlsb
+.section swlta
+.section swltb
+.section swlua
+.section swlub
+.section swlva
+.section swlvb
+.section swlwa
+.section swlwb
+.section swlxa
+.section swlxb
+.section swlya
+.section swlyb
+.section swlza
+.section swlzb
+.section swl1a
+.section swl1b
+.section swl2a
+.section swl2b
+.section swl3a
+.section swl3b
+.section swl4a
+.section swl4b
+.section swl5a
+.section swl5b
+.section swl6a
+.section swl6b
+.section swl7a
+.section swl7b
+.section swl8a
+.section swl8b
+.section swl9a
+.section swl9b
+.section swl0a
+.section swl0b
+.section swmaa
+.section swmab
+.section swmba
+.section swmbb
+.section swmca
+.section swmcb
+.section swmda
+.section swmdb
+.section swmea
+.section swmeb
+.section swmfa
+.section swmfb
+.section swmga
+.section swmgb
+.section swmha
+.section swmhb
+.section swmia
+.section swmib
+.section swmja
+.section swmjb
+.section swmka
+.section swmkb
+.section swmla
+.section swmlb
+.section swmma
+.section swmmb
+.section swmna
+.section swmnb
+.section swmoa
+.section swmob
+.section swmpa
+.section swmpb
+.section swmqa
+.section swmqb
+.section swmra
+.section swmrb
+.section swmsa
+.section swmsb
+.section swmta
+.section swmtb
+.section swmua
+.section swmub
+.section swmva
+.section swmvb
+.section swmwa
+.section swmwb
+.section swmxa
+.section swmxb
+.section swmya
+.section swmyb
+.section swmza
+.section swmzb
+.section swm1a
+.section swm1b
+.section swm2a
+.section swm2b
+.section swm3a
+.section swm3b
+.section swm4a
+.section swm4b
+.section swm5a
+.section swm5b
+.section swm6a
+.section swm6b
+.section swm7a
+.section swm7b
+.section swm8a
+.section swm8b
+.section swm9a
+.section swm9b
+.section swm0a
+.section swm0b
+.section swnaa
+.section swnab
+.section swnba
+.section swnbb
+.section swnca
+.section swncb
+.section swnda
+.section swndb
+.section swnea
+.section swneb
+.section swnfa
+.section swnfb
+.section swnga
+.section swngb
+.section swnha
+.section swnhb
+.section swnia
+.section swnib
+.section swnja
+.section swnjb
+.section swnka
+.section swnkb
+.section swnla
+.section swnlb
+.section swnma
+.section swnmb
+.section swnna
+.section swnnb
+.section swnoa
+.section swnob
+.section swnpa
+.section swnpb
+.section swnqa
+.section swnqb
+.section swnra
+.section swnrb
+.section swnsa
+.section swnsb
+.section swnta
+.section swntb
+.section swnua
+.section swnub
+.section swnva
+.section swnvb
+.section swnwa
+.section swnwb
+.section swnxa
+.section swnxb
+.section swnya
+.section swnyb
+.section swnza
+.section swnzb
+.section swn1a
+.section swn1b
+.section swn2a
+.section swn2b
+.section swn3a
+.section swn3b
+.section swn4a
+.section swn4b
+.section swn5a
+.section swn5b
+.section swn6a
+.section swn6b
+.section swn7a
+.section swn7b
+.section swn8a
+.section swn8b
+.section swn9a
+.section swn9b
+.section swn0a
+.section swn0b
+.section swoaa
+.section swoab
+.section swoba
+.section swobb
+.section swoca
+.section swocb
+.section swoda
+.section swodb
+.section swoea
+.section swoeb
+.section swofa
+.section swofb
+.section swoga
+.section swogb
+.section swoha
+.section swohb
+.section swoia
+.section swoib
+.section swoja
+.section swojb
+.section swoka
+.section swokb
+.section swola
+.section swolb
+.section swoma
+.section swomb
+.section swona
+.section swonb
+.section swooa
+.section swoob
+.section swopa
+.section swopb
+.section swoqa
+.section swoqb
+.section swora
+.section sworb
+.section swosa
+.section swosb
+.section swota
+.section swotb
+.section swoua
+.section swoub
+.section swova
+.section swovb
+.section swowa
+.section swowb
+.section swoxa
+.section swoxb
+.section swoya
+.section swoyb
+.section swoza
+.section swozb
+.section swo1a
+.section swo1b
+.section swo2a
+.section swo2b
+.section swo3a
+.section swo3b
+.section swo4a
+.section swo4b
+.section swo5a
+.section swo5b
+.section swo6a
+.section swo6b
+.section swo7a
+.section swo7b
+.section swo8a
+.section swo8b
+.section swo9a
+.section swo9b
+.section swo0a
+.section swo0b
+.section swpaa
+.section swpab
+.section swpba
+.section swpbb
+.section swpca
+.section swpcb
+.section swpda
+.section swpdb
+.section swpea
+.section swpeb
+.section swpfa
+.section swpfb
+.section swpga
+.section swpgb
+.section swpha
+.section swphb
+.section swpia
+.section swpib
+.section swpja
+.section swpjb
+.section swpka
+.section swpkb
+.section swpla
+.section swplb
+.section swpma
+.section swpmb
+.section swpna
+.section swpnb
+.section swpoa
+.section swpob
+.section swppa
+.section swppb
+.section swpqa
+.section swpqb
+.section swpra
+.section swprb
+.section swpsa
+.section swpsb
+.section swpta
+.section swptb
+.section swpua
+.section swpub
+.section swpva
+.section swpvb
+.section swpwa
+.section swpwb
+.section swpxa
+.section swpxb
+.section swpya
+.section swpyb
+.section swpza
+.section swpzb
+.section swp1a
+.section swp1b
+.section swp2a
+.section swp2b
+.section swp3a
+.section swp3b
+.section swp4a
+.section swp4b
+.section swp5a
+.section swp5b
+.section swp6a
+.section swp6b
+.section swp7a
+.section swp7b
+.section swp8a
+.section swp8b
+.section swp9a
+.section swp9b
+.section swp0a
+.section swp0b
+.section swqaa
+.section swqab
+.section swqba
+.section swqbb
+.section swqca
+.section swqcb
+.section swqda
+.section swqdb
+.section swqea
+.section swqeb
+.section swqfa
+.section swqfb
+.section swqga
+.section swqgb
+.section swqha
+.section swqhb
+.section swqia
+.section swqib
+.section swqja
+.section swqjb
+.section swqka
+.section swqkb
+.section swqla
+.section swqlb
+.section swqma
+.section swqmb
+.section swqna
+.section swqnb
+.section swqoa
+.section swqob
+.section swqpa
+.section swqpb
+.section swqqa
+.section swqqb
+.section swqra
+.section swqrb
+.section swqsa
+.section swqsb
+.section swqta
+.section swqtb
+.section swqua
+.section swqub
+.section swqva
+.section swqvb
+.section swqwa
+.section swqwb
+.section swqxa
+.section swqxb
+.section swqya
+.section swqyb
+.section swqza
+.section swqzb
+.section swq1a
+.section swq1b
+.section swq2a
+.section swq2b
+.section swq3a
+.section swq3b
+.section swq4a
+.section swq4b
+.section swq5a
+.section swq5b
+.section swq6a
+.section swq6b
+.section swq7a
+.section swq7b
+.section swq8a
+.section swq8b
+.section swq9a
+.section swq9b
+.section swq0a
+.section swq0b
+.section swraa
+.section swrab
+.section swrba
+.section swrbb
+.section swrca
+.section swrcb
+.section swrda
+.section swrdb
+.section swrea
+.section swreb
+.section swrfa
+.section swrfb
+.section swrga
+.section swrgb
+.section swrha
+.section swrhb
+.section swria
+.section swrib
+.section swrja
+.section swrjb
+.section swrka
+.section swrkb
+.section swrla
+.section swrlb
+.section swrma
+.section swrmb
+.section swrna
+.section swrnb
+.section swroa
+.section swrob
+.section swrpa
+.section swrpb
+.section swrqa
+.section swrqb
+.section swrra
+.section swrrb
+.section swrsa
+.section swrsb
+.section swrta
+.section swrtb
+.section swrua
+.section swrub
+.section swrva
+.section swrvb
+.section swrwa
+.section swrwb
+.section swrxa
+.section swrxb
+.section swrya
+.section swryb
+.section swrza
+.section swrzb
+.section swr1a
+.section swr1b
+.section swr2a
+.section swr2b
+.section swr3a
+.section swr3b
+.section swr4a
+.section swr4b
+.section swr5a
+.section swr5b
+.section swr6a
+.section swr6b
+.section swr7a
+.section swr7b
+.section swr8a
+.section swr8b
+.section swr9a
+.section swr9b
+.section swr0a
+.section swr0b
+.section swsaa
+.section swsab
+.section swsba
+.section swsbb
+.section swsca
+.section swscb
+.section swsda
+.section swsdb
+.section swsea
+.section swseb
+.section swsfa
+.section swsfb
+.section swsga
+.section swsgb
+.section swsha
+.section swshb
+.section swsia
+.section swsib
+.section swsja
+.section swsjb
+.section swska
+.section swskb
+.section swsla
+.section swslb
+.section swsma
+.section swsmb
+.section swsna
+.section swsnb
+.section swsoa
+.section swsob
+.section swspa
+.section swspb
+.section swsqa
+.section swsqb
+.section swsra
+.section swsrb
+.section swssa
+.section swssb
+.section swsta
+.section swstb
+.section swsua
+.section swsub
+.section swsva
+.section swsvb
+.section swswa
+.section swswb
+.section swsxa
+.section swsxb
+.section swsya
+.section swsyb
+.section swsza
+.section swszb
+.section sws1a
+.section sws1b
+.section sws2a
+.section sws2b
+.section sws3a
+.section sws3b
+.section sws4a
+.section sws4b
+.section sws5a
+.section sws5b
+.section sws6a
+.section sws6b
+.section sws7a
+.section sws7b
+.section sws8a
+.section sws8b
+.section sws9a
+.section sws9b
+.section sws0a
+.section sws0b
+.section swtaa
+.section swtab
+.section swtba
+.section swtbb
+.section swtca
+.section swtcb
+.section swtda
+.section swtdb
+.section swtea
+.section swteb
+.section swtfa
+.section swtfb
+.section swtga
+.section swtgb
+.section swtha
+.section swthb
+.section swtia
+.section swtib
+.section swtja
+.section swtjb
+.section swtka
+.section swtkb
+.section swtla
+.section swtlb
+.section swtma
+.section swtmb
+.section swtna
+.section swtnb
+.section swtoa
+.section swtob
+.section swtpa
+.section swtpb
+.section swtqa
+.section swtqb
+.section swtra
+.section swtrb
+.section swtsa
+.section swtsb
+.section swtta
+.section swttb
+.section swtua
+.section swtub
+.section swtva
+.section swtvb
+.section swtwa
+.section swtwb
+.section swtxa
+.section swtxb
+.section swtya
+.section swtyb
+.section swtza
+.section swtzb
+.section swt1a
+.section swt1b
+.section swt2a
+.section swt2b
+.section swt3a
+.section swt3b
+.section swt4a
+.section swt4b
+.section swt5a
+.section swt5b
+.section swt6a
+.section swt6b
+.section swt7a
+.section swt7b
+.section swt8a
+.section swt8b
+.section swt9a
+.section swt9b
+.section swt0a
+.section swt0b
+.section swuaa
+.section swuab
+.section swuba
+.section swubb
+.section swuca
+.section swucb
+.section swuda
+.section swudb
+.section swuea
+.section swueb
+.section swufa
+.section swufb
+.section swuga
+.section swugb
+.section swuha
+.section swuhb
+.section swuia
+.section swuib
+.section swuja
+.section swujb
+.section swuka
+.section swukb
+.section swula
+.section swulb
+.section swuma
+.section swumb
+.section swuna
+.section swunb
+.section swuoa
+.section swuob
+.section swupa
+.section swupb
+.section swuqa
+.section swuqb
+.section swura
+.section swurb
+.section swusa
+.section swusb
+.section swuta
+.section swutb
+.section swuua
+.section swuub
+.section swuva
+.section swuvb
+.section swuwa
+.section swuwb
+.section swuxa
+.section swuxb
+.section swuya
+.section swuyb
+.section swuza
+.section swuzb
+.section swu1a
+.section swu1b
+.section swu2a
+.section swu2b
+.section swu3a
+.section swu3b
+.section swu4a
+.section swu4b
+.section swu5a
+.section swu5b
+.section swu6a
+.section swu6b
+.section swu7a
+.section swu7b
+.section swu8a
+.section swu8b
+.section swu9a
+.section swu9b
+.section swu0a
+.section swu0b
+.section swvaa
+.section swvab
+.section swvba
+.section swvbb
+.section swvca
+.section swvcb
+.section swvda
+.section swvdb
+.section swvea
+.section swveb
+.section swvfa
+.section swvfb
+.section swvga
+.section swvgb
+.section swvha
+.section swvhb
+.section swvia
+.section swvib
+.section swvja
+.section swvjb
+.section swvka
+.section swvkb
+.section swvla
+.section swvlb
+.section swvma
+.section swvmb
+.section swvna
+.section swvnb
+.section swvoa
+.section swvob
+.section swvpa
+.section swvpb
+.section swvqa
+.section swvqb
+.section swvra
+.section swvrb
+.section swvsa
+.section swvsb
+.section swvta
+.section swvtb
+.section swvua
+.section swvub
+.section swvva
+.section swvvb
+.section swvwa
+.section swvwb
+.section swvxa
+.section swvxb
+.section swvya
+.section swvyb
+.section swvza
+.section swvzb
+.section swv1a
+.section swv1b
+.section swv2a
+.section swv2b
+.section swv3a
+.section swv3b
+.section swv4a
+.section swv4b
+.section swv5a
+.section swv5b
+.section swv6a
+.section swv6b
+.section swv7a
+.section swv7b
+.section swv8a
+.section swv8b
+.section swv9a
+.section swv9b
+.section swv0a
+.section swv0b
+.section swwaa
+.section swwab
+.section swwba
+.section swwbb
+.section swwca
+.section swwcb
+.section swwda
+.section swwdb
+.section swwea
+.section swweb
+.section swwfa
+.section swwfb
+.section swwga
+.section swwgb
+.section swwha
+.section swwhb
+.section swwia
+.section swwib
+.section swwja
+.section swwjb
+.section swwka
+.section swwkb
+.section swwla
+.section swwlb
+.section swwma
+.section swwmb
+.section swwna
+.section swwnb
+.section swwoa
+.section swwob
+.section swwpa
+.section swwpb
+.section swwqa
+.section swwqb
+.section swwra
+.section swwrb
+.section swwsa
+.section swwsb
+.section swwta
+.section swwtb
+.section swwua
+.section swwub
+.section swwva
+.section swwvb
+.section swwwa
+.section swwwb
+.section swwxa
+.section swwxb
+.section swwya
+.section swwyb
+.section swwza
+.section swwzb
+.section sww1a
+.section sww1b
+.section sww2a
+.section sww2b
+.section sww3a
+.section sww3b
+.section sww4a
+.section sww4b
+.section sww5a
+.section sww5b
+.section sww6a
+.section sww6b
+.section sww7a
+.section sww7b
+.section sww8a
+.section sww8b
+.section sww9a
+.section sww9b
+.section sww0a
+.section sww0b
+.section swxaa
+.section swxab
+.section swxba
+.section swxbb
+.section swxca
+.section swxcb
+.section swxda
+.section swxdb
+.section swxea
+.section swxeb
+.section swxfa
+.section swxfb
+.section swxga
+.section swxgb
+.section swxha
+.section swxhb
+.section swxia
+.section swxib
+.section swxja
+.section swxjb
+.section swxka
+.section swxkb
+.section swxla
+.section swxlb
+.section swxma
+.section swxmb
+.section swxna
+.section swxnb
+.section swxoa
+.section swxob
+.section swxpa
+.section swxpb
+.section swxqa
+.section swxqb
+.section swxra
+.section swxrb
+.section swxsa
+.section swxsb
+.section swxta
+.section swxtb
+.section swxua
+.section swxub
+.section swxva
+.section swxvb
+.section swxwa
+.section swxwb
+.section swxxa
+.section swxxb
+.section swxya
+.section swxyb
+.section swxza
+.section swxzb
+.section swx1a
+.section swx1b
+.section swx2a
+.section swx2b
+.section swx3a
+.section swx3b
+.section swx4a
+.section swx4b
+.section swx5a
+.section swx5b
+.section swx6a
+.section swx6b
+.section swx7a
+.section swx7b
+.section swx8a
+.section swx8b
+.section swx9a
+.section swx9b
+.section swx0a
+.section swx0b
+.section swyaa
+.section swyab
+.section swyba
+.section swybb
+.section swyca
+.section swycb
+.section swyda
+.section swydb
+.section swyea
+.section swyeb
+.section swyfa
+.section swyfb
+.section swyga
+.section swygb
+.section swyha
+.section swyhb
+.section swyia
+.section swyib
+.section swyja
+.section swyjb
+.section swyka
+.section swykb
+.section swyla
+.section swylb
+.section swyma
+.section swymb
+.section swyna
+.section swynb
+.section swyoa
+.section swyob
+.section swypa
+.section swypb
+.section swyqa
+.section swyqb
+.section swyra
+.section swyrb
+.section swysa
+.section swysb
+.section swyta
+.section swytb
+.section swyua
+.section swyub
+.section swyva
+.section swyvb
+.section swywa
+.section swywb
+.section swyxa
+.section swyxb
+.section swyya
+.section swyyb
+.section swyza
+.section swyzb
+.section swy1a
+.section swy1b
+.section swy2a
+.section swy2b
+.section swy3a
+.section swy3b
+.section swy4a
+.section swy4b
+.section swy5a
+.section swy5b
+.section swy6a
+.section swy6b
+.section swy7a
+.section swy7b
+.section swy8a
+.section swy8b
+.section swy9a
+.section swy9b
+.section swy0a
+.section swy0b
+.section swzaa
+.section swzab
+.section swzba
+.section swzbb
+.section swzca
+.section swzcb
+.section swzda
+.section swzdb
+.section swzea
+.section swzeb
+.section swzfa
+.section swzfb
+.section swzga
+.section swzgb
+.section swzha
+.section swzhb
+.section swzia
+.section swzib
+.section swzja
+.section swzjb
+.section swzka
+.section swzkb
+.section swzla
+.section swzlb
+.section swzma
+.section swzmb
+.section swzna
+.section swznb
+.section swzoa
+.section swzob
+.section swzpa
+.section swzpb
+.section swzqa
+.section swzqb
+.section swzra
+.section swzrb
+.section swzsa
+.section swzsb
+.section swzta
+.section swztb
+.section swzua
+.section swzub
+.section swzva
+.section swzvb
+.section swzwa
+.section swzwb
+.section swzxa
+.section swzxb
+.section swzya
+.section swzyb
+.section swzza
+.section swzzb
+.section swz1a
+.section swz1b
+.section swz2a
+.section swz2b
+.section swz3a
+.section swz3b
+.section swz4a
+.section swz4b
+.section swz5a
+.section swz5b
+.section swz6a
+.section swz6b
+.section swz7a
+.section swz7b
+.section swz8a
+.section swz8b
+.section swz9a
+.section swz9b
+.section swz0a
+.section swz0b
+.section sw1aa
+.section sw1ab
+.section sw1ba
+.section sw1bb
+.section sw1ca
+.section sw1cb
+.section sw1da
+.section sw1db
+.section sw1ea
+.section sw1eb
+.section sw1fa
+.section sw1fb
+.section sw1ga
+.section sw1gb
+.section sw1ha
+.section sw1hb
+.section sw1ia
+.section sw1ib
+.section sw1ja
+.section sw1jb
+.section sw1ka
+.section sw1kb
+.section sw1la
+.section sw1lb
+.section sw1ma
+.section sw1mb
+.section sw1na
+.section sw1nb
+.section sw1oa
+.section sw1ob
+.section sw1pa
+.section sw1pb
+.section sw1qa
+.section sw1qb
+.section sw1ra
+.section sw1rb
+.section sw1sa
+.section sw1sb
+.section sw1ta
+.section sw1tb
+.section sw1ua
+.section sw1ub
+.section sw1va
+.section sw1vb
+.section sw1wa
+.section sw1wb
+.section sw1xa
+.section sw1xb
+.section sw1ya
+.section sw1yb
+.section sw1za
+.section sw1zb
+.section sw11a
+.section sw11b
+.section sw12a
+.section sw12b
+.section sw13a
+.section sw13b
+.section sw14a
+.section sw14b
+.section sw15a
+.section sw15b
+.section sw16a
+.section sw16b
+.section sw17a
+.section sw17b
+.section sw18a
+.section sw18b
+.section sw19a
+.section sw19b
+.section sw10a
+.section sw10b
+.section sw2aa
+.section sw2ab
+.section sw2ba
+.section sw2bb
+.section sw2ca
+.section sw2cb
+.section sw2da
+.section sw2db
+.section sw2ea
+.section sw2eb
+.section sw2fa
+.section sw2fb
+.section sw2ga
+.section sw2gb
+.section sw2ha
+.section sw2hb
+.section sw2ia
+.section sw2ib
+.section sw2ja
+.section sw2jb
+.section sw2ka
+.section sw2kb
+.section sw2la
+.section sw2lb
+.section sw2ma
+.section sw2mb
+.section sw2na
+.section sw2nb
+.section sw2oa
+.section sw2ob
+.section sw2pa
+.section sw2pb
+.section sw2qa
+.section sw2qb
+.section sw2ra
+.section sw2rb
+.section sw2sa
+.section sw2sb
+.section sw2ta
+.section sw2tb
+.section sw2ua
+.section sw2ub
+.section sw2va
+.section sw2vb
+.section sw2wa
+.section sw2wb
+.section sw2xa
+.section sw2xb
+.section sw2ya
+.section sw2yb
+.section sw2za
+.section sw2zb
+.section sw21a
+.section sw21b
+.section sw22a
+.section sw22b
+.section sw23a
+.section sw23b
+.section sw24a
+.section sw24b
+.section sw25a
+.section sw25b
+.section sw26a
+.section sw26b
+.section sw27a
+.section sw27b
+.section sw28a
+.section sw28b
+.section sw29a
+.section sw29b
+.section sw20a
+.section sw20b
+.section sw3aa
+.section sw3ab
+.section sw3ba
+.section sw3bb
+.section sw3ca
+.section sw3cb
+.section sw3da
+.section sw3db
+.section sw3ea
+.section sw3eb
+.section sw3fa
+.section sw3fb
+.section sw3ga
+.section sw3gb
+.section sw3ha
+.section sw3hb
+.section sw3ia
+.section sw3ib
+.section sw3ja
+.section sw3jb
+.section sw3ka
+.section sw3kb
+.section sw3la
+.section sw3lb
+.section sw3ma
+.section sw3mb
+.section sw3na
+.section sw3nb
+.section sw3oa
+.section sw3ob
+.section sw3pa
+.section sw3pb
+.section sw3qa
+.section sw3qb
+.section sw3ra
+.section sw3rb
+.section sw3sa
+.section sw3sb
+.section sw3ta
+.section sw3tb
+.section sw3ua
+.section sw3ub
+.section sw3va
+.section sw3vb
+.section sw3wa
+.section sw3wb
+.section sw3xa
+.section sw3xb
+.section sw3ya
+.section sw3yb
+.section sw3za
+.section sw3zb
+.section sw31a
+.section sw31b
+.section sw32a
+.section sw32b
+.section sw33a
+.section sw33b
+.section sw34a
+.section sw34b
+.section sw35a
+.section sw35b
+.section sw36a
+.section sw36b
+.section sw37a
+.section sw37b
+.section sw38a
+.section sw38b
+.section sw39a
+.section sw39b
+.section sw30a
+.section sw30b
+.section sw4aa
+.section sw4ab
+.section sw4ba
+.section sw4bb
+.section sw4ca
+.section sw4cb
+.section sw4da
+.section sw4db
+.section sw4ea
+.section sw4eb
+.section sw4fa
+.section sw4fb
+.section sw4ga
+.section sw4gb
+.section sw4ha
+.section sw4hb
+.section sw4ia
+.section sw4ib
+.section sw4ja
+.section sw4jb
+.section sw4ka
+.section sw4kb
+.section sw4la
+.section sw4lb
+.section sw4ma
+.section sw4mb
+.section sw4na
+.section sw4nb
+.section sw4oa
+.section sw4ob
+.section sw4pa
+.section sw4pb
+.section sw4qa
+.section sw4qb
+.section sw4ra
+.section sw4rb
+.section sw4sa
+.section sw4sb
+.section sw4ta
+.section sw4tb
+.section sw4ua
+.section sw4ub
+.section sw4va
+.section sw4vb
+.section sw4wa
+.section sw4wb
+.section sw4xa
+.section sw4xb
+.section sw4ya
+.section sw4yb
+.section sw4za
+.section sw4zb
+.section sw41a
+.section sw41b
+.section sw42a
+.section sw42b
+.section sw43a
+.section sw43b
+.section sw44a
+.section sw44b
+.section sw45a
+.section sw45b
+.section sw46a
+.section sw46b
+.section sw47a
+.section sw47b
+.section sw48a
+.section sw48b
+.section sw49a
+.section sw49b
+.section sw40a
+.section sw40b
+.section sw5aa
+.section sw5ab
+.section sw5ba
+.section sw5bb
+.section sw5ca
+.section sw5cb
+.section sw5da
+.section sw5db
+.section sw5ea
+.section sw5eb
+.section sw5fa
+.section sw5fb
+.section sw5ga
+.section sw5gb
+.section sw5ha
+.section sw5hb
+.section sw5ia
+.section sw5ib
+.section sw5ja
+.section sw5jb
+.section sw5ka
+.section sw5kb
+.section sw5la
+.section sw5lb
+.section sw5ma
+.section sw5mb
+.section sw5na
+.section sw5nb
+.section sw5oa
+.section sw5ob
+.section sw5pa
+.section sw5pb
+.section sw5qa
+.section sw5qb
+.section sw5ra
+.section sw5rb
+.section sw5sa
+.section sw5sb
+.section sw5ta
+.section sw5tb
+.section sw5ua
+.section sw5ub
+.section sw5va
+.section sw5vb
+.section sw5wa
+.section sw5wb
+.section sw5xa
+.section sw5xb
+.section sw5ya
+.section sw5yb
+.section sw5za
+.section sw5zb
+.section sw51a
+.section sw51b
+.section sw52a
+.section sw52b
+.section sw53a
+.section sw53b
+.section sw54a
+.section sw54b
+.section sw55a
+.section sw55b
+.section sw56a
+.section sw56b
+.section sw57a
+.section sw57b
+.section sw58a
+.section sw58b
+.section sw59a
+.section sw59b
+.section sw50a
+.section sw50b
+.section sw6aa
+.section sw6ab
+.section sw6ba
+.section sw6bb
+.section sw6ca
+.section sw6cb
+.section sw6da
+.section sw6db
+.section sw6ea
+.section sw6eb
+.section sw6fa
+.section sw6fb
+.section sw6ga
+.section sw6gb
+.section sw6ha
+.section sw6hb
+.section sw6ia
+.section sw6ib
+.section sw6ja
+.section sw6jb
+.section sw6ka
+.section sw6kb
+.section sw6la
+.section sw6lb
+.section sw6ma
+.section sw6mb
+.section sw6na
+.section sw6nb
+.section sw6oa
+.section sw6ob
+.section sw6pa
+.section sw6pb
+.section sw6qa
+.section sw6qb
+.section sw6ra
+.section sw6rb
+.section sw6sa
+.section sw6sb
+.section sw6ta
+.section sw6tb
+.section sw6ua
+.section sw6ub
+.section sw6va
+.section sw6vb
+.section sw6wa
+.section sw6wb
+.section sw6xa
+.section sw6xb
+.section sw6ya
+.section sw6yb
+.section sw6za
+.section sw6zb
+.section sw61a
+.section sw61b
+.section sw62a
+.section sw62b
+.section sw63a
+.section sw63b
+.section sw64a
+.section sw64b
+.section sw65a
+.section sw65b
+.section sw66a
+.section sw66b
+.section sw67a
+.section sw67b
+.section sw68a
+.section sw68b
+.section sw69a
+.section sw69b
+.section sw60a
+.section sw60b
+.section sw7aa
+.section sw7ab
+.section sw7ba
+.section sw7bb
+.section sw7ca
+.section sw7cb
+.section sw7da
+.section sw7db
+.section sw7ea
+.section sw7eb
+.section sw7fa
+.section sw7fb
+.section sw7ga
+.section sw7gb
+.section sw7ha
+.section sw7hb
+.section sw7ia
+.section sw7ib
+.section sw7ja
+.section sw7jb
+.section sw7ka
+.section sw7kb
+.section sw7la
+.section sw7lb
+.section sw7ma
+.section sw7mb
+.section sw7na
+.section sw7nb
+.section sw7oa
+.section sw7ob
+.section sw7pa
+.section sw7pb
+.section sw7qa
+.section sw7qb
+.section sw7ra
+.section sw7rb
+.section sw7sa
+.section sw7sb
+.section sw7ta
+.section sw7tb
+.section sw7ua
+.section sw7ub
+.section sw7va
+.section sw7vb
+.section sw7wa
+.section sw7wb
+.section sw7xa
+.section sw7xb
+.section sw7ya
+.section sw7yb
+.section sw7za
+.section sw7zb
+.section sw71a
+.section sw71b
+.section sw72a
+.section sw72b
+.section sw73a
+.section sw73b
+.section sw74a
+.section sw74b
+.section sw75a
+.section sw75b
+.section sw76a
+.section sw76b
+.section sw77a
+.section sw77b
+.section sw78a
+.section sw78b
+.section sw79a
+.section sw79b
+.section sw70a
+.section sw70b
+.section sw8aa
+.section sw8ab
+.section sw8ba
+.section sw8bb
+.section sw8ca
+.section sw8cb
+.section sw8da
+.section sw8db
+.section sw8ea
+.section sw8eb
+.section sw8fa
+.section sw8fb
+.section sw8ga
+.section sw8gb
+.section sw8ha
+.section sw8hb
+.section sw8ia
+.section sw8ib
+.section sw8ja
+.section sw8jb
+.section sw8ka
+.section sw8kb
+.section sw8la
+.section sw8lb
+.section sw8ma
+.section sw8mb
+.section sw8na
+.section sw8nb
+.section sw8oa
+.section sw8ob
+.section sw8pa
+.section sw8pb
+.section sw8qa
+.section sw8qb
+.section sw8ra
+.section sw8rb
+.section sw8sa
+.section sw8sb
+.section sw8ta
+.section sw8tb
+.section sw8ua
+.section sw8ub
+.section sw8va
+.section sw8vb
+.section sw8wa
+.section sw8wb
+.section sw8xa
+.section sw8xb
+.section sw8ya
+.section sw8yb
+.section sw8za
+.section sw8zb
+.section sw81a
+.section sw81b
+.section sw82a
+.section sw82b
+.section sw83a
+.section sw83b
+.section sw84a
+.section sw84b
+.section sw85a
+.section sw85b
+.section sw86a
+.section sw86b
+.section sw87a
+.section sw87b
+.section sw88a
+.section sw88b
+.section sw89a
+.section sw89b
+.section sw80a
+.section sw80b
+.section sw9aa
+.section sw9ab
+.section sw9ba
+.section sw9bb
+.section sw9ca
+.section sw9cb
+.section sw9da
+.section sw9db
+.section sw9ea
+.section sw9eb
+.section sw9fa
+.section sw9fb
+.section sw9ga
+.section sw9gb
+.section sw9ha
+.section sw9hb
+.section sw9ia
+.section sw9ib
+.section sw9ja
+.section sw9jb
+.section sw9ka
+.section sw9kb
+.section sw9la
+.section sw9lb
+.section sw9ma
+.section sw9mb
+.section sw9na
+.section sw9nb
+.section sw9oa
+.section sw9ob
+.section sw9pa
+.section sw9pb
+.section sw9qa
+.section sw9qb
+.section sw9ra
+.section sw9rb
+.section sw9sa
+.section sw9sb
+.section sw9ta
+.section sw9tb
+.section sw9ua
+.section sw9ub
+.section sw9va
+.section sw9vb
+.section sw9wa
+.section sw9wb
+.section sw9xa
+.section sw9xb
+.section sw9ya
+.section sw9yb
+.section sw9za
+.section sw9zb
+.section sw91a
+.section sw91b
+.section sw92a
+.section sw92b
+.section sw93a
+.section sw93b
+.section sw94a
+.section sw94b
+.section sw95a
+.section sw95b
+.section sw96a
+.section sw96b
+.section sw97a
+.section sw97b
+.section sw98a
+.section sw98b
+.section sw99a
+.section sw99b
+.section sw90a
+.section sw90b
+.section sw0aa
+.section sw0ab
+.section sw0ba
+.section sw0bb
+.section sw0ca
+.section sw0cb
+.section sw0da
+.section sw0db
+.section sw0ea
+.section sw0eb
+.section sw0fa
+.section sw0fb
+.section sw0ga
+.section sw0gb
+.section sw0ha
+.section sw0hb
+.section sw0ia
+.section sw0ib
+.section sw0ja
+.section sw0jb
+.section sw0ka
+.section sw0kb
+.section sw0la
+.section sw0lb
+.section sw0ma
+.section sw0mb
+.section sw0na
+.section sw0nb
+.section sw0oa
+.section sw0ob
+.section sw0pa
+.section sw0pb
+.section sw0qa
+.section sw0qb
+.section sw0ra
+.section sw0rb
+.section sw0sa
+.section sw0sb
+.section sw0ta
+.section sw0tb
+.section sw0ua
+.section sw0ub
+.section sw0va
+.section sw0vb
+.section sw0wa
+.section sw0wb
+.section sw0xa
+.section sw0xb
+.section sw0ya
+.section sw0yb
+.section sw0za
+.section sw0zb
+.section sw01a
+.section sw01b
+.section sw02a
+.section sw02b
+.section sw03a
+.section sw03b
+.section sw04a
+.section sw04b
+.section sw05a
+.section sw05b
+.section sw06a
+.section sw06b
+.section sw07a
+.section sw07b
+.section sw08a
+.section sw08b
+.section sw09a
+.section sw09b
+.section sw00a
+.section sw00b
+.section sxaaa
+.section sxaab
+.section sxaba
+.section sxabb
+.section sxaca
+.section sxacb
+.section sxada
+.section sxadb
+.section sxaea
+.section sxaeb
+.section sxafa
+.section sxafb
+.section sxaga
+.section sxagb
+.section sxaha
+.section sxahb
+.section sxaia
+.section sxaib
+.section sxaja
+.section sxajb
+.section sxaka
+.section sxakb
+.section sxala
+.section sxalb
+.section sxama
+.section sxamb
+.section sxana
+.section sxanb
+.section sxaoa
+.section sxaob
+.section sxapa
+.section sxapb
+.section sxaqa
+.section sxaqb
+.section sxara
+.section sxarb
+.section sxasa
+.section sxasb
+.section sxata
+.section sxatb
+.section sxaua
+.section sxaub
+.section sxava
+.section sxavb
+.section sxawa
+.section sxawb
+.section sxaxa
+.section sxaxb
+.section sxaya
+.section sxayb
+.section sxaza
+.section sxazb
+.section sxa1a
+.section sxa1b
+.section sxa2a
+.section sxa2b
+.section sxa3a
+.section sxa3b
+.section sxa4a
+.section sxa4b
+.section sxa5a
+.section sxa5b
+.section sxa6a
+.section sxa6b
+.section sxa7a
+.section sxa7b
+.section sxa8a
+.section sxa8b
+.section sxa9a
+.section sxa9b
+.section sxa0a
+.section sxa0b
+.section sxbaa
+.section sxbab
+.section sxbba
+.section sxbbb
+.section sxbca
+.section sxbcb
+.section sxbda
+.section sxbdb
+.section sxbea
+.section sxbeb
+.section sxbfa
+.section sxbfb
+.section sxbga
+.section sxbgb
+.section sxbha
+.section sxbhb
+.section sxbia
+.section sxbib
+.section sxbja
+.section sxbjb
+.section sxbka
+.section sxbkb
+.section sxbla
+.section sxblb
+.section sxbma
+.section sxbmb
+.section sxbna
+.section sxbnb
+.section sxboa
+.section sxbob
+.section sxbpa
+.section sxbpb
+.section sxbqa
+.section sxbqb
+.section sxbra
+.section sxbrb
+.section sxbsa
+.section sxbsb
+.section sxbta
+.section sxbtb
+.section sxbua
+.section sxbub
+.section sxbva
+.section sxbvb
+.section sxbwa
+.section sxbwb
+.section sxbxa
+.section sxbxb
+.section sxbya
+.section sxbyb
+.section sxbza
+.section sxbzb
+.section sxb1a
+.section sxb1b
+.section sxb2a
+.section sxb2b
+.section sxb3a
+.section sxb3b
+.section sxb4a
+.section sxb4b
+.section sxb5a
+.section sxb5b
+.section sxb6a
+.section sxb6b
+.section sxb7a
+.section sxb7b
+.section sxb8a
+.section sxb8b
+.section sxb9a
+.section sxb9b
+.section sxb0a
+.section sxb0b
+.section sxcaa
+.section sxcab
+.section sxcba
+.section sxcbb
+.section sxcca
+.section sxccb
+.section sxcda
+.section sxcdb
+.section sxcea
+.section sxceb
+.section sxcfa
+.section sxcfb
+.section sxcga
+.section sxcgb
+.section sxcha
+.section sxchb
+.section sxcia
+.section sxcib
+.section sxcja
+.section sxcjb
+.section sxcka
+.section sxckb
+.section sxcla
+.section sxclb
+.section sxcma
+.section sxcmb
+.section sxcna
+.section sxcnb
+.section sxcoa
+.section sxcob
+.section sxcpa
+.section sxcpb
+.section sxcqa
+.section sxcqb
+.section sxcra
+.section sxcrb
+.section sxcsa
+.section sxcsb
+.section sxcta
+.section sxctb
+.section sxcua
+.section sxcub
+.section sxcva
+.section sxcvb
+.section sxcwa
+.section sxcwb
+.section sxcxa
+.section sxcxb
+.section sxcya
+.section sxcyb
+.section sxcza
+.section sxczb
+.section sxc1a
+.section sxc1b
+.section sxc2a
+.section sxc2b
+.section sxc3a
+.section sxc3b
+.section sxc4a
+.section sxc4b
+.section sxc5a
+.section sxc5b
+.section sxc6a
+.section sxc6b
+.section sxc7a
+.section sxc7b
+.section sxc8a
+.section sxc8b
+.section sxc9a
+.section sxc9b
+.section sxc0a
+.section sxc0b
+.section sxdaa
+.section sxdab
+.section sxdba
+.section sxdbb
+.section sxdca
+.section sxdcb
+.section sxdda
+.section sxddb
+.section sxdea
+.section sxdeb
+.section sxdfa
+.section sxdfb
+.section sxdga
+.section sxdgb
+.section sxdha
+.section sxdhb
+.section sxdia
+.section sxdib
+.section sxdja
+.section sxdjb
+.section sxdka
+.section sxdkb
+.section sxdla
+.section sxdlb
+.section sxdma
+.section sxdmb
+.section sxdna
+.section sxdnb
+.section sxdoa
+.section sxdob
+.section sxdpa
+.section sxdpb
+.section sxdqa
+.section sxdqb
+.section sxdra
+.section sxdrb
+.section sxdsa
+.section sxdsb
+.section sxdta
+.section sxdtb
+.section sxdua
+.section sxdub
+.section sxdva
+.section sxdvb
+.section sxdwa
+.section sxdwb
+.section sxdxa
+.section sxdxb
+.section sxdya
+.section sxdyb
+.section sxdza
+.section sxdzb
+.section sxd1a
+.section sxd1b
+.section sxd2a
+.section sxd2b
+.section sxd3a
+.section sxd3b
+.section sxd4a
+.section sxd4b
+.section sxd5a
+.section sxd5b
+.section sxd6a
+.section sxd6b
+.section sxd7a
+.section sxd7b
+.section sxd8a
+.section sxd8b
+.section sxd9a
+.section sxd9b
+.section sxd0a
+.section sxd0b
+.section sxeaa
+.section sxeab
+.section sxeba
+.section sxebb
+.section sxeca
+.section sxecb
+.section sxeda
+.section sxedb
+.section sxeea
+.section sxeeb
+.section sxefa
+.section sxefb
+.section sxega
+.section sxegb
+.section sxeha
+.section sxehb
+.section sxeia
+.section sxeib
+.section sxeja
+.section sxejb
+.section sxeka
+.section sxekb
+.section sxela
+.section sxelb
+.section sxema
+.section sxemb
+.section sxena
+.section sxenb
+.section sxeoa
+.section sxeob
+.section sxepa
+.section sxepb
+.section sxeqa
+.section sxeqb
+.section sxera
+.section sxerb
+.section sxesa
+.section sxesb
+.section sxeta
+.section sxetb
+.section sxeua
+.section sxeub
+.section sxeva
+.section sxevb
+.section sxewa
+.section sxewb
+.section sxexa
+.section sxexb
+.section sxeya
+.section sxeyb
+.section sxeza
+.section sxezb
+.section sxe1a
+.section sxe1b
+.section sxe2a
+.section sxe2b
+.section sxe3a
+.section sxe3b
+.section sxe4a
+.section sxe4b
+.section sxe5a
+.section sxe5b
+.section sxe6a
+.section sxe6b
+.section sxe7a
+.section sxe7b
+.section sxe8a
+.section sxe8b
+.section sxe9a
+.section sxe9b
+.section sxe0a
+.section sxe0b
+.section sxfaa
+.section sxfab
+.section sxfba
+.section sxfbb
+.section sxfca
+.section sxfcb
+.section sxfda
+.section sxfdb
+.section sxfea
+.section sxfeb
+.section sxffa
+.section sxffb
+.section sxfga
+.section sxfgb
+.section sxfha
+.section sxfhb
+.section sxfia
+.section sxfib
+.section sxfja
+.section sxfjb
+.section sxfka
+.section sxfkb
+.section sxfla
+.section sxflb
+.section sxfma
+.section sxfmb
+.section sxfna
+.section sxfnb
+.section sxfoa
+.section sxfob
+.section sxfpa
+.section sxfpb
+.section sxfqa
+.section sxfqb
+.section sxfra
+.section sxfrb
+.section sxfsa
+.section sxfsb
+.section sxfta
+.section sxftb
+.section sxfua
+.section sxfub
+.section sxfva
+.section sxfvb
+.section sxfwa
+.section sxfwb
+.section sxfxa
+.section sxfxb
+.section sxfya
+.section sxfyb
+.section sxfza
+.section sxfzb
+.section sxf1a
+.section sxf1b
+.section sxf2a
+.section sxf2b
+.section sxf3a
+.section sxf3b
+.section sxf4a
+.section sxf4b
+.section sxf5a
+.section sxf5b
+.section sxf6a
+.section sxf6b
+.section sxf7a
+.section sxf7b
+.section sxf8a
+.section sxf8b
+.section sxf9a
+.section sxf9b
+.section sxf0a
+.section sxf0b
+.section sxgaa
+.section sxgab
+.section sxgba
+.section sxgbb
+.section sxgca
+.section sxgcb
+.section sxgda
+.section sxgdb
+.section sxgea
+.section sxgeb
+.section sxgfa
+.section sxgfb
+.section sxgga
+.section sxggb
+.section sxgha
+.section sxghb
+.section sxgia
+.section sxgib
+.section sxgja
+.section sxgjb
+.section sxgka
+.section sxgkb
+.section sxgla
+.section sxglb
+.section sxgma
+.section sxgmb
+.section sxgna
+.section sxgnb
+.section sxgoa
+.section sxgob
+.section sxgpa
+.section sxgpb
+.section sxgqa
+.section sxgqb
+.section sxgra
+.section sxgrb
+.section sxgsa
+.section sxgsb
+.section sxgta
+.section sxgtb
+.section sxgua
+.section sxgub
+.section sxgva
+.section sxgvb
+.section sxgwa
+.section sxgwb
+.section sxgxa
+.section sxgxb
+.section sxgya
+.section sxgyb
+.section sxgza
+.section sxgzb
+.section sxg1a
+.section sxg1b
+.section sxg2a
+.section sxg2b
+.section sxg3a
+.section sxg3b
+.section sxg4a
+.section sxg4b
+.section sxg5a
+.section sxg5b
+.section sxg6a
+.section sxg6b
+.section sxg7a
+.section sxg7b
+.section sxg8a
+.section sxg8b
+.section sxg9a
+.section sxg9b
+.section sxg0a
+.section sxg0b
+.section sxhaa
+.section sxhab
+.section sxhba
+.section sxhbb
+.section sxhca
+.section sxhcb
+.section sxhda
+.section sxhdb
+.section sxhea
+.section sxheb
+.section sxhfa
+.section sxhfb
+.section sxhga
+.section sxhgb
+.section sxhha
+.section sxhhb
+.section sxhia
+.section sxhib
+.section sxhja
+.section sxhjb
+.section sxhka
+.section sxhkb
+.section sxhla
+.section sxhlb
+.section sxhma
+.section sxhmb
+.section sxhna
+.section sxhnb
+.section sxhoa
+.section sxhob
+.section sxhpa
+.section sxhpb
+.section sxhqa
+.section sxhqb
+.section sxhra
+.section sxhrb
+.section sxhsa
+.section sxhsb
+.section sxhta
+.section sxhtb
+.section sxhua
+.section sxhub
+.section sxhva
+.section sxhvb
+.section sxhwa
+.section sxhwb
+.section sxhxa
+.section sxhxb
+.section sxhya
+.section sxhyb
+.section sxhza
+.section sxhzb
+.section sxh1a
+.section sxh1b
+.section sxh2a
+.section sxh2b
+.section sxh3a
+.section sxh3b
+.section sxh4a
+.section sxh4b
+.section sxh5a
+.section sxh5b
+.section sxh6a
+.section sxh6b
+.section sxh7a
+.section sxh7b
+.section sxh8a
+.section sxh8b
+.section sxh9a
+.section sxh9b
+.section sxh0a
+.section sxh0b
+.section sxiaa
+.section sxiab
+.section sxiba
+.section sxibb
+.section sxica
+.section sxicb
+.section sxida
+.section sxidb
+.section sxiea
+.section sxieb
+.section sxifa
+.section sxifb
+.section sxiga
+.section sxigb
+.section sxiha
+.section sxihb
+.section sxiia
+.section sxiib
+.section sxija
+.section sxijb
+.section sxika
+.section sxikb
+.section sxila
+.section sxilb
+.section sxima
+.section sximb
+.section sxina
+.section sxinb
+.section sxioa
+.section sxiob
+.section sxipa
+.section sxipb
+.section sxiqa
+.section sxiqb
+.section sxira
+.section sxirb
+.section sxisa
+.section sxisb
+.section sxita
+.section sxitb
+.section sxiua
+.section sxiub
+.section sxiva
+.section sxivb
+.section sxiwa
+.section sxiwb
+.section sxixa
+.section sxixb
+.section sxiya
+.section sxiyb
+.section sxiza
+.section sxizb
+.section sxi1a
+.section sxi1b
+.section sxi2a
+.section sxi2b
+.section sxi3a
+.section sxi3b
+.section sxi4a
+.section sxi4b
+.section sxi5a
+.section sxi5b
+.section sxi6a
+.section sxi6b
+.section sxi7a
+.section sxi7b
+.section sxi8a
+.section sxi8b
+.section sxi9a
+.section sxi9b
+.section sxi0a
+.section sxi0b
+.section sxjaa
+.section sxjab
+.section sxjba
+.section sxjbb
+.section sxjca
+.section sxjcb
+.section sxjda
+.section sxjdb
+.section sxjea
+.section sxjeb
+.section sxjfa
+.section sxjfb
+.section sxjga
+.section sxjgb
+.section sxjha
+.section sxjhb
+.section sxjia
+.section sxjib
+.section sxjja
+.section sxjjb
+.section sxjka
+.section sxjkb
+.section sxjla
+.section sxjlb
+.section sxjma
+.section sxjmb
+.section sxjna
+.section sxjnb
+.section sxjoa
+.section sxjob
+.section sxjpa
+.section sxjpb
+.section sxjqa
+.section sxjqb
+.section sxjra
+.section sxjrb
+.section sxjsa
+.section sxjsb
+.section sxjta
+.section sxjtb
+.section sxjua
+.section sxjub
+.section sxjva
+.section sxjvb
+.section sxjwa
+.section sxjwb
+.section sxjxa
+.section sxjxb
+.section sxjya
+.section sxjyb
+.section sxjza
+.section sxjzb
+.section sxj1a
+.section sxj1b
+.section sxj2a
+.section sxj2b
+.section sxj3a
+.section sxj3b
+.section sxj4a
+.section sxj4b
+.section sxj5a
+.section sxj5b
+.section sxj6a
+.section sxj6b
+.section sxj7a
+.section sxj7b
+.section sxj8a
+.section sxj8b
+.section sxj9a
+.section sxj9b
+.section sxj0a
+.section sxj0b
+.section sxkaa
+.section sxkab
+.section sxkba
+.section sxkbb
+.section sxkca
+.section sxkcb
+.section sxkda
+.section sxkdb
+.section sxkea
+.section sxkeb
+.section sxkfa
+.section sxkfb
+.section sxkga
+.section sxkgb
+.section sxkha
+.section sxkhb
+.section sxkia
+.section sxkib
+.section sxkja
+.section sxkjb
+.section sxkka
+.section sxkkb
+.section sxkla
+.section sxklb
+.section sxkma
+.section sxkmb
+.section sxkna
+.section sxknb
+.section sxkoa
+.section sxkob
+.section sxkpa
+.section sxkpb
+.section sxkqa
+.section sxkqb
+.section sxkra
+.section sxkrb
+.section sxksa
+.section sxksb
+.section sxkta
+.section sxktb
+.section sxkua
+.section sxkub
+.section sxkva
+.section sxkvb
+.section sxkwa
+.section sxkwb
+.section sxkxa
+.section sxkxb
+.section sxkya
+.section sxkyb
+.section sxkza
+.section sxkzb
+.section sxk1a
+.section sxk1b
+.section sxk2a
+.section sxk2b
+.section sxk3a
+.section sxk3b
+.section sxk4a
+.section sxk4b
+.section sxk5a
+.section sxk5b
+.section sxk6a
+.section sxk6b
+.section sxk7a
+.section sxk7b
+.section sxk8a
+.section sxk8b
+.section sxk9a
+.section sxk9b
+.section sxk0a
+.section sxk0b
+.section sxlaa
+.section sxlab
+.section sxlba
+.section sxlbb
+.section sxlca
+.section sxlcb
+.section sxlda
+.section sxldb
+.section sxlea
+.section sxleb
+.section sxlfa
+.section sxlfb
+.section sxlga
+.section sxlgb
+.section sxlha
+.section sxlhb
+.section sxlia
+.section sxlib
+.section sxlja
+.section sxljb
+.section sxlka
+.section sxlkb
+.section sxlla
+.section sxllb
+.section sxlma
+.section sxlmb
+.section sxlna
+.section sxlnb
+.section sxloa
+.section sxlob
+.section sxlpa
+.section sxlpb
+.section sxlqa
+.section sxlqb
+.section sxlra
+.section sxlrb
+.section sxlsa
+.section sxlsb
+.section sxlta
+.section sxltb
+.section sxlua
+.section sxlub
+.section sxlva
+.section sxlvb
+.section sxlwa
+.section sxlwb
+.section sxlxa
+.section sxlxb
+.section sxlya
+.section sxlyb
+.section sxlza
+.section sxlzb
+.section sxl1a
+.section sxl1b
+.section sxl2a
+.section sxl2b
+.section sxl3a
+.section sxl3b
+.section sxl4a
+.section sxl4b
+.section sxl5a
+.section sxl5b
+.section sxl6a
+.section sxl6b
+.section sxl7a
+.section sxl7b
+.section sxl8a
+.section sxl8b
+.section sxl9a
+.section sxl9b
+.section sxl0a
+.section sxl0b
+.section sxmaa
+.section sxmab
+.section sxmba
+.section sxmbb
+.section sxmca
+.section sxmcb
+.section sxmda
+.section sxmdb
+.section sxmea
+.section sxmeb
+.section sxmfa
+.section sxmfb
+.section sxmga
+.section sxmgb
+.section sxmha
+.section sxmhb
+.section sxmia
+.section sxmib
+.section sxmja
+.section sxmjb
+.section sxmka
+.section sxmkb
+.section sxmla
+.section sxmlb
+.section sxmma
+.section sxmmb
+.section sxmna
+.section sxmnb
+.section sxmoa
+.section sxmob
+.section sxmpa
+.section sxmpb
+.section sxmqa
+.section sxmqb
+.section sxmra
+.section sxmrb
+.section sxmsa
+.section sxmsb
+.section sxmta
+.section sxmtb
+.section sxmua
+.section sxmub
+.section sxmva
+.section sxmvb
+.section sxmwa
+.section sxmwb
+.section sxmxa
+.section sxmxb
+.section sxmya
+.section sxmyb
+.section sxmza
+.section sxmzb
+.section sxm1a
+.section sxm1b
+.section sxm2a
+.section sxm2b
+.section sxm3a
+.section sxm3b
+.section sxm4a
+.section sxm4b
+.section sxm5a
+.section sxm5b
+.section sxm6a
+.section sxm6b
+.section sxm7a
+.section sxm7b
+.section sxm8a
+.section sxm8b
+.section sxm9a
+.section sxm9b
+.section sxm0a
+.section sxm0b
+.section sxnaa
+.section sxnab
+.section sxnba
+.section sxnbb
+.section sxnca
+.section sxncb
+.section sxnda
+.section sxndb
+.section sxnea
+.section sxneb
+.section sxnfa
+.section sxnfb
+.section sxnga
+.section sxngb
+.section sxnha
+.section sxnhb
+.section sxnia
+.section sxnib
+.section sxnja
+.section sxnjb
+.section sxnka
+.section sxnkb
+.section sxnla
+.section sxnlb
+.section sxnma
+.section sxnmb
+.section sxnna
+.section sxnnb
+.section sxnoa
+.section sxnob
+.section sxnpa
+.section sxnpb
+.section sxnqa
+.section sxnqb
+.section sxnra
+.section sxnrb
+.section sxnsa
+.section sxnsb
+.section sxnta
+.section sxntb
+.section sxnua
+.section sxnub
+.section sxnva
+.section sxnvb
+.section sxnwa
+.section sxnwb
+.section sxnxa
+.section sxnxb
+.section sxnya
+.section sxnyb
+.section sxnza
+.section sxnzb
+.section sxn1a
+.section sxn1b
+.section sxn2a
+.section sxn2b
+.section sxn3a
+.section sxn3b
+.section sxn4a
+.section sxn4b
+.section sxn5a
+.section sxn5b
+.section sxn6a
+.section sxn6b
+.section sxn7a
+.section sxn7b
+.section sxn8a
+.section sxn8b
+.section sxn9a
+.section sxn9b
+.section sxn0a
+.section sxn0b
+.section sxoaa
+.section sxoab
+.section sxoba
+.section sxobb
+.section sxoca
+.section sxocb
+.section sxoda
+.section sxodb
+.section sxoea
+.section sxoeb
+.section sxofa
+.section sxofb
+.section sxoga
+.section sxogb
+.section sxoha
+.section sxohb
+.section sxoia
+.section sxoib
+.section sxoja
+.section sxojb
+.section sxoka
+.section sxokb
+.section sxola
+.section sxolb
+.section sxoma
+.section sxomb
+.section sxona
+.section sxonb
+.section sxooa
+.section sxoob
+.section sxopa
+.section sxopb
+.section sxoqa
+.section sxoqb
+.section sxora
+.section sxorb
+.section sxosa
+.section sxosb
+.section sxota
+.section sxotb
+.section sxoua
+.section sxoub
+.section sxova
+.section sxovb
+.section sxowa
+.section sxowb
+.section sxoxa
+.section sxoxb
+.section sxoya
+.section sxoyb
+.section sxoza
+.section sxozb
+.section sxo1a
+.section sxo1b
+.section sxo2a
+.section sxo2b
+.section sxo3a
+.section sxo3b
+.section sxo4a
+.section sxo4b
+.section sxo5a
+.section sxo5b
+.section sxo6a
+.section sxo6b
+.section sxo7a
+.section sxo7b
+.section sxo8a
+.section sxo8b
+.section sxo9a
+.section sxo9b
+.section sxo0a
+.section sxo0b
+.section sxpaa
+.section sxpab
+.section sxpba
+.section sxpbb
+.section sxpca
+.section sxpcb
+.section sxpda
+.section sxpdb
+.section sxpea
+.section sxpeb
+.section sxpfa
+.section sxpfb
+.section sxpga
+.section sxpgb
+.section sxpha
+.section sxphb
+.section sxpia
+.section sxpib
+.section sxpja
+.section sxpjb
+.section sxpka
+.section sxpkb
+.section sxpla
+.section sxplb
+.section sxpma
+.section sxpmb
+.section sxpna
+.section sxpnb
+.section sxpoa
+.section sxpob
+.section sxppa
+.section sxppb
+.section sxpqa
+.section sxpqb
+.section sxpra
+.section sxprb
+.section sxpsa
+.section sxpsb
+.section sxpta
+.section sxptb
+.section sxpua
+.section sxpub
+.section sxpva
+.section sxpvb
+.section sxpwa
+.section sxpwb
+.section sxpxa
+.section sxpxb
+.section sxpya
+.section sxpyb
+.section sxpza
+.section sxpzb
+.section sxp1a
+.section sxp1b
+.section sxp2a
+.section sxp2b
+.section sxp3a
+.section sxp3b
+.section sxp4a
+.section sxp4b
+.section sxp5a
+.section sxp5b
+.section sxp6a
+.section sxp6b
+.section sxp7a
+.section sxp7b
+.section sxp8a
+.section sxp8b
+.section sxp9a
+.section sxp9b
+.section sxp0a
+.section sxp0b
+.section sxqaa
+.section sxqab
+.section sxqba
+.section sxqbb
+.section sxqca
+.section sxqcb
+.section sxqda
+.section sxqdb
+.section sxqea
+.section sxqeb
+.section sxqfa
+.section sxqfb
+.section sxqga
+.section sxqgb
+.section sxqha
+.section sxqhb
+.section sxqia
+.section sxqib
+.section sxqja
+.section sxqjb
+.section sxqka
+.section sxqkb
+.section sxqla
+.section sxqlb
+.section sxqma
+.section sxqmb
+.section sxqna
+.section sxqnb
+.section sxqoa
+.section sxqob
+.section sxqpa
+.section sxqpb
+.section sxqqa
+.section sxqqb
+.section sxqra
+.section sxqrb
+.section sxqsa
+.section sxqsb
+.section sxqta
+.section sxqtb
+.section sxqua
+.section sxqub
+.section sxqva
+.section sxqvb
+.section sxqwa
+.section sxqwb
+.section sxqxa
+.section sxqxb
+.section sxqya
+.section sxqyb
+.section sxqza
+.section sxqzb
+.section sxq1a
+.section sxq1b
+.section sxq2a
+.section sxq2b
+.section sxq3a
+.section sxq3b
+.section sxq4a
+.section sxq4b
+.section sxq5a
+.section sxq5b
+.section sxq6a
+.section sxq6b
+.section sxq7a
+.section sxq7b
+.section sxq8a
+.section sxq8b
+.section sxq9a
+.section sxq9b
+.section sxq0a
+.section sxq0b
+.section sxraa
+.section sxrab
+.section sxrba
+.section sxrbb
+.section sxrca
+.section sxrcb
+.section sxrda
+.section sxrdb
+.section sxrea
+.section sxreb
+.section sxrfa
+.section sxrfb
+.section sxrga
+.section sxrgb
+.section sxrha
+.section sxrhb
+.section sxria
+.section sxrib
+.section sxrja
+.section sxrjb
+.section sxrka
+.section sxrkb
+.section sxrla
+.section sxrlb
+.section sxrma
+.section sxrmb
+.section sxrna
+.section sxrnb
+.section sxroa
+.section sxrob
+.section sxrpa
+.section sxrpb
+.section sxrqa
+.section sxrqb
+.section sxrra
+.section sxrrb
+.section sxrsa
+.section sxrsb
+.section sxrta
+.section sxrtb
+.section sxrua
+.section sxrub
+.section sxrva
+.section sxrvb
+.section sxrwa
+.section sxrwb
+.section sxrxa
+.section sxrxb
+.section sxrya
+.section sxryb
+.section sxrza
+.section sxrzb
+.section sxr1a
+.section sxr1b
+.section sxr2a
+.section sxr2b
+.section sxr3a
+.section sxr3b
+.section sxr4a
+.section sxr4b
+.section sxr5a
+.section sxr5b
+.section sxr6a
+.section sxr6b
+.section sxr7a
+.section sxr7b
+.section sxr8a
+.section sxr8b
+.section sxr9a
+.section sxr9b
+.section sxr0a
+.section sxr0b
+.section sxsaa
+.section sxsab
+.section sxsba
+.section sxsbb
+.section sxsca
+.section sxscb
+.section sxsda
+.section sxsdb
+.section sxsea
+.section sxseb
+.section sxsfa
+.section sxsfb
+.section sxsga
+.section sxsgb
+.section sxsha
+.section sxshb
+.section sxsia
+.section sxsib
+.section sxsja
+.section sxsjb
+.section sxska
+.section sxskb
+.section sxsla
+.section sxslb
+.section sxsma
+.section sxsmb
+.section sxsna
+.section sxsnb
+.section sxsoa
+.section sxsob
+.section sxspa
+.section sxspb
+.section sxsqa
+.section sxsqb
+.section sxsra
+.section sxsrb
+.section sxssa
+.section sxssb
+.section sxsta
+.section sxstb
+.section sxsua
+.section sxsub
+.section sxsva
+.section sxsvb
+.section sxswa
+.section sxswb
+.section sxsxa
+.section sxsxb
+.section sxsya
+.section sxsyb
+.section sxsza
+.section sxszb
+.section sxs1a
+.section sxs1b
+.section sxs2a
+.section sxs2b
+.section sxs3a
+.section sxs3b
+.section sxs4a
+.section sxs4b
+.section sxs5a
+.section sxs5b
+.section sxs6a
+.section sxs6b
+.section sxs7a
+.section sxs7b
+.section sxs8a
+.section sxs8b
+.section sxs9a
+.section sxs9b
+.section sxs0a
+.section sxs0b
+.section sxtaa
+.section sxtab
+.section sxtba
+.section sxtbb
+.section sxtca
+.section sxtcb
+.section sxtda
+.section sxtdb
+.section sxtea
+.section sxteb
+.section sxtfa
+.section sxtfb
+.section sxtga
+.section sxtgb
+.section sxtha
+.section sxthb
+.section sxtia
+.section sxtib
+.section sxtja
+.section sxtjb
+.section sxtka
+.section sxtkb
+.section sxtla
+.section sxtlb
+.section sxtma
+.section sxtmb
+.section sxtna
+.section sxtnb
+.section sxtoa
+.section sxtob
+.section sxtpa
+.section sxtpb
+.section sxtqa
+.section sxtqb
+.section sxtra
+.section sxtrb
+.section sxtsa
+.section sxtsb
+.section sxtta
+.section sxttb
+.section sxtua
+.section sxtub
+.section sxtva
+.section sxtvb
+.section sxtwa
+.section sxtwb
+.section sxtxa
+.section sxtxb
+.section sxtya
+.section sxtyb
+.section sxtza
+.section sxtzb
+.section sxt1a
+.section sxt1b
+.section sxt2a
+.section sxt2b
+.section sxt3a
+.section sxt3b
+.section sxt4a
+.section sxt4b
+.section sxt5a
+.section sxt5b
+.section sxt6a
+.section sxt6b
+.section sxt7a
+.section sxt7b
+.section sxt8a
+.section sxt8b
+.section sxt9a
+.section sxt9b
+.section sxt0a
+.section sxt0b
+.section sxuaa
+.section sxuab
+.section sxuba
+.section sxubb
+.section sxuca
+.section sxucb
+.section sxuda
+.section sxudb
+.section sxuea
+.section sxueb
+.section sxufa
+.section sxufb
+.section sxuga
+.section sxugb
+.section sxuha
+.section sxuhb
+.section sxuia
+.section sxuib
+.section sxuja
+.section sxujb
+.section sxuka
+.section sxukb
+.section sxula
+.section sxulb
+.section sxuma
+.section sxumb
+.section sxuna
+.section sxunb
+.section sxuoa
+.section sxuob
+.section sxupa
+.section sxupb
+.section sxuqa
+.section sxuqb
+.section sxura
+.section sxurb
+.section sxusa
+.section sxusb
+.section sxuta
+.section sxutb
+.section sxuua
+.section sxuub
+.section sxuva
+.section sxuvb
+.section sxuwa
+.section sxuwb
+.section sxuxa
+.section sxuxb
+.section sxuya
+.section sxuyb
+.section sxuza
+.section sxuzb
+.section sxu1a
+.section sxu1b
+.section sxu2a
+.section sxu2b
+.section sxu3a
+.section sxu3b
+.section sxu4a
+.section sxu4b
+.section sxu5a
+.section sxu5b
+.section sxu6a
+.section sxu6b
+.section sxu7a
+.section sxu7b
+.section sxu8a
+.section sxu8b
+.section sxu9a
+.section sxu9b
+.section sxu0a
+.section sxu0b
+.section sxvaa
+.section sxvab
+.section sxvba
+.section sxvbb
+.section sxvca
+.section sxvcb
+.section sxvda
+.section sxvdb
+.section sxvea
+.section sxveb
+.section sxvfa
+.section sxvfb
+.section sxvga
+.section sxvgb
+.section sxvha
+.section sxvhb
+.section sxvia
+.section sxvib
+.section sxvja
+.section sxvjb
+.section sxvka
+.section sxvkb
+.section sxvla
+.section sxvlb
+.section sxvma
+.section sxvmb
+.section sxvna
+.section sxvnb
+.section sxvoa
+.section sxvob
+.section sxvpa
+.section sxvpb
+.section sxvqa
+.section sxvqb
+.section sxvra
+.section sxvrb
+.section sxvsa
+.section sxvsb
+.section sxvta
+.section sxvtb
+.section sxvua
+.section sxvub
+.section sxvva
+.section sxvvb
+.section sxvwa
+.section sxvwb
+.section sxvxa
+.section sxvxb
+.section sxvya
+.section sxvyb
+.section sxvza
+.section sxvzb
+.section sxv1a
+.section sxv1b
+.section sxv2a
+.section sxv2b
+.section sxv3a
+.section sxv3b
+.section sxv4a
+.section sxv4b
+.section sxv5a
+.section sxv5b
+.section sxv6a
+.section sxv6b
+.section sxv7a
+.section sxv7b
+.section sxv8a
+.section sxv8b
+.section sxv9a
+.section sxv9b
+.section sxv0a
+.section sxv0b
+.section sxwaa
+.section sxwab
+.section sxwba
+.section sxwbb
+.section sxwca
+.section sxwcb
+.section sxwda
+.section sxwdb
+.section sxwea
+.section sxweb
+.section sxwfa
+.section sxwfb
+.section sxwga
+.section sxwgb
+.section sxwha
+.section sxwhb
+.section sxwia
+.section sxwib
+.section sxwja
+.section sxwjb
+.section sxwka
+.section sxwkb
+.section sxwla
+.section sxwlb
+.section sxwma
+.section sxwmb
+.section sxwna
+.section sxwnb
+.section sxwoa
+.section sxwob
+.section sxwpa
+.section sxwpb
+.section sxwqa
+.section sxwqb
+.section sxwra
+.section sxwrb
+.section sxwsa
+.section sxwsb
+.section sxwta
+.section sxwtb
+.section sxwua
+.section sxwub
+.section sxwva
+.section sxwvb
+.section sxwwa
+.section sxwwb
+.section sxwxa
+.section sxwxb
+.section sxwya
+.section sxwyb
+.section sxwza
+.section sxwzb
+.section sxw1a
+.section sxw1b
+.section sxw2a
+.section sxw2b
+.section sxw3a
+.section sxw3b
+.section sxw4a
+.section sxw4b
+.section sxw5a
+.section sxw5b
+.section sxw6a
+.section sxw6b
+.section sxw7a
+.section sxw7b
+.section sxw8a
+.section sxw8b
+.section sxw9a
+.section sxw9b
+.section sxw0a
+.section sxw0b
+.section sxxaa
+.section sxxab
+.section sxxba
+.section sxxbb
+.section sxxca
+.section sxxcb
+.section sxxda
+.section sxxdb
+.section sxxea
+.section sxxeb
+.section sxxfa
+.section sxxfb
+.section sxxga
+.section sxxgb
+.section sxxha
+.section sxxhb
+.section sxxia
+.section sxxib
+.section sxxja
+.section sxxjb
+.section sxxka
+.section sxxkb
+.section sxxla
+.section sxxlb
+.section sxxma
+.section sxxmb
+.section sxxna
+.section sxxnb
+.section sxxoa
+.section sxxob
+.section sxxpa
+.section sxxpb
+.section sxxqa
+.section sxxqb
+.section sxxra
+.section sxxrb
+.section sxxsa
+.section sxxsb
+.section sxxta
+.section sxxtb
+.section sxxua
+.section sxxub
+.section sxxva
+.section sxxvb
+.section sxxwa
+.section sxxwb
+.section sxxxa
+.section sxxxb
+.section sxxya
+.section sxxyb
+.section sxxza
+.section sxxzb
+.section sxx1a
+.section sxx1b
+.section sxx2a
+.section sxx2b
+.section sxx3a
+.section sxx3b
+.section sxx4a
+.section sxx4b
+.section sxx5a
+.section sxx5b
+.section sxx6a
+.section sxx6b
+.section sxx7a
+.section sxx7b
+.section sxx8a
+.section sxx8b
+.section sxx9a
+.section sxx9b
+.section sxx0a
+.section sxx0b
+.section sxyaa
+.section sxyab
+.section sxyba
+.section sxybb
+.section sxyca
+.section sxycb
+.section sxyda
+.section sxydb
+.section sxyea
+.section sxyeb
+.section sxyfa
+.section sxyfb
+.section sxyga
+.section sxygb
+.section sxyha
+.section sxyhb
+.section sxyia
+.section sxyib
+.section sxyja
+.section sxyjb
+.section sxyka
+.section sxykb
+.section sxyla
+.section sxylb
+.section sxyma
+.section sxymb
+.section sxyna
+.section sxynb
+.section sxyoa
+.section sxyob
+.section sxypa
+.section sxypb
+.section sxyqa
+.section sxyqb
+.section sxyra
+.section sxyrb
+.section sxysa
+.section sxysb
+.section sxyta
+.section sxytb
+.section sxyua
+.section sxyub
+.section sxyva
+.section sxyvb
+.section sxywa
+.section sxywb
+.section sxyxa
+.section sxyxb
+.section sxyya
+.section sxyyb
+.section sxyza
+.section sxyzb
+.section sxy1a
+.section sxy1b
+.section sxy2a
+.section sxy2b
+.section sxy3a
+.section sxy3b
+.section sxy4a
+.section sxy4b
+.section sxy5a
+.section sxy5b
+.section sxy6a
+.section sxy6b
+.section sxy7a
+.section sxy7b
+.section sxy8a
+.section sxy8b
+.section sxy9a
+.section sxy9b
+.section sxy0a
+.section sxy0b
+.section sxzaa
+.section sxzab
+.section sxzba
+.section sxzbb
+.section sxzca
+.section sxzcb
+.section sxzda
+.section sxzdb
+.section sxzea
+.section sxzeb
+.section sxzfa
+.section sxzfb
+.section sxzga
+.section sxzgb
+.section sxzha
+.section sxzhb
+.section sxzia
+.section sxzib
+.section sxzja
+.section sxzjb
+.section sxzka
+.section sxzkb
+.section sxzla
+.section sxzlb
+.section sxzma
+.section sxzmb
+.section sxzna
+.section sxznb
+.section sxzoa
+.section sxzob
+.section sxzpa
+.section sxzpb
+.section sxzqa
+.section sxzqb
+.section sxzra
+.section sxzrb
+.section sxzsa
+.section sxzsb
+.section sxzta
+.section sxztb
+.section sxzua
+.section sxzub
+.section sxzva
+.section sxzvb
+.section sxzwa
+.section sxzwb
+.section sxzxa
+.section sxzxb
+.section sxzya
+.section sxzyb
+.section sxzza
+.section sxzzb
+.section sxz1a
+.section sxz1b
+.section sxz2a
+.section sxz2b
+.section sxz3a
+.section sxz3b
+.section sxz4a
+.section sxz4b
+.section sxz5a
+.section sxz5b
+.section sxz6a
+.section sxz6b
+.section sxz7a
+.section sxz7b
+.section sxz8a
+.section sxz8b
+.section sxz9a
+.section sxz9b
+.section sxz0a
+.section sxz0b
+.section sx1aa
+.section sx1ab
+.section sx1ba
+.section sx1bb
+.section sx1ca
+.section sx1cb
+.section sx1da
+.section sx1db
+.section sx1ea
+.section sx1eb
+.section sx1fa
+.section sx1fb
+.section sx1ga
+.section sx1gb
+.section sx1ha
+.section sx1hb
+.section sx1ia
+.section sx1ib
+.section sx1ja
+.section sx1jb
+.section sx1ka
+.section sx1kb
+.section sx1la
+.section sx1lb
+.section sx1ma
+.section sx1mb
+.section sx1na
+.section sx1nb
+.section sx1oa
+.section sx1ob
+.section sx1pa
+.section sx1pb
+.section sx1qa
+.section sx1qb
+.section sx1ra
+.section sx1rb
+.section sx1sa
+.section sx1sb
+.section sx1ta
+.section sx1tb
+.section sx1ua
+.section sx1ub
+.section sx1va
+.section sx1vb
+.section sx1wa
+.section sx1wb
+.section sx1xa
+.section sx1xb
+.section sx1ya
+.section sx1yb
+.section sx1za
+.section sx1zb
+.section sx11a
+.section sx11b
+.section sx12a
+.section sx12b
+.section sx13a
+.section sx13b
+.section sx14a
+.section sx14b
+.section sx15a
+.section sx15b
+.section sx16a
+.section sx16b
+.section sx17a
+.section sx17b
+.section sx18a
+.section sx18b
+.section sx19a
+.section sx19b
+.section sx10a
+.section sx10b
+.section sx2aa
+.section sx2ab
+.section sx2ba
+.section sx2bb
+.section sx2ca
+.section sx2cb
+.section sx2da
+.section sx2db
+.section sx2ea
+.section sx2eb
+.section sx2fa
+.section sx2fb
+.section sx2ga
+.section sx2gb
+.section sx2ha
+.section sx2hb
+.section sx2ia
+.section sx2ib
+.section sx2ja
+.section sx2jb
+.section sx2ka
+.section sx2kb
+.section sx2la
+.section sx2lb
+.section sx2ma
+.section sx2mb
+.section sx2na
+.section sx2nb
+.section sx2oa
+.section sx2ob
+.section sx2pa
+.section sx2pb
+.section sx2qa
+.section sx2qb
+.section sx2ra
+.section sx2rb
+.section sx2sa
+.section sx2sb
+.section sx2ta
+.section sx2tb
+.section sx2ua
+.section sx2ub
+.section sx2va
+.section sx2vb
+.section sx2wa
+.section sx2wb
+.section sx2xa
+.section sx2xb
+.section sx2ya
+.section sx2yb
+.section sx2za
+.section sx2zb
+.section sx21a
+.section sx21b
+.section sx22a
+.section sx22b
+.section sx23a
+.section sx23b
+.section sx24a
+.section sx24b
+.section sx25a
+.section sx25b
+.section sx26a
+.section sx26b
+.section sx27a
+.section sx27b
+.section sx28a
+.section sx28b
+.section sx29a
+.section sx29b
+.section sx20a
+.section sx20b
+.section sx3aa
+.section sx3ab
+.section sx3ba
+.section sx3bb
+.section sx3ca
+.section sx3cb
+.section sx3da
+.section sx3db
+.section sx3ea
+.section sx3eb
+.section sx3fa
+.section sx3fb
+.section sx3ga
+.section sx3gb
+.section sx3ha
+.section sx3hb
+.section sx3ia
+.section sx3ib
+.section sx3ja
+.section sx3jb
+.section sx3ka
+.section sx3kb
+.section sx3la
+.section sx3lb
+.section sx3ma
+.section sx3mb
+.section sx3na
+.section sx3nb
+.section sx3oa
+.section sx3ob
+.section sx3pa
+.section sx3pb
+.section sx3qa
+.section sx3qb
+.section sx3ra
+.section sx3rb
+.section sx3sa
+.section sx3sb
+.section sx3ta
+.section sx3tb
+.section sx3ua
+.section sx3ub
+.section sx3va
+.section sx3vb
+.section sx3wa
+.section sx3wb
+.section sx3xa
+.section sx3xb
+.section sx3ya
+.section sx3yb
+.section sx3za
+.section sx3zb
+.section sx31a
+.section sx31b
+.section sx32a
+.section sx32b
+.section sx33a
+.section sx33b
+.section sx34a
+.section sx34b
+.section sx35a
+.section sx35b
+.section sx36a
+.section sx36b
+.section sx37a
+.section sx37b
+.section sx38a
+.section sx38b
+.section sx39a
+.section sx39b
+.section sx30a
+.section sx30b
+.section sx4aa
+.section sx4ab
+.section sx4ba
+.section sx4bb
+.section sx4ca
+.section sx4cb
+.section sx4da
+.section sx4db
+.section sx4ea
+.section sx4eb
+.section sx4fa
+.section sx4fb
+.section sx4ga
+.section sx4gb
+.section sx4ha
+.section sx4hb
+.section sx4ia
+.section sx4ib
+.section sx4ja
+.section sx4jb
+.section sx4ka
+.section sx4kb
+.section sx4la
+.section sx4lb
+.section sx4ma
+.section sx4mb
+.section sx4na
+.section sx4nb
+.section sx4oa
+.section sx4ob
+.section sx4pa
+.section sx4pb
+.section sx4qa
+.section sx4qb
+.section sx4ra
+.section sx4rb
+.section sx4sa
+.section sx4sb
+.section sx4ta
+.section sx4tb
+.section sx4ua
+.section sx4ub
+.section sx4va
+.section sx4vb
+.section sx4wa
+.section sx4wb
+.section sx4xa
+.section sx4xb
+.section sx4ya
+.section sx4yb
+.section sx4za
+.section sx4zb
+.section sx41a
+.section sx41b
+.section sx42a
+.section sx42b
+.section sx43a
+.section sx43b
+.section sx44a
+.section sx44b
+.section sx45a
+.section sx45b
+.section sx46a
+.section sx46b
+.section sx47a
+.section sx47b
+.section sx48a
+.section sx48b
+.section sx49a
+.section sx49b
+.section sx40a
+.section sx40b
+.section sx5aa
+.section sx5ab
+.section sx5ba
+.section sx5bb
+.section sx5ca
+.section sx5cb
+.section sx5da
+.section sx5db
+.section sx5ea
+.section sx5eb
+.section sx5fa
+.section sx5fb
+.section sx5ga
+.section sx5gb
+.section sx5ha
+.section sx5hb
+.section sx5ia
+.section sx5ib
+.section sx5ja
+.section sx5jb
+.section sx5ka
+.section sx5kb
+.section sx5la
+.section sx5lb
+.section sx5ma
+.section sx5mb
+.section sx5na
+.section sx5nb
+.section sx5oa
+.section sx5ob
+.section sx5pa
+.section sx5pb
+.section sx5qa
+.section sx5qb
+.section sx5ra
+.section sx5rb
+.section sx5sa
+.section sx5sb
+.section sx5ta
+.section sx5tb
+.section sx5ua
+.section sx5ub
+.section sx5va
+.section sx5vb
+.section sx5wa
+.section sx5wb
+.section sx5xa
+.section sx5xb
+.section sx5ya
+.section sx5yb
+.section sx5za
+.section sx5zb
+.section sx51a
+.section sx51b
+.section sx52a
+.section sx52b
+.section sx53a
+.section sx53b
+.section sx54a
+.section sx54b
+.section sx55a
+.section sx55b
+.section sx56a
+.section sx56b
+.section sx57a
+.section sx57b
+.section sx58a
+.section sx58b
+.section sx59a
+.section sx59b
+.section sx50a
+.section sx50b
+.section sx6aa
+.section sx6ab
+.section sx6ba
+.section sx6bb
+.section sx6ca
+.section sx6cb
+.section sx6da
+.section sx6db
+.section sx6ea
+.section sx6eb
+.section sx6fa
+.section sx6fb
+.section sx6ga
+.section sx6gb
+.section sx6ha
+.section sx6hb
+.section sx6ia
+.section sx6ib
+.section sx6ja
+.section sx6jb
+.section sx6ka
+.section sx6kb
+.section sx6la
+.section sx6lb
+.section sx6ma
+.section sx6mb
+.section sx6na
+.section sx6nb
+.section sx6oa
+.section sx6ob
+.section sx6pa
+.section sx6pb
+.section sx6qa
+.section sx6qb
+.section sx6ra
+.section sx6rb
+.section sx6sa
+.section sx6sb
+.section sx6ta
+.section sx6tb
+.section sx6ua
+.section sx6ub
+.section sx6va
+.section sx6vb
+.section sx6wa
+.section sx6wb
+.section sx6xa
+.section sx6xb
+.section sx6ya
+.section sx6yb
+.section sx6za
+.section sx6zb
+.section sx61a
+.section sx61b
+.section sx62a
+.section sx62b
+.section sx63a
+.section sx63b
+.section sx64a
+.section sx64b
+.section sx65a
+.section sx65b
+.section sx66a
+.section sx66b
+.section sx67a
+.section sx67b
+.section sx68a
+.section sx68b
+.section sx69a
+.section sx69b
+.section sx60a
+.section sx60b
+.section sx7aa
+.section sx7ab
+.section sx7ba
+.section sx7bb
+.section sx7ca
+.section sx7cb
+.section sx7da
+.section sx7db
+.section sx7ea
+.section sx7eb
+.section sx7fa
+.section sx7fb
+.section sx7ga
+.section sx7gb
+.section sx7ha
+.section sx7hb
+.section sx7ia
+.section sx7ib
+.section sx7ja
+.section sx7jb
+.section sx7ka
+.section sx7kb
+.section sx7la
+.section sx7lb
+.section sx7ma
+.section sx7mb
+.section sx7na
+.section sx7nb
+.section sx7oa
+.section sx7ob
+.section sx7pa
+.section sx7pb
+.section sx7qa
+.section sx7qb
+.section sx7ra
+.section sx7rb
+.section sx7sa
+.section sx7sb
+.section sx7ta
+.section sx7tb
+.section sx7ua
+.section sx7ub
+.section sx7va
+.section sx7vb
+.section sx7wa
+.section sx7wb
+.section sx7xa
+.section sx7xb
+.section sx7ya
+.section sx7yb
+.section sx7za
+.section sx7zb
+.section sx71a
+.section sx71b
+.section sx72a
+.section sx72b
+.section sx73a
+.section sx73b
+.section sx74a
+.section sx74b
+.section sx75a
+.section sx75b
+.section sx76a
+.section sx76b
+.section sx77a
+.section sx77b
+.section sx78a
+.section sx78b
+.section sx79a
+.section sx79b
+.section sx70a
+.section sx70b
+.section sx8aa
+.section sx8ab
+.section sx8ba
+.section sx8bb
+.section sx8ca
+.section sx8cb
+.section sx8da
+.section sx8db
+.section sx8ea
+.section sx8eb
+.section sx8fa
+.section sx8fb
+.section sx8ga
+.section sx8gb
+.section sx8ha
+.section sx8hb
+.section sx8ia
+.section sx8ib
+.section sx8ja
+.section sx8jb
+.section sx8ka
+.section sx8kb
+.section sx8la
+.section sx8lb
+.section sx8ma
+.section sx8mb
+.section sx8na
+.section sx8nb
+.section sx8oa
+.section sx8ob
+.section sx8pa
+.section sx8pb
+.section sx8qa
+.section sx8qb
+.section sx8ra
+.section sx8rb
+.section sx8sa
+.section sx8sb
+.section sx8ta
+.section sx8tb
+.section sx8ua
+.section sx8ub
+.section sx8va
+.section sx8vb
+.section sx8wa
+.section sx8wb
+.section sx8xa
+.section sx8xb
+.section sx8ya
+.section sx8yb
+.section sx8za
+.section sx8zb
+.section sx81a
+.section sx81b
+.section sx82a
+.section sx82b
+.section sx83a
+.section sx83b
+.section sx84a
+.section sx84b
+.section sx85a
+.section sx85b
+.section sx86a
+.section sx86b
+.section sx87a
+.section sx87b
+.section sx88a
+.section sx88b
+.section sx89a
+.section sx89b
+.section sx80a
+.section sx80b
+.section sx9aa
+.section sx9ab
+.section sx9ba
+.section sx9bb
+.section sx9ca
+.section sx9cb
+.section sx9da
+.section sx9db
+.section sx9ea
+.section sx9eb
+.section sx9fa
+.section sx9fb
+.section sx9ga
+.section sx9gb
+.section sx9ha
+.section sx9hb
+.section sx9ia
+.section sx9ib
+.section sx9ja
+.section sx9jb
+.section sx9ka
+.section sx9kb
+.section sx9la
+.section sx9lb
+.section sx9ma
+.section sx9mb
+.section sx9na
+.section sx9nb
+.section sx9oa
+.section sx9ob
+.section sx9pa
+.section sx9pb
+.section sx9qa
+.section sx9qb
+.section sx9ra
+.section sx9rb
+.section sx9sa
+.section sx9sb
+.section sx9ta
+.section sx9tb
+.section sx9ua
+.section sx9ub
+.section sx9va
+.section sx9vb
+.section sx9wa
+.section sx9wb
+.section sx9xa
+.section sx9xb
+.section sx9ya
+.section sx9yb
+.section sx9za
+.section sx9zb
+.section sx91a
+.section sx91b
+.section sx92a
+.section sx92b
+.section sx93a
+.section sx93b
+.section sx94a
+.section sx94b
+.section sx95a
+.section sx95b
+.section sx96a
+.section sx96b
+.section sx97a
+.section sx97b
+.section sx98a
+.section sx98b
+.section sx99a
+.section sx99b
+.section sx90a
+.section sx90b
+.section sx0aa
+.section sx0ab
+.section sx0ba
+.section sx0bb
+.section sx0ca
+.section sx0cb
+.section sx0da
+.section sx0db
+.section sx0ea
+.section sx0eb
+.section sx0fa
+.section sx0fb
+.section sx0ga
+.section sx0gb
+.section sx0ha
+.section sx0hb
+.section sx0ia
+.section sx0ib
+.section sx0ja
+.section sx0jb
+.section sx0ka
+.section sx0kb
+.section sx0la
+.section sx0lb
+.section sx0ma
+.section sx0mb
+.section sx0na
+.section sx0nb
+.section sx0oa
+.section sx0ob
+.section sx0pa
+.section sx0pb
+.section sx0qa
+.section sx0qb
+.section sx0ra
+.section sx0rb
+.section sx0sa
+.section sx0sb
+.section sx0ta
+.section sx0tb
+.section sx0ua
+.section sx0ub
+.section sx0va
+.section sx0vb
+.section sx0wa
+.section sx0wb
+.section sx0xa
+.section sx0xb
+.section sx0ya
+.section sx0yb
+.section sx0za
+.section sx0zb
+.section sx01a
+.section sx01b
+.section sx02a
+.section sx02b
+.section sx03a
+.section sx03b
+.section sx04a
+.section sx04b
+.section sx05a
+.section sx05b
+.section sx06a
+.section sx06b
+.section sx07a
+.section sx07b
+.section sx08a
+.section sx08b
+.section sx09a
+.section sx09b
+.section sx00a
+.section sx00b
+.section syaaa
+.section syaab
+.section syaba
+.section syabb
+.section syaca
+.section syacb
+.section syada
+.section syadb
+.section syaea
+.section syaeb
+.section syafa
+.section syafb
+.section syaga
+.section syagb
+.section syaha
+.section syahb
+.section syaia
+.section syaib
+.section syaja
+.section syajb
+.section syaka
+.section syakb
+.section syala
+.section syalb
+.section syama
+.section syamb
+.section syana
+.section syanb
+.section syaoa
+.section syaob
+.section syapa
+.section syapb
+.section syaqa
+.section syaqb
+.section syara
+.section syarb
+.section syasa
+.section syasb
+.section syata
+.section syatb
+.section syaua
+.section syaub
+.section syava
+.section syavb
+.section syawa
+.section syawb
+.section syaxa
+.section syaxb
+.section syaya
+.section syayb
+.section syaza
+.section syazb
+.section sya1a
+.section sya1b
+.section sya2a
+.section sya2b
+.section sya3a
+.section sya3b
+.section sya4a
+.section sya4b
+.section sya5a
+.section sya5b
+.section sya6a
+.section sya6b
+.section sya7a
+.section sya7b
+.section sya8a
+.section sya8b
+.section sya9a
+.section sya9b
+.section sya0a
+.section sya0b
+.section sybaa
+.section sybab
+.section sybba
+.section sybbb
+.section sybca
+.section sybcb
+.section sybda
+.section sybdb
+.section sybea
+.section sybeb
+.section sybfa
+.section sybfb
+.section sybga
+.section sybgb
+.section sybha
+.section sybhb
+.section sybia
+.section sybib
+.section sybja
+.section sybjb
+.section sybka
+.section sybkb
+.section sybla
+.section syblb
+.section sybma
+.section sybmb
+.section sybna
+.section sybnb
+.section syboa
+.section sybob
+.section sybpa
+.section sybpb
+.section sybqa
+.section sybqb
+.section sybra
+.section sybrb
+.section sybsa
+.section sybsb
+.section sybta
+.section sybtb
+.section sybua
+.section sybub
+.section sybva
+.section sybvb
+.section sybwa
+.section sybwb
+.section sybxa
+.section sybxb
+.section sybya
+.section sybyb
+.section sybza
+.section sybzb
+.section syb1a
+.section syb1b
+.section syb2a
+.section syb2b
+.section syb3a
+.section syb3b
+.section syb4a
+.section syb4b
+.section syb5a
+.section syb5b
+.section syb6a
+.section syb6b
+.section syb7a
+.section syb7b
+.section syb8a
+.section syb8b
+.section syb9a
+.section syb9b
+.section syb0a
+.section syb0b
+.section sycaa
+.section sycab
+.section sycba
+.section sycbb
+.section sycca
+.section syccb
+.section sycda
+.section sycdb
+.section sycea
+.section syceb
+.section sycfa
+.section sycfb
+.section sycga
+.section sycgb
+.section sycha
+.section sychb
+.section sycia
+.section sycib
+.section sycja
+.section sycjb
+.section sycka
+.section syckb
+.section sycla
+.section syclb
+.section sycma
+.section sycmb
+.section sycna
+.section sycnb
+.section sycoa
+.section sycob
+.section sycpa
+.section sycpb
+.section sycqa
+.section sycqb
+.section sycra
+.section sycrb
+.section sycsa
+.section sycsb
+.section sycta
+.section syctb
+.section sycua
+.section sycub
+.section sycva
+.section sycvb
+.section sycwa
+.section sycwb
+.section sycxa
+.section sycxb
+.section sycya
+.section sycyb
+.section sycza
+.section syczb
+.section syc1a
+.section syc1b
+.section syc2a
+.section syc2b
+.section syc3a
+.section syc3b
+.section syc4a
+.section syc4b
+.section syc5a
+.section syc5b
+.section syc6a
+.section syc6b
+.section syc7a
+.section syc7b
+.section syc8a
+.section syc8b
+.section syc9a
+.section syc9b
+.section syc0a
+.section syc0b
+.section sydaa
+.section sydab
+.section sydba
+.section sydbb
+.section sydca
+.section sydcb
+.section sydda
+.section syddb
+.section sydea
+.section sydeb
+.section sydfa
+.section sydfb
+.section sydga
+.section sydgb
+.section sydha
+.section sydhb
+.section sydia
+.section sydib
+.section sydja
+.section sydjb
+.section sydka
+.section sydkb
+.section sydla
+.section sydlb
+.section sydma
+.section sydmb
+.section sydna
+.section sydnb
+.section sydoa
+.section sydob
+.section sydpa
+.section sydpb
+.section sydqa
+.section sydqb
+.section sydra
+.section sydrb
+.section sydsa
+.section sydsb
+.section sydta
+.section sydtb
+.section sydua
+.section sydub
+.section sydva
+.section sydvb
+.section sydwa
+.section sydwb
+.section sydxa
+.section sydxb
+.section sydya
+.section sydyb
+.section sydza
+.section sydzb
+.section syd1a
+.section syd1b
+.section syd2a
+.section syd2b
+.section syd3a
+.section syd3b
+.section syd4a
+.section syd4b
+.section syd5a
+.section syd5b
+.section syd6a
+.section syd6b
+.section syd7a
+.section syd7b
+.section syd8a
+.section syd8b
+.section syd9a
+.section syd9b
+.section syd0a
+.section syd0b
+.section syeaa
+.section syeab
+.section syeba
+.section syebb
+.section syeca
+.section syecb
+.section syeda
+.section syedb
+.section syeea
+.section syeeb
+.section syefa
+.section syefb
+.section syega
+.section syegb
+.section syeha
+.section syehb
+.section syeia
+.section syeib
+.section syeja
+.section syejb
+.section syeka
+.section syekb
+.section syela
+.section syelb
+.section syema
+.section syemb
+.section syena
+.section syenb
+.section syeoa
+.section syeob
+.section syepa
+.section syepb
+.section syeqa
+.section syeqb
+.section syera
+.section syerb
+.section syesa
+.section syesb
+.section syeta
+.section syetb
+.section syeua
+.section syeub
+.section syeva
+.section syevb
+.section syewa
+.section syewb
+.section syexa
+.section syexb
+.section syeya
+.section syeyb
+.section syeza
+.section syezb
+.section sye1a
+.section sye1b
+.section sye2a
+.section sye2b
+.section sye3a
+.section sye3b
+.section sye4a
+.section sye4b
+.section sye5a
+.section sye5b
+.section sye6a
+.section sye6b
+.section sye7a
+.section sye7b
+.section sye8a
+.section sye8b
+.section sye9a
+.section sye9b
+.section sye0a
+.section sye0b
+.section syfaa
+.section syfab
+.section syfba
+.section syfbb
+.section syfca
+.section syfcb
+.section syfda
+.section syfdb
+.section syfea
+.section syfeb
+.section syffa
+.section syffb
+.section syfga
+.section syfgb
+.section syfha
+.section syfhb
+.section syfia
+.section syfib
+.section syfja
+.section syfjb
+.section syfka
+.section syfkb
+.section syfla
+.section syflb
+.section syfma
+.section syfmb
+.section syfna
+.section syfnb
+.section syfoa
+.section syfob
+.section syfpa
+.section syfpb
+.section syfqa
+.section syfqb
+.section syfra
+.section syfrb
+.section syfsa
+.section syfsb
+.section syfta
+.section syftb
+.section syfua
+.section syfub
+.section syfva
+.section syfvb
+.section syfwa
+.section syfwb
+.section syfxa
+.section syfxb
+.section syfya
+.section syfyb
+.section syfza
+.section syfzb
+.section syf1a
+.section syf1b
+.section syf2a
+.section syf2b
+.section syf3a
+.section syf3b
+.section syf4a
+.section syf4b
+.section syf5a
+.section syf5b
+.section syf6a
+.section syf6b
+.section syf7a
+.section syf7b
+.section syf8a
+.section syf8b
+.section syf9a
+.section syf9b
+.section syf0a
+.section syf0b
+.section sygaa
+.section sygab
+.section sygba
+.section sygbb
+.section sygca
+.section sygcb
+.section sygda
+.section sygdb
+.section sygea
+.section sygeb
+.section sygfa
+.section sygfb
+.section sygga
+.section syggb
+.section sygha
+.section syghb
+.section sygia
+.section sygib
+.section sygja
+.section sygjb
+.section sygka
+.section sygkb
+.section sygla
+.section syglb
+.section sygma
+.section sygmb
+.section sygna
+.section sygnb
+.section sygoa
+.section sygob
+.section sygpa
+.section sygpb
+.section sygqa
+.section sygqb
+.section sygra
+.section sygrb
+.section sygsa
+.section sygsb
+.section sygta
+.section sygtb
+.section sygua
+.section sygub
+.section sygva
+.section sygvb
+.section sygwa
+.section sygwb
+.section sygxa
+.section sygxb
+.section sygya
+.section sygyb
+.section sygza
+.section sygzb
+.section syg1a
+.section syg1b
+.section syg2a
+.section syg2b
+.section syg3a
+.section syg3b
+.section syg4a
+.section syg4b
+.section syg5a
+.section syg5b
+.section syg6a
+.section syg6b
+.section syg7a
+.section syg7b
+.section syg8a
+.section syg8b
+.section syg9a
+.section syg9b
+.section syg0a
+.section syg0b
+.section syhaa
+.section syhab
+.section syhba
+.section syhbb
+.section syhca
+.section syhcb
+.section syhda
+.section syhdb
+.section syhea
+.section syheb
+.section syhfa
+.section syhfb
+.section syhga
+.section syhgb
+.section syhha
+.section syhhb
+.section syhia
+.section syhib
+.section syhja
+.section syhjb
+.section syhka
+.section syhkb
+.section syhla
+.section syhlb
+.section syhma
+.section syhmb
+.section syhna
+.section syhnb
+.section syhoa
+.section syhob
+.section syhpa
+.section syhpb
+.section syhqa
+.section syhqb
+.section syhra
+.section syhrb
+.section syhsa
+.section syhsb
+.section syhta
+.section syhtb
+.section syhua
+.section syhub
+.section syhva
+.section syhvb
+.section syhwa
+.section syhwb
+.section syhxa
+.section syhxb
+.section syhya
+.section syhyb
+.section syhza
+.section syhzb
+.section syh1a
+.section syh1b
+.section syh2a
+.section syh2b
+.section syh3a
+.section syh3b
+.section syh4a
+.section syh4b
+.section syh5a
+.section syh5b
+.section syh6a
+.section syh6b
+.section syh7a
+.section syh7b
+.section syh8a
+.section syh8b
+.section syh9a
+.section syh9b
+.section syh0a
+.section syh0b
+.section syiaa
+.section syiab
+.section syiba
+.section syibb
+.section syica
+.section syicb
+.section syida
+.section syidb
+.section syiea
+.section syieb
+.section syifa
+.section syifb
+.section syiga
+.section syigb
+.section syiha
+.section syihb
+.section syiia
+.section syiib
+.section syija
+.section syijb
+.section syika
+.section syikb
+.section syila
+.section syilb
+.section syima
+.section syimb
+.section syina
+.section syinb
+.section syioa
+.section syiob
+.section syipa
+.section syipb
+.section syiqa
+.section syiqb
+.section syira
+.section syirb
+.section syisa
+.section syisb
+.section syita
+.section syitb
+.section syiua
+.section syiub
+.section syiva
+.section syivb
+.section syiwa
+.section syiwb
+.section syixa
+.section syixb
+.section syiya
+.section syiyb
+.section syiza
+.section syizb
+.section syi1a
+.section syi1b
+.section syi2a
+.section syi2b
+.section syi3a
+.section syi3b
+.section syi4a
+.section syi4b
+.section syi5a
+.section syi5b
+.section syi6a
+.section syi6b
+.section syi7a
+.section syi7b
+.section syi8a
+.section syi8b
+.section syi9a
+.section syi9b
+.section syi0a
+.section syi0b
+.section syjaa
+.section syjab
+.section syjba
+.section syjbb
+.section syjca
+.section syjcb
+.section syjda
+.section syjdb
+.section syjea
+.section syjeb
+.section syjfa
+.section syjfb
+.section syjga
+.section syjgb
+.section syjha
+.section syjhb
+.section syjia
+.section syjib
+.section syjja
+.section syjjb
+.section syjka
+.section syjkb
+.section syjla
+.section syjlb
+.section syjma
+.section syjmb
+.section syjna
+.section syjnb
+.section syjoa
+.section syjob
+.section syjpa
+.section syjpb
+.section syjqa
+.section syjqb
+.section syjra
+.section syjrb
+.section syjsa
+.section syjsb
+.section syjta
+.section syjtb
+.section syjua
+.section syjub
+.section syjva
+.section syjvb
+.section syjwa
+.section syjwb
+.section syjxa
+.section syjxb
+.section syjya
+.section syjyb
+.section syjza
+.section syjzb
+.section syj1a
+.section syj1b
+.section syj2a
+.section syj2b
+.section syj3a
+.section syj3b
+.section syj4a
+.section syj4b
+.section syj5a
+.section syj5b
+.section syj6a
+.section syj6b
+.section syj7a
+.section syj7b
+.section syj8a
+.section syj8b
+.section syj9a
+.section syj9b
+.section syj0a
+.section syj0b
+.section sykaa
+.section sykab
+.section sykba
+.section sykbb
+.section sykca
+.section sykcb
+.section sykda
+.section sykdb
+.section sykea
+.section sykeb
+.section sykfa
+.section sykfb
+.section sykga
+.section sykgb
+.section sykha
+.section sykhb
+.section sykia
+.section sykib
+.section sykja
+.section sykjb
+.section sykka
+.section sykkb
+.section sykla
+.section syklb
+.section sykma
+.section sykmb
+.section sykna
+.section syknb
+.section sykoa
+.section sykob
+.section sykpa
+.section sykpb
+.section sykqa
+.section sykqb
+.section sykra
+.section sykrb
+.section syksa
+.section syksb
+.section sykta
+.section syktb
+.section sykua
+.section sykub
+.section sykva
+.section sykvb
+.section sykwa
+.section sykwb
+.section sykxa
+.section sykxb
+.section sykya
+.section sykyb
+.section sykza
+.section sykzb
+.section syk1a
+.section syk1b
+.section syk2a
+.section syk2b
+.section syk3a
+.section syk3b
+.section syk4a
+.section syk4b
+.section syk5a
+.section syk5b
+.section syk6a
+.section syk6b
+.section syk7a
+.section syk7b
+.section syk8a
+.section syk8b
+.section syk9a
+.section syk9b
+.section syk0a
+.section syk0b
+.section sylaa
+.section sylab
+.section sylba
+.section sylbb
+.section sylca
+.section sylcb
+.section sylda
+.section syldb
+.section sylea
+.section syleb
+.section sylfa
+.section sylfb
+.section sylga
+.section sylgb
+.section sylha
+.section sylhb
+.section sylia
+.section sylib
+.section sylja
+.section syljb
+.section sylka
+.section sylkb
+.section sylla
+.section syllb
+.section sylma
+.section sylmb
+.section sylna
+.section sylnb
+.section syloa
+.section sylob
+.section sylpa
+.section sylpb
+.section sylqa
+.section sylqb
+.section sylra
+.section sylrb
+.section sylsa
+.section sylsb
+.section sylta
+.section syltb
+.section sylua
+.section sylub
+.section sylva
+.section sylvb
+.section sylwa
+.section sylwb
+.section sylxa
+.section sylxb
+.section sylya
+.section sylyb
+.section sylza
+.section sylzb
+.section syl1a
+.section syl1b
+.section syl2a
+.section syl2b
+.section syl3a
+.section syl3b
+.section syl4a
+.section syl4b
+.section syl5a
+.section syl5b
+.section syl6a
+.section syl6b
+.section syl7a
+.section syl7b
+.section syl8a
+.section syl8b
+.section syl9a
+.section syl9b
+.section syl0a
+.section syl0b
+.section symaa
+.section symab
+.section symba
+.section symbb
+.section symca
+.section symcb
+.section symda
+.section symdb
+.section symea
+.section symeb
+.section symfa
+.section symfb
+.section symga
+.section symgb
+.section symha
+.section symhb
+.section symia
+.section symib
+.section symja
+.section symjb
+.section symka
+.section symkb
+.section symla
+.section symlb
+.section symma
+.section symmb
+.section symna
+.section symnb
+.section symoa
+.section symob
+.section sympa
+.section sympb
+.section symqa
+.section symqb
+.section symra
+.section symrb
+.section symsa
+.section symsb
+.section symta
+.section symtb
+.section symua
+.section symub
+.section symva
+.section symvb
+.section symwa
+.section symwb
+.section symxa
+.section symxb
+.section symya
+.section symyb
+.section symza
+.section symzb
+.section sym1a
+.section sym1b
+.section sym2a
+.section sym2b
+.section sym3a
+.section sym3b
+.section sym4a
+.section sym4b
+.section sym5a
+.section sym5b
+.section sym6a
+.section sym6b
+.section sym7a
+.section sym7b
+.section sym8a
+.section sym8b
+.section sym9a
+.section sym9b
+.section sym0a
+.section sym0b
+.section synaa
+.section synab
+.section synba
+.section synbb
+.section synca
+.section syncb
+.section synda
+.section syndb
+.section synea
+.section syneb
+.section synfa
+.section synfb
+.section synga
+.section syngb
+.section synha
+.section synhb
+.section synia
+.section synib
+.section synja
+.section synjb
+.section synka
+.section synkb
+.section synla
+.section synlb
+.section synma
+.section synmb
+.section synna
+.section synnb
+.section synoa
+.section synob
+.section synpa
+.section synpb
+.section synqa
+.section synqb
+.section synra
+.section synrb
+.section synsa
+.section synsb
+.section synta
+.section syntb
+.section synua
+.section synub
+.section synva
+.section synvb
+.section synwa
+.section synwb
+.section synxa
+.section synxb
+.section synya
+.section synyb
+.section synza
+.section synzb
+.section syn1a
+.section syn1b
+.section syn2a
+.section syn2b
+.section syn3a
+.section syn3b
+.section syn4a
+.section syn4b
+.section syn5a
+.section syn5b
+.section syn6a
+.section syn6b
+.section syn7a
+.section syn7b
+.section syn8a
+.section syn8b
+.section syn9a
+.section syn9b
+.section syn0a
+.section syn0b
+.section syoaa
+.section syoab
+.section syoba
+.section syobb
+.section syoca
+.section syocb
+.section syoda
+.section syodb
+.section syoea
+.section syoeb
+.section syofa
+.section syofb
+.section syoga
+.section syogb
+.section syoha
+.section syohb
+.section syoia
+.section syoib
+.section syoja
+.section syojb
+.section syoka
+.section syokb
+.section syola
+.section syolb
+.section syoma
+.section syomb
+.section syona
+.section syonb
+.section syooa
+.section syoob
+.section syopa
+.section syopb
+.section syoqa
+.section syoqb
+.section syora
+.section syorb
+.section syosa
+.section syosb
+.section syota
+.section syotb
+.section syoua
+.section syoub
+.section syova
+.section syovb
+.section syowa
+.section syowb
+.section syoxa
+.section syoxb
+.section syoya
+.section syoyb
+.section syoza
+.section syozb
+.section syo1a
+.section syo1b
+.section syo2a
+.section syo2b
+.section syo3a
+.section syo3b
+.section syo4a
+.section syo4b
+.section syo5a
+.section syo5b
+.section syo6a
+.section syo6b
+.section syo7a
+.section syo7b
+.section syo8a
+.section syo8b
+.section syo9a
+.section syo9b
+.section syo0a
+.section syo0b
+.section sypaa
+.section sypab
+.section sypba
+.section sypbb
+.section sypca
+.section sypcb
+.section sypda
+.section sypdb
+.section sypea
+.section sypeb
+.section sypfa
+.section sypfb
+.section sypga
+.section sypgb
+.section sypha
+.section syphb
+.section sypia
+.section sypib
+.section sypja
+.section sypjb
+.section sypka
+.section sypkb
+.section sypla
+.section syplb
+.section sypma
+.section sypmb
+.section sypna
+.section sypnb
+.section sypoa
+.section sypob
+.section syppa
+.section syppb
+.section sypqa
+.section sypqb
+.section sypra
+.section syprb
+.section sypsa
+.section sypsb
+.section sypta
+.section syptb
+.section sypua
+.section sypub
+.section sypva
+.section sypvb
+.section sypwa
+.section sypwb
+.section sypxa
+.section sypxb
+.section sypya
+.section sypyb
+.section sypza
+.section sypzb
+.section syp1a
+.section syp1b
+.section syp2a
+.section syp2b
+.section syp3a
+.section syp3b
+.section syp4a
+.section syp4b
+.section syp5a
+.section syp5b
+.section syp6a
+.section syp6b
+.section syp7a
+.section syp7b
+.section syp8a
+.section syp8b
+.section syp9a
+.section syp9b
+.section syp0a
+.section syp0b
+.section syqaa
+.section syqab
+.section syqba
+.section syqbb
+.section syqca
+.section syqcb
+.section syqda
+.section syqdb
+.section syqea
+.section syqeb
+.section syqfa
+.section syqfb
+.section syqga
+.section syqgb
+.section syqha
+.section syqhb
+.section syqia
+.section syqib
+.section syqja
+.section syqjb
+.section syqka
+.section syqkb
+.section syqla
+.section syqlb
+.section syqma
+.section syqmb
+.section syqna
+.section syqnb
+.section syqoa
+.section syqob
+.section syqpa
+.section syqpb
+.section syqqa
+.section syqqb
+.section syqra
+.section syqrb
+.section syqsa
+.section syqsb
+.section syqta
+.section syqtb
+.section syqua
+.section syqub
+.section syqva
+.section syqvb
+.section syqwa
+.section syqwb
+.section syqxa
+.section syqxb
+.section syqya
+.section syqyb
+.section syqza
+.section syqzb
+.section syq1a
+.section syq1b
+.section syq2a
+.section syq2b
+.section syq3a
+.section syq3b
+.section syq4a
+.section syq4b
+.section syq5a
+.section syq5b
+.section syq6a
+.section syq6b
+.section syq7a
+.section syq7b
+.section syq8a
+.section syq8b
+.section syq9a
+.section syq9b
+.section syq0a
+.section syq0b
+.section syraa
+.section syrab
+.section syrba
+.section syrbb
+.section syrca
+.section syrcb
+.section syrda
+.section syrdb
+.section syrea
+.section syreb
+.section syrfa
+.section syrfb
+.section syrga
+.section syrgb
+.section syrha
+.section syrhb
+.section syria
+.section syrib
+.section syrja
+.section syrjb
+.section syrka
+.section syrkb
+.section syrla
+.section syrlb
+.section syrma
+.section syrmb
+.section syrna
+.section syrnb
+.section syroa
+.section syrob
+.section syrpa
+.section syrpb
+.section syrqa
+.section syrqb
+.section syrra
+.section syrrb
+.section syrsa
+.section syrsb
+.section syrta
+.section syrtb
+.section syrua
+.section syrub
+.section syrva
+.section syrvb
+.section syrwa
+.section syrwb
+.section syrxa
+.section syrxb
+.section syrya
+.section syryb
+.section syrza
+.section syrzb
+.section syr1a
+.section syr1b
+.section syr2a
+.section syr2b
+.section syr3a
+.section syr3b
+.section syr4a
+.section syr4b
+.section syr5a
+.section syr5b
+.section syr6a
+.section syr6b
+.section syr7a
+.section syr7b
+.section syr8a
+.section syr8b
+.section syr9a
+.section syr9b
+.section syr0a
+.section syr0b
+.section sysaa
+.section sysab
+.section sysba
+.section sysbb
+.section sysca
+.section syscb
+.section sysda
+.section sysdb
+.section sysea
+.section syseb
+.section sysfa
+.section sysfb
+.section sysga
+.section sysgb
+.section sysha
+.section syshb
+.section sysia
+.section sysib
+.section sysja
+.section sysjb
+.section syska
+.section syskb
+.section sysla
+.section syslb
+.section sysma
+.section sysmb
+.section sysna
+.section sysnb
+.section sysoa
+.section sysob
+.section syspa
+.section syspb
+.section sysqa
+.section sysqb
+.section sysra
+.section sysrb
+.section syssa
+.section syssb
+.section systa
+.section systb
+.section sysua
+.section sysub
+.section sysva
+.section sysvb
+.section syswa
+.section syswb
+.section sysxa
+.section sysxb
+.section sysya
+.section sysyb
+.section sysza
+.section syszb
+.section sys1a
+.section sys1b
+.section sys2a
+.section sys2b
+.section sys3a
+.section sys3b
+.section sys4a
+.section sys4b
+.section sys5a
+.section sys5b
+.section sys6a
+.section sys6b
+.section sys7a
+.section sys7b
+.section sys8a
+.section sys8b
+.section sys9a
+.section sys9b
+.section sys0a
+.section sys0b
+.section sytaa
+.section sytab
+.section sytba
+.section sytbb
+.section sytca
+.section sytcb
+.section sytda
+.section sytdb
+.section sytea
+.section syteb
+.section sytfa
+.section sytfb
+.section sytga
+.section sytgb
+.section sytha
+.section sythb
+.section sytia
+.section sytib
+.section sytja
+.section sytjb
+.section sytka
+.section sytkb
+.section sytla
+.section sytlb
+.section sytma
+.section sytmb
+.section sytna
+.section sytnb
+.section sytoa
+.section sytob
+.section sytpa
+.section sytpb
+.section sytqa
+.section sytqb
+.section sytra
+.section sytrb
+.section sytsa
+.section sytsb
+.section sytta
+.section syttb
+.section sytua
+.section sytub
+.section sytva
+.section sytvb
+.section sytwa
+.section sytwb
+.section sytxa
+.section sytxb
+.section sytya
+.section sytyb
+.section sytza
+.section sytzb
+.section syt1a
+.section syt1b
+.section syt2a
+.section syt2b
+.section syt3a
+.section syt3b
+.section syt4a
+.section syt4b
+.section syt5a
+.section syt5b
+.section syt6a
+.section syt6b
+.section syt7a
+.section syt7b
+.section syt8a
+.section syt8b
+.section syt9a
+.section syt9b
+.section syt0a
+.section syt0b
+.section syuaa
+.section syuab
+.section syuba
+.section syubb
+.section syuca
+.section syucb
+.section syuda
+.section syudb
+.section syuea
+.section syueb
+.section syufa
+.section syufb
+.section syuga
+.section syugb
+.section syuha
+.section syuhb
+.section syuia
+.section syuib
+.section syuja
+.section syujb
+.section syuka
+.section syukb
+.section syula
+.section syulb
+.section syuma
+.section syumb
+.section syuna
+.section syunb
+.section syuoa
+.section syuob
+.section syupa
+.section syupb
+.section syuqa
+.section syuqb
+.section syura
+.section syurb
+.section syusa
+.section syusb
+.section syuta
+.section syutb
+.section syuua
+.section syuub
+.section syuva
+.section syuvb
+.section syuwa
+.section syuwb
+.section syuxa
+.section syuxb
+.section syuya
+.section syuyb
+.section syuza
+.section syuzb
+.section syu1a
+.section syu1b
+.section syu2a
+.section syu2b
+.section syu3a
+.section syu3b
+.section syu4a
+.section syu4b
+.section syu5a
+.section syu5b
+.section syu6a
+.section syu6b
+.section syu7a
+.section syu7b
+.section syu8a
+.section syu8b
+.section syu9a
+.section syu9b
+.section syu0a
+.section syu0b
+.section syvaa
+.section syvab
+.section syvba
+.section syvbb
+.section syvca
+.section syvcb
+.section syvda
+.section syvdb
+.section syvea
+.section syveb
+.section syvfa
+.section syvfb
+.section syvga
+.section syvgb
+.section syvha
+.section syvhb
+.section syvia
+.section syvib
+.section syvja
+.section syvjb
+.section syvka
+.section syvkb
+.section syvla
+.section syvlb
+.section syvma
+.section syvmb
+.section syvna
+.section syvnb
+.section syvoa
+.section syvob
+.section syvpa
+.section syvpb
+.section syvqa
+.section syvqb
+.section syvra
+.section syvrb
+.section syvsa
+.section syvsb
+.section syvta
+.section syvtb
+.section syvua
+.section syvub
+.section syvva
+.section syvvb
+.section syvwa
+.section syvwb
+.section syvxa
+.section syvxb
+.section syvya
+.section syvyb
+.section syvza
+.section syvzb
+.section syv1a
+.section syv1b
+.section syv2a
+.section syv2b
+.section syv3a
+.section syv3b
+.section syv4a
+.section syv4b
+.section syv5a
+.section syv5b
+.section syv6a
+.section syv6b
+.section syv7a
+.section syv7b
+.section syv8a
+.section syv8b
+.section syv9a
+.section syv9b
+.section syv0a
+.section syv0b
+.section sywaa
+.section sywab
+.section sywba
+.section sywbb
+.section sywca
+.section sywcb
+.section sywda
+.section sywdb
+.section sywea
+.section syweb
+.section sywfa
+.section sywfb
+.section sywga
+.section sywgb
+.section sywha
+.section sywhb
+.section sywia
+.section sywib
+.section sywja
+.section sywjb
+.section sywka
+.section sywkb
+.section sywla
+.section sywlb
+.section sywma
+.section sywmb
+.section sywna
+.section sywnb
+.section sywoa
+.section sywob
+.section sywpa
+.section sywpb
+.section sywqa
+.section sywqb
+.section sywra
+.section sywrb
+.section sywsa
+.section sywsb
+.section sywta
+.section sywtb
+.section sywua
+.section sywub
+.section sywva
+.section sywvb
+.section sywwa
+.section sywwb
+.section sywxa
+.section sywxb
+.section sywya
+.section sywyb
+.section sywza
+.section sywzb
+.section syw1a
+.section syw1b
+.section syw2a
+.section syw2b
+.section syw3a
+.section syw3b
+.section syw4a
+.section syw4b
+.section syw5a
+.section syw5b
+.section syw6a
+.section syw6b
+.section syw7a
+.section syw7b
+.section syw8a
+.section syw8b
+.section syw9a
+.section syw9b
+.section syw0a
+.section syw0b
+.section syxaa
+.section syxab
+.section syxba
+.section syxbb
+.section syxca
+.section syxcb
+.section syxda
+.section syxdb
+.section syxea
+.section syxeb
+.section syxfa
+.section syxfb
+.section syxga
+.section syxgb
+.section syxha
+.section syxhb
+.section syxia
+.section syxib
+.section syxja
+.section syxjb
+.section syxka
+.section syxkb
+.section syxla
+.section syxlb
+.section syxma
+.section syxmb
+.section syxna
+.section syxnb
+.section syxoa
+.section syxob
+.section syxpa
+.section syxpb
+.section syxqa
+.section syxqb
+.section syxra
+.section syxrb
+.section syxsa
+.section syxsb
+.section syxta
+.section syxtb
+.section syxua
+.section syxub
+.section syxva
+.section syxvb
+.section syxwa
+.section syxwb
+.section syxxa
+.section syxxb
+.section syxya
+.section syxyb
+.section syxza
+.section syxzb
+.section syx1a
+.section syx1b
+.section syx2a
+.section syx2b
+.section syx3a
+.section syx3b
+.section syx4a
+.section syx4b
+.section syx5a
+.section syx5b
+.section syx6a
+.section syx6b
+.section syx7a
+.section syx7b
+.section syx8a
+.section syx8b
+.section syx9a
+.section syx9b
+.section syx0a
+.section syx0b
+.section syyaa
+.section syyab
+.section syyba
+.section syybb
+.section syyca
+.section syycb
+.section syyda
+.section syydb
+.section syyea
+.section syyeb
+.section syyfa
+.section syyfb
+.section syyga
+.section syygb
+.section syyha
+.section syyhb
+.section syyia
+.section syyib
+.section syyja
+.section syyjb
+.section syyka
+.section syykb
+.section syyla
+.section syylb
+.section syyma
+.section syymb
+.section syyna
+.section syynb
+.section syyoa
+.section syyob
+.section syypa
+.section syypb
+.section syyqa
+.section syyqb
+.section syyra
+.section syyrb
+.section syysa
+.section syysb
+.section syyta
+.section syytb
+.section syyua
+.section syyub
+.section syyva
+.section syyvb
+.section syywa
+.section syywb
+.section syyxa
+.section syyxb
+.section syyya
+.section syyyb
+.section syyza
+.section syyzb
+.section syy1a
+.section syy1b
+.section syy2a
+.section syy2b
+.section syy3a
+.section syy3b
+.section syy4a
+.section syy4b
+.section syy5a
+.section syy5b
+.section syy6a
+.section syy6b
+.section syy7a
+.section syy7b
+.section syy8a
+.section syy8b
+.section syy9a
+.section syy9b
+.section syy0a
+.section syy0b
+.section syzaa
+.section syzab
+.section syzba
+.section syzbb
+.section syzca
+.section syzcb
+.section syzda
+.section syzdb
+.section syzea
+.section syzeb
+.section syzfa
+.section syzfb
+.section syzga
+.section syzgb
+.section syzha
+.section syzhb
+.section syzia
+.section syzib
+.section syzja
+.section syzjb
+.section syzka
+.section syzkb
+.section syzla
+.section syzlb
+.section syzma
+.section syzmb
+.section syzna
+.section syznb
+.section syzoa
+.section syzob
+.section syzpa
+.section syzpb
+.section syzqa
+.section syzqb
+.section syzra
+.section syzrb
+.section syzsa
+.section syzsb
+.section syzta
+.section syztb
+.section syzua
+.section syzub
+.section syzva
+.section syzvb
+.section syzwa
+.section syzwb
+.section syzxa
+.section syzxb
+.section syzya
+.section syzyb
+.section syzza
+.section syzzb
+.section syz1a
+.section syz1b
+.section syz2a
+.section syz2b
+.section syz3a
+.section syz3b
+.section syz4a
+.section syz4b
+.section syz5a
+.section syz5b
+.section syz6a
+.section syz6b
+.section syz7a
+.section syz7b
+.section syz8a
+.section syz8b
+.section syz9a
+.section syz9b
+.section syz0a
+.section syz0b
+.section sy1aa
+.section sy1ab
+.section sy1ba
+.section sy1bb
+.section sy1ca
+.section sy1cb
+.section sy1da
+.section sy1db
+.section sy1ea
+.section sy1eb
+.section sy1fa
+.section sy1fb
+.section sy1ga
+.section sy1gb
+.section sy1ha
+.section sy1hb
+.section sy1ia
+.section sy1ib
+.section sy1ja
+.section sy1jb
+.section sy1ka
+.section sy1kb
+.section sy1la
+.section sy1lb
+.section sy1ma
+.section sy1mb
+.section sy1na
+.section sy1nb
+.section sy1oa
+.section sy1ob
+.section sy1pa
+.section sy1pb
+.section sy1qa
+.section sy1qb
+.section sy1ra
+.section sy1rb
+.section sy1sa
+.section sy1sb
+.section sy1ta
+.section sy1tb
+.section sy1ua
+.section sy1ub
+.section sy1va
+.section sy1vb
+.section sy1wa
+.section sy1wb
+.section sy1xa
+.section sy1xb
+.section sy1ya
+.section sy1yb
+.section sy1za
+.section sy1zb
+.section sy11a
+.section sy11b
+.section sy12a
+.section sy12b
+.section sy13a
+.section sy13b
+.section sy14a
+.section sy14b
+.section sy15a
+.section sy15b
+.section sy16a
+.section sy16b
+.section sy17a
+.section sy17b
+.section sy18a
+.section sy18b
+.section sy19a
+.section sy19b
+.section sy10a
+.section sy10b
+.section sy2aa
+.section sy2ab
+.section sy2ba
+.section sy2bb
+.section sy2ca
+.section sy2cb
+.section sy2da
+.section sy2db
+.section sy2ea
+.section sy2eb
+.section sy2fa
+.section sy2fb
+.section sy2ga
+.section sy2gb
+.section sy2ha
+.section sy2hb
+.section sy2ia
+.section sy2ib
+.section sy2ja
+.section sy2jb
+.section sy2ka
+.section sy2kb
+.section sy2la
+.section sy2lb
+.section sy2ma
+.section sy2mb
+.section sy2na
+.section sy2nb
+.section sy2oa
+.section sy2ob
+.section sy2pa
+.section sy2pb
+.section sy2qa
+.section sy2qb
+.section sy2ra
+.section sy2rb
+.section sy2sa
+.section sy2sb
+.section sy2ta
+.section sy2tb
+.section sy2ua
+.section sy2ub
+.section sy2va
+.section sy2vb
+.section sy2wa
+.section sy2wb
+.section sy2xa
+.section sy2xb
+.section sy2ya
+.section sy2yb
+.section sy2za
+.section sy2zb
+.section sy21a
+.section sy21b
+.section sy22a
+.section sy22b
+.section sy23a
+.section sy23b
+.section sy24a
+.section sy24b
+.section sy25a
+.section sy25b
+.section sy26a
+.section sy26b
+.section sy27a
+.section sy27b
+.section sy28a
+.section sy28b
+.section sy29a
+.section sy29b
+.section sy20a
+.section sy20b
+.section sy3aa
+.section sy3ab
+.section sy3ba
+.section sy3bb
+.section sy3ca
+.section sy3cb
+.section sy3da
+.section sy3db
+.section sy3ea
+.section sy3eb
+.section sy3fa
+.section sy3fb
+.section sy3ga
+.section sy3gb
+.section sy3ha
+.section sy3hb
+.section sy3ia
+.section sy3ib
+.section sy3ja
+.section sy3jb
+.section sy3ka
+.section sy3kb
+.section sy3la
+.section sy3lb
+.section sy3ma
+.section sy3mb
+.section sy3na
+.section sy3nb
+.section sy3oa
+.section sy3ob
+.section sy3pa
+.section sy3pb
+.section sy3qa
+.section sy3qb
+.section sy3ra
+.section sy3rb
+.section sy3sa
+.section sy3sb
+.section sy3ta
+.section sy3tb
+.section sy3ua
+.section sy3ub
+.section sy3va
+.section sy3vb
+.section sy3wa
+.section sy3wb
+.section sy3xa
+.section sy3xb
+.section sy3ya
+.section sy3yb
+.section sy3za
+.section sy3zb
+.section sy31a
+.section sy31b
+.section sy32a
+.section sy32b
+.section sy33a
+.section sy33b
+.section sy34a
+.section sy34b
+.section sy35a
+.section sy35b
+.section sy36a
+.section sy36b
+.section sy37a
+.section sy37b
+.section sy38a
+.section sy38b
+.section sy39a
+.section sy39b
+.section sy30a
+.section sy30b
+.section sy4aa
+.section sy4ab
+.section sy4ba
+.section sy4bb
+.section sy4ca
+.section sy4cb
+.section sy4da
+.section sy4db
+.section sy4ea
+.section sy4eb
+.section sy4fa
+.section sy4fb
+.section sy4ga
+.section sy4gb
+.section sy4ha
+.section sy4hb
+.section sy4ia
+.section sy4ib
+.section sy4ja
+.section sy4jb
+.section sy4ka
+.section sy4kb
+.section sy4la
+.section sy4lb
+.section sy4ma
+.section sy4mb
+.section sy4na
+.section sy4nb
+.section sy4oa
+.section sy4ob
+.section sy4pa
+.section sy4pb
+.section sy4qa
+.section sy4qb
+.section sy4ra
+.section sy4rb
+.section sy4sa
+.section sy4sb
+.section sy4ta
+.section sy4tb
+.section sy4ua
+.section sy4ub
+.section sy4va
+.section sy4vb
+.section sy4wa
+.section sy4wb
+.section sy4xa
+.section sy4xb
+.section sy4ya
+.section sy4yb
+.section sy4za
+.section sy4zb
+.section sy41a
+.section sy41b
+.section sy42a
+.section sy42b
+.section sy43a
+.section sy43b
+.section sy44a
+.section sy44b
+.section sy45a
+.section sy45b
+.section sy46a
+.section sy46b
+.section sy47a
+.section sy47b
+.section sy48a
+.section sy48b
+.section sy49a
+.section sy49b
+.section sy40a
+.section sy40b
+.section sy5aa
+.section sy5ab
+.section sy5ba
+.section sy5bb
+.section sy5ca
+.section sy5cb
+.section sy5da
+.section sy5db
+.section sy5ea
+.section sy5eb
+.section sy5fa
+.section sy5fb
+.section sy5ga
+.section sy5gb
+.section sy5ha
+.section sy5hb
+.section sy5ia
+.section sy5ib
+.section sy5ja
+.section sy5jb
+.section sy5ka
+.section sy5kb
+.section sy5la
+.section sy5lb
+.section sy5ma
+.section sy5mb
+.section sy5na
+.section sy5nb
+.section sy5oa
+.section sy5ob
+.section sy5pa
+.section sy5pb
+.section sy5qa
+.section sy5qb
+.section sy5ra
+.section sy5rb
+.section sy5sa
+.section sy5sb
+.section sy5ta
+.section sy5tb
+.section sy5ua
+.section sy5ub
+.section sy5va
+.section sy5vb
+.section sy5wa
+.section sy5wb
+.section sy5xa
+.section sy5xb
+.section sy5ya
+.section sy5yb
+.section sy5za
+.section sy5zb
+.section sy51a
+.section sy51b
+.section sy52a
+.section sy52b
+.section sy53a
+.section sy53b
+.section sy54a
+.section sy54b
+.section sy55a
+.section sy55b
+.section sy56a
+.section sy56b
+.section sy57a
+.section sy57b
+.section sy58a
+.section sy58b
+.section sy59a
+.section sy59b
+.section sy50a
+.section sy50b
+.section sy6aa
+.section sy6ab
+.section sy6ba
+.section sy6bb
+.section sy6ca
+.section sy6cb
+.section sy6da
+.section sy6db
+.section sy6ea
+.section sy6eb
+.section sy6fa
+.section sy6fb
+.section sy6ga
+.section sy6gb
+.section sy6ha
+.section sy6hb
+.section sy6ia
+.section sy6ib
+.section sy6ja
+.section sy6jb
+.section sy6ka
+.section sy6kb
+.section sy6la
+.section sy6lb
+.section sy6ma
+.section sy6mb
+.section sy6na
+.section sy6nb
+.section sy6oa
+.section sy6ob
+.section sy6pa
+.section sy6pb
+.section sy6qa
+.section sy6qb
+.section sy6ra
+.section sy6rb
+.section sy6sa
+.section sy6sb
+.section sy6ta
+.section sy6tb
+.section sy6ua
+.section sy6ub
+.section sy6va
+.section sy6vb
+.section sy6wa
+.section sy6wb
+.section sy6xa
+.section sy6xb
+.section sy6ya
+.section sy6yb
+.section sy6za
+.section sy6zb
+.section sy61a
+.section sy61b
+.section sy62a
+.section sy62b
+.section sy63a
+.section sy63b
+.section sy64a
+.section sy64b
+.section sy65a
+.section sy65b
+.section sy66a
+.section sy66b
+.section sy67a
+.section sy67b
+.section sy68a
+.section sy68b
+.section sy69a
+.section sy69b
+.section sy60a
+.section sy60b
+.section sy7aa
+.section sy7ab
+.section sy7ba
+.section sy7bb
+.section sy7ca
+.section sy7cb
+.section sy7da
+.section sy7db
+.section sy7ea
+.section sy7eb
+.section sy7fa
+.section sy7fb
+.section sy7ga
+.section sy7gb
+.section sy7ha
+.section sy7hb
+.section sy7ia
+.section sy7ib
+.section sy7ja
+.section sy7jb
+.section sy7ka
+.section sy7kb
+.section sy7la
+.section sy7lb
+.section sy7ma
+.section sy7mb
+.section sy7na
+.section sy7nb
+.section sy7oa
+.section sy7ob
+.section sy7pa
+.section sy7pb
+.section sy7qa
+.section sy7qb
+.section sy7ra
+.section sy7rb
+.section sy7sa
+.section sy7sb
+.section sy7ta
+.section sy7tb
+.section sy7ua
+.section sy7ub
+.section sy7va
+.section sy7vb
+.section sy7wa
+.section sy7wb
+.section sy7xa
+.section sy7xb
+.section sy7ya
+.section sy7yb
+.section sy7za
+.section sy7zb
+.section sy71a
+.section sy71b
+.section sy72a
+.section sy72b
+.section sy73a
+.section sy73b
+.section sy74a
+.section sy74b
+.section sy75a
+.section sy75b
+.section sy76a
+.section sy76b
+.section sy77a
+.section sy77b
+.section sy78a
+.section sy78b
+.section sy79a
+.section sy79b
+.section sy70a
+.section sy70b
+.section sy8aa
+.section sy8ab
+.section sy8ba
+.section sy8bb
+.section sy8ca
+.section sy8cb
+.section sy8da
+.section sy8db
+.section sy8ea
+.section sy8eb
+.section sy8fa
+.section sy8fb
+.section sy8ga
+.section sy8gb
+.section sy8ha
+.section sy8hb
+.section sy8ia
+.section sy8ib
+.section sy8ja
+.section sy8jb
+.section sy8ka
+.section sy8kb
+.section sy8la
+.section sy8lb
+.section sy8ma
+.section sy8mb
+.section sy8na
+.section sy8nb
+.section sy8oa
+.section sy8ob
+.section sy8pa
+.section sy8pb
+.section sy8qa
+.section sy8qb
+.section sy8ra
+.section sy8rb
+.section sy8sa
+.section sy8sb
+.section sy8ta
+.section sy8tb
+.section sy8ua
+.section sy8ub
+.section sy8va
+.section sy8vb
+.section sy8wa
+.section sy8wb
+.section sy8xa
+.section sy8xb
+.section sy8ya
+.section sy8yb
+.section sy8za
+.section sy8zb
+.section sy81a
+.section sy81b
+.section sy82a
+.section sy82b
+.section sy83a
+.section sy83b
+.section sy84a
+.section sy84b
+.section sy85a
+.section sy85b
+.section sy86a
+.section sy86b
+.section sy87a
+.section sy87b
+.section sy88a
+.section sy88b
+.section sy89a
+.section sy89b
+.section sy80a
+.section sy80b
+.section sy9aa
+.section sy9ab
+.section sy9ba
+.section sy9bb
+.section sy9ca
+.section sy9cb
+.section sy9da
+.section sy9db
+.section sy9ea
+.section sy9eb
+.section sy9fa
+.section sy9fb
+.section sy9ga
+.section sy9gb
+.section sy9ha
+.section sy9hb
+.section sy9ia
+.section sy9ib
+.section sy9ja
+.section sy9jb
+.section sy9ka
+.section sy9kb
+.section sy9la
+.section sy9lb
+.section sy9ma
+.section sy9mb
+.section sy9na
+.section sy9nb
+.section sy9oa
+.section sy9ob
+.section sy9pa
+.section sy9pb
+.section sy9qa
+.section sy9qb
+.section sy9ra
+.section sy9rb
+.section sy9sa
+.section sy9sb
+.section sy9ta
+.section sy9tb
+.section sy9ua
+.section sy9ub
+.section sy9va
+.section sy9vb
+.section sy9wa
+.section sy9wb
+.section sy9xa
+.section sy9xb
+.section sy9ya
+.section sy9yb
+.section sy9za
+.section sy9zb
+.section sy91a
+.section sy91b
+.section sy92a
+.section sy92b
+.section sy93a
+.section sy93b
+.section sy94a
+.section sy94b
+.section sy95a
+.section sy95b
+.section sy96a
+.section sy96b
+.section sy97a
+.section sy97b
+.section sy98a
+.section sy98b
+.section sy99a
+.section sy99b
+.section sy90a
+.section sy90b
+.section sy0aa
+.section sy0ab
+.section sy0ba
+.section sy0bb
+.section sy0ca
+.section sy0cb
+.section sy0da
+.section sy0db
+.section sy0ea
+.section sy0eb
+.section sy0fa
+.section sy0fb
+.section sy0ga
+.section sy0gb
+.section sy0ha
+.section sy0hb
+.section sy0ia
+.section sy0ib
+.section sy0ja
+.section sy0jb
+.section sy0ka
+.section sy0kb
+.section sy0la
+.section sy0lb
+.section sy0ma
+.section sy0mb
+.section sy0na
+.section sy0nb
+.section sy0oa
+.section sy0ob
+.section sy0pa
+.section sy0pb
+.section sy0qa
+.section sy0qb
+.section sy0ra
+.section sy0rb
+.section sy0sa
+.section sy0sb
+.section sy0ta
+.section sy0tb
+.section sy0ua
+.section sy0ub
+.section sy0va
+.section sy0vb
+.section sy0wa
+.section sy0wb
+.section sy0xa
+.section sy0xb
+.section sy0ya
+.section sy0yb
+.section sy0za
+.section sy0zb
+.section sy01a
+.section sy01b
+.section sy02a
+.section sy02b
+.section sy03a
+.section sy03b
+.section sy04a
+.section sy04b
+.section sy05a
+.section sy05b
+.section sy06a
+.section sy06b
+.section sy07a
+.section sy07b
+.section sy08a
+.section sy08b
+.section sy09a
+.section sy09b
+.section sy00a
+.section sy00b
+.section szaaa
+.section szaab
+.section szaba
+.section szabb
+.section szaca
+.section szacb
+.section szada
+.section szadb
+.section szaea
+.section szaeb
+.section szafa
+.section szafb
+.section szaga
+.section szagb
+.section szaha
+.section szahb
+.section szaia
+.section szaib
+.section szaja
+.section szajb
+.section szaka
+.section szakb
+.section szala
+.section szalb
+.section szama
+.section szamb
+.section szana
+.section szanb
+.section szaoa
+.section szaob
+.section szapa
+.section szapb
+.section szaqa
+.section szaqb
+.section szara
+.section szarb
+.section szasa
+.section szasb
+.section szata
+.section szatb
+.section szaua
+.section szaub
+.section szava
+.section szavb
+.section szawa
+.section szawb
+.section szaxa
+.section szaxb
+.section szaya
+.section szayb
+.section szaza
+.section szazb
+.section sza1a
+.section sza1b
+.section sza2a
+.section sza2b
+.section sza3a
+.section sza3b
+.section sza4a
+.section sza4b
+.section sza5a
+.section sza5b
+.section sza6a
+.section sza6b
+.section sza7a
+.section sza7b
+.section sza8a
+.section sza8b
+.section sza9a
+.section sza9b
+.section sza0a
+.section sza0b
+.section szbaa
+.section szbab
+.section szbba
+.section szbbb
+.section szbca
+.section szbcb
+.section szbda
+.section szbdb
+.section szbea
+.section szbeb
+.section szbfa
+.section szbfb
+.section szbga
+.section szbgb
+.section szbha
+.section szbhb
+.section szbia
+.section szbib
+.section szbja
+.section szbjb
+.section szbka
+.section szbkb
+.section szbla
+.section szblb
+.section szbma
+.section szbmb
+.section szbna
+.section szbnb
+.section szboa
+.section szbob
+.section szbpa
+.section szbpb
+.section szbqa
+.section szbqb
+.section szbra
+.section szbrb
+.section szbsa
+.section szbsb
+.section szbta
+.section szbtb
+.section szbua
+.section szbub
+.section szbva
+.section szbvb
+.section szbwa
+.section szbwb
+.section szbxa
+.section szbxb
+.section szbya
+.section szbyb
+.section szbza
+.section szbzb
+.section szb1a
+.section szb1b
+.section szb2a
+.section szb2b
+.section szb3a
+.section szb3b
+.section szb4a
+.section szb4b
+.section szb5a
+.section szb5b
+.section szb6a
+.section szb6b
+.section szb7a
+.section szb7b
+.section szb8a
+.section szb8b
+.section szb9a
+.section szb9b
+.section szb0a
+.section szb0b
+.section szcaa
+.section szcab
+.section szcba
+.section szcbb
+.section szcca
+.section szccb
+.section szcda
+.section szcdb
+.section szcea
+.section szceb
+.section szcfa
+.section szcfb
+.section szcga
+.section szcgb
+.section szcha
+.section szchb
+.section szcia
+.section szcib
+.section szcja
+.section szcjb
+.section szcka
+.section szckb
+.section szcla
+.section szclb
+.section szcma
+.section szcmb
+.section szcna
+.section szcnb
+.section szcoa
+.section szcob
+.section szcpa
+.section szcpb
+.section szcqa
+.section szcqb
+.section szcra
+.section szcrb
+.section szcsa
+.section szcsb
+.section szcta
+.section szctb
+.section szcua
+.section szcub
+.section szcva
+.section szcvb
+.section szcwa
+.section szcwb
+.section szcxa
+.section szcxb
+.section szcya
+.section szcyb
+.section szcza
+.section szczb
+.section szc1a
+.section szc1b
+.section szc2a
+.section szc2b
+.section szc3a
+.section szc3b
+.section szc4a
+.section szc4b
+.section szc5a
+.section szc5b
+.section szc6a
+.section szc6b
+.section szc7a
+.section szc7b
+.section szc8a
+.section szc8b
+.section szc9a
+.section szc9b
+.section szc0a
+.section szc0b
+.section szdaa
+.section szdab
+.section szdba
+.section szdbb
+.section szdca
+.section szdcb
+.section szdda
+.section szddb
+.section szdea
+.section szdeb
+.section szdfa
+.section szdfb
+.section szdga
+.section szdgb
+.section szdha
+.section szdhb
+.section szdia
+.section szdib
+.section szdja
+.section szdjb
+.section szdka
+.section szdkb
+.section szdla
+.section szdlb
+.section szdma
+.section szdmb
+.section szdna
+.section szdnb
+.section szdoa
+.section szdob
+.section szdpa
+.section szdpb
+.section szdqa
+.section szdqb
+.section szdra
+.section szdrb
+.section szdsa
+.section szdsb
+.section szdta
+.section szdtb
+.section szdua
+.section szdub
+.section szdva
+.section szdvb
+.section szdwa
+.section szdwb
+.section szdxa
+.section szdxb
+.section szdya
+.section szdyb
+.section szdza
+.section szdzb
+.section szd1a
+.section szd1b
+.section szd2a
+.section szd2b
+.section szd3a
+.section szd3b
+.section szd4a
+.section szd4b
+.section szd5a
+.section szd5b
+.section szd6a
+.section szd6b
+.section szd7a
+.section szd7b
+.section szd8a
+.section szd8b
+.section szd9a
+.section szd9b
+.section szd0a
+.section szd0b
+.section szeaa
+.section szeab
+.section szeba
+.section szebb
+.section szeca
+.section szecb
+.section szeda
+.section szedb
+.section szeea
+.section szeeb
+.section szefa
+.section szefb
+.section szega
+.section szegb
+.section szeha
+.section szehb
+.section szeia
+.section szeib
+.section szeja
+.section szejb
+.section szeka
+.section szekb
+.section szela
+.section szelb
+.section szema
+.section szemb
+.section szena
+.section szenb
+.section szeoa
+.section szeob
+.section szepa
+.section szepb
+.section szeqa
+.section szeqb
+.section szera
+.section szerb
+.section szesa
+.section szesb
+.section szeta
+.section szetb
+.section szeua
+.section szeub
+.section szeva
+.section szevb
+.section szewa
+.section szewb
+.section szexa
+.section szexb
+.section szeya
+.section szeyb
+.section szeza
+.section szezb
+.section sze1a
+.section sze1b
+.section sze2a
+.section sze2b
+.section sze3a
+.section sze3b
+.section sze4a
+.section sze4b
+.section sze5a
+.section sze5b
+.section sze6a
+.section sze6b
+.section sze7a
+.section sze7b
+.section sze8a
+.section sze8b
+.section sze9a
+.section sze9b
+.section sze0a
+.section sze0b
+.section szfaa
+.section szfab
+.section szfba
+.section szfbb
+.section szfca
+.section szfcb
+.section szfda
+.section szfdb
+.section szfea
+.section szfeb
+.section szffa
+.section szffb
+.section szfga
+.section szfgb
+.section szfha
+.section szfhb
+.section szfia
+.section szfib
+.section szfja
+.section szfjb
+.section szfka
+.section szfkb
+.section szfla
+.section szflb
+.section szfma
+.section szfmb
+.section szfna
+.section szfnb
+.section szfoa
+.section szfob
+.section szfpa
+.section szfpb
+.section szfqa
+.section szfqb
+.section szfra
+.section szfrb
+.section szfsa
+.section szfsb
+.section szfta
+.section szftb
+.section szfua
+.section szfub
+.section szfva
+.section szfvb
+.section szfwa
+.section szfwb
+.section szfxa
+.section szfxb
+.section szfya
+.section szfyb
+.section szfza
+.section szfzb
+.section szf1a
+.section szf1b
+.section szf2a
+.section szf2b
+.section szf3a
+.section szf3b
+.section szf4a
+.section szf4b
+.section szf5a
+.section szf5b
+.section szf6a
+.section szf6b
+.section szf7a
+.section szf7b
+.section szf8a
+.section szf8b
+.section szf9a
+.section szf9b
+.section szf0a
+.section szf0b
+.section szgaa
+.section szgab
+.section szgba
+.section szgbb
+.section szgca
+.section szgcb
+.section szgda
+.section szgdb
+.section szgea
+.section szgeb
+.section szgfa
+.section szgfb
+.section szgga
+.section szggb
+.section szgha
+.section szghb
+.section szgia
+.section szgib
+.section szgja
+.section szgjb
+.section szgka
+.section szgkb
+.section szgla
+.section szglb
+.section szgma
+.section szgmb
+.section szgna
+.section szgnb
+.section szgoa
+.section szgob
+.section szgpa
+.section szgpb
+.section szgqa
+.section szgqb
+.section szgra
+.section szgrb
+.section szgsa
+.section szgsb
+.section szgta
+.section szgtb
+.section szgua
+.section szgub
+.section szgva
+.section szgvb
diff --git a/test/MC/ELF/no-fixup.s b/test/MC/ELF/no-fixup.s
index 6e719bcc8c11..9d3489262f4f 100644
--- a/test/MC/ELF/no-fixup.s
+++ b/test/MC/ELF/no-fixup.s
@@ -1,14 +1,11 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t -stats 2>%t.out
-// RUN: FileCheck --input-file=%t.out %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t
+// RUN: llvm-objdump -r %t | FileCheck %s
-// Test that we create no fixups for this file since "a" and "b" are in the
-// same fragment.
+// Test that we create no fixups for this file since "a" and "b"
+// are in the same fragment. If b were in a different section, a
+// fixup causing a relocation would be generated in the object file.
-// CHECK: assembler - Number of assembler layout and relaxation steps
-// CHECK-NEXT: assembler - Number of emitted assembler fragments
-// CHECK-NEXT: assembler - Number of emitted object file bytes
-// CHECK-NEXT: assembler - Number of fragment layouts
-// CHECK-NEXT: mcexpr - Number of MCExpr evaluations
+// CHECK-NOT: RELOCATION RECORDS
a:
nop
diff --git a/test/MC/ELF/relax-all-flag.s b/test/MC/ELF/relax-all-flag.s
new file mode 100644
index 000000000000..4c1c78c3925d
--- /dev/null
+++ b/test/MC/ELF/relax-all-flag.s
@@ -0,0 +1,19 @@
+// By default, the jmp here does not need relaxation (so the 0xeb opcode can be
+// used).
+// However, with -mc-relax-all passed to MC, all jumps are relaxed and we
+// expect to see a different instruction.
+
+// RUN: llvm-mc -filetype=obj -mc-relax-all -triple x86_64-pc-linux-gnu %s -o - \
+// RUN: | llvm-objdump -disassemble - | FileCheck -check-prefix=RELAXALL %s
+
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+// RUN: | llvm-objdump -disassemble - | FileCheck %s
+
+.text
+foo:
+ mov %rax, %rax
+ jmp foo
+
+// RELAXALL: 3: e9
+// CHECK: 3: eb
+
diff --git a/test/MC/MachO/ARM/lit.local.cfg b/test/MC/MachO/ARM/lit.local.cfg
index 89764637feb0..9f0d39d9217b 100644
--- a/test/MC/MachO/ARM/lit.local.cfg
+++ b/test/MC/MachO/ARM/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.s']
+config.suffixes = ['.s', '.ll']
targets = set(config.root.targets_to_build.split())
if not 'ARM' in targets:
diff --git a/test/MC/MachO/ARM/nop-armv4-padding.s b/test/MC/MachO/ARM/nop-armv4-padding.s
index 8f646dbb396a..8e03d17a70c9 100644
--- a/test/MC/MachO/ARM/nop-armv4-padding.s
+++ b/test/MC/MachO/ARM/nop-armv4-padding.s
@@ -7,4 +7,4 @@ x:
.align 4
add r0, r1, r2
-@ CHECK: ('_section_data', '020081e0 00001a0e 00001a0e 00001a0e 020081e0')
+@ CHECK: ('_section_data', '020081e0 0000a0e1 0000a0e1 0000a0e1 020081e0')
diff --git a/test/MC/MachO/bad-dollar.s b/test/MC/MachO/bad-dollar.s
new file mode 100644
index 000000000000..fd72ed0230db
--- /dev/null
+++ b/test/MC/MachO/bad-dollar.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+
+.long $1
+// CHECK-ERROR: 4:7: error: invalid token in expression
diff --git a/test/MC/MachO/bad-macro.s b/test/MC/MachO/bad-macro.s
new file mode 100644
index 000000000000..0aaba099e853
--- /dev/null
+++ b/test/MC/MachO/bad-macro.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-OUTPUT < %t %s
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+
+.macro test_macro reg1, reg2
+mov $1, %eax
+mov $2, %eax
+.endmacro
+test_macro %ebx, %ecx
+
+// CHECK-ERROR: 5:1: warning: macro defined with named parameters which are not used in macro body, possible positional parameter found in body which will have no effect
+
+// CHECK-OUTPUT: movl $1, %eax
+// CHECK-OUTPUT: movl $2, %eax
diff --git a/test/MC/MachO/gen-dwarf-cpp.s b/test/MC/MachO/gen-dwarf-cpp.s
index cb749f48eef6..e42a63a191b1 100644
--- a/test/MC/MachO/gen-dwarf-cpp.s
+++ b/test/MC/MachO/gen-dwarf-cpp.s
@@ -1,5 +1,5 @@
// RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
-// RUN: llvm-dwarfdump %t | FileCheck %s
+// RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s
# 100 "t.s" 1
.globl _bar
diff --git a/test/MC/MachO/gen-dwarf-macro-cpp.s b/test/MC/MachO/gen-dwarf-macro-cpp.s
index 05a449b4027c..6177814b6a78 100644
--- a/test/MC/MachO/gen-dwarf-macro-cpp.s
+++ b/test/MC/MachO/gen-dwarf-macro-cpp.s
@@ -1,5 +1,5 @@
// RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
-// RUN: llvm-dwarfdump %t | FileCheck %s
+// RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s
# 1 "foo.S" 2
.macro switcher
diff --git a/test/MC/MachO/gen-dwarf-producer.s b/test/MC/MachO/gen-dwarf-producer.s
new file mode 100644
index 000000000000..f7388db3a240
--- /dev/null
+++ b/test/MC/MachO/gen-dwarf-producer.s
@@ -0,0 +1,8 @@
+// RUN: env DEBUG_PRODUCER="my producer" llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
+// RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+.globl _bar
+_bar:
+ ret
+
+// CHECK: DW_AT_producer [DW_FORM_string] ("my producer")
diff --git a/test/MC/MachO/gen-dwarf.s b/test/MC/MachO/gen-dwarf.s
index 4fbc32d295b7..d763dd120ab2 100644
--- a/test/MC/MachO/gen-dwarf.s
+++ b/test/MC/MachO/gen-dwarf.s
@@ -1,5 +1,5 @@
// RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
-// RUN: llvm-dwarfdump %t | FileCheck %s
+// RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
.globl _bar
_bar:
@@ -86,7 +86,7 @@ _x: .long 1
// CHECK: .debug_aranges contents:
// CHECK: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00
-// CHECK: .debug_lines contents:
+// CHECK: .debug_line contents:
// CHECK: Line table prologue:
// We don't check the total_length as it includes lengths of temp paths
// CHECK: version: 2
diff --git a/test/MC/MachO/linker-option-1.s b/test/MC/MachO/linker-option-1.s
new file mode 100644
index 000000000000..a01cab78f397
--- /dev/null
+++ b/test/MC/MachO/linker-option-1.s
@@ -0,0 +1,21 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-OUTPUT < %t %s
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+
+// CHECK-OUTPUT: .linker_option "a"
+.linker_option "a"
+// CHECK-OUTPUT: .linker_option "a", "b"
+.linker_option "a", "b"
+// CHECK-OUTPUT-NOT: .linker_option
+// CHECK-ERROR: expected string in '.linker_option' directive
+// CHECK-ERROR: .linker_option 10
+// CHECK-ERROR: ^
+.linker_option 10
+// CHECK-ERROR: expected string in '.linker_option' directive
+// CHECK-ERROR: .linker_option "a",
+// CHECK-ERROR: ^
+.linker_option "a",
+// CHECK-ERROR: unexpected token in '.linker_option' directive
+// CHECK-ERROR: .linker_option "a" "b"
+// CHECK-ERROR: ^
+.linker_option "a" "b"
diff --git a/test/MC/MachO/linker-option-2.s b/test/MC/MachO/linker-option-2.s
new file mode 100644
index 000000000000..bb5966be2734
--- /dev/null
+++ b/test/MC/MachO/linker-option-2.s
@@ -0,0 +1,25 @@
+// RUN: llvm-mc -n -triple x86_64-apple-darwin10 %s -filetype=obj | macho-dump | FileCheck %s
+
+// CHECK: ('load_commands_size', 104)
+// CHECK: ('load_commands', [
+// CHECK: # Load Command 1
+// CHECK: (('command', 45)
+// CHECK: ('size', 16)
+// CHECK: ('count', 1)
+// CHECK: ('_strings', [
+// CHECK: "a",
+// CHECK: ])
+// CHECK: ),
+// CHECK: # Load Command 2
+// CHECK: (('command', 45)
+// CHECK: ('size', 16)
+// CHECK: ('count', 2)
+// CHECK: ('_strings', [
+// CHECK: "a",
+// CHECK: "b",
+// CHECK: ])
+// CHECK: ),
+// CHECK: ])
+
+.linker_option "a"
+.linker_option "a", "b"
diff --git a/test/MC/MachO/linker-options.ll b/test/MC/MachO/linker-options.ll
new file mode 100644
index 000000000000..827adfd70890
--- /dev/null
+++ b/test/MC/MachO/linker-options.ll
@@ -0,0 +1,43 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin -o - %s > %t
+; RUN: FileCheck --check-prefix=CHECK-ASM < %t %s
+
+; CHECK-ASM: .linker_option "-lz"
+; CHECK-ASM-NEXT: .linker_option "-framework", "Cocoa"
+
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | macho-dump > %t
+; RUN: FileCheck --check-prefix=CHECK-OBJ < %t %s
+
+; CHECK-OBJ: ('load_commands', [
+; CHECK-OBJ: # Load Command 1
+; CHECK-OBJ: (('command', 45)
+; CHECK-OBJ: ('size', 16)
+; CHECK-OBJ: ('count', 1)
+; CHECK-OBJ: ('_strings', [
+; CHECK-OBJ: "-lz",
+; CHECK-OBJ: ])
+; CHECK-OBJ: ),
+; CHECK-OBJ: # Load Command 2
+; CHECK-OBJ: (('command', 45)
+; CHECK-OBJ: ('size', 32)
+; CHECK-OBJ: ('count', 2)
+; CHECK-OBJ: ('_strings', [
+; CHECK-OBJ: "-framework",
+; CHECK-OBJ: "Cocoa",
+; CHECK-OBJ: ])
+; CHECK-OBJ: # Load Command 3
+; CHECK-OBJ: (('command', 45)
+; CHECK-OBJ: ('size', 24)
+; CHECK-OBJ: ('count', 1)
+; CHECK-OBJ: ('_strings', [
+; CHECK-OBJ: "-lmath",
+; CHECK-OBJ: ])
+; CHECK-OBJ: ),
+; CHECK-OBJ: ])
+
+!0 = metadata !{ i32 6, metadata !"Linker Options",
+ metadata !{
+ metadata !{ metadata !"-lz" },
+ metadata !{ metadata !"-framework", metadata !"Cocoa" },
+ metadata !{ metadata !"-lmath" } } }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/MC/Mips/eh-frame.s b/test/MC/Mips/eh-frame.s
new file mode 100644
index 000000000000..93ff0b8bd277
--- /dev/null
+++ b/test/MC/Mips/eh-frame.s
@@ -0,0 +1,167 @@
+// Test the bits of .eh_frame on mips that are already implemented correctly.
+
+// FIXME: This test would be a lot cleaner if llvm-objdump had the
+// --dwarf=frames option.
+
+// RUN: llvm-mc -filetype=obj %s -o %t.o -arch=mips
+// RUN: llvm-objdump -r -s %t.o | FileCheck --check-prefix=MIPS32 %s
+
+// RUN: llvm-mc -filetype=obj %s -o %t.o -arch=mipsel
+// RUN: llvm-objdump -r -s %t.o | FileCheck --check-prefix=MIPS32EL %s
+
+// RUN: llvm-mc -filetype=obj %s -o %t.o -arch=mips64
+// RUN: llvm-objdump -r -s %t.o | FileCheck --check-prefix=MIPS64 %s
+
+// RUN: llvm-mc -filetype=obj %s -o %t.o -arch=mips64el
+// RUN: llvm-objdump -r -s %t.o | FileCheck --check-prefix=MIPS64EL %s
+
+func:
+ .cfi_startproc
+ .cfi_endproc
+
+// MIPS32: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS32-NEXT: R_MIPS_32
+// MIPS32: Contents of section .eh_frame:
+// MIPS32-NEXT: 0000
+
+// Length
+// MIPS32: 00000010
+
+// CIE ID
+// MIPS32: 00000000
+
+// Version
+// MIPS32: 01
+
+// Augmentation String
+// MIPS32: 7a5200
+
+// Code Alignment Factor
+// MIPS32: 01
+
+// Data Alignment Factor (-4)
+// MIPS32: 7c
+
+// Return Address Register
+// MIPS32: 1f
+
+// Augmentation Size
+// MIPS32: 01
+
+// MIPS32: .........zR..|..
+// MIPS32-NEXT: 0010
+
+// Augmentation (fde pointer encoding: DW_EH_PE_sdata4)
+// MIPS32: 0b
+// FIXME: The instructions are different from the ones produced by gas.
+
+// MIPS32EL: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS32EL-NEXT: R_MIPS_32
+// MIPS32EL: Contents of section .eh_frame:
+// MIPS32EL-NEXT: 0000
+
+// Length
+// MIPS32EL: 10000000
+
+// CIE ID
+// MIPS32EL: 00000000
+
+// Version
+// MIPS32EL: 01
+
+// Augmentation String
+// MIPS32EL: 7a5200
+
+// Code Alignment Factor
+// MIPS32EL: 01
+
+// Data Alignment Factor (-4)
+// MIPS32EL: 7c
+
+// Return Address Register
+// MIPS32EL: 1f
+
+// Augmentation Size
+// MIPS32EL: 01
+
+// MIPS32EL: .........zR..|..
+// MIPS32EL-NEXT: 0010
+
+// Augmentation (fde pointer encoding: DW_EH_PE_sdata4)
+// MIPS32EL: 0b
+// FIXME: The instructions are different from the ones produced by gas.
+
+// MIPS64: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS64-NEXT: R_MIPS_64
+// MIPS64: Contents of section .eh_frame:
+// MIPS64-NEXT: 0000
+
+// Length
+// MIPS64: 00000010
+
+// CIE ID
+// MIPS64: 00000000
+
+// Version
+// MIPS64: 01
+
+// Augmentation String
+// MIPS64: 7a5200
+
+// Code Alignment Factor
+// MIPS64: 01
+
+// Data Alignment Factor (-8). GAS uses -4. Should be ok as long as all
+// offsets we need are a multiple of 8.
+// MIPS64: 78
+
+// Return Address Register
+// MIPS64: 1f
+
+// Augmentation Size
+// MIPS64: 01
+
+// MIPS64: .........zR..x..
+// MIPS64-NEXT: 0010
+
+// Augmentation (fde pointer encoding: DW_EH_PE_sdata8)
+// MIPS64: 0c
+// FIXME: The instructions are different from the ones produced by gas.
+
+
+// MIPS64EL: RELOCATION RECORDS FOR [.eh_frame]:
+// MIPS64EL-NEXT: R_MIPS_64
+// MIPS64EL: Contents of section .eh_frame:
+// MIPS64EL-NEXT: 0000
+
+// Length
+// MIPS64EL: 10000000
+
+// CIE ID
+// MIPS64EL: 00000000
+
+// Version
+// MIPS64EL: 01
+
+// Augmentation String
+// MIPS64EL: 7a5200
+
+// Code Alignment Factor
+// MIPS64EL: 01
+
+// Data Alignment Factor (-8). GAS uses -4. Should be ok as long as all
+// offsets we need are a multiple of 8.
+// MIPS64EL: 78
+
+// Return Address Register
+// MIPS64EL: 1f
+
+// Augmentation Size
+// MIPS64EL: 01
+
+// MIPS64EL: .........zR..x..
+// MIPS64EL-NEXT: 0010
+
+// Augmentation (fde pointer encoding: DW_EH_PE_sdata8)
+// MIPS64EL: 0c
+// FIXME: The instructions are different from the ones produced by gas.
diff --git a/test/MC/Mips/elf-gprel-32-64.ll b/test/MC/Mips/elf-gprel-32-64.ll
new file mode 100644
index 000000000000..b94682214df7
--- /dev/null
+++ b/test/MC/Mips/elf-gprel-32-64.ll
@@ -0,0 +1,37 @@
+; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - \
+; RUN: | elf-dump --dump-section-data \
+; RUN: | FileCheck %s
+
+define i32 @test(i32 %c) nounwind {
+entry:
+ switch i32 %c, label %sw.default [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb2
+ i32 2, label %sw.bb5
+ i32 3, label %sw.bb8
+ ]
+
+sw.bb:
+ br label %return
+sw.bb2:
+ br label %return
+sw.bb5:
+ br label %return
+sw.bb8:
+ br label %return
+sw.default:
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ -1, %sw.default ], [ 7, %sw.bb8 ], [ 2, %sw.bb5 ], [ 3, %sw.bb2 ], [ 1, %sw.bb ]
+ ret i32 %retval.0
+}
+
+; Check that the appropriate relocations were created.
+
+; R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
+; CHECK: (('sh_name', 0x{{[a-z0-9]+}}) # '.rela.rodata'
+; CHECK: ('r_type3', 0x00)
+; CHECK-NEXT: ('r_type2', 0x12)
+; CHECK-NEXT: ('r_type', 0x0c)
+
diff --git a/test/MC/Mips/elf-reginfo.ll b/test/MC/Mips/elf-reginfo.ll
new file mode 100644
index 000000000000..1d7a18866c50
--- /dev/null
+++ b/test/MC/Mips/elf-reginfo.ll
@@ -0,0 +1,31 @@
+ ; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - \
+ ; RUN: | elf-dump --dump-section-data | FileCheck --check-prefix=CHECK_64 %s
+ ; RUN: llc -filetype=obj -march=mipsel -mcpu=mips32 %s -o - \
+ ; RUN: | elf-dump --dump-section-data | FileCheck --check-prefix=CHECK_32 %s
+
+; Check for register information sections.
+;
+
+@str = private unnamed_addr constant [12 x i8] c"hello world\00"
+
+define i32 @main() nounwind {
+entry:
+; Check that the appropriate relocations were created.
+
+; check for .MIPS.options
+; CHECK_64: (('sh_name', 0x{{[0-9|a-f]+}}) # '.MIPS.options'
+; CHECK_64-NEXT: ('sh_type', 0x7000000d)
+; CHECK_64-NEXT: ('sh_flags', 0x0000000008000002)
+
+; check for .reginfo
+; CHECK_32: (('sh_name', 0x{{[0-9|a-f]+}}) # '.reginfo'
+; CHECK_32-NEXT: ('sh_type', 0x70000006)
+; CHECK_32-NEXT: ('sh_flags', 0x00000002)
+
+
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i64 0, i64 0))
+ ret i32 0
+
+}
+declare i32 @puts(i8* nocapture) nounwind
+
diff --git a/test/MC/Mips/elf_eflags.ll b/test/MC/Mips/elf_eflags.ll
new file mode 100644
index 000000000000..315cb812bf77
--- /dev/null
+++ b/test/MC/Mips/elf_eflags.ll
@@ -0,0 +1,66 @@
+; This tests ELF EFLAGS setting with direct object emission.
+; When the assembler is ready, a .s file for it will
+; be created.
+
+; Non-shared (static) is the absence of pic and or cpic.
+
+; EF_MIPS_NOREORDER (0x00000001) is always on by default currently
+; EF_MIPS_PIC (0x00000002)
+; EF_MIPS_CPIC (0x00000004) - not tested yet
+; EF_MIPS_ABI2 (0x00000020) - n32 not tested yet
+; EF_MIPS_ARCH_32 (0x50000000)
+; EF_MIPS_ARCH_64 (0x60000000)
+; EF_MIPS_ARCH_32R2 (0x70000000)
+; EF_MIPS_ARCH_64R2 (0x80000000)
+
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE32 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE32_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -relocation-model=static %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE32R2 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE32R2_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS_PIC %s
+
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE64 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE64_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE64R2 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE64R2_PIC %s
+
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+mips16 -relocation-model=pic %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-LE32R2-MIPS16 %s
+
+; 32(R1) bit with NO_REORDER and static
+; CHECK-BE32: ('e_flags', 0x50001001)
+;
+; 32(R1) bit with NO_REORDER and PIC
+; CHECK-BE32_PIC: ('e_flags', 0x50001003)
+;
+; 32R2 bit with NO_REORDER and static
+; CHECK-BE32R2: ('e_flags', 0x70001001)
+;
+; 32R2 bit with NO_REORDER and PIC
+; CHECK-BE32R2_PIC: ('e_flags', 0x70001003)
+;
+; 32R2 bit MICROMIPS with NO_REORDER and static
+; CHECK-BE32R2-MICROMIPS: ('e_flags', 0x72001001)
+;
+; 32R2 bit MICROMIPS with NO_REORDER and PIC
+;CHECK-BE32R2-MICROMIPS_PIC: ('e_flags', 0x72001003)
+;
+; 64(R1) bit with NO_REORDER and static
+; CHECK-BE64: ('e_flags', 0x60000001)
+;
+; 64(R1) bit with NO_REORDER and PIC
+; CHECK-BE64_PIC: ('e_flags', 0x60000003)
+;
+; 64R2 bit with NO_REORDER and static
+; CHECK-BE64R2: ('e_flags', 0x80000001)
+;
+; 64R2 bit with NO_REORDER and PIC
+; CHECK-BE64R2_PIC: ('e_flags', 0x80000003)
+;
+; 32R2 bit MIPS16 with PIC
+; CHECK-LE32R2-MIPS16: ('e_flags', 0x74001002)
+
+define i32 @main() nounwind {
+entry:
+ ret i32 0
+}
diff --git a/test/MC/Mips/elf_st_other.ll b/test/MC/Mips/elf_st_other.ll
new file mode 100644
index 000000000000..f188ce7ba387
--- /dev/null
+++ b/test/MC/Mips/elf_st_other.ll
@@ -0,0 +1,13 @@
+; This tests the value of the ELF st_other field for function symbol table entries.
+; For microMIPS the value should be equal to STO_MIPS_MICROMIPS.
+
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+define i32 @main() nounwind {
+entry:
+ ret i32 0
+}
+
+; CHECK: 'main'
+; CHECK: ('st_other', 0x80)
+
diff --git a/test/MC/Mips/hilo-addressing.s b/test/MC/Mips/hilo-addressing.s
new file mode 100644
index 000000000000..28459c206728
--- /dev/null
+++ b/test/MC/Mips/hilo-addressing.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -show-encoding -triple mips-unknown-unknown %s | FileCheck %s
+
+ .ent hilo_test
+ .equ addr, 0xdeadbeef
+# CHECK: # encoding: [0x3c,0x04,0xde,0xae]
+ lui $4,%hi(addr)
+# CHECK: # encoding: [0x03,0xe0,0x00,0x08]
+ jr $31
+# CHECK: # encoding: [0x80,0x82,0xbe,0xef]
+ lb $2,%lo(addr)($4)
+ .end hilo_test
diff --git a/test/MC/Mips/mips-alu-instructions.s b/test/MC/Mips/mips-alu-instructions.s
index 2997782cd01b..7384d19e440f 100644
--- a/test/MC/Mips/mips-alu-instructions.s
+++ b/test/MC/Mips/mips-alu-instructions.s
@@ -13,6 +13,7 @@
# CHECK: ins $19, $9, 6, 7 # encoding: [0x84,0x61,0x33,0x7d]
# CHECK: nor $9, $6, $7 # encoding: [0x27,0x48,0xc7,0x00]
# CHECK: or $3, $3, $5 # encoding: [0x25,0x18,0x65,0x00]
+# CHECK: ori $4, $5, 17767 # encoding: [0x67,0x45,0xa4,0x34]
# CHECK: ori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x34]
# CHECK: rotr $9, $6, 7 # encoding: [0xc2,0x49,0x26,0x00]
# CHECK: rotrv $9, $6, $7 # encoding: [0x46,0x48,0xe6,0x00]
@@ -31,7 +32,7 @@
# CHECK: xori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x38]
# CHECK: xori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x38]
# CHECK: wsbh $6, $7 # encoding: [0xa0,0x30,0x07,0x7c]
-# CHECK: nor $7, $8, $zero # encoding: [0x27,0x38,0x00,0x01]
+# CHECK: not $7, $8 # encoding: [0x27,0x38,0x00,0x01]
and $9, $6, $7
and $9, $6, 17767
andi $9, $6, 17767
@@ -40,6 +41,7 @@
ins $19, $9, 6,7
nor $9, $6, $7
or $3, $3, $5
+ or $4, $5, 17767
ori $9, $6, 17767
rotr $9, $6, 7
rotrv $9, $6, $7
@@ -78,9 +80,13 @@
# CHECK: multu $3, $5 # encoding: [0x19,0x00,0x65,0x00]
# CHECK: sub $9, $6, $7 # encoding: [0x22,0x48,0xc7,0x00]
# CHECK: subu $4, $3, $5 # encoding: [0x23,0x20,0x65,0x00]
-# CHECK: sub $6, $zero, $7 # encoding: [0x22,0x30,0x07,0x00]
-# CHECK: subu $6, $zero, $7 # encoding: [0x23,0x30,0x07,0x00]
-# CHECK: add $7, $8, $zero # encoding: [0x20,0x38,0x00,0x01]
+# CHECK: neg $6, $7 # encoding: [0x22,0x30,0x07,0x00]
+# CHECK: negu $6, $7 # encoding: [0x23,0x30,0x07,0x00]
+# CHECK: move $7, $8 # encoding: [0x21,0x38,0x00,0x01]
+# CHECK: .set push
+# CHECK: .set mips32r2
+# CHECK: rdhwr $5, $29
+# CHECK: .set pop # encoding: [0x3b,0xe8,0x05,0x7c]
add $9,$6,$7
add $9,$6,17767
addu $9,$6,-15001
@@ -98,3 +104,4 @@
neg $6,$7
negu $6,$7
move $7,$8
+ rdhwr $5, $29
diff --git a/test/MC/Mips/mips-coprocessor-encodings.s b/test/MC/Mips/mips-coprocessor-encodings.s
index bad9163ba9fa..3d638c3ac9ce 100644
--- a/test/MC/Mips/mips-coprocessor-encodings.s
+++ b/test/MC/Mips/mips-coprocessor-encodings.s
@@ -1,4 +1,5 @@
-# RUN: llvm-mc %s -triple=mips64-unknown-freebsd -show-encoding | FileCheck --check-prefix=MIPS64 %s
+# RUN: llvm-mc %s -triple=mips64-unknown-freebsd -show-encoding \
+# RUN:| FileCheck --check-prefix=MIPS64 %s
# MIPS64: dmtc0 $12, $16, 2 # encoding: [0x40,0xac,0x80,0x02]
# MIPS64: dmtc0 $12, $16, 0 # encoding: [0x40,0xac,0x80,0x00]
diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s
index cfc15e883a95..3385fe19309f 100644
--- a/test/MC/Mips/mips-expansions.s
+++ b/test/MC/Mips/mips-expansions.s
@@ -16,6 +16,22 @@
# CHECK: lui $7, 1 # encoding: [0x01,0x00,0x07,0x3c]
# CHECK: ori $7, $7, 2 # encoding: [0x02,0x00,0xe7,0x34]
# CHECK: addu $7, $7, $8 # encoding: [0x21,0x38,0xe8,0x00]
+# CHECK: lui $10, %hi(symbol) # encoding: [A,A,0x0a,0x3c]
+# CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK: addu $10, $10, $4 # encoding: [0x21,0x50,0x44,0x01]
+# CHECK: lw $10, %lo(symbol)($10) # encoding: [A,A,0x4a,0x8d]
+# CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK: lui $1, %hi(symbol) # encoding: [A,A,0x01,0x3c]
+# CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00]
+# CHECK: sw $10, %lo(symbol)($1) # encoding: [A,A,0x2a,0xac]
+# CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK: lui $10, 10 # encoding: [0x0a,0x00,0x0a,0x3c]
+# CHECK: addu $10, $10, $4 # encoding: [0x21,0x50,0x44,0x01]
+# CHECK: lw $10, 123($10) # encoding: [0x7b,0x00,0x4a,0x8d]
+# CHECK: lui $1, 2 # encoding: [0x02,0x00,0x01,0x3c]
+# CHECK: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00]
+# CHECK: sw $10, 57920($1) # encoding: [0x40,0xe2,0x2a,0xac]
li $5,123
li $6,-2345
@@ -25,3 +41,9 @@
la $7,65538
la $a0, 20($a1)
la $7,65538($8)
+
+ lw $t2, symbol($a0)
+ sw $t2, symbol($t1)
+
+ lw $t2, 655483($a0)
+ sw $t2, 123456($t1)
diff --git a/test/MC/Mips/mips-jump-instructions.s b/test/MC/Mips/mips-jump-instructions.s
index 998be418d204..1dcb287738ce 100644
--- a/test/MC/Mips/mips-jump-instructions.s
+++ b/test/MC/Mips/mips-jump-instructions.s
@@ -1,30 +1,34 @@
-# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | \
+# RUN: FileCheck %s
# Check that the assembler can handle the documented syntax
# for jumps and branches.
# CHECK: .section __TEXT,__text,regular,pure_instructions
#------------------------------------------------------------------------------
# Branch instructions
#------------------------------------------------------------------------------
-# CHECK: b 1332 # encoding: [0x34,0x05,0x00,0x10]
+# CHECK: b 1332 # encoding: [0x4d,0x01,0x00,0x10]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-# CHECK: bc1f 1332 # encoding: [0x34,0x05,0x00,0x45]
+# CHECK: bc1f 1332 # encoding: [0x4d,0x01,0x00,0x45]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-# CHECK: bc1t 1332 # encoding: [0x34,0x05,0x01,0x45]
+# CHECK: bc1t 1332 # encoding: [0x4d,0x01,0x01,0x45]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-# CHECK: beq $9, $6, 1332 # encoding: [0x34,0x05,0x26,0x11]
+# CHECK: beq $9, $6, 1332 # encoding: [0x4d,0x01,0x26,0x11]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-# CHECK: bgez $6, 1332 # encoding: [0x34,0x05,0xc1,0x04]
+# CHECK: bgez $6, 1332 # encoding: [0x4d,0x01,0xc1,0x04]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-# CHECK: bgezal $6, 1332 # encoding: [0x34,0x05,0xd1,0x04]
+# CHECK: bgezal $6, 1332 # encoding: [0x4d,0x01,0xd1,0x04]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-# CHECK: bgtz $6, 1332 # encoding: [0x34,0x05,0xc0,0x1c]
+# CHECK: bgtz $6, 1332 # encoding: [0x4d,0x01,0xc0,0x1c]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-# CHECK: blez $6, 1332 # encoding: [0x34,0x05,0xc0,0x18]
+# CHECK: blez $6, 1332 # encoding: [0x4d,0x01,0xc0,0x18]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-# CHECK: bne $9, $6, 1332 # encoding: [0x34,0x05,0x26,0x15]
+# CHECK: bne $9, $6, 1332 # encoding: [0x4d,0x01,0x26,0x15]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-# CHECK: bal 1332 # encoding: [0x34,0x05,0x00,0x04]
+# CHECK: bal 1332 # encoding: [0x4d,0x01,0x11,0x04]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+
+.set noreorder
+
b 1332
nop
bc1f 1332
@@ -50,15 +54,24 @@ end_of_code:
#------------------------------------------------------------------------------
# Jump instructions
#------------------------------------------------------------------------------
-# CHECK: j 1328 # encoding: [0x30,0x05,0x00,0x08]
+# CHECK: j 1328 # encoding: [0x4c,0x01,0x00,0x08]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
-# CHECK: jal 1328 # encoding: [0x30,0x05,0x00,0x0c]
+# CHECK: jal 1328 # encoding: [0x4c,0x01,0x00,0x0c]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
# CHECK: jalr $6 # encoding: [0x09,0xf8,0xc0,0x00]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK: jalr $25 # encoding: [0x09,0xf8,0x20,0x03]
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK: jalr $10, $11 # encoding: [0x09,0x50,0x60,0x01]
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
# CHECK: jr $7 # encoding: [0x08,0x00,0xe0,0x00]
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
# CHECK: jr $7 # encoding: [0x08,0x00,0xe0,0x00]
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK: jalr $25 # encoding: [0x09,0xf8,0x20,0x03]
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK: jalr $4, $25 # encoding: [0x09,0x20,0x20,0x03]
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
j 1328
@@ -67,6 +80,15 @@ end_of_code:
nop
jalr $6
nop
+ jalr $31, $25
+ nop
+ jalr $10, $11
+ nop
jr $7
nop
j $7
+ nop
+ jal $25
+ nop
+ jal $4,$25
+ nop
diff --git a/test/MC/Mips/mips64-alu-instructions.s b/test/MC/Mips/mips64-alu-instructions.s
new file mode 100644
index 000000000000..1b4ebdfbbd49
--- /dev/null
+++ b/test/MC/Mips/mips64-alu-instructions.s
@@ -0,0 +1,100 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
+# Check that the assembler can handle the documented syntax
+# for arithmetic and logical instructions.
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+#------------------------------------------------------------------------------
+# Logical instructions
+#------------------------------------------------------------------------------
+# CHECK: and $9, $6, $7 # encoding: [0x24,0x48,0xc7,0x00]
+# CHECK: andi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x30]
+# CHECK: andi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x30]
+# CHECK: clo $6, $7 # encoding: [0x21,0x30,0xe6,0x70]
+# CHECK: clz $6, $7 # encoding: [0x20,0x30,0xe6,0x70]
+# CHECK: ins $19, $9, 6, 7 # encoding: [0x84,0x61,0x33,0x7d]
+# CHECK: nor $9, $6, $7 # encoding: [0x27,0x48,0xc7,0x00]
+# CHECK: or $3, $3, $5 # encoding: [0x25,0x18,0x65,0x00]
+# CHECK: ori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x34]
+# CHECK: rotr $9, $6, 7 # encoding: [0xc2,0x49,0x26,0x00]
+# CHECK: rotrv $9, $6, $7 # encoding: [0x46,0x48,0xe6,0x00]
+# CHECK: sll $4, $3, 7 # encoding: [0xc0,0x21,0x03,0x00]
+# CHECK: sllv $2, $3, $5 # encoding: [0x04,0x10,0xa3,0x00]
+# CHECK: slt $3, $3, $5 # encoding: [0x2a,0x18,0x65,0x00]
+# CHECK: slti $3, $3, 103 # encoding: [0x67,0x00,0x63,0x28]
+# CHECK: slti $3, $3, 103 # encoding: [0x67,0x00,0x63,0x28]
+# CHECK: sltiu $3, $3, 103 # encoding: [0x67,0x00,0x63,0x2c]
+# CHECK: sltu $3, $3, $5 # encoding: [0x2b,0x18,0x65,0x00]
+# CHECK: sra $4, $3, 7 # encoding: [0xc3,0x21,0x03,0x00]
+# CHECK: srav $2, $3, $5 # encoding: [0x07,0x10,0xa3,0x00]
+# CHECK: srl $4, $3, 7 # encoding: [0xc2,0x21,0x03,0x00]
+# CHECK: srlv $2, $3, $5 # encoding: [0x06,0x10,0xa3,0x00]
+# CHECK: xor $3, $3, $5 # encoding: [0x26,0x18,0x65,0x00]
+# CHECK: xori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x38]
+# CHECK: xori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x38]
+# CHECK: wsbh $6, $7 # encoding: [0xa0,0x30,0x07,0x7c]
+# CHECK: not $7, $8 # encoding: [0x27,0x38,0x00,0x01]
+ and $9, $6, $7
+ and $9, $6, 17767
+ andi $9, $6, 17767
+ clo $6, $7
+ clz $6, $7
+ ins $19, $9, 6,7
+ nor $9, $6, $7
+ or $3, $3, $5
+ ori $9, $6, 17767
+ rotr $9, $6, 7
+ rotrv $9, $6, $7
+ sll $4, $3, 7
+ sllv $2, $3, $5
+ slt $3, $3, $5
+ slt $3, $3, 103
+ slti $3, $3, 103
+ sltiu $3, $3, 103
+ sltu $3, $3, $5
+ sra $4, $3, 7
+ srav $2, $3, $5
+ srl $4, $3, 7
+ srlv $2, $3, $5
+ xor $3, $3, $5
+ xor $9, $6, 17767
+ xori $9, $6, 17767
+ wsbh $6, $7
+ not $7 ,$8
+
+#------------------------------------------------------------------------------
+# Arithmetic instructions
+#------------------------------------------------------------------------------
+
+# CHECK: dadd $9, $6, $7 # encoding: [0x2c,0x48,0xc7,0x00]
+# CHECK: daddi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x60]
+# CHECK: daddiu $9, $6, -15001 # encoding: [0x67,0xc5,0xc9,0x64]
+# CHECK: daddi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x60]
+# CHECK: daddiu $9, $6, -15001 # encoding: [0x67,0xc5,0xc9,0x64]
+# CHECK: daddu $9, $6, $7 # encoding: [0x2d,0x48,0xc7,0x00]
+# CHECK: madd $6, $7 # encoding: [0x00,0x00,0xc7,0x70]
+# CHECK: maddu $6, $7 # encoding: [0x01,0x00,0xc7,0x70]
+# CHECK: msub $6, $7 # encoding: [0x04,0x00,0xc7,0x70]
+# CHECK: msubu $6, $7 # encoding: [0x05,0x00,0xc7,0x70]
+# CHECK: mult $3, $5 # encoding: [0x18,0x00,0x65,0x00]
+# CHECK: multu $3, $5 # encoding: [0x19,0x00,0x65,0x00]
+# CHECK: dsubu $4, $3, $5 # encoding: [0x2f,0x20,0x65,0x00]
+# CHECK: move $7, $8 # encoding: [0x2d,0x38,0x00,0x01]
+# CHECK: .set push
+# CHECK: .set mips32r2
+# CHECK: rdhwr $5, $29
+# CHECK: .set pop # encoding: [0x3b,0xe8,0x05,0x7c]
+
+ dadd $9,$6,$7
+ dadd $9,$6,17767
+ daddu $9,$6,-15001
+ daddi $9,$6,17767
+ daddiu $9,$6,-15001
+ daddu $9,$6,$7
+ madd $6,$7
+ maddu $6,$7
+ msub $6,$7
+ msubu $6,$7
+ mult $3,$5
+ multu $3,$5
+ dsubu $4,$3,$5
+ move $7,$8
+ rdhwr $5, $29
diff --git a/test/MC/Mips/mips_directives.s b/test/MC/Mips/mips_directives.s
index e2f75a827d0a..df7e64563371 100644
--- a/test/MC/Mips/mips_directives.s
+++ b/test/MC/Mips/mips_directives.s
@@ -1,16 +1,25 @@
-# RUN: llvm-mc -triple mips-unknown-unknown %s
-#this test produces no output so there isS no FileCheck call
+# RUN: llvm-mc -show-encoding -triple mips-unknown-unknown %s | FileCheck %s
+#
$BB0_2:
.ent directives_test
- .frame $sp,0,$ra
- .mask 0x00000000,0
- .fmask 0x00000000,0
- .set noreorder
- .set nomacro
- .set noat
+ .frame $sp,0,$ra
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+ .set noreorder
+ .set nomacro
+ .set noat
$JTI0_0:
- .gpword ($BB0_2)
- .set at=$12
- .set macro
- .set reorder
- .end directives_test
+ .gpword ($BB0_2)
+ .word 0x77fffffc
+# CHECK: $JTI0_0:
+# CHECK-NEXT: .4byte 2013265916
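+# (2013265916 == 0x77fffffc, the .word value emitted above.)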
+ .set at=$12
+ .set macro
+ .set reorder
+ .set at=$a0
+ .set STORE_MASK,$t7
+ .set FPU_MASK,$f7
+# CHECK: abs.s $f6, $f7 # encoding: [0x46,0x00,0x39,0x85]
+# CHECK: and $3, $15, $15 # encoding: [0x01,0xef,0x18,0x24]
+ abs.s $f6,FPU_MASK
+ and $3,$t7,STORE_MASK
diff --git a/test/MC/Mips/mips_gprel16.ll b/test/MC/Mips/mips_gprel16.ll
new file mode 100644
index 000000000000..b5a282de560b
--- /dev/null
+++ b/test/MC/Mips/mips_gprel16.ll
@@ -0,0 +1,33 @@
+; This addresses bug 14456. We were not writing
+; out the addend to the gprel16 relocation. The
+; addend is stored in the instruction immediate
+; field.
+; Originally reproduced by hand with:
+;   llc gprel16.ll -o gprel16.o -mcpu=mips32r2 -march=mipsel -filetype=obj -relocation-model=static
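+;
+; Illustrative sketch (the destination register here is arbitrary, not part
+; of the test): with var1 and var2 placed at $gp-relative offsets 0 and 4,
+; the fixed output carries the addend in the load's 16-bit immediate:
+;   lw $2, 0($gp)   ; addend 0 for var1
+;   lw $2, 4($gp)   ; addend 4 for var2
+; as the CHECK lines in testvar1/testvar2 below verify.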
+
+; RUN: llc -mcpu=mips32r2 -march=mipsel -filetype=obj -relocation-model=static %s -o - \
+; RUN: | llvm-objdump -disassemble -mattr +mips32r2 - \
+; RUN: | FileCheck %s
+
+target triple = "mipsel-sde--elf-gcc"
+
+@var1 = internal global i32 0, align 4
+@var2 = internal global i32 0, align 4
+
+define i32 @testvar1() nounwind {
+entry:
+; CHECK: lw ${{[0-9]+}}, 0($gp)
+ %0 = load i32* @var1, align 4
+ %tobool = icmp ne i32 %0, 0
+ %cond = select i1 %tobool, i32 1, i32 0
+ ret i32 %cond
+}
+
+define i32 @testvar2() nounwind {
+entry:
+; CHECK: lw ${{[0-9]+}}, 4($gp)
+ %0 = load i32* @var2, align 4
+ %tobool = icmp ne i32 %0, 0
+ %cond = select i1 %tobool, i32 1, i32 0
+ ret i32 %cond
+}
+
diff --git a/test/MC/Mips/nabi-regs.s b/test/MC/Mips/nabi-regs.s
new file mode 100644
index 000000000000..9371208a2a9e
--- /dev/null
+++ b/test/MC/Mips/nabi-regs.s
@@ -0,0 +1,36 @@
+# The OABIs (o32, o64) have a different symbolic register
+# set for the A and T registers because the NABIs allow
+# for 4 more register parameters (A registers), shifting
+# the T registers up.
+#
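+# (For reference, stated here as the conventional mapping: under o32,
+# $8-$15 are $t0-$t7, while under n32/n64 the same numbers split into
+# $a4-$a7 ($8-$11) and $t0-$t3 ($12-$15), which is what the encodings
+# below exercise.)
+#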
+# For now, just check N64.
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \
+# RUN: -mcpu=mips64r2 -arch=mips64 | \
+# RUN: FileCheck %s
+
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+ .text
+foo:
+
+# CHECK: add $16, $16, $4 # encoding: [0x02,0x04,0x80,0x20]
+ add $s0,$s0,$a0
+# CHECK: add $16, $16, $6 # encoding: [0x02,0x06,0x80,0x20]
+ add $s0,$s0,$a2
+# CHECK: add $16, $16, $7 # encoding: [0x02,0x07,0x80,0x20]
+ add $s0,$s0,$a3
+# CHECK: add $16, $16, $8 # encoding: [0x02,0x08,0x80,0x20]
+ add $s0,$s0,$a4
+# CHECK: add $16, $16, $9 # encoding: [0x02,0x09,0x80,0x20]
+ add $s0,$s0,$a5
+# CHECK: add $16, $16, $10 # encoding: [0x02,0x0a,0x80,0x20]
+ add $s0,$s0,$a6
+# CHECK: add $16, $16, $11 # encoding: [0x02,0x0b,0x80,0x20]
+ add $s0,$s0,$a7
+# CHECK: add $16, $16, $12 # encoding: [0x02,0x0c,0x80,0x20]
+ add $s0,$s0,$t0
+# CHECK: add $16, $16, $13 # encoding: [0x02,0x0d,0x80,0x20]
+ add $s0,$s0,$t1
+# CHECK: add $16, $16, $14 # encoding: [0x02,0x0e,0x80,0x20]
+ add $s0,$s0,$t2
+# CHECK: add $16, $16, $15 # encoding: [0x02,0x0f,0x80,0x20]
+ add $s0,$s0,$t3
diff --git a/test/MC/Mips/set-at-directive.s b/test/MC/Mips/set-at-directive.s
new file mode 100644
index 000000000000..98a3a35b5428
--- /dev/null
+++ b/test/MC/Mips/set-at-directive.s
@@ -0,0 +1,132 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | \
+# RUN: FileCheck %s
+# Check that the assembler can handle the documented syntax
+# for ".set at" and set the correct value.
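+# For example, ".set at=$2" makes $at refer to physical register $2, so the
+# "jr $at" that follows it should encode with rs=2, i.e.
+# [0x08,0x00,0x40,0x00], as the second case below checks.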
+
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+ .text
+foo:
+# CHECK: jr $1 # encoding: [0x08,0x00,0x20,0x00]
+ .set at=$1
+ jr $at
+ nop
+# CHECK: jr $2 # encoding: [0x08,0x00,0x40,0x00]
+ .set at=$2
+ jr $at
+ nop
+# CHECK: jr $3 # encoding: [0x08,0x00,0x60,0x00]
+ .set at=$3
+ jr $at
+ nop
+# CHECK: jr $4 # encoding: [0x08,0x00,0x80,0x00]
+ .set at=$a0
+ jr $at
+ nop
+# CHECK: jr $5 # encoding: [0x08,0x00,0xa0,0x00]
+ .set at=$a1
+ jr $at
+ nop
+# CHECK: jr $6 # encoding: [0x08,0x00,0xc0,0x00]
+ .set at=$a2
+ jr $at
+ nop
+# CHECK: jr $7 # encoding: [0x08,0x00,0xe0,0x00]
+ .set at=$a3
+ jr $at
+ nop
+# CHECK: jr $8 # encoding: [0x08,0x00,0x00,0x01]
+ .set at=$8
+ jr $at
+ nop
+# CHECK: jr $9 # encoding: [0x08,0x00,0x20,0x01]
+ .set at=$9
+ jr $at
+ nop
+# CHECK: jr $10 # encoding: [0x08,0x00,0x40,0x01]
+ .set at=$10
+ jr $at
+ nop
+# CHECK: jr $11 # encoding: [0x08,0x00,0x60,0x01]
+ .set at=$11
+ jr $at
+ nop
+# CHECK: jr $12 # encoding: [0x08,0x00,0x80,0x01]
+ .set at=$12
+ jr $at
+ nop
+# CHECK: jr $13 # encoding: [0x08,0x00,0xa0,0x01]
+ .set at=$13
+ jr $at
+ nop
+# CHECK: jr $14 # encoding: [0x08,0x00,0xc0,0x01]
+ .set at=$14
+ jr $at
+ nop
+# CHECK: jr $15 # encoding: [0x08,0x00,0xe0,0x01]
+ .set at=$15
+ jr $at
+ nop
+# CHECK: jr $16 # encoding: [0x08,0x00,0x00,0x02]
+ .set at=$s0
+ jr $at
+ nop
+# CHECK: jr $17 # encoding: [0x08,0x00,0x20,0x02]
+ .set at=$s1
+ jr $at
+ nop
+# CHECK: jr $18 # encoding: [0x08,0x00,0x40,0x02]
+ .set at=$s2
+ jr $at
+ nop
+# CHECK: jr $19 # encoding: [0x08,0x00,0x60,0x02]
+ .set at=$s3
+ jr $at
+ nop
+# CHECK: jr $20 # encoding: [0x08,0x00,0x80,0x02]
+ .set at=$s4
+ jr $at
+ nop
+# CHECK: jr $21 # encoding: [0x08,0x00,0xa0,0x02]
+ .set at=$s5
+ jr $at
+ nop
+# CHECK: jr $22 # encoding: [0x08,0x00,0xc0,0x02]
+ .set at=$s6
+ jr $at
+ nop
+# CHECK: jr $23 # encoding: [0x08,0x00,0xe0,0x02]
+ .set at=$s7
+ jr $at
+ nop
+# CHECK: jr $24 # encoding: [0x08,0x00,0x00,0x03]
+ .set at=$24
+ jr $at
+ nop
+# CHECK: jr $25 # encoding: [0x08,0x00,0x20,0x03]
+ .set at=$25
+ jr $at
+ nop
+# CHECK: jr $26 # encoding: [0x08,0x00,0x40,0x03]
+ .set at=$26
+ jr $at
+ nop
+# CHECK: jr $27 # encoding: [0x08,0x00,0x60,0x03]
+ .set at=$27
+ jr $at
+ nop
+# CHECK: jr $gp # encoding: [0x08,0x00,0x80,0x03]
+ .set at=$gp
+ jr $at
+ nop
+# CHECK: jr $fp # encoding: [0x08,0x00,0xc0,0x03]
+ .set at=$fp
+ jr $at
+ nop
+# CHECK: jr $sp # encoding: [0x08,0x00,0xa0,0x03]
+ .set at=$sp
+ jr $at
+ nop
+# CHECK: jr $ra # encoding: [0x08,0x00,0xe0,0x03]
+ .set at=$ra
+ jr $at
+ nop
diff --git a/test/MC/PowerPC/ppc64-initial-cfa.ll b/test/MC/PowerPC/ppc64-initial-cfa.ll
index 3936cf2e81e5..16236c9c650d 100644
--- a/test/MC/PowerPC/ppc64-initial-cfa.ll
+++ b/test/MC/PowerPC/ppc64-initial-cfa.ll
@@ -1,41 +1,78 @@
-;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj %s -o - | \
-;; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj -relocation-model=static %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s -check-prefix=STATIC
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj -relocation-model=pic %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s -check-prefix=PIC
-;; FIXME: this file should be in .s form, change when asm parser is available.
+; FIXME: this file should be in .s form, change when asm parser is available.
define void @f() {
entry:
ret void
}
-;; CHECK: ('sh_name', 0x{{.*}}) # '.eh_frame'
-;; CHECK-NEXT: ('sh_type', 0x00000001)
-;; CHECK-NEXT: ('sh_flags', 0x0000000000000002)
-;; CHECK-NEXT: ('sh_addr', 0x{{.*}})
-;; CHECK-NEXT: ('sh_offset', 0x{{.*}})
-;; CHECK-NEXT: ('sh_size', 0x0000000000000030)
-;; CHECK-NEXT: ('sh_link', 0x00000000)
-;; CHECK-NEXT: ('sh_info', 0x00000000)
-;; CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
-;; CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-;; CHECK-NEXT: ('_section_data', '00000010 00000000 017a5200 01784101 000c0100 00000018 00000018 00000000 00000000 00000000 00000010 00000000')
-
-;; CHECK: ('sh_name', 0x{{.*}}) # '.rela.eh_frame'
-;; CHECK-NEXT: ('sh_type', 0x00000004)
-;; CHECK-NEXT: ('sh_flags', 0x0000000000000000)
-;; CHECK-NEXT: ('sh_addr', 0x{{.*}})
-;; CHECK-NEXT: ('sh_offset', 0x{{.*}})
-;; CHECK-NEXT: ('sh_size', 0x0000000000000018)
-;; CHECK-NEXT: ('sh_link', 0x{{.*}})
-;; CHECK-NEXT: ('sh_info', 0x{{.*}})
-;; CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
-;; CHECK-NEXT: ('sh_entsize', 0x0000000000000018)
-;; CHECK-NEXT: ('_relocations', [
-;; CHECK-NEXT: # Relocation 0
-;; CHECK-NEXT: (('r_offset', 0x000000000000001c)
-;; CHECK-NEXT: ('r_sym', 0x{{.*}})
-;; CHECK-NEXT: ('r_type', 0x00000026)
-;; CHECK-NEXT: ('r_addend', 0x0000000000000000)
-;; CHECK-NEXT: ),
-;; CHECK-NEXT: ])
+; STATIC: ('sh_name', 0x{{.*}}) # '.eh_frame'
+; STATIC-NEXT: ('sh_type', 0x00000001)
+; STATIC-NEXT: ('sh_flags', 0x0000000000000002)
+; STATIC-NEXT: ('sh_addr', 0x{{.*}})
+; STATIC-NEXT: ('sh_offset', 0x{{.*}})
+; STATIC-NEXT: ('sh_size', 0x0000000000000028)
+; STATIC-NEXT: ('sh_link', 0x00000000)
+; STATIC-NEXT: ('sh_info', 0x00000000)
+; STATIC-NEXT: ('sh_addralign', 0x0000000000000008)
+; STATIC-NEXT: ('sh_entsize', 0x0000000000000000)
+; STATIC-NEXT: ('_section_data', '00000010 00000000 017a5200 01784101 1b0c0100 00000010 00000018 00000000 00000010 00000000')
+; STATIC: ('sh_name', 0x{{.*}}) # '.rela.eh_frame'
+; STATIC-NEXT: ('sh_type', 0x00000004)
+; STATIC-NEXT: ('sh_flags', 0x0000000000000000)
+; STATIC-NEXT: ('sh_addr', 0x{{.*}})
+; STATIC-NEXT: ('sh_offset', 0x{{.*}})
+; STATIC-NEXT: ('sh_size', 0x0000000000000018)
+; STATIC-NEXT: ('sh_link', 0x{{.*}})
+; STATIC-NEXT: ('sh_info', 0x{{.*}})
+; STATIC-NEXT: ('sh_addralign', 0x0000000000000008)
+; STATIC-NEXT: ('sh_entsize', 0x0000000000000018)
+; STATIC-NEXT: ('_relocations', [
+
+; Static build should create R_PPC64_REL32 relocations
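+; (r_type 0x1a is decimal 26, i.e. R_PPC64_REL32.)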
+; STATIC-NEXT: # Relocation 0
+; STATIC-NEXT: (('r_offset', 0x000000000000001c)
+; STATIC-NEXT: ('r_sym', 0x{{.*}})
+; STATIC-NEXT: ('r_type', 0x0000001a)
+; STATIC-NEXT: ('r_addend', 0x0000000000000000)
+; STATIC-NEXT: ),
+; STATIC-NEXT: ])
+
+
+; PIC: ('sh_name', 0x{{.*}}) # '.eh_frame'
+; PIC-NEXT: ('sh_type', 0x00000001)
+; PIC-NEXT: ('sh_flags', 0x0000000000000002)
+; PIC-NEXT: ('sh_addr', 0x{{.*}})
+; PIC-NEXT: ('sh_offset', 0x{{.*}})
+; PIC-NEXT: ('sh_size', 0x0000000000000028)
+; PIC-NEXT: ('sh_link', 0x00000000)
+; PIC-NEXT: ('sh_info', 0x00000000)
+; PIC-NEXT: ('sh_addralign', 0x0000000000000008)
+; PIC-NEXT: ('sh_entsize', 0x0000000000000000)
+; PIC-NEXT: ('_section_data', '00000010 00000000 017a5200 01784101 1b0c0100 00000010 00000018 00000000 00000010 00000000')
+
+; PIC: ('sh_name', 0x{{.*}}) # '.rela.eh_frame'
+; PIC-NEXT: ('sh_type', 0x00000004)
+; PIC-NEXT: ('sh_flags', 0x0000000000000000)
+; PIC-NEXT: ('sh_addr', 0x{{.*}})
+; PIC-NEXT: ('sh_offset', 0x{{.*}})
+; PIC-NEXT: ('sh_size', 0x0000000000000018)
+; PIC-NEXT: ('sh_link', 0x{{.*}})
+; PIC-NEXT: ('sh_info', 0x{{.*}})
+; PIC-NEXT: ('sh_addralign', 0x0000000000000008)
+; PIC-NEXT: ('sh_entsize', 0x0000000000000018)
+; PIC-NEXT: ('_relocations', [
+
+; PIC build should create R_PPC64_REL32 relocations
+; PIC-NEXT: # Relocation 0
+; PIC-NEXT: (('r_offset', 0x000000000000001c)
+; PIC-NEXT: ('r_sym', 0x{{.*}})
+; PIC-NEXT: ('r_type', 0x0000001a)
+; PIC-NEXT: ('r_addend', 0x0000000000000000)
+; PIC-NEXT: ),
+; PIC-NEXT: ])
diff --git a/test/MC/PowerPC/ppc64-relocs-01.ll b/test/MC/PowerPC/ppc64-relocs-01.ll
index 5996af84f448..4919e91400ba 100644
--- a/test/MC/PowerPC/ppc64-relocs-01.ll
+++ b/test/MC/PowerPC/ppc64-relocs-01.ll
@@ -1,4 +1,4 @@
-;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -O3 \
+;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -O3 -code-model=small \
;; RUN: -filetype=obj %s -o - | \
;; RUN: elf-dump --dump-section-data | FileCheck %s
diff --git a/test/MC/X86/AlignedBundling/align-mode-argument-error.s b/test/MC/X86/AlignedBundling/align-mode-argument-error.s
new file mode 100644
index 000000000000..b4ce0a9d103a
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/align-mode-argument-error.s
@@ -0,0 +1,8 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+
+# Missing .bundle_align_mode argument
+# CHECK: error: unknown token
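+# (.bundle_align_mode expects a power-of-2 exponent, e.g. ".bundle_align_mode 4"
+# for 16-byte bundles; with the argument missing, the parser should reject it.)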
+
+ .bundle_align_mode
+ imull $17, %ebx, %ebp
+
diff --git a/test/MC/X86/AlignedBundling/asm-printing-bundle-directives.s b/test/MC/X86/AlignedBundling/asm-printing-bundle-directives.s
new file mode 100644
index 000000000000..387e0fe59bf2
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/asm-printing-bundle-directives.s
@@ -0,0 +1,22 @@
+# RUN: llvm-mc -filetype=asm -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+
+# Just a simple test for the assembly emitter - making sure it emits back the
+# bundling directives.
+
+ .text
+foo:
+ .bundle_align_mode 4
+# CHECK: .bundle_align_mode 4
+ pushq %rbp
+ .bundle_lock
+# CHECK: .bundle_lock
+ cmpl %r14d, %ebp
+ jle .L_ELSE
+ .bundle_unlock
+# CHECK: .bundle_unlock
+ .bundle_lock align_to_end
+# CHECK: .bundle_lock align_to_end
+ add %rbx, %rdx
+ .bundle_unlock
+
+
diff --git a/test/MC/X86/AlignedBundling/autogen-inst-offset-align-to-end.s b/test/MC/X86/AlignedBundling/autogen-inst-offset-align-to-end.s
new file mode 100644
index 000000000000..fbf5b5294460
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/autogen-inst-offset-align-to-end.s
@@ -0,0 +1,2899 @@
+# RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - \
+# RUN: | llvm-objdump -triple i386 -disassemble -no-show-raw-insn - | FileCheck %s
+
+# !!! This test is auto-generated from utils/testgen/mc-bundling-x86-gen.py !!!
+# It tests that bundle-aligned grouping works correctly in MC. Read the
+# source of the script for more details.
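+#
+# Worked example of the padding arithmetic: ".bundle_align_mode 4" selects
+# 2^4 = 16-byte bundles, and ".bundle_lock align_to_end" pads with nops so a
+# locked group ends exactly on a bundle boundary. A single 1-byte incl
+# starting at offset 0 therefore gets 15 bytes of nops and lands at 0xf,
+# which is what the first CHECK pair below expects.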
+
+ .text
+ .bundle_align_mode 4
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 0: nop
+# CHECK: f: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 21: nop
+# CHECK: 2f: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 42: nop
+# CHECK: 4f: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 63: nop
+# CHECK: 6f: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 84: nop
+# CHECK: 8f: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a5: nop
+# CHECK: af: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c6: nop
+# CHECK: cf: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e7: nop
+# CHECK: ef: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 108: nop
+# CHECK: 10f: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 129: nop
+# CHECK: 12f: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 14a: nop
+# CHECK: 14f: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 16b: nop
+# CHECK: 16f: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 18c: nop
+# CHECK: 18f: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ad: nop
+# CHECK: 1af: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ce: nop
+# CHECK: 1cf: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ef: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 200: nop
+# CHECK: 20e: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 221: nop
+# CHECK: 22e: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 242: nop
+# CHECK: 24e: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 263: nop
+# CHECK: 26e: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 284: nop
+# CHECK: 28e: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 2a5: nop
+# CHECK: 2ae: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 2c6: nop
+# CHECK: 2ce: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 2e7: nop
+# CHECK: 2ee: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 308: nop
+# CHECK: 30e: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 329: nop
+# CHECK: 32e: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 34a: nop
+# CHECK: 34e: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 36b: nop
+# CHECK: 36e: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 38c: nop
+# CHECK: 38e: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 3ad: nop
+# CHECK: 3ae: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 3ce: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 3ef: nop
+# CHECK: 3f0: nop
+# CHECK: 3fe: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 400: nop
+# CHECK: 40d: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 421: nop
+# CHECK: 42d: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 442: nop
+# CHECK: 44d: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 463: nop
+# CHECK: 46d: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 484: nop
+# CHECK: 48d: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 4a5: nop
+# CHECK: 4ad: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 4c6: nop
+# CHECK: 4cd: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 4e7: nop
+# CHECK: 4ed: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 508: nop
+# CHECK: 50d: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 529: nop
+# CHECK: 52d: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 54a: nop
+# CHECK: 54d: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 56b: nop
+# CHECK: 56d: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 58c: nop
+# CHECK: 58d: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 5ad: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 5ce: nop
+# CHECK: 5d0: nop
+# CHECK: 5dd: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 5ef: nop
+# CHECK: 5f0: nop
+# CHECK: 5fd: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 600: nop
+# CHECK: 60c: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 621: nop
+# CHECK: 62c: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 642: nop
+# CHECK: 64c: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 663: nop
+# CHECK: 66c: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 684: nop
+# CHECK: 68c: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 6a5: nop
+# CHECK: 6ac: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 6c6: nop
+# CHECK: 6cc: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 6e7: nop
+# CHECK: 6ec: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 708: nop
+# CHECK: 70c: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 729: nop
+# CHECK: 72c: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 74a: nop
+# CHECK: 74c: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 76b: nop
+# CHECK: 76c: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 78c: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 7ad: nop
+# CHECK: 7b0: nop
+# CHECK: 7bc: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 7ce: nop
+# CHECK: 7d0: nop
+# CHECK: 7dc: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 7ef: nop
+# CHECK: 7f0: nop
+# CHECK: 7fc: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 800: nop
+# CHECK: 80b: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 821: nop
+# CHECK: 82b: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 842: nop
+# CHECK: 84b: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 863: nop
+# CHECK: 86b: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 884: nop
+# CHECK: 88b: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 8a5: nop
+# CHECK: 8ab: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 8c6: nop
+# CHECK: 8cb: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 8e7: nop
+# CHECK: 8eb: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 908: nop
+# CHECK: 90b: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 929: nop
+# CHECK: 92b: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 94a: nop
+# CHECK: 94b: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 96b: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 98c: nop
+# CHECK: 990: nop
+# CHECK: 99b: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 9ad: nop
+# CHECK: 9b0: nop
+# CHECK: 9bb: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 9ce: nop
+# CHECK: 9d0: nop
+# CHECK: 9db: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 9ef: nop
+# CHECK: 9f0: nop
+# CHECK: 9fb: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a00: nop
+# CHECK: a0a: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a21: nop
+# CHECK: a2a: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a42: nop
+# CHECK: a4a: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a63: nop
+# CHECK: a6a: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a84: nop
+# CHECK: a8a: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: aa5: nop
+# CHECK: aaa: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ac6: nop
+# CHECK: aca: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ae7: nop
+# CHECK: aea: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: b08: nop
+# CHECK: b0a: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: b29: nop
+# CHECK: b2a: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: b4a: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: b6b: nop
+# CHECK: b70: nop
+# CHECK: b7a: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: b8c: nop
+# CHECK: b90: nop
+# CHECK: b9a: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: bad: nop
+# CHECK: bb0: nop
+# CHECK: bba: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: bce: nop
+# CHECK: bd0: nop
+# CHECK: bda: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: bef: nop
+# CHECK: bf0: nop
+# CHECK: bfa: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c00: nop
+# CHECK: c09: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c21: nop
+# CHECK: c29: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c42: nop
+# CHECK: c49: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c63: nop
+# CHECK: c69: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c84: nop
+# CHECK: c89: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ca5: nop
+# CHECK: ca9: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: cc6: nop
+# CHECK: cc9: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ce7: nop
+# CHECK: ce9: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: d08: nop
+# CHECK: d09: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: d29: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: d4a: nop
+# CHECK: d50: nop
+# CHECK: d59: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: d6b: nop
+# CHECK: d70: nop
+# CHECK: d79: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: d8c: nop
+# CHECK: d90: nop
+# CHECK: d99: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: dad: nop
+# CHECK: db0: nop
+# CHECK: db9: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: dce: nop
+# CHECK: dd0: nop
+# CHECK: dd9: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: def: nop
+# CHECK: df0: nop
+# CHECK: df9: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e00: nop
+# CHECK: e08: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e21: nop
+# CHECK: e28: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e42: nop
+# CHECK: e48: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e63: nop
+# CHECK: e68: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e84: nop
+# CHECK: e88: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ea5: nop
+# CHECK: ea8: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ec6: nop
+# CHECK: ec8: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ee7: nop
+# CHECK: ee8: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: f08: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: f29: nop
+# CHECK: f30: nop
+# CHECK: f38: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: f4a: nop
+# CHECK: f50: nop
+# CHECK: f58: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: f6b: nop
+# CHECK: f70: nop
+# CHECK: f78: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: f8c: nop
+# CHECK: f90: nop
+# CHECK: f98: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: fad: nop
+# CHECK: fb0: nop
+# CHECK: fb8: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: fce: nop
+# CHECK: fd0: nop
+# CHECK: fd8: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: fef: nop
+# CHECK: ff0: nop
+# CHECK: ff8: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1000: nop
+# CHECK: 1007: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1021: nop
+# CHECK: 1027: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1042: nop
+# CHECK: 1047: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1063: nop
+# CHECK: 1067: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1084: nop
+# CHECK: 1087: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 10a5: nop
+# CHECK: 10a7: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 10c6: nop
+# CHECK: 10c7: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 10e7: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1108: nop
+# CHECK: 1110: nop
+# CHECK: 1117: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1129: nop
+# CHECK: 1130: nop
+# CHECK: 1137: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 114a: nop
+# CHECK: 1150: nop
+# CHECK: 1157: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 116b: nop
+# CHECK: 1170: nop
+# CHECK: 1177: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 118c: nop
+# CHECK: 1190: nop
+# CHECK: 1197: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 11ad: nop
+# CHECK: 11b0: nop
+# CHECK: 11b7: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 11ce: nop
+# CHECK: 11d0: nop
+# CHECK: 11d7: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 11ef: nop
+# CHECK: 11f0: nop
+# CHECK: 11f7: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1200: nop
+# CHECK: 1206: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1221: nop
+# CHECK: 1226: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1242: nop
+# CHECK: 1246: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1263: nop
+# CHECK: 1266: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1284: nop
+# CHECK: 1286: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 12a5: nop
+# CHECK: 12a6: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 12c6: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 12e7: nop
+# CHECK: 12f0: nop
+# CHECK: 12f6: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1308: nop
+# CHECK: 1310: nop
+# CHECK: 1316: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1329: nop
+# CHECK: 1330: nop
+# CHECK: 1336: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 134a: nop
+# CHECK: 1350: nop
+# CHECK: 1356: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 136b: nop
+# CHECK: 1370: nop
+# CHECK: 1376: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 138c: nop
+# CHECK: 1390: nop
+# CHECK: 1396: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 13ad: nop
+# CHECK: 13b0: nop
+# CHECK: 13b6: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 13ce: nop
+# CHECK: 13d0: nop
+# CHECK: 13d6: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 13ef: nop
+# CHECK: 13f0: nop
+# CHECK: 13f6: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1400: nop
+# CHECK: 1405: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1421: nop
+# CHECK: 1425: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1442: nop
+# CHECK: 1445: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1463: nop
+# CHECK: 1465: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1484: nop
+# CHECK: 1485: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 14a5: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 14c6: nop
+# CHECK: 14d0: nop
+# CHECK: 14d5: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 14e7: nop
+# CHECK: 14f0: nop
+# CHECK: 14f5: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1508: nop
+# CHECK: 1510: nop
+# CHECK: 1515: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1529: nop
+# CHECK: 1530: nop
+# CHECK: 1535: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 154a: nop
+# CHECK: 1550: nop
+# CHECK: 1555: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 156b: nop
+# CHECK: 1570: nop
+# CHECK: 1575: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 158c: nop
+# CHECK: 1590: nop
+# CHECK: 1595: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 15ad: nop
+# CHECK: 15b0: nop
+# CHECK: 15b5: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 15ce: nop
+# CHECK: 15d0: nop
+# CHECK: 15d5: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 15ef: nop
+# CHECK: 15f0: nop
+# CHECK: 15f5: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1600: nop
+# CHECK: 1604: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1621: nop
+# CHECK: 1624: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1642: nop
+# CHECK: 1644: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1663: nop
+# CHECK: 1664: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1684: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 16a5: nop
+# CHECK: 16b0: nop
+# CHECK: 16b4: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 16c6: nop
+# CHECK: 16d0: nop
+# CHECK: 16d4: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 16e7: nop
+# CHECK: 16f0: nop
+# CHECK: 16f4: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1708: nop
+# CHECK: 1710: nop
+# CHECK: 1714: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1729: nop
+# CHECK: 1730: nop
+# CHECK: 1734: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 174a: nop
+# CHECK: 1750: nop
+# CHECK: 1754: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 176b: nop
+# CHECK: 1770: nop
+# CHECK: 1774: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 178c: nop
+# CHECK: 1790: nop
+# CHECK: 1794: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 17ad: nop
+# CHECK: 17b0: nop
+# CHECK: 17b4: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 17ce: nop
+# CHECK: 17d0: nop
+# CHECK: 17d4: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 17ef: nop
+# CHECK: 17f0: nop
+# CHECK: 17f4: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1800: nop
+# CHECK: 1803: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1821: nop
+# CHECK: 1823: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1842: nop
+# CHECK: 1843: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1863: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1884: nop
+# CHECK: 1890: nop
+# CHECK: 1893: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 18a5: nop
+# CHECK: 18b0: nop
+# CHECK: 18b3: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 18c6: nop
+# CHECK: 18d0: nop
+# CHECK: 18d3: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 18e7: nop
+# CHECK: 18f0: nop
+# CHECK: 18f3: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1908: nop
+# CHECK: 1910: nop
+# CHECK: 1913: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1929: nop
+# CHECK: 1930: nop
+# CHECK: 1933: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 194a: nop
+# CHECK: 1950: nop
+# CHECK: 1953: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 196b: nop
+# CHECK: 1970: nop
+# CHECK: 1973: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 198c: nop
+# CHECK: 1990: nop
+# CHECK: 1993: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 19ad: nop
+# CHECK: 19b0: nop
+# CHECK: 19b3: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 19ce: nop
+# CHECK: 19d0: nop
+# CHECK: 19d3: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 19ef: nop
+# CHECK: 19f0: nop
+# CHECK: 19f3: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1a00: nop
+# CHECK: 1a02: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1a21: nop
+# CHECK: 1a22: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1a42: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1a63: nop
+# CHECK: 1a70: nop
+# CHECK: 1a72: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1a84: nop
+# CHECK: 1a90: nop
+# CHECK: 1a92: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1aa5: nop
+# CHECK: 1ab0: nop
+# CHECK: 1ab2: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ac6: nop
+# CHECK: 1ad0: nop
+# CHECK: 1ad2: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ae7: nop
+# CHECK: 1af0: nop
+# CHECK: 1af2: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1b08: nop
+# CHECK: 1b10: nop
+# CHECK: 1b12: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1b29: nop
+# CHECK: 1b30: nop
+# CHECK: 1b32: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1b4a: nop
+# CHECK: 1b50: nop
+# CHECK: 1b52: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1b6b: nop
+# CHECK: 1b70: nop
+# CHECK: 1b72: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1b8c: nop
+# CHECK: 1b90: nop
+# CHECK: 1b92: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1bad: nop
+# CHECK: 1bb0: nop
+# CHECK: 1bb2: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1bce: nop
+# CHECK: 1bd0: nop
+# CHECK: 1bd2: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1bef: nop
+# CHECK: 1bf0: nop
+# CHECK: 1bf2: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1c00: nop
+# CHECK: 1c01: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1c21: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1c42: nop
+# CHECK: 1c50: nop
+# CHECK: 1c51: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1c63: nop
+# CHECK: 1c70: nop
+# CHECK: 1c71: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1c84: nop
+# CHECK: 1c90: nop
+# CHECK: 1c91: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ca5: nop
+# CHECK: 1cb0: nop
+# CHECK: 1cb1: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1cc6: nop
+# CHECK: 1cd0: nop
+# CHECK: 1cd1: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ce7: nop
+# CHECK: 1cf0: nop
+# CHECK: 1cf1: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1d08: nop
+# CHECK: 1d10: nop
+# CHECK: 1d11: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1d29: nop
+# CHECK: 1d30: nop
+# CHECK: 1d31: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1d4a: nop
+# CHECK: 1d50: nop
+# CHECK: 1d51: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1d6b: nop
+# CHECK: 1d70: nop
+# CHECK: 1d71: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1d8c: nop
+# CHECK: 1d90: nop
+# CHECK: 1d91: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1dad: nop
+# CHECK: 1db0: nop
+# CHECK: 1db1: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1dce: nop
+# CHECK: 1dd0: nop
+# CHECK: 1dd1: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1def: nop
+# CHECK: 1df0: nop
+# CHECK: 1df1: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_0:
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1e00: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1e21: nop
+# CHECK: 1e30: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1e42: nop
+# CHECK: 1e50: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1e63: nop
+# CHECK: 1e70: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1e84: nop
+# CHECK: 1e90: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ea5: nop
+# CHECK: 1eb0: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ec6: nop
+# CHECK: 1ed0: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ee7: nop
+# CHECK: 1ef0: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1f08: nop
+# CHECK: 1f10: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1f29: nop
+# CHECK: 1f30: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1f4a: nop
+# CHECK: 1f50: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1f6b: nop
+# CHECK: 1f70: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1f8c: nop
+# CHECK: 1f90: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1fad: nop
+# CHECK: 1fb0: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1fce: nop
+# CHECK: 1fd0: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock align_to_end
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1fef: nop
+# CHECK: 1ff0: incl
+
diff --git a/test/MC/X86/AlignedBundling/autogen-inst-offset-padding.s b/test/MC/X86/AlignedBundling/autogen-inst-offset-padding.s
new file mode 100644
index 000000000000..12786b34af72
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/autogen-inst-offset-padding.s
@@ -0,0 +1,2674 @@
+# RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - \
+# RUN: | llvm-objdump -triple i386 -disassemble -no-show-raw-insn - | FileCheck %s
+
+# !!! This test is auto-generated from utils/testgen/mc-bundling-x86-gen.py !!!
+# It tests that bundle-aligned grouping works correctly in MC. Read the
+# source of the script for more details.
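The generator itself is not part of this diff, but the stanza structure above is simple enough to sketch. A hypothetical Python loop in the spirit of utils/testgen/mc-bundling-x86-gen.py (the names and layout below are assumptions, and the expected-offset CHECK lines would additionally have to be computed from the padding rules):

    # Hypothetical sketch, not the real mc-bundling-x86-gen.py: emit one
    # .bundle_lock'ed run of one-byte 'inc %eax' instructions for every
    # (instruction count, starting offset) pair within a bundle.
    BUNDLE_SIZE = 16  # matches .bundle_align_mode 4

    def emit_stanza(instrlen, offset, align_to_end=False):
        lock = '.bundle_lock align_to_end' if align_to_end else '.bundle_lock'
        print('  .align 32, 0x90')
        print('INSTRLEN_%d_OFFSET_%d:' % (instrlen, offset))
        if offset > 0:
            print('  .fill %d, 1, 0x90' % offset)  # push the group off-boundary
        print('  %s' % lock)
        print('  .rept %d' % instrlen)
        print('  inc %eax')
        print('  .endr')
        print('  .bundle_unlock')
        print('')

    for instrlen in range(1, BUNDLE_SIZE + 1):
        for offset in range(BUNDLE_SIZE):
            emit_stanza(instrlen, offset)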
+
+ .text
+ .bundle_align_mode 4
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_0:
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 0: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 21: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 42: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 63: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 84: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a5: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c6: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e7: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 108: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 129: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 14a: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 16b: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 18c: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ad: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ce: incl
+
+ .align 32, 0x90
+INSTRLEN_1_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 1
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ef: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_0:
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 200: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 221: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 242: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 263: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 284: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 2a5: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 2c6: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 2e7: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 308: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 329: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 34a: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 36b: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 38c: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 3ad: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 3ce: incl
+
+ .align 32, 0x90
+INSTRLEN_2_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 2
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 3ef: nop
+# CHECK: 3f0: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_0:
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 400: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 421: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 442: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 463: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 484: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 4a5: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 4c6: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 4e7: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 508: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 529: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 54a: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 56b: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 58c: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 5ad: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 5ce: nop
+# CHECK: 5d0: incl
+
+ .align 32, 0x90
+INSTRLEN_3_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 3
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 5ef: nop
+# CHECK: 5f0: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_0:
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 600: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 621: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 642: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 663: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 684: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 6a5: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 6c6: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 6e7: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 708: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 729: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 74a: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 76b: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 78c: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 7ad: nop
+# CHECK: 7b0: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 7ce: nop
+# CHECK: 7d0: incl
+
+ .align 32, 0x90
+INSTRLEN_4_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 4
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 7ef: nop
+# CHECK: 7f0: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_0:
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 800: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 821: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 842: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 863: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 884: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 8a5: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 8c6: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 8e7: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 908: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 929: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 94a: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 96b: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 98c: nop
+# CHECK: 990: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 9ad: nop
+# CHECK: 9b0: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 9ce: nop
+# CHECK: 9d0: incl
+
+ .align 32, 0x90
+INSTRLEN_5_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 5
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 9ef: nop
+# CHECK: 9f0: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_0:
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a00: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a21: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a42: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a63: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: a84: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: aa5: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ac6: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ae7: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: b08: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: b29: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: b4a: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: b6b: nop
+# CHECK: b70: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: b8c: nop
+# CHECK: b90: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: bad: nop
+# CHECK: bb0: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: bce: nop
+# CHECK: bd0: incl
+
+ .align 32, 0x90
+INSTRLEN_6_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 6
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: bef: nop
+# CHECK: bf0: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_0:
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c00: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c21: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c42: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c63: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: c84: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ca5: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: cc6: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ce7: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: d08: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: d29: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: d4a: nop
+# CHECK: d50: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: d6b: nop
+# CHECK: d70: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: d8c: nop
+# CHECK: d90: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: dad: nop
+# CHECK: db0: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: dce: nop
+# CHECK: dd0: incl
+
+ .align 32, 0x90
+INSTRLEN_7_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 7
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: def: nop
+# CHECK: df0: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_0:
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e00: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e21: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e42: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e63: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: e84: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ea5: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ec6: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: ee7: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: f08: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: f29: nop
+# CHECK: f30: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: f4a: nop
+# CHECK: f50: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: f6b: nop
+# CHECK: f70: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: f8c: nop
+# CHECK: f90: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: fad: nop
+# CHECK: fb0: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: fce: nop
+# CHECK: fd0: incl
+
+ .align 32, 0x90
+INSTRLEN_8_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 8
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: fef: nop
+# CHECK: ff0: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_0:
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1000: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1021: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1042: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1063: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1084: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 10a5: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 10c6: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 10e7: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1108: nop
+# CHECK: 1110: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1129: nop
+# CHECK: 1130: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 114a: nop
+# CHECK: 1150: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 116b: nop
+# CHECK: 1170: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 118c: nop
+# CHECK: 1190: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 11ad: nop
+# CHECK: 11b0: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 11ce: nop
+# CHECK: 11d0: incl
+
+ .align 32, 0x90
+INSTRLEN_9_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 9
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 11ef: nop
+# CHECK: 11f0: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_0:
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1200: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1221: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1242: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1263: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1284: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 12a5: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 12c6: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 12e7: nop
+# CHECK: 12f0: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1308: nop
+# CHECK: 1310: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1329: nop
+# CHECK: 1330: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 134a: nop
+# CHECK: 1350: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 136b: nop
+# CHECK: 1370: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 138c: nop
+# CHECK: 1390: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 13ad: nop
+# CHECK: 13b0: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 13ce: nop
+# CHECK: 13d0: incl
+
+ .align 32, 0x90
+INSTRLEN_10_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 10
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 13ef: nop
+# CHECK: 13f0: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_0:
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1400: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1421: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1442: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1463: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1484: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 14a5: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 14c6: nop
+# CHECK: 14d0: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 14e7: nop
+# CHECK: 14f0: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1508: nop
+# CHECK: 1510: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1529: nop
+# CHECK: 1530: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 154a: nop
+# CHECK: 1550: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 156b: nop
+# CHECK: 1570: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 158c: nop
+# CHECK: 1590: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 15ad: nop
+# CHECK: 15b0: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 15ce: nop
+# CHECK: 15d0: incl
+
+ .align 32, 0x90
+INSTRLEN_11_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 11
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 15ef: nop
+# CHECK: 15f0: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_0:
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1600: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1621: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1642: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1663: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1684: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 16a5: nop
+# CHECK: 16b0: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 16c6: nop
+# CHECK: 16d0: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 16e7: nop
+# CHECK: 16f0: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1708: nop
+# CHECK: 1710: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1729: nop
+# CHECK: 1730: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 174a: nop
+# CHECK: 1750: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 176b: nop
+# CHECK: 1770: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 178c: nop
+# CHECK: 1790: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 17ad: nop
+# CHECK: 17b0: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 17ce: nop
+# CHECK: 17d0: incl
+
+ .align 32, 0x90
+INSTRLEN_12_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 12
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 17ef: nop
+# CHECK: 17f0: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_0:
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1800: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1821: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1842: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1863: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1884: nop
+# CHECK: 1890: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 18a5: nop
+# CHECK: 18b0: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 18c6: nop
+# CHECK: 18d0: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 18e7: nop
+# CHECK: 18f0: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1908: nop
+# CHECK: 1910: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1929: nop
+# CHECK: 1930: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 194a: nop
+# CHECK: 1950: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 196b: nop
+# CHECK: 1970: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 198c: nop
+# CHECK: 1990: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 19ad: nop
+# CHECK: 19b0: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 19ce: nop
+# CHECK: 19d0: incl
+
+ .align 32, 0x90
+INSTRLEN_13_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 13
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 19ef: nop
+# CHECK: 19f0: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_0:
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1a00: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1a21: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1a42: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1a63: nop
+# CHECK: 1a70: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1a84: nop
+# CHECK: 1a90: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1aa5: nop
+# CHECK: 1ab0: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ac6: nop
+# CHECK: 1ad0: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ae7: nop
+# CHECK: 1af0: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1b08: nop
+# CHECK: 1b10: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1b29: nop
+# CHECK: 1b30: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1b4a: nop
+# CHECK: 1b50: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1b6b: nop
+# CHECK: 1b70: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1b8c: nop
+# CHECK: 1b90: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1bad: nop
+# CHECK: 1bb0: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1bce: nop
+# CHECK: 1bd0: incl
+
+ .align 32, 0x90
+INSTRLEN_14_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 14
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1bef: nop
+# CHECK: 1bf0: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_0:
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1c00: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1c21: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1c42: nop
+# CHECK: 1c50: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1c63: nop
+# CHECK: 1c70: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1c84: nop
+# CHECK: 1c90: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ca5: nop
+# CHECK: 1cb0: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1cc6: nop
+# CHECK: 1cd0: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ce7: nop
+# CHECK: 1cf0: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1d08: nop
+# CHECK: 1d10: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1d29: nop
+# CHECK: 1d30: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1d4a: nop
+# CHECK: 1d50: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1d6b: nop
+# CHECK: 1d70: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1d8c: nop
+# CHECK: 1d90: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1dad: nop
+# CHECK: 1db0: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1dce: nop
+# CHECK: 1dd0: incl
+
+ .align 32, 0x90
+INSTRLEN_15_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 15
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1def: nop
+# CHECK: 1df0: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_0:
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1e00: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_1:
+ .fill 1, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1e21: nop
+# CHECK: 1e30: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_2:
+ .fill 2, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1e42: nop
+# CHECK: 1e50: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_3:
+ .fill 3, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1e63: nop
+# CHECK: 1e70: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_4:
+ .fill 4, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1e84: nop
+# CHECK: 1e90: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_5:
+ .fill 5, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ea5: nop
+# CHECK: 1eb0: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_6:
+ .fill 6, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ec6: nop
+# CHECK: 1ed0: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_7:
+ .fill 7, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1ee7: nop
+# CHECK: 1ef0: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_8:
+ .fill 8, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1f08: nop
+# CHECK: 1f10: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_9:
+ .fill 9, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1f29: nop
+# CHECK: 1f30: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_10:
+ .fill 10, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1f4a: nop
+# CHECK: 1f50: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_11:
+ .fill 11, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1f6b: nop
+# CHECK: 1f70: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_12:
+ .fill 12, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1f8c: nop
+# CHECK: 1f90: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_13:
+ .fill 13, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1fad: nop
+# CHECK: 1fb0: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_14:
+ .fill 14, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1fce: nop
+# CHECK: 1fd0: incl
+
+ .align 32, 0x90
+INSTRLEN_16_OFFSET_15:
+ .fill 15, 1, 0x90
+ .bundle_lock
+ .rept 16
+ inc %eax
+ .endr
+ .bundle_unlock
+# CHECK: 1fef: nop
+# CHECK: 1ff0: incl
+
diff --git a/test/MC/X86/AlignedBundling/bundle-group-too-large-error.s b/test/MC/X86/AlignedBundling/bundle-group-too-large-error.s
new file mode 100644
index 000000000000..722bf7b9227f
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/bundle-group-too-large-error.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+
+# CHECK: ERROR: Fragment can't be larger than a bundle size
+
+ .text
+foo:
+ .bundle_align_mode 4
+ pushq %rbp
+
+ .bundle_lock
+ pushq %r14
+ callq bar
+ callq bar
+ callq bar
+ callq bar
+ .bundle_unlock
+
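A quick size check against standard x86-64 encodings shows why this has to be a hard error: .bundle_align_mode 4 sets the bundle size to 2^4 = 16 bytes, while the locked group above is one pushq %r14 (2 bytes) plus four callq rel32 (5 bytes each), i.e. 2 + 4*5 = 22 bytes, and no amount of padding can make a 22-byte group fit inside a 16-byte bundle.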
diff --git a/test/MC/X86/AlignedBundling/bundle-lock-option-error.s b/test/MC/X86/AlignedBundling/bundle-lock-option-error.s
new file mode 100644
index 000000000000..82c5d7cf0e7b
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/bundle-lock-option-error.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+
+# '5' is not a valid .bundle_lock option
+# CHECK: error: invalid option
+
+ .bundle_align_mode 4
+ .bundle_lock 5
+ imull $17, %ebx, %ebp
+ .bundle_unlock
+
+
diff --git a/test/MC/X86/AlignedBundling/different-sections.s b/test/MC/X86/AlignedBundling/different-sections.s
new file mode 100644
index 000000000000..3e9fcf376d2d
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/different-sections.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+
+# Test two different executable sections with bundling.
+
+ .bundle_align_mode 3
+ .section text1, "x"
+# CHECK: section text1
+ imull $17, %ebx, %ebp
+ imull $17, %ebx, %ebp
+
+ imull $17, %ebx, %ebp
+# CHECK: 6: nop
+# CHECK-NEXT: 8: imull
+
+ .section text2, "x"
+# CHECK: section text2
+ imull $17, %ebx, %ebp
+ imull $17, %ebx, %ebp
+
+ imull $17, %ebx, %ebp
+# CHECK: 6: nop
+# CHECK-NEXT: 8: imull
+
+
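A quick consistency check for the offsets above: imull $17, %ebx, %ebp encodes in 3 bytes and .bundle_align_mode 3 selects 2^3 = 8-byte bundles, so the third imull would start at offset 6 and end at 9, crossing the boundary at 8; two bytes of NOP therefore push it to offset 8. The padding is identical in both sections because bundle alignment is tracked per section, with each section's offsets starting from 0.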
diff --git a/test/MC/X86/AlignedBundling/lit.local.cfg b/test/MC/X86/AlignedBundling/lit.local.cfg
new file mode 100644
index 000000000000..6c49f08b7496
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.s']
+
+targets = set(config.root.targets_to_build.split())
+if 'X86' not in targets:
+ config.unsupported = True
+
diff --git a/test/MC/X86/AlignedBundling/lock-without-bundle-mode-error.s b/test/MC/X86/AlignedBundling/lock-without-bundle-mode-error.s
new file mode 100644
index 000000000000..d45a9b4a5dfb
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/lock-without-bundle-mode-error.s
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+
+# .bundle_lock can't come without a .bundle_align_mode before it
+
+# CHECK: ERROR: .bundle_lock forbidden when bundling is disabled
+
+ imull $17, %ebx, %ebp
+ .bundle_lock
+
+
diff --git a/test/MC/X86/AlignedBundling/long-nop-pad.s b/test/MC/X86/AlignedBundling/long-nop-pad.s
new file mode 100644
index 000000000000..ea33e2889b9e
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/long-nop-pad.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+
+# Test that long nops are generated for padding where possible.
+
+ .text
+foo:
+ .bundle_align_mode 5
+
+# This callq instruction is 5 bytes long
+ .bundle_lock align_to_end
+ callq bar
+ .bundle_unlock
+# To align this group to a bundle end, we need a 15-byte NOP and a 12-byte NOP.
+# CHECK: 0: nop
+# CHECK-NEXT: f: nop
+# CHECK-NEXT: 1b: callq
+
+# This push instruction is 1 byte long
+ .bundle_lock align_to_end
+ push %rax
+ .bundle_unlock
+# To align this group to a bundle end, we need two 15-byte NOPs and a 1-byte NOP.
+# CHECK: 20: nop
+# CHECK-NEXT: 2f: nop
+# CHECK-NEXT: 3e: nop
+# CHECK-NEXT: 3f: pushq
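The NOP sizes in this test can be checked by hand. A minimal sketch of the align-to-end computation, assuming a single x86 NOP is capped at 15 bytes and that the padding is split greedily, largest NOP first (both are assumptions about the implementation, though they match the disassembly above):

    def align_to_end_padding(offset, size, bundle_size=32, max_nop=15):
        # Pad so the locked group *ends* exactly on a bundle boundary,
        # then split the padding into NOPs of at most max_nop bytes.
        pad = -(offset + size) % bundle_size
        nops = []
        while pad > 0:
            nops.append(min(pad, max_nop))
            pad -= nops[-1]
        return nops

    print(align_to_end_padding(0, 5))   # [15, 12]: the callq lands at 0x1b
    print(align_to_end_padding(32, 1))  # [15, 15, 1]: the push lands at 0x3f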
diff --git a/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s b/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
new file mode 100644
index 000000000000..6ca4046f0c7b
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/pad-align-to-bundle-end.s
@@ -0,0 +1,33 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+
+# Test some variations of padding to the end of a bundle.
+
+ .text
+foo:
+ .bundle_align_mode 4
+
+# Each of these callq instructions is 5 bytes long
+ callq bar
+ callq bar
+ .bundle_lock align_to_end
+ callq bar
+ .bundle_unlock
+# To align this group to a bundle end, we need a 1-byte NOP.
+# CHECK: a: nop
+# CHECK-NEXT: b: callq
+
+ callq bar
+ callq bar
+ .bundle_lock align_to_end
+ callq bar
+ callq bar
+ .bundle_unlock
+# Here we have to pad until the end of the *next* bundle, because
+# otherwise the group would cross a bundle boundary.
+# CHECK: 1a: nop
+# The nop sequence may be implemented as one instruction or many, but if
+# it's one instruction, that instruction cannot itself cross the boundary.
+# CHECK: 20: nop
+# CHECK-NEXT: 26: callq
+# CHECK-NEXT: 2b: callq
diff --git a/test/MC/X86/AlignedBundling/pad-bundle-groups.s b/test/MC/X86/AlignedBundling/pad-bundle-groups.s
new file mode 100644
index 000000000000..b65ee7a5cc74
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/pad-bundle-groups.s
@@ -0,0 +1,46 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+
+# Test some variations of padding for bundle-locked groups.
+
+ .text
+foo:
+ .bundle_align_mode 4
+
+# Each of these callq instructions is 5 bytes long
+ callq bar
+ callq bar
+
+ .bundle_lock
+ callq bar
+ callq bar
+ .bundle_unlock
+# We'll need a 6-byte NOP before this group
+# CHECK: a: nop
+# CHECK-NEXT: 10: callq
+# CHECK-NEXT: 15: callq
+
+ .bundle_lock
+ callq bar
+ callq bar
+ .bundle_unlock
+# Same here
+# CHECK: 1a: nop
+# CHECK-NEXT: 20: callq
+# CHECK-NEXT: 25: callq
+
+ .align 16, 0x90
+ callq bar
+ .bundle_lock
+ callq bar
+ callq bar
+ callq bar
+ .bundle_unlock
+# And here we'll need an 11-byte NOP
+# CHECK: 30: callq
+# CHECK: 35: nop
+# CHECK-NEXT: 40: callq
+# CHECK-NEXT: 45: callq
+
+
+
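The start-padding rule these groups exercise can be modeled informally (a sketch, not the actual MCAssembler logic): a bundle-locked group that would cross a bundle boundary is padded forward to the start of the next bundle, which is also why a group may never exceed the bundle size.

    def group_start_padding(offset, size, bundle_size=16):
        # Groups larger than a bundle are rejected outright (see the
        # bundle-group-too-large-error test); otherwise pad to the next
        # boundary only if the group would cross one.
        assert size <= bundle_size
        if offset % bundle_size + size > bundle_size:
            return bundle_size - offset % bundle_size
        return 0

    print(group_start_padding(0xa, 10))   # 6: the group moves from 0xa to 0x10
    print(group_start_padding(0x35, 15))  # 11: the group moves from 0x35 to 0x40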
diff --git a/test/MC/X86/AlignedBundling/relax-at-bundle-end.s b/test/MC/X86/AlignedBundling/relax-at-bundle-end.s
new file mode 100644
index 000000000000..ab4affbbeac8
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/relax-at-bundle-end.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+
+# Test that an instruction near a bundle end gets properly padded
+# after it is relaxed.
+.text
+foo:
+ .bundle_align_mode 5
+ .rept 29
+ push %rax
+ .endr
+# CHECK: 1c: push
+# CHECK: 1d: nop
+# CHECK: 20: jne
+ jne 0x100
+
diff --git a/test/MC/X86/AlignedBundling/relax-in-bundle-group.s b/test/MC/X86/AlignedBundling/relax-in-bundle-group.s
new file mode 100644
index 000000000000..0a99bb5ce563
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/relax-in-bundle-group.s
@@ -0,0 +1,42 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+# RUN: | llvm-objdump -disassemble - | FileCheck %s
+
+# Test that instructions inside bundle-locked groups are relaxed even if their
+# fixup is short enough not to warrant relaxation on its own.
+
+ .text
+foo:
+ .bundle_align_mode 4
+ pushq %rbp
+
+ movl %edi, %ebx
+ callq bar
+ movl %eax, %r14d
+ imull $17, %ebx, %ebp
+ movl %ebx, %edi
+ callq bar
+ cmpl %r14d, %ebp
+ .bundle_lock
+
+ jle .L_ELSE
+# This group would've started at 0x18 and is too long, so chunky NOP padding
+# is inserted to push it to 0x20.
+# CHECK: 18: {{[a-f0-9 ]+}} nopl
+
+# The long encoding for JLE should be used here even though its target is close
+# CHECK-NEXT: 20: 0f 8e
+
+ addl %ebp, %eax
+
+ jmp .L_RET
+# Same for the JMP
+# CHECK: 28: e9
+
+ .bundle_unlock
+
+.L_ELSE:
+ imull %ebx, %eax
+.L_RET:
+
+ popq %rbx
+
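For context, the encodings involved (standard x86, not something the test itself spells out): a short jle is 2 bytes (0x7e rel8) and its relaxed near form is 6 bytes (0x0f 0x8e rel32), while jmp is 2 bytes short (0xeb rel8) versus 5 bytes near (0xe9 rel32). Relaxing eagerly inside a locked group keeps the group's total size fixed, so the padding decision made for the group cannot be invalidated by a later relaxation.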
diff --git a/test/MC/X86/AlignedBundling/single-inst-bundling.s b/test/MC/X86/AlignedBundling/single-inst-bundling.s
new file mode 100644
index 000000000000..c0275f4d1ecb
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/single-inst-bundling.s
@@ -0,0 +1,47 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+
+# Test simple NOP insertion for single instructions.
+
+ .text
+foo:
+ # Will be bundle-aligning to 16-byte boundaries
+ .bundle_align_mode 4
+ pushq %rbp
+ pushq %r14
+ pushq %rbx
+
+ movl %edi, %ebx
+ callq bar
+ movl %eax, %r14d
+
+ imull $17, %ebx, %ebp
+# This imull is 3 bytes long and should have started at 0xe, so two bytes
+# of NOP padding are inserted and it starts at 0x10 instead
+# CHECK: nop
+# CHECK-NEXT: 10: imull
+
+ movl %ebx, %edi
+ callq bar
+ cmpl %r14d, %ebp
+ jle .L_ELSE
+# Due to the padding that's inserted before the addl, the jump target
+# moves one byte farther away.
+# CHECK: jle 5
+
+ addl %ebp, %eax
+# CHECK: nop
+# CHECK-NEXT: 20: addl
+
+ jmp .L_RET
+.L_ELSE:
+ imull %ebx, %eax
+.L_RET:
+ ret
+
+# Just sanity checking that data fills don't drive bundling crazy
+ .data
+ .byte 40
+ .byte 98
+
+
diff --git a/test/MC/X86/AlignedBundling/switch-section-locked-error.s b/test/MC/X86/AlignedBundling/switch-section-locked-error.s
new file mode 100644
index 000000000000..af41e1921252
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/switch-section-locked-error.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+
+# This test invokes .bundle_lock and then switches to a different section
+# without the matching .bundle_unlock.
+
+# CHECK: ERROR: Unterminated .bundle_lock
+
+ .bundle_align_mode 3
+ .section text1, "x"
+ imull $17, %ebx, %ebp
+ .bundle_lock
+ imull $17, %ebx, %ebp
+
+ .section text2, "x"
+ imull $17, %ebx, %ebp
+
diff --git a/test/MC/X86/AlignedBundling/unlock-without-lock-error.s b/test/MC/X86/AlignedBundling/unlock-without-lock-error.s
new file mode 100644
index 000000000000..699511d4e6b6
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/unlock-without-lock-error.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - 2>&1 | FileCheck %s
+
+# .bundle_unlock can't come without a .bundle_lock before it
+
+# CHECK: ERROR: .bundle_unlock without matching lock
+
+ .bundle_align_mode 3
+ imull $17, %ebx, %ebp
+ .bundle_unlock
+
+
diff --git a/test/MC/X86/fde-reloc.s b/test/MC/X86/fde-reloc.s
new file mode 100644
index 000000000000..63ac97662188
--- /dev/null
+++ b/test/MC/X86/fde-reloc.s
@@ -0,0 +1,11 @@
+// RUN: llvm-mc -filetype=obj %s -o - -triple x86_64-pc-linux | llvm-objdump -r - | FileCheck --check-prefix=X86-64 %s
+// RUN: llvm-mc -filetype=obj %s -o - -triple i686-pc-linux | llvm-objdump -r - | FileCheck --check-prefix=I686 %s
+
+// PR15448
+
+func:
+ .cfi_startproc
+ .cfi_endproc
+
+// X86-64: R_X86_64_PC32
+// I686: R_386_PC32
diff --git a/test/MC/X86/gnux32-dwarf-gen.s b/test/MC/X86/gnux32-dwarf-gen.s
new file mode 100644
index 000000000000..6603125343d0
--- /dev/null
+++ b/test/MC/X86/gnux32-dwarf-gen.s
@@ -0,0 +1,24 @@
+# RUN: llvm-mc -g -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t.64
+# RUN: llvm-dwarfdump -debug-dump=info %t.64 | FileCheck -check-prefix=DEFAULTABI %s
+
+# RUN: llvm-mc -g -filetype=obj -triple x86_64-pc-linux-gnux32 %s -o %t.32
+# RUN: llvm-dwarfdump -debug-dump=info %t.32 | FileCheck -check-prefix=X32ABI %s
+
+# This test checks the DWARF info section emitted to the output object by the
+# assembler, comparing the x32 ABI against the default x86-64 ABI.
+
+# DEFAULTABI: addr_size = 0x08
+# X32ABI: addr_size = 0x04
+
+.globl _bar
+_bar:
+ movl $0, %eax
+L1: leave
+ ret
+_foo:
+_baz:
+ nop
+.data
+_x: .long 1
+
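Background for the two expected values: the x32 ABI runs the full x86-64 instruction set with 32-bit pointers (ILP32), so the assembler emits DWARF with a 4-byte address size, versus the 8-byte addresses of the default LP64 x86-64 ABI.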
diff --git a/test/MC/X86/intel-syntax-encoding.s b/test/MC/X86/intel-syntax-encoding.s
index 03b05511649a..9806ac3802e7 100644
--- a/test/MC/X86/intel-syntax-encoding.s
+++ b/test/MC/X86/intel-syntax-encoding.s
@@ -31,6 +31,27 @@
// CHECK: encoding: [0x48,0x83,0xc0,0xf4]
add rax, -12
+// CHECK: encoding: [0x66,0x83,0xd0,0xf4]
+ adc ax, -12
+// CHECK: encoding: [0x83,0xd0,0xf4]
+ adc eax, -12
+// CHECK: encoding: [0x48,0x83,0xd0,0xf4]
+ adc rax, -12
+
+// CHECK: encoding: [0x66,0x83,0xd8,0xf4]
+ sbb ax, -12
+// CHECK: encoding: [0x83,0xd8,0xf4]
+ sbb eax, -12
+// CHECK: encoding: [0x48,0x83,0xd8,0xf4]
+ sbb rax, -12
+
+// CHECK: encoding: [0x66,0x83,0xf8,0xf4]
+ cmp ax, -12
+// CHECK: encoding: [0x83,0xf8,0xf4]
+ cmp eax, -12
+// CHECK: encoding: [0x48,0x83,0xf8,0xf4]
+ cmp rax, -12
+
LBB0_3:
// CHECK: encoding: [0xeb,A]
jmp LBB0_3
diff --git a/test/MC/X86/intel-syntax-hex.s b/test/MC/X86/intel-syntax-hex.s
new file mode 100644
index 000000000000..b3a19fbaa345
--- /dev/null
+++ b/test/MC/X86/intel-syntax-hex.s
@@ -0,0 +1,26 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
+// rdar://12470373
+
+// Checks to make sure we parse the hexadecimal suffix properly.
+// CHECK: movl $10, %eax
+ mov eax, 10
+// CHECK: movl $16, %eax
+ mov eax, 10h
+// CHECK: movl $16, %eax
+ mov eax, 10H
+// CHECK: movl $4294967295, %eax
+ mov eax, 0ffffffffh
+// CHECK: movl $4294967295, %eax
+ mov eax, 0xffffffff
+// CHECK: movl $4294967295, %eax
+ mov eax, 0xffffffffh
+// CHECK: movl $15, %eax
+ mov eax, 0fh
+// CHECK: movl $162, %eax
+ mov eax, 0a2h
+// CHECK: movl $162, %eax
+ mov eax, 0xa2
+// CHECK: movl $162, %eax
+ mov eax, 0xa2h
+// CHECK: movl $674, %eax
+ mov eax, 2a2h
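The accepted spellings reduce to a small rule: a trailing h/H marks a hex literal, which must then begin with a decimal digit so it cannot be mistaken for an identifier; a 0x prefix marks hex as well; and the two may be combined. A minimal model in Python (an illustration of the rule the test exercises, not the actual AsmParser code):

    def parse_intel_number(tok):
        # Trailing 'h'/'H' marks hex, but only if the token starts with a
        # decimal digit (10h, 0a2h); a '0x' prefix marks hex too, and both
        # may appear at once (0xa2h).
        is_hex = tok[0].isdigit() and tok[-1] in 'hH'
        if is_hex:
            tok = tok[:-1]
        if tok.lower().startswith('0x'):
            tok = tok[2:]
            is_hex = True
        return int(tok, 16 if is_hex else 10)

    assert parse_intel_number('10') == 10
    assert parse_intel_number('10h') == 16
    assert parse_intel_number('0ffffffffh') == 0xffffffff
    assert parse_intel_number('0xa2h') == 162
    assert parse_intel_number('2a2h') == 674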
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index 7edd26a1382f..8bfa58a4bed8 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -56,13 +56,195 @@ _main:
// CHECK: fld %st(0)
fld ST(0)
// CHECK: movl %fs:(%rdi), %eax
- mov EAX, DWORD PTR FS:[RDI]
-// CHECK: leal (,%rdi,4), %r8d
- lea R8D, DWORD PTR [4*RDI]
-// CHECK: movl _fnan(,%ecx,4), %ecx
- mov ECX, DWORD PTR [4*ECX + _fnan]
-// CHECK: movq %fs:320, %rax
- mov RAX, QWORD PTR FS:[320]
-// CHECK: vpgatherdd %xmm8, (%r15,%xmm9,2), %xmm1
- vpgatherdd XMM10, DWORD PTR [R15 + 2*XMM9], XMM8
+ mov EAX, DWORD PTR FS:[RDI]
+// CHECK: leal (,%rdi,4), %r8d
+ lea R8D, DWORD PTR [4*RDI]
+// CHECK: movl _fnan(,%ecx,4), %ecx
+ mov ECX, DWORD PTR [4*ECX + _fnan]
+// CHECK: movq %fs:320, %rax
+ mov RAX, QWORD PTR FS:[320]
+// CHECK: vpgatherdd %xmm8, (%r15,%xmm9,2), %xmm1
+ vpgatherdd XMM10, DWORD PTR [R15 + 2*XMM9], XMM8
+// CHECK: movsd -8, %xmm5
+ movsd XMM5, QWORD PTR [-8]
+// CHECK: movl %ecx, (%eax)
+ mov [eax], ecx
+// CHECK: movl %ecx, (,%ebx,4)
+ mov [4*ebx], ecx
+ // CHECK: movl %ecx, (,%ebx,4)
+ mov [ebx*4], ecx
+// CHECK: movl %ecx, 1024
+ mov [1024], ecx
+// CHECK: movl %ecx, 4132
+ mov [0x1024], ecx
+// CHECK: movl %ecx, 32
+ mov [16 + 16], ecx
+// CHECK: movl %ecx, 0
+ mov [16 - 16], ecx
+// CHECK: movl %ecx, 32
+ mov [16][16], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+ mov [eax + 4*ebx], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+ mov [eax + ebx*4], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+ mov [4*ebx + eax], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+ mov [ebx*4 + eax], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+ mov [eax][4*ebx], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+ mov [eax][ebx*4], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+ mov [4*ebx][eax], ecx
+// CHECK: movl %ecx, (%eax,%ebx,4)
+ mov [ebx*4][eax], ecx
+// CHECK: movl %ecx, 12(%eax)
+ mov [eax + 12], ecx
+// CHECK: movl %ecx, 12(%eax)
+ mov [12 + eax], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [eax + 16 + 16], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [16 + eax + 16], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [16 + 16 + eax], ecx
+// CHECK: movl %ecx, 12(%eax)
+ mov [eax][12], ecx
+// CHECK: movl %ecx, 12(%eax)
+ mov [12][eax], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [eax][16 + 16], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [eax + 16][16], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [eax][16][16], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [16][eax + 16], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [16 + eax][16], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [16][16 + eax], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [16 + 16][eax], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [eax][16][16], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [16][eax][16], ecx
+// CHECK: movl %ecx, 32(%eax)
+ mov [16][16][eax], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+ mov [4*ebx + 16], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+ mov [ebx*4 + 16], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+ mov [4*ebx][16], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+ mov [ebx*4][16], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+ mov [16 + 4*ebx], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+ mov [16 + ebx*4], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+ mov [16][4*ebx], ecx
+// CHECK: movl %ecx, 16(,%ebx,4)
+ mov [16][ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax + 4*ebx + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax + 16 + 4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [4*ebx + eax + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [4*ebx + 16 + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16 + eax + 4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16 + eax + 4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax][4*ebx + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax][16 + 4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [4*ebx][eax + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [4*ebx][16 + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16][eax + 4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16][eax + 4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax + 4*ebx][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax + 16][4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [4*ebx + eax][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [4*ebx + 16][eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16 + eax][4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16 + eax][4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax][4*ebx][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax][16][4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [4*ebx][eax][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [4*ebx][16][eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16][eax][4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16][eax][4*ebx], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax + ebx*4 + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax + 16 + ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [ebx*4 + eax + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [ebx*4 + 16 + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16 + eax + ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16 + eax + ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax][ebx*4 + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax][16 + ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [ebx*4][eax + 16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [ebx*4][16 + eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16][eax + ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16][eax + ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax + ebx*4][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax + 16][ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [ebx*4 + eax][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [ebx*4 + 16][eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16 + eax][ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16 + eax][ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax][ebx*4][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [eax][16][ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [ebx*4][eax][16], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [ebx*4][16][eax], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16][eax][ebx*4], ecx
+// CHECK: movl %ecx, 16(%eax,%ebx,4)
+ mov [16][eax][ebx*4], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+ mov [eax][ebx*4 - 16], ecx
ret
diff --git a/test/MC/X86/lit.local.cfg b/test/MC/X86/lit.local.cfg
index eee568e8fdc2..ad280c7cf7de 100644
--- a/test/MC/X86/lit.local.cfg
+++ b/test/MC/X86/lit.local.cfg
@@ -1,12 +1,5 @@
config.suffixes = ['.ll', '.c', '.cpp', '.s']
-def getRoot(config):
- if not config.parent:
- return config
- return getRoot(config.parent)
-
-root = getRoot(config)
-
-targets = set(root.targets_to_build.split())
+targets = set(config.root.targets_to_build.split())
if not 'X86' in targets:
config.unsupported = True
diff --git a/test/MC/X86/shuffle-comments.s b/test/MC/X86/shuffle-comments.s
new file mode 100644
index 000000000000..20fd4ebae4dc
--- /dev/null
+++ b/test/MC/X86/shuffle-comments.s
@@ -0,0 +1,271 @@
+# RUN: llvm-mc %s -triple=x86_64-unknown-unknown | FileCheck %s
+
+palignr $8, %xmm0, %xmm1
+# CHECK: xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+palignr $8, (%rax), %xmm1
+# CHECK: xmm1 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+
+palignr $16, %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+palignr $16, (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+
+palignr $0, %xmm0, %xmm1
+# CHECK: xmm1 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+palignr $0, (%rax), %xmm1
+# CHECK: xmm1 = mem[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+
+vpalignr $8, %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+vpalignr $8, (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+
+vpalignr $16, %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+vpalignr $16, (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+
+vpalignr $0, %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+vpalignr $0, (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = mem[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+
+vpalignr $8, %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm0[8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7],ymm0[24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23]
+vpalignr $8, (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = mem[8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7],mem[24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23]
+
+vpalignr $16, %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+vpalignr $16, (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+
+vpalignr $0, %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+vpalignr $0, (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = mem[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+
+pshufd $27, %xmm0, %xmm1
+# CHECK: xmm1 = xmm0[3,2,1,0]
+pshufd $27, (%rax), %xmm1
+# CHECK: xmm1 = mem[3,2,1,0]
+
+vpshufd $27, %xmm0, %xmm1
+# CHECK: xmm1 = xmm0[3,2,1,0]
+vpshufd $27, (%rax), %xmm1
+# CHECK: xmm1 = mem[3,2,1,0]
+
+vpshufd $27, %ymm0, %ymm1
+# CHECK: ymm1 = ymm0[3,2,1,0,7,6,5,4]
+vpshufd $27, (%rax), %ymm1
+# CHECK: ymm1 = mem[3,2,1,0,7,6,5,4]
+
+punpcklbw %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+punpcklbw (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3],xmm1[4],mem[4],xmm1[5],mem[5],xmm1[6],mem[6],xmm1[7],mem[7]
+
+vpunpcklbw %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+vpunpcklbw (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3],xmm1[4],mem[4],xmm1[5],mem[5],xmm1[6],mem[6],xmm1[7],mem[7]
+
+vpunpcklbw %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+vpunpcklbw (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[1],mem[1],ymm1[2],mem[2],ymm1[3],mem[3],ymm1[4],mem[4],ymm1[5],mem[5],ymm1[6],mem[6],ymm1[7],mem[7],ymm1[16],mem[16],ymm1[17],mem[17],ymm1[18],mem[18],ymm1[19],mem[19],ymm1[20],mem[20],ymm1[21],mem[21],ymm1[22],mem[22],ymm1[23],mem[23]
+
+punpckhbw %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+punpckhbw (%rax), %xmm1
+# CHECK: xmm1 = xmm1[8],mem[8],xmm1[9],mem[9],xmm1[10],mem[10],xmm1[11],mem[11],xmm1[12],mem[12],xmm1[13],mem[13],xmm1[14],mem[14],xmm1[15],mem[15]
+
+vpunpckhbw %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+vpunpckhbw (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[8],mem[8],xmm1[9],mem[9],xmm1[10],mem[10],xmm1[11],mem[11],xmm1[12],mem[12],xmm1[13],mem[13],xmm1[14],mem[14],xmm1[15],mem[15]
+
+vpunpckhbw %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
+vpunpckhbw (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[8],mem[8],ymm1[9],mem[9],ymm1[10],mem[10],ymm1[11],mem[11],ymm1[12],mem[12],ymm1[13],mem[13],ymm1[14],mem[14],ymm1[15],mem[15],ymm1[24],mem[24],ymm1[25],mem[25],ymm1[26],mem[26],ymm1[27],mem[27],ymm1[28],mem[28],ymm1[29],mem[29],ymm1[30],mem[30],ymm1[31],mem[31]
+
+punpcklwd %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+punpcklwd (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+
+vpunpcklwd %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+vpunpcklwd (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3]
+
+vpunpcklwd %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
+vpunpcklwd (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[1],mem[1],ymm1[2],mem[2],ymm1[3],mem[3],ymm1[8],mem[8],ymm1[9],mem[9],ymm1[10],mem[10],ymm1[11],mem[11]
+
+punpckhwd %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+punpckhwd (%rax), %xmm1
+# CHECK: xmm1 = xmm1[4],mem[4],xmm1[5],mem[5],xmm1[6],mem[6],xmm1[7],mem[7]
+
+vpunpckhwd %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+vpunpckhwd (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[4],mem[4],xmm1[5],mem[5],xmm1[6],mem[6],xmm1[7],mem[7]
+
+vpunpckhwd %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
+vpunpckhwd (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[4],mem[4],ymm1[5],mem[5],ymm1[6],mem[6],ymm1[7],mem[7],ymm1[12],mem[12],ymm1[13],mem[13],ymm1[14],mem[14],ymm1[15],mem[15]
+
+punpckldq %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+punpckldq (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+
+vpunpckldq %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+vpunpckldq (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0],xmm1[1],mem[1]
+
+vpunpckldq %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
+vpunpckldq (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[1],mem[1],ymm1[4],mem[4],ymm1[5],mem[5]
+
+punpckhdq %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+punpckhdq (%rax), %xmm1
+# CHECK: xmm1 = xmm1[2],mem[2],xmm1[3],mem[3]
+
+vpunpckhdq %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+vpunpckhdq (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[2],mem[2],xmm1[3],mem[3]
+
+vpunpckhdq %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
+vpunpckhdq (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[2],mem[2],ymm1[3],mem[3],ymm1[6],mem[6],ymm1[7],mem[7]
+
+punpcklqdq %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0]
+punpcklqdq (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0]
+
+vpunpcklqdq %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0]
+vpunpcklqdq (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0]
+
+vpunpcklqdq %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+vpunpcklqdq (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[2],mem[2]
+
+punpckhqdq %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[1],xmm0[1]
+punpckhqdq (%rax), %xmm1
+# CHECK: xmm1 = xmm1[1],mem[1]
+
+vpunpckhqdq %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],xmm0[1]
+vpunpckhqdq (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],mem[1]
+
+vpunpckhqdq %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+vpunpckhqdq (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],mem[1],ymm1[3],mem[3]
+
+unpcklps %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+unpcklps (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+
+vunpcklps %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+vunpcklps (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0],xmm1[1],mem[1]
+
+vunpcklps %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
+vunpcklps (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[1],mem[1],ymm1[4],mem[4],ymm1[5],mem[5]
+
+unpckhps %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+unpckhps (%rax), %xmm1
+# CHECK: xmm1 = xmm1[2],mem[2],xmm1[3],mem[3]
+
+vunpckhps %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+vunpckhps (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[2],mem[2],xmm1[3],mem[3]
+
+vunpckhps %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
+vunpckhps (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[2],mem[2],ymm1[3],mem[3],ymm1[6],mem[6],ymm1[7],mem[7]
+
+unpcklpd %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[0],xmm0[0]
+unpcklpd (%rax), %xmm1
+# CHECK: xmm1 = xmm1[0],mem[0]
+
+vunpcklpd %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0]
+vunpcklpd (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0]
+
+vunpcklpd %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+vunpcklpd (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[0],mem[0],ymm1[2],mem[2]
+
+unpckhpd %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[1],xmm0[1]
+unpckhpd (%rax), %xmm1
+# CHECK: xmm1 = xmm1[1],mem[1]
+
+vunpckhpd %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],xmm0[1]
+vunpckhpd (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],mem[1]
+
+vunpckhpd %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+vunpckhpd (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],mem[1],ymm1[3],mem[3]
+
+shufps $27, %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[3,2],xmm0[1,0]
+shufps $27, (%rax), %xmm1
+# CHECK: xmm1 = xmm1[3,2],mem[1,0]
+
+vshufps $27, %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[3,2],xmm0[1,0]
+vshufps $27, (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[3,2],mem[1,0]
+
+vshufps $27, %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[3,2],ymm0[1,0],ymm1[7,6],ymm0[5,4]
+vshufps $27, (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[3,2],mem[1,0],ymm1[7,6],mem[5,4]
+
+shufpd $3, %xmm0, %xmm1
+# CHECK: xmm1 = xmm1[1],xmm0[1]
+shufpd $3, (%rax), %xmm1
+# CHECK: xmm1 = xmm1[1],mem[1]
+
+vshufpd $3, %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],xmm0[1]
+vshufpd $3, (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[1],mem[1]
+
+vshufpd $11, %ymm0, %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
+vshufpd $11, (%rax), %ymm1, %ymm2
+# CHECK: ymm2 = ymm1[1],mem[1],ymm1[2],mem[3]
diff --git a/test/MC/X86/x86-32-avx.s b/test/MC/X86/x86-32-avx.s
index 586f3fe73c57..ec4abdbb2a8b 100644
--- a/test/MC/X86/x86-32-avx.s
+++ b/test/MC/X86/x86-32-avx.s
@@ -655,14 +655,22 @@
// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
vcvttss2si (%ecx), %eax
-// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2
+// CHECK: vcvtsi2ssl (%eax), %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
vcvtsi2ss (%eax), %xmm1, %xmm2
-// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2
+// CHECK: vcvtsi2ssl (%eax), %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
vcvtsi2ss (%eax), %xmm1, %xmm2
+// CHECK: vcvtsi2ssl (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
+ vcvtsi2ssl (%eax), %xmm1, %xmm2
+
+// CHECK: vcvtsi2ssl (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
+ vcvtsi2ssl (%eax), %xmm1, %xmm2
+
// CHECK: vcvttsd2si %xmm1, %eax
// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1]
vcvttsd2si %xmm1, %eax
@@ -671,14 +679,22 @@
// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
vcvttsd2si (%ecx), %eax
-// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2
+// CHECK: vcvtsi2sdl (%eax), %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
vcvtsi2sd (%eax), %xmm1, %xmm2
-// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2
+// CHECK: vcvtsi2sdl (%eax), %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
vcvtsi2sd (%eax), %xmm1, %xmm2
+// CHECK: vcvtsi2sdl (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
+ vcvtsi2sdl (%eax), %xmm1, %xmm2
+
+// CHECK: vcvtsi2sdl (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
+ vcvtsi2sdl (%eax), %xmm1, %xmm2
+
// CHECK: vmovaps (%eax), %xmm2
// CHECK: encoding: [0xc5,0xf8,0x28,0x10]
vmovaps (%eax), %xmm2
@@ -767,14 +783,22 @@
// CHECK: encoding: [0xc5,0xe8,0x12,0xd9]
vmovhlps %xmm1, %xmm2, %xmm3
-// CHECK: vcvtss2sil %xmm1, %eax
+// CHECK: vcvtss2si %xmm1, %eax
// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1]
vcvtss2si %xmm1, %eax
-// CHECK: vcvtss2sil (%eax), %ebx
+// CHECK: vcvtss2si (%eax), %ebx
// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
vcvtss2si (%eax), %ebx
+// CHECK: vcvtss2si %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1]
+ vcvtss2sil %xmm1, %eax
+
+// CHECK: vcvtss2si (%eax), %ebx
+// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
+ vcvtss2sil (%eax), %ebx
+
// CHECK: vcvtdq2ps %xmm5, %xmm6
// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5]
vcvtdq2ps %xmm5, %xmm6
@@ -3103,19 +3127,35 @@
// CHECK: encoding: [0xc5,0xf8,0x77]
vzeroupper
-// CHECK: vcvtsd2sil %xmm4, %ecx
+// CHECK: vcvtsd2si %xmm4, %ecx
// CHECK: encoding: [0xc5,0xfb,0x2d,0xcc]
vcvtsd2sil %xmm4, %ecx
-// CHECK: vcvtsd2sil (%ecx), %ecx
+// CHECK: vcvtsd2si (%ecx), %ecx
// CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
vcvtsd2sil (%ecx), %ecx
-// CHECK: vcvtsi2sd (%ebp), %xmm0, %xmm7
+// CHECK: vcvtsd2si %xmm4, %ecx
+// CHECK: encoding: [0xc5,0xfb,0x2d,0xcc]
+ vcvtsd2si %xmm4, %ecx
+
+// CHECK: vcvtsd2si (%ecx), %ecx
+// CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
+ vcvtsd2si (%ecx), %ecx
+
+// CHECK: vcvtsi2sdl (%ebp), %xmm0, %xmm7
+// CHECK: encoding: [0xc5,0xfb,0x2a,0x7d,0x00]
+ vcvtsi2sdl (%ebp), %xmm0, %xmm7
+
+// CHECK: vcvtsi2sdl (%esp), %xmm0, %xmm7
+// CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24]
+ vcvtsi2sdl (%esp), %xmm0, %xmm7
+
+// CHECK: vcvtsi2sdl (%ebp), %xmm0, %xmm7
// CHECK: encoding: [0xc5,0xfb,0x2a,0x7d,0x00]
vcvtsi2sd (%ebp), %xmm0, %xmm7
-// CHECK: vcvtsi2sd (%esp), %xmm0, %xmm7
+// CHECK: vcvtsi2sdl (%esp), %xmm0, %xmm7
// CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24]
vcvtsi2sd (%esp), %xmm0, %xmm7
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
index 082491651927..c348915d23ce 100644
--- a/test/MC/X86/x86-32-coverage.s
+++ b/test/MC/X86/x86-32-coverage.s
@@ -896,11 +896,11 @@
// CHECK: cvtps2pi %xmm5, %mm3
cvtps2pi %xmm5,%mm3
-// CHECK: cvtsi2ss %ecx, %xmm5
- cvtsi2ss %ecx,%xmm5
+// CHECK: cvtsi2ssl %ecx, %xmm5
+ cvtsi2ssl %ecx,%xmm5
-// CHECK: cvtsi2ss 3735928559(%ebx,%ecx,8), %xmm5
- cvtsi2ss 0xdeadbeef(%ebx,%ecx,8),%xmm5
+// CHECK: cvtsi2ssl 3735928559(%ebx,%ecx,8), %xmm5
+ cvtsi2ssl 0xdeadbeef(%ebx,%ecx,8),%xmm5
// CHECK: cvttps2pi 3735928559(%ebx,%ecx,8), %mm3
cvttps2pi 0xdeadbeef(%ebx,%ecx,8),%mm3
@@ -1157,11 +1157,11 @@
// CHECK: cvtpi2pd %mm3, %xmm5
cvtpi2pd %mm3,%xmm5
-// CHECK: cvtsi2sd %ecx, %xmm5
- cvtsi2sd %ecx,%xmm5
+// CHECK: cvtsi2sdl %ecx, %xmm5
+ cvtsi2sdl %ecx,%xmm5
-// CHECK: cvtsi2sd 3735928559(%ebx,%ecx,8), %xmm5
- cvtsi2sd 0xdeadbeef(%ebx,%ecx,8),%xmm5
+// CHECK: cvtsi2sdl 3735928559(%ebx,%ecx,8), %xmm5
+ cvtsi2sdl 0xdeadbeef(%ebx,%ecx,8),%xmm5
// CHECK: divpd %xmm5, %xmm5
divpd %xmm5,%xmm5
@@ -3948,6 +3948,10 @@
// CHECK: encoding: [0xd9,0xca]
fxch %st(2)
+// CHECK: fcom
+// CHECK: encoding: [0xd8,0xd1]
+ fcom
+
// CHECK: fcom %st(2)
// CHECK: encoding: [0xd8,0xd2]
fcom %st(2)
@@ -3968,6 +3972,10 @@
// CHECK: encoding: [0xda,0x15,0x78,0x56,0x34,0x12]
ficoml 0x12345678
+// CHECK: fcomp
+// CHECK: encoding: [0xd8,0xd9]
+ fcomp
+
// CHECK: fcomp %st(2)
// CHECK: encoding: [0xd8,0xda]
fcomp %st(2)
@@ -7144,29 +7152,29 @@
// CHECK: encoding: [0x0f,0x2d,0xdd]
cvtps2pi %xmm5,%mm3
-// CHECK: cvtsi2ss %ecx, %xmm5
+// CHECK: cvtsi2ssl %ecx, %xmm5
// CHECK: encoding: [0xf3,0x0f,0x2a,0xe9]
- cvtsi2ss %ecx,%xmm5
+ cvtsi2ssl %ecx,%xmm5
-// CHECK: cvtsi2ss 3735928559(%ebx,%ecx,8), %xmm5
+// CHECK: cvtsi2ssl 3735928559(%ebx,%ecx,8), %xmm5
// CHECK: encoding: [0xf3,0x0f,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
- cvtsi2ss 0xdeadbeef(%ebx,%ecx,8),%xmm5
+ cvtsi2ssl 0xdeadbeef(%ebx,%ecx,8),%xmm5
-// CHECK: cvtsi2ss 69, %xmm5
+// CHECK: cvtsi2ssl 69, %xmm5
// CHECK: encoding: [0xf3,0x0f,0x2a,0x2d,0x45,0x00,0x00,0x00]
- cvtsi2ss 0x45,%xmm5
+ cvtsi2ssl 0x45,%xmm5
-// CHECK: cvtsi2ss 32493, %xmm5
+// CHECK: cvtsi2ssl 32493, %xmm5
// CHECK: encoding: [0xf3,0x0f,0x2a,0x2d,0xed,0x7e,0x00,0x00]
- cvtsi2ss 0x7eed,%xmm5
+ cvtsi2ssl 0x7eed,%xmm5
-// CHECK: cvtsi2ss 3133065982, %xmm5
+// CHECK: cvtsi2ssl 3133065982, %xmm5
// CHECK: encoding: [0xf3,0x0f,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
- cvtsi2ss 0xbabecafe,%xmm5
+ cvtsi2ssl 0xbabecafe,%xmm5
-// CHECK: cvtsi2ss 305419896, %xmm5
+// CHECK: cvtsi2ssl 305419896, %xmm5
// CHECK: encoding: [0xf3,0x0f,0x2a,0x2d,0x78,0x56,0x34,0x12]
- cvtsi2ss 0x12345678,%xmm5
+ cvtsi2ssl 0x12345678,%xmm5
// CHECK: cvttps2pi 3735928559(%ebx,%ecx,8), %mm3
// CHECK: encoding: [0x0f,0x2c,0x9c,0xcb,0xef,0xbe,0xad,0xde]
@@ -8652,29 +8660,29 @@
// CHECK: encoding: [0x66,0x0f,0x2a,0xeb]
cvtpi2pd %mm3,%xmm5
-// CHECK: cvtsi2sd %ecx, %xmm5
+// CHECK: cvtsi2sdl %ecx, %xmm5
// CHECK: encoding: [0xf2,0x0f,0x2a,0xe9]
- cvtsi2sd %ecx,%xmm5
+ cvtsi2sdl %ecx,%xmm5
-// CHECK: cvtsi2sd 3735928559(%ebx,%ecx,8), %xmm5
+// CHECK: cvtsi2sdl 3735928559(%ebx,%ecx,8), %xmm5
// CHECK: encoding: [0xf2,0x0f,0x2a,0xac,0xcb,0xef,0xbe,0xad,0xde]
- cvtsi2sd 0xdeadbeef(%ebx,%ecx,8),%xmm5
+ cvtsi2sdl 0xdeadbeef(%ebx,%ecx,8),%xmm5
-// CHECK: cvtsi2sd 69, %xmm5
+// CHECK: cvtsi2sdl 69, %xmm5
// CHECK: encoding: [0xf2,0x0f,0x2a,0x2d,0x45,0x00,0x00,0x00]
- cvtsi2sd 0x45,%xmm5
+ cvtsi2sdl 0x45,%xmm5
-// CHECK: cvtsi2sd 32493, %xmm5
+// CHECK: cvtsi2sdl 32493, %xmm5
// CHECK: encoding: [0xf2,0x0f,0x2a,0x2d,0xed,0x7e,0x00,0x00]
- cvtsi2sd 0x7eed,%xmm5
+ cvtsi2sdl 0x7eed,%xmm5
-// CHECK: cvtsi2sd 3133065982, %xmm5
+// CHECK: cvtsi2sdl 3133065982, %xmm5
// CHECK: encoding: [0xf2,0x0f,0x2a,0x2d,0xfe,0xca,0xbe,0xba]
- cvtsi2sd 0xbabecafe,%xmm5
+ cvtsi2sdl 0xbabecafe,%xmm5
-// CHECK: cvtsi2sd 305419896, %xmm5
+// CHECK: cvtsi2sdl 305419896, %xmm5
// CHECK: encoding: [0xf2,0x0f,0x2a,0x2d,0x78,0x56,0x34,0x12]
- cvtsi2sd 0x12345678,%xmm5
+ cvtsi2sdl 0x12345678,%xmm5
// CHECK: divpd 3735928559(%ebx,%ecx,8), %xmm5
// CHECK: encoding: [0x66,0x0f,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
@@ -16200,23 +16208,23 @@
// CHECK: cvtps2pi %xmm5, %mm3
cvtps2pi %xmm5,%mm3
-// CHECK: cvtsi2ss %ecx, %xmm5
- cvtsi2ss %ecx,%xmm5
+// CHECK: cvtsi2ssl %ecx, %xmm5
+ cvtsi2ssl %ecx,%xmm5
-// CHECK: cvtsi2ss 3735928559(%ebx,%ecx,8), %xmm5
- cvtsi2ss 0xdeadbeef(%ebx,%ecx,8),%xmm5
+// CHECK: cvtsi2ssl 3735928559(%ebx,%ecx,8), %xmm5
+ cvtsi2ssl 0xdeadbeef(%ebx,%ecx,8),%xmm5
-// CHECK: cvtsi2ss 69, %xmm5
- cvtsi2ss 0x45,%xmm5
+// CHECK: cvtsi2ssl 69, %xmm5
+ cvtsi2ssl 0x45,%xmm5
-// CHECK: cvtsi2ss 32493, %xmm5
- cvtsi2ss 0x7eed,%xmm5
+// CHECK: cvtsi2ssl 32493, %xmm5
+ cvtsi2ssl 0x7eed,%xmm5
-// CHECK: cvtsi2ss 3133065982, %xmm5
- cvtsi2ss 0xbabecafe,%xmm5
+// CHECK: cvtsi2ssl 3133065982, %xmm5
+ cvtsi2ssl 0xbabecafe,%xmm5
-// CHECK: cvtsi2ss 305419896, %xmm5
- cvtsi2ss 0x12345678,%xmm5
+// CHECK: cvtsi2ssl 305419896, %xmm5
+ cvtsi2ssl 0x12345678,%xmm5
// CHECK: cvttps2pi 3735928559(%ebx,%ecx,8), %mm3
cvttps2pi 0xdeadbeef(%ebx,%ecx,8),%mm3
@@ -17334,23 +17342,23 @@
// CHECK: cvtpi2pd %mm3, %xmm5
cvtpi2pd %mm3,%xmm5
-// CHECK: cvtsi2sd %ecx, %xmm5
- cvtsi2sd %ecx,%xmm5
+// CHECK: cvtsi2sdl %ecx, %xmm5
+ cvtsi2sdl %ecx,%xmm5
-// CHECK: cvtsi2sd 3735928559(%ebx,%ecx,8), %xmm5
- cvtsi2sd 0xdeadbeef(%ebx,%ecx,8),%xmm5
+// CHECK: cvtsi2sdl 3735928559(%ebx,%ecx,8), %xmm5
+ cvtsi2sdl 0xdeadbeef(%ebx,%ecx,8),%xmm5
-// CHECK: cvtsi2sd 69, %xmm5
- cvtsi2sd 0x45,%xmm5
+// CHECK: cvtsi2sdl 69, %xmm5
+ cvtsi2sdl 0x45,%xmm5
-// CHECK: cvtsi2sd 32493, %xmm5
- cvtsi2sd 0x7eed,%xmm5
+// CHECK: cvtsi2sdl 32493, %xmm5
+ cvtsi2sdl 0x7eed,%xmm5
-// CHECK: cvtsi2sd 3133065982, %xmm5
- cvtsi2sd 0xbabecafe,%xmm5
+// CHECK: cvtsi2sdl 3133065982, %xmm5
+ cvtsi2sdl 0xbabecafe,%xmm5
-// CHECK: cvtsi2sd 305419896, %xmm5
- cvtsi2sd 0x12345678,%xmm5
+// CHECK: cvtsi2sdl 305419896, %xmm5
+ cvtsi2sdl 0x12345678,%xmm5
// CHECK: divpd 3735928559(%ebx,%ecx,8), %xmm5
divpd 0xdeadbeef(%ebx,%ecx,8),%xmm5
diff --git a/test/MC/X86/x86-32-ms-inline-asm.s b/test/MC/X86/x86-32-ms-inline-asm.s
index 73d5878b41bc..d912915c585e 100644
--- a/test/MC/X86/x86-32-ms-inline-asm.s
+++ b/test/MC/X86/x86-32-ms-inline-asm.s
@@ -57,4 +57,37 @@ _t21: ## @t21
// CHECK: movl 4(%esi,%eax,2), %eax
// CHECK: # encoding: [0x8b,0x44,0x46,0x04]
+ mov eax, 4[esi + 2*eax + 4]
+// CHECK: movl 8(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x08]
+ mov eax, 4[esi][2*eax + 4]
+// CHECK: movl 8(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x08]
+ mov eax, 4[esi + 2*eax][4]
+// CHECK: movl 8(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x08]
+ mov eax, 4[esi][2*eax][4]
+// CHECK: movl 8(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x08]
+ mov eax, 4[esi][2*eax][4][8]
+// CHECK: movl 16(%esi,%eax,2), %eax
+// CHECK: # encoding: [0x8b,0x44,0x46,0x10]
+
+ prefetchnta 64[eax]
+// CHECK: prefetchnta 64(%eax)
+// CHECK: # encoding: [0x0f,0x18,0x40,0x40]
+
+ pusha
+// CHECK: pushal
+// CHECK: # encoding: [0x60]
+ popa
+// CHECK: popal
+// CHECK: # encoding: [0x61]
+ pushad
+// CHECK: pushal
+// CHECK: # encoding: [0x60]
+ popad
+// CHECK: popal
+// CHECK: # encoding: [0x61]
+
ret
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index 03cb62e7cba3..c5f1d15f8ff0 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -507,15 +507,15 @@ fsave 32493
// rdar://8456382 - cvtsd2si support.
cvtsd2si %xmm1, %rax
-// CHECK: cvtsd2siq %xmm1, %rax
+// CHECK: cvtsd2si %xmm1, %rax
// CHECK: encoding: [0xf2,0x48,0x0f,0x2d,0xc1]
cvtsd2si %xmm1, %eax
-// CHECK: cvtsd2sil %xmm1, %eax
+// CHECK: cvtsd2si %xmm1, %eax
// CHECK: encoding: [0xf2,0x0f,0x2d,0xc1]
-cvtsd2siq %xmm0, %rax // CHECK: cvtsd2siq %xmm0, %rax
-cvtsd2sil %xmm0, %eax // CHECK: cvtsd2sil %xmm0, %eax
-cvtsd2si %xmm0, %rax // CHECK: cvtsd2siq %xmm0, %rax
+cvtsd2siq %xmm0, %rax // CHECK: cvtsd2si %xmm0, %rax
+cvtsd2sil %xmm0, %eax // CHECK: cvtsd2si %xmm0, %eax
+cvtsd2si %xmm0, %rax // CHECK: cvtsd2si %xmm0, %rax
cvttpd2dq %xmm1, %xmm0 // CHECK: cvttpd2dq %xmm1, %xmm0
diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s
index 46ff9ead39bf..6da9e21fef66 100644
--- a/test/MC/X86/x86_64-avx-encoding.s
+++ b/test/MC/X86/x86_64-avx-encoding.s
@@ -1404,25 +1404,25 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
vcvttss2si (%rcx), %eax
-// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12
+// CHECK: vcvtsi2ssl (%rax), %xmm11, %xmm12
// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
- vcvtsi2ss (%rax), %xmm11, %xmm12
+ vcvtsi2ssl (%rax), %xmm11, %xmm12
-// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12
+// CHECK: vcvtsi2ssl (%rax), %xmm11, %xmm12
// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
- vcvtsi2ss (%rax), %xmm11, %xmm12
+ vcvtsi2ssl (%rax), %xmm11, %xmm12
// CHECK: vcvttsd2si (%rcx), %eax
// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
vcvttsd2si (%rcx), %eax
-// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12
+// CHECK: vcvtsi2sdl (%rax), %xmm11, %xmm12
// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
- vcvtsi2sd (%rax), %xmm11, %xmm12
+ vcvtsi2sdl (%rax), %xmm11, %xmm12
-// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12
+// CHECK: vcvtsi2sdl (%rax), %xmm11, %xmm12
// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
- vcvtsi2sd (%rax), %xmm11, %xmm12
+ vcvtsi2sdl (%rax), %xmm11, %xmm12
// CHECK: vmovaps (%rax), %xmm12
// CHECK: encoding: [0xc5,0x78,0x28,0x20]
@@ -1512,11 +1512,11 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb]
vmovhlps %xmm11, %xmm12, %xmm13
-// CHECK: vcvtss2sil %xmm11, %eax
+// CHECK: vcvtss2si %xmm11, %eax
// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3]
vcvtss2si %xmm11, %eax
-// CHECK: vcvtss2sil (%rax), %ebx
+// CHECK: vcvtss2si (%rax), %ebx
// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
vcvtss2si (%rax), %ebx
@@ -3860,29 +3860,29 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
// CHECK: encoding: [0xc4,0x63,0x2d,0x06,0x18,0x07]
vperm2f128 $7, (%rax), %ymm10, %ymm11
-// CHECK: vcvtsd2sil %xmm8, %r8d
+// CHECK: vcvtsd2si %xmm8, %r8d
// CHECK: encoding: [0xc4,0x41,0x7b,0x2d,0xc0]
- vcvtsd2sil %xmm8, %r8d
+ vcvtsd2si %xmm8, %r8d
-// CHECK: vcvtsd2sil (%rcx), %ecx
+// CHECK: vcvtsd2si (%rcx), %ecx
// CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
- vcvtsd2sil (%rcx), %ecx
+ vcvtsd2si (%rcx), %ecx
-// CHECK: vcvtss2siq %xmm4, %rcx
+// CHECK: vcvtss2si %xmm4, %rcx
// CHECK: encoding: [0xc4,0xe1,0xfa,0x2d,0xcc]
- vcvtss2siq %xmm4, %rcx
+ vcvtss2si %xmm4, %rcx
-// CHECK: vcvtss2siq (%rcx), %r8
+// CHECK: vcvtss2si (%rcx), %r8
// CHECK: encoding: [0xc4,0x61,0xfa,0x2d,0x01]
- vcvtss2siq (%rcx), %r8
+ vcvtss2si (%rcx), %r8
-// CHECK: vcvtsi2sd %r8d, %xmm8, %xmm15
+// CHECK: vcvtsi2sdl %r8d, %xmm8, %xmm15
// CHECK: encoding: [0xc4,0x41,0x3b,0x2a,0xf8]
- vcvtsi2sd %r8d, %xmm8, %xmm15
+ vcvtsi2sdl %r8d, %xmm8, %xmm15
-// CHECK: vcvtsi2sd (%rbp), %xmm8, %xmm15
+// CHECK: vcvtsi2sdl (%rbp), %xmm8, %xmm15
// CHECK: encoding: [0xc5,0x3b,0x2a,0x7d,0x00]
- vcvtsi2sd (%rbp), %xmm8, %xmm15
+ vcvtsi2sdl (%rbp), %xmm8, %xmm15
// CHECK: vcvtsi2sdq %rcx, %xmm4, %xmm6
// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0xf1]
@@ -3900,21 +3900,21 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0x31]
vcvtsi2ssq (%rcx), %xmm4, %xmm6
-// CHECK: vcvttsd2siq %xmm4, %rcx
+// CHECK: vcvttsd2si %xmm4, %rcx
// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0xcc]
- vcvttsd2siq %xmm4, %rcx
+ vcvttsd2si %xmm4, %rcx
-// CHECK: vcvttsd2siq (%rcx), %rcx
+// CHECK: vcvttsd2si (%rcx), %rcx
// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0x09]
- vcvttsd2siq (%rcx), %rcx
+ vcvttsd2si (%rcx), %rcx
-// CHECK: vcvttss2siq %xmm4, %rcx
+// CHECK: vcvttss2si %xmm4, %rcx
// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0xcc]
- vcvttss2siq %xmm4, %rcx
+ vcvttss2si %xmm4, %rcx
-// CHECK: vcvttss2siq (%rcx), %rcx
+// CHECK: vcvttss2si (%rcx), %rcx
// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09]
- vcvttss2siq (%rcx), %rcx
+ vcvttss2si (%rcx), %rcx
// CHECK: vlddqu (%rax), %ymm12
// CHECK: encoding: [0xc5,0x7f,0xf0,0x20]
diff --git a/test/MC/X86/x86_64-fma4-encoding.s b/test/MC/X86/x86_64-fma4-encoding.s
index 805fc23cf4cf..c9bd954e9049 100644
--- a/test/MC/X86/x86_64-fma4-encoding.s
+++ b/test/MC/X86/x86_64-fma4-encoding.s
@@ -25,6 +25,10 @@
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: vfmaddsd %xmm10, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xc3,0xf9,0x6b,0xc2,0x10]
+ vfmaddsd %xmm10, %xmm1, %xmm0, %xmm0
+
// CHECK: vfmaddps (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0x01,0x10]
vfmaddps (%rcx), %xmm1, %xmm0, %xmm0
@@ -73,6 +77,67 @@
// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+// PR15040
+// CHECK: vfmaddss foo(%rip), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddss foo(%rip), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddss %xmm1, foo(%rip), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6a,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddss %xmm1, foo(%rip),%xmm0, %xmm0
+
+// CHECK: vfmaddsd foo(%rip), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddsd foo(%rip), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsd %xmm1, foo(%rip), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6b,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddsd %xmm1, foo(%rip),%xmm0, %xmm0
+
+// CHECK: vfmaddps foo(%rip), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddps foo(%rip), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddps %xmm1, foo(%rip), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x68,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddps %xmm1, foo(%rip),%xmm0, %xmm0
+
+// CHECK: vfmaddpd foo(%rip), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddpd foo(%rip), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddpd %xmm1, foo(%rip), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x69,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddpd %xmm1, foo(%rip),%xmm0, %xmm0
+
+// CHECK: vfmaddps foo(%rip), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddps foo(%rip), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddps %ymm1, foo(%rip), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x68,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddps %ymm1, foo(%rip),%ymm0, %ymm0
+
+// CHECK: vfmaddpd foo(%rip), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddpd foo(%rip), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddpd %ymm1, foo(%rip), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x69,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: foo-5, kind: reloc_riprel_4byte
+ vfmaddpd %ymm1, foo(%rip),%ymm0, %ymm0
+
// vfmsub
// CHECK: vfmsubss (%rcx), %xmm1, %xmm0, %xmm0
// CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0x01,0x10]
diff --git a/test/MC/X86/x86_64-rand-encoding.s b/test/MC/X86/x86_64-rand-encoding.s
new file mode 100644
index 000000000000..3a8cb817bc1a
--- /dev/null
+++ b/test/MC/X86/x86_64-rand-encoding.s
@@ -0,0 +1,49 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: rdrandw %ax
+// CHECK: encoding: [0x66,0x0f,0xc7,0xf0]
+ rdrand %ax
+
+// CHECK: rdrandl %eax
+// CHECK: encoding: [0x0f,0xc7,0xf0]
+ rdrand %eax
+
+// CHECK: rdrandq %rax
+// CHECK: encoding: [0x48,0x0f,0xc7,0xf0]
+ rdrand %rax
+
+// CHECK: rdrandw %r11w
+// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xf3]
+ rdrand %r11w
+
+// CHECK: rdrandl %r11d
+// CHECK: encoding: [0x41,0x0f,0xc7,0xf3]
+ rdrand %r11d
+
+// CHECK: rdrandq %r11
+// CHECK: encoding: [0x49,0x0f,0xc7,0xf3]
+ rdrand %r11
+
+// CHECK: rdseedw %ax
+// CHECK: encoding: [0x66,0x0f,0xc7,0xf8]
+ rdseed %ax
+
+// CHECK: rdseedl %eax
+// CHECK: encoding: [0x0f,0xc7,0xf8]
+ rdseed %eax
+
+// CHECK: rdseedq %rax
+// CHECK: encoding: [0x48,0x0f,0xc7,0xf8]
+ rdseed %rax
+
+// CHECK: rdseedw %r11w
+// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xfb]
+ rdseed %r11w
+
+// CHECK: rdseedl %r11d
+// CHECK: encoding: [0x41,0x0f,0xc7,0xfb]
+ rdseed %r11d
+
+// CHECK: rdseedq %r11
+// CHECK: encoding: [0x49,0x0f,0xc7,0xfb]
+ rdseed %r11
diff --git a/test/MC/X86/x86_64-rtm-encoding.s b/test/MC/X86/x86_64-rtm-encoding.s
index 44d6bacb7f32..d9975d67b314 100644
--- a/test/MC/X86/x86_64-rtm-encoding.s
+++ b/test/MC/X86/x86_64-rtm-encoding.s
@@ -8,6 +8,10 @@
// CHECK: encoding: [0x0f,0x01,0xd5]
xend
+// CHECK: xtest
+// CHECK: encoding: [0x0f,0x01,0xd6]
+ xtest
+
// CHECK: xabort
// CHECK: encoding: [0xc6,0xf8,0x0d]
xabort $13
diff --git a/test/MC/X86/x86_errors.s b/test/MC/X86/x86_errors.s
index f161e06cb580..6e14d62fda4c 100644
--- a/test/MC/X86/x86_errors.s
+++ b/test/MC/X86/x86_errors.s
@@ -18,7 +18,7 @@ addl $0, 0(%rax)
movl 0(%rax), 0(%edx) // error: invalid operand for instruction
-// 32: error: instruction requires a CPU feature not currently enabled
+// 32: error: instruction requires: 64-bit mode
sysexitq
// rdar://10710167
diff --git a/test/MC/X86/x86_long_nop.s b/test/MC/X86/x86_long_nop.s
new file mode 100644
index 000000000000..ac1bc08ff38b
--- /dev/null
+++ b/test/MC/X86/x86_long_nop.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-apple-darwin10.0 %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-apple-darwin8 %s | llvm-objdump -d -no-show-raw-insn - | FileCheck %s
+
+# Ensure alignment directives also emit sequences of 15-byte NOPs on processors
+# capable of using long NOPs.
+inc %eax
+.p2align 5
+inc %eax
+# CHECK: 0: inc
+# CHECK-NEXT: 1: nop
+# CHECK-NEXT: 10: nop
+# CHECK-NEXT: 1f: nop
+# CHECK-NEXT: 20: inc
diff --git a/test/Makefile b/test/Makefile
index 810fdded465a..b47695100a9e 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -78,21 +78,24 @@ else # !SunOS
ifeq ($(HOST_OS),AuroraUX)
ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
else # !AuroraUX
-# Fedora 13 x86-64 python fails with -v 76800
-ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 1024000 ;
+# Newer versions of Python try to allocate an insane amount of address space
+# for their thread-local storage, so don't set an address-space limit here.
+# When -v is not used, -s has to be used to limit the stack size instead.
+# FIXME: Those limits should be enforced by lit instead of globally.
+ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -s 8192 ;
endif # AuroraUX
endif # SunOS
check-local:: lit.site.cfg Unit/lit.site.cfg
( $(ULIMIT) \
- $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_TESTSUITE) )
+ $(PYTHON) $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_TESTSUITE) )
# This is a legacy alias dating from when both DejaGNU and lit were in use.
check-local-lit:: check-local
check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-site-cfgs
( $(ULIMIT) \
- $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_ALL_TESTSUITES) )
+ $(PYTHON) $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_ALL_TESTSUITES) )
clean::
$(RM) -rf `find $(LLVM_OBJ_ROOT)/test -name Output -type d -print`
@@ -129,13 +132,14 @@ endif
lit.site.cfg: FORCE
@echo "Making LLVM 'lit.site.cfg' file..."
- @$(ECHOPATH) s=@TARGET_TRIPLE@=$(TARGET_TRIPLE)=g > lit.tmp
+ @$(ECHOPATH) s=@LLVM_HOSTTRIPLE@=$(HOST_TRIPLE)=g > lit.tmp
+ @$(ECHOPATH) s=@TARGET_TRIPLE@=$(TARGET_TRIPLE)=g >> lit.tmp
@$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g >> lit.tmp
@$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> lit.tmp
@$(ECHOPATH) s=@LLVM_TOOLS_DIR@=$(ToolDir)=g >> lit.tmp
@$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> lit.tmp
@$(ECHOPATH) s=@SHLIBEXT@=$(SHLIBEXT)=g >> lit.tmp
- @$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp
+ @$(ECHOPATH) s=@PYTHON_EXECUTABLE@=$(PYTHON)=g >> lit.tmp
@$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc $(subst *,'\\\"',*$(subst =,"\\=",$(CXX_FOR_OCAMLOPT))*) -I $(LibDir)/ocaml=g >> lit.tmp
@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
@$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
diff --git a/test/Object/ARM/symbol-addr.ll b/test/Object/ARM/symbol-addr.ll
new file mode 100644
index 000000000000..6bcbde9f9f18
--- /dev/null
+++ b/test/Object/ARM/symbol-addr.ll
@@ -0,0 +1,12 @@
+; RUN: llc %s -mtriple=arm-unknown-unknown -filetype=obj -o - \
+; RUN: | llvm-objdump -t - | FileCheck %s
+; RUN: llc %s -mtriple=thumb-unknown-unknown -filetype=obj -o - \
+; RUN: | llvm-objdump -t - | FileCheck %s
+
+; Check that the symbol address does not include the ARM/Thumb instruction
+; indicator bit.
+; CHECK: 00000000 g F .text {{[0-9]+}} test
+
+define i32 @test() {
+ ret i32 1
+}
diff --git a/test/Object/Inputs/COFF/i386.yaml b/test/Object/Inputs/COFF/i386.yaml
index ca902220c13b..aec7a5813cf9 100644
--- a/test/Object/Inputs/COFF/i386.yaml
+++ b/test/Object/Inputs/COFF/i386.yaml
@@ -1,5 +1,6 @@
header: !Header
Machine: IMAGE_FILE_MACHINE_I386 # (0x14c)
+ Characteristics: [ IMAGE_FILE_DEBUG_STRIPPED ]
sections:
- !Section
diff --git a/test/Object/Inputs/coff_archive.lib b/test/Object/Inputs/coff_archive.lib
new file mode 100755
index 000000000000..e079991bfbac
--- /dev/null
+++ b/test/Object/Inputs/coff_archive.lib
Binary files differ
diff --git a/test/Object/Inputs/liblong_filenames.a b/test/Object/Inputs/liblong_filenames.a
new file mode 100644
index 000000000000..368d665c94ee
--- /dev/null
+++ b/test/Object/Inputs/liblong_filenames.a
Binary files differ
diff --git a/test/Object/Inputs/libsimple_archive.a b/test/Object/Inputs/libsimple_archive.a
new file mode 100644
index 000000000000..6e232e3e3cf8
--- /dev/null
+++ b/test/Object/Inputs/libsimple_archive.a
Binary files differ
diff --git a/test/Object/Inputs/macho-text-sections.macho-x86_64 b/test/Object/Inputs/macho-text-sections.macho-x86_64
new file mode 100644
index 000000000000..cce203ba0d88
--- /dev/null
+++ b/test/Object/Inputs/macho-text-sections.macho-x86_64
Binary files differ
diff --git a/test/Object/Inputs/program-headers.elf-i386 b/test/Object/Inputs/program-headers.elf-i386
new file mode 100644
index 000000000000..eb92c71cee5a
--- /dev/null
+++ b/test/Object/Inputs/program-headers.elf-i386
Binary files differ
diff --git a/test/Object/Inputs/program-headers.elf-x86-64 b/test/Object/Inputs/program-headers.elf-x86-64
new file mode 100644
index 000000000000..037bf14866a1
--- /dev/null
+++ b/test/Object/Inputs/program-headers.elf-x86-64
Binary files differ
diff --git a/test/Object/Inputs/trivial-object-test.elf-mips64el b/test/Object/Inputs/trivial-object-test.elf-mips64el
new file mode 100644
index 000000000000..5ad9ba3a0bc0
--- /dev/null
+++ b/test/Object/Inputs/trivial-object-test.elf-mips64el
Binary files differ
diff --git a/test/Object/Mips/feature.test b/test/Object/Mips/feature.test
index e8da60974603..340301450a50 100644
--- a/test/Object/Mips/feature.test
+++ b/test/Object/Mips/feature.test
@@ -2,10 +2,12 @@ RUN: llvm-objdump -disassemble -triple mips64el -mattr +mips64r2 %p/../Inputs/de
RUN: | FileCheck %s
CHECK: Disassembly of section .text:
-CHECK: .text:
+CHECK: dext:
CHECK: 0: 08 00 e0 03 jr $ra
CHECK: 4: 43 49 82 7c dext $2, $4, 5, 10
+CHECK: dextu:
CHECK: 8: 08 00 e0 03 jr $ra
CHECK: c: 83 28 82 7c dext $2, $4, 2, 6
+CHECK: dextm:
CHECK: 10: 08 00 e0 03 jr $ra
CHECK: 14: 43 09 82 7c dext $2, $4, 5, 2
diff --git a/test/Object/X86/macho-text-sections.test b/test/Object/X86/macho-text-sections.test
new file mode 100644
index 000000000000..1b697dcadad6
--- /dev/null
+++ b/test/Object/X86/macho-text-sections.test
@@ -0,0 +1,3 @@
+RUN: llvm-objdump -disassemble %p/../Inputs/macho-text-sections.macho-x86_64 | FileCheck %s
+
+CHECK: Disassembly of section __notext,__notext
diff --git a/test/Object/archive-long-index.test b/test/Object/archive-long-index.test
new file mode 100644
index 000000000000..bd530edbf418
--- /dev/null
+++ b/test/Object/archive-long-index.test
@@ -0,0 +1,40 @@
+#
+# Check that the index appears properly in the output file
+#
+RUN: llvm-nm -s %p/Inputs/liblong_filenames.a | FileCheck -check-prefix=CHECKIDX %s
+
+CHECKIDX: Archive map
+CHECKIDX: abcdefghijklmnopqrstuvwxyz12345678 in 1.o
+CHECKIDX: main in 1.o
+CHECKIDX: fn1 in 2.o
+CHECKIDX: fn3 in 3.o
+CHECKIDX: fn1 in 3.o
+CHECKIDX: shankar in 4.o
+CHECKIDX: a in 5.o
+CHECKIDX: b in 6.o
+CHECKIDX: a in abcdefghijklmnopqrstuvwxyz1.o
+CHECKIDX: b in abcdefghijklmnopqrstuvwxyz2.o
+CHECKIDX: bda in abcdefghijklmnopqrstuvwxyz2.o
+CHECKIDX: b in abcdefghijklmnopq.o
+CHECKIDX: 1.o:
+CHECKIDX: 00000000 D abcdefghijklmnopqrstuvwxyz12345678
+CHECKIDX: U bda
+CHECKIDX: 00000000 T main
+CHECKIDX: 2.o:
+CHECKIDX: 00000000 T fn1
+CHECKIDX: 3.o:
+CHECKIDX: 0000000b T fn1
+CHECKIDX: 00000000 T fn3
+CHECKIDX: 4.o:
+CHECKIDX: C shankar
+CHECKIDX: 5.o:
+CHECKIDX: C a
+CHECKIDX: 6.o:
+CHECKIDX: C b
+CHECKIDX: abcdefghijklmnopqrstuvwxyz1.o:
+CHECKIDX: C a
+CHECKIDX: abcdefghijklmnopqrstuvwxyz2.o:
+CHECKIDX: C b
+CHECKIDX: 00000000 T bda
+CHECKIDX: abcdefghijklmnopq.o:
+CHECKIDX: C b
diff --git a/test/Object/coff-archive.test b/test/Object/coff-archive.test
new file mode 100644
index 000000000000..768fe1c4b129
--- /dev/null
+++ b/test/Object/coff-archive.test
@@ -0,0 +1,225 @@
+#
+# Check that the index appears properly in the output file
+#
+RUN: llvm-nm --numeric-sort -s %p/Inputs/coff_archive.lib | FileCheck -check-prefix=CHECKIDX %s
+
+CHECKIDX: Archive map
+CHECKIDX: ??0invalid_argument@std@@QAE@PBD@Z in Debug\mymath.obj
+CHECKIDX: ??0logic_error@std@@QAE@PBD@Z in Debug\mymath.obj
+CHECKIDX: ??1invalid_argument@std@@UAE@XZ in Debug\mymath.obj
+CHECKIDX: ??1logic_error@std@@UAE@XZ in Debug\mymath.obj
+CHECKIDX: ??_7invalid_argument@std@@6B@ in Debug\mymath.obj
+CHECKIDX: ??_7logic_error@std@@6B@ in Debug\mymath.obj
+CHECKIDX: ??_C@_0BC@IHENMCGI@b?5cannot?5be?5zero?$CB?$AA@ in Debug\mymath.obj
+CHECKIDX: ??_Ginvalid_argument@std@@UAEPAXI@Z in Debug\mymath.obj
+CHECKIDX: ??_Glogic_error@std@@UAEPAXI@Z in Debug\mymath.obj
+CHECKIDX: ??_R0?AVexception@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0?AVinvalid_argument@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0?AVlogic_error@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0PAVexception@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0PAVinvalid_argument@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0PAVlogic_error@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0PAX@8 in Debug\mymath.obj
+CHECKIDX: ??_R1A@?0A@EA@exception@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R1A@?0A@EA@invalid_argument@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R1A@?0A@EA@logic_error@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R2exception@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R2invalid_argument@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R2logic_error@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R3exception@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R3invalid_argument@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R3logic_error@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R4invalid_argument@std@@6B@ in Debug\mymath.obj
+CHECKIDX: ??_R4logic_error@std@@6B@ in Debug\mymath.obj
+CHECKIDX: ?Add@MyMathFuncs@MathFuncs@@SANNN@Z in Debug\mymath.obj
+CHECKIDX: ?Divide@MyMathFuncs@MathFuncs@@SANNN@Z in Debug\mymath.obj
+CHECKIDX: ?Multiply@MyMathFuncs@MathFuncs@@SANNN@Z in Debug\mymath.obj
+CHECKIDX: ?Subtract@MyMathFuncs@MathFuncs@@SANNN@Z in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@C@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@D@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@E@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@F@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@G@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@H@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@I@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@J@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@K@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@M@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@N@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@O@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@_J@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@_K@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@_N@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?value@?$integral_constant@I$0A@@tr1@std@@2IB in Debug\mymath.obj
+CHECKIDX: ?value@?$integral_constant@_N$00@tr1@std@@2_NB in Debug\mymath.obj
+CHECKIDX: ?value@?$integral_constant@_N$0A@@tr1@std@@2_NB in Debug\mymath.obj
+CHECKIDX: __CT??_R0PAVexception@std@@@84 in Debug\mymath.obj
+CHECKIDX: __CT??_R0PAVinvalid_argument@std@@@84 in Debug\mymath.obj
+CHECKIDX: __CT??_R0PAVlogic_error@std@@@84 in Debug\mymath.obj
+CHECKIDX: __CT??_R0PAX@84 in Debug\mymath.obj
+CHECKIDX: __CTA4PAVinvalid_argument@std@@ in Debug\mymath.obj
+CHECKIDX: __TI4PAVinvalid_argument@std@@ in Debug\mymath.obj
+CHECKIDX: __real@0000000000000000 in Debug\mymath.obj
+CHECKIDX: Debug\stdafx.obj:
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$T
+CHECKIDX: 00000000 i .drectve
+CHECKIDX: 00000001 a @feat.00
+CHECKIDX: 00ab9d1b a @comp.id
+CHECKIDX: Debug\mymath.obj:
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$T
+CHECKIDX: 00000000 i .drectve
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rtc$IMZ
+CHECKIDX: 00000000 r .rtc$TMZ
+CHECKIDX: 00000000 N .sxdata
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 T ??0invalid_argument@std@@QAE@PBD@Z
+CHECKIDX: 00000000 T ??0logic_error@std@@QAE@PBD@Z
+CHECKIDX: 00000000 T ??1invalid_argument@std@@UAE@XZ
+CHECKIDX: 00000000 T ??1logic_error@std@@UAE@XZ
+CHECKIDX: 00000000 R ??_C@_0BC@IHENMCGI@b?5cannot?5be?5zero?$CB?$AA@
+CHECKIDX: 00000000 T ??_Ginvalid_argument@std@@UAEPAXI@Z
+CHECKIDX: 00000000 T ??_Glogic_error@std@@UAEPAXI@Z
+CHECKIDX: 00000000 D ??_R0?AVexception@std@@@8
+CHECKIDX: 00000000 D ??_R0?AVinvalid_argument@std@@@8
+CHECKIDX: 00000000 D ??_R0?AVlogic_error@std@@@8
+CHECKIDX: 00000000 D ??_R0PAVexception@std@@@8
+CHECKIDX: 00000000 D ??_R0PAVinvalid_argument@std@@@8
+CHECKIDX: 00000000 D ??_R0PAVlogic_error@std@@@8
+CHECKIDX: 00000000 D ??_R0PAX@8
+CHECKIDX: 00000000 R ??_R1A@?0A@EA@exception@std@@8
+CHECKIDX: 00000000 R ??_R1A@?0A@EA@invalid_argument@std@@8
+CHECKIDX: 00000000 R ??_R1A@?0A@EA@logic_error@std@@8
+CHECKIDX: 00000000 R ??_R2exception@std@@8
+CHECKIDX: 00000000 R ??_R2invalid_argument@std@@8
+CHECKIDX: 00000000 R ??_R2logic_error@std@@8
+CHECKIDX: 00000000 R ??_R3exception@std@@8
+CHECKIDX: 00000000 R ??_R3invalid_argument@std@@8
+CHECKIDX: 00000000 R ??_R3logic_error@std@@8
+CHECKIDX: 00000000 R ??_R4invalid_argument@std@@6B@
+CHECKIDX: 00000000 R ??_R4logic_error@std@@6B@
+CHECKIDX: 00000000 T ?Add@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00000000 T ?Divide@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00000000 T ?Multiply@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00000000 T ?Subtract@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@C@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@D@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@E@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@F@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@G@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@H@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@I@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@J@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@K@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@M@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@N@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@O@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@_J@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@_K@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@_N@std@@2HB
+CHECKIDX: 00000000 R ?value@?$integral_constant@I$0A@@tr1@std@@2IB
+CHECKIDX: 00000000 R ?value@?$integral_constant@_N$00@tr1@std@@2_NB
+CHECKIDX: 00000000 R ?value@?$integral_constant@_N$0A@@tr1@std@@2_NB
+CHECKIDX: 00000000 R __CT??_R0PAVexception@std@@@84
+CHECKIDX: 00000000 R __CT??_R0PAVinvalid_argument@std@@@84
+CHECKIDX: 00000000 R __CT??_R0PAVlogic_error@std@@@84
+CHECKIDX: 00000000 R __CT??_R0PAX@84
+CHECKIDX: 00000000 R __CTA4PAVinvalid_argument@std@@
+CHECKIDX: 00000000 r __RTC_InitBase.rtc$IMZ
+CHECKIDX: 00000000 r __RTC_Shutdown.rtc$TMZ
+CHECKIDX: 00000000 R __TI4PAVinvalid_argument@std@@
+CHECKIDX: 00000000 R __real@0000000000000000
+CHECKIDX: 00000000 t __unwindfunclet$?Divide@MyMathFuncs@MathFuncs@@SANNN@Z$0
+CHECKIDX: 00000000 r __unwindtable$?Divide@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00000001 a @feat.00
+CHECKIDX: 00000004 R ??_7invalid_argument@std@@6B@
+CHECKIDX: 00000004 R ??_7logic_error@std@@6B@
+CHECKIDX: 00000008 r __ehfuncinfo$?Divide@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 0000000e t __ehhandler$?Divide@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00ab9d1b a @comp.id
+CHECKIDX: U ??2@YAPAXI@Z
+CHECKIDX: U ??3@YAXPAX@Z
+CHECKIDX: U ??_7type_info@@6B@
+CHECKIDX: w ??_Einvalid_argument@std@@UAEPAXI@Z
+CHECKIDX: w ??_Elogic_error@std@@UAEPAXI@Z
+CHECKIDX: U ??_Ginvalid_argument@std@@UAEPAXI@Z
+CHECKIDX: U ??_Glogic_error@std@@UAEPAXI@Z
+CHECKIDX: U ?what@exception@std@@UBEPBDXZ
+CHECKIDX: U @__security_check_cookie@4
+CHECKIDX: U __CxxThrowException@8
+CHECKIDX: U __RTC_CheckEsp
+CHECKIDX: U __RTC_InitBase
+CHECKIDX: U __RTC_Shutdown
+CHECKIDX: U ___CxxFrameHandler3
+CHECKIDX: U ___security_cookie
+CHECKIDX: U __fltused
+CHECKIDX: U __imp_??0exception@std@@QAE@ABQBD@Z
+CHECKIDX: U __imp_??1exception@std@@UAE@XZ
diff --git a/test/Object/obj2yaml.test b/test/Object/obj2yaml.test
new file mode 100644
index 000000000000..0d96fd2bfd8e
--- /dev/null
+++ b/test/Object/obj2yaml.test
@@ -0,0 +1,170 @@
+RUN: obj2yaml %p/Inputs/trivial-object-test.coff-i386 | FileCheck %s --check-prefix COFF-I386
+RUN: obj2yaml %p/Inputs/trivial-object-test.coff-x86-64 | FileCheck %s --check-prefix COFF-X86-64
+
+
+COFF-I386: header: !Header
+COFF-I386-NEXT: Machine: IMAGE_FILE_MACHINE_I386 # (0x14c)
+
+COFF-I386: sections:
+COFF-I386-NEXT: - !Section
+COFF-I386-NEXT: Name: .text
+COFF-I386-NEXT: Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
+COFF-I386-NEXT: SectionData: !hex "83EC0CC744240800000000C7042400000000E800000000E8000000008B44240883C40CC3" # |....D$.......$...............D$.....|
+
+COFF-I386: Relocations:
+COFF-I386-NEXT: - !Relocation
+COFF-I386-NEXT: VirtualAddress: 0xe
+COFF-I386-NEXT: SymbolTableIndex: 5
+COFF-I386-NEXT: Type: IMAGE_REL_I386_DIR32
+
+COFF-I386: - !Relocation
+COFF-I386-NEXT: VirtualAddress: 0x13
+COFF-I386-NEXT: SymbolTableIndex: 6
+COFF-I386-NEXT: Type: IMAGE_REL_I386_REL32
+
+COFF-I386: - !Relocation
+COFF-I386-NEXT: VirtualAddress: 0x18
+COFF-I386-NEXT: SymbolTableIndex: 7
+COFF-I386-NEXT: Type: IMAGE_REL_I386_REL32
+
+COFF-I386: - !Section
+COFF-I386-NEXT: Name: .data
+COFF-I386-NEXT: Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
+COFF-I386-NEXT: SectionData: !hex "48656C6C6F20576F726C642100" # |Hello World!.|
+
+COFF-I386: symbols:
+COFF-I386-NEXT: - !Symbol
+COFF-I386-NEXT: Name: .text
+COFF-I386-NEXT: Value: 0
+COFF-I386-NEXT: SectionNumber: 1
+COFF-I386-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-I386-NEXT: StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-I386-NEXT: NumberOfAuxSymbols: 1
+COFF-I386-NEXT: AuxillaryData: !hex "240000000300000000000000010000000000" # |$.................|
+
+COFF-I386: - !Symbol
+COFF-I386-NEXT: Name: .data
+COFF-I386-NEXT: Value: 0
+COFF-I386-NEXT: SectionNumber: 2
+COFF-I386-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-I386-NEXT: StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-I386-NEXT: NumberOfAuxSymbols: 1
+COFF-I386-NEXT: AuxillaryData: !hex "0D0000000000000000000000020000000000" # |..................|
+
+COFF-I386: - !Symbol
+COFF-I386-NEXT: Name: _main
+COFF-I386-NEXT: Value: 0
+COFF-I386-NEXT: SectionNumber: 1
+COFF-I386-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT: ComplexType: IMAGE_SYM_DTYPE_FUNCTION # (2)
+COFF-I386-NEXT: StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+COFF-I386: - !Symbol
+COFF-I386-NEXT: Name: L_.str
+COFF-I386-NEXT: Value: 0
+COFF-I386-NEXT: SectionNumber: 2
+COFF-I386-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-I386-NEXT: StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+
+COFF-I386: - !Symbol
+COFF-I386-NEXT: Name: _puts
+COFF-I386-NEXT: Value: 0
+COFF-I386-NEXT: SectionNumber: 0
+COFF-I386-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-I386-NEXT: StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+COFF-I386: - !Symbol
+COFF-I386-NEXT: Name: _SomeOtherFunction
+COFF-I386-NEXT: Value: 0
+COFF-I386-NEXT: SectionNumber: 0
+COFF-I386-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-I386-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-I386-NEXT: StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+
+COFF-X86-64: header: !Header
+COFF-X86-64-NEXT: Machine: IMAGE_FILE_MACHINE_AMD64 # (0x8664)
+
+COFF-X86-64: sections:
+COFF-X86-64-NEXT: - !Section
+COFF-X86-64-NEXT: Name: .text
+COFF-X86-64-NEXT: Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
+COFF-X86-64-NEXT: SectionData: !hex "4883EC28C744242400000000488D0D00000000E800000000E8000000008B4424244883C428C3" # |H..(.D$$....H.................D$$H..(.|
+
+COFF-X86-64: Relocations:
+COFF-X86-64-NEXT: - !Relocation
+COFF-X86-64-NEXT: VirtualAddress: 0xf
+COFF-X86-64-NEXT: SymbolTableIndex: 5
+COFF-X86-64-NEXT: Type: IMAGE_REL_AMD64_REL32
+
+COFF-X86-64: - !Relocation
+COFF-X86-64-NEXT: VirtualAddress: 0x14
+COFF-X86-64-NEXT: SymbolTableIndex: 6
+COFF-X86-64-NEXT: Type: IMAGE_REL_AMD64_REL32
+
+COFF-X86-64: - !Relocation
+COFF-X86-64-NEXT: VirtualAddress: 0x19
+COFF-X86-64-NEXT: SymbolTableIndex: 7
+COFF-X86-64-NEXT: Type: IMAGE_REL_AMD64_REL32
+
+COFF-X86-64: - !Section
+COFF-X86-64-NEXT: Name: .data
+COFF-X86-64-NEXT: Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
+COFF-X86-64-NEXT: SectionData: !hex "48656C6C6F20576F726C642100" # |Hello World!.|
+
+COFF-X86-64: symbols:
+COFF-X86-64-NEXT: - !Symbol
+COFF-X86-64-NEXT: Name: .text
+COFF-X86-64-NEXT: Value: 0
+COFF-X86-64-NEXT: SectionNumber: 1
+COFF-X86-64-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT: StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-X86-64-NEXT: NumberOfAuxSymbols: 1
+COFF-X86-64-NEXT: AuxillaryData: !hex "260000000300000000000000010000000000" # |&.................|
+
+COFF-X86-64: - !Symbol
+COFF-X86-64-NEXT: Name: .data
+COFF-X86-64-NEXT: Value: 0
+COFF-X86-64-NEXT: SectionNumber: 2
+COFF-X86-64-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT: StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+COFF-X86-64-NEXT: NumberOfAuxSymbols: 1
+COFF-X86-64-NEXT: AuxillaryData: !hex "0D0000000000000000000000020000000000" # |..................|
+
+COFF-X86-64: - !Symbol
+COFF-X86-64-NEXT: Name: main
+COFF-X86-64-NEXT: Value: 0
+COFF-X86-64-NEXT: SectionNumber: 1
+COFF-X86-64-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT: StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+COFF-X86-64: - !Symbol
+COFF-X86-64-NEXT: Name: L.str
+COFF-X86-64-NEXT: Value: 0
+COFF-X86-64-NEXT: SectionNumber: 2
+COFF-X86-64-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT: StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+
+COFF-X86-64: - !Symbol
+COFF-X86-64-NEXT: Name: puts
+COFF-X86-64-NEXT: Value: 0
+COFF-X86-64-NEXT: SectionNumber: 0
+COFF-X86-64-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT: StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+COFF-X86-64: - !Symbol
+COFF-X86-64-NEXT: Name: SomeOtherFunction
+COFF-X86-64-NEXT: Value: 0
+COFF-X86-64-NEXT: SectionNumber: 0
+COFF-X86-64-NEXT: SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+COFF-X86-64-NEXT: ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+COFF-X86-64-NEXT: StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
diff --git a/test/Object/objdump-private-headers.test b/test/Object/objdump-private-headers.test
new file mode 100644
index 000000000000..c562044b3c4e
--- /dev/null
+++ b/test/Object/objdump-private-headers.test
@@ -0,0 +1,18 @@
+RUN: llvm-objdump -p %p/Inputs/program-headers.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF-i386
+RUN: llvm-objdump -p %p/Inputs/program-headers.elf-x86-64 \
+RUN: | FileCheck %s -check-prefix ELF-x86-64
+
+ELF-i386: Program Header:
+ELF-i386: LOAD off 0x00000000 vaddr 0x08048000 paddr 0x08048000 align 2**12
+ELF-i386: filesz 0x00000134 memsz 0x00000134 flags r-x
+ELF-i386: STACK off 0x00000000 vaddr 0x00000000 paddr 0x00000000 align 2**2
+ELF-i386: filesz 0x00000000 memsz 0x00000000 flags rw-
+
+ELF-x86-64: Program Header:
+ELF-x86-64: LOAD off 0x0000000000000000 vaddr 0x0000000000400000 paddr 0x0000000000400000 align 2**21
+ELF-x86-64: filesz 0x0000000000000138 memsz 0x0000000000000138 flags r-x
+ELF-x86-64: EH_FRAME off 0x00000000000000f4 vaddr 0x00000000004000f4 paddr 0x00000000004000f4 align 2**2
+ELF-x86-64: filesz 0x0000000000000014 memsz 0x0000000000000014 flags r--
+ELF-x86-64: STACK off 0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**3
+ELF-x86-64: filesz 0x0000000000000000 memsz 0x0000000000000000 flags rw-
diff --git a/test/Object/objdump-relocations.test b/test/Object/objdump-relocations.test
index 6d35a2651d7a..95c4c4dcaedf 100644
--- a/test/Object/objdump-relocations.test
+++ b/test/Object/objdump-relocations.test
@@ -8,6 +8,8 @@ RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-x86-64 \
RUN: | FileCheck %s -check-prefix ELF-x86-64
RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-hexagon \
RUN: | FileCheck %s -check-prefix ELF-hexagon
+RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-mips64el \
+RUN: | FileCheck %s -check-prefix ELF-MIPS64EL
RUN: llvm-objdump -r %p/Inputs/relocations.elf-x86-64 \
RUN: | FileCheck %s -check-prefix ELF-complex-x86-64
@@ -40,6 +42,11 @@ ELF-hexagon: R_HEX_LO16 puts
ELF-hexagon: R_HEX_B15_PCREL testf
ELF-hexagon: R_HEX_B22_PCREL puts
+// Note: this file was produced with gas to make sure we don't end up in a
+// situation where LLVM produces and accepts a broken file.
+ELF-MIPS64EL: .data
+ELF-MIPS64EL: R_MIPS_64
+
ELF-complex-x86-64: .text
ELF-complex-x86-64-NEXT: R_X86_64_8 .data-4
ELF-complex-x86-64-NEXT: R_X86_64_16 .data-4
diff --git a/test/Object/objdump-sectionheaders.test b/test/Object/objdump-sectionheaders.test
index a417d07a81cc..bc2478cea2fb 100644
--- a/test/Object/objdump-sectionheaders.test
+++ b/test/Object/objdump-sectionheaders.test
@@ -6,11 +6,11 @@
; CHECK: Sections:
; CHECK: Idx Name Size Address Type
-; CHECK: 0 000000000 00000000000000000
-; CHECK: 1 .text 000000026 00000000000000000 TEXT DATA
-; CHECK: 2 .rodata.str1.1 00000000d 00000000000000026 DATA
-; CHECK: 3 .note.GNU-stack 000000000 00000000000000033
-; CHECK: 4 .rela.text 000000048 00000000000000038
-; CHECK: 5 .symtab 0000000c0 00000000000000080
-; CHECK: 6 .strtab 000000033 00000000000000140
-; CHECK: 7 .shstrtab 00000004b 00000000000000173
+; CHECK: 0 00000000 0000000000000000
+; CHECK: 1 .text 00000026 0000000000000000 TEXT DATA
+; CHECK: 2 .rodata.str1.1 0000000d 0000000000000026 DATA
+; CHECK: 3 .note.GNU-stack 00000000 0000000000000033
+; CHECK: 4 .rela.text 00000048 0000000000000038
+; CHECK: 5 .symtab 000000c0 0000000000000080
+; CHECK: 6 .strtab 00000033 0000000000000140
+; CHECK: 7 .shstrtab 0000004b 0000000000000173
diff --git a/test/Object/readobj-elf-versioning.test b/test/Object/readobj-elf-versioning.test
index 0906f344e2c5..1f09ef32a11a 100644
--- a/test/Object/readobj-elf-versioning.test
+++ b/test/Object/readobj-elf-versioning.test
@@ -1,15 +1,46 @@
-RUN: llvm-readobj %p/Inputs/elf-versioning-test.i386 \
+RUN: llvm-readobj -dt %p/Inputs/elf-versioning-test.i386 \
RUN: | FileCheck %s -check-prefix ELF
-RUN: llvm-readobj %p/Inputs/elf-versioning-test.i386 \
+RUN: llvm-readobj -dt %p/Inputs/elf-versioning-test.i386 \
RUN: | FileCheck %s -check-prefix ELF32
-RUN: llvm-readobj %p/Inputs/elf-versioning-test.x86_64 \
+RUN: llvm-readobj -dt %p/Inputs/elf-versioning-test.x86_64 \
RUN: | FileCheck %s -check-prefix ELF
-RUN: llvm-readobj %p/Inputs/elf-versioning-test.x86_64 \
+RUN: llvm-readobj -dt %p/Inputs/elf-versioning-test.x86_64 \
RUN: | FileCheck %s -check-prefix ELF64
-ELF: foo@@VER2 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
-ELF: foo@VER1 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
-ELF: unversioned_define FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: DynamicSymbols [
+ELF: Symbol {
+ELF: Name: foo@@VER2
+ELF: Binding: Global
+ELF: Type: Function
+ELF: Section: .text
+ELF: }
+ELF: Symbol {
+ELF: Name: foo@VER1
+ELF: Binding: Global
+ELF: Type: Function
+ELF: Section: .text
+ELF: }
+ELF: Symbol {
+ELF: Name: unversioned_define
+ELF: Binding: Global
+ELF: Type: Function
+ELF: Section: .text
+ELF: }
+ELF: ]
-ELF32: puts@GLIBC_2.0 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global
-ELF64: puts@GLIBC_2.2.5 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global
+ELF32: DynamicSymbols [
+ELF32: Symbol {
+ELF32: Name: puts@GLIBC_2.0
+ELF32: Binding: Global
+ELF32: Type: Function
+ELF32: Section: (0x0)
+ELF32: }
+ELF32: ]
+ELF64: DynamicSymbols [
+ELF64: Symbol {
+ELF64: Name: puts@GLIBC_2.2.5
+ELF64: Binding: Global
+ELF64: Type: Function
+ELF64: Section: (0x0)
+ELF64: }
+ELF64: ]
diff --git a/test/Object/readobj-shared-object.test b/test/Object/readobj-shared-object.test
index 3b5457ce0737..72dbd32ea9d5 100644
--- a/test/Object/readobj-shared-object.test
+++ b/test/Object/readobj-shared-object.test
@@ -1,59 +1,319 @@
-RUN: llvm-readobj %p/Inputs/shared-object-test.elf-i386 \
+RUN: llvm-readobj -s -t -dt -dynamic-table -needed-libs \
+RUN: %p/Inputs/shared-object-test.elf-i386 \
RUN: | FileCheck %s -check-prefix ELF
-RUN: llvm-readobj %p/Inputs/shared-object-test.elf-i386 \
+RUN: llvm-readobj -s -t -dt -dynamic-table -needed-libs \
+RUN: %p/Inputs/shared-object-test.elf-i386 \
RUN: | FileCheck %s -check-prefix ELF32
-RUN: llvm-readobj %p/Inputs/shared-object-test.elf-x86-64 \
+RUN: llvm-readobj -s -t -dt -dynamic-table -needed-libs \
+RUN: %p/Inputs/shared-object-test.elf-x86-64 \
RUN: | FileCheck %s -check-prefix ELF
-RUN: llvm-readobj %p/Inputs/shared-object-test.elf-x86-64 \
+RUN: llvm-readobj -s -t -dt -dynamic-table -needed-libs \
+RUN: %p/Inputs/shared-object-test.elf-x86-64 \
RUN: | FileCheck %s -check-prefix ELF64
-ELF64:File Format : ELF64-x86-64
-ELF64:Arch : x86_64
-ELF64:Address Size: 64 bits
-ELF64:Load Name : libfoo.so
-
-ELF32:File Format : ELF32-i386
-ELF32:Arch : i386
-ELF32:Address Size: 32 bits
-ELF32:Load Name : libfoo.so
-
-ELF:Symbols:
-ELF: .dynsym DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
-ELF: .dynstr DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
-ELF: .text DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
-ELF: .eh_frame DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
-ELF: .tdata DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
-ELF: .dynamic DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
-ELF: .got.plt DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
-ELF: .data DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
-ELF: .bss DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
-ELF: shared.ll FILE {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute,formatspecific
-ELF: local_func FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}}
-ELF: _GLOBAL_OFFSET_TABLE_ DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute
-ELF: _DYNAMIC DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute
-ELF: common_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
-ELF: tls_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,threadlocal
-ELF: defined_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
-ELF: __bss_start ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
-ELF: _end ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
-ELF: global_func FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
-ELF: _edata ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
-ELF: Total: 21
-
-ELF:Dynamic Symbols:
-ELF: common_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
-ELF: tls_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,threadlocal
-ELF: defined_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
-ELF: __bss_start ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
-ELF: _end ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
-ELF: global_func FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
-ELF: _edata ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
-ELF: Total: {{[0-9a-f]+}}
-
-ELF:Libraries needed:
-ELF: libc.so.6
-ELF: libm.so.6
-ELF: Total: 2
+ELF64: Format: ELF64-x86-64
+ELF64: Arch: x86_64
+ELF64: AddressSize: 64bit
+ELF64: LoadName: libfoo.so
+ELF32: Format: ELF32-i386
+ELF32: Arch: i386
+ELF32: AddressSize: 32bit
+ELF32: LoadName: libfoo.so
+ELF: Sections [
+ELF: Section {
+ELF: Name: (0)
+ELF: Type: SHT_NULL
+ELF: Flags [ (0x0)
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .hash
+ELF: Type: SHT_HASH
+ELF: Flags [ (0x2)
+ELF: SHF_ALLOC
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .dynsym
+ELF: Type: SHT_DYNSYM
+ELF: Flags [ (0x2)
+ELF: SHF_ALLOC
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .dynstr
+ELF: Type: SHT_STRTAB
+ELF: Flags [ (0x2)
+ELF: SHF_ALLOC
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .text
+ELF: Type: SHT_PROGBITS
+ELF: Flags [ (0x6)
+ELF: SHF_ALLOC
+ELF: SHF_EXECINSTR
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .eh_frame
+ELF: Type: SHT_PROGBITS
+ELF: Flags [ (0x2)
+ELF: SHF_ALLOC
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .tdata
+ELF: Type: SHT_PROGBITS
+ELF: Flags [ (0x403)
+ELF: SHF_ALLOC
+ELF: SHF_TLS
+ELF: SHF_WRITE
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .dynamic
+ELF: Type: SHT_DYNAMIC
+ELF: Flags [ (0x3)
+ELF: SHF_ALLOC
+ELF: SHF_WRITE
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .got.plt
+ELF: Type: SHT_PROGBITS
+ELF: Flags [ (0x3)
+ELF: SHF_ALLOC
+ELF: SHF_WRITE
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .data
+ELF: Type: SHT_PROGBITS
+ELF: Flags [ (0x3)
+ELF: SHF_ALLOC
+ELF: SHF_WRITE
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .bss
+ELF: Type: SHT_NOBITS
+ELF: Flags [ (0x3)
+ELF: SHF_ALLOC
+ELF: SHF_WRITE
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .shstrtab
+ELF: Type: SHT_STRTAB
+ELF: Flags [ (0x0)
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .symtab
+ELF: Type: SHT_SYMTAB
+ELF: Flags [ (0x0)
+ELF: ]
+ELF: }
+ELF: Section {
+ELF: Name: .strtab
+ELF: Type: SHT_STRTAB
+ELF: Flags [ (0x0)
+ELF: ]
+ELF: }
+ELF: ]
+
+ELF: Symbols [
+ELF: Symbol {
+ELF: Name: .hash
+ELF: Binding: Local
+ELF: Type: Section
+ELF: Section: .hash
+ELF: }
+ELF: Symbol {
+ELF: Name: .dynsym
+ELF: Binding: Local
+ELF: Type: Section
+ELF: Section: .dynsym
+ELF: }
+ELF: Symbol {
+ELF: Name: .dynstr
+ELF: Binding: Local
+ELF: Type: Section
+ELF: Section: .dynstr
+ELF: }
+ELF: Symbol {
+ELF: Name: .text
+ELF: Binding: Local
+ELF: Type: Section
+ELF: Section: .text
+ELF: }
+ELF: Symbol {
+ELF: Name: .eh_frame
+ELF: Binding: Local
+ELF: Type: Section
+ELF: Section: .eh_frame
+ELF: }
+ELF: Symbol {
+ELF: Name: .tdata
+ELF: Binding: Local
+ELF: Type: Section
+ELF: Section: .tdata
+ELF: }
+ELF: Symbol {
+ELF: Name: .dynamic
+ELF: Binding: Local
+ELF: Type: Section
+ELF: Section: .dynamic
+ELF: }
+ELF: Symbol {
+ELF: Name: .got.plt
+ELF: Binding: Local
+ELF: Type: Section
+ELF: Section: .got.plt
+ELF: }
+ELF: Symbol {
+ELF: Name: .data
+ELF: Binding: Local
+ELF: Type: Section
+ELF: Section: .data
+ELF: }
+ELF: Symbol {
+ELF: Name: .bss
+ELF: Binding: Local
+ELF: Type: Section
+ELF: Section: .bss
+ELF: }
+ELF: Symbol {
+ELF: Name: shared.ll
+ELF: Binding: Local
+ELF: Type: File
+ELF: Section: (0xFFF1)
+ELF: }
+ELF: Symbol {
+ELF: Name: local_func
+ELF: Binding: Local
+ELF: Type: Function
+ELF: Section: .text
+ELF: }
+ELF: Symbol {
+ELF: Name: _GLOBAL_OFFSET_TABLE_
+ELF: Binding: Local
+ELF: Type: Object
+ELF: Section: (0xFFF1)
+ELF: }
+ELF: Symbol {
+ELF: Name: _DYNAMIC
+ELF: Binding: Local
+ELF: Type: Object
+ELF: Section: (0xFFF1)
+ELF: }
+ELF: Symbol {
+ELF: Name: common_sym
+ELF: Binding: Global
+ELF: Type: Object
+ELF: Section: .bss
+ELF: }
+ELF: Symbol {
+ELF: Name: tls_sym
+ELF: Binding: Global
+ELF: Type: TLS
+ELF: Section: .tdata
+ELF: }
+ELF: Symbol {
+ELF: Name: defined_sym
+ELF: Binding: Global
+ELF: Type: Object
+ELF: Section: .data
+ELF: }
+ELF: Symbol {
+ELF: Name: __bss_start
+ELF: Binding: Global
+ELF: Type: None
+ELF: Section: (0xFFF1)
+ELF: }
+ELF: Symbol {
+ELF: Name: _end
+ELF: Binding: Global
+ELF: Type: None
+ELF: Section: (0xFFF1)
+ELF: }
+ELF: Symbol {
+ELF: Name: global_func
+ELF: Binding: Global
+ELF: Type: Function
+ELF: Section: .text
+ELF: }
+ELF: Symbol {
+ELF: Name: _edata
+ELF: Binding: Global
+ELF: Type: None
+ELF: Section: (0xFFF1)
+ELF: }
+ELF: ]
+
+ELF: DynamicSymbols [
+ELF: Symbol {
+ELF: Name: common_sym
+ELF: Binding: Global
+ELF: Type: Object
+ELF: Section: .bss
+ELF: }
+ELF: Symbol {
+ELF: Name: tls_sym
+ELF: Binding: Global
+ELF: Type: TLS
+ELF: Section: .tdata
+ELF: }
+ELF: Symbol {
+ELF: Name: defined_sym
+ELF: Binding: Global
+ELF: Type: Object
+ELF: Section: .data
+ELF: }
+ELF: Symbol {
+ELF: Name: __bss_start
+ELF: Binding: Global
+ELF: Type: None
+ELF: Section: (0xFFF1)
+ELF: }
+ELF: Symbol {
+ELF: Name: _end
+ELF: Binding: Global
+ELF: Type: None
+ELF: Section: (0xFFF1)
+ELF: }
+ELF: Symbol {
+ELF: Name: global_func
+ELF: Binding: Global
+ELF: Type: Function
+ELF: Section: .text
+ELF: }
+ELF: Symbol {
+ELF: Name: _edata
+ELF: Binding: Global
+ELF: Type: None
+ELF: Section: (0xFFF1)
+ELF: }
+ELF: ]
+
+ELF: DynamicSection [ (9 entries)
+ELF: Tag Type Name/Value
+ELF: 00000001 NEEDED SharedLibrary (libc.so.6)
+ELF: 00000001 NEEDED SharedLibrary (libm.so.6)
+ELF: 0000000E SONAME LibrarySoname (libfoo.so)
+ELF: 00000004 HASH {{[0-9a-f]+}}
+ELF: 00000005 STRTAB {{[0-9a-f]+}}
+ELF: 00000006 SYMTAB {{[0-9a-f]+}}
+ELF: 0000000A STRSZ {{[0-9]+}} (bytes)
+ELF: 0000000B SYMENT {{[0-9]+}} (bytes)
+ELF: 00000000 NULL 0x0
+ELF: ]
+
+ELF: NeededLibraries [
+ELF-NEXT: libc.so.6
+ELF-NEXT: libm.so.6
+ELF-NEXT: ]
diff --git a/test/Object/readobj.test b/test/Object/readobj.test
new file mode 100644
index 000000000000..e29f40492d77
--- /dev/null
+++ b/test/Object/readobj.test
@@ -0,0 +1,2 @@
+// Don't crash while reading non-dynamic files.
+RUN: llvm-readobj %p/Inputs/trivial-object-test.elf-x86-64
diff --git a/test/Object/simple-archive.test b/test/Object/simple-archive.test
new file mode 100644
index 000000000000..3e6760ed97a4
--- /dev/null
+++ b/test/Object/simple-archive.test
@@ -0,0 +1,12 @@
+#
+# Check that the index appears correctly in the output file
+#
+RUN: llvm-nm -s %p/Inputs/libsimple_archive.a | FileCheck -check-prefix=CHECKIDX %s
+
+CHECKIDX: Archive map
+CHECKIDX: abcdefghijklmnopqrstuvwxyz12345678 in 1.o
+CHECKIDX: main in 1.o
+CHECKIDX: 1.o:
+CHECKIDX: 00000000 D abcdefghijklmnopqrstuvwxyz12345678
+CHECKIDX: U fn1
+CHECKIDX: 00000000 T main
diff --git a/test/Object/yaml2obj-readobj.test b/test/Object/yaml2obj-readobj.test
new file mode 100644
index 000000000000..545ccc48aa4c
--- /dev/null
+++ b/test/Object/yaml2obj-readobj.test
@@ -0,0 +1,5 @@
+RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-readobj -file-headers - | FileCheck %s --check-prefix COFF-I386
+
+// COFF-I386: Characteristics [ (0x200)
+// COFF-I386-NEXT: IMAGE_FILE_DEBUG_STRIPPED (0x200)
+// COFF-I386-NEXT: ]
diff --git a/test/Other/2008-10-15-MissingSpace.ll b/test/Other/2008-10-15-MissingSpace.ll
index cac696ed6ff2..bc78e84a0afc 100644
--- a/test/Other/2008-10-15-MissingSpace.ll
+++ b/test/Other/2008-10-15-MissingSpace.ll
@@ -1,8 +1,12 @@
-; RUN: llvm-as < %s | llvm-dis | not grep "void@"
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
; PR2894
declare void @g()
define void @f() {
- invoke void @g() to label %c unwind label %c
+; CHECK: invoke void @g()
+; CHECK: to label %d unwind label %c
+ invoke void @g() to label %d unwind label %c
+d:
+ ret void
c:
%exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
cleanup
diff --git a/test/Other/close-stderr.ll b/test/Other/close-stderr.ll
index 1d207c75c62b..6e180cd1d898 100644
--- a/test/Other/close-stderr.ll
+++ b/test/Other/close-stderr.ll
@@ -1,9 +1,16 @@
; RUN: sh -c 'opt --reject-this-option 2>&-; echo $?; opt -o /dev/null /dev/null 2>&-; echo $?;' \
; RUN: | FileCheck %s
+
; CHECK: {{^1$}}
+; On valgrind, we got 127 here.
+; XFAIL: valgrind
+
; CHECK: {{^0$}}
; XFAIL: vg_leak
; REQUIRES: shell
+; opt will fail to open /dev/null on native win32.
+; XFAIL: win32
+
; Test that, when writing to stderr fails, the error handling exits the
; program cleanly rather than aborting.
diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll
index eafb16e23e9e..44b66284dd73 100644
--- a/test/Other/constant-fold-gep.ll
+++ b/test/Other/constant-fold-gep.ll
@@ -118,64 +118,64 @@
; Duplicate all of the above as function return values rather than
; global initializers.
-; PLAIN: define i8* @goo8() nounwind {
+; PLAIN: define i8* @goo8() #0 {
; PLAIN: %t = bitcast i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -1) to i8*
; PLAIN: ret i8* %t
; PLAIN: }
-; PLAIN: define i1* @goo1() nounwind {
+; PLAIN: define i1* @goo1() #0 {
; PLAIN: %t = bitcast i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -1) to i1*
; PLAIN: ret i1* %t
; PLAIN: }
-; PLAIN: define i8* @foo8() nounwind {
+; PLAIN: define i8* @foo8() #0 {
; PLAIN: %t = bitcast i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -2) to i8*
; PLAIN: ret i8* %t
; PLAIN: }
-; PLAIN: define i1* @foo1() nounwind {
+; PLAIN: define i1* @foo1() #0 {
; PLAIN: %t = bitcast i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -2) to i1*
; PLAIN: ret i1* %t
; PLAIN: }
-; PLAIN: define i8* @hoo8() nounwind {
+; PLAIN: define i8* @hoo8() #0 {
; PLAIN: %t = bitcast i8* getelementptr (i8* null, i32 -1) to i8*
; PLAIN: ret i8* %t
; PLAIN: }
-; PLAIN: define i1* @hoo1() nounwind {
+; PLAIN: define i1* @hoo1() #0 {
; PLAIN: %t = bitcast i1* getelementptr (i1* null, i32 -1) to i1*
; PLAIN: ret i1* %t
; PLAIN: }
-; OPT: define i8* @goo8() nounwind {
+; OPT: define i8* @goo8() #0 {
; OPT: ret i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -1)
; OPT: }
-; OPT: define i1* @goo1() nounwind {
+; OPT: define i1* @goo1() #0 {
; OPT: ret i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -1)
; OPT: }
-; OPT: define i8* @foo8() nounwind {
+; OPT: define i8* @foo8() #0 {
; OPT: ret i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -2)
; OPT: }
-; OPT: define i1* @foo1() nounwind {
+; OPT: define i1* @foo1() #0 {
; OPT: ret i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -2)
; OPT: }
-; OPT: define i8* @hoo8() nounwind {
+; OPT: define i8* @hoo8() #0 {
; OPT: ret i8* getelementptr (i8* null, i32 -1)
; OPT: }
-; OPT: define i1* @hoo1() nounwind {
+; OPT: define i1* @hoo1() #0 {
; OPT: ret i1* getelementptr (i1* null, i32 -1)
; OPT: }
-; TO: define i8* @goo8() nounwind {
+; TO: define i8* @goo8() #0 {
; TO: ret i8* null
; TO: }
-; TO: define i1* @goo1() nounwind {
+; TO: define i1* @goo1() #0 {
; TO: ret i1* null
; TO: }
-; TO: define i8* @foo8() nounwind {
+; TO: define i8* @foo8() #0 {
; TO: ret i8* inttoptr (i64 -1 to i8*)
; TO: }
-; TO: define i1* @foo1() nounwind {
+; TO: define i1* @foo1() #0 {
; TO: ret i1* inttoptr (i64 -1 to i1*)
; TO: }
-; TO: define i8* @hoo8() nounwind {
+; TO: define i8* @hoo8() #0 {
; TO: ret i8* inttoptr (i64 -1 to i8*)
; TO: }
-; TO: define i1* @hoo1() nounwind {
+; TO: define i1* @hoo1() #0 {
; TO: ret i1* inttoptr (i64 -1 to i1*)
; TO: }
; SCEV: Classifying expressions for: @goo8
@@ -220,94 +220,94 @@ define i1* @hoo1() nounwind {
ret i1* %t
}
-; PLAIN: define i64 @fa() nounwind {
+; PLAIN: define i64 @fa() #0 {
; PLAIN: %t = bitcast i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) to i64
; PLAIN: ret i64 %t
; PLAIN: }
-; PLAIN: define i64 @fb() nounwind {
+; PLAIN: define i64 @fb() #0 {
; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
; PLAIN: ret i64 %t
; PLAIN: }
-; PLAIN: define i64 @fc() nounwind {
+; PLAIN: define i64 @fc() #0 {
; PLAIN: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2) to i64
; PLAIN: ret i64 %t
; PLAIN: }
-; PLAIN: define i64 @fd() nounwind {
+; PLAIN: define i64 @fd() #0 {
; PLAIN: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11) to i64
; PLAIN: ret i64 %t
; PLAIN: }
-; PLAIN: define i64 @fe() nounwind {
+; PLAIN: define i64 @fe() #0 {
; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) to i64
; PLAIN: ret i64 %t
; PLAIN: }
-; PLAIN: define i64 @ff() nounwind {
+; PLAIN: define i64 @ff() #0 {
; PLAIN: %t = bitcast i64 1 to i64
; PLAIN: ret i64 %t
; PLAIN: }
-; PLAIN: define i64 @fg() nounwind {
+; PLAIN: define i64 @fg() #0 {
; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
; PLAIN: ret i64 %t
; PLAIN: }
-; PLAIN: define i64 @fh() nounwind {
+; PLAIN: define i64 @fh() #0 {
; PLAIN: %t = bitcast i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) to i64
; PLAIN: ret i64 %t
; PLAIN: }
-; PLAIN: define i64 @fi() nounwind {
+; PLAIN: define i64 @fi() #0 {
; PLAIN: %t = bitcast i64 ptrtoint (i1** getelementptr ({ i1, i1* }* null, i64 0, i32 1) to i64) to i64
; PLAIN: ret i64 %t
; PLAIN: }
-; OPT: define i64 @fa() nounwind {
+; OPT: define i64 @fa() #0 {
; OPT: ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310)
; OPT: }
-; OPT: define i64 @fb() nounwind {
+; OPT: define i64 @fb() #0 {
; OPT: ret i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
; OPT: }
-; OPT: define i64 @fc() nounwind {
+; OPT: define i64 @fc() #0 {
; OPT: ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2)
; OPT: }
-; OPT: define i64 @fd() nounwind {
+; OPT: define i64 @fd() #0 {
; OPT: ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11)
; OPT: }
-; OPT: define i64 @fe() nounwind {
+; OPT: define i64 @fe() #0 {
; OPT: ret i64 ptrtoint (double* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64)
; OPT: }
-; OPT: define i64 @ff() nounwind {
+; OPT: define i64 @ff() #0 {
; OPT: ret i64 1
; OPT: }
-; OPT: define i64 @fg() nounwind {
+; OPT: define i64 @fg() #0 {
; OPT: ret i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
; OPT: }
-; OPT: define i64 @fh() nounwind {
+; OPT: define i64 @fh() #0 {
; OPT: ret i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64)
; OPT: }
-; OPT: define i64 @fi() nounwind {
+; OPT: define i64 @fi() #0 {
; OPT: ret i64 ptrtoint (i1** getelementptr ({ i1, i1* }* null, i64 0, i32 1) to i64)
; OPT: }
-; TO: define i64 @fa() nounwind {
+; TO: define i64 @fa() #0 {
; TO: ret i64 18480
; TO: }
-; TO: define i64 @fb() nounwind {
+; TO: define i64 @fb() #0 {
; TO: ret i64 8
; TO: }
-; TO: define i64 @fc() nounwind {
+; TO: define i64 @fc() #0 {
; TO: ret i64 16
; TO: }
-; TO: define i64 @fd() nounwind {
+; TO: define i64 @fd() #0 {
; TO: ret i64 88
; TO: }
-; TO: define i64 @fe() nounwind {
+; TO: define i64 @fe() #0 {
; TO: ret i64 16
; TO: }
-; TO: define i64 @ff() nounwind {
+; TO: define i64 @ff() #0 {
; TO: ret i64 1
; TO: }
-; TO: define i64 @fg() nounwind {
+; TO: define i64 @fg() #0 {
; TO: ret i64 8
; TO: }
-; TO: define i64 @fh() nounwind {
+; TO: define i64 @fh() #0 {
; TO: ret i64 8
; TO: }
-; TO: define i64 @fi() nounwind {
+; TO: define i64 @fi() #0 {
; TO: ret i64 8
; TO: }
; SCEV: Classifying expressions for: @fa
@@ -375,34 +375,34 @@ define i64 @fi() nounwind {
ret i64 %t
}
-; PLAIN: define i64* @fM() nounwind {
+; PLAIN: define i64* @fM() #0 {
; PLAIN: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64*
; PLAIN: ret i64* %t
; PLAIN: }
-; PLAIN: define i64* @fN() nounwind {
+; PLAIN: define i64* @fN() #0 {
; PLAIN: %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64*
; PLAIN: ret i64* %t
; PLAIN: }
-; PLAIN: define i64* @fO() nounwind {
+; PLAIN: define i64* @fO() #0 {
; PLAIN: %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64*
; PLAIN: ret i64* %t
; PLAIN: }
-; OPT: define i64* @fM() nounwind {
+; OPT: define i64* @fM() #0 {
; OPT: ret i64* getelementptr (i64* null, i32 1)
; OPT: }
-; OPT: define i64* @fN() nounwind {
+; OPT: define i64* @fN() #0 {
; OPT: ret i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1)
; OPT: }
-; OPT: define i64* @fO() nounwind {
+; OPT: define i64* @fO() #0 {
; OPT: ret i64* getelementptr ([2 x i64]* null, i32 0, i32 1)
; OPT: }
-; TO: define i64* @fM() nounwind {
+; TO: define i64* @fM() #0 {
; TO: ret i64* inttoptr (i64 8 to i64*)
; TO: }
-; TO: define i64* @fN() nounwind {
+; TO: define i64* @fN() #0 {
; TO: ret i64* inttoptr (i64 8 to i64*)
; TO: }
-; TO: define i64* @fO() nounwind {
+; TO: define i64* @fO() #0 {
; TO: ret i64* inttoptr (i64 8 to i64*)
; TO: }
; SCEV: Classifying expressions for: @fM
@@ -428,14 +428,14 @@ define i64* @fO() nounwind {
ret i64* %t
}
-; PLAIN: define i32* @fZ() nounwind {
+; PLAIN: define i32* @fZ() #0 {
; PLAIN: %t = bitcast i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1) to i32*
; PLAIN: ret i32* %t
; PLAIN: }
-; OPT: define i32* @fZ() nounwind {
+; OPT: define i32* @fZ() #0 {
; OPT: ret i32* getelementptr (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
; OPT: }
-; TO: define i32* @fZ() nounwind {
+; TO: define i32* @fZ() #0 {
; TO: ret i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 1)
; TO: }
; SCEV: Classifying expressions for: @fZ
@@ -446,3 +446,25 @@ define i32* @fZ() nounwind {
%t = bitcast i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1) to i32*
ret i32* %t
}
+
+; PR15262 - Check GEP folding with casts between address spaces.
+
+@p0 = global [4 x i8] zeroinitializer, align 1
+@p12 = addrspace(12) global [4 x i8] zeroinitializer, align 1
+
+define i8* @different_addrspace() nounwind noinline {
+; OPT: different_addrspace
+ %p = getelementptr inbounds i8* bitcast ([4 x i8] addrspace(12)* @p12 to i8*),
+ i32 2
+ ret i8* %p
+; OPT: ret i8* getelementptr (i8* bitcast ([4 x i8] addrspace(12)* @p12 to i8*), i32 2)
+}
+
+define i8* @same_addrspace() nounwind noinline {
+; OPT: same_addrspace
+ %p = getelementptr inbounds i8* bitcast ([4 x i8] * @p0 to i8*), i32 2
+ ret i8* %p
+; OPT: ret i8* getelementptr inbounds ([4 x i8]* @p0, i32 0, i32 2)
+}
+
+; CHECK: attributes #0 = { nounwind }
diff --git a/test/Other/extract-linkonce.ll b/test/Other/extract-linkonce.ll
new file mode 100644
index 000000000000..31fbf3ac4632
--- /dev/null
+++ b/test/Other/extract-linkonce.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-extract -func foo -S < %s | FileCheck %s
+; RUN: llvm-extract -delete -func foo -S < %s | FileCheck --check-prefix=DELETE %s
+
+; Test that we don't convert linkonce to external definitions.
+
+; CHECK: @bar = external hidden global i32
+; CHECK: define hidden i32* @foo() {
+; CHECK-NEXT: ret i32* @bar
+; CHECK-NEXT: }
+
+; DELETE: @bar = hidden global i32 42
+; DELETE: declare hidden i32* @foo()
+
+@bar = linkonce global i32 42
+
+define linkonce i32* @foo() {
+ ret i32* @bar
+}
+
+define void @g() {
+ call i32* @foo()
+ ret void
+}
diff --git a/test/Scripts/elf-dump b/test/Scripts/elf-dump
index 69cdacde4523..61342d8f98e5 100755
--- a/test/Scripts/elf-dump
+++ b/test/Scripts/elf-dump
@@ -52,6 +52,31 @@ class StringTable:
end = self.string_table.index('\x00', index)
return self.string_table[index:end]
+class ProgramHeader:
+ def __init__(self, f):
+ self.p_type = f.read32()
+ if f.is64Bit:
+ self.p_flags = f.read32()
+ self.p_offset = f.readWord()
+ self.p_vaddr = f.readWord()
+ self.p_paddr = f.readWord()
+ self.p_filesz = f.readWord()
+ self.p_memsz = f.readWord()
+ if not f.is64Bit:
+ self.p_flags = f.read32()
+ self.p_align = f.readWord()
+
+ def dump(self):
+ print " (('p_type', %s)" % common_dump.HexDump(self.p_type)
+ print " ('p_flags', %s)" % common_dump.HexDump(self.p_flags)
+ print " ('p_offset', %s)" % common_dump.HexDump(self.p_offset)
+ print " ('p_vaddr', %s)" % common_dump.HexDump(self.p_vaddr)
+ print " ('p_paddr', %s)" % common_dump.HexDump(self.p_paddr)
+ print " ('p_filesz', %s)" % common_dump.HexDump(self.p_filesz)
+ print " ('p_memsz', %s)" % common_dump.HexDump(self.p_memsz)
+ print " ('p_align', %s)" % common_dump.HexDump(self.p_align)
+ print " ),"
+
class Section:
def __init__(self, f):
self.sh_name = f.read32()
@@ -189,19 +214,23 @@ def dumpELF(path, opts):
print "('e_machine', %s)" % common_dump.HexDump(e_machine)
print "('e_version', %s)" % common_dump.HexDump(f.read32())
print "('e_entry', %s)" % common_dump.HexDump(f.readWord())
- print "('e_phoff', %s)" % common_dump.HexDump(f.readWord())
+ e_phoff = f.readWord()
+ print "('e_phoff', %s)" % common_dump.HexDump(e_phoff)
e_shoff = f.readWord()
print "('e_shoff', %s)" % common_dump.HexDump(e_shoff)
print "('e_flags', %s)" % common_dump.HexDump(f.read32())
print "('e_ehsize', %s)" % common_dump.HexDump(f.read16())
- print "('e_phentsize', %s)" % common_dump.HexDump(f.read16())
- print "('e_phnum', %s)" % common_dump.HexDump(f.read16())
+ e_phentsize = f.read16()
+ print "('e_phentsize', %s)" % common_dump.HexDump(e_phentsize)
+ e_phnum = f.read16()
+ print "('e_phnum', %s)" % common_dump.HexDump(e_phnum)
e_shentsize = f.read16()
print "('e_shentsize', %s)" % common_dump.HexDump(e_shentsize)
e_shnum = f.read16()
print "('e_shnum', %s)" % common_dump.HexDump(e_shnum)
e_shstrndx = f.read16()
print "('e_shstrndx', %s)" % common_dump.HexDump(e_shstrndx)
+
# Read all section headers
sections = []
@@ -228,6 +257,19 @@ def dumpELF(path, opts):
sections[index].dump(shstrtab, f, strtab, opts.dumpSectionData)
print "])"
+ # Read all program headers
+ headers = []
+ for index in range(e_phnum[0]):
+ f.seek(e_phoff[0] + index * e_phentsize[0])
+ h = ProgramHeader(f)
+ headers.append(h)
+
+ print "('_ProgramHeaders', ["
+ for index in range(e_phnum[0]):
+ print " # Program Header %s" % index
+ headers[index].dump()
+ print "])"
+
if __name__ == "__main__":
from optparse import OptionParser, OptionGroup
parser = OptionParser("usage: %prog [options] {files}")
diff --git a/test/TableGen/2006-09-18-LargeInt.td b/test/TableGen/2006-09-18-LargeInt.td
index f7ae4eecceb4..94cd1ec30710 100644
--- a/test/TableGen/2006-09-18-LargeInt.td
+++ b/test/TableGen/2006-09-18-LargeInt.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | grep -- 4294901760
-// XFAIL: vg_leak
def X {
int Y = 0xFFFF0000;
diff --git a/test/TableGen/2010-03-24-PrematureDefaults.td b/test/TableGen/2010-03-24-PrematureDefaults.td
index 24f6c93b3e17..716a1d59008c 100644
--- a/test/TableGen/2010-03-24-PrematureDefaults.td
+++ b/test/TableGen/2010-03-24-PrematureDefaults.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class A<int k, bits<2> x = 1> {
int K = k;
diff --git a/test/TableGen/Dag.td b/test/TableGen/Dag.td
index 7ceb4e74b2ff..14d616b52173 100644
--- a/test/TableGen/Dag.td
+++ b/test/TableGen/Dag.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
//===----------------------------------------------------------------------===//
// Substitution of an int.
@@ -71,3 +70,15 @@ def VAL4 : bar<foo2, somedef2>;
// CHECK-NEXT: dag Dag3 = (somedef2 2);
// CHECK-NEXT: NAME = ?
// CHECK-NEXT: }
+
+def VAL5 : bar<foo2, somedef2> {
+ // Named operands.
+ let Dag1 = (somedef1 1:$name1);
+
+ // Name, no node.
+ let Dag2 = (somedef2 $name2, $name3);
+}
+
+// CHECK: def VAL5 {
+// CHECK-NEXT: dag Dag1 = (somedef1 1:$name1);
+// CHECK-NEXT: dag Dag2 = (somedef2 ?:$name2, ?:$name3);
diff --git a/test/TableGen/DefmInherit.td b/test/TableGen/DefmInherit.td
index 46d3f62c6d04..b52a709731e1 100644
--- a/test/TableGen/DefmInherit.td
+++ b/test/TableGen/DefmInherit.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | grep "zing = 4" | count 4
-// XFAIL: vg_leak
class C1<int A, string B> {
int bar = A;
diff --git a/test/TableGen/DefmInsideMultiClass.td b/test/TableGen/DefmInsideMultiClass.td
index e6fc019b1e3e..0aea21280da0 100644
--- a/test/TableGen/DefmInsideMultiClass.td
+++ b/test/TableGen/DefmInsideMultiClass.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | grep ADDPSrr | count 1
-// XFAIL: vg_leak
class Instruction<bits<4> opc, string Name> {
bits<4> opcode = opc;
diff --git a/test/TableGen/ForeachList.td b/test/TableGen/ForeachList.td
index 99b7e14c2d5f..9bc76e0f0cf8 100644
--- a/test/TableGen/ForeachList.td
+++ b/test/TableGen/ForeachList.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class Register<string name, int idx> {
string Name = name;
diff --git a/test/TableGen/ForeachLoop.td b/test/TableGen/ForeachLoop.td
index 4aacc74d8aa2..a49a60bf2692 100644
--- a/test/TableGen/ForeachLoop.td
+++ b/test/TableGen/ForeachLoop.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class Register<string name, int idx> {
string Name = name;
diff --git a/test/TableGen/LazyChange.td b/test/TableGen/LazyChange.td
index 306959ebb6a2..919a1a7e9a32 100644
--- a/test/TableGen/LazyChange.td
+++ b/test/TableGen/LazyChange.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | grep "int Y = 3"
-// XFAIL: vg_leak
class C {
int X = 4;
diff --git a/test/TableGen/LetInsideMultiClasses.td b/test/TableGen/LetInsideMultiClasses.td
index cb13508e5117..72f48b6d8066 100644
--- a/test/TableGen/LetInsideMultiClasses.td
+++ b/test/TableGen/LetInsideMultiClasses.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | grep "bit IsDouble = 1;" | count 3
-// XFAIL: vg_leak
class Instruction<bits<4> opc, string Name> {
bits<4> opcode = opc;
diff --git a/test/TableGen/ListOfList.td b/test/TableGen/ListOfList.td
index 864401ec3cd3..adf9fe483eb4 100644
--- a/test/TableGen/ListOfList.td
+++ b/test/TableGen/ListOfList.td
@@ -1,7 +1,6 @@
// RUN llvm-tblgen %s | FileCheck %s
// RUN: llvm-tblgen %s | grep "foo" | count 1
-// XFAIL: vg_leak
class Base<string t> {
string text = t;
diff --git a/test/TableGen/LoLoL.td b/test/TableGen/LoLoL.td
index 778c9609d1a2..f758e1b60476 100644
--- a/test/TableGen/LoLoL.td
+++ b/test/TableGen/LoLoL.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class Base<list<int> v> {
list<int> values = v;
diff --git a/test/TableGen/MultiClass.td b/test/TableGen/MultiClass.td
index 449c5d6c0403..ef320cf79f16 100644
--- a/test/TableGen/MultiClass.td
+++ b/test/TableGen/MultiClass.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | grep "zing = 4" | count 2
-// XFAIL: vg_leak
class C1<int A, string B> {
int bar = A;
diff --git a/test/TableGen/MultiClassDefName.td b/test/TableGen/MultiClassDefName.td
index 296e30c7c788..75d6af5b42b9 100644
--- a/test/TableGen/MultiClassDefName.td
+++ b/test/TableGen/MultiClassDefName.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | grep WorldHelloCC | count 1
-// XFAIL: vg_leak
class C<string n> {
string name = n;
diff --git a/test/TableGen/MultiClassInherit.td b/test/TableGen/MultiClassInherit.td
index c768fff0b670..9d1470a6616b 100644
--- a/test/TableGen/MultiClassInherit.td
+++ b/test/TableGen/MultiClassInherit.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | grep "zing = 4" | count 28
-// XFAIL: vg_leak
class C1<int A, string B> {
int bar = A;
diff --git a/test/TableGen/MultiPat.td b/test/TableGen/MultiPat.td
index b3792777b6b5..b49b06c24caf 100644
--- a/test/TableGen/MultiPat.td
+++ b/test/TableGen/MultiPat.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class ValueType<int size, int value> {
int Size = size;
diff --git a/test/TableGen/NestedForeach.td b/test/TableGen/NestedForeach.td
index e8c16f720d0e..5b63175b192a 100644
--- a/test/TableGen/NestedForeach.td
+++ b/test/TableGen/NestedForeach.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class Droid<string series, int release, string model, int patchlevel> {
string Series = series;
diff --git a/test/TableGen/Paste.td b/test/TableGen/Paste.td
index a7e2a5b318ba..33d61ccde128 100644
--- a/test/TableGen/Paste.td
+++ b/test/TableGen/Paste.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class Instr<int i> {
int index = i;
diff --git a/test/TableGen/SetTheory.td b/test/TableGen/SetTheory.td
index 761332312b0f..f26b9e65ac0d 100644
--- a/test/TableGen/SetTheory.td
+++ b/test/TableGen/SetTheory.td
@@ -1,6 +1,5 @@
// Test evaluation of set operations in dags.
// RUN: llvm-tblgen -print-sets %s | FileCheck %s
-// XFAIL: vg_leak
//
// The -print-sets driver configures a primitive SetTheory instance that
// understands these sets:
diff --git a/test/TableGen/SiblingForeach.td b/test/TableGen/SiblingForeach.td
index a11f6f87b427..e4c4704a5e39 100644
--- a/test/TableGen/SiblingForeach.td
+++ b/test/TableGen/SiblingForeach.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class Set<int i = 0, int j = 0, int k = 0> {
int I = i;
diff --git a/test/TableGen/Slice.td b/test/TableGen/Slice.td
index 6d051d77c8a3..7a35d315c5d8 100644
--- a/test/TableGen/Slice.td
+++ b/test/TableGen/Slice.td
@@ -1,6 +1,4 @@
-// RUN: llvm-tblgen %s | grep "\[(set" | count 2
-// RUN: llvm-tblgen %s | grep "\[\]" | count 2
-// XFAIL: vg_leak
+// RUN: llvm-tblgen %s | FileCheck %s
class ValueType<int size, int value> {
int Size = size;
@@ -86,3 +84,8 @@ multiclass myscalar<bits<8> opcode, string asmstr = "", list<list<dag>> patterns
vscalar<opcode, asmstr, patterns>;
defm NOT : myscalar<0x10, "not", [[], [(set FR32:$dst, (f32 (not FR32:$src)))]]>;
+
+// CHECK: Pattern = [(set FR32:$dst, (f32 (not FR32:$src)))];
+// CHECK: Pattern = [];
+// CHECK: Pattern = [(set FR32:$dst, (f32 (not FR32:$src)))];
+// CHECK: Pattern = [];
diff --git a/test/TableGen/TargetInstrSpec.td b/test/TableGen/TargetInstrSpec.td
index 64b706dc6a10..bf2d257c5d01 100644
--- a/test/TableGen/TargetInstrSpec.td
+++ b/test/TableGen/TargetInstrSpec.td
@@ -1,6 +1,5 @@
// RUN: llvm-tblgen %s | grep '\[(set VR128:$dst, (int_x86_sse2_add_pd VR128:$src1, VR128:$src2))\]' | count 1
// RUN: llvm-tblgen %s | grep '\[(set VR128:$dst, (int_x86_sse2_add_ps VR128:$src1, VR128:$src2))\]' | count 1
-// XFAIL: vg_leak
class ValueType<int size, int value> {
int Size = size;
diff --git a/test/TableGen/TwoLevelName.td b/test/TableGen/TwoLevelName.td
index 9c502f475507..e88696217f70 100644
--- a/test/TableGen/TwoLevelName.td
+++ b/test/TableGen/TwoLevelName.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class Type<string name, int length, int width> {
string Name = name;
diff --git a/test/TableGen/cast.td b/test/TableGen/cast.td
index 7948aff79528..b9e4b375359b 100644
--- a/test/TableGen/cast.td
+++ b/test/TableGen/cast.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | grep "add_ps" | count 3
-// XFAIL: vg_leak
class ValueType<int size, int value> {
int Size = size;
diff --git a/test/TableGen/defmclass.td b/test/TableGen/defmclass.td
index 80f03b319426..6198c000fddc 100644
--- a/test/TableGen/defmclass.td
+++ b/test/TableGen/defmclass.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class XD { bits<4> Prefix = 11; }
// CHECK: Prefix = { 1, 1, 0, 0 };
diff --git a/test/TableGen/eq.td b/test/TableGen/eq.td
index f8daf880b9ed..fc3ad424e2f7 100644
--- a/test/TableGen/eq.td
+++ b/test/TableGen/eq.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
// CHECK: Value = 0
// CHECK: Value = 1
diff --git a/test/TableGen/eqbit.td b/test/TableGen/eqbit.td
index 1d58fa0c1916..b77b1a26dfe1 100644
--- a/test/TableGen/eqbit.td
+++ b/test/TableGen/eqbit.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
// CHECK: a = 6
// CHECK: a = 5
diff --git a/test/TableGen/foreach.td b/test/TableGen/foreach.td
index 902af25237e9..7b7c19972884 100644
--- a/test/TableGen/foreach.td
+++ b/test/TableGen/foreach.td
@@ -1,7 +1,6 @@
// RUN: llvm-tblgen %s | grep 'Jr' | count 2
// RUN: llvm-tblgen %s | grep 'Sr' | count 2
// RUN: llvm-tblgen %s | grep '"NAME"' | count 1
-// XFAIL: vg_leak
// Variables for foreach
class decls {
diff --git a/test/TableGen/if.td b/test/TableGen/if.td
index 1d8d62329ae3..e4df74f36860 100644
--- a/test/TableGen/if.td
+++ b/test/TableGen/if.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
// Support for an `!if' operator as part of a `let' statement.
// CHECK: class C
diff --git a/test/TableGen/ifbit.td b/test/TableGen/ifbit.td
index 88f575e9acfc..e3341219ffe8 100644
--- a/test/TableGen/ifbit.td
+++ b/test/TableGen/ifbit.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
// CHECK: a = 6
// CHECK: a = 5
diff --git a/test/TableGen/lisp.td b/test/TableGen/lisp.td
index dd85ddc67c94..efe00022f51d 100644
--- a/test/TableGen/lisp.td
+++ b/test/TableGen/lisp.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | grep ""
-// XFAIL: vg_leak
class List<list<string> n> {
list<string> names = n;
diff --git a/test/TableGen/list-element-bitref.td b/test/TableGen/list-element-bitref.td
index 5f3e3dabf4d4..7db3d31167fd 100644
--- a/test/TableGen/list-element-bitref.td
+++ b/test/TableGen/list-element-bitref.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class C<list<bits<8>> L> {
bits<2> V0 = L[0]{1-0};
@@ -10,6 +9,6 @@ class C<list<bits<8>> L> {
def c0 : C<[0b0101, 0b1010]>;
// CHECK: def c0
-// CHECk-NEXT: bits<2> V0 = { 0, 1 };
-// CHECk-NEXT: bits<2> V1 = { 1, 0 };
-// CHECk-NEXT: string V2 = "Odd";
+// CHECK-NEXT: bits<2> V0 = { 0, 1 };
+// CHECK-NEXT: bits<2> V1 = { 1, 0 };
+// CHECK-NEXT: string V2 = "Odd";
diff --git a/test/TableGen/math.td b/test/TableGen/math.td
new file mode 100644
index 000000000000..bde267a652dc
--- /dev/null
+++ b/test/TableGen/math.td
@@ -0,0 +1,18 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+
+class Int<int value> {
+ int Value = value;
+}
+
+def v1024 : Int<1024>;
+// CHECK: def v1024
+// CHECK: Value = 1024
+
+def v1025 : Int<!add(v1024.Value, 1)>;
+// CHECK: def v1025
+// CHECK: Value = 1025
+
+def v2048 : Int<!add(v1024.Value, v1024.Value)>;
+// CHECK: def v2048
+// CHECK: Value = 2048
+
diff --git a/test/TableGen/pr8330.td b/test/TableGen/pr8330.td
index 7779b635e33c..e6720147890b 100644
--- a/test/TableGen/pr8330.td
+++ b/test/TableGen/pr8330.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class Or4<bits<8> Val> {
bits<8> V = {Val{7}, Val{6}, Val{5}, Val{4}, Val{3}, 1, Val{1}, Val{0} };
diff --git a/test/TableGen/strconcat.td b/test/TableGen/strconcat.td
index 85ee831b4dae..0173c49365cc 100644
--- a/test/TableGen/strconcat.td
+++ b/test/TableGen/strconcat.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | grep fufoo
-// XFAIL: vg_leak
class Y<string S> {
string T = !strconcat(S, "foo");
diff --git a/test/TableGen/subst.td b/test/TableGen/subst.td
index 850ac38465c3..e265b44cf328 100644
--- a/test/TableGen/subst.td
+++ b/test/TableGen/subst.td
@@ -4,7 +4,6 @@
// RUN: llvm-tblgen %s | grep "LAST" | count 1
// RUN: llvm-tblgen %s | grep "TVAR" | count 2
// RUN: llvm-tblgen %s | grep "Bogus" | count 1
-// XFAIL: vg_leak
class Honorific<string t> {
string honorific = t;
diff --git a/test/TableGen/subst2.td b/test/TableGen/subst2.td
index 7c007f7db12e..ce7307703dcc 100644
--- a/test/TableGen/subst2.td
+++ b/test/TableGen/subst2.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
// CHECK: No subst
// CHECK: No foo
// CHECK: RECURSE foo
diff --git a/test/TableGen/usevalname.td b/test/TableGen/usevalname.td
index d85b98ac33e6..a80ba12869e0 100644
--- a/test/TableGen/usevalname.td
+++ b/test/TableGen/usevalname.td
@@ -1,5 +1,4 @@
// RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
class Instr<list<dag> pat> {
list<dag> Pattern = pat;
diff --git a/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
index e740b29f9288..1226b98a998e 100644
--- a/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -1,15 +1,19 @@
-; RUN: opt < %s -argpromotion -S | grep nounwind | count 2
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+; CHECK: define internal i32 @deref(i32 %x.val) #0 {
define internal i32 @deref(i32* %x) nounwind {
entry:
- %tmp2 = load i32* %x, align 4 ; <i32> [#uses=1]
- ret i32 %tmp2
+ %tmp2 = load i32* %x, align 4
+ ret i32 %tmp2
}
define i32 @f(i32 %x) {
entry:
- %x_addr = alloca i32 ; <i32*> [#uses=2]
- store i32 %x, i32* %x_addr, align 4
- %tmp1 = call i32 @deref( i32* %x_addr ) nounwind ; <i32> [#uses=1]
- ret i32 %tmp1
+ %x_addr = alloca i32
+ store i32 %x, i32* %x_addr, align 4
+; CHECK: %tmp1 = call i32 @deref(i32 %x_addr.val) [[NUW:#[0-9]+]]
+ %tmp1 = call i32 @deref( i32* %x_addr ) nounwind
+ ret i32 %tmp1
}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ArgumentPromotion/crash.ll b/test/Transforms/ArgumentPromotion/crash.ll
index fed002aa98a9..5e1a0370dbb1 100644
--- a/test/Transforms/ArgumentPromotion/crash.ll
+++ b/test/Transforms/ArgumentPromotion/crash.ll
@@ -1,7 +1,5 @@
+; RUN: opt -inline -argpromotion < %s
; rdar://7879828
-; RUN: opt -inline -argpromotion %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
define void @foo() {
invoke void @foo2()
@@ -11,6 +9,8 @@ if.end432:
unreachable
for.end520:
+ %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ cleanup
unreachable
}
@@ -57,3 +57,5 @@ init:
%4 = call i32 @"clay_assign(Chain, Chain)"(%0* %3, %0* %1)
ret i32 0
}
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/BBVectorize/X86/pr15289.ll b/test/Transforms/BBVectorize/X86/pr15289.ll
new file mode 100644
index 000000000000..07cc5d8b96b7
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/pr15289.ll
@@ -0,0 +1,98 @@
+; RUN: opt < %s -basicaa -bb-vectorize -disable-output
+; This is a bugpoint-reduced test case. It did not always assert, but it does reproduce
+; the bug, and running under valgrind (or a similar tool) will catch the error.
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin12.2.0"
+
+%0 = type { [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }], [10 x { float, float }] }
+%1 = type { [10 x [8 x i8]] }
+%2 = type { i64, i64 }
+%3 = type { [10 x i64], i64, i64, i64, i64, i64 }
+%4 = type { i64, i64, i64, i64, i64, i64 }
+%5 = type { [10 x i64] }
+%6 = type { [10 x float], [10 x float], [10 x float], [10 x float] }
+%struct.__st_parameter_dt.1.3.5.7 = type { %struct.__st_parameter_common.0.2.4.6, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
+%struct.__st_parameter_common.0.2.4.6 = type { i32, i32, i8*, i32, i32, i8*, i32* }
+
+@cctenso_ = external unnamed_addr global %0, align 32
+@ctenso_ = external unnamed_addr global %1, align 32
+@i_dim_ = external unnamed_addr global %2, align 16
+@itenso1_ = external unnamed_addr global %3, align 32
+@itenso2_ = external unnamed_addr global %4, align 32
+@ltenso_ = external unnamed_addr global %5, align 32
+@rtenso_ = external unnamed_addr global %6, align 32
+@.cst = external unnamed_addr constant [8 x i8], align 8
+@.cst1 = external unnamed_addr constant [3 x i8], align 8
+@.cst2 = external unnamed_addr constant [29 x i8], align 8
+@.cst3 = external unnamed_addr constant [32 x i8], align 64
+
+define void @cart_to_dc2y_(double* noalias nocapture %xx, double* noalias nocapture %yy, double* noalias nocapture %zz, [5 x { double, double }]* noalias nocapture %c2ten) nounwind uwtable {
+entry:
+ %0 = fmul double undef, undef
+ %1 = fmul double undef, undef
+ %2 = fadd double undef, undef
+ %3 = fmul double undef, 0x3FE8B8B76E3E9919
+ %4 = fsub double %0, %1
+ %5 = fsub double -0.000000e+00, undef
+ %6 = fmul double undef, undef
+ %7 = fmul double %4, %6
+ %8 = fmul double undef, 2.000000e+00
+ %9 = fmul double %8, undef
+ %10 = fmul double undef, %9
+ %11 = fmul double %10, undef
+ %12 = fsub double undef, %7
+ %13 = fmul double %3, %12
+ %14 = fmul double %3, undef
+ %15 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 0
+ store double %13, double* %15, align 8, !tbaa !0
+ %16 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 0, i32 1
+ %17 = fmul double undef, %8
+ %18 = fmul double %17, undef
+ %19 = fmul double undef, %18
+ %20 = fadd double undef, undef
+ %21 = fmul double %3, %19
+ %22 = fsub double -0.000000e+00, %21
+ %23 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 0
+ store double %22, double* %23, align 8, !tbaa !0
+ %24 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 1, i32 1
+ %25 = fmul double undef, 0x3FE42F601A8C6794
+ %26 = fmul double undef, 2.000000e+00
+ %27 = fsub double %26, %0
+ %28 = fmul double %6, undef
+ %29 = fsub double undef, %28
+ %30 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 0
+ store double undef, double* %30, align 8, !tbaa !0
+ %31 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 2, i32 1
+ %32 = fmul double undef, %17
+ %33 = fmul double undef, %17
+ %34 = fmul double undef, %32
+ %35 = fmul double undef, %33
+ %36 = fsub double undef, %35
+ %37 = fmul double %3, %34
+ %38 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 0
+ store double %37, double* %38, align 8, !tbaa !0
+ %39 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 3, i32 1
+ %40 = fmul double undef, %8
+ %41 = fmul double undef, %40
+ %42 = fmul double undef, %41
+ %43 = fsub double undef, %42
+ %44 = fmul double %3, %43
+ %45 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 0
+ store double %13, double* %45, align 8, !tbaa !0
+ %46 = getelementptr inbounds [5 x { double, double }]* %c2ten, i64 0, i64 4, i32 1
+ %47 = fsub double -0.000000e+00, %14
+ store double %47, double* %16, align 8, !tbaa !0
+ store double undef, double* %24, align 8, !tbaa !0
+ store double -0.000000e+00, double* %31, align 8, !tbaa !0
+ store double undef, double* %39, align 8, !tbaa !0
+ store double undef, double* %46, align 8, !tbaa !0
+ ret void
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!0 = metadata !{metadata !"alias set 17: real(kind=8)", metadata !1}
+!1 = metadata !{metadata !1}
diff --git a/test/Transforms/BBVectorize/X86/simple-int.ll b/test/Transforms/BBVectorize/X86/simple-int.ll
new file mode 100644
index 000000000000..f5dbe46b1480
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/simple-int.ll
@@ -0,0 +1,79 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.fmuladd.f64(double, double, double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.powi.f64(double, i32)
+
+; Basic depth-3 chain with fma
+define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
+ %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test1
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with fmuladd
+define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
+ %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test1a
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with cos
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.cos.f64(double %X1)
+ %Y2 = call double @llvm.cos.f64(double %X2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi
+define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+ %Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test3
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi (different powers: should not vectorize)
+define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %P2 = add i32 %P, 1
+ %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+ %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test4
+; CHECK: ret double %R
+}
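+; The two powi calls above take different exponents (%P vs. %P2), so they
+; must not be fused into a single vector call; the scalar form has to stay.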
+
diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll
index e8e82ce02479..bdcb30da887f 100644
--- a/test/Transforms/BBVectorize/cycle.ll
+++ b/test/Transforms/BBVectorize/cycle.ll
@@ -1,5 +1,5 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
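+; (-bb-vectorize-ignore-target-info keeps this target-independent test from
+; depending on whatever target cost information is available on the host.)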
; This test checks the non-trivial pairing-induced cycle avoidance. Without this cycle avoidance, the algorithm would otherwise
; want to select the pairs:
diff --git a/test/Transforms/BBVectorize/ld1.ll b/test/Transforms/BBVectorize/ld1.ll
index cea225d076e1..ea5cb5dd93f7 100644
--- a/test/Transforms/BBVectorize/ld1.ll
+++ b/test/Transforms/BBVectorize/ld1.ll
@@ -1,5 +1,5 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
define double @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
entry:
diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll
index c22ea5852a1b..e592edb44a02 100644
--- a/test/Transforms/BBVectorize/loop1.ll
+++ b/test/Transforms/BBVectorize/loop1.ll
@@ -1,7 +1,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
; The second check covers the use of alias analysis (with loop unrolling).
define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
diff --git a/test/Transforms/BBVectorize/req-depth.ll b/test/Transforms/BBVectorize/req-depth.ll
index 8c9cc3c188e3..e0120059b952 100644
--- a/test/Transforms/BBVectorize/req-depth.ll
+++ b/test/Transforms/BBVectorize/req-depth.ll
@@ -1,6 +1,6 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -S | FileCheck %s -check-prefix=CHECK-RD3
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -S | FileCheck %s -check-prefix=CHECK-RD2
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD3
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -bb-vectorize-ignore-target-info -S | FileCheck %s -check-prefix=CHECK-RD2
define double @test1(double %A1, double %A2, double %B1, double %B2) {
%X1 = fsub double %A1, %B1
diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll
index aeaf98865bc9..a694e45bc181 100644
--- a/test/Transforms/BBVectorize/search-limit.ll
+++ b/test/Transforms/BBVectorize/search-limit.ll
@@ -1,6 +1,6 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
define double @test1(double %A1, double %A2, double %B1, double %B2) {
; CHECK: @test1
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
index ae1d63bfd852..e4d51526ca11 100644
--- a/test/Transforms/BBVectorize/simple-int.ll
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -1,7 +1,8 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.fmuladd.f64(double, double, double)
declare double @llvm.cos.f64(double)
declare double @llvm.powi.f64(double, i32)
@@ -31,6 +32,32 @@ define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1,
; CHECK: ret double %R
}
+; Basic depth-3 chain with fmuladd
+define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
+ %Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test1a
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
+; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
+; CHECK: %Y1 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
; Basic depth-3 chain with cos
define double @test2(double %A1, double %A2, double %B1, double %B2) {
%X1 = fsub double %A1, %B1
@@ -97,7 +124,10 @@ define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
; CHECK: ret double %R
}
-; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) nounwind readonly
-; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) nounwind readonly
+; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+; CHECK: declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #1
+; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) #1
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll
index 7dd77c933f6d..8e51d297e8ec 100644
--- a/test/Transforms/BBVectorize/simple-ldstr.ll
+++ b/test/Transforms/BBVectorize/simple-ldstr.ll
@@ -1,6 +1,6 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
; Simple 3-pair chain with loads and stores
define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll
index 15ecb597025a..8caccfd32c34 100644
--- a/test/Transforms/BBVectorize/simple-sel.ll
+++ b/test/Transforms/BBVectorize/simple-sel.ll
@@ -1,6 +1,6 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-no-bools -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-NB
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-no-bools -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-NB
; Basic depth-3 chain with select
define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) {
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
index 3527ae75b457..a447908d16cc 100644
--- a/test/Transforms/BBVectorize/simple.ll
+++ b/test/Transforms/BBVectorize/simple.ll
@@ -1,5 +1,5 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; Basic depth-3 chain
define double @test1(double %A1, double %A2, double %B1, double %B2) {
diff --git a/test/Transforms/BBVectorize/simple3.ll b/test/Transforms/BBVectorize/simple3.ll
index 153be73f832f..78bcc9f83080 100644
--- a/test/Transforms/BBVectorize/simple3.ll
+++ b/test/Transforms/BBVectorize/simple3.ll
@@ -1,5 +1,5 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -bb-vectorize-ignore-target-info -instcombine -gvn -S | FileCheck %s
; Basic depth-3 chain
define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) {
diff --git a/test/Transforms/CodeGenPrepare/basic.ll b/test/Transforms/CodeGenPrepare/basic.ll
index c68e77eb555a..d617e43be865 100644
--- a/test/Transforms/CodeGenPrepare/basic.ll
+++ b/test/Transforms/CodeGenPrepare/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -codegenprepare %s -S -o - | FileCheck %s
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
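+; Reading the input on stdin gives the module a fixed '<stdin>' ModuleID, so
+; the output does not vary with the test file's path.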
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/ConstProp/2007-11-23-cttz.ll b/test/Transforms/ConstProp/2007-11-23-cttz.ll
index 6d34cb17fffd..c5ee70c2ff12 100644
--- a/test/Transforms/ConstProp/2007-11-23-cttz.ll
+++ b/test/Transforms/ConstProp/2007-11-23-cttz.ll
@@ -3,6 +3,6 @@
declare i13 @llvm.cttz.i13(i13, i1)
define i13 @test() {
- %X = call i13 @llvm.cttz.i13(i13 0, i1 true)
+ %X = call i13 @llvm.cttz.i13(i13 0, i1 false)
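+ ; With the is_zero_undef flag false, cttz of 0 is well defined and constant
+ ; folds to the bit width (13); with it true the result would be undefined.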
ret i13 %X
}
diff --git a/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll b/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
index ce79e3b2964a..a415995070e5 100644
--- a/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
+++ b/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -constmerge %s | FileCheck %s
+; RUN: opt -S -constmerge < %s | FileCheck %s
; CHECK: @foo = constant i32 6
; CHECK: @bar = constant i32 6
diff --git a/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll b/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
index f561daf66781..5aafcfe3d4fb 100644
--- a/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
+++ b/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
@@ -1,4 +1,4 @@
-; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; RUN: opt -constmerge -S < %s | FileCheck %s
; PR8978
declare i32 @zed(%struct.foobar*, %struct.foobar*)
diff --git a/test/Transforms/ConstantMerge/merge-both.ll b/test/Transforms/ConstantMerge/merge-both.ll
index b71eb437dbc3..b00345557c83 100644
--- a/test/Transforms/ConstantMerge/merge-both.ll
+++ b/test/Transforms/ConstantMerge/merge-both.ll
@@ -1,4 +1,4 @@
-; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; RUN: opt -constmerge -S < %s | FileCheck %s
; Test that in one run var3 is merged into var2 and var1 into var4.
; Test that we merge @var5 and @var6 into one with the higher alignment, and
; don't merge var7/var8 into var5/var6.
diff --git a/test/Transforms/ConstantMerge/unnamed-addr.ll b/test/Transforms/ConstantMerge/unnamed-addr.ll
index 24100837aabd..aff8540f2cb1 100644
--- a/test/Transforms/ConstantMerge/unnamed-addr.ll
+++ b/test/Transforms/ConstantMerge/unnamed-addr.ll
@@ -1,4 +1,4 @@
-; RUN: opt -constmerge %s -S -o - | FileCheck %s
+; RUN: opt -constmerge -S < %s | FileCheck %s
; Test which corresponding x and y are merged and that unnamed_addr
; is correctly set.
diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll
index 475cd8d772e6..39c437ccfae9 100644
--- a/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -81,6 +81,26 @@ LessThanOrEqualToTwo:
ret i32 0
}
+declare i32* @f(i32*)
+define void @test5(i32* %x, i32* %y) {
+; CHECK: @test5
+entry:
+ %pre = icmp eq i32* %x, null
+ br i1 %pre, label %return, label %loop
+
+loop:
+ %phi = phi i32* [ %sel, %loop ], [ %x, %entry ]
+; CHECK: %phi = phi i32* [ %f, %loop ], [ %x, %entry ]
+ %f = tail call i32* @f(i32* %phi)
+ %cmp1 = icmp ne i32* %f, %y
+ %sel = select i1 %cmp1, i32* %f, i32* null
+ %cmp2 = icmp eq i32* %sel, null
+ br i1 %cmp2, label %return, label %loop
+
+return:
+ ret void
+}
+
define i32 @switch1(i32 %s) {
; CHECK: @switch1
entry:
@@ -105,7 +125,7 @@ negative:
]
out:
- %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+ %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
ret i32 %p
next:
diff --git a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
index 7c6c575ea80f..f049265ce4ea 100644
--- a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
+++ b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
@@ -1,20 +1,20 @@
-; RUN: opt < %s -deadargelim -S > %t
-; RUN: cat %t | grep nounwind | count 2
-; RUN: cat %t | grep signext | count 2
-; RUN: cat %t | not grep inreg
-; RUN: cat %t | not grep zeroext
-; RUN: cat %t | not grep byval
+; RUN: opt < %s -deadargelim -S | FileCheck %s
- %struct = type { }
+%struct = type { }
@g = global i8 0
+; CHECK: define internal void @foo(i8 signext %y) [[NUW:#[0-9]+]]
+
define internal zeroext i8 @foo(i8* inreg %p, i8 signext %y, ... ) nounwind {
- store i8 %y, i8* @g
- ret i8 0
+ store i8 %y, i8* @g
+ ret i8 0
}
define i32 @bar() {
- %A = call zeroext i8(i8*, i8, ...)* @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
- ret i32 0
+; CHECK: call void @foo(i8 signext 1) [[NUW]]
+ %A = call zeroext i8(i8*, i8, ...)* @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
+ ret i32 0
}
+
+; CHECK: attributes [[NUW]] = { nounwind }
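+; The dead inreg pointer and the varargs (including the byval struct) are
+; removed and the unused zeroext return becomes void, while the live signext
+; argument and the nounwind attribute are kept.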
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index 2f820bad8474..f5d2588dd059 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -8,14 +8,14 @@ entry:
call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !10)
call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !11)
call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !12)
-; CHECK: call fastcc i8* @add_name_internal(i8* %name, i32 %hash) nounwind, !dbg !13
+; CHECK: call fastcc i8* @add_name_internal(i8* %name, i32 %hash) [[NUW:#[0-9]+]], !dbg !13
%0 = call fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext 0, i32 %flags) nounwind, !dbg !13 ; <i8*> [#uses=1]
ret i8* %0, !dbg !13
}
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) nounwind noinline ssp {
+define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) noinline nounwind ssp {
entry:
call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !15)
call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !20)
@@ -38,6 +38,11 @@ bb2: ; preds = %bb1, %bb
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+; CHECK: attributes #0 = { nounwind ssp }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { noinline nounwind ssp }
+; CHECK: attributes [[NUW]] = { nounwind }
+
!0 = metadata !{i32 524545, metadata !1, metadata !"name", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"vfs_addname", metadata !"vfs_addname", metadata !"vfs_addname", metadata !2, i32 12, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
!2 = metadata !{i32 524329, metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/", metadata !3} ; [ DW_TAG_file_type ]
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index dcbfaaa3d77b..24448b7009ed 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -deadargelim -S | FileCheck %s
+; RUN: opt -deadargelim -S < %s | FileCheck %s
; PR14016
; Check that debug info metadata for subprograms stores pointers to
@@ -36,19 +36,17 @@ entry:
!llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", metadata !"clang version 3.2 (trunk 165305)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5, metadata !8, metadata !9}
-!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
-!6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !8, metadata !9}
+!5 = metadata !{i32 786478, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
+!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 786478, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz
-!9 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", metadata !6, i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
+!9 = metadata !{i32 786478, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", metadata !6, i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
; CHECK: metadata !"dead_arg"{{.*}}void ()* @_ZN12_GLOBAL__N_18dead_argEPv
@@ -62,3 +60,4 @@ entry:
!17 = metadata !{i32 5, i32 25, metadata !18, null}
!18 = metadata !{i32 786443, metadata !8, i32 5, i32 23, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
!19 = metadata !{i32 5, i32 30, metadata !18, null}
+!20 = metadata !{metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di"}
diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll
index e3fe1bbb548b..cca58721e532 100644
--- a/test/Transforms/DeadArgElim/deadexternal.ll
+++ b/test/Transforms/DeadArgElim/deadexternal.ll
@@ -1,4 +1,4 @@
-; RUN: opt -deadargelim -S %s | FileCheck %s
+; RUN: opt -deadargelim -S < %s | FileCheck %s
define void @test(i32) {
ret void
diff --git a/test/Transforms/DeadArgElim/keepalive.ll b/test/Transforms/DeadArgElim/keepalive.ll
index dc92dc9f171a..e41110c96ef4 100644
--- a/test/Transforms/DeadArgElim/keepalive.ll
+++ b/test/Transforms/DeadArgElim/keepalive.ll
@@ -1,6 +1,4 @@
-; RUN: opt < %s -deadargelim -S > %t
-; RUN: grep "define internal zeroext i32 @test1() nounwind" %t
-; RUN: grep "define internal <{ i32, i32 }> @test2" %t
+; RUN: opt < %s -deadargelim -S | FileCheck %s
%Ty = type <{ i32, i32 }>
@@ -9,11 +7,13 @@
; the function and then changing too much.
; This checks if the return value attributes are not removed
+; CHECK: define internal zeroext i32 @test1() #0
define internal zeroext i32 @test1(i32 %DEADARG1) nounwind {
ret i32 1
}
; This checks if the struct doesn't get non-packed
+; CHECK: define internal <{ i32, i32 }> @test2
define internal <{ i32, i32 }> @test2(i32 %DEADARG1) {
ret <{ i32, i32 }> <{ i32 1, i32 2 }>
}
@@ -28,3 +28,4 @@ define void @caller() {
ret void
}
+; CHECK: attributes #0 = { nounwind }
diff --git a/test/Transforms/DeadStoreElimination/const-pointers.ll b/test/Transforms/DeadStoreElimination/const-pointers.ll
index 7d57804631da..15976f9f10d4 100644
--- a/test/Transforms/DeadStoreElimination/const-pointers.ll
+++ b/test/Transforms/DeadStoreElimination/const-pointers.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -dse -S | FileCheck %s
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
%t = type { i32 }
diff --git a/test/Transforms/DeadStoreElimination/dominate.ll b/test/Transforms/DeadStoreElimination/dominate.ll
index 284fea4234fc..38cf1a066dae 100644
--- a/test/Transforms/DeadStoreElimination/dominate.ll
+++ b/test/Transforms/DeadStoreElimination/dominate.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -dse -disable-output
+; RUN: opt -dse -disable-output < %s
; test that we don't crash
declare void @bar()
diff --git a/test/Transforms/DeadStoreElimination/no-targetdata.ll b/test/Transforms/DeadStoreElimination/no-targetdata.ll
index 6c7f940316a0..4022d76dcb52 100644
--- a/test/Transforms/DeadStoreElimination/no-targetdata.ll
+++ b/test/Transforms/DeadStoreElimination/no-targetdata.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -dse -S | FileCheck %s
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
declare void @test1f()
diff --git a/test/Transforms/DeadStoreElimination/pr11390.ll b/test/Transforms/DeadStoreElimination/pr11390.ll
index 2ce6eea365aa..f63aa1eb8aae 100644
--- a/test/Transforms/DeadStoreElimination/pr11390.ll
+++ b/test/Transforms/DeadStoreElimination/pr11390.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -dse -S -o - %s | FileCheck %s
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
; PR11390
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/EarlyCSE/commute.ll b/test/Transforms/EarlyCSE/commute.ll
index f84a7dd1aae9..8cf04d1765b9 100644
--- a/test/Transforms/EarlyCSE/commute.ll
+++ b/test/Transforms/EarlyCSE/commute.ll
@@ -19,9 +19,9 @@ define void @test2(float %A, float %B, i1* %PA, i1* %PB) {
; CHECK-NEXT: store
; CHECK-NEXT: store
; CHECK-NEXT: ret
- %C = fcmp eq float %A, %B
+ %C = fcmp oeq float %A, %B
store i1 %C, i1* %PA
- %D = fcmp eq float %B, %A
+ %D = fcmp oeq float %B, %A
store i1 %D, i1* %PB
ret void
}
diff --git a/test/Transforms/EarlyCSE/floatingpoint.ll b/test/Transforms/EarlyCSE/floatingpoint.ll
new file mode 100644
index 000000000000..2abecd74b63a
--- /dev/null
+++ b/test/Transforms/EarlyCSE/floatingpoint.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -S -early-cse | FileCheck %s
+
+; Ensure we don't simplify away additions of vectors of +0.0's (same as for scalars).
+define <4 x float> @fV( <4 x float> %a) {
+ ; CHECK: %b = fadd <4 x float> %a, zeroinitializer
+ %b = fadd <4 x float> %a, <float 0.0,float 0.0,float 0.0,float 0.0>
+ ret <4 x float> %b
+}
+
+define <4 x float> @fW( <4 x float> %a) {
+ ; CHECK: ret <4 x float> %a
+ %b = fadd <4 x float> %a, <float -0.0,float -0.0,float -0.0,float -0.0>
+ ret <4 x float> %b
+}
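+; Adding -0.0 is a true identity (it preserves even a -0.0 input), so the
+; second fadd can be simplified away; adding +0.0 would turn an input of
+; -0.0 into +0.0, so the first fadd must survive.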
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
index 946453f586ed..36a765873487 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
@@ -1,18 +1,24 @@
-; RUN: opt < %s -basicaa -functionattrs -S | grep readnone | count 4
+; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
@x = global i32 0
+; CHECK: declare i32 @e() #0
declare i32 @e() readnone
+; CHECK: define i32 @f() #0
define i32 @f() {
%tmp = call i32 @e( ) ; <i32> [#uses=1]
ret i32 %tmp
}
+; CHECK: define i32 @g() #0
define i32 @g() readonly {
ret i32 0
}
+; CHECK: define i32 @h() #0
define i32 @h() readnone {
%tmp = load i32* @x ; <i32> [#uses=1]
ret i32 %tmp
}
+
+; CHECK: attributes #0 = { readnone }
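+; All four functions share one attribute group: @f only calls the readnone
+; @e, @g reads no memory at all (its readonly is strengthened), and @h keeps
+; the readnone it was declared with.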
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
index 22eca1320415..d8256ae8e647 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
@@ -1,9 +1,13 @@
-; RUN: opt < %s -basicaa -functionattrs -S | grep readonly | count 2
+; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
+; CHECK: define i32 @f() #0
define i32 @f() {
entry:
- %tmp = call i32 @e( ) ; <i32> [#uses=1]
- ret i32 %tmp
+ %tmp = call i32 @e( )
+ ret i32 %tmp
}
+; CHECK: declare i32 @e() #0
declare i32 @e() readonly
+
+; CHECK: attributes #0 = { readonly }
diff --git a/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
new file mode 100644
index 000000000000..d414b73524fd
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+
+; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) #0
+declare i8* @fopen(i8*, i8*)
+
+; CHECK: declare i8 @strlen(i8* nocapture) #1
+declare i8 @strlen(i8*)
+
+; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) #0
+declare i32* @realloc(i32*, i32)
+
+; Test deliberately wrong declaration
+declare i32 @strcpy(...)
+
+; CHECK-NOT: strcpy{{.*}}noalias
+; CHECK-NOT: strcpy{{.*}}nocapture
+; CHECK-NOT: strcpy{{.*}}nounwind
+; CHECK-NOT: strcpy{{.*}}readonly
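+; Because @strcpy is declared with the wrong prototype, it must not be
+; recognized as the library function and receives none of its attributes.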
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/FunctionAttrs/annotate-1.ll b/test/Transforms/FunctionAttrs/annotate-1.ll
new file mode 100644
index 000000000000..ae77380acc4a
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/annotate-1.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+
+declare i8* @fopen(i8*, i8*)
+; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) [[G0:#[0-9]]]
+
+declare i8 @strlen(i8*)
+; CHECK: declare i8 @strlen(i8* nocapture) [[G1:#[0-9]]]
+
+declare i32* @realloc(i32*, i32)
+; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) [[G0]]
+
+; Test deliberately wrong declaration
+
+declare i32 @strcpy(...)
+; CHECK: declare i32 @strcpy(...)
+
+; CHECK: attributes [[G0]] = { nounwind }
+; CHECK: attributes [[G1]] = { nounwind readonly }
diff --git a/test/Transforms/FunctionAttrs/atomic.ll b/test/Transforms/FunctionAttrs/atomic.ll
index 7c2bff7a05f7..027ee0fd06a2 100644
--- a/test/Transforms/FunctionAttrs/atomic.ll
+++ b/test/Transforms/FunctionAttrs/atomic.ll
@@ -3,7 +3,7 @@
; Atomic load/store to local doesn't affect whether a function is
; readnone/readonly.
define i32 @test1(i32 %x) uwtable ssp {
-; CHECK: define i32 @test1(i32 %x) uwtable readnone ssp {
+; CHECK: define i32 @test1(i32 %x) #0 {
entry:
%x.addr = alloca i32, align 4
store atomic i32 %x, i32* %x.addr seq_cst, align 4
@@ -13,9 +13,11 @@ entry:
; A function with an Acquire load is not readonly.
define i32 @test2(i32* %x) uwtable ssp {
-; CHECK: define i32 @test2(i32* nocapture %x) uwtable ssp {
+; CHECK: define i32 @test2(i32* nocapture %x) #1 {
entry:
%r = load atomic i32* %x seq_cst, align 4
ret i32 %r
}
+; CHECK: attributes #0 = { readnone ssp uwtable }
+; CHECK: attributes #1 = { ssp uwtable }
diff --git a/test/Transforms/FunctionAttrs/noreturn.ll b/test/Transforms/FunctionAttrs/noreturn.ll
new file mode 100644
index 000000000000..470ebcb1d3cd
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/noreturn.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -functionattrs -instcombine -S | FileCheck %s
+
+define void @endless_loop() noreturn nounwind readnone ssp uwtable {
+entry:
+ br label %while.body
+
+while.body:
+ br label %while.body
+}
+;CHECK: @main
+;CHECK: endless_loop
+;CHECK: ret
+define i32 @main() noreturn nounwind ssp uwtable {
+entry:
+ tail call void @endless_loop()
+ unreachable
+}
+
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
new file mode 100644
index 000000000000..d1bce728e08c
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -0,0 +1,27 @@
+; RUN: echo '!9 = metadata !{metadata !"%T/linkagename.ll", metadata !0}' > %t1
+; RUN: cat %s %t1 > %t2
+; RUN: opt -insert-gcov-profiling -disable-output < %t2
+; RUN: grep _Z3foov %T/linkagename.gcno
+; RUN: rm %T/linkagename.gcno
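+; The echoed !9 entry becomes the llvm.gcov metadata tying the compile unit
+; to %T/linkagename.ll, from which the pass derives the .gcno file it writes;
+; the mangled name _Z3foov should then appear in that file.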
+
+; REQUIRES: shell
+
+define void @_Z3foov() {
+entry:
+ ret void, !dbg !8
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.gcov = !{!9}
+
+!0 = metadata !{i32 786449, i32 4, metadata !1, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{i32 786473, metadata !2} ; [ DW_TAG_file_type ] [/home/nlewycky/hello.cc]
+!2 = metadata !{metadata !"hello.cc", metadata !"/home/nlewycky"}
+!3 = metadata !{i32 0}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!6 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 1, i32 0, metadata !5, null}
+
+
diff --git a/test/Transforms/GCOVProfiling/lit.local.cfg b/test/Transforms/GCOVProfiling/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll
new file mode 100644
index 000000000000..d6d0f3314c26
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/version.ll
@@ -0,0 +1,29 @@
+; RUN: echo '!9 = metadata !{metadata !"%T/version.ll", metadata !0}' > %t1
+; RUN: cat %s %t1 > %t2
+; RUN: opt -insert-gcov-profiling -disable-output < %t2
+; RUN: head -c12 %T/version.gcno | grep '^oncg\*204MVLL$'
+; RUN: rm %T/version.gcno
+; RUN: not opt -insert-gcov-profiling -default-gcov-version=asdfasdf -disable-output < %t2
+; RUN: opt -insert-gcov-profiling -default-gcov-version=407* -disable-output < %t2
+; RUN: head -c12 %T/version.gcno | grep '^oncg\*704MVLL$'
+; RUN: rm %T/version.gcno
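+; The first 12 bytes of a .gcno file are the magic ('gcno', read back here as
+; 'oncg'), the four version bytes stored reversed ('402*' prints as '*204',
+; '407*' as '*704'), and the stamp ('LLVM' reversed to 'MVLL').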
+
+define void @test() {
+ ret void, !dbg !8
+}
+
+; REQUIRES: shell
+
+!llvm.gcov = !{!9}
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{metadata !"./version", metadata !1}
+!1 = metadata !{i32 786449, i32 0, i32 4, metadata !2, metadata !"clang version 3.3 (trunk 176994)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [./version] [DW_LANG_C_plus_plus]
+!2 = metadata !{i32 786473, metadata !"version", metadata !"/usr/local/google/home/nlewycky"} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 0}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test", metadata !"test", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @test, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
+!6 = metadata !{i32 786473, metadata !"<stdin>", metadata !"."} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 1, i32 0, metadata !5, null}
+;; !9 is added through the echo line at the top.
diff --git a/test/Transforms/GVN/2011-04-27-phioperands.ll b/test/Transforms/GVN/2011-04-27-phioperands.ll
index 6e5075db7c8e..42c46500c483 100644
--- a/test/Transforms/GVN/2011-04-27-phioperands.ll
+++ b/test/Transforms/GVN/2011-04-27-phioperands.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -disable-output
+; RUN: opt -gvn -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
diff --git a/test/Transforms/GVN/MemdepMiscompile.ll b/test/Transforms/GVN/MemdepMiscompile.ll
new file mode 100644
index 000000000000..d42016961575
--- /dev/null
+++ b/test/Transforms/GVN/MemdepMiscompile.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; rdar://12801584
+; The value of %shouldExit can be changed by RunInMode.
+; Make sure we do not replace load %shouldExit in while.cond.backedge
+; with a phi node where the value from while.body is 0.
+define i32 @test() nounwind ssp {
+entry:
+; CHECK: test()
+; CHECK: while.body:
+; CHECK: call void @RunInMode
+; CHECK: br i1 %tobool, label %while.cond.backedge, label %if.then
+; CHECK: while.cond.backedge:
+; CHECK: load i32* %shouldExit
+; CHECK: br i1 %cmp, label %while.body
+ %shouldExit = alloca i32, align 4
+ %tasksIdle = alloca i32, align 4
+ store i32 0, i32* %shouldExit, align 4
+ store i32 0, i32* %tasksIdle, align 4
+ call void @CTestInitialize(i32* %tasksIdle) nounwind
+ %0 = load i32* %shouldExit, align 4
+ %cmp1 = icmp eq i32 %0, 0
+ br i1 %cmp1, label %while.body.lr.ph, label %while.end
+
+while.body.lr.ph:
+ br label %while.body
+
+while.body:
+ call void @RunInMode(i32 100) nounwind
+ %1 = load i32* %tasksIdle, align 4
+ %tobool = icmp eq i32 %1, 0
+ br i1 %tobool, label %while.cond.backedge, label %if.then
+
+if.then:
+ store i32 0, i32* %tasksIdle, align 4
+ call void @TimerCreate(i32* %shouldExit) nounwind
+ br label %while.cond.backedge
+
+while.cond.backedge:
+ %2 = load i32* %shouldExit, align 4
+ %cmp = icmp eq i32 %2, 0
+ br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
+
+while.cond.while.end_crit_edge:
+ br label %while.end
+
+while.end:
+ ret i32 0
+}
+declare void @CTestInitialize(i32*)
+declare void @RunInMode(i32)
+declare void @TimerCreate(i32*)
diff --git a/test/Transforms/GVN/crash-no-aa.ll b/test/Transforms/GVN/crash-no-aa.ll
index dae65ddb2fe7..9ad63a7350c2 100644
--- a/test/Transforms/GVN/crash-no-aa.ll
+++ b/test/Transforms/GVN/crash-no-aa.ll
@@ -1,7 +1,6 @@
-; RUN: opt -no-aa -gvn -S %s
+; RUN: opt -no-aa -gvn -S < %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v1
-28:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-freebsd8.0"
; PR5744
diff --git a/test/Transforms/GVN/crash.ll b/test/Transforms/GVN/crash.ll
index 4a8c8e4589c8..9fb612fcae13 100644
--- a/test/Transforms/GVN/crash.ll
+++ b/test/Transforms/GVN/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -gvn %s -disable-output
+; RUN: opt -gvn -disable-output < %s
; PR5631
diff --git a/test/Transforms/GVN/edge.ll b/test/Transforms/GVN/edge.ll
index 32392f3ab0c8..3a102b6c3539 100644
--- a/test/Transforms/GVN/edge.ll
+++ b/test/Transforms/GVN/edge.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -S -o - | FileCheck %s
+; RUN: opt -gvn -S < %s | FileCheck %s
define i32 @f1(i32 %x) {
; CHECK: define i32 @f1(
diff --git a/test/Transforms/GVN/fpmath.ll b/test/Transforms/GVN/fpmath.ll
index 8ab285448fbb..403df5c9008a 100644
--- a/test/Transforms/GVN/fpmath.ll
+++ b/test/Transforms/GVN/fpmath.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -S -o - | FileCheck %s
+; RUN: opt -gvn -S < %s | FileCheck %s
define double @test1(double %x, double %y) {
; CHECK: @test1(double %x, double %y)
diff --git a/test/Transforms/GVN/lpre-call-wrap-2.ll b/test/Transforms/GVN/lpre-call-wrap-2.ll
index e39f3ed87d1c..35e3534a9c89 100644
--- a/test/Transforms/GVN/lpre-call-wrap-2.ll
+++ b/test/Transforms/GVN/lpre-call-wrap-2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -gvn -enable-load-pre %s | FileCheck %s
+; RUN: opt -S -basicaa -gvn -enable-load-pre < %s | FileCheck %s
;
; The partially redundant load in bb1 should be hoisted to "bb". This comes
; from this C code (GCC PR 23455):
diff --git a/test/Transforms/GVN/lpre-call-wrap.ll b/test/Transforms/GVN/lpre-call-wrap.ll
index 40462798b534..0646f3fe0aad 100644
--- a/test/Transforms/GVN/lpre-call-wrap.ll
+++ b/test/Transforms/GVN/lpre-call-wrap.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -gvn -enable-load-pre %s | FileCheck %s
+; RUN: opt -S -gvn -enable-load-pre < %s | FileCheck %s
;
; Make sure the load in bb3.backedge is removed and moved into bb1 after the
; call. This makes the non-call case faster.
diff --git a/test/Transforms/GVN/nonescaping-malloc.ll b/test/Transforms/GVN/nonescaping-malloc.ll
index afcb7fe3bb0f..c2eeed56ffc1 100644
--- a/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/test/Transforms/GVN/nonescaping-malloc.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
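+; (Statistics are only compiled in when assertions are enabled, hence the
+; "asserts" requirement for checking -stats output.)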
; RUN: opt < %s -basicaa -gvn -stats -disable-output 2>&1 | grep "Number of loads deleted"
; rdar://7363102
diff --git a/test/Transforms/GVN/null-aliases-nothing.ll b/test/Transforms/GVN/null-aliases-nothing.ll
index 9e4ae18c710c..37bf09d7f3ff 100644
--- a/test/Transforms/GVN/null-aliases-nothing.ll
+++ b/test/Transforms/GVN/null-aliases-nothing.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
%t = type { i32 }
declare void @test1f(i8*)
diff --git a/test/Transforms/GVN/pr12979.ll b/test/Transforms/GVN/pr12979.ll
index 669da9127d0b..0198a56513ea 100644
--- a/test/Transforms/GVN/pr12979.ll
+++ b/test/Transforms/GVN/pr12979.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -gvn -S -o - | FileCheck %s
+; RUN: opt -gvn -S < %s | FileCheck %s
define i32 @test1(i32 %x, i32 %y) {
; CHECK: @test1(i32 %x, i32 %y)
diff --git a/test/Transforms/GVN/range.ll b/test/Transforms/GVN/range.ll
index 3759c415dabc..2115fe85661e 100644
--- a/test/Transforms/GVN/range.ll
+++ b/test/Transforms/GVN/range.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -gvn -S -o - | FileCheck %s
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
define i32 @test1(i32* %p) {
; CHECK: @test1(i32* %p)
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 72fa819d1c73..f470ed88bb9c 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -254,14 +254,11 @@ Cont:
%A = load i8* %P3
ret i8 %A
-;; FIXME: This is disabled because this caused a miscompile in the llvm-gcc
-;; bootstrap, see r82411
-;
-; HECK: @coerce_mustalias_nonlocal1
-; HECK: Cont:
-; HECK: %A = phi i8 [
-; HECK-NOT: load
-; HECK: ret i8 %A
+; CHECK: @coerce_mustalias_nonlocal1
+; CHECK: Cont:
+; CHECK: %A = phi i8 [
+; CHECK-NOT: load
+; CHECK: ret i8 %A
}
diff --git a/test/Transforms/GVN/tbaa.ll b/test/Transforms/GVN/tbaa.ll
index 90661c62507b..85fe39a93b01 100644
--- a/test/Transforms/GVN/tbaa.ll
+++ b/test/Transforms/GVN/tbaa.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -basicaa -gvn -S -o - | FileCheck %s
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
define i32 @test1(i8* %p, i8* %q) {
; CHECK: @test1(i8* %p, i8* %q)
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index 0f3efa09a1da..e71aed9e05ff 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt < %s -globalopt -stats -disable-output 2>&1 | grep "1 globalopt - Number of global vars shrunk to booleans"
@Stop = internal global i32 0 ; <i32*> [#uses=3]
diff --git a/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll b/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
index 27352fa29066..629d57c88424 100644
--- a/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
+++ b/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
@@ -1,5 +1,5 @@
; PR6422
-; RUN: opt -globalopt -S %s
+; RUN: opt -globalopt -S < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll b/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
index 6f1996a867e3..ab7721fd9720 100644
--- a/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
+++ b/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt -S %s
+; RUN: opt -globalopt -S < %s
; PR6435
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/GlobalOpt/crash-2.ll b/test/Transforms/GlobalOpt/crash-2.ll
new file mode 100644
index 000000000000..684f6cee180b
--- /dev/null
+++ b/test/Transforms/GlobalOpt/crash-2.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | opt -globalopt -disable-output
+; NOTE: This needs to run through 'llvm-as' first to reproduce the error!
+; PR15440
+
+%union.U5.0.6.12 = type { i32 }
+%struct.S0.1.7.13 = type { i8, i8, i8, i8, i16, [2 x i8] }
+%struct.S1.2.8.14 = type { i32, i16, i8, i8 }
+
+@.str = external unnamed_addr constant [2 x i8], align 1
+@g_25 = external global i8, align 1
+@g_71 = internal global %struct.S0.1.7.13 { i8 1, i8 -93, i8 58, i8 -1, i16 -5, [2 x i8] undef }, align 4
+@g_114 = external global i8, align 1
+@g_30 = external global { i32, i8, i32, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }, align 4
+@g_271 = internal global [7 x [6 x [5 x i8*]]] [[6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* null, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* @g_114, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* @g_25, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* null, i8* @g_25, i8* @g_25, i8* @g_25, i8* null], [5 x i8*] [i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* null, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* @g_25], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25], [5 x i8*] [i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_114, i8* @g_25, i8* @g_25, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_114], [5 x i8*] [i8* @g_25, i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* null], [5 x i8*] [i8* @g_114, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_25]], [6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_114, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_25, i8* @g_25, i8* @g_114], [5 x i8*] [i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25]]], align 4
+
+define i32 @func() {
+ %tmp = load i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), align 1
+ ret i32 0
+}
diff --git a/test/Transforms/GlobalOpt/crash.ll b/test/Transforms/GlobalOpt/crash.ll
index 366a874f7352..80c777ccabc1 100644
--- a/test/Transforms/GlobalOpt/crash.ll
+++ b/test/Transforms/GlobalOpt/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt -disable-output %s
+; RUN: opt -globalopt -disable-output < %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
target triple = "i386-apple-darwin9.8"
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
index e3bc473f52ad..c9076109443d 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt %s -S | FileCheck %s
+; RUN: opt -globalopt -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll b/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
new file mode 100644
index 000000000000..9295c2025a2a
--- /dev/null
+++ b/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; rdar://12580965.
+; ObjC++ test case.
+
+%struct.ButtonInitData = type { i8* }
+
+@_ZL14buttonInitData = internal global [1 x %struct.ButtonInitData] zeroinitializer, align 4
+
+@"\01L_OBJC_METH_VAR_NAME_40" = internal global [7 x i8] c"print:\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_41" = internal externally_initialized global i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_METH_VAR_NAME_40", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+@llvm.used = appending global [2 x i8*] [i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_METH_VAR_NAME_40", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_41" to i8*)]
+
+define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" {
+ %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_41", !invariant.load !2009
+ store i8* %1, i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+ ret void
+}
+
+define internal void @_GLOBAL__I_a() section "__TEXT,__StaticInit,regular,pure_instructions" {
+ call void @__cxx_global_var_init()
+ ret void
+}
+
+declare void @test(i8*)
+
+define void @print() {
+; CHECK: %1 = load i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+ %1 = load i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+ call void @test(i8* %1)
+ ret void
+}
+
+!2009 = metadata !{}
diff --git a/test/Transforms/GlobalOpt/integer-bool.ll b/test/Transforms/GlobalOpt/integer-bool.ll
index 5a34a9c4dabd..51858069ac5b 100644
--- a/test/Transforms/GlobalOpt/integer-bool.ll
+++ b/test/Transforms/GlobalOpt/integer-bool.ll
@@ -1,23 +1,28 @@
-; RUN: opt < %s -globalopt -instcombine | \
-; RUN: llvm-dis | grep "ret i1 true"
-
+; RUN: opt < %s -S -globalopt -instcombine | FileCheck %s
;; check that global opt turns integers that only hold 0 or 1 into bools.
-@G = internal global i32 0 ; <i32*> [#uses=3]
+@G = internal addrspace(1) global i32 0
+; CHECK: @G
+; CHECK: addrspace(1)
+; CHECK: global i1 false
define void @set1() {
- store i32 0, i32* @G
- ret void
+ store i32 0, i32 addrspace(1)* @G
+; CHECK: store i1 false
+ ret void
}
define void @set2() {
- store i32 1, i32* @G
- ret void
+ store i32 1, i32 addrspace(1)* @G
+; CHECK: store i1 true
+ ret void
}
define i1 @get() {
- %A = load i32* @G ; <i32> [#uses=1]
- %C = icmp slt i32 %A, 2 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK: @get
+ %A = load i32 addrspace(1)* @G
+ %C = icmp slt i32 %A, 2
+ ret i1 %C
+; CHECK: ret i1 true
}
diff --git a/test/Transforms/GlobalOpt/memset-null.ll b/test/Transforms/GlobalOpt/memset-null.ll
index 01534025faa3..53ec7551130e 100644
--- a/test/Transforms/GlobalOpt/memset-null.ll
+++ b/test/Transforms/GlobalOpt/memset-null.ll
@@ -1,4 +1,4 @@
-; RUN: opt -globalopt %s -S -o - | FileCheck %s
+; RUN: opt -globalopt -S < %s | FileCheck %s
; PR10047
%0 = type { i32, void ()* }
diff --git a/test/Transforms/GlobalOpt/unnamed-addr.ll b/test/Transforms/GlobalOpt/unnamed-addr.ll
index ee7505873126..2ca91e50da2a 100644
--- a/test/Transforms/GlobalOpt/unnamed-addr.ll
+++ b/test/Transforms/GlobalOpt/unnamed-addr.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -globalopt -S | FileCheck %s
+; RUN: opt -globalopt -S < %s | FileCheck %s
@a = internal global i32 0, align 4
@b = internal global i32 0, align 4
diff --git a/test/Transforms/IPConstantProp/user-with-multiple-uses.ll b/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
index 402ea41167ce..968718084e42 100644
--- a/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
+++ b/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
@@ -4,9 +4,9 @@
; IPSCCP should propagate the 0 argument, eliminate the switch, and propagate
; the result.
-; CHECK: define i32 @main() noreturn nounwind {
+; CHECK: define i32 @main() #0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %call2 = tail call i32 @wwrite(i64 0) nounwind
+; CHECK-NEXT: %call2 = tail call i32 @wwrite(i64 0) [[NUW:#[0-9]+]]
; CHECK-NEXT: ret i32 123
define i32 @main() noreturn nounwind {
@@ -28,3 +28,7 @@ sw.default:
return:
ret i32 0
}
+
+; CHECK: attributes #0 = { noreturn nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
index 150ae70a8262..e3de75e36fd8 100644
--- a/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
+++ b/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -indvars %s | FileCheck %s
+; RUN: opt -S -indvars < %s | FileCheck %s
; The indvar simplification code should ensure that the first PHI in the block
; is the canonical one!
diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll
index 1b702a3b1a3c..aa6a2ee16521 100644
--- a/test/Transforms/IndVarSimplify/crash.ll
+++ b/test/Transforms/IndVarSimplify/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -indvars %s -disable-output
+; RUN: opt -indvars -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
declare i32 @putchar(i8) nounwind
diff --git a/test/Transforms/IndVarSimplify/dont-recompute.ll b/test/Transforms/IndVarSimplify/dont-recompute.ll
new file mode 100644
index 000000000000..d37b0e21f826
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/dont-recompute.ll
@@ -0,0 +1,69 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+; This tests that the IV is not recomputed outside of the loop when it is known
+; to be computed by the loop and used in the loop anyway. In the example below,
+; although a's value can be computed outside of the loop, there is no benefit
+; in doing so, as it has to be computed by the loop anyway.
+;
+; extern void func(unsigned val);
+;
+; void test(unsigned m)
+; {
+; unsigned a = 0;
+;
+; for (int i=0; i<186; i++) {
+; a += m;
+; func(a);
+; }
+;
+; func(a);
+; }
+
+declare void @func(i32)
+
+; CHECK: @test
+define void @test(i32 %m) nounwind uwtable {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %add = add i32 %a.05, %m
+; CHECK: tail call void @func(i32 %add)
+ tail call void @func(i32 %add)
+ %inc = add nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, 186
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+; CHECK: for.end:
+; CHECK-NOT: mul i32 %m, 186
+; CHECK: %add.lcssa = phi i32 [ %add, %for.body ]
+; CHECK-NEXT: tail call void @func(i32 %add.lcssa)
+ tail call void @func(i32 %add)
+ ret void
+}
+
+; CHECK: @test2
+define i32 @test2(i32 %m) nounwind uwtable {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %add = add i32 %a.05, %m
+; CHECK: tail call void @func(i32 %add)
+ tail call void @func(i32 %add)
+ %inc = add nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, 186
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+; CHECK: for.end:
+; CHECK-NOT: mul i32 %m, 186
+; CHECK: %add.lcssa = phi i32 [ %add, %for.body ]
+; CHECK-NEXT: ret i32 %add.lcssa
+ ret i32 %add
+}
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 2e0f70ce461a..ed0514b08e33 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -2,7 +2,7 @@
; CHECK-NOT: and
; CHECK-NOT: zext
-target datalayout = "-p:64:64:64-n32:64"
+target datalayout = "p:64:64:64-n32:64"
define void @foo(double* %d, i64 %n) nounwind {
entry:
diff --git a/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
index 52c9e5c3ffc9..dc36b9948254 100644
--- a/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
+++ b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt < %s -indvars -disable-output -stats -info-output-file - | FileCheck %s
; Check that IndVarSimplify is not creating unnecessary canonical IVs
; that will never be used.
diff --git a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
index 5ced3b8e8da9..b8ca56050dca 100644
--- a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
+++ b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
@@ -3,10 +3,15 @@
define i32 @main() {
entry:
invoke void @__main( )
- to label %LongJmpBlkPre unwind label %LongJmpBlkPre
+ to label %LongJmpBlkPost unwind label %LongJmpBlkPre
-LongJmpBlkPre: ; preds = %entry, %entry
+LongJmpBlkPost:
+ ret i32 0
+
+LongJmpBlkPre:
%i.3 = phi i32 [ 0, %entry ], [ 0, %entry ] ; <i32> [#uses=0]
+ %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ cleanup
ret i32 0
}
@@ -14,3 +19,4 @@ define void @__main() {
ret void
}
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll b/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
index 1bd55299a901..43bdd309c987 100644
--- a/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
+++ b/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
@@ -13,6 +13,8 @@ LJDecisionBB: ; preds = %else
br label %else
RethrowExcept: ; preds = %entry
+ %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ cleanup
ret i32 0
}
@@ -20,4 +22,4 @@ define void @__main() {
ret void
}
-
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll b/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
index b4380d01e483..ee5a378b1876 100644
--- a/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
+++ b/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
@@ -2,7 +2,6 @@
; PR993
target datalayout = "e-p:32:32"
target triple = "i386-unknown-openbsd3.9"
-deplibs = [ "stdc++", "c", "crtend" ]
%"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" = type { i8* }
%"struct.__gnu_cxx::char_producer<char>" = type { i32 (...)** }
%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll b/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
index b754d9f9f5d9..fb5a4b512b9c 100644
--- a/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
+++ b/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
@@ -2,7 +2,6 @@
; PR992
target datalayout = "e-p:32:32"
target triple = "i686-pc-linux-gnu"
-deplibs = [ "stdc++", "c", "crtend" ]
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
%"struct.__cxxabiv1::__array_type_info" = type { %"struct.std::type_info" }
diff --git a/test/Transforms/Inline/2010-05-12-ValueMap.ll b/test/Transforms/Inline/2010-05-12-ValueMap.ll
index f9cc13f499b3..f452907efd07 100644
--- a/test/Transforms/Inline/2010-05-12-ValueMap.ll
+++ b/test/Transforms/Inline/2010-05-12-ValueMap.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -inline -mergefunc -disable-output
+; RUN: opt -inline -mergefunc -disable-output < %s
; This tests for a bug where the inliner kept the functions in a ValueMap after
; it had completed and a ModulePass started to run. LLVM would crash deleting
diff --git a/test/Transforms/Inline/alloca_test.ll b/test/Transforms/Inline/alloca_test.ll
index e5791d5d2553..8464259ce1f8 100644
--- a/test/Transforms/Inline/alloca_test.ll
+++ b/test/Transforms/Inline/alloca_test.ll
@@ -1,7 +1,7 @@
; This test ensures that alloca instructions in the entry block for an inlined
; function are moved to the top of the function they are inlined into.
;
-; RUN: opt -S -inline %s | FileCheck %s
+; RUN: opt -S -inline < %s | FileCheck %s
define i32 @func(i32 %i) {
%X = alloca i32 ; <i32*> [#uses=1]
diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll
index 609a3d4e153e..39e25cb5d627 100644
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@@ -45,3 +45,48 @@ define i32 @test2(i1 %cond) {
; CHECK-NOT: = alloca
; CHECK: ret i32
}
+
+declare void @barrier() noduplicate
+
+define internal i32 @f() {
+ call void @barrier() noduplicate
+ ret i32 1
+}
+
+define i32 @g() {
+ call void @barrier() noduplicate
+ ret i32 2
+}
+
+define internal i32 @h() {
+ call void @barrier() noduplicate
+ ret i32 3
+}
+
+define i32 @test3() {
+ %b = call i32 @f()
+ ret i32 %b
+}
+
+; The call to @f cannot be inlined as there is another callsite
+; calling @f, and @f contains a noduplicate call.
+;
+; The call to @g cannot be inlined as it has external linkage.
+;
+; The call to @h *can* be inlined.
+
+; CHECK: @test
+define i32 @test() {
+; CHECK: call i32 @f()
+ %a = call i32 @f()
+; CHECK: call i32 @g()
+ %b = call i32 @g()
+; CHECK-NOT: call i32 @h()
+ %c = call i32 @h()
+
+ %d = add i32 %a, %b
+ %e = add i32 %d, %c
+
+ ret i32 %e
+; CHECK: }
+}
diff --git a/test/Transforms/Inline/crash2.ll b/test/Transforms/Inline/crash2.ll
index cb1f44d5cca7..be634f625633 100644
--- a/test/Transforms/Inline/crash2.ll
+++ b/test/Transforms/Inline/crash2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -inline -scalarrepl -max-cg-scc-iterations=1 %s -disable-output
+; RUN: opt -inline -scalarrepl -max-cg-scc-iterations=1 -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.3"
diff --git a/test/Transforms/Inline/delete-call.ll b/test/Transforms/Inline/delete-call.ll
index 7716d6a47bec..97c52af9e0d5 100644
--- a/test/Transforms/Inline/delete-call.ll
+++ b/test/Transforms/Inline/delete-call.ll
@@ -1,5 +1,9 @@
-; RUN: opt %s -S -inline -functionattrs -stats 2>&1 | grep "Number of call sites deleted, not inlined"
-; RUN: opt %s -S -inline -stats 2>&1 | grep "Number of functions inlined"
+; REQUIRES: asserts
+; RUN: opt -S -inline -stats < %s 2>&1 | FileCheck %s
+; CHECK: Number of functions inlined
+
+; RUN: opt -S -inline -functionattrs -stats < %s 2>&1 | FileCheck -check-prefix=CHECK-FUNCTIONATTRS %s
+; CHECK-FUNCTIONATTRS: Number of call sites deleted, not inlined
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
target triple = "i386-apple-darwin9.8"
diff --git a/test/Transforms/Inline/devirtualize-3.ll b/test/Transforms/Inline/devirtualize-3.ll
index c32be4e024a3..3f019676e4a4 100644
--- a/test/Transforms/Inline/devirtualize-3.ll
+++ b/test/Transforms/Inline/devirtualize-3.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -inline -S -scalarrepl -gvn -instcombine %s | FileCheck %s
+; RUN: opt -basicaa -inline -S -scalarrepl -gvn -instcombine < %s | FileCheck %s
; PR5009
; CHECK: define i32 @main()
diff --git a/test/Transforms/Inline/devirtualize.ll b/test/Transforms/Inline/devirtualize.ll
index 51ea4baa3866..d46154ef6a98 100644
--- a/test/Transforms/Inline/devirtualize.ll
+++ b/test/Transforms/Inline/devirtualize.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -inline -scalarrepl -instcombine -simplifycfg -instcombine -gvn -globaldce %s | FileCheck %s
+; RUN: opt -S -Os < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/Inline/gvn-inline-iteration.ll b/test/Transforms/Inline/gvn-inline-iteration.ll
index e502fd5777d5..526ed79e7b48 100644
--- a/test/Transforms/Inline/gvn-inline-iteration.ll
+++ b/test/Transforms/Inline/gvn-inline-iteration.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -inline -gvn %s -S -max-cg-scc-iterations=1 | FileCheck %s
+; RUN: opt -basicaa -inline -gvn -S -max-cg-scc-iterations=1 < %s | FileCheck %s
; rdar://6295824 and PR6724
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/Inline/inline-optsize.ll b/test/Transforms/Inline/inline-optsize.ll
index 20d7426abd13..3ad573a04e42 100644
--- a/test/Transforms/Inline/inline-optsize.ll
+++ b/test/Transforms/Inline/inline-optsize.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -Oz %s | FileCheck %s -check-prefix=OZ
-; RUN: opt -S -O2 %s | FileCheck %s -check-prefix=O2
+; RUN: opt -S -Oz < %s | FileCheck %s -check-prefix=OZ
+; RUN: opt -S -O2 < %s | FileCheck %s -check-prefix=O2
; The inline threshold for a function with the optsize attribute is currently
; the same as the global inline threshold for -Os. Check that the optsize
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index 0b48a7282f45..77bc3784acb4 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -111,6 +111,82 @@ bb.false:
ret i32 %sub
}
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
+
+define i8 @caller4(i8 %z) {
+; Check that we can constant fold through intrinsics such as the
+; overflow-detecting arithmetic intrinsics. These are particularly important
+; as they are used heavily in standard library code and generic C++ code where
+; the arguments are often constant but complete generality is required.
+;
+; CHECK: @caller4
+; CHECK-NOT: call
+; CHECK: ret i8 -1
+
+entry:
+ %x = call i8 @callee4(i8 254, i8 14, i8 %z)
+ ret i8 %x
+}
+
+define i8 @callee4(i8 %x, i8 %y, i8 %z) {
+ %uadd = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
+ %o = extractvalue {i8, i1} %uadd, 1
+ br i1 %o, label %bb.true, label %bb.false
+
+bb.true:
+ ret i8 -1
+
+bb.false:
+ ; This block mustn't be counted in the inline cost.
+ %z1 = add i8 %z, 1
+ %z2 = add i8 %z1, 1
+ %z3 = add i8 %z2, 1
+ %z4 = add i8 %z3, 1
+ %z5 = add i8 %z4, 1
+ %z6 = add i8 %z5, 1
+ %z7 = add i8 %z6, 1
+ %z8 = add i8 %z7, 1
+ ret i8 %z8
+}
+
+define i64 @caller5(i64 %y) {
+; Check that we can round-trip constants through various kinds of casts etc. without
+; losing track of the constant prop in the inline cost analysis.
+;
+; CHECK: @caller5
+; CHECK-NOT: call
+; CHECK: ret i64 -1
+
+entry:
+ %x = call i64 @callee5(i64 42, i64 %y)
+ ret i64 %x
+}
+
+define i64 @callee5(i64 %x, i64 %y) {
+ %inttoptr = inttoptr i64 %x to i8*
+ %bitcast = bitcast i8* %inttoptr to i32*
+ %ptrtoint = ptrtoint i32* %bitcast to i64
+ %trunc = trunc i64 %ptrtoint to i32
+ %zext = zext i32 %trunc to i64
+ %cmp = icmp eq i64 %zext, 42
+ br i1 %cmp, label %bb.true, label %bb.false
+
+bb.true:
+ ret i64 -1
+
+bb.false:
+ ; This block mustn't be counted in the inline cost.
+ %y1 = add i64 %y, 1
+ %y2 = add i64 %y1, 1
+ %y3 = add i64 %y2, 1
+ %y4 = add i64 %y3, 1
+ %y5 = add i64 %y4, 1
+ %y6 = add i64 %y5, 1
+ %y7 = add i64 %y6, 1
+ %y8 = add i64 %y7, 1
+ ret i64 %y8
+}
+
define i32 @PR13412.main() {
; This is a somewhat complicated three layer subprogram that was reported to
diff --git a/test/Transforms/Inline/inline_invoke.ll b/test/Transforms/Inline/inline_invoke.ll
index 9f5f670b859b..c3941388f937 100644
--- a/test/Transforms/Inline/inline_invoke.ll
+++ b/test/Transforms/Inline/inline_invoke.ll
@@ -96,6 +96,7 @@ eh.resume:
; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A]])
; CHECK-NEXT: to label %[[LBL:[^\s]+]] unwind
; CHECK: [[LBL]]:
@@ -166,6 +167,7 @@ eh.resume:
; CHECK-NEXT: [[LPADVAL1:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A1]])
; CHECK-NEXT: to label %[[RESUME1:[^\s]+]] unwind
; CHECK: [[RESUME1]]:
@@ -185,6 +187,7 @@ eh.resume:
; CHECK-NEXT: [[LPADVAL2:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A2]])
; CHECK-NEXT: to label %[[RESUME2:[^\s]+]] unwind
; CHECK: [[RESUME2]]:
@@ -272,6 +275,7 @@ lpad.cont:
; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
; CHECK-NEXT: invoke void @_ZN1AD1Ev(
; CHECK-NEXT: to label %[[L:[^\s]+]] unwind
; CHECK: [[L]]:
@@ -318,6 +322,7 @@ terminate:
; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
; CHECK-NEXT: cleanup
; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
; CHECK-NEXT: invoke void @_ZN1AD1Ev(
; CHECK-NEXT: to label %[[L:[^\s]+]] unwind
; CHECK: [[L]]:
@@ -330,7 +335,7 @@ terminate:
; CHECK-NEXT: br label %[[JOIN]]
; CHECK: [[JOIN]]:
; CHECK-NEXT: phi { i8*, i32 }
-; CHECK-NEXT: call void @opaque() nounwind
+; CHECK-NEXT: call void @opaque() [[NUW:#[0-9]+]]
; CHECK-NEXT: br label %[[FIX:[^\s]+]]
; CHECK: lpad:
; CHECK-NEXT: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
@@ -340,3 +345,8 @@ terminate:
; CHECK-NEXT: [[T1:%.*]] = phi i32 [ 0, %[[JOIN]] ], [ 1, %lpad ]
; CHECK-NEXT: call void @use(i32 [[T1]])
; CHECK-NEXT: call void @_ZSt9terminatev()
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { ssp uwtable }
+; CHECK: attributes #3 = { noreturn nounwind }
diff --git a/test/Transforms/Inline/inline_minisize.ll b/test/Transforms/Inline/inline_minisize.ll
new file mode 100644
index 000000000000..3dddbcf3303d
--- /dev/null
+++ b/test/Transforms/Inline/inline_minisize.ll
@@ -0,0 +1,232 @@
+; RUN: opt -O2 -S < %s | FileCheck %s
+
+@data = common global i32* null, align 8
+
+define i32 @fct1(i32 %a) nounwind uwtable ssp {
+entry:
+ %a.addr = alloca i32, align 4
+ %res = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ %tmp = load i32* %a.addr, align 4
+ %idxprom = sext i32 %tmp to i64
+ %tmp1 = load i32** @data, align 8
+ %arrayidx = getelementptr inbounds i32* %tmp1, i64 %idxprom
+ %tmp2 = load i32* %arrayidx, align 4
+ %tmp3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %tmp3, 1
+ %idxprom1 = sext i32 %add to i64
+ %tmp4 = load i32** @data, align 8
+ %arrayidx2 = getelementptr inbounds i32* %tmp4, i64 %idxprom1
+ %tmp5 = load i32* %arrayidx2, align 4
+ %mul = mul nsw i32 %tmp2, %tmp5
+ store i32 %mul, i32* %res, align 4
+ store i32 0, i32* %i, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %tmp6 = load i32* %i, align 4
+ %tmp7 = load i32* %res, align 4
+ %cmp = icmp slt i32 %tmp6, %tmp7
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %tmp8 = load i32* %i, align 4
+ %idxprom3 = sext i32 %tmp8 to i64
+ %tmp9 = load i32** @data, align 8
+ %arrayidx4 = getelementptr inbounds i32* %tmp9, i64 %idxprom3
+ call void @fct0(i32* %arrayidx4)
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %tmp10 = load i32* %i, align 4
+ %inc = add nsw i32 %tmp10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ store i32 0, i32* %i, align 4
+ br label %for.cond5
+
+for.cond5: ; preds = %for.inc10, %for.end
+ %tmp11 = load i32* %i, align 4
+ %tmp12 = load i32* %res, align 4
+ %cmp6 = icmp slt i32 %tmp11, %tmp12
+ br i1 %cmp6, label %for.body7, label %for.end12
+
+for.body7: ; preds = %for.cond5
+ %tmp13 = load i32* %i, align 4
+ %idxprom8 = sext i32 %tmp13 to i64
+ %tmp14 = load i32** @data, align 8
+ %arrayidx9 = getelementptr inbounds i32* %tmp14, i64 %idxprom8
+ call void @fct0(i32* %arrayidx9)
+ br label %for.inc10
+
+for.inc10: ; preds = %for.body7
+ %tmp15 = load i32* %i, align 4
+ %inc11 = add nsw i32 %tmp15, 1
+ store i32 %inc11, i32* %i, align 4
+ br label %for.cond5
+
+for.end12: ; preds = %for.cond5
+ store i32 0, i32* %i, align 4
+ br label %for.cond13
+
+for.cond13: ; preds = %for.inc18, %for.end12
+ %tmp16 = load i32* %i, align 4
+ %tmp17 = load i32* %res, align 4
+ %cmp14 = icmp slt i32 %tmp16, %tmp17
+ br i1 %cmp14, label %for.body15, label %for.end20
+
+for.body15: ; preds = %for.cond13
+ %tmp18 = load i32* %i, align 4
+ %idxprom16 = sext i32 %tmp18 to i64
+ %tmp19 = load i32** @data, align 8
+ %arrayidx17 = getelementptr inbounds i32* %tmp19, i64 %idxprom16
+ call void @fct0(i32* %arrayidx17)
+ br label %for.inc18
+
+for.inc18: ; preds = %for.body15
+ %tmp20 = load i32* %i, align 4
+ %inc19 = add nsw i32 %tmp20, 1
+ store i32 %inc19, i32* %i, align 4
+ br label %for.cond13
+
+for.end20: ; preds = %for.cond13
+ %tmp21 = load i32* %res, align 4
+ ret i32 %tmp21
+}
+
+declare void @fct0(i32*)
+
+define i32 @fct2(i32 %a) nounwind uwtable inlinehint ssp {
+entry:
+ %a.addr = alloca i32, align 4
+ %res = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ %tmp = load i32* %a.addr, align 4
+ %shl = shl i32 %tmp, 1
+ %idxprom = sext i32 %shl to i64
+ %tmp1 = load i32** @data, align 8
+ %arrayidx = getelementptr inbounds i32* %tmp1, i64 %idxprom
+ %tmp2 = load i32* %arrayidx, align 4
+ %tmp3 = load i32* %a.addr, align 4
+ %shl1 = shl i32 %tmp3, 1
+ %add = add nsw i32 %shl1, 13
+ %idxprom2 = sext i32 %add to i64
+ %tmp4 = load i32** @data, align 8
+ %arrayidx3 = getelementptr inbounds i32* %tmp4, i64 %idxprom2
+ %tmp5 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %tmp2, %tmp5
+ store i32 %mul, i32* %res, align 4
+ store i32 0, i32* %i, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %tmp6 = load i32* %i, align 4
+ %tmp7 = load i32* %res, align 4
+ %cmp = icmp slt i32 %tmp6, %tmp7
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %tmp8 = load i32* %i, align 4
+ %idxprom4 = sext i32 %tmp8 to i64
+ %tmp9 = load i32** @data, align 8
+ %arrayidx5 = getelementptr inbounds i32* %tmp9, i64 %idxprom4
+ call void @fct0(i32* %arrayidx5)
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %tmp10 = load i32* %i, align 4
+ %inc = add nsw i32 %tmp10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ store i32 0, i32* %i, align 4
+ br label %for.cond6
+
+for.cond6: ; preds = %for.inc11, %for.end
+ %tmp11 = load i32* %i, align 4
+ %tmp12 = load i32* %res, align 4
+ %cmp7 = icmp slt i32 %tmp11, %tmp12
+ br i1 %cmp7, label %for.body8, label %for.end13
+
+for.body8: ; preds = %for.cond6
+ %tmp13 = load i32* %i, align 4
+ %idxprom9 = sext i32 %tmp13 to i64
+ %tmp14 = load i32** @data, align 8
+ %arrayidx10 = getelementptr inbounds i32* %tmp14, i64 %idxprom9
+ call void @fct0(i32* %arrayidx10)
+ br label %for.inc11
+
+for.inc11: ; preds = %for.body8
+ %tmp15 = load i32* %i, align 4
+ %inc12 = add nsw i32 %tmp15, 1
+ store i32 %inc12, i32* %i, align 4
+ br label %for.cond6
+
+for.end13: ; preds = %for.cond6
+ store i32 0, i32* %i, align 4
+ br label %for.cond14
+
+for.cond14: ; preds = %for.inc19, %for.end13
+ %tmp16 = load i32* %i, align 4
+ %tmp17 = load i32* %res, align 4
+ %cmp15 = icmp slt i32 %tmp16, %tmp17
+ br i1 %cmp15, label %for.body16, label %for.end21
+
+for.body16: ; preds = %for.cond14
+ %tmp18 = load i32* %i, align 4
+ %idxprom17 = sext i32 %tmp18 to i64
+ %tmp19 = load i32** @data, align 8
+ %arrayidx18 = getelementptr inbounds i32* %tmp19, i64 %idxprom17
+ call void @fct0(i32* %arrayidx18)
+ br label %for.inc19
+
+for.inc19: ; preds = %for.body16
+ %tmp20 = load i32* %i, align 4
+ %inc20 = add nsw i32 %tmp20, 1
+ store i32 %inc20, i32* %i, align 4
+ br label %for.cond14
+
+for.end21: ; preds = %for.cond14
+ %tmp21 = load i32* %res, align 4
+ ret i32 %tmp21
+}
+
+define i32 @fct3(i32 %c) nounwind uwtable ssp {
+entry:
+ ;CHECK: @fct3
+ ;CHECK: call i32 @fct1
+ ; The inline keyword gives sufficient benefit to inline fct2
+ ;CHECK-NOT: call i32 @fct2
+ %c.addr = alloca i32, align 4
+ store i32 %c, i32* %c.addr, align 4
+ %tmp = load i32* %c.addr, align 4
+ %call = call i32 @fct1(i32 %tmp)
+ %tmp1 = load i32* %c.addr, align 4
+ %call1 = call i32 @fct2(i32 %tmp1)
+ %add = add nsw i32 %call, %call1
+ ret i32 %add
+}
+
+define i32 @fct4(i32 %c) minsize nounwind uwtable ssp {
+entry:
+ ;CHECK: @fct4
+ ;CHECK: call i32 @fct1
+ ; With Oz (minsize attribute), the benefit of inlining fct2
+ ; is the same as for fct1, thus fct2 is not inlined
+ ;CHECK: call i32 @fct2
+ %c.addr = alloca i32, align 4
+ store i32 %c, i32* %c.addr, align 4
+ %tmp = load i32* %c.addr, align 4
+ %call = call i32 @fct1(i32 %tmp)
+ %tmp1 = load i32* %c.addr, align 4
+ %call1 = call i32 @fct2(i32 %tmp1)
+ %add = add nsw i32 %call, %call1
+ ret i32 %add
+}
diff --git a/test/Transforms/Inline/inline_ssp.ll b/test/Transforms/Inline/inline_ssp.ll
new file mode 100644
index 000000000000..a4b43a77bad2
--- /dev/null
+++ b/test/Transforms/Inline/inline_ssp.ll
@@ -0,0 +1,160 @@
+; RUN: opt -inline %s -S | FileCheck %s
+; Ensure SSP attributes are propagated correctly when inlining.
+
+@.str = private unnamed_addr constant [11 x i8] c"fun_nossp\0A\00", align 1
+@.str1 = private unnamed_addr constant [9 x i8] c"fun_ssp\0A\00", align 1
+@.str2 = private unnamed_addr constant [15 x i8] c"fun_sspstrong\0A\00", align 1
+@.str3 = private unnamed_addr constant [12 x i8] c"fun_sspreq\0A\00", align 1
+
+; These first four functions (@fun_sspreq, @fun_sspstrong, @fun_ssp, @fun_nossp)
+; are used by the remaining functions to ensure that the SSP attributes are
+; propagated correctly. The caller should have its SSP attribute set as:
+; strictest(caller-ssp-attr, callee-ssp-attr), where strictness is ordered as:
+; sspreq > sspstrong > ssp > [no ssp]
+define internal void @fun_sspreq() nounwind sspreq uwtable {
+entry:
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str3, i32 0, i32 0))
+ ret void
+}
+
+define internal void @fun_sspstrong() nounwind sspstrong uwtable {
+entry:
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str2, i32 0, i32 0))
+ ret void
+}
+
+define internal void @fun_ssp() nounwind ssp uwtable {
+entry:
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str1, i32 0, i32 0))
+ ret void
+}
+
+define internal void @fun_nossp() nounwind uwtable {
+entry:
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0))
+ ret void
+}
+
+; Tests start below
+
+define void @inline_req_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_req_req() #0
+ call void @fun_sspreq()
+ ret void
+}
+
+define void @inline_req_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_req_strong() #0
+ call void @fun_sspreq()
+ ret void
+}
+
+define void @inline_req_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_req_ssp() #0
+ call void @fun_sspreq()
+ ret void
+}
+
+define void @inline_req_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_req_nossp() #0
+ call void @fun_sspreq()
+ ret void
+}
+
+define void @inline_strong_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_strong_req() #0
+ call void @fun_sspstrong()
+ ret void
+}
+
+
+define void @inline_strong_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_strong_strong() #1
+ call void @fun_sspstrong()
+ ret void
+}
+
+define void @inline_strong_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_strong_ssp() #1
+ call void @fun_sspstrong()
+ ret void
+}
+
+define void @inline_strong_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_strong_nossp() #1
+ call void @fun_sspstrong()
+ ret void
+}
+
+define void @inline_ssp_req() nounwind sspreq uwtable {
+entry:
+; CHECK: @inline_ssp_req() #0
+ call void @fun_ssp()
+ ret void
+}
+
+
+define void @inline_ssp_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_ssp_strong() #1
+ call void @fun_ssp()
+ ret void
+}
+
+define void @inline_ssp_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_ssp_ssp() #2
+ call void @fun_ssp()
+ ret void
+}
+
+define void @inline_ssp_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_ssp_nossp() #2
+ call void @fun_ssp()
+ ret void
+}
+
+define void @inline_nossp_req() nounwind uwtable sspreq {
+entry:
+; CHECK: @inline_nossp_req() #0
+ call void @fun_nossp()
+ ret void
+}
+
+
+define void @inline_nossp_strong() nounwind sspstrong uwtable {
+entry:
+; CHECK: @inline_nossp_strong() #1
+ call void @fun_nossp()
+ ret void
+}
+
+define void @inline_nossp_ssp() nounwind ssp uwtable {
+entry:
+; CHECK: @inline_nossp_ssp() #2
+ call void @fun_nossp()
+ ret void
+}
+
+define void @inline_nossp_nossp() nounwind uwtable {
+entry:
+; CHECK: @inline_nossp_nossp() #3
+ call void @fun_nossp()
+ ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+; CHECK: attributes #0 = { nounwind sspreq uwtable }
+; CHECK: attributes #1 = { nounwind sspstrong uwtable }
+; CHECK: attributes #2 = { nounwind ssp uwtable }
+; CHECK: attributes #3 = { nounwind uwtable }
diff --git a/test/Transforms/Inline/lifetime-no-datalayout.ll b/test/Transforms/Inline/lifetime-no-datalayout.ll
new file mode 100644
index 000000000000..f4ffef3850f1
--- /dev/null
+++ b/test/Transforms/Inline/lifetime-no-datalayout.ll
@@ -0,0 +1,23 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+
+declare void @use(i8* %a)
+
+define void @helper() {
+ %a = alloca i8
+ call void @use(i8* %a)
+ ret void
+}
+
+; Size in llvm.lifetime.X should be -1 (unknown).
+define void @test() {
+; CHECK: @test
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.start(i64 -1
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.end(i64 -1
+ call void @helper()
+; CHECK-NOT: lifetime
+; CHECK: ret void
+ ret void
+}
+
diff --git a/test/Transforms/Inline/lifetime.ll b/test/Transforms/Inline/lifetime.ll
index a95c836b77de..fc73385295ed 100644
--- a/test/Transforms/Inline/lifetime.ll
+++ b/test/Transforms/Inline/lifetime.ll
@@ -1,22 +1,25 @@
-; RUN: opt -inline %s -S -o - | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
declare void @llvm.lifetime.start(i64, i8*)
declare void @llvm.lifetime.end(i64, i8*)
define void @helper_both_markers() {
%a = alloca i8
- call void @llvm.lifetime.start(i64 1, i8* %a)
- call void @llvm.lifetime.end(i64 1, i8* %a)
+ ; The size in llvm.lifetime.start / llvm.lifetime.end differs from the
+ ; allocation size. We should use the former.
+ call void @llvm.lifetime.start(i64 2, i8* %a)
+ call void @llvm.lifetime.end(i64 2, i8* %a)
ret void
}
define void @test_both_markers() {
; CHECK: @test_both_markers
-; CHECK: llvm.lifetime.start(i64 1
-; CHECK-NEXT: llvm.lifetime.end(i64 1
+; CHECK: llvm.lifetime.start(i64 2
+; CHECK-NEXT: llvm.lifetime.end(i64 2
call void @helper_both_markers()
-; CHECK-NEXT: llvm.lifetime.start(i64 1
-; CHECK-NEXT: llvm.lifetime.end(i64 1
+; CHECK-NEXT: llvm.lifetime.start(i64 2
+; CHECK-NEXT: llvm.lifetime.end(i64 2
call void @helper_both_markers()
; CHECK-NEXT: ret void
ret void
@@ -27,7 +30,7 @@ define void @test_both_markers() {
declare void @use(i8* %a)
define void @helper_no_markers() {
- %a = alloca i8
+ %a = alloca i8 ; Allocation size is 1 byte.
call void @use(i8* %a)
ret void
}
@@ -37,14 +40,14 @@ define void @helper_no_markers() {
define void @test_no_marker() {
; CHECK: @test_no_marker
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 -1
+; CHECK: llvm.lifetime.start(i64 1
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 -1
+; CHECK: llvm.lifetime.end(i64 1
call void @helper_no_markers()
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 -1
+; CHECK: llvm.lifetime.start(i64 1
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 -1
+; CHECK: llvm.lifetime.end(i64 1
call void @helper_no_markers()
; CHECK-NOT: lifetime
; CHECK: ret void
@@ -76,3 +79,22 @@ define void @test_two_casts() {
; CHECK: ret void
ret void
}
+
+define void @helper_arrays_alloca() {
+ %a = alloca [10 x i32], align 16
+ %1 = bitcast [10 x i32]* %a to i8*
+ call void @use(i8* %1)
+ ret void
+}
+
+define void @test_arrays_alloca() {
+; CHECK: @test_arrays_alloca
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.start(i64 40,
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.end(i64 40,
+ call void @helper_arrays_alloca()
+; CHECK-NOT: lifetime
+; CHECK: ret void
+ ret void
+}
diff --git a/test/Transforms/Inline/noinline-recursive-fn.ll b/test/Transforms/Inline/noinline-recursive-fn.ll
index 6cde0e27fd1e..5520093ee457 100644
--- a/test/Transforms/Inline/noinline-recursive-fn.ll
+++ b/test/Transforms/Inline/noinline-recursive-fn.ll
@@ -2,7 +2,7 @@
; This effectively is just peeling off the first iteration of a loop, and the
; inliner heuristics are not set up for this.
-; RUN: opt -inline %s -S | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.3"
diff --git a/test/Transforms/Inline/noinline.ll b/test/Transforms/Inline/noinline.ll
index dc3f6e003094..7667114b68e6 100644
--- a/test/Transforms/Inline/noinline.ll
+++ b/test/Transforms/Inline/noinline.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -inline -S | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
; PR6682
declare void @foo() nounwind
diff --git a/test/Transforms/Inline/recursive.ll b/test/Transforms/Inline/recursive.ll
index 5fe8d1639ca3..fe1c041af9a8 100644
--- a/test/Transforms/Inline/recursive.ll
+++ b/test/Transforms/Inline/recursive.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -inline -S | FileCheck %s
+; RUN: opt -inline -S < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin10.0"
diff --git a/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll b/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
index 1da28562aae4..d266164fd870 100644
--- a/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
+++ b/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -instcombine %s | FileCheck %s
+; RUN: opt -S -instcombine < %s | FileCheck %s
; PR2297
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll b/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll
new file mode 100644
index 000000000000..b66495d9cbaa
--- /dev/null
+++ b/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -inline -instcombine -functionattrs | llvm-dis
+;
+; Check that nocapture attributes are added when run after an SCC pass.
+; PR3520
+
+define i32 @use(i8* %x) nounwind readonly {
+; CHECK: @use(i8* nocapture %x)
+ %1 = tail call i64 @strlen(i8* %x) nounwind readonly
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
+declare i64 @strlen(i8*) nounwind readonly
+; CHECK: declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
index 2df12d670adb..bb3159e1e6fa 100644
--- a/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
+++ b/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -S %s | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
; PR6486
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
diff --git a/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll b/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
new file mode 100644
index 000000000000..09a96749f260
--- /dev/null
+++ b/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
@@ -0,0 +1,20 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+; PR7265
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%union.anon = type { i32, [4 x i8] }
+
+@.str = private constant [3 x i8] c"%s\00"
+
+define void @CopyEventArg(%union.anon* %ev) nounwind {
+entry:
+ %call = call i32 (i8*, i8*, ...)* @sprintf(i8* undef, i8* getelementptr inbounds ([3 x i8]* @.str, i64 0, i64 0), %union.anon* %ev) nounwind
+; CHECK: bitcast %union.anon* %ev to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+ ret void
+}
+
+declare i32 @sprintf(i8*, i8*, ...)
+
diff --git a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
index eb2899475695..800162197919 100644
--- a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
+++ b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@@ -5,8 +5,8 @@
define i32 @main(i32 %argc) nounwind ssp {
entry:
%tmp3151 = trunc i32 %argc to i8
-; CHECK: %tmp3163 = shl i8 %tmp3162, 6
-; CHECK: and i8 %tmp3163, 64
+; CHECK: %0 = shl i8 %tmp3151, 5
+; CHECK: and i8 %0, 64
; CHECK-NOT: shl
; CHECK-NOT: shr
%tmp3161 = or i8 %tmp3151, -17
diff --git a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
index 0907c490bb35..2dedd44e2be1 100644
--- a/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
+++ b/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
@@ -50,7 +50,7 @@ entry:
%b = add <4 x i32> zeroinitializer, %a
ret <4 x i32> %b
; CHECK: entry:
-; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
; CHECK-NEXT: ret <4 x i32> %a
}
@@ -66,3 +66,7 @@ entry:
declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+
+; CHECK: attributes #0 = { nounwind readnone ssp }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll b/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll
new file mode 100644
index 000000000000..fc29b095e5ce
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -instcombine -S
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+define <4 x i32> @foo(<4 x i32*>* %in) {
+ %t17 = load <4 x i32*>* %in, align 8
+ %t18 = icmp eq <4 x i32*> %t17, zeroinitializer
+ %t19 = zext <4 x i1> %t18 to <4 x i32>
+ ret <4 x i32> %t19
+}
diff --git a/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll b/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
new file mode 100644
index 000000000000..b20c3a07c0ac
--- /dev/null
+++ b/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct._my_struct = type <{ [12 x i8], [4 x i8] }>
+
+@initval = common global %struct._my_struct zeroinitializer, align 1
+
+; InstCombine will try to change the %struct._my_struct alloca into an
+; allocation of an i96 because of the bitcast to create %2. That's not valid,
+; as the other 32 bits of the structure still feed into the return value.
+define { i64, i64 } @function(i32 %x, i32 %y, i32 %z) nounwind {
+; CHECK: @function
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %retval = alloca %struct._my_struct, align 8
+; CHECK-NOT: bitcast i96* %retval to %struct._my_struct*
+entry:
+ %retval = alloca %struct._my_struct, align 8
+ %k.sroa.0.0.copyload = load i96* bitcast (%struct._my_struct* @initval to i96*), align 1
+ %k.sroa.1.12.copyload = load i32* bitcast ([4 x i8]* getelementptr inbounds (%struct._my_struct* @initval, i64 0, i32 1) to i32*), align 1
+ %0 = zext i32 %x to i96
+ %bf.value = shl nuw nsw i96 %0, 6
+ %bf.clear = and i96 %k.sroa.0.0.copyload, -288230376151711744
+ %1 = zext i32 %y to i96
+ %bf.value2 = shl nuw nsw i96 %1, 32
+ %bf.shl3 = and i96 %bf.value2, 288230371856744448
+ %bf.value.masked = and i96 %bf.value, 4294967232
+ %2 = zext i32 %z to i96
+ %bf.value8 = and i96 %2, 63
+ %bf.clear4 = or i96 %bf.shl3, %bf.value.masked
+ %bf.set5 = or i96 %bf.clear4, %bf.value8
+ %bf.set10 = or i96 %bf.set5, %bf.clear
+ %retval.0.cast7 = bitcast %struct._my_struct* %retval to i96*
+ store i96 %bf.set10, i96* %retval.0.cast7, align 8
+ %retval.12.idx8 = getelementptr inbounds %struct._my_struct* %retval, i64 0, i32 1
+ %retval.12.cast9 = bitcast [4 x i8]* %retval.12.idx8 to i32*
+ store i32 %k.sroa.1.12.copyload, i32* %retval.12.cast9, align 4
+ %trunc = trunc i96 %bf.set10 to i64
+ %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %trunc, 0
+ %retval.8.idx12 = getelementptr inbounds %struct._my_struct* %retval, i64 0, i32 0, i64 8
+ %retval.8.cast13 = bitcast i8* %retval.8.idx12 to i64*
+ %retval.8.load14 = load i64* %retval.8.cast13, align 8
+ %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.8.load14, 1
+ ret { i64, i64 } %.fca.1.insert
+}
diff --git a/test/Transforms/InstCombine/abs-1.ll b/test/Transforms/InstCombine/abs-1.ll
new file mode 100644
index 000000000000..807f238755b5
--- /dev/null
+++ b/test/Transforms/InstCombine/abs-1.ll
@@ -0,0 +1,41 @@
+; Test that the abs library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @abs(i32)
+declare i64 @labs(i64)
+declare i64 @llabs(i64)
+
+; Check abs(x) -> x >s -1 ? x : -x.
+
+define i32 @test_simplify1(i32 %x) {
+; CHECK: @test_simplify1
+ %ret = call i32 @abs(i32 %x)
+; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i32 0, %x
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[ISPOS]], i32 %x, i32 [[NEG]]
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[RET]]
+}
+
+define i64 @test_simplify2(i64 %x) {
+; CHECK: @test_simplify2
+ %ret = call i64 @labs(i64 %x)
+; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i64 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i64 0, %x
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[ISPOS]], i64 %x, i64 [[NEG]]
+ ret i64 %ret
+; CHECK-NEXT: ret i64 [[RET]]
+}
+
+define i64 @test_simplify3(i64 %x) {
+; CHECK: @test_simplify3
+ %ret = call i64 @llabs(i64 %x)
+; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i64 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i64 0, %x
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[ISPOS]], i64 %x, i64 [[NEG]]
+ ret i64 %ret
+; CHECK-NEXT: ret i64 [[RET]]
+}
diff --git a/test/Transforms/InstCombine/align-external.ll b/test/Transforms/InstCombine/align-external.ll
index d4a5d429912b..c3ef2dbb70f0 100644
--- a/test/Transforms/InstCombine/align-external.ll
+++ b/test/Transforms/InstCombine/align-external.ll
@@ -8,7 +8,7 @@
; CHECK: %q = add i64 %r, 1
; CHECK: ret i64 %q
-target datalayout = "-i32:8:32"
+target datalayout = "i32:8:32"
@A = external global i32
@B = weak_odr global i32 0
diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll
new file mode 100644
index 000000000000..4ded581a14c6
--- /dev/null
+++ b/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; These tests are extracted from bitcast.ll.
+; Verify that they also work correctly on big-endian targets.
+
+define float @test2(<2 x float> %A, <2 x i32> %B) {
+ %tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2]
+ %tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1]
+ %tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1]
+
+ %tmp = bitcast <2 x i32> %B to i64
+ %tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1]
+ %tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1]
+
+ %add = fadd float %tmp24, %tmp4
+ ret float %add
+
+; CHECK: @test2
+; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 1
+; CHECK-NEXT: bitcast <2 x i32> %B to <2 x float>
+; CHECK-NEXT: %tmp4 = extractelement <2 x float> {{.*}}, i32 1
+; CHECK-NEXT: %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT: ret float %add
+}
+
+define float @test3(<2 x float> %A, <2 x i64> %B) {
+ %tmp28 = bitcast <2 x float> %A to i64
+ %tmp29 = lshr i64 %tmp28, 32
+ %tmp23 = trunc i64 %tmp29 to i32
+ %tmp24 = bitcast i32 %tmp23 to float
+
+ %tmp = bitcast <2 x i64> %B to i128
+ %tmp1 = lshr i128 %tmp, 64
+ %tmp2 = trunc i128 %tmp1 to i32
+ %tmp4 = bitcast i32 %tmp2 to float
+
+ %add = fadd float %tmp24, %tmp4
+ ret float %add
+
+; CHECK: @test3
+; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 0
+; CHECK-NEXT: bitcast <2 x i64> %B to <4 x float>
+; CHECK-NEXT: %tmp4 = extractelement <4 x float> {{.*}}, i32 1
+; CHECK-NEXT: %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT: ret float %add
+}
+
diff --git a/test/Transforms/InstCombine/bitcast-vector-fold.ll b/test/Transforms/InstCombine/bitcast-vector-fold.ll
index 8feec229171a..8fd7f35b7bb7 100644
--- a/test/Transforms/InstCombine/bitcast-vector-fold.ll
+++ b/test/Transforms/InstCombine/bitcast-vector-fold.ll
@@ -31,3 +31,8 @@ define <4 x i32> @test6() {
%tmp3 = bitcast <2 x double> <double 0.5, double 1.0> to <4 x i32>
ret <4 x i32> %tmp3
}
+
+define i32 @test7() {
+ %tmp3 = bitcast <2 x half> <half 0xH1100, half 0xH0011> to i32
+ ret i32 %tmp3
+}
\ No newline at end of file
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 8f6ae7d83527..1e6113256bf3 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -11,7 +11,7 @@ define i32 @test1(i64 %a) {
%t3 = xor <2 x i32> %t1, %t2
%t4 = extractelement <2 x i32> %t3, i32 0
ret i32 %t4
-
+
; CHECK: @test1
; CHECK: ret i32 0
}
@@ -30,7 +30,7 @@ define float @test2(<2 x float> %A, <2 x i32> %B) {
%add = fadd float %tmp24, %tmp4
ret float %add
-
+
; CHECK: @test2
; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 0
; CHECK-NEXT: bitcast <2 x i32> %B to <2 x float>
@@ -55,7 +55,7 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
%add = fadd float %tmp24, %tmp4
ret float %add
-
+
; CHECK: @test3
; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 1
; CHECK-NEXT: bitcast <2 x i64> %B to <4 x float>
@@ -75,7 +75,7 @@ define <2 x i32> @test4(i32 %A, i32 %B){
; CHECK: @test4
; CHECK-NEXT: insertelement <2 x i32> undef, i32 %A, i32 0
; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %B, i32 1
- ; CHECK-NEXT: ret <2 x i32>
+ ; CHECK-NEXT: ret <2 x i32>
}
@@ -92,7 +92,7 @@ define <2 x float> @test5(float %A, float %B) {
; CHECK: @test5
; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0
; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1
- ; CHECK-NEXT: ret <2 x float>
+ ; CHECK-NEXT: ret <2 x float>
}
define <2 x float> @test6(float %A){
@@ -123,7 +123,7 @@ define i64 @Vec2(i64 %in) {
}
define i64 @All11(i64 %in) {
- %out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1)
+ %out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1)
ret i64 %out
; CHECK: @All11
; CHECK: ret i64 0
@@ -131,9 +131,16 @@ define i64 @All11(i64 %in) {
define i32 @All111(i32 %in) {
- %out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1)
+ %out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1)
ret i32 %out
; CHECK: @All111
; CHECK: ret i32 0
}
+define <2 x i16> @BitcastInsert(i32 %a) {
+ %v = insertelement <1 x i32> undef, i32 %a, i32 0
+ %r = bitcast <1 x i32> %v to <2 x i16>
+ ret <2 x i16> %r
+; CHECK: @BitcastInsert
+; CHECK: bitcast i32 %a to <2 x i16>
+}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index b4eb69d4363d..de738bb7c06d 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -473,14 +473,12 @@ define i64 @test51(i64 %A, i1 %cond) {
%F = sext i32 %E to i64
ret i64 %F
; CHECK: @test51
-
-; FIXME: disabled, see PR5997
-; HECK-NEXT: %C = and i64 %A, 4294967294
-; HECK-NEXT: %D = or i64 %A, 1
-; HECK-NEXT: %E = select i1 %cond, i64 %C, i64 %D
-; HECK-NEXT: %sext = shl i64 %E, 32
-; HECK-NEXT: %F = ashr i64 %sext, 32
-; HECK-NEXT: ret i64 %F
+; CHECK-NEXT: %C = and i64 %A, 4294967294
+; CHECK-NEXT: %D = or i64 %A, 1
+; CHECK-NEXT: %E = select i1 %cond, i64 %C, i64 %D
+; CHECK-NEXT: %sext = shl i64 %E, 32
+; CHECK-NEXT: %F = ashr exact i64 %sext, 32
+; CHECK-NEXT: ret i64 %F
}
define i32 @test52(i64 %A) {
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll
index f8e49110610a..72db66e3ab0f 100644
--- a/test/Transforms/InstCombine/compare-signs.ll
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -instcombine -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
; PR5438
; TODO: This should also optimize down.
diff --git a/test/Transforms/InstCombine/constant-expr-datalayout.ll b/test/Transforms/InstCombine/constant-expr-datalayout.ll
new file mode 100644
index 000000000000..9a72c77afdb0
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-expr-datalayout.ll
@@ -0,0 +1,12 @@
+; RUN: opt -instcombine %s -S -o - | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%test1.struct = type { i32, i32 }
+@test1.aligned_glbl = global %test1.struct zeroinitializer, align 4
+define void @test1(i64 *%ptr) {
+ store i64 and (i64 ptrtoint (i32* getelementptr (%test1.struct* @test1.aligned_glbl, i32 0, i32 1) to i64), i64 3), i64* %ptr
+; CHECK: store i64 0, i64* %ptr
+ ret void
+}
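+
+; A minimal sketch of the same fold in isolation (hypothetical function,
+; not part of the original test): the global is 4-byte aligned and field 1
+; of %test1.struct sits at offset 4, so the two low bits of its address
+; are known to be zero.
+define i64 @low_bits_sketch() {
+  %bits = and i64 ptrtoint (i32* getelementptr (%test1.struct* @test1.aligned_glbl, i32 0, i32 1) to i64), 3
+  ret i64 %bits
+; expected to fold to: ret i64 0
+}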
diff --git a/test/Transforms/InstCombine/cos-1.ll b/test/Transforms/InstCombine/cos-1.ll
new file mode 100644
index 000000000000..b92e448abd9f
--- /dev/null
+++ b/test/Transforms/InstCombine/cos-1.ll
@@ -0,0 +1,38 @@
+; Test that the cos library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=NO-FLOAT-SHRINK
+; RUN: opt < %s -instcombine -enable-double-float-shrink -S | FileCheck %s -check-prefix=DO-FLOAT-SHRINK
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare double @cos(double)
+
+; Check cos(-x) -> cos(x).
+
+define double @test_simplify1(double %d) {
+; NO-FLOAT-SHRINK: @test_simplify1
+ %neg = fsub double -0.000000e+00, %d
+ %cos = call double @cos(double %neg)
+; NO-FLOAT-SHRINK: call double @cos(double %d)
+ ret double %cos
+}
+
+define float @test_simplify2(float %f) {
+; DO-FLOAT-SHRINK: @test_simplify2
+ %conv1 = fpext float %f to double
+ %neg = fsub double -0.000000e+00, %conv1
+ %cos = call double @cos(double %neg)
+ %conv2 = fptrunc double %cos to float
+; DO-FLOAT-SHRINK: call float @cosf(float %f)
+ ret float %conv2
+}
+
+define float @test_simplify3(float %f) {
+; NO-FLOAT-SHRINK: @test_simplify3
+ %conv1 = fpext float %f to double
+ %neg = fsub double -0.000000e+00, %conv1
+ %cos = call double @cos(double %neg)
+; NO-FLOAT-SHRINK: call double @cos(double %conv1)
+ %conv2 = fptrunc double %cos to float
+ ret float %conv2
+}
diff --git a/test/Transforms/InstCombine/cos-2.ll b/test/Transforms/InstCombine/cos-2.ll
new file mode 100644
index 000000000000..2f2dfafe484d
--- /dev/null
+++ b/test/Transforms/InstCombine/cos-2.ll
@@ -0,0 +1,17 @@
+; Test that the cos library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare float @cos(double)
+
+; Check that cos functions with the wrong prototype aren't simplified.
+
+define float @test_no_simplify1(double %d) {
+; CHECK: @test_no_simplify1
+ %neg = fsub double -0.000000e+00, %d
+ %cos = call float @cos(double %neg)
+; CHECK: call float @cos(double %neg)
+ ret float %cos
+}
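+
+; The declared prototype float @cos(double) deliberately mismatches the
+; real libm signature double cos(double), so the simplifier must treat
+; the call as opaque.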
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
new file mode 100644
index 000000000000..084efdc989f9
--- /dev/null
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -0,0 +1,24 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+
+@.str = private constant [3 x i8] c"%c\00"
+
+define void @foo() nounwind ssp {
+; CHECK: call i32 @putchar{{.+}} !dbg
+ %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 97), !dbg !5
+ ret void, !dbg !7
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.sp = !{!0}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"m.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"m.c", metadata !"/private/tmp", metadata !"clang", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 5, i32 2, metadata !6, null}
+!6 = metadata !{i32 589835, metadata !0, i32 4, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
+!7 = metadata !{i32 6, i32 1, metadata !6, null}
+
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index f6892fc3e1f9..cdbcd865117c 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -28,22 +28,21 @@ entry:
ret i8* %call, !dbg !21
}
-!llvm.dbg.lv.foobar = !{!0, !7, !9}
-!llvm.dbg.sp = !{!1}
+!llvm.dbg.cu = !{!3}
-!0 = metadata !{i32 590081, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"string.h", metadata !"Game", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589841, i32 0, i32 12, metadata !"bits.c", metadata !"Game", metadata !"clang version 3.0 (trunk 127710)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 0, i32 12, metadata !26, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!5 = metadata !{metadata !6}
-!6 = metadata !{i32 589839, metadata !3, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 590081, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 589860, metadata !3, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 590081, metadata !1, metadata !"__len", metadata !2, i32 50331726, metadata !10, i32 0} ; [ DW_TAG_arg_variable ]
+!6 = metadata !{i32 786447, metadata !3, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 786689, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 786468, metadata !3, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 786689, metadata !1, metadata !"__len", metadata !2, i32 50331726, metadata !10, i32 0, null} ; [ DW_TAG_arg_variable ]
!10 = metadata !{i32 589846, metadata !3, metadata !"size_t", metadata !2, i32 80, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_typedef ]
!11 = metadata !{i32 589846, metadata !3, metadata !"__darwin_size_t", metadata !2, i32 90, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 589860, metadata !3, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 786468, metadata !3, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
!13 = metadata !{metadata !"any pointer", metadata !14}
!14 = metadata !{metadata !"omnipotent char", metadata !15}
!15 = metadata !{metadata !"Simple C/C++ TBAA", null}
@@ -53,5 +52,10 @@ entry:
!19 = metadata !{metadata !"long", metadata !14}
!20 = metadata !{i32 78, i32 54, metadata !1, null}
!21 = metadata !{i32 80, i32 3, metadata !22, null}
-!22 = metadata !{i32 589835, metadata !23, i32 80, i32 3, metadata !2, i32 7} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 589835, metadata !1, i32 79, i32 1, metadata !2, i32 6} ; [ DW_TAG_lexical_block ]
+!22 = metadata !{i32 786443, metadata !23, i32 80, i32 3, metadata !2, i32 7} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 786443, metadata !1, i32 79, i32 1, metadata !2, i32 6} ; [ DW_TAG_lexical_block ]
+!24 = metadata !{metadata !1}
+!25 = metadata !{metadata !0, metadata !7, metadata !9}
+!26 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
+!27 = metadata !{metadata !"string.h", metadata !"Game"}
+!28 = metadata !{metadata !"bits.c", metadata !"Game"}
diff --git a/test/Transforms/InstCombine/devirt.ll b/test/Transforms/InstCombine/devirt.ll
index 6189dc2af4f9..9c7cf5d697e8 100644
--- a/test/Transforms/InstCombine/devirt.ll
+++ b/test/Transforms/InstCombine/devirt.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -S -o - %s | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
; CHECK-NOT: getelementptr
; CHECK-NOT: ptrtoint
diff --git a/test/Transforms/InstCombine/disable-simplify-libcalls.ll b/test/Transforms/InstCombine/disable-simplify-libcalls.ll
index d81e9ae5bd73..c2c29368b1a8 100644
--- a/test/Transforms/InstCombine/disable-simplify-libcalls.ll
+++ b/test/Transforms/InstCombine/disable-simplify-libcalls.ll
@@ -37,6 +37,18 @@ declare i64 @strtoll(i8*, i8**, i32)
declare i64 @strtoul(i8*, i8**, i32)
declare i64 @strtoull(i8*, i8**, i32)
declare i64 @strcspn(i8*, i8*)
+declare i32 @abs(i32)
+declare i32 @ffs(i32)
+declare i32 @ffsl(i64)
+declare i32 @ffsll(i64)
+declare i32 @fprintf(i8*, i8*)
+declare i32 @isascii(i32)
+declare i32 @isdigit(i32)
+declare i32 @toascii(i32)
+declare i64 @labs(i64)
+declare i64 @llabs(i64)
+declare i32 @printf(i8*)
+declare i32 @sprintf(i8*, i8*)
define double @t1(double %x) {
; CHECK: @t1
@@ -234,3 +246,90 @@ define i64 @t25(i8* %y) {
ret i64 %ret
; CHECK: call i64 @strcspn
}
+
+define i32 @t26(i32 %y) {
+; CHECK: @t26
+ %ret = call i32 @abs(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @abs
+}
+
+define i32 @t27(i32 %y) {
+; CHECK: @t27
+ %ret = call i32 @ffs(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @ffs
+}
+
+define i32 @t28(i64 %y) {
+; CHECK: @t28
+ %ret = call i32 @ffsl(i64 %y)
+ ret i32 %ret
+; CHECK: call i32 @ffsl
+}
+
+define i32 @t29(i64 %y) {
+; CHECK: @t29
+ %ret = call i32 @ffsll(i64 %y)
+ ret i32 %ret
+; CHECK: call i32 @ffsll
+}
+
+define void @t30() {
+; CHECK: @t30
+ %x = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
+ call i32 @fprintf(i8* null, i8* %x)
+ ret void
+; CHECK: call i32 @fprintf
+}
+
+define i32 @t31(i32 %y) {
+; CHECK: @t31
+ %ret = call i32 @isascii(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @isascii
+}
+
+define i32 @t32(i32 %y) {
+; CHECK: @t32
+ %ret = call i32 @isdigit(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @isdigit
+}
+
+define i32 @t33(i32 %y) {
+; CHECK: @t33
+ %ret = call i32 @toascii(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @toascii
+}
+
+define i64 @t34(i64 %y) {
+; CHECK: @t34
+ %ret = call i64 @labs(i64 %y)
+ ret i64 %ret
+; CHECK: call i64 @labs
+}
+
+define i64 @t35(i64 %y) {
+; CHECK: @t35
+ %ret = call i64 @llabs(i64 %y)
+ ret i64 %ret
+; CHECK: call i64 @llabs
+}
+
+define void @t36() {
+; CHECK: @t36
+ %x = getelementptr inbounds [1 x i8]* @empty, i32 0, i32 0
+ call i32 @printf(i8* %x)
+ ret void
+; CHECK: call i32 @printf
+}
+
+define void @t37(i8* %x) {
+; CHECK: @t37
+ %y = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
+ call i32 @sprintf(i8* %x, i8* %y)
+ ret void
+; CHECK: call i32 @sprintf
+}
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
new file mode 100644
index 000000000000..e5448ee00765
--- /dev/null
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -0,0 +1,333 @@
+; RUN: opt < %s -instcombine -enable-double-float-shrink -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define float @acos_test(float %f) nounwind readnone {
+; CHECK: acos_test
+ %conv = fpext float %f to double
+ %call = call double @acos(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @acosf(float %f)
+}
+
+define double @acos_test2(float %f) nounwind readnone {
+; CHECK: acos_test2
+ %conv = fpext float %f to double
+ %call = call double @acos(double %conv)
+ ret double %call
+; CHECK: call double @acos(double %conv)
+}
+
+define float @acosh_test(float %f) nounwind readnone {
+; CHECK: acosh_test
+ %conv = fpext float %f to double
+ %call = call double @acosh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @acoshf(float %f)
+}
+
+define double @acosh_test2(float %f) nounwind readnone {
+; CHECK: acosh_test2
+ %conv = fpext float %f to double
+ %call = call double @acosh(double %conv)
+ ret double %call
+; CHECK: call double @acosh(double %conv)
+}
+
+define float @asin_test(float %f) nounwind readnone {
+; CHECK: asin_test
+ %conv = fpext float %f to double
+ %call = call double @asin(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @asinf(float %f)
+}
+
+define double @asin_test2(float %f) nounwind readnone {
+; CHECK: asin_test2
+ %conv = fpext float %f to double
+ %call = call double @asin(double %conv)
+ ret double %call
+; CHECK: call double @asin(double %conv)
+}
+
+define float @asinh_test(float %f) nounwind readnone {
+; CHECK: asinh_test
+ %conv = fpext float %f to double
+ %call = call double @asinh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @asinhf(float %f)
+}
+
+define double @asinh_test2(float %f) nounwind readnone {
+; CHECK: asinh_test2
+ %conv = fpext float %f to double
+ %call = call double @asinh(double %conv)
+ ret double %call
+; CHECK: call double @asinh(double %conv)
+}
+
+define float @atan_test(float %f) nounwind readnone {
+; CHECK: atan_test
+ %conv = fpext float %f to double
+ %call = call double @atan(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @atanf(float %f)
+}
+
+define double @atan_test2(float %f) nounwind readnone {
+; CHECK: atan_test2
+ %conv = fpext float %f to double
+ %call = call double @atan(double %conv)
+ ret double %call
+; CHECK: call double @atan(double %conv)
+}
+define float @atanh_test(float %f) nounwind readnone {
+; CHECK: atanh_test
+ %conv = fpext float %f to double
+ %call = call double @atanh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @atanhf(float %f)
+}
+
+define double @atanh_test2(float %f) nounwind readnone {
+; CHECK: atanh_test2
+ %conv = fpext float %f to double
+ %call = call double @atanh(double %conv)
+ ret double %call
+; CHECK: call double @atanh(double %conv)
+}
+define float @cbrt_test(float %f) nounwind readnone {
+; CHECK: cbrt_test
+ %conv = fpext float %f to double
+ %call = call double @cbrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @cbrtf(float %f)
+}
+
+define double @cbrt_test2(float %f) nounwind readnone {
+; CHECK: cbrt_test2
+ %conv = fpext float %f to double
+ %call = call double @cbrt(double %conv)
+ ret double %call
+; CHECK: call double @cbrt(double %conv)
+}
+define float @exp_test(float %f) nounwind readnone {
+; CHECK: exp_test
+ %conv = fpext float %f to double
+ %call = call double @exp(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @expf(float %f)
+}
+
+define double @exp_test2(float %f) nounwind readnone {
+; CHECK: exp_test2
+ %conv = fpext float %f to double
+ %call = call double @exp(double %conv)
+ ret double %call
+; CHECK: call double @exp(double %conv)
+}
+define float @expm1_test(float %f) nounwind readnone {
+; CHECK: expm1_test
+ %conv = fpext float %f to double
+ %call = call double @expm1(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @expm1f(float %f)
+}
+
+define double @expm1_test2(float %f) nounwind readnone {
+; CHECK: expm1_test2
+ %conv = fpext float %f to double
+ %call = call double @expm1(double %conv)
+ ret double %call
+; CHECK: call double @expm1(double %conv)
+}
+define float @exp10_test(float %f) nounwind readnone {
+; CHECK: exp10_test
+ %conv = fpext float %f to double
+ %call = call double @exp10(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @exp10f(float %f)
+}
+
+define double @exp10_test2(float %f) nounwind readnone {
+; CHECK: exp10_test2
+ %conv = fpext float %f to double
+ %call = call double @exp10(double %conv)
+ ret double %call
+; CHECK: call double @exp10(double %conv)
+}
+define float @log_test(float %f) nounwind readnone {
+; CHECK: log_test
+ %conv = fpext float %f to double
+ %call = call double @log(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @logf(float %f)
+}
+
+define double @log_test2(float %f) nounwind readnone {
+; CHECK: log_test2
+ %conv = fpext float %f to double
+ %call = call double @log(double %conv)
+ ret double %call
+; CHECK: call double @log(double %conv)
+}
+define float @log10_test(float %f) nounwind readnone {
+; CHECK: log10_test
+ %conv = fpext float %f to double
+ %call = call double @log10(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @log10f(float %f)
+}
+
+define double @log10_test2(float %f) nounwind readnone {
+; CHECK: log10_test2
+ %conv = fpext float %f to double
+ %call = call double @log10(double %conv)
+ ret double %call
+; CHECK: call double @log10(double %conv)
+}
+define float @log1p_test(float %f) nounwind readnone {
+; CHECK: log1p_test
+ %conv = fpext float %f to double
+ %call = call double @log1p(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @log1pf(float %f)
+}
+
+define double @log1p_test2(float %f) nounwind readnone {
+; CHECK: log1p_test2
+ %conv = fpext float %f to double
+ %call = call double @log1p(double %conv)
+ ret double %call
+; CHECK: call double @log1p(double %conv)
+}
+define float @log2_test(float %f) nounwind readnone {
+; CHECK: log2_test
+ %conv = fpext float %f to double
+ %call = call double @log2(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @log2f(float %f)
+}
+
+define double @log2_test2(float %f) nounwind readnone {
+; CHECK: log2_test2
+ %conv = fpext float %f to double
+ %call = call double @log2(double %conv)
+ ret double %call
+; CHECK: call double @log2(double %conv)
+}
+define float @logb_test(float %f) nounwind readnone {
+; CHECK: logb_test
+ %conv = fpext float %f to double
+ %call = call double @logb(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @logbf(float %f)
+}
+
+define double @logb_test2(float %f) nounwind readnone {
+; CHECK: logb_test2
+ %conv = fpext float %f to double
+ %call = call double @logb(double %conv)
+ ret double %call
+; CHECK: call double @logb(double %conv)
+}
+define float @sin_test(float %f) nounwind readnone {
+; CHECK: sin_test
+ %conv = fpext float %f to double
+ %call = call double @sin(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @sinf(float %f)
+}
+
+define double @sin_test2(float %f) nounwind readnone {
+; CHECK: sin_test2
+ %conv = fpext float %f to double
+ %call = call double @sin(double %conv)
+ ret double %call
+; CHECK: call double @sin(double %conv)
+}
+define float @sqrt_test(float %f) nounwind readnone {
+; CHECK: sqrt_test
+ %conv = fpext float %f to double
+ %call = call double @sqrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @sqrtf(float %f)
+}
+
+define double @sqrt_test2(float %f) nounwind readnone {
+; CHECK: sqrt_test2
+ %conv = fpext float %f to double
+ %call = call double @sqrt(double %conv)
+ ret double %call
+; CHECK: call double @sqrt(double %conv)
+}
+define float @tan_test(float %f) nounwind readnone {
+; CHECK: tan_test
+ %conv = fpext float %f to double
+ %call = call double @tan(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @tanf(float %f)
+}
+
+define double @tan_test2(float %f) nounwind readnone {
+; CHECK: tan_test2
+ %conv = fpext float %f to double
+ %call = call double @tan(double %conv)
+ ret double %call
+; CHECK: call double @tan(double %conv)
+}
+define float @tanh_test(float %f) nounwind readnone {
+; CHECK: tanh_test
+ %conv = fpext float %f to double
+ %call = call double @tanh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @tanhf(float %f)
+}
+
+define double @tanh_test2(float %f) nounwind readnone {
+; CHECK: tanh_test2
+ %conv = fpext float %f to double
+ %call = call double @tanh(double %conv)
+ ret double %call
+; CHECK: call double @tanh(double %conv)
+}
+
+declare double @tanh(double) nounwind readnone
+declare double @tan(double) nounwind readnone
+declare double @sqrt(double) nounwind readnone
+declare double @sin(double) nounwind readnone
+declare double @log2(double) nounwind readnone
+declare double @log1p(double) nounwind readnone
+declare double @log10(double) nounwind readnone
+declare double @log(double) nounwind readnone
+declare double @logb(double) nounwind readnone
+declare double @exp10(double) nounwind readnone
+declare double @expm1(double) nounwind readnone
+declare double @exp(double) nounwind readnone
+declare double @cbrt(double) nounwind readnone
+declare double @atanh(double) nounwind readnone
+declare double @atan(double) nounwind readnone
+declare double @acos(double) nounwind readnone
+declare double @acosh(double) nounwind readnone
+declare double @asin(double) nounwind readnone
+declare double @asinh(double) nounwind readnone
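+
+; All cases above follow one pattern: when the double result of the
+; libcall is consumed only by an fptrunc back to float, the call is
+; shrunk to its float variant; when the double result escapes (the
+; *_test2 cases), the call must stay in double precision.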
diff --git a/test/Transforms/InstCombine/double-float-shrink-2.ll b/test/Transforms/InstCombine/double-float-shrink-2.ll
new file mode 100644
index 000000000000..7f6df92c96c5
--- /dev/null
+++ b/test/Transforms/InstCombine/double-float-shrink-2.ll
@@ -0,0 +1,80 @@
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY %s
+
+; DO-SIMPLIFY: call float @floorf(
+; DO-SIMPLIFY: call float @ceilf(
+; DO-SIMPLIFY: call float @roundf(
+; DO-SIMPLIFY: call float @nearbyintf(
+; DO-SIMPLIFY: call float @truncf(
+; DO-SIMPLIFY: call float @fabsf(
+
+; C89-SIMPLIFY: call float @floorf(
+; C89-SIMPLIFY: call float @ceilf(
+; C89-SIMPLIFY: call double @round(
+; C89-SIMPLIFY: call double @nearbyint(
+
+; DONT-SIMPLIFY: call double @floor(
+; DONT-SIMPLIFY: call double @ceil(
+; DONT-SIMPLIFY: call double @round(
+; DONT-SIMPLIFY: call double @nearbyint(
+; DONT-SIMPLIFY: call double @trunc(
+; DONT-SIMPLIFY: call double @fabs(
+
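+; The three check prefixes encode how much of the float-precision libm
+; each target's runtime is assumed to provide: full-C99 targets shrink
+; every call, x86_64-pc-win32 shrinks only the C89 functions, and
+; i386-pc-win32 shrinks none of them.
+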
+declare double @floor(double)
+declare double @ceil(double)
+declare double @round(double)
+declare double @nearbyint(double)
+declare double @trunc(double)
+declare double @fabs(double)
+
+define float @test_floor(float %C) {
+ %D = fpext float %C to double
+ ; --> floorf
+ %E = call double @floor(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_ceil(float %C) {
+ %D = fpext float %C to double
+ ; --> ceilf
+ %E = call double @ceil(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_round(float %C) {
+ %D = fpext float %C to double
+ ; --> roundf
+ %E = call double @round(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_nearbyint(float %C) {
+ %D = fpext float %C to double
+ ; --> nearbyintf
+ %E = call double @nearbyint(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_trunc(float %C) {
+ %D = fpext float %C to double
+ ; --> truncf
+ %E = call double @trunc(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_fabs(float %C) {
+ %D = fpext float %C to double
+ ; --> fabsf
+ %E = call double @fabs(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
diff --git a/test/Transforms/InstCombine/exact.ll b/test/Transforms/InstCombine/exact.ll
index 14741e3c1c33..88ca88c3b927 100644
--- a/test/Transforms/InstCombine/exact.ll
+++ b/test/Transforms/InstCombine/exact.ll
@@ -99,9 +99,9 @@ define i1 @ashr_icmp2(i64 %X) nounwind {
; PR9998
; Make sure we don't transform the ashr here into an sdiv
; CHECK: @pr9998
-; CHECK: = and i32 %V, 1
-; CHECK: %Z = icmp ne
-; CHECK: ret i1 %Z
+; CHECK: [[BIT:%[A-Za-z0-9.]+]] = and i32 %V, 1
+; CHECK-NEXT: [[CMP:%[A-Za-z0-9.]+]] = icmp ne i32 [[BIT]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
define i1 @pr9998(i32 %V) nounwind {
entry:
%W = shl i32 %V, 31
@@ -112,6 +112,7 @@ entry:
}
+
; CHECK: @udiv_icmp1
; CHECK: icmp ne i64 %X, 0
define i1 @udiv_icmp1(i64 %X) nounwind {
diff --git a/test/Transforms/InstCombine/exp2-1.ll b/test/Transforms/InstCombine/exp2-1.ll
new file mode 100644
index 000000000000..1b0ad5000412
--- /dev/null
+++ b/test/Transforms/InstCombine/exp2-1.ll
@@ -0,0 +1,76 @@
+; Test that the exp2 library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare double @exp2(double)
+declare float @exp2f(float)
+
+; Check exp2(sitofp(x)) -> ldexp(1.0, sext(x)).
+
+define double @test_simplify1(i32 %x) {
+; CHECK: @test_simplify1
+ %conv = sitofp i32 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define double @test_simplify2(i16 signext %x) {
+; CHECK: @test_simplify2
+ %conv = sitofp i16 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define double @test_simplify3(i8 signext %x) {
+; CHECK: @test_simplify3
+ %conv = sitofp i8 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define float @test_simplify4(i32 %x) {
+; CHECK: @test_simplify4
+ %conv = sitofp i32 %x to float
+ %ret = call float @exp2f(float %conv)
+; CHECK: call float @ldexpf
+ ret float %ret
+}
+
+; Check exp2(uitofp(x)) -> ldexp(1.0, zext(x)).
+
+define double @test_no_simplify1(i32 %x) {
+; CHECK: @test_no_simplify1
+ %conv = uitofp i32 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @exp2
+ ret double %ret
+}
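+; An unsigned i32 value may not fit in ldexp's signed-int exponent
+; argument, which is why the i32 case above is left alone while the
+; narrower zeroext cases below can still be rewritten.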
+
+define double @test_simplify6(i16 zeroext %x) {
+; CHECK: @test_simplify6
+ %conv = uitofp i16 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define double @test_simplify7(i8 zeroext %x) {
+; CHECK: @test_simplify7
+ %conv = uitofp i8 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define float @test_simplify8(i8 zeroext %x) {
+; CHECK: @test_simplify8
+ %conv = uitofp i8 %x to float
+ %ret = call float @exp2f(float %conv)
+; CHECK: call float @ldexpf
+ ret float %ret
+}
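+
+; A sketch of the full expected rewrite (hypothetical function, not part
+; of the original test): the checks above only match the callee name, but
+; the rewritten call should look roughly like this.
+define double @exp2_sketch(i32 %x) {
+  %conv = sitofp i32 %x to double
+  %ret = call double @exp2(double %conv)
+; expected after -instcombine:
+;   %ret = call double @ldexp(double 1.000000e+00, i32 %x)
+  ret double %ret
+}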
diff --git a/test/Transforms/InstCombine/exp2-2.ll b/test/Transforms/InstCombine/exp2-2.ll
new file mode 100644
index 000000000000..bed063798e29
--- /dev/null
+++ b/test/Transforms/InstCombine/exp2-2.ll
@@ -0,0 +1,17 @@
+; Test that the exp2 library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare float @exp2(double)
+
+; Check that exp2 functions with the wrong prototype aren't simplified.
+
+define float @test_no_simplify1(i32 %x) {
+; CHECK: @test_no_simplify1
+ %conv = sitofp i32 %x to double
+ %ret = call float @exp2(double %conv)
+; CHECK: call float @exp2(double %conv)
+ ret float %ret
+}
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
new file mode 100644
index 000000000000..edcbcc71dfb4
--- /dev/null
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -0,0 +1,467 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Test case "float fold(float a) { return 1.2f * a * 2.3f; }":
+; 1.2f and 2.3f are supposed to be folded into a single constant.
+define float @fold(float %a) {
+ %mul = fmul fast float %a, 0x3FF3333340000000
+ %mul1 = fmul fast float %mul, 0x4002666660000000
+ ret float %mul1
+; CHECK: @fold
+; CHECK: fmul fast float %a, 0x4006147AE0000000
+}
+
+; Same test case as fold(), except that the second multiply lacks the
+; 'fast' flag, so the constants must not be folded together.
+define float @notfold(float %a) {
+; CHECK: @notfold
+; CHECK: %mul = fmul fast float %a, 0x3FF3333340000000
+ %mul = fmul fast float %a, 0x3FF3333340000000
+ %mul1 = fmul float %mul, 0x4002666660000000
+ ret float %mul1
+}
+
+define float @fold2(float %a) {
+; CHECK: @fold2
+; CHECK: fmul fast float %a, 0x4006147AE0000000
+ %mul = fmul float %a, 0x3FF3333340000000
+ %mul1 = fmul fast float %mul, 0x4002666660000000
+ ret float %mul1
+}
+
+; C * f1 + f1 = (C+1) * f1
+define double @fold3(double %f1) {
+ %t1 = fmul fast double 2.000000e+00, %f1
+ %t2 = fadd fast double %f1, %t1
+ ret double %t2
+; CHECK: @fold3
+; CHECK: fmul fast double %f1, 3.000000e+00
+}
+
+; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
+define float @fold4(float %f1, float %f2) {
+ %sub = fsub float 4.000000e+00, %f1
+ %sub1 = fsub float 5.000000e+00, %f2
+ %add = fadd fast float %sub, %sub1
+ ret float %add
+; CHECK: @fold4
+; CHECK: %1 = fadd fast float %f1, %f2
+; CHECK: fsub fast float 9.000000e+00, %1
+}
+
+; (X + C1) + C2 => X + (C1 + C2)
+define float @fold5(float %f1, float %f2) {
+ %add = fadd float %f1, 4.000000e+00
+ %add1 = fadd fast float %add, 5.000000e+00
+ ret float %add1
+; CHECK: @fold5
+; CHECK: fadd fast float %f1, 9.000000e+00
+}
+
+; (X + X) + X => 3.0 * X
+define float @fold6(float %f1) {
+ %t1 = fadd fast float %f1, %f1
+ %t2 = fadd fast float %f1, %t1
+ ret float %t2
+; CHECK: @fold6
+; CHECK: fmul fast float %f1, 3.000000e+00
+}
+
+; C1 * X + (X + X) => (C1 + 2) * X
+define float @fold7(float %f1) {
+ %t1 = fmul fast float %f1, 5.000000e+00
+ %t2 = fadd fast float %f1, %f1
+ %t3 = fadd fast float %t1, %t2
+ ret float %t3
+; CHECK: @fold7
+; CHECK: fmul fast float %f1, 7.000000e+00
+}
+
+; (X + X) + (X + X) => 4.0 * X
+define float @fold8(float %f1) {
+ %t1 = fadd fast float %f1, %f1
+ %t2 = fadd fast float %f1, %f1
+ %t3 = fadd fast float %t1, %t2
+ ret float %t3
+; CHECK: fold8
+; CHECK: fmul fast float %f1, 4.000000e+00
+}
+
+; X - (X + Y) => 0 - Y
+define float @fold9(float %f1, float %f2) {
+ %t1 = fadd float %f1, %f2
+ %t3 = fsub fast float %f1, %t1
+ ret float %t3
+
+; CHECK: @fold9
+; CHECK: fsub fast float 0.000000e+00, %f2
+}
+
+; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3, rather than
+; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing the constant addend at
+; the top of the simplified expression tree may reveal further
+; optimization opportunities in the enclosing expressions.
+;
+define float @fold10(float %f1, float %f2) {
+ %t1 = fadd fast float 2.000000e+00, %f1
+ %t2 = fsub fast float %f2, 3.000000e+00
+ %t3 = fadd fast float %t1, %t2
+ ret float %t3
+; CHECK: @fold10
+; CHECK: %t3 = fadd fast float %t2, -1.000000e+00
+; CHECK: ret float %t3
+}
+
+; This input once caused a crash/miscompilation.
+define float @fail1(float %f1, float %f2) {
+ %conv3 = fadd fast float %f1, -1.000000e+00
+ %add = fadd fast float %conv3, %conv3
+ %add2 = fadd fast float %add, %conv3
+ ret float %add2
+; CHECK: @fail1
+; CHECK: ret
+}
+
+define double @fail2(double %f1, double %f2) {
+ %t1 = fsub fast double %f1, %f2
+ %t2 = fadd fast double %f1, %f2
+ %t3 = fsub fast double %t1, %t2
+ ret double %t3
+; CHECK: @fail2
+; CHECK: ret
+}
+
+; c1 * x - x => (c1 - 1.0) * x
+define float @fold13(float %x) {
+ %mul = fmul fast float %x, 7.000000e+00
+ %sub = fsub fast float %mul, %x
+ ret float %sub
+; CHECK: fold13
+; CHECK: fmul fast float %x, 6.000000e+00
+; CHECK: ret
+}
+
+; =========================================================================
+;
+; Test cases for fmul begin
+;
+; =========================================================================
+
+; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
+define float @fmul_distribute1(float %f1) {
+ %t1 = fmul float %f1, 6.0e+3
+ %t2 = fadd float %t1, 2.0e+3
+ %t3 = fmul fast float %t2, 5.0e+3
+ ret float %t3
+; CHECK: @fmul_distribute1
+; CHECK: %1 = fmul fast float %f1, 3.000000e+07
+; CHECK: %t3 = fadd fast float %1, 1.000000e+07
+}
+
+; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
+define double @fmul_distribute2(double %f1, double %f2) {
+ %t1 = fdiv double %f1, 3.0e+0
+ %t2 = fadd double %t1, 5.0e+1
+ ; 0x10000000000000 = DBL_MIN
+ %t3 = fmul fast double %t2, 0x10000000000000
+ ret double %t3
+
+; CHECK: @fmul_distribute2
+; CHECK: %1 = fdiv fast double %f1, 0x7FE8000000000000
+; CHECK: fadd fast double %1, 0x69000000000000
+}
+
+; 5.0e-1 * DBL_MIN yields a denormal, so "(f1/3.0 + 5.0e-1) * DBL_MIN" cannot
+; be distributed into f1/(3.0/DBL_MIN) + (5.0e-1*DBL_MIN).
+define double @fmul_distribute3(double %f1) {
+ %t1 = fdiv double %f1, 3.0e+0
+ %t2 = fadd double %t1, 5.0e-1
+ %t3 = fmul fast double %t2, 0x10000000000000
+ ret double %t3
+
+; CHECK: @fmul_distribute3
+; CHECK: fmul fast double %t2, 0x10000000000000
+}
+
+; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
+define float @fmul_distribute4(float %f1) {
+ %t1 = fmul float %f1, 6.0e+3
+ %t2 = fsub float 2.0e+3, %t1
+ %t3 = fmul fast float %t2, 5.0e+3
+ ret float %t3
+; CHECK: @fmul_distribute4
+; CHECK: %1 = fmul fast float %f1, 3.000000e+07
+; CHECK: %t3 = fsub fast float 1.000000e+07, %1
+}
+
+; C1/X * C2 => (C1*C2) / X
+define float @fmul2(float %f1) {
+ %t1 = fdiv float 2.0e+3, %f1
+ %t3 = fmul fast float %t1, 6.0e+3
+ ret float %t3
+; CHECK: @fmul2
+; CHECK: fdiv fast float 1.200000e+07, %f1
+}
+
+; X/C1 * C2 => X * (C2/C1) (if C2/C1 is a normal FP value)
+define float @fmul3(float %f1, float %f2) {
+ %t1 = fdiv float %f1, 2.0e+3
+ %t3 = fmul fast float %t1, 6.0e+3
+ ret float %t3
+; CHECK: @fmul3
+; CHECK: fmul fast float %f1, 3.000000e+00
+}
+
+; The rule "X/C1 * C2 => X * (C2/C1)" is not applicable if C2/C1 is either a
+; special value or a denormal. The constant 0x3810000000000000 here is FLT_MIN.
+;
+define float @fmul4(float %f1, float %f2) {
+ %t1 = fdiv float %f1, 2.0e+3
+ %t3 = fmul fast float %t1, 0x3810000000000000
+ ret float %t3
+; CHECK: @fmul4
+; CHECK: fmul fast float %t1, 0x3810000000000000
+}
+
+; X/C1 * C2 => X / (C1/C2) if C2/C1 is either a special value or a denormal,
+; and C1/C2 is a normal value.
+;
+define float @fmul5(float %f1, float %f2) {
+ %t1 = fdiv float %f1, 3.0e+0
+ %t3 = fmul fast float %t1, 0x3810000000000000
+ ret float %t3
+; CHECK: @fmul5
+; CHECK: fdiv fast float %f1, 0x47E8000000000000
+}
+
+; (X*Y) * X => (X*X) * Y
+define float @fmul6(float %f1, float %f2) {
+ %mul = fmul float %f1, %f2
+ %mul1 = fmul fast float %mul, %f1
+ ret float %mul1
+; CHECK: @fmul6
+; CHECK: fmul fast float %f1, %f1
+}
+
+; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
+define float @fmul7(float %f1, float %f2) {
+ %mul = fmul float %f1, %f2
+ %mul1 = fmul fast float %mul, %f1
+ %add = fadd float %mul1, %mul
+ ret float %add
+; CHECK: @fmul7
+; CHECK: fmul fast float %mul, %f1
+}
+
+; =========================================================================
+;
+; Test cases for negation
+;
+; =========================================================================
+define float @fneg1(float %f1, float %f2) {
+ %sub = fsub float -0.000000e+00, %f1
+ %sub1 = fsub nsz float 0.000000e+00, %f2
+ %mul = fmul float %sub, %sub1
+ ret float %mul
+; CHECK: @fneg1
+; CHECK: fmul float %f1, %f2
+}
+
+; =========================================================================
+;
+; Test cases for div
+;
+; =========================================================================
+
+; X/C1 / C2 => X * (1/(C2*C1))
+define float @fdiv1(float %x) {
+ %div = fdiv float %x, 0x3FF3333340000000
+ %div1 = fdiv fast float %div, 0x4002666660000000
+ ret float %div1
+; 0x3FF3333340000000 = 1.2f
+; 0x4002666660000000 = 2.3f
+; 0x3FD7303B60000000 = 0.36231884057971014492
+; CHECK: @fdiv1
+; CHECK: fmul fast float %x, 0x3FD7303B60000000
+}
+
+; X*C1 / C2 => X * (C1/C2)
+define float @fdiv2(float %x) {
+ %mul = fmul float %x, 0x3FF3333340000000
+ %div1 = fdiv fast float %mul, 0x4002666660000000
+ ret float %div1
+
+; 0x3FF3333340000000 = 1.2f
+; 0x4002666660000000 = 2.3f
+; 0x3FE0B21660000000 = 0.52173918485641479492
+; CHECK: @fdiv2
+; CHECK: fmul fast float %x, 0x3FE0B21660000000
+}
+
+; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) is C2/C1 is a denormal
+;
+define float @fdiv3(float %x) {
+ %div = fdiv float %x, 0x47EFFFFFE0000000
+ %div1 = fdiv fast float %div, 0x4002666660000000
+ ret float %div1
+; CHECK: @fdiv3
+; CHECK: fdiv float %x, 0x47EFFFFFE0000000
+}
+
+; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
+define float @fdiv4(float %x) {
+ %mul = fmul float %x, 0x47EFFFFFE0000000
+ %div = fdiv float %mul, 0x3FC99999A0000000
+ ret float %div
+; CHECK: @fdiv4
+; CHECK: fmul float %x, 0x47EFFFFFE0000000
+}
+
+; (X/Y)/Z => X/(Y*Z)
+define float @fdiv5(float %f1, float %f2, float %f3) {
+ %t1 = fdiv float %f1, %f2
+ %t2 = fdiv fast float %t1, %f3
+ ret float %t2
+; CHECK: @fdiv5
+; CHECK: fmul float %f2, %f3
+}
+
+; Z/(X/Y) => (Z*Y)/X
+define float @fdiv6(float %f1, float %f2, float %f3) {
+ %t1 = fdiv float %f1, %f2
+ %t2 = fdiv fast float %f3, %t1
+ ret float %t2
+; CHECK: @fdiv6
+; CHECK: fmul float %f3, %f2
+}
+
+; C1/(X*C2) => (C1/C2) / X
+define float @fdiv7(float %x) {
+ %t1 = fmul float %x, 3.0e0
+ %t2 = fdiv fast float 15.0e0, %t1
+ ret float %t2
+; CHECK: @fdiv7
+; CHECK: fdiv fast float 5.000000e+00, %x
+}
+
+; C1/(X/C2) => (C1*C2) / X
+define float @fdiv8(float %x) {
+ %t1 = fdiv float %x, 3.0e0
+ %t2 = fdiv fast float 15.0e0, %t1
+ ret float %t2
+; CHECK: @fdiv8
+; CHECK: fdiv fast float 4.500000e+01, %x
+}
+
+; C1/(C2/X) => (C1/C2) * X
+define float @fdiv9(float %x) {
+ %t1 = fdiv float 3.0e0, %x
+ %t2 = fdiv fast float 15.0e0, %t1
+ ret float %t2
+; CHECK: @fdiv9
+; CHECK: fmul fast float %x, 5.000000e+00
+}
+
+; =========================================================================
+;
+; Test cases for factorization
+;
+; =========================================================================
+; x*z + y*z => (x+y) * z
+define float @fact_mul1(float %x, float %y, float %z) {
+ %t1 = fmul fast float %x, %z
+ %t2 = fmul fast float %y, %z
+ %t3 = fadd fast float %t1, %t2
+ ret float %t3
+; CHECK: @fact_mul1
+; CHECK: fmul fast float %1, %z
+}
+
+; z*x + y*z => (x+y) * z
+define float @fact_mul2(float %x, float %y, float %z) {
+ %t1 = fmul fast float %z, %x
+ %t2 = fmul fast float %y, %z
+ %t3 = fsub fast float %t1, %t2
+ ret float %t3
+; CHECK: @fact_mul2
+; CHECK: fmul fast float %1, %z
+}
+
+; z*x - z*y => (x-y) * z
+define float @fact_mul3(float %x, float %y, float %z) {
+ %t2 = fmul fast float %z, %y
+ %t1 = fmul fast float %z, %x
+ %t3 = fsub fast float %t1, %t2
+ ret float %t3
+; CHECK: @fact_mul3
+; CHECK: fmul fast float %1, %z
+}
+
+; x*z - z*y => (x-y) * z
+define float @fact_mul4(float %x, float %y, float %z) {
+ %t1 = fmul fast float %x, %z
+ %t2 = fmul fast float %z, %y
+ %t3 = fsub fast float %t1, %t2
+ ret float %t3
+; CHECK: @fact_mul4
+; CHECK: fmul fast float %1, %z
+}
+
+; x/y + x/z, no xform
+define float @fact_div1(float %x, float %y, float %z) {
+ %t1 = fdiv fast float %x, %y
+ %t2 = fdiv fast float %x, %z
+ %t3 = fadd fast float %t1, %t2
+ ret float %t3
+; CHECK: fact_div1
+; CHECK: fadd fast float %t1, %t2
+}
+
+; x/y + z/x; no xform
+define float @fact_div2(float %x, float %y, float %z) {
+ %t1 = fdiv fast float %x, %y
+ %t2 = fdiv fast float %z, %x
+ %t3 = fadd fast float %t1, %t2
+ ret float %t3
+; CHECK: fact_div2
+; CHECK: fadd fast float %t1, %t2
+}
+
+; y/x + z/x => (y+z)/x
+define float @fact_div3(float %x, float %y, float %z) {
+ %t1 = fdiv fast float %y, %x
+ %t2 = fdiv fast float %z, %x
+ %t3 = fadd fast float %t1, %t2
+ ret float %t3
+; CHECK: fact_div3
+; CHECK: fdiv fast float %1, %x
+}
+
+; y/x - z/x => (y-z)/x
+define float @fact_div4(float %x, float %y, float %z) {
+ %t1 = fdiv fast float %y, %x
+ %t2 = fdiv fast float %z, %x
+ %t3 = fsub fast float %t1, %t2
+ ret float %t3
+; CHECK: fact_div4
+; CHECK: fdiv fast float %1, %x
+}
+
+; y/x + z/x => (y+z)/x is disabled if y+z is a denormal; here the sum is
+; a normal value, so the fold fires.
+define float @fact_div5(float %x) {
+ %t1 = fdiv fast float 0x3810000000000000, %x
+ %t2 = fdiv fast float 0x3800000000000000, %x
+ %t3 = fadd fast float %t1, %t2
+ ret float %t3
+; CHECK: fact_div5
+; CHECK: fdiv fast float 0x3818000000000000, %x
+}
+
+; y/x - z/x => (y-z)/x is disabled if y-z is a denormal.
+define float @fact_div6(float %x) {
+ %t1 = fdiv fast float 0x3810000000000000, %x
+ %t2 = fdiv fast float 0x3800000000000000, %x
+ %t3 = fsub fast float %t1, %t2
+ ret float %t3
+; CHECK: fact_div6
+; CHECK: %t3 = fsub fast float %t1, %t2
+}
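+
+; A recurring guard in the tests above: a fold is suppressed whenever the
+; constant it would create is a denormal or otherwise not a normal FP
+; value, so the transforms never materialize constants the target might
+; treat specially.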
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
new file mode 100644
index 000000000000..0510df3d24b9
--- /dev/null
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -0,0 +1,134 @@
+; Test that the ffs* library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=LINUX
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @ffs(i32)
+declare i32 @ffsl(i32)
+declare i32 @ffsll(i64)
+
+; Check ffs(0) -> 0.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+ %ret = call i32 @ffs(i32 0)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify2() {
+; CHECK-LINUX: @test_simplify2
+ %ret = call i32 @ffsl(i32 0)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 0
+}
+
+define i32 @test_simplify3() {
+; CHECK-LINUX: @test_simplify3
+ %ret = call i32 @ffsll(i64 0)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 0
+}
+
+; Check ffs(c) -> cttz(c) + 1, where 'c' is a constant.
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+ %ret = call i32 @ffs(i32 1)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify5() {
+; CHECK: @test_simplify5
+ %ret = call i32 @ffs(i32 2048)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 12
+}
+
+define i32 @test_simplify6() {
+; CHECK: @test_simplify6
+ %ret = call i32 @ffs(i32 65536)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 17
+}
+
+define i32 @test_simplify7() {
+; CHECK-LINUX: @test_simplify7
+ %ret = call i32 @ffsl(i32 65536)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 17
+}
+
+define i32 @test_simplify8() {
+; CHECK-LINUX: @test_simplify8
+ %ret = call i32 @ffsll(i64 1024)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 11
+}
+
+define i32 @test_simplify9() {
+; CHECK-LINUX: @test_simplify9
+ %ret = call i32 @ffsll(i64 65536)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 17
+}
+
+define i32 @test_simplify10() {
+; CHECK-LINUX: @test_simplify10
+ %ret = call i32 @ffsll(i64 17179869184)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 35
+}
+
+define i32 @test_simplify11() {
+; CHECK-LINUX: @test_simplify11
+ %ret = call i32 @ffsll(i64 281474976710656)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 49
+}
+
+define i32 @test_simplify12() {
+; CHECK-LINUX: @test_simplify12
+ %ret = call i32 @ffsll(i64 1152921504606846976)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 61
+}
+
+; Check ffs(x) -> x != 0 ? (i32)llvm.cttz(x) + 1 : 0.
+
+define i32 @test_simplify13(i32 %x) {
+; CHECK: @test_simplify13
+ %ret = call i32 @ffs(i32 %x)
+; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[RET]]
+}
+
+define i32 @test_simplify14(i32 %x) {
+; CHECK-LINUX: @test_simplify14
+ %ret = call i32 @ffsl(i32 %x)
+; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
+; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
+; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 [[RET]]
+}
+
+define i32 @test_simplify15(i64 %x) {
+; CHECK-LINUX: @test_simplify15
+ %ret = call i32 @ffsll(i64 %x)
+; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i64 [[CTTZ]], 1
+; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
+; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
+; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 [[RET]]
+}
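+
+; Note on the expansion above: @llvm.cttz with a false second operand is
+; well defined at zero (it returns the bit width), but ffs(0) must return
+; 0, hence the icmp/select wrapper rather than a bare cttz + 1.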
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
new file mode 100644
index 000000000000..3671b4c6991c
--- /dev/null
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -0,0 +1,72 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; (-0.0 - X) * C => X * -C
+define float @test1(float %x) {
+ %sub = fsub float -0.000000e+00, %x
+ %mul = fmul float %sub, 2.0e+1
+ ret float %mul
+
+; CHECK: @test1
+; CHECK: fmul float %x, -2.000000e+01
+}
+
+; (0.0 - X) * C => X * -C
+define float @test2(float %x) {
+ %sub = fsub nsz float 0.000000e+00, %x
+ %mul = fmul float %sub, 2.0e+1
+ ret float %mul
+
+; CHECK: @test2
+; CHECK: fmul float %x, -2.000000e+01
+}
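+; The nsz flag above is what licenses the fold: without it "0.0 - X" is
+; not a pure negation, since for X == +0.0 the subtraction yields +0.0
+; while -X yields -0.0.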
+
+; (-0.0 - X) * (-0.0 - Y) => X * Y
+define float @test3(float %x, float %y) {
+ %sub1 = fsub float -0.000000e+00, %x
+ %sub2 = fsub float -0.000000e+00, %y
+ %mul = fmul float %sub1, %sub2
+ ret float %mul
+; CHECK: @test3
+; CHECK: fmul float %x, %y
+}
+
+; (0.0 - X) * (0.0 - Y) => X * Y
+define float @test4(float %x, float %y) {
+ %sub1 = fsub nsz float 0.000000e+00, %x
+ %sub2 = fsub nsz float 0.000000e+00, %y
+ %mul = fmul float %sub1, %sub2
+ ret float %mul
+; CHECK: @test4
+; CHECK: fmul float %x, %y
+}
+
+; (-0.0 - X) * Y => -0.0 - (X * Y)
+define float @test5(float %x, float %y) {
+ %sub1 = fsub float -0.000000e+00, %x
+ %mul = fmul float %sub1, %y
+ ret float %mul
+; CHECK: @test5
+; CHECK: %1 = fmul float %x, %y
+; CHECK: %mul = fsub float -0.000000e+00, %1
+}
+
+; (0.0 - X) * Y => 0.0 - (X * Y)
+define float @test6(float %x, float %y) {
+ %sub1 = fsub nsz float 0.000000e+00, %x
+ %mul = fmul float %sub1, %y
+ ret float %mul
+; CHECK: @test6
+; CHECK: %1 = fmul float %x, %y
+; CHECK: %mul = fsub float -0.000000e+00, %1
+}
+
+; "(-0.0 - X) * Y => -0.0 - (X * Y)" is disabled if expression "-0.0 - X"
+; has multiple uses.
+define float @test7(float %x, float %y) {
+ %sub1 = fsub float -0.000000e+00, %x
+ %mul = fmul float %sub1, %y
+ %mul2 = fmul float %mul, %sub1
+ ret float %mul2
+; CHECK: @test7
+; CHECK: fsub float -0.000000e+00, %x
+}
diff --git a/test/Transforms/InstCombine/fold-phi.ll b/test/Transforms/InstCombine/fold-phi.ll
new file mode 100644
index 000000000000..bd01d58aa586
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-phi.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; CHECK: no_crash
+define float @no_crash(float %a) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %sum.057 = phi float [ 0.000000e+00, %entry ], [ %add5, %bb0 ]
+ %add5 = fadd float %sum.057, %a ; PR14592
+ br i1 undef, label %bb0, label %end
+
+bb0:
+ br label %for.body
+
+end:
+ ret float %add5
+}
+
+; CHECK: fold_phi
+define float @fold_phi(float %a) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+; CHECK: phi float
+; CHECK-NEXT: br i1 undef
+ %sum.057 = phi float [ 0.000000e+00, %entry ], [ %add5, %bb0 ]
+ %add5 = fadd float %sum.057, 1.0 ;; Should be moved to the latch!
+ br i1 undef, label %bb0, label %end
+
+; CHECK: bb0:
+bb0:
+; CHECK: fadd float
+ br label %for.body
+
+end:
+ ret float %add5
+}
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index bc6aa0a6891f..09f053289dc1 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -13,3 +13,22 @@ define i8 @test2() {
; CHECK: ret i8 -1
}
+; CHECK: test3
+define half @test3(float %a) {
+; CHECK: fptrunc
+; CHECK: llvm.fabs.f16
+ %b = call float @llvm.fabs.f32(float %a)
+ %c = fptrunc float %b to half
+ ret half %c
+}
+
+; CHECK: test4
+define half @test4(float %a) {
+; CHECK: fptrunc
+; CHECK: fsub
+ %b = fsub float -0.0, %a
+ %c = fptrunc float %b to half
+ ret half %c
+}
+
+declare float @llvm.fabs.f32(float) nounwind readonly
diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll
new file mode 100644
index 000000000000..39d86b4588cc
--- /dev/null
+++ b/test/Transforms/InstCombine/fprintf-1.ll
@@ -0,0 +1,80 @@
+; Test that the fprintf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [3 x i8] c"%s\00"
+
+declare i32 @fprintf(%FILE*, i8*, ...)
+
+; Check fprintf(fp, "foo") -> fwrite("foo", 3, 1, fp).
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK: @test_simplify1
+ %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
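+; (The length 12 in the fwrite call is strlen("hello world\0A"); the
+; terminating NUL is not written.)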
+
+; Check fprintf(fp, "%c", chr) -> fputc(chr, fp).
+
+define void @test_simplify2(%FILE* %fp) {
+; CHECK: @test_simplify2
+ %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i8 104)
+; CHECK-NEXT: call i32 @fputc(i32 104, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check fprintf(fp, "%s", str) -> fputs(str, fp).
+; NOTE: The fputs simplifier simplifies this further to fwrite.
+
+define void @test_simplify3(%FILE* %fp) {
+; CHECK: @test_simplify3
+ %fmt = getelementptr [3 x i8]* @percent_s, i32 0, i32 0
+ %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check fprintf(fp, fmt, ...) -> fiprintf(fp, fmt, ...) if no floating point.
+
+define void @test_simplify4(%FILE* %fp) {
+; CHECK-IPRINTF: @test_simplify4
+ %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i32 187)
+; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fiprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+ ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify1(%FILE* %fp) {
+; CHECK-IPRINTF: @test_no_simplify1
+ %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double 1.87)
+; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+ ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify2(%FILE* %fp, double %d) {
+; CHECK: @test_no_simplify2
+ %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+ call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double %d)
+ ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/fputs-1.ll b/test/Transforms/InstCombine/fputs-1.ll
new file mode 100644
index 000000000000..c7c5becfd038
--- /dev/null
+++ b/test/Transforms/InstCombine/fputs-1.ll
@@ -0,0 +1,43 @@
+; Test that the fputs library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@empty = constant [1 x i8] zeroinitializer
+@A = constant [2 x i8] c"A\00"
+@hello = constant [7 x i8] c"hello\0A\00"
+
+declare i32 @fputs(i8*, %FILE*)
+
+; Check fputs(str, fp) --> fwrite(str, strlen(str), 1, fp).
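+; For @empty below, strlen is 0, so the resulting zero-length fwrite writes
+; nothing and is removed outright; test_simplify1 therefore expects no call.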
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK: @test_simplify1
+ %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ call i32 @fputs(i8* %str, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; NOTE: The fwrite simplifier simplifies this further to fputc.
+
+define void @test_simplify2(%FILE* %fp) {
+; CHECK: @test_simplify2
+ %str = getelementptr [2 x i8]* @A, i32 0, i32 0
+ call i32 @fputs(i8* %str, %FILE* %fp)
+; CHECK-NEXT: call i32 @fputc(i32 65, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3(%FILE* %fp) {
+; CHECK: @test_simplify3
+ %str = getelementptr [7 x i8]* @hello, i32 0, i32 0
+ call i32 @fputs(i8* %str, %FILE* %fp)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([7 x i8]* @hello, i32 0, i32 0), i32 6, i32 1, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/fwrite-1.ll b/test/Transforms/InstCombine/fwrite-1.ll
new file mode 100644
index 000000000000..528cdec217f7
--- /dev/null
+++ b/test/Transforms/InstCombine/fwrite-1.ll
@@ -0,0 +1,57 @@
+; Test that the fwrite library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@str = constant [1 x i8] zeroinitializer
+@empty = constant [0 x i8] zeroinitializer
+
+declare i64 @fwrite(i8*, i64, i64, %FILE *)
+
+; Check fwrite(S, 1, 1, fp) -> fputc(S[0], fp).
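+; Illustrative C-level sketch (not part of the checked output):
+;   fwrite(s, 1, 1, fp);   /* before: exactly one byte requested */
+;   fputc(s[0], fp);       /* after: the byte is loaded and widened to int */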
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK: @test_simplify1
+ %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
+ call i64 @fwrite(i8* %str, i64 1, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i32 @fputc(i32 0, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify2(%FILE* %fp) {
+; CHECK: @test_simplify2
+ %str = getelementptr inbounds [0 x i8]* @empty, i64 0, i64 0
+ call i64 @fwrite(i8* %str, i64 1, i64 0, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3(%FILE* %fp) {
+; CHECK: @test_simplify3
+ %str = getelementptr inbounds [0 x i8]* @empty, i64 0, i64 0
+ call i64 @fwrite(i8* %str, i64 0, i64 1, %FILE* %fp)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define i64 @test_no_simplify1(%FILE* %fp) {
+; CHECK: @test_no_simplify1
+ %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
+ %ret = call i64 @fwrite(i8* %str, i64 1, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i64 @fwrite
+ ret i64 %ret
+; CHECK-NEXT: ret i64 %ret
+}
+
+define void @test_no_simplify2(%FILE* %fp, i64 %size) {
+; CHECK: @test_no_simplify2
+ %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
+ call i64 @fwrite(i8* %str, i64 %size, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i64 @fwrite
+ ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 1c120ecbe9eb..bb07736ef803 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -424,7 +424,7 @@ define i32 @test35() nounwind {
i8* getelementptr (%t1* bitcast (%t0* @s to %t1*), i32 0, i32 1, i32 0)) nounwind
ret i32 0
; CHECK: @test35
-; CHECK: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0* @s, i64 0, i32 1, i64 0)) nounwind
+; CHECK: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0* @s, i64 0, i32 1, i64 0)) [[NUW:#[0-9]+]]
}
; Instcombine should constant-fold the GEP so that indices that have
@@ -492,3 +492,21 @@ define void @three_gep_f(%three_gep_t2* %x) {
declare void @three_gep_g(i32*)
declare void @three_gep_h(%three_gep_t2*)
+
+%struct.ham = type { i32, %struct.zot*, %struct.zot*, %struct.zot* }
+%struct.zot = type { i64, i8 }
+
+define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
+ %tmp = getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
+ %tmp2 = load %struct.zot** %tmp, align 8
+ %tmp3 = bitcast %struct.zot* %tmp2 to i8*
+ %tmp4 = getelementptr inbounds i8* %tmp3, i64 -8
+ store i8 %arg1, i8* %tmp4, align 8
+ ret void
+
+; CHECK: @test39
+; CHECK: getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
+; CHECK: getelementptr inbounds i8* %tmp3, i64 -8
+}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 8e064a4f2fc9..446c0e01dcaa 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -677,3 +677,212 @@ define i1 @test66(i64 %A, i64 %B) {
; CHECK-NEXT: ret i1 true
ret i1 %cmp
}
+
+; CHECK: @test67
+; CHECK: %and = and i32 %x, 96
+; CHECK: %cmp = icmp ne i32 %and, 0
+define i1 @test67(i32 %x) nounwind uwtable {
+ %and = and i32 %x, 127
+ %cmp = icmp sgt i32 %and, 31
+ ret i1 %cmp
+}
+
+; CHECK: @test68
+; CHECK: %cmp = icmp ugt i32 %and, 30
+define i1 @test68(i32 %x) nounwind uwtable {
+ %and = and i32 %x, 127
+ %cmp = icmp sgt i32 %and, 30
+ ret i1 %cmp
+}
+
+; PR14708
+; CHECK: @test69
+; CHECK: %1 = and i32 %c, -33
+; CHECK: %2 = icmp eq i32 %1, 65
+; CHECK: ret i1 %2
+define i1 @test69(i32 %c) nounwind uwtable {
+ %1 = icmp eq i32 %c, 97
+ %2 = icmp eq i32 %c, 65
+ %3 = or i1 %1, %2
+ ret i1 %3
+}
+
+; CHECK: @icmp_sext16trunc
+; CHECK-NEXT: %1 = trunc i32 %x to i16
+; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
+define i1 @icmp_sext16trunc(i32 %x) {
+ %trunc = trunc i32 %x to i16
+ %sext = sext i16 %trunc to i32
+ %cmp = icmp slt i32 %sext, 36
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_sext8trunc
+; CHECK-NEXT: %1 = trunc i32 %x to i8
+; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
+define i1 @icmp_sext8trunc(i32 %x) {
+ %trunc = trunc i32 %x to i8
+ %sext = sext i8 %trunc to i32
+ %cmp = icmp slt i32 %sext, 36
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_shl16
+; CHECK-NEXT: %1 = trunc i32 %x to i16
+; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
+define i1 @icmp_shl16(i32 %x) {
+ %shl = shl i32 %x, 16
+ %cmp = icmp slt i32 %shl, 2359296
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_shl24
+; CHECK-NEXT: %1 = trunc i32 %x to i8
+; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
+define i1 @icmp_shl24(i32 %x) {
+ %shl = shl i32 %x, 24
+ %cmp = icmp slt i32 %shl, 603979776
+ ret i1 %cmp
+}
+
+; If the (shl x, C) preserves the sign and this is a sign test,
+; compare the LHS operand instead.
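+; (Rationale, informally: with nsw, the shift cannot change the sign bit, so
+; %shl is positive exactly when %x is, and the compare can use %x directly.)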
+; CHECK: @icmp_shl_nsw_sgt
+; CHECK-NEXT: icmp sgt i32 %x, 0
+define i1 @icmp_shl_nsw_sgt(i32 %x) {
+ %shl = shl nsw i32 %x, 21
+ %cmp = icmp sgt i32 %shl, 0
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_nsw_sge0
+; CHECK-NEXT: icmp sgt i32 %x, -1
+define i1 @icmp_shl_nsw_sge0(i32 %x) {
+ %shl = shl nsw i32 %x, 21
+ %cmp = icmp sge i32 %shl, 0
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_nsw_sge1
+; CHECK-NEXT: icmp sgt i32 %x, 0
+define i1 @icmp_shl_nsw_sge1(i32 %x) {
+ %shl = shl nsw i32 %x, 21
+ %cmp = icmp sge i32 %shl, 1
+ ret i1 %cmp
+}
+
+; Checks for icmp (eq|ne) (shl x, C), 0
+; CHECK: @icmp_shl_nsw_eq
+; CHECK-NEXT: icmp eq i32 %x, 0
+define i1 @icmp_shl_nsw_eq(i32 %x) {
+ %mul = shl nsw i32 %x, 5
+ %cmp = icmp eq i32 %mul, 0
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_eq
+; CHECK-NOT: icmp eq i32 %mul, 0
+define i1 @icmp_shl_eq(i32 %x) {
+ %mul = shl i32 %x, 5
+ %cmp = icmp eq i32 %mul, 0
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_nsw_ne
+; CHECK-NEXT: icmp ne i32 %x, 0
+define i1 @icmp_shl_nsw_ne(i32 %x) {
+ %mul = shl nsw i32 %x, 7
+ %cmp = icmp ne i32 %mul, 0
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_shl_ne
+; CHECK-NOT: icmp ne i32 %x, 0
+define i1 @icmp_shl_ne(i32 %x) {
+ %mul = shl i32 %x, 7
+ %cmp = icmp ne i32 %mul, 0
+ ret i1 %cmp
+}
+
+; If the (mul x, C) preserves the sign and this is a sign test,
+; compare the LHS operand instead.
+; CHECK: @icmp_mul_nsw
+; CHECK-NEXT: icmp sgt i32 %x, 0
+define i1 @icmp_mul_nsw(i32 %x) {
+ %mul = mul nsw i32 %x, 12
+ %cmp = icmp sgt i32 %mul, 0
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw1
+; CHECK-NEXT: icmp slt i32 %x, 0
+define i1 @icmp_mul_nsw1(i32 %x) {
+ %mul = mul nsw i32 %x, 12
+ %cmp = icmp sle i32 %mul, -1
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw_neg
+; CHECK-NEXT: icmp slt i32 %x, 1
+define i1 @icmp_mul_nsw_neg(i32 %x) {
+ %mul = mul nsw i32 %x, -12
+ %cmp = icmp sge i32 %mul, 0
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw_neg1
+; CHECK-NEXT: icmp slt i32 %x, 0
+define i1 @icmp_mul_nsw_neg1(i32 %x) {
+ %mul = mul nsw i32 %x, -12
+ %cmp = icmp sge i32 %mul, 1
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_nsw_0
+; CHECK-NOT: icmp sgt i32 %x, 0
+define i1 @icmp_mul_nsw_0(i32 %x) {
+ %mul = mul nsw i32 %x, 0
+ %cmp = icmp sgt i32 %mul, 0
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_mul
+; CHECK-NEXT: %mul = mul i32 %x, -12
+define i1 @icmp_mul(i32 %x) {
+ %mul = mul i32 %x, -12
+ %cmp = icmp sge i32 %mul, 0
+ ret i1 %cmp
+}
+
+; Checks for icmp (eq|ne) (mul x, C), 0
+; CHECK: @icmp_mul_neq0
+; CHECK-NEXT: icmp ne i32 %x, 0
+define i1 @icmp_mul_neq0(i32 %x) {
+ %mul = mul nsw i32 %x, -12
+ %cmp = icmp ne i32 %mul, 0
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_mul_eq0
+; CHECK-NEXT: icmp eq i32 %x, 0
+define i1 @icmp_mul_eq0(i32 %x) {
+ %mul = mul nsw i32 %x, 12
+ %cmp = icmp eq i32 %mul, 0
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_mul0_eq0
+; CHECK-NEXT: ret i1 true
+define i1 @icmp_mul0_eq0(i32 %x) {
+ %mul = mul i32 %x, 0
+ %cmp = icmp eq i32 %mul, 0
+ ret i1 %cmp
+}
+
+; CHECK: @icmp_mul0_ne0
+; CHECK-NEXT: ret i1 false
+define i1 @icmp_mul0_ne0(i32 %x) {
+ %mul = mul i32 %x, 0
+ %cmp = icmp ne i32 %mul, 0
+ ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/idioms.ll b/test/Transforms/InstCombine/idioms.ll
index 6b3567fc6e8d..1a211668c3bf 100644
--- a/test/Transforms/InstCombine/idioms.ll
+++ b/test/Transforms/InstCombine/idioms.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine %s -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
; Check that code corresponding to the following C function is
; simplified into a single ASR operation:
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 382e6b38574d..f334b3b1e935 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -instcombine -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
%overflow.result = type {i8, i1}
@@ -220,3 +220,39 @@ define i32 @cttz_simplify1b(i32 %x) nounwind readnone ssp {
; CHECK: @cttz_simplify1b
; CHECK-NEXT: ret i32 0
}
+
+define i32 @ctlz_undef(i32 %Value) nounwind {
+ %ctlz = call i32 @llvm.ctlz.i32(i32 0, i1 true)
+ ret i32 %ctlz
+
+; CHECK: @ctlz_undef
+; CHECK-NEXT: ret i32 undef
+}
+
+define i32 @cttz_undef(i32 %Value) nounwind {
+ %cttz = call i32 @llvm.cttz.i32(i32 0, i1 true)
+ ret i32 %cttz
+
+; CHECK: @cttz_undef
+; CHECK-NEXT: ret i32 undef
+}
+
+define i32 @ctlz_select(i32 %Value) nounwind {
+ %tobool = icmp ne i32 %Value, 0
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %Value, i1 true)
+ %s = select i1 %tobool, i32 %ctlz, i32 32
+ ret i32 %s
+
+; CHECK: @ctlz_select
+; CHECK: select i1 %tobool, i32 %ctlz, i32 32
+}
+
+define i32 @cttz_select(i32 %Value) nounwind {
+ %tobool = icmp ne i32 %Value, 0
+ %cttz = call i32 @llvm.cttz.i32(i32 %Value, i1 true)
+ %s = select i1 %tobool, i32 %cttz, i32 32
+ ret i32 %s
+
+; CHECK: @cttz_select
+; CHECK: select i1 %tobool, i32 %cttz, i32 32
+}
diff --git a/test/Transforms/InstCombine/isascii-1.ll b/test/Transforms/InstCombine/isascii-1.ll
new file mode 100644
index 000000000000..2a413d89b492
--- /dev/null
+++ b/test/Transforms/InstCombine/isascii-1.ll
@@ -0,0 +1,32 @@
+; Test that the isascii library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @isascii(i32)
+
+; Check isascii(c) -> c <u 128.
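+; Illustrative C-level sketch of the rewrite:
+;   return isascii(c);         /* before */
+;   return (unsigned)c < 128;  /* after: icmp ult + zext, as checked below */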
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+ %ret = call i32 @isascii(i32 127)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify2() {
+; CHECK: @test_simplify2
+ %ret = call i32 @isascii(i32 128)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify3(i32 %x) {
+; CHECK: @test_simplify3
+ %ret = call i32 @isascii(i32 %x)
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %x, 128
+; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = zext i1 [[CMP]] to i32
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[ZEXT]]
+}
diff --git a/test/Transforms/InstCombine/isdigit-1.ll b/test/Transforms/InstCombine/isdigit-1.ll
new file mode 100644
index 000000000000..f291296c8826
--- /dev/null
+++ b/test/Transforms/InstCombine/isdigit-1.ll
@@ -0,0 +1,48 @@
+; Test that the isdigit library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @isdigit(i32)
+
+; Check isdigit(c) -> (c - '0') <u 10.
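+; Illustrative C-level sketch of the rewrite:
+;   return isdigit(c);                 /* before */
+;   return (unsigned)(c - '0') < 10;   /* after: add -48, icmp ult, zext */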
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+ %ret = call i32 @isdigit(i32 47)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify2() {
+; CHECK: @test_simplify2
+ %ret = call i32 @isdigit(i32 48)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify3() {
+; CHECK: @test_simplify3
+ %ret = call i32 @isdigit(i32 57)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+ %ret = call i32 @isdigit(i32 58)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify5(i32 %x) {
+; CHECK: @test_simplify5
+
+ %ret = call i32 @isdigit(i32 %x)
+; CHECK-NEXT: [[ADD:%[a-z0-9]+]] = add i32 %x, -48
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 [[ADD]], 10
+; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = zext i1 [[CMP]] to i32
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[ZEXT]]
+}
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index 5cafb7787e36..d88188e4109c 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -47,6 +47,18 @@ define i1 @test4(i32 %X) {
; CHECK-NEXT: ret i1 %R
}
+define i1 @test4_i16(i16 %X) {
+ %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i16 %X
+ %Q = load i16* %P
+ %R = icmp sle i16 %Q, 73
+ ret i1 %R
+; CHECK: @test4_i16
+; CHECK-NEXT: lshr i16 933, %X
+; CHECK-NEXT: and i16 {{.*}}, 1
+; CHECK-NEXT: %R = icmp ne i16 {{.*}}, 0
+; CHECK-NEXT: ret i1 %R
+}
+
define i1 @test5(i32 %X) {
%P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X
%Q = load i16* %P
diff --git a/test/Transforms/InstCombine/load3.ll b/test/Transforms/InstCombine/load3.ll
index 35398e17db8c..db74426783c1 100644
--- a/test/Transforms/InstCombine/load3.ll
+++ b/test/Transforms/InstCombine/load3.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.0.0"
; Instcombine should be able to do trivial CSE of loads.
@@ -24,4 +24,23 @@ define float @test2() {
; CHECK: @test2
; CHECK: ret float 0x3806965600000000
-} \ No newline at end of file
+}
+
+@rslts32 = global [36 x i32] zeroinitializer, align 4
+
+@expect32 = internal constant [36 x i32] [
+  i32 1, i32 2, i32 0, i32 100, i32 3, i32 4, i32 0, i32 -7, i32 4,
+  i32 4, i32 8, i32 8, i32 1, i32 3, i32 8, i32 3, i32 4, i32 -2,
+  i32 2, i32 8, i32 83, i32 77, i32 8, i32 17, i32 77, i32 88, i32 22,
+  i32 33, i32 44, i32 88, i32 77, i32 4, i32 4, i32 7, i32 -7, i32 -8 ],
+align 4
+
+; PR14986
+define void @test3() nounwind {
+; This is a weird way of computing zero: the byte offset is
+; 29826161 * 144 + 28 * 4 = 2^32, which wraps to offset 0 in the 32-bit
+; address space, so the load reads @expect32[0][0], which is 1.
+ %l = load i32* getelementptr ([36 x i32]* @expect32, i32 29826161, i32 28), align 4
+ store i32 %l, i32* getelementptr ([36 x i32]* @rslts32, i32 29826161, i32 28), align 4
+ ret void
+
+; CHECK: @test3
+; CHECK: store i32 1, i32* getelementptr inbounds ([36 x i32]* @rslts32, i32 0, i32 0)
+}
diff --git a/test/Transforms/InstCombine/logical-select.ll b/test/Transforms/InstCombine/logical-select.ll
index bb59817a4f69..f8c06768453d 100644
--- a/test/Transforms/InstCombine/logical-select.ll
+++ b/test/Transforms/InstCombine/logical-select.ll
@@ -10,8 +10,8 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
%j = or i32 %g, %i
ret i32 %j
; CHECK: %e = icmp slt i32 %a, %b
-; CHECK: %j = select i1 %e, i32 %c, i32 %d
-; CHECK: ret i32 %j
+; CHECK-NEXT: [[result:%.*]] = select i1 %e, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
}
define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
%e = icmp slt i32 %a, %b
@@ -22,8 +22,8 @@ define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
%j = or i32 %i, %g
ret i32 %j
; CHECK: %e = icmp slt i32 %a, %b
-; CHECK: %j = select i1 %e, i32 %c, i32 %d
-; CHECK: ret i32 %j
+; CHECK-NEXT: [[result:%.*]] = select i1 %e, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
}
define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
@@ -36,8 +36,8 @@ entry:
%3 = or i32 %1, %2
ret i32 %3
; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
}
define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
entry:
@@ -49,8 +49,8 @@ entry:
%3 = or i32 %1, %2
ret i32 %3
; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
}
define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
@@ -63,6 +63,6 @@ entry:
%3 = or i32 %1, %2
ret i32 %3
; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK: %1 = select i1 %0, i32 %c, i32 %d
-; CHECK: ret i32 %1
+; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[result]]
}
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index 4e3217dc2d96..cd12b29b1186 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -91,3 +91,32 @@ define void @test5(i8* %ptr, i8** %esc) {
store volatile i8 4, i8* %g
ret void
}
+
+;; When a basic block contains only a call to free, and that block is reached
+;; through a null test on free's argument, move the call into the predecessor
+;; block.
+;; Running simplifycfg afterwards removes the now-empty basic block and the
+;; branch, and dead code elimination then removes the comparison.
+;; This is what happens at -O1 and above.
+; CHECK: @test6
+define void @test6(i8* %foo) minsize {
+; CHECK: %tobool = icmp eq i8* %foo, null
+;; Call to free moved
+; CHECK-NEXT: tail call void @free(i8* %foo)
+; CHECK-NEXT: br i1 %tobool, label %if.end, label %if.then
+; CHECK: if.then:
+;; Block is now empty and may be simplified by simplifycfg
+; CHECK-NEXT: br label %if.end
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+entry:
+ %tobool = icmp eq i8* %foo, null
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @free(i8* %foo)
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
index 4238c5f8fb15..c97b201fc0e9 100644
--- a/test/Transforms/InstCombine/memcmp-1.ll
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -59,7 +59,7 @@ define i32 @test_simplify5() {
%mem2 = getelementptr [4 x i8]* @foo, i32 0, i32 0
%ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
ret i32 %ret
-; CHECK: ret i32 {{[0-9]+}}
+; CHECK: ret i32 1
}
define i32 @test_simplify6() {
@@ -68,5 +68,5 @@ define i32 @test_simplify6() {
%mem2 = getelementptr [4 x i8]* @hel, i32 0, i32 0
%ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
ret i32 %ret
-; CHECK: ret i32 {{-[0-9]+}}
+; CHECK: ret i32 -1
}
diff --git a/test/Transforms/InstCombine/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll
index 83c893e17dd6..557b160a8785 100644
--- a/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -134,3 +134,13 @@ define void @test8() {
; CHECK: bar
ret void
}
+
+define void @test9() {
+ %A = alloca %U, align 4
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK: @test9
+; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U]* @H, i64 0, i64 1) to i8*))
+ ret void
+}
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 6c8e6347634c..16213b8628ca 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -65,7 +65,7 @@ define i32 @test9(i32 %i) {
; CHECK: @test9
%j = mul i32 %i, -1 ; <i32> [#uses=1]
ret i32 %j
-; CHECJ: sub i32 0, %i
+; CHECK: sub i32 0, %i
}
define i32 @test10(i32 %a, i32 %b) {
@@ -138,10 +138,8 @@ define i32 @test16(i32 %b, i1 %c) {
; e = b & (a >> 31)
%e = mul i32 %d, %b ; <i32> [#uses=1]
ret i32 %e
-; CHECK: [[TEST16:%.*]] = zext i1 %c to i32
-; CHECK-NEXT: %1 = sub i32 0, [[TEST16]]
-; CHECK-NEXT: %e = and i32 %1, %b
-; CHECK-NEXT: ret i32 %e
+; CHECK: [[TEST16:%.*]] = select i1 %c, i32 %b, i32 0
+; CHECK-NEXT: ret i32 [[TEST16]]
}
; X * Y (when Y is 0 or 1) --> x & (0-Y)
diff --git a/test/Transforms/InstCombine/obfuscated_splat.ll b/test/Transforms/InstCombine/obfuscated_splat.ll
index c25dade168a4..fa9cb423d02c 100644
--- a/test/Transforms/InstCombine/obfuscated_splat.ll
+++ b/test/Transforms/InstCombine/obfuscated_splat.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine -S %s | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
define void @test(<4 x float> *%in_ptr, <4 x float> *%out_ptr) {
%A = load <4 x float>* %in_ptr, align 16
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 31a3cb46e459..0ead9d123749 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -256,3 +256,131 @@ xpto:
return:
ret i32 7
}
+
+declare noalias i8* @valloc(i32) nounwind
+
+; CHECK: @test14
+; CHECK: ret i32 6
+define i32 @test14(i32 %a) nounwind {
+ switch i32 %a, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ ]
+
+sw.bb:
+ %call = tail call noalias i8* @malloc(i32 6) nounwind
+ br label %sw.epilog
+
+sw.bb1:
+ %call2 = tail call noalias i8* @calloc(i32 3, i32 2) nounwind
+ br label %sw.epilog
+
+sw.default:
+ %call3 = tail call noalias i8* @valloc(i32 6) nounwind
+ br label %sw.epilog
+
+sw.epilog:
+ %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
+ %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
+ ret i32 %1
+}
+
+; CHECK: @test15
+; CHECK: llvm.objectsize
+define i32 @test15(i32 %a) nounwind {
+ switch i32 %a, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ ]
+
+sw.bb:
+ %call = tail call noalias i8* @malloc(i32 3) nounwind
+ br label %sw.epilog
+
+sw.bb1:
+ %call2 = tail call noalias i8* @calloc(i32 2, i32 1) nounwind
+ br label %sw.epilog
+
+sw.default:
+ %call3 = tail call noalias i8* @valloc(i32 3) nounwind
+ br label %sw.epilog
+
+sw.epilog:
+ %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
+ %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
+ ret i32 %1
+}
+
+; CHECK: @test16
+; CHECK: llvm.objectsize
+define i32 @test16(i8* %a, i32 %n) nounwind {
+ %b = alloca [5 x i8], align 1
+ %c = alloca [5 x i8], align 1
+ switch i32 %n, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ ]
+
+sw.bb:
+ %bp = bitcast [5 x i8]* %b to i8*
+ br label %sw.epilog
+
+sw.bb1:
+ %cp = bitcast [5 x i8]* %c to i8*
+ br label %sw.epilog
+
+sw.default:
+ br label %sw.epilog
+
+sw.epilog:
+ %phi = phi i8* [ %a, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
+ %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
+ ret i32 %sz
+}
+
+; CHECK: @test17
+; CHECK: ret i32 5
+define i32 @test17(i32 %n) nounwind {
+ %b = alloca [5 x i8], align 1
+ %c = alloca [5 x i8], align 1
+ %bp = bitcast [5 x i8]* %b to i8*
+ switch i32 %n, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ ]
+
+sw.bb:
+ br label %sw.epilog
+
+sw.bb1:
+ %cp = bitcast [5 x i8]* %c to i8*
+ br label %sw.epilog
+
+sw.default:
+ br label %sw.epilog
+
+sw.epilog:
+ %phi = phi i8* [ %bp, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
+ %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
+ ret i32 %sz
+}
+
+@globalalias = alias internal [60 x i8]* @a
+
+; CHECK: @test18
+; CHECK-NEXT: ret i32 60
+define i32 @test18() {
+ %bc = bitcast [60 x i8]* @globalalias to i8*
+ %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+ ret i32 %1
+}
+
+@globalalias2 = alias weak [60 x i8]* @a
+
+; CHECK: @test19
+; CHECK: llvm.objectsize
+define i32 @test19() {
+ %bc = bitcast [60 x i8]* @globalalias2 to i8*
+ %1 = call i32 @llvm.objectsize.i32(i8* %bc, i1 false)
+ ret i32 %1
+}
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index c0bb28d15ccf..bde2a54048ad 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -344,10 +344,9 @@ define <4 x i32> @test32(<4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32
%and.i = and <4 x i32> %vecinit6.i191, %neg.i ; <<4 x i32>> [#uses=1]
%or.i = or <4 x i32> %and.i, %and.i129 ; <<4 x i32>> [#uses=1]
ret <4 x i32> %or.i
-; Don't turn this into a vector select until codegen matures to handle them
-; better.
+; Codegen is now mature enough to handle vector selects.
; CHECK: @test32
-; CHECK: or <4 x i32> %and.i, %and.i129
+; CHECK: select <4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191
}
define i1 @test33(i1 %X, i1 %Y) {
diff --git a/test/Transforms/InstCombine/osx-names.ll b/test/Transforms/InstCombine/osx-names.ll
new file mode 100644
index 000000000000..7b83526aceb5
--- /dev/null
+++ b/test/Transforms/InstCombine/osx-names.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; <rdar://problem/9815881>
+; On OSX x86-32, fwrite and fputs aren't called fwrite and fputs.
+; Make sure we use the correct names.
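+; (Background: 32-bit Darwin exposes UNIX2003-conforming variants of these
+; functions under decorated symbol names such as "fwrite$UNIX2003", so the
+; simplifier must emit those names instead of plain fwrite/fputs here.)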
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7.2"
+
+%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sbuf = type { i8*, i32 }
+%struct.__sFILEX = type opaque
+
+@.str = private unnamed_addr constant [13 x i8] c"Hello world\0A\00", align 1
+@.str2 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
+
+define void @test1(%struct.__sFILE* %stream) nounwind {
+; CHECK: define void @test1
+; CHECK: call i32 @"fwrite$UNIX2003"
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) nounwind
+ ret void
+}
+
+define void @test2(%struct.__sFILE* %stream, i8* %str) nounwind ssp {
+; CHECK: define void @test2
+; CHECK: call i32 @"fputs$UNIX2003"
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([3 x i8]* @.str2, i32 0, i32 0), i8* %str) nounwind
+ ret void
+}
+
+declare i32 @fprintf(%struct.__sFILE*, i8*, ...) nounwind
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
new file mode 100644
index 000000000000..8a311f0b74c6
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -0,0 +1,154 @@
+; Test that the pow library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; rdar://7251832
+
+; NOTE: The readonly attribute on the pow call should be preserved
+; in the cases below where pow is transformed into another function call.
+
+declare float @powf(float, float) nounwind readonly
+declare double @pow(double, double) nounwind readonly
+
+; Check pow(1.0, x) -> 1.0.
+
+define float @test_simplify1(float %x) {
+; CHECK: @test_simplify1
+ %retval = call float @powf(float 1.0, float %x)
+ ret float %retval
+; CHECK-NEXT: ret float 1.000000e+00
+}
+
+define double @test_simplify2(double %x) {
+; CHECK: @test_simplify2
+ %retval = call double @pow(double 1.0, double %x)
+ ret double %retval
+; CHECK-NEXT: ret double 1.000000e+00
+}
+
+; Check pow(2.0, x) -> exp2(x).
+
+define float @test_simplify3(float %x) {
+; CHECK: @test_simplify3
+ %retval = call float @powf(float 2.0, float %x)
+; CHECK-NEXT: [[EXP2F:%[a-z0-9]+]] = call float @exp2f(float %x) [[NUW_RO:#[0-9]+]]
+ ret float %retval
+; CHECK-NEXT: ret float [[EXP2F]]
+}
+
+define double @test_simplify4(double %x) {
+; CHECK: @test_simplify4
+ %retval = call double @pow(double 2.0, double %x)
+; CHECK-NEXT: [[EXP2:%[a-z0-9]+]] = call double @exp2(double %x) [[NUW_RO]]
+ ret double %retval
+; CHECK-NEXT: ret double [[EXP2]]
+}
+
+; Check pow(x, 0.0) -> 1.0.
+
+define float @test_simplify5(float %x) {
+; CHECK: @test_simplify5
+ %retval = call float @powf(float %x, float 0.0)
+ ret float %retval
+; CHECK-NEXT: ret float 1.000000e+00
+}
+
+define double @test_simplify6(double %x) {
+; CHECK: @test_simplify6
+ %retval = call double @pow(double %x, double 0.0)
+ ret double %retval
+; CHECK-NEXT: ret double 1.000000e+00
+}
+
+; Check pow(x, 0.5) -> fabs(sqrt(x)), where x != -infinity.
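+; (Rationale sketch: sqrt(-0.0) is -0.0 while pow(-0.0, 0.5) is +0.0, so the
+; fabs is needed; and pow(-inf, 0.5) is +inf while sqrt(-inf) is NaN, hence
+; the explicit select on x == -infinity in the expected IR below.)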
+
+define float @test_simplify7(float %x) {
+; CHECK: @test_simplify7
+ %retval = call float @powf(float %x, float 0.5)
+; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) [[NUW_RO]]
+; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) [[NUW_RO]]
+; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq float %x, 0xFFF0000000000000
+; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], float 0x7FF0000000000000, float [[FABSF]]
+ ret float %retval
+; CHECK-NEXT: ret float [[SELECT]]
+}
+
+define double @test_simplify8(double %x) {
+; CHECK: @test_simplify8
+ %retval = call double @pow(double %x, double 0.5)
+; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) [[NUW_RO]]
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) [[NUW_RO]]
+; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
+; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
+ ret double %retval
+; CHECK-NEXT: ret double [[SELECT]]
+}
+
+; Check pow(-infinity, 0.5) -> +infinity.
+
+define float @test_simplify9(float %x) {
+; CHECK: @test_simplify9
+ %retval = call float @powf(float 0xFFF0000000000000, float 0.5)
+ ret float %retval
+; CHECK-NEXT: ret float 0x7FF0000000000000
+}
+
+define double @test_simplify10(double %x) {
+; CHECK: @test_simplify10
+ %retval = call double @pow(double 0xFFF0000000000000, double 0.5)
+ ret double %retval
+; CHECK-NEXT: ret double 0x7FF0000000000000
+}
+
+; Check pow(x, 1.0) -> x.
+
+define float @test_simplify11(float %x) {
+; CHECK: @test_simplify11
+ %retval = call float @powf(float %x, float 1.0)
+ ret float %retval
+; CHECK-NEXT: ret float %x
+}
+
+define double @test_simplify12(double %x) {
+; CHECK: @test_simplify12
+ %retval = call double @pow(double %x, double 1.0)
+ ret double %retval
+; CHECK-NEXT: ret double %x
+}
+
+; Check pow(x, 2.0) -> x*x.
+
+define float @test_simplify13(float %x) {
+; CHECK: @test_simplify13
+ %retval = call float @powf(float %x, float 2.0)
+; CHECK-NEXT: [[SQUARE:%[a-z0-9]+]] = fmul float %x, %x
+ ret float %retval
+; CHECK-NEXT: ret float [[SQUARE]]
+}
+
+define double @test_simplify14(double %x) {
+; CHECK: @test_simplify14
+ %retval = call double @pow(double %x, double 2.0)
+; CHECK-NEXT: [[SQUARE:%[a-z0-9]+]] = fmul double %x, %x
+ ret double %retval
+; CHECK-NEXT: ret double [[SQUARE]]
+}
+
+; Check pow(x, -1.0) -> 1.0/x.
+
+define float @test_simplify15(float %x) {
+; CHECK: @test_simplify15
+ %retval = call float @powf(float %x, float -1.0)
+; CHECK-NEXT: [[RECIPROCAL:%[a-z0-9]+]] = fdiv float 1.000000e+00, %x
+ ret float %retval
+; CHECK-NEXT: ret float [[RECIPROCAL]]
+}
+
+define double @test_simplify16(double %x) {
+; CHECK: @test_simplify16
+ %retval = call double @pow(double %x, double -1.0)
+; CHECK-NEXT: [[RECIPROCAL:%[a-z0-9]+]] = fdiv double 1.000000e+00, %x
+ ret double %retval
+; CHECK-NEXT: ret double [[RECIPROCAL]]
+}
+
+; CHECK: attributes [[NUW_RO]] = { nounwind readonly }
diff --git a/test/Transforms/InstCombine/pow-2.ll b/test/Transforms/InstCombine/pow-2.ll
new file mode 100644
index 000000000000..af64cda0904a
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-2.ll
@@ -0,0 +1,14 @@
+; Test that the pow library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare float @pow(double, double)
+
+; Check that pow functions with the wrong prototype aren't simplified.
+
+define float @test_no_simplify1(double %x) {
+; CHECK: @test_no_simplify1
+ %retval = call float @pow(double 1.0, double %x)
+; CHECK-NEXT: call float @pow(double 1.000000e+00, double %x)
+ ret float %retval
+}
diff --git a/test/Transforms/InstCombine/pr12338.ll b/test/Transforms/InstCombine/pr12338.ll
index 2b5c8f8a74ed..d34600f0fa58 100644
--- a/test/Transforms/InstCombine/pr12338.ll
+++ b/test/Transforms/InstCombine/pr12338.ll
@@ -1,24 +1,24 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-define void @entry() nounwind {
-entry:
- br label %for.cond
-
-for.cond:
+define void @entry() nounwind {
+entry:
+ br label %for.cond
+
+for.cond:
%local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ]
-; CHECK: sub <1 x i32> <i32 92>, %local
- %phi3 = sub <1 x i32> zeroinitializer, %local
- br label %cond.end
-
-cond.false:
- br label %cond.end
-
-cond.end:
- %cond = phi <1 x i32> [ %phi3, %for.cond ], [ undef, %cond.false ]
- br label %cond.end47
-
-cond.end47:
- %sum = add <1 x i32> %cond, <i32 92>
- %phi2 = sub <1 x i32> zeroinitializer, %sum
- br label %for.cond
-}
+; CHECK: sub <1 x i32> <i32 92>, %local
+ %phi3 = sub <1 x i32> zeroinitializer, %local
+ br label %cond.end
+
+cond.false:
+ br label %cond.end
+
+cond.end:
+ %cond = phi <1 x i32> [ %phi3, %for.cond ], [ undef, %cond.false ]
+ br label %cond.end47
+
+cond.end47:
+ %sum = add <1 x i32> %cond, <i32 92>
+ %phi2 = sub <1 x i32> zeroinitializer, %sum
+ br label %for.cond
+}
diff --git a/test/Transforms/InstCombine/printf-1.ll b/test/Transforms/InstCombine/printf-1.ll
new file mode 100644
index 000000000000..3a910ea437b7
--- /dev/null
+++ b/test/Transforms/InstCombine/printf-1.ll
@@ -0,0 +1,119 @@
+; Test that the printf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@h = constant [2 x i8] c"h\00"
+@percent = constant [2 x i8] c"%\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [4 x i8] c"%s\0A\00"
+@empty = constant [1 x i8] c"\00"
+; CHECK: [[STR:@[a-z0-9]+]] = private unnamed_addr constant [12 x i8] c"hello world\00"
+
+declare i32 @printf(i8*, ...)
+
+; Check printf("") -> noop.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %fmt = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("x") -> putchar('x'), even for '%'.
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %fmt = getelementptr [2 x i8]* @percent, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 37)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("foo\n") -> puts("foo").
+
+define void @test_simplify4() {
+; CHECK: @test_simplify4
+ %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8]* [[STR]], i32 0, i32 0))
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("%c", chr) -> putchar(chr).
+
+define void @test_simplify5() {
+; CHECK: @test_simplify5
+ %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt, i8 104)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("%s\n", str) -> puts(str).
+
+define void @test_simplify6() {
+; CHECK: @test_simplify6
+ %fmt = getelementptr [4 x i8]* @percent_s, i32 0, i32 0
+ %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0))
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf(format, ...) -> iprintf(format, ...) if no floating point.
+
+define void @test_simplify7() {
+; CHECK-IPRINTF: @test_simplify7
+ %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt, i32 187)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, ...)* @iprintf(i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+ ret void
+; CHECK-IPRINTF-NEXT: ret void
+}
+
+define void @test_no_simplify1() {
+; CHECK-IPRINTF: @test_no_simplify1
+ %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt, double 1.87)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+ ret void
+; CHECK-IPRINTF-NEXT: ret void
+}
+
+define void @test_no_simplify2(i8* %fmt, double %d) {
+; CHECK: @test_no_simplify2
+ call i32 (i8*, ...)* @printf(i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (i8*, ...)* @printf(i8* %fmt, double %d)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define i32 @test_no_simplify3() {
+; CHECK: @test_no_simplify3
+ %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
+ %ret = call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([2 x i8]* @h, i32 0, i32 0))
+ ret i32 %ret
+; CHECK-NEXT: ret i32 %ret
+}
diff --git a/test/Transforms/InstCombine/printf-2.ll b/test/Transforms/InstCombine/printf-2.ll
new file mode 100644
index 000000000000..466ee1c75770
--- /dev/null
+++ b/test/Transforms/InstCombine/printf-2.ll
@@ -0,0 +1,41 @@
+; Test that the printf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@h = constant [2 x i8] c"h\00"
+@percent_s = constant [4 x i8] c"%s\0A\00"
+
+declare void @printf(i8*, ...)
+
+; Check simplification of printf with void return type.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
+ call void (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+ call void (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i32 0, i32 0))
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify6() {
+; CHECK: @test_simplify6
+ %fmt = getelementptr [4 x i8]* @percent_s, i32 0, i32 0
+ %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+ call void (i8*, ...)* @printf(i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0))
+ ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/ptr-int-cast.ll b/test/Transforms/InstCombine/ptr-int-cast.ll
index 9524d449dd8b..7a6ecff9c0be 100644
--- a/test/Transforms/InstCombine/ptr-int-cast.ll
+++ b/test/Transforms/InstCombine/ptr-int-cast.ll
@@ -27,3 +27,34 @@ define i64 @f0(i32 %a0) nounwind {
ret i64 %t1
}
+define <4 x i32> @test4(<4 x i8*> %arg) nounwind {
+; CHECK: @test4
+; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
+; CHECK: trunc <4 x i64> %1 to <4 x i32>
+ %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
+ ret <4 x i32> %p1
+}
+
+define <4 x i128> @test5(<4 x i8*> %arg) nounwind {
+; CHECK: @test5
+; CHECK: ptrtoint <4 x i8*> %arg to <4 x i64>
+; CHECK: zext <4 x i64> %1 to <4 x i128>
+ %p1 = ptrtoint <4 x i8*> %arg to <4 x i128>
+ ret <4 x i128> %p1
+}
+
+define <4 x i8*> @test6(<4 x i32> %arg) nounwind {
+; CHECK: @test6
+; CHECK: zext <4 x i32> %arg to <4 x i64>
+; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
+ %p1 = inttoptr <4 x i32> %arg to <4 x i8*>
+ ret <4 x i8*> %p1
+}
+
+define <4 x i8*> @test7(<4 x i128> %arg) nounwind {
+; CHECK: @test7
+; CHECK: trunc <4 x i128> %arg to <4 x i64>
+; CHECK: inttoptr <4 x i64> %1 to <4 x i8*>
+ %p1 = inttoptr <4 x i128> %arg to <4 x i8*>
+ ret <4 x i8*> %p1
+}
diff --git a/test/Transforms/InstCombine/puts-1.ll b/test/Transforms/InstCombine/puts-1.ll
new file mode 100644
index 000000000000..ef4e1bbd824c
--- /dev/null
+++ b/test/Transforms/InstCombine/puts-1.ll
@@ -0,0 +1,31 @@
+; Test that the puts library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@empty = constant [1 x i8] zeroinitializer
+
+declare i32 @puts(i8*)
+
+; Check puts("") -> putchar('\n').
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ call i32 @puts(i8* %str)
+; CHECK-NEXT: call i32 @putchar(i32 10)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Don't simplify if the return value is used.
+
+define i32 @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ %ret = call i32 @puts(i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([1 x i8]* @empty, i32 0, i32 0))
+ ret i32 %ret
+; CHECK-NEXT: ret i32 %ret
+}
diff --git a/test/Transforms/InstCombine/sdiv-1.ll b/test/Transforms/InstCombine/sdiv-1.ll
index c46b5eaef4a8..6ab18ac7f844 100644
--- a/test/Transforms/InstCombine/sdiv-1.ll
+++ b/test/Transforms/InstCombine/sdiv-1.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -instcombine -inline -S | not grep '-715827882'
+; RUN: opt < %s -instcombine -inline -S | FileCheck %s
; PR3142
+; CHECK-NOT: -715827882
+
define i32 @a(i32 %X) nounwind readnone {
entry:
%0 = sub i32 0, %X
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index f1987973f462..968f37c9c129 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -184,3 +184,12 @@ define i32 @test16(i16 %x) nounwind {
; CHECK-NEXT: %ext = sext i16 %sext to i32
; CHECK-NEXT: ret i32 %ext
}
+
+define i32 @test17(i1 %x) nounwind {
+ %c1 = sext i1 %x to i32
+ %c2 = sub i32 0, %c1
+ ret i32 %c2
+; CHECK: @test17
+; CHECK-NEXT: [[TEST17:%.*]] = zext i1 %x to i32
+; CHECK-NEXT: ret i32 [[TEST17]]
+}
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 25e708b7f51d..41f8aa9ee812 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -523,9 +523,9 @@ entry:
%tmp51 = xor i8 %tmp50, %tmp5
%tmp52 = and i8 %tmp51, -128
%tmp53 = lshr i8 %tmp52, 7
-; CHECK: lshr i8 %tmp51, 7
%tmp54 = mul i8 %tmp53, 16
-; CHECK: shl nuw nsw i8 %tmp53, 4
+; CHECK: %0 = shl i8 %tmp4, 2
+; CHECK: %tmp54 = and i8 %0, 16
%tmp55 = xor i8 %tmp54, %tmp51
; CHECK: ret i8 %tmp551
ret i8 %tmp55
@@ -659,3 +659,89 @@ define i32 @test53(i32 %x) {
; CHECK-NEXT: %B = shl nuw i32 %x, 2
; CHECK-NEXT: ret i32 %B
}
+
+define i32 @test54(i32 %x) {
+ %shr2 = lshr i32 %x, 1
+ %shl = shl i32 %shr2, 4
+ %and = and i32 %shl, 16
+ ret i32 %and
+; CHECK: @test54
+; CHECK: shl i32 %x, 3
+}
+
+
+define i32 @test55(i32 %x) {
+ %shr2 = lshr i32 %x, 1
+ %shl = shl i32 %shr2, 4
+ %or = or i32 %shl, 8
+ ret i32 %or
+; CHECK: @test55
+; CHECK: shl i32 %x, 3
+}
+
+define i32 @test56(i32 %x) {
+ %shr2 = lshr i32 %x, 1
+ %shl = shl i32 %shr2, 4
+ %or = or i32 %shl, 7
+ ret i32 %or
+; CHECK: @test56
+; CHECK: shl i32 %shr2, 4
+}
+
+
+define i32 @test57(i32 %x) {
+ %shr = lshr i32 %x, 1
+ %shl = shl i32 %shr, 4
+ %and = and i32 %shl, 16
+ ret i32 %and
+; CHECK: @test57
+; CHECK: shl i32 %x, 3
+}
+
+define i32 @test58(i32 %x) {
+ %shr = lshr i32 %x, 1
+ %shl = shl i32 %shr, 4
+ %or = or i32 %shl, 8
+ ret i32 %or
+; CHECK: @test58
+; CHECK: shl i32 %x, 3
+}
+
+define i32 @test59(i32 %x) {
+ %shr = ashr i32 %x, 1
+ %shl = shl i32 %shr, 4
+ %or = or i32 %shl, 7
+ ret i32 %or
+; CHECK: @test59
+; CHECK: %shl = shl i32 %shr1, 4
+}
+
+
+define i32 @test60(i32 %x) {
+ %shr = ashr i32 %x, 4
+ %shl = shl i32 %shr, 1
+ %or = or i32 %shl, 1
+ ret i32 %or
+; CHECK: @test60
+; CHECK: ashr i32 %x, 3
+}
+
+
+define i32 @test61(i32 %x) {
+ %shr = ashr i32 %x, 4
+ %shl = shl i32 %shr, 1
+ %or = or i32 %shl, 2
+ ret i32 %or
+; CHECK: @test61
+; CHECK: ashr i32 %x, 4
+}
+
+; propagate "exact" trait
+define i32 @test62(i32 %x) {
+ %shr = ashr exact i32 %x, 4
+ %shl = shl i32 %shr, 1
+ %or = or i32 %shl, 1
+ ret i32 %or
+; CHECK: @test62
+; CHECK: ashr exact i32 %x, 3
+}
diff --git a/test/Transforms/InstCombine/signext.ll b/test/Transforms/InstCombine/signext.ll
index ecee9830cd57..5ed1cd5590ae 100644
--- a/test/Transforms/InstCombine/signext.ll
+++ b/test/Transforms/InstCombine/signext.ll
@@ -82,6 +82,6 @@ entry:
%sub = add i32 %xor, -67108864 ; <i32> [#uses=1]
ret i32 %sub
; CHECK: @test8
-; CHECK: %shr = ashr i32 %x, 5
-; CHECK: ret i32 %shr
+; CHECK: %sub = ashr i32 %x, 5
+; CHECK: ret i32 %sub
}
diff --git a/test/Transforms/InstCombine/sink_instruction.ll b/test/Transforms/InstCombine/sink_instruction.ll
index e521de208f21..5c4019a98df5 100644
--- a/test/Transforms/InstCombine/sink_instruction.ll
+++ b/test/Transforms/InstCombine/sink_instruction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -instcombine %s -S | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
;; This tests that the instructions in the entry blocks are sunk into each
;; arm of the 'if'.
diff --git a/test/Transforms/InstCombine/sprintf-1.ll b/test/Transforms/InstCombine/sprintf-1.ll
new file mode 100644
index 000000000000..9b8c8b1b12c7
--- /dev/null
+++ b/test/Transforms/InstCombine/sprintf-1.ll
@@ -0,0 +1,100 @@
+; Test that the sprintf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=CHECK-IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+@h = constant [2 x i8] c"h\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [3 x i8] c"%s\00"
+
+declare i32 @sprintf(i8*, i8*, ...)
+
+; Check sprintf(dst, fmt) -> llvm.memcpy(dst, fmt, strlen(fmt) + 1, 1).
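+; Illustrative C-level sketch: with no '%' in the format the copy includes
+; the terminating NUL, hence 13 bytes for "hello world\n":
+;   sprintf(dst, "hello world\n");      /* before */
+;   memcpy(dst, "hello world\n", 13);   /* after */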
+
+define void @test_simplify1(i8* %dst) {
+; CHECK: @test_simplify1
+ %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 13, i32 1, i1 false)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify2(i8* %dst) {
+; CHECK: @test_simplify2
+ %fmt = getelementptr [1 x i8]* @null, i32 0, i32 0
+ call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: store i8 0, i8* %dst, align 1
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3(i8* %dst) {
+; CHECK: @test_simplify3
+ %fmt = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+ call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: store i8 0, i8* %dst, align 1
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, "%c", chr) -> *(i8*)dst = chr; *((i8*)dst + 1) = 0.
+
+define void @test_simplify4(i8* %dst) {
+; CHECK: @test_simplify4
+ %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
+ call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i8 104)
+; CHECK-NEXT: store i8 104, i8* %dst, align 1
+; CHECK-NEXT: [[NUL:%[a-z0-9]+]] = getelementptr i8* %dst, i32 1
+; CHECK-NEXT: store i8 0, i8* [[NUL]], align 1
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, "%s", str) -> llvm.memcpy(dest, str, strlen(str) + 1, 1).
+
+define void @test_simplify5(i8* %dst, i8* %str) {
+; CHECK: @test_simplify5
+ %fmt = getelementptr [3 x i8]* @percent_s, i32 0, i32 0
+ call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i8* %str)
+; CHECK-NEXT: [[STRLEN:%[a-z0-9]+]] = call i32 @strlen(i8* %str)
+; CHECK-NEXT: [[LENINC:%[a-z0-9]+]] = add i32 [[STRLEN]], 1
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %str, i32 [[LENINC]], i32 1, i1 false)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, format, ...) -> siprintf(dst, format, ...) if no floating point.
+
+define void @test_simplify6(i8* %dst) {
+; CHECK-IPRINTF: @test_simplify6
+ %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
+ call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i32 187)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...)* @siprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+ ret void
+; CHECK-IPRINTF-NEXT: ret void
+}
+
+define void @test_no_simplify1(i8* %dst) {
+; CHECK-IPRINTF: @test_no_simplify1
+ %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+ call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double 1.87)
+; CHECK-IPRINTF-NEXT: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+ ret void
+; CHECK-IPRINTF-NEXT: ret void
+}
+
+define void @test_no_simplify2(i8* %dst, i8* %fmt, double %d) {
+; CHECK: @test_no_simplify2
+ call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double %d)
+ ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/sqrt.ll b/test/Transforms/InstCombine/sqrt.ll
index cc78417ebbd6..440b9748518d 100644
--- a/test/Transforms/InstCombine/sqrt.ll
+++ b/test/Transforms/InstCombine/sqrt.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -instcombine %s | FileCheck %s
+; RUN: opt -S -instcombine < %s | FileCheck %s
define float @test1(float %x) nounwind readnone ssp {
entry:
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index 64460d7a6d61..164ba7632684 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -83,3 +83,37 @@ Cont:
; CHECK-NEXT: ret void
}
+
+; PR14753 - merging two stores should preserve the TBAA tag.
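+; (The entry store of 42 and the loop's store of %inc are merged into one
+; store of %storemerge in the loop header; that merged store must keep the
+; common "int" tag !tbaa !0, as the CHECK lines verify.)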
+define void @test6(i32 %n, float* %a, i32* %gi) nounwind uwtable ssp {
+entry:
+ store i32 42, i32* %gi, align 4, !tbaa !0
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = load i32* %gi, align 4, !tbaa !0
+ %cmp = icmp slt i32 %0, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %idxprom = sext i32 %0 to i64
+ %arrayidx = getelementptr inbounds float* %a, i64 %idxprom
+ store float 0.000000e+00, float* %arrayidx, align 4, !tbaa !3
+ %1 = load i32* %gi, align 4, !tbaa !0
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* %gi, align 4, !tbaa !0
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+; CHECK: @test6
+; CHECK: for.cond:
+; CHECK-NEXT: phi i32 [ 42
+; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/Transforms/InstCombine/strto-1.ll b/test/Transforms/InstCombine/strto-1.ll
index 16c0c67970db..7139972fe043 100644
--- a/test/Transforms/InstCombine/strto-1.ll
+++ b/test/Transforms/InstCombine/strto-1.ll
@@ -1,29 +1,29 @@
; Test that the strto* library call simplifiers work correctly.
;
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -functionattrs -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
declare i64 @strtol(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtol(i8*, i8**, i32)
+; CHECK: declare i64 @strtol(i8*, i8** nocapture, i32)
declare double @strtod(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare double @strtod(i8*, i8**, i32)
+; CHECK: declare double @strtod(i8*, i8** nocapture, i32)
declare float @strtof(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare float @strtof(i8*, i8**, i32)
+; CHECK: declare float @strtof(i8*, i8** nocapture, i32)
declare i64 @strtoul(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoul(i8*, i8**, i32)
+; CHECK: declare i64 @strtoul(i8*, i8** nocapture, i32)
declare i64 @strtoll(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoll(i8*, i8**, i32)
+; CHECK: declare i64 @strtoll(i8*, i8** nocapture, i32)
declare double @strtold(i8* %s, i8** %endptr)
-; CHECK: declare double @strtold(i8*, i8**)
+; CHECK: declare double @strtold(i8*, i8** nocapture)
declare i64 @strtoull(i8* %s, i8** %endptr, i32 %base)
-; CHECK: declare i64 @strtoull(i8*, i8**, i32)
+; CHECK: declare i64 @strtoull(i8*, i8** nocapture, i32)
define void @test_simplify1(i8* %x, i8** %endptr) {
; CHECK: @test_simplify1
diff --git a/test/Transforms/InstCombine/toascii-1.ll b/test/Transforms/InstCombine/toascii-1.ll
new file mode 100644
index 000000000000..c4a13e229393
--- /dev/null
+++ b/test/Transforms/InstCombine/toascii-1.ll
@@ -0,0 +1,59 @@
+; Test that the toascii library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @toascii(i32)
+
+; Check toascii(c) -> c & 0x7f.
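+; Illustrative C-level sketch of the rewrite:
+;   return toascii(c);   /* before */
+;   return c & 0x7f;     /* after: 128 -> 0, 255 -> 127, 256 -> 0 */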
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+ %ret = call i32 @toascii(i32 0)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify2() {
+; CHECK: @test_simplify2
+ %ret = call i32 @toascii(i32 1)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify3() {
+; CHECK: @test_simplify3
+ %ret = call i32 @toascii(i32 127)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 127
+}
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+ %ret = call i32 @toascii(i32 128)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify5() {
+; CHECK: @test_simplify5
+ %ret = call i32 @toascii(i32 255)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 127
+}
+
+define i32 @test_simplify6() {
+; CHECK: @test_simplify6
+ %ret = call i32 @toascii(i32 256)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify7(i32 %x) {
+; CHECK: @test_simplify7
+ %ret = call i32 @toascii(i32 %x)
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 127
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[AND]]
+}
diff --git a/test/Transforms/InstCombine/vec_extract_elt.ll b/test/Transforms/InstCombine/vec_extract_elt.ll
index 63e4ee2112d8..166066a201bf 100644
--- a/test/Transforms/InstCombine/vec_extract_elt.ll
+++ b/test/Transforms/InstCombine/vec_extract_elt.ll
@@ -7,3 +7,13 @@ define i32 @test(float %f) {
ret i32 %tmp19
}
+define i64 @test2(i64 %in) {
+ %vec = insertelement <8 x i64> undef, i64 %in, i32 0
+ %splat = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer
+ %add = add <8 x i64> %splat, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+ %scl1 = extractelement <8 x i64> %add, i32 0
+ %scl2 = extractelement <8 x i64> %add, i32 0
+ %r = add i64 %scl1, %scl2
+ ret i64 %r
+}
+
diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll
index 7bbf53c270f4..2f2990b7b055 100644
--- a/test/Transforms/InstCombine/vector-casts.ll
+++ b/test/Transforms/InstCombine/vector-casts.ll
@@ -64,7 +64,8 @@ entry:
; CHECK: @test5
; CHECK: sext <4 x i1> %cmp to <4 x i32>
-; CHECK: sext <4 x i1> %cmp4 to <4 x i32>
+; The sext-and pair is canonicalized to a select.
+; CHECK: select <4 x i1> %cmp4, <4 x i32> %sext, <4 x i32> zeroinitializer
}
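+; A sketch of the canonical form referred to above (names illustrative):
+; for a vector condition %c, "sext(%c) & %x" keeps %x in lanes where %c is
+; true and yields 0 elsewhere, which is exactly
+;
+;   %sel = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer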
diff --git a/test/Transforms/InstCombine/vector-type.ll b/test/Transforms/InstCombine/vector-type.ll
new file mode 100644
index 000000000000..59a4bdd19e70
--- /dev/null
+++ b/test/Transforms/InstCombine/vector-type.ll
@@ -0,0 +1,15 @@
+; The code in InstCombiner::FoldSelectOpOp was calling
+; Type::getVectorNumElements without first checking whether the type was a
+; vector.
+
+; RUN: opt < %s -instcombine -S
+
+define i32 @vselect1(i32 %a.coerce, i32 %b.coerce, i32 %c.coerce) {
+entry:
+ %0 = bitcast i32 %a.coerce to <2 x i16>
+ %1 = bitcast i32 %b.coerce to <2 x i16>
+ %2 = bitcast i32 %c.coerce to <2 x i16>
+ %cmp = icmp sge <2 x i16> %2, zeroinitializer
+ %or = select <2 x i1> %cmp, <2 x i16> %0, <2 x i16> %1
+ %3 = bitcast <2 x i16> %or to i32
+ ret i32 %3
+}
diff --git a/test/Transforms/InstCombine/vector_gep1.ll b/test/Transforms/InstCombine/vector_gep1.ll
index 652362299562..90ca26212f2a 100644
--- a/test/Transforms/InstCombine/vector_gep1.ll
+++ b/test/Transforms/InstCombine/vector_gep1.ll
@@ -1,5 +1,5 @@
-; RUN: opt -instcombine %s -disable-output
-; RUN: opt -instsimplify %s -disable-output
+; RUN: opt -instcombine -disable-output < %s
+; RUN: opt -instsimplify -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -35,3 +35,8 @@ define <2 x i1> @test5(<2 x i8*> %a) {
%B = icmp ult <2 x i8*> %g, zeroinitializer
ret <2 x i1> %B
}
+
+define <2 x i32*> @test7(<2 x {i32, i32}*> %a) {
+ %w = getelementptr <2 x {i32, i32}*> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer
+ ret <2 x i32*> %w
+}
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index 89f00bd68475..be06d7999d84 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -51,3 +51,34 @@ define i32 @test4(i32 %A, i32 %B) {
; CHECK: %1 = ashr i32 %A, %B
; CHECK: ret i32 %1
}
+
+; defect-2 in rdar://12329730
+; ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
+; where "X" has more than one use
+define i32 @test5(i32 %val1) {
+test5:
+ %xor = xor i32 %val1, 1234
+ %shr = lshr i32 %xor, 8
+ %xor1 = xor i32 %shr, 1
+ %add = add i32 %xor1, %xor
+ ret i32 %add
+; CHECK: @test5
+; CHECK: lshr i32 %val1, 8
+; CHECK: ret
+}
+
+; defect-1 in rdar://12329730
+; Simplify (X^Y) -> X or Y in the user's context if we know that
+; only bits from X or Y are demanded.
+; e.g. the "x ^ 1234" can be optimized into x in the context of "t >> 16".
+; Put in other word, t >> 16 -> x >> 16.
+; unsigned foo(unsigned x) { unsigned t = x ^ 1234; ; return (t >> 16) + t;}
+define i32 @test6(i32 %x) {
+ %xor = xor i32 %x, 1234
+ %shr = lshr i32 %xor, 16
+ %add = add i32 %shr, %xor
+ ret i32 %add
+; CHECK: @test6
+; CHECK: lshr i32 %x, 16
+; CHECK: ret
+}
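+
+; A worked instance of the demanded-bits argument above (constants from
+; @test6): 1234 < 2^16, so the xor only disturbs the low 16 bits, while
+; "t >> 16" demands only the high 16 bits; hence
+;
+;   (x ^ 1234) >> 16  ==  x >> 16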
diff --git a/test/Transforms/InstCombine/zext-bool-add-sub.ll b/test/Transforms/InstCombine/zext-bool-add-sub.ll
index 78bcedbbc2e1..b5310575502b 100644
--- a/test/Transforms/InstCombine/zext-bool-add-sub.ll
+++ b/test/Transforms/InstCombine/zext-bool-add-sub.ll
@@ -4,9 +4,9 @@
define i32 @a(i1 zeroext %x, i1 zeroext %y) {
entry:
; CHECK: @a
-; CHECK: [[TMP1:%.*]] = zext i1 %y to i32
+; CHECK: [[TMP1:%.*]] = sext i1 %y to i32
; CHECK: [[TMP2:%.*]] = select i1 %x, i32 2, i32 1
-; CHECK-NEXT: sub i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: add i32 [[TMP2]], [[TMP1]]
%conv = zext i1 %x to i32
%conv3 = zext i1 %y to i32
%conv3.neg = sub i32 0, %conv3
diff --git a/test/Transforms/InstSimplify/call-callconv.ll b/test/Transforms/InstSimplify/call-callconv.ll
new file mode 100644
index 000000000000..e475be781db9
--- /dev/null
+++ b/test/Transforms/InstSimplify/call-callconv.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; Verify that a non-default calling convention doesn't prevent libcall simplification.
+
+@.str = private unnamed_addr constant [4 x i8] c"abc\00", align 1
+
+define arm_aapcscc i32 @_abs(i32 %i) nounwind readnone {
+; CHECK: _abs
+ %call = tail call arm_aapcscc i32 @abs(i32 %i) nounwind readnone
+ ret i32 %call
+; CHECK: %[[ISPOS:.*]] = icmp sgt i32 %i, -1
+; CHECK: %[[NEG:.*]] = sub i32 0, %i
+; CHECK: %[[RET:.*]] = select i1 %[[ISPOS]], i32 %i, i32 %[[NEG]]
+; CHECK: ret i32 %[[RET]]
+}
+
+declare arm_aapcscc i32 @abs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_labs(i32 %i) nounwind readnone {
+; CHECK: _labs
+ %call = tail call arm_aapcscc i32 @labs(i32 %i) nounwind readnone
+ ret i32 %call
+; CHECK: %[[ISPOS:.*]] = icmp sgt i32 %i, -1
+; CHECK: %[[NEG:.*]] = sub i32 0, %i
+; CHECK: %[[RET:.*]] = select i1 %[[ISPOS]], i32 %i, i32 %[[NEG]]
+; CHECK: ret i32 %[[RET]]
+}
+
+declare arm_aapcscc i32 @labs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_strlen1() {
+; CHECK: _strlen1
+ %call = tail call arm_aapcscc i32 @strlen(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0))
+ ret i32 %call
+; CHECK: ret i32 3
+}
+
+declare arm_aapcscc i32 @strlen(i8*)
+
+define arm_aapcscc zeroext i1 @_strlen2(i8* %str) {
+; CHECK: _strlen2
+ %call = tail call arm_aapcscc i32 @strlen(i8* %str)
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
+
+; CHECK: %[[STRLENFIRST:.*]] = load i8* %str
+; CHECK: %[[CMP:.*]] = icmp ne i8 %[[STRLENFIRST]], 0
+; CHECK: ret i1 %[[CMP]]
+}
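+
+; A sketch of why _strlen2 folds (names follow the CHECK lines): strlen(s)
+; is non-zero exactly when the first byte of s is non-zero, so the icmp
+; demands nothing beyond that byte:
+;
+;   %strlenfirst = load i8* %str
+;   %cmp = icmp ne i8 %strlenfirst, 0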
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
new file mode 100644
index 000000000000..cf2f8476763f
--- /dev/null
+++ b/test/Transforms/InstSimplify/call.ll
@@ -0,0 +1,103 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
+
+define i1 @test_uadd1() {
+; CHECK: @test_uadd1
+ %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 3)
+ %overflow = extractvalue {i8, i1} %x, 1
+ ret i1 %overflow
+; CHECK-NEXT: ret i1 true
+}
+
+define i8 @test_uadd2() {
+; CHECK: @test_uadd2
+ %x = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 254, i8 44)
+ %result = extractvalue {i8, i1} %x, 0
+ ret i8 %result
+; CHECK-NEXT: ret i8 42
+}
+
+declare i256 @llvm.cttz.i256(i256 %src, i1 %is_zero_undef)
+
+define i256 @test_cttz() {
+; CHECK: @test_cttz
+ %x = call i256 @llvm.cttz.i256(i256 10, i1 false)
+ ret i256 %x
+; CHECK-NEXT: ret i256 1
+}
+
+declare i256 @llvm.ctpop.i256(i256 %src)
+
+define i256 @test_ctpop() {
+; CHECK: @test_ctpop
+ %x = call i256 @llvm.ctpop.i256(i256 10)
+ ret i256 %x
+; CHECK-NEXT: ret i256 2
+}
+
+; Test a non-intrinsic that we know about as a library call.
+declare float @fabs(float %x)
+
+define float @test_fabs_libcall() {
+; CHECK: @test_fabs_libcall
+
+ %x = call float @fabs(float -42.0)
+; This is still a real function call, so instsimplify won't nuke it -- other
+; passes have to do that.
+; CHECK-NEXT: call float @fabs
+
+ ret float %x
+; CHECK-NEXT: ret float 4.2{{0+}}e+01
+}
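+
+; A worked instance of the fold above (illustrative): fabs(-42.0) = 42.0,
+; so the returned value becomes the constant while the call itself is left
+; for a later pass (e.g. instcombine or DCE) to remove.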
+
+
+declare float @llvm.fabs.f32(float) nounwind readnone
+declare float @llvm.floor.f32(float) nounwind readnone
+declare float @llvm.ceil.f32(float) nounwind readnone
+declare float @llvm.trunc.f32(float) nounwind readnone
+declare float @llvm.rint.f32(float) nounwind readnone
+declare float @llvm.nearbyint.f32(float) nounwind readnone
+
+; Test idempotent intrinsics
+define float @test_idempotence(float %a) {
+; CHECK: @test_idempotence
+
+; CHECK: fabs
+; CHECK-NOT: fabs
+ %a0 = call float @llvm.fabs.f32(float %a)
+ %a1 = call float @llvm.fabs.f32(float %a0)
+
+; CHECK: floor
+; CHECK-NOT: floor
+ %b0 = call float @llvm.floor.f32(float %a)
+ %b1 = call float @llvm.floor.f32(float %b0)
+
+; CHECK: ceil
+; CHECK-NOT: ceil
+ %c0 = call float @llvm.ceil.f32(float %a)
+ %c1 = call float @llvm.ceil.f32(float %c0)
+
+; CHECK: trunc
+; CHECK-NOT: trunc
+ %d0 = call float @llvm.trunc.f32(float %a)
+ %d1 = call float @llvm.trunc.f32(float %d0)
+
+; CHECK: rint
+; CHECK-NOT: rint
+ %e0 = call float @llvm.rint.f32(float %a)
+ %e1 = call float @llvm.rint.f32(float %e0)
+
+; CHECK: nearbyint
+; CHECK-NOT: nearbyint
+ %f0 = call float @llvm.nearbyint.f32(float %a)
+ %f1 = call float @llvm.nearbyint.f32(float %f0)
+
+ %r0 = fadd float %a1, %b1
+ %r1 = fadd float %r0, %c1
+ %r2 = fadd float %r1, %d1
+ %r3 = fadd float %r2, %e1
+ %r4 = fadd float %r3, %f1
+
+ ret float %r4
+}
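+
+; The algebraic fact exercised here (worked example): each listed
+; operation is idempotent, f(f(x)) = f(x), because one application already
+; lands on a fixed point -- e.g. floor(floor(2.7)) = floor(2.0) = 2.0 --
+; so the second call simplifies to its operand.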
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index ce2bb799c813..b764c761cfb2 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -165,6 +165,46 @@ entry:
ret i1 %cmp
}
+define i1 @gep13(i8* %ptr) {
+; CHECK: @gep13
+; We can prove this GEP is non-null because it is inbounds and its offset
+; is non-zero.
+ %x = getelementptr inbounds i8* %ptr, i32 1
+ %cmp = icmp eq i8* %x, null
+ ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep14({ {}, i8 }* %ptr) {
+; CHECK: @gep14
+; We can't simplify this because the index of one in the GEP doesn't
+; actually move the pointer: the preceding field is an empty struct of
+; size zero.
+ %x = getelementptr inbounds { {}, i8 }* %ptr, i32 0, i32 1
+ %cmp = icmp eq i8* %x, null
+ ret i1 %cmp
+; CHECK-NOT: ret i1 false
+}
+
+define i1 @gep15({ {}, [4 x {i8, i8}]}* %ptr, i32 %y) {
+; CHECK: @gep15
+; We can prove this GEP is non-null even though one index is a runtime
+; value: whatever %y is, a null result would necessarily violate inbounds
+; on one side or the other.
+ %x = getelementptr inbounds { {}, [4 x {i8, i8}]}* %ptr, i32 0, i32 1, i32 %y, i32 1
+ %cmp = icmp eq i8* %x, null
+ ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep16(i8* %ptr, i32 %a) {
+; CHECK: @gep16
+; We can prove this GEP is non-null because it is inbounds and because we know
+; %b is non-zero even though we don't know its value.
+ %b = or i32 %a, 1
+ %x = getelementptr inbounds i8* %ptr, i32 %b
+ %cmp = icmp eq i8* %x, null
+ ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
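+
+; Worked reasoning for @gep16: "or i32 %a, 1" forces the low bit, so
+; %b != 0 for every %a; an inbounds GEP that moves a pointer by a provably
+; non-zero offset cannot produce null, hence the fold to false.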
+
define i1 @zext(i32 %x) {
; CHECK: @zext
%e1 = zext i32 %x to i64
@@ -607,3 +647,49 @@ unreachableblock:
%Y = icmp eq i32* %X, null
ret i1 %Y
}
+
+; It's not valid to fold a comparison of an argument with an alloca, even
+; though that's tempting. An argument can't *alias* an alloca; however, that
+; aliasing rule relies on restrictions against guessing an object's address
+; and dereferencing it. There are no such restrictions against guessing an
+; object's address and comparing.
+
+define i1 @alloca_argument_compare(i64* %arg) {
+ %alloc = alloca i64
+ %cmp = icmp eq i64* %arg, %alloc
+ ret i1 %cmp
+ ; CHECK: alloca_argument_compare
+ ; CHECK: ret i1 %cmp
+}
+
+; As above, but with the operands reversed.
+
+define i1 @alloca_argument_compare_swapped(i64* %arg) {
+ %alloc = alloca i64
+ %cmp = icmp eq i64* %alloc, %arg
+ ret i1 %cmp
+ ; CHECK: alloca_argument_compare_swapped
+ ; CHECK: ret i1 %cmp
+}
+
+; Don't assume that a noalias argument isn't equal to a global variable's
+; address. This is an example where AliasAnalysis' NoAlias concept is
+; different from actual pointer inequality.
+
+@y = external global i32
+define zeroext i1 @external_compare(i32* noalias %x) {
+ %cmp = icmp eq i32* %x, @y
+ ret i1 %cmp
+ ; CHECK: external_compare
+ ; CHECK: ret i1 %cmp
+}
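+
+; A concrete scenario showing why the fold would be unsound (hypothetical
+; caller, not part of the test): another translation unit could pass the
+; address of @y itself as %x. As long as the function only compares the
+; pointers, no aliasing access occurs, so noalias is not violated and
+; %x == @y can legitimately be true.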
+
+define i1 @alloca_gep(i64 %a, i64 %b) {
+; CHECK: @alloca_gep
+; We can prove this GEP is non-null because it is inbounds and the pointer
+; is non-null.
+ %strs = alloca [1000 x [1001 x i8]], align 16
+ %x = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %a, i64 %b
+ %cmp = icmp eq i8* %x, null
+ ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
diff --git a/test/Transforms/InstSimplify/fast-math.ll b/test/Transforms/InstSimplify/fast-math.ll
new file mode 100644
index 000000000000..154b96739791
--- /dev/null
+++ b/test/Transforms/InstSimplify/fast-math.ll
@@ -0,0 +1,107 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+;; x * 0 ==> 0 when no-nans and no-signed-zero
+; CHECK: mul_zero_1
+define float @mul_zero_1(float %a) {
+ %b = fmul nsz nnan float %a, 0.0
+; CHECK: ret float 0.0
+ ret float %b
+}
+; CHECK: mul_zero_2
+define float @mul_zero_2(float %a) {
+ %b = fmul fast float 0.0, %a
+; CHECK: ret float 0.0
+ ret float %b
+}
+
+;; x * 0 =/=> 0 when there could be nans or -0
+; CHECK: no_mul_zero_1
+define float @no_mul_zero_1(float %a) {
+ %b = fmul nsz float %a, 0.0
+; CHECK: ret float %b
+ ret float %b
+}
+; CHECK: no_mul_zero_2
+define float @no_mul_zero_2(float %a) {
+ %b = fmul nnan float %a, 0.0
+; CHECK: ret float %b
+ ret float %b
+}
+; CHECK: no_mul_zero_3
+define float @no_mul_zero_3(float %a) {
+ %b = fmul float %a, 0.0
+; CHECK: ret float %b
+ ret float %b
+}
+
+; fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
+; where nnan and ninf have to occur at least once somewhere in this
+; expression
+; CHECK: fadd_fsub_0
+define float @fadd_fsub_0(float %a) {
+; X + -X ==> 0
+ %t1 = fsub nnan ninf float 0.0, %a
+ %zero1 = fadd nnan ninf float %t1, %a
+
+ %t2 = fsub nnan float 0.0, %a
+ %zero2 = fadd ninf float %t2, %a
+
+ %t3 = fsub nnan ninf float 0.0, %a
+ %zero3 = fadd float %t3, %a
+
+ %t4 = fsub float 0.0, %a
+ %zero4 = fadd nnan ninf float %t4, %a
+
+; Don't fold this
+; CHECK: %nofold = fsub float 0.0
+ %nofold = fsub float 0.0, %a
+; CHECK: %no_zero = fadd nnan float %nofold, %a
+ %no_zero = fadd nnan float %nofold, %a
+
+; Coalesce the folded zeros
+ %zero5 = fadd float %zero1, %zero2
+ %zero6 = fadd float %zero3, %zero4
+ %zero7 = fadd float %zero5, %zero6
+
+; Should get folded
+ %ret = fadd nsz float %no_zero, %zero7
+
+; CHECK: ret float %no_zero
+ ret float %ret
+}
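+
+; Why both flags are required (worked counterexamples): NaN + (-NaN) = NaN
+; and inf + (-inf) = NaN, not 0.0, so the pair must be known NaN-free
+; (nnan) and infinity-free (ninf) -- with each flag allowed to come from
+; either instruction -- before x + (-x) may fold to 0.0.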
+
+; fsub nnan ninf x, x ==> 0.0
+; CHECK: @fsub_x_x
+define float @fsub_x_x(float %a) {
+; X - X ==> 0
+ %zero1 = fsub nnan ninf float %a, %a
+
+; Don't fold
+; CHECK: %no_zero1 = fsub
+ %no_zero1 = fsub ninf float %a, %a
+; CHECK: %no_zero2 = fsub
+ %no_zero2 = fsub nnan float %a, %a
+; CHECK: %no_zero = fadd
+ %no_zero = fadd float %no_zero1, %no_zero2
+
+; Should get folded
+ %ret = fadd nsz float %no_zero, %zero1
+
+; CHECK: ret float %no_zero
+ ret float %ret
+}
+
+; fadd nsz X, 0 ==> X
+; CHECK: @nofold_fadd_x_0
+define float @nofold_fadd_x_0(float %a) {
+; Don't fold
+; CHECK: %no_zero1 = fadd
+ %no_zero1 = fadd ninf float %a, 0.0
+; CHECK: %no_zero2 = fadd
+ %no_zero2 = fadd nnan float %a, 0.0
+; CHECK: %no_zero = fadd
+ %no_zero = fadd float %no_zero1, %no_zero2
+
+; CHECK: ret float %no_zero
+ ret float %no_zero
+}
diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
new file mode 100644
index 000000000000..f9c364cade36
--- /dev/null
+++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; fsub 0, (fsub 0, X) ==> X
+; CHECK: @fsub_0_0_x
+define float @fsub_0_0_x(float %a) {
+ %t1 = fsub float -0.0, %a
+ %ret = fsub float -0.0, %t1
+
+; CHECK: ret float %a
+ ret float %ret
+}
+
+; fsub X, 0 ==> X
+; CHECK: @fsub_x_0
+define float @fsub_x_0(float %a) {
+ %ret = fsub float %a, 0.0
+; CHECK: ret float %a
+ ret float %ret
+}
+
+; fadd X, -0 ==> X
+; CHECK: @fadd_x_n0
+define float @fadd_x_n0(float %a) {
+ %ret = fadd float %a, -0.0
+; CHECK: ret float %a
+ ret float %ret
+}
+
+; fmul X, 1.0 ==> X
+; CHECK: @fmul_X_1
+define double @fmul_X_1(double %a) {
+ %b = fmul double 1.000000e+00, %a ; <double> [#uses=1]
+ ; CHECK: ret double %a
+ ret double %b
+}
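+
+; The sign-of-zero arithmetic behind these identities (worked): for every
+; x, x - 0.0 = x and x + (-0.0) = x, but x + 0.0 is not an identity, since
+; (-0.0) + 0.0 = +0.0; that is why fadd_x_n0 uses -0.0 rather than 0.0.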
diff --git a/test/Transforms/InstSimplify/past-the-end.ll b/test/Transforms/InstSimplify/past-the-end.ll
new file mode 100644
index 000000000000..075da4a24be0
--- /dev/null
+++ b/test/Transforms/InstSimplify/past-the-end.ll
@@ -0,0 +1,77 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+target datalayout = "p:32:32"
+
+; Check some past-the-end subtleties.
+
+@opte_a = global i32 0
+@opte_b = global i32 0
+
+; Comparing base addresses of two distinct globals. Never equal.
+
+define zeroext i1 @no_offsets() {
+ %t = icmp eq i32* @opte_a, @opte_b
+ ret i1 %t
+ ; CHECK: no_offsets(
+ ; CHECK: ret i1 false
+}
+
+; Comparing past-the-end addresses of two distinct globals. Never equal.
+
+define zeroext i1 @both_past_the_end() {
+ %x = getelementptr i32* @opte_a, i32 1
+ %y = getelementptr i32* @opte_b, i32 1
+ %t = icmp eq i32* %x, %y
+ ret i1 %t
+ ; CHECK: both_past_the_end(
+ ; CHECK-NOT: ret i1 true
+ ; TODO: refine this
+}
+
+; Comparing past-the-end addresses of one global to the base address
+; of another. Can't fold this.
+
+define zeroext i1 @just_one_past_the_end() {
+ %x = getelementptr i32* @opte_a, i32 1
+ %t = icmp eq i32* %x, @opte_b
+ ret i1 %t
+ ; CHECK: just_one_past_the_end(
+ ; CHECK: ret i1 icmp eq (i32* getelementptr inbounds (i32* @opte_a, i32 1), i32* @opte_b)
+}
+
+; Comparing base addresses of two distinct allocas. Never equal.
+
+define zeroext i1 @no_alloca_offsets() {
+ %m = alloca i32
+ %n = alloca i32
+ %t = icmp eq i32* %m, %n
+ ret i1 %t
+ ; CHECK: no_alloca_offsets(
+ ; CHECK: ret i1 false
+}
+
+; Comparing past-the-end addresses of two distinct allocas. Never equal.
+
+define zeroext i1 @both_past_the_end_alloca() {
+ %m = alloca i32
+ %n = alloca i32
+ %x = getelementptr i32* %m, i32 1
+ %y = getelementptr i32* %n, i32 1
+ %t = icmp eq i32* %x, %y
+ ret i1 %t
+ ; CHECK: both_past_the_end_alloca(
+ ; CHECK-NOT: ret i1 true
+ ; TODO: refine this
+}
+
+; Comparing past-the-end addresses of one alloca to the base address
+; of another. Can't fold this.
+
+define zeroext i1 @just_one_past_the_end_alloca() {
+ %m = alloca i32
+ %n = alloca i32
+ %x = getelementptr i32* %m, i32 1
+ %t = icmp eq i32* %x, %n
+ ret i1 %t
+ ; CHECK: just_one_past_the_end_alloca(
+ ; CHECK: ret i1 %t
+}
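+
+; The layout subtlety in the "just one past the end" cases (worked): an
+; allocator may place objects back to back, e.g. an i32 at address 16 and
+; another at address 20, so the past-the-end address of the first
+; (16 + 4 = 20) can equal the base address of the second; the comparison
+; therefore cannot be folded to false.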
diff --git a/test/Transforms/InstSimplify/ptr_diff.ll b/test/Transforms/InstSimplify/ptr_diff.ll
index 1eb1fd4c097e..8b4aa796013b 100644
--- a/test/Transforms/InstSimplify/ptr_diff.ll
+++ b/test/Transforms/InstSimplify/ptr_diff.ll
@@ -46,3 +46,33 @@ define i64 @ptrdiff3(i8* %ptr) {
%diff = sub i64 %last.int, %first.int
ret i64 %diff
}
+
+define <4 x i32> @ptrdiff4(<4 x i8*> %arg) nounwind {
+; Handle simple cases of vectors of pointers.
+; CHECK: @ptrdiff4
+; CHECK: ret <4 x i32> zeroinitializer
+ %p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
+ %bc = bitcast <4 x i8*> %arg to <4 x i32*>
+ %p2 = ptrtoint <4 x i32*> %bc to <4 x i32>
+ %sub = sub <4 x i32> %p1, %p2
+ ret <4 x i32> %sub
+}
+
+%struct.ham = type { i32, [2 x [2 x i32]] }
+
+@global = internal global %struct.ham zeroinitializer, align 4
+
+define i32 @ptrdiff5() nounwind {
+bb:
+ %tmp = getelementptr inbounds %struct.ham* @global, i32 0, i32 1
+ %tmp1 = getelementptr inbounds [2 x [2 x i32]]* %tmp, i32 0, i32 0
+ %tmp2 = bitcast [2 x i32]* %tmp1 to i32*
+ %tmp3 = ptrtoint i32* %tmp2 to i32
+ %tmp4 = getelementptr inbounds %struct.ham* @global, i32 0, i32 1
+ %tmp5 = getelementptr inbounds [2 x [2 x i32]]* %tmp4, i32 0, i32 0
+ %tmp6 = ptrtoint [2 x i32]* %tmp5 to i32
+ %tmp7 = sub i32 %tmp3, %tmp6
+ ret i32 %tmp7
+; CHECK: @ptrdiff5
+; CHECK: ret i32 0
+}
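+
+; Worked reasoning for @ptrdiff5: %tmp2 and %tmp5 name the same element of
+; @global (the GEP index lists are identical), so the two ptrtoint values
+; are equal and the subtraction folds to 0 without materializing any
+; address.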
diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll
index f65260e00f54..5ac1ddef64f8 100644
--- a/test/Transforms/InstSimplify/vector_gep.ll
+++ b/test/Transforms/InstSimplify/vector_gep.ll
@@ -1,4 +1,4 @@
-;RUN: opt -instsimplify %s -disable-output
+;RUN: opt -instsimplify -disable-output < %s
declare void @helper(<2 x i8*>)
define void @test(<2 x i8*> %a) {
%A = getelementptr <2 x i8*> %a, <2 x i32> <i32 0, i32 0>
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index 46271379bd0d..fe3dc77c9c13 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -jump-threading -S | FileCheck %s
+; RUN: opt -jump-threading -S < %s | FileCheck %s
declare i32 @f1()
declare i32 @f2()
@@ -476,3 +476,41 @@ exit1:
; CHECK: }
}
+; In this test we check that block duplication is inhibited by the presence
+; of a call to a function with the 'noduplicate' attribute.
+
+declare void @g()
+declare void @j()
+declare void @k()
+
+; CHECK: define void @h(i32 %p) {
+define void @h(i32 %p) {
+ %x = icmp ult i32 %p, 5
+ br i1 %x, label %l1, label %l2
+
+l1:
+ call void @j()
+ br label %l3
+
+l2:
+ call void @k()
+ br label %l3
+
+l3:
+; CHECK: call void @g() [[NOD:#[0-9]+]]
+; CHECK-NOT: call void @g() [[NOD]]
+ call void @g() noduplicate
+ %y = icmp ult i32 %p, 5
+ br i1 %y, label %l4, label %l5
+
+l4:
+ call void @j()
+ ret void
+
+l5:
+ call void @k()
+ ret void
+; CHECK: }
+}
+
+; CHECK: attributes [[NOD]] = { noduplicate }
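+
+; The mechanism (sketch): threading the second branch through %l3 would
+; require cloning the block containing the call to @g into both
+; predecessors; 'noduplicate' forbids duplicating that call, so exactly
+; one call to @g must survive, as the CHECK-NOT above verifies.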
diff --git a/test/Transforms/JumpThreading/degenerate-phi.ll b/test/Transforms/JumpThreading/degenerate-phi.ll
index 35d9fdec4281..2905b43af72c 100644
--- a/test/Transforms/JumpThreading/degenerate-phi.ll
+++ b/test/Transforms/JumpThreading/degenerate-phi.ll
@@ -1,4 +1,4 @@
-; RUN: opt -jump-threading -disable-output %s
+; RUN: opt -jump-threading -disable-output < %s
; PR9112
; This is actually a test for value tracking. Jump threading produces
diff --git a/test/Transforms/JumpThreading/or-undef.ll b/test/Transforms/JumpThreading/or-undef.ll
index 6e359925b6c6..6311b6df4373 100644
--- a/test/Transforms/JumpThreading/or-undef.ll
+++ b/test/Transforms/JumpThreading/or-undef.ll
@@ -1,4 +1,4 @@
-; RUN: opt -jump-threading -S %s | FileCheck %s
+; RUN: opt -jump-threading -S < %s | FileCheck %s
; rdar://7620633
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
index fe8d44531322..2bf26041626c 100644
--- a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
+++ b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm | lli %defaultjit
+; RUN: opt < %s -licm | lli -force-interpreter
define i32 @main() {
entry:
diff --git a/test/Transforms/LICM/2011-07-06-Alignment.ll b/test/Transforms/LICM/2011-07-06-Alignment.ll
index f97b7010bc02..569231489fec 100644
--- a/test/Transforms/LICM/2011-07-06-Alignment.ll
+++ b/test/Transforms/LICM/2011-07-06-Alignment.ll
@@ -1,4 +1,4 @@
-; RUN: opt -licm -S %s | FileCheck %s
+; RUN: opt -licm -S < %s | FileCheck %s
@A = common global [1024 x float] zeroinitializer, align 4
diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll
index de41d008a746..b43477a56df5 100644
--- a/test/Transforms/LICM/crash.ll
+++ b/test/Transforms/LICM/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -licm %s -disable-output
+; RUN: opt -licm -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/LICM/hoist-invariant-load.ll b/test/Transforms/LICM/hoist-invariant-load.ll
index f9fc551df358..1ba94d6b489c 100644
--- a/test/Transforms/LICM/hoist-invariant-load.ll
+++ b/test/Transforms/LICM/hoist-invariant-load.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt < %s -licm -stats -S 2>&1 | grep "1 licm"
@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll
index 98f93345e3c3..1ca377eb4a99 100644
--- a/test/Transforms/LICM/hoisting.ll
+++ b/test/Transforms/LICM/hoisting.ll
@@ -90,3 +90,29 @@ for.end: ; preds = %for.body
declare void @foo_may_call_exit(i32)
+; PR14854
+; CHECK: @test5
+; CHECK: extractvalue
+; CHECK: br label %tailrecurse
+; CHECK: tailrecurse:
+; CHECK: ifend:
+; CHECK: insertvalue
+define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) {
+entry:
+ br label %tailrecurse
+
+tailrecurse: ; preds = %then, %entry
+ %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
+ %out = extractvalue { i32*, i32 } %e, 1
+ %d = insertvalue { i32*, i32 } %e, i32* null, 0
+ %cmp1 = icmp sgt i32 %out, %i.tr
+ br i1 %cmp1, label %then, label %ifend
+
+then: ; preds = %tailrecurse
+ call void @foo()
+ %cmp2 = add i32 %i.tr, 1
+ br label %tailrecurse
+
+ifend: ; preds = %tailrecurse
+ ret { i32*, i32 } %d
+}
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index 05a64d632274..e7eab92aa8d7 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -1,28 +1,28 @@
-; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+; RUN: opt < %s -basicaa -tbaa -licm -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-@X = global i32 7 ; <i32*> [#uses=4]
+@X = global i32 7 ; <i32*> [#uses=4]
define void @test1(i32 %i) {
Entry:
- br label %Loop
+ br label %Loop
; CHECK: @test1
; CHECK: Entry:
; CHECK-NEXT: load i32* @X
; CHECK-NEXT: br label %Loop
-Loop: ; preds = %Loop, %0
- %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] ; <i32> [#uses=1]
- %x = load i32* @X ; <i32> [#uses=1]
- %x2 = add i32 %x, 1 ; <i32> [#uses=1]
- store i32 %x2, i32* @X
- %Next = add i32 %j, 1 ; <i32> [#uses=2]
- %cond = icmp eq i32 %Next, 0 ; <i1> [#uses=1]
- br i1 %cond, label %Out, label %Loop
+Loop: ; preds = %Loop, %0
+ %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] ; <i32> [#uses=1]
+ %x = load i32* @X ; <i32> [#uses=1]
+ %x2 = add i32 %x, 1 ; <i32> [#uses=1]
+ store i32 %x2, i32* @X
+ %Next = add i32 %j, 1 ; <i32> [#uses=2]
+ %cond = icmp eq i32 %Next, 0 ; <i1> [#uses=1]
+ br i1 %cond, label %Out, label %Loop
-Out:
- ret void
+Out:
+ ret void
; CHECK: Out:
; CHECK-NEXT: store i32 %x2, i32* @X
; CHECK-NEXT: ret void
@@ -31,22 +31,22 @@ Out:
define void @test2(i32 %i) {
Entry:
- br label %Loop
+ br label %Loop
; CHECK: @test2
; CHECK: Entry:
; CHECK-NEXT: %.promoted = load i32* getelementptr inbounds (i32* @X, i64 1)
; CHECK-NEXT: br label %Loop
-Loop: ; preds = %Loop, %0
- %X1 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1]
- %A = load i32* %X1 ; <i32> [#uses=1]
- %V = add i32 %A, 1 ; <i32> [#uses=1]
- %X2 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1]
- store i32 %V, i32* %X2
- br i1 false, label %Loop, label %Exit
+Loop: ; preds = %Loop, %0
+ %X1 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1]
+ %A = load i32* %X1 ; <i32> [#uses=1]
+ %V = add i32 %A, 1 ; <i32> [#uses=1]
+ %X2 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1]
+ store i32 %V, i32* %X2
+ br i1 false, label %Loop, label %Exit
-Exit: ; preds = %Loop
- ret void
+Exit: ; preds = %Loop
+ ret void
; CHECK: Exit:
; CHECK-NEXT: store i32 %V, i32* getelementptr inbounds (i32* @X, i64 1)
; CHECK-NEXT: ret void
@@ -56,19 +56,19 @@ Exit: ; preds = %Loop
define void @test3(i32 %i) {
; CHECK: @test3
- br label %Loop
+ br label %Loop
Loop:
; Should not promote this to a register
- %x = load volatile i32* @X
- %x2 = add i32 %x, 1
- store i32 %x2, i32* @X
- br i1 true, label %Out, label %Loop
-
+ %x = load volatile i32* @X
+ %x2 = add i32 %x, 1
+ store i32 %x2, i32* @X
+ br i1 true, label %Out, label %Loop
+
; CHECK: Loop:
; CHECK-NEXT: load volatile
-Out: ; preds = %Loop
- ret void
+Out: ; preds = %Loop
+ ret void
}
; PR8041
@@ -120,27 +120,27 @@ exit:
define void @test5(i32 %i, i32** noalias %P2) {
Entry:
- br label %Loop
+ br label %Loop
; CHECK: @test5
; CHECK: Entry:
; CHECK-NEXT: load i32* @X
; CHECK-NEXT: br label %Loop
-Loop: ; preds = %Loop, %0
- %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] ; <i32> [#uses=1]
- %x = load i32* @X ; <i32> [#uses=1]
- %x2 = add i32 %x, 1 ; <i32> [#uses=1]
- store i32 %x2, i32* @X
-
+Loop: ; preds = %Loop, %0
+ %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] ; <i32> [#uses=1]
+ %x = load i32* @X ; <i32> [#uses=1]
+ %x2 = add i32 %x, 1 ; <i32> [#uses=1]
+ store i32 %x2, i32* @X
+
store volatile i32* @X, i32** %P2
-
- %Next = add i32 %j, 1 ; <i32> [#uses=2]
- %cond = icmp eq i32 %Next, 0 ; <i1> [#uses=1]
- br i1 %cond, label %Out, label %Loop
-Out:
- ret void
+ %Next = add i32 %j, 1 ; <i32> [#uses=2]
+ %cond = icmp eq i32 %Next, 0 ; <i1> [#uses=1]
+ br i1 %cond, label %Out, label %Loop
+
+Out:
+ ret void
; CHECK: Out:
; CHECK-NEXT: store i32 %x2, i32* @X
; CHECK-NEXT: ret void
@@ -148,3 +148,40 @@ Out:
}
+; PR14753 - Preserve TBAA tags when promoting values in a loop.
+define void @test6(i32 %n, float* nocapture %a, i32* %gi) {
+entry:
+ store i32 0, i32* %gi, align 4, !tbaa !0
+ %cmp1 = icmp slt i32 0, %n
+ br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %storemerge2 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %idxprom = sext i32 %storemerge2 to i64
+ %arrayidx = getelementptr inbounds float* %a, i64 %idxprom
+ store float 0.000000e+00, float* %arrayidx, align 4, !tbaa !3
+ %0 = load i32* %gi, align 4, !tbaa !0
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %gi, align 4, !tbaa !0
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ ret void
+
+; CHECK: for.body.lr.ph:
+; CHECK-NEXT: %gi.promoted = load i32* %gi, align 4, !tbaa !0
+; CHECK: for.cond.for.end_crit_edge:
+; CHECK-NEXT: store i32 %inc, i32* %gi, align 4, !tbaa !0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"float", metadata !1}
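+
+; What the metadata encodes (sketch): !0 tags "int" accesses and !3 tags
+; "float" accesses, both children of the "omnipotent char" node !1, whose
+; root is !2; the CHECK lines confirm the promoted load and sunk store of
+; %gi keep the "int" tag !0 instead of dropping TBAA information.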
diff --git a/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll b/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
index 40c6629e6f4f..cf9d8ce923ba 100644
--- a/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
+++ b/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -loop-deletion -disable-output
+; RUN: opt -loop-deletion -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/LoopDeletion/simplify-then-delete.ll b/test/Transforms/LoopDeletion/simplify-then-delete.ll
index 5a21672a5960..4278ef16d214 100644
--- a/test/Transforms/LoopDeletion/simplify-then-delete.ll
+++ b/test/Transforms/LoopDeletion/simplify-then-delete.ll
@@ -4,7 +4,7 @@
; Indvars and loop deletion should be able to eliminate all looping
; in this testcase.
-; CHECK: define i32 @pmat(i32 %m, i32 %n, double* %y) nounwind {
+; CHECK: define i32 @pmat(i32 %m, i32 %n, double* %y) #0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i32 0
; CHECK-NEXT: }
@@ -63,3 +63,5 @@ w.e:
w.e12:
ret i32 0
}
+
+; CHECK: attributes #0 = { nounwind }
diff --git a/test/Transforms/LoopIdiom/X86/lit.local.cfg b/test/Transforms/LoopIdiom/X86/lit.local.cfg
new file mode 100644
index 000000000000..a8ad0f1a28b2
--- /dev/null
+++ b/test/Transforms/LoopIdiom/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'X86' not in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoopIdiom/X86/popcnt.ll b/test/Transforms/LoopIdiom/X86/popcnt.ll
new file mode 100644
index 000000000000..25df93d3a082
--- /dev/null
+++ b/test/Transforms/LoopIdiom/X86/popcnt.ll
@@ -0,0 +1,140 @@
+; RUN: opt -loop-idiom < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -S | FileCheck %s
+
+; To recognize this pattern:
+; int popcount(unsigned long long a) {
+;   int c = 0;
+;   while (a) {
+;     c++;
+;     a &= a - 1;
+;   }
+;   return c;
+; }
+;
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount(i64 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i64 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i64 %a.addr.04, -1
+ %and = and i64 %sub, %a.addr.04
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
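+
+; Why the loop computes a population count (worked): "a &= a - 1" clears
+; the lowest set bit each iteration, e.g. a = 0b1010 -> 0b1000 -> 0b0000,
+; two iterations for two set bits, so the trip count equals ctpop(a).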
+
+; To recognize this pattern:
+; int popcount2(unsigned long long a, int mydata1, int mydata2) {
+;   int c = 0;
+;   while (a) {
+;     c++;
+;     a &= a - 1;
+;     mydata1 *= c;
+;     mydata2 *= (int)a;
+;   }
+;   return c + mydata1 + mydata2;
+; }
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
+entry:
+ %tobool9 = icmp eq i64 %a, 0
+ br i1 %tobool9, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
+ %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
+ %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.013, 1
+ %sub = add i64 %a.addr.010, -1
+ %and = and i64 %sub, %a.addr.010
+ %mul = mul nsw i32 %inc, %mydata1.addr.011
+ %conv = trunc i64 %and to i32
+ %mul1 = mul nsw i32 %conv, %mydata2.addr.012
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
+ %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
+ %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
+ %add2 = add i32 %add, %c.0.lcssa
+ ret i32 %add2
+}
+
+; Some variants that once caused crashes
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @PopCntCrash1(i64 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i64 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %t = add i32 %c.05, %c.05
+ %inc = add nsw i32 %t, 1
+ %sub = add i64 %a.addr.04, -1
+ %and = and i64 %sub, %a.addr.04
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+
+; CHECK: entry
+; CHECK: ret
+}
+
+define i32 @PopCntCrash2(i64 %a, i32 %b) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i64 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ %b, %entry ]
+ %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i64 %a.addr.04, -1
+ %and = and i64 %sub, %a.addr.04
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+
+define i32 @PopCntCrash3(i64 %a, i32 %x) {
+entry:
+ %tobool3 = icmp eq i64 %a, 0
+ %cmp = icmp eq i32 %x, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i64 %a.addr.04, -1
+ %and = and i64 %sub, %a.addr.04
+ %tobool = icmp eq i64 %and, 0
+ br i1 %cmp, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
diff --git a/test/Transforms/LoopRotate/basic.ll b/test/Transforms/LoopRotate/basic.ll
index b7bcb21d56f8..78878f9fa663 100644
--- a/test/Transforms/LoopRotate/basic.ll
+++ b/test/Transforms/LoopRotate/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-rotate %s | FileCheck %s
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
@@ -33,3 +33,29 @@ for.end: ; preds = %for.cond
declare void @g(i32*)
+; CHECK: @test2
+define void @test2() nounwind ssp {
+entry:
+ %array = alloca [20 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i.0, 100
+; CHECK: call void @f
+; CHECK-NOT: call void @f
+ call void @f() noduplicate
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %inc = add nsw i32 %i.0, 1
+ call void @h()
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+; CHECK: }
+}
+
+declare void @f() noduplicate
+declare void @h()
diff --git a/test/Transforms/LoopRotate/crash.ll b/test/Transforms/LoopRotate/crash.ll
index 954b83476551..fd922cb5569e 100644
--- a/test/Transforms/LoopRotate/crash.ll
+++ b/test/Transforms/LoopRotate/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-rotate %s -disable-output -verify-dom-info -verify-loop-info
+; RUN: opt -loop-rotate -disable-output -verify-dom-info -verify-loop-info < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index b32ee82d3a57..6a8d30820f6e 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-rotate %s | FileCheck %s
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
diff --git a/test/Transforms/LoopRotate/phi-duplicate.ll b/test/Transforms/LoopRotate/phi-duplicate.ll
index 737283092250..8ad2dce71a65 100644
--- a/test/Transforms/LoopRotate/phi-duplicate.ll
+++ b/test/Transforms/LoopRotate/phi-duplicate.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S %s -loop-rotate | FileCheck %s
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0"
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll b/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll
deleted file mode 100644
index c650d8cf76d8..000000000000
--- a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll
+++ /dev/null
@@ -1,99 +0,0 @@
-; RUN: opt < %s -loop-reduce -S | grep "phi double" | count 1
-
-define void @foobar(i32 %n) nounwind {
-entry:
- icmp eq i32 %n, 0 ; <i1>:0 [#uses=2]
- br i1 %0, label %return, label %bb.nph
-
-bb.nph: ; preds = %entry
- %umax = select i1 %0, i32 1, i32 %n ; <i32> [#uses=1]
- br label %bb
-
-bb: ; preds = %bb, %bb.nph
- %i.03 = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
- tail call void @bar( i32 %i.03 ) nounwind
- uitofp i32 %i.03 to double ; <double>:1 [#uses=1]
- tail call void @foo( double %1 ) nounwind
- %indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2]
- %exitcond = icmp eq i32 %indvar.next, %umax ; <i1> [#uses=1]
- br i1 %exitcond, label %return, label %bb
-
-return: ; preds = %bb, %entry
- ret void
-}
-
-; Unable to eliminate cast because the mantissa bits for double are not enough
-; to hold all of i64 IV bits.
-define void @foobar2(i64 %n) nounwind {
-entry:
- icmp eq i64 %n, 0 ; <i1>:0 [#uses=2]
- br i1 %0, label %return, label %bb.nph
-
-bb.nph: ; preds = %entry
- %umax = select i1 %0, i64 1, i64 %n ; <i64> [#uses=1]
- br label %bb
-
-bb: ; preds = %bb, %bb.nph
- %i.03 = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=3]
- trunc i64 %i.03 to i32 ; <i32>:1 [#uses=1]
- tail call void @bar( i32 %1 ) nounwind
- uitofp i64 %i.03 to double ; <double>:2 [#uses=1]
- tail call void @foo( double %2 ) nounwind
- %indvar.next = add i64 %i.03, 1 ; <i64> [#uses=2]
- %exitcond = icmp eq i64 %indvar.next, %umax ; <i1> [#uses=1]
- br i1 %exitcond, label %return, label %bb
-
-return: ; preds = %bb, %entry
- ret void
-}
-
-; Unable to eliminate cast due to potentional overflow.
-define void @foobar3() nounwind {
-entry:
- tail call i32 (...)* @nn( ) nounwind ; <i32>:0 [#uses=1]
- icmp eq i32 %0, 0 ; <i1>:1 [#uses=1]
- br i1 %1, label %return, label %bb
-
-bb: ; preds = %bb, %entry
- %i.03 = phi i32 [ 0, %entry ], [ %3, %bb ] ; <i32> [#uses=3]
- tail call void @bar( i32 %i.03 ) nounwind
- uitofp i32 %i.03 to double ; <double>:2 [#uses=1]
- tail call void @foo( double %2 ) nounwind
- add i32 %i.03, 1 ; <i32>:3 [#uses=2]
- tail call i32 (...)* @nn( ) nounwind ; <i32>:4 [#uses=1]
- icmp ugt i32 %4, %3 ; <i1>:5 [#uses=1]
- br i1 %5, label %bb, label %return
-
-return: ; preds = %bb, %entry
- ret void
-}
-
-; Unable to eliminate cast due to overflow.
-define void @foobar4() nounwind {
-entry:
- br label %bb.nph
-
-bb.nph: ; preds = %entry
- br label %bb
-
-bb: ; preds = %bb, %bb.nph
- %i.03 = phi i8 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
- %tmp2 = sext i8 %i.03 to i32 ; <i32>:0 [#uses=1]
- tail call void @bar( i32 %tmp2 ) nounwind
- %tmp3 = uitofp i8 %i.03 to double ; <double>:1 [#uses=1]
- tail call void @foo( double %tmp3 ) nounwind
- %indvar.next = add i8 %i.03, 1 ; <i32> [#uses=2]
- %tmp = sext i8 %indvar.next to i32
- %exitcond = icmp eq i32 %tmp, 32767 ; <i1> [#uses=1]
- br i1 %exitcond, label %return, label %bb
-
-return: ; preds = %bb, %entry
- ret void
-}
-
-declare void @bar(i32)
-
-declare void @foo(double)
-
-declare i32 @nn(...)
-
diff --git a/test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll b/test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll
deleted file mode 100644
index 5d9ed64ef422..000000000000
--- a/test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: opt < %s -loop-reduce -S | FileCheck %s
-;
-; Test LSR's OptimizeShadowIV. Handle a floating-point IV with a
-; nonzero initial value.
-; rdar://9786536
-
-; First, make sure LSR doesn't crash on an empty IVUsers list.
-; CHECK: @dummyIV
-; CHECK-NOT: phi
-; CHECK-NOT: sitofp
-; CHECK: br
-define void @dummyIV() nounwind {
-entry:
- br label %loop
-
-loop:
- %i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
- %conv = sitofp i32 %i.01 to double
- %inc = add nsw i32 %i.01, 1
- br i1 undef, label %loop, label %for.end
-
-for.end:
- unreachable
-}
-
-; Now check that the computed double constant is correct.
-; CHECK: @doubleIV
-; CHECK: phi double [ -3.900000e+01, %entry ]
-; CHECK: br
-define void @doubleIV() nounwind {
-entry:
- br label %loop
-
-loop:
- %i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
- %conv = sitofp i32 %i.01 to double
- %div = fdiv double %conv, 4.000000e+01
- %inc = add nsw i32 %i.01, 1
- br i1 undef, label %loop, label %for.end
-
-for.end:
- unreachable
-}
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
index 3793baccbbc1..9524be3ceee0 100644
--- a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
+++ b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-reduce -disable-output -debug-only=loop-reduce %s 2> %t
+; RUN: opt -loop-reduce -disable-output -debug-only=loop-reduce < %s 2> %t
; RUN: FileCheck %s < %t
; REQUIRES: asserts
;
@@ -10,15 +10,13 @@
; CHECK: After generating reuse formulae:
; CHECK: LSR is examining the following uses:
; CHECK: LSR Use: Kind=Special
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
; CHECK-NOT:reg
; CHECK: Filtering for use
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-freebsd9"
%struct.snork = type { %struct.fuga, i32, i32, i32, i32, i32, i32 }
%struct.fuga = type { %struct.gork, i64 }
diff --git a/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll b/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll
new file mode 100644
index 000000000000..bce234cd4066
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2013-01-05-IndBr.ll
@@ -0,0 +1,44 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; Indirect branch in the preheader crashes replaceCongruentIVs.
+; rdar://12910141
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+; CHECK: @test
+; CHECK: bb8:
+; CHECK-NEXT: phi i8
+; CHECK-NEXT: phi i8
+; CHECK: ret void
+define void @test() nounwind ssp {
+bb:
+ br label %bb190
+
+bb8: ; preds = %bb190, %bb11
+ %tmp = phi i8 [ %tmp14, %bb11 ], [ 25, %bb190 ]
+ %tmp9 = phi i8 [ %tmp12, %bb11 ], [ 25, %bb190 ]
+ %tmp10 = add i8 %tmp, -5
+ indirectbr i8* undef, [label %bb11, label %bb15]
+
+bb11: ; preds = %bb8
+ %tmp12 = add i8 %tmp9, 1
+ %tmp13 = add i8 %tmp9, -19
+ %tmp14 = add i8 %tmp, 1
+ indirectbr i8* undef, [label %bb8]
+
+bb15: ; preds = %bb8
+ indirectbr i8* undef, [label %bb16]
+
+bb16: ; preds = %bb16, %bb15
+ indirectbr i8* undef, [label %bb37, label %bb190]
+
+
+bb37: ; preds = %bb190
+ indirectbr i8* undef, [label %bb38]
+
+bb38: ; preds = %bb37, %bb5
+ ret void
+
+bb190: ; preds = %bb189, %bb187
+ indirectbr i8* undef, [label %bb37, label %bb8]
+}
diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
new file mode 100644
index 000000000000..8fbddf8ae4c8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
@@ -0,0 +1,84 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; LTO of clang, which mistakenly uses no TargetLoweringInfo, causes a
+; miscompile: ReuseOrCreateCast replaces the ptrtoint operand with undef.
+; Reproducing the miscompile requires no triple, hence no "TTI".
+; rdar://13007381
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Verify that nothing uses the "dead" ptrtoint from "undef".
+; CHECK: @VerifyDiagnosticConsumerTest
+; CHECK: bb:
+; CHECK: %0 = ptrtoint i8* undef to i64
+; CHECK-NOT: %0
+; CHECK: .lr.ph
+; CHECK-NOT: %0
+; CHECK: sub i64 %7, %tmp6
+; CHECK-NOT: %0
+; CHECK: ret void
+define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {
+bb:
+ %tmp3 = call i8* @getCharData() nounwind
+ %tmp4 = call i8* @getCharData() nounwind
+ %tmp5 = ptrtoint i8* %tmp4 to i64
+ %tmp6 = ptrtoint i8* %tmp3 to i64
+ %tmp7 = sub i64 %tmp5, %tmp6
+ br i1 undef, label %bb87, label %.preheader
+
+.preheader: ; preds = %bb10, %bb
+ br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %bb10
+
+bb10: ; preds = %.preheader
+ br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42, label %.preheader
+
+_ZNK4llvm9StringRef4findEcm.exit42: ; preds = %bb10
+ br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit42.thread, label %.lr.ph
+
+_ZNK4llvm9StringRef4findEcm.exit42.thread: ; preds = %_ZNK4llvm9StringRef4findEcm.exit42, %.preheader
+ unreachable
+
+.lr.ph: ; preds = %_ZNK4llvm9StringRef4findEcm.exit42
+ br label %bb36
+
+_ZNK4llvm9StringRef4findEcm.exit.loopexit: ; preds = %bb63
+ %tmp21 = icmp eq i64 %i.0.i, -1
+ br i1 %tmp21, label %_ZNK4llvm9StringRef4findEcm.exit._crit_edge, label %bb36
+
+_ZNK4llvm9StringRef4findEcm.exit._crit_edge: ; preds = %bb61, %_ZNK4llvm9StringRef4findEcm.exit.loopexit
+ unreachable
+
+bb36: ; preds = %_ZNK4llvm9StringRef4findEcm.exit.loopexit, %.lr.ph
+ %loc.063 = phi i64 [ undef, %.lr.ph ], [ %i.0.i, %_ZNK4llvm9StringRef4findEcm.exit.loopexit ]
+ switch i8 undef, label %bb57 [
+ i8 10, label %bb48
+ i8 13, label %bb48
+ ]
+
+bb48: ; preds = %bb36, %bb36
+ br label %bb58
+
+bb57: ; preds = %bb36
+ br label %bb58
+
+bb58: ; preds = %bb57, %bb48
+ %tmp59 = icmp ugt i64 %tmp7, undef
+ %tmp60 = select i1 %tmp59, i64 undef, i64 %tmp7
+ br label %bb61
+
+bb61: ; preds = %bb63, %bb58
+ %i.0.i = phi i64 [ %tmp60, %bb58 ], [ %tmp67, %bb63 ]
+ %tmp62 = icmp eq i64 %i.0.i, %tmp7
+ br i1 %tmp62, label %_ZNK4llvm9StringRef4findEcm.exit._crit_edge, label %bb63
+
+bb63: ; preds = %bb61
+ %tmp64 = getelementptr inbounds i8* %tmp3, i64 %i.0.i
+ %tmp65 = load i8* %tmp64, align 1
+ %tmp67 = add i64 %i.0.i, 1
+ br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit.loopexit, label %bb61
+
+bb87: ; preds = %bb
+ ret void
+}
+
+declare i8* @getCharData()
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index 9189d79e2fb6..ee3cc4dd78fc 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -205,18 +205,18 @@ for.end: ; preds = %for.body
; post-increment addressing, no add's or add.w's beyond the three
; mentioned. Most importantly, there should be no spills or reloads!
;
-; CHECK: testNeon:
-; CHECK: %.lr.ph
-; CHECK-NOT: lsl.w
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK: add.w r
-; CHECK-NOT: {{ldr|str|adds|add r}}
-; CHECK-NOT: add.w r
-; CHECK: bne
+; A9: testNeon:
+; A9: %.lr.ph
+; A9-NOT: lsl.w
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9-NOT: add.w r
+; A9: bne
define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
%1 = icmp sgt i32 %limit, 0
br i1 %1, label %.lr.ph, label %45
@@ -290,3 +290,80 @@ define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i
}
declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
+
+; Handle chains in which the same offset is used for both loads and
+; stores to the same array.
+; rdar://11410078.
+;
+; A9: @testReuse
+; A9: %for.body
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE:[r[0-9]+]]], [[INC:r[0-9]]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vld1.8 {d{{[0-9]+}}}, [[BASE]], {{r[0-9]}}
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]], [[INC]]
+; A9: vst1.8 {d{{[0-9]+}}}, [[BASE]]
+; A9: bne
+define void @testReuse(i8* %src, i32 %stride) nounwind ssp {
+entry:
+ %mul = shl nsw i32 %stride, 2
+ %idx.neg = sub i32 0, %mul
+ %mul1 = mul nsw i32 %stride, 3
+ %idx.neg2 = sub i32 0, %mul1
+ %mul5 = shl nsw i32 %stride, 1
+ %idx.neg6 = sub i32 0, %mul5
+ %idx.neg10 = sub i32 0, %stride
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.0110 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %src.addr = phi i8* [ %src, %entry ], [ %add.ptr45, %for.body ]
+ %add.ptr = getelementptr inbounds i8* %src.addr, i32 %idx.neg
+ %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr, i32 1)
+ %add.ptr3 = getelementptr inbounds i8* %src.addr, i32 %idx.neg2
+ %vld2 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr3, i32 1)
+ %add.ptr7 = getelementptr inbounds i8* %src.addr, i32 %idx.neg6
+ %vld3 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr7, i32 1)
+ %add.ptr11 = getelementptr inbounds i8* %src.addr, i32 %idx.neg10
+ %vld4 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr11, i32 1)
+ %vld5 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %src.addr, i32 1)
+ %add.ptr17 = getelementptr inbounds i8* %src.addr, i32 %stride
+ %vld6 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr17, i32 1)
+ %add.ptr20 = getelementptr inbounds i8* %src.addr, i32 %mul5
+ %vld7 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr20, i32 1)
+ %add.ptr23 = getelementptr inbounds i8* %src.addr, i32 %mul1
+ %vld8 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr23, i32 1)
+ %vadd1 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld1, <8 x i8> %vld2) nounwind
+ %vadd2 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld2, <8 x i8> %vld3) nounwind
+ %vadd3 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld3, <8 x i8> %vld4) nounwind
+ %vadd4 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld4, <8 x i8> %vld5) nounwind
+ %vadd5 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld5, <8 x i8> %vld6) nounwind
+ %vadd6 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld6, <8 x i8> %vld7) nounwind
+ tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr3, <8 x i8> %vadd1, i32 1)
+ tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr7, <8 x i8> %vadd2, i32 1)
+ tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr11, <8 x i8> %vadd3, i32 1)
+ tail call void @llvm.arm.neon.vst1.v8i8(i8* %src.addr, <8 x i8> %vadd4, i32 1)
+ tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr17, <8 x i8> %vadd5, i32 1)
+ tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr20, <8 x i8> %vadd6, i32 1)
+ %inc = add nsw i32 %i.0110, 1
+ %add.ptr45 = getelementptr inbounds i8* %src.addr, i32 8
+ %exitcond = icmp eq i32 %inc, 4
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
+
+declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll b/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll
new file mode 100644
index 000000000000..9a7f4865c591
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll
@@ -0,0 +1,99 @@
+; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | grep "phi double" | count 1
+
+define void @foobar(i32 %n) nounwind {
+entry:
+ icmp eq i32 %n, 0 ; <i1>:0 [#uses=2]
+ br i1 %0, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+ %umax = select i1 %0, i32 1, i32 %n ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+ %i.03 = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
+ tail call void @bar( i32 %i.03 ) nounwind
+ uitofp i32 %i.03 to double ; <double>:1 [#uses=1]
+ tail call void @foo( double %1 ) nounwind
+ %indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %umax ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+; Unable to eliminate the cast because double's mantissa does not have
+; enough bits to hold all of the i64 IV's bits.
+define void @foobar2(i64 %n) nounwind {
+entry:
+ icmp eq i64 %n, 0 ; <i1>:0 [#uses=2]
+ br i1 %0, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+ %umax = select i1 %0, i64 1, i64 %n ; <i64> [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+ %i.03 = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=3]
+ trunc i64 %i.03 to i32 ; <i32>:1 [#uses=1]
+ tail call void @bar( i32 %1 ) nounwind
+ uitofp i64 %i.03 to double ; <double>:2 [#uses=1]
+ tail call void @foo( double %2 ) nounwind
+ %indvar.next = add i64 %i.03, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %indvar.next, %umax ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+; Unable to eliminate cast due to potential overflow.
+define void @foobar3() nounwind {
+entry:
+ tail call i32 (...)* @nn( ) nounwind ; <i32>:0 [#uses=1]
+ icmp eq i32 %0, 0 ; <i1>:1 [#uses=1]
+ br i1 %1, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+ %i.03 = phi i32 [ 0, %entry ], [ %3, %bb ] ; <i32> [#uses=3]
+ tail call void @bar( i32 %i.03 ) nounwind
+ uitofp i32 %i.03 to double ; <double>:2 [#uses=1]
+ tail call void @foo( double %2 ) nounwind
+ add i32 %i.03, 1 ; <i32>:3 [#uses=2]
+ tail call i32 (...)* @nn( ) nounwind ; <i32>:4 [#uses=1]
+ icmp ugt i32 %4, %3 ; <i1>:5 [#uses=1]
+ br i1 %5, label %bb, label %return
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+; Unable to eliminate cast due to overflow.
+define void @foobar4() nounwind {
+entry:
+ br label %bb.nph
+
+bb.nph: ; preds = %entry
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+ %i.03 = phi i8 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
+ %tmp2 = sext i8 %i.03 to i32 ; <i32>:0 [#uses=1]
+ tail call void @bar( i32 %tmp2 ) nounwind
+ %tmp3 = uitofp i8 %i.03 to double ; <double>:1 [#uses=1]
+ tail call void @foo( double %tmp3 ) nounwind
+ %indvar.next = add i8 %i.03, 1 ; <i32> [#uses=2]
+ %tmp = sext i8 %indvar.next to i32
+ %exitcond = icmp eq i32 %tmp, 32767 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+declare void @bar(i32)
+
+declare void @foo(double)
+
+declare i32 @nn(...)
+
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll
new file mode 100644
index 000000000000..a932b4792586
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | FileCheck %s
+;
+; Test LSR's OptimizeShadowIV. Handle a floating-point IV with a
+; nonzero initial value.
+; rdar://9786536
+
+; First, make sure LSR doesn't crash on an empty IVUsers list.
+; CHECK: @dummyIV
+; CHECK-NOT: phi
+; CHECK-NOT: sitofp
+; CHECK: br
+define void @dummyIV() nounwind {
+entry:
+ br label %loop
+
+loop:
+ %i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
+ %conv = sitofp i32 %i.01 to double
+ %inc = add nsw i32 %i.01, 1
+ br i1 undef, label %loop, label %for.end
+
+for.end:
+ unreachable
+}
+
+; Now check that the computed double constant is correct.
+; CHECK: @doubleIV
+; CHECK: phi double [ -3.900000e+01, %entry ]
+; CHECK: br
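+; (The integer IV starts at -39, so the shadow FP IV must be seeded with the
+; exactly converted constant -3.9e+01 rather than with zero.)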
+define void @doubleIV() nounwind {
+entry:
+ br label %loop
+
+loop:
+ %i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
+ %conv = sitofp i32 %i.01 to double
+ %div = fdiv double %conv, 4.000000e+01
+ %inc = add nsw i32 %i.01, 1
+ br i1 undef, label %loop, label %for.end
+
+for.end:
+ unreachable
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
index 510865096272..eedfc200f48b 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
;
; Test LSR's ability to prune formulae that refer to nonexistent
; AddRecs in other loops.
@@ -15,13 +15,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin"
; CHECK: @test
-; CHECK: # %for.body{{$}}
-; dummyiv copy should be removed
-; CHECK-NOT: movq
-; CHECK: # %for.cond19.preheader
-; dummycnt should be removed
-; CHECK-NOT: incq
-; CHECK: # %for.body23{{$}}
+; CHECK: for.body:
+; CHECK: %lsr.iv
+; CHECK-NOT: %dummyout
+; CHECK: ret
define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp {
entry:
%cmp34 = icmp eq i64 %count, 0
diff --git a/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
index b87bf620decf..ff8cab83137b 100644
--- a/test/Transforms/LoopStrengthReduce/dominate-assert.ll
+++ b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-reduce %s
+; RUN: opt -loop-reduce < %s
; we used to crash on this one
declare i8* @_Znwm()
diff --git a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
index ad4959be340e..498be1a9a1a2 100644
--- a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
+++ b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
@@ -2,7 +2,7 @@
; having overlapping live ranges that result in copies. We want the setcc
; instruction immediately before the conditional branch.
;
-; RUN: opt -S -loop-reduce %s | FileCheck %s
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
define void @foo(float* %D, i32 %E) {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 96904c66e640..45aeb4e691a0 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -4,18 +4,17 @@
; LSR should properly handle the post-inc offset when folding the
; non-IV operand of an icmp into the IV.
-; CHECK: %4 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK: %5 = lshr i64 %4, 1
-; CHECK: %6 = mul i64 %5, 2
+; CHECK: [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK: [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
+; CHECK: [[r3:%[a-z0-9]+]] = mul i64 [[r2]], 2
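+; The [[r1:%[a-z0-9]+]] patterns bind whatever names LSR happens to emit to
+; FileCheck variables, so the test no longer depends on the exact numbering
+; of the unnamed values it used to match as %4, %5 and %6.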
; CHECK: br label %for.body
; CHECK: for.body:
-; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %6, %for.body.lr.ph ]
+; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r3]], %for.body.lr.ph ]
; CHECK: %lsr.iv.next = add i64 %lsr.iv2, -2
; CHECK: %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16*
; CHECK: %cmp27 = icmp eq i16* %lsr.iv.next3, null
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
%struct.Vector2 = type { i16*, [64 x i16], i32 }
diff --git a/test/Transforms/LoopUnroll/basic.ll b/test/Transforms/LoopUnroll/basic.ll
index eeb3e9a57b06..ab5bc568ede4 100644
--- a/test/Transforms/LoopUnroll/basic.ll
+++ b/test/Transforms/LoopUnroll/basic.ll
@@ -22,3 +22,26 @@ l1: ; preds = %l1, %entry
l2: ; preds = %l1
ret i32 0
}
+
+; This should not unroll since the call is 'noduplicate'.
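+; The loop runs exactly 3 iterations, so unrolling would have to emit extra
+; copies of the call, and 'noduplicate' forbids duplicating it.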
+
+; CHECK: @test2
+define i32 @test2(i8** %P) nounwind ssp {
+entry:
+ br label %l1
+
+l1: ; preds = %l1, %entry
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l1 ]
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+ call void @f() noduplicate
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 3
+ br i1 %exitcond, label %l2, label %l1
+
+l2: ; preds = %l1
+ ret i32 0
+; CHECK: }
+}
+
+declare void @f()
diff --git a/test/Transforms/LoopUnroll/runtime-loop3.ll b/test/Transforms/LoopUnroll/runtime-loop3.ll
index 55cf22373ece..aa928ccc60c1 100644
--- a/test/Transforms/LoopUnroll/runtime-loop3.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop3.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
; Test that nested loops can be unrolled. We need to increase the threshold to do it.
diff --git a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
index 9d73d31d5044..31dba79be1f8 100644
--- a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
+++ b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt < %s -loop-unswitch -stats -disable-output 2>&1 | grep "1 loop-unswitch - Number of branches unswitched" | count 1
; PR 3170
define i32 @a(i32 %x, i32 %y) nounwind {
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
index c1fd58810660..a8608b877205 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -1,5 +1,6 @@
+; REQUIRES: asserts
; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
-; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info < %s | FileCheck %s
; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
; STATS: 2 loop-unswitch - Number of switches unswitched
@@ -19,7 +20,7 @@
; CHECK-NEXT: i32 1, label %inc.us
; CHECK: inc.us: ; preds = %loop_begin.us
-; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: call void @incf() [[NOR_NUW:#[0-9]+]]
; CHECK-NEXT: br label %loop_begin.backedge.us
; CHECK: .split: ; preds = %..split_crit_edge
@@ -40,7 +41,7 @@
; CHECK-NEXT: ]
; CHECK: dec.us3: ; preds = %loop_begin.us1
-; CHECK-NEXT: call void @decf() noreturn nounwind
+; CHECK-NEXT: call void @decf() [[NOR_NUW]]
; CHECK-NEXT: br label %loop_begin.backedge.us5
; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
@@ -89,3 +90,6 @@ loop_exit:
declare void @incf() noreturn
declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
index f3db47119958..686cedbbc51a 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -1,5 +1,6 @@
+; REQUIRES: asserts
; RUN: opt -loop-unswitch -loop-unswitch-threshold 13 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
-; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info < %s | FileCheck %s
; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
; STATS: 1 loop-unswitch - Number of switches unswitched
@@ -25,7 +26,7 @@
; CHECK-NEXT: ]
; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us
-; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: call void @incf() [[NOR_NUW:#[0-9]+]]
; CHECK-NEXT: br label %loop_begin.backedge.us
; CHECK: .split: ; preds = %..split_crit_edge
@@ -45,7 +46,7 @@
; CHECK-NEXT: ]
; CHECK: inc: ; preds = %loop_begin.inc_crit_edge, %second_switch
-; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: call void @incf() [[NOR_NUW]]
; CHECK-NEXT: br label %loop_begin.backedge
define i32 @test(i32* %var) {
@@ -82,3 +83,6 @@ loop_exit:
declare void @incf() noreturn
declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
index 270899642ffa..3ba9fc2f5cf1 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -1,5 +1,6 @@
+; REQUIRES: asserts
; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
-; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info < %s | FileCheck %s
; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
; STATS: 3 loop-unswitch - Number of switches unswitched
@@ -30,7 +31,7 @@
; CHECK-NEXT: i32 1, label %inc.us.us
; CHECK: inc.us.us: ; preds = %second_switch.us.us, %loop_begin.us.us
-; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: call void @incf() [[NOR_NUW:#[0-9]+]]
; CHECK-NEXT: br label %loop_begin.backedge.us.us
; CHECK: .split.us.split: ; preds = %.split.us..split.us.split_crit_edge
@@ -50,7 +51,7 @@
; CHECK-NEXT: br i1 true, label %us-unreachable8, label %inc.us
; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
-; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: call void @incf() [[NOR_NUW]]
; CHECK-NEXT: br label %loop_begin.backedge.us
; CHECK: .split: ; preds = %..split_crit_edge
@@ -75,7 +76,7 @@
; CHECK-NEXT: ]
; CHECK: inc.us4: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us3
-; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: call void @incf() [[NOR_NUW]]
; CHECK-NEXT: br label %loop_begin.backedge.us6
; CHECK: loop_begin.inc_crit_edge.us: ; preds = %loop_begin.us1
@@ -136,3 +137,6 @@ loop_exit:
declare void @incf() noreturn
declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/basictest.ll b/test/Transforms/LoopUnswitch/basictest.ll
index 1e6f2cf15ee1..e98d82b6522d 100644
--- a/test/Transforms/LoopUnswitch/basictest.ll
+++ b/test/Transforms/LoopUnswitch/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -verify-loop-info -S 2>&1 | FileCheck %s
define i32 @test(i32* %A, i1 %C) {
entry:
@@ -29,3 +29,40 @@ return: ; preds = %endif, %then
ret i32 %tmp.13
}
+; This simple test would normally be unswitched, but the presence of the
+; noduplicate call should inhibit that.
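+; Unswitching would clone the loop body once per switch destination, giving
+; each clone its own copy of the noduplicate call.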
+
+; CHECK: @test2
+define i32 @test2(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32* %var
+
+ switch i32 %c, label %default [
+ i32 1, label %inc
+ i32 2, label %dec
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+dec:
+; CHECK: call void @decf()
+; CHECK-NOT: call void @decf()
+ call void @decf() noreturn nounwind noduplicate
+ br label %loop_begin
+default:
+ br label %loop_exit
+loop_exit:
+ ret i32 0
+; CHECK: }
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll
index 73391ca8d19d..8261e389370a 100644
--- a/test/Transforms/LoopUnswitch/infinite-loop.ll
+++ b/test/Transforms/LoopUnswitch/infinite-loop.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
; RUN: opt -loop-unswitch -simplifycfg -S < %s | FileCheck %s
; PR5373
@@ -21,11 +22,11 @@
; CHECK-NEXT: br label %cond.end.us
; CHECK: abort0.split:
-; CHECK-NEXT: call void @end0() noreturn nounwind
+; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: abort1:
-; CHECK-NEXT: call void @end1() noreturn nounwind
+; CHECK-NEXT: call void @end1() [[NOR_NUW]]
; CHECK-NEXT: unreachable
; CHECK: }
@@ -51,3 +52,7 @@ abort1:
declare void @end0() noreturn
declare void @end1() noreturn
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }
diff --git a/test/Transforms/LoopUnswitch/preserve-analyses.ll b/test/Transforms/LoopUnswitch/preserve-analyses.ll
index 668f8ecaf8a5..f79612bef51e 100644
--- a/test/Transforms/LoopUnswitch/preserve-analyses.ll
+++ b/test/Transforms/LoopUnswitch/preserve-analyses.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-unswitch -verify-loop-info -verify-dom-info %s -disable-output
+; RUN: opt -loop-unswitch -verify-loop-info -verify-dom-info -disable-output < %s
; Loop unswitch should be able to unswitch these loops and
; preserve LCSSA and LoopSimplify forms.
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
new file mode 100644
index 000000000000..2dd7fe34a70b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @foo
+;CHECK: icmp eq <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @foo(i32 %x, i32 %t, i32* nocapture %A) nounwind uwtable ssp {
+entry:
+ %cmp10 = icmp sgt i32 %x, 0
+ br i1 %cmp10, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %if.end
+ %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4, !tbaa !0
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %for.body
+ %1 = add nsw i64 %indvars.iv, 45
+ %2 = trunc i64 %indvars.iv to i32
+ %mul = mul nsw i32 %2, %t
+ %3 = trunc i64 %1 to i32
+ %add1 = add nsw i32 %3, %mul
+ br label %if.end
+
+if.end: ; preds = %for.body, %if.then
+ %z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ]
+ store i32 %z.0, i32* %arrayidx, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %x
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %if.end, %entry
+ ret i32 undef
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
index 0176c9a18966..aa7cc0ee325d 100644
--- a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
+++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce
; Check that we don't fall into an infinite loop.
define void @test() nounwind {
@@ -25,3 +25,47 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
unreachable
}
+
+;PR14701
+define void @start_model_rare() nounwind uwtable ssp {
+entry:
+ br i1 undef, label %return, label %if.end
+
+if.end: ; preds = %entry
+ br i1 undef, label %cond.false, label %cond.true
+
+cond.true: ; preds = %if.end
+ unreachable
+
+cond.false: ; preds = %if.end
+ br i1 undef, label %cond.false28, label %cond.true20
+
+cond.true20: ; preds = %cond.false
+ unreachable
+
+cond.false28: ; preds = %cond.false
+ br label %for.body40
+
+for.body40: ; preds = %for.inc50, %cond.false28
+ %indvars.iv123 = phi i64 [ 3, %cond.false28 ], [ %indvars.iv.next124, %for.inc50 ]
+ %step.0121 = phi i32 [ 1, %cond.false28 ], [ %step.1, %for.inc50 ]
+ br i1 undef, label %if.then46, label %for.inc50
+
+if.then46: ; preds = %for.body40
+ %inc47 = add nsw i32 %step.0121, 1
+ br label %for.inc50
+
+for.inc50: ; preds = %if.then46, %for.body40
+ %k.1 = phi i32 [ undef, %for.body40 ], [ %inc47, %if.then46 ]
+ %step.1 = phi i32 [ %step.0121, %for.body40 ], [ %inc47, %if.then46 ]
+ %indvars.iv.next124 = add i64 %indvars.iv123, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next124 to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %for.end52, label %for.body40
+
+for.end52: ; preds = %for.inc50
+ unreachable
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
index 2516e248bc96..405582c40899 100644
--- a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
+++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -dce -force-vector-width=4
+; RUN: opt < %s -loop-vectorize -dce -force-vector-unroll=1 -force-vector-width=4
; Check that we don't crash.
diff --git a/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
new file mode 100644
index 000000000000..c8d307f5d443
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S | FileCheck %s --check-prefix=SWIFT
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+;CHECK: @foo
+;CHECK: load <4 x i32>
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret
+;SWIFT: @foo
+;SWIFT: load <4 x i32>
+;SWIFT: load <4 x i32>
+;SWIFT: ret
+define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ]
+ %sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i32* %A, i32 %i.02
+ %3 = load i32* %2, align 4
+ %4 = add nsw i32 %3, %sum.01
+ %5 = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %5, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ]
+ ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/ARM/gcc-examples.ll b/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
new file mode 100644
index 000000000000..6a68e81bcae0
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -mcpu=swift -S -dce | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; Select VF = 8;
+;CHECK: @example1
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
+;CHECK: @example10b
+;CHECK: load <4 x i16>
+;CHECK: sext <4 x i16>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+ %3 = load i16* %2, align 2
+ %4 = sext i16 %3 to i32
+ %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+ store i32 %4, i32* %5, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %6, label %1
+
+; <label>:6 ; preds = %1
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/ARM/lit.local.cfg b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
new file mode 100644
index 000000000000..cb77b09ef4ad
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'ARM' not in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll b/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
new file mode 100644
index 000000000000..d2e3de279f7c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
+; ASM lines below are only for reference; tests in that direction should go in test/CodeGen/ARM
+
+; ModuleID = 'arm.ll'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+%T216 = type <2 x i16>
+%T232 = type <2 x i32>
+%T264 = type <2 x i64>
+
+%T416 = type <4 x i16>
+%T432 = type <4 x i32>
+%T464 = type <4 x i64>
+
+define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'direct':
+ %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+ %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+ %r3 = mul %T432 %v0, %v1
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmul.i32
+ store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'ups1632':
+ %v0 = load %T416* %loadaddr
+; ASM: vldr
+ %v1 = load %T416* %loadaddr2
+; ASM: vldr
+ %r1 = sext %T416 %v0 to %T432
+ %r2 = sext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
+ %r3 = mul %T432 %r1, %r2
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.s16
+ store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'upu1632':
+ %v0 = load %T416* %loadaddr
+; ASM: vldr
+ %v1 = load %T416* %loadaddr2
+; ASM: vldr
+ %r1 = zext %T416 %v0 to %T432
+ %r2 = zext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
+ %r3 = mul %T432 %r1, %r2
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.u16
+ store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'ups3264':
+ %v0 = load %T232* %loadaddr
+; ASM: vldr
+ %v1 = load %T232* %loadaddr2
+; ASM: vldr
+ %r3 = mul %T232 %v0, %v1
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+ %st = sext %T232 %r3 to %T264
+; ASM: vmovl.s32
+; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
+ store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'upu3264':
+ %v0 = load %T232* %loadaddr
+; ASM: vldr
+ %v1 = load %T232* %loadaddr2
+; ASM: vldr
+ %r3 = mul %T232 %v0, %v1
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+ %st = zext %T232 %r3 to %T264
+; ASM: vmovl.u32
+; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
+ store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
+; COST: function 'dn3216':
+ %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+ %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+ %r3 = mul %T432 %v0, %v1
+; ASM: vmul.i32
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+ %st = trunc %T432 %r3 to %T416
+; ASM: vmovn.i32
+; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
+ store %T416 %st, %T416* %storeaddr
+; ASM: vstr
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/ARM/width-detect.ll b/test/Transforms/LoopVectorize/ARM/width-detect.ll
new file mode 100644
index 000000000000..c0795b6a79af
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/width-detect.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+;CHECK:foo_F64
+;CHECK: <2 x double>
+;CHECK:ret
+define double @foo_F64(double* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %prod.01 = phi double [ %4, %.lr.ph ], [ 0.000000e+00, %0 ]
+ %2 = getelementptr inbounds double* %A, i64 %indvars.iv
+ %3 = load double* %2, align 8
+ %4 = fmul fast double %prod.01, %3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %prod.0.lcssa = phi double [ 0.000000e+00, %0 ], [ %4, %.lr.ph ]
+ ret double %prod.0.lcssa
+}
+
+;CHECK:foo_I8
+;CHECK: xor <16 x i8>
+;CHECK:ret
+define signext i8 @foo_I8(i8* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %red.01 = phi i8 [ %4, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i8* %A, i64 %indvars.iv
+ %3 = load i8* %2, align 1
+ %4 = xor i8 %3, %red.01
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %red.0.lcssa = phi i8 [ 0, %0 ], [ %4, %.lr.ph ]
+ ret i8 %red.0.lcssa
+}
+
+
diff --git a/test/Transforms/LoopVectorize/X86/avx1.ll b/test/Transforms/LoopVectorize/X86/avx1.ll
index a2d176a534c9..6c0366eae973 100644
--- a/test/Transforms/LoopVectorize/X86/avx1.ll
+++ b/test/Transforms/LoopVectorize/X86/avx1.ll
@@ -27,7 +27,7 @@ define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwta
;CHECK: @read_mod_i64
-;CHECK: load <8 x i64>
+;CHECK: load <2 x i64>
;CHECK: ret i32
define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
%1 = icmp sgt i32 %n, 0
@@ -37,7 +37,7 @@ define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
%2 = getelementptr inbounds i64* %a, i64 %indvars.iv
%3 = load i64* %2, align 4
- %4 = mul i64 %3, 3
+ %4 = add i64 %3, 3
store i64 %4, i64* %2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
new file mode 100644
index 000000000000..6c924409af37
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
@@ -0,0 +1,28 @@
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -loop-vectorize -dce -instcombine -S < %s | FileCheck %s
+
+@B = common global [1024 x i32] zeroinitializer, align 16
+@A = common global [1024 x i32] zeroinitializer, align 16
+
+; We used to not vectorize this loop because the shift was deemed too expensive.
+; Now that we differentiate shift cost based on the operand value kind, we will
+; vectorize this loop.
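+; (Illustrative assumption for an SSE-class target: the shift amount here is
+; the uniform constant 3, which lowers to a single immediate vector shift,
+; while a per-element variable shift would have to be scalarized and is
+; priced much higher.)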
+; CHECK: ashr <4 x i32>
+define void @f() {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %shl = ashr i32 %0, 3
+ %arrayidx2 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+ store i32 %shl, i32* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
index 8f1bb545fa01..760d28deaf27 100644
--- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.8.0"
;CHECK: @conversion_cost1
-;CHECK: store <2 x i8>
+;CHECK: store <32 x i8>
;CHECK: ret
define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
%1 = icmp sgt i32 %n, 3
@@ -33,11 +33,10 @@ define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) noun
.lr.ph: ; preds = %0, %.lr.ph
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
- %2 = add nsw i64 %indvars.iv, 3
- %3 = trunc i64 %2 to i32
- %4 = sitofp i32 %3 to float
- %5 = getelementptr inbounds float* %B, i64 %indvars.iv
- store float %4, float* %5, align 4
+ %add = add nsw i64 %indvars.iv, 3
+ %tofp = sitofp i64 %add to float
+ %gep = getelementptr inbounds float* %B, i64 %indvars.iv
+ store float %tofp, float* %gep, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll
index 628f9912c8c9..b7f479acf962 100644
--- a/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -8,8 +8,11 @@ target triple = "x86_64-apple-macosx10.8.0"
@d = common global [2048 x i32] zeroinitializer, align 16
@a = common global [2048 x i32] zeroinitializer, align 16
+; The program below gathers and scatters data. We'd better not vectorize it.
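+; Roughly, the loop has the shape of an indexed copy such as
+; a[b[i]] = c[d[i]], and scalarizing those indexed accesses would cost more
+; than the vectorization saves.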
;CHECK: cost_model_1
-;CHECK: <4 x i32>
+;CHECK-NOT: <2 x i32>
+;CHECK-NOT: <4 x i32>
+;CHECK-NOT: <8 x i32>
;CHECK: ret void
define void @cost_model_1() nounwind uwtable noinline ssp {
entry:
diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
index 574c529834ac..d2d0eac305f5 100644
--- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -9,10 +10,19 @@ target triple = "x86_64-apple-macosx10.8.0"
; Select VF = 8;
;CHECK: @example1
-;CHECK: load <8 x i32>
-;CHECK: add nsw <8 x i32>
-;CHECK: store <8 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
;CHECK: ret void
+
+;UNROLL: @example1
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example1() nounwind uwtable ssp {
br label %1
@@ -34,13 +44,18 @@ define void @example1() nounwind uwtable ssp {
ret void
}
-
-; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive.
+; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive.
;CHECK: @example10b
;CHECK: load <4 x i16>
;CHECK: sext <4 x i16>
;CHECK: store <4 x i32>
;CHECK: ret void
+;UNROLL: @example10b
+;UNROLL: load <4 x i16>
+;UNROLL: load <4 x i16>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
br label %1
diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
new file mode 100644
index 000000000000..186fba87d653
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: <4 x float>
+define void @trivial_loop(float* nocapture %a) nounwind uwtable optsize {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %add = fadd float %0, 1.000000e+00
+ store float %add, float* %arrayidx, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 8
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/X86/no-vector.ll b/test/Transforms/LoopVectorize/X86/no-vector.ll
new file mode 100644
index 000000000000..692eec989591
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/no-vector.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -mtriple=i386-unknown-freebsd -mcpu=i486 -loop-vectorize < %s
+
+define i32 @PR14639(i8* nocapture %s, i32 %len) nounwind {
+entry:
+ %cmp4 = icmp sgt i32 %len, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %r.05 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8* %s, i32 %i.06
+ %0 = load i8* %arrayidx, align 1
+ %conv = sext i8 %0 to i32
+ %xor = xor i32 %conv, %r.05
+ %inc = add nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, %len
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %r.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
+ ret i32 %r.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
new file mode 100644
index 000000000000..452d0df133db
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The parallel loop has been invalidated by the new memory accesses introduced
+; by reg2mem (Loop::isParallel() starts to return false). Ensure the loop is
+; now non-vectorizable.
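+; Concretely, reg2mem spills the induction variable through an alloca, and
+; the fresh store to that alloca (flagged below) carries no
+; llvm.mem.parallel_loop_access metadata, so the loop's memory accesses are
+; no longer uniformly annotated.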
+
+;CHECK-NOT: <4 x i32>
+define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+ %indvars.iv.next.reg2mem = alloca i64
+ %indvars.iv.reg2mem = alloca i64
+ %"reg2mem alloca point" = bitcast i32 0 to i32
+ store i64 0, i64* %indvars.iv.reg2mem
+ br label %for.body
+
+for.body: ; preds = %for.body.for.body_crit_edge, %entry
+ %indvars.iv.reload = load i64* %indvars.iv.reg2mem
+ %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv.reload
+ %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv.reload
+ %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+ %idxprom3 = sext i32 %1 to i64
+ %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+ store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+ %indvars.iv.next = add i64 %indvars.iv.reload, 1
+ ; A new store without the parallel metadata here:
+ store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem
+ %indvars.iv.next.reload1 = load i64* %indvars.iv.next.reg2mem
+ %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next.reload1
+ %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+ store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+ %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem
+ %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 512
+ br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !llvm.loop.parallel !3
+
+for.body.for.body_crit_edge: ; preds = %for.body
+ %indvars.iv.next.reload2 = load i64* %indvars.iv.next.reg2mem
+ store i64 %indvars.iv.next.reload2, i64* %indvars.iv.reg2mem
+ br label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !3}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
new file mode 100644
index 000000000000..f648722734a1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; A tricky loop:
+;
+; void loop(int *a, int *b) {
+; for (int i = 0; i < 512; ++i) {
+; a[a[i]] = b[i];
+; a[i] = b[i+1];
+; }
+;}
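+; The store a[a[i]] may write any element of 'a', including ones that later
+; iterations read, so without metadata the iterations cannot be proven
+; independent.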
+
+;CHECK: @loop
+;CHECK-NOT: <4 x i32>
+define void @loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4, !tbaa !0
+ %idxprom3 = sext i32 %1 to i64
+ %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+ store i32 %0, i32* %arrayidx4, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+ %2 = load i32* %arrayidx6, align 4, !tbaa !0
+ store i32 %2, i32* %arrayidx2, align 4, !tbaa !0
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 512
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; The same loop with parallel loop metadata added to the loop branch
+; and the memory instructions.
+
+;CHECK: @parallel_loop
+;CHECK: <4 x i32>
+define void @parallel_loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+ %idxprom3 = sext i32 %1 to i64
+ %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+ ; This store might have originated from inlining a function with a parallel
+; loop. It refers to a list that also includes the "original loop reference" (!4).
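+; (!5 below lists both this loop's ID !3 and the original loop's ID !4; the
+; access still counts as parallel because the enclosing loop's ID appears in
+; the list.)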
+ store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !5
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+ %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+ store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 512
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !3
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; The same loop with an illegal parallel loop metadata: the memory
+; accesses refer to a different loop's identifier.
+
+;CHECK: @mixed_metadata
+;CHECK-NOT: <4 x i32>
+
+define void @mixed_metadata(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+ %idxprom3 = sext i32 %1 to i64
+ %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
+; This refers to the loop marked with !7, which is not the loop we are in at
+; the moment. It should prevent the loop from being detected as parallel.
+ store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !7
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+ %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+ store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 512
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !6
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !3}
+!4 = metadata !{metadata !4}
+!5 = metadata !{metadata !3, metadata !4}
+!6 = metadata !{metadata !6}
+!7 = metadata !{metadata !7}
diff --git a/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
new file mode 100644
index 000000000000..f580846a0228
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -loop-vectorize -mcpu=prescott < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-apple-darwin"
+
+; PR15344
+define void @test1(float* nocapture %arg, i32 %arg1) nounwind {
+; CHECK: @test1
+; CHECK: preheader
+; CHECK: insertelement <2 x double> zeroinitializer, double %tmp, i32 0
+; CHECK: vector.memcheck
+
+bb:
+ br label %bb2
+
+bb2: ; preds = %bb
+ %tmp = load double* null, align 8
+ br i1 undef, label %bb3, label %bb12
+
+bb3: ; preds = %bb3, %bb2
+ %tmp4 = phi double [ %tmp9, %bb3 ], [ %tmp, %bb2 ]
+ %tmp5 = phi i32 [ %tmp8, %bb3 ], [ 0, %bb2 ]
+ %tmp6 = getelementptr inbounds [16 x double]* undef, i32 0, i32 %tmp5
+ %tmp7 = load double* %tmp6, align 4
+ %tmp8 = add nsw i32 %tmp5, 1
+ %tmp9 = fadd fast double %tmp4, undef
+ %tmp10 = getelementptr inbounds float* %arg, i32 %tmp5
+ store float undef, float* %tmp10, align 4
+ %tmp11 = icmp eq i32 %tmp8, %arg1
+ br i1 %tmp11, label %bb12, label %bb3
+
+bb12: ; preds = %bb3, %bb2
+ %tmp13 = phi double [ %tmp, %bb2 ], [ %tmp9, %bb3 ]
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
new file mode 100644
index 000000000000..f390b33c0388
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -0,0 +1,170 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+@G = common global [32 x [1024 x i32]] zeroinitializer, align 16
+@ub = common global [1024 x i32] zeroinitializer, align 16
+@uc = common global [1024 x i32] zeroinitializer, align 16
+@d = common global [2048 x i32] zeroinitializer, align 16
+@fa = common global [1024 x float] zeroinitializer, align 16
+@fb = common global [1024 x float] zeroinitializer, align 16
+@ic = common global [1024 x i32] zeroinitializer, align 16
+@da = common global [1024 x float] zeroinitializer, align 16
+@db = common global [1024 x float] zeroinitializer, align 16
+@dc = common global [1024 x float] zeroinitializer, align 16
+@dd = common global [1024 x float] zeroinitializer, align 16
+@dj = common global [1024 x i32] zeroinitializer, align 16
+
+; We can optimize this test without a tail.
+;CHECK: @example1
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1() optsize {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
+; Can't vectorize in 'optsize' mode because we need a tail.
+;CHECK: @example2
+;CHECK-NOT: store <4 x i32>
+;CHECK: ret void
+define void @example2(i32 %n, i32 %x) optsize {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph5, label %.preheader
+
+..preheader_crit_edge: ; preds = %.lr.ph5
+ %phitmp = sext i32 %n to i64
+ br label %.preheader
+
+.preheader: ; preds = %..preheader_crit_edge, %0
+ %i.0.lcssa = phi i64 [ %phitmp, %..preheader_crit_edge ], [ 0, %0 ]
+ %2 = icmp eq i32 %n, 0
+ br i1 %2, label %._crit_edge, label %.lr.ph
+
+.lr.ph5: ; preds = %0, %.lr.ph5
+ %indvars.iv6 = phi i64 [ %indvars.iv.next7, %.lr.ph5 ], [ 0, %0 ]
+ %3 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv6
+ store i32 %x, i32* %3, align 4
+ %indvars.iv.next7 = add i64 %indvars.iv6, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next7 to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %..preheader_crit_edge, label %.lr.ph5
+
+.lr.ph: ; preds = %.preheader, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ %i.0.lcssa, %.preheader ]
+ %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ]
+ %4 = add nsw i32 %.02, -1
+ %5 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %6 = load i32* %5, align 4
+ %7 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %8 = load i32* %7, align 4
+ %9 = and i32 %8, %6
+ %10 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %9, i32* %10, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %11 = icmp eq i32 %4, 0
+ br i1 %11, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %.preheader
+ ret void
+}
+
+; N is unknown, we need a tail. Can't vectorize.
+;CHECK: @example3
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) optsize {
+ %1 = icmp eq i32 %n, 0
+ br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ]
+ %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
+ %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
+ %2 = add nsw i32 %.05, -1
+ %3 = getelementptr inbounds i32* %.023, i64 1
+ %4 = load i32* %.023, align 16
+ %5 = getelementptr inbounds i32* %.014, i64 1
+ store i32 %4, i32* %.014, align 16
+ %6 = icmp eq i32 %2, 0
+ br i1 %6, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret void
+}
+
+
+; We can't vectorize this one because we need a runtime ptr check.
+;CHECK: @example23
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example23(i16* nocapture %src, i32* nocapture %dst) optsize {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
+ %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
+ %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
+ %2 = getelementptr inbounds i16* %.04, i64 1
+ %3 = load i16* %.04, align 2
+ %4 = zext i16 %3 to i32
+ %5 = shl nuw nsw i32 %4, 7
+ %6 = getelementptr inbounds i32* %.013, i64 1
+ store i32 %5, i32* %.013, align 4
+ %7 = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %7, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
+
+; We CAN vectorize this example because the pointers are marked as noalias.
+;CHECK: @example23b
+;CHECK: <4 x i32>
+;CHECK: ret void
+define void @example23b(i16* noalias nocapture %src, i32* noalias nocapture %dst) optsize {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
+ %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
+ %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
+ %2 = getelementptr inbounds i16* %.04, i64 1
+ %3 = load i16* %.04, align 2
+ %4 = zext i16 %3 to i32
+ %5 = shl nuw nsw i32 %4, 7
+ %6 = getelementptr inbounds i32* %.013, i64 1
+ store i32 %5, i32* %.013, align 4
+ %7 = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %7, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
+
diff --git a/test/Transforms/LoopVectorize/X86/struct-store.ll b/test/Transforms/LoopVectorize/X86/struct-store.ll
new file mode 100644
index 000000000000..a995e43a5ab1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/struct-store.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux-gnu -S
+
+; Make sure we are not crashing on this one.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@glbl = external global [16 x { i64, i64 }], align 16
+
+declare void @fn()
+
+define void @test() {
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ]
+ %tmp = getelementptr inbounds [16 x { i64, i64 }]* @glbl, i64 0, i64 %indvars.iv
+ store { i64, i64 } { i64 ptrtoint (void ()* @fn to i64), i64 0 }, { i64, i64 }* %tmp, align 16
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 16
+ br i1 %exitcond, label %loop, label %exit
+
+exit:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
new file mode 100644
index 000000000000..ef63a145d0c1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+;CHECK: @foo
+;CHECK: load <4 x i32>
+;CHECK-NOT: load <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK-NOT: store <4 x i32>
+;CHECK: ret
+define i32 @foo(i32* nocapture %A) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = add nsw i32 %3, 6
+ store i32 %4, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 100
+ br i1 %exitcond, label %5, label %1
+
+; <label>:5 ; preds = %1
+ ret i32 undef
+}
+
+;CHECK: @bar
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
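+;
+; Roughly (hypothetical source):
+;   for (int i = 0; i < n; ++i) A[i] += 6;
+; The same body as @foo but with a run-time trip count; the two vector stores
+; show that the cost model unrolls it twice.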
+define i32 @bar(i32* nocapture %A, i32 %n) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = add nsw i32 %3, 6
+ store i32 %4, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
new file mode 100644
index 000000000000..2d7b663804f5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Don't unroll when we have register pressure.
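+; The body below is one long chain of floating-point temporaries, so many
+; values are live at once; the CHECK-NOT lines verify that only a single
+; vector load/store pair is emitted (unroll factor 1).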
+;CHECK: reg_pressure
+;CHECK: load <4 x double>
+;CHECK-NOT: load <4 x double>
+;CHECK: store <4 x double>
+;CHECK-NOT: store <4 x double>
+;CHECK: ret
+define void @reg_pressure(double* nocapture %A, i32 %n) nounwind uwtable ssp {
+ %1 = sext i32 %n to i64
+ br label %2
+
+; <label>:2 ; preds = %2, %0
+ %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
+ %3 = getelementptr inbounds double* %A, i64 %indvars.iv
+ %4 = load double* %3, align 8
+ %5 = fadd double %4, 3.000000e+00
+ %6 = fmul double %4, 2.000000e+00
+ %7 = fadd double %5, %6
+ %8 = fadd double %7, 2.000000e+00
+ %9 = fmul double %8, 5.000000e-01
+ %10 = fadd double %6, %9
+ %11 = fsub double %10, %5
+ %12 = fadd double %4, %11
+ %13 = fdiv double %8, %12
+ %14 = fmul double %13, %8
+ %15 = fmul double %6, %14
+ %16 = fmul double %5, %15
+ %17 = fadd double %16, -3.000000e+00
+ %18 = fsub double %4, %5
+ %19 = fadd double %6, %18
+ %20 = fadd double %13, %19
+ %21 = fadd double %20, %17
+ %22 = fadd double %21, 3.000000e+00
+ %23 = fmul double %4, %22
+ store double %23, double* %3, align 8
+ %indvars.iv.next = add i64 %indvars.iv, -1
+ %24 = trunc i64 %indvars.iv to i32
+ %25 = icmp eq i32 %24, 0
+ br i1 %25, label %26, label %2
+
+; <label>:26 ; preds = %2
+ ret void
+}
+
+; This is a small loop. Unroll it twice.
+;CHECK: small_loop
+;CHECK: xor
+;CHECK: xor
+;CHECK: ret
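+;
+; Roughly (hypothetical source):
+;   for (long i = 0; i < n; ++i) A[i] ^= 3;
+; The two xor instructions in the output show an unroll factor of 2.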
+define void @small_loop(i16* nocapture %A, i64 %n) nounwind uwtable ssp {
+ %1 = icmp eq i64 %n, 0
+ br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %i.01 = phi i64 [ %5, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i16* %A, i64 %i.01
+ %3 = load i16* %2, align 2
+ %4 = xor i16 %3, 3
+ store i16 %4, i16* %2, align 2
+ %5 = add i64 %i.01, 1
+ %exitcond = icmp eq i64 %5, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
new file mode 100644
index 000000000000..3b3a7875ab36
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -loop-vectorize -mcpu=core2 -debug-only=loop-vectorize 2>&1 -S | FileCheck %s
+; REQUIRES: asserts
+; Make sure we use the right select kind when querying select costs.
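+;
+; Rough C equivalents (hypothetical source):
+;   scalarselect: for (int i = 0; i < 256; i++) a[i] = cond    ? b[i] + c[i] : 0;
+;   vectorselect: for (int i = 0; i < 256; i++) a[i] = (i < 8) ? b[i] + c[i] : 0;
+; In the first loop the condition is uniform across iterations, so a scalar
+; select should be costed; in the second it varies per iteration, so a vector
+; select should be costed.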
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+
+; CHECK: Checking a loop in "scalarselect"
+define void @scalarselect(i1 %cond) {
+ br label %1
+
+; <label>:1
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+
+; A scalar select has a cost of 1 on core2
+; CHECK: cost of 1 for VF 2 {{.*}} select i1 %cond, i32 %6, i32 0
+
+ %sel = select i1 %cond, i32 %6, i32 zeroinitializer
+ store i32 %sel, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8
+ ret void
+}
+
+; CHECK: Checking a loop in "vectorselect"
+define void @vectorselect(i1 %cond) {
+ br label %1
+
+; <label>:1
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %8 = icmp ult i64 %indvars.iv, 8
+
+; A vector select has a cost of 4 on core2
+; CHECK: cost of 4 for VF 2 {{.*}} select i1 %8, i32 %6, i32 0
+
+ %sel = select i1 %8, i32 %6, i32 zeroinitializer
+ store i32 %sel, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %9, label %1
+
+; <label>:9
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
new file mode 100644
index 000000000000..59bb8d0054c5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
@@ -0,0 +1,150 @@
+; RUN: opt -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%0 = type { %0*, %1 }
+%1 = type { i8*, i32 }
+
+@p = global [2048 x [8 x i32*]] zeroinitializer, align 16
+@q = global [2048 x i16] zeroinitializer, align 16
+@r = global [2048 x i16] zeroinitializer, align 16
+
+; Tests for widest type
+; Ensure that we count the pointer store in the first test case. We have a
+; consecutive store of pointers, so it should count towards the widest-type
+; computation.
+;
+; CHECK: test_consecutive_store
+; CHECK: The Widest type: 64 bits
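+;
+; Roughly (hypothetical source; "struct S" stands in for the unnamed %0 type):
+;   void test(struct S **begin, struct S **end, struct S **src) {
+;     struct S *v = *src;
+;     for (struct S **p = begin; p != end; ++p)
+;       *p = v;
+;   }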
+define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 {
+ %4 = load %0** %2, align 8
+ %5 = icmp eq %0** %0, %1
+ br i1 %5, label %12, label %6
+
+; <label>:6 ; preds = %3
+ br label %7
+
+; <label>:7 ; preds = %7, %6
+ %8 = phi %0** [ %0, %6 ], [ %9, %7 ]
+ store %0* %4, %0** %8, align 8
+ %9 = getelementptr inbounds %0** %8, i64 1
+ %10 = icmp eq %0** %9, %1
+ br i1 %10, label %11, label %7
+
+; <label>:11 ; preds = %7
+ br label %12
+
+; <label>:12 ; preds = %11, %3
+ ret void
+}
+
+; However, if a set of pointers is not stored to consecutive memory, we do
+; NOT count the store towards the widest vector type.
+; In the test case below we load i16 values and store the derived pointers
+; non-consecutively into an array of pointers, so the widest type should be
+; i16.
+; int* p[2048][8];
+; short q[2048];
+; for (int y = 0; y < 8; ++y)
+; for (int i = 0; i < 1024; ++i) {
+; p[i][y] = (int*) (1 + q[i]);
+; }
+; CHECK: test_nonconsecutive_store
+; CHECK: The Widest type: 16 bits
+define void @test_nonconsecutive_store() nounwind ssp uwtable {
+ br label %1
+
+; <label>:1 ; preds = %14, %0
+ %2 = phi i64 [ 0, %0 ], [ %15, %14 ]
+ br label %3
+
+; <label>:3 ; preds = %3, %1
+ %4 = phi i64 [ 0, %1 ], [ %11, %3 ]
+ %5 = getelementptr inbounds [2048 x i16]* @q, i64 0, i64 %4
+ %6 = load i16* %5, align 2
+ %7 = sext i16 %6 to i64
+ %8 = add i64 %7, 1
+ %9 = inttoptr i64 %8 to i32*
+ %10 = getelementptr inbounds [2048 x [8 x i32*]]* @p, i64 0, i64 %4, i64 %2
+ store i32* %9, i32** %10, align 8
+ %11 = add i64 %4, 1
+ %12 = trunc i64 %11 to i32
+ %13 = icmp ne i32 %12, 1024
+ br i1 %13, label %3, label %14
+
+; <label>:14 ; preds = %3
+ %15 = add i64 %2, 1
+ %16 = trunc i64 %15 to i32
+ %17 = icmp ne i32 %16, 8
+ br i1 %17, label %1, label %18
+
+; <label>:18 ; preds = %14
+ ret void
+}
+
+
+@ia = global [1024 x i32*] zeroinitializer, align 16
+@ib = global [1024 x i32] zeroinitializer, align 16
+@ic = global [1024 x i8] zeroinitializer, align 16
+@p2 = global [2048 x [8 x i32*]] zeroinitializer, align 16
+@q2 = global [2048 x i16] zeroinitializer, align 16
+
+;; Now we check the same rules for loads. We should take consecutive loads of
+;; pointer types into account.
+; CHECK: test_consecutive_ptr_load
+; CHECK: The Widest type: 64 bits
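+;
+; Roughly (hypothetical source):
+;   char acc = 0;
+;   for (long i = 0; i < 1024; i++)
+;     acc = (char)(long)ia[i] + (acc + 1);
+;   return acc;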
+define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %2 = phi i64 [ 0, %0 ], [ %10, %1 ]
+ %3 = phi i8 [ 0, %0 ], [ %9, %1 ]
+ %4 = getelementptr inbounds [1024 x i32*]* @ia, i32 0, i64 %2
+ %5 = load i32** %4, align 4
+ %6 = ptrtoint i32* %5 to i64
+ %7 = trunc i64 %6 to i8
+ %8 = add i8 %3, 1
+ %9 = add i8 %7, %8
+ %10 = add i64 %2, 1
+ %11 = icmp ne i64 %10, 1024
+ br i1 %11, label %1, label %12
+
+; <label>:12 ; preds = %1
+ %13 = phi i8 [ %9, %1 ]
+ ret i8 %13
+}
+
+;; However, we should not take non-consecutive loads of pointers into account.
+; CHECK: test_nonconsecutive_ptr_load
+; CHECK: The Widest type: 16 bits
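+;
+; Roughly (hypothetical source):
+;   int *p2[2048][8]; short q2[2048];
+;   for (int y = 0; y < 8; ++y)
+;     for (int i = 0; i < 1024; ++i)
+;       q2[i] = (short)(long)p2[i][y];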
+define void @test_nonconsecutive_ptr_load() nounwind ssp uwtable {
+ br label %1
+
+; <label>:1 ; preds = %13, %0
+ %2 = phi i64 [ 0, %0 ], [ %14, %13 ]
+ br label %3
+
+; <label>:3 ; preds = %3, %1
+ %4 = phi i64 [ 0, %1 ], [ %10, %3 ]
+ %5 = getelementptr inbounds [2048 x [8 x i32*]]* @p2, i64 0, i64 %4, i64 %2
+ %6 = getelementptr inbounds [2048 x i16]* @q2, i64 0, i64 %4
+ %7 = load i32** %5, align 2
+ %8 = ptrtoint i32* %7 to i64
+ %9 = trunc i64 %8 to i16
+ store i16 %9, i16* %6, align 8
+ %10 = add i64 %4, 1
+ %11 = trunc i64 %10 to i32
+ %12 = icmp ne i32 %11, 1024
+ br i1 %12, label %3, label %13
+
+; <label>:13 ; preds = %3
+ %14 = add i64 %2, 1
+ %15 = trunc i64 %14 to i32
+ %16 = icmp ne i32 %15, 8
+ br i1 %16, label %1, label %17
+
+; <label>:17 ; preds = %13
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
new file mode 100644
index 000000000000..431e422c2fbe
--- /dev/null
+++ b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: fc
+;CHECK: load <4 x i16>
+;CHECK-NEXT: shufflevector <4 x i16>
+;CHECK: select <4 x i1>
+;CHECK: store <4 x i16>
+;CHECK: ret
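+;
+; Roughly (hypothetical source, walking the buffer backwards):
+;   do {
+;     unsigned short v = *--p;
+;     *p = (v < size) ? 0 : (unsigned short)(v - size);
+;   } while (--n);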
+define void @fc(i16* nocapture %p, i32 %n, i32 %size) nounwind uwtable ssp {
+entry:
+ br label %do.body
+
+do.body: ; preds = %cond.end, %entry
+ %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %cond.end ]
+ %p.addr.0 = phi i16* [ %p, %entry ], [ %incdec.ptr, %cond.end ]
+ %incdec.ptr = getelementptr inbounds i16* %p.addr.0, i64 -1
+ %0 = load i16* %incdec.ptr, align 2, !tbaa !0
+ %conv = zext i16 %0 to i32
+ %cmp = icmp ult i32 %conv, %size
+ br i1 %cmp, label %cond.end, label %cond.true
+
+cond.true: ; preds = %do.body
+ %sub = sub i32 %conv, %size
+ %phitmp = trunc i32 %sub to i16
+ br label %cond.end
+
+cond.end: ; preds = %do.body, %cond.true
+ %cond = phi i16 [ %phitmp, %cond.true ], [ 0, %do.body ]
+ store i16 %cond, i16* %incdec.ptr, align 2, !tbaa !0
+ %dec = add i32 %n.addr.0, -1
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %do.end, label %do.body
+
+do.end: ; preds = %cond.end
+ ret void
+}
+
+;CHECK: example1
+;CHECK: load <4 x i32>
+;CHECK-NEXT: shufflevector <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: store <4 x i32>
+;CHECK: ret
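+;
+; Roughly (hypothetical source, the same backward-walking pattern with i32):
+;   do {
+;     int v = *--p;
+;     *p = (v < wsize) ? 0 : v - wsize;
+;   } while (--n);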
+define void @example1(i32* nocapture %a, i32 %n, i32 %wsize) nounwind uwtable ssp {
+entry:
+ br label %do.body
+
+do.body: ; preds = %do.body, %entry
+ %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ]
+ %p.0 = phi i32* [ %a, %entry ], [ %incdec.ptr, %do.body ]
+ %incdec.ptr = getelementptr inbounds i32* %p.0, i64 -1
+ %0 = load i32* %incdec.ptr, align 4, !tbaa !3
+ %cmp = icmp slt i32 %0, %wsize
+ %sub = sub nsw i32 %0, %wsize
+ %cond = select i1 %cmp, i32 0, i32 %sub
+ store i32 %cond, i32* %incdec.ptr, align 4, !tbaa !3
+ %dec = add nsw i32 %n.addr.0, -1
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %do.end, label %do.body
+
+do.end: ; preds = %do.body
+ ret void
+}
+
+!0 = metadata !{metadata !"short", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LoopVectorize/calloc.ll b/test/Transforms/LoopVectorize/calloc.ll
new file mode 100644
index 000000000000..08c84eff5dbf
--- /dev/null
+++ b/test/Transforms/LoopVectorize/calloc.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+;CHECK: hexit
+;CHECK: zext <4 x i8>
+;CHECK: ret
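+;
+; Roughly (hypothetical source): hex-encode a byte buffer into a freshly
+; calloc'ed string, one nibble per output character:
+;   char *hexit(unsigned char *bytes, long length) {
+;     char *out = calloc(1, 2 * length + 1);
+;     for (long i = 0; i < 2 * length; i++) {
+;       int nib = (bytes[i / 2] >> ((i & 1) ? 0 : 4)) & 0xF;
+;       out[i] = nib > 9 ? 'a' - 10 + nib : '0' + nib;
+;     }
+;     return out;
+;   }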
+
+define noalias i8* @hexit(i8* nocapture %bytes, i64 %length) nounwind uwtable ssp {
+entry:
+ %shl = shl i64 %length, 1
+ %add28 = or i64 %shl, 1
+ %call = tail call i8* @calloc(i64 1, i64 %add28) nounwind
+ %cmp29 = icmp eq i64 %shl, 0
+ br i1 %cmp29, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ %0 = shl i64 %length, 1
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %i.030 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %shr = lshr i64 %i.030, 1
+ %arrayidx = getelementptr inbounds i8* %bytes, i64 %shr
+ %1 = load i8* %arrayidx, align 1, !tbaa !0
+ %conv = zext i8 %1 to i32
+ %and = shl i64 %i.030, 2
+ %neg = and i64 %and, 4
+ %and3 = xor i64 %neg, 4
+ %sh_prom = trunc i64 %and3 to i32
+ %shl4 = shl i32 15, %sh_prom
+ %and5 = and i32 %conv, %shl4
+ %shr11 = lshr i32 %and5, %sh_prom
+ %conv13 = and i32 %shr11, 254
+ %cmp15 = icmp ugt i32 %conv13, 9
+ %cond = select i1 %cmp15, i32 87, i32 48
+ %add17 = add nsw i32 %cond, %shr11
+ %conv18 = trunc i32 %add17 to i8
+ %arrayidx19 = getelementptr inbounds i8* %call, i64 %i.030
+ store i8 %conv18, i8* %arrayidx19, align 1, !tbaa !0
+ %inc = add i64 %i.030, 1
+ %exitcond = icmp eq i64 %inc, %0
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret i8* %call
+}
+
+declare noalias i8* @calloc(i64, i64) nounwind
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll
new file mode 100644
index 000000000000..2aa29ed2c820
--- /dev/null
+++ b/test/Transforms/LoopVectorize/cast-induction.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+; rdar://problem/12848162
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example12
+;CHECK: trunc i64
+;CHECK: store <4 x i32>
+;CHECK: ret void
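+;
+; Roughly (hypothetical source): storing the induction variable itself forces
+; a cast of the widened induction:
+;   for (int i = 0; i < 1024; ++i) a[i] = i;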
+define void @example12() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %3 = trunc i64 %indvars.iv to i32
+ store i32 %3, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %4, label %1
+
+; <label>:4 ; preds = %1
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll
index 26902eba9e29..da0fb05fe843 100644
--- a/test/Transforms/LoopVectorize/cpp-new-array.ll
+++ b/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -1,10 +1,10 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
;CHECK: @cpp_new_arrays
-;CHECK: insertelement <4 x i32>
+;CHECK: sext i32
;CHECK: load <4 x float>
;CHECK: fadd <4 x float>
;CHECK: ret i32
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
new file mode 100644
index 000000000000..a2ea9511bb22
--- /dev/null
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine | FileCheck %s
+; Make sure we still vectorize when debug-info intrinsics and metadata are present.
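+;
+; Roughly (hypothetical source):
+;   for (int i = 0; i < 1024; ++i) A[i] = B[i] + C[i];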
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@A = global [1024 x i32] zeroinitializer, align 16
+@B = global [1024 x i32] zeroinitializer, align 16
+@C = global [1024 x i32] zeroinitializer, align 16
+
+; CHECK: @test
+define i32 @test() #0 {
+entry:
+ tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9), !dbg !18
+ br label %for.body, !dbg !18
+
+for.body:
+ ;CHECK: load <4 x i32>
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv, !dbg !19
+ %0 = load i32* %arrayidx, align 4, !dbg !19, !tbaa !21
+ %arrayidx2 = getelementptr inbounds [1024 x i32]* @C, i64 0, i64 %indvars.iv, !dbg !19
+ %1 = load i32* %arrayidx2, align 4, !dbg !19, !tbaa !21
+ %add = add nsw i32 %1, %0, !dbg !19
+ %arrayidx4 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19
+ store i32 %add, i32* %arrayidx4, align 4, !dbg !19, !tbaa !21
+ %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !18
+ tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9), !dbg !18
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !18
+ %exitcond = icmp ne i32 %lftr.wideiv, 1024, !dbg !18
+ br i1 %exitcond, label %for.body, label %for.end, !dbg !18
+
+for.end:
+ ret i32 0, !dbg !24
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare void @llvm.dbg.value(metadata, i64, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test", metadata !"/path/to/somewhere", metadata !"clang", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, metadata !""}
+!1 = metadata !{i32 0}
+!2 = metadata !{metadata !3}
+!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"test", metadata !"test", metadata !"test", metadata !4, i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5}
+!4 = metadata !{i32 786473, metadata !"test", metadata !"/path/to/somewhere", null}
+!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0}
+!6 = metadata !{metadata !7}
+!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786688, metadata !10, metadata !"i", metadata !4, i32 6, metadata !7, i32 0, i32 0}
+!10 = metadata !{i32 786443, metadata !3, i32 6, i32 0, metadata !4, i32 0}
+!11 = metadata !{metadata !12, metadata !16, metadata !17}
+!12 = metadata !{i32 786484, i32 0, null, metadata !"A", metadata !"A", metadata !"", metadata !4, i32 1, metadata !13, i32 0, i32 1, [1024 x i32]* @A, null}
+!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, i32 0}
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 786465, i64 0, i64 1024}
+!16 = metadata !{i32 786484, i32 0, null, metadata !"B", metadata !"B", metadata !"", metadata !4, i32 2, metadata !13, i32 0, i32 1, [1024 x i32]* @B, null}
+!17 = metadata !{i32 786484, i32 0, null, metadata !"C", metadata !"C", metadata !"", metadata !4, i32 3, metadata !13, i32 0, i32 1, [1024 x i32]* @C, null}
+!18 = metadata !{i32 6, i32 0, metadata !10, null}
+!19 = metadata !{i32 7, i32 0, metadata !20, null}
+!20 = metadata !{i32 786443, metadata !10, i32 6, i32 0, metadata !4, i32 1}
+!21 = metadata !{metadata !"int", metadata !22}
+!22 = metadata !{metadata !"omnipotent char", metadata !23}
+!23 = metadata !{metadata !"Simple C/C++ TBAA"}
+!24 = metadata !{i32 9, i32 0, metadata !3, null}
diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll
index 2f22a764572f..656912e178f9 100644
--- a/test/Transforms/LoopVectorize/flags.ll
+++ b/test/Transforms/LoopVectorize/flags.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
new file mode 100644
index 000000000000..565684cccb9a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/float-reduction.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+;CHECK: @foo
+;CHECK: fadd <4 x float>
+;CHECK: ret
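+;
+; Roughly (hypothetical source; the %n argument is unused):
+;   float s = 0.0f;
+;   for (int i = 0; i < 200; ++i) s += A[i];
+;   return s;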
+define float @foo(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %add = fadd fast float %sum.04, %0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 200
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret float %add
+}
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index fce29d240487..f335557c0019 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -24,6 +25,20 @@ target triple = "x86_64-apple-macosx10.8.0"
;CHECK: add nsw <4 x i32>
;CHECK: store <4 x i32>
;CHECK: ret void
+;UNROLL: @example1
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: add nsw <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example1() nounwind uwtable ssp {
br label %1
@@ -48,6 +63,12 @@ define void @example1() nounwind uwtable ssp {
;CHECK: @example2
;CHECK: store <4 x i32>
;CHECK: ret void
+;UNROLL: @example2
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph5, label %.preheader
@@ -89,10 +110,15 @@ define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
ret void
}
-; We can't vectorize this loop because it has non constant loop bounds.
;CHECK: @example3
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
;CHECK: ret void
+;UNROLL: @example3
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: <4 x i32>
+;UNROLL: ret void
define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
%1 = icmp eq i32 %n, 0
br i1 %1, label %._crit_edge, label %.lr.ph
@@ -116,6 +142,12 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
;CHECK: @example4
;CHECK: load <4 x i32>
;CHECK: ret void
+;UNROLL: @example4
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: load <4 x i32>
+;UNROLL: ret void
define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
%1 = add nsw i32 %n, -1
%2 = icmp eq i32 %n, 0
@@ -176,6 +208,12 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
;CHECK: @example8
;CHECK: store <4 x i32>
;CHECK: ret void
+;UNROLL: @example8
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: store <4 x i32>
+;UNROLL: ret void
define void @example8(i32 %x) nounwind uwtable ssp {
br label %.preheader
@@ -330,7 +368,7 @@ define void @example11() nounwind uwtable ssp {
}
;CHECK: @example12
-;CHECK: trunc <4 x i64>
+;CHECK: trunc i64
;CHECK: store <4 x i32>
;CHECK: ret void
define void @example12() nounwind uwtable ssp {
@@ -391,9 +429,9 @@ define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %o
ret void
}
-; Can't vectorize because of reductions.
+; We can vectorize this loop now that reductions are supported.
;CHECK: @example14
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
;CHECK: ret void
define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocapture %out) nounwind uwtable ssp {
.preheader3:
@@ -537,9 +575,9 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
ret void
}
-; Can't vectorize because the src and dst pointers are not disjoint.
;CHECK: @example21
-;CHECK-NOT: <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: shufflevector {{.*}} <i32 3, i32 2, i32 1, i32 0>
;CHECK: ret i32
define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
%1 = icmp sgt i32 %n, 0
@@ -565,9 +603,8 @@ define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
ret i32 %a.0.lcssa
}
-; Can't vectorize because there are multiple PHIs.
;CHECK: @example23
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
;CHECK: ret void
define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp {
br label %1
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
new file mode 100644
index 000000000000..121da8ba7e16
--- /dev/null
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -0,0 +1,1078 @@
+; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+
+%struct.anon = type { [100 x i32], i32, [100 x i32] }
+%struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] }
+
+@Foo = common global %struct.anon zeroinitializer, align 4
+@Bar = common global %struct.anon.0 zeroinitializer, align 4
+
+@PB = external global i32*
+@PA = external global i32*
+
+
+;; === First, the tests that should always vectorize, whether statically or by adding run-time checks ===
+
+
+; /// Different objects, positive induction, constant distance
+; int noAlias01 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; Foo.A[i] = Foo.B[i] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias01
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias01(i32 %a) nounwind {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+ store i32 %add, i32* %arrayidx1, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx2, align 4
+ ret i32 %7
+}
+
+; /// Different objects, positive induction with widening slide
+; int noAlias02 (int a) {
+; int i;
+; for (i=0; i<SIZE-10; i++)
+; Foo.A[i] = Foo.B[i+10] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias02
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias02(i32 %a) {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 90
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %add = add nsw i32 %1, 10
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add1 = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+ store i32 %add1, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx3, align 4
+ ret i32 %7
+}
+
+; /// Different objects, positive induction with shortening slide
+; int noAlias03 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; Foo.A[i+10] = Foo.B[i] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias03
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias03(i32 %a) {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %add1 = add nsw i32 %4, 10
+ %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1
+ store i32 %add, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx3, align 4
+ ret i32 %7
+}
+
+; /// Pointer access, positive stride, run-time check added
+; int noAlias04 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; *(PA+i) = *(PB+i) + a;
+; return *(PA+a);
+; }
+; CHECK: define i32 @noAlias04
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+;
+; TODO: This test vectorizes (with a run-time check) on real targets with -O3.
+; Check why it is not being vectorized even when forcing vectorization.
+
+define i32 @noAlias04(i32 %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32** @PB, align 4
+ %2 = load i32* %i, align 4
+ %add.ptr = getelementptr inbounds i32* %1, i32 %2
+ %3 = load i32* %add.ptr, align 4
+ %4 = load i32* %a.addr, align 4
+ %add = add nsw i32 %3, %4
+ %5 = load i32** @PA, align 4
+ %6 = load i32* %i, align 4
+ %add.ptr1 = getelementptr inbounds i32* %5, i32 %6
+ store i32 %add, i32* %add.ptr1, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %7 = load i32* %i, align 4
+ %inc = add nsw i32 %7, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %8 = load i32** @PA, align 4
+ %9 = load i32* %a.addr, align 4
+ %add.ptr2 = getelementptr inbounds i32* %8, i32 %9
+ %10 = load i32* %add.ptr2, align 4
+ ret i32 %10
+}
+
+; /// Different objects, positive induction, multi-array
+; int noAlias05 (int a) {
+; int i, N=10;
+; for (i=0; i<SIZE; i++)
+; Bar.A[N][i] = Bar.B[N][i] + a;
+; return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias05
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias05(i32 %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ %N = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 10, i32* %N, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %2 = load i32* %N, align 4
+ %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
+ %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
+ %3 = load i32* %arrayidx1, align 4
+ %4 = load i32* %a.addr, align 4
+ %add = add nsw i32 %3, %4
+ %5 = load i32* %i, align 4
+ %6 = load i32* %N, align 4
+ %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+ %arrayidx3 = getelementptr inbounds [100 x i32]* %arrayidx2, i32 0, i32 %5
+ store i32 %add, i32* %arrayidx3, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %7 = load i32* %i, align 4
+ %inc = add nsw i32 %7, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %8 = load i32* %a.addr, align 4
+ %9 = load i32* %N, align 4
+ %arrayidx4 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+ %arrayidx5 = getelementptr inbounds [100 x i32]* %arrayidx4, i32 0, i32 %8
+ %10 = load i32* %arrayidx5, align 4
+ ret i32 %10
+}
+
+; /// Same objects, positive induction, multi-array, different sub-elements
+; int noAlias06 (int a) {
+; int i, N=10;
+; for (i=0; i<SIZE; i++)
+; Bar.A[N][i] = Bar.A[N+1][i] + a;
+; return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias06
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias06(i32 %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ %N = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 10, i32* %N, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %2 = load i32* %N, align 4
+ %add = add nsw i32 %2, 1
+ %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
+ %arrayidx1 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %1
+ %3 = load i32* %arrayidx1, align 4
+ %4 = load i32* %a.addr, align 4
+ %add2 = add nsw i32 %3, %4
+ %5 = load i32* %i, align 4
+ %6 = load i32* %N, align 4
+ %arrayidx3 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+ %arrayidx4 = getelementptr inbounds [100 x i32]* %arrayidx3, i32 0, i32 %5
+ store i32 %add2, i32* %arrayidx4, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %7 = load i32* %i, align 4
+ %inc = add nsw i32 %7, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %8 = load i32* %a.addr, align 4
+ %9 = load i32* %N, align 4
+ %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+ %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %8
+ %10 = load i32* %arrayidx6, align 4
+ ret i32 %10
+}
+
+; /// Different objects, negative induction, constant distance
+; int noAlias07 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-1] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias07
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias07(i32 %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %sub = sub nsw i32 100, %1
+ %sub1 = sub nsw i32 %sub, 1
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %sub2 = sub nsw i32 100, %4
+ %sub3 = sub nsw i32 %sub2, 1
+ %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+ store i32 %add, i32* %arrayidx4, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx5, align 4
+ ret i32 %7
+}
+
+; /// Different objects, negative induction, shortening slide
+; int noAlias08 (int a) {
+; int i;
+; for (i=0; i<SIZE-10; i++)
+; Foo.A[SIZE-i-1] = Foo.B[SIZE-i-10] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias08
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias08(i32 %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 90
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %sub = sub nsw i32 100, %1
+ %sub1 = sub nsw i32 %sub, 10
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %sub2 = sub nsw i32 100, %4
+ %sub3 = sub nsw i32 %sub2, 1
+ %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+ store i32 %add, i32* %arrayidx4, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx5, align 4
+ ret i32 %7
+}
+
+; /// Different objects, negative induction, widening slide
+; int noAlias09 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; Foo.A[SIZE-i-10] = Foo.B[SIZE-i-1] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias09
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias09(i32 %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %sub = sub nsw i32 100, %1
+ %sub1 = sub nsw i32 %sub, 1
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %sub2 = sub nsw i32 100, %4
+ %sub3 = sub nsw i32 %sub2, 10
+ %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+ store i32 %add, i32* %arrayidx4, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx5, align 4
+ ret i32 %7
+}
+
+; /// Pointer access, negative stride, run-time check added
+; int noAlias10 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; *(PA+SIZE-i-1) = *(PB+SIZE-i-1) + a;
+; return *(PA+a);
+; }
+; CHECK: define i32 @noAlias10
+; CHECK-NOT: sub nsw <4 x i32>
+; CHECK: ret
+;
+; TODO: This test vectorizes (with a run-time check) on real targets with -O3.
+; Check why it is not being vectorized even when forcing vectorization.
+
+define i32 @noAlias10(i32 %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32** @PB, align 4
+ %add.ptr = getelementptr inbounds i32* %1, i32 100
+ %2 = load i32* %i, align 4
+ %idx.neg = sub i32 0, %2
+ %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
+ %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
+ %3 = load i32* %add.ptr2, align 4
+ %4 = load i32* %a.addr, align 4
+ %add = add nsw i32 %3, %4
+ %5 = load i32** @PA, align 4
+ %add.ptr3 = getelementptr inbounds i32* %5, i32 100
+ %6 = load i32* %i, align 4
+ %idx.neg4 = sub i32 0, %6
+ %add.ptr5 = getelementptr inbounds i32* %add.ptr3, i32 %idx.neg4
+ %add.ptr6 = getelementptr inbounds i32* %add.ptr5, i32 -1
+ store i32 %add, i32* %add.ptr6, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %7 = load i32* %i, align 4
+ %inc = add nsw i32 %7, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %8 = load i32** @PA, align 4
+ %9 = load i32* %a.addr, align 4
+ %add.ptr7 = getelementptr inbounds i32* %8, i32 %9
+ %10 = load i32* %add.ptr7, align 4
+ ret i32 %10
+}
+
+; /// Different objects, negative induction, multi-array
+; int noAlias11 (int a) {
+; int i, N=10;
+; for (i=0; i<SIZE; i++)
+; Bar.A[N][SIZE-i-1] = Bar.B[N][SIZE-i-1] + a;
+; return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias11
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias11(i32 %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ %N = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 10, i32* %N, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %sub = sub nsw i32 100, %1
+ %sub1 = sub nsw i32 %sub, 1
+ %2 = load i32* %N, align 4
+ %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
+ %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
+ %3 = load i32* %arrayidx2, align 4
+ %4 = load i32* %a.addr, align 4
+ %add = add nsw i32 %3, %4
+ %5 = load i32* %i, align 4
+ %sub3 = sub nsw i32 100, %5
+ %sub4 = sub nsw i32 %sub3, 1
+ %6 = load i32* %N, align 4
+ %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+ %arrayidx6 = getelementptr inbounds [100 x i32]* %arrayidx5, i32 0, i32 %sub4
+ store i32 %add, i32* %arrayidx6, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %7 = load i32* %i, align 4
+ %inc = add nsw i32 %7, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %8 = load i32* %a.addr, align 4
+ %9 = load i32* %N, align 4
+ %arrayidx7 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+ %arrayidx8 = getelementptr inbounds [100 x i32]* %arrayidx7, i32 0, i32 %8
+ %10 = load i32* %arrayidx8, align 4
+ ret i32 %10
+}
+
+; /// Same objects, negative induction, multi-array, different sub-elements
+; int noAlias12 (int a) {
+; int i, N=10;
+; for (i=0; i<SIZE; i++)
+; Bar.A[N][SIZE-i-1] = Bar.A[N+1][SIZE-i-1] + a;
+; return Bar.A[N][a];
+; }
+; CHECK: define i32 @noAlias12
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias12(i32 %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ %N = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 10, i32* %N, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %sub = sub nsw i32 100, %1
+ %sub1 = sub nsw i32 %sub, 1
+ %2 = load i32* %N, align 4
+ %add = add nsw i32 %2, 1
+ %arrayidx = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
+ %arrayidx2 = getelementptr inbounds [100 x i32]* %arrayidx, i32 0, i32 %sub1
+ %3 = load i32* %arrayidx2, align 4
+ %4 = load i32* %a.addr, align 4
+ %add3 = add nsw i32 %3, %4
+ %5 = load i32* %i, align 4
+ %sub4 = sub nsw i32 100, %5
+ %sub5 = sub nsw i32 %sub4, 1
+ %6 = load i32* %N, align 4
+ %arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
+ %arrayidx7 = getelementptr inbounds [100 x i32]* %arrayidx6, i32 0, i32 %sub5
+ store i32 %add3, i32* %arrayidx7, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %7 = load i32* %i, align 4
+ %inc = add nsw i32 %7, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %8 = load i32* %a.addr, align 4
+ %9 = load i32* %N, align 4
+ %arrayidx8 = getelementptr inbounds [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
+ %arrayidx9 = getelementptr inbounds [100 x i32]* %arrayidx8, i32 0, i32 %8
+ %10 = load i32* %arrayidx9, align 4
+ ret i32 %10
+}
+
+; /// Same objects, positive induction, constant distance, just enough for vector size
+; int noAlias13 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; Foo.A[i] = Foo.A[i+4] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias13
+; CHECK: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias13(i32 %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %add = add nsw i32 %1, 4
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add1 = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+ store i32 %add1, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx3, align 4
+ ret i32 %7
+}
+
+; /// Same objects, negative induction, constant distance, just enough for vector size
+; int noAlias14 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; Foo.A[SIZE-i-1] = Foo.A[SIZE-i-5] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @noAlias14
+; CHECK: sub nsw <4 x i32>
+; CHECK: ret
+
+define i32 @noAlias14(i32 %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %sub = sub nsw i32 100, %1
+ %sub1 = sub nsw i32 %sub, 5
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %sub2 = sub nsw i32 100, %4
+ %sub3 = sub nsw i32 %sub2, 1
+ %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
+ store i32 %add, i32* %arrayidx4, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx5 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx5, align 4
+ ret i32 %7
+}
+
+
+;; === Now, the tests that we could vectorize with induction changes or run-time checks ===
+
+
+; /// Different objects, swapped induction, alias at the end
+; int mayAlias01 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; Foo.A[i] = Foo.B[SIZE-i-1] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @mayAlias01
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mayAlias01(i32 %a) nounwind {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %sub = sub nsw i32 100, %1
+ %sub1 = sub nsw i32 %sub, 1
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+ store i32 %add, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx3, align 4
+ ret i32 %7
+}
+
+; /// Different objects, swapped induction, alias at the beginning
+; int mayAlias02 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; Foo.A[SIZE-i-1] = Foo.B[i] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @mayAlias02
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mayAlias02(i32 %a) nounwind {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %sub = sub nsw i32 100, %4
+ %sub1 = sub nsw i32 %sub, 1
+ %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
+ store i32 %add, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx3, align 4
+ ret i32 %7
+}
+
+; /// Pointer accesses; vectorizable only with a run-time overlap check
+; int mayAlias03 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; *(PA+i) = *(PB+SIZE-i-1) + a;
+; return *(PA+a);
+; }
+; CHECK: define i32 @mayAlias03
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
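+;
+; PA and PB are reloaded from the globals on every iteration, so only a
+; run-time overlap check could prove these accesses independent.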
+
+define i32 @mayAlias03(i32 %a) nounwind {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32** @PB, align 4
+ %add.ptr = getelementptr inbounds i32* %1, i32 100
+ %2 = load i32* %i, align 4
+ %idx.neg = sub i32 0, %2
+ %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
+ %add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
+ %3 = load i32* %add.ptr2, align 4
+ %4 = load i32* %a.addr, align 4
+ %add = add nsw i32 %3, %4
+ %5 = load i32** @PA, align 4
+ %6 = load i32* %i, align 4
+ %add.ptr3 = getelementptr inbounds i32* %5, i32 %6
+ store i32 %add, i32* %add.ptr3, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %7 = load i32* %i, align 4
+ %inc = add nsw i32 %7, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %8 = load i32** @PA, align 4
+ %9 = load i32* %a.addr, align 4
+ %add.ptr4 = getelementptr inbounds i32* %8, i32 %9
+ %10 = load i32* %add.ptr4, align 4
+ ret i32 %10
+}
+
+
+;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour entirely) ===
+
+
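+; In the tests below the +/-10 offsets step past the ends of the 100-element
+; arrays inside @Foo, so some iterations touch the same memory through what
+; look like different members (e.g. an access such as Foo.A[100 + k] lands in
+; storage belonging to a later member of the struct). That is undefined
+; behaviour in C, but a concrete overlap for the analysis, hence "must alias".
+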
+; int mustAlias01 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias01
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias01(i32 %a) nounwind {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %sub = sub nsw i32 100, %1
+ %sub1 = sub nsw i32 %sub, 1
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %add2 = add nsw i32 %4, 10
+ %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
+ store i32 %add, i32* %arrayidx3, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx4, align 4
+ ret i32 %7
+}
+
+; int mustAlias02 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; Foo.A[i] = Foo.B[SIZE-i-10] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias02
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias02(i32 %a) nounwind {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %sub = sub nsw i32 100, %1
+ %sub1 = sub nsw i32 %sub, 10
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
+ store i32 %add, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx3, align 4
+ ret i32 %7
+}
+
+; int mustAlias03 (int a) {
+; int i;
+; for (i=0; i<SIZE; i++)
+; Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
+; return Foo.A[a];
+; }
+; CHECK: define i32 @mustAlias03
+; CHECK-NOT: add nsw <4 x i32>
+; CHECK: ret
+
+define i32 @mustAlias03(i32 %a) nounwind {
+entry:
+ %a.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %sub = sub nsw i32 100, %1
+ %sub1 = sub nsw i32 %sub, 10
+ %arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
+ %2 = load i32* %arrayidx, align 4
+ %3 = load i32* %a.addr, align 4
+ %add = add nsw i32 %2, %3
+ %4 = load i32* %i, align 4
+ %add2 = add nsw i32 %4, 10
+ %arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
+ store i32 %add, i32* %arrayidx3, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %5 = load i32* %i, align 4
+ %inc = add nsw i32 %5, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %6 = load i32* %a.addr, align 4
+ %arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
+ %7 = load i32* %arrayidx4, align 4
+ ret i32 %7
+}
diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll
new file mode 100644
index 000000000000..7759b7085a1b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/i8-induction.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S
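+; Compile-only test (no FileCheck patterns): it just verifies that the i8
+; induction variable below is handled without crashing.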
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global i8 0, align 1
+@b = common global i8 0, align 1
+
+define void @f() nounwind uwtable ssp {
+scalar.ph:
+ store i8 0, i8* inttoptr (i64 1 to i8*), align 1, !tbaa !0
+ %0 = load i8* @a, align 1, !tbaa !0
+ br label %for.body
+
+for.body:
+ %mul16 = phi i8 [ 0, %scalar.ph ], [ %mul, %for.body ] ; <------- i8 induction var.
+ %c.015 = phi i8 [ undef, %scalar.ph ], [ %conv8, %for.body ]
+ %conv2 = sext i8 %c.015 to i32
+ %tobool = icmp ne i8 %c.015, 0
+ %.sink = select i1 %tobool, i8 %c.015, i8 %0
+ %mul = mul i8 %mul16, %.sink
+ %add = add nsw i32 %conv2, 1
+ %conv8 = trunc i32 %add to i8
+ %sext = shl i32 %add, 24
+ %phitmp14 = icmp slt i32 %sext, 268435456
+ br i1 %phitmp14, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ store i8 %mul, i8* @b, align 1, !tbaa !0
+ ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
+
diff --git a/test/Transforms/LoopVectorize/if-conv-crash.ll b/test/Transforms/LoopVectorize/if-conv-crash.ll
new file mode 100644
index 000000000000..3283456aa3c3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conv-crash.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion
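+; Compile-only test: if-conversion of the control flow below must not crash.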
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define fastcc void @DD_dump() nounwind uwtable ssp {
+entry:
+ br i1 undef, label %lor.lhs.false, label %if.end25
+
+lor.lhs.false: ; preds = %entry
+ br i1 undef, label %if.end21, label %if.else
+
+if.else: ; preds = %lor.lhs.false
+ br i1 undef, label %num_q.exit, label %while.body.i.preheader
+
+while.body.i.preheader: ; preds = %if.else
+ br label %while.body.i
+
+while.body.i: ; preds = %if.end.i, %while.body.i.preheader
+ switch i8 undef, label %if.end.i [
+ i8 39, label %if.then.i
+ i8 92, label %if.then.i
+ ]
+
+if.then.i: ; preds = %while.body.i, %while.body.i
+ br label %if.end.i
+
+if.end.i: ; preds = %if.then.i, %while.body.i
+ br i1 undef, label %num_q.exit, label %while.body.i
+
+num_q.exit: ; preds = %if.end.i, %if.else
+ unreachable
+
+if.end21: ; preds = %lor.lhs.false
+ unreachable
+
+if.end25: ; preds = %entry
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
new file mode 100644
index 000000000000..3a2d82e15d63
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+;CHECK: @reduction_func
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+entry:
+ %cmp10 = icmp sgt i32 %n, 0
+ br i1 %cmp10, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 30
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %add = add i32 %sum.011, 2
+ %add4 = add i32 %add, %0
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %sum.1 = phi i32 [ %add4, %if.then ], [ %sum.011, %for.body ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ 4, %for.inc ]
+ ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
new file mode 100644
index 000000000000..6e7c03a556c4
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; This test vectorizes the loop from the following example:
+;
+;int function0(int *a, int *b, int start, int end) {
+;
+; for (int i=start; i<end; ++i) {
+; unsigned k = a[i];
+;
+; if (a[i] > b[i]) <------ notice the IF inside the loop.
+; k = k * 5 + 3;
+;
+; a[i] = k; <---- k is a phi node that becomes a vector-select.
+; }
+;}
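+;
+; After if-conversion the phi for k collapses into a select on the compare
+; result; vectorized, that is roughly (a sketch with invented names — the
+; actual output is what the CHECK lines match):
+;
+;   %k.vec = select <4 x i1> %cmp.vec, <4 x i32> %k.then.vec, <4 x i32> %k.load.vec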
+
+;CHECK: @function0
+;CHECK: load <4 x i32>
+;CHECK: icmp sgt <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
+entry:
+ %cmp16 = icmp slt i32 %start, %end
+ br i1 %cmp16, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+ %0 = sext i32 %start to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %if.end ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %1 = load i32* %arrayidx, align 4
+ %arrayidx4 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx4, align 4
+ %cmp5 = icmp sgt i32 %1, %2
+ br i1 %cmp5, label %if.then, label %if.end
+
+if.then:
+ %mul = mul i32 %1, 5
+ %add = add i32 %mul, 3
+ br label %if.end
+
+if.end:
+ %k.0 = phi i32 [ %add, %if.then ], [ %1, %for.body ]
+ store i32 %k.0, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %3 = trunc i64 %indvars.iv.next to i32
+ %cmp = icmp slt i32 %3, %end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret i32 undef
+}
+
+
+
+; int func(int *A, int n) {
+; unsigned sum = 0;
+; for (int i = 0; i < n; ++i)
+; if (A[i] > 30)
+; sum += A[i] + 2;
+;
+; return sum;
+; }
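+;
+; Unlike if-conversion-reduction.ll, the sum here flows out of the loop, and
+; the CHECK lines expect the if-converted reduction to vectorize: the guarded
+; add runs unconditionally and a select keeps the old partial sum in the
+; not-taken lanes.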
+
+;CHECK: @reduction_func
+;CHECK: load <4 x i32>
+;CHECK: icmp sgt <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+entry:
+ %cmp10 = icmp sgt i32 %n, 0
+ br i1 %cmp10, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 30
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %add = add i32 %sum.011, 2
+ %add4 = add i32 %add, %0
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %sum.1 = phi i32 [ %add4, %if.then ], [ %sum.011, %for.body ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.1, %for.inc ]
+ ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
index 71ea7689fc04..3fa6b19ca928 100644
--- a/test/Transforms/LoopVectorize/increment.ll
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index b31bceb50df6..96595cdc16bc 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -6,8 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
@array = common global [1024 x i32] zeroinitializer, align 16
;CHECK: @array_at_plus_one
-;CHECK: add <4 x i64>
-;CHECK: trunc <4 x i64>
+;CHECK: trunc i64
;CHECK: add i64 %index, 12
;CHECK: ret i32
define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
new file mode 100644
index 000000000000..e79d78de67c5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -0,0 +1,935 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: @sqrt_f32
+;CHECK: llvm.sqrt.v4f32
+;CHECK: ret void
+define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.sqrt.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
+;CHECK: @sqrt_f64
+;CHECK: llvm.sqrt.v4f64
+;CHECK: ret void
+define void @sqrt_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.sqrt.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+;CHECK: @sin_f32
+;CHECK: llvm.sin.v4f32
+;CHECK: ret void
+define void @sin_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.sin.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.sin.f32(float) nounwind readnone
+
+;CHECK: @sin_f64
+;CHECK: llvm.sin.v4f64
+;CHECK: ret void
+define void @sin_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.sin.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.sin.f64(double) nounwind readnone
+
+;CHECK: @cos_f32
+;CHECK: llvm.cos.v4f32
+;CHECK: ret void
+define void @cos_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.cos.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.cos.f32(float) nounwind readnone
+
+;CHECK: @cos_f64
+;CHECK: llvm.cos.v4f64
+;CHECK: ret void
+define void @cos_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.cos.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.cos.f64(double) nounwind readnone
+
+;CHECK: @exp_f32
+;CHECK: llvm.exp.v4f32
+;CHECK: ret void
+define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.exp.f32(float) nounwind readnone
+
+;CHECK: @exp_f64
+;CHECK: llvm.exp.v4f64
+;CHECK: ret void
+define void @exp_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.exp.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.exp.f64(double) nounwind readnone
+
+;CHECK: @exp2_f32
+;CHECK: llvm.exp2.v4f32
+;CHECK: ret void
+define void @exp2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.exp2.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.exp2.f32(float) nounwind readnone
+
+;CHECK: @exp2_f64
+;CHECK: llvm.exp2.v4f64
+;CHECK: ret void
+define void @exp2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.exp2.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.exp2.f64(double) nounwind readnone
+
+;CHECK: @log_f32
+;CHECK: llvm.log.v4f32
+;CHECK: ret void
+define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.log.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.log.f32(float) nounwind readnone
+
+;CHECK: @log_f64
+;CHECK: llvm.log.v4f64
+;CHECK: ret void
+define void @log_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.log.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.log.f64(double) nounwind readnone
+
+;CHECK: @log10_f32
+;CHECK: llvm.log10.v4f32
+;CHECK: ret void
+define void @log10_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.log10.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.log10.f32(float) nounwind readnone
+
+;CHECK: @log10_f64
+;CHECK: llvm.log10.v4f64
+;CHECK: ret void
+define void @log10_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.log10.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.log10.f64(double) nounwind readnone
+
+;CHECK: @log2_f32
+;CHECK: llvm.log2.v4f32
+;CHECK: ret void
+define void @log2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.log2.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.log2.f32(float) nounwind readnone
+
+;CHECK: @log2_f64
+;CHECK: llvm.log2.v4f64
+;CHECK: ret void
+define void @log2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.log2.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.log2.f64(double) nounwind readnone
+
+;CHECK: @fabs_f32
+;CHECK: llvm.fabs.v4f32
+;CHECK: ret void
+define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.fabs.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.fabs.f32(float) nounwind readnone
+
+;CHECK: @fabs_f64
+;CHECK: llvm.fabs.v4f64
+;CHECK: ret void
+define void @fabs_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.fabs.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.fabs.f64(double) nounwind readnone
+
+;CHECK: @floor_f32
+;CHECK: llvm.floor.v4f32
+;CHECK: ret void
+define void @floor_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.floor.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.floor.f32(float) nounwind readnone
+
+;CHECK: @floor_f64
+;CHECK: llvm.floor.v4f64
+;CHECK: ret void
+define void @floor_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.floor.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.floor.f64(double) nounwind readnone
+
+;CHECK: @ceil_f32
+;CHECK: llvm.ceil.v4f32
+;CHECK: ret void
+define void @ceil_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.ceil.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.ceil.f32(float) nounwind readnone
+
+;CHECK: @ceil_f64
+;CHECK: llvm.ceil.v4f64
+;CHECK: ret void
+define void @ceil_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.ceil.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.ceil.f64(double) nounwind readnone
+
+;CHECK: @trunc_f32
+;CHECK: llvm.trunc.v4f32
+;CHECK: ret void
+define void @trunc_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.trunc.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.trunc.f32(float) nounwind readnone
+
+;CHECK: @trunc_f64
+;CHECK: llvm.trunc.v4f64
+;CHECK: ret void
+define void @trunc_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.trunc.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.trunc.f64(double) nounwind readnone
+
+;CHECK: @rint_f32
+;CHECK: llvm.rint.v4f32
+;CHECK: ret void
+define void @rint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.rint.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.rint.f32(float) nounwind readnone
+
+;CHECK: @rint_f64
+;CHECK: llvm.rint.v4f64
+;CHECK: ret void
+define void @rint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.rint.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.rint.f64(double) nounwind readnone
+
+;CHECK: @nearbyint_f32
+;CHECK: llvm.nearbyint.v4f32
+;CHECK: ret void
+define void @nearbyint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %call = tail call float @llvm.nearbyint.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.nearbyint.f32(float) nounwind readnone
+
+;CHECK: @nearbyint_f64
+;CHECK: llvm.nearbyint.v4f64
+;CHECK: ret void
+define void @nearbyint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %call = tail call double @llvm.nearbyint.f64(double %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx2, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.nearbyint.f64(double) nounwind readnone
+
+;CHECK: @fma_f32
+;CHECK: llvm.fma.v4f32
+;CHECK: ret void
+define void @fma_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
+entry:
+ %cmp12 = icmp sgt i32 %n, 0
+ br i1 %cmp12, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !tbaa !0
+ %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
+ %2 = load float* %arrayidx4, align 4, !tbaa !0
+ %3 = tail call float @llvm.fma.f32(float %0, float %2, float %1)
+ %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %3, float* %arrayidx6, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+
+;CHECK: @fma_f64
+;CHECK: llvm.fma.v4f64
+;CHECK: ret void
+define void @fma_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
+entry:
+ %cmp12 = icmp sgt i32 %n, 0
+ br i1 %cmp12, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
+ %1 = load double* %arrayidx2, align 8, !tbaa !3
+ %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
+ %2 = load double* %arrayidx4, align 8, !tbaa !3
+ %3 = tail call double @llvm.fma.f64(double %0, double %2, double %1)
+ %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %3, double* %arrayidx6, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+
+;CHECK: @fmuladd_f32
+;CHECK: llvm.fmuladd.v4f32
+;CHECK: ret void
+define void @fmuladd_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
+entry:
+ %cmp12 = icmp sgt i32 %n, 0
+ br i1 %cmp12, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !tbaa !0
+ %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
+ %2 = load float* %arrayidx4, align 4, !tbaa !0
+ %3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1)
+ %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %3, float* %arrayidx6, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
+
+;CHECK: @fmuladd_f64
+;CHECK: llvm.fmuladd.v4f64
+;CHECK: ret void
+define void @fmuladd_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
+entry:
+ %cmp12 = icmp sgt i32 %n, 0
+ br i1 %cmp12, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
+ %1 = load double* %arrayidx2, align 8, !tbaa !3
+ %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
+ %2 = load double* %arrayidx4, align 8, !tbaa !3
+ %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1)
+ %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %3, double* %arrayidx6, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
+
+;CHECK: @pow_f32
+;CHECK: llvm.pow.v4f32
+;CHECK: ret void
+define void @pow_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv
+ %1 = load float* %arrayidx2, align 4, !tbaa !0
+ %call = tail call float @llvm.pow.f32(float %0, float %1) nounwind readnone
+ %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv
+ store float %call, float* %arrayidx4, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.pow.f32(float, float) nounwind readnone
+
+;CHECK: @pow_f64
+;CHECK: llvm.pow.v4f64
+;CHECK: ret void
+define void @pow_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8, !tbaa !3
+ %arrayidx2 = getelementptr inbounds double* %z, i64 %indvars.iv
+ %1 = load double* %arrayidx2, align 8, !tbaa !3
+ %call = tail call double @llvm.pow.f64(double %0, double %1) nounwind readnone
+ %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx4, align 8, !tbaa !3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; CHECK: fabs_libm
+; CHECK: call <4 x float> @llvm.fabs.v4f32
+; CHECK: ret void
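+;
+; The call below is to the libm function fabsf, not the intrinsic; the CHECK
+; lines expect it to be recognized and widened into a call to the
+; llvm.fabs.v4f32 intrinsic.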
+define void @fabs_libm(float* nocapture %x) nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4
+ %call = tail call float @fabsf(float %0) nounwind readnone
+ store float %call, float* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+declare float @fabsf(float) nounwind readnone
+
+declare double @llvm.pow.f64(double, double) nounwind readnone
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"double", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LoopVectorize/lcssa-crash.ll b/test/Transforms/LoopVectorize/lcssa-crash.ll
new file mode 100644
index 000000000000..06b3b08aa0e3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/lcssa-crash.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%type1 = type { %type2 }
+%type2 = type { [0 x i8*], i8**, i32, i32, i32 }
+
+define void @test() nounwind uwtable align 2 {
+ br label %for.body.lr.ph.i.i.i
+
+for.body.lr.ph.i.i.i:
+ br label %for.body.i.i.i
+
+for.body.i.i.i:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.i.i.i ], [ 0, %for.body.lr.ph.i.i.i ]
+ br label %for.inc.i.i.i
+
+for.inc.i.i.i:
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, undef
+ br i1 %exitcond, label %for.body.i.i.i, label %for.end.i.i.i
+
+for.end.i.i.i:
+ %lcssa = phi %type1* [ undef, %for.inc.i.i.i ]
+ unreachable
+}
+
diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll
new file mode 100644
index 000000000000..45aa8c7cd9be
--- /dev/null
+++ b/test/Transforms/LoopVectorize/no_int_induction.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+; int __attribute__((noinline)) sum_array(int *A, int n) {
+; return std::accumulate(A, A + n, 0);
+; }
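+;
+; The loop below has no integer induction variable; only the pointer %.03.i
+; steps through the array, so the vectorizer must handle a pointer induction
+; while still recognizing the add reduction.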
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @sum_array
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: ret i32
+define i32 @sum_array(i32* %A, i32 %n) nounwind uwtable readonly noinline ssp {
+ %1 = sext i32 %n to i64
+ %2 = getelementptr inbounds i32* %A, i64 %1
+ %3 = icmp eq i32 %n, 0
+ br i1 %3, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+.lr.ph.i: ; preds = %0, %.lr.ph.i
+ %.03.i = phi i32* [ %6, %.lr.ph.i ], [ %A, %0 ]
+ %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ]
+ %4 = load i32* %.03.i, align 4
+ %5 = add nsw i32 %4, %.012.i
+ %6 = getelementptr inbounds i32* %.03.i, i64 1
+ %7 = icmp eq i32* %6, %2
+ br i1 %7, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %.lr.ph.i, %0
+ %.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ]
+ ret i32 %.01.lcssa.i
+}
diff --git a/test/Transforms/LoopVectorize/nofloat.ll b/test/Transforms/LoopVectorize/nofloat.ll
new file mode 100644
index 000000000000..de23bf02b63a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/nofloat.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+; Make sure that we don't vectorize functions with the 'noimplicitfloat'
+; attribute: vectorizing would implicitly use vector (floating-point)
+; registers, which the attribute forbids.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example12
+;CHECK-NOT: store <4 x i32>
+;CHECK: ret void
+define void @example12() noimplicitfloat { ; <--------- "noimplicitfloat" attribute here!
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %3 = trunc i64 %indvars.iv to i32
+ store i32 %3, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %4, label %1
+
+; <label>:4 ; preds = %1
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
index 1a6c15ed96c4..8262a18f1807 100644
--- a/test/Transforms/LoopVectorize/non-const-n.ll
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/nsw-crash.ll b/test/Transforms/LoopVectorize/nsw-crash.ll
new file mode 100644
index 000000000000..e5fad14d0dda
--- /dev/null
+++ b/test/Transforms/LoopVectorize/nsw-crash.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+define void @test() {
+entry:
+ br i1 undef, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:
+ br label %while.body
+
+while.body:
+ %it.sroa.0.091 = phi i32* [ undef, %while.body.lr.ph ], [ %incdec.ptr.i, %while.body ]
+ %incdec.ptr.i = getelementptr inbounds i32* %it.sroa.0.091, i64 1
+ %inc32 = add i32 undef, 1 ; <------------- Make sure we don't set the NSW flag on this add of undef.
+ %cmp.i11 = icmp eq i32* %incdec.ptr.i, undef
+ br i1 %cmp.i11, label %while.end, label %while.body
+
+while.end:
+ ret void
+}
+
+
diff --git a/test/Transforms/LoopVectorize/phi-hang.ll b/test/Transforms/LoopVectorize/phi-hang.ll
new file mode 100644
index 000000000000..b80d45995dc3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/phi-hang.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -loop-vectorize < %s
+
+; PR15384
+define void @test1(i32 %arg) {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb5, %bb
+ %tmp = phi i32 [ 1, %bb ], [ %tmp7, %bb5 ]
+ %tmp2 = phi i32 [ %arg, %bb ], [ %tmp9, %bb5 ]
+ br i1 true, label %bb5, label %bb3
+
+bb3: ; preds = %bb1
+ br label %bb4
+
+bb4: ; preds = %bb3
+ br label %bb5
+
+bb5: ; preds = %bb4, %bb1
+ %tmp6 = phi i32 [ 0, %bb4 ], [ %tmp, %bb1 ]
+ %tmp7 = phi i32 [ 0, %bb4 ], [ %tmp6, %bb1 ]
+ %tmp8 = phi i32 [ 0, %bb4 ], [ %tmp, %bb1 ]
+ %tmp9 = add nsw i32 %tmp2, 1
+ %tmp10 = icmp eq i32 %tmp9, 0
+ br i1 %tmp10, label %bb11, label %bb1
+
+bb11: ; preds = %bb5
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/ptr_loops.ll b/test/Transforms/LoopVectorize/ptr_loops.ll
new file mode 100644
index 000000000000..25599f8f4c3c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ptr_loops.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@A = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
+@B = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16
+
+;CHECK:_Z5test1v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test1v() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %0, %1
+ %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 18), %0 ], [ %4, %1 ]
+ %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 0), %0 ], [ %5, %1 ]
+ %2 = load i32* %b.01, align 4
+ %3 = shl nsw i32 %2, 1
+ store i32 %3, i32* %p.02, align 4
+ %4 = getelementptr inbounds i32* %p.02, i64 -1
+ %5 = getelementptr inbounds i32* %b.01, i64 1
+ %6 = icmp eq i32* %4, getelementptr ([36 x i32]* @A, i64 128102389400760775, i64 3)
+ br i1 %6, label %7, label %1
+
+; <label>:7 ; preds = %1
+ ret i32 0
+}
+
+;CHECK:_Z5test2v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test2v() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %0, %1
+ %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 25), %0 ], [ %3, %1 ]
+ %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 2), %0 ], [ %4, %1 ]
+ %2 = load i32* %b.01, align 4
+ store i32 %2, i32* %p.02, align 4
+ %3 = getelementptr inbounds i32* %p.02, i64 -1
+ %4 = getelementptr inbounds i32* %b.01, i64 1
+ %5 = icmp eq i32* %4, getelementptr inbounds ([36 x i32]* @A, i64 0, i64 18)
+ br i1 %5, label %6, label %1
+
+; <label>:6 ; preds = %1
+ ret i32 0
+}
+
+;CHECK:_Z5test3v
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test3v() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %0, %1
+ %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 29), %0 ], [ %3, %1 ]
+ %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 5), %0 ], [ %4, %1 ]
+ %2 = load i32* %b.01, align 4
+ store i32 %2, i32* %p.02, align 4
+ %3 = getelementptr inbounds i32* %p.02, i64 -1
+ %4 = getelementptr inbounds i32* %b.01, i64 1
+ %5 = icmp eq i32* %3, getelementptr ([36 x i32]* @A, i64 128102389400760775, i64 3)
+ br i1 %5, label %6, label %1
+
+; <label>:6 ; preds = %1
+ ret i32 0
+}
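+
+; Each loop above walks A downward while walking B upward; test1, for
+; example, is roughly (an illustrative reconstruction; the exact bound is
+; folded into the wrapped getelementptr constant):
+;
+;   for (int *p = &A[18], *q = &B[0]; /* bound check */; --p, ++q)
+;     *p = 2 * *q;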
diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll
index b4d1bac132f0..bfaa6d452bce 100644
--- a/test/Transforms/LoopVectorize/read-only.ll
+++ b/test/Transforms/LoopVectorize/read-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index c1848b35fc6e..08b7b27e4257 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -7,6 +7,11 @@ target triple = "x86_64-apple-macosx10.8.0"
;CHECK: phi <4 x i32>
;CHECK: load <4 x i32>
;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
%1 = icmp sgt i32 %n, 0
@@ -37,6 +42,11 @@ define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
;CHECK: phi <4 x i32>
;CHECK: load <4 x i32>
;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
%1 = icmp sgt i32 %n, 0
@@ -67,6 +77,11 @@ define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocap
;CHECK: phi <4 x i32>
;CHECK: load <4 x i32>
;CHECK: mul nsw <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
%1 = icmp sgt i32 %n, 0
@@ -95,6 +110,11 @@ define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
;CHECK: @reduction_mul
;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
%1 = icmp sgt i32 %n, 0
@@ -124,6 +144,11 @@ define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapt
;CHECK: @start_at_non_zero
;CHECK: phi <4 x i32>
;CHECK: <i32 120, i32 0, i32 0, i32 0>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp {
entry:
@@ -152,6 +177,11 @@ for.end: ; preds = %for.body, %entry
;CHECK: @reduction_and
;CHECK: and <4 x i32>
;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: and <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: and <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
entry:
@@ -179,6 +209,11 @@ for.end: ; preds = %for.body, %entry
;CHECK: @reduction_or
;CHECK: or <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: or <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: or <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
entry:
@@ -206,6 +241,11 @@ for.end: ; preds = %for.body, %entry
;CHECK: @reduction_xor
;CHECK: xor <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: xor <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: xor <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
entry:
@@ -230,3 +270,56 @@ for.end: ; preds = %for.body, %entry
%result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
ret i32 %result.0.lcssa
}
+
+; In this code the subtracted variable is on the RHS, so this is not a
+; reduction we can vectorize.
+;CHECK: @reduction_sub_rhs
+;CHECK-NOT: phi <4 x i32>
+;CHECK-NOT: sub nsw <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_sub_rhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %sub = sub nsw i32 %0, %x.05
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]
+ ret i32 %x.0.lcssa
+}
+
+
+; In this test the reduction variable is on the LHS and we can vectorize it.
+;CHECK: @reduction_sub_lhs
+;CHECK: phi <4 x i32>
+;CHECK: sub nsw <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_sub_lhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %sub = sub nsw i32 %x.05, %0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]
+ ret i32 %x.0.lcssa
+}
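+
+; The two cases above correspond roughly to the following loops
+; (illustrative reconstructions from the IR):
+;
+;   for (int i = 0; i < n; i++) x = A[i] - x;  // reduction_sub_rhs: not a reduction
+;   for (int i = 0; i < n; i++) x = x - A[i];  // reduction_sub_lhs: vectorizable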
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index 23933cf7c7db..86098a6e7db2 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@@ -9,6 +9,10 @@ target triple = "x86_64-apple-macosx10.9.0"
; a[i] = b[i] * 3;
; }
+;CHECK: for.body.preheader:
+;CHECK: br i1 %cmp.zero, label %middle.block, label %vector.memcheck
+;CHECK: vector.memcheck:
+;CHECK: br i1 %found.conflict, label %middle.block, label %vector.ph
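+;
+; Conceptually, vector.memcheck performs a runtime overlap test before
+; entering the vector loop; an illustrative sketch:
+;
+;   if (a + n <= b || b + n <= a)
+;     /* no overlap: run the vector loop */
+;   else
+;     /* possible overlap: fall back to the scalar loop */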
;CHECK: load <4 x float>
define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
entry:
diff --git a/test/Transforms/LoopVectorize/same-base-access.ll b/test/Transforms/LoopVectorize/same-base-access.ll
new file mode 100644
index 000000000000..15738936457a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/same-base-access.ll
@@ -0,0 +1,110 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; This is kernel 11 from the "Livermore Loops" benchmark. We can't vectorize
+; it because iteration k reads x[k-1], which the previous iteration wrote
+; (a loop-carried dependence).
+;
+; void kernel11(double *x, double *y, int n) {
+; for ( int k=1 ; k<n ; k++ )
+; x[k] = x[k-1] + y[k];
+; }
+
+; CHECK: @kernel11
+; CHECK-NOT: <4 x double>
+; CHECK: ret
+define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
+ %1 = alloca double*, align 8
+ %2 = alloca double*, align 8
+ %3 = alloca i32, align 4
+ %k = alloca i32, align 4
+ store double* %x, double** %1, align 8
+ store double* %y, double** %2, align 8
+ store i32 %n, i32* %3, align 4
+ store i32 1, i32* %k, align 4
+ br label %4
+
+; <label>:4 ; preds = %25, %0
+ %5 = load i32* %k, align 4
+ %6 = load i32* %3, align 4
+ %7 = icmp slt i32 %5, %6
+ br i1 %7, label %8, label %28
+
+; <label>:8 ; preds = %4
+ %9 = load i32* %k, align 4
+ %10 = sub nsw i32 %9, 1
+ %11 = sext i32 %10 to i64
+ %12 = load double** %1, align 8
+ %13 = getelementptr inbounds double* %12, i64 %11
+ %14 = load double* %13, align 8
+ %15 = load i32* %k, align 4
+ %16 = sext i32 %15 to i64
+ %17 = load double** %2, align 8
+ %18 = getelementptr inbounds double* %17, i64 %16
+ %19 = load double* %18, align 8
+ %20 = fadd double %14, %19
+ %21 = load i32* %k, align 4
+ %22 = sext i32 %21 to i64
+ %23 = load double** %1, align 8
+ %24 = getelementptr inbounds double* %23, i64 %22
+ store double %20, double* %24, align 8
+ br label %25
+
+; <label>:25 ; preds = %8
+ %26 = load i32* %k, align 4
+ %27 = add nsw i32 %26, 1
+ store i32 %27, i32* %k, align 4
+ br label %4
+
+; <label>:28 ; preds = %4
+ ret i32 0
+}
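+
+; If this loop were vectorized by 4, a lane would read x[k-1] before the
+; preceding lane had stored it; the first vector iteration would effectively
+; compute (illustrative):
+;
+;   x[1..4] = x[0..3] + y[1..4];   // but x[1], x[2], x[3] are not written yet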
+
+
+
+; We don't vectorize this function because the A[i*7] accesses would be
+; scalarized, and the individual scalar accesses can in theory wrap around
+; and overwrite other elements. At the moment we only allow reads and writes
+; to consecutive array locations.
+;
+; void foo(int *a) {
+; for (int i=0; i<256; ++i) {
+; int x = a[i*7];
+; if (x>3)
+; x = x*x+x*4;
+; a[i*7] = x+3;
+; }
+; }
+
+; CHECK: @func2
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+define i32 @func2(i32* nocapture %a) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %7, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %7 ]
+ %2 = mul nsw i64 %indvars.iv, 7
+ %3 = getelementptr inbounds i32* %a, i64 %2
+ %4 = load i32* %3, align 4
+ %5 = icmp sgt i32 %4, 3
+ br i1 %5, label %6, label %7
+
+; <label>:6 ; preds = %1
+ %tmp = add i32 %4, 4
+ %tmp1 = mul i32 %tmp, %4
+ br label %7
+
+; <label>:7 ; preds = %6, %1
+ %x.0 = phi i32 [ %tmp1, %6 ], [ %4, %1 ]
+ %8 = add nsw i32 %x.0, 3
+ store i32 %8, i32* %3, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %9, label %1
+
+; <label>:9 ; preds = %7
+ ret i32 0
+}
diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll
index e537bde31bb0..7a14d247c9b4 100644
--- a/test/Transforms/LoopVectorize/scalar-select.ll
+++ b/test/Transforms/LoopVectorize/scalar-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/simple-unroll.ll b/test/Transforms/LoopVectorize/simple-unroll.ll
new file mode 100644
index 000000000000..7e2dd5fc0fcf
--- /dev/null
+++ b/test/Transforms/LoopVectorize/simple-unroll.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; This is the loop.
+; for (i=0; i<n; i++){
+; a[i] += i;
+; }
+;CHECK: @inc
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @inc(i32 %n) nounwind uwtable noinline ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = trunc i64 %indvars.iv to i32
+ %5 = add nsw i32 %3, %4
+ store i32 %5, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret void
+}
+
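+; With -force-vector-unroll=2 the vector body processes two <4 x i32> chunks
+; per iteration, which is why the CHECK lines expect the load/add/store
+; sequence twice. Roughly (illustrative):
+;
+;   for (i = 0; i + 8 <= n; i += 8) {
+;     a[i..i+3]   += {i..i+3};
+;     a[i+4..i+7] += {i+4..i+7};
+;   }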
diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll
index 4a6e4b231dfe..fa83dba3d367 100644
--- a/test/Transforms/LoopVectorize/small-loop.ll
+++ b/test/Transforms/LoopVectorize/small-loop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll
index 5aa3bc034d0b..998001c3187b 100644
--- a/test/Transforms/LoopVectorize/start-non-zero.ll
+++ b/test/Transforms/LoopVectorize/start-non-zero.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll
new file mode 100644
index 000000000000..de65d0d14870
--- /dev/null
+++ b/test/Transforms/LoopVectorize/struct_access.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct.coordinate = type { i32, i32 }
+
+; Make sure that we don't generate a wide load when accessing the struct.
+; struct coordinate {
+; int x;
+; int y;
+; };
+;
+;
+; int foo(struct coordinate *A, int n) {
+;
+; int sum = 0;
+; for (int i = 0; i < n; ++i)
+; sum += A[i].x;
+;
+; return sum;
+; }
+
+;CHECK: @foo
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret
+define i32 @foo(%struct.coordinate* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %x = getelementptr inbounds %struct.coordinate* %A, i64 %indvars.iv, i32 0
+ %0 = load i32* %x, align 4, !tbaa !0
+ %add = add nsw i32 %0, %sum.05
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
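+
+; The x fields sit two i32s apart, so a <4 x i32> load starting at A[i].x
+; would span x,y,x,y (illustrative memory layout):
+;
+;   [ A[0].x | A[0].y | A[1].x | A[1].y | ... ]
+;
+; which is why the vectorizer must not emit a wide load here.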
diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll
new file mode 100644
index 000000000000..ac1694802a32
--- /dev/null
+++ b/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;
+; We want to make sure that we vectorize the scalar loop only once, even if
+; the pass manager runs the vectorizer multiple times due to inlining.
+
+
+; This test checks that we add metadata to vectorized loops.
+; CHECK: _Z4foo1Pii
+; CHECK: <4 x i32>
+; CHECK: llvm.vectorizer.already_vectorized
+; CHECK: ret
+
+; This test comes from the loop:
+;
+;int foo (int *A, int n) {
+; return std::accumulate(A, A + n, 0);
+;}
+define i32 @_Z4foo1Pii(i32* %A, i32 %n) #0 {
+entry:
+ %idx.ext = sext i32 %n to i64
+ %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext
+ %cmp3.i = icmp eq i32 %n, 0
+ br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i: ; preds = %entry, %for.body.i
+ %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
+ %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
+ %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
+ %add.i = add nsw i32 %0, %__init.addr.05.i
+ %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
+ %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+ br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry
+ %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
+ ret i32 %__init.addr.0.lcssa.i
+}
+
+; This test checks that we don't vectorize loops that are marked with the "already vectorized" metadata.
+; CHECK: _Z4foo2Pii
+; CHECK-NOT: <4 x i32>
+; CHECK: llvm.vectorizer.already_vectorized
+; CHECK: ret
+define i32 @_Z4foo2Pii(i32* %A, i32 %n) #0 {
+entry:
+ %idx.ext = sext i32 %n to i64
+ %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext
+ %cmp3.i = icmp eq i32 %n, 0
+ br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i: ; preds = %entry, %for.body.i
+ %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
+ %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
+ %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
+ %add.i = add nsw i32 %0, %__init.addr.05.i
+ %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
+ %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+ br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i, !llvm.vectorizer.already_vectorized !3
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry
+ %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
+ ret i32 %__init.addr.0.lcssa.i
+}
+
+attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{}
+
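+; The "already vectorized" marker is plain metadata on the loop's back-edge
+; branch; a pass could query it roughly like this (an illustrative C++ sketch,
+; not part of this test):
+;
+;   if (Latch->getTerminator()->getMetadata("llvm.vectorizer.already_vectorized"))
+;     return false; // this loop was vectorized on an earlier run; skip it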
diff --git a/test/Transforms/LoopVectorize/write-only.ll b/test/Transforms/LoopVectorize/write-only.ll
index eb027604134f..54cbe8df46b0 100644
--- a/test/Transforms/LoopVectorize/write-only.ll
+++ b/test/Transforms/LoopVectorize/write-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index 2f1ccb493da8..c0eaaa40154b 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -8,8 +8,10 @@ entry:
%0 = alloca double ; <double*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
call void @llvm.dbg.declare(metadata !{i32* %i_addr}, metadata !0), !dbg !8
-; CHECK: call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !0)
-; CHECK: call void @llvm.dbg.value(metadata !{double %j}, i64 0, metadata !9)
+; CHECK: call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata ![[IVAR:[0-9]*]])
+; CHECK: call void @llvm.dbg.value(metadata !{double %j}, i64 0, metadata ![[JVAR:[0-9]*]])
+; CHECK: ![[IVAR]] = {{.*}} ; [ DW_TAG_arg_variable ] [i]
+; CHECK: ![[JVAR]] = {{.*}} ; [ DW_TAG_arg_variable ] [j]
store i32 %i, i32* %i_addr
call void @llvm.dbg.declare(metadata !{double* %j_addr}, metadata !9), !dbg !8
store double %j, double* %j_addr
@@ -30,16 +32,18 @@ return: ; preds = %entry
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-!0 = metadata !{i32 524545, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"testfunc.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"testfunc.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!llvm.dbg.cu = !{!3}
+
+!0 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"testfunc.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
!5 = metadata !{metadata !6, metadata !7, metadata !6}
-!6 = metadata !{i32 524324, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!7 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!8 = metadata !{i32 2, i32 0, metadata !1, null}
-!9 = metadata !{i32 524545, metadata !1, metadata !"j", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 786689, metadata !1, metadata !"j", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
!10 = metadata !{i32 3, i32 0, metadata !11, null}
-!11 = metadata !{i32 524299, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
-
+!11 = metadata !{i32 786443, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{metadata !"testfunc.c", metadata !"/tmp"}
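+
+; Note on the renumbering above: the leading i32 of these descriptors encodes
+; (debug-info version << 16) + DWARF tag, so e.g. DW_TAG_arg_variable (0x101)
+; goes from (8 << 16) + 0x101 = 524545 to (12 << 16) + 0x101 = 786689.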
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index 4cb621f61ca2..f6119f8bbd85 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -30,23 +30,24 @@ return: ; preds = %entry
ret void, !dbg !19
}
-!0 = metadata !{i32 524545, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !"bar.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 1, metadata !"bar.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"bar.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
!5 = metadata !{null, metadata !6}
-!6 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
!7 = metadata !{i32 8, i32 0, metadata !1, null}
!8 = metadata !{i32 9, i32 0, metadata !1, null}
-!9 = metadata !{i32 524545, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 524334, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!9 = metadata !{i32 786689, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
+!10 = metadata !{i32 786478, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ]
!12 = metadata !{null, metadata !6, metadata !13, metadata !14}
-!13 = metadata !{i32 524324, metadata !2, metadata !"long int", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 786468, metadata !2, metadata !"long int", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
!15 = metadata !{i32 4, i32 0, metadata !10, metadata !8}
-!16 = metadata !{i32 524545, metadata !10, metadata !"y", metadata !2, i32 4, metadata !13} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 524545, metadata !10, metadata !"z", metadata !2, i32 4, metadata !14} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 786689, metadata !10, metadata !"y", metadata !2, i32 4, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 786689, metadata !10, metadata !"z", metadata !2, i32 4, metadata !14, i32 0, null} ; [ DW_TAG_arg_variable ]
!18 = metadata !{i32 5, i32 0, metadata !10, metadata !8}
!19 = metadata !{i32 10, i32 0, metadata !1, null}
+!20 = metadata !{metadata !"bar.c", metadata !"/tmp/"}
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 3fa16288c219..582a57b5d39c 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -70,20 +70,20 @@ define void @test4(i8 *%P) {
%A = alloca %1
%a = bitcast %1* %A to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
- call void @test4a(i8* byval align 1 %a)
+ call void @test4a(i8* align 1 byval %a)
ret void
; CHECK: @test4
; CHECK-NEXT: call void @test4a(
}
-declare void @test4a(i8* byval align 1)
+declare void @test4a(i8* align 1 byval)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
%struct.S = type { i128, [4 x i8]}
@sS = external global %struct.S, align 16
-declare void @test5a(%struct.S* byval align 16) nounwind ssp
+declare void @test5a(%struct.S* align 16 byval) nounwind ssp
; rdar://8713376 - This memcpy can't be eliminated.
@@ -94,7 +94,7 @@ entry:
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast (%struct.S* @sS to i8*), i64 32, i32 16, i1 false)
%a = getelementptr %struct.S* %y, i64 0, i32 1, i64 0
store i8 4, i8* %a
- call void @test5a(%struct.S* byval align 16 %y)
+ call void @test5a(%struct.S* align 16 byval %y)
ret i32 0
; CHECK: @test5(
; CHECK: store i8 4
@@ -114,19 +114,19 @@ define void @test6(i8 *%P) {
; isn't itself 8 byte aligned.
%struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-define i32 @test7(%struct.p* nocapture byval align 8 %q) nounwind ssp {
+define i32 @test7(%struct.p* nocapture align 8 byval %q) nounwind ssp {
entry:
%agg.tmp = alloca %struct.p, align 4
%tmp = bitcast %struct.p* %agg.tmp to i8*
%tmp1 = bitcast %struct.p* %q to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
- %call = call i32 @g(%struct.p* byval align 8 %agg.tmp) nounwind
+ %call = call i32 @g(%struct.p* align 8 byval %agg.tmp) nounwind
ret i32 %call
; CHECK: @test7
-; CHECK: call i32 @g(%struct.p* byval align 8 %q) nounwind
+; CHECK: call i32 @g(%struct.p* byval align 8 %q) [[NUW:#[0-9]+]]
}
-declare i32 @g(%struct.p* byval align 8)
+declare i32 @g(%struct.p* align 8 byval)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
@@ -152,7 +152,7 @@ declare noalias i8* @malloc(i32)
; rdar://11341081
%struct.big = type { [50 x i32] }
-define void @test9() nounwind uwtable ssp {
+define void @test9() nounwind ssp uwtable {
entry:
; CHECK: test9
; CHECK: f1
@@ -170,3 +170,7 @@ entry:
declare void @f1(%struct.big* sret)
declare void @f2(%struct.big*)
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind ssp }
+; CHECK: attributes #2 = { nounwind ssp uwtable }
diff --git a/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
index e3e52b401af5..19cd6a5171da 100644
--- a/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
+++ b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mergefunc %s -disable-output
+; RUN: opt -mergefunc -disable-output < %s
; This used to crash.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
diff --git a/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll b/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll
new file mode 100644
index 000000000000..3f6a5ba157de
--- /dev/null
+++ b/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll
@@ -0,0 +1,36 @@
+; RUN: opt -mergefunc -disable-output < %s
+; This used to trigger a ConstantExpr::getBitCast assertion.
+
+define void @t1() unnamed_addr uwtable ssp align 2 {
+entry:
+ switch i32 undef, label %sw.bb12 [
+ i32 127, label %sw.bb
+ i32 126, label %sw.bb4
+ ]
+
+sw.bb: ; preds = %entry
+ unreachable
+
+sw.bb4: ; preds = %entry
+ unreachable
+
+sw.bb12: ; preds = %entry
+ ret void
+}
+
+define void @t2() unnamed_addr uwtable ssp align 2 {
+entry:
+ switch i32 undef, label %sw.bb8 [
+ i32 4, label %sw.bb
+ i32 3, label %sw.bb4
+ ]
+
+sw.bb: ; preds = %entry
+ unreachable
+
+sw.bb4: ; preds = %entry
+ ret void
+
+sw.bb8: ; preds = %entry
+ unreachable
+}
diff --git a/test/Transforms/MergeFunc/phi-speculation1.ll b/test/Transforms/MergeFunc/phi-speculation1.ll
index fd0baffb3108..548e5102be10 100644
--- a/test/Transforms/MergeFunc/phi-speculation1.ll
+++ b/test/Transforms/MergeFunc/phi-speculation1.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt < %s -mergefunc -stats -disable-output 2>&1 | not grep "functions merged"
define i32 @foo1(i32 %x) {
diff --git a/test/Transforms/MergeFunc/phi-speculation2.ll b/test/Transforms/MergeFunc/phi-speculation2.ll
index eec8b5c5a90a..d42a465d0c65 100644
--- a/test/Transforms/MergeFunc/phi-speculation2.ll
+++ b/test/Transforms/MergeFunc/phi-speculation2.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt < %s -mergefunc -stats -disable-output 2>&1 | grep "functions merged"
define i32 @foo1(i32 %x) {
diff --git a/test/Transforms/MergeFunc/vector.ll b/test/Transforms/MergeFunc/vector.ll
index 4af079f8cdf7..dba5fa349aba 100644
--- a/test/Transforms/MergeFunc/vector.ll
+++ b/test/Transforms/MergeFunc/vector.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -mergefunc -stats -disable-output < %s 2>&1 | grep "functions merged"
; This test checks whether we can merge
diff --git a/test/Transforms/MergeFunc/vectors-and-arrays.ll b/test/Transforms/MergeFunc/vectors-and-arrays.ll
index dc64a0858ba8..22747224a193 100644
--- a/test/Transforms/MergeFunc/vectors-and-arrays.ll
+++ b/test/Transforms/MergeFunc/vectors-and-arrays.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -mergefunc < %s -disable-output -stats | not grep merged
; This used to crash with an assert.
diff --git a/test/Transforms/MetaRenamer/metarenamer.ll b/test/Transforms/MetaRenamer/metarenamer.ll
index ad41bcf50f19..4020e1045081 100644
--- a/test/Transforms/MetaRenamer/metarenamer.ll
+++ b/test/Transforms/MetaRenamer/metarenamer.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -metarenamer -S | FileCheck %s
+; RUN: opt -metarenamer -S < %s | FileCheck %s
; CHECK: target triple {{.*}}
; CHECK-NOT: {{^x*}}xxx{{^x*}}
diff --git a/test/Transforms/ObjCARC/apelim.ll b/test/Transforms/ObjCARC/apelim.ll
index 8c7b5b1e654f..4541b3f2fdf3 100644
--- a/test/Transforms/ObjCARC/apelim.ll
+++ b/test/Transforms/ObjCARC/apelim.ll
@@ -38,8 +38,8 @@ entry:
}
; CHECK: define internal void @_GLOBAL__I_y()
-; CHECK: %0 = call i8* @objc_autoreleasePoolPush() nounwind
-; CHECK: call void @objc_autoreleasePoolPop(i8* %0) nounwind
+; CHECK: %0 = call i8* @objc_autoreleasePoolPush() [[NUW:#[0-9]+]]
+; CHECK: call void @objc_autoreleasePoolPop(i8* %0) [[NUW]]
; CHECK: }
define internal void @_GLOBAL__I_y() {
entry:
@@ -51,3 +51,5 @@ entry:
declare i8* @objc_autoreleasePoolPush()
declare void @objc_autoreleasePoolPop(i8*)
+
+; CHECK: attributes #0 = { nounwind }
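+;
+; (Attributes are now printed as numbered groups: the call site carries "#0"
+; and a trailing "attributes #0 = { nounwind }" line defines it, which is why
+; the CHECK lines capture the group with [[NUW:#[0-9]+]].)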
diff --git a/test/Transforms/ObjCARC/arc-annotations.ll b/test/Transforms/ObjCARC/arc-annotations.ll
new file mode 100644
index 000000000000..4c56b4a3def9
--- /dev/null
+++ b/test/Transforms/ObjCARC/arc-annotations.ll
@@ -0,0 +1,307 @@
+; This file consists of various tests which ensure that the objc-arc
+; annotations are working correctly. In the future, I will use these
+; annotations in other lit tests to check the data flow analysis of ARC.
+
+; REQUIRES: asserts
+; RUN: opt -S -objc-arc -enable-objc-arc-annotations < %s | FileCheck %s
+
+declare i8* @objc_retain(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_autorelease(i8*)
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare void @objc_autoreleasePoolPop(i8*)
+declare i8* @objc_autoreleasePoolPush()
+declare i8* @objc_retainBlock(i8*)
+
+declare i8* @objc_retainedObject(i8*)
+declare i8* @objc_unretainedObject(i8*)
+declare i8* @objc_unretainedPointer(i8*)
+
+declare void @use_pointer(i8*)
+declare void @callee()
+declare void @callee_fnptr(void ()*)
+declare void @invokee()
+declare i8* @returner()
+
+; Simple retain+release pair deletion, with some intervening control
+; flow and harmless instructions.
+
+; CHECK: define void @test0(
+; CHECK: entry:
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK: %0 = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !0, !llvm.arc.annotation.topdown !1
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: t:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK: store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !2
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: f:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK: store i32 7, i32* %x, !llvm.arc.annotation.bottomup !2
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: return:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
+; CHECK: call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !3, !llvm.arc.annotation.topdown !4
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: }
+define void @test0(i32* %x, i1 %p) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ br i1 %p, label %t, label %f
+
+t:
+ store i8 3, i8* %a
+ %b = bitcast i32* %x to float*
+ store float 2.0, float* %b
+ br label %return
+
+f:
+ store i32 7, i32* %x
+ br label %return
+
+return:
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ ret void
+}
+
+; Like test0 but the release isn't always executed when the retain is,
+; so the optimization is not safe.
+
+; TODO: Make the objc_release's argument be %0.
+
+; CHECK: define void @test1(
+; CHECK: entry:
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK: %0 = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !5, !llvm.arc.annotation.topdown !6
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: t:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK: store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !7
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: f:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK: call void @callee(), !llvm.arc.annotation.topdown !8
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease)
+; CHECK: return:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
+; CHECK: call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !9
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: alt_return:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: }
+define void @test1(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ br i1 %p, label %t, label %f
+
+t:
+ store i8 3, i8* %a
+ %b = bitcast i32* %x to float*
+ store float 2.0, float* %b
+ br label %return
+
+f:
+ store i32 7, i32* %x
+ call void @callee()
+ br i1 %q, label %return, label %alt_return
+
+return:
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ ret void
+
+alt_return:
+ ret void
+}
+
+; Don't do partial elimination into two different CFG diamonds.
+
+; CHECK: define void @test1b(
+; CHECK: entry:
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK: %0 = tail call i8* @objc_retain(i8* %x) #0, !llvm.arc.annotation.bottomup !10, !llvm.arc.annotation.topdown !11
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: if.then:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_CanRelease)
+; CHECK: tail call void @callee(), !llvm.arc.annotation.bottomup !12, !llvm.arc.annotation.topdown !13
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease)
+; CHECK: if.end:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_CanRelease)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease)
+; CHECK: if.then3:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_CanRelease)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK: tail call void @use_pointer(i8* %x), !llvm.arc.annotation.bottomup !14, !llvm.arc.annotation.topdown !15
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_MovableRelease)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Use)
+; CHECK: if.end5:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_MovableRelease)
+; CHECK: tail call void @objc_release(i8* %x) #0, !clang.imprecise_release !16, !llvm.arc.annotation.bottomup !17
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: }
+define void @test1b(i8* %x, i1 %p, i1 %q) {
+entry:
+ tail call i8* @objc_retain(i8* %x) nounwind
+ br i1 %p, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @callee()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ br i1 %q, label %if.then3, label %if.end5
+
+if.then3: ; preds = %if.end
+ tail call void @use_pointer(i8* %x)
+ br label %if.end5
+
+if.end5: ; preds = %if.then3, %if.end
+ tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; Like test0 but the pointer is passed to an intervening call,
+; so the optimization is not safe.
+
+; CHECK: define void @test2(
+; CHECK: entry:
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK: %e = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !18, !llvm.arc.annotation.topdown !19
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_CanRelease)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: t:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
+; CHECK: store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !20
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: f:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_CanRelease)
+; CHECK: call void @use_pointer(i8* %e), !llvm.arc.annotation.bottomup !21, !llvm.arc.annotation.topdown !22
+; CHECK: store float 3.000000e+00, float* %d, !llvm.arc.annotation.bottomup !20, !llvm.arc.annotation.topdown !23
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Use)
+; CHECK: return:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Use)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
+; CHECK: call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !24, !llvm.arc.annotation.topdown !25
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: }
+define void @test2(i32* %x, i1 %p) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ %e = call i8* @objc_retain(i8* %a) nounwind
+ br i1 %p, label %t, label %f
+
+t:
+ store i8 3, i8* %a
+ %b = bitcast i32* %x to float*
+ store float 2.0, float* %b
+ br label %return
+
+f:
+ store i32 7, i32* %x
+ call void @use_pointer(i8* %e)
+ %d = bitcast i32* %x to float*
+ store float 3.0, float* %d
+ br label %return
+
+return:
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ ret void
+}
+
+; Like test0 but the release is in a loop,
+; so the optimization is not safe.
+
+; TODO: For now, assume this can't happen.
+
+; CHECK: define void @test3(
+; CHECK: entry:
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
+; CHECK: tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !26, !llvm.arc.annotation.topdown !27
+; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
+; CHECK: loop:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
+; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
+; CHECK: call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !28, !llvm.arc.annotation.topdown !29
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: return:
+; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
+; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
+; CHECK: }
+define void @test3(i32* %x, i1* %q) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ br label %loop
+
+loop:
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ %j = load volatile i1* %q
+ br i1 %j, label %loop, label %return
+
+return:
+ ret void
+}
+
+!0 = metadata !{}
+
+; CHECK: !0 = metadata !{metadata !"(test0,%x)", metadata !"S_Use", metadata !"S_None"}
+; CHECK: !1 = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: !2 = metadata !{metadata !"(test0,%x)", metadata !"S_Release", metadata !"S_Use"}
+; CHECK: !3 = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Release"}
+; CHECK: !4 = metadata !{metadata !"(test0,%x)", metadata !"S_Retain", metadata !"S_None"}
+; CHECK: !5 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_None"}
+; CHECK: !6 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: !7 = metadata !{metadata !"(test1,%x)", metadata !"S_Release", metadata !"S_Use"}
+; CHECK: !8 = metadata !{metadata !"(test1,%x)", metadata !"S_Retain", metadata !"S_CanRelease"}
+; CHECK: !9 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_Release"}
+; CHECK: !10 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_None"}
+; CHECK: !11 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: !12 = metadata !{metadata !"(test1b,%x)", metadata !"S_Use", metadata !"S_CanRelease"}
+; CHECK: !13 = metadata !{metadata !"(test1b,%x)", metadata !"S_Retain", metadata !"S_CanRelease"}
+; CHECK: !14 = metadata !{metadata !"(test1b,%x)", metadata !"S_MovableRelease", metadata !"S_Use"}
+; CHECK: !15 = metadata !{metadata !"(test1b,%x)", metadata !"S_CanRelease", metadata !"S_Use"}
+; CHECK: !16 = metadata !{}
+; CHECK: !17 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_MovableRelease"}
+; CHECK: !18 = metadata !{metadata !"(test2,%x)", metadata !"S_CanRelease", metadata !"S_None"}
+; CHECK: !19 = metadata !{metadata !"(test2,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: !20 = metadata !{metadata !"(test2,%x)", metadata !"S_Release", metadata !"S_Use"}
+; CHECK: !21 = metadata !{metadata !"(test2,%x)", metadata !"S_Use", metadata !"S_CanRelease"}
+; CHECK: !22 = metadata !{metadata !"(test2,%x)", metadata !"S_Retain", metadata !"S_CanRelease"}
+; CHECK: !23 = metadata !{metadata !"(test2,%x)", metadata !"S_CanRelease", metadata !"S_Use"}
+; CHECK: !24 = metadata !{metadata !"(test2,%x)", metadata !"S_None", metadata !"S_Release"}
+; CHECK: !25 = metadata !{metadata !"(test2,%x)", metadata !"S_Use", metadata !"S_None"}
+; CHECK: !26 = metadata !{metadata !"(test3,%x)", metadata !"S_Release", metadata !"S_None"}
+; CHECK: !27 = metadata !{metadata !"(test3,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: !28 = metadata !{metadata !"(test3,%x)", metadata !"S_None", metadata !"S_Release"}
+; CHECK: !29 = metadata !{metadata !"(test3,%x)", metadata !"S_Retain", metadata !"S_None"}
+
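+; A hedged reading of the annotation metadata checked above (an assumption
+; drawn from the patterns, not a normative description): each annotated call
+; carries bottomup/topdown tuples of the form
+;   !N = metadata !{metadata !"(function,%ptr)", metadata !"S_old", metadata !"S_new"}
+; naming the tracked pointer and the ARC sequence state before and after the
+; instruction in that dataflow direction, e.g. for the release in @test2:
+;   !24 = metadata !{metadata !"(test2,%x)", metadata !"S_None", metadata !"S_Release"}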
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 7b64b1be7c62..828a8a701127 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -92,10 +92,10 @@ alt_return:
; CHECK: define void @test1b(
; CHECK: entry:
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW:#[0-9]+]]
; CHECK-NOT: @objc_
; CHECK: if.end5:
-; CHECK: tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+; CHECK: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
; CHECK-NOT: @objc_
; CHECK: }
define void @test1b(i8* %x, i1 %p, i1 %q) {
@@ -404,8 +404,8 @@ entry:
; a stack argument.
; CHECK: define void @test11(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_autorelease(i8* %0) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
; CHECK: }
define void @test11(i8* %x) nounwind {
entry:
@@ -428,11 +428,13 @@ entry:
ret void
}
-; Same as test11 but the value is returned. Do an RV optimization.
+; Same as test11 but the value is returned. Do not perform an RV optimization
+; since if the frontend emitted code for an __autoreleasing variable, we may
+; want it to be in the autorelease pool.
; CHECK: define i8* @test11b(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
; CHECK: }
define i8* @test11b(i8* %x) nounwind {
entry:
@@ -462,10 +464,10 @@ entry:
; Trivial retain,autorelease pair. Don't delete!
; CHECK: define void @test13(
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
-; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: tail call i8* @objc_retain(i8* %x) [[NUW]]
; CHECK: @use_pointer(i8* %x)
-; CHECK: tail call i8* @objc_autorelease(i8* %x) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %x) [[NUW]]
; CHECK: }
define void @test13(i8* %x, i64 %n) {
entry:
@@ -716,7 +718,7 @@ entry:
; Bitcast insertion
; CHECK: define void @test20(
-; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %tmp) [[NUW]]
; CHECK-NEXT: invoke
define void @test20(double* %self) {
if.then12:
@@ -795,10 +797,10 @@ entry:
ret void
}
-; Don't optimize objc_retainBlock.
+; Don't optimize objc_retainBlock, but do strength reduce it.
; CHECK: define void @test23b
-; CHECK: @objc_retainBlock
+; CHECK: @objc_retain
; CHECK: @objc_release
; CHECK: }
define void @test23b(i8* %p) {
@@ -980,7 +982,7 @@ done:
; CHECK: call i8* @objc_retain(
; CHECK: call void @callee()
; CHECK: store
-; CHECK: call void @objc_release(i8* %p) nounwind, !clang.imprecise_release
+; CHECK: call void @objc_release(i8* %p) [[NUW]], !clang.imprecise_release
; CHECK: done:
; CHECK-NOT: @objc_
; CHECK: }
@@ -1450,9 +1452,9 @@ define void @test45(i8** %pp, i8** %qq) {
; Don't delete retain and autorelease here.
; CHECK: define void @test46(
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
; CHECK: true:
-; CHECK: tail call i8* @objc_autorelease(i8* %p) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
define void @test46(i8* %p, i1 %a) {
entry:
call i8* @objc_retain(i8* %p)
@@ -1565,7 +1567,7 @@ define void @test53(void ()** %zz, i8** %pp) {
; CHECK: define void @test54(
; CHECK: call i8* @returner()
-; CHECK-NEXT: call void @objc_release(i8* %t) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: call void @objc_release(i8* %t) [[NUW]], !clang.imprecise_release !0
; CHECK-NEXT: ret void
define void @test54() {
%t = call i8* @returner()
@@ -1595,10 +1597,10 @@ entry:
; CHECK: define void @test56(
; CHECK-NOT: @objc
; CHECK: if.then:
-; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
; CHECK-NEXT: tail call void @use_pointer(i8* %x)
; CHECK-NEXT: tail call void @use_pointer(i8* %x)
-; CHECK-NEXT: tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
; CHECK-NEXT: br label %if.end
; CHECK-NOT: @objc
; CHECK: }
@@ -1630,10 +1632,10 @@ if.end: ; preds = %entry, %if.then
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @use_pointer(i8* %x)
; CHECK-NEXT: call void @use_pointer(i8* %x)
-; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
; CHECK-NEXT: call void @use_pointer(i8* %x)
; CHECK-NEXT: call void @use_pointer(i8* %x)
-; CHECK-NEXT: call void @objc_release(i8* %x) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %x) [[NUW]]
; CHECK-NEXT: ret void
; CHECK-NEXT: }
define void @test57(i8* %x) nounwind {
@@ -1673,10 +1675,10 @@ entry:
; CHECK: define void @test59(
; CHECK-NEXT: entry:
-; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
; CHECK-NEXT: call void @use_pointer(i8* %x)
; CHECK-NEXT: call void @use_pointer(i8* %x)
-; CHECK-NEXT: call void @objc_release(i8* %x) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %x) [[NUW]]
; CHECK-NEXT: ret void
; CHECK-NEXT: }
define void @test59(i8* %x) nounwind {
@@ -1875,8 +1877,8 @@ return: ; preds = %if.then, %entry
; rdar://11931823
; CHECK: define void @test66(
-; CHECK: %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
-; CHECK: tail call void @objc_release(i8* %cond) nounwind
+; CHECK: %tmp7 = tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK: tail call void @objc_release(i8* %cond) [[NUW]]
; CHECK: }
define void @test66(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
entry:
@@ -2224,3 +2226,6 @@ end: ; preds = %if.end125, %if.end1
!0 = metadata !{}
declare i32 @__gxx_personality_v0(...)
+
+; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes [[NUW]] = { nounwind }
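+
+; A hedged sketch (illustrative only) of what @test11b above now pins down:
+; the returned value keeps a plain autorelease rather than being promoted to
+; the ReturnValue form, so a value the frontend placed in the autorelease
+; pool stays there:
+;   %0 = tail call i8* @objc_retain(i8* %x)
+;   %1 = call i8* @objc_autorelease(i8* %0)
+;   ret i8* %1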
diff --git a/test/Transforms/ObjCARC/cfg-hazards.ll b/test/Transforms/ObjCARC/cfg-hazards.ll
index 15194237c4c5..899298b5967e 100644
--- a/test/Transforms/ObjCARC/cfg-hazards.ll
+++ b/test/Transforms/ObjCARC/cfg-hazards.ll
@@ -86,9 +86,9 @@ for.end: ; preds = %for.body
; Delete nested retain+release pairs around loops.
-; CHECK: define void @test3(i8* %a) nounwind {
+; CHECK: define void @test3(i8* %a) #0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) [[NUW:#[0-9]+]]
; CHECK-NEXT: br label %loop
; CHECK-NOT: @objc_
; CHECK: exit:
@@ -112,9 +112,9 @@ exit:
ret void
}
-; CHECK: define void @test4(i8* %a) nounwind {
+; CHECK: define void @test4(i8* %a) #0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) [[NUW]]
; CHECK-NEXT: br label %loop
; CHECK-NOT: @objc_
; CHECK: exit:
@@ -142,9 +142,9 @@ exit:
ret void
}
-; CHECK: define void @test5(i8* %a) nounwind {
+; CHECK: define void @test5(i8* %a) #0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) [[NUW]]
; CHECK-NEXT: call void @callee()
; CHECK-NEXT: br label %loop
; CHECK-NOT: @objc_
@@ -176,9 +176,9 @@ exit:
ret void
}
-; CHECK: define void @test6(i8* %a) nounwind {
+; CHECK: define void @test6(i8* %a) #0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) [[NUW]]
; CHECK-NEXT: br label %loop
; CHECK-NOT: @objc_
; CHECK: exit:
@@ -209,9 +209,9 @@ exit:
ret void
}
-; CHECK: define void @test7(i8* %a) nounwind {
+; CHECK: define void @test7(i8* %a) #0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) [[NUW]]
; CHECK-NEXT: call void @callee()
; CHECK-NEXT: br label %loop
; CHECK-NOT: @objc_
@@ -242,9 +242,9 @@ exit:
ret void
}
-; CHECK: define void @test8(i8* %a) nounwind {
+; CHECK: define void @test8(i8* %a) #0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %a) [[NUW]]
; CHECK-NEXT: br label %loop
; CHECK-NOT: @objc_
; CHECK: exit:
@@ -274,7 +274,7 @@ exit:
ret void
}
-; CHECK: define void @test9(i8* %a) nounwind {
+; CHECK: define void @test9(i8* %a) #0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %loop
; CHECK-NOT: @objc_
@@ -303,7 +303,7 @@ exit:
ret void
}
-; CHECK: define void @test10(i8* %a) nounwind {
+; CHECK: define void @test10(i8* %a) #0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %loop
; CHECK-NOT: @objc_
@@ -332,7 +332,7 @@ exit:
ret void
}
-; CHECK: define void @test11(i8* %a) nounwind {
+; CHECK: define void @test11(i8* %a) #0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %loop
; CHECK-NOT: @objc_
@@ -362,15 +362,15 @@ exit:
; Don't delete anything if they're not balanced.
-; CHECK: define void @test12(i8* %a) nounwind {
+; CHECK: define void @test12(i8* %a) #0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %outer = tail call i8* @objc_retain(i8* %a) nounwind
-; CHECK-NEXT: %inner = tail call i8* @objc_retain(i8* %a) nounwind
+; CHECK-NEXT: %outer = tail call i8* @objc_retain(i8* %a) [[NUW]]
+; CHECK-NEXT: %inner = tail call i8* @objc_retain(i8* %a) [[NUW]]
; CHECK-NEXT: br label %loop
; CHECK-NOT: @objc_
; CHECK: exit:
-; CHECK-NEXT: call void @objc_release(i8* %a) nounwind
-; CHECK-NEXT: call void @objc_release(i8* %a) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: call void @objc_release(i8* %a) [[NUW]]
+; CHECK-NEXT: call void @objc_release(i8* %a) [[NUW]], !clang.imprecise_release !0
; CHECK-NEXT: ret void
; CHECK-NEXT: }
define void @test12(i8* %a) nounwind {
@@ -394,4 +394,6 @@ exit:
ret void
}
+; CHECK: attributes [[NUW]] = { nounwind }
+
!0 = metadata !{}
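
; A hedged sketch (not part of the file) of the nested shape that tests 3
; through 8 above exercise, assuming the loop body introduces no hazards:
;   %outer = tail call i8* @objc_retain(i8* %a)
;   %inner = tail call i8* @objc_retain(i8* %a)
;   ; ... loop using %a, with no releases or autorelease pools inside ...
;   call void @objc_release(i8* %a)
;   call void @objc_release(i8* %a)
; Both pairs can be deleted; @test12 above is the unbalanced case that must
; be left alone.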
diff --git a/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll b/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll
new file mode 100644
index 000000000000..4215b5c36465
--- /dev/null
+++ b/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll
@@ -0,0 +1,16 @@
+; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
+
+; This file makes sure that clang.arc.use is removed even if no other
+; ARC-interesting calls are in the module.
+
+declare void @clang.arc.use(...) nounwind
+
+; Kill calls to @clang.arc.use(...)
+; CHECK: define void @test0(
+; CHECK-NOT: clang.arc.use
+; CHECK: }
+define void @test0(i8* %a, i8* %b) {
+ call void (...)* @clang.arc.use(i8* %a, i8* %b) nounwind
+ ret void
+}
+
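+; A hedged sketch (an assumption, not checked verbatim) of @test0 above
+; after `opt -objc-arc-contract` runs: the intrinsic call is erased and
+; nothing replaces it.
+;
+;   define void @test0(i8* %a, i8* %b) {
+;     ret void
+;   }
+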
diff --git a/test/Transforms/ObjCARC/contract-marker.ll b/test/Transforms/ObjCARC/contract-marker.ll
index 01d978a0e21d..01fd1e71436e 100644
--- a/test/Transforms/ObjCARC/contract-marker.ll
+++ b/test/Transforms/ObjCARC/contract-marker.ll
@@ -3,7 +3,7 @@
; CHECK: %call = tail call i32* @qux()
; CHECK-NEXT: %tcall = bitcast i32* %call to i8*
; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
-; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) [[NUW:#[0-9]+]]
define void @foo() {
entry:
@@ -21,3 +21,5 @@ declare void @bar(i8*)
!clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
!0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index 2922f816d589..6999237300e7 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -10,7 +10,7 @@ declare void @use_pointer(i8*)
; CHECK: define void @test0(
; CHECK: entry:
-; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) nounwind
+; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) [[NUW:#[0-9]+]]
; CHECK-NEXT: ret void
define void @test0(i8* %p) {
entry:
@@ -25,10 +25,10 @@ entry:
; CHECK: define void @test1(i8* %p) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
; CHECK-NEXT: %tmp = load volatile i8** @x, align 8
; CHECK-NEXT: store i8* %0, i8** @x, align 8
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]]
; CHECK-NEXT: ret void
; CHECK-NEXT: }
define void @test1(i8* %p) {
@@ -44,10 +44,10 @@ entry:
; CHECK: define void @test2(i8* %p) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
; CHECK-NEXT: %tmp = load i8** @x, align 8
; CHECK-NEXT: store volatile i8* %0, i8** @x, align 8
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]]
; CHECK-NEXT: ret void
; CHECK-NEXT: }
define void @test2(i8* %p) {
@@ -64,11 +64,11 @@ entry:
; CHECK: define void @test3(i8* %newValue) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+; CHECK-NEXT: %x0 = tail call i8* @objc_retain(i8* %newValue) [[NUW]]
; CHECK-NEXT: %x1 = load i8** @x, align 8
; CHECK-NEXT: store i8* %x0, i8** @x, align 8
; CHECK-NEXT: tail call void @use_pointer(i8* %x1), !clang.arc.no_objc_arc_exceptions !0
-; CHECK-NEXT: tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %x1) [[NUW]], !clang.imprecise_release !0
; CHECK-NEXT: ret void
; CHECK-NEXT: }
define void @test3(i8* %newValue) {
@@ -85,11 +85,11 @@ entry:
; CHECK: define i1 @test4(i8* %newValue, i8* %foo) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
+; CHECK-NEXT: %x0 = tail call i8* @objc_retain(i8* %newValue) [[NUW]]
; CHECK-NEXT: %x1 = load i8** @x, align 8
; CHECK-NEXT: store i8* %x0, i8** @x, align 8
; CHECK-NEXT: %t = icmp eq i8* %x1, %foo
-; CHECK-NEXT: tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %x1) [[NUW]], !clang.imprecise_release !0
; CHECK-NEXT: ret i1 %t
; CHECK-NEXT: }
define i1 @test4(i8* %newValue, i8* %foo) {
@@ -106,7 +106,7 @@ entry:
; CHECK: define i1 @test5(i8* %newValue, i8* %foo) {
; CHECK: %t = icmp eq i8* %x1, %foo
-; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) nounwind
+; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) [[NUW]]
define i1 @test5(i8* %newValue, i8* %foo) {
entry:
%x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
@@ -121,7 +121,7 @@ entry:
; CHECK: define i1 @test6(i8* %newValue, i8* %foo) {
; CHECK: %t = icmp eq i8* %x1, %foo
-; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) nounwind
+; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) [[NUW]]
define i1 @test6(i8* %newValue, i8* %foo) {
entry:
%x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
@@ -136,9 +136,9 @@ entry:
; CHECK: define void @test7(
; CHECK-NEXT: entry:
-; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
; CHECK-NEXT: %tmp = load i8** @x, align 8
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]]
; CHECK-NEXT: ret void
; CHECK-NEXT: }
define void @test7(i8* %p) {
@@ -155,7 +155,7 @@ entry:
; CHECK-NEXT: entry:
; CHECK-NEXT: %tmp = load i8** @x, align 8
; CHECK-NEXT: store i8* %p, i8** @x, align 8
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]]
; CHECK-NEXT: ret void
; CHECK-NEXT: }
define void @test8(i8* %p) {
@@ -167,3 +167,5 @@ entry:
}
!0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
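+
+; A hedged sketch (not part of the test) of the four-instruction pattern the
+; contract pass collapses, matching what @test0 above checks:
+;   %0 = tail call i8* @objc_retain(i8* %p)
+;   %tmp = load i8** @x
+;   store i8* %0, i8** @x
+;   tail call void @objc_release(i8* %tmp)
+; becomes a single call:
+;   tail call void @objc_storeStrong(i8** @x, i8* %p)
+; Volatile loads or stores (tests 1 and 2 above) block the contraction.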
diff --git a/test/Transforms/ObjCARC/contract-testcases.ll b/test/Transforms/ObjCARC/contract-testcases.ll
index 1510ed00e691..85b03be275ec 100644
--- a/test/Transforms/ObjCARC/contract-testcases.ll
+++ b/test/Transforms/ObjCARC/contract-testcases.ll
@@ -69,7 +69,7 @@ bb7: ; preds = %bb6, %bb6, %bb5
; CHECK: define void @_Z6doTestP8NSString() {
; CHECK: invoke.cont: ; preds = %entry
; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
-; CHECK-NEXT: %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+; CHECK-NEXT: %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) [[NUW:#[0-9]+]]
define void @_Z6doTestP8NSString() {
entry:
%call = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* ()*)()
@@ -88,3 +88,6 @@ lpad: ; preds = %entry
!clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
!0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
+
+; CHECK: attributes #0 = { optsize }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll
index c48f8a534fad..0b60683d9995 100644
--- a/test/Transforms/ObjCARC/contract.ll
+++ b/test/Transforms/ObjCARC/contract.ll
@@ -34,12 +34,12 @@ entry:
; Merge objc_retain and objc_autorelease into objc_retainAutorelease.
; CHECK: define void @test2(
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) [[NUW:#[0-9]+]]
; CHECK: }
define void @test2(i8* %x) nounwind {
entry:
%0 = tail call i8* @objc_retain(i8* %x) nounwind
- tail call i8* @objc_autorelease(i8* %0) nounwind
+ call i8* @objc_autorelease(i8* %0) nounwind
call void @use_pointer(i8* %x)
ret void
}
@@ -47,7 +47,7 @@ entry:
; Same as test2 but the value is returned. Do an RV optimization.
; CHECK: define i8* @test2b(
-; CHECK: tail call i8* @objc_retainAutoreleaseReturnValue(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutoreleaseReturnValue(i8* %x) [[NUW]]
; CHECK: }
define i8* @test2b(i8* %x) nounwind {
entry:
@@ -59,14 +59,14 @@ entry:
; Merge a retain,autorelease pair around a call.
; CHECK: define void @test3(
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) [[NUW]]
; CHECK: @use_pointer(i8* %0)
; CHECK: }
define void @test3(i8* %x, i64 %n) {
entry:
tail call i8* @objc_retain(i8* %x) nounwind
call void @use_pointer(i8* %x)
- tail call i8* @objc_autorelease(i8* %x) nounwind
+ call i8* @objc_autorelease(i8* %x) nounwind
ret void
}
@@ -75,7 +75,7 @@ entry:
; CHECK: define void @test4(
; CHECK-NEXT: entry:
-; CHECK-NEXT: @objc_retainAutorelease(i8* %x) nounwind
+; CHECK-NEXT: @objc_retainAutorelease(i8* %x) [[NUW]]
; CHECK-NEXT: @use_pointer
; CHECK-NEXT: @objc_release
; CHECK-NEXT: ret void
@@ -84,7 +84,7 @@ define void @test4(i8* %x, i64 %n) {
entry:
tail call i8* @objc_retain(i8* %x) nounwind
call void @use_pointer(i8* %x)
- tail call i8* @objc_autorelease(i8* %x) nounwind
+ call i8* @objc_autorelease(i8* %x) nounwind
tail call void @objc_release(i8* %x) nounwind
ret void
}
@@ -92,9 +92,9 @@ entry:
; Don't merge retain and autorelease if they're not control-equivalent.
; CHECK: define void @test5(
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
; CHECK: true:
-; CHECK: tail call i8* @objc_autorelease(i8* %0) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %0) [[NUW]]
; CHECK: }
define void @test5(i8* %p, i1 %a) {
entry:
@@ -102,7 +102,7 @@ entry:
br i1 %a, label %true, label %false
true:
- tail call i8* @objc_autorelease(i8* %p) nounwind
+ call i8* @objc_autorelease(i8* %p) nounwind
call void @use_pointer(i8* %p)
ret void
@@ -119,8 +119,8 @@ false:
; Those entrypoints don't exist yet though.
; CHECK: define i8* @test6(
-; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p) nounwind
-; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %1) nounwind
+; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p) [[NUW]]
+; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %1) [[NUW]]
; CHECK: }
define i8* @test6() {
%p = call i8* @returner()
@@ -161,3 +161,16 @@ return: ; preds = %if.then, %entry
%retval = phi i8* [ %c, %if.then ], [ null, %entry ]
ret i8* %retval
}
+
+; Kill calls to @clang.arc.use(...)
+; CHECK: define void @test9(
+; CHECK-NOT: clang.arc.use
+; CHECK: }
+define void @test9(i8* %a, i8* %b) {
+ call void (...)* @clang.arc.use(i8* %a, i8* %b) nounwind
+ ret void
+}
+
+declare void @clang.arc.use(...) nounwind
+
+; CHECK: attributes [[NUW]] = { nounwind }
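+
+; A hedged sketch (illustrative only) of the contraction @test2 above
+; exercises when the retain and autorelease are control-equivalent:
+;   %0 = tail call i8* @objc_retain(i8* %x)
+;   call i8* @objc_autorelease(i8* %0)
+; becomes:
+;   tail call i8* @objc_retainAutorelease(i8* %x)
+; @test5 shows the merge being skipped when the two calls are not
+; control-equivalent.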
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
new file mode 100644
index 000000000000..05257d1d5cf8
--- /dev/null
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -0,0 +1,174 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+; rdar://11744105
+; bugzilla://14584
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%0 = type opaque
+%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* }
+%struct._objc_cache = type opaque
+%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* }
+%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] }
+%struct._objc_method = type { i8*, i8*, i8* }
+%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] }
+%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32, i8** }
+%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
+%struct._prop_t = type { i8*, i8* }
+%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] }
+%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 }
+%struct.NSConstantString = type { i32*, i32, i8*, i64 }
+
+@"OBJC_CLASS_$_NSObject" = external global %struct._class_t
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_" = internal global %struct._class_t* @"OBJC_CLASS_$_NSObject", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"new\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str = linker_private unnamed_addr constant [11 x i8] c"Failed: %@\00", align 1
+@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i64 10 }, section "__DATA,__cfstring"
+@"OBJC_CLASS_$_NSException" = external global %struct._class_t
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_1" = internal global %struct._class_t* @"OBJC_CLASS_$_NSException", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@.str2 = linker_private unnamed_addr constant [4 x i8] c"Foo\00", align 1
+@_unnamed_cfstring_3 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([4 x i8]* @.str2, i32 0, i32 0), i64 3 }, section "__DATA,__cfstring"
+@"\01L_OBJC_METH_VAR_NAME_4" = internal global [14 x i8] c"raise:format:\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_5" = internal global i8* getelementptr inbounds ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@llvm.used = appending global [6 x i8*] [i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_" to i8*), i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1" to i8*), i8* getelementptr inbounds ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_5" to i8*)], section "llvm.metadata"
+
+define i32 @main() uwtable ssp {
+entry:
+ %tmp = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", align 8, !dbg !37
+ %tmp1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !dbg !37, !invariant.load !38
+ %tmp2 = bitcast %struct._class_t* %tmp to i8*, !dbg !37
+; CHECK: call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1)
+ %call = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1), !dbg !37, !clang.arc.no_objc_arc_exceptions !38
+ call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !12), !dbg !37
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+ %tmp3 = call i8* @objc_retain(i8* %call) nounwind, !dbg !39
+ call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !25), !dbg !39
+ invoke fastcc void @ThrowFunc(i8* %call)
+ to label %eh.cont unwind label %lpad, !dbg !40, !clang.arc.no_objc_arc_exceptions !38
+
+eh.cont: ; preds = %entry
+; CHECK: call void @objc_release(i8* %call)
+ call void @objc_release(i8* %call) nounwind, !dbg !42, !clang.imprecise_release !38
+ br label %if.end, !dbg !43
+
+lpad: ; preds = %entry
+ %tmp4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+ catch i8* null, !dbg !40
+ %tmp5 = extractvalue { i8*, i32 } %tmp4, 0, !dbg !40
+ %exn.adjusted = call i8* @objc_begin_catch(i8* %tmp5) nounwind, !dbg !44
+ call void @llvm.dbg.value(metadata !45, i64 0, metadata !21), !dbg !46
+ call void @objc_end_catch(), !dbg !49, !clang.arc.no_objc_arc_exceptions !38
+; CHECK: call void @objc_release(i8* %call)
+ call void @objc_release(i8* %call) nounwind, !dbg !42, !clang.imprecise_release !38
+ call void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !50, !clang.arc.no_objc_arc_exceptions !38
+ br label %if.end, !dbg !52
+
+if.end: ; preds = %lpad, %eh.cont
+ call void (i8*, ...)* @NSLog(i8* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to i8*), i8* %call), !dbg !53, !clang.arc.no_objc_arc_exceptions !38
+; CHECK: call void @objc_release(i8* %call)
+ call void @objc_release(i8* %call) nounwind, !dbg !54, !clang.imprecise_release !38
+ ret i32 0, !dbg !54
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+
+declare i8* @objc_retain(i8*) nonlazybind
+
+declare i8* @objc_begin_catch(i8*)
+
+declare void @objc_end_catch()
+
+declare void @objc_exception_rethrow()
+
+define internal fastcc void @ThrowFunc(i8* %obj) uwtable noinline ssp {
+entry:
+ %tmp = call i8* @objc_retain(i8* %obj) nounwind
+ call void @llvm.dbg.value(metadata !{i8* %obj}, i64 0, metadata !32), !dbg !55
+ %tmp1 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1", align 8, !dbg !56
+ %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_5", align 8, !dbg !56, !invariant.load !38
+ %tmp3 = bitcast %struct._class_t* %tmp1 to i8*, !dbg !56
+ call void (i8*, i8*, %0*, %0*, ...)* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*, %0*, ...)*)(i8* %tmp3, i8* %tmp2, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*), %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*)), !dbg !56, !clang.arc.no_objc_arc_exceptions !38
+ call void @objc_release(i8* %obj) nounwind, !dbg !58, !clang.imprecise_release !38
+ ret void, !dbg !58
+}
+
+declare i32 @__objc_personality_v0(...)
+
+declare void @objc_release(i8*) nonlazybind
+
+declare void @NSLog(i8*, ...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+; CHECK: attributes #0 = { ssp uwtable }
+; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #2 = { nonlazybind }
+; CHECK: attributes #3 = { noinline ssp uwtable }
+; CHECK: attributes [[NUW]] = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!33, !34, !35, !36}
+
+!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997", metadata !"clang version 3.3 ", i1 true, i1 true, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !27}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !10, i32 10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main]
+!6 = metadata !{i32 786473, metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{metadata !11}
+!11 = metadata !{metadata !12, metadata !21, metadata !25}
+!12 = metadata !{i32 786688, metadata !13, metadata !"obj", metadata !6, i32 11, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj] [line 11]
+!13 = metadata !{i32 786443, metadata !5, i32 10, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!14 = metadata !{i32 786454, null, metadata !"id", metadata !6, i32 11, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ]
+!15 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!16 = metadata !{i32 786451, null, metadata !"objc_object", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786445, metadata !16, metadata !"isa", metadata !6, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!19 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!20 = metadata !{i32 786451, null, metadata !"objc_class", metadata !6, i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ]
+!21 = metadata !{i32 786688, metadata !22, metadata !"ok", metadata !6, i32 13, metadata !23, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ok] [line 13]
+!22 = metadata !{i32 786443, metadata !13, i32 12, i32 0, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!23 = metadata !{i32 786454, null, metadata !"BOOL", metadata !6, i32 62, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char]
+!24 = metadata !{i32 786468, null, metadata !"signed char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!25 = metadata !{i32 786688, metadata !26, metadata !"obj2", metadata !6, i32 15, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj2] [line 15]
+!26 = metadata !{i32 786443, metadata !22, i32 14, i32 0, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!27 = metadata !{i32 786478, i32 0, metadata !6, metadata !"ThrowFunc", metadata !"ThrowFunc", metadata !"", metadata !6, i32 4, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i8*)* @ThrowFunc, null, null, metadata !30, i32 5} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc]
+!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = metadata !{null, metadata !14}
+!30 = metadata !{metadata !31}
+!31 = metadata !{metadata !32}
+!32 = metadata !{i32 786689, metadata !27, metadata !"obj", metadata !6, i32 16777220, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [obj] [line 4]
+!33 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!34 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!35 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!36 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!37 = metadata !{i32 11, i32 0, metadata !13, null}
+!38 = metadata !{}
+!39 = metadata !{i32 15, i32 0, metadata !26, null}
+!40 = metadata !{i32 17, i32 0, metadata !41, null}
+!41 = metadata !{i32 786443, metadata !26, i32 16, i32 0, metadata !6, i32 3} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!42 = metadata !{i32 22, i32 0, metadata !26, null}
+!43 = metadata !{i32 23, i32 0, metadata !22, null}
+!44 = metadata !{i32 19, i32 0, metadata !41, null}
+!45 = metadata !{i8 0}
+!46 = metadata !{i32 20, i32 0, metadata !47, null}
+!47 = metadata !{i32 786443, metadata !48, i32 19, i32 0, metadata !6, i32 5} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!48 = metadata !{i32 786443, metadata !26, i32 19, i32 0, metadata !6, i32 4} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!49 = metadata !{i32 21, i32 0, metadata !47, null}
+!50 = metadata !{i32 24, i32 0, metadata !51, null}
+!51 = metadata !{i32 786443, metadata !22, i32 23, i32 0, metadata !6, i32 6} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!52 = metadata !{i32 25, i32 0, metadata !51, null}
+!53 = metadata !{i32 27, i32 0, metadata !13, null}
+!54 = metadata !{i32 28, i32 0, metadata !13, null}
+!55 = metadata !{i32 4, i32 0, metadata !27, null}
+!56 = metadata !{i32 6, i32 0, metadata !57, null}
+!57 = metadata !{i32 786443, metadata !27, i32 5, i32 0, metadata !6, i32 7} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!58 = metadata !{i32 7, i32 0, metadata !57, null}
diff --git a/test/Transforms/ObjCARC/escape.ll b/test/Transforms/ObjCARC/escape.ll
index 3f694cf1d5a4..8f252a0d343a 100644
--- a/test/Transforms/ObjCARC/escape.ll
+++ b/test/Transforms/ObjCARC/escape.ll
@@ -10,8 +10,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; with the objc_storeWeak call.
; CHECK: define void @test0(
-; CHECK: %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) [[NUW:#[0-9]+]], !clang.arc.copy_on_escape !0
+; CHECK: call void @objc_release(i8* %tmp7) [[NUW]], !clang.imprecise_release !0
; CHECK: }
define void @test0() nounwind {
entry:
@@ -129,3 +129,6 @@ declare i8* @not_really_objc_storeWeak(i8**, i8*)
declare void @objc_release(i8*)
!0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nounwind ssp }
diff --git a/test/Transforms/ObjCARC/gvn.ll b/test/Transforms/ObjCARC/gvn.ll
index 6917b02e0324..3648866de01a 100644
--- a/test/Transforms/ObjCARC/gvn.ll
+++ b/test/Transforms/ObjCARC/gvn.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -objc-arc -gvn < %s | FileCheck %s
+; RUN: opt -S -basicaa -objc-arc-aa -gvn < %s | FileCheck %s
@x = common global i8* null, align 8
diff --git a/test/Transforms/ObjCARC/intrinsic-use.ll b/test/Transforms/ObjCARC/intrinsic-use.ll
new file mode 100644
index 000000000000..9c7b81a95d23
--- /dev/null
+++ b/test/Transforms/ObjCARC/intrinsic-use.ll
@@ -0,0 +1,63 @@
+; RUN: opt -basicaa -objc-arc -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+declare i8* @objc_retain(i8*)
+declare i8* @objc_retainAutorelease(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_autorelease(i8*)
+
+declare void @clang.arc.use(...)
+
+declare void @test0_helper(i8*, i8**)
+
+; Ensure that we honor clang.arc.use as a use and don't miscompile
+; the reduced test case from <rdar://13195034>.
+;
+; FIXME: the fact that we re-order retains w.r.t. @clang.arc.use could
+; be problematic if we get run twice, e.g. under LTO.
+;
+; CHECK: define void @test0(
+; CHECK: @objc_retain(i8* %x)
+; CHECK-NEXT: store i8* %y, i8** %temp0
+; CHECK-NEXT: @objc_retain(i8* %y)
+; CHECK-NEXT: call void @test0_helper
+; CHECK-NEXT: [[VAL1:%.*]] = load i8** %temp0
+; CHECK-NEXT: call void (...)* @clang.arc.use(i8* %y)
+; CHECK-NEXT: @objc_retain(i8* [[VAL1]])
+; CHECK-NEXT: @objc_release(i8* %y)
+; CHECK-NEXT: store i8* [[VAL1]], i8** %temp1
+; CHECK-NEXT: call void @test0_helper
+; CHECK-NEXT: [[VAL2:%.*]] = load i8** %temp1
+; CHECK-NEXT: call void (...)* @clang.arc.use(i8* [[VAL1]])
+; CHECK-NEXT: @objc_retain(i8* [[VAL2]])
+; CHECK-NEXT: @objc_release(i8* [[VAL1]])
+; CHECK-NEXT: @objc_autorelease(i8* %x)
+; CHECK-NEXT: store i8* %x, i8** %out
+; CHECK-NEXT: @objc_release(i8* [[VAL2]])
+; CHECK-NEXT: ret void
+define void @test0(i8** %out, i8* %x, i8* %y) {
+entry:
+ %temp0 = alloca i8*, align 8
+ %temp1 = alloca i8*, align 8
+ %0 = call i8* @objc_retain(i8* %x) nounwind
+ %1 = call i8* @objc_retain(i8* %y) nounwind
+ store i8* %y, i8** %temp0
+ call void @test0_helper(i8* %x, i8** %temp0)
+ %val1 = load i8** %temp0
+ %2 = call i8* @objc_retain(i8* %val1) nounwind
+ call void (...)* @clang.arc.use(i8* %y) nounwind
+ call void @objc_release(i8* %y) nounwind
+ store i8* %val1, i8** %temp1
+ call void @test0_helper(i8* %x, i8** %temp1)
+ %val2 = load i8** %temp1
+ %3 = call i8* @objc_retain(i8* %val2) nounwind
+ call void (...)* @clang.arc.use(i8* %val1) nounwind
+ call void @objc_release(i8* %val1) nounwind
+ %4 = call i8* @objc_retain(i8* %x) nounwind
+ %5 = call i8* @objc_autorelease(i8* %x) nounwind
+ store i8* %x, i8** %out
+ call void @objc_release(i8* %val2) nounwind
+ call void @objc_release(i8* %x) nounwind
+ ret void
+}
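+
+; A hedged minimal sketch (not part of the test; @sketch is hypothetical) of
+; the intent: since @clang.arc.use counts as a real use of its operands, a
+; retain/release pair bracketing it is not trivially removable.
+;
+;   define void @sketch(i8* %p) {
+;   entry:
+;     %0 = call i8* @objc_retain(i8* %p) nounwind
+;     call void (...)* @clang.arc.use(i8* %p) nounwind
+;     call void @objc_release(i8* %p) nounwind
+;     ret void
+;   }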
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
index 1a58e34940e1..f528b4ac35bc 100644
--- a/test/Transforms/ObjCARC/invoke.ll
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -12,10 +12,10 @@ declare i8* @returner()
; CHECK: define void @test0(
; CHECK: invoke.cont:
-; CHECK: call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK: call void @objc_release(i8* %zipFile) [[NUW:#[0-9]+]], !clang.imprecise_release !0
; CHECK: ret void
; CHECK: lpad:
-; CHECK: call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK: call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
; CHECK: ret void
define void @test0(i8* %zipFile) {
entry:
@@ -39,11 +39,11 @@ lpad: ; preds = %entry
; CHECK: define void @test1(
; CHECK: invoke.cont:
-; CHECK: call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK: call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
; CHECK: call void @callee()
; CHECK: br label %done
; CHECK: lpad:
-; CHECK: call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK: call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
; CHECK: call void @callee()
; CHECK: br label %done
; CHECK: done:
@@ -108,7 +108,7 @@ finally.rethrow: ; preds = %invoke.cont, %entry
; CHECK: define void @test3(
; CHECK: if.end:
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
; CHECK-NEXT: ret void
define void @test3(i8* %p, i1 %b) {
entry:
@@ -140,10 +140,10 @@ if.end:
; CHECK: lpad:
; CHECK-NEXT: %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
; CHECK-NEXT: cleanup
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
; CHECK-NEXT: ret void
; CHECK: if.end:
-; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
; CHECK-NEXT: ret void
define void @test4(i8* %p, i1 %b) {
entry:
@@ -215,4 +215,6 @@ if.end:
declare i32 @__gxx_personality_v0(...)
declare i32 @__objc_personality_v0(...)
+; CHECK: attributes [[NUW]] = { nounwind }
+
!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll b/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
index 170d0a99c98b..5d058257c6ed 100644
--- a/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
+++ b/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
@@ -4,7 +4,7 @@
; and various scary looking things and fold it into an objc_retainAutorelease.
; CHECK: bb57:
-; CHECK: tail call i8* @objc_retainAutorelease(i8* %tmp71x) nounwind
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %tmp71x) [[NUW:#[0-9]+]]
; CHECK: bb99:
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -212,10 +212,12 @@ bb99: ; preds = %bb57
br label %bb104
bb104: ; preds = %bb99, %bb57
- %tmp105 = tail call i8* @objc_autorelease(i8* %tmp72) nounwind
+ %tmp105 = call i8* @objc_autorelease(i8* %tmp72) nounwind
%tmp106 = bitcast i8* %tmp105 to %14*
tail call void @objc_release(i8* %tmp85) nounwind
%tmp107 = bitcast %18* %tmp47 to i8*
tail call void @objc_release(i8* %tmp107) nounwind
ret %14* %tmp106
}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 32be03ec6ae0..ca9c58bcb3e3 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -770,9 +770,9 @@ forcoll.empty:
@__block_d_tmp5 = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
; CHECK: define void @test11(
-; CHECK: tail call i8* @objc_retain(i8* %call) nounwind
-; CHECK: tail call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+; CHECK: tail call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+; CHECK: tail call i8* @objc_retain(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]], !clang.imprecise_release !0
; CHECK: }
define void @test11() {
entry:
@@ -820,3 +820,6 @@ entry:
call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
ret void
}
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { nonlazybind }
diff --git a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
index 9728f6e0d94f..58b5bbe9c7e9 100644
--- a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
+++ b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
@@ -59,11 +59,12 @@ lpad: ; preds = %entry
resume { i8*, i32 } %t8
}
-; There is no !clang.arc.no_objc_arc_exceptions
-; metadata here, so the optimizer shouldn't eliminate anything.
+; There is no !clang.arc.no_objc_arc_exceptions metadata here, so the optimizer
+; shouldn't eliminate anything, but *CAN* strength reduce the objc_retainBlock
+; to an objc_retain.
; CHECK: define void @test0_no_metadata(
-; CHECK: call i8* @objc_retainBlock(
+; CHECK: call i8* @objc_retain(
; CHECK: invoke
; CHECK: call void @objc_release(
; CHECK: }
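
; A hedged before/after sketch (an assumption, not checked verbatim) of the
; strength reduction the comment above allows even without the metadata:
;   %b = call i8* @objc_retainBlock(i8* %block)
; becomes
;   %b = call i8* @objc_retain(i8* %block)
; when the copy objc_retainBlock would make is not needed; the paired
; objc_release stays in place.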
diff --git a/test/Transforms/ObjCARC/pr12270.ll b/test/Transforms/ObjCARC/pr12270.ll
index 1faae5f68705..bdff0d7b4d58 100644
--- a/test/Transforms/ObjCARC/pr12270.ll
+++ b/test/Transforms/ObjCARC/pr12270.ll
@@ -1,4 +1,4 @@
-; RUN: opt -disable-output -objc-arc-contract %s
+; RUN: opt -disable-output -objc-arc-contract < %s
; test that we don't crash on unreachable code
%2 = type opaque
diff --git a/test/Transforms/ObjCARC/retain-block-alloca.ll b/test/Transforms/ObjCARC/retain-block-alloca.ll
index 01f208704c7b..f40be238baf3 100644
--- a/test/Transforms/ObjCARC/retain-block-alloca.ll
+++ b/test/Transforms/ObjCARC/retain-block-alloca.ll
@@ -9,7 +9,7 @@
@"\01L_OBJC_SELECTOR_REFERENCES_" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
; CHECK: define void @test(
-; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) nounwind
+; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) [[NUW:#[0-9]+]]
; CHECK: @objc_msgSend
; CHECK-NEXT: @objc_release(i8* %3)
define void @test(%0* %array) uwtable {
@@ -87,4 +87,8 @@ declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
declare void @objc_release(i8*)
+; CHECK: attributes #0 = { uwtable }
+; CHECK: attributes #1 = { nonlazybind }
+; CHECK: attributes [[NUW]] = { nounwind }
+
!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
new file mode 100644
index 000000000000..2c1ddce32836
--- /dev/null
+++ b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
@@ -0,0 +1,127 @@
+; RUN: opt -S -objc-arc < %s | FileCheck %s
+
+declare i8* @objc_retain(i8*) nonlazybind
+declare void @objc_release(i8*) nonlazybind
+declare i8* @objc_retainBlock(i8*)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Use by an instruction which copies the value is an escape if the ;
+; result is an escape. The current instructions with this property are: ;
+; ;
+; 1. BitCast. ;
+; 2. GEP. ;
+; 3. PhiNode. ;
+; 4. SelectInst. ;
+; ;
+; Make sure that such instructions do not confuse the optimizer into removing ;
+; an objc_retainBlock that is needed. ;
+; ;
+; rdar://13273675. (With extra test cases to handle bitcast, phi, and select.) ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define void @bitcasttest(i8* %storage, void (...)* %block) {
+; CHECK: define void @bitcasttest
+entry:
+ %t1 = bitcast void (...)* %block to i8*
+; CHECK-NOT: tail call i8* @objc_retain
+ %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+ %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+ %t4 = bitcast i8* %storage to void (...)**
+ %t5 = bitcast i8* %t3 to void (...)*
+ store void (...)* %t5, void (...)** %t4, align 8
+; CHECK-NOT: call void @objc_release
+ call void @objc_release(i8* %t1)
+ ret void
+}
+
+define void @geptest(void (...)** %storage_array, void (...)* %block) {
+; CHECK: define void @geptest
+entry:
+ %t1 = bitcast void (...)* %block to i8*
+; CHECK-NOT: tail call i8* @objc_retain
+ %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+ %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+ %t4 = bitcast i8* %t3 to void (...)*
+
+ %storage = getelementptr inbounds void (...)** %storage_array, i64 0
+
+ store void (...)* %t4, void (...)** %storage, align 8
+; CHECK-NOT: call void @objc_release
+ call void @objc_release(i8* %t1)
+ ret void
+}
+
+define void @selecttest(void (...)** %store1, void (...)** %store2,
+ void (...)* %block) {
+; CHECK: define void @selecttest
+entry:
+ %t1 = bitcast void (...)* %block to i8*
+; CHECK-NOT: tail call i8* @objc_retain
+ %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+ %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+ %t4 = bitcast i8* %t3 to void (...)*
+ %store = select i1 undef, void (...)** %store1, void (...)** %store2
+ store void (...)* %t4, void (...)** %store, align 8
+; CHECK-NOT: call void @objc_release
+ call void @objc_release(i8* %t1)
+ ret void
+}
+
+define void @phinodetest(void (...)** %storage1,
+ void (...)** %storage2,
+ void (...)* %block) {
+; CHECK: define void @phinodetest
+entry:
+ %t1 = bitcast void (...)* %block to i8*
+; CHECK-NOT: tail call i8* @objc_retain
+ %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+ %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+ %t4 = bitcast i8* %t3 to void (...)*
+ br i1 undef, label %store1_set, label %store2_set
+
+store1_set:
+ br label %end
+
+store2_set:
+ br label %end
+
+end:
+ %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set]
+ store void (...)* %t4, void (...)** %storage, align 8
+; CHECK-NOT: call void @objc_release
+ call void @objc_release(i8* %t1)
+ ret void
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This test makes sure that we do not hang clang when visiting a use ;
+; cycle caused by phi nodes during objc-arc analysis. *NOTE* This ;
+; test case looks a little convoluted since it was produced by ;
+; bugpoint. ;
+; ;
+; bugzilla://14551 ;
+; rdar://12851911 ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define void @phinode_use_cycle(i8* %block) uwtable optsize ssp {
+; CHECK: define void @phinode_use_cycle(i8* %block)
+entry:
+ br label %for.body
+
+for.body: ; preds = %if.then, %for.body, %entry
+ %block.05 = phi void (...)* [ null, %entry ], [ %1, %if.then ], [ %block.05, %for.body ]
+ br i1 undef, label %for.body, label %if.then
+
+if.then: ; preds = %for.body
+ %0 = call i8* @objc_retainBlock(i8* %block), !clang.arc.copy_on_escape !0
+ %1 = bitcast i8* %0 to void (...)*
+ %2 = bitcast void (...)* %block.05 to i8*
+ call void @objc_release(i8* %2) nounwind, !clang.imprecise_release !0
+ br label %for.body
+}
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block-side-effects.ll b/test/Transforms/ObjCARC/retain-block-side-effects.ll
index e84d48f86912..7fa73cbfef15 100644
--- a/test/Transforms/ObjCARC/retain-block-side-effects.ll
+++ b/test/Transforms/ObjCARC/retain-block-side-effects.ll
@@ -4,7 +4,7 @@
; objc_retainBlock stores into %repeater so the load from after the
; call isn't forwardable from the store before the call.
-; CHECK: %tmp16 = call i8* @objc_retainBlock(i8* %tmp15) nounwind
+; CHECK: %tmp16 = call i8* @objc_retainBlock(i8* %tmp15) [[NUW:#[0-9]+]]
; CHECK: %tmp17 = bitcast i8* %tmp16 to void ()*
; CHECK: %tmp18 = load %struct.__block_byref_repeater** %byref.forwarding, align 8
; CHECK: %repeater12 = getelementptr inbounds %struct.__block_byref_repeater* %tmp18, i64 0, i32 6
@@ -37,3 +37,6 @@ entry:
}
declare i8* @objc_retainBlock(i8*)
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NUW]] = { nounwind }
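+
+; A hedged sketch (an assumption drawn from the comment above; %fwd and %blk
+; are hypothetical names) of why the load is not forwardable: the block copy
+; made by @objc_retainBlock may rewrite the __block byref forwarding slot,
+; so the store before the call does not reach the load after it:
+;   store %struct.__block_byref_repeater* %r, %struct.__block_byref_repeater** %fwd
+;   %b = call i8* @objc_retainBlock(i8* %blk)        ; may write %fwd
+;   %r2 = load %struct.__block_byref_repeater** %fwd ; must remain a load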
diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll
index b3b62d300008..1bb3f0276adf 100644
--- a/test/Transforms/ObjCARC/retain-block.ll
+++ b/test/Transforms/ObjCARC/retain-block.ll
@@ -28,8 +28,8 @@ entry:
; optimization possible.
; CHECK: define void @test0_no_metadata(i8* %tmp) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW:#[0-9]+]]
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
; CHECK: }
define void @test0_no_metadata(i8* %tmp) {
entry:
@@ -43,8 +43,8 @@ entry:
; optimization possible.
; CHECK: define void @test0_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
; CHECK: }
define void @test0_escape(i8* %tmp, i8** %z) {
entry:
@@ -58,8 +58,8 @@ entry:
; Same as test0_escape, but there's no intervening call.
; CHECK: define void @test0_just_escape(i8* %tmp, i8** %z) {
-; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
-; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
; CHECK: }
define void @test0_just_escape(i8* %tmp, i8** %z) {
entry:
@@ -73,9 +73,9 @@ entry:
; CHECK: define void @test1(i8* %tmp) {
; CHECK-NOT: @objc
-; CHECK: tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %tmp) [[NUW]]
; CHECK-NOT: @objc
-; CHECK: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0
; CHECK-NOT: @objc
; CHECK: }
define void @test1(i8* %tmp) {
@@ -95,10 +95,10 @@ entry:
; CHECK: define void @test1_no_metadata(i8* %tmp) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]]
; CHECK-NEXT: @use_pointer(i8* %tmp2)
; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
; CHECK-NOT: @objc
; CHECK: }
define void @test1_no_metadata(i8* %tmp) {
@@ -118,11 +118,11 @@ entry:
; CHECK: define void @test1_escape(i8* %tmp, i8** %z) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]], !clang.arc.copy_on_escape !0
; CHECK-NEXT: store i8* %tmp2, i8** %z
; CHECK-NEXT: @use_pointer(i8* %tmp2)
; CHECK-NEXT: @use_pointer(i8* %tmp2)
-; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0
; CHECK-NOT: @objc
; CHECK: }
define void @test1_escape(i8* %tmp, i8** %z) {
@@ -136,3 +136,5 @@ entry:
tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
ret void
}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll
index f876e51592b6..165829f7c01f 100644
--- a/test/Transforms/ObjCARC/retain-not-declared.ll
+++ b/test/Transforms/ObjCARC/retain-not-declared.ll
@@ -13,7 +13,7 @@ declare void @objc_release(i8*)
; CHECK: define i8* @test0(i8* %p) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleaseReturnValue(i8* %p) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleaseReturnValue(i8* %p) [[NUW:#[0-9]+]]
; CHECK-NEXT: ret i8* %0
; CHECK-NEXT: }
@@ -21,8 +21,8 @@ define i8* @test0(i8* %p) {
entry:
%call = tail call i8* @objc_unretainedObject(i8* %p)
%0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
- %1 = tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
- ret i8* %1
+ %1 = tail call i8* @objc_autoreleaseReturnValue(i8* %call) nounwind
+ ret i8* %call
}
; Properly create the @objc_retain declaration when it doesn't already exist.
@@ -65,3 +65,5 @@ lpad100: ; preds = %invoke.cont93
declare i32 @__gxx_personality_v0(...)
!0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/rle-s2l.ll b/test/Transforms/ObjCARC/rle-s2l.ll
index 8f8d5c0d3825..2865c94dc88c 100644
--- a/test/Transforms/ObjCARC/rle-s2l.ll
+++ b/test/Transforms/ObjCARC/rle-s2l.ll
@@ -57,7 +57,7 @@ define void @test2(i8** %p) {
; CHECK: define void @test3(i8** %p) {
; CHECK-NEXT: %x = call i8* @objc_loadWeak(i8** %p)
-; CHECK-NEXT: call void @use_pointer(i8* %x) readonly
+; CHECK-NEXT: call void @use_pointer(i8* %x) [[RO:#[0-9]+]]
; CHECK-NEXT: %1 = tail call i8* @objc_retain(i8* %x)
; CHECK-NEXT: call void @use_pointer(i8* %x)
; CHECK-NEXT: ret void
@@ -74,7 +74,7 @@ define void @test3(i8** %p) {
; CHECK: define void @test4(i8** %p) {
; CHECK-NEXT: %x = call i8* @objc_loadWeak(i8** %p)
-; CHECK-NEXT: call void @use_pointer(i8* %x) readonly
+; CHECK-NEXT: call void @use_pointer(i8* %x) [[RO]]
; CHECK-NEXT: call void @callee()
; CHECK-NEXT: %y = call i8* @objc_loadWeak(i8** %p)
; CHECK-NEXT: call void @use_pointer(i8* %y)
@@ -133,3 +133,6 @@ define void @test7(i8** %p, i8* %n, i8** %q, i8* %m) {
call void @use_pointer(i8* %y)
ret void
}
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes [[RO]] = { readonly }
diff --git a/test/Transforms/ObjCARC/rv.ll b/test/Transforms/ObjCARC/rv.ll
index 9353a19f71a4..589c60f9f3aa 100644
--- a/test/Transforms/ObjCARC/rv.ll
+++ b/test/Transforms/ObjCARC/rv.ll
@@ -29,7 +29,7 @@ declare i8* @returner()
; CHECK: define void @test0(
; CHECK-NEXT: entry:
; CHECK-NEXT: %x = call i8* @returner
-; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %x) nounwind
+; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %x) [[NUW:#[0-9]+]]
; CHECK: t:
; CHECK-NOT: @objc_
; CHECK: return:
@@ -121,7 +121,7 @@ define i8* @test7() {
%p = call i8* @returner()
call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
%t = call i8* @objc_autoreleaseReturnValue(i8* %p)
- call void @use_pointer(i8* %t)
+ call void @use_pointer(i8* %p)
ret i8* %t
}
@@ -133,7 +133,7 @@ define i8* @test7b() {
call void @use_pointer(i8* %p)
call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
%t = call i8* @objc_autoreleaseReturnValue(i8* %p)
- ret i8* %t
+ ret i8* %p
}
; Turn objc_retain into objc_retainAutoreleasedReturnValue if its operand
@@ -150,17 +150,17 @@ define void @test8() {
; Don't apply the RV optimization to autorelease if there's no retain.
; CHECK: define i8* @test9(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
define i8* @test9(i8* %p) {
call i8* @objc_autorelease(i8* %p)
ret i8* %p
}
-; Apply the RV optimization.
+; Do not apply the RV optimization.
; CHECK: define i8* @test10(i8* %p)
-; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
-; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %p) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
+; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
; CHECK-NEXT: ret i8* %p
define i8* @test10(i8* %p) {
%1 = call i8* @objc_retain(i8* %p)
@@ -174,7 +174,7 @@ define i8* @test10(i8* %p) {
; CHECK: define i8* @test11(i8* %p)
; CHECK: tail call i8* @objc_retain(i8* %p)
; CHECK-NEXT: call void @use_pointer(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
; CHECK-NEXT: ret i8* %p
define i8* @test11(i8* %p) {
%1 = call i8* @objc_retain(i8* %p)
@@ -201,7 +201,7 @@ define i8* @test12(i8* %p) {
; CHECK: define i8* @test13(
; CHECK: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
-; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: call i8* @objc_autorelease(i8* %p)
; CHECK: ret i8* %p
define i8* @test13() {
%p = call i8* @returner()
@@ -215,7 +215,7 @@ define i8* @test13() {
; argument is not a return value.
; CHECK: define void @test14(
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %p) [[NUW]]
; CHECK-NEXT: ret void
define void @test14(i8* %p) {
call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
@@ -227,7 +227,7 @@ define void @test14(i8* %p) {
; CHECK: define void @test15(
; CHECK-NEXT: %y = call i8* @returner()
-; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
; CHECK-NEXT: ret void
define void @test15() {
%y = call i8* @returner()
@@ -240,7 +240,7 @@ define void @test15() {
; CHECK: define void @test16(
; CHECK-NEXT: %y = call i8* @returner()
-; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
; CHECK-NEXT: ret void
define void @test16() {
%y = call i8* @returner()
@@ -252,7 +252,7 @@ define void @test16() {
; argument is not a return value.
; CHECK: define void @test17(
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
; CHECK-NEXT: ret void
define void @test17(i8* %y) {
call i8* @objc_retain(i8* %y)
@@ -265,7 +265,7 @@ define void @test17(i8* %y) {
; CHECK: define void @test18(
; CHECK-NEXT: %y = call i8* @returner()
; CHECK-NEXT: call void @callee()
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) nounwind
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
; CHECK-NEXT: ret void
define void @test18() {
%y = call i8* @returner()
@@ -323,7 +323,7 @@ define i8* @test22(i8* %p) {
; Convert autoreleaseRV to autorelease.
; CHECK: define void @test23(
-; CHECK: tail call i8* @objc_autorelease(i8* %p) nounwind
+; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
define void @test23(i8* %p) {
store i8 0, i8* %p
call i8* @objc_autoreleaseReturnValue(i8* %p)
@@ -340,3 +340,5 @@ define {}* @test24(i8* %p) {
%s = bitcast i8* %p to {}*
ret {}* %s
}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/split-backedge.ll b/test/Transforms/ObjCARC/split-backedge.ll
index 08e2dce1f551..5ac278a45d50 100644
--- a/test/Transforms/ObjCARC/split-backedge.ll
+++ b/test/Transforms/ObjCARC/split-backedge.ll
@@ -4,12 +4,12 @@
; rdar://11256239
; CHECK: define void @test0
-; CHECK: call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call i8* @objc_retain(i8* %call) nounwind
-; CHECK: call i8* @objc_retain(i8* %cond) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %call) nounwind
-; CHECK: call void @objc_release(i8* %cond) nounwind
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
+; CHECK: call i8* @objc_retain(i8* %call) [[NUW]]
+; CHECK: call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %call) [[NUW]]
+; CHECK: call void @objc_release(i8* %cond) [[NUW]]
define void @test0() {
entry:
br label %while.body
@@ -46,3 +46,5 @@ declare i8* @objc_retain(i8*)
declare void @use_pointer(i8*)
!0 = metadata !{}
+
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
new file mode 100644
index 000000000000..26cd67727e6a
--- /dev/null
+++ b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
@@ -0,0 +1,74 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+declare i8* @objc_release(i8* %x)
+declare i8* @objc_retain(i8* %x)
+declare i8* @objc_autorelease(i8* %x)
+declare i8* @objc_autoreleaseReturnValue(i8* %x)
+declare i8* @objc_retainAutoreleasedReturnValue(i8* %x)
+
+; Never tail call objc_autorelease.
+define i8* @test0(i8* %x) {
+entry:
+ ; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x)
+ %tmp0 = call i8* @objc_autorelease(i8* %x)
+ ; CHECK: %tmp1 = call i8* @objc_autorelease(i8* %x)
+ %tmp1 = tail call i8* @objc_autorelease(i8* %x)
+
+ ret i8* %x
+}
+
+; Always tail call objc_autoreleaseReturnValue.
+define i8* @test1(i8* %x) {
+entry:
+ ; CHECK: %tmp0 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+ %tmp0 = call i8* @objc_autoreleaseReturnValue(i8* %x)
+ ; CHECK: %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+ %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+ ret i8* %x
+}
+
+; Always tail call objc_retain.
+define i8* @test2(i8* %x) {
+entry:
+ ; CHECK: %tmp0 = tail call i8* @objc_retain(i8* %x)
+ %tmp0 = call i8* @objc_retain(i8* %x)
+ ; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %x)
+ %tmp1 = tail call i8* @objc_retain(i8* %x)
+ ret i8* %x
+}
+
+define i8* @tmp(i8* %x) {
+ ret i8* %x
+}
+
+; Always tail call objc_retainAutoreleasedReturnValue.
+define i8* @test3(i8* %x) {
+entry:
+ %y = call i8* @tmp(i8* %x)
+ ; CHECK: %tmp0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
+ %tmp0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
+ %z = call i8* @tmp(i8* %x)
+ ; CHECK: %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+ %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+ ret i8* %x
+}
+
+; By itself, we should never change whether or not objc_release is tail called.
+define i8* @test4(i8* %x) {
+entry:
+ ; CHECK: %tmp0 = call i8* @objc_release(i8* %x)
+ %tmp0 = call i8* @objc_release(i8* %x)
+ ; CHECK: %tmp1 = tail call i8* @objc_release(i8* %x)
+ %tmp1 = tail call i8* @objc_release(i8* %x)
+ ret i8* %x
+}
+
+; If we convert a tail-called @objc_autoreleaseReturnValue to an
+; @objc_autorelease, ensure that the tail call is removed.
+define i8* @test5(i8* %x) {
+entry:
+ ; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x)
+ %tmp0 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
+ ret i8* %tmp0
+}
+
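+; Informal summary of the invariants the functions above enforce, mirroring
+; their comments:
+;   objc_autorelease                   - never marked tail
+;   objc_autoreleaseReturnValue        - always marked tail
+;   objc_retain                        - always marked tail
+;   objc_retainAutoreleasedReturnValue - always marked tail
+;   objc_release                       - left exactly as written
+; and a tail marker on autoreleaseReturnValue is dropped when the call is
+; rewritten to plain objc_autorelease (@test5).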
diff --git a/test/Transforms/ObjCARC/weak-copies.ll b/test/Transforms/ObjCARC/weak-copies.ll
index e1a94bb4749a..5dab4e049e22 100644
--- a/test/Transforms/ObjCARC/weak-copies.ll
+++ b/test/Transforms/ObjCARC/weak-copies.ll
@@ -19,7 +19,7 @@ target triple = "x86_64-apple-darwin11.0.0"
; CHECK: define void @foo() {
; CHECK-NEXT: entry:
; CHECK-NEXT: %call = call i8* @bar()
-; CHECK-NEXT: call void @use(i8* %call) nounwind
+; CHECK-NEXT: call void @use(i8* %call) [[NUW:#[0-9]+]]
; CHECK-NEXT: ret void
; CHECK-NEXT: }
define void @foo() {
@@ -39,7 +39,7 @@ entry:
; Eliminate unnecessary weak pointer copies in a block initialization.
-; CHECK: define void @qux(i8* %me) nounwind {
+; CHECK: define void @qux(i8* %me) #0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: %block = alloca %1, align 8
; CHECK-NOT: alloca
@@ -84,4 +84,6 @@ declare i8* @objc_loadWeak(i8**)
declare void @use(i8*) nounwind
declare void @objc_destroyWeak(i8**)
+; CHECK: attributes [[NUW]] = { nounwind }
+
!0 = metadata !{}
diff --git a/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll b/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
index 8859da8de106..53d98e02ec88 100644
--- a/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
+++ b/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O2 %s -S -o - | FileCheck %s
+; RUN: opt -O2 -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin11.1"
diff --git a/test/Transforms/PhaseOrdering/PR6627.ll b/test/Transforms/PhaseOrdering/PR6627.ll
index ef9947f103a8..58b762a7af49 100644
--- a/test/Transforms/PhaseOrdering/PR6627.ll
+++ b/test/Transforms/PhaseOrdering/PR6627.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O3 -S %s | FileCheck %s
+; RUN: opt -O3 -S < %s | FileCheck %s
; XFAIL: *
declare i32 @doo(...)
diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll
index 88ebca0a9c3d..8fbe8c58f451 100644
--- a/test/Transforms/PhaseOrdering/basic.ll
+++ b/test/Transforms/PhaseOrdering/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O3 -S %s | FileCheck %s
+; RUN: opt -O3 -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.6.7"
diff --git a/test/Transforms/PhaseOrdering/gdce.ll b/test/Transforms/PhaseOrdering/gdce.ll
index 273e47e97cb4..95f06757a788 100644
--- a/test/Transforms/PhaseOrdering/gdce.ll
+++ b/test/Transforms/PhaseOrdering/gdce.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O2 -S %s | FileCheck %s
+; RUN: opt -O2 -S < %s | FileCheck %s
; Run global DCE to eliminate unused ctor and dtor.
; rdar://9142819
diff --git a/test/Transforms/PhaseOrdering/scev.ll b/test/Transforms/PhaseOrdering/scev.ll
index c73128082216..39adb6b73d3a 100644
--- a/test/Transforms/PhaseOrdering/scev.ll
+++ b/test/Transforms/PhaseOrdering/scev.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O3 -S -analyze -scalar-evolution %s | FileCheck %s
+; RUN: opt -O3 -S -analyze -scalar-evolution < %s | FileCheck %s
;
; This file contains phase ordering tests for scalar evolution.
; Test that the standard passes don't obfuscate the IR so scalar evolution can't
diff --git a/test/Transforms/Reassociate/crash.ll b/test/Transforms/Reassociate/crash.ll
index e29b5dc9c0ce..770f97371d7e 100644
--- a/test/Transforms/Reassociate/crash.ll
+++ b/test/Transforms/Reassociate/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -reassociate -disable-output %s
+; RUN: opt -reassociate -disable-output < %s
; rdar://7507855
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
new file mode 100644
index 000000000000..d371a9b5b68f
--- /dev/null
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -0,0 +1,166 @@
+;RUN: opt -S -reassociate < %s | FileCheck %s
+
+; ==========================================================================
+;
+; Xor reassociation general cases
+;
+; ==========================================================================
+
+; (x | c1) ^ (x | c2) => (x & c3) ^ c3, where c3 = c1^c2
+;
+define i32 @xor1(i32 %x) {
+ %or = or i32 %x, 123
+ %or1 = or i32 %x, 456
+ %xor = xor i32 %or, %or1
+ ret i32 %xor
+
+;CHECK: @xor1
+;CHECK: %and.ra = and i32 %x, 435
+;CHECK: %xor = xor i32 %and.ra, 435
+}
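+
+; A quick, informal check of the constants above: c1 = 123 = 0b001111011 and
+; c2 = 456 = 0b111001000 give c3 = c1 ^ c2 = 0b110110011 = 435, which is
+; exactly the mask and xor constant the CHECK lines expect.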
+
+; Test rule : (x & c1) ^ (x & c2) = (x & (c1^c2))
+; Real testing case: (x & 123) ^ y ^ (x & 456) => (x & 435) ^ y
+define i32 @xor2(i32 %x, i32 %y) {
+ %and = and i32 %x, 123
+ %xor = xor i32 %and, %y
+ %and1 = and i32 %x, 456
+ %xor2 = xor i32 %xor, %and1
+ ret i32 %xor2
+
+;CHECK: @xor2
+;CHECK: %and.ra = and i32 %x, 435
+;CHECK: %xor2 = xor i32 %and.ra, %y
+}
+
+; Test rule: (x | c1) ^ (x & c2) = (x & c3) ^ c1, where c3 = ~c1 ^ c2
+define i32 @xor3(i32 %x, i32 %y) {
+ %or = or i32 %x, 123
+ %xor = xor i32 %or, %y
+ %and = and i32 %x, 456
+ %xor1 = xor i32 %xor, %and
+ ret i32 %xor1
+
+;CHECK: @xor3
+;CHECK: %and.ra = and i32 %x, -436
+;CHECK: %xor = xor i32 %y, 123
+;CHECK: %xor1 = xor i32 %xor, %and.ra
+}
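+
+; Informally: with c1 = 123 and c2 = 456, c3 = ~c1 ^ c2 = -124 ^ 456 = -436,
+; matching the "and i32 %x, -436" above, while the trailing ^ c1 surfaces as
+; the "xor i32 %y, 123".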
+
+; Test rule: (x | c1) ^ c2 = (x & ~c1) ^ (c1 ^ c2)
+define i32 @xor4(i32 %x, i32 %y) {
+ %and = and i32 %x, -124
+ %xor = xor i32 %y, 435
+ %xor1 = xor i32 %xor, %and
+ ret i32 %xor1
+; CHECK: @xor4
+; CHECK: %and = and i32 %x, -124
+; CHECK: %xor = xor i32 %y, 435
+; CHECK: %xor1 = xor i32 %xor, %and
+}
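+
+; Note that @xor4's body is already the right-hand side of the rule above
+; ((x & ~c1) ^ (c1 ^ c2), with -124 = ~123 and 435 = 123 ^ 456), so the CHECK
+; lines appear to verify that the canonical form is left alone.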
+
+; ==========================================================================
+;
+; Xor reassociation special cases
+;
+; ==========================================================================
+
+; Special case1:
+; (x | c1) ^ (x & ~c1) = c1
+define i32 @xor_special1(i32 %x, i32 %y) {
+ %or = or i32 %x, 123
+ %xor = xor i32 %or, %y
+ %and = and i32 %x, -124
+ %xor1 = xor i32 %xor, %and
+ ret i32 %xor1
+; CHECK: @xor_special1
+; CHECK: %xor1 = xor i32 %y, 123
+; CHECK: ret i32 %xor1
+}
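+
+; Here -124 is ~123, so by the rule above the two x-dependent operands cancel
+; outright and only "xor i32 %y, 123" survives, as the CHECK lines require.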
+
+; Special case2:
+; (x | c1) ^ (x & c1) = x ^ c1
+define i32 @xor_special2(i32 %x, i32 %y) {
+ %or = or i32 %x, 123
+ %xor = xor i32 %or, %y
+ %and = and i32 %x, 123
+ %xor1 = xor i32 %xor, %and
+ ret i32 %xor1
+; CHECK: @xor_special2
+; CHECK: %xor = xor i32 %y, 123
+; CHECK: %xor1 = xor i32 %xor, %x
+; CHECK: ret i32 %xor1
+}
+
+; (x | c1) ^ (x | c1) => 0
+define i32 @xor_special3(i32 %x) {
+ %or = or i32 %x, 123
+ %or1 = or i32 %x, 123
+ %xor = xor i32 %or, %or1
+ ret i32 %xor
+;CHECK: @xor_special3
+;CHECK: ret i32 0
+}
+
+; (x & c1) ^ (x & c1) => 0
+define i32 @xor_special4(i32 %x) {
+ %or = and i32 %x, 123
+ %or1 = and i32 123, %x
+ %xor = xor i32 %or, %or1
+ ret i32 %xor
+;CHECK: @xor_special4
+;CHECK: ret i32 0
+}
+
+; ==========================================================================
+;
+; Xor reassociation curtailed by code size
+;
+; ==========================================================================
+
+; (x | c1) ^ (x | c2) => (x & c3) ^ c3
+; is enabled if only one of the operands has multiple uses
+;
+define i32 @xor_ra_size1(i32 %x) {
+ %or = or i32 %x, 123
+ %or1 = or i32 %x, 456
+ %xor = xor i32 %or, %or1
+
+ %add = add i32 %xor, %or
+ ret i32 %add
+;CHECK: @xor_ra_size1
+;CHECK: %xor = xor i32 %and.ra, 435
+}
+
+; (x | c1) ^ (x | c2) => (x & c3) ^ c3
+; is disabled if both operands have multiple uses.
+;
+define i32 @xor_ra_size2(i32 %x) {
+ %or = or i32 %x, 123
+ %or1 = or i32 %x, 456
+ %xor = xor i32 %or, %or1
+
+ %add = add i32 %xor, %or
+ %add2 = add i32 %add, %or1
+ ret i32 %add2
+
+;CHECK: @xor_ra_size2
+;CHECK: %or1 = or i32 %x, 456
+;CHECK: %xor = xor i32 %or, %or1
+}
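+
+; Reading the two tests above together, the guard appears to be about code
+; size: the rewrite only pays off while at most one of %or and %or1 has other
+; uses; once both must stay live anyway, reassociating would add instructions
+; rather than remove them.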
+
+
+; ==========================================================================
+;
+; Xor reassociation bugs
+;
+; ==========================================================================
+
+@xor_bug1_data = external global <{}>, align 4
+define void @xor_bug1() {
+ %1 = ptrtoint i32* undef to i64
+ %2 = xor i64 %1, ptrtoint (<{}>* @xor_bug1_data to i64)
+ %3 = and i64 undef, %2
+ ret void
+}
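+
+; No CHECK lines for @xor_bug1: it only needs to get through -reassociate
+; without crashing; the xor over a ptrtoint constant expression of the
+; zero-sized global is presumably the shape that triggered the original bug.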
diff --git a/test/Transforms/Reg2Mem/crash.ll b/test/Transforms/Reg2Mem/crash.ll
new file mode 100644
index 000000000000..02fed94b8527
--- /dev/null
+++ b/test/Transforms/Reg2Mem/crash.ll
@@ -0,0 +1,88 @@
+; RUN: opt -reg2mem -disable-output < %s
+; PR14782
+
+declare void @f1()
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare void @f2()
+
+declare void @f3()
+
+declare void @f4_()
+
+declare void @_Z12xxxdtsP10xxxpq()
+
+define hidden void @_ZN12xxxyzIi9xxxwLi29ELi0EE4f3NewES0_i() ssp align 2 {
+bb:
+ invoke void @f4_()
+ to label %bb1 unwind label %.thread
+
+.thread: ; preds = %bb
+ %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ br label %bb13
+
+bb1: ; preds = %bb
+ invoke void @f1()
+ to label %.noexc unwind label %bb10
+
+.noexc: ; preds = %bb1
+ invoke void @f4_()
+ to label %bb6 unwind label %bb2
+
+bb2: ; preds = %.noexc
+ %tmp3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ invoke void @f3()
+ to label %.body unwind label %bb4
+
+bb4: ; preds = %bb2
+ %tmp5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
+ unreachable
+
+bb6: ; preds = %.noexc
+ invoke void @_Z12xxxdtsP10xxxpq()
+ to label %_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit unwind label %bb10
+
+_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit: ; preds = %bb6
+ invoke void @f2()
+ to label %bb7 unwind label %bb8
+
+bb7: ; preds = %_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit
+ ret void
+
+bb8: ; preds = %_ZN6xxxdIN12xxxyzIi9xxxwLi29ELi0EE4fr1jS3_.exit
+ %tmp9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ br label %_ZN10xxxpqdlev.exit
+
+bb10: ; preds = %bb6, %bb1
+ %.1 = phi i1 [ true, %bb1 ], [ false, %bb6 ]
+ %tmp11 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ br label %.body
+
+.body: ; preds = %bb10, %bb2
+ %.1.lpad-body = phi i1 [ %.1, %bb10 ], [ true, %bb2 ]
+ invoke void @f2()
+ to label %bb12 unwind label %bb14
+
+bb12: ; preds = %.body
+ br i1 %.1.lpad-body, label %bb13, label %_ZN10xxxpqdlev.exit
+
+bb13: ; preds = %bb12, %.thread
+ invoke void @xxx_MemFree()
+ to label %_ZN10xxxpqdlev.exit unwind label %bb14
+
+_ZN10xxxpqdlev.exit: ; preds = %bb13, %bb12, %bb8
+ resume { i8*, i32 } undef
+
+bb14: ; preds = %bb13, %.body
+ %tmp15 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
+ unreachable
+}
+
+declare void @xxx_MemFree()
diff --git a/test/Transforms/Reg2Mem/lit.local.cfg b/test/Transforms/Reg2Mem/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/Reg2Mem/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/crash.ll b/test/Transforms/SCCP/crash.ll
index 2f6da1d726a0..88528902d721 100644
--- a/test/Transforms/SCCP/crash.ll
+++ b/test/Transforms/SCCP/crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -sccp -S
+; RUN: opt -sccp -S < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin10.0"
diff --git a/test/Transforms/SCCP/ipsccp-addr-taken.ll b/test/Transforms/SCCP/ipsccp-addr-taken.ll
index c6572fa5d141..b49da97ab2c0 100644
--- a/test/Transforms/SCCP/ipsccp-addr-taken.ll
+++ b/test/Transforms/SCCP/ipsccp-addr-taken.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -ipsccp -S | FileCheck %s
+; RUN: opt -ipsccp -S < %s | FileCheck %s
; PR7876
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/SCCP/retvalue-undef.ll b/test/Transforms/SCCP/retvalue-undef.ll
index 389561f8a112..5a4ba113b7c0 100644
--- a/test/Transforms/SCCP/retvalue-undef.ll
+++ b/test/Transforms/SCCP/retvalue-undef.ll
@@ -1,4 +1,4 @@
-; RUN: opt -ipsccp -S %s | FileCheck %s
+; RUN: opt -ipsccp -S < %s | FileCheck %s
; PR6414
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/SCCP/undef-resolve.ll b/test/Transforms/SCCP/undef-resolve.ll
index a3dddb799a6a..a1a600c9607a 100644
--- a/test/Transforms/SCCP/undef-resolve.ll
+++ b/test/Transforms/SCCP/undef-resolve.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -sccp -S | FileCheck %s
+; RUN: opt -sccp -S < %s | FileCheck %s
; PR6940
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 9fe926ee2cc1..30dd21774343 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -500,14 +500,27 @@ entry:
define i64 @test9() {
; Ensure we can handle loads off the end of an alloca even when wrapped in
-; weird bit casts and types. The result is undef, but this shouldn't crash
-; anything.
+; weird bit casts and types. This is valid IR due to the alignment and masking
+; off the bits past the end of the alloca.
+;
; CHECK: @test9
; CHECK-NOT: alloca
-; CHECK: ret i64 undef
+; CHECK: %[[b2:.*]] = zext i8 26 to i64
+; CHECK-NEXT: %[[s2:.*]] = shl i64 %[[b2]], 16
+; CHECK-NEXT: %[[m2:.*]] = and i64 undef, -16711681
+; CHECK-NEXT: %[[i2:.*]] = or i64 %[[m2]], %[[s2]]
+; CHECK-NEXT: %[[b1:.*]] = zext i8 0 to i64
+; CHECK-NEXT: %[[s1:.*]] = shl i64 %[[b1]], 8
+; CHECK-NEXT: %[[m1:.*]] = and i64 %[[i2]], -65281
+; CHECK-NEXT: %[[i1:.*]] = or i64 %[[m1]], %[[s1]]
+; CHECK-NEXT: %[[b0:.*]] = zext i8 0 to i64
+; CHECK-NEXT: %[[m0:.*]] = and i64 %[[i1]], -256
+; CHECK-NEXT: %[[i0:.*]] = or i64 %[[m0]], %[[b0]]
+; CHECK-NEXT: %[[result:.*]] = and i64 %[[i0]], 16777215
+; CHECK-NEXT: ret i64 %[[result]]
entry:
- %a = alloca { [3 x i8] }
+ %a = alloca { [3 x i8] }, align 8
%gep1 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 0
store i8 0, i8* %gep1, align 1
%gep2 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 1
@@ -516,7 +529,8 @@ entry:
store i8 26, i8* %gep3, align 1
%cast = bitcast { [3 x i8] }* %a to { i64 }*
%elt = getelementptr inbounds { i64 }* %cast, i32 0, i32 0
- %result = load i64* %elt
+ %load = load i64* %elt
+ %result = and i64 %load, 16777215
ret i64 %result
}
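+
+; For reference: 16777215 = 0xFFFFFF = 2^24 - 1, i.e. the mask keeps exactly
+; the three bytes the { [3 x i8] } alloca holds, and the align-8 alloca is
+; what keeps the wide i64 load itself within dereferenceable memory.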
@@ -575,8 +589,8 @@ entry:
store i8 0, i8* %a2ptr
%aiptr = bitcast [3 x i8]* %a to i24*
%ai = load i24* %aiptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
; CHECK: %[[ext2:.*]] = zext i8 0 to i24
; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16
; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535
@@ -597,8 +611,8 @@ entry:
%b1 = load i8* %b1ptr
%b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
%b2 = load i8* %b2ptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
; CHECK: %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8
; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
@@ -617,11 +631,12 @@ define i32 @test13() {
; Ensure we don't crash and handle undefined loads that straddle the end of the
; allocation.
; CHECK: @test13
-; CHECK: %[[ret:.*]] = zext i16 undef to i32
-; CHECK: ret i32 %[[ret]]
+; CHECK: %[[value:.*]] = zext i8 0 to i16
+; CHECK-NEXT: %[[ret:.*]] = zext i16 %[[value]] to i32
+; CHECK-NEXT: ret i32 %[[ret]]
entry:
- %a = alloca [3 x i8]
+ %a = alloca [3 x i8], align 2
%b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
store i8 0, i8* %b0ptr
%b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
@@ -1160,19 +1175,71 @@ define void @PR14548(i1 %x) {
entry:
%a = alloca <{ i1 }>, align 8
%b = alloca <{ i1 }>, align 8
-; Nothing of interest is simplified here.
-; CHECK: alloca
-; CHECK: alloca
+; CHECK: %[[a:.*]] = alloca i8, align 8
%b.i1 = bitcast <{ i1 }>* %b to i1*
store i1 %x, i1* %b.i1, align 8
%b.i8 = bitcast <{ i1 }>* %b to i8*
%foo = load i8* %b.i8, align 1
+; CHECK-NEXT: {{.*}} = zext i1 %x to i8
+; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8
+; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8
+; CHECK-NEXT: {{.*}} = load i8* %[[a]], align 8
%a.i8 = bitcast <{ i1 }>* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind
%bar = load i8* %a.i8, align 1
%a.i1 = getelementptr inbounds <{ i1 }>* %a, i32 0, i32 0
%baz = load i1* %a.i1, align 1
+; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1*
+; CHECK-NEXT: {{.*}} = load i1* %[[a_cast]], align 8
+
ret void
}
+
+define <3 x i8> @PR14572.1(i32 %x) {
+; Ensure that a split integer store which is wider than the type size of the
+; alloca (relying on the alloc size padding) doesn't trigger an assert.
+; CHECK: @PR14572.1
+
+entry:
+ %a = alloca <3 x i8>, align 4
+; CHECK-NOT: alloca
+
+ %cast = bitcast <3 x i8>* %a to i32*
+ store i32 %x, i32* %cast, align 1
+ %y = load <3 x i8>* %a, align 4
+ ret <3 x i8> %y
+; CHECK: ret <3 x i8>
+}
+
+define i32 @PR14572.2(<3 x i8> %x) {
+; Ensure that a split integer load which is wider than the type size of the
+; alloca (relying on the alloc size padding) doesn't trigger an assert.
+; CHECK: @PR14572.2
+
+entry:
+ %a = alloca <3 x i8>, align 4
+; CHECK-NOT: alloca
+
+ store <3 x i8> %x, <3 x i8>* %a, align 1
+ %cast = bitcast <3 x i8>* %a to i32*
+ %y = load i32* %cast, align 4
+ ret i32 %y
+; CHECK: ret i32
+}
+
+define i32 @PR14601(i32 %x) {
+; Don't try to form a promotable integer alloca when there is a variable length
+; memory intrinsic.
+; CHECK: @PR14601
+
+entry:
+ %a = alloca i32
+; CHECK: alloca
+
+ %a.i8 = bitcast i32* %a to i8*
+ call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i32 1, i1 false)
+ %v = load i32* %a
+ ret i32 %v
+}
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll
index 1ac6d25d6341..64a0cc743974 100644
--- a/test/Transforms/SROA/big-endian.ll
+++ b/test/Transforms/SROA/big-endian.ll
@@ -24,8 +24,8 @@ entry:
store i8 0, i8* %a2ptr
%aiptr = bitcast [3 x i8]* %a to i24*
%ai = load i24* %aiptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
; CHECK: %[[ext2:.*]] = zext i8 0 to i24
; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256
; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]]
@@ -46,8 +46,8 @@ entry:
%b1 = load i8* %b1ptr
%b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
%b2 = load i8* %b2ptr
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
; CHECK: %[[shift0:.*]] = lshr i24 %[[insert0]], 16
; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8
; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
@@ -77,8 +77,8 @@ entry:
%a2ptr = getelementptr [7 x i8]* %a, i64 0, i32 2
%a3ptr = getelementptr [7 x i8]* %a, i64 0, i32 3
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
%a0i16ptr = bitcast i8* %a0ptr to i16*
store i16 1, i16* %a0i16ptr
@@ -98,8 +98,8 @@ entry:
; CHECK-NEXT: %[[mask3:.*]] = and i56 undef, -1099511627776
; CHECK-NEXT: %[[insert3:.*]] = or i56 %[[mask3]], %[[ext3]]
-; CHCEK-NOT: store
-; CHCEK-NOT: load
+; CHECK-NOT: store
+; CHECK-NOT: load
%aiptr = bitcast [7 x i8]* %a to i56*
%ai = load i56* %aiptr
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index 921016a9c24b..b9931800e7f4 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -396,9 +396,10 @@ define i64 @PR14132(i1 %flag) {
; Here we form a PHI-node by promoting the pointer alloca first, and then in
; order to promote the other two allocas, we speculate the load of the
; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
-; alloca, which is completely bogus. However, we were asserting on trying to
-; rewrite it. Now it is replaced with undef. Eventually we may replace it with
-; unrechable and even the CFG will go away here.
+; alloca. While this is a bit dubious, we were asserting on trying to
+; rewrite it. The trick is that the code using the value may carefully take
+; steps to use only the non-undef bits, and so we need to at least loosely
+; support this.
entry:
%a = alloca i64
%b = alloca i8
@@ -414,13 +415,14 @@ entry:
if.then:
store i8* %b, i8** %ptr.cast
br label %if.end
+; CHECK-NOT: store
+; CHECK: %[[ext:.*]] = zext i8 1 to i64
if.end:
%tmp = load i64** %ptr
%result = load i64* %tmp
-; CHECK-NOT: store
; CHECK-NOT: load
-; CHECK: %[[result:.*]] = phi i64 [ undef, %if.then ], [ 0, %entry ]
+; CHECK: %[[result:.*]] = phi i64 [ %[[ext]], %if.then ], [ 0, %entry ]
ret i64 %result
; CHECK-NEXT: ret i64 %[[result]]
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index ea28f5d1a647..02f6d040cc95 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -36,15 +36,15 @@ entry:
define i32 @test2(<4 x i32> %x, <4 x i32> %y) {
; CHECK: @test2
-; FIXME: This should be handled!
entry:
%a = alloca [2 x <4 x i32>]
-; CHECK: alloca <4 x i32>
+; CHECK-NOT: alloca
%a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
store <4 x i32> %x, <4 x i32>* %a.x
%a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
%a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
%tmp1 = load i32* %a.tmp1
@@ -54,10 +54,18 @@ entry:
%a.tmp3.cast = bitcast i32* %a.tmp3 to <2 x i32>*
%tmp3.vec = load <2 x i32>* %a.tmp3.cast
%tmp3 = extractelement <2 x i32> %tmp3.vec, i32 0
+; CHECK-NOT: load
+; CHECK: %[[extract1:.*]] = extractelement <4 x i32> %x, i32 2
+; CHECK-NEXT: %[[extract2:.*]] = extractelement <4 x i32> %y, i32 3
+; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: %[[extract4:.*]] = extractelement <2 x i32> %[[extract3]], i32 0
%tmp4 = add i32 %tmp1, %tmp2
%tmp5 = add i32 %tmp3, %tmp4
ret i32 %tmp5
+; CHECK-NEXT: %[[sum1:.*]] = add i32 %[[extract1]], %[[extract2]]
+; CHECK-NEXT: %[[sum2:.*]] = add i32 %[[extract4]], %[[sum1]]
+; CHECK-NEXT: ret i32 %[[sum2]]
}
define i32 @test3(<4 x i32> %x, <4 x i32> %y) {
@@ -206,6 +214,154 @@ define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
ret i64 %res
}
+define <4 x i32> @test_subvec_store() {
+; CHECK: @test_subvec_store
+entry:
+ %a = alloca <4 x i32>
+; CHECK-NOT: alloca
+
+ %a.gep0 = getelementptr <4 x i32>* %a, i32 0, i32 0
+ %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
+ store <2 x i32> <i32 0, i32 0>, <2 x i32>* %a.cast0
+; CHECK-NOT: store
+; CHECK: %[[insert1:.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>, <4 x i32> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+
+ %a.gep1 = getelementptr <4 x i32>* %a, i32 0, i32 1
+ %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
+ store <2 x i32> <i32 1, i32 1>, <2 x i32>* %a.cast1
+; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>, <4 x i32> %[[insert1]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+
+ %a.gep2 = getelementptr <4 x i32>* %a, i32 0, i32 2
+ %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
+ store <2 x i32> <i32 2, i32 2>, <2 x i32>* %a.cast2
+; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x i32> <i32 undef, i32 undef, i32 2, i32 2>, <4 x i32> %[[insert2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+
+ %a.gep3 = getelementptr <4 x i32>* %a, i32 0, i32 3
+ store i32 3, i32* %a.gep3
+; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x i32> %[[insert3]], i32 3, i32 3
+
+ %ret = load <4 x i32>* %a
+
+ ret <4 x i32> %ret
+; CHECK-NEXT: ret <4 x i32> %[[insert4]]
+}
+
+define <4 x i32> @test_subvec_load() {
+; CHECK: @test_subvec_load
+entry:
+ %a = alloca <4 x i32>
+; CHECK-NOT: alloca
+ store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a
+; CHECK-NOT: store
+
+ %a.gep0 = getelementptr <4 x i32>* %a, i32 0, i32 0
+ %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
+ %first = load <2 x i32>* %a.cast0
+; CHECK-NOT: load
+; CHECK: %[[extract1:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+
+ %a.gep1 = getelementptr <4 x i32>* %a, i32 0, i32 1
+ %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
+ %second = load <2 x i32>* %a.cast1
+; CHECK-NEXT: %[[extract2:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 1, i32 2>
+
+ %a.gep2 = getelementptr <4 x i32>* %a, i32 0, i32 2
+ %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
+ %third = load <2 x i32>* %a.cast2
+; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+ %tmp = shufflevector <2 x i32> %first, <2 x i32> %second, <2 x i32> <i32 0, i32 2>
+ %ret = shufflevector <2 x i32> %tmp, <2 x i32> %third, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: %[[tmp:.*]] = shufflevector <2 x i32> %[[extract1]], <2 x i32> %[[extract2]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: %[[ret:.*]] = shufflevector <2 x i32> %[[tmp]], <2 x i32> %[[extract3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+ ret <4 x i32> %ret
+; CHECK-NEXT: ret <4 x i32> %[[ret]]
+}
+
+declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i32, i1) nounwind
+
+define <4 x float> @test_subvec_memset() {
+; CHECK: @test_subvec_memset
+entry:
+ %a = alloca <4 x float>
+; CHECK-NOT: alloca
+
+ %a.gep0 = getelementptr <4 x float>* %a, i32 0, i32 0
+ %a.cast0 = bitcast float* %a.gep0 to i8*
+ call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i32 0, i1 false)
+; CHECK-NOT: store
+; CHECK: %[[insert1:.*]] = shufflevector <4 x float> <float 0.000000e+00, float 0.000000e+00, float undef, float undef>, <4 x float> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+
+ %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1
+ %a.cast1 = bitcast float* %a.gep1 to i8*
+ call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x float> <float undef, float 0x3820202020000000, float 0x3820202020000000, float undef>, <4 x float> %[[insert1]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+
+ %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2
+ %a.cast2 = bitcast float* %a.gep2 to i8*
+ call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x float> <float undef, float undef, float 0x3860606060000000, float 0x3860606060000000>, <4 x float> %[[insert2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+
+ %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3
+ %a.cast3 = bitcast float* %a.gep3 to i8*
+ call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i32 0, i1 false)
+; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x float> %[[insert3]], float 0x38E0E0E0E0000000, i32 3
+
+ %ret = load <4 x float>* %a
+
+ ret <4 x float> %ret
+; CHECK-NEXT: ret <4 x float> %[[insert4]]
+}
+
+define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) {
+; CHECK: @test_subvec_memcpy
+entry:
+ %a = alloca <4 x float>
+; CHECK-NOT: alloca
+
+ %a.gep0 = getelementptr <4 x float>* %a, i32 0, i32 0
+ %a.cast0 = bitcast float* %a.gep0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i32 0, i1 false)
+; CHECK: %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
+; CHECK-NEXT: %[[x:.*]] = load <2 x float>* %[[xptr]]
+; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: %[[insert_x:.*]] = shufflevector <4 x float> %[[expand_x]], <4 x float> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+
+ %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1
+ %a.cast1 = bitcast float* %a.gep1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
+; CHECK-NEXT: %[[y:.*]] = load <2 x float>* %[[yptr]]
+; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
+; CHECK-NEXT: %[[insert_y:.*]] = shufflevector <4 x float> %[[expand_y]], <4 x float> %[[insert_x]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+
+ %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2
+ %a.cast2 = bitcast float* %a.gep2 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
+; CHECK-NEXT: %[[z:.*]] = load <2 x float>* %[[zptr]]
+; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT: %[[insert_z:.*]] = shufflevector <4 x float> %[[expand_z]], <4 x float> %[[insert_y]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+
+ %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3
+ %a.cast3 = bitcast float* %a.gep3 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i32 0, i1 false)
+; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float*
+; CHECK-NEXT: %[[f:.*]] = load float* %[[fptr]]
+; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> %[[insert_z]], float %[[f]], i32 3
+
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i32 0, i1 false)
+; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>*
+; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]]
+
+ %ret = load <4 x float>* %a
+
+ ret <4 x float> %ret
+; CHECK-NEXT: ret <4 x float> %[[insert_f]]
+}
+
define i32 @PR14212() {
; CHECK: @PR14212
; This caused a crash when "splitting" the load of the i32 in order to promote
@@ -222,7 +378,7 @@ entry:
}
define <2 x i8> @PR14349.1(i32 %x) {
-; CEHCK: @PR14349.1
+; CHECK: @PR14349.1
; The first testcase for broken SROA rewriting of split integer loads and
; stores due to smaller vector loads and stores. This particular test ensures
; that we can rewrite a split store of an integer to a store of a vector.
@@ -244,7 +400,7 @@ entry:
}
define i32 @PR14349.2(<2 x i8> %x) {
-; CEHCK: @PR14349.2
+; CHECK: @PR14349.2
; The first testcase for broken SROA rewriting of split integer loads and
; stores due to smaller vector loads and stores. This particular test ensures
; that we can rewrite a split load of an integer to a load of a vector.
diff --git a/test/Transforms/SROA/vectors-of-pointers.ll b/test/Transforms/SROA/vectors-of-pointers.ll
new file mode 100644
index 000000000000..7e995b9e4476
--- /dev/null
+++ b/test/Transforms/SROA/vectors-of-pointers.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -sroa
+
+; Make sure we don't crash on this one.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define void @foo() {
+entry:
+ %Args.i = alloca <2 x i32*>, align 16
+ br i1 undef, label %bb0.exit158, label %if.then.i.i.i.i.i138
+
+if.then.i.i.i.i.i138:
+ unreachable
+
+bb0.exit158:
+ br i1 undef, label %bb0.exit257, label %if.then.i.i.i.i.i237
+
+if.then.i.i.i.i.i237:
+ unreachable
+
+bb0.exit257:
+ %0 = load <2 x i32*>* %Args.i, align 16
+ unreachable
+}
diff --git a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
index 0b5e4152c423..3f28cb187f86 100644
--- a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
+++ b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
@@ -1,7 +1,6 @@
; Scalar replacement was incorrectly promoting this alloca!!
;
-; RUN: opt < %s -scalarrepl -S | \
-; RUN: sed "s/;.*//g" | grep "\["
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
define i8* @test() {
%A = alloca [30 x i8] ; <[30 x i8]*> [#uses=1]
@@ -10,4 +9,4 @@ define i8* @test() {
store i8 0, i8* %B
ret i8* %C
}
-
+; CHECK: alloca [
diff --git a/test/Transforms/ScalarRepl/crash.ll b/test/Transforms/ScalarRepl/crash.ll
index 58c5a3a0527d..8c60dceb8b07 100644
--- a/test/Transforms/ScalarRepl/crash.ll
+++ b/test/Transforms/ScalarRepl/crash.ll
@@ -1,5 +1,5 @@
-; RUN: opt -scalarrepl %s -disable-output
-; RUN: opt -scalarrepl-ssa %s -disable-output
+; RUN: opt -scalarrepl -disable-output < %s
+; RUN: opt -scalarrepl-ssa -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index c1491345351e..7d3bcea8b857 100644
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -40,22 +40,23 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1}
-!0 = metadata !{i32 589841, i32 0, i32 12, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !"clang version 3.0 (trunk 131941)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, i32 0, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !0} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131941)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, null} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 786478, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590081, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
!7 = metadata !{i32 1, i32 11, metadata !1, null}
-!8 = metadata !{i32 590081, metadata !1, metadata !"b", metadata !2, i32 33554433, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
+!8 = metadata !{i32 786689, metadata !1, metadata !"b", metadata !2, i32 33554433, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
!9 = metadata !{i32 1, i32 18, metadata !1, null}
-!10 = metadata !{i32 590080, metadata !11, metadata !"c", metadata !2, i32 2, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 589835, metadata !1, i32 1, i32 21, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!10 = metadata !{i32 786688, metadata !11, metadata !"c", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 786443, metadata !1, i32 1, i32 21, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
!12 = metadata !{i32 2, i32 9, metadata !11, null}
!13 = metadata !{i32 2, i32 14, metadata !11, null}
!14 = metadata !{i32 3, i32 5, metadata !11, null}
!15 = metadata !{i32 4, i32 5, metadata !11, null}
!16 = metadata !{i32 5, i32 5, metadata !11, null}
+!17 = metadata !{metadata !1}
+!18 = metadata !{metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b"}
diff --git a/test/Transforms/ScalarRepl/memcpy-align.ll b/test/Transforms/ScalarRepl/memcpy-align.ll
index a7af208f4f39..6046e1295d9e 100644
--- a/test/Transforms/ScalarRepl/memcpy-align.ll
+++ b/test/Transforms/ScalarRepl/memcpy-align.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -scalarrepl -S | FileCheck %s
+; RUN: opt -scalarrepl -S < %s | FileCheck %s
; PR6832
target datalayout =
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
diff --git a/test/Transforms/ScalarRepl/phi-cycle.ll b/test/Transforms/ScalarRepl/phi-cycle.ll
index cb5101c2dd8e..05d9382cec40 100644
--- a/test/Transforms/ScalarRepl/phi-cycle.ll
+++ b/test/Transforms/ScalarRepl/phi-cycle.ll
@@ -67,7 +67,7 @@ while.cond.backedge.i: ; preds = %if.end.i, %while.bo
; CHECK: func.exit:
; CHECK-NOT: load
-; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) nounwind
+; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) [[NUW:#[0-9]+]]
func.exit: ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
%tmp3 = load i32* %x.i, align 4
%call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
@@ -75,3 +75,6 @@ func.exit: ; preds = %while.body.i.func.e
}
declare i32 @printf(i8* nocapture, ...) nounwind
+
+; CHECK: attributes #0 = { nounwind uwtable }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll
index ffe0b1dd5f47..5c21c3bd9f34 100644
--- a/test/Transforms/ScalarRepl/phi-select.ll
+++ b/test/Transforms/ScalarRepl/phi-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -scalarrepl -S | FileCheck %s
+; RUN: opt -scalarrepl -S < %s | FileCheck %s
; Test promotion of allocas that have phis and select users.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.2"
diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll
index 056526cbd92b..d506cdfbd87a 100644
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ b/test/Transforms/ScalarRepl/volatile.ll
@@ -1,12 +1,13 @@
-; RUN: opt < %s -scalarrepl -S | grep "load volatile"
-; RUN: opt < %s -scalarrepl -S | grep "store volatile"
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
define i32 @voltest(i32 %T) {
%A = alloca {i32, i32}
%B = getelementptr {i32,i32}* %A, i32 0, i32 0
store volatile i32 %T, i32* %B
+; CHECK: store volatile
%C = getelementptr {i32,i32}* %A, i32 0, i32 1
%X = load volatile i32* %C
+; CHECK: load volatile
ret i32 %X
}
diff --git a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll b/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll
deleted file mode 100644
index feffb4e4c812..000000000000
--- a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; Basic block #2 should not be merged into BB #3!
-;
-; RUN: opt < %s -simplifycfg -S | \
-; RUN: grep "br label"
-;
-
-declare void @foo()
-
-define void @cprop_test12(i32* %data) {
-bb0:
- %reg108 = load i32* %data ; <i32> [#uses=2]
- %cond218 = icmp ne i32 %reg108, 5 ; <i1> [#uses=1]
- br i1 %cond218, label %bb3, label %bb2
-bb2: ; preds = %bb0
- call void @foo( )
- br label %bb3
-bb3: ; preds = %bb2, %bb0
- %reg117 = phi i32 [ 110, %bb2 ], [ %reg108, %bb0 ] ; <i32> [#uses=1]
- store i32 %reg117, i32* %data
- ret void
-}
-
diff --git a/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll b/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll
deleted file mode 100644
index 88f32bc08279..000000000000
--- a/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; -simplifycfg is not folding blocks if there is a PHI node involved. This
-; should be fixed eventually
-
-; RUN: opt < %s -simplifycfg -S | not grep br
-
-define i32 @main(i32 %argc) {
-; <label>:0
- br label %InlinedFunctionReturnNode
-InlinedFunctionReturnNode: ; preds = %0
- %X = phi i32 [ 7, %0 ] ; <i32> [#uses=1]
- %Y = add i32 %X, %argc ; <i32> [#uses=1]
- ret i32 %Y
-}
-
diff --git a/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll
index 7bffa1a8e0e2..333336de7673 100644
--- a/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll
+++ b/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -simplifycfg -disable-output
+; RUN: opt -simplifycfg -disable-output < %s
; END.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll b/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll
new file mode 100644
index 000000000000..aba08dc073a8
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll
@@ -0,0 +1,21 @@
+; Basic block #2 should not be merged into BB #3!
+;
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+declare void @foo()
+
+define void @cprop_test12(i32* %data) {
+bb0:
+ %reg108 = load i32* %data ; <i32> [#uses=2]
+ %cond218 = icmp ne i32 %reg108, 5 ; <i1> [#uses=1]
+ br i1 %cond218, label %bb3, label %bb2
+bb2: ; preds = %bb0
+ call void @foo( )
+; CHECK: br label %bb3
+ br label %bb3
+bb3: ; preds = %bb2, %bb0
+ %reg117 = phi i32 [ 110, %bb2 ], [ %reg108, %bb0 ] ; <i32> [#uses=1]
+ store i32 %reg117, i32* %data
+ ret void
+}
+
diff --git a/test/Transforms/SimplifyCFG/PHINode.ll b/test/Transforms/SimplifyCFG/PHINode.ll
new file mode 100644
index 000000000000..25a242a55997
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PHINode.ll
@@ -0,0 +1,15 @@
+; -simplifycfg is not folding blocks if there is a PHI node involved. This
+; should be fixed eventually.
+
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+define i32 @main(i32 %argc) {
+; <label>:0
+; CHECK-NOT: br label %InlinedFunctionReturnNode
+ br label %InlinedFunctionReturnNode
+InlinedFunctionReturnNode: ; preds = %0
+ %X = phi i32 [ 7, %0 ] ; <i32> [#uses=1]
+ %Y = add i32 %X, %argc ; <i32> [#uses=1]
+ ret i32 %Y
+}
+
diff --git a/test/Transforms/SimplifyCFG/PR9946.ll b/test/Transforms/SimplifyCFG/PR9946.ll
index 4a61b846052e..c355a8f5cc98 100644
--- a/test/Transforms/SimplifyCFG/PR9946.ll
+++ b/test/Transforms/SimplifyCFG/PR9946.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -simplifycfg -disable-output
+; RUN: opt -simplifycfg -disable-output < %s
@foo = external constant i32
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index a61867fe89c7..dd2e5d1c3a77 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -44,3 +44,44 @@ join:
ret i8 %c
}
+define i8* @test4(i1* %dummy, i8* %a, i8* %b) {
+; Test that we don't speculate an arbitrarily large number of unfolded constant
+; expressions.
+; CHECK: @test4
+
+entry:
+ %cond1 = load volatile i1* %dummy
+ br i1 %cond1, label %if, label %end
+
+if:
+ %cond2 = load volatile i1* %dummy
+ br i1 %cond2, label %then, label %end
+
+then:
+ br label %end
+
+end:
+ %x1 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 1 to i8*), %then ]
+ %x2 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 2 to i8*), %then ]
+ %x3 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 3 to i8*), %then ]
+ %x4 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 4 to i8*), %then ]
+ %x5 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 5 to i8*), %then ]
+ %x6 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 6 to i8*), %then ]
+ %x7 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 7 to i8*), %then ]
+ %x8 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 8 to i8*), %then ]
+ %x9 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 9 to i8*), %then ]
+ %x10 = phi i8* [ %a, %entry ], [ %b, %if ], [ inttoptr (i64 10 to i8*), %then ]
+; CHECK-NOT: select
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+; CHECK: phi i8*
+
+ ret i8* %x10
+}
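+
+; Each inttoptr above is an unfolded constant expression, so speculating the
+; %then block would mean materializing ten of them behind selects; per the
+; comment at the top of the test, SimplifyCFG's speculation limit should
+; refuse, leaving the ten phis intact as the CHECK lines insist.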
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 8a59992f5e64..5f70465c64d4 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -777,3 +777,29 @@ return:
; CHECK: switch.lookup:
; CHECK: getelementptr inbounds [5 x i32]* @switch.table6, i32 0, i32 %switch.tableidx
}
+
+; Don't create a table with an illegal type.
+; rdar://12779436
+define i96 @illegaltype(i32 %c) {
+entry:
+ switch i32 %c, label %sw.default [
+ i32 42, label %return
+ i32 43, label %sw.bb1
+ i32 44, label %sw.bb2
+ i32 45, label %sw.bb3
+ i32 46, label %sw.bb4
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.default: br label %return
+return:
+ %retval.0 = phi i96 [ 15, %sw.default ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+ ret i96 %retval.0
+
+; CHECK: @illegaltype
+; CHECK-NOT: @switch.table
+; CHECK: switch i32 %c
+}
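+
+; The concern is presumably lowering: i96 is not a native integer type on
+; x86-64, so a @switch.table of i96 entries would have no cheap table-load
+; expansion; the CHECK lines pin down that the switch survives as-is.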
diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
index 0897c95a6778..0526883fe8f4 100644
--- a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
@@ -1,4 +1,4 @@
-; RUN: opt -simplifycfg -S %s | FileCheck %s
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
%0 = type { i32*, i32* }
diff --git a/test/Transforms/SimplifyCFG/select-gep.ll b/test/Transforms/SimplifyCFG/select-gep.ll
index 7654d0271a9a..3e2a6237b275 100644
--- a/test/Transforms/SimplifyCFG/select-gep.ll
+++ b/test/Transforms/SimplifyCFG/select-gep.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -simplifycfg %s | FileCheck %s
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
define i8* @test1(i8* %x, i64 %y) nounwind {
entry:
diff --git a/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
index 673a62bf035c..9cd709ff8ecf 100644
--- a/test/Transforms/SimplifyCFG/switch-on-const-select.ll
+++ b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -35,7 +35,7 @@ define i32 @bar(i64 %x, i64 %y) nounwind {
; CHECK: @bar
entry:
; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.a() nounwind
+; CHECK-NEXT: tail call void @bees.a() [[NUW:#[0-9]+]]
; CHECK-NEXT: ret i32 0
%lt = icmp slt i64 %x, %y
%qux = select i1 %lt, i32 0, i32 2
@@ -61,7 +61,7 @@ define void @bazz(i64 %x, i64 %y) nounwind {
; CHECK: @bazz
entry:
; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.b() nounwind
+; CHECK-NEXT: tail call void @bees.b() [[NUW]]
; CHECK-NEXT: ret void
%lt = icmp slt i64 %x, %y
%qux = select i1 %lt, i32 10, i32 12
@@ -86,7 +86,7 @@ define void @quux(i64 %x, i64 %y) nounwind {
; CHECK: @quux
entry:
; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.a() nounwind
+; CHECK-NEXT: tail call void @bees.a() [[NUW]]
; CHECK-NEXT: ret void
%lt = icmp slt i64 %x, %y
%qux = select i1 %lt, i32 0, i32 0
@@ -136,3 +136,6 @@ bees:
declare void @llvm.trap() nounwind noreturn
declare void @bees.a() nounwind
declare void @bees.b() nounwind
+
+; CHECK: attributes [[NUW]] = { nounwind }
+; CHECK: attributes #1 = { noreturn nounwind }
diff --git a/test/Transforms/SimplifyCFG/trivial-throw.ll b/test/Transforms/SimplifyCFG/trivial-throw.ll
new file mode 100644
index 000000000000..ca2b5693e600
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/trivial-throw.ll
@@ -0,0 +1,77 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+; <rdar://problem/13360379>
+
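+; @throw always unwinds: it calls __cxa_throw and ends in unreachable. Turning
+; its invokes into plain calls would strand the landing pads below, so
+; SimplifyCFG is expected to preserve them.
+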
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTS13TestException = linkonce_odr constant [16 x i8] c"13TestException\00"
+@_ZTI13TestException = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([16 x i8]* @_ZTS13TestException, i32 0, i32 0) }
+
+define void @throw(i32 %n) #0 {
+entry:
+ %exception = call i8* @__cxa_allocate_exception(i64 1) #4
+ call void @__cxa_throw(i8* %exception, i8* bitcast ({ i8*, i8* }* @_ZTI13TestException to i8*), i8* null) #2
+ unreachable
+}
+
+define void @func() #0 {
+entry:
+; CHECK: func()
+; CHECK: invoke void @throw
+; CHECK-NOT: call void @throw
+ invoke void @throw(i32 42) #0
+ to label %exit unwind label %lpad
+
+lpad:
+ %tmp0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ resume { i8*, i32 } %tmp0
+
+exit:
+ invoke void @abort() #2
+ to label %invoke.cont unwind label %lpad1
+
+invoke.cont:
+ unreachable
+
+lpad1:
+ %tmp1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast ({ i8*, i8* }* @_ZTI13TestException to i8*)
+ %tmp2 = extractvalue { i8*, i32 } %tmp1, 1
+ %tmp3 = call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI13TestException to i8*)) #4
+ %matches = icmp eq i32 %tmp2, %tmp3
+ br i1 %matches, label %catch, label %eh.resume
+
+catch:
+ ret void
+
+eh.resume:
+ resume { i8*, i32 } %tmp1
+}
+
+define linkonce_odr hidden void @__clang_call_terminate(i8*) #1 {
+ %2 = call i8* @__cxa_begin_catch(i8* %0) #4
+ call void @_ZSt9terminatev() #5
+ unreachable
+}
+
+declare void @abort() #2
+
+declare i32 @llvm.eh.typeid.for(i8*) #3
+
+declare void @__cxa_end_catch()
+
+declare i8* @__cxa_allocate_exception(i64)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @_ZSt9terminatev()
+
+attributes #0 = { ssp uwtable }
+attributes #1 = { noinline noreturn nounwind }
+attributes #2 = { noreturn }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
+attributes #5 = { noreturn nounwind }
diff --git a/test/Transforms/SimplifyCFG/volatile-phioper.ll b/test/Transforms/SimplifyCFG/volatile-phioper.ll
new file mode 100644
index 000000000000..164898897eff
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/volatile-phioper.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+;
+; rdar://13349374
+;
+; SimplifyCFG should not eliminate blocks with volatile stores.
+; Essentially, volatile needs to be a backdoor that tells the optimizer
+; it can no longer use the language standard as an excuse. The compiler
+; needs to expose the volatile access to the platform.
+;
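+; As an illustration (not one of the checks), the loop below repeatedly stores
+; through a volatile pointer; SimplifyCFG may merge blocks around it, but the
+; volatile store itself must remain visible to the platform.
+;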
+; CHECK: @test
+; CHECK: entry:
+; CHECK: @Trace
+; CHECK: while.body:
+; CHECK: store volatile
+; CHECK: end:
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @test(i8** nocapture %PeiServices) #0 {
+entry:
+ %call = tail call i32 (...)* @Trace() #2
+ %tobool = icmp eq i32 %call, 0
+ br i1 %tobool, label %while.body, label %if.then
+
+if.then: ; preds = %entry
+ %call1 = tail call i32 (...)* @Trace() #2
+ br label %while.body
+
+while.body: ; preds = %entry, %if.then, %while.body
+ %Addr.017 = phi i8* [ %incdec.ptr, %while.body ], [ null, %if.then ], [ null, %entry ]
+ %x.016 = phi i8 [ %inc, %while.body ], [ 0, %if.then ], [ 0, %entry ]
+ %inc = add i8 %x.016, 1
+ %incdec.ptr = getelementptr inbounds i8* %Addr.017, i64 1
+ store volatile i8 %x.016, i8* %Addr.017, align 1
+ %0 = ptrtoint i8* %incdec.ptr to i64
+ %1 = trunc i64 %0 to i32
+ %cmp = icmp ult i32 %1, 4096
+ br i1 %cmp, label %while.body, label %end
+
+end:
+ ret void
+}
+declare i32 @Trace(...) #1
+
+attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #1 = { "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+attributes #2 = { nounwind }
+
+!0 = metadata !{i32 1039}
diff --git a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll b/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
deleted file mode 100644
index 73eb05b05e34..000000000000
--- a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S > %t
-; RUN: grep noalias %t | count 2
-; RUN: grep nocapture %t | count 3
-; RUN: grep nounwind %t | count 3
-; RUN: grep readonly %t | count 1
-
-declare i8* @fopen(i8*, i8*)
-declare i8 @strlen(i8*)
-declare i32* @realloc(i32*, i32)
-
-; Test deliberately wrong declaration
-declare i32 @strcpy(...)
diff --git a/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll b/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll
deleted file mode 100644
index ac89199b0ec1..000000000000
--- a/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -inline -simplify-libcalls -functionattrs | \
-; RUN: llvm-dis | grep nocapture | count 2
-; Check that nocapture attributes are added when run after an SCC pass.
-; PR3520
-
-define i32 @use(i8* %x) nounwind readonly {
-entry:
- %0 = tail call i64 @strlen(i8* %x) nounwind readonly ; <i64> [#uses=1]
- %1 = trunc i64 %0 to i32 ; <i32> [#uses=1]
- ret i32 %1
-}
-
-declare i64 @strlen(i8*) nounwind readonly
diff --git a/test/Transforms/SimplifyLibCalls/2010-05-30-memcpy-Struct.ll b/test/Transforms/SimplifyLibCalls/2010-05-30-memcpy-Struct.ll
deleted file mode 100644
index f67bae74f503..000000000000
--- a/test/Transforms/SimplifyLibCalls/2010-05-30-memcpy-Struct.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt -simplify-libcalls %s -S -o - | FileCheck %s
-; PR7265
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-%union.anon = type { i32, [4 x i8] }
-
-@.str = private constant [3 x i8] c"%s\00" ; <[3 x i8]*> [#uses=2]
-
-define void @CopyEventArg(%union.anon* %ev) nounwind {
-entry:
- %call = call i32 (i8*, i8*, ...)* @sprintf(i8* undef, i8* getelementptr inbounds ([3 x i8]* @.str, i64 0, i64 0), %union.anon* %ev) nounwind ; <i32> [#uses=0]
-; CHECK: bitcast %union.anon* %ev to i8*
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
- ret void
-}
-
-declare i32 @sprintf(i8*, i8*, ...)
-
diff --git a/test/Transforms/SimplifyLibCalls/FFS.ll b/test/Transforms/SimplifyLibCalls/FFS.ll
deleted file mode 100644
index 6aecbeacd7e6..000000000000
--- a/test/Transforms/SimplifyLibCalls/FFS.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; Test that FFSOpt works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; CHECK-NOT: call{{.*}}@ffs
-
-@non_const = external global i32 ; <i32*> [#uses=1]
-
-declare i32 @ffs(i32)
-
-declare i32 @ffsl(i32)
-
-declare i32 @ffsll(i64)
-
-define i32 @main() {
- %arg = load i32* @non_const ; <i32> [#uses=1]
- %val0 = call i32 @ffs( i32 %arg ) ; <i32> [#uses=1]
- %val1 = call i32 @ffs( i32 1 ) ; <i32> [#uses=1]
- %val2 = call i32 @ffs( i32 2048 ) ; <i32> [#uses=1]
- %val3 = call i32 @ffsl( i32 65536 ) ; <i32> [#uses=1]
- %val4 = call i32 @ffsll( i64 1024 ) ; <i32> [#uses=1]
- %val5 = call i32 @ffsll( i64 17179869184 ) ; <i32> [#uses=1]
- %val6 = call i32 @ffsll( i64 1152921504606846976 ) ; <i32> [#uses=1]
- %rslt1 = add i32 %val1, %val2 ; <i32> [#uses=1]
- %rslt2 = add i32 %val3, %val4 ; <i32> [#uses=1]
- %rslt3 = add i32 %val5, %val6 ; <i32> [#uses=1]
- %rslt4 = add i32 %rslt1, %rslt2 ; <i32> [#uses=1]
- %rslt5 = add i32 %rslt4, %rslt3 ; <i32> [#uses=2]
- %rslt6 = add i32 %rslt5, %val0 ; <i32> [#uses=0]
- ret i32 %rslt5
-}
-
-
-; PR4206
-define i32 @a(i64) nounwind {
- %2 = call i32 @ffsll(i64 %0) ; <i32> [#uses=1]
- ret i32 %2
-}
-
-; PR13028
-define i32 @b() nounwind {
- %ffs = call i32 @ffsll(i64 0)
- ret i32 %ffs
-; CHECK: @b
-; CHECK-NEXT: ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/FPrintF.ll b/test/Transforms/SimplifyLibCalls/FPrintF.ll
deleted file mode 100644
index 51733e4a1ef6..000000000000
--- a/test/Transforms/SimplifyLibCalls/FPrintF.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; Test that the FPrintFOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*fprintf"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
- %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
- %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@str = constant [3 x i8] c"%s\00" ; <[3 x i8]*> [#uses=1]
-@chr = constant [3 x i8] c"%c\00" ; <[3 x i8]*> [#uses=1]
-@hello = constant [13 x i8] c"hello world\0A\00" ; <[13 x i8]*> [#uses=1]
-@stdout = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=3]
-
-declare i32 @fprintf(%struct._IO_FILE*, i8*, ...)
-
-define i32 @foo() {
-entry:
- %tmp.1 = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=1]
- %tmp.0 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp.1, i8* getelementptr ([13 x i8]* @hello, i32 0, i32 0) ) ; <i32> [#uses=0]
- %tmp.4 = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=1]
- %tmp.3 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp.4, i8* getelementptr ([3 x i8]* @str, i32 0, i32 0), i8* getelementptr ([13 x i8]* @hello, i32 0, i32 0) ) ; <i32> [#uses=0]
- %tmp.8 = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=1]
- %tmp.7 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp.8, i8* getelementptr ([3 x i8]* @chr, i32 0, i32 0), i32 33 ) ; <i32> [#uses=0]
- ret i32 0
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/FPuts.ll b/test/Transforms/SimplifyLibCalls/FPuts.ll
deleted file mode 100644
index aa01aba2656c..000000000000
--- a/test/Transforms/SimplifyLibCalls/FPuts.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; Test that the FPutsOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*fputs"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
- %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
- %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@stdout = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=1]
-@empty = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-@len1 = constant [2 x i8] c"A\00" ; <[2 x i8]*> [#uses=1]
-@long = constant [7 x i8] c"hello\0A\00" ; <[7 x i8]*> [#uses=1]
-
-declare i32 @fputs(i8*, %struct._IO_FILE*)
-
-define i32 @main() {
-entry:
- %out = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=3]
- %s1 = getelementptr [1 x i8]* @empty, i32 0, i32 0 ; <i8*> [#uses=1]
- %s2 = getelementptr [2 x i8]* @len1, i32 0, i32 0 ; <i8*> [#uses=1]
- %s3 = getelementptr [7 x i8]* @long, i32 0, i32 0 ; <i8*> [#uses=1]
- %a = call i32 @fputs( i8* %s1, %struct._IO_FILE* %out ) ; <i32> [#uses=0]
- %b = call i32 @fputs( i8* %s2, %struct._IO_FILE* %out ) ; <i32> [#uses=0]
- %c = call i32 @fputs( i8* %s3, %struct._IO_FILE* %out ) ; <i32> [#uses=0]
- ret i32 0
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/IsDigit.ll b/test/Transforms/SimplifyLibCalls/IsDigit.ll
deleted file mode 100644
index 51a769d9bb3d..000000000000
--- a/test/Transforms/SimplifyLibCalls/IsDigit.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; Test that the IsDigitOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep call
-
-declare i32 @isdigit(i32)
-
-declare i32 @isascii(i32)
-
-define i32 @main() {
- %val1 = call i32 @isdigit( i32 47 ) ; <i32> [#uses=1]
- %val2 = call i32 @isdigit( i32 48 ) ; <i32> [#uses=1]
- %val3 = call i32 @isdigit( i32 57 ) ; <i32> [#uses=1]
- %val4 = call i32 @isdigit( i32 58 ) ; <i32> [#uses=1]
- %rslt1 = add i32 %val1, %val2 ; <i32> [#uses=1]
- %rslt2 = add i32 %val3, %val4 ; <i32> [#uses=1]
- %sum = add i32 %rslt1, %rslt2 ; <i32> [#uses=1]
- %rslt = call i32 @isdigit( i32 %sum ) ; <i32> [#uses=1]
- %tmp = call i32 @isascii( i32 %rslt ) ; <i32> [#uses=1]
- ret i32 %tmp
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/Printf.ll b/test/Transforms/SimplifyLibCalls/Printf.ll
deleted file mode 100644
index 489c993f2110..000000000000
--- a/test/Transforms/SimplifyLibCalls/Printf.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-@str = internal constant [13 x i8] c"hello world\0A\00" ; <[13 x i8]*> [#uses=1]
-@str1 = internal constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=1]
-
-; CHECK: private unnamed_addr constant [12 x i8] c"hello world\00"
-
-declare i32 @printf(i8*, ...)
-
-; CHECK: define void @f0
-; CHECK-NOT: printf
-; CHECK: }
-define void @f0() {
-entry:
- %tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8]* @str, i32 0, i32 0) ) ; <i32> [#uses=0]
- ret void
-}
-
-; CHECK: define void @f1
-; CHECK-NOT: printf
-; CHECK: }
-define void @f1() {
-entry:
- %tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([2 x i8]* @str1, i32 0, i32 0) ) ; <i32> [#uses=0]
- ret void
-}
-
-; Verify that we don't turn this into a putchar call (thus changing the return
-; value).
-;
-; CHECK: define i32 @f2
-; CHECK: printf
-; CHECK: }
-define i32 @f2() {
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([2 x i8]* @str1, i32 0, i32 0))
- ret i32 %call
-}
diff --git a/test/Transforms/SimplifyLibCalls/Puts.ll b/test/Transforms/SimplifyLibCalls/Puts.ll
deleted file mode 100644
index 48431434cc61..000000000000
--- a/test/Transforms/SimplifyLibCalls/Puts.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; Test that the PutsOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "-p:64:64:64"
-
-@.str = private constant [1 x i8] zeroinitializer
-
-declare i32 @puts(i8*)
-
-define void @foo() {
-entry:
-; CHECK: call i32 @putchar(i32 10)
- %call = call i32 @puts(i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0))
- ret void
-}
diff --git a/test/Transforms/SimplifyLibCalls/SPrintF.ll b/test/Transforms/SimplifyLibCalls/SPrintF.ll
deleted file mode 100644
index 514a7d9f6eee..000000000000
--- a/test/Transforms/SimplifyLibCalls/SPrintF.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; Test that the SPrintFOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*sprintf"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
-@fmt1 = constant [3 x i8] c"%s\00" ; <[3 x i8]*> [#uses=1]
-@fmt2 = constant [3 x i8] c"%c\00" ; <[3 x i8]*> [#uses=1]
-
-declare i32 @sprintf(i8*, i8*, ...)
-
-declare i32 @puts(i8*)
-
-define i32 @foo(i8* %p) {
- %target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1]
- %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=7]
- %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=2]
- %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1]
- %nh_p = getelementptr [7 x i8]* @null_hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %fmt1_p = getelementptr [3 x i8]* @fmt1, i32 0, i32 0 ; <i8*> [#uses=2]
- %fmt2_p = getelementptr [3 x i8]* @fmt2, i32 0, i32 0 ; <i8*> [#uses=1]
- store i8 0, i8* %target_p
- %r1 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %hello_p ) ; <i32> [#uses=1]
- %r2 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %null_p ) ; <i32> [#uses=1]
- %r3 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %nh_p ) ; <i32> [#uses=1]
- %r4 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %fmt1_p, i8* %hello_p ) ; <i32> [#uses=1]
- %r4.1 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %fmt1_p, i8* %p ) ; <i32> [#uses=1]
- %r5 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %fmt2_p, i32 82 ) ; <i32> [#uses=1]
- %r6 = add i32 %r1, %r2 ; <i32> [#uses=1]
- %r7 = add i32 %r3, %r6 ; <i32> [#uses=1]
- %r8 = add i32 %r5, %r7 ; <i32> [#uses=1]
- %r9 = add i32 %r8, %r4 ; <i32> [#uses=1]
- %r10 = add i32 %r9, %r4.1 ; <i32> [#uses=1]
- ret i32 %r10
-}
diff --git a/test/Transforms/SimplifyLibCalls/ToAscii.ll b/test/Transforms/SimplifyLibCalls/ToAscii.ll
deleted file mode 100644
index aef47333b3c3..000000000000
--- a/test/Transforms/SimplifyLibCalls/ToAscii.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; Test that the ToAsciiOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*toascii"
-
-declare i32 @toascii(i32)
-
-define i32 @main() {
- %val1 = call i32 @toascii( i32 1 ) ; <i32> [#uses=1]
- %val2 = call i32 @toascii( i32 0 ) ; <i32> [#uses=1]
- %val3 = call i32 @toascii( i32 127 ) ; <i32> [#uses=1]
- %val4 = call i32 @toascii( i32 128 ) ; <i32> [#uses=1]
- %val5 = call i32 @toascii( i32 255 ) ; <i32> [#uses=1]
- %val6 = call i32 @toascii( i32 256 ) ; <i32> [#uses=1]
- %rslt1 = add i32 %val1, %val2 ; <i32> [#uses=1]
- %rslt2 = add i32 %val3, %val4 ; <i32> [#uses=1]
- %rslt3 = add i32 %val5, %val6 ; <i32> [#uses=1]
- %rslt4 = add i32 %rslt1, %rslt2 ; <i32> [#uses=1]
- %rslt5 = add i32 %rslt4, %rslt3 ; <i32> [#uses=1]
- ret i32 %rslt5
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/abs.ll b/test/Transforms/SimplifyLibCalls/abs.ll
deleted file mode 100644
index 3934a5b98f74..000000000000
--- a/test/Transforms/SimplifyLibCalls/abs.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | grep "select i1 %ispos"
-; PR2337
-
-define i32 @test(i32 %x) {
-entry:
- %call = call i32 @abs( i32 %x ) ; <i32> [#uses=1]
- ret i32 %call
-}
-
-declare i32 @abs(i32)
-
diff --git a/test/Transforms/SimplifyLibCalls/cos.ll b/test/Transforms/SimplifyLibCalls/cos.ll
deleted file mode 100644
index 6a8ce8c3881d..000000000000
--- a/test/Transforms/SimplifyLibCalls/cos.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define double @foo(double %d) nounwind readnone {
-; CHECK: @foo
- %1 = fsub double -0.000000e+00, %d
- %2 = call double @cos(double %1) nounwind readnone
-; CHECK: call double @cos(double %d)
- ret double %2
-}
-
-declare double @cos(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/debug-line.ll b/test/Transforms/SimplifyLibCalls/debug-line.ll
deleted file mode 100644
index b668e4b9d342..000000000000
--- a/test/Transforms/SimplifyLibCalls/debug-line.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt -simplify-libcalls -S < %s | FileCheck %s
-
-
-@.str = private constant [3 x i8] c"%c\00"
-
-define void @foo() nounwind ssp {
-;CHECK: call i32 @putchar{{.+}} !dbg
- %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 97), !dbg !5
- ret void, !dbg !7
-}
-
-declare i32 @printf(i8*, ...)
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"m.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"m.c", metadata !"/private/tmp", metadata !"clang", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{null}
-!5 = metadata !{i32 5, i32 2, metadata !6, null}
-!6 = metadata !{i32 589835, metadata !0, i32 4, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 6, i32 1, metadata !6, null}
-
diff --git a/test/Transforms/SimplifyLibCalls/double-float-shrink.ll b/test/Transforms/SimplifyLibCalls/double-float-shrink.ll
deleted file mode 100644
index b4ab8b4ceb9d..000000000000
--- a/test/Transforms/SimplifyLibCalls/double-float-shrink.ll
+++ /dev/null
@@ -1,333 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -enable-double-float-shrink -S | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define float @acos_test(float %f) nounwind readnone {
-; CHECK: acos_test
- %conv = fpext float %f to double
- %call = call double @acos(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @acosf(float %f)
-}
-
-define double @acos_test2(float %f) nounwind readnone {
-; CHECK: acos_test2
- %conv = fpext float %f to double
- %call = call double @acos(double %conv)
- ret double %call
-; CHECK: call double @acos(double %conv)
-}
-
-define float @acosh_test(float %f) nounwind readnone {
-; CHECK: acosh_test
- %conv = fpext float %f to double
- %call = call double @acosh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @acoshf(float %f)
-}
-
-define double @acosh_test2(float %f) nounwind readnone {
-; CHECK: acosh_test2
- %conv = fpext float %f to double
- %call = call double @acosh(double %conv)
- ret double %call
-; CHECK: call double @acosh(double %conv)
-}
-
-define float @asin_test(float %f) nounwind readnone {
-; CHECK: asin_test
- %conv = fpext float %f to double
- %call = call double @asin(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @asinf(float %f)
-}
-
-define double @asin_test2(float %f) nounwind readnone {
-; CHECK: asin_test2
- %conv = fpext float %f to double
- %call = call double @asin(double %conv)
- ret double %call
-; CHECK: call double @asin(double %conv)
-}
-
-define float @asinh_test(float %f) nounwind readnone {
-; CHECK: asinh_test
- %conv = fpext float %f to double
- %call = call double @asinh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @asinhf(float %f)
-}
-
-define double @asinh_test2(float %f) nounwind readnone {
-; CHECK: asinh_test2
- %conv = fpext float %f to double
- %call = call double @asinh(double %conv)
- ret double %call
-; CHECK: call double @asinh(double %conv)
-}
-
-define float @atan_test(float %f) nounwind readnone {
-; CHECK: atan_test
- %conv = fpext float %f to double
- %call = call double @atan(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @atanf(float %f)
-}
-
-define double @atan_test2(float %f) nounwind readnone {
-; CHECK: atan_test2
- %conv = fpext float %f to double
- %call = call double @atan(double %conv)
- ret double %call
-; CHECK: call double @atan(double %conv)
-}
-define float @atanh_test(float %f) nounwind readnone {
-; CHECK: atanh_test
- %conv = fpext float %f to double
- %call = call double @atanh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @atanhf(float %f)
-}
-
-define double @atanh_test2(float %f) nounwind readnone {
-; CHECK: atanh_test2
- %conv = fpext float %f to double
- %call = call double @atanh(double %conv)
- ret double %call
-; CHECK: call double @atanh(double %conv)
-}
-define float @cbrt_test(float %f) nounwind readnone {
-; CHECK: cbrt_test
- %conv = fpext float %f to double
- %call = call double @cbrt(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @cbrtf(float %f)
-}
-
-define double @cbrt_test2(float %f) nounwind readnone {
-; CHECK: cbrt_test2
- %conv = fpext float %f to double
- %call = call double @cbrt(double %conv)
- ret double %call
-; CHECK: call double @cbrt(double %conv)
-}
-define float @exp_test(float %f) nounwind readnone {
-; CHECK: exp_test
- %conv = fpext float %f to double
- %call = call double @exp(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @expf(float %f)
-}
-
-define double @exp_test2(float %f) nounwind readnone {
-; CHECK: exp_test2
- %conv = fpext float %f to double
- %call = call double @exp(double %conv)
- ret double %call
-; CHECK: call double @exp(double %conv)
-}
-define float @expm1_test(float %f) nounwind readnone {
-; CHECK: expm1_test
- %conv = fpext float %f to double
- %call = call double @expm1(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @expm1f(float %f)
-}
-
-define double @expm1_test2(float %f) nounwind readnone {
-; CHECK: expm1_test2
- %conv = fpext float %f to double
- %call = call double @expm1(double %conv)
- ret double %call
-; CHECK: call double @expm1(double %conv)
-}
-define float @exp10_test(float %f) nounwind readnone {
-; CHECK: exp10_test
- %conv = fpext float %f to double
- %call = call double @exp10(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @exp10f(float %f)
-}
-
-define double @exp10_test2(float %f) nounwind readnone {
-; CHECK: exp10_test2
- %conv = fpext float %f to double
- %call = call double @exp10(double %conv)
- ret double %call
-; CHECK: call double @exp10(double %conv)
-}
-define float @log_test(float %f) nounwind readnone {
-; CHECK: log_test
- %conv = fpext float %f to double
- %call = call double @log(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @logf(float %f)
-}
-
-define double @log_test2(float %f) nounwind readnone {
-; CHECK: log_test2
- %conv = fpext float %f to double
- %call = call double @log(double %conv)
- ret double %call
-; CHECK: call double @log(double %conv)
-}
-define float @log10_test(float %f) nounwind readnone {
-; CHECK: log10_test
- %conv = fpext float %f to double
- %call = call double @log10(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @log10f(float %f)
-}
-
-define double @log10_test2(float %f) nounwind readnone {
-; CHECK: log10_test2
- %conv = fpext float %f to double
- %call = call double @log10(double %conv)
- ret double %call
-; CHECK: call double @log10(double %conv)
-}
-define float @log1p_test(float %f) nounwind readnone {
-; CHECK: log1p_test
- %conv = fpext float %f to double
- %call = call double @log1p(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @log1pf(float %f)
-}
-
-define double @log1p_test2(float %f) nounwind readnone {
-; CHECK: log1p_test2
- %conv = fpext float %f to double
- %call = call double @log1p(double %conv)
- ret double %call
-; CHECK: call double @log1p(double %conv)
-}
-define float @log2_test(float %f) nounwind readnone {
-; CHECK: log2_test
- %conv = fpext float %f to double
- %call = call double @log2(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @log2f(float %f)
-}
-
-define double @log2_test2(float %f) nounwind readnone {
-; CHECK: log2_test2
- %conv = fpext float %f to double
- %call = call double @log2(double %conv)
- ret double %call
-; CHECK: call double @log2(double %conv)
-}
-define float @logb_test(float %f) nounwind readnone {
-; CHECK: logb_test
- %conv = fpext float %f to double
- %call = call double @logb(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @logbf(float %f)
-}
-
-define double @logb_test2(float %f) nounwind readnone {
-; CHECK: logb_test2
- %conv = fpext float %f to double
- %call = call double @logb(double %conv)
- ret double %call
-; CHECK: call double @logb(double %conv)
-}
-define float @sin_test(float %f) nounwind readnone {
-; CHECK: sin_test
- %conv = fpext float %f to double
- %call = call double @sin(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @sinf(float %f)
-}
-
-define double @sin_test2(float %f) nounwind readnone {
-; CHECK: sin_test2
- %conv = fpext float %f to double
- %call = call double @sin(double %conv)
- ret double %call
-; CHECK: call double @sin(double %conv)
-}
-define float @sqrt_test(float %f) nounwind readnone {
-; CHECK: sqrt_test
- %conv = fpext float %f to double
- %call = call double @sqrt(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @sqrtf(float %f)
-}
-
-define double @sqrt_test2(float %f) nounwind readnone {
-; CHECK: sqrt_test2
- %conv = fpext float %f to double
- %call = call double @sqrt(double %conv)
- ret double %call
-; CHECK: call double @sqrt(double %conv)
-}
-define float @tan_test(float %f) nounwind readnone {
-; CHECK: tan_test
- %conv = fpext float %f to double
- %call = call double @tan(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @tanf(float %f)
-}
-
-define double @tan_test2(float %f) nounwind readnone {
-; CHECK: tan_test2
- %conv = fpext float %f to double
- %call = call double @tan(double %conv)
- ret double %call
-; CHECK: call double @tan(double %conv)
-}
-define float @tanh_test(float %f) nounwind readnone {
-; CHECK: tanh_test
- %conv = fpext float %f to double
- %call = call double @tanh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
-; CHECK: call float @tanhf(float %f)
-}
-
-define double @tanh_test2(float %f) nounwind readnone {
-; CHECK: tanh_test2
- %conv = fpext float %f to double
- %call = call double @tanh(double %conv)
- ret double %call
-; CHECK: call double @tanh(double %conv)
-}
-
-declare double @tanh(double) nounwind readnone
-declare double @tan(double) nounwind readnone
-declare double @sqrt(double) nounwind readnone
-declare double @sin(double) nounwind readnone
-declare double @log2(double) nounwind readnone
-declare double @log1p(double) nounwind readnone
-declare double @log10(double) nounwind readnone
-declare double @log(double) nounwind readnone
-declare double @logb(double) nounwind readnone
-declare double @exp10(double) nounwind readnone
-declare double @expm1(double) nounwind readnone
-declare double @exp(double) nounwind readnone
-declare double @cbrt(double) nounwind readnone
-declare double @atanh(double) nounwind readnone
-declare double @atan(double) nounwind readnone
-declare double @acos(double) nounwind readnone
-declare double @acosh(double) nounwind readnone
-declare double @asin(double) nounwind readnone
-declare double @asinh(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/exp2.ll b/test/Transforms/SimplifyLibCalls/exp2.ll
deleted file mode 100644
index a5927757cf93..000000000000
--- a/test/Transforms/SimplifyLibCalls/exp2.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | grep "call.*ldexp" | count 4
-; rdar://5852514
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
-
-define double @t1(i32 %x) nounwind {
-entry:
- %tmp12 = sitofp i32 %x to double ; <double> [#uses=1]
- %exp2 = tail call double @exp2( double %tmp12 ) ; <double> [#uses=1]
- ret double %exp2
-}
-
-define float @t4(i8 zeroext %x) nounwind {
-entry:
- %tmp12 = uitofp i8 %x to float ; <float> [#uses=1]
- %tmp3 = tail call float @exp2f( float %tmp12 ) nounwind readonly ; <float> [#uses=1]
- ret float %tmp3
-}
-
-declare float @exp2f(float) nounwind readonly
-
-define double @t3(i16 zeroext %x) nounwind {
-entry:
- %tmp12 = uitofp i16 %x to double ; <double> [#uses=1]
- %exp2 = tail call double @exp2( double %tmp12 ) ; <double> [#uses=1]
- ret double %exp2
-}
-
-define double @t2(i16 signext %x) nounwind {
-entry:
- %tmp12 = sitofp i16 %x to double ; <double> [#uses=1]
- %exp2 = tail call double @exp2( double %tmp12 ) ; <double> [#uses=1]
- ret double %exp2
-}
-
-declare double @exp2(double)
-
diff --git a/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
index aecb887beb3a..ad54c3e38f13 100644
--- a/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
+++ b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -simplify-libcalls -instcombine %s | FileCheck %s
+; RUN: opt -S -simplify-libcalls -instcombine < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/SimplifyLibCalls/floor.ll b/test/Transforms/SimplifyLibCalls/floor.ll
deleted file mode 100644
index 93c62c20023d..000000000000
--- a/test/Transforms/SimplifyLibCalls/floor.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY %s
-
-; DO-SIMPLIFY: call float @floorf(
-; DO-SIMPLIFY: call float @ceilf(
-; DO-SIMPLIFY: call float @roundf(
-; DO-SIMPLIFY: call float @nearbyintf(
-; DO-SIMPLIFY: call float @truncf(
-; DO-SIMPLIFY: call float @fabsf(
-
-; C89-SIMPLIFY: call float @floorf(
-; C89-SIMPLIFY: call float @ceilf(
-; C89-SIMPLIFY: call double @round(
-; C89-SIMPLIFY: call double @nearbyint(
-
-; DONT-SIMPLIFY: call double @floor(
-; DONT-SIMPLIFY: call double @ceil(
-; DONT-SIMPLIFY: call double @round(
-; DONT-SIMPLIFY: call double @nearbyint(
-; DONT-SIMPLIFY: call double @trunc(
-; DONT-SIMPLIFY: call double @fabs(
-
-declare double @floor(double)
-
-declare double @ceil(double)
-
-declare double @round(double)
-
-declare double @nearbyint(double)
-
-declare double @trunc(double)
-
-declare double @fabs(double)
-
-define float @test_floor(float %C) {
- %D = fpext float %C to double ; <double> [#uses=1]
- ; --> floorf
- %E = call double @floor( double %D ) ; <double> [#uses=1]
- %F = fptrunc double %E to float ; <float> [#uses=1]
- ret float %F
-}
-
-define float @test_ceil(float %C) {
- %D = fpext float %C to double ; <double> [#uses=1]
- ; --> ceilf
- %E = call double @ceil( double %D ) ; <double> [#uses=1]
- %F = fptrunc double %E to float ; <float> [#uses=1]
- ret float %F
-}
-
-define float @test_round(float %C) {
- %D = fpext float %C to double ; <double> [#uses=1]
- ; --> roundf
- %E = call double @round( double %D ) ; <double> [#uses=1]
- %F = fptrunc double %E to float ; <float> [#uses=1]
- ret float %F
-}
-
-define float @test_nearbyint(float %C) {
- %D = fpext float %C to double ; <double> [#uses=1]
- ; --> nearbyintf
- %E = call double @nearbyint( double %D ) ; <double> [#uses=1]
- %F = fptrunc double %E to float ; <float> [#uses=1]
- ret float %F
-}
-
-define float @test_trunc(float %C) {
- %D = fpext float %C to double
- ; --> truncf
- %E = call double @trunc(double %D)
- %F = fptrunc double %E to float
- ret float %F
-}
-
-define float @test_fabs(float %C) {
- %D = fpext float %C to double
- ; --> fabsf
- %E = call double @fabs(double %D)
- %F = fptrunc double %E to float
- ret float %F
-}
diff --git a/test/Transforms/SimplifyLibCalls/fwrite.ll b/test/Transforms/SimplifyLibCalls/fwrite.ll
deleted file mode 100644
index f0f3dcaac63e..000000000000
--- a/test/Transforms/SimplifyLibCalls/fwrite.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-%FILE = type { i32 }
-
-@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
-
-define i64 @foo(%FILE* %f) {
-; CHECK: %retval = call i64 @fwrite
- %retval = call i64 @fwrite(i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0), i64 1, i64 1, %FILE* %f)
- ret i64 %retval
-}
-
-declare i64 @fwrite(i8*, i64, i64, %FILE *)
diff --git a/test/Transforms/SimplifyLibCalls/iprintf.ll b/test/Transforms/SimplifyLibCalls/iprintf.ll
deleted file mode 100644
index 7f036fe3ab8b..000000000000
--- a/test/Transforms/SimplifyLibCalls/iprintf.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S -o %t
-; RUN: FileCheck < %t %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "xcore-xmos-elf"
-
-@.str = internal constant [4 x i8] c"%f\0A\00" ; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
-
-; Verify printf with no floating point arguments is transformed to iprintf
-define i32 @f0(i32 %x) nounwind {
-entry:
-; CHECK: define i32 @f0
-; CHECK: @iprintf
-; CHECK: }
- %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x) ; <i32> [#uses=0]
- ret i32 %0
-}
-
-; Verify we don't turn this into an iprintf call
-define void @f1(double %x) nounwind {
-entry:
-; CHECK: define void @f1
-; CHECK: @printf
-; CHECK: }
- %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x) nounwind ; <i32> [#uses=0]
- ret void
-}
-
-; Verify sprintf with no floating point arguments is transformed to siprintf
-define i32 @f2(i8* %p, i32 %x) nounwind {
-entry:
-; CHECK: define i32 @f2
-; CHECK: @siprintf
-; CHECK: }
- %0 = tail call i32 (i8*, i8*, ...)* @sprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x)
- ret i32 %0
-}
-
-; Verify we don't turn this into an siprintf call
-define i32 @f3(i8* %p, double %x) nounwind {
-entry:
-; CHECK: define i32 @f3
-; CHECK: @sprintf
-; CHECK: }
- %0 = tail call i32 (i8*, i8*, ...)* @sprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x)
- ret i32 %0
-}
-
-; Verify fprintf with no floating point arguments is transformed to fiprintf
-define i32 @f4(i8* %p, i32 %x) nounwind {
-entry:
-; CHECK: define i32 @f4
-; CHECK: @fiprintf
-; CHECK: }
- %0 = tail call i32 (i8*, i8*, ...)* @fprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x)
- ret i32 %0
-}
-
-; Verify we don't turn this into an fiprintf call
-define i32 @f5(i8* %p, double %x) nounwind {
-entry:
-; CHECK: define i32 @f5
-; CHECK: @fprintf
-; CHECK: }
- %0 = tail call i32 (i8*, i8*, ...)* @fprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x)
- ret i32 %0
-}
-
-declare i32 @printf(i8* nocapture, ...) nounwind
-declare i32 @sprintf(i8* nocapture, i8* nocapture, ...) nounwind
-declare i32 @fprintf(i8* nocapture, i8* nocapture, ...) nounwind
diff --git a/test/Transforms/SimplifyLibCalls/osx-names.ll b/test/Transforms/SimplifyLibCalls/osx-names.ll
deleted file mode 100644
index e321d1dd3171..000000000000
--- a/test/Transforms/SimplifyLibCalls/osx-names.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-; <rdar://problem/9815881>
-; On OSX x86-32, fwrite and fputs aren't called fwrite and fputs.
-; Make sure we use the correct names.
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
-target triple = "i386-apple-macosx10.7.2"
-
-%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
-%struct.__sbuf = type { i8*, i32 }
-%struct.__sFILEX = type opaque
-
-@.str = private unnamed_addr constant [13 x i8] c"Hello world\0A\00", align 1
-@.str2 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
-
-define void @test1(%struct.__sFILE* %stream) nounwind {
-; CHECK: define void @test1
-; CHECK: call i32 @"fwrite$UNIX2003"
- %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) nounwind
- ret void
-}
-
-define void @test2(%struct.__sFILE* %stream, i8* %str) nounwind ssp {
-; CHECK: define void @test2
-; CHECK: call i32 @"fputs$UNIX2003"
- %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([3 x i8]* @.str2, i32 0, i32 0), i8* %str) nounwind
- ret void
-}
-
-declare i32 @fprintf(%struct.__sFILE*, i8*, ...) nounwind
diff --git a/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll b/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll
deleted file mode 100644
index 0480fdda8916..000000000000
--- a/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-; rdar://7251832
-
-; SimplifyLibcalls should optimize pow(x, 0.5) to sqrt plus code to handle
-; special cases. The readonly attribute on the call should be preserved.
-
-; CHECK: define float @foo(float %x) nounwind {
-; CHECK: %sqrtf = call float @sqrtf(float %x) nounwind readonly
-; CHECK: %fabsf = call float @fabsf(float %sqrtf) nounwind readonly
-; CHECK: %1 = fcmp oeq float %x, 0xFFF0000000000000
-; CHECK: %retval = select i1 %1, float 0x7FF0000000000000, float %fabsf
-; CHECK: ret float %retval
-
-define float @foo(float %x) nounwind {
- %retval = call float @powf(float %x, float 0.5)
- ret float %retval
-}
-
-; CHECK: define double @doo(double %x) nounwind {
-; CHECK: %sqrt = call double @sqrt(double %x) nounwind readonly
-; CHECK: %fabs = call double @fabs(double %sqrt) nounwind readonly
-; CHECK: %1 = fcmp oeq double %x, 0xFFF0000000000000
-; CHECK: %retval = select i1 %1, double 0x7FF0000000000000, double %fabs
-; CHECK: ret double %retval
-; CHECK: }
-
-define double @doo(double %x) nounwind {
- %retval = call double @pow(double %x, double 0.5)
- ret double %retval
-}
-
-declare float @powf(float, float) nounwind readonly
-declare double @pow(double, double) nounwind readonly
diff --git a/test/Transforms/SimplifyLibCalls/pow2.ll b/test/Transforms/SimplifyLibCalls/pow2.ll
deleted file mode 100644
index f0964e7d6daa..000000000000
--- a/test/Transforms/SimplifyLibCalls/pow2.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; Testcase for calls to the standard C "pow" function
-;
-; RUN: opt < %s -simplify-libcalls -S | not grep "call .pow"
-
-
-declare double @pow(double, double)
-declare float @powf(float, float)
-
-define double @test1(double %X) {
- %Y = call double @pow( double %X, double 0.000000e+00 ) ; <double> [#uses=1]
- ret double %Y
-}
-
-define double @test2(double %X) {
- %Y = call double @pow( double %X, double -0.000000e+00 ) ; <double> [#uses=1]
- ret double %Y
-}
-
-define double @test3(double %X) {
- %Y = call double @pow( double 1.000000e+00, double %X ) ; <double> [#uses=1]
- ret double %Y
-}
-
-define double @test4(double %X) {
- %Y = call double @pow( double %X, double 2.0)
- ret double %Y
-}
-
-define float @test4f(float %X) {
- %Y = call float @powf( float %X, float 2.0)
- ret float %Y
-}
-
-define float @test5f(float %X) {
- %Y = call float @powf(float 2.0, float %X) ;; exp2
- ret float %Y
-}
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index 3965c3782276..7de5a028054a 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -strip-dead-debug-info -disable-output %s
+; RUN: opt -strip-dead-debug-info -disable-output < %s
define i32 @foo() nounwind ssp {
entry:
ret i32 0, !dbg !8
diff --git a/test/Transforms/StripSymbols/block-address.ll b/test/Transforms/StripSymbols/block-address.ll
index d22c6b1b157c..113d4d94fa40 100644
--- a/test/Transforms/StripSymbols/block-address.ll
+++ b/test/Transforms/StripSymbols/block-address.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -strip -S | FileCheck %s
+; RUN: opt -strip -S < %s | FileCheck %s
; PR10286
@main_addrs = constant [2 x i8*] [i8* blockaddress(@f, %FOO), i8* blockaddress(@f, %BAR)]
diff --git a/test/Transforms/TailCallElim/ackermann.ll b/test/Transforms/TailCallElim/ackermann.ll
index 5b5dbcc225c1..83d98b84ea70 100644
--- a/test/Transforms/TailCallElim/ackermann.ll
+++ b/test/Transforms/TailCallElim/ackermann.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; This function contains two tail calls, which should be eliminated
; RUN: opt < %s -tailcallelim -stats -disable-output 2>&1 | grep "2 tailcallelim"
diff --git a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
index e4f8b483c3c0..97e67b26424d 100644
--- a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
+++ b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -tailcallelim -S | \
-; RUN: grep "call i32 @foo"
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
declare void @bar(i32*)
@@ -7,6 +6,7 @@ define i32 @foo(i32 %N) {
%A = alloca i32, i32 %N ; <i32*> [#uses=2]
store i32 17, i32* %A
call void @bar( i32* %A )
+; CHECK: tail call i32 @foo
%X = tail call i32 @foo( i32 %N ) ; <i32> [#uses=1]
ret i32 %X
}
diff --git a/test/Transforms/TailCallElim/dup_tail.ll b/test/Transforms/TailCallElim/dup_tail.ll
index 42ac2f9dc4b9..f5b87f27644d 100644
--- a/test/Transforms/TailCallElim/dup_tail.ll
+++ b/test/Transforms/TailCallElim/dup_tail.ll
@@ -1,5 +1,8 @@
+; REQUIRES: asserts
; Duplicate the return into if.end to enable TCE.
-; RUN: opt %s -tailcallelim -stats -disable-output 2>&1 | grep "Number of return duplicated"
+; RUN: opt -tailcallelim -stats -disable-output < %s 2>&1 | FileCheck %s
+
+; CHECK: Number of return duplicated
define i32 @fib(i32 %n) nounwind ssp {
entry:
diff --git a/test/Transforms/TailCallElim/intervening-inst.ll b/test/Transforms/TailCallElim/intervening-inst.ll
index 0c40bd5dc50d..10dffbd69425 100644
--- a/test/Transforms/TailCallElim/intervening-inst.ll
+++ b/test/Transforms/TailCallElim/intervening-inst.ll
@@ -1,5 +1,5 @@
; This function contains intervening instructions which should be moved out of the way
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
define i32 @Test(i32 %X) {
entry:
@@ -10,6 +10,7 @@ then.0: ; preds = %entry
ret i32 %tmp.4
endif.0: ; preds = %entry
%tmp.10 = add i32 %X, -1 ; <i32> [#uses=1]
+; CHECK-NOT: call
%tmp.8 = call i32 @Test( i32 %tmp.10 ) ; <i32> [#uses=1]
%DUMMY = add i32 %X, 1 ; <i32> [#uses=0]
ret i32 %tmp.8
diff --git a/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll b/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
index a556ddb6eb1d..741f5848bc67 100644
--- a/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
+++ b/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll
@@ -1,4 +1,4 @@
-; RUN: opt -tailcallelim %s -S | FileCheck %s
+; RUN: opt -tailcallelim -S < %s | FileCheck %s
; PR615
declare void @bar(i32*)
diff --git a/test/Transforms/TailCallElim/nocapture.ll b/test/Transforms/TailCallElim/nocapture.ll
index 87cb9dd427b4..e49d87cc4b59 100644
--- a/test/Transforms/TailCallElim/nocapture.ll
+++ b/test/Transforms/TailCallElim/nocapture.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -tailcallelim -S | FileCheck %s
+; RUN: opt -tailcallelim -S < %s | FileCheck %s
; XFAIL: *
declare void @use(i8* nocapture, i8* nocapture)
diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll
index 7f5c36e4a207..53c65dab101b 100644
--- a/test/Transforms/TailCallElim/reorder_load.ll
+++ b/test/Transforms/TailCallElim/reorder_load.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
; PR4323
; Several cases where tail call elimination should move the load above the call,
@@ -21,6 +21,7 @@ if: ; preds = %entry
else: ; preds = %entry
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
%tmp9 = load i32* %a_arg ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
@@ -47,6 +48,7 @@ unwind: ; preds = %else
recurse: ; preds = %else
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
%tmp9 = load i32* @global ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
@@ -66,6 +68,7 @@ if: ; preds = %entry
else: ; preds = %entry
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
%tmp9 = load i32* @extern_weak_global ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
@@ -94,6 +97,7 @@ unwind: ; preds = %else
recurse: ; preds = %else
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
%first = load i32* %a_arg ; <i32> [#uses=1]
+; CHECK-NOT: call
%tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7) ; <i32> [#uses=1]
%second = load i32* %a_arg ; <i32> [#uses=1]
%tmp10 = add i32 %second, %tmp8 ; <i32> [#uses=1]
diff --git a/test/Transforms/TailCallElim/return_constant.ll b/test/Transforms/TailCallElim/return_constant.ll
index 48e5641bb57a..e99e57e1457d 100644
--- a/test/Transforms/TailCallElim/return_constant.ll
+++ b/test/Transforms/TailCallElim/return_constant.ll
@@ -1,7 +1,7 @@
; Though this case seems to be fairly unlikely to occur in the wild, someone
; plunked it into the demo script, so maybe they care about it.
;
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
define i32 @aaa(i32 %c) {
entry:
@@ -9,6 +9,7 @@ entry:
br i1 %tmp.1, label %return, label %else
else: ; preds = %entry
%tmp.5 = add i32 %c, -1 ; <i32> [#uses=1]
+; CHECK-NOT: call
%tmp.3 = call i32 @aaa( i32 %tmp.5 ) ; <i32> [#uses=0]
ret i32 0
return: ; preds = %entry
diff --git a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
index 3d01d1709952..7049e4d588d4 100644
--- a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
+++ b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll
@@ -1,11 +1,11 @@
-; RUN: opt < %s -tailcallelim -S | \
-; RUN: grep "tail call void @foo"
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
declare void @foo()
define void @bar() {
- call void @foo( )
+; CHECK: tail call void @foo()
+ call void @foo()
ret void
}
diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
index 7853d7ba06fb..292186020f4f 100644
--- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
+++ b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output 2>&1 | not grep tailduplicate
; XFAIL: *
diff --git a/test/Unit/lit.cfg b/test/Unit/lit.cfg
index ba4cbc5d7980..15cf626c72bf 100644
--- a/test/Unit/lit.cfg
+++ b/test/Unit/lit.cfg
@@ -28,6 +28,11 @@ if 'TMP' in os.environ:
if 'TEMP' in os.environ:
config.environment['TEMP'] = os.environ['TEMP']
+# Propagate path to symbolizer for ASan/MSan.
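+# (Their runtimes read these variables to locate llvm-symbolizer when
+# symbolizing failure reports.)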
+for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']:
+ if symbolizer in os.environ:
+ config.environment[symbolizer] = os.environ[symbolizer]
+
###
# Check that the object root is known.
diff --git a/test/Verifier/module-flags-1.ll b/test/Verifier/module-flags-1.ll
new file mode 100644
index 000000000000..e5feaf3a580d
--- /dev/null
+++ b/test/Verifier/module-flags-1.ll
@@ -0,0 +1,60 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; Check that module flags are structurally correct.
+;
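+; (For reference, a well-formed flag is a triple of behavior, ID, and value,
+; e.g. !{ i32 1, metadata !"some-id", i32 7 } -- illustrative only.)
+;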
+; CHECK: incorrect number of operands in module flag
+; CHECK: metadata !0
+!0 = metadata !{ i32 1 }
+; CHECK: invalid behavior operand in module flag (expected constant integer)
+; CHECK: metadata !"foo"
+!1 = metadata !{ metadata !"foo", metadata !"foo", i32 42 }
+; CHECK: invalid behavior operand in module flag (unexpected constant)
+; CHECK: i32 999
+!2 = metadata !{ i32 999, metadata !"foo", i32 43 }
+; CHECK: invalid ID operand in module flag (expected metadata string)
+; CHECK: i32 1
+!3 = metadata !{ i32 1, i32 1, i32 44 }
+; CHECK: invalid value for 'require' module flag (expected metadata pair)
+; CHECK: i32 45
+!4 = metadata !{ i32 3, metadata !"bla", i32 45 }
+; CHECK: invalid value for 'require' module flag (expected metadata pair)
+; CHECK: metadata !
+!5 = metadata !{ i32 3, metadata !"bla", metadata !{ i32 46 } }
+; CHECK: invalid value for 'require' module flag (first value operand should be a string)
+; CHECK: i32 47
+!6 = metadata !{ i32 3, metadata !"bla", metadata !{ i32 47, i32 48 } }
+
+; Check that module flags only have unique IDs.
+;
+; CHECK: module flag identifiers must be unique (or of 'require' type)
+!7 = metadata !{ i32 1, metadata !"foo", i32 49 }
+!8 = metadata !{ i32 2, metadata !"foo", i32 50 }
+; CHECK-NOT: module flag identifiers must be unique
+!9 = metadata !{ i32 2, metadata !"bar", i32 51 }
+!10 = metadata !{ i32 3, metadata !"bar", metadata !{ metadata !"bar", i32 51 } }
+
+; Check that any 'append'-type module flags are valid.
+; CHECK: invalid value for 'append'-type module flag (expected a metadata node)
+!16 = metadata !{ i32 5, metadata !"flag-2", i32 56 }
+; CHECK: invalid value for 'append'-type module flag (expected a metadata node)
+!17 = metadata !{ i32 5, metadata !"flag-3", i32 57 }
+; CHECK-NOT: invalid value for 'append'-type module flag (expected a metadata node)
+!18 = metadata !{ i32 5, metadata !"flag-4", metadata !{ i32 57 } }
+
+; Check that any 'require' module flags are valid.
+; CHECK: invalid requirement on flag, flag is not present in module
+!11 = metadata !{ i32 3, metadata !"bar",
+ metadata !{ metadata !"no-such-flag", i32 52 } }
+; CHECK: invalid requirement on flag, flag does not have the required value
+!12 = metadata !{ i32 1, metadata !"flag-0", i32 53 }
+!13 = metadata !{ i32 3, metadata !"bar",
+ metadata !{ metadata !"flag-0", i32 54 } }
+; CHECK-NOT: invalid requirement on flag, flag is not present in module
+; CHECK-NOT: invalid requirement on flag, flag does not have the required value
+!14 = metadata !{ i32 1, metadata !"flag-1", i32 55 }
+!15 = metadata !{ i32 3, metadata !"bar",
+ metadata !{ metadata !"flag-1", i32 55 } }
+
+!llvm.module.flags = !{
+ !0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15,
+ !16, !17, !18 }
diff --git a/test/lit.cfg b/test/lit.cfg
index 79eaa23c8ba9..0ecd8feb26a2 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -90,6 +90,11 @@ config.environment['LLVM_SRC_ROOT'] = getattr(config, 'llvm_src_root', '')
config.environment['PYTHON_EXECUTABLE'] = getattr(config, 'python_executable',
'')
+# Propagate path to symbolizer for ASan/MSan.
+for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']:
+ if symbolizer in os.environ:
+ config.environment[symbolizer] = os.environ[symbolizer]
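+# For example (hypothetical path), running the test suite with
+#   ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer
+# in the environment lets sanitizer reports print symbolized stack traces.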
+
###
import os
@@ -140,12 +145,16 @@ if config.test_exec_root is None:
###
-# Provide a target triple for mcjit tests
-mcjit_triple = config.target_triple
-# Force ELF format on Windows
-if re.search(r'cygwin|mingw32|win32', mcjit_triple):
- mcjit_triple += "-elf"
-config.substitutions.append( ('%mcjit_triple', mcjit_triple) )
+# Provide a command line for mcjit tests
+lli_mcjit = 'lli -use-mcjit'
+# By default lli uses the process target triple (a triple appropriate for
+# generating code for the current process), but because COFF support in MCJIT
+# is not yet good enough for the tests, force ELF format on Windows.
+# FIXME: the process target triple should be used here, but it is difficult
+# to obtain on Windows.
+if re.search(r'cygwin|mingw32|win32', config.host_triple):
+ lli_mcjit += ' -mtriple='+config.host_triple+'-elf'
+config.substitutions.append( ('%lli_mcjit', lli_mcjit) )
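+# A test then only needs the substitution (illustrative RUN line):
+#   ; RUN: %lli_mcjit %s > /dev/null
+# which lit expands to "lli -use-mcjit", plus the ELF -mtriple on Windows.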
# Provide a substitution for those tests that need to run the JIT to obtain data
# but simply want to use the currently most reliable JIT for the platform
@@ -197,7 +206,7 @@ for pattern in [r"\bbugpoint\b(?!-)", r"(?<!/|-)\bclang\b(?!-)",
r"\bllvm-bcanalyzer\b", r"\bllvm-config\b",
r"\bllvm-cov\b", r"\bllvm-diff\b",
r"\bllvm-dis\b", r"\bllvm-dwarfdump\b",
- r"\bllvm-extract\b",
+ r"\bllvm-extract\b", r"\bllvm-jistlistener\b",
r"\bllvm-link\b", r"\bllvm-mc\b",
r"\bllvm-nm\b", r"\bllvm-objdump\b",
r"\bllvm-prof\b", r"\bllvm-ranlib\b",
@@ -248,6 +257,13 @@ if loadable_module:
if config.lto_is_enabled == "1" and platform.system() == "Darwin":
config.available_features.add('lto_on_osx')
+# Sanitizers.
+if config.llvm_use_sanitizer == "Address":
+ config.available_features.add("asan")
+if (config.llvm_use_sanitizer == "Memory" or
+ config.llvm_use_sanitizer == "MemoryWithOrigins"):
+ config.available_features.add("msan")
+
# llc knows whether it is compiled with -DNDEBUG.
import subprocess
try:
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 2bbe63e6348e..8024b24fcde7 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -1,5 +1,6 @@
## Autogenerated by LLVM/Clang configuration.
# Do not edit!
+config.host_triple = "@LLVM_HOSTTRIPLE@"
config.target_triple = "@TARGET_TRIPLE@"
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_obj_root = "@LLVM_BINARY_DIR@"
@@ -16,6 +17,8 @@ config.targets_to_build = "@TARGETS_TO_BUILD@"
config.llvm_bindings = "@LLVM_BINDINGS@"
config.host_os = "@HOST_OS@"
config.host_arch = "@HOST_ARCH@"
+config.llvm_use_intel_jitevents = "@LLVM_USE_INTEL_JITEVENTS@"
+config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
# Support substitution of the tools_dir with user parameters. This is
# used when we can't determine the tool dir at configuration time.
diff --git a/test/tools/llvm-lit/chain.c b/test/tools/llvm-lit/chain.c
new file mode 100644
index 000000000000..6f6541d2e46a
--- /dev/null
+++ b/test/tools/llvm-lit/chain.c
@@ -0,0 +1,9 @@
+// This test should fail. lit used to interpret this as:
+// (false && false) || true
+// instead of the intended
+// false && (false || true
+//
+// RUN: false
+// RUN: false || true
+//
+// XFAIL: *
diff --git a/test/tools/llvm-lit/lit.local.cfg b/test/tools/llvm-lit/lit.local.cfg
new file mode 100644
index 000000000000..856a54932f0b
--- /dev/null
+++ b/test/tools/llvm-lit/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.c']
diff --git a/test/tools/llvm-objdump/disassembly-show-raw.s b/test/tools/llvm-objdump/disassembly-show-raw.s
new file mode 100644
index 000000000000..32fcad4a369d
--- /dev/null
+++ b/test/tools/llvm-objdump/disassembly-show-raw.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -filetype=obj -arch=x86 %s | llvm-objdump -d - \
+// RUN: | FileCheck %s -check-prefix=WITHRAW
+// RUN: llvm-mc -filetype=obj -arch=x86 %s | llvm-objdump -d -no-show-raw-insn - \
+// RUN: | FileCheck %s -check-prefix=NORAW
+
+// Expect to find the raw encoding when run with raw output (the default), but
+// not when run explicitly with -no-show-raw-insn.
+
+movl 0, %eax
+// WITHRAW: a1 00 00 00 00 movl
+
+// NORAW: movl
+// NORAW-NOT: a1 00
+
+
diff --git a/test/tools/llvm-objdump/lit.local.cfg b/test/tools/llvm-objdump/lit.local.cfg
new file mode 100644
index 000000000000..56bf00859572
--- /dev/null
+++ b/test/tools/llvm-objdump/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if 'X86' not in targets:
+ config.unsupported = True
+
diff --git a/test/tools/llvm-objdump/win64-unwind-data.s b/test/tools/llvm-objdump/win64-unwind-data.s
new file mode 100644
index 000000000000..1e4c7428ce32
--- /dev/null
+++ b/test/tools/llvm-objdump/win64-unwind-data.s
@@ -0,0 +1,106 @@
+// This test checks that the unwind data is dumped by llvm-objdump.
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-objdump -u - | FileCheck %s
+
+// CHECK: Unwind info:
+// CHECK: Function Table:
+// CHECK-NEXT: Start Address: .text
+// CHECK-NEXT: End Address: .text + 0x001b
+// CHECK-NEXT: Unwind Info Address: .xdata
+// CHECK-NEXT: Version: 1
+// CHECK-NEXT: Flags: 1 UNW_ExceptionHandler
+// CHECK-NEXT: Size of prolog: 18
+// CHECK-NEXT: Number of Codes: 8
+// CHECK-NEXT: Frame register: RBX
+// CHECK-NEXT: Frame offset: 0
+// CHECK-NEXT: Unwind Codes:
+// CHECK-NEXT: 0x00: UOP_SetFPReg
+// CHECK-NEXT: 0x0f: UOP_PushNonVol RBX
+// CHECK-NEXT: 0x0e: UOP_SaveXMM128 XMM8 [0x0000]
+// CHECK-NEXT: 0x09: UOP_SaveNonVol RSI [0x0010]
+// CHECK-NEXT: 0x04: UOP_AllocSmall 24
+// CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code
+// CHECK: Function Table:
+// CHECK-NEXT: Start Address: .text + 0x0012
+// CHECK-NEXT: End Address: .text + 0x0012
+// CHECK-NEXT: Unwind Info Address: .xdata + 0x001c
+// CHECK-NEXT: Version: 1
+// CHECK-NEXT: Flags: 4 UNW_ChainInfo
+// CHECK-NEXT: Size of prolog: 0
+// CHECK-NEXT: Number of Codes: 0
+// CHECK-NEXT: No frame pointer used
+// CHECK: Function Table:
+// CHECK-NEXT: Start Address: .text + 0x001b
+// CHECK-NEXT: End Address: .text + 0x001c
+// CHECK-NEXT: Unwind Info Address: .xdata + 0x002c
+// CHECK-NEXT: Version: 1
+// CHECK-NEXT: Flags: 0
+// CHECK-NEXT: Size of prolog: 0
+// CHECK-NEXT: Number of Codes: 0
+// CHECK-NEXT: No frame pointer used
+// CHECK: Function Table:
+// CHECK-NEXT: Start Address: .text + 0x001c
+// CHECK-NEXT: End Address: .text + 0x0039
+// CHECK-NEXT: Unwind Info Address: .xdata + 0x0034
+// CHECK-NEXT: Version: 1
+// CHECK-NEXT: Flags: 0
+// CHECK-NEXT: Size of prolog: 14
+// CHECK-NEXT: Number of Codes: 6
+// CHECK-NEXT: No frame pointer used
+// CHECK-NEXT: Unwind Codes:
+// CHECK-NEXT: 0x0e: UOP_AllocLarge 8454128
+// CHECK-NEXT: 0x07: UOP_AllocLarge 8190
+// CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code
+
+ .text
+ .globl func
+ .def func; .scl 2; .type 32; .endef
+ .seh_proc func
+func:
+ .seh_pushframe @code
+ subq $24, %rsp
+ .seh_stackalloc 24
+ movq %rsi, 16(%rsp)
+ .seh_savereg %rsi, 16
+ movups %xmm8, (%rsp)
+ .seh_savexmm %xmm8, 0
+ pushq %rbx
+ .seh_pushreg 3
+ mov %rsp, %rbx
+ .seh_setframe 3, 0
+ .seh_endprologue
+ .seh_handler __C_specific_handler, @except
+ .seh_handlerdata
+ .long 0
+ .text
+ .seh_startchained
+ .seh_endprologue
+ .seh_endchained
+ lea (%rbx), %rsp
+ pop %rbx
+ addq $24, %rsp
+ ret
+ .seh_endproc
+
+// Test emission of small functions.
+ .globl smallFunc
+ .def smallFunc; .scl 2; .type 32; .endef
+ .seh_proc smallFunc
+smallFunc:
+ ret
+ .seh_endproc
+
+// Function with a big stack allocation.
+ .globl allocFunc
+ .def allocFunc; .scl 2; .type 32; .endef
+ .seh_proc allocFunc
+allocFunc:
+ .seh_pushframe @code
+ subq $65520, %rsp
+ .seh_stackalloc 65520
+ sub $8454128, %rsp
+ .seh_stackalloc 8454128
+ .seh_endprologue
+ add $8454128, %rsp
+ addq $65520, %rsp
+ ret
+ .seh_endproc
diff --git a/test/tools/llvm-readobj/Inputs/trivial.ll b/test/tools/llvm-readobj/Inputs/trivial.ll
new file mode 100644
index 000000000000..2cd7ec89e246
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.ll
@@ -0,0 +1,19 @@
+; llc -mtriple=i386-pc-win32 trivial.ll -filetype=obj -o trivial-object-test.coff-i386
+; llc -mtriple=x86_64-pc-win32 trivial.ll -filetype=obj -o trivial-object-test.coff-x86-64
+; llc -mtriple=i386-linux-gnu trivial.ll -filetype=obj -o trivial-object-test.elf-i386 -relocation-model=pic
+; llc -mtriple=x86_64-linux-gnu trivial.ll -filetype=obj -o trivial-object-test.elf-x86-64 -relocation-model=pic
+; llc -mtriple=i386-apple-darwin10 trivial.ll -filetype=obj -o trivial-object-test.macho-i386 -relocation-model=pic
+; llc -mtriple=x86_64-apple-darwin10 trivial.ll -filetype=obj -o trivial-object-test.macho-x86-64 -relocation-model=pic
+
+@.str = private unnamed_addr constant [13 x i8] c"Hello World\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %call = tail call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) nounwind
+ tail call void bitcast (void (...)* @SomeOtherFunction to void ()*)() nounwind
+ ret i32 0
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+declare void @SomeOtherFunction(...)
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.coff-i386 b/test/tools/llvm-readobj/Inputs/trivial.obj.coff-i386
new file mode 100644
index 000000000000..282e5699a767
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.obj.coff-i386
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.coff-x86-64 b/test/tools/llvm-readobj/Inputs/trivial.obj.coff-x86-64
new file mode 100644
index 000000000000..8a7060e61076
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.obj.coff-x86-64
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.elf-i386 b/test/tools/llvm-readobj/Inputs/trivial.obj.elf-i386
new file mode 100644
index 000000000000..f85e40d6261f
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.obj.elf-i386
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.elf-x86-64 b/test/tools/llvm-readobj/Inputs/trivial.obj.elf-x86-64
new file mode 100644
index 000000000000..95285c1f230c
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.obj.elf-x86-64
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.macho-i386 b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-i386
new file mode 100644
index 000000000000..5048171ccb0c
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-i386
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.macho-x86-64 b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-x86-64
new file mode 100644
index 000000000000..bcdfc8aa6967
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-x86-64
Binary files differ
diff --git a/test/tools/llvm-readobj/file-headers.test b/test/tools/llvm-readobj/file-headers.test
new file mode 100644
index 000000000000..226eb9342334
--- /dev/null
+++ b/test/tools/llvm-readobj/file-headers.test
@@ -0,0 +1,100 @@
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.coff-i386 \
+RUN: | FileCheck %s -check-prefix COFF32
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.coff-x86-64 \
+RUN: | FileCheck %s -check-prefix COFF64
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF32
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.elf-x86-64 \
+RUN: | FileCheck %s -check-prefix ELF64
+
+COFF32: File: {{(.*[/\\])?}}trivial.obj.coff-i386
+COFF32-NEXT: Format: COFF-i386
+COFF32-NEXT: Arch: i386
+COFF32-NEXT: AddressSize: 32bit
+COFF32-NEXT: ImageFileHeader {
+COFF32-NEXT: Machine: IMAGE_FILE_MACHINE_I386 (0x14C)
+COFF32-NEXT: SectionCount: 2
+COFF32-NEXT: TimeDateStamp: 2013-03-20 17:56:46 (0x5149F85E)
+COFF32-NEXT: PointerToSymbolTable: 0xA5
+COFF32-NEXT: SymbolCount: 7
+COFF32-NEXT: OptionalHeaderSize: 0
+COFF32-NEXT: Characteristics [ (0x0)
+COFF32-NEXT: ]
+COFF32-NEXT: }
+
+COFF64: File: {{(.*[/\\])?}}trivial.obj.coff-x86-64
+COFF64-NEXT: Format: COFF-x86-64
+COFF64-NEXT: Arch: x86_64
+COFF64-NEXT: AddressSize: 64bit
+COFF64-NEXT: ImageFileHeader {
+COFF64-NEXT: Machine: IMAGE_FILE_MACHINE_AMD64 (0x8664)
+COFF64-NEXT: SectionCount: 2
+COFF64-NEXT: TimeDateStamp: 2013-03-20 17:56:46 (0x5149F85E)
+COFF64-NEXT: PointerToSymbolTable: 0xAB
+COFF64-NEXT: SymbolCount: 7
+COFF64-NEXT: OptionalHeaderSize: 0
+COFF64-NEXT: Characteristics [ (0x0)
+COFF64-NEXT: ]
+COFF64-NEXT: }
+
+ELF32: File: {{(.*[/\\])?}}trivial.obj.elf-i386
+ELF32-NEXT: Format: ELF32-i386
+ELF32-NEXT: Arch: i386
+ELF32-NEXT: AddressSize: 32bit
+ELF32-NEXT: LoadName:
+ELF32-NEXT: ElfHeader {
+ELF32-NEXT: Ident {
+ELF32-NEXT: Magic: (7F 45 4C 46)
+ELF32-NEXT: Class: 32-bit (0x1)
+ELF32-NEXT: DataEncoding: LittleEndian (0x1)
+ELF32-NEXT: FileVersion: 1
+ELF32-NEXT: OS/ABI: GNU/Linux (0x3)
+ELF32-NEXT: ABIVersion: 0
+ELF32-NEXT: Unused: (00 00 00 00 00 00 00)
+ELF32-NEXT: }
+ELF32-NEXT: Type: Relocatable (0x1)
+ELF32-NEXT: Machine: EM_386 (0x3)
+ELF32-NEXT: Version: 1
+ELF32-NEXT: Entry: 0x0
+ELF32-NEXT: ProgramHeaderOffset: 0x0
+ELF32-NEXT: SectionHeaderOffset: 0xC8
+ELF32-NEXT: Flags [ (0x0)
+ELF32-NEXT: ]
+ELF32-NEXT: HeaderSize: 52
+ELF32-NEXT: ProgramHeaderEntrySize: 0
+ELF32-NEXT: ProgramHeaderCount: 0
+ELF32-NEXT: SectionHeaderEntrySize: 40
+ELF32-NEXT: SectionHeaderCount: 10
+ELF32-NEXT: StringTableSectionIndex: 7
+ELF32-NEXT: }
+
+ELF64: File: {{(.*[/\\])?}}trivial.obj.elf-x86-64
+ELF64-NEXT: Format: ELF64-x86-64
+ELF64-NEXT: Arch: x86_64
+ELF64-NEXT: AddressSize: 64bit
+ELF64-NEXT: LoadName:
+ELF64-NEXT: ElfHeader {
+ELF64-NEXT: Ident {
+ELF64-NEXT: Magic: (7F 45 4C 46)
+ELF64-NEXT: Class: 64-bit (0x2)
+ELF64-NEXT: DataEncoding: LittleEndian (0x1)
+ELF64-NEXT: FileVersion: 1
+ELF64-NEXT: OS/ABI: GNU/Linux (0x3)
+ELF64-NEXT: ABIVersion: 0
+ELF64-NEXT: Unused: (00 00 00 00 00 00 00)
+ELF64-NEXT: }
+ELF64-NEXT: Type: Relocatable (0x1)
+ELF64-NEXT: Machine: EM_X86_64 (0x3E)
+ELF64-NEXT: Version: 1
+ELF64-NEXT: Entry: 0x0
+ELF64-NEXT: ProgramHeaderOffset: 0x0
+ELF64-NEXT: SectionHeaderOffset: 0xB8
+ELF64-NEXT: Flags [ (0x0)
+ELF64-NEXT: ]
+ELF64-NEXT: HeaderSize: 64
+ELF64-NEXT: ProgramHeaderEntrySize: 0
+ELF64-NEXT: ProgramHeaderCount: 0
+ELF64-NEXT: SectionHeaderEntrySize: 64
+ELF64-NEXT: SectionHeaderCount: 10
+ELF64-NEXT: StringTableSectionIndex: 7
+ELF64-NEXT: }
diff --git a/test/tools/llvm-readobj/lit.local.cfg b/test/tools/llvm-readobj/lit.local.cfg
new file mode 100644
index 000000000000..df9b335dd131
--- /dev/null
+++ b/test/tools/llvm-readobj/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.test']
diff --git a/test/tools/llvm-readobj/relocations.test b/test/tools/llvm-readobj/relocations.test
new file mode 100644
index 000000000000..060856537356
--- /dev/null
+++ b/test/tools/llvm-readobj/relocations.test
@@ -0,0 +1,32 @@
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.coff-i386 \
+RUN: | FileCheck %s -check-prefix COFF
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.macho-i386 \
+RUN: | FileCheck %s -check-prefix MACHO
+
+COFF: Relocations [
+COFF-NEXT: Section (1) .text {
+COFF-NEXT: 0x4 IMAGE_REL_I386_DIR32 .data
+COFF-NEXT: 0x9 IMAGE_REL_I386_REL32 _puts
+COFF-NEXT: 0xE IMAGE_REL_I386_REL32 _SomeOtherFunction
+COFF-NEXT: }
+COFF-NEXT: ]
+
+ELF: Relocations [
+ELF-NEXT: Section (1) .text {
+ELF-NEXT: 0xC R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 0x0
+ELF-NEXT: 0x12 R_386_GOTOFF .L.str 0x0
+ELF-NEXT: 0x1A R_386_PLT32 puts 0x0
+ELF-NEXT: 0x1F R_386_PLT32 SomeOtherFunction 0x0
+ELF-NEXT: }
+ELF-NEXT: ]
+
+MACHO: Relocations [
+MACHO-NEXT: Section __text {
+MACHO-NEXT: 0x18 GENERIC_RELOC_VANILLA _SomeOtherFunction 0x0
+MACHO-NEXT: 0x13 GENERIC_RELOC_VANILLA _puts 0x0
+MACHO-NEXT: 0xB GENERIC_RELOC_LOCAL_SECTDIFF _main 0x{{[0-9A-F]+}}
+MACHO-NEXT: 0x0 GENERIC_RELOC_PAIR _main 0x{{[0-9A-F]+}}
+MACHO-NEXT: }
+MACHO-NEXT: ]
diff --git a/test/tools/llvm-readobj/sections-ext.test b/test/tools/llvm-readobj/sections-ext.test
new file mode 100644
index 000000000000..3254040c0553
--- /dev/null
+++ b/test/tools/llvm-readobj/sections-ext.test
@@ -0,0 +1,175 @@
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.coff-i386 \
+RUN: | FileCheck %s -check-prefix COFF
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.macho-i386 \
+RUN: | FileCheck %s -check-prefix MACHO
+
+COFF: Sections [
+COFF-NEXT: Section {
+COFF-NEXT: Number: 1
+COFF-NEXT: Name: .text (2E 74 65 78 74 00 00 00)
+COFF-NEXT: VirtualSize: 0x0
+COFF-NEXT: VirtualAddress: 0x0
+COFF-NEXT: RawDataSize: 22
+COFF-NEXT: PointerToRawData: 0x64
+COFF-NEXT: PointerToRelocations: 0x7A
+COFF-NEXT: PointerToLineNumbers: 0x0
+COFF-NEXT: RelocationCount: 3
+COFF-NEXT: LineNumberCount: 0
+COFF-NEXT: Characteristics [ (0x60500020)
+COFF-NEXT: IMAGE_SCN_ALIGN_16BYTES (0x500000)
+COFF-NEXT: IMAGE_SCN_CNT_CODE (0x20)
+COFF-NEXT: IMAGE_SCN_MEM_EXECUTE (0x20000000)
+COFF-NEXT: IMAGE_SCN_MEM_READ (0x40000000)
+COFF-NEXT: ]
+COFF-NEXT: Relocations [
+COFF-NEXT: 0x4 IMAGE_REL_I386_DIR32 .data
+COFF-NEXT: 0x9 IMAGE_REL_I386_REL32 _puts
+COFF-NEXT: 0xE IMAGE_REL_I386_REL32 _SomeOtherFunction
+COFF-NEXT: ]
+COFF-NEXT: Symbols [
+COFF-NEXT: Symbol {
+COFF-NEXT: Name: .text
+COFF-NEXT: Value: 0
+COFF-NEXT: Section: .text (1)
+COFF-NEXT: BaseType: Null (0x0)
+COFF-NEXT: ComplexType: Null (0x0)
+COFF-NEXT: StorageClass: Static (0x3)
+COFF-NEXT: AuxSymbolCount: 1
+COFF-NEXT: AuxSectionDef {
+COFF-NEXT: Length: 22
+COFF-NEXT: RelocationCount: 3
+COFF-NEXT: LineNumberCount: 0
+COFF-NEXT: Checksum: 0x0
+COFF-NEXT: Number: 1
+COFF-NEXT: Selection: 0x0
+COFF-NEXT: Unused: (00 00 00)
+COFF-NEXT: }
+COFF-NEXT: }
+COFF-NEXT: Symbol {
+COFF-NEXT: Name: _main
+COFF-NEXT: Value: 0
+COFF-NEXT: Section: .text (1)
+COFF-NEXT: BaseType: Null (0x0)
+COFF-NEXT: ComplexType: Function (0x2)
+COFF-NEXT: StorageClass: External (0x2)
+COFF-NEXT: AuxSymbolCount: 0
+COFF-NEXT: }
+COFF-NEXT: ]
+COFF-NEXT: SectionData (
+COFF-NEXT: 0000: 50C70424 00000000 E8000000 00E80000 |P..$............|
+COFF-NEXT: 0010: 000031C0 5AC3 |..1.Z.|
+COFF-NEXT: )
+COFF-NEXT: }
+
+ELF: Sections [
+ELF-NEXT: Section {
+ELF-NEXT: Index: 0
+ELF-NEXT: Name: (0)
+ELF-NEXT: Type: SHT_NULL (0x0)
+ELF-NEXT: Flags [ (0x0)
+ELF-NEXT: ]
+ELF-NEXT: Address: 0x0
+ELF-NEXT: Offset: 0x0
+ELF-NEXT: Size: 0
+ELF-NEXT: Link: 0
+ELF-NEXT: Info: 0
+ELF-NEXT: AddressAlignment: 0
+ELF-NEXT: EntrySize: 0
+ELF-NEXT: Relocations [
+ELF-NEXT: ]
+ELF-NEXT: Symbols [
+ELF-NEXT: ]
+ELF-NEXT: SectionData (
+ELF-NEXT: )
+ELF-NEXT: }
+ELF-NEXT: Section {
+ELF-NEXT: Index: 1
+ELF-NEXT: Name: .text (5)
+ELF-NEXT: Type: SHT_PROGBITS (0x1)
+ELF-NEXT: Flags [ (0x6)
+ELF-NEXT: SHF_ALLOC (0x2)
+ELF-NEXT: SHF_EXECINSTR (0x4)
+ELF-NEXT: ]
+ELF-NEXT: Address: 0x0
+ELF-NEXT: Offset: 0x40
+ELF-NEXT: Size: 42
+ELF-NEXT: Link: 0
+ELF-NEXT: Info: 0
+ELF-NEXT: AddressAlignment: 16
+ELF-NEXT: EntrySize: 0
+ELF-NEXT: Relocations [
+ELF-NEXT: 0xC R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 0x0
+ELF-NEXT: 0x12 R_386_GOTOFF .L.str 0x0
+ELF-NEXT: 0x1A R_386_PLT32 puts 0x0
+ELF-NEXT: 0x1F R_386_PLT32 SomeOtherFunction 0x0
+ELF-NEXT: ]
+ELF-NEXT: Symbols [
+ELF-NEXT: Symbol {
+ELF-NEXT: Name: .text (0)
+ELF-NEXT: Value: 0x0
+ELF-NEXT: Size: 0
+ELF-NEXT: Binding: Local (0x0)
+ELF-NEXT: Type: Section (0x3)
+ELF-NEXT: Other: 0
+ELF-NEXT: Section: .text (0x1)
+ELF-NEXT: }
+ELF-NEXT: Symbol {
+ELF-NEXT: Name: main (12)
+ELF-NEXT: Value: 0x0
+ELF-NEXT: Size: 42
+ELF-NEXT: Binding: Global (0x1)
+ELF-NEXT: Type: Function (0x2)
+ELF-NEXT: Other: 0
+ELF-NEXT: Section: .text (0x1)
+ELF-NEXT: }
+ELF-NEXT: ]
+ELF-NEXT: SectionData (
+ELF-NEXT: 0000: 5383EC08 E8000000 005B81C3 03000000 |S........[......|
+ELF-NEXT: 0010: 8D830000 00008904 24E8FCFF FFFFE8FC |........$.......|
+ELF-NEXT: 0020: FFFFFF31 C083C408 5BC3 |...1....[.|
+ELF-NEXT: )
+ELF-NEXT: }
+
+MACHO: Sections [
+MACHO-NEXT: Section {
+MACHO-NEXT: Index: 0
+MACHO-NEXT: Name: __text (5F 5F 74 65 78 74 00
+MACHO-NEXT: Segment:
+MACHO-NEXT: Address: 0x0
+MACHO-NEXT: Size: 0x22
+MACHO-NEXT: Offset: 324
+MACHO-NEXT: Alignment: 4
+MACHO-NEXT: RelocationOffset: 0x174
+MACHO-NEXT: RelocationCount: 4
+MACHO-NEXT: Type: 0x0
+MACHO-NEXT: Attributes [ (0x800004)
+MACHO-NEXT: PureInstructions (0x800000)
+MACHO-NEXT: SomeInstructions (0x4)
+MACHO-NEXT: ]
+MACHO-NEXT: Reserved1: 0x0
+MACHO-NEXT: Reserved2: 0x0
+MACHO-NEXT: Relocations [
+MACHO-NEXT: 0x18 GENERIC_RELOC_VANILLA _SomeOtherFunction 0x0
+MACHO-NEXT: 0x13 GENERIC_RELOC_VANILLA _puts 0x0
+MACHO-NEXT: 0xB GENERIC_RELOC_LOCAL_SECTDIFF _main 0x{{[0-9A-F]+}}
+MACHO-NEXT: 0x0 GENERIC_RELOC_PAIR _main 0x{{[0-9A-F]+}}
+MACHO-NEXT: ]
+MACHO-NEXT: Symbols [
+MACHO-NEXT: Symbol {
+MACHO-NEXT: Name: _main (1)
+MACHO-NEXT: Type: 0xF
+MACHO-NEXT: Section: __text (0x1)
+MACHO-NEXT: RefType: UndefinedNonLazy (0x0)
+MACHO-NEXT: Flags [ (0x0)
+MACHO-NEXT: ]
+MACHO-NEXT: Value: 0x0
+MACHO-NEXT: }
+MACHO-NEXT: ]
+MACHO-NEXT: SectionData (
+MACHO-NEXT: 0000: 83EC0CE8 00000000 588D801A 00000089 |........X.......|
+MACHO-NEXT: 0010: 0424E8E9 FFFFFFE8 E4FFFFFF 31C083C4 |.$..........1...|
+MACHO-NEXT: 0020: 0CC3 |..|
+MACHO-NEXT: )
+MACHO-NEXT: }
diff --git a/test/tools/llvm-readobj/sections.test b/test/tools/llvm-readobj/sections.test
new file mode 100644
index 000000000000..e5c6c063b233
--- /dev/null
+++ b/test/tools/llvm-readobj/sections.test
@@ -0,0 +1,113 @@
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.coff-i386 \
+RUN: | FileCheck %s -check-prefix COFF
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.macho-i386 \
+RUN: | FileCheck %s -check-prefix MACHO
+
+COFF: Sections [
+COFF-NEXT: Section {
+COFF-NEXT: Number: 1
+COFF-NEXT: Name: .text (2E 74 65 78 74 00 00 00)
+COFF-NEXT: VirtualSize: 0x0
+COFF-NEXT: VirtualAddress: 0x0
+COFF-NEXT: RawDataSize: 22
+COFF-NEXT: PointerToRawData: 0x64
+COFF-NEXT: PointerToRelocations: 0x7A
+COFF-NEXT: PointerToLineNumbers: 0x0
+COFF-NEXT: RelocationCount: 3
+COFF-NEXT: LineNumberCount: 0
+COFF-NEXT: Characteristics [ (0x60500020)
+COFF-NEXT: IMAGE_SCN_ALIGN_16BYTES (0x500000)
+COFF-NEXT: IMAGE_SCN_CNT_CODE (0x20)
+COFF-NEXT: IMAGE_SCN_MEM_EXECUTE (0x20000000)
+COFF-NEXT: IMAGE_SCN_MEM_READ (0x40000000)
+COFF-NEXT: ]
+COFF-NEXT: }
+COFF-NEXT: Section {
+COFF-NEXT: Number: 2
+COFF-NEXT: Name: .data (2E 64 61 74 61 00 00 00)
+COFF-NEXT: VirtualSize: 0x0
+COFF-NEXT: VirtualAddress: 0x0
+COFF-NEXT: RawDataSize: 13
+COFF-NEXT: PointerToRawData: 0x98
+COFF-NEXT: PointerToRelocations: 0x0
+COFF-NEXT: PointerToLineNumbers: 0x0
+COFF-NEXT: RelocationCount: 0
+COFF-NEXT: LineNumberCount: 0
+COFF-NEXT: Characteristics [ (0xC0300040)
+COFF-NEXT: IMAGE_SCN_ALIGN_4BYTES (0x300000)
+COFF-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA (0x40)
+COFF-NEXT: IMAGE_SCN_MEM_READ (0x40000000)
+COFF-NEXT: IMAGE_SCN_MEM_WRITE (0x80000000)
+COFF-NEXT: ]
+COFF-NEXT: }
+COFF-NEXT: ]
+
+ELF: Sections [
+ELF-NEXT: Section {
+ELF-NEXT: Index: 0
+ELF-NEXT: Name: (0)
+ELF-NEXT: Type: SHT_NULL (0x0)
+ELF-NEXT: Flags [ (0x0)
+ELF-NEXT: ]
+ELF-NEXT: Address: 0x0
+ELF-NEXT: Offset: 0x0
+ELF-NEXT: Size: 0
+ELF-NEXT: Link: 0
+ELF-NEXT: Info: 0
+ELF-NEXT: AddressAlignment: 0
+ELF-NEXT: EntrySize: 0
+ELF-NEXT: }
+ELF-NEXT: Section {
+ELF-NEXT: Index: 1
+ELF-NEXT: Name: .text (5)
+ELF-NEXT: Type: SHT_PROGBITS (0x1)
+ELF-NEXT: Flags [ (0x6)
+ELF-NEXT: SHF_ALLOC (0x2)
+ELF-NEXT: SHF_EXECINSTR (0x4)
+ELF-NEXT: ]
+ELF-NEXT: Address: 0x0
+ELF-NEXT: Offset: 0x40
+ELF-NEXT: Size: 42
+ELF-NEXT: Link: 0
+ELF-NEXT: Info: 0
+ELF-NEXT: AddressAlignment: 16
+ELF-NEXT: EntrySize: 0
+ELF-NEXT: }
+
+MACHO: Sections [
+MACHO-NEXT: Section {
+MACHO-NEXT: Index: 0
+MACHO-NEXT: Name: __text (
+MACHO-NEXT: Segment:
+MACHO-NEXT: Address: 0x0
+MACHO-NEXT: Size: 0x22
+MACHO-NEXT: Offset: 324
+MACHO-NEXT: Alignment: 4
+MACHO-NEXT: RelocationOffset: 0x174
+MACHO-NEXT: RelocationCount: 4
+MACHO-NEXT: Type: 0x0
+MACHO-NEXT: Attributes [ (0x800004)
+MACHO-NEXT: PureInstructions (0x800000)
+MACHO-NEXT: SomeInstructions (0x4)
+MACHO-NEXT: ]
+MACHO-NEXT: Reserved1: 0x0
+MACHO-NEXT: Reserved2: 0x0
+MACHO-NEXT: }
+MACHO-NEXT: Section {
+MACHO-NEXT: Index: 1
+MACHO-NEXT: Name: __cstring (
+MACHO-NEXT: Segment:
+MACHO-NEXT: Address: 0x22
+MACHO-NEXT: Size: 0xD
+MACHO-NEXT: Offset: 358
+MACHO-NEXT: Alignment: 0
+MACHO-NEXT: RelocationOffset: 0x0
+MACHO-NEXT: RelocationCount: 0
+MACHO-NEXT: Type: ExtReloc (0x2)
+MACHO-NEXT: Attributes [ (0x0)
+MACHO-NEXT: ]
+MACHO-NEXT: Reserved1: 0x0
+MACHO-NEXT: Reserved2: 0x0
+MACHO-NEXT: }
diff --git a/test/tools/llvm-readobj/symbols.test b/test/tools/llvm-readobj/symbols.test
new file mode 100644
index 000000000000..d33bd8ed2cd0
--- /dev/null
+++ b/test/tools/llvm-readobj/symbols.test
@@ -0,0 +1,44 @@
+RUN: llvm-readobj -t %p/Inputs/trivial.obj.coff-i386 \
+RUN: | FileCheck %s -check-prefix COFF
+RUN: llvm-readobj -t %p/Inputs/trivial.obj.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF
+
+COFF: Symbols [
+COFF-NEXT: Symbol {
+COFF-NEXT: Name: .text
+COFF-NEXT: Value: 0
+COFF-NEXT: Section: .text (1)
+COFF-NEXT: BaseType: Null (0x0)
+COFF-NEXT: ComplexType: Null (0x0)
+COFF-NEXT: StorageClass: Static (0x3)
+COFF-NEXT: AuxSymbolCount: 1
+COFF-NEXT: AuxSectionDef {
+COFF-NEXT: Length: 22
+COFF-NEXT: RelocationCount: 3
+COFF-NEXT: LineNumberCount: 0
+COFF-NEXT: Checksum: 0x0
+COFF-NEXT: Number: 1
+COFF-NEXT: Selection: 0x0
+COFF-NEXT: Unused: (00 00 00)
+COFF-NEXT: }
+COFF-NEXT: }
+
+ELF: Symbols [
+ELF-NEXT: Symbol {
+ELF-NEXT: Name: trivial.ll (1)
+ELF-NEXT: Value: 0x0
+ELF-NEXT: Size: 0
+ELF-NEXT: Binding: Local (0x0)
+ELF-NEXT: Type: File (0x4)
+ELF-NEXT: Other: 0
+ELF-NEXT: Section: (0xFFF1)
+ELF-NEXT: }
+ELF-NEXT: Symbol {
+ELF-NEXT: Name: .L.str (39)
+ELF-NEXT: Value: 0x0
+ELF-NEXT: Size: 13
+ELF-NEXT: Binding: Local (0x0)
+ELF-NEXT: Type: Object (0x1)
+ELF-NEXT: Other: 0
+ELF-NEXT: Section: .rodata.str1.1 (0x5)
+ELF-NEXT: }
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 691828562203..9b80ee5a23a2 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -31,6 +31,9 @@ add_subdirectory(llvm-objdump)
add_subdirectory(llvm-readobj)
add_subdirectory(llvm-rtdyld)
add_subdirectory(llvm-dwarfdump)
+if( LLVM_USE_INTEL_JITEVENTS )
+ add_subdirectory(llvm-jitlistener)
+endif( LLVM_USE_INTEL_JITEVENTS )
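+# (Illustrative: with a CMake build, passing -DLLVM_USE_INTEL_JITEVENTS=ON at
+# configure time enables this tool.)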
add_subdirectory(bugpoint)
add_subdirectory(bugpoint-passes)
@@ -38,19 +41,25 @@ add_subdirectory(llvm-bcanalyzer)
add_subdirectory(llvm-stress)
add_subdirectory(llvm-mcmarkup)
+add_subdirectory(llvm-symbolizer)
+
+add_subdirectory(obj2yaml)
+
if( NOT WIN32 )
add_subdirectory(lto)
endif()
if( LLVM_ENABLE_PIC )
# TODO: support other systems:
- if( CMAKE_SYSTEM_NAME STREQUAL "Linux" )
+ if( (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ OR (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") )
add_subdirectory(gold)
endif()
endif()
add_llvm_external_project(clang)
add_llvm_external_project(lld)
+add_llvm_external_project(lldb)
add_llvm_external_project(polly)
set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} PARENT_SCOPE)
diff --git a/tools/LLVMBuild.txt b/tools/LLVMBuild.txt
index 64164792a77f..25aa177b3504 100644
--- a/tools/LLVMBuild.txt
+++ b/tools/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup
+subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-jitlistener llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup
[component_0]
type = Group
diff --git a/tools/Makefile b/tools/Makefile
index a29e49f0a1f8..b8f21d2ce192 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -34,7 +34,13 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \
bugpoint llvm-bcanalyzer \
llvm-diff macho-dump llvm-objdump llvm-readobj \
llvm-rtdyld llvm-dwarfdump llvm-cov \
- llvm-size llvm-stress llvm-mcmarkup
+ llvm-size llvm-stress llvm-mcmarkup \
+ llvm-symbolizer obj2yaml
+
+# If Intel JIT Events support is configured, build an extra tool to test it.
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+ PARALLEL_DIRS += llvm-jitlistener
+endif
# Let users override the set of tools to build from the command line.
ifdef ONLY_TOOLS
diff --git a/tools/bugpoint-passes/CMakeLists.txt b/tools/bugpoint-passes/CMakeLists.txt
index b2f1bb5d1350..05f190a2a586 100644
--- a/tools/bugpoint-passes/CMakeLists.txt
+++ b/tools/bugpoint-passes/CMakeLists.txt
@@ -1,3 +1,7 @@
+if( NOT LLVM_BUILD_TOOLS )
+ set(EXCLUDE_FROM_ALL ON)
+endif()
+
add_llvm_loadable_module( BugpointPasses
TestPasses.cpp
)
diff --git a/tools/bugpoint-passes/TestPasses.cpp b/tools/bugpoint-passes/TestPasses.cpp
index 1535b0388561..118c98a45913 100644
--- a/tools/bugpoint-passes/TestPasses.cpp
+++ b/tools/bugpoint-passes/TestPasses.cpp
@@ -12,12 +12,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/BasicBlock.h"
-#include "llvm/Constant.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/Support/InstVisitor.h"
using namespace llvm;
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index 21636eabab1f..e49a96b1e0ca 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -15,15 +15,15 @@
#include "BugDriver.h"
#include "ToolRunner.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker.h"
-#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/IRReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Host.h"
#include <memory>
using namespace llvm;
diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h
index cc78489e3d90..2b621ecc1cf5 100644
--- a/tools/bugpoint/BugDriver.h
+++ b/tools/bugpoint/BugDriver.h
@@ -18,8 +18,8 @@
#include "llvm/ADT/ValueMap.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <vector>
#include <string>
+#include <vector>
namespace llvm {
diff --git a/tools/bugpoint/CMakeLists.txt b/tools/bugpoint/CMakeLists.txt
index ee2235bf427e..0000d977acf3 100644
--- a/tools/bugpoint/CMakeLists.txt
+++ b/tools/bugpoint/CMakeLists.txt
@@ -1,5 +1,5 @@
set(LLVM_LINK_COMPONENTS asmparser instrumentation scalaropts ipo
- linker bitreader bitwriter vectorize)
+ linker bitreader bitwriter irreader vectorize objcarcopts)
add_llvm_tool(bugpoint
BugDriver.cpp
@@ -12,3 +12,4 @@ add_llvm_tool(bugpoint
ToolRunner.cpp
bugpoint.cpp
)
+set_target_properties(bugpoint PROPERTIES ENABLE_EXPORTS 1)
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
index aed16f47e012..ed211a600816 100644
--- a/tools/bugpoint/CrashDebugger.cpp
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -12,22 +12,22 @@
//===----------------------------------------------------------------------===//
#include "BugDriver.h"
-#include "ToolRunner.h"
#include "ListReducer.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
-#include "llvm/ValueSymbolTable.h"
+#include "ToolRunner.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileUtilities.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/CommandLine.h"
#include <set>
using namespace llvm;
diff --git a/tools/bugpoint/ExecutionDriver.cpp b/tools/bugpoint/ExecutionDriver.cpp
index 218a559d21d7..da360453ecce 100644
--- a/tools/bugpoint/ExecutionDriver.cpp
+++ b/tools/bugpoint/ExecutionDriver.cpp
@@ -230,7 +230,7 @@ bool BugDriver::initializeExecutionEnvironment() {
}
if (!SafeInterpreter) {
SafeInterpreterSel = AutoPick;
- Message = "Sorry, I can't automatically select an interpreter!\n";
+ Message = "Sorry, I can't automatically select a safe interpreter!\n";
}
break;
case RunLLC:
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
index b40b4f10db99..bb27767fa483 100644
--- a/tools/bugpoint/ExtractFunction.cpp
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -13,25 +13,25 @@
//===----------------------------------------------------------------------===//
#include "BugDriver.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Pass.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
#include <set>
using namespace llvm;
diff --git a/tools/bugpoint/LLVMBuild.txt b/tools/bugpoint/LLVMBuild.txt
index 549d9d023395..01643553c5b5 100644
--- a/tools/bugpoint/LLVMBuild.txt
+++ b/tools/bugpoint/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = bugpoint
parent = Tools
-required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Linker Scalar
+required_libraries = AsmParser BitReader BitWriter IRReader IPO Instrumentation Linker Scalar ObjCARC
diff --git a/tools/bugpoint/ListReducer.h b/tools/bugpoint/ListReducer.h
index bd1c5da65c8f..8083e2d65fb1 100644
--- a/tools/bugpoint/ListReducer.h
+++ b/tools/bugpoint/ListReducer.h
@@ -15,11 +15,11 @@
#ifndef BUGPOINT_LIST_REDUCER_H
#define BUGPOINT_LIST_REDUCER_H
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
-#include <vector>
-#include <cstdlib>
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cstdlib>
+#include <vector>
namespace llvm {
diff --git a/tools/bugpoint/Makefile b/tools/bugpoint/Makefile
index 34f4bddb0185..20493218b064 100644
--- a/tools/bugpoint/Makefile
+++ b/tools/bugpoint/Makefile
@@ -10,6 +10,6 @@
LEVEL := ../..
TOOLNAME := bugpoint
LINK_COMPONENTS := asmparser instrumentation scalaropts ipo linker bitreader \
- bitwriter vectorize
+ bitwriter irreader vectorize objcarcopts
include $(LEVEL)/Makefile.common
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 82a3a862a2d8..c676a05cb6ce 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -15,17 +15,17 @@
#include "BugDriver.h"
#include "ListReducer.h"
#include "ToolRunner.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Config/config.h" // for HAVE_LINK_R
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Linker.h"
-#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
-#include "llvm/Config/config.h" // for HAVE_LINK_R
+#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
namespace llvm {
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index c56911a32e85..87dc9f332cf7 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -16,18 +16,18 @@
//===----------------------------------------------------------------------===//
#include "BugDriver.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Support/FileUtilities.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/ToolOutputFile.h"
#define DONT_GET_PLUGIN_LOADER_OPTION
#include "llvm/Support/PluginLoader.h"
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index d975d68d9697..735061d8bc07 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -13,12 +13,12 @@
#define DEBUG_TYPE "toolrunner"
#include "ToolRunner.h"
-#include "llvm/Support/Program.h"
+#include "llvm/Config/config.h" // for HAVE_LINK_R
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Config/config.h" // for HAVE_LINK_R
#include <fstream>
#include <sstream>
using namespace llvm;
@@ -531,12 +531,12 @@ LLC *AbstractInterpreter::createLLC(const char *Argv0,
return 0;
}
- Message = "Found llc: " + LLCPath + "\n";
GCC *gcc = GCC::create(Message, GCCBinary, GCCArgs);
if (!gcc) {
errs() << Message << "\n";
exit(1);
}
+ Message = "Found llc: " + LLCPath + "\n";
return new LLC(LLCPath, gcc, Args, UseIntegratedAssembler);
}
diff --git a/tools/bugpoint/ToolRunner.h b/tools/bugpoint/ToolRunner.h
index 7b93394fd8ca..bb83ce459e72 100644
--- a/tools/bugpoint/ToolRunner.h
+++ b/tools/bugpoint/ToolRunner.h
@@ -20,8 +20,8 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/SystemUtils.h"
#include <exception>
#include <vector>
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
index 8f15b026a511..5e8fdd145eef 100644
--- a/tools/bugpoint/bugpoint.cpp
+++ b/tools/bugpoint/bugpoint.cpp
@@ -15,18 +15,18 @@
#include "BugDriver.h"
#include "ToolRunner.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/LinkAllIR.h"
#include "llvm/LinkAllPasses.h"
-#include "llvm/LLVMContext.h"
#include "llvm/PassManager.h"
-#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Valgrind.h"
-#include "llvm/LinkAllVMCore.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
//Enable this macro to debug bugpoint itself.
@@ -120,6 +120,7 @@ int main(int argc, char **argv) {
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeCore(Registry);
initializeScalarOpts(Registry);
+ initializeObjCARCOpts(Registry);
initializeVectorization(Registry);
initializeIPO(Registry);
initializeAnalysis(Registry);
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index b0a0dd2a4057..40f5fd608653 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -14,17 +14,14 @@
#include "llvm/Config/config.h" // plugin-api.h requires HAVE_STDINT_H
#include "plugin-api.h"
-
#include "llvm-c/lto.h"
-
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/system_error.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/Errno.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
-
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/system_error.h"
#include <cerrno>
#include <cstdlib>
#include <cstring>
diff --git a/tools/llc/CMakeLists.txt b/tools/llc/CMakeLists.txt
index 683f29862d5c..e5a5550e9ea6 100644
--- a/tools/llc/CMakeLists.txt
+++ b/tools/llc/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser)
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser irreader)
add_llvm_tool(llc
llc.cpp
diff --git a/tools/llc/LLVMBuild.txt b/tools/llc/LLVMBuild.txt
index 8c8794f62069..45cdc6498f86 100644
--- a/tools/llc/LLVMBuild.txt
+++ b/tools/llc/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = llc
parent = Tools
-required_libraries = AsmParser BitReader all-targets
+required_libraries = AsmParser BitReader IRReader all-targets
diff --git a/tools/llc/Makefile b/tools/llc/Makefile
index b32d5575d53e..c24f378bc538 100644
--- a/tools/llc/Makefile
+++ b/tools/llc/Makefile
@@ -9,7 +9,7 @@
LEVEL := ../..
TOOLNAME := llc
-LINK_COMPONENTS := all-targets bitreader asmparser
+LINK_COMPONENTS := all-targets bitreader asmparser irreader
include $(LEVEL)/Makefile.common
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 4d4a74c009e0..1dce9d7b6011 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -13,29 +13,30 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Pass.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Support/IRReader.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <memory>
@@ -51,6 +52,11 @@ InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
static cl::opt<std::string>
OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"));
+static cl::opt<unsigned>
+TimeCompilations("time-compilations", cl::Hidden, cl::init(1u),
+ cl::value_desc("N"),
+ cl::desc("Repeat compilation N times for timing"));
+
// Determine optimization level.
static cl::opt<char>
OptLevel("O",
@@ -71,6 +77,8 @@ DisableSimplifyLibCalls("disable-simplify-libcalls",
cl::desc("Disable simplify-libcalls"),
cl::init(false));
+static int compileModule(char**, LLVMContext&);
+
// GetFileNameRoot - Helper function to get the basename of a filename.
static inline std::string
GetFileNameRoot(const std::string &InputFilename) {
@@ -181,6 +189,15 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
+ // Compile the module TimeCompilations times to give better compile time
+ // metrics.
+ for (unsigned I = TimeCompilations; I; --I)
+ if (int RetVal = compileModule(argv, Context))
+ return RetVal;
+ return 0;
+}
+
+static int compileModule(char **argv, LLVMContext &Context) {
// Load the module to be compiled...
SMDiagnostic Err;
std::auto_ptr<Module> M;
@@ -303,10 +320,8 @@ int main(int argc, char **argv) {
TLI->disableAllFunctions();
PM.add(TLI);
- if (target.get()) {
- PM.add(new TargetTransformInfo(target->getScalarTargetTransformInfo(),
- target->getVectorTargetTransformInfo()));
- }
+ // Add internal analysis passes from the target machine.
+ Target.addAnalysisPasses(PM);
// Add the target data from the target machine, if it exists, or the module.
if (const DataLayout *TD = Target.getDataLayout())
diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt
index ed479f5323dc..aaa6598e7118 100644
--- a/tools/lli/CMakeLists.txt
+++ b/tools/lli/CMakeLists.txt
@@ -1,5 +1,5 @@
-set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser selectiondag native)
+set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser irreader selectiondag native)
if( LLVM_USE_OPROFILE )
set(LLVM_LINK_COMPONENTS
@@ -11,7 +11,9 @@ endif( LLVM_USE_OPROFILE )
if( LLVM_USE_INTEL_JITEVENTS )
set(LLVM_LINK_COMPONENTS
${LLVM_LINK_COMPONENTS}
+ DebugInfo
IntelJITEvents
+ Object
)
endif( LLVM_USE_INTEL_JITEVENTS )
diff --git a/tools/lli/LLVMBuild.txt b/tools/lli/LLVMBuild.txt
index 36ceb39b1270..5823792ff01c 100644
--- a/tools/lli/LLVMBuild.txt
+++ b/tools/lli/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = lli
parent = Tools
-required_libraries = AsmParser BitReader Interpreter JIT MCJIT NativeCodeGen SelectionDAG Native
+required_libraries = AsmParser BitReader IRReader Interpreter JIT MCJIT NativeCodeGen SelectionDAG Native
diff --git a/tools/lli/Makefile b/tools/lli/Makefile
index 31f3ab8a1e69..a6530584a2e9 100644
--- a/tools/lli/Makefile
+++ b/tools/lli/Makefile
@@ -12,12 +12,12 @@ TOOLNAME := lli
include $(LEVEL)/Makefile.config
-LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser selectiondag native
+LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser irreader selectiondag native
# If Intel JIT Events support is configured, link against the LLVM Intel JIT
# Events interface library
ifeq ($(USE_INTEL_JITEVENTS), 1)
- LINK_COMPONENTS += inteljitevents
+ LINK_COMPONENTS += debuginfo inteljitevents object
endif
# If oprofile support is configured, link against the LLVM oprofile interface
diff --git a/tools/lli/RecordingMemoryManager.cpp b/tools/lli/RecordingMemoryManager.cpp
index 9e1cff55277d..e4d992d3d432 100644
--- a/tools/lli/RecordingMemoryManager.cpp
+++ b/tools/lli/RecordingMemoryManager.cpp
@@ -15,29 +15,57 @@
#include "RecordingMemoryManager.h"
using namespace llvm;
+RecordingMemoryManager::~RecordingMemoryManager() {
+ for (SmallVectorImpl<Allocation>::iterator
+ I = AllocatedCodeMem.begin(), E = AllocatedCodeMem.end();
+ I != E; ++I)
+ sys::Memory::releaseMappedMemory(I->first);
+ for (SmallVectorImpl<Allocation>::iterator
+ I = AllocatedDataMem.begin(), E = AllocatedDataMem.end();
+ I != E; ++I)
+ sys::Memory::releaseMappedMemory(I->first);
+}
+
uint8_t *RecordingMemoryManager::
allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) {
// The recording memory manager is just a local copy of the remote target.
// The alignment requirement is just stored here for later use. Regular
- // heap storage is sufficient here.
- void *Addr = malloc(Size);
- assert(Addr && "malloc() failure!");
- sys::MemoryBlock Block(Addr, Size);
+ // heap storage is sufficient here, but we're using mapped memory to work
+ // around a bug in MCJIT.
+ sys::MemoryBlock Block = allocateSection(Size);
AllocatedCodeMem.push_back(Allocation(Block, Alignment));
- return (uint8_t*)Addr;
+ return (uint8_t*)Block.base();
}
uint8_t *RecordingMemoryManager::
-allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) {
+allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, bool IsReadOnly) {
// The recording memory manager is just a local copy of the remote target.
// The alignment requirement is just stored here for later use. Regular
- // heap storage is sufficient here.
- void *Addr = malloc(Size);
- assert(Addr && "malloc() failure!");
- sys::MemoryBlock Block(Addr, Size);
+ // heap storage is sufficient here, but we're using mapped memory to work
+ // around a bug in MCJIT.
+ sys::MemoryBlock Block = allocateSection(Size);
AllocatedDataMem.push_back(Allocation(Block, Alignment));
- return (uint8_t*)Addr;
+ return (uint8_t*)Block.base();
+}
+
+sys::MemoryBlock RecordingMemoryManager::allocateSection(uintptr_t Size) {
+ error_code ec;
+ sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(Size,
+ &Near,
+ sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE,
+ ec);
+ assert(!ec && MB.base());
+
+ // FIXME: This is part of a workaround to keep sections near one another
+ // when MCJIT performs relocations after code emission but before
+ // the generated code is moved to the remote target.
+ // Save this address as the basis for our next request.
+ Near = MB;
+ return MB;
}
+
void RecordingMemoryManager::setMemoryWritable() { llvm_unreachable("Unexpected!"); }
void RecordingMemoryManager::setMemoryExecutable() { llvm_unreachable("Unexpected!"); }
void RecordingMemoryManager::setPoisonMemory(bool poison) { llvm_unreachable("Unexpected!"); }
@@ -81,7 +109,20 @@ void RecordingMemoryManager::endExceptionTable(const Function *F, uint8_t *Table
void RecordingMemoryManager::deallocateExceptionTable(void *ET) {
llvm_unreachable("Unexpected!");
}
+
+static int jit_noop() {
+ return 0;
+}
+
void *RecordingMemoryManager::getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure) {
+ // We should not invoke the parent's ctors/dtors from generated main()!
+ // On MinGW and Cygwin, the symbol __main would otherwise resolve to the
+ // callee's (e.g. tools/lli's), invoking the wrong duplicated ctors
+ // (and registering the wrong callee's dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors()
+ // to be called before ExecutionEngine::runFunctionAsMain().
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
return NULL;
}
diff --git a/tools/lli/RecordingMemoryManager.h b/tools/lli/RecordingMemoryManager.h
index 1590235a793c..991f535fd4c1 100644
--- a/tools/lli/RecordingMemoryManager.h
+++ b/tools/lli/RecordingMemoryManager.h
@@ -31,9 +31,15 @@ private:
SmallVector<Allocation, 16> AllocatedDataMem;
SmallVector<Allocation, 16> AllocatedCodeMem;
+ // FIXME: This is part of a workaround to keep sections near one another
+ // when MCJIT performs relocations after code emission but before
+ // the generated code is moved to the remote target.
+ sys::MemoryBlock Near;
+ sys::MemoryBlock allocateSection(uintptr_t Size);
+
public:
RecordingMemoryManager() {}
- virtual ~RecordingMemoryManager() {}
+ virtual ~RecordingMemoryManager();
typedef SmallVectorImpl<Allocation>::const_iterator const_data_iterator;
typedef SmallVectorImpl<Allocation>::const_iterator const_code_iterator;
@@ -47,10 +53,13 @@ public:
unsigned SectionID);
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
+ unsigned SectionID, bool IsReadOnly);
void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true);
+
+ bool applyPermissions(std::string *ErrMsg) { return false; }
+
// The following obsolete JITMemoryManager calls are stubbed out for
// this model.
void setMemoryWritable();
diff --git a/tools/lli/RemoteTarget.h b/tools/lli/RemoteTarget.h
index d05d3c6f4568..b2a6d0ef1d48 100644
--- a/tools/lli/RemoteTarget.h
+++ b/tools/lli/RemoteTarget.h
@@ -15,8 +15,8 @@
#ifndef REMOTEPROCESS_H
#define REMOTEPROCESS_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Memory.h"
#include <stdlib.h>
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index d41a595de857..297763fcfbd8 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -14,11 +14,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "lli"
+#include "llvm/IR/LLVMContext.h"
#include "RecordingMemoryManager.h"
#include "RemoteTarget.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
@@ -28,33 +26,27 @@
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/IRReader.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <cerrno>
-#ifdef __linux__
-// These includes used by LLIMCJITMemoryManager::getPointerToNamedFunction()
-// for Glibc trickery. Look comments in this function for more information.
-#ifdef HAVE_SYS_STAT_H
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-#endif
-
#ifdef __CYGWIN__
#include <cygwin/version.h>
#if defined(CYGWIN_VERSION_DLL_MAJOR) && CYGWIN_VERSION_DLL_MAJOR<1007
@@ -217,211 +209,6 @@ static void do_shutdown() {
#endif
}
-// Memory manager for MCJIT
-class LLIMCJITMemoryManager : public JITMemoryManager {
-public:
- SmallVector<sys::MemoryBlock, 16> AllocatedDataMem;
- SmallVector<sys::MemoryBlock, 16> AllocatedCodeMem;
- SmallVector<sys::MemoryBlock, 16> FreeCodeMem;
-
- LLIMCJITMemoryManager() { }
- ~LLIMCJITMemoryManager();
-
- virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
-
- virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
-
- virtual void *getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure = true);
-
- // Invalidate instruction cache for code sections. Some platforms with
- // separate data cache and instruction cache require explicit cache flush,
- // otherwise JIT code manipulations (like resolved relocations) will get to
- // the data cache but not to the instruction cache.
- virtual void invalidateInstructionCache();
-
- // The RTDyldMemoryManager doesn't use the following functions, so we don't
- // need implement them.
- virtual void setMemoryWritable() {
- llvm_unreachable("Unexpected call!");
- }
- virtual void setMemoryExecutable() {
- llvm_unreachable("Unexpected call!");
- }
- virtual void setPoisonMemory(bool poison) {
- llvm_unreachable("Unexpected call!");
- }
- virtual void AllocateGOT() {
- llvm_unreachable("Unexpected call!");
- }
- virtual uint8_t *getGOTBase() const {
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual uint8_t *startFunctionBody(const Function *F,
- uintptr_t &ActualSize){
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
- unsigned Alignment) {
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
- uint8_t *FunctionEnd) {
- llvm_unreachable("Unexpected call!");
- }
- virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual void deallocateFunctionBody(void *Body) {
- llvm_unreachable("Unexpected call!");
- }
- virtual uint8_t* startExceptionTable(const Function* F,
- uintptr_t &ActualSize) {
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
- uint8_t *TableEnd, uint8_t* FrameRegister) {
- llvm_unreachable("Unexpected call!");
- }
- virtual void deallocateExceptionTable(void *ET) {
- llvm_unreachable("Unexpected call!");
- }
-};
-
-uint8_t *LLIMCJITMemoryManager::allocateDataSection(uintptr_t Size,
- unsigned Alignment,
- unsigned SectionID) {
- if (!Alignment)
- Alignment = 16;
- // Ensure that enough memory is requested to allow aligning.
- size_t NumElementsAligned = 1 + (Size + Alignment - 1)/Alignment;
- uint8_t *Addr = (uint8_t*)calloc(NumElementsAligned, Alignment);
-
- // Honour the alignment requirement.
- uint8_t *AlignedAddr = (uint8_t*)RoundUpToAlignment((uint64_t)Addr, Alignment);
-
- // Store the original address from calloc so we can free it later.
- AllocatedDataMem.push_back(sys::MemoryBlock(Addr, NumElementsAligned*Alignment));
- return AlignedAddr;
-}
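
Worked example of the over-allocation arithmetic above (illustrative numbers only): with Size = 100 and Alignment = 16, NumElementsAligned = 1 + (100 + 15)/16 = 8, so calloc reserves 8 * 16 = 128 bytes; in the worst case the rounded-up AlignedAddr starts 15 bytes into the block, which still leaves 113 >= 100 usable bytes.
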
-
-uint8_t *LLIMCJITMemoryManager::allocateCodeSection(uintptr_t Size,
- unsigned Alignment,
- unsigned SectionID) {
- if (!Alignment)
- Alignment = 16;
- unsigned NeedAllocate = Alignment * ((Size + Alignment - 1)/Alignment + 1);
- uintptr_t Addr = 0;
- // Look in the list of free code memory regions and use a block there if one
- // is available.
- for (int i = 0, e = FreeCodeMem.size(); i != e; ++i) {
- sys::MemoryBlock &MB = FreeCodeMem[i];
- if (MB.size() >= NeedAllocate) {
- Addr = (uintptr_t)MB.base();
- uintptr_t EndOfBlock = Addr + MB.size();
- // Align the address.
- Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
- // Store the truncated free memory block.
- FreeCodeMem[i] = sys::MemoryBlock((void*)(Addr + Size),
- EndOfBlock - Addr - Size);
- return (uint8_t*)Addr;
- }
- }
-
- // No pre-allocated free block was large enough. Allocate a new memory region.
- sys::MemoryBlock MB = sys::Memory::AllocateRWX(NeedAllocate, 0, 0);
-
- AllocatedCodeMem.push_back(MB);
- Addr = (uintptr_t)MB.base();
- uintptr_t EndOfBlock = Addr + MB.size();
- // Align the address.
- Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
- // AllocateRWX may allocate much more memory than we need. In that case,
- // we store the unused memory as a free memory block.
- unsigned FreeSize = EndOfBlock-Addr-Size;
- if (FreeSize > 16)
- FreeCodeMem.push_back(sys::MemoryBlock((void*)(Addr + Size), FreeSize));
-
- // Return aligned address
- return (uint8_t*)Addr;
-}
-
-void LLIMCJITMemoryManager::invalidateInstructionCache() {
- for (int i = 0, e = AllocatedCodeMem.size(); i != e; ++i)
- sys::Memory::InvalidateInstructionCache(AllocatedCodeMem[i].base(),
- AllocatedCodeMem[i].size());
-}
-
-static int jit_noop() {
- return 0;
-}
-
-void *LLIMCJITMemoryManager::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure) {
-#if defined(__linux__)
- //===--------------------------------------------------------------------===//
- // Function stubs that are invoked instead of certain library calls
- //
- // Force the following functions to be linked in to anything that uses the
- // JIT. This is a hack designed to work around the all-too-clever Glibc
- // strategy of making these functions work differently when inlined vs. when
- // not inlined, and hiding their real definitions in a separate archive file
- // that the dynamic linker can't see. For more info, search for
- // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
- if (Name == "stat") return (void*)(intptr_t)&stat;
- if (Name == "fstat") return (void*)(intptr_t)&fstat;
- if (Name == "lstat") return (void*)(intptr_t)&lstat;
- if (Name == "stat64") return (void*)(intptr_t)&stat64;
- if (Name == "fstat64") return (void*)(intptr_t)&fstat64;
- if (Name == "lstat64") return (void*)(intptr_t)&lstat64;
- if (Name == "atexit") return (void*)(intptr_t)&atexit;
- if (Name == "mknod") return (void*)(intptr_t)&mknod;
-#endif // __linux__
-
- // We should not invoke the parent's ctors/dtors from the generated main()!
- // On MinGW and Cygwin, the symbol __main would otherwise resolve to the
- // callee's (e.g. tools/lli's), invoking the wrong, duplicated ctors (and
- // registering the wrong callee's dtors with atexit(3)). We expect
- // ExecutionEngine::runStaticConstructorsDestructors() to be called before
- // ExecutionEngine::runFunctionAsMain().
- if (Name == "__main") return (void*)(intptr_t)&jit_noop;
-
- const char *NameStr = Name.c_str();
- void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
- if (Ptr) return Ptr;
-
- // If it wasn't found and if it starts with an underscore ('_') character,
- // try again without the underscore.
- if (NameStr[0] == '_') {
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
- if (Ptr) return Ptr;
- }
-
- if (AbortOnFailure)
- report_fatal_error("Program used external function '" + Name +
- "' which could not be resolved!");
- return 0;
-}
-
-LLIMCJITMemoryManager::~LLIMCJITMemoryManager() {
- for (unsigned i = 0, e = AllocatedCodeMem.size(); i != e; ++i)
- sys::Memory::ReleaseRWX(AllocatedCodeMem[i]);
- for (unsigned i = 0, e = AllocatedDataMem.size(); i != e; ++i)
- free(AllocatedDataMem[i].base());
-}
-
-
void layoutRemoteTargetMemory(RemoteTarget *T, RecordingMemoryManager *JMM) {
// Lay out our sections in order, with all the code sections first, then
// all the data sections.
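
A minimal sketch of that ordering, with hypothetical container and field names (the real function walks the RecordingMemoryManager's recorded allocations):

    // Sketch only: place code sections first, then data sections, at
    // consecutive addresses in the remote target's address space.
    uint64_t NextAddr = RemoteBaseAddr;       // assumed base address
    for (SectionInfo &S : CodeSections) {     // hypothetical containers
      S.RemoteAddr = NextAddr;
      NextAddr += S.Size;
    }
    for (SectionInfo &S : DataSections) {
      S.RemoteAddr = NextAddr;
      NextAddr += S.Size;
    }
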
@@ -561,7 +348,7 @@ int main(int argc, char **argv, char * const *envp) {
if (RemoteMCJIT)
JMM = new RecordingMemoryManager();
else
- JMM = new LLIMCJITMemoryManager();
+ JMM = new SectionMemoryManager();
builder.setJITMemoryManager(JMM);
} else {
if (RemoteMCJIT) {
@@ -662,8 +449,13 @@ int main(int argc, char **argv, char * const *envp) {
// MCJIT itself. FIXME.
//
// Run static constructors.
- if (!RemoteMCJIT)
- EE->runStaticConstructorsDestructors(false);
+ if (!RemoteMCJIT) {
+ if (UseMCJIT && !ForceInterpreter) {
+ // Give MCJIT a chance to apply relocations and set page permissions.
+ EE->finalizeObject();
+ }
+ EE->runStaticConstructorsDestructors(false);
+ }
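
A minimal sketch of the startup order this hunk enforces under MCJIT (EngineBuilder calls as used elsewhere in this patch; module M assumed already parsed, memory-manager setup omitted):

    std::string Err;
    ExecutionEngine *EE = EngineBuilder(M)
                              .setUseMCJIT(true)
                              .setErrorStr(&Err)
                              .create();
    EE->finalizeObject();                        // apply relocations, set page permissions
    EE->runStaticConstructorsDestructors(false); // JITed ctors are only now safe to run
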
if (NoLazyCompilation) {
for (Module::iterator I = Mod->begin(), E = Mod->end(); I != E; ++I) {
@@ -710,7 +502,7 @@ int main(int argc, char **argv, char * const *envp) {
(void)EE->getPointerToFunction(EntryFn);
// Clear instruction cache before code will be executed.
if (JMM)
- static_cast<LLIMCJITMemoryManager*>(JMM)->invalidateInstructionCache();
+ static_cast<SectionMemoryManager*>(JMM)->invalidateInstructionCache();
// Run main.
Result = EE->runFunctionAsMain(EntryFn, InputArgv, envp);
diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp
index a8a5013a9a4c..86eb8e272f60 100644
--- a/tools/llvm-ar/llvm-ar.cpp
+++ b/tools/llvm-ar/llvm-ar.cpp
@@ -12,20 +12,20 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Bitcode/Archive.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdlib>
-#include <memory>
#include <fstream>
+#include <memory>
using namespace llvm;
// Option for compatibility with AIX; it is not used, but it must be allowed to be present.
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index 1def9a4a2d70..273c4274b5d8 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -15,18 +15,18 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Assembly/Parser.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/Parser.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/Signals.h"
#include <memory>
using namespace llvm;
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 8109ca4d5be7..99479a46a88b 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -37,12 +37,11 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
-
-#include <map>
#include <algorithm>
+#include <map>
using namespace llvm;
static cl::opt<std::string>
@@ -99,16 +98,17 @@ static const char *GetBlockName(unsigned BlockID,
if (CurStreamType != LLVMIRBitstream) return 0;
switch (BlockID) {
- default: return 0;
- case bitc::MODULE_BLOCK_ID: return "MODULE_BLOCK";
- case bitc::PARAMATTR_BLOCK_ID: return "PARAMATTR_BLOCK";
- case bitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID";
- case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK";
- case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK";
- case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
- case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK";
- case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK";
- case bitc::USELIST_BLOCK_ID: return "USELIST_BLOCK_ID";
+ default: return 0;
+ case bitc::MODULE_BLOCK_ID: return "MODULE_BLOCK";
+ case bitc::PARAMATTR_BLOCK_ID: return "PARAMATTR_BLOCK";
+ case bitc::PARAMATTR_GROUP_BLOCK_ID: return "PARAMATTR_GROUP_BLOCK_ID";
+ case bitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID";
+ case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK";
+ case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK";
+ case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
+ case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK";
+ case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK";
+ case bitc::USELIST_BLOCK_ID: return "USELIST_BLOCK_ID";
}
}
@@ -150,7 +150,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::MODULE_CODE_DATALAYOUT: return "DATALAYOUT";
case bitc::MODULE_CODE_ASM: return "ASM";
case bitc::MODULE_CODE_SECTIONNAME: return "SECTIONNAME";
- case bitc::MODULE_CODE_DEPLIB: return "DEPLIB";
+ case bitc::MODULE_CODE_DEPLIB: return "DEPLIB"; // FIXME: Remove in 4.0
case bitc::MODULE_CODE_GLOBALVAR: return "GLOBALVAR";
case bitc::MODULE_CODE_FUNCTION: return "FUNCTION";
case bitc::MODULE_CODE_ALIAS: return "ALIAS";
@@ -160,7 +160,9 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::PARAMATTR_BLOCK_ID:
switch (CodeID) {
default: return 0;
- case bitc::PARAMATTR_CODE_ENTRY: return "ENTRY";
+ case bitc::PARAMATTR_CODE_ENTRY_OLD: return "ENTRY";
+ case bitc::PARAMATTR_CODE_ENTRY: return "ENTRY";
+ case bitc::PARAMATTR_GRP_CODE_ENTRY: return "ENTRY";
}
case bitc::TYPE_BLOCK_ID_NEW:
switch (CodeID) {
@@ -319,10 +321,10 @@ static bool Error(const std::string &Err) {
}
/// ParseBlock - Read a block, updating statistics, etc.
-static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
+static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
+ unsigned IndentLevel) {
std::string Indent(IndentLevel*2, ' ');
uint64_t BlockBitStart = Stream.GetCurrentBitNo();
- unsigned BlockID = Stream.ReadSubBlockID();
// Get the statistics for this BlockID.
PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
@@ -355,7 +357,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
outs() << " BlockID=" << BlockID;
outs() << " NumWords=" << NumWords
- << " BlockCodeSize=" << Stream.GetAbbrevIDWidth() << ">\n";
+ << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n";
}
SmallVector<uint64_t, 64> Record;
@@ -367,12 +369,13 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
uint64_t RecordStartBit = Stream.GetCurrentBitNo();
- // Read the code for this record.
- unsigned AbbrevID = Stream.ReadCode();
- switch (AbbrevID) {
- case bitc::END_BLOCK: {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of block");
+ BitstreamEntry Entry =
+ Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ return Error("malformed bitcode file");
+ case BitstreamEntry::EndBlock: {
uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
BlockStats.NumBits += BlockBitEnd-BlockBitStart;
if (Dump) {
@@ -384,80 +387,81 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
}
return false;
}
- case bitc::ENTER_SUBBLOCK: {
+
+ case BitstreamEntry::SubBlock: {
uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
- if (ParseBlock(Stream, IndentLevel+1))
+ if (ParseBlock(Stream, Entry.ID, IndentLevel+1))
return true;
++BlockStats.NumSubBlocks;
uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
-
+
// Don't include subblock sizes in the size of this block.
BlockBitStart += SubBlockBitEnd-SubBlockBitStart;
+ continue;
+ }
+ case BitstreamEntry::Record:
+ // The interesting case.
break;
}
- case bitc::DEFINE_ABBREV:
+
+ if (Entry.ID == bitc::DEFINE_ABBREV) {
Stream.ReadAbbrevRecord();
++BlockStats.NumAbbrevs;
- break;
- default:
- Record.clear();
-
- ++BlockStats.NumRecords;
- if (AbbrevID != bitc::UNABBREV_RECORD)
- ++BlockStats.NumAbbreviatedRecords;
-
- const char *BlobStart = 0;
- unsigned BlobLen = 0;
- unsigned Code = Stream.ReadRecord(AbbrevID, Record, BlobStart, BlobLen);
-
-
-
- // Increment the # occurrences of this code.
- if (BlockStats.CodeFreq.size() <= Code)
- BlockStats.CodeFreq.resize(Code+1);
- BlockStats.CodeFreq[Code].NumInstances++;
- BlockStats.CodeFreq[Code].TotalBits +=
- Stream.GetCurrentBitNo()-RecordStartBit;
- if (AbbrevID != bitc::UNABBREV_RECORD)
- BlockStats.CodeFreq[Code].NumAbbrev++;
+ continue;
+ }
+
+ Record.clear();
+
+ ++BlockStats.NumRecords;
+
+ StringRef Blob;
+ unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
+
+ // Increment the # occurrences of this code.
+ if (BlockStats.CodeFreq.size() <= Code)
+ BlockStats.CodeFreq.resize(Code+1);
+ BlockStats.CodeFreq[Code].NumInstances++;
+ BlockStats.CodeFreq[Code].TotalBits +=
+ Stream.GetCurrentBitNo()-RecordStartBit;
+ if (Entry.ID != bitc::UNABBREV_RECORD) {
+ BlockStats.CodeFreq[Code].NumAbbrev++;
+ ++BlockStats.NumAbbreviatedRecords;
+ }
- if (Dump) {
- outs() << Indent << " <";
- if (const char *CodeName =
- GetCodeName(Code, BlockID, *Stream.getBitStreamReader()))
- outs() << CodeName;
- else
- outs() << "UnknownCode" << Code;
- if (NonSymbolic &&
+ if (Dump) {
+ outs() << Indent << " <";
+ if (const char *CodeName =
GetCodeName(Code, BlockID, *Stream.getBitStreamReader()))
- outs() << " codeid=" << Code;
- if (AbbrevID != bitc::UNABBREV_RECORD)
- outs() << " abbrevid=" << AbbrevID;
-
- for (unsigned i = 0, e = Record.size(); i != e; ++i)
- outs() << " op" << i << "=" << (int64_t)Record[i];
-
- outs() << "/>";
-
- if (BlobStart) {
- outs() << " blob data = ";
- bool BlobIsPrintable = true;
- for (unsigned i = 0; i != BlobLen; ++i)
- if (!isprint(BlobStart[i])) {
- BlobIsPrintable = false;
- break;
- }
-
- if (BlobIsPrintable)
- outs() << "'" << std::string(BlobStart, BlobStart+BlobLen) <<"'";
- else
- outs() << "unprintable, " << BlobLen << " bytes.";
- }
-
- outs() << "\n";
+ outs() << CodeName;
+ else
+ outs() << "UnknownCode" << Code;
+ if (NonSymbolic &&
+ GetCodeName(Code, BlockID, *Stream.getBitStreamReader()))
+ outs() << " codeid=" << Code;
+ if (Entry.ID != bitc::UNABBREV_RECORD)
+ outs() << " abbrevid=" << Entry.ID;
+
+ for (unsigned i = 0, e = Record.size(); i != e; ++i)
+ outs() << " op" << i << "=" << (int64_t)Record[i];
+
+ outs() << "/>";
+
+ if (Blob.data()) {
+ outs() << " blob data = ";
+ bool BlobIsPrintable = true;
+ for (unsigned i = 0, e = Blob.size(); i != e; ++i)
+ if (!isprint(static_cast<unsigned char>(Blob[i]))) {
+ BlobIsPrintable = false;
+ break;
+ }
+
+ if (BlobIsPrintable)
+ outs() << "'" << Blob << "'";
+ else
+ outs() << "unprintable, " << Blob.size() << " bytes.";
}
- break;
+ outs() << "\n";
}
}
}
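
Distilled, the new BitstreamCursor reading pattern introduced above looks like this (a sketch that mirrors the hunk, using only calls that appear in it):

    while (true) {
      BitstreamEntry Entry =
          Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
      switch (Entry.Kind) {
      case BitstreamEntry::Error:    return Error("malformed bitcode file");
      case BitstreamEntry::EndBlock: return false;            // block finished
      case BitstreamEntry::SubBlock: /* recurse on Entry.ID */ continue;
      case BitstreamEntry::Record:   break;                   // handled below
      }
      if (Entry.ID == bitc::DEFINE_ABBREV) {
        Stream.ReadAbbrevRecord();                            // abbrevs are not records
        continue;
      }
      Record.clear();
      StringRef Blob;
      unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
      // ... act on Code, Record, and Blob ...
    }
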
@@ -520,7 +524,9 @@ static int AnalyzeBitcode() {
if (Code != bitc::ENTER_SUBBLOCK)
return Error("Invalid record at top-level");
- if (ParseBlock(Stream, 0))
+ unsigned BlockID = Stream.ReadSubBlockID();
+
+ if (ParseBlock(Stream, BlockID, 0))
return true;
++NumTopBlocks;
}
diff --git a/tools/llvm-diff/CMakeLists.txt b/tools/llvm-diff/CMakeLists.txt
index c59d69ea0d45..0df8b9ed79e2 100644
--- a/tools/llvm-diff/CMakeLists.txt
+++ b/tools/llvm-diff/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS support asmparser bitreader)
+set(LLVM_LINK_COMPONENTS support asmparser bitreader irreader)
add_llvm_tool(llvm-diff
llvm-diff.cpp
diff --git a/tools/llvm-diff/DiffConsumer.cpp b/tools/llvm-diff/DiffConsumer.cpp
index 91c1699bee53..9078013c1c16 100644
--- a/tools/llvm-diff/DiffConsumer.cpp
+++ b/tools/llvm-diff/DiffConsumer.cpp
@@ -12,9 +12,8 @@
//===----------------------------------------------------------------------===//
#include "DiffConsumer.h"
-
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/tools/llvm-diff/DiffConsumer.h b/tools/llvm-diff/DiffConsumer.h
index 98e369ba9768..6c2209f27e66 100644
--- a/tools/llvm-diff/DiffConsumer.h
+++ b/tools/llvm-diff/DiffConsumer.h
@@ -15,12 +15,11 @@
#define _LLVM_DIFFCONSUMER_H_
#include "DiffLog.h"
-
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
class Module;
diff --git a/tools/llvm-diff/DiffLog.cpp b/tools/llvm-diff/DiffLog.cpp
index 9cc0c889fd1f..caf779bb4030 100644
--- a/tools/llvm-diff/DiffLog.cpp
+++ b/tools/llvm-diff/DiffLog.cpp
@@ -13,10 +13,9 @@
#include "DiffLog.h"
#include "DiffConsumer.h"
-
-#include "llvm/Instructions.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Instructions.h"
using namespace llvm;
diff --git a/tools/llvm-diff/DifferenceEngine.cpp b/tools/llvm-diff/DifferenceEngine.cpp
index 0c1e30c987ea..4b11315b08f0 100644
--- a/tools/llvm-diff/DifferenceEngine.cpp
+++ b/tools/llvm-diff/DifferenceEngine.cpp
@@ -13,22 +13,20 @@
//===----------------------------------------------------------------------===//
#include "DifferenceEngine.h"
-
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/type_traits.h"
-
#include <utility>
using namespace llvm;
diff --git a/tools/llvm-diff/DifferenceEngine.h b/tools/llvm-diff/DifferenceEngine.h
index 0246d8ff5d6b..73bf6eb6ea5b 100644
--- a/tools/llvm-diff/DifferenceEngine.h
+++ b/tools/llvm-diff/DifferenceEngine.h
@@ -15,11 +15,10 @@
#ifndef _LLVM_DIFFERENCE_ENGINE_H_
#define _LLVM_DIFFERENCE_ENGINE_H_
+#include "DiffConsumer.h"
+#include "DiffLog.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "DiffLog.h"
-#include "DiffConsumer.h"
-
#include <utility>
namespace llvm {
diff --git a/tools/llvm-diff/LLVMBuild.txt b/tools/llvm-diff/LLVMBuild.txt
index fa06a03353bb..5adfdc2bd6e9 100644
--- a/tools/llvm-diff/LLVMBuild.txt
+++ b/tools/llvm-diff/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = llvm-diff
parent = Tools
-required_libraries = AsmParser BitReader
+required_libraries = AsmParser BitReader IRReader
diff --git a/tools/llvm-diff/Makefile b/tools/llvm-diff/Makefile
index f7fa7159c54f..bd97a6a9f5e9 100644
--- a/tools/llvm-diff/Makefile
+++ b/tools/llvm-diff/Makefile
@@ -9,7 +9,7 @@
LEVEL := ../..
TOOLNAME := llvm-diff
-LINK_COMPONENTS := asmparser bitreader
+LINK_COMPONENTS := asmparser bitreader irreader
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS := 1
diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp
index 45957b3f8c92..6eca1e2bfcde 100644
--- a/tools/llvm-diff/llvm-diff.cpp
+++ b/tools/llvm-diff/llvm-diff.cpp
@@ -13,19 +13,17 @@
#include "DiffLog.h"
#include "DifferenceEngine.h"
-
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/IRReader.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
-
+#include "llvm/Support/raw_ostream.h"
#include <string>
#include <utility>
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 41f023d4c43f..2baa91da509e 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -16,21 +16,21 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataStream.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index e73300a0cd8d..80948560ca9a 100644
--- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -12,11 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/DebugInfo/DIContext.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/RelocVisitor.h"
-#include "llvm/DebugInfo/DIContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
@@ -52,6 +52,25 @@ static cl::opt<bool>
PrintInlining("inlining", cl::init(false),
cl::desc("Print all inlined frames for a given address"));
+static cl::opt<DIDumpType>
+DumpType("debug-dump", cl::init(DIDT_All),
+ cl::desc("Dump of debug sections:"),
+ cl::values(
+ clEnumValN(DIDT_All, "all", "Dump all debug sections"),
+ clEnumValN(DIDT_Abbrev, "abbrev", ".debug_abbrev"),
+ clEnumValN(DIDT_AbbrevDwo, "abbrev.dwo", ".debug_abbrev.dwo"),
+ clEnumValN(DIDT_Aranges, "aranges", ".debug_aranges"),
+ clEnumValN(DIDT_Info, "info", ".debug_info"),
+ clEnumValN(DIDT_InfoDwo, "info.dwo", ".debug_info.dwo"),
+ clEnumValN(DIDT_Line, "line", ".debug_line"),
+ clEnumValN(DIDT_Frames, "frames", ".debug_frame"),
+ clEnumValN(DIDT_Ranges, "ranges", ".debug_ranges"),
+ clEnumValN(DIDT_Pubnames, "pubnames", ".debug_pubnames"),
+ clEnumValN(DIDT_Str, "str", ".debug_str"),
+ clEnumValN(DIDT_StrDwo, "str.dwo", ".debug_str.dwo"),
+ clEnumValN(DIDT_StrOffsetsDwo, "str_offsets.dwo", ".debug_str_offsets.dwo"),
+ clEnumValEnd));
+
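
With the option above in place, llvm-dwarfdump -debug-dump=line foo.o restricts output to .debug_line, while omitting the flag keeps the old dump-everything behaviour (DIDT_All is the default).
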
static void PrintDILineInfo(DILineInfo dli) {
if (PrintFunctions)
outs() << (dli.getFunctionName() ? dli.getFunctionName() : "<unknown>")
@@ -69,105 +88,18 @@ static void DumpInput(const StringRef &Filename) {
}
OwningPtr<ObjectFile> Obj(ObjectFile::createObjectFile(Buff.take()));
-
- StringRef DebugInfoSection;
- RelocAddrMap RelocMap;
- StringRef DebugAbbrevSection;
- StringRef DebugLineSection;
- StringRef DebugArangesSection;
- StringRef DebugStringSection;
- StringRef DebugRangesSection;
-
- error_code ec;
- for (section_iterator i = Obj->begin_sections(),
- e = Obj->end_sections();
- i != e; i.increment(ec)) {
- StringRef name;
- i->getName(name);
- StringRef data;
- i->getContents(data);
-
- if (name.startswith("__DWARF,"))
- name = name.substr(8); // Skip "__DWARF," prefix.
- name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
- if (name == "debug_info")
- DebugInfoSection = data;
- else if (name == "debug_abbrev")
- DebugAbbrevSection = data;
- else if (name == "debug_line")
- DebugLineSection = data;
- else if (name == "debug_aranges")
- DebugArangesSection = data;
- else if (name == "debug_str")
- DebugStringSection = data;
- else if (name == "debug_ranges")
- DebugRangesSection = data;
- // Any more debug info sections go here.
- else
- continue;
-
- // TODO: For now only handle relocations for the debug_info section.
- if (name != "debug_info")
- continue;
-
- if (i->begin_relocations() != i->end_relocations()) {
- uint64_t SectionSize;
- i->getSize(SectionSize);
- for (relocation_iterator reloc_i = i->begin_relocations(),
- reloc_e = i->end_relocations();
- reloc_i != reloc_e; reloc_i.increment(ec)) {
- uint64_t Address;
- reloc_i->getAddress(Address);
- uint64_t Type;
- reloc_i->getType(Type);
-
- RelocVisitor V(Obj->getFileFormatName());
- // The section address is always 0 for debug sections.
- RelocToApply R(V.visit(Type, *reloc_i));
- if (V.error()) {
- SmallString<32> Name;
- error_code ec(reloc_i->getTypeName(Name));
- if (ec) {
- errs() << "Aaaaaa! Nameless relocation! Aaaaaa!\n";
- }
- errs() << "error: failed to compute relocation: "
- << Name << "\n";
- continue;
- }
-
- if (Address + R.Width > SectionSize) {
- errs() << "error: " << R.Width << "-byte relocation starting "
- << Address << " bytes into section " << name << " which is "
- << SectionSize << " bytes long.\n";
- continue;
- }
- if (R.Width > 8) {
- errs() << "error: can't handle a relocation of more than 8 bytes at "
- "a time.\n";
- continue;
- }
- DEBUG(dbgs() << "Writing " << format("%p", R.Value)
- << " at " << format("%p", Address)
- << " with width " << format("%d", R.Width)
- << "\n");
- RelocMap[Address] = std::make_pair(R.Width, R.Value);
- }
- }
+ if (!Obj) {
+ errs() << Filename << ": Unknown object file format\n";
+ return;
}
- OwningPtr<DIContext> dictx(DIContext::getDWARFContext(/*FIXME*/true,
- DebugInfoSection,
- DebugAbbrevSection,
- DebugArangesSection,
- DebugLineSection,
- DebugStringSection,
- DebugRangesSection,
- RelocMap));
+ OwningPtr<DIContext> DICtx(DIContext::getDWARFContext(Obj.get()));
+
if (Address == -1ULL) {
outs() << Filename
<< ":\tfile format " << Obj->getFileFormatName() << "\n\n";
// Dump the complete DWARF structure.
- dictx->dump(outs());
+ DICtx->dump(outs(), DumpType);
} else {
// Print line info for the specified address.
int SpecFlags = DILineInfoSpecifier::FileLineInfo |
@@ -176,7 +108,7 @@ static void DumpInput(const StringRef &Filename) {
SpecFlags |= DILineInfoSpecifier::FunctionName;
if (PrintInlining) {
DIInliningInfo InliningInfo =
- dictx->getInliningInfoForAddress(Address, SpecFlags);
+ DICtx->getInliningInfoForAddress(Address, SpecFlags);
uint32_t n = InliningInfo.getNumberOfFrames();
if (n == 0) {
// Print one empty debug line info in any case.
@@ -188,7 +120,7 @@ static void DumpInput(const StringRef &Filename) {
}
}
} else {
- DILineInfo dli = dictx->getLineInfoForAddress(Address, SpecFlags);
+ DILineInfo dli = DICtx->getLineInfoForAddress(Address, SpecFlags);
PrintDILineInfo(dli);
}
}
diff --git a/tools/llvm-extract/CMakeLists.txt b/tools/llvm-extract/CMakeLists.txt
index a4e3266e3532..3163c4bbbddb 100644
--- a/tools/llvm-extract/CMakeLists.txt
+++ b/tools/llvm-extract/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS asmparser ipo bitreader bitwriter)
+set(LLVM_LINK_COMPONENTS asmparser ipo bitreader bitwriter irreader)
add_llvm_tool(llvm-extract
llvm-extract.cpp
diff --git a/tools/llvm-extract/LLVMBuild.txt b/tools/llvm-extract/LLVMBuild.txt
index 1b1a4c36cdd1..70e3507a73eb 100644
--- a/tools/llvm-extract/LLVMBuild.txt
+++ b/tools/llvm-extract/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = llvm-extract
parent = Tools
-required_libraries = AsmParser BitReader BitWriter IPO
+required_libraries = AsmParser BitReader BitWriter IRReader IPO
diff --git a/tools/llvm-extract/Makefile b/tools/llvm-extract/Makefile
index a1e93f5ce468..d371c5475926 100644
--- a/tools/llvm-extract/Makefile
+++ b/tools/llvm-extract/Makefile
@@ -9,7 +9,7 @@
LEVEL := ../..
TOOLNAME := llvm-extract
-LINK_COMPONENTS := ipo bitreader bitwriter asmparser
+LINK_COMPONENTS := ipo bitreader bitwriter asmparser irreader
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS := 1
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index ac82d98b3b77..fd0a3818078f 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -12,23 +12,24 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/IRReader.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/Signals.h"
#include "llvm/Support/Regex.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Transforms/IPO.h"
#include <memory>
using namespace llvm;
diff --git a/tools/llvm-jitlistener/CMakeLists.txt b/tools/llvm-jitlistener/CMakeLists.txt
new file mode 100644
index 000000000000..c9704fb22489
--- /dev/null
+++ b/tools/llvm-jitlistener/CMakeLists.txt
@@ -0,0 +1,22 @@
+# This tool is excluded from the CMake build if Intel JIT events are disabled.
+
+link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} )
+include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} )
+
+set(LLVM_LINK_COMPONENTS
+ asmparser
+ bitreader
+ debuginfo
+ inteljitevents
+ interpreter
+ irreader
+ jit
+ mcjit
+ nativecodegen
+ object
+ selectiondag
+ )
+
+add_llvm_tool(llvm-jitlistener
+ llvm-jitlistener.cpp
+ )
diff --git a/tools/llvm-jitlistener/LLVMBuild.txt b/tools/llvm-jitlistener/LLVMBuild.txt
new file mode 100644
index 000000000000..1ce78acecbb6
--- /dev/null
+++ b/tools/llvm-jitlistener/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-jitlistener/LLVMBuild.txt -------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-jitlistener
+parent = Tools
+required_libraries = AsmParser BitReader IRReader Interpreter JIT MCJIT NativeCodeGen Object SelectionDAG Native
diff --git a/tools/llvm-jitlistener/Makefile b/tools/llvm-jitlistener/Makefile
new file mode 100644
index 000000000000..b13222731745
--- /dev/null
+++ b/tools/llvm-jitlistener/Makefile
@@ -0,0 +1,27 @@
+##===- tools/llvm-jitlistener/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../..
+TOOLNAME := llvm-jitlistener
+
+include $(LEVEL)/Makefile.config
+
+LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser irreader selectiondag Object
+
+# If Intel JIT Events support is configured, link against the LLVM Intel JIT
+# Events interface library. If not, this tool will do nothing useful, but it
+# will build correctly.
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+ LINK_COMPONENTS += debuginfo inteljitevents
+endif
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS := 1
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/tools/llvm-jitlistener/llvm-jitlistener.cpp b/tools/llvm-jitlistener/llvm-jitlistener.cpp
new file mode 100644
index 000000000000..dbaf075e91cf
--- /dev/null
+++ b/tools/llvm-jitlistener/llvm-jitlistener.cpp
@@ -0,0 +1,207 @@
+//===-- llvm-jitlistener.cpp - Utility for testing MCJIT event listener ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program is used by lit tests to verify the MCJIT JITEventListener
+// interface. It registers a mock JIT event listener, generates a module from
+// an input IR file and dumps the reported event information to stdout.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/LLVMContext.h"
+#include "../../lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include <string>
+
+using namespace llvm;
+
+namespace {
+
+typedef std::vector<std::pair<std::string, unsigned int> > SourceLocations;
+typedef std::map<uint64_t, SourceLocations> NativeCodeMap;
+
+NativeCodeMap ReportedDebugFuncs;
+
+int NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) {
+ switch (EventType) {
+ case iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED: {
+ if (!EventSpecificData) {
+      errs() <<
+        "Error: The JIT event listener did not provide any event data.";
+ return -1;
+ }
+ iJIT_Method_Load* msg = static_cast<iJIT_Method_Load*>(EventSpecificData);
+
+ ReportedDebugFuncs[msg->method_id];
+
+ outs() << "Method load [" << msg->method_id << "]: " << msg->method_name
+ << ", Size = " << msg->method_size << "\n";
+
+ for(unsigned int i = 0; i < msg->line_number_size; ++i) {
+ if (!msg->line_number_table) {
+ errs() << "A function with a non-zero line count had no line table.";
+ return -1;
+ }
+ std::pair<std::string, unsigned int> loc(
+ std::string(msg->source_file_name),
+ msg->line_number_table[i].LineNumber);
+ ReportedDebugFuncs[msg->method_id].push_back(loc);
+ outs() << " Line info @ " << msg->line_number_table[i].Offset
+ << ": " << msg->source_file_name
+ << ", line " << msg->line_number_table[i].LineNumber << "\n";
+ }
+ outs() << "\n";
+ }
+ break;
+ case iJVM_EVENT_TYPE_METHOD_UNLOAD_START: {
+ if (!EventSpecificData) {
+      errs() <<
+        "Error: The JIT event listener did not provide any event data.";
+ return -1;
+ }
+ unsigned int UnloadId
+ = *reinterpret_cast<unsigned int*>(EventSpecificData);
+ assert(1 == ReportedDebugFuncs.erase(UnloadId));
+ outs() << "Method unload [" << UnloadId << "]\n";
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+iJIT_IsProfilingActiveFlags IsProfilingActive(void) {
+  // For testing, pretend we have an Intel Parallel Amplifier XE 2011
+  // instance attached.
+ return iJIT_SAMPLING_ON;
+}
+
+unsigned int GetNewMethodID(void) {
+ static unsigned int id = 0;
+ return ++id;
+}
+
+class JitEventListenerTest {
+protected:
+ void InitEE(const std::string &IRFile) {
+ LLVMContext &Context = getGlobalContext();
+
+ // If we have a native target, initialize it to ensure it is linked in and
+ // usable by the JIT.
+ InitializeNativeTarget();
+ InitializeNativeTargetAsmPrinter();
+
+ // Parse the bitcode...
+ SMDiagnostic Err;
+ TheModule = ParseIRFile(IRFile, Err, Context);
+ if (!TheModule) {
+ errs() << Err.getMessage();
+ return;
+ }
+
+ // FIXME: This is using the default legacy JITMemoryManager because it
+ // supports poison memory. At some point, we'll need to update this to
+ // use an MCJIT-specific memory manager. It might be nice to have the
+ // poison memory option there too.
+ JITMemoryManager *MemMgr = JITMemoryManager::CreateDefaultMemManager();
+ if (!MemMgr) {
+ errs() << "Unable to create memory manager.";
+ return;
+ }
+
+ // Tell the memory manager to poison freed memory so that accessing freed
+ // memory is more easily tested.
+ MemMgr->setPoisonMemory(true);
+
+    // Override the triple to generate ELF on Windows, since that's the supported format there.
+ Triple Tuple(TheModule->getTargetTriple());
+ if (Tuple.getTriple().empty())
+ Tuple.setTriple(sys::getProcessTriple());
+
+ if (Tuple.isOSWindows() && Triple::ELF != Tuple.getEnvironment()) {
+ Tuple.setEnvironment(Triple::ELF);
+ TheModule->setTargetTriple(Tuple.getTriple());
+ }
+
+ // Compile the IR
+ std::string Error;
+ TheJIT.reset(EngineBuilder(TheModule)
+ .setEngineKind(EngineKind::JIT)
+ .setErrorStr(&Error)
+ .setJITMemoryManager(MemMgr)
+ .setUseMCJIT(true)
+ .create());
+  if (!Error.empty())
+ errs() << Error;
+ }
+
+ void DestroyEE() {
+ TheJIT.reset();
+ }
+
+ LLVMContext Context; // Global ownership
+ Module *TheModule; // Owned by ExecutionEngine.
+ JITMemoryManager *JMM; // Owned by ExecutionEngine.
+ OwningPtr<ExecutionEngine> TheJIT;
+
+public:
+ void ProcessInput(const std::string &Filename) {
+ InitEE(Filename);
+
+ llvm::OwningPtr<llvm::JITEventListener> Listener(JITEventListener::createIntelJITEventListener(
+ new IntelJITEventsWrapper(NotifyEvent, 0,
+ IsProfilingActive, 0, 0,
+ GetNewMethodID)));
+
+ TheJIT->RegisterJITEventListener(Listener.get());
+
+ TheJIT->finalizeObject();
+
+ // Destroy the JIT engine instead of unregistering to get unload events.
+ DestroyEE();
+ }
+};
+
+
+
+} // end anonymous namespace
+
+static cl::opt<std::string>
+InputFilename(cl::Positional, cl::desc("<input IR file>"),
+ cl::Required);
+
+int main(int argc, char **argv) {
+ // Print a stack trace if we signal out.
+ sys::PrintStackTraceOnErrorSignal();
+ PrettyStackTraceProgram X(argc, argv);
+ llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+ cl::ParseCommandLineOptions(argc, argv, "llvm jit event listener test utility\n");
+
+ JitEventListenerTest Test;
+
+ Test.ProcessInput(InputFilename);
+
+ return 0;
+}
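
Usage follows directly from the positional option above: llvm-jitlistener <input IR file>. A lit test can then match the "Method load [...]" and "Method unload [...]" lines that NotifyEvent prints.
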
diff --git a/tools/llvm-link/CMakeLists.txt b/tools/llvm-link/CMakeLists.txt
index 11933f7f959e..4df53564e182 100644
--- a/tools/llvm-link/CMakeLists.txt
+++ b/tools/llvm-link/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS linker bitreader bitwriter asmparser)
+set(LLVM_LINK_COMPONENTS linker bitreader bitwriter asmparser irreader)
add_llvm_tool(llvm-link
llvm-link.cpp
diff --git a/tools/llvm-link/LLVMBuild.txt b/tools/llvm-link/LLVMBuild.txt
index 6399dede784e..2e386f3c2316 100644
--- a/tools/llvm-link/LLVMBuild.txt
+++ b/tools/llvm-link/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = llvm-link
parent = Tools
-required_libraries = AsmParser BitReader BitWriter Linker
+required_libraries = AsmParser BitReader BitWriter IRReader Linker
diff --git a/tools/llvm-link/Makefile b/tools/llvm-link/Makefile
index 2553db0cd39c..ed30d2d256b8 100644
--- a/tools/llvm-link/Makefile
+++ b/tools/llvm-link/Makefile
@@ -9,7 +9,7 @@
LEVEL := ../..
TOOLNAME := llvm-link
-LINK_COMPONENTS := linker bitreader bitwriter asmparser
+LINK_COMPONENTS := linker bitreader bitwriter asmparser irreader
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS := 1
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index 378a83368f37..83665cc1758e 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -13,18 +13,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/Linker.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/IRReader.h"
#include "llvm/Support/Signals.h"
-#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/ToolOutputFile.h"
#include <memory>
using namespace llvm;
@@ -111,9 +112,6 @@ int main(int argc, char **argv) {
}
}
- // TODO: Iterate over the -l list and link in any modules containing
- // global symbols that have not been resolved so far.
-
if (DumpAsm) errs() << "Here's the assembly:\n" << *Composite;
std::string ErrorInfo;
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index 5f2fdb807142..06c7721d7e8c 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -13,16 +13,12 @@
//===----------------------------------------------------------------------===//
#include "Disassembler.h"
-#include "../../lib/MC/MCDisassembler/EDDisassembler.h"
-#include "../../lib/MC/MCDisassembler/EDInst.h"
-#include "../../lib/MC/MCDisassembler/EDOperand.h"
-#include "../../lib/MC/MCDisassembler/EDToken.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/SourceMgr.h"
@@ -169,175 +165,3 @@ int Disassembler::disassemble(const Target &T,
return ErrorOccurred;
}
-
-static int byteArrayReader(uint8_t *B, uint64_t A, void *Arg) {
- ByteArrayTy &ByteArray = *((ByteArrayTy*)Arg);
-
- if (A >= ByteArray.size())
- return -1;
-
- *B = ByteArray[A].first;
-
- return 0;
-}
-
-static int verboseEvaluator(uint64_t *V, unsigned R, void *Arg) {
- EDDisassembler &disassembler = *(EDDisassembler *)((void **)Arg)[0];
- raw_ostream &Out = *(raw_ostream *)((void **)Arg)[1];
-
- if (const char *regName = disassembler.nameWithRegisterID(R))
- Out << "[" << regName << "/" << R << "]";
-
- if (disassembler.registerIsStackPointer(R))
- Out << "(sp)";
- if (disassembler.registerIsProgramCounter(R))
- Out << "(pc)";
-
- *V = 0;
- return 0;
-}
-
-int Disassembler::disassembleEnhanced(const std::string &TS,
- MemoryBuffer &Buffer,
- SourceMgr &SM,
- raw_ostream &Out) {
- ByteArrayTy ByteArray;
- StringRef Str = Buffer.getBuffer();
-
- if (ByteArrayFromString(ByteArray, Str, SM)) {
- return -1;
- }
-
- Triple T(TS);
- EDDisassembler::AssemblySyntax AS;
-
- switch (T.getArch()) {
- default:
- errs() << "error: no default assembly syntax for " << TS.c_str() << "\n";
- return -1;
- case Triple::arm:
- case Triple::thumb:
- AS = EDDisassembler::kEDAssemblySyntaxARMUAL;
- break;
- case Triple::x86:
- case Triple::x86_64:
- AS = EDDisassembler::kEDAssemblySyntaxX86ATT;
- break;
- }
-
- OwningPtr<EDDisassembler>
- disassembler(EDDisassembler::getDisassembler(TS.c_str(), AS));
-
- if (disassembler == 0) {
- errs() << "error: couldn't get disassembler for " << TS << '\n';
- return -1;
- }
-
- while (ByteArray.size()) {
- OwningPtr<EDInst>
- inst(disassembler->createInst(byteArrayReader, 0, &ByteArray));
-
- if (inst == 0) {
- errs() << "error: Didn't get an instruction\n";
- return -1;
- }
-
- ByteArray.erase (ByteArray.begin(), ByteArray.begin() + inst->byteSize());
-
- unsigned numTokens = inst->numTokens();
- if ((int)numTokens < 0) {
- errs() << "error: couldn't count the instruction's tokens\n";
- return -1;
- }
-
- for (unsigned tokenIndex = 0; tokenIndex != numTokens; ++tokenIndex) {
- EDToken *token;
-
- if (inst->getToken(token, tokenIndex)) {
- errs() << "error: Couldn't get token\n";
- return -1;
- }
-
- const char *buf;
- if (token->getString(buf)) {
- errs() << "error: Couldn't get string for token\n";
- return -1;
- }
-
- Out << '[';
- int operandIndex = token->operandID();
-
- if (operandIndex >= 0)
- Out << operandIndex << "-";
-
- switch (token->type()) {
- case EDToken::kTokenWhitespace: Out << "w"; break;
- case EDToken::kTokenPunctuation: Out << "p"; break;
- case EDToken::kTokenOpcode: Out << "o"; break;
- case EDToken::kTokenLiteral: Out << "l"; break;
- case EDToken::kTokenRegister: Out << "r"; break;
- }
-
- Out << ":" << buf;
-
- if (token->type() == EDToken::kTokenLiteral) {
- Out << "=";
- if (token->literalSign())
- Out << "-";
- uint64_t absoluteValue;
- if (token->literalAbsoluteValue(absoluteValue)) {
- errs() << "error: Couldn't get the value of a literal token\n";
- return -1;
- }
- Out << absoluteValue;
- } else if (token->type() == EDToken::kTokenRegister) {
- Out << "=";
- unsigned regID;
- if (token->registerID(regID)) {
- errs() << "error: Couldn't get the ID of a register token\n";
- return -1;
- }
- Out << "r" << regID;
- }
-
- Out << "]";
- }
-
- Out << " ";
-
- if (inst->isBranch())
- Out << "<br> ";
- if (inst->isMove())
- Out << "<mov> ";
-
- unsigned numOperands = inst->numOperands();
-
- if ((int)numOperands < 0) {
- errs() << "error: Couldn't count operands\n";
- return -1;
- }
-
- for (unsigned operandIndex = 0; operandIndex != numOperands;
- ++operandIndex) {
- Out << operandIndex << ":";
-
- EDOperand *operand;
- if (inst->getOperand(operand, operandIndex)) {
- errs() << "error: couldn't get operand\n";
- return -1;
- }
-
- uint64_t evaluatedResult;
- void *Arg[] = { disassembler.get(), &Out };
- if (operand->evaluate(evaluatedResult, verboseEvaluator, Arg)) {
- errs() << "error: Couldn't evaluate an operand\n";
- return -1;
- }
- Out << "=" << evaluatedResult << " ";
- }
-
- Out << '\n';
- }
-
- return 0;
-}
diff --git a/tools/llvm-mc/Disassembler.h b/tools/llvm-mc/Disassembler.h
index 17d622f1d9d0..5615da8d3d36 100644
--- a/tools/llvm-mc/Disassembler.h
+++ b/tools/llvm-mc/Disassembler.h
@@ -35,11 +35,6 @@ public:
MemoryBuffer &Buffer,
SourceMgr &SM,
raw_ostream &Out);
-
- static int disassembleEnhanced(const std::string &tripleString,
- MemoryBuffer &buffer,
- SourceMgr &SM,
- raw_ostream &Out);
};
} // namespace llvm
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index f7c3748f079b..243899bb881c 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -12,36 +12,35 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCParser/AsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "Disassembler.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/system_error.h"
-#include "Disassembler.h"
using namespace llvm;
static cl::opt<std::string>
@@ -69,6 +68,9 @@ static cl::opt<bool>
RelaxAll("mc-relax-all", cl::desc("Relax all fixups"));
static cl::opt<bool>
+DisableCFI("disable-cfi", cl::desc("Do not use .cfi_* directives"));
+
+static cl::opt<bool>
NoExecStack("mc-no-exec-stack", cl::desc("File doesn't need an exec stack"));
enum OutputFileType {
@@ -154,12 +156,20 @@ static cl::opt<bool>
GenDwarfForAssembly("g", cl::desc("Generate dwarf debugging info for assembly "
"source files"));
+static cl::opt<std::string>
+DebugCompilationDir("fdebug-compilation-dir",
+ cl::desc("Specifies the debug info's compilation dir"));
+
+static cl::opt<std::string>
+MainFileName("main-file-name",
+             cl::desc("Specifies the name to use for the input file"));
+
enum ActionType {
AC_AsLex,
AC_Assemble,
AC_Disassemble,
- AC_EDisassemble,
- AC_MDisassemble
+ AC_MDisassemble,
+ AC_HDisassemble
};
static cl::opt<ActionType>
@@ -171,10 +181,11 @@ Action(cl::desc("Action to perform:"),
"Assemble a .s file (default)"),
clEnumValN(AC_Disassemble, "disassemble",
"Disassemble strings of hex bytes"),
- clEnumValN(AC_EDisassemble, "edis",
- "Enhanced disassembly of strings of hex bytes"),
clEnumValN(AC_MDisassemble, "mdis",
"Marked up disassembly of strings of hex bytes"),
+ clEnumValN(AC_HDisassemble, "hdis",
+ "Disassemble strings of hex bytes printing "
+ "immediates as hex"),
clEnumValEnd));
static const Target *GetTarget(const char *ProgName) {
@@ -224,6 +235,13 @@ static void setDwarfDebugFlags(int argc, char **argv) {
}
}
+static std::string DwarfDebugProducer;
+static void setDwarfDebugProducer(void) {
+  if (!getenv("DEBUG_PRODUCER"))
+ return;
+ DwarfDebugProducer += getenv("DEBUG_PRODUCER");
+}
+
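
The producer string thus comes from the environment; for example, running DEBUG_PRODUCER="some assembler" llvm-mc ... records that string in the DWARF via the Ctx.setDwarfDebugProducer() call added below.
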
static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, tool_output_file *Out) {
AsmLexer Lexer(MAI);
@@ -251,9 +269,6 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, tool_output_file *Out)
case AsmToken::Real:
Out->os() << "real: " << Lexer.getTok().getString();
break;
- case AsmToken::Register:
- Out->os() << "register: " << Lexer.getTok().getRegVal();
- break;
case AsmToken::String:
Out->os() << "string: " << Lexer.getTok().getString();
break;
@@ -344,6 +359,8 @@ int main(int argc, char **argv) {
TripleName = Triple::normalize(TripleName);
setDwarfDebugFlags(argc, argv);
+ setDwarfDebugProducer();
+
const char *ProgName = argv[0];
const Target *TheTarget = GetTarget(ProgName);
if (!TheTarget)
@@ -365,7 +382,6 @@ int main(int argc, char **argv) {
// it later.
SrcMgr.setIncludeDirs(IncludeDirs);
-
llvm::OwningPtr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(TripleName));
assert(MAI && "Unable to create target asm info!");
@@ -382,8 +398,14 @@ int main(int argc, char **argv) {
Ctx.setAllowTemporaryLabels(false);
Ctx.setGenDwarfForAssembly(GenDwarfForAssembly);
- if (!DwarfDebugFlags.empty())
+ if (!DwarfDebugFlags.empty())
Ctx.setDwarfDebugFlags(StringRef(DwarfDebugFlags));
+ if (!DwarfDebugProducer.empty())
+ Ctx.setDwarfDebugProducer(StringRef(DwarfDebugProducer));
+ if (!DebugCompilationDir.empty())
+ Ctx.setCompilationDir(DebugCompilationDir);
+ if (!MainFileName.empty())
+ Ctx.setMainFileName(MainFileName);
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
@@ -405,7 +427,7 @@ int main(int argc, char **argv) {
OwningPtr<MCSubtargetInfo>
STI(TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
- MCInstPrinter *IP;
+ MCInstPrinter *IP = NULL;
if (FileType == OFT_AssemblyFile) {
IP =
TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *MCII, *MRI, *STI);
@@ -415,9 +437,10 @@ int main(int argc, char **argv) {
CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, *STI, Ctx);
MAB = TheTarget->createMCAsmBackend(TripleName, MCPU);
}
+ bool UseCFI = !DisableCFI;
Str.reset(TheTarget->createAsmStreamer(Ctx, FOS, /*asmverbose*/true,
/*useLoc*/ true,
- /*useCFI*/ true,
+ UseCFI,
/*useDwarfDirectory*/ true,
IP, CE, MAB, ShowInst));
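
UseCFI simply un-hardcodes the useCFI argument that was previously fixed to true, so llvm-mc -disable-cfi asks the asm streamer to avoid .cfi_* directives, matching the new option's description earlier in this file.
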
@@ -433,6 +456,7 @@ int main(int argc, char **argv) {
}
int Res = 1;
+ bool disassemble = false;
switch (Action) {
case AC_AsLex:
Res = AsLexInput(SrcMgr, *MAI, Out.get());
@@ -441,16 +465,22 @@ int main(int argc, char **argv) {
Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI);
break;
case AC_MDisassemble:
+ assert(IP && "Expected assembly output");
IP->setUseMarkup(1);
- // Fall through to do disassembly.
- case AC_Disassemble:
- Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str,
- *Buffer, SrcMgr, Out->os());
+ disassemble = true;
break;
- case AC_EDisassemble:
- Res = Disassembler::disassembleEnhanced(TripleName, *Buffer, SrcMgr, Out->os());
+ case AC_HDisassemble:
+ assert(IP && "Expected assembly output");
+ IP->setPrintImmHex(1);
+ disassemble = true;
+ break;
+ case AC_Disassemble:
+ disassemble = true;
break;
}
+ if (disassemble)
+ Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str,
+ *Buffer, SrcMgr, Out->os());
// Keep output if no errors.
if (Res == 0) Out->keep();
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index 0543e83f9cb4..a24aae6061a4 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -16,21 +16,21 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Bitcode/Archive.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Module.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Program.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <algorithm>
#include <cctype>
@@ -113,6 +113,10 @@ namespace {
cl::opt<bool> WithoutAliases("without-aliases", cl::Hidden,
cl::desc("Exclude aliases from output"));
+ cl::opt<bool> ArchiveMap("print-armap",
+ cl::desc("Print the archive map"));
+ cl::alias ArchiveMaps("s", cl::desc("Alias for --print-armap"),
+ cl::aliasopt(ArchiveMap));
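
With these options, llvm-nm -s libfoo.a (long form -print-armap) prints an "Archive map" listing each archive-map symbol and the member that defines it; the implementation is in the DumpSymbolNamesFromFile hunk further down.
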
bool PrintAddress = true;
bool MultipleFiles = false;
@@ -146,6 +150,8 @@ namespace {
return true;
else if (a.Address == b.Address && a.Name < b.Name)
return true;
+ else if (a.Address == b.Address && a.Name == b.Name && a.Size < b.Size)
+ return true;
else
return false;
@@ -156,12 +162,21 @@ namespace {
return true;
else if (a.Size == b.Size && a.Name < b.Name)
return true;
+ else if (a.Size == b.Size && a.Name == b.Name && a.Address < b.Address)
+ return true;
else
return false;
}
static bool CompareSymbolName(const NMSymbol &a, const NMSymbol &b) {
- return a.Name < b.Name;
+ if (a.Name < b.Name)
+ return true;
+ else if (a.Name == b.Name && a.Size < b.Size)
+ return true;
+ else if (a.Name == b.Name && a.Size == b.Size && a.Address < b.Address)
+ return true;
+ else
+ return false;
}
StringRef CurrentFilename;
@@ -346,12 +361,32 @@ static void DumpSymbolNamesFromFile(std::string &Filename) {
return;
if (object::Archive *a = dyn_cast<object::Archive>(arch.get())) {
+ if (ArchiveMap) {
+ outs() << "Archive map" << "\n";
+ for (object::Archive::symbol_iterator i = a->begin_symbols(),
+ e = a->end_symbols(); i != e; ++i) {
+ object::Archive::child_iterator c;
+ StringRef symname;
+ StringRef filename;
+ if (error(i->getMember(c)))
+ return;
+ if (error(i->getName(symname)))
+ return;
+ if (error(c->getName(filename)))
+ return;
+ outs() << symname << " in " << filename << "\n";
+ }
+ outs() << "\n";
+ }
+
for (object::Archive::child_iterator i = a->begin_children(),
e = a->end_children(); i != e; ++i) {
OwningPtr<Binary> child;
if (i->getAsBinary(child)) {
// Try opening it as a bitcode file.
- OwningPtr<MemoryBuffer> buff(i->getBuffer());
+ OwningPtr<MemoryBuffer> buff;
+ if (error(i->getMemoryBuffer(buff)))
+ return;
Module *Result = 0;
if (buff)
Result = ParseBitcodeFile(buff.get(), Context, &ErrorMessage);
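
The comparator changes above add secondary and tertiary keys so llvm-nm's sort order is deterministic when addresses or names collide. A hedged sketch of the same lexicographic tie-breaking, using a hypothetical Sym record in place of the real NMSymbol:

    #include <algorithm>
    #include <cstdint>
    #include <string>
    #include <vector>

    struct Sym { uint64_t Address; uint64_t Size; std::string Name; };

    // Order by name, then size, then address, mirroring CompareSymbolName above.
    // Equal keys fall through to the next key; full ties compare false, giving
    // a strict weak ordering suitable for std::sort.
    static bool CompareByName(const Sym &A, const Sym &B) {
      if (A.Name != B.Name) return A.Name < B.Name;
      if (A.Size != B.Size) return A.Size < B.Size;
      return A.Address < B.Address;
    }

    int main() {
      std::vector<Sym> Syms = {{16, 4, "b"}, {8, 4, "a"}, {8, 2, "a"}};
      std::sort(Syms.begin(), Syms.end(), CompareByName); // a/2, a/4, b/4
    }
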
diff --git a/tools/llvm-objdump/CMakeLists.txt b/tools/llvm-objdump/CMakeLists.txt
index f3b2e1fe4196..0c49d0b457f2 100644
--- a/tools/llvm-objdump/CMakeLists.txt
+++ b/tools/llvm-objdump/CMakeLists.txt
@@ -9,6 +9,8 @@ set(LLVM_LINK_COMPONENTS
add_llvm_tool(llvm-objdump
llvm-objdump.cpp
+ COFFDump.cpp
+ ELFDump.cpp
MachODump.cpp
MCFunction.cpp
)
diff --git a/tools/llvm-objdump/COFFDump.cpp b/tools/llvm-objdump/COFFDump.cpp
new file mode 100644
index 000000000000..2ada683f2df1
--- /dev/null
+++ b/tools/llvm-objdump/COFFDump.cpp
@@ -0,0 +1,355 @@
+//===-- COFFDump.cpp - COFF-specific dumper ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the COFF-specific dumper for llvm-objdump.
+/// It outputs the Win64 EH data structures as plain text.
+/// The encoding of the unwind codes is described in MSDN:
+/// http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm-objdump.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/Win64EH.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <algorithm>
+#include <cstring>
+
+using namespace llvm;
+using namespace object;
+using namespace llvm::Win64EH;
+
+// Returns the name of the unwind code.
+static StringRef getUnwindCodeTypeName(uint8_t Code) {
+ switch(Code) {
+ default: llvm_unreachable("Invalid unwind code");
+ case UOP_PushNonVol: return "UOP_PushNonVol";
+ case UOP_AllocLarge: return "UOP_AllocLarge";
+ case UOP_AllocSmall: return "UOP_AllocSmall";
+ case UOP_SetFPReg: return "UOP_SetFPReg";
+ case UOP_SaveNonVol: return "UOP_SaveNonVol";
+ case UOP_SaveNonVolBig: return "UOP_SaveNonVolBig";
+ case UOP_SaveXMM128: return "UOP_SaveXMM128";
+ case UOP_SaveXMM128Big: return "UOP_SaveXMM128Big";
+ case UOP_PushMachFrame: return "UOP_PushMachFrame";
+ }
+}
+
+// Returns the name of a referenced register.
+static StringRef getUnwindRegisterName(uint8_t Reg) {
+ switch(Reg) {
+ default: llvm_unreachable("Invalid register");
+ case 0: return "RAX";
+ case 1: return "RCX";
+ case 2: return "RDX";
+ case 3: return "RBX";
+ case 4: return "RSP";
+ case 5: return "RBP";
+ case 6: return "RSI";
+ case 7: return "RDI";
+ case 8: return "R8";
+ case 9: return "R9";
+ case 10: return "R10";
+ case 11: return "R11";
+ case 12: return "R12";
+ case 13: return "R13";
+ case 14: return "R14";
+ case 15: return "R15";
+ }
+}
+
+// Calculates the number of array slots required for the unwind code.
+static unsigned getNumUsedSlots(const UnwindCode &UnwindCode) {
+ switch (UnwindCode.getUnwindOp()) {
+ default: llvm_unreachable("Invalid unwind code");
+ case UOP_PushNonVol:
+ case UOP_AllocSmall:
+ case UOP_SetFPReg:
+ case UOP_PushMachFrame:
+ return 1;
+ case UOP_SaveNonVol:
+ case UOP_SaveXMM128:
+ return 2;
+ case UOP_SaveNonVolBig:
+ case UOP_SaveXMM128Big:
+ return 3;
+ case UOP_AllocLarge:
+ return (UnwindCode.getOpInfo() == 0) ? 2 : 3;
+ }
+}
+
+// Prints one unwind code. Because an unwind code can occupy up to 3 slots in
+// the unwind codes array, this function requires that the correct number of
+// slots is provided.
+static void printUnwindCode(ArrayRef<UnwindCode> UCs) {
+ assert(UCs.size() >= getNumUsedSlots(UCs[0]));
+ outs() << format(" 0x%02x: ", unsigned(UCs[0].u.CodeOffset))
+ << getUnwindCodeTypeName(UCs[0].getUnwindOp());
+ switch (UCs[0].getUnwindOp()) {
+ case UOP_PushNonVol:
+ outs() << " " << getUnwindRegisterName(UCs[0].getOpInfo());
+ break;
+ case UOP_AllocLarge:
+ if (UCs[0].getOpInfo() == 0) {
+ outs() << " " << UCs[1].FrameOffset;
+ } else {
+ outs() << " " << UCs[1].FrameOffset
+ + (static_cast<uint32_t>(UCs[2].FrameOffset) << 16);
+ }
+ break;
+ case UOP_AllocSmall:
+ outs() << " " << ((UCs[0].getOpInfo() + 1) * 8);
+ break;
+ case UOP_SetFPReg:
+ outs() << " ";
+ break;
+ case UOP_SaveNonVol:
+ outs() << " " << getUnwindRegisterName(UCs[0].getOpInfo())
+ << format(" [0x%04x]", 8 * UCs[1].FrameOffset);
+ break;
+ case UOP_SaveNonVolBig:
+ outs() << " " << getUnwindRegisterName(UCs[0].getOpInfo())
+ << format(" [0x%08x]", UCs[1].FrameOffset
+ + (static_cast<uint32_t>(UCs[2].FrameOffset) << 16));
+ break;
+ case UOP_SaveXMM128:
+ outs() << " XMM" << static_cast<uint32_t>(UCs[0].getOpInfo())
+ << format(" [0x%04x]", 16 * UCs[1].FrameOffset);
+ break;
+ case UOP_SaveXMM128Big:
+ outs() << " XMM" << UCs[0].getOpInfo()
+ << format(" [0x%08x]", UCs[1].FrameOffset
+ + (static_cast<uint32_t>(UCs[2].FrameOffset) << 16));
+ break;
+ case UOP_PushMachFrame:
+ outs() << " " << (UCs[0].getOpInfo() ? "w/o" : "w")
+ << " error code";
+ break;
+ }
+ outs() << "\n";
+}
+
+static void printAllUnwindCodes(ArrayRef<UnwindCode> UCs) {
+ for (const UnwindCode *I = UCs.begin(), *E = UCs.end(); I < E; ) {
+ unsigned UsedSlots = getNumUsedSlots(*I);
+ if (UsedSlots > UCs.size()) {
+ outs() << "Unwind data corrupted: Encountered unwind op "
+ << getUnwindCodeTypeName((*I).getUnwindOp())
+ << " which requires " << UsedSlots
+ << " slots, but only " << UCs.size()
+ << " remaining in buffer";
+ return;
+ }
+ printUnwindCode(ArrayRef<UnwindCode>(I, E));
+ I += UsedSlots;
+ }
+}
+
+// Given a symbol Sym, this function returns its address and section.
+static error_code resolveSectionAndAddress(const COFFObjectFile *Obj,
+ const SymbolRef &Sym,
+ const coff_section *&ResolvedSection,
+ uint64_t &ResolvedAddr) {
+ if (error_code ec = Sym.getAddress(ResolvedAddr)) return ec;
+ section_iterator iter(Obj->begin_sections());
+ if (error_code ec = Sym.getSection(iter)) return ec;
+ ResolvedSection = Obj->getCOFFSection(iter);
+ return object_error::success;
+}
+
+// Given a vector of relocations for a section and an offset into this section,
+// the function returns the symbol used for the relocation at the offset.
+static error_code resolveSymbol(const std::vector<RelocationRef> &Rels,
+ uint64_t Offset, SymbolRef &Sym) {
+ for (std::vector<RelocationRef>::const_iterator I = Rels.begin(),
+ E = Rels.end();
+ I != E; ++I) {
+ uint64_t Ofs;
+ if (error_code ec = I->getOffset(Ofs)) return ec;
+ if (Ofs == Offset) {
+ if (error_code ec = I->getSymbol(Sym)) return ec;
+ break;
+ }
+ }
+ return object_error::success;
+}
+
+// Given a vector of relocations for a section and an offset into this section,
+// the function resolves the symbol used for the relocation at the offset and
+// returns the section content and the address inside the content pointed to
+// by the symbol.
+static error_code getSectionContents(const COFFObjectFile *Obj,
+ const std::vector<RelocationRef> &Rels,
+ uint64_t Offset,
+ ArrayRef<uint8_t> &Contents,
+ uint64_t &Addr) {
+ SymbolRef Sym;
+ if (error_code ec = resolveSymbol(Rels, Offset, Sym)) return ec;
+ const coff_section *Section;
+ if (error_code ec = resolveSectionAndAddress(Obj, Sym, Section, Addr))
+ return ec;
+ if (error_code ec = Obj->getSectionContents(Section, Contents)) return ec;
+ return object_error::success;
+}
+
+// Given a vector of relocations for a section and an offset into this section,
+// the function returns the name of the symbol used for the relocation at the
+// offset.
+static error_code resolveSymbolName(const std::vector<RelocationRef> &Rels,
+ uint64_t Offset, StringRef &Name) {
+ SymbolRef Sym;
+ if (error_code ec = resolveSymbol(Rels, Offset, Sym)) return ec;
+ if (error_code ec = Sym.getName(Name)) return ec;
+ return object_error::success;
+}
+
+static void printCOFFSymbolAddress(llvm::raw_ostream &Out,
+ const std::vector<RelocationRef> &Rels,
+ uint64_t Offset, uint32_t Disp) {
+ StringRef Sym;
+ if (error_code ec = resolveSymbolName(Rels, Offset, Sym)) {
+ error(ec);
+ return;
+ }
+ Out << Sym;
+ if (Disp > 0)
+ Out << format(" + 0x%04x", Disp);
+}
+
+void llvm::printCOFFUnwindInfo(const COFFObjectFile *Obj) {
+ const coff_file_header *Header;
+ if (error(Obj->getHeader(Header))) return;
+
+ if (Header->Machine != COFF::IMAGE_FILE_MACHINE_AMD64) {
+ errs() << "Unsupported image machine type "
+ "(currently only AMD64 is supported).\n";
+ return;
+ }
+
+ const coff_section *Pdata = 0;
+
+ error_code ec;
+ for (section_iterator SI = Obj->begin_sections(),
+ SE = Obj->end_sections();
+ SI != SE; SI.increment(ec)) {
+ if (error(ec)) return;
+
+ StringRef Name;
+ if (error(SI->getName(Name))) continue;
+
+ if (Name != ".pdata") continue;
+
+ Pdata = Obj->getCOFFSection(SI);
+ std::vector<RelocationRef> Rels;
+ for (relocation_iterator RI = SI->begin_relocations(),
+ RE = SI->end_relocations();
+ RI != RE; RI.increment(ec)) {
+ if (error(ec)) break;
+ Rels.push_back(*RI);
+ }
+
+ // Sort relocations by address.
+ std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
+
+ ArrayRef<uint8_t> Contents;
+ if (error(Obj->getSectionContents(Pdata, Contents))) continue;
+ if (Contents.empty()) continue;
+
+ ArrayRef<RuntimeFunction> RFs(
+ reinterpret_cast<const RuntimeFunction *>(Contents.data()),
+ Contents.size() / sizeof(RuntimeFunction));
+ for (const RuntimeFunction *I = RFs.begin(), *E = RFs.end(); I < E; ++I) {
+ const uint64_t SectionOffset = std::distance(RFs.begin(), I)
+ * sizeof(RuntimeFunction);
+
+ outs() << "Function Table:\n";
+
+ outs() << " Start Address: ";
+ printCOFFSymbolAddress(outs(), Rels, SectionOffset +
+ /*offsetof(RuntimeFunction, StartAddress)*/ 0,
+ I->StartAddress);
+ outs() << "\n";
+
+ outs() << " End Address: ";
+ printCOFFSymbolAddress(outs(), Rels, SectionOffset +
+ /*offsetof(RuntimeFunction, EndAddress)*/ 4,
+ I->EndAddress);
+ outs() << "\n";
+
+ outs() << " Unwind Info Address: ";
+ printCOFFSymbolAddress(outs(), Rels, SectionOffset +
+ /*offsetof(RuntimeFunction, UnwindInfoOffset)*/ 8,
+ I->UnwindInfoOffset);
+ outs() << "\n";
+
+ ArrayRef<uint8_t> XContents;
+ uint64_t UnwindInfoOffset = 0;
+ if (error(getSectionContents(Obj, Rels, SectionOffset +
+ /*offsetof(RuntimeFunction, UnwindInfoOffset)*/ 8,
+ XContents, UnwindInfoOffset))) continue;
+ if (XContents.empty()) continue;
+
+ UnwindInfoOffset += I->UnwindInfoOffset;
+ if (UnwindInfoOffset > XContents.size()) continue;
+
+ const Win64EH::UnwindInfo *UI =
+ reinterpret_cast<const Win64EH::UnwindInfo *>
+ (XContents.data() + UnwindInfoOffset);
+
+ // The casts to int are required in order to output the value as a number.
+ // Without the casts, the value would be interpreted as char data (which
+ // results in garbage output).
+ outs() << " Version: " << static_cast<int>(UI->getVersion()) << "\n";
+ outs() << " Flags: " << static_cast<int>(UI->getFlags());
+ if (UI->getFlags()) {
+ if (UI->getFlags() & UNW_ExceptionHandler)
+ outs() << " UNW_ExceptionHandler";
+ if (UI->getFlags() & UNW_TerminateHandler)
+ outs() << " UNW_TerminateHandler";
+ if (UI->getFlags() & UNW_ChainInfo)
+ outs() << " UNW_ChainInfo";
+ }
+ outs() << "\n";
+ outs() << " Size of prolog: "
+ << static_cast<int>(UI->PrologSize) << "\n";
+ outs() << " Number of Codes: "
+ << static_cast<int>(UI->NumCodes) << "\n";
+ // Maybe this should move to output of UOP_SetFPReg?
+ if (UI->getFrameRegister()) {
+ outs() << " Frame register: "
+ << getUnwindRegisterName(UI->getFrameRegister())
+ << "\n";
+ outs() << " Frame offset: "
+ << 16 * UI->getFrameOffset()
+ << "\n";
+ } else {
+ outs() << " No frame pointer used\n";
+ }
+ if (UI->getFlags() & (UNW_ExceptionHandler | UNW_TerminateHandler)) {
+ // FIXME: Output exception handler data
+ } else if (UI->getFlags() & UNW_ChainInfo) {
+ // FIXME: Output chained unwind info
+ }
+
+ if (UI->NumCodes)
+ outs() << " Unwind Codes:\n";
+
+ printAllUnwindCodes(ArrayRef<UnwindCode>(&UI->UnwindCodes[0],
+ UI->NumCodes));
+
+ outs() << "\n\n";
+ outs().flush();
+ }
+ }
+}
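
Several cases in printUnwindCode above reassemble a 32-bit operand from two consecutive 16-bit unwind slots (UOP_AllocLarge with OpInfo == 1, and the *Big save forms): the low half sits in slot 1 and the high half in slot 2. A small self-contained sketch of that reconstruction, with hand-rolled slot values instead of the real Win64EH structures:

    #include <cstdint>
    #include <cstdio>

    // Combine the low and high 16-bit halves stored in slots 1 and 2 of the
    // unwind code array, as the UOP_AllocLarge / SaveNonVolBig cases do above.
    static uint32_t largeSlotValue(uint16_t Lo, uint16_t Hi) {
      return Lo + (static_cast<uint32_t>(Hi) << 16);
    }

    int main() {
      // e.g. an ALLOC_LARGE of 0x12340 bytes: Lo = 0x2340, Hi = 0x0001.
      std::printf("0x%x\n", largeSlotValue(0x2340, 0x0001)); // prints 0x12340
    }
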
diff --git a/tools/llvm-objdump/ELFDump.cpp b/tools/llvm-objdump/ELFDump.cpp
new file mode 100644
index 000000000000..bd1523133f55
--- /dev/null
+++ b/tools/llvm-objdump/ELFDump.cpp
@@ -0,0 +1,100 @@
+//===-- ELFDump.cpp - ELF-specific dumper -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the ELF-specific dumper for llvm-objdump.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm-objdump.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+template<class ELFT>
+void printProgramHeaders(
+ const ELFObjectFile<ELFT> *o) {
+ typedef ELFObjectFile<ELFT> ELFO;
+ outs() << "Program Header:\n";
+ for (typename ELFO::Elf_Phdr_Iter pi = o->begin_program_headers(),
+ pe = o->end_program_headers();
+ pi != pe; ++pi) {
+ switch (pi->p_type) {
+ case ELF::PT_LOAD:
+ outs() << " LOAD ";
+ break;
+ case ELF::PT_GNU_STACK:
+ outs() << " STACK ";
+ break;
+ case ELF::PT_GNU_EH_FRAME:
+ outs() << "EH_FRAME ";
+ break;
+ case ELF::PT_INTERP:
+ outs() << " INTERP ";
+ break;
+ case ELF::PT_DYNAMIC:
+ outs() << " DYNAMIC ";
+ break;
+ case ELF::PT_PHDR:
+ outs() << " PHDR ";
+ break;
+ case ELF::PT_TLS:
+ outs() << " TLS ";
+ break;
+ default:
+ outs() << " UNKNOWN ";
+ }
+
+ const char *Fmt = ELFT::Is64Bits ? "0x%016" PRIx64 " " : "0x%08" PRIx64 " ";
+
+ outs() << "off "
+ << format(Fmt, (uint64_t)pi->p_offset)
+ << "vaddr "
+ << format(Fmt, (uint64_t)pi->p_vaddr)
+ << "paddr "
+ << format(Fmt, (uint64_t)pi->p_paddr)
+ << format("align 2**%u\n", CountTrailingZeros_64(pi->p_align))
+ << " filesz "
+ << format(Fmt, (uint64_t)pi->p_filesz)
+ << "memsz "
+ << format(Fmt, (uint64_t)pi->p_memsz)
+ << "flags "
+ << ((pi->p_flags & ELF::PF_R) ? "r" : "-")
+ << ((pi->p_flags & ELF::PF_W) ? "w" : "-")
+ << ((pi->p_flags & ELF::PF_X) ? "x" : "-")
+ << "\n";
+ }
+ outs() << "\n";
+}
+
+void llvm::printELFFileHeader(const object::ObjectFile *Obj) {
+ // Little-endian 32-bit
+ if (const ELFObjectFile<ELFType<support::little, 4, false> > *ELFObj =
+ dyn_cast<ELFObjectFile<ELFType<support::little, 4, false> > >(Obj))
+ printProgramHeaders(ELFObj);
+
+ // Big-endian 32-bit
+ if (const ELFObjectFile<ELFType<support::big, 4, false> > *ELFObj =
+ dyn_cast<ELFObjectFile<ELFType<support::big, 4, false> > >(Obj))
+ printProgramHeaders(ELFObj);
+
+ // Little-endian 64-bit
+ if (const ELFObjectFile<ELFType<support::little, 8, true> > *ELFObj =
+ dyn_cast<ELFObjectFile<ELFType<support::little, 8, true> > >(Obj))
+ printProgramHeaders(ELFObj);
+
+ // Big-endian 64-bit
+ if (const ELFObjectFile<ELFType<support::big, 8, true> > *ELFObj =
+ dyn_cast<ELFObjectFile<ELFType<support::big, 8, true> > >(Obj))
+ printProgramHeaders(ELFObj);
+}
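
printELFFileHeader above dispatches the same templated printer over the four (endianness x width) ELFType instantiations via chained dyn_casts. A minimal analog of that pattern outside LLVM, assuming illustrative types and using a std::variant visit in place of dyn_cast:

    #include <cstdio>
    #include <variant>

    template <bool IsLittle, bool Is64> struct ElfFile { unsigned PhdrCount; };

    // One template instantiated per layout, like printProgramHeaders<ELFT>.
    template <bool IsLittle, bool Is64>
    void printProgramHeaders(const ElfFile<IsLittle, Is64> &F) {
      std::printf("%s-endian %d-bit, %u program headers\n",
                  IsLittle ? "little" : "big", Is64 ? 64 : 32, F.PhdrCount);
    }

    using AnyElf = std::variant<ElfFile<true, false>, ElfFile<false, false>,
                                ElfFile<true, true>, ElfFile<false, true>>;

    int main() {
      AnyElf F = ElfFile<true, true>{9};
      // One visit covers all four instantiations, like the chained dyn_casts.
      std::visit([](const auto &E) { printProgramHeaders(E); }, F);
    }
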
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 1feea421f23a..c324ff13a6b8 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -13,11 +13,9 @@
#include "llvm-objdump.h"
#include "MCFunction.h"
-#include "llvm/Support/MachO.h"
-#include "llvm/Object/MachO.h"
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDisassembler.h"
@@ -28,10 +26,12 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/MachO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
@@ -309,16 +309,10 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
raw_ostream &DebugOut = nulls();
#endif
- StringRef DebugAbbrevSection, DebugInfoSection, DebugArangesSection,
- DebugLineSection, DebugStrSection;
OwningPtr<DIContext> diContext;
- OwningPtr<MachOObjectFile> DSYMObj;
- MachOObject *DbgInfoObj = MachOObj;
+ ObjectFile *DbgObj = MachOOF.get();
// Try to find debug info and set up the DIContext for it.
if (UseDbg) {
- ArrayRef<SectionRef> DebugSections = Sections;
- std::vector<SectionRef> DSYMSections;
-
// A separate DSym file path was specified, parse it as a macho file,
// get the sections and supply it to the section name parsing machinery.
if (!DSYMFile.empty()) {
@@ -327,42 +321,11 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
errs() << "llvm-objdump: " << Filename << ": " << ec.message() << '\n';
return;
}
- DSYMObj.reset(static_cast<MachOObjectFile*>(
- ObjectFile::createMachOObjectFile(Buf.take())));
- const macho::Header &Header = DSYMObj->getObject()->getHeader();
-
- std::vector<SymbolRef> Symbols;
- SmallVector<uint64_t, 8> FoundFns;
- getSectionsAndSymbols(Header, DSYMObj.get(), 0, DSYMSections, Symbols,
- FoundFns);
- DebugSections = DSYMSections;
- DbgInfoObj = DSYMObj.get()->getObject();
- }
-
- // Find the named debug info sections.
- for (unsigned SectIdx = 0; SectIdx != DebugSections.size(); SectIdx++) {
- StringRef SectName;
- if (!DebugSections[SectIdx].getName(SectName)) {
- if (SectName.equals("__DWARF,__debug_abbrev"))
- DebugSections[SectIdx].getContents(DebugAbbrevSection);
- else if (SectName.equals("__DWARF,__debug_info"))
- DebugSections[SectIdx].getContents(DebugInfoSection);
- else if (SectName.equals("__DWARF,__debug_aranges"))
- DebugSections[SectIdx].getContents(DebugArangesSection);
- else if (SectName.equals("__DWARF,__debug_line"))
- DebugSections[SectIdx].getContents(DebugLineSection);
- else if (SectName.equals("__DWARF,__debug_str"))
- DebugSections[SectIdx].getContents(DebugStrSection);
- }
+ DbgObj = ObjectFile::createMachOObjectFile(Buf.take());
}
- // Setup the DIContext.
- diContext.reset(DIContext::getDWARFContext(DbgInfoObj->isLittleEndian(),
- DebugInfoSection,
- DebugAbbrevSection,
- DebugArangesSection,
- DebugLineSection,
- DebugStrSection));
+ // Set up the DIContext.
+ diContext.reset(DIContext::getDWARFContext(DbgObj));
}
FunctionMapTy FunctionMap;
@@ -371,9 +334,15 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
StringRef SectName;
if (Sections[SectIdx].getName(SectName) ||
- SectName.compare("__TEXT,__text"))
+ SectName != "__text")
continue; // Skip non-text sections
+ StringRef SegmentName;
+ DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl();
+ if (MachOOF->getSectionFinalSegmentName(DR, SegmentName) ||
+ SegmentName != "__TEXT")
+ continue;
+
// Insert the functions from the function starts segment into our map.
uint64_t VMAddr;
Sections[SectIdx].getAddress(VMAddr);
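
The MachODump hunk above stops matching the combined "__TEXT,__text" string and instead checks the section name and the section's final segment name separately. A trivial hedged sketch of that two-component match, with a hypothetical helper in place of the MachOObjectFile query:

    #include <cstdio>
    #include <string>

    // Hypothetical stand-in: identify the Mach-O text section from its two
    // components, as the SectName / SegmentName checks above do.
    static bool isTextSection(const std::string &Segment,
                              const std::string &Section) {
      return Segment == "__TEXT" && Section == "__text";
    }

    int main() {
      std::printf("%d\n", isTextSection("__TEXT", "__text"));  // 1
      std::printf("%d\n", isTextSection("__DWARF", "__text")); // 0
    }
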
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 13ea4e32958a..7832cf0dff4c 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -11,17 +11,17 @@
// dumps out a plethora of information about an object file depending on the
// flags.
//
+// The flags and output of this program should be nearly identical to those of
+// binutils objdump.
+//
//===----------------------------------------------------------------------===//
#include "llvm-objdump.h"
#include "MCFunction.h"
-#include "llvm/Object/Archive.h"
-#include "llvm/Object/COFF.h"
-#include "llvm/Object/ObjectFile.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -29,6 +29,10 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -72,9 +76,9 @@ static cl::opt<bool>
SymbolTable("t", cl::desc("Display the symbol table"));
static cl::opt<bool>
-MachO("macho", cl::desc("Use MachO specific object file parser"));
+MachOOpt("macho", cl::desc("Use MachO specific object file parser"));
static cl::alias
-MachOm("m", cl::desc("Alias for --macho"), cl::aliasopt(MachO));
+MachOm("m", cl::desc("Alias for --macho"), cl::aliasopt(MachOOpt));
cl::opt<std::string>
llvm::TripleName("triple", cl::desc("Target triple to disassemble for, "
@@ -100,9 +104,28 @@ MAttrs("mattr",
cl::desc("Target specific attributes"),
cl::value_desc("a1,+a2,-a3,..."));
+static cl::opt<bool>
+NoShowRawInsn("no-show-raw-insn", cl::desc("When disassembling instructions, "
+ "do not print the instruction bytes."));
+
+static cl::opt<bool>
+UnwindInfo("unwind-info", cl::desc("Display unwind information"));
+
+static cl::alias
+UnwindInfoShort("u", cl::desc("Alias for --unwind-info"),
+ cl::aliasopt(UnwindInfo));
+
+static cl::opt<bool>
+PrivateHeaders("private-headers",
+ cl::desc("Display format specific file headers"));
+
+static cl::alias
+PrivateHeadersShort("p", cl::desc("Alias for --private-headers"),
+ cl::aliasopt(PrivateHeaders));
+
static StringRef ToolName;
-static bool error(error_code ec) {
+bool llvm::error(error_code ec) {
if (!ec) return false;
outs() << ToolName << ": error reading file: " << ec.message() << ".\n";
@@ -161,7 +184,7 @@ void llvm::DumpBytes(StringRef bytes) {
outs() << output;
}
-static bool RelocAddressLess(RelocationRef a, RelocationRef b) {
+bool llvm::RelocAddressLess(RelocationRef a, RelocationRef b) {
uint64_t a_addr, b_addr;
if (error(a.getAddress(a_addr))) return false;
if (error(b.getAddress(b_addr))) return false;
@@ -205,6 +228,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (!error(i->containsSymbol(*si, contains)) && contains) {
uint64_t Address;
if (error(si->getAddress(Address))) break;
+ if (Address == UnknownAddressOrSize) continue;
Address -= SectionAddr;
StringRef Name;
@@ -230,9 +254,18 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Sort relocations by address.
std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
+ StringRef SegmentName = "";
+ if (const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj)) {
+ DataRefImpl DR = i->getRawDataRefImpl();
+ if (error(MachO->getSectionFinalSegmentName(DR, SegmentName)))
+ break;
+ }
StringRef name;
if (error(i->getName(name))) break;
- outs() << "Disassembly of section " << name << ':';
+ outs() << "Disassembly of section ";
+ if (!SegmentName.empty())
+ outs() << SegmentName << ",";
+ outs() << name << ':';
// If the section has no symbols just insert a dummy one and disassemble
// the whole section.
@@ -321,8 +354,11 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
DebugOut, nulls())) {
- outs() << format("%8" PRIx64 ":\t", SectionAddr + Index);
- DumpBytes(StringRef(Bytes.data() + Index, Size));
+ outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+ if (!NoShowRawInsn) {
+ outs() << "\t";
+ DumpBytes(StringRef(Bytes.data() + Index, Size));
+ }
IP->printInst(&Inst, outs(), "");
outs() << "\n";
} else {
@@ -409,7 +445,7 @@ static void PrintSectionHeaders(const ObjectFile *o) {
if (error(si->isBSS(BSS))) return;
std::string Type = (std::string(Text ? "TEXT " : "") +
(Data ? "DATA " : "") + (BSS ? "BSS" : ""));
- outs() << format("%3d %-13s %09" PRIx64 " %017" PRIx64 " %s\n",
+ outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %s\n",
i, Name.str().c_str(), Size, Address, Type.c_str());
++i;
}
@@ -446,7 +482,7 @@ static void PrintSectionContents(const ObjectFile *o) {
// Print ascii.
outs() << " ";
for (std::size_t i = 0; i < 16 && addr + i < end; ++i) {
- if (std::isprint(Contents[addr + i] & 0xFF))
+ if (std::isprint(static_cast<unsigned char>(Contents[addr + i]) & 0xFF))
outs() << Contents[addr + i];
else
outs() << ".";
@@ -539,7 +575,10 @@ static void PrintSymbolTable(const ObjectFile *o) {
else if (Type == SymbolRef::ST_Function)
FileFunc = 'F';
- outs() << format("%08" PRIx64, Address) << " "
+ const char *Fmt = o->getBytesInAddress() > 4 ? "%016" PRIx64 :
+ "%08" PRIx64;
+
+ outs() << format(Fmt, Address) << " "
<< GlobLoc // Local -> 'l', Global -> 'g', Neither -> ' '
<< (Weak ? 'w' : ' ') // Weak?
<< ' ' // Constructor. Not supported yet.
@@ -553,6 +592,13 @@ static void PrintSymbolTable(const ObjectFile *o) {
else if (Section == o->end_sections())
outs() << "*UND*";
else {
+ if (const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(o)) {
+ StringRef SegmentName;
+ DataRefImpl DR = Section->getRawDataRefImpl();
+ if (error(MachO->getSectionFinalSegmentName(DR, SegmentName)))
+ SegmentName = "";
+ outs() << SegmentName << ",";
+ }
StringRef SectionName;
if (error(Section->getName(SectionName)))
SectionName = "";
@@ -566,6 +612,19 @@ static void PrintSymbolTable(const ObjectFile *o) {
}
}
+static void PrintUnwindInfo(const ObjectFile *o) {
+ outs() << "Unwind info:\n\n";
+
+ if (const COFFObjectFile *coff = dyn_cast<COFFObjectFile>(o)) {
+ printCOFFUnwindInfo(coff);
+ } else {
+ // TODO: Extract DWARF dump tool to objdump.
+ errs() << "This operation is only currently supported "
+ "for COFF object files.\n";
+ return;
+ }
+}
+
static void DumpObject(const ObjectFile *o) {
outs() << '\n';
outs() << o->getFileName()
@@ -581,6 +640,10 @@ static void DumpObject(const ObjectFile *o) {
PrintSectionContents(o);
if (SymbolTable)
PrintSymbolTable(o);
+ if (UnwindInfo)
+ PrintUnwindInfo(o);
+ if (PrivateHeaders && o->isELF())
+ printELFFileHeader(o);
}
/// @brief Dump each object file in \a a;
@@ -611,7 +674,7 @@ static void DumpInput(StringRef file) {
return;
}
- if (MachO && Disassemble) {
+ if (MachOOpt && Disassemble) {
DisassembleInputMachO(file);
return;
}
@@ -659,7 +722,9 @@ int main(int argc, char **argv) {
&& !Relocations
&& !SectionHeaders
&& !SectionContents
- && !SymbolTable) {
+ && !SymbolTable
+ && !UnwindInfo
+ && !PrivateHeaders) {
cl::PrintHelpMessage();
return 2;
}
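
The disassembly-loop hunk above makes the raw instruction bytes optional: the address is always printed, but the byte dump is skipped under -no-show-raw-insn. A hedged sketch of that gating, with plain stand-ins for DumpBytes and the instruction printer:

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    static bool NoShowRawInsn = false; // stand-in for the cl::opt above

    static void dumpBytes(const std::vector<uint8_t> &Bytes) {
      for (uint8_t B : Bytes) std::printf("%02x ", B);
    }

    static void printInsn(uint64_t Addr, const std::vector<uint8_t> &Bytes,
                          const std::string &Text) {
      std::printf("%8llx:", static_cast<unsigned long long>(Addr));
      if (!NoShowRawInsn) { // bytes only when not suppressed
        std::printf("\t");
        dumpBytes(Bytes);
      }
      std::printf("\t%s\n", Text.c_str());
    }

    int main() {
      printInsn(0x401000, {0x55}, "pushq %rbp");
      NoShowRawInsn = true;
      printInsn(0x401001, {0x48, 0x89, 0xe5}, "movq %rsp, %rbp");
    }
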
diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h
index aa71b77c8abf..ca7bced635f8 100644
--- a/tools/llvm-objdump/llvm-objdump.h
+++ b/tools/llvm-objdump/llvm-objdump.h
@@ -17,12 +17,23 @@
namespace llvm {
+namespace object {
+ class COFFObjectFile;
+ class ObjectFile;
+ class RelocationRef;
+}
+class error_code;
+
extern cl::opt<std::string> TripleName;
extern cl::opt<std::string> ArchName;
// Various helper functions.
+bool error(error_code ec);
+bool RelocAddressLess(object::RelocationRef a, object::RelocationRef b);
void DumpBytes(StringRef bytes);
void DisassembleInputMachO(StringRef Filename);
+void printCOFFUnwindInfo(const object::COFFObjectFile* o);
+void printELFFileHeader(const object::ObjectFile *o);
class StringRefMemoryObject : public MemoryObject {
virtual void anchor();
diff --git a/tools/llvm-prof/llvm-prof.cpp b/tools/llvm-prof/llvm-prof.cpp
index 81e9503abe25..b2c3f061695a 100644
--- a/tools/llvm-prof/llvm-prof.cpp
+++ b/tools/llvm-prof/llvm-prof.cpp
@@ -13,23 +13,23 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/InstrTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Assembly/AssemblyAnnotationWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Analysis/Passes.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <algorithm>
#include <iomanip>
diff --git a/tools/llvm-ranlib/llvm-ranlib.cpp b/tools/llvm-ranlib/llvm-ranlib.cpp
index d2f5f0fff910..fe9d3e2954af 100644
--- a/tools/llvm-ranlib/llvm-ranlib.cpp
+++ b/tools/llvm-ranlib/llvm-ranlib.cpp
@@ -11,16 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Bitcode/Archive.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
#include <memory>
using namespace llvm;
diff --git a/tools/llvm-readobj/CMakeLists.txt b/tools/llvm-readobj/CMakeLists.txt
index be80469f28d5..3d20def8f51c 100644
--- a/tools/llvm-readobj/CMakeLists.txt
+++ b/tools/llvm-readobj/CMakeLists.txt
@@ -1,5 +1,15 @@
-set(LLVM_LINK_COMPONENTS archive bitreader object)
+set(LLVM_LINK_COMPONENTS
+ ${LLVM_TARGETS_TO_BUILD}
+ archive
+ bitreader
+ object)
add_llvm_tool(llvm-readobj
llvm-readobj.cpp
+ ObjDumper.cpp
+ COFFDumper.cpp
+ ELFDumper.cpp
+ MachODumper.cpp
+ Error.cpp
+ StreamWriter.cpp
)
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
new file mode 100644
index 000000000000..be4e76cc634f
--- /dev/null
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -0,0 +1,1014 @@
+//===-- COFFDumper.cpp - COFF-specific dumper -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the COFF-specific dumper for llvm-readobj.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm-readobj.h"
+#include "ObjDumper.h"
+
+#include "Error.h"
+#include "StreamWriter.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/Win64EH.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+#include <algorithm>
+#include <cstring>
+#include <time.h>
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::Win64EH;
+
+namespace {
+
+class COFFDumper : public ObjDumper {
+public:
+ COFFDumper(const llvm::object::COFFObjectFile *Obj, StreamWriter& Writer)
+ : ObjDumper(Writer)
+ , Obj(Obj) {
+ cacheRelocations();
+ }
+
+ virtual void printFileHeaders() LLVM_OVERRIDE;
+ virtual void printSections() LLVM_OVERRIDE;
+ virtual void printRelocations() LLVM_OVERRIDE;
+ virtual void printSymbols() LLVM_OVERRIDE;
+ virtual void printDynamicSymbols() LLVM_OVERRIDE;
+ virtual void printUnwindInfo() LLVM_OVERRIDE;
+
+private:
+ void printSymbol(symbol_iterator SymI);
+
+ void printRelocation(section_iterator SecI, relocation_iterator RelI);
+
+ void printX64UnwindInfo();
+
+ void printRuntimeFunction(
+ const RuntimeFunction& RTF,
+ uint64_t OffsetInSection,
+ const std::vector<RelocationRef> &Rels);
+
+ void printUnwindInfo(
+ const Win64EH::UnwindInfo& UI,
+ uint64_t OffsetInSection,
+ const std::vector<RelocationRef> &Rels);
+
+ void printUnwindCode(const Win64EH::UnwindInfo& UI, ArrayRef<UnwindCode> UCs);
+
+ void cacheRelocations();
+
+ error_code getSectionContents(
+ const std::vector<RelocationRef> &Rels,
+ uint64_t Offset,
+ ArrayRef<uint8_t> &Contents,
+ uint64_t &Addr);
+
+ error_code getSection(
+ const std::vector<RelocationRef> &Rels,
+ uint64_t Offset,
+ const coff_section **Section,
+ uint64_t *AddrPtr);
+
+ typedef DenseMap<const coff_section*, std::vector<RelocationRef> > RelocMapTy;
+
+ const llvm::object::COFFObjectFile *Obj;
+ RelocMapTy RelocMap;
+ std::vector<RelocationRef> EmptyRelocs;
+};
+
+} // namespace
+
+
+namespace llvm {
+
+error_code createCOFFDumper(const object::ObjectFile *Obj,
+ StreamWriter& Writer,
+ OwningPtr<ObjDumper> &Result) {
+ const COFFObjectFile *COFFObj = dyn_cast<COFFObjectFile>(Obj);
+ if (!COFFObj)
+ return readobj_error::unsupported_obj_file_format;
+
+ Result.reset(new COFFDumper(COFFObj, Writer));
+ return readobj_error::success;
+}
+
+} // namespace llvm
+
+
+// Returns the name of the unwind code.
+static StringRef getUnwindCodeTypeName(uint8_t Code) {
+ switch(Code) {
+ default: llvm_unreachable("Invalid unwind code");
+ case UOP_PushNonVol: return "PUSH_NONVOL";
+ case UOP_AllocLarge: return "ALLOC_LARGE";
+ case UOP_AllocSmall: return "ALLOC_SMALL";
+ case UOP_SetFPReg: return "SET_FPREG";
+ case UOP_SaveNonVol: return "SAVE_NONVOL";
+ case UOP_SaveNonVolBig: return "SAVE_NONVOL_FAR";
+ case UOP_SaveXMM128: return "SAVE_XMM128";
+ case UOP_SaveXMM128Big: return "SAVE_XMM128_FAR";
+ case UOP_PushMachFrame: return "PUSH_MACHFRAME";
+ }
+}
+
+// Returns the name of a referenced register.
+static StringRef getUnwindRegisterName(uint8_t Reg) {
+ switch(Reg) {
+ default: llvm_unreachable("Invalid register");
+ case 0: return "RAX";
+ case 1: return "RCX";
+ case 2: return "RDX";
+ case 3: return "RBX";
+ case 4: return "RSP";
+ case 5: return "RBP";
+ case 6: return "RSI";
+ case 7: return "RDI";
+ case 8: return "R8";
+ case 9: return "R9";
+ case 10: return "R10";
+ case 11: return "R11";
+ case 12: return "R12";
+ case 13: return "R13";
+ case 14: return "R14";
+ case 15: return "R15";
+ }
+}
+
+// Calculates the number of array slots required for the unwind code.
+static unsigned getNumUsedSlots(const UnwindCode &UnwindCode) {
+ switch (UnwindCode.getUnwindOp()) {
+ default: llvm_unreachable("Invalid unwind code");
+ case UOP_PushNonVol:
+ case UOP_AllocSmall:
+ case UOP_SetFPReg:
+ case UOP_PushMachFrame:
+ return 1;
+ case UOP_SaveNonVol:
+ case UOP_SaveXMM128:
+ return 2;
+ case UOP_SaveNonVolBig:
+ case UOP_SaveXMM128Big:
+ return 3;
+ case UOP_AllocLarge:
+ return (UnwindCode.getOpInfo() == 0) ? 2 : 3;
+ }
+}
+
+// Given a symbol Sym, this function returns its address and section.
+static error_code resolveSectionAndAddress(const COFFObjectFile *Obj,
+ const SymbolRef &Sym,
+ const coff_section *&ResolvedSection,
+ uint64_t &ResolvedAddr) {
+ if (error_code EC = Sym.getAddress(ResolvedAddr))
+ return EC;
+
+ section_iterator iter(Obj->begin_sections());
+ if (error_code EC = Sym.getSection(iter))
+ return EC;
+
+ ResolvedSection = Obj->getCOFFSection(iter);
+ return object_error::success;
+}
+
+// Given a vector of relocations for a section and an offset into this section,
+// the function returns the symbol used for the relocation at the offset.
+static error_code resolveSymbol(const std::vector<RelocationRef> &Rels,
+ uint64_t Offset, SymbolRef &Sym) {
+ for (std::vector<RelocationRef>::const_iterator RelI = Rels.begin(),
+ RelE = Rels.end();
+ RelI != RelE; ++RelI) {
+ uint64_t Ofs;
+ if (error_code EC = RelI->getOffset(Ofs))
+ return EC;
+
+ if (Ofs == Offset) {
+ if (error_code EC = RelI->getSymbol(Sym))
+ return EC;
+ return readobj_error::success;
+ }
+ }
+
+ return readobj_error::unknown_symbol;
+}
+
+// Given a vector of relocations for a section and an offset into this section,
+// the function returns the name of the symbol used for the relocation at the
+// offset.
+static error_code resolveSymbolName(const std::vector<RelocationRef> &Rels,
+ uint64_t Offset, StringRef &Name) {
+ SymbolRef Sym;
+ if (error_code EC = resolveSymbol(Rels, Offset, Sym)) return EC;
+ if (error_code EC = Sym.getName(Name)) return EC;
+ return object_error::success;
+}
+
+static const EnumEntry<COFF::MachineTypes> ImageFileMachineType[] = {
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_UNKNOWN ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_AM33 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_AMD64 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARM ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARMV7 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_EBC ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_I386 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_IA64 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_M32R ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_MIPS16 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_MIPSFPU ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_MIPSFPU16),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_POWERPC ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_POWERPCFP),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_R4000 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_SH3 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_SH3DSP ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_SH4 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_SH5 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_THUMB ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_WCEMIPSV2)
+};
+
+static const EnumEntry<COFF::Characteristics> ImageFileCharacteristics[] = {
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_RELOCS_STRIPPED ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_EXECUTABLE_IMAGE ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_LINE_NUMS_STRIPPED ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_LOCAL_SYMS_STRIPPED ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_AGGRESSIVE_WS_TRIM ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_LARGE_ADDRESS_AWARE ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_BYTES_REVERSED_LO ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_32BIT_MACHINE ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_DEBUG_STRIPPED ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_NET_RUN_FROM_SWAP ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_SYSTEM ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_DLL ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_UP_SYSTEM_ONLY ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_BYTES_REVERSED_HI )
+};
+
+static const EnumEntry<COFF::SectionCharacteristics>
+ImageSectionCharacteristics[] = {
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_TYPE_NO_PAD ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_CNT_CODE ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_CNT_INITIALIZED_DATA ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_CNT_UNINITIALIZED_DATA),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_LNK_OTHER ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_LNK_INFO ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_LNK_REMOVE ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_LNK_COMDAT ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_GPREL ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_PURGEABLE ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_16BIT ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_LOCKED ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_PRELOAD ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_1BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_2BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_4BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_8BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_16BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_32BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_64BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_128BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_256BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_512BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_1024BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_2048BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_4096BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_ALIGN_8192BYTES ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_LNK_NRELOC_OVFL ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_DISCARDABLE ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_NOT_CACHED ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_NOT_PAGED ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_SHARED ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_EXECUTE ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_READ ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_WRITE )
+};
+
+static const EnumEntry<COFF::SymbolBaseType> ImageSymType[] = {
+ { "Null" , COFF::IMAGE_SYM_TYPE_NULL },
+ { "Void" , COFF::IMAGE_SYM_TYPE_VOID },
+ { "Char" , COFF::IMAGE_SYM_TYPE_CHAR },
+ { "Short" , COFF::IMAGE_SYM_TYPE_SHORT },
+ { "Int" , COFF::IMAGE_SYM_TYPE_INT },
+ { "Long" , COFF::IMAGE_SYM_TYPE_LONG },
+ { "Float" , COFF::IMAGE_SYM_TYPE_FLOAT },
+ { "Double", COFF::IMAGE_SYM_TYPE_DOUBLE },
+ { "Struct", COFF::IMAGE_SYM_TYPE_STRUCT },
+ { "Union" , COFF::IMAGE_SYM_TYPE_UNION },
+ { "Enum" , COFF::IMAGE_SYM_TYPE_ENUM },
+ { "MOE" , COFF::IMAGE_SYM_TYPE_MOE },
+ { "Byte" , COFF::IMAGE_SYM_TYPE_BYTE },
+ { "Word" , COFF::IMAGE_SYM_TYPE_WORD },
+ { "UInt" , COFF::IMAGE_SYM_TYPE_UINT },
+ { "DWord" , COFF::IMAGE_SYM_TYPE_DWORD }
+};
+
+static const EnumEntry<COFF::SymbolComplexType> ImageSymDType[] = {
+ { "Null" , COFF::IMAGE_SYM_DTYPE_NULL },
+ { "Pointer" , COFF::IMAGE_SYM_DTYPE_POINTER },
+ { "Function", COFF::IMAGE_SYM_DTYPE_FUNCTION },
+ { "Array" , COFF::IMAGE_SYM_DTYPE_ARRAY }
+};
+
+static const EnumEntry<COFF::SymbolStorageClass> ImageSymClass[] = {
+ { "EndOfFunction" , COFF::IMAGE_SYM_CLASS_END_OF_FUNCTION },
+ { "Null" , COFF::IMAGE_SYM_CLASS_NULL },
+ { "Automatic" , COFF::IMAGE_SYM_CLASS_AUTOMATIC },
+ { "External" , COFF::IMAGE_SYM_CLASS_EXTERNAL },
+ { "Static" , COFF::IMAGE_SYM_CLASS_STATIC },
+ { "Register" , COFF::IMAGE_SYM_CLASS_REGISTER },
+ { "ExternalDef" , COFF::IMAGE_SYM_CLASS_EXTERNAL_DEF },
+ { "Label" , COFF::IMAGE_SYM_CLASS_LABEL },
+ { "UndefinedLabel" , COFF::IMAGE_SYM_CLASS_UNDEFINED_LABEL },
+ { "MemberOfStruct" , COFF::IMAGE_SYM_CLASS_MEMBER_OF_STRUCT },
+ { "Argument" , COFF::IMAGE_SYM_CLASS_ARGUMENT },
+ { "StructTag" , COFF::IMAGE_SYM_CLASS_STRUCT_TAG },
+ { "MemberOfUnion" , COFF::IMAGE_SYM_CLASS_MEMBER_OF_UNION },
+ { "UnionTag" , COFF::IMAGE_SYM_CLASS_UNION_TAG },
+ { "TypeDefinition" , COFF::IMAGE_SYM_CLASS_TYPE_DEFINITION },
+ { "UndefinedStatic", COFF::IMAGE_SYM_CLASS_UNDEFINED_STATIC },
+ { "EnumTag" , COFF::IMAGE_SYM_CLASS_ENUM_TAG },
+ { "MemberOfEnum" , COFF::IMAGE_SYM_CLASS_MEMBER_OF_ENUM },
+ { "RegisterParam" , COFF::IMAGE_SYM_CLASS_REGISTER_PARAM },
+ { "BitField" , COFF::IMAGE_SYM_CLASS_BIT_FIELD },
+ { "Block" , COFF::IMAGE_SYM_CLASS_BLOCK },
+ { "Function" , COFF::IMAGE_SYM_CLASS_FUNCTION },
+ { "EndOfStruct" , COFF::IMAGE_SYM_CLASS_END_OF_STRUCT },
+ { "File" , COFF::IMAGE_SYM_CLASS_FILE },
+ { "Section" , COFF::IMAGE_SYM_CLASS_SECTION },
+ { "WeakExternal" , COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL },
+ { "CLRToken" , COFF::IMAGE_SYM_CLASS_CLR_TOKEN }
+};
+
+static const EnumEntry<COFF::COMDATType> ImageCOMDATSelect[] = {
+ { "NoDuplicates", COFF::IMAGE_COMDAT_SELECT_NODUPLICATES },
+ { "Any" , COFF::IMAGE_COMDAT_SELECT_ANY },
+ { "SameSize" , COFF::IMAGE_COMDAT_SELECT_SAME_SIZE },
+ { "ExactMatch" , COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH },
+ { "Associative" , COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE },
+ { "Largest" , COFF::IMAGE_COMDAT_SELECT_LARGEST },
+ { "Newest" , COFF::IMAGE_COMDAT_SELECT_NEWEST }
+};
+
+static const EnumEntry<COFF::WeakExternalCharacteristics>
+WeakExternalCharacteristics[] = {
+ { "NoLibrary", COFF::IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY },
+ { "Library" , COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY },
+ { "Alias" , COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS }
+};
+
+static const EnumEntry<unsigned> UnwindFlags[] = {
+ { "ExceptionHandler", Win64EH::UNW_ExceptionHandler },
+ { "TerminateHandler", Win64EH::UNW_TerminateHandler },
+ { "ChainInfo" , Win64EH::UNW_ChainInfo }
+};
+
+static const EnumEntry<unsigned> UnwindOpInfo[] = {
+ { "RAX", 0 },
+ { "RCX", 1 },
+ { "RDX", 2 },
+ { "RBX", 3 },
+ { "RSP", 4 },
+ { "RBP", 5 },
+ { "RSI", 6 },
+ { "RDI", 7 },
+ { "R8", 8 },
+ { "R9", 9 },
+ { "R10", 10 },
+ { "R11", 11 },
+ { "R12", 12 },
+ { "R13", 13 },
+ { "R14", 14 },
+ { "R15", 15 }
+};
+
+// Some additional COFF structures not defined by llvm::object.
+namespace {
+ struct coff_aux_function_definition {
+ support::ulittle32_t TagIndex;
+ support::ulittle32_t TotalSize;
+ support::ulittle32_t PointerToLineNumber;
+ support::ulittle32_t PointerToNextFunction;
+ uint8_t Unused[2];
+ };
+
+ struct coff_aux_weak_external_definition {
+ support::ulittle32_t TagIndex;
+ support::ulittle32_t Characteristics;
+ uint8_t Unused[10];
+ };
+
+ struct coff_aux_file_record {
+ char FileName[18];
+ };
+
+ struct coff_aux_clr_token {
+ support::ulittle8_t AuxType;
+ support::ulittle8_t Reserved;
+ support::ulittle32_t SymbolTableIndex;
+ uint8_t Unused[12];
+ };
+} // namespace
+
+static uint64_t getOffsetOfLSDA(const Win64EH::UnwindInfo& UI) {
+ return static_cast<const char*>(UI.getLanguageSpecificData())
+ - reinterpret_cast<const char*>(&UI);
+}
+
+static uint32_t getLargeSlotValue(ArrayRef<UnwindCode> UCs) {
+ if (UCs.size() < 3)
+ return 0;
+
+ return UCs[1].FrameOffset + (static_cast<uint32_t>(UCs[2].FrameOffset) << 16);
+}
+
+template<typename T>
+static error_code getSymbolAuxData(const COFFObjectFile *Obj,
+ const coff_symbol *Symbol, const T* &Aux) {
+ ArrayRef<uint8_t> AuxData = Obj->getSymbolAuxData(Symbol);
+ Aux = reinterpret_cast<const T*>(AuxData.data());
+ return readobj_error::success;
+}
+
+static std::string formatSymbol(const std::vector<RelocationRef> &Rels,
+ uint64_t Offset, uint32_t Disp) {
+ std::string Buffer;
+ raw_string_ostream Str(Buffer);
+
+ StringRef Sym;
+ if (resolveSymbolName(Rels, Offset, Sym)) {
+ Str << format(" (0x%X)", Offset);
+ return Str.str();
+ }
+
+ Str << Sym;
+ if (Disp > 0) {
+ Str << format(" +0x%X (0x%X)", Disp, Offset);
+ } else {
+ Str << format(" (0x%X)", Offset);
+ }
+
+ return Str.str();
+}
+
+// Given a vector of relocations for a section and an offset into this section,
+// the function resolves the symbol used for the relocation at the offset and
+// returns the section content and the address inside the content pointed to
+// by the symbol.
+error_code COFFDumper::getSectionContents(
+ const std::vector<RelocationRef> &Rels, uint64_t Offset,
+ ArrayRef<uint8_t> &Contents, uint64_t &Addr) {
+
+ SymbolRef Sym;
+ const coff_section *Section;
+
+ if (error_code EC = resolveSymbol(Rels, Offset, Sym))
+ return EC;
+ if (error_code EC = resolveSectionAndAddress(Obj, Sym, Section, Addr))
+ return EC;
+ if (error_code EC = Obj->getSectionContents(Section, Contents))
+ return EC;
+
+ return object_error::success;
+}
+
+error_code COFFDumper::getSection(
+ const std::vector<RelocationRef> &Rels, uint64_t Offset,
+ const coff_section **SectionPtr, uint64_t *AddrPtr) {
+
+ SymbolRef Sym;
+ if (error_code EC = resolveSymbol(Rels, Offset, Sym))
+ return EC;
+
+ const coff_section *Section;
+ uint64_t Addr;
+ if (error_code EC = resolveSectionAndAddress(Obj, Sym, Section, Addr))
+ return EC;
+
+ if (SectionPtr)
+ *SectionPtr = Section;
+ if (AddrPtr)
+ *AddrPtr = Addr;
+
+ return object_error::success;
+}
+
+void COFFDumper::cacheRelocations() {
+ error_code EC;
+ for (section_iterator SecI = Obj->begin_sections(),
+ SecE = Obj->end_sections();
+ SecI != SecE; SecI.increment(EC)) {
+ if (error(EC))
+ break;
+
+ const coff_section *Section = Obj->getCOFFSection(SecI);
+
+ for (relocation_iterator RelI = SecI->begin_relocations(),
+ RelE = SecI->end_relocations();
+ RelI != RelE; RelI.increment(EC)) {
+ if (error(EC))
+ break;
+
+ RelocMap[Section].push_back(*RelI);
+ }
+
+ // Sort relocations by address.
+ std::sort(RelocMap[Section].begin(), RelocMap[Section].end(),
+ relocAddressLess);
+ }
+}
+
+void COFFDumper::printFileHeaders() {
+ const coff_file_header *Header = 0;
+ if (error(Obj->getHeader(Header)))
+ return;
+
+ time_t TDS = Header->TimeDateStamp;
+ char FormattedTime[20] = { };
+ strftime(FormattedTime, 20, "%Y-%m-%d %H:%M:%S", gmtime(&TDS));
+
+ {
+ DictScope D(W, "ImageFileHeader");
+ W.printEnum ("Machine", Header->Machine,
+ makeArrayRef(ImageFileMachineType));
+ W.printNumber("SectionCount", Header->NumberOfSections);
+ W.printHex ("TimeDateStamp", FormattedTime, Header->TimeDateStamp);
+ W.printHex ("PointerToSymbolTable", Header->PointerToSymbolTable);
+ W.printNumber("SymbolCount", Header->NumberOfSymbols);
+ W.printNumber("OptionalHeaderSize", Header->SizeOfOptionalHeader);
+ W.printFlags ("Characteristics", Header->Characteristics,
+ makeArrayRef(ImageFileCharacteristics));
+ }
+}
+
+void COFFDumper::printSections() {
+ error_code EC;
+
+ ListScope SectionsD(W, "Sections");
+ int SectionNumber = 0;
+ for (section_iterator SecI = Obj->begin_sections(),
+ SecE = Obj->end_sections();
+ SecI != SecE; SecI.increment(EC)) {
+ if (error(EC))
+ break;
+
+ ++SectionNumber;
+ const coff_section *Section = Obj->getCOFFSection(SecI);
+
+ StringRef Name;
+ if (error(SecI->getName(Name)))
+ Name = "";
+
+ DictScope D(W, "Section");
+ W.printNumber("Number", SectionNumber);
+ W.printBinary("Name", Name, Section->Name);
+ W.printHex ("VirtualSize", Section->VirtualSize);
+ W.printHex ("VirtualAddress", Section->VirtualAddress);
+ W.printNumber("RawDataSize", Section->SizeOfRawData);
+ W.printHex ("PointerToRawData", Section->PointerToRawData);
+ W.printHex ("PointerToRelocations", Section->PointerToRelocations);
+ W.printHex ("PointerToLineNumbers", Section->PointerToLinenumbers);
+ W.printNumber("RelocationCount", Section->NumberOfRelocations);
+ W.printNumber("LineNumberCount", Section->NumberOfLinenumbers);
+ W.printFlags ("Characteristics", Section->Characteristics,
+ makeArrayRef(ImageSectionCharacteristics),
+ COFF::SectionCharacteristics(0x00F00000));
+
+ if (opts::SectionRelocations) {
+ ListScope D(W, "Relocations");
+ for (relocation_iterator RelI = SecI->begin_relocations(),
+ RelE = SecI->end_relocations();
+ RelI != RelE; RelI.increment(EC)) {
+ if (error(EC)) break;
+
+ printRelocation(SecI, RelI);
+ }
+ }
+
+ if (opts::SectionSymbols) {
+ ListScope D(W, "Symbols");
+ for (symbol_iterator SymI = Obj->begin_symbols(),
+ SymE = Obj->end_symbols();
+ SymI != SymE; SymI.increment(EC)) {
+ if (error(EC)) break;
+
+ bool Contained = false;
+ if (SecI->containsSymbol(*SymI, Contained) || !Contained)
+ continue;
+
+ printSymbol(SymI);
+ }
+ }
+
+ if (opts::SectionData) {
+ StringRef Data;
+ if (error(SecI->getContents(Data))) break;
+
+ W.printBinaryBlock("SectionData", Data);
+ }
+ }
+}
+
+void COFFDumper::printRelocations() {
+ ListScope D(W, "Relocations");
+
+ error_code EC;
+ int SectionNumber = 0;
+ for (section_iterator SecI = Obj->begin_sections(),
+ SecE = Obj->end_sections();
+ SecI != SecE; SecI.increment(EC)) {
+ ++SectionNumber;
+ if (error(EC))
+ break;
+
+ StringRef Name;
+ if (error(SecI->getName(Name)))
+ continue;
+
+ bool PrintedGroup = false;
+ for (relocation_iterator RelI = SecI->begin_relocations(),
+ RelE = SecI->end_relocations();
+ RelI != RelE; RelI.increment(EC)) {
+ if (error(EC)) break;
+
+ if (!PrintedGroup) {
+ W.startLine() << "Section (" << SectionNumber << ") " << Name << " {\n";
+ W.indent();
+ PrintedGroup = true;
+ }
+
+ printRelocation(SecI, RelI);
+ }
+
+ if (PrintedGroup) {
+ W.unindent();
+ W.startLine() << "}\n";
+ }
+ }
+}
+
+void COFFDumper::printRelocation(section_iterator SecI,
+ relocation_iterator RelI) {
+ uint64_t Offset;
+ uint64_t RelocType;
+ SmallString<32> RelocName;
+ SymbolRef Symbol;
+ StringRef SymbolName;
+ StringRef Contents;
+ if (error(RelI->getOffset(Offset))) return;
+ if (error(RelI->getType(RelocType))) return;
+ if (error(RelI->getTypeName(RelocName))) return;
+ if (error(RelI->getSymbol(Symbol))) return;
+ if (error(Symbol.getName(SymbolName))) return;
+ if (error(SecI->getContents(Contents))) return;
+
+ raw_ostream& OS = W.startLine();
+ OS << W.hex(Offset)
+ << " " << RelocName
+ << " " << (SymbolName.size() > 0 ? SymbolName : "-")
+ << "\n";
+}
+
+void COFFDumper::printSymbols() {
+ ListScope Group(W, "Symbols");
+
+ error_code EC;
+ for (symbol_iterator SymI = Obj->begin_symbols(),
+ SymE = Obj->end_symbols();
+ SymI != SymE; SymI.increment(EC)) {
+ if (error(EC)) break;
+
+ printSymbol(SymI);
+ }
+}
+
+void COFFDumper::printDynamicSymbols() {
+ ListScope Group(W, "DynamicSymbols");
+}
+
+void COFFDumper::printSymbol(symbol_iterator SymI) {
+ DictScope D(W, "Symbol");
+
+ const coff_symbol *Symbol = Obj->getCOFFSymbol(SymI);
+ const coff_section *Section;
+ if (error_code EC = Obj->getSection(Symbol->SectionNumber, Section)) {
+ W.startLine() << "Invalid section number: " << EC.message() << "\n";
+ W.flush();
+ return;
+ }
+
+ StringRef SymbolName;
+ if (Obj->getSymbolName(Symbol, SymbolName))
+ SymbolName = "";
+
+ StringRef SectionName;
+ if (Section && Obj->getSectionName(Section, SectionName))
+ SectionName = "";
+
+ W.printString("Name", SymbolName);
+ W.printNumber("Value", Symbol->Value);
+ W.printNumber("Section", SectionName, Symbol->SectionNumber);
+ W.printEnum ("BaseType", Symbol->getBaseType(), makeArrayRef(ImageSymType));
+ W.printEnum ("ComplexType", Symbol->getComplexType(),
+ makeArrayRef(ImageSymDType));
+ W.printEnum ("StorageClass", Symbol->StorageClass,
+ makeArrayRef(ImageSymClass));
+ W.printNumber("AuxSymbolCount", Symbol->NumberOfAuxSymbols);
+
+ for (unsigned I = 0; I < Symbol->NumberOfAuxSymbols; ++I) {
+ if (Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
+ Symbol->getBaseType() == COFF::IMAGE_SYM_TYPE_NULL &&
+ Symbol->getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION &&
+ Symbol->SectionNumber > 0) {
+ const coff_aux_function_definition *Aux;
+ if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
+ break;
+
+ DictScope AS(W, "AuxFunctionDef");
+ W.printNumber("TagIndex", Aux->TagIndex);
+ W.printNumber("TotalSize", Aux->TotalSize);
+ W.printHex("PointerToLineNumber", Aux->PointerToLineNumber);
+ W.printHex("PointerToNextFunction", Aux->PointerToNextFunction);
+ W.printBinary("Unused", makeArrayRef(Aux->Unused));
+
+ } else if (
+ Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL ||
+ (Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
+ Symbol->SectionNumber == 0 &&
+ Symbol->Value == 0)) {
+ const coff_aux_weak_external_definition *Aux;
+ if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
+ break;
+
+ const coff_symbol *Linked;
+ StringRef LinkedName;
+ error_code EC;
+ if ((EC = Obj->getSymbol(Aux->TagIndex, Linked)) ||
+ (EC = Obj->getSymbolName(Linked, LinkedName))) {
+ LinkedName = "";
+ error(EC);
+ }
+
+ DictScope AS(W, "AuxWeakExternal");
+ W.printNumber("Linked", LinkedName, Aux->TagIndex);
+ W.printEnum ("Search", Aux->Characteristics,
+ makeArrayRef(WeakExternalCharacteristics));
+ W.printBinary("Unused", Aux->Unused);
+
+ } else if (Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_FILE) {
+ const coff_aux_file_record *Aux;
+ if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
+ break;
+
+ } else if (Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC) {
+ const coff_aux_section_definition *Aux;
+ if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
+ break;
+
+ DictScope AS(W, "AuxSectionDef");
+ W.printNumber("Length", Aux->Length);
+ W.printNumber("RelocationCount", Aux->NumberOfRelocations);
+ W.printNumber("LineNumberCount", Aux->NumberOfLinenumbers);
+ W.printHex("Checksum", Aux->CheckSum);
+ W.printNumber("Number", Aux->Number);
+ W.printEnum("Selection", Aux->Selection, makeArrayRef(ImageCOMDATSelect));
+ W.printBinary("Unused", makeArrayRef(Aux->Unused));
+
+ if (Section->Characteristics & COFF::IMAGE_SCN_LNK_COMDAT
+ && Aux->Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+ const coff_section *Assoc;
+ StringRef AssocName;
+ error_code EC;
+ if ((EC = Obj->getSection(Aux->Number, Assoc)) ||
+ (EC = Obj->getSectionName(Assoc, AssocName))) {
+ AssocName = "";
+ error(EC);
+ }
+
+ W.printNumber("AssocSection", AssocName, Aux->Number);
+ }
+ } else if (Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_CLR_TOKEN) {
+ const coff_aux_clr_token *Aux;
+ if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
+ break;
+
+ DictScope AS(W, "AuxCLRToken");
+ W.printNumber("AuxType", Aux->AuxType);
+ W.printNumber("Reserved", Aux->Reserved);
+ W.printNumber("SymbolTableIndex", Aux->SymbolTableIndex);
+ W.printBinary("Unused", Aux->Unused);
+
+ } else {
+ W.startLine() << "<unhandled auxiliary record>\n";
+ }
+ }
+}
+
+void COFFDumper::printUnwindInfo() {
+ const coff_file_header *Header;
+ if (error(Obj->getHeader(Header)))
+ return;
+
+ ListScope D(W, "UnwindInformation");
+ if (Header->Machine != COFF::IMAGE_FILE_MACHINE_AMD64) {
+ W.startLine() << "Unsupported image machine type "
+ "(currently only AMD64 is supported).\n";
+ return;
+ }
+
+ printX64UnwindInfo();
+}
+
+void COFFDumper::printX64UnwindInfo() {
+ error_code EC;
+ for (section_iterator SecI = Obj->begin_sections(),
+ SecE = Obj->end_sections();
+ SecI != SecE; SecI.increment(EC)) {
+ if (error(EC)) break;
+
+ StringRef Name;
+ if (error(SecI->getName(Name)))
+ continue;
+ if (Name != ".pdata" && !Name.startswith(".pdata$"))
+ continue;
+
+ const coff_section *PData = Obj->getCOFFSection(SecI);
+
+ ArrayRef<uint8_t> Contents;
+ if (error(Obj->getSectionContents(PData, Contents)) ||
+ Contents.empty())
+ continue;
+
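+ // .pdata is a flat array of fixed-size RuntimeFunction records; trailing
+ // bytes that do not form a whole record are ignored by the division below.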
+ ArrayRef<RuntimeFunction> RFs(
+ reinterpret_cast<const RuntimeFunction *>(Contents.data()),
+ Contents.size() / sizeof(RuntimeFunction));
+
+ for (const RuntimeFunction *I = RFs.begin(), *E = RFs.end(); I < E; ++I) {
+ const uint64_t OffsetInSection = std::distance(RFs.begin(), I)
+ * sizeof(RuntimeFunction);
+
+ printRuntimeFunction(*I, OffsetInSection, RelocMap[PData]);
+ }
+ }
+}
+
+void COFFDumper::printRuntimeFunction(
+ const RuntimeFunction& RTF,
+ uint64_t OffsetInSection,
+ const std::vector<RelocationRef> &Rels) {
+
+ DictScope D(W, "RuntimeFunction");
+ W.printString("StartAddress",
+ formatSymbol(Rels, OffsetInSection + 0, RTF.StartAddress));
+ W.printString("EndAddress",
+ formatSymbol(Rels, OffsetInSection + 4, RTF.EndAddress));
+ W.printString("UnwindInfoAddress",
+ formatSymbol(Rels, OffsetInSection + 8, RTF.UnwindInfoOffset));
+
+ const coff_section* XData = 0;
+ uint64_t UnwindInfoOffset = 0;
+ if (error(getSection(Rels, OffsetInSection + 8, &XData, &UnwindInfoOffset)))
+ return;
+
+ ArrayRef<uint8_t> XContents;
+ if (error(Obj->getSectionContents(XData, XContents)) || XContents.empty())
+ return;
+
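+ // getSection resolved the base of the unwind-info (.xdata) section through
+ // the relocation at offset +8; the stored offset is relative to that base.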
+ UnwindInfoOffset += RTF.UnwindInfoOffset;
+ if (UnwindInfoOffset > XContents.size())
+ return;
+
+ const Win64EH::UnwindInfo *UI =
+ reinterpret_cast<const Win64EH::UnwindInfo *>(
+ XContents.data() + UnwindInfoOffset);
+
+ printUnwindInfo(*UI, UnwindInfoOffset, RelocMap[XData]);
+}
+
+void COFFDumper::printUnwindInfo(
+ const Win64EH::UnwindInfo& UI,
+ uint64_t OffsetInSection,
+ const std::vector<RelocationRef> &Rels) {
+ DictScope D(W, "UnwindInfo");
+ W.printNumber("Version", UI.getVersion());
+ W.printFlags("Flags", UI.getFlags(), makeArrayRef(UnwindFlags));
+ W.printNumber("PrologSize", UI.PrologSize);
+ if (UI.getFrameRegister() != 0) {
+ W.printEnum("FrameRegister", UI.getFrameRegister(),
+ makeArrayRef(UnwindOpInfo));
+ W.printHex("FrameOffset", UI.getFrameOffset());
+ } else {
+ W.printString("FrameRegister", StringRef("-"));
+ W.printString("FrameOffset", StringRef("-"));
+ }
+
+ W.printNumber("UnwindCodeCount", UI.NumCodes);
+ {
+ ListScope CodesD(W, "UnwindCodes");
+ ArrayRef<UnwindCode> UCs(&UI.UnwindCodes[0], UI.NumCodes);
+ for (const UnwindCode *I = UCs.begin(), *E = UCs.end(); I < E; ++I) {
+ unsigned UsedSlots = getNumUsedSlots(*I);
+ if (UsedSlots > unsigned(E - I)) {
+ errs() << "Corrupt unwind data";
+ return;
+ }
+ printUnwindCode(UI, ArrayRef<UnwindCode>(I, E));
+ I += UsedSlots - 1;
+ }
+ }
+
+ uint64_t LSDAOffset = OffsetInSection + getOffsetOfLSDA(UI);
+ if (UI.getFlags() & (UNW_ExceptionHandler | UNW_TerminateHandler)) {
+ W.printString("Handler", formatSymbol(Rels, LSDAOffset,
+ UI.getLanguageSpecificHandlerOffset()));
+ } else if (UI.getFlags() & UNW_ChainInfo) {
+ const RuntimeFunction *Chained = UI.getChainedFunctionEntry();
+ if (Chained) {
+ DictScope D(W, "Chained");
+ W.printString("StartAddress", formatSymbol(Rels, LSDAOffset + 0,
+ Chained->StartAddress));
+ W.printString("EndAddress", formatSymbol(Rels, LSDAOffset + 4,
+ Chained->EndAddress));
+ W.printString("UnwindInfoAddress", formatSymbol(Rels, LSDAOffset + 8,
+ Chained->UnwindInfoOffset));
+ }
+ }
+}
+
+// Prints one unwind code. Because an unwind code can occupy up to 3 slots in
+// the unwind codes array, this function requires that the correct number of
+// slots is provided.
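+// For example, UOP_SaveNonVol occupies 2 slots, while the large forms such
+// as UOP_SaveNonVolBig and the far variant of UOP_AllocLarge occupy 3.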
+void COFFDumper::printUnwindCode(const Win64EH::UnwindInfo& UI,
+ ArrayRef<UnwindCode> UCs) {
+ assert(UCs.size() >= getNumUsedSlots(UCs[0]));
+
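+ // The opcode name is written through W; the operand details below go to
+ // outs() directly, which assumes W is also streaming to stdout.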
+ W.startLine() << format("0x%02X: ", unsigned(UCs[0].u.CodeOffset))
+ << getUnwindCodeTypeName(UCs[0].getUnwindOp());
+
+ uint32_t AllocSize = 0;
+
+ switch (UCs[0].getUnwindOp()) {
+ case UOP_PushNonVol:
+ outs() << " reg=" << getUnwindRegisterName(UCs[0].getOpInfo());
+ break;
+
+ case UOP_AllocLarge:
+ if (UCs[0].getOpInfo() == 0) {
+ AllocSize = UCs[1].FrameOffset * 8;
+ } else {
+ AllocSize = getLargeSlotValue(UCs);
+ }
+ outs() << " size=" << AllocSize;
+ break;
+ case UOP_AllocSmall:
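+ // UOP_AllocSmall encodes allocations of 8..128 bytes as (OpInfo + 1) * 8.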
+ outs() << " size=" << ((UCs[0].getOpInfo() + 1) * 8);
+ break;
+ case UOP_SetFPReg:
+ if (UI.getFrameRegister() == 0) {
+ outs() << " reg=<invalid>";
+ } else {
+ outs() << " reg=" << getUnwindRegisterName(UI.getFrameRegister())
+ << format(", offset=0x%X", UI.getFrameOffset() * 16);
+ }
+ break;
+ case UOP_SaveNonVol:
+ outs() << " reg=" << getUnwindRegisterName(UCs[0].getOpInfo())
+ << format(", offset=0x%X", UCs[1].FrameOffset * 8);
+ break;
+ case UOP_SaveNonVolBig:
+ outs() << " reg=" << getUnwindRegisterName(UCs[0].getOpInfo())
+ << format(", offset=0x%X", getLargeSlotValue(UCs));
+ break;
+ case UOP_SaveXMM128:
+ outs() << " reg=XMM" << static_cast<uint32_t>(UCs[0].getOpInfo())
+ << format(", offset=0x%X", UCs[1].FrameOffset * 16);
+ break;
+ case UOP_SaveXMM128Big:
+ outs() << " reg=XMM" << static_cast<uint32_t>(UCs[0].getOpInfo())
+ << format(", offset=0x%X", getLargeSlotValue(UCs));
+ break;
+ case UOP_PushMachFrame:
+ outs() << " errcode=" << (UCs[0].getOpInfo() == 0 ? "no" : "yes");
+ break;
+ }
+
+ outs() << "\n";
+}
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
new file mode 100644
index 000000000000..9e111dd905ca
--- /dev/null
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -0,0 +1,800 @@
+//===-- ELFDumper.cpp - ELF-specific dumper ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the ELF-specific dumper for llvm-readobj.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm-readobj.h"
+#include "Error.h"
+#include "ObjDumper.h"
+#include "StreamWriter.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace ELF;
+
+
+#define LLVM_READOBJ_ENUM_CASE(ns, enum) \
+ case ns::enum: return #enum;
+
+namespace {
+
+template<typename ELFT>
+class ELFDumper : public ObjDumper {
+public:
+ ELFDumper(const ELFObjectFile<ELFT> *Obj, StreamWriter& Writer)
+ : ObjDumper(Writer)
+ , Obj(Obj) { }
+
+ virtual void printFileHeaders() LLVM_OVERRIDE;
+ virtual void printSections() LLVM_OVERRIDE;
+ virtual void printRelocations() LLVM_OVERRIDE;
+ virtual void printSymbols() LLVM_OVERRIDE;
+ virtual void printDynamicSymbols() LLVM_OVERRIDE;
+ virtual void printUnwindInfo() LLVM_OVERRIDE;
+
+ virtual void printDynamicTable() LLVM_OVERRIDE;
+ virtual void printNeededLibraries() LLVM_OVERRIDE;
+
+private:
+ typedef typename ELFObjectFile<ELFT>::Elf_Shdr Elf_Shdr;
+ typedef typename ELFObjectFile<ELFT>::Elf_Sym Elf_Sym;
+
+ void printSymbol(symbol_iterator SymI, bool IsDynamic = false);
+
+ void printRelocation(section_iterator SecI, relocation_iterator RelI);
+
+ const ELFObjectFile<ELFT> *Obj;
+};
+
+} // namespace
+
+
+namespace llvm {
+
+error_code createELFDumper(const object::ObjectFile *Obj,
+ StreamWriter& Writer,
+ OwningPtr<ObjDumper> &Result) {
+ typedef ELFType<support::little, 4, false> Little32ELF;
+ typedef ELFType<support::big, 4, false> Big32ELF;
+ typedef ELFType<support::little, 8, true > Little64ELF;
+ typedef ELFType<support::big, 8, true > Big64ELF;
+
+ typedef ELFObjectFile<Little32ELF> LittleELF32Obj;
+ typedef ELFObjectFile<Big32ELF > BigELF32Obj;
+ typedef ELFObjectFile<Little64ELF> LittleELF64Obj;
+ typedef ELFObjectFile<Big64ELF > BigELF64Obj;
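+ // dyn_cast succeeds only for the exact ELFObjectFile instantiation the
+ // object was created with, so endianness, alignment and word size must all
+ // match.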
+
+ // Little-endian 32-bit
+ if (const LittleELF32Obj *ELFObj = dyn_cast<LittleELF32Obj>(Obj)) {
+ Result.reset(new ELFDumper<Little32ELF>(ELFObj, Writer));
+ return readobj_error::success;
+ }
+
+ // Big-endian 32-bit
+ if (const BigELF32Obj *ELFObj = dyn_cast<BigELF32Obj>(Obj)) {
+ Result.reset(new ELFDumper<Big32ELF>(ELFObj, Writer));
+ return readobj_error::success;
+ }
+
+ // Little-endian 64-bit
+ if (const LittleELF64Obj *ELFObj = dyn_cast<LittleELF64Obj>(Obj)) {
+ Result.reset(new ELFDumper<Little64ELF>(ELFObj, Writer));
+ return readobj_error::success;
+ }
+
+ // Big-endian 64-bit
+ if (const BigELF64Obj *ELFObj = dyn_cast<BigELF64Obj>(Obj)) {
+ Result.reset(new ELFDumper<Big64ELF>(ELFObj, Writer));
+ return readobj_error::success;
+ }
+
+ return readobj_error::unsupported_obj_file_format;
+}
+
+} // namespace llvm
+
+
+static const EnumEntry<unsigned> ElfClass[] = {
+ { "None", ELF::ELFCLASSNONE },
+ { "32-bit", ELF::ELFCLASS32 },
+ { "64-bit", ELF::ELFCLASS64 },
+};
+
+static const EnumEntry<unsigned> ElfDataEncoding[] = {
+ { "None", ELF::ELFDATANONE },
+ { "LittleEndian", ELF::ELFDATA2LSB },
+ { "BigEndian", ELF::ELFDATA2MSB },
+};
+
+static const EnumEntry<unsigned> ElfObjectFileType[] = {
+ { "None", ELF::ET_NONE },
+ { "Relocatable", ELF::ET_REL },
+ { "Executable", ELF::ET_EXEC },
+ { "SharedObject", ELF::ET_DYN },
+ { "Core", ELF::ET_CORE },
+};
+
+static const EnumEntry<unsigned> ElfOSABI[] = {
+ { "SystemV", ELF::ELFOSABI_NONE },
+ { "HPUX", ELF::ELFOSABI_HPUX },
+ { "NetBSD", ELF::ELFOSABI_NETBSD },
+ { "GNU/Linux", ELF::ELFOSABI_LINUX },
+ { "GNU/Hurd", ELF::ELFOSABI_HURD },
+ { "Solaris", ELF::ELFOSABI_SOLARIS },
+ { "AIX", ELF::ELFOSABI_AIX },
+ { "IRIX", ELF::ELFOSABI_IRIX },
+ { "FreeBSD", ELF::ELFOSABI_FREEBSD },
+ { "TRU64", ELF::ELFOSABI_TRU64 },
+ { "Modesto", ELF::ELFOSABI_MODESTO },
+ { "OpenBSD", ELF::ELFOSABI_OPENBSD },
+ { "OpenVMS", ELF::ELFOSABI_OPENVMS },
+ { "NSK", ELF::ELFOSABI_NSK },
+ { "AROS", ELF::ELFOSABI_AROS },
+ { "FenixOS", ELF::ELFOSABI_FENIXOS },
+ { "C6000_ELFABI", ELF::ELFOSABI_C6000_ELFABI },
+ { "C6000_LINUX" , ELF::ELFOSABI_C6000_LINUX },
+ { "ARM", ELF::ELFOSABI_ARM },
+ { "Standalone" , ELF::ELFOSABI_STANDALONE }
+};
+
+static const EnumEntry<unsigned> ElfMachineType[] = {
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_NONE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_M32 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SPARC ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_386 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_68K ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_88K ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_486 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_860 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MIPS ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_S370 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MIPS_RS3_LE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_PARISC ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_VPP500 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SPARC32PLUS ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_960 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_PPC ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_PPC64 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_S390 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SPU ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_V800 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_FR20 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_RH32 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_RCE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ARM ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ALPHA ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SH ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SPARCV9 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TRICORE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ARC ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_H8_300 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_H8_300H ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_H8S ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_H8_500 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_IA_64 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MIPS_X ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_COLDFIRE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_68HC12 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MMA ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_PCP ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_NCPU ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_NDR1 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_STARCORE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ME16 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ST100 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TINYJ ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_X86_64 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_PDSP ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_PDP10 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_PDP11 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_FX66 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ST9PLUS ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ST7 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_68HC16 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_68HC11 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_68HC08 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_68HC05 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SVX ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ST19 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_VAX ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_CRIS ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_JAVELIN ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_FIREPATH ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ZSP ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MMIX ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_HUANY ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_PRISM ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_AVR ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_FR30 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_D10V ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_D30V ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_V850 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_M32R ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MN10300 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MN10200 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_PJ ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_OPENRISC ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ARC_COMPACT ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_XTENSA ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_VIDEOCORE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TMM_GPP ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_NS32K ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TPC ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SNP1K ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ST200 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_IP2K ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MAX ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_CR ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_F2MC16 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MSP430 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_BLACKFIN ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SE_C33 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SEP ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ARCA ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_UNICORE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_EXCESS ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_DXP ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ALTERA_NIOS2 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_CRX ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_XGATE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_C166 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_M16C ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_DSPIC30F ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_CE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_M32C ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TSK3000 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_RS08 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SHARC ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ECOG2 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SCORE7 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_DSP24 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_VIDEOCORE3 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_LATTICEMICO32),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SE_C17 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TI_C6000 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TI_C2000 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TI_C5500 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MMDSP_PLUS ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_CYPRESS_M8C ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_R32C ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TRIMEDIA ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_HEXAGON ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_8051 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_STXP7X ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_NDS32 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ECOG1 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ECOG1X ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MAXQ30 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_XIMO16 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MANIK ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_CRAYNV2 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_RX ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_METAG ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MCST_ELBRUS ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ECOG16 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_CR16 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ETPU ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_SLE9X ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_L10M ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_K10M ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_AARCH64 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_AVR32 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_STM8 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TILE64 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TILEPRO ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MICROBLAZE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_CUDA ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_TILEGX ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_CLOUDSHIELD ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_COREA_1ST ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_COREA_2ND ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_ARC_COMPACT2 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_OPEN8 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_RL78 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_VIDEOCORE5 ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_78KOR ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_56800EX ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_MBLAZE )
+};
+
+static const EnumEntry<unsigned> ElfSymbolBindings[] = {
+ { "Local", ELF::STB_LOCAL },
+ { "Global", ELF::STB_GLOBAL },
+ { "Weak", ELF::STB_WEAK }
+};
+
+static const EnumEntry<unsigned> ElfSymbolTypes[] = {
+ { "None", ELF::STT_NOTYPE },
+ { "Object", ELF::STT_OBJECT },
+ { "Function", ELF::STT_FUNC },
+ { "Section", ELF::STT_SECTION },
+ { "File", ELF::STT_FILE },
+ { "Common", ELF::STT_COMMON },
+ { "TLS", ELF::STT_TLS },
+ { "GNU_IFunc", ELF::STT_GNU_IFUNC }
+};
+
+static const char *getElfSectionType(unsigned Arch, unsigned Type) {
+ switch (Arch) {
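+ // None of the outer cases break: a type with no arch-specific name falls
+ // through to the generic switch below.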
+ case Triple::arm:
+ switch (Type) {
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_EXIDX);
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_PREEMPTMAP);
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_ATTRIBUTES);
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_DEBUGOVERLAY);
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_ARM_OVERLAYSECTION);
+ }
+ case Triple::hexagon:
+ switch (Type) {
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_HEX_ORDERED);
+ }
+ case Triple::x86_64:
+ switch (Type) {
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_X86_64_UNWIND);
+ }
+ case Triple::mips:
+ case Triple::mipsel:
+ switch (Type) {
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_MIPS_REGINFO);
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_MIPS_OPTIONS);
+ }
+ }
+
+ switch (Type) {
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_NULL );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_PROGBITS );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_SYMTAB );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_STRTAB );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_RELA );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_HASH );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_DYNAMIC );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_NOTE );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_NOBITS );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_REL );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_SHLIB );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_DYNSYM );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_INIT_ARRAY );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_FINI_ARRAY );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_PREINIT_ARRAY );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_GROUP );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_HASH );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_verdef );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_verneed );
+ LLVM_READOBJ_ENUM_CASE(ELF, SHT_GNU_versym );
+ default: return "";
+ }
+}
+
+static const EnumEntry<unsigned> ElfSectionFlags[] = {
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_WRITE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_ALLOC ),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_EXECINSTR ),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_MERGE ),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_STRINGS ),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_INFO_LINK ),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_LINK_ORDER ),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_OS_NONCONFORMING),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_GROUP ),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_TLS ),
+ LLVM_READOBJ_ENUM_ENT(ELF, XCORE_SHF_CP_SECTION),
+ LLVM_READOBJ_ENUM_ENT(ELF, XCORE_SHF_DP_SECTION),
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_NOSTRIP )
+};
+
+
+template<class ELFT>
+void ELFDumper<ELFT>::printFileHeaders() {
+ error_code EC;
+ typedef ELFObjectFile<ELFT> ELFO;
+
+ const typename ELFO::Elf_Ehdr *Header = Obj->getElfHeader();
+
+ {
+ DictScope D(W, "ElfHeader");
+ {
+ DictScope D(W, "Ident");
+ W.printBinary("Magic", makeArrayRef(Header->e_ident).slice(ELF::EI_MAG0,
+ 4));
+ W.printEnum ("Class", Header->e_ident[ELF::EI_CLASS],
+ makeArrayRef(ElfClass));
+ W.printEnum ("DataEncoding", Header->e_ident[ELF::EI_DATA],
+ makeArrayRef(ElfDataEncoding));
+ W.printNumber("FileVersion", Header->e_ident[ELF::EI_VERSION]);
+ W.printEnum ("OS/ABI", Header->e_ident[ELF::EI_OSABI],
+ makeArrayRef(ElfOSABI));
+ W.printNumber("ABIVersion", Header->e_ident[ELF::EI_ABIVERSION]);
+ W.printBinary("Unused", makeArrayRef(Header->e_ident).slice(ELF::EI_PAD));
+ }
+
+ W.printEnum ("Type", Header->e_type, makeArrayRef(ElfObjectFileType));
+ W.printEnum ("Machine", Header->e_machine, makeArrayRef(ElfMachineType));
+ W.printNumber("Version", Header->e_version);
+ W.printHex ("Entry", Header->e_entry);
+ W.printHex ("ProgramHeaderOffset", Header->e_phoff);
+ W.printHex ("SectionHeaderOffset", Header->e_shoff);
+ W.printFlags ("Flags", Header->e_flags);
+ W.printNumber("HeaderSize", Header->e_ehsize);
+ W.printNumber("ProgramHeaderEntrySize", Header->e_phentsize);
+ W.printNumber("ProgramHeaderCount", Header->e_phnum);
+ W.printNumber("SectionHeaderEntrySize", Header->e_shentsize);
+ W.printNumber("SectionHeaderCount", Header->e_shnum);
+ W.printNumber("StringTableSectionIndex", Header->e_shstrndx);
+ }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printSections() {
+ ListScope SectionsD(W, "Sections");
+
+ int SectionIndex = -1;
+ error_code EC;
+ for (section_iterator SecI = Obj->begin_sections(),
+ SecE = Obj->end_sections();
+ SecI != SecE; SecI.increment(EC)) {
+ if (error(EC)) break;
+
+ ++SectionIndex;
+
+ const Elf_Shdr *Section = Obj->getElfSection(SecI);
+ StringRef Name;
+ if (error(SecI->getName(Name)))
+ Name = "";
+
+ DictScope SectionD(W, "Section");
+ W.printNumber("Index", SectionIndex);
+ W.printNumber("Name", Name, Section->sh_name);
+ W.printHex ("Type", getElfSectionType(Obj->getArch(), Section->sh_type),
+ Section->sh_type);
+ W.printFlags ("Flags", Section->sh_flags, makeArrayRef(ElfSectionFlags));
+ W.printHex ("Address", Section->sh_addr);
+ W.printHex ("Offset", Section->sh_offset);
+ W.printNumber("Size", Section->sh_size);
+ W.printNumber("Link", Section->sh_link);
+ W.printNumber("Info", Section->sh_info);
+ W.printNumber("AddressAlignment", Section->sh_addralign);
+ W.printNumber("EntrySize", Section->sh_entsize);
+
+ if (opts::SectionRelocations) {
+ ListScope D(W, "Relocations");
+ for (relocation_iterator RelI = SecI->begin_relocations(),
+ RelE = SecI->end_relocations();
+ RelI != RelE; RelI.increment(EC)) {
+ if (error(EC)) break;
+
+ printRelocation(SecI, RelI);
+ }
+ }
+
+ if (opts::SectionSymbols) {
+ ListScope D(W, "Symbols");
+ for (symbol_iterator SymI = Obj->begin_symbols(),
+ SymE = Obj->end_symbols();
+ SymI != SymE; SymI.increment(EC)) {
+ if (error(EC)) break;
+
+ bool Contained = false;
+ if (SecI->containsSymbol(*SymI, Contained) || !Contained)
+ continue;
+
+ printSymbol(SymI);
+ }
+ }
+
+ if (opts::SectionData) {
+ StringRef Data;
+ if (error(SecI->getContents(Data))) break;
+
+ W.printBinaryBlock("SectionData", Data);
+ }
+ }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printRelocations() {
+ ListScope D(W, "Relocations");
+
+ error_code EC;
+ int SectionNumber = -1;
+ for (section_iterator SecI = Obj->begin_sections(),
+ SecE = Obj->end_sections();
+ SecI != SecE; SecI.increment(EC)) {
+ if (error(EC)) break;
+
+ ++SectionNumber;
+ StringRef Name;
+ if (error(SecI->getName(Name)))
+ continue;
+
+ bool PrintedGroup = false;
+ for (relocation_iterator RelI = SecI->begin_relocations(),
+ RelE = SecI->end_relocations();
+ RelI != RelE; RelI.increment(EC)) {
+ if (error(EC)) break;
+
+ if (!PrintedGroup) {
+ W.startLine() << "Section (" << SectionNumber << ") " << Name << " {\n";
+ W.indent();
+ PrintedGroup = true;
+ }
+
+ printRelocation(SecI, RelI);
+ }
+
+ if (PrintedGroup) {
+ W.unindent();
+ W.startLine() << "}\n";
+ }
+ }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printRelocation(section_iterator Sec,
+ relocation_iterator RelI) {
+ uint64_t Offset;
+ SmallString<32> RelocName;
+ int64_t Info;
+ StringRef SymbolName;
+ SymbolRef Symbol;
+ if (error(RelI->getOffset(Offset))) return;
+ if (error(RelI->getTypeName(RelocName))) return;
+ if (error(RelI->getAdditionalInfo(Info))) return;
+ if (error(RelI->getSymbol(Symbol))) return;
+ if (error(Symbol.getName(SymbolName))) return;
+
+ raw_ostream& OS = W.startLine();
+ OS << W.hex(Offset)
+ << " " << RelocName
+ << " " << (SymbolName.size() > 0 ? SymbolName : "-")
+ << " " << W.hex(Info)
+ << "\n";
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printSymbols() {
+ ListScope Group(W, "Symbols");
+
+ error_code EC;
+ for (symbol_iterator SymI = Obj->begin_symbols(), SymE = Obj->end_symbols();
+ SymI != SymE; SymI.increment(EC)) {
+ if (error(EC)) break;
+
+ printSymbol(SymI);
+ }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printDynamicSymbols() {
+ ListScope Group(W, "DynamicSymbols");
+
+ error_code EC;
+ for (symbol_iterator SymI = Obj->begin_dynamic_symbols(),
+ SymE = Obj->end_dynamic_symbols();
+ SymI != SymE; SymI.increment(EC)) {
+ if (error(EC)) break;
+
+ printSymbol(SymI, true);
+ }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printSymbol(symbol_iterator SymI, bool IsDynamic) {
+ error_code EC;
+
+ const Elf_Sym *Symbol = Obj->getElfSymbol(SymI);
+ const Elf_Shdr *Section = Obj->getSection(Symbol);
+
+ StringRef SymbolName;
+ if (SymI->getName(SymbolName))
+ SymbolName = "";
+
+ StringRef SectionName;
+ if (Section && Obj->getSectionName(Section, SectionName))
+ SectionName = "";
+
+ std::string FullSymbolName(SymbolName);
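+ // Dynamic symbols may carry an ELF version suffix: "@@" marks the default
+ // version, "@" a non-default one.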
+ if (IsDynamic) {
+ StringRef Version;
+ bool IsDefault;
+ if (error(Obj->getSymbolVersion(*SymI, Version, IsDefault)))
+ return;
+ if (!Version.empty()) {
+ FullSymbolName += (IsDefault ? "@@" : "@");
+ FullSymbolName += Version;
+ }
+ }
+
+ DictScope D(W, "Symbol");
+ W.printNumber("Name", FullSymbolName, Symbol->st_name);
+ W.printHex ("Value", Symbol->st_value);
+ W.printNumber("Size", Symbol->st_size);
+ W.printEnum ("Binding", Symbol->getBinding(),
+ makeArrayRef(ElfSymbolBindings));
+ W.printEnum ("Type", Symbol->getType(), makeArrayRef(ElfSymbolTypes));
+ W.printNumber("Other", Symbol->st_other);
+ W.printHex ("Section", SectionName, Symbol->st_shndx);
+}
+
+#define LLVM_READOBJ_TYPE_CASE(name) \
+ case DT_##name: return #name
+
+static const char *getTypeString(uint64_t Type) {
+ switch (Type) {
+ LLVM_READOBJ_TYPE_CASE(BIND_NOW);
+ LLVM_READOBJ_TYPE_CASE(DEBUG);
+ LLVM_READOBJ_TYPE_CASE(FINI);
+ LLVM_READOBJ_TYPE_CASE(FINI_ARRAY);
+ LLVM_READOBJ_TYPE_CASE(FINI_ARRAYSZ);
+ LLVM_READOBJ_TYPE_CASE(FLAGS);
+ LLVM_READOBJ_TYPE_CASE(HASH);
+ LLVM_READOBJ_TYPE_CASE(INIT);
+ LLVM_READOBJ_TYPE_CASE(INIT_ARRAY);
+ LLVM_READOBJ_TYPE_CASE(INIT_ARRAYSZ);
+ LLVM_READOBJ_TYPE_CASE(PREINIT_ARRAY);
+ LLVM_READOBJ_TYPE_CASE(PREINIT_ARRAYSZ);
+ LLVM_READOBJ_TYPE_CASE(JMPREL);
+ LLVM_READOBJ_TYPE_CASE(NEEDED);
+ LLVM_READOBJ_TYPE_CASE(NULL);
+ LLVM_READOBJ_TYPE_CASE(PLTGOT);
+ LLVM_READOBJ_TYPE_CASE(PLTREL);
+ LLVM_READOBJ_TYPE_CASE(PLTRELSZ);
+ LLVM_READOBJ_TYPE_CASE(REL);
+ LLVM_READOBJ_TYPE_CASE(RELA);
+ LLVM_READOBJ_TYPE_CASE(RELENT);
+ LLVM_READOBJ_TYPE_CASE(RELSZ);
+ LLVM_READOBJ_TYPE_CASE(RELAENT);
+ LLVM_READOBJ_TYPE_CASE(RELASZ);
+ LLVM_READOBJ_TYPE_CASE(RPATH);
+ LLVM_READOBJ_TYPE_CASE(RUNPATH);
+ LLVM_READOBJ_TYPE_CASE(SONAME);
+ LLVM_READOBJ_TYPE_CASE(STRSZ);
+ LLVM_READOBJ_TYPE_CASE(STRTAB);
+ LLVM_READOBJ_TYPE_CASE(SYMBOLIC);
+ LLVM_READOBJ_TYPE_CASE(SYMENT);
+ LLVM_READOBJ_TYPE_CASE(SYMTAB);
+ LLVM_READOBJ_TYPE_CASE(TEXTREL);
+ default: return "unknown";
+ }
+}
+
+#undef LLVM_READOBJ_TYPE_CASE
+
+template<class ELFT>
+static void printValue(const ELFObjectFile<ELFT> *O, uint64_t Type,
+ uint64_t Value, bool Is64, raw_ostream &OS) {
+ switch (Type) {
+ case DT_PLTREL:
+ if (Value == DT_REL) {
+ OS << "REL";
+ break;
+ } else if (Value == DT_RELA) {
+ OS << "RELA";
+ break;
+ }
+ // Fallthrough.
+ case DT_PLTGOT:
+ case DT_HASH:
+ case DT_STRTAB:
+ case DT_SYMTAB:
+ case DT_RELA:
+ case DT_INIT:
+ case DT_FINI:
+ case DT_REL:
+ case DT_JMPREL:
+ case DT_INIT_ARRAY:
+ case DT_FINI_ARRAY:
+ case DT_PREINIT_ARRAY:
+ case DT_DEBUG:
+ case DT_NULL:
+ OS << format("0x%" PRIX64, Value);
+ break;
+ case DT_PLTRELSZ:
+ case DT_RELASZ:
+ case DT_RELAENT:
+ case DT_STRSZ:
+ case DT_SYMENT:
+ case DT_RELSZ:
+ case DT_RELENT:
+ case DT_INIT_ARRAYSZ:
+ case DT_FINI_ARRAYSZ:
+ case DT_PREINIT_ARRAYSZ:
+ OS << Value << " (bytes)";
+ break;
+ case DT_NEEDED:
+ OS << "SharedLibrary ("
+ << O->getString(O->getDynamicStringTableSectionHeader(), Value) << ")";
+ break;
+ case DT_SONAME:
+ OS << "LibrarySoname ("
+ << O->getString(O->getDynamicStringTableSectionHeader(), Value) << ")";
+ break;
+ }
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printUnwindInfo() {
+ W.startLine() << "UnwindInfo not implemented.\n";
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printDynamicTable() {
+ typedef ELFObjectFile<ELFT> ELFO;
+ typedef typename ELFO::Elf_Dyn_iterator EDI;
+ EDI Start = Obj->begin_dynamic_table(),
+ End = Obj->end_dynamic_table(true);
+
+ if (Start == End)
+ return;
+
+ ptrdiff_t Total = std::distance(Start, End);
+ raw_ostream &OS = W.getOStream();
+ W.startLine() << "DynamicSection [ (" << Total << " entries)\n";
+
+ bool Is64 = Obj->getBytesInAddress() == 8;
+
+ W.startLine()
+ << "  Tag" << (Is64 ? "                " : "        ") << "Type"
+ << "                 " << "Name/Value\n";
+ for (; Start != End; ++Start) {
+ W.startLine()
+ << " "
+ << format(Is64 ? "0x%016" PRIX64 : "0x%08" PRIX64, Start->getTag())
+ << " " << format("%-21s", getTypeString(Start->getTag()));
+ printValue(Obj, Start->getTag(), Start->getVal(), Is64, OS);
+ OS << "\n";
+ }
+
+ W.startLine() << "]\n";
+}
+
+static bool compareLibraryName(const LibraryRef &L, const LibraryRef &R) {
+ StringRef LPath, RPath;
+ L.getPath(LPath);
+ R.getPath(RPath);
+ return LPath < RPath;
+}
+
+template<class ELFT>
+void ELFDumper<ELFT>::printNeededLibraries() {
+ ListScope D(W, "NeededLibraries");
+
+ error_code EC;
+
+ typedef std::vector<LibraryRef> LibsTy;
+ LibsTy Libs;
+
+ for (library_iterator I = Obj->begin_libraries_needed(),
+ E = Obj->end_libraries_needed();
+ I != E; I.increment(EC)) {
+ if (EC)
+ report_fatal_error("Needed libraries iteration failed");
+
+ Libs.push_back(*I);
+ }
+
+ std::sort(Libs.begin(), Libs.end(), &compareLibraryName);
+
+ for (LibsTy::const_iterator I = Libs.begin(), E = Libs.end();
+ I != E; ++I) {
+ StringRef Path;
+ I->getPath(Path);
+ outs() << " " << Path << "\n";
+ }
+}
diff --git a/tools/llvm-readobj/Error.cpp b/tools/llvm-readobj/Error.cpp
new file mode 100644
index 000000000000..a6c61321c6dc
--- /dev/null
+++ b/tools/llvm-readobj/Error.cpp
@@ -0,0 +1,62 @@
+//===- Error.cpp - system_error extensions for llvm-readobj -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines a new error_category for the llvm-readobj tool.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Error.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class _readobj_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+ virtual error_condition default_error_condition(int ev) const;
+};
+} // namespace
+
+const char *_readobj_error_category::name() const {
+ return "llvm.readobj";
+}
+
+std::string _readobj_error_category::message(int ev) const {
+ switch (ev) {
+ case readobj_error::success: return "Success";
+ case readobj_error::file_not_found:
+ return "No such file.";
+ case readobj_error::unsupported_file_format:
+ return "The file was not recognized as a valid object file.";
+ case readobj_error::unrecognized_file_format:
+ return "Unrecognized file type.";
+ case readobj_error::unsupported_obj_file_format:
+ return "Unsupported object file format.";
+ case readobj_error::unknown_symbol:
+ return "Unknown symbol.";
+ default:
+ llvm_unreachable("An enumerator of readobj_error does not have a message "
+ "defined.");
+ }
+}
+
+error_condition _readobj_error_category::default_error_condition(int ev) const {
+ if (ev == readobj_error::success)
+ return errc::success;
+ return errc::invalid_argument;
+}
+
+namespace llvm {
+const error_category &readobj_category() {
+ static _readobj_error_category o;
+ return o;
+}
+} // namespace llvm
diff --git a/tools/llvm-readobj/Error.h b/tools/llvm-readobj/Error.h
new file mode 100644
index 000000000000..cf68da89c1d3
--- /dev/null
+++ b/tools/llvm-readobj/Error.h
@@ -0,0 +1,48 @@
+//===- Error.h - system_error extensions for llvm-readobj -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares a new error_category for the llvm-readobj tool.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_READOBJ_ERROR_H
+#define LLVM_READOBJ_ERROR_H
+
+#include "llvm/Support/system_error.h"
+
+namespace llvm {
+
+const error_category &readobj_category();
+
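+// The enum is wrapped in a struct so values still convert to int while
+// giving make_error_code a distinct type to overload on.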
+struct readobj_error {
+ enum _ {
+ success = 0,
+ file_not_found,
+ unsupported_file_format,
+ unrecognized_file_format,
+ unsupported_obj_file_format,
+ unknown_symbol
+ };
+ _ v_;
+
+ readobj_error(_ v) : v_(v) {}
+ explicit readobj_error(int v) : v_(_(v)) {}
+ operator int() const {return v_;}
+};
+
+inline error_code make_error_code(readobj_error e) {
+ return error_code(static_cast<int>(e), readobj_category());
+}
+
+template <> struct is_error_code_enum<readobj_error> : true_type { };
+template <> struct is_error_code_enum<readobj_error::_> : true_type { };
+
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-readobj/LLVMBuild.txt b/tools/llvm-readobj/LLVMBuild.txt
index c9f934f4b6fa..813c12b752bc 100644
--- a/tools/llvm-readobj/LLVMBuild.txt
+++ b/tools/llvm-readobj/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = llvm-readobj
parent = Tools
-required_libraries = Archive BitReader Object
+required_libraries = all-targets Archive BitReader Object
diff --git a/tools/llvm-readobj/MachODumper.cpp b/tools/llvm-readobj/MachODumper.cpp
new file mode 100644
index 000000000000..798c9417720c
--- /dev/null
+++ b/tools/llvm-readobj/MachODumper.cpp
@@ -0,0 +1,438 @@
+//===-- MachODumper.cpp - MachO-specific dumper -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachO-specific dumper for llvm-readobj.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-readobj.h"
+#include "Error.h"
+#include "ObjDumper.h"
+#include "StreamWriter.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/Casting.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+
+class MachODumper : public ObjDumper {
+public:
+ MachODumper(const llvm::object::MachOObjectFile *Obj, StreamWriter& Writer)
+ : ObjDumper(Writer)
+ , Obj(Obj) { }
+
+ virtual void printFileHeaders() LLVM_OVERRIDE;
+ virtual void printSections() LLVM_OVERRIDE;
+ virtual void printRelocations() LLVM_OVERRIDE;
+ virtual void printSymbols() LLVM_OVERRIDE;
+ virtual void printDynamicSymbols() LLVM_OVERRIDE;
+ virtual void printUnwindInfo() LLVM_OVERRIDE;
+
+private:
+ void printSymbol(symbol_iterator SymI);
+
+ void printRelocation(section_iterator SecI, relocation_iterator RelI);
+
+ const llvm::object::MachOObjectFile *Obj;
+};
+
+} // namespace
+
+
+namespace llvm {
+
+error_code createMachODumper(const object::ObjectFile *Obj,
+ StreamWriter& Writer,
+ OwningPtr<ObjDumper> &Result) {
+ const MachOObjectFile *MachOObj = dyn_cast<MachOObjectFile>(Obj);
+ if (!MachOObj)
+ return readobj_error::unsupported_obj_file_format;
+
+ Result.reset(new MachODumper(MachOObj, Writer));
+ return readobj_error::success;
+}
+
+} // namespace llvm
+
+
+static const EnumEntry<unsigned> MachOSectionTypes[] = {
+ { "Regular" , 0x00 },
+ { "ZeroFill" , 0x01 },
+ { "CStringLiterals" , 0x02 },
+ { "4ByteLiterals" , 0x03 },
+ { "8ByteLiterals" , 0x04 },
+ { "LiteralPointers" , 0x05 },
+ { "NonLazySymbolPointers" , 0x06 },
+ { "LazySymbolPointers" , 0x07 },
+ { "SymbolStubs" , 0x08 },
+ { "ModInitFuncs" , 0x09 },
+ { "ModTermFuncs" , 0x0A },
+ { "Coalesced" , 0x0B },
+ { "GBZeroFill" , 0x0C },
+ { "Interposing" , 0x0D },
+ { "16ByteLiterals" , 0x0E },
+ { "DTraceDOF" , 0x0F },
+ { "LazyDylibSymbolPoints" , 0x10 },
+ { "ThreadLocalRegular" , 0x11 },
+ { "ThreadLocalZerofill" , 0x12 },
+ { "ThreadLocalVariables" , 0x13 },
+ { "ThreadLocalVariablePointers" , 0x14 },
+ { "ThreadLocalInitFunctionPointers", 0x15 }
+};
+
+static const EnumEntry<unsigned> MachOSectionAttributes[] = {
+ { "LocReloc" , 1 << 0 /*S_ATTR_LOC_RELOC */ },
+ { "ExtReloc" , 1 << 1 /*S_ATTR_EXT_RELOC */ },
+ { "SomeInstructions" , 1 << 2 /*S_ATTR_SOME_INSTRUCTIONS */ },
+ { "Debug" , 1 << 17 /*S_ATTR_DEBUG */ },
+ { "SelfModifyingCode", 1 << 18 /*S_ATTR_SELF_MODIFYING_CODE*/ },
+ { "LiveSupport" , 1 << 19 /*S_ATTR_LIVE_SUPPORT */ },
+ { "NoDeadStrip" , 1 << 20 /*S_ATTR_NO_DEAD_STRIP */ },
+ { "StripStaticSyms" , 1 << 21 /*S_ATTR_STRIP_STATIC_SYMS */ },
+ { "NoTOC" , 1 << 22 /*S_ATTR_NO_TOC */ },
+ { "PureInstructions" , 1 << 23 /*S_ATTR_PURE_INSTRUCTIONS */ },
+};
+
+static const EnumEntry<unsigned> MachOSymbolRefTypes[] = {
+ { "UndefinedNonLazy", 0 },
+ { "ReferenceFlagUndefinedLazy", 1 },
+ { "ReferenceFlagDefined", 2 },
+ { "ReferenceFlagPrivateDefined", 3 },
+ { "ReferenceFlagPrivateUndefinedNonLazy", 4 },
+ { "ReferenceFlagPrivateUndefinedLazy", 5 }
+};
+
+static const EnumEntry<unsigned> MachOSymbolFlags[] = {
+ { "ReferencedDynamically", 0x10 },
+ { "NoDeadStrip", 0x20 },
+ { "WeakRef", 0x40 },
+ { "WeakDef", 0x80 }
+};
+
+static const EnumEntry<unsigned> MachOSymbolTypes[] = {
+ { "Undef", 0x0 },
+ { "External", 0x1 },
+ { "Abs", 0x2 },
+ { "Indirect", 0xA },
+ { "PreboundUndef", 0xC },
+ { "Section", 0xE },
+ { "PrivateExternal", 0x10 }
+};
+
+namespace {
+ enum {
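+ // Any of the three high bits of a MachO symbol type marks a symbolic
+ // debugging (stab) entry.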
+ N_STAB = 0xE0
+ };
+
+ struct MachOSection {
+ ArrayRef<char> Name;
+ ArrayRef<char> SegmentName;
+ uint64_t Address;
+ uint64_t Size;
+ uint32_t Offset;
+ uint32_t Alignment;
+ uint32_t RelocationTableOffset;
+ uint32_t NumRelocationTableEntries;
+ uint32_t Flags;
+ uint32_t Reserved1;
+ uint32_t Reserved2;
+ };
+
+ struct MachOSymbol {
+ uint32_t StringIndex;
+ uint8_t Type;
+ uint8_t SectionIndex;
+ uint16_t Flags;
+ uint64_t Value;
+ };
+}
+
+static StringRef parseSegmentOrSectionName(ArrayRef<char> P) {
+ if (P[15] == 0)
+ // Null terminated.
+ return StringRef(P.data());
+ // Not null terminated, so this is a 16 char string.
+ return StringRef(P.data(), 16);
+}
+
+static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (LCI.Command.Type == macho::LCT_Segment64)
+ return true;
+ assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type.");
+ return false;
+}
+
+static void getSection(const MachOObject *MachOObj,
+ DataRefImpl DRI,
+ MachOSection &Section) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ MachOObj->ReadSection64(LCI, DRI.d.b, Sect);
+
+ Section.Name = ArrayRef<char>(Sect->Name);
+ Section.SegmentName = ArrayRef<char>(Sect->SegmentName);
+ Section.Address = Sect->Address;
+ Section.Size = Sect->Size;
+ Section.Offset = Sect->Offset;
+ Section.Alignment = Sect->Align;
+ Section.RelocationTableOffset = Sect->RelocationTableOffset;
+ Section.NumRelocationTableEntries = Sect->NumRelocationTableEntries;
+ Section.Flags = Sect->Flags;
+ Section.Reserved1 = Sect->Reserved1;
+ Section.Reserved2 = Sect->Reserved2;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ MachOObj->ReadSection(LCI, DRI.d.b, Sect);
+
+ Section.Name = Sect->Name;
+ Section.SegmentName = Sect->SegmentName;
+ Section.Address = Sect->Address;
+ Section.Size = Sect->Size;
+ Section.Offset = Sect->Offset;
+ Section.Alignment = Sect->Align;
+ Section.RelocationTableOffset = Sect->RelocationTableOffset;
+ Section.NumRelocationTableEntries = Sect->NumRelocationTableEntries;
+ Section.Flags = Sect->Flags;
+ Section.Reserved1 = Sect->Reserved1;
+ Section.Reserved2 = Sect->Reserved2;
+ }
+}
+
+static void getSymbolTableEntry(const MachOObject *MachO,
+ DataRefImpl DRI,
+ InMemoryStruct<macho::SymbolTableEntry> &Res) {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ LoadCommandInfo LCI = MachO->getLoadCommandInfo(DRI.d.a);
+ MachO->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+ MachO->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b, Res);
+}
+
+static void getSymbol64TableEntry(const MachOObject *MachO,
+ DataRefImpl DRI,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ LoadCommandInfo LCI = MachO->getLoadCommandInfo(DRI.d.a);
+ MachO->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+ MachO->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b, Res);
+}
+
+static void getSymbol(const MachOObject *MachOObj,
+ DataRefImpl DRI,
+ MachOSymbol &Symbol) {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(MachOObj, DRI, Entry);
+ Symbol.StringIndex = Entry->StringIndex;
+ Symbol.Type = Entry->Type;
+ Symbol.SectionIndex = Entry->SectionIndex;
+ Symbol.Flags = Entry->Flags;
+ Symbol.Value = Entry->Value;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(MachOObj, DRI, Entry);
+ Symbol.StringIndex = Entry->StringIndex;
+ Symbol.Type = Entry->Type;
+ Symbol.SectionIndex = Entry->SectionIndex;
+ Symbol.Flags = Entry->Flags;
+ Symbol.Value = Entry->Value;
+ }
+}
+
+void MachODumper::printFileHeaders() {
+ W.startLine() << "FileHeaders not implemented.\n";
+}
+
+void MachODumper::printSections() {
+ ListScope Group(W, "Sections");
+
+ int SectionIndex = -1;
+ error_code EC;
+ for (section_iterator SecI = Obj->begin_sections(),
+ SecE = Obj->end_sections();
+ SecI != SecE; SecI.increment(EC)) {
+ if (error(EC)) break;
+
+ ++SectionIndex;
+
+ const MachOObject *MachO = const_cast<MachOObjectFile*>(Obj)->getObject();
+
+ MachOSection Section;
+ getSection(MachO, SecI->getRawDataRefImpl(), Section);
+ StringRef Name;
+ if (error(SecI->getName(Name)))
+ Name = "";
+
+ DictScope SectionD(W, "Section");
+ W.printNumber("Index", SectionIndex);
+ W.printBinary("Name", Name, Section.Name);
+ W.printBinary("Segment", parseSegmentOrSectionName(Section.SegmentName),
+ Section.SegmentName);
+ W.printHex ("Address", Section.Address);
+ W.printHex ("Size", Section.Size);
+ W.printNumber("Offset", Section.Offset);
+ W.printNumber("Alignment", Section.Alignment);
+ W.printHex ("RelocationOffset", Section.RelocationTableOffset);
+ W.printNumber("RelocationCount", Section.NumRelocationTableEntries);
+ W.printEnum ("Type", Section.Flags & 0xFF,
+ makeArrayRef(MachOSectionAttributes));
+ W.printFlags ("Attributes", Section.Flags >> 8,
+ makeArrayRef(MachOSectionAttributes));
+ W.printHex ("Reserved1", Section.Reserved1);
+ W.printHex ("Reserved2", Section.Reserved2);
+
+ if (opts::SectionRelocations) {
+ ListScope D(W, "Relocations");
+ for (relocation_iterator RelI = SecI->begin_relocations(),
+ RelE = SecI->end_relocations();
+ RelI != RelE; RelI.increment(EC)) {
+ if (error(EC)) break;
+
+ printRelocation(SecI, RelI);
+ }
+ }
+
+ if (opts::SectionSymbols) {
+ ListScope D(W, "Symbols");
+ for (symbol_iterator SymI = Obj->begin_symbols(),
+ SymE = Obj->end_symbols();
+ SymI != SymE; SymI.increment(EC)) {
+ if (error(EC)) break;
+
+ bool Contained = false;
+ if (SecI->containsSymbol(*SymI, Contained) || !Contained)
+ continue;
+
+ printSymbol(SymI);
+ }
+ }
+
+ if (opts::SectionData) {
+ StringRef Data;
+ if (error(SecI->getContents(Data))) break;
+
+ W.printBinaryBlock("SectionData", Data);
+ }
+ }
+}
+
+void MachODumper::printRelocations() {
+ ListScope D(W, "Relocations");
+
+ error_code EC;
+ for (section_iterator SecI = Obj->begin_sections(),
+ SecE = Obj->end_sections();
+ SecI != SecE; SecI.increment(EC)) {
+ if (error(EC)) break;
+
+ StringRef Name;
+ if (error(SecI->getName(Name)))
+ continue;
+
+ bool PrintedGroup = false;
+ for (relocation_iterator RelI = SecI->begin_relocations(),
+ RelE = SecI->end_relocations();
+ RelI != RelE; RelI.increment(EC)) {
+ if (error(EC)) break;
+
+ if (!PrintedGroup) {
+ W.startLine() << "Section " << Name << " {\n";
+ W.indent();
+ PrintedGroup = true;
+ }
+
+ printRelocation(SecI, RelI);
+ }
+
+ if (PrintedGroup) {
+ W.unindent();
+ W.startLine() << "}\n";
+ }
+ }
+}
+
+void MachODumper::printRelocation(section_iterator SecI,
+ relocation_iterator RelI) {
+ uint64_t Offset;
+ SmallString<32> RelocName;
+ int64_t Info;
+ StringRef SymbolName;
+ SymbolRef Symbol;
+ if (error(RelI->getOffset(Offset))) return;
+ if (error(RelI->getTypeName(RelocName))) return;
+ if (error(RelI->getAdditionalInfo(Info))) return;
+ if (error(RelI->getSymbol(Symbol))) return;
+ if (error(Symbol.getName(SymbolName))) return;
+
+ raw_ostream& OS = W.startLine();
+ OS << W.hex(Offset)
+ << " " << RelocName
+ << " " << (SymbolName.size() > 0 ? SymbolName : "-")
+ << " " << W.hex(Info)
+ << "\n";
+}
+
+void MachODumper::printSymbols() {
+ ListScope Group(W, "Symbols");
+
+ error_code EC;
+ for (symbol_iterator SymI = Obj->begin_symbols(),
+ SymE = Obj->end_symbols();
+ SymI != SymE; SymI.increment(EC)) {
+ if (error(EC)) break;
+
+ printSymbol(SymI);
+ }
+}
+
+void MachODumper::printDynamicSymbols() {
+ ListScope Group(W, "DynamicSymbols");
+}
+
+void MachODumper::printSymbol(symbol_iterator SymI) {
+ error_code EC;
+
+ StringRef SymbolName;
+ if (SymI->getName(SymbolName))
+ SymbolName = "";
+
+ const MachOObject *MachO = const_cast<MachOObjectFile*>(Obj)->getObject();
+
+ MachOSymbol Symbol;
+ getSymbol(MachO, SymI->getRawDataRefImpl(), Symbol);
+
+ StringRef SectionName;
+ section_iterator SecI(Obj->end_sections());
+ if (error(SymI->getSection(SecI)) ||
+ error(SecI->getName(SectionName)))
+ SectionName = "";
+
+ DictScope D(W, "Symbol");
+ W.printNumber("Name", SymbolName, Symbol.StringIndex);
+ if (Symbol.Type & N_STAB) {
+ W.printHex ("Type", "SymDebugTable", Symbol.Type);
+ } else {
+ W.printEnum("Type", Symbol.Type, makeArrayRef(MachOSymbolTypes));
+ }
+ W.printHex ("Section", SectionName, Symbol.SectionIndex);
+ W.printEnum ("RefType", static_cast<uint16_t>(Symbol.Flags & 0xF),
+ makeArrayRef(MachOSymbolRefTypes));
+ W.printFlags ("Flags", static_cast<uint16_t>(Symbol.Flags & ~0xF),
+ makeArrayRef(MachOSymbolFlags));
+ W.printHex ("Value", Symbol.Value);
+}
+
+void MachODumper::printUnwindInfo() {
+ W.startLine() << "UnwindInfo not implemented.\n";
+}
diff --git a/tools/llvm-readobj/Makefile b/tools/llvm-readobj/Makefile
index a7a7de356303..1bb72955f08f 100644
--- a/tools/llvm-readobj/Makefile
+++ b/tools/llvm-readobj/Makefile
@@ -9,7 +9,7 @@
LEVEL := ../..
TOOLNAME := llvm-readobj
-LINK_COMPONENTS := archive bitreader object
+LINK_COMPONENTS := archive bitreader object all-targets
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS := 1
diff --git a/tools/llvm-readobj/ObjDumper.cpp b/tools/llvm-readobj/ObjDumper.cpp
new file mode 100644
index 000000000000..61f511740a23
--- /dev/null
+++ b/tools/llvm-readobj/ObjDumper.cpp
@@ -0,0 +1,33 @@
+//===-- ObjDumper.cpp - Base dumper class -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements ObjDumper.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjDumper.h"
+
+#include "Error.h"
+#include "StreamWriter.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+ObjDumper::ObjDumper(StreamWriter& Writer)
+ : W(Writer) {
+}
+
+ObjDumper::~ObjDumper() {
+}
+
+} // namespace llvm
diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h
new file mode 100644
index 000000000000..8d191cbe07de
--- /dev/null
+++ b/tools/llvm-readobj/ObjDumper.h
@@ -0,0 +1,60 @@
+//===-- ObjDumper.h -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_READOBJ_OBJDUMPER_H
+#define LLVM_READOBJ_OBJDUMPER_H
+
+namespace llvm {
+
+namespace object {
+ class ObjectFile;
+}
+
+class error_code;
+
+template<typename T>
+class OwningPtr;
+
+class StreamWriter;
+
+class ObjDumper {
+public:
+ ObjDumper(StreamWriter& Writer);
+ virtual ~ObjDumper();
+
+ virtual void printFileHeaders() = 0;
+ virtual void printSections() = 0;
+ virtual void printRelocations() = 0;
+ virtual void printSymbols() = 0;
+ virtual void printDynamicSymbols() = 0;
+ virtual void printUnwindInfo() = 0;
+
+ // Only implemented for ELF at this time.
+ virtual void printDynamicTable() { }
+ virtual void printNeededLibraries() { }
+
+protected:
+ StreamWriter& W;
+};
+
+error_code createCOFFDumper(const object::ObjectFile *Obj,
+ StreamWriter& Writer,
+ OwningPtr<ObjDumper> &Result);
+
+error_code createELFDumper(const object::ObjectFile *Obj,
+ StreamWriter& Writer,
+ OwningPtr<ObjDumper> &Result);
+
+error_code createMachODumper(const object::ObjectFile *Obj,
+ StreamWriter& Writer,
+ OwningPtr<ObjDumper> &Result);
+
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-readobj/StreamWriter.cpp b/tools/llvm-readobj/StreamWriter.cpp
new file mode 100644
index 000000000000..871811233a65
--- /dev/null
+++ b/tools/llvm-readobj/StreamWriter.cpp
@@ -0,0 +1,79 @@
+#include "StreamWriter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Format.h"
+#include <cctype>
+
+using namespace llvm::support;
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const HexNumber& Value) {
+ uint64_t N = Value.Value;
+ // Zero is a special case.
+ if (N == 0)
+ return OS << "0x0";
+
+ char NumberBuffer[20];
+ char *EndPtr = NumberBuffer + sizeof(NumberBuffer);
+ char *CurPtr = EndPtr;
+
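+ // Emit digits least-significant first, filling the buffer backwards from
+ // the end so they come out in the right order.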
+ while (N) {
+ uintptr_t X = N % 16;
+ *--CurPtr = (X < 10 ? '0' + X : 'A' + X - 10);
+ N /= 16;
+ }
+
+ OS << "0x";
+ return OS.write(CurPtr, EndPtr - CurPtr);
+}
+
+void StreamWriter::printBinaryImpl(StringRef Label, StringRef Str,
+ ArrayRef<uint8_t> Data, bool Block) {
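+ // Values longer than 16 bytes are always rendered in the multi-line block
+ // form below.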
+ if (Data.size() > 16)
+ Block = true;
+
+ if (Block) {
+ startLine() << Label;
+ if (Str.size() > 0)
+ OS << ": " << Str;
+ OS << " (\n";
+ for (size_t addr = 0, end = Data.size(); addr < end; addr += 16) {
+ startLine() << format(" %04" PRIX64 ": ", uint64_t(addr));
+ // Dump line of hex.
+ for (size_t i = 0; i < 16; ++i) {
+ if (i != 0 && i % 4 == 0)
+ OS << ' ';
+ if (addr + i < end)
+ OS << hexdigit((Data[addr + i] >> 4) & 0xF, false)
+ << hexdigit(Data[addr + i] & 0xF, false);
+ else
+ OS << " ";
+ }
+ // Print ascii.
+ OS << " |";
+ for (std::size_t i = 0; i < 16 && addr + i < end; ++i) {
+ if (std::isprint(Data[addr + i] & 0xFF))
+ OS << Data[addr + i];
+ else
+ OS << ".";
+ }
+ OS << "|\n";
+ }
+
+ startLine() << ")\n";
+ } else {
+ startLine() << Label << ":";
+ if (Str.size() > 0)
+ OS << " " << Str;
+ OS << " (";
+ for (size_t i = 0; i < Data.size(); ++i) {
+ if (i > 0)
+ OS << " ";
+
+ OS << format("%02X", static_cast<int>(Data[i]));
+ }
+ OS << ")\n";
+ }
+}
+
+} // namespace llvm
diff --git a/tools/llvm-readobj/StreamWriter.h b/tools/llvm-readobj/StreamWriter.h
new file mode 100644
index 000000000000..129f6e79336e
--- /dev/null
+++ b/tools/llvm-readobj/StreamWriter.h
@@ -0,0 +1,282 @@
+//===-- StreamWriter.h ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_READOBJ_STREAMWRITER_H
+#define LLVM_READOBJ_STREAMWRITER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+using namespace llvm::support;
+
+namespace llvm {
+
+template<typename T>
+struct EnumEntry {
+ StringRef Name;
+ T Value;
+};
+
+struct HexNumber {
+ // To avoid sign-extension we have to explicitly cast to the appropriate
+ // unsigned type. The overloads are here so that every type that is implicitly
+ // convertible to an integer (including enums and endian helpers) can be used
+ // without requiring type traits or call-site changes.
+ HexNumber(int8_t Value) : Value(static_cast<uint8_t >(Value)) { }
+ HexNumber(int16_t Value) : Value(static_cast<uint16_t>(Value)) { }
+ HexNumber(int32_t Value) : Value(static_cast<uint32_t>(Value)) { }
+ HexNumber(int64_t Value) : Value(static_cast<uint64_t>(Value)) { }
+ HexNumber(uint8_t Value) : Value(Value) { }
+ HexNumber(uint16_t Value) : Value(Value) { }
+ HexNumber(uint32_t Value) : Value(Value) { }
+ HexNumber(uint64_t Value) : Value(Value) { }
+ uint64_t Value;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const HexNumber& Value);
+
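+// Comparator used by StreamWriter::printFlags to sort matched flags by name
+// (assumed definition; the original declaration is not in this hunk).
+template<typename T>
+static bool flagName(const EnumEntry<T>& lhs, const EnumEntry<T>& rhs) {
+  return lhs.Name < rhs.Name;
+}
+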
+class StreamWriter {
+public:
+ StreamWriter(raw_ostream &OS)
+ : OS(OS)
+ , IndentLevel(0) {
+ }
+
+ void flush() {
+ OS.flush();
+ }
+
+ void indent(int Levels = 1) {
+ IndentLevel += Levels;
+ }
+
+ void unindent(int Levels = 1) {
+ IndentLevel = std::max(0, IndentLevel - Levels);
+ }
+
+ void printIndent() {
+ for (int i = 0; i < IndentLevel; ++i)
+ OS << " ";
+ }
+
+ template<typename T>
+ HexNumber hex(T Value) {
+ return HexNumber(Value);
+ }
+
+ template<typename T, typename TEnum>
+ void printEnum(StringRef Label, T Value,
+ ArrayRef<EnumEntry<TEnum> > EnumValues) {
+ StringRef Name;
+ bool Found = false;
+ for (size_t i = 0; i < EnumValues.size(); ++i) {
+ if (EnumValues[i].Value == Value) {
+ Name = EnumValues[i].Name;
+ Found = true;
+ break;
+ }
+ }
+
+ if (Found) {
+ startLine() << Label << ": " << Name << " (" << hex(Value) << ")\n";
+ } else {
+ startLine() << Label << ": " << hex(Value) << "\n";
+ }
+ }
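+  // Illustrative example: with EnumValues holding { "PT_LOAD", 1 },
+  // printEnum("Type", 1, EnumValues) prints "Type: PT_LOAD (0x1)", while an
+  // unlisted value such as 9 prints "Type: 0x9".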
+
+ template<typename T, typename TFlag>
+ void printFlags(StringRef Label, T Value, ArrayRef<EnumEntry<TFlag> > Flags,
+ TFlag EnumMask = TFlag(0)) {
+ typedef EnumEntry<TFlag> FlagEntry;
+ typedef SmallVector<FlagEntry, 10> FlagVector;
+ FlagVector SetFlags;
+
+ for (typename ArrayRef<FlagEntry>::const_iterator I = Flags.begin(),
+ E = Flags.end(); I != E; ++I) {
+ if (I->Value == 0)
+ continue;
+
+ bool IsEnum = (I->Value & EnumMask) != 0;
+ if ((!IsEnum && (Value & I->Value) == I->Value) ||
+ (IsEnum && (Value & EnumMask) == I->Value)) {
+ SetFlags.push_back(*I);
+ }
+ }
+
+ std::sort(SetFlags.begin(), SetFlags.end(), &flagName<TFlag>);
+
+ startLine() << Label << " [ (" << hex(Value) << ")\n";
+ for (typename FlagVector::const_iterator I = SetFlags.begin(),
+ E = SetFlags.end();
+ I != E; ++I) {
+ startLine() << " " << I->Name << " (" << hex(I->Value) << ")\n";
+ }
+ startLine() << "]\n";
+ }
+
+ template<typename T>
+ void printFlags(StringRef Label, T Value) {
+ startLine() << Label << " [ (" << hex(Value) << ")\n";
+ uint64_t Flag = 1;
+ uint64_t Curr = Value;
+ while (Curr > 0) {
+ if (Curr & 1)
+ startLine() << " " << hex(Flag) << "\n";
+ Curr >>= 1;
+ Flag <<= 1;
+ }
+ startLine() << "]\n";
+ }
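+  // Illustrative example: printFlags("Flags", uint8_t(0x5)) prints each set
+  // bit on its own line:
+  //   Flags [ (0x5)
+  //     0x1
+  //     0x4
+  //   ]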
+
+ void printNumber(StringRef Label, uint64_t Value) {
+ startLine() << Label << ": " << Value << "\n";
+ }
+
+ void printNumber(StringRef Label, uint32_t Value) {
+ startLine() << Label << ": " << Value << "\n";
+ }
+
+ void printNumber(StringRef Label, uint16_t Value) {
+ startLine() << Label << ": " << Value << "\n";
+ }
+
+ void printNumber(StringRef Label, uint8_t Value) {
+ startLine() << Label << ": " << unsigned(Value) << "\n";
+ }
+
+ void printNumber(StringRef Label, int64_t Value) {
+ startLine() << Label << ": " << Value << "\n";
+ }
+
+ void printNumber(StringRef Label, int32_t Value) {
+ startLine() << Label << ": " << Value << "\n";
+ }
+
+ void printNumber(StringRef Label, int16_t Value) {
+ startLine() << Label << ": " << Value << "\n";
+ }
+
+ void printNumber(StringRef Label, int8_t Value) {
+ startLine() << Label << ": " << int(Value) << "\n";
+ }
+
+ template<typename T>
+ void printHex(StringRef Label, T Value) {
+ startLine() << Label << ": " << hex(Value) << "\n";
+ }
+
+ template<typename T>
+ void printHex(StringRef Label, StringRef Str, T Value) {
+ startLine() << Label << ": " << Str << " (" << hex(Value) << ")\n";
+ }
+
+ void printString(StringRef Label, StringRef Value) {
+ startLine() << Label << ": " << Value << "\n";
+ }
+
+ void printString(StringRef Label, const std::string &Value) {
+ startLine() << Label << ": " << Value << "\n";
+ }
+
+ template<typename T>
+ void printNumber(StringRef Label, StringRef Str, T Value) {
+ startLine() << Label << ": " << Str << " (" << Value << ")\n";
+ }
+
+ void printBinary(StringRef Label, StringRef Str, ArrayRef<uint8_t> Value) {
+ printBinaryImpl(Label, Str, Value, false);
+ }
+
+ void printBinary(StringRef Label, StringRef Str, ArrayRef<char> Value) {
+ ArrayRef<uint8_t> V(reinterpret_cast<const uint8_t*>(Value.data()),
+ Value.size());
+ printBinaryImpl(Label, Str, V, false);
+ }
+
+ void printBinary(StringRef Label, ArrayRef<uint8_t> Value) {
+ printBinaryImpl(Label, StringRef(), Value, false);
+ }
+
+ void printBinary(StringRef Label, ArrayRef<char> Value) {
+ ArrayRef<uint8_t> V(reinterpret_cast<const uint8_t*>(Value.data()),
+ Value.size());
+ printBinaryImpl(Label, StringRef(), V, false);
+ }
+
+ void printBinary(StringRef Label, StringRef Value) {
+ ArrayRef<uint8_t> V(reinterpret_cast<const uint8_t*>(Value.data()),
+ Value.size());
+ printBinaryImpl(Label, StringRef(), V, false);
+ }
+
+ void printBinaryBlock(StringRef Label, StringRef Value) {
+ ArrayRef<uint8_t> V(reinterpret_cast<const uint8_t*>(Value.data()),
+ Value.size());
+ printBinaryImpl(Label, StringRef(), V, true);
+ }
+
+ raw_ostream& startLine() {
+ printIndent();
+ return OS;
+ }
+
+ raw_ostream& getOStream() {
+ return OS;
+ }
+
+private:
+ template<typename T>
+ static bool flagName(const EnumEntry<T>& lhs, const EnumEntry<T>& rhs) {
+ return lhs.Name < rhs.Name;
+ }
+
+ void printBinaryImpl(StringRef Label, StringRef Str, ArrayRef<uint8_t> Value,
+ bool Block);
+
+ raw_ostream &OS;
+ int IndentLevel;
+};
+
+struct DictScope {
+ DictScope(StreamWriter& W, StringRef N) : W(W) {
+ W.startLine() << N << " {\n";
+ W.indent();
+ }
+
+ ~DictScope() {
+ W.unindent();
+ W.startLine() << "}\n";
+ }
+
+ StreamWriter& W;
+};
+
+struct ListScope {
+ ListScope(StreamWriter& W, StringRef N) : W(W) {
+ W.startLine() << N << " [\n";
+ W.indent();
+ }
+
+ ~ListScope() {
+ W.unindent();
+ W.startLine() << "]\n";
+ }
+
+ StreamWriter& W;
+};
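+
+// Illustrative use of the RAII scopes above:
+//   {
+//     DictScope D(W, "Header");
+//     W.printNumber("Version", uint32_t(2));
+//   }
+// prints:
+//   Header {
+//     Version: 2
+//   }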
+
+} // namespace llvm
+
+#endif
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
index 3be12899aea7..67c9a98f40f3 100644
--- a/tools/llvm-readobj/llvm-readobj.cpp
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -7,212 +7,277 @@
//
//===----------------------------------------------------------------------===//
//
-// This program is a utility that works like traditional Unix "readelf",
-// except that it can handle any type of object file recognized by lib/Object.
+// This is a tool similar to readelf, except it works on multiple object file
+// formats. The main purpose of this tool is to provide detailed output suitable
+// for FileCheck.
//
-// It makes use of the generic ObjectFile interface.
+// Flags should be similar to readelf where supported, but the output format
+// does not need to be identical. The point is to avoid making users learn
+// yet another set of flags.
//
-// Caution: This utility is new, experimental, unsupported, and incomplete.
+// Output should be specialized for each format where appropriate.
//
//===----------------------------------------------------------------------===//
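+
+// A hypothetical invocation (input file invented; output abbreviated):
+//   llvm-readobj -file-headers -sections foo.obj
+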
+#include "llvm-readobj.h"
+
+#include "Error.h"
+#include "ObjDumper.h"
+#include "StreamWriter.h"
+
+#include "llvm/Object/Archive.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/Object/ELF.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/system_error.h"
+
+#include <string>
+
using namespace llvm;
using namespace llvm::object;
-static cl::opt<std::string>
-InputFilename(cl::Positional, cl::desc("<input object>"), cl::init(""));
+namespace opts {
+ cl::list<std::string> InputFilenames(cl::Positional,
+ cl::desc("<input object files>"),
+ cl::ZeroOrMore);
-void DumpSymbolHeader() {
- outs() << format(" %-32s", (const char*)"Name")
- << format(" %-4s", (const char*)"Type")
- << format(" %-16s", (const char*)"Address")
- << format(" %-16s", (const char*)"Size")
- << format(" %-16s", (const char*)"FileOffset")
- << format(" %-26s", (const char*)"Flags")
- << "\n";
+ // -file-headers, -h
+ cl::opt<bool> FileHeaders("file-headers",
+                           cl::desc("Display file headers"));
+ cl::alias FileHeadersShort("h",
+ cl::desc("Alias for --file-headers"),
+ cl::aliasopt(FileHeaders));
+
+ // -sections, -s
+ cl::opt<bool> Sections("sections",
+ cl::desc("Display all sections."));
+ cl::alias SectionsShort("s",
+ cl::desc("Alias for --sections"),
+ cl::aliasopt(Sections));
+
+ // -section-relocations, -sr
+ cl::opt<bool> SectionRelocations("section-relocations",
+ cl::desc("Display relocations for each section shown."));
+ cl::alias SectionRelocationsShort("sr",
+ cl::desc("Alias for --section-relocations"),
+ cl::aliasopt(SectionRelocations));
+
+ // -section-symbols, -st
+ cl::opt<bool> SectionSymbols("section-symbols",
+ cl::desc("Display symbols for each section shown."));
+ cl::alias SectionSymbolsShort("st",
+ cl::desc("Alias for --section-symbols"),
+ cl::aliasopt(SectionSymbols));
+
+ // -section-data, -sd
+ cl::opt<bool> SectionData("section-data",
+ cl::desc("Display section data for each section shown."));
+ cl::alias SectionDataShort("sd",
+ cl::desc("Alias for --section-data"),
+ cl::aliasopt(SectionData));
+
+ // -relocations, -r
+ cl::opt<bool> Relocations("relocations",
+ cl::desc("Display the relocation entries in the file"));
+ cl::alias RelocationsShort("r",
+ cl::desc("Alias for --relocations"),
+ cl::aliasopt(Relocations));
+
+ // -symbols, -t
+ cl::opt<bool> Symbols("symbols",
+ cl::desc("Display the symbol table"));
+ cl::alias SymbolsShort("t",
+ cl::desc("Alias for --symbols"),
+ cl::aliasopt(Symbols));
+
+ // -dyn-symbols, -dt
+ cl::opt<bool> DynamicSymbols("dyn-symbols",
+ cl::desc("Display the dynamic symbol table"));
+ cl::alias DynamicSymbolsShort("dt",
+ cl::desc("Alias for --dyn-symbols"),
+ cl::aliasopt(DynamicSymbols));
+
+ // -unwind, -u
+ cl::opt<bool> UnwindInfo("unwind",
+ cl::desc("Display unwind information"));
+ cl::alias UnwindInfoShort("u",
+ cl::desc("Alias for --unwind"),
+ cl::aliasopt(UnwindInfo));
+
+ // -dynamic-table
+ cl::opt<bool> DynamicTable("dynamic-table",
+ cl::desc("Display the ELF .dynamic section table"));
+
+ // -needed-libs
+ cl::opt<bool> NeededLibraries("needed-libs",
+ cl::desc("Display the needed libraries"));
+} // namespace opts
+
+namespace llvm {
+
+bool error(error_code EC) {
+ if (!EC)
+ return false;
+
+ outs() << "\nError reading file: " << EC.message() << ".\n";
+ outs().flush();
+ return true;
}
-const char *GetTypeStr(SymbolRef::Type Type) {
- switch (Type) {
- case SymbolRef::ST_Unknown: return "?";
- case SymbolRef::ST_Data: return "DATA";
- case SymbolRef::ST_Debug: return "DBG";
- case SymbolRef::ST_File: return "FILE";
- case SymbolRef::ST_Function: return "FUNC";
- case SymbolRef::ST_Other: return "-";
- }
- return "INV";
+bool relocAddressLess(RelocationRef a, RelocationRef b) {
+ uint64_t a_addr, b_addr;
+ if (error(a.getAddress(a_addr))) return false;
+ if (error(b.getAddress(b_addr))) return false;
+ return a_addr < b_addr;
}
-std::string GetFlagStr(uint32_t Flags) {
- std::string result;
- if (Flags & SymbolRef::SF_Undefined)
- result += "undef,";
- if (Flags & SymbolRef::SF_Global)
- result += "global,";
- if (Flags & SymbolRef::SF_Weak)
- result += "weak,";
- if (Flags & SymbolRef::SF_Absolute)
- result += "absolute,";
- if (Flags & SymbolRef::SF_ThreadLocal)
- result += "threadlocal,";
- if (Flags & SymbolRef::SF_Common)
- result += "common,";
- if (Flags & SymbolRef::SF_FormatSpecific)
- result += "formatspecific,";
-
- // Remove trailing comma
- if (result.size() > 0) {
- result.erase(result.size() - 1);
- }
- return result;
+} // namespace llvm
+
+
+static void reportError(StringRef Input, error_code EC) {
+ if (Input == "-")
+ Input = "<stdin>";
+
+ errs() << Input << ": " << EC.message() << "\n";
+ errs().flush();
}
-void DumpSymbol(const SymbolRef &Sym, const ObjectFile *obj, bool IsDynamic) {
- StringRef Name;
- SymbolRef::Type Type;
- uint32_t Flags;
- uint64_t Address;
- uint64_t Size;
- uint64_t FileOffset;
- Sym.getName(Name);
- Sym.getAddress(Address);
- Sym.getSize(Size);
- Sym.getFileOffset(FileOffset);
- Sym.getType(Type);
- Sym.getFlags(Flags);
- std::string FullName = Name;
-
- // If this is a dynamic symbol from an ELF object, append
- // the symbol's version to the name.
- if (IsDynamic && obj->isELF()) {
- StringRef Version;
- bool IsDefault;
- GetELFSymbolVersion(obj, Sym, Version, IsDefault);
- if (!Version.empty()) {
- FullName += (IsDefault ? "@@" : "@");
- FullName += Version;
- }
- }
+static void reportError(StringRef Input, StringRef Message) {
+ if (Input == "-")
+ Input = "<stdin>";
- // format() can't handle StringRefs
- outs() << format(" %-32s", FullName.c_str())
- << format(" %-4s", GetTypeStr(Type))
- << format(" %16" PRIx64, Address)
- << format(" %16" PRIx64, Size)
- << format(" %16" PRIx64, FileOffset)
- << " " << GetFlagStr(Flags)
- << "\n";
+ errs() << Input << ": " << Message << "\n";
}
+/// @brief Creates a format-specific object file dumper.
+static error_code createDumper(const ObjectFile *Obj,
+ StreamWriter &Writer,
+ OwningPtr<ObjDumper> &Result) {
+ if (!Obj)
+ return readobj_error::unsupported_file_format;
-// Iterate through the normal symbols in the ObjectFile
-void DumpSymbols(const ObjectFile *obj) {
- error_code ec;
- uint32_t count = 0;
- outs() << "Symbols:\n";
- symbol_iterator it = obj->begin_symbols();
- symbol_iterator ie = obj->end_symbols();
- while (it != ie) {
- DumpSymbol(*it, obj, false);
- it.increment(ec);
- if (ec)
- report_fatal_error("Symbol iteration failed");
- ++count;
- }
- outs() << " Total: " << count << "\n\n";
+ if (Obj->isCOFF())
+ return createCOFFDumper(Obj, Writer, Result);
+ if (Obj->isELF())
+ return createELFDumper(Obj, Writer, Result);
+ if (Obj->isMachO())
+ return createMachODumper(Obj, Writer, Result);
+
+ return readobj_error::unsupported_obj_file_format;
}
-// Iterate through the dynamic symbols in the ObjectFile.
-void DumpDynamicSymbols(const ObjectFile *obj) {
- error_code ec;
- uint32_t count = 0;
- outs() << "Dynamic Symbols:\n";
- symbol_iterator it = obj->begin_dynamic_symbols();
- symbol_iterator ie = obj->end_dynamic_symbols();
- while (it != ie) {
- DumpSymbol(*it, obj, true);
- it.increment(ec);
- if (ec)
- report_fatal_error("Symbol iteration failed");
- ++count;
+
+/// @brief Dumps the specified object file.
+static void dumpObject(const ObjectFile *Obj) {
+ StreamWriter Writer(outs());
+ OwningPtr<ObjDumper> Dumper;
+ if (error_code EC = createDumper(Obj, Writer, Dumper)) {
+ reportError(Obj->getFileName(), EC);
+ return;
}
- outs() << " Total: " << count << "\n\n";
-}
-void DumpLibrary(const LibraryRef &lib) {
- StringRef path;
- lib.getPath(path);
- outs() << " " << path << "\n";
+ outs() << '\n';
+ outs() << "File: " << Obj->getFileName() << "\n";
+ outs() << "Format: " << Obj->getFileFormatName() << "\n";
+ outs() << "Arch: "
+ << Triple::getArchTypeName((llvm::Triple::ArchType)Obj->getArch())
+ << "\n";
+ outs() << "AddressSize: " << (8*Obj->getBytesInAddress()) << "bit\n";
+ if (Obj->isELF())
+ outs() << "LoadName: " << Obj->getLoadName() << "\n";
+
+ if (opts::FileHeaders)
+ Dumper->printFileHeaders();
+ if (opts::Sections)
+ Dumper->printSections();
+ if (opts::Relocations)
+ Dumper->printRelocations();
+ if (opts::Symbols)
+ Dumper->printSymbols();
+ if (opts::DynamicSymbols)
+ Dumper->printDynamicSymbols();
+ if (opts::UnwindInfo)
+ Dumper->printUnwindInfo();
+ if (opts::DynamicTable)
+ Dumper->printDynamicTable();
+ if (opts::NeededLibraries)
+ Dumper->printNeededLibraries();
}
-// Iterate through needed libraries
-void DumpLibrariesNeeded(const ObjectFile *obj) {
- error_code ec;
- uint32_t count = 0;
- library_iterator it = obj->begin_libraries_needed();
- library_iterator ie = obj->end_libraries_needed();
- outs() << "Libraries needed:\n";
- while (it != ie) {
- DumpLibrary(*it);
- it.increment(ec);
- if (ec)
- report_fatal_error("Needed libraries iteration failed");
- ++count;
+
+/// @brief Dumps each object file in \a Arc.
+static void dumpArchive(const Archive *Arc) {
+ for (Archive::child_iterator ArcI = Arc->begin_children(),
+ ArcE = Arc->end_children();
+ ArcI != ArcE; ++ArcI) {
+ OwningPtr<Binary> child;
+ if (error_code EC = ArcI->getAsBinary(child)) {
+ // Ignore non-object files.
+ if (EC != object_error::invalid_file_type)
+ reportError(Arc->getFileName(), EC.message());
+ continue;
+ }
+
+ if (ObjectFile *Obj = dyn_cast<ObjectFile>(child.get()))
+ dumpObject(Obj);
+ else
+ reportError(Arc->getFileName(), readobj_error::unrecognized_file_format);
}
- outs() << " Total: " << count << "\n\n";
}
-void DumpHeaders(const ObjectFile *obj) {
- outs() << "File Format : " << obj->getFileFormatName() << "\n";
- outs() << "Arch : "
- << Triple::getArchTypeName((llvm::Triple::ArchType)obj->getArch())
- << "\n";
- outs() << "Address Size: " << (8*obj->getBytesInAddress()) << " bits\n";
- outs() << "Load Name : " << obj->getLoadName() << "\n";
- outs() << "\n";
+
+/// @brief Opens \a File and dumps it.
+static void dumpInput(StringRef File) {
+ // If file isn't stdin, check that it exists.
+ if (File != "-" && !sys::fs::exists(File)) {
+ reportError(File, readobj_error::file_not_found);
+ return;
+ }
+
+ // Attempt to open the binary.
+ OwningPtr<Binary> Binary;
+ if (error_code EC = createBinary(File, Binary)) {
+ reportError(File, EC);
+ return;
+ }
+
+ if (Archive *Arc = dyn_cast<Archive>(Binary.get()))
+ dumpArchive(Arc);
+ else if (ObjectFile *Obj = dyn_cast<ObjectFile>(Binary.get()))
+ dumpObject(Obj);
+ else
+ reportError(File, readobj_error::unrecognized_file_format);
}
-int main(int argc, char** argv) {
- error_code ec;
+
+int main(int argc, const char *argv[]) {
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
+ llvm_shutdown_obj Y;
- cl::ParseCommandLineOptions(argc, argv,
- "LLVM Object Reader\n");
+ // Initialize targets.
+ llvm::InitializeAllTargetInfos();
- if (InputFilename.empty()) {
- errs() << "Please specify an input filename\n";
- return 1;
- }
+ // Register the target printer for --version.
+ cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
- // Open the object file
- OwningPtr<MemoryBuffer> File;
- if (MemoryBuffer::getFile(InputFilename, File)) {
- errs() << InputFilename << ": Open failed\n";
- return 1;
- }
+ cl::ParseCommandLineOptions(argc, argv, "LLVM Object Reader\n");
- ObjectFile *obj = ObjectFile::createObjectFile(File.take());
- if (!obj) {
- errs() << InputFilename << ": Object type not recognized\n";
- }
+ // Default to stdin if no filename is specified.
+ if (opts::InputFilenames.size() == 0)
+ opts::InputFilenames.push_back("-");
+
+ std::for_each(opts::InputFilenames.begin(), opts::InputFilenames.end(),
+ dumpInput);
- DumpHeaders(obj);
- DumpSymbols(obj);
- DumpDynamicSymbols(obj);
- DumpLibrariesNeeded(obj);
return 0;
}
-
diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h
new file mode 100644
index 000000000000..be18268a7f64
--- /dev/null
+++ b/tools/llvm-readobj/llvm-readobj.h
@@ -0,0 +1,45 @@
+//===-- llvm-readobj.h ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_READ_OBJ_H
+#define LLVM_TOOLS_READ_OBJ_H
+
+#include "llvm/Support/CommandLine.h"
+#include <string>
+
+namespace llvm {
+ namespace object {
+ class RelocationRef;
+ }
+
+ class error_code;
+
+ // Various helper functions.
+ bool error(error_code ec);
+ bool relocAddressLess(object::RelocationRef A,
+ object::RelocationRef B);
+} // namespace llvm
+
+namespace opts {
+ extern llvm::cl::list<std::string> InputFilenames;
+ extern llvm::cl::opt<bool> FileHeaders;
+ extern llvm::cl::opt<bool> Sections;
+ extern llvm::cl::opt<bool> SectionRelocations;
+ extern llvm::cl::opt<bool> SectionSymbols;
+ extern llvm::cl::opt<bool> SectionData;
+ extern llvm::cl::opt<bool> Relocations;
+ extern llvm::cl::opt<bool> Symbols;
+ extern llvm::cl::opt<bool> DynamicSymbols;
+ extern llvm::cl::opt<bool> UnwindInfo;
+} // namespace opts
+
+#define LLVM_READOBJ_ENUM_ENT(ns, enum) \
+ { #enum, ns::enum }
+
+#endif
diff --git a/tools/llvm-rtdyld/CMakeLists.txt b/tools/llvm-rtdyld/CMakeLists.txt
index 17e2c3e2d5bd..8d161d366da7 100644
--- a/tools/llvm-rtdyld/CMakeLists.txt
+++ b/tools/llvm-rtdyld/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC object RuntimeDyld JIT)
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC object RuntimeDyld JIT debuginfo)
add_llvm_tool(llvm-rtdyld
llvm-rtdyld.cpp
diff --git a/tools/llvm-rtdyld/Makefile b/tools/llvm-rtdyld/Makefile
index 30fbee0979bd..fabdd683a997 100644
--- a/tools/llvm-rtdyld/Makefile
+++ b/tools/llvm-rtdyld/Makefile
@@ -9,7 +9,7 @@
LEVEL := ../..
TOOLNAME := llvm-rtdyld
-LINK_COMPONENTS := all-targets support MC object RuntimeDyld JIT
+LINK_COMPONENTS := all-targets support MC object RuntimeDyld JIT debuginfo
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS := 1
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 7b5bd0388d88..4d8d345894db 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -11,11 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo/DIContext.h"
#include "llvm/ExecutionEngine/ObjectBuffer.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Object/MachOObject.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
@@ -31,7 +32,8 @@ InputFileList(cl::Positional, cl::ZeroOrMore,
cl::desc("<input file>"));
enum ActionType {
- AC_Execute
+ AC_Execute,
+ AC_PrintLineInfo
};
static cl::opt<ActionType>
@@ -39,6 +41,8 @@ Action(cl::desc("Action to perform:"),
cl::init(AC_Execute),
cl::values(clEnumValN(AC_Execute, "execute",
"Load, link, and execute the inputs."),
+ clEnumValN(AC_PrintLineInfo, "printline",
+ "Load, link, and print line information for each function."),
clEnumValEnd));
static cl::opt<std::string>
@@ -58,13 +62,15 @@ public:
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID);
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
+ unsigned SectionID, bool IsReadOnly);
virtual void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true) {
return 0;
}
+ bool applyPermissions(std::string *ErrMsg) { return false; }
+
// Invalidate instruction cache for sections with execute permissions.
// Some platforms with separate data cache and instruction cache require
// explicit cache flush, otherwise JIT code manipulations (like resolved
@@ -82,7 +88,8 @@ uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
unsigned Alignment,
- unsigned SectionID) {
+ unsigned SectionID,
+ bool IsReadOnly) {
sys::MemoryBlock MB = sys::Memory::AllocateRWX(Size, 0, 0);
DataMemory.push_back(MB);
return (uint8_t*)MB.base();
@@ -111,6 +118,66 @@ static int Error(const Twine &Msg) {
/* *** */
+static int printLineInfoForInput() {
+ // If we don't have any input files, read from stdin.
+ if (!InputFileList.size())
+ InputFileList.push_back("-");
+  for (unsigned i = 0, e = InputFileList.size(); i != e; ++i) {
+ // Instantiate a dynamic linker.
+ TrivialMemoryManager *MemMgr = new TrivialMemoryManager;
+ RuntimeDyld Dyld(MemMgr);
+
+ // Load the input memory buffer.
+ OwningPtr<MemoryBuffer> InputBuffer;
+ OwningPtr<ObjectImage> LoadedObject;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFileList[i],
+ InputBuffer))
+ return Error("unable to read input: '" + ec.message() + "'");
+
+ // Load the object file
+ LoadedObject.reset(Dyld.loadObject(new ObjectBuffer(InputBuffer.take())));
+ if (!LoadedObject) {
+ return Error(Dyld.getErrorString());
+ }
+
+ // Resolve all the relocations we can.
+ Dyld.resolveRelocations();
+
+ OwningPtr<DIContext> Context(DIContext::getDWARFContext(LoadedObject->getObjectFile()));
+
+ // Use symbol info to iterate functions in the object.
+ error_code ec;
+ for (object::symbol_iterator I = LoadedObject->begin_symbols(),
+ E = LoadedObject->end_symbols();
+ I != E && !ec;
+ I.increment(ec)) {
+ object::SymbolRef::Type SymType;
+ if (I->getType(SymType)) continue;
+ if (SymType == object::SymbolRef::ST_Function) {
+ StringRef Name;
+ uint64_t Addr;
+ uint64_t Size;
+ if (I->getName(Name)) continue;
+ if (I->getAddress(Addr)) continue;
+ if (I->getSize(Size)) continue;
+
+ outs() << "Function: " << Name << ", Size = " << Size << "\n";
+
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
+ DILineInfoTable::iterator Begin = Lines.begin();
+ DILineInfoTable::iterator End = Lines.end();
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+ outs() << " Line info @ " << It->first - Addr << ": "
+ << It->second.getFileName()
+ << ", line:" << It->second.getLine() << "\n";
+ }
+ }
+ }
+ }
+
+ return 0;
+}
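+
+// Illustrative output of the "printline" action for a hypothetical input
+// (function name, offsets, and paths invented):
+//   Function: main, Size = 42
+//     Line info @ 0: /tmp/test.c, line:3
+//     Line info @ 16: /tmp/test.c, line:4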
+
static int executeInput() {
// Instantiate a dynamic linker.
TrivialMemoryManager *MemMgr = new TrivialMemoryManager;
@@ -177,5 +244,7 @@ int main(int argc, char **argv) {
switch (Action) {
case AC_Execute:
return executeInput();
+ case AC_PrintLineInfo:
+ return printLineInfoForInput();
}
}
diff --git a/tools/llvm-size/llvm-size.cpp b/tools/llvm-size/llvm-size.cpp
index 462da40afb63..3de6605285bf 100644
--- a/tools/llvm-size/llvm-size.cpp
+++ b/tools/llvm-size/llvm-size.cpp
@@ -23,8 +23,8 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <algorithm>
#include <string>
diff --git a/tools/llvm-stress/Makefile b/tools/llvm-stress/Makefile
index 90d57c3fa98a..8767cbe41767 100644
--- a/tools/llvm-stress/Makefile
+++ b/tools/llvm-stress/Makefile
@@ -10,7 +10,7 @@
LEVEL := ../..
TOOLNAME := llvm-stress
LINK_COMPONENTS := object
-LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo
+LINK_COMPONENTS := bitreader bitwriter asmparser irreader instrumentation scalaropts ipo
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS = 1
diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp
index 8473d94731a5..fbda1b7b6713 100644
--- a/tools/llvm-stress/llvm-stress.cpp
+++ b/tools/llvm-stress/llvm-stress.cpp
@@ -11,25 +11,25 @@
// different components in LLVM.
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Constants.h"
-#include "llvm/Instruction.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/Verifier.h"
-#include "llvm/Support/PassNameParser.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/ToolOutputFile.h"
-#include <memory>
-#include <sstream>
+#include <algorithm>
#include <set>
+#include <sstream>
#include <vector>
-#include <algorithm>
using namespace llvm;
static cl::opt<unsigned> SeedCL("seed",
@@ -379,9 +379,7 @@ struct ConstModifier: public Modifier {
RandomBits[i] = Ran->Rand64();
APInt RandomInt(Ty->getPrimitiveSizeInBits(), makeArrayRef(RandomBits));
-
- bool isIEEE = !Ty->isX86_FP80Ty() && !Ty->isPPC_FP128Ty();
- APFloat RandomFloat(RandomInt, isIEEE);
+ APFloat RandomFloat(Ty->getFltSemantics(), RandomInt);
if (Ran->Rand() & 1)
return PT->push_back(ConstantFP::getNullValue(Ty));
@@ -624,15 +622,15 @@ void FillFunction(Function *F, Random &R) {
// List of modifiers which add new random instructions.
std::vector<Modifier*> Modifiers;
- std::auto_ptr<Modifier> LM(new LoadModifier(BB, &PT, &R));
- std::auto_ptr<Modifier> SM(new StoreModifier(BB, &PT, &R));
- std::auto_ptr<Modifier> EE(new ExtractElementModifier(BB, &PT, &R));
- std::auto_ptr<Modifier> SHM(new ShuffModifier(BB, &PT, &R));
- std::auto_ptr<Modifier> IE(new InsertElementModifier(BB, &PT, &R));
- std::auto_ptr<Modifier> BM(new BinModifier(BB, &PT, &R));
- std::auto_ptr<Modifier> CM(new CastModifier(BB, &PT, &R));
- std::auto_ptr<Modifier> SLM(new SelectModifier(BB, &PT, &R));
- std::auto_ptr<Modifier> PM(new CmpModifier(BB, &PT, &R));
+ OwningPtr<Modifier> LM(new LoadModifier(BB, &PT, &R));
+ OwningPtr<Modifier> SM(new StoreModifier(BB, &PT, &R));
+ OwningPtr<Modifier> EE(new ExtractElementModifier(BB, &PT, &R));
+ OwningPtr<Modifier> SHM(new ShuffModifier(BB, &PT, &R));
+ OwningPtr<Modifier> IE(new InsertElementModifier(BB, &PT, &R));
+ OwningPtr<Modifier> BM(new BinModifier(BB, &PT, &R));
+ OwningPtr<Modifier> CM(new CastModifier(BB, &PT, &R));
+ OwningPtr<Modifier> SLM(new SelectModifier(BB, &PT, &R));
+ OwningPtr<Modifier> PM(new CmpModifier(BB, &PT, &R));
Modifiers.push_back(LM.get());
Modifiers.push_back(SM.get());
Modifiers.push_back(EE.get());
@@ -686,7 +684,7 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm codegen stress-tester\n");
llvm_shutdown_obj Y;
- std::auto_ptr<Module> M(new Module("/tmp/autogen.bc", getGlobalContext()));
+ OwningPtr<Module> M(new Module("/tmp/autogen.bc", getGlobalContext()));
Function *F = GenEmptyFunction(M.get());
// Pick an initial seed value
diff --git a/tools/llvm-symbolizer/CMakeLists.txt b/tools/llvm-symbolizer/CMakeLists.txt
new file mode 100644
index 000000000000..5967b891f415
--- /dev/null
+++ b/tools/llvm-symbolizer/CMakeLists.txt
@@ -0,0 +1,14 @@
+# FIXME: As we plan to execute the llvm-symbolizer binary from compiler-rt
+# libraries, it has to be compiled for all supported targets (x86_64, i386, etc.).
+# This means that we need LLVM libraries to be compiled for these
+# targets as well. Currently, there is no support for such a build strategy.
+
+set(LLVM_LINK_COMPONENTS
+ DebugInfo
+ Object
+ )
+
+add_llvm_tool(llvm-symbolizer
+ LLVMSymbolize.cpp
+ llvm-symbolizer.cpp
+ )
diff --git a/tools/llvm-symbolizer/LLVMSymbolize.cpp b/tools/llvm-symbolizer/LLVMSymbolize.cpp
new file mode 100644
index 000000000000..29d91a0e92a3
--- /dev/null
+++ b/tools/llvm-symbolizer/LLVMSymbolize.cpp
@@ -0,0 +1,292 @@
+//===-- LLVMSymbolize.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation for LLVM symbolization library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMSymbolize.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Path.h"
+
+#include <sstream>
+
+namespace llvm {
+namespace symbolize {
+
+static bool error(error_code ec) {
+ if (!ec)
+ return false;
+ errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n";
+ return true;
+}
+
+static uint32_t
+getDILineInfoSpecifierFlags(const LLVMSymbolizer::Options &Opts) {
+ uint32_t Flags = llvm::DILineInfoSpecifier::FileLineInfo |
+ llvm::DILineInfoSpecifier::AbsoluteFilePath;
+ if (Opts.PrintFunctions)
+ Flags |= llvm::DILineInfoSpecifier::FunctionName;
+ return Flags;
+}
+
+static void patchFunctionNameInDILineInfo(const std::string &NewFunctionName,
+ DILineInfo &LineInfo) {
+ std::string FileName = LineInfo.getFileName();
+ LineInfo = DILineInfo(StringRef(FileName), StringRef(NewFunctionName),
+ LineInfo.getLine(), LineInfo.getColumn());
+}
+
+ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
+ : Module(Obj), DebugInfoContext(DICtx) {
+ error_code ec;
+ for (symbol_iterator si = Module->begin_symbols(), se = Module->end_symbols();
+ si != se; si.increment(ec)) {
+ if (error(ec))
+ return;
+ SymbolRef::Type SymbolType;
+ if (error(si->getType(SymbolType)))
+ continue;
+ if (SymbolType != SymbolRef::ST_Function &&
+ SymbolType != SymbolRef::ST_Data)
+ continue;
+ uint64_t SymbolAddress;
+ if (error(si->getAddress(SymbolAddress)) ||
+ SymbolAddress == UnknownAddressOrSize)
+ continue;
+ uint64_t SymbolSize;
+ if (error(si->getSize(SymbolSize)) || SymbolSize == UnknownAddressOrSize)
+ continue;
+ StringRef SymbolName;
+ if (error(si->getName(SymbolName)))
+ continue;
+    // FIXME: If a function has an alias, there are two entries in the symbol
+    // table with the same address and size. Make sure we choose the correct
+    // one.
+ SymbolMapTy &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
+ SymbolDesc SD = { SymbolAddress, SymbolAddress + SymbolSize };
+ M.insert(std::make_pair(SD, SymbolName));
+ }
+}
+
+bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
+ std::string &Name, uint64_t &Addr,
+ uint64_t &Size) const {
+ const SymbolMapTy &M = Type == SymbolRef::ST_Function ? Functions : Objects;
+ SymbolDesc SD = { Address, Address + 1 };
+ SymbolMapTy::const_iterator it = M.find(SD);
+ if (it == M.end())
+ return false;
+ if (Address < it->first.Addr || Address >= it->first.AddrEnd)
+ return false;
+ Name = it->second.str();
+ Addr = it->first.Addr;
+ Size = it->first.AddrEnd - it->first.Addr;
+ return true;
+}
+
+DILineInfo ModuleInfo::symbolizeCode(
+ uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
+ DILineInfo LineInfo;
+ if (DebugInfoContext) {
+ LineInfo = DebugInfoContext->getLineInfoForAddress(
+ ModuleOffset, getDILineInfoSpecifierFlags(Opts));
+ }
+ // Override function name from symbol table if necessary.
+ if (Opts.PrintFunctions && Opts.UseSymbolTable) {
+ std::string FunctionName;
+ uint64_t Start, Size;
+ if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+ FunctionName, Start, Size)) {
+ patchFunctionNameInDILineInfo(FunctionName, LineInfo);
+ }
+ }
+ return LineInfo;
+}
+
+DIInliningInfo ModuleInfo::symbolizeInlinedCode(
+ uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
+ DIInliningInfo InlinedContext;
+ if (DebugInfoContext) {
+ InlinedContext = DebugInfoContext->getInliningInfoForAddress(
+ ModuleOffset, getDILineInfoSpecifierFlags(Opts));
+ }
+ // Make sure there is at least one frame in context.
+ if (InlinedContext.getNumberOfFrames() == 0) {
+ InlinedContext.addFrame(DILineInfo());
+ }
+  // Override the function name in the lower frame with the name from the
+  // symbol table.
+ if (Opts.PrintFunctions && Opts.UseSymbolTable) {
+ DIInliningInfo PatchedInlinedContext;
+ for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
+ DILineInfo LineInfo = InlinedContext.getFrame(i);
+ if (i == n - 1) {
+ std::string FunctionName;
+ uint64_t Start, Size;
+ if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+ FunctionName, Start, Size)) {
+ patchFunctionNameInDILineInfo(FunctionName, LineInfo);
+ }
+ }
+ PatchedInlinedContext.addFrame(LineInfo);
+ }
+ InlinedContext = PatchedInlinedContext;
+ }
+ return InlinedContext;
+}
+
+bool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name,
+ uint64_t &Start, uint64_t &Size) const {
+ return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start,
+ Size);
+}
+
+const char LLVMSymbolizer::kBadString[] = "??";
+
+std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
+ uint64_t ModuleOffset) {
+ ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
+ if (Info == 0)
+ return printDILineInfo(DILineInfo());
+ if (Opts.PrintInlining) {
+ DIInliningInfo InlinedContext =
+ Info->symbolizeInlinedCode(ModuleOffset, Opts);
+ uint32_t FramesNum = InlinedContext.getNumberOfFrames();
+ assert(FramesNum > 0);
+ std::string Result;
+ for (uint32_t i = 0; i < FramesNum; i++) {
+ DILineInfo LineInfo = InlinedContext.getFrame(i);
+ Result += printDILineInfo(LineInfo);
+ }
+ return Result;
+ }
+ DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts);
+ return printDILineInfo(LineInfo);
+}
+
+std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
+ uint64_t ModuleOffset) {
+ std::string Name = kBadString;
+ uint64_t Start = 0;
+ uint64_t Size = 0;
+ if (Opts.UseSymbolTable) {
+ if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) {
+ if (Info->symbolizeData(ModuleOffset, Name, Start, Size))
+ DemangleName(Name);
+ }
+ }
+ std::stringstream ss;
+ ss << Name << "\n" << Start << " " << Size << "\n";
+ return ss.str();
+}
+
+void LLVMSymbolizer::flush() {
+ DeleteContainerSeconds(Modules);
+}
+
+// Returns true if the object endianness is known.
+static bool getObjectEndianness(const ObjectFile *Obj, bool &IsLittleEndian) {
+  // FIXME: Implement this when libLLVMObject allows doing it easily.
+ IsLittleEndian = true;
+ return true;
+}
+
+static ObjectFile *getObjectFile(const std::string &Path) {
+ OwningPtr<MemoryBuffer> Buff;
+ if (error_code ec = MemoryBuffer::getFile(Path, Buff))
+ error(ec);
+ return ObjectFile::createObjectFile(Buff.take());
+}
+
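+// For example, getDarwinDWARFResourceForModule("/tmp/a.out") returns
+// "/tmp/a.out.dSYM/Contents/Resources/DWARF/a.out".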
+static std::string getDarwinDWARFResourceForModule(const std::string &Path) {
+ StringRef Basename = sys::path::filename(Path);
+ const std::string &DSymDirectory = Path + ".dSYM";
+ SmallString<16> ResourceName = StringRef(DSymDirectory);
+ sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
+ sys::path::append(ResourceName, Basename);
+ return ResourceName.str();
+}
+
+ModuleInfo *
+LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
+ ModuleMapTy::iterator I = Modules.find(ModuleName);
+ if (I != Modules.end())
+ return I->second;
+
+ ObjectFile *Obj = getObjectFile(ModuleName);
+ if (Obj == 0) {
+ // Module name doesn't point to a valid object file.
+ Modules.insert(make_pair(ModuleName, (ModuleInfo *)0));
+ return 0;
+ }
+
+ DIContext *Context = 0;
+ bool IsLittleEndian;
+ if (getObjectEndianness(Obj, IsLittleEndian)) {
+    // On Darwin we may find DWARF in a separate object file in the
+    // resource directory.
+ ObjectFile *DbgObj = Obj;
+ if (isa<MachOObjectFile>(Obj)) {
+ const std::string &ResourceName =
+ getDarwinDWARFResourceForModule(ModuleName);
+ ObjectFile *ResourceObj = getObjectFile(ResourceName);
+ if (ResourceObj != 0)
+ DbgObj = ResourceObj;
+ }
+ Context = DIContext::getDWARFContext(DbgObj);
+ assert(Context);
+ }
+
+ ModuleInfo *Info = new ModuleInfo(Obj, Context);
+ Modules.insert(make_pair(ModuleName, Info));
+ return Info;
+}
+
+std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
+  // By default, DILineInfo contains "<invalid>" for a function/filename it
+  // cannot fetch. We replace it with "??" to make our output closer to
+  // addr2line.
+ static const std::string kDILineInfoBadString = "<invalid>";
+ std::stringstream Result;
+ if (Opts.PrintFunctions) {
+ std::string FunctionName = LineInfo.getFunctionName();
+ if (FunctionName == kDILineInfoBadString)
+ FunctionName = kBadString;
+ DemangleName(FunctionName);
+ Result << FunctionName << "\n";
+ }
+ std::string Filename = LineInfo.getFileName();
+ if (Filename == kDILineInfoBadString)
+ Filename = kBadString;
+ Result << Filename << ":" << LineInfo.getLine() << ":" << LineInfo.getColumn()
+ << "\n";
+ return Result.str();
+}
+
+#if !defined(_MSC_VER)
+// Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
+extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
+ size_t *length, int *status);
+#endif
+
+void LLVMSymbolizer::DemangleName(std::string &Name) const {
+#if !defined(_MSC_VER)
+ if (!Opts.Demangle)
+ return;
+ int status = 0;
+ char *DemangledName = __cxa_demangle(Name.c_str(), 0, 0, &status);
+ if (status != 0)
+ return;
+ Name = DemangledName;
+ free(DemangledName);
+#endif
+}
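+
+// Illustrative example: with Opts.Demangle set, DemangleName rewrites
+// "_Z3foov" in place to "foo()"; names __cxa_demangle rejects are left
+// untouched.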
+
+} // namespace symbolize
+} // namespace llvm
diff --git a/tools/llvm-symbolizer/LLVMSymbolize.h b/tools/llvm-symbolizer/LLVMSymbolize.h
new file mode 100644
index 000000000000..0733dfbbc52e
--- /dev/null
+++ b/tools/llvm-symbolizer/LLVMSymbolize.h
@@ -0,0 +1,98 @@
+//===-- LLVMSymbolize.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Header for LLVM symbolization library.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_SYMBOLIZE_H
+#define LLVM_SYMBOLIZE_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <map>
+#include <string>
+
+namespace llvm {
+
+using namespace object;
+
+namespace symbolize {
+
+class ModuleInfo;
+
+class LLVMSymbolizer {
+public:
+ struct Options {
+ bool UseSymbolTable : 1;
+ bool PrintFunctions : 1;
+ bool PrintInlining : 1;
+ bool Demangle : 1;
+ Options(bool UseSymbolTable = true, bool PrintFunctions = true,
+ bool PrintInlining = true, bool Demangle = true)
+ : UseSymbolTable(UseSymbolTable), PrintFunctions(PrintFunctions),
+ PrintInlining(PrintInlining), Demangle(Demangle) {
+ }
+ };
+
+ LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {}
+
+ // Returns the result of symbolization for module name/offset as
+ // a string (possibly containing newlines).
+ std::string
+ symbolizeCode(const std::string &ModuleName, uint64_t ModuleOffset);
+ std::string
+ symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset);
+ void flush();
+private:
+ ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName);
+ std::string printDILineInfo(DILineInfo LineInfo) const;
+ void DemangleName(std::string &Name) const;
+
+ typedef std::map<std::string, ModuleInfo *> ModuleMapTy;
+ ModuleMapTy Modules;
+ Options Opts;
+ static const char kBadString[];
+};
+
+class ModuleInfo {
+public:
+ ModuleInfo(ObjectFile *Obj, DIContext *DICtx);
+
+ DILineInfo symbolizeCode(uint64_t ModuleOffset,
+ const LLVMSymbolizer::Options &Opts) const;
+ DIInliningInfo symbolizeInlinedCode(
+ uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const;
+ bool symbolizeData(uint64_t ModuleOffset, std::string &Name, uint64_t &Start,
+ uint64_t &Size) const;
+
+private:
+ bool getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
+ std::string &Name, uint64_t &Addr,
+ uint64_t &Size) const;
+ OwningPtr<ObjectFile> Module;
+ OwningPtr<DIContext> DebugInfoContext;
+
+ struct SymbolDesc {
+ uint64_t Addr;
+ uint64_t AddrEnd;
+ friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) {
+ return s1.AddrEnd <= s2.Addr;
+ }
+ };
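+  // Note: under this ordering two SymbolDescs compare equivalent exactly when
+  // their [Addr, AddrEnd) ranges overlap, so Map.find() with the probe
+  // { Address, Address + 1 } locates the symbol whose range contains Address
+  // (see getNameFromSymbolTable).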
+ typedef std::map<SymbolDesc, StringRef> SymbolMapTy;
+ SymbolMapTy Functions;
+ SymbolMapTy Objects;
+};
+
+} // namespace symbolize
+} // namespace llvm
+
+#endif // LLVM_SYMBOLIZE_H
diff --git a/tools/llvm-symbolizer/Makefile b/tools/llvm-symbolizer/Makefile
new file mode 100644
index 000000000000..5ac83a58137d
--- /dev/null
+++ b/tools/llvm-symbolizer/Makefile
@@ -0,0 +1,17 @@
+##===- tools/llvm-symbolizer/Makefile ----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../..
+TOOLNAME := llvm-symbolizer
+LINK_COMPONENTS := DebugInfo Object
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp
new file mode 100644
index 000000000000..0cafffaf7126
--- /dev/null
+++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -0,0 +1,119 @@
+//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This utility works much like "addr2line". It is capable of transforming
+// tuples (module name, module offset) into code locations (function name,
+// file, line number, column number). It is intended for compiler-rt tools
+// (especially AddressSanitizer and ThreadSanitizer) that can use it
+// to symbolize stack traces in their error reports.
+//
+//===----------------------------------------------------------------------===//
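+
+// Hypothetical session (commands read from stdin; path and address invented,
+// output abbreviated):
+//   CODE /tmp/a.out 0x4004f0
+//   main
+//   /tmp/a.c:2:0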
+
+#include "LLVMSymbolize.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+#include <cstring>
+#include <string>
+
+using namespace llvm;
+using namespace symbolize;
+
+static cl::opt<bool>
+ClUseSymbolTable("use-symbol-table", cl::init(true),
+ cl::desc("Prefer names in symbol table to names "
+ "in debug info"));
+
+static cl::opt<bool>
+ClPrintFunctions("functions", cl::init(true),
+ cl::desc("Print function names as well as line "
+ "information for a given address"));
+
+static cl::opt<bool>
+ClPrintInlining("inlining", cl::init(true),
+ cl::desc("Print all inlined frames for a given address"));
+
+static cl::opt<bool>
+ClDemangle("demangle", cl::init(true), cl::desc("Demangle function names"));
+
+static bool parseCommand(bool &IsData, std::string &ModuleName,
+ uint64_t &ModuleOffset) {
+ const char *kDataCmd = "DATA ";
+ const char *kCodeCmd = "CODE ";
+ const int kMaxInputStringLength = 1024;
+ const char kDelimiters[] = " \n";
+ char InputString[kMaxInputStringLength];
+ if (!fgets(InputString, sizeof(InputString), stdin))
+ return false;
+ IsData = false;
+ ModuleName = "";
+ std::string ModuleOffsetStr = "";
+ char *pos = InputString;
+ if (strncmp(pos, kDataCmd, strlen(kDataCmd)) == 0) {
+ IsData = true;
+ pos += strlen(kDataCmd);
+ } else if (strncmp(pos, kCodeCmd, strlen(kCodeCmd)) == 0) {
+ IsData = false;
+ pos += strlen(kCodeCmd);
+ } else {
+    // If no command is given, assume CODE.
+ IsData = false;
+ }
+ // Skip delimiters and parse input filename.
+ pos += strspn(pos, kDelimiters);
+ if (*pos == '"' || *pos == '\'') {
+ char quote = *pos;
+ pos++;
+ char *end = strchr(pos, quote);
+ if (end == 0)
+ return false;
+ ModuleName = std::string(pos, end - pos);
+ pos = end + 1;
+ } else {
+ int name_length = strcspn(pos, kDelimiters);
+ ModuleName = std::string(pos, name_length);
+ pos += name_length;
+ }
+ // Skip delimiters and parse module offset.
+ pos += strspn(pos, kDelimiters);
+ int offset_length = strcspn(pos, kDelimiters);
+ ModuleOffsetStr = std::string(pos, offset_length);
+ if (StringRef(ModuleOffsetStr).getAsInteger(0, ModuleOffset))
+ return false;
+ return true;
+}
+
+int main(int argc, char **argv) {
+  // Print a stack trace if we signal out.
+ sys::PrintStackTraceOnErrorSignal();
+ PrettyStackTraceProgram X(argc, argv);
+ llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+ cl::ParseCommandLineOptions(argc, argv, "llvm symbolizer for compiler-rt\n");
+ LLVMSymbolizer::Options Opts(ClUseSymbolTable, ClPrintFunctions,
+ ClPrintInlining, ClDemangle);
+ LLVMSymbolizer Symbolizer(Opts);
+
+ bool IsData = false;
+ std::string ModuleName;
+ uint64_t ModuleOffset;
+ while (parseCommand(IsData, ModuleName, ModuleOffset)) {
+ std::string Result =
+ IsData ? Symbolizer.symbolizeData(ModuleName, ModuleOffset)
+ : Symbolizer.symbolizeCode(ModuleName, ModuleOffset);
+ outs() << Result << "\n";
+ outs().flush();
+ }
+ return 0;
+}
diff --git a/tools/lto/CMakeLists.txt b/tools/lto/CMakeLists.txt
index 911297609b0c..5820b1415be7 100644
--- a/tools/lto/CMakeLists.txt
+++ b/tools/lto/CMakeLists.txt
@@ -6,10 +6,13 @@ add_definitions( -DLLVM_VERSION_INFO=\"${PACKAGE_VERSION}\" )
set(SOURCES
LTOCodeGenerator.cpp
+ LTODisassembler.cpp
lto.cpp
LTOModule.cpp
)
+set(LLVM_COMMON_DEPENDS intrinsics_gen)
+
if( NOT WIN32 AND LLVM_ENABLE_PIC )
set(bsl ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS ON)
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index b1c4f437ffbb..e7c83f94f536 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -14,39 +14,44 @@
#include "LTOCodeGenerator.h"
#include "LTOModule.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Linker.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Config/config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Linker.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/system_error.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/ObjCARC.h"
using namespace llvm;
static cl::opt<bool>
+DisableOpt("disable-opt", cl::init(false),
+ cl::desc("Do not run any optimization passes"));
+
+static cl::opt<bool>
DisableInline("disable-inlining", cl::init(false),
cl::desc("Do not run the inliner pass"));
@@ -371,26 +376,33 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
// Add an appropriate DataLayout instance for this module...
passes.add(new DataLayout(*_target->getDataLayout()));
- passes.add(new TargetTransformInfo(_target->getScalarTargetTransformInfo(),
- _target->getVectorTargetTransformInfo()));
+ _target->addAnalysisPasses(passes);
// Enabling internalize here would use its AllButMain variant. It
// keeps only main if it exists and does nothing for libraries. Instead
// we create the pass ourselves with the symbol list provided by the linker.
- PassManagerBuilder().populateLTOPassManager(passes, /*Internalize=*/false,
+ if (!DisableOpt) {
+ PassManagerBuilder().populateLTOPassManager(passes,
+ /*Internalize=*/false,
!DisableInline,
DisableGVNLoadPRE);
+ }
// Make sure everything is still good.
passes.add(createVerifierPass());
- FunctionPassManager *codeGenPasses = new FunctionPassManager(mergedModule);
+ PassManager codeGenPasses;
- codeGenPasses->add(new DataLayout(*_target->getDataLayout()));
+ codeGenPasses.add(new DataLayout(*_target->getDataLayout()));
+ _target->addAnalysisPasses(codeGenPasses);
formatted_raw_ostream Out(out);
- if (_target->addPassesToEmitFile(*codeGenPasses, Out,
+ // If the bitcode files contain ARC code and were compiled with optimization,
+ // the ObjCARCContractPass must be run, so do it unconditionally here.
+ codeGenPasses.add(createObjCARCContractPass());
+
+ if (_target->addPassesToEmitFile(codeGenPasses, Out,
TargetMachine::CGFT_ObjectFile)) {
errMsg = "target file type not supported";
return true;
@@ -400,15 +412,7 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
passes.run(*mergedModule);
// Run the code generator, and write assembly file
- codeGenPasses->doInitialization();
-
- for (Module::iterator
- it = mergedModule->begin(), e = mergedModule->end(); it != e; ++it)
- if (!it->isDeclaration())
- codeGenPasses->run(*it);
-
- codeGenPasses->doFinalization();
- delete codeGenPasses;
+ codeGenPasses.run(*mergedModule);
return false; // success
}
diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h
index 3081b7dad1d1..601dbfa0449a 100644
--- a/tools/lto/LTOCodeGenerator.h
+++ b/tools/lto/LTOCodeGenerator.h
@@ -14,10 +14,10 @@
#ifndef LTO_CODE_GENERATOR_H
#define LTO_CODE_GENERATOR_H
-#include "llvm/Linker.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm-c/lto.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Linker.h"
#include <string>
namespace llvm {
diff --git a/tools/lto/LTODisassembler.cpp b/tools/lto/LTODisassembler.cpp
new file mode 100644
index 000000000000..186acebc01bf
--- /dev/null
+++ b/tools/lto/LTODisassembler.cpp
@@ -0,0 +1,26 @@
+//===-- LTODisassembler.cpp - LTO Disassembler interface ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a utility function used by clients of libLTO that want
+// to use the disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/lto.h"
+#include "llvm/Support/TargetSelect.h"
+
+using namespace llvm;
+
+void lto_initialize_disassembler() {
+ // Initialize targets and assembly printers/parsers.
+ llvm::InitializeAllTargetInfos();
+ llvm::InitializeAllTargetMCs();
+ llvm::InitializeAllAsmParsers();
+ llvm::InitializeAllDisassemblers();
+}
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index ffdcbe644c81..ff67769192c3 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -13,19 +13,20 @@
//===----------------------------------------------------------------------===//
#include "LTOModule.h"
-#include "llvm/Constants.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -34,8 +35,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/system_error.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
static cl::opt<bool>
@@ -320,8 +320,9 @@ MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length) {
}
/// objcClassNameFromExpression - Get string that the data pointer points to.
-bool LTOModule::objcClassNameFromExpression(Constant *c, std::string &name) {
- if (ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
+bool
+LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
+ if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
Constant *op = ce->getOperand(0);
if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
Constant *cn = gvn->getInitializer();
@@ -337,8 +338,8 @@ bool LTOModule::objcClassNameFromExpression(Constant *c, std::string &name) {
}
/// addObjCClass - Parse i386/ppc ObjC class data structure.
-void LTOModule::addObjCClass(GlobalVariable *clgv) {
- ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
+void LTOModule::addObjCClass(const GlobalVariable *clgv) {
+ const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
if (!c) return;
// second slot in __OBJC,__class is pointer to superclass name
@@ -374,8 +375,8 @@ void LTOModule::addObjCClass(GlobalVariable *clgv) {
}
/// addObjCCategory - Parse i386/ppc ObjC category data structure.
-void LTOModule::addObjCCategory(GlobalVariable *clgv) {
- ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
+void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
+ const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
if (!c) return;
// second slot in __OBJC,__category is pointer to target class name
@@ -399,7 +400,7 @@ void LTOModule::addObjCCategory(GlobalVariable *clgv) {
}
/// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
-void LTOModule::addObjCClassRef(GlobalVariable *clgv) {
+void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
std::string targetclassName;
if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
return;
@@ -419,7 +420,7 @@ void LTOModule::addObjCClassRef(GlobalVariable *clgv) {
}
/// addDefinedDataSymbol - Add a data symbol as defined to the list.
-void LTOModule::addDefinedDataSymbol(GlobalValue *v) {
+void LTOModule::addDefinedDataSymbol(const GlobalValue *v) {
// Add to list of defined symbols.
addDefinedSymbol(v, false);
@@ -448,34 +449,34 @@ void LTOModule::addDefinedDataSymbol(GlobalValue *v) {
// special case if this data blob is an ObjC class definition
if (v->getSection().compare(0, 15, "__OBJC,__class,") == 0) {
- if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
+ if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
addObjCClass(gv);
}
}
// special case if this data blob is an ObjC category definition
else if (v->getSection().compare(0, 18, "__OBJC,__category,") == 0) {
- if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
+ if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
addObjCCategory(gv);
}
}
// special case if this data blob is the list of referenced classes
else if (v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0) {
- if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
+ if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
addObjCClassRef(gv);
}
}
}
/// addDefinedFunctionSymbol - Add a function symbol as defined to the list.
-void LTOModule::addDefinedFunctionSymbol(Function *f) {
+void LTOModule::addDefinedFunctionSymbol(const Function *f) {
// add to list of defined symbols
addDefinedSymbol(f, true);
}
/// addDefinedSymbol - Add a defined symbol to the list.
-void LTOModule::addDefinedSymbol(GlobalValue *def, bool isFunction) {
+void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) {
// ignore all llvm.* symbols
if (def->getName().startswith("llvm."))
return;
@@ -492,7 +493,7 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, bool isFunction) {
if (isFunction) {
attr |= LTO_SYMBOL_PERMISSIONS_CODE;
} else {
- GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
+ const GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
if (gv && gv->isConstant())
attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
else
@@ -607,7 +608,8 @@ void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
/// addPotentialUndefinedSymbol - Add a symbol which isn't defined just yet to a
/// list to be resolved later.
-void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl, bool isFunc) {
+void
+LTOModule::addPotentialUndefinedSymbol(const GlobalValue *decl, bool isFunc) {
// ignore all llvm.* symbols
if (decl->getName().startswith("llvm."))
return;
@@ -731,7 +733,8 @@ namespace {
return Symbols.end();
}
- RecordStreamer(MCContext &Context) : MCStreamer(Context) {}
+ RecordStreamer(MCContext &Context)
+ : MCStreamer(SK_RecordStreamer, Context) {}
virtual void EmitInstruction(const MCInst &Inst) {
// Scan for values.
@@ -743,6 +746,9 @@ namespace {
Symbol->setSection(*getCurrentSection());
markDefined(*Symbol);
}
+ virtual void EmitDebugLabel(MCSymbol *Symbol) {
+ EmitLabel(Symbol);
+ }
virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
// FIXME: should we handle aliases?
markDefined(*Symbol);
@@ -760,8 +766,13 @@ namespace {
markDefined(*Symbol);
}
+ virtual void EmitBundleAlignMode(unsigned AlignPow2) {}
+ virtual void EmitBundleLock(bool AlignToEnd) {}
+ virtual void EmitBundleUnlock() {}
+
// Noop calls.
virtual void ChangeSection(const MCSection *Section) {}
+ virtual void InitToTextSection() {}
virtual void InitSections() {}
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
virtual void EmitThumbFunc(MCSymbol *Func) {}
@@ -794,6 +805,10 @@ namespace {
const MCSymbol *Label,
unsigned PointerSize) {}
virtual void FinishImpl() {}
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_RecordStreamer;
+ }
};
} // end anonymous namespace
diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h
index 8e52206b5b5c..83f3a7def108 100644
--- a/tools/lto/LTOModule.h
+++ b/tools/lto/LTOModule.h
@@ -14,15 +14,15 @@
#ifndef LTO_MODULE_H
#define LTO_MODULE_H
-#include "llvm/Module.h"
+#include "llvm-c/lto.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm-c/lto.h"
-#include <vector>
#include <string>
+#include <vector>
// Forward references to llvm classes.
namespace llvm {
@@ -44,7 +44,7 @@ private:
const char *name;
uint32_t attributes;
bool isFunction;
- llvm::GlobalValue *symbol;
+ const llvm::GlobalValue *symbol;
};
llvm::OwningPtr<llvm::Module> _module;
@@ -138,16 +138,16 @@ private:
/// addPotentialUndefinedSymbol - Add a symbol which isn't defined just yet
/// to a list to be resolved later.
- void addPotentialUndefinedSymbol(llvm::GlobalValue *dcl, bool isFunc);
+ void addPotentialUndefinedSymbol(const llvm::GlobalValue *dcl, bool isFunc);
/// addDefinedSymbol - Add a defined symbol to the list.
- void addDefinedSymbol(llvm::GlobalValue *def, bool isFunction);
+ void addDefinedSymbol(const llvm::GlobalValue *def, bool isFunction);
/// addDefinedFunctionSymbol - Add a function symbol as defined to the list.
- void addDefinedFunctionSymbol(llvm::Function *f);
+ void addDefinedFunctionSymbol(const llvm::Function *f);
/// addDefinedDataSymbol - Add a data symbol as defined to the list.
- void addDefinedDataSymbol(llvm::GlobalValue *v);
+ void addDefinedDataSymbol(const llvm::GlobalValue *v);
/// addAsmGlobalSymbols - Add global symbols from module-level ASM to the
/// defined or undefined lists.
@@ -162,17 +162,17 @@ private:
void addAsmGlobalSymbolUndef(const char *);
/// addObjCClass - Parse i386/ppc ObjC class data structure.
- void addObjCClass(llvm::GlobalVariable *clgv);
+ void addObjCClass(const llvm::GlobalVariable *clgv);
/// addObjCCategory - Parse i386/ppc ObjC category data structure.
- void addObjCCategory(llvm::GlobalVariable *clgv);
+ void addObjCCategory(const llvm::GlobalVariable *clgv);
/// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
- void addObjCClassRef(llvm::GlobalVariable *clgv);
+ void addObjCClassRef(const llvm::GlobalVariable *clgv);
/// objcClassNameFromExpression - Get string that the data pointer points
/// to.
- bool objcClassNameFromExpression(llvm::Constant* c, std::string &name);
+ bool objcClassNameFromExpression(const llvm::Constant* c, std::string &name);
/// isTargetMatch - Returns 'true' if the memory buffer is for the specified
/// target triple.
diff --git a/tools/lto/Makefile b/tools/lto/Makefile
index 3610fed03bac..ab2e16e5fab6 100644
--- a/tools/lto/Makefile
+++ b/tools/lto/Makefile
@@ -51,7 +51,7 @@ ifeq ($(HOST_OS),Darwin)
endif
# If we're doing an Apple-style build, add the LTO object path.
- ifeq ($(RC_BUILDIT),YES)
+ ifeq ($(RC_XBS),YES)
TempFile := $(shell mkdir -p ${OBJROOT}/dSYMs ; mktemp ${OBJROOT}/dSYMs/llvm-lto.XXXXXX)
LLVMLibsOptions := $(LLVMLibsOptions) \
-Wl,-object_path_lto -Wl,$(TempFile)
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index a7e633d14b9d..11ad532be896 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -13,10 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/lto.h"
-#include "llvm-c/Core.h"
-
-#include "LTOModule.h"
#include "LTOCodeGenerator.h"
+#include "LTOModule.h"
+#include "llvm-c/Core.h"
// Holds most recent error string.
diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports
index 4940bb147efa..46d0d74c82a8 100644
--- a/tools/lto/lto.exports
+++ b/tools/lto/lto.exports
@@ -1,5 +1,6 @@
lto_get_error_message
lto_get_version
+lto_initialize_disassembler
lto_module_create
lto_module_create_from_fd
lto_module_create_from_fd_at_offset
@@ -28,6 +29,7 @@ lto_codegen_set_assembler_path
lto_codegen_set_cpu
lto_codegen_compile_to_file
LLVMCreateDisasm
+LLVMCreateDisasmCPU
LLVMDisasmDispose
LLVMDisasmInstruction
LLVMSetDisasmOptions
diff --git a/tools/macho-dump/macho-dump.cpp b/tools/macho-dump/macho-dump.cpp
index 20deda9a0cbd..3bd3ecc8fde0 100644
--- a/tools/macho-dump/macho-dump.cpp
+++ b/tools/macho-dump/macho-dump.cpp
@@ -337,7 +337,7 @@ static int DumpDataInCodeDataCommand(MachOObject &Obj,
InMemoryStruct<macho::LinkeditDataLoadCommand> LLC;
Obj.ReadLinkeditDataLoadCommand(LCI, LLC);
if (!LLC)
- return Error("unable to read segment load command");
+ return Error("unable to read data-in-code load command");
outs() << " ('dataoff', " << LLC->DataOffset << ")\n"
<< " ('datasize', " << LLC->DataSize << ")\n"
@@ -361,6 +361,31 @@ static int DumpDataInCodeDataCommand(MachOObject &Obj,
return 0;
}
+static int DumpLinkerOptionsCommand(MachOObject &Obj,
+ const MachOObject::LoadCommandInfo &LCI) {
+ InMemoryStruct<macho::LinkerOptionsLoadCommand> LOLC;
+ Obj.ReadLinkerOptionsLoadCommand(LCI, LOLC);
+ if (!LOLC)
+ return Error("unable to read linker options load command");
+
+ outs() << " ('count', " << LOLC->Count << ")\n"
+ << " ('_strings', [\n";
+
+ uint64_t DataSize = LOLC->Size - sizeof(macho::LinkerOptionsLoadCommand);
+ StringRef Data = Obj.getData(
+ LCI.Offset + sizeof(macho::LinkerOptionsLoadCommand), DataSize);
+ for (unsigned i = 0; i != LOLC->Count; ++i) {
+ std::pair<StringRef,StringRef> Split = Data.split('\0');
+ outs() << "\t\"";
+ outs().write_escaped(Split.first);
+ outs() << "\",\n";
+ Data = Split.second;
+ }
+  outs() << " ])\n";
+
+ return 0;
+}
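+
+// An illustrative sketch of the payload this parses (the option strings are
+// hypothetical): LCT_LinkerOptions packs Count NUL-terminated strings after
+// the load command header, so Count == 3 with the payload
+// "-lz\0-framework\0Foundation\0" dumps as:
+//
+//   ('count', 3)
+//   ('_strings', [
+//     "-lz",
+//     "-framework",
+//     "Foundation",
+//   ])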
+
static int DumpLoadCommand(MachOObject &Obj, unsigned Index) {
const MachOObject::LoadCommandInfo &LCI = Obj.getLoadCommandInfo(Index);
@@ -390,6 +415,9 @@ static int DumpLoadCommand(MachOObject &Obj, unsigned Index) {
case macho::LCT_DataInCode:
Res = DumpDataInCodeDataCommand(Obj, LCI);
break;
+ case macho::LCT_LinkerOptions:
+ Res = DumpLinkerOptionsCommand(Obj, LCI);
+ break;
default:
Warning("unknown load command: " + Twine(LCI.Command.Type));
break;
diff --git a/utils/obj2yaml/CMakeLists.txt b/tools/obj2yaml/CMakeLists.txt
index d64bf1bad866..d64bf1bad866 100644
--- a/utils/obj2yaml/CMakeLists.txt
+++ b/tools/obj2yaml/CMakeLists.txt
diff --git a/tools/obj2yaml/Makefile b/tools/obj2yaml/Makefile
new file mode 100644
index 000000000000..95f393ddd6f4
--- /dev/null
+++ b/tools/obj2yaml/Makefile
@@ -0,0 +1,20 @@
+##===- tools/obj2yaml/Makefile ----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = obj2yaml
+LINK_COMPONENTS := object
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Don't install this utility
+NO_INSTALL = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/obj2yaml/coff2yaml.cpp b/tools/obj2yaml/coff2yaml.cpp
new file mode 100644
index 000000000000..f0241d931e60
--- /dev/null
+++ b/tools/obj2yaml/coff2yaml.cpp
@@ -0,0 +1,361 @@
+//===------ coff2yaml.cpp - obj2yaml conversion tool ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "obj2yaml.h"
+#include "llvm/Object/COFF.h"
+
+
+template <typename One, typename Two>
+struct pod_pair { // I'd much rather use std::pair, but it's not a POD
+ One first;
+ Two second;
+};
+
+#define STRING_PAIR(x) {llvm::COFF::x, #x}
+static const pod_pair<llvm::COFF::MachineTypes, const char *>
+MachineTypePairs [] = {
+ STRING_PAIR(IMAGE_FILE_MACHINE_UNKNOWN),
+ STRING_PAIR(IMAGE_FILE_MACHINE_AM33),
+ STRING_PAIR(IMAGE_FILE_MACHINE_AMD64),
+ STRING_PAIR(IMAGE_FILE_MACHINE_ARM),
+ STRING_PAIR(IMAGE_FILE_MACHINE_ARMV7),
+ STRING_PAIR(IMAGE_FILE_MACHINE_EBC),
+ STRING_PAIR(IMAGE_FILE_MACHINE_I386),
+ STRING_PAIR(IMAGE_FILE_MACHINE_IA64),
+ STRING_PAIR(IMAGE_FILE_MACHINE_M32R),
+ STRING_PAIR(IMAGE_FILE_MACHINE_MIPS16),
+ STRING_PAIR(IMAGE_FILE_MACHINE_MIPSFPU),
+ STRING_PAIR(IMAGE_FILE_MACHINE_MIPSFPU16),
+ STRING_PAIR(IMAGE_FILE_MACHINE_POWERPC),
+ STRING_PAIR(IMAGE_FILE_MACHINE_POWERPCFP),
+ STRING_PAIR(IMAGE_FILE_MACHINE_R4000),
+ STRING_PAIR(IMAGE_FILE_MACHINE_SH3),
+ STRING_PAIR(IMAGE_FILE_MACHINE_SH3DSP),
+ STRING_PAIR(IMAGE_FILE_MACHINE_SH4),
+ STRING_PAIR(IMAGE_FILE_MACHINE_SH5),
+ STRING_PAIR(IMAGE_FILE_MACHINE_THUMB),
+ STRING_PAIR(IMAGE_FILE_MACHINE_WCEMIPSV2)
+};
+
+static const pod_pair<llvm::COFF::SectionCharacteristics, const char *>
+SectionCharacteristicsPairs1 [] = {
+ STRING_PAIR(IMAGE_SCN_TYPE_NO_PAD),
+ STRING_PAIR(IMAGE_SCN_CNT_CODE),
+ STRING_PAIR(IMAGE_SCN_CNT_INITIALIZED_DATA),
+ STRING_PAIR(IMAGE_SCN_CNT_UNINITIALIZED_DATA),
+ STRING_PAIR(IMAGE_SCN_LNK_OTHER),
+ STRING_PAIR(IMAGE_SCN_LNK_INFO),
+ STRING_PAIR(IMAGE_SCN_LNK_REMOVE),
+ STRING_PAIR(IMAGE_SCN_LNK_COMDAT),
+ STRING_PAIR(IMAGE_SCN_GPREL),
+ STRING_PAIR(IMAGE_SCN_MEM_PURGEABLE),
+ STRING_PAIR(IMAGE_SCN_MEM_16BIT),
+ STRING_PAIR(IMAGE_SCN_MEM_LOCKED),
+ STRING_PAIR(IMAGE_SCN_MEM_PRELOAD)
+};
+
+static const pod_pair<llvm::COFF::SectionCharacteristics, const char *>
+SectionCharacteristicsPairsAlignment [] = {
+ STRING_PAIR(IMAGE_SCN_ALIGN_1BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_2BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_4BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_8BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_16BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_32BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_64BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_128BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_256BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_512BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_1024BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_2048BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_4096BYTES),
+ STRING_PAIR(IMAGE_SCN_ALIGN_8192BYTES)
+};
+
+static const pod_pair<llvm::COFF::SectionCharacteristics, const char *>
+SectionCharacteristicsPairs2 [] = {
+ STRING_PAIR(IMAGE_SCN_LNK_NRELOC_OVFL),
+ STRING_PAIR(IMAGE_SCN_MEM_DISCARDABLE),
+ STRING_PAIR(IMAGE_SCN_MEM_NOT_CACHED),
+ STRING_PAIR(IMAGE_SCN_MEM_NOT_PAGED),
+ STRING_PAIR(IMAGE_SCN_MEM_SHARED),
+ STRING_PAIR(IMAGE_SCN_MEM_EXECUTE),
+ STRING_PAIR(IMAGE_SCN_MEM_READ),
+ STRING_PAIR(IMAGE_SCN_MEM_WRITE)
+};
+
+static const pod_pair<llvm::COFF::SymbolBaseType, const char *>
+SymbolBaseTypePairs [] = {
+ STRING_PAIR(IMAGE_SYM_TYPE_NULL),
+ STRING_PAIR(IMAGE_SYM_TYPE_VOID),
+ STRING_PAIR(IMAGE_SYM_TYPE_CHAR),
+ STRING_PAIR(IMAGE_SYM_TYPE_SHORT),
+ STRING_PAIR(IMAGE_SYM_TYPE_INT),
+ STRING_PAIR(IMAGE_SYM_TYPE_LONG),
+ STRING_PAIR(IMAGE_SYM_TYPE_FLOAT),
+ STRING_PAIR(IMAGE_SYM_TYPE_DOUBLE),
+ STRING_PAIR(IMAGE_SYM_TYPE_STRUCT),
+ STRING_PAIR(IMAGE_SYM_TYPE_UNION),
+ STRING_PAIR(IMAGE_SYM_TYPE_ENUM),
+ STRING_PAIR(IMAGE_SYM_TYPE_MOE),
+ STRING_PAIR(IMAGE_SYM_TYPE_BYTE),
+ STRING_PAIR(IMAGE_SYM_TYPE_WORD),
+ STRING_PAIR(IMAGE_SYM_TYPE_UINT),
+ STRING_PAIR(IMAGE_SYM_TYPE_DWORD)
+};
+
+static const pod_pair<llvm::COFF::SymbolComplexType, const char *>
+SymbolComplexTypePairs [] = {
+ STRING_PAIR(IMAGE_SYM_DTYPE_NULL),
+ STRING_PAIR(IMAGE_SYM_DTYPE_POINTER),
+ STRING_PAIR(IMAGE_SYM_DTYPE_FUNCTION),
+ STRING_PAIR(IMAGE_SYM_DTYPE_ARRAY),
+};
+
+static const pod_pair<llvm::COFF::SymbolStorageClass, const char *>
+SymbolStorageClassPairs [] = {
+ STRING_PAIR(IMAGE_SYM_CLASS_END_OF_FUNCTION),
+ STRING_PAIR(IMAGE_SYM_CLASS_NULL),
+ STRING_PAIR(IMAGE_SYM_CLASS_AUTOMATIC),
+ STRING_PAIR(IMAGE_SYM_CLASS_EXTERNAL),
+ STRING_PAIR(IMAGE_SYM_CLASS_STATIC),
+ STRING_PAIR(IMAGE_SYM_CLASS_REGISTER),
+ STRING_PAIR(IMAGE_SYM_CLASS_EXTERNAL_DEF),
+ STRING_PAIR(IMAGE_SYM_CLASS_LABEL),
+ STRING_PAIR(IMAGE_SYM_CLASS_UNDEFINED_LABEL),
+ STRING_PAIR(IMAGE_SYM_CLASS_MEMBER_OF_STRUCT),
+ STRING_PAIR(IMAGE_SYM_CLASS_ARGUMENT),
+ STRING_PAIR(IMAGE_SYM_CLASS_STRUCT_TAG),
+ STRING_PAIR(IMAGE_SYM_CLASS_MEMBER_OF_UNION),
+ STRING_PAIR(IMAGE_SYM_CLASS_UNION_TAG),
+ STRING_PAIR(IMAGE_SYM_CLASS_TYPE_DEFINITION),
+ STRING_PAIR(IMAGE_SYM_CLASS_UNDEFINED_STATIC),
+ STRING_PAIR(IMAGE_SYM_CLASS_ENUM_TAG),
+ STRING_PAIR(IMAGE_SYM_CLASS_MEMBER_OF_ENUM),
+ STRING_PAIR(IMAGE_SYM_CLASS_REGISTER_PARAM),
+ STRING_PAIR(IMAGE_SYM_CLASS_BIT_FIELD),
+ STRING_PAIR(IMAGE_SYM_CLASS_BLOCK),
+ STRING_PAIR(IMAGE_SYM_CLASS_FUNCTION),
+ STRING_PAIR(IMAGE_SYM_CLASS_END_OF_STRUCT),
+ STRING_PAIR(IMAGE_SYM_CLASS_FILE),
+ STRING_PAIR(IMAGE_SYM_CLASS_SECTION),
+ STRING_PAIR(IMAGE_SYM_CLASS_WEAK_EXTERNAL),
+ STRING_PAIR(IMAGE_SYM_CLASS_CLR_TOKEN),
+};
+
+static const pod_pair<llvm::COFF::RelocationTypeX86, const char *>
+RelocationTypeX86Pairs [] = {
+ STRING_PAIR(IMAGE_REL_I386_ABSOLUTE),
+ STRING_PAIR(IMAGE_REL_I386_DIR16),
+ STRING_PAIR(IMAGE_REL_I386_REL16),
+ STRING_PAIR(IMAGE_REL_I386_DIR32),
+ STRING_PAIR(IMAGE_REL_I386_DIR32NB),
+ STRING_PAIR(IMAGE_REL_I386_SEG12),
+ STRING_PAIR(IMAGE_REL_I386_SECTION),
+ STRING_PAIR(IMAGE_REL_I386_SECREL),
+ STRING_PAIR(IMAGE_REL_I386_TOKEN),
+ STRING_PAIR(IMAGE_REL_I386_SECREL7),
+ STRING_PAIR(IMAGE_REL_I386_REL32),
+ STRING_PAIR(IMAGE_REL_AMD64_ABSOLUTE),
+ STRING_PAIR(IMAGE_REL_AMD64_ADDR64),
+ STRING_PAIR(IMAGE_REL_AMD64_ADDR32),
+ STRING_PAIR(IMAGE_REL_AMD64_ADDR32NB),
+ STRING_PAIR(IMAGE_REL_AMD64_REL32),
+ STRING_PAIR(IMAGE_REL_AMD64_REL32_1),
+ STRING_PAIR(IMAGE_REL_AMD64_REL32_2),
+ STRING_PAIR(IMAGE_REL_AMD64_REL32_3),
+ STRING_PAIR(IMAGE_REL_AMD64_REL32_4),
+ STRING_PAIR(IMAGE_REL_AMD64_REL32_5),
+ STRING_PAIR(IMAGE_REL_AMD64_SECTION),
+ STRING_PAIR(IMAGE_REL_AMD64_SECREL),
+ STRING_PAIR(IMAGE_REL_AMD64_SECREL7),
+ STRING_PAIR(IMAGE_REL_AMD64_TOKEN),
+ STRING_PAIR(IMAGE_REL_AMD64_SREL32),
+ STRING_PAIR(IMAGE_REL_AMD64_PAIR),
+ STRING_PAIR(IMAGE_REL_AMD64_SSPAN32)
+};
+
+static const pod_pair<llvm::COFF::RelocationTypesARM, const char *>
+RelocationTypesARMPairs [] = {
+ STRING_PAIR(IMAGE_REL_ARM_ABSOLUTE),
+ STRING_PAIR(IMAGE_REL_ARM_ADDR32),
+ STRING_PAIR(IMAGE_REL_ARM_ADDR32NB),
+ STRING_PAIR(IMAGE_REL_ARM_BRANCH24),
+ STRING_PAIR(IMAGE_REL_ARM_BRANCH11),
+ STRING_PAIR(IMAGE_REL_ARM_TOKEN),
+ STRING_PAIR(IMAGE_REL_ARM_BLX24),
+ STRING_PAIR(IMAGE_REL_ARM_BLX11),
+ STRING_PAIR(IMAGE_REL_ARM_SECTION),
+ STRING_PAIR(IMAGE_REL_ARM_SECREL),
+ STRING_PAIR(IMAGE_REL_ARM_MOV32A),
+ STRING_PAIR(IMAGE_REL_ARM_MOV32T),
+ STRING_PAIR(IMAGE_REL_ARM_BRANCH20T),
+ STRING_PAIR(IMAGE_REL_ARM_BRANCH24T),
+ STRING_PAIR(IMAGE_REL_ARM_BLX23T)
+};
+#undef STRING_PAIR
+
+
+static const char endl = '\n';
+
+namespace yaml { // COFF-specific yaml-writing routines
+
+static llvm::raw_ostream &writeName(llvm::raw_ostream &Out,
+ const char *Name, std::size_t NameSize) {
+ for (std::size_t i = 0; i < NameSize; ++i) {
+ if (!Name[i]) break;
+ Out << Name[i];
+ }
+ return Out;
+}
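+
+// For example: an 8-byte COFF section name padded as ".text\0\0\0" is
+// written as ".text".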
+
+// Given an array of pod_pair<enum, const char *>, write all enums that match
+template <typename T, std::size_t N>
+static llvm::raw_ostream &writeBitMask(llvm::raw_ostream &Out,
+ const pod_pair<T, const char *> (&Arr)[N], unsigned long Val) {
+ for (std::size_t i = 0; i < N; ++i)
+ if (Val & Arr[i].first)
+ Out << Arr[i].second << ", ";
+ return Out;
+}
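+
+// For example (illustrative): writeBitMask(Out, SectionCharacteristicsPairs2,
+// IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE) emits
+// "IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ".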
+
+} // end of yaml namespace
+
+// Given an array of pod_pair<enum, const char *>, look up a value
+template <typename T, std::size_t N>
+const char *nameLookup(const pod_pair<T, const char *> (&Arr)[N],
+ unsigned long Val, const char *NotFound = NULL) {
+ T n = static_cast<T>(Val);
+ for (std::size_t i = 0; i < N; ++i)
+ if (n == Arr[i].first)
+ return Arr[i].second;
+ return NotFound;
+}
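+
+// For example: nameLookup(MachineTypePairs, 0x8664) returns
+// "IMAGE_FILE_MACHINE_AMD64"; unrecognized values fall back to NotFound.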
+
+
+static llvm::raw_ostream &yamlCOFFHeader(
+    const llvm::object::coff_file_header *Header, llvm::raw_ostream &Out) {
+
+ Out << "header: !Header" << endl;
+ Out << " Machine: ";
+ Out << nameLookup(MachineTypePairs, Header->Machine, "# Unknown_MachineTypes")
+ << " # (";
+ return yaml::writeHexNumber(Out, Header->Machine) << ")" << endl << endl;
+}
+
+
+static llvm::raw_ostream &yamlCOFFSections(llvm::object::COFFObjectFile &Obj,
+ std::size_t NumSections, llvm::raw_ostream &Out) {
+ llvm::error_code ec;
+ Out << "sections:" << endl;
+ for (llvm::object::section_iterator iter = Obj.begin_sections();
+ iter != Obj.end_sections(); iter.increment(ec)) {
+ const llvm::object::coff_section *sect = Obj.getCOFFSection(iter);
+
+ Out << " - !Section" << endl;
+ Out << " Name: ";
+ yaml::writeName(Out, sect->Name, sizeof(sect->Name)) << endl;
+
+ Out << " Characteristics: [";
+ yaml::writeBitMask(Out, SectionCharacteristicsPairs1, sect->Characteristics);
+ Out << nameLookup(SectionCharacteristicsPairsAlignment,
+ sect->Characteristics & 0x00F00000, "# Unrecognized_IMAGE_SCN_ALIGN")
+ << ", ";
+ yaml::writeBitMask(Out, SectionCharacteristicsPairs2, sect->Characteristics);
+ Out << "] # ";
+ yaml::writeHexNumber(Out, sect->Characteristics) << endl;
+
+ llvm::ArrayRef<uint8_t> sectionData;
+ Obj.getSectionContents(sect, sectionData);
+ Out << " SectionData: ";
+ yaml::writeHexStream(Out, sectionData) << endl;
+ if (iter->begin_relocations() != iter->end_relocations())
+ Out << " Relocations:\n";
+ for (llvm::object::relocation_iterator rIter = iter->begin_relocations();
+ rIter != iter->end_relocations(); rIter.increment(ec)) {
+ const llvm::object::coff_relocation *reloc = Obj.getCOFFRelocation(rIter);
+
+ Out << " - !Relocation" << endl;
+      Out << "      VirtualAddress: ";
+ yaml::writeHexNumber(Out, reloc->VirtualAddress) << endl;
+ Out << " SymbolTableIndex: " << reloc->SymbolTableIndex << endl;
+ Out << " Type: "
+ << nameLookup(RelocationTypeX86Pairs, reloc->Type) << endl;
+ // TODO: Use the correct reloc type for the machine.
+ Out << endl;
+ }
+
+ }
+ return Out;
+}
+
+static llvm::raw_ostream &yamlCOFFSymbols(llvm::object::COFFObjectFile &Obj,
+ std::size_t NumSymbols, llvm::raw_ostream &Out) {
+ llvm::error_code ec;
+ Out << "symbols:" << endl;
+ for (llvm::object::symbol_iterator iter = Obj.begin_symbols();
+ iter != Obj.end_symbols(); iter.increment(ec)) {
+ // Gather all the info that we need
+ llvm::StringRef str;
+ const llvm::object::coff_symbol *symbol = Obj.getCOFFSymbol(iter);
+ Obj.getSymbolName(symbol, str);
+ std::size_t simpleType = symbol->getBaseType();
+ std::size_t complexType = symbol->getComplexType();
+ std::size_t storageClass = symbol->StorageClass;
+
+ Out << " - !Symbol" << endl;
+ Out << " Name: " << str << endl;
+
+ Out << " Value: " << symbol->Value << endl;
+ Out << " SectionNumber: " << symbol->SectionNumber << endl;
+
+ Out << " SimpleType: "
+ << nameLookup(SymbolBaseTypePairs, simpleType,
+ "# Unknown_SymbolBaseType")
+ << " # (" << simpleType << ")" << endl;
+
+ Out << " ComplexType: "
+ << nameLookup(SymbolComplexTypePairs, complexType,
+ "# Unknown_SymbolComplexType")
+ << " # (" << complexType << ")" << endl;
+
+ Out << " StorageClass: "
+ << nameLookup(SymbolStorageClassPairs, storageClass,
+ "# Unknown_StorageClass")
+ << " # (" << (int) storageClass << ")" << endl;
+
+ if (symbol->NumberOfAuxSymbols > 0) {
+ llvm::ArrayRef<uint8_t> aux = Obj.getSymbolAuxData(symbol);
+ Out << " NumberOfAuxSymbols: "
+ << (int) symbol->NumberOfAuxSymbols << endl;
+      Out << "      AuxiliaryData:   ";
+ yaml::writeHexStream(Out, aux);
+ }
+
+ Out << endl;
+ }
+
+ return Out;
+}
+
+
+llvm::error_code coff2yaml(llvm::raw_ostream &Out, llvm::MemoryBuffer *TheObj) {
+ llvm::error_code ec;
+ llvm::object::COFFObjectFile obj(TheObj, ec);
+ if (!ec) {
+ const llvm::object::coff_file_header *hd;
+ ec = obj.getHeader(hd);
+ if (!ec) {
+ yamlCOFFHeader(hd, Out);
+ yamlCOFFSections(obj, hd->NumberOfSections, Out);
+ yamlCOFFSymbols(obj, hd->NumberOfSymbols, Out);
+ }
+ }
+ return ec;
+}
diff --git a/tools/obj2yaml/obj2yaml.cpp b/tools/obj2yaml/obj2yaml.cpp
new file mode 100644
index 000000000000..bdc461a94715
--- /dev/null
+++ b/tools/obj2yaml/obj2yaml.cpp
@@ -0,0 +1,86 @@
+//===------ obj2yaml.cpp - obj2yaml conversion tool -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "obj2yaml.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+
+const char endl = '\n';
+
+namespace yaml { // generic yaml-writing routines
+
+unsigned char printable(unsigned char Ch) {
+ return Ch >= ' ' && Ch <= '~' ? Ch : '.';
+}
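+
+// For example: printable('A') yields 'A', while printable(0x01) yields '.'.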
+
+llvm::raw_ostream &writeHexStream(llvm::raw_ostream &Out,
+ const llvm::ArrayRef<uint8_t> arr) {
+ const char *hex = "0123456789ABCDEF";
+ Out << " !hex \"";
+
+ typedef llvm::ArrayRef<uint8_t>::const_iterator iter_t;
+ const iter_t end = arr.end();
+ for (iter_t iter = arr.begin(); iter != end; ++iter)
+ Out << hex[(*iter >> 4) & 0x0F] << hex[(*iter & 0x0F)];
+
+ Out << "\" # |";
+ for (iter_t iter = arr.begin(); iter != end; ++iter)
+ Out << printable(*iter);
+ Out << "|" << endl;
+
+ return Out;
+}
+
+llvm::raw_ostream &writeHexNumber(llvm::raw_ostream &Out,
+                                  unsigned long long N) {
+ if (N >= 10)
+ Out << "0x";
+ Out.write_hex(N);
+ return Out;
+}
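+
+// For example: writeHexNumber(Out, 5) emits "5", while
+// writeHexNumber(Out, 26) emits "0x1a".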
+
+}
+
+
+using namespace llvm;
+enum ObjectFileType { coff };
+
+cl::opt<ObjectFileType> InputFormat(
+ cl::desc("Choose input format"),
+ cl::values(
+ clEnumVal(coff, "process COFF object files"),
+ clEnumValEnd));
+
+cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"),
+                                   cl::init("-"));
+
+int main(int argc, char *argv[]) {
+ cl::ParseCommandLineOptions(argc, argv);
+ sys::PrintStackTraceOnErrorSignal();
+ PrettyStackTraceProgram X(argc, argv);
+ llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+  // Process the input file.
+ OwningPtr<MemoryBuffer> buf;
+
+  // TODO: If this is an archive, then burst it and dump each entry.
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, buf))
+ llvm::errs() << "Error: '" << ec.message() << "' opening file '"
+ << InputFilename << "'" << endl;
+ else {
+ ec = coff2yaml(llvm::outs(), buf.take());
+ if (ec)
+ llvm::errs() << "Error: " << ec.message() << " dumping COFF file" << endl;
+ }
+
+ return 0;
+}
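+
+// Example invocation (the object file name is hypothetical):
+//   $ obj2yaml foo.obj > foo.yaml
+// With no argument the tool reads from stdin, per the "-" default above.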
diff --git a/tools/obj2yaml/obj2yaml.h b/tools/obj2yaml/obj2yaml.h
new file mode 100644
index 000000000000..0bc376a6db82
--- /dev/null
+++ b/tools/obj2yaml/obj2yaml.h
@@ -0,0 +1,34 @@
+//===------ obj2yaml.h - obj2yaml conversion tool ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This file declares some helper routines and the format-specific writers.
+// To add a new format, add the declaration here and implement it in a
+// separate source file.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_OBJ2YAML_H
+#define LLVM_UTILS_OBJ2YAML_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+namespace yaml { // routines for writing YAML
+// Write a hex stream:
+//    <Prefix> !hex "<hex digits>" # |<ASCII chars>|\n
+ llvm::raw_ostream &writeHexStream
+ (llvm::raw_ostream &Out, const llvm::ArrayRef<uint8_t> arr);
+
+// Writes a number in hex, prefixing it with 0x if it is >= 10.
+ llvm::raw_ostream &writeHexNumber
+ (llvm::raw_ostream &Out, unsigned long long N);
+}
+
+llvm::error_code coff2yaml(llvm::raw_ostream &Out, llvm::MemoryBuffer *TheObj);
+
+#endif
diff --git a/tools/opt/AnalysisWrappers.cpp b/tools/opt/AnalysisWrappers.cpp
index a2b57bb3e115..55f544ff5e5c 100644
--- a/tools/opt/AnalysisWrappers.cpp
+++ b/tools/opt/AnalysisWrappers.cpp
@@ -17,10 +17,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Module.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/tools/opt/CMakeLists.txt b/tools/opt/CMakeLists.txt
index 32de6d406088..91959119e491 100644
--- a/tools/opt/CMakeLists.txt
+++ b/tools/opt/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser bitwriter instrumentation scalaropts ipo vectorize)
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser bitwriter irreader instrumentation scalaropts objcarcopts ipo vectorize)
add_llvm_tool(opt
AnalysisWrappers.cpp
@@ -6,3 +6,4 @@ add_llvm_tool(opt
PrintSCC.cpp
opt.cpp
)
+set_target_properties(opt PROPERTIES ENABLE_EXPORTS 1)
diff --git a/tools/opt/GraphPrinters.cpp b/tools/opt/GraphPrinters.cpp
index 30361f501cda..f271966d104f 100644
--- a/tools/opt/GraphPrinters.cpp
+++ b/tools/opt/GraphPrinters.cpp
@@ -14,81 +14,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/GraphWriter.h"
-#include "llvm/Pass.h"
-#include "llvm/Value.h"
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Support/ToolOutputFile.h"
-using namespace llvm;
-
-template<typename GraphType>
-static void WriteGraphToFile(raw_ostream &O, const std::string &GraphName,
- const GraphType &GT) {
- std::string Filename = GraphName + ".dot";
- O << "Writing '" << Filename << "'...";
- std::string ErrInfo;
- tool_output_file F(Filename.c_str(), ErrInfo);
-
- if (ErrInfo.empty()) {
- WriteGraph(F.os(), GT);
- F.os().close();
- if (!F.os().has_error()) {
- O << "\n";
- F.keep();
- return;
- }
- }
- O << " error opening file for writing!\n";
- F.os().clear_error();
-}
-
-
-//===----------------------------------------------------------------------===//
-// Call Graph Printer
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
- template<>
- struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits {
-
- DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
-
- static std::string getGraphName(CallGraph *F) {
- return "Call Graph";
- }
-
- static std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) {
- if (Node->getFunction())
- return ((Value*)Node->getFunction())->getName();
- return "external node";
- }
- };
-}
-
-
-namespace {
- struct CallGraphPrinter : public ModulePass {
- static char ID; // Pass ID, replacement for typeid
- CallGraphPrinter() : ModulePass(ID) {}
-
- virtual bool runOnModule(Module &M) {
- WriteGraphToFile(llvm::errs(), "callgraph", &getAnalysis<CallGraph>());
- return false;
- }
-
- void print(raw_ostream &OS, const llvm::Module*) const {}
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<CallGraph>();
- AU.setPreservesAll();
- }
- };
-}
+#include "llvm/Pass.h"
-char CallGraphPrinter::ID = 0;
-static RegisterPass<CallGraphPrinter> P2("dot-callgraph",
- "Print Call Graph to 'dot' file");
+using namespace llvm;
//===----------------------------------------------------------------------===//
// DomInfoPrinter Pass
diff --git a/tools/opt/LLVMBuild.txt b/tools/opt/LLVMBuild.txt
index b174431e042a..77b94469edd5 100644
--- a/tools/opt/LLVMBuild.txt
+++ b/tools/opt/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Tool
name = opt
parent = Tools
-required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar all-targets
+required_libraries = AsmParser BitReader BitWriter IRReader IPO Instrumentation Scalar ObjCARC all-targets
diff --git a/tools/opt/Makefile b/tools/opt/Makefile
index ee7e1cf796a0..a451005574a5 100644
--- a/tools/opt/Makefile
+++ b/tools/opt/Makefile
@@ -9,6 +9,6 @@
LEVEL := ../..
TOOLNAME := opt
-LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo vectorize all-targets
+LINK_COMPONENTS := bitreader bitwriter asmparser irreader instrumentation scalaropts objcarcopts ipo vectorize all-targets
include $(LEVEL)/Makefile.common
diff --git a/tools/opt/PrintSCC.cpp b/tools/opt/PrintSCC.cpp
index 11efdcdfd226..a502fa743c19 100644
--- a/tools/opt/PrintSCC.cpp
+++ b/tools/opt/PrintSCC.cpp
@@ -25,12 +25,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Pass.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SCCIterator.h"
using namespace llvm;
namespace {
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index bac0d4694799..ba82bded2bc8 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -12,40 +12,41 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/LLVMContext.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/CodeGen/CommandFlags.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/RegionPass.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Support/PassNameParser.h"
-#include "llvm/Support/Signals.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/LinkAllIR.h"
+#include "llvm/LinkAllPasses.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/IRReader.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/TargetSelect.h"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/LinkAllPasses.h"
-#include "llvm/LinkAllVMCore.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include <memory>
#include <algorithm>
+#include <memory>
using namespace llvm;
// The OptimizationList is automatically populated with registered Passes by the
@@ -523,16 +524,11 @@ CodeGenOpt::Level GetCodeGenOptLevel() {
}
// Returns the TargetMachine instance or zero if no triple is provided.
-static TargetMachine* GetTargetMachine(std::string TripleStr) {
- if (TripleStr.empty())
- return 0;
-
- // Get the target specific parser.
+static TargetMachine* GetTargetMachine(Triple TheTriple) {
std::string Error;
- Triple TheTriple(Triple::normalize(TargetTriple));
-
const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple,
Error);
+ // Some modules don't specify a triple, and this is okay.
if (!TheTarget) {
return 0;
}
@@ -572,6 +568,7 @@ int main(int argc, char **argv) {
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeCore(Registry);
initializeScalarOpts(Registry);
+ initializeObjCARCOpts(Registry);
initializeVectorization(Registry);
initializeIPO(Registry);
initializeAnalysis(Registry);
@@ -655,11 +652,15 @@ int main(int argc, char **argv) {
if (TD)
Passes.add(TD);
- std::auto_ptr<TargetMachine> TM(GetTargetMachine(TargetTriple));
- if (TM.get()) {
- Passes.add(new TargetTransformInfo(TM->getScalarTargetTransformInfo(),
- TM->getVectorTargetTransformInfo()));
- }
+ Triple ModuleTriple(M->getTargetTriple());
+ TargetMachine *Machine = 0;
+ if (ModuleTriple.getArch())
+ Machine = GetTargetMachine(Triple(ModuleTriple));
+ std::auto_ptr<TargetMachine> TM(Machine);
+
+ // Add internal analysis passes from the target machine.
+ if (TM.get())
+ TM->addAnalysisPasses(Passes);
OwningPtr<FunctionPassManager> FPasses;
if (OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz || OptLevelO3) {
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
index 117b8204b9ed..278983565ced 100644
--- a/unittests/ADT/APFloatTest.cpp
+++ b/unittests/ADT/APFloatTest.cpp
@@ -7,14 +7,14 @@
//
//===----------------------------------------------------------------------===//
-#include <ostream>
-#include <string>
-#include "llvm/Support/raw_ostream.h"
-#include "gtest/gtest.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <ostream>
+#include <string>
using namespace llvm;
@@ -33,6 +33,58 @@ static std::string convertToString(double d, unsigned Prec, unsigned Pad) {
namespace {
+TEST(APFloatTest, Denormal) {
+ APFloat::roundingMode rdmd = APFloat::rmNearestTiesToEven;
+
+ // Test single precision
+ {
+ const char *MinNormalStr = "1.17549435082228750797e-38";
+ EXPECT_FALSE(APFloat(APFloat::IEEEsingle, MinNormalStr).isDenormal());
+ EXPECT_FALSE(APFloat(APFloat::IEEEsingle, 0.0).isDenormal());
+
+ APFloat Val2(APFloat::IEEEsingle, 2.0e0);
+ APFloat T(APFloat::IEEEsingle, MinNormalStr);
+ T.divide(Val2, rdmd);
+ EXPECT_TRUE(T.isDenormal());
+ }
+
+ // Test double precision
+ {
+ const char *MinNormalStr = "2.22507385850720138309e-308";
+ EXPECT_FALSE(APFloat(APFloat::IEEEdouble, MinNormalStr).isDenormal());
+ EXPECT_FALSE(APFloat(APFloat::IEEEdouble, 0.0).isDenormal());
+
+ APFloat Val2(APFloat::IEEEdouble, 2.0e0);
+ APFloat T(APFloat::IEEEdouble, MinNormalStr);
+ T.divide(Val2, rdmd);
+ EXPECT_TRUE(T.isDenormal());
+ }
+
+ // Test Intel double-ext
+ {
+ const char *MinNormalStr = "3.36210314311209350626e-4932";
+ EXPECT_FALSE(APFloat(APFloat::x87DoubleExtended, MinNormalStr).isDenormal());
+ EXPECT_FALSE(APFloat(APFloat::x87DoubleExtended, 0.0).isDenormal());
+
+ APFloat Val2(APFloat::x87DoubleExtended, 2.0e0);
+ APFloat T(APFloat::x87DoubleExtended, MinNormalStr);
+ T.divide(Val2, rdmd);
+ EXPECT_TRUE(T.isDenormal());
+ }
+
+ // Test quadruple precision
+ {
+ const char *MinNormalStr = "3.36210314311209350626267781732175260e-4932";
+ EXPECT_FALSE(APFloat(APFloat::IEEEquad, MinNormalStr).isDenormal());
+ EXPECT_FALSE(APFloat(APFloat::IEEEquad, 0.0).isDenormal());
+
+ APFloat Val2(APFloat::IEEEquad, 2.0e0);
+ APFloat T(APFloat::IEEEquad, MinNormalStr);
+ T.divide(Val2, rdmd);
+ EXPECT_TRUE(T.isDenormal());
+ }
+}
+
TEST(APFloatTest, Zero) {
EXPECT_EQ(0.0f, APFloat(0.0f).convertToFloat());
EXPECT_EQ(-0.0f, APFloat(-0.0f).convertToFloat());
@@ -742,6 +794,32 @@ TEST(APFloatTest, convert) {
test.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
EXPECT_EQ(4294967295.0, test.convertToDouble());
EXPECT_FALSE(losesInfo);
+
+ test = APFloat::getSNaN(APFloat::IEEEsingle);
+ APFloat X87SNaN = APFloat::getSNaN(APFloat::x87DoubleExtended);
+ test.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_TRUE(test.bitwiseIsEqual(X87SNaN));
+ EXPECT_FALSE(losesInfo);
+
+ test = APFloat::getQNaN(APFloat::IEEEsingle);
+ APFloat X87QNaN = APFloat::getQNaN(APFloat::x87DoubleExtended);
+ test.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_TRUE(test.bitwiseIsEqual(X87QNaN));
+ EXPECT_FALSE(losesInfo);
+
+ test = APFloat::getSNaN(APFloat::x87DoubleExtended);
+ test.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_TRUE(test.bitwiseIsEqual(X87SNaN));
+ EXPECT_FALSE(losesInfo);
+
+ test = APFloat::getQNaN(APFloat::x87DoubleExtended);
+ test.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ EXPECT_TRUE(test.bitwiseIsEqual(X87QNaN));
+ EXPECT_FALSE(losesInfo);
}
TEST(APFloatTest, PPCDoubleDouble) {
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index 49d7e703de5b..f129fa71c8e0 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include <ostream>
-#include "gtest/gtest.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallString.h"
+#include "gtest/gtest.h"
+#include <ostream>
using namespace llvm;
@@ -56,6 +56,14 @@ TEST(APIntTest, i33_Count) {
#endif
TEST(APIntTest, i65_Count) {
+ APInt i65(65, 0, true);
+ EXPECT_EQ(65u, i65.countLeadingZeros());
+ EXPECT_EQ(0u, i65.countLeadingOnes());
+ EXPECT_EQ(0u, i65.getActiveBits());
+ EXPECT_EQ(1u, i65.getActiveWords());
+ EXPECT_EQ(65u, i65.countTrailingZeros());
+ EXPECT_EQ(0u, i65.countPopulation());
+
APInt i65minus(65, 0, true);
i65minus.setBit(64);
EXPECT_EQ(0u, i65minus.countLeadingZeros());
@@ -514,4 +522,14 @@ TEST(APIntTest, Rotate) {
EXPECT_EQ(Rot, Big.rotr(144));
}
+TEST(APIntTest, Splat) {
+ APInt ValA(8, 0x01);
+ EXPECT_EQ(ValA, APInt::getSplat(8, ValA));
+ EXPECT_EQ(APInt(64, 0x0101010101010101ULL), APInt::getSplat(64, ValA));
+
+ APInt ValB(3, 5);
+ EXPECT_EQ(APInt(4, 0xD), APInt::getSplat(4, ValB));
+ EXPECT_EQ(APInt(15, 0xDB6D), APInt::getSplat(15, ValB));
+}
+
}
diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt
index 94f7fda2a9e3..9aad793d8bc4 100644
--- a/unittests/ADT/CMakeLists.txt
+++ b/unittests/ADT/CMakeLists.txt
@@ -18,12 +18,15 @@ set(ADTSources
IntEqClassesTest.cpp
IntervalMapTest.cpp
IntrusiveRefCntPtrTest.cpp
+ MapVectorTest.cpp
+ OptionalTest.cpp
PackedVectorTest.cpp
SCCIteratorTest.cpp
SmallPtrSetTest.cpp
SmallStringTest.cpp
SmallVectorTest.cpp
SparseBitVectorTest.cpp
+ SparseMultiSetTest.cpp
SparseSetTest.cpp
StringMapTest.cpp
StringRefTest.cpp
diff --git a/unittests/ADT/MapVectorTest.cpp b/unittests/ADT/MapVectorTest.cpp
new file mode 100644
index 000000000000..11178bc15e84
--- /dev/null
+++ b/unittests/ADT/MapVectorTest.cpp
@@ -0,0 +1,55 @@
+//===- unittest/ADT/MapVectorTest.cpp - MapVector unit tests ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/MapVector.h"
+#include <utility>
+
+using namespace llvm;
+
+TEST(MapVectorTest, insert_pop) {
+ MapVector<int, int> MV;
+ std::pair<MapVector<int, int>::iterator, bool> R;
+
+ R = MV.insert(std::make_pair(1, 2));
+ ASSERT_EQ(R.first, MV.begin());
+ EXPECT_EQ(R.first->first, 1);
+ EXPECT_EQ(R.first->second, 2);
+ EXPECT_TRUE(R.second);
+
+ R = MV.insert(std::make_pair(1, 3));
+ ASSERT_EQ(R.first, MV.begin());
+ EXPECT_EQ(R.first->first, 1);
+ EXPECT_EQ(R.first->second, 2);
+ EXPECT_FALSE(R.second);
+
+ R = MV.insert(std::make_pair(4, 5));
+ ASSERT_NE(R.first, MV.end());
+ EXPECT_EQ(R.first->first, 4);
+ EXPECT_EQ(R.first->second, 5);
+ EXPECT_TRUE(R.second);
+
+ EXPECT_EQ(MV.size(), 2u);
+ EXPECT_EQ(MV[1], 2);
+ EXPECT_EQ(MV[4], 5);
+
+ MV.pop_back();
+ EXPECT_EQ(MV.size(), 1u);
+ EXPECT_EQ(MV[1], 2);
+
+ R = MV.insert(std::make_pair(4, 7));
+ ASSERT_NE(R.first, MV.end());
+ EXPECT_EQ(R.first->first, 4);
+ EXPECT_EQ(R.first->second, 7);
+ EXPECT_TRUE(R.second);
+
+ EXPECT_EQ(MV.size(), 2u);
+ EXPECT_EQ(MV[1], 2);
+ EXPECT_EQ(MV[4], 7);
+}
diff --git a/unittests/ADT/OptionalTest.cpp b/unittests/ADT/OptionalTest.cpp
new file mode 100644
index 000000000000..21e38475d309
--- /dev/null
+++ b/unittests/ADT/OptionalTest.cpp
@@ -0,0 +1,284 @@
+//===- llvm/unittest/ADT/OptionalTest.cpp - Optional unit tests -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/Optional.h"
+using namespace llvm;
+
+namespace {
+
+struct NonDefaultConstructible {
+ static unsigned CopyConstructions;
+ static unsigned Destructions;
+ static unsigned CopyAssignments;
+ explicit NonDefaultConstructible(int) {
+ }
+ NonDefaultConstructible(const NonDefaultConstructible&) {
+ ++CopyConstructions;
+ }
+ NonDefaultConstructible &operator=(const NonDefaultConstructible&) {
+ ++CopyAssignments;
+ return *this;
+ }
+ ~NonDefaultConstructible() {
+ ++Destructions;
+ }
+ static void ResetCounts() {
+ CopyConstructions = 0;
+ Destructions = 0;
+ CopyAssignments = 0;
+ }
+};
+
+unsigned NonDefaultConstructible::CopyConstructions = 0;
+unsigned NonDefaultConstructible::Destructions = 0;
+unsigned NonDefaultConstructible::CopyAssignments = 0;
+
+// Test fixture
+class OptionalTest : public testing::Test {
+};
+
+TEST_F(OptionalTest, NonDefaultConstructibleTest) {
+ Optional<NonDefaultConstructible> O;
+ EXPECT_FALSE(O);
+}
+
+TEST_F(OptionalTest, ResetTest) {
+ NonDefaultConstructible::ResetCounts();
+ Optional<NonDefaultConstructible> O(NonDefaultConstructible(3));
+ EXPECT_EQ(1u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(1u, NonDefaultConstructible::Destructions);
+ NonDefaultConstructible::ResetCounts();
+ O.reset();
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(1u, NonDefaultConstructible::Destructions);
+}
+
+TEST_F(OptionalTest, InitializationLeakTest) {
+ NonDefaultConstructible::ResetCounts();
+ Optional<NonDefaultConstructible>(NonDefaultConstructible(3));
+ EXPECT_EQ(1u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(2u, NonDefaultConstructible::Destructions);
+}
+
+TEST_F(OptionalTest, CopyConstructionTest) {
+ NonDefaultConstructible::ResetCounts();
+ {
+ Optional<NonDefaultConstructible> A(NonDefaultConstructible(3));
+ EXPECT_EQ(1u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(1u, NonDefaultConstructible::Destructions);
+ NonDefaultConstructible::ResetCounts();
+ Optional<NonDefaultConstructible> B(A);
+ EXPECT_EQ(1u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(0u, NonDefaultConstructible::Destructions);
+ NonDefaultConstructible::ResetCounts();
+ }
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(2u, NonDefaultConstructible::Destructions);
+}
+
+TEST_F(OptionalTest, ConstructingCopyAssignmentTest) {
+ NonDefaultConstructible::ResetCounts();
+ {
+ Optional<NonDefaultConstructible> A(NonDefaultConstructible(3));
+ Optional<NonDefaultConstructible> B;
+ EXPECT_EQ(1u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(1u, NonDefaultConstructible::Destructions);
+ NonDefaultConstructible::ResetCounts();
+ B = A;
+ EXPECT_EQ(1u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(0u, NonDefaultConstructible::Destructions);
+ NonDefaultConstructible::ResetCounts();
+ }
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(2u, NonDefaultConstructible::Destructions);
+}
+
+TEST_F(OptionalTest, CopyingCopyAssignmentTest) {
+ NonDefaultConstructible::ResetCounts();
+ {
+ Optional<NonDefaultConstructible> A(NonDefaultConstructible(3));
+ Optional<NonDefaultConstructible> B(NonDefaultConstructible(4));
+ EXPECT_EQ(2u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(2u, NonDefaultConstructible::Destructions);
+ NonDefaultConstructible::ResetCounts();
+ B = A;
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(1u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(0u, NonDefaultConstructible::Destructions);
+ NonDefaultConstructible::ResetCounts();
+ }
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(2u, NonDefaultConstructible::Destructions);
+}
+
+TEST_F(OptionalTest, DeletingCopyAssignmentTest) {
+ NonDefaultConstructible::ResetCounts();
+ {
+ Optional<NonDefaultConstructible> A;
+ Optional<NonDefaultConstructible> B(NonDefaultConstructible(3));
+ EXPECT_EQ(1u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(1u, NonDefaultConstructible::Destructions);
+ NonDefaultConstructible::ResetCounts();
+ B = A;
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(1u, NonDefaultConstructible::Destructions);
+ NonDefaultConstructible::ResetCounts();
+ }
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(0u, NonDefaultConstructible::Destructions);
+}
+
+TEST_F(OptionalTest, NullCopyConstructionTest) {
+ NonDefaultConstructible::ResetCounts();
+ {
+ Optional<NonDefaultConstructible> A;
+ Optional<NonDefaultConstructible> B;
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(0u, NonDefaultConstructible::Destructions);
+ NonDefaultConstructible::ResetCounts();
+ B = A;
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(0u, NonDefaultConstructible::Destructions);
+ NonDefaultConstructible::ResetCounts();
+ }
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyConstructions);
+ EXPECT_EQ(0u, NonDefaultConstructible::CopyAssignments);
+ EXPECT_EQ(0u, NonDefaultConstructible::Destructions);
+}
+
+#if LLVM_HAS_RVALUE_REFERENCES
+struct MoveOnly {
+ static unsigned MoveConstructions;
+ static unsigned Destructions;
+ static unsigned MoveAssignments;
+ int val;
+ explicit MoveOnly(int val) : val(val) {
+ }
+ MoveOnly(MoveOnly&& other) {
+ val = other.val;
+ ++MoveConstructions;
+ }
+ MoveOnly &operator=(MoveOnly&& other) {
+ val = other.val;
+ ++MoveAssignments;
+ return *this;
+ }
+ ~MoveOnly() {
+ ++Destructions;
+ }
+ static void ResetCounts() {
+ MoveConstructions = 0;
+ Destructions = 0;
+ MoveAssignments = 0;
+ }
+};
+
+unsigned MoveOnly::MoveConstructions = 0;
+unsigned MoveOnly::Destructions = 0;
+unsigned MoveOnly::MoveAssignments = 0;
+
+TEST_F(OptionalTest, MoveOnlyNull) {
+ MoveOnly::ResetCounts();
+ Optional<MoveOnly> O;
+ EXPECT_EQ(0u, MoveOnly::MoveConstructions);
+ EXPECT_EQ(0u, MoveOnly::MoveAssignments);
+ EXPECT_EQ(0u, MoveOnly::Destructions);
+}
+
+TEST_F(OptionalTest, MoveOnlyConstruction) {
+ MoveOnly::ResetCounts();
+ Optional<MoveOnly> O(MoveOnly(3));
+ EXPECT_TRUE((bool)O);
+ EXPECT_EQ(3, O->val);
+ EXPECT_EQ(1u, MoveOnly::MoveConstructions);
+ EXPECT_EQ(0u, MoveOnly::MoveAssignments);
+ EXPECT_EQ(1u, MoveOnly::Destructions);
+}
+
+TEST_F(OptionalTest, MoveOnlyMoveConstruction) {
+ Optional<MoveOnly> A(MoveOnly(3));
+ MoveOnly::ResetCounts();
+ Optional<MoveOnly> B(std::move(A));
+ EXPECT_FALSE((bool)A);
+ EXPECT_TRUE((bool)B);
+ EXPECT_EQ(3, B->val);
+ EXPECT_EQ(1u, MoveOnly::MoveConstructions);
+ EXPECT_EQ(0u, MoveOnly::MoveAssignments);
+ EXPECT_EQ(1u, MoveOnly::Destructions);
+}
+
+TEST_F(OptionalTest, MoveOnlyAssignment) {
+ MoveOnly::ResetCounts();
+ Optional<MoveOnly> O;
+ O = MoveOnly(3);
+ EXPECT_TRUE((bool)O);
+ EXPECT_EQ(3, O->val);
+ EXPECT_EQ(1u, MoveOnly::MoveConstructions);
+ EXPECT_EQ(0u, MoveOnly::MoveAssignments);
+ EXPECT_EQ(1u, MoveOnly::Destructions);
+}
+
+TEST_F(OptionalTest, MoveOnlyInitializingAssignment) {
+ Optional<MoveOnly> A(MoveOnly(3));
+ Optional<MoveOnly> B;
+ MoveOnly::ResetCounts();
+ B = std::move(A);
+ EXPECT_FALSE((bool)A);
+ EXPECT_TRUE((bool)B);
+ EXPECT_EQ(3, B->val);
+ EXPECT_EQ(1u, MoveOnly::MoveConstructions);
+ EXPECT_EQ(0u, MoveOnly::MoveAssignments);
+ EXPECT_EQ(1u, MoveOnly::Destructions);
+}
+
+TEST_F(OptionalTest, MoveOnlyNullingAssignment) {
+ Optional<MoveOnly> A;
+ Optional<MoveOnly> B(MoveOnly(3));
+ MoveOnly::ResetCounts();
+ B = std::move(A);
+ EXPECT_FALSE((bool)A);
+ EXPECT_FALSE((bool)B);
+ EXPECT_EQ(0u, MoveOnly::MoveConstructions);
+ EXPECT_EQ(0u, MoveOnly::MoveAssignments);
+ EXPECT_EQ(1u, MoveOnly::Destructions);
+}
+
+TEST_F(OptionalTest, MoveOnlyAssigningAssignment) {
+ Optional<MoveOnly> A(MoveOnly(3));
+ Optional<MoveOnly> B(MoveOnly(4));
+ MoveOnly::ResetCounts();
+ B = std::move(A);
+ EXPECT_FALSE((bool)A);
+ EXPECT_TRUE((bool)B);
+ EXPECT_EQ(3, B->val);
+ EXPECT_EQ(0u, MoveOnly::MoveConstructions);
+ EXPECT_EQ(1u, MoveOnly::MoveAssignments);
+ EXPECT_EQ(1u, MoveOnly::Destructions);
+}
+#endif
+
+} // end anonymous namespace
+
diff --git a/unittests/ADT/SCCIteratorTest.cpp b/unittests/ADT/SCCIteratorTest.cpp
index 00fa0665dda8..92b4b317cfbd 100644
--- a/unittests/ADT/SCCIteratorTest.cpp
+++ b/unittests/ADT/SCCIteratorTest.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include <limits.h>
-#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/GraphTraits.h"
#include "gtest/gtest.h"
+#include <limits.h>
using namespace llvm;
diff --git a/unittests/ADT/SmallPtrSetTest.cpp b/unittests/ADT/SmallPtrSetTest.cpp
index 9114875e0035..f85d7c941ebd 100644
--- a/unittests/ADT/SmallPtrSetTest.cpp
+++ b/unittests/ADT/SmallPtrSetTest.cpp
@@ -17,6 +17,61 @@
using namespace llvm;
// SmallPtrSet swapping test.
+TEST(SmallPtrSetTest, GrowthTest) {
+ int i;
+ int buf[8];
+ for(i=0; i<8; ++i) buf[i]=0;
+
+
+ SmallPtrSet<int *, 4> s;
+ typedef SmallPtrSet<int *, 4>::iterator iter;
+
+ s.insert(&buf[0]);
+ s.insert(&buf[1]);
+ s.insert(&buf[2]);
+ s.insert(&buf[3]);
+ EXPECT_EQ(4U, s.size());
+
+ i = 0;
+ for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
+ (**I)++;
+ EXPECT_EQ(4, i);
+ for(i=0; i<8; ++i)
+ EXPECT_EQ(i<4?1:0,buf[i]);
+
+ s.insert(&buf[4]);
+ s.insert(&buf[5]);
+ s.insert(&buf[6]);
+ s.insert(&buf[7]);
+
+ i = 0;
+ for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
+ (**I)++;
+ EXPECT_EQ(8, i);
+ s.erase(&buf[4]);
+ s.erase(&buf[5]);
+ s.erase(&buf[6]);
+ s.erase(&buf[7]);
+ EXPECT_EQ(4U, s.size());
+
+ i = 0;
+ for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
+ (**I)++;
+ EXPECT_EQ(4, i);
+ for(i=0; i<8; ++i)
+ EXPECT_EQ(i<4?3:1,buf[i]);
+
+ s.clear();
+ for(i=0; i<8; ++i) buf[i]=0;
+  for(i=0; i<128; ++i) s.insert(&buf[i%8]);    // test repeated entries
+ EXPECT_EQ(8U, s.size());
+ for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i)
+ (**I)++;
+ for(i=0; i<8; ++i)
+ EXPECT_EQ(1,buf[i]);
+}
+
+
TEST(SmallPtrSetTest, SwapTest) {
int buf[10];
diff --git a/unittests/ADT/SmallStringTest.cpp b/unittests/ADT/SmallStringTest.cpp
index 660ac44a8bca..9398e99c9119 100644
--- a/unittests/ADT/SmallStringTest.cpp
+++ b/unittests/ADT/SmallStringTest.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
#include "llvm/ADT/SmallString.h"
-#include <stdarg.h>
+#include "gtest/gtest.h"
#include <climits>
#include <cstring>
+#include <stdarg.h>
using namespace llvm;
diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp
index 7fd71f5eb067..90c7982699a7 100644
--- a/unittests/ADT/SmallVectorTest.cpp
+++ b/unittests/ADT/SmallVectorTest.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
-#include <stdarg.h>
+#include "gtest/gtest.h"
#include <list>
+#include <stdarg.h>
using namespace llvm;
diff --git a/unittests/ADT/SparseMultiSetTest.cpp b/unittests/ADT/SparseMultiSetTest.cpp
new file mode 100644
index 000000000000..032990e4bcd6
--- /dev/null
+++ b/unittests/ADT/SparseMultiSetTest.cpp
@@ -0,0 +1,235 @@
+//===- ADT/SparseMultiSetTest.cpp - SparseMultiSet unit tests --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SparseMultiSet.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+typedef SparseMultiSet<unsigned> USet;
+
+// Empty set tests.
+TEST(SparseMultiSetTest, EmptySet) {
+ USet Set;
+ EXPECT_TRUE(Set.empty());
+ EXPECT_EQ(0u, Set.size());
+
+ Set.setUniverse(10);
+
+ // Lookups on empty set.
+ EXPECT_TRUE(Set.find(0) == Set.end());
+ EXPECT_TRUE(Set.find(9) == Set.end());
+
+ // Same thing on a const reference.
+ const USet &CSet = Set;
+ EXPECT_TRUE(CSet.empty());
+ EXPECT_EQ(0u, CSet.size());
+ EXPECT_TRUE(CSet.find(0) == CSet.end());
+ USet::const_iterator I = CSet.find(5);
+ EXPECT_TRUE(I == CSet.end());
+}
+
+// Single entry set tests.
+TEST(SparseMultiSetTest, SingleEntrySet) {
+ USet Set;
+ Set.setUniverse(10);
+ USet::iterator I = Set.insert(5);
+ EXPECT_TRUE(I != Set.end());
+ EXPECT_TRUE(*I == 5);
+
+ EXPECT_FALSE(Set.empty());
+ EXPECT_EQ(1u, Set.size());
+
+ EXPECT_TRUE(Set.find(0) == Set.end());
+ EXPECT_TRUE(Set.find(9) == Set.end());
+
+ EXPECT_FALSE(Set.contains(0));
+ EXPECT_TRUE(Set.contains(5));
+
+ // Extra insert.
+ I = Set.insert(5);
+ EXPECT_TRUE(I != Set.end());
+ EXPECT_TRUE(I == ++Set.find(5));
+ I--;
+ EXPECT_TRUE(I == Set.find(5));
+
+ // Find a non-existent element.
+ I = Set.find(1);
+ EXPECT_TRUE(I == Set.end());
+ EXPECT_EQ(2u, Set.size());
+ EXPECT_EQ(5u, *Set.find(5));
+
+ // Erase iterator.
+ I = Set.find(5);
+ EXPECT_TRUE(I != Set.end());
+ I = Set.erase(I);
+ EXPECT_TRUE(I != Set.end());
+ I = Set.erase(I);
+ EXPECT_TRUE(I == Set.end());
+ EXPECT_TRUE(Set.empty());
+}
+
+// Multiple entry set tests.
+TEST(SparseMultiSetTest, MultipleEntrySet) {
+ USet Set;
+ Set.setUniverse(10);
+
+ Set.insert(5);
+ Set.insert(5);
+ Set.insert(5);
+ Set.insert(3);
+ Set.insert(2);
+ Set.insert(1);
+ Set.insert(4);
+ EXPECT_EQ(7u, Set.size());
+
+ // Erase last element by key.
+ EXPECT_TRUE(Set.erase(Set.find(4)) == Set.end());
+ EXPECT_EQ(6u, Set.size());
+ EXPECT_FALSE(Set.contains(4));
+ EXPECT_TRUE(Set.find(4) == Set.end());
+
+ // Erase first element by key.
+ EXPECT_EQ(3u, Set.count(5));
+ EXPECT_TRUE(Set.find(5) != Set.end());
+ EXPECT_TRUE(Set.erase(Set.find(5)) != Set.end());
+ EXPECT_EQ(5u, Set.size());
+ EXPECT_EQ(2u, Set.count(5));
+
+ Set.insert(6);
+ Set.insert(7);
+ EXPECT_EQ(7u, Set.size());
+
+ // Erase tail by iterator.
+ EXPECT_TRUE(Set.getTail(6) == Set.getHead(6));
+ USet::iterator I = Set.erase(Set.find(6));
+ EXPECT_TRUE(I == Set.end());
+ EXPECT_EQ(6u, Set.size());
+
+ // Erase tails by iterator.
+ EXPECT_EQ(2u, Set.count(5));
+ I = Set.getTail(5);
+ I = Set.erase(I);
+ EXPECT_TRUE(I == Set.end());
+ --I;
+ EXPECT_EQ(1u, Set.count(5));
+ EXPECT_EQ(5u, *I);
+ I = Set.erase(I);
+ EXPECT_TRUE(I == Set.end());
+ EXPECT_EQ(0u, Set.count(5));
+
+ Set.insert(8);
+ Set.insert(8);
+ Set.insert(8);
+ Set.insert(8);
+ Set.insert(8);
+
+ // Erase all the 8s
+ EXPECT_EQ(5, std::distance(Set.getHead(8), Set.end()));
+ Set.eraseAll(8);
+ EXPECT_EQ(0, std::distance(Set.getHead(8), Set.end()));
+
+ // Clear and resize the universe.
+ Set.clear();
+ EXPECT_EQ(0u, Set.size());
+ EXPECT_FALSE(Set.contains(3));
+ Set.setUniverse(1000);
+
+ // Add more than 256 elements.
+ for (unsigned i = 100; i != 800; ++i)
+ Set.insert(i);
+
+ for (unsigned i = 0; i != 10; ++i)
+ Set.eraseAll(i);
+
+ for (unsigned i = 100; i != 800; ++i)
+ EXPECT_EQ(1u, Set.count(i));
+
+ EXPECT_FALSE(Set.contains(99));
+ EXPECT_FALSE(Set.contains(800));
+ EXPECT_EQ(700u, Set.size());
+}
+
+// Test out iterators
+TEST(SparseMultiSetTest, Iterators) {
+ USet Set;
+ Set.setUniverse(100);
+
+ Set.insert(0);
+ Set.insert(1);
+ Set.insert(2);
+ Set.insert(0);
+ Set.insert(1);
+ Set.insert(0);
+
+ USet::RangePair RangePair = Set.equal_range(0);
+ USet::iterator B = RangePair.first;
+ USet::iterator E = RangePair.second;
+
+ // Move the iterators around, going to end and coming back.
+ EXPECT_EQ(3, std::distance(B, E));
+ EXPECT_EQ(B, --(--(--E)));
+ EXPECT_EQ(++(++(++E)), Set.end());
+ EXPECT_EQ(B, --(--(--E)));
+ EXPECT_EQ(++(++(++E)), Set.end());
+
+ // Insert into the tail, and move around again
+ Set.insert(0);
+ EXPECT_EQ(B, --(--(--(--E))));
+ EXPECT_EQ(++(++(++(++E))), Set.end());
+ EXPECT_EQ(B, --(--(--(--E))));
+ EXPECT_EQ(++(++(++(++E))), Set.end());
+
+ // Erase a tail, and move around again
+ USet::iterator Erased = Set.erase(Set.getTail(0));
+ EXPECT_EQ(Erased, E);
+ EXPECT_EQ(B, --(--(--E)));
+
+ USet Set2;
+ Set2.setUniverse(11);
+ Set2.insert(3);
+ EXPECT_TRUE(!Set2.contains(0));
+ EXPECT_TRUE(!Set.contains(3));
+
+ EXPECT_EQ(Set2.getHead(3), Set2.getTail(3));
+ EXPECT_EQ(Set2.getHead(0), Set2.getTail(0));
+ B = Set2.find(3);
+ EXPECT_EQ(Set2.find(3), --(++B));
+}
+
+struct Alt {
+ unsigned Value;
+ explicit Alt(unsigned x) : Value(x) {}
+ unsigned getSparseSetIndex() const { return Value - 1000; }
+};
+
+TEST(SparseMultiSetTest, AltStructSet) {
+ typedef SparseMultiSet<Alt> ASet;
+ ASet Set;
+ Set.setUniverse(10);
+ Set.insert(Alt(1005));
+
+ ASet::iterator I = Set.find(5);
+ ASSERT_TRUE(I != Set.end());
+ EXPECT_EQ(1005u, I->Value);
+
+ Set.insert(Alt(1006));
+ Set.insert(Alt(1006));
+ I = Set.erase(Set.find(6));
+ ASSERT_TRUE(I != Set.end());
+ EXPECT_EQ(1006u, I->Value);
+ I = Set.erase(Set.find(6));
+ ASSERT_TRUE(I == Set.end());
+
+ EXPECT_TRUE(Set.contains(5));
+ EXPECT_FALSE(Set.contains(6));
+}
+} // namespace
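The Alt struct demonstrates the customization point for non-integer keys: a value type only has to expose getSparseSetIndex() mapping itself into the universe, and lookup then goes by index. A sketch with a hypothetical Reg record (the 1000 offset mirrors Alt; Reg and its fields are illustrative, not LLVM API):

struct Reg {
  unsigned ID;  // hypothetical: IDs 1000..1009 within a universe of 10
  explicit Reg(unsigned I) : ID(I) {}
  unsigned getSparseSetIndex() const { return ID - 1000; }
};

void adapterSketch() {
  llvm::SparseMultiSet<Reg> Regs;
  Regs.setUniverse(10);
  Regs.insert(Reg(1003));
  // find() and contains() take the sparse index, not the full key object.
  bool Present = Regs.contains(3);
  (void)Present;
}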
diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp
index ead372f365d6..fa87cd0e2c86 100644
--- a/unittests/ADT/StringRefTest.cpp
+++ b/unittests/ADT/StringRefTest.cpp
@@ -7,11 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
using namespace llvm;
namespace llvm {
diff --git a/unittests/ADT/TinyPtrVectorTest.cpp b/unittests/ADT/TinyPtrVectorTest.cpp
index 05dd797e0141..a4f92ffbe383 100644
--- a/unittests/ADT/TinyPtrVectorTest.cpp
+++ b/unittests/ADT/TinyPtrVectorTest.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/type_traits.h"
+#include "gtest/gtest.h"
#include <algorithm>
#include <list>
#include <vector>
@@ -157,7 +157,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveCtorTest) {
this->expectValues(Copy2, this->testArray(42));
this->expectValues(this->V2, this->testArray(0));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
TypeParam Move(std::move(Copy2));
this->expectValues(Move, this->testArray(42));
this->expectValues(Copy2, this->testArray(0));
@@ -168,7 +168,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(0));
this->expectValues(this->V2, this->testArray(0));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(0));
#endif
@@ -177,7 +177,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(0));
this->expectValues(this->V2, this->testArray(0));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(1), this->testArray(0));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(0));
@@ -187,7 +187,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(0));
this->expectValues(this->V2, this->testArray(0));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(2), this->testArray(0));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(0));
@@ -197,7 +197,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(0));
this->expectValues(this->V2, this->testArray(0));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(42), this->testArray(0));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(0));
@@ -207,7 +207,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(1));
this->expectValues(this->V2, this->testArray(1));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(0), this->testArray(1));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(1));
@@ -217,7 +217,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(2));
this->expectValues(this->V2, this->testArray(2));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(0), this->testArray(2));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(2));
@@ -227,7 +227,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(42));
this->expectValues(this->V2, this->testArray(42));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(0), this->testArray(42));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(42));
@@ -237,7 +237,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(1));
this->expectValues(this->V2, this->testArray(1));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(1));
#endif
@@ -246,7 +246,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(2));
this->expectValues(this->V2, this->testArray(2));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(1), this->testArray(2));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(2));
@@ -256,7 +256,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(42));
this->expectValues(this->V2, this->testArray(42));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(1), this->testArray(42));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(42));
@@ -266,7 +266,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(1));
this->expectValues(this->V2, this->testArray(1));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(2), this->testArray(1));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(1));
@@ -276,7 +276,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(2));
this->expectValues(this->V2, this->testArray(2));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(2), this->testArray(2));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(2));
@@ -286,7 +286,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(42));
this->expectValues(this->V2, this->testArray(42));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(2), this->testArray(42));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(42));
@@ -296,7 +296,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(1));
this->expectValues(this->V2, this->testArray(1));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(42), this->testArray(1));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(1));
@@ -306,7 +306,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(2));
this->expectValues(this->V2, this->testArray(2));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(42), this->testArray(2));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(2));
@@ -316,7 +316,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
this->V = this->V2;
this->expectValues(this->V, this->testArray(42));
this->expectValues(this->V2, this->testArray(42));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
this->setVectors(this->testArray(42), this->testArray(42));
this->V = std::move(this->V2);
this->expectValues(this->V, this->testArray(42));
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index 7c3ab9738940..b4028963908c 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -407,6 +407,11 @@ TEST(TripleTest, getOSVersion) {
unsigned Major, Minor, Micro;
T = Triple("i386-apple-darwin9");
+ EXPECT_TRUE(T.isMacOSX());
+ EXPECT_FALSE(T.isiOS());
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_TRUE(T.isArch32Bit());
+ EXPECT_FALSE(T.isArch64Bit());
T.getMacOSXVersion(Major, Minor, Micro);
EXPECT_EQ((unsigned)10, Major);
EXPECT_EQ((unsigned)5, Minor);
@@ -417,6 +422,11 @@ TEST(TripleTest, getOSVersion) {
EXPECT_EQ((unsigned)0, Micro);
T = Triple("x86_64-apple-darwin9");
+ EXPECT_TRUE(T.isMacOSX());
+ EXPECT_FALSE(T.isiOS());
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_FALSE(T.isArch32Bit());
+ EXPECT_TRUE(T.isArch64Bit());
T.getMacOSXVersion(Major, Minor, Micro);
EXPECT_EQ((unsigned)10, Major);
EXPECT_EQ((unsigned)5, Minor);
@@ -427,6 +437,11 @@ TEST(TripleTest, getOSVersion) {
EXPECT_EQ((unsigned)0, Micro);
T = Triple("x86_64-apple-macosx");
+ EXPECT_TRUE(T.isMacOSX());
+ EXPECT_FALSE(T.isiOS());
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_FALSE(T.isArch32Bit());
+ EXPECT_TRUE(T.isArch64Bit());
T.getMacOSXVersion(Major, Minor, Micro);
EXPECT_EQ((unsigned)10, Major);
EXPECT_EQ((unsigned)4, Minor);
@@ -437,6 +452,11 @@ TEST(TripleTest, getOSVersion) {
EXPECT_EQ((unsigned)0, Micro);
T = Triple("x86_64-apple-macosx10.7");
+ EXPECT_TRUE(T.isMacOSX());
+ EXPECT_FALSE(T.isiOS());
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_FALSE(T.isArch32Bit());
+ EXPECT_TRUE(T.isArch64Bit());
T.getMacOSXVersion(Major, Minor, Micro);
EXPECT_EQ((unsigned)10, Major);
EXPECT_EQ((unsigned)7, Minor);
@@ -447,6 +467,11 @@ TEST(TripleTest, getOSVersion) {
EXPECT_EQ((unsigned)0, Micro);
T = Triple("armv7-apple-ios");
+ EXPECT_FALSE(T.isMacOSX());
+ EXPECT_TRUE(T.isiOS());
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_TRUE(T.isArch32Bit());
+ EXPECT_FALSE(T.isArch64Bit());
T.getMacOSXVersion(Major, Minor, Micro);
EXPECT_EQ((unsigned)10, Major);
EXPECT_EQ((unsigned)4, Minor);
@@ -457,6 +482,11 @@ TEST(TripleTest, getOSVersion) {
EXPECT_EQ((unsigned)0, Micro);
T = Triple("armv7-apple-ios5.0");
+ EXPECT_FALSE(T.isMacOSX());
+ EXPECT_TRUE(T.isiOS());
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_TRUE(T.isArch32Bit());
+ EXPECT_FALSE(T.isArch64Bit());
T.getMacOSXVersion(Major, Minor, Micro);
EXPECT_EQ((unsigned)10, Major);
EXPECT_EQ((unsigned)4, Minor);
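Each new block pins down two independent facts about the triple string: its OS classification (isMacOSX vs. isiOS) and the pointer width implied by its architecture component. A brief usage sketch of the same predicates:

#include "llvm/ADT/Triple.h"

void tripleSketch() {
  llvm::Triple T("x86_64-apple-darwin9");
  bool Mac  = T.isMacOSX();      // true: darwin maps to Mac OS X, not iOS
  bool Is64 = T.isArch64Bit();   // true: x86_64 is a 64-bit architecture
  (void)Mac; (void)Is64;
}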
diff --git a/unittests/ADT/TwineTest.cpp b/unittests/ADT/TwineTest.cpp
index e9cc41d13fc0..39d3b561b668 100644
--- a/unittests/ADT/TwineTest.cpp
+++ b/unittests/ADT/TwineTest.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
using namespace llvm;
namespace {
diff --git a/unittests/ADT/ilistTest.cpp b/unittests/ADT/ilistTest.cpp
index 09a699a96246..0c0cd0fd56fe 100644
--- a/unittests/ADT/ilistTest.cpp
+++ b/unittests/ADT/ilistTest.cpp
@@ -7,10 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include <ostream>
-#include "gtest/gtest.h"
#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ilist_node.h"
+#include "gtest/gtest.h"
+#include <ostream>
using namespace llvm;
@@ -21,6 +22,7 @@ struct Node : ilist_node<Node> {
Node() {}
Node(int _Value) : Value(_Value) {}
+ ~Node() { Value = -1; }
};
TEST(ilistTest, Basic) {
@@ -41,4 +43,56 @@ TEST(ilistTest, Basic) {
EXPECT_EQ(1, ConstList.back().getPrevNode()->Value);
}
+TEST(ilistTest, SpliceOne) {
+ ilist<Node> List;
+ List.push_back(1);
+
+ // The single-element splice operation supports noops.
+ List.splice(List.begin(), List, List.begin());
+ EXPECT_EQ(1u, List.size());
+ EXPECT_EQ(1, List.front().Value);
+ EXPECT_TRUE(llvm::next(List.begin()) == List.end());
+
+ // Alternative noop. Move the first element behind itself.
+ List.push_back(2);
+ List.push_back(3);
+ List.splice(llvm::next(List.begin()), List, List.begin());
+ EXPECT_EQ(3u, List.size());
+ EXPECT_EQ(1, List.front().Value);
+ EXPECT_EQ(2, llvm::next(List.begin())->Value);
+ EXPECT_EQ(3, List.back().Value);
+}
+
+TEST(ilistTest, UnsafeClear) {
+ ilist<Node> List;
+
+ // Before even allocating a sentinel.
+ List.clearAndLeakNodesUnsafely();
+ EXPECT_EQ(0u, List.size());
+
+ // Empty list with sentinel.
+ ilist<Node>::iterator E = List.end();
+ List.clearAndLeakNodesUnsafely();
+ EXPECT_EQ(0u, List.size());
+ // The sentinel shouldn't change.
+ EXPECT_TRUE(E == List.end());
+
+ // List with contents.
+ List.push_back(1);
+ ASSERT_EQ(1u, List.size());
+ Node *N = List.begin();
+ EXPECT_EQ(1, N->Value);
+ List.clearAndLeakNodesUnsafely();
+ EXPECT_EQ(0u, List.size());
+ ASSERT_EQ(1, N->Value);
+ delete N;
+
+ // List is still functional.
+ List.push_back(5);
+ List.push_back(6);
+ ASSERT_EQ(2u, List.size());
+ EXPECT_EQ(5, List.front().Value);
+ EXPECT_EQ(6, List.back().Value);
+}
+
}
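clearAndLeakNodesUnsafely empties the list without running node destructors, so ownership of every node transfers back to the caller; that is why the test sees Value still equal to 1 (the ~Node poison of -1 never ran) and must delete N by hand. The contract, condensed (assumes the Node type from the test and a non-empty list):

void unsafeClearSketch(llvm::ilist<Node> &List) {
  Node *First = List.begin();        // grab a handle before clearing
  List.clearAndLeakNodesUnsafely();  // no destructors, no frees
  // List.size() is now 0, but *First is alive and untouched...
  delete First;                      // ...so the caller must release it.
}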
diff --git a/unittests/Analysis/ScalarEvolutionTest.cpp b/unittests/Analysis/ScalarEvolutionTest.cpp
index c30492a5f006..398d09e5a873 100644
--- a/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -8,13 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Constants.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/ADT/SmallVector.h"
#include "gtest/gtest.h"
namespace llvm {
diff --git a/unittests/Bitcode/BitReaderTest.cpp b/unittests/Bitcode/BitReaderTest.cpp
index 68cfe2836a29..f33af2ff474f 100644
--- a/unittests/Bitcode/BitReaderTest.cpp
+++ b/unittests/Bitcode/BitReaderTest.cpp
@@ -11,10 +11,10 @@
#include "llvm/Analysis/Verifier.h"
#include "llvm/Bitcode/BitstreamWriter.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Support/MemoryBuffer.h"
#include "gtest/gtest.h"
@@ -45,9 +45,9 @@ static Module *makeLLVMModule() {
}
static void writeModuleToBuffer(SmallVectorImpl<char> &Buffer) {
- Module *Mod = makeLLVMModule();
+ OwningPtr<Module> Mod(makeLLVMModule());
raw_svector_ostream OS(Buffer);
- WriteBitcodeToFile(Mod, OS);
+ WriteBitcodeToFile(Mod.get(), OS);
}
TEST(BitReaderTest, MaterializeFunctionsForBlockAddr) { // PR11677
@@ -55,7 +55,7 @@ TEST(BitReaderTest, MaterializeFunctionsForBlockAddr) { // PR11677
writeModuleToBuffer(Mem);
MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(Mem.str(), "test", false);
std::string errMsg;
- Module *m = getLazyBitcodeModule(Buffer, getGlobalContext(), &errMsg);
+ OwningPtr<Module> m(getLazyBitcodeModule(Buffer, getGlobalContext(), &errMsg));
PassManager passes;
passes.add(createVerifierPass());
passes.run(*m);
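Both changes in this hunk are the same leak fix: the raw Module* results now live in OwningPtr, LLVM's pre-C++11 unique-ownership smart pointer, so the modules are destroyed when the test returns. The pattern in isolation (makeWidget is a hypothetical factory, not an LLVM API):

#include "llvm/ADT/OwningPtr.h"

struct Widget { int x; };
Widget *makeWidget();                        // hypothetical heap factory

void ownershipSketch() {
  llvm::OwningPtr<Widget> W(makeWidget());   // owns the allocation
  Widget *Borrowed = W.get();                // non-owning access, as in
  (void)Borrowed;                            // WriteBitcodeToFile(Mod.get(), OS)
}                                            // ~OwningPtr deletes the Widget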
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index 84bd44439ee3..a3f8bf34e731 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -9,6 +9,7 @@ add_subdirectory(ADT)
add_subdirectory(Analysis)
add_subdirectory(ExecutionEngine)
add_subdirectory(Bitcode)
+add_subdirectory(Option)
add_subdirectory(Support)
add_subdirectory(Transforms)
-add_subdirectory(VMCore)
+add_subdirectory(IR)
diff --git a/unittests/ExecutionEngine/CMakeLists.txt b/unittests/ExecutionEngine/CMakeLists.txt
index ed7f10a23c8a..4eefc1e3bb1b 100644
--- a/unittests/ExecutionEngine/CMakeLists.txt
+++ b/unittests/ExecutionEngine/CMakeLists.txt
@@ -6,5 +6,9 @@ add_llvm_unittest(ExecutionEngineTests
ExecutionEngineTest.cpp
)
-add_subdirectory(JIT)
-add_subdirectory(MCJIT)
+# Include JIT/MCJIT tests only if native arch is a JIT target.
+list(FIND LLVM_TARGETS_WITH_JIT "${LLVM_NATIVE_ARCH}" have_jit)
+if (NOT have_jit EQUAL -1)
+ add_subdirectory(JIT)
+ add_subdirectory(MCJIT)
+endif()
diff --git a/unittests/ExecutionEngine/ExecutionEngineTest.cpp b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
index 74a2ccdd0663..3e304e79860b 100644
--- a/unittests/ExecutionEngine/ExecutionEngineTest.cpp
+++ b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
@@ -7,12 +7,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "gtest/gtest.h"
using namespace llvm;
diff --git a/unittests/ExecutionEngine/JIT/CMakeLists.txt b/unittests/ExecutionEngine/JIT/CMakeLists.txt
index 11cf784e1e59..ef37026dfc9f 100644
--- a/unittests/ExecutionEngine/JIT/CMakeLists.txt
+++ b/unittests/ExecutionEngine/JIT/CMakeLists.txt
@@ -19,7 +19,9 @@ if( LLVM_USE_INTEL_JITEVENTS )
)
set(LLVM_LINK_COMPONENTS
${LLVM_LINK_COMPONENTS}
+ DebugInfo
IntelJITEvents
+ Object
)
endif( LLVM_USE_INTEL_JITEVENTS )
@@ -53,3 +55,4 @@ add_llvm_unittest(JITTests
if(MINGW OR CYGWIN)
set_property(TARGET JITTests PROPERTY LINK_FLAGS -Wl,--export-all-symbols)
endif()
+set_target_properties(JITTests PROPERTIES ENABLE_EXPORTS 1)
diff --git a/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
index 333888a5655d..6ba8bc42d12c 100644
--- a/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
@@ -8,14 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITEventListener.h"
-
-#include "llvm/LLVMContext.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/TypeBuilder.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/MachineCodeInfo.h"
#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeBuilder.h"
#include "llvm/Support/TargetSelect.h"
#include "gtest/gtest.h"
#include <vector>
diff --git a/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h b/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h
index 5f02b38847b8..d1c2124b9b10 100644
--- a/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h
+++ b/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h
@@ -10,24 +10,22 @@
#ifndef JIT_EVENT_LISTENER_TEST_COMMON_H
#define JIT_EVENT_LISTENER_TEST_COMMON_H
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/Config/config.h"
#include "llvm/DIBuilder.h"
#include "llvm/DebugInfo.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/TypeBuilder.h"
-#include "llvm/CodeGen/MachineCodeInfo.h"
#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeBuilder.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/TargetSelect.h"
-#include "llvm/Config/config.h"
-
#include "gtest/gtest.h"
-
-#include <vector>
#include <string>
#include <utility>
+#include <vector>
typedef std::vector<std::pair<std::string, unsigned int> > SourceLocations;
typedef std::map<uint64_t, SourceLocations> NativeCodeMap;
diff --git a/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
index be5d152c1c51..21ca0d448ced 100644
--- a/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
@@ -7,14 +7,14 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/LLVMContext.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/LLVMContext.h"
+#include "gtest/gtest.h"
using namespace llvm;
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 59604dfbf5cf..30dadc9f3e3b 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -7,28 +7,27 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/BasicBlock.h"
-#include "llvm/Constant.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/TypeBuilder.h"
+#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Assembly/Parser.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/TypeBuilder.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
-
#include "gtest/gtest.h"
#include <vector>
@@ -118,13 +117,14 @@ public:
Base->endFunctionBody(F, FunctionStart, FunctionEnd);
}
virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID) {
- return Base->allocateDataSection(Size, Alignment, SectionID);
+ unsigned SectionID, bool IsReadOnly) {
+ return Base->allocateDataSection(Size, Alignment, SectionID, IsReadOnly);
}
virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID) {
return Base->allocateCodeSection(Size, Alignment, SectionID);
}
+ virtual bool applyPermissions(std::string *ErrMsg) { return false; }
virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
return Base->allocateSpace(Size, Alignment);
}
@@ -161,7 +161,7 @@ public:
uintptr_t ActualSizeResult;
};
std::vector<StartExceptionTableCall> startExceptionTableCalls;
- virtual uint8_t* startExceptionTable(const Function* F,
+ virtual uint8_t *startExceptionTable(const Function *F,
uintptr_t &ActualSize) {
uintptr_t InitialActualSize = ActualSize;
uint8_t *Result = Base->startExceptionTable(F, ActualSize);
@@ -203,14 +203,21 @@ bool LoadAssemblyInto(Module *M, const char *assembly) {
class JITTest : public testing::Test {
protected:
+ virtual RecordingJITMemoryManager *createMemoryManager() {
+ return new RecordingJITMemoryManager;
+ }
+
virtual void SetUp() {
M = new Module("<main>", Context);
- RJMM = new RecordingJITMemoryManager;
+ RJMM = createMemoryManager();
RJMM->setPoisonMemory(true);
std::string Error;
+ TargetOptions Options;
+ Options.JITExceptionHandling = true;
TheJIT.reset(EngineBuilder(M).setEngineKind(EngineKind::JIT)
.setJITMemoryManager(RJMM)
- .setErrorStr(&Error).create());
+ .setErrorStr(&Error)
+ .setTargetOptions(Options).create());
ASSERT_TRUE(TheJIT.get() != NULL) << Error;
}
@@ -297,6 +304,46 @@ TEST(JIT, GlobalInFunction) {
#endif // !defined(__arm__) && !defined(__powerpc__)
+// Regression test: the JITEmitter wasn't verifying that it hadn't run out of
+// space while generating the DWARF exception information for an emitted
+// function.
+
+class ExceptionMemoryManagerMock : public RecordingJITMemoryManager {
+ public:
+ virtual uint8_t *startExceptionTable(const Function *F,
+ uintptr_t &ActualSize) {
+ // Force an insufficient size the first time through.
+ bool ChangeActualSize = false;
+ if (ActualSize == 0)
+ ChangeActualSize = true;
+ uint8_t *result =
+ RecordingJITMemoryManager::startExceptionTable(F, ActualSize);
+ if (ChangeActualSize)
+ ActualSize = 1;
+ return result;
+ }
+};
+
+class JITExceptionMemoryTest : public JITTest {
+ protected:
+ virtual RecordingJITMemoryManager *createMemoryManager() {
+ return new ExceptionMemoryManagerMock;
+ }
+};
+
+TEST_F(JITExceptionMemoryTest, ExceptionTableOverflow) {
+ Function *F = Function::Create(TypeBuilder<void(void), false>::get(Context),
+ Function::ExternalLinkage,
+ "func1", M);
+ BasicBlock *Block = BasicBlock::Create(Context, "block", F);
+ IRBuilder<> Builder(Block);
+ Builder.CreateRetVoid();
+ TheJIT->getPointerToFunction(F);
+ ASSERT_TRUE(RJMM->startExceptionTableCalls.size() == 2);
+ ASSERT_TRUE(RJMM->deallocateExceptionTableCalls.size() == 1);
+ ASSERT_TRUE(RJMM->endExceptionTableCalls.size() == 1);
+}
+
int PlusOne(int arg) {
return arg + 1;
}
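ExceptionMemoryManagerMock clamps ActualSize to 1 on the first call, so the emitter's table write must overflow and retry; the assertions then pin the protocol to exactly two startExceptionTable calls, one deallocateExceptionTable of the failed attempt, and one successful endExceptionTable. The consumer-side loop being exercised looks roughly like this (a sketch, not the actual JITEmitter code):

#include "llvm/ExecutionEngine/JITMemoryManager.h"

uint8_t *emitTableWithRetry(llvm::JITMemoryManager &MM,
                            const llvm::Function *F, uintptr_t Needed) {
  for (;;) {
    uintptr_t Actual = 0;                    // in/out: MM reports real size
    uint8_t *Start = MM.startExceptionTable(F, Actual);
    if (Actual >= Needed)
      return Start;                          // big enough: fill, then end
    MM.deallocateExceptionTable(Start);      // too small: release and retry
  }
}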
diff --git a/unittests/ExecutionEngine/JIT/Makefile b/unittests/ExecutionEngine/JIT/Makefile
index 9e0bb9ea5930..ef8b827a862e 100644
--- a/unittests/ExecutionEngine/JIT/Makefile
+++ b/unittests/ExecutionEngine/JIT/Makefile
@@ -24,7 +24,7 @@ ifeq ($(USE_INTEL_JITEVENTS), 1)
CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR)
# Link against the LLVM Intel JIT Evens interface library
- LINK_COMPONENTS += inteljitevents
+ LINK_COMPONENTS += debuginfo inteljitevents object
endif
ifeq ($(USE_OPROFILE), 1)
diff --git a/unittests/ExecutionEngine/JIT/MultiJITTest.cpp b/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
index 4a22e2f641e7..53014672c268 100644
--- a/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/Assembly/Parser.h"
#include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
#include <vector>
using namespace llvm;
diff --git a/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
index 9b0ee609923c..7057fcaf1d6c 100644
--- a/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
@@ -7,12 +7,11 @@
//
//===--------------------------------------------------------------------------------------===//
-#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/OProfileWrapper.h"
#include "JITEventListenerTestCommon.h"
-
-#include <map>
+#include "llvm/ExecutionEngine/JITEventListener.h"
#include <list>
+#include <map>
using namespace llvm;
diff --git a/unittests/ExecutionEngine/MCJIT/CMakeLists.txt b/unittests/ExecutionEngine/MCJIT/CMakeLists.txt
index 3e9c5b631e45..c6b1f77e3e8b 100644
--- a/unittests/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/unittests/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -2,14 +2,14 @@ set(LLVM_LINK_COMPONENTS
asmparser
bitreader
bitwriter
- mcjit
jit
+ mcjit
nativecodegen
)
set(MCJITTestsSources
MCJITTest.cpp
- SectionMemoryManager.cpp
+ MCJITMemoryManagerTest.cpp
)
if(MSVC)
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITMemoryManagerTest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITMemoryManagerTest.cpp
new file mode 100644
index 000000000000..ab09acad0d3b
--- /dev/null
+++ b/unittests/ExecutionEngine/MCJIT/MCJITMemoryManagerTest.cpp
@@ -0,0 +1,172 @@
+//===- MCJITMemoryManagerTest.cpp - Unit tests for the JIT memory manager -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(MCJITMemoryManagerTest, BasicAllocations) {
+ OwningPtr<SectionMemoryManager> MemMgr(new SectionMemoryManager());
+
+ uint8_t *code1 = MemMgr->allocateCodeSection(256, 0, 1);
+ uint8_t *data1 = MemMgr->allocateDataSection(256, 0, 2, true);
+ uint8_t *code2 = MemMgr->allocateCodeSection(256, 0, 3);
+ uint8_t *data2 = MemMgr->allocateDataSection(256, 0, 4, false);
+
+ EXPECT_NE((uint8_t*)0, code1);
+ EXPECT_NE((uint8_t*)0, code2);
+ EXPECT_NE((uint8_t*)0, data1);
+ EXPECT_NE((uint8_t*)0, data2);
+
+ // Initialize the data
+ for (unsigned i = 0; i < 256; ++i) {
+ code1[i] = 1;
+ code2[i] = 2;
+ data1[i] = 3;
+ data2[i] = 4;
+ }
+
+ // Verify the data (this is checking for overlaps in the addresses)
+ for (unsigned i = 0; i < 256; ++i) {
+ EXPECT_EQ(1, code1[i]);
+ EXPECT_EQ(2, code2[i]);
+ EXPECT_EQ(3, data1[i]);
+ EXPECT_EQ(4, data2[i]);
+ }
+
+ std::string Error;
+ EXPECT_FALSE(MemMgr->applyPermissions(&Error));
+}
+
+TEST(MCJITMemoryManagerTest, LargeAllocations) {
+ OwningPtr<SectionMemoryManager> MemMgr(new SectionMemoryManager());
+
+ uint8_t *code1 = MemMgr->allocateCodeSection(0x100000, 0, 1);
+ uint8_t *data1 = MemMgr->allocateDataSection(0x100000, 0, 2, true);
+ uint8_t *code2 = MemMgr->allocateCodeSection(0x100000, 0, 3);
+ uint8_t *data2 = MemMgr->allocateDataSection(0x100000, 0, 4, false);
+
+ EXPECT_NE((uint8_t*)0, code1);
+ EXPECT_NE((uint8_t*)0, code2);
+ EXPECT_NE((uint8_t*)0, data1);
+ EXPECT_NE((uint8_t*)0, data2);
+
+ // Initialize the data
+ for (unsigned i = 0; i < 0x100000; ++i) {
+ code1[i] = 1;
+ code2[i] = 2;
+ data1[i] = 3;
+ data2[i] = 4;
+ }
+
+ // Verify the data (this is checking for overlaps in the addresses)
+ for (unsigned i = 0; i < 0x100000; ++i) {
+ EXPECT_EQ(1, code1[i]);
+ EXPECT_EQ(2, code2[i]);
+ EXPECT_EQ(3, data1[i]);
+ EXPECT_EQ(4, data2[i]);
+ }
+
+ std::string Error;
+ EXPECT_FALSE(MemMgr->applyPermissions(&Error));
+}
+
+TEST(MCJITMemoryManagerTest, ManyAllocations) {
+ OwningPtr<SectionMemoryManager> MemMgr(new SectionMemoryManager());
+
+ uint8_t* code[10000];
+ uint8_t* data[10000];
+
+ for (unsigned i = 0; i < 10000; ++i) {
+ const bool isReadOnly = i % 2 == 0;
+
+ code[i] = MemMgr->allocateCodeSection(32, 0, 1);
+ data[i] = MemMgr->allocateDataSection(32, 0, 2, isReadOnly);
+
+ for (unsigned j = 0; j < 32; j++) {
+ code[i][j] = 1 + (i % 254);
+ data[i][j] = 2 + (i % 254);
+ }
+
+ EXPECT_NE((uint8_t *)0, code[i]);
+ EXPECT_NE((uint8_t *)0, data[i]);
+ }
+
+ // Verify the data (this is checking for overlaps in the addresses)
+ for (unsigned i = 0; i < 10000; ++i) {
+ for (unsigned j = 0; j < 32; ++j) {
+ uint8_t ExpectedCode = 1 + (i % 254);
+ uint8_t ExpectedData = 2 + (i % 254);
+ EXPECT_EQ(ExpectedCode, code[i][j]);
+ EXPECT_EQ(ExpectedData, data[i][j]);
+ }
+ }
+
+ std::string Error;
+ EXPECT_FALSE(MemMgr->applyPermissions(&Error));
+}
+
+TEST(MCJITMemoryManagerTest, ManyVariedAllocations) {
+ OwningPtr<SectionMemoryManager> MemMgr(new SectionMemoryManager());
+
+ uint8_t* code[10000];
+ uint8_t* data[10000];
+
+ for (unsigned i = 0; i < 10000; ++i) {
+ uintptr_t CodeSize = i % 16 + 1;
+ uintptr_t DataSize = i % 8 + 1;
+
+ bool isReadOnly = i % 3 == 0;
+ unsigned Align = 8 << (i % 4);
+
+ code[i] = MemMgr->allocateCodeSection(CodeSize, Align, i);
+ data[i] = MemMgr->allocateDataSection(DataSize, Align, i + 10000,
+ isReadOnly);
+
+ for (unsigned j = 0; j < CodeSize; j++) {
+ code[i][j] = 1 + (i % 254);
+ }
+
+ for (unsigned j = 0; j < DataSize; j++) {
+ data[i][j] = 2 + (i % 254);
+ }
+
+ EXPECT_NE((uint8_t *)0, code[i]);
+ EXPECT_NE((uint8_t *)0, data[i]);
+
+ uintptr_t CodeAlign = Align ? (uintptr_t)code[i] % Align : 0;
+ uintptr_t DataAlign = Align ? (uintptr_t)data[i] % Align : 0;
+
+ EXPECT_EQ((uintptr_t)0, CodeAlign);
+ EXPECT_EQ((uintptr_t)0, DataAlign);
+ }
+
+ for (unsigned i = 0; i < 10000; ++i) {
+ uintptr_t CodeSize = i % 16 + 1;
+ uintptr_t DataSize = i % 8 + 1;
+
+ for (unsigned j = 0; j < CodeSize; j++) {
+ uint8_t ExpectedCode = 1 + (i % 254);
+ EXPECT_EQ(ExpectedCode, code[i][j]);
+ }
+
+ for (unsigned j = 0; j < DataSize; j++) {
+ uint8_t ExpectedData = 2 + (i % 254);
+ EXPECT_EQ(ExpectedData, data[i][j]);
+ }
+ }
+}
+
+} // end anonymous namespace
+
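ManyVariedAllocations verifies two properties per allocation: the returned block honors the requested power-of-two alignment, and neighboring allocations don't overlap. The alignment assertion is just a modulus check; the other tests additionally finish with applyPermissions, which returns false on success. In isolation, using only calls that appear in the test:

#include "llvm/ADT/OwningPtr.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include <string>

bool alignedAllocSketch() {
  llvm::OwningPtr<llvm::SectionMemoryManager> MM(
      new llvm::SectionMemoryManager());
  unsigned Align = 16;
  uint8_t *P = MM->allocateCodeSection(/*Size=*/64, Align, /*SectionID=*/1);
  bool Aligned = ((uintptr_t)P % Align) == 0;    // the test's alignment check
  std::string Err;
  return Aligned && !MM->applyPermissions(&Err); // false means success
}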
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
index 6b79a683bce0..e9cf904b1813 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
+++ b/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
@@ -14,7 +14,6 @@
#include "llvm/ExecutionEngine/MCJIT.h"
#include "MCJITTestBase.h"
-#include "SectionMemoryManager.h"
#include "gtest/gtest.h"
using namespace llvm;
@@ -47,6 +46,7 @@ TEST_F(MCJITTest, global_variable) {
GlobalValue *Global = insertGlobalInt32(M.get(), "test_global", initialValue);
createJIT(M.take());
void *globalPtr = TheJIT->getPointerToGlobal(Global);
+ MM->applyPermissions();
static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
EXPECT_TRUE(0 != globalPtr)
<< "Unable to get pointer to global value from JIT";
@@ -61,6 +61,7 @@ TEST_F(MCJITTest, add_function) {
Function *F = insertAddFunction(M.get());
createJIT(M.take());
void *addPtr = TheJIT->getPointerToFunction(F);
+ MM->applyPermissions();
static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
EXPECT_TRUE(0 != addPtr)
<< "Unable to get pointer to function from JIT";
@@ -78,6 +79,7 @@ TEST_F(MCJITTest, run_main) {
Function *Main = insertMainFunction(M.get(), 6);
createJIT(M.take());
void *vPtr = TheJIT->getPointerToFunction(Main);
+ MM->applyPermissions();
static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
EXPECT_TRUE(0 != vPtr)
<< "Unable to get pointer to main() from JIT";
@@ -100,6 +102,7 @@ TEST_F(MCJITTest, return_global) {
createJIT(M.take());
void *rgvPtr = TheJIT->getPointerToFunction(ReturnGlobal);
+ MM->applyPermissions();
static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
EXPECT_TRUE(0 != rgvPtr);
@@ -169,6 +172,7 @@ TEST_F(MCJITTest, multiple_functions) {
createJIT(M.take());
void *vPtr = TheJIT->getPointerToFunction(Outer);
+ MM->applyPermissions();
static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
EXPECT_TRUE(0 != vPtr)
<< "Unable to get pointer to outer function from JIT";
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h b/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
index 9b4a4ac3cf00..fc774abd6215 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
+++ b/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
@@ -17,20 +17,19 @@
#ifndef MCJIT_TEST_BASE_H
#define MCJIT_TEST_BASE_H
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Config/config.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/TypeBuilder.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetSelect.h"
-#include "llvm/TypeBuilder.h"
-
-#include "SectionMemoryManager.h"
// Used to skip tests on unsupported architectures and operating systems.
// To skip a test, add this macro at the top of a test-case in a suite that
@@ -53,7 +52,7 @@ protected:
, MArch("")
, Builder(Context)
, MM(new SectionMemoryManager)
- , HostTriple(LLVM_HOSTTRIPLE)
+ , HostTriple(sys::getProcessTriple())
{
InitializeNativeTarget();
InitializeNativeTargetAsmPrinter();
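Swapping the LLVM_HOSTTRIPLE macro for sys::getProcessTriple() makes the test-skip logic follow the triple of the running test binary rather than the configure-time host, which matters for 32-bit builds on 64-bit hosts. Usage in isolation:

#include "llvm/ADT/Triple.h"
#include "llvm/Support/Host.h"

llvm::Triple currentProcessTriple() {
  // Computed at run time from the process itself, not baked in at
  // configure time, so cross-bitness builds report themselves correctly.
  return llvm::Triple(llvm::sys::getProcessTriple());
}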
diff --git a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
deleted file mode 100644
index d6baf3c9bb8e..000000000000
--- a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//===-- SectionMemoryManager.cpp - The memory manager for MCJIT -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the implementation of the section-based memory manager
-// used by MCJIT.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/MathExtras.h"
-
-#include "SectionMemoryManager.h"
-
-#ifdef __linux__
-// These includes used by SectionMemoryManager::getPointerToNamedFunction()
-// for Glibc trickery. Look comments in this function for more information.
-#ifdef HAVE_SYS_STAT_H
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-#endif
-
-namespace llvm {
-
-uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size,
- unsigned Alignment,
- unsigned SectionID) {
- if (!Alignment)
- Alignment = 16;
- // Ensure that enough memory is requested to allow aligning.
- size_t NumElementsAligned = 1 + (Size + Alignment - 1)/Alignment;
- uint8_t *Addr = (uint8_t*)calloc(NumElementsAligned, Alignment);
-
- // Honour the alignment requirement.
- uint8_t *AlignedAddr = (uint8_t*)RoundUpToAlignment((uint64_t)Addr, Alignment);
-
- // Store the original address from calloc so we can free it later.
- AllocatedDataMem.push_back(sys::MemoryBlock(Addr, NumElementsAligned*Alignment));
- return AlignedAddr;
-}
-
-uint8_t *SectionMemoryManager::allocateCodeSection(uintptr_t Size,
- unsigned Alignment,
- unsigned SectionID) {
- if (!Alignment)
- Alignment = 16;
- unsigned NeedAllocate = Alignment * ((Size + Alignment - 1)/Alignment + 1);
- uintptr_t Addr = 0;
- // Look in the list of free code memory regions and use a block there if one
- // is available.
- for (int i = 0, e = FreeCodeMem.size(); i != e; ++i) {
- sys::MemoryBlock &MB = FreeCodeMem[i];
- if (MB.size() >= NeedAllocate) {
- Addr = (uintptr_t)MB.base();
- uintptr_t EndOfBlock = Addr + MB.size();
- // Align the address.
- Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
- // Store cutted free memory block.
- FreeCodeMem[i] = sys::MemoryBlock((void*)(Addr + Size),
- EndOfBlock - Addr - Size);
- return (uint8_t*)Addr;
- }
- }
-
- // No pre-allocated free block was large enough. Allocate a new memory region.
- sys::MemoryBlock MB = sys::Memory::AllocateRWX(NeedAllocate, 0, 0);
-
- AllocatedCodeMem.push_back(MB);
- Addr = (uintptr_t)MB.base();
- uintptr_t EndOfBlock = Addr + MB.size();
- // Align the address.
- Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
- // The AllocateRWX may allocate much more memory than we need. In this case,
- // we store the unused memory as a free memory block.
- unsigned FreeSize = EndOfBlock-Addr-Size;
- if (FreeSize > 16)
- FreeCodeMem.push_back(sys::MemoryBlock((void*)(Addr + Size), FreeSize));
-
- // Return aligned address
- return (uint8_t*)Addr;
-}
-
-void SectionMemoryManager::invalidateInstructionCache() {
- for (int i = 0, e = AllocatedCodeMem.size(); i != e; ++i)
- sys::Memory::InvalidateInstructionCache(AllocatedCodeMem[i].base(),
- AllocatedCodeMem[i].size());
-}
-
-void *SectionMemoryManager::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure) {
-#if defined(__linux__)
- //===--------------------------------------------------------------------===//
- // Function stubs that are invoked instead of certain library calls
- //
- // Force the following functions to be linked in to anything that uses the
- // JIT. This is a hack designed to work around the all-too-clever Glibc
- // strategy of making these functions work differently when inlined vs. when
- // not inlined, and hiding their real definitions in a separate archive file
- // that the dynamic linker can't see. For more info, search for
- // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
- if (Name == "stat") return (void*)(intptr_t)&stat;
- if (Name == "fstat") return (void*)(intptr_t)&fstat;
- if (Name == "lstat") return (void*)(intptr_t)&lstat;
- if (Name == "stat64") return (void*)(intptr_t)&stat64;
- if (Name == "fstat64") return (void*)(intptr_t)&fstat64;
- if (Name == "lstat64") return (void*)(intptr_t)&lstat64;
- if (Name == "atexit") return (void*)(intptr_t)&atexit;
- if (Name == "mknod") return (void*)(intptr_t)&mknod;
-#endif // __linux__
-
- const char *NameStr = Name.c_str();
- void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
- if (Ptr) return Ptr;
-
- // If it wasn't found and if it starts with an underscore ('_') character,
- // try again without the underscore.
- if (NameStr[0] == '_') {
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
- if (Ptr) return Ptr;
- }
-
- if (AbortOnFailure)
- report_fatal_error("Program used external function '" + Name +
- "' which could not be resolved!");
- return 0;
-}
-
-SectionMemoryManager::~SectionMemoryManager() {
- for (unsigned i = 0, e = AllocatedCodeMem.size(); i != e; ++i)
- sys::Memory::ReleaseRWX(AllocatedCodeMem[i]);
- for (unsigned i = 0, e = AllocatedDataMem.size(); i != e; ++i)
- free(AllocatedDataMem[i].base());
-}
-
-} // namespace llvm
diff --git a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h
deleted file mode 100644
index e44217c90638..000000000000
--- a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h
+++ /dev/null
@@ -1,118 +0,0 @@
-//===-- SectionMemoryManager.h - Memory allocator for MCJIT -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of a section-based memory manager used by
-// the MCJIT execution engine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
-#define LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-
-namespace llvm {
-
-// Section-based memory manager for MCJIT
-class SectionMemoryManager : public JITMemoryManager {
-
-public:
-
- SectionMemoryManager() { }
- ~SectionMemoryManager();
-
- virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
-
- virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
-
- virtual void *getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure = true);
-
- // Invalidate instruction cache for code sections. Some platforms with
- // separate data cache and instruction cache require explicit cache flush,
- // otherwise JIT code manipulations (like resolved relocations) will get to
- // the data cache but not to the instruction cache.
- virtual void invalidateInstructionCache();
-
-private:
-
- SmallVector<sys::MemoryBlock, 16> AllocatedDataMem;
- SmallVector<sys::MemoryBlock, 16> AllocatedCodeMem;
- SmallVector<sys::MemoryBlock, 16> FreeCodeMem;
-
-public:
-
- ///
- /// Functions below are not used by MCJIT, but must be implemented because
- /// they are declared as pure virtuals in the base class.
- ///
-
- virtual void setMemoryWritable() {
- llvm_unreachable("Unexpected call!");
- }
- virtual void setMemoryExecutable() {
- llvm_unreachable("Unexpected call!");
- }
- virtual void setPoisonMemory(bool poison) {
- llvm_unreachable("Unexpected call!");
- }
- virtual void AllocateGOT() {
- llvm_unreachable("Unexpected call!");
- }
- virtual uint8_t *getGOTBase() const {
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual uint8_t *startFunctionBody(const Function *F,
- uintptr_t &ActualSize){
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
- unsigned Alignment) {
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
- uint8_t *FunctionEnd) {
- llvm_unreachable("Unexpected call!");
- }
- virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual void deallocateFunctionBody(void *Body) {
- llvm_unreachable("Unexpected call!");
- }
- virtual uint8_t *startExceptionTable(const Function *F,
- uintptr_t &ActualSize) {
- llvm_unreachable("Unexpected call!");
- return 0;
- }
- virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
- uint8_t *TableEnd, uint8_t *FrameRegister) {
- llvm_unreachable("Unexpected call!");
- }
- virtual void deallocateExceptionTable(void *ET) {
- llvm_unreachable("Unexpected call!");
- }
-};
-
-}
-
-#endif // LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
diff --git a/unittests/ExecutionEngine/Makefile b/unittests/ExecutionEngine/Makefile
index ca1195631a22..c779a6a47c14 100644
--- a/unittests/ExecutionEngine/Makefile
+++ b/unittests/ExecutionEngine/Makefile
@@ -10,7 +10,10 @@
LEVEL = ../..
TESTNAME = ExecutionEngine
LINK_COMPONENTS :=interpreter
-PARALLEL_DIRS = JIT MCJIT
+
+ifeq ($(TARGET_HAS_JIT),1)
+ PARALLEL_DIRS = JIT MCJIT
+endif
include $(LEVEL)/Makefile.config
include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/IR/AttributesTest.cpp b/unittests/IR/AttributesTest.cpp
new file mode 100644
index 000000000000..2368bdf94dc4
--- /dev/null
+++ b/unittests/IR/AttributesTest.cpp
@@ -0,0 +1,34 @@
+//===- llvm/unittest/IR/AttributesTest.cpp - Attributes unit tests --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "gtest/gtest.h"
+using namespace llvm;
+
+namespace {
+
+TEST(Attributes, Uniquing) {
+ LLVMContext C;
+
+ Attribute AttrA = Attribute::get(C, Attribute::AlwaysInline);
+ Attribute AttrB = Attribute::get(C, Attribute::AlwaysInline);
+ EXPECT_EQ(AttrA, AttrB);
+
+ AttributeSet ASs[] = {
+ AttributeSet::get(C, 1, Attribute::ZExt),
+ AttributeSet::get(C, 2, Attribute::SExt)
+ };
+
+ AttributeSet SetA = AttributeSet::get(C, ASs);
+ AttributeSet SetB = AttributeSet::get(C, ASs);
+ EXPECT_EQ(SetA, SetB);
+}
+
+} // end anonymous namespace
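Both EXPECT_EQs rest on uniquing: Attribute and AttributeSet objects are interned per LLVMContext, so constructing the same content twice yields the same underlying object and equality is a cheap handle comparison. Restated as a sketch:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"

bool uniquingSketch() {
  llvm::LLVMContext C;
  // Two requests for the same enum attribute in the same context hand
  // back the same interned object, so operator== compares handles.
  llvm::Attribute A = llvm::Attribute::get(C, llvm::Attribute::NoUnwind);
  llvm::Attribute B = llvm::Attribute::get(C, llvm::Attribute::NoUnwind);
  return A == B; // true by construction
}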
diff --git a/unittests/IR/CMakeLists.txt b/unittests/IR/CMakeLists.txt
new file mode 100644
index 000000000000..aed45979c069
--- /dev/null
+++ b/unittests/IR/CMakeLists.txt
@@ -0,0 +1,37 @@
+set(LLVM_LINK_COMPONENTS
+ asmparser
+ core
+ ipa
+ )
+
+set(IRSources
+ AttributesTest.cpp
+ ConstantsTest.cpp
+ DominatorTreeTest.cpp
+ IRBuilderTest.cpp
+ InstructionsTest.cpp
+ MDBuilderTest.cpp
+ MetadataTest.cpp
+ PassManagerTest.cpp
+ TypeBuilderTest.cpp
+ TypesTest.cpp
+ ValueMapTest.cpp
+ VerifierTest.cpp
+ WaymarkTest.cpp
+ )
+
+# MSVC 8 and 9 cannot compile ValueMapTest.cpp due to a compiler bug.
+# See Visual Studio issue #331418.
+if(MSVC AND MSVC_VERSION LESS 1600)
+ list(REMOVE_ITEM IRSources ValueMapTest.cpp)
+endif()
+
+# HACK: Declare a couple of source files as optionally compiled to satisfy the
+# missing-file-checker in LLVM's weird CMake build.
+set(LLVM_OPTIONAL_SOURCES
+ ValueMapTest.cpp
+ )
+
+add_llvm_unittest(IRTests
+ ${IRSources}
+ )
diff --git a/unittests/IR/ConstantsTest.cpp b/unittests/IR/ConstantsTest.cpp
new file mode 100644
index 000000000000..fee38b891de4
--- /dev/null
+++ b/unittests/IR/ConstantsTest.cpp
@@ -0,0 +1,260 @@
+//===- llvm/unittest/IR/ConstantsTest.cpp - Constants unit tests ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace {
+
+TEST(ConstantsTest, Integer_i1) {
+ IntegerType* Int1 = IntegerType::get(getGlobalContext(), 1);
+ Constant* One = ConstantInt::get(Int1, 1, true);
+ Constant* Zero = ConstantInt::get(Int1, 0);
+ Constant* NegOne = ConstantInt::get(Int1, static_cast<uint64_t>(-1), true);
+ EXPECT_EQ(NegOne, ConstantInt::getSigned(Int1, -1));
+ Constant* Undef = UndefValue::get(Int1);
+
+ // Input: @b = constant i1 add(i1 1 , i1 1)
+ // Output: @b = constant i1 false
+ EXPECT_EQ(Zero, ConstantExpr::getAdd(One, One));
+
+ // @c = constant i1 add(i1 -1, i1 1)
+ // @c = constant i1 false
+ EXPECT_EQ(Zero, ConstantExpr::getAdd(NegOne, One));
+
+ // @d = constant i1 add(i1 -1, i1 -1)
+ // @d = constant i1 false
+ EXPECT_EQ(Zero, ConstantExpr::getAdd(NegOne, NegOne));
+
+ // @e = constant i1 sub(i1 -1, i1 1)
+ // @e = constant i1 false
+ EXPECT_EQ(Zero, ConstantExpr::getSub(NegOne, One));
+
+ // @f = constant i1 sub(i1 1 , i1 -1)
+ // @f = constant i1 false
+ EXPECT_EQ(Zero, ConstantExpr::getSub(One, NegOne));
+
+ // @g = constant i1 sub(i1 1 , i1 1)
+ // @g = constant i1 false
+ EXPECT_EQ(Zero, ConstantExpr::getSub(One, One));
+
+ // @h = constant i1 shl(i1 1 , i1 1) ; undefined
+ // @h = constant i1 undef
+ EXPECT_EQ(Undef, ConstantExpr::getShl(One, One));
+
+ // @i = constant i1 shl(i1 1 , i1 0)
+ // @i = constant i1 true
+ EXPECT_EQ(One, ConstantExpr::getShl(One, Zero));
+
+ // @j = constant i1 lshr(i1 1, i1 1) ; undefined
+ // @j = constant i1 undef
+ EXPECT_EQ(Undef, ConstantExpr::getLShr(One, One));
+
+ // @m = constant i1 ashr(i1 1, i1 1) ; undefined
+ // @m = constant i1 undef
+ EXPECT_EQ(Undef, ConstantExpr::getAShr(One, One));
+
+ // @n = constant i1 mul(i1 -1, i1 1)
+ // @n = constant i1 true
+ EXPECT_EQ(One, ConstantExpr::getMul(NegOne, One));
+
+ // @o = constant i1 sdiv(i1 -1, i1 1) ; overflow
+ // @o = constant i1 true
+ EXPECT_EQ(One, ConstantExpr::getSDiv(NegOne, One));
+
+ // @p = constant i1 sdiv(i1 1 , i1 -1); overflow
+ // @p = constant i1 true
+ EXPECT_EQ(One, ConstantExpr::getSDiv(One, NegOne));
+
+ // @q = constant i1 udiv(i1 -1, i1 1)
+ // @q = constant i1 true
+ EXPECT_EQ(One, ConstantExpr::getUDiv(NegOne, One));
+
+ // @r = constant i1 udiv(i1 1, i1 -1)
+ // @r = constant i1 true
+ EXPECT_EQ(One, ConstantExpr::getUDiv(One, NegOne));
+
+ // @s = constant i1 srem(i1 -1, i1 1) ; overflow
+ // @s = constant i1 false
+ EXPECT_EQ(Zero, ConstantExpr::getSRem(NegOne, One));
+
+ // @t = constant i1 urem(i1 -1, i1 1)
+ // @t = constant i1 false
+ EXPECT_EQ(Zero, ConstantExpr::getURem(NegOne, One));
+
+ // @u = constant i1 srem(i1 1, i1 -1) ; overflow
+ // @u = constant i1 false
+ EXPECT_EQ(Zero, ConstantExpr::getSRem(One, NegOne));
+}
+
+TEST(ConstantsTest, IntSigns) {
+ IntegerType* Int8Ty = Type::getInt8Ty(getGlobalContext());
+ EXPECT_EQ(100, ConstantInt::get(Int8Ty, 100, false)->getSExtValue());
+ EXPECT_EQ(100, ConstantInt::get(Int8Ty, 100, true)->getSExtValue());
+ EXPECT_EQ(100, ConstantInt::getSigned(Int8Ty, 100)->getSExtValue());
+ EXPECT_EQ(-50, ConstantInt::get(Int8Ty, 206)->getSExtValue());
+ EXPECT_EQ(-50, ConstantInt::getSigned(Int8Ty, -50)->getSExtValue());
+ EXPECT_EQ(206U, ConstantInt::getSigned(Int8Ty, -50)->getZExtValue());
+
+ // Overflow is handled by truncation.
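+  // (0x13b is 315; truncating to 8 bits keeps 0x3b = 59.)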
+ EXPECT_EQ(0x3b, ConstantInt::get(Int8Ty, 0x13b)->getSExtValue());
+}
+
+TEST(ConstantsTest, FP128Test) {
+ Type *FP128Ty = Type::getFP128Ty(getGlobalContext());
+
+ IntegerType *Int128Ty = Type::getIntNTy(getGlobalContext(), 128);
+ Constant *Zero128 = Constant::getNullValue(Int128Ty);
+ Constant *X = ConstantExpr::getUIToFP(Zero128, FP128Ty);
+ EXPECT_TRUE(isa<ConstantFP>(X));
+}
+
+TEST(ConstantsTest, PointerCast) {
+ LLVMContext &C(getGlobalContext());
+ Type *Int8PtrTy = Type::getInt8PtrTy(C);
+ Type *Int32PtrTy = Type::getInt32PtrTy(C);
+ Type *Int64Ty = Type::getInt64Ty(C);
+ VectorType *Int8PtrVecTy = VectorType::get(Int8PtrTy, 4);
+ VectorType *Int32PtrVecTy = VectorType::get(Int32PtrTy, 4);
+ VectorType *Int64VecTy = VectorType::get(Int64Ty, 4);
+
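+  // getPointerCast derives the opcode from the destination type: integer
+  // destinations fold to ptrtoint, pointer destinations to bitcast.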
+ // ptrtoint i8* to i64
+ EXPECT_EQ(Constant::getNullValue(Int64Ty),
+ ConstantExpr::getPointerCast(
+ Constant::getNullValue(Int8PtrTy), Int64Ty));
+
+ // bitcast i8* to i32*
+ EXPECT_EQ(Constant::getNullValue(Int32PtrTy),
+ ConstantExpr::getPointerCast(
+ Constant::getNullValue(Int8PtrTy), Int32PtrTy));
+
+ // ptrtoint <4 x i8*> to <4 x i64>
+ EXPECT_EQ(Constant::getNullValue(Int64VecTy),
+ ConstantExpr::getPointerCast(
+ Constant::getNullValue(Int8PtrVecTy), Int64VecTy));
+
+ // bitcast <4 x i8*> to <4 x i32*>
+ EXPECT_EQ(Constant::getNullValue(Int32PtrVecTy),
+ ConstantExpr::getPointerCast(
+ Constant::getNullValue(Int8PtrVecTy), Int32PtrVecTy));
+}
+
+#define CHECK(x, y) { \
+ std::string __s; \
+ raw_string_ostream __o(__s); \
+ Instruction *__I = cast<ConstantExpr>(x)->getAsInstruction(); \
+ __I->print(__o); \
+ delete __I; \
+ __o.flush(); \
+ EXPECT_EQ(std::string(" <badref> = " y), __s); \
+ }
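+
+// CHECK materializes a ConstantExpr as a free-standing Instruction via
+// getAsInstruction() and compares its printed form; the instruction is never
+// inserted into a block, hence the "<badref>" in the expected text.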
+
+TEST(ConstantsTest, AsInstructionsTest) {
+ OwningPtr<Module> M(new Module("MyModule", getGlobalContext()));
+
+ Type *Int64Ty = Type::getInt64Ty(getGlobalContext());
+ Type *Int32Ty = Type::getInt32Ty(getGlobalContext());
+ Type *Int16Ty = Type::getInt16Ty(getGlobalContext());
+ Type *Int1Ty = Type::getInt1Ty(getGlobalContext());
+ Type *FloatTy = Type::getFloatTy(getGlobalContext());
+ Type *DoubleTy = Type::getDoubleTy(getGlobalContext());
+
+ Constant *Global = M->getOrInsertGlobal("dummy",
+ PointerType::getUnqual(Int32Ty));
+ Constant *Global2 = M->getOrInsertGlobal("dummy2",
+ PointerType::getUnqual(Int32Ty));
+
+ Constant *P0 = ConstantExpr::getPtrToInt(Global, Int32Ty);
+ Constant *P1 = ConstantExpr::getUIToFP(P0, FloatTy);
+ Constant *P2 = ConstantExpr::getUIToFP(P0, DoubleTy);
+ Constant *P3 = ConstantExpr::getTrunc(P0, Int1Ty);
+ Constant *P4 = ConstantExpr::getPtrToInt(Global2, Int32Ty);
+ Constant *P5 = ConstantExpr::getUIToFP(P4, FloatTy);
+ Constant *P6 = ConstantExpr::getBitCast(P4, VectorType::get(Int16Ty, 2));
+
+ Constant *One = ConstantInt::get(Int32Ty, 1);
+
+ #define P0STR "ptrtoint (i32** @dummy to i32)"
+ #define P1STR "uitofp (i32 ptrtoint (i32** @dummy to i32) to float)"
+ #define P2STR "uitofp (i32 ptrtoint (i32** @dummy to i32) to double)"
+ #define P3STR "ptrtoint (i32** @dummy to i1)"
+ #define P4STR "ptrtoint (i32** @dummy2 to i32)"
+ #define P5STR "uitofp (i32 ptrtoint (i32** @dummy2 to i32) to float)"
+ #define P6STR "bitcast (i32 ptrtoint (i32** @dummy2 to i32) to <2 x i16>)"
+
+ CHECK(ConstantExpr::getNeg(P0), "sub i32 0, " P0STR);
+ CHECK(ConstantExpr::getFNeg(P1), "fsub float -0.000000e+00, " P1STR);
+ CHECK(ConstantExpr::getNot(P0), "xor i32 " P0STR ", -1");
+ CHECK(ConstantExpr::getAdd(P0, P0), "add i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getAdd(P0, P0, false, true), "add nsw i32 " P0STR ", "
+ P0STR);
+ CHECK(ConstantExpr::getAdd(P0, P0, true, true), "add nuw nsw i32 " P0STR ", "
+ P0STR);
+ CHECK(ConstantExpr::getFAdd(P1, P1), "fadd float " P1STR ", " P1STR);
+ CHECK(ConstantExpr::getSub(P0, P0), "sub i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getFSub(P1, P1), "fsub float " P1STR ", " P1STR);
+ CHECK(ConstantExpr::getMul(P0, P0), "mul i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getFMul(P1, P1), "fmul float " P1STR ", " P1STR);
+ CHECK(ConstantExpr::getUDiv(P0, P0), "udiv i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getSDiv(P0, P0), "sdiv i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getFDiv(P1, P1), "fdiv float " P1STR ", " P1STR);
+ CHECK(ConstantExpr::getURem(P0, P0), "urem i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getSRem(P0, P0), "srem i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getFRem(P1, P1), "frem float " P1STR ", " P1STR);
+ CHECK(ConstantExpr::getAnd(P0, P0), "and i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getOr(P0, P0), "or i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getXor(P0, P0), "xor i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getShl(P0, P0), "shl i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getShl(P0, P0, true), "shl nuw i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getShl(P0, P0, false, true), "shl nsw i32 " P0STR ", "
+ P0STR);
+ CHECK(ConstantExpr::getLShr(P0, P0, false), "lshr i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getLShr(P0, P0, true), "lshr exact i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getAShr(P0, P0, false), "ashr i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getAShr(P0, P0, true), "ashr exact i32 " P0STR ", " P0STR);
+
+ CHECK(ConstantExpr::getSExt(P0, Int64Ty), "sext i32 " P0STR " to i64");
+ CHECK(ConstantExpr::getZExt(P0, Int64Ty), "zext i32 " P0STR " to i64");
+ CHECK(ConstantExpr::getFPTrunc(P2, FloatTy), "fptrunc double " P2STR
+ " to float");
+ CHECK(ConstantExpr::getFPExtend(P1, DoubleTy), "fpext float " P1STR
+ " to double");
+
+ CHECK(ConstantExpr::getExactUDiv(P0, P0), "udiv exact i32 " P0STR ", " P0STR);
+
+ CHECK(ConstantExpr::getSelect(P3, P0, P4), "select i1 " P3STR ", i32 " P0STR
+ ", i32 " P4STR);
+ CHECK(ConstantExpr::getICmp(CmpInst::ICMP_EQ, P0, P4), "icmp eq i32 " P0STR
+ ", " P4STR);
+ CHECK(ConstantExpr::getFCmp(CmpInst::FCMP_ULT, P1, P5), "fcmp ult float "
+ P1STR ", " P5STR);
+
+ std::vector<Constant*> V;
+ V.push_back(One);
+ // FIXME: getGetElementPtr() actually creates an inbounds ConstantGEP,
+ // not a normal one!
+ //CHECK(ConstantExpr::getGetElementPtr(Global, V, false),
+ // "getelementptr i32** @dummy, i32 1");
+ CHECK(ConstantExpr::getInBoundsGetElementPtr(Global, V),
+ "getelementptr inbounds i32** @dummy, i32 1");
+
+ CHECK(ConstantExpr::getExtractElement(P6, One), "extractelement <2 x i16> "
+ P6STR ", i32 1");
+}
+
+#undef CHECK
+
+} // end anonymous namespace
+} // end namespace llvm
diff --git a/unittests/IR/DominatorTreeTest.cpp b/unittests/IR/DominatorTreeTest.cpp
new file mode 100644
index 000000000000..4e5af9395cc8
--- /dev/null
+++ b/unittests/IR/DominatorTreeTest.cpp
@@ -0,0 +1,207 @@
+//===- llvm/unittests/IR/DominatorTreeTest.cpp - DominatorTree unit tests -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeDPassPass(PassRegistry&);
+
+ namespace {
+ struct DPass : public FunctionPass {
+ static char ID;
+ virtual bool runOnFunction(Function &F) {
+ DominatorTree *DT = &getAnalysis<DominatorTree>();
+ Function::iterator FI = F.begin();
+
+ BasicBlock *BB0 = FI++;
+ BasicBlock::iterator BBI = BB0->begin();
+ Instruction *Y1 = BBI++;
+ Instruction *Y2 = BBI++;
+ Instruction *Y3 = BBI++;
+
+ BasicBlock *BB1 = FI++;
+ BBI = BB1->begin();
+ Instruction *Y4 = BBI++;
+
+ BasicBlock *BB2 = FI++;
+ BBI = BB2->begin();
+ Instruction *Y5 = BBI++;
+
+ BasicBlock *BB3 = FI++;
+ BBI = BB3->begin();
+ Instruction *Y6 = BBI++;
+ Instruction *Y7 = BBI++;
+
+ BasicBlock *BB4 = FI++;
+ BBI = BB4->begin();
+ Instruction *Y8 = BBI++;
+ Instruction *Y9 = BBI++;
+
+ // Reachability
+ EXPECT_TRUE(DT->isReachableFromEntry(BB0));
+ EXPECT_TRUE(DT->isReachableFromEntry(BB1));
+ EXPECT_TRUE(DT->isReachableFromEntry(BB2));
+ EXPECT_FALSE(DT->isReachableFromEntry(BB3));
+ EXPECT_TRUE(DT->isReachableFromEntry(BB4));
+
+ // BB dominance
+ EXPECT_TRUE(DT->dominates(BB0, BB0));
+ EXPECT_TRUE(DT->dominates(BB0, BB1));
+ EXPECT_TRUE(DT->dominates(BB0, BB2));
+ EXPECT_TRUE(DT->dominates(BB0, BB3));
+ EXPECT_TRUE(DT->dominates(BB0, BB4));
+
+ EXPECT_FALSE(DT->dominates(BB1, BB0));
+ EXPECT_TRUE(DT->dominates(BB1, BB1));
+ EXPECT_FALSE(DT->dominates(BB1, BB2));
+ EXPECT_TRUE(DT->dominates(BB1, BB3));
+ EXPECT_FALSE(DT->dominates(BB1, BB4));
+
+ EXPECT_FALSE(DT->dominates(BB2, BB0));
+ EXPECT_FALSE(DT->dominates(BB2, BB1));
+ EXPECT_TRUE(DT->dominates(BB2, BB2));
+ EXPECT_TRUE(DT->dominates(BB2, BB3));
+ EXPECT_FALSE(DT->dominates(BB2, BB4));
+
+ EXPECT_FALSE(DT->dominates(BB3, BB0));
+ EXPECT_FALSE(DT->dominates(BB3, BB1));
+ EXPECT_FALSE(DT->dominates(BB3, BB2));
+ EXPECT_TRUE(DT->dominates(BB3, BB3));
+ EXPECT_FALSE(DT->dominates(BB3, BB4));
+
+ // BB proper dominance
+ EXPECT_FALSE(DT->properlyDominates(BB0, BB0));
+ EXPECT_TRUE(DT->properlyDominates(BB0, BB1));
+ EXPECT_TRUE(DT->properlyDominates(BB0, BB2));
+ EXPECT_TRUE(DT->properlyDominates(BB0, BB3));
+
+ EXPECT_FALSE(DT->properlyDominates(BB1, BB0));
+ EXPECT_FALSE(DT->properlyDominates(BB1, BB1));
+ EXPECT_FALSE(DT->properlyDominates(BB1, BB2));
+ EXPECT_TRUE(DT->properlyDominates(BB1, BB3));
+
+ EXPECT_FALSE(DT->properlyDominates(BB2, BB0));
+ EXPECT_FALSE(DT->properlyDominates(BB2, BB1));
+ EXPECT_FALSE(DT->properlyDominates(BB2, BB2));
+ EXPECT_TRUE(DT->properlyDominates(BB2, BB3));
+
+ EXPECT_FALSE(DT->properlyDominates(BB3, BB0));
+ EXPECT_FALSE(DT->properlyDominates(BB3, BB1));
+ EXPECT_FALSE(DT->properlyDominates(BB3, BB2));
+ EXPECT_FALSE(DT->properlyDominates(BB3, BB3));
+
+ // Instruction dominance in the same reachable BB
+ EXPECT_FALSE(DT->dominates(Y1, Y1));
+ EXPECT_TRUE(DT->dominates(Y1, Y2));
+ EXPECT_FALSE(DT->dominates(Y2, Y1));
+ EXPECT_FALSE(DT->dominates(Y2, Y2));
+
+ // Instruction dominance in the same unreachable BB
+ EXPECT_TRUE(DT->dominates(Y6, Y6));
+ EXPECT_TRUE(DT->dominates(Y6, Y7));
+ EXPECT_TRUE(DT->dominates(Y7, Y6));
+ EXPECT_TRUE(DT->dominates(Y7, Y7));
+
+ // Invoke
+ EXPECT_TRUE(DT->dominates(Y3, Y4));
+ EXPECT_FALSE(DT->dominates(Y3, Y5));
+
+ // Phi
+ EXPECT_TRUE(DT->dominates(Y2, Y9));
+ EXPECT_FALSE(DT->dominates(Y3, Y9));
+ EXPECT_FALSE(DT->dominates(Y8, Y9));
+
+ // Anything dominates unreachable
+ EXPECT_TRUE(DT->dominates(Y1, Y6));
+ EXPECT_TRUE(DT->dominates(Y3, Y6));
+
+ // Unreachable doesn't dominate reachable
+ EXPECT_FALSE(DT->dominates(Y6, Y1));
+
+ // Instruction, BB dominance
+ EXPECT_FALSE(DT->dominates(Y1, BB0));
+ EXPECT_TRUE(DT->dominates(Y1, BB1));
+ EXPECT_TRUE(DT->dominates(Y1, BB2));
+ EXPECT_TRUE(DT->dominates(Y1, BB3));
+ EXPECT_TRUE(DT->dominates(Y1, BB4));
+
+ EXPECT_FALSE(DT->dominates(Y3, BB0));
+ EXPECT_TRUE(DT->dominates(Y3, BB1));
+ EXPECT_FALSE(DT->dominates(Y3, BB2));
+ EXPECT_TRUE(DT->dominates(Y3, BB3));
+ EXPECT_FALSE(DT->dominates(Y3, BB4));
+
+ EXPECT_TRUE(DT->dominates(Y6, BB3));
+
+ return false;
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ }
+ DPass() : FunctionPass(ID) {
+ initializeDPassPass(*PassRegistry::getPassRegistry());
+ }
+ };
+ char DPass::ID = 0;
+
+
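+  // The module built below has this shape: bb0 ends in an invoke whose
+  // normal destination is bb1 and whose unwind destination is bb2; bb1 and
+  // bb2 both branch to bb4; bb3 has no predecessors and stays unreachable.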
+ Module* makeLLVMModule(DPass *P) {
+    const char *ModuleString =
+ "declare i32 @g()\n" \
+ "define void @f(i32 %x) {\n" \
+ "bb0:\n" \
+ " %y1 = add i32 %x, 1\n" \
+ " %y2 = add i32 %x, 1\n" \
+ " %y3 = invoke i32 @g() to label %bb1 unwind label %bb2\n" \
+ "bb1:\n" \
+ " %y4 = add i32 %x, 1\n" \
+ " br label %bb4\n" \
+ "bb2:\n" \
+ " %y5 = landingpad i32 personality i32 ()* @g\n" \
+ " cleanup\n" \
+ " br label %bb4\n" \
+ "bb3:\n" \
+ " %y6 = add i32 %x, 1\n" \
+ " %y7 = add i32 %x, 1\n" \
+ " ret void\n" \
+ "bb4:\n" \
+      "  %y8 = phi i32 [0, %bb2], [%y4, %bb1]\n" \
+      "  %y9 = phi i32 [0, %bb2], [%y4, %bb1]\n" \
+ " ret void\n" \
+ "}\n";
+ LLVMContext &C = getGlobalContext();
+ SMDiagnostic Err;
+    return ParseAssemblyString(ModuleString, NULL, Err, C);
+ }
+
+ TEST(DominatorTree, Unreachable) {
+ DPass *P = new DPass();
+ OwningPtr<Module> M(makeLLVMModule(P));
+ PassManager Passes;
+ Passes.add(P);
+ Passes.run(*M);
+ }
+ }
+}
+
+INITIALIZE_PASS_BEGIN(DPass, "dpass", "dpass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(DPass, "dpass", "dpass", false, false)
diff --git a/unittests/IR/IRBuilderTest.cpp b/unittests/IR/IRBuilderTest.cpp
new file mode 100644
index 000000000000..fecc4a4fe6b4
--- /dev/null
+++ b/unittests/IR/IRBuilderTest.cpp
@@ -0,0 +1,187 @@
+//===- llvm/unittest/IR/IRBuilderTest.cpp - IRBuilder tests ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+class IRBuilderTest : public testing::Test {
+protected:
+ virtual void SetUp() {
+ M.reset(new Module("MyModule", getGlobalContext()));
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(getGlobalContext()),
+ /*isVarArg=*/false);
+ F = Function::Create(FTy, Function::ExternalLinkage, "", M.get());
+ BB = BasicBlock::Create(getGlobalContext(), "", F);
+ GV = new GlobalVariable(*M, Type::getFloatTy(getGlobalContext()), true,
+ GlobalValue::ExternalLinkage, 0);
+ }
+
+ virtual void TearDown() {
+ BB = 0;
+ M.reset();
+ }
+
+ OwningPtr<Module> M;
+ Function *F;
+ BasicBlock *BB;
+ GlobalVariable *GV;
+};
+
+TEST_F(IRBuilderTest, Lifetime) {
+ IRBuilder<> Builder(BB);
+ AllocaInst *Var1 = Builder.CreateAlloca(Builder.getInt8Ty());
+ AllocaInst *Var2 = Builder.CreateAlloca(Builder.getInt32Ty());
+ AllocaInst *Var3 = Builder.CreateAlloca(Builder.getInt8Ty(),
+ Builder.getInt32(123));
+
+ CallInst *Start1 = Builder.CreateLifetimeStart(Var1);
+ CallInst *Start2 = Builder.CreateLifetimeStart(Var2);
+ CallInst *Start3 = Builder.CreateLifetimeStart(Var3, Builder.getInt64(100));
+
+ EXPECT_EQ(Start1->getArgOperand(0), Builder.getInt64(-1));
+ EXPECT_EQ(Start2->getArgOperand(0), Builder.getInt64(-1));
+ EXPECT_EQ(Start3->getArgOperand(0), Builder.getInt64(100));
+
+ EXPECT_EQ(Start1->getArgOperand(1), Var1);
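+  // Var2 is an i32 alloca, so CreateLifetimeStart wraps it in a bitcast to
+  // i8*; the intrinsic's pointer operand is that cast, not Var2 itself.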
+ EXPECT_NE(Start2->getArgOperand(1), Var2);
+ EXPECT_EQ(Start3->getArgOperand(1), Var3);
+
+ Value *End1 = Builder.CreateLifetimeEnd(Var1);
+ Builder.CreateLifetimeEnd(Var2);
+ Builder.CreateLifetimeEnd(Var3);
+
+ IntrinsicInst *II_Start1 = dyn_cast<IntrinsicInst>(Start1);
+ IntrinsicInst *II_End1 = dyn_cast<IntrinsicInst>(End1);
+ ASSERT_TRUE(II_Start1 != NULL);
+ EXPECT_EQ(II_Start1->getIntrinsicID(), Intrinsic::lifetime_start);
+ ASSERT_TRUE(II_End1 != NULL);
+ EXPECT_EQ(II_End1->getIntrinsicID(), Intrinsic::lifetime_end);
+}
+
+TEST_F(IRBuilderTest, CreateCondBr) {
+ IRBuilder<> Builder(BB);
+ BasicBlock *TBB = BasicBlock::Create(getGlobalContext(), "", F);
+ BasicBlock *FBB = BasicBlock::Create(getGlobalContext(), "", F);
+
+ BranchInst *BI = Builder.CreateCondBr(Builder.getTrue(), TBB, FBB);
+ TerminatorInst *TI = BB->getTerminator();
+ EXPECT_EQ(BI, TI);
+ EXPECT_EQ(2u, TI->getNumSuccessors());
+ EXPECT_EQ(TBB, TI->getSuccessor(0));
+ EXPECT_EQ(FBB, TI->getSuccessor(1));
+
+ BI->eraseFromParent();
+ MDNode *Weights = MDBuilder(getGlobalContext()).createBranchWeights(42, 13);
+ BI = Builder.CreateCondBr(Builder.getTrue(), TBB, FBB, Weights);
+ TI = BB->getTerminator();
+ EXPECT_EQ(BI, TI);
+ EXPECT_EQ(2u, TI->getNumSuccessors());
+ EXPECT_EQ(TBB, TI->getSuccessor(0));
+ EXPECT_EQ(FBB, TI->getSuccessor(1));
+ EXPECT_EQ(Weights, TI->getMetadata(LLVMContext::MD_prof));
+}
+
+TEST_F(IRBuilderTest, LandingPadName) {
+ IRBuilder<> Builder(BB);
+ LandingPadInst *LP = Builder.CreateLandingPad(Builder.getInt32Ty(),
+ Builder.getInt32(0), 0, "LP");
+ EXPECT_EQ(LP->getName(), "LP");
+}
+
+TEST_F(IRBuilderTest, GetIntTy) {
+ IRBuilder<> Builder(BB);
+ IntegerType *Ty1 = Builder.getInt1Ty();
+ EXPECT_EQ(Ty1, IntegerType::get(getGlobalContext(), 1));
+
+ DataLayout* DL = new DataLayout(M.get());
+ IntegerType *IntPtrTy = Builder.getIntPtrTy(DL);
+ unsigned IntPtrBitSize = DL->getPointerSizeInBits(0);
+ EXPECT_EQ(IntPtrTy, IntegerType::get(getGlobalContext(), IntPtrBitSize));
+ delete DL;
+}
+
+TEST_F(IRBuilderTest, FastMathFlags) {
+ IRBuilder<> Builder(BB);
+ Value *F;
+ Instruction *FDiv, *FAdd;
+
+ F = Builder.CreateLoad(GV);
+ F = Builder.CreateFAdd(F, F);
+
+ EXPECT_FALSE(Builder.getFastMathFlags().any());
+ ASSERT_TRUE(isa<Instruction>(F));
+ FAdd = cast<Instruction>(F);
+ EXPECT_FALSE(FAdd->hasNoNaNs());
+
+ FastMathFlags FMF;
+ Builder.SetFastMathFlags(FMF);
+
+ F = Builder.CreateFAdd(F, F);
+ EXPECT_FALSE(Builder.getFastMathFlags().any());
+
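+  // Setting unsafe algebra implies every other fast-math flag.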
+ FMF.setUnsafeAlgebra();
+ Builder.SetFastMathFlags(FMF);
+
+ F = Builder.CreateFAdd(F, F);
+ EXPECT_TRUE(Builder.getFastMathFlags().any());
+ ASSERT_TRUE(isa<Instruction>(F));
+ FAdd = cast<Instruction>(F);
+ EXPECT_TRUE(FAdd->hasNoNaNs());
+
+ F = Builder.CreateFDiv(F, F);
+ EXPECT_TRUE(Builder.getFastMathFlags().any());
+ EXPECT_TRUE(Builder.getFastMathFlags().UnsafeAlgebra);
+ ASSERT_TRUE(isa<Instruction>(F));
+ FDiv = cast<Instruction>(F);
+ EXPECT_TRUE(FDiv->hasAllowReciprocal());
+
+ Builder.clearFastMathFlags();
+
+ F = Builder.CreateFDiv(F, F);
+ ASSERT_TRUE(isa<Instruction>(F));
+ FDiv = cast<Instruction>(F);
+ EXPECT_FALSE(FDiv->hasAllowReciprocal());
+
+ FMF.clear();
+ FMF.setAllowReciprocal();
+ Builder.SetFastMathFlags(FMF);
+
+ F = Builder.CreateFDiv(F, F);
+ EXPECT_TRUE(Builder.getFastMathFlags().any());
+ EXPECT_TRUE(Builder.getFastMathFlags().AllowReciprocal);
+ ASSERT_TRUE(isa<Instruction>(F));
+ FDiv = cast<Instruction>(F);
+ EXPECT_TRUE(FDiv->hasAllowReciprocal());
+
+ Builder.clearFastMathFlags();
+
+ F = Builder.CreateFDiv(F, F);
+ ASSERT_TRUE(isa<Instruction>(F));
+ FDiv = cast<Instruction>(F);
+ EXPECT_FALSE(FDiv->getFastMathFlags().any());
+ FDiv->copyFastMathFlags(FAdd);
+ EXPECT_TRUE(FDiv->hasNoNaNs());
+}
+
+} // end anonymous namespace
diff --git a/unittests/IR/InstructionsTest.cpp b/unittests/IR/InstructionsTest.cpp
new file mode 100644
index 000000000000..9f66af147180
--- /dev/null
+++ b/unittests/IR/InstructionsTest.cpp
@@ -0,0 +1,294 @@
+//===- llvm/unittest/IR/InstructionsTest.cpp - Instructions unit tests ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Instructions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Operator.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace {
+
+TEST(InstructionsTest, ReturnInst) {
+ LLVMContext &C(getGlobalContext());
+
+ // test for PR6589
+ const ReturnInst* r0 = ReturnInst::Create(C);
+ EXPECT_EQ(r0->getNumOperands(), 0U);
+ EXPECT_EQ(r0->op_begin(), r0->op_end());
+
+ IntegerType* Int1 = IntegerType::get(C, 1);
+ Constant* One = ConstantInt::get(Int1, 1, true);
+ const ReturnInst* r1 = ReturnInst::Create(C, One);
+ EXPECT_EQ(1U, r1->getNumOperands());
+ User::const_op_iterator b(r1->op_begin());
+ EXPECT_NE(r1->op_end(), b);
+ EXPECT_EQ(One, *b);
+ EXPECT_EQ(One, r1->getOperand(0));
+ ++b;
+ EXPECT_EQ(r1->op_end(), b);
+
+ // clean up
+ delete r0;
+ delete r1;
+}
+
+TEST(InstructionsTest, BranchInst) {
+ LLVMContext &C(getGlobalContext());
+
+  // Make a couple of BasicBlocks
+ BasicBlock* bb0 = BasicBlock::Create(C);
+ BasicBlock* bb1 = BasicBlock::Create(C);
+
+ // Mandatory BranchInst
+ const BranchInst* b0 = BranchInst::Create(bb0);
+
+ EXPECT_TRUE(b0->isUnconditional());
+ EXPECT_FALSE(b0->isConditional());
+ EXPECT_EQ(1U, b0->getNumSuccessors());
+
+ // check num operands
+ EXPECT_EQ(1U, b0->getNumOperands());
+
+ EXPECT_NE(b0->op_begin(), b0->op_end());
+ EXPECT_EQ(b0->op_end(), llvm::next(b0->op_begin()));
+
+ IntegerType* Int1 = IntegerType::get(C, 1);
+ Constant* One = ConstantInt::get(Int1, 1, true);
+
+ // Conditional BranchInst
+ BranchInst* b1 = BranchInst::Create(bb0, bb1, One);
+
+ EXPECT_FALSE(b1->isUnconditional());
+ EXPECT_TRUE(b1->isConditional());
+ EXPECT_EQ(2U, b1->getNumSuccessors());
+
+ // check num operands
+ EXPECT_EQ(3U, b1->getNumOperands());
+
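+  // Operand order for a conditional branch: the condition first, then the
+  // false successor, then the true successor.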
+ User::const_op_iterator b(b1->op_begin());
+
+ // check COND
+ EXPECT_NE(b, b1->op_end());
+ EXPECT_EQ(One, *b);
+ EXPECT_EQ(One, b1->getOperand(0));
+ EXPECT_EQ(One, b1->getCondition());
+ ++b;
+
+ // check ELSE
+ EXPECT_EQ(bb1, *b);
+ EXPECT_EQ(bb1, b1->getOperand(1));
+ EXPECT_EQ(bb1, b1->getSuccessor(1));
+ ++b;
+
+ // check THEN
+ EXPECT_EQ(bb0, *b);
+ EXPECT_EQ(bb0, b1->getOperand(2));
+ EXPECT_EQ(bb0, b1->getSuccessor(0));
+ ++b;
+
+ EXPECT_EQ(b1->op_end(), b);
+
+ // clean up
+ delete b0;
+ delete b1;
+
+ delete bb0;
+ delete bb1;
+}
+
+TEST(InstructionsTest, CastInst) {
+ LLVMContext &C(getGlobalContext());
+
+ Type* Int8Ty = Type::getInt8Ty(C);
+ Type* Int64Ty = Type::getInt64Ty(C);
+ Type* V8x8Ty = VectorType::get(Int8Ty, 8);
+ Type* V8x64Ty = VectorType::get(Int64Ty, 8);
+ Type* X86MMXTy = Type::getX86_MMXTy(C);
+
+ const Constant* c8 = Constant::getNullValue(V8x8Ty);
+ const Constant* c64 = Constant::getNullValue(V8x64Ty);
+
+ EXPECT_TRUE(CastInst::isCastable(V8x8Ty, X86MMXTy));
+ EXPECT_TRUE(CastInst::isCastable(X86MMXTy, V8x8Ty));
+ EXPECT_FALSE(CastInst::isCastable(Int64Ty, X86MMXTy));
+ EXPECT_TRUE(CastInst::isCastable(V8x64Ty, V8x8Ty));
+ EXPECT_TRUE(CastInst::isCastable(V8x8Ty, V8x64Ty));
+ EXPECT_EQ(CastInst::Trunc, CastInst::getCastOpcode(c64, true, V8x8Ty, true));
+ EXPECT_EQ(CastInst::SExt, CastInst::getCastOpcode(c8, true, V8x64Ty, true));
+}
+
+
+
+TEST(InstructionsTest, VectorGep) {
+ LLVMContext &C(getGlobalContext());
+
+ // Type Definitions
+ PointerType *Ptri8Ty = PointerType::get(IntegerType::get(C, 8), 0);
+  PointerType *Ptri32Ty = PointerType::get(IntegerType::get(C, 32), 0);
+
+ VectorType *V2xi8PTy = VectorType::get(Ptri8Ty, 2);
+ VectorType *V2xi32PTy = VectorType::get(Ptri32Ty, 2);
+
+ // Test different aspects of the vector-of-pointers type
+ // and GEPs which use this type.
+ ConstantInt *Ci32a = ConstantInt::get(C, APInt(32, 1492));
+ ConstantInt *Ci32b = ConstantInt::get(C, APInt(32, 1948));
+ std::vector<Constant*> ConstVa(2, Ci32a);
+ std::vector<Constant*> ConstVb(2, Ci32b);
+ Constant *C2xi32a = ConstantVector::get(ConstVa);
+ Constant *C2xi32b = ConstantVector::get(ConstVb);
+
+ CastInst *PtrVecA = new IntToPtrInst(C2xi32a, V2xi32PTy);
+ CastInst *PtrVecB = new IntToPtrInst(C2xi32b, V2xi32PTy);
+
+ ICmpInst *ICmp0 = new ICmpInst(ICmpInst::ICMP_SGT, PtrVecA, PtrVecB);
+ ICmpInst *ICmp1 = new ICmpInst(ICmpInst::ICMP_ULT, PtrVecA, PtrVecB);
+ EXPECT_NE(ICmp0, ICmp1); // suppress warning.
+
+ BasicBlock* BB0 = BasicBlock::Create(C);
+ // Test InsertAtEnd ICmpInst constructor.
+ ICmpInst *ICmp2 = new ICmpInst(*BB0, ICmpInst::ICMP_SGE, PtrVecA, PtrVecB);
+ EXPECT_NE(ICmp0, ICmp2); // suppress warning.
+
+ GetElementPtrInst *Gep0 = GetElementPtrInst::Create(PtrVecA, C2xi32a);
+ GetElementPtrInst *Gep1 = GetElementPtrInst::Create(PtrVecA, C2xi32b);
+ GetElementPtrInst *Gep2 = GetElementPtrInst::Create(PtrVecB, C2xi32a);
+ GetElementPtrInst *Gep3 = GetElementPtrInst::Create(PtrVecB, C2xi32b);
+
+ CastInst *BTC0 = new BitCastInst(Gep0, V2xi8PTy);
+ CastInst *BTC1 = new BitCastInst(Gep1, V2xi8PTy);
+ CastInst *BTC2 = new BitCastInst(Gep2, V2xi8PTy);
+ CastInst *BTC3 = new BitCastInst(Gep3, V2xi8PTy);
+
+ Value *S0 = BTC0->stripPointerCasts();
+ Value *S1 = BTC1->stripPointerCasts();
+ Value *S2 = BTC2->stripPointerCasts();
+ Value *S3 = BTC3->stripPointerCasts();
+
+ EXPECT_NE(S0, Gep0);
+ EXPECT_NE(S1, Gep1);
+ EXPECT_NE(S2, Gep2);
+ EXPECT_NE(S3, Gep3);
+
+ int64_t Offset;
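+  // A typical x86-64 data layout string: little-endian ("e"), 64-bit
+  // pointers, and natural alignments for the listed scalar and vector types.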
+ DataLayout TD("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3"
+ "2:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80"
+ ":128:128-n8:16:32:64-S128");
+ // Make sure we don't crash
+ GetPointerBaseWithConstantOffset(Gep0, Offset, &TD);
+ GetPointerBaseWithConstantOffset(Gep1, Offset, &TD);
+ GetPointerBaseWithConstantOffset(Gep2, Offset, &TD);
+ GetPointerBaseWithConstantOffset(Gep3, Offset, &TD);
+
+ // Gep of Geps
+ GetElementPtrInst *GepII0 = GetElementPtrInst::Create(Gep0, C2xi32b);
+ GetElementPtrInst *GepII1 = GetElementPtrInst::Create(Gep1, C2xi32a);
+ GetElementPtrInst *GepII2 = GetElementPtrInst::Create(Gep2, C2xi32b);
+ GetElementPtrInst *GepII3 = GetElementPtrInst::Create(Gep3, C2xi32a);
+
+ EXPECT_EQ(GepII0->getNumIndices(), 1u);
+ EXPECT_EQ(GepII1->getNumIndices(), 1u);
+ EXPECT_EQ(GepII2->getNumIndices(), 1u);
+ EXPECT_EQ(GepII3->getNumIndices(), 1u);
+
+ EXPECT_FALSE(GepII0->hasAllZeroIndices());
+ EXPECT_FALSE(GepII1->hasAllZeroIndices());
+ EXPECT_FALSE(GepII2->hasAllZeroIndices());
+ EXPECT_FALSE(GepII3->hasAllZeroIndices());
+
+ delete GepII0;
+ delete GepII1;
+ delete GepII2;
+ delete GepII3;
+
+ delete BTC0;
+ delete BTC1;
+ delete BTC2;
+ delete BTC3;
+
+ delete Gep0;
+ delete Gep1;
+ delete Gep2;
+ delete Gep3;
+
+ ICmp2->eraseFromParent();
+ delete BB0;
+
+ delete ICmp0;
+ delete ICmp1;
+ delete PtrVecA;
+ delete PtrVecB;
+}
+
+TEST(InstructionsTest, FPMathOperator) {
+ LLVMContext &Context = getGlobalContext();
+ IRBuilder<> Builder(Context);
+ MDBuilder MDHelper(Context);
+ Instruction *I = Builder.CreatePHI(Builder.getDoubleTy(), 0);
+ MDNode *MD1 = MDHelper.createFPMath(1.0);
+ Value *V1 = Builder.CreateFAdd(I, I, "", MD1);
+ EXPECT_TRUE(isa<FPMathOperator>(V1));
+ FPMathOperator *O1 = cast<FPMathOperator>(V1);
+ EXPECT_EQ(O1->getFPAccuracy(), 1.0);
+ delete V1;
+ delete I;
+}
+
+
+TEST(InstructionsTest, isEliminableCastPair) {
+ LLVMContext &C(getGlobalContext());
+
+ Type* Int32Ty = Type::getInt32Ty(C);
+ Type* Int64Ty = Type::getInt64Ty(C);
+ Type* Int64PtrTy = Type::getInt64PtrTy(C);
+
+ // Source and destination pointers have same size -> bitcast.
+ EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::PtrToInt,
+ CastInst::IntToPtr,
+ Int64PtrTy, Int64Ty, Int64PtrTy,
+ Int32Ty, 0, Int32Ty),
+ CastInst::BitCast);
+
+ // Source and destination pointers have different sizes -> fail.
+ EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::PtrToInt,
+ CastInst::IntToPtr,
+ Int64PtrTy, Int64Ty, Int64PtrTy,
+ Int32Ty, 0, Int64Ty),
+ 0U);
+
+ // Middle pointer big enough -> bitcast.
+ EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::IntToPtr,
+ CastInst::PtrToInt,
+ Int64Ty, Int64PtrTy, Int64Ty,
+ 0, Int64Ty, 0),
+ CastInst::BitCast);
+
+ // Middle pointer too small -> fail.
+ EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::IntToPtr,
+ CastInst::PtrToInt,
+ Int64Ty, Int64PtrTy, Int64Ty,
+ 0, Int32Ty, 0),
+ 0U);
+}
+
+} // end anonymous namespace
+} // end namespace llvm
diff --git a/unittests/IR/MDBuilderTest.cpp b/unittests/IR/MDBuilderTest.cpp
new file mode 100644
index 000000000000..665d559bf0b7
--- /dev/null
+++ b/unittests/IR/MDBuilderTest.cpp
@@ -0,0 +1,108 @@
+//===- llvm/unittests/IR/MDBuilderTest.cpp - MDBuilder unit tests ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Operator.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+class MDBuilderTest : public testing::Test {
+protected:
+ LLVMContext Context;
+};
+
+TEST_F(MDBuilderTest, createString) {
+ MDBuilder MDHelper(Context);
+ MDString *Str0 = MDHelper.createString("");
+ MDString *Str1 = MDHelper.createString("string");
+ EXPECT_EQ(Str0->getString(), StringRef(""));
+ EXPECT_EQ(Str1->getString(), StringRef("string"));
+}
+TEST_F(MDBuilderTest, createFPMath) {
+ MDBuilder MDHelper(Context);
+ MDNode *MD0 = MDHelper.createFPMath(0.0);
+ MDNode *MD1 = MDHelper.createFPMath(1.0);
+ EXPECT_EQ(MD0, (MDNode *)0);
+ EXPECT_NE(MD1, (MDNode *)0);
+ EXPECT_EQ(MD1->getNumOperands(), 1U);
+ Value *Op = MD1->getOperand(0);
+ EXPECT_TRUE(isa<ConstantFP>(Op));
+ EXPECT_TRUE(Op->getType()->isFloatingPointTy());
+ ConstantFP *Val = cast<ConstantFP>(Op);
+ EXPECT_TRUE(Val->isExactlyValue(1.0));
+}
+TEST_F(MDBuilderTest, createRangeMetadata) {
+ MDBuilder MDHelper(Context);
+ APInt A(8, 1), B(8, 2);
+ MDNode *R0 = MDHelper.createRange(A, A);
+ MDNode *R1 = MDHelper.createRange(A, B);
+ EXPECT_EQ(R0, (MDNode *)0);
+ EXPECT_NE(R1, (MDNode *)0);
+ EXPECT_EQ(R1->getNumOperands(), 2U);
+ EXPECT_TRUE(isa<ConstantInt>(R1->getOperand(0)));
+ EXPECT_TRUE(isa<ConstantInt>(R1->getOperand(1)));
+ ConstantInt *C0 = cast<ConstantInt>(R1->getOperand(0));
+ ConstantInt *C1 = cast<ConstantInt>(R1->getOperand(1));
+ EXPECT_EQ(C0->getValue(), A);
+ EXPECT_EQ(C1->getValue(), B);
+}
+TEST_F(MDBuilderTest, createAnonymousTBAARoot) {
+ MDBuilder MDHelper(Context);
+ MDNode *R0 = MDHelper.createAnonymousTBAARoot();
+ MDNode *R1 = MDHelper.createAnonymousTBAARoot();
+ EXPECT_NE(R0, R1);
+ EXPECT_GE(R0->getNumOperands(), 1U);
+ EXPECT_GE(R1->getNumOperands(), 1U);
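+  // An anonymous root is a distinct self-referential node: operand 0 points
+  // back at the node itself, which keeps two such roots from uniquing.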
+ EXPECT_EQ(R0->getOperand(0), R0);
+ EXPECT_EQ(R1->getOperand(0), R1);
+ EXPECT_TRUE(R0->getNumOperands() == 1 || R0->getOperand(1) == 0);
+ EXPECT_TRUE(R1->getNumOperands() == 1 || R1->getOperand(1) == 0);
+}
+TEST_F(MDBuilderTest, createTBAARoot) {
+ MDBuilder MDHelper(Context);
+ MDNode *R0 = MDHelper.createTBAARoot("Root");
+ MDNode *R1 = MDHelper.createTBAARoot("Root");
+ EXPECT_EQ(R0, R1);
+ EXPECT_GE(R0->getNumOperands(), 1U);
+ EXPECT_TRUE(isa<MDString>(R0->getOperand(0)));
+ EXPECT_EQ(cast<MDString>(R0->getOperand(0))->getString(), "Root");
+ EXPECT_TRUE(R0->getNumOperands() == 1 || R0->getOperand(1) == 0);
+}
+TEST_F(MDBuilderTest, createTBAANode) {
+ MDBuilder MDHelper(Context);
+ MDNode *R = MDHelper.createTBAARoot("Root");
+ MDNode *N0 = MDHelper.createTBAANode("Node", R);
+ MDNode *N1 = MDHelper.createTBAANode("edoN", R);
+ MDNode *N2 = MDHelper.createTBAANode("Node", R, true);
+ MDNode *N3 = MDHelper.createTBAANode("Node", R);
+ EXPECT_EQ(N0, N3);
+ EXPECT_NE(N0, N1);
+ EXPECT_NE(N0, N2);
+ EXPECT_GE(N0->getNumOperands(), 2U);
+ EXPECT_GE(N1->getNumOperands(), 2U);
+ EXPECT_GE(N2->getNumOperands(), 3U);
+ EXPECT_TRUE(isa<MDString>(N0->getOperand(0)));
+ EXPECT_TRUE(isa<MDString>(N1->getOperand(0)));
+ EXPECT_TRUE(isa<MDString>(N2->getOperand(0)));
+ EXPECT_EQ(cast<MDString>(N0->getOperand(0))->getString(), "Node");
+ EXPECT_EQ(cast<MDString>(N1->getOperand(0))->getString(), "edoN");
+ EXPECT_EQ(cast<MDString>(N2->getOperand(0))->getString(), "Node");
+ EXPECT_EQ(N0->getOperand(1), R);
+ EXPECT_EQ(N1->getOperand(1), R);
+ EXPECT_EQ(N2->getOperand(1), R);
+ EXPECT_TRUE(isa<ConstantInt>(N2->getOperand(2)));
+ EXPECT_EQ(cast<ConstantInt>(N2->getOperand(2))->getZExtValue(), 1U);
+}
+} // end anonymous namespace
diff --git a/unittests/IR/Makefile b/unittests/IR/Makefile
new file mode 100644
index 000000000000..7c59003c92a7
--- /dev/null
+++ b/unittests/IR/Makefile
@@ -0,0 +1,15 @@
+##===- unittests/IR/Makefile -------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TESTNAME = IR
+LINK_COMPONENTS := core ipa asmparser
+
+include $(LEVEL)/Makefile.config
+include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/IR/MetadataTest.cpp b/unittests/IR/MetadataTest.cpp
new file mode 100644
index 000000000000..352e83ee662e
--- /dev/null
+++ b/unittests/IR/MetadataTest.cpp
@@ -0,0 +1,152 @@
+//===- llvm/unittest/IR/MetadataTest.cpp - Metadata unit tests ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+using namespace llvm;
+
+namespace {
+
+class MetadataTest : public testing::Test {
+protected:
+ LLVMContext Context;
+};
+typedef MetadataTest MDStringTest;
+
+// Test that constructing MDStrings with different values produces different
+// MDString objects, even with the same string pointer and nulls in the string.
+TEST_F(MDStringTest, CreateDifferent) {
+ char x[3] = { 'f', 0, 'A' };
+ MDString *s1 = MDString::get(Context, StringRef(&x[0], 3));
+ x[2] = 'B';
+ MDString *s2 = MDString::get(Context, StringRef(&x[0], 3));
+ EXPECT_NE(s1, s2);
+}
+
+// Test that creation of MDStrings with the same string contents produces the
+// same MDString object, even with different pointers.
+TEST_F(MDStringTest, CreateSame) {
+ char x[4] = { 'a', 'b', 'c', 'X' };
+ char y[4] = { 'a', 'b', 'c', 'Y' };
+
+ MDString *s1 = MDString::get(Context, StringRef(&x[0], 3));
+ MDString *s2 = MDString::get(Context, StringRef(&y[0], 3));
+ EXPECT_EQ(s1, s2);
+}
+
+// Test that MDString prints out the string we fed it.
+TEST_F(MDStringTest, PrintingSimple) {
+ char *str = new char[13];
+ strncpy(str, "testing 1 2 3", 13);
+ MDString *s = MDString::get(Context, StringRef(str, 13));
+ strncpy(str, "aaaaaaaaaaaaa", 13);
+ delete[] str;
+
+ std::string Str;
+ raw_string_ostream oss(Str);
+ s->print(oss);
+ EXPECT_STREQ("metadata !\"testing 1 2 3\"", oss.str().c_str());
+}
+
+// Test printing of MDString with non-printable characters.
+TEST_F(MDStringTest, PrintingComplex) {
+ char str[5] = {0, '\n', '"', '\\', (char)-1};
+ MDString *s = MDString::get(Context, StringRef(str+0, 5));
+ std::string Str;
+ raw_string_ostream oss(Str);
+ s->print(oss);
+ EXPECT_STREQ("metadata !\"\\00\\0A\\22\\5C\\FF\"", oss.str().c_str());
+}
+
+typedef MetadataTest MDNodeTest;
+
+// Test the two constructors, and containing other Constants.
+TEST_F(MDNodeTest, Simple) {
+ char x[3] = { 'a', 'b', 'c' };
+ char y[3] = { '1', '2', '3' };
+
+ MDString *s1 = MDString::get(Context, StringRef(&x[0], 3));
+ MDString *s2 = MDString::get(Context, StringRef(&y[0], 3));
+  ConstantInt *CI = ConstantInt::get(Context, APInt(8, 0));
+
+ std::vector<Value *> V;
+ V.push_back(s1);
+ V.push_back(CI);
+ V.push_back(s2);
+
+ MDNode *n1 = MDNode::get(Context, V);
+ Value *const c1 = n1;
+ MDNode *n2 = MDNode::get(Context, c1);
+ Value *const c2 = n2;
+ MDNode *n3 = MDNode::get(Context, V);
+ MDNode *n4 = MDNode::getIfExists(Context, V);
+ MDNode *n5 = MDNode::getIfExists(Context, c1);
+ MDNode *n6 = MDNode::getIfExists(Context, c2);
+ EXPECT_NE(n1, n2);
+#ifdef ENABLE_MDNODE_UNIQUING
+ EXPECT_EQ(n1, n3);
+#else
+ (void) n3;
+#endif
+ EXPECT_EQ(n4, n1);
+ EXPECT_EQ(n5, n2);
+ EXPECT_EQ(n6, (Value*)0);
+
+ EXPECT_EQ(3u, n1->getNumOperands());
+ EXPECT_EQ(s1, n1->getOperand(0));
+ EXPECT_EQ(CI, n1->getOperand(1));
+ EXPECT_EQ(s2, n1->getOperand(2));
+
+ EXPECT_EQ(1u, n2->getNumOperands());
+ EXPECT_EQ(n1, n2->getOperand(0));
+}
+
+TEST_F(MDNodeTest, Delete) {
+  Constant *C = ConstantInt::get(Type::getInt32Ty(Context), 1);
+  Instruction *I = new BitCastInst(C, Type::getInt32Ty(Context));
+
+ Value *const V = I;
+ MDNode *n = MDNode::get(Context, V);
+ WeakVH wvh = n;
+
+ EXPECT_EQ(n, wvh);
+
+ delete I;
+}
+
+TEST(NamedMDNodeTest, Search) {
+ LLVMContext Context;
+ Constant *C = ConstantInt::get(Type::getInt32Ty(Context), 1);
+ Constant *C2 = ConstantInt::get(Type::getInt32Ty(Context), 2);
+
+ Value *const V = C;
+ Value *const V2 = C2;
+ MDNode *n = MDNode::get(Context, V);
+ MDNode *n2 = MDNode::get(Context, V2);
+
+ Module M("MyModule", Context);
+ const char *Name = "llvm.NMD1";
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata(Name);
+ NMD->addOperand(n);
+ NMD->addOperand(n2);
+
+ std::string Str;
+ raw_string_ostream oss(Str);
+ NMD->print(oss);
+ EXPECT_STREQ("!llvm.NMD1 = !{!0, !1}\n",
+ oss.str().c_str());
+}
+} // end anonymous namespace
diff --git a/unittests/IR/PassManagerTest.cpp b/unittests/IR/PassManagerTest.cpp
new file mode 100644
index 000000000000..1097da61b9d9
--- /dev/null
+++ b/unittests/IR/PassManagerTest.cpp
@@ -0,0 +1,558 @@
+//===- llvm/unittest/IR/PassManager.cpp - PassManager unit tests ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/PassManager.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeModuleNDMPass(PassRegistry&);
+ void initializeFPassPass(PassRegistry&);
+ void initializeCGPassPass(PassRegistry&);
+ void initializeLPassPass(PassRegistry&);
+ void initializeBPassPass(PassRegistry&);
+
+ namespace {
+ // ND = no deps
+ // NM = no modifications
+ struct ModuleNDNM: public ModulePass {
+ public:
+ static char run;
+ static char ID;
+ ModuleNDNM() : ModulePass(ID) { }
+ virtual bool runOnModule(Module &M) {
+ run++;
+ return false;
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
+ char ModuleNDNM::ID=0;
+ char ModuleNDNM::run=0;
+
+ struct ModuleNDM : public ModulePass {
+ public:
+ static char run;
+ static char ID;
+ ModuleNDM() : ModulePass(ID) {}
+ virtual bool runOnModule(Module &M) {
+ run++;
+ return true;
+ }
+ };
+ char ModuleNDM::ID=0;
+ char ModuleNDM::run=0;
+
+ struct ModuleNDM2 : public ModulePass {
+ public:
+ static char run;
+ static char ID;
+ ModuleNDM2() : ModulePass(ID) {}
+ virtual bool runOnModule(Module &M) {
+ run++;
+ return true;
+ }
+ };
+ char ModuleNDM2::ID=0;
+ char ModuleNDM2::run=0;
+
+ struct ModuleDNM : public ModulePass {
+ public:
+ static char run;
+ static char ID;
+ ModuleDNM() : ModulePass(ID) {
+ initializeModuleNDMPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnModule(Module &M) {
+ EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
+ run++;
+ return false;
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ModuleNDM>();
+ AU.setPreservesAll();
+ }
+ };
+ char ModuleDNM::ID=0;
+ char ModuleDNM::run=0;
+
+ template<typename P>
+ struct PassTestBase : public P {
+ protected:
+ static int runc;
+ static bool initialized;
+ static bool finalized;
+ int allocated;
+ void run() {
+ EXPECT_TRUE(initialized);
+ EXPECT_FALSE(finalized);
+ EXPECT_EQ(0, allocated);
+ allocated++;
+ runc++;
+ }
+ public:
+ static char ID;
+ static void finishedOK(int run) {
+ EXPECT_GT(runc, 0);
+ EXPECT_TRUE(initialized);
+ EXPECT_TRUE(finalized);
+ EXPECT_EQ(run, runc);
+ }
+ PassTestBase() : P(ID), allocated(0) {
+ initialized = false;
+ finalized = false;
+ runc = 0;
+ }
+
+ virtual void releaseMemory() {
+ EXPECT_GT(runc, 0);
+ EXPECT_GT(allocated, 0);
+ allocated--;
+ }
+ };
+ template<typename P> char PassTestBase<P>::ID;
+ template<typename P> int PassTestBase<P>::runc;
+ template<typename P> bool PassTestBase<P>::initialized;
+ template<typename P> bool PassTestBase<P>::finalized;
+
+ template<typename T, typename P>
+ struct PassTest : public PassTestBase<P> {
+ public:
+#ifndef _MSC_VER // MSVC complains that Pass is not a base class.
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
+#endif
+ virtual bool doInitialization(T &t) {
+ EXPECT_FALSE(PassTestBase<P>::initialized);
+ PassTestBase<P>::initialized = true;
+ return false;
+ }
+ virtual bool doFinalization(T &t) {
+ EXPECT_FALSE(PassTestBase<P>::finalized);
+ PassTestBase<P>::finalized = true;
+ EXPECT_EQ(0, PassTestBase<P>::allocated);
+ return false;
+ }
+ };
+
+ struct CGPass : public PassTest<CallGraph, CallGraphSCCPass> {
+ public:
+ CGPass() {
+ initializeCGPassPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnSCC(CallGraphSCC &SCMM) {
+ EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
+ run();
+ return false;
+ }
+ };
+
+ struct FPass : public PassTest<Module, FunctionPass> {
+ public:
+ virtual bool runOnFunction(Function &F) {
+ // FIXME: PR4112
+ // EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
+ run();
+ return false;
+ }
+ };
+
+ struct LPass : public PassTestBase<LoopPass> {
+ private:
+ static int initcount;
+ static int fincount;
+ public:
+ LPass() {
+ initializeLPassPass(*PassRegistry::getPassRegistry());
+      initcount = 0; fincount = 0;
+ EXPECT_FALSE(initialized);
+ }
+ static void finishedOK(int run, int finalized) {
+ PassTestBase<LoopPass>::finishedOK(run);
+ EXPECT_EQ(run, initcount);
+ EXPECT_EQ(finalized, fincount);
+ }
+ using llvm::Pass::doInitialization;
+ using llvm::Pass::doFinalization;
+ virtual bool doInitialization(Loop* L, LPPassManager &LPM) {
+ initialized = true;
+ initcount++;
+ return false;
+ }
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
+ EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
+ run();
+ return false;
+ }
+ virtual bool doFinalization() {
+ fincount++;
+ finalized = true;
+ return false;
+ }
+ };
+ int LPass::initcount=0;
+ int LPass::fincount=0;
+
+ struct BPass : public PassTestBase<BasicBlockPass> {
+ private:
+ static int inited;
+ static int fin;
+ public:
+ static void finishedOK(int run, int N) {
+ PassTestBase<BasicBlockPass>::finishedOK(run);
+ EXPECT_EQ(inited, N);
+ EXPECT_EQ(fin, N);
+ }
+ BPass() {
+ inited = 0;
+ fin = 0;
+ }
+ virtual bool doInitialization(Module &M) {
+ EXPECT_FALSE(initialized);
+ initialized = true;
+ return false;
+ }
+ virtual bool doInitialization(Function &F) {
+ inited++;
+ return false;
+ }
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
+ run();
+ return false;
+ }
+ virtual bool doFinalization(Function &F) {
+ fin++;
+ return false;
+ }
+ virtual bool doFinalization(Module &M) {
+ EXPECT_FALSE(finalized);
+ finalized = true;
+ EXPECT_EQ(0, allocated);
+ return false;
+ }
+ };
+ int BPass::inited=0;
+ int BPass::fin=0;
+
+ struct OnTheFlyTest: public ModulePass {
+ public:
+ static char ID;
+ OnTheFlyTest() : ModulePass(ID) {
+ initializeFPassPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnModule(Module &M) {
+ EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
+ for (Module::iterator I=M.begin(),E=M.end(); I != E; ++I) {
+ Function &F = *I;
+ {
+ SCOPED_TRACE("Running on the fly function pass");
+ getAnalysis<FPass>(F);
+ }
+ }
+ return false;
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<FPass>();
+ }
+ };
+ char OnTheFlyTest::ID=0;
+
+ TEST(PassManager, RunOnce) {
+ Module M("test-once", getGlobalContext());
+ struct ModuleNDNM *mNDNM = new ModuleNDNM();
+ struct ModuleDNM *mDNM = new ModuleDNM();
+ struct ModuleNDM *mNDM = new ModuleNDM();
+ struct ModuleNDM2 *mNDM2 = new ModuleNDM2();
+
+ mNDM->run = mNDNM->run = mDNM->run = mNDM2->run = 0;
+
+ PassManager Passes;
+ Passes.add(new DataLayout(&M));
+ Passes.add(mNDM2);
+ Passes.add(mNDM);
+ Passes.add(mNDNM);
+ Passes.add(mDNM);
+
+ Passes.run(M);
+ // each pass must be run exactly once, since nothing invalidates them
+ EXPECT_EQ(1, mNDM->run);
+ EXPECT_EQ(1, mNDNM->run);
+ EXPECT_EQ(1, mDNM->run);
+ EXPECT_EQ(1, mNDM2->run);
+ }
+
+ TEST(PassManager, ReRun) {
+ Module M("test-rerun", getGlobalContext());
+ struct ModuleNDNM *mNDNM = new ModuleNDNM();
+ struct ModuleDNM *mDNM = new ModuleDNM();
+ struct ModuleNDM *mNDM = new ModuleNDM();
+ struct ModuleNDM2 *mNDM2 = new ModuleNDM2();
+
+ mNDM->run = mNDNM->run = mDNM->run = mNDM2->run = 0;
+
+ PassManager Passes;
+ Passes.add(new DataLayout(&M));
+ Passes.add(mNDM);
+ Passes.add(mNDNM);
+    Passes.add(mNDM2); // invalidates mNDM, which mDNM requires
+ Passes.add(mDNM);
+
+ Passes.run(M);
+ // Some passes must be rerun because a pass that modified the
+ // module/function was run in between
+ EXPECT_EQ(2, mNDM->run);
+ EXPECT_EQ(1, mNDNM->run);
+ EXPECT_EQ(1, mNDM2->run);
+ EXPECT_EQ(1, mDNM->run);
+ }
+
+ Module* makeLLVMModule();
+
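+  // Both MemoryTestHelper overloads build a fresh module, run exactly one
+  // pass of type T over it, and then have T audit its own bookkeeping
+  // (initialization, run count, releaseMemory) via finishedOK().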
+ template<typename T>
+ void MemoryTestHelper(int run) {
+ OwningPtr<Module> M(makeLLVMModule());
+ T *P = new T();
+ PassManager Passes;
+ Passes.add(new DataLayout(M.get()));
+ Passes.add(P);
+ Passes.run(*M);
+ T::finishedOK(run);
+ }
+
+ template<typename T>
+ void MemoryTestHelper(int run, int N) {
+ Module *M = makeLLVMModule();
+ T *P = new T();
+ PassManager Passes;
+ Passes.add(new DataLayout(M));
+ Passes.add(P);
+ Passes.run(*M);
+ T::finishedOK(run, N);
+ delete M;
+ }
+
+ TEST(PassManager, Memory) {
+ // SCC#1: test1->test2->test3->test1
+ // SCC#2: test4
+ // SCC#3: indirect call node
+ {
+ SCOPED_TRACE("Callgraph pass");
+ MemoryTestHelper<CGPass>(3);
+ }
+
+ {
+ SCOPED_TRACE("Function pass");
+      MemoryTestHelper<FPass>(4); // 4 functions
+ }
+
+ {
+ SCOPED_TRACE("Loop pass");
+      MemoryTestHelper<LPass>(2, 1); // 2 loops, 1 function
+ }
+ {
+ SCOPED_TRACE("Basic block pass");
+      MemoryTestHelper<BPass>(7, 4); // 7 basic blocks, 4 functions
+ }
+
+ }
+
+ TEST(PassManager, MemoryOnTheFly) {
+ Module *M = makeLLVMModule();
+ {
+ SCOPED_TRACE("Running OnTheFlyTest");
+ struct OnTheFlyTest *O = new OnTheFlyTest();
+ PassManager Passes;
+ Passes.add(new DataLayout(M));
+ Passes.add(O);
+ Passes.run(*M);
+
+ FPass::finishedOK(4);
+ }
+ delete M;
+ }
+
+ Module* makeLLVMModule() {
+ // Module Construction
+ Module* mod = new Module("test-mem", getGlobalContext());
+ mod->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
+ "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-"
+ "a0:0:64-s0:64:64-f80:128:128");
+ mod->setTargetTriple("x86_64-unknown-linux-gnu");
+
+ // Type Definitions
+ std::vector<Type*>FuncTy_0_args;
+ FunctionType* FuncTy_0 = FunctionType::get(
+ /*Result=*/IntegerType::get(getGlobalContext(), 32),
+ /*Params=*/FuncTy_0_args,
+ /*isVarArg=*/false);
+
+ std::vector<Type*>FuncTy_2_args;
+ FuncTy_2_args.push_back(IntegerType::get(getGlobalContext(), 1));
+ FunctionType* FuncTy_2 = FunctionType::get(
+ /*Result=*/Type::getVoidTy(getGlobalContext()),
+ /*Params=*/FuncTy_2_args,
+ /*isVarArg=*/false);
+
+
+ // Function Declarations
+
+ Function* func_test1 = Function::Create(
+ /*Type=*/FuncTy_0,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"test1", mod);
+ func_test1->setCallingConv(CallingConv::C);
+ AttributeSet func_test1_PAL;
+ func_test1->setAttributes(func_test1_PAL);
+
+ Function* func_test2 = Function::Create(
+ /*Type=*/FuncTy_0,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"test2", mod);
+ func_test2->setCallingConv(CallingConv::C);
+ AttributeSet func_test2_PAL;
+ func_test2->setAttributes(func_test2_PAL);
+
+ Function* func_test3 = Function::Create(
+ /*Type=*/FuncTy_0,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"test3", mod);
+ func_test3->setCallingConv(CallingConv::C);
+ AttributeSet func_test3_PAL;
+ func_test3->setAttributes(func_test3_PAL);
+
+ Function* func_test4 = Function::Create(
+ /*Type=*/FuncTy_2,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"test4", mod);
+ func_test4->setCallingConv(CallingConv::C);
+ AttributeSet func_test4_PAL;
+ func_test4->setAttributes(func_test4_PAL);
+
+ // Global Variable Declarations
+
+
+ // Constant Definitions
+
+ // Global Variable Definitions
+
+ // Function Definitions
+
+ // Function: test1 (func_test1)
+ {
+
+ BasicBlock* label_entry = BasicBlock::Create(getGlobalContext(), "entry",func_test1,0);
+
+ // Block entry (label_entry)
+ CallInst* int32_3 = CallInst::Create(func_test2, "", label_entry);
+ int32_3->setCallingConv(CallingConv::C);
+    int32_3->setTailCall(false);
+    AttributeSet int32_3_PAL;
+ int32_3->setAttributes(int32_3_PAL);
+
+ ReturnInst::Create(getGlobalContext(), int32_3, label_entry);
+
+ }
+
+ // Function: test2 (func_test2)
+ {
+
+ BasicBlock* label_entry_5 = BasicBlock::Create(getGlobalContext(), "entry",func_test2,0);
+
+ // Block entry (label_entry_5)
+ CallInst* int32_6 = CallInst::Create(func_test3, "", label_entry_5);
+ int32_6->setCallingConv(CallingConv::C);
+    int32_6->setTailCall(false);
+    AttributeSet int32_6_PAL;
+ int32_6->setAttributes(int32_6_PAL);
+
+ ReturnInst::Create(getGlobalContext(), int32_6, label_entry_5);
+
+ }
+
+ // Function: test3 (func_test3)
+ {
+
+ BasicBlock* label_entry_8 = BasicBlock::Create(getGlobalContext(), "entry",func_test3,0);
+
+ // Block entry (label_entry_8)
+ CallInst* int32_9 = CallInst::Create(func_test1, "", label_entry_8);
+ int32_9->setCallingConv(CallingConv::C);
+    int32_9->setTailCall(false);
+    AttributeSet int32_9_PAL;
+ int32_9->setAttributes(int32_9_PAL);
+
+ ReturnInst::Create(getGlobalContext(), int32_9, label_entry_8);
+
+ }
+
+ // Function: test4 (func_test4)
+ {
+ Function::arg_iterator args = func_test4->arg_begin();
+ Value* int1_f = args++;
+ int1_f->setName("f");
+
+ BasicBlock* label_entry_11 = BasicBlock::Create(getGlobalContext(), "entry",func_test4,0);
+ BasicBlock* label_bb = BasicBlock::Create(getGlobalContext(), "bb",func_test4,0);
+ BasicBlock* label_bb1 = BasicBlock::Create(getGlobalContext(), "bb1",func_test4,0);
+ BasicBlock* label_return = BasicBlock::Create(getGlobalContext(), "return",func_test4,0);
+
+ // Block entry (label_entry_11)
+ BranchInst::Create(label_bb, label_entry_11);
+
+ // Block bb (label_bb)
+ BranchInst::Create(label_bb, label_bb1, int1_f, label_bb);
+
+ // Block bb1 (label_bb1)
+ BranchInst::Create(label_bb1, label_return, int1_f, label_bb1);
+
+ // Block return (label_return)
+ ReturnInst::Create(getGlobalContext(), label_return);
+
+ }
+ return mod;
+ }
+
+ }
+}
+
+INITIALIZE_PASS(ModuleNDM, "mndm", "mndm", false, false)
+INITIALIZE_PASS_BEGIN(CGPass, "cgp","cgp", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(CGPass, "cgp","cgp", false, false)
+INITIALIZE_PASS(FPass, "fp","fp", false, false)
+INITIALIZE_PASS_BEGIN(LPass, "lp","lp", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LPass, "lp","lp", false, false)
+INITIALIZE_PASS(BPass, "bp","bp", false, false)
diff --git a/unittests/IR/TypeBuilderTest.cpp b/unittests/IR/TypeBuilderTest.cpp
new file mode 100644
index 000000000000..be493cdc6396
--- /dev/null
+++ b/unittests/IR/TypeBuilderTest.cpp
@@ -0,0 +1,256 @@
+//===- llvm/unittest/IR/TypeBuilderTest.cpp - TypeBuilder tests -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/TypeBuilder.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/IR/LLVMContext.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
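+// TypeBuilder's boolean parameter selects cross-target mode: with true, only
+// target-independent types such as types::i<N> are accepted; with false,
+// host types such as int8_t map onto the matching LLVM integer widths.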
+TEST(TypeBuilderTest, Void) {
+ EXPECT_EQ(Type::getVoidTy(getGlobalContext()), (TypeBuilder<void, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getVoidTy(getGlobalContext()), (TypeBuilder<void, false>::get(getGlobalContext())));
+ // Special cases for C compatibility:
+ EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
+ (TypeBuilder<void*, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
+ (TypeBuilder<const void*, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
+ (TypeBuilder<volatile void*, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
+ (TypeBuilder<const volatile void*, false>::get(
+ getGlobalContext())));
+}
+
+TEST(TypeBuilderTest, HostIntegers) {
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()), (TypeBuilder<int8_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()), (TypeBuilder<uint8_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt16Ty(getGlobalContext()), (TypeBuilder<int16_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt16Ty(getGlobalContext()), (TypeBuilder<uint16_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt32Ty(getGlobalContext()), (TypeBuilder<int32_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt32Ty(getGlobalContext()), (TypeBuilder<uint32_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt64Ty(getGlobalContext()), (TypeBuilder<int64_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt64Ty(getGlobalContext()), (TypeBuilder<uint64_t, false>::get(getGlobalContext())));
+
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), sizeof(size_t) * CHAR_BIT),
+ (TypeBuilder<size_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), sizeof(ptrdiff_t) * CHAR_BIT),
+ (TypeBuilder<ptrdiff_t, false>::get(getGlobalContext())));
+}
+
+TEST(TypeBuilderTest, CrossCompilableIntegers) {
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), 1), (TypeBuilder<types::i<1>, true>::get(getGlobalContext())));
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), 1), (TypeBuilder<types::i<1>, false>::get(getGlobalContext())));
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), 72), (TypeBuilder<types::i<72>, true>::get(getGlobalContext())));
+ EXPECT_EQ(IntegerType::get(getGlobalContext(), 72), (TypeBuilder<types::i<72>, false>::get(getGlobalContext())));
+}
+
+TEST(TypeBuilderTest, Float) {
+ EXPECT_EQ(Type::getFloatTy(getGlobalContext()), (TypeBuilder<float, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getDoubleTy(getGlobalContext()), (TypeBuilder<double, false>::get(getGlobalContext())));
+ // long double isn't supported yet.
+ EXPECT_EQ(Type::getFloatTy(getGlobalContext()), (TypeBuilder<types::ieee_float, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getFloatTy(getGlobalContext()), (TypeBuilder<types::ieee_float, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getDoubleTy(getGlobalContext()), (TypeBuilder<types::ieee_double, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getDoubleTy(getGlobalContext()), (TypeBuilder<types::ieee_double, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getX86_FP80Ty(getGlobalContext()), (TypeBuilder<types::x86_fp80, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getX86_FP80Ty(getGlobalContext()), (TypeBuilder<types::x86_fp80, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getFP128Ty(getGlobalContext()), (TypeBuilder<types::fp128, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getFP128Ty(getGlobalContext()), (TypeBuilder<types::fp128, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getPPC_FP128Ty(getGlobalContext()), (TypeBuilder<types::ppc_fp128, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getPPC_FP128Ty(getGlobalContext()), (TypeBuilder<types::ppc_fp128, false>::get(getGlobalContext())));
+}
+
+TEST(TypeBuilderTest, Derived) {
+ EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())),
+ (TypeBuilder<int8_t**, false>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7),
+ (TypeBuilder<int8_t[7], false>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 0),
+ (TypeBuilder<int8_t[], false>::get(getGlobalContext())));
+
+ EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())),
+ (TypeBuilder<types::i<8>**, false>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7),
+ (TypeBuilder<types::i<8>[7], false>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 0),
+ (TypeBuilder<types::i<8>[], false>::get(getGlobalContext())));
+
+ EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())),
+ (TypeBuilder<types::i<8>**, true>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7),
+ (TypeBuilder<types::i<8>[7], true>::get(getGlobalContext())));
+ EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 0),
+ (TypeBuilder<types::i<8>[], true>::get(getGlobalContext())));
+
+
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const int8_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<volatile int8_t, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const volatile int8_t, false>::get(getGlobalContext())));
+
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const types::i<8>, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<volatile types::i<8>, false>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const volatile types::i<8>, false>::get(getGlobalContext())));
+
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const types::i<8>, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<volatile types::i<8>, true>::get(getGlobalContext())));
+ EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
+ (TypeBuilder<const volatile types::i<8>, true>::get(getGlobalContext())));
+
+ EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
+ (TypeBuilder<const volatile int8_t*const volatile, false>::get(getGlobalContext())));
+}
+
+TEST(TypeBuilderTest, Functions) {
+ std::vector<Type*> params;
+ EXPECT_EQ(FunctionType::get(Type::getVoidTy(getGlobalContext()), params, false),
+ (TypeBuilder<void(), true>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
+ (TypeBuilder<int8_t(...), false>::get(getGlobalContext())));
+ params.push_back(TypeBuilder<int32_t*, false>::get(getGlobalContext()));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
+ (TypeBuilder<int8_t(const int32_t*), false>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
+ (TypeBuilder<int8_t(const int32_t*, ...), false>::get(getGlobalContext())));
+ params.push_back(TypeBuilder<char*, false>::get(getGlobalContext()));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
+ (TypeBuilder<int8_t(int32_t*, void*), false>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
+ (TypeBuilder<int8_t(int32_t*, char*, ...), false>::get(getGlobalContext())));
+ params.push_back(TypeBuilder<char, false>::get(getGlobalContext()));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
+ (TypeBuilder<int8_t(int32_t*, void*, char), false>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
+ (TypeBuilder<int8_t(int32_t*, char*, char, ...), false>::get(getGlobalContext())));
+ params.push_back(TypeBuilder<char, false>::get(getGlobalContext()));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
+ (TypeBuilder<int8_t(int32_t*, void*, char, char), false>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
+ (TypeBuilder<int8_t(int32_t*, char*, char, char, ...),
+ false>::get(getGlobalContext())));
+ params.push_back(TypeBuilder<char, false>::get(getGlobalContext()));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
+ (TypeBuilder<int8_t(int32_t*, void*, char, char, char),
+ false>::get(getGlobalContext())));
+ EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
+ (TypeBuilder<int8_t(int32_t*, char*, char, char, char, ...),
+ false>::get(getGlobalContext())));
+}
+
+TEST(TypeBuilderTest, Context) {
+ // We used to cache TypeBuilder results in static local variables. This
+ // produced the same type for different contexts, which of course broke
+ // things.
+ LLVMContext context1;
+ EXPECT_EQ(&context1,
+ &(TypeBuilder<types::i<1>, true>::get(context1))->getContext());
+ LLVMContext context2;
+ EXPECT_EQ(&context2,
+ &(TypeBuilder<types::i<1>, true>::get(context2))->getContext());
+}
+
+struct MyType {
+ int a;
+ int *b;
+ void *array[1];
+};
+
+struct MyPortableType {
+ int32_t a;
+ int32_t *b;
+ void *array[1];
+};
+
+} // anonymous namespace
+
+namespace llvm {
+template<bool cross> class TypeBuilder<MyType, cross> {
+public:
+ static StructType *get(LLVMContext &Context) {
+ // Using the static result variable ensures that the type is
+ // only looked up once.
+ std::vector<Type*> st;
+ st.push_back(TypeBuilder<int, cross>::get(Context));
+ st.push_back(TypeBuilder<int*, cross>::get(Context));
+ st.push_back(TypeBuilder<void*[], cross>::get(Context));
+ static StructType *const result = StructType::get(Context, st);
+ return result;
+ }
+
+ // You may find this a convenient place to put some constants
+ // to help with getelementptr. They don't have any effect on
+ // the operation of TypeBuilder.
+ enum Fields {
+ FIELD_A,
+ FIELD_B,
+ FIELD_ARRAY
+ };
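+
+ // For example, a caller might use these with getelementptr (illustrative
+ // only; assumes an IRBuilder<> B and a Value *Ptr of type MyType*):
+ //   B.CreateStructGEP(Ptr, FIELD_B, "b");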
+};
+
+template<bool cross> class TypeBuilder<MyPortableType, cross> {
+public:
+ static StructType *get(LLVMContext &Context) {
+ // Using the static result variable ensures that the type is
+ // only looked up once.
+ std::vector<Type*> st;
+ st.push_back(TypeBuilder<types::i<32>, cross>::get(Context));
+ st.push_back(TypeBuilder<types::i<32>*, cross>::get(Context));
+ st.push_back(TypeBuilder<types::i<8>*[], cross>::get(Context));
+ static StructType *const result = StructType::get(Context, st);
+ return result;
+ }
+
+ // You may find this a convenient place to put some constants
+ // to help with getelementptr. They don't have any effect on
+ // the operation of TypeBuilder.
+ enum Fields {
+ FIELD_A,
+ FIELD_B,
+ FIELD_ARRAY
+ };
+};
+} // namespace llvm
+namespace {
+
+TEST(TypeBuilderTest, Extensions) {
+ EXPECT_EQ(PointerType::getUnqual(StructType::get(
+ TypeBuilder<int, false>::get(getGlobalContext()),
+ TypeBuilder<int*, false>::get(getGlobalContext()),
+ TypeBuilder<void*[], false>::get(getGlobalContext()),
+ (void*)0)),
+ (TypeBuilder<MyType*, false>::get(getGlobalContext())));
+ EXPECT_EQ(PointerType::getUnqual(StructType::get(
+ TypeBuilder<types::i<32>, false>::get(getGlobalContext()),
+ TypeBuilder<types::i<32>*, false>::get(getGlobalContext()),
+ TypeBuilder<types::i<8>*[], false>::get(getGlobalContext()),
+ (void*)0)),
+ (TypeBuilder<MyPortableType*, false>::get(getGlobalContext())));
+ EXPECT_EQ(PointerType::getUnqual(StructType::get(
+ TypeBuilder<types::i<32>, false>::get(getGlobalContext()),
+ TypeBuilder<types::i<32>*, false>::get(getGlobalContext()),
+ TypeBuilder<types::i<8>*[], false>::get(getGlobalContext()),
+ (void*)0)),
+ (TypeBuilder<MyPortableType*, true>::get(getGlobalContext())));
+}
+
+} // anonymous namespace
diff --git a/unittests/IR/TypesTest.cpp b/unittests/IR/TypesTest.cpp
new file mode 100644
index 000000000000..2cee640a13d3
--- /dev/null
+++ b/unittests/IR/TypesTest.cpp
@@ -0,0 +1,30 @@
+//===- llvm/unittest/IR/TypesTest.cpp - Type unit tests -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "gtest/gtest.h"
+using namespace llvm;
+
+namespace {
+
+TEST(TypesTest, StructType) {
+ LLVMContext C;
+
+ // PR13522
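+ // Renaming a struct to a substring of its current name exercises the case
+ // where the new name aliases the old name's storage.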
+ StructType *Struct = StructType::create(C, "FooBar");
+ EXPECT_EQ("FooBar", Struct->getName());
+ Struct->setName(Struct->getName().substr(0, 3));
+ EXPECT_EQ("Foo", Struct->getName());
+ Struct->setName("");
+ EXPECT_TRUE(Struct->getName().empty());
+ EXPECT_FALSE(Struct->hasName());
+}
+
+} // end anonymous namespace
diff --git a/unittests/IR/ValueMapTest.cpp b/unittests/IR/ValueMapTest.cpp
new file mode 100644
index 000000000000..5aaf90583628
--- /dev/null
+++ b/unittests/IR/ValueMapTest.cpp
@@ -0,0 +1,294 @@
+//===- llvm/unittest/IR/ValueMapTest.cpp - ValueMap unit tests --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+// Test fixture
+template<typename T>
+class ValueMapTest : public testing::Test {
+protected:
+ Constant *ConstantV;
+ OwningPtr<BitCastInst> BitcastV;
+ OwningPtr<BinaryOperator> AddV;
+
+ ValueMapTest() :
+ ConstantV(ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 0)),
+ BitcastV(new BitCastInst(ConstantV, Type::getInt32Ty(getGlobalContext()))),
+ AddV(BinaryOperator::CreateAdd(ConstantV, ConstantV)) {
+ }
+};
+
+// Run everything on Value*, a subtype to make sure that casting works as
+// expected, and a const subtype to make sure we cast const correctly.
+typedef ::testing::Types<Value, Instruction, const Instruction> KeyTypes;
+TYPED_TEST_CASE(ValueMapTest, KeyTypes);
+
+TYPED_TEST(ValueMapTest, Null) {
+ ValueMap<TypeParam*, int> VM1;
+ VM1[NULL] = 7;
+ EXPECT_EQ(7, VM1.lookup(NULL));
+}
+
+TYPED_TEST(ValueMapTest, FollowsValue) {
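+ // With the default config, an entry follows its key through
+ // replaceAllUsesWith and disappears when the key value is destroyed.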
+ ValueMap<TypeParam*, int> VM;
+ VM[this->BitcastV.get()] = 7;
+ EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
+ EXPECT_EQ(0, VM.count(this->AddV.get()));
+ this->BitcastV->replaceAllUsesWith(this->AddV.get());
+ EXPECT_EQ(7, VM.lookup(this->AddV.get()));
+ EXPECT_EQ(0, VM.count(this->BitcastV.get()));
+ this->AddV.reset();
+ EXPECT_EQ(0, VM.count(this->AddV.get()));
+ EXPECT_EQ(0, VM.count(this->BitcastV.get()));
+ EXPECT_EQ(0U, VM.size());
+}
+
+TYPED_TEST(ValueMapTest, OperationsWork) {
+ ValueMap<TypeParam*, int> VM;
+ ValueMap<TypeParam*, int> VM2(16); (void)VM2;
+ typename ValueMapConfig<TypeParam*>::ExtraData Data;
+ ValueMap<TypeParam*, int> VM3(Data, 16); (void)VM3;
+ EXPECT_TRUE(VM.empty());
+
+ VM[this->BitcastV.get()] = 7;
+
+ // Find:
+ typename ValueMap<TypeParam*, int>::iterator I =
+ VM.find(this->BitcastV.get());
+ ASSERT_TRUE(I != VM.end());
+ EXPECT_EQ(this->BitcastV.get(), I->first);
+ EXPECT_EQ(7, I->second);
+ EXPECT_TRUE(VM.find(this->AddV.get()) == VM.end());
+
+ // Const find:
+ const ValueMap<TypeParam*, int> &CVM = VM;
+ typename ValueMap<TypeParam*, int>::const_iterator CI =
+ CVM.find(this->BitcastV.get());
+ ASSERT_TRUE(CI != CVM.end());
+ EXPECT_EQ(this->BitcastV.get(), CI->first);
+ EXPECT_EQ(7, CI->second);
+ EXPECT_TRUE(CVM.find(this->AddV.get()) == CVM.end());
+
+ // Insert:
+ std::pair<typename ValueMap<TypeParam*, int>::iterator, bool> InsertResult1 =
+ VM.insert(std::make_pair(this->AddV.get(), 3));
+ EXPECT_EQ(this->AddV.get(), InsertResult1.first->first);
+ EXPECT_EQ(3, InsertResult1.first->second);
+ EXPECT_TRUE(InsertResult1.second);
+ EXPECT_EQ(true, VM.count(this->AddV.get()));
+ std::pair<typename ValueMap<TypeParam*, int>::iterator, bool> InsertResult2 =
+ VM.insert(std::make_pair(this->AddV.get(), 5));
+ EXPECT_EQ(this->AddV.get(), InsertResult2.first->first);
+ EXPECT_EQ(3, InsertResult2.first->second);
+ EXPECT_FALSE(InsertResult2.second);
+
+ // Erase:
+ VM.erase(InsertResult2.first);
+ EXPECT_EQ(0U, VM.count(this->AddV.get()));
+ EXPECT_EQ(1U, VM.count(this->BitcastV.get()));
+ VM.erase(this->BitcastV.get());
+ EXPECT_EQ(0U, VM.count(this->BitcastV.get()));
+ EXPECT_EQ(0U, VM.size());
+
+ // Range insert:
+ SmallVector<std::pair<Instruction*, int>, 2> Elems;
+ Elems.push_back(std::make_pair(this->AddV.get(), 1));
+ Elems.push_back(std::make_pair(this->BitcastV.get(), 2));
+ VM.insert(Elems.begin(), Elems.end());
+ EXPECT_EQ(1, VM.lookup(this->AddV.get()));
+ EXPECT_EQ(2, VM.lookup(this->BitcastV.get()));
+}
+
+template<typename ExpectedType, typename VarType>
+void CompileAssertHasType(VarType) {
+ typedef char assert[is_same<ExpectedType, VarType>::value ? 1 : -1];
+}
+
+TYPED_TEST(ValueMapTest, Iteration) {
+ ValueMap<TypeParam*, int> VM;
+ VM[this->BitcastV.get()] = 2;
+ VM[this->AddV.get()] = 3;
+ size_t size = 0;
+ for (typename ValueMap<TypeParam*, int>::iterator I = VM.begin(), E = VM.end();
+ I != E; ++I) {
+ ++size;
+ std::pair<TypeParam*, int> value = *I; (void)value;
+ CompileAssertHasType<TypeParam*>(I->first);
+ if (I->second == 2) {
+ EXPECT_EQ(this->BitcastV.get(), I->first);
+ I->second = 5;
+ } else if (I->second == 3) {
+ EXPECT_EQ(this->AddV.get(), I->first);
+ I->second = 6;
+ } else {
+ ADD_FAILURE() << "Iterated through an extra value.";
+ }
+ }
+ EXPECT_EQ(2U, size);
+ EXPECT_EQ(5, VM[this->BitcastV.get()]);
+ EXPECT_EQ(6, VM[this->AddV.get()]);
+
+ size = 0;
+ // Cast to const ValueMap to avoid a bug in DenseMap's iterators.
+ const ValueMap<TypeParam*, int>& CVM = VM;
+ for (typename ValueMap<TypeParam*, int>::const_iterator I = CVM.begin(),
+ E = CVM.end(); I != E; ++I) {
+ ++size;
+ std::pair<TypeParam*, int> value = *I; (void)value;
+ CompileAssertHasType<TypeParam*>(I->first);
+ if (I->second == 5) {
+ EXPECT_EQ(this->BitcastV.get(), I->first);
+ } else if (I->second == 6) {
+ EXPECT_EQ(this->AddV.get(), I->first);
+ } else {
+ ADD_FAILURE() << "Iterated through an extra value.";
+ }
+ }
+ EXPECT_EQ(2U, size);
+}
+
+TYPED_TEST(ValueMapTest, DefaultCollisionBehavior) {
+ // By default, when a key is RAUW'd into a key that already has an entry,
+ // the existing entry for the new key wins and the old key's entry is
+ // dropped, as the expectations below verify.
+ ValueMap<TypeParam*, int> VM;
+ VM[this->BitcastV.get()] = 7;
+ VM[this->AddV.get()] = 9;
+ this->BitcastV->replaceAllUsesWith(this->AddV.get());
+ EXPECT_EQ(0, VM.count(this->BitcastV.get()));
+ EXPECT_EQ(9, VM.lookup(this->AddV.get()));
+}
+
+TYPED_TEST(ValueMapTest, ConfiguredCollisionBehavior) {
+ // TODO: Implement this when someone needs it.
+}
+
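+// A config whose callbacks verify that the ValueMap already holds the mutex
+// returned by getMutex() when onRAUW and onDelete run.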
+template<typename KeyT>
+struct LockMutex : ValueMapConfig<KeyT> {
+ struct ExtraData {
+ sys::Mutex *M;
+ bool *CalledRAUW;
+ bool *CalledDeleted;
+ };
+ static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) {
+ *Data.CalledRAUW = true;
+ EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked.";
+ }
+ static void onDelete(const ExtraData &Data, KeyT Old) {
+ *Data.CalledDeleted = true;
+ EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked.";
+ }
+ static sys::Mutex *getMutex(const ExtraData &Data) { return Data.M; }
+};
+#if LLVM_ENABLE_THREADS
+TYPED_TEST(ValueMapTest, LocksMutex) {
+ sys::Mutex M(false); // Not recursive.
+ bool CalledRAUW = false, CalledDeleted = false;
+ typename LockMutex<TypeParam*>::ExtraData Data =
+ {&M, &CalledRAUW, &CalledDeleted};
+ ValueMap<TypeParam*, int, LockMutex<TypeParam*> > VM(Data);
+ VM[this->BitcastV.get()] = 7;
+ this->BitcastV->replaceAllUsesWith(this->AddV.get());
+ this->AddV.reset();
+ EXPECT_TRUE(CalledRAUW);
+ EXPECT_TRUE(CalledDeleted);
+}
+#endif
+
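+// A config with RAUW-following disabled: entries stay keyed to the original
+// Value across replaceAllUsesWith, but deleted keys are still removed.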
+template<typename KeyT>
+struct NoFollow : ValueMapConfig<KeyT> {
+ enum { FollowRAUW = false };
+};
+
+TYPED_TEST(ValueMapTest, NoFollowRAUW) {
+ ValueMap<TypeParam*, int, NoFollow<TypeParam*> > VM;
+ VM[this->BitcastV.get()] = 7;
+ EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
+ EXPECT_EQ(0, VM.count(this->AddV.get()));
+ this->BitcastV->replaceAllUsesWith(this->AddV.get());
+ EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
+ EXPECT_EQ(0, VM.lookup(this->AddV.get()));
+ this->AddV.reset();
+ EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
+ EXPECT_EQ(0, VM.lookup(this->AddV.get()));
+ this->BitcastV.reset();
+ EXPECT_EQ(0, VM.lookup(this->BitcastV.get()));
+ EXPECT_EQ(0, VM.lookup(this->AddV.get()));
+ EXPECT_EQ(0U, VM.size());
+}
+
+template<typename KeyT>
+struct CountOps : ValueMapConfig<KeyT> {
+ struct ExtraData {
+ int *Deletions;
+ int *RAUWs;
+ };
+
+ static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) {
+ ++*Data.RAUWs;
+ }
+ static void onDelete(const ExtraData &Data, KeyT Old) {
+ ++*Data.Deletions;
+ }
+};
+
+TYPED_TEST(ValueMapTest, CallsConfig) {
+ int Deletions = 0, RAUWs = 0;
+ typename CountOps<TypeParam*>::ExtraData Data = {&Deletions, &RAUWs};
+ ValueMap<TypeParam*, int, CountOps<TypeParam*> > VM(Data);
+ VM[this->BitcastV.get()] = 7;
+ this->BitcastV->replaceAllUsesWith(this->AddV.get());
+ EXPECT_EQ(0, Deletions);
+ EXPECT_EQ(1, RAUWs);
+ this->AddV.reset();
+ EXPECT_EQ(1, Deletions);
+ EXPECT_EQ(1, RAUWs);
+ this->BitcastV.reset();
+ EXPECT_EQ(1, Deletions);
+ EXPECT_EQ(1, RAUWs);
+}
+
+template<typename KeyT>
+struct ModifyingConfig : ValueMapConfig<KeyT> {
+ // We'll put a pointer here back to the ValueMap this key is in, so
+ // that we can modify it (and clobber *this) before the ValueMap
+ // tries to do the same modification. In previous versions of
+ // ValueMap, that exploded.
+ typedef ValueMap<KeyT, int, ModifyingConfig<KeyT> > **ExtraData;
+
+ static void onRAUW(ExtraData Map, KeyT Old, KeyT New) {
+ (*Map)->erase(Old);
+ }
+ static void onDelete(ExtraData Map, KeyT Old) {
+ (*Map)->erase(Old);
+ }
+};
+TYPED_TEST(ValueMapTest, SurvivesModificationByConfig) {
+ ValueMap<TypeParam*, int, ModifyingConfig<TypeParam*> > *MapAddress;
+ ValueMap<TypeParam*, int, ModifyingConfig<TypeParam*> > VM(&MapAddress);
+ MapAddress = &VM;
+ // Now the ModifyingConfig can modify the Map inside a callback.
+ VM[this->BitcastV.get()] = 7;
+ this->BitcastV->replaceAllUsesWith(this->AddV.get());
+ EXPECT_FALSE(VM.count(this->BitcastV.get()));
+ EXPECT_FALSE(VM.count(this->AddV.get()));
+ VM[this->AddV.get()] = 7;
+ this->AddV.reset();
+ EXPECT_FALSE(VM.count(this->AddV.get()));
+}
+
+} // end anonymous namespace
diff --git a/unittests/IR/VerifierTest.cpp b/unittests/IR/VerifierTest.cpp
new file mode 100644
index 000000000000..89119368fbd9
--- /dev/null
+++ b/unittests/IR/VerifierTest.cpp
@@ -0,0 +1,64 @@
+//===- llvm/unittest/IR/VerifierTest.cpp - Verifier unit tests ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace {
+
+TEST(VerifierTest, Branch_i1) {
+ LLVMContext &C = getGlobalContext();
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false);
+ OwningPtr<Function> F(Function::Create(FTy, GlobalValue::ExternalLinkage));
+ BasicBlock *Entry = BasicBlock::Create(C, "entry", F.get());
+ BasicBlock *Exit = BasicBlock::Create(C, "exit", F.get());
+ ReturnInst::Create(C, Exit);
+
+ // To avoid triggering an assertion in BranchInst::Create, we first create
+ // a branch with an 'i1' condition ...
+
+ Constant *False = ConstantInt::getFalse(C);
+ BranchInst *BI = BranchInst::Create(Exit, Exit, False, Entry);
+
+ // ... then use setOperand to redirect it to a value of different type.
+
+ Constant *Zero32 = ConstantInt::get(IntegerType::get(C, 32), 0);
+ BI->setOperand(0, Zero32);
+
+ EXPECT_TRUE(verifyFunction(*F, ReturnStatusAction));
+}
+
+TEST(VerifierTest, AliasUnnamedAddr) {
+ LLVMContext &C = getGlobalContext();
+ Module M("M", C);
+ Type *Ty = Type::getInt8Ty(C);
+ Constant *Init = Constant::getNullValue(Ty);
+ GlobalVariable *Aliasee = new GlobalVariable(M, Ty, true,
+ GlobalValue::ExternalLinkage,
+ Init, "foo");
+ GlobalAlias *GA = new GlobalAlias(Type::getInt8PtrTy(C),
+ GlobalValue::ExternalLinkage,
+ "bar", Aliasee, &M);
+ GA->setUnnamedAddr(true);
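+ // The verifier rejects unnamed_addr on aliases, so verification must fail
+ // with the message checked below.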
+ std::string Error;
+ EXPECT_TRUE(verifyModule(M, ReturnStatusAction, &Error));
+ EXPECT_TRUE(StringRef(Error).startswith("Alias cannot have unnamed_addr"));
+}
+} // end anonymous namespace
+} // end namespace llvm
diff --git a/unittests/IR/WaymarkTest.cpp b/unittests/IR/WaymarkTest.cpp
new file mode 100644
index 000000000000..cf7d76dffc97
--- /dev/null
+++ b/unittests/IR/WaymarkTest.cpp
@@ -0,0 +1,56 @@
+//===- llvm/unittest/IR/WaymarkTest.cpp - getUser() unit tests ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// We perform white-box tests of the Use waymarking machinery.
+//
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "gtest/gtest.h"
+#include <algorithm>
+
+namespace llvm {
+namespace {
+
+Constant *char2constant(char c) {
+ return ConstantInt::get(Type::getInt8Ty(getGlobalContext()), c);
+}
+
+
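+// Waymarking stores small "digit" tags in the low bits of each Use's Prev
+// pointer so that getUser() can walk to the end of the operand array and
+// find the User allocated immediately after it, with no per-Use back
+// pointer.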
+TEST(WaymarkTest, NativeArray) {
+ static uint8_t tail[22] = "s02s33s30y2y0s1x0syxS";
+ Value * values[22];
+ std::transform(tail, tail + 22, values, char2constant);
+ FunctionType *FT = FunctionType::get(Type::getVoidTy(getGlobalContext()), true);
+ Function *F = Function::Create(FT, GlobalValue::ExternalLinkage);
+ const CallInst *A = CallInst::Create(F, makeArrayRef(values));
+ ASSERT_NE(A, (const CallInst*)NULL);
+ ASSERT_EQ(1U + 22, A->getNumOperands());
+ const Use *U = &A->getOperandUse(0);
+ const Use *Ue = &A->getOperandUse(22);
+ for (; U != Ue; ++U)
+ {
+ EXPECT_EQ(A, U->getUser());
+ }
+ delete A;
+}
+
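+// initTags writes waymarking tags over a free-standing Use array; getUser()
+// is then expected to return the address just past the tagged range, where a
+// real User object would normally be laid out.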
+TEST(WaymarkTest, TwoBit) {
+ Use* many = (Use*)calloc(8212 + 1, sizeof(Use));
+ ASSERT_TRUE(many);
+ Use::initTags(many, many + 8212);
+ for (Use *U = many, *Ue = many + 8212 - 1; U != Ue; ++U)
+ {
+ EXPECT_EQ(reinterpret_cast<User *>(Ue + 1), U->getUser());
+ }
+ free(many);
+}
+
+} // end anonymous namespace
+} // end namespace llvm
diff --git a/unittests/Makefile b/unittests/Makefile
index 27afccf02e36..926459ac08f8 100644
--- a/unittests/Makefile
+++ b/unittests/Makefile
@@ -9,7 +9,7 @@
LEVEL = ..
-PARALLEL_DIRS = ADT ExecutionEngine Support Transforms VMCore Analysis Bitcode
+PARALLEL_DIRS = ADT ExecutionEngine Support Transforms IR Analysis Bitcode
include $(LEVEL)/Makefile.common
diff --git a/unittests/Option/CMakeLists.txt b/unittests/Option/CMakeLists.txt
new file mode 100644
index 000000000000..185d503912fd
--- /dev/null
+++ b/unittests/Option/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LLVM_LINK_COMPONENTS
+ Option
+ Support
+ )
+
+set(LLVM_TARGET_DEFINITIONS Opts.td)
+
+tablegen(LLVM Opts.inc -gen-opt-parser-defs)
+add_public_tablegen_target(OptsTestTableGen)
+
+add_llvm_unittest(OptionTests
+ OptionParsingTest.cpp
+ )
+
+add_dependencies(OptionTests OptsTestTableGen)
diff --git a/unittests/Option/OptionParsingTest.cpp b/unittests/Option/OptionParsingTest.cpp
new file mode 100644
index 000000000000..30944d9be797
--- /dev/null
+++ b/unittests/Option/OptionParsingTest.cpp
@@ -0,0 +1,106 @@
+//===- unittest/Support/OptionParsingTest.cpp - OptTable tests ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::opt;
+
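+// Opts.inc, generated by tablegen from Opts.td, is expanded three times with
+// different macro definitions: once to build the OPT_* ID enum, once to
+// define the prefix string arrays, and once to fill the OptTable::Info table.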
+enum ID {
+ OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR) OPT_##ID,
+#include "Opts.inc"
+ LastOption
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
+#include "Opts.inc"
+#undef PREFIX
+
+static const OptTable::Info InfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR) \
+ { PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, Option::KIND##Class, PARAM, \
+ FLAGS, OPT_##GROUP, OPT_##ALIAS },
+#include "Opts.inc"
+#undef OPTION
+};
+
+namespace {
+class TestOptTable : public OptTable {
+public:
+ TestOptTable()
+ : OptTable(InfoTable, sizeof(InfoTable) / sizeof(InfoTable[0])) {}
+};
+}
+
+const char *Args[] = {
+ "-A",
+ "-Bhi",
+ "--C=desu",
+ "-C", "bye",
+ "-D,adena",
+ "-E", "apple", "bloom",
+ "-Fblarg",
+ "-F", "42",
+ "-Gchuu", "2"
+ };
+
+TEST(Support, OptionParsing) {
+ TestOptTable T;
+ unsigned MAI, MAC;
+ OwningPtr<InputArgList>
+ AL(T.ParseArgs(Args,
+ Args + (sizeof(Args) / sizeof(Args[0])),
+ MAI,
+ MAC));
+
+ // Check they all exist.
+ EXPECT_TRUE(AL->hasArg(OPT_A));
+ EXPECT_TRUE(AL->hasArg(OPT_B));
+ EXPECT_TRUE(AL->hasArg(OPT_C));
+ EXPECT_TRUE(AL->hasArg(OPT_D));
+ EXPECT_TRUE(AL->hasArg(OPT_E));
+ EXPECT_TRUE(AL->hasArg(OPT_F));
+ EXPECT_TRUE(AL->hasArg(OPT_G));
+
+ // Check the values.
+ EXPECT_EQ(AL->getLastArgValue(OPT_B), "hi");
+ EXPECT_EQ(AL->getLastArgValue(OPT_C), "bye");
+ EXPECT_EQ(AL->getLastArgValue(OPT_D), "adena");
+ std::vector<std::string> Es = AL->getAllArgValues(OPT_E);
+ EXPECT_EQ(Es[0], "apple");
+ EXPECT_EQ(Es[1], "bloom");
+ EXPECT_EQ(AL->getLastArgValue(OPT_F), "42");
+ std::vector<std::string> Gs = AL->getAllArgValues(OPT_G);
+ EXPECT_EQ(Gs[0], "chuu");
+ EXPECT_EQ(Gs[1], "2");
+
+ // Check the help text.
+ std::string Help;
+ raw_string_ostream RSO(Help);
+ T.PrintHelp(RSO, "test", "title!");
+ EXPECT_NE(Help.find("-A"), std::string::npos);
+
+ // Test aliases.
+ arg_iterator Cs = AL->filtered_begin(OPT_C);
+ ASSERT_NE(Cs, AL->filtered_end());
+ EXPECT_EQ(StringRef((*Cs)->getValue()), "desu");
+ ArgStringList ASL;
+ (*Cs)->render(*AL, ASL);
+ ASSERT_EQ(ASL.size(), 2u);
+ EXPECT_EQ(StringRef(ASL[0]), "-C");
+ EXPECT_EQ(StringRef(ASL[1]), "desu");
+}
diff --git a/unittests/Option/Opts.td b/unittests/Option/Opts.td
new file mode 100644
index 000000000000..3d6242f5185c
--- /dev/null
+++ b/unittests/Option/Opts.td
@@ -0,0 +1,13 @@
+include "llvm/Option/OptParser.td"
+
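+// Each option kind corresponds to a different command-line shape, exercised
+// by OptionParsingTest: Flag "-A"; Joined "-Bhi"; Separate "-C bye";
+// CommaJoined "-D,adena"; MultiArg<2> "-E apple bloom"; JoinedOrSeparate
+// "-Fblarg" or "-F 42"; JoinedAndSeparate "-Gchuu 2".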
+def A : Flag<["-"], "A">, HelpText<"The A option">;
+def B : Joined<["-"], "B">, HelpText<"The B option">, MetaVarName<"B">;
+def C : Separate<["-"], "C">, HelpText<"The C option">, MetaVarName<"C">;
+def D : CommaJoined<["-"], "D">, HelpText<"The D option">, MetaVarName<"D">;
+def E : MultiArg<["-"], "E", 2>;
+def F : JoinedOrSeparate<["-"], "F">, HelpText<"The F option">, MetaVarName<"F">;
+def G : JoinedAndSeparate<["-"], "G">, HelpText<"The G option">, MetaVarName<"G">;
+
+def Ceq : Joined<["-", "--"], "C=">, Alias<C>;
+
+def H : Flag<["-"], "H">, Flags<[HelpHidden]>;
diff --git a/unittests/Support/AlignOfTest.cpp b/unittests/Support/AlignOfTest.cpp
index f01e6609390c..40f729585728 100644
--- a/unittests/Support/AlignOfTest.cpp
+++ b/unittests/Support/AlignOfTest.cpp
@@ -9,19 +9,29 @@
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Compiler.h"
-
#include "gtest/gtest.h"
using namespace llvm;
namespace {
-
// Disable warnings about questionable type definitions.
// We're testing that even questionable types work with the alignment utilities.
#ifdef _MSC_VER
#pragma warning(disable:4584)
#endif
+// Suppress the "direct base '{anonymous}::S1' inaccessible in
+// '{anonymous}::D9' due to ambiguity" warning.
+//
+// Pragma-based warning suppression was introduced in GCC 4.2. Additionally,
+// this warning is "enabled by default", so it appears even without -Wall.
+// Apparently GCC suppresses it when -w is specified, which is odd. Clang, on
+// the other hand, gripes about -Wunknown-pragma, so we exclude it from this
+// suppression.
+#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402 && !defined(__clang__)
+#pragma GCC diagnostic warning "-w"
+#endif
+
// Define some fixed alignment types to use in these tests.
#if __has_feature(cxx_alignas)
struct alignas(1) A1 { };
@@ -310,6 +320,16 @@ TEST(AlignOfTest, BasicAlignedArray) {
#ifndef _MSC_VER
EXPECT_EQ(sizeof(V8), sizeof(AlignedCharArrayUnion<V8>));
#endif
-}
+ EXPECT_EQ(1u, (alignOf<AlignedCharArray<1, 1> >()));
+ EXPECT_EQ(2u, (alignOf<AlignedCharArray<2, 1> >()));
+ EXPECT_EQ(4u, (alignOf<AlignedCharArray<4, 1> >()));
+ EXPECT_EQ(8u, (alignOf<AlignedCharArray<8, 1> >()));
+ EXPECT_EQ(16u, (alignOf<AlignedCharArray<16, 1> >()));
+
+ EXPECT_EQ(1u, sizeof(AlignedCharArray<1, 1>));
+ EXPECT_EQ(7u, sizeof(AlignedCharArray<1, 7>));
+ EXPECT_EQ(2u, sizeof(AlignedCharArray<2, 2>));
+ EXPECT_EQ(16u, sizeof(AlignedCharArray<2, 16>));
+}
}
diff --git a/unittests/Support/AllocatorTest.cpp b/unittests/Support/AllocatorTest.cpp
index 8b463c11dfca..cb9fa430369b 100644
--- a/unittests/Support/AllocatorTest.cpp
+++ b/unittests/Support/AllocatorTest.cpp
@@ -8,7 +8,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
-
#include "gtest/gtest.h"
#include <cstdlib>
diff --git a/unittests/Support/ArrayRecyclerTest.cpp b/unittests/Support/ArrayRecyclerTest.cpp
new file mode 100644
index 000000000000..1ff97ba9e2b9
--- /dev/null
+++ b/unittests/Support/ArrayRecyclerTest.cpp
@@ -0,0 +1,109 @@
+//===--- unittest/Support/ArrayRecyclerTest.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ArrayRecycler.h"
+#include "llvm/Support/Allocator.h"
+#include "gtest/gtest.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+namespace {
+
+struct Object {
+ int Num;
+ Object *Other;
+};
+typedef ArrayRecycler<Object> ARO;
+
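+// Capacity::get(N) rounds N up to the smallest size bucket that can hold N
+// objects; getNext() steps to the next, strictly larger bucket. The recycler
+// keeps one free list per bucket.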
+TEST(ArrayRecyclerTest, Capacity) {
+ // Capacity size should never be 0.
+ ARO::Capacity Cap = ARO::Capacity::get(0);
+ EXPECT_LT(0u, Cap.getSize());
+
+ size_t PrevSize = Cap.getSize();
+ for (unsigned N = 1; N != 100; ++N) {
+ Cap = ARO::Capacity::get(N);
+ EXPECT_LE(N, Cap.getSize());
+ if (PrevSize >= N)
+ EXPECT_EQ(PrevSize, Cap.getSize());
+ else
+ EXPECT_LT(PrevSize, Cap.getSize());
+ PrevSize = Cap.getSize();
+ }
+
+ // Check that the buckets are monotonically increasing.
+ Cap = ARO::Capacity::get(0);
+ PrevSize = Cap.getSize();
+ for (unsigned N = 0; N != 20; ++N) {
+ Cap = Cap.getNext();
+ EXPECT_LT(PrevSize, Cap.getSize());
+ PrevSize = Cap.getSize();
+ }
+}
+
+TEST(ArrayRecyclerTest, Basics) {
+ BumpPtrAllocator Allocator;
+ ArrayRecycler<Object> DUT;
+
+ ARO::Capacity Cap = ARO::Capacity::get(8);
+ Object *A1 = DUT.allocate(Cap, Allocator);
+ A1[0].Num = 21;
+ A1[7].Num = 17;
+
+ Object *A2 = DUT.allocate(Cap, Allocator);
+ A2[0].Num = 121;
+ A2[7].Num = 117;
+
+ Object *A3 = DUT.allocate(Cap, Allocator);
+ A3[0].Num = 221;
+ A3[7].Num = 217;
+
+ EXPECT_EQ(21, A1[0].Num);
+ EXPECT_EQ(17, A1[7].Num);
+ EXPECT_EQ(121, A2[0].Num);
+ EXPECT_EQ(117, A2[7].Num);
+ EXPECT_EQ(221, A3[0].Num);
+ EXPECT_EQ(217, A3[7].Num);
+
+ DUT.deallocate(Cap, A2);
+
+ // Check that deallocation didn't clobber anything.
+ EXPECT_EQ(21, A1[0].Num);
+ EXPECT_EQ(17, A1[7].Num);
+ EXPECT_EQ(221, A3[0].Num);
+ EXPECT_EQ(217, A3[7].Num);
+
+ // Verify recycling.
+ Object *A2x = DUT.allocate(Cap, Allocator);
+ EXPECT_EQ(A2, A2x);
+
+ DUT.deallocate(Cap, A2x);
+ DUT.deallocate(Cap, A1);
+ DUT.deallocate(Cap, A3);
+
+ // Objects are not required to be recycled in reverse deallocation order, but
+ // that is what the current implementation does.
+ Object *A3x = DUT.allocate(Cap, Allocator);
+ EXPECT_EQ(A3, A3x);
+ Object *A1x = DUT.allocate(Cap, Allocator);
+ EXPECT_EQ(A1, A1x);
+ Object *A2y = DUT.allocate(Cap, Allocator);
+ EXPECT_EQ(A2, A2y);
+
+ // Back to allocation from the BumpPtrAllocator.
+ Object *A4 = DUT.allocate(Cap, Allocator);
+ EXPECT_NE(A1, A4);
+ EXPECT_NE(A2, A4);
+ EXPECT_NE(A3, A4);
+
+ DUT.clear(Allocator);
+}
+
+} // end anonymous namespace
diff --git a/unittests/Support/BlockFrequencyTest.cpp b/unittests/Support/BlockFrequencyTest.cpp
index 9c5bd7b89346..ff66bc4e45aa 100644
--- a/unittests/Support/BlockFrequencyTest.cpp
+++ b/unittests/Support/BlockFrequencyTest.cpp
@@ -1,7 +1,6 @@
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
-
+#include "llvm/Support/DataTypes.h"
#include "gtest/gtest.h"
#include <climits>
diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt
index 09a0ea50d748..b4b982f2ef23 100644
--- a/unittests/Support/CMakeLists.txt
+++ b/unittests/Support/CMakeLists.txt
@@ -6,12 +6,14 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(SupportTests
AlignOfTest.cpp
AllocatorTest.cpp
+ ArrayRecyclerTest.cpp
BlockFrequencyTest.cpp
Casting.cpp
CommandLineTest.cpp
ConstantRangeTest.cpp
DataExtractorTest.cpp
EndianTest.cpp
+ ErrorOrTest.cpp
FileOutputBufferTest.cpp
IntegersSubsetTest.cpp
LeakDetectorTest.cpp
@@ -20,10 +22,12 @@ add_llvm_unittest(SupportTests
MemoryBufferTest.cpp
MemoryTest.cpp
Path.cpp
+ ProcessTest.cpp
RegexTest.cpp
SwapByteOrderTest.cpp
TimeValue.cpp
ValueHandleTest.cpp
+ YAMLIOTest.cpp
YAMLParserTest.cpp
formatted_raw_ostream_test.cpp
raw_ostream_test.cpp
diff --git a/unittests/Support/Casting.cpp b/unittests/Support/Casting.cpp
index ad564aa366df..01583e43e29b 100644
--- a/unittests/Support/Casting.cpp
+++ b/unittests/Support/Casting.cpp
@@ -10,7 +10,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
#include "gtest/gtest.h"
#include <cstdlib>
diff --git a/unittests/Support/CommandLineTest.cpp b/unittests/Support/CommandLineTest.cpp
index 13e903858a54..43c8cbd123b4 100644
--- a/unittests/Support/CommandLineTest.cpp
+++ b/unittests/Support/CommandLineTest.cpp
@@ -9,11 +9,9 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Config/config.h"
-
#include "gtest/gtest.h"
-
-#include <string>
#include <stdlib.h>
+#include <string>
using namespace llvm;
diff --git a/unittests/Support/ConstantRangeTest.cpp b/unittests/Support/ConstantRangeTest.cpp
index 263f93c9ff37..4d6bbf6f8402 100644
--- a/unittests/Support/ConstantRangeTest.cpp
+++ b/unittests/Support/ConstantRangeTest.cpp
@@ -8,8 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/ConstantRange.h"
-#include "llvm/Instructions.h"
-
+#include "llvm/IR/Instructions.h"
#include "gtest/gtest.h"
using namespace llvm;
diff --git a/unittests/Support/EndianTest.cpp b/unittests/Support/EndianTest.cpp
index 6fe0247d46b0..8f9355306383 100644
--- a/unittests/Support/EndianTest.cpp
+++ b/unittests/Support/EndianTest.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/DataTypes.h"
+#include "gtest/gtest.h"
#include <cstdlib>
#include <ctime>
using namespace llvm;
@@ -21,36 +21,36 @@ namespace {
TEST(Endian, Read) {
// These are 5 bytes so we can be sure at least one of the reads is unaligned.
- unsigned char big[] = {0x00, 0x01, 0x02, 0x03, 0x04};
- unsigned char little[] = {0x00, 0x04, 0x03, 0x02, 0x01};
+ unsigned char bigval[] = {0x00, 0x01, 0x02, 0x03, 0x04};
+ unsigned char littleval[] = {0x00, 0x04, 0x03, 0x02, 0x01};
int32_t BigAsHost = 0x00010203;
- EXPECT_EQ(BigAsHost, (endian::read_be<int32_t, unaligned>(big)));
+ EXPECT_EQ(BigAsHost, (endian::read<int32_t, big, unaligned>(bigval)));
int32_t LittleAsHost = 0x02030400;
- EXPECT_EQ(LittleAsHost, (endian::read_le<int32_t, unaligned>(little)));
+ EXPECT_EQ(LittleAsHost,(endian::read<int32_t, little, unaligned>(littleval)));
- EXPECT_EQ((endian::read_be<int32_t, unaligned>(big + 1)),
- (endian::read_le<int32_t, unaligned>(little + 1)));
+ EXPECT_EQ((endian::read<int32_t, big, unaligned>(bigval + 1)),
+ (endian::read<int32_t, little, unaligned>(littleval + 1)));
}
TEST(Endian, Write) {
unsigned char data[5];
- endian::write_be<int32_t, unaligned>(data, -1362446643);
+ endian::write<int32_t, big, unaligned>(data, -1362446643);
EXPECT_EQ(data[0], 0xAE);
EXPECT_EQ(data[1], 0xCA);
EXPECT_EQ(data[2], 0xB6);
EXPECT_EQ(data[3], 0xCD);
- endian::write_be<int32_t, unaligned>(data + 1, -1362446643);
+ endian::write<int32_t, big, unaligned>(data + 1, -1362446643);
EXPECT_EQ(data[1], 0xAE);
EXPECT_EQ(data[2], 0xCA);
EXPECT_EQ(data[3], 0xB6);
EXPECT_EQ(data[4], 0xCD);
- endian::write_le<int32_t, unaligned>(data, -1362446643);
+ endian::write<int32_t, little, unaligned>(data, -1362446643);
EXPECT_EQ(data[0], 0xCD);
EXPECT_EQ(data[1], 0xB6);
EXPECT_EQ(data[2], 0xCA);
EXPECT_EQ(data[3], 0xAE);
- endian::write_le<int32_t, unaligned>(data + 1, -1362446643);
+ endian::write<int32_t, little, unaligned>(data + 1, -1362446643);
EXPECT_EQ(data[1], 0xCD);
EXPECT_EQ(data[2], 0xB6);
EXPECT_EQ(data[3], 0xCA);
@@ -69,4 +69,4 @@ TEST(Endian, PackedEndianSpecificIntegral) {
EXPECT_EQ(*big_val, *little_val);
}
-}
+} // end anon namespace
diff --git a/unittests/Support/ErrorOrTest.cpp b/unittests/Support/ErrorOrTest.cpp
new file mode 100644
index 000000000000..4853426c9470
--- /dev/null
+++ b/unittests/Support/ErrorOrTest.cpp
@@ -0,0 +1,104 @@
+//===- unittests/Support/ErrorOrTest.cpp - ErrorOr.h tests ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ErrorOr.h"
+
+#include "gtest/gtest.h"
+
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+
+ErrorOr<int> t1() {return 1;}
+ErrorOr<int> t2() { return errc::invalid_argument; }
+
+TEST(ErrorOr, SimpleValue) {
+ ErrorOr<int> a = t1();
+ EXPECT_TRUE(a);
+ EXPECT_EQ(1, *a);
+
+ a = t2();
+ EXPECT_FALSE(a);
+ EXPECT_EQ(errc::invalid_argument, a);
+#ifdef EXPECT_DEBUG_DEATH
+ EXPECT_DEBUG_DEATH(*a, "Cannot get value when an error exists");
+#endif
+}
+
+#if LLVM_HAS_CXX11_STDLIB
+ErrorOr<std::unique_ptr<int> > t3() {
+ return std::unique_ptr<int>(new int(3));
+}
+#endif
+
+TEST(ErrorOr, Types) {
+ int x;
+ ErrorOr<int&> a(x);
+ *a = 42;
+ EXPECT_EQ(42, x);
+
+ EXPECT_FALSE(ErrorOr<void>(errc::broken_pipe));
+ EXPECT_TRUE(ErrorOr<void>(errc::success));
+
+#if LLVM_HAS_CXX11_STDLIB
+ // Move only types.
+ EXPECT_EQ(3, **t3());
+#endif
+}
+
+struct B {};
+struct D : B {};
+
+TEST(ErrorOr, Covariant) {
+ ErrorOr<B*> b(ErrorOr<D*>(0));
+ b = ErrorOr<D*>(0);
+
+#if LLVM_HAS_CXX11_STDLIB
+ ErrorOr<std::unique_ptr<B> > b1(ErrorOr<std::unique_ptr<D> >(0));
+ b1 = ErrorOr<std::unique_ptr<D> >(0);
+#endif
+}
+} // end anon namespace
+
+struct InvalidArgError {
+ InvalidArgError() {}
+ InvalidArgError(std::string S) : ArgName(S) {}
+ std::string ArgName;
+};
+
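+// Registering InvalidArgError as ErrorOr user data: the traits
+// specialization names the error_code under which the payload travels, and
+// getError<InvalidArgError>() retrieves it from the result.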
+namespace llvm {
+template<>
+struct ErrorOrUserDataTraits<InvalidArgError> : true_type {
+ static error_code error() {
+ return make_error_code(errc::invalid_argument);
+ }
+};
+} // end namespace llvm
+
+ErrorOr<int> t4() {
+ return InvalidArgError("adena");
+}
+
+ErrorOr<void> t5() {
+ return InvalidArgError("pie");
+}
+
+namespace {
+TEST(ErrorOr, UserErrorData) {
+ ErrorOr<int> a = t4();
+ EXPECT_EQ(errc::invalid_argument, a);
+ EXPECT_EQ("adena", t4().getError<InvalidArgError>().ArgName);
+
+ ErrorOr<void> b = t5();
+ EXPECT_EQ(errc::invalid_argument, b);
+ EXPECT_EQ("pie", b.getError<InvalidArgError>().ArgName);
+}
+} // end anon namespace
diff --git a/unittests/Support/FileOutputBufferTest.cpp b/unittests/Support/FileOutputBufferTest.cpp
index edd350afcf5b..80d724536821 100644
--- a/unittests/Support/FileOutputBufferTest.cpp
+++ b/unittests/Support/FileOutputBufferTest.cpp
@@ -7,13 +7,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/PathV2.h"
#include "llvm/Support/raw_ostream.h"
-
#include "gtest/gtest.h"
using namespace llvm;
@@ -27,13 +26,6 @@ using namespace llvm::sys;
} else {}
namespace {
-
-
-// NOTE: Temporarily run this test on unix only. Once the file mapping
-// routines are ported to Windows, this conditional can be removed.
-#if LLVM_ON_UNIX
-
-
TEST(FileOutputBuffer, Test) {
// Create unique temporary directory for these tests
SmallString<128> TestDirectory;
@@ -45,7 +37,7 @@ TEST(FileOutputBuffer, Test) {
::close(fd);
TestDirectory = path::parent_path(TestDirectory);
}
-
+
// TEST 1: Verify commit case.
SmallString<128> File1(TestDirectory);
File1.append("/file1");
@@ -61,7 +53,7 @@ TEST(FileOutputBuffer, Test) {
}
// Verify file exists and starts with special header.
bool MagicMatches = false;
- ASSERT_NO_ERROR(fs::has_magic(Twine(File1), Twine("AABBCCDDEEFFGGHHIIJJ"),
+ ASSERT_NO_ERROR(fs::has_magic(Twine(File1), Twine("AABBCCDDEEFFGGHHIIJJ"),
MagicMatches));
EXPECT_TRUE(MagicMatches);
// Verify file is correct size.
@@ -82,8 +74,7 @@ TEST(FileOutputBuffer, Test) {
  // Verify file does not exist (because buffer not committed).
bool Exists = false;
ASSERT_NO_ERROR(fs::exists(Twine(File2), Exists));
- EXPECT_FALSE(Exists);
-
+ EXPECT_FALSE(Exists);
// TEST 3: Verify sizing down case.
SmallString<128> File3(TestDirectory);
@@ -100,7 +91,7 @@ TEST(FileOutputBuffer, Test) {
}
// Verify file exists and starts with special header.
bool MagicMatches3 = false;
- ASSERT_NO_ERROR(fs::has_magic(Twine(File3), Twine("AABBCCDDEEFFGGHHIIJJ"),
+ ASSERT_NO_ERROR(fs::has_magic(Twine(File3), Twine("AABBCCDDEEFFGGHHIIJJ"),
MagicMatches3));
EXPECT_TRUE(MagicMatches3);
// Verify file is correct size.
@@ -108,13 +99,12 @@ TEST(FileOutputBuffer, Test) {
ASSERT_NO_ERROR(fs::file_size(Twine(File3), File3Size));
ASSERT_EQ(File3Size, 5000ULL);
-
// TEST 4: Verify file can be made executable.
SmallString<128> File4(TestDirectory);
File4.append("/file4");
{
OwningPtr<FileOutputBuffer> Buffer;
- ASSERT_NO_ERROR(FileOutputBuffer::create(File4, 8192, Buffer,
+ ASSERT_NO_ERROR(FileOutputBuffer::create(File4, 8192, Buffer,
FileOutputBuffer::F_executable));
// Start buffer with special header.
memcpy(Buffer->getBufferStart(), "AABBCCDDEEFFGGHHIIJJ", 20);
@@ -131,7 +121,4 @@ TEST(FileOutputBuffer, Test) {
uint32_t RemovedCount;
ASSERT_NO_ERROR(fs::remove_all(TestDirectory.str(), RemovedCount));
}
-
-#endif // LLVM_ON_UNIX
-
} // anonymous namespace
diff --git a/unittests/Support/IntegersSubsetTest.cpp b/unittests/Support/IntegersSubsetTest.cpp
index 5d1dde4c37a4..f4298bf595aa 100644
--- a/unittests/Support/IntegersSubsetTest.cpp
+++ b/unittests/Support/IntegersSubsetTest.cpp
@@ -7,12 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/APInt.h"
#include "llvm/Support/IntegersSubset.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/Support/IntegersSubsetMapping.h"
-
#include "gtest/gtest.h"
-
#include <vector>
using namespace llvm;
diff --git a/unittests/Support/ManagedStatic.cpp b/unittests/Support/ManagedStatic.cpp
index bfeb0a7b6fba..8ddad38ecf17 100644
--- a/unittests/Support/ManagedStatic.cpp
+++ b/unittests/Support/ManagedStatic.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Threading.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/Threading.h"
#ifdef HAVE_PTHREAD_H
#include <pthread.h>
#endif
@@ -19,24 +19,41 @@ using namespace llvm;
namespace {
-#ifdef HAVE_PTHREAD_H
+#if defined(HAVE_PTHREAD_H) && !__has_feature(memory_sanitizer)
namespace test1 {
llvm::ManagedStatic<int> ms;
void *helper(void*) {
*ms;
return NULL;
}
+
+ // Valgrind's leak checker complains glibc's stack allocation.
+ // To appease valgrind, we provide our own stack for each thread.
+ void *allocate_stack(pthread_attr_t &a, size_t n = 65536) {
+ void *stack = malloc(n);
+ pthread_attr_init(&a);
+#if defined(__linux__)
+ pthread_attr_setstack(&a, stack, n);
+#endif
+ return stack;
+ }
}
TEST(Initialize, MultipleThreads) {
// Run this test under tsan: http://code.google.com/p/data-race-test/
+ pthread_attr_t a1, a2;
+ void *p1 = test1::allocate_stack(a1);
+ void *p2 = test1::allocate_stack(a2);
+
llvm_start_multithreaded();
pthread_t t1, t2;
- pthread_create(&t1, NULL, test1::helper, NULL);
- pthread_create(&t2, NULL, test1::helper, NULL);
+ pthread_create(&t1, &a1, test1::helper, NULL);
+ pthread_create(&t2, &a2, test1::helper, NULL);
pthread_join(t1, NULL);
pthread_join(t2, NULL);
+ free(p1);
+ free(p2);
llvm_stop_multithreaded();
}
#endif
diff --git a/unittests/Support/MemoryBufferTest.cpp b/unittests/Support/MemoryBufferTest.cpp
index 6c78cd80e8b5..1d9f482c519d 100644
--- a/unittests/Support/MemoryBufferTest.cpp
+++ b/unittests/Support/MemoryBufferTest.cpp
@@ -13,7 +13,6 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/ADT/OwningPtr.h"
-
#include "gtest/gtest.h"
using namespace llvm;
diff --git a/unittests/Support/MemoryTest.cpp b/unittests/Support/MemoryTest.cpp
index 21cb27eaf0ef..fae67a8dd256 100644
--- a/unittests/Support/MemoryTest.cpp
+++ b/unittests/Support/MemoryTest.cpp
@@ -1,356 +1,357 @@
-//===- llvm/unittest/Support/AllocatorTest.cpp - BumpPtrAllocator tests ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/Process.h"
-
-#include "gtest/gtest.h"
-#include <cstdlib>
-
-using namespace llvm;
-using namespace sys;
-
-namespace {
-
-class MappedMemoryTest : public ::testing::TestWithParam<unsigned> {
-public:
- MappedMemoryTest() {
- Flags = GetParam();
- PageSize = sys::Process::GetPageSize();
- }
-
-protected:
- // Adds RW flags to permit testing of the resulting memory
- unsigned getTestableEquivalent(unsigned RequestedFlags) {
- switch (RequestedFlags) {
- case Memory::MF_READ:
- case Memory::MF_WRITE:
- case Memory::MF_READ|Memory::MF_WRITE:
- return Memory::MF_READ|Memory::MF_WRITE;
- case Memory::MF_READ|Memory::MF_EXEC:
- case Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC:
- case Memory::MF_EXEC:
- return Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC;
- }
- // Default in case values are added to the enum, as required by some compilers
- return Memory::MF_READ|Memory::MF_WRITE;
- }
-
- // Returns true if the memory blocks overlap
- bool doesOverlap(MemoryBlock M1, MemoryBlock M2) {
- if (M1.base() == M2.base())
- return true;
-
- if (M1.base() > M2.base())
- return (unsigned char *)M2.base() + M2.size() > M1.base();
-
- return (unsigned char *)M1.base() + M1.size() > M2.base();
- }
-
- unsigned Flags;
- size_t PageSize;
-};
-
-TEST_P(MappedMemoryTest, AllocAndRelease) {
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(sizeof(int), M1.size());
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
-}
-
-TEST_P(MappedMemoryTest, MultipleAllocAndRelease) {
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(16, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(64, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(32, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(16U, M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(64U, M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(32U, M3.size());
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- MemoryBlock M4 = Memory::allocateMappedMemory(16, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- EXPECT_NE((void*)0, M4.base());
- EXPECT_LE(16U, M4.size());
- EXPECT_FALSE(Memory::releaseMappedMemory(M4));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, BasicWrite) {
- // This test applies only to writeable combinations
- if (Flags && !(Flags & Memory::MF_WRITE))
- return;
-
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(sizeof(int), M1.size());
-
- int *a = (int*)M1.base();
- *a = 1;
- EXPECT_EQ(1, *a);
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
-}
-
-TEST_P(MappedMemoryTest, MultipleWrite) {
- // This test applies only to writeable combinations
- if (Flags && !(Flags & Memory::MF_WRITE))
- return;
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(8 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(4 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(1U * sizeof(int), M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(8U * sizeof(int), M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(4U * sizeof(int), M3.size());
-
- int *x = (int*)M1.base();
- *x = 1;
-
- int *y = (int*)M2.base();
- for (int i = 0; i < 8; i++) {
- y[i] = i;
- }
-
- int *z = (int*)M3.base();
- *z = 42;
-
- EXPECT_EQ(1, *x);
- EXPECT_EQ(7, y[7]);
- EXPECT_EQ(42, *z);
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
-
- MemoryBlock M4 = Memory::allocateMappedMemory(64 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- EXPECT_NE((void*)0, M4.base());
- EXPECT_LE(64U * sizeof(int), M4.size());
- x = (int*)M4.base();
- *x = 4;
- EXPECT_EQ(4, *x);
- EXPECT_FALSE(Memory::releaseMappedMemory(M4));
-
- // Verify that M2 remains unaffected by other activity
- for (int i = 0; i < 8; i++) {
- EXPECT_EQ(i, y[i]);
- }
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, EnabledWrite) {
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(2 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(8 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(4 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(2U * sizeof(int), M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(8U * sizeof(int), M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(4U * sizeof(int), M3.size());
-
- EXPECT_FALSE(Memory::protectMappedMemory(M1, getTestableEquivalent(Flags)));
- EXPECT_FALSE(Memory::protectMappedMemory(M2, getTestableEquivalent(Flags)));
- EXPECT_FALSE(Memory::protectMappedMemory(M3, getTestableEquivalent(Flags)));
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- int *x = (int*)M1.base();
- *x = 1;
- int *y = (int*)M2.base();
- for (unsigned int i = 0; i < 8; i++) {
- y[i] = i;
- }
- int *z = (int*)M3.base();
- *z = 42;
-
- EXPECT_EQ(1, *x);
- EXPECT_EQ(7, y[7]);
- EXPECT_EQ(42, *z);
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- EXPECT_EQ(6, y[6]);
-
- MemoryBlock M4 = Memory::allocateMappedMemory(16, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- EXPECT_NE((void*)0, M4.base());
- EXPECT_LE(16U, M4.size());
- EXPECT_EQ(error_code::success(), Memory::protectMappedMemory(M4, getTestableEquivalent(Flags)));
- x = (int*)M4.base();
- *x = 4;
- EXPECT_EQ(4, *x);
- EXPECT_FALSE(Memory::releaseMappedMemory(M4));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, SuccessiveNear) {
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(16, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(64, &M1, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(32, &M2, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(16U, M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(64U, M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(32U, M3.size());
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, DuplicateNear) {
- error_code EC;
- MemoryBlock Near((void*)(3*PageSize), 16);
- MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(16U, M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(64U, M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(32U, M3.size());
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, ZeroNear) {
- error_code EC;
- MemoryBlock Near(0, 0);
- MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(16U, M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(64U, M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(32U, M3.size());
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, ZeroSizeNear) {
- error_code EC;
- MemoryBlock Near((void*)(4*PageSize), 0);
- MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(16U, M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(64U, M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(32U, M3.size());
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, UnalignedNear) {
- error_code EC;
- MemoryBlock Near((void*)(2*PageSize+5), 0);
- MemoryBlock M1 = Memory::allocateMappedMemory(15, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(sizeof(int), M1.size());
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
-}
-
-// Note that Memory::MF_WRITE is not supported as a standalone flag on every
-// operating system and architecture; requesting it may imply MF_READ|MF_WRITE.
-unsigned MemoryFlags[] = {
- Memory::MF_READ,
- Memory::MF_WRITE,
- Memory::MF_READ|Memory::MF_WRITE,
- Memory::MF_EXEC,
- Memory::MF_READ|Memory::MF_EXEC,
- Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC
- };
-
-INSTANTIATE_TEST_CASE_P(AllocationTests,
- MappedMemoryTest,
- ::testing::ValuesIn(MemoryFlags));
-
-} // anonymous namespace
+//===- llvm/unittest/Support/MemoryTest.cpp - Mapped memory tests ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Process.h"
+#include "gtest/gtest.h"
+#include <cstdlib>
+
+using namespace llvm;
+using namespace sys;
+
+namespace {
+
+class MappedMemoryTest : public ::testing::TestWithParam<unsigned> {
+public:
+ MappedMemoryTest() {
+ Flags = GetParam();
+ PageSize = sys::process::get_self()->page_size();
+ }
+
+protected:
+ // Maps the requested flags to an equivalent set that also permits read and
+ // write, so the tests can store sentinel values and read them back
+ unsigned getTestableEquivalent(unsigned RequestedFlags) {
+ switch (RequestedFlags) {
+ case Memory::MF_READ:
+ case Memory::MF_WRITE:
+ case Memory::MF_READ|Memory::MF_WRITE:
+ return Memory::MF_READ|Memory::MF_WRITE;
+ case Memory::MF_READ|Memory::MF_EXEC:
+ case Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC:
+ case Memory::MF_EXEC:
+ return Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC;
+ }
+ // Fallback for any values later added to the enum (and for compilers that
+ // require a return on every path)
+ return Memory::MF_READ|Memory::MF_WRITE;
+ }
+
+ // Returns true if the memory blocks overlap
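+ // Treats each block as the half-open range [base, base + size).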
+ bool doesOverlap(MemoryBlock M1, MemoryBlock M2) {
+ if (M1.base() == M2.base())
+ return true;
+
+ if (M1.base() > M2.base())
+ return (unsigned char *)M2.base() + M2.size() > M1.base();
+
+ return (unsigned char *)M1.base() + M1.size() > M2.base();
+ }
+
+ unsigned Flags;
+ size_t PageSize;
+};
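+
+// MappedMemoryTest is a value-parameterized fixture: GetParam() supplies one
+// of the MemoryFlags protection combinations listed near the end of this file.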
+
+TEST_P(MappedMemoryTest, AllocAndRelease) {
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(sizeof(int), M1.size());
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+}
+
+TEST_P(MappedMemoryTest, MultipleAllocAndRelease) {
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(64, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(32, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(16U, M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(64U, M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(32U, M3.size());
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ MemoryBlock M4 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ EXPECT_NE((void*)0, M4.base());
+ EXPECT_LE(16U, M4.size());
+ EXPECT_FALSE(Memory::releaseMappedMemory(M4));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, BasicWrite) {
+ // This test applies only to flag combinations that are both readable and writable
+ if (Flags &&
+ !((Flags & Memory::MF_READ) && (Flags & Memory::MF_WRITE)))
+ return;
+
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(sizeof(int), M1.size());
+
+ int *a = (int*)M1.base();
+ *a = 1;
+ EXPECT_EQ(1, *a);
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+}
+
+TEST_P(MappedMemoryTest, MultipleWrite) {
+ // This test applies only to flag combinations that are both readable and writable
+ if (Flags &&
+ !((Flags & Memory::MF_READ) && (Flags & Memory::MF_WRITE)))
+ return;
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(8 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(4 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(1U * sizeof(int), M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(8U * sizeof(int), M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(4U * sizeof(int), M3.size());
+
+ int *x = (int*)M1.base();
+ *x = 1;
+
+ int *y = (int*)M2.base();
+ for (int i = 0; i < 8; i++) {
+ y[i] = i;
+ }
+
+ int *z = (int*)M3.base();
+ *z = 42;
+
+ EXPECT_EQ(1, *x);
+ EXPECT_EQ(7, y[7]);
+ EXPECT_EQ(42, *z);
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+
+ MemoryBlock M4 = Memory::allocateMappedMemory(64 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ EXPECT_NE((void*)0, M4.base());
+ EXPECT_LE(64U * sizeof(int), M4.size());
+ x = (int*)M4.base();
+ *x = 4;
+ EXPECT_EQ(4, *x);
+ EXPECT_FALSE(Memory::releaseMappedMemory(M4));
+
+ // Verify that M2 remains unaffected by other activity
+ for (int i = 0; i < 8; i++) {
+ EXPECT_EQ(i, y[i]);
+ }
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, EnabledWrite) {
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(2 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(8 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(4 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(2U * sizeof(int), M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(8U * sizeof(int), M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(4U * sizeof(int), M3.size());
+
+ EXPECT_FALSE(Memory::protectMappedMemory(M1, getTestableEquivalent(Flags)));
+ EXPECT_FALSE(Memory::protectMappedMemory(M2, getTestableEquivalent(Flags)));
+ EXPECT_FALSE(Memory::protectMappedMemory(M3, getTestableEquivalent(Flags)));
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ int *x = (int*)M1.base();
+ *x = 1;
+ int *y = (int*)M2.base();
+ for (unsigned int i = 0; i < 8; i++) {
+ y[i] = i;
+ }
+ int *z = (int*)M3.base();
+ *z = 42;
+
+ EXPECT_EQ(1, *x);
+ EXPECT_EQ(7, y[7]);
+ EXPECT_EQ(42, *z);
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ EXPECT_EQ(6, y[6]);
+
+ MemoryBlock M4 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ EXPECT_NE((void*)0, M4.base());
+ EXPECT_LE(16U, M4.size());
+ EXPECT_EQ(error_code::success(), Memory::protectMappedMemory(M4, getTestableEquivalent(Flags)));
+ x = (int*)M4.base();
+ *x = 4;
+ EXPECT_EQ(4, *x);
+ EXPECT_FALSE(Memory::releaseMappedMemory(M4));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, SuccessiveNear) {
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(64, &M1, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(32, &M2, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(16U, M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(64U, M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(32U, M3.size());
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, DuplicateNear) {
+ error_code EC;
+ MemoryBlock Near((void*)(3*PageSize), 16);
+ MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(16U, M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(64U, M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(32U, M3.size());
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, ZeroNear) {
+ error_code EC;
+ MemoryBlock Near(0, 0);
+ MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(16U, M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(64U, M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(32U, M3.size());
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, ZeroSizeNear) {
+ error_code EC;
+ MemoryBlock Near((void*)(4*PageSize), 0);
+ MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(16U, M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(64U, M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(32U, M3.size());
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, UnalignedNear) {
+ error_code EC;
+ MemoryBlock Near((void*)(2*PageSize+5), 0);
+ MemoryBlock M1 = Memory::allocateMappedMemory(15, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(sizeof(int), M1.size());
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+}
+
+// Note that Memory::MF_WRITE is not supported as a standalone flag on every
+// operating system and architecture; requesting it may imply MF_READ|MF_WRITE.
+unsigned MemoryFlags[] = {
+ Memory::MF_READ,
+ Memory::MF_WRITE,
+ Memory::MF_READ|Memory::MF_WRITE,
+ Memory::MF_EXEC,
+ Memory::MF_READ|Memory::MF_EXEC,
+ Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC
+ };
+
+INSTANTIATE_TEST_CASE_P(AllocationTests,
+ MappedMemoryTest,
+ ::testing::ValuesIn(MemoryFlags));
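+
+// This instantiates every TEST_P above once per entry in MemoryFlags; gtest
+// names the runs AllocationTests/MappedMemoryTest.<TestName>/<index>.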
+
+} // anonymous namespace
diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp
index 63c9ae059157..45112597979c 100644
--- a/unittests/Support/Path.cpp
+++ b/unittests/Support/Path.cpp
@@ -7,11 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/FileSystem.h"
#include "llvm/Support/PathV2.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
-
#include "gtest/gtest.h"
using namespace llvm;
@@ -225,6 +224,18 @@ TEST_F(FileSystemTest, TempFiles) {
// Make sure Temp1 doesn't exist.
ASSERT_NO_ERROR(fs::exists(Twine(TempPath), TempFileExists));
EXPECT_FALSE(TempFileExists);
+
+#ifdef LLVM_ON_WIN32
+ // Path name > 260 chars should get an error.
+ const char *Path270 =
+ "abcdefghijklmnopqrstuvwxyz9abcdefghijklmnopqrstuvwxyz8"
+ "abcdefghijklmnopqrstuvwxyz7abcdefghijklmnopqrstuvwxyz6"
+ "abcdefghijklmnopqrstuvwxyz5abcdefghijklmnopqrstuvwxyz4"
+ "abcdefghijklmnopqrstuvwxyz3abcdefghijklmnopqrstuvwxyz2"
+ "abcdefghijklmnopqrstuvwxyz1abcdefghijklmnopqrstuvwxyz0";
+ EXPECT_EQ(fs::unique_file(Twine(Path270), FileDescriptor, TempPath),
+ windows_error::path_not_found);
+#endif
}
TEST_F(FileSystemTest, DirectoryIteration) {
@@ -351,6 +362,7 @@ TEST_F(FileSystemTest, FileMapping) {
StringRef Val("hello there");
{
fs::mapped_file_region mfr(FileDescriptor,
+ true,
fs::mapped_file_region::readwrite,
4096,
0,
@@ -375,7 +387,7 @@ TEST_F(FileSystemTest, FileMapping) {
// Unmap temp file
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
fs::mapped_file_region m(Twine(TempPath),
fs::mapped_file_region::readonly,
0,
diff --git a/unittests/Support/ProcessTest.cpp b/unittests/Support/ProcessTest.cpp
new file mode 100644
index 000000000000..e57c0e6eaf81
--- /dev/null
+++ b/unittests/Support/ProcessTest.cpp
@@ -0,0 +1,42 @@
+//===- unittest/Support/ProcessTest.cpp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Process.h"
+#include "gtest/gtest.h"
+
+#ifdef LLVM_ON_WIN32
+#include "windows.h"
+#endif
+
+namespace {
+
+using namespace llvm;
+using namespace sys;
+
+TEST(ProcessTest, SelfProcess) {
+ EXPECT_TRUE(process::get_self());
+ EXPECT_EQ(process::get_self(), process::get_self());
+
+#if defined(LLVM_ON_UNIX)
+ EXPECT_EQ(getpid(), process::get_self()->get_id());
+#elif defined(LLVM_ON_WIN32)
+ EXPECT_EQ(GetCurrentProcess(), process::get_self()->get_id());
+#endif
+
+ EXPECT_LT(1u, process::get_self()->page_size());
+
+ EXPECT_LT(TimeValue::MinTime, process::get_self()->get_user_time());
+ EXPECT_GT(TimeValue::MaxTime, process::get_self()->get_user_time());
+ EXPECT_LT(TimeValue::MinTime, process::get_self()->get_system_time());
+ EXPECT_GT(TimeValue::MaxTime, process::get_self()->get_system_time());
+ EXPECT_LT(TimeValue::MinTime, process::get_self()->get_wall_time());
+ EXPECT_GT(TimeValue::MaxTime, process::get_self()->get_wall_time());
+}
+
+} // end anonymous namespace
diff --git a/unittests/Support/RegexTest.cpp b/unittests/Support/RegexTest.cpp
index 65b66c3eee88..3577d1015e91 100644
--- a/unittests/Support/RegexTest.cpp
+++ b/unittests/Support/RegexTest.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
#include "llvm/Support/Regex.h"
#include "llvm/ADT/SmallVector.h"
+#include "gtest/gtest.h"
#include <cstring>
using namespace llvm;
@@ -51,7 +51,6 @@ TEST_F(RegexTest, Basics) {
EXPECT_EQ(1u, Matches.size());
EXPECT_EQ(String, Matches[0].str());
-
std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)";
String="YX99a:513b";
NulPattern[7] = '\0';
@@ -62,6 +61,28 @@ TEST_F(RegexTest, Basics) {
EXPECT_TRUE(r5.match(String));
}
+TEST_F(RegexTest, Backreferences) {
+ Regex r1("([a-z]+)_\\1");
+ SmallVector<StringRef, 4> Matches;
+ EXPECT_TRUE(r1.match("abc_abc", &Matches));
+ EXPECT_EQ(2u, Matches.size());
+ EXPECT_FALSE(r1.match("abc_ab", &Matches));
+
+ Regex r2("a([0-9])b\\1c\\1");
+ EXPECT_TRUE(r2.match("a4b4c4", &Matches));
+ EXPECT_EQ(2u, Matches.size());
+ EXPECT_EQ("4", Matches[1].str());
+ EXPECT_FALSE(r2.match("a2b2c3"));
+
+ Regex r3("a([0-9])([a-z])b\\1\\2");
+ EXPECT_TRUE(r3.match("a6zb6z", &Matches));
+ EXPECT_EQ(3u, Matches.size());
+ EXPECT_EQ("6", Matches[1].str());
+ EXPECT_EQ("z", Matches[2].str());
+ EXPECT_FALSE(r3.match("a6zb6y"));
+ EXPECT_FALSE(r3.match("a6zb7z"));
+}
+
TEST_F(RegexTest, Substitution) {
std::string Error;
diff --git a/unittests/Support/ValueHandleTest.cpp b/unittests/Support/ValueHandleTest.cpp
index 2e5e5b167c41..05aafa2d05d8 100644
--- a/unittests/Support/ValueHandleTest.cpp
+++ b/unittests/Support/ValueHandleTest.cpp
@@ -8,14 +8,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/ValueHandle.h"
-
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/ADT/OwningPtr.h"
-
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
#include "gtest/gtest.h"
-
#include <memory>
using namespace llvm;
diff --git a/unittests/Support/YAMLIOTest.cpp b/unittests/Support/YAMLIOTest.cpp
new file mode 100644
index 000000000000..0993d8c0b555
--- /dev/null
+++ b/unittests/Support/YAMLIOTest.cpp
@@ -0,0 +1,1299 @@
+//===- unittest/Support/YAMLIOTest.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "gtest/gtest.h"
+
+
+using llvm::yaml::Input;
+using llvm::yaml::Output;
+using llvm::yaml::IO;
+using llvm::yaml::MappingTraits;
+using llvm::yaml::MappingNormalization;
+using llvm::yaml::ScalarTraits;
+using llvm::yaml::Hex8;
+using llvm::yaml::Hex16;
+using llvm::yaml::Hex32;
+using llvm::yaml::Hex64;
+
+
+//===----------------------------------------------------------------------===//
+// Test MappingTraits
+//===----------------------------------------------------------------------===//
+
+struct FooBar {
+ int foo;
+ int bar;
+};
+typedef std::vector<FooBar> FooBarSequence;
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(FooBar)
+
+
+namespace llvm {
+namespace yaml {
+ template <>
+ struct MappingTraits<FooBar> {
+ static void mapping(IO &io, FooBar& fb) {
+ io.mapRequired("foo", fb.foo);
+ io.mapRequired("bar", fb.bar);
+ }
+ };
+}
+}
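+
+// With MappingTraits<FooBar> defined, Input's operator>> and Output's
+// operator<< can read and write FooBar values directly, as the tests below
+// demonstrate.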
+
+
+//
+// Test the reading of a yaml mapping
+//
+TEST(YAMLIO, TestMapRead) {
+ FooBar doc;
+ Input yin("---\nfoo: 3\nbar: 5\n...\n");
+ yin >> doc;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(doc.foo, 3);
+ EXPECT_EQ(doc.bar,5);
+}
+
+
+//
+// Test the reading of a yaml sequence of mappings
+//
+TEST(YAMLIO, TestSequenceMapRead) {
+ FooBarSequence seq;
+ Input yin("---\n - foo: 3\n bar: 5\n - foo: 7\n bar: 9\n...\n");
+ yin >> seq;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(seq.size(), 2UL);
+ FooBar& map1 = seq[0];
+ FooBar& map2 = seq[1];
+ EXPECT_EQ(map1.foo, 3);
+ EXPECT_EQ(map1.bar, 5);
+ EXPECT_EQ(map2.foo, 7);
+ EXPECT_EQ(map2.bar, 9);
+}
+
+
+//
+// Test writing then reading back a sequence of mappings
+//
+TEST(YAMLIO, TestSequenceMapWriteAndRead) {
+ std::string intermediate;
+ {
+ FooBar entry1;
+ entry1.foo = 10;
+ entry1.bar = -3;
+ FooBar entry2;
+ entry2.foo = 257;
+ entry2.bar = 0;
+ FooBarSequence seq;
+ seq.push_back(entry1);
+ seq.push_back(entry2);
+
+ llvm::raw_string_ostream ostr(intermediate);
+ Output yout(ostr);
+ yout << seq;
+ }
+
+ {
+ Input yin(intermediate);
+ FooBarSequence seq2;
+ yin >> seq2;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(seq2.size(), 2UL);
+ FooBar& map1 = seq2[0];
+ FooBar& map2 = seq2[1];
+ EXPECT_EQ(map1.foo, 10);
+ EXPECT_EQ(map1.bar, -3);
+ EXPECT_EQ(map2.foo, 257);
+ EXPECT_EQ(map2.bar, 0);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Test built-in types
+//===----------------------------------------------------------------------===//
+
+struct BuiltInTypes {
+ llvm::StringRef str;
+ uint64_t u64;
+ uint32_t u32;
+ uint16_t u16;
+ uint8_t u8;
+ bool b;
+ int64_t s64;
+ int32_t s32;
+ int16_t s16;
+ int8_t s8;
+ float f;
+ double d;
+ Hex8 h8;
+ Hex16 h16;
+ Hex32 h32;
+ Hex64 h64;
+};
+
+namespace llvm {
+namespace yaml {
+ template <>
+ struct MappingTraits<BuiltInTypes> {
+ static void mapping(IO &io, BuiltInTypes& bt) {
+ io.mapRequired("str", bt.str);
+ io.mapRequired("u64", bt.u64);
+ io.mapRequired("u32", bt.u32);
+ io.mapRequired("u16", bt.u16);
+ io.mapRequired("u8", bt.u8);
+ io.mapRequired("b", bt.b);
+ io.mapRequired("s64", bt.s64);
+ io.mapRequired("s32", bt.s32);
+ io.mapRequired("s16", bt.s16);
+ io.mapRequired("s8", bt.s8);
+ io.mapRequired("f", bt.f);
+ io.mapRequired("d", bt.d);
+ io.mapRequired("h8", bt.h8);
+ io.mapRequired("h16", bt.h16);
+ io.mapRequired("h32", bt.h32);
+ io.mapRequired("h64", bt.h64);
+ }
+ };
+}
+}
+
+
+//
+// Test the reading of all built-in scalar conversions
+//
+TEST(YAMLIO, TestReadBuiltInTypes) {
+ BuiltInTypes map;
+ Input yin("---\n"
+ "str: hello there\n"
+ "u64: 5000000000\n"
+ "u32: 4000000000\n"
+ "u16: 65000\n"
+ "u8: 255\n"
+ "b: false\n"
+ "s64: -5000000000\n"
+ "s32: -2000000000\n"
+ "s16: -32000\n"
+ "s8: -127\n"
+ "f: 137.125\n"
+ "d: -2.8625\n"
+ "h8: 0xFF\n"
+ "h16: 0x8765\n"
+ "h32: 0xFEDCBA98\n"
+ "h64: 0xFEDCBA9876543210\n"
+ "...\n");
+ yin >> map;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_TRUE(map.str.equals("hello there"));
+ EXPECT_EQ(map.u64, 5000000000ULL);
+ EXPECT_EQ(map.u32, 4000000000U);
+ EXPECT_EQ(map.u16, 65000);
+ EXPECT_EQ(map.u8, 255);
+ EXPECT_EQ(map.b, false);
+ EXPECT_EQ(map.s64, -5000000000LL);
+ EXPECT_EQ(map.s32, -2000000000L);
+ EXPECT_EQ(map.s16, -32000);
+ EXPECT_EQ(map.s8, -127);
+ EXPECT_EQ(map.f, 137.125);
+ EXPECT_EQ(map.d, -2.8625);
+ EXPECT_EQ(map.h8, Hex8(255));
+ EXPECT_EQ(map.h16, Hex16(0x8765));
+ EXPECT_EQ(map.h32, Hex32(0xFEDCBA98));
+ EXPECT_EQ(map.h64, Hex64(0xFEDCBA9876543210LL));
+}
+
+
+//
+// Test writing then reading back all built-in scalar types
+//
+TEST(YAMLIO, TestReadWriteBuiltInTypes) {
+ std::string intermediate;
+ {
+ BuiltInTypes map;
+ map.str = "one two";
+ map.u64 = 6000000000ULL;
+ map.u32 = 3000000000U;
+ map.u16 = 50000;
+ map.u8 = 254;
+ map.b = true;
+ map.s64 = -6000000000LL;
+ map.s32 = -2000000000;
+ map.s16 = -32000;
+ map.s8 = -128;
+ map.f = 3.25;
+ map.d = -2.8625;
+ map.h8 = 254;
+ map.h16 = 50000;
+ map.h32 = 3000000000U;
+ map.h64 = 6000000000LL;
+
+ llvm::raw_string_ostream ostr(intermediate);
+ Output yout(ostr);
+ yout << map;
+ }
+
+ {
+ Input yin(intermediate);
+ BuiltInTypes map;
+ yin >> map;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_TRUE(map.str.equals("one two"));
+ EXPECT_EQ(map.u64, 6000000000ULL);
+ EXPECT_EQ(map.u32, 3000000000U);
+ EXPECT_EQ(map.u16, 50000);
+ EXPECT_EQ(map.u8, 254);
+ EXPECT_EQ(map.b, true);
+ EXPECT_EQ(map.s64, -6000000000LL);
+ EXPECT_EQ(map.s32, -2000000000L);
+ EXPECT_EQ(map.s16, -32000);
+ EXPECT_EQ(map.s8, -128);
+ EXPECT_EQ(map.f, 3.25);
+ EXPECT_EQ(map.d, -2.8625);
+ EXPECT_EQ(map.h8, Hex8(254));
+ EXPECT_EQ(map.h16, Hex16(50000));
+ EXPECT_EQ(map.h32, Hex32(3000000000U));
+ EXPECT_EQ(map.h64, Hex64(6000000000LL));
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Test ScalarEnumerationTraits
+//===----------------------------------------------------------------------===//
+
+enum Colors {
+ cRed,
+ cBlue,
+ cGreen,
+ cYellow
+};
+
+struct ColorMap {
+ Colors c1;
+ Colors c2;
+ Colors c3;
+ Colors c4;
+ Colors c5;
+ Colors c6;
+};
+
+namespace llvm {
+namespace yaml {
+ template <>
+ struct ScalarEnumerationTraits<Colors> {
+ static void enumeration(IO &io, Colors &value) {
+ io.enumCase(value, "red", cRed);
+ io.enumCase(value, "blue", cBlue);
+ io.enumCase(value, "green", cGreen);
+ io.enumCase(value, "yellow",cYellow);
+ }
+ };
+ template <>
+ struct MappingTraits<ColorMap> {
+ static void mapping(IO &io, ColorMap& c) {
+ io.mapRequired("c1", c.c1);
+ io.mapRequired("c2", c.c2);
+ io.mapRequired("c3", c.c3);
+ io.mapOptional("c4", c.c4, cBlue); // supplies default
+ io.mapOptional("c5", c.c5, cYellow); // supplies default
+ io.mapOptional("c6", c.c6, cRed); // supplies default
+ }
+ };
+}
+}
+
+
+//
+// Test reading enumerated scalars
+//
+TEST(YAMLIO, TestEnumRead) {
+ ColorMap map;
+ Input yin("---\n"
+ "c1: blue\n"
+ "c2: red\n"
+ "c3: green\n"
+ "c5: yellow\n"
+ "...\n");
+ yin >> map;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(cBlue, map.c1);
+ EXPECT_EQ(cRed, map.c2);
+ EXPECT_EQ(cGreen, map.c3);
+ EXPECT_EQ(cBlue, map.c4); // tests default
+ EXPECT_EQ(cYellow,map.c5); // tests overridden
+ EXPECT_EQ(cRed, map.c6); // tests default
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Test ScalarBitSetTraits
+//===----------------------------------------------------------------------===//
+
+enum MyFlags {
+ flagNone = 0,
+ flagBig = 1 << 0,
+ flagFlat = 1 << 1,
+ flagRound = 1 << 2,
+ flagPointy = 1 << 3
+};
+inline MyFlags operator|(MyFlags a, MyFlags b) {
+ return static_cast<MyFlags>(
+ static_cast<uint32_t>(a) | static_cast<uint32_t>(b));
+}
+
+struct FlagsMap {
+ MyFlags f1;
+ MyFlags f2;
+ MyFlags f3;
+ MyFlags f4;
+};
+
+
+namespace llvm {
+namespace yaml {
+ template <>
+ struct ScalarBitSetTraits<MyFlags> {
+ static void bitset(IO &io, MyFlags &value) {
+ io.bitSetCase(value, "big", flagBig);
+ io.bitSetCase(value, "flat", flagFlat);
+ io.bitSetCase(value, "round", flagRound);
+ io.bitSetCase(value, "pointy",flagPointy);
+ }
+ };
+ template <>
+ struct MappingTraits<FlagsMap> {
+ static void mapping(IO &io, FlagsMap& c) {
+ io.mapRequired("f1", c.f1);
+ io.mapRequired("f2", c.f2);
+ io.mapRequired("f3", c.f3);
+ io.mapOptional("f4", c.f4, MyFlags(flagRound));
+ }
+ };
+}
+}
+
+
+//
+// Test reading flow sequence representing bit-mask values
+//
+TEST(YAMLIO, TestFlagsRead) {
+ FlagsMap map;
+ Input yin("---\n"
+ "f1: [ big ]\n"
+ "f2: [ round, flat ]\n"
+ "f3: []\n"
+ "...\n");
+ yin >> map;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(flagBig, map.f1);
+ EXPECT_EQ(flagRound|flagFlat, map.f2);
+ EXPECT_EQ(flagNone, map.f3); // check empty set
+ EXPECT_EQ(flagRound, map.f4); // check optional key
+}
+
+
+//
+// Test writing then reading back bit-mask values
+//
+TEST(YAMLIO, TestReadWriteFlags) {
+ std::string intermediate;
+ {
+ FlagsMap map;
+ map.f1 = flagBig;
+ map.f2 = flagRound | flagFlat;
+ map.f3 = flagNone;
+ map.f4 = flagNone;
+
+ llvm::raw_string_ostream ostr(intermediate);
+ Output yout(ostr);
+ yout << map;
+ }
+
+ {
+ Input yin(intermediate);
+ FlagsMap map2;
+ yin >> map2;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(flagBig, map2.f1);
+ EXPECT_EQ(flagRound|flagFlat, map2.f2);
+ EXPECT_EQ(flagNone, map2.f3);
+ //EXPECT_EQ(flagRound, map2.f4); // check optional key
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Test ScalarTraits
+//===----------------------------------------------------------------------===//
+
+struct MyCustomType {
+ int length;
+ int width;
+};
+
+struct MyCustomTypeMap {
+ MyCustomType f1;
+ MyCustomType f2;
+ int f3;
+};
+
+
+namespace llvm {
+namespace yaml {
+ template <>
+ struct MappingTraits<MyCustomTypeMap> {
+ static void mapping(IO &io, MyCustomTypeMap& s) {
+ io.mapRequired("f1", s.f1);
+ io.mapRequired("f2", s.f2);
+ io.mapRequired("f3", s.f3);
+ }
+ };
+ // MyCustomType is formatted as a yaml scalar. A value of
+ // {length=3, width=4} would be represented in yaml as "3 by 4".
+ template<>
+ struct ScalarTraits<MyCustomType> {
+ static void output(const MyCustomType &value, void* ctxt, llvm::raw_ostream &out) {
+ out << llvm::format("%d by %d", value.length, value.width);
+ }
+ static StringRef input(StringRef scalar, void* ctxt, MyCustomType &value) {
+ size_t byStart = scalar.find("by");
+ if ( byStart != StringRef::npos ) {
+ StringRef lenStr = scalar.slice(0, byStart);
+ lenStr = lenStr.rtrim();
+ if ( lenStr.getAsInteger(0, value.length) ) {
+ return "malformed length";
+ }
+ StringRef widthStr = scalar.drop_front(byStart+2);
+ widthStr = widthStr.ltrim();
+ if ( widthStr.getAsInteger(0, value.width) ) {
+ return "malformed width";
+ }
+ return StringRef();
+ }
+ else {
+ return "malformed by";
+ }
+ }
+ };
+}
+}
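+
+// By convention, ScalarTraits<T>::input() returns an empty StringRef on
+// success; any non-empty return value is reported by YAML I/O as a parse
+// error.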
+
+
+//
+// Test writing then reading back custom values
+//
+TEST(YAMLIO, TestReadWriteMyCustomType) {
+ std::string intermediate;
+ {
+ MyCustomTypeMap map;
+ map.f1.length = 1;
+ map.f1.width = 4;
+ map.f2.length = 100;
+ map.f2.width = 400;
+ map.f3 = 10;
+
+ llvm::raw_string_ostream ostr(intermediate);
+ Output yout(ostr);
+ yout << map;
+ }
+
+ {
+ Input yin(intermediate);
+ MyCustomTypeMap map2;
+ yin >> map2;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(1, map2.f1.length);
+ EXPECT_EQ(4, map2.f1.width);
+ EXPECT_EQ(100, map2.f2.length);
+ EXPECT_EQ(400, map2.f2.width);
+ EXPECT_EQ(10, map2.f3);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Test flow sequences
+//===----------------------------------------------------------------------===//
+
+LLVM_YAML_STRONG_TYPEDEF(int, MyNumber)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(MyNumber)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::StringRef)
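+
+// The FLOW variants of the sequence macro write elements inline, e.g.
+// "[ 10, -30, 1024 ]", rather than one "- item" line per element.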
+
+namespace llvm {
+namespace yaml {
+ template<>
+ struct ScalarTraits<MyNumber> {
+ static void output(const MyNumber &value, void *, llvm::raw_ostream &out) {
+ out << value;
+ }
+
+ static StringRef input(StringRef scalar, void *, MyNumber &value) {
+ long long n;
+ if ( getAsSignedInteger(scalar, 0, n) )
+ return "invalid number";
+ value = n;
+ return StringRef();
+ }
+ };
+}
+}
+
+struct NameAndNumbers {
+ llvm::StringRef name;
+ std::vector<llvm::StringRef> strings;
+ std::vector<MyNumber> single;
+ std::vector<MyNumber> numbers;
+};
+
+namespace llvm {
+namespace yaml {
+ template <>
+ struct MappingTraits<NameAndNumbers> {
+ static void mapping(IO &io, NameAndNumbers& nn) {
+ io.mapRequired("name", nn.name);
+ io.mapRequired("strings", nn.strings);
+ io.mapRequired("single", nn.single);
+ io.mapRequired("numbers", nn.numbers);
+ }
+ };
+}
+}
+
+
+//
+// Test writing then reading back flow sequences
+//
+TEST(YAMLIO, TestReadWriteMyFlowSequence) {
+ std::string intermediate;
+ {
+ NameAndNumbers map;
+ map.name = "hello";
+ map.strings.push_back(llvm::StringRef("one"));
+ map.strings.push_back(llvm::StringRef("two"));
+ map.single.push_back(1);
+ map.numbers.push_back(10);
+ map.numbers.push_back(-30);
+ map.numbers.push_back(1024);
+
+ llvm::raw_string_ostream ostr(intermediate);
+ Output yout(ostr);
+ yout << map;
+
+ // Verify sequences were written in flow style
+ ostr.flush();
+ llvm::StringRef flowOut(intermediate);
+ EXPECT_NE(llvm::StringRef::npos, flowOut.find("one, two"));
+ EXPECT_NE(llvm::StringRef::npos, flowOut.find("10, -30, 1024"));
+ }
+
+ {
+ Input yin(intermediate);
+ NameAndNumbers map2;
+ yin >> map2;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_TRUE(map2.name.equals("hello"));
+ EXPECT_EQ(map2.strings.size(), 2UL);
+ EXPECT_TRUE(map2.strings[0].equals("one"));
+ EXPECT_TRUE(map2.strings[1].equals("two"));
+ EXPECT_EQ(map2.single.size(), 1UL);
+ EXPECT_EQ(1, map2.single[0]);
+ EXPECT_EQ(map2.numbers.size(), 3UL);
+ EXPECT_EQ(10, map2.numbers[0]);
+ EXPECT_EQ(-30, map2.numbers[1]);
+ EXPECT_EQ(1024, map2.numbers[2]);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Test normalizing/denormalizing
+//===----------------------------------------------------------------------===//
+
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, TotalSeconds)
+
+typedef std::vector<TotalSeconds> SecondsSequence;
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(TotalSeconds)
+
+
+namespace llvm {
+namespace yaml {
+ template <>
+ struct MappingTraits<TotalSeconds> {
+
+ class NormalizedSeconds {
+ public:
+ NormalizedSeconds(IO &io)
+ : hours(0), minutes(0), seconds(0) {
+ }
+ NormalizedSeconds(IO &, TotalSeconds &secs)
+ : hours(secs/3600),
+ minutes((secs - (hours*3600))/60),
+ seconds(secs % 60) {
+ }
+ TotalSeconds denormalize(IO &) {
+ return TotalSeconds(hours*3600 + minutes*60 + seconds);
+ }
+
+ uint32_t hours;
+ uint8_t minutes;
+ uint8_t seconds;
+ };
+
+ static void mapping(IO &io, TotalSeconds &secs) {
+ MappingNormalization<NormalizedSeconds, TotalSeconds> keys(io, secs);
+
+ io.mapOptional("hours", keys->hours, (uint32_t)0);
+ io.mapOptional("minutes", keys->minutes, (uint8_t)0);
+ io.mapRequired("seconds", keys->seconds);
+ }
+ };
+}
+}
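+
+// MappingNormalization bridges the two representations: on output the proxy
+// is constructed from the existing TotalSeconds value; on input the proxy is
+// filled from the YAML keys and denormalize() recomputes the TotalSeconds.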
+
+
+//
+// Test the reading of a yaml sequence of mappings
+//
+TEST(YAMLIO, TestReadMySecondsSequence) {
+ SecondsSequence seq;
+ Input yin("---\n - hours: 1\n seconds: 5\n - seconds: 59\n...\n");
+ yin >> seq;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(seq.size(), 2UL);
+ EXPECT_EQ(seq[0], 3605U); // hours: 1, seconds: 5 -> 3605
+ EXPECT_EQ(seq[1], 59U);
+}
+
+
+//
+// Test writing then reading back a sequence of normalized values
+//
+TEST(YAMLIO, TestReadWriteMySecondsSequence) {
+ std::string intermediate;
+ {
+ SecondsSequence seq;
+ seq.push_back(4000);
+ seq.push_back(500);
+ seq.push_back(59);
+
+ llvm::raw_string_ostream ostr(intermediate);
+ Output yout(ostr);
+ yout << seq;
+ }
+ {
+ Input yin(intermediate);
+ SecondsSequence seq2;
+ yin >> seq2;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(seq2.size(), 3UL);
+ EXPECT_EQ(seq2[0], 4000U);
+ EXPECT_EQ(seq2[1], 500U);
+ EXPECT_EQ(seq2[2], 59U);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Test dynamic typing
+//===----------------------------------------------------------------------===//
+
+enum AFlags {
+ a1,
+ a2,
+ a3
+};
+
+enum BFlags {
+ b1,
+ b2,
+ b3
+};
+
+enum Kind {
+ kindA,
+ kindB
+};
+
+struct KindAndFlags {
+ KindAndFlags() : kind(kindA), flags(0) { }
+ KindAndFlags(Kind k, uint32_t f) : kind(k), flags(f) { }
+ Kind kind;
+ uint32_t flags;
+};
+
+typedef std::vector<KindAndFlags> KindAndFlagsSequence;
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(KindAndFlags)
+
+namespace llvm {
+namespace yaml {
+ template <>
+ struct ScalarEnumerationTraits<AFlags> {
+ static void enumeration(IO &io, AFlags &value) {
+ io.enumCase(value, "a1", a1);
+ io.enumCase(value, "a2", a2);
+ io.enumCase(value, "a3", a3);
+ }
+ };
+ template <>
+ struct ScalarEnumerationTraits<BFlags> {
+ static void enumeration(IO &io, BFlags &value) {
+ io.enumCase(value, "b1", b1);
+ io.enumCase(value, "b2", b2);
+ io.enumCase(value, "b3", b3);
+ }
+ };
+ template <>
+ struct ScalarEnumerationTraits<Kind> {
+ static void enumeration(IO &io, Kind &value) {
+ io.enumCase(value, "A", kindA);
+ io.enumCase(value, "B", kindB);
+ }
+ };
+ template <>
+ struct MappingTraits<KindAndFlags> {
+ static void mapping(IO &io, KindAndFlags& kf) {
+ io.mapRequired("kind", kf.kind);
+ // Type of "flags" field varies depending on "kind" field.
+ // Use memcpy here to avoid breaking strict aliasing rules.
+ if (kf.kind == kindA) {
+ AFlags aflags = static_cast<AFlags>(kf.flags);
+ io.mapRequired("flags", aflags);
+ kf.flags = aflags;
+ } else {
+ BFlags bflags = static_cast<BFlags>(kf.flags);
+ io.mapRequired("flags", bflags);
+ kf.flags = bflags;
+ }
+ }
+ };
+}
+}
+
+
+//
+// Test the reading of a yaml sequence of dynamic types
+//
+TEST(YAMLIO, TestReadKindAndFlagsSequence) {
+ KindAndFlagsSequence seq;
+ Input yin("---\n - kind: A\n flags: a2\n - kind: B\n flags: b1\n...\n");
+ yin >> seq;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(seq.size(), 2UL);
+ EXPECT_EQ(seq[0].kind, kindA);
+ EXPECT_EQ(seq[0].flags, (uint32_t)a2);
+ EXPECT_EQ(seq[1].kind, kindB);
+ EXPECT_EQ(seq[1].flags, (uint32_t)b1);
+}
+
+//
+// Test writing then reading back dynamic types
+//
+TEST(YAMLIO, TestReadWriteKindAndFlagsSequence) {
+ std::string intermediate;
+ {
+ KindAndFlagsSequence seq;
+ seq.push_back(KindAndFlags(kindA,a1));
+ seq.push_back(KindAndFlags(kindB,b1));
+ seq.push_back(KindAndFlags(kindA,a2));
+ seq.push_back(KindAndFlags(kindB,b2));
+ seq.push_back(KindAndFlags(kindA,a3));
+
+ llvm::raw_string_ostream ostr(intermediate);
+ Output yout(ostr);
+ yout << seq;
+ }
+ {
+ Input yin(intermediate);
+ KindAndFlagsSequence seq2;
+ yin >> seq2;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(seq2.size(), 5UL);
+ EXPECT_EQ(seq2[0].kind, kindA);
+ EXPECT_EQ(seq2[0].flags, (uint32_t)a1);
+ EXPECT_EQ(seq2[1].kind, kindB);
+ EXPECT_EQ(seq2[1].flags, (uint32_t)b1);
+ EXPECT_EQ(seq2[2].kind, kindA);
+ EXPECT_EQ(seq2[2].flags, (uint32_t)a2);
+ EXPECT_EQ(seq2[3].kind, kindB);
+ EXPECT_EQ(seq2[3].flags, (uint32_t)b2);
+ EXPECT_EQ(seq2[4].kind, kindA);
+ EXPECT_EQ(seq2[4].flags, (uint32_t)a3);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Test document list
+//===----------------------------------------------------------------------===//
+
+struct FooBarMap {
+ int foo;
+ int bar;
+};
+typedef std::vector<FooBarMap> FooBarMapDocumentList;
+
+LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(FooBarMap)
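+
+// The DOCUMENT_LIST variant writes each vector element as its own YAML
+// document (separated by "---") rather than as a sequence within a single
+// document.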
+
+
+namespace llvm {
+namespace yaml {
+ template <>
+ struct MappingTraits<FooBarMap> {
+ static void mapping(IO &io, FooBarMap& fb) {
+ io.mapRequired("foo", fb.foo);
+ io.mapRequired("bar", fb.bar);
+ }
+ };
+}
+}
+
+
+//
+// Test the reading of a yaml mapping
+//
+TEST(YAMLIO, TestDocRead) {
+ FooBarMap doc;
+ Input yin("---\nfoo: 3\nbar: 5\n...\n");
+ yin >> doc;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(doc.foo, 3);
+ EXPECT_EQ(doc.bar,5);
+}
+
+
+
+//
+// Test writing then reading back a document list of mappings
+//
+TEST(YAMLIO, TestSequenceDocListWriteAndRead) {
+ std::string intermediate;
+ {
+ FooBarMap doc1;
+ doc1.foo = 10;
+ doc1.bar = -3;
+ FooBarMap doc2;
+ doc2.foo = 257;
+ doc2.bar = 0;
+ std::vector<FooBarMap> docList;
+ docList.push_back(doc1);
+ docList.push_back(doc2);
+
+ llvm::raw_string_ostream ostr(intermediate);
+ Output yout(ostr);
+ yout << docList;
+ }
+
+
+ {
+ Input yin(intermediate);
+ std::vector<FooBarMap> docList2;
+ yin >> docList2;
+
+ EXPECT_FALSE(yin.error());
+ EXPECT_EQ(docList2.size(), 2UL);
+ FooBarMap& map1 = docList2[0];
+ FooBarMap& map2 = docList2[1];
+ EXPECT_EQ(map1.foo, 10);
+ EXPECT_EQ(map1.bar, -3);
+ EXPECT_EQ(map2.foo, 257);
+ EXPECT_EQ(map2.bar, 0);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Test error handling
+//===----------------------------------------------------------------------===//
+
+
+
+static void suppressErrorMessages(const llvm::SMDiagnostic &, void *) {
+}
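+
+// The tests below install this no-op diagnostic handler so that expected
+// parse failures do not print to stderr; Input::error() still reports them.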
+
+
+//
+// Test error handling of unknown enumerated scalar
+//
+TEST(YAMLIO, TestColorsReadError) {
+ ColorMap map;
+ Input yin("---\n"
+ "c1: blue\n"
+ "c2: purple\n"
+ "c3: green\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> map;
+ EXPECT_TRUE(yin.error());
+}
+
+
+//
+// Test error handling of flow sequence with unknown value
+//
+TEST(YAMLIO, TestFlagsReadError) {
+ FlagsMap map;
+ Input yin("---\n"
+ "f1: [ big ]\n"
+ "f2: [ round, hollow ]\n"
+ "f3: []\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> map;
+
+ EXPECT_TRUE(yin.error());
+}
+
+
+//
+// Test error handling reading built-in uint8_t type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(uint8_t)
+TEST(YAMLIO, TestReadBuiltInTypesUint8Error) {
+ std::vector<uint8_t> seq;
+ Input yin("---\n"
+ "- 255\n"
+ "- 0\n"
+ "- 257\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+
+//
+// Test error handling reading built-in uint16_t type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(uint16_t)
+TEST(YAMLIO, TestReadBuiltInTypesUint16Error) {
+ std::vector<uint16_t> seq;
+ Input yin("---\n"
+ "- 65535\n"
+ "- 0\n"
+ "- 66000\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+
+//
+// Test error handling reading built-in uint32_t type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(uint32_t)
+TEST(YAMLIO, TestReadBuiltInTypesUint32Error) {
+ std::vector<uint32_t> seq;
+ Input yin("---\n"
+ "- 4000000000\n"
+ "- 0\n"
+ "- 5000000000\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+
+//
+// Test error handling reading built-in uint64_t type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(uint64_t)
+TEST(YAMLIO, TestReadBuiltInTypesUint64Error) {
+ std::vector<uint64_t> seq;
+ Input yin("---\n"
+ "- 18446744073709551615\n"
+ "- 0\n"
+ "- 19446744073709551615\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+
+//
+// Test error handling reading built-in int8_t type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(int8_t)
+TEST(YAMLIO, TestReadBuiltInTypesint8OverError) {
+ std::vector<int8_t> seq;
+ Input yin("---\n"
+ "- -128\n"
+ "- 0\n"
+ "- 127\n"
+ "- 128\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+//
+// Test error handling reading built-in int8_t type
+//
+TEST(YAMLIO, TestReadBuiltInTypesint8UnderError) {
+ std::vector<int8_t> seq;
+ Input yin("---\n"
+ "- -128\n"
+ "- 0\n"
+ "- 127\n"
+ "- -129\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+
+//
+// Test error handling reading built-in int16_t type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(int16_t)
+TEST(YAMLIO, TestReadBuiltInTypesint16UnderError) {
+ std::vector<int16_t> seq;
+ Input yin("---\n"
+ "- 32767\n"
+ "- 0\n"
+ "- -32768\n"
+ "- -32769\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+
+//
+// Test error handling reading built-in int16_t type
+//
+TEST(YAMLIO, TestReadBuiltInTypesint16OverError) {
+ std::vector<int16_t> seq;
+ Input yin("---\n"
+ "- 32767\n"
+ "- 0\n"
+ "- -32768\n"
+ "- 32768\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+
+//
+// Test error handling reading built-in int32_t type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(int32_t)
+TEST(YAMLIO, TestReadBuiltInTypesint32UnderError) {
+ std::vector<int32_t> seq;
+ Input yin("---\n"
+ "- 2147483647\n"
+ "- 0\n"
+ "- -2147483648\n"
+ "- -2147483649\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+//
+// Test error handling reading built-in int32_t type
+//
+TEST(YAMLIO, TestReadBuiltInTypesint32OverError) {
+ std::vector<int32_t> seq;
+ Input yin("---\n"
+ "- 2147483647\n"
+ "- 0\n"
+ "- -2147483648\n"
+ "- 2147483649\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+
+//
+// Test error handling reading built-in int64_t type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(int64_t)
+TEST(YAMLIO, TestReadBuiltInTypesint64UnderError) {
+ std::vector<int64_t> seq;
+ Input yin("---\n"
+ "- -9223372036854775808\n"
+ "- 0\n"
+ "- 9223372036854775807\n"
+ "- -9223372036854775809\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+//
+// Test error handling reading built-in int64_t type
+//
+TEST(YAMLIO, TestReadBuiltInTypesint64OverError) {
+ std::vector<int64_t> seq;
+ Input yin("---\n"
+ "- -9223372036854775808\n"
+ "- 0\n"
+ "- 9223372036854775807\n"
+ "- 9223372036854775809\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+//
+// Test error handling reading built-in float type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(float)
+TEST(YAMLIO, TestReadBuiltInTypesFloatError) {
+ std::vector<float> seq;
+ Input yin("---\n"
+ "- 0.0\n"
+ "- 1000.1\n"
+ "- -123.456\n"
+ "- 1.2.3\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+//
+// Test error handling reading built-in float type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(double)
+TEST(YAMLIO, TestReadBuiltInTypesDoubleError) {
+ std::vector<double> seq;
+ Input yin("---\n"
+ "- 0.0\n"
+ "- 1000.1\n"
+ "- -123.456\n"
+ "- 1.2.3\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+//
+// Test error handling reading built-in Hex8 type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(Hex8)
+TEST(YAMLIO, TestReadBuiltInTypesHex8Error) {
+ std::vector<Hex8> seq;
+ Input yin("---\n"
+ "- 0x12\n"
+ "- 0xFE\n"
+ "- 0x123\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+
+//
+// Test error handling reading built-in Hex16 type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(Hex16)
+TEST(YAMLIO, TestReadBuiltInTypesHex16Error) {
+ std::vector<Hex16> seq;
+ Input yin("---\n"
+ "- 0x0012\n"
+ "- 0xFEFF\n"
+ "- 0x12345\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+//
+// Test error handling reading built-in Hex32 type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(Hex32)
+TEST(YAMLIO, TestReadBuiltInTypesHex32Error) {
+ std::vector<Hex32> seq;
+ Input yin("---\n"
+ "- 0x0012\n"
+ "- 0xFEFF0000\n"
+ "- 0x1234556789\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
+//
+// Test error handling reading built-in Hex64 type
+//
+LLVM_YAML_IS_SEQUENCE_VECTOR(Hex64)
+TEST(YAMLIO, TestReadBuiltInTypesHex64Error) {
+ std::vector<Hex64> seq;
+ Input yin("---\n"
+ "- 0x0012\n"
+ "- 0xFFEEDDCCBBAA9988\n"
+ "- 0x12345567890ABCDEF0\n"
+ "...\n");
+ yin.setDiagHandler(suppressErrorMessages);
+ yin >> seq;
+
+ EXPECT_TRUE(yin.error());
+}
+
diff --git a/unittests/Support/YAMLParserTest.cpp b/unittests/Support/YAMLParserTest.cpp
index 480a5739f444..e9839358a019 100644
--- a/unittests/Support/YAMLParserTest.cpp
+++ b/unittests/Support/YAMLParserTest.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "gtest/gtest.h"
@@ -21,6 +22,12 @@ static void SuppressDiagnosticsOutput(const SMDiagnostic &, void *) {
// to reduce noise in unit test runs.
}
+// Assumes Ctx points to an SMDiagnostic into which Diag can be copied.
+static void CollectDiagnosticsOutput(const SMDiagnostic &Diag, void *Ctx) {
+ SMDiagnostic* DiagOut = static_cast<SMDiagnostic*>(Ctx);
+ *DiagOut = Diag;
+}
+
// Checks that the given input gives a parse error. Makes sure that an error
// text is available and the parse fails.
static void ExpectParseError(StringRef Message, StringRef Input) {
@@ -182,4 +189,31 @@ TEST(YAMLParser, WorksWithIteratorAlgorithms) {
EXPECT_EQ(6, std::distance(Array->begin(), Array->end()));
}
+TEST(YAMLParser, DefaultDiagnosticFilename) {
+ SourceMgr SM;
+
+ SMDiagnostic GeneratedDiag;
+ SM.setDiagHandler(CollectDiagnosticsOutput, &GeneratedDiag);
+
+ // When we construct a YAML stream over an unnamed string,
+ // the filename is hard-coded as "YAML".
+ yaml::Stream UnnamedStream("[]", SM);
+ UnnamedStream.printError(UnnamedStream.begin()->getRoot(), "Hello, World!");
+ EXPECT_EQ("YAML", GeneratedDiag.getFilename());
+}
+
+TEST(YAMLParser, DiagnosticFilenameFromBufferID) {
+ SourceMgr SM;
+
+ SMDiagnostic GeneratedDiag;
+ SM.setDiagHandler(CollectDiagnosticsOutput, &GeneratedDiag);
+
+ // When we construct a YAML stream over a named buffer,
+ // we get its ID as filename in diagnostics.
+ MemoryBuffer* Buffer = MemoryBuffer::getMemBuffer("[]", "buffername.yaml");
+ yaml::Stream Stream(Buffer, SM);
+ Stream.printError(Stream.begin()->getRoot(), "Hello, World!");
+ EXPECT_EQ("buffername.yaml", GeneratedDiag.getFilename());
+}
+
} // end namespace llvm
diff --git a/unittests/Support/formatted_raw_ostream_test.cpp b/unittests/Support/formatted_raw_ostream_test.cpp
index 4725cedc2119..9bb804691337 100644
--- a/unittests/Support/formatted_raw_ostream_test.cpp
+++ b/unittests/Support/formatted_raw_ostream_test.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/FormattedStream.h"
+#include "gtest/gtest.h"
using namespace llvm;
diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp
index ea3d5bee78b9..cd304e720082 100644
--- a/unittests/Transforms/Utils/Cloning.cpp
+++ b/unittests/Transforms/Utils/Cloning.cpp
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
-#include "llvm/Argument.h"
-#include "llvm/Constant.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/LLVMContext.h"
+#include "gtest/gtest.h"
using namespace llvm;
diff --git a/unittests/Transforms/Utils/IntegerDivision.cpp b/unittests/Transforms/Utils/IntegerDivision.cpp
index a3211391d689..44c2328ee354 100644
--- a/unittests/Transforms/Utils/IntegerDivision.cpp
+++ b/unittests/Transforms/Utils/IntegerDivision.cpp
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Module.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "gtest/gtest.h"
using namespace llvm;
diff --git a/unittests/Transforms/Utils/Local.cpp b/unittests/Transforms/Utils/Local.cpp
index 727f5ea525d4..f0c3ecfbb9b8 100644
--- a/unittests/Transforms/Utils/Local.cpp
+++ b/unittests/Transforms/Utils/Local.cpp
@@ -7,12 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/BasicBlock.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Transforms/Utils/Local.h"
-
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
#include "gtest/gtest.h"
using namespace llvm;
diff --git a/unittests/VMCore/CMakeLists.txt b/unittests/VMCore/CMakeLists.txt
deleted file mode 100644
index 4025c7a91fb3..000000000000
--- a/unittests/VMCore/CMakeLists.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-set(LLVM_LINK_COMPONENTS
- asmparser
- core
- ipa
- )
-
-set(VMCoreSources
- ConstantsTest.cpp
- DominatorTreeTest.cpp
- IRBuilderTest.cpp
- InstructionsTest.cpp
- MDBuilderTest.cpp
- MetadataTest.cpp
- PassManagerTest.cpp
- TypeBuilderTest.cpp
- TypesTest.cpp
- ValueMapTest.cpp
- VerifierTest.cpp
- )
-
-# MSVC9 and 8 cannot compile ValueMapTest.cpp due to their bug.
-# See issue#331418 in Visual Studio.
-if(MSVC AND MSVC_VERSION LESS 1600)
- list(REMOVE_ITEM VMCoreSources ValueMapTest.cpp)
-endif()
-
-# HACK: Declare a couple of source files as optionally compiled to satisfy the
-# missing-file-checker in LLVM's weird CMake build.
-set(LLVM_OPTIONAL_SOURCES
- ValueMapTest.cpp
- )
-
-add_llvm_unittest(VMCoreTests
- ${VMCoreSources}
- )
diff --git a/unittests/VMCore/ConstantsTest.cpp b/unittests/VMCore/ConstantsTest.cpp
deleted file mode 100644
index 623ea0d10290..000000000000
--- a/unittests/VMCore/ConstantsTest.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-//===- llvm/unittest/VMCore/ConstantsTest.cpp - Constants unit tests ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "gtest/gtest.h"
-
-namespace llvm {
-namespace {
-
-TEST(ConstantsTest, Integer_i1) {
- IntegerType* Int1 = IntegerType::get(getGlobalContext(), 1);
- Constant* One = ConstantInt::get(Int1, 1, true);
- Constant* Zero = ConstantInt::get(Int1, 0);
- Constant* NegOne = ConstantInt::get(Int1, static_cast<uint64_t>(-1), true);
- EXPECT_EQ(NegOne, ConstantInt::getSigned(Int1, -1));
- Constant* Undef = UndefValue::get(Int1);
-
- // Input: @b = constant i1 add(i1 1 , i1 1)
- // Output: @b = constant i1 false
- EXPECT_EQ(Zero, ConstantExpr::getAdd(One, One));
-
- // @c = constant i1 add(i1 -1, i1 1)
- // @c = constant i1 false
- EXPECT_EQ(Zero, ConstantExpr::getAdd(NegOne, One));
-
- // @d = constant i1 add(i1 -1, i1 -1)
- // @d = constant i1 false
- EXPECT_EQ(Zero, ConstantExpr::getAdd(NegOne, NegOne));
-
- // @e = constant i1 sub(i1 -1, i1 1)
- // @e = constant i1 false
- EXPECT_EQ(Zero, ConstantExpr::getSub(NegOne, One));
-
- // @f = constant i1 sub(i1 1 , i1 -1)
- // @f = constant i1 false
- EXPECT_EQ(Zero, ConstantExpr::getSub(One, NegOne));
-
- // @g = constant i1 sub(i1 1 , i1 1)
- // @g = constant i1 false
- EXPECT_EQ(Zero, ConstantExpr::getSub(One, One));
-
- // @h = constant i1 shl(i1 1 , i1 1) ; undefined
- // @h = constant i1 undef
- EXPECT_EQ(Undef, ConstantExpr::getShl(One, One));
-
- // @i = constant i1 shl(i1 1 , i1 0)
- // @i = constant i1 true
- EXPECT_EQ(One, ConstantExpr::getShl(One, Zero));
-
- // @j = constant i1 lshr(i1 1, i1 1) ; undefined
- // @j = constant i1 undef
- EXPECT_EQ(Undef, ConstantExpr::getLShr(One, One));
-
- // @m = constant i1 ashr(i1 1, i1 1) ; undefined
- // @m = constant i1 undef
- EXPECT_EQ(Undef, ConstantExpr::getAShr(One, One));
-
- // @n = constant i1 mul(i1 -1, i1 1)
- // @n = constant i1 true
- EXPECT_EQ(One, ConstantExpr::getMul(NegOne, One));
-
- // @o = constant i1 sdiv(i1 -1, i1 1) ; overflow
- // @o = constant i1 true
- EXPECT_EQ(One, ConstantExpr::getSDiv(NegOne, One));
-
- // @p = constant i1 sdiv(i1 1 , i1 -1); overflow
- // @p = constant i1 true
- EXPECT_EQ(One, ConstantExpr::getSDiv(One, NegOne));
-
- // @q = constant i1 udiv(i1 -1, i1 1)
- // @q = constant i1 true
- EXPECT_EQ(One, ConstantExpr::getUDiv(NegOne, One));
-
- // @r = constant i1 udiv(i1 1, i1 -1)
- // @r = constant i1 true
- EXPECT_EQ(One, ConstantExpr::getUDiv(One, NegOne));
-
- // @s = constant i1 srem(i1 -1, i1 1) ; overflow
- // @s = constant i1 false
- EXPECT_EQ(Zero, ConstantExpr::getSRem(NegOne, One));
-
- // @t = constant i1 urem(i1 -1, i1 1)
- // @t = constant i1 false
- EXPECT_EQ(Zero, ConstantExpr::getURem(NegOne, One));
-
- // @u = constant i1 srem(i1 1, i1 -1) ; overflow
- // @u = constant i1 false
- EXPECT_EQ(Zero, ConstantExpr::getSRem(One, NegOne));
-}
-
-TEST(ConstantsTest, IntSigns) {
- IntegerType* Int8Ty = Type::getInt8Ty(getGlobalContext());
- EXPECT_EQ(100, ConstantInt::get(Int8Ty, 100, false)->getSExtValue());
- EXPECT_EQ(100, ConstantInt::get(Int8Ty, 100, true)->getSExtValue());
- EXPECT_EQ(100, ConstantInt::getSigned(Int8Ty, 100)->getSExtValue());
- EXPECT_EQ(-50, ConstantInt::get(Int8Ty, 206)->getSExtValue());
- EXPECT_EQ(-50, ConstantInt::getSigned(Int8Ty, -50)->getSExtValue());
- EXPECT_EQ(206U, ConstantInt::getSigned(Int8Ty, -50)->getZExtValue());
-
- // Overflow is handled by truncation.
- EXPECT_EQ(0x3b, ConstantInt::get(Int8Ty, 0x13b)->getSExtValue());
-}
-
-TEST(ConstantsTest, FP128Test) {
- Type *FP128Ty = Type::getFP128Ty(getGlobalContext());
-
- IntegerType *Int128Ty = Type::getIntNTy(getGlobalContext(), 128);
- Constant *Zero128 = Constant::getNullValue(Int128Ty);
- Constant *X = ConstantExpr::getUIToFP(Zero128, FP128Ty);
- EXPECT_TRUE(isa<ConstantFP>(X));
-}
-
-} // end anonymous namespace
-} // end namespace llvm
diff --git a/unittests/VMCore/DominatorTreeTest.cpp b/unittests/VMCore/DominatorTreeTest.cpp
deleted file mode 100644
index f6a90605a716..000000000000
--- a/unittests/VMCore/DominatorTreeTest.cpp
+++ /dev/null
@@ -1,195 +0,0 @@
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Assembly/Parser.h"
-#include "llvm/Support/SourceMgr.h"
-#include "gtest/gtest.h"
-
-using namespace llvm;
-
-namespace llvm {
- void initializeDPassPass(PassRegistry&);
-
- namespace {
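- // DPass checks dominance facts on the module built by makeLLVMModule():
- // bb0 invokes into bb1 (normal) and bb2 (unwind), both of which branch
- // to bb4; bb3 is never referenced, so it is unreachable from entry.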
- struct DPass : public FunctionPass {
- static char ID;
- virtual bool runOnFunction(Function &F) {
- DominatorTree *DT = &getAnalysis<DominatorTree>();
- Function::iterator FI = F.begin();
-
- BasicBlock *BB0 = FI++;
- BasicBlock::iterator BBI = BB0->begin();
- Instruction *Y1 = BBI++;
- Instruction *Y2 = BBI++;
- Instruction *Y3 = BBI++;
-
- BasicBlock *BB1 = FI++;
- BBI = BB1->begin();
- Instruction *Y4 = BBI++;
-
- BasicBlock *BB2 = FI++;
- BBI = BB2->begin();
- Instruction *Y5 = BBI++;
-
- BasicBlock *BB3 = FI++;
- BBI = BB3->begin();
- Instruction *Y6 = BBI++;
- Instruction *Y7 = BBI++;
-
- BasicBlock *BB4 = FI++;
- BBI = BB4->begin();
- Instruction *Y8 = BBI++;
- Instruction *Y9 = BBI++;
-
- // Reachability
- EXPECT_TRUE(DT->isReachableFromEntry(BB0));
- EXPECT_TRUE(DT->isReachableFromEntry(BB1));
- EXPECT_TRUE(DT->isReachableFromEntry(BB2));
- EXPECT_FALSE(DT->isReachableFromEntry(BB3));
- EXPECT_TRUE(DT->isReachableFromEntry(BB4));
-
- // BB dominance
- EXPECT_TRUE(DT->dominates(BB0, BB0));
- EXPECT_TRUE(DT->dominates(BB0, BB1));
- EXPECT_TRUE(DT->dominates(BB0, BB2));
- EXPECT_TRUE(DT->dominates(BB0, BB3));
- EXPECT_TRUE(DT->dominates(BB0, BB4));
-
- EXPECT_FALSE(DT->dominates(BB1, BB0));
- EXPECT_TRUE(DT->dominates(BB1, BB1));
- EXPECT_FALSE(DT->dominates(BB1, BB2));
- EXPECT_TRUE(DT->dominates(BB1, BB3));
- EXPECT_FALSE(DT->dominates(BB1, BB4));
-
- EXPECT_FALSE(DT->dominates(BB2, BB0));
- EXPECT_FALSE(DT->dominates(BB2, BB1));
- EXPECT_TRUE(DT->dominates(BB2, BB2));
- EXPECT_TRUE(DT->dominates(BB2, BB3));
- EXPECT_FALSE(DT->dominates(BB2, BB4));
-
- EXPECT_FALSE(DT->dominates(BB3, BB0));
- EXPECT_FALSE(DT->dominates(BB3, BB1));
- EXPECT_FALSE(DT->dominates(BB3, BB2));
- EXPECT_TRUE(DT->dominates(BB3, BB3));
- EXPECT_FALSE(DT->dominates(BB3, BB4));
-
- // BB proper dominance
- EXPECT_FALSE(DT->properlyDominates(BB0, BB0));
- EXPECT_TRUE(DT->properlyDominates(BB0, BB1));
- EXPECT_TRUE(DT->properlyDominates(BB0, BB2));
- EXPECT_TRUE(DT->properlyDominates(BB0, BB3));
-
- EXPECT_FALSE(DT->properlyDominates(BB1, BB0));
- EXPECT_FALSE(DT->properlyDominates(BB1, BB1));
- EXPECT_FALSE(DT->properlyDominates(BB1, BB2));
- EXPECT_TRUE(DT->properlyDominates(BB1, BB3));
-
- EXPECT_FALSE(DT->properlyDominates(BB2, BB0));
- EXPECT_FALSE(DT->properlyDominates(BB2, BB1));
- EXPECT_FALSE(DT->properlyDominates(BB2, BB2));
- EXPECT_TRUE(DT->properlyDominates(BB2, BB3));
-
- EXPECT_FALSE(DT->properlyDominates(BB3, BB0));
- EXPECT_FALSE(DT->properlyDominates(BB3, BB1));
- EXPECT_FALSE(DT->properlyDominates(BB3, BB2));
- EXPECT_FALSE(DT->properlyDominates(BB3, BB3));
-
- // Instruction dominance in the same reachable BB
- EXPECT_FALSE(DT->dominates(Y1, Y1));
- EXPECT_TRUE(DT->dominates(Y1, Y2));
- EXPECT_FALSE(DT->dominates(Y2, Y1));
- EXPECT_FALSE(DT->dominates(Y2, Y2));
-
- // Instruction dominance in the same unreachable BB
- EXPECT_TRUE(DT->dominates(Y6, Y6));
- EXPECT_TRUE(DT->dominates(Y6, Y7));
- EXPECT_TRUE(DT->dominates(Y7, Y6));
- EXPECT_TRUE(DT->dominates(Y7, Y7));
-
- // Invoke
- EXPECT_TRUE(DT->dominates(Y3, Y4));
- EXPECT_FALSE(DT->dominates(Y3, Y5));
-
- // Phi
- EXPECT_TRUE(DT->dominates(Y2, Y9));
- EXPECT_FALSE(DT->dominates(Y3, Y9));
- EXPECT_FALSE(DT->dominates(Y8, Y9));
-
- // Anything dominates unreachable
- EXPECT_TRUE(DT->dominates(Y1, Y6));
- EXPECT_TRUE(DT->dominates(Y3, Y6));
-
- // Unreachable doesn't dominate reachable
- EXPECT_FALSE(DT->dominates(Y6, Y1));
-
- // Instruction, BB dominance
- EXPECT_FALSE(DT->dominates(Y1, BB0));
- EXPECT_TRUE(DT->dominates(Y1, BB1));
- EXPECT_TRUE(DT->dominates(Y1, BB2));
- EXPECT_TRUE(DT->dominates(Y1, BB3));
- EXPECT_TRUE(DT->dominates(Y1, BB4));
-
- EXPECT_FALSE(DT->dominates(Y3, BB0));
- EXPECT_TRUE(DT->dominates(Y3, BB1));
- EXPECT_FALSE(DT->dominates(Y3, BB2));
- EXPECT_TRUE(DT->dominates(Y3, BB3));
- EXPECT_FALSE(DT->dominates(Y3, BB4));
-
- EXPECT_TRUE(DT->dominates(Y6, BB3));
-
- return false;
- }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
- }
- DPass() : FunctionPass(ID) {
- initializeDPassPass(*PassRegistry::getPassRegistry());
- }
- };
- char DPass::ID = 0;
-
-
- Module* makeLLVMModule(DPass *P) {
- const char *ModuleString =
- "declare i32 @g()\n" \
- "define void @f(i32 %x) {\n" \
- "bb0:\n" \
- " %y1 = add i32 %x, 1\n" \
- " %y2 = add i32 %x, 1\n" \
- " %y3 = invoke i32 @g() to label %bb1 unwind label %bb2\n" \
- "bb1:\n" \
- " %y4 = add i32 %x, 1\n" \
- " br label %bb4\n" \
- "bb2:\n" \
- " %y5 = landingpad i32 personality i32 ()* @g\n" \
- " cleanup\n" \
- " br label %bb4\n" \
- "bb3:\n" \
- " %y6 = add i32 %x, 1\n" \
- " %y7 = add i32 %x, 1\n" \
- " ret void\n" \
- "bb4:\n" \
- " %y8 = phi i32 [0, %bb2], [%y4, %bb1]\n"
- " %y9 = phi i32 [0, %bb2], [%y4, %bb1]\n"
- " ret void\n" \
- "}\n";
- LLVMContext &C = getGlobalContext();
- SMDiagnostic Err;
- return ParseAssemblyString(ModuleString, NULL, Err, C);
- }
-
- TEST(DominatorTree, Unreachable) {
- DPass *P = new DPass();
- Module *M = makeLLVMModule(P);
- PassManager Passes;
- Passes.add(P);
- Passes.run(*M);
- }
- }
-}
-
-INITIALIZE_PASS_BEGIN(DPass, "dpass", "dpass", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_END(DPass, "dpass", "dpass", false, false)
diff --git a/unittests/VMCore/IRBuilderTest.cpp b/unittests/VMCore/IRBuilderTest.cpp
deleted file mode 100644
index 9f26936df475..000000000000
--- a/unittests/VMCore/IRBuilderTest.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-//===- llvm/unittest/VMCore/IRBuilderTest.cpp - IRBuilder tests -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/BasicBlock.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/MDBuilder.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/OwningPtr.h"
-
-#include "gtest/gtest.h"
-
-using namespace llvm;
-
-namespace {
-
-class IRBuilderTest : public testing::Test {
-protected:
- virtual void SetUp() {
- M.reset(new Module("MyModule", getGlobalContext()));
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(getGlobalContext()),
- /*isVarArg=*/false);
- F = Function::Create(FTy, Function::ExternalLinkage, "", M.get());
- BB = BasicBlock::Create(getGlobalContext(), "", F);
- }
-
- virtual void TearDown() {
- BB = 0;
- M.reset();
- }
-
- OwningPtr<Module> M;
- Function *F;
- BasicBlock *BB;
-};
-
-TEST_F(IRBuilderTest, Lifetime) {
- IRBuilder<> Builder(BB);
- AllocaInst *Var1 = Builder.CreateAlloca(Builder.getInt8Ty());
- AllocaInst *Var2 = Builder.CreateAlloca(Builder.getInt32Ty());
- AllocaInst *Var3 = Builder.CreateAlloca(Builder.getInt8Ty(),
- Builder.getInt32(123));
-
- CallInst *Start1 = Builder.CreateLifetimeStart(Var1);
- CallInst *Start2 = Builder.CreateLifetimeStart(Var2);
- CallInst *Start3 = Builder.CreateLifetimeStart(Var3, Builder.getInt64(100));
-
- EXPECT_EQ(Start1->getArgOperand(0), Builder.getInt64(-1));
- EXPECT_EQ(Start2->getArgOperand(0), Builder.getInt64(-1));
- EXPECT_EQ(Start3->getArgOperand(0), Builder.getInt64(100));
-
- EXPECT_EQ(Start1->getArgOperand(1), Var1);
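- // Var2 is an i32 alloca, so CreateLifetimeStart inserts a bitcast to
- // i8* first; the intrinsic's operand is the cast, not Var2 itself.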
- EXPECT_NE(Start2->getArgOperand(1), Var2);
- EXPECT_EQ(Start3->getArgOperand(1), Var3);
-
- Value *End1 = Builder.CreateLifetimeEnd(Var1);
- Builder.CreateLifetimeEnd(Var2);
- Builder.CreateLifetimeEnd(Var3);
-
- IntrinsicInst *II_Start1 = dyn_cast<IntrinsicInst>(Start1);
- IntrinsicInst *II_End1 = dyn_cast<IntrinsicInst>(End1);
- ASSERT_TRUE(II_Start1 != NULL);
- EXPECT_EQ(II_Start1->getIntrinsicID(), Intrinsic::lifetime_start);
- ASSERT_TRUE(II_End1 != NULL);
- EXPECT_EQ(II_End1->getIntrinsicID(), Intrinsic::lifetime_end);
-}
-
-TEST_F(IRBuilderTest, CreateCondBr) {
- IRBuilder<> Builder(BB);
- BasicBlock *TBB = BasicBlock::Create(getGlobalContext(), "", F);
- BasicBlock *FBB = BasicBlock::Create(getGlobalContext(), "", F);
-
- BranchInst *BI = Builder.CreateCondBr(Builder.getTrue(), TBB, FBB);
- TerminatorInst *TI = BB->getTerminator();
- EXPECT_EQ(BI, TI);
- EXPECT_EQ(2u, TI->getNumSuccessors());
- EXPECT_EQ(TBB, TI->getSuccessor(0));
- EXPECT_EQ(FBB, TI->getSuccessor(1));
-
- BI->eraseFromParent();
- MDNode *Weights = MDBuilder(getGlobalContext()).createBranchWeights(42, 13);
- BI = Builder.CreateCondBr(Builder.getTrue(), TBB, FBB, Weights);
- TI = BB->getTerminator();
- EXPECT_EQ(BI, TI);
- EXPECT_EQ(2u, TI->getNumSuccessors());
- EXPECT_EQ(TBB, TI->getSuccessor(0));
- EXPECT_EQ(FBB, TI->getSuccessor(1));
- EXPECT_EQ(Weights, TI->getMetadata(LLVMContext::MD_prof));
-}
-
-TEST_F(IRBuilderTest, GetIntTy) {
- IRBuilder<> Builder(BB);
- IntegerType *Ty1 = Builder.getInt1Ty();
- EXPECT_EQ(Ty1, IntegerType::get(getGlobalContext(), 1));
-
- DataLayout *DL = new DataLayout(M.get());
- IntegerType *IntPtrTy = Builder.getIntPtrTy(DL);
- unsigned IntPtrBitSize = DL->getPointerSizeInBits(0);
- EXPECT_EQ(IntPtrTy, IntegerType::get(getGlobalContext(), IntPtrBitSize));
- delete DL;
-}
-
-}
diff --git a/unittests/VMCore/InstructionsTest.cpp b/unittests/VMCore/InstructionsTest.cpp
deleted file mode 100644
index a3b13ce92d15..000000000000
--- a/unittests/VMCore/InstructionsTest.cpp
+++ /dev/null
@@ -1,284 +0,0 @@
-//===- llvm/unittest/VMCore/InstructionsTest.cpp - Instructions unit tests ===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/MDBuilder.h"
-#include "llvm/Operator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "gtest/gtest.h"
-
-namespace llvm {
-namespace {
-
-TEST(InstructionsTest, ReturnInst) {
- LLVMContext &C(getGlobalContext());
-
- // test for PR6589
- const ReturnInst* r0 = ReturnInst::Create(C);
- EXPECT_EQ(r0->getNumOperands(), 0U);
- EXPECT_EQ(r0->op_begin(), r0->op_end());
-
- IntegerType* Int1 = IntegerType::get(C, 1);
- Constant* One = ConstantInt::get(Int1, 1, true);
- const ReturnInst* r1 = ReturnInst::Create(C, One);
- EXPECT_EQ(1U, r1->getNumOperands());
- User::const_op_iterator b(r1->op_begin());
- EXPECT_NE(r1->op_end(), b);
- EXPECT_EQ(One, *b);
- EXPECT_EQ(One, r1->getOperand(0));
- ++b;
- EXPECT_EQ(r1->op_end(), b);
-
- // clean up
- delete r0;
- delete r1;
-}
-
-TEST(InstructionsTest, BranchInst) {
- LLVMContext &C(getGlobalContext());
-
- // Make a pair of BasicBlocks
- BasicBlock* bb0 = BasicBlock::Create(C);
- BasicBlock* bb1 = BasicBlock::Create(C);
-
- // Mandatory BranchInst
- const BranchInst* b0 = BranchInst::Create(bb0);
-
- EXPECT_TRUE(b0->isUnconditional());
- EXPECT_FALSE(b0->isConditional());
- EXPECT_EQ(1U, b0->getNumSuccessors());
-
- // check num operands
- EXPECT_EQ(1U, b0->getNumOperands());
-
- EXPECT_NE(b0->op_begin(), b0->op_end());
- EXPECT_EQ(b0->op_end(), llvm::next(b0->op_begin()));
-
- IntegerType* Int1 = IntegerType::get(C, 1);
- Constant* One = ConstantInt::get(Int1, 1, true);
-
- // Conditional BranchInst
- BranchInst* b1 = BranchInst::Create(bb0, bb1, One);
-
- EXPECT_FALSE(b1->isUnconditional());
- EXPECT_TRUE(b1->isConditional());
- EXPECT_EQ(2U, b1->getNumSuccessors());
-
- // check num operands
- EXPECT_EQ(3U, b1->getNumOperands());
-
- User::const_op_iterator b(b1->op_begin());
-
- // check COND
- EXPECT_NE(b, b1->op_end());
- EXPECT_EQ(One, *b);
- EXPECT_EQ(One, b1->getOperand(0));
- EXPECT_EQ(One, b1->getCondition());
- ++b;
-
- // check ELSE
- EXPECT_EQ(bb1, *b);
- EXPECT_EQ(bb1, b1->getOperand(1));
- EXPECT_EQ(bb1, b1->getSuccessor(1));
- ++b;
-
- // check THEN
- EXPECT_EQ(bb0, *b);
- EXPECT_EQ(bb0, b1->getOperand(2));
- EXPECT_EQ(bb0, b1->getSuccessor(0));
- ++b;
-
- EXPECT_EQ(b1->op_end(), b);
-
- // clean up
- delete b0;
- delete b1;
-
- delete bb0;
- delete bb1;
-}
-
-TEST(InstructionsTest, CastInst) {
- LLVMContext &C(getGlobalContext());
-
- Type* Int8Ty = Type::getInt8Ty(C);
- Type* Int64Ty = Type::getInt64Ty(C);
- Type* V8x8Ty = VectorType::get(Int8Ty, 8);
- Type* V8x64Ty = VectorType::get(Int64Ty, 8);
- Type* X86MMXTy = Type::getX86_MMXTy(C);
-
- const Constant* c8 = Constant::getNullValue(V8x8Ty);
- const Constant* c64 = Constant::getNullValue(V8x64Ty);
-
- EXPECT_TRUE(CastInst::isCastable(V8x8Ty, X86MMXTy));
- EXPECT_TRUE(CastInst::isCastable(X86MMXTy, V8x8Ty));
- EXPECT_FALSE(CastInst::isCastable(Int64Ty, X86MMXTy));
- EXPECT_TRUE(CastInst::isCastable(V8x64Ty, V8x8Ty));
- EXPECT_TRUE(CastInst::isCastable(V8x8Ty, V8x64Ty));
- EXPECT_EQ(CastInst::Trunc, CastInst::getCastOpcode(c64, true, V8x8Ty, true));
- EXPECT_EQ(CastInst::SExt, CastInst::getCastOpcode(c8, true, V8x64Ty, true));
-}
-
-TEST(InstructionsTest, VectorGep) {
- LLVMContext &C(getGlobalContext());
-
- // Type Definitions
- PointerType *Ptri8Ty = PointerType::get(IntegerType::get(C, 8), 0);
- PointerType *Ptri32Ty = PointerType::get(IntegerType::get(C, 32), 0);
-
- VectorType *V2xi8PTy = VectorType::get(Ptri8Ty, 2);
- VectorType *V2xi32PTy = VectorType::get(Ptri32Ty, 2);
-
- // Test different aspects of the vector-of-pointers type
- // and GEPs which use this type.
- ConstantInt *Ci32a = ConstantInt::get(C, APInt(32, 1492));
- ConstantInt *Ci32b = ConstantInt::get(C, APInt(32, 1948));
- std::vector<Constant*> ConstVa(2, Ci32a);
- std::vector<Constant*> ConstVb(2, Ci32b);
- Constant *C2xi32a = ConstantVector::get(ConstVa);
- Constant *C2xi32b = ConstantVector::get(ConstVb);
-
- CastInst *PtrVecA = new IntToPtrInst(C2xi32a, V2xi32PTy);
- CastInst *PtrVecB = new IntToPtrInst(C2xi32b, V2xi32PTy);
-
- ICmpInst *ICmp0 = new ICmpInst(ICmpInst::ICMP_SGT, PtrVecA, PtrVecB);
- ICmpInst *ICmp1 = new ICmpInst(ICmpInst::ICMP_ULT, PtrVecA, PtrVecB);
- EXPECT_NE(ICmp0, ICmp1); // suppress warning.
-
- GetElementPtrInst *Gep0 = GetElementPtrInst::Create(PtrVecA, C2xi32a);
- GetElementPtrInst *Gep1 = GetElementPtrInst::Create(PtrVecA, C2xi32b);
- GetElementPtrInst *Gep2 = GetElementPtrInst::Create(PtrVecB, C2xi32a);
- GetElementPtrInst *Gep3 = GetElementPtrInst::Create(PtrVecB, C2xi32b);
-
- CastInst *BTC0 = new BitCastInst(Gep0, V2xi8PTy);
- CastInst *BTC1 = new BitCastInst(Gep1, V2xi8PTy);
- CastInst *BTC2 = new BitCastInst(Gep2, V2xi8PTy);
- CastInst *BTC3 = new BitCastInst(Gep3, V2xi8PTy);
-
- Value *S0 = BTC0->stripPointerCasts();
- Value *S1 = BTC1->stripPointerCasts();
- Value *S2 = BTC2->stripPointerCasts();
- Value *S3 = BTC3->stripPointerCasts();
-
- EXPECT_NE(S0, Gep0);
- EXPECT_NE(S1, Gep1);
- EXPECT_NE(S2, Gep2);
- EXPECT_NE(S3, Gep3);
-
- int64_t Offset;
- DataLayout TD("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3"
- "2:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80"
- ":128:128-n8:16:32:64-S128");
- // Make sure we don't crash
- GetPointerBaseWithConstantOffset(Gep0, Offset, TD);
- GetPointerBaseWithConstantOffset(Gep1, Offset, TD);
- GetPointerBaseWithConstantOffset(Gep2, Offset, TD);
- GetPointerBaseWithConstantOffset(Gep3, Offset, TD);
-
- // Gep of Geps
- GetElementPtrInst *GepII0 = GetElementPtrInst::Create(Gep0, C2xi32b);
- GetElementPtrInst *GepII1 = GetElementPtrInst::Create(Gep1, C2xi32a);
- GetElementPtrInst *GepII2 = GetElementPtrInst::Create(Gep2, C2xi32b);
- GetElementPtrInst *GepII3 = GetElementPtrInst::Create(Gep3, C2xi32a);
-
- EXPECT_EQ(GepII0->getNumIndices(), 1u);
- EXPECT_EQ(GepII1->getNumIndices(), 1u);
- EXPECT_EQ(GepII2->getNumIndices(), 1u);
- EXPECT_EQ(GepII3->getNumIndices(), 1u);
-
- EXPECT_FALSE(GepII0->hasAllZeroIndices());
- EXPECT_FALSE(GepII1->hasAllZeroIndices());
- EXPECT_FALSE(GepII2->hasAllZeroIndices());
- EXPECT_FALSE(GepII3->hasAllZeroIndices());
-
- delete GepII0;
- delete GepII1;
- delete GepII2;
- delete GepII3;
-
- delete BTC0;
- delete BTC1;
- delete BTC2;
- delete BTC3;
-
- delete Gep0;
- delete Gep1;
- delete Gep2;
- delete Gep3;
-
- delete ICmp0;
- delete ICmp1;
- delete PtrVecA;
- delete PtrVecB;
-}
-
-TEST(InstructionsTest, FPMathOperator) {
- LLVMContext &Context = getGlobalContext();
- IRBuilder<> Builder(Context);
- MDBuilder MDHelper(Context);
- Instruction *I = Builder.CreatePHI(Builder.getDoubleTy(), 0);
- MDNode *MD1 = MDHelper.createFPMath(1.0);
- Value *V1 = Builder.CreateFAdd(I, I, "", MD1);
- EXPECT_TRUE(isa<FPMathOperator>(V1));
- FPMathOperator *O1 = cast<FPMathOperator>(V1);
- EXPECT_EQ(O1->getFPAccuracy(), 1.0);
- delete V1;
- delete I;
-}
-
-TEST(InstructionsTest, isEliminableCastPair) {
- LLVMContext &C(getGlobalContext());
-
- Type* Int32Ty = Type::getInt32Ty(C);
- Type* Int64Ty = Type::getInt64Ty(C);
- Type* Int64PtrTy = Type::getInt64PtrTy(C);
-
- // Source and destination pointers have same size -> bitcast.
- EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::PtrToInt,
- CastInst::IntToPtr,
- Int64PtrTy, Int64Ty, Int64PtrTy,
- Int32Ty, 0, Int32Ty),
- CastInst::BitCast);
-
- // Source and destination pointers have different sizes -> fail.
- EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::PtrToInt,
- CastInst::IntToPtr,
- Int64PtrTy, Int64Ty, Int64PtrTy,
- Int32Ty, 0, Int64Ty),
- 0U);
-
- // Middle pointer big enough -> bitcast.
- EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::IntToPtr,
- CastInst::PtrToInt,
- Int64Ty, Int64PtrTy, Int64Ty,
- 0, Int64Ty, 0),
- CastInst::BitCast);
-
- // Middle pointer too small -> fail.
- EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::IntToPtr,
- CastInst::PtrToInt,
- Int64Ty, Int64PtrTy, Int64Ty,
- 0, Int32Ty, 0),
- 0U);
-}
-
-} // end anonymous namespace
-} // end namespace llvm
diff --git a/unittests/VMCore/MDBuilderTest.cpp b/unittests/VMCore/MDBuilderTest.cpp
deleted file mode 100644
index 847039b83760..000000000000
--- a/unittests/VMCore/MDBuilderTest.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-//===- llvm/unittest/VMCore/MDBuilderTest.cpp - MDBuilder unit tests ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IRBuilder.h"
-#include "llvm/MDBuilder.h"
-#include "llvm/Operator.h"
-
-#include "gtest/gtest.h"
-
-using namespace llvm;
-
-namespace {
-
-class MDBuilderTest : public testing::Test {
-protected:
- LLVMContext Context;
-};
-
-TEST_F(MDBuilderTest, createString) {
- MDBuilder MDHelper(Context);
- MDString *Str0 = MDHelper.createString("");
- MDString *Str1 = MDHelper.createString("string");
- EXPECT_EQ(Str0->getString(), StringRef(""));
- EXPECT_EQ(Str1->getString(), StringRef("string"));
-}
-
-TEST_F(MDBuilderTest, createFPMath) {
- MDBuilder MDHelper(Context);
- MDNode *MD0 = MDHelper.createFPMath(0.0);
- MDNode *MD1 = MDHelper.createFPMath(1.0);
- EXPECT_EQ(MD0, (MDNode *)0);
- EXPECT_NE(MD1, (MDNode *)0);
- EXPECT_EQ(MD1->getNumOperands(), 1U);
- Value *Op = MD1->getOperand(0);
- EXPECT_TRUE(isa<ConstantFP>(Op));
- EXPECT_TRUE(Op->getType()->isFloatingPointTy());
- ConstantFP *Val = cast<ConstantFP>(Op);
- EXPECT_TRUE(Val->isExactlyValue(1.0));
-}
-
-TEST_F(MDBuilderTest, createRangeMetadata) {
- MDBuilder MDHelper(Context);
- APInt A(8, 1), B(8, 2);
- MDNode *R0 = MDHelper.createRange(A, A);
- MDNode *R1 = MDHelper.createRange(A, B);
- EXPECT_EQ(R0, (MDNode *)0);
- EXPECT_NE(R1, (MDNode *)0);
- EXPECT_EQ(R1->getNumOperands(), 2U);
- EXPECT_TRUE(isa<ConstantInt>(R1->getOperand(0)));
- EXPECT_TRUE(isa<ConstantInt>(R1->getOperand(1)));
- ConstantInt *C0 = cast<ConstantInt>(R1->getOperand(0));
- ConstantInt *C1 = cast<ConstantInt>(R1->getOperand(1));
- EXPECT_EQ(C0->getValue(), A);
- EXPECT_EQ(C1->getValue(), B);
-}
-
-TEST_F(MDBuilderTest, createAnonymousTBAARoot) {
- MDBuilder MDHelper(Context);
- MDNode *R0 = MDHelper.createAnonymousTBAARoot();
- MDNode *R1 = MDHelper.createAnonymousTBAARoot();
- EXPECT_NE(R0, R1);
- EXPECT_GE(R0->getNumOperands(), 1U);
- EXPECT_GE(R1->getNumOperands(), 1U);
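- // Each anonymous root is a distinct self-referential node: operand 0
- // points back at the node itself, which keeps two roots from being
- // uniqued together.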
- EXPECT_EQ(R0->getOperand(0), R0);
- EXPECT_EQ(R1->getOperand(0), R1);
- EXPECT_TRUE(R0->getNumOperands() == 1 || R0->getOperand(1) == 0);
- EXPECT_TRUE(R1->getNumOperands() == 1 || R1->getOperand(1) == 0);
-}
-
-TEST_F(MDBuilderTest, createTBAARoot) {
- MDBuilder MDHelper(Context);
- MDNode *R0 = MDHelper.createTBAARoot("Root");
- MDNode *R1 = MDHelper.createTBAARoot("Root");
- EXPECT_EQ(R0, R1);
- EXPECT_GE(R0->getNumOperands(), 1U);
- EXPECT_TRUE(isa<MDString>(R0->getOperand(0)));
- EXPECT_EQ(cast<MDString>(R0->getOperand(0))->getString(), "Root");
- EXPECT_TRUE(R0->getNumOperands() == 1 || R0->getOperand(1) == 0);
-}
-
-TEST_F(MDBuilderTest, createTBAANode) {
- MDBuilder MDHelper(Context);
- MDNode *R = MDHelper.createTBAARoot("Root");
- MDNode *N0 = MDHelper.createTBAANode("Node", R);
- MDNode *N1 = MDHelper.createTBAANode("edoN", R);
- MDNode *N2 = MDHelper.createTBAANode("Node", R, true);
- MDNode *N3 = MDHelper.createTBAANode("Node", R);
- EXPECT_EQ(N0, N3);
- EXPECT_NE(N0, N1);
- EXPECT_NE(N0, N2);
- EXPECT_GE(N0->getNumOperands(), 2U);
- EXPECT_GE(N1->getNumOperands(), 2U);
- EXPECT_GE(N2->getNumOperands(), 3U);
- EXPECT_TRUE(isa<MDString>(N0->getOperand(0)));
- EXPECT_TRUE(isa<MDString>(N1->getOperand(0)));
- EXPECT_TRUE(isa<MDString>(N2->getOperand(0)));
- EXPECT_EQ(cast<MDString>(N0->getOperand(0))->getString(), "Node");
- EXPECT_EQ(cast<MDString>(N1->getOperand(0))->getString(), "edoN");
- EXPECT_EQ(cast<MDString>(N2->getOperand(0))->getString(), "Node");
- EXPECT_EQ(N0->getOperand(1), R);
- EXPECT_EQ(N1->getOperand(1), R);
- EXPECT_EQ(N2->getOperand(1), R);
- EXPECT_TRUE(isa<ConstantInt>(N2->getOperand(2)));
- EXPECT_EQ(cast<ConstantInt>(N2->getOperand(2))->getZExtValue(), 1U);
-}
-}
diff --git a/unittests/VMCore/Makefile b/unittests/VMCore/Makefile
deleted file mode 100644
index d743dc5d5bae..000000000000
--- a/unittests/VMCore/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- unittests/VMCore/Makefile ---------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../..
-TESTNAME = VMCore
-LINK_COMPONENTS := core ipa asmparser
-
-include $(LEVEL)/Makefile.config
-include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/VMCore/MetadataTest.cpp b/unittests/VMCore/MetadataTest.cpp
deleted file mode 100644
index 08927a2ff526..000000000000
--- a/unittests/VMCore/MetadataTest.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//===- llvm/unittest/VMCore/MetadataTest.cpp - Metadata unit tests --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ValueHandle.h"
-using namespace llvm;
-
-namespace {
-
-class MetadataTest : public testing::Test {
-protected:
- LLVMContext Context;
-};
-typedef MetadataTest MDStringTest;
-
-// Test that construction of MDString with different value produces different
-// MDString objects, even with the same string pointer and nulls in the string.
-TEST_F(MDStringTest, CreateDifferent) {
- char x[3] = { 'f', 0, 'A' };
- MDString *s1 = MDString::get(Context, StringRef(&x[0], 3));
- x[2] = 'B';
- MDString *s2 = MDString::get(Context, StringRef(&x[0], 3));
- EXPECT_NE(s1, s2);
-}
-
-// Test that creation of MDStrings with the same string contents produces the
-// same MDString object, even with different pointers.
-TEST_F(MDStringTest, CreateSame) {
- char x[4] = { 'a', 'b', 'c', 'X' };
- char y[4] = { 'a', 'b', 'c', 'Y' };
-
- MDString *s1 = MDString::get(Context, StringRef(&x[0], 3));
- MDString *s2 = MDString::get(Context, StringRef(&y[0], 3));
- EXPECT_EQ(s1, s2);
-}
-
-// Test that MDString prints out the string we fed it.
-TEST_F(MDStringTest, PrintingSimple) {
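- // The buffer is clobbered and freed before printing to show that
- // MDString keeps its own copy of the characters.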
- char *str = new char[13];
- strncpy(str, "testing 1 2 3", 13);
- MDString *s = MDString::get(Context, StringRef(str, 13));
- strncpy(str, "aaaaaaaaaaaaa", 13);
- delete[] str;
-
- std::string Str;
- raw_string_ostream oss(Str);
- s->print(oss);
- EXPECT_STREQ("metadata !\"testing 1 2 3\"", oss.str().c_str());
-}
-
-// Test printing of MDString with non-printable characters.
-TEST_F(MDStringTest, PrintingComplex) {
- char str[5] = {0, '\n', '"', '\\', (char)-1};
- MDString *s = MDString::get(Context, StringRef(str+0, 5));
- std::string Str;
- raw_string_ostream oss(Str);
- s->print(oss);
- EXPECT_STREQ("metadata !\"\\00\\0A\\22\\5C\\FF\"", oss.str().c_str());
-}
-
-typedef MetadataTest MDNodeTest;
-
-// Test the two constructors, and containing other Constants.
-TEST_F(MDNodeTest, Simple) {
- char x[3] = { 'a', 'b', 'c' };
- char y[3] = { '1', '2', '3' };
-
- MDString *s1 = MDString::get(Context, StringRef(&x[0], 3));
- MDString *s2 = MDString::get(Context, StringRef(&y[0], 3));
- ConstantInt *CI = ConstantInt::get(Context, APInt(8, 0));
-
- std::vector<Value *> V;
- V.push_back(s1);
- V.push_back(CI);
- V.push_back(s2);
-
- MDNode *n1 = MDNode::get(Context, V);
- Value *const c1 = n1;
- MDNode *n2 = MDNode::get(Context, c1);
- Value *const c2 = n2;
- MDNode *n3 = MDNode::get(Context, V);
- MDNode *n4 = MDNode::getIfExists(Context, V);
- MDNode *n5 = MDNode::getIfExists(Context, c1);
- MDNode *n6 = MDNode::getIfExists(Context, c2);
- EXPECT_NE(n1, n2);
-#ifdef ENABLE_MDNODE_UNIQUING
- EXPECT_EQ(n1, n3);
-#else
- (void) n3;
-#endif
- EXPECT_EQ(n4, n1);
- EXPECT_EQ(n5, n2);
- EXPECT_EQ(n6, (Value*)0);
-
- EXPECT_EQ(3u, n1->getNumOperands());
- EXPECT_EQ(s1, n1->getOperand(0));
- EXPECT_EQ(CI, n1->getOperand(1));
- EXPECT_EQ(s2, n1->getOperand(2));
-
- EXPECT_EQ(1u, n2->getNumOperands());
- EXPECT_EQ(n1, n2->getOperand(0));
-}
-
-TEST_F(MDNodeTest, Delete) {
- Constant *C = ConstantInt::get(Type::getInt32Ty(Context), 1);
- Instruction *I = new BitCastInst(C, Type::getInt32Ty(Context));
-
- Value *const V = I;
- MDNode *n = MDNode::get(Context, V);
- WeakVH wvh = n;
-
- EXPECT_EQ(n, wvh);
-
- delete I;
-}
-
-TEST(NamedMDNodeTest, Search) {
- LLVMContext Context;
- Constant *C = ConstantInt::get(Type::getInt32Ty(Context), 1);
- Constant *C2 = ConstantInt::get(Type::getInt32Ty(Context), 2);
-
- Value *const V = C;
- Value *const V2 = C2;
- MDNode *n = MDNode::get(Context, V);
- MDNode *n2 = MDNode::get(Context, V2);
-
- Module M("MyModule", Context);
- const char *Name = "llvm.NMD1";
- NamedMDNode *NMD = M.getOrInsertNamedMetadata(Name);
- NMD->addOperand(n);
- NMD->addOperand(n2);
-
- std::string Str;
- raw_string_ostream oss(Str);
- NMD->print(oss);
- EXPECT_STREQ("!llvm.NMD1 = !{!0, !1}\n",
- oss.str().c_str());
-}
-}
diff --git a/unittests/VMCore/PassManagerTest.cpp b/unittests/VMCore/PassManagerTest.cpp
deleted file mode 100644
index 9c070c84bbc7..000000000000
--- a/unittests/VMCore/PassManagerTest.cpp
+++ /dev/null
@@ -1,548 +0,0 @@
-//===- llvm/unittest/VMCore/PassManagerTest.cpp - PassManager unit tests --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Module.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
-#include "llvm/CallingConv.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/PassManager.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Assembly/PrintModulePass.h"
-#include "gtest/gtest.h"
-
-using namespace llvm;
-
-namespace llvm {
- void initializeModuleNDMPass(PassRegistry&);
- void initializeFPassPass(PassRegistry&);
- void initializeCGPassPass(PassRegistry&);
- void initializeLPassPass(PassRegistry&);
- void initializeBPassPass(PassRegistry&);
-
- namespace {
- // ND = no deps
- // NM = no modifications
- struct ModuleNDNM: public ModulePass {
- public:
- static char run;
- static char ID;
- ModuleNDNM() : ModulePass(ID) { }
- virtual bool runOnModule(Module &M) {
- run++;
- return false;
- }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
- };
- char ModuleNDNM::ID=0;
- char ModuleNDNM::run=0;
-
- struct ModuleNDM : public ModulePass {
- public:
- static char run;
- static char ID;
- ModuleNDM() : ModulePass(ID) {}
- virtual bool runOnModule(Module &M) {
- run++;
- return true;
- }
- };
- char ModuleNDM::ID=0;
- char ModuleNDM::run=0;
-
- struct ModuleNDM2 : public ModulePass {
- public:
- static char run;
- static char ID;
- ModuleNDM2() : ModulePass(ID) {}
- virtual bool runOnModule(Module &M) {
- run++;
- return true;
- }
- };
- char ModuleNDM2::ID=0;
- char ModuleNDM2::run=0;
-
- struct ModuleDNM : public ModulePass {
- public:
- static char run;
- static char ID;
- ModuleDNM() : ModulePass(ID) {
- initializeModuleNDMPass(*PassRegistry::getPassRegistry());
- }
- virtual bool runOnModule(Module &M) {
- EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
- run++;
- return false;
- }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<ModuleNDM>();
- AU.setPreservesAll();
- }
- };
- char ModuleDNM::ID=0;
- char ModuleDNM::run=0;
-
- template<typename P>
- struct PassTestBase : public P {
- protected:
- static int runc;
- static bool initialized;
- static bool finalized;
- int allocated;
- void run() {
- EXPECT_TRUE(initialized);
- EXPECT_FALSE(finalized);
- EXPECT_EQ(0, allocated);
- allocated++;
- runc++;
- }
- public:
- static char ID;
- static void finishedOK(int run) {
- EXPECT_GT(runc, 0);
- EXPECT_TRUE(initialized);
- EXPECT_TRUE(finalized);
- EXPECT_EQ(run, runc);
- }
- PassTestBase() : P(ID), allocated(0) {
- initialized = false;
- finalized = false;
- runc = 0;
- }
-
- virtual void releaseMemory() {
- EXPECT_GT(runc, 0);
- EXPECT_GT(allocated, 0);
- allocated--;
- }
- };
- template<typename P> char PassTestBase<P>::ID;
- template<typename P> int PassTestBase<P>::runc;
- template<typename P> bool PassTestBase<P>::initialized;
- template<typename P> bool PassTestBase<P>::finalized;
-
- template<typename T, typename P>
- struct PassTest : public PassTestBase<P> {
- public:
- virtual bool doInitialization(T &t) {
- EXPECT_FALSE(PassTestBase<P>::initialized);
- PassTestBase<P>::initialized = true;
- return false;
- }
- virtual bool doFinalization(T &t) {
- EXPECT_FALSE(PassTestBase<P>::finalized);
- PassTestBase<P>::finalized = true;
- EXPECT_EQ(0, PassTestBase<P>::allocated);
- return false;
- }
- };
-
- struct CGPass : public PassTest<CallGraph, CallGraphSCCPass> {
- public:
- CGPass() {
- initializeCGPassPass(*PassRegistry::getPassRegistry());
- }
- virtual bool runOnSCC(CallGraphSCC &SCMM) {
- EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
- run();
- return false;
- }
- };
-
- struct FPass : public PassTest<Module, FunctionPass> {
- public:
- virtual bool runOnFunction(Function &F) {
- // FIXME: PR4112
- // EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
- run();
- return false;
- }
- };
-
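- // LPass counts loop-pass callbacks: doInitialization(Loop*) fires once
- // per loop, doFinalization() once per pass-manager run; finishedOK()
- // checks both counters.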
- struct LPass : public PassTestBase<LoopPass> {
- private:
- static int initcount;
- static int fincount;
- public:
- LPass() {
- initializeLPassPass(*PassRegistry::getPassRegistry());
- initcount = 0; fincount=0;
- EXPECT_FALSE(initialized);
- }
- static void finishedOK(int run, int finalized) {
- PassTestBase<LoopPass>::finishedOK(run);
- EXPECT_EQ(run, initcount);
- EXPECT_EQ(finalized, fincount);
- }
- virtual bool doInitialization(Loop* L, LPPassManager &LPM) {
- initialized = true;
- initcount++;
- return false;
- }
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
- EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
- run();
- return false;
- }
- virtual bool doFinalization() {
- fincount++;
- finalized = true;
- return false;
- }
- };
- int LPass::initcount=0;
- int LPass::fincount=0;
-
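- // BPass counts the per-function doInitialization/doFinalization
- // callbacks a BasicBlockPass receives; the module-level pair must fire
- // exactly once.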
- struct BPass : public PassTestBase<BasicBlockPass> {
- private:
- static int inited;
- static int fin;
- public:
- static void finishedOK(int run, int N) {
- PassTestBase<BasicBlockPass>::finishedOK(run);
- EXPECT_EQ(inited, N);
- EXPECT_EQ(fin, N);
- }
- BPass() {
- inited = 0;
- fin = 0;
- }
- virtual bool doInitialization(Module &M) {
- EXPECT_FALSE(initialized);
- initialized = true;
- return false;
- }
- virtual bool doInitialization(Function &F) {
- inited++;
- return false;
- }
- virtual bool runOnBasicBlock(BasicBlock &BB) {
- EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
- run();
- return false;
- }
- virtual bool doFinalization(Function &F) {
- fin++;
- return false;
- }
- virtual bool doFinalization(Module &M) {
- EXPECT_FALSE(finalized);
- finalized = true;
- EXPECT_EQ(0, allocated);
- return false;
- }
- };
- int BPass::inited=0;
- int BPass::fin=0;
-
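- // Requiring FPass (a FunctionPass) from inside a ModulePass exercises
- // the on-the-fly pass manager used by getAnalysis<FPass>(F).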
- struct OnTheFlyTest: public ModulePass {
- public:
- static char ID;
- OnTheFlyTest() : ModulePass(ID) {
- initializeFPassPass(*PassRegistry::getPassRegistry());
- }
- virtual bool runOnModule(Module &M) {
- EXPECT_TRUE(getAnalysisIfAvailable<DataLayout>());
- for (Module::iterator I=M.begin(),E=M.end(); I != E; ++I) {
- Function &F = *I;
- {
- SCOPED_TRACE("Running on the fly function pass");
- getAnalysis<FPass>(F);
- }
- }
- return false;
- }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<FPass>();
- }
- };
- char OnTheFlyTest::ID=0;
-
- TEST(PassManager, RunOnce) {
- Module M("test-once", getGlobalContext());
- struct ModuleNDNM *mNDNM = new ModuleNDNM();
- struct ModuleDNM *mDNM = new ModuleDNM();
- struct ModuleNDM *mNDM = new ModuleNDM();
- struct ModuleNDM2 *mNDM2 = new ModuleNDM2();
-
- mNDM->run = mNDNM->run = mDNM->run = mNDM2->run = 0;
-
- PassManager Passes;
- Passes.add(new DataLayout(&M));
- Passes.add(mNDM2);
- Passes.add(mNDM);
- Passes.add(mNDNM);
- Passes.add(mDNM);
-
- Passes.run(M);
- // each pass must be run exactly once, since nothing invalidates them
- EXPECT_EQ(1, mNDM->run);
- EXPECT_EQ(1, mNDNM->run);
- EXPECT_EQ(1, mDNM->run);
- EXPECT_EQ(1, mNDM2->run);
- }
-
- TEST(PassManager, ReRun) {
- Module M("test-rerun", getGlobalContext());
- struct ModuleNDNM *mNDNM = new ModuleNDNM();
- struct ModuleDNM *mDNM = new ModuleDNM();
- struct ModuleNDM *mNDM = new ModuleNDM();
- struct ModuleNDM2 *mNDM2 = new ModuleNDM2();
-
- mNDM->run = mNDNM->run = mDNM->run = mNDM2->run = 0;
-
- PassManager Passes;
- Passes.add(new DataLayout(&M));
- Passes.add(mNDM);
- Passes.add(mNDNM);
- Passes.add(mNDM2);// invalidates mNDM needed by mDNM
- Passes.add(mDNM);
-
- Passes.run(M);
- // Some passes must be rerun because a pass that modified the
- // module/function was run in between
- EXPECT_EQ(2, mNDM->run);
- EXPECT_EQ(1, mNDNM->run);
- EXPECT_EQ(1, mNDM2->run);
- EXPECT_EQ(1, mDNM->run);
- }
-
- Module* makeLLVMModule();
-
- template<typename T>
- void MemoryTestHelper(int run) {
- OwningPtr<Module> M(makeLLVMModule());
- T *P = new T();
- PassManager Passes;
- Passes.add(new DataLayout(M.get()));
- Passes.add(P);
- Passes.run(*M);
- T::finishedOK(run);
- }
-
- template<typename T>
- void MemoryTestHelper(int run, int N) {
- Module *M = makeLLVMModule();
- T *P = new T();
- PassManager Passes;
- Passes.add(new DataLayout(M));
- Passes.add(P);
- Passes.run(*M);
- T::finishedOK(run, N);
- delete M;
- }
-
- TEST(PassManager, Memory) {
- // SCC#1: test1->test2->test3->test1
- // SCC#2: test4
- // SCC#3: indirect call node
- {
- SCOPED_TRACE("Callgraph pass");
- MemoryTestHelper<CGPass>(3);
- }
-
- {
- SCOPED_TRACE("Function pass");
- MemoryTestHelper<FPass>(4);// 4 functions
- }
-
- {
- SCOPED_TRACE("Loop pass");
- MemoryTestHelper<LPass>(2, 1); //2 loops, 1 function
- }
- {
- SCOPED_TRACE("Basic block pass");
- MemoryTestHelper<BPass>(7, 4); //9 basic blocks
- }
-
- }
-
- TEST(PassManager, MemoryOnTheFly) {
- Module *M = makeLLVMModule();
- {
- SCOPED_TRACE("Running OnTheFlyTest");
- struct OnTheFlyTest *O = new OnTheFlyTest();
- PassManager Passes;
- Passes.add(new DataLayout(M));
- Passes.add(O);
- Passes.run(*M);
-
- FPass::finishedOK(4);
- }
- delete M;
- }
-
- Module* makeLLVMModule() {
- // Module Construction
- Module* mod = new Module("test-mem", getGlobalContext());
- mod->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
- "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-"
- "a0:0:64-s0:64:64-f80:128:128");
- mod->setTargetTriple("x86_64-unknown-linux-gnu");
-
- // Type Definitions
- std::vector<Type*>FuncTy_0_args;
- FunctionType* FuncTy_0 = FunctionType::get(
- /*Result=*/IntegerType::get(getGlobalContext(), 32),
- /*Params=*/FuncTy_0_args,
- /*isVarArg=*/false);
-
- std::vector<Type*>FuncTy_2_args;
- FuncTy_2_args.push_back(IntegerType::get(getGlobalContext(), 1));
- FunctionType* FuncTy_2 = FunctionType::get(
- /*Result=*/Type::getVoidTy(getGlobalContext()),
- /*Params=*/FuncTy_2_args,
- /*isVarArg=*/false);
-
-
- // Function Declarations
-
- Function* func_test1 = Function::Create(
- /*Type=*/FuncTy_0,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"test1", mod);
- func_test1->setCallingConv(CallingConv::C);
- AttrListPtr func_test1_PAL;
- func_test1->setAttributes(func_test1_PAL);
-
- Function* func_test2 = Function::Create(
- /*Type=*/FuncTy_0,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"test2", mod);
- func_test2->setCallingConv(CallingConv::C);
- AttrListPtr func_test2_PAL;
- func_test2->setAttributes(func_test2_PAL);
-
- Function* func_test3 = Function::Create(
- /*Type=*/FuncTy_0,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"test3", mod);
- func_test3->setCallingConv(CallingConv::C);
- AttrListPtr func_test3_PAL;
- func_test3->setAttributes(func_test3_PAL);
-
- Function* func_test4 = Function::Create(
- /*Type=*/FuncTy_2,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"test4", mod);
- func_test4->setCallingConv(CallingConv::C);
- AttrListPtr func_test4_PAL;
- func_test4->setAttributes(func_test4_PAL);
-
- // Global Variable Declarations
-
-
- // Constant Definitions
-
- // Global Variable Definitions
-
- // Function Definitions
-
- // Function: test1 (func_test1)
- {
-
- BasicBlock* label_entry = BasicBlock::Create(getGlobalContext(), "entry",func_test1,0);
-
- // Block entry (label_entry)
- CallInst* int32_3 = CallInst::Create(func_test2, "", label_entry);
- int32_3->setCallingConv(CallingConv::C);
- int32_3->setTailCall(false);
- AttrListPtr int32_3_PAL;
- int32_3->setAttributes(int32_3_PAL);
-
- ReturnInst::Create(getGlobalContext(), int32_3, label_entry);
-
- }
-
- // Function: test2 (func_test2)
- {
-
- BasicBlock* label_entry_5 = BasicBlock::Create(getGlobalContext(), "entry",func_test2,0);
-
- // Block entry (label_entry_5)
- CallInst* int32_6 = CallInst::Create(func_test3, "", label_entry_5);
- int32_6->setCallingConv(CallingConv::C);
- int32_6->setTailCall(false);
- AttrListPtr int32_6_PAL;
- int32_6->setAttributes(int32_6_PAL);
-
- ReturnInst::Create(getGlobalContext(), int32_6, label_entry_5);
-
- }
-
- // Function: test3 (func_test3)
- {
-
- BasicBlock* label_entry_8 = BasicBlock::Create(getGlobalContext(), "entry",func_test3,0);
-
- // Block entry (label_entry_8)
- CallInst* int32_9 = CallInst::Create(func_test1, "", label_entry_8);
- int32_9->setCallingConv(CallingConv::C);
- int32_9->setTailCall(false);
- AttrListPtr int32_9_PAL;
- int32_9->setAttributes(int32_9_PAL);
-
- ReturnInst::Create(getGlobalContext(), int32_9, label_entry_8);
-
- }
-
- // Function: test4 (func_test4)
- {
- Function::arg_iterator args = func_test4->arg_begin();
- Value* int1_f = args++;
- int1_f->setName("f");
-
- BasicBlock* label_entry_11 = BasicBlock::Create(getGlobalContext(), "entry",func_test4,0);
- BasicBlock* label_bb = BasicBlock::Create(getGlobalContext(), "bb",func_test4,0);
- BasicBlock* label_bb1 = BasicBlock::Create(getGlobalContext(), "bb1",func_test4,0);
- BasicBlock* label_return = BasicBlock::Create(getGlobalContext(), "return",func_test4,0);
-
- // Block entry (label_entry_11)
- BranchInst::Create(label_bb, label_entry_11);
-
- // Block bb (label_bb)
- BranchInst::Create(label_bb, label_bb1, int1_f, label_bb);
-
- // Block bb1 (label_bb1)
- BranchInst::Create(label_bb1, label_return, int1_f, label_bb1);
-
- // Block return (label_return)
- ReturnInst::Create(getGlobalContext(), label_return);
-
- }
- return mod;
- }
-
- }
-}
-
-INITIALIZE_PASS(ModuleNDM, "mndm", "mndm", false, false)
-INITIALIZE_PASS_BEGIN(CGPass, "cgp","cgp", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
-INITIALIZE_PASS_END(CGPass, "cgp","cgp", false, false)
-INITIALIZE_PASS(FPass, "fp","fp", false, false)
-INITIALIZE_PASS_BEGIN(LPass, "lp","lp", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_END(LPass, "lp","lp", false, false)
-INITIALIZE_PASS(BPass, "bp","bp", false, false)
diff --git a/unittests/VMCore/TypeBuilderTest.cpp b/unittests/VMCore/TypeBuilderTest.cpp
deleted file mode 100644
index a746b1f7384b..000000000000
--- a/unittests/VMCore/TypeBuilderTest.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-//===- llvm/unittest/VMCore/TypeBuilderTest.cpp - TypeBuilder tests -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TypeBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/ADT/ArrayRef.h"
-
-#include "gtest/gtest.h"
-
-using namespace llvm;
-
-namespace {
-
-TEST(TypeBuilderTest, Void) {
- EXPECT_EQ(Type::getVoidTy(getGlobalContext()), (TypeBuilder<void, true>::get(getGlobalContext())));
- EXPECT_EQ(Type::getVoidTy(getGlobalContext()), (TypeBuilder<void, false>::get(getGlobalContext())));
- // Special cases for C compatibility:
- EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
- (TypeBuilder<void*, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
- (TypeBuilder<const void*, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
- (TypeBuilder<volatile void*, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
- (TypeBuilder<const volatile void*, false>::get(
- getGlobalContext())));
-}
-
-TEST(TypeBuilderTest, HostIntegers) {
- EXPECT_EQ(Type::getInt8Ty(getGlobalContext()), (TypeBuilder<int8_t, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt8Ty(getGlobalContext()), (TypeBuilder<uint8_t, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt16Ty(getGlobalContext()), (TypeBuilder<int16_t, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt16Ty(getGlobalContext()), (TypeBuilder<uint16_t, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt32Ty(getGlobalContext()), (TypeBuilder<int32_t, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt32Ty(getGlobalContext()), (TypeBuilder<uint32_t, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt64Ty(getGlobalContext()), (TypeBuilder<int64_t, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt64Ty(getGlobalContext()), (TypeBuilder<uint64_t, false>::get(getGlobalContext())));
-
- EXPECT_EQ(IntegerType::get(getGlobalContext(), sizeof(size_t) * CHAR_BIT),
- (TypeBuilder<size_t, false>::get(getGlobalContext())));
- EXPECT_EQ(IntegerType::get(getGlobalContext(), sizeof(ptrdiff_t) * CHAR_BIT),
- (TypeBuilder<ptrdiff_t, false>::get(getGlobalContext())));
-}
-
-TEST(TypeBuilderTest, CrossCompilableIntegers) {
- EXPECT_EQ(IntegerType::get(getGlobalContext(), 1), (TypeBuilder<types::i<1>, true>::get(getGlobalContext())));
- EXPECT_EQ(IntegerType::get(getGlobalContext(), 1), (TypeBuilder<types::i<1>, false>::get(getGlobalContext())));
- EXPECT_EQ(IntegerType::get(getGlobalContext(), 72), (TypeBuilder<types::i<72>, true>::get(getGlobalContext())));
- EXPECT_EQ(IntegerType::get(getGlobalContext(), 72), (TypeBuilder<types::i<72>, false>::get(getGlobalContext())));
-}
-
-TEST(TypeBuilderTest, Float) {
- EXPECT_EQ(Type::getFloatTy(getGlobalContext()), (TypeBuilder<float, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getDoubleTy(getGlobalContext()), (TypeBuilder<double, false>::get(getGlobalContext())));
- // long double isn't supported yet.
- EXPECT_EQ(Type::getFloatTy(getGlobalContext()), (TypeBuilder<types::ieee_float, true>::get(getGlobalContext())));
- EXPECT_EQ(Type::getFloatTy(getGlobalContext()), (TypeBuilder<types::ieee_float, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getDoubleTy(getGlobalContext()), (TypeBuilder<types::ieee_double, true>::get(getGlobalContext())));
- EXPECT_EQ(Type::getDoubleTy(getGlobalContext()), (TypeBuilder<types::ieee_double, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getX86_FP80Ty(getGlobalContext()), (TypeBuilder<types::x86_fp80, true>::get(getGlobalContext())));
- EXPECT_EQ(Type::getX86_FP80Ty(getGlobalContext()), (TypeBuilder<types::x86_fp80, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getFP128Ty(getGlobalContext()), (TypeBuilder<types::fp128, true>::get(getGlobalContext())));
- EXPECT_EQ(Type::getFP128Ty(getGlobalContext()), (TypeBuilder<types::fp128, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getPPC_FP128Ty(getGlobalContext()), (TypeBuilder<types::ppc_fp128, true>::get(getGlobalContext())));
- EXPECT_EQ(Type::getPPC_FP128Ty(getGlobalContext()), (TypeBuilder<types::ppc_fp128, false>::get(getGlobalContext())));
-}
-
-TEST(TypeBuilderTest, Derived) {
- EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())),
- (TypeBuilder<int8_t**, false>::get(getGlobalContext())));
- EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7),
- (TypeBuilder<int8_t[7], false>::get(getGlobalContext())));
- EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 0),
- (TypeBuilder<int8_t[], false>::get(getGlobalContext())));
-
- EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())),
- (TypeBuilder<types::i<8>**, false>::get(getGlobalContext())));
- EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7),
- (TypeBuilder<types::i<8>[7], false>::get(getGlobalContext())));
- EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 0),
- (TypeBuilder<types::i<8>[], false>::get(getGlobalContext())));
-
- EXPECT_EQ(PointerType::getUnqual(Type::getInt8PtrTy(getGlobalContext())),
- (TypeBuilder<types::i<8>**, true>::get(getGlobalContext())));
- EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 7),
- (TypeBuilder<types::i<8>[7], true>::get(getGlobalContext())));
- EXPECT_EQ(ArrayType::get(Type::getInt8Ty(getGlobalContext()), 0),
- (TypeBuilder<types::i<8>[], true>::get(getGlobalContext())));
-
-
- EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
- (TypeBuilder<const int8_t, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
- (TypeBuilder<volatile int8_t, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
- (TypeBuilder<const volatile int8_t, false>::get(getGlobalContext())));
-
- EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
- (TypeBuilder<const types::i<8>, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
- (TypeBuilder<volatile types::i<8>, false>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
- (TypeBuilder<const volatile types::i<8>, false>::get(getGlobalContext())));
-
- EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
- (TypeBuilder<const types::i<8>, true>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
- (TypeBuilder<volatile types::i<8>, true>::get(getGlobalContext())));
- EXPECT_EQ(Type::getInt8Ty(getGlobalContext()),
- (TypeBuilder<const volatile types::i<8>, true>::get(getGlobalContext())));
-
- EXPECT_EQ(Type::getInt8PtrTy(getGlobalContext()),
- (TypeBuilder<const volatile int8_t*const volatile, false>::get(getGlobalContext())));
-}
-
-TEST(TypeBuilderTest, Functions) {
- std::vector<Type*> params;
- EXPECT_EQ(FunctionType::get(Type::getVoidTy(getGlobalContext()), params, false),
- (TypeBuilder<void(), true>::get(getGlobalContext())));
- EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
- (TypeBuilder<int8_t(...), false>::get(getGlobalContext())));
- params.push_back(TypeBuilder<int32_t*, false>::get(getGlobalContext()));
- EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
- (TypeBuilder<int8_t(const int32_t*), false>::get(getGlobalContext())));
- EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
- (TypeBuilder<int8_t(const int32_t*, ...), false>::get(getGlobalContext())));
- params.push_back(TypeBuilder<char*, false>::get(getGlobalContext()));
- EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
- (TypeBuilder<int8_t(int32_t*, void*), false>::get(getGlobalContext())));
- EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
- (TypeBuilder<int8_t(int32_t*, char*, ...), false>::get(getGlobalContext())));
- params.push_back(TypeBuilder<char, false>::get(getGlobalContext()));
- EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
- (TypeBuilder<int8_t(int32_t*, void*, char), false>::get(getGlobalContext())));
- EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
- (TypeBuilder<int8_t(int32_t*, char*, char, ...), false>::get(getGlobalContext())));
- params.push_back(TypeBuilder<char, false>::get(getGlobalContext()));
- EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
- (TypeBuilder<int8_t(int32_t*, void*, char, char), false>::get(getGlobalContext())));
- EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
- (TypeBuilder<int8_t(int32_t*, char*, char, char, ...),
- false>::get(getGlobalContext())));
- params.push_back(TypeBuilder<char, false>::get(getGlobalContext()));
- EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, false),
- (TypeBuilder<int8_t(int32_t*, void*, char, char, char),
- false>::get(getGlobalContext())));
- EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
- (TypeBuilder<int8_t(int32_t*, char*, char, char, char, ...),
- false>::get(getGlobalContext())));
-}
-
-TEST(TypeBuilderTest, Context) {
- // We used to cache TypeBuilder results in static local variables. This
- // produced the same type for different contexts, which of course broke
- // things.
- LLVMContext context1;
- EXPECT_EQ(&context1,
- &(TypeBuilder<types::i<1>, true>::get(context1))->getContext());
- LLVMContext context2;
- EXPECT_EQ(&context2,
- &(TypeBuilder<types::i<1>, true>::get(context2))->getContext());
-}
-
-struct MyType {
- int a;
- int *b;
- void *array[1];
-};
-
-struct MyPortableType {
- int32_t a;
- int32_t *b;
- void *array[1];
-};
-
-} // anonymous namespace
-
-namespace llvm {
-template<bool cross> class TypeBuilder<MyType, cross> {
-public:
- static StructType *get(LLVMContext &Context) {
- // Using the static result variable ensures that the type is
- // only looked up once.
- std::vector<Type*> st;
- st.push_back(TypeBuilder<int, cross>::get(Context));
- st.push_back(TypeBuilder<int*, cross>::get(Context));
- st.push_back(TypeBuilder<void*[], cross>::get(Context));
- static StructType *const result = StructType::get(Context, st);
- return result;
- }
-
- // You may find this a convenient place to put some constants
- // to help with getelementptr. They don't have any effect on
- // the operation of TypeBuilder.
- enum Fields {
- FIELD_A,
- FIELD_B,
- FIELD_ARRAY
- };
-};
-
-template<bool cross> class TypeBuilder<MyPortableType, cross> {
-public:
- static StructType *get(LLVMContext &Context) {
- // Using the static result variable ensures that the type is
- // only looked up once.
- std::vector<Type*> st;
- st.push_back(TypeBuilder<types::i<32>, cross>::get(Context));
- st.push_back(TypeBuilder<types::i<32>*, cross>::get(Context));
- st.push_back(TypeBuilder<types::i<8>*[], cross>::get(Context));
- static StructType *const result = StructType::get(Context, st);
- return result;
- }
-
- // You may find this a convenient place to put some constants
- // to help with getelementptr. They don't have any effect on
- // the operation of TypeBuilder.
- enum Fields {
- FIELD_A,
- FIELD_B,
- FIELD_ARRAY
- };
-};
-} // namespace llvm
-namespace {
-
-TEST(TypeBuilderTest, Extensions) {
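- // StructType::get's variadic form takes a null-terminated list of
- // member types; these lists mirror the fields declared in the
- // TypeBuilder specializations above.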
- EXPECT_EQ(PointerType::getUnqual(StructType::get(
- TypeBuilder<int, false>::get(getGlobalContext()),
- TypeBuilder<int*, false>::get(getGlobalContext()),
- TypeBuilder<void*[], false>::get(getGlobalContext()),
- (void*)0)),
- (TypeBuilder<MyType*, false>::get(getGlobalContext())));
- EXPECT_EQ(PointerType::getUnqual(StructType::get(
- TypeBuilder<types::i<32>, false>::get(getGlobalContext()),
- TypeBuilder<types::i<32>*, false>::get(getGlobalContext()),
- TypeBuilder<types::i<8>*[], false>::get(getGlobalContext()),
- (void*)0)),
- (TypeBuilder<MyPortableType*, false>::get(getGlobalContext())));
- EXPECT_EQ(PointerType::getUnqual(StructType::get(
- TypeBuilder<types::i<32>, false>::get(getGlobalContext()),
- TypeBuilder<types::i<32>*, false>::get(getGlobalContext()),
- TypeBuilder<types::i<8>*[], false>::get(getGlobalContext()),
- (void*)0)),
- (TypeBuilder<MyPortableType*, true>::get(getGlobalContext())));
-}
-
-} // anonymous namespace
diff --git a/unittests/VMCore/TypesTest.cpp b/unittests/VMCore/TypesTest.cpp
deleted file mode 100644
index 0416643221ed..000000000000
--- a/unittests/VMCore/TypesTest.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-//===- llvm/unittest/VMCore/TypesTest.cpp - Type unit tests ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "gtest/gtest.h"
-using namespace llvm;
-
-namespace {
-
-TEST(TypesTest, StructType) {
- LLVMContext C;
-
- // PR13522
- StructType *Struct = StructType::create(C, "FooBar");
- EXPECT_EQ("FooBar", Struct->getName());
- Struct->setName(Struct->getName().substr(0, 3));
- EXPECT_EQ("Foo", Struct->getName());
- Struct->setName("");
- EXPECT_TRUE(Struct->getName().empty());
- EXPECT_FALSE(Struct->hasName());
-}
-
-} // end anonymous namespace
diff --git a/unittests/VMCore/ValueMapTest.cpp b/unittests/VMCore/ValueMapTest.cpp
deleted file mode 100644
index 9bed37dff33e..000000000000
--- a/unittests/VMCore/ValueMapTest.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-//===- llvm/unittest/VMCore/ValueMapTest.cpp - ValueMap unit tests -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/ValueMap.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Config/llvm-config.h"
-
-#include "gtest/gtest.h"
-
-using namespace llvm;
-
-namespace {
-
-// Test fixture
-template<typename T>
-class ValueMapTest : public testing::Test {
-protected:
- Constant *ConstantV;
- OwningPtr<BitCastInst> BitcastV;
- OwningPtr<BinaryOperator> AddV;
-
- ValueMapTest() :
- ConstantV(ConstantInt::get(Type::getInt32Ty(getGlobalContext()), 0)),
- BitcastV(new BitCastInst(ConstantV, Type::getInt32Ty(getGlobalContext()))),
- AddV(BinaryOperator::CreateAdd(ConstantV, ConstantV)) {
- }
-};
-
-// Run everything on Value*, a subtype to make sure that casting works as
-// expected, and a const subtype to make sure we cast const correctly.
-typedef ::testing::Types<Value, Instruction, const Instruction> KeyTypes;
-TYPED_TEST_CASE(ValueMapTest, KeyTypes);
-
-TYPED_TEST(ValueMapTest, Null) {
- ValueMap<TypeParam*, int> VM1;
- VM1[NULL] = 7;
- EXPECT_EQ(7, VM1.lookup(NULL));
-}
-
-TYPED_TEST(ValueMapTest, FollowsValue) {
- ValueMap<TypeParam*, int> VM;
- VM[this->BitcastV.get()] = 7;
- EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
- EXPECT_EQ(0, VM.count(this->AddV.get()));
- this->BitcastV->replaceAllUsesWith(this->AddV.get());
- EXPECT_EQ(7, VM.lookup(this->AddV.get()));
- EXPECT_EQ(0, VM.count(this->BitcastV.get()));
- this->AddV.reset();
- EXPECT_EQ(0, VM.count(this->AddV.get()));
- EXPECT_EQ(0, VM.count(this->BitcastV.get()));
- EXPECT_EQ(0U, VM.size());
-}
-
-TYPED_TEST(ValueMapTest, OperationsWork) {
- ValueMap<TypeParam*, int> VM;
- ValueMap<TypeParam*, int> VM2(16); (void)VM2;
- typename ValueMapConfig<TypeParam*>::ExtraData Data;
- ValueMap<TypeParam*, int> VM3(Data, 16); (void)VM3;
- EXPECT_TRUE(VM.empty());
-
- VM[this->BitcastV.get()] = 7;
-
- // Find:
- typename ValueMap<TypeParam*, int>::iterator I =
- VM.find(this->BitcastV.get());
- ASSERT_TRUE(I != VM.end());
- EXPECT_EQ(this->BitcastV.get(), I->first);
- EXPECT_EQ(7, I->second);
- EXPECT_TRUE(VM.find(this->AddV.get()) == VM.end());
-
- // Const find:
- const ValueMap<TypeParam*, int> &CVM = VM;
- typename ValueMap<TypeParam*, int>::const_iterator CI =
- CVM.find(this->BitcastV.get());
- ASSERT_TRUE(CI != CVM.end());
- EXPECT_EQ(this->BitcastV.get(), CI->first);
- EXPECT_EQ(7, CI->second);
- EXPECT_TRUE(CVM.find(this->AddV.get()) == CVM.end());
-
- // Insert:
- std::pair<typename ValueMap<TypeParam*, int>::iterator, bool> InsertResult1 =
- VM.insert(std::make_pair(this->AddV.get(), 3));
- EXPECT_EQ(this->AddV.get(), InsertResult1.first->first);
- EXPECT_EQ(3, InsertResult1.first->second);
- EXPECT_TRUE(InsertResult1.second);
- EXPECT_EQ(true, VM.count(this->AddV.get()));
- std::pair<typename ValueMap<TypeParam*, int>::iterator, bool> InsertResult2 =
- VM.insert(std::make_pair(this->AddV.get(), 5));
- EXPECT_EQ(this->AddV.get(), InsertResult2.first->first);
- EXPECT_EQ(3, InsertResult2.first->second);
- EXPECT_FALSE(InsertResult2.second);
-
- // Erase:
- VM.erase(InsertResult2.first);
- EXPECT_EQ(0U, VM.count(this->AddV.get()));
- EXPECT_EQ(1U, VM.count(this->BitcastV.get()));
- VM.erase(this->BitcastV.get());
- EXPECT_EQ(0U, VM.count(this->BitcastV.get()));
- EXPECT_EQ(0U, VM.size());
-
- // Range insert:
- SmallVector<std::pair<Instruction*, int>, 2> Elems;
- Elems.push_back(std::make_pair(this->AddV.get(), 1));
- Elems.push_back(std::make_pair(this->BitcastV.get(), 2));
- VM.insert(Elems.begin(), Elems.end());
- EXPECT_EQ(1, VM.lookup(this->AddV.get()));
- EXPECT_EQ(2, VM.lookup(this->BitcastV.get()));
-}
-
-template<typename ExpectedType, typename VarType>
-void CompileAssertHasType(VarType) {
- typedef char assert[is_same<ExpectedType, VarType>::value ? 1 : -1];
-}
-
-TYPED_TEST(ValueMapTest, Iteration) {
- ValueMap<TypeParam*, int> VM;
- VM[this->BitcastV.get()] = 2;
- VM[this->AddV.get()] = 3;
- size_t size = 0;
- for (typename ValueMap<TypeParam*, int>::iterator I = VM.begin(), E = VM.end();
- I != E; ++I) {
- ++size;
- std::pair<TypeParam*, int> value = *I; (void)value;
- CompileAssertHasType<TypeParam*>(I->first);
- if (I->second == 2) {
- EXPECT_EQ(this->BitcastV.get(), I->first);
- I->second = 5;
- } else if (I->second == 3) {
- EXPECT_EQ(this->AddV.get(), I->first);
- I->second = 6;
- } else {
- ADD_FAILURE() << "Iterated through an extra value.";
- }
- }
- EXPECT_EQ(2U, size);
- EXPECT_EQ(5, VM[this->BitcastV.get()]);
- EXPECT_EQ(6, VM[this->AddV.get()]);
-
- size = 0;
- // Cast to const ValueMap to avoid a bug in DenseMap's iterators.
- const ValueMap<TypeParam*, int>& CVM = VM;
- for (typename ValueMap<TypeParam*, int>::const_iterator I = CVM.begin(),
- E = CVM.end(); I != E; ++I) {
- ++size;
- std::pair<TypeParam*, int> value = *I; (void)value;
- CompileAssertHasType<TypeParam*>(I->first);
- if (I->second == 5) {
- EXPECT_EQ(this->BitcastV.get(), I->first);
- } else if (I->second == 6) {
- EXPECT_EQ(this->AddV.get(), I->first);
- } else {
- ADD_FAILURE() << "Iterated through an extra value.";
- }
- }
- EXPECT_EQ(2U, size);
-}
-
-TYPED_TEST(ValueMapTest, DefaultCollisionBehavior) {
- // By default, we overwrite the old value with the replaced value.
- ValueMap<TypeParam*, int> VM;
- VM[this->BitcastV.get()] = 7;
- VM[this->AddV.get()] = 9;
- this->BitcastV->replaceAllUsesWith(this->AddV.get());
- EXPECT_EQ(0, VM.count(this->BitcastV.get()));
- EXPECT_EQ(9, VM.lookup(this->AddV.get()));
-}
-
-TYPED_TEST(ValueMapTest, ConfiguredCollisionBehavior) {
- // TODO: Implement this when someone needs it.
-}
-
-template<typename KeyT>
-struct LockMutex : ValueMapConfig<KeyT> {
- struct ExtraData {
- sys::Mutex *M;
- bool *CalledRAUW;
- bool *CalledDeleted;
- };
- static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) {
- *Data.CalledRAUW = true;
- EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked.";
- }
- static void onDelete(const ExtraData &Data, KeyT Old) {
- *Data.CalledDeleted = true;
- EXPECT_FALSE(Data.M->tryacquire()) << "Mutex should already be locked.";
- }
- static sys::Mutex *getMutex(const ExtraData &Data) { return Data.M; }
-};
-#if LLVM_ENABLE_THREADS
-TYPED_TEST(ValueMapTest, LocksMutex) {
- sys::Mutex M(false); // Not recursive.
- bool CalledRAUW = false, CalledDeleted = false;
- typename LockMutex<TypeParam*>::ExtraData Data =
- {&M, &CalledRAUW, &CalledDeleted};
- ValueMap<TypeParam*, int, LockMutex<TypeParam*> > VM(Data);
- VM[this->BitcastV.get()] = 7;
- this->BitcastV->replaceAllUsesWith(this->AddV.get());
- this->AddV.reset();
- EXPECT_TRUE(CalledRAUW);
- EXPECT_TRUE(CalledDeleted);
-}
-#endif
-
-template<typename KeyT>
-struct NoFollow : ValueMapConfig<KeyT> {
- enum { FollowRAUW = false };
-};
-
-TYPED_TEST(ValueMapTest, NoFollowRAUW) {
- ValueMap<TypeParam*, int, NoFollow<TypeParam*> > VM;
- VM[this->BitcastV.get()] = 7;
- EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
- EXPECT_EQ(0, VM.count(this->AddV.get()));
- this->BitcastV->replaceAllUsesWith(this->AddV.get());
- EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
- EXPECT_EQ(0, VM.lookup(this->AddV.get()));
- this->AddV.reset();
- EXPECT_EQ(7, VM.lookup(this->BitcastV.get()));
- EXPECT_EQ(0, VM.lookup(this->AddV.get()));
- this->BitcastV.reset();
- EXPECT_EQ(0, VM.lookup(this->BitcastV.get()));
- EXPECT_EQ(0, VM.lookup(this->AddV.get()));
- EXPECT_EQ(0U, VM.size());
-}
-
-template<typename KeyT>
-struct CountOps : ValueMapConfig<KeyT> {
- struct ExtraData {
- int *Deletions;
- int *RAUWs;
- };
-
- static void onRAUW(const ExtraData &Data, KeyT Old, KeyT New) {
- ++*Data.RAUWs;
- }
- static void onDelete(const ExtraData &Data, KeyT Old) {
- ++*Data.Deletions;
- }
-};
-
-TYPED_TEST(ValueMapTest, CallsConfig) {
- int Deletions = 0, RAUWs = 0;
- typename CountOps<TypeParam*>::ExtraData Data = {&Deletions, &RAUWs};
- ValueMap<TypeParam*, int, CountOps<TypeParam*> > VM(Data);
- VM[this->BitcastV.get()] = 7;
- this->BitcastV->replaceAllUsesWith(this->AddV.get());
- EXPECT_EQ(0, Deletions);
- EXPECT_EQ(1, RAUWs);
- this->AddV.reset();
- EXPECT_EQ(1, Deletions);
- EXPECT_EQ(1, RAUWs);
- this->BitcastV.reset();
- EXPECT_EQ(1, Deletions);
- EXPECT_EQ(1, RAUWs);
-}
-
-template<typename KeyT>
-struct ModifyingConfig : ValueMapConfig<KeyT> {
- // We'll put a pointer here back to the ValueMap this key is in, so
- // that we can modify it (and clobber *this) before the ValueMap
- // tries to do the same modification. In previous versions of
- // ValueMap, that exploded.
- typedef ValueMap<KeyT, int, ModifyingConfig<KeyT> > **ExtraData;
-
- static void onRAUW(ExtraData Map, KeyT Old, KeyT New) {
- (*Map)->erase(Old);
- }
- static void onDelete(ExtraData Map, KeyT Old) {
- (*Map)->erase(Old);
- }
-};
-TYPED_TEST(ValueMapTest, SurvivesModificationByConfig) {
- ValueMap<TypeParam*, int, ModifyingConfig<TypeParam*> > *MapAddress;
- ValueMap<TypeParam*, int, ModifyingConfig<TypeParam*> > VM(&MapAddress);
- MapAddress = &VM;
- // Now the ModifyingConfig can modify the Map inside a callback.
- VM[this->BitcastV.get()] = 7;
- this->BitcastV->replaceAllUsesWith(this->AddV.get());
- EXPECT_FALSE(VM.count(this->BitcastV.get()));
- EXPECT_FALSE(VM.count(this->AddV.get()));
- VM[this->AddV.get()] = 7;
- this->AddV.reset();
- EXPECT_FALSE(VM.count(this->AddV.get()));
-}
-
-}
diff --git a/unittests/VMCore/VerifierTest.cpp b/unittests/VMCore/VerifierTest.cpp
deleted file mode 100644
index 324b4e193ba6..000000000000
--- a/unittests/VMCore/VerifierTest.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-//===- llvm/unittest/VMCore/VerifierTest.cpp - Verifier unit tests --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Analysis/Verifier.h"
-#include "gtest/gtest.h"
-
-namespace llvm {
-namespace {
-
-TEST(VerifierTest, Branch_i1) {
- LLVMContext &C = getGlobalContext();
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false);
- OwningPtr<Function> F(Function::Create(FTy, GlobalValue::ExternalLinkage));
- BasicBlock *Entry = BasicBlock::Create(C, "entry", F.get());
- BasicBlock *Exit = BasicBlock::Create(C, "exit", F.get());
- ReturnInst::Create(C, Exit);
-
- // To avoid triggering an assertion in BranchInst::Create, we first create
- // a branch with an 'i1' condition ...
-
- Constant *False = ConstantInt::getFalse(C);
- BranchInst *BI = BranchInst::Create(Exit, Exit, False, Entry);
-
- // ... then use setOperand to redirect it to a value of different type.
-
- Constant *Zero32 = ConstantInt::get(IntegerType::get(C, 32), 0);
- BI->setOperand(0, Zero32);
-
- EXPECT_TRUE(verifyFunction(*F, ReturnStatusAction));
-}
-
-TEST(VerifierTest, AliasUnnamedAddr) {
- LLVMContext &C = getGlobalContext();
- Module M("M", C);
- Type *Ty = Type::getInt8Ty(C);
- Constant *Init = Constant::getNullValue(Ty);
- GlobalVariable *Aliasee = new GlobalVariable(M, Ty, true,
- GlobalValue::ExternalLinkage,
- Init, "foo");
- GlobalAlias *GA = new GlobalAlias(Type::getInt8PtrTy(C),
- GlobalValue::ExternalLinkage,
- "bar", Aliasee, &M);
- GA->setUnnamedAddr(true);
- std::string Error;
- EXPECT_TRUE(verifyModule(M, ReturnStatusAction, &Error));
- EXPECT_TRUE(StringRef(Error).startswith("Alias cannot have unnamed_addr"));
-}
-}
-}
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index e79162867eba..b0ef67ac88ba 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -17,17 +17,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
+#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Signals.h"
#include "llvm/Support/system_error.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringMap.h"
#include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
using namespace llvm;
static cl::opt<std::string>
@@ -63,23 +67,29 @@ class Pattern {
/// RegEx - If non-empty, this is a regex pattern.
std::string RegExStr;
+  /// \brief Contains the line number of this pattern in the check file.
+ unsigned LineNumber;
+
/// VariableUses - Entries in this vector map to uses of a variable in the
/// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
/// "foobaz" and we'll get an entry in this vector that tells us to insert the
/// value of bar at offset 3.
std::vector<std::pair<StringRef, unsigned> > VariableUses;
- /// VariableDefs - Entries in this vector map to definitions of a variable in
- /// the pattern, e.g. "foo[[bar:.*]]baz". In this case, the RegExStr will
- /// contain "foo(.*)baz" and VariableDefs will contain the pair "bar",1. The
- /// index indicates what parenthesized value captures the variable value.
- std::vector<std::pair<StringRef, unsigned> > VariableDefs;
+ /// VariableDefs - Maps definitions of variables to their parenthesized
+ /// capture numbers.
+ /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
+ std::map<StringRef, unsigned> VariableDefs;
public:
Pattern(bool matchEOF = false) : MatchEOF(matchEOF) { }
- bool ParsePattern(StringRef PatternStr, SourceMgr &SM);
+ /// ParsePattern - Parse the given string into the Pattern. SM provides the
+ /// SourceMgr used for error reports, and LineNumber is the line number in
+ /// the input file from which the pattern string was read.
+ /// Returns true in case of an error, false otherwise.
+ bool ParsePattern(StringRef PatternStr, SourceMgr &SM, unsigned LineNumber);
/// Match - Match the pattern string against the input buffer Buffer. This
/// returns the position that is matched or npos if there is no match. If
@@ -97,17 +107,31 @@ public:
private:
static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr);
- bool AddRegExToRegEx(StringRef RegExStr, unsigned &CurParen, SourceMgr &SM);
+ bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
+ void AddBackrefToRegEx(unsigned BackrefNum);
/// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
/// matching this pattern at the start of \arg Buffer; a distance of zero
/// should correspond to a perfect match.
unsigned ComputeMatchDistance(StringRef Buffer,
const StringMap<StringRef> &VariableTable) const;
+
+ /// \brief Evaluates expression and stores the result to \p Value.
+  /// \return true on success, false when the expression has invalid syntax.
+ bool EvaluateExpression(StringRef Expr, std::string &Value) const;
+
+ /// \brief Finds the closing sequence of a regex variable usage or
+  /// definition. Str has to point to the beginning of the definition
+ /// (right after the opening sequence).
+ /// \return offset of the closing sequence within Str, or npos if it was not
+ /// found.
+ size_t FindRegexVarEnd(StringRef Str);
};
-bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
+bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM,
+ unsigned LineNumber) {
+ this->LineNumber = LineNumber;
PatternLoc = SMLoc::getFromPointer(PatternStr.data());
// Ignore trailing whitespace.
@@ -140,8 +164,7 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
while (!PatternStr.empty()) {
// RegEx matches.
if (PatternStr.startswith("{{")) {
-
- // Otherwise, this is the start of a regex match. Scan for the }}.
+ // This is the start of a regex match. Scan for the }}.
size_t End = PatternStr.find("}}");
if (End == StringRef::npos) {
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
@@ -171,8 +194,10 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
// itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
// it. This is to catch some common errors.
if (PatternStr.startswith("[[")) {
- // Verify that it is terminated properly.
- size_t End = PatternStr.find("]]");
+ // Find the closing bracket pair ending the match. End is going to be an
+ // offset relative to the beginning of the match string.
+ size_t End = FindRegexVarEnd(PatternStr.substr(2));
+
if (End == StringRef::npos) {
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
SourceMgr::DK_Error,
@@ -180,8 +205,8 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
return true;
}
- StringRef MatchStr = PatternStr.substr(2, End-2);
- PatternStr = PatternStr.substr(End+2);
+ StringRef MatchStr = PatternStr.substr(2, End);
+ PatternStr = PatternStr.substr(End+4);
// Get the regex name (e.g. "foo").
size_t NameEnd = MatchStr.find(':');
@@ -193,16 +218,31 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
return true;
}
- // Verify that the name is well formed.
- for (unsigned i = 0, e = Name.size(); i != e; ++i)
- if (Name[i] != '_' && !isalnum(Name[i])) {
+  // Verify that the name/expression is well formed. FileCheck currently
+  // supports @LINE, @LINE+number, and @LINE-number expressions. The check
+  // here is relaxed; a stricter check is performed in \c EvaluateExpression.
+ bool IsExpression = false;
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ if (i == 0 && Name[i] == '@') {
+ if (NameEnd != StringRef::npos) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+ SourceMgr::DK_Error,
+ "invalid name in named regex definition");
+ return true;
+ }
+ IsExpression = true;
+ continue;
+ }
+ if (Name[i] != '_' && !isalnum(Name[i]) &&
+ (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
SourceMgr::DK_Error, "invalid name in named regex");
return true;
}
+ }
// Name can't start with a digit.
- if (isdigit(Name[0])) {
+ if (isdigit(static_cast<unsigned char>(Name[0]))) {
SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
"invalid name in named regex");
return true;
@@ -210,12 +250,25 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
// Handle [[foo]].
if (NameEnd == StringRef::npos) {
- VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
+ // Handle variables that were defined earlier on the same line by
+ // emitting a backreference.
+ if (VariableDefs.find(Name) != VariableDefs.end()) {
+ unsigned VarParenNum = VariableDefs[Name];
+ if (VarParenNum < 1 || VarParenNum > 9) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+ SourceMgr::DK_Error,
+ "Can't back-reference more than 9 variables");
+ return true;
+ }
+ AddBackrefToRegEx(VarParenNum);
+ } else {
+ VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
+ }
continue;
}
// Handle [[foo:.*]].
- VariableDefs.push_back(std::make_pair(Name, CurParen));
+ VariableDefs[Name] = CurParen;
RegExStr += '(';
++CurParen;
@@ -231,7 +284,6 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr);
PatternStr = PatternStr.substr(FixedMatchEnd);
- continue;
}
return false;
@@ -264,21 +316,46 @@ void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) {
}
}
-bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen,
+bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
SourceMgr &SM) {
- Regex R(RegexStr);
+ Regex R(RS);
std::string Error;
if (!R.isValid(Error)) {
- SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()), SourceMgr::DK_Error,
+ SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
"invalid regex: " + Error);
return true;
}
- RegExStr += RegexStr.str();
+ RegExStr += RS.str();
CurParen += R.getNumMatches();
return false;
}
+void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
+ assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
+ std::string Backref = std::string("\\") +
+ std::string(1, '0' + BackrefNum);
+ RegExStr += Backref;
+}
+
+bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
+ // The only supported expression is @LINE([\+-]\d+)?
+ if (!Expr.startswith("@LINE"))
+ return false;
+ Expr = Expr.substr(StringRef("@LINE").size());
+ int Offset = 0;
+ if (!Expr.empty()) {
+ if (Expr[0] == '+')
+ Expr = Expr.substr(1);
+ else if (Expr[0] != '-')
+ return false;
+ if (Expr.getAsInteger(10, Offset))
+ return false;
+ }
+ Value = llvm::itostr(LineNumber + Offset);
+ return true;
+}
+
/// Match - Match the pattern string against the input buffer Buffer. This
/// returns the position that is matched or npos if there is no match. If
/// there is a match, the size of the matched string is returned in MatchLen.
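Taken together, the helpers above implement the two new check-line features:
a variable reused on the line that defines it is compiled into a plain \1-\9
regex backreference instead of a variable-table lookup, and @LINE (with an
optional +/- offset) expands to the line number of the check directive
itself. A hypothetical check file exercising both, with syntax inferred from
the parsing code above:

    ; CHECK: mov [[REG:r[0-9]+]], [[REG]]        <-- same-line use: backref
    ; CHECK: err.c:[[@LINE+1]]:5: warning        <-- numeric line expression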
@@ -307,15 +384,21 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
unsigned InsertOffset = 0;
for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
- StringMap<StringRef>::iterator it =
- VariableTable.find(VariableUses[i].first);
- // If the variable is undefined, return an error.
- if (it == VariableTable.end())
- return StringRef::npos;
-
- // Look up the value and escape it so that we can plop it into the regex.
std::string Value;
- AddFixedStringToRegEx(it->second, Value);
+
+ if (VariableUses[i].first[0] == '@') {
+ if (!EvaluateExpression(VariableUses[i].first, Value))
+ return StringRef::npos;
+ } else {
+ StringMap<StringRef>::iterator it =
+ VariableTable.find(VariableUses[i].first);
+ // If the variable is undefined, return an error.
+ if (it == VariableTable.end())
+ return StringRef::npos;
+
+ // Look up the value and escape it so that we can plop it into the regex.
+ AddFixedStringToRegEx(it->second, Value);
+ }
// Plop it into the regex at the adjusted offset.
TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
@@ -337,10 +420,11 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
StringRef FullMatch = MatchInfo[0];
// If this defines any variables, remember their values.
- for (unsigned i = 0, e = VariableDefs.size(); i != e; ++i) {
- assert(VariableDefs[i].second < MatchInfo.size() &&
- "Internal paren error");
- VariableTable[VariableDefs[i].first] = MatchInfo[VariableDefs[i].second];
+ for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
+ E = VariableDefs.end();
+ I != E; ++I) {
+ assert(I->second < MatchInfo.size() && "Internal paren error");
+ VariableTable[I->first] = MatchInfo[I->second];
}
MatchLen = FullMatch.size();
@@ -371,19 +455,31 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
// variable values.
if (!VariableUses.empty()) {
for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
- StringRef Var = VariableUses[i].first;
- StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
-
- // Check for undefined variable references.
- if (it == VariableTable.end()) {
- OS << "uses undefined variable \"";
- OS.write_escaped(Var) << "\"";;
+ StringRef Var = VariableUses[i].first;
+ if (Var[0] == '@') {
+ std::string Value;
+ if (EvaluateExpression(Var, Value)) {
+ OS << "with expression \"";
+ OS.write_escaped(Var) << "\" equal to \"";
+ OS.write_escaped(Value) << "\"";
+ } else {
+ OS << "uses incorrect expression \"";
+ OS.write_escaped(Var) << "\"";
+ }
} else {
- OS << "with variable \"";
- OS.write_escaped(Var) << "\" equal to \"";
- OS.write_escaped(it->second) << "\"";
+ StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
+
+ // Check for undefined variable references.
+ if (it == VariableTable.end()) {
+ OS << "uses undefined variable \"";
+ OS.write_escaped(Var) << "\"";
+ } else {
+ OS << "with variable \"";
+ OS.write_escaped(Var) << "\" equal to \"";
+ OS.write_escaped(it->second) << "\"";
+ }
}
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
@@ -432,6 +528,40 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
}
}
+size_t Pattern::FindRegexVarEnd(StringRef Str) {
+  // Offset keeps track of the current offset within the input Str.
+  size_t Offset = 0;
+  // Current [...] character class nesting depth.
+ size_t BracketDepth = 0;
+
+ while (!Str.empty()) {
+ if (Str.startswith("]]") && BracketDepth == 0)
+ return Offset;
+ if (Str[0] == '\\') {
+ // Backslash escapes the next char within regexes, so skip them both.
+ Str = Str.substr(2);
+ Offset += 2;
+ } else {
+ switch (Str[0]) {
+ default:
+ break;
+ case '[':
+ BracketDepth++;
+ break;
+ case ']':
+ assert(BracketDepth > 0 && "Invalid regex");
+ BracketDepth--;
+ break;
+ }
+ Str = Str.substr(1);
+ Offset++;
+ }
+ }
+
+ return StringRef::npos;
+}
+
+
//===----------------------------------------------------------------------===//
// Check Strings.
//===----------------------------------------------------------------------===//
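FindRegexVarEnd is needed because the old find("]]") scan terminated a
variable definition at the first ']]' even when one of those brackets closed
a character class inside the variable's regex. A hypothetical pattern that
only parses correctly with the depth-aware scan:

    ; CHECK: [[NUM:[0-9]]]
    ;        with a naive find("]]"), the regex would be cut to "[0-9";
    ;        the new scan keeps "[0-9]" and ends the variable afterwards.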
@@ -457,9 +587,13 @@ struct CheckString {
: Pat(P), Loc(L), IsCheckNext(isCheckNext) {}
};
-/// CanonicalizeInputFile - Remove duplicate horizontal space from the specified
-/// memory buffer, free it, and return a new one.
-static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
+/// Canonicalize whitespace in the input file. Line endings are replaced
+/// with UNIX-style '\n'.
+///
+/// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
+/// characters to a single space.
+static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB,
+ bool PreserveHorizontal) {
SmallString<128> NewFile;
NewFile.reserve(MB->getBufferSize());
@@ -470,8 +604,9 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
continue;
}
- // If current char is not a horizontal whitespace, dump it to output as is.
- if (*Ptr != ' ' && *Ptr != '\t') {
+ // If current char is not a horizontal whitespace or if horizontal
+ // whitespace canonicalization is disabled, dump it to output as is.
+ if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
NewFile.push_back(*Ptr);
continue;
}
@@ -494,9 +629,9 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
/// ReadCheckFile - Read the check file, which specifies the sequence of
/// expected strings. The strings are added to the CheckStrings vector.
+/// Returns true in case of an error, false otherwise.
static bool ReadCheckFile(SourceMgr &SM,
std::vector<CheckString> &CheckStrings) {
- // Open the check file, and tell SourceMgr about it.
OwningPtr<MemoryBuffer> File;
if (error_code ec =
MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), File)) {
@@ -504,28 +639,33 @@ static bool ReadCheckFile(SourceMgr &SM,
<< ec.message() << '\n';
return true;
}
- MemoryBuffer *F = File.take();
// If we want to canonicalize whitespace, strip excess whitespace from the
- // buffer containing the CHECK lines.
- if (!NoCanonicalizeWhiteSpace)
- F = CanonicalizeInputFile(F);
+ // buffer containing the CHECK lines. Remove DOS style line endings.
+ MemoryBuffer *F =
+ CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace);
SM.AddNewSourceBuffer(F, SMLoc());
// Find all instances of CheckPrefix followed by : in the file.
StringRef Buffer = F->getBuffer();
-
std::vector<std::pair<SMLoc, Pattern> > NotMatches;
+ // LineNumber keeps track of the line on which CheckPrefix instances are
+ // found.
+ unsigned LineNumber = 1;
+
while (1) {
// See if Prefix occurs in the memory buffer.
- Buffer = Buffer.substr(Buffer.find(CheckPrefix));
-
+ size_t PrefixLoc = Buffer.find(CheckPrefix);
// If we didn't find a match, we're done.
- if (Buffer.empty())
+ if (PrefixLoc == StringRef::npos)
break;
+ LineNumber += Buffer.substr(0, PrefixLoc).count('\n');
+
+ Buffer = Buffer.substr(PrefixLoc);
+
const char *CheckPrefixStart = Buffer.data();
// When we find a check prefix, keep track of whether we find CHECK: or
@@ -560,12 +700,11 @@ static bool ReadCheckFile(SourceMgr &SM,
// Parse the pattern.
Pattern P;
- if (P.ParsePattern(Buffer.substr(0, EOL), SM))
+ if (P.ParsePattern(Buffer.substr(0, EOL), SM, LineNumber))
return true;
Buffer = Buffer.substr(EOL);
-
// Verify that CHECK-NEXT lines have at least one CHECK line before them.
if (IsCheckNext && CheckStrings.empty()) {
SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
@@ -582,7 +721,6 @@ static bool ReadCheckFile(SourceMgr &SM,
continue;
}
-
// Okay, add the string we captured to the output vector and move on.
CheckStrings.push_back(CheckString(P,
PatternLoc,
@@ -663,18 +801,18 @@ int main(int argc, char **argv) {
MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) {
errs() << "Could not open input file '" << InputFilename << "': "
<< ec.message() << '\n';
- return true;
+ return 2;
}
- MemoryBuffer *F = File.take();
- if (F->getBufferSize() == 0) {
+ if (File->getBufferSize() == 0) {
errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
- return 1;
+ return 2;
}
-
+
// Remove duplicate spaces in the input file if requested.
- if (!NoCanonicalizeWhiteSpace)
- F = CanonicalizeInputFile(F);
+ // Remove DOS style line endings.
+ MemoryBuffer *F =
+ CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace);
SM.AddNewSourceBuffer(F, SMLoc());
diff --git a/utils/FileUpdate/FileUpdate.cpp b/utils/FileUpdate/FileUpdate.cpp
index 3ea1e4f306ee..9b48f94948aa 100644
--- a/utils/FileUpdate/FileUpdate.cpp
+++ b/utils/FileUpdate/FileUpdate.cpp
@@ -13,12 +13,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
diff --git a/utils/GenLibDeps.pl b/utils/GenLibDeps.pl
index 656250c7e3d9..7748cabdab5b 100755
--- a/utils/GenLibDeps.pl
+++ b/utils/GenLibDeps.pl
@@ -98,7 +98,7 @@ if ($PEROBJ) {
$libpath =~ s/^BitWriter/Bitcode\/Writer/;
$libpath =~ s/^CppBackend/Target\/CppBackend/;
$libpath =~ s/^MSIL/Target\/MSIL/;
- $libpath =~ s/^Core/VMCore/;
+ $libpath =~ s/^Core/IR/;
$libpath =~ s/^Instrumentation/Transforms\/Instrumentation/;
$libpath =~ s/^Interpreter/ExecutionEngine\/Interpreter/;
$libpath =~ s/^JIT/ExecutionEngine\/JIT/;
diff --git a/utils/KillTheDoctor/KillTheDoctor.cpp b/utils/KillTheDoctor/KillTheDoctor.cpp
index 70713b25bf25..feba2e54f6a5 100644
--- a/utils/KillTheDoctor/KillTheDoctor.cpp
+++ b/utils/KillTheDoctor/KillTheDoctor.cpp
@@ -39,19 +39,22 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/type_traits.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
+#include "llvm/Support/type_traits.h"
#include <algorithm>
#include <cerrno>
#include <cstdlib>
#include <map>
#include <string>
+
+// These includes must be last.
#include <Windows.h>
#include <WinError.h>
#include <Dbghelp.h>
#include <psapi.h>
+
using namespace llvm;
#undef max
diff --git a/utils/PerfectShuffle/PerfectShuffle.cpp b/utils/PerfectShuffle/PerfectShuffle.cpp
index 98f8f4cc0cab..d39414eede94 100644
--- a/utils/PerfectShuffle/PerfectShuffle.cpp
+++ b/utils/PerfectShuffle/PerfectShuffle.cpp
@@ -14,11 +14,11 @@
//
//===----------------------------------------------------------------------===//
-#include <iostream>
-#include <iomanip>
-#include <vector>
#include <cassert>
#include <cstdlib>
+#include <iomanip>
+#include <iostream>
+#include <vector>
struct Operator;
// Masks are 4-nibble hex numbers. Values 0-7 in any nibble means that it takes
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index ee83311c583b..6faf819529d0 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -100,9 +100,9 @@
#include "StringToOffsetTable.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -958,8 +958,12 @@ static std::string getEnumNameForToken(StringRef Str) {
case ':': Res += "_COLON_"; break;
case '!': Res += "_EXCLAIM_"; break;
case '.': Res += "_DOT_"; break;
+ case '<': Res += "_LT_"; break;
+ case '>': Res += "_GT_"; break;
default:
- if (isalnum(*it))
+ if ((*it >= 'A' && *it <= 'Z') ||
+ (*it >= 'a' && *it <= 'z') ||
+ (*it >= '0' && *it <= '9'))
Res += *it;
else
Res += "_" + utostr((unsigned) *it) + "_";
@@ -1723,7 +1727,7 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
<< " default: llvm_unreachable(\"invalid conversion entry!\");\n"
<< " case CVT_Reg:\n"
<< " Operands[*(p + 1)]->setMCOperandNum(NumMCOperands);\n"
- << " Operands[*(p + 1)]->setConstraint(\"m\");\n"
+ << " Operands[*(p + 1)]->setConstraint(\"r\");\n"
<< " ++NumMCOperands;\n"
<< " break;\n"
<< " case CVT_Tied:\n"
@@ -1754,7 +1758,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Remember this converter for the kind enum.
unsigned KindID = OperandConversionKinds.size();
- OperandConversionKinds.insert("CVT_" + AsmMatchConverter);
+ OperandConversionKinds.insert("CVT_" +
+ getEnumNameForToken(AsmMatchConverter));
// Add the converter row for this instruction.
ConversionTable.push_back(std::vector<uint8_t>());
@@ -1762,7 +1767,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
ConversionTable.back().push_back(CVT_Done);
// Add the handler to the conversion driver function.
- CvtOS << " case CVT_" << AsmMatchConverter << ":\n"
+ CvtOS << " case CVT_"
+ << getEnumNameForToken(AsmMatchConverter) << ":\n"
<< " " << AsmMatchConverter << "(Inst, Operands);\n"
<< " break;\n";
@@ -1800,6 +1806,7 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// the index of its entry in the vector).
std::string Name = "CVT_" + (Op.Class->isRegisterClass() ? "Reg" :
Op.Class->RenderMethod);
+ Name = getEnumNameForToken(Name);
bool IsNewConverter = false;
unsigned ID = getConverterOperandID(Name, OperandConversionKinds,
@@ -1823,9 +1830,13 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Add a handler for the operand number lookup.
OpOS << " case " << Name << ":\n"
- << " Operands[*(p + 1)]->setMCOperandNum(NumMCOperands);\n"
- << " Operands[*(p + 1)]->setConstraint(\"m\");\n"
- << " NumMCOperands += " << OpInfo.MINumOperands << ";\n"
+ << " Operands[*(p + 1)]->setMCOperandNum(NumMCOperands);\n";
+
+ if (Op.Class->isRegisterClass())
+ OpOS << " Operands[*(p + 1)]->setConstraint(\"r\");\n";
+ else
+ OpOS << " Operands[*(p + 1)]->setConstraint(\"m\");\n";
+ OpOS << " NumMCOperands += " << OpInfo.MINumOperands << ";\n"
<< " break;\n";
break;
}
@@ -2867,6 +2878,15 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << "(MatchClassKind)it->Classes[i]);\n";
OS << " if (Diag == Match_Success)\n";
OS << " continue;\n";
+ OS << " // If the generic handler indicates an invalid operand\n";
+ OS << " // failure, check for a special case.\n";
+ OS << " if (Diag == Match_InvalidOperand) {\n";
+ OS << " Diag = validateTargetOperandClass(Operands[i+1],\n";
+ OS.indent(43);
+ OS << "(MatchClassKind)it->Classes[i]);\n";
+ OS << " if (Diag == Match_Success)\n";
+ OS << " continue;\n";
+ OS << " }\n";
OS << " // If this operand is broken for all of the instances of this\n";
OS << " // mnemonic, keep track of it so we can report loc info.\n";
OS << " // If we already had a match that only failed due to a\n";
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index a4114d9815b6..ac8d896d3647 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -842,8 +842,11 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
if (!IAP->isOpMapped(ROName)) {
IAP->addOperand(ROName, i);
+ Record *R = CGA->ResultOperands[i].getRecord();
+ if (R->isSubClassOf("RegisterOperand"))
+ R = R->getValueAsDef("RegClass");
Cond = std::string("MRI.getRegClass(") + Target.getName() + "::" +
- CGA->ResultOperands[i].getRecord()->getName() + "RegClassID)"
+ R->getName() + "RegClassID)"
".contains(MI->getOperand(" + llvm::utostr(i) + ").getReg())";
IAP->addCond(Cond);
} else {
@@ -863,12 +866,18 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
break;
}
- case CodeGenInstAlias::ResultOperand::K_Imm:
- Cond = std::string("MI->getOperand(") +
- llvm::utostr(i) + ").getImm() == " +
- llvm::utostr(CGA->ResultOperands[i].getImm());
+ case CodeGenInstAlias::ResultOperand::K_Imm: {
+ std::string Op = "MI->getOperand(" + llvm::utostr(i) + ")";
+
+        // Just because the alias has an immediate result doesn't mean the
+ // MCInst will. An MCExpr could be present, for example.
+ IAP->addCond(Op + ".isImm()");
+
+ Cond = Op + ".getImm() == "
+ + llvm::utostr(CGA->ResultOperands[i].getImm());
IAP->addCond(Cond);
break;
+ }
case CodeGenInstAlias::ResultOperand::K_Reg:
// If this is zero_reg, something's playing tricks we're not
// equipped to handle.
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index d0416c908131..3ee197443131 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -19,11 +19,11 @@ add_tablegen(llvm-tblgen LLVM
DAGISelMatcher.cpp
DFAPacketizerEmitter.cpp
DisassemblerEmitter.cpp
- EDEmitter.cpp
FastISelEmitter.cpp
FixedLenDecoderEmitter.cpp
InstrInfoEmitter.cpp
IntrinsicEmitter.cpp
+ OptParserEmitter.cpp
PseudoLoweringEmitter.cpp
RegisterInfoEmitter.cpp
SetTheory.cpp
@@ -33,4 +33,5 @@ add_tablegen(llvm-tblgen LLVM
X86DisassemblerTables.cpp
X86ModRMFilters.cpp
X86RecognizableInstr.cpp
+ CTagsEmitter.cpp
)
diff --git a/utils/TableGen/CTagsEmitter.cpp b/utils/TableGen/CTagsEmitter.cpp
new file mode 100644
index 000000000000..8bf777839bdd
--- /dev/null
+++ b/utils/TableGen/CTagsEmitter.cpp
@@ -0,0 +1,99 @@
+//===- CTagsEmitter.cpp - Generate ctags-compatible index ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits an index of definitions in ctags(1) format.
+// A helper script, utils/TableGen/tdtags, provides an easier-to-use
+// interface; run 'tdtags -H' for documentation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ctags-emitter"
+
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+using namespace llvm;
+
+namespace llvm { extern SourceMgr SrcMgr; }
+
+namespace {
+
+class Tag {
+private:
+ const std::string *Id;
+ SMLoc Loc;
+public:
+ Tag(const std::string &Name, const SMLoc Location)
+ : Id(&Name), Loc(Location) {}
+ int operator<(const Tag &B) const { return *Id < *B.Id; }
+ void emit(raw_ostream &OS) const {
+ int BufferID = SrcMgr.FindBufferContainingLoc(Loc);
+ MemoryBuffer *CurMB = SrcMgr.getBufferInfo(BufferID).Buffer;
+ const char *BufferName = CurMB->getBufferIdentifier();
+ std::pair<unsigned, unsigned> LineAndColumn = SrcMgr.getLineAndColumn(Loc);
+ OS << *Id << "\t" << BufferName << "\t" << LineAndColumn.first << "\n";
+ }
+};
+
+class CTagsEmitter {
+private:
+ RecordKeeper &Records;
+public:
+ CTagsEmitter(RecordKeeper &R) : Records(R) {}
+
+ void run(raw_ostream &OS);
+
+private:
+ static SMLoc locate(const Record *R);
+};
+
+} // End anonymous namespace.
+
+SMLoc CTagsEmitter::locate(const Record *R) {
+ ArrayRef<SMLoc> Locs = R->getLoc();
+ if (Locs.empty()) {
+ SMLoc NullLoc;
+ return NullLoc;
+ }
+ return Locs.front();
+}
+
+void CTagsEmitter::run(raw_ostream &OS) {
+ const std::map<std::string, Record *> &Classes = Records.getClasses();
+ const std::map<std::string, Record *> &Defs = Records.getDefs();
+ std::vector<Tag> Tags;
+ // Collect tags.
+ Tags.reserve(Classes.size() + Defs.size());
+ for (std::map<std::string, Record *>::const_iterator I = Classes.begin(),
+ E = Classes.end();
+ I != E; ++I)
+ Tags.push_back(Tag(I->first, locate(I->second)));
+ for (std::map<std::string, Record *>::const_iterator I = Defs.begin(),
+ E = Defs.end();
+ I != E; ++I)
+ Tags.push_back(Tag(I->first, locate(I->second)));
+ // Emit tags.
+ std::sort(Tags.begin(), Tags.end());
+ OS << "!_TAG_FILE_FORMAT\t1\t/original ctags format/\n";
+ OS << "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/\n";
+ for (std::vector<Tag>::const_iterator I = Tags.begin(), E = Tags.end();
+ I != E; ++I)
+ I->emit(OS);
+}
+
+namespace llvm {
+
+void EmitCTags(RecordKeeper &RK, raw_ostream &OS) { CTagsEmitter(RK).run(OS); }
+
+} // End llvm namespace.
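A hedged sample of the emitted index; only the two !_TAG_ header lines are
fixed by the code above, and the record and file names below are invented:

    !_TAG_FILE_FORMAT	1	/original ctags format/
    !_TAG_FILE_SORTED	1	/0=unsorted, 1=sorted, 2=foldcase/
    ADD32rr	X86InstrInfo.td	512
    Register	Target.td	131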
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 3e4f626d4862..c94d384901f0 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -14,10 +14,10 @@
//===----------------------------------------------------------------------===//
#include "CodeGenTarget.h"
-#include "llvm/TableGen/Record.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <map>
#include <string>
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index d5b581b5981a..8e5bb7760f65 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -13,13 +13,13 @@
//===----------------------------------------------------------------------===//
#include "CodeGenDAGPatterns.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
#include <algorithm>
#include <cstdio>
#include <set>
@@ -57,7 +57,7 @@ EEVT::TypeSet::TypeSet(MVT::SimpleValueType VT, TreePattern &TP) {
}
-EEVT::TypeSet::TypeSet(const std::vector<MVT::SimpleValueType> &VTList) {
+EEVT::TypeSet::TypeSet(ArrayRef<MVT::SimpleValueType> VTList) {
assert(!VTList.empty() && "empty list?");
TypeVec.append(VTList.begin(), VTList.end());
@@ -76,7 +76,7 @@ bool EEVT::TypeSet::FillWithPossibleTypes(TreePattern &TP,
bool (*Pred)(MVT::SimpleValueType),
const char *PredicateName) {
assert(isCompletelyUnknown());
- const std::vector<MVT::SimpleValueType> &LegalTypes =
+ ArrayRef<MVT::SimpleValueType> LegalTypes =
TP.getDAGPatterns().getTargetInfo().getLegalValueTypes();
if (TP.hasError())
@@ -956,6 +956,40 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
llvm_unreachable("Invalid ConstraintType!");
}
+// Update the node type to match an instruction operand or result as specified
+// in the ins or outs lists on the instruction definition. Return true if the
+// type was actually changed.
+bool TreePatternNode::UpdateNodeTypeFromInst(unsigned ResNo,
+ Record *Operand,
+ TreePattern &TP) {
+ // The 'unknown' operand indicates that types should be inferred from the
+ // context.
+ if (Operand->isSubClassOf("unknown_class"))
+ return false;
+
+ // The Operand class specifies a type directly.
+ if (Operand->isSubClassOf("Operand"))
+ return UpdateNodeType(ResNo, getValueType(Operand->getValueAsDef("Type")),
+ TP);
+
+ // PointerLikeRegClass has a type that is determined at runtime.
+ if (Operand->isSubClassOf("PointerLikeRegClass"))
+ return UpdateNodeType(ResNo, MVT::iPTR, TP);
+
+ // Both RegisterClass and RegisterOperand operands derive their types from a
+ // register class def.
+ Record *RC = 0;
+ if (Operand->isSubClassOf("RegisterClass"))
+ RC = Operand;
+ else if (Operand->isSubClassOf("RegisterOperand"))
+ RC = Operand->getValueAsDef("RegClass");
+
+ assert(RC && "Unknown operand type");
+ CodeGenTarget &Tgt = TP.getDAGPatterns().getTargetInfo();
+ return UpdateNodeType(ResNo, Tgt.getRegisterClass(RC).getValueTypes(), TP);
+}
+
+
//===----------------------------------------------------------------------===//
// SDNodeInfo implementation
//
@@ -1287,8 +1321,18 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
/// type which should be applied to it. This will infer the type of register
/// references from the register file information, for example.
///
+/// When Unnamed is set, return the type of a DAG operand with no name, such as
+/// the F8RC register class argument in:
+///
+/// (COPY_TO_REGCLASS GPR:$src, F8RC)
+///
+/// When Unnamed is false, return the type of a named DAG operand such as the
+/// GPR:$src operand above.
+///
static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
- bool NotRegisters, TreePattern &TP) {
+ bool NotRegisters,
+ bool Unnamed,
+ TreePattern &TP) {
// Check to see if this is a register operand.
if (R->isSubClassOf("RegisterOperand")) {
assert(ResNo == 0 && "Regoperand ref only has one result!");
@@ -1302,6 +1346,13 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
// Check to see if this is a register or a register class.
if (R->isSubClassOf("RegisterClass")) {
assert(ResNo == 0 && "Regclass ref only has one result!");
+ // An unnamed register class represents itself as an i32 immediate, for
+ // example on a COPY_TO_REGCLASS instruction.
+ if (Unnamed)
+ return EEVT::TypeSet(MVT::i32, TP);
+
+ // In a named operand, the register class provides the possible set of
+ // types.
if (NotRegisters)
return EEVT::TypeSet(); // Unknown.
const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo();
@@ -1327,9 +1378,27 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
return EEVT::TypeSet();
}
- if (R->isSubClassOf("ValueType") || R->isSubClassOf("CondCode")) {
+ if (R->isSubClassOf("ValueType")) {
assert(ResNo == 0 && "This node only has one result!");
- // Using a VTSDNode or CondCodeSDNode.
+ // An unnamed VTSDNode represents itself as an MVT::Other immediate.
+ //
+ // (sext_inreg GPR:$src, i16)
+ // ~~~
+ if (Unnamed)
+ return EEVT::TypeSet(MVT::Other, TP);
+ // With a name, the ValueType simply provides the type of the named
+ // variable.
+ //
+ // (sext_inreg i32:$src, i16)
+ // ~~~~~~~~
+ if (NotRegisters)
+ return EEVT::TypeSet(); // Unknown.
+ return EEVT::TypeSet(getValueType(R), TP);
+ }
+
+ if (R->isSubClassOf("CondCode")) {
+ assert(ResNo == 0 && "This node only has one result!");
+ // Using a CondCodeSDNode.
return EEVT::TypeSet(MVT::Other, TP);
}
@@ -1435,7 +1504,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
bool MadeChange = false;
for (unsigned i = 0, e = Types.size(); i != e; ++i)
MadeChange |= UpdateNodeType(i, getImplicitType(DI->getDef(), i,
- NotRegisters, TP), TP);
+ NotRegisters,
+ !hasName(), TP), TP);
return MadeChange;
}
@@ -1498,25 +1568,6 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
return MadeChange;
}
- if (getOperator()->getName() == "COPY_TO_REGCLASS") {
- bool MadeChange = false;
- MadeChange |= getChild(0)->ApplyTypeConstraints(TP, NotRegisters);
- MadeChange |= getChild(1)->ApplyTypeConstraints(TP, NotRegisters);
-
- assert(getChild(0)->getNumTypes() == 1 &&
- getChild(1)->getNumTypes() == 1 && "Unhandled case");
-
- // child #1 of COPY_TO_REGCLASS should be a register class. We don't care
- // what type it gets, so if it didn't get a concrete type just give it the
- // first viable type from the reg class.
- if (!getChild(1)->hasTypeSet(0) &&
- !getChild(1)->getExtType(0).isCompletelyUnknown()) {
- MVT::SimpleValueType RCVT = getChild(1)->getExtType(0).getTypeList()[0];
- MadeChange |= getChild(1)->UpdateNodeType(0, RCVT, TP);
- }
- return MadeChange;
- }
-
if (const CodeGenIntrinsic *Int = getIntrinsicInfo(CDP)) {
bool MadeChange = false;
@@ -1575,26 +1626,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
// (outs) list of the instruction.
// FIXME: Cap at one result so far.
unsigned NumResultsToAdd = InstInfo.Operands.NumDefs ? 1 : 0;
- for (unsigned ResNo = 0; ResNo != NumResultsToAdd; ++ResNo) {
- Record *ResultNode = Inst.getResult(ResNo);
-
- if (ResultNode->isSubClassOf("PointerLikeRegClass")) {
- MadeChange |= UpdateNodeType(ResNo, MVT::iPTR, TP);
- } else if (ResultNode->isSubClassOf("RegisterOperand")) {
- Record *RegClass = ResultNode->getValueAsDef("RegClass");
- const CodeGenRegisterClass &RC =
- CDP.getTargetInfo().getRegisterClass(RegClass);
- MadeChange |= UpdateNodeType(ResNo, RC.getValueTypes(), TP);
- } else if (ResultNode->isSubClassOf("unknown_class")) {
- // Nothing to do.
- } else {
- assert(ResultNode->isSubClassOf("RegisterClass") &&
- "Operands should be register classes!");
- const CodeGenRegisterClass &RC =
- CDP.getTargetInfo().getRegisterClass(ResultNode);
- MadeChange |= UpdateNodeType(ResNo, RC.getValueTypes(), TP);
- }
- }
+ for (unsigned ResNo = 0; ResNo != NumResultsToAdd; ++ResNo)
+ MadeChange |= UpdateNodeTypeFromInst(ResNo, Inst.getResult(ResNo), TP);
// If the instruction has implicit defs, we apply the first one as a result.
// FIXME: This sucks, it should apply all implicit defs.
@@ -1636,30 +1669,44 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
return false;
}
- MVT::SimpleValueType VT;
TreePatternNode *Child = getChild(ChildNo++);
unsigned ChildResNo = 0; // Instructions always use res #0 of their op.
- if (OperandNode->isSubClassOf("RegisterClass")) {
- const CodeGenRegisterClass &RC =
- CDP.getTargetInfo().getRegisterClass(OperandNode);
- MadeChange |= Child->UpdateNodeType(ChildResNo, RC.getValueTypes(), TP);
- } else if (OperandNode->isSubClassOf("RegisterOperand")) {
- Record *RegClass = OperandNode->getValueAsDef("RegClass");
- const CodeGenRegisterClass &RC =
- CDP.getTargetInfo().getRegisterClass(RegClass);
- MadeChange |= Child->UpdateNodeType(ChildResNo, RC.getValueTypes(), TP);
- } else if (OperandNode->isSubClassOf("Operand")) {
- VT = getValueType(OperandNode->getValueAsDef("Type"));
- MadeChange |= Child->UpdateNodeType(ChildResNo, VT, TP);
- } else if (OperandNode->isSubClassOf("PointerLikeRegClass")) {
- MadeChange |= Child->UpdateNodeType(ChildResNo, MVT::iPTR, TP);
- } else if (OperandNode->isSubClassOf("unknown_class")) {
- // Nothing to do.
- } else
- llvm_unreachable("Unknown operand type!");
+ // If the operand has sub-operands, they may be provided by distinct
+ // child patterns, so attempt to match each sub-operand separately.
+ if (OperandNode->isSubClassOf("Operand")) {
+ DagInit *MIOpInfo = OperandNode->getValueAsDag("MIOperandInfo");
+ if (unsigned NumArgs = MIOpInfo->getNumArgs()) {
+ // But don't do that if the whole operand is being provided by
+ // a single ComplexPattern.
+ const ComplexPattern *AM = Child->getComplexPatternInfo(CDP);
+ if (!AM || AM->getNumOperands() < NumArgs) {
+ // Match first sub-operand against the child we already have.
+ Record *SubRec = cast<DefInit>(MIOpInfo->getArg(0))->getDef();
+ MadeChange |=
+ Child->UpdateNodeTypeFromInst(ChildResNo, SubRec, TP);
+
+ // And the remaining sub-operands against subsequent children.
+ for (unsigned Arg = 1; Arg < NumArgs; ++Arg) {
+ if (ChildNo >= getNumChildren()) {
+ TP.error("Instruction '" + getOperator()->getName() +
+ "' expects more operands than were provided.");
+ return false;
+ }
+ Child = getChild(ChildNo++);
+
+ SubRec = cast<DefInit>(MIOpInfo->getArg(Arg))->getDef();
+ MadeChange |=
+ Child->UpdateNodeTypeFromInst(ChildResNo, SubRec, TP);
+ }
+ continue;
+ }
+ }
+ }
- MadeChange |= Child->ApplyTypeConstraints(TP, NotRegisters);
+ // If we didn't match by pieces above, attempt to match the whole
+ // operand now.
+ MadeChange |= Child->UpdateNodeTypeFromInst(ChildResNo, OperandNode, TP);
}
if (ChildNo != getNumChildren()) {
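The new loop above lets a single instruction operand whose MIOperandInfo
declares sub-operands be populated by several consecutive pattern children.
A hedged, hypothetical .td sketch (operand and instruction names invented):
given

    def memri : Operand<iPTR> {
      let MIOperandInfo = (ops i32imm:$off, ptr_rc:$base);
    }

a result pattern may now supply the offset and base as two separate
children, e.g. (STW $val, imm:$off, GPR:$base), with each child type-checked
against the corresponding sub-operand, instead of forcing the whole address
to come from one ComplexPattern.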
@@ -1668,6 +1715,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
return false;
}
+ for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
+ MadeChange |= getChild(i)->ApplyTypeConstraints(TP, NotRegisters);
return MadeChange;
}
@@ -1817,6 +1866,16 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
return Res;
}
+ // ?:$name or just $name.
+ if (TheInit == UnsetInit::get()) {
+ if (OpName.empty())
+ error("'?' argument requires a name to match with operand list");
+ TreePatternNode *Res = new TreePatternNode(TheInit, 1);
+ Args.push_back(OpName);
+ Res->setName(OpName);
+ return Res;
+ }
+
if (IntInit *II = dyn_cast<IntInit>(TheInit)) {
if (!OpName.empty())
error("Constant int argument should not have a name!");
@@ -2383,6 +2442,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
I->error("set destination should be a register!");
if (Val->getDef()->isSubClassOf("RegisterClass") ||
+ Val->getDef()->isSubClassOf("ValueType") ||
Val->getDef()->isSubClassOf("RegisterOperand") ||
Val->getDef()->isSubClassOf("PointerLikeRegClass")) {
if (Dest->getName().empty())
@@ -2599,6 +2659,25 @@ getInstructionsInTree(TreePatternNode *Tree, SmallVectorImpl<Record*> &Instrs) {
getInstructionsInTree(Tree->getChild(i), Instrs);
}
+/// Check the class of a pattern leaf node against the instruction operand it
+/// represents.
+static bool checkOperandClass(CGIOperandList::OperandInfo &OI,
+ Record *Leaf) {
+ if (OI.Rec == Leaf)
+ return true;
+
+ // Allow direct value types to be used in instruction set patterns.
+ // The type will be checked later.
+ if (Leaf->isSubClassOf("ValueType"))
+ return true;
+
+ // Patterns can also be ComplexPattern instances.
+ if (Leaf->isSubClassOf("ComplexPattern"))
+ return true;
+
+ return false;
+}
+
/// ParseInstructions - Parse all of the instructions, inlining and resolving
/// any fragments involved. This populates the Instructions list with fully
/// resolved instructions.
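checkOperandClass loosens the leaf-versus-operand match so that instruction
set patterns may name a value by its type instead of repeating the register
class; class/type consistency is still verified during later type inference.
A hedged before/after sketch in hypothetical .td syntax:

    // previously required:  [(set GPR:$dst, (add GPR:$a, GPR:$b))]
    // now also accepted:    [(set i32:$dst, (add i32:$a, i32:$b))]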
@@ -2708,7 +2787,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
I->error("Operand $" + OpName + " should be a set destination: all "
"outputs must occur before inputs in operand list!");
- if (CGI.Operands[i].Rec != R)
+ if (!checkOperandClass(CGI.Operands[i], R))
I->error("Operand $" + OpName + " class mismatch!");
// Remember the return type.
@@ -2747,7 +2826,7 @@ void CodeGenDAGPatterns::ParseInstructions() {
if (InVal->isLeaf() && isa<DefInit>(InVal->getLeafValue())) {
Record *InRec = static_cast<DefInit*>(InVal->getLeafValue())->getDef();
- if (Op.Rec != InRec && !InRec->isSubClassOf("ComplexPattern"))
+ if (!checkOperandClass(Op, InRec))
I->error("Operand $" + OpName + "'s register class disagrees"
" between the operand and pattern");
}
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 9be763f2ff11..7c2fa3674108 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -15,15 +15,15 @@
#ifndef CODEGEN_DAGPATTERNS_H
#define CODEGEN_DAGPATTERNS_H
-#include "CodeGenTarget.h"
#include "CodeGenIntrinsics.h"
+#include "CodeGenTarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
-#include <set>
#include <algorithm>
-#include <vector>
#include <map>
+#include <set>
+#include <vector>
namespace llvm {
class Record;
@@ -59,7 +59,7 @@ namespace EEVT {
public:
TypeSet() {}
TypeSet(MVT::SimpleValueType VT, TreePattern &TP);
- TypeSet(const std::vector<MVT::SimpleValueType> &VTList);
+ TypeSet(ArrayRef<MVT::SimpleValueType> VTList);
bool isCompletelyUnknown() const { return TypeVec.empty(); }
@@ -334,6 +334,7 @@ public:
}
~TreePatternNode();
+ bool hasName() const { return !Name.empty(); }
const std::string &getName() const { return Name; }
void setName(StringRef N) { Name.assign(N.begin(), N.end()); }
@@ -463,6 +464,11 @@ public: // Higher level manipulation routines.
return Types[ResNo].MergeInTypeInfo(EEVT::TypeSet(InTy, TP), TP);
}
+ // Update node type with types inferred from an instruction operand or result
+ // def from the ins/outs lists.
+ // Return true if the type changed.
+ bool UpdateNodeTypeFromInst(unsigned ResNo, Record *Operand, TreePattern &TP);
+
/// ContainsUnresolvedType - Return true if this tree contains any
/// unresolved types.
bool ContainsUnresolvedType() const {
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 0a8684d3da5a..367320498f59 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -13,11 +13,11 @@
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
#include <set>
using namespace llvm;
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 55d44399dff9..d1e115355483 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -14,12 +14,12 @@
#ifndef CODEGEN_INSTRUCTION_H
#define CODEGEN_INSTRUCTION_H
-#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/SourceMgr.h"
#include <string>
-#include <vector>
#include <utility>
+#include <vector>
namespace llvm {
class Record;
diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h
index 6efe952ea2bb..f0570f95b8ab 100644
--- a/utils/TableGen/CodeGenIntrinsics.h
+++ b/utils/TableGen/CodeGenIntrinsics.h
@@ -14,9 +14,9 @@
#ifndef CODEGEN_INTRINSIC_H
#define CODEGEN_INTRINSIC_H
+#include "llvm/CodeGen/ValueTypes.h"
#include <string>
#include <vector>
-#include "llvm/CodeGen/ValueTypes.h"
namespace llvm {
class Record;
diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp
index 1653d67da97a..ee32aa13e034 100644
--- a/utils/TableGen/CodeGenMapTable.cpp
+++ b/utils/TableGen/CodeGenMapTable.cpp
@@ -533,12 +533,11 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) {
II = ColFieldValueMap.begin(), IE = ColFieldValueMap.end();
II != IE; II++) {
std::vector<Init*> FieldValues = (*II).second;
- unsigned FieldSize = FieldValues.size();
// Delete duplicate entries from ColFieldValueMap
- for (unsigned i = 0; i < FieldSize - 1; i++) {
+ for (unsigned i = 0; i < FieldValues.size() - 1; i++) {
Init *CurVal = FieldValues[i];
- for (unsigned j = i+1; j < FieldSize; j++) {
+ for (unsigned j = i+1; j < FieldValues.size(); j++) {
if (CurVal == FieldValues[j]) {
FieldValues.erase(FieldValues.begin()+j);
}
@@ -547,9 +546,9 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) {
// Emit enumerated values for the column fields.
OS << "enum " << (*II).first << " {\n";
- for (unsigned i = 0; i < FieldSize; i++) {
+ for (unsigned i = 0, endFV = FieldValues.size(); i < endFV; i++) {
OS << "\t" << (*II).first << "_" << FieldValues[i]->getAsUnquotedString();
- if (i != FieldValues.size() - 1)
+ if (i != endFV - 1)
OS << ",\n";
else
OS << "\n};\n\n";
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index 580e319f24ec..993b8dba4267 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -14,12 +14,12 @@
#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
-#include "llvm/TableGen/Error.h"
#include "llvm/ADT/IntEqClasses.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/TableGen/Error.h"
using namespace llvm;
@@ -636,8 +636,10 @@ struct TupleExpander : SetTheory::Expander {
Elts.insert(NewReg);
// Copy Proto super-classes.
- for (unsigned i = 0, e = Proto->getSuperClasses().size(); i != e; ++i)
- NewReg->addSuperClass(Proto->getSuperClasses()[i]);
+ ArrayRef<Record *> Supers = Proto->getSuperClasses();
+ ArrayRef<SMRange> Ranges = Proto->getSuperClassRanges();
+ for (unsigned i = 0, e = Supers.size(); i != e; ++i)
+ NewReg->addSuperClass(Supers[i], Ranges[i]);
// Copy Proto fields.
for (unsigned i = 0, e = Proto->getValues().size(); i != e; ++i) {
@@ -701,7 +703,9 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
// Rename anonymous register classes.
if (R->getName().size() > 9 && R->getName()[9] == '.') {
static unsigned AnonCounter = 0;
- R->setName("AnonRegClass_"+utostr(AnonCounter++));
+ R->setName("AnonRegClass_" + utostr(AnonCounter));
+ // MSVC2012 ICEs if AnonCounter++ is directly passed to utostr.
+ ++AnonCounter;
}
std::vector<Record*> TypeList = R->getValueAsListOfDefs("RegTypes");
@@ -1196,6 +1200,12 @@ void CodeGenRegBank::computeSubRegIndexLaneMasks() {
if (Idx->getComposites().empty()) {
Idx->LaneMask = 1u << Bit;
// Share bit 31 in the unlikely case there are more than 32 leaves.
+ //
+ // Sharing bits is harmless; it allows graceful degradation in targets
+ // with more than 32 vector lanes. They simply get a limited resolution
+ // view of lanes beyond the 32nd.
+ //
+ // See also the comment for getSubRegIndexLaneMask().
if (Bit < 31) ++Bit;
} else {
Idx->LaneMask = 0;
@@ -1589,6 +1599,35 @@ void CodeGenRegBank::computeRegUnitSets() {
}
assert(!RegClassUnitSets[RCIdx].empty() && "missing unit set for regclass");
}
+
+ // For each register unit, ensure that we have the list of UnitSets that
+ // contain the unit. Normally, this matches an existing list of UnitSets for a
+ // register class. If not, we create a new entry in RegClassUnitSets as a
+ // "fake" register class.
+ for (unsigned UnitIdx = 0, UnitEnd = NumNativeRegUnits;
+ UnitIdx < UnitEnd; ++UnitIdx) {
+ std::vector<unsigned> RUSets;
+ for (unsigned i = 0, e = RegUnitSets.size(); i != e; ++i) {
+ RegUnitSet &RUSet = RegUnitSets[i];
+ if (std::find(RUSet.Units.begin(), RUSet.Units.end(), UnitIdx)
+ == RUSet.Units.end())
+ continue;
+ RUSets.push_back(i);
+ }
+ unsigned RCUnitSetsIdx = 0;
+ for (unsigned e = RegClassUnitSets.size();
+ RCUnitSetsIdx != e; ++RCUnitSetsIdx) {
+ if (RegClassUnitSets[RCUnitSetsIdx] == RUSets) {
+ break;
+ }
+ }
+ RegUnits[UnitIdx].RegClassUnitSetsIdx = RCUnitSetsIdx;
+ if (RCUnitSetsIdx == RegClassUnitSets.size()) {
+ // Create a new list of UnitSets as a "fake" register class.
+ RegClassUnitSets.resize(RCUnitSetsIdx + 1);
+ RegClassUnitSets[RCUnitSetsIdx].swap(RUSets);
+ }
+ }
}
void CodeGenRegBank::computeDerivedInfo() {
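The per-unit pass added to computeRegUnitSets above reuses an existing RegClassUnitSets entry when one matches and otherwise appends a "fake" register class, recording the chosen index in RegUnits[UnitIdx].RegClassUnitSetsIdx. The find-or-create shape, reduced to a standalone helper (stand-in types, not CodeGenRegBank):

#include <vector>

// Return the index of an existing identical list, or append Candidate as
// a new entry (the "fake" register class) and return its index.
static unsigned findOrAddList(std::vector<std::vector<unsigned> > &Lists,
                              std::vector<unsigned> &Candidate) {
  for (unsigned i = 0, e = Lists.size(); i != e; ++i)
    if (Lists[i] == Candidate)
      return i;
  Lists.push_back(std::vector<unsigned>());
  Lists.back().swap(Candidate); // mirrors the swap in the hunk above
  return Lists.size() - 1;
}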
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index e41107415612..4f2cc28d4924 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -16,17 +16,17 @@
#define CODEGEN_REGISTERS_H
#include "SetTheory.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TableGen/Record.h"
#include <cstdlib>
#include <map>
-#include <string>
#include <set>
+#include <string>
#include <vector>
namespace llvm {
@@ -261,7 +261,7 @@ namespace llvm {
public:
unsigned EnumValue;
std::string Namespace;
- std::vector<MVT::SimpleValueType> VTs;
+ SmallVector<MVT::SimpleValueType, 4> VTs;
unsigned SpillSize;
unsigned SpillAlignment;
int CopyCost;
@@ -274,7 +274,7 @@ namespace llvm {
const std::string &getName() const { return Name; }
std::string getQualifiedName() const;
- const std::vector<MVT::SimpleValueType> &getValueTypes() const {return VTs;}
+ ArrayRef<MVT::SimpleValueType> getValueTypes() const {return VTs;}
unsigned getNumValueTypes() const { return VTs.size(); }
MVT::SimpleValueType getValueTypeNum(unsigned VTNum) const {
@@ -403,7 +403,11 @@ namespace llvm {
// these two registers and their super-registers.
const CodeGenRegister *Roots[2];
- RegUnit() : Weight(0) { Roots[0] = Roots[1] = 0; }
+ // Index into RegClassUnitSets where we can find the list of UnitSets that
+ // contain this unit.
+ unsigned RegClassUnitSetsIdx;
+
+ RegUnit() : Weight(0), RegClassUnitSetsIdx(0) { Roots[0] = Roots[1] = 0; }
ArrayRef<const CodeGenRegister*> getRoots() const {
assert(!(Roots[1] && !Roots[0]) && "Invalid roots array");
@@ -462,6 +466,10 @@ namespace llvm {
// Map RegisterClass index to the index of the RegUnitSet that contains the
// class's units and any inferred RegUnit supersets.
+ //
+ // NOTE: This could grow beyond the number of register classes when we map
+ // register units to lists of unit sets. If the list of unit sets does not
+ // already exist for a register class, we create a new entry in this vector.
std::vector<std::vector<unsigned> > RegClassUnitSets;
// Add RC to *2RC maps.
@@ -615,6 +623,13 @@ namespace llvm {
return RegUnitSets[Idx];
}
+ // The number of pressure set lists may be larger than the number of
+ // register classes if some register units appeared in a list of sets that
+ // did not correspond to an existing register class.
+ unsigned getNumRegClassPressureSetLists() const {
+ return RegClassUnitSets.size();
+ }
+
// Get a list of pressure set IDs for a register class. Liveness of a
// register in this class impacts each pressure set in this list by the
// weight of the register. An exact solution requires all registers in a
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index 63cc97a8c1da..c02f0843d651 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -16,10 +16,10 @@
#include "CodeGenSchedule.h"
#include "CodeGenTarget.h"
-#include "llvm/TableGen/Error.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/TableGen/Error.h"
using namespace llvm;
@@ -88,7 +88,7 @@ struct InstRegexOp : public SetTheory::Operator {
/// CodeGenModels ctor interprets machine model records and populates maps.
CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
const CodeGenTarget &TGT):
- Records(RK), Target(TGT), NumItineraryClasses(0) {
+ Records(RK), Target(TGT) {
Sets.addFieldExpander("InstRW", "Instrs");
@@ -217,7 +217,7 @@ void CodeGenSchedModels::collectSchedRW() {
for (CodeGenTarget::inst_iterator I = Target.inst_begin(),
E = Target.inst_end(); I != E; ++I) {
Record *SchedDef = (*I)->TheDef;
- if (!SchedDef->isSubClassOf("Sched"))
+ if (SchedDef->isValueUnset("SchedRW"))
continue;
RecVec RWs = SchedDef->getValueAsListOfDefs("SchedRW");
for (RecIter RWI = RWs.begin(), RWE = RWs.end(); RWI != RWE; ++RWI) {
@@ -502,40 +502,25 @@ void CodeGenSchedModels::collectSchedClasses() {
// NoItinerary is always the first class at Idx=0
SchedClasses.resize(1);
- SchedClasses.back().Name = "NoItinerary";
+ SchedClasses.back().Index = 0;
+ SchedClasses.back().Name = "NoInstrModel";
+ SchedClasses.back().ItinClassDef = Records.getDef("NoItinerary");
SchedClasses.back().ProcIndices.push_back(0);
- SchedClassIdxMap[SchedClasses.back().Name] = 0;
- // Gather and sort all itinerary classes used by instruction descriptions.
- RecVec ItinClassList;
+ // Create a SchedClass for each unique combination of itinerary class and
+ // SchedRW list.
for (CodeGenTarget::inst_iterator I = Target.inst_begin(),
E = Target.inst_end(); I != E; ++I) {
Record *ItinDef = (*I)->TheDef->getValueAsDef("Itinerary");
- // Map a new SchedClass with no index.
- if (!SchedClassIdxMap.count(ItinDef->getName())) {
- SchedClassIdxMap[ItinDef->getName()] = 0;
- ItinClassList.push_back(ItinDef);
- }
- }
- // Assign each itinerary class unique number, skipping NoItinerary==0
- NumItineraryClasses = ItinClassList.size();
- std::sort(ItinClassList.begin(), ItinClassList.end(), LessRecord());
- for (unsigned i = 0, N = NumItineraryClasses; i < N; i++) {
- Record *ItinDef = ItinClassList[i];
- SchedClassIdxMap[ItinDef->getName()] = SchedClasses.size();
- SchedClasses.push_back(CodeGenSchedClass(ItinDef));
- }
- // Infer classes from SchedReadWrite resources listed for each
- // instruction definition that inherits from class Sched.
- for (CodeGenTarget::inst_iterator I = Target.inst_begin(),
- E = Target.inst_end(); I != E; ++I) {
- if (!(*I)->TheDef->isSubClassOf("Sched"))
- continue;
IdxVec Writes, Reads;
- findRWs((*I)->TheDef->getValueAsListOfDefs("SchedRW"), Writes, Reads);
+ if (!(*I)->TheDef->isValueUnset("SchedRW"))
+ findRWs((*I)->TheDef->getValueAsListOfDefs("SchedRW"), Writes, Reads);
+
// ProcIdx == 0 indicates the class applies to all processors.
IdxVec ProcIndices(1, 0);
- addSchedClass(Writes, Reads, ProcIndices);
+
+ unsigned SCIdx = addSchedClass(ItinDef, Writes, Reads, ProcIndices);
+ InstrClassMap[(*I)->TheDef] = SCIdx;
}
// Create classes for InstRW defs.
RecVec InstRWDefs = Records.getAllDerivedDefinitions("InstRW");
@@ -549,68 +534,70 @@ void CodeGenSchedModels::collectSchedClasses() {
DEBUG(EnableDump = true);
if (!EnableDump)
return;
+
for (CodeGenTarget::inst_iterator I = Target.inst_begin(),
E = Target.inst_end(); I != E; ++I) {
- Record *SchedDef = (*I)->TheDef;
+
std::string InstName = (*I)->TheDef->getName();
- if (SchedDef->isSubClassOf("Sched")) {
+ unsigned SCIdx = InstrClassMap.lookup((*I)->TheDef);
+ if (!SCIdx) {
+ dbgs() << "No machine model for " << (*I)->TheDef->getName() << '\n';
+ continue;
+ }
+ CodeGenSchedClass &SC = getSchedClass(SCIdx);
+ if (SC.ProcIndices[0] != 0)
+ PrintFatalError((*I)->TheDef->getLoc(), "Instruction's sched class "
+ "must not be subtarget specific.");
+
+ IdxVec ProcIndices;
+ if (SC.ItinClassDef->getName() != "NoItinerary") {
+ ProcIndices.push_back(0);
+ dbgs() << "Itinerary for " << InstName << ": "
+ << SC.ItinClassDef->getName() << '\n';
+ }
+ if (!SC.Writes.empty()) {
+ ProcIndices.push_back(0);
+ dbgs() << "SchedRW machine model for " << InstName;
+ for (IdxIter WI = SC.Writes.begin(), WE = SC.Writes.end(); WI != WE; ++WI)
+ dbgs() << " " << SchedWrites[*WI].Name;
+ for (IdxIter RI = SC.Reads.begin(), RE = SC.Reads.end(); RI != RE; ++RI)
+ dbgs() << " " << SchedReads[*RI].Name;
+ dbgs() << '\n';
+ }
+ const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs;
+ for (RecIter RWI = RWDefs.begin(), RWE = RWDefs.end();
+ RWI != RWE; ++RWI) {
+ const CodeGenProcModel &ProcModel =
+ getProcModel((*RWI)->getValueAsDef("SchedModel"));
+ ProcIndices.push_back(ProcModel.Index);
+ dbgs() << "InstRW on " << ProcModel.ModelName << " for " << InstName;
IdxVec Writes;
IdxVec Reads;
- findRWs((*I)->TheDef->getValueAsListOfDefs("SchedRW"), Writes, Reads);
- dbgs() << "SchedRW machine model for " << InstName;
+ findRWs((*RWI)->getValueAsListOfDefs("OperandReadWrites"),
+ Writes, Reads);
for (IdxIter WI = Writes.begin(), WE = Writes.end(); WI != WE; ++WI)
dbgs() << " " << SchedWrites[*WI].Name;
for (IdxIter RI = Reads.begin(), RE = Reads.end(); RI != RE; ++RI)
dbgs() << " " << SchedReads[*RI].Name;
dbgs() << '\n';
}
- unsigned SCIdx = InstrClassMap.lookup((*I)->TheDef);
- if (SCIdx) {
- const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs;
- for (RecIter RWI = RWDefs.begin(), RWE = RWDefs.end();
- RWI != RWE; ++RWI) {
- const CodeGenProcModel &ProcModel =
- getProcModel((*RWI)->getValueAsDef("SchedModel"));
- dbgs() << "InstRW on " << ProcModel.ModelName << " for " << InstName;
- IdxVec Writes;
- IdxVec Reads;
- findRWs((*RWI)->getValueAsListOfDefs("OperandReadWrites"),
- Writes, Reads);
- for (IdxIter WI = Writes.begin(), WE = Writes.end(); WI != WE; ++WI)
- dbgs() << " " << SchedWrites[*WI].Name;
- for (IdxIter RI = Reads.begin(), RE = Reads.end(); RI != RE; ++RI)
- dbgs() << " " << SchedReads[*RI].Name;
- dbgs() << '\n';
- }
- continue;
- }
- if (!SchedDef->isSubClassOf("Sched")
- && (SchedDef->getValueAsDef("Itinerary")->getName() == "NoItinerary")) {
- dbgs() << "No machine model for " << (*I)->TheDef->getName() << '\n';
+ for (std::vector<CodeGenProcModel>::iterator PI = ProcModels.begin(),
+ PE = ProcModels.end(); PI != PE; ++PI) {
+ if (!std::count(ProcIndices.begin(), ProcIndices.end(), PI->Index))
+ dbgs() << "No machine model for " << (*I)->TheDef->getName()
+ << " on processor " << PI->ModelName << '\n';
}
}
}
-unsigned CodeGenSchedModels::getSchedClassIdx(
- const RecVec &RWDefs) const {
-
- IdxVec Writes, Reads;
- findRWs(RWDefs, Writes, Reads);
- return findSchedClassIdx(Writes, Reads);
-}
-
/// Find a SchedClass that has been inferred from a per-operand list of
/// SchedWrites and SchedReads.
-unsigned CodeGenSchedModels::findSchedClassIdx(const IdxVec &Writes,
+unsigned CodeGenSchedModels::findSchedClassIdx(Record *ItinClassDef,
+ const IdxVec &Writes,
const IdxVec &Reads) const {
for (SchedClassIter I = schedClassBegin(), E = schedClassEnd(); I != E; ++I) {
- // Classes with InstRWs may have the same Writes/Reads as a class originally
- // produced by a SchedRW definition. We need to be able to recover the
- // original class index for processors that don't match any InstRWs.
- if (I->ItinClassDef || !I->InstRWs.empty())
- continue;
-
- if (I->Writes == Writes && I->Reads == Reads) {
+ if (I->ItinClassDef == ItinClassDef
+ && I->Writes == Writes && I->Reads == Reads) {
return I - schedClassBegin();
}
}
@@ -621,29 +608,17 @@ unsigned CodeGenSchedModels::findSchedClassIdx(const IdxVec &Writes,
unsigned CodeGenSchedModels::getSchedClassIdx(
const CodeGenInstruction &Inst) const {
- unsigned SCIdx = InstrClassMap.lookup(Inst.TheDef);
- if (SCIdx)
- return SCIdx;
-
- // If this opcode isn't mapped by the subtarget fallback to the instruction
- // definition's SchedRW or ItinDef values.
- if (Inst.TheDef->isSubClassOf("Sched")) {
- RecVec RWs = Inst.TheDef->getValueAsListOfDefs("SchedRW");
- return getSchedClassIdx(RWs);
- }
- Record *ItinDef = Inst.TheDef->getValueAsDef("Itinerary");
- assert(SchedClassIdxMap.count(ItinDef->getName()) && "missing ItinClass");
- unsigned Idx = SchedClassIdxMap.lookup(ItinDef->getName());
- assert(Idx <= NumItineraryClasses && "bad ItinClass index");
- return Idx;
+ return InstrClassMap.lookup(Inst.TheDef);
}
std::string CodeGenSchedModels::createSchedClassName(
- const IdxVec &OperWrites, const IdxVec &OperReads) {
+ Record *ItinClassDef, const IdxVec &OperWrites, const IdxVec &OperReads) {
std::string Name;
+ if (ItinClassDef && ItinClassDef->getName() != "NoItinerary")
+ Name = ItinClassDef->getName();
for (IdxIter WI = OperWrites.begin(), WE = OperWrites.end(); WI != WE; ++WI) {
- if (WI != OperWrites.begin())
+ if (!Name.empty())
Name += '_';
Name += SchedWrites[*WI].Name;
}
@@ -665,17 +640,18 @@ std::string CodeGenSchedModels::createSchedClassName(const RecVec &InstDefs) {
return Name;
}
-/// Add an inferred sched class from a per-operand list of SchedWrites and
-/// SchedReads. ProcIndices contains the set of IDs of processors that may
-/// utilize this class.
-unsigned CodeGenSchedModels::addSchedClass(const IdxVec &OperWrites,
+/// Add an inferred sched class from an itinerary class and per-operand list of
+/// SchedWrites and SchedReads. ProcIndices contains the set of IDs of
+/// processors that may utilize this class.
+unsigned CodeGenSchedModels::addSchedClass(Record *ItinClassDef,
+ const IdxVec &OperWrites,
const IdxVec &OperReads,
const IdxVec &ProcIndices)
{
assert(!ProcIndices.empty() && "expect at least one ProcIdx");
- unsigned Idx = findSchedClassIdx(OperWrites, OperReads);
- if (Idx) {
+ unsigned Idx = findSchedClassIdx(ItinClassDef, OperWrites, OperReads);
+ if (Idx || SchedClasses[0].isKeyEqual(ItinClassDef, OperWrites, OperReads)) {
IdxVec PI;
std::set_union(SchedClasses[Idx].ProcIndices.begin(),
SchedClasses[Idx].ProcIndices.end(),
@@ -687,7 +663,9 @@ unsigned CodeGenSchedModels::addSchedClass(const IdxVec &OperWrites,
Idx = SchedClasses.size();
SchedClasses.resize(Idx+1);
CodeGenSchedClass &SC = SchedClasses.back();
- SC.Name = createSchedClassName(OperWrites, OperReads);
+ SC.Index = Idx;
+ SC.Name = createSchedClassName(ItinClassDef, OperWrites, OperReads);
+ SC.ItinClassDef = ItinClassDef;
SC.Writes = OperWrites;
SC.Reads = OperReads;
SC.ProcIndices = ProcIndices;
@@ -709,19 +687,10 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) {
PrintFatalError(InstRWDef->getLoc(), "No matching instruction opcodes");
for (RecIter I = InstDefs->begin(), E = InstDefs->end(); I != E; ++I) {
- unsigned SCIdx = 0;
InstClassMapTy::const_iterator Pos = InstrClassMap.find(*I);
- if (Pos != InstrClassMap.end())
- SCIdx = Pos->second;
- else {
- // This instruction has not been mapped yet. Get the original class. All
- // instructions in the same InstrRW class must be from the same original
- // class because that is the fall-back class for other processors.
- Record *ItinDef = (*I)->getValueAsDef("Itinerary");
- SCIdx = SchedClassIdxMap.lookup(ItinDef->getName());
- if (!SCIdx && (*I)->isSubClassOf("Sched"))
- SCIdx = getSchedClassIdx((*I)->getValueAsListOfDefs("SchedRW"));
- }
+ if (Pos == InstrClassMap.end())
+ PrintFatalError((*I)->getLoc(), "No sched class for instruction.");
+ unsigned SCIdx = Pos->second;
unsigned CIdx = 0, CEnd = ClassInstrs.size();
for (; CIdx != CEnd; ++CIdx) {
if (ClassInstrs[CIdx].first == SCIdx)
@@ -741,7 +710,7 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) {
ArrayRef<Record*> InstDefs = ClassInstrs[CIdx].second;
// If all the instrs in the current class are accounted for, then leave
// them mapped to their old class.
- if (SchedClasses[OldSCIdx].InstRWs.size() == InstDefs.size()) {
+ if (OldSCIdx && SchedClasses[OldSCIdx].InstRWs.size() == InstDefs.size()) {
assert(SchedClasses[OldSCIdx].ProcIndices[0] == 0 &&
"expected a generic SchedClass");
continue;
@@ -749,6 +718,7 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) {
unsigned SCIdx = SchedClasses.size();
SchedClasses.resize(SCIdx+1);
CodeGenSchedClass &SC = SchedClasses.back();
+ SC.Index = SCIdx;
SC.Name = createSchedClassName(InstDefs);
// Preserve ItinDef and Writes/Reads for processors without an InstRW entry.
SC.ItinClassDef = SchedClasses[OldSCIdx].ItinClassDef;
@@ -780,32 +750,48 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) {
}
}
+// True if collectProcItins found anything.
+bool CodeGenSchedModels::hasItineraries() const {
+ for (CodeGenSchedModels::ProcIter PI = procModelBegin(), PE = procModelEnd();
+ PI != PE; ++PI) {
+ if (PI->hasItineraries())
+ return true;
+ }
+ return false;
+}
+
// Gather the processor itineraries.
void CodeGenSchedModels::collectProcItins() {
for (std::vector<CodeGenProcModel>::iterator PI = ProcModels.begin(),
PE = ProcModels.end(); PI != PE; ++PI) {
CodeGenProcModel &ProcModel = *PI;
- RecVec ItinRecords = ProcModel.ItinsDef->getValueAsListOfDefs("IID");
- // Skip empty itinerary.
- if (ItinRecords.empty())
+ if (!ProcModel.hasItineraries())
continue;
- ProcModel.ItinDefList.resize(NumItineraryClasses+1);
+ RecVec ItinRecords = ProcModel.ItinsDef->getValueAsListOfDefs("IID");
+ assert(!ItinRecords.empty() && "ProcModel.hasItineraries is incorrect");
+
+ // Populate ItinDefList with Itinerary records.
+ ProcModel.ItinDefList.resize(NumInstrSchedClasses);
// Insert each itinerary data record in the correct position within
// the processor model's ItinDefList.
for (unsigned i = 0, N = ItinRecords.size(); i < N; i++) {
Record *ItinData = ItinRecords[i];
Record *ItinDef = ItinData->getValueAsDef("TheClass");
- if (!SchedClassIdxMap.count(ItinDef->getName())) {
+ bool FoundClass = false;
+ for (SchedClassIter SCI = schedClassBegin(), SCE = schedClassEnd();
+ SCI != SCE; ++SCI) {
+ // Multiple SchedClasses may share an itinerary. Update all of them.
+ if (SCI->ItinClassDef == ItinDef) {
+ ProcModel.ItinDefList[SCI->Index] = ItinData;
+ FoundClass = true;
+ }
+ }
+ if (!FoundClass) {
DEBUG(dbgs() << ProcModel.ItinsDef->getName()
- << " has unused itinerary class " << ItinDef->getName() << '\n');
- continue;
+ << " missing class for itinerary " << ItinDef->getName() << '\n');
}
- assert(SchedClassIdxMap.count(ItinDef->getName()) && "missing ItinClass");
- unsigned Idx = SchedClassIdxMap.lookup(ItinDef->getName());
- assert(Idx <= NumItineraryClasses && "bad ItinClass index");
- ProcModel.ItinDefList[Idx] = ItinData;
}
// Check for missing itinerary entries.
assert(!ProcModel.ItinDefList[0] && "NoItinerary class can't have rec");
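Because a SchedClass is now keyed by the (itinerary class, SchedRW) combination, several classes can share one itinerary class, and the rewritten collectProcItins above writes each ItinData record into every matching slot. The fan-out, reduced to a sketch over stand-in types (void* in place of Record*):

#include <vector>

struct ClassSketch {
  const void *ItinClassDef; // stand-in for the itinerary class Record*
  unsigned Index;           // position in the SchedClasses vector
};

// Write ItinData into ItinDefList[Index] of every class using ItinDef.
// Returns false when no class references the itinerary, matching the
// "missing class for itinerary" debug path above.
static bool fanOutItinerary(const std::vector<ClassSketch> &Classes,
                            const void *ItinDef, const void *ItinData,
                            std::vector<const void *> &ItinDefList) {
  bool Found = false;
  for (unsigned i = 0, e = Classes.size(); i != e; ++i)
    if (Classes[i].ItinClassDef == ItinDef) {
      ItinDefList[Classes[i].Index] = ItinData;
      Found = true;
    }
  return Found;
}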
@@ -839,13 +825,17 @@ void CodeGenSchedModels::collectProcItinRW() {
/// Infer new classes from existing classes. In the process, this may create new
/// SchedWrites from sequences of existing SchedWrites.
void CodeGenSchedModels::inferSchedClasses() {
+ DEBUG(dbgs() << NumInstrSchedClasses << " instr sched classes.\n");
+
// Visit all existing classes and newly created classes.
for (unsigned Idx = 0; Idx != SchedClasses.size(); ++Idx) {
+ assert(SchedClasses[Idx].Index == Idx && "bad SCIdx");
+
if (SchedClasses[Idx].ItinClassDef)
inferFromItinClass(SchedClasses[Idx].ItinClassDef, Idx);
- else if (!SchedClasses[Idx].InstRWs.empty())
+ if (!SchedClasses[Idx].InstRWs.empty())
inferFromInstRWs(Idx);
- else {
+ if (!SchedClasses[Idx].Writes.empty()) {
inferFromRW(SchedClasses[Idx].Writes, SchedClasses[Idx].Reads,
Idx, SchedClasses[Idx].ProcIndices);
}
@@ -1042,11 +1032,13 @@ static bool hasVariant(ArrayRef<PredTransition> Transitions,
// Populate IntersectingVariants with any variants or aliased sequences of the
// given SchedRW whose processor indices and predicates are not mutually
-// exclusive with the given transition,
+// exclusive with the given transition.
void PredTransitions::getIntersectingVariants(
const CodeGenSchedRW &SchedRW, unsigned TransIdx,
std::vector<TransVariant> &IntersectingVariants) {
+ bool GenericRW = false;
+
std::vector<TransVariant> Variants;
if (SchedRW.HasVariants) {
unsigned VarProcIdx = 0;
@@ -1058,6 +1050,8 @@ void PredTransitions::getIntersectingVariants(
const RecVec VarDefs = SchedRW.TheDef->getValueAsListOfDefs("Variants");
for (RecIter RI = VarDefs.begin(), RE = VarDefs.end(); RI != RE; ++RI)
Variants.push_back(TransVariant(*RI, SchedRW.Index, VarProcIdx, 0));
+ if (VarProcIdx == 0)
+ GenericRW = true;
}
for (RecIter AI = SchedRW.Aliases.begin(), AE = SchedRW.Aliases.end();
AI != AE; ++AI) {
@@ -1081,6 +1075,8 @@ void PredTransitions::getIntersectingVariants(
Variants.push_back(
TransVariant(AliasRW.TheDef, SchedRW.Index, AliasProcIdx, 0));
}
+ if (AliasProcIdx == 0)
+ GenericRW = true;
}
for (unsigned VIdx = 0, VEnd = Variants.size(); VIdx != VEnd; ++VIdx) {
TransVariant &Variant = Variants[VIdx];
@@ -1118,6 +1114,10 @@ void PredTransitions::getIntersectingVariants(
TransVec.push_back(TransVec[TransIdx]);
}
}
+ if (GenericRW && IntersectingVariants.empty()) {
+ PrintFatalError(SchedRW.TheDef->getLoc(), "No variant of this type has "
+ "a matching predicate on any processor");
+ }
}
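The GenericRW flag above narrows the old unconditional fatal error (deleted from substituteVariantOperand in the next hunk): the diagnostic now fires only when a variant applicable to all processors (processor index 0) existed yet nothing intersected the transition. The predicate, as a standalone sketch over a stand-in variant type:

#include <vector>

struct VariantSketch { unsigned ProcIdx; }; // 0 means "any processor"

// Diagnose only if a generic variant was available but none intersected.
static bool mustDiagnose(const std::vector<VariantSketch> &Variants,
                         bool AnyIntersecting) {
  bool GenericRW = false;
  for (unsigned i = 0, e = Variants.size(); i != e; ++i)
    if (Variants[i].ProcIdx == 0)
      GenericRW = true;
  return GenericRW && !AnyIntersecting;
}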
// Push the Reads/Writes selected by this variant onto the PredTransition
@@ -1215,10 +1215,6 @@ void PredTransitions::substituteVariantOperand(
// This will push copies of TransVec[TransIdx] onto the back of TransVec.
std::vector<TransVariant> IntersectingVariants;
getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
- if (IntersectingVariants.empty())
- PrintFatalError(SchedRW.TheDef->getLoc(),
- "No variant of this type has "
- "a matching predicate on any processor");
// Now expand each variant on top of its copy of the transition.
for (std::vector<TransVariant>::const_iterator
IVI = IntersectingVariants.begin(),
@@ -1295,8 +1291,8 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
IdxVec ProcIndices(I->ProcIndices.begin(), I->ProcIndices.end());
CodeGenSchedTransition SCTrans;
SCTrans.ToClassIdx =
- SchedModels.addSchedClass(OperWritesVariant, OperReadsVariant,
- ProcIndices);
+ SchedModels.addSchedClass(/*ItinClassDef=*/0, OperWritesVariant,
+ OperReadsVariant, ProcIndices);
SCTrans.ProcIndices = ProcIndices;
// The final PredTerm is unique set of predicates guarding the transition.
RecVec Preds;
@@ -1318,7 +1314,7 @@ void CodeGenSchedModels::inferFromRW(const IdxVec &OperWrites,
const IdxVec &OperReads,
unsigned FromClassIdx,
const IdxVec &ProcIndices) {
- DEBUG(dbgs() << "INFER RW: ");
+ DEBUG(dbgs() << "INFER RW proc("; dumpIdxVec(ProcIndices); dbgs() << ") ");
// Create a seed transition with an empty PredTerm and the expanded sequences
// of SchedWrites for the current SchedClass.
@@ -1380,8 +1376,22 @@ void CodeGenSchedModels::collectProcResources() {
SCI != SCE; ++SCI) {
if (SCI->ItinClassDef)
collectItinProcResources(SCI->ItinClassDef);
- else
+ else {
+ // This class may have a default ReadWrite list which can be overridden by
+ // InstRW definitions.
+ if (!SCI->InstRWs.empty()) {
+ for (RecIter RWI = SCI->InstRWs.begin(), RWE = SCI->InstRWs.end();
+ RWI != RWE; ++RWI) {
+ Record *RWModelDef = (*RWI)->getValueAsDef("SchedModel");
+ IdxVec ProcIndices(1, getProcModel(RWModelDef).Index);
+ IdxVec Writes, Reads;
+ findRWs((*RWI)->getValueAsListOfDefs("OperandReadWrites"),
+ Writes, Reads);
+ collectRWResources(Writes, Reads, ProcIndices);
+ }
+ }
collectRWResources(SCI->Writes, SCI->Reads, SCI->ProcIndices);
+ }
}
// Add resources separately defined by each subtarget.
RecVec WRDefs = Records.getAllDerivedDefinitions("WriteRes");
@@ -1528,6 +1538,20 @@ Record *CodeGenSchedModels::findProcResUnits(Record *ProcResKind,
ProcUnitDef = *RI;
}
}
+ RecVec ProcResGroups = Records.getAllDerivedDefinitions("ProcResGroup");
+ for (RecIter RI = ProcResGroups.begin(), RE = ProcResGroups.end();
+ RI != RE; ++RI) {
+
+ if (*RI == ProcResKind
+ && (*RI)->getValueAsDef("SchedModel") == PM.ModelDef) {
+ if (ProcUnitDef) {
+ PrintFatalError((*RI)->getLoc(),
+ "Multiple ProcessorResourceUnits associated with "
+ + ProcResKind->getName());
+ }
+ ProcUnitDef = *RI;
+ }
+ }
if (!ProcUnitDef) {
PrintFatalError(ProcResKind->getLoc(),
"No ProcessorResources associated with "
@@ -1549,6 +1573,9 @@ void CodeGenSchedModels::addProcResource(Record *ProcResKind,
return;
PM.ProcResourceDefs.push_back(ProcResUnits);
+ if (ProcResUnits->isSubClassOf("ProcResGroup"))
+ return;
+
if (!ProcResUnits->getValueInit("Super")->isComplete())
return;
@@ -1611,7 +1638,7 @@ void CodeGenSchedRW::dump() const {
}
void CodeGenSchedClass::dump(const CodeGenSchedModels* SchedModels) const {
- dbgs() << "SCHEDCLASS " << Name << '\n'
+ dbgs() << "SCHEDCLASS " << Index << ":" << Name << '\n'
<< " Writes: ";
for (unsigned i = 0, N = Writes.size(); i < N; ++i) {
SchedModels->getSchedWrite(Writes[i]).dump();
@@ -1629,6 +1656,13 @@ void CodeGenSchedClass::dump(const CodeGenSchedModels* SchedModels) const {
}
}
dbgs() << "\n ProcIdx: "; dumpIdxVec(ProcIndices); dbgs() << '\n';
+ if (!Transitions.empty()) {
+ dbgs() << "\n Transitions for Proc ";
+ for (std::vector<CodeGenSchedTransition>::const_iterator
+ TI = Transitions.begin(), TE = Transitions.end(); TI != TE; ++TI) {
+ dumpIdxVec(TI->ProcIndices);
+ }
+ }
}
void PredTransitions::dump() const {
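Taken together, the CodeGenSchedule.cpp changes replace the name-keyed SchedClassIdxMap with classes identified by the triple (itinerary class, operand writes, operand reads); isKeyEqual in the CodeGenSchedule.h hunks below is the comparison. A stand-in sketch of the lookup that findSchedClassIdx and addSchedClass now perform (simplified types, not the TableGen classes):

#include <vector>

struct SchedClassKey {
  const void *ItinClassDef;            // 0 for inferred classes
  std::vector<unsigned> Writes, Reads; // per-operand SchedRW indices
  bool matches(const SchedClassKey &O) const {
    return ItinClassDef == O.ItinClassDef &&
           Writes == O.Writes && Reads == O.Reads;
  }
};

// Find an existing class with the same key, or report Classes.size() so
// the caller can append a new one (the shape of addSchedClass above).
static unsigned findOrCreateIdx(const std::vector<SchedClassKey> &Classes,
                                const SchedClassKey &Key) {
  for (unsigned i = 0, e = Classes.size(); i != e; ++i)
    if (Classes[i].matches(Key))
      return i;
  return Classes.size();
}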
diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h
index eed058971b80..e5b9118fb062 100644
--- a/utils/TableGen/CodeGenSchedule.h
+++ b/utils/TableGen/CodeGenSchedule.h
@@ -16,10 +16,10 @@
#define CODEGEN_SCHEDULE_H
#include "SetTheory.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TableGen/Record.h"
namespace llvm {
@@ -55,10 +55,11 @@ struct CodeGenSchedRW {
IdxVec Sequence;
RecVec Aliases;
- CodeGenSchedRW(): Index(0), TheDef(0), IsAlias(false), HasVariants(false),
- IsVariadic(false), IsSequence(false) {}
- CodeGenSchedRW(unsigned Idx, Record *Def): Index(Idx), TheDef(Def),
- IsAlias(false), IsVariadic(false) {
+ CodeGenSchedRW()
+ : Index(0), TheDef(0), IsRead(false), IsAlias(false),
+ HasVariants(false), IsVariadic(false), IsSequence(false) {}
+ CodeGenSchedRW(unsigned Idx, Record *Def)
+ : Index(Idx), TheDef(Def), IsAlias(false), IsVariadic(false) {
Name = Def->getName();
IsRead = Def->isSubClassOf("SchedRead");
HasVariants = Def->isSubClassOf("SchedVariant");
@@ -72,9 +73,9 @@ struct CodeGenSchedRW {
}
CodeGenSchedRW(unsigned Idx, bool Read, const IdxVec &Seq,
- const std::string &Name):
- Index(Idx), Name(Name), TheDef(0), IsRead(Read), IsAlias(false),
- HasVariants(false), IsVariadic(false), IsSequence(true), Sequence(Seq) {
+ const std::string &Name)
+ : Index(Idx), Name(Name), TheDef(0), IsRead(Read), IsAlias(false),
+ HasVariants(false), IsVariadic(false), IsSequence(true), Sequence(Seq) {
assert(Sequence.size() > 1 && "implied sequence needs >1 RWs");
}
@@ -124,6 +125,7 @@ struct CodeGenSchedTransition {
/// itinerary class. Each inherits the processor index from the ItinRW record
/// that mapped the itinerary class to the variant Writes or Reads.
struct CodeGenSchedClass {
+ unsigned Index;
std::string Name;
Record *ItinClassDef;
@@ -140,12 +142,16 @@ struct CodeGenSchedClass {
// off to join another inferred class.
RecVec InstRWs;
- CodeGenSchedClass(): ItinClassDef(0) {}
- CodeGenSchedClass(Record *rec): ItinClassDef(rec) {
- Name = rec->getName();
- ProcIndices.push_back(0);
+ CodeGenSchedClass(): Index(0), ItinClassDef(0) {}
+
+ bool isKeyEqual(Record *IC, const IdxVec &W, const IdxVec &R) {
+ return ItinClassDef == IC && Writes == W && Reads == R;
}
+ // Is this class generated from variants of existing classes? Instructions
+ // are never mapped directly to inferred scheduling classes.
+ bool isInferred() const { return !ItinClassDef; }
+
#ifndef NDEBUG
void dump(const CodeGenSchedModels *SchedModels) const;
#endif
@@ -188,11 +194,16 @@ struct CodeGenProcModel {
// Per-operand machine model resources associated with this processor.
RecVec ProcResourceDefs;
+ RecVec ProcResGroupDefs;
CodeGenProcModel(unsigned Idx, const std::string &Name, Record *MDef,
Record *IDef) :
Index(Idx), ModelName(Name), ModelDef(MDef), ItinsDef(IDef) {}
+ bool hasItineraries() const {
+ return !ItinsDef->getValueAsListOfDefs("IID").empty();
+ }
+
bool hasInstrSchedModel() const {
return !WriteResDefs.empty() || !ItinRWDefs.empty();
}
@@ -226,24 +237,11 @@ class CodeGenSchedModels {
// List of unique SchedClasses.
std::vector<CodeGenSchedClass> SchedClasses;
- // Map SchedClass name to itinerary index.
- // These are either explicit itinerary classes or classes implied by
- // instruction definitions with SchedReadWrite lists.
- StringMap<unsigned> SchedClassIdxMap;
-
- // SchedClass indices 1 up to and including NumItineraryClasses identify
- // itinerary classes that are explicitly used for this target's instruction
- // definitions. NoItinerary always has index 0 regardless of whether it is
- // explicitly referenced.
- //
- // Any implied SchedClass has an index greater than NumItineraryClasses.
- unsigned NumItineraryClasses;
-
// Any inferred SchedClass has an index greater than NumInstrSchedClasses.
unsigned NumInstrSchedClasses;
- // Map Instruction to SchedClass index. Only for Instructions mentioned in
- // InstRW records.
+ // Map each instruction to its unique SchedClass index considering the
+ // combination of its itinerary class, SchedRW list, and InstRW records.
typedef DenseMap<Record*, unsigned> InstClassMapTy;
InstClassMapTy InstrClassMap;
@@ -279,6 +277,9 @@ public:
ProcIter procModelBegin() const { return ProcModels.begin(); }
ProcIter procModelEnd() const { return ProcModels.end(); }
+ // Return true if any processors have itineraries.
+ bool hasItineraries() const;
+
// Get a SchedWrite from its index.
const CodeGenSchedRW &getSchedWrite(unsigned Idx) const {
assert(Idx < SchedWrites.size() && "bad SchedWrite index");
@@ -310,16 +311,6 @@ public:
// Return true if the given write record is referenced by a ReadAdvance.
bool hasReadOfWrite(Record *WriteDef) const;
- // Check if any instructions are assigned to an explicit itinerary class other
- // than NoItinerary.
- bool hasItineraryClasses() const { return NumItineraryClasses > 0; }
-
- // Return the number of itinerary classes in use by this target's instruction
- // descriptions, not including "NoItinerary".
- unsigned numItineraryClasses() const {
- return NumItineraryClasses;
- }
-
// Get a SchedClass from its index.
CodeGenSchedClass &getSchedClass(unsigned Idx) {
assert(Idx < SchedClasses.size() && "bad SchedClass index");
@@ -335,28 +326,26 @@ public:
// for NoItinerary.
unsigned getSchedClassIdx(const CodeGenInstruction &Inst) const;
- unsigned getSchedClassIdx(const RecVec &RWDefs) const;
-
- unsigned getSchedClassIdxForItin(const Record *ItinDef) {
- return SchedClassIdxMap[ItinDef->getName()];
- }
-
typedef std::vector<CodeGenSchedClass>::const_iterator SchedClassIter;
SchedClassIter schedClassBegin() const { return SchedClasses.begin(); }
SchedClassIter schedClassEnd() const { return SchedClasses.end(); }
+ unsigned numInstrSchedClasses() const { return NumInstrSchedClasses; }
+
void findRWs(const RecVec &RWDefs, IdxVec &Writes, IdxVec &Reads) const;
void findRWs(const RecVec &RWDefs, IdxVec &RWs, bool IsRead) const;
void expandRWSequence(unsigned RWIdx, IdxVec &RWSeq, bool IsRead) const;
void expandRWSeqForProc(unsigned RWIdx, IdxVec &RWSeq, bool IsRead,
const CodeGenProcModel &ProcModel) const;
- unsigned addSchedClass(const IdxVec &OperWrites, const IdxVec &OperReads,
- const IdxVec &ProcIndices);
+ unsigned addSchedClass(Record *ItinDef, const IdxVec &OperWrites,
+ const IdxVec &OperReads, const IdxVec &ProcIndices);
unsigned findOrInsertRW(ArrayRef<unsigned> Seq, bool IsRead);
- unsigned findSchedClassIdx(const IdxVec &Writes, const IdxVec &Reads) const;
+ unsigned findSchedClassIdx(Record *ItinClassDef,
+ const IdxVec &Writes,
+ const IdxVec &Reads) const;
Record *findProcResUnits(Record *ProcResKind,
const CodeGenProcModel &PM) const;
@@ -374,7 +363,8 @@ private:
void collectSchedClasses();
- std::string createSchedClassName(const IdxVec &OperWrites,
+ std::string createSchedClassName(Record *ItinClassDef,
+ const IdxVec &OperWrites,
const IdxVec &OperReads);
std::string createSchedClassName(const RecVec &InstDefs);
void createInstRWClass(Record *InstRWDef);
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index c9992eb39228..8b292b957288 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -17,11 +17,11 @@
#include "CodeGenTarget.h"
#include "CodeGenIntrinsics.h"
#include "CodeGenSchedule.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
#include <algorithm>
using namespace llvm;
@@ -73,16 +73,20 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v4i1: return "MVT::v4i1";
case MVT::v8i1: return "MVT::v8i1";
case MVT::v16i1: return "MVT::v16i1";
+ case MVT::v32i1: return "MVT::v32i1";
+ case MVT::v64i1: return "MVT::v64i1";
case MVT::v2i8: return "MVT::v2i8";
case MVT::v4i8: return "MVT::v4i8";
case MVT::v8i8: return "MVT::v8i8";
case MVT::v16i8: return "MVT::v16i8";
case MVT::v32i8: return "MVT::v32i8";
+ case MVT::v64i8: return "MVT::v64i8";
case MVT::v1i16: return "MVT::v1i16";
case MVT::v2i16: return "MVT::v2i16";
case MVT::v4i16: return "MVT::v4i16";
case MVT::v8i16: return "MVT::v8i16";
case MVT::v16i16: return "MVT::v16i16";
+ case MVT::v32i16: return "MVT::v32i16";
case MVT::v1i32: return "MVT::v1i32";
case MVT::v2i32: return "MVT::v2i32";
case MVT::v4i32: return "MVT::v4i32";
@@ -97,8 +101,10 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v2f32: return "MVT::v2f32";
case MVT::v4f32: return "MVT::v4f32";
case MVT::v8f32: return "MVT::v8f32";
+ case MVT::v16f32: return "MVT::v16f32";
case MVT::v2f64: return "MVT::v2f64";
case MVT::v4f64: return "MVT::v4f64";
+ case MVT::v8f64: return "MVT::v8f64";
case MVT::Metadata: return "MVT::Metadata";
case MVT::iPTR: return "MVT::iPTR";
case MVT::iPTRAny: return "MVT::iPTRAny";
@@ -223,7 +229,7 @@ getRegisterVTs(Record *R) const {
for (unsigned i = 0, e = RCs.size(); i != e; ++i) {
const CodeGenRegisterClass &RC = *RCs[i];
if (RC.contains(Reg)) {
- const std::vector<MVT::SimpleValueType> &InVTs = RC.getValueTypes();
+ ArrayRef<MVT::SimpleValueType> InVTs = RC.getValueTypes();
Result.insert(Result.end(), InVTs.begin(), InVTs.end());
}
}
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index ddeecee36fdf..627144302932 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -17,10 +17,10 @@
#ifndef CODEGEN_TARGET_H
#define CODEGEN_TARGET_H
-#include "CodeGenRegisters.h"
#include "CodeGenInstruction.h"
-#include "llvm/TableGen/Record.h"
+#include "CodeGenRegisters.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
#include <algorithm>
namespace llvm {
@@ -68,7 +68,7 @@ class CodeGenTarget {
mutable DenseMap<const Record*, CodeGenInstruction*> Instructions;
mutable CodeGenRegBank *RegBank;
mutable std::vector<Record*> RegAltNameIndices;
- mutable std::vector<MVT::SimpleValueType> LegalValueTypes;
+ mutable SmallVector<MVT::SimpleValueType, 8> LegalValueTypes;
void ReadRegAltNameIndices() const;
void ReadInstructions() const;
void ReadLegalValueTypes() const;
@@ -129,7 +129,7 @@ public:
/// specified physical register.
std::vector<MVT::SimpleValueType> getRegisterVTs(Record *R) const;
- const std::vector<MVT::SimpleValueType> &getLegalValueTypes() const {
+ ArrayRef<MVT::SimpleValueType> getLegalValueTypes() const {
if (LegalValueTypes.empty()) ReadLegalValueTypes();
return LegalValueTypes;
}
@@ -137,7 +137,7 @@ public:
/// isLegalValueType - Return true if the specified value type is natively
/// supported by the target (i.e. there are registers that directly hold it).
bool isLegalValueType(MVT::SimpleValueType VT) const {
- const std::vector<MVT::SimpleValueType> &LegalVTs = getLegalValueTypes();
+ ArrayRef<MVT::SimpleValueType> LegalVTs = getLegalValueTypes();
for (unsigned i = 0, e = LegalVTs.size(); i != e; ++i)
if (LegalVTs[i] == VT) return true;
return false;
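A recurring change in this import is switching accessors from const std::vector& to ArrayRef, as in getValueTypes above and getLegalValueTypes here. ArrayRef binds to a std::vector, a SmallVector, or a C array without copying, so the owning container can change type (as VTs and LegalValueTypes did) without touching callers. A minimal sketch, assuming LLVM's ADT headers are on the include path:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>

// One reader works against any contiguous container of the element type.
static bool contains(llvm::ArrayRef<int> VTs, int VT) {
  for (unsigned i = 0, e = VTs.size(); i != e; ++i)
    if (VTs[i] == VT)
      return true;
  return false;
}

void demo() {
  std::vector<int> Heap(3, 7);
  llvm::SmallVector<int, 4> Stack;
  Stack.push_back(7);
  contains(Heap, 7);  // binds to std::vector storage
  contains(Stack, 7); // binds to SmallVector storage, no copy either way
}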
diff --git a/utils/TableGen/DAGISelMatcher.cpp b/utils/TableGen/DAGISelMatcher.cpp
index bd77907a9bd9..d173cf006a46 100644
--- a/utils/TableGen/DAGISelMatcher.cpp
+++ b/utils/TableGen/DAGISelMatcher.cpp
@@ -10,9 +10,9 @@
#include "DAGISelMatcher.h"
#include "CodeGenDAGPatterns.h"
#include "CodeGenTarget.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
using namespace llvm;
void Matcher::anchor() { }
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index 7c6ce3babcd8..f978188aae59 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -10,10 +10,10 @@
#ifndef TBLGEN_DAGISELMATCHER_H
#define TBLGEN_DAGISELMATCHER_H
-#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/Casting.h"
namespace llvm {
diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp
index 713f1743c143..93f84ce6e817 100644
--- a/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -13,12 +13,12 @@
#include "DAGISelMatcher.h"
#include "CodeGenDAGPatterns.h"
-#include "llvm/TableGen/Record.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/TableGen/Record.h"
using namespace llvm;
enum {
@@ -132,7 +132,7 @@ static uint64_t EmitVBRValue(uint64_t Val, raw_ostream &OS) {
return NumBytes+1;
}
-/// EmitMatcherOpcodes - Emit bytes for the specified matcher and return
+/// EmitMatcher - Emit bytes for the specified matcher and return
/// the number of bytes emitted.
unsigned MatcherTableEmitter::
EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 573f55875ec6..ed41631456b0 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -10,11 +10,11 @@
#include "DAGISelMatcher.h"
#include "CodeGenDAGPatterns.h"
#include "CodeGenRegisters.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
#include <utility>
using namespace llvm;
@@ -211,6 +211,12 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
return AddMatcher(new CheckIntegerMatcher(II->getValue()));
}
+ // An UnsetInit represents a named node without any constraints.
+ if (N->getLeafValue() == UnsetInit::get()) {
+ assert(N->hasName() && "Unnamed ? leaf");
+ return;
+ }
+
DefInit *DI = dyn_cast<DefInit>(N->getLeafValue());
if (DI == 0) {
errs() << "Unknown leaf kind: " << *N << "\n";
@@ -218,6 +224,17 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
}
Record *LeafRec = DI->getDef();
+
+ // A ValueType leaf node can represent a register when named, or itself when
+ // unnamed.
+ if (LeafRec->isSubClassOf("ValueType")) {
+ // A named ValueType leaf always matches: (add i32:$a, i32:$b).
+ if (N->hasName())
+ return;
+ // An unnamed ValueType, as in (sext_inreg GPR:$foo, i8), needs an explicit check.
+ return AddMatcher(new CheckValueTypeMatcher(LeafRec->getName()));
+ }
+
if (// Handle register references. Nothing to do here, they always match.
LeafRec->isSubClassOf("RegisterClass") ||
LeafRec->isSubClassOf("RegisterOperand") ||
@@ -236,9 +253,6 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
return;
}
- if (LeafRec->isSubClassOf("ValueType"))
- return AddMatcher(new CheckValueTypeMatcher(LeafRec->getName()));
-
if (LeafRec->isSubClassOf("CondCode"))
return AddMatcher(new CheckCondCodeMatcher(LeafRec->getName()));
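The reordered leaf handling above distinguishes named from unnamed ValueType leaves: i32:$a in (add i32:$a, i32:$b) constrains types through inference and always matches structurally, while the unnamed i8 in (sext_inreg GPR:$foo, i8) still requires a CheckValueTypeMatcher. The decision, as a toy predicate (stand-in struct, not TreePatternNode):

#include <string>

struct LeafSketch {
  std::string Name; // pattern binding such as "a"; empty when unnamed
  bool IsValueType;
};

// True when the matcher generator must emit an explicit type check.
static bool needsValueTypeCheck(const LeafSketch &L) {
  return L.IsValueType && L.Name.empty();
}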
@@ -734,20 +748,33 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
continue;
}
- const TreePatternNode *Child = N->getChild(ChildNo);
-
// Otherwise this is a normal operand or a predicate operand without
// 'execute always'; emit it.
- unsigned BeforeAddingNumOps = InstOps.size();
- EmitResultOperand(Child, InstOps);
- assert(InstOps.size() > BeforeAddingNumOps && "Didn't add any operands");
- // If the operand is an instruction and it produced multiple results, just
- // take the first one.
- if (!Child->isLeaf() && Child->getOperator()->isSubClassOf("Instruction"))
- InstOps.resize(BeforeAddingNumOps+1);
+ // For operands with multiple sub-operands we may need to emit
+ // multiple child patterns to cover them all. However, ComplexPattern
+ // children may themselves emit multiple MI operands.
+ unsigned NumSubOps = 1;
+ if (OperandNode->isSubClassOf("Operand")) {
+ DagInit *MIOpInfo = OperandNode->getValueAsDag("MIOperandInfo");
+ if (unsigned NumArgs = MIOpInfo->getNumArgs())
+ NumSubOps = NumArgs;
+ }
+
+ unsigned FinalNumOps = InstOps.size() + NumSubOps;
+ while (InstOps.size() < FinalNumOps) {
+ const TreePatternNode *Child = N->getChild(ChildNo);
+ unsigned BeforeAddingNumOps = InstOps.size();
+ EmitResultOperand(Child, InstOps);
+ assert(InstOps.size() > BeforeAddingNumOps && "Didn't add any operands");
- ++ChildNo;
+ // If the operand is an instruction and it produced multiple results, just
+ // take the first one.
+ if (!Child->isLeaf() && Child->getOperator()->isSubClassOf("Instruction"))
+ InstOps.resize(BeforeAddingNumOps+1);
+
+ ++ChildNo;
+ }
}
// If this node has input glue or explicitly specified input physregs, we
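The rewritten loop above accounts for instruction operands that expand to several MI sub-operands: it keeps consuming pattern children until the expected number of machine operands has been emitted, because a single ComplexPattern child may emit several at once. The accounting, reduced to a sketch (EmitChild is a hypothetical stand-in for EmitResultOperand, which the real code asserts always appends at least one operand):

#include <vector>

// Consume children until NumSubOps machine operands have been added.
// Returns the index of the next unconsumed child.
static unsigned emitSubOperands(unsigned NumSubOps, unsigned ChildNo,
                                std::vector<int> &InstOps,
                                void (*EmitChild)(unsigned,
                                                  std::vector<int> &)) {
  unsigned FinalNumOps = InstOps.size() + NumSubOps;
  while (InstOps.size() < FinalNumOps) {
    EmitChild(ChildNo, InstOps); // may append more than one operand
    ++ChildNo;
  }
  return ChildNo;
}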
diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp
index 0ad25a5428db..2549c47c3318 100644
--- a/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -279,6 +279,7 @@ DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
//
//
void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
+ static const std::string SentinelEntry = "{-1, -1}";
DFA::StateSet::iterator SI = states.begin();
// This table provides a map to the beginning of the transitions for State s
// in DFAStateInputTable.
@@ -305,12 +306,17 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
// If there are no valid transitions from this stage, we need a sentinel
// transition.
if (ValidTransitions == StateEntry[i]) {
- OS << "{-1, -1},";
+ OS << SentinelEntry << ",";
++ValidTransitions;
}
OS << "\n";
}
+
+ // Print out a sentinel entry at the end of the StateInputTable. This is
+ // needed to iterate over StateInputTable in DFAPacketizer::ReadTable().
+ OS << SentinelEntry << "\n";
+
OS << "};\n\n";
OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n";
@@ -319,6 +325,9 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
for (unsigned i = 0; i < states.size(); ++i)
OS << StateEntry[i] << ", ";
+ // Print out the index to the sentinel entry in StateInputTable
+ OS << ValidTransitions << ", ";
+
OS << "\n};\n";
OS << "} // namespace\n";
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index 2d11d2480de4..5a2a41b456d7 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -127,8 +127,9 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
// ARM and Thumb have a CHECK() macro to deal with DecodeStatuses.
if (Target.getName() == "ARM" ||
- Target.getName() == "Thumb") {
- EmitFixedLenDecoder(Records, OS, "ARM",
+ Target.getName() == "Thumb" ||
+ Target.getName() == "AArch64") {
+ EmitFixedLenDecoder(Records, OS, Target.getName() == "AArch64" ? "AArch64" : "ARM",
"if (!Check(S, ", ")) return MCDisassembler::Fail;",
"S", "MCDisassembler::Fail",
" MCDisassembler::DecodeStatus S = "
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
deleted file mode 100644
index ea2545050bc0..000000000000
--- a/utils/TableGen/EDEmitter.cpp
+++ /dev/null
@@ -1,1011 +0,0 @@
-//===- EDEmitter.cpp - Generate instruction descriptions for ED -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This tablegen backend is responsible for emitting a description of each
-// instruction in a format that the enhanced disassembler can use to tokenize
-// and parse instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AsmWriterInst.h"
-#include "CodeGenTarget.h"
-#include "llvm/MC/EDInstInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/TableGenBackend.h"
-#include <string>
-#include <vector>
-
-using namespace llvm;
-
-// TODO: There's a suspiciously large amount of "table" data in this
-// backend which should probably be in the TableGen file itself.
-
-///////////////////////////////////////////////////////////
-// Support classes for emitting nested C data structures //
-///////////////////////////////////////////////////////////
-
-// TODO: These classes are probably generally useful to other backends;
-// add them to TableGen's "helper" API's.
-
-namespace {
-class EnumEmitter {
-private:
- std::string Name;
- std::vector<std::string> Entries;
-public:
- EnumEmitter(const char *N) : Name(N) {
- }
- int addEntry(const char *e) {
- Entries.push_back(std::string(e));
- return Entries.size() - 1;
- }
- void emit(raw_ostream &o, unsigned int &i) {
- o.indent(i) << "enum " << Name.c_str() << " {" << "\n";
- i += 2;
-
- unsigned int index = 0;
- unsigned int numEntries = Entries.size();
- for (index = 0; index < numEntries; ++index) {
- o.indent(i) << Entries[index];
- if (index < (numEntries - 1))
- o << ",";
- o << "\n";
- }
-
- i -= 2;
- o.indent(i) << "};" << "\n";
- }
-
- void emitAsFlags(raw_ostream &o, unsigned int &i) {
- o.indent(i) << "enum " << Name.c_str() << " {" << "\n";
- i += 2;
-
- unsigned int index = 0;
- unsigned int numEntries = Entries.size();
- unsigned int flag = 1;
- for (index = 0; index < numEntries; ++index) {
- o.indent(i) << Entries[index] << " = " << format("0x%x", flag);
- if (index < (numEntries - 1))
- o << ",";
- o << "\n";
- flag <<= 1;
- }
-
- i -= 2;
- o.indent(i) << "};" << "\n";
- }
-};
-} // End anonymous namespace
-
-namespace {
-class ConstantEmitter {
-public:
- virtual ~ConstantEmitter() { }
- virtual void emit(raw_ostream &o, unsigned int &i) = 0;
-};
-} // End anonymous namespace
-
-namespace {
-class LiteralConstantEmitter : public ConstantEmitter {
-private:
- bool IsNumber;
- union {
- int Number;
- const char* String;
- };
-public:
- LiteralConstantEmitter(int number = 0) :
- IsNumber(true),
- Number(number) {
- }
- void set(const char *string) {
- IsNumber = false;
- Number = 0;
- String = string;
- }
- bool is(const char *string) {
- return !strcmp(String, string);
- }
- void emit(raw_ostream &o, unsigned int &i) {
- if (IsNumber)
- o << Number;
- else
- o << String;
- }
-};
-} // End anonymous namespace
-
-namespace {
-class CompoundConstantEmitter : public ConstantEmitter {
-private:
- unsigned int Padding;
- std::vector<ConstantEmitter *> Entries;
-public:
- CompoundConstantEmitter(unsigned int padding = 0) : Padding(padding) {
- }
- CompoundConstantEmitter &addEntry(ConstantEmitter *e) {
- Entries.push_back(e);
-
- return *this;
- }
- ~CompoundConstantEmitter() {
- while (Entries.size()) {
- ConstantEmitter *entry = Entries.back();
- Entries.pop_back();
- delete entry;
- }
- }
- void emit(raw_ostream &o, unsigned int &i) {
- o << "{" << "\n";
- i += 2;
-
- unsigned int index;
- unsigned int numEntries = Entries.size();
-
- unsigned int numToPrint;
-
- if (Padding) {
- if (numEntries > Padding) {
- fprintf(stderr, "%u entries but %u padding\n", numEntries, Padding);
- llvm_unreachable("More entries than padding");
- }
- numToPrint = Padding;
- } else {
- numToPrint = numEntries;
- }
-
- for (index = 0; index < numToPrint; ++index) {
- o.indent(i);
- if (index < numEntries)
- Entries[index]->emit(o, i);
- else
- o << "-1";
-
- if (index < (numToPrint - 1))
- o << ",";
- o << "\n";
- }
-
- i -= 2;
- o.indent(i) << "}";
- }
-};
-} // End anonymous namespace
-
-namespace {
-class FlagsConstantEmitter : public ConstantEmitter {
-private:
- std::vector<std::string> Flags;
-public:
- FlagsConstantEmitter() {
- }
- FlagsConstantEmitter &addEntry(const char *f) {
- Flags.push_back(std::string(f));
- return *this;
- }
- void emit(raw_ostream &o, unsigned int &i) {
- unsigned int index;
- unsigned int numFlags = Flags.size();
- if (numFlags == 0)
- o << "0";
-
- for (index = 0; index < numFlags; ++index) {
- o << Flags[index].c_str();
- if (index < (numFlags - 1))
- o << " | ";
- }
- }
-};
-} // End anonymous namespace
-
-/// populateOperandOrder - Accepts a CodeGenInstruction and generates its
-/// AsmWriterInst for the desired assembly syntax, giving an ordered list of
-/// operands in the order they appear in the printed instruction. Then, for
-/// each entry in that list, determines the index of the same operand in the
-/// CodeGenInstruction, and emits the resulting mapping into an array, filling
-/// in unused slots with -1.
-///
-/// @arg operandOrder - The array that will be populated with the operand
-/// mapping. Each entry will contain -1 (invalid index
-/// into the operands present in the AsmString) or a number
-/// representing an index in the operand descriptor array.
-/// @arg inst - The instruction to use when looking up the operands
-/// @arg syntax - The syntax to use, according to LLVM's enumeration
-static void populateOperandOrder(CompoundConstantEmitter *operandOrder,
- const CodeGenInstruction &inst,
- unsigned syntax) {
- unsigned int numArgs = 0;
-
- AsmWriterInst awInst(inst, syntax, -1, -1);
-
- std::vector<AsmWriterOperand>::iterator operandIterator;
-
- for (operandIterator = awInst.Operands.begin();
- operandIterator != awInst.Operands.end();
- ++operandIterator) {
- if (operandIterator->OperandType ==
- AsmWriterOperand::isMachineInstrOperand) {
- operandOrder->addEntry(
- new LiteralConstantEmitter(operandIterator->CGIOpNo));
- numArgs++;
- }
- }
-}
-
-/////////////////////////////////////////////////////
-// Support functions for handling X86 instructions //
-/////////////////////////////////////////////////////
-
-#define SET(flag) { type->set(flag); return 0; }
-
-#define REG(str) if (name == str) SET("kOperandTypeRegister");
-#define MEM(str) if (name == str) SET("kOperandTypeX86Memory");
-#define LEA(str) if (name == str) SET("kOperandTypeX86EffectiveAddress");
-#define IMM(str) if (name == str) SET("kOperandTypeImmediate");
-#define PCR(str) if (name == str) SET("kOperandTypeX86PCRelative");
-
-/// X86TypeFromOpName - Processes the name of a single X86 operand (which is
-/// actually its type) and translates it into an operand type
-///
-/// @arg flags - The type object to set
-/// @arg name - The name of the operand
-static int X86TypeFromOpName(LiteralConstantEmitter *type,
- const std::string &name) {
- REG("GR8");
- REG("GR8_NOREX");
- REG("GR16");
- REG("GR16_NOAX");
- REG("GR32");
- REG("GR32_NOAX");
- REG("GR32_NOREX");
- REG("GR32_TC");
- REG("FR32");
- REG("RFP32");
- REG("GR64");
- REG("GR64_NOAX");
- REG("GR64_TC");
- REG("FR64");
- REG("VR64");
- REG("RFP64");
- REG("RFP80");
- REG("VR128");
- REG("VR256");
- REG("RST");
- REG("SEGMENT_REG");
- REG("DEBUG_REG");
- REG("CONTROL_REG");
-
- IMM("i8imm");
- IMM("i16imm");
- IMM("i16i8imm");
- IMM("i32imm");
- IMM("i32i8imm");
- IMM("u32u8imm");
- IMM("i64imm");
- IMM("i64i8imm");
- IMM("i64i32imm");
- IMM("SSECC");
- IMM("AVXCC");
-
- // all R, I, R, I, R
- MEM("i8mem");
- MEM("i8mem_NOREX");
- MEM("i16mem");
- MEM("i32mem");
- MEM("i32mem_TC");
- MEM("f32mem");
- MEM("ssmem");
- MEM("opaque32mem");
- MEM("opaque48mem");
- MEM("i64mem");
- MEM("i64mem_TC");
- MEM("f64mem");
- MEM("sdmem");
- MEM("f80mem");
- MEM("opaque80mem");
- MEM("i128mem");
- MEM("i256mem");
- MEM("f128mem");
- MEM("f256mem");
- MEM("opaque512mem");
- // Gather
- MEM("vx32mem")
- MEM("vy32mem")
- MEM("vx64mem")
- MEM("vy64mem")
-
- // all R, I, R, I
- LEA("lea32mem");
- LEA("lea64_32mem");
- LEA("lea64mem");
-
- // all I
- PCR("i16imm_pcrel");
- PCR("i32imm_pcrel");
- PCR("i64i32imm_pcrel");
- PCR("brtarget8");
- PCR("offset8");
- PCR("offset16");
- PCR("offset32");
- PCR("offset64");
- PCR("brtarget");
- PCR("uncondbrtarget");
- PCR("bltarget");
-
- // all I, ARM mode only, conditional/unconditional
- PCR("br_target");
- PCR("bl_target");
- return 1;
-}
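-// Note on the return convention (implied by SET above): a return of 0 means
-// the operand name was recognized and its type recorded; falling through to
-// "return 1" means the name was not handled, which X86PopulateOperands below
-// treats as fatal unless the operand is a PointerLikeRegClass.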
-
-#undef REG
-#undef MEM
-#undef LEA
-#undef IMM
-#undef PCR
-
-#undef SET
-
-/// X86PopulateOperands - Handles all the operands in an X86 instruction, adding
-/// the appropriate flags to their descriptors
-///
-/// \param operandTypes A reference to the array of operand type objects
-/// \param inst The instruction to use as a source of information
-static void X86PopulateOperands(
- LiteralConstantEmitter *(&operandTypes)[EDIS_MAX_OPERANDS],
- const CodeGenInstruction &inst) {
- if (!inst.TheDef->isSubClassOf("X86Inst"))
- return;
-
- unsigned int index;
- unsigned int numOperands = inst.Operands.size();
-
- for (index = 0; index < numOperands; ++index) {
- const CGIOperandList::OperandInfo &operandInfo = inst.Operands[index];
- Record &rec = *operandInfo.Rec;
-
- if (X86TypeFromOpName(operandTypes[index], rec.getName()) &&
- !rec.isSubClassOf("PointerLikeRegClass")) {
- errs() << "Operand type: " << rec.getName().c_str() << "\n";
- errs() << "Operand name: " << operandInfo.Name.c_str() << "\n";
- errs() << "Instruction name: " << inst.TheDef->getName().c_str() << "\n";
- llvm_unreachable("Unhandled type");
- }
- }
-}
-
-/// decorate1 - Decorates a named operand with a new flag
-///
-/// \param operandFlags The array of operand flag objects, which don't have
-/// names
-/// \param inst The CodeGenInstruction, which provides a way to
-///                     translate between names and operand indices
-/// \param opName The name of the operand
-/// \param opFlag The name of the flag to add
-static inline void decorate1(
- FlagsConstantEmitter *(&operandFlags)[EDIS_MAX_OPERANDS],
- const CodeGenInstruction &inst,
- const char *opName,
- const char *opFlag) {
- unsigned opIndex;
-
- opIndex = inst.Operands.getOperandNamed(std::string(opName));
-
- operandFlags[opIndex]->addEntry(opFlag);
-}
-
-#define DECORATE1(opName, opFlag) decorate1(operandFlags, inst, opName, opFlag)
-
-#define MOV(source, target) { \
- instType.set("kInstructionTypeMove"); \
- DECORATE1(source, "kOperandFlagSource"); \
- DECORATE1(target, "kOperandFlagTarget"); \
-}
-
-#define BRANCH(target) { \
- instType.set("kInstructionTypeBranch"); \
- DECORATE1(target, "kOperandFlagTarget"); \
-}
-
-#define PUSH(source) { \
- instType.set("kInstructionTypePush"); \
- DECORATE1(source, "kOperandFlagSource"); \
-}
-
-#define POP(target) { \
- instType.set("kInstructionTypePop"); \
- DECORATE1(target, "kOperandFlagTarget"); \
-}
-
-#define CALL(target) { \
- instType.set("kInstructionTypeCall"); \
- DECORATE1(target, "kOperandFlagTarget"); \
-}
-
-#define RETURN() { \
- instType.set("kInstructionTypeReturn"); \
-}
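-// Illustrative expansion: MOV("src", "dst") (as used below) expands to
-//   instType.set("kInstructionTypeMove");
-//   decorate1(operandFlags, inst, "src", "kOperandFlagSource");
-//   decorate1(operandFlags, inst, "dst", "kOperandFlagTarget");
-// i.e. it classifies the instruction as a move and flags the named operands
-// as its source and target.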
-
-/// X86ExtractSemantics - Performs various checks on the name of an X86
-/// instruction to determine what sort of instruction it is and then adds
-/// the appropriate flags to the instruction and its operands
-///
-/// \param instType A reference to the type for the instruction as a whole
-/// \param operandFlags A reference to the array of operand flag object pointers
-/// \param inst A reference to the original instruction
-static void X86ExtractSemantics(
- LiteralConstantEmitter &instType,
- FlagsConstantEmitter *(&operandFlags)[EDIS_MAX_OPERANDS],
- const CodeGenInstruction &inst) {
- const std::string &name = inst.TheDef->getName();
-
- if (name.find("MOV") != name.npos) {
- if (name.find("MOV_V") != name.npos) {
- // ignore (this is a pseudoinstruction)
- } else if (name.find("MASK") != name.npos) {
- // ignore (this is a masking move)
- } else if (name.find("r0") != name.npos) {
- // ignore (this is a pseudoinstruction)
- } else if (name.find("PS") != name.npos ||
- name.find("PD") != name.npos) {
- // ignore (this is a shuffling move)
- } else if (name.find("MOVS") != name.npos) {
- // ignore (this is a string move)
- } else if (name.find("_F") != name.npos) {
- // TODO handle _F moves to ST(0)
- } else if (name.find("a") != name.npos) {
- // TODO handle moves to/from %ax
- } else if (name.find("CMOV") != name.npos) {
- MOV("src2", "dst");
- } else if (name.find("PC") != name.npos) {
- MOV("label", "reg")
- } else {
- MOV("src", "dst");
- }
- }
-
- if (name.find("JMP") != name.npos ||
- name.find("J") == 0) {
- if (name.find("FAR") != name.npos && name.find("i") != name.npos) {
- BRANCH("off");
- } else {
- BRANCH("dst");
- }
- }
-
- if (name.find("PUSH") != name.npos) {
- if (name.find("CS") != name.npos ||
- name.find("DS") != name.npos ||
- name.find("ES") != name.npos ||
- name.find("FS") != name.npos ||
- name.find("GS") != name.npos ||
- name.find("SS") != name.npos) {
- instType.set("kInstructionTypePush");
- // TODO add support for fixed operands
- } else if (name.find("F") != name.npos) {
- // ignore (this pushes onto the FP stack)
- } else if (name.find("A") != name.npos) {
- // ignore (this pushes all GP registers onto the stack)
- } else if (name[name.length() - 1] == 'm') {
- PUSH("src");
- } else if (name.find("i") != name.npos) {
- PUSH("imm");
- } else {
- PUSH("reg");
- }
- }
-
- if (name.find("POP") != name.npos) {
- if (name.find("POPCNT") != name.npos) {
- // ignore (not a real pop)
- } else if (name.find("CS") != name.npos ||
- name.find("DS") != name.npos ||
- name.find("ES") != name.npos ||
- name.find("FS") != name.npos ||
- name.find("GS") != name.npos ||
- name.find("SS") != name.npos) {
- instType.set("kInstructionTypePop");
- // TODO add support for fixed operands
- } else if (name.find("F") != name.npos) {
- // ignore (this pops from the FP stack)
- } else if (name.find("A") != name.npos) {
- // ignore (this pops all GP registers off the stack)
- } else if (name[name.length() - 1] == 'm') {
- POP("dst");
- } else {
- POP("reg");
- }
- }
-
- if (name.find("CALL") != name.npos) {
- if (name.find("ADJ") != name.npos) {
- // ignore (not a call)
- } else if (name.find("SYSCALL") != name.npos) {
- // ignore (doesn't go anywhere we know about)
- } else if (name.find("VMCALL") != name.npos) {
- // ignore (rather different semantics than a regular call)
- } else if (name.find("VMMCALL") != name.npos) {
- // ignore (rather different semantics than a regular call)
- } else if (name.find("FAR") != name.npos && name.find("i") != name.npos) {
- CALL("off");
- } else {
- CALL("dst");
- }
- }
-
- if (name.find("RET") != name.npos) {
- RETURN();
- }
-}
-
-#undef MOV
-#undef BRANCH
-#undef PUSH
-#undef POP
-#undef CALL
-#undef RETURN
-
-/////////////////////////////////////////////////////
-// Support functions for handling ARM instructions //
-/////////////////////////////////////////////////////
-
-#define SET(flag) { type->set(flag); return 0; }
-
-#define REG(str) if (name == str) SET("kOperandTypeRegister");
-#define IMM(str) if (name == str) SET("kOperandTypeImmediate");
-
-#define MISC(str, type) if (name == str) SET(type);
-
-/// ARMFlagFromOpName - Processes the name of a single ARM operand (which is
-/// actually its type) and translates it into an operand type
-///
-/// \param type The type object to set
-/// \param name The name of the operand
-static int ARMFlagFromOpName(LiteralConstantEmitter *type,
- const std::string &name) {
- REG("GPR");
- REG("rGPR");
- REG("GPRnopc");
- REG("GPRsp");
- REG("tcGPR");
- REG("cc_out");
- REG("s_cc_out");
- REG("tGPR");
- REG("DPR");
- REG("DPR_VFP2");
- REG("DPR_8");
- REG("DPair");
- REG("SPR");
- REG("QPR");
- REG("QQPR");
- REG("QQQQPR");
- REG("VecListOneD");
- REG("VecListDPair");
- REG("VecListDPairSpaced");
- REG("VecListThreeD");
- REG("VecListFourD");
- REG("VecListOneDAllLanes");
- REG("VecListDPairAllLanes");
- REG("VecListDPairSpacedAllLanes");
-
- IMM("i32imm");
- IMM("fbits16");
- IMM("fbits32");
- IMM("i32imm_hilo16");
- IMM("bf_inv_mask_imm");
- IMM("lsb_pos_imm");
- IMM("width_imm");
- IMM("jtblock_operand");
- IMM("nohash_imm");
- IMM("p_imm");
- IMM("pf_imm");
- IMM("c_imm");
- IMM("coproc_option_imm");
- IMM("imod_op");
- IMM("iflags_op");
- IMM("cpinst_operand");
- IMM("setend_op");
- IMM("cps_opt");
- IMM("vfp_f64imm");
- IMM("vfp_f32imm");
- IMM("memb_opt");
- IMM("msr_mask");
- IMM("neg_zero");
- IMM("imm0_31");
- IMM("imm0_31_m1");
- IMM("imm1_16");
- IMM("imm1_32");
- IMM("nModImm");
- IMM("nImmSplatI8");
- IMM("nImmSplatI16");
- IMM("nImmSplatI32");
- IMM("nImmSplatI64");
- IMM("nImmVMOVI32");
- IMM("nImmVMOVF32");
- IMM("imm8");
- IMM("imm16");
- IMM("imm32");
- IMM("imm1_7");
- IMM("imm1_15");
- IMM("imm1_31");
- IMM("imm0_1");
- IMM("imm0_3");
- IMM("imm0_7");
- IMM("imm0_15");
- IMM("imm0_255");
- IMM("imm0_4095");
- IMM("imm0_65535");
- IMM("imm0_65535_expr");
- IMM("imm24b");
- IMM("pkh_lsl_amt");
- IMM("pkh_asr_amt");
- IMM("jt2block_operand");
- IMM("t_imm0_1020s4");
- IMM("t_imm0_508s4");
- IMM("pclabel");
- IMM("adrlabel");
- IMM("t_adrlabel");
- IMM("t2adrlabel");
- IMM("shift_imm");
- IMM("t2_shift_imm");
- IMM("neon_vcvt_imm32");
- IMM("shr_imm8");
- IMM("shr_imm16");
- IMM("shr_imm32");
- IMM("shr_imm64");
- IMM("t2ldrlabel");
- IMM("postidx_imm8");
- IMM("postidx_imm8s4");
- IMM("imm_sr");
- IMM("imm1_31");
- IMM("VectorIndex8");
- IMM("VectorIndex16");
- IMM("VectorIndex32");
-
- MISC("brtarget", "kOperandTypeARMBranchTarget"); // ?
- MISC("uncondbrtarget", "kOperandTypeARMBranchTarget"); // ?
- MISC("t_brtarget", "kOperandTypeARMBranchTarget"); // ?
- MISC("t_bcctarget", "kOperandTypeARMBranchTarget"); // ?
- MISC("t_cbtarget", "kOperandTypeARMBranchTarget"); // ?
- MISC("bltarget", "kOperandTypeARMBranchTarget"); // ?
-
- MISC("br_target", "kOperandTypeARMBranchTarget"); // ?
- MISC("bl_target", "kOperandTypeARMBranchTarget"); // ?
- MISC("blx_target", "kOperandTypeARMBranchTarget"); // ?
-
- MISC("t_bltarget", "kOperandTypeARMBranchTarget"); // ?
- MISC("t_blxtarget", "kOperandTypeARMBranchTarget"); // ?
- MISC("so_reg_imm", "kOperandTypeARMSoRegReg"); // R, R, I
- MISC("so_reg_reg", "kOperandTypeARMSoRegImm"); // R, R, I
- MISC("shift_so_reg_reg", "kOperandTypeARMSoRegReg"); // R, R, I
- MISC("shift_so_reg_imm", "kOperandTypeARMSoRegImm"); // R, R, I
- MISC("t2_so_reg", "kOperandTypeThumb2SoReg"); // R, I
- MISC("so_imm", "kOperandTypeARMSoImm"); // I
- MISC("rot_imm", "kOperandTypeARMRotImm"); // I
- MISC("t2_so_imm", "kOperandTypeThumb2SoImm"); // I
- MISC("so_imm2part", "kOperandTypeARMSoImm2Part"); // I
- MISC("pred", "kOperandTypeARMPredicate"); // I, R
- MISC("it_pred", "kOperandTypeARMPredicate"); // I
- MISC("addrmode_imm12", "kOperandTypeAddrModeImm12"); // R, I
- MISC("ldst_so_reg", "kOperandTypeLdStSOReg"); // R, R, I
- MISC("postidx_reg", "kOperandTypeARMAddrMode3Offset"); // R, I
- MISC("addrmode2", "kOperandTypeARMAddrMode2"); // R, R, I
- MISC("am2offset_reg", "kOperandTypeARMAddrMode2Offset"); // R, I
- MISC("am2offset_imm", "kOperandTypeARMAddrMode2Offset"); // R, I
- MISC("addrmode3", "kOperandTypeARMAddrMode3"); // R, R, I
- MISC("am3offset", "kOperandTypeARMAddrMode3Offset"); // R, I
- MISC("ldstm_mode", "kOperandTypeARMLdStmMode"); // I
- MISC("addrmode5", "kOperandTypeARMAddrMode5"); // R, I
- MISC("addrmode6", "kOperandTypeARMAddrMode6"); // R, R, I, I
- MISC("am6offset", "kOperandTypeARMAddrMode6Offset"); // R, I, I
- MISC("addrmode6dup", "kOperandTypeARMAddrMode6"); // R, R, I, I
- MISC("addrmode6oneL32", "kOperandTypeARMAddrMode6"); // R, R, I, I
- MISC("addrmodepc", "kOperandTypeARMAddrModePC"); // R, I
- MISC("addr_offset_none", "kOperandTypeARMAddrMode7"); // R
- MISC("reglist", "kOperandTypeARMRegisterList"); // I, R, ...
- MISC("dpr_reglist", "kOperandTypeARMDPRRegisterList"); // I, R, ...
- MISC("spr_reglist", "kOperandTypeARMSPRRegisterList"); // I, R, ...
- MISC("it_mask", "kOperandTypeThumbITMask"); // I
- MISC("t2addrmode_reg", "kOperandTypeThumb2AddrModeReg"); // R
- MISC("t2addrmode_posimm8", "kOperandTypeThumb2AddrModeImm8"); // R, I
- MISC("t2addrmode_negimm8", "kOperandTypeThumb2AddrModeImm8"); // R, I
- MISC("t2addrmode_imm8", "kOperandTypeThumb2AddrModeImm8"); // R, I
- MISC("t2am_imm8_offset", "kOperandTypeThumb2AddrModeImm8Offset");//I
- MISC("t2addrmode_imm12", "kOperandTypeThumb2AddrModeImm12"); // R, I
- MISC("t2addrmode_so_reg", "kOperandTypeThumb2AddrModeSoReg"); // R, R, I
- MISC("t2addrmode_imm8s4", "kOperandTypeThumb2AddrModeImm8s4"); // R, I
- MISC("t2addrmode_imm0_1020s4", "kOperandTypeThumb2AddrModeImm8s4"); // R, I
- MISC("t2am_imm8s4_offset", "kOperandTypeThumb2AddrModeImm8s4Offset");
- // R, I
- MISC("tb_addrmode", "kOperandTypeARMTBAddrMode"); // I
- MISC("t_addrmode_rrs1", "kOperandTypeThumbAddrModeRegS1"); // R, R
- MISC("t_addrmode_rrs2", "kOperandTypeThumbAddrModeRegS2"); // R, R
- MISC("t_addrmode_rrs4", "kOperandTypeThumbAddrModeRegS4"); // R, R
- MISC("t_addrmode_is1", "kOperandTypeThumbAddrModeImmS1"); // R, I
- MISC("t_addrmode_is2", "kOperandTypeThumbAddrModeImmS2"); // R, I
- MISC("t_addrmode_is4", "kOperandTypeThumbAddrModeImmS4"); // R, I
- MISC("t_addrmode_rr", "kOperandTypeThumbAddrModeRR"); // R, R
- MISC("t_addrmode_sp", "kOperandTypeThumbAddrModeSP"); // R, I
- MISC("t_addrmode_pc", "kOperandTypeThumbAddrModePC"); // R, I
- MISC("addrmode_tbb", "kOperandTypeThumbAddrModeRR"); // R, R
- MISC("addrmode_tbh", "kOperandTypeThumbAddrModeRR"); // R, R
-
- return 1;
-}
-
-#undef REG
-#undef MEM
-#undef MISC
-
-#undef SET
-
-/// ARMPopulateOperands - Handles all the operands in an ARM instruction, adding
-/// the appropriate flags to their descriptors
-///
-/// \param operandTypes A reference to the array of operand type objects
-/// \param inst The instruction to use as a source of information
-static void ARMPopulateOperands(
- LiteralConstantEmitter *(&operandTypes)[EDIS_MAX_OPERANDS],
- const CodeGenInstruction &inst) {
- if (!inst.TheDef->isSubClassOf("InstARM") &&
- !inst.TheDef->isSubClassOf("InstThumb"))
- return;
-
- unsigned int index;
- unsigned int numOperands = inst.Operands.size();
-
- if (numOperands > EDIS_MAX_OPERANDS) {
- errs() << "numOperands == " << numOperands << " > " <<
- EDIS_MAX_OPERANDS << '\n';
- llvm_unreachable("Too many operands");
- }
-
- for (index = 0; index < numOperands; ++index) {
- const CGIOperandList::OperandInfo &operandInfo = inst.Operands[index];
- Record &rec = *operandInfo.Rec;
-
- if (ARMFlagFromOpName(operandTypes[index], rec.getName())) {
- errs() << "Operand type: " << rec.getName() << '\n';
- errs() << "Operand name: " << operandInfo.Name << '\n';
- errs() << "Instruction name: " << inst.TheDef->getName() << '\n';
- PrintFatalError("Unhandled type in EDEmitter");
- }
- }
-}
-
-#define BRANCH(target) { \
- instType.set("kInstructionTypeBranch"); \
- DECORATE1(target, "kOperandFlagTarget"); \
-}
-
-/// ARMExtractSemantics - Performs various checks on the name of an ARM
-/// instruction to determine what sort of instruction it is and then adds
-/// the appropriate flags to the instruction and its operands
-///
-/// \param instType A reference to the type for the instruction as a whole
-/// \param operandTypes A reference to the array of operand type object pointers
-/// \param operandFlags A reference to the array of operand flag object pointers
-/// \param inst A reference to the original instruction
-static void ARMExtractSemantics(
- LiteralConstantEmitter &instType,
- LiteralConstantEmitter *(&operandTypes)[EDIS_MAX_OPERANDS],
- FlagsConstantEmitter *(&operandFlags)[EDIS_MAX_OPERANDS],
- const CodeGenInstruction &inst) {
- const std::string &name = inst.TheDef->getName();
-
- if (name == "tBcc" ||
- name == "tB" ||
- name == "t2Bcc" ||
- name == "Bcc" ||
- name == "tCBZ" ||
- name == "tCBNZ") {
- BRANCH("target");
- }
-
- if (name == "tBLr9" ||
- name == "BLr9_pred" ||
- name == "tBLXi_r9" ||
- name == "tBLXr_r9" ||
- name == "BLXr9" ||
- name == "t2BXJ" ||
- name == "BXJ") {
- BRANCH("func");
-
- unsigned opIndex;
- opIndex = inst.Operands.getOperandNamed("func");
- if (operandTypes[opIndex]->is("kOperandTypeImmediate"))
- operandTypes[opIndex]->set("kOperandTypeARMBranchTarget");
- }
-}
-
-#undef BRANCH
-
-/// populateInstInfo - Fills an array of InstInfos with information about each
-/// instruction in a target
-///
-/// \param infoArray The array of InstInfo objects to populate
-/// \param target The CodeGenTarget to use as a source of instructions
-static void populateInstInfo(CompoundConstantEmitter &infoArray,
- CodeGenTarget &target) {
- const std::vector<const CodeGenInstruction*> &numberedInstructions =
- target.getInstructionsByEnumValue();
-
- unsigned int index;
- unsigned int numInstructions = numberedInstructions.size();
-
- for (index = 0; index < numInstructions; ++index) {
- const CodeGenInstruction& inst = *numberedInstructions[index];
-
- CompoundConstantEmitter *infoStruct = new CompoundConstantEmitter;
- infoArray.addEntry(infoStruct);
-
- LiteralConstantEmitter *instType = new LiteralConstantEmitter;
- infoStruct->addEntry(instType);
-
- LiteralConstantEmitter *numOperandsEmitter =
- new LiteralConstantEmitter(inst.Operands.size());
- infoStruct->addEntry(numOperandsEmitter);
-
- CompoundConstantEmitter *operandTypeArray = new CompoundConstantEmitter;
- infoStruct->addEntry(operandTypeArray);
-
- LiteralConstantEmitter *operandTypes[EDIS_MAX_OPERANDS];
-
- CompoundConstantEmitter *operandFlagArray = new CompoundConstantEmitter;
- infoStruct->addEntry(operandFlagArray);
-
- FlagsConstantEmitter *operandFlags[EDIS_MAX_OPERANDS];
-
- for (unsigned operandIndex = 0;
- operandIndex < EDIS_MAX_OPERANDS;
- ++operandIndex) {
- operandTypes[operandIndex] = new LiteralConstantEmitter;
- operandTypeArray->addEntry(operandTypes[operandIndex]);
-
- operandFlags[operandIndex] = new FlagsConstantEmitter;
- operandFlagArray->addEntry(operandFlags[operandIndex]);
- }
-
- unsigned numSyntaxes = 0;
-
- // Pseudo-instructions carry no useful operand or semantic information,
- // so we skip populating them, but we still emit null initializers for
- // their entries.
- if (!inst.isPseudo) {
- if (target.getName() == "X86") {
- X86PopulateOperands(operandTypes, inst);
- X86ExtractSemantics(*instType, operandFlags, inst);
- numSyntaxes = 2;
- }
- else if (target.getName() == "ARM") {
- ARMPopulateOperands(operandTypes, inst);
- ARMExtractSemantics(*instType, operandTypes, operandFlags, inst);
- numSyntaxes = 1;
- }
- }
-
- CompoundConstantEmitter *operandOrderArray = new CompoundConstantEmitter;
-
- infoStruct->addEntry(operandOrderArray);
-
- for (unsigned syntaxIndex = 0;
- syntaxIndex < EDIS_MAX_SYNTAXES;
- ++syntaxIndex) {
- CompoundConstantEmitter *operandOrder =
- new CompoundConstantEmitter(EDIS_MAX_OPERANDS);
-
- operandOrderArray->addEntry(operandOrder);
-
- if (syntaxIndex < numSyntaxes) {
- populateOperandOrder(operandOrder, inst, syntaxIndex);
- }
- }
-
- infoStruct = NULL;
- }
-}
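-// Rough shape of a single emitted instInfo entry (assumed formatting; it
-// follows the addEntry calls above): instruction type, operand count, an
-// EDIS_MAX_OPERANDS-wide operand-type array, a parallel operand-flags
-// array, and one operand-order array per syntax slot, with unused order
-// slots padded to -1.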
-
-static void emitCommonEnums(raw_ostream &o, unsigned int &i) {
- EnumEmitter operandTypes("OperandTypes");
- operandTypes.addEntry("kOperandTypeNone");
- operandTypes.addEntry("kOperandTypeImmediate");
- operandTypes.addEntry("kOperandTypeRegister");
- operandTypes.addEntry("kOperandTypeX86Memory");
- operandTypes.addEntry("kOperandTypeX86EffectiveAddress");
- operandTypes.addEntry("kOperandTypeX86PCRelative");
- operandTypes.addEntry("kOperandTypeARMBranchTarget");
- operandTypes.addEntry("kOperandTypeARMSoRegReg");
- operandTypes.addEntry("kOperandTypeARMSoRegImm");
- operandTypes.addEntry("kOperandTypeARMSoImm");
- operandTypes.addEntry("kOperandTypeARMRotImm");
- operandTypes.addEntry("kOperandTypeARMSoImm2Part");
- operandTypes.addEntry("kOperandTypeARMPredicate");
- operandTypes.addEntry("kOperandTypeAddrModeImm12");
- operandTypes.addEntry("kOperandTypeLdStSOReg");
- operandTypes.addEntry("kOperandTypeARMAddrMode2");
- operandTypes.addEntry("kOperandTypeARMAddrMode2Offset");
- operandTypes.addEntry("kOperandTypeARMAddrMode3");
- operandTypes.addEntry("kOperandTypeARMAddrMode3Offset");
- operandTypes.addEntry("kOperandTypeARMLdStmMode");
- operandTypes.addEntry("kOperandTypeARMAddrMode5");
- operandTypes.addEntry("kOperandTypeARMAddrMode6");
- operandTypes.addEntry("kOperandTypeARMAddrMode6Offset");
- operandTypes.addEntry("kOperandTypeARMAddrMode7");
- operandTypes.addEntry("kOperandTypeARMAddrModePC");
- operandTypes.addEntry("kOperandTypeARMRegisterList");
- operandTypes.addEntry("kOperandTypeARMDPRRegisterList");
- operandTypes.addEntry("kOperandTypeARMSPRRegisterList");
- operandTypes.addEntry("kOperandTypeARMTBAddrMode");
- operandTypes.addEntry("kOperandTypeThumbITMask");
- operandTypes.addEntry("kOperandTypeThumbAddrModeImmS1");
- operandTypes.addEntry("kOperandTypeThumbAddrModeImmS2");
- operandTypes.addEntry("kOperandTypeThumbAddrModeImmS4");
- operandTypes.addEntry("kOperandTypeThumbAddrModeRegS1");
- operandTypes.addEntry("kOperandTypeThumbAddrModeRegS2");
- operandTypes.addEntry("kOperandTypeThumbAddrModeRegS4");
- operandTypes.addEntry("kOperandTypeThumbAddrModeRR");
- operandTypes.addEntry("kOperandTypeThumbAddrModeSP");
- operandTypes.addEntry("kOperandTypeThumbAddrModePC");
- operandTypes.addEntry("kOperandTypeThumb2AddrModeReg");
- operandTypes.addEntry("kOperandTypeThumb2SoReg");
- operandTypes.addEntry("kOperandTypeThumb2SoImm");
- operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8");
- operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8Offset");
- operandTypes.addEntry("kOperandTypeThumb2AddrModeImm12");
- operandTypes.addEntry("kOperandTypeThumb2AddrModeSoReg");
- operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8s4");
- operandTypes.addEntry("kOperandTypeThumb2AddrModeImm8s4Offset");
- operandTypes.emit(o, i);
-
- o << "\n";
-
- EnumEmitter operandFlags("OperandFlags");
- operandFlags.addEntry("kOperandFlagSource");
- operandFlags.addEntry("kOperandFlagTarget");
- operandFlags.emitAsFlags(o, i);
-
- o << "\n";
-
- EnumEmitter instructionTypes("InstructionTypes");
- instructionTypes.addEntry("kInstructionTypeNone");
- instructionTypes.addEntry("kInstructionTypeMove");
- instructionTypes.addEntry("kInstructionTypeBranch");
- instructionTypes.addEntry("kInstructionTypePush");
- instructionTypes.addEntry("kInstructionTypePop");
- instructionTypes.addEntry("kInstructionTypeCall");
- instructionTypes.addEntry("kInstructionTypeReturn");
- instructionTypes.emit(o, i);
-
- o << "\n";
-}
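-// The generated file thus carries three shared enums; EnumEmitter
-// presumably renders them as C enums (with emitAsFlags assigning
-// power-of-two values to the operand flags), e.g.
-//   enum { kOperandFlagSource = 0x1, kOperandFlagTarget = 0x2 };
-// (illustrative only; the exact layout is defined by EnumEmitter).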
-
-namespace llvm {
-
-void EmitEnhancedDisassemblerInfo(RecordKeeper &RK, raw_ostream &OS) {
- emitSourceFileHeader("Enhanced Disassembler Info", OS);
- unsigned int i = 0;
-
- CompoundConstantEmitter infoArray;
- CodeGenTarget target(RK);
-
- populateInstInfo(infoArray, target);
-
- emitCommonEnums(OS, i);
-
- OS << "static const llvm::EDInstInfo instInfo"
- << target.getName() << "[] = ";
- infoArray.emit(OS, i);
- OS << ";" << "\n";
-}
-
-} // End llvm namespace
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 5cabcadabdbc..0c3017f38920 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -15,8 +15,6 @@
#define DEBUG_TYPE "decoder-emitter"
#include "CodeGenTarget.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -28,11 +26,12 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-
-#include <vector>
#include <map>
#include <string>
+#include <vector>
using namespace llvm;
@@ -1867,7 +1866,7 @@ static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
<< " if (numBits == sizeof(InsnType)*8)\n"
<< " fieldMask = (InsnType)(-1LL);\n"
<< " else\n"
- << " fieldMask = ((1 << numBits) - 1) << startBit;\n"
+ << " fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n"
<< " return (insn & fieldMask) >> startBit;\n"
<< "}\n\n";
}
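// Why the added cast matters: the literal "1" has type int, so with a 64-bit
// InsnType and numBits >= 32 the old "(1 << numBits)" shift was undefined
// behavior and could never form the wide mask; "((InsnType)1 << numBits)"
// performs the shift at the full width of the instruction type.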
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 48d41d7b96bd..d6020a8461d2 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -16,8 +16,8 @@
#include "CodeGenDAGPatterns.h"
#include "CodeGenSchedule.h"
#include "CodeGenTarget.h"
-#include "TableGenBackends.h"
#include "SequenceToOffsetTable.h"
+#include "TableGenBackends.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
@@ -271,7 +271,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
std::string ClassName = TargetName + "GenInstrInfo";
OS << "namespace llvm {\n";
- OS << "struct " << ClassName << " : public TargetInstrInfoImpl {\n"
+ OS << "struct " << ClassName << " : public TargetInstrInfo {\n"
<< " explicit " << ClassName << "(int SO = -1, int DO = -1);\n"
<< "};\n";
OS << "} // End llvm namespace \n";
@@ -286,7 +286,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OS << "extern const unsigned " << TargetName << "InstrNameIndices[];\n";
OS << "extern const char " << TargetName << "InstrNameData[];\n";
OS << ClassName << "::" << ClassName << "(int SO, int DO)\n"
- << " : TargetInstrInfoImpl(SO, DO) {\n"
+ << " : TargetInstrInfo(SO, DO) {\n"
<< " InitMCInstrInfo(" << TargetName << "Insts, "
<< TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, "
<< NumberedInstructions.size() << ");\n}\n";
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index fe55242930b1..df4d847a4d7f 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -221,27 +221,28 @@ enum IIT_Info {
IIT_I16 = 3,
IIT_I32 = 4,
IIT_I64 = 5,
- IIT_F32 = 6,
- IIT_F64 = 7,
- IIT_V2 = 8,
- IIT_V4 = 9,
- IIT_V8 = 10,
- IIT_V16 = 11,
- IIT_V32 = 12,
- IIT_MMX = 13,
+ IIT_F16 = 6,
+ IIT_F32 = 7,
+ IIT_F64 = 8,
+ IIT_V2 = 9,
+ IIT_V4 = 10,
+ IIT_V8 = 11,
+ IIT_V16 = 12,
+ IIT_V32 = 13,
IIT_PTR = 14,
IIT_ARG = 15,
-
+
// Values from 16+ are only encodable with the inefficient encoding.
- IIT_METADATA = 16,
- IIT_EMPTYSTRUCT = 17,
- IIT_STRUCT2 = 18,
- IIT_STRUCT3 = 19,
- IIT_STRUCT4 = 20,
- IIT_STRUCT5 = 21,
- IIT_EXTEND_VEC_ARG = 22,
- IIT_TRUNC_VEC_ARG = 23,
- IIT_ANYPTR = 24
+ IIT_MMX = 16,
+ IIT_METADATA = 17,
+ IIT_EMPTYSTRUCT = 18,
+ IIT_STRUCT2 = 19,
+ IIT_STRUCT3 = 20,
+ IIT_STRUCT4 = 21,
+ IIT_STRUCT5 = 22,
+ IIT_EXTEND_VEC_ARG = 23,
+ IIT_TRUNC_VEC_ARG = 24,
+ IIT_ANYPTR = 25
};
@@ -261,6 +262,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT,
switch (VT) {
default: PrintFatalError("unhandled MVT in intrinsic!");
+ case MVT::f16: return Sig.push_back(IIT_F16);
case MVT::f32: return Sig.push_back(IIT_F32);
case MVT::f64: return Sig.push_back(IIT_F64);
case MVT::Metadata: return Sig.push_back(IIT_METADATA);
@@ -511,10 +513,10 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
OS << "// Add parameter attributes that are not common to all intrinsics.\n";
OS << "#ifdef GET_INTRINSIC_ATTRIBUTES\n";
if (TargetOnly)
- OS << "static AttrListPtr getAttributes(LLVMContext &C, " << TargetPrefix
+ OS << "static AttributeSet getAttributes(LLVMContext &C, " << TargetPrefix
<< "Intrinsic::ID id) {\n";
else
- OS << "AttrListPtr Intrinsic::getAttributes(LLVMContext &C, ID id) {\n";
+ OS << "AttributeSet Intrinsic::getAttributes(LLVMContext &C, ID id) {\n";
// Compute the maximum number of attribute arguments and the map
typedef std::map<const CodeGenIntrinsic*, unsigned,
@@ -532,9 +534,8 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
N = ++AttrNum;
}
- // Emit an array of AttributeWithIndex. Most intrinsics will have
- // at least one entry, for the function itself (index ~1), which is
- // usually nounwind.
+ // Emit an array of AttributeSet. Most intrinsics will have at least one
+ // entry, for the function itself (index ~1), which is usually nounwind.
OS << " static const uint8_t IntrinsicsToAttributesMap[] = {\n";
for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
@@ -545,10 +546,10 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
}
OS << " };\n\n";
- OS << " AttributeWithIndex AWI[" << maxArgAttrs+1 << "];\n";
+ OS << " AttributeSet AS[" << maxArgAttrs+1 << "];\n";
OS << " unsigned NumAttrs = 0;\n";
OS << " if (id != 0) {\n";
- OS << " SmallVector<Attributes::AttrVal, 8> AttrVec;\n";
+ OS << " SmallVector<Attribute::AttrKind, 8> AttrVec;\n";
OS << " switch(IntrinsicsToAttributesMap[id - ";
if (TargetOnly)
OS << "Intrinsic::num_intrinsics";
@@ -576,14 +577,14 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
do {
switch (intrinsic.ArgumentAttributes[ai].second) {
case CodeGenIntrinsic::NoCapture:
- OS << " AttrVec.push_back(Attributes::NoCapture);\n";
+ OS << " AttrVec.push_back(Attribute::NoCapture);\n";
break;
}
++ai;
} while (ai != ae && intrinsic.ArgumentAttributes[ai].first == argNo);
- OS << " AWI[" << numAttrs++ << "] = AttributeWithIndex::get(C, "
+ OS << " AS[" << numAttrs++ << "] = AttributeSet::get(C, "
<< argNo+1 << ", AttrVec);\n";
}
}
@@ -594,34 +595,34 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
OS << " AttrVec.clear();\n";
if (!intrinsic.canThrow)
- OS << " AttrVec.push_back(Attributes::NoUnwind);\n";
+ OS << " AttrVec.push_back(Attribute::NoUnwind);\n";
if (intrinsic.isNoReturn)
- OS << " AttrVec.push_back(Attributes::NoReturn);\n";
+ OS << " AttrVec.push_back(Attribute::NoReturn);\n";
switch (modRef) {
case MRK_none: break;
case MRK_readonly:
- OS << " AttrVec.push_back(Attributes::ReadOnly);\n";
+ OS << " AttrVec.push_back(Attribute::ReadOnly);\n";
break;
case MRK_readnone:
- OS << " AttrVec.push_back(Attributes::ReadNone);\n";
+ OS << " AttrVec.push_back(Attribute::ReadNone);\n";
break;
}
- OS << " AWI[" << numAttrs++ << "] = AttributeWithIndex::get(C, "
- << "AttrListPtr::FunctionIndex, AttrVec);\n";
+ OS << " AS[" << numAttrs++ << "] = AttributeSet::get(C, "
+ << "AttributeSet::FunctionIndex, AttrVec);\n";
}
if (numAttrs) {
OS << " NumAttrs = " << numAttrs << ";\n";
OS << " break;\n";
} else {
- OS << " return AttrListPtr();\n";
+ OS << " return AttributeSet();\n";
}
}
OS << " }\n";
OS << " }\n";
- OS << " return AttrListPtr::get(C, ArrayRef<AttributeWithIndex>(AWI, "
+ OS << " return AttributeSet::get(C, ArrayRef<AttributeSet>(AS, "
"NumAttrs));\n";
OS << "}\n";
OS << "#endif // GET_INTRINSIC_ATTRIBUTES\n\n";
diff --git a/utils/TableGen/OptParserEmitter.cpp b/utils/TableGen/OptParserEmitter.cpp
new file mode 100644
index 000000000000..0c1f6236e059
--- /dev/null
+++ b/utils/TableGen/OptParserEmitter.cpp
@@ -0,0 +1,266 @@
+//===- OptParserEmitter.cpp - Table Driven Command Line Parsing -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/Error.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <map>
+
+using namespace llvm;
+
+static int StrCmpOptionName(const char *A, const char *B) {
+ char a = *A, b = *B;
+ while (a == b) {
+ if (a == '\0')
+ return 0;
+
+ a = *++A;
+ b = *++B;
+ }
+
+ if (a == '\0') // A is a prefix of B.
+ return 1;
+ if (b == '\0') // B is a prefix of A.
+ return -1;
+
+ // Otherwise lexicographic.
+ return (a < b) ? -1 : 1;
+}
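+// Illustrative behavior: StrCmpOptionName("foo", "foobar") returns 1, i.e.
+// an option name sorts after any option it is a proper prefix of; identical
+// names compare equal (0) and everything else is ordered lexicographically.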
+
+static int CompareOptionRecords(const void *Av, const void *Bv) {
+ const Record *A = *(const Record*const*) Av;
+ const Record *B = *(const Record*const*) Bv;
+
+ // Sentinel options precede all others and are only ordered by precedence.
+ bool ASent = A->getValueAsDef("Kind")->getValueAsBit("Sentinel");
+ bool BSent = B->getValueAsDef("Kind")->getValueAsBit("Sentinel");
+ if (ASent != BSent)
+ return ASent ? -1 : 1;
+
+ // Compare options by name, unless they are sentinels.
+ if (!ASent)
+ if (int Cmp = StrCmpOptionName(A->getValueAsString("Name").c_str(),
+ B->getValueAsString("Name").c_str()))
+ return Cmp;
+
+ if (!ASent) {
+ std::vector<std::string> APrefixes = A->getValueAsListOfStrings("Prefixes");
+ std::vector<std::string> BPrefixes = B->getValueAsListOfStrings("Prefixes");
+
+ for (std::vector<std::string>::const_iterator APre = APrefixes.begin(),
+ AEPre = APrefixes.end(),
+ BPre = BPrefixes.begin(),
+ BEPre = BPrefixes.end();
+ APre != AEPre &&
+ BPre != BEPre;
+ ++APre, ++BPre) {
+ if (int Cmp = StrCmpOptionName(APre->c_str(), BPre->c_str()))
+ return Cmp;
+ }
+ }
+
+ // Then by the kind precedence.
+ int APrec = A->getValueAsDef("Kind")->getValueAsInt("Precedence");
+ int BPrec = B->getValueAsDef("Kind")->getValueAsInt("Precedence");
+ if (APrec == BPrec &&
+ A->getValueAsListOfStrings("Prefixes") ==
+ B->getValueAsListOfStrings("Prefixes")) {
+ PrintError(A->getLoc(), Twine("Option is equivalent to"));
+ PrintError(B->getLoc(), Twine("Other defined here"));
+ PrintFatalError("Equivalent Options found.");
+ }
+ return APrec < BPrec ? -1 : 1;
+}
+
+static const std::string getOptionName(const Record &R) {
+ // Use the record name unless EnumName is defined.
+ if (isa<UnsetInit>(R.getValueInit("EnumName")))
+ return R.getName();
+
+ return R.getValueAsString("EnumName");
+}
+
+static raw_ostream &write_cstring(raw_ostream &OS, llvm::StringRef Str) {
+ OS << '"';
+ OS.write_escaped(Str);
+ OS << '"';
+ return OS;
+}
+
+/// OptParserEmitter - This tablegen backend takes an input .td file
+/// describing a list of options and emits a data structure for parsing and
+/// working with those options when given an input command line.
+namespace llvm {
+void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
+ // Get the option groups and options.
+ const std::vector<Record*> &Groups =
+ Records.getAllDerivedDefinitions("OptionGroup");
+ std::vector<Record*> Opts = Records.getAllDerivedDefinitions("Option");
+
+ emitSourceFileHeader("Option Parsing Definitions", OS);
+
+ array_pod_sort(Opts.begin(), Opts.end(), CompareOptionRecords);
+ // Generate prefix groups.
+ typedef SmallVector<SmallString<2>, 2> PrefixKeyT;
+ typedef std::map<PrefixKeyT, std::string> PrefixesT;
+ PrefixesT Prefixes;
+ Prefixes.insert(std::make_pair(PrefixKeyT(), "prefix_0"));
+ unsigned CurPrefix = 0;
+ for (unsigned i = 0, e = Opts.size(); i != e; ++i) {
+ const Record &R = *Opts[i];
+ std::vector<std::string> prf = R.getValueAsListOfStrings("Prefixes");
+ PrefixKeyT prfkey(prf.begin(), prf.end());
+ unsigned NewPrefix = CurPrefix + 1;
+ if (Prefixes.insert(std::make_pair(prfkey, (Twine("prefix_") +
+ Twine(NewPrefix)).str())).second)
+ CurPrefix = NewPrefix;
+ }
+
+ // Dump prefixes.
+
+ OS << "/////////\n";
+ OS << "// Prefixes\n\n";
+ OS << "#ifdef PREFIX\n";
+ OS << "#define COMMA ,\n";
+ for (PrefixesT::const_iterator I = Prefixes.begin(), E = Prefixes.end();
+ I != E; ++I) {
+ OS << "PREFIX(";
+
+ // Prefix name.
+ OS << I->second;
+
+ // Prefix values.
+ OS << ", {";
+ for (PrefixKeyT::const_iterator PI = I->first.begin(),
+ PE = I->first.end(); PI != PE; ++PI) {
+ OS << "\"" << *PI << "\" COMMA ";
+ }
+ OS << "0})\n";
+ }
+ OS << "#undef COMMA\n";
+ OS << "#endif\n\n";
+
+ OS << "/////////\n";
+ OS << "// Groups\n\n";
+ OS << "#ifdef OPTION\n";
+ for (unsigned i = 0, e = Groups.size(); i != e; ++i) {
+ const Record &R = *Groups[i];
+
+ // Start a single option entry.
+ OS << "OPTION(";
+
+ // The option prefix.
+ OS << "0";
+
+ // The option string.
+ OS << ", \"" << R.getValueAsString("Name") << '"';
+
+ // The option identifier name.
+ OS << ", "<< getOptionName(R);
+
+ // The option kind.
+ OS << ", Group";
+
+ // The containing option group (if any).
+ OS << ", ";
+ if (const DefInit *DI = dyn_cast<DefInit>(R.getValueInit("Group")))
+ OS << getOptionName(*DI->getDef());
+ else
+ OS << "INVALID";
+
+ // The other option arguments (unused for groups).
+ OS << ", INVALID, 0, 0";
+
+ // The option help text.
+ if (!isa<UnsetInit>(R.getValueInit("HelpText"))) {
+ OS << ",\n";
+ OS << " ";
+ write_cstring(OS, R.getValueAsString("HelpText"));
+ } else
+ OS << ", 0";
+
+ // The option meta-variable name (unused).
+ OS << ", 0)\n";
+ }
+ OS << "\n";
+
+ OS << "//////////\n";
+ OS << "// Options\n\n";
+ for (unsigned i = 0, e = Opts.size(); i != e; ++i) {
+ const Record &R = *Opts[i];
+
+ // Start a single option entry.
+ OS << "OPTION(";
+
+ // The option prefix.
+ std::vector<std::string> prf = R.getValueAsListOfStrings("Prefixes");
+ OS << Prefixes[PrefixKeyT(prf.begin(), prf.end())] << ", ";
+
+ // The option string.
+ write_cstring(OS, R.getValueAsString("Name"));
+
+ // The option identifier name.
+ OS << ", "<< getOptionName(R);
+
+ // The option kind.
+ OS << ", " << R.getValueAsDef("Kind")->getValueAsString("Name");
+
+ // The containing option group (if any).
+ OS << ", ";
+ if (const DefInit *DI = dyn_cast<DefInit>(R.getValueInit("Group")))
+ OS << getOptionName(*DI->getDef());
+ else
+ OS << "INVALID";
+
+ // The option alias (if any).
+ OS << ", ";
+ if (const DefInit *DI = dyn_cast<DefInit>(R.getValueInit("Alias")))
+ OS << getOptionName(*DI->getDef());
+ else
+ OS << "INVALID";
+
+ // The option flags.
+ const ListInit *LI = R.getValueAsListInit("Flags");
+ if (LI->empty()) {
+ OS << ", 0";
+ } else {
+ OS << ", ";
+ for (unsigned i = 0, e = LI->size(); i != e; ++i) {
+ if (i)
+ OS << " | ";
+ OS << cast<DefInit>(LI->getElement(i))->getDef()->getName();
+ }
+ }
+
+ // The option parameter field.
+ OS << ", " << R.getValueAsInt("NumArgs");
+
+ // The option help text.
+ if (!isa<UnsetInit>(R.getValueInit("HelpText"))) {
+ OS << ",\n";
+ OS << " ";
+ write_cstring(OS, R.getValueAsString("HelpText"));
+ } else
+ OS << ", 0";
+
+ // The option meta-variable name.
+ OS << ", ";
+ if (!isa<UnsetInit>(R.getValueInit("MetaVarName")))
+ write_cstring(OS, R.getValueAsString("MetaVarName"));
+ else
+ OS << "0";
+
+ OS << ")\n";
+ }
+ OS << "#endif\n";
+}
+} // end namespace llvm
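+// Typical use of the generated file (illustrative; "Options.inc" is the
+// conventional name, and the macro shapes mirror the #ifdef guards emitted
+// above):
+//   #define PREFIX(NAME, VALUE) static const char *const NAME[] = VALUE;
+//   #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, FLAGS, PARAM, \
+//                  HELPTEXT, METAVAR) /* build an option table entry */
+//   #include "Options.inc"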
diff --git a/utils/TableGen/PseudoLoweringEmitter.cpp b/utils/TableGen/PseudoLoweringEmitter.cpp
index 64aaee756b1d..1ea6f7932a3e 100644
--- a/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -252,6 +252,7 @@ void PseudoLoweringEmitter::emitLoweringEmitter(raw_ostream &o) {
MIOpNo += Dest.Operands[OpNo].MINumOperands;
}
if (Dest.Operands.isVariadic) {
+ MIOpNo = Source.Operands.size() + 1;
o << " // variable_ops\n";
o << " for (unsigned i = " << MIOpNo
<< ", e = MI->getNumOperands(); i != e; ++i)\n"
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 95b626723830..1b5d90b8bda2 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -185,6 +185,36 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
<< " return RCWeightTable[RC->getID()];\n"
<< "}\n\n";
+ // Reasonable targets (not ARMv7) have unit weight for all units, so don't
+ // bother generating a table.
+ bool RegUnitsHaveUnitWeight = true;
+ for (unsigned UnitIdx = 0, UnitEnd = RegBank.getNumNativeRegUnits();
+ UnitIdx < UnitEnd; ++UnitIdx) {
+ if (RegBank.getRegUnit(UnitIdx).Weight > 1)
+ RegUnitsHaveUnitWeight = false;
+ }
+ OS << "/// Get the weight in units of pressure for this register unit.\n"
+ << "unsigned " << ClassName << "::\n"
+ << "getRegUnitWeight(unsigned RegUnit) const {\n"
+ << " assert(RegUnit < " << RegBank.getNumNativeRegUnits()
+ << " && \"invalid register unit\");\n";
+ if (!RegUnitsHaveUnitWeight) {
+ OS << " static const uint8_t RUWeightTable[] = {\n ";
+ for (unsigned UnitIdx = 0, UnitEnd = RegBank.getNumNativeRegUnits();
+ UnitIdx < UnitEnd; ++UnitIdx) {
+ const RegUnit &RU = RegBank.getRegUnit(UnitIdx);
+ assert(RU.Weight < 256 && "RegUnit too heavy");
+ OS << RU.Weight << ", ";
+ }
+ OS << "0 };\n"
+ << " return RUWeightTable[RegUnit];\n";
+ }
+ else {
+ OS << " // All register units have unit weight.\n"
+ << " return 1;\n";
+ }
+ OS << "}\n\n";
+
OS << "\n"
<< "// Get the number of dimensions of register pressure.\n"
<< "unsigned " << ClassName << "::getNumRegPressureSets() const {\n"
@@ -215,14 +245,13 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
<< " return PressureLimitTable[Idx];\n"
<< "}\n\n";
- OS << "/// Get the dimensions of register pressure "
- << "impacted by this register class.\n"
- << "/// Returns a -1 terminated array of pressure set IDs\n"
- << "const int* " << ClassName << "::\n"
- << "getRegClassPressureSets(const TargetRegisterClass *RC) const {\n"
- << " static const int RCSetsTable[] = {\n ";
- std::vector<unsigned> RCSetStarts(NumRCs);
- for (unsigned i = 0, StartIdx = 0, e = NumRCs; i != e; ++i) {
+ // This table may be larger than NumRCs if some register units needed a list
+ // of unit sets that did not correspond to a register class.
+ unsigned NumRCUnitSets = RegBank.getNumRegClassPressureSetLists();
+ OS << "/// Table of pressure sets per register class or unit.\n"
+ << "static const int RCSetsTable[] = {\n ";
+ std::vector<unsigned> RCSetStarts(NumRCUnitSets);
+ for (unsigned i = 0, StartIdx = 0, e = NumRCUnitSets; i != e; ++i) {
RCSetStarts[i] = StartIdx;
ArrayRef<unsigned> PSetIDs = RegBank.getRCPressureSetIDs(i);
for (ArrayRef<unsigned>::iterator PSetI = PSetIDs.begin(),
@@ -230,10 +259,26 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
OS << *PSetI << ", ";
++StartIdx;
}
- OS << "-1, \t// " << RegBank.getRegClasses()[i]->getName() << "\n ";
+ OS << "-1, \t// #" << RCSetStarts[i] << " ";
+ if (i < NumRCs)
+ OS << RegBank.getRegClasses()[i]->getName();
+ else {
+ OS << "inferred";
+ for (ArrayRef<unsigned>::iterator PSetI = PSetIDs.begin(),
+ PSetE = PSetIDs.end(); PSetI != PSetE; ++PSetI) {
+ OS << "~" << RegBank.getRegPressureSet(*PSetI).Name;
+ }
+ }
+ OS << "\n ";
++StartIdx;
}
- OS << "-1 };\n";
+ OS << "-1 };\n\n";
+
+ OS << "/// Get the dimensions of register pressure impacted by this "
+ << "register class.\n"
+ << "/// Returns a -1 terminated array of pressure set IDs\n"
+ << "const int* " << ClassName << "::\n"
+ << "getRegClassPressureSets(const TargetRegisterClass *RC) const {\n";
OS << " static const unsigned RCSetStartTable[] = {\n ";
for (unsigned i = 0, e = NumRCs; i != e; ++i) {
OS << RCSetStarts[i] << ",";
@@ -242,6 +287,23 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
<< " unsigned SetListStart = RCSetStartTable[RC->getID()];\n"
<< " return &RCSetsTable[SetListStart];\n"
<< "}\n\n";
+
+ OS << "/// Get the dimensions of register pressure impacted by this "
+ << "register unit.\n"
+ << "/// Returns a -1 terminated array of pressure set IDs\n"
+ << "const int* " << ClassName << "::\n"
+ << "getRegUnitPressureSets(unsigned RegUnit) const {\n"
+ << " assert(RegUnit < " << RegBank.getNumNativeRegUnits()
+ << " && \"invalid register unit\");\n";
+ OS << " static const unsigned RUSetStartTable[] = {\n ";
+ for (unsigned UnitIdx = 0, UnitEnd = RegBank.getNumNativeRegUnits();
+ UnitIdx < UnitEnd; ++UnitIdx) {
+ OS << RCSetStarts[RegBank.getRegUnit(UnitIdx).RegClassUnitSetsIdx] << ",";
+ }
+ OS << "0 };\n"
+ << " unsigned SetListStart = RUSetStartTable[RegUnit];\n"
+ << " return &RCSetsTable[SetListStart];\n"
+ << "}\n\n";
}
void
@@ -729,7 +791,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
const std::string &TargetName = Target.getName();
// Emit the shared table of differential lists.
- OS << "extern const uint16_t " << TargetName << "RegDiffLists[] = {\n";
+ OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[] = {\n";
DiffSeqs.emit(OS, printDiff16);
OS << "};\n\n";
@@ -859,9 +921,9 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
// MCRegisterInfo initialization routine.
OS << "static inline void Init" << TargetName
<< "MCRegisterInfo(MCRegisterInfo *RI, unsigned RA, "
- << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0) {\n"
+ << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0, unsigned PC = 0) {\n"
<< " RI->InitMCRegisterInfo(" << TargetName << "RegDesc, "
- << Regs.size()+1 << ", RA, " << TargetName << "MCRegisterClasses, "
+ << Regs.size()+1 << ", RA, PC, " << TargetName << "MCRegisterClasses, "
<< RegisterClasses.size() << ", "
<< TargetName << "RegUnitRoots, "
<< RegBank.getNumNativeRegUnits() << ", "
@@ -896,7 +958,7 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
OS << "struct " << ClassName << " : public TargetRegisterInfo {\n"
<< " explicit " << ClassName
- << "(unsigned RA, unsigned D = 0, unsigned E = 0);\n"
+ << "(unsigned RA, unsigned D = 0, unsigned E = 0, unsigned PC = 0);\n"
<< " virtual bool needsStackRealignment(const MachineFunction &) const\n"
<< " { return false; }\n";
if (!RegBank.getSubRegIndices().empty()) {
@@ -907,11 +969,13 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
}
OS << " virtual const RegClassWeight &getRegClassWeight("
<< "const TargetRegisterClass *RC) const;\n"
+ << " virtual unsigned getRegUnitWeight(unsigned RegUnit) const;\n"
<< " virtual unsigned getNumRegPressureSets() const;\n"
<< " virtual const char *getRegPressureSetName(unsigned Idx) const;\n"
<< " virtual unsigned getRegPressureSetLimit(unsigned Idx) const;\n"
<< " virtual const int *getRegClassPressureSets("
<< "const TargetRegisterClass *RC) const;\n"
+ << " virtual const int *getRegUnitPressureSets(unsigned RegUnit) const;\n"
<< "};\n\n";
ArrayRef<CodeGenRegisterClass*> RegisterClasses = RegBank.getRegClasses();
@@ -967,7 +1031,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
}
// Build a shared array of value types.
- SequenceToOffsetTable<std::vector<MVT::SimpleValueType> > VTSeqs;
+ SequenceToOffsetTable<SmallVector<MVT::SimpleValueType, 4> > VTSeqs;
for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc)
VTSeqs.add(RegisterClasses[rc]->VTs);
VTSeqs.layout();
@@ -1074,12 +1138,12 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "\nstatic inline unsigned " << RC.getName()
<< "AltOrderSelect(const MachineFunction &MF) {"
<< RC.AltOrderSelect << "}\n\n"
- << "static ArrayRef<uint16_t> " << RC.getName()
+ << "static ArrayRef<MCPhysReg> " << RC.getName()
<< "GetRawAllocationOrder(const MachineFunction &MF) {\n";
for (unsigned oi = 1 , oe = RC.getNumOrders(); oi != oe; ++oi) {
ArrayRef<Record*> Elems = RC.getOrder(oi);
if (!Elems.empty()) {
- OS << " static const uint16_t AltOrder" << oi << "[] = {";
+ OS << " static const MCPhysReg AltOrder" << oi << "[] = {";
for (unsigned elem = 0; elem != Elems.size(); ++elem)
OS << (elem ? ", " : " ") << getQualifiedName(Elems[elem]);
OS << " };\n";
@@ -1087,11 +1151,11 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
}
OS << " const MCRegisterClass &MCR = " << Target.getName()
<< "MCRegisterClasses[" << RC.getQualifiedName() + "RegClassID];\n"
- << " const ArrayRef<uint16_t> Order[] = {\n"
+ << " const ArrayRef<MCPhysReg> Order[] = {\n"
<< " makeArrayRef(MCR.begin(), MCR.getNumRegs()";
for (unsigned oi = 1, oe = RC.getNumOrders(); oi != oe; ++oi)
if (RC.getOrder(oi).empty())
- OS << "),\n ArrayRef<uint16_t>(";
+ OS << "),\n ArrayRef<MCPhysReg>(";
else
OS << "),\n makeArrayRef(AltOrder" << oi;
OS << ")\n };\n const unsigned Select = " << RC.getName()
@@ -1194,7 +1258,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
// Emit the constructor of the class...
OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n";
- OS << "extern const uint16_t " << TargetName << "RegDiffLists[];\n";
+ OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[];\n";
OS << "extern const char " << TargetName << "RegStrings[];\n";
OS << "extern const uint16_t " << TargetName << "RegUnitRoots[][2];\n";
OS << "extern const uint16_t " << TargetName << "SubRegIdxLists[];\n";
@@ -1203,12 +1267,12 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
EmitRegMappingTables(OS, Regs, true);
OS << ClassName << "::\n" << ClassName
- << "(unsigned RA, unsigned DwarfFlavour, unsigned EHFlavour)\n"
+ << "(unsigned RA, unsigned DwarfFlavour, unsigned EHFlavour, unsigned PC)\n"
<< " : TargetRegisterInfo(" << TargetName << "RegInfoDesc"
<< ", RegisterClasses, RegisterClasses+" << RegisterClasses.size() <<",\n"
<< " SubRegIndexNameTable, SubRegIndexLaneMaskTable) {\n"
<< " InitMCRegisterInfo(" << TargetName << "RegDesc, "
- << Regs.size()+1 << ", RA,\n " << TargetName
+ << Regs.size()+1 << ", RA, PC,\n " << TargetName
<< "MCRegisterClasses, " << RegisterClasses.size() << ",\n"
<< " " << TargetName << "RegUnitRoots,\n"
<< " " << RegBank.getNumNativeRegUnits() << ",\n"
@@ -1232,7 +1296,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
assert(Regs && "Cannot expand CalleeSavedRegs instance");
// Emit the *_SaveList list of callee-saved registers.
- OS << "static const uint16_t " << CSRSet->getName()
+ OS << "static const MCPhysReg " << CSRSet->getName()
<< "_SaveList[] = { ";
for (unsigned r = 0, re = Regs->size(); r != re; ++r)
OS << getQualifiedName((*Regs)[r]) << ", ";
diff --git a/utils/TableGen/SequenceToOffsetTable.h b/utils/TableGen/SequenceToOffsetTable.h
index d4db152a9681..fcda233dc913 100644
--- a/utils/TableGen/SequenceToOffsetTable.h
+++ b/utils/TableGen/SequenceToOffsetTable.h
@@ -17,11 +17,11 @@
#define TBLGEN_SEQUENCE_TO_OFFSET_TABLE_H
#include "llvm/Support/raw_ostream.h"
-#include <functional>
#include <algorithm>
-#include <vector>
#include <cassert>
#include <cctype>
+#include <functional>
+#include <vector>
namespace llvm {
diff --git a/utils/TableGen/SetTheory.cpp b/utils/TableGen/SetTheory.cpp
index 0dd9853843fe..3e5c38cf0a51 100644
--- a/utils/TableGen/SetTheory.cpp
+++ b/utils/TableGen/SetTheory.cpp
@@ -13,9 +13,9 @@
//===----------------------------------------------------------------------===//
#include "SetTheory.h"
+#include "llvm/Support/Format.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include "llvm/Support/Format.h"
using namespace llvm;
diff --git a/utils/TableGen/SetTheory.h b/utils/TableGen/SetTheory.h
index 122372ab33c0..5baed79fb76a 100644
--- a/utils/TableGen/SetTheory.h
+++ b/utils/TableGen/SetTheory.h
@@ -47,8 +47,8 @@
#ifndef SETTHEORY_H
#define SETTHEORY_H
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/Support/SourceMgr.h"
#include <map>
#include <vector>
diff --git a/utils/TableGen/StringToOffsetTable.h b/utils/TableGen/StringToOffsetTable.h
index a098d7d744a1..d94d3a266822 100644
--- a/utils/TableGen/StringToOffsetTable.h
+++ b/utils/TableGen/StringToOffsetTable.h
@@ -11,8 +11,8 @@
#define TBLGEN_STRING_TO_OFFSET_TABLE_H
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index f1a06bb52887..98892e11441b 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -15,14 +15,14 @@
#include "CodeGenTarget.h"
#include "CodeGenSchedule.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
#include <algorithm>
#include <map>
#include <string>
@@ -87,6 +87,8 @@ class SubtargetEmitter {
const CodeGenProcModel &ProcModel);
Record *FindReadAdvance(const CodeGenSchedRW &SchedRead,
const CodeGenProcModel &ProcModel);
+ void ExpandProcResources(RecVec &PRVec, std::vector<int64_t> &Cycles,
+ const CodeGenProcModel &ProcModel);
void GenSchedClassTables(const CodeGenProcModel &ProcModel,
SchedClassTables &SchedTables);
void EmitSchedClassTables(SchedClassTables &SchedTables, raw_ostream &OS);
@@ -445,17 +447,15 @@ EmitStageAndOperandCycleData(raw_ostream &OS,
// If this processor defines no itineraries, then leave the itinerary list
// empty.
std::vector<InstrItinerary> &ItinList = ProcItinLists.back();
- if (ProcModel.ItinDefList.empty())
+ if (!ProcModel.hasItineraries())
continue;
- // Reserve index==0 for NoItinerary.
- ItinList.resize(SchedModels.numItineraryClasses()+1);
-
const std::string &Name = ProcModel.ItinsDef->getName();
- // For each itinerary data
- for (unsigned SchedClassIdx = 0,
- SchedClassEnd = ProcModel.ItinDefList.size();
+ ItinList.resize(SchedModels.numInstrSchedClasses());
+ assert(ProcModel.ItinDefList.size() == ItinList.size() && "bad Itins");
+
+ for (unsigned SchedClassIdx = 0, SchedClassEnd = ItinList.size();
SchedClassIdx < SchedClassEnd; ++SchedClassIdx) {
// Next itinerary data
@@ -631,13 +631,31 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) {
Record *PRDef = ProcModel.ProcResourceDefs[i];
- // Find the SuperIdx
- unsigned SuperIdx = 0;
Record *SuperDef = 0;
- if (PRDef->getValueInit("Super")->isComplete()) {
- SuperDef =
- SchedModels.findProcResUnits(PRDef->getValueAsDef("Super"), ProcModel);
- SuperIdx = ProcModel.getProcResourceIdx(SuperDef);
+ unsigned SuperIdx = 0;
+ unsigned NumUnits = 0;
+ bool IsBuffered = true;
+ if (PRDef->isSubClassOf("ProcResGroup")) {
+ RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources");
+ for (RecIter RUI = ResUnits.begin(), RUE = ResUnits.end();
+ RUI != RUE; ++RUI) {
+ if (!NumUnits)
+ IsBuffered = (*RUI)->getValueAsBit("Buffered");
+ else if (IsBuffered != (*RUI)->getValueAsBit("Buffered"))
+ PrintFatalError(PRDef->getLoc(),
+ "Mixing buffered and unbuffered resources.");
+ NumUnits += (*RUI)->getValueAsInt("NumUnits");
+ }
+ }
+ else {
+ // Find the SuperIdx
+ if (PRDef->getValueInit("Super")->isComplete()) {
+ SuperDef = SchedModels.findProcResUnits(
+ PRDef->getValueAsDef("Super"), ProcModel);
+ SuperIdx = ProcModel.getProcResourceIdx(SuperDef);
+ }
+ NumUnits = PRDef->getValueAsInt("NumUnits");
+ IsBuffered = PRDef->getValueAsBit("Buffered");
}
// Emit the ProcResourceDesc
if (i+1 == e)
@@ -645,8 +663,8 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
OS << " {DBGFIELD(\"" << PRDef->getName() << "\") ";
if (PRDef->getName().size() < 15)
OS.indent(15 - PRDef->getName().size());
- OS << PRDef->getValueAsInt("NumUnits") << ", " << SuperIdx << ", "
- << PRDef->getValueAsBit("Buffered") << "}" << Sep << " // #" << i+1;
+ OS << NumUnits << ", " << SuperIdx << ", "
+ << IsBuffered << "}" << Sep << " // #" << i+1;
if (SuperDef)
OS << ", Super=" << SuperDef->getName();
OS << "\n";
@@ -763,6 +781,51 @@ Record *SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead,
return ResDef;
}
+// Expand an explicit list of processor resources into a full list of implied
+// resource groups that cover them.
+//
+// FIXME: Effectively consider a super-resource a group that include all of its
+// subresources to allow mixing and matching super-resources and groups.
+//
+// FIXME: Warn if two overlapping groups don't have a common supergroup.
+void SubtargetEmitter::ExpandProcResources(RecVec &PRVec,
+ std::vector<int64_t> &Cycles,
+ const CodeGenProcModel &ProcModel) {
+ // Default to 1 resource cycle.
+ Cycles.resize(PRVec.size(), 1);
+ for (unsigned i = 0, e = PRVec.size(); i != e; ++i) {
+ RecVec SubResources;
+ if (PRVec[i]->isSubClassOf("ProcResGroup")) {
+ SubResources = PRVec[i]->getValueAsListOfDefs("Resources");
+ std::sort(SubResources.begin(), SubResources.end(), LessRecord());
+ }
+ else {
+ SubResources.push_back(PRVec[i]);
+ }
+ for (RecIter PRI = ProcModel.ProcResourceDefs.begin(),
+ PRE = ProcModel.ProcResourceDefs.end();
+ PRI != PRE; ++PRI) {
+ if (*PRI == PRVec[i] || !(*PRI)->isSubClassOf("ProcResGroup"))
+ continue;
+ RecVec SuperResources = (*PRI)->getValueAsListOfDefs("Resources");
+ std::sort(SuperResources.begin(), SuperResources.end(), LessRecord());
+ RecIter SubI = SubResources.begin(), SubE = SubResources.end();
+ RecIter SuperI = SuperResources.begin(), SuperE = SuperResources.end();
+ for ( ; SubI != SubE && SuperI != SuperE; ++SuperI) {
+ if (*SubI < *SuperI)
+ break;
+ else if (*SuperI < *SubI)
+ continue;
+ ++SubI;
+ }
+ if (SubI == SubE) {
+ PRVec.push_back(*PRI);
+ Cycles.push_back(Cycles[i]);
+ }
+ }
+ }
+}
+
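
The containment test in the loop above is a sorted-merge subset check: both
resource lists are sorted with the same comparator, then a single pass decides
whether every element of the candidate list appears in the group. A standalone
sketch of the same idea (hypothetical helper, not part of the patch):

    // Requires Sub and Super sorted by the same ordering as '<' below.
    static bool isSubsetSorted(const RecVec &Sub, const RecVec &Super) {
      RecIter SubI = Sub.begin(), SubE = Sub.end();
      for (RecIter SupI = Super.begin(), SupE = Super.end();
           SubI != SubE && SupI != SupE; ++SupI) {
        if (*SubI < *SupI)
          return false;      // *SubI cannot occur later in Super.
        if (!(*SupI < *SubI))
          ++SubI;            // Match; consume one Sub element.
      }
      return SubI == SubE;   // True iff every Sub element was matched.
    }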
// Generate the SchedClass table for this processor and update global
// tables. Must be called for each processor in order.
void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
@@ -787,7 +850,22 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
SCDesc.ReadAdvanceIdx = 0;
// A Variant SchedClass has no resources of its own.
- if (!SCI->Transitions.empty()) {
+ bool HasVariants = false;
+ for (std::vector<CodeGenSchedTransition>::const_iterator
+ TI = SCI->Transitions.begin(), TE = SCI->Transitions.end();
+ TI != TE; ++TI) {
+ if (TI->ProcIndices[0] == 0) {
+ HasVariants = true;
+ break;
+ }
+ IdxIter PIPos = std::find(TI->ProcIndices.begin(),
+ TI->ProcIndices.end(), ProcModel.Index);
+ if (PIPos != TI->ProcIndices.end()) {
+ HasVariants = true;
+ break;
+ }
+ }
+ if (HasVariants) {
SCDesc.NumMicroOps = MCSchedClassDesc::VariantNumMicroOps;
continue;
}
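
In effect, the loop above computes the following predicate: a class is variant
on this processor if any transition either applies to every processor
(ProcIndices[0] == 0) or explicitly lists this processor's index. A compact
restatement (hypothetical helper, not part of the patch):

    static bool hasVariantForProc(const CodeGenSchedClass &SC,
                                  unsigned ProcIdx) {
      for (std::vector<CodeGenSchedTransition>::const_iterator
             TI = SC.Transitions.begin(), TE = SC.Transitions.end();
           TI != TE; ++TI) {
        if (TI->ProcIndices[0] == 0 ||   // valid for all processors
            std::find(TI->ProcIndices.begin(), TI->ProcIndices.end(),
                      ProcIdx) != TI->ProcIndices.end())
          return true;
      }
      return false;
    }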
@@ -804,27 +882,8 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
}
IdxVec Writes = SCI->Writes;
IdxVec Reads = SCI->Reads;
- if (SCI->ItinClassDef) {
- assert(SCI->InstRWs.empty() && "ItinClass should not have InstRWs");
- // Check this processor's itinerary class resources.
- for (RecIter II = ProcModel.ItinRWDefs.begin(),
- IE = ProcModel.ItinRWDefs.end(); II != IE; ++II) {
- RecVec Matched = (*II)->getValueAsListOfDefs("MatchedItinClasses");
- if (std::find(Matched.begin(), Matched.end(), SCI->ItinClassDef)
- != Matched.end()) {
- SchedModels.findRWs((*II)->getValueAsListOfDefs("OperandReadWrites"),
- Writes, Reads);
- break;
- }
- }
- if (Writes.empty()) {
- DEBUG(dbgs() << ProcModel.ItinsDef->getName()
- << " does not have resources for itinerary class "
- << SCI->ItinClassDef->getName() << '\n');
- }
- }
- else if (!SCI->InstRWs.empty()) {
- // This class may have a default ReadWrite list which can be overriden by
+ if (!SCI->InstRWs.empty()) {
+ // This class has a default ReadWrite list which can be overridden by
// InstRW definitions.
Record *RWDef = 0;
for (RecIter RWI = SCI->InstRWs.begin(), RWE = SCI->InstRWs.end();
@@ -842,6 +901,23 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
Writes, Reads);
}
}
+ if (Writes.empty()) {
+ // Check this processor's itinerary class resources.
+ for (RecIter II = ProcModel.ItinRWDefs.begin(),
+ IE = ProcModel.ItinRWDefs.end(); II != IE; ++II) {
+ RecVec Matched = (*II)->getValueAsListOfDefs("MatchedItinClasses");
+ if (std::find(Matched.begin(), Matched.end(), SCI->ItinClassDef)
+ != Matched.end()) {
+ SchedModels.findRWs((*II)->getValueAsListOfDefs("OperandReadWrites"),
+ Writes, Reads);
+ break;
+ }
+ }
+ if (Writes.empty()) {
+ DEBUG(dbgs() << ProcModel.ModelName
+ << " does not have resources for class " << SCI->Name << '\n');
+ }
+ }
// Sum resources across all operand writes.
std::vector<MCWriteProcResEntry> WriteProcResources;
std::vector<MCWriteLatencyEntry> WriteLatencies;
@@ -859,7 +935,8 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
WriterNames.push_back(SchedModels.getSchedWrite(WriteID).Name);
// If this Write is not referenced by a ReadAdvance, don't distinguish it
// from other WriteLatency entries.
- if (!SchedModels.hasReadOfWrite(SchedModels.getSchedWrite(WriteID).TheDef)) {
+ if (!SchedModels.hasReadOfWrite(
+ SchedModels.getSchedWrite(WriteID).TheDef)) {
WriteID = 0;
}
WLEntry.WriteResourceID = WriteID;
@@ -884,16 +961,29 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
RecVec PRVec = WriteRes->getValueAsListOfDefs("ProcResources");
std::vector<int64_t> Cycles =
WriteRes->getValueAsListOfInts("ResourceCycles");
+
+ ExpandProcResources(PRVec, Cycles, ProcModel);
+
for (unsigned PRIdx = 0, PREnd = PRVec.size();
PRIdx != PREnd; ++PRIdx) {
MCWriteProcResEntry WPREntry;
WPREntry.ProcResourceIdx = ProcModel.getProcResourceIdx(PRVec[PRIdx]);
assert(WPREntry.ProcResourceIdx && "Bad ProcResourceIdx");
- if (Cycles.size() > PRIdx)
- WPREntry.Cycles = Cycles[PRIdx];
- else
- WPREntry.Cycles = 1;
- WriteProcResources.push_back(WPREntry);
+ WPREntry.Cycles = Cycles[PRIdx];
+ // If this resource is already used in this sequence, add the current
+ // entry's cycles so that the same resource appears to be used
+ // serially, rather than as multiple parallel uses. This is important
+ // for in-order machines where the resource consumption is a hazard.
+ unsigned WPRIdx = 0, WPREnd = WriteProcResources.size();
+ for ( ; WPRIdx != WPREnd; ++WPRIdx) {
+ if (WriteProcResources[WPRIdx].ProcResourceIdx
+ == WPREntry.ProcResourceIdx) {
+ WriteProcResources[WPRIdx].Cycles += WPREntry.Cycles;
+ break;
+ }
+ }
+ if (WPRIdx == WPREnd)
+ WriteProcResources.push_back(WPREntry);
}
}
WriteLatencies.push_back(WLEntry);
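
The accumulate-or-append step above reduces to a small helper: look for an
existing entry with the same resource index and sum the cycles, otherwise
record a new use. A sketch under the same types (hypothetical function name):

    static void addResourceUse(std::vector<MCWriteProcResEntry> &Uses,
                               const MCWriteProcResEntry &Entry) {
      for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
        if (Uses[i].ProcResourceIdx == Entry.ProcResourceIdx) {
          Uses[i].Cycles += Entry.Cycles;  // same resource: serialize cycles
          return;
        }
      }
      Uses.push_back(Entry);               // first use of this resource
    }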
@@ -1062,7 +1152,7 @@ void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables,
// The first class is always invalid. We have no way to distinguish it
// except by name and position.
- assert(SchedModels.getSchedClass(0).Name == "NoItinerary"
+ assert(SchedModels.getSchedClass(0).Name == "NoInstrModel"
&& "invalid class not first");
OS << " {DBGFIELD(\"InvalidSchedClass\") "
<< MCSchedClassDesc::InvalidNumMicroOps
@@ -1108,6 +1198,7 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
EmitProcessorProp(OS, PI->ModelDef, "MinLatency", ',');
EmitProcessorProp(OS, PI->ModelDef, "LoadLatency", ',');
EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ',');
+ EmitProcessorProp(OS, PI->ModelDef, "ILPWindow", ',');
EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ',');
OS << " " << PI->Index << ", // Processor ID\n";
if (PI->hasInstrSchedModel())
@@ -1118,7 +1209,7 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
- SchedModels.schedClassBegin()) << ",\n";
else
OS << " 0, 0, 0, 0, // No instruction-level machine model.\n";
- if (SchedModels.hasItineraryClasses())
+ if (SchedModels.hasItineraries())
OS << " " << PI->ItinsDef->getName() << ");\n";
else
OS << " 0); // No Itinerary\n";
@@ -1175,7 +1266,7 @@ void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) {
<< "#define DBGFIELD(x)\n"
<< "#endif\n";
- if (SchedModels.hasItineraryClasses()) {
+ if (SchedModels.hasItineraries()) {
std::vector<std::vector<InstrItinerary> > ProcItinLists;
// Emit the stage data
EmitStageAndOperandCycleData(OS, ProcItinLists);
@@ -1216,7 +1307,7 @@ void SubtargetEmitter::EmitSchedModelHelpers(std::string ClassName,
SCE = SchedModels.schedClassEnd(); SCI != SCE; ++SCI) {
if (SCI->Transitions.empty())
continue;
- VariantClasses.push_back(SCI - SchedModels.schedClassBegin());
+ VariantClasses.push_back(SCI->Index);
}
if (!VariantClasses.empty()) {
OS << " switch (SchedClass) {\n";
@@ -1263,13 +1354,8 @@ void SubtargetEmitter::EmitSchedModelHelpers(std::string ClassName,
if (*PI == 0)
break;
}
- unsigned SCIdx = 0;
- if (SC.ItinClassDef)
- SCIdx = SchedModels.getSchedClassIdxForItin(SC.ItinClassDef);
- else
- SCIdx = SchedModels.findSchedClassIdx(SC.Writes, SC.Reads);
- if (SCIdx != *VCI)
- OS << " return " << SCIdx << ";\n";
+ if (SC.isInferred())
+ OS << " return " << SC.Index << ";\n";
OS << " break;\n";
}
OS << " };\n";
@@ -1375,7 +1461,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
<< Target << "WriteProcResTable, "
<< Target << "WriteLatencyTable, "
<< Target << "ReadAdvanceTable, ";
- if (SchedModels.hasItineraryClasses()) {
+ if (SchedModels.hasItineraries()) {
OS << '\n'; OS.indent(22);
OS << Target << "Stages, "
<< Target << "OperandCycles, "
@@ -1432,7 +1518,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "extern const llvm::MCReadAdvanceEntry "
<< Target << "ReadAdvanceTable[];\n";
- if (SchedModels.hasItineraryClasses()) {
+ if (SchedModels.hasItineraries()) {
OS << "extern const llvm::InstrStage " << Target << "Stages[];\n";
OS << "extern const unsigned " << Target << "OperandCycles[];\n";
OS << "extern const unsigned " << Target << "ForwardingPaths[];\n";
@@ -1456,7 +1542,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
<< Target << "WriteLatencyTable, "
<< Target << "ReadAdvanceTable, ";
OS << '\n'; OS.indent(22);
- if (SchedModels.hasItineraryClasses()) {
+ if (SchedModels.hasItineraries()) {
OS << Target << "Stages, "
<< Target << "OperandCycles, "
<< Target << "ForwardingPaths, ";
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 49efe7ed7374..b5c3ca760db6 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "TableGenBackends.h" // Declares all backends.
-
#include "SetTheory.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/PrettyStackTrace.h"
@@ -39,9 +38,10 @@ enum ActionType {
GenSubtarget,
GenIntrinsic,
GenTgtIntrinsic,
- GenEDInfo,
PrintEnums,
- PrintSets
+ PrintSets,
+ GenOptParserDefs,
+ GenCTags
};
namespace {
@@ -77,12 +77,14 @@ namespace {
"Generate intrinsic information"),
clEnumValN(GenTgtIntrinsic, "gen-tgt-intrinsic",
"Generate target intrinsic information"),
- clEnumValN(GenEDInfo, "gen-enhanced-disassembly-info",
- "Generate enhanced disassembly info"),
clEnumValN(PrintEnums, "print-enums",
"Print enum values for a class"),
clEnumValN(PrintSets, "print-sets",
"Print expanded sets for testing DAG exprs"),
+ clEnumValN(GenOptParserDefs, "gen-opt-parser-defs",
+ "Generate option definitions"),
+ clEnumValN(GenCTags, "gen-ctags",
+ "Generate ctags-compatible index"),
clEnumValEnd));
cl::opt<std::string>
@@ -136,8 +138,8 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenTgtIntrinsic:
EmitIntrinsics(Records, OS, true);
break;
- case GenEDInfo:
- EmitEnhancedDisassemblerInfo(Records, OS);
+ case GenOptParserDefs:
+ EmitOptParser(Records, OS);
break;
case PrintEnums:
{
@@ -162,6 +164,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
}
break;
}
+ case GenCTags:
+ EmitCTags(Records, OS);
+ break;
}
return false;
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index f0d25d8a2c81..28b626e17e89 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -68,12 +68,13 @@ void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS);
void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS);
void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS);
void EmitDisassembler(RecordKeeper &RK, raw_ostream &OS);
-void EmitEnhancedDisassemblerInfo(RecordKeeper &RK, raw_ostream &OS);
void EmitFastISel(RecordKeeper &RK, raw_ostream &OS);
void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS);
void EmitPseudoLowering(RecordKeeper &RK, raw_ostream &OS);
void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS);
void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS);
void EmitMapTable(RecordKeeper &RK, raw_ostream &OS);
+void EmitOptParser(RecordKeeper &RK, raw_ostream &OS);
+void EmitCTags(RecordKeeper &RK, raw_ostream &OS);
} // End llvm namespace
diff --git a/utils/TableGen/X86DisassemblerShared.h b/utils/TableGen/X86DisassemblerShared.h
index c13a0cc467e1..3ff922b8225c 100644
--- a/utils/TableGen/X86DisassemblerShared.h
+++ b/utils/TableGen/X86DisassemblerShared.h
@@ -10,8 +10,8 @@
#ifndef X86DISASSEMBLERSHARED_H
#define X86DISASSEMBLERSHARED_H
-#include <string>
#include <string.h>
+#include <string>
#define INSTRUCTION_SPECIFIER_FIELDS \
struct OperandSpecifier operands[X86_MAX_OPERANDS]; \
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index 468a1f81c719..40a0c1b260b7 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -14,13 +14,12 @@
//
//===----------------------------------------------------------------------===//
-#include "X86DisassemblerShared.h"
#include "X86DisassemblerTables.h"
-
-#include "llvm/TableGen/TableGenBackend.h"
+#include "X86DisassemblerShared.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/TableGen/TableGenBackend.h"
#include <map>
using namespace llvm;
diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h
index ea006c05b991..01aeaaf0bf90 100644
--- a/utils/TableGen/X86DisassemblerTables.h
+++ b/utils/TableGen/X86DisassemblerTables.h
@@ -19,9 +19,7 @@
#include "X86DisassemblerShared.h"
#include "X86ModRMFilters.h"
-
#include "llvm/Support/raw_ostream.h"
-
#include <vector>
namespace llvm {
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index d6ed2fe2c615..61b9813b06b8 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -14,12 +14,10 @@
//
//===----------------------------------------------------------------------===//
-#include "X86DisassemblerShared.h"
#include "X86RecognizableInstr.h"
+#include "X86DisassemblerShared.h"
#include "X86ModRMFilters.h"
-
#include "llvm/Support/ErrorHandling.h"
-
#include <string>
using namespace llvm;
@@ -39,14 +37,15 @@ using namespace llvm;
MAP(D1, 46) \
MAP(D4, 47) \
MAP(D5, 48) \
- MAP(D8, 49) \
- MAP(D9, 50) \
- MAP(DA, 51) \
- MAP(DB, 52) \
- MAP(DC, 53) \
- MAP(DD, 54) \
- MAP(DE, 55) \
- MAP(DF, 56)
+ MAP(D6, 49) \
+ MAP(D8, 50) \
+ MAP(D9, 51) \
+ MAP(DA, 52) \
+ MAP(DB, 53) \
+ MAP(DC, 54) \
+ MAP(DD, 55) \
+ MAP(DE, 56) \
+ MAP(DF, 57)
// A clone of X86 since we can't depend on something that is generated.
namespace X86Local {
@@ -121,6 +120,7 @@ namespace X86Local {
#define TWO_BYTE_EXTENSION_TABLES \
EXTENSION_TABLE(00) \
EXTENSION_TABLE(01) \
+ EXTENSION_TABLE(0d) \
EXTENSION_TABLE(18) \
EXTENSION_TABLE(71) \
EXTENSION_TABLE(72) \
@@ -765,6 +765,17 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
HANDLE_OPERAND(immediate)
HANDLE_OPERAND(immediate)
break;
+ case X86Local::MRM_F8:
+ if (Opcode == 0xc6) {
+ assert(numPhysicalOperands == 1 &&
+ "Unexpected number of operands for X86Local::MRM_F8");
+ HANDLE_OPERAND(immediate)
+ } else if (Opcode == 0xc7) {
+ assert(numPhysicalOperands == 1 &&
+ "Unexpected number of operands for X86Local::MRM_F8");
+ HANDLE_OPERAND(relocation)
+ }
+ break;
case X86Local::MRMInitReg:
// Ignored.
break;
diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h
index 9feb3c3c7d3b..9ec36a39df45 100644
--- a/utils/TableGen/X86RecognizableInstr.h
+++ b/utils/TableGen/X86RecognizableInstr.h
@@ -17,13 +17,11 @@
#ifndef X86RECOGNIZABLEINSTR_H
#define X86RECOGNIZABLEINSTR_H
-#include "X86DisassemblerTables.h"
-
#include "CodeGenTarget.h"
-
-#include "llvm/TableGen/Record.h"
-#include "llvm/Support/DataTypes.h"
+#include "X86DisassemblerTables.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/TableGen/Record.h"
namespace llvm {
diff --git a/utils/TableGen/tdtags b/utils/TableGen/tdtags
new file mode 100644
index 000000000000..5214485f4ec1
--- /dev/null
+++ b/utils/TableGen/tdtags
@@ -0,0 +1,453 @@
+#!/bin/sh
+#===-- tdtags - TableGen tags wrapper ---------------------------*- sh -*-===#
+# vim:set sts=2 sw=2 et:
+#===----------------------------------------------------------------------===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===#
+#
+# This is a wrapper script to simplify generating ctags(1)-compatible index
+# files for target .td files. Run tdtags -H for more documentation.
+#
+# For portability, this script is intended to conform to IEEE Std 1003.1-2008.
+#
+#===----------------------------------------------------------------------===#
+
+SELF=${0##*/}
+
+usage() {
+cat <<END
+Usage: $SELF [ <options> ] tdfile
+ or: $SELF [ <options> ] -x recipe [arg ...]
+OPTIONS
+ -H Display further help.
+ -a Append the tags to an existing tags file.
+ -f <file> Write tags to the specified file (defaults to 'tags').
+ -I <dir> Add the directory to the search path for tblgen include files.
+ -x <recipe> Generate tags file(s) for a common use case:
+ -q Suppress $TBLGEN error messages.
+ -v Be verbose; report progress.
+END
+ usage_recipes
+}
+
+usage_recipes() {
+cat <<END
+ all - Generate an index in each directory that contains .td files
+ in the LLVM source tree.
+ here - Generate an index for all .td files in the current directory.
+ recurse - Generate an index in each directory that contains .td files
+ in and under the current directory.
+ target [<target> ...]
+ - Generate a tags file for each specified LLVM code generator
+ target, or if none are specified, all targets.
+END
+}
+
+help() {
+cat <<END
+NAME
+ $SELF - generate ctags(1)-compatible index files for tblgen .td source
+
+SYNOPSIS
+ $SELF [ options ] -x recipe [arg ...]
+ $SELF [ options ] [file ...]
+
+DESCRIPTION
+ With the '-x' option, $SELF produces one or more tags files for a
+ particular common use case. See the RECIPES section below for details.
+
+ Without the '-x' option, $SELF provides a ctags(1)-like interface to
+ $TBLGEN.
+
+OPTIONS
+ -a Append newly generated tags to those already in an existing
+ tags file. Without ths option, any and all existing tags are
+ replaced. NOTE: When building a mixed tags file, using ${SELF}
+ for tblgen tags and ctags(1) for other languages, it is best
+ to run ${SELF} first without '-a', and ctags(1) second with '-a',
+ because ctags(1) handling is more capable.
+ -f <file> Use the name <file> for the tags file, rather than the default
+ "tags". If the <file> is "-", then the tag index is written to
+ standard output.
+ -H Display this document.
+ -I <dir> Add the directory <dir> to the search path for 'include'
+ statements in tblgen source.
+ -x Run a canned recipe, rather than operate on specified files.
+ When '-x' is present, the first non-option argument is the
+ name of a recipe, and any further arguments are arguments to
+ that recipe. With no arguments, lists the available recipes.
+ -q Suppress $TBLGEN error messages. Not all .td files are well-
+ formed outside a specific context, so recipes will sometimes
+ produce error messages for certain .td files. These errors
+ do not affect the indices produced for valid files.
+ -v Be verbose; report progress.
+
+RECIPES
+ $SELF -x all
+ Produce a tags file in every directory in the LLVM source tree
+ that contains any .td files.
+ $SELF -x here
+ Produce a tags file from .td files in the current directory.
+ $SELF -x recurse
+ Produce a tags file in every directory that contains any .td
+ files, in and under the current directory.
+ $SELF -x target [<target> ...]
+ Produce a tags file for each named code generator target, or
+ if none are named, for all code generator targets.
+END
+}
+
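
Typical invocations, assuming llvm-tblgen is on PATH and the working
directory is inside an LLVM tree (Foo.td is a hypothetical file name):

    tdtags -x target X86        # index lib/Target/X86
    tdtags -x here              # index .td files in the current directory
    tdtags -a -f tags Foo.td    # merge tags for one file into ./tags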
+# Temporary file management.
+#
+# Since SUS sh(1) has no arrays, this script makes extensive use of
+# temporary files. The following are 'global' and used to carry information
+# across functions:
+# $TMP:D Include directories.
+# $TMP:I Included files.
+# $TMP:T Top-level files, that are not included by another.
+# $TMP:W Directories in which to generate tags (Worklist).
+# For portability to OS X, names must not differ only in case.
+#
+TMP=${TMPDIR:-/tmp}/$SELF:$$
+trap "rm -f $TMP*" 0
+trap exit 1 2 13 15
+>$TMP:D
+
+td_dump()
+{
+ if [ $OPT_VERBOSE -gt 1 ]
+ then
+ printf '===== %s =====\n' "$1"
+ cat <"$1"
+ fi
+}
+
+# Escape the arguments, taken as a whole.
+e() {
+ printf '%s' "$*" |
+ sed -e "s/'/'\\\\''/g" -e "1s/^/'/" -e "\$s/\$/'/"
+}
+
+# Determine whether the given directory contains at least one .td file.
+dir_has_td() {
+ for i in $1/*.td
+ do
+ [ -f "$i" ] && return 0
+ done
+ return 1
+}
+
+# Partition the supplied list of files, plus any files included from them,
+# into two groups:
+# $TMP:T Top-level files, that are not included by another.
+# $TMP:I Included files.
+# Add standard directories to the include paths in $TMP:D if this would
+# benefit any of the included files.
+td_prep() {
+ >$TMP:E
+ >$TMP:J
+ for i in *.td
+ do
+ [ "x$i" = 'x*.td' ] && return 1
+ if [ -f "$i" ]
+ then
+ printf '%s\n' "$i" >>$TMP:E
+ sed -n -e 's/include[[:space:]]"\(.*\)".*/\1/p' <"$i" >>$TMP:J
+ else
+ printf >&2 '%s: "%s" not found.\n' "$SELF" "$i"
+ exit 7
+ fi
+ done
+ sort -u <$TMP:E >$TMP:X
+ sort -u <$TMP:J >$TMP:I
+ # A file that exists but is not included is top-level.
+ comm -23 $TMP:X $TMP:I >$TMP:T
+ td_dump $TMP:T
+ td_dump $TMP:I
+ # Check include files.
+ while read i
+ do
+ [ -f "$i" ] && continue
+ while read d
+ do
+ [ -f "$d/$i" ] && break
+ done <$TMP:D
+ if [ -z "$d" ]
+ then
+ # See whether this include file can be found in a common location.
+ for d in $LLVM_SRC_ROOT/include \
+ $LLVM_SRC_ROOT/tools/clang/include
+ do
+ if [ -f "$d/$i" ]
+ then
+ printf '%s\n' "$d" >>$TMP:D
+ break
+ fi
+ done
+ fi
+ done <$TMP:I
+ td_dump $TMP:D
+}
+
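
The top-level/included split above relies on comm(1): given two sorted
inputs, 'comm -23' prints only the lines unique to the first. A minimal
illustration with hypothetical names:

    printf 'A.td\nB.td\nC.td\n' >X   # every .td present (already sorted)
    printf 'B.td\n'             >I   # every file named in an include
    comm -23 X I                     # prints A.td and C.td: the top level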
+# Generate tags for the list of files in $TMP:T.
+td_tag() {
+ # Collect include directories.
+ inc=
+ while read d
+ do
+ inc="${inc}${inc:+ }$(e "-I=$d")"
+ done <$TMP:D
+
+ if [ $OPT_VERBOSE -ne 0 ]
+ then
+ printf >&2 'In "%s",\n' "$PWD"
+ fi
+
+ # Generate tags for each file.
+ n=0
+ while read i
+ do
+ if [ $OPT_VERBOSE -ne 0 ]
+ then
+ printf >&2 ' generating tags from "%s"\n' "$i"
+ fi
+ n=$((n + 1))
+ t=$(printf '%s:A:%05u' "$TMP" $n)
+ eval $TBLGEN --gen-ctags $inc "$i" >$t 2>$TMP:F
+ [ $OPT_NOTBLGENERR -eq 1 ] || cat $TMP:F
+ done <$TMP:T
+
+ # Add existing tags if requested.
+ if [ $OPT_APPEND -eq 1 -a -f "$OPT_TAGSFILE" ]
+ then
+ if [ $OPT_VERBOSE -ne 0 ]
+ then
+ printf >&2 ' and existing tags from "%s"\n' "$OPT_TAGSFILE"
+ fi
+ n=$((n + 1))
+ t=$(printf '%s:A:%05u' "$TMP" $n)
+ sed -e '/^!_TAG_/d' <"$OPT_TAGSFILE" | sort -u >$t
+ fi
+
+ # Merge tags.
+ if [ $n = 1 ]
+ then
+ mv -f "$t" $TMP:M
+ else
+ sort -m -u $TMP:A:* >$TMP:M
+ fi
+
+ # Emit tags.
+ if [ x${OPT_TAGSFILE}x = x-x ]
+ then
+ cat $TMP:M
+ else
+ if [ $OPT_VERBOSE -ne 0 ]
+ then
+ printf >&2 ' into "%s".\n' "$OPT_TAGSFILE"
+ fi
+ mv -f $TMP:M "$OPT_TAGSFILE"
+ fi
+}
+
+# Generate tags for the current directory.
+td_here() {
+ td_prep
+ [ -s $TMP:T ] || return 1
+ td_tag
+}
+
+# Generate tags for the current directory, and report an error if there are
+# no .td files present.
+do_here()
+{
+ if ! td_here
+ then
+ printf >&2 '%s: Nothing to do here.\n' "$SELF"
+ exit 1
+ fi
+}
+
+# Generate tags for all .td files under the current directory.
+do_recurse()
+{
+ td_find "$PWD"
+ td_dirs
+}
+
+# Generate tags for all .td files in LLVM.
+do_all()
+{
+ td_find "$LLVM_SRC_ROOT"
+ td_dirs
+}
+
+# Generate tags for each directory in the worklist $TMP:W.
+td_dirs()
+{
+ while read d
+ do
+ (cd "$d" && td_here)
+ done <$TMP:W
+}
+
+# Find directories containing .td files within the specified directory,
+# and record them in the worklist $TMP:W.
+td_find()
+{
+ find -L "$1" -type f -name '*.td' |
+ sed -e 's:/[^/]*$::' |
+ sort -u >$TMP:W
+ td_dump $TMP:W
+}
+
+# Generate tags for the specified code generator targets, or
+# if there are no arguments, all targets.
+do_targets() {
+ cd $LLVM_SRC_ROOT/lib/Target
+ if [ -z "$*" ]
+ then
+ td_find "$PWD"
+ else
+ # Check that every specified argument is a target directory;
+ # if not, list all target directories.
+ for d
+ do
+ if [ -d "$d" ] && dir_has_td "$d"
+ then
+ printf '%s/%s\n' "$PWD" "$d"
+ else
+ printf >&2 '%s: "%s" is not a target. Targets are:\n' "$SELF" "$d"
+ for d in *
+ do
+ [ -d "$d" ] || continue
+ dir_has_td "$d" && printf >&2 ' %s\n' "$d"
+ done
+ exit 2
+ fi
+ done >$TMP:W
+ fi
+ td_dirs
+}
+
+# Change to the directory at the top of the enclosing LLVM source tree,
+# if possible.
+llvm_src_root() {
+ while [ "$PWD" != / ]
+ do
+ # Use this directory if multiple notable subdirectories are present.
+ [ -d include/llvm -a -d lib/Target ] && return 0
+ cd ..
+ done
+ return 1
+}
+
+# Ensure sort(1) behaves consistently.
+LC_ALL=C
+export LC_ALL
+
+# Globals.
+TBLGEN=llvm-tblgen
+LLVM_SRC_ROOT=
+
+# Command options.
+OPT_TAGSFILE=tags
+OPT_RECIPES=0
+OPT_APPEND=0
+OPT_VERBOSE=0
+OPT_NOTBLGENERR=0
+
+while getopts 'af:hxqvHI:' opt
+do
+ case $opt in
+ a)
+ OPT_APPEND=1
+ ;;
+ f)
+ OPT_TAGSFILE="$OPTARG"
+ ;;
+ x)
+ OPT_RECIPES=1
+ ;;
+ q)
+ OPT_NOTBLGENERR=1
+ ;;
+ v)
+ OPT_VERBOSE=$((OPT_VERBOSE + 1))
+ ;;
+ I)
+ printf '%s\n' "$OPTARG" >>$TMP:D
+ ;;
+ [hH])
+ help
+ exit 0
+ ;;
+ *)
+ usage >&2
+ exit 4
+ ;;
+ esac
+done
+shift $((OPTIND - 1))
+
+# Handle the case where tdtags is a simple ctags(1)-like wrapper for tblgen.
+if [ $OPT_RECIPES -eq 0 ]
+then
+ if [ -z "$*" ]
+ then
+ help >&2
+ exit 5
+ fi
+ for i
+ do
+ printf '%s\n' "$i"
+ done >$TMP:T
+ td_tag
+ exit $?
+fi
+
+# Find the directory at the top of the enclosing LLVM source tree.
+if ! LLVM_SRC_ROOT=$(llvm_src_root && pwd)
+then
+ printf >&2 '%s: Run from within the LLVM source tree.\n' "$SELF"
+ exit 3
+fi
+
+# Select canned actions.
+RECIPE="$1"
+case "$RECIPE" in
+all)
+ shift
+ do_all
+ ;;
+.|cwd|here)
+ shift
+ do_here
+ ;;
+recurse)
+ shift
+ do_recurse
+ ;;
+target)
+ shift
+ do_targets "$@"
+ ;;
+*)
+ if [ -n "$RECIPE" ]
+ then
+ shift
+ printf >&2 '%s: Unknown recipe "-x %s". ' "$SELF" "$RECIPE"
+ fi
+ printf >&2 'Recipes:\n'
+ usage_recipes >&2
+ printf >&2 'Run "%s -H" for help.\n' "$SELF"
+ exit 6
+ ;;
+esac
+
+exit $?
diff --git a/utils/UpdateCMakeLists.pl b/utils/UpdateCMakeLists.pl
index d92a767adf32..c896ea839be8 100755
--- a/utils/UpdateCMakeLists.pl
+++ b/utils/UpdateCMakeLists.pl
@@ -68,7 +68,7 @@ sub UpdateCMake {
while(<IN>) {
if (!$foundLibrary) {
print OUT $_;
- if (/^add_[^_]+_library\(/ || /^add_llvm_target\(/ || /^add_executable\(/) {
+ if (/^add_[^_]+_library\(/ || /^add_llvm_target\(/ || /^add_[^_]+_executable\(/) {
$foundLibrary = 1;
EmitCMakeList($dir);
}
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index 6aee8310463d..c056b9742b97 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -77,6 +77,45 @@ rm $SRC_DIR/Makefile || exit 1
# Now create our own by editing the top-level Makefile, deleting every line marked "Apple-style":
sed -e '/[Aa]pple-style/d' -e '/include.*GNUmakefile/d' $ORIG_SRC_DIR/Makefile > $SRC_DIR/Makefile || exit 1
+SUBVERSION=`echo $RC_ProjectSourceVersion | sed -e 's/.*\.\([0-9]*\).*/\1/'`
+if [ "x$SUBVERSION" != "x$RC_ProjectSourceVersion" ]; then
+ LLVM_SUBMIT_SUBVERSION=`printf "%02d" $SUBVERSION`
+ RC_ProjectSourceVersion=`echo $RC_ProjectSourceVersion | sed -e 's/\..*//'`
+ LLVM_SUBMIT_VERSION=$RC_ProjectSourceVersion
+fi
+if [ "x$LLVM_SUBMIT_SUBVERSION" = "x00" -o "x$LLVM_SUBMIT_SUBVERSION" = "x0" ]; then
+ LLVM_VERSION="$LLVM_SUBMIT_VERSION"
+else
+ LLVM_VERSION="$LLVM_SUBMIT_VERSION-$LLVM_SUBMIT_SUBVERSION"
+fi
+
+# Figure out how many make processes to run.
+SYSCTL=`sysctl -n hw.activecpu`
+# sysctl -n hw.* does not work when invoked via B&I chroot /BuildRoot.
+# Builders can default to 2, since even if they are single processor,
+# nothing else is running on the machine.
+if [ -z "$SYSCTL" ]; then
+ SYSCTL=2
+fi
+JOBS_FLAG="-j $SYSCTL"
+
+COMMON_CONFIGURE_OPTS="\
+ --prefix=$DEST_DIR$DEST_ROOT \
+ --enable-assertions=$LLVM_ASSERTIONS \
+ --enable-optimized=$LLVM_OPTIMIZED \
+ --disable-bindings"
+
+COMMON_MAKEFLAGS="\
+ UNIVERSAL=1 \
+ UNIVERSAL_SDK_PATH=$SDKROOT \
+ NO_RUNTIME_LIBS=1 \
+ DISABLE_EDIS=1 \
+ REQUIRES_RTTI=1 \
+ DEBUG_SYMBOLS=1 \
+ LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
+ LLVM_SUBMIT_SUBVERSION=$LLVM_SUBMIT_SUBVERSION \
+ VERBOSE=1"
+
# Build the LLVM tree universal.
mkdir -p $DIR/obj-llvm || exit 1
cd $DIR/obj-llvm || exit 1
@@ -89,6 +128,7 @@ if [ "$ARM_HOSTED_BUILD" = yes ]; then
for prog in ar nm ranlib strip lipo ld as ; do
P=$DIR/bin/arm-apple-darwin$DARWIN_VERS-${prog}
T=`xcrun -sdk $SDKROOT -find ${prog}`
+ ln -s $T $DIR/bin/$prog
echo '#!/bin/sh' > $P || exit 1
echo 'exec '$T' "$@"' >> $P || exit 1
chmod a+x $P || exit 1
@@ -97,80 +137,74 @@ if [ "$ARM_HOSTED_BUILD" = yes ]; then
for prog in clang clang++ ; do
P=$DIR/bin/arm-apple-darwin$DARWIN_VERS-${prog}
T=`xcrun -sdk $SDKROOT -find ${prog}`
+ ln -s $T $DIR/bin/$prog
echo '#!/bin/sh' > $P || exit 1
echo 'exec '$T' -arch armv7 -isysroot '${SDKROOT}' "$@"' >> $P || exit 1
chmod a+x $P || exit 1
done
PATH=$DIR/bin:$PATH
-fi
-if [ "$ARM_HOSTED_BUILD" = yes ]; then
- configure_opts="--enable-targets=arm --host=arm-apple-darwin10 \
- --target=arm-apple-darwin10 --build=i686-apple-darwin10"
-elif [ "$IOS_SIM_BUILD" = yes ]; then
- # Use a non-standard "darwin_sim" host triple to trigger a cross-build.
- configure_opts="--enable-targets=x86 --host=i686-apple-darwin_sim \
- --build=i686-apple-darwin10"
+ unset SDKROOT && \
+ $SRC_DIR/configure $COMMON_CONFIGURE_OPTS \
+ --enable-targets=arm \
+ --host=arm-apple-darwin10 \
+ --target=arm-apple-darwin10 \
+ --build=i686-apple-darwin10 \
+ --program-prefix="" \
+ || exit 1
+
+ if [ -n "$IPHONEOS_DEPLOYMENT_TARGET" ]; then
+ COMMON_MAKEFLAGS="$COMMON_MAKEFLAGS \
+ DEPLOYMENT_TARGET=-mios-version-min=$IPHONEOS_DEPLOYMENT_TARGET"
+ fi
+
+ make $JOBS_FLAG $COMMON_MAKEFLAGS SDKROOT= UNIVERSAL_ARCH="$HOSTS" \
+ CXXFLAGS="-DLLVM_VERSION_INFO='\" Apple Build #$LLVM_VERSION\"'"
+ if [ $? != 0 ] ; then
+ echo "error: LLVM 'make' failed!"
+ exit 1
+ fi
+
else
- configure_opts="--enable-targets=arm,x86"
-fi
+# not $ARM_HOSTED_BUILD
+
+ export CC=`xcrun -find clang`
+ export CXX=`xcrun -find clang++`
+
+ if [ "$IOS_SIM_BUILD" = yes ]; then
+ # Use a non-standard "darwin_sim" host triple to trigger a cross-build.
+ configure_opts="--enable-targets=x86 --host=i686-apple-darwin_sim \
+ --build=i686-apple-darwin10"
+ if [ -n "$IPHONEOS_DEPLOYMENT_TARGET" ]; then
+ COMMON_MAKEFLAGS="$COMMON_MAKEFLAGS \
+ DEPLOYMENT_TARGET=-mios-simulator-version-min=$IPHONEOS_DEPLOYMENT_TARGET"
+ fi
+ else
+ configure_opts="--enable-targets=arm,x86"
+ if [ -n "$MACOSX_DEPLOYMENT_TARGET" ]; then
+ COMMON_MAKEFLAGS="$COMMON_MAKEFLAGS \
+ DEPLOYMENT_TARGET=-mmacosx-version-min=$MACOSX_DEPLOYMENT_TARGET"
+ fi
+ fi
-if [ "$ARM_HOSTED_BUILD" != yes ]; then
if [ $SDKROOT ]; then
CPPFLAGS="$CPPFLAGS -isysroot $SDKROOT"
fi
for host in $HOSTS; do :; done
CPPFLAGS="$CPPFLAGS -arch $host"
-fi
-if [ \! -f Makefile.config ]; then
- $SRC_DIR/configure --prefix=$DEST_DIR$DEST_ROOT $configure_opts \
- --enable-assertions=$LLVM_ASSERTIONS \
- --enable-optimized=$LLVM_OPTIMIZED \
- --disable-bindings \
+ $SRC_DIR/configure $COMMON_CONFIGURE_OPTS $configure_opts \
+ --program-prefix="" \
CPPFLAGS="$CPPFLAGS" \
|| exit 1
-fi
-
-SUBVERSION=`echo $RC_ProjectSourceVersion | sed -e 's/.*\.\([0-9]*\).*/\1/'`
-
-if [ "x$SUBVERSION" != "x$RC_ProjectSourceVersion" ]; then
- LLVM_SUBMIT_SUBVERSION=`printf "%02d" $SUBVERSION`
- RC_ProjectSourceVersion=`echo $RC_ProjectSourceVersion | sed -e 's/\..*//'`
- LLVM_SUBMIT_VERSION=$RC_ProjectSourceVersion
-fi
-if [ "x$LLVM_SUBMIT_SUBVERSION" = "x00" -o "x$LLVM_SUBMIT_SUBVERSION" = "x0" ]; then
- LLVM_VERSION="$LLVM_SUBMIT_VERSION"
-else
- LLVM_VERSION="$LLVM_SUBMIT_VERSION-$LLVM_SUBMIT_SUBVERSION"
-fi
-
-# Figure out how many make processes to run.
-SYSCTL=`sysctl -n hw.activecpu`
-# sysctl -n hw.* does not work when invoked via B&I chroot /BuildRoot.
-# Builders can default to 2, since even if they are single processor,
-# nothing else is running on the machine.
-if [ -z "$SYSCTL" ]; then
- SYSCTL=2
-fi
-JOBS_FLAG="-j $SYSCTL"
-
-make $JOBS_FLAG $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$HOSTS" \
- UNIVERSAL_SDK_PATH=$SDKROOT \
- NO_RUNTIME_LIBS=1 \
- DISABLE_EDIS=1 \
- REQUIRES_RTTI=1 \
- DEBUG_SYMBOLS=1 \
- LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
- LLVM_SUBMIT_SUBVERSION=$LLVM_SUBMIT_SUBVERSION \
- CXXFLAGS="-DLLVM_VERSION_INFO='\" Apple Build #$LLVM_VERSION\"'" \
- VERBOSE=1
-
-if [ $? != 0 ] ; then
+ make $JOBS_FLAG $COMMON_MAKEFLAGS UNIVERSAL_ARCH="$HOSTS" \
+ CXXFLAGS="-DLLVM_VERSION_INFO='\" Apple Build #$LLVM_VERSION\"'"
+ if [ $? != 0 ] ; then
echo "error: LLVM 'make' failed!"
exit 1
+ fi
fi
################################################################################
@@ -185,14 +219,7 @@ rm -rf * || exit 1
cd $DIR/obj-llvm || exit 1
# Install the tree into the destination directory.
-make $LOCAL_MAKEFLAGS $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$HOSTS" \
- NO_RUNTIME_LIBS=1 \
- DISABLE_EDIS=1 \
- DEBUG_SYMBOLS=1 \
- LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
- LLVM_SUBMIT_SUBVERSION=$LLVM_SUBMIT_SUBVERSION \
- OPTIMIZE_OPTION='-O3' VERBOSE=1 install
-
+make $JOBS_FLAG $COMMON_MAKEFLAGS UNIVERSAL_ARCH="$HOSTS" install
if ! test $? == 0 ; then
echo "error: LLVM 'make install' failed!"
exit 1
@@ -207,6 +234,16 @@ RC_ProjectSourceSubversion=`printf "%d" $LLVM_MINOR_VERSION`
echo "#define LLVM_VERSION ${RC_ProjectSourceVersion}" > $DEST_DIR$DEST_ROOT/include/llvm/Version.h
echo "#define LLVM_MINOR_VERSION ${RC_ProjectSourceSubversion}" >> $DEST_DIR$DEST_ROOT/include/llvm/Version.h
+# Run unifdef to preprocess the installed headers to reflect whether this
+# was a debug or release build.
+for file in `find $DEST_DIR$DEST_ROOT/include -type f -print`; do
+ if [ "$LLVM_ASSERTIONS" = yes ]; then
+ unifdef -UNDEBUG -D_DEBUG -o $file $file
+ else
+ unifdef -DNDEBUG -U_DEBUG -ULLVM_ENABLE_DUMP -o $file $file
+ fi
+done
+
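
unifdef(1) rewrites each header in place, resolving only the named symbols.
For a hypothetical installed header containing

    #ifndef NDEBUG
    void dump() const;
    #endif

the release pass (unifdef -DNDEBUG ...) drops the whole block, while the
assertions pass (unifdef -UNDEBUG -D_DEBUG ...) keeps the declaration and
strips just the #ifndef/#endif pair.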
# Find the right version of strip to use.
STRIP=strip
if [ -n "$SDKROOT" ]; then
@@ -263,9 +300,10 @@ cd $SYM_DIR || exit 1
rm -rf * || exit 1
# Generate .dSYM files
+DSYMUTIL=`xcrun -find dsymutil`
find $DEST_DIR -perm -0111 -type f \
! \( -name '*.la' -o -name gccas -o -name gccld -o -name llvm-config -o -name '*.a' \) \
- -print | xargs -n 1 -P ${SYSCTL} dsymutil
+ -print | xargs -n 1 -P ${SYSCTL} ${DSYMUTIL}
# Save .dSYM files and .a archives
cd $DEST_DIR || exit 1
diff --git a/utils/clang-parse-diagnostics-file b/utils/clang-parse-diagnostics-file
index b8ea8eae310f..59b13f306505 100755
--- a/utils/clang-parse-diagnostics-file
+++ b/utils/clang-parse-diagnostics-file
@@ -1,5 +1,6 @@
#!/usr/bin/env python
+import os
import plistlib
def main():
@@ -59,20 +60,37 @@ Utility for dumping Clang-style logged diagnostics.\
</array>
</plist>""" % data
- # Load the diagnostics.
+ # Get the list of files and diagnostics to report.
+ to_report = []
diags = plistlib.readPlistFromString(data)
+ for file_diags in diags:
+ file = file_diags.get('main-file')
+
+ # Ignore diagnostics for 'conftest.c', which is the file autoconf uses
+ # for its tests (which frequently will have warnings).
+ if os.path.basename(file) == 'conftest.c':
+ continue
+
+ # Get the diagnostics for the selected levels.
+ selected_diags = [d
+ for d in file_diags.get('diagnostics', ())
+ if levels[d.get('level')] or opts.all]
+ if selected_diags:
+ to_report.append((file, selected_diags))
- # Print out the diagnostics.
+ # If there are no diagnostics to report, show nothing.
+ if not to_report:
+ return
+
+ # Otherwise, print out the diagnostics.
print
print "**** BUILD DIAGNOSTICS ****"
- for i, file_diags in enumerate(diags):
- file = file_diags.get('main-file')
+ for file, selected_diags in to_report:
print "*** %s ***" % file
- for d in file_diags.get('diagnostics', ()):
- if levels[d.get('level')] or opts.all:
- print " %s:%s:%s: %s: %s" % (
- d.get('filename'), d.get('line'), d.get('column'),
- d.get('level'), d.get('message'))
+ for d in selected_diags:
+ print " %s:%s:%s: %s: %s" % (
+ d.get('filename'), d.get('line'), d.get('column'),
+ d.get('level'), d.get('message'))
if __name__ == "__main__":
main()
diff --git a/utils/emacs/llvm-mode.el b/utils/emacs/llvm-mode.el
index 3780624b5a43..25d974218613 100644
--- a/utils/emacs/llvm-mode.el
+++ b/utils/emacs/llvm-mode.el
@@ -5,7 +5,6 @@
;; Create mode-specific tables.
(defvar llvm-mode-syntax-table nil
"Syntax table used while in LLVM mode.")
-
(defvar llvm-font-lock-keywords
(list
;; Comments
@@ -31,12 +30,14 @@
"define" "global" "constant" "const" "internal" "linkonce" "linkonce_odr"
"weak" "weak_odr" "appending" "uninitialized" "implementation" "..."
"null" "undef" "to" "except" "not" "target" "endian" "little" "big"
- "pointersize" "deplibs" "volatile" "fastcc" "coldcc" "cc") 'words) . font-lock-keyword-face)
+ "pointersize" "volatile" "fastcc" "coldcc" "cc") 'words) . font-lock-keyword-face)
;; Arithmetic and Logical Operators
`(,(regexp-opt '("add" "sub" "mul" "div" "rem" "and" "or" "xor"
"setne" "seteq" "setlt" "setgt" "setle" "setge") 'words) . font-lock-keyword-face)
+ ;; Floating-point operators
+ `(,(regexp-opt '("fadd" "fsub" "fmul" "fdiv" "frem") 'words) . font-lock-keyword-face)
;; Special instructions
- `(,(regexp-opt '("phi" "tail" "call" "cast" "select" "to" "shl" "shr" "vaarg" "vanext") 'words) . font-lock-keyword-face)
+ `(,(regexp-opt '("phi" "tail" "call" "cast" "select" "to" "shl" "shr" "fcmp" "icmp" "vaarg" "vanext") 'words) . font-lock-keyword-face)
;; Control instructions
`(,(regexp-opt '("ret" "br" "switch" "invoke" "unwind" "unreachable") 'words) . font-lock-keyword-face)
;; Memory operators
@@ -111,7 +112,7 @@
(interactive)
(kill-all-local-variables)
(use-local-map llvm-mode-map) ; Provides the local keymap.
- (setq major-mode 'llvm-mode)
+ (setq major-mode 'llvm-mode)
(make-local-variable 'font-lock-defaults)
(setq major-mode 'llvm-mode ; This is how describe-mode
diff --git a/utils/git/find-rev b/utils/git/find-rev
index a6161db18985..059ca0b78de8 100755
--- a/utils/git/find-rev
+++ b/utils/git/find-rev
@@ -5,9 +5,9 @@ import os, sys, subprocess
def main():
from optparse import OptionParser, OptionGroup
parser = OptionParser("usage: %prog [options] <repo> <revision>")
- parser.add_option("", "--dump-section-data", dest="dumpSectionData",
- help="Dump the contents of sections",
- action="store_true", default=False)
+ parser.add_option("", "--branch", dest="branch",
+ help="Ref for the branch to search [%default]",
+ action="store", default="git-svn")
(opts, args) = parser.parse_args()
if len(args) != 2:
@@ -21,7 +21,7 @@ def main():
parser.error("invalid revision argument (not an integer)")
os.chdir(repo)
- p = subprocess.Popen(['git', 'rev-list', 'git-svn', '--pretty'],
+ p = subprocess.Popen(['git', 'rev-list', opts.branch, '--pretty'],
stdout=subprocess.PIPE)
bestRev = bestCommit = None
diff --git a/utils/kate/llvm.xml b/utils/kate/llvm.xml
index 074fa16cb884..1778cfce384e 100644
--- a/utils/kate/llvm.xml
+++ b/utils/kate/llvm.xml
@@ -90,6 +90,7 @@
<item> readonly </item>
<item> ssp </item>
<item> sspreq </item>
+ <item> sspstrong </item>
</list>
<list name="types">
<item> float </item>
diff --git a/utils/lit/MANIFEST.in b/utils/lit/MANIFEST.in
new file mode 100644
index 000000000000..6491a02dd135
--- /dev/null
+++ b/utils/lit/MANIFEST.in
@@ -0,0 +1,7 @@
+include TODO lit.py
+recursive-include tests *
+global-exclude *pyc
+global-exclude *~
+prune tests/Output
+prune tests/*/Output
+prune tests/*/*/Output
diff --git a/utils/lit/TODO b/utils/lit/TODO
index 6d7f7ea529ae..d2ff842f3145 100644
--- a/utils/lit/TODO
+++ b/utils/lit/TODO
@@ -7,3 +7,20 @@
- Support valgrind in all configs, and LLVM style valgrind.
- Support a timeout / ulimit.
+
+ - Rename 'lit' injected variable for config to be lit_config.
+
+ - Allow import of 'lit' in test suite definitions.
+
+ - Create an explicit test suite object (instead of using the top-level
+ TestingConfig object).
+
+ - Allow 'lit' driver to cooperate with test suites to add options (or at least
+ sanitize accepted params).
+
+ - Consider move to identifying all tests by path-to-test-suite and then path to
+ subtest, and don't use test suite names.
+
+ - Consider move to change workflow to always load suites, then resolve command
+ line arguments.
+
diff --git a/utils/lit/lit/ExampleTests/Clang/lit.cfg b/utils/lit/lit/ExampleTests/Clang/lit.cfg
index 1e1e807f3676..9295bd9ddbb7 100644
--- a/utils/lit/lit/ExampleTests/Clang/lit.cfg
+++ b/utils/lit/lit/ExampleTests/Clang/lit.cfg
@@ -14,7 +14,7 @@ config.test_format = lit.formats.ShTest(execute_external = True)
# suffixes: A list of file extensions to treat as test files.
config.suffixes = ['.c', '.cpp', '.m', '.mm']
-# target_triple: Used by ShTest and TclTest formats for XFAIL checks.
+# target_triple: Used by ShTest format for XFAIL checks.
config.target_triple = 'foo'
###
diff --git a/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/data.txt b/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/data.txt
new file mode 100644
index 000000000000..45b983be36b7
--- /dev/null
+++ b/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/data.txt
@@ -0,0 +1 @@
+hi
diff --git a/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp b/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp
deleted file mode 100644
index 2bda07a31cfb..000000000000
--- a/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
-
diff --git a/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/pct-S.ll b/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/pct-S.ll
new file mode 100644
index 000000000000..3ff363315a32
--- /dev/null
+++ b/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/pct-S.ll
@@ -0,0 +1 @@
+; RUN: grep "hi" %S/data.txt
diff --git a/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg b/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg
index 3fdd63c22459..533c44501ff6 100644
--- a/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg
+++ b/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg
@@ -8,11 +8,11 @@ import os
config.name = 'LLVM'
# testFormat: The test format to use to interpret tests.
-config.test_format = lit.formats.TclTest()
+config.test_format = lit.formats.ShTest()
# suffixes: A list of file extensions to treat as test files, this is actually
# set by on_clone().
-config.suffixes = []
+config.suffixes = [ '.ll' ]
# test_source_root: The root path where tests are located.
config.test_source_root = os.path.dirname(__file__)
@@ -64,74 +64,3 @@ if config.test_exec_root is None:
lit.load_config(config, site_cfg)
raise SystemExit
-###
-
-# Load site data from DejaGNU's site.exp.
-import re
-site_exp = {}
-# FIXME: Implement lit.site.cfg.
-for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
- m = re.match('set ([^ ]+) "([^"]*)"', line)
- if m:
- site_exp[m.group(1)] = m.group(2)
-
-excludes = []
-
-# Provide target_triple for use in XFAIL.
-config.target_triple = site_exp['target_triplet']
-
-# Provide llvm_supports_target for use in local configs.
-targets = set(site_exp["TARGETS_TO_BUILD"].split())
-def llvm_supports_target(name):
- return name in targets
-
-# Provide on_clone hook for reading 'dg.exp'.
-import os
-simpleLibData = re.compile(r"""load_lib llvm.exp
-
-RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]""",
- re.MULTILINE)
-conditionalLibData = re.compile(r"""load_lib llvm.exp
-
-if.*\[ ?(llvm[^ ]*) ([^ ]*) ?\].*{
- *RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]
-\}""", re.MULTILINE)
-def on_clone(parent, cfg, for_path):
- def addSuffixes(match):
- if match[0] == '{' and match[-1] == '}':
- cfg.suffixes = ['.' + s for s in match[1:-1].split(',')]
- else:
- cfg.suffixes = ['.' + match]
-
- libPath = os.path.join(os.path.dirname(for_path),
- 'dg.exp')
- if not os.path.exists(libPath):
- cfg.unsupported = True
- return
-
- # Reset unsupported, in case we inherited it.
- cfg.unsupported = False
- lib = open(libPath).read().strip()
-
- # Check for a simple library.
- m = simpleLibData.match(lib)
- if m:
- addSuffixes(m.group(1))
- return
-
- # Check for a conditional test set.
- m = conditionalLibData.match(lib)
- if m:
- funcname,arg,match = m.groups()
- addSuffixes(match)
-
- func = globals().get(funcname)
- if not func:
- lit.error('unsupported predicate %r' % funcname)
- elif not func(arg):
- cfg.unsupported = True
- return
- # Otherwise, give up.
- lit.error('unable to understand %r:\n%s' % (libPath, lib))
-
-config.on_clone = on_clone
diff --git a/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg b/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg
index 3bfee547b7e3..d45f3ac76205 100644
--- a/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg
+++ b/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg
@@ -1,8 +1,5 @@
# -*- Python -*-
-## Autogenerated by Makefile ##
-# Do not edit!
-
# Preserve some key paths for use by main LLVM test suite config.
config.llvm_obj_root = os.path.dirname(os.path.dirname(__file__))
diff --git a/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp b/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp
deleted file mode 100644
index 4bc58d757990..000000000000
--- a/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp
+++ /dev/null
@@ -1,10 +0,0 @@
-## these variables are automatically generated by make ##
-# Do not edit here. If you wish to override these values
-# edit the last section
-set target_triplet "x86_64-apple-darwin10"
-set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips CellSPU PIC16 XCore MSP430 Blackfin MSIL CppBackend"
-set srcroot "/Volumes/Data/ddunbar/llvm"
-set objroot "/Volumes/Data/ddunbar/llvm.obj.64"
-set srcdir "/Volumes/Data/ddunbar/llvm/test"
-set objdir "/Volumes/Data/ddunbar/llvm.obj.64/test"
-## All variables above are generated by configure. Do Not Edit ##
diff --git a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg
index bdcc35e0938c..94a02d8f8532 100644
--- a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg
+++ b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg
@@ -1,8 +1,5 @@
# -*- Python -*-
-## Autogenerated by Makefile ##
-# Do not edit!
-
# Preserve some key paths for use by main LLVM test suite config.
config.llvm_obj_root = os.path.dirname(os.path.dirname(__file__))
diff --git a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
deleted file mode 100644
index 4bc58d757990..000000000000
--- a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
+++ /dev/null
@@ -1,10 +0,0 @@
-## these variables are automatically generated by make ##
-# Do not edit here. If you wish to override these values
-# edit the last section
-set target_triplet "x86_64-apple-darwin10"
-set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips CellSPU PIC16 XCore MSP430 Blackfin MSIL CppBackend"
-set srcroot "/Volumes/Data/ddunbar/llvm"
-set objroot "/Volumes/Data/ddunbar/llvm.obj.64"
-set srcdir "/Volumes/Data/ddunbar/llvm/test"
-set objdir "/Volumes/Data/ddunbar/llvm.obj.64/test"
-## All variables above are generated by configure. Do Not Edit ##
diff --git a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp
deleted file mode 100644
index 2bda07a31cfb..000000000000
--- a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
-
diff --git a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
index 3fdd63c22459..533c44501ff6 100644
--- a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
+++ b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
@@ -8,11 +8,11 @@ import os
config.name = 'LLVM'
# testFormat: The test format to use to interpret tests.
-config.test_format = lit.formats.TclTest()
+config.test_format = lit.formats.ShTest()
# suffixes: A list of file extensions to treat as test files, this is actually
# set by on_clone().
-config.suffixes = []
+config.suffixes = [ '.ll' ]
# test_source_root: The root path where tests are located.
config.test_source_root = os.path.dirname(__file__)
@@ -64,74 +64,3 @@ if config.test_exec_root is None:
lit.load_config(config, site_cfg)
raise SystemExit
-###
-
-# Load site data from DejaGNU's site.exp.
-import re
-site_exp = {}
-# FIXME: Implement lit.site.cfg.
-for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
- m = re.match('set ([^ ]+) "([^"]*)"', line)
- if m:
- site_exp[m.group(1)] = m.group(2)
-
-excludes = []
-
-# Provide target_triple for use in XFAIL.
-config.target_triple = site_exp['target_triplet']
-
-# Provide llvm_supports_target for use in local configs.
-targets = set(site_exp["TARGETS_TO_BUILD"].split())
-def llvm_supports_target(name):
- return name in targets
-
-# Provide on_clone hook for reading 'dg.exp'.
-import os
-simpleLibData = re.compile(r"""load_lib llvm.exp
-
-RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]""",
- re.MULTILINE)
-conditionalLibData = re.compile(r"""load_lib llvm.exp
-
-if.*\[ ?(llvm[^ ]*) ([^ ]*) ?\].*{
- *RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]
-\}""", re.MULTILINE)
-def on_clone(parent, cfg, for_path):
- def addSuffixes(match):
- if match[0] == '{' and match[-1] == '}':
- cfg.suffixes = ['.' + s for s in match[1:-1].split(',')]
- else:
- cfg.suffixes = ['.' + match]
-
- libPath = os.path.join(os.path.dirname(for_path),
- 'dg.exp')
- if not os.path.exists(libPath):
- cfg.unsupported = True
- return
-
- # Reset unsupported, in case we inherited it.
- cfg.unsupported = False
- lib = open(libPath).read().strip()
-
- # Check for a simple library.
- m = simpleLibData.match(lib)
- if m:
- addSuffixes(m.group(1))
- return
-
- # Check for a conditional test set.
- m = conditionalLibData.match(lib)
- if m:
- funcname,arg,match = m.groups()
- addSuffixes(match)
-
- func = globals().get(funcname)
- if not func:
- lit.error('unsupported predicate %r' % funcname)
- elif not func(arg):
- cfg.unsupported = True
- return
- # Otherwise, give up.
- lit.error('unable to understand %r:\n%s' % (libPath, lib))
-
-config.on_clone = on_clone
diff --git a/utils/lit/lit/ExampleTests/ManyTests/lit.local.cfg b/utils/lit/lit/ExampleTests/ManyTests/lit.local.cfg
new file mode 100644
index 000000000000..6cc47522b16c
--- /dev/null
+++ b/utils/lit/lit/ExampleTests/ManyTests/lit.local.cfg
@@ -0,0 +1,23 @@
+# -*- Python -*-
+
+Test = lit.Test
+
+class ManyTests(object):
+ def __init__(self, N=10000):
+ self.N = N
+
+ def getTestsInDirectory(self, testSuite, path_in_suite,
+ litConfig, localConfig):
+ for i in range(self.N):
+ test_name = 'test-%04d' % (i,)
+ yield Test.Test(testSuite, path_in_suite + (test_name,),
+ localConfig)
+
+ def execute(self, test, litConfig):
+ # Do a "non-trivial" amount of Python work.
+ sum = 0
+ for i in range(10000):
+ sum += i
+ return Test.PASS, ''
+
+config.test_format = ManyTests()
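
This format fabricates N trivial tests, which makes it a handy micro-benchmark
for lit's own discovery and scheduling overhead. Something like the following
should drive it from an LLVM checkout (paths are indicative):

    python utils/lit/lit.py -s utils/lit/lit/ExampleTests/ManyTests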
diff --git a/utils/lit/lit/ExampleTests/TclTest/lit.local.cfg b/utils/lit/lit/ExampleTests/TclTest/lit.local.cfg
deleted file mode 100644
index 6a37129acdf1..000000000000
--- a/utils/lit/lit/ExampleTests/TclTest/lit.local.cfg
+++ /dev/null
@@ -1,5 +0,0 @@
-# -*- Python -*-
-
-config.test_format = lit.formats.TclTest()
-
-config.suffixes = ['.ll']
diff --git a/utils/lit/lit/ExampleTests/TclTest/stderr-pipe.ll b/utils/lit/lit/ExampleTests/TclTest/stderr-pipe.ll
deleted file mode 100644
index 6c55fe8a0b17..000000000000
--- a/utils/lit/lit/ExampleTests/TclTest/stderr-pipe.ll
+++ /dev/null
@@ -1 +0,0 @@
-; RUN: gcc -### > /dev/null |& grep {gcc version}
diff --git a/utils/lit/lit/ExampleTests/TclTest/tcl-redir-1.ll b/utils/lit/lit/ExampleTests/TclTest/tcl-redir-1.ll
deleted file mode 100644
index 61240ba45941..000000000000
--- a/utils/lit/lit/ExampleTests/TclTest/tcl-redir-1.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: echo 'hi' > %t.1 | echo 'hello' > %t.2
-; RUN: not grep 'hi' %t.1
-; RUN: grep 'hello' %t.2
-
-
-
-
diff --git a/utils/lit/lit/ExampleTests/lit.cfg b/utils/lit/lit/ExampleTests/lit.cfg
index 2629918d9f6d..164daba90373 100644
--- a/utils/lit/lit/ExampleTests/lit.cfg
+++ b/utils/lit/lit/ExampleTests/lit.cfg
@@ -19,8 +19,8 @@ config.test_source_root = None
# root).
config.test_exec_root = None
-# target_triple: Used by ShTest and TclTest formats for XFAIL checks.
+# target_triple: Used by ShTest format for XFAIL checks.
config.target_triple = 'foo'
-# available_features: Used by ShTest and TclTest formats for REQUIRES checks.
+# available_features: Used by ShTest format for REQUIRES checks.
config.available_features.add('some-feature-name')
diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py
index 0a359a3db8eb..9bcf20b2f11c 100644
--- a/utils/lit/lit/LitConfig.py
+++ b/utils/lit/lit/LitConfig.py
@@ -12,16 +12,15 @@ class LitConfig:
import Test
# Provide access to built-in formats.
- import LitFormats as formats
+ import TestFormats as formats
# Provide access to built-in utility functions.
import Util as util
def __init__(self, progname, path, quiet,
useValgrind, valgrindLeakCheck, valgrindArgs,
- useTclAsSh,
noExecute, ignoreStdErr, debug, isWindows,
- params):
+ params, config_prefix = None):
# The name of the test runner.
self.progname = progname
# The items to add to the PATH environment variable.
@@ -30,7 +29,6 @@ class LitConfig:
self.useValgrind = bool(useValgrind)
self.valgrindLeakCheck = bool(valgrindLeakCheck)
self.valgrindUserArgs = list(valgrindArgs)
- self.useTclAsSh = bool(useTclAsSh)
self.noExecute = noExecute
self.ignoreStdErr = ignoreStdErr
self.debug = debug
@@ -38,6 +36,12 @@ class LitConfig:
self.params = dict(params)
self.bashPath = None
+ # Configuration files to look for when discovering test suites.
+ self.config_prefix = config_prefix or 'lit'
+ self.config_name = '%s.cfg' % (self.config_prefix,)
+ self.site_config_name = '%s.site.cfg' % (self.config_prefix,)
+ self.local_config_name = '%s.local.cfg' % (self.config_prefix,)
+
self.numErrors = 0
self.numWarnings = 0
@@ -80,7 +84,7 @@ class LitConfig:
break
if self.bashPath is None:
- self.warning("Unable to find 'bash', running Tcl tests internally.")
+ self.warning("Unable to find 'bash'.")
self.bashPath = ''
return self.bashPath
diff --git a/utils/lit/lit/LitFormats.py b/utils/lit/lit/LitFormats.py
deleted file mode 100644
index 931d107109b3..000000000000
--- a/utils/lit/lit/LitFormats.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from TestFormats import FileBasedTest
-from TestFormats import GoogleTest, ShTest, TclTest
-from TestFormats import SyntaxCheckTest, OneCommandPerFileTest
diff --git a/utils/lit/lit/ShUtil.py b/utils/lit/lit/ShUtil.py
index dda622a48a84..50f79103199b 100644
--- a/utils/lit/lit/ShUtil.py
+++ b/utils/lit/lit/ShUtil.py
@@ -35,7 +35,7 @@ class ShLexer:
if ('|' in chunk or '&' in chunk or
'<' in chunk or '>' in chunk or
"'" in chunk or '"' in chunk or
- '\\' in chunk):
+ ';' in chunk or '\\' in chunk):
return None
self.pos = self.pos - 1 + len(chunk)
@@ -48,7 +48,7 @@ class ShLexer:
str = c
while self.pos != self.end:
c = self.look()
- if c.isspace() or c in "|&":
+ if c.isspace() or c in "|&;":
break
elif c in '><':
# This is an annoying case; we treat '2>' as a single token so
@@ -129,7 +129,7 @@ class ShLexer:
lex_one_token - Lex a single 'sh' token. """
c = self.eat()
- if c in ';!':
+ if c == ';':
return (c,)
if c == '|':
if self.maybe_eat('|'):
@@ -219,9 +219,6 @@ class ShParser:
def parse_pipeline(self):
negate = False
- if self.look() == ('!',):
- self.lex()
- negate = True
commands = [self.parse_command()]
while self.look() == ('|',):
@@ -253,9 +250,9 @@ class TestShLexer(unittest.TestCase):
return list(ShLexer(str, *args, **kwargs).lex())
def test_basic(self):
- self.assertEqual(self.lex('a|b>c&d<e'),
+ self.assertEqual(self.lex('a|b>c&d<e;f'),
['a', ('|',), 'b', ('>',), 'c', ('&',), 'd',
- ('<',), 'e'])
+ ('<',), 'e', (';',), 'f'])
def test_redirection_tokens(self):
self.assertEqual(self.lex('a2>c'),
@@ -317,10 +314,6 @@ class TestShParse(unittest.TestCase):
Command(['c'], [])],
False))
- self.assertEqual(self.parse('! a'),
- Pipeline([Command(['a'], [])],
- True))
-
def test_list(self):
self.assertEqual(self.parse('a ; b'),
Seq(Pipeline([Command(['a'], [])], False),
@@ -349,5 +342,10 @@ class TestShParse(unittest.TestCase):
'||',
Pipeline([Command(['c'], [])], False)))
+ self.assertEqual(self.parse('a; b'),
+ Seq(Pipeline([Command(['a'], [])], False),
+ ';',
+ Pipeline([Command(['b'], [])], False)))
+
if __name__ == '__main__':
unittest.main()
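
With ';' now a first-class separator token (and '!' pipeline negation gone),
the lexer splits sequenced commands as the new tests expect. A quick check,
assuming Python 2 and a working directory of utils/lit/lit:

    from ShUtil import ShLexer
    print list(ShLexer('a; b').lex())   # ['a', (';',), 'b']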
diff --git a/utils/lit/lit/TclUtil.py b/utils/lit/lit/TclUtil.py
deleted file mode 100644
index 4a3f34508d64..000000000000
--- a/utils/lit/lit/TclUtil.py
+++ /dev/null
@@ -1,322 +0,0 @@
-import itertools
-
-from ShCommands import Command, Pipeline
-
-def tcl_preprocess(data):
- # Tcl has a preprocessing step to replace escaped newlines.
- i = data.find('\\\n')
- if i == -1:
- return data
-
- # Replace '\\\n' and subsequent whitespace by a single space.
- n = len(data)
- str = data[:i]
- i += 2
- while i < n and data[i] in ' \t':
- i += 1
- return str + ' ' + data[i:]
-
-class TclLexer:
- """TclLexer - Lex a string into "words", following the Tcl syntax."""
-
- def __init__(self, data):
- self.data = tcl_preprocess(data)
- self.pos = 0
- self.end = len(self.data)
-
- def at_end(self):
- return self.pos == self.end
-
- def eat(self):
- c = self.data[self.pos]
- self.pos += 1
- return c
-
- def look(self):
- return self.data[self.pos]
-
- def maybe_eat(self, c):
- """
- maybe_eat(c) - Consume the character c if it is the next character,
- returning True if a character was consumed. """
- if self.data[self.pos] == c:
- self.pos += 1
- return True
- return False
-
- def escape(self, c):
- if c == 'a':
- return '\x07'
- elif c == 'b':
- return '\x08'
- elif c == 'f':
- return '\x0c'
- elif c == 'n':
- return '\n'
- elif c == 'r':
- return '\r'
- elif c == 't':
- return '\t'
- elif c == 'v':
- return '\x0b'
- elif c in 'uxo':
- raise ValueError,'Invalid quoted character %r' % c
- else:
- return c
-
- def lex_braced(self):
- # Lex until whitespace or end of string, the opening brace has already
- # been consumed.
-
- str = ''
- while 1:
- if self.at_end():
- raise ValueError,"Unterminated '{' quoted word"
-
- c = self.eat()
- if c == '}':
- break
- elif c == '{':
- str += '{' + self.lex_braced() + '}'
- elif c == '\\' and self.look() in '{}':
- str += self.eat()
- else:
- str += c
-
- return str
-
- def lex_quoted(self):
- str = ''
-
- while 1:
- if self.at_end():
- raise ValueError,"Unterminated '\"' quoted word"
-
- c = self.eat()
- if c == '"':
- break
- elif c == '\\':
- if self.at_end():
- raise ValueError,'Missing quoted character'
-
- str += self.escape(self.eat())
- else:
- str += c
-
- return str
-
- def lex_unquoted(self, process_all=False):
- # Lex until whitespace or end of string.
- str = ''
- while not self.at_end():
- if not process_all:
- if self.look().isspace() or self.look() == ';':
- break
-
- c = self.eat()
- if c == '\\':
- if self.at_end():
- raise ValueError,'Missing quoted character'
-
- str += self.escape(self.eat())
- elif c == '[':
- raise NotImplementedError, ('Command substitution is '
- 'not supported')
- elif c == '$' and not self.at_end() and (self.look().isalpha() or
- self.look() == '{'):
- raise NotImplementedError, ('Variable substitution is '
- 'not supported')
- else:
- str += c
-
- return str
-
- def lex_one_token(self):
- if self.maybe_eat('"'):
- return self.lex_quoted()
- elif self.maybe_eat('{'):
- # Check for argument substitution.
- if not self.maybe_eat('*'):
- return self.lex_braced()
-
- if not self.maybe_eat('}'):
- return '*' + self.lex_braced()
-
- if self.at_end() or self.look().isspace():
- return '*'
-
- raise NotImplementedError, "Argument substitution is unsupported"
- else:
- return self.lex_unquoted()
-
- def lex(self):
- while not self.at_end():
- c = self.look()
- if c in ' \t':
- self.eat()
- elif c in ';\n':
- self.eat()
- yield (';',)
- else:
- yield self.lex_one_token()
-
-class TclExecCommand:
- kRedirectPrefixes1 = ('<', '>')
- kRedirectPrefixes2 = ('<@', '<<', '2>', '>&', '>>', '>@')
- kRedirectPrefixes3 = ('2>@', '2>>', '>>&', '>&@')
- kRedirectPrefixes4 = ('2>@1',)
-
- def __init__(self, args):
- self.args = iter(args)
-
- def lex(self):
- try:
- return self.args.next()
- except StopIteration:
- return None
-
- def look(self):
- next = self.lex()
- if next is not None:
- self.args = itertools.chain([next], self.args)
- return next
-
- def parse_redirect(self, tok, length):
- if len(tok) == length:
- arg = self.lex()
- if arg is None:
- raise ValueError,'Missing argument to %r redirection' % tok
- else:
- tok,arg = tok[:length],tok[length:]
-
- if tok[0] == '2':
- op = (tok[1:],2)
- else:
- op = (tok,)
- return (op, arg)
-
- def parse_pipeline(self):
- if self.look() is None:
- raise ValueError,"Expected at least one argument to exec"
-
- commands = [Command([],[])]
- while 1:
- arg = self.lex()
- if arg is None:
- break
- elif arg == '|':
- commands.append(Command([],[]))
- elif arg == '|&':
- # Write this as a redirect of stderr; it must come first because
- # stdout may have already been redirected.
- commands[-1].redirects.insert(0, (('>&',2),'1'))
- commands.append(Command([],[]))
- elif arg[:4] in TclExecCommand.kRedirectPrefixes4:
- commands[-1].redirects.append(self.parse_redirect(arg, 4))
- elif arg[:3] in TclExecCommand.kRedirectPrefixes3:
- commands[-1].redirects.append(self.parse_redirect(arg, 3))
- elif arg[:2] in TclExecCommand.kRedirectPrefixes2:
- commands[-1].redirects.append(self.parse_redirect(arg, 2))
- elif arg[:1] in TclExecCommand.kRedirectPrefixes1:
- commands[-1].redirects.append(self.parse_redirect(arg, 1))
- else:
- commands[-1].args.append(arg)
-
- return Pipeline(commands, False, pipe_err=True)
-
- def parse(self):
- ignoreStderr = False
- keepNewline = False
-
- # Parse arguments.
- while 1:
- next = self.look()
- if not isinstance(next, str) or next[0] != '-':
- break
-
- if next == '--':
- self.lex()
- break
- elif next == '-ignorestderr':
- ignoreStderr = True
- elif next == '-keepnewline':
- keepNewline = True
- else:
- raise ValueError,"Invalid exec argument %r" % next
-
- return (ignoreStderr, keepNewline, self.parse_pipeline())
-
-###
-
-import unittest
-
-class TestTclLexer(unittest.TestCase):
- def lex(self, str, *args, **kwargs):
- return list(TclLexer(str, *args, **kwargs).lex())
-
- def test_preprocess(self):
- self.assertEqual(tcl_preprocess('a b'), 'a b')
- self.assertEqual(tcl_preprocess('a\\\nb c'), 'a b c')
-
- def test_unquoted(self):
- self.assertEqual(self.lex('a b c'),
- ['a', 'b', 'c'])
- self.assertEqual(self.lex(r'a\nb\tc\ '),
- ['a\nb\tc '])
- self.assertEqual(self.lex(r'a \\\$b c $\\'),
- ['a', r'\$b', 'c', '$\\'])
-
- def test_braced(self):
- self.assertEqual(self.lex('a {b c} {}'),
- ['a', 'b c', ''])
- self.assertEqual(self.lex(r'a {b {c\n}}'),
- ['a', 'b {c\\n}'])
- self.assertEqual(self.lex(r'a {b\{}'),
- ['a', 'b{'])
- self.assertEqual(self.lex(r'{*}'), ['*'])
- self.assertEqual(self.lex(r'{*} a'), ['*', 'a'])
- self.assertEqual(self.lex(r'{*} a'), ['*', 'a'])
- self.assertEqual(self.lex('{a\\\n b}'),
- ['a b'])
-
- def test_quoted(self):
- self.assertEqual(self.lex('a "b c"'),
- ['a', 'b c'])
-
- def test_terminators(self):
- self.assertEqual(self.lex('a\nb'),
- ['a', (';',), 'b'])
- self.assertEqual(self.lex('a;b'),
- ['a', (';',), 'b'])
- self.assertEqual(self.lex('a ; b'),
- ['a', (';',), 'b'])
-
-class TestTclExecCommand(unittest.TestCase):
- def parse(self, str):
- return TclExecCommand(list(TclLexer(str).lex())).parse()
-
- def test_basic(self):
- self.assertEqual(self.parse('echo hello'),
- (False, False,
- Pipeline([Command(['echo', 'hello'], [])],
- False, True)))
- self.assertEqual(self.parse('echo hello | grep hello'),
- (False, False,
- Pipeline([Command(['echo', 'hello'], []),
- Command(['grep', 'hello'], [])],
- False, True)))
-
- def test_redirect(self):
- self.assertEqual(self.parse('echo hello > a >b >>c 2> d |& e'),
- (False, False,
- Pipeline([Command(['echo', 'hello'],
- [(('>&',2),'1'),
- (('>',),'a'),
- (('>',),'b'),
- (('>>',),'c'),
- (('>',2),'d')]),
- Command(['e'], [])],
- False, True)))
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/utils/lit/lit/Test.py b/utils/lit/lit/Test.py
index db2e0324651e..9471e3a98bf5 100644
--- a/utils/lit/lit/Test.py
+++ b/utils/lit/lit/Test.py
@@ -7,6 +7,10 @@ class TestResult:
self.name = name
self.isFailure = isFailure
+ def __repr__(self):
+ return '%s%r' % (self.__class__.__name__,
+ (self.name, self.isFailure))
+
PASS = TestResult('PASS', False)
XFAIL = TestResult('XFAIL', False)
FAIL = TestResult('FAIL', True)
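
The new __repr__ makes the cached result singletons readable in logs and debuggers; a self-contained sketch of the behavior:

    class TestResult:
        def __init__(self, name, isFailure):
            self.name = name
            self.isFailure = isFailure
        def __repr__(self):
            # Same format as the patch above: ClassName('NAME', isFailure).
            return '%s%r' % (self.__class__.__name__,
                             (self.name, self.isFailure))

    assert repr(TestResult('PASS', False)) == "TestResult('PASS', False)"
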
diff --git a/utils/lit/lit/TestFormats.py b/utils/lit/lit/TestFormats.py
index d1c0558b5f37..26541f183bf8 100644
--- a/utils/lit/lit/TestFormats.py
+++ b/utils/lit/lit/TestFormats.py
@@ -54,28 +54,36 @@ class GoogleTest(object):
else:
yield ''.join(nested_tests) + ln
+ def getTestsInExecutable(self, testSuite, path_in_suite, execpath,
+ litConfig, localConfig):
+ if not execpath.endswith(self.test_suffix):
+ return
+ (dirname, basename) = os.path.split(execpath)
+ # Discover the tests in this executable.
+ for testname in self.getGTestTests(execpath, litConfig, localConfig):
+ testPath = path_in_suite + (dirname, basename, testname)
+ yield Test.Test(testSuite, testPath, localConfig)
+
def getTestsInDirectory(self, testSuite, path_in_suite,
litConfig, localConfig):
source_path = testSuite.getSourcePath(path_in_suite)
for filename in os.listdir(source_path):
- # Check for the one subdirectory (build directory) tests will be in.
- if not '.' in self.test_sub_dir:
+ filepath = os.path.join(source_path, filename)
+ if os.path.isdir(filepath):
+ # Iterate over executables in a directory.
if not os.path.normcase(filename) in self.test_sub_dir:
continue
-
- filepath = os.path.join(source_path, filename)
- if not os.path.isdir(filepath):
- continue
-
- for subfilename in os.listdir(filepath):
- if subfilename.endswith(self.test_suffix):
+ for subfilename in os.listdir(filepath):
execpath = os.path.join(filepath, subfilename)
-
- # Discover the tests in this executable.
- for name in self.getGTestTests(execpath, litConfig,
- localConfig):
- testPath = path_in_suite + (filename, subfilename, name)
- yield Test.Test(testSuite, testPath, localConfig)
+ for test in self.getTestsInExecutable(
+ testSuite, path_in_suite, execpath,
+ litConfig, localConfig):
+ yield test
+ elif ('.' in self.test_sub_dir):
+ for test in self.getTestsInExecutable(
+ testSuite, path_in_suite, filepath,
+ litConfig, localConfig):
+ yield test
def execute(self, test, litConfig):
testPath,testName = os.path.split(test.getSourcePath())
@@ -89,6 +97,9 @@ class GoogleTest(object):
if litConfig.useValgrind:
cmd = litConfig.valgrindArgs + cmd
+ if litConfig.noExecute:
+ return Test.PASS, ''
+
out, err, exitCode = TestRunner.executeCommand(
cmd, env=test.config.environment)
@@ -124,14 +135,6 @@ class ShTest(FileBasedTest):
return TestRunner.executeShTest(test, litConfig,
self.execute_external)
-class TclTest(FileBasedTest):
- def __init__(self, ignoreStdErr=False):
- self.ignoreStdErr = ignoreStdErr
-
- def execute(self, test, litConfig):
- litConfig.ignoreStdErr = self.ignoreStdErr
- return TestRunner.executeTclTest(test, litConfig)
-
###
import re
@@ -221,12 +224,3 @@ class OneCommandPerFileTest:
report += """Output:\n--\n%s--""" % diags
return Test.FAIL, report
-
-class SyntaxCheckTest(OneCommandPerFileTest):
- def __init__(self, compiler, dir, extra_cxx_args=[], *args, **kwargs):
- cmd = [compiler, '-x', 'c++', '-fsyntax-only'] + extra_cxx_args
- OneCommandPerFileTest.__init__(self, cmd, dir,
- useTempInput=1, *args, **kwargs)
-
- def createTempInput(self, tmp, test):
- print >>tmp, '#include "%s"' % test.source_path
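
The GoogleTest refactor above factors per-executable discovery into getTestsInExecutable, so getTestsInDirectory can handle two layouts: executables under a named build subdirectory, or (when test_sub_dir contains a '.') executables directly in the scanned directory. A simplified control-flow sketch, with tests_in_executable standing in for the real method:

    import os

    def discover(source_path, test_sub_dir, tests_in_executable):
        # Simplified model of GoogleTest.getTestsInDirectory's new shape.
        for filename in os.listdir(source_path):
            filepath = os.path.join(source_path, filename)
            if os.path.isdir(filepath):
                # Only descend into the configured build subdirectories.
                if os.path.normcase(filename) not in test_sub_dir:
                    continue
                for subfilename in os.listdir(filepath):
                    for test in tests_in_executable(
                            os.path.join(filepath, subfilename)):
                        yield test
            elif '.' in test_sub_dir:
                # Build directory is '.': executables sit beside the config.
                for test in tests_in_executable(filepath):
                    yield test
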
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index 0c1911ed3560..84176996a8c8 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -49,13 +49,14 @@ def executeShCmd(cmd, cfg, cwd, results):
return executeShCmd(cmd.rhs, cfg, cwd, results)
if cmd.op == '&':
- raise NotImplementedError,"unsupported test command: '&'"
+ raise InternalShellError(cmd,"unsupported shell operator: '&'")
if cmd.op == '||':
res = executeShCmd(cmd.lhs, cfg, cwd, results)
if res != 0:
res = executeShCmd(cmd.rhs, cfg, cwd, results)
return res
+
if cmd.op == '&&':
res = executeShCmd(cmd.lhs, cfg, cwd, results)
if res is None:
@@ -77,7 +78,7 @@ def executeShCmd(cmd, cfg, cwd, results):
# output. This is null until we have seen some output using
# stderr.
for i,j in enumerate(cmd.commands):
- # Apply the redirections, we use (N,) as a sentinal to indicate stdin,
+ # Apply the redirections, we use (N,) as a sentinel to indicate stdin,
# stdout, stderr for N equal to 0, 1, or 2 respectively. Redirects to or
# from a file are represented with a list [file, mode, file-object]
# where file-object is initially None.
@@ -98,7 +99,7 @@ def executeShCmd(cmd, cfg, cwd, results):
elif r[0] == ('<',):
redirects[0] = [r[1], 'r', None]
else:
- raise NotImplementedError,"Unsupported redirect: %r" % (r,)
+ raise InternalShellError(j,"Unsupported redirect: %r" % (r,))
# Map from the final redirections to something subprocess can handle.
final_redirects = []
@@ -107,14 +108,14 @@ def executeShCmd(cmd, cfg, cwd, results):
result = input
elif r == (1,):
if index == 0:
- raise NotImplementedError,"Unsupported redirect for stdin"
+ raise InternalShellError(j,"Unsupported redirect for stdin")
elif index == 1:
result = subprocess.PIPE
else:
result = subprocess.STDOUT
elif r == (2,):
if index != 2:
- raise NotImplementedError,"Unsupported redirect on stdout"
+ raise InternalShellError(j,"Unsupported redirect on stdout")
result = subprocess.PIPE
else:
if r[2] is None:
@@ -241,98 +242,26 @@ def executeShCmd(cmd, cfg, cwd, results):
return exitCode
def executeScriptInternal(test, litConfig, tmpBase, commands, cwd):
- ln = ' &&\n'.join(commands)
- try:
- cmd = ShUtil.ShParser(ln, litConfig.isWindows).parse()
- except:
- return (Test.FAIL, "shell parser error on: %r" % ln)
-
- results = []
- try:
- exitCode = executeShCmd(cmd, test.config, cwd, results)
- except InternalShellError,e:
- out = ''
- err = e.message
- exitCode = 255
-
- out = err = ''
- for i,(cmd, cmd_out,cmd_err,res) in enumerate(results):
- out += 'Command %d: %s\n' % (i, ' '.join('"%s"' % s for s in cmd.args))
- out += 'Command %d Result: %r\n' % (i, res)
- out += 'Command %d Output:\n%s\n\n' % (i, cmd_out)
- out += 'Command %d Stderr:\n%s\n\n' % (i, cmd_err)
-
- return out, err, exitCode
-
-def executeTclScriptInternal(test, litConfig, tmpBase, commands, cwd):
- import TclUtil
cmds = []
for ln in commands:
- # Given the unfortunate way LLVM's test are written, the line gets
- # backslash substitution done twice.
- ln = TclUtil.TclLexer(ln).lex_unquoted(process_all = True)
-
try:
- tokens = list(TclUtil.TclLexer(ln).lex())
+ cmds.append(ShUtil.ShParser(ln, litConfig.isWindows).parse())
except:
- return (Test.FAIL, "Tcl lexer error on: %r" % ln)
-
- # Validate there are no control tokens.
- for t in tokens:
- if not isinstance(t, str):
- return (Test.FAIL,
- "Invalid test line: %r containing %r" % (ln, t))
-
- try:
- cmds.append(TclUtil.TclExecCommand(tokens).parse_pipeline())
- except:
- return (Test.FAIL, "Tcl 'exec' parse error on: %r" % ln)
-
- if litConfig.useValgrind:
- for pipeline in cmds:
- if pipeline.commands:
- # Only valgrind the first command in each pipeline, to avoid
- # valgrinding things like grep, not, and FileCheck.
- cmd = pipeline.commands[0]
- cmd.args = litConfig.valgrindArgs + cmd.args
+ return (Test.FAIL, "shell parser error on: %r" % ln)
cmd = cmds[0]
for c in cmds[1:]:
cmd = ShUtil.Seq(cmd, '&&', c)
- # FIXME: This is lame, we shouldn't need bash. See PR5240.
- bashPath = litConfig.getBashPath()
- if litConfig.useTclAsSh and bashPath:
- script = tmpBase + '.script'
-
- # Write script file
- f = open(script,'w')
- print >>f, 'set -o pipefail'
- cmd.toShell(f, pipefail = True)
- f.close()
-
- if 0:
- print >>sys.stdout, cmd
- print >>sys.stdout, open(script).read()
- print >>sys.stdout
- return '', '', 0
-
- command = [litConfig.getBashPath(), script]
- out,err,exitCode = executeCommand(command, cwd=cwd,
- env=test.config.environment)
-
- return out,err,exitCode
- else:
- results = []
- try:
- exitCode = executeShCmd(cmd, test.config, cwd, results)
- except InternalShellError,e:
- results.append((e.command, '', e.message + '\n', 255))
- exitCode = 255
+ results = []
+ try:
+ exitCode = executeShCmd(cmd, test.config, cwd, results)
+ except InternalShellError,e:
+ exitCode = 127
+ results.append((e.command, '', e.message, exitCode))
out = err = ''
-
- for i,(cmd, cmd_out, cmd_err, res) in enumerate(results):
+ for i,(cmd, cmd_out,cmd_err,res) in enumerate(results):
out += 'Command %d: %s\n' % (i, ' '.join('"%s"' % s for s in cmd.args))
out += 'Command %d Result: %r\n' % (i, res)
out += 'Command %d Output:\n%s\n\n' % (i, cmd_out)
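
executeScriptInternal now parses each RUN line individually, then folds the results into a single '&&' sequence; a parse failure therefore points at the specific offending line, and internal shell errors surface as exit code 127. A sketch of the chaining (assumes lit's modules are importable; error handling omitted):

    from lit import ShUtil

    def chain_run_lines(commands, is_windows=False):
        # Parse each RUN line on its own, then join with '&&' Seq nodes,
        # as in the patch above.
        cmds = [ShUtil.ShParser(ln, is_windows).parse() for ln in commands]
        cmd = cmds[0]
        for c in cmds[1:]:
            cmd = ShUtil.Seq(cmd, '&&', c)
        return cmd
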
@@ -348,11 +277,14 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
script += '.bat'
# Write script file
- f = open(script,'w')
+ mode = 'w'
+ if litConfig.isWindows and not isWin32CMDEXE:
+ mode += 'b' # Avoid CRLFs when writing bash scripts.
+ f = open(script, mode)
if isWin32CMDEXE:
f.write('\nif %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands))
else:
- f.write(' &&\n'.join(commands))
+ f.write('{ ' + '; } &&\n{ '.join(commands) + '; }')
f.write('\n')
f.close()
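
For the external-shell path, each command is now wrapped in a brace group before joining with '&&', so a line that contains its own operators (for example 'a || b') still participates in the chain as one unit; binary mode keeps CRLF line endings out of bash scripts written on Windows. The joined script body for two hypothetical RUN lines:

    commands = ['echo hello', 'echo world | grep world']
    body = '{ ' + '; } &&\n{ '.join(commands) + '; }'
    assert body == '{ echo hello; } &&\n{ echo world | grep world; }'
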
@@ -424,15 +356,15 @@ def parseIntegratedTestScript(test, normalize_slashes=False,
('%{pathsep}', os.pathsep),
('%t', tmpBase + '.tmp'),
('%T', tmpDir),
- # FIXME: Remove this once we kill DejaGNU.
- ('%abs_tmp', tmpBase + '.tmp'),
('#_MARKER_#', '%')])
# Collect the test lines from the script.
script = []
xfails = []
requires = []
+ line_number = 0
for ln in open(sourcepath):
+ line_number += 1
if 'RUN:' in ln:
# Isolate the command to run.
index = ln.index('RUN:')
@@ -441,6 +373,15 @@ def parseIntegratedTestScript(test, normalize_slashes=False,
# Trim trailing whitespace.
ln = ln.rstrip()
+ # Substitute line number expressions
+ ln = re.sub('%\(line\)', str(line_number), ln)
+ def replace_line_number(match):
+ if match.group(1) == '+':
+ return str(line_number + int(match.group(2)))
+ if match.group(1) == '-':
+ return str(line_number - int(match.group(2)))
+ ln = re.sub('%\(line *([\+-]) *(\d+)\)', replace_line_number, ln)
+
# Collapse lines with trailing '\\'.
if script and script[-1][-1] == '\\':
script[-1] = script[-1][:-1] + ln
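
The substitution added above lets tests refer to their own position in the source file: %(line) expands to the current RUN line's number, and %(line+N)/%(line-N) to an offset from it. A sketch applied to a hypothetical RUN line:

    import re

    line_number = 10
    ln = 'FileCheck --check-prefix=AT-%(line+2) %s'
    ln = re.sub(r'%\(line\)', str(line_number), ln)
    def replace_line_number(match):
        if match.group(1) == '+':
            return str(line_number + int(match.group(2)))
        if match.group(1) == '-':
            return str(line_number - int(match.group(2)))
    ln = re.sub(r'%\(line *([\+-]) *(\d+)\)', replace_line_number, ln)
    assert ln == 'FileCheck --check-prefix=AT-12 %s'
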
@@ -490,17 +431,14 @@ def parseIntegratedTestScript(test, normalize_slashes=False,
isXFail = isExpectedFail(test, xfails)
return script,isXFail,tmpBase,execdir
-def formatTestOutput(status, out, err, exitCode, failDueToStderr, script):
+def formatTestOutput(status, out, err, exitCode, script):
output = StringIO.StringIO()
print >>output, "Script:"
print >>output, "--"
print >>output, '\n'.join(script)
print >>output, "--"
print >>output, "Exit Code: %r" % exitCode,
- if failDueToStderr:
- print >>output, "(but there was output on stderr)"
- else:
- print >>output
+ print >>output
if out:
print >>output, "Command Output (stdout):"
print >>output, "--"
@@ -513,53 +451,6 @@ def formatTestOutput(status, out, err, exitCode, failDueToStderr, script):
print >>output, "--"
return (status, output.getvalue())
-def executeTclTest(test, litConfig):
- if test.config.unsupported:
- return (Test.UNSUPPORTED, 'Test is unsupported')
-
- # Parse the test script, normalizing slashes in substitutions on Windows
- # (since otherwise Tcl style lexing will treat them as escapes).
- res = parseIntegratedTestScript(test, normalize_slashes=kIsWindows)
- if len(res) == 2:
- return res
-
- script, isXFail, tmpBase, execdir = res
-
- if litConfig.noExecute:
- return (Test.PASS, '')
-
- # Create the output directory if it does not already exist.
- Util.mkdir_p(os.path.dirname(tmpBase))
-
- res = executeTclScriptInternal(test, litConfig, tmpBase, script, execdir)
- if len(res) == 2:
- return res
-
- # Test for failure. In addition to the exit code, Tcl commands are
- # considered to fail if there is any standard error output.
- out,err,exitCode = res
- if isXFail:
- ok = exitCode != 0 or err and not litConfig.ignoreStdErr
- if ok:
- status = Test.XFAIL
- else:
- status = Test.XPASS
- else:
- ok = exitCode == 0 and (not err or litConfig.ignoreStdErr)
- if ok:
- status = Test.PASS
- else:
- status = Test.FAIL
-
- if ok:
- return (status,'')
-
- # Set a flag for formatTestOutput so it can explain why the test was
- # considered to have failed, despite having an exit code of 0.
- failDueToStderr = exitCode == 0 and err and not litConfig.ignoreStdErr
-
- return formatTestOutput(status, out, err, exitCode, failDueToStderr, script)
-
def executeShTest(test, litConfig, useExternalSh,
extra_substitutions=[]):
if test.config.unsupported:
@@ -601,7 +492,4 @@ def executeShTest(test, litConfig, useExternalSh,
if ok:
return (status,'')
- # Sh tests are not considered to fail just from stderr output.
- failDueToStderr = False
-
- return formatTestOutput(status, out, err, exitCode, failDueToStderr, script)
+ return formatTestOutput(status, out, err, exitCode, script)
diff --git a/utils/lit/lit/__init__.py b/utils/lit/lit/__init__.py
index f3fbb1cd8276..3e61bbd770c8 100644
--- a/utils/lit/lit/__init__.py
+++ b/utils/lit/lit/__init__.py
@@ -4,7 +4,7 @@ from main import main
__author__ = 'Daniel Dunbar'
__email__ = 'daniel@zuster.org'
-__versioninfo__ = (0, 2, 0)
+__versioninfo__ = (0, 3, 0)
__version__ = '.'.join(map(str, __versioninfo__)) + 'dev'
__all__ = []
diff --git a/utils/lit/lit/discovery.py b/utils/lit/lit/discovery.py
new file mode 100644
index 000000000000..c869a671ef72
--- /dev/null
+++ b/utils/lit/lit/discovery.py
@@ -0,0 +1,234 @@
+"""
+Test discovery functions.
+"""
+
+import os
+import sys
+
+from lit.TestingConfig import TestingConfig
+from lit import LitConfig, Test
+
+def dirContainsTestSuite(path, lit_config):
+ cfgpath = os.path.join(path, lit_config.site_config_name)
+ if os.path.exists(cfgpath):
+ return cfgpath
+ cfgpath = os.path.join(path, lit_config.config_name)
+ if os.path.exists(cfgpath):
+ return cfgpath
+
+def getTestSuite(item, litConfig, cache):
+ """getTestSuite(item, litConfig, cache) -> (suite, relative_path)
+
+ Find the test suite containing @arg item.
+
+ @retval (None, ...) - Indicates no test suite contains @arg item.
+ @retval (suite, relative_path) - The suite that @arg item is in, and its
+ relative path inside that suite.
+ """
+ def search1(path):
+ # Check for a site config or a lit config.
+ cfgpath = dirContainsTestSuite(path, litConfig)
+
+ # If we didn't find a config file, keep looking.
+ if not cfgpath:
+ parent,base = os.path.split(path)
+ if parent == path:
+ return (None, ())
+
+ ts, relative = search(parent)
+ return (ts, relative + (base,))
+
+ # We found a config file, load it.
+ if litConfig.debug:
+ litConfig.note('loading suite config %r' % cfgpath)
+
+ cfg = TestingConfig.frompath(cfgpath, None, litConfig, mustExist = True)
+ source_root = os.path.realpath(cfg.test_source_root or path)
+ exec_root = os.path.realpath(cfg.test_exec_root or path)
+ return Test.TestSuite(cfg.name, source_root, exec_root, cfg), ()
+
+ def search(path):
+ # Check for an already instantiated test suite.
+ res = cache.get(path)
+ if res is None:
+ cache[path] = res = search1(path)
+ return res
+
+ # Canonicalize the path.
+ item = os.path.realpath(item)
+
+ # Skip files and virtual components.
+ components = []
+ while not os.path.isdir(item):
+ parent,base = os.path.split(item)
+ if parent == item:
+ return (None, ())
+ components.append(base)
+ item = parent
+ components.reverse()
+
+ ts, relative = search(item)
+ return ts, tuple(relative + tuple(components))
+
+def getLocalConfig(ts, path_in_suite, litConfig, cache):
+ def search1(path_in_suite):
+ # Get the parent config.
+ if not path_in_suite:
+ parent = ts.config
+ else:
+ parent = search(path_in_suite[:-1])
+
+ # Load the local configuration.
+ source_path = ts.getSourcePath(path_in_suite)
+ cfgpath = os.path.join(source_path, litConfig.local_config_name)
+ if litConfig.debug:
+ litConfig.note('loading local config %r' % cfgpath)
+ return TestingConfig.frompath(cfgpath, parent, litConfig,
+ mustExist = False,
+ config = parent.clone(cfgpath))
+
+ def search(path_in_suite):
+ key = (ts, path_in_suite)
+ res = cache.get(key)
+ if res is None:
+ cache[key] = res = search1(path_in_suite)
+ return res
+
+ return search(path_in_suite)
+
+def getTests(path, litConfig, testSuiteCache, localConfigCache):
+ # Find the test suite for this input and its relative path.
+ ts,path_in_suite = getTestSuite(path, litConfig, testSuiteCache)
+ if ts is None:
+ litConfig.warning('unable to find test suite for %r' % path)
+ return (),()
+
+ if litConfig.debug:
+ litConfig.note('resolved input %r to %r::%r' % (path, ts.name,
+ path_in_suite))
+
+ return ts, getTestsInSuite(ts, path_in_suite, litConfig,
+ testSuiteCache, localConfigCache)
+
+def getTestsInSuite(ts, path_in_suite, litConfig,
+ testSuiteCache, localConfigCache):
+ # Check that the source path exists (errors here are reported by the
+ # caller).
+ source_path = ts.getSourcePath(path_in_suite)
+ if not os.path.exists(source_path):
+ return
+
+ # Check if the user named a test directly.
+ if not os.path.isdir(source_path):
+ lc = getLocalConfig(ts, path_in_suite[:-1], litConfig, localConfigCache)
+ yield Test.Test(ts, path_in_suite, lc)
+ return
+
+ # Otherwise we have a directory to search for tests, start by getting the
+ # local configuration.
+ lc = getLocalConfig(ts, path_in_suite, litConfig, localConfigCache)
+
+ # Search for tests.
+ if lc.test_format is not None:
+ for res in lc.test_format.getTestsInDirectory(ts, path_in_suite,
+ litConfig, lc):
+ yield res
+
+ # Search subdirectories.
+ for filename in os.listdir(source_path):
+ # FIXME: This doesn't belong here?
+ if filename in ('Output', '.svn') or filename in lc.excludes:
+ continue
+
+ # Ignore non-directories.
+ file_sourcepath = os.path.join(source_path, filename)
+ if not os.path.isdir(file_sourcepath):
+ continue
+
+ # Check for nested test suites, first in the execpath in case there is a
+ # site configuration and then in the source path.
+ file_execpath = ts.getExecPath(path_in_suite + (filename,))
+ if dirContainsTestSuite(file_execpath, litConfig):
+ sub_ts, subiter = getTests(file_execpath, litConfig,
+ testSuiteCache, localConfigCache)
+ elif dirContainsTestSuite(file_sourcepath, litConfig):
+ sub_ts, subiter = getTests(file_sourcepath, litConfig,
+ testSuiteCache, localConfigCache)
+ else:
+ # Otherwise, continue loading from inside this test suite.
+ subiter = getTestsInSuite(ts, path_in_suite + (filename,),
+ litConfig, testSuiteCache,
+ localConfigCache)
+ sub_ts = None
+
+ N = 0
+ for res in subiter:
+ N += 1
+ yield res
+ if sub_ts and not N:
+ litConfig.warning('test suite %r contained no tests' % sub_ts.name)
+
+def find_tests_for_inputs(lit_config, inputs):
+ """
+ find_tests_for_inputs(lit_config, inputs) -> [Test]
+
+ Given a configuration object and a list of input specifiers, find all the
+ tests to execute.
+ """
+
+ # Expand '@...' form in inputs.
+ actual_inputs = []
+ for input in inputs:
+ if os.path.exists(input) or not input.startswith('@'):
+ actual_inputs.append(input)
+ else:
+ f = open(input[1:])
+ try:
+ for ln in f:
+ ln = ln.strip()
+ if ln:
+ actual_inputs.append(ln)
+ finally:
+ f.close()
+
+ # Load the tests from the inputs.
+ tests = []
+ test_suite_cache = {}
+ local_config_cache = {}
+ for input in actual_inputs:
+ prev = len(tests)
+ tests.extend(getTests(input, lit_config,
+ test_suite_cache, local_config_cache)[1])
+ if prev == len(tests):
+ lit_config.warning('input %r contained no tests' % input)
+
+ # If there were any errors during test discovery, exit now.
+ if lit_config.numErrors:
+ print >>sys.stderr, '%d errors, exiting.' % lit_config.numErrors
+ sys.exit(2)
+
+ return tests
+
+def load_test_suite(inputs):
+ import platform
+ import unittest
+ from lit.LitTestCase import LitTestCase
+
+ # Create the global config object.
+ litConfig = LitConfig.LitConfig(progname = 'lit',
+ path = [],
+ quiet = False,
+ useValgrind = False,
+ valgrindLeakCheck = False,
+ valgrindArgs = [],
+ noExecute = False,
+ ignoreStdErr = False,
+ debug = False,
+ isWindows = (platform.system()=='Windows'),
+ params = {})
+
+ tests = find_tests_for_inputs(litConfig, inputs)
+
+ # Return a unittest test suite which just runs the tests in order.
+ return unittest.TestSuite([LitTestCase(test, litConfig) for test in tests])
+
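
The new discovery module exposes two entry points: find_tests_for_inputs, for driving discovery from an existing LitConfig, and load_test_suite, which wraps the discovered tests for unittest (exercised by the unittest-adaptor test below). A hedged usage sketch, with an illustrative input path:

    import unittest
    import lit.discovery

    # Path is illustrative; any directory containing a lit.cfg works.
    suite = lit.discovery.load_test_suite(['utils/lit/tests/Inputs/discovery'])
    unittest.TextTestRunner(verbosity=2).run(suite)
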
diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
index 25bbcbd9f275..da961eeedc63 100755
--- a/utils/lit/lit/main.py
+++ b/utils/lit/lit/main.py
@@ -12,18 +12,10 @@ import ProgressBar
import TestRunner
import Util
-from TestingConfig import TestingConfig
import LitConfig
import Test
-# Configuration files to look for when discovering test suites. These can be
-# overridden with --config-prefix.
-#
-# FIXME: Rename to 'config.lit', 'site.lit', and 'local.lit' ?
-gConfigName = 'lit.cfg'
-gSiteConfigName = 'lit.site.cfg'
-
-kLocalConfigName = 'lit.local.cfg'
+import lit.discovery
class TestingProgressDisplay:
def __init__(self, opts, numTests, progressBar=None):
@@ -137,166 +129,6 @@ class Tester(threading.Thread):
test.setResult(result, output, elapsed)
self.display.update(test)
-def dirContainsTestSuite(path):
- cfgpath = os.path.join(path, gSiteConfigName)
- if os.path.exists(cfgpath):
- return cfgpath
- cfgpath = os.path.join(path, gConfigName)
- if os.path.exists(cfgpath):
- return cfgpath
-
-def getTestSuite(item, litConfig, cache):
- """getTestSuite(item, litConfig, cache) -> (suite, relative_path)
-
- Find the test suite containing @arg item.
-
- @retval (None, ...) - Indicates no test suite contains @arg item.
- @retval (suite, relative_path) - The suite that @arg item is in, and its
- relative path inside that suite.
- """
- def search1(path):
- # Check for a site config or a lit config.
- cfgpath = dirContainsTestSuite(path)
-
- # If we didn't find a config file, keep looking.
- if not cfgpath:
- parent,base = os.path.split(path)
- if parent == path:
- return (None, ())
-
- ts, relative = search(parent)
- return (ts, relative + (base,))
-
- # We found a config file, load it.
- if litConfig.debug:
- litConfig.note('loading suite config %r' % cfgpath)
-
- cfg = TestingConfig.frompath(cfgpath, None, litConfig, mustExist = True)
- source_root = os.path.realpath(cfg.test_source_root or path)
- exec_root = os.path.realpath(cfg.test_exec_root or path)
- return Test.TestSuite(cfg.name, source_root, exec_root, cfg), ()
-
- def search(path):
- # Check for an already instantiated test suite.
- res = cache.get(path)
- if res is None:
- cache[path] = res = search1(path)
- return res
-
- # Canonicalize the path.
- item = os.path.realpath(item)
-
- # Skip files and virtual components.
- components = []
- while not os.path.isdir(item):
- parent,base = os.path.split(item)
- if parent == item:
- return (None, ())
- components.append(base)
- item = parent
- components.reverse()
-
- ts, relative = search(item)
- return ts, tuple(relative + tuple(components))
-
-def getLocalConfig(ts, path_in_suite, litConfig, cache):
- def search1(path_in_suite):
- # Get the parent config.
- if not path_in_suite:
- parent = ts.config
- else:
- parent = search(path_in_suite[:-1])
-
- # Load the local configuration.
- source_path = ts.getSourcePath(path_in_suite)
- cfgpath = os.path.join(source_path, kLocalConfigName)
- if litConfig.debug:
- litConfig.note('loading local config %r' % cfgpath)
- return TestingConfig.frompath(cfgpath, parent, litConfig,
- mustExist = False,
- config = parent.clone(cfgpath))
-
- def search(path_in_suite):
- key = (ts, path_in_suite)
- res = cache.get(key)
- if res is None:
- cache[key] = res = search1(path_in_suite)
- return res
-
- return search(path_in_suite)
-
-def getTests(path, litConfig, testSuiteCache, localConfigCache):
- # Find the test suite for this input and its relative path.
- ts,path_in_suite = getTestSuite(path, litConfig, testSuiteCache)
- if ts is None:
- litConfig.warning('unable to find test suite for %r' % path)
- return (),()
-
- if litConfig.debug:
- litConfig.note('resolved input %r to %r::%r' % (path, ts.name,
- path_in_suite))
-
- return ts, getTestsInSuite(ts, path_in_suite, litConfig,
- testSuiteCache, localConfigCache)
-
-def getTestsInSuite(ts, path_in_suite, litConfig,
- testSuiteCache, localConfigCache):
- # Check that the source path exists (errors here are reported by the
- # caller).
- source_path = ts.getSourcePath(path_in_suite)
- if not os.path.exists(source_path):
- return
-
- # Check if the user named a test directly.
- if not os.path.isdir(source_path):
- lc = getLocalConfig(ts, path_in_suite[:-1], litConfig, localConfigCache)
- yield Test.Test(ts, path_in_suite, lc)
- return
-
- # Otherwise we have a directory to search for tests, start by getting the
- # local configuration.
- lc = getLocalConfig(ts, path_in_suite, litConfig, localConfigCache)
-
- # Search for tests.
- if lc.test_format is not None:
- for res in lc.test_format.getTestsInDirectory(ts, path_in_suite,
- litConfig, lc):
- yield res
-
- # Search subdirectories.
- for filename in os.listdir(source_path):
- # FIXME: This doesn't belong here?
- if filename in ('Output', '.svn') or filename in lc.excludes:
- continue
-
- # Ignore non-directories.
- file_sourcepath = os.path.join(source_path, filename)
- if not os.path.isdir(file_sourcepath):
- continue
-
- # Check for nested test suites, first in the execpath in case there is a
- # site configuration and then in the source path.
- file_execpath = ts.getExecPath(path_in_suite + (filename,))
- if dirContainsTestSuite(file_execpath):
- sub_ts, subiter = getTests(file_execpath, litConfig,
- testSuiteCache, localConfigCache)
- elif dirContainsTestSuite(file_sourcepath):
- sub_ts, subiter = getTests(file_sourcepath, litConfig,
- testSuiteCache, localConfigCache)
- else:
- # Otherwise, continue loading from inside this test suite.
- subiter = getTestsInSuite(ts, path_in_suite + (filename,),
- litConfig, testSuiteCache,
- localConfigCache)
- sub_ts = None
-
- N = 0
- for res in subiter:
- N += 1
- yield res
- if sub_ts and not N:
- litConfig.warning('test suite %r contained no tests' % sub_ts.name)
-
def runTests(numThreads, litConfig, provider, display):
# If only using one testing thread, don't use threads at all; this lets us
# profile, among other things.
@@ -316,50 +148,8 @@ def runTests(numThreads, litConfig, provider, display):
except KeyboardInterrupt:
sys.exit(2)
-def load_test_suite(inputs):
- import unittest
-
- # Create the global config object.
- litConfig = LitConfig.LitConfig(progname = 'lit',
- path = [],
- quiet = False,
- useValgrind = False,
- valgrindLeakCheck = False,
- valgrindArgs = [],
- useTclAsSh = False,
- noExecute = False,
- ignoreStdErr = False,
- debug = False,
- isWindows = (platform.system()=='Windows'),
- params = {})
-
- # Load the tests from the inputs.
- tests = []
- testSuiteCache = {}
- localConfigCache = {}
- for input in inputs:
- prev = len(tests)
- tests.extend(getTests(input, litConfig,
- testSuiteCache, localConfigCache)[1])
- if prev == len(tests):
- litConfig.warning('input %r contained no tests' % input)
-
- # If there were any errors during test discovery, exit now.
- if litConfig.numErrors:
- print >>sys.stderr, '%d errors, exiting.' % litConfig.numErrors
- sys.exit(2)
-
- # Return a unittest test suite which just runs the tests in order.
- def get_test_fn(test):
- return unittest.FunctionTestCase(
- lambda: test.config.test_format.execute(
- test, litConfig),
- description = test.getFullName())
-
- from LitTestCase import LitTestCase
- return unittest.TestSuite([LitTestCase(test, litConfig) for test in tests])
-
-def main(builtinParameters = {}): # Bump the GIL check interval, its more important to get any one thread to a
+def main(builtinParameters = {}):
+ # Bump the GIL check interval; it's more important to get any one thread to a
# blocking operation (hopefully exec) than to try and unblock other threads.
#
# FIXME: This is a hack.
@@ -442,9 +232,6 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp
group.add_option("", "--show-suites", dest="showSuites",
help="Show discovered test suites",
action="store_true", default=False)
- group.add_option("", "--no-tcl-as-sh", dest="useTclAsSh",
- help="Don't run Tcl scripts using 'sh'",
- action="store_false", default=True)
group.add_option("", "--repeat", dest="repeatTests", metavar="N",
help="Repeat tests N times (for timing)",
action="store", default=None, type=int)
@@ -455,12 +242,6 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp
if not args:
parser.error('No inputs specified')
- if opts.configPrefix is not None:
- global gConfigName, gSiteConfigName, kLocalConfigName
- gConfigName = '%s.cfg' % opts.configPrefix
- gSiteConfigName = '%s.site.cfg' % opts.configPrefix
- kLocalConfigName = '%s.local.cfg' % opts.configPrefix
-
if opts.numThreads is None:
# Python <2.5 has a race condition causing lit to always fail with numThreads>1
# http://bugs.python.org/issue1731717
@@ -489,50 +270,20 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp
useValgrind = opts.useValgrind,
valgrindLeakCheck = opts.valgrindLeakCheck,
valgrindArgs = opts.valgrindArgs,
- useTclAsSh = opts.useTclAsSh,
noExecute = opts.noExecute,
ignoreStdErr = False,
debug = opts.debug,
isWindows = (platform.system()=='Windows'),
- params = userParams)
+ params = userParams,
+ config_prefix = opts.configPrefix)
- # Expand '@...' form in inputs.
- actual_inputs = []
- for input in inputs:
- if os.path.exists(input) or not input.startswith('@'):
- actual_inputs.append(input)
- else:
- f = open(input[1:])
- try:
- for ln in f:
- ln = ln.strip()
- if ln:
- actual_inputs.append(ln)
- finally:
- f.close()
-
-
- # Load the tests from the inputs.
- tests = []
- testSuiteCache = {}
- localConfigCache = {}
- for input in actual_inputs:
- prev = len(tests)
- tests.extend(getTests(input, litConfig,
- testSuiteCache, localConfigCache)[1])
- if prev == len(tests):
- litConfig.warning('input %r contained no tests' % input)
-
- # If there were any errors during test discovery, exit now.
- if litConfig.numErrors:
- print >>sys.stderr, '%d errors, exiting.' % litConfig.numErrors
- sys.exit(2)
+ tests = lit.discovery.find_tests_for_inputs(litConfig, inputs)
if opts.showSuites:
- suitesAndTests = dict([(ts,[])
- for ts,_ in testSuiteCache.values()
- if ts])
+ suitesAndTests = {}
for t in tests:
+ if t.suite not in suitesAndTests:
+ suitesAndTests[t.suite] = []
suitesAndTests[t.suite].append(t)
print '-- Test Suites --'
diff --git a/utils/lit/tests/.coveragerc b/utils/lit/tests/.coveragerc
new file mode 100644
index 000000000000..c886d0ac1a6e
--- /dev/null
+++ b/utils/lit/tests/.coveragerc
@@ -0,0 +1,11 @@
+# .coveragerc to control coverage.py
+[run]
+branch = False
+parallel = True
+source = lit
+
+[html]
+directory = coverage_html_report
+
+[report]
+omit = Inputs
diff --git a/utils/lit/tests/Inputs/discovery/lit.cfg b/utils/lit/tests/Inputs/discovery/lit.cfg
new file mode 100644
index 000000000000..3513bfffd173
--- /dev/null
+++ b/utils/lit/tests/Inputs/discovery/lit.cfg
@@ -0,0 +1,5 @@
+config.name = 'top-level-suite'
+config.suffixes = ['.txt']
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None
diff --git a/utils/lit/tests/Inputs/discovery/subdir/lit.local.cfg b/utils/lit/tests/Inputs/discovery/subdir/lit.local.cfg
new file mode 100644
index 000000000000..5ae6b3cd017d
--- /dev/null
+++ b/utils/lit/tests/Inputs/discovery/subdir/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.py']
diff --git a/utils/lit/tests/Inputs/discovery/subdir/test-three.py b/utils/lit/tests/Inputs/discovery/subdir/test-three.py
new file mode 100644
index 000000000000..b80b60b7a279
--- /dev/null
+++ b/utils/lit/tests/Inputs/discovery/subdir/test-three.py
@@ -0,0 +1 @@
+# RUN: true
diff --git a/utils/lit/tests/Inputs/discovery/subsuite/lit.cfg b/utils/lit/tests/Inputs/discovery/subsuite/lit.cfg
new file mode 100644
index 000000000000..0c2979d74adc
--- /dev/null
+++ b/utils/lit/tests/Inputs/discovery/subsuite/lit.cfg
@@ -0,0 +1,5 @@
+config.name = 'sub-suite'
+config.suffixes = ['.txt']
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None
diff --git a/utils/lit/tests/Inputs/discovery/subsuite/test-one.txt b/utils/lit/tests/Inputs/discovery/subsuite/test-one.txt
new file mode 100644
index 000000000000..b80b60b7a279
--- /dev/null
+++ b/utils/lit/tests/Inputs/discovery/subsuite/test-one.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/utils/lit/tests/Inputs/discovery/subsuite/test-two.txt b/utils/lit/tests/Inputs/discovery/subsuite/test-two.txt
new file mode 100644
index 000000000000..b80b60b7a279
--- /dev/null
+++ b/utils/lit/tests/Inputs/discovery/subsuite/test-two.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/utils/lit/tests/Inputs/discovery/test-one.txt b/utils/lit/tests/Inputs/discovery/test-one.txt
new file mode 100644
index 000000000000..b80b60b7a279
--- /dev/null
+++ b/utils/lit/tests/Inputs/discovery/test-one.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/utils/lit/tests/Inputs/discovery/test-two.txt b/utils/lit/tests/Inputs/discovery/test-two.txt
new file mode 100644
index 000000000000..b80b60b7a279
--- /dev/null
+++ b/utils/lit/tests/Inputs/discovery/test-two.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/utils/lit/tests/Inputs/shtest-format/external_shell/fail.txt b/utils/lit/tests/Inputs/shtest-format/external_shell/fail.txt
new file mode 100644
index 000000000000..1e74be5dbd4b
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/external_shell/fail.txt
@@ -0,0 +1,3 @@
+# Run a command that fails with error on stdout.
+#
+# RUN: cat "does-not-exist"
diff --git a/utils/lit/tests/Inputs/shtest-format/external_shell/lit.local.cfg b/utils/lit/tests/Inputs/shtest-format/external_shell/lit.local.cfg
new file mode 100644
index 000000000000..d14d1479772d
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/external_shell/lit.local.cfg
@@ -0,0 +1 @@
+config.test_format = lit.formats.ShTest(execute_external=True)
diff --git a/utils/lit/tests/Inputs/shtest-format/external_shell/pass.txt b/utils/lit/tests/Inputs/shtest-format/external_shell/pass.txt
new file mode 100644
index 000000000000..b80b60b7a279
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/external_shell/pass.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/utils/lit/tests/Inputs/shtest-format/fail.txt b/utils/lit/tests/Inputs/shtest-format/fail.txt
new file mode 100644
index 000000000000..49932c3006e1
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/fail.txt
@@ -0,0 +1 @@
+# RUN: false
diff --git a/utils/lit/tests/Inputs/shtest-format/lit.cfg b/utils/lit/tests/Inputs/shtest-format/lit.cfg
new file mode 100644
index 000000000000..78dd1bfb2e3a
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/lit.cfg
@@ -0,0 +1,7 @@
+config.name = 'shtest-format'
+config.suffixes = ['.txt']
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None
+config.target_triple = 'x86_64-unknown-unknown'
+config.available_features.add('a-present-feature')
diff --git a/utils/lit/tests/Inputs/shtest-format/no-test-line.txt b/utils/lit/tests/Inputs/shtest-format/no-test-line.txt
new file mode 100644
index 000000000000..f2316bd73ada
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/no-test-line.txt
@@ -0,0 +1 @@
+# Empty!
diff --git a/utils/lit/tests/Inputs/shtest-format/pass.txt b/utils/lit/tests/Inputs/shtest-format/pass.txt
new file mode 100644
index 000000000000..b80b60b7a279
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/pass.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/utils/lit/tests/Inputs/shtest-format/requires-missing.txt b/utils/lit/tests/Inputs/shtest-format/requires-missing.txt
new file mode 100644
index 000000000000..9e6648d8b8f0
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/requires-missing.txt
@@ -0,0 +1,2 @@
+RUN: true
+REQUIRES: a-missing-feature
diff --git a/utils/lit/tests/Inputs/shtest-format/requires-present.txt b/utils/lit/tests/Inputs/shtest-format/requires-present.txt
new file mode 100644
index 000000000000..064f7074a76e
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/requires-present.txt
@@ -0,0 +1,2 @@
+RUN: true
+REQUIRES: a-present-feature
diff --git a/utils/lit/tests/Inputs/shtest-format/unsupported_dir/lit.local.cfg b/utils/lit/tests/Inputs/shtest-format/unsupported_dir/lit.local.cfg
new file mode 100644
index 000000000000..462e3dc5d11d
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/unsupported_dir/lit.local.cfg
@@ -0,0 +1 @@
+config.unsupported = True
diff --git a/utils/lit/tests/Inputs/shtest-format/unsupported_dir/some-test.txt b/utils/lit/tests/Inputs/shtest-format/unsupported_dir/some-test.txt
new file mode 100644
index 000000000000..b80b60b7a279
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/unsupported_dir/some-test.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/utils/lit/tests/Inputs/shtest-format/xfail-feature.txt b/utils/lit/tests/Inputs/shtest-format/xfail-feature.txt
new file mode 100644
index 000000000000..bd6241f8e441
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/xfail-feature.txt
@@ -0,0 +1,2 @@
+# RUN: false
+# XFAIL: a-present-feature
diff --git a/utils/lit/tests/Inputs/shtest-format/xfail-target.txt b/utils/lit/tests/Inputs/shtest-format/xfail-target.txt
new file mode 100644
index 000000000000..36760bee435d
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/xfail-target.txt
@@ -0,0 +1,2 @@
+RUN: false
+XFAIL: x86_64
diff --git a/utils/lit/tests/Inputs/shtest-format/xfail.txt b/utils/lit/tests/Inputs/shtest-format/xfail.txt
new file mode 100644
index 000000000000..6814cda40148
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/xfail.txt
@@ -0,0 +1,2 @@
+RUN: false
+XFAIL: *
diff --git a/utils/lit/tests/Inputs/shtest-format/xpass.txt b/utils/lit/tests/Inputs/shtest-format/xpass.txt
new file mode 100644
index 000000000000..764d21798b37
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-format/xpass.txt
@@ -0,0 +1,2 @@
+RUN: true
+XFAIL: x86_64
diff --git a/utils/lit/tests/Inputs/shtest-shell/error-0.txt b/utils/lit/tests/Inputs/shtest-shell/error-0.txt
new file mode 100644
index 000000000000..631c8df233d6
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/error-0.txt
@@ -0,0 +1,3 @@
+# Check error on an internal shell error (unable to find command).
+#
+# RUN: not-a-real-command
diff --git a/utils/lit/tests/Inputs/shtest-shell/error-1.txt b/utils/lit/tests/Inputs/shtest-shell/error-1.txt
new file mode 100644
index 000000000000..e5c8be6b6664
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/error-1.txt
@@ -0,0 +1,3 @@
+# Check error on a shell parsing failure.
+#
+# RUN: echo "missing quote
diff --git a/utils/lit/tests/Inputs/shtest-shell/error-2.txt b/utils/lit/tests/Inputs/shtest-shell/error-2.txt
new file mode 100644
index 000000000000..a976286bddeb
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/error-2.txt
@@ -0,0 +1,3 @@
+# Check error on an unsupported redirect.
+#
+# RUN: echo "hello" 3>&1
diff --git a/utils/lit/tests/Inputs/shtest-shell/lit.cfg b/utils/lit/tests/Inputs/shtest-shell/lit.cfg
new file mode 100644
index 000000000000..4878b6560968
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/lit.cfg
@@ -0,0 +1,5 @@
+config.name = 'shtest-shell'
+config.suffixes = ['.txt']
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None
diff --git a/utils/lit/tests/Inputs/shtest-shell/redirects.txt b/utils/lit/tests/Inputs/shtest-shell/redirects.txt
new file mode 100644
index 000000000000..6be88b67ce1b
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/redirects.txt
@@ -0,0 +1,41 @@
+# Check stdout redirect (> and >>).
+#
+# RUN: echo "not-present" > %t.stdout-write
+# RUN: echo "is-present" > %t.stdout-write
+# RUN: FileCheck --check-prefix=STDOUT-WRITE < %t.stdout-write %s
+#
+# STDOUT-WRITE-NOT: not-present
+# STDOUT-WRITE: is-present
+#
+# RUN: echo "appended-line" >> %t.stdout-write
+# RUN: FileCheck --check-prefix=STDOUT-APPEND < %t.stdout-write %s
+#
+# STDOUT-APPEND: is-present
+# STDOUT-APPEND: appended-line
+
+
+# Check stderr redirect (2> and 2>>).
+#
+# RUN: echo "not-present" > %t.stderr-write
+# RUN: %S/write-to-stderr.sh 2> %t.stderr-write
+# RUN: FileCheck --check-prefix=STDERR-WRITE < %t.stderr-write %s
+#
+# STDERR-WRITE-NOT: not-present
+# STDERR-WRITE: a line on stderr
+#
+# RUN: %S/write-to-stderr.sh 2>> %t.stderr-write
+# RUN: FileCheck --check-prefix=STDERR-APPEND < %t.stderr-write %s
+#
+# STDERR-APPEND: a line on stderr
+# STDERR-APPEND: a line on stderr
+
+
+# Check combined redirect (&>).
+#
+# RUN: echo "not-present" > %t.combined
+# RUN: %S/write-to-stdout-and-stderr.sh &> %t.combined
+# RUN: FileCheck --check-prefix=COMBINED-WRITE < %t.combined %s
+#
+# COMBINED-WRITE-NOT: not-present
+# COMBINED-WRITE: a line on stdout
+# COMBINED-WRITE: a line on stderr
diff --git a/utils/lit/tests/Inputs/shtest-shell/sequencing-0.txt b/utils/lit/tests/Inputs/shtest-shell/sequencing-0.txt
new file mode 100644
index 000000000000..6578db25afe1
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/sequencing-0.txt
@@ -0,0 +1,28 @@
+# Check sequencing operations.
+#
+# RUN: echo "first-line" > %t.out && echo "second-line" >> %t.out
+# RUN: FileCheck --check-prefix CHECK-AND < %t.out %s
+#
+# CHECK-AND: first-line
+# CHECK-AND: second-line
+#
+# The false case of && is tested in sequencing-1.txt
+
+
+# RUN: echo "first-line" > %t.out || echo "second-line" >> %t.out
+# RUN: FileCheck --check-prefix CHECK-OR-1 < %t.out %s
+#
+# CHECK-OR-1: first-line
+# CHECK-OR-1-NOT: second-line
+
+# RUN: false || echo "second-line" > %t.out
+# RUN: FileCheck --check-prefix CHECK-OR-2 < %t.out %s
+#
+# CHECK-OR-2: second-line
+
+
+# RUN: echo "first-line" > %t.out; echo "second-line" >> %t.out
+# RUN: FileCheck --check-prefix CHECK-SEQ < %t.out %s
+#
+# CHECK-SEQ: first-line
+# CHECK-SEQ: second-line
diff --git a/utils/lit/tests/Inputs/shtest-shell/sequencing-1.txt b/utils/lit/tests/Inputs/shtest-shell/sequencing-1.txt
new file mode 100644
index 000000000000..5a1794c26c1c
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/sequencing-1.txt
@@ -0,0 +1,2 @@
+# RUN: false && true
+# XFAIL: *
diff --git a/utils/lit/tests/Inputs/shtest-shell/write-to-stderr.sh b/utils/lit/tests/Inputs/shtest-shell/write-to-stderr.sh
new file mode 100755
index 000000000000..ead3fd3ce377
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/write-to-stderr.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+echo "a line on stderr" 1>&2
diff --git a/utils/lit/tests/Inputs/shtest-shell/write-to-stdout-and-stderr.sh b/utils/lit/tests/Inputs/shtest-shell/write-to-stdout-and-stderr.sh
new file mode 100755
index 000000000000..f20de5d9042d
--- /dev/null
+++ b/utils/lit/tests/Inputs/shtest-shell/write-to-stdout-and-stderr.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+echo "a line on stdout"
+echo "a line on stderr" 1>&2
diff --git a/utils/lit/tests/Inputs/unittest-adaptor/lit.cfg b/utils/lit/tests/Inputs/unittest-adaptor/lit.cfg
new file mode 100644
index 000000000000..52de70966242
--- /dev/null
+++ b/utils/lit/tests/Inputs/unittest-adaptor/lit.cfg
@@ -0,0 +1,5 @@
+config.name = 'unittest-adaptor'
+config.suffixes = ['.txt']
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None
diff --git a/utils/lit/tests/Inputs/unittest-adaptor/test-one.txt b/utils/lit/tests/Inputs/unittest-adaptor/test-one.txt
new file mode 100644
index 000000000000..b80b60b7a279
--- /dev/null
+++ b/utils/lit/tests/Inputs/unittest-adaptor/test-one.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/utils/lit/tests/Inputs/unittest-adaptor/test-two.txt b/utils/lit/tests/Inputs/unittest-adaptor/test-two.txt
new file mode 100644
index 000000000000..49932c3006e1
--- /dev/null
+++ b/utils/lit/tests/Inputs/unittest-adaptor/test-two.txt
@@ -0,0 +1 @@
+# RUN: false
diff --git a/utils/lit/tests/discovery.py b/utils/lit/tests/discovery.py
new file mode 100644
index 000000000000..54b99d39458c
--- /dev/null
+++ b/utils/lit/tests/discovery.py
@@ -0,0 +1,25 @@
+# Check the basic discovery process, including a sub-suite.
+#
+# RUN: %{lit} %{inputs}/discovery \
+# RUN: -j 1 --debug --no-execute --show-suites -v > %t.out 2> %t.err
+# RUN: FileCheck --check-prefix=CHECK-BASIC-OUT < %t.out %s
+# RUN: FileCheck --check-prefix=CHECK-BASIC-ERR < %t.err %s
+#
+# CHECK-BASIC-ERR: loading suite config '{{.*}}/tests/Inputs/discovery/lit.cfg'
+# CHECK-BASIC-ERR: loading local config '{{.*}}/tests/Inputs/discovery/subdir/lit.local.cfg'
+# CHECK-BASIC-ERR: loading suite config '{{.*}}/tests/Inputs/discovery/subsuite/lit.cfg'
+#
+# CHECK-BASIC-OUT: -- Test Suites --
+# CHECK-BASIC-OUT: sub-suite - 2 tests
+# CHECK-BASIC-OUT: Source Root:
+# CHECK-BASIC-OUT: Exec Root :
+# CHECK-BASIC-OUT: top-level-suite - 3 tests
+# CHECK-BASIC-OUT: Source Root:
+# CHECK-BASIC-OUT: Exec Root :
+#
+# CHECK-BASIC-OUT: -- Testing: 5 tests, 1 threads --
+# CHECK-BASIC-OUT: PASS: sub-suite :: test-one
+# CHECK-BASIC-OUT: PASS: sub-suite :: test-two
+# CHECK-BASIC-OUT: PASS: top-level-suite :: subdir/test-three
+# CHECK-BASIC-OUT: PASS: top-level-suite :: test-one
+# CHECK-BASIC-OUT: PASS: top-level-suite :: test-two
diff --git a/utils/lit/tests/lit.cfg b/utils/lit/tests/lit.cfg
new file mode 100644
index 000000000000..32760ceb2735
--- /dev/null
+++ b/utils/lit/tests/lit.cfg
@@ -0,0 +1,36 @@
+# -*- Python -*-
+
+import os
+
+# Configuration file for the 'lit' test runner.
+
+# name: The name of this test suite.
+config.name = 'lit'
+
+# testFormat: The test format to use to interpret tests.
+config.test_format = lit.formats.ShTest(execute_external=False)
+
+# suffixes: A list of file extensions to treat as test files.
+config.suffixes = ['.py']
+
+# excludes: A list of individual files to exclude.
+config.excludes = ['Inputs']
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
+config.test_exec_root = config.test_source_root
+
+config.target_triple = None
+
+src_root = os.path.join(config.test_source_root, '..')
+config.environment['PYTHONPATH'] = src_root
+config.substitutions.append(('%{src_root}', src_root))
+config.substitutions.append(('%{inputs}', os.path.join(
+ src_root, 'tests', 'Inputs')))
+config.substitutions.append(('%{lit}', os.path.join(src_root, 'lit.py')))
+
+# Enable coverage.py reporting, assuming the coverage module has been installed
+# and sitecustomize.py in the virtualenv has been modified appropriately.
+if lit.params.get('check-coverage', None):
+ config.environment['COVERAGE_PROCESS_START'] = os.path.join(
+ os.path.dirname(__file__), ".coveragerc")
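
A simplified model of how the substitutions registered above rewrite a RUN line before execution (paths illustrative; lit applies these alongside the built-in %s/%t substitutions):

    substitutions = [('%{inputs}', '/work/llvm/utils/lit/tests/Inputs'),
                     ('%{lit}', '/work/llvm/utils/lit/lit.py')]
    ln = '%{lit} %{inputs}/discovery -j 1'
    for old, new in substitutions:
        ln = ln.replace(old, new)
    assert ln == ('/work/llvm/utils/lit/lit.py '
                  '/work/llvm/utils/lit/tests/Inputs/discovery -j 1')
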
diff --git a/utils/lit/tests/shell-parsing.py b/utils/lit/tests/shell-parsing.py
new file mode 100644
index 000000000000..f644132f29d3
--- /dev/null
+++ b/utils/lit/tests/shell-parsing.py
@@ -0,0 +1,3 @@
+# Just run the ShUtil unit tests.
+#
+# RUN: python -m lit.ShUtil
diff --git a/utils/lit/tests/shtest-format.py b/utils/lit/tests/shtest-format.py
new file mode 100644
index 000000000000..4b36873a3d7f
--- /dev/null
+++ b/utils/lit/tests/shtest-format.py
@@ -0,0 +1,43 @@
+# Check the various features of the ShTest format.
+#
+# RUN: not %{lit} -j 1 -v %{inputs}/shtest-format > %t.out
+# RUN: FileCheck < %t.out %s
+#
+# END.
+
+# CHECK: -- Testing:
+
+# CHECK: FAIL: shtest-format :: external_shell/fail.txt
+# CHECK: *** TEST 'shtest-format :: external_shell/fail.txt' FAILED ***
+# CHECK: Command Output (stderr):
+# CHECK: cat: does-not-exist: No such file or directory
+# CHECK: --
+
+# CHECK: PASS: shtest-format :: external_shell/pass.txt
+
+# CHECK: FAIL: shtest-format :: fail.txt
+
+# CHECK: UNRESOLVED: shtest-format :: no-test-line.txt
+# CHECK: PASS: shtest-format :: pass.txt
+# CHECK: UNSUPPORTED: shtest-format :: requires-missing.txt
+# CHECK: PASS: shtest-format :: requires-present.txt
+# CHECK: UNSUPPORTED: shtest-format :: unsupported_dir/some-test.txt
+# CHECK: XFAIL: shtest-format :: xfail-feature.txt
+# CHECK: XFAIL: shtest-format :: xfail-target.txt
+# CHECK: XFAIL: shtest-format :: xfail.txt
+# CHECK: XPASS: shtest-format :: xpass.txt
+# CHECK: Testing Time
+
+# CHECK: Unexpected Passing Tests (1)
+# CHECK: shtest-format :: xpass.txt
+
+# CHECK: Failing Tests (2)
+# CHECK: shtest-format :: external_shell/fail.txt
+# CHECK: shtest-format :: fail.txt
+
+# CHECK: Expected Passes : 3
+# CHECK: Expected Failures : 3
+# CHECK: Unsupported Tests : 2
+# CHECK: Unresolved Tests : 1
+# CHECK: Unexpected Passes : 1
+# CHECK: Unexpected Failures: 2
diff --git a/utils/lit/tests/shtest-shell.py b/utils/lit/tests/shtest-shell.py
new file mode 100644
index 000000000000..32479e19a102
--- /dev/null
+++ b/utils/lit/tests/shtest-shell.py
@@ -0,0 +1,33 @@
+# Check the internal shell handling component of the ShTest format.
+#
+# RUN: not %{lit} -j 1 -v %{inputs}/shtest-shell > %t.out
+# RUN: FileCheck < %t.out %s
+#
+# END.
+
+# CHECK: -- Testing:
+
+# CHECK: FAIL: shtest-shell :: error-0.txt
+# CHECK: *** TEST 'shtest-shell :: error-0.txt' FAILED ***
+# CHECK: Command 0: "not-a-real-command"
+# CHECK: Command 0 Result: 127
+# CHECK: Command 0 Stderr:
+# CHECK: 'not-a-real-command': command not found
+# CHECK: ***
+
+# FIXME: The output here is unhelpful.
+#
+# CHECK: FAIL: shtest-shell :: error-1.txt
+# CHECK: *** TEST 'shtest-shell :: error-1.txt' FAILED ***
+# CHECK: shell parser error on: 'echo "missing quote'
+# CHECK: ***
+
+# CHECK: FAIL: shtest-shell :: error-2.txt
+# CHECK: *** TEST 'shtest-shell :: error-2.txt' FAILED ***
+# CHECK: Unsupported redirect:
+# CHECK: ***
+
+# CHECK: PASS: shtest-shell :: redirects.txt
+# CHECK: PASS: shtest-shell :: sequencing-0.txt
+# CHECK: XFAIL: shtest-shell :: sequencing-1.txt
+# CHECK: Failing Tests (3)
diff --git a/utils/lit/tests/unittest-adaptor.py b/utils/lit/tests/unittest-adaptor.py
new file mode 100644
index 000000000000..243dd4191d0d
--- /dev/null
+++ b/utils/lit/tests/unittest-adaptor.py
@@ -0,0 +1,18 @@
+# Check the lit adaptation to run under unittest.
+#
+# RUN: python %s %{inputs}/unittest-adaptor 2> %t.err
+# RUN: FileCheck < %t.err %s
+#
+# CHECK: unittest-adaptor :: test-one.txt ... ok
+# CHECK: unittest-adaptor :: test-two.txt ... FAIL
+
+import unittest
+import sys
+
+import lit
+import lit.discovery
+
+input_path = sys.argv[1]
+unittest_suite = lit.discovery.load_test_suite([input_path])
+runner = unittest.TextTestRunner(verbosity=2)
+runner.run(unittest_suite)
diff --git a/utils/lit/tests/usage.py b/utils/lit/tests/usage.py
new file mode 100644
index 000000000000..e10d6134a03c
--- /dev/null
+++ b/utils/lit/tests/usage.py
@@ -0,0 +1,6 @@
+# Basic sanity check that usage works.
+#
+# RUN: %{lit} --help > %t.out
+# RUN: FileCheck < %t.out %s
+#
+# CHECK: Usage: lit.py [options] {file-or-path}
diff --git a/utils/lit/utils/README.txt b/utils/lit/utils/README.txt
new file mode 100644
index 000000000000..81862ba09912
--- /dev/null
+++ b/utils/lit/utils/README.txt
@@ -0,0 +1,2 @@
+Utilities for the project that aren't intended to be part of a source
+distribution.
diff --git a/utils/lit/utils/check-coverage b/utils/lit/utils/check-coverage
new file mode 100755
index 000000000000..bb3d17e75794
--- /dev/null
+++ b/utils/lit/utils/check-coverage
@@ -0,0 +1,50 @@
+#!/bin/sh
+
+prog=$(basename "$0")
+
+# Expect to be run from the parent lit directory.
+if [ ! -f setup.py ] || [ ! -d lit ]; then
+ printf 1>&2 "%s: expected to be run from base lit directory\n" "$prog"
+ exit 1
+fi
+
+# Parse command line arguments.
+if [ "$1" == "--generate-html" ]; then
+ GENERATE_HTML=1
+ shift
+fi
+
+# If invoked with no arguments, run all the tests.
+if [ $# == "0" ]; then
+ set -- "tests"
+fi
+
+# Check that the active python has been modified to enable coverage in its
+# sitecustomize.
+if ! python -c \
+ 'import sitecustomize, sys; sys.exit("coverage" not in dir(sitecustomize))' \
+ &> /dev/null; then
+ printf 1>&2 "error: active python does not appear to enable coverage in its 'sitecustomize.py'\n"
+ exit 1
+fi
+
+# First, remove any existing coverage data files.
+rm -f tests/.coverage
+find tests -name .coverage.\* -exec rm {} \;
+
+# Next, run the tests.
+lit -sv --param check-coverage=1 "$@"
+
+# Next, move all the data files from subdirectories up.
+find tests/* -name .coverage.\* -exec mv {} tests \;
+
+# Combine all the data files.
+(cd tests && python -m coverage combine)
+
+# Finally, generate the report.
+(cd tests && python -m coverage report)
+
+# Generate the HTML report, if requested.
+if [ ! -z "$GENERATE_HTML" ]; then
+ (cd tests && python -m coverage html)
+fi
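
A usage sketch, assuming the coverage module and the sitecustomize hook are in place: running `utils/check-coverage --generate-html tests` from the top-level lit directory collects the per-process data files, combines them, prints the textual report, and leaves coverage's HTML report under tests/ (htmlcov by default).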
diff --git a/utils/lit/utils/check-sdist b/utils/lit/utils/check-sdist
new file mode 100755
index 000000000000..6186446aba1f
--- /dev/null
+++ b/utils/lit/utils/check-sdist
@@ -0,0 +1,44 @@
+#!/bin/sh
+
+if [ $# -eq 1 ]; then
+ cd "$1"
+fi
+
+# Create a list of all the files in the source tree, excluding various things we
+# know don't belong.
+echo "Creating current directory contents list."
+find . | \
+ grep -v '^\./.gitignore' | \
+ grep -v '^\./dist' | \
+ grep -v '^\./utils' | \
+ grep -v '^\./venv' | \
+ grep -v '^\./lit.egg-info' | \
+ grep -v '^\./lit/ExampleTests' | \
+ grep -v '/Output' | \
+ grep -v '__pycache__' | \
+ grep -v '.pyc$' | grep -v '~$' | \
+ sort > /tmp/lit_source_files.txt
+
+# Create the source distribution.
+echo "Creating source distribution."
+rm -rf lit.egg-info dist
+python setup.py sdist > /tmp/lit_sdist_log.txt
+
+# Create the list of files in the source distribution.
+echo "Creating source distribution file list."
+tar zft dist/lit*.tar.gz | \
+ sed -e 's#lit-[0-9.dev]*/#./#' | \
+ sed -e 's#/$##' | \
+ grep -v '^\./PKG-INFO' | \
+ grep -v '^\./setup.cfg' | \
+ grep -v '^\./lit.egg-info' | \
+ sort > /tmp/lit_sdist_files.txt
+
+# Diff the files.
+echo "Running diff..."
+if (diff /tmp/lit_source_files.txt /tmp/lit_sdist_files.txt); then
+ echo "Diff is clean!"
+else
+ echo "error: there were differences in the source lists!"
+ exit 1
+fi
diff --git a/utils/llvm-build/llvmbuild/main.py b/utils/llvm-build/llvmbuild/main.py
index 27d23d0855d1..87e8819bdec2 100644
--- a/utils/llvm-build/llvmbuild/main.py
+++ b/utils/llvm-build/llvmbuild/main.py
@@ -182,7 +182,9 @@ class LLVMProjectInfo(object):
# out easily. If we don't, we should special case the check.
self.ordered_component_infos = []
- components_to_visit = set(self.component_infos)
+ components_to_visit = sorted(
+ set(self.component_infos),
+ key = lambda c: c.name)
while components_to_visit:
visit_component_info(iter(components_to_visit).next(), [], set())
@@ -807,7 +809,7 @@ given by --build-root) at the same SUBPATH""",
# Determine the LLVM source path, if not given.
source_root = opts.source_root
if source_root:
- if not os.path.exists(os.path.join(source_root, 'lib', 'VMCore',
+ if not os.path.exists(os.path.join(source_root, 'lib', 'IR',
'Function.cpp')):
parser.error('invalid LLVM source root: %r' % source_root)
else:
@@ -815,7 +817,7 @@ given by --build-root) at the same SUBPATH""",
llvm_build_path = os.path.dirname(llvmbuild_path)
utils_path = os.path.dirname(llvm_build_path)
source_root = os.path.dirname(utils_path)
- if not os.path.exists(os.path.join(source_root, 'lib', 'VMCore',
+ if not os.path.exists(os.path.join(source_root, 'lib', 'IR',
'Function.cpp')):
parser.error('unable to infer LLVM source root, please specify')
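
Sorting the worklist above pins down the topological visit order, so the generated build fragments no longer vary with Python's set iteration order. A small illustration, with hypothetical component names:

    # A set iterates in an arbitrary, hash-dependent order; sorting by a
    # stable key makes every run produce identical output.
    components = {'Support', 'Core', 'TableGen'}
    print(list(components))    # order may differ from run to run
    print(sorted(components))  # always ['Core', 'Support', 'TableGen']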
diff --git a/utils/llvm-compilers-check b/utils/llvm-compilers-check
index 623ebc6a32cc..3173027759b7 100755
--- a/utils/llvm-compilers-check
+++ b/utils/llvm-compilers-check
@@ -1,11 +1,11 @@
#!/usr/bin/python3
##===- utils/llvmbuild - Build the LLVM project ----------------*-python-*-===##
-#
+#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
-#
+#
##===----------------------------------------------------------------------===##
#
# This script builds many different flavors of the LLVM ecosystem. It
@@ -147,6 +147,8 @@ def add_options(parser):
help=("Do not build dragonegg"))
parser.add_option("--no-install", default=False, action="store_true",
help=("Do not do installs"))
+ parser.add_option("--keep-going", default=False, action="store_true",
+ help=("Keep going after failures"))
return
def check_options(parser, options, valid_builds):
@@ -282,7 +284,7 @@ class Builder(threading.Thread):
for key, value in env.items():
execenv[key] = value
-
+
self.logger.debug("[" + prefix + "] " + "env " + str(env) + " "
+ " ".join(command));
@@ -299,6 +301,11 @@ class Builder(threading.Thread):
+ str(line, "utf-8").rstrip())
line = proc.stdout.readline()
+ (stdoutdata, stderrdata) = proc.communicate()
+ retcode = proc.wait()
+
+ return retcode
+
except:
traceback.print_exc()
@@ -327,6 +334,7 @@ class Builder(threading.Thread):
self.logger.debug("Start Gather")
gather = True
line = proc.stdout.readline()
+
except:
traceback.print_exc()
self.logger.debug(includes)
@@ -353,16 +361,16 @@ class Builder(threading.Thread):
configure_flags = dict(
llvm=dict(debug=["--prefix=" + self.install_prefix,
- "--with-extra-options=-Werror",
+ "--enable-werror",
"--enable-assertions",
"--disable-optimized",
"--with-gcc-toolchain=" + cxxroot],
release=["--prefix=" + self.install_prefix,
- "--with-extra-options=-Werror",
+ "--enable-werror",
"--enable-optimized",
"--with-gcc-toolchain=" + cxxroot],
paranoid=["--prefix=" + self.install_prefix,
- "--with-extra-options=-Werror",
+ "--enable-werror",
"--enable-assertions",
"--enable-expensive-checks",
"--disable-optimized",
@@ -438,7 +446,7 @@ class Builder(threading.Thread):
for component in components:
comp = component[:]
-
+
if (self.options.no_dragonegg):
if (comp == 'dragonegg'):
self.logger.info("Skipping " + component + " in "
@@ -458,43 +466,74 @@ class Builder(threading.Thread):
"").split())
self.logger.info("Configuring " + component + " in " + builddir)
- self.configure(component, srcdir, builddir,
- config_args,
- configure_env[comp_key][build])
-
- self.logger.info("Building " + component + " in " + builddir)
- self.logger.info("Build: make " + str(make_flags[comp_key][build]))
- self.make(component, srcdir, builddir,
- make_flags[comp_key][build],
- make_env[comp_key][build])
-
- if (not self.options.no_install):
- self.logger.info("Installing " + component + " in " + installdir)
- self.make(component, srcdir, builddir,
- make_install_flags[comp_key][build],
- make_install_env[comp_key][build])
-
- self.logger.info("Testing " + component + " in " + builddir)
- self.logger.info("Test: make "
- + str(make_check_flags[comp_key][build]))
- self.make(component, srcdir, builddir,
- make_check_flags[comp_key][build],
- make_check_env[comp_key][build])
-
+ configrc = self.configure(component, srcdir, builddir,
+ config_args,
+ configure_env[comp_key][build])
+
+ if configrc is None:
+ self.logger.info("[None] Failed to configure " + component + " in " + installdir)
+
+ if (configrc == 0 or self.options.keep_going) :
+ self.logger.info("Building " + component + " in " + builddir)
+ self.logger.info("Build: make " + str(make_flags[comp_key][build]))
+ buildrc = self.make(component, srcdir, builddir,
+ make_flags[comp_key][build],
+ make_env[comp_key][build])
+
+ if buildrc is None:
+ self.logger.info("[None] Failed to build " + component + " in " + installdir)
+
+ if (buildrc == 0 or self.options.keep_going) :
+ self.logger.info("Testing " + component + " in " + builddir)
+ self.logger.info("Test: make "
+ + str(make_check_flags[comp_key][build]))
+ testrc = self.make(component, srcdir, builddir,
+ make_check_flags[comp_key][build],
+ make_check_env[comp_key][build])
+
+ if testrc is None:
+ self.logger.info("[None] Failed to test " + component + " in " + installdir)
+
+ if ((testrc == 0 or self.options.keep_going)
+ and not self.options.no_install):
+ self.logger.info("Installing " + component + " in " + installdir)
+ self.make(component, srcdir, builddir,
+ make_install_flags[comp_key][build],
+ make_install_env[comp_key][build])
+ else:
+ self.logger.info("Failed testing " + component + " in " + installdir)
+
+ else:
+ self.logger.info("Failed to build " + component + " in " + installdir)
+
+ else:
+ self.logger.info("Failed to configure " + component + " in " + installdir)
def configure(self, component, srcdir, builddir, flags, env):
+ prefix = self.component_abbrev[component.replace("-", "_")]
+
self.logger.debug("Configure " + str(flags) + " " + str(srcdir) + " -> "
+ str(builddir))
configure_files = dict(
llvm=[(srcdir + "/configure", builddir + "/Makefile")],
- dragonegg=[("","")])
+ dragonegg=[(None,None)])
doconfig = False
for conf, mf in configure_files[component.replace("-", "_")]:
+ if conf is None:
+ # No configure necessary
+ return 0
+
if not os.path.exists(conf):
- return
+ self.logger.info("[" + prefix + "] Configure failed, no configure script " + conf)
+ return -1
+
+ if not os.path.exists(mf):
+ self.logger.info("[" + prefix + "] Configure failed, no makefile " + mf)
+ return -1
+
if os.path.exists(conf) and os.path.exists(mf):
confstat = os.stat(conf)
makestat = os.stat(mf)
@@ -506,16 +545,17 @@ class Builder(threading.Thread):
break
if not doconfig and not self.options.force_configure:
- return
+ return 0
program = srcdir + "/configure"
if not is_executable(program):
- return
+ self.logger.info("[" + prefix + "] Configure failed, cannot execute " + program)
+ return -1
args = [program]
args += ["--verbose"]
args += flags
- self.execute(args, builddir, env, component)
+ return self.execute(args, builddir, env, component)
def make(self, component, srcdir, builddir, flags, env):
program = find_executable("make")
@@ -527,7 +567,7 @@ class Builder(threading.Thread):
args = [program]
args += flags
- self.execute(args, builddir, env, component)
+ return self.execute(args, builddir, env, component)
# Global constants
build_abbrev = dict(debug="dbg", release="opt", paranoid="par")
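
The restructuring above threads exit codes out of configure() and make() (None when execute() raised) and gates each stage on the one before it unless --keep-going is given. A condensed sketch of that control pattern, with hypothetical stage names:

    def run_stages(stages, keep_going=False):
        # stages: (name, zero-argument callable returning an exit code) pairs.
        failed = []
        for name, step in stages:
            rc = step()
            if rc != 0:  # None (a crashed step) also compares unequal to 0
                failed.append(name)
                if not keep_going:
                    break
        return failed

    # 'build' fails, but with keep_going=True 'check' still runs.
    print(run_stages([('configure', lambda: 0),
                      ('build', lambda: 1),
                      ('check', lambda: 0)], keep_going=True))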
diff --git a/utils/llvm-lit/llvm-lit.in b/utils/llvm-lit/llvm-lit.in
index 768dc5103c8b..87878d5638c6 100644
--- a/utils/llvm-lit/llvm-lit.in
+++ b/utils/llvm-lit/llvm-lit.in
@@ -13,8 +13,7 @@ sys.path.insert(0, os.path.join(llvm_source_root, 'utils', 'lit'))
# Set up some builtin parameters, so that by default the LLVM test suite
# configuration file knows how to find the object tree.
builtin_parameters = {
- 'build_config' : "@CMAKE_CFG_INTDIR@",
- 'build_mode' : "@RUNTIME_BUILD_MODE@",
+ 'build_mode' : "@CMAKE_CFG_INTDIR@",
'llvm_site_config' : os.path.join(llvm_obj_root, 'test', 'lit.site.cfg')
}
diff --git a/utils/llvm.grm b/utils/llvm.grm
index 322036b2c209..d65f075076cf 100644
--- a/utils/llvm.grm
+++ b/utils/llvm.grm
@@ -174,7 +174,9 @@ FuncAttr ::= noreturn
| sspreq
| returns_twice
| nonlazybind
- | address_safety
+ | sanitize_address
+ | sanitize_thread
+ | sanitize_memory
;
OptFuncAttrs ::= + _ | OptFuncAttrs FuncAttr ;
diff --git a/utils/llvm.natvis b/utils/llvm.natvis
new file mode 100644
index 000000000000..6b4ef83c67e2
--- /dev/null
+++ b/utils/llvm.natvis
@@ -0,0 +1,181 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+Visual Studio 2012 Native Debugging Visualizers for LLVM
+
+Put this file into "%USERPROFILE%\Documents\Visual Studio 2012\Visualizers"
+or create a symbolic link so it updates automatically.
+-->
+<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+ <Type Name="llvm::SmallVector&lt;*,*&gt;">
+ <DisplayString Condition="(($T1*)EndX - ($T1*)BeginX) == 0">empty</DisplayString>
+ <DisplayString Condition="(($T1*)EndX - ($T1*)BeginX) != 0">{{ size={($T1*)EndX - ($T1*)BeginX} }}</DisplayString>
+ <Expand>
+ <Item Name="[size]">($T1*)EndX - ($T1*)BeginX</Item>
+ <Item Name="[capacity]">($T1*)CapacityX - ($T1*)BeginX</Item>
+ <ArrayItems>
+ <Size>($T1*)EndX - ($T1*)BeginX</Size>
+ <ValuePointer>($T1*)BeginX</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::SmallVectorImpl&lt;*&gt;">
+ <DisplayString Condition="(($T1*)EndX - ($T1*)BeginX) == 0">empty</DisplayString>
+ <DisplayString Condition="(($T1*)EndX - ($T1*)BeginX) != 0">[{($T1*)EndX - ($T1*)BeginX}]</DisplayString>
+ <Expand>
+ <Item Name="[size]">($T1*)EndX - ($T1*)BeginX</Item>
+ <Item Name="[capacity]">($T1*)CapacityX - ($T1*)BeginX</Item>
+ <ArrayItems>
+ <Size>($T1*)EndX - ($T1*)BeginX</Size>
+ <ValuePointer>($T1*)BeginX</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::SmallString&lt;*&gt;">
+ <DisplayString>{BeginX,s}</DisplayString>
+ <StringView>BeginX,s</StringView>
+ <Expand>
+ <Item Name="[size]">(char*)EndX - (char*)BeginX</Item>
+ <Item Name="[capacity]">(char*)CapacityX - (char*)BeginX</Item>
+ <ArrayItems>
+ <Size>(char*)EndX - (char*)BeginX</Size>
+ <ValuePointer>(char*)BeginX</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::StringRef">
+ <DisplayString>[{Length}] {Data,s}</DisplayString>
+ <StringView>Data,s</StringView>
+ <Expand>
+ <Item Name="[length]">Length</Item>
+ <ArrayItems>
+ <Size>Length</Size>
+ <ValuePointer>Data</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::PointerIntPair&lt;*,*,*,*&gt;">
+ <DisplayString>{Value &amp; PointerBitMask} [{(Value &gt;&gt; IntShift) &amp; IntMask}]</DisplayString>
+ <Expand>
+ <Item Name="[ptr]">Value &amp; PointerBitMask</Item>
+ <Item Name="[int]">(Value &gt;&gt; IntShift) &amp; IntMask</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::PointerUnion&lt;*,*&gt;">
+ <DisplayString Condition="((Val.Value &gt;&gt; Val.IntShift) &amp; Val.IntMask) == 0">[P1] {($T1)(Val.Value &amp; Val.PointerBitMask)}</DisplayString>
+ <DisplayString Condition="((Val.Value &gt;&gt; Val.IntShift) &amp; Val.IntMask) != 0">[P2] {($T2)(Val.Value &amp; Val.PointerBitMask)}</DisplayString>
+ <Expand>
+ <Item Name="[ptr]" Condition="((Val.Value &gt;&gt; Val.IntShift) &amp; Val.IntMask) == 0">($T1)(Val.Value &amp; Val.PointerBitMask)</Item>
+ <Item Name="[ptr]" Condition="((Val.Value &gt;&gt; Val.IntShift) &amp; Val.IntMask) != 0">($T2)(Val.Value &amp; Val.PointerBitMask)</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::PointerUnion3&lt;*,*,*&gt;">
+ <DisplayString Condition="(Val.Val.Value &amp; 2) != 2 &amp;&amp; (Val.Val.Value &amp; 1) != 1">[P1] {($T1)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)}</DisplayString>
+ <DisplayString Condition="(Val.Val.Value &amp; 2) == 2">[P2] {($T2)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)}</DisplayString>
+ <DisplayString Condition="(Val.Val.Value &amp; 1) == 1">[P3] {($T3)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)}</DisplayString>
+ <Expand>
+ <Item Name="[ptr]" Condition="(Val.Val.Value &amp; 2) != 2 &amp;&amp; (Val.Val.Value &amp; 1) != 1">($T1)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)</Item>
+ <Item Name="[ptr]" Condition="(Val.Val.Value &amp; 2) == 2">($T2)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)</Item>
+ <Item Name="[ptr]" Condition="(Val.Val.Value &amp; 1) == 1">($T3)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::PointerUnion4&lt;*,*,*,*&gt;">
+ <DisplayString Condition="(Val.Val.Value &amp; 3) != 3 &amp;&amp; (Val.Val.Value &amp; 2) != 2 &amp;&amp; (Val.Val.Value &amp; 1) != 1">[P1] {($T1)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)}</DisplayString>
+ <DisplayString Condition="(Val.Val.Value &amp; 3) != 3 &amp;&amp; (Val.Val.Value &amp; 2) == 2">[P2] {($T2)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)}</DisplayString>
+ <DisplayString Condition="(Val.Val.Value &amp; 3) != 3 &amp;&amp; (Val.Val.Value &amp; 1) == 1">[P3] {($T3)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)}</DisplayString>
+ <DisplayString Condition="(Val.Val.Value &amp; 3) == 3">[P4] {($T4)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)}</DisplayString>
+ <Expand>
+ <Item Name="[ptr]" Condition="(Val.Val.Value &amp; 3) != 3 &amp;&amp; (Val.Val.Value &amp; 2) != 2 &amp;&amp; (Val.Val.Value &amp; 1) != 1">($T1)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)</Item>
+ <Item Name="[ptr]" Condition="(Val.Val.Value &amp; 3) != 3 &amp;&amp; (Val.Val.Value &amp; 2) == 2">($T2)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)</Item>
+ <Item Name="[ptr]" Condition="(Val.Val.Value &amp; 3) != 3 &amp;&amp; (Val.Val.Value &amp; 1) == 1">($T3)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)</Item>
+ <Item Name="[ptr]" Condition="(Val.Val.Value &amp; 3) == 3">($T4)((Val.Val.Value &gt;&gt; 2) &lt;&lt; 2)</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::iplist&lt;*,*&gt;">
+ <DisplayString Condition="Head == 0">{{ empty }}</DisplayString>
+ <DisplayString Condition="Head != 0">{{ head={Head} }}</DisplayString>
+ <Expand>
+ <LinkedListItems>
+ <HeadPointer>Head</HeadPointer>
+ <NextPointer>Next</NextPointer>
+ <ValueNode>this</ValueNode>
+ </LinkedListItems>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::IntrusiveRefCntPtr&lt;*&gt;">
+ <DisplayString Condition="Obj == 0">empty</DisplayString>
+ <DisplayString Condition="(Obj != 0) &amp;&amp; (Obj-&gt;ref_cnt == 1)">RefPtr [1 ref] {*Obj}</DisplayString>
+ <DisplayString Condition="(Obj != 0) &amp;&amp; (Obj-&gt;ref_cnt != 1)">RefPtr [{Obj-&gt;ref_cnt} refs] {*Obj}</DisplayString>
+ <Expand>
+ <Item Condition="Obj != 0" Name="[refs]">Obj-&gt;ref_cnt</Item>
+ <Item Condition="Obj != 0" Name="[ptr]">Obj</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::OwningPtr&lt;*&gt;">
+ <DisplayString Condition="Ptr == 0">empty</DisplayString>
+ <DisplayString Condition="Ptr != 0">OwningPtr {*Ptr}</DisplayString>
+ <Expand>
+ <Item Condition="Ptr != 0" Name="[ptr]">Ptr</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::SmallPtrSet&lt;*,*&gt;">
+ <DisplayString Condition="CurArray == SmallArray">{{ [Small Mode] elements={NumElements}, arraySize={CurArraySize} }}</DisplayString>
+ <DisplayString Condition="CurArray != SmallArray">{{ [Big Mode] elements={NumElements}, arraySize={CurArraySize} }}</DisplayString>
+ <Expand>
+ <Item Name="[NumElements]">NumElements</Item>
+ <Item Name="[CurArraySize]">CurArraySize</Item>
+ <IndexListItems>
+ <Size>CurArraySize + 1</Size>
+ <ValueNode>($T1*)&amp;CurArray[$i]</ValueNode>
+ </IndexListItems>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::DenseMap&lt;*,*,*&gt;">
+ <DisplayString Condition="NumEntries == 0">empty</DisplayString>
+ <DisplayString Condition="NumEntries != 0">{{ entries={NumEntries}, buckets={NumBuckets} }}</DisplayString>
+ <Expand>
+ <Item Name="[NumEntries]">NumEntries</Item>
+ <Item Name="[NumBuckets]">NumBuckets</Item>
+ <ArrayItems>
+ <Size>NumBuckets</Size>
+ <ValuePointer>Buckets</ValuePointer>
+ </ArrayItems>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::StringMap&lt;*,*&gt;">
+ <DisplayString>{{ NumBuckets={NumBuckets}, ItemSize={ItemSize} }}</DisplayString>
+ <Expand>
+ <Item Name="[NumBuckets]">NumBuckets</Item>
+ <Item Name="[ItemSize]">ItemSize</Item>
+ <IndexListItems>
+ <Size>NumBuckets</Size>
+ <ValueNode>(llvm::StringMapEntry&lt;$T1&gt;*)TheTable[$i]</ValueNode>
+ </IndexListItems>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::StringMapEntry&lt;*&gt;">
+ <DisplayString Condition="StrLen == 0">empty</DisplayString>
+ <DisplayString Condition="StrLen != 0">({((llvm::StringMapEntry&lt;$T1&gt;*)this)+1,s}, {second})</DisplayString>
+ <Expand>
+ <Item Name="[key]">((llvm::StringMapEntry&lt;$T1&gt;*)this)+1,s</Item>
+ <Item Name="[value]" Condition="StrLen != 0">second</Item>
+ </Expand>
+ </Type>
+
+ <Type Name="llvm::Triple">
+ <DisplayString>{Data}</DisplayString>
+ </Type>
+</AutoVisualizer>
diff --git a/utils/obj2yaml/Makefile b/utils/obj2yaml/Makefile
deleted file mode 100644
index 5b96bdd5b9d8..000000000000
--- a/utils/obj2yaml/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-##===- utils/obj2yaml/Makefile ----------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../..
-TOOLNAME = obj2yaml
-USEDLIBS = LLVMObject.a LLVMSupport.a
-
-# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
-
-# Don't install this utility
-NO_INSTALL = 1
-
-include $(LEVEL)/Makefile.common
diff --git a/utils/obj2yaml/coff2yaml.cpp b/utils/obj2yaml/coff2yaml.cpp
deleted file mode 100644
index c9a71591ef70..000000000000
--- a/utils/obj2yaml/coff2yaml.cpp
+++ /dev/null
@@ -1,362 +0,0 @@
-//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "obj2yaml.h"
-
-#include "llvm/Object/COFF.h"
-
-
-template <typename One, typename Two>
-struct pod_pair { // I'd much rather use std::pair, but it's not a POD
- One first;
- Two second;
-};
-
-#define STRING_PAIR(x) {llvm::COFF::x, #x}
-static const pod_pair<llvm::COFF::MachineTypes, const char *>
-MachineTypePairs [] = {
- STRING_PAIR(IMAGE_FILE_MACHINE_UNKNOWN),
- STRING_PAIR(IMAGE_FILE_MACHINE_AM33),
- STRING_PAIR(IMAGE_FILE_MACHINE_AMD64),
- STRING_PAIR(IMAGE_FILE_MACHINE_ARM),
- STRING_PAIR(IMAGE_FILE_MACHINE_ARMV7),
- STRING_PAIR(IMAGE_FILE_MACHINE_EBC),
- STRING_PAIR(IMAGE_FILE_MACHINE_I386),
- STRING_PAIR(IMAGE_FILE_MACHINE_IA64),
- STRING_PAIR(IMAGE_FILE_MACHINE_M32R),
- STRING_PAIR(IMAGE_FILE_MACHINE_MIPS16),
- STRING_PAIR(IMAGE_FILE_MACHINE_MIPSFPU),
- STRING_PAIR(IMAGE_FILE_MACHINE_MIPSFPU16),
- STRING_PAIR(IMAGE_FILE_MACHINE_POWERPC),
- STRING_PAIR(IMAGE_FILE_MACHINE_POWERPCFP),
- STRING_PAIR(IMAGE_FILE_MACHINE_R4000),
- STRING_PAIR(IMAGE_FILE_MACHINE_SH3),
- STRING_PAIR(IMAGE_FILE_MACHINE_SH3DSP),
- STRING_PAIR(IMAGE_FILE_MACHINE_SH4),
- STRING_PAIR(IMAGE_FILE_MACHINE_SH5),
- STRING_PAIR(IMAGE_FILE_MACHINE_THUMB),
- STRING_PAIR(IMAGE_FILE_MACHINE_WCEMIPSV2)
-};
-
-static const pod_pair<llvm::COFF::SectionCharacteristics, const char *>
-SectionCharacteristicsPairs1 [] = {
- STRING_PAIR(IMAGE_SCN_TYPE_NO_PAD),
- STRING_PAIR(IMAGE_SCN_CNT_CODE),
- STRING_PAIR(IMAGE_SCN_CNT_INITIALIZED_DATA),
- STRING_PAIR(IMAGE_SCN_CNT_UNINITIALIZED_DATA),
- STRING_PAIR(IMAGE_SCN_LNK_OTHER),
- STRING_PAIR(IMAGE_SCN_LNK_INFO),
- STRING_PAIR(IMAGE_SCN_LNK_REMOVE),
- STRING_PAIR(IMAGE_SCN_LNK_COMDAT),
- STRING_PAIR(IMAGE_SCN_GPREL),
- STRING_PAIR(IMAGE_SCN_MEM_PURGEABLE),
- STRING_PAIR(IMAGE_SCN_MEM_16BIT),
- STRING_PAIR(IMAGE_SCN_MEM_LOCKED),
- STRING_PAIR(IMAGE_SCN_MEM_PRELOAD)
-};
-
-static const pod_pair<llvm::COFF::SectionCharacteristics, const char *>
-SectionCharacteristicsPairsAlignment [] = {
- STRING_PAIR(IMAGE_SCN_ALIGN_1BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_2BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_4BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_8BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_16BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_32BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_64BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_128BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_256BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_512BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_1024BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_2048BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_4096BYTES),
- STRING_PAIR(IMAGE_SCN_ALIGN_8192BYTES)
-};
-
-static const pod_pair<llvm::COFF::SectionCharacteristics, const char *>
-SectionCharacteristicsPairs2 [] = {
- STRING_PAIR(IMAGE_SCN_LNK_NRELOC_OVFL),
- STRING_PAIR(IMAGE_SCN_MEM_DISCARDABLE),
- STRING_PAIR(IMAGE_SCN_MEM_NOT_CACHED),
- STRING_PAIR(IMAGE_SCN_MEM_NOT_PAGED),
- STRING_PAIR(IMAGE_SCN_MEM_SHARED),
- STRING_PAIR(IMAGE_SCN_MEM_EXECUTE),
- STRING_PAIR(IMAGE_SCN_MEM_READ),
- STRING_PAIR(IMAGE_SCN_MEM_WRITE)
-};
-
-static const pod_pair<llvm::COFF::SymbolBaseType, const char *>
-SymbolBaseTypePairs [] = {
- STRING_PAIR(IMAGE_SYM_TYPE_NULL),
- STRING_PAIR(IMAGE_SYM_TYPE_VOID),
- STRING_PAIR(IMAGE_SYM_TYPE_CHAR),
- STRING_PAIR(IMAGE_SYM_TYPE_SHORT),
- STRING_PAIR(IMAGE_SYM_TYPE_INT),
- STRING_PAIR(IMAGE_SYM_TYPE_LONG),
- STRING_PAIR(IMAGE_SYM_TYPE_FLOAT),
- STRING_PAIR(IMAGE_SYM_TYPE_DOUBLE),
- STRING_PAIR(IMAGE_SYM_TYPE_STRUCT),
- STRING_PAIR(IMAGE_SYM_TYPE_UNION),
- STRING_PAIR(IMAGE_SYM_TYPE_ENUM),
- STRING_PAIR(IMAGE_SYM_TYPE_MOE),
- STRING_PAIR(IMAGE_SYM_TYPE_BYTE),
- STRING_PAIR(IMAGE_SYM_TYPE_WORD),
- STRING_PAIR(IMAGE_SYM_TYPE_UINT),
- STRING_PAIR(IMAGE_SYM_TYPE_DWORD)
-};
-
-static const pod_pair<llvm::COFF::SymbolComplexType, const char *>
-SymbolComplexTypePairs [] = {
- STRING_PAIR(IMAGE_SYM_DTYPE_NULL),
- STRING_PAIR(IMAGE_SYM_DTYPE_POINTER),
- STRING_PAIR(IMAGE_SYM_DTYPE_FUNCTION),
- STRING_PAIR(IMAGE_SYM_DTYPE_ARRAY),
-};
-
-static const pod_pair<llvm::COFF::SymbolStorageClass, const char *>
-SymbolStorageClassPairs [] = {
- STRING_PAIR(IMAGE_SYM_CLASS_END_OF_FUNCTION),
- STRING_PAIR(IMAGE_SYM_CLASS_NULL),
- STRING_PAIR(IMAGE_SYM_CLASS_AUTOMATIC),
- STRING_PAIR(IMAGE_SYM_CLASS_EXTERNAL),
- STRING_PAIR(IMAGE_SYM_CLASS_STATIC),
- STRING_PAIR(IMAGE_SYM_CLASS_REGISTER),
- STRING_PAIR(IMAGE_SYM_CLASS_EXTERNAL_DEF),
- STRING_PAIR(IMAGE_SYM_CLASS_LABEL),
- STRING_PAIR(IMAGE_SYM_CLASS_UNDEFINED_LABEL),
- STRING_PAIR(IMAGE_SYM_CLASS_MEMBER_OF_STRUCT),
- STRING_PAIR(IMAGE_SYM_CLASS_ARGUMENT),
- STRING_PAIR(IMAGE_SYM_CLASS_STRUCT_TAG),
- STRING_PAIR(IMAGE_SYM_CLASS_MEMBER_OF_UNION),
- STRING_PAIR(IMAGE_SYM_CLASS_UNION_TAG),
- STRING_PAIR(IMAGE_SYM_CLASS_TYPE_DEFINITION),
- STRING_PAIR(IMAGE_SYM_CLASS_UNDEFINED_STATIC),
- STRING_PAIR(IMAGE_SYM_CLASS_ENUM_TAG),
- STRING_PAIR(IMAGE_SYM_CLASS_MEMBER_OF_ENUM),
- STRING_PAIR(IMAGE_SYM_CLASS_REGISTER_PARAM),
- STRING_PAIR(IMAGE_SYM_CLASS_BIT_FIELD),
- STRING_PAIR(IMAGE_SYM_CLASS_BLOCK),
- STRING_PAIR(IMAGE_SYM_CLASS_FUNCTION),
- STRING_PAIR(IMAGE_SYM_CLASS_END_OF_STRUCT),
- STRING_PAIR(IMAGE_SYM_CLASS_FILE),
- STRING_PAIR(IMAGE_SYM_CLASS_SECTION),
- STRING_PAIR(IMAGE_SYM_CLASS_WEAK_EXTERNAL),
- STRING_PAIR(IMAGE_SYM_CLASS_CLR_TOKEN),
-};
-
-static const pod_pair<llvm::COFF::RelocationTypeX86, const char *>
-RelocationTypeX86Pairs [] = {
- STRING_PAIR(IMAGE_REL_I386_ABSOLUTE),
- STRING_PAIR(IMAGE_REL_I386_DIR16),
- STRING_PAIR(IMAGE_REL_I386_REL16),
- STRING_PAIR(IMAGE_REL_I386_DIR32),
- STRING_PAIR(IMAGE_REL_I386_DIR32NB),
- STRING_PAIR(IMAGE_REL_I386_SEG12),
- STRING_PAIR(IMAGE_REL_I386_SECTION),
- STRING_PAIR(IMAGE_REL_I386_SECREL),
- STRING_PAIR(IMAGE_REL_I386_TOKEN),
- STRING_PAIR(IMAGE_REL_I386_SECREL7),
- STRING_PAIR(IMAGE_REL_I386_REL32),
- STRING_PAIR(IMAGE_REL_AMD64_ABSOLUTE),
- STRING_PAIR(IMAGE_REL_AMD64_ADDR64),
- STRING_PAIR(IMAGE_REL_AMD64_ADDR32),
- STRING_PAIR(IMAGE_REL_AMD64_ADDR32NB),
- STRING_PAIR(IMAGE_REL_AMD64_REL32),
- STRING_PAIR(IMAGE_REL_AMD64_REL32_1),
- STRING_PAIR(IMAGE_REL_AMD64_REL32_2),
- STRING_PAIR(IMAGE_REL_AMD64_REL32_3),
- STRING_PAIR(IMAGE_REL_AMD64_REL32_4),
- STRING_PAIR(IMAGE_REL_AMD64_REL32_5),
- STRING_PAIR(IMAGE_REL_AMD64_SECTION),
- STRING_PAIR(IMAGE_REL_AMD64_SECREL),
- STRING_PAIR(IMAGE_REL_AMD64_SECREL7),
- STRING_PAIR(IMAGE_REL_AMD64_TOKEN),
- STRING_PAIR(IMAGE_REL_AMD64_SREL32),
- STRING_PAIR(IMAGE_REL_AMD64_PAIR),
- STRING_PAIR(IMAGE_REL_AMD64_SSPAN32)
-};
-
-static const pod_pair<llvm::COFF::RelocationTypesARM, const char *>
-RelocationTypesARMPairs [] = {
- STRING_PAIR(IMAGE_REL_ARM_ABSOLUTE),
- STRING_PAIR(IMAGE_REL_ARM_ADDR32),
- STRING_PAIR(IMAGE_REL_ARM_ADDR32NB),
- STRING_PAIR(IMAGE_REL_ARM_BRANCH24),
- STRING_PAIR(IMAGE_REL_ARM_BRANCH11),
- STRING_PAIR(IMAGE_REL_ARM_TOKEN),
- STRING_PAIR(IMAGE_REL_ARM_BLX24),
- STRING_PAIR(IMAGE_REL_ARM_BLX11),
- STRING_PAIR(IMAGE_REL_ARM_SECTION),
- STRING_PAIR(IMAGE_REL_ARM_SECREL),
- STRING_PAIR(IMAGE_REL_ARM_MOV32A),
- STRING_PAIR(IMAGE_REL_ARM_MOV32T),
- STRING_PAIR(IMAGE_REL_ARM_BRANCH20T),
- STRING_PAIR(IMAGE_REL_ARM_BRANCH24T),
- STRING_PAIR(IMAGE_REL_ARM_BLX23T)
-};
-#undef STRING_PAIR
-
-
-static const char endl = '\n';
-
-namespace yaml { // COFF-specific yaml-writing specific routines
-
-static llvm::raw_ostream &writeName(llvm::raw_ostream &Out,
- const char *Name, std::size_t NameSize) {
- for (std::size_t i = 0; i < NameSize; ++i) {
- if (!Name[i]) break;
- Out << Name[i];
- }
- return Out;
-}
-
-// Given an array of pod_pair<enum, const char *>, write all enums that match
-template <typename T, std::size_t N>
-static llvm::raw_ostream &writeBitMask(llvm::raw_ostream &Out,
- const pod_pair<T, const char *> (&Arr)[N], unsigned long Val) {
- for (std::size_t i = 0; i < N; ++i)
- if (Val & Arr[i].first)
- Out << Arr[i].second << ", ";
- return Out;
-}
-
-} // end of yaml namespace
-
-// Given an array of pod_pair<enum, const char *>, look up a value
-template <typename T, std::size_t N>
-const char *nameLookup(const pod_pair<T, const char *> (&Arr)[N],
- unsigned long Val, const char *NotFound = NULL) {
- T n = static_cast<T>(Val);
- for (std::size_t i = 0; i < N; ++i)
- if (n == Arr[i].first)
- return Arr[i].second;
- return NotFound;
-}
-
-
-static llvm::raw_ostream &yamlCOFFHeader(
- const llvm::object::coff_file_header *Header,llvm::raw_ostream &Out) {
-
- Out << "header: !Header" << endl;
- Out << " Machine: ";
- Out << nameLookup(MachineTypePairs, Header->Machine, "# Unknown_MachineTypes")
- << " # (";
- return yaml::writeHexNumber(Out, Header->Machine) << ")" << endl << endl;
-}
-
-
-static llvm::raw_ostream &yamlCOFFSections(llvm::object::COFFObjectFile &Obj,
- std::size_t NumSections, llvm::raw_ostream &Out) {
- llvm::error_code ec;
- Out << "sections:" << endl;
- for (llvm::object::section_iterator iter = Obj.begin_sections();
- iter != Obj.end_sections(); iter.increment(ec)) {
- const llvm::object::coff_section *sect = Obj.getCOFFSection(iter);
-
- Out << " - !Section" << endl;
- Out << " Name: ";
- yaml::writeName(Out, sect->Name, sizeof(sect->Name)) << endl;
-
- Out << " Characteristics: [";
- yaml::writeBitMask(Out, SectionCharacteristicsPairs1, sect->Characteristics);
- Out << nameLookup(SectionCharacteristicsPairsAlignment,
- sect->Characteristics & 0x00F00000, "# Unrecognized_IMAGE_SCN_ALIGN")
- << ", ";
- yaml::writeBitMask(Out, SectionCharacteristicsPairs2, sect->Characteristics);
- Out << "] # ";
- yaml::writeHexNumber(Out, sect->Characteristics) << endl;
-
- llvm::ArrayRef<uint8_t> sectionData;
- Obj.getSectionContents(sect, sectionData);
- Out << " SectionData: ";
- yaml::writeHexStream(Out, sectionData) << endl;
- if (iter->begin_relocations() != iter->end_relocations())
- Out << " Relocations:\n";
- for (llvm::object::relocation_iterator rIter = iter->begin_relocations();
- rIter != iter->end_relocations(); rIter.increment(ec)) {
- const llvm::object::coff_relocation *reloc = Obj.getCOFFRelocation(rIter);
-
- Out << " - !Relocation" << endl;
- Out << " VirtualAddress: " ;
- yaml::writeHexNumber(Out, reloc->VirtualAddress) << endl;
- Out << " SymbolTableIndex: " << reloc->SymbolTableIndex << endl;
- Out << " Type: "
- << nameLookup(RelocationTypeX86Pairs, reloc->Type) << endl;
- // TODO: Use the correct reloc type for the machine.
- Out << endl;
- }
-
- }
- return Out;
-}
-
-static llvm::raw_ostream& yamlCOFFSymbols(llvm::object::COFFObjectFile &Obj,
- std::size_t NumSymbols, llvm::raw_ostream &Out) {
- llvm::error_code ec;
- Out << "symbols:" << endl;
- for (llvm::object::symbol_iterator iter = Obj.begin_symbols();
- iter != Obj.end_symbols(); iter.increment(ec)) {
- // Gather all the info that we need
- llvm::StringRef str;
- const llvm::object::coff_symbol *symbol = Obj.getCOFFSymbol(iter);
- Obj.getSymbolName(symbol, str);
- std::size_t simpleType = symbol->getBaseType();
- std::size_t complexType = symbol->getComplexType();
- std::size_t storageClass = symbol->StorageClass;
-
- Out << " - !Symbol" << endl;
- Out << " Name: " << str << endl;
-
- Out << " Value: " << symbol->Value << endl;
- Out << " SectionNumber: " << symbol->SectionNumber << endl;
-
- Out << " SimpleType: "
- << nameLookup(SymbolBaseTypePairs, simpleType,
- "# Unknown_SymbolBaseType")
- << " # (" << simpleType << ")" << endl;
-
- Out << " ComplexType: "
- << nameLookup(SymbolComplexTypePairs, complexType,
- "# Unknown_SymbolComplexType")
- << " # (" << complexType << ")" << endl;
-
- Out << " StorageClass: "
- << nameLookup(SymbolStorageClassPairs, storageClass,
- "# Unknown_StorageClass")
- << " # (" << (int) storageClass << ")" << endl;
-
- if (symbol->NumberOfAuxSymbols > 0) {
- llvm::ArrayRef<uint8_t> aux = Obj.getSymbolAuxData(symbol);
- Out << " NumberOfAuxSymbols: "
- << (int) symbol->NumberOfAuxSymbols << endl;
- Out << " AuxillaryData: ";
- yaml::writeHexStream(Out, aux);
- }
-
- Out << endl;
- }
-
- return Out;
-}
-
-
-llvm::error_code coff2yaml(llvm::raw_ostream &Out, llvm::MemoryBuffer *TheObj) {
- llvm::error_code ec;
- llvm::object::COFFObjectFile obj(TheObj, ec);
- if (!ec) {
- const llvm::object::coff_file_header *hd;
- ec = obj.getHeader(hd);
- if (!ec) {
- yamlCOFFHeader(hd, Out);
- yamlCOFFSections(obj, hd->NumberOfSections, Out);
- yamlCOFFSymbols(obj, hd->NumberOfSymbols, Out);
- }
- }
- return ec;
-}
diff --git a/utils/obj2yaml/obj2yaml.cpp b/utils/obj2yaml/obj2yaml.cpp
deleted file mode 100644
index ff253fa13143..000000000000
--- a/utils/obj2yaml/obj2yaml.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "obj2yaml.h"
-
-#include "llvm/ADT/OwningPtr.h"
-
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Signals.h"
-
-#include "llvm/Object/Archive.h"
-#include "llvm/Object/COFF.h"
-
-const char endl = '\n';
-
-namespace yaml { // generic yaml-writing specific routines
-
-unsigned char printable(unsigned char Ch) {
- return Ch >= ' ' && Ch <= '~' ? Ch : '.';
-}
-
-llvm::raw_ostream &writeHexStream(llvm::raw_ostream &Out,
- const llvm::ArrayRef<uint8_t> arr) {
- const char *hex = "0123456789ABCDEF";
- Out << " !hex \"";
-
- typedef llvm::ArrayRef<uint8_t>::const_iterator iter_t;
- const iter_t end = arr.end();
- for (iter_t iter = arr.begin(); iter != end; ++iter)
- Out << hex[(*iter >> 4) & 0x0F] << hex[(*iter & 0x0F)];
-
- Out << "\" # |";
- for (iter_t iter = arr.begin(); iter != end; ++iter)
- Out << printable(*iter);
- Out << "|" << endl;
-
- return Out;
- }
-
-llvm::raw_ostream &writeHexNumber(llvm::raw_ostream &Out, unsigned long long N) {
- if (N >= 10)
- Out << "0x";
- Out.write_hex(N);
- return Out;
-}
-
-}
-
-
-using namespace llvm;
-enum ObjectFileType { coff };
-
-cl::opt<ObjectFileType> InputFormat(
- cl::desc("Choose input format"),
- cl::values(
- clEnumVal(coff, "process COFF object files"),
- clEnumValEnd));
-
-cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
-
-int main(int argc, char * argv[]) {
- cl::ParseCommandLineOptions(argc, argv);
- sys::PrintStackTraceOnErrorSignal();
- PrettyStackTraceProgram X(argc, argv);
- llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
-
-// Process the input file
- OwningPtr<MemoryBuffer> buf;
-
-// TODO: If this is an archive, then burst it and dump each entry
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, buf))
- llvm::errs() << "Error: '" << ec.message() << "' opening file '"
- << InputFilename << "'" << endl;
- else {
- ec = coff2yaml(llvm::outs(), buf.take());
- if (ec)
- llvm::errs() << "Error: " << ec.message() << " dumping COFF file" << endl;
- }
-
- return 0;
-}
diff --git a/utils/obj2yaml/obj2yaml.h b/utils/obj2yaml/obj2yaml.h
deleted file mode 100644
index 2a23b49682df..000000000000
--- a/utils/obj2yaml/obj2yaml.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===------ utils/obj2yaml.hpp - obj2yaml conversion tool -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-// This file declares some helper routines, and also the format-specific
-// writers. To add a new format, add the declaration here, and, in a separate
-// source file, implement it.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_UTILS_OBJ2YAML_H
-#define LLVM_UTILS_OBJ2YAML_H
-
-#include "llvm/ADT/ArrayRef.h"
-
-#include "llvm/Support/system_error.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/MemoryBuffer.h"
-
-namespace yaml { // routines for writing YAML
-// Write a hex stream:
-// <Prefix> !hex: "<hex digits>" #|<ASCII chars>\n
- llvm::raw_ostream &writeHexStream
- (llvm::raw_ostream &Out, const llvm::ArrayRef<uint8_t> arr);
-
-// Writes a number in hex; prefix it by 0x if it is >= 10
- llvm::raw_ostream &writeHexNumber
- (llvm::raw_ostream &Out, unsigned long long N);
-}
-
-llvm::error_code coff2yaml(llvm::raw_ostream &Out, llvm::MemoryBuffer *TheObj);
-
-#endif
diff --git a/utils/sort_includes.py b/utils/sort_includes.py
new file mode 100755
index 000000000000..fef97550db8d
--- /dev/null
+++ b/utils/sort_includes.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+
+"""Script to sort the top-most block of #include lines.
+
+Assumes the LLVM coding conventions.
+
+Currently, this script only sorts the llvm/... headers. Patches that extend
+it to other header groups are welcome.
+"""
+
+import argparse
+import os
+
+def sort_includes(f):
+ """Sort the #include lines of a specific file."""
+
+ # Skip files which are under INPUTS trees or test trees.
+ if 'INPUTS/' in f.name or 'test/' in f.name:
+ return
+
+ ext = os.path.splitext(f.name)[1]
+ if ext not in ['.cpp', '.c', '.h', '.inc', '.def']:
+ return
+
+ lines = f.readlines()
+ look_for_api_header = ext in ['.cpp', '.c']
+ found_headers = False
+ headers_begin = 0
+ headers_end = 0
+ api_headers = []
+ local_headers = []
+ project_headers = []
+ system_headers = []
+ for (i, l) in enumerate(lines):
+ if l.strip() == '':
+ continue
+ if l.startswith('#include'):
+ if not found_headers:
+ headers_begin = i
+ found_headers = True
+ headers_end = i
+ header = l[len('#include'):].lstrip()
+ if look_for_api_header and header.startswith('"'):
+ api_headers.append(header)
+ look_for_api_header = False
+ continue
+ if header.startswith('<') or header.startswith('"gtest/'):
+ system_headers.append(header)
+ continue
+ if (header.startswith('"llvm/') or header.startswith('"llvm-c/') or
+ header.startswith('"clang/') or header.startswith('"clang-c/')):
+ project_headers.append(header)
+ continue
+ local_headers.append(header)
+ continue
+
+ # Only allow comments and #defines prior to any includes. If either are
+ # mixed with includes, the order might be sensitive.
+ if found_headers:
+ break
+ if l.startswith('//') or l.startswith('#define') or l.startswith('#ifndef'):
+ continue
+ break
+ if not found_headers:
+ return
+
+ local_headers = sorted(set(local_headers))
+ project_headers = sorted(set(project_headers))
+ system_headers = sorted(set(system_headers))
+ headers = api_headers + local_headers + project_headers + system_headers
+ header_lines = ['#include ' + h for h in headers]
+ lines = lines[:headers_begin] + header_lines + lines[headers_end + 1:]
+
+ f.seek(0)
+ f.truncate()
+ f.writelines(lines)
+
+def main():
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument('files', nargs='+', type=argparse.FileType('r+'),
+ help='the source files to sort includes within')
+ args = parser.parse_args()
+ for f in args.files:
+ sort_includes(f)
+
+if __name__ == '__main__':
+ main()
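
A usage sketch, with hypothetical file arguments: `python utils/sort_includes.py lib/IR/Verifier.cpp include/llvm/IR/Module.h`. Each file is rewritten in place (the script seeks back to the start and truncates), so it is best run on a clean working tree where the result can be reviewed with a diff.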
diff --git a/utils/testgen/mc-bundling-x86-gen.py b/utils/testgen/mc-bundling-x86-gen.py
new file mode 100644
index 000000000000..5c1c6c456280
--- /dev/null
+++ b/utils/testgen/mc-bundling-x86-gen.py
@@ -0,0 +1,102 @@
+#!/usr/bin/python
+
+# Auto-generates an exhaustive and repetitive test for correct bundle-locked
+# alignment on x86.
+# For every possible offset in an aligned bundle, a bundle-locked group of every
+# size in the inclusive range [1, bundle_size] is inserted. An appropriate CHECK
+# is added to verify that NOP padding occurred (or did not occur) as expected.
+# Run with --align-to-end to generate a similar test with align_to_end for each
+# .bundle_lock directive.
+
+# This script runs with Python 2.7 and 3.2+
+
+from __future__ import print_function
+import argparse
+
+BUNDLE_SIZE_POW2 = 4
+BUNDLE_SIZE = 2 ** BUNDLE_SIZE_POW2
+
+PREAMBLE = '''
+# RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - \\
+# RUN: | llvm-objdump -triple i386 -disassemble -no-show-raw-insn - | FileCheck %s
+
+# !!! This test is auto-generated from utils/testgen/mc-bundling-x86-gen.py !!!
+# It tests that bundle-aligned grouping works correctly in MC. Read the
+# source of the script for more details.
+
+ .text
+ .bundle_align_mode {0}
+'''.format(BUNDLE_SIZE_POW2).lstrip()
+
+ALIGNTO = ' .align {0}, 0x90'
+NOPFILL = ' .fill {0}, 1, 0x90'
+
+def print_bundle_locked_sequence(n, align_to_end=False):
+ print(' .bundle_lock{0}'.format(' align_to_end' if align_to_end else ''))
+ print(' .rept {0}'.format(n))
+ print(' inc %eax')
+ print(' .endr')
+ print(' .bundle_unlock')
+
+def generate(align_to_end=False):
+ print(PREAMBLE)
+
+ ntest = 0
+ for instlen in range(1, BUNDLE_SIZE + 1):
+ for offset in range(0, BUNDLE_SIZE):
+ # Spread out all the instructions to not worry about cross-bundle
+ # interference.
+ print(ALIGNTO.format(2 * BUNDLE_SIZE))
+ print('INSTRLEN_{0}_OFFSET_{1}:'.format(instlen, offset))
+ if offset > 0:
+ print(NOPFILL.format(offset))
+ print_bundle_locked_sequence(instlen, align_to_end)
+
+ # Now generate an appropriate CHECK line
+ base_offset = ntest * 2 * BUNDLE_SIZE
+ inst_orig_offset = base_offset + offset # had it not been padded...
+
+ def print_check(adjusted_offset=None, nop_split_offset=None):
+ if adjusted_offset is not None:
+ print('# CHECK: {0:x}: nop'.format(inst_orig_offset))
+ if nop_split_offset is not None:
+ print('# CHECK: {0:x}: nop'.format(nop_split_offset))
+ print('# CHECK: {0:x}: incl'.format(adjusted_offset))
+ else:
+ print('# CHECK: {0:x}: incl'.format(inst_orig_offset))
+
+ if align_to_end:
+ if offset + instlen == BUNDLE_SIZE:
+ # No padding needed
+ print_check()
+ elif offset + instlen < BUNDLE_SIZE:
+ # Pad to end at nearest bundle boundary
+ offset_to_end = base_offset + (BUNDLE_SIZE - instlen)
+ print_check(offset_to_end)
+ else: # offset + instlen > BUNDLE_SIZE
+ # Pad to end at next bundle boundary, splitting the nop sequence
+ # at the nearest bundle boundary
+ offset_to_nearest_bundle = base_offset + BUNDLE_SIZE
+ offset_to_end = base_offset + (BUNDLE_SIZE * 2 - instlen)
+ if offset_to_nearest_bundle == offset_to_end:
+ offset_to_nearest_bundle = None
+ print_check(offset_to_end, offset_to_nearest_bundle)
+ else:
+ if offset + instlen > BUNDLE_SIZE:
+ # Padding needed
+ aligned_offset = (inst_orig_offset + instlen) & ~(BUNDLE_SIZE - 1)
+ print_check(aligned_offset)
+ else:
+ # No padding needed
+ print_check()
+
+ print()
+ ntest += 1
+
+if __name__ == '__main__':
+ argparser = argparse.ArgumentParser()
+ argparser.add_argument('--align-to-end',
+ action='store_true',
+ help='generate .bundle_lock with align_to_end option')
+ args = argparser.parse_args()
+ generate(align_to_end=args.align_to_end)
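
The padding math in the default (non-align_to_end) branch reduces to one mask operation: when a bundle-locked group would cross a bundle boundary, its start is pushed to the next boundary. A worked instance, with illustrative values:

    BUNDLE_SIZE = 16
    inst_orig_offset = 14  # the group would have begun 14 bytes into a bundle
    instlen = 4            # 14 + 4 = 18 crosses the boundary at 16
    # Masking the end address down to a multiple of 16 yields the boundary the
    # group crossed, which is where NOP padding makes it start instead.
    aligned_offset = (inst_orig_offset + instlen) & ~(BUNDLE_SIZE - 1)
    assert aligned_offset == 16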
diff --git a/utils/textmate/README b/utils/textmate/README
new file mode 100644
index 000000000000..b01352551afe
--- /dev/null
+++ b/utils/textmate/README
@@ -0,0 +1,8 @@
+This directory contains a "bundle" for doing syntax highlighting of TableGen
+files for the TextMate editor for OS X. The highlighting follows that done
+by the TextMate "C" bundle. Currently, keywords, comments, and strings are
+highlighted.
+
+To install this bundle, copy it to the per user area:
+ cp -R utils/textmate/TableGen.tmbundle \
+ ~/Library/Application\ Support/TextMate/Bundles/TableGen.tmbundle
diff --git a/utils/textmate/TableGen.tmbundle/Syntaxes/TableGen.tmLanguage b/utils/textmate/TableGen.tmbundle/Syntaxes/TableGen.tmLanguage
new file mode 100644
index 000000000000..f3cf2d618fd5
--- /dev/null
+++ b/utils/textmate/TableGen.tmbundle/Syntaxes/TableGen.tmLanguage
@@ -0,0 +1,132 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>fileTypes</key>
+ <array><string>td</string></array>
+ <key>foldingStartMarker</key>
+ <string>/\*\*|\{\s*$</string>
+ <key>foldingStopMarker</key>
+ <string>\*\*/|^\s*\}</string>
+ <key>name</key>
+ <string>TableGen</string>
+ <key>patterns</key>
+ <array>
+ <dict>
+ <key>include</key>
+ <string>#comments</string>
+ </dict>
+ <dict>
+ <key>match</key>
+ <string>\b(def|let|in|code|dag|string|list|bits|bit|field|include|defm|foreach|class|multiclass|int)\b</string>
+ <key>name</key>
+ <string>keyword.control.tablegen</string>
+ </dict>
+ <dict>
+ <key>begin</key>
+ <string>"</string>
+ <key>end</key>
+ <string>"</string>
+ <key>name</key>
+ <string>string.quoted.double.untitled</string>
+ <key>patterns</key>
+ <array>
+ <dict>
+ <key>match</key>
+ <string>\\.</string>
+ <key>name</key>
+ <string>constant.character.escape.tablegen</string>
+ </dict>
+ </array>
+ </dict>
+ </array>
+ <key>repository</key>
+ <dict>
+ <key>comments</key>
+ <dict>
+ <key>patterns</key>
+ <array>
+ <dict>
+ <key>captures</key>
+ <dict>
+ <key>1</key>
+ <dict>
+ <key>name</key>
+ <string>meta.toc-list.banner.block.tablegen</string>
+ </dict>
+ </dict>
+ <key>match</key>
+ <string>^/\* =(\s*.*?)\s*= \*/$\n?</string>
+ <key>name</key>
+ <string>comment.block.tablegen</string>
+ </dict>
+ <dict>
+ <key>begin</key>
+ <string>/\*</string>
+ <key>captures</key>
+ <dict>
+ <key>0</key>
+ <dict>
+ <key>name</key>
+ <string>punctuation.definition.comment.tablegen</string>
+ </dict>
+ </dict>
+ <key>end</key>
+ <string>\*/</string>
+ <key>name</key>
+ <string>comment.block.tablegen</string>
+ </dict>
+ <dict>
+ <key>match</key>
+ <string>\*/.*\n</string>
+ <key>name</key>
+ <string>invalid.illegal.stray-comment-end.tablegen</string>
+ </dict>
+ <dict>
+ <key>captures</key>
+ <dict>
+ <key>1</key>
+ <dict>
+ <key>name</key>
+ <string>meta.toc-list.banner.line.tablegen</string>
+ </dict>
+ </dict>
+ <key>match</key>
+ <string>^// =(\s*.*?)\s*=\s*$\n?</string>
+ <key>name</key>
+ <string>comment.line.banner.tablegen</string>
+ </dict>
+ <dict>
+ <key>begin</key>
+ <string>//</string>
+ <key>beginCaptures</key>
+ <dict>
+ <key>0</key>
+ <dict>
+ <key>name</key>
+ <string>punctuation.definition.comment.tablegen</string>
+ </dict>
+ </dict>
+ <key>end</key>
+ <string>$\n?</string>
+ <key>name</key>
+ <string>comment.line.double-slash.tablegen</string>
+ <key>patterns</key>
+ <array>
+ <dict>
+ <key>match</key>
+ <string>(?&gt;\\\s*\n)</string>
+ <key>name</key>
+ <string>punctuation.separator.continuation.tablegen</string>
+ </dict>
+ </array>
+ </dict>
+ </array>
+ </dict>
+ </dict>
+ <key>scopeName</key>
+ <string>source.tablegen</string>
+ <key>uuid</key>
+ <string>3A090BFC-E74B-4993-8DAE-7CCF6D238A32</string>
+</dict>
+</plist>
diff --git a/utils/textmate/TableGen.tmbundle/info.plist b/utils/textmate/TableGen.tmbundle/info.plist
new file mode 100644
index 000000000000..c2f680ac519b
--- /dev/null
+++ b/utils/textmate/TableGen.tmbundle/info.plist
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>name</key>
+ <string>TableGen</string>
+ <key>ordering</key>
+ <array/>
+ <key>uuid</key>
+ <string>96925448-7219-41E9-A7F0-8D5B70E9B877</string>
+</dict>
+</plist>
diff --git a/utils/unittest/UnitTestMain/TestMain.cpp b/utils/unittest/UnitTestMain/TestMain.cpp
index b35bae5abfb1..ce32b7380f22 100644
--- a/utils/unittest/UnitTestMain/TestMain.cpp
+++ b/utils/unittest/UnitTestMain/TestMain.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Signals.h"
#include "gtest/gtest.h"
@@ -22,6 +23,7 @@
int main(int argc, char **argv) {
llvm::sys::PrintStackTraceOnErrorSignal();
testing::InitGoogleTest(&argc, argv);
+ llvm::cl::ParseCommandLineOptions(argc, argv);
# if defined(LLVM_ON_WIN32)
// Disable all of the possible ways Windows conspires to make automated
diff --git a/utils/unittest/googletest/Makefile b/utils/unittest/googletest/Makefile
index 22c8f36fccb6..bf736704f549 100644
--- a/utils/unittest/googletest/Makefile
+++ b/utils/unittest/googletest/Makefile
@@ -36,4 +36,6 @@ endif
NO_INSTALL = 1
+SOURCES = $(filter-out gtest-all.cc, $(notdir $(wildcard $(PROJ_SRC_DIR)/*.cc)))
+
include $(LEVEL)/Makefile.common
diff --git a/utils/unittest/googletest/README.LLVM b/utils/unittest/googletest/README.LLVM
index 51340e9ceb0c..3565a3280e4f 100644
--- a/utils/unittest/googletest/README.LLVM
+++ b/utils/unittest/googletest/README.LLVM
@@ -19,9 +19,10 @@ $ rmdir src
$ mv *.h include/gtest/internal/
# Update paths to the included files
+$ perl -pi -e 's|^#include "src/|#include "|' gtest-all.cc
$ perl -pi -e 's|^#include "src/|#include "gtest/internal/|' *.cc
-$ rm -f gtest-all.cc gtest_main.cc
+$ rm -f gtest_main.cc
$ mv COPYING LICENSE.TXT
diff --git a/utils/unittest/googletest/gtest-all.cc b/utils/unittest/googletest/gtest-all.cc
new file mode 100644
index 000000000000..97753e5b9d5a
--- /dev/null
+++ b/utils/unittest/googletest/gtest-all.cc
@@ -0,0 +1,48 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: mheule@google.com (Markus Heule)
+//
+// Google C++ Testing Framework (Google Test)
+//
+// Sometimes it's desirable to build Google Test by compiling a single file.
+// This file serves this purpose.
+
+// This line ensures that gtest.h can be compiled on its own, even
+// when it's fused.
+#include "gtest/gtest.h"
+
+// The following lines pull in the real gtest *.cc files.
+#include "gtest.cc"
+#include "gtest-death-test.cc"
+#include "gtest-filepath.cc"
+#include "gtest-port.cc"
+#include "gtest-printers.cc"
+#include "gtest-test-part.cc"
+#include "gtest-typed-test.cc"
diff --git a/utils/unittest/googletest/gtest-filepath.cc b/utils/unittest/googletest/gtest-filepath.cc
index bc610094e118..ad1bab8e9f57 100644
--- a/utils/unittest/googletest/gtest-filepath.cc
+++ b/utils/unittest/googletest/gtest-filepath.cc
@@ -69,7 +69,6 @@ namespace internal {
// of them.
const char kPathSeparator = '\\';
const char kAlternatePathSeparator = '/';
-const char kPathSeparatorString[] = "\\";
const char kAlternatePathSeparatorString[] = "/";
# if GTEST_OS_WINDOWS_MOBILE
// Windows CE doesn't have a current directory. You should not use
@@ -83,7 +82,6 @@ const char kCurrentDirectoryString[] = ".\\";
# endif // GTEST_OS_WINDOWS_MOBILE
#else
const char kPathSeparator = '/';
-const char kPathSeparatorString[] = "/";
const char kCurrentDirectoryString[] = "./";
#endif // GTEST_OS_WINDOWS
diff --git a/utils/unittest/googletest/gtest-printers.cc b/utils/unittest/googletest/gtest-printers.cc
index ed63c7b3b91d..205a39425f0e 100644
--- a/utils/unittest/googletest/gtest-printers.cc
+++ b/utils/unittest/googletest/gtest-printers.cc
@@ -127,7 +127,7 @@ namespace internal {
// Depending on the value of a char (or wchar_t), we print it in one
// of three formats:
// - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
-// - as a hexidecimal escape sequence (e.g. '\x7F'), or
+// - as a hexadecimal escape sequence (e.g. '\x7F'), or
// - as a special escape sequence (e.g. '\r', '\n').
enum CharFormat {
kAsIs,
@@ -230,7 +230,7 @@ void PrintCharAndCodeTo(Char c, ostream* os) {
return;
*os << " (" << String::Format("%d", c).c_str();
- // For more convenience, we print c's code again in hexidecimal,
+ // For more convenience, we print c's code again in hexadecimal,
// unless c was already printed in the form '\x##' or the code is in
// [1, 9].
if (format == kHexEscape || (1 <= c && c <= 9)) {
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-internal.h b/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
index f8a5cc9447b1..a94bf28421fb 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
@@ -56,7 +56,9 @@
#include "gtest/internal/gtest-filepath.h"
#include "gtest/internal/gtest-type-util.h"
+#if !GTEST_NO_LLVM_RAW_OSTREAM
#include "llvm/Support/raw_os_ostream.h"
+#endif
// Due to C++ preprocessor weirdness, we need double indirection to
// concatenate two tokens when one of them is __LINE__. Writing
@@ -100,6 +102,7 @@
// std::ostream with an implicit conversion to raw_ostream& and stream
// to that. This causes the compiler to prefer std::ostream overloads
// but still find raw_ostream& overloads.
+#if !GTEST_NO_LLVM_RAW_OSTREAM
namespace llvm {
class convertible_fwd_ostream : public std::ostream {
raw_os_ostream ros_;
@@ -115,6 +118,12 @@ inline void GTestStreamToHelper(std::ostream* os, const T& val) {
llvm::convertible_fwd_ostream cos(*os);
cos << val;
}
+#else
+template <typename T>
+inline void GTestStreamToHelper(std::ostream* os, const T& val) {
+ *os << val;
+}
+#endif
class ProtocolMessage;
namespace proto2 { class Message; }
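The conversion trick in the hunk above is worth seeing in isolation. A self-contained sketch with stand-in types (RawStream is a hypothetical stand-in for llvm::raw_ostream; this is illustrative code, not part of gtest or LLVM):

    #include <iostream>

    // Stand-in for llvm::raw_ostream (hypothetical, for illustration).
    struct RawStream {
      void write(const char *S) { std::cout << S; }
    };

    // A type streamable only via RawStream&, like many LLVM types.
    struct OnlyRaw {};
    RawStream &operator<<(RawStream &OS, OnlyRaw) {
      OS.write("raw\n");
      return OS;
    }

    // IS-A std::ostream, so std::ostream overloads win overload resolution,
    // while the implicit conversion still lets RawStream&-only overloads be
    // found -- the same idea as convertible_fwd_ostream above.
    struct Wrapper : std::ostream {
      RawStream RS;
      Wrapper() : std::ostream(std::cout.rdbuf()) {}
      operator RawStream &() { return RS; }
    };

    int main() {
      Wrapper W;
      W << 42 << '\n'; // picked up by std::ostream::operator<<(int)
      W << OnlyRaw();  // picked up via the conversion to RawStream&
    }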
diff --git a/utils/valgrind/x86_64-pc-linux-gnu.supp b/utils/valgrind/x86_64-pc-linux-gnu.supp
index fc863b85e291..c8e5cd091784 100644
--- a/utils/valgrind/x86_64-pc-linux-gnu.supp
+++ b/utils/valgrind/x86_64-pc-linux-gnu.supp
@@ -33,6 +33,12 @@
}
{
+ We don't care about cmp
+ Memcheck:Cond
+ obj:/usr/bin/cmp
+}
+
+{
We don't care if grep leaks
Memcheck:Leak
obj:/bin/grep
diff --git a/utils/vim/llvm.vim b/utils/vim/llvm.vim
index c16274ba2f41..abd24e5dcca6 100644
--- a/utils/vim/llvm.vim
+++ b/utils/vim/llvm.vim
@@ -1,7 +1,7 @@
" Vim syntax file
" Language: llvm
" Maintainer: The LLVM team, http://llvm.org/
-" Version: $Revision: 166305 $
+" Version: $Revision: 176075 $
if version < 600
syntax clear
@@ -14,50 +14,48 @@ syn case match
" Types.
" Types also include struct, array, vector, etc. but these don't
" benefit as much from having dedicated highlighting rules.
-syn keyword llvmType void float double half
-syn keyword llvmType x86_fp80 fp128 ppc_fp128
+syn keyword llvmType void half float double x86_fp80 fp128 ppc_fp128
+syn keyword llvmType label metadata x86_mmx
syn keyword llvmType type label opaque
syn match llvmType /\<i\d\+\>/
" Instructions.
" The true and false tokens can be used for comparison opcodes, but it's
" much more common for these tokens to be used for boolean constants.
-syn keyword llvmStatement add fadd sub fsub mul fmul
-syn keyword llvmStatement sdiv udiv fdiv srem urem frem
-syn keyword llvmStatement and or xor
-syn keyword llvmStatement icmp fcmp
-syn keyword llvmStatement eq ne ugt uge ult ule sgt sge slt sle
-syn keyword llvmStatement oeq ogt oge olt ole one ord ueq ugt uge
-syn keyword llvmStatement ult ule une uno
-syn keyword llvmStatement nuw nsw exact inbounds
-syn keyword llvmStatement phi call select shl lshr ashr va_arg
-syn keyword llvmStatement trunc zext sext
-syn keyword llvmStatement fptrunc fpext fptoui fptosi uitofp sitofp
-syn keyword llvmStatement ptrtoint inttoptr bitcast
-syn keyword llvmStatement ret br indirectbr switch invoke unwind unreachable
-syn keyword llvmStatement malloc alloca free load store getelementptr
-syn keyword llvmStatement extractelement insertelement shufflevector
-syn keyword llvmStatement extractvalue insertvalue
+syn keyword llvmStatement add alloca and arcp ashr atomicrmw bitcast br call
+syn keyword llvmStatement cmpxchg eq exact extractelement extractvalue fadd fast
+syn keyword llvmStatement fcmp fdiv fence fmul fpext fptosi fptoui fptrunc free
+syn keyword llvmStatement frem fsub getelementptr icmp inbounds indirectbr
+syn keyword llvmStatement insertelement insertvalue inttoptr invoke landingpad
+syn keyword llvmStatement load lshr malloc max min mul nand ne ninf nnan nsw nsz
+syn keyword llvmStatement nuw oeq oge ogt ole olt one or ord phi ptrtoint resume
+syn keyword llvmStatement ret sdiv select sext sge sgt shl shufflevector sitofp
+syn keyword llvmStatement sle slt srem store sub switch trunc udiv ueq uge ugt
+syn keyword llvmStatement uitofp ule ult umax umin une uno unreachable unwind
+syn keyword llvmStatement urem va_arg xchg xor zext
" Keywords.
-syn keyword llvmKeyword define declare global constant
-syn keyword llvmKeyword internal external private
-syn keyword llvmKeyword linkonce linkonce_odr weak weak_odr appending
-syn keyword llvmKeyword common extern_weak
-syn keyword llvmKeyword thread_local dllimport dllexport
-syn keyword llvmKeyword hidden protected default
-syn keyword llvmKeyword except deplibs
-syn keyword llvmKeyword volatile fastcc coldcc cc ccc
-syn keyword llvmKeyword x86_stdcallcc x86_fastcallcc
-syn keyword llvmKeyword ptx_kernel ptx_device
-syn keyword llvmKeyword signext zeroext inreg sret nounwind noreturn
-syn keyword llvmKeyword nocapture byval nest readnone readonly noalias uwtable
-syn keyword llvmKeyword inlinehint noinline alwaysinline optsize ssp sspreq
-syn keyword llvmKeyword noredzone noimplicitfloat naked alignstack
-syn keyword llvmKeyword module asm align tail to
-syn keyword llvmKeyword addrspace section alias sideeffect c gc
-syn keyword llvmKeyword target datalayout triple
-syn keyword llvmKeyword blockaddress
+syn keyword llvmKeyword acq_rel acquire sanitize_address addrspace alias align
+syn keyword llvmKeyword alignstack alwaysinline appending arm_aapcs_vfpcc
+syn keyword llvmKeyword arm_aapcscc arm_apcscc asm atomic available_externally
+syn keyword llvmKeyword blockaddress byval c catch cc ccc cleanup coldcc common
+syn keyword llvmKeyword constant datalayout declare default define deplibs
+syn keyword llvmKeyword dllexport dllimport except extern_weak external fastcc
+syn keyword llvmKeyword filter gc global hidden initialexec inlinehint inreg
+syn keyword llvmKeyword intel_ocl_bicc inteldialect internal linker_private
+syn keyword llvmKeyword linker_private_weak linker_private_weak_def_auto
+syn keyword llvmKeyword linkonce linkonce_odr linkonce_odr_auto_hide
+syn keyword llvmKeyword localdynamic localexec minsize module monotonic
+syn keyword llvmKeyword msp430_intrcc naked nest noalias nocapture
+syn keyword llvmKeyword noimplicitfloat noinline nonlazybind noredzone noreturn
+syn keyword llvmKeyword nounwind optsize personality private protected
+syn keyword llvmKeyword ptx_device ptx_kernel readnone readonly release
+syn keyword llvmKeyword returns_twice section seq_cst sideeffect signext
+syn keyword llvmKeyword singlethread spir_func spir_kernel sret ssp sspreq
+syn keyword llvmKeyword sspstrong tail target thread_local to triple
+syn keyword llvmKeyword unnamed_addr unordered uwtable volatile weak weak_odr
+syn keyword llvmKeyword x86_fastcallcc x86_stdcallcc x86_thiscallcc zeroext
+syn keyword llvmKeyword sanitize_thread sanitize_memory
" Obsolete keywords.
syn keyword llvmError getresult begin end
diff --git a/utils/vim/vimrc b/utils/vim/vimrc
index 3f863d64bc49..c35eb0ecbbeb 100644
--- a/utils/vim/vimrc
+++ b/utils/vim/vimrc
@@ -1,5 +1,5 @@
" LLVM coding guidelines conformance for VIM
-" $Revision: 117415 $
+" $Revision: 176235 $
"
" Maintainer: The LLVM Team, http://llvm.org
" WARNING: Read before you source in all these commands and macros! Some
@@ -85,6 +85,13 @@ augroup filetype
au! BufRead,BufNewFile *.td set filetype=tablegen
augroup END
+" Enable syntax highlighting for reStructuredText files. To use, copy
+" rest.vim (http://www.vim.org/scripts/script.php?script_id=973)
+" to ~/.vim/syntax .
+augroup filetype
+ au! BufRead,BufNewFile *.rst set filetype=rest
+augroup END
+
" Additional vim features to optionally uncomment.
"set showcmd
"set showmatch
diff --git a/utils/wciia.py b/utils/wciia.py
new file mode 100755
index 000000000000..c838819ebe4f
--- /dev/null
+++ b/utils/wciia.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+
+"""
+wciia - Whose Code Is It Anyway
+
+Determines the code owner of a file/folder relative to the llvm source root.
+The code owner is determined from the content of CODE_OWNERS.TXT
+by parsing the D: field.
+
+usage:
+
+utils/wciia.py path
+
+limitations:
+- must be run from llvm source root
+- very simplistic algorithm
+- only handles * as a wildcard
+- not very user friendly
+- does not handle the proposed F: field
+
+"""
+
+import os
+
+code_owners = {}
+
+def process_files_and_folders(owner):
+ filesfolders = owner['filesfolders']
+ # paths must be in ( ... ) so strip them
+ lpar = filesfolders.find('(')
+ rpar = filesfolders.rfind(')')
+ if rpar <= lpar:
+ # give up
+ return
+ paths = filesfolders[lpar+1:rpar]
+ # split paths
+ owner['paths'] = []
+ for path in paths.split():
+ owner['paths'].append(path)
+
+def process_code_owner(owner):
+ if 'filesfolders' in owner:
+ filesfolders = owner['filesfolders']
+ else:
+# print "F: field missing, using D: field"
+ owner['filesfolders'] = owner['description']
+ process_files_and_folders(owner)
+ code_owners[owner['name']] = owner
+
+# process CODE_OWNERS.TXT first
+code_owners_file = open("CODE_OWNERS.TXT", "r").readlines()
+code_owner = {}
+for line in code_owners_file:
+ for word in line.split():
+ if word == "N:":
+ name = line[2:].strip()
+ if code_owner:
+ process_code_owner(code_owner)
+ code_owner = {}
+ # reset the values
+ code_owner['name'] = name
+ if word == "E:":
+ email = line[2:].strip()
+ code_owner['email'] = email
+ if word == "D:":
+ description = line[2:].strip()
+ code_owner['description'] = description
+ if word == "F:":
+ filesfolders = line[2:].strip()
+ code_owner['filesfolders'].append(filesfolders)
+
+def find_owners(fpath):
+ onames = []
+ lmatch = -1
+ # very simplistic way of finding the best match
+ for name in code_owners:
+ owner = code_owners[name]
+ if 'paths' in owner:
+ for path in owner['paths']:
+# print "searching (" + path + ")"
+ # try exact match
+ if fpath == path:
+ return name
+ # see if path ends with a *
+ rstar = path.rfind('*')
+ if rstar>0:
+ # try the longest match
+ rpos = -1
+ if len(fpath) < len(path):
+ rpos = path.find(fpath)
+ if rpos == 0:
+ onames.append(name)
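+ # no closer match found; fall back to the project-wide default owner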
+ onames.append('Chris Lattner')
+ return onames
+
+# now let's try to find the owner of the file or folder
+import sys
+
+if len(sys.argv) < 2:
+ print "usage " + sys.argv[0] + " file_or_folder"
+ exit(-1)
+
+# the path we are checking
+path = str(sys.argv[1])
+
+# check if this is a real path
+if not os.path.exists(path):
+ print "path (" + path + ") does not exist"
+ exit(-1)
+
+owners_name = find_owners(path)
+
+# be grammatically correct
+print "The owner(s) of the (" + path + ") is(are) : " + str(owners_name)
+
+exit(0)
+
+# bottom up walk of the current .
+# not yet used
+root = "."
+for dir,subdirList,fileList in os.walk( root , topdown=False ) :
+ print "dir :" , dir
+ for fname in fileList :
+ print "-" , fname
+ print
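For reference, an entry in the format the parsing loop above consumes; the name, e-mail, and path here are made up for illustration:

    N: Jane Doe
    E: jane@example.org
    D: Code generator (lib/CodeGen/*)

The D: field doubles as the file/folder list: process_files_and_folders() strips the parentheses and treats each whitespace-separated path, optionally ending in *, as owned by that entry.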
diff --git a/utils/yaml-bench/YAMLBench.cpp b/utils/yaml-bench/YAMLBench.cpp
index e5ee52a16d96..eef4a725a1a3 100644
--- a/utils/yaml-bench/YAMLBench.cpp
+++ b/utils/yaml-bench/YAMLBench.cpp
@@ -17,11 +17,11 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/system_error.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
using namespace llvm;
diff --git a/utils/yaml2obj/yaml2obj.cpp b/utils/yaml2obj/yaml2obj.cpp
index 4fc620f4ea9b..17b65ae870d6 100644
--- a/utils/yaml2obj/yaml2obj.cpp
+++ b/utils/yaml2obj/yaml2obj.cpp
@@ -25,12 +25,11 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
-#include "llvm/Support/YAMLParser.h"
-
#include <vector>
using namespace llvm;
@@ -112,553 +111,140 @@ static bool hexStringToByteArray(StringRef Str, ContainerOut &Out) {
return true;
}
+// The structure of the yaml files is not an exact 1:1 match to COFF. In order
+// to use yaml::IO, we use these structures, which are closer to the YAML source.
+namespace COFFYAML {
+ struct Relocation {
+ uint32_t VirtualAddress;
+ uint32_t SymbolTableIndex;
+ COFF::RelocationTypeX86 Type;
+ };
+
+ struct Section {
+ std::vector<COFF::SectionCharacteristics> Characteristics;
+ StringRef SectionData;
+ std::vector<Relocation> Relocations;
+ StringRef Name;
+ };
+
+ struct Header {
+ COFF::MachineTypes Machine;
+ std::vector<COFF::Characteristics> Characteristics;
+ };
+
+ struct Symbol {
+ COFF::SymbolBaseType SimpleType;
+ uint8_t NumberOfAuxSymbols;
+ StringRef Name;
+ COFF::SymbolStorageClass StorageClass;
+ StringRef AuxillaryData;
+ COFF::SymbolComplexType ComplexType;
+ uint32_t Value;
+ uint16_t SectionNumber;
+ };
+
+ struct Object {
+ Header HeaderData;
+ std::vector<Section> Sections;
+ std::vector<Symbol> Symbols;
+ };
+}
+
/// This parses a yaml stream that represents a COFF object file.
/// See docs/yaml2obj for the yaml schema.
struct COFFParser {
- COFFParser(yaml::Stream &Input) : YS(Input) {
+ COFFParser(COFFYAML::Object &Obj) : Obj(Obj) {
std::memset(&Header, 0, sizeof(Header));
// A COFF string table always starts with a 4 byte size field. Offsets into
// it include this size, so allocate it now.
StringTable.append(4, 0);
}
- bool parseHeader(yaml::Node *HeaderN) {
- yaml::MappingNode *MN = dyn_cast<yaml::MappingNode>(HeaderN);
- if (!MN) {
- YS.printError(HeaderN, "header's value must be a mapping node");
- return false;
+ void parseHeader() {
+ Header.Machine = Obj.HeaderData.Machine;
+
+ const std::vector<COFF::Characteristics> &Characteristics =
+ Obj.HeaderData.Characteristics;
+ for (std::vector<COFF::Characteristics>::const_iterator I =
+ Characteristics.begin(), E = Characteristics.end(); I != E; ++I) {
+ uint16_t Characteristic = *I;
+ Header.Characteristics |= Characteristic;
}
- for (yaml::MappingNode::iterator i = MN->begin(), e = MN->end();
- i != e; ++i) {
- yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(i->getKey());
- if (!Key) {
- YS.printError(i->getKey(), "Keys must be scalar values");
- return false;
- }
- SmallString<32> Storage;
- StringRef KeyValue = Key->getValue(Storage);
- if (KeyValue == "Characteristics") {
- if (!parseHeaderCharacteristics(i->getValue()))
- return false;
+ }
+
+ bool parseSections() {
+ for (std::vector<COFFYAML::Section>::iterator i = Obj.Sections.begin(),
+ e = Obj.Sections.end(); i != e; ++i) {
+ const COFFYAML::Section &YamlSection = *i;
+ Section Sec;
+ std::memset(&Sec.Header, 0, sizeof(Sec.Header));
+
+ // If the name is at most 8 bytes, store it in place; otherwise
+ // store it in the string table.
+ StringRef Name = YamlSection.Name;
+ std::fill_n(Sec.Header.Name, unsigned(COFF::NameSize), 0);
+ if (Name.size() <= COFF::NameSize) {
+ std::copy(Name.begin(), Name.end(), Sec.Header.Name);
} else {
- yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(i->getValue());
- if (!Value) {
- YS.printError(Value,
- Twine(KeyValue) + " must be a scalar value");
- return false;
- }
- if (KeyValue == "Machine") {
- uint16_t Machine = COFF::MT_Invalid;
- if (!getAs(Value, Machine)) {
- // It's not a raw number, try matching the string.
- StringRef ValueValue = Value->getValue(Storage);
- Machine = StringSwitch<COFF::MachineTypes>(ValueValue)
- .Case( "IMAGE_FILE_MACHINE_UNKNOWN"
- , COFF::IMAGE_FILE_MACHINE_UNKNOWN)
- .Case( "IMAGE_FILE_MACHINE_AM33"
- , COFF::IMAGE_FILE_MACHINE_AM33)
- .Case( "IMAGE_FILE_MACHINE_AMD64"
- , COFF::IMAGE_FILE_MACHINE_AMD64)
- .Case( "IMAGE_FILE_MACHINE_ARM"
- , COFF::IMAGE_FILE_MACHINE_ARM)
- .Case( "IMAGE_FILE_MACHINE_ARMV7"
- , COFF::IMAGE_FILE_MACHINE_ARMV7)
- .Case( "IMAGE_FILE_MACHINE_EBC"
- , COFF::IMAGE_FILE_MACHINE_EBC)
- .Case( "IMAGE_FILE_MACHINE_I386"
- , COFF::IMAGE_FILE_MACHINE_I386)
- .Case( "IMAGE_FILE_MACHINE_IA64"
- , COFF::IMAGE_FILE_MACHINE_IA64)
- .Case( "IMAGE_FILE_MACHINE_M32R"
- , COFF::IMAGE_FILE_MACHINE_M32R)
- .Case( "IMAGE_FILE_MACHINE_MIPS16"
- , COFF::IMAGE_FILE_MACHINE_MIPS16)
- .Case( "IMAGE_FILE_MACHINE_MIPSFPU"
- , COFF::IMAGE_FILE_MACHINE_MIPSFPU)
- .Case( "IMAGE_FILE_MACHINE_MIPSFPU16"
- , COFF::IMAGE_FILE_MACHINE_MIPSFPU16)
- .Case( "IMAGE_FILE_MACHINE_POWERPC"
- , COFF::IMAGE_FILE_MACHINE_POWERPC)
- .Case( "IMAGE_FILE_MACHINE_POWERPCFP"
- , COFF::IMAGE_FILE_MACHINE_POWERPCFP)
- .Case( "IMAGE_FILE_MACHINE_R4000"
- , COFF::IMAGE_FILE_MACHINE_R4000)
- .Case( "IMAGE_FILE_MACHINE_SH3"
- , COFF::IMAGE_FILE_MACHINE_SH3)
- .Case( "IMAGE_FILE_MACHINE_SH3DSP"
- , COFF::IMAGE_FILE_MACHINE_SH3DSP)
- .Case( "IMAGE_FILE_MACHINE_SH4"
- , COFF::IMAGE_FILE_MACHINE_SH4)
- .Case( "IMAGE_FILE_MACHINE_SH5"
- , COFF::IMAGE_FILE_MACHINE_SH5)
- .Case( "IMAGE_FILE_MACHINE_THUMB"
- , COFF::IMAGE_FILE_MACHINE_THUMB)
- .Case( "IMAGE_FILE_MACHINE_WCEMIPSV2"
- , COFF::IMAGE_FILE_MACHINE_WCEMIPSV2)
- .Default(COFF::MT_Invalid);
- if (Machine == COFF::MT_Invalid) {
- YS.printError(Value, "Invalid value for Machine");
- return false;
- }
- }
- Header.Machine = Machine;
- } else if (KeyValue == "NumberOfSections") {
- if (!getAs(Value, Header.NumberOfSections)) {
- YS.printError(Value, "Invalid value for NumberOfSections");
- return false;
- }
- } else if (KeyValue == "TimeDateStamp") {
- if (!getAs(Value, Header.TimeDateStamp)) {
- YS.printError(Value, "Invalid value for TimeDateStamp");
- return false;
- }
- } else if (KeyValue == "PointerToSymbolTable") {
- if (!getAs(Value, Header.PointerToSymbolTable)) {
- YS.printError(Value, "Invalid value for PointerToSymbolTable");
- return false;
- }
- } else if (KeyValue == "NumberOfSymbols") {
- if (!getAs(Value, Header.NumberOfSymbols)) {
- YS.printError(Value, "Invalid value for NumberOfSymbols");
- return false;
- }
- } else if (KeyValue == "SizeOfOptionalHeader") {
- if (!getAs(Value, Header.SizeOfOptionalHeader)) {
- YS.printError(Value, "Invalid value for SizeOfOptionalHeader");
- return false;
- }
- } else {
- YS.printError(Key, "Unrecognized key in header");
+ // Add string to the string table and format the index for output.
+ unsigned Index = getStringIndex(Name);
+ std::string str = utostr(Index);
+ if (str.size() > 7) {
+ errs() << "String table got too large";
return false;
}
+ Sec.Header.Name[0] = '/';
+ std::copy(str.begin(), str.end(), Sec.Header.Name + 1);
}
- }
- return true;
- }
- bool parseHeaderCharacteristics(yaml::Node *Characteristics) {
- yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(Characteristics);
- yaml::SequenceNode *SeqValue
- = dyn_cast<yaml::SequenceNode>(Characteristics);
- if (!Value && !SeqValue) {
- YS.printError(Characteristics,
- "Characteristics must either be a number or sequence");
- return false;
- }
- if (Value) {
- if (!getAs(Value, Header.Characteristics)) {
- YS.printError(Value, "Invalid value for Characteristics");
- return false;
- }
- } else {
- for (yaml::SequenceNode::iterator ci = SeqValue->begin(),
- ce = SeqValue->end();
- ci != ce; ++ci) {
- yaml::ScalarNode *CharValue = dyn_cast<yaml::ScalarNode>(&*ci);
- if (!CharValue) {
- YS.printError(CharValue,
- "Characteristics must be scalar values");
- return false;
- }
- SmallString<32> Storage;
- StringRef Char = CharValue->getValue(Storage);
- uint16_t Characteristic = StringSwitch<COFF::Characteristics>(Char)
- .Case( "IMAGE_FILE_RELOCS_STRIPPED"
- , COFF::IMAGE_FILE_RELOCS_STRIPPED)
- .Case( "IMAGE_FILE_EXECUTABLE_IMAGE"
- , COFF::IMAGE_FILE_EXECUTABLE_IMAGE)
- .Case( "IMAGE_FILE_LINE_NUMS_STRIPPED"
- , COFF::IMAGE_FILE_LINE_NUMS_STRIPPED)
- .Case( "IMAGE_FILE_LOCAL_SYMS_STRIPPED"
- , COFF::IMAGE_FILE_LOCAL_SYMS_STRIPPED)
- .Case( "IMAGE_FILE_AGGRESSIVE_WS_TRIM"
- , COFF::IMAGE_FILE_AGGRESSIVE_WS_TRIM)
- .Case( "IMAGE_FILE_LARGE_ADDRESS_AWARE"
- , COFF::IMAGE_FILE_LARGE_ADDRESS_AWARE)
- .Case( "IMAGE_FILE_BYTES_REVERSED_LO"
- , COFF::IMAGE_FILE_BYTES_REVERSED_LO)
- .Case( "IMAGE_FILE_32BIT_MACHINE"
- , COFF::IMAGE_FILE_32BIT_MACHINE)
- .Case( "IMAGE_FILE_DEBUG_STRIPPED"
- , COFF::IMAGE_FILE_DEBUG_STRIPPED)
- .Case( "IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP"
- , COFF::IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP)
- .Case( "IMAGE_FILE_SYSTEM"
- , COFF::IMAGE_FILE_SYSTEM)
- .Case( "IMAGE_FILE_DLL"
- , COFF::IMAGE_FILE_DLL)
- .Case( "IMAGE_FILE_UP_SYSTEM_ONLY"
- , COFF::IMAGE_FILE_UP_SYSTEM_ONLY)
- .Default(COFF::C_Invalid);
- if (Characteristic == COFF::C_Invalid) {
- // TODO: Typo-correct.
- YS.printError(CharValue,
- "Invalid value for Characteristic");
- return false;
- }
- Header.Characteristics |= Characteristic;
+ for (std::vector<COFF::SectionCharacteristics>::const_iterator i =
+ YamlSection.Characteristics.begin(),
+ e = YamlSection.Characteristics.end();
+ i != e; ++i) {
+ uint32_t Characteristic = *i;
+ Sec.Header.Characteristics |= Characteristic;
}
- }
- return true;
- }
- bool parseSections(yaml::Node *SectionsN) {
- yaml::SequenceNode *SN = dyn_cast<yaml::SequenceNode>(SectionsN);
- if (!SN) {
- YS.printError(SectionsN, "Sections must be a sequence");
- return false;
- }
- for (yaml::SequenceNode::iterator i = SN->begin(), e = SN->end();
- i != e; ++i) {
- Section Sec;
- std::memset(&Sec.Header, 0, sizeof(Sec.Header));
- yaml::MappingNode *SecMap = dyn_cast<yaml::MappingNode>(&*i);
- if (!SecMap) {
- YS.printError(&*i, "Section entry must be a map");
+ StringRef Data = YamlSection.SectionData;
+ if (!hexStringToByteArray(Data, Sec.Data)) {
+ errs() << "SectionData must be a collection of pairs of hex bytes";
return false;
}
- for (yaml::MappingNode::iterator si = SecMap->begin(), se = SecMap->end();
- si != se; ++si) {
- yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(si->getKey());
- if (!Key) {
- YS.printError(si->getKey(), "Keys must be scalar values");
- return false;
- }
- SmallString<32> Storage;
- StringRef KeyValue = Key->getValue(Storage);
-
- yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
- if (KeyValue == "Name") {
- // If the name is less than 8 bytes, store it in place, otherwise
- // store it in the string table.
- StringRef Name = Value->getValue(Storage);
- std::fill_n(Sec.Header.Name, unsigned(COFF::NameSize), 0);
- if (Name.size() <= COFF::NameSize) {
- std::copy(Name.begin(), Name.end(), Sec.Header.Name);
- } else {
- // Add string to the string table and format the index for output.
- unsigned Index = getStringIndex(Name);
- std::string str = utostr(Index);
- if (str.size() > 7) {
- YS.printError(Value, "String table got too large");
- return false;
- }
- Sec.Header.Name[0] = '/';
- std::copy(str.begin(), str.end(), Sec.Header.Name + 1);
- }
- } else if (KeyValue == "VirtualSize") {
- if (!getAs(Value, Sec.Header.VirtualSize)) {
- YS.printError(Value, "Invalid value for VirtualSize");
- return false;
- }
- } else if (KeyValue == "VirtualAddress") {
- if (!getAs(Value, Sec.Header.VirtualAddress)) {
- YS.printError(Value, "Invalid value for VirtualAddress");
- return false;
- }
- } else if (KeyValue == "SizeOfRawData") {
- if (!getAs(Value, Sec.Header.SizeOfRawData)) {
- YS.printError(Value, "Invalid value for SizeOfRawData");
- return false;
- }
- } else if (KeyValue == "PointerToRawData") {
- if (!getAs(Value, Sec.Header.PointerToRawData)) {
- YS.printError(Value, "Invalid value for PointerToRawData");
- return false;
- }
- } else if (KeyValue == "PointerToRelocations") {
- if (!getAs(Value, Sec.Header.PointerToRelocations)) {
- YS.printError(Value, "Invalid value for PointerToRelocations");
- return false;
- }
- } else if (KeyValue == "PointerToLineNumbers") {
- if (!getAs(Value, Sec.Header.PointerToLineNumbers)) {
- YS.printError(Value, "Invalid value for PointerToLineNumbers");
- return false;
- }
- } else if (KeyValue == "NumberOfRelocations") {
- if (!getAs(Value, Sec.Header.NumberOfRelocations)) {
- YS.printError(Value, "Invalid value for NumberOfRelocations");
- return false;
- }
- } else if (KeyValue == "NumberOfLineNumbers") {
- if (!getAs(Value, Sec.Header.NumberOfLineNumbers)) {
- YS.printError(Value, "Invalid value for NumberOfLineNumbers");
- return false;
- }
- } else if (KeyValue == "Characteristics") {
- yaml::SequenceNode *SeqValue
- = dyn_cast<yaml::SequenceNode>(si->getValue());
- if (!Value && !SeqValue) {
- YS.printError(si->getValue(),
- "Characteristics must either be a number or sequence");
- return false;
- }
- if (Value) {
- if (!getAs(Value, Sec.Header.Characteristics)) {
- YS.printError(Value, "Invalid value for Characteristics");
- return false;
- }
- } else {
- for (yaml::SequenceNode::iterator ci = SeqValue->begin(),
- ce = SeqValue->end();
- ci != ce; ++ci) {
- yaml::ScalarNode *CharValue = dyn_cast<yaml::ScalarNode>(&*ci);
- if (!CharValue) {
- YS.printError(CharValue, "Invalid value for Characteristics");
- return false;
- }
- StringRef Char = CharValue->getValue(Storage);
- uint32_t Characteristic =
- StringSwitch<COFF::SectionCharacteristics>(Char)
- .Case( "IMAGE_SCN_TYPE_NO_PAD"
- , COFF::IMAGE_SCN_TYPE_NO_PAD)
- .Case( "IMAGE_SCN_CNT_CODE"
- , COFF::IMAGE_SCN_CNT_CODE)
- .Case( "IMAGE_SCN_CNT_INITIALIZED_DATA"
- , COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
- .Case( "IMAGE_SCN_CNT_UNINITIALIZED_DATA"
- , COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
- .Case( "IMAGE_SCN_LNK_OTHER"
- , COFF::IMAGE_SCN_LNK_OTHER)
- .Case( "IMAGE_SCN_LNK_INFO"
- , COFF::IMAGE_SCN_LNK_INFO)
- .Case( "IMAGE_SCN_LNK_REMOVE"
- , COFF::IMAGE_SCN_LNK_REMOVE)
- .Case( "IMAGE_SCN_LNK_COMDAT"
- , COFF::IMAGE_SCN_LNK_COMDAT)
- .Case( "IMAGE_SCN_GPREL"
- , COFF::IMAGE_SCN_GPREL)
- .Case( "IMAGE_SCN_MEM_PURGEABLE"
- , COFF::IMAGE_SCN_MEM_PURGEABLE)
- .Case( "IMAGE_SCN_MEM_16BIT"
- , COFF::IMAGE_SCN_MEM_16BIT)
- .Case( "IMAGE_SCN_MEM_LOCKED"
- , COFF::IMAGE_SCN_MEM_LOCKED)
- .Case( "IMAGE_SCN_MEM_PRELOAD"
- , COFF::IMAGE_SCN_MEM_PRELOAD)
- .Case( "IMAGE_SCN_ALIGN_1BYTES"
- , COFF::IMAGE_SCN_ALIGN_1BYTES)
- .Case( "IMAGE_SCN_ALIGN_2BYTES"
- , COFF::IMAGE_SCN_ALIGN_2BYTES)
- .Case( "IMAGE_SCN_ALIGN_4BYTES"
- , COFF::IMAGE_SCN_ALIGN_4BYTES)
- .Case( "IMAGE_SCN_ALIGN_8BYTES"
- , COFF::IMAGE_SCN_ALIGN_8BYTES)
- .Case( "IMAGE_SCN_ALIGN_16BYTES"
- , COFF::IMAGE_SCN_ALIGN_16BYTES)
- .Case( "IMAGE_SCN_ALIGN_32BYTES"
- , COFF::IMAGE_SCN_ALIGN_32BYTES)
- .Case( "IMAGE_SCN_ALIGN_64BYTES"
- , COFF::IMAGE_SCN_ALIGN_64BYTES)
- .Case( "IMAGE_SCN_ALIGN_128BYTES"
- , COFF::IMAGE_SCN_ALIGN_128BYTES)
- .Case( "IMAGE_SCN_ALIGN_256BYTES"
- , COFF::IMAGE_SCN_ALIGN_256BYTES)
- .Case( "IMAGE_SCN_ALIGN_512BYTES"
- , COFF::IMAGE_SCN_ALIGN_512BYTES)
- .Case( "IMAGE_SCN_ALIGN_1024BYTES"
- , COFF::IMAGE_SCN_ALIGN_1024BYTES)
- .Case( "IMAGE_SCN_ALIGN_2048BYTES"
- , COFF::IMAGE_SCN_ALIGN_2048BYTES)
- .Case( "IMAGE_SCN_ALIGN_4096BYTES"
- , COFF::IMAGE_SCN_ALIGN_4096BYTES)
- .Case( "IMAGE_SCN_ALIGN_8192BYTES"
- , COFF::IMAGE_SCN_ALIGN_8192BYTES)
- .Case( "IMAGE_SCN_LNK_NRELOC_OVFL"
- , COFF::IMAGE_SCN_LNK_NRELOC_OVFL)
- .Case( "IMAGE_SCN_MEM_DISCARDABLE"
- , COFF::IMAGE_SCN_MEM_DISCARDABLE)
- .Case( "IMAGE_SCN_MEM_NOT_CACHED"
- , COFF::IMAGE_SCN_MEM_NOT_CACHED)
- .Case( "IMAGE_SCN_MEM_NOT_PAGED"
- , COFF::IMAGE_SCN_MEM_NOT_PAGED)
- .Case( "IMAGE_SCN_MEM_SHARED"
- , COFF::IMAGE_SCN_MEM_SHARED)
- .Case( "IMAGE_SCN_MEM_EXECUTE"
- , COFF::IMAGE_SCN_MEM_EXECUTE)
- .Case( "IMAGE_SCN_MEM_READ"
- , COFF::IMAGE_SCN_MEM_READ)
- .Case( "IMAGE_SCN_MEM_WRITE"
- , COFF::IMAGE_SCN_MEM_WRITE)
- .Default(COFF::SC_Invalid);
- if (Characteristic == COFF::SC_Invalid) {
- YS.printError(CharValue, "Invalid value for Characteristic");
- return false;
- }
- Sec.Header.Characteristics |= Characteristic;
- }
- }
- } else if (KeyValue == "SectionData") {
- yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
- SmallString<32> Storage;
- StringRef Data = Value->getValue(Storage);
- if (!hexStringToByteArray(Data, Sec.Data)) {
- YS.printError(Value, "SectionData must be a collection of pairs of"
- "hex bytes");
- return false;
- }
- } else
- si->skip();
- }
Sections.push_back(Sec);
}
return true;
}
- bool parseSymbols(yaml::Node *SymbolsN) {
- yaml::SequenceNode *SN = dyn_cast<yaml::SequenceNode>(SymbolsN);
- if (!SN) {
- YS.printError(SymbolsN, "Symbols must be a sequence");
- return false;
- }
- for (yaml::SequenceNode::iterator i = SN->begin(), e = SN->end();
- i != e; ++i) {
+ bool parseSymbols() {
+ for (std::vector<COFFYAML::Symbol>::iterator i = Obj.Symbols.begin(),
+ e = Obj.Symbols.end(); i != e; ++i) {
+ COFFYAML::Symbol YamlSymbol = *i;
Symbol Sym;
std::memset(&Sym.Header, 0, sizeof(Sym.Header));
- yaml::MappingNode *SymMap = dyn_cast<yaml::MappingNode>(&*i);
- if (!SymMap) {
- YS.printError(&*i, "Symbol must be a map");
- return false;
+
+ // If the name is at most 8 bytes, store it in place; otherwise
+ // store it in the string table.
+ StringRef Name = YamlSymbol.Name;
+ std::fill_n(Sym.Header.Name, unsigned(COFF::NameSize), 0);
+ if (Name.size() <= COFF::NameSize) {
+ std::copy(Name.begin(), Name.end(), Sym.Header.Name);
+ } else {
+ // Add string to the string table and format the index for output.
+ unsigned Index = getStringIndex(Name);
+ *reinterpret_cast<support::aligned_ulittle32_t*>(
+ Sym.Header.Name + 4) = Index;
}
- for (yaml::MappingNode::iterator si = SymMap->begin(), se = SymMap->end();
- si != se; ++si) {
- yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(si->getKey());
- if (!Key) {
- YS.printError(si->getKey(), "Keys must be scalar values");
- return false;
- }
- SmallString<32> Storage;
- StringRef KeyValue = Key->getValue(Storage);
- yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
- if (!Value) {
- YS.printError(si->getValue(), "Must be a scalar value");
- return false;
- }
- if (KeyValue == "Name") {
- // If the name is less than 8 bytes, store it in place, otherwise
- // store it in the string table.
- StringRef Name = Value->getValue(Storage);
- std::fill_n(Sym.Header.Name, unsigned(COFF::NameSize), 0);
- if (Name.size() <= COFF::NameSize) {
- std::copy(Name.begin(), Name.end(), Sym.Header.Name);
- } else {
- // Add string to the string table and format the index for output.
- unsigned Index = getStringIndex(Name);
- *reinterpret_cast<support::aligned_ulittle32_t*>(
- Sym.Header.Name + 4) = Index;
- }
- } else if (KeyValue == "Value") {
- if (!getAs(Value, Sym.Header.Value)) {
- YS.printError(Value, "Invalid value for Value");
- return false;
- }
- } else if (KeyValue == "SimpleType") {
- Sym.Header.Type |= StringSwitch<COFF::SymbolBaseType>(
- Value->getValue(Storage))
- .Case("IMAGE_SYM_TYPE_NULL", COFF::IMAGE_SYM_TYPE_NULL)
- .Case("IMAGE_SYM_TYPE_VOID", COFF::IMAGE_SYM_TYPE_VOID)
- .Case("IMAGE_SYM_TYPE_CHAR", COFF::IMAGE_SYM_TYPE_CHAR)
- .Case("IMAGE_SYM_TYPE_SHORT", COFF::IMAGE_SYM_TYPE_SHORT)
- .Case("IMAGE_SYM_TYPE_INT", COFF::IMAGE_SYM_TYPE_INT)
- .Case("IMAGE_SYM_TYPE_LONG", COFF::IMAGE_SYM_TYPE_LONG)
- .Case("IMAGE_SYM_TYPE_FLOAT", COFF::IMAGE_SYM_TYPE_FLOAT)
- .Case("IMAGE_SYM_TYPE_DOUBLE", COFF::IMAGE_SYM_TYPE_DOUBLE)
- .Case("IMAGE_SYM_TYPE_STRUCT", COFF::IMAGE_SYM_TYPE_STRUCT)
- .Case("IMAGE_SYM_TYPE_UNION", COFF::IMAGE_SYM_TYPE_UNION)
- .Case("IMAGE_SYM_TYPE_ENUM", COFF::IMAGE_SYM_TYPE_ENUM)
- .Case("IMAGE_SYM_TYPE_MOE", COFF::IMAGE_SYM_TYPE_MOE)
- .Case("IMAGE_SYM_TYPE_BYTE", COFF::IMAGE_SYM_TYPE_BYTE)
- .Case("IMAGE_SYM_TYPE_WORD", COFF::IMAGE_SYM_TYPE_WORD)
- .Case("IMAGE_SYM_TYPE_UINT", COFF::IMAGE_SYM_TYPE_UINT)
- .Case("IMAGE_SYM_TYPE_DWORD", COFF::IMAGE_SYM_TYPE_DWORD)
- .Default(COFF::IMAGE_SYM_TYPE_NULL);
- } else if (KeyValue == "ComplexType") {
- Sym.Header.Type |= StringSwitch<COFF::SymbolComplexType>(
- Value->getValue(Storage))
- .Case("IMAGE_SYM_DTYPE_NULL", COFF::IMAGE_SYM_DTYPE_NULL)
- .Case("IMAGE_SYM_DTYPE_POINTER", COFF::IMAGE_SYM_DTYPE_POINTER)
- .Case("IMAGE_SYM_DTYPE_FUNCTION", COFF::IMAGE_SYM_DTYPE_FUNCTION)
- .Case("IMAGE_SYM_DTYPE_ARRAY", COFF::IMAGE_SYM_DTYPE_ARRAY)
- .Default(COFF::IMAGE_SYM_DTYPE_NULL)
- << COFF::SCT_COMPLEX_TYPE_SHIFT;
- } else if (KeyValue == "StorageClass") {
- Sym.Header.StorageClass = StringSwitch<COFF::SymbolStorageClass>(
- Value->getValue(Storage))
- .Case( "IMAGE_SYM_CLASS_END_OF_FUNCTION"
- , COFF::IMAGE_SYM_CLASS_END_OF_FUNCTION)
- .Case( "IMAGE_SYM_CLASS_NULL"
- , COFF::IMAGE_SYM_CLASS_NULL)
- .Case( "IMAGE_SYM_CLASS_AUTOMATIC"
- , COFF::IMAGE_SYM_CLASS_AUTOMATIC)
- .Case( "IMAGE_SYM_CLASS_EXTERNAL"
- , COFF::IMAGE_SYM_CLASS_EXTERNAL)
- .Case( "IMAGE_SYM_CLASS_STATIC"
- , COFF::IMAGE_SYM_CLASS_STATIC)
- .Case( "IMAGE_SYM_CLASS_REGISTER"
- , COFF::IMAGE_SYM_CLASS_REGISTER)
- .Case( "IMAGE_SYM_CLASS_EXTERNAL_DEF"
- , COFF::IMAGE_SYM_CLASS_EXTERNAL_DEF)
- .Case( "IMAGE_SYM_CLASS_LABEL"
- , COFF::IMAGE_SYM_CLASS_LABEL)
- .Case( "IMAGE_SYM_CLASS_UNDEFINED_LABEL"
- , COFF::IMAGE_SYM_CLASS_UNDEFINED_LABEL)
- .Case( "IMAGE_SYM_CLASS_MEMBER_OF_STRUCT"
- , COFF::IMAGE_SYM_CLASS_MEMBER_OF_STRUCT)
- .Case( "IMAGE_SYM_CLASS_ARGUMENT"
- , COFF::IMAGE_SYM_CLASS_ARGUMENT)
- .Case( "IMAGE_SYM_CLASS_STRUCT_TAG"
- , COFF::IMAGE_SYM_CLASS_STRUCT_TAG)
- .Case( "IMAGE_SYM_CLASS_MEMBER_OF_UNION"
- , COFF::IMAGE_SYM_CLASS_MEMBER_OF_UNION)
- .Case( "IMAGE_SYM_CLASS_UNION_TAG"
- , COFF::IMAGE_SYM_CLASS_UNION_TAG)
- .Case( "IMAGE_SYM_CLASS_TYPE_DEFINITION"
- , COFF::IMAGE_SYM_CLASS_TYPE_DEFINITION)
- .Case( "IMAGE_SYM_CLASS_UNDEFINED_STATIC"
- , COFF::IMAGE_SYM_CLASS_UNDEFINED_STATIC)
- .Case( "IMAGE_SYM_CLASS_ENUM_TAG"
- , COFF::IMAGE_SYM_CLASS_ENUM_TAG)
- .Case( "IMAGE_SYM_CLASS_MEMBER_OF_ENUM"
- , COFF::IMAGE_SYM_CLASS_MEMBER_OF_ENUM)
- .Case( "IMAGE_SYM_CLASS_REGISTER_PARAM"
- , COFF::IMAGE_SYM_CLASS_REGISTER_PARAM)
- .Case( "IMAGE_SYM_CLASS_BIT_FIELD"
- , COFF::IMAGE_SYM_CLASS_BIT_FIELD)
- .Case( "IMAGE_SYM_CLASS_BLOCK"
- , COFF::IMAGE_SYM_CLASS_BLOCK)
- .Case( "IMAGE_SYM_CLASS_FUNCTION"
- , COFF::IMAGE_SYM_CLASS_FUNCTION)
- .Case( "IMAGE_SYM_CLASS_END_OF_STRUCT"
- , COFF::IMAGE_SYM_CLASS_END_OF_STRUCT)
- .Case( "IMAGE_SYM_CLASS_FILE"
- , COFF::IMAGE_SYM_CLASS_FILE)
- .Case( "IMAGE_SYM_CLASS_SECTION"
- , COFF::IMAGE_SYM_CLASS_SECTION)
- .Case( "IMAGE_SYM_CLASS_WEAK_EXTERNAL"
- , COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
- .Case( "IMAGE_SYM_CLASS_CLR_TOKEN"
- , COFF::IMAGE_SYM_CLASS_CLR_TOKEN)
- .Default(COFF::SSC_Invalid);
- if (Sym.Header.StorageClass == COFF::SSC_Invalid) {
- YS.printError(Value, "Invalid value for StorageClass");
- return false;
- }
- } else if (KeyValue == "SectionNumber") {
- if (!getAs(Value, Sym.Header.SectionNumber)) {
- YS.printError(Value, "Invalid value for SectionNumber");
- return false;
- }
- } else if (KeyValue == "AuxillaryData") {
- StringRef Data = Value->getValue(Storage);
- if (!hexStringToByteArray(Data, Sym.AuxSymbols)) {
- YS.printError(Value, "AuxillaryData must be a collection of pairs"
- "of hex bytes");
- return false;
- }
- } else
- si->skip();
+ Sym.Header.Value = YamlSymbol.Value;
+ Sym.Header.Type |= YamlSymbol.SimpleType;
+ Sym.Header.Type |= YamlSymbol.ComplexType << COFF::SCT_COMPLEX_TYPE_SHIFT;
+ Sym.Header.StorageClass = YamlSymbol.StorageClass;
+ Sym.Header.SectionNumber = YamlSymbol.SectionNumber;
+
+ StringRef Data = YamlSymbol.AuxillaryData;
+ if (!hexStringToByteArray(Data, Sym.AuxSymbols)) {
+ errs() << "AuxillaryData must be a collection of pairs of hex bytes";
+ return false;
}
Symbols.push_back(Sym);
}
@@ -666,33 +252,12 @@ struct COFFParser {
}
bool parse() {
- yaml::Document &D = *YS.begin();
- yaml::MappingNode *Root = dyn_cast<yaml::MappingNode>(D.getRoot());
- if (!Root) {
- YS.printError(D.getRoot(), "Root node must be a map");
+ parseHeader();
+ if (!parseSections())
return false;
- }
- for (yaml::MappingNode::iterator i = Root->begin(), e = Root->end();
- i != e; ++i) {
- yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(i->getKey());
- if (!Key) {
- YS.printError(i->getKey(), "Keys must be scalar values");
- return false;
- }
- SmallString<32> Storage;
- StringRef KeyValue = Key->getValue(Storage);
- if (KeyValue == "header") {
- if (!parseHeader(i->getValue()))
- return false;
- } else if (KeyValue == "sections") {
- if (!parseSections(i->getValue()))
- return false;
- } else if (KeyValue == "symbols") {
- if (!parseSymbols(i->getValue()))
- return false;
- }
- }
- return !YS.failed();
+ if (!parseSymbols())
+ return false;
+ return true;
}
unsigned getStringIndex(StringRef Str) {
@@ -707,7 +272,7 @@ struct COFFParser {
return i->second;
}
- yaml::Stream &YS;
+ COFFYAML::Object &Obj;
COFF::header Header;
struct Section {
@@ -791,7 +356,8 @@ template <typename value_type>
raw_ostream &operator <<( raw_ostream &OS
, const binary_le_impl<value_type> &BLE) {
char Buffer[sizeof(BLE.Value)];
- support::endian::write_le<value_type, support::unaligned>(Buffer, BLE.Value);
+ support::endian::write<value_type, support::little, support::unaligned>(
+ Buffer, BLE.Value);
OS.write(Buffer, sizeof(BLE.Value));
return OS;
}
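The new spelling makes the byte order an explicit template parameter. As a rough sketch of what a little-endian, alignment-agnostic write means (plain C++, not the llvm::support implementation):

    #include <cstdint>

    // Store Value into Buf as little-endian bytes, regardless of host
    // endianness or of Buf's alignment.
    inline void writeLE32(char *Buf, uint32_t Value) {
      for (int i = 0; i < 4; ++i)
        Buf[i] = static_cast<char>((Value >> (8 * i)) & 0xFF);
    }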
@@ -854,6 +420,260 @@ void writeCOFF(COFFParser &CP, raw_ostream &OS) {
OS.write(&CP.StringTable[0], CP.StringTable.size());
}
+LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Relocation)
+LLVM_YAML_IS_SEQUENCE_VECTOR(COFF::SectionCharacteristics)
+LLVM_YAML_IS_SEQUENCE_VECTOR(COFF::Characteristics)
+LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Section)
+LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Symbol)
+
+namespace llvm {
+namespace yaml {
+#define ECase(X) IO.enumCase(Value, #X, COFF::X);
+
+template <>
+struct ScalarEnumerationTraits<COFF::SymbolComplexType> {
+ static void enumeration(IO &IO, COFF::SymbolComplexType &Value) {
+ ECase(IMAGE_SYM_DTYPE_NULL);
+ ECase(IMAGE_SYM_DTYPE_POINTER);
+ ECase(IMAGE_SYM_DTYPE_FUNCTION);
+ ECase(IMAGE_SYM_DTYPE_ARRAY);
+ }
+};
+
+// FIXME: We cannot use ScalarBitSetTraits because of
+// IMAGE_SYM_CLASS_END_OF_FUNCTION which is -1.
+template <>
+struct ScalarEnumerationTraits<COFF::SymbolStorageClass> {
+ static void enumeration(IO &IO, COFF::SymbolStorageClass &Value) {
+ ECase(IMAGE_SYM_CLASS_END_OF_FUNCTION);
+ ECase(IMAGE_SYM_CLASS_NULL);
+ ECase(IMAGE_SYM_CLASS_AUTOMATIC);
+ ECase(IMAGE_SYM_CLASS_EXTERNAL);
+ ECase(IMAGE_SYM_CLASS_STATIC);
+ ECase(IMAGE_SYM_CLASS_REGISTER);
+ ECase(IMAGE_SYM_CLASS_EXTERNAL_DEF);
+ ECase(IMAGE_SYM_CLASS_LABEL);
+ ECase(IMAGE_SYM_CLASS_UNDEFINED_LABEL);
+ ECase(IMAGE_SYM_CLASS_MEMBER_OF_STRUCT);
+ ECase(IMAGE_SYM_CLASS_ARGUMENT);
+ ECase(IMAGE_SYM_CLASS_STRUCT_TAG);
+ ECase(IMAGE_SYM_CLASS_MEMBER_OF_UNION);
+ ECase(IMAGE_SYM_CLASS_UNION_TAG);
+ ECase(IMAGE_SYM_CLASS_TYPE_DEFINITION);
+ ECase(IMAGE_SYM_CLASS_UNDEFINED_STATIC);
+ ECase(IMAGE_SYM_CLASS_ENUM_TAG);
+ ECase(IMAGE_SYM_CLASS_MEMBER_OF_ENUM);
+ ECase(IMAGE_SYM_CLASS_REGISTER_PARAM);
+ ECase(IMAGE_SYM_CLASS_BIT_FIELD);
+ ECase(IMAGE_SYM_CLASS_BLOCK);
+ ECase(IMAGE_SYM_CLASS_FUNCTION);
+ ECase(IMAGE_SYM_CLASS_END_OF_STRUCT);
+ ECase(IMAGE_SYM_CLASS_FILE);
+ ECase(IMAGE_SYM_CLASS_SECTION);
+ ECase(IMAGE_SYM_CLASS_WEAK_EXTERNAL);
+ ECase(IMAGE_SYM_CLASS_CLR_TOKEN);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<COFF::SymbolBaseType> {
+ static void enumeration(IO &IO, COFF::SymbolBaseType &Value) {
+ ECase(IMAGE_SYM_TYPE_NULL);
+ ECase(IMAGE_SYM_TYPE_VOID);
+ ECase(IMAGE_SYM_TYPE_CHAR);
+ ECase(IMAGE_SYM_TYPE_SHORT);
+ ECase(IMAGE_SYM_TYPE_INT);
+ ECase(IMAGE_SYM_TYPE_LONG);
+ ECase(IMAGE_SYM_TYPE_FLOAT);
+ ECase(IMAGE_SYM_TYPE_DOUBLE);
+ ECase(IMAGE_SYM_TYPE_STRUCT);
+ ECase(IMAGE_SYM_TYPE_UNION);
+ ECase(IMAGE_SYM_TYPE_ENUM);
+ ECase(IMAGE_SYM_TYPE_MOE);
+ ECase(IMAGE_SYM_TYPE_BYTE);
+ ECase(IMAGE_SYM_TYPE_WORD);
+ ECase(IMAGE_SYM_TYPE_UINT);
+ ECase(IMAGE_SYM_TYPE_DWORD);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<COFF::MachineTypes> {
+ static void enumeration(IO &IO, COFF::MachineTypes &Value) {
+ ECase(IMAGE_FILE_MACHINE_UNKNOWN);
+ ECase(IMAGE_FILE_MACHINE_AM33);
+ ECase(IMAGE_FILE_MACHINE_AMD64);
+ ECase(IMAGE_FILE_MACHINE_ARM);
+ ECase(IMAGE_FILE_MACHINE_ARMV7);
+ ECase(IMAGE_FILE_MACHINE_EBC);
+ ECase(IMAGE_FILE_MACHINE_I386);
+ ECase(IMAGE_FILE_MACHINE_IA64);
+ ECase(IMAGE_FILE_MACHINE_M32R);
+ ECase(IMAGE_FILE_MACHINE_MIPS16);
+ ECase(IMAGE_FILE_MACHINE_MIPSFPU);
+ ECase(IMAGE_FILE_MACHINE_MIPSFPU16);
+ ECase(IMAGE_FILE_MACHINE_POWERPC);
+ ECase(IMAGE_FILE_MACHINE_POWERPCFP);
+ ECase(IMAGE_FILE_MACHINE_R4000);
+ ECase(IMAGE_FILE_MACHINE_SH3);
+ ECase(IMAGE_FILE_MACHINE_SH3DSP);
+ ECase(IMAGE_FILE_MACHINE_SH4);
+ ECase(IMAGE_FILE_MACHINE_SH5);
+ ECase(IMAGE_FILE_MACHINE_THUMB);
+ ECase(IMAGE_FILE_MACHINE_WCEMIPSV2);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<COFF::Characteristics> {
+ static void enumeration(IO &IO, COFF::Characteristics &Value) {
+ ECase(IMAGE_FILE_RELOCS_STRIPPED);
+ ECase(IMAGE_FILE_EXECUTABLE_IMAGE);
+ ECase(IMAGE_FILE_LINE_NUMS_STRIPPED);
+ ECase(IMAGE_FILE_LOCAL_SYMS_STRIPPED);
+ ECase(IMAGE_FILE_AGGRESSIVE_WS_TRIM);
+ ECase(IMAGE_FILE_LARGE_ADDRESS_AWARE);
+ ECase(IMAGE_FILE_BYTES_REVERSED_LO);
+ ECase(IMAGE_FILE_32BIT_MACHINE);
+ ECase(IMAGE_FILE_DEBUG_STRIPPED);
+ ECase(IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP);
+ ECase(IMAGE_FILE_NET_RUN_FROM_SWAP);
+ ECase(IMAGE_FILE_SYSTEM);
+ ECase(IMAGE_FILE_DLL);
+ ECase(IMAGE_FILE_UP_SYSTEM_ONLY);
+ ECase(IMAGE_FILE_BYTES_REVERSED_HI);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<COFF::SectionCharacteristics> {
+ static void enumeration(IO &IO, COFF::SectionCharacteristics &Value) {
+ ECase(IMAGE_SCN_TYPE_NO_PAD);
+ ECase(IMAGE_SCN_CNT_CODE);
+ ECase(IMAGE_SCN_CNT_INITIALIZED_DATA);
+ ECase(IMAGE_SCN_CNT_UNINITIALIZED_DATA);
+ ECase(IMAGE_SCN_LNK_OTHER);
+ ECase(IMAGE_SCN_LNK_INFO);
+ ECase(IMAGE_SCN_LNK_REMOVE);
+ ECase(IMAGE_SCN_LNK_COMDAT);
+ ECase(IMAGE_SCN_GPREL);
+ ECase(IMAGE_SCN_MEM_PURGEABLE);
+ ECase(IMAGE_SCN_MEM_16BIT);
+ ECase(IMAGE_SCN_MEM_LOCKED);
+ ECase(IMAGE_SCN_MEM_PRELOAD);
+ ECase(IMAGE_SCN_ALIGN_1BYTES);
+ ECase(IMAGE_SCN_ALIGN_2BYTES);
+ ECase(IMAGE_SCN_ALIGN_4BYTES);
+ ECase(IMAGE_SCN_ALIGN_8BYTES);
+ ECase(IMAGE_SCN_ALIGN_16BYTES);
+ ECase(IMAGE_SCN_ALIGN_32BYTES);
+ ECase(IMAGE_SCN_ALIGN_64BYTES);
+ ECase(IMAGE_SCN_ALIGN_128BYTES);
+ ECase(IMAGE_SCN_ALIGN_256BYTES);
+ ECase(IMAGE_SCN_ALIGN_512BYTES);
+ ECase(IMAGE_SCN_ALIGN_1024BYTES);
+ ECase(IMAGE_SCN_ALIGN_2048BYTES);
+ ECase(IMAGE_SCN_ALIGN_4096BYTES);
+ ECase(IMAGE_SCN_ALIGN_8192BYTES);
+ ECase(IMAGE_SCN_LNK_NRELOC_OVFL);
+ ECase(IMAGE_SCN_MEM_DISCARDABLE);
+ ECase(IMAGE_SCN_MEM_NOT_CACHED);
+ ECase(IMAGE_SCN_MEM_NOT_PAGED);
+ ECase(IMAGE_SCN_MEM_SHARED);
+ ECase(IMAGE_SCN_MEM_EXECUTE);
+ ECase(IMAGE_SCN_MEM_READ);
+ ECase(IMAGE_SCN_MEM_WRITE);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<COFF::RelocationTypeX86> {
+ static void enumeration(IO &IO, COFF::RelocationTypeX86 &Value) {
+ ECase(IMAGE_REL_I386_ABSOLUTE);
+ ECase(IMAGE_REL_I386_DIR16);
+ ECase(IMAGE_REL_I386_REL16);
+ ECase(IMAGE_REL_I386_DIR32);
+ ECase(IMAGE_REL_I386_DIR32NB);
+ ECase(IMAGE_REL_I386_SEG12);
+ ECase(IMAGE_REL_I386_SECTION);
+ ECase(IMAGE_REL_I386_SECREL);
+ ECase(IMAGE_REL_I386_TOKEN);
+ ECase(IMAGE_REL_I386_SECREL7);
+ ECase(IMAGE_REL_I386_REL32);
+ ECase(IMAGE_REL_AMD64_ABSOLUTE);
+ ECase(IMAGE_REL_AMD64_ADDR64);
+ ECase(IMAGE_REL_AMD64_ADDR32);
+ ECase(IMAGE_REL_AMD64_ADDR32NB);
+ ECase(IMAGE_REL_AMD64_REL32);
+ ECase(IMAGE_REL_AMD64_REL32_1);
+ ECase(IMAGE_REL_AMD64_REL32_2);
+ ECase(IMAGE_REL_AMD64_REL32_3);
+ ECase(IMAGE_REL_AMD64_REL32_4);
+ ECase(IMAGE_REL_AMD64_REL32_5);
+ ECase(IMAGE_REL_AMD64_SECTION);
+ ECase(IMAGE_REL_AMD64_SECREL);
+ ECase(IMAGE_REL_AMD64_SECREL7);
+ ECase(IMAGE_REL_AMD64_TOKEN);
+ ECase(IMAGE_REL_AMD64_SREL32);
+ ECase(IMAGE_REL_AMD64_PAIR);
+ ECase(IMAGE_REL_AMD64_SSPAN32);
+ }
+};
+
+#undef ECase
+
+template <>
+struct MappingTraits<COFFYAML::Symbol> {
+ static void mapping(IO &IO, COFFYAML::Symbol &S) {
+ IO.mapRequired("SimpleType", S.SimpleType);
+ IO.mapOptional("NumberOfAuxSymbols", S.NumberOfAuxSymbols);
+ IO.mapRequired("Name", S.Name);
+ IO.mapRequired("StorageClass", S.StorageClass);
+ IO.mapOptional("AuxillaryData", S.AuxillaryData); // FIXME: typo
+ IO.mapRequired("ComplexType", S.ComplexType);
+ IO.mapRequired("Value", S.Value);
+ IO.mapRequired("SectionNumber", S.SectionNumber);
+ }
+};
+
+template <>
+struct MappingTraits<COFFYAML::Header> {
+ static void mapping(IO &IO, COFFYAML::Header &H) {
+ IO.mapRequired("Machine", H.Machine);
+ IO.mapOptional("Characteristics", H.Characteristics);
+ }
+};
+
+template <>
+struct MappingTraits<COFFYAML::Relocation> {
+ static void mapping(IO &IO, COFFYAML::Relocation &Rel) {
+ IO.mapRequired("Type", Rel.Type);
+ IO.mapRequired("VirtualAddress", Rel.VirtualAddress);
+ IO.mapRequired("SymbolTableIndex", Rel.SymbolTableIndex);
+ }
+};
+
+template <>
+struct MappingTraits<COFFYAML::Section> {
+ static void mapping(IO &IO, COFFYAML::Section &Sec) {
+ IO.mapOptional("Relocations", Sec.Relocations);
+ IO.mapRequired("SectionData", Sec.SectionData);
+ IO.mapRequired("Characteristics", Sec.Characteristics);
+ IO.mapRequired("Name", Sec.Name);
+ }
+};
+
+template <>
+struct MappingTraits<COFFYAML::Object> {
+ static void mapping(IO &IO, COFFYAML::Object &Obj) {
+ IO.mapRequired("sections", Obj.Sections);
+ IO.mapRequired("header", Obj.HeaderData);
+ IO.mapRequired("symbols", Obj.Symbols);
+ }
+};
+} // end namespace yaml
+} // end namespace llvm
+
int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv);
sys::PrintStackTraceOnErrorSignal();
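The final hunk below completes the migration: main() stops walking yaml::Stream nodes by hand and instead hands the buffer to yaml::Input, which drives the MappingTraits declared above. A minimal self-contained sketch of that pattern (Point is a hypothetical type; Input, IO, and MappingTraits are the same facilities used in this patch):

    #include "llvm/Support/YAMLTraits.h"

    struct Point { int X; int Y; };

    namespace llvm {
    namespace yaml {
    template <>
    struct MappingTraits<Point> {
      static void mapping(IO &IO, Point &P) {
        IO.mapRequired("x", P.X);
        IO.mapRequired("y", P.Y);
      }
    };
    } // end namespace yaml
    } // end namespace llvm

    // Parse "{x: 1, y: 2}"-style input; returns false on malformed YAML.
    bool parsePoint(llvm::StringRef Buffer, Point &P) {
      llvm::yaml::Input YIn(Buffer);
      YIn >> P; // runs MappingTraits<Point>::mapping
      return !YIn.error();
    }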
@@ -864,13 +684,20 @@ int main(int argc, char **argv) {
if (MemoryBuffer::getFileOrSTDIN(Input, Buf))
return 1;
- SourceMgr SM;
- yaml::Stream S(Buf->getBuffer(), SM);
- COFFParser CP(S);
+ yaml::Input YIn(Buf->getBuffer());
+ COFFYAML::Object Doc;
+ YIn >> Doc;
+ if (YIn.error()) {
+ errs() << "yaml2obj: Failed to parse YAML file!\n";
+ return 1;
+ }
+
+ COFFParser CP(Doc);
if (!CP.parse()) {
errs() << "yaml2obj: Failed to parse YAML file!\n";
return 1;
}
+
if (!layoutCOFF(CP)) {
errs() << "yaml2obj: Failed to layout COFF file!\n";
return 1;